diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
deleted file mode 100644
index a688be130711c..0000000000000
--- a/.devcontainer/Dockerfile
+++ /dev/null
@@ -1,3 +0,0 @@
-FROM julia:latest
-
-RUN apt-get update && apt-get install -y build-essential libatomic1 python gfortran perl wget m4 cmake pkg-config git
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index a3747ca019694..455f8bea3e952 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,8 +1,12 @@
 {
-    "extensions": [
-      "julialang.language-julia",
-      "ms-vscode.cpptools"
-    ],
-  
-    "dockerFile": "Dockerfile"
+  "image": "docker.io/library/julia:latest",
+  "customizations": {
+      "vscode": {
+        "extensions": [
+          "julialang.language-julia",
+          "ms-vscode.cpptools"
+        ]
+     }
+  },
+  "onCreateCommand": "apt-get update && apt-get install -y build-essential libatomic1 python3 gfortran perl wget m4 cmake pkg-config git"
 }
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 3af8ba86153a1..bf6e580ace8cf 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -7,3 +7,7 @@ e66bfa5dd32f93e76068c00ad882c1fc839c5af8
 100a741e7ab38c91d48cc929bb001afc8e09261f
 # whitespace: replace tabs => space
 b03e8ab9c7bd3e001add519571858fa04d6a249b
+# whitespace: replace 2-space => 4-space for indentation
+f1b567507731129f90ca0dffc8fbc0ed98b6a15d
+# whitespace: replace multiple spaces after period with a single space
+f942c29bb0d02cc24f19712c642ac72ffc85a26b
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index bf1380f5a07bc..81dfe4f94ac00 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -4,3 +4,19 @@ CODEOWNERS @JuliaLang/github-actions
 
 /.github/workflows/rerun_failed.yml @DilumAluthge
 /.github/workflows/statuses.yml @DilumAluthge
+/.github/workflows/PrAssignee.yml @LilithHafner @DilumAluthge
+/base/special/ @oscardssmith
+/base/sort.jl @LilithHafner
+/test/sorting.jl @LilithHafner
+/stdlib/*_jll @giordano
+/base/binaryplatforms.jl @giordano
+/src/julia_gcext.h @fingolfin
+/test/gcext/gcext.c @fingolfin
+/src/gc-common.* @d-netto
+/src/gc-debug.c @d-netto
+/src/gc-interface.h @d-netto
+/src/gc-page-profiler.* @d-netto
+/src/gc-pages.c @d-netto
+/src/gc-stock.* @d-netto
+/src/gc-tls-common.h @d-netto
+/src/gc-tls-stock.h @d-netto
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000000000..c558006ed18e4
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "monthly"
+    open-pull-requests-limit: 100
+    labels:
+      - "dependencies"
+      - "github-actions"
+      - "ci"
diff --git a/.github/workflows/LabelCheck.yml b/.github/workflows/LabelCheck.yml
index 194b0c92065c9..c966e478e3fe0 100644
--- a/.github/workflows/LabelCheck.yml
+++ b/.github/workflows/LabelCheck.yml
@@ -11,9 +11,9 @@ jobs:
     runs-on: ubuntu-latest
     timeout-minutes: 2
     steps:
-    - uses: yogevbd/enforce-label-action@2.2.2
+    - uses: yogevbd/enforce-label-action@a3c219da6b8fa73f6ba62b68ff09c469b3a1c024 # 2.2.2
       with:
         # REQUIRED_LABELS_ANY: "bug,enhancement,skip-changelog"
         # REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['bug','enhancement','skip-changelog']"
-        BANNED_LABELS: "needs docs,needs compat annotation,needs more info,needs nanosoldier run,needs news,needs pkgeval,needs tests,DO NOT MERGE"
-        BANNED_LABELS_DESCRIPTION: "A PR should not be merged with `needs *` or `DO NOT MERGE` labels"
+        BANNED_LABELS: "needs docs,needs compat annotation,needs more info,needs nanosoldier run,needs news,needs pkgeval,needs tests,needs decision,DO NOT MERGE,status:DO NOT MERGE"
+        BANNED_LABELS_DESCRIPTION: "A PR should not be merged with `needs *` or `status:DO NOT MERGE` labels"
diff --git a/.github/workflows/PrAssignee.yml b/.github/workflows/PrAssignee.yml
new file mode 100644
index 0000000000000..40ac0776f21d6
--- /dev/null
+++ b/.github/workflows/PrAssignee.yml
@@ -0,0 +1,210 @@
+name: PR Assignee
+on:
+  # Important security note: Do NOT use `actions/checkout`
+  # or any other method for checking out the pull request's source code.
+  # This is because the pull request's source code is untrusted, but the
+  # GITHUB_TOKEN has write permissions (because of the `on: pull_request_target` event).
+  #
+  # Quoting from the GitHub Docs:
+  # > For workflows that are triggered by the pull_request_target event, the GITHUB_TOKEN is granted
+  # > read/write repository permission unless the permissions key is specified and the workflow can access secrets,
+  # > even when it is triggered from a fork.
+  # >
+  # > Although the workflow runs in the context of the base of the pull request,
+  # > you should make sure that you do not check out, build, or run untrusted code from the pull request with this event.
+  #
+  # Source: https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#pull_request_target
+  #
+  # See also: https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/
+  pull_request_target:
+    types: [opened, reopened, ready_for_review]
+
+# Permissions for the `GITHUB_TOKEN`:
+permissions:
+  pull-requests: write # Needed in order to assign a user as the PR assignee
+
+jobs:
+  pr-assignee:
+    runs-on: ubuntu-latest
+    if: ${{ github.event.pull_request.draft != true }}
+    steps:
+      # Important security note: As discussed above, do NOT use `actions/checkout`
+      # or any other method for checking out the pull request's source code.
+      # This is because the pull request's source code is untrusted, but the
+      # GITHUB_TOKEN has write permissions (because of the `on: pull_request_target` event).
+      - name: Add Assignee
+        # We pin all third-party actions to a full length commit SHA
+        # https://docs.github.com/en/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions#using-third-party-actions
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        with:
+          retries: 5 # retry GitHub API requests up to 5 times, with exponential backoff
+          retry-exempt-status-codes: 404
+          # Don't retry 404 because we will hit a 404 when the PR author is a committer.
+          # This 404 is normal and expected.
+          # Do retry 400 and other 4xx errors because github sometimes (erroneously)
+          # returns a 4xx error code due to server errors.
+          script: |
+            const oldPrAssignees = context.payload.pull_request.assignees
+              .map(obj => obj.login)
+            console.log('oldPrAssignees: ', oldPrAssignees);
+            const prAuthor = context.payload.pull_request.user.login;
+
+            // Check if the PR is opened by a collaborator on the repo, aka someone with write (commit) permissions or higher.
+            const relevantPerms = [
+              // 'triage', // Uncomment this line if you don't want PRs from triagers to get auto-assignees.
+              'push',
+              'maintain',
+              'admin',
+            ]
+            const allCollaboratorsNestedPromises = relevantPerms.map(
+              (perm) => github.paginate(
+                // We use the `/repos/{owner}/{repo}/collaborators` endpoint to avoid needing org scope permissions:
+                '/repos/{owner}/{repo}/collaborators',
+                {
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  per_page: 100,
+                  permission: perm,
+                },
+                (response) => response.data.map((collaboratorInfo) => collaboratorInfo.login),
+              )
+            )
+            const allCollaboratorsNested = await Promise.all(allCollaboratorsNestedPromises);
+            const allCollaboratorsFlattened = allCollaboratorsNested.flat();
+
+            // Skip BumpStdlibs.jl PRs
+            allCollaboratorsFlattened.push('DilumAluthgeBot');
+            // Skip Dependabot PRs
+            allCollaboratorsFlattened.push('dependabot');
+
+            const isCollaborator = allCollaboratorsFlattened.includes(prAuthor);
+
+            console.log('prAuthor: ', prAuthor);
+            console.log('isCollaborator: ', isCollaborator);
+
+            // Load the list of assignable reviewers from the JuliaLang/pr-assignment repo at:
+            // https://github.com/JuliaLang/pr-assignment/blob/main/users.txt
+            //
+            // NOTE to JuliaLang committers: If you want to be assigned to new PRs, please add your
+            // GitHub username to that file.
+
+            // Load file contents
+            const { data: fileContentsObj } = await github.rest.repos.getContent({
+              owner: 'JuliaLang',
+              repo: 'pr-assignment',
+              path: 'users.txt',
+              ref: 'main',
+            });
+
+            const fileContentsBufferObj = Buffer.from(fileContentsObj.content, "base64");
+            const fileContentsText = fileContentsBufferObj.toString("utf8");
+
+            // Find lines that match the following regex, and extract the usernames:
+            const regex = /^@([a-zA-Z0-9\-]+)(\s*?)?(#[\S]*?)?$/;
+            const assigneeCandidates = fileContentsText
+              .split('\n')
+              .map(line => line.trim())
+              .map(line => line.match(regex))
+              .filter(match => match !== null)
+              .map(match => match[1]);
+
+            console.log('assigneeCandidates: ', assigneeCandidates);
+            if (assigneeCandidates.length < 1) {
+              const msg = 'ERROR: Could not find any assigneeCandidates';
+              console.error(msg);
+              throw new Error(msg);
+            }
+
+            if (oldPrAssignees.length >= 1) {
+              console.log('Skipping this PR, because it already has at least one assignee');
+              return;
+            }
+
+
+            const RUNNER_DEBUG_original = process.env.RUNNER_DEBUG;
+            console.log('RUNNER_DEBUG_original: ', RUNNER_DEBUG_original);
+            if (RUNNER_DEBUG_original === undefined) {
+              var thisIsActionsRunnerDebugMode = false;
+            } else {
+              const RUNNER_DEBUG_trimmed = RUNNER_DEBUG_original.trim().toLowerCase()
+              if (RUNNER_DEBUG_trimmed.length < 1) {
+                var thisIsActionsRunnerDebugMode = false;
+              } else {
+                var thisIsActionsRunnerDebugMode = (RUNNER_DEBUG_trimmed == 'true') || (RUNNER_DEBUG_trimmed == '1');
+              }
+            }
+            console.log('thisIsActionsRunnerDebugMode: ', thisIsActionsRunnerDebugMode);
+
+            if (isCollaborator == true) {
+
+              if (thisIsActionsRunnerDebugMode) {
+                // The PR author is a committer
+                // But thisIsActionsRunnerDebugMode is true, so we proceed to still run the rest of the script
+                console.log('PR is authored by JuliaLang committer, but thisIsActionsRunnerDebugMode is true, so we will still run the rest of the script: ', prAuthor);
+              } else {
+                // The PR author is a committer, so we skip assigning them
+                console.log('Skipping PR authored by JuliaLang committer: ', prAuthor);
+                console.log('Note: If you want to run the full script (even though the PR author is a committer), simply re-run this job with Actions debug logging enabled');
+                return;
+              }
+            }
+
+            var weDidEncounterError = false;
+
+            // Assign random committer
+            const selectedAssignee = assigneeCandidates[Math.floor(Math.random()*assigneeCandidates.length)]
+            console.log('selectedAssignee: ', selectedAssignee);
+            console.log(`Attempting to assign @${selectedAssignee} to this PR...`);
+            await github.rest.issues.addAssignees({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.payload.pull_request.number,
+              assignees: selectedAssignee,
+            });
+
+            // The following is commented out because the label only makes sense in the presence of a larger state machine
+            // // Add the "pr review" label
+            // const prReviewLabel = 'status: waiting for PR reviewer';
+            // console.log('Attempting to add prReviewLabel to this PR...');
+            // await github.rest.issues.addLabels({
+            //   owner: context.repo.owner,
+            //   repo: context.repo.repo,
+            //   issue_number: context.payload.pull_request.number,
+            //   labels: [prReviewLabel],
+            // });
+
+            // Now get the updated PR info, and see if we were successful:
+            const updatedPrData = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: context.payload.pull_request.number,
+            });
+            const newPrAssignees = updatedPrData
+              .data
+              .assignees
+              .map(element => element.login)
+            console.log('newPrAssignees: ', newPrAssignees);
+            if (newPrAssignees.includes(selectedAssignee)) {
+              console.log(`Successfully assigned @${selectedAssignee}`);
+            } else {
+              weDidEncounterError = true;
+              console.log(`ERROR: Failed to assign @${selectedAssignee}`);
+            }
+            // const newPrLabels = updatedPrData
+            //   .data
+            //   .labels
+            //   .map(element => element.name)
+            // console.log('newPrLabels: ', newPrLabels);
+            // if (newPrLabels.includes(prReviewLabel)) {
+            //   console.log('Successfully added prReviewLabel');
+            // } else {
+            //   weDidEncounterError = true;
+            //   console.log('ERROR: Failed to add add prReviewLabel');
+            // }
+
+            // Exit with error if any problems were encountered earlier
+            if (weDidEncounterError) {
+              const msg = 'ERROR: Encountered at least one problem while running the script';
+              console.error(msg);
+              throw new Error(msg);
+            }
diff --git a/.github/workflows/Typos.yml b/.github/workflows/Typos.yml
new file mode 100644
index 0000000000000..df97613acfe26
--- /dev/null
+++ b/.github/workflows/Typos.yml
@@ -0,0 +1,70 @@
+name: Typos
+
+permissions: {}
+
+on: [pull_request]
+
+jobs:
+  typos-check:
+    name: Check for new typos
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Checkout the JuliaLang/julia repository
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          persist-credentials: false
+      - name: Check spelling with typos
+        #uses: crate-ci/typos@c7af4712eda24dd1ef54bd8212973888489eb0ce # v1.23.5
+        env:
+          GH_TOKEN: "${{ github.token }}"
+        run: |
+          git fetch --depth=1 origin ${{ github.base_ref }}
+          OLD_FILES=$(git diff-index --name-only --diff-filter=ad FETCH_HEAD)
+          NEW_FILES=$(git diff-index --name-only --diff-filter=d FETCH_HEAD)
+
+          # This is necessary because the typos command interprets the
+          # empty string as "check all files" rather than "check no files".
+          if [ -z "$NEW_FILES" ]; then
+            echo "All edited files were deleted. Skipping typos check."
+            exit 0
+          fi
+
+          mkdir -p "${{ runner.temp }}/typos"
+          RELEASE_ASSET_URL="$(
+            gh api /repos/crate-ci/typos/releases/latest \
+            --jq '."assets"[] | select(."name" | test("^typos-.+-x86_64-unknown-linux-musl\\.tar\\.gz$")) | ."browser_download_url"'
+          )"
+          wget --secure-protocol=TLSv1_3 --max-redirect=1 --retry-on-host-error --retry-connrefused --tries=3 \
+            --quiet --output-document=- "${RELEASE_ASSET_URL}" \
+            | tar -xz -C "${{ runner.temp }}/typos" ./typos
+          "${{ runner.temp }}/typos/typos" --version
+
+          echo -n $NEW_FILES | xargs "${{ runner.temp }}/typos/typos" --format json >> ${{ runner.temp }}/new_typos.jsonl || true
+          git checkout FETCH_HEAD -- $OLD_FILES
+          if [ -z "$OLD_FILES" ]; then
+            touch "${{ runner.temp }}/old_typos.jsonl" # No old files, so no old typos.
+          else
+            echo -n $OLD_FILES | xargs "${{ runner.temp }}/typos/typos" --format json >> ${{ runner.temp }}/old_typos.jsonl || true
+          fi
+
+
+          python -c '
+          import sys, json
+          old = set()
+          with open(sys.argv[1]) as old_file:
+            for line in old_file:
+              j = json.loads(line)
+              if j["type"] == "typo":
+                old.add(j["typo"])
+          clean = True
+          with open(sys.argv[2]) as new_file:
+            for line in new_file:
+              new = json.loads(line)
+              if new["type"] == "typo" and new["typo"] not in old:
+                if len(new["typo"]) > 6: # Short typos might be false positives. Long are probably real.
+                  clean = False
+                print("::warning file={},line={},col={}::perhaps \"{}\" should be \"{}\".".format(
+                  new["path"], new["line_num"], new["byte_offset"],
+                  new["typo"], " or ".join(new["corrections"])))
+          sys.exit(1 if not clean else 0)' "${{ runner.temp }}/old_typos.jsonl" "${{ runner.temp }}/new_typos.jsonl"
diff --git a/.github/workflows/Whitespace.yml b/.github/workflows/Whitespace.yml
new file mode 100644
index 0000000000000..c2ee8b98c742d
--- /dev/null
+++ b/.github/workflows/Whitespace.yml
@@ -0,0 +1,26 @@
+name: Whitespace
+
+permissions: {}
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+jobs:
+  whitespace:
+    name: Check whitespace
+    runs-on: ubuntu-latest
+    timeout-minutes: 2
+    steps:
+      - name: Checkout the JuliaLang/julia repository
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          persist-credentials: false
+      - uses: julia-actions/setup-julia@5c9647d97b78a5debe5164e9eec09d653d29bd71 # v2.6.1
+        with:
+          version: '1.11.6'
+      - name: Check whitespace
+        run: |
+          contrib/check-whitespace.jl
diff --git a/.github/workflows/cffconvert.yml b/.github/workflows/cffconvert.yml
new file mode 100644
index 0000000000000..e47db8400cb6e
--- /dev/null
+++ b/.github/workflows/cffconvert.yml
@@ -0,0 +1,33 @@
+name: cffconvert
+
+on:
+  push:
+    branches:
+      - 'master'
+      - 'release-*'
+    paths:
+      - CITATION.cff
+  pull_request:
+    branches:
+      - 'master'
+      - 'release-*'
+    paths:
+      - CITATION.cff
+
+permissions:
+  contents: read
+
+jobs:
+  validate:
+    name: "validate"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out a copy of the repository
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          persist-credentials: false
+
+      - name: Check whether the citation metadata from CITATION.cff is valid
+        uses: citation-file-format/cffconvert-github-action@4cf11baa70a673bfdf9dad0acc7ee33b3f4b6084 # 2.0.0
+        with:
+          args: "--validate"
diff --git a/.gitignore b/.gitignore
index f0072fec9c91e..c4df2542005d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,8 +9,12 @@
 /usr-staging
 /Make.user
 /julia-*
+/deps/jlutilities/depot
 /source-dist.tmp
 /source-dist.tmp1
+/test/results_*.json
+/test/results_*.dat
+/test/deps
 
 *.expmap
 *.exe
@@ -34,11 +38,15 @@
 .DS_Store
 .idea/*
 .vscode/*
+.zed/*
 *.heapsnapshot
 .cache
 # Buildkite: Ignore the entire .buildkite directory
 /.buildkite
 
+# Builtkite: json test data
+/test/results.json
+
 # Buildkite: Ignore the unencrypted repo_key
 repo_key
 
diff --git a/.mailmap b/.mailmap
index e91501651d065..e278160d7381b 100644
--- a/.mailmap
+++ b/.mailmap
@@ -284,9 +284,9 @@ Daniel Karrasch <Daniel.Karrasch@gmx.de> <daniel.karrasch@posteo.de>
 Roger Luo <rogerluo.rl18@gmail.com> <rogerluo.rl18@gmail.com>
 Roger Luo <rogerluo.rl18@gmail.com> <hiroger@qq.com>
 
-Frames Catherine White <me@oxinabox.net> <oxinabox@ucc.asn.au>
-Frames Catherine White <me@oxinabox.net> <lyndon.white@invenialabs.co.uk>
-Frames Catherine White <me@oxinabox.net> <lyndon.white@research.uwa.edu.au>
+Frames White <me@oxinabox.net> <oxinabox@ucc.asn.au>
+Frames White <me@oxinabox.net> <lyndon.white@invenialabs.co.uk>
+Frames White <me@oxinabox.net> <lyndon.white@research.uwa.edu.au>
 
 Claire Foster <aka.c42f@gmail.com> <chris42f@gmail.com>
 
@@ -295,3 +295,16 @@ Jishnu Bhattacharya <jishnub.github@gmail.com> <jishnub@users.noreply.github.com
 
 Shuhei Kadowaki <aviatesk@gmail.com> <aviatesk@gmail.com>
 Shuhei Kadowaki <aviatesk@gmail.com> <40514306+aviatesk@users.noreply.github.com>
+
+inky <git@wo-class.cn>
+inky <git@wo-class.cn> <inkydragon@users.noreply.github.com>
+
+Lilith Orion Hafner <lilithhafner@gmail.com> <Lilith.Hafner@gmail.com>
+Lilith Orion Hafner <lilithhafner@gmail.com> <60898866+LilithHafner@users.noreply.github.com>
+
+Timothy <git@tecosaur.net>
+
+Bhuminjay Soni <soni5happy@gmail.com>
+Bhuminjay Soni <soni5happy@gmail.com> <76656712+11happy@users.noreply.github.com>
+
+Florian Atteneder <florian.atteneder@gmail.com>
diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000000000..dd4cd5eaa65ed
--- /dev/null
+++ b/.vscode/c_cpp_properties.json
@@ -0,0 +1,20 @@
+{
+    "configurations": [
+        {
+            "name": "Julia",
+            "includePath": [
+                "${workspaceFolder}/src/**",
+                "${workspaceFolder}/usr/include/**"
+            ],
+            "cStandard": "c11",
+            "cppStandard": "c++17",
+            "compileCommands": [
+                "${workspaceFolder}/src/compile_commands.json",
+                "${workspaceFolder}/src/flisp/compile_commands.json",
+                "${workspaceFolder}/src/support/compile_commands.json"
+            ]
+        }
+    ],
+    "version": 4,
+    "enableConfigurationSquiggles": true
+}
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000000000..d81defa16f136
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,121 @@
+# Information for AI agents
+
+## Module Organization
+- `base/` - Core standard library (loaded at startup)
+- `stdlib/` - Standard library packages (can be loaded independently)
+- `Compiler/` - Julia compiler as a separate module (can be swapped)
+- `src/` - C/C++ runtime and LLVM codegen
+- `cli/` - Command-line interface and loader
+- `doc/` - Documentation and User Manual
+
+## Running Julia
+
+You should have a recent binary copy of julia in your `$HOME/.juliaup/bin` directory.
+You may use this julia executable for validation.
+If a built version of Julia exists in the current source tree (at `usr/bin/julia`),
+prefer that version.
+Note that any changes you make to the source code after the binary is built
+will not be reflected, unless you use `Revise`.
+
+## For all changes
+
+1. Run `make fix-whitespace` before creating the PR to make sure you're not committing any whitespace errors.
+
+## Building Julia
+
+If you made changes to the runtime (any files in `src/`), you will need to rebuild
+julia. Run `make -j` to rebuild julia. This process may take up to 10 minutes
+depending on your changes.
+
+After making changes, run static analysis checks:
+  - First run `make -C src install-analysis-deps` to initialize dependencies (only needed once the first time).
+  - Run `make -C src analyze-<filename> --output-sync -j8` (replace `<filename>` with the basename of any C or C++ file you modified, excluding headers).
+  - Tests can also be rerun individually with `clang-sa-<filename>`, `clang-sagc-<filename>` or `clang-tidy-<filename>`.
+  - If `clang-sagc-<filename>` fails, it may require adding `JL_GC_PUSH` statements, or `JL_GC_PROMISE_ROOTED` statements., or require fixing locks. Remember arguments are assumed rooted, so check the callers to make sure that is handled. If the value is being temporarily moved around in a struct or arraylist, `JL_GC_PROMISE_ROOTED(struct->field)` may be needed as a statement (it return void) immediately after reloading the struct before any use of struct. Put that promise as early in the code as is legal, near the definition not the use.
+
+## Using Revise
+
+If you have made changes to files included in the system image (base/ or stdlib/),
+and need to run code with these changes included, you can use `Revise`.
+To do so, run `using Revise; Revise.track(Base)` (or Revise.track with the stdlib you modified).
+The test system supports doing this automatically (see below).
+
+For instance testing Base changes without rebuilding, using failfast, you can run:
+```
+JULIA_TEST_FAILFAST=1 ./julia -e 'using Revise; Revise.track(Base); include("test.jl")'
+```
+
+## Specific instructions for particular changes
+
+### Doctests
+
+#### Writing doctests
+
+If you are asked to write new doctests, first review `doc/src/devdocs/contributing/jldoctests.md`
+for best practices.
+
+#### Verifying doctests
+If you have changed any `jldoctest` code blocks you should take
+the following steps to verify your work:
+- Review `doc/src/devdocs/contributing/jldoctests.md`. In particular, determine
+  if any of the changed doctests require filters, labels or setup code.
+- Run the doctests to verify that your change works:
+    - To run doctest with the pre-built juliaup: `make -C doc doctest=true  revise=true JULIA_EXECUTABLE=$HOME/.juliaup/bin/julia`
+    - To run doctest with in-trr julia (preferred): `make -C doc doctest=true revise=true`. Do not pass any other options.
+    - IMPORTANT: The doctests may take up to 15 minutes. Do NOT terminate the doctests before completion. Do NOT use a timeout for doctests.
+    - If you are ChatGPT, you may have to increase yield_timeout_ms.
+
+Follow these steps for EVERY change you make in a doctest.
+
+### Test changes
+
+If you have changed a test (e.g. `foo`), you should run `make test-revise-foo` for the
+corresponding test to ensure that the test is still passing with your changes.
+- If you are adding a new test, add it to an existing test file. Do not create a new test file unless explicitly instructed.
+- Write one comment at the top of the test to explain what is being tested.
+  Otherwise keep comments minimal.
+- Use the environment variable `JULIA_TEST_FAILFAST=1` to make tests fail fast.
+
+### External dependencies
+
+When modifying external dependencies (patches in `deps/patches/` or version updates in `deps/`):
+
+1. Always test builds with `USE_BINARYBUILDER=0` to ensure source builds work correctly
+2. For patches to external libraries:
+   - Verify the patch applies cleanly by running the extraction and patch steps
+   - Test the full build of the dependency: `make -C deps USE_BINARYBUILDER=0 compile-<depname>`
+   - Prefer using the full upstream commit in `git am` format (e.g., `git format-patch`) which includes proper commit metadata
+3. When updating dependency versions, ensure all associated patches still apply
+
+### External JLLs
+
+To update a JLL to the latest version:
+- Update the version number in the appropriate jll folder
+- If the dependencies in the upstream jll changed, update the Project.toml
+- Run `make -f contrib/refresh_checksums.mk <jll>` to update the checksums. This may take a few minutes.
+
+### Writing code
+After writing code, look up the docstring for each function you used. If there
+are recommendations or additional considerations that apply to these functions,
+make sure to take them into account.
+
+#### Specific instructions
+- Do not `ccall` runtime C functions directly if there are existing wrappers for the function.
+- Do not explicitly add a module prefix if the code you're adding is in the same module. E.g. do not use `Base.` for code in Base unless required.
+
+## Commit messages and pull requests
+
+When writing commit messages, follow the format "component: Brief summary" for
+the title. In the body of the commit message, provide a brief prose summary
+of the purpose of the changes made. Do not specifically mention added tests, comments,
+documentation, etc., unless this is the main purpose of the change. Do not mention
+the test plan, unless it differs from what you were instructed to do in AGENTS.md.
+If your change fixes one or more issues, use the syntax "Fixes #" at the end of the commit message, but do not include it in the title.
+
+When referencing external GitHub PRs or issues, use proper GitHub interlinking format (e.g., `owner/repo#123` for PRs/issues).
+When fixing CI failures, include the link to the specific CI failure in the commit message.
+
+When creating pull requests:
+1. If the pull request consists of one commit only, use the body of the commit for the body of the pull request.
+2. If there are multiple commits in the pull request, follow the same guidelines for the pull request as for the commit body.
+3. Make sure that the base commit of the pull request is recent (within the past two days) - if not rebase your changes first.
diff --git a/CITATION.cff b/CITATION.cff
index c88727bcfa311..878ab94a4d86a 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,3 +1,4 @@
+# Official format description at https://citation-file-format.github.io
 cff-version: 1.2.0
 message: "Cite this paper whenever you use Julia"
 authors:
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 120000
index 0000000000000..47dc3e3d863cf
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+AGENTS.md
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0131dcbc4a278..36ec53c6a181d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -8,7 +8,9 @@ If you are already familiar with Julia itself, this blog post by Katharine Hyatt
 
 [The learning page](https://julialang.org/learning) has a great list of resources for new and experienced users alike.
 
-## Before filing an issue
+## Filing an issue
+
+### Before filing an issue
 
 - Reporting a potential bug? Please read the "[How to file a bug report](https://github.com/JuliaLang/julia/blob/master/CONTRIBUTING.md#how-to-file-a-bug-report)" section to make sure that all necessary information is included.
 
@@ -16,25 +18,7 @@ If you are already familiar with Julia itself, this blog post by Katharine Hyatt
 
 - Library feature requests are generally not accepted on this issue tracker. New libraries should be developed as [packages](https://julialang.github.io/Pkg.jl/v1/creating-packages/). Discuss ideas for libraries at the [Julia Discourse forum](https://discourse.julialang.org). Doing so will often lead to pointers to existing projects and bring together collaborators with common interests.
 
-## Contributor Checklist
-
-* Create a [GitHub account](https://github.com/signup/free).
-
-* [Fork Julia](https://github.com/JuliaLang/julia/fork).
-
-* Build the software and libraries (the first time takes a while, but it's fast after that). Detailed build instructions are in the [README](https://github.com/JuliaLang/julia/tree/master/README.md). Julia depends on several external packages; most are automatically downloaded and installed, but are less frequently updated than Julia itself.
-
-* Keep Julia current. Julia is a fast-moving target, and many details of the language are still settling out. Keep the repository up-to-date and rebase work-in-progress frequently to make merges simpler.
-
-* Learn to use [git](https://git-scm.com), the version control system used by GitHub and the Julia project. Try a tutorial such as the one [provided by GitHub](https://try.GitHub.io/levels/1/challenges/1).
-
-* Review discussions on the [Julia Discourse forum](https://discourse.julialang.org).
-
-* For more detailed tips, read the [submission guide](https://github.com/JuliaLang/julia/blob/master/CONTRIBUTING.md#submitting-contributions) below.
-
-* Relax and have fun!
-
-## How to file a bug report
+### How to file a bug report
 
 A useful bug report filed as a GitHub issue provides information about how to reproduce the error.
 
@@ -56,312 +40,34 @@ A useful bug report filed as a GitHub issue provides information about how to re
 
 ## Submitting contributions
 
-### Writing tests
-
-There are never enough tests. Track [code coverage at Codecov](https://codecov.io/github/JuliaLang/julia), and help improve it.
-
-1. Go visit https://codecov.io/github/JuliaLang/julia.
-
-2. Browse through the source files and find some untested functionality (highlighted in red) that you think you might be able to write a test for.
-
-3. Write a test that exercises this functionality---you can add your test to one of the existing files, or start a new one, whichever seems most appropriate to you. If you're adding a new test file, make sure you include it in the list of tests in `test/choosetests.jl`. https://docs.julialang.org/en/v1/stdlib/Test/ may be helpful in explaining how the testing infrastructure works.
-
-4. Run `make test-all` to rebuild Julia and run your new test(s). If you had to fix a bug or add functionality in `base`, this will ensure that your test passes and that you have not introduced extraneous whitespace.
-
-5. Submit the test as a pull request (PR).
-
-* Code for the buildbot configuration is maintained at: https://github.com/staticfloat/julia-buildbot
-* You can see the current buildbot setup at: https://build.julialang.org/builders
-* [Issue 9493](https://github.com/JuliaLang/julia/issues/9493) and [issue 11885](https://github.com/JuliaLang/julia/issues/11885) have more detailed discussion on code coverage.
-
-Code coverage shows functionality that still needs "proof of concept" tests. These are important, as are tests for tricky edge cases, such as converting between integer types when the number to convert is near the maximum of the range of one of the integer types. Even if a function already has some coverage on Codecov, it may still benefit from tests for edge cases.
-
-### Improving documentation
-
-*By contributing documentation to Julia, you are agreeing to release it under the [MIT License](https://github.com/JuliaLang/julia/tree/master/LICENSE.md).*
-
-Julia's documentation source files are stored in the `doc/` directory and all docstrings are found in `base/`. Like everything else these can be modified using `git`. Documentation is built with [Documenter.jl](https://github.com/JuliaDocs/Documenter.jl), which uses Markdown syntax. The HTML documentation can be built locally by running
-
-```
-make docs
-```
-
-from Julia's root directory. This will rebuild the Julia system image, then install or update the package dependencies required to build the documentation, and finally build the HTML documentation and place the resulting files in `doc/_build/html/`.
-
-> **Note**
->
-> When making changes to any of Julia's documentation it is recommended that you run `make docs` to check that your changes are valid and do not produce any errors before opening a pull request.
-
-Below are outlined the three most common types of documentation changes and the steps required to perform them. Please note that the following instructions do not cover the full range of features provided by Documenter.jl. Refer to [Documenter's documentation](https://juliadocs.github.io/Documenter.jl/stable) if you encounter anything that is not covered by the sections below.
-
-#### Modifying files in `doc/src/`
-
-Most of the source text for the Julia Manual is located in `doc/src/`. To update or add new text to any one of the existing files the following steps should be followed:
-
-1. update the text in whichever `.md` files are applicable;
-2. run `make docs` from the root directory;
-3. check the output in `doc/_build/html/` to make sure the changes are correct;
-4. commit your changes and open a pull request.
-
-> **Note**
->
-> The contents of `doc/_build/` does **not** need to be committed when you make changes.
-
-To add a **new file** to `doc/src/` rather than updating a file replace step `1` above with
-
-1. add the file to the appropriate subdirectory in `doc/src/` and also add the file path to the `PAGES` vector in `doc/make.jl`.
-
-#### Modifying an existing docstring in `base/`
-
-All docstrings are written inline above the methods or types they are associated with and can be found by clicking on the `source` link that appears below each docstring in the HTML file. The steps needed to make a change to an existing docstring are listed below:
-
-1. find the docstring in `base/`;
-2. update the text in the docstring;
-3. run `make docs` from the root directory;
-4. check the output in `doc/_build/html/` to make sure the changes are correct;
-5. commit your changes and open a pull request.
-
-#### Adding a new docstring to `base/`
-
-The steps required to add a new docstring are listed below:
-
-1. find a suitable definition in `base/` that the docstring will be most applicable to;
-2. add a docstring above the definition;
-3. find a suitable `@docs` code block in one of the `doc/src/stdlib/` files where you would like the docstring to appear;
-4. add the name of the definition to the `@docs` code block. For example, with a docstring added to a function `bar`
-
-    ```julia
-    "..."
-    function bar(args...)
-        # ...
-    end
-    ```
-
-   you would add the name `bar` to a `@docs` block in `doc/src/stdlib/`
-
-        ```@docs
-        foo
-        bar # <-- Added this one.
-        baz
-        ```
-
-5. run `make docs` from the root directory;
-6. check the output in `doc/_build/html` to make sure the changes are correct;
-7. commit your changes and open a pull request.
-
-#### Doctests
+### Contributor Checklist
 
-Examples written within docstrings can be used as testcases known as "doctests" by annotating code blocks with `jldoctest`.
-
-    ```jldoctest
-    julia> uppercase("Docstring test")
-    "DOCSTRING TEST"
-    ```
-
-A doctest needs to match an interactive REPL including the `julia>` prompt. It is recommended to add the header `# Examples` above the doctests.
-
-To run doctests you need to run `make -C doc doctest=true` from the root directory. You can use `make -C doc doctest=true revise=true` if you are modifying the doctests and don't want to rebuild Julia after each change (see details below about the Revise.jl workflow).
-
-#### News-worthy changes
-
-For new functionality and other substantial changes, add a brief summary to `NEWS.md`. The news item should cross reference the pull request (PR) parenthetically, in the form `([#pr])`. To add the PR reference number, first create the PR, then push an additional commit updating `NEWS.md` with the PR reference number.  We periodically run `./julia doc/NEWS-update.jl` from the julia directory to update the cross-reference links, but this should not be done in a typical PR in order to avoid conflicting commits.
-
-#### Annotations for new features, deprecations and behavior changes
-
-API additions and deprecations, and minor behavior changes are allowed in minor version releases.
-For documented features that are part of the public API, a compatibility note should be added into
-the manual or the docstring. It should state the Julia minor version that changed the behavior
-and have a brief message describing the change.
-
-At the moment, this should always be done with the following `compat` admonition
-(so that it would be possible to programmatically find the annotations in the future):
-
-  ```
-  !!! compat "Julia 1.X"
-      This method was added in Julia 1.X.
-  ```
-
-### Contributing to core functionality or base libraries
-
-*By contributing code to Julia, you are agreeing to release it under the [MIT License](https://github.com/JuliaLang/julia/tree/master/LICENSE.md).*
-
-The Julia community uses [GitHub issues](https://github.com/JuliaLang/julia/issues) to track and discuss problems, feature requests, and pull requests (PR).
-
-Issues and pull requests should have self explanatory titles such that they can be understood from the list of PRs and Issues.
-i.e. `Add {feature}` and `Fix {bug}` are good, `Fix #12345. Corrects the bug.` is bad.
-
-You can make pull requests for incomplete features to get code review. The convention is to open these a draft PRs and prefix
-the pull request title with "WIP:" for Work In Progress, or "RFC:" for Request for Comments when work is completed and ready
-for merging. This will prevent accidental merging of work that is in progress.
-
-Note: These instructions are for adding to or improving functionality in the base library. Before getting started, it can be helpful to discuss the proposed changes or additions on the [Julia Discourse forum](https://discourse.julialang.org) or in a GitHub issue---it's possible your proposed change belongs in a package rather than the core language. Also, keep in mind that changing stuff in the base can potentially break a lot of things. Finally, because of the time required to build Julia, note that it's usually faster to develop your code in stand-alone files, get it working, and then migrate it into the base libraries.
-
-Add new code to Julia's base libraries as follows (this is the "basic" approach; see a more efficient approach in the next section):
-
- 1. Edit the appropriate file in the `base/` directory, or add new files if necessary. Create tests for your functionality and add them to files in the `test/` directory. If you're editing C or Scheme code, most likely it lives in `src/` or one of its subdirectories, although some aspects of Julia's REPL initialization live in `cli/`.
-
- 2. Add any new files to `sysimg.jl` in order to build them into the Julia system image.
-
- 3. Add any necessary export symbols in `exports.jl`.
-
- 4. Include your tests in `test/Makefile` and `test/choosetests.jl`.
-
-Build as usual, and do `make clean testall` to test your contribution. If your contribution includes changes to Makefiles or external dependencies, make sure you can build Julia from a clean tree using `git clean -fdx` or equivalent (be careful – this command will delete any files lying around that aren't checked into git).
-
-#### Running specific tests
-There are `make` targets for running specific tests:
-
-    make test-bitarray
-
-You can also use the `runtests.jl` script, e.g. to run `test/bitarray.jl` and `test/math.jl`:
-
-    ./usr/bin/julia test/runtests.jl bitarray math
-
-#### Modifying base more efficiently with Revise.jl
-
-[Revise](https://github.com/timholy/Revise.jl) is a package that
-tracks changes in source files and automatically updates function
-definitions in your running Julia session. Using it, you can make
-extensive changes to Base without needing to rebuild in order to test
-your changes.
-
-Here is the standard procedure:
-
-1. If you are planning changes to any types or macros, make those
-   changes and build julia using `make`. (This is
-   necessary because `Revise` cannot handle changes to type
-   definitions or macros.) Unless it's
-   required to get Julia to build, you do not have to add any
-   functionality based on the new types, just the type definitions
-   themselves.
-
-2. Start a Julia REPL session. Then issue the following commands:
-
-```julia
-using Revise    # if you aren't launching it in your `.julia/config/startup.jl`
-Revise.track(Base)
-```
-
-3. Edit files in `base/`, save your edits, and test the
-   functionality.
-
-If you need to restart your Julia session, just start at step 2 above.
-`Revise.track(Base)` will note any changes from when Julia was last
-built and incorporate them automatically. You only need to rebuild
-Julia if you made code-changes that Revise cannot handle.
-
-For convenience, there are also `test-revise-*` targets for every [`test-*`
-target](https://github.com/JuliaLang/julia/blob/master/CONTRIBUTING.md#running-specific-tests) that use Revise to load any modifications to Base into the current
-system image before running the corresponding test. This can be useful as a shortcut
-on the command line (since tests aren't always designed to be run outside the
-runtest harness).
-
-### Contributing to the standard library
-
-The standard library (stdlib) packages are baked into the Julia system image.
-When running the ordinary test workflow on the stdlib packages, the system image
-version overrides the version you are developing.
-To test stdlib packages, you can do the following steps:
-
-1. Edit the UUID field of the `Project.toml` in the stdlib package
-2. Change the current directory to the directory of the stdlib you are developing
-3. Start julia with `julia --project=.`
-4. You can now test the package by running `pkg> test` in Pkg mode.
-
-Because you changed the UUID, the package manager treats the stdlib package as
-different from the one in the system image, and the system image version will
-not override the package.
-
-Be sure to change the UUID value back before making the pull request.
-
-### Contributing to patch releases
-
-The process of [creating a patch release](https://docs.julialang.org/en/v1/devdocs/build/distributing/#Point-releasing-101) is roughly as follows:
+* Create a [GitHub account](https://github.com/signup/free).
 
-1. Create a new branch (e.g. `backports-release-1.6`) against the relevant minor release
-   branch (e.g. `release-1.6`). Usually a corresponding pull request is created as well.
+* [Fork Julia](https://github.com/JuliaLang/julia/fork).
 
-2. Add commits, nominally from `master` (hence "backports"), to that branch.
-   See below for more information on this process.
+* Build the software and libraries (the first time takes a while, but it's fast after that). Detailed build instructions are in the [README](https://github.com/JuliaLang/julia/tree/master/README.md). Julia depends on several external packages; most are automatically downloaded and installed, but are less frequently updated than Julia itself.
 
-3. Run the [BaseBenchmarks.jl](https://github.com/JuliaCI/BaseBenchmarks.jl) benchmark
-   suite and [PkgEval.jl](https://github.com/JuliaCI/PkgEval.jl) package ecosystem
-   exerciser against that branch. Nominally BaseBenchmarks.jl and PkgEval.jl are
-   invoked via [Nanosoldier.jl](https://github.com/JuliaCI/Nanosoldier.jl) from
-   the pull request associated with the backports branch. Fix any issues.
+* Keep Julia current. Julia is a fast-moving target, and many details of the language are still settling out. Keep the repository up-to-date and rebase work-in-progress frequently to make merges simpler.
 
-4. Once all test and benchmark reports look good, merge the backports branch into
-   the corresponding release branch (e.g. merge `backports-release-1.6` into
-   `release-1.6`).
+* Learn to use [git](https://git-scm.com), the version control system used by GitHub and the Julia project. Try a tutorial such as the one [provided by GitHub](https://try.GitHub.io/levels/1/challenges/1).
 
-5. Open a pull request that bumps the version of the relevant minor release to the
-   next patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37718).
+* Review discussions on the [Julia Discourse forum](https://discourse.julialang.org).
 
-6. Ping `@JuliaLang/releases` to tag the patch release and update the website.
+* If your pull request contains substantial contributions from a generative AI tool, please disclose so with details, and review all changes before opening.
 
-7. Open a pull request that bumps the version of the relevant minor release to the
-   next prerelease patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37724).
+* Relax and have fun!
 
-Step 2 above, i.e. backporting commits to the `backports-release-X.Y` branch, has largely
-been automated via [`Backporter`](https://github.com/KristofferC/Backporter): Backporter
-searches for merged pull requests with the relevant `backport-X.Y` tag, and attempts to
-cherry-pick the commits from those pull requests onto the `backports-release-X.Y` branch.
-Some commits apply successfully without intervention, others not so much. The latter
-commits require "manual" backporting, with which help is generally much appreciated.
-Backporter generates a report identifying those commits it managed to backport automatically
-and those that require manual backporting; this report is usually copied into the first
-post of the pull request associated with `backports-release-X.Y` and maintained as
-additional commits are automatically and/or manually backported.
+### Guidance for specific changes
 
-When contributing a manual backport, if you have the necessary permissions, please push the
-backport directly to the `backports-release-X.Y` branch. If you lack the relevant
-permissions, please open a pull request against the `backports-release-X.Y` branch with the
-manual backport. Once the manual backport is live on the `backports-release-X.Y` branch,
-please remove the `backport-X.Y` tag from the originating pull request for the commits.
-
-### Code Formatting Guidelines
-
-#### General Formatting Guidelines for Julia code contributions
-
- - 4 spaces per indentation level, no tabs
- - use whitespace to make the code more readable
- - no whitespace at the end of a line (trailing whitespace)
- - comments are good, especially when they explain the algorithm
- - try to adhere to a 92 character line length limit
- - use upper camel case convention for modules, type names
- - use lower case with underscores for method names
- - it is generally preferred to use ASCII operators and identifiers over
-   Unicode equivalents whenever possible
- - in docstrings refer to the language as "Julia" and the executable as "`julia`"
-
-#### General Formatting Guidelines For C code contributions
-
- - 4 spaces per indentation level, no tabs
- - space between `if` and `(` (`if (x) ...`)
- - newline before opening `{` in function definitions
- - `f(void)` for 0-argument function declarations
- - newline between `}` and `else` instead of `} else {`
- - if one part of an `if..else` chain uses `{ }` then all should
- - no whitespace at the end of a line
-
-### Git Recommendations For Pull Requests
-
- - Avoid working from the `master` branch of your fork, creating a new branch will make it easier if Julia's `master` changes and you need to update your pull request.
- - Try to [squash](http://gitready.com/advanced/2009/02/10/squashing-commits-with-rebase.html) together small commits that make repeated changes to the same section of code so your pull request is easier to review. A reasonable number of separate well-factored commits is fine, especially for larger changes.
- - If any conflicts arise due to changes in Julia's `master`, prefer updating your pull request branch with `git rebase` versus `git merge` or `git pull`, since the latter will introduce merge commits that clutter the git history with noise that makes your changes more difficult to review.
- - Descriptive commit messages are good.
- - Using `git add -p` or `git add -i` can be useful to avoid accidentally committing unrelated changes.
- - When linking to specific lines of code in discussion of an issue or pull request, hit the `y` key while viewing code on GitHub to reload the page with a URL that includes the specific version that you're viewing. That way any lines of code that you refer to will still make sense in the future, even if the content of the file changes.
- - Whitespace can be automatically removed from existing commits with `git rebase`.
-   - To remove whitespace for the previous commit, run
-     `git rebase --whitespace=fix HEAD~1`.
-   - To remove whitespace relative to the `master` branch, run
-     `git rebase --whitespace=fix master`.
-
-#### Git Recommendations For Pull Request Reviewers
-
-- When merging, we generally like `squash+merge`. Unless it is the rare case of a PR with carefully staged individual commits that you want in the history separately, in which case `merge` is acceptable, but usually prefer `squash+merge`.
+The julia project maintains a more in-depth `Contributor's Guide` as part of our
+developer documentation. Here you can find more in-depth guidance for how to write
+specific kinds of changes. In particular, you want want to read:
 
+- [How to contribute code changes](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/contributing/code-changes.md)
+- [How to contribute additional tests](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/contributing/tests.md)
+- [How to work on documentation](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/contributing/documentation.md)
+- [Workflow tips for working with git](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/contributing/git-workflow.md)
 
 ## Resources
 
diff --git a/Compiler/.gitignore b/Compiler/.gitignore
new file mode 100644
index 0000000000000..ba39cc531edeb
--- /dev/null
+++ b/Compiler/.gitignore
@@ -0,0 +1 @@
+Manifest.toml
diff --git a/Compiler/LICENSE.md b/Compiler/LICENSE.md
new file mode 100644
index 0000000000000..dbbcd7506fc1e
--- /dev/null
+++ b/Compiler/LICENSE.md
@@ -0,0 +1,26 @@
+MIT License
+
+Copyright (c) 2009-2025: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+end of terms and conditions
+
+Please see [THIRDPARTY.md](../THIRDPARTY.md) for license information for other software used in this project.
diff --git a/Compiler/Project.toml b/Compiler/Project.toml
new file mode 100644
index 0000000000000..1a0cdf4abca39
--- /dev/null
+++ b/Compiler/Project.toml
@@ -0,0 +1,15 @@
+name = "Compiler"
+uuid = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1"
+version = "0.1.1"
+
+[compat]
+julia = "1.10"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[targets]
+test = ["Test", "InteractiveUtils", "Random", "Libdl"]
diff --git a/Compiler/README.md b/Compiler/README.md
new file mode 100644
index 0000000000000..ae5aaa3f60792
--- /dev/null
+++ b/Compiler/README.md
@@ -0,0 +1,45 @@
+# The `Compiler` module
+
+This directory maintains the implementation of the Julia compiler.
+
+Through a bootstrapping process, it is bundled into the Julia runtime as `Base.Compiler`.
+
+You can also use this `Compiler` module as the `Compiler` standard library by following the steps below.
+
+## How to use
+
+To utilize this `Compiler.jl` standard library, you need to declare it as a dependency in
+your `Project.toml` as follows:
+> Project.toml
+```toml
+[deps]
+Compiler = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1"
+
+[compat]
+Compiler = "0.1"
+```
+
+With the setup above, [the special placeholder version (v0.1)](https://github.com/JuliaLang/BaseCompiler.jl)
+will be installed by default.[^1]
+
+[^1]: Currently, only version v0.1 is registered in the [General](https://github.com/JuliaRegistries/General) registry.
+
+If needed, you can switch to a custom implementation of the `Compiler` module by running
+```julia-repl
+pkg> dev /path/to/Compiler.jl # to use a local implementation
+```
+or
+```julia-repl
+pkg> add https://url/of/Compiler/branch # to use a remote implementation
+```
+This feature is particularly useful for developing or experimenting with alternative compiler implementations.
+
+> [!note]
+> The Compiler.jl standard library is available starting from Julia v1.10.
+> However, switching to a custom compiler implementation is supported only from
+> Julia v1.12 onwards.
+
+> [!warning]
+> When using a custom, non-`Base` version of `Compiler` implementation, it may be necessary
+> to run `InteractiveUtils.@activate Compiler` to ensure proper functionality of certain
+> reflection utilities.
diff --git a/Compiler/extras/CompilerDevTools/Manifest.toml b/Compiler/extras/CompilerDevTools/Manifest.toml
new file mode 100644
index 0000000000000..bcc78f1ded34a
--- /dev/null
+++ b/Compiler/extras/CompilerDevTools/Manifest.toml
@@ -0,0 +1,15 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.12.0-DEV"
+manifest_format = "2.0"
+project_hash = "84f495a1bf065c95f732a48af36dd0cd2cefb9d5"
+
+[[deps.Compiler]]
+path = "../.."
+uuid = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1"
+version = "0.0.2"
+
+[[deps.CompilerDevTools]]
+path = "."
+uuid = "92b2d91f-d2bd-4c05-9214-4609ac33433f"
+version = "0.0.0"
diff --git a/Compiler/extras/CompilerDevTools/Project.toml b/Compiler/extras/CompilerDevTools/Project.toml
new file mode 100644
index 0000000000000..a2749a9a56a84
--- /dev/null
+++ b/Compiler/extras/CompilerDevTools/Project.toml
@@ -0,0 +1,5 @@
+name = "CompilerDevTools"
+uuid = "92b2d91f-d2bd-4c05-9214-4609ac33433f"
+
+[deps]
+Compiler = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1"
diff --git a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl
new file mode 100644
index 0000000000000..ddf202f378fb5
--- /dev/null
+++ b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl
@@ -0,0 +1,81 @@
+module CompilerDevTools
+
+using Compiler
+using Compiler: argextype, widenconst
+using Core.IR
+using Base: isexpr
+
+mutable struct SplitCacheOwner end
+
+struct SplitCacheInterp <: Compiler.AbstractInterpreter
+    world::UInt
+    owner::SplitCacheOwner
+    inf_params::Compiler.InferenceParams
+    opt_params::Compiler.OptimizationParams
+    inf_cache::Vector{Compiler.InferenceResult}
+    codegen_cache::IdDict{CodeInstance,CodeInfo}
+    function SplitCacheInterp(;
+        world::UInt = Base.get_world_counter(),
+        owner::SplitCacheOwner = SplitCacheOwner(),
+        inf_params::Compiler.InferenceParams = Compiler.InferenceParams(),
+        opt_params::Compiler.OptimizationParams = Compiler.OptimizationParams(),
+        inf_cache::Vector{Compiler.InferenceResult} = Compiler.InferenceResult[])
+        new(world, owner, inf_params, opt_params, inf_cache, IdDict{CodeInstance,CodeInfo}())
+    end
+end
+
+Compiler.InferenceParams(interp::SplitCacheInterp) = interp.inf_params
+Compiler.OptimizationParams(interp::SplitCacheInterp) = interp.opt_params
+Compiler.get_inference_world(interp::SplitCacheInterp) = interp.world
+Compiler.get_inference_cache(interp::SplitCacheInterp) = interp.inf_cache
+Compiler.cache_owner(interp::SplitCacheInterp) = interp.owner
+Compiler.codegen_cache(interp::SplitCacheInterp) = interp.codegen_cache
+
+import Core.OptimizedGenerics.CompilerPlugins: typeinf, typeinf_edge
+@eval @noinline typeinf(owner::SplitCacheOwner, mi::MethodInstance, source_mode::UInt8) =
+    Base.invoke_in_world(which(typeinf, Tuple{SplitCacheOwner, MethodInstance, UInt8}).primary_world, Compiler.typeinf_ext_toplevel, SplitCacheInterp(; world=Base.tls_world_age(), owner), mi, source_mode)
+
+@eval @noinline function typeinf_edge(owner::SplitCacheOwner, mi::MethodInstance, parent_frame::Compiler.InferenceState, world::UInt, source_mode::UInt8)
+    # TODO: This isn't quite right, we're just sketching things for now
+    interp = SplitCacheInterp(; world, owner)
+    Compiler.typeinf_edge(interp, mi.def, mi.specTypes, Core.svec(), parent_frame, false, false)
+end
+
+function lookup_method_instance(f, args...)
+    @ccall jl_method_lookup(Any[f, args...]::Ptr{Any}, (1+length(args))::Csize_t, Base.tls_world_age()::Csize_t)::Ref{Core.MethodInstance}
+end
+
+function Compiler.transform_result_for_cache(interp::SplitCacheInterp, result::Compiler.InferenceResult, edges::Compiler.SimpleVector)
+    opt = result.src::Compiler.OptimizationState
+    ir = opt.optresult.ir::Compiler.IRCode
+    override = with_new_compiler
+    for inst in ir.stmts
+        stmt = inst[:stmt]
+        isexpr(stmt, :call) || continue
+        f = stmt.args[1]
+        f === override && continue
+        T = widenconst(argextype(f, ir))
+        T <: Core.Builtin && continue
+        insert!(stmt.args, 1, override)
+        insert!(stmt.args, 3, interp.owner)
+    end
+    @invoke Compiler.transform_result_for_cache(interp::Compiler.AbstractInterpreter, result::Compiler.InferenceResult, edges::Compiler.SimpleVector)
+end
+
+with_new_compiler(f, args...; owner::SplitCacheOwner = SplitCacheOwner()) = with_new_compiler(f, owner, args...)
+
+function with_new_compiler(f, owner::SplitCacheOwner, args...)
+    # We try to avoid introducing `with_new_compiler` in the first place,
+    # but if we can't see the type, it's still possible to end up with a
+    # builtin here - simply forward to the ordinary builtin call.
+    isa(f, Core.Builtin) && return f(args...)
+    mi = lookup_method_instance(f, args...)
+    new_compiler_ci = Core.OptimizedGenerics.CompilerPlugins.typeinf(
+        owner, mi, Compiler.SOURCE_MODE_ABI
+    )
+    invoke(f, new_compiler_ci, args...)
+end
+
+export with_new_compiler
+
+end
diff --git a/Compiler/extras/CompilerDevTools/test/runtests.jl b/Compiler/extras/CompilerDevTools/test/runtests.jl
new file mode 100644
index 0000000000000..89dc4696d9e1c
--- /dev/null
+++ b/Compiler/extras/CompilerDevTools/test/runtests.jl
@@ -0,0 +1,24 @@
+using Test
+using Compiler: code_cache
+using Base: inferencebarrier
+using CompilerDevTools
+using CompilerDevTools: lookup_method_instance, SplitCacheInterp
+
+@testset "CompilerDevTools" begin
+  do_work(x, y) = x + y
+  f1() = do_work(inferencebarrier(1), inferencebarrier(2))
+  interp = SplitCacheInterp()
+  cache = code_cache(interp)
+  mi = lookup_method_instance(f1)
+  @test !haskey(cache, mi)
+  @test with_new_compiler(f1, interp.owner) === 3
+  @test haskey(cache, mi)
+  # Here `do_work` is compiled at runtime, and so must have
+  # required extra work to be cached under the same cache owner.
+  mi = lookup_method_instance(do_work, 1, 2)
+  @test haskey(cache, mi)
+
+  # Should not error with a builtin whose type we do not know
+  f_unknown_builtin() = Base.compilerbarrier(:type, isa)(1, Int)
+  with_new_compiler(f_unknown_builtin, interp.owner) === true
+end;
diff --git a/Compiler/extras/CompilerDevTools/test/testpkg.jl b/Compiler/extras/CompilerDevTools/test/testpkg.jl
new file mode 100644
index 0000000000000..f658a48862d2c
--- /dev/null
+++ b/Compiler/extras/CompilerDevTools/test/testpkg.jl
@@ -0,0 +1,5 @@
+using Pkg
+
+Pkg.activate(dirname(@__DIR__)) do
+  include("runtests.jl")
+end
diff --git a/Compiler/src/Compiler.jl b/Compiler/src/Compiler.jl
new file mode 100644
index 0000000000000..55293277d7f64
--- /dev/null
+++ b/Compiler/src/Compiler.jl
@@ -0,0 +1,227 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+if isdefined(Base, :end_base_include) && !isdefined(Base, :Compiler)
+
+# Define a dummy `Compiler` module to make it installable even on Julia versions where
+# Compiler.jl is not available as a standard library.
+@eval module Compiler
+    function __init__()
+        println("""
+        The `Compiler` standard library is not available for this version of Julia.
+        Use Julia version `v"1.12.0-DEV.1581"` or later.
+        """)
+    end
+end
+
+# When generating an incremental precompile file, we first check whether we
+# already have a copy of this *exact* code in the system image. If so, we
+# simply generates a pkgimage that has the dependency edges we recorded in
+# the system image and simply returns that copy of the compiler. If not,
+# we proceed to load/precompile this as an ordinary package.
+elseif (isdefined(Base, :generating_output) && Base.generating_output(true) &&
+        Base.samefile(joinpath(Sys.BINDIR, Base.DATAROOTDIR, Base._compiler_require_dependencies[1][2]), @eval @__FILE__) &&
+        !Base.any_includes_stale(
+            map(Base.compiler_chi, Base._compiler_require_dependencies),
+            "sysimg", nothing))
+
+    Base.prepare_compiler_stub_image!()
+    append!(Base._require_dependencies, map(Base.expand_compiler_path, Base._compiler_require_dependencies))
+    # There isn't much point in precompiling native code - downstream users will
+    # specialize their own versions of the compiler code and we don't activate
+    # the compiler by default anyway, so let's save ourselves some disk space.
+    ccall(:jl_suppress_precompile, Cvoid, (Cint,), 1)
+
+else
+
+@eval baremodule Compiler
+
+using Core.Intrinsics, Core.IR
+
+using Core: ABIOverride, Builtin, CodeInstance, IntrinsicFunction, MethodInstance, MethodMatch,
+    MethodTable, MethodCache, PartialOpaque, SimpleVector, TypeofVararg,
+    _apply_iterate, apply_type, compilerbarrier, donotdelete, memoryref_isassigned,
+    memoryrefget, memoryrefnew, memoryrefoffset, memoryrefset!, print, println, show, svec,
+    typename, unsafe_write, write, stdout, stderr
+
+using Base: @_foldable_meta, @_gc_preserve_begin, @_gc_preserve_end, @nospecializeinfer,
+    PARTITION_KIND_GLOBAL, PARTITION_KIND_UNDEF_CONST, PARTITION_KIND_BACKDATED_CONST, PARTITION_KIND_DECLARED,
+    PARTITION_FLAG_DEPWARN,
+    Base, BitVector, Bottom, Callable, DataTypeFieldDesc,
+    EffectsOverride, Filter, Generator, NUM_EFFECTS_OVERRIDES,
+    OneTo, Ordering, RefValue, _NAMEDTUPLE_NAME,
+    _array_for, _bits_findnext, _methods_by_ftype, _uniontypes, all, allocatedinline, any,
+    argument_datatype, binding_kind, cconvert, copy_exprargs, datatype_arrayelem,
+    datatype_fieldcount, datatype_fieldtypes, datatype_layoutsize, datatype_nfields,
+    datatype_pointerfree, decode_effects_override, diff_names, fieldindex, visit,
+    generating_output, get_nospecializeinfer_sig, get_world_counter, has_free_typevars,
+    hasgenerator, hasintersect, indexed_iterate, isType, is_file_tracked, is_function_def,
+    is_meta_expr, is_meta_expr_head, is_nospecialized, is_nospecializeinfer, is_defined_const_binding,
+    is_some_const_binding, is_some_guard, is_some_imported, is_some_explicit_imported, is_some_binding_imported, is_valid_intrinsic_elptr,
+    isbitsunion, isconcretedispatch, isdispatchelem, isexpr, isfieldatomic, isidentityfree,
+    iskindtype, ismutabletypename, ismutationfree, issingletontype, isvarargtype, isvatuple,
+    kwerr, lookup_binding_partition, may_invoke_generator, methods, midpoint, moduleroot,
+    partition_restriction, quoted, rename_unionall, rewrap_unionall, specialize_method,
+    structdiff, tls_world_age, unconstrain_vararg_length, unionlen, uniontype_layout,
+    uniontypes, unsafe_convert, unwrap_unionall, unwrapva, vect, widen_diagonal,
+    _uncompressed_ir, datatype_min_ninitialized,
+    partialstruct_init_undefs, fieldcount_noerror, _eval_import, _eval_using,
+    get_ci_mi, get_methodtable, morespecific, specializations, has_image_globalref,
+    PARTITION_MASK_KIND, PARTITION_KIND_GUARD, PARTITION_FLAG_EXPORTED, PARTITION_FLAG_DEPRECATED,
+    BINDING_FLAG_ANY_IMPLICIT_EDGES, is_some_implicit, IteratorSize, SizeUnknown, get_require_world, JLOptions,
+    devnull, devnull as stdin
+
+using Base
+using Base.Order
+
+import Base: ==, _topmod, append!, convert, copy, copy!, findall, first, get, get!,
+    getindex, haskey, in, isempty, isready, iterate, iterate, last, length, max_world,
+    min_world, popfirst!, push!, resize!, setindex!, size, intersect
+
+# Needs to match UUID defined in Project.toml
+ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Compiler,
+    (0x807dbc54_b67e_4c79, 0x8afb_eafe4df6f2e1))
+
+const getproperty = Core.getfield
+const setproperty! = Core.setfield!
+const swapproperty! = Core.swapfield!
+const modifyproperty! = Core.modifyfield!
+const replaceproperty! = Core.replacefield!
+const _DOCS_ALIASING_WARNING = ""
+
+ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Compiler, false)
+
+eval(x) = Core.eval(Compiler, x)
+eval(m, x) = Core.eval(m, x)
+
+function include(x::String)
+    if !isdefined(Base, :end_base_include)
+        # During bootstrap, all includes are relative to `base/`
+        x = Base.strcat(Base.strcat(Base.DATAROOT, "julia/Compiler/src/"), x)
+    end
+    Base.include(Compiler, x)
+end
+
+function include(mod::Module, x::String)
+    if !isdefined(Base, :end_base_include)
+        x = Base.strcat(Base.strcat(Base.DATAROOT, "julia/Compiler/src/"), x)
+    end
+    Base.include(mod, x)
+end
+
+macro _boundscheck() Expr(:boundscheck) end
+
+function return_type end
+function is_return_type(Core.@nospecialize(f))
+    f === return_type && return true
+    if isdefined(Base, :Compiler) && Compiler !== Base.Compiler
+        # Also model the return_type function of the builtin Compiler the same.
+        # This isn't completely sound. We don't actually have any idea what the
+        # base compiler will do at runtime. In the fullness of time, we should
+        # re-work the semantics to make the cache primary and thus avoid having
+        # to reason about what the compiler may do at runtime, but we're not
+        # fully there yet.
+        return f === Base.Compiler.return_type
+    end
+    return false
+end
+
+include("timing.jl")
+include("sort.jl")
+
+# We don't include some.jl, but this definition is still useful.
+something(x::Nothing, y...) = something(y...)
+something(x::Any, y...) = x
+
+############
+# compiler #
+############
+
+baremodule BuildSettings
+using Core: ARGS, include, Int, ===
+using ..Compiler: >, getindex, length
+
+global MAX_METHODS::Int = 3
+
+if length(ARGS) > 2 && ARGS[2] === "--buildsettings"
+    include(BuildSettings, ARGS[3])
+end
+end
+
+if !isdefined(Base, :end_base_include)
+    macro show(ex...)
+        blk = Expr(:block)
+        for s in ex
+            push!(blk.args, :(println(stdout, $(QuoteNode(s)), " = ",
+                                              begin local value = $(esc(s)) end)))
+        end
+        isempty(ex) || push!(blk.args, :value)
+        blk
+    end
+else
+    using Base: @show
+end
+
+include("cicache.jl")
+include("methodtable.jl")
+include("effects.jl")
+include("types.jl")
+include("utilities.jl")
+include("validation.jl")
+
+include("ssair/basicblock.jl")
+include("ssair/domtree.jl")
+include("ssair/ir.jl")
+include("ssair/tarjan.jl")
+
+include("abstractlattice.jl")
+include("stmtinfo.jl")
+include("inferenceresult.jl")
+include("inferencestate.jl")
+
+include("typeutils.jl")
+include("typelimits.jl")
+include("typelattice.jl")
+include("tfuncs.jl")
+
+include("abstractinterpretation.jl")
+include("typeinfer.jl")
+include("optimize.jl")
+
+include("bootstrap.jl")
+include("precompile.jl")
+include("reflection_interface.jl")
+include("opaque_closure.jl")
+
+baremodule ReinferUtils end
+include(ReinferUtils, "reinfer.jl")
+include(ReinferUtils, "bindinginvalidations.jl")
+
+macro __SOURCE_FILE__()
+    __source__.file === nothing && return nothing
+    return QuoteNode(__source__.file::Symbol)
+end
+
+module IRShow end # relies on string and IO operations defined in Base
+baremodule TrimVerifier using Core end # relies on IRShow, so define this afterwards
+
+if isdefined(Base, :end_base_include)
+    # When this module is loaded as the standard library, include these files as usual
+    include(IRShow, "ssair/show.jl")
+    include(TrimVerifier, "verifytrim.jl")
+else
+    function load_irshow!()
+        Base.delete_method(Base.which(verify_typeinf_trim, (IO, Vector{Any}, Bool)),)
+        include(IRShow, "ssair/show.jl")
+        include(TrimVerifier, "verifytrim.jl")
+    end
+    function verify_typeinf_trim(io::IO, codeinfos::Vector{Any}, onlywarn::Bool)
+        # stub implementation
+        msg = "--trim verifier not defined"
+        onlywarn ? println(io, msg) : error(msg)
+    end
+    # During bootstrap, skip including these files and defer to base/show.jl to include it later
+end
+
+end # baremodule Compiler
+
+end # if isdefined(Base, :generating_output) && ...
diff --git a/Compiler/src/abstractinterpretation.jl b/Compiler/src/abstractinterpretation.jl
new file mode 100644
index 0000000000000..3808797275e15
--- /dev/null
+++ b/Compiler/src/abstractinterpretation.jl
@@ -0,0 +1,4553 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+struct SlotRefinement
+    slot::SlotNumber
+    typ::Any
+    SlotRefinement(slot::SlotNumber, @nospecialize(typ)) = new(slot, typ)
+end
+
+# See if the inference result of the current statement's result value might affect
+# the final answer for the method (aside from optimization potential and exceptions).
+# To do that, we need to check both for slot assignment and SSA usage.
+call_result_unused(sv::InferenceState, currpc::Int) =
+    isexpr(sv.src.code[currpc], :call) && isempty(sv.ssavalue_uses[currpc])
+call_result_unused(si::StmtInfo) = !si.used
+
+is_const_bool_or_bottom(@nospecialize(b)) = (isa(b, Const) && isa(b.val, Bool)) || b == Bottom
+function can_propagate_conditional(@nospecialize(rt), argtypes::Vector{Any})
+    isa(rt, InterConditional) || return false
+    if rt.slot > length(argtypes)
+        # In the vararg tail - can't be conditional
+        @assert isvarargtype(argtypes[end])
+        return false
+    end
+    return isa(argtypes[rt.slot], Conditional) &&
+        is_const_bool_or_bottom(rt.thentype) && is_const_bool_or_bottom(rt.thentype)
+end
+
+function propagate_conditional(rt::InterConditional, cond::Conditional)
+    new_thentype = rt.thentype === Const(false) ? cond.elsetype : cond.thentype
+    new_elsetype = rt.elsetype === Const(true) ? cond.thentype : cond.elsetype
+    if rt.thentype == Bottom
+        @assert rt.elsetype != Bottom
+        return Conditional(cond.slot, Bottom, new_elsetype)
+    elseif rt.elsetype == Bottom
+        @assert rt.thentype != Bottom
+        return Conditional(cond.slot, new_thentype, Bottom)
+    end
+    return Conditional(cond.slot, new_thentype, new_elsetype)
+end
+
+mutable struct SafeBox{T}
+    x::T
+    SafeBox{T}(x::T) where T = new{T}(x)
+    SafeBox(@nospecialize x) = new{Any}(x)
+end
+getindex(box::SafeBox) = box.x
+setindex!(box::SafeBox{T}, x::T) where T = setfield!(box, :x, x)
+
+struct FailedMethodMatch
+    reason::String
+end
+
+struct MethodMatchTarget
+    match::MethodMatch
+    edges::Vector{Union{Nothing,CodeInstance}}
+    call_results::Vector{Union{Nothing,InferredCallResult}}
+    edge_idx::Int
+end
+
+struct MethodMatches
+    applicable::Vector{MethodMatchTarget}
+    info::MethodMatchInfo
+    valid_worlds::WorldRange
+end
+any_ambig(result::MethodLookupResult) = result.ambig
+any_ambig(info::MethodMatchInfo) = any_ambig(info.results)
+any_ambig(m::MethodMatches) = any_ambig(m.info)
+fully_covering(info::MethodMatchInfo) = info.fullmatch
+fully_covering(m::MethodMatches) = fully_covering(m.info)
+
+struct UnionSplitMethodMatches
+    applicable::Vector{MethodMatchTarget}
+    applicable_argtypes::Vector{Vector{Any}}
+    info::UnionSplitInfo
+    valid_worlds::WorldRange
+end
+any_ambig(info::UnionSplitInfo) = any(any_ambig, info.split)
+any_ambig(m::UnionSplitMethodMatches) = any_ambig(m.info)
+fully_covering(info::UnionSplitInfo) = all(fully_covering, info.split)
+fully_covering(m::UnionSplitMethodMatches) = fully_covering(m.info)
+
+nmatches(info::MethodMatchInfo) = length(info.results)
+function nmatches(info::UnionSplitInfo)
+    n = 0
+    for mminfo in info.split
+        n += nmatches(mminfo)
+    end
+    return n
+end
+
+# intermediate state for computing gfresult
+mutable struct CallInferenceState
+    inferidx::Int
+    rettype
+    exctype
+    all_effects::Effects
+    conditionals::Union{Nothing,Tuple{Vector{Any},Vector{Any}}} # keeps refinement information of call argument types when the return type is boolean
+    slotrefinements::Union{Nothing,Vector{Any}} # keeps refinement information on slot types obtained from call signature
+
+    # some additional fields for untyped objects (just to avoid capturing)
+    const func
+    const matches::Union{MethodMatches,UnionSplitMethodMatches}
+    function CallInferenceState(@nospecialize(func), matches::Union{MethodMatches,UnionSplitMethodMatches})
+        return new(#=inferidx=#1, #=rettype=#Bottom, #=exctype=#Bottom, #=all_effects=#EFFECTS_TOTAL,
+            #=conditionals=#nothing, #=slotrefinements=#nothing, func, matches)
+    end
+end
+
+function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(func),
+                                  arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype),
+                                  sv::AbsIntState, max_methods::Int)
+    𝕃ₚ, 𝕃ᵢ = ipo_lattice(interp), typeinf_lattice(interp)
+    ⊑ₚ, ⊔ₚ, ⊔ᵢ  = partialorder(𝕃ₚ), join(𝕃ₚ), join(𝕃ᵢ)
+    argtypes = arginfo.argtypes
+    if si.saw_latestworld
+        add_remark!(interp, sv, "Cannot infer call, because we previously saw :latestworld")
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+    end
+    matches = find_method_matches(interp, argtypes, atype; max_methods)
+    if isa(matches, FailedMethodMatch)
+        add_remark!(interp, sv, matches.reason)
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+    end
+
+    (; valid_worlds, applicable) = matches
+    update_valid_age!(sv, get_inference_world(interp), valid_worlds) # need to record the negative world now, since even if we don't generate any useful information, inlining might want to add an invoke edge and it won't have this information anymore
+    if bail_out_toplevel_call(interp, sv)
+        local napplicable = length(applicable)
+        for i = 1:napplicable
+            local sig = applicable[i].match.spec_types
+            if !isdispatchtuple(sig)
+                # only infer fully concrete call sites in top-level expressions (ignoring even isa_compileable_sig matches)
+                add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression")
+                return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+            end
+        end
+    end
+
+    # final result
+    gfresult = Future{CallMeta}()
+    state = CallInferenceState(func, matches)
+
+    # split the for loop off into a function, so that we can pause and restart it at will
+    function infercalls(interp, sv)
+        local napplicable = length(applicable)
+        local multiple_matches = napplicable > 1
+        while state.inferidx <= napplicable
+            (; match, edges, call_results, edge_idx) = applicable[state.inferidx]
+            local method = match.method
+            local sig = match.spec_types
+            if bail_out_call(interp, InferenceLoopState(state.rettype, state.all_effects), sv)
+                add_remark!(interp, sv, "Call inference reached maximally imprecise information: bailing on doing more abstract inference.")
+                break
+            end
+            # TODO: this is unmaintained now as it didn't seem to improve things, though it does avoid hard-coding the union split at the higher level,
+            # it also can hurt infer-ability of some constrained parameter types (e.g. quacks like a duck)
+            # sigtuple = unwrap_unionall(sig)::DataType
+            # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting
+            #if splitunions
+            #    splitsigs = switchtupleunion(sig)
+            #    for sig_n in splitsigs
+            #        result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv)::Future
+            #        handle1(...)
+            #    end
+            #end
+            mresult = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv)::Future
+            function handle1(interp, sv)
+                local (; rt, exct, effects, edge, call_result) = mresult[]
+                this_conditional = ignorelimited(rt)
+                this_rt = widenwrappedconditional(rt)
+                this_exct = exct
+                # try constant propagation with argtypes for this match
+                # this is in preparation for inlining, or improving the return result
+                local matches = state.matches
+                this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[state.inferidx]
+                this_arginfo = ArgInfo(arginfo.fargs, this_argtypes)
+                const_call_result = abstract_call_method_with_const_args(interp,
+                    mresult[], state.func, this_arginfo, si, match, sv)
+                if const_call_result !== nothing
+                    this_const_conditional = ignorelimited(const_call_result.rt)
+                    this_const_rt = widenwrappedconditional(const_call_result.rt)
+                    const_result = const_edge = nothing
+                    if this_const_rt ⊑ₚ this_rt
+                        # As long as the const-prop result we have is not *worse* than
+                        # what we found out on types, we'd like to use it. Even if the
+                        # end result is exactly equivalent, it is likely that the IR
+                        # we produced while constproping is better than that with
+                        # generic types.
+                        # Return type of const-prop' inference can be wider than that of non const-prop' inference
+                        # e.g. in cases when there are cycles but cached result is still accurate
+                        this_conditional = this_const_conditional
+                        this_rt = this_const_rt
+                        (; effects, const_result, const_edge) = const_call_result
+                    elseif is_better_effects(const_call_result.effects, effects)
+                        (; effects, const_result, const_edge) = const_call_result
+                    else
+                        add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference")
+                    end
+                    # Treat the exception type separately. Currently, constprop often cannot determine the exception type
+                    # because consistent-cy does not apply to exceptions.
+                    if const_call_result.exct ⋤ this_exct
+                        this_exct = const_call_result.exct
+                        (; const_result, const_edge) = const_call_result
+                    else
+                        add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference")
+                    end
+                    if const_edge !== nothing
+                        edge = const_edge
+                        update_valid_age!(sv, get_inference_world(interp), world_range(const_edge))
+                    end
+                    if const_result !== nothing
+                        call_result = const_result
+                    end
+                end
+
+                state.all_effects = merge_effects(state.all_effects, effects)
+                @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context"
+                if can_propagate_conditional(this_conditional, argtypes)
+                    # The only case where we need to keep this in rt is where
+                    # we can directly propagate the conditional to a slot argument
+                    # that is not one of our arguments, otherwise we keep all the
+                    # relevant information in `conditionals` below.
+                    this_rt = this_conditional
+                end
+
+                state.rettype = state.rettype ⊔ₚ this_rt
+                state.exctype = state.exctype ⊔ₚ this_exct
+                if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, state.rettype) && arginfo.fargs !== nothing
+                    local conditionals = state.conditionals
+                    if conditionals === nothing
+                        conditionals = state.conditionals = (
+                            Any[Bottom for _ in 1:length(argtypes)],
+                            Any[Bottom for _ in 1:length(argtypes)])
+                    end
+                    for i = 1:length(argtypes)
+                        cnd = conditional_argtype(𝕃ᵢ, this_conditional, match.spec_types, argtypes, i)
+                        conditionals[1][i] = conditionals[1][i] ⊔ᵢ cnd.thentype
+                        conditionals[2][i] = conditionals[2][i] ⊔ᵢ cnd.elsetype
+                    end
+                end
+                edges[edge_idx] = edge
+                call_results[edge_idx] = call_result
+
+                state.inferidx += 1
+                return true
+            end # function handle1
+            if isready(mresult) && handle1(interp, sv)
+                continue
+            else
+                push!(sv.tasks, handle1)
+                return false
+            end
+        end # while
+
+        seenall = state.inferidx > napplicable
+        retinfo = state.matches.info
+        if seenall # small optimization to skip some work that is already implied
+            if !fully_covering(state.matches) || any_ambig(state.matches)
+                # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
+                state.all_effects = Effects(state.all_effects; nothrow=false)
+                state.exctype = state.exctype ⊔ₚ MethodError
+            end
+            local fargs = arginfo.fargs
+            if sv isa InferenceState && fargs !== nothing
+                state.slotrefinements = collect_slot_refinements(𝕃ᵢ, applicable, argtypes, fargs, sv)
+            end
+            state.rettype = from_interprocedural!(interp, state.rettype, sv, arginfo, state.conditionals)
+            if call_result_unused(si) && !(state.rettype === Bottom)
+                add_remark!(interp, sv, "Call result type was widened because the return value is unused")
+                # We're mainly only here because the optimizer might want this code,
+                # but we ourselves locally don't typically care about it locally
+                # (beyond checking if it always throws).
+                # So avoid adding an edge, since we don't want to bother attempting
+                # to improve our result even if it does change (to always throw),
+                # and avoid keeping track of a more complex result type.
+                state.rettype = Any
+            end
+            # if from_interprocedural added any pclimitations to the set inherited from the arguments,
+            if isa(sv, InferenceState)
+                # TODO (#48913) implement a proper recursion handling for irinterp:
+                # This works most of the time just because currently the `:terminate` condition often guarantees that
+                # irinterp doesn't fail into unresolved cycles, but it is not a good (or working) solution.
+                # We should revisit this once we have a better story for handling cycles in irinterp.
+                delete!(sv.pclimitations, sv) # remove self, if present
+            end
+        else
+            # there is unanalyzed candidate, widen type and effects to the top
+            state.rettype = state.exctype = Any
+            state.all_effects = Effects()
+        end
+
+        # Also considering inferring the compilation signature for this method, so
+        # it is available to the compiler in case it ends up needing it for the invoke.
+        if (isa(sv, InferenceState) && infer_compilation_signature(interp) &&
+            (!is_removable_if_unused(state.all_effects) || !call_result_unused(si)))
+            inferidx = SafeBox{Int}(1)
+            function infercalls2(interp, sv)
+                local napplicable = length(applicable)
+                local multiple_matches = napplicable > 1
+                while inferidx[] <= napplicable
+                    (; match, call_results, edge_idx) = applicable[inferidx[]]
+                    inferidx[] += 1
+                    local method = match.method
+                    local sig = match.spec_types
+                    mi = specialize_method(match; preexisting=true)
+                    local call_result = call_results[edge_idx]
+                    if mi === nothing || !(call_result isa InferenceResult) || !const_prop_methodinstance_heuristic(interp, call_result, mi, arginfo, sv)
+                        csig = get_compileable_sig(method, sig, match.sparams)
+                        if csig !== nothing && (!seenall || csig !== sig) # corresponds to whether the first look already looked at this, so repeating abstract_call_method is not useful
+                            #println(sig, " changed to ", csig, " for ", method)
+                            sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), csig, method.sig)::SimpleVector
+                            sparams = sp_[2]::SimpleVector
+                            mresult = abstract_call_method(interp, method, csig, sparams, multiple_matches, StmtInfo(false, false), sv)::Future
+                            isready(mresult) || return false # wait for mresult Future to resolve off the callstack before continuing
+                        end
+                    end
+                end
+                return true
+            end
+            # start making progress on the first call
+            infercalls2(interp, sv) || push!(sv.tasks, infercalls2)
+        end
+
+        gfresult[] = CallMeta(state.rettype, state.exctype, state.all_effects, retinfo, state.slotrefinements)
+        return true
+    end # function infercalls
+    # start making progress on the first call
+    infercalls(interp, sv) || push!(sv.tasks, infercalls)
+    return gfresult
+end
+
+function find_method_matches(interp::AbstractInterpreter, argtypes::Vector{Any}, @nospecialize(atype);
+                             max_union_splitting::Int = InferenceParams(interp).max_union_splitting,
+                             max_methods::Int = InferenceParams(interp).max_methods)
+    if is_union_split_eligible(typeinf_lattice(interp), argtypes, max_union_splitting)
+        return find_union_split_method_matches(interp, argtypes, max_methods)
+    end
+    return find_simple_method_matches(interp, atype, max_methods)
+end
+
+# NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
+is_union_split_eligible(𝕃::AbstractLattice, argtypes::Vector{Any}, max_union_splitting::Int) =
+    1 < unionsplitcost(𝕃, argtypes) <= max_union_splitting
+
+function find_union_split_method_matches(interp::AbstractInterpreter, argtypes::Vector{Any},
+                                         max_methods::Int)
+    split_argtypes = switchtupleunion(typeinf_lattice(interp), argtypes)
+    infos = MethodMatchInfo[]
+    applicable = MethodMatchTarget[]
+    applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
+    valid_worlds = WorldRange()
+    for i in 1:length(split_argtypes)
+        arg_n = split_argtypes[i]::Vector{Any}
+        sig_n = argtypes_to_type(arg_n)
+        sig_n === Bottom && continue
+        thismatches = findall(sig_n, method_table(interp); limit = max_methods)
+        if thismatches === nothing
+            return FailedMethodMatch("For one of the union split cases, too many methods matched")
+        end
+        valid_worlds = intersect(valid_worlds, thismatches.valid_worlds)
+        thisfullmatch = any(match::MethodMatch->match.fully_covers, thismatches)
+        mt = Core.methodtable
+        thisinfo = MethodMatchInfo(thismatches, mt, sig_n, thisfullmatch)
+        push!(infos, thisinfo)
+        for idx = 1:length(thismatches)
+            push!(applicable, MethodMatchTarget(thismatches[idx], thisinfo.edges, thisinfo.call_results, idx))
+            push!(applicable_argtypes, arg_n)
+        end
+    end
+    info = UnionSplitInfo(infos)
+    return UnionSplitMethodMatches(
+        applicable, applicable_argtypes, info, valid_worlds)
+end
+
+function find_simple_method_matches(interp::AbstractInterpreter, @nospecialize(atype), max_methods::Int)
+    matches = findall(atype, method_table(interp); limit = max_methods)
+    if matches === nothing
+        # this means too many methods matched
+        # (assume this will always be true, so we don't compute / update valid age in this case)
+        return FailedMethodMatch("Too many methods matched")
+    end
+    fullmatch = any(match::MethodMatch->match.fully_covers, matches)
+    mt = Core.methodtable
+    info = MethodMatchInfo(matches, mt, atype, fullmatch)
+    applicable = MethodMatchTarget[MethodMatchTarget(matches[idx], info.edges, info.call_results, idx) for idx = 1:length(matches)]
+    return MethodMatches(applicable, info, matches.valid_worlds)
+end
+
+"""
+    from_interprocedural!(interp::AbstractInterpreter, rt, sv::AbsIntState,
+                          arginfo::ArgInfo, maybecondinfo) -> newrt
+
+Converts inter-procedural return type `rt` into a local lattice element `newrt`,
+that is appropriate in the context of current local analysis frame `sv`, especially:
+- unwraps `rt::LimitedAccuracy` and collects its limitations into the current frame `sv`
+- converts boolean `rt` to new boolean `newrt` in a way `newrt` can propagate extra conditional
+  refinement information, e.g. translating `rt::InterConditional` into `newrt::Conditional`
+  that holds a type constraint information about a variable in `sv`
+
+This function _should_ be used wherever we propagate results returned from
+`abstract_call_method` or `abstract_call_method_with_const_args`.
+
+When `maybecondinfo !== nothing`, this function also tries extra conditional argument type refinement.
+In such cases `maybecondinfo` should be either of:
+- `maybecondinfo::Tuple{Vector{Any},Vector{Any}}`: precomputed argument type refinement information
+- method call signature tuple type
+When we deal with multiple `MethodMatch`es, it's better to precompute `maybecondinfo` by
+`tmerge`ing argument signature type of each method call.
+"""
+function from_interprocedural!(interp::AbstractInterpreter, @nospecialize(rt), sv::AbsIntState,
+                               arginfo::ArgInfo, @nospecialize(maybecondinfo))
+    rt = collect_limitations!(rt, sv)
+    if isa(rt, InterMustAlias)
+        rt = from_intermustalias(typeinf_lattice(interp), rt, arginfo, sv)
+    elseif is_lattice_bool(ipo_lattice(interp), rt)
+        if maybecondinfo === nothing
+            rt = widenconditional(rt)
+        else
+            rt = from_interconditional(typeinf_lattice(interp), rt, sv, arginfo, maybecondinfo)
+        end
+    end
+    @assert !(rt isa InterConditional || rt isa InterMustAlias) "invalid lattice element returned from inter-procedural context"
+    return rt
+end
+
+function collect_limitations!(@nospecialize(typ), sv::InferenceState)
+    if isa(typ, LimitedAccuracy)
+        union!(sv.pclimitations, typ.causes)
+        return typ.typ
+    end
+    return typ
+end
+
+function from_intermustalias(𝕃ᵢ::AbstractLattice, rt::InterMustAlias, arginfo::ArgInfo, sv::AbsIntState)
+    fargs = arginfo.fargs
+    if fargs !== nothing && 1 ≤ rt.slot ≤ length(fargs)
+        arg = ssa_def_slot(fargs[rt.slot], sv)
+        if isa(arg, SlotNumber)
+            argtyp = widenslotwrapper(arginfo.argtypes[rt.slot])
+            ⊑ = partialorder(𝕃ᵢ)
+            if rt.vartyp ⊑ argtyp
+                return MustAlias(arg, rt.vartyp, rt.fldidx, rt.fldtyp)
+            else
+                # TODO optimize this case?
+            end
+        end
+    end
+    return widenmustalias(rt)
+end
+
+function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::AbsIntState,
+                               arginfo::ArgInfo, @nospecialize(maybecondinfo))
+    has_conditional(𝕃ᵢ, sv) || return widenconditional(rt)
+    (; fargs, argtypes) = arginfo
+    fargs === nothing && return widenconditional(rt)
+    if can_propagate_conditional(rt, argtypes)
+        return propagate_conditional(rt, argtypes[rt.slot]::Conditional)
+    end
+    slot = 0
+    alias = nothing
+    thentype = elsetype = Any
+    condval = maybe_extract_const_bool(rt)
+    ⊑, ⋤, ⊓ = partialorder(𝕃ᵢ), strictneqpartialorder(𝕃ᵢ), meet(𝕃ᵢ)
+    for i in 1:length(fargs)
+        # find the first argument which supports refinement,
+        # and intersect all equivalent arguments with it
+        argtyp = argtypes[i]
+        if alias === nothing
+            arg = ssa_def_slot(fargs[i], sv)
+            if isa(arg, SlotNumber) && widenslotwrapper(argtyp) isa Type
+                old = argtyp
+                id = slot_id(arg)
+            elseif argtyp isa MustAlias
+                old = argtyp.fldtyp
+                id = argtyp.slot
+            else
+                continue # unlikely to refine
+            end
+        elseif argtyp isa MustAlias && issubalias(argtyp, alias)
+            arg = nothing
+            old = alias.fldtyp
+            id = alias.slot
+        else
+            continue
+        end
+        if slot == 0 || id == slot
+            if isa(maybecondinfo, Tuple{Vector{Any},Vector{Any}})
+                # if we have already computed argument refinement information, apply that now to get the result
+                new_thentype = maybecondinfo[1][i]
+                new_elsetype = maybecondinfo[2][i]
+            else
+                # otherwise compute it on the fly
+                cnd = conditional_argtype(𝕃ᵢ, rt, maybecondinfo, argtypes, i)
+                new_thentype = cnd.thentype
+                new_elsetype = cnd.elsetype
+            end
+            if condval === false
+                thentype = Bottom
+            elseif new_thentype ⊑ thentype
+                thentype = new_thentype
+            else
+                thentype = thentype ⊓ widenconst(new_thentype)
+            end
+            if condval === true
+                elsetype = Bottom
+            elseif new_elsetype ⊑ elsetype
+                elsetype = new_elsetype
+            else
+                elsetype = elsetype ⊓ widenconst(new_elsetype)
+            end
+            if (slot > 0 || condval !== false) && thentype ⋤ old
+                slot = id
+                if !(arg isa SlotNumber) && argtyp isa MustAlias
+                    alias = argtyp
+                end
+            elseif (slot > 0 || condval !== true) && elsetype ⋤ old
+                slot = id
+                if !(arg isa SlotNumber) && argtyp isa MustAlias
+                    alias = argtyp
+                end
+            else # reset: no new useful information for this slot
+                slot = 0
+                alias = nothing
+                thentype = elsetype = Any
+            end
+        end
+    end
+    if thentype === Bottom && elsetype === Bottom
+        return Bottom # accidentally proved this call to be dead / throw !
+    elseif slot > 0
+        if alias !== nothing
+            return form_mustalias_conditional(alias, thentype, elsetype)
+        end
+        return Conditional(slot, thentype, elsetype) # record a Conditional improvement to this slot
+    end
+    return widenconditional(rt)
+end
+
+function conditional_argtype(𝕃ᵢ::AbstractLattice, @nospecialize(rt), @nospecialize(sig),
+                             argtypes::Vector{Any}, i::Int)
+    if isa(rt, InterConditional) && rt.slot == i
+        return rt
+    else
+        argt = widenslotwrapper(argtypes[i])
+        if isvarargtype(argt)
+            @assert fieldcount(sig) == i
+            argt = unwrapva(argt)
+        end
+        thentype = elsetype = tmeet(𝕃ᵢ, argt, fieldtype(sig, i))
+        condval = maybe_extract_const_bool(rt)
+        condval === true && (elsetype = Bottom)
+        condval === false && (thentype = Bottom)
+        return InterConditional(i, thentype, elsetype)
+    end
+end
+
+function collect_slot_refinements(𝕃ᵢ::AbstractLattice, applicable::Vector{MethodMatchTarget},
+    argtypes::Vector{Any}, fargs::Vector{Any}, sv::InferenceState)
+    ⊏, ⊔ = strictpartialorder(𝕃ᵢ), join(𝕃ᵢ)
+    slotrefinements = nothing
+    for i = 1:length(fargs)
+        fargᵢ = fargs[i]
+        if fargᵢ isa SlotNumber
+            fidx = slot_id(fargᵢ)
+            argt = widenslotwrapper(argtypes[i])
+            if isvarargtype(argt)
+                argt = unwrapva(argt)
+            end
+            sigt = Bottom
+            for j = 1:length(applicable)
+                (;match) = applicable[j]
+                valid_as_lattice(match.spec_types, true) || continue
+                sigt = sigt ⊔ fieldtype(match.spec_types, i)
+            end
+            if sigt ⊏ argt # i.e. signature type is strictly more specific than the type of the argument slot
+                if slotrefinements === nothing
+                    slotrefinements = fill!(Vector{Any}(undef, length(sv.slottypes)), nothing)
+                end
+                slotrefinements[fidx] = sigt
+            end
+        end
+    end
+    return slotrefinements
+end
+
+const RECURSION_UNUSED_MSG = "Bounded recursion detected with unused result. Annotated return type may be wider than true result."
+const RECURSION_MSG = "Bounded recursion detected. Call was widened to force convergence."
+const RECURSION_MSG_HARDLIMIT = "Bounded recursion detected under hardlimit. Call was widened to force convergence."
+
+function abstract_call_method(interp::AbstractInterpreter,
+                              method::Method, @nospecialize(sig), sparams::SimpleVector,
+                              hardlimit::Bool, si::StmtInfo, sv::AbsIntState)
+    sigtuple = unwrap_unionall(sig)
+    sigtuple isa DataType ||
+        return Future(MethodCallResult(Any, Any, Effects(), nothing, false, false))
+    all(@nospecialize(x) -> isvarargtype(x) || valid_as_lattice(x, true), sigtuple.parameters) ||
+        return Future(MethodCallResult(Union{}, Any, EFFECTS_THROWS, nothing, false, false)) # catch bad type intersections early
+
+    if is_nospecializeinfer(method)
+        sig = get_nospecializeinfer_sig(method, sig, sparams)
+    end
+
+    # Limit argument type tuple growth of functions:
+    # look through the parents list to see if there's a call to the same method
+    # and from the same method.
+    # Returns the topmost occurrence of that repeated edge.
+    edgecycle = edgelimited = false
+    topmost = nothing
+
+    for sv′ in AbsIntStackUnwind(sv)
+        infmi = frame_instance(sv′)
+        if method === infmi.def
+            if infmi.specTypes::Type == sig::Type
+                # avoid widening when detecting self-recursion
+                # TODO: merge call cycle and return right away
+                topmost = nothing
+                edgecycle = true
+                break
+            end
+            topmost === nothing || continue
+            if edge_matches_sv(interp, sv′, method, sig, sparams, hardlimit, sv)
+                topmost = sv′
+                edgecycle = true
+            end
+        end
+    end
+    washardlimit = hardlimit
+
+    if topmost !== nothing
+        msig = unwrap_unionall(method.sig)::DataType
+        spec_len = length(msig.parameters) + 1
+        mi = frame_instance(sv)
+
+        if isdefined(method, :recursion_relation)
+            # We don't require the recursion_relation to be transitive, so
+            # apply a hard limit
+            hardlimit = true
+        end
+
+        if method === mi.def
+            # Under direct self-recursion, permit much greater use of reducers.
+            # here we assume that complexity(specTypes) :>= complexity(sig)
+            comparison = mi.specTypes
+            l_comparison = length((unwrap_unionall(comparison)::DataType).parameters)
+            spec_len = max(spec_len, l_comparison)
+        elseif !hardlimit && isa(topmost, InferenceState)
+            # Without a hardlimit, permit use of reducers too.
+            comparison = frame_instance(topmost).specTypes
+            # n.b. currently don't allow vararg reducers
+            #l_comparison = length((unwrap_unionall(comparison)::DataType).parameters)
+            #spec_len = max(spec_len, l_comparison)
+        else
+            comparison = method.sig
+        end
+
+        # see if the type is actually too big (relative to the caller), and limit it if required
+        newsig = limit_type_size(sig, comparison, hardlimit ? comparison : mi.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, spec_len)
+
+        if newsig !== sig
+            # continue inference, but note that we've limited parameter complexity
+            # on this call (to ensure convergence), so that we don't cache this result
+            if call_result_unused(si)
+                add_remark!(interp, sv, RECURSION_UNUSED_MSG)
+                # if we don't (typically) actually care about this result,
+                # don't bother trying to examine some complex abstract signature
+                # since it's very unlikely that we'll try to inline this,
+                # or want make an invoke edge to its calling convention return type.
+                # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
+                return Future(MethodCallResult(Any, Any, Effects(), nothing, true, true))
+            end
+            add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG)
+            # TODO (#48913) implement a proper recursion handling for irinterp:
+            # This works just because currently the `:terminate` condition usually means this is unreachable here
+            # for irinterp because there are not unresolved cycles, but it's not a good solution.
+            # We should revisit this once we have a better story for handling cycles in irinterp.
+            if isa(sv, InferenceState)
+                # since the hardlimit is against the edge to the parent frame,
+                # we should try to poison the whole edge, not just the topmost frame
+                parentframe = frame_parent(topmost)
+                while !isa(parentframe, InferenceState)
+                    # attempt to find a parent frame that can handle this LimitedAccuracy result correctly
+                    # so we don't try to cache this incomplete intermediate result
+                    parentframe === nothing && break
+                    parentframe = frame_parent(parentframe)
+                end
+                if isa(parentframe, InferenceState)
+                    poison_callstack!(sv, parentframe)
+                elseif isa(topmost, InferenceState)
+                    poison_callstack!(sv, topmost)
+                end
+            end
+            # n.b. this heuristic depends on the non-local state, so we must record the limit later
+            sig = newsig
+            sparams = svec()
+            edgelimited = true
+        end
+    end
+
+    # if sig changed, may need to recompute the sparams environment
+    if isa(method.sig, UnionAll) && isempty(sparams)
+        recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), sig, method.sig)::SimpleVector
+        #@assert recomputed[1] !== Bottom
+        # We must not use `sig` here, since that may re-introduce structural complexity that
+        # our limiting heuristic sought to eliminate. The alternative would be to not increment depth over covariant contexts,
+        # but we prefer to permit inference of tuple-destructuring, so we don't do that right now
+        # For example, with a signature such as `Tuple{T, Ref{T}} where {T <: S}`
+        # we might want to limit this to `Tuple{S, Ref}`, while type-intersection can instead give us back the original type
+        # (which moves `S` back up to a lower comparison depth)
+        # Optionally, we could try to drive this to a fixed point, but I think this is getting too complex,
+        # and this would only cause more questions and more problems
+        # (the following is only an example, most of the statements are probable in the wrong order):
+        #     newsig = sig
+        #     seen = IdSet()
+        #     while !(newsig in seen)
+        #         push!(seen, newsig)
+        #         lsig = length((unwrap_unionall(sig)::DataType).parameters)
+        #         newsig = limit_type_size(newsig, sig, sv.linfo.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, lsig)
+        #         recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), newsig, method.sig)::SimpleVector
+        #         newsig = recomputed[2]
+        #     end
+        #     sig = ?
+        sparams = recomputed[2]::SimpleVector
+    end
+
+    return typeinf_edge(interp, method, sig, sparams, sv, edgecycle, edgelimited)
+end
+
+function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState,
+                         method::Method, @nospecialize(sig), sparams::SimpleVector,
+                         hardlimit::Bool, sv::AbsIntState)
+    # The `method_for_inference_heuristics` will expand the given method's generator if
+    # necessary in order to retrieve this field from the generated `CodeInfo`, if it exists.
+    # The other `CodeInfo`s we inspect will already have this field inflated, so we just
+    # access it directly instead (to avoid regeneration).
+    world = get_inference_world(interp)
+    callee_method2 = method_for_inference_heuristics(method, sig, sparams, world)
+    inf_method2 = method_for_inference_limit_heuristics(frame)
+    if callee_method2 !== inf_method2 # limit only if user token match
+        return false
+    end
+    if isa(frame, InferenceState) && cache_owner(frame.interp) !== cache_owner(interp)
+        # Don't assume that frames in different interpreters are the same
+        return false
+    end
+    if !hardlimit || InferenceParams(interp).ignore_recursion_hardlimit
+        # if this is a soft limit,
+        # also inspect the parent of this edge,
+        # to see if they are the same Method as sv
+        # in which case we'll need to ensure it is convergent
+        # otherwise, we don't
+
+        # check in the cycle list first
+        # all items in here are considered mutual parents of all others
+        if !any(p::AbsIntState->matches_sv(p, sv), callers_in_cycle(frame))
+            let parent = cycle_parent(frame)
+                parent === nothing && return false
+                (is_cached(parent) || frame_parent(parent) !== nothing) || return false
+                matches_sv(parent, sv) || return false
+            end
+        end
+
+        # If the method defines a recursion relation, give it a chance
+        # to tell us that this recursion is actually ok.
+        if isdefined(method, :recursion_relation)
+            if Core._call_in_world_total(get_world_counter(), method.recursion_relation, method, callee_method2, sig, frame_instance(frame).specTypes)
+                return false
+            end
+        end
+    end
+    return true
+end
+
+# This function is used for computing alternate limit heuristics
+function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector, world::UInt)
+    if (hasgenerator(method) && !(method.generator isa Core.GeneratedFunctionStub) &&
+        may_invoke_generator(method, sig, sparams))
+        mi = specialize_method(method, sig, sparams)
+        cinfo = get_staged(mi, world)
+        if isa(cinfo, CodeInfo)
+            method2 = cinfo.method_for_inference_limit_heuristics
+            if method2 isa Method
+                return method2
+            end
+        end
+    end
+    return nothing
+end
+
+function matches_sv(parent::AbsIntState, sv::AbsIntState)
+    # limit only if user token match
+    return (frame_instance(parent).def === frame_instance(sv).def &&
+            method_for_inference_limit_heuristics(sv) === method_for_inference_limit_heuristics(parent))
+end
+
+function is_edge_recursed(edge::CodeInstance, caller::AbsIntState)
+    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
+        return edge.def === frame_instance(sv)
+    end
+end
+
+function is_method_recursed(method::Method, caller::AbsIntState)
+    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
+        return method === frame_instance(sv).def
+    end
+end
+
+function is_constprop_edge_recursed(edge::MethodInstance, caller::AbsIntState)
+    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
+        return edge === frame_instance(sv) && is_constproped(sv)
+    end
+end
+
+function is_constprop_method_recursed(method::Method, caller::AbsIntState)
+    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
+        return method === frame_instance(sv).def && is_constproped(sv)
+    end
+end
+
+# keeps result and context information of abstract_method_call, which will later be used for
+# backedge computation, and concrete evaluation or constant-propagation
+struct MethodCallResult
+    rt
+    exct
+    effects::Effects
+    edge::Union{Nothing,CodeInstance}
+    edgecycle::Bool
+    edgelimited::Bool
+    call_result::Union{Nothing,InferredCallResult}
+    function MethodCallResult(@nospecialize(rt), @nospecialize(exct), effects::Effects,
+                              edge::Union{Nothing,CodeInstance}, edgecycle::Bool, edgelimited::Bool,
+                              call_result::Union{Nothing,InferredCallResult} = nothing)
+        return new(rt, exct, effects, edge, edgecycle, edgelimited, call_result)
+    end
+end
+
+struct InvokeCall
+    types     # ::Type
+    InvokeCall(@nospecialize(types)) = new(types)
+end
+
+struct ConstCallResult
+    rt::Any
+    exct::Any
+    const_result::InferredCallResult
+    effects::Effects
+    const_edge::Union{Nothing,CodeInstance}
+    function ConstCallResult(
+        @nospecialize(rt), @nospecialize(exct),
+        const_result::InferredCallResult, effects::Effects,
+        const_edge::Union{Nothing,CodeInstance})
+        return new(rt, exct, const_result, effects, const_edge)
+    end
+end
+
+function abstract_call_method_with_const_args(interp::AbstractInterpreter,
+    result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo,
+    match::MethodMatch, sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing)
+    if bail_out_const_call(interp, result, si, match, sv)
+        return nothing
+    end
+    eligibility = concrete_eval_eligible(interp, f, result, arginfo, sv)
+    concrete_eval_result = nothing
+    if eligibility === :concrete_eval
+        concrete_eval_result = concrete_eval_call(interp, f, result, arginfo, sv, invokecall)
+        if (concrete_eval_result !== nothing &&  # allow external abstract interpreters to disable concrete evaluation ad-hoc
+            # if we don't inline the result of this concrete evaluation,
+            # give const-prop' a chance to inline a better method body
+            (!may_optimize(interp) ||
+             may_inline_concrete_result(concrete_eval_result.const_result::ConcreteResult) ||
+             concrete_eval_result.rt === Bottom)) # unless this call deterministically throws and thus is non-inlineable
+            return concrete_eval_result
+        end
+        # TODO allow semi-concrete interp for this call?
+    end
+    mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv)
+    mi === nothing && return concrete_eval_result
+    if is_constprop_recursed(result, mi, sv)
+        add_remark!(interp, sv, "[constprop] Edge cycle encountered")
+        return nothing
+    end
+    # try semi-concrete evaluation
+    if eligibility === :semi_concrete_eval
+        irinterp_result = semi_concrete_eval_call(interp, mi, result, arginfo, sv)
+        if irinterp_result !== nothing
+            return irinterp_result
+        end
+    end
+    # try constant prop'
+    return const_prop_call(interp, mi, result, arginfo, sv, concrete_eval_result)
+end
+
+function bail_out_const_call(interp::AbstractInterpreter, result::MethodCallResult,
+                             si::StmtInfo, match::MethodMatch, sv::AbsIntState)
+    if !InferenceParams(interp).ipo_constant_propagation
+        add_remark!(interp, sv, "[constprop] Disabled by parameter")
+        return true
+    end
+    if is_no_constprop(match.method)
+        add_remark!(interp, sv, "[constprop] Disabled by method parameter")
+        return true
+    end
+    if is_removable_if_unused(result.effects)
+        if isa(result.rt, Const)
+            add_remark!(interp, sv, "[constprop] No more information to be gained (const)")
+            return true
+        elseif call_result_unused(si)
+            add_remark!(interp, sv, "[constprop] No more information to be gained (unused result)")
+            return true
+        end
+    end
+    if result.rt === Bottom
+        if is_terminates(result.effects) && is_effect_free(result.effects)
+            # In the future, we may want to add `&& isa(result.exct, Const)` to
+            # the list of conditions here, but currently, our effect system isn't
+            # precise enough to let us determine :consistency of `exct`, so we
+            # would have to force constprop just to determine this, which is too
+            # expensive.
+            add_remark!(interp, sv, "[constprop] No more information to be gained (bottom)")
+            return true
+        end
+    end
+    return false
+end
+
+function concrete_eval_eligible(interp::AbstractInterpreter,
+    @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState)
+    (;effects) = result
+    if inbounds_option() === :off
+        if !is_nothrow(effects)
+            # Disable concrete evaluation in `--check-bounds=no` mode,
+            # unless it is known to not throw.
+            return :none
+        end
+    end
+    if result.edge !== nothing && is_foldable(effects, #=check_rtcall=#true)
+        if f !== nothing && is_all_const_arg(arginfo, #=start=#2)
+            if (is_nonoverlayed(interp) || is_nonoverlayed(effects) ||
+                # Even if overlay methods are involved, when `:consistent_overlay` is
+                # explicitly applied, we can still perform concrete evaluation using the
+                # original methods for executing them.
+                # While there's a chance that the non-overlayed counterparts may raise
+                # non-egal exceptions, it will not impact the compilation validity, since:
+                # - the results of the concrete evaluation will not be inlined
+                # - the exception types from the concrete evaluation will not be propagated
+                is_consistent_overlay(effects))
+                return :concrete_eval
+            end
+            # disable concrete-evaluation if this function call is tainted by some overlayed
+            # method since currently there is no easy way to execute overlayed methods
+            add_remark!(interp, sv, "[constprop] Concrete eval disabled for overlayed methods")
+        end
+        if !any_conditional(arginfo)
+            if may_optimize(interp)
+                return :semi_concrete_eval
+            else
+                # disable irinterp if optimization is disabled, since it requires optimized IR
+                add_remark!(interp, sv, "[constprop] Semi-concrete interpretation disabled for non-optimizing interpreter")
+            end
+        end
+    end
+    return :none
+end
+
+is_all_const_arg(arginfo::ArgInfo, start::Int) = is_all_const_arg(arginfo.argtypes, start::Int)
+function is_all_const_arg(argtypes::Vector{Any}, start::Int)
+    for i = start:length(argtypes)
+        argtype = widenslotwrapper(argtypes[i])
+        is_const_argtype(argtype) || return false
+    end
+    return true
+end
+
+is_const_argtype(@nospecialize argtype) = isa(argtype, Const) || isconstType(argtype) || issingletontype(argtype)
+
+any_conditional(argtypes::Vector{Any}) = any(@nospecialize(x)->isa(x, Conditional), argtypes)
+any_conditional(arginfo::ArgInfo) = any_conditional(arginfo.argtypes)
+
+collect_const_args(arginfo::ArgInfo, start::Int) = collect_const_args(arginfo.argtypes, start)
+function collect_const_args(argtypes::Vector{Any}, start::Int)
+    return Any[ let a = widenslotwrapper(argtypes[i])
+                    isa(a, Const) ? a.val :
+                    isconstType(a) ? a.parameters[1] :
+                    (a::DataType).instance
+                end for i = start:length(argtypes) ]
+end
+
+function concrete_eval_call(interp::AbstractInterpreter,
+    @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, ::AbsIntState,
+    invokecall::Union{InvokeCall,Nothing}=nothing)
+    args = collect_const_args(arginfo, #=start=#2)
+    if invokecall !== nothing
+        # this call should be `invoke`d, rewrite `args` back now
+        pushfirst!(args, f, invokecall.types)
+        f = invoke
+    end
+    world = get_inference_world(interp)
+    edge = result.edge::CodeInstance
+    value = try
+        Core._call_in_world_total(world, f, args...)
+    catch
+        # The evaluation threw. By :consistent-cy, we're guaranteed this would have happened at runtime.
+        # Howevever, at present, :consistency does not mandate the type of the exception
+        concrete_result = ConcreteResult(edge, result.effects)
+        return ConstCallResult(Bottom, Any, concrete_result, result.effects, #=const_edge=#nothing)
+    end
+    concrete_result = ConcreteResult(edge, EFFECTS_TOTAL, value)
+    return ConstCallResult(Const(value), Bottom, concrete_result, EFFECTS_TOTAL, #=const_edge=#nothing)
+end
+
+# check if there is a cycle and duplicated inference of `mi`
+function is_constprop_recursed(result::MethodCallResult, mi::MethodInstance, sv::AbsIntState)
+    result.edgecycle || return false
+    if result.edgelimited
+        return is_constprop_method_recursed(mi.def::Method, sv)
+    else
+        # if the type complexity limiting didn't decide to limit the call signature (as
+        # indicated by `result.edgelimited === false`), we can relax the cycle detection
+        # by comparing `MethodInstance`s and allow inference to propagate different
+        # constant elements if the recursion is finite over the lattice
+        return is_constprop_edge_recursed(mi, sv)
+    end
+end
+
+# if there's a possibility we could get a better result with these constant arguments
+# (hopefully without doing too much work), returns `MethodInstance`, or nothing otherwise
+function maybe_get_const_prop_profitable(interp::AbstractInterpreter,
+    result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo,
+    match::MethodMatch, sv::AbsIntState)
+    method = match.method
+    force = force_const_prop(interp, f, method)
+    if !const_prop_rettype_heuristic(interp, result, si, sv, force)
+        # N.B. remarks are emitted within `const_prop_rettype_heuristic`
+        return nothing
+    end
+    if !const_prop_argument_heuristic(interp, arginfo, sv)
+        add_remark!(interp, sv, "[constprop] Disabled by argument heuristics")
+        return nothing
+    end
+    all_overridden = is_all_overridden(interp, arginfo, sv)
+    if !force && !const_prop_function_heuristic(interp, f, arginfo, all_overridden, sv)
+        add_remark!(interp, sv, "[constprop] Disabled by function heuristic")
+        return nothing
+    end
+    force |= all_overridden
+    mi = specialize_method(match; preexisting=!force)
+    if mi === nothing
+        add_remark!(interp, sv, "[constprop] Failed to specialize")
+        return nothing
+    end
+    mi = mi::MethodInstance
+    inf_result = result.call_result
+    inf_result = inf_result isa InferenceResult ? inf_result : nothing
+    if !force && !const_prop_methodinstance_heuristic(interp, inf_result, mi, arginfo, sv)
+        add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic")
+        return nothing
+    end
+    return mi
+end
+
+function const_prop_rettype_heuristic(interp::AbstractInterpreter, result::MethodCallResult,
+                                      si::StmtInfo, sv::AbsIntState, force::Bool)
+    rt = result.rt
+    if rt isa LimitedAccuracy
+        # optimizations like inlining are disabled for limited frames,
+        # thus there won't be much benefit in constant-prop' here
+        # N.B. don't allow forced constprop' for safety (xref #52763)
+        add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (limited accuracy)")
+        return false
+    elseif force
+        return true
+    elseif call_result_unused(si) && result.edgecycle
+        add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (edgecycle with unused result)")
+        return false
+    end
+    # check if this return type is improvable (i.e. whether it's possible that with more
+    # information, we might get a more precise type)
+    if isa(rt, Type)
+        # could always be improved to `Const`, `PartialStruct` or just a more precise type,
+        # unless we're already at `Bottom`
+        if rt === Bottom
+            add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (erroneous result)")
+            return false
+        end
+        return true
+    elseif isa(rt, PartialStruct) || isa(rt, InterConditional) || isa(rt, InterMustAlias)
+        # could be improved to `Const` or a more precise wrapper
+        return true
+    elseif isa(rt, Const)
+        if is_nothrow(result.effects)
+            add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (nothrow const)")
+            return false
+        end
+        # Could still be improved to Bottom (or at least could see the effects improved)
+        return true
+    else
+        add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (unimprovable result)")
+        return false
+    end
+end
+
+# determines heuristically whether if constant propagation can be worthwhile
+# by checking if any of given `argtypes` is "interesting" enough to be propagated
+function const_prop_argument_heuristic(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState)
+    𝕃ᵢ = typeinf_lattice(interp)
+    argtypes = arginfo.argtypes
+    for i in 1:length(argtypes)
+        a = argtypes[i]
+        if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && arginfo.fargs !== nothing
+            is_const_prop_profitable_conditional(a, arginfo.fargs, sv) && return true
+        else
+            a = widenslotwrapper(a)
+            has_nontrivial_extended_info(𝕃ᵢ, a) && is_const_prop_profitable_arg(𝕃ᵢ, a) && return true
+        end
+    end
+    return false
+end
+
+function is_const_prop_profitable_conditional(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState)
+    slotid = find_constrained_arg(cnd, fargs, sv)
+    if slotid !== nothing
+        return true
+    end
+    # as a minor optimization, we just check the result is a constant or not,
+    # since both `has_nontrivial_extended_info`/`is_const_prop_profitable_arg` return `true`
+    # for `Const(::Bool)`
+    return isa(widenconditional(cnd), Const)
+end
+
+function find_constrained_arg(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState)
+    slot = cnd.slot
+    for i in 1:length(fargs)
+        arg = ssa_def_slot(fargs[i], sv)
+        if isa(arg, SlotNumber) && slot_id(arg) == slot
+            return i
+        end
+    end
+    return nothing
+end
+
+# checks if all argtypes has additional information other than what `Type` can provide
+function is_all_overridden(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::AbsIntState)
+    𝕃ᵢ = typeinf_lattice(interp)
+    for i in 1:length(argtypes)
+        a = argtypes[i]
+        if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && fargs !== nothing
+            is_const_prop_profitable_conditional(a, fargs, sv) || return false
+        else
+            is_forwardable_argtype(𝕃ᵢ, widenslotwrapper(a)) || return false
+        end
+    end
+    return true
+end
+
+function force_const_prop(interp::AbstractInterpreter, @nospecialize(f), method::Method)
+    return is_aggressive_constprop(method) ||
+           InferenceParams(interp).aggressive_constant_propagation ||
+           typename(typeof(f)).constprop_heuristic === Core.FORCE_CONST_PROP
+end
+
+function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecialize(f),
+    arginfo::ArgInfo, all_overridden::Bool, sv::AbsIntState)
+    argtypes = arginfo.argtypes
+    heuristic = typename(typeof(f)).constprop_heuristic
+    if length(argtypes) > 1
+        𝕃ᵢ = typeinf_lattice(interp)
+        if heuristic === Core.ARRAY_INDEX_HEURISTIC
+            arrty = argtypes[2]
+            # don't propagate constant index into indexing of non-constant array
+            if arrty isa Type && arrty <: AbstractArray && !issingletontype(arrty)
+                # For static arrays, allow the constprop if we could possibly
+                # deduce nothrow as a result.
+                still_nothrow = isa(sv, InferenceState) ? is_nothrow(sv.ipo_effects) : false
+                if !still_nothrow || ismutabletype(arrty)
+                    return false
+                end
+            elseif ⊑(𝕃ᵢ, arrty, Array) || ⊑(𝕃ᵢ, arrty, GenericMemory)
+                return false
+            end
+        elseif heuristic === Core.ITERATE_HEURISTIC
+            itrty = argtypes[2]
+            if ⊑(𝕃ᵢ, itrty, Array) || ⊑(𝕃ᵢ, itrty, GenericMemory)
+                return false
+            end
+        end
+    end
+    if !all_overridden && heuristic === Core.SAMETYPE_HEURISTIC
+        # it is almost useless to inline the op when all the same type,
+        # but highly worthwhile to inline promote of a constant
+        length(argtypes) > 2 || return false
+        t1 = widenconst(argtypes[2])
+        for i in 3:length(argtypes)
+            at = argtypes[i]
+            ty = isvarargtype(at) ? unwraptv(at) : widenconst(at)
+            if ty !== t1
+                return true
+            end
+        end
+        return false
+    end
+    return true
+end
+
+# This is a heuristic to avoid trying to const prop through complicated functions
+# where we would spend a lot of time, but are probably unlikely to get an improved
+# result anyway.
+function const_prop_methodinstance_heuristic(interp::AbstractInterpreter,
+    inf_result::Union{InferenceResult,Nothing}, mi::MethodInstance, arginfo::ArgInfo, sv::AbsIntState)
+    method = mi.def::Method
+    if method.is_for_opaque_closure
+        # Not inlining an opaque closure can be very expensive, so be generous
+        # with the const-prop-ability. It is quite possible that we can't infer
+        # anything at all without const-propping, so the inlining check below
+        # isn't particularly helpful here.
+        return true
+    end
+    # now check if the source of this method instance is inlineable, since the extended type
+    # information we have here would be discarded if it is not inlined into a callee context
+    # (modulo the inferred return type that can be potentially refined)
+    if is_declared_inline(method)
+        # this method is declared as `@inline` and will be inlined
+        return true
+    end
+    flag = get_curr_ssaflag(sv)
+    if is_stmt_inline(flag)
+        # force constant propagation for a call that is going to be inlined
+        # since the inliner will try to find this constant result
+        # if these constant arguments arrive there
+        return true
+    elseif is_stmt_noinline(flag)
+        # this call won't be inlined, thus this constant-prop' will most likely be unfruitful
+        return false
+    else
+        # Peek at the inferred result for the method to determine if the optimizer
+        # was able to cut it down to something simple (inlineable in particular).
+        # If so, there will be a good chance we might be able to const prop
+        # all the way through and learn something new.
+        if inf_result isa InferenceResult
+            inferred = inf_result.src
+            # TODO propagate a specific `CallInfo` that conveys information about this call
+            if src_inlining_policy(interp, mi, inferred, NoCallInfo(), IR_FLAG_NULL)
+                return true
+            end
+        end
+    end
+    return false # the cache isn't inlineable, so this constant-prop' will most likely be unfruitful
+end
+
+function semi_concrete_eval_call(interp::AbstractInterpreter,
+    mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState)
+    call_result = result.call_result
+    call_result isa InferenceResult || return nothing
+    codeinst = call_result.ci
+    codeinst isa CodeInstance || return nothing
+    inferred = call_result.src
+    src_inlining_policy(interp, mi, inferred, NoCallInfo(), IR_FLAG_NULL) || return nothing # hack to work-around test failures caused by #58183 until both it and #48913 are fixed
+    irsv = IRInterpretationState(interp, codeinst, mi, arginfo.argtypes, inferred)
+    irsv === nothing && return nothing
+    assign_parentchild!(irsv, sv)
+    rt, (nothrow, noub) = ir_abstract_constant_propagation(interp, irsv)
+    @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from irinterp"
+    if !(isa(rt, Type) && hasintersect(rt, Bool))
+        ir = irsv.ir
+        # TODO (#48913) enable double inlining pass when there are any calls
+        # that are newly resolved by irinterp
+        # state = InliningState(interp)
+        # ir = ssa_inlining_pass!(irsv.ir, state, propagate_inbounds(irsv))
+        effects = result.effects
+        if nothrow
+            effects = Effects(effects; nothrow=true)
+        end
+        if noub
+            effects = Effects(effects; noub=ALWAYS_TRUE)
+        end
+        exct = refine_exception_type(result.exct, effects)
+        # TODO: SemiConcreteResult fails to preserve the ci_as_edge value
+        semi_concrete_result = SemiConcreteResult(codeinst, ir, effects, spec_info(irsv))
+        const_edge = nothing # TODO use the edges from irsv?
+        return ConstCallResult(rt, exct, semi_concrete_result, effects, const_edge)
+    end
+    nothing
+end
+
+function const_prop_result(inf_result::InferenceResult)
+    @assert isdefined(inf_result, :ci_as_edge) "InferenceResult without ci_as_edge"
+    return ConstCallResult(inf_result.result, inf_result.exc_result, inf_result,
+                           inf_result.ipo_effects, inf_result.ci_as_edge)
+end
+
+# return cached result of constant analysis
+return_localcache_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) =
+    const_prop_result(inf_result)
+
+function compute_forwarded_argtypes(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState)
+    𝕃ᵢ = typeinf_lattice(interp)
+    return has_conditional(𝕃ᵢ, sv) ? ConditionalSimpleArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes)
+end
+
+function const_prop_call(interp::AbstractInterpreter,
+    mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState,
+    concrete_eval_result::Union{Nothing,ConstCallResult}=nothing)
+    inf_cache = get_inference_cache(interp)
+    𝕃ᵢ = typeinf_lattice(interp)
+    forwarded_argtypes = compute_forwarded_argtypes(interp, arginfo, sv)
+    # use `cache_argtypes` that has been constructed for fresh regular inference if available
+    call_result = result.call_result
+    if call_result isa InferenceResult
+        cache_argtypes = call_result.argtypes
+    else
+        cache_argtypes = matching_cache_argtypes(𝕃ᵢ, mi)
+    end
+    argtypes = matching_cache_argtypes(𝕃ᵢ, mi, forwarded_argtypes, cache_argtypes)
+    inf_result = constprop_cache_lookup(𝕃ᵢ, mi, argtypes, inf_cache)
+    if inf_result !== nothing
+        # found the cache for this constant prop'
+        if inf_result.result === nothing
+            add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle")
+            return nothing
+        end
+        @assert inf_result.linfo === mi "MethodInstance for cached inference result does not match"
+        return return_localcache_result(interp, inf_result, sv)
+    end
+    overridden_by_const = falses(length(argtypes))
+    for i = 1:length(argtypes)
+        if argtypes[i] !== argtype_by_index(cache_argtypes, i)
+            overridden_by_const[i] = true
+        end
+    end
+    if !any(overridden_by_const)
+        add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes")
+        return nothing
+    end
+    # perform fresh constant prop'
+    inf_result = InferenceResult(mi, argtypes, overridden_by_const)
+    frame = InferenceState(inf_result, #=cache_mode=#:local, interp) # TODO: this should also be converted to a stackless Future
+    if frame === nothing
+        add_remark!(interp, sv, "[constprop] Could not retrieve the source")
+        return nothing # this is probably a bad generated function (unsound), but just ignore it
+    end
+    assign_parentchild!(frame, sv)
+    if !typeinf(interp, frame)
+        sv.time_caches += frame.time_caches
+        sv.time_paused += frame.time_paused
+        add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle")
+        @assert frame.frameid != 0 && frame.cycleid == frame.frameid
+        callstack = frame.callstack::Vector{AbsIntState}
+        @assert callstack[end] === frame && length(callstack) == frame.frameid
+        pop!(callstack)
+        # add to the cache to record that this will always fail
+        push!(get_inference_cache(interp), inf_result)
+        return nothing
+    end
+    existing_edge = result.edge
+    inf_result.ci_as_edge = codeinst_as_edge(interp, frame, existing_edge)
+    @assert frame.frameid != 0 && frame.cycleid == frame.frameid
+    @assert frame.parentid == sv.frameid
+    @assert inf_result.result !== nothing
+    # ConditionalSimpleArgtypes is allowed, because the only case in which it modifies
+    # the argtypes is when one of the argtypes is a `Conditional`, which case
+    # concrete_eval_result will not be available.
+    if concrete_eval_result !== nothing && isa(forwarded_argtypes, Union{SimpleArgtypes, ConditionalSimpleArgtypes})
+        # override return type and effects with concrete evaluation result if available
+        inf_result.result = concrete_eval_result.rt
+        inf_result.ipo_effects = concrete_eval_result.effects
+    end
+    return const_prop_result(inf_result)
+end
+
+# TODO implement MustAlias forwarding
+
+struct ConditionalSimpleArgtypes
+    arginfo::ArgInfo
+    sv::InferenceState
+end
+
+function matching_cache_argtypes(𝕃::AbstractLattice, mi::MethodInstance,
+                                 conditional_argtypes::ConditionalSimpleArgtypes,
+                                 cache_argtypes::Vector{Any})
+    (; arginfo, sv) = conditional_argtypes
+    (; fargs, argtypes) = arginfo
+    given_argtypes = Vector{Any}(undef, length(argtypes))
+    def = mi.def::Method
+    nargs = Int(def.nargs)
+    for i in 1:length(argtypes)
+        argtype = argtypes[i]
+        # forward `Conditional` if it conveys a constraint on any other argument
+        if isa(argtype, Conditional) && fargs !== nothing
+            cnd = argtype
+            slotid = find_constrained_arg(cnd, fargs, sv)
+            if slotid !== nothing
+                # using union-split signature, we may be able to narrow down `Conditional`
+                sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid])
+                ⊓ = meet(𝕃)
+                thentype = cnd.thentype ⊓ sigt
+                elsetype = cnd.elsetype ⊓ sigt
+                if thentype === Bottom && elsetype === Bottom
+                    # we accidentally proved this method match is impossible
+                    # TODO bail out here immediately rather than just propagating Bottom ?
+                    given_argtypes[i] = Bottom
+                else
+                    given_argtypes[i] = Conditional(slotid, thentype, elsetype)
+                end
+                continue
+            end
+        end
+        given_argtypes[i] = widenslotwrapper(argtype)
+    end
+    return pick_const_args!(𝕃, given_argtypes, cache_argtypes)
+end
+
+# This is only for use with `Conditional`.
+# In general, usage of this is wrong.
+function ssa_def_slot(@nospecialize(arg), sv::InferenceState)
+    code = sv.src.code
+    init = sv.currpc
+    while isa(arg, SSAValue)
+        init = arg.id
+        arg = code[init]
+    end
+    if arg isa SlotNumber
+        # found this kind of pattern:
+        # %init = SlotNumber(x)
+        # [...]
+        # goto if not isa(%init, T)
+        # now conservatively make sure there isn't potentially another conflicting assignment
+        # to the same slot between the def and usage
+        # we can assume the IR is sorted, since the front-end only creates SSA values in order
+        for i = init:(sv.currpc-1)
+            e = code[i]
+            if isexpr(e, :(=)) && e.args[1] === arg
+                return nothing
+            end
+        end
+    else
+        # there might still be the following kind of pattern (see #45499):
+        # %init = ...
+        # [...]
+        # SlotNumber(x) = %init
+        # [...]
+        # goto if not isa(%init, T)
+        # let's check if there is a slot assigned to the def SSA value but also there isn't
+        # any potentially conflicting assignment to the same slot
+        arg = nothing
+        def = SSAValue(init)
+        for i = (init+1):(sv.currpc-1)
+            e = code[i]
+            if isexpr(e, :(=))
+                lhs = e.args[1]
+                if isa(lhs, SlotNumber)
+                    lhs === arg && return nothing
+                    rhs = e.args[2]
+                    if rhs === def
+                        arg = lhs
+                    end
+                end
+            end
+        end
+    end
+    return arg
+end
+
+# No slots in irinterp
+ssa_def_slot(@nospecialize(arg), ::IRInterpretationState) = nothing
+
+struct AbstractIterationResult
+    cti::Vector{Any}
+    info::MaybeAbstractIterationInfo
+    ai_effects::Effects
+end
+AbstractIterationResult(cti::Vector{Any}, info::MaybeAbstractIterationInfo) =
+    AbstractIterationResult(cti, info, EFFECTS_TOTAL)
+
+# `typ` is the inferred type for expression `arg`.
+# if the expression constructs a container (e.g. `svec(x,y,z)`),
+# refine its type to an array of element types.
+# Union of Tuples of the same length is converted to Tuple of Unions.
+# returns an array of types
+function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(typ),
+                                sv::AbsIntState)
+    if isa(typ, PartialStruct)
+        widet = typ.typ
+        if isa(widet, DataType)
+            if widet.name === Tuple.name
+                return Future(AbstractIterationResult(typ.fields, nothing))
+            elseif widet.name === _NAMEDTUPLE_NAME
+                return Future(AbstractIterationResult(typ.fields, nothing))
+            end
+        end
+    end
+
+    if isa(typ, Const)
+        val = typ.val
+        if isa(val, SimpleVector) || isa(val, Tuple) || isa(val, NamedTuple)
+            return Future(AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing)) # avoid making a tuple Generator here!
+        end
+    end
+
+    tti0 = widenconst(typ)
+    tti = unwrap_unionall(tti0)
+    if isa(tti, DataType) && tti.name === _NAMEDTUPLE_NAME
+        # A NamedTuple iteration is the same as the iteration of its Tuple parameter:
+        # compute a new `tti == unwrap_unionall(tti0)` based on that Tuple type
+        tti = unwraptv(tti.parameters[2])
+        tti0 = rewrap_unionall(tti, tti0)
+    end
+    if isa(tti, Union)
+        utis = uniontypes(tti)
+        # refine the Union to remove elements that are not valid tags for objects
+        filter!(@nospecialize(x) -> valid_as_lattice(x, true), utis)
+        if length(utis) == 0
+            return Future(AbstractIterationResult(Any[], nothing)) # oops, this statement was actually unreachable
+        elseif length(utis) == 1
+            tti = utis[1]
+            tti0 = rewrap_unionall(tti, tti0)
+        else
+            if any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis)
+                return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
+            end
+            ltp = length((utis[1]::DataType).parameters)
+            for t in utis
+                if length((t::DataType).parameters) != ltp
+                    return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
+                end
+            end
+            result = Any[ Union{} for _ in 1:ltp ]
+            for t in utis
+                tps = (t::DataType).parameters
+                for j in 1:ltp
+                    @assert valid_as_lattice(tps[j], true)
+                    result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0))
+                end
+            end
+            return Future(AbstractIterationResult(result, nothing))
+        end
+    end
+    if tti0 <: Tuple
+        if isa(tti0, DataType)
+            return Future(AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing))
+        elseif !isa(tti, DataType)
+            return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
+        else
+            len = length(tti.parameters)
+            last = tti.parameters[len]
+            va = isvarargtype(last)
+            elts = Any[ fieldtype(tti0, i) for i = 1:len ]
+            if va
+                if elts[len] === Union{}
+                    pop!(elts)
+                else
+                    elts[len] = Vararg{elts[len]}
+                end
+            end
+            return Future(AbstractIterationResult(elts, nothing))
+        end
+    elseif tti0 === SimpleVector
+        return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
+    elseif tti0 === Any
+        return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
+    elseif tti0 <: Array || tti0 <: GenericMemory
+        if eltype(tti0) === Union{}
+            return Future(AbstractIterationResult(Any[], nothing))
+        end
+        return Future(AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing))
+    else
+        return abstract_iteration(interp, itft, typ, sv)
+    end
+end
+
+# simulate iteration protocol on container type up to fixpoint
+function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(itertype), sv::AbsIntState)
+    if isa(itft, Const)
+        iteratef = itft.val
+    else
+        return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
+    end
+    @assert !isvarargtype(itertype)
+
+    iterateresult = Future{AbstractIterationResult}()
+    call1future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true, false), sv)::Future
+    function inferiterate(interp, sv)
+        call1 = call1future[]
+        stateordonet = call1.rt
+        # Return Bottom if this is not an iterator.
+        # WARNING: Changes to the iteration protocol must be reflected here,
+        # this is not just an optimization.
+        # TODO: this doesn't realize that Array, GenericMemory, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol
+        if stateordonet === Bottom
+            iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call1.effects, call1.info)], true))
+            return true
+        end
+        stateordonet_widened = widenconst(stateordonet)
+        calls = CallMeta[call1]
+        valtype = statetype = Bottom
+        ret = Any[]
+        𝕃ᵢ = typeinf_lattice(interp)
+        may_have_terminated = false
+        local call2future::Future{CallMeta}
+
+        nextstate::UInt8 = 0x0
+        function inferiterate_2arg(interp, sv)
+            if nextstate === 0x1
+                nextstate = 0xff
+                @goto state1
+            elseif nextstate === 0x2
+                nextstate = 0xff
+                @goto state2
+            else
+                @assert nextstate === 0x0
+                nextstate = 0xff
+            end
+
+            # Try to unroll the iteration up to max_tuple_splat, which covers any finite
+            # length iterators, or interesting prefix
+            while true
+                if stateordonet_widened === Nothing
+                    iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, true))
+                    return true
+                end
+                if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat
+                    break
+                end
+                if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
+                    break
+                end
+                nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2))
+                # If there's no new information in this statetype, don't bother continuing,
+                # the iterator won't be finite.
+                if ⊑(𝕃ᵢ, nstatetype, statetype)
+                    iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS)
+                    return true
+                end
+                valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1))
+                push!(ret, valtype)
+                statetype = nstatetype
+                call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true, false), sv)::Future
+                if !isready(call2future)
+                    nextstate = 0x1
+                    return false
+                    @label state1
+                end
+                let call = call2future[]
+                    push!(calls, call)
+                    stateordonet = call.rt
+                    stateordonet_widened = widenconst(stateordonet)
+                end
+            end
+            # From here on, we start asking for results on the widened types, rather than
+            # the precise (potentially const) state type
+            # statetype and valtype are reinitialized in the first iteration below from the
+            # (widened) stateordonet, which has not yet been fully analyzed in the loop above
+            valtype = statetype = Bottom
+            may_have_terminated = Nothing <: stateordonet_widened
+            while valtype !== Any
+                nounion = typeintersect(stateordonet_widened, Tuple{Any,Any})
+                if nounion !== Union{} && !isa(nounion, DataType)
+                    # nounion is of a type we cannot handle
+                    valtype = Any
+                    break
+                end
+                if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
+                    # reached a fixpoint or iterator failed/gave invalid answer
+                    if !hasintersect(stateordonet_widened, Nothing)
+                        # ... but cannot terminate
+                        if may_have_terminated
+                            # ... and iterator may have terminated prior to this loop, but not during it
+                            valtype = Bottom
+                        else
+                            #  ... or cannot have terminated prior to this loop
+                            iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects())
+                            return true
+                        end
+                    end
+                    break
+                end
+                valtype = tmerge(valtype, nounion.parameters[1])
+                statetype = tmerge(statetype, nounion.parameters[2])
+                call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true, false), sv)::Future
+                if !isready(call2future)
+                    nextstate = 0x2
+                    return false
+                    @label state2
+                end
+                let call = call2future[]
+                    push!(calls, call)
+                    stateordonet = call.rt
+                    stateordonet_widened = widenconst(stateordonet)
+                end
+            end
+            if valtype !== Union{}
+                push!(ret, Vararg{valtype})
+            end
+            iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, false))
+            return true
+        end # function inferiterate_2arg
+        # continue making progress as much as possible, on iterate(arg, state)
+        inferiterate_2arg(interp, sv) || push!(sv.tasks, inferiterate_2arg)
+        return true
+    end # inferiterate
+    # continue making progress as soon as possible, on iterate(arg)
+    if !(isready(call1future) && inferiterate(interp, sv))
+        push!(sv.tasks, inferiterate)
+    end
+    return iterateresult
+end
+
+# do apply(af, fargs...), where af is a function value
+function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo,
+                        sv::AbsIntState, max_methods::Int=get_max_methods(interp, sv))
+    itft = Core.Box(argtype_by_index(argtypes, 2))
+    aft = argtype_by_index(argtypes, 3)
+    (itft.contents === Bottom || aft === Bottom) && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+    aargtypes = argtype_tail(argtypes, 4)
+    aftw = widenconst(aft)
+    if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw))
+        if !isconcretetype(aftw) || (aftw <: Builtin)
+            add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type")
+            # bail now, since it seems unlikely that abstract_call will be able to do any better after splitting
+            # this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin
+            return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+        end
+    end
+    res = Union{}
+    splitunions = 1 < unionsplitcost(typeinf_lattice(interp), aargtypes) <= InferenceParams(interp).max_apply_union_enum
+    ctypes::Vector{Vector{Any}} = [Any[aft]]
+    infos::Vector{Vector{MaybeAbstractIterationInfo}} = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
+    all_effects::Effects = EFFECTS_TOTAL
+    retinfos = ApplyCallInfo[]
+    retinfo = UnionSplitApplyCallInfo(retinfos)
+    exctype = Union{}
+    ctypes´::Vector{Vector{Any}} = Vector{Any}[]
+    infos´::Vector{Vector{MaybeAbstractIterationInfo}} = Vector{MaybeAbstractIterationInfo}[]
+    local ti, argtypesi
+    local ctfuture::Future{AbstractIterationResult}
+    local callfuture::Future{CallMeta}
+
+    applyresult = Future{CallMeta}()
+    # split the rest into a resumable state machine
+    i::Int = 1
+    j::Int = 1
+    nextstate::UInt8 = 0x0
+    function infercalls(interp, sv)
+        # n.b. Remember that variables will lose their values across restarts,
+        # so be sure to manually hoist any values that must be preserved and do
+        # not rely on program order.
+        # This is a little more complex than the closure continuations often used elsewhere, but avoids needing to manage all of that indentation
+        if nextstate === 0x1
+            nextstate = 0xff
+            @goto state1
+        elseif nextstate === 0x2
+            nextstate = 0xff
+            @goto state2
+        elseif nextstate === 0x3
+            nextstate = 0xff
+            @goto state3
+        else
+            @assert nextstate === 0x0
+            nextstate = 0xff
+        end
+        while i <= length(aargtypes)
+            argtypesi = (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]])
+            i += 1
+            j = 1
+            while j <= length(argtypesi)
+                ti = argtypesi[j]
+                j += 1
+                if !isvarargtype(ti)
+                    ctfuture = precise_container_type(interp, itft.contents, ti, sv)::Future
+                    if !isready(ctfuture)
+                        nextstate = 0x1
+                        return false
+                        @label state1
+                    end
+                    (;cti, info, ai_effects) = ctfuture[]
+                else
+                    ctfuture = precise_container_type(interp, itft.contents, unwrapva(ti), sv)::Future
+                    if !isready(ctfuture)
+                        nextstate = 0x2
+                        return false
+                        @label state2
+                    end
+                    (;cti, info, ai_effects) = ctfuture[]
+                    # We can't represent a repeating sequence of the same types,
+                    # so tmerge everything together to get one type that represents
+                    # everything.
+                    argt = cti[end]
+                    if isvarargtype(argt)
+                        argt = unwrapva(argt)
+                    end
+                    for k in 1:(length(cti)-1)
+                        argt = tmerge(argt, cti[k])
+                    end
+                    cti = Any[Vararg{argt}]
+                end
+                all_effects = merge_effects(all_effects, ai_effects)
+                if info !== nothing
+                    for call in info.each
+                        all_effects = merge_effects(all_effects, call.effects)
+                    end
+                end
+                if any(@nospecialize(t) -> t === Bottom, cti)
+                    continue
+                end
+                for k = 1:length(ctypes)
+                    ct = ctypes[k]
+                    if isvarargtype(ct[end])
+                        # This is vararg, we're not gonna be able to do any inlining,
+                        # drop the info
+                        info = nothing
+                        tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti)
+                        push!(ctypes´, push!(ct[1:(end - 1)], tail))
+                    else
+                        push!(ctypes´, append!(ct[:], cti))
+                    end
+                    push!(infos´, push!(copy(infos[k]), info))
+                end
+            end
+            # swap for the new array and empty the temporary one
+            ctypes´, ctypes = ctypes, ctypes´
+            infos´, infos = infos, infos´
+            empty!(ctypes´)
+            empty!(infos´)
+        end
+        all_effects.nothrow || (exctype = Any)
+
+        i = 1
+        while i <= length(ctypes)
+            ct = ctypes[i]
+            if bail_out_apply(interp, InferenceLoopState(res, all_effects), sv)
+                add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information: bailing on analysis of more methods.")
+                # there is unanalyzed candidate, widen type and effects to the top
+                let retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing
+                    applyresult[] = CallMeta(Any, Any, Effects(), retinfo)
+                    return true
+                end
+            end
+            lct = length(ct)
+            # truncate argument list at the first Vararg
+            for k = 1:lct-1
+                cti = ct[k]
+                if isvarargtype(cti)
+                    ct[k] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(k+1):lct])
+                    resize!(ct, k)
+                    break
+                end
+            end
+            callfuture = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)::Future
+            if !isready(callfuture)
+                nextstate = 0x3
+                return false
+                @label state3
+            end
+            let (; info, rt, exct, effects) = callfuture[]
+                push!(retinfos, ApplyCallInfo(info, infos[i]))
+                res = tmerge(typeinf_lattice(interp), res, rt)
+                exctype = tmerge(typeinf_lattice(interp), exctype, exct)
+                all_effects = merge_effects(all_effects, effects)
+            end
+            i += 1
+        end
+        # TODO: Add a special info type to capture all the iteration info.
+        # For now, only propagate info if we don't also union-split the iteration
+        applyresult[] = CallMeta(res, exctype, all_effects, retinfo)
+        return true
+    end # function infercalls
+    # start making progress on the first call
+    infercalls(interp, sv) || push!(sv.tasks, infercalls)
+    return applyresult
+end
+
+function argtype_by_index(argtypes::Vector{Any}, i::Int)
+    n = length(argtypes)
+    na = argtypes[n]
+    if isvarargtype(na)
+        return i >= n ? unwrapva(na) : argtypes[i]
+    else
+        return i > n ? Bottom : argtypes[i]
+    end
+end
+
+function argtype_tail(argtypes::Vector{Any}, i::Int)
+    n = length(argtypes)
+    if isvarargtype(argtypes[n]) && i > n
+        i = n
+    end
+    return argtypes[i:n]
+end
+
+struct ConditionalTypes
+    thentype
+    elsetype
+    ConditionalTypes(thentype, elsetype) = (@nospecialize; new(thentype, elsetype))
+end
+
+@inline function isa_condition(@nospecialize(xt), @nospecialize(ty), max_union_splitting::Int,
+    @nospecialize(rt))
+    if isa(rt, Const)
+        xt = widenslotwrapper(xt)
+        if rt.val === false
+            return ConditionalTypes(Bottom, xt)
+        elseif rt.val === true
+            return ConditionalTypes(xt, Bottom)
+        end
+    end
+    return isa_condition(xt, ty, max_union_splitting)
+end
+@inline function isa_condition(@nospecialize(xt), @nospecialize(ty), max_union_splitting::Int)
+    tty_ub, isexact_tty = instanceof_tfunc(ty, true)
+    tty = widenconst(xt)
+    if isexact_tty && !isa(tty_ub, TypeVar)
+        tty_lb = tty_ub # TODO: this would be wrong if !isexact_tty, but instanceof_tfunc doesn't preserve this info
+        if !has_free_typevars(tty_lb) && !has_free_typevars(tty_ub)
+            thentype = typeintersect(tty, tty_ub)
+            if iskindtype(tty_ub) && thentype !== Bottom
+                # `typeintersect` may be unable narrow down `Type`-type
+                thentype = tty_ub
+            end
+            valid_as_lattice(thentype, true) || (thentype = Bottom)
+            elsetype = typesubtract(tty, tty_lb, max_union_splitting)
+            return ConditionalTypes(thentype, elsetype)
+        end
+    end
+    return nothing
+end
+
+@inline function egal_condition(c::Const, @nospecialize(xt), max_union_splitting::Int,
+    @nospecialize(rt))
+    thentype = c
+    elsetype = widenslotwrapper(xt)
+    if rt === Const(false)
+        thentype = Bottom
+    elseif rt === Const(true)
+        elsetype = Bottom
+    elseif elsetype isa Type && issingletontype(typeof(c.val)) # can only widen a if it is a singleton
+        elsetype = typesubtract(elsetype, typeof(c.val), max_union_splitting)
+    end
+    return ConditionalTypes(thentype, elsetype)
+end
+@inline function egal_condition(c::Const, @nospecialize(xt), max_union_splitting::Int)
+    thentype = c
+    elsetype = widenslotwrapper(xt)
+    if elsetype isa Type && issingletontype(typeof(c.val)) # can only widen a if it is a singleton
+        elsetype = typesubtract(elsetype, typeof(c.val), max_union_splitting)
+    end
+    return ConditionalTypes(thentype, elsetype)
+end
+
+function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo,
+                               sv::AbsIntState)
+    @nospecialize f
+    la = length(argtypes)
+    𝕃ᵢ = typeinf_lattice(interp)
+    ⊑, ⊏, ⊔, ⊓ = partialorder(𝕃ᵢ), strictpartialorder(𝕃ᵢ), join(𝕃ᵢ), meet(𝕃ᵢ)
+    if has_conditional(𝕃ᵢ, sv) && f === Core.ifelse && fargs isa Vector{Any} && la == 4
+        cnd = argtypes[2]
+        if isa(cnd, Conditional)
+            newcnd = widenconditional(cnd)
+            tx = argtypes[3]
+            ty = argtypes[4]
+            if isa(newcnd, Const)
+                # if `cnd` is constant, we should just respect its constantness to keep inference accuracy
+                return newcnd.val::Bool ? tx : ty
+            else
+                # try to simulate this as a real conditional (`cnd ? x : y`), so that the penalty for using `ifelse` instead isn't too high
+                a = ssa_def_slot(fargs[3], sv)
+                b = ssa_def_slot(fargs[4], sv)
+                if isa(a, SlotNumber) && cnd.slot == slot_id(a)
+                    tx = (cnd.thentype ⊑ tx ? cnd.thentype : tx ⊓ widenconst(cnd.thentype))
+                end
+                if isa(b, SlotNumber) && cnd.slot == slot_id(b)
+                    ty = (cnd.elsetype ⊑ ty ? cnd.elsetype : ty ⊓ widenconst(cnd.elsetype))
+                end
+                return tx ⊔ ty
+            end
+        end
+    end
+    ft = popfirst!(argtypes)
+    rt = builtin_tfunction(interp, f, argtypes, sv)
+    pushfirst!(argtypes, ft)
+    if has_mustalias(𝕃ᵢ) && f === getfield && isa(fargs, Vector{Any}) && la ≥ 3
+        a3 = argtypes[3]
+        if isa(a3, Const)
+            if rt !== Bottom && !isalreadyconst(rt)
+                var = ssa_def_slot(fargs[2], sv)
+                if isa(var, SlotNumber)
+                    vartyp = widenslotwrapper(argtypes[2])
+                    fldidx = maybe_const_fldidx(vartyp, a3.val)
+                    if fldidx !== nothing
+                        # wrap this aliasable field into `MustAlias` for possible constraint propagations
+                        return MustAlias(var, vartyp, fldidx, rt)
+                    end
+                end
+            end
+        end
+    elseif has_conditional(𝕃ᵢ, sv) && (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any})
+        # perform very limited back-propagation of type information for `is` and `isa`
+        if f === isa
+            # try splitting value argument, based on types
+            a = ssa_def_slot(fargs[2], sv)
+            a2 = argtypes[2]
+            a3 = argtypes[3]
+            if isa(a, SlotNumber)
+                cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting, rt)
+                if cndt !== nothing
+                    return Conditional(a, cndt.thentype, cndt.elsetype)
+                end
+            end
+            if isa(a2, MustAlias)
+                if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization)
+                    cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting)
+                    if cndt !== nothing
+                        return form_mustalias_conditional(a2, cndt.thentype, cndt.elsetype)
+                    end
+                end
+            end
+            # try splitting type argument, based on value
+            if isdispatchelem(widenconst(a2)) && a3 isa Union && !has_free_typevars(a3) && !isa(rt, Const)
+                b = ssa_def_slot(fargs[3], sv)
+                if isa(b, SlotNumber)
+                    # !(x isa T) implies !(Type{a2} <: T)
+                    # TODO: complete splitting, based on which portions of the Union a3 for which isa_tfunc returns Const(true) or Const(false) instead of Bool
+                    elsetype = typesubtract(a3, Type{widenconst(a2)}, InferenceParams(interp).max_union_splitting)
+                    return Conditional(b, a3, elsetype)
+                end
+            end
+        elseif f === (===)
+            a = ssa_def_slot(fargs[2], sv)
+            b = ssa_def_slot(fargs[3], sv)
+            aty = argtypes[2]
+            bty = argtypes[3]
+            # if doing a comparison to a singleton, consider returning a `Conditional` instead
+            if isa(aty, Const)
+                if isa(b, SlotNumber)
+                    cndt = egal_condition(aty, bty, InferenceParams(interp).max_union_splitting, rt)
+                    return Conditional(b, cndt.thentype, cndt.elsetype)
+                elseif isa(bty, MustAlias) && !isa(rt, Const) # skip refinement when the field is known precisely (just optimization)
+                    cndt = egal_condition(aty, bty.fldtyp, InferenceParams(interp).max_union_splitting)
+                    return form_mustalias_conditional(bty, cndt.thentype, cndt.elsetype)
+                end
+            elseif isa(bty, Const)
+                if isa(a, SlotNumber)
+                    cndt = egal_condition(bty, aty, InferenceParams(interp).max_union_splitting, rt)
+                    return Conditional(a, cndt.thentype, cndt.elsetype)
+                elseif isa(aty, MustAlias) && !isa(rt, Const) # skip refinement when the field is known precisely (just optimization)
+                    cndt = egal_condition(bty, aty.fldtyp, InferenceParams(interp).max_union_splitting)
+                    return form_mustalias_conditional(aty, cndt.thentype, cndt.elsetype)
+                end
+            end
+            # TODO enable multiple constraints propagation here, there are two possible improvements:
+            # 1. propagate constraints for both lhs and rhs
+            # 2. we can propagate both constraints on aliased fields and slots
+            # As for 2, for now, we prioritize constraints on aliased fields, since currently
+            # different slots that represent the same object can't share same field constraint,
+            # and thus binding `MustAlias` to the other slot is less likely useful
+            if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization)
+                if isa(bty, MustAlias)
+                    thentype = widenslotwrapper(aty)
+                    elsetype = bty.fldtyp
+                    if thentype ⊏ elsetype
+                        return form_mustalias_conditional(bty, thentype, elsetype)
+                    end
+                elseif isa(aty, MustAlias)
+                    thentype = widenslotwrapper(bty)
+                    elsetype = aty.fldtyp
+                    if thentype ⊏ elsetype
+                        return form_mustalias_conditional(aty, thentype, elsetype)
+                    end
+                end
+            end
+            # narrow the lattice slightly (noting the dependency on one of the slots), to promote more effective smerge
+            if isa(b, SlotNumber)
+                thentype = rt === Const(false) ? Bottom : widenslotwrapper(bty)
+                elsetype = rt === Const(true)  ? Bottom : widenslotwrapper(bty)
+                return Conditional(b, thentype, elsetype)
+            elseif isa(a, SlotNumber)
+                thentype = rt === Const(false) ? Bottom : widenslotwrapper(aty)
+                elsetype = rt === Const(true)  ? Bottom : widenslotwrapper(aty)
+                return Conditional(a, thentype, elsetype)
+            end
+        elseif f === Core.Intrinsics.not_int
+            aty = argtypes[2]
+            if isa(aty, Conditional)
+                thentype = rt === Const(false) ? Bottom : aty.elsetype
+                elsetype = rt === Const(true)  ? Bottom : aty.thentype
+                return Conditional(aty.slot, thentype, elsetype)
+            end
+        elseif f === isdefined
+            a = ssa_def_slot(fargs[2], sv)
+            if isa(a, SlotNumber)
+                argtype2 = argtypes[2]
+                if isa(argtype2, Union)
+                    fld = argtypes[3]
+                    thentype = Bottom
+                    elsetype = Bottom
+                    for ty in uniontypes(argtype2)
+                        cnd = isdefined_tfunc(𝕃ᵢ, ty, fld)
+                        if isa(cnd, Const)
+                            if cnd.val::Bool
+                                thentype = thentype ⊔ ty
+                            else
+                                elsetype = elsetype ⊔ ty
+                            end
+                        else
+                            thentype = thentype ⊔ ty
+                            elsetype = elsetype ⊔ ty
+                        end
+                    end
+                    return Conditional(a, thentype, elsetype)
+                else
+                    thentype = form_partially_defined_struct(𝕃ᵢ, argtype2, argtypes[3])
+                    if thentype !== nothing
+                        elsetype = widenslotwrapper(argtype2)
+                        if rt === Const(false)
+                            thentype = Bottom
+                        elseif rt === Const(true)
+                            elsetype = Bottom
+                        end
+                        return Conditional(a, thentype, elsetype)
+                    end
+                end
+            end
+        end
+    end
+    @assert !isa(rt, TypeVar) "unhandled TypeVar"
+    return rt
+end
+
+function form_partially_defined_struct(𝕃ᵢ::AbstractLattice, @nospecialize(obj), @nospecialize(name))
+    obj isa Const && return nothing # nothing to refine
+    name isa Const || return nothing
+    objt0 = widenconst(obj)
+    objt = unwrap_unionall(objt0)
+    objt isa DataType || return nothing
+    isabstracttype(objt) && return nothing
+    objt <: Tuple && return nothing
+    fldidx = try_compute_fieldidx(objt, name.val)
+    fldidx === nothing && return nothing
+    if isa(obj, PartialStruct)
+        _getundefs(obj)[fldidx] === false && return nothing
+        newundefs = copy(_getundefs(obj))
+        newundefs[fldidx] = false
+        return PartialStruct(𝕃ᵢ, obj.typ, newundefs, copy(obj.fields))
+    end
+    nminfld = datatype_min_ninitialized(objt)
+    fldidx ≤ nminfld && return nothing
+    fldcnt = fieldcount_noerror(objt)::Int
+    fields = Any[fieldtype(objt0, i) for i = 1:fldcnt]
+    if fields[fldidx] === Union{}
+        return nothing # `Union{}` field never transitions to be defined
+    end
+    undefs = partialstruct_init_undefs(objt, fields)
+    if undefs === nothing
+        # this object never exists at runtime, avoid creating unprofitable `PartialStruct`
+        return nothing
+    end
+    undefs[fldidx] = false
+    return PartialStruct(𝕃ᵢ, objt0, undefs, fields)
+end
+
+function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{Any}, call::CallMeta)
+    na = length(argtypes)
+    if isvarargtype(argtypes[end])
+        if na ≤ 2
+            return CallMeta(Any, Any, EFFECTS_THROWS, call.info)
+        elseif na > 4
+            return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+        end
+        a2 = argtypes[2]
+        a3 = unwrapva(argtypes[3])
+        nothrow = false
+    elseif na == 3
+        a2 = argtypes[2]
+        a3 = argtypes[3]
+        ⊑ = partialorder(typeinf_lattice(interp))
+        nothrow = a2 ⊑ TypeVar && (a3 ⊑ Type || a3 ⊑ TypeVar)
+    else
+        return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+    end
+    canconst = true
+    if isa(a3, Const)
+        body = a3.val
+    elseif isType(a3)
+        body = a3.parameters[1]
+        canconst = false
+    else
+        return CallMeta(Any, Any, Effects(EFFECTS_TOTAL; nothrow), call.info)
+    end
+    if !(isa(body, Type) || isa(body, TypeVar))
+        return CallMeta(Any, Any, EFFECTS_THROWS, call.info)
+    end
+    if has_free_typevars(body)
+        if isa(a2, Const)
+            tv = a2.val
+        elseif isa(a2, PartialTypeVar)
+            tv = a2.tv
+            canconst = false
+        else
+            return CallMeta(Any, Any, EFFECTS_THROWS, call.info)
+        end
+        isa(tv, TypeVar) || return CallMeta(Any, Any, EFFECTS_THROWS, call.info)
+        body = UnionAll(tv, body)
+    end
+    ret = canconst ? Const(body) : Type{body}
+    return CallMeta(ret, Any, Effects(EFFECTS_TOTAL; nothrow), call.info)
+end
+
+function get_ci_abi(ci::CodeInstance)
+    def = ci.def
+    isa(def, ABIOverride) && return def.abi
+    (def::MethodInstance).specTypes
+end
+
+function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState)
+    argtypes = arginfo.argtypes
+    ft′ = argtype_by_index(argtypes, 2)
+    ft = widenconst(ft′)
+    ft === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+    types = argtype_by_index(argtypes, 3)
+    our_world = get_inference_world(interp)
+    if types isa Const && types.val isa Union{Method, CodeInstance}
+        method_or_ci = types.val
+        if isa(method_or_ci, CodeInstance)
+            argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft))
+            specsig = get_ci_abi(method_or_ci)
+            defdef = get_ci_mi(method_or_ci).def
+            exct = method_or_ci.exctype
+            if !hasintersect(argtype, specsig)
+                return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()))
+            elseif !(argtype <: specsig) || ((!isa(method_or_ci.def, ABIOverride) && isa(defdef, Method)) && !(argtype <: defdef.sig))
+                exct = Union{exct, TypeError}
+            end
+            callee_valid_range = WorldRange(method_or_ci.min_world, method_or_ci.max_world)
+            if !(our_world in callee_valid_range)
+                if our_world < first(callee_valid_range)
+                    update_valid_age!(sv, our_world, WorldRange(first(sv.valid_worlds), first(callee_valid_range)-1))
+                else
+                    update_valid_age!(sv, our_world, WorldRange(last(callee_valid_range)+1, last(sv.valid_worlds)))
+                end
+                return Future(CallMeta(Bottom, ErrorException, EFFECTS_THROWS, NoCallInfo()))
+            end
+            # TODO: When we add curing, we may want to assume this is nothrow
+            if (method_or_ci.owner === Nothing && method_ir_ci.def.def isa Method)
+                exct = Union{exct, ErrorException}
+            end
+            update_valid_age!(sv, our_world, callee_valid_range)
+            return Future(CallMeta(method_or_ci.rettype, exct, Effects(decode_effects(method_or_ci.ipo_purity_bits), nothrow=(exct===Bottom)),
+                InvokeCICallInfo(method_or_ci)))
+        else
+            method = method_or_ci::Method
+            types = method # argument value
+            lookupsig = method.sig # edge kind
+            argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft))
+            nargtype = typeintersect(lookupsig, argtype)
+            nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()))
+            nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below
+            # Fall through to generic invoke handling
+        end
+    else
+        hasintersect(widenconst(types), Union{Method, CodeInstance}) && return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+        types, isexact, _, _ = instanceof_tfunc(argtype_by_index(argtypes, 3), false)
+        isexact || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+        unwrapped = unwrap_unionall(types)
+        types === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+        if !(unwrapped isa DataType && unwrapped.name === Tuple.name)
+            return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()))
+        end
+        argtype = argtypes_to_type(argtype_tail(argtypes, 4))
+        nargtype = typeintersect(types, argtype)
+        nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()))
+        nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below
+        isdispatchelem(ft) || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
+        ft = ft::DataType
+        lookupsig = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type
+        nargtype = Tuple{ft, nargtype.parameters...}
+        argtype = Tuple{ft, argtype.parameters...}
+        matched, valid_worlds = findsup(lookupsig, method_table(interp))
+        matched === nothing && return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+        update_valid_age!(sv, our_world, valid_worlds)
+        method = matched.method
+    end
+    tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector
+    ti = tienv[1]
+    env = tienv[2]::SimpleVector
+    mresult = abstract_call_method(interp, method, ti, env, false, si, sv)::Future
+    match = MethodMatch(ti, env, method, argtype <: method.sig)
+    ft′_box = Core.Box(ft′)
+    lookupsig_box = Core.Box(lookupsig)
+    invokecall = InvokeCall(types)
+    return Future{CallMeta}(mresult, interp, sv) do result, interp, sv
+        (; rt, exct, effects, edge, call_result) = result
+        local ft′ = ft′_box.contents
+        sig = match.spec_types
+        argtypes′ = invoke_rewrite(arginfo.argtypes)
+        fargs = arginfo.fargs
+        fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
+        arginfo′ = ArgInfo(fargs′, argtypes′)
+        # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
+        # for i in 1:length(argtypes′)
+        #     t, a = ti.parameters[i], argtypes′[i]
+        #     argtypes′[i] = t ⊑ a ? t : a
+        # end
+        𝕃ₚ = ipo_lattice(interp)
+        ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
+        f = singleton_type(ft′)
+        const_call_result = abstract_call_method_with_const_args(interp,
+            result, f, arginfo′, si, match, sv, invokecall)
+        if const_call_result !== nothing
+            const_result = const_edge = nothing
+            if const_call_result.rt ⊑ rt
+                (; rt, effects, const_result, const_edge) = const_call_result
+            end
+            if const_call_result.exct ⋤ exct
+                (; exct, const_result, const_edge) = const_call_result
+            end
+            if const_edge !== nothing
+                edge = const_edge
+                update_valid_age!(sv, get_inference_world(interp), world_range(const_edge))
+            end
+            if const_result !== nothing
+                call_result = const_result
+            end
+        end
+        rt = from_interprocedural!(interp, rt, sv, arginfo′, sig)
+        info = InvokeCallInfo(edge, match, call_result, lookupsig_box.contents)
+        if !match.fully_covers
+            effects = Effects(effects; nothrow=false)
+            exct = exct ⊔ TypeError
+        end
+        return CallMeta(rt, exct, effects, info)
+    end
+end
+
+function invoke_rewrite(xs::Vector{Any})
+    x0 = xs[2]
+    newxs = xs[3:end]
+    newxs[1] = x0
+    return newxs
+end
+
+function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState)
+    if length(argtypes) == 3
+        finalizer_argvec = Any[argtypes[2], argtypes[3]]
+        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false, false), sv, #=max_methods=#1)::Future
+        return Future{CallMeta}(call, interp, sv) do call, _, _
+            return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects))
+        end
+    end
+    return Future(CallMeta(Nothing, Any, Effects(), NoCallInfo()))
+end
+
+function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
+    na = length(argtypes)
+    ⊔ = join(typeinf_lattice(interp))
+    if na == 2
+        argtype2 = argtypes[2]
+        if isvarargtype(argtype2)
+            exct = unwrapva(argtype2) ⊔ ArgumentError
+        else
+            exct = argtype2
+        end
+    elseif na == 3 && isvarargtype(argtypes[3])
+        exct = argtypes[2] ⊔ ArgumentError
+    else
+        exct = ArgumentError
+    end
+    return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()))
+end
+
+function abstract_throw_methoderror(::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
+    exct = if length(argtypes) == 1
+        ArgumentError
+    elseif !isvarargtype(argtypes[2])
+        MethodError
+    else
+        Union{MethodError, ArgumentError}
+    end
+    return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()))
+end
+
+const generic_getglobal_effects = Effects(EFFECTS_THROWS, effect_free=ALWAYS_FALSE, consistent=ALWAYS_FALSE, inaccessiblememonly=ALWAYS_FALSE) #= effect_free for depwarn =#
+const generic_getglobal_exct = Union{ArgumentError, TypeError, ConcurrencyViolationError, UndefVarError}
+function abstract_eval_getglobal(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, @nospecialize(M), @nospecialize(s))
+    ⊑ = partialorder(typeinf_lattice(interp))
+    if M isa Const && s isa Const
+        M, s = M.val, s.val
+        if M isa Module && s isa Symbol
+            gr = GlobalRef(M, s)
+            ret = abstract_eval_globalref(interp, gr, saw_latestworld, sv)
+            return CallMeta(ret, GlobalAccessInfo(convert(Core.Binding, gr)))
+        end
+        return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo())
+    elseif !hasintersect(widenconst(M), Module) || !hasintersect(widenconst(s), Symbol)
+        return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo())
+    elseif M ⊑ Module && s ⊑ Symbol
+        return CallMeta(Any, UndefVarError, generic_getglobal_effects, NoCallInfo())
+    end
+    return CallMeta(Any, Union{UndefVarError, TypeError}, generic_getglobal_effects, NoCallInfo())
+end
+
+function merge_exct(cm::CallMeta, @nospecialize(exct))
+    if exct !== Bottom
+        cm = CallMeta(cm.rt, Union{cm.exct, exct}, Effects(cm.effects; nothrow=false), cm.info)
+    end
+    return cm
+end
+
+function abstract_eval_getglobal(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, @nospecialize(M), @nospecialize(s), @nospecialize(order))
+    goe = global_order_exct(order, #=loading=#true, #=storing=#false)
+    cm = abstract_eval_getglobal(interp, sv, saw_latestworld, M, s)
+    return merge_exct(cm, goe)
+end
+
+function abstract_eval_getglobal(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any})
+    if !isvarargtype(argtypes[end])
+        if length(argtypes) == 3
+            return abstract_eval_getglobal(interp, sv, saw_latestworld, argtypes[2], argtypes[3])
+        elseif length(argtypes) == 4
+            return abstract_eval_getglobal(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4])
+        else
+            return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+        end
+    elseif length(argtypes) > 5
+        return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+    else
+        return CallMeta(Any, generic_getglobal_exct, generic_getglobal_effects, NoCallInfo())
+    end
+end
+
+# The binding lookup code uses the current world to bound its scan to only those worlds that are currently valid
+binding_world_hints(world::UInt, sv::AbsIntState) = WorldWithRange(world, sv.valid_worlds)
+
+@nospecs function abstract_eval_get_binding_type(interp::AbstractInterpreter, sv::AbsIntState, M, s)
+    @nospecialize M s
+    ⊑ = partialorder(typeinf_lattice(interp))
+    if isa(M, Const) && isa(s, Const)
+        (M, s) = (M.val, s.val)
+        if !isa(M, Module) || !isa(s, Symbol)
+            return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo())
+        end
+        gr = GlobalRef(M, s)
+        world = get_inference_world(interp)
+        (valid_worlds, rt) = scan_leaf_partitions(interp, gr, binding_world_hints(world, sv)) do interp::AbstractInterpreter, ::Core.Binding, partition::Core.BindingPartition
+            local rt
+            kind = binding_kind(partition)
+            if is_some_guard(kind) || kind == PARTITION_KIND_DECLARED
+                # We do not currently assume an invalidation for guard -> defined transitions
+                # rt = Const(nothing)
+                rt = Type
+            elseif is_some_const_binding(kind)
+                rt = Const(Any)
+            else
+                rt = Const(partition_restriction(partition))
+            end
+            rt
+        end
+        update_valid_age!(sv, world, valid_worlds)
+        return CallMeta(rt, Union{}, EFFECTS_TOTAL, GlobalAccessInfo(convert(Core.Binding, gr)))
+    elseif !hasintersect(widenconst(M), Module) || !hasintersect(widenconst(s), Symbol)
+        return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo())
+    elseif M ⊑ Module && s ⊑ Symbol
+        return CallMeta(Type, Union{}, EFFECTS_TOTAL, NoCallInfo())
+    end
+    return CallMeta(Type, TypeError, EFFECTS_THROWS, NoCallInfo())
+end
+
+function abstract_eval_get_binding_type(interp::AbstractInterpreter, sv::AbsIntState, argtypes::Vector{Any})
+    if !isvarargtype(argtypes[end])
+        if length(argtypes) == 3
+            return abstract_eval_get_binding_type(interp, sv, argtypes[2], argtypes[3])
+        else
+            return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+        end
+    elseif length(argtypes) > 4
+        return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+    else
+        return CallMeta(Type, Union{TypeError, ArgumentError}, EFFECTS_THROWS, NoCallInfo())
+    end
+end
+
+const setglobal!_effects = Effects(EFFECTS_TOTAL; effect_free=ALWAYS_FALSE, nothrow=false, inaccessiblememonly=ALWAYS_FALSE)
+
+function abstract_eval_setglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, @nospecialize(M), @nospecialize(s), @nospecialize(v))
+    if isa(M, Const) && isa(s, Const)
+        M, s = M.val, s.val
+        if M isa Module && s isa Symbol
+            gr = GlobalRef(M, s)
+            (rt, exct) = global_assignment_rt_exct(interp, sv, saw_latestworld, gr, v)
+            return CallMeta(rt, exct, Effects(setglobal!_effects, nothrow=exct===Bottom), GlobalAccessInfo(convert(Core.Binding, gr)))
+        end
+        return CallMeta(Union{}, Union{TypeError, ErrorException}, EFFECTS_THROWS, NoCallInfo())
+    end
+    ⊑ = partialorder(typeinf_lattice(interp))
+    if !(hasintersect(widenconst(M), Module) && hasintersect(widenconst(s), Symbol))
+        return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo())
+    elseif M ⊑ Module && s ⊑ Symbol
+        return CallMeta(v, ErrorException, setglobal!_effects, NoCallInfo())
+    end
+    return CallMeta(v, Union{TypeError, ErrorException}, setglobal!_effects, NoCallInfo())
+end
+
+function abstract_eval_setglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, @nospecialize(M), @nospecialize(s), @nospecialize(v), @nospecialize(order))
+    goe = global_order_exct(order, #=loading=#false, #=storing=#true)
+    cm = abstract_eval_setglobal!(interp, sv, saw_latestworld, M, s, v)
+    return merge_exct(cm, goe)
+end
+
+const generic_setglobal!_exct = Union{ArgumentError, TypeError, ErrorException, ConcurrencyViolationError}
+
+function abstract_eval_setglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any})
+    if !isvarargtype(argtypes[end])
+        if length(argtypes) == 4
+            return abstract_eval_setglobal!(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4])
+        elseif length(argtypes) == 5
+            return abstract_eval_setglobal!(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4], argtypes[5])
+        else
+            return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+        end
+    elseif length(argtypes) > 6
+        return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+    else
+        return CallMeta(Any, generic_setglobal!_exct, setglobal!_effects, NoCallInfo())
+    end
+end
+
+function abstract_eval_swapglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool,
+                                   @nospecialize(M), @nospecialize(s), @nospecialize(v))
+    scm = abstract_eval_setglobal!(interp, sv, saw_latestworld, M, s, v)
+    scm.rt === Bottom && return scm
+    gcm = abstract_eval_getglobal(interp, sv, saw_latestworld, M, s)
+    return CallMeta(gcm.rt, Union{scm.exct,gcm.exct}, merge_effects(scm.effects, gcm.effects), scm.info)
+end
+
+function abstract_eval_swapglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool,
+                                   @nospecialize(M), @nospecialize(s), @nospecialize(v), @nospecialize(order))
+    scm = abstract_eval_setglobal!(interp, sv, saw_latestworld, M, s, v, order)
+    scm.rt === Bottom && return scm
+    gcm = abstract_eval_getglobal(interp, sv, saw_latestworld, M, s, order)
+    return CallMeta(gcm.rt, Union{scm.exct,gcm.exct}, merge_effects(scm.effects, gcm.effects), scm.info)
+end
+
+function abstract_eval_swapglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any})
+    if !isvarargtype(argtypes[end])
+        if length(argtypes) == 4
+            return abstract_eval_swapglobal!(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4])
+        elseif length(argtypes) == 5
+            return abstract_eval_swapglobal!(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4], argtypes[5])
+        else
+            return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+        end
+    elseif length(argtypes) > 6
+        return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+    else
+        return CallMeta(Any, Union{generic_getglobal_exct,generic_setglobal!_exct}, setglobal!_effects, NoCallInfo())
+    end
+end
+
+function abstract_eval_setglobalonce!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any})
+    if !isvarargtype(argtypes[end])
+        if length(argtypes) in (4, 5, 6)
+            cm = abstract_eval_setglobal!(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4])
+            if length(argtypes) >= 5
+                goe = global_order_exct(argtypes[5], #=loading=#true, #=storing=#true)
+                cm = merge_exct(cm, goe)
+            end
+            if length(argtypes) == 6
+                goe = global_order_exct(argtypes[6], #=loading=#true, #=storing=#false)
+                cm = merge_exct(cm, goe)
+            end
+            return CallMeta(Bool, cm.exct, cm.effects, cm.info)
+        else
+            return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+        end
+    elseif length(argtypes) > 7
+        return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+    else
+        return CallMeta(Bool, generic_setglobal!_exct, setglobal!_effects, NoCallInfo())
+    end
+end
+
+function abstract_eval_replaceglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any})
+    if !isvarargtype(argtypes[end])
+        if length(argtypes) in (5, 6, 7)
+            (M, s, v) = argtypes[2], argtypes[3], argtypes[5]
+            T = nothing
+            if isa(M, Const) && isa(s, Const)
+                M, s = M.val, s.val
+                M isa Module || return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo())
+                s isa Symbol || return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo())
+                gr = GlobalRef(M, s)
+                v′ = RefValue{Any}(v)
+                world = get_inference_world(interp)
+                (valid_worlds, (rte, T)) = scan_leaf_partitions(interp, gr, binding_world_hints(world, sv)) do interp::AbstractInterpreter, binding::Core.Binding, partition::Core.BindingPartition
+                    partition_T = nothing
+                    partition_rte = abstract_eval_partition_load(interp, binding, partition)
+                    if binding_kind(partition) == PARTITION_KIND_GLOBAL
+                        partition_T = partition_restriction(partition)
+                    end
+                    partition_exct = Union{partition_rte.exct, global_assignment_binding_rt_exct(interp, partition, v′[])[2]}
+                    partition_rte = RTEffects(partition_rte.rt, partition_exct, partition_rte.effects)
+                    Pair{RTEffects, Any}(partition_rte, partition_T)
+                end
+                update_valid_age!(sv, world, valid_worlds)
+                effects = merge_effects(rte.effects, Effects(setglobal!_effects, nothrow=rte.exct===Bottom))
+                sg = CallMeta(Any, rte.exct, effects, GlobalAccessInfo(convert(Core.Binding, gr)))
+            else
+                sg = abstract_eval_setglobal!(interp, sv, saw_latestworld, M, s, v)
+            end
+            if length(argtypes) >= 6
+                goe = global_order_exct(argtypes[6], #=loading=#true, #=storing=#true)
+                sg = merge_exct(sg, goe)
+            end
+            if length(argtypes) == 7
+                goe = global_order_exct(argtypes[7], #=loading=#true, #=storing=#false)
+                sg = merge_exct(sg, goe)
+            end
+            rt = T === nothing ?
+                ccall(:jl_apply_cmpswap_type, Any, (Any,), S) where S :
+                ccall(:jl_apply_cmpswap_type, Any, (Any,), T)
+            return CallMeta(rt, sg.exct, sg.effects, sg.info)
+        else
+            return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+        end
+    elseif length(argtypes) > 8
+        return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+    else
+        return CallMeta(Any, Union{generic_getglobal_exct,generic_setglobal!_exct}, setglobal!_effects, NoCallInfo())
+    end
+end
+
+function argtypes_are_actually_getglobal(argtypes::Vector{Any})
+    length(argtypes) in (3, 4) || return false
+    M = argtypes[2]
+    s = argtypes[3]
+    isa(M, Const) || return false
+    isa(s, Const) || return false
+    return isa(M.val, Module) && isa(s.val, Symbol)
+end
+
+# call where the function is known exactly
+function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
+        arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState,
+        max_methods::Int = get_max_methods(interp, f, sv))
+    (; fargs, argtypes) = arginfo
+    argtypes::Vector{Any} = arginfo.argtypes  # declare type because the closure below captures `argtypes`
+    fargs = arginfo.fargs
+    la = length(argtypes)
+    𝕃ᵢ = typeinf_lattice(interp)
+    if isa(f, Builtin)
+        if f === _apply_iterate
+            return abstract_apply(interp, argtypes, si, sv, max_methods)
+        elseif f === invoke
+            return abstract_invoke(interp, arginfo, si, sv)
+        elseif f === modifyfield! || f === Core.modifyglobal! ||
+               f === Core.memoryrefmodify! || f === atomic_pointermodify
+            return abstract_modifyop!(interp, f, argtypes, si, sv)
+        elseif f === Core.finalizer
+            return abstract_finalizer(interp, argtypes, sv)
+        elseif f === applicable
+            return abstract_applicable(interp, argtypes, sv, max_methods)
+        elseif f === throw
+            return abstract_throw(interp, argtypes, sv)
+        elseif f === Core.throw_methoderror
+            return abstract_throw_methoderror(interp, argtypes, sv)
+        elseif f === Core.getglobal
+            return Future(abstract_eval_getglobal(interp, sv, si.saw_latestworld, argtypes))
+        elseif f === Core.setglobal!
+            return Future(abstract_eval_setglobal!(interp, sv, si.saw_latestworld, argtypes))
+        elseif f === Core.swapglobal!
+            return Future(abstract_eval_swapglobal!(interp, sv, si.saw_latestworld, argtypes))
+        elseif f === Core.setglobalonce!
+            return Future(abstract_eval_setglobalonce!(interp, sv, si.saw_latestworld, argtypes))
+        elseif f === Core.replaceglobal!
+            return Future(abstract_eval_replaceglobal!(interp, sv, si.saw_latestworld, argtypes))
+        elseif f === Core.getfield && argtypes_are_actually_getglobal(argtypes)
+            return Future(abstract_eval_getglobal(interp, sv, si.saw_latestworld, argtypes))
+        elseif f === Core.isdefined && argtypes_are_actually_getglobal(argtypes)
+            return Future(abstract_eval_isdefinedglobal(interp, argtypes[2], argtypes[3], Const(true),
+                length(argtypes) == 4 ? argtypes[4] : Const(:unordered),
+                si.saw_latestworld, sv))
+        elseif f === Core.isdefinedglobal
+            return Future(abstract_eval_isdefinedglobal(interp, sv, si.saw_latestworld, argtypes))
+        elseif f === Core.get_binding_type
+            return Future(abstract_eval_get_binding_type(interp, sv, argtypes))
+        end
+        rt = abstract_call_builtin(interp, f, arginfo, sv)
+        ft = popfirst!(argtypes)
+        effects = builtin_effects(𝕃ᵢ, f, argtypes, rt)
+        if effects.nothrow
+            exct = Union{}
+        else
+            exct = builtin_exct(𝕃ᵢ, f, argtypes, rt)
+        end
+        pushfirst!(argtypes, ft)
+        refinements = nothing
+        if sv isa InferenceState
+            if f === typeassert
+                # perform very limited back-propagation of invariants after this type assertion
+                if rt !== Bottom && isa(fargs, Vector{Any})
+                    farg2 = ssa_def_slot(fargs[2], sv)
+                    if farg2 isa SlotNumber
+                        refinements = SlotRefinement(farg2, rt)
+                    end
+                end
+            elseif f === setfield! && length(argtypes) == 4 && isa(argtypes[3], Const)
+                # from there on we know that the struct field will never be undefined,
+                # so we try to encode that information with a `PartialStruct`
+                if rt !== Bottom && isa(fargs, Vector{Any})
+                    farg2 = ssa_def_slot(fargs[2], sv)
+                    if farg2 isa SlotNumber
+                        refined = form_partially_defined_struct(𝕃ᵢ, argtypes[2], argtypes[3])
+                        if refined !== nothing
+                            refinements = SlotRefinement(farg2, refined)
+                        end
+                    end
+                end
+            end
+        end
+        return Future(CallMeta(rt, exct, effects, NoCallInfo(), refinements))
+    elseif isa(f, Core.OpaqueClosure)
+        # calling an OpaqueClosure about which we have no information returns no information
+        return Future(CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo()))
+    elseif f === TypeVar && !isvarargtype(argtypes[end])
+        # Manually look through the definition of TypeVar to
+        # make sure to be able to get `PartialTypeVar`s out.
+        2 ≤ la ≤ 4 || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+        # make sure generic code is prepared for inlining if needed later
+        let T = Any[Type{TypeVar}, Any, Any, Any]
+            resize!(T, la)
+            atype = Tuple{T...}
+            T[1] = Const(TypeVar)
+            let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods)::Future
+                return Future{CallMeta}(call, interp, sv) do call, interp, sv
+                    n = argtypes[2]
+                    ub_var = Const(Any)
+                    lb_var = Const(Union{})
+                    if la == 4
+                        ub_var = argtypes[4]
+                        lb_var = argtypes[3]
+                    elseif la == 3
+                        ub_var = argtypes[3]
+                    end
+                    pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var)
+                    typevar_argtypes = Any[n, lb_var, ub_var]
+                    effects = builtin_effects(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+                    if effects.nothrow
+                        exct = Union{}
+                    else
+                        exct = builtin_exct(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+                    end
+                    return CallMeta(pT, exct, effects, call.info)
+                end
+            end
+        end
+    elseif f === UnionAll
+        let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, max_methods)::Future
+            return Future{CallMeta}(call, interp, sv) do call, interp, sv
+                return abstract_call_unionall(interp, argtypes, call)
+            end
+        end
+    elseif f === Tuple && la == 2
+        aty = argtypes[2]
+        ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty)
+        if !isconcretetype(ty)
+            return Future(CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo()))
+        end
+    elseif is_return_type(f)
+        return return_type_tfunc(interp, argtypes, si, sv)
+    elseif la == 3 && f === Core.:(!==)
+        # mark !== as exactly a negated call to ===
+        let callfuture = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods)::Future,
+            rtfuture = abstract_call_known(interp, (===), arginfo, si, sv, max_methods)::Future
+            return Future{CallMeta}(isready(callfuture) && isready(rtfuture), interp, sv) do interp, sv
+                local rty = rtfuture[].rt
+                if isa(rty, Conditional)
+                    return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else
+                elseif isa(rty, Const)
+                    return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure())
+                end
+                return callfuture[]
+            end
+        end
+    elseif la == 3 && f === Core.:(>:)
+        # mark issupertype as a exact alias for issubtype
+        # swap T1 and T2 arguments and call <:
+        if fargs !== nothing && length(fargs) == 3
+            fargs = Any[<:, fargs[3], fargs[2]]
+        else
+            fargs = nothing
+        end
+        argtypes = Any[typeof(<:), argtypes[3], argtypes[2]]
+        return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods)
+    elseif la == 2 && f === Core.typename
+        return Future(CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure()))
+    elseif f === Core._hasmethod
+        return Future(_hasmethod_tfunc(interp, argtypes, sv))
+    end
+    atype = argtypes_to_type(argtypes)
+    return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)::Future
+end
+
+function abstract_call_opaque_closure(interp::AbstractInterpreter,
+    closure::PartialOpaque, arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, check::Bool=true)
+    sig = argtypes_to_type(arginfo.argtypes)
+    tt = closure.typ
+    ocargsig = rewrap_unionall((unwrap_unionall(tt)::DataType).parameters[1], tt)
+    ocargsig′ = unwrap_unionall(ocargsig)
+    ocargsig′ isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+    ocsig = rewrap_unionall(Tuple{Tuple, ocargsig′.parameters...}, ocargsig)
+    hasintersect(sig, ocsig) || return Future(CallMeta(Union{}, Union{MethodError,TypeError}, EFFECTS_THROWS, NoCallInfo()))
+    ocmethod = closure.source::Method
+    if !isdefined(ocmethod, :source)
+        # This opaque closure was created from optimized source. We cannot infer it further.
+        ocrt = rewrap_unionall((unwrap_unionall(tt)::DataType).parameters[2], tt)
+        if isa(ocrt, DataType)
+            return Future(CallMeta(ocrt, Any, Effects(), NoCallInfo()))
+        end
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+    end
+    match = MethodMatch(sig, Core.svec(), ocmethod, sig <: ocsig)
+    mresult = abstract_call_method(interp, ocmethod, sig, Core.svec(), false, si, sv)
+    ocsig_box = Core.Box(ocsig)
+    return Future{CallMeta}(mresult, interp, sv) do result, interp, sv
+        (; rt, exct, effects, call_result, edge, edgecycle) = result
+        𝕃ₚ = ipo_lattice(interp)
+        ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
+        if !edgecycle
+            const_call_result = abstract_call_method_with_const_args(interp, result,
+                #=f=#nothing, arginfo, si, match, sv)
+            if const_call_result !== nothing
+                const_result = const_edge = nothing
+                if const_call_result.rt ⊑ rt
+                    (; rt, effects, const_result, const_edge) = const_call_result
+                end
+                if const_call_result.exct ⋤ exct
+                    (; exct, const_result, const_edge) = const_call_result
+                end
+                if const_edge !== nothing
+                    edge = const_edge
+                    update_valid_age!(sv, get_inference_world(interp), world_range(const_edge))
+                end
+                if const_result !== nothing
+                    call_result = const_result
+                end
+            end
+        end
+        if check # analyze implicit type asserts on argument and return type
+            ftt = closure.typ
+            rty = (unwrap_unionall(ftt)::DataType).parameters[2]
+            rty = rewrap_unionall(rty isa TypeVar ? rty.ub : rty, ftt)
+            if !(rt ⊑ rty && sig ⊑ ocsig_box.contents)
+                effects = Effects(effects; nothrow=false)
+                exct = exct ⊔ TypeError
+            end
+        end
+        rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types)
+        info = OpaqueClosureCallInfo(edge, match, call_result)
+        return CallMeta(rt, exct, effects, info)
+    end
+end
+
+function most_general_argtypes(closure::PartialOpaque)
+    cc = widenconst(closure)
+    argt = (unwrap_unionall(cc)::DataType).parameters[1]
+    if !isa(argt, DataType) || argt.name !== typename(Tuple)
+        argt = Tuple
+    end
+    return Any[argt.parameters...]
+end
+
+function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft),
+                               arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState,
+                               max_methods::Int)
+    if isa(ft, PartialOpaque)
+        newargtypes = copy(arginfo.argtypes)
+        newargtypes[1] = ft.env
+        return abstract_call_opaque_closure(interp,
+            ft, ArgInfo(arginfo.fargs, newargtypes), si, sv, #=check=#true)
+    end
+    wft = widenconst(ft)
+    if hasintersect(wft, Builtin)
+        add_remark!(interp, sv, "Could not identify method table for call")
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+    elseif hasintersect(wft, Core.OpaqueClosure)
+        uft = unwrap_unionall(wft)
+        if isa(uft, DataType)
+            return Future(CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo()))
+        end
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+    end
+    # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic
+    atype = argtypes_to_type(arginfo.argtypes)
+    atype === Bottom && return Future(CallMeta(Union{}, Union{}, EFFECTS_THROWS, NoCallInfo())) # accidentally unreachable
+    return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods)::Future
+end
+
+# call where the function is any lattice element
+function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo,
+                       sv::AbsIntState, max_methods::Int=typemin(Int))
+    ft = widenslotwrapper(arginfo.argtypes[1])
+    f = singleton_type(ft)
+    if f === nothing
+        max_methods = max_methods == typemin(Int) ? get_max_methods(interp, sv) : max_methods
+        return abstract_call_unknown(interp, ft, arginfo, si, sv, max_methods)
+    end
+    max_methods = max_methods == typemin(Int) ? get_max_methods(interp, f, sv) : max_methods
+    return abstract_call_known(interp, f, arginfo, si, sv, max_methods)
+end
+
+function sp_type_rewrap(@nospecialize(T), mi::MethodInstance, isreturn::Bool)
+    isref = false
+    if unwrapva(T) === Bottom
+        return Bottom
+    elseif isa(T, Type)
+        if isa(T, DataType) && (T::DataType).name === Ref.body.name
+            isref = true
+            T = T.parameters[1]
+            if isreturn && T === Any
+                return Bottom # a return type of Ref{Any} is invalid
+            end
+        end
+    else
+        return Any
+    end
+    if isa(mi.def, Method)
+        spsig = mi.def.sig
+        if isa(spsig, UnionAll)
+            if !isempty(mi.sparam_vals)
+                sparam_vals = Any[isvarargtype(v) ? TypeVar(:N, Union{}, Any) :
+                                  v for v in  mi.sparam_vals]
+                T = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), T, spsig, sparam_vals)
+                isref && isreturn && T === Any && return Bottom # catch invalid return Ref{T} where T = Any
+                for v in sparam_vals
+                    if isa(v, TypeVar)
+                        T = UnionAll(v, T)
+                    end
+                end
+                if has_free_typevars(T)
+                    fv = ccall(:jl_find_free_typevars, Vector{Any}, (Any,), T)
+                    for v in fv
+                        T = UnionAll(v, T)
+                    end
+                end
+            else
+                T = rewrap_unionall(T, spsig)
+            end
+        end
+    end
+    return unwraptv(T)
+end
+
+function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, sstate::StatementState, sv::AbsIntState)
+    f = abstract_eval_value(interp, e.args[2], sstate, sv)
+    # rt = sp_type_rewrap(e.args[3], sv.linfo, true) # verify that the result type make sense?
+    # rt === Bottom && return RTEffects(Union{}, Any, EFFECTS_UNKNOWN)
+    atv = e.args[4]::SimpleVector
+    at = Vector{Any}(undef, length(atv) + 1)
+    at[1] = f
+    for i = 1:length(atv)
+        atᵢ = at[i + 1] = sp_type_rewrap(atv[i], frame_instance(sv), false)
+        atᵢ === Bottom && return RTEffects(Union{}, Any, EFFECTS_UNKNOWN)
+    end
+    # this may be the wrong world for the call,
+    # but some of the result is likely to be valid anyways
+    # and that may help generate better codegen
+    abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false, false), sv)::Future
+    rt = e.args[1]
+    isconcretetype(rt) || (rt = Any)
+    return RTEffects(rt, Any, EFFECTS_UNKNOWN)
+end
+
+function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(e), sstate::StatementState, sv::AbsIntState)
+    if isa(e, SSAValue)
+        return RTEffects(abstract_eval_ssavalue(e, sv), Union{}, EFFECTS_TOTAL)
+    elseif isa(e, SlotNumber)
+        if sstate.vtypes !== nothing
+            vtyp = sstate.vtypes[slot_id(e)]
+            if !vtyp.undef
+                return RTEffects(vtyp.typ, Union{}, EFFECTS_TOTAL)
+            end
+            return RTEffects(vtyp.typ, UndefVarError, EFFECTS_THROWS)
+        end
+        return RTEffects(Any, UndefVarError, EFFECTS_THROWS)
+    elseif isa(e, Argument)
+        if sstate.vtypes !== nothing
+            return RTEffects(sstate.vtypes[slot_id(e)].typ, Union{}, EFFECTS_TOTAL)
+        else
+            @assert isa(sv, IRInterpretationState)
+            return RTEffects(sv.ir.argtypes[e.n], Union{}, EFFECTS_TOTAL) # TODO frame_argtypes(sv)[e.n] and remove the assertion
+        end
+    elseif isa(e, GlobalRef)
+        # No need for an edge since an explicit GlobalRef will be picked up by the source scan
+        return abstract_eval_globalref(interp, e, sstate.saw_latestworld, sv)
+    end
+    if isa(e, QuoteNode)
+        e = e.value
+    end
+    effects = Effects(EFFECTS_TOTAL;
+        inaccessiblememonly = is_mutation_free_argtype(typeof(e)) ? ALWAYS_TRUE : ALWAYS_FALSE)
+    return RTEffects(Const(e), Union{}, effects)
+end
+
+function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, sv::AbsIntState)
+    if e.head === :call && length(e.args) ≥ 1
+        # TODO: We still have non-linearized cglobal
+        @assert e.args[1] === Core.tuple || e.args[1] === GlobalRef(Core, :tuple)
+    else
+        @assert e.head !== :(=)
+        # Some of our tests expect us to handle invalid IR here and error later
+        # - permit that for now.
+        # @assert false "Unexpected EXPR head in value position"
+        merge_effects!(interp, sv, EFFECTS_UNKNOWN)
+    end
+    return Any
+end
+
+function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), sstate::StatementState, sv::AbsIntState)
+    if isa(e, Expr)
+        return abstract_eval_value_expr(interp, e, sv)
+    else
+        (;rt, effects) = abstract_eval_special_value(interp, e, sstate, sv)
+        merge_effects!(interp, sv, effects)
+        return collect_limitations!(rt, sv)
+    end
+end
+
+function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, sstate::StatementState, sv::AbsIntState)
+    n = length(ea)
+    argtypes = Vector{Any}(undef, n)
+    @inbounds for i = 1:n
+        ai = abstract_eval_value(interp, ea[i], sstate, sv)
+        if ai === Bottom
+            return nothing
+        end
+        argtypes[i] = ai
+    end
+    return argtypes
+end
+
+struct RTEffects
+    rt::Any
+    exct::Any
+    effects::Effects
+    refinements # ::Union{Nothing,SlotRefinement,Vector{Any}}
+    function RTEffects(rt, exct, effects::Effects, refinements=nothing)
+        @nospecialize rt exct refinements
+        return new(rt, exct, effects, refinements)
+    end
+end
+
+CallMeta(rte::RTEffects, info::CallInfo) =
+    CallMeta(rte.rt, rte.exct, rte.effects, info, rte.refinements)
+
+function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sstate::StatementState, sv::InferenceState)
+    unused = call_result_unused(sv, sv.currpc)
+    if unused
+        add_curr_ssaflag!(sv, IR_FLAG_UNUSED)
+    end
+    si = StmtInfo(!unused, sstate.saw_latestworld)
+    call = abstract_call(interp, arginfo, si, sv)::Future
+    Future{Any}(call, interp, sv) do call, _, sv
+        # this only is needed for the side-effect, sequenced before any task tries to consume the return value,
+        # which this will do even without returning this Future
+        sv.stmt_info[sv.currpc] = call.info
+        nothing
+    end
+    return call
+end
+
+function abstract_eval_call(interp::AbstractInterpreter, e::Expr, sstate::StatementState,
+                            sv::AbsIntState)
+    ea = e.args
+    argtypes = collect_argtypes(interp, ea, sstate, sv)
+    if argtypes === nothing
+        return Future(RTEffects(Bottom, Any, Effects()))
+    end
+    arginfo = ArgInfo(ea, argtypes)
+    call = abstract_call(interp, arginfo, sstate, sv)::Future
+    return Future{RTEffects}(call, interp, sv) do call, _, _
+        (; rt, exct, effects, refinements) = call
+        return RTEffects(rt, exct, effects, refinements)
+    end
+end
+
+function is_field_pointerfree(dt::DataType, fidx::Int)
+    dt.layout::Ptr{Cvoid} == C_NULL && return false
+    DataTypeFieldDesc(dt)[fidx].isptr && return false
+    ft = fieldtype(dt, fidx)
+    return ft isa DataType && datatype_pointerfree(ft)
+end
+
+function abstract_eval_new(interp::AbstractInterpreter, e::Expr, sstate::StatementState,
+                           sv::AbsIntState)
+    𝕃ᵢ = typeinf_lattice(interp)
+    rt, _... = instanceof_tfunc(abstract_eval_value(interp, e.args[1], sstate, sv), true)
+    ut = unwrap_unionall(rt)
+    exct = Union{ErrorException,TypeError}
+    if isa(ut, DataType) && !isabstracttype(ut)
+        ismutable = ismutabletype(ut)
+        fcount = datatype_fieldcount(ut)
+        nargs = length(e.args) - 1
+        has_any_uninitialized = fcount === nothing || (fcount > nargs &&
+            any(i::Int->is_field_pointerfree(ut, i), (nargs+1):fcount))
+        if has_any_uninitialized
+            # allocation with undefined field is inconsistent always
+            consistent = ALWAYS_FALSE
+        elseif ismutable
+            # mutable allocation isn't `:consistent`, but we still have a chance that
+            # return type information later refines the `:consistent`-cy of the method
+            consistent = CONSISTENT_IF_NOTRETURNED
+        else
+            consistent = ALWAYS_TRUE # immutable allocation is consistent
+        end
+        if isconcretedispatch(rt)
+            nothrow = true
+            @assert fcount !== nothing && fcount ≥ nargs "malformed :new expression" # syntactically enforced by the front-end
+            ats = Vector{Any}(undef, nargs)
+            local anyrefine = false
+            local allconst = true
+            for i = 1:nargs
+                at = widenslotwrapper(abstract_eval_value(interp, e.args[i+1], sstate, sv))
+                ft = fieldtype(rt, i)
+                nothrow && (nothrow = ⊑(𝕃ᵢ, at, ft))
+                at = tmeet(𝕃ᵢ, at, ft)
+                at === Bottom && return RTEffects(Bottom, TypeError, EFFECTS_THROWS)
+                if ismutable && !isconst(rt, i)
+                    ats[i] = ft # can't constrain this field (as it may be modified later)
+                    continue
+                end
+                allconst &= isa(at, Const)
+                if !anyrefine
+                    anyrefine = has_nontrivial_extended_info(𝕃ᵢ, at) || # extended lattice information
+                                ⋤(𝕃ᵢ, at, ft) # just a type-level information, but more precise than the declared type
+                end
+                ats[i] = at
+            end
+            if fcount == nargs && consistent === ALWAYS_TRUE && allconst
+                argvals = Vector{Any}(undef, nargs)
+                for j in 1:nargs
+                    argvals[j] = (ats[j]::Const).val
+                end
+                rt = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), rt, argvals, nargs))
+            elseif anyrefine || nargs > datatype_min_ninitialized(rt)
+                # propagate partially initialized struct as `PartialStruct` when:
+                # - any refinement information is available (`anyrefine`), or when
+                # - `nargs` is greater than `n_initialized` derived from the struct type
+                #   information alone
+                undefs = Union{Nothing,Bool}[false for _ in 1:nargs]
+                if nargs < fcount # fill in uninitialized fields
+                    for i = (nargs+1):fcount
+                        ft = fieldtype(rt, i)
+                        push!(ats, ft)
+                        if ft === Union{} # `Union{}`-typed field is never initialized
+                            push!(undefs, true)
+                        elseif isconcretetype(ft) && datatype_pointerfree(ft) # this check is probably incomplete
+                            push!(undefs, false)
+                        # TODO If we can implement the query such that it accurately
+                        #      identifies fields that never be `#undef'd, we can make the
+                        #      following improvements:
+                        # elseif is_field_pointerfree(rt, i)
+                        #     push!(undefs, false)
+                        # elseif ismutable && !isconst(rt, i) # can't constrain this field (as it may be modified later)
+                        #     push!(undefs, nothing)
+                        # else
+                        #     push!(undefs, true)
+                        else
+                            push!(undefs, nothing)
+                        end
+                    end
+                end
+                rt = PartialStruct(𝕃ᵢ, rt, undefs, ats)
+            end
+        else
+            rt = refine_partial_type(rt)
+            nothrow = false
+        end
+    else
+        consistent = ALWAYS_FALSE
+        nothrow = false
+    end
+    nothrow && (exct = Union{})
+    effects = Effects(EFFECTS_TOTAL; consistent, nothrow)
+    return RTEffects(rt, exct, effects)
+end
+
+function abstract_eval_splatnew(interp::AbstractInterpreter, e::Expr, sstate::StatementState,
+                                sv::AbsIntState)
+    𝕃ᵢ = typeinf_lattice(interp)
+    rt, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], sstate, sv), true)
+    nothrow = false
+    if length(e.args) == 2 && isconcretedispatch(rt) && !ismutabletype(rt)
+        at = abstract_eval_value(interp, e.args[2], sstate, sv)
+        n = fieldcount(rt)
+        if (isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
+            (let t = rt, at = at
+                all(i::Int -> getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n)
+            end))
+            nothrow = isexact
+            rt = Const(ccall(:jl_new_structt, Any, (Any, Any), rt, at.val))
+        elseif at isa PartialStruct
+            if ⊑(𝕃ᵢ, at, Tuple) && n > 0
+                fields = at.fields
+                if (n == length(fields) && !isvarargtype(fields[end]) &&
+                    (let t = rt
+                        all(i::Int -> ⊑(𝕃ᵢ, fields[i], fieldtype(t, i)), 1:n)
+                    end))
+                    nothrow = isexact
+                    undefs = Union{Nothing,Bool}[false for _ in 1:n]
+                    rt = PartialStruct(𝕃ᵢ, rt, undefs, fields)
+                end
+            end
+        end
+    else
+        rt = refine_partial_type(rt)
+    end
+    consistent = !ismutabletype(rt) ? ALWAYS_TRUE : CONSISTENT_IF_NOTRETURNED
+    effects = Effects(EFFECTS_TOTAL; consistent, nothrow)
+    return RTEffects(rt, Any, effects)
+end
+
+function abstract_eval_new_opaque_closure(interp::AbstractInterpreter, e::Expr, sstate::StatementState,
+                                          sv::AbsIntState)
+    𝕃ᵢ = typeinf_lattice(interp)
+    rt = Union{}
+    effects = Effects() # TODO
+    if length(e.args) >= 5
+        ea = e.args
+        argtypes = collect_argtypes(interp, ea, sstate, sv)
+        if argtypes === nothing
+            rt = Bottom
+            effects = EFFECTS_THROWS
+        else
+            mi = frame_instance(sv)
+            rt = opaque_closure_tfunc(𝕃ᵢ, argtypes[1], argtypes[2], argtypes[3],
+                argtypes[5], argtypes[6:end], mi)
+            if ea[4] !== true && isa(rt, PartialOpaque)
+                rt = widenconst(rt)
+                # Propagation of PartialOpaque disabled
+            end
+            if isa(rt, PartialOpaque) && isa(sv, InferenceState) && !call_result_unused(sv, sv.currpc)
+                # Infer this now so that the specialization is available to
+                # optimization.
+                argtypes = most_general_argtypes(rt)
+                pushfirst!(argtypes, rt.env)
+                callinfo = abstract_call_opaque_closure(interp, rt,
+                    ArgInfo(nothing, argtypes), StmtInfo(true, false), sv, #=check=#false)::Future
+                Future{Any}(callinfo, interp, sv) do callinfo, _, sv
+                    sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
+                    nothing
+                end
+            end
+        end
+    end
+    return Future(RTEffects(rt, Any, effects))
+end
+
+function abstract_eval_copyast(interp::AbstractInterpreter, e::Expr, sstate::StatementState,
+                               sv::AbsIntState)
+    effects = EFFECTS_UNKNOWN
+    rt = abstract_eval_value(interp, e.args[1], sstate, sv)
+    if rt isa Const && rt.val isa Expr
+        # `copyast` makes copies of Exprs
+        rt = Expr
+    end
+    return RTEffects(rt, Any, effects)
+end
+
+function abstract_eval_isdefined_expr(::AbstractInterpreter, e::Expr, sstate::StatementState,
+                                      sv::AbsIntState)
+    sym = e.args[1]
+    if isa(sym, SlotNumber) && sstate.vtypes !== nothing
+        vtyp = sstate.vtypes[slot_id(sym)]
+        if vtyp.typ === Bottom
+            rt = Const(false) # never assigned previously
+        elseif !vtyp.undef
+            rt = Const(true) # definitely assigned previously
+        else # form `Conditional` to refine `vtyp.undef` in the then branch
+            rt = Conditional(sym, widenslotwrapper(vtyp.typ), widenslotwrapper(vtyp.typ); isdefined=true)
+        end
+        return RTEffects(rt, Union{}, EFFECTS_TOTAL)
+    end
+    rt = Bool
+    effects = EFFECTS_TOTAL
+    exct = Union{}
+    if isexpr(sym, :static_parameter)
+        n = sym.args[1]::Int
+        if 1 <= n <= length(sv.sptypes)
+            sp = sv.sptypes[n]
+            if !sp.undef
+                rt = Const(true)
+            elseif sp.typ === Bottom
+                rt = Const(false)
+            end
+        end
+    else
+        effects = EFFECTS_UNKNOWN
+        exct = Any
+    end
+    return RTEffects(rt, exct, effects)
+end
+
+const generic_isdefinedglobal_effects = Effects(EFFECTS_TOTAL, consistent=ALWAYS_FALSE, nothrow=false)
+function abstract_eval_isdefinedglobal(interp::AbstractInterpreter, mod::Module, sym::Symbol, allow_import::Union{Bool, Nothing}, saw_latestworld::Bool, sv::AbsIntState)
+    rt = Bool
+    if saw_latestworld
+        return CallMeta(RTEffects(rt, Union{}, Effects(generic_isdefinedglobal_effects, nothrow=true)), NoCallInfo())
+    end
+
+    effects = EFFECTS_TOTAL
+    gr = GlobalRef(mod, sym)
+    if allow_import !== true
+        gr = GlobalRef(mod, sym)
+        partition = lookup_binding_partition!(interp, gr, sv)
+        if allow_import !== true && is_some_binding_imported(binding_kind(partition))
+            if allow_import === false
+                rt = Const(false)
+            else
+                effects = Effects(generic_isdefinedglobal_effects, nothrow=true)
+            end
+            @goto done
+        end
+    end
+
+    world = get_inference_world(interp)
+    (valid_worlds, rte) = abstract_load_all_consistent_leaf_partitions(interp, gr, binding_world_hints(world, sv))
+    # XXX: it is unsound to ignore valid_worlds here
+    if rte.exct == Union{}
+        rt = Const(true)
+    elseif rte.rt === Union{} && rte.exct === UndefVarError
+        rt = Const(false)
+    else
+        effects = Effects(generic_isdefinedglobal_effects, nothrow=true)
+    end
+@label done
+    return CallMeta(RTEffects(rt, Union{}, effects), GlobalAccessInfo(convert(Core.Binding, gr)))
+end
+
+function abstract_eval_isdefinedglobal(interp::AbstractInterpreter, @nospecialize(M), @nospecialize(s), @nospecialize(allow_import_arg), @nospecialize(order_arg), saw_latestworld::Bool, sv::AbsIntState)
+    exct = Bottom
+    allow_import = true
+    if allow_import_arg !== nothing
+        if !isa(allow_import_arg, Const)
+            allow_import = nothing
+            if widenconst(allow_import_arg) != Bool
+                exct = Union{exct, TypeError}
+            end
+        else
+            allow_import = allow_import_arg.val
+        end
+    end
+    if order_arg !== nothing
+        exct = global_order_exct(order_arg, #=loading=#true, #=storing=#false)
+        if !(isa(order_arg, Const) && get_atomic_order(order_arg.val, #=loading=#true, #=storing=#false).x >= MEMORY_ORDER_UNORDERED.x)
+            exct = Union{exct, ConcurrencyViolationError}
+        end
+    end
+    ⊑ = partialorder(typeinf_lattice(interp))
+    if M isa Const && s isa Const
+        M, s = M.val, s.val
+        if M isa Module && s isa Symbol
+            return merge_exct(abstract_eval_isdefinedglobal(interp, M, s, allow_import, saw_latestworld, sv), exct)
+        end
+        return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo())
+    elseif !hasintersect(widenconst(M), Module) || !hasintersect(widenconst(s), Symbol)
+        return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo())
+    elseif M ⊑ Module && s ⊑ Symbol
+        return CallMeta(Bool, Union{exct, UndefVarError}, generic_isdefinedglobal_effects, NoCallInfo())
+    end
+    return CallMeta(Bool, Union{exct, TypeError, UndefVarError}, generic_isdefinedglobal_effects, NoCallInfo())
+end
+
+function abstract_eval_isdefinedglobal(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any})
+    if !isvarargtype(argtypes[end])
+        if 3 <= length(argtypes) <= 5
+            return abstract_eval_isdefinedglobal(interp, argtypes[2], argtypes[3],
+                length(argtypes) >= 4 ? argtypes[4] : Const(true),
+                length(argtypes) >= 5 ? argtypes[5] : Const(:unordered),
+                saw_latestworld, sv)
+        else
+            return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+        end
+    elseif length(argtypes) > 6
+        return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo())
+    else
+        return CallMeta(Bool, Union{ConcurrencyViolationError, TypeError, UndefVarError}, generic_isdefinedglobal_effects, NoCallInfo())
+    end
+end
+
+function abstract_eval_throw_undef_if_not(interp::AbstractInterpreter, e::Expr, sstate::StatementState, sv::AbsIntState)
+    condt = abstract_eval_value(interp, e.args[2], sstate, sv)
+    condval = maybe_extract_const_bool(condt)
+    rt = Nothing
+    exct = UndefVarError
+    effects = EFFECTS_THROWS
+    if condval isa Bool
+        if condval
+            effects = EFFECTS_TOTAL
+            exct = Union{}
+        else
+            rt = Union{}
+        end
+    elseif !hasintersect(widenconst(condt), Bool)
+        rt = Union{}
+    end
+    return RTEffects(rt, exct, effects)
+end
+
+function abstract_eval_the_exception(::AbstractInterpreter, sv::InferenceState)
+    (;handler_info) = sv
+    if handler_info === nothing
+        return the_exception_info(Any)
+    end
+    (;handlers, handler_at) = handler_info
+    handler_id = handler_at[sv.currpc][2]
+    if handler_id === 0
+        return the_exception_info(Any)
+    end
+    return the_exception_info(handlers[handler_id].exct)
+end
+abstract_eval_the_exception(::AbstractInterpreter, ::IRInterpretationState) = the_exception_info(Any)
+the_exception_info(@nospecialize t) = RTEffects(t, Union{}, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE))
+
+function abstract_eval_static_parameter(::AbstractInterpreter, e::Expr, sv::AbsIntState)
+    n = e.args[1]::Int
+    nothrow = false
+    if 1 <= n <= length(sv.sptypes)
+        sp = sv.sptypes[n]
+        rt = sp.typ
+        nothrow = !sp.undef
+    else
+        rt = Any
+    end
+    exct = nothrow ? Union{} : UndefVarError
+    effects = Effects(EFFECTS_TOTAL; nothrow)
+    return RTEffects(rt, exct, effects)
+end
+
+function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, sstate::StatementState,
+                                      sv::AbsIntState)::Future{RTEffects}
+    ehead = e.head
+    if ehead === :call
+        return abstract_eval_call(interp, e, sstate, sv)
+    elseif ehead === :new
+        return abstract_eval_new(interp, e, sstate, sv)
+    elseif ehead === :splatnew
+        return abstract_eval_splatnew(interp, e, sstate, sv)
+    elseif ehead === :new_opaque_closure
+        return abstract_eval_new_opaque_closure(interp, e, sstate, sv)
+    elseif ehead === :foreigncall
+        return abstract_eval_foreigncall(interp, e, sstate, sv)
+    elseif ehead === :cfunction
+        return abstract_eval_cfunction(interp, e, sstate, sv)
+    elseif ehead === :method
+        rt = (length(e.args) == 1) ? Any : Method
+        return RTEffects(rt, Any, EFFECTS_UNKNOWN)
+    elseif ehead === :copyast
+        return abstract_eval_copyast(interp, e, sstate, sv)
+    elseif ehead === :invoke || ehead === :invoke_modify
+        error("type inference data-flow error: tried to double infer a function")
+    elseif ehead === :isdefined
+        return abstract_eval_isdefined_expr(interp, e, sstate, sv)
+    elseif ehead === :throw_undef_if_not
+        return abstract_eval_throw_undef_if_not(interp, e, sstate, sv)
+    elseif ehead === :boundscheck
+        return RTEffects(Bool, Union{}, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE))
+    elseif ehead === :the_exception
+        return abstract_eval_the_exception(interp, sv)
+    elseif ehead === :static_parameter
+        return abstract_eval_static_parameter(interp, e, sv)
+    elseif ehead === :gc_preserve_begin || ehead === :aliasscope
+        return RTEffects(Any, Union{}, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE, effect_free=EFFECT_FREE_GLOBALLY))
+    elseif ehead === :gc_preserve_end || ehead === :leave || ehead === :pop_exception || ehead === :popaliasscope
+        return RTEffects(Nothing, Union{}, Effects(EFFECTS_TOTAL; effect_free=EFFECT_FREE_GLOBALLY))
+    elseif ehead === :thunk
+        return RTEffects(Any, Any, Effects())
+    end
+    # N.B.: abstract_eval_value_expr can modify the global effects, but
+    # we move out any arguments with effects during SSA construction later
+    # and recompute the effects.
+    rt = abstract_eval_value_expr(interp, e, sv)
+    return RTEffects(rt, Any, EFFECTS_TOTAL)
+end
+
+# refine the result of instantiation of partially-known type `t` if some invariant can be assumed
+function refine_partial_type(@nospecialize t)
+    t′ = unwrap_unionall(t)
+    if isa(t′, DataType) && t′.name === _NAMEDTUPLE_NAME && length(t′.parameters) == 2 &&
+        (t′.parameters[1] === () || t′.parameters[2] === Tuple{})
+        # if the first/second parameter of `NamedTuple` is known to be empty,
+        # the second/first argument should also be empty tuple type,
+        # so refine it here
+        return Const((;))
+    end
+    return t
+end
+
+function abstract_eval_foreigncall(interp::AbstractInterpreter, e::Expr, sstate::StatementState, sv::AbsIntState)
+    callee = e.args[1]
+    if isexpr(callee, :tuple)
+        if length(callee.args) >= 1
+            # Evaluate the arguments to constrain the world, effects, and other info for codegen,
+            # but note there is an implied `if !=(C_NULL)` branch here that might read data
+            # in a different world (the exact cache behavior is unspecified), so we do not use
+            # these results to refine reachability of the subsequent foreigncall.
+            abstract_eval_value(interp, callee.args[1], sstate, sv)
+            if length(callee.args) >= 2
+                abstract_eval_value(interp, callee.args[2], sstate, sv)
+                #TODO: implement abstract_eval_nonlinearized_foreigncall_name correctly?
+                # lib_effects = abstract_call(interp, ArgInfo(e.args, Any[typeof(Libdl.dlopen), lib]), sstate, sv)::Future
+            end
+        end
+    else
+        abstract_eval_value(interp, callee, sstate, sv)
+    end
+    mi = frame_instance(sv)
+    t = sp_type_rewrap(e.args[2], mi, true)
+    let fptr = e.args[1]
+        if !isexpr(fptr, :tuple)
+            if !hasintersect(widenconst(abstract_eval_value(interp, fptr, sstate, sv)), Ptr)
+                return RTEffects(Bottom, Any, EFFECTS_THROWS)
+            end
+        end
+    end
+    for i = 3:length(e.args)
+        if abstract_eval_value(interp, e.args[i], sstate, sv) === Bottom
+            return RTEffects(Bottom, Any, EFFECTS_THROWS)
+        end
+    end
+    effects = foreigncall_effects(e) do @nospecialize x
+        abstract_eval_value(interp, x, sstate, sv)
+    end
+    cconv = e.args[5]
+    if isa(cconv, QuoteNode) && (v = cconv.value; isa(v, Tuple{Symbol, UInt16, Bool}))
+        override = decode_effects_override(v[2])
+        effects = override_effects(effects, override)
+    end
+    return RTEffects(t, Any, effects)
+end
+
+function abstract_eval_phi(interp::AbstractInterpreter, phi::PhiNode, sstate::StatementState, sv::AbsIntState)
+    rt = Union{}
+    for i in 1:length(phi.values)
+        isassigned(phi.values, i) || continue
+        val = phi.values[i]
+        # N.B.: Phi arguments are restricted to not have effects, so we can drop
+        # them here safely.
+        thisval = abstract_eval_special_value(interp, val, sstate, sv).rt
+        rt = tmerge(typeinf_lattice(interp), rt, thisval)
+    end
+    return rt
+end
+
+function stmt_taints_inbounds_consistency(sv::AbsIntState)
+    propagate_inbounds(sv) && return true
+    return has_curr_ssaflag(sv, IR_FLAG_INBOUNDS)
+end
+
+function merge_override_effects!(interp::AbstractInterpreter, effects::Effects, sv::InferenceState)
+    # N.B.: This only applies to the effects of the statement itself.
+    # It is possible for arguments (GlobalRef/:static_parameter) to throw,
+    # but these will be recomputed during SSA construction later.
+    override = decode_statement_effects_override(sv)
+    effects = override_effects(effects, override)
+    set_curr_ssaflag!(sv, flags_for_effects(effects), IR_FLAGS_EFFECTS)
+    merge_effects!(interp, sv, effects)
+    return effects
+end
+
+function override_effects(effects::Effects, override::EffectsOverride)
+    return Effects(effects;
+        consistent = override.consistent ? ALWAYS_TRUE : effects.consistent,
+        effect_free = override.effect_free ? ALWAYS_TRUE : effects.effect_free,
+        nothrow = override.nothrow ? true : effects.nothrow,
+        terminates = override.terminates_globally ? true : effects.terminates,
+        notaskstate = override.notaskstate ? true : effects.notaskstate,
+        inaccessiblememonly = override.inaccessiblememonly ? ALWAYS_TRUE : effects.inaccessiblememonly,
+        noub = override.noub ? ALWAYS_TRUE :
+            (override.noub_if_noinbounds && effects.noub !== ALWAYS_TRUE) ? NOUB_IF_NOINBOUNDS :
+            effects.noub,
+        nortcall = override.nortcall ? true : effects.nortcall)
+end
+
+world_range(ir::IRCode) = ir.valid_worlds
+world_range(ci::CodeInfo) = WorldRange(ci.min_world, ci.max_world)
+world_range(ci::CodeInstance) = WorldRange(ci.min_world, ci.max_world)
+world_range(compact::IncrementalCompact) = world_range(compact.ir)
+
+# n.b. this function is not part of abstract eval (where it would be unsound) but rather for the optimizer to observe the result of abstract eval
+function abstract_eval_globalref_type(g::GlobalRef, src::Union{CodeInfo, IRCode, IncrementalCompact})
+    worlds = world_range(src)
+    (valid_worlds, rte) = abstract_load_all_consistent_leaf_partitions(nothing, g, WorldWithRange(min_world(worlds), worlds))
+    if min_world(valid_worlds) > min_world(worlds) || max_world(valid_worlds) < max_world(worlds)
+        return Any
+    end
+    return rte.rt
+end
+
+function lookup_binding_partition!(interp::AbstractInterpreter, g::Union{GlobalRef, Core.Binding}, sv::AbsIntState)
+    world = get_inference_world(interp)
+    partition = lookup_binding_partition(world, g)
+    update_valid_age!(sv, world, WorldRange(partition.min_world, partition.max_world))
+    partition
+end
+
+function walk_binding_partition(imported_binding::Core.Binding, partition::Core.BindingPartition, world::UInt)
+    valid_worlds = WorldRange(partition.min_world, partition.max_world)
+    while is_some_binding_imported(binding_kind(partition))
+        imported_binding = partition_restriction(partition)::Core.Binding
+        partition = lookup_binding_partition(world, imported_binding)
+        valid_worlds = intersect(valid_worlds, WorldRange(partition.min_world, partition.max_world))
+    end
+    return Pair{WorldRange, Pair{Core.Binding, Core.BindingPartition}}(valid_worlds, imported_binding=>partition)
+end
+
+function abstract_eval_binding_partition!(interp::AbstractInterpreter, g::GlobalRef, sv::AbsIntState)
+    b = convert(Core.Binding, g)
+    partition = lookup_binding_partition!(interp, b, sv)
+    world = get_inference_world(interp)
+    valid_worlds, (_, partition) = walk_binding_partition(b, partition, world)
+    update_valid_age!(sv, world, valid_worlds)
+    return partition
+end
+
+function abstract_eval_partition_load(interp::Union{AbstractInterpreter,Nothing}, binding::Core.Binding, partition::Core.BindingPartition)
+    kind = binding_kind(partition)
+    isdepwarn = (partition.kind & PARTITION_FLAG_DEPWARN) != 0
+    local_getglobal_effects = Effects(generic_getglobal_effects, effect_free=isdepwarn ? ALWAYS_FALSE : ALWAYS_TRUE)
+    if is_some_guard(kind)
+        if interp !== nothing && InferenceParams(interp).assume_bindings_static
+            return RTEffects(Union{}, UndefVarError, EFFECTS_THROWS)
+        else
+            # We do not currently assume an invalidation for guard -> defined transitions
+            # return RTEffects(Union{}, UndefVarError, EFFECTS_THROWS)
+            return RTEffects(Any, UndefVarError, local_getglobal_effects)
+        end
+    end
+
+    if is_defined_const_binding(kind)
+        if kind == PARTITION_KIND_BACKDATED_CONST
+            # Infer this as guard. We do not want a later const definition to retroactively improve
+            # inference results in an earlier world.
+            return RTEffects(Any, UndefVarError, local_getglobal_effects)
+        end
+        rt = Const(partition_restriction(partition))
+        return RTEffects(rt, Union{}, Effects(EFFECTS_TOTAL,
+            inaccessiblememonly=is_mutation_free_argtype(rt) ? ALWAYS_TRUE : ALWAYS_FALSE,
+            effect_free=isdepwarn ? ALWAYS_FALSE : ALWAYS_TRUE))
+    end
+
+    if kind == PARTITION_KIND_DECLARED
+        # Could be replaced by a backdated const which has an effect, so we can't assume it won't.
+        # Besides, we would prefer not to merge the world range for this into the world range for
+        # _GLOBAL, because that would pessimize codegen.
+        effects = Effects(local_getglobal_effects, effect_free=ALWAYS_FALSE)
+        rt = Any
+    else
+        rt = partition_restriction(partition)
+        effects = local_getglobal_effects
+    end
+    if (interp !== nothing && InferenceParams(interp).assume_bindings_static &&
+        kind in (PARTITION_KIND_GLOBAL, PARTITION_KIND_DECLARED) &&
+        isdefined(binding, :value))
+        exct = Union{}
+        effects = Effects(generic_getglobal_effects; nothrow=true)
+    else
+        # We do not assume in general that assigned global bindings remain assigned.
+        # The existence of pkgimages allows them to revert in practice.
+        exct = UndefVarError
+    end
+    return RTEffects(rt, exct, effects)
+end
+
+function scan_specified_partitions(query::F1, walk_binding_partition::F2,
+    interp::Union{AbstractInterpreter,Nothing}, g::GlobalRef, wwr::WorldWithRange) where {F1,F2}
+    local total_validity, rte, binding_partition
+    binding = convert(Core.Binding, g)
+    lookup_world = max_world(wwr.valid_worlds)
+    while true
+        # Partitions are ordered newest-to-oldest so start at the top
+        binding_partition = @isdefined(binding_partition) ?
+            lookup_binding_partition(lookup_world, binding, binding_partition) :
+            lookup_binding_partition(lookup_world, binding)
+        while lookup_world >= binding_partition.min_world && (!@isdefined(total_validity) || min_world(total_validity) > min_world(wwr.valid_worlds))
+            partition_validity, (leaf_binding, leaf_partition) = walk_binding_partition(binding, binding_partition, lookup_world)
+            @assert lookup_world in partition_validity
+            this_rte = query(interp, leaf_binding, leaf_partition)
+            if @isdefined(rte)
+                if this_rte === rte
+                    total_validity = union(total_validity, partition_validity)
+                    lookup_world = min_world(total_validity) - 1
+                    continue
+                end
+                if min_world(total_validity) <= wwr.this
+                    @goto out
+                end
+            end
+            total_validity = partition_validity
+            lookup_world = min_world(total_validity) - 1
+            rte = this_rte
+        end
+        min_world(total_validity) > min_world(wwr.valid_worlds) || break
+    end
+@label out
+    return Pair{WorldRange, typeof(rte)}(total_validity, rte)
+end
+
+scan_leaf_partitions(query::F, ::Nothing, g::GlobalRef, wwr::WorldWithRange) where F =
+    scan_specified_partitions(query, walk_binding_partition, nothing, g, wwr)
+scan_leaf_partitions(query::F, interp::AbstractInterpreter, g::GlobalRef, wwr::WorldWithRange) where F =
+    scan_specified_partitions(query, walk_binding_partition, interp, g, wwr)
+
+function scan_partitions(query::F, interp::AbstractInterpreter, g::GlobalRef, wwr::WorldWithRange) where F
+    walk_binding_partition = function (b::Core.Binding, partition::Core.BindingPartition, ::UInt)
+        Pair{WorldRange, Pair{Core.Binding, Core.BindingPartition}}(
+            WorldRange(partition.min_world, partition.max_world), b=>partition)
+    end
+    return scan_specified_partitions(query, walk_binding_partition, interp, g, wwr)
+end
+
+abstract_load_all_consistent_leaf_partitions(interp::AbstractInterpreter, g::GlobalRef, wwr::WorldWithRange) =
+    scan_leaf_partitions(abstract_eval_partition_load, interp, g, wwr)
+abstract_load_all_consistent_leaf_partitions(::Nothing, g::GlobalRef, wwr::WorldWithRange) =
+    scan_leaf_partitions(abstract_eval_partition_load, nothing, g, wwr)
+
+function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, saw_latestworld::Bool, sv::AbsIntState)
+    if saw_latestworld
+        return RTEffects(Any, Any, generic_getglobal_effects)
+    end
+    # For inference purposes, we don't particularly care which global binding we end up loading, we only
+    # care about its type. However, we would still like to terminate the world range for the particular
+    # binding we end up reaching such that codegen can emit a simpler pointer load.
+    world = get_inference_world(interp)
+    (valid_worlds, ret) = scan_leaf_partitions(abstract_eval_partition_load, interp, g, binding_world_hints(world, sv))
+    update_valid_age!(sv, world, valid_worlds)
+    return ret
+end
+
+function global_assignment_rt_exct(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, g::GlobalRef, @nospecialize(newty))
+    if saw_latestworld
+        return Pair{Any,Any}(newty, Union{TypeError, ErrorException})
+    end
+    newty′ = RefValue{Any}(newty)
+    world = get_inference_world(interp)
+    (valid_worlds, ret) = scan_partitions(interp, g, binding_world_hints(world, sv)) do interp::AbstractInterpreter, ::Core.Binding, partition::Core.BindingPartition
+        global_assignment_binding_rt_exct(interp, partition, newty′[])
+    end
+    update_valid_age!(sv, world, valid_worlds)
+    return ret
+end
+
+function global_assignment_binding_rt_exct(interp::AbstractInterpreter, partition::Core.BindingPartition, @nospecialize(newty))
+    kind = binding_kind(partition)
+    if is_some_guard(kind)
+        return Pair{Any,Any}(newty, ErrorException)
+    elseif is_some_const_binding(kind) || is_some_imported(kind)
+        # N.B.: Backdating should not improve inference in an earlier world
+        return Pair{Any,Any}(kind == PARTITION_KIND_BACKDATED_CONST ? newty : Bottom, ErrorException)
+    end
+    ty = kind == PARTITION_KIND_DECLARED ? Any : partition_restriction(partition)
+    wnewty = widenconst(newty)
+    if !hasintersect(wnewty, ty)
+        return Pair{Any,Any}(Bottom, TypeError)
+    elseif !(wnewty <: ty)
+        retty = tmeet(typeinf_lattice(interp), newty, ty)
+        return Pair{Any,Any}(retty, TypeError)
+    end
+    return Pair{Any,Any}(newty, Bottom)
+end
+
+abstract_eval_ssavalue(s::SSAValue, sv::InferenceState) = abstract_eval_ssavalue(s, sv.ssavaluetypes)
+
+function abstract_eval_ssavalue(s::SSAValue, ssavaluetypes::Vector{Any})
+    (1 ≤ s.id ≤ length(ssavaluetypes)) || throw(InvalidIRError())
+    typ = ssavaluetypes[s.id]
+    if typ === NOT_FOUND
+        return Bottom
+    end
+    return typ
+end
+
+struct AbstractEvalBasicStatementResult
+    rt
+    exct
+    effects::Union{Nothing,Effects}
+    changes::Union{Nothing,StateUpdate}
+    refinements # ::Union{Nothing,SlotRefinement,Vector{Any}}
+    currsaw_latestworld::Bool
+    function AbstractEvalBasicStatementResult(rt, exct, effects::Union{Nothing,Effects},
+        changes::Union{Nothing,StateUpdate}, refinements, currsaw_latestworld::Bool)
+        @nospecialize rt exct refinements
+        return new(rt, exct, effects, changes, refinements, currsaw_latestworld)
+    end
+end
+
+@inline function abstract_eval_basic_statement(
+    interp::AbstractInterpreter, @nospecialize(stmt), sstate::StatementState, frame::InferenceState,
+    result::Union{Nothing,Future{RTEffects}}=nothing)
+    rt = nothing
+    exct = Bottom
+    changes = nothing
+    refinements = nothing
+    effects = nothing
+    currsaw_latestworld = sstate.saw_latestworld
+    if result !== nothing
+        @goto injectresult
+    end
+    if isa(stmt, NewvarNode)
+        changes = StateUpdate(stmt.slot, VarState(Bottom, true))
+    elseif isa(stmt, PhiNode)
+        add_curr_ssaflag!(frame, IR_FLAGS_REMOVABLE)
+        # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over
+        # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge
+        # over the first and last iterations. By tmerging in the current old_rt, we ensure that
+        # we will not lose an intermediate value.
+        rt = abstract_eval_phi(interp, stmt, sstate, frame)
+        old_rt = frame.ssavaluetypes[frame.currpc]
+        rt = old_rt === NOT_FOUND ? rt : tmerge(typeinf_lattice(interp), old_rt, rt)
+    else
+        lhs = nothing
+        if isexpr(stmt, :(=))
+            lhs = stmt.args[1]
+            stmt = stmt.args[2]
+        end
+        if !isa(stmt, Expr)
+            (; rt, exct, effects, refinements) = abstract_eval_special_value(interp, stmt, sstate, frame)
+        else
+            hd = stmt.head
+            if hd === :method
+                fname = stmt.args[1]
+                if isa(fname, SlotNumber)
+                    changes = StateUpdate(fname, VarState(Any, false))
+                end
+            elseif (hd === :code_coverage_effect ||
+                    # :boundscheck can be narrowed to Bool
+                    (hd !== :boundscheck && is_meta_expr(stmt)))
+                rt = Nothing
+            elseif hd === :latestworld
+                currsaw_latestworld = true
+                rt = Nothing
+            else
+                result = abstract_eval_statement_expr(interp, stmt, sstate, frame)::Future{RTEffects}
+                if !isready(result) || !isempty(frame.tasks)
+                    return result
+
+                    @label injectresult
+                    # reload local variables
+                    lhs = nothing
+                    if isexpr(stmt, :(=))
+                        lhs = stmt.args[1]
+                        stmt = stmt.args[2]
+                    end
+                end
+                result = result[]
+                (; rt, exct, effects, refinements) = result
+                if effects.noub === NOUB_IF_NOINBOUNDS
+                    if has_curr_ssaflag(frame, IR_FLAG_INBOUNDS)
+                        effects = Effects(effects; noub=ALWAYS_FALSE)
+                    elseif !propagate_inbounds(frame)
+                        # The callee read our inbounds flag, but unless we propagate inbounds,
+                        # we ourselves don't read our parent's inbounds.
+                        effects = Effects(effects; noub=ALWAYS_TRUE)
+                    end
+                end
+                @assert !isa(rt, TypeVar) "unhandled TypeVar"
+                rt = maybe_singleton_const(rt)
+                if !isempty(frame.pclimitations)
+                    if rt isa Const || rt === Union{}
+                        empty!(frame.pclimitations)
+                    else
+                        rt = LimitedAccuracy(rt, frame.pclimitations)
+                        frame.pclimitations = IdSet{InferenceState}()
+                    end
+                end
+            end
+        end
+        if lhs !== nothing && rt !== Bottom
+            changes = StateUpdate(lhs::SlotNumber, VarState(rt, false))
+        end
+    end
+    return AbstractEvalBasicStatementResult(rt, exct, effects, changes, refinements, currsaw_latestworld)
+end
+
+struct BestguessInfo{Interp<:AbstractInterpreter}
+    interp::Interp
+    bestguess
+    nargs::Int
+    slottypes::Vector{Any}
+    changes::VarTable
+    function BestguessInfo(interp::Interp, @nospecialize(bestguess), nargs::Int,
+        slottypes::Vector{Any}, changes::VarTable) where Interp<:AbstractInterpreter
+        new{Interp}(interp, bestguess, nargs, slottypes, changes)
+    end
+end
+
+@nospecializeinfer function widenreturn(@nospecialize(rt), info::BestguessInfo)
+    return widenreturn(typeinf_lattice(info.interp), rt, info)
+end
+
+@nospecializeinfer function widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo)
+    return widenreturn(widenlattice(𝕃ᵢ), rt, info)
+end
+@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo)
+    return widenreturn_noslotwrapper(widenlattice(𝕃ᵢ), rt, info)
+end
+
+@nospecializeinfer function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::BestguessInfo)
+    if isa(rt, MustAlias)
+        if 1 ≤ rt.slot ≤ info.nargs
+            rt = InterMustAlias(rt)
+        else
+            rt = widenmustalias(rt)
+        end
+    end
+    isa(rt, InterMustAlias) && return rt
+    return widenreturn(widenlattice(𝕃ᵢ), rt, info)
+end
+
+@nospecializeinfer function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), info::BestguessInfo)
+    ⊑ᵢ = ⊑(𝕃ᵢ)
+    if !(⊑(ipo_lattice(info.interp), info.bestguess, Bool)) || info.bestguess === Bool
+        # give up inter-procedural constraint back-propagation
+        # when tmerge would widen the result anyways (as an optimization)
+        rt = widenconditional(rt)
+    else
+        if isa(rt, Conditional)
+            id = rt.slot
+            if 1 ≤ id ≤ info.nargs
+                old_id_type = widenconditional(info.slottypes[id]) # same as `(states[1]::VarTable)[id].typ`
+                if (!(rt.thentype ⊑ᵢ old_id_type) || old_id_type ⊑ᵢ rt.thentype) &&
+                   (!(rt.elsetype ⊑ᵢ old_id_type) || old_id_type ⊑ᵢ rt.elsetype)
+                   # discard this `Conditional` since it imposes
+                   # no new constraint on the argument type
+                   # (the caller will recreate it if needed)
+                   rt = widenconditional(rt)
+               end
+            else
+                # discard this `Conditional` imposed on non-call arguments,
+                # since it's not interesting in inter-procedural context;
+                # we may give constraints on other call argument
+                rt = widenconditional(rt)
+            end
+        end
+        if isa(rt, Conditional)
+            rt = InterConditional(rt.slot, rt.thentype, rt.elsetype)
+        elseif is_lattice_bool(𝕃ᵢ, rt)
+            rt = bool_rt_to_conditional(rt, info)
+        end
+    end
+    if isa(rt, Conditional)
+        rt = InterConditional(rt)
+    end
+    isa(rt, InterConditional) && return rt
+    return widenreturn(widenlattice(𝕃ᵢ), rt, info)
+end
+@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo)
+    bestguess = info.bestguess
+    if isa(bestguess, InterConditional)
+        # if the bestguess so far is already `Conditional`, try to convert
+        # this `rt` into `Conditional` on the slot to avoid overapproximation
+        # due to conflict of different slots
+        rt = bool_rt_to_conditional(rt, bestguess.slot, info)
+    else
+        # pick up the first "interesting" slot, convert `rt` to its `Conditional`
+        # TODO: ideally we want `Conditional` and `InterConditional` to convey
+        # constraints on multiple slots
+        for slot_id = 1:Int(info.nargs)
+            rt = bool_rt_to_conditional(rt, slot_id, info)
+            rt isa InterConditional && break
+        end
+    end
+    return rt
+end
+@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, info::BestguessInfo)
+    ⊑ᵢ = ⊑(typeinf_lattice(info.interp))
+    old = info.slottypes[slot_id]
+    new = widenslotwrapper(info.changes[slot_id].typ) # avoid nested conditional
+    if isvarargtype(old) || isvarargtype(new)
+        return rt
+    end
+    if new ⊑ᵢ old && !(old ⊑ᵢ new)
+        if isa(rt, Const)
+            val = rt.val
+            if val === true
+                return InterConditional(slot_id, new, Bottom)
+            elseif val === false
+                return InterConditional(slot_id, Bottom, new)
+            end
+        elseif rt === Bool
+            return InterConditional(slot_id, new, new)
+        end
+    end
+    return rt
+end
+
+@nospecializeinfer function widenreturn(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo)
+    return widenreturn_partials(𝕃ᵢ, rt, info)
+end
+@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo)
+    return widenreturn_partials(𝕃ᵢ, rt, info)
+end
+@nospecializeinfer function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo)
+    if isa(rt, PartialStruct)
+        fields = copy(rt.fields)
+        anyrefine = n_initialized(rt) > datatype_min_ninitialized(rt.typ)
+        𝕃 = typeinf_lattice(info.interp)
+        ⊏ = strictpartialorder(𝕃)
+        for i in 1:length(fields)
+            a = fields[i]
+            a = isvarargtype(a) ? a : widenreturn_noslotwrapper(𝕃, a, info)
+            if !anyrefine
+                # TODO: consider adding && const_prop_profitable(a) here?
+                anyrefine = has_extended_info(a) || a ⊏ fieldtype(rt.typ, i)
+            end
+            fields[i] = a
+        end
+        anyrefine && return PartialStruct(𝕃ᵢ, rt.typ, _getundefs(rt), fields)
+    end
+    if isa(rt, PartialOpaque)
+        return rt # XXX: this case was missed in #39512
+    end
+    return widenreturn(widenlattice(𝕃ᵢ), rt, info)
+end
+
+@nospecializeinfer function widenreturn(::ConstsLattice, @nospecialize(rt), ::BestguessInfo)
+    return widenreturn_consts(rt)
+end
+@nospecializeinfer function widenreturn_noslotwrapper(::ConstsLattice, @nospecialize(rt), ::BestguessInfo)
+    return widenreturn_consts(rt)
+end
+@nospecializeinfer function widenreturn_consts(@nospecialize(rt))
+    isa(rt, Const) && return rt
+    return widenconst(rt)
+end
+
+@nospecializeinfer function widenreturn(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo)
+    return widenconst(rt)
+end
+@nospecializeinfer function widenreturn_noslotwrapper(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo)
+    return widenconst(rt)
+end
+
+function handle_control_backedge!(interp::AbstractInterpreter, frame::InferenceState, from::Int, to::Int)
+    if from > to
+        if is_effect_overridden(frame, :terminates_locally)
+            # this backedge is known to terminate
+        else
+            merge_effects!(interp, frame, Effects(EFFECTS_TOTAL; terminates=false))
+        end
+    end
+    return nothing
+end
+
+function update_bbstate!(𝕃ᵢ::AbstractLattice, frame::InferenceState, bb::Int, vartable::VarTable, saw_latestworld::Bool)
+    frame.bb_saw_latestworld[bb] |= saw_latestworld
+    bbtable = frame.bb_vartables[bb]
+    if bbtable === nothing
+        # if a basic block hasn't been analyzed yet,
+        # we can update its state a bit more aggressively
+        frame.bb_vartables[bb] = copy(vartable)
+        return true
+    else
+        return stupdate!(𝕃ᵢ, bbtable, vartable)
+    end
+end
+
+function init_vartable!(vartable::VarTable, frame::InferenceState)
+    nargtypes = length(frame.result.argtypes)
+    for i = 1:length(vartable)
+        vartable[i] = VarState(Bottom, i > nargtypes)
+    end
+    return vartable
+end
+
+function update_bestguess!(interp::AbstractInterpreter, frame::InferenceState,
+                           currstate::VarTable, @nospecialize(rt))
+    bestguess = frame.bestguess
+    nargs = narguments(frame, #=include_va=#false)
+    slottypes = frame.slottypes
+    rt = widenreturn(rt, BestguessInfo(interp, bestguess, nargs, slottypes, currstate))
+    # narrow representation of bestguess slightly to prepare for tmerge with rt
+    if rt isa InterConditional && bestguess isa Const && bestguess.val isa Bool
+        slot_id = rt.slot
+        old_id_type = widenconditional(slottypes[slot_id])
+        if bestguess.val === true && rt.elsetype !== Bottom
+            bestguess = InterConditional(slot_id, old_id_type, Bottom)
+        elseif bestguess.val === false && rt.thentype !== Bottom
+            bestguess = InterConditional(slot_id, Bottom, old_id_type)
+        end
+    # or narrow representation of rt slightly to prepare for tmerge with bestguess
+    elseif bestguess isa InterConditional && rt isa Const && rt.val isa Bool
+        slot_id = bestguess.slot
+        old_id_type = widenconditional(slottypes[slot_id])
+        if rt.val === true && bestguess.elsetype !== Bottom
+            rt = InterConditional(slot_id, old_id_type, Bottom)
+        elseif rt.val === false && bestguess.thentype !== Bottom
+            rt = InterConditional(slot_id, Bottom, old_id_type)
+        end
+    end
+    # copy limitations to return value
+    if !isempty(frame.pclimitations)
+        union!(frame.limitations, frame.pclimitations)
+        empty!(frame.pclimitations)
+    end
+    if !isempty(frame.limitations)
+        rt = LimitedAccuracy(rt, copy(frame.limitations))
+    end
+    𝕃ₚ = ipo_lattice(interp)
+    if !⊑(𝕃ₚ, rt, bestguess)
+        # TODO: if bestguess isa InterConditional && !interesting(bestguess); bestguess = widenconditional(bestguess); end
+        frame.bestguess = tmerge(𝕃ₚ, bestguess, rt) # new (wider) return type for frame
+        return true
+    else
+        return false
+    end
+end
+
+function update_exc_bestguess!(interp::AbstractInterpreter, @nospecialize(exct), frame::InferenceState)
+    𝕃ₚ = ipo_lattice(interp)
+    handler = gethandler(frame)
+    if handler === nothing
+        if !⊑(𝕃ₚ, exct, frame.exc_bestguess)
+            frame.exc_bestguess = tmerge(𝕃ₚ, frame.exc_bestguess, exct)
+            update_cycle_worklists!(frame) do caller::InferenceState, caller_pc::Int
+                caller_handler = gethandler(caller, caller_pc)
+                caller_exct = caller_handler === nothing ?
+                    caller.exc_bestguess : caller_handler.exct
+                return caller_exct !== Any
+            end
+        end
+    else
+        if !⊑(𝕃ₚ, exct, handler.exct)
+            handler.exct = tmerge(𝕃ₚ, handler.exct, exct)
+            enter = frame.src.code[handler.enter_idx]::EnterNode
+            exceptbb = block_for_inst(frame.cfg, enter.catch_dest)
+            push!(frame.ip, exceptbb)
+        end
+    end
+end
+
+function propagate_to_error_handler!(currstate::VarTable, currsaw_latestworld::Bool, frame::InferenceState, 𝕃ᵢ::AbstractLattice)
+    # If this statement potentially threw, propagate the currstate to the
+    # exception handler, BEFORE applying any state changes.
+    curr_hand = gethandler(frame)
+    if curr_hand !== nothing
+        enter = frame.src.code[curr_hand.enter_idx]::EnterNode
+        exceptbb = block_for_inst(frame.cfg, enter.catch_dest)
+        if update_bbstate!(𝕃ᵢ, frame, exceptbb, currstate, currsaw_latestworld)
+            push!(frame.ip, exceptbb)
+        end
+    end
+end
+
+function update_cycle_worklists!(callback, frame::InferenceState)
+    for (caller, caller_pc) in frame.cycle_backedges
+        if callback(caller, caller_pc)
+            push!(caller.ip, block_for_inst(caller.cfg, caller_pc))
+        end
+    end
+end
+
+# make as much progress on `frame` as possible (without handling cycles)
+struct CurrentState
+    result::Future{RTEffects}
+    currstate::VarTable
+    currsaw_latestworld::Bool
+    bbstart::Int
+    bbend::Int
+    CurrentState(result::Future{RTEffects}, currstate::VarTable, currsaw_latestworld::Bool, bbstart::Int, bbend::Int) =
+        new(result, currstate, currsaw_latestworld, bbstart, bbend)
+    CurrentState() = new()
+end
+
+function typeinf_local(interp::AbstractInterpreter, frame::InferenceState, nextresult::CurrentState)
+    @assert !is_inferred(frame)
+    W = frame.ip
+    ssavaluetypes = frame.ssavaluetypes
+    bbs = frame.cfg.blocks
+    nbbs = length(bbs)
+    𝕃ᵢ = typeinf_lattice(interp)
+    states = frame.bb_vartables
+    saw_latestworld = frame.bb_saw_latestworld
+    currbb = frame.currbb
+    currpc = frame.currpc
+
+    if isdefined(nextresult, :result)
+        # for reasons that are fairly unclear, some state is arbitrarily on the stack instead in the InferenceState as normal
+        bbstart = nextresult.bbstart
+        bbend = nextresult.bbend
+        currstate = nextresult.currstate
+        currsaw_latestworld = nextresult.currsaw_latestworld
+        stmt = frame.src.code[currpc]
+        result = abstract_eval_basic_statement(interp, stmt, StatementState(currstate, currsaw_latestworld), frame, nextresult.result)
+        @goto injected_result
+    end
+
+    if currbb != 1
+        currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block
+    end
+    currstate = copy(states[currbb]::VarTable)
+    currsaw_latestworld = saw_latestworld[currbb]
+    while currbb <= nbbs
+        delete!(W, currbb)
+        bbstart = first(bbs[currbb].stmts)
+        bbend = last(bbs[currbb].stmts)
+
+        currpc = bbstart - 1
+        while currpc < bbend
+            currpc += 1
+            frame.currpc = currpc
+            stmt = frame.src.code[currpc]
+            # If we're at the end of the basic block ...
+            if currpc == bbend
+                # Handle control flow
+                if isa(stmt, GotoNode)
+                    succs = bbs[currbb].succs
+                    @assert length(succs) == 1
+                    nextbb = succs[1]
+                    ssavaluetypes[currpc] = Any
+                    handle_control_backedge!(interp, frame, currpc, stmt.label)
+                    add_curr_ssaflag!(frame, IR_FLAG_NOTHROW)
+                    @goto branch
+                elseif isa(stmt, GotoIfNot)
+                    condx = stmt.cond
+                    condslot = ssa_def_slot(condx, frame)
+                    condt = abstract_eval_value(interp, condx, StatementState(currstate, currsaw_latestworld), frame)
+                    if condt === Bottom
+                        ssavaluetypes[currpc] = Bottom
+                        empty!(frame.pclimitations)
+                        @goto find_next_bb
+                    end
+                    orig_condt = condt
+                    if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condslot, SlotNumber)
+                        # if this non-`Conditional` object is a slot, we form and propagate
+                        # the conditional constraint on it
+                        condt = Conditional(condslot, Const(true), Const(false))
+                    end
+                    condval = maybe_extract_const_bool(condt)
+                    nothrow = (condval !== nothing) || ⊑(𝕃ᵢ, orig_condt, Bool)
+                    if nothrow
+                        add_curr_ssaflag!(frame, IR_FLAG_NOTHROW)
+                    else
+                        update_exc_bestguess!(interp, TypeError, frame)
+                        propagate_to_error_handler!(currstate, currsaw_latestworld, frame, 𝕃ᵢ)
+                        merge_effects!(interp, frame, EFFECTS_THROWS)
+                    end
+
+                    if !isempty(frame.pclimitations)
+                        # we can't model the possible effect of control
+                        # dependencies on the return
+                        # directly to all the return values (unless we error first)
+                        condval isa Bool || union!(frame.limitations, frame.pclimitations)
+                        empty!(frame.pclimitations)
+                    end
+                    ssavaluetypes[currpc] = Any
+                    if condval === true
+                        @goto fallthrough
+                    else
+                        if !nothrow && !hasintersect(widenconst(orig_condt), Bool)
+                            ssavaluetypes[currpc] = Bottom
+                            @goto find_next_bb
+                        end
+
+                        succs = bbs[currbb].succs
+                        if length(succs) == 1
+                            @assert condval === false || (stmt.dest === currpc + 1)
+                            nextbb = succs[1]
+                            @goto branch
+                        end
+                        @assert length(succs) == 2
+                        truebb = currbb + 1
+                        falsebb = succs[1] == truebb ? succs[2] : succs[1]
+                        if condval === false
+                            nextbb = falsebb
+                            handle_control_backedge!(interp, frame, currpc, stmt.dest)
+                            @goto branch
+                        end
+
+                        # We continue with the true branch, but process the false
+                        # branch here.
+                        if isa(condt, Conditional)
+                            else_change = conditional_change(𝕃ᵢ, currstate, condt, #=then_or_else=#false)
+                            if else_change !== nothing
+                                elsestate = copy(currstate)
+                                stoverwrite1!(elsestate, else_change)
+                            elseif condslot isa SlotNumber
+                                elsestate = copy(currstate)
+                            else
+                                elsestate = currstate
+                            end
+                            if condslot isa SlotNumber # refine the type of this conditional object itself for this else branch
+                                stoverwrite1!(elsestate, condition_object_change(currstate, condt, condslot, #=then_or_else=#false))
+                            end
+                            else_changed = update_bbstate!(𝕃ᵢ, frame, falsebb, elsestate, currsaw_latestworld)
+                            then_change = conditional_change(𝕃ᵢ, currstate, condt, #=then_or_else=#true)
+                            thenstate = currstate
+                            if then_change !== nothing
+                                stoverwrite1!(thenstate, then_change)
+                            end
+                            if condslot isa SlotNumber # refine the type of this conditional object itself for this then branch
+                                stoverwrite1!(thenstate, condition_object_change(currstate, condt, condslot, #=then_or_else=#true))
+                            end
+                        else
+                            else_changed = update_bbstate!(𝕃ᵢ, frame, falsebb, currstate, currsaw_latestworld)
+                        end
+                        if else_changed
+                            handle_control_backedge!(interp, frame, currpc, stmt.dest)
+                            push!(W, falsebb)
+                        end
+                        @goto fallthrough
+                    end
+                elseif isa(stmt, ReturnNode)
+                    rt = abstract_eval_value(interp, stmt.val, StatementState(currstate, currsaw_latestworld), frame)
+                    if update_bestguess!(interp, frame, currstate, rt)
+                        update_cycle_worklists!(frame) do caller::InferenceState, caller_pc::Int
+                            # no reason to revisit if that call-site doesn't affect the final result
+                            return caller.ssavaluetypes[caller_pc] !== Any
+                        end
+                    end
+                    ssavaluetypes[currpc] = Any
+                    @goto find_next_bb
+                elseif isa(stmt, EnterNode)
+                    ssavaluetypes[currpc] = Any
+                    add_curr_ssaflag!(frame, IR_FLAG_NOTHROW)
+                    if isdefined(stmt, :scope)
+                        scopet = abstract_eval_value(interp, stmt.scope, StatementState(currstate, currsaw_latestworld), frame)
+                        handler = gethandler(frame, currpc + 1)::TryCatchFrame
+                        @assert handler.scopet !== nothing
+                        if !⊑(𝕃ᵢ, scopet, handler.scopet)
+                            handler.scopet = tmerge(𝕃ᵢ, scopet, handler.scopet)
+                            if isdefined(handler, :scope_uses)
+                                for bb in handler.scope_uses
+                                    push!(W, bb)
+                                end
+                            end
+                        end
+                    end
+                    @goto fallthrough
+                elseif isexpr(stmt, :leave)
+                    ssavaluetypes[currpc] = Any
+                    @goto fallthrough
+                end
+                # Fall through terminator - treat as regular stmt
+            end
+            # Process non control-flow statements
+            @assert isempty(frame.tasks)
+            sstate = StatementState(currstate, currsaw_latestworld)
+            result = abstract_eval_basic_statement(interp, stmt, sstate, frame)
+            if result isa Future{RTEffects}
+                return CurrentState(result, currstate, currsaw_latestworld, bbstart, bbend)
+            else
+                @label injected_result
+                (; rt, exct, effects, changes, refinements, currsaw_latestworld) = result
+            end
+            effects === nothing || merge_override_effects!(interp, effects, frame)
+            if !has_curr_ssaflag(frame, IR_FLAG_NOTHROW)
+                if exct !== Union{}
+                    update_exc_bestguess!(interp, exct, frame)
+                    # TODO: assert that these conditions match. For now, we assume the `nothrow` flag
+                    # to be correct, but allow the exct to be an over-approximation.
+                end
+                propagate_to_error_handler!(currstate, currsaw_latestworld, frame, 𝕃ᵢ)
+            end
+            if rt === Bottom
+                ssavaluetypes[currpc] = Bottom
+                # Special case: Bottom-typed PhiNodes do not error (but must also be unused)
+                if isa(stmt, PhiNode)
+                    continue
+                end
+                @goto find_next_bb
+            end
+            if changes !== nothing
+                stoverwrite1!(currstate, changes)
+            end
+            if refinements isa SlotRefinement
+                apply_refinement!(𝕃ᵢ, refinements.slot, refinements.typ, currstate, changes)
+            elseif refinements isa Vector{Any}
+                for i = 1:length(refinements)
+                    newtyp = refinements[i]
+                    newtyp === nothing && continue
+                    apply_refinement!(𝕃ᵢ, SlotNumber(i), newtyp, currstate, changes)
+                end
+            end
+            if rt === nothing
+                ssavaluetypes[currpc] = Any
+                continue
+            end
+            record_ssa_assign!(𝕃ᵢ, currpc, rt, frame)
+        end # for currpc in bbstart:bbend
+
+        # Case 1: Fallthrough termination
+        begin @label fallthrough
+            nextbb = currbb + 1
+        end
+
+        # Case 2: Directly branch to a different BB
+        begin @label branch
+            if update_bbstate!(𝕃ᵢ, frame, nextbb, currstate, currsaw_latestworld)
+                push!(W, nextbb)
+            end
+        end
+
+        # Case 3: Control flow ended along the current path (converged, return or throw)
+        begin @label find_next_bb
+            currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block
+            currbb == -1 && break # the working set is empty
+            currbb > nbbs && break
+
+            nexttable = states[currbb]
+            if nexttable === nothing
+                init_vartable!(currstate, frame)
+            else
+                stoverwrite!(currstate, nexttable)
+            end
+        end
+    end # while currbb <= nbbs
+
+    return CurrentState()
+end
+
+function apply_refinement!(𝕃ᵢ::AbstractLattice, slot::SlotNumber, @nospecialize(newtyp),
+                           currstate::VarTable, currchanges::Union{Nothing,StateUpdate})
+    if currchanges !== nothing && currchanges.var == slot
+        return # type propagation from statement (like assignment) should have the precedence
+    end
+    vtype = currstate[slot_id(slot)]
+    oldtyp = vtype.typ
+    ⊏ = strictpartialorder(𝕃ᵢ)
+    if newtyp ⊏ oldtyp
+        stmtupdate = StateUpdate(slot, VarState(newtyp, vtype.undef))
+        stoverwrite1!(currstate, stmtupdate)
+    end
+end
+
+function conditional_change(𝕃ᵢ::AbstractLattice, currstate::VarTable, condt::Conditional, then_or_else::Bool)
+    vtype = currstate[condt.slot]
+    oldtyp = vtype.typ
+    newtyp = then_or_else ? condt.thentype : condt.elsetype
+    if iskindtype(newtyp)
+        # this code path corresponds to the special handling for `isa(x, iskindtype)` check
+        # implemented within `abstract_call_builtin`
+    elseif ⊑(𝕃ᵢ, ignorelimited(newtyp), ignorelimited(oldtyp))
+        # approximate test for `typ ∩ oldtyp` being better than `oldtyp`
+        # since we probably formed these types with `typesubstract`,
+        # the comparison is likely simple
+    else
+        return nothing
+    end
+    if oldtyp isa LimitedAccuracy
+        # typ is better unlimited, but we may still need to compute the tmeet with the limit
+        # "causes" since we ignored those in the comparison
+        newtyp = tmerge(𝕃ᵢ, newtyp, LimitedAccuracy(Bottom, oldtyp.causes))
+    end
+    # if this `Conditional` is from `@isdefined condt.slot`, refine its `undef` information
+    newundef = condt.isdefined ? !then_or_else : vtype.undef
+    return StateUpdate(SlotNumber(condt.slot), VarState(newtyp, newundef), #=conditional=#true)
+end
+
+function condition_object_change(currstate::VarTable, condt::Conditional,
+                                 condslot::SlotNumber, then_or_else::Bool)
+    vtype = currstate[slot_id(condslot)]
+    newcondt = Conditional(condt.slot,
+        then_or_else ? condt.thentype : Union{},
+        then_or_else ? Union{} : condt.elsetype)
+    return StateUpdate(condslot, VarState(newcondt, vtype.undef))
+end
+
+# make as much progress on `frame` as possible (by handling cycles)
+warnlength::Int = 2500
+function typeinf(interp::AbstractInterpreter, frame::InferenceState)
+    time_before = _time_ns()
+    callstack = frame.callstack::Vector{AbsIntState}
+    nextstates = CurrentState[]
+    takenext = frame.frameid
+    minwarn = warnlength
+    while takenext >= frame.frameid
+        callee = takenext == 0 ? frame : callstack[takenext]::InferenceState
+        if !isempty(callstack)
+            if length(callstack) - frame.frameid >= minwarn
+                topmethod = callstack[1].linfo
+                topmethod.def isa Method || (topmethod = callstack[2].linfo)
+                print(Core.stderr, "info: inference of ", topmethod, " exceeding ", length(callstack), " frames (may be slow).\n")
+                minwarn *= 2
+            end
+            topcallee = (callstack[end]::InferenceState)
+            if topcallee.cycleid != callee.cycleid
+                callee = topcallee
+                takenext = length(callstack)
+            end
+        end
+        interp = callee.interp
+        nextstateid = takenext + 1 - frame.frameid
+        while length(nextstates) < nextstateid
+            push!(nextstates, CurrentState())
+        end
+        if doworkloop(interp, callee)
+            # First drain the workloop. Note that since some scheduled work doesn't
+            # affect the result (e.g. cfunction or abstract_call_method on
+            # get_compileable_sig), but still must be finished up since it may see and
+            # change the local variables of the InferenceState at currpc, we do this
+            # even if the nextresult status is already completed.
+        elseif isdefined(nextstates[nextstateid], :result) || !isempty(callee.ip)
+            # Next make progress on this frame
+            prev = length(callee.tasks) + 1
+            nextstates[nextstateid] = typeinf_local(interp, callee, nextstates[nextstateid])
+            reverse!(callee.tasks, prev)
+        elseif callee.cycleid == length(callstack)
+            # With no active ip's and no cycles, frame is done
+            time_now = _time_ns()
+            callee.time_self_ns += (time_now - time_before)
+            time_before = time_now
+            finish_nocycle(interp, callee, time_before)
+            callee.frameid == 0 && break
+            takenext = length(callstack)
+            nextstateid = takenext + 1 - frame.frameid
+            #@assert length(nextstates) == nextstateid + 1
+            #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates))
+            resize!(nextstates, nextstateid)
+            continue
+        elseif callee.cycleid == callee.frameid
+            # If the current frame is the top part of a cycle, check if the whole cycle
+            # is done, and if not, pick the next item to work on.
+            time_now = _time_ns()
+            callee.time_self_ns += (time_now - time_before)
+            time_before = time_now
+            no_active_ips_in_cycle = true
+            for i = callee.cycleid:length(callstack)
+                caller = callstack[i]::InferenceState
+                @assert caller.cycleid == callee.cycleid
+                if !isempty(caller.tasks) || isdefined(nextstates[i+1-frame.frameid], :result) || !isempty(caller.ip)
+                    no_active_ips_in_cycle = false
+                    break
+                end
+            end
+            if no_active_ips_in_cycle
+                finish_cycle(interp, callstack, callee.cycleid, time_before)
+            end
+            takenext = length(callstack)
+            nextstateid = takenext + 1 - frame.frameid
+            if no_active_ips_in_cycle
+                #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates))
+                resize!(nextstates, nextstateid)
+            else
+                #@assert length(nextstates) == nextstateid
+            end
+            continue
+        else
+            # Continue to the next frame in this cycle
+            takenext = takenext - 1
+        end
+        time_now = _time_ns()
+        callee.time_self_ns += (time_now - time_before)
+        time_before = time_now
+    end
+    #@assert all(nextresult -> !isdefined(nextresult, :result), nextstates)
+    return is_inferred(frame)
+end
diff --git a/base/compiler/abstractlattice.jl b/Compiler/src/abstractlattice.jl
similarity index 87%
rename from base/compiler/abstractlattice.jl
rename to Compiler/src/abstractlattice.jl
index 3c6c874a9a09c..d9cccdb880c7d 100644
--- a/base/compiler/abstractlattice.jl
+++ b/Compiler/src/abstractlattice.jl
@@ -98,8 +98,10 @@ is_valid_lattice_norec(::InferenceLattice, @nospecialize(elem)) = isa(elem, Limi
 """
     tmeet(𝕃::AbstractLattice, a, b::Type)
 
-Compute the lattice meet of lattice elements `a` and `b` over the lattice `𝕃`.
-If `𝕃` is `JLTypeLattice`, this is equivalent to type intersection.
+Compute the lattice meet of lattice elements `a` and `b` over the lattice `𝕃`,
+dropping any results that will not be inhabited at runtime.
+If `𝕃` is `JLTypeLattice`, this is equivalent to type intersection plus the
+elimination of results that have no concrete subtypes.
 Note that currently `b` is restricted to being a type
 (interpreted as a lattice element in the `JLTypeLattice` sub-lattice of `𝕃`).
 """
@@ -107,7 +109,7 @@ function tmeet end
 
 function tmeet(::JLTypeLattice, @nospecialize(a::Type), @nospecialize(b::Type))
     ti = typeintersect(a, b)
-    valid_as_lattice(ti) || return Bottom
+    valid_as_lattice(ti, true) || return Bottom
     return ti
 end
 
@@ -151,7 +153,7 @@ function ⊑ end
 @nospecializeinfer ⊑(::JLTypeLattice, @nospecialize(a::Type), @nospecialize(b::Type)) = a <: b
 
 """
-    ⊏(𝕃::AbstractLattice, a, b) -> Bool
+    ⊏(𝕃::AbstractLattice, a, b)::Bool
 
 The strict partial order over the type inference lattice.
 This is defined as the irreflexive kernel of `⊑`.
@@ -159,7 +161,7 @@ This is defined as the irreflexive kernel of `⊑`.
 @nospecializeinfer ⊏(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = ⊑(𝕃, a, b) && !⊑(𝕃, b, a)
 
 """
-    ⋤(𝕃::AbstractLattice, a, b) -> Bool
+    ⋤(𝕃::AbstractLattice, a, b)::Bool
 
 This order could be used as a slightly more efficient version of the strict order `⊏`,
 where we can safely assume `a ⊑ b` holds.
@@ -167,7 +169,7 @@ where we can safely assume `a ⊑ b` holds.
 @nospecializeinfer ⋤(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = !⊑(𝕃, b, a)
 
 """
-    is_lattice_equal(𝕃::AbstractLattice, a, b) -> Bool
+    is_lattice_equal(𝕃::AbstractLattice, a, b)::Bool
 
 Check if two lattice elements are partial order equivalent.
 This is basically `a ⊑ b && b ⊑ a` in the lattice of `𝕃`
@@ -179,9 +181,9 @@ but (optionally) with extra performance optimizations.
 end
 
 """
-    has_nontrivial_extended_info(𝕃::AbstractLattice, t) -> Bool
+    has_nontrivial_extended_info(𝕃::AbstractLattice, t)::Bool
 
-Determines whether the given lattice element `t` of `𝕃` has non-trivial extended lattice
+Determine whether the given lattice element `t` of `𝕃` has non-trivial extended lattice
 information that would not be available from the type itself.
 """
 @nospecializeinfer has_nontrivial_extended_info(𝕃::AbstractLattice, @nospecialize t) =
@@ -202,9 +204,9 @@ end
 @nospecializeinfer has_nontrivial_extended_info(::JLTypeLattice, @nospecialize(t)) = false
 
 """
-    is_const_prop_profitable_arg(𝕃::AbstractLattice, t) -> Bool
+    is_const_prop_profitable_arg(𝕃::AbstractLattice, t)::Bool
 
-Determines whether the given lattice element `t` of `𝕃` has new extended lattice information
+Determine whether the given lattice element `t` of `𝕃` has new extended lattice information
 that should be forwarded along with constant propagation.
 """
 @nospecializeinfer is_const_prop_profitable_arg(𝕃::AbstractLattice, @nospecialize t) =
@@ -225,9 +227,10 @@ that should be forwarded along with constant propagation.
 end
 @nospecializeinfer function is_const_prop_profitable_arg(𝕃::ConstsLattice, @nospecialize t)
     if isa(t, Const)
-        # don't consider mutable values useful constants
+        # don't consider mutable values useful constants unless they have const fields
         val = t.val
-        return isa(val, Symbol) || isa(val, Type) || !ismutable(val)
+        return isa(val, Symbol) || isa(val, Type) || isa(val, Method) || isa(val, CodeInstance) ||
+                    !ismutable(val) || (typeof(val).name.constfields != C_NULL)
     end
     isa(t, PartialTypeVar) && return false # this isn't forwardable
     return is_const_prop_profitable_arg(widenlattice(𝕃), t)
@@ -249,9 +252,7 @@ end
     isa(x, Const) && return true
     return is_forwardable_argtype(widenlattice(𝕃), x)
 end
-@nospecializeinfer function is_forwardable_argtype(::JLTypeLattice, @nospecialize x)
-    return false
-end
+@nospecializeinfer is_forwardable_argtype(::JLTypeLattice, @nospecialize x) = false
 
 """
     widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) -> new_bestguess
@@ -260,7 +261,7 @@ end
 Appropriately converts inferred type of a return value `rt` to such a type
 that we know we can store in the cache and is valid and good inter-procedurally,
 E.g. if `rt isa Conditional` then `rt` should be converted to `InterConditional`
-or the other cachable lattice element.
+or the other cacheable lattice element.
 
 External lattice `𝕃ᵢ::ExternalLattice` may overload:
 - `widenreturn(𝕃ᵢ::ExternalLattice, @nospecialize(rt), info::BestguessInfo)`
@@ -285,9 +286,16 @@ has_extended_unionsplit(::AnyMustAliasesLattice) = true
 has_extended_unionsplit(::JLTypeLattice) = false
 
 # Curried versions
-⊑(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊑(lattice, a, b)
-⊏(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊏(lattice, a, b)
-⋤(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⋤(lattice, a, b)
+⊑(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊑(𝕃, a, b)
+⊏(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊏(𝕃, a, b)
+⋤(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⋤(𝕃, a, b)
+tmerge(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> tmerge(𝕃, a, b)
+tmeet(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> tmeet(𝕃, a, b)
+partialorder(𝕃::AbstractLattice) = ⊑(𝕃)
+strictpartialorder(𝕃::AbstractLattice) = ⊏(𝕃)
+strictneqpartialorder(𝕃::AbstractLattice) = ⋤(𝕃)
+join(𝕃::AbstractLattice) = tmerge(𝕃)
+meet(𝕃::AbstractLattice) = tmeet(𝕃)
 
 # Fallbacks for external packages using these methods
 const fallback_lattice = InferenceLattice(BaseInferenceLattice.instance)
diff --git a/Compiler/src/bindinginvalidations.jl b/Compiler/src/bindinginvalidations.jl
new file mode 100644
index 0000000000000..1b84c075d83fc
--- /dev/null
+++ b/Compiler/src/bindinginvalidations.jl
@@ -0,0 +1,201 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using ..Compiler: _uncompressed_ir, specializations, get_ci_mi, convert, unsafe_load, cglobal, generating_output, has_image_globalref,
+    PARTITION_MASK_KIND, PARTITION_KIND_GUARD, PARTITION_FLAG_EXPORTED, PARTITION_FLAG_DEPRECATED,
+    BINDING_FLAG_ANY_IMPLICIT_EDGES, binding_kind, partition_restriction, is_some_imported,
+    is_some_binding_imported, is_some_implicit, SizeUnknown, maybe_add_binding_backedge!, walk_binding_partition, abstract_eval_partition_load, userefs
+using .Core: SimpleVector, CodeInfo
+
+function foreachgr(visit, src::CodeInfo)
+    stmts = src.code
+    for i = 1:length(stmts)
+        stmt = stmts[i]
+        isa(stmt, GlobalRef) && visit(stmt)
+        for ur in userefs(stmt)
+            arg = ur[]
+            isa(arg, GlobalRef) && visit(arg)
+        end
+    end
+end
+
+function anygr(visit, src::CodeInfo)
+    stmts = src.code
+    for i = 1:length(stmts)
+        stmt = stmts[i]
+        if isa(stmt, GlobalRef)
+            visit(stmt) && return true
+            continue
+        end
+        for ur in userefs(stmt)
+            arg = ur[]
+            isa(arg, GlobalRef) && visit(arg) && return true
+        end
+    end
+    return false
+end
+
+function should_invalidate_code_for_globalref(gr::GlobalRef, src::CodeInfo)
+    isgr(g::GlobalRef) = gr.mod == g.mod && gr.name === g.name
+    isgr(g) = false
+    return anygr(isgr, src)
+end
+
+function scan_edge_list(ci::Core.CodeInstance, binding::Core.Binding)
+    isdefined(ci, :edges) || return false
+    edges = ci.edges
+    i = 1
+    while i <= length(edges)
+        if isassigned(edges, i) && edges[i] === binding
+            return true
+        end
+        i += 1
+    end
+    return false
+end
+
+function invalidate_method_for_globalref!(gr::GlobalRef, method::Method, invalidated_bpart::Core.BindingPartition, new_max_world::UInt)
+    invalidate_all = false
+    binding = convert(Core.Binding, gr)
+    if isdefined(method, :source)
+        src = _uncompressed_ir(method)
+        invalidate_all = should_invalidate_code_for_globalref(gr, src)
+    end
+    if invalidate_all && !generating_output()
+        @atomic method.did_scan_source |= 0x4
+    end
+    invalidated_any = false
+    for mi in specializations(method)
+        isdefined(mi, :cache) || continue
+        ci = mi.cache
+        invalidated = false
+        while true
+            if ci.max_world > new_max_world && (invalidate_all || scan_edge_list(ci, binding))
+                ccall(:jl_invalidate_code_instance, Cvoid, (Any, UInt), ci, new_max_world)
+                invalidated = true
+            end
+            isdefined(ci, :next) || break
+            ci = ci.next
+        end
+        invalidated && ccall(:jl_maybe_log_binding_invalidation, Cvoid, (Any,), mi)
+        invalidated_any |= invalidated
+    end
+    return invalidated_any
+end
+
+export_affecting_partition_flags(bpart::Core.BindingPartition) =
+    ((bpart.kind & PARTITION_MASK_KIND) == PARTITION_KIND_GUARD,
+     (bpart.kind & PARTITION_FLAG_EXPORTED) != 0,
+     (bpart.kind & PARTITION_FLAG_DEPRECATED) != 0)
+
+function invalidate_code_for_globalref!(b::Core.Binding, invalidated_bpart::Core.BindingPartition, new_bpart::Core.BindingPartition, new_max_world::UInt)
+    gr = b.globalref
+
+    (_, (ib, ibpart)) = walk_binding_partition(b, invalidated_bpart, new_max_world)
+    (_, (nb, nbpart)) = walk_binding_partition(b, new_bpart, new_max_world+1)
+
+    # `abstract_eval_partition_load` is the maximum amount of information that inference
+    # reads from a binding partition. If this information does not change - we do not need to
+    # invalidate any code that inference created, because we know that the result will not change.
+    need_to_invalidate_code =
+        abstract_eval_partition_load(nothing, ib, ibpart) !==
+        abstract_eval_partition_load(nothing, nb, nbpart)
+
+    need_to_invalidate_export = export_affecting_partition_flags(invalidated_bpart) !==
+                                export_affecting_partition_flags(new_bpart)
+
+    invalidated_any = false
+    queued_bindings = Tuple{Core.Binding, Core.BindingPartition, Core.BindingPartition}[]    # defer handling these to keep the logging coherent
+    if need_to_invalidate_code
+        if (b.flags & BINDING_FLAG_ANY_IMPLICIT_EDGES) != 0
+            nmethods = ccall(:jl_module_scanned_methods_length, Csize_t, (Any,), gr.mod)
+            for i = 1:nmethods
+                method = ccall(:jl_module_scanned_methods_getindex, Any, (Any, Csize_t), gr.mod, i)::Method
+                invalidated_any |= invalidate_method_for_globalref!(gr, method, invalidated_bpart, new_max_world)
+            end
+        end
+        nbackedges = ccall(:jl_binding_backedges_length, Csize_t, (Any,), b)
+        for i = 1:nbackedges
+            edge = ccall(:jl_binding_backedges_getindex, Any, (Any, Csize_t), b, i)
+            if isa(edge, CodeInstance)
+                ccall(:jl_invalidate_code_instance, Cvoid, (Any, UInt), edge, new_max_world)
+                invalidated_any = true
+            elseif isa(edge, Core.Binding)
+                isdefined(edge, :partitions) || continue
+                latest_bpart = edge.partitions
+                latest_bpart.max_world == typemax(UInt) || continue
+                is_some_imported(binding_kind(latest_bpart)) || continue
+                if is_some_binding_imported(binding_kind(latest_bpart))
+                    partition_restriction(latest_bpart) === b || continue
+                end
+                push!(queued_bindings, (edge, latest_bpart, latest_bpart))
+            else
+                invalidated_any |= invalidate_method_for_globalref!(gr, edge::Method, invalidated_bpart, new_max_world)
+            end
+        end
+    end
+
+    if need_to_invalidate_code || need_to_invalidate_export
+        # This binding was exported - we need to check all modules that `using` us to see if they
+        # have a binding that is affected by this change.
+        usings_backedges = ccall(:jl_get_module_usings_backedges, Any, (Any,), gr.mod)
+        if usings_backedges !== nothing
+            for user::Module in usings_backedges::Vector{Any}
+                user_binding = ccall(:jl_get_module_binding_or_nothing, Any, (Any, Any), user, gr.name)::Union{Core.Binding, Nothing}
+                user_binding === nothing && continue
+                isdefined(user_binding, :partitions) || continue
+                latest_bpart = user_binding.partitions
+                latest_bpart.max_world == typemax(UInt) || continue
+                is_some_implicit(binding_kind(latest_bpart)) || continue
+                new_bpart = ccall(:jl_maybe_reresolve_implicit, Any, (Any, Csize_t), user_binding, new_max_world)
+                if need_to_invalidate_code || new_bpart !== latest_bpart
+                    push!(queued_bindings, (convert(Core.Binding, user_binding), latest_bpart, new_bpart))
+                end
+            end
+        end
+    end
+    invalidated_any && ccall(:jl_maybe_log_binding_invalidation, Cvoid, (Any,), invalidated_bpart)
+    for (edge, invalidated_bpart, new_bpart) in queued_bindings
+        invalidated_any |= invalidate_code_for_globalref!(edge, invalidated_bpart, new_bpart, new_max_world)
+    end
+    return invalidated_any
+end
+invalidate_code_for_globalref!(gr::GlobalRef, invalidated_bpart::Core.BindingPartition, new_bpart::Core.BindingPartition, new_max_world::UInt) =
+    invalidate_code_for_globalref!(convert(Core.Binding, gr), invalidated_bpart, new_bpart, new_max_world)
+
+function binding_was_invalidated(b::Core.Binding)
+    # At least one partition is required for invalidation
+    !isdefined(b, :partitions) && return false
+    b.partitions.min_world > unsafe_load(cglobal(:jl_require_world, UInt))
+end
+
+function scan_new_method!(method::Method, image_backedges_only::Bool)
+    isdefined(method, :source) || return
+    if image_backedges_only && !has_image_globalref(method)
+        return
+    end
+    src = _uncompressed_ir(method)
+    mod = method.module
+    foreachgr(src) do gr::GlobalRef
+        b = convert(Core.Binding, gr)
+        if binding_was_invalidated(b)
+            # TODO: We could turn this into an addition if condition. For now, use it as a reasonably cheap
+            # additional consistency check
+            @assert !image_backedges_only
+            @atomic method.did_scan_source |= 0x4
+        end
+        maybe_add_binding_backedge!(b, method)
+    end
+    @atomic method.did_scan_source |= 0x1
+end
+
+function scan_new_methods!(internal_methods::Vector{Any}, image_backedges_only::Bool)
+    if image_backedges_only && generating_output(true)
+        # Replacing image bindings is forbidden during incremental precompilation - skip backedge insertion
+        return
+    end
+    for method in internal_methods
+        if isa(method, Method)
+           scan_new_method!(method, image_backedges_only)
+        end
+    end
+end
diff --git a/Compiler/src/bootstrap.jl b/Compiler/src/bootstrap.jl
new file mode 100644
index 0000000000000..aef2a1c5f78c5
--- /dev/null
+++ b/Compiler/src/bootstrap.jl
@@ -0,0 +1,90 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# make sure that typeinf is executed before turning on typeinf_ext
+# this ensures that typeinf_ext doesn't recurse before it can add the item to the workq
+# especially try to make sure any recursive and leaf functions have concrete signatures,
+# since we won't be able to specialize & infer them at runtime
+
+function activate_codegen!()
+    ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel)
+    # Register the new unified compile and emit function
+    ccall(:jl_set_compile_and_emit_func, Cvoid, (Any,), compile_and_emit_native)
+    Core.eval(Compiler, quote
+        let typeinf_world_age = Base.tls_world_age()
+            @eval Core.OptimizedGenerics.CompilerPlugins.typeinf(::Nothing, mi::MethodInstance, source_mode::UInt8) =
+                Base.invoke_in_world($(Expr(:$, :typeinf_world_age)), typeinf_ext_toplevel, mi, Base.tls_world_age(), source_mode, Compiler.TRIM_NO)
+        end
+    end)
+end
+
+global bootstrapping_compiler::Bool = false
+function bootstrap!()
+    global bootstrapping_compiler = true
+    let time() = ccall(:jl_clock_now, Float64, ())
+        println("Compiling the compiler. This may take several minutes ...")
+
+        ssa_inlining_pass!_tt = Tuple{typeof(ssa_inlining_pass!), IRCode, InliningState{NativeInterpreter}, Bool}
+        optimize_tt = Tuple{typeof(optimize), NativeInterpreter, OptimizationState{NativeInterpreter}, InferenceResult}
+        typeinf_ext_tt = Tuple{typeof(typeinf_ext), NativeInterpreter, MethodInstance, UInt8}
+        typeinf_tt = Tuple{typeof(typeinf), NativeInterpreter, InferenceState}
+        typeinf_edge_tt = Tuple{typeof(typeinf_edge), NativeInterpreter, Method, Any, SimpleVector, InferenceState, Bool, Bool}
+        fs = Any[
+            # we first create caches for the optimizer, because they contain many loop constructions
+            # and they're better to not run in interpreter even during bootstrapping
+            compact!, ssa_inlining_pass!_tt, optimize_tt,
+            # then we create caches for inference entries
+            typeinf_ext_tt, typeinf_tt, typeinf_edge_tt,
+        ]
+        # tfuncs can't be inferred from the inference entries above, so here we infer them manually
+        for x in T_FFUNC_VAL
+            push!(fs, x[3])
+        end
+        for i = 1:length(T_IFUNC)
+            if isassigned(T_IFUNC, i)
+                x = T_IFUNC[i]
+                push!(fs, x[3])
+            else
+                println(stderr, "WARNING: tfunc missing for ", reinterpret(IntrinsicFunction, Int32(i)))
+            end
+        end
+        starttime = time()
+        world = get_world_counter()
+        for f in fs
+            if isa(f, DataType) && f.name === typename(Tuple)
+                tt = f
+            else
+                tt = Tuple{typeof(f), Vararg{Any}}
+            end
+            matches = _methods_by_ftype(tt, 10, world)::Vector
+            if isempty(matches)
+                println(stderr, "WARNING: no matching method found for `", tt, "`")
+            else
+                for m in matches
+                    # remove any TypeVars from the intersection
+                    m = m::MethodMatch
+                    params = Any[m.spec_types.parameters...]
+                    for i = 1:length(params)
+                        params[i] = unwraptv(params[i])
+                    end
+                    mi = specialize_method(m.method, Tuple{params...}, m.sparams)
+                    #isa_compileable_sig(mi) || println(stderr, "WARNING: inferring `", mi, "` which isn't expected to be called.")
+                    typeinf_ext_toplevel(mi, world, isa_compileable_sig(mi) ? SOURCE_MODE_ABI : SOURCE_MODE_NOT_REQUIRED, TRIM_NO)
+                end
+            end
+        end
+        endtime = time()
+        println("Base.Compiler ──── ", sub_float(endtime,starttime), " seconds")
+    end
+    activate_codegen!()
+    global bootstrapping_compiler = false
+    nothing
+end
+
+function activate!(; reflection=true, codegen=false)
+    if reflection
+        Base.REFLECTION_COMPILER[] = Compiler
+    end
+    if codegen
+        bootstrap!()
+    end
+end
diff --git a/base/compiler/cicache.jl b/Compiler/src/cicache.jl
similarity index 52%
rename from base/compiler/cicache.jl
rename to Compiler/src/cicache.jl
index 8332777e6d5bc..a632bae5dc55c 100644
--- a/base/compiler/cicache.jl
+++ b/Compiler/src/cicache.jl
@@ -1,21 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-"""
-    struct InternalCodeCache
-
-Internally, each `MethodInstance` keep a unique global cache of code instances
-that have been created for the given method instance, stratified by world age
-ranges. This struct abstracts over access to this cache.
-"""
-struct InternalCodeCache end
-
-function setindex!(cache::InternalCodeCache, ci::CodeInstance, mi::MethodInstance)
-    ccall(:jl_mi_cache_insert, Cvoid, (Any, Any), mi, ci)
-    return cache
-end
-
-const GLOBAL_CI_CACHE = InternalCodeCache()
-
 struct WorldRange
     min_world::UInt
     max_world::UInt
@@ -26,6 +10,8 @@ WorldRange(r::UnitRange) = WorldRange(first(r), last(r))
 first(wr::WorldRange) = wr.min_world
 last(wr::WorldRange) = wr.max_world
 in(world::UInt, wr::WorldRange) = wr.min_world <= world <= wr.max_world
+min_world(wr::WorldRange) = first(wr)
+max_world(wr::WorldRange) = last(wr)
 
 function intersect(a::WorldRange, b::WorldRange)
     ret = WorldRange(max(a.min_world, b.min_world), min(a.max_world, b.max_world))
@@ -33,40 +19,52 @@ function intersect(a::WorldRange, b::WorldRange)
     return ret
 end
 
+function union(a::WorldRange, b::WorldRange)
+    if b.min_world < a.min_world
+        (b, a) = (a, b)
+    end
+    @assert a.max_world >= b.min_world - 1
+    return WorldRange(a.min_world, b.max_world)
+end
+
 """
-    struct WorldView
+    struct InternalCodeCache
 
-Takes a given cache and provides access to the cache contents for the given
-range of world ages, rather than defaulting to the current active world age.
+Internally, each `MethodInstance` keep a unique global cache of code instances
+that have been created for the given method instance, stratified by world age
+ranges. This struct abstracts over access to this cache.
 """
-struct WorldView{Cache}
-    cache::Cache
+struct InternalCodeCache
+    owner::Any # `jl_egal` is used for comparison
     worlds::WorldRange
-    WorldView(cache::Cache, range::WorldRange) where Cache = new{Cache}(cache, range)
+    InternalCodeCache(@nospecialize(owner), wr::WorldRange) = new(owner, wr)
+    InternalCodeCache(@nospecialize(owner), args...) = new(owner, WorldRange(args...))
+end
+
+function setindex!(cache::InternalCodeCache, ci::CodeInstance, mi::MethodInstance)
+    @assert ci.owner === cache.owner
+    m = mi.def
+    if isa(m, Method)
+        ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci)
+    end
+    ccall(:jl_mi_cache_insert, Cvoid, (Any, Any), mi, ci)
+    return cache
 end
-WorldView(cache, args...) = WorldView(cache, WorldRange(args...))
-WorldView(wvc::WorldView, wr::WorldRange) = WorldView(wvc.cache, wr)
-WorldView(wvc::WorldView, args...) = WorldView(wvc.cache, args...)
 
-function haskey(wvc::WorldView{InternalCodeCache}, mi::MethodInstance)
-    return ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds)) !== nothing
+function haskey(wvc::InternalCodeCache, mi::MethodInstance)
+    return ccall(:jl_rettype_inferred, Any, (Any, Any, UInt, UInt), wvc.owner, mi, first(wvc.worlds), last(wvc.worlds)) !== nothing
 end
 
-function get(wvc::WorldView{InternalCodeCache}, mi::MethodInstance, default)
-    r = ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds))
+function get(wvc::InternalCodeCache, mi::MethodInstance, default)
+    r = ccall(:jl_rettype_inferred, Any, (Any, Any, UInt, UInt), wvc.owner, mi, first(wvc.worlds), last(wvc.worlds))
     if r === nothing
         return default
     end
     return r::CodeInstance
 end
 
-function getindex(wvc::WorldView{InternalCodeCache}, mi::MethodInstance)
+function getindex(wvc::InternalCodeCache, mi::MethodInstance)
     r = get(wvc, mi, nothing)
     r === nothing && throw(KeyError(mi))
     return r::CodeInstance
 end
-
-function setindex!(wvc::WorldView{InternalCodeCache}, ci::CodeInstance, mi::MethodInstance)
-    setindex!(wvc.cache, ci, mi)
-    return wvc
-end
diff --git a/Compiler/src/effects.jl b/Compiler/src/effects.jl
new file mode 100644
index 0000000000000..9aea4cb204ec6
--- /dev/null
+++ b/Compiler/src/effects.jl
@@ -0,0 +1,365 @@
+const effects_key_string = """
+## Key for `show` output of Effects:
+
+The output represents the state of different effect properties in the following order:
+
+1. `consistent` (`c`):
+    - `+c` (green): `ALWAYS_TRUE`
+    - `-c` (red): `ALWAYS_FALSE`
+    - `?c` (yellow): `CONSISTENT_IF_NOTRETURNED` and/or `CONSISTENT_IF_INACCESSIBLEMEMONLY`
+2. `effect_free` (`e`):
+    - `+e` (green): `ALWAYS_TRUE`
+    - `-e` (red): `ALWAYS_FALSE`
+    - `?e` (yellow): `EFFECT_FREE_IF_INACCESSIBLEMEMONLY`
+3. `nothrow` (`n`):
+    - `+n` (green): `true`
+    - `-n` (red): `false`
+4. `terminates` (`t`):
+    - `+t` (green): `true`
+    - `-t` (red): `false`
+5. `notaskstate` (`s`):
+    - `+s` (green): `true`
+    - `-s` (red): `false`
+6. `inaccessiblememonly` (`m`):
+    - `+m` (green): `ALWAYS_TRUE`
+    - `-m` (red): `ALWAYS_FALSE`
+    - `?m` (yellow): `INACCESSIBLEMEM_OR_ARGMEMONLY`
+7. `noub` (`u`):
+    - `+u` (green): `true`
+    - `-u` (red): `false`
+    - `?u` (yellow): `NOUB_IF_NOINBOUNDS`
+8. `:nonoverlayed` (`o`):
+    - `+o` (green): `ALWAYS_TRUE`
+    - `-o` (red): `ALWAYS_FALSE`
+    - `?o` (yellow): `CONSISTENT_OVERLAY`
+9. `:nortcall` (`r`):
+    - `+r` (green): `true`
+    - `-r` (red): `false`
+"""
+
+"""
+    effects::Effects
+
+Represents computational effects of a method call.
+
+The effects are a composition of different effect bits that represent some program property
+of the method being analyzed. They are represented as `Bool` or `UInt8` bits with the
+following meanings:
+- `consistent::UInt8`:
+  * `ALWAYS_TRUE`: this method is guaranteed to return or terminate consistently.
+  * `ALWAYS_FALSE`: this method may be not return or terminate consistently, and there is
+    no need for further analysis with respect to this effect property as this conclusion
+    will not be refined anyway.
+  * `CONSISTENT_IF_NOTRETURNED`: the `:consistent`-cy of this method can later be refined to
+    `ALWAYS_TRUE` in a case when the return value of this method never involves newly
+    allocated mutable objects.
+  * `CONSISTENT_IF_INACCESSIBLEMEMONLY`: the `:consistent`-cy of this method can later be
+    refined to `ALWAYS_TRUE` in a case when `:inaccessiblememonly` is proven.
+- `effect_free::UInt8`:
+  * `ALWAYS_TRUE`: this method is free from externally semantically visible side effects.
+  * `ALWAYS_FALSE`: this method may not be free from externally semantically visible side effects, and there is
+    no need for further analysis with respect to this effect property as this conclusion
+    will not be refined anyway.
+  * `EFFECT_FREE_IF_INACCESSIBLEMEMONLY`: the `:effect-free`-ness of this method can later be
+    refined to `ALWAYS_TRUE` in a case when `:inaccessiblememonly` is proven.
+- `nothrow::Bool`: this method is guaranteed to not throw an exception.
+  If the execution of this method may raise `MethodError`s and similar exceptions, then
+  the method is not considered as `:nothrow`.
+  However, note that environment-dependent errors like `StackOverflowError` or `InterruptException`
+  are not modeled by this effect and thus a method that may result in `StackOverflowError`
+  does not necessarily need to taint `:nothrow` (although it should usually taint `:terminates` too).
+- `terminates::Bool`: this method is guaranteed to terminate.
+- `notaskstate::Bool`: this method does not access any state bound to the current
+  task and may thus be moved to a different task without changing observable
+  behavior. Note that this currently implies that `noyield` as well, since
+  yielding modifies the state of the current task, though this may be split
+  in the future.
+- `inaccessiblememonly::UInt8`:
+  * `ALWAYS_TRUE`: this method does not access or modify externally accessible mutable memory.
+    This state corresponds to LLVM's `inaccessiblememonly` function attribute.
+  * `ALWAYS_FALSE`: this method may access or modify externally accessible mutable memory.
+  * `INACCESSIBLEMEM_OR_ARGMEMONLY`: this method does not access or modify externally accessible mutable memory,
+    except that it may access or modify mutable memory pointed to by its call arguments.
+    This may later be refined to `ALWAYS_TRUE` in a case when call arguments are known to be immutable.
+    This state corresponds to LLVM's `inaccessiblemem_or_argmemonly` function attribute.
+- `noub::UInt8`:
+  * `ALWAYS_TRUE`: this method is guaranteed to not execute any undefined behavior (for any input).
+  * `ALWAYS_FALSE`: this method may execute undefined behavior.
+  * `NOUB_IF_NOINBOUNDS`: this method is guaranteed to not execute any undefined behavior
+    under the assumption that its `@boundscheck` code is not elided (which happens when the
+    caller does not set nor propagate the `@inbounds` context)
+  Note that undefined behavior may technically cause the method to violate any other effect
+  assertions (such as `:consistent` or `:effect_free`) as well, but we do not model this,
+  and they assume the absence of undefined behavior.
+- `nonoverlayed::UInt8`:
+  * `ALWAYS_TRUE`: this method is guaranteed to not invoke any methods that defined in an
+    [overlayed method table](@ref OverlayMethodTable).
+  * `CONSISTENT_OVERLAY`: this method may invoke overlayed methods, but all such overlayed
+    methods are `:consistent` with their non-overlayed original counterparts
+    (see [`Base.@assume_effects`](@ref) for the exact definition of `:consistenct`-cy).
+  * `ALWAYS_FALSE`: this method may invoke overlayed methods.
+- `nortcall::Bool`: this method does not call `Core.Compiler.return_type`,
+  and it is guaranteed that any other methods this method might call also do not call
+  `Core.Compiler.return_type`.
+
+Note that the representations above are just internal implementation details and thus likely
+to change in the future. See [`Base.@assume_effects`](@ref) for more detailed explanation
+on the definitions of these properties.
+
+Along the abstract interpretation, `Effects` at each statement are analyzed locally and they
+are merged into the single global `Effects` that represents the entire effects of the
+analyzed method (see the implementation of `merge_effects!`). Each effect property is
+initialized with `ALWAYS_TRUE`/`true` and then transitioned towards `ALWAYS_FALSE`/`false`.
+Note that within the current flow-insensitive analysis design, effects detected by local
+analysis on each statement usually taint the global conclusion conservatively.
+
+
+$(effects_key_string)
+"""
+struct Effects
+    consistent::UInt8
+    effect_free::UInt8
+    nothrow::Bool
+    terminates::Bool
+    notaskstate::Bool
+    inaccessiblememonly::UInt8
+    noub::UInt8
+    nonoverlayed::UInt8
+    nortcall::Bool
+    function Effects(
+        consistent::UInt8,
+        effect_free::UInt8,
+        nothrow::Bool,
+        terminates::Bool,
+        notaskstate::Bool,
+        inaccessiblememonly::UInt8,
+        noub::UInt8,
+        nonoverlayed::UInt8,
+        nortcall::Bool)
+        return new(
+            consistent,
+            effect_free,
+            nothrow,
+            terminates,
+            notaskstate,
+            inaccessiblememonly,
+            noub,
+            nonoverlayed,
+            nortcall)
+    end
+end
+
+const ALWAYS_TRUE  = 0x00
+const ALWAYS_FALSE = 0x01
+
+# :consistent-cy bits
+const CONSISTENT_IF_NOTRETURNED         = 0x01 << 1
+const CONSISTENT_IF_INACCESSIBLEMEMONLY = 0x01 << 2
+
+# :effect_free-ness bits
+const EFFECT_FREE_IF_INACCESSIBLEMEMONLY = 0x02
+
+"""
+`EFFECT_FREE_GLOBALLY` means that the statement is `:effect-free` and does not have a
+caller-visible effect, but may not be removed from the function itself. This may e.g.
+be used for effects that last only for the scope of the current function.
+"""
+const EFFECT_FREE_GLOBALLY = 0x03
+
+# :inaccessiblememonly bits
+const INACCESSIBLEMEM_OR_ARGMEMONLY = 0x01 << 1
+
+# :noub bits
+const NOUB_IF_NOINBOUNDS = 0x01 << 1
+
+# :nonoverlayed bits
+const CONSISTENT_OVERLAY = 0x01 << 1
+
+const EFFECTS_TOTAL   = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  true,  true,  true,  ALWAYS_TRUE,  ALWAYS_TRUE,  ALWAYS_TRUE, true)
+const EFFECTS_THROWS  = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  false, true,  true,  ALWAYS_TRUE,  ALWAYS_TRUE,  ALWAYS_TRUE, true)
+const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_TRUE, false) # unknown mostly, but it's not overlayed at least (e.g. it's not a call)
+
+function Effects(effects::Effects=Effects(
+    ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_FALSE, false);
+    consistent::UInt8 = effects.consistent,
+    effect_free::UInt8 = effects.effect_free,
+    nothrow::Bool = effects.nothrow,
+    terminates::Bool = effects.terminates,
+    notaskstate::Bool = effects.notaskstate,
+    inaccessiblememonly::UInt8 = effects.inaccessiblememonly,
+    noub::UInt8 = effects.noub,
+    nonoverlayed::UInt8 = effects.nonoverlayed,
+    nortcall::Bool = effects.nortcall)
+    return Effects(
+        consistent,
+        effect_free,
+        nothrow,
+        terminates,
+        notaskstate,
+        inaccessiblememonly,
+        noub,
+        nonoverlayed,
+        nortcall)
+end
+
+function is_better_effects(new::Effects, old::Effects)
+    any_improved = false
+    if new.consistent == ALWAYS_TRUE
+        any_improved |= old.consistent != ALWAYS_TRUE
+    else
+        if !iszero(new.consistent & CONSISTENT_IF_NOTRETURNED)
+            old.consistent == ALWAYS_TRUE && return false
+            any_improved |= iszero(old.consistent & CONSISTENT_IF_NOTRETURNED)
+        elseif !iszero(new.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY)
+            old.consistent == ALWAYS_TRUE && return false
+            any_improved |= iszero(old.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY)
+        else
+            return false
+        end
+    end
+    if new.effect_free == ALWAYS_TRUE
+        any_improved |= old.consistent != ALWAYS_TRUE
+    elseif new.effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY
+        old.effect_free == ALWAYS_TRUE && return false
+        any_improved |= old.effect_free != EFFECT_FREE_IF_INACCESSIBLEMEMONLY
+    elseif new.effect_free != old.effect_free
+        return false
+    end
+    if new.nothrow
+        any_improved |= !old.nothrow
+    elseif new.nothrow != old.nothrow
+        return false
+    end
+    if new.terminates
+        any_improved |= !old.terminates
+    elseif new.terminates != old.terminates
+        return false
+    end
+    if new.notaskstate
+        any_improved |= !old.notaskstate
+    elseif new.notaskstate != old.notaskstate
+        return false
+    end
+    if new.inaccessiblememonly == ALWAYS_TRUE
+        any_improved |= old.inaccessiblememonly != ALWAYS_TRUE
+    elseif new.inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY
+        old.inaccessiblememonly == ALWAYS_TRUE && return false
+        any_improved |= old.inaccessiblememonly != INACCESSIBLEMEM_OR_ARGMEMONLY
+    elseif new.inaccessiblememonly != old.inaccessiblememonly
+        return false
+    end
+    if new.noub == ALWAYS_TRUE
+        any_improved |= old.noub != ALWAYS_TRUE
+    elseif new.noub == NOUB_IF_NOINBOUNDS
+        old.noub == ALWAYS_TRUE && return false
+        any_improved |= old.noub != NOUB_IF_NOINBOUNDS
+    elseif new.noub != old.noub
+        return false
+    end
+    if new.nonoverlayed == ALWAYS_TRUE
+        any_improved |= old.nonoverlayed != ALWAYS_TRUE
+    elseif new.nonoverlayed == CONSISTENT_OVERLAY
+        old.nonoverlayed == ALWAYS_TRUE && return false
+        any_improved |= old.nonoverlayed != CONSISTENT_OVERLAY
+    elseif new.nonoverlayed != old.nonoverlayed
+        return false
+    end
+    if new.nortcall
+        any_improved |= !old.nortcall
+    elseif new.nortcall != old.nortcall
+        return false
+    end
+    return any_improved
+end
+
+function merge_effects(old::Effects, new::Effects)
+    return Effects(
+        merge_effectbits(old.consistent, new.consistent),
+        merge_effectbits(old.effect_free, new.effect_free),
+        merge_effectbits(old.nothrow, new.nothrow),
+        merge_effectbits(old.terminates, new.terminates),
+        merge_effectbits(old.notaskstate, new.notaskstate),
+        merge_effectbits(old.inaccessiblememonly, new.inaccessiblememonly),
+        merge_effectbits(old.noub, new.noub),
+        merge_effectbits(old.nonoverlayed, new.nonoverlayed),
+        merge_effectbits(old.nortcall, new.nortcall))
+end
+
+function merge_effectbits(old::UInt8, new::UInt8)
+    if old === ALWAYS_FALSE || new === ALWAYS_FALSE
+        return ALWAYS_FALSE
+    end
+    return old | new
+end
+merge_effectbits(old::Bool, new::Bool) = old & new
+
+is_consistent(effects::Effects)          = effects.consistent === ALWAYS_TRUE
+is_effect_free(effects::Effects)         = effects.effect_free === ALWAYS_TRUE
+is_nothrow(effects::Effects)             = effects.nothrow
+is_terminates(effects::Effects)          = effects.terminates
+is_notaskstate(effects::Effects)         = effects.notaskstate
+is_inaccessiblememonly(effects::Effects) = effects.inaccessiblememonly === ALWAYS_TRUE
+is_noub(effects::Effects)                = effects.noub === ALWAYS_TRUE
+is_noub_if_noinbounds(effects::Effects)  = effects.noub === NOUB_IF_NOINBOUNDS
+is_nonoverlayed(effects::Effects)        = effects.nonoverlayed === ALWAYS_TRUE
+is_nortcall(effects::Effects)            = effects.nortcall
+
+# implies `is_notaskstate` & `is_inaccessiblememonly`, but not explicitly checked here
+is_foldable(effects::Effects, check_rtcall::Bool=false) =
+    is_consistent(effects) &&
+    (is_noub(effects) || is_noub_if_noinbounds(effects)) &&
+    is_effect_free(effects) &&
+    is_terminates(effects) &&
+    (!check_rtcall || is_nortcall(effects))
+
+is_foldable_nothrow(effects::Effects, check_rtcall::Bool=false) =
+    is_foldable(effects, check_rtcall) &&
+    is_nothrow(effects)
+
+# TODO add `is_noub` here?
+is_removable_if_unused(effects::Effects) =
+    is_effect_free(effects) &&
+    is_terminates(effects) &&
+    is_nothrow(effects)
+
+is_finalizer_inlineable(effects::Effects) =
+    is_nothrow(effects) &&
+    is_notaskstate(effects)
+
+is_consistent_if_notreturned(effects::Effects)         = !iszero(effects.consistent & CONSISTENT_IF_NOTRETURNED)
+is_consistent_if_inaccessiblememonly(effects::Effects) = !iszero(effects.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY)
+
+is_effect_free_if_inaccessiblememonly(effects::Effects) = !iszero(effects.effect_free & EFFECT_FREE_IF_INACCESSIBLEMEMONLY)
+
+is_inaccessiblemem_or_argmemonly(effects::Effects) = effects.inaccessiblememonly === INACCESSIBLEMEM_OR_ARGMEMONLY
+
+is_consistent_overlay(effects::Effects) = effects.nonoverlayed === CONSISTENT_OVERLAY
+
+# (sync this with codegen.cpp and staticdata.c effects_foldable functions)
+function encode_effects(e::Effects)
+    return ((e.consistent          % UInt32) << 0)  |
+           ((e.effect_free         % UInt32) << 3)  |
+           ((e.nothrow             % UInt32) << 5)  |
+           ((e.terminates          % UInt32) << 6)  |
+           ((e.notaskstate         % UInt32) << 7)  |
+           ((e.inaccessiblememonly % UInt32) << 8)  |
+           ((e.noub                % UInt32) << 10) |
+           ((e.nonoverlayed        % UInt32) << 12) |
+           ((e.nortcall            % UInt32) << 14)
+end
+
+function decode_effects(e::UInt32)
+    return Effects(
+        UInt8((e >> 0) & 0x07),
+        UInt8((e >> 3) & 0x03),
+        Bool((e >> 5) & 0x01),
+        Bool((e >> 6) & 0x01),
+        Bool((e >> 7) & 0x01),
+        UInt8((e >> 8) & 0x03),
+        UInt8((e >> 10) & 0x03),
+        UInt8((e >> 12) & 0x03),
+        Bool((e >> 14) & 0x01))
+end
+
+decode_statement_effects_override(ssaflag::UInt32) =
+    decode_effects_override(UInt16((ssaflag >> NUM_IR_FLAGS) & (1 << NUM_EFFECTS_OVERRIDES - 1)))
diff --git a/Compiler/src/inferenceresult.jl b/Compiler/src/inferenceresult.jl
new file mode 100644
index 0000000000000..10c4976c9fd3d
--- /dev/null
+++ b/Compiler/src/inferenceresult.jl
@@ -0,0 +1,204 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+function matching_cache_argtypes(::AbstractLattice, mi::MethodInstance)
+    (; def, specTypes) = mi
+    return most_general_argtypes(isa(def, Method) ? def : nothing, specTypes)
+end
+
+struct SimpleArgtypes
+    argtypes::Vector{Any}
+end
+
+# Like `SimpleArgtypes`, but allows the argtypes to be wider than the current call.
+# As a result, it is not legal to refine the cache result with information more
+# precise than was it deducible from the `WidenedSimpleArgtypes`.
+struct WidenedArgtypes
+    argtypes::Vector{Any}
+end
+
+function matching_cache_argtypes(𝕃::AbstractLattice, ::MethodInstance,
+                                 simple_argtypes::Union{SimpleArgtypes, WidenedArgtypes},
+                                 cache_argtypes::Vector{Any})
+    (; argtypes) = simple_argtypes
+    given_argtypes = Vector{Any}(undef, length(argtypes))
+    for i = 1:length(argtypes)
+        given_argtypes[i] = widenslotwrapper(argtypes[i])
+    end
+    return pick_const_args!(𝕃, given_argtypes, cache_argtypes)
+end
+
+function pick_const_arg(𝕃::AbstractLattice, @nospecialize(given_argtype), @nospecialize(cache_argtype))
+    if !is_argtype_match(𝕃, given_argtype, cache_argtype, false)
+        # prefer the argtype we were given over the one computed from `mi`
+        if (isa(given_argtype, PartialStruct) && isa(cache_argtype, Type) &&
+            !⊏(𝕃, given_argtype, cache_argtype))
+            # if the type information of this `PartialStruct` is less strict than
+            # declared method signature, narrow it down using `tmeet`
+            given_argtype = tmeet(𝕃, given_argtype, cache_argtype)
+        end
+        return given_argtype
+    else
+        return cache_argtype
+    end
+end
+
+function pick_const_args!(𝕃::AbstractLattice, given_argtypes::Vector{Any}, cache_argtypes::Vector{Any})
+    ngiven = length(given_argtypes)
+    ncache = length(cache_argtypes)
+    if ngiven == 0 || ncache == 0
+        return Any[]
+    end
+    given_va = given_argtypes[end]
+    cache_va = cache_argtypes[end]
+    if isvarargtype(given_va)
+        va = unwrapva(given_va)
+        if isvarargtype(cache_va)
+            # Process the common prefix, then join
+            nprocessargs = max(ngiven-1, ncache-1)
+            resize!(given_argtypes, nprocessargs+1)
+            given_argtypes[end] = Vararg{pick_const_arg(𝕃, va, unwrapva(cache_va))}
+        else
+            nprocessargs = ncache
+            resize!(given_argtypes, nprocessargs)
+        end
+        for i = ngiven:nprocessargs
+            given_argtypes[i] = va
+        end
+    elseif isvarargtype(cache_va)
+        nprocessargs = ngiven
+    else
+        @assert ngiven == ncache
+        nprocessargs = ngiven
+    end
+    for i = 1:nprocessargs
+        given_argtype = given_argtypes[i]
+        cache_argtype = argtype_by_index(cache_argtypes, i)
+        given_argtypes[i] = pick_const_arg(𝕃, given_argtype, cache_argtype)
+    end
+    return given_argtypes
+end
+
+function is_argtype_match(𝕃::AbstractLattice,
+                          @nospecialize(given_argtype),
+                          @nospecialize(cache_argtype),
+                          overridden_by_const::Bool)
+    if is_forwardable_argtype(𝕃, given_argtype)
+        return is_lattice_equal(𝕃, given_argtype, cache_argtype)
+    else
+        return !overridden_by_const
+    end
+end
+
+function va_process_argtypes(𝕃::AbstractLattice, given_argtypes::Vector{Any}, nargs::UInt, isva::Bool, mi::MethodInstance)
+    nargs = Int(nargs)
+    if isva || (!isempty(given_argtypes) && isvarargtype(given_argtypes[end]))
+        isva_given_argtypes = Vector{Any}(undef, nargs)
+        for i = 1:(nargs-isva)
+            newarg = argtype_by_index(given_argtypes, i)
+            if isva && has_conditional(𝕃) && isa(newarg, Conditional)
+                if newarg.slot > (nargs-isva)
+                    newarg = widenconditional(newarg)
+                end
+            end
+            isva_given_argtypes[i] = newarg
+        end
+        if isva
+            if length(given_argtypes) < nargs && isvarargtype(given_argtypes[end])
+                last = length(given_argtypes)
+            else
+                last = nargs
+                if has_conditional(𝕃)
+                    for i = last:length(given_argtypes)
+                        newarg = given_argtypes[i]
+                        if isa(newarg, Conditional) && newarg.slot > (nargs-isva)
+                            given_argtypes[i] = widenconditional(newarg)
+                        end
+                    end
+                end
+            end
+            isva_given_argtypes[nargs] = tuple_tfunc(𝕃, given_argtypes[last:end])
+        end
+        return isva_given_argtypes
+    end
+    if length(given_argtypes) != nargs
+        println(given_argtypes, " != ", nargs, " for ", mi)
+        throw(AssertionError("invalid `given_argtypes` for `mi`"))
+    end
+    return given_argtypes
+end
+
+function most_general_argtypes(method::Union{Method,Nothing}, @nospecialize(specTypes))
+    mi_argtypes = Any[(unwrap_unionall(specTypes)::DataType).parameters...]
+    nargtypes = length(mi_argtypes)
+    nargs = isa(method, Method) ? Int(method.nargs) : 0
+    if length(mi_argtypes) < nargs && isvarargtype(mi_argtypes[end])
+        resize!(mi_argtypes, nargs)
+    end
+    # Now, we propagate type info from `mi_argtypes` into `cache_argtypes`, improving some
+    # type info as we go (where possible). Note that if we're dealing with a varargs method,
+    # we already handled the last element of `cache_argtypes` (and decremented `nargs` so that
+    # we don't overwrite the result of that work here).
+    tail_index = min(nargtypes, nargs)
+    local lastatype
+    for i = 1:nargtypes
+        atyp = mi_argtypes[i]
+        wasva = false
+        if i == nargtypes && isvarargtype(atyp)
+            wasva = true
+            atyp = unwrapva(atyp)
+        end
+        atyp = unwraptv(atyp)
+        if issingletontype(atyp)
+            # replace singleton types with their equivalent Const object
+            atyp = Const(atyp.instance)
+        elseif isconstType(atyp)
+            atyp = Const(atyp.parameters[1])
+        else
+            atyp = elim_free_typevars(rewrap_unionall(atyp, specTypes))
+        end
+        mi_argtypes[i] = atyp
+        if wasva
+            lastatype = atyp
+            mi_argtypes[end] = Vararg{widenconst(atyp)}
+        end
+    end
+    for i = (tail_index+1):(nargs-1)
+        mi_argtypes[i] = lastatype
+    end
+    return mi_argtypes
+end
+
+# eliminate free `TypeVar`s in order to make the life much easier down the road:
+# at runtime only `Type{...}::DataType` can contain invalid type parameters, and other
+# malformed types here are user-constructed type arguments given at an inference entry
+# so this function will replace only the malformed `Type{...}::DataType` with `Type`
+# and simply replace other possibilities with `Any`
+function elim_free_typevars(@nospecialize t)
+    if has_free_typevars(t)
+        return isType(t) ? Type : Any
+    else
+        return t
+    end
+end
+
+function constprop_cache_lookup(𝕃::AbstractLattice, mi::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{InferenceResult})
+    method = mi.def::Method
+    nargtypes = length(given_argtypes)
+    for cached_result in cache
+        cached_result.tombstone && continue # ignore deleted entries (due to LimitedAccuracy)
+        cached_result.linfo === mi || continue
+        cache_argtypes = cached_result.argtypes
+        @assert length(cache_argtypes) == nargtypes "invalid `cache_argtypes` for `mi`"
+        cache_overridden_by_const = cached_result.overridden_by_const
+        cache_overridden_by_const === nothing && continue
+        cache_overridden_by_const = cache_overridden_by_const::BitVector
+        for i in 1:nargtypes
+            if !is_argtype_match(𝕃, given_argtypes[i], cache_argtypes[i], cache_overridden_by_const[i])
+                @goto next_cache
+            end
+        end
+        return cached_result
+        @label next_cache
+    end
+    return nothing
+end
diff --git a/Compiler/src/inferencestate.jl b/Compiler/src/inferencestate.jl
new file mode 100644
index 0000000000000..6f779a06b7633
--- /dev/null
+++ b/Compiler/src/inferencestate.jl
@@ -0,0 +1,1228 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# data structures
+# ===============
+
+mutable struct BitSetBoundedMinPrioritySet <: AbstractSet{Int}
+    elems::BitSet
+    min::Int
+    # Stores whether min is exact or a lower bound
+    # If exact, it is not set in elems
+    min_exact::Bool
+    max::Int
+end
+
+function BitSetBoundedMinPrioritySet(max::Int)
+    bs = BitSet()
+    bs.offset = 0
+    BitSetBoundedMinPrioritySet(bs, max+1, true, max)
+end
+
+@noinline function _advance_bsbmp!(bsbmp::BitSetBoundedMinPrioritySet)
+    @assert !bsbmp.min_exact
+    bsbmp.min = _bits_findnext(bsbmp.elems.bits, bsbmp.min)::Int
+    bsbmp.min < 0 && (bsbmp.min = bsbmp.max + 1)
+    bsbmp.min_exact = true
+    delete!(bsbmp.elems, bsbmp.min)
+    return nothing
+end
+
+function isempty(bsbmp::BitSetBoundedMinPrioritySet)
+    if bsbmp.min > bsbmp.max
+        return true
+    end
+    bsbmp.min_exact && return false
+    _advance_bsbmp!(bsbmp)
+    return bsbmp.min > bsbmp.max
+end
+
+function popfirst!(bsbmp::BitSetBoundedMinPrioritySet)
+    bsbmp.min_exact || _advance_bsbmp!(bsbmp)
+    m = bsbmp.min
+    m > bsbmp.max && throw(ArgumentError("BitSetBoundedMinPrioritySet must be non-empty"))
+    bsbmp.min = m+1
+    bsbmp.min_exact = false
+    return m
+end
+
+function push!(bsbmp::BitSetBoundedMinPrioritySet, idx::Int)
+    if idx <= bsbmp.min
+        if bsbmp.min_exact && bsbmp.min < bsbmp.max && idx != bsbmp.min
+            push!(bsbmp.elems, bsbmp.min)
+        end
+        bsbmp.min = idx
+        bsbmp.min_exact = true
+        return nothing
+    end
+    push!(bsbmp.elems, idx)
+    return nothing
+end
+
+function in(idx::Int, bsbmp::BitSetBoundedMinPrioritySet)
+    if bsbmp.min_exact && idx == bsbmp.min
+        return true
+    end
+    return idx in bsbmp.elems
+end
+
+iterate(bsbmp::BitSetBoundedMinPrioritySet, s...) = iterate(bsbmp.elems, s...)
+
+function append!(bsbmp::BitSetBoundedMinPrioritySet, itr)
+    for val in itr
+        push!(bsbmp, val)
+    end
+end
+
+mutable struct TwoPhaseVectorView <: AbstractVector{Int}
+    const data::Vector{Int}
+    count::Int
+    const range::UnitRange{Int}
+end
+size(tpvv::TwoPhaseVectorView) = (tpvv.count,)
+function getindex(tpvv::TwoPhaseVectorView, i::Int)
+    checkbounds(tpvv, i)
+    @inbounds tpvv.data[first(tpvv.range) + i - 1]
+end
+function push!(tpvv::TwoPhaseVectorView, v::Int)
+    tpvv.count += 1
+    tpvv.data[first(tpvv.range) + tpvv.count - 1] = v
+    return nothing
+end
+
+"""
+    mutable struct TwoPhaseDefUseMap
+
+This struct is intended as a memory- and GC-pressure-efficient mechanism
+for incrementally computing def-use maps. The idea is that the def-use map
+is constructed into two passes over the IR. In the first, we simply count the
+the number of uses, computing the number of uses for each def as well as the
+total number of uses. In the second pass, we actually fill in the def-use
+information.
+
+The idea is that either of these two phases can be combined with other useful
+work that needs to scan the instruction stream anyway, while avoiding the
+significant allocation pressure of e.g. allocating an array for every SSA value
+or attempting to dynamically move things around as new uses are discovered.
+
+The def-use map is presented as a vector of vectors. For every def, indexing
+into the map will return a vector of uses.
+"""
+mutable struct TwoPhaseDefUseMap <: AbstractVector{TwoPhaseVectorView}
+    ssa_uses::Vector{Int}
+    data::Vector{Int}
+    complete::Bool
+end
+
+function complete!(tpdum::TwoPhaseDefUseMap)
+    cumsum = 0
+    for i = 1:length(tpdum.ssa_uses)
+        this_val = cumsum + 1
+        cumsum += tpdum.ssa_uses[i]
+        tpdum.ssa_uses[i] = this_val
+    end
+    resize!(tpdum.data, cumsum)
+    fill!(tpdum.data, 0)
+    tpdum.complete = true
+end
+
+function TwoPhaseDefUseMap(nssas::Int)
+    ssa_uses = zeros(Int, nssas)
+    data = Int[]
+    complete = false
+    return TwoPhaseDefUseMap(ssa_uses, data, complete)
+end
+
+function count!(tpdum::TwoPhaseDefUseMap, arg::SSAValue)
+    @assert !tpdum.complete
+    tpdum.ssa_uses[arg.id] += 1
+end
+
+function kill_def_use!(tpdum::TwoPhaseDefUseMap, def::Int, use::Int)
+    if !tpdum.complete
+        tpdum.ssa_uses[def] -= 1
+    else
+        range = tpdum.ssa_uses[def]:(def == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[def + 1] - 1))
+        # TODO: Sorted
+        useidx = findfirst(idx->tpdum.data[idx] == use, range)
+        @assert useidx !== nothing
+        idx = range[useidx]
+        while idx < lastindex(range)
+            ndata = tpdum.data[idx+1]
+            ndata == 0 && break
+            tpdum.data[idx] = ndata
+            idx += 1
+        end
+        tpdum.data[idx] = 0
+    end
+end
+kill_def_use!(tpdum::TwoPhaseDefUseMap, def::SSAValue, use::Int) =
+    kill_def_use!(tpdum, def.id, use)
+
+function getindex(tpdum::TwoPhaseDefUseMap, idx::Int)
+    @assert tpdum.complete
+    range = tpdum.ssa_uses[idx]:(idx == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[idx + 1] - 1))
+    # TODO: Make logarithmic
+    nelems = 0
+    for i in range
+        tpdum.data[i] == 0 && break
+        nelems += 1
+    end
+    return TwoPhaseVectorView(tpdum.data, nelems, range)
+end
+
+mutable struct LazyCFGReachability
+    ir::IRCode
+    reachability::CFGReachability
+    LazyCFGReachability(ir::IRCode) = new(ir)
+end
+function get!(x::LazyCFGReachability)
+    isdefined(x, :reachability) && return x.reachability
+    domtree = construct_domtree(x.ir)
+    return x.reachability = CFGReachability(x.ir.cfg, domtree)
+end
+
+mutable struct LazyGenericDomtree{IsPostDom}
+    ir::IRCode
+    domtree::GenericDomTree{IsPostDom}
+    LazyGenericDomtree{IsPostDom}(ir::IRCode) where {IsPostDom} = new{IsPostDom}(ir)
+end
+function get!(x::LazyGenericDomtree{IsPostDom}) where {IsPostDom}
+    isdefined(x, :domtree) && return x.domtree
+    return @zone "CC: DOMTREE_2" x.domtree = IsPostDom ?
+        construct_postdomtree(x.ir) :
+        construct_domtree(x.ir)
+end
+
+const LazyDomtree = LazyGenericDomtree{false}
+const LazyPostDomtree = LazyGenericDomtree{true}
+
+# InferenceState
+# ==============
+
+"""
+    const VarTable = Vector{VarState}
+
+The extended lattice that maps local variables to inferred type represented as `AbstractLattice`.
+Each index corresponds to the `id` of `SlotNumber` which identifies each local variable.
+Note that `InferenceState` will maintain multiple `VarTable`s at each SSA statement
+to enable flow-sensitive analysis.
+"""
+const VarTable = Vector{VarState}
+
+struct StatementState
+    vtypes::Union{VarTable,Nothing}
+    saw_latestworld::Bool
+end
+
+const CACHE_MODE_NULL     = 0x00      # not cached, optimization optional
+const CACHE_MODE_GLOBAL   = 0x01 << 0 # cached globally, optimization required
+const CACHE_MODE_LOCAL    = 0x01 << 1 # cached locally, optimization required
+
+abstract type Handler end
+get_enter_idx(handler::Handler) = get_enter_idx_impl(handler)::Int
+
+mutable struct TryCatchFrame <: Handler
+    exct
+    scopet
+    const enter_idx::Int
+    scope_uses::Vector{Int}
+    TryCatchFrame(@nospecialize(exct), @nospecialize(scopet), enter_idx::Int) =
+        new(exct, scopet, enter_idx)
+end
+TryCatchFrame(stmt::EnterNode, pc::Int) =
+    TryCatchFrame(Bottom, isdefined(stmt, :scope) ? Bottom : nothing, pc)
+get_enter_idx_impl((; enter_idx)::TryCatchFrame) = enter_idx
+
+struct SimpleHandler <: Handler
+    enter_idx::Int
+end
+SimpleHandler(::EnterNode, pc::Int) = SimpleHandler(pc)
+get_enter_idx_impl((; enter_idx)::SimpleHandler) = enter_idx
+
+struct HandlerInfo{T<:Handler}
+    handlers::Vector{T}
+    handler_at::Vector{Tuple{Int,Int}} # tuple of current (handler, exception stack) value at the pc
+end
+
+struct WorldWithRange
+    this::UInt
+    valid_worlds::WorldRange
+    function WorldWithRange(world::UInt, valid_worlds::WorldRange)
+        if !(world in valid_worlds)
+            error("invalid age range update")
+        end
+        return new(world, valid_worlds)
+    end
+end
+
+intersect(world::WorldWithRange, valid_worlds::WorldRange) =
+    WorldWithRange(world.this, intersect(world.valid_worlds, valid_worlds))
+
+mutable struct InferenceState
+    #= information about this method instance =#
+    linfo::MethodInstance
+    valid_worlds::WorldRange
+    mod::Module
+    sptypes::Vector{VarState}
+    slottypes::Vector{Any}
+    src::CodeInfo
+    cfg::CFG
+    spec_info::SpecInfo
+
+    #= intermediate states for local abstract interpretation =#
+    currbb::Int
+    currpc::Int
+    ip::BitSet#=TODO BoundedMinPrioritySet=# # current active instruction pointers
+    handler_info::Union{Nothing,HandlerInfo{TryCatchFrame}}
+    ssavalue_uses::Vector{BitSet} # ssavalue sparsity and restart info
+    # TODO: Could keep this sparsely by doing structural liveness analysis ahead of time.
+    bb_vartables::Vector{Union{Nothing,VarTable}} # nothing if not analyzed yet
+    bb_saw_latestworld::Vector{Bool}
+    ssavaluetypes::Vector{Any}
+    ssaflags::Vector{UInt32}
+    edges::Vector{Any}
+    stmt_info::Vector{CallInfo}
+
+    #= intermediate states for interprocedural abstract interpretation =#
+    tasks::Vector{WorkThunk}
+    pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue
+    limitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on return
+    cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller
+
+    # IPO tracking of in-process work, shared with all frames given AbstractInterpreter
+    callstack #::Vector{AbsIntState}
+    parentid::Int # index into callstack of the parent frame that originally added this frame (call cycle_parent to extract the current parent of the SCC)
+    frameid::Int # index into callstack at which this object is found (or zero, if this is not a cached frame and has no parent)
+    cycleid::Int # index into the callstack of the topmost frame in the cycle (all frames in the same cycle share the same cycleid)
+
+    #= results =#
+    result::InferenceResult # remember where to put the result
+    unreachable::BitSet # statements that were found to be statically unreachable
+    bestguess #::Type
+    exc_bestguess
+    ipo_effects::Effects
+    time_start::UInt64
+    time_caches::Float64
+    time_paused::UInt64
+    time_self_ns::UInt64
+
+    #= flags =#
+    # Whether to restrict inference of abstract call sites to avoid excessive work
+    # Set by default for toplevel frame.
+    restrict_abstract_call_sites::Bool
+    cache_mode::UInt8 # TODO move this to InferenceResult?
+    insert_coverage::Bool
+
+    # The interpreter that created this inference state. Not looked at by
+    # NativeInterpreter. But other interpreters may use this to detect cycles
+    interp::AbstractInterpreter
+
+    # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results
+    function InferenceState(result::InferenceResult, src::CodeInfo, cache_mode::UInt8,
+                            interp::AbstractInterpreter)
+        mi = result.linfo
+        world = get_inference_world(interp)
+        if world == typemax(UInt)
+            error("Entering inference from a generated function with an invalid world")
+        end
+        def = mi.def
+        mod = isa(def, Method) ? def.module : def
+        sptypes = sptypes_from_meth_instance(mi)
+        code = src.code::Vector{Any}
+        cfg = compute_basic_blocks(code)
+        spec_info = SpecInfo(src)
+
+        currbb = currpc = 1
+        ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1)
+        handler_info = ComputeTryCatch{TryCatchFrame}()(code)
+        nssavalues = src.ssavaluetypes::Int
+        ssavalue_uses = find_ssavalue_uses(code, nssavalues)
+        nstmts = length(code)
+        edges = []
+        stmt_info = CallInfo[ NoCallInfo() for _ = 1:nstmts ]
+
+        nslots = length(src.slotflags)
+        slottypes = Vector{Any}(undef, nslots)
+        bb_saw_latestworld = Bool[false for _ = 1:length(cfg.blocks)]
+        bb_vartables = Union{Nothing,VarTable}[ nothing for _ = 1:length(cfg.blocks) ]
+        bb_vartable1 = bb_vartables[1] = VarTable(undef, nslots)
+        argtypes = result.argtypes
+
+        argtypes = va_process_argtypes(typeinf_lattice(interp), argtypes, src.nargs, src.isva, mi)
+
+        nargtypes = length(argtypes)
+        for i = 1:nslots
+            argtyp = (i > nargtypes) ? Bottom : argtypes[i]
+            if argtyp === Bool && has_conditional(typeinf_lattice(interp))
+                argtyp = Conditional(i, Const(true), Const(false))
+            end
+            slottypes[i] = argtyp
+            bb_vartable1[i] = VarState(argtyp, i > nargtypes)
+        end
+        src.ssavaluetypes = ssavaluetypes = Any[ NOT_FOUND for _ = 1:nssavalues ]
+        ssaflags = copy(src.ssaflags)
+
+        unreachable = BitSet()
+        pclimitations = IdSet{InferenceState}()
+        limitations = IdSet{InferenceState}()
+        cycle_backedges = Tuple{InferenceState,Int}[]
+        callstack = AbsIntState[]
+        tasks = WorkThunk[]
+
+        valid_worlds = WorldRange(1, get_world_counter())
+        bestguess = Bottom
+        exc_bestguess = Bottom
+        ipo_effects = EFFECTS_TOTAL
+
+        insert_coverage = should_insert_coverage(mod, src.debuginfo)
+        if insert_coverage
+            ipo_effects = Effects(ipo_effects; effect_free = ALWAYS_FALSE)
+        end
+
+        if def isa Method
+            nonoverlayed = is_nonoverlayed(def) ? ALWAYS_TRUE :
+                is_effect_overridden(def, :consistent_overlay) ? CONSISTENT_OVERLAY :
+                ALWAYS_FALSE
+            ipo_effects = Effects(ipo_effects; nonoverlayed)
+        end
+
+        restrict_abstract_call_sites = isa(def, Module)
+
+        parentid = frameid = cycleid = 0
+
+        this = new(
+            mi, valid_worlds, mod, sptypes, slottypes, src, cfg, spec_info,
+            currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, bb_saw_latestworld, ssavaluetypes, ssaflags, edges, stmt_info,
+            tasks, pclimitations, limitations, cycle_backedges, callstack, parentid, frameid, cycleid,
+            result, unreachable, bestguess, exc_bestguess, ipo_effects,
+            _time_ns(), 0.0, 0, 0,
+            restrict_abstract_call_sites, cache_mode, insert_coverage,
+            interp)
+
+        # some more setups
+        if !iszero(cache_mode & CACHE_MODE_GLOBAL)
+            push!(callstack, this)
+            this.cycleid = this.frameid = length(callstack)
+        end
+
+        # Apply generated function restrictions
+        if src.min_world != 1 || src.max_world != typemax(UInt)
+            # From generated functions
+            update_valid_age!(this, world, WorldRange(src.min_world, src.max_world))
+        end
+
+        return this
+    end
+end
+
+gethandler(frame::InferenceState, pc::Int=frame.currpc) = gethandler(frame.handler_info, pc)
+gethandler(::Nothing, ::Int) = nothing
+function gethandler(handler_info::HandlerInfo, pc::Int)
+    handler_idx = handler_info.handler_at[pc][1]
+    handler_idx == 0 && return nothing
+    return handler_info.handlers[handler_idx]
+end
+
+is_nonoverlayed(m::Method) = !isdefined(m, :external_mt)
+is_nonoverlayed(interp::AbstractInterpreter) = !isoverlayed(method_table(interp))
+isoverlayed(::MethodTableView) = error("unsatisfied MethodTableView interface")
+isoverlayed(::InternalMethodTable) = false
+isoverlayed(::OverlayMethodTable) = true
+isoverlayed(mt::CachedMethodTable) = isoverlayed(mt.table)
+
+is_inferred(sv::InferenceState) = is_inferred(sv.result)
+is_inferred(result::InferenceResult) = result.result !== nothing
+
+was_reached(sv::InferenceState, pc::Int) = sv.ssavaluetypes[pc] !== NOT_FOUND
+
+struct ComputeTryCatch{T<:Handler} end
+
+const compute_trycatch = ComputeTryCatch{SimpleHandler}()
+
+(compute_trycatch::ComputeTryCatch{SimpleHandler})(ir::IRCode) =
+    compute_trycatch(ir.stmts.stmt, ir.cfg.blocks)
+
+"""
+    (::ComputeTryCatch{Handler})(code, [, bbs]) -> handler_info::Union{Nothing,HandlerInfo{Handler}}
+    const compute_trycatch = ComputeTryCatch{SimpleHandler}()
+
+Given the code of a function, compute, at every statement, the current
+try/catch handler, and the current exception stack top. This function returns
+a tuple of:
+
+    1. `handler_info.handler_at`: A statement length vector of tuples
+       `(catch_handler, exception_stack)`, which are indices into `handlers`
+
+    2. `handler_info.handlers`: A `Handler` vector of handlers
+"""
+function (::ComputeTryCatch{Handler})(code::Vector{Any}, bbs::Union{Vector{BasicBlock},Nothing}=nothing) where Handler
+    # The goal initially is to record the frame like this for the state at exit:
+    # 1: (enter 3) # == 0
+    # 3: (expr)    # == 1
+    # 3: (leave %1) # == 1
+    # 4: (expr)    # == 0
+    # then we can find all `try`s by walking backwards from :enter statements,
+    # and all `catch`es by looking at the statement after the :enter
+    n = length(code)
+    ip = BitSet()
+    ip.offset = 0 # for _bits_findnext
+    push!(ip, n + 1)
+    handler_info = nothing
+
+    # start from all :enter statements and record the location of the try
+    for pc = 1:n
+        stmt = code[pc]
+        if isa(stmt, EnterNode)
+            (;handlers, handler_at) = handler_info =
+                (handler_info === nothing ? HandlerInfo{Handler}(Handler[], fill((0, 0), n)) : handler_info)
+            l = stmt.catch_dest
+            (bbs !== nothing) && (l != 0) && (l = first(bbs[l].stmts))
+            push!(handlers, Handler(stmt, pc))
+            handler_id = length(handlers)
+            handler_at[pc + 1] = (handler_id, 0)
+            push!(ip, pc + 1)
+            if l != 0
+                handler_at[l] = (0, handler_id)
+                push!(ip, l)
+            end
+        end
+    end
+
+    if handler_info === nothing
+        return nothing
+    end
+
+    # now forward those marks to all :leave statements
+    (;handlers, handler_at) = handler_info
+    while true
+        # make progress on the active ip set
+        pc = _bits_findnext(ip.bits, 0)::Int
+        pc > n && break
+        while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
+            pc´ = pc + 1 # next program-counter (after executing instruction)
+            delete!(ip, pc)
+            cur_stacks = handler_at[pc]
+            @assert cur_stacks != (0, 0) "unbalanced try/catch"
+            stmt = code[pc]
+            if isa(stmt, GotoNode)
+                pc´ = stmt.label
+                (bbs !== nothing) && (pc´ = first(bbs[pc´].stmts))
+            elseif isa(stmt, GotoIfNot)
+                l = stmt.dest::Int
+                (bbs !== nothing) && (l = first(bbs[l].stmts))
+                if handler_at[l] != cur_stacks
+                    @assert handler_at[l][1] == 0 || handler_at[l][1] == cur_stacks[1] "unbalanced try/catch"
+                    handler_at[l] = cur_stacks
+                    push!(ip, l)
+                end
+            elseif isa(stmt, ReturnNode)
+                @assert !isdefined(stmt, :val) || cur_stacks[1] == 0 "unbalanced try/catch"
+                break
+            elseif isa(stmt, EnterNode)
+                l = stmt.catch_dest
+                (bbs !== nothing) && (l != 0) && (l = first(bbs[l].stmts))
+                # We assigned a handler number above. Here we just merge that
+                # with out current handler information.
+                if l != 0
+                    handler_at[l] = (cur_stacks[1], handler_at[l][2])
+                end
+                cur_stacks = (handler_at[pc´][1], cur_stacks[2])
+            elseif isa(stmt, Expr)
+                head = stmt.head
+                if head === :leave
+                    l = 0
+                    for j = 1:length(stmt.args)
+                        arg = stmt.args[j]
+                        if arg === nothing
+                            continue
+                        else
+                            enter_stmt = code[(arg::SSAValue).id]
+                            if enter_stmt === nothing
+                                continue
+                            end
+                            @assert isa(enter_stmt, EnterNode) "malformed :leave"
+                        end
+                        l += 1
+                    end
+                    cur_hand = cur_stacks[1]
+                    for _ = 1:l
+                        cur_hand = handler_at[get_enter_idx(handlers[cur_hand])][1]
+                    end
+                    cur_stacks = (cur_hand, cur_stacks[2])
+                    cur_stacks == (0, 0) && break
+                elseif head === :pop_exception
+                    cur_stacks = (cur_stacks[1], handler_at[(stmt.args[1]::SSAValue).id][2])
+                    cur_stacks == (0, 0) && break
+                end
+            end
+
+            pc´ > n && break # can't proceed with the fast-path fall-through
+            if handler_at[pc´] != cur_stacks
+                handler_at[pc´] = cur_stacks
+            elseif !in(pc´, ip)
+                break  # already visited
+            end
+            pc = pc´
+        end
+    end
+
+    @assert first(ip) == n + 1
+    return handler_info
+end
+
+# check if coverage mode is enabled
+should_insert_coverage(mod::Module, debuginfo::DebugInfo) = should_instrument(mod, debuginfo, true)
+
+function should_instrument(mod::Module, debuginfo::DebugInfo, only_if_affects_optimizer::Bool=false)
+    instrumentation_enabled(mod, only_if_affects_optimizer) && return true
+    JLOptions().code_coverage == 3 || JLOptions().malloc_log == 3 || return false
+    # path-specific coverage mode: if any line falls in a tracked file enable coverage for all
+    return _should_instrument(debuginfo)
+end
+
+_should_instrument(loc::Symbol) = is_file_tracked(loc)
+_should_instrument(loc::Method) = _should_instrument(loc.file)
+_should_instrument(loc::MethodInstance) = _should_instrument(loc.def)
+_should_instrument(::Module) = false
+_should_instrument(::Nothing) = false
+function _should_instrument(info::DebugInfo)
+    linetable = info.linetable
+    linetable === nothing || (_should_instrument(linetable) && return true)
+    _should_instrument(info.def) && return true
+    return false
+end
+
+function InferenceState(result::InferenceResult, cache_mode::UInt8, interp::AbstractInterpreter)
+    # prepare an InferenceState object for inferring lambda
+    world = get_inference_world(interp)
+    mi = result.linfo
+    src = retrieve_code_info(mi, world)
+    src === nothing && return nothing
+    maybe_validate_code(mi, src, "lowered")
+    return InferenceState(result, src, cache_mode, interp)
+end
+InferenceState(result::InferenceResult, cache_mode::Symbol, interp::AbstractInterpreter) =
+    InferenceState(result, convert_cache_mode(cache_mode), interp)
+InferenceState(result::InferenceResult, src::CodeInfo, cache_mode::Symbol, interp::AbstractInterpreter) =
+    InferenceState(result, src, convert_cache_mode(cache_mode), interp)
+
+function convert_cache_mode(cache_mode::Symbol)
+    if cache_mode === :global
+        return CACHE_MODE_GLOBAL
+    elseif cache_mode === :local
+        return CACHE_MODE_LOCAL
+    elseif cache_mode === :no
+        return CACHE_MODE_NULL
+    end
+    error("unexpected `cache_mode` is given")
+end
+
+"""
+    constrains_param(var::TypeVar, sig, covariant::Bool, type_constrains::Bool)
+
+Check if `var` will be constrained to have a definite value
+in any concrete leaftype subtype of `sig`.
+
+It is used as a helper to determine whether type intersection is guaranteed to be able to
+find a value for a particular type parameter.
+A necessary condition for type intersection to not assign a parameter is that it only
+appears in a `Union[All]` and during subtyping some other union component (that does not
+constrain the type parameter) is selected.
+
+The `type_constrains` flag determines whether Type{T} is considered to be constraining
+`T`. This is not true in general, because of the existence of types with free type
+parameters, however, some callers would like to ignore this corner case.
+"""
+function constrains_param(var::TypeVar, @nospecialize(typ), covariant::Bool, type_constrains::Bool=false)
+    typ === var && return true
+    while typ isa UnionAll
+        covariant && constrains_param(var, typ.var.ub, covariant, type_constrains) && return true
+        # typ.var.lb doesn't constrain var
+        typ = typ.body
+    end
+    if typ isa Union
+        # for unions, verify that both options would constrain var
+        ba = constrains_param(var, typ.a, covariant, type_constrains)
+        bb = constrains_param(var, typ.b, covariant, type_constrains)
+        (ba && bb) && return true
+    elseif typ isa DataType
+        # return true if any param constrains var
+        fc = length(typ.parameters)
+        if fc > 0
+            if typ.name === Tuple.name
+                # vararg tuple needs special handling
+                for i in 1:(fc - 1)
+                    p = typ.parameters[i]
+                    constrains_param(var, p, covariant, type_constrains) && return true
+                end
+                lastp = typ.parameters[fc]
+                vararg = unwrap_unionall(lastp)
+                if vararg isa Core.TypeofVararg && isdefined(vararg, :N)
+                    constrains_param(var, vararg.N, covariant, type_constrains) && return true
+                    # T = vararg.parameters[1] doesn't constrain var
+                else
+                    constrains_param(var, lastp, covariant, type_constrains) && return true
+                end
+            else
+                if typ.name === typename(Type) && typ.parameters[1] === var && var.ub === Any
+                    # Types with free type parameters are <: Type cause the typevar
+                    # to be unconstrained because Type{T} with free typevars is illegal
+                    return type_constrains
+                end
+                for i in 1:fc
+                    p = typ.parameters[i]
+                    constrains_param(var, p, false, type_constrains) && return true
+                end
+            end
+        end
+    end
+    return false
+end
+
+const EMPTY_SPTYPES = VarState[]
+
+function sptypes_from_meth_instance(mi::MethodInstance)
+    def = mi.def
+    isa(def, Method) || return EMPTY_SPTYPES # toplevel
+    sig = def.sig
+    if isempty(mi.sparam_vals)
+        isa(sig, UnionAll) || return EMPTY_SPTYPES
+        # mi is unspecialized
+        spvals = Any[]
+        sig′ = sig
+        while isa(sig′, UnionAll)
+            push!(spvals, sig′.var)
+            sig′ = sig′.body
+        end
+    else
+        spvals = mi.sparam_vals
+    end
+    nvals = length(spvals)
+    sptypes = Vector{VarState}(undef, nvals)
+    for i = 1:nvals
+        v = spvals[i]
+        if v isa TypeVar
+            temp = sig
+            for j = 1:i-1
+                temp = temp.body
+            end
+            vᵢ = (temp::UnionAll).var
+            sigtypes = (unwrap_unionall(temp)::DataType).parameters
+            for j = 1:length(sigtypes)
+                sⱼ = sigtypes[j]
+                if isType(sⱼ) && sⱼ.parameters[1] === vᵢ
+                    # if this parameter came from `arg::Type{T}`,
+                    # then `arg` is more precise than `Type{T} where lb<:T<:ub`
+                    ty = fieldtype(mi.specTypes, j)
+                    @goto ty_computed
+                elseif (va = va_from_vatuple(sⱼ)) !== nothing
+                    # if this parameter came from `::Tuple{.., Vararg{T,vᵢ}}`,
+                    # then `vᵢ` is known to be `Int`
+                    if isdefined(va, :N) && va.N === vᵢ
+                        ty = Int
+                        @goto ty_computed
+                    end
+                end
+            end
+            ub = unwraptv_ub(v)
+            if has_free_typevars(ub)
+                ub = Any
+            end
+            lb = unwraptv_lb(v)
+            if has_free_typevars(lb)
+                lb = Bottom
+            end
+            if Any === ub && lb === Bottom
+                ty = Any
+            else
+                tv = TypeVar(v.name, lb, ub)
+                ty = UnionAll(tv, Type{tv})
+            end
+            @label ty_computed
+            undef = !(let sig=sig
+                # if the specialized signature `linfo.specTypes` doesn't contain any free
+                # type variables, we can use it for a more accurate analysis of whether `v`
+                # is constrained or not, otherwise we should use `def.sig` which always
+                # doesn't contain any free type variables
+                if !has_free_typevars(mi.specTypes)
+                    sig = mi.specTypes
+                end
+                @assert !has_free_typevars(sig)
+                constrains_param(v, sig, #=covariant=#true)
+            end)
+        elseif isvarargtype(v)
+            # if this parameter came from `func(..., ::Vararg{T,v})`,
+            # so the type is known to be `Int`
+            ty = Int
+            undef = false
+        else
+            ty = Const(v)
+            undef = false
+        end
+        sptypes[i] = VarState(ty, undef)
+    end
+    return sptypes
+end
+
+function va_from_vatuple(@nospecialize(t))
+    @_foldable_meta
+    t = unwrap_unionall(t)
+    if isa(t, DataType)
+        n = length(t.parameters)
+        if n > 0
+            va = t.parameters[n]
+            if isvarargtype(va)
+               return va
+            end
+        end
+    end
+    return nothing
+end
+
+_topmod(sv::InferenceState) = _topmod(frame_module(sv))
+
+function record_ssa_assign!(𝕃ᵢ::AbstractLattice, ssa_id::Int, @nospecialize(new), frame::InferenceState)
+    ssavaluetypes = frame.ssavaluetypes
+    old = ssavaluetypes[ssa_id]
+    if old === NOT_FOUND || !is_lattice_equal(𝕃ᵢ, new, old)
+        ssavaluetypes[ssa_id] = new
+        W = frame.ip
+        for r in frame.ssavalue_uses[ssa_id]
+            if was_reached(frame, r)
+                usebb = block_for_inst(frame.cfg, r)
+                if usebb != frame.currbb || r < ssa_id
+                    push!(W, usebb)
+                end
+            end
+        end
+    end
+    return nothing
+end
+
+function narguments(sv::InferenceState, include_va::Bool=true)
+    nargs = Int(sv.src.nargs)
+    if !include_va
+        nargs -= sv.src.isva
+    end
+    return nargs
+end
+
+# IRInterpretationState
+# =====================
+
+# TODO add `result::InferenceResult` and put the irinterp result into the inference cache?
+mutable struct IRInterpretationState
+    const spec_info::SpecInfo
+    const ir::IRCode
+    const mi::MethodInstance
+    valid_worlds::WorldRange
+    curridx::Int
+    time_caches::Float64
+    time_paused::UInt64
+    const argtypes_refined::Vector{Bool}
+    const sptypes::Vector{VarState}
+    const tpdum::TwoPhaseDefUseMap
+    const ssa_refined::BitSet
+    const lazyreachability::LazyCFGReachability
+    const tasks::Vector{WorkThunk}
+    const edges::Vector{Any}
+    callstack #::Vector{AbsIntState}
+    frameid::Int
+    parentid::Int
+
+    function IRInterpretationState(
+            interp::AbstractInterpreter, spec_info::SpecInfo, ir::IRCode,
+            mi::MethodInstance, argtypes::Vector{Any}, min_world::UInt, max_world::UInt
+        )
+        curridx = 1
+        given_argtypes = Vector{Any}(undef, length(argtypes))
+        for i = 1:length(given_argtypes)
+            given_argtypes[i] = widenslotwrapper(argtypes[i])
+        end
+        if isa(mi.def, Method)
+            argtypes_refined = Bool[!⊑(optimizer_lattice(interp), ir.argtypes[i], given_argtypes[i])
+                for i = 1:length(given_argtypes)]
+        else
+            argtypes_refined = Bool[false for i = 1:length(given_argtypes)]
+        end
+        empty!(ir.argtypes)
+        append!(ir.argtypes, given_argtypes)
+        tpdum = TwoPhaseDefUseMap(length(ir.stmts))
+        ssa_refined = BitSet()
+        lazyreachability = LazyCFGReachability(ir)
+        valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? get_world_counter() : max_world)
+        if !(get_inference_world(interp) in valid_worlds)
+            error("invalid age range update")
+        end
+        tasks = WorkThunk[]
+        edges = Any[]
+        callstack = AbsIntState[]
+        return new(spec_info, ir, mi, valid_worlds,
+                curridx, 0.0, 0, argtypes_refined, ir.sptypes, tpdum,
+                ssa_refined, lazyreachability, tasks, edges, callstack, 0, 0)
+    end
+end
+
+function IRInterpretationState(
+        interp::AbstractInterpreter, codeinst::CodeInstance, mi::MethodInstance,
+        argtypes::Vector{Any}, @nospecialize(src)
+    )
+    @assert get_ci_mi(codeinst) === mi "method instance is not synced with code instance"
+    if isa(src, String)
+        src = _uncompressed_ir(codeinst, src)
+    else
+        isa(src, CodeInfo) || return nothing
+    end
+    spec_info = SpecInfo(src)
+    ir = inflate_ir(src, mi)
+    argtypes = va_process_argtypes(optimizer_lattice(interp), argtypes, src.nargs, src.isva, mi)
+    return IRInterpretationState(interp, spec_info, ir, mi, argtypes,
+                                 codeinst.min_world, codeinst.max_world)
+end
+
+# AbsIntState
+# ===========
+
+const AbsIntState = Union{InferenceState,IRInterpretationState}
+
+function print_callstack(frame::AbsIntState)
+    print("=================== Callstack: ==================\n")
+    frames = frame.callstack::Vector{AbsIntState}
+    for idx = (frame.frameid == 0 ? 0 : 1):length(frames)
+        sv = (idx == 0 ? frame : frames[idx])
+        idx == frame.frameid && print("*")
+        print("[")
+        print(idx)
+        if sv isa InferenceState && !isa(sv.interp, NativeInterpreter)
+            print(", ")
+            print(typeof(sv.interp))
+        end
+        print("] ")
+        print(frame_instance(sv))
+        is_cached(sv) || print("  [not globally cached]")
+        sv.parentid == idx - 1 || print(" [parent=", sv.parentid, "]")
+        isempty(callers_in_cycle(sv)) || print(" [cycle=", sv.cycleid, "]")
+        println()
+        @assert sv.frameid == idx
+    end
+    print("================= End callstack ==================\n")
+end
+
+frame_instance(sv::InferenceState) = sv.linfo
+frame_instance(sv::IRInterpretationState) = sv.mi
+
+function frame_module(sv::AbsIntState)
+    mi = frame_instance(sv)
+    def = mi.def
+    isa(def, Module) && return def
+    return def.module
+end
+
+frame_parent(sv::AbsIntState) = sv.parentid == 0 ? nothing : (sv.callstack::Vector{AbsIntState})[sv.parentid]
+
+function cycle_parent(sv::InferenceState)
+    sv.parentid == 0 && return nothing
+    callstack = sv.callstack::Vector{AbsIntState}
+    sv = callstack[sv.cycleid]::InferenceState
+    sv.parentid == 0 && return nothing
+    return callstack[sv.parentid]
+end
+cycle_parent(sv::IRInterpretationState) = frame_parent(sv)
+
+
+# add the orphan child to the parent and the parent to the child
+function assign_parentchild!(child::InferenceState, parent::AbsIntState)
+    @assert child.frameid in (0, 1)
+    child.callstack = callstack = parent.callstack::Vector{AbsIntState}
+    child.parentid = parent.frameid
+    push!(callstack, child)
+    child.cycleid = child.frameid = length(callstack)
+    nothing
+end
+function assign_parentchild!(child::IRInterpretationState, parent::AbsIntState)
+    @assert child.frameid in (0, 1)
+    child.callstack = callstack = parent.callstack::Vector{AbsIntState}
+    child.parentid = parent.frameid
+    push!(callstack, child)
+    child.frameid = length(callstack)
+    nothing
+end
+
+function is_constproped(sv::InferenceState)
+    (;overridden_by_const) = sv.result
+    return overridden_by_const !== nothing
+end
+is_constproped(::IRInterpretationState) = true
+
+is_cached(sv::InferenceState) = !iszero(sv.cache_mode & CACHE_MODE_GLOBAL)
+is_cached(::IRInterpretationState) = false
+
+spec_info(sv::InferenceState) = sv.spec_info
+spec_info(sv::IRInterpretationState) = sv.spec_info
+
+propagate_inbounds(sv::AbsIntState) = spec_info(sv).propagate_inbounds
+method_for_inference_limit_heuristics(sv::AbsIntState) = spec_info(sv).method_for_inference_limit_heuristics
+
+function is_effect_overridden(sv::AbsIntState, effect::Symbol)
+    if is_effect_overridden(frame_instance(sv), effect)
+        return true
+    elseif is_effect_overridden(decode_statement_effects_override(sv), effect)
+        return true
+    end
+    return false
+end
+function is_effect_overridden(mi::MethodInstance, effect::Symbol)
+    def = mi.def
+    return isa(def, Method) && is_effect_overridden(def, effect)
+end
+is_effect_overridden(method::Method, effect::Symbol) = is_effect_overridden(decode_effects_override(method.purity), effect)
+is_effect_overridden(override::EffectsOverride, effect::Symbol) = getfield(override, effect)
+
+has_conditional(𝕃::AbstractLattice, ::InferenceState) = has_conditional(𝕃)
+has_conditional(::AbstractLattice, ::IRInterpretationState) = false
+
+# work towards converging the valid age range for sv
+function update_valid_age!(sv::AbsIntState, world, valid_worlds::WorldRange)
+    valid_worlds = intersect(sv.valid_worlds, valid_worlds)
+    if !(world in valid_worlds)
+        error("invalid age range update")
+    end
+    sv.valid_worlds = valid_worlds
+    return valid_worlds
+end
+
+"""
+    AbsIntStackUnwind(sv::AbsIntState)
+
+Iterate through all callers of the given `AbsIntState` in the abstract interpretation stack
+(including the given `AbsIntState` itself), visiting children before their parents (i.e.
+ascending the tree from the given `AbsIntState`).
+Note that cycles may be visited in any order.
+"""
+struct AbsIntStackUnwind
+    callstack::Vector{AbsIntState}
+    AbsIntStackUnwind(sv::AbsIntState) = new(sv.callstack::Vector{AbsIntState})
+end
+function iterate(unw::AbsIntStackUnwind, frame::Int=length(unw.callstack))
+    frame == 0 && return nothing
+    return (unw.callstack[frame], frame - 1)
+end
+
+struct AbsIntCycle
+    frames::Vector{AbsIntState}
+    cycleid::Int
+    cycletop::Int
+end
+iterate(unw::AbsIntCycle) = unw.cycleid == 0 ? nothing : (unw.frames[unw.cycletop], unw.cycletop)
+function iterate(unw::AbsIntCycle, frame::Int)
+    frame == unw.cycleid && return nothing
+    return (unw.frames[frame - 1], frame - 1)
+end
+
+"""
+    callers_in_cycle(sv::AbsIntState)
+
+Iterate through all callers of the given `AbsIntState` in the abstract
+interpretation stack (including the given `AbsIntState` itself) that are part
+of the same cycle, only if it is part of a cycle with multiple frames.
+"""
+function callers_in_cycle(sv::InferenceState)
+    callstack = sv.callstack::Vector{AbsIntState}
+    cycletop = cycleid = sv.cycleid
+    while cycletop < length(callstack)
+        frame = callstack[cycletop + 1]
+        frame isa InferenceState || break
+        frame.cycleid == cycleid || break
+        cycletop += 1
+    end
+    return AbsIntCycle(callstack, cycletop == cycleid ? 0 : cycleid, cycletop)
+end
+callers_in_cycle(sv::IRInterpretationState) = AbsIntCycle(sv.callstack::Vector{AbsIntState}, 0, 0)
+
+get_curr_ssaflag(sv::InferenceState) = sv.ssaflags[sv.currpc]
+get_curr_ssaflag(sv::IRInterpretationState) = sv.ir.stmts[sv.curridx][:flag]
+
+has_curr_ssaflag(sv::InferenceState, flag::UInt32) = has_flag(sv.ssaflags[sv.currpc], flag)
+has_curr_ssaflag(sv::IRInterpretationState, flag::UInt32) = has_flag(sv.ir.stmts[sv.curridx][:flag], flag)
+
+function set_curr_ssaflag!(sv::InferenceState, flag::UInt32, mask::UInt32=typemax(UInt32))
+    curr_flag = sv.ssaflags[sv.currpc]
+    sv.ssaflags[sv.currpc] = (curr_flag & ~mask) | flag
+    nothing
+end
+
+add_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.ssaflags[sv.currpc] |= flag
+add_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32) = add_flag!(sv.ir.stmts[sv.curridx], flag)
+
+sub_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.ssaflags[sv.currpc] &= ~flag
+sub_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32) = sub_flag!(sv.ir.stmts[sv.curridx], flag)
+
+function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::Effects)
+    if effects.effect_free === EFFECT_FREE_GLOBALLY
+        # This tracks the global effects
+        effects = Effects(effects; effect_free=ALWAYS_TRUE)
+    end
+    caller.ipo_effects = merge_effects(caller.ipo_effects, effects)
+    nothing
+end
+merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return
+
+decode_statement_effects_override(sv::InferenceState) = decode_statement_effects_override(sv.src.ssaflags[sv.currpc])
+decode_statement_effects_override(sv::IRInterpretationState) = decode_statement_effects_override(UInt32(0))
+
+struct InferenceLoopState
+    rt
+    effects::Effects
+    function InferenceLoopState(@nospecialize(rt), effects::Effects)
+        new(rt, effects)
+    end
+end
+
+bail_out_toplevel_call(::AbstractInterpreter, sv::InferenceState) = sv.restrict_abstract_call_sites
+bail_out_toplevel_call(::AbstractInterpreter, ::IRInterpretationState) = false
+
+bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) =
+    state.rt === Any && !is_foldable(state.effects)
+bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
+    state.rt === Any && !is_foldable(state.effects)
+
+bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) =
+    state.rt === Any
+bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
+    state.rt === Any
+
+add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return
+add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return
+
+function get_max_methods(interp::AbstractInterpreter, @nospecialize(f), sv::AbsIntState)
+    fmax = get_max_methods_for_func(f)
+    fmax !== nothing && return fmax
+    return get_max_methods(interp, sv)
+end
+function get_max_methods(interp::AbstractInterpreter, @nospecialize(f))
+    fmax = get_max_methods_for_func(f)
+    fmax !== nothing && return fmax
+    return get_max_methods(interp)
+end
+function get_max_methods(interp::AbstractInterpreter, sv::AbsIntState)
+    mmax = get_max_methods_for_module(sv)
+    mmax !== nothing && return mmax
+    return get_max_methods(interp)
+end
+get_max_methods(interp::AbstractInterpreter) = InferenceParams(interp).max_methods
+
+function get_max_methods_for_func(@nospecialize(f))
+    if f !== nothing
+        fmm = typeof(f).name.max_methods
+        fmm !== UInt8(0) && return Int(fmm)
+    end
+    return nothing
+end
+get_max_methods_for_module(sv::AbsIntState) = get_max_methods_for_module(frame_module(sv))
+function get_max_methods_for_module(mod::Module)
+    max_methods = ccall(:jl_get_module_max_methods, Cint, (Any,), mod) % Int
+    max_methods < 0 && return nothing
+    return max_methods
+end
+
+"""
+    Future{T}
+
+Assign-once delayed return value for a value of type `T`, similar to RefValue{T}.
+Can be constructed in one of three ways:
+
+1. With an immediate as `Future{T}(val)`
+2. As an assign-once storage location with `Future{T}()`. Assigned (once) using `f[] = val`.
+3. As a delayed computation with `Future{T}(callback, dep, interp, sv)` to have
+   `sv` arrange to call the `callback` with the result of `dep` when it is ready.
+
+Use `isready` to check if the value is ready, and `getindex` to get the value.
+"""
+struct Future{T}
+    later::Union{Nothing,RefValue{T}}
+    now::Union{Nothing,T}
+    function Future{T}() where {T}
+        later = RefValue{T}()
+        @assert !isassigned(later) "Future{T}() is not allowed for inlinealloc T"
+        new{T}(later, nothing)
+    end
+    Future{T}(x) where {T} = new{T}(nothing, x)
+    Future(x::T) where {T} = new{T}(nothing, x)
+end
+isready(f::Future) = f.later === nothing || isassigned(f.later)
+getindex(f::Future{T}) where {T} = (later = f.later; later === nothing ? f.now::T : later[])
+function setindex!(f::Future, v)
+    later = something(f.later)
+    @assert !isassigned(later)
+    later[] = v
+    return f
+end
+convert(::Type{Future{T}}, x) where {T} = Future{T}(x) # support return type conversion
+convert(::Type{Future{T}}, x::Future) where {T} = x::Future{T}
+function Future{T}(f, immediate::Bool, interp::AbstractInterpreter, sv::AbsIntState) where {T}
+    if immediate
+        return Future{T}(f(interp, sv))
+    else
+        @assert applicable(f, interp, sv)
+        result = Future{T}()
+        push!(sv.tasks, function (interp, sv)
+            result[] = f(interp, sv)
+            return true
+        end)
+        return result
+    end
+end
+function Future{T}(f, prev::Future{S}, interp::AbstractInterpreter, sv::AbsIntState) where {T, S}
+    later = prev.later
+    if later === nothing
+        return Future{T}(f(prev[], interp, sv))
+    else
+        @assert Core._hasmethod(Tuple{Core.Typeof(f), S, typeof(interp), typeof(sv)})
+        result = Future{T}()
+        @assert !isa(sv, InferenceState) || interp === sv.interp
+        push!(sv.tasks, function (interp, sv)
+            result[] = f(later[], interp, sv) # capture just later, instead of all of prev
+            return true
+        end)
+        return result
+    end
+end
+
+"""
+    doworkloop(args...)
+
+Run a tasks inside the abstract interpreter, returning false if there are none.
+Tasks will be run in DFS post-order tree order, such that all child tasks will
+be run in the order scheduled, prior to running any subsequent tasks. This
+allows tasks to generate more child tasks, which will be run before anything else.
+Each task will be run repeatedly when returning `false`, until it returns `true`.
+"""
+function doworkloop(interp::AbstractInterpreter, sv::AbsIntState)
+    tasks = sv.tasks
+    prev = length(tasks)
+    prevcallstack = length(sv.callstack)
+    prev == 0 && return false
+    task = pop!(tasks)
+    completed = task(interp, sv)
+    tasks = sv.tasks # allow dropping gc root over the previous call
+    completed isa Bool || throw(TypeError(:return, "", Bool, task)) # print the task on failure as part of the error message, instead of just "@ workloop:line"
+    if !completed
+        @assert (length(tasks) >= prev || length(sv.callstack) > prevcallstack) "Task did not complete, but also did not create any child tasks"
+        push!(tasks, task)
+    end
+    # efficient post-order visitor: items pushed are executed in reverse post order such
+    # that later items are executed before earlier ones, but are fully executed
+    # (including any dependencies scheduled by them) before going on to the next item
+    reverse!(tasks, #=start=#prev)
+    return true
+end
+
+
+#macro workthunk(name::Symbol, body)
+#    name = esc(name)
+#    body = esc(body)
+#    return replace_linenums!(
+#        :(function $name($(esc(interp)), $(esc(sv)))
+#              $body
+#          end), __source__)
+#end
diff --git a/base/compiler/methodtable.jl b/Compiler/src/methodtable.jl
similarity index 78%
rename from base/compiler/methodtable.jl
rename to Compiler/src/methodtable.jl
index 8c79b2d8a8468..24a8b1ecf8242 100644
--- a/base/compiler/methodtable.jl
+++ b/Compiler/src/methodtable.jl
@@ -16,10 +16,7 @@ function iterate(result::MethodLookupResult, args...)
 end
 getindex(result::MethodLookupResult, idx::Int) = getindex(result.matches, idx)::MethodMatch
 
-struct MethodMatchResult
-    matches::MethodLookupResult
-    overlayed::Bool
-end
+abstract type MethodTableView end
 
 """
     struct InternalMethodTable <: MethodTableView
@@ -55,14 +52,14 @@ Overlays another method table view with an additional local fast path cache that
 can respond to repeated, identical queries faster than the original method table.
 """
 struct CachedMethodTable{T<:MethodTableView} <: MethodTableView
-    cache::IdDict{MethodMatchKey, Union{Nothing,MethodMatchResult}}
+    cache::IdDict{MethodMatchKey, Union{Nothing,MethodLookupResult}}
     table::T
 end
-CachedMethodTable(table::T) where T = CachedMethodTable{T}(IdDict{MethodMatchKey, Union{Nothing,MethodMatchResult}}(), table)
+CachedMethodTable(table::T) where T = CachedMethodTable{T}(IdDict{MethodMatchKey, Union{Nothing,MethodLookupResult}}(), table)
 
 """
     findall(sig::Type, view::MethodTableView; limit::Int=-1) ->
-        MethodMatchResult(matches::MethodLookupResult, overlayed::Bool) or nothing
+        matches::MethodLookupResult or nothing
 
 Find all methods in the given method table `view` that are applicable to the given signature `sig`.
 If no applicable methods are found, an empty result is returned.
@@ -70,11 +67,8 @@ If the number of applicable methods exceeded the specified `limit`, `nothing` is
 Note that the default setting `limit=-1` does not limit the number of applicable methods.
 `overlayed` indicates if any of the matching methods comes from an overlayed method table.
 """
-function findall(@nospecialize(sig::Type), table::InternalMethodTable; limit::Int=-1)
-    result = _findall(sig, nothing, table.world, limit)
-    result === nothing && return nothing
-    return MethodMatchResult(result, false)
-end
+findall(@nospecialize(sig::Type), table::InternalMethodTable; limit::Int=-1) =
+    _findall(sig, nothing, table.world, limit)
 
 function findall(@nospecialize(sig::Type), table::OverlayMethodTable; limit::Int=-1)
     result = _findall(sig, table.mt, table.world, limit)
@@ -82,20 +76,18 @@ function findall(@nospecialize(sig::Type), table::OverlayMethodTable; limit::Int
     nr = length(result)
     if nr ≥ 1 && result[nr].fully_covers
         # no need to fall back to the internal method table
-        return MethodMatchResult(result, true)
+        return result
     end
     # fall back to the internal method table
     fallback_result = _findall(sig, nothing, table.world, limit)
     fallback_result === nothing && return nothing
     # merge the fallback match results with the internal method table
-    return MethodMatchResult(
-        MethodLookupResult(
-            vcat(result.matches, fallback_result.matches),
-            WorldRange(
-                max(result.valid_worlds.min_world, fallback_result.valid_worlds.min_world),
-                min(result.valid_worlds.max_world, fallback_result.valid_worlds.max_world)),
-            result.ambig | fallback_result.ambig),
-        !isempty(result))
+    return MethodLookupResult(
+        vcat(result.matches, fallback_result.matches),
+        WorldRange(
+            max(result.valid_worlds.min_world, fallback_result.valid_worlds.min_world),
+            min(result.valid_worlds.max_world, fallback_result.valid_worlds.max_world)),
+        result.ambig | fallback_result.ambig)
 end
 
 function _findall(@nospecialize(sig::Type), mt::Union{Nothing,MethodTable}, world::UInt, limit::Int)
@@ -122,7 +114,7 @@ end
 
 """
     findsup(sig::Type, view::MethodTableView) ->
-        (match::MethodMatch, valid_worlds::WorldRange, overlayed::Bool) or nothing
+        (match::Union{MethodMatch,Nothing}, valid_worlds::WorldRange, overlayed::Bool)
 
 Find the (unique) method such that `sig <: match.method.sig`, while being more
 specific than any other method with the same property. In other words, find the method
@@ -138,21 +130,19 @@ In both cases `nothing` is returned.
 
 `overlayed` indicates if any of the matching methods comes from an overlayed method table.
 """
-function findsup(@nospecialize(sig::Type), table::InternalMethodTable)
-    return (_findsup(sig, nothing, table.world)..., false)
-end
+findsup(@nospecialize(sig::Type), table::InternalMethodTable) =
+    _findsup(sig, nothing, table.world)
 
 function findsup(@nospecialize(sig::Type), table::OverlayMethodTable)
     match, valid_worlds = _findsup(sig, table.mt, table.world)
-    match !== nothing && return match, valid_worlds, true
+    match !== nothing && return match, valid_worlds
     # fall back to the internal method table
     fallback_match, fallback_valid_worlds = _findsup(sig, nothing, table.world)
     return (
         fallback_match,
         WorldRange(
             max(valid_worlds.min_world, fallback_valid_worlds.min_world),
-            min(valid_worlds.max_world, fallback_valid_worlds.max_world)),
-        false)
+            min(valid_worlds.max_world, fallback_valid_worlds.max_world)))
 end
 
 function _findsup(@nospecialize(sig::Type), mt::Union{Nothing,MethodTable}, world::UInt)
@@ -166,8 +156,3 @@ end
 
 # This query is not cached
 findsup(@nospecialize(sig::Type), table::CachedMethodTable) = findsup(sig, table.table)
-
-isoverlayed(::MethodTableView)     = error("unsatisfied MethodTableView interface")
-isoverlayed(::InternalMethodTable) = false
-isoverlayed(::OverlayMethodTable)  = true
-isoverlayed(mt::CachedMethodTable) = isoverlayed(mt.table)
diff --git a/Compiler/src/opaque_closure.jl b/Compiler/src/opaque_closure.jl
new file mode 100644
index 0000000000000..21f2640037893
--- /dev/null
+++ b/Compiler/src/opaque_closure.jl
@@ -0,0 +1,58 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+function compute_ir_rettype(ir::IRCode)
+    rt = Union{}
+    for i = 1:length(ir.stmts)
+        stmt = ir[SSAValue(i)][:stmt]
+        if isa(stmt, Core.ReturnNode) && isdefined(stmt, :val)
+            rt = Compiler.tmerge(Compiler.argextype(stmt.val, ir), rt)
+        end
+    end
+    return Compiler.widenconst(rt)
+end
+
+function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)
+    argtypes = Vector{Any}(undef, nargs)
+    for i = 1:nargs
+        argtypes[i] = Compiler.widenconst(ir.argtypes[i+1])
+    end
+    if isva
+        lastarg = pop!(argtypes)
+        if lastarg <: Tuple
+            append!(argtypes, lastarg.parameters)
+        else
+            push!(argtypes, Vararg{Any})
+        end
+    end
+    return Tuple{argtypes...}
+end
+
+function Core.OpaqueClosure(ir::IRCode, @nospecialize env...;
+                            isva::Bool = false,
+                            slotnames::Union{Nothing,Vector{Symbol}}=nothing,
+                            kwargs...)
+    # NOTE: we need ir.argtypes[1] == typeof(env)
+    ir = Core.Compiler.copy(ir)
+    # if the user didn't specify a definition MethodInstance or filename Symbol to use for the debuginfo, set a filename now
+    ir.debuginfo.def === nothing && (ir.debuginfo.def = :var"generated IR for OpaqueClosure")
+    nargtypes = length(ir.argtypes)
+    nargs = nargtypes-1
+    sig = compute_oc_signature(ir, nargs, isva)
+    rt = compute_ir_rettype(ir)
+    src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
+    if slotnames === nothing
+        src.slotnames = fill(:none, nargtypes)
+    else
+        length(slotnames) == nargtypes || error("mismatched `argtypes` and `slotnames`")
+        src.slotnames = slotnames
+    end
+    src.slotflags = fill(zero(UInt8), nargtypes)
+    src.slottypes = copy(ir.argtypes)
+    src.min_world = ir.valid_worlds.min_world
+    src.max_world = ir.valid_worlds.max_world
+    src.isva = isva
+    src.nargs = UInt(nargtypes)
+    src = ir_to_codeinf!(src, ir)
+    src.rettype = rt
+    return Base.Experimental.generate_opaque_closure(sig, Union{}, rt, src, nargs, isva, env...; kwargs...)
+end
diff --git a/Compiler/src/optimize.jl b/Compiler/src/optimize.jl
new file mode 100644
index 0000000000000..b5704c488d273
--- /dev/null
+++ b/Compiler/src/optimize.jl
@@ -0,0 +1,1612 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#############
+# constants #
+#############
+
+# The slot has uses that are not statically dominated by any assignment
+# This is implied by `SLOT_USEDUNDEF`.
+# If this is not set, all the uses are (statically) dominated by the defs.
+# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
+const SLOT_STATICUNDEF  = 1 # slot might be used before it is defined (structurally)
+const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
+const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
+# const SLOT_CALLED      = 64
+
+# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
+
+const IR_FLAG_NULL        = zero(UInt32)
+# This statement is marked as @inbounds by user.
+# If replaced by inlining, any contained boundschecks may be removed.
+const IR_FLAG_INBOUNDS    = one(UInt32) << 0
+# This statement is marked as @inline by user
+const IR_FLAG_INLINE      = one(UInt32) << 1
+# This statement is marked as @noinline by user
+const IR_FLAG_NOINLINE    = one(UInt32) << 2
+# This statement is proven :consistent
+const IR_FLAG_CONSISTENT  = one(UInt32) << 3
+# This statement is proven :effect_free
+const IR_FLAG_EFFECT_FREE = one(UInt32) << 4
+# This statement is proven :nothrow
+const IR_FLAG_NOTHROW     = one(UInt32) << 5
+# This statement is proven :terminates_globally
+const IR_FLAG_TERMINATES  = one(UInt32) << 6
+#const IR_FLAG_TERMINATES_LOCALLY = one(UInt32) << 7
+#const IR_FLAG_NOTASKSTATE = one(UInt32) << 8
+#const IR_FLAG_INACCESSIBLEMEM = one(UInt32) << 9
+const IR_FLAG_NOUB        = one(UInt32) << 10
+#const IR_FLAG_NOUBINIB   = one(UInt32) << 11
+#const IR_FLAG_CONSISTENTOVERLAY = one(UInt32) << 12
+# This statement is :nortcall
+const IR_FLAG_NORTCALL = one(UInt32) << 13
+# An optimization pass has updated this statement in a way that may
+# have exposed information that inference did not see. Re-running
+# inference on this statement may be profitable.
+const IR_FLAG_REFINED     = one(UInt32) << 16
+# This statement has no users and may be deleted if flags get refined to IR_FLAGS_REMOVABLE
+const IR_FLAG_UNUSED      = one(UInt32) << 17
+# TODO: Both of these next two should eventually go away once
+# This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY
+const IR_FLAG_EFIIMO      = one(UInt32) << 18
+# This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY
+const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 19
+
+const NUM_IR_FLAGS = 3 # sync with julia.h
+
+const IR_FLAGS_EFFECTS =
+    IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW |
+    IR_FLAG_TERMINATES | IR_FLAG_NOUB | IR_FLAG_NORTCALL
+
+const IR_FLAGS_REMOVABLE = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_TERMINATES
+
+const IR_FLAGS_NEEDS_EA = IR_FLAG_EFIIMO | IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
+
+has_flag(curr::UInt32, flag::UInt32) = (curr & flag) == flag
+
+function iscallstmt(@nospecialize stmt)
+    stmt isa Expr || return false
+    head = stmt.head
+    return head === :call || head === :invoke || head === :foreigncall
+end
+
+function flags_for_effects(effects::Effects)
+    flags = zero(UInt32)
+    if is_consistent(effects)
+        flags |= IR_FLAG_CONSISTENT
+    end
+    if is_effect_free(effects)
+        flags |= IR_FLAG_EFFECT_FREE
+    elseif is_effect_free_if_inaccessiblememonly(effects)
+        flags |= IR_FLAG_EFIIMO
+    end
+    if is_nothrow(effects)
+        flags |= IR_FLAG_NOTHROW
+    end
+    if is_terminates(effects)
+        flags |= IR_FLAG_TERMINATES
+    end
+    if is_inaccessiblemem_or_argmemonly(effects)
+        flags |= IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
+    end
+    if is_noub(effects)
+        flags |= IR_FLAG_NOUB
+    end
+    if is_nortcall(effects)
+        flags |= IR_FLAG_NORTCALL
+    end
+    return flags
+end
+
+const TOP_TUPLE = GlobalRef(Core, :tuple)
+
+inlining_cost(@nospecialize src) =
+    src isa Union{MaybeCompressed,UInt8} ? ccall(:jl_ir_inlining_cost, InlineCostType, (Any,), src) : MAX_INLINE_COST
+is_inlineable(@nospecialize src) = inlining_cost(src) != MAX_INLINE_COST
+set_inlineable!(src::CodeInfo, val::Bool) =
+    src.inlining_cost = (val ? MIN_INLINE_COST : MAX_INLINE_COST)
+
+function inline_cost_clamp(x::Int)
+    x > MAX_INLINE_COST && return MAX_INLINE_COST
+    x < MIN_INLINE_COST && return MIN_INLINE_COST
+    x = ccall(:jl_encode_inlining_cost, UInt8, (InlineCostType,), x)
+    x = ccall(:jl_decode_inlining_cost, InlineCostType, (UInt8,), x)
+    return x
+end
+
+const SRC_FLAG_DECLARED_INLINE = 0x1
+const SRC_FLAG_DECLARED_NOINLINE = 0x2
+
+is_declared_inline(@nospecialize src::MaybeCompressed) =
+    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_INLINE
+
+is_declared_noinline(@nospecialize src::MaybeCompressed) =
+    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_NOINLINE
+
+#####################
+# OptimizationState #
+#####################
+
+# return whether this src should be inlined. If so, retrieve_ir_for_inlining must return an IRCode from it
+
+function src_inlining_policy(interp::AbstractInterpreter, mi::MethodInstance,
+    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
+    # If we have a generator, but we can't invoke it (because argument type information is lacking),
+    # don't inline so we defer its invocation to runtime where we'll have precise type information.
+    if isa(mi.def, Method) && hasgenerator(mi)
+        may_invoke_generator(mi) || return false
+    end
+    return src_inlining_policy(interp, src, info, stmt_flag)
+end
+
+function src_inlining_policy(::AbstractInterpreter,
+    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
+    isa(src, OptimizationState) && (src = src.src)
+    if isa(src, MaybeCompressed)
+        src_inlineable = is_stmt_inline(stmt_flag) || is_inlineable(src)
+        return src_inlineable
+    elseif isa(src, IRCode)
+        return true
+    end
+    @assert !isa(src, CodeInstance) # handled by caller
+    return false
+end
+
+struct InliningState{Interp<:AbstractInterpreter}
+    edges::Vector{Any}
+    interp::Interp
+    opt_cache::IdDict{MethodInstance,CodeInstance}
+end
+function InliningState(sv::InferenceState, interp::AbstractInterpreter,
+                       opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
+    return InliningState(sv.edges, interp, opt_cache)
+end
+function InliningState(interp::AbstractInterpreter,
+                       opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
+    return InliningState(Any[], interp, opt_cache)
+end
+
+struct OptimizerCache{CodeCache}
+    cache::CodeCache
+    opt_cache::IdDict{MethodInstance,CodeInstance}
+    function OptimizerCache(
+        cache::CodeCache,
+        opt_cache::IdDict{MethodInstance,CodeInstance}) where CodeCache
+        return new{CodeCache}(cache, opt_cache)
+    end
+end
+function get((; cache, opt_cache)::OptimizerCache, mi::MethodInstance, default)
+    if haskey(opt_cache, mi)
+        return opt_cache[mi] # this is incomplete right now, but will be finished (by finish_cycle) before caching anything
+    end
+    return get(cache, mi, default)
+end
+
+# get `code_cache(::AbstractInterpreter)` from `state::InliningState`
+function code_cache(state::InliningState)
+    cache = code_cache(state.interp)
+    return OptimizerCache(cache, state.opt_cache)
+end
+
+mutable struct OptimizationResult
+    ir::IRCode
+    inline_flag::UInt8
+    simplified::Bool # indicates whether the IR was processed with `cfg_simplify!`
+end
+
+function simplify_ir!(result::OptimizationResult)
+    result.ir = cfg_simplify!(result.ir)
+    result.simplified = true
+end
+
+mutable struct OptimizationState{Interp<:AbstractInterpreter}
+    linfo::MethodInstance
+    src::CodeInfo
+    optresult::Union{Nothing, OptimizationResult}
+    stmt_info::Vector{CallInfo}
+    mod::Module
+    sptypes::Vector{VarState}
+    slottypes::Vector{Any}
+    inlining::InliningState{Interp}
+    cfg::CFG
+    unreachable::BitSet
+    bb_vartables::Vector{Union{Nothing,VarTable}}
+    insert_coverage::Bool
+end
+function OptimizationState(sv::InferenceState, interp::AbstractInterpreter,
+                           opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
+    inlining = InliningState(sv, interp, opt_cache)
+    return OptimizationState(sv.linfo, sv.src, nothing, sv.stmt_info, sv.mod,
+                             sv.sptypes, sv.slottypes, inlining, sv.cfg,
+                             sv.unreachable, sv.bb_vartables, sv.insert_coverage)
+end
+function OptimizationState(mi::MethodInstance, src::CodeInfo, interp::AbstractInterpreter,
+                           opt_cache::IdDict{MethodInstance,CodeInstance}=IdDict{MethodInstance,CodeInstance}())
+    # prepare src for running optimization passes if it isn't already
+    nssavalues = src.ssavaluetypes
+    if nssavalues isa Int
+        src.ssavaluetypes = Any[ Any for _ = 1:nssavalues ]
+    else
+        nssavalues = length(src.ssavaluetypes::Vector{Any})
+    end
+    sptypes = sptypes_from_meth_instance(mi)
+    nslots = length(src.slotflags)
+    slottypes = src.slottypes
+    if slottypes === nothing
+        slottypes = Any[ Any for _ = 1:nslots ]
+    end
+    stmt_info = CallInfo[ NoCallInfo() for _ = 1:nssavalues ]
+    # cache some useful state computations
+    def = mi.def
+    mod = isa(def, Method) ? def.module : def
+    # Allow using the global MI cache, but don't track edges.
+    # This method is mostly used for unit testing the optimizer
+    inlining = InliningState(interp, opt_cache)
+    cfg = compute_basic_blocks(src.code)
+    unreachable = BitSet()
+    bb_vartables = Union{VarTable,Nothing}[]
+    for _ = 1:length(cfg.blocks)
+        push!(bb_vartables, VarState[
+            VarState(slottypes[slot], src.slotflags[slot] & SLOT_USEDUNDEF != 0)
+            for slot = 1:nslots
+        ])
+    end
+    return OptimizationState(mi, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, cfg, unreachable, bb_vartables, false)
+end
+function OptimizationState(mi::MethodInstance, interp::AbstractInterpreter)
+    world = get_inference_world(interp)
+    src = retrieve_code_info(mi, world)
+    src === nothing && return nothing
+    return OptimizationState(mi, src, interp)
+end
+
+function argextype end # imported by EscapeAnalysis
+function try_compute_field end # imported by EscapeAnalysis
+
+include("ssair/heap.jl")
+include("ssair/slot2ssa.jl")
+include("ssair/inlining.jl")
+include("ssair/verify.jl")
+include("ssair/legacy.jl")
+include("ssair/EscapeAnalysis.jl")
+include("ssair/passes.jl")
+include("ssair/irinterp.jl")
+
+function ir_to_codeinf!(opt::OptimizationState, frame::InferenceState, edges::SimpleVector)
+    ir_to_codeinf!(opt, edges, compute_inlining_cost(frame.interp, frame.result, opt.optresult))
+end
+
+function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector, inlining_cost::InlineCostType)
+    src = ir_to_codeinf!(opt, edges)
+    src.inlining_cost = inlining_cost
+    src
+end
+
+function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector)
+    src = ir_to_codeinf!(opt)
+    src.edges = edges
+    src
+end
+
+function ir_to_codeinf!(opt::OptimizationState)
+    (; linfo, src, optresult) = opt
+    if optresult === nothing
+        return src
+    end
+    src = ir_to_codeinf!(src, optresult.ir)
+    opt.optresult = nothing
+    opt.src = src
+    maybe_validate_code(linfo, src, "optimized")
+    return src
+end
+
+function ir_to_codeinf!(src::CodeInfo, ir::IRCode)
+    replace_code_newstyle!(src, ir)
+    widen_all_consts!(src)
+    return src
+end
+
+# widen all Const elements in type annotations
+function widen_all_consts!(src::CodeInfo)
+    ssavaluetypes = src.ssavaluetypes::Vector{Any}
+    for i = 1:length(ssavaluetypes)
+        ssavaluetypes[i] = widenconst(ssavaluetypes[i])
+    end
+
+    for i = 1:length(src.code)
+        x = src.code[i]
+        if isa(x, PiNode)
+            src.code[i] = PiNode(x.val, widenconst(x.typ))
+        end
+    end
+
+    return src
+end
+
+#########
+# logic #
+#########
+
+_topmod(sv::OptimizationState) = _topmod(sv.mod)
+
+is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE)
+is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE)
+
+function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
+    Targ = args[1]
+    atyp = argextype(Targ, src)
+    # `Expr(:new)` of unknown type could raise arbitrary TypeError.
+    typ, isexact = instanceof_tfunc(atyp, true)
+    if !isexact
+        atyp = unwrap_unionall(widenconst(atyp))
+        if isType(atyp) && isTypeDataType(atyp.parameters[1])
+            typ = atyp.parameters[1]
+        else
+            return (false, false, false)
+        end
+        isabstracttype(typ) && return (false, false, false)
+    else
+        isconcretedispatch(typ) || return (false, false, false)
+    end
+    typ = typ::DataType
+    fcount = datatype_fieldcount(typ)
+    fcount === nothing && return (false, false, false)
+    fcount >= length(args) - 1 || return (false, false, false)
+    for fidx in 1:(length(args) - 1)
+        farg = args[fidx + 1]
+        eT = argextype(farg, src)
+        fT = fieldtype(typ, fidx)
+        if !isexact && has_free_typevars(fT)
+            if pattern_match !== nothing && pattern_match(src, typ, fidx, Targ, farg)
+                continue
+            end
+            return (false, false, false)
+        end
+        ⊑(𝕃ₒ, eT, fT) || return (false, false, false)
+    end
+    return (false, true, true)
+end
+
+# Returns a tuple of `(:consistent, :removable, :nothrow)` flags for a given statement.
+function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact})
+    # TODO: We're duplicating analysis from inference here.
+    isa(stmt, PiNode) && return (true, true, true)
+    isa(stmt, PhiNode) && return (true, true, true)
+    isa(stmt, ReturnNode) && return (true, false, true)
+    isa(stmt, EnterNode) && return (true, false, true)
+    isa(stmt, GotoNode) && return (true, false, true)
+    isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool))
+    if isa(stmt, GlobalRef)
+        # Modeled more precisely in abstract_eval_globalref. In general, if a
+        # GlobalRef was moved to statement position, it is probably not `const`,
+        # so we can't say much about it anyway.
+        return (false, false, false)
+    elseif isa(stmt, Expr)
+        (; head, args) = stmt
+        if head === :static_parameter
+            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
+            sptypes = isa(src, IRCode) ? src.sptypes : src.ir.sptypes
+            nothrow = !sptypes[args[1]::Int].undef
+            return (true, nothrow, nothrow)
+        end
+        if head === :call
+            f = argextype(args[1], src)
+            f = singleton_type(f)
+            f === nothing && return (false, false, false)
+            if f === Intrinsics.cglobal || f === Intrinsics.llvmcall
+                # TODO: these are not yet linearized
+                return (false, false, false)
+            end
+            isa(f, Builtin) || return (false, false, false)
+            # Needs to be handled in inlining to look at the callee effects
+            f === Core._apply_iterate && return (false, false, false)
+            argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)]
+            effects = builtin_effects(𝕃ₒ, f, argtypes, rt)
+            consistent = is_consistent(effects)
+            effect_free = is_effect_free(effects)
+            nothrow = is_nothrow(effects)
+            terminates = is_terminates(effects)
+            removable = effect_free & nothrow & terminates
+            return (consistent, removable, nothrow)
+        elseif head === :new
+            return new_expr_effect_flags(𝕃ₒ, args, src)
+        elseif head === :foreigncall
+            effects = foreigncall_effects(stmt) do @nospecialize x
+                argextype(x, src)
+            end
+            consistent = is_consistent(effects)
+            effect_free = is_effect_free(effects)
+            nothrow = is_nothrow(effects)
+            terminates = is_terminates(effects)
+            removable = effect_free & nothrow & terminates
+            return (consistent, removable, nothrow)
+        elseif head === :new_opaque_closure
+            length(args) < 4 && return (false, false, false)
+            typ = argextype(args[1], src)
+            typ, isexact = instanceof_tfunc(typ, true)
+            isexact || return (false, false, false)
+            ⊑(𝕃ₒ, typ, Tuple) || return (false, false, false)
+            rt_lb = argextype(args[2], src)
+            rt_ub = argextype(args[3], src)
+            source = argextype(args[5], src)
+            if !(⊑(𝕃ₒ, rt_lb, Type) && ⊑(𝕃ₒ, rt_ub, Type) && ⊑(𝕃ₒ, source, Method))
+                return (false, false, false)
+            end
+            return (false, true, true)
+        elseif head === :inbounds
+            return (true, true, true)
+        elseif head === :boundscheck || head === :isdefined || head === :the_exception || head === :copyast
+            return (false, true, true)
+        else
+            # e.g. :loopinfo
+            return (false, false, false)
+        end
+    end
+    isa(stmt, SlotNumber) && error("unexpected IR elements")
+    return (true, true, true)
+end
+
+function recompute_effects_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt),
+                                 src::Union{IRCode,IncrementalCompact})
+    flag = IR_FLAG_NULL
+    (consistent, removable, nothrow) = stmt_effect_flags(𝕃ₒ, stmt, rt, src)
+    if consistent
+        flag |= IR_FLAG_CONSISTENT
+    end
+    if removable
+        flag |= IR_FLAGS_REMOVABLE
+    elseif nothrow
+        flag |= IR_FLAG_NOTHROW
+    end
+    if !iscallstmt(stmt)
+        # There is a bit of a subtle point here, which is that some non-call
+        # statements (e.g. PiNode) can be UB:, however, we consider it
+        # illegal to introduce such statements that actually cause UB (for any
+        # input). Ideally that'd be handled at insertion time (TODO), but for
+        # the time being just do that here.
+        flag |= IR_FLAG_NOUB
+    end
+    return flag
+end
+
+"""
+    argextype(x, src::Union{IRCode,IncrementalCompact}) -> t
+    argextype(x, src::CodeInfo, sptypes::Vector{VarState}) -> t
+
+Return the type of value `x` in the context of inferred source `src`.
+Note that `t` might be an extended lattice element.
+Use `widenconst(t)` to get the native Julia type of `x`.
+"""
+argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{VarState} = ir.sptypes) =
+    argextype(x, ir, sptypes, ir.argtypes)
+function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{VarState} = compact.ir.sptypes)
+    isa(x, AnySSAValue) && return types(compact)[x]
+    return argextype(x, compact, sptypes, compact.ir.argtypes)
+end
+function argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{VarState})
+    return argextype(x, src, sptypes, src.slottypes::Union{Vector{Any},Nothing})
+end
+function argextype(
+    @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo},
+    sptypes::Vector{VarState}, slottypes::Union{Vector{Any},Nothing})
+    if isa(x, Expr)
+        if x.head === :static_parameter
+            idx = x.args[1]::Int
+            (1 ≤ idx ≤ length(sptypes)) || throw(InvalidIRError())
+            return sptypes[idx].typ
+        elseif x.head === :boundscheck
+            return Bool
+        elseif x.head === :copyast
+            length(x.args) == 0 && throw(InvalidIRError())
+            return argextype(x.args[1], src, sptypes, slottypes)
+        end
+        Core.println("argextype called on Expr with head ", x.head,
+                     " which is not valid for IR in argument-position.")
+        @assert false
+    elseif isa(x, SlotNumber)
+        slottypes === nothing && return Any
+        (1 ≤ x.id ≤ length(slottypes)) || throw(InvalidIRError())
+        return slottypes[x.id]
+    elseif isa(x, SSAValue)
+        return abstract_eval_ssavalue(x, src)
+    elseif isa(x, Argument)
+        slottypes === nothing && return Any
+        (1 ≤ x.n ≤ length(slottypes)) || throw(InvalidIRError())
+        return slottypes[x.n]
+    elseif isa(x, QuoteNode)
+        return Const(x.value)
+    elseif isa(x, GlobalRef)
+        return abstract_eval_globalref_type(x, src)
+    elseif isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode)
+        return Any
+    elseif isa(x, PiNode)
+        return x.typ
+    else
+        return Const(x)
+    end
+end
+function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo)
+    ssavaluetypes = src.ssavaluetypes
+    if ssavaluetypes isa Int
+        (1 ≤ s.id ≤ ssavaluetypes) || throw(InvalidIRError())
+        return Any
+    else
+        return abstract_eval_ssavalue(s, ssavaluetypes::Vector{Any})
+    end
+end
+abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s]
+
+"""
+    finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
+
+Called at the end of optimization to store the resulting IR back into the OptimizationState.
+"""
+function finishopt!(::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
+    opt.optresult = OptimizationResult(ir, ccall(:jl_ir_flag_inlining, UInt8, (Any,), opt.src), false)
+    return nothing
+end
+
+function visit_bb_phis!(callback, ir::IRCode, bb::Int)
+    stmts = ir.cfg.blocks[bb].stmts
+    for idx in stmts
+        stmt = ir[SSAValue(idx)][:stmt]
+        if !isa(stmt, PhiNode)
+            if !is_valid_phiblock_stmt(stmt)
+                return
+            end
+        else
+            callback(idx)
+        end
+    end
+end
+
+function any_stmt_may_throw(ir::IRCode, bb::Int)
+    for idx in ir.cfg.blocks[bb].stmts
+        if !has_flag(ir[SSAValue(idx)], IR_FLAG_NOTHROW)
+            return true
+        end
+    end
+    return false
+end
+
+visit_conditional_successors(callback, ir::IRCode, bb::Int) = # used for test
+    visit_conditional_successors(callback, LazyPostDomtree(ir), ir, bb)
+function visit_conditional_successors(callback, lazypostdomtree::LazyPostDomtree, ir::IRCode, bb::Int)
+    visited = BitSet((bb,))
+    worklist = Int[bb]
+    while !isempty(worklist)
+        thisbb = popfirst!(worklist)
+        for succ in ir.cfg.blocks[thisbb].succs
+            succ in visited && continue
+            push!(visited, succ)
+            if postdominates(get!(lazypostdomtree), succ, bb)
+                # this successor is not conditional, so no need to visit it further
+                continue
+            elseif callback(succ)
+                return true
+            else
+                push!(worklist, succ)
+            end
+        end
+    end
+    return false
+end
+
+struct AugmentedDomtree
+    cfg::CFG
+    domtree::DomTree
+end
+
+mutable struct LazyAugmentedDomtree
+    const ir::IRCode
+    agdomtree::AugmentedDomtree
+    LazyAugmentedDomtree(ir::IRCode) = new(ir)
+end
+
+function get!(lazyagdomtree::LazyAugmentedDomtree)
+    isdefined(lazyagdomtree, :agdomtree) && return lazyagdomtree.agdomtree
+    ir = lazyagdomtree.ir
+    cfg = copy(ir.cfg)
+    # Add a virtual basic block to represent the exit
+    push!(cfg.blocks, BasicBlock(StmtRange(0:-1)))
+    for bb = 1:(length(cfg.blocks)-1)
+        terminator = ir[SSAValue(last(cfg.blocks[bb].stmts))][:stmt]
+        if isa(terminator, ReturnNode) && isdefined(terminator, :val)
+            cfg_insert_edge!(cfg, bb, length(cfg.blocks))
+        end
+    end
+    domtree = construct_domtree(cfg)
+    return lazyagdomtree.agdomtree = AugmentedDomtree(cfg, domtree)
+end
+
+mutable struct PostOptAnalysisState
+    const result::InferenceResult
+    const ir::IRCode
+    const inconsistent::BitSetBoundedMinPrioritySet
+    const tpdum::TwoPhaseDefUseMap
+    const lazypostdomtree::LazyPostDomtree
+    const lazyagdomtree::LazyAugmentedDomtree
+    const ea_analysis_pending::Vector{Int}
+    all_retpaths_consistent::Bool
+    all_effect_free::Bool
+    effect_free_if_argmem_only::Union{Nothing,Bool}
+    all_nothrow::Bool
+    all_noub::Bool
+    any_conditional_ub::Bool
+    nortcall::Bool
+    function PostOptAnalysisState(result::InferenceResult, ir::IRCode)
+        inconsistent = BitSetBoundedMinPrioritySet(length(ir.stmts))
+        tpdum = TwoPhaseDefUseMap(length(ir.stmts))
+        lazypostdomtree = LazyPostDomtree(ir)
+        lazyagdomtree = LazyAugmentedDomtree(ir)
+        return new(result, ir, inconsistent, tpdum, lazypostdomtree, lazyagdomtree, Int[],
+                   true, true, nothing, true, true, false, true)
+    end
+end
+
+give_up_refinements!(sv::PostOptAnalysisState) =
+    sv.all_retpaths_consistent = sv.all_effect_free = sv.effect_free_if_argmem_only =
+    sv.all_nothrow = sv.all_noub = sv.nortcall = false
+
+function any_refinable(sv::PostOptAnalysisState)
+    effects = sv.result.ipo_effects
+    return ((!is_consistent(effects) & sv.all_retpaths_consistent) |
+            (!is_effect_free(effects) & sv.all_effect_free) |
+            (!is_nothrow(effects) & sv.all_nothrow) |
+            (!is_noub(effects) & sv.all_noub) |
+            (!is_nortcall(effects) & sv.nortcall))
+end
+
+struct GetNativeEscapeCache{CodeCache}
+    code_cache::CodeCache
+    GetNativeEscapeCache(code_cache::CodeCache) where CodeCache = new{CodeCache}(code_cache)
+end
+GetNativeEscapeCache(interp::AbstractInterpreter) = GetNativeEscapeCache(code_cache(interp))
+function ((; code_cache)::GetNativeEscapeCache)(codeinst::Union{CodeInstance,MethodInstance})
+    if codeinst isa MethodInstance
+        codeinst = get(code_cache, codeinst, nothing)
+        codeinst === nothing && return false
+    end
+    argescapes = traverse_analysis_results(codeinst) do @nospecialize result
+        return result isa EscapeAnalysis.ArgEscapeCache ? result : nothing
+    end
+    if argescapes !== nothing
+        return argescapes
+    end
+    effects = codeinst isa CodeInstance ? decode_effects(codeinst.ipo_purity_bits) : codeinst.ipo_effects
+    if is_effect_free(effects) && is_inaccessiblememonly(effects)
+        # We might not have run EA on simple frames without any escapes (e.g. when optimization
+        # is skipped when result is constant-folded by abstract interpretation). If those
+        # frames aren't inlined, the accuracy of EA for caller context takes a big hit.
+        # This is a HACK to avoid that, but obviously, a more comprehensive fix would be ideal.
+        return true
+    end
+    return false
+end
+
+function refine_effects!(interp::AbstractInterpreter, opt::OptimizationState, sv::PostOptAnalysisState)
+    if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending)
+        ir = sv.ir
+        nargs = Int(opt.src.nargs)
+        estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), get_escape_cache(interp))
+        argescapes = EscapeAnalysis.ArgEscapeCache(estate)
+        stack_analysis_result!(sv.result, argescapes)
+        validate_mutable_arg_escapes!(estate, sv)
+    end
+
+    any_refinable(sv) || return false
+    effects = sv.result.ipo_effects
+    sv.result.ipo_effects = Effects(effects;
+        consistent = sv.all_retpaths_consistent ? ALWAYS_TRUE : effects.consistent,
+        effect_free = sv.all_effect_free ? ALWAYS_TRUE :
+                      sv.effect_free_if_argmem_only === true ? EFFECT_FREE_IF_INACCESSIBLEMEMONLY : effects.effect_free,
+        nothrow = sv.all_nothrow ? true : effects.nothrow,
+        noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub,
+        nortcall = sv.nortcall ? true : effects.nortcall)
+    return true
+end
+
+function is_ipo_dataflow_analysis_profitable(effects::Effects)
+    return !(is_consistent(effects) && is_effect_free(effects) &&
+             is_nothrow(effects) && is_noub(effects))
+end
+
+function iscall_with_boundscheck(@nospecialize(stmt), sv::PostOptAnalysisState)
+    isexpr(stmt, :call) || return false
+    ft = argextype(stmt.args[1], sv.ir)
+    f = singleton_type(ft)
+    f === nothing && return false
+    if f === getfield
+        nargs = 4
+    elseif f === memoryrefnew
+        nargs= 3
+    elseif f === memoryrefget || f === memoryref_isassigned
+        nargs = 4
+    elseif f === memoryrefset!
+        nargs = 5
+    else
+        return false
+    end
+    length(stmt.args) < nargs && return false
+    boundscheck = stmt.args[end]
+    argextype(boundscheck, sv.ir) === Bool || return false
+    isa(boundscheck, SSAValue) || return false
+    return true
+end
+
+function check_all_args_noescape!(sv::PostOptAnalysisState, ir::IRCode, @nospecialize(stmt),
+                                  estate::EscapeAnalysis.EscapeState)
+    stmt isa Expr || return false
+    if isexpr(stmt, :invoke)
+        startidx = 2
+    elseif isexpr(stmt, :new)
+        startidx = 1
+    else
+        return false
+    end
+    has_no_escape(x::EscapeAnalysis.EscapeInfo) =
+        EscapeAnalysis.has_no_escape(EscapeAnalysis.ignore_argescape(x))
+    for i = startidx:length(stmt.args)
+        arg = stmt.args[i]
+        argt = argextype(arg, ir)
+        if is_mutation_free_argtype(argt)
+            continue
+        end
+        # See if we can find the allocation
+        if isa(arg, Argument)
+            if has_no_escape(estate[arg])
+                # Even if we prove everything else effect_free, the best we can
+                # say is :effect_free_if_argmem_only
+                if sv.effect_free_if_argmem_only === nothing
+                    sv.effect_free_if_argmem_only = true
+                end
+            else
+                sv.effect_free_if_argmem_only = false
+            end
+            return false
+        elseif isa(arg, SSAValue)
+            has_no_escape(estate[arg]) || return false
+            check_all_args_noescape!(sv, ir, ir[arg][:stmt], estate) || return false
+        else
+            return false
+        end
+    end
+    return true
+end
+
+function validate_mutable_arg_escapes!(estate::EscapeAnalysis.EscapeState, sv::PostOptAnalysisState)
+    ir = sv.ir
+    for idx in sv.ea_analysis_pending
+        # See if any mutable memory was allocated in this function and determined
+        # not to escape.
+        inst = ir[SSAValue(idx)]
+        stmt = inst[:stmt]
+        if !check_all_args_noescape!(sv, ir, stmt, estate)
+            return sv.all_effect_free = false
+        end
+    end
+    return true
+end
+
+function is_conditional_noub(inst::Instruction, sv::PostOptAnalysisState)
+    stmt = inst[:stmt]
+    iscall_with_boundscheck(stmt, sv) || return false
+    barg = stmt.args[end]::SSAValue
+    bstmt = sv.ir[barg][:stmt]
+    isexpr(bstmt, :boundscheck) || return false
+    # If IR_FLAG_INBOUNDS is already set, no more conditional ub
+    (!isempty(bstmt.args) && bstmt.args[1] === false) && return false
+    return true
+end
+
+function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState)
+    flag = inst[:flag]
+    # If we can prove that the argmem does not escape the current function, we can
+    # refine this to :effect_free.
+    needs_ea_validation = has_flag(flag, IR_FLAGS_NEEDS_EA)
+    stmt = inst[:stmt]
+    if !needs_ea_validation
+        if !isterminator(stmt) && stmt !== nothing
+            # ignore control flow node – they are not removable on their own and thus not
+            # have `IR_FLAG_EFFECT_FREE` but still do not taint `:effect_free`-ness of
+            # the whole method invocation
+            sv.all_effect_free &= has_flag(flag, IR_FLAG_EFFECT_FREE)
+        end
+    elseif sv.all_effect_free
+        if (isexpr(stmt, :invoke) || isexpr(stmt, :new) ||
+            # HACK for performance: limit the scope of EA to code with object field access only,
+            # since its abilities to reason about e.g. arrays are currently very limited anyways.
+            is_known_call(stmt, setfield!, sv.ir))
+            push!(sv.ea_analysis_pending, inst.idx)
+        else
+            sv.all_effect_free = false
+        end
+    end
+    sv.all_nothrow &= has_flag(flag, IR_FLAG_NOTHROW)
+    if !has_flag(flag, IR_FLAG_NOUB)
+        # Special case: `:boundscheck` into `getfield` or memory operations is `:noub_if_noinbounds`
+        if is_conditional_noub(inst, sv)
+            sv.any_conditional_ub = true
+        else
+            sv.all_noub = false
+        end
+    end
+    if !has_flag(flag, IR_FLAG_NORTCALL)
+        # if a function call that might invoke `Core.Compiler.return_type` has been deleted,
+        # there's no need to taint with `:nortcall`, allowing concrete evaluation
+        if iscallstmt(stmt)
+            sv.nortcall = false
+        end
+    end
+    nothing
+end
+
+function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState)
+    flag = inst[:flag]
+    stmt_inconsistent = !has_flag(flag, IR_FLAG_CONSISTENT)
+    stmt = inst[:stmt]
+    # Special case: For `getfield` and memory operations, we allow inconsistency of the :boundscheck argument
+    (; inconsistent, tpdum) = sv
+    if iscall_with_boundscheck(stmt, sv)
+        for i = 1:length(stmt.args)
+            val = stmt.args[i]
+            # SSAValue should be the only permitted argument type which can be inconsistent found here.
+            # Others (e.g. GlobalRef) should have been moved to statement position. See stmt_effect_flags.
+            if isa(val, SSAValue)
+                if i < length(stmt.args)  # not the boundscheck argument (which is last)
+                    stmt_inconsistent |= val.id in inconsistent
+                end
+                count!(tpdum, val)
+            end
+        end
+    else
+        for ur in userefs(stmt)
+            val = ur[]
+            if isa(val, SSAValue)
+                stmt_inconsistent |= val.id in inconsistent
+                count!(tpdum, val)
+            end
+        end
+    end
+    stmt_inconsistent && push!(inconsistent, inst.idx)
+    return stmt_inconsistent
+end
+
+struct ScanStmt
+    sv::PostOptAnalysisState
+end
+
+function ((; sv)::ScanStmt)(inst::Instruction, lstmt::Int, bb::Int)
+    stmt = inst[:stmt]
+
+    if isa(stmt, EnterNode)
+        # try/catch not yet modeled
+        give_up_refinements!(sv)
+        return true # don't bail out early -- can cause tpdum counts to be off
+    end
+
+    scan_non_dataflow_flags!(inst, sv)
+
+    stmt_inconsistent = scan_inconsistency!(inst, sv)
+
+    if stmt_inconsistent
+        if !has_flag(inst[:flag], IR_FLAG_NOTHROW)
+            # Taint :consistent if this statement may raise since :consistent requires
+            # consistent termination. TODO: Separate :consistent_return and :consistent_termination from :consistent.
+            sv.all_retpaths_consistent = false
+        end
+        if inst.idx == lstmt
+            if isa(stmt, ReturnNode) && isdefined(stmt, :val)
+                sv.all_retpaths_consistent = false
+            elseif isa(stmt, GotoIfNot)
+                # Conditional Branch with inconsistent condition.
+                # If we do not know this function terminates, taint consistency, now,
+                # :consistent requires consistent termination. TODO: Just look at the
+                # inconsistent region.
+                if !sv.result.ipo_effects.terminates
+                    sv.all_retpaths_consistent = false
+                elseif visit_conditional_successors(sv.lazypostdomtree, sv.ir, bb) do succ::Int
+                        return any_stmt_may_throw(sv.ir, succ)
+                    end
+                    # check if this `GotoIfNot` leads to conditional throws, which taints consistency
+                    sv.all_retpaths_consistent = false
+                else
+                    (; cfg, domtree) = get!(sv.lazyagdomtree)
+                    for succ in iterated_dominance_frontier(cfg, BlockLiveness(sv.ir.cfg.blocks[bb].succs, nothing), domtree)
+                        if succ == length(cfg.blocks)
+                            # Phi node in the virtual exit -> We have a conditional
+                            # return. TODO: Check if all the retvals are egal.
+                            sv.all_retpaths_consistent = false
+                        else
+                            visit_bb_phis!(sv.ir, succ) do phiidx::Int
+                                push!(sv.inconsistent, phiidx)
+                            end
+                        end
+                    end
+                end
+            end
+        end
+    end
+
+    # Do not bail out early, as this can cause tpdum counts to be off.
+    # # bail out early if there are no possibilities to refine the effects
+    # if !any_refinable(sv)
+    #     return nothing
+    # end
+
+    return true
+end
+
+function check_inconsistentcy!(sv::PostOptAnalysisState, scanner::BBScanner)
+    (; ir, inconsistent, tpdum) = sv
+
+    sv.all_retpaths_consistent || return
+    scan!(ScanStmt(sv), scanner, false)
+    sv.all_retpaths_consistent || return
+    complete!(tpdum); push!(scanner.bb_ip, 1)
+    populate_def_use_map!(tpdum, scanner)
+
+    stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts))
+    for def in inconsistent
+        append!(stmt_ip, tpdum[def])
+   end
+    lazydomtree = LazyDomtree(ir)
+    while !isempty(stmt_ip)
+        idx = popfirst!(stmt_ip)
+        idx in inconsistent && continue # already processed
+        inst = ir[SSAValue(idx)]
+        stmt = inst[:stmt]
+        if iscall_with_boundscheck(stmt, sv)
+            # recompute inconsistent flags for call while skipping boundscheck (last) argument
+            any_non_boundscheck_inconsistent = false
+            for i = 1:(length(stmt.args)-1)
+                val = stmt.args[i]
+                if isa(val, SSAValue)
+                    any_non_boundscheck_inconsistent |= val.id in inconsistent
+                    any_non_boundscheck_inconsistent && break
+                end
+            end
+            any_non_boundscheck_inconsistent || continue
+        elseif isa(stmt, ReturnNode)
+            sv.all_retpaths_consistent = false
+            return
+        elseif isa(stmt, GotoIfNot)
+            bb = block_for_inst(ir, idx)
+            cfg = ir.cfg
+            blockliveness = BlockLiveness(cfg.blocks[bb].succs, nothing)
+            for succ in iterated_dominance_frontier(cfg, blockliveness, get!(lazydomtree))
+                visit_bb_phis!(ir, succ) do phiidx::Int
+                    phiidx in inconsistent || push!(stmt_ip, phiidx)
+                end
+            end
+        end
+        push!(inconsistent, idx)
+        append!(stmt_ip, tpdum[idx])
+    end
+end
+
+function ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState,
+                                ir::IRCode, result::InferenceResult)
+    if !is_ipo_dataflow_analysis_profitable(result.ipo_effects)
+        return false
+    end
+
+    @assert isempty(ir.new_nodes) "IRCode should be compacted before post-opt analysis"
+
+    sv = PostOptAnalysisState(result, ir)
+    scanner = BBScanner(ir)
+
+    completed_scan = scan!(ScanStmt(sv), scanner, true)
+
+    if !completed_scan
+        # finish scanning for all_retpaths_consistent computation
+        check_inconsistentcy!(sv, scanner)
+        if !sv.all_retpaths_consistent
+            # No longer any dataflow concerns, just scan the flags
+            scan!(scanner, false) do inst::Instruction, ::Int, ::Int
+                scan_non_dataflow_flags!(inst, sv)
+                # bail out early if there are no possibilities to refine the effects
+                if !any_refinable(sv)
+                    return nothing
+                end
+                return true
+            end
+        end
+    end
+
+    return refine_effects!(interp, opt, sv)
+end
+
+# run the optimization work
+function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
+    @zone "CC: OPTIMIZER" ir = run_passes_ipo_safe(opt.src, opt)
+    ipo_dataflow_analysis!(interp, opt, ir, caller)
+    finishopt!(interp, opt, ir)
+    return nothing
+end
+
+const ALL_PASS_NAMES = String[]
+macro pass(name::String, expr)
+    optimize_until = esc(:optimize_until)
+    stage = esc(:__stage__)
+    macrocall = :(@zone $name $(esc(expr)))
+    macrocall.args[2] = __source__  # `@timeit` may want to use it
+    push!(ALL_PASS_NAMES, name)
+    quote
+        $macrocall
+        matchpass($optimize_until, ($stage += 1), $name) && $(esc(:(@goto __done__)))
+    end
+end
+
+matchpass(optimize_until::Int, stage, _) = optimize_until == stage
+matchpass(optimize_until::String, _, name) = optimize_until == name
+matchpass(::Nothing, _, _) = false
+
+function run_passes_ipo_safe(
+    ci::CodeInfo,
+    sv::OptimizationState,
+    optimize_until::Union{Nothing, Int, String} = nothing)  # run all passes by default
+    if optimize_until isa String && !contains_is(ALL_PASS_NAMES, optimize_until)
+        error("invalid `optimize_until` argument, no such optimization pass")
+    elseif optimize_until isa Int && (optimize_until < 1 || optimize_until > length(ALL_PASS_NAMES))
+        error("invalid `optimize_until` argument, no such optimization pass")
+    end
+
+    __stage__ = 0  # used by @pass
+    # NOTE: The pass name MUST be unique for `optimize_until::String` to work
+    @pass "CC: CONVERT"   ir = convert_to_ircode(ci, sv)
+    @pass "CC: SLOT2REG"  ir = slot2reg(ir, ci, sv)
+    # TODO: Domsorting can produce an updated domtree - no need to recompute here
+    @pass "CC: COMPACT_1" ir = compact!(ir)
+    @pass "CC: INLINING"  ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds)
+    # @zone "CC: VERIFY 2" verify_ir(ir)
+    @pass "CC: COMPACT_2" ir = compact!(ir)
+    @pass "CC: SROA"      ir = sroa_pass!(ir, sv.inlining)
+    @pass "CC: ADCE"      (ir, made_changes) = adce_pass!(ir, sv.inlining)
+    if made_changes
+        @pass "CC: COMPACT_3" ir = compact!(ir, true)
+    end
+    if is_asserts()
+        @zone "CC: VERIFY_3" begin
+            verify_ir(ir, true, false, optimizer_lattice(sv.inlining.interp), sv.linfo)
+            verify_linetable(ir.debuginfo, length(ir.stmts))
+        end
+    end
+    @label __done__  # used by @pass
+    return ir
+end
+
+function strip_trailing_junk!(code::Vector{Any}, ssavaluetypes::Vector{Any}, ssaflags::Vector, debuginfo::DebugInfoStream, cfg::CFG, info::Vector{CallInfo})
+    # Remove `nothing`s at the end, we don't handle them well
+    # (we expect the last instruction to be a terminator)
+    codelocs = debuginfo.codelocs
+    for i = length(code):-1:1
+        if code[i] !== nothing
+            resize!(code, i)
+            resize!(ssavaluetypes, i)
+            resize!(codelocs, 3i)
+            resize!(info, i)
+            resize!(ssaflags, i)
+            break
+        end
+    end
+    # If the last instruction is not a terminator, add one. This can
+    # happen for implicit return on dead branches.
+    term = code[end]
+    if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
+        push!(code, ReturnNode())
+        push!(ssavaluetypes, Union{})
+        push!(codelocs, 0, 0, 0)
+        push!(info, NoCallInfo())
+        push!(ssaflags, IR_FLAG_NOTHROW)
+
+        # Update CFG to include appended terminator
+        old_range = cfg.blocks[end].stmts
+        new_range = StmtRange(first(old_range), last(old_range) + 1)
+        cfg.blocks[end] = BasicBlock(cfg.blocks[end], new_range)
+        (length(cfg.index) == length(cfg.blocks)) && (cfg.index[end] += 1)
+    end
+    nothing
+end
+
+function changed_lineinfo(di::DebugInfo, codeloc::Int, prevloc::Int)
+    while true
+        next = getdebugidx(di, codeloc)
+        line = next[1]
+        line < 0 && return false # invalid info
+        line == 0 && next[2] == 0 && return false # no new info
+        prevloc <= 0 && return true # no old info
+        prev = getdebugidx(di, prevloc)
+        next === prev && return false # exactly identical
+        prevline = prev[1]
+        prevline < 0 && return true # previous invalid info, now valid
+        edge = next[2]
+        edge === prev[2] || return true # change to this edge
+        linetable = di.linetable
+        # check for change to line number here
+        if linetable === nothing || line == 0
+            line == prevline || return true
+        else
+            changed_lineinfo(linetable::DebugInfo, Int(line), Int(prevline)) && return true
+        end
+        # check for change to edge here
+        edge == 0 && return false # no edge here
+        di = di.edges[Int(edge)]::DebugInfo
+        codeloc = Int(next[3])
+        prevloc = Int(prev[3])
+    end
+end
+
+function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
+    # Update control-flow to reflect any unreachable branches.
+    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
+    ci.code = code = copy_exprargs(ci.code)
+    di = DebugInfoStream(sv.linfo, ci.debuginfo, length(code))
+    codelocs = di.codelocs
+    ssaflags = ci.ssaflags
+    for i = 1:length(code)
+        expr = code[i]
+        if !(i in sv.unreachable)
+            if isa(expr, GotoIfNot)
+                # Replace this live GotoIfNot with:
+                # - no-op if :nothrow and the branch target is unreachable
+                # - cond if :nothrow and both targets are unreachable
+                # - typeassert if must-throw
+                block = block_for_inst(sv.cfg, i)
+                if ssavaluetypes[i] === Bottom
+                    destblock = block_for_inst(sv.cfg, expr.dest)
+                    cfg_delete_edge!(sv.cfg, block, block + 1)
+                    ((block + 1) != destblock) && cfg_delete_edge!(sv.cfg, block, destblock)
+                    expr = Expr(:call, Core.typeassert, expr.cond, Bool)
+                elseif i + 1 in sv.unreachable
+                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
+                    cfg_delete_edge!(sv.cfg, block, block + 1)
+                    expr = GotoNode(expr.dest)
+                elseif expr.dest in sv.unreachable
+                    @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
+                    cfg_delete_edge!(sv.cfg, block, block_for_inst(sv.cfg, expr.dest))
+                    expr = nothing
+                end
+                code[i] = expr
+            elseif isa(expr, EnterNode)
+                catchdest = expr.catch_dest
+                if catchdest in sv.unreachable
+                    cfg_delete_edge!(sv.cfg, block_for_inst(sv.cfg, i), block_for_inst(sv.cfg, catchdest))
+                    if isdefined(expr, :scope)
+                        # We've proven that nothing inside the enter region throws,
+                        # but we don't yet know whether something might read the scope,
+                        # so we need to retain this enter for the time being. However,
+                        # we use the special marker `0` to indicate that setting up
+                        # the try/catch frame is not required.
+                        code[i] = EnterNode(expr, 0)
+                    else
+                        code[i] = nothing
+                    end
+                end
+            elseif isa(expr, PhiNode)
+                new_edges = Int32[]
+                new_vals = Any[]
+                for j = 1:length(expr.edges)
+                    edge = expr.edges[j]
+                    (edge in sv.unreachable || (ssavaluetypes[edge] === Union{} && !isa(code[edge], PhiNode))) && continue
+                    push!(new_edges, edge)
+                    if isassigned(expr.values, j)
+                        push!(new_vals, expr.values[j])
+                    else
+                        resize!(new_vals, length(new_edges))
+                    end
+                end
+                code[i] = PhiNode(new_edges, new_vals)
+            end
+        end
+    end
+
+    # Go through and add an unreachable node after every
+    # Union{} call. Then reindex labels.
+    stmtinfo = sv.stmt_info
+    meta = Expr[]
+    idx = 1
+    oldidx = 1
+    nstmts = length(code)
+    ssachangemap = labelchangemap = blockchangemap = nothing
+    prevloc = 0
+    while idx <= length(code)
+        if sv.insert_coverage && changed_lineinfo(ci.debuginfo, oldidx, prevloc)
+            # insert a side-effect instruction before the current instruction in the same basic block
+            insert!(code, idx, Expr(:code_coverage_effect))
+            splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
+            insert!(ssavaluetypes, idx, Nothing)
+            insert!(stmtinfo, idx, NoCallInfo())
+            insert!(ssaflags, idx, IR_FLAG_NULL)
+            if ssachangemap === nothing
+                ssachangemap = fill(0, nstmts)
+            end
+            if labelchangemap === nothing
+                labelchangemap = fill(0, nstmts)
+            end
+            ssachangemap[oldidx] += 1
+            if oldidx < length(labelchangemap)
+                labelchangemap[oldidx + 1] += 1
+            end
+            if blockchangemap === nothing
+                blockchangemap = fill(0, length(sv.cfg.blocks))
+            end
+            blockchangemap[block_for_inst(sv.cfg, oldidx)] += 1
+            idx += 1
+            prevloc = oldidx
+        end
+        if ssavaluetypes[idx] === Union{} && !(oldidx in sv.unreachable) && !isa(code[idx], PhiNode)
+            # We should have converted any must-throw terminators to an equivalent w/o control-flow edges
+            @assert !isterminator(code[idx])
+
+            block = block_for_inst(sv.cfg, oldidx)
+            block_end = last(sv.cfg.blocks[block].stmts) + (idx - oldidx)
+
+            # Delete all successors to this basic block
+            for succ in sv.cfg.blocks[block].succs
+                preds = sv.cfg.blocks[succ].preds
+                deleteat!(preds, findfirst(x::Int->x==block, preds)::Int)
+            end
+            empty!(sv.cfg.blocks[block].succs)
+
+            if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
+                # Any statements from here to the end of the block have been wrapped in Core.Const(...)
+                # by type inference (effectively deleting them). Only task left is to replace the block
+                # terminator with an explicit `unreachable` marker.
+
+                if block_end > idx
+                    if is_asserts()
+                        # Verify that type-inference did its job
+                        for i = (oldidx + 1):last(sv.cfg.blocks[block].stmts)
+                            @assert i in sv.unreachable
+                        end
+                    end
+                    code[block_end] = ReturnNode()
+                    codelocs[3block_end-2], codelocs[3block_end-1], codelocs[3block_end-0] = (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0])
+                    ssavaluetypes[block_end] = Union{}
+                    stmtinfo[block_end] = NoCallInfo()
+                    ssaflags[block_end] = IR_FLAG_NOTHROW
+                    idx += block_end - idx
+                else
+                    insert!(code, idx + 1, ReturnNode())
+                    splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
+                    insert!(ssavaluetypes, idx + 1, Union{})
+                    insert!(stmtinfo, idx + 1, NoCallInfo())
+                    insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW)
+                    if ssachangemap === nothing
+                        ssachangemap = fill(0, nstmts)
+                    end
+                    if labelchangemap === nothing
+                        labelchangemap = sv.insert_coverage ? fill(0, nstmts) : ssachangemap
+                    end
+                    if oldidx < length(ssachangemap)
+                        ssachangemap[oldidx + 1] += 1
+                        sv.insert_coverage && (labelchangemap[oldidx + 1] += 1)
+                    end
+                    if blockchangemap === nothing
+                        blockchangemap = fill(0, length(sv.cfg.blocks))
+                    end
+                    blockchangemap[block] += 1
+                    idx += 1
+                end
+                oldidx = last(sv.cfg.blocks[block].stmts)
+            end
+        end
+        idx += 1
+        oldidx += 1
+    end
+    empty!(sv.unreachable)
+
+    if ssachangemap !== nothing && labelchangemap !== nothing
+        renumber_ir_elements!(code, ssachangemap, labelchangemap)
+    end
+    if blockchangemap !== nothing
+        renumber_cfg_stmts!(sv.cfg, blockchangemap)
+    end
+
+    for i = 1:length(code)
+        code[i] = process_meta!(meta, code[i])
+    end
+    strip_trailing_junk!(code, ssavaluetypes, ssaflags, di, sv.cfg, stmtinfo)
+    types = Any[]
+    stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
+    # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the
+    # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form
+    # and eliminates slots (see below)
+    argtypes = sv.slottypes
+    return IRCode(stmts, sv.cfg, di, argtypes, meta, sv.sptypes, world_range(ci))
+end
+
+function process_meta!(meta::Vector{Expr}, @nospecialize stmt)
+    if isexpr(stmt, :meta) && length(stmt.args) ≥ 1
+        push!(meta, stmt)
+        return nothing
+    end
+    return stmt
+end
+
+function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
+    # need `ci` for the slot metadata, IR for the code
+    @zone "CC: DOMTREE_1" domtree = construct_domtree(ir)
+    defuse_insts = scan_slot_def_use(Int(ci.nargs), ci, ir.stmts.stmt)
+    𝕃ₒ = optimizer_lattice(sv.inlining.interp)
+    @zone "CC: CONSTRUCT_SSA" ir = construct_ssa!(ci, ir, sv, domtree, defuse_insts, 𝕃ₒ) # consumes `ir`
+    # NOTE now we have converted `ir` to the SSA form and eliminated slots
+    # let's resize `argtypes` now and remove unnecessary types for the eliminated slots
+    resize!(ir.argtypes, ci.nargs)
+    return ir
+end
+
+## Computing the cost of a function body
+
+# saturating sum (inputs are non-negative), prevents overflow with typemax(Int) below
+plus_saturate(x::Int, y::Int) = max(x, y, x+y)
+
+# known return type
+isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
+
+function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
+                        params::OptimizationParams)
+    #=const=# UNKNOWN_CALL_COST = 20
+    head = ex.head
+    if is_meta_expr_head(head)
+        return 0
+    elseif head === :call
+        farg = ex.args[1]
+        ftyp = argextype(farg, src, sptypes)
+        if ftyp === IntrinsicFunction && farg isa SSAValue
+            # if this comes from code that was already inlined into another function,
+            # Consts have been widened. try to recover in simple cases.
+            farg = isa(src, CodeInfo) ? src.code[farg.id] : src[farg][:stmt]
+            if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter)
+                ftyp = argextype(farg, src, sptypes)
+            end
+        end
+        f = singleton_type(ftyp)
+        if isa(f, IntrinsicFunction)
+            iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
+            if isassigned(T_IFUNC, iidx)
+                minarg, maxarg, = T_IFUNC[iidx]
+                nargs = length(ex.args)
+                if minarg + 1 <= nargs <= maxarg + 1
+                    # With mostly constant arguments, all Intrinsics tend to become very cheap
+                    # and are likely to combine with the operations around them,
+                    # so reduce their cost by half.
+                    cost = T_IFUNC_COST[iidx]
+                    if cost == 0 || nargs < 3 ||
+                       (f === Intrinsics.cglobal || f === Intrinsics.llvmcall) # these hold malformed IR, so argextype will crash on them
+                        return cost
+                    end
+                    aty2 = widenconditional(argextype(ex.args[2], src, sptypes))
+                    nconst = Int(aty2 isa Const)
+                    for i = 3:nargs
+                        aty = widenconditional(argextype(ex.args[i], src, sptypes))
+                        if widenconst(aty) != widenconst(aty2)
+                            nconst = 0
+                            break
+                        end
+                        nconst += aty isa Const
+                    end
+                    if nconst + 2 >= nargs
+                        cost = (cost - 1) ÷ 2
+                    end
+                    return cost
+                end
+            end
+            # unknown/unhandled intrinsic: hopefully the caller gets a slightly better answer after the inlining
+            return UNKNOWN_CALL_COST
+        end
+        if isa(f, Builtin) && f !== invoke
+            # The efficiency of operations like a[i] and s.b
+            # depend strongly on whether the result can be
+            # inferred, so check the type of ex
+            if f === Core.getfield || f === Core.tuple || f === Core.getglobal
+                # we might like to penalize non-inferrability, but
+                # tuple iteration/destructuring makes that impossible
+                # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
+                return 0
+            elseif (f === Core.memoryrefget || f === Core.memoryref_isassigned) && length(ex.args) >= 3
+                atyp = argextype(ex.args[2], src, sptypes)
+                return isknowntype(atyp) ? 1 : params.inline_nonleaf_penalty
+            elseif f === Core.memoryrefset! && length(ex.args) >= 3
+                atyp = argextype(ex.args[2], src, sptypes)
+                return isknowntype(atyp) ? 5 : params.inline_nonleaf_penalty
+            elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
+                return 1
+            end
+            fidx = find_tfunc(f)
+            if fidx === nothing
+                # unknown/unhandled builtin
+                # Use the generic cost of a direct function call
+                return UNKNOWN_CALL_COST
+            end
+            return T_FFUNC_COST[fidx]
+        end
+        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
+        if extyp === Union{}
+            return 0
+        end
+        return params.inline_nonleaf_penalty
+    elseif head === :foreigncall
+        foreigncall = ex.args[1]
+        if isexpr(foreigncall, :tuple, 1)
+            foreigncall = foreigncall.args[1]
+            if foreigncall isa QuoteNode && foreigncall.value === :jl_string_ptr
+                return 1
+            end
+        end
+        return 20
+    elseif head === :invoke || head === :invoke_modify
+        # Calls whose "return type" is Union{} do not actually return:
+        # they are errors. Since these are not part of the typical
+        # run-time of the function, we omit them from
+        # consideration. This way, non-inlined error branches do not
+        # prevent inlining.
+        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
+        return extyp === Union{} ? 0 : UNKNOWN_CALL_COST
+    elseif head === :(=)
+        return statement_cost(ex.args[2], -1, src, sptypes, params)
+    elseif head === :copyast
+        return 100
+    end
+    return 0
+end
+
+function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
+                                  params::OptimizationParams)
+    thiscost = 0
+    dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
+    if stmt isa Expr
+        thiscost = statement_cost(stmt, line, src, sptypes, params)::Int
+    elseif stmt isa GotoNode
+        # loops are generally always expensive
+        # but assume that forward jumps are already counted for from
+        # summing the cost of the not-taken branch
+        thiscost = dst(stmt.label) < line ? 40 : 0
+    elseif stmt isa GotoIfNot
+        thiscost = dst(stmt.dest) < line ? 40 : 0
+    elseif stmt isa EnterNode
+        # try/catch is a couple function calls,
+        # but don't inline functions with try/catch
+        # since these aren't usually performance-sensitive functions,
+        # and llvm is more likely to miscompile them when these functions get large
+        thiscost = typemax(Int)
+    end
+    return thiscost
+end
+
+function inline_cost_model(ir::IRCode, params::OptimizationParams, cost_threshold::Int)
+    bodycost = 0
+    for i = 1:length(ir.stmts)
+        stmt = ir[SSAValue(i)][:stmt]
+        thiscost = statement_or_branch_cost(stmt, i, ir, ir.sptypes, params)
+        bodycost = plus_saturate(bodycost, thiscost)
+        if bodycost > cost_threshold
+            return MAX_INLINE_COST
+        end
+    end
+    return inline_cost_clamp(bodycost)
+end
+
+function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, params::OptimizationParams)
+    maxcost = 0
+    for line = 1:length(body)
+        stmt = body[line]
+        thiscost = statement_or_branch_cost(stmt, line, src, sptypes,
+                                            params)
+        cost[line] = thiscost
+        if thiscost > maxcost
+            maxcost = thiscost
+        end
+    end
+    return maxcost
+end
+
+function renumber_ir_elements!(body::Vector{Any}, cfg::Union{CFG,Nothing}, ssachangemap::Vector{Int})
+    return renumber_ir_elements!(body, cfg, ssachangemap, ssachangemap)
+end
+
+function cumsum_ssamap!(ssachangemap::Vector{Int})
+    any_change = false
+    rel_change = 0
+    for i = 1:length(ssachangemap)
+        val = ssachangemap[i]
+        any_change |= val ≠ 0
+        rel_change += val
+        if val == -1
+            # Keep a marker that this statement was deleted
+            ssachangemap[i] = typemin(Int)
+        else
+            ssachangemap[i] = rel_change
+        end
+    end
+    return any_change
+end
+
+function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int})
+    any_change = cumsum_ssamap!(labelchangemap)
+    if ssachangemap !== labelchangemap
+        any_change |= cumsum_ssamap!(ssachangemap)
+    end
+    any_change || return
+    for i = 1:length(body)
+        el = body[i]
+        if isa(el, GotoNode)
+            body[i] = GotoNode(el.label + labelchangemap[el.label])
+        elseif isa(el, GotoIfNot)
+            cond = el.cond
+            if isa(cond, SSAValue)
+                cond = SSAValue(cond.id + ssachangemap[cond.id])
+            end
+            was_deleted = labelchangemap[el.dest] == typemin(Int)
+            body[i] = was_deleted ? cond : GotoIfNot(cond, el.dest + labelchangemap[el.dest])
+        elseif isa(el, ReturnNode)
+            if isdefined(el, :val)
+                val = el.val
+                if isa(val, SSAValue)
+                    body[i] = ReturnNode(SSAValue(val.id + ssachangemap[val.id]))
+                end
+            end
+        elseif isa(el, SSAValue)
+            body[i] = SSAValue(el.id + ssachangemap[el.id])
+        elseif isa(el, PhiNode)
+            i = 1
+            edges = el.edges
+            values = el.values
+            while i <= length(edges)
+                was_deleted = ssachangemap[edges[i]] == typemin(Int)
+                if was_deleted
+                    deleteat!(edges, i)
+                    deleteat!(values, i)
+                else
+                    edges[i] += ssachangemap[edges[i]]
+                    val = values[i]
+                    if isa(val, SSAValue)
+                        values[i] = SSAValue(val.id + ssachangemap[val.id])
+                    end
+                    i += 1
+                end
+            end
+        elseif isa(el, EnterNode)
+            tgt = el.catch_dest
+            if tgt != 0
+                was_deleted = labelchangemap[tgt] == typemin(Int)
+                if was_deleted
+                    @assert !isdefined(el, :scope)
+                    body[i] = nothing
+                else
+                    if isdefined(el, :scope) && isa(el.scope, SSAValue)
+                        body[i] = EnterNode(tgt + labelchangemap[tgt], SSAValue(el.scope.id + ssachangemap[el.scope.id]))
+                    else
+                        body[i] = EnterNode(el, tgt + labelchangemap[tgt])
+                    end
+                end
+            end
+        elseif isa(el, Expr)
+            if el.head === :(=) && el.args[2] isa Expr
+                el = el.args[2]::Expr
+            end
+            if !is_meta_expr_head(el.head)
+                args = el.args
+                for i = 1:length(args)
+                    el = args[i]
+                    if isa(el, SSAValue)
+                        args[i] = SSAValue(el.id + ssachangemap[el.id])
+                    end
+                end
+            end
+        end
+    end
+end
+
+function renumber_cfg_stmts!(cfg::CFG, blockchangemap::Vector{Int})
+    cumsum_ssamap!(blockchangemap) || return
+    for i = 1:length(cfg.blocks)
+        old_range = cfg.blocks[i].stmts
+        new_range = StmtRange(first(old_range) + ((i > 1) ? blockchangemap[i - 1] : 0),
+                              last(old_range) + blockchangemap[i])
+        cfg.blocks[i] = BasicBlock(cfg.blocks[i], new_range)
+        if i <= length(cfg.index)
+            cfg.index[i] = cfg.index[i] + blockchangemap[i]
+        end
+    end
+end
diff --git a/Compiler/src/precompile.jl b/Compiler/src/precompile.jl
new file mode 100644
index 0000000000000..ebcfbb0edf7bf
--- /dev/null
+++ b/Compiler/src/precompile.jl
@@ -0,0 +1,471 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# This file replaces the functionality from src/precompile_utils.c
+
+compile_hint(@nospecialize(argt::Type)) = ccall(:jl_compile_hint, Int32, (Any,), argt) != 0
+
+# Utility functions for type manipulation
+function count_union_components(t::Union)
+    count = 0
+    while true
+        count += 1
+        t = t.b
+        if !isa(t, Union)
+            count += 1
+            break
+        end
+    end
+    return count
+end
+
+function nth_union_component(t::Union, n::Int)
+    current = 1
+    while current < n && isa(t, Union)
+        current += 1
+        t = t.b
+    end
+    if current == n
+        return isa(t, Union) ? t.a : t
+    else
+        error("Index out of bounds for Union type")
+    end
+end
+
+# Port of _compile_all_tvar_union
+# f(...) where {T<:Union{...}} is a common pattern
+function compile_all_tvar_union(methsig)
+    tvarslen = unionall_depth(methsig)
+    if tvarslen == 0
+        return compile_hint(methsig)
+    end
+
+    sigbody = methsig
+    env = Vector{Any}(undef, 2 * tvarslen)
+    idx = Vector{Int}(undef, tvarslen)
+
+    # Initialize environment
+    for i in 1:tvarslen
+        if !isa(sigbody, UnionAll)
+            return false
+        end
+        idx[i] = 1
+        var = sigbody.var
+        env[2*i - 1] = var
+
+        # Get upper bound
+        tv = var
+        while isa(tv, TypeVar)
+            tv = tv.ub
+        end
+
+        if isa(tv, DataType) && isabstracttype(tv) && !isa(tv, Type)
+            return false  # Any as TypeVar is common and not useful here
+        end
+
+        env[2*i] = tv
+        sigbody = sigbody.body
+    end
+
+    all_success = true
+    incr = false
+
+    while !incr
+        # Generate all combinations
+        for i in 1:tvarslen
+            incr = true
+            tv = env[2*i - 1]
+            while isa(tv, TypeVar)
+                tv = tv.ub
+            end
+
+            if isa(tv, Union)
+                l = count_union_components(tv)
+                j = idx[i]
+                env[2*i] = nth_union_component(tv, j)
+                j += 1
+
+                if incr
+                    if j > l
+                        idx[i] = 1
+                    else
+                        idx[i] = j
+                        incr = false
+                    end
+                end
+            end
+        end
+
+        # Try to instantiate and compile
+        sig = try
+                ccall(:jl_instantiate_type_with, Any, (Any, Ptr{Any}, Cint),
+                        sigbody, env, tvarslen)
+            catch
+                nothing
+            end
+        if isa(sig, DataType) && ccall(:jl_has_concrete_subtype, Cint, (Any,), sig) != 0
+            success = compile_hint(sig)
+            all_success = all_success && success
+        else
+            all_success = false
+        end
+    end
+
+    return all_success
+end
+
+# Port of _compile_all_union
+function compile_all_union(sig)
+    sigbody = unwrap_unionall(sig)::DataType
+
+    if !isa(sigbody, Type) || !isa(sigbody, DataType)
+        return compile_all_tvar_union(sig)
+    end
+
+    count_unions = 0
+    union_size = 1
+    params = sigbody.parameters
+
+    for ty in params
+        if isa(ty, Union)
+            count_unions += 1
+            union_size *= count_union_components(ty)
+        elseif isa(ty, DataType) &&
+               ((!isconcretetype(ty) || iskindtype(ty)) && !isType(ty))
+            return false  # no amount of union splitting will help
+        end
+    end
+
+    if union_size <= 1 || union_size > 8
+        return compile_all_tvar_union(sig)
+    end
+
+    idx = zeros(Int, count_unions)
+    all_success = true
+
+    incr = false
+    while !incr
+        # Generate parameter combinations
+        new_params = Vector{Any}(undef, length(params))
+        idx_ctr = 1
+        incr = true
+
+        for (i, ty) in enumerate(params)
+            if isa(ty, Union)
+                l = count_union_components(ty)
+                j = idx[idx_ctr]
+                new_params[i] = nth_union_component(ty, j + 1)  # 1-based indexing
+                j += 1
+
+                if incr
+                    if j >= l
+                        idx[idx_ctr] = 0
+                    else
+                        idx[idx_ctr] = j
+                        incr = false
+                    end
+                end
+                idx_ctr += 1
+            else
+                new_params[i] = ty
+            end
+        end
+
+        # Create new signature and try to compile
+        # Reconstruct tuple type
+        new_sigbody = Tuple{new_params...}
+        # Rewrap in UnionAll if needed
+        methsig = rewrap_unionall(new_sigbody, sig)
+        success = compile_all_tvar_union(methsig)
+        all_success = all_success && success
+    end
+    return all_success
+end
+
+# Complete method collection implementation
+function collect_all_method_defs(newmodules, mod_array)
+    allmeths = Any[]
+
+    function method_visitor(method)
+        method = method::Method
+        if newmodules !== nothing
+            method.module in newmodules || return true
+        end
+        if isdefined(method, :external_mt)
+            return true  # Continue iteration
+        end
+        push!(allmeths, method)
+        return true
+    end
+
+    # Always visit the global method table first
+    visit(method_visitor, Core.methodtable)
+
+    # If mod_array is provided, iterate through modules looking for MethodTable objects
+    #if mod_array !== nothing
+    #    function visit_methodtable(mt::Core.MethodTable)
+    #        if mt !== Core.methodtable  # Skip global method table since we already visited it
+    #            visit(method_visitor, mt)
+    #        end
+    #    end
+
+    #    function foreach_mtable_in_module(mod::Module)
+    #        # Get all bindings in the module and look for MethodTable objects
+    #        for name in names(mod, all=true, imported=true)
+    #            if isdefined(mod, name)
+    #                val = getglobal(mod, name)
+    #                if isa(val, Module) && val !== mod && parentmodule(val) === mod
+    #                    # Recursively visit submodules
+    #                    foreach_mtable_in_module(val)
+    #                elseif isa(val, Core.MethodTable)
+    #                    # Visit this method table
+    #                    visit_methodtable(val)
+    #                end
+    #            end
+    #        end
+    #    end
+
+    #    # Iterate through provided modules
+    #    for mod in mod_array
+    #        if isa(mod, Module)
+    #            # Only visit toplevel modules (where parent == mod)
+    #            if parentmodule(mod) === mod
+    #                foreach_mtable_in_module(mod)
+    #            end
+    #        end
+    #    end
+    #end
+    return allmeths
+end
+
+function infer_all_method_defs!(all::Bool, allmeths, world::UInt, worklist)
+    # Process collected methods and create method instances
+    for m in allmeths
+        m = m::Method
+
+        # Skip macro methods unless specifically requested
+        if !all && !iszero(ccall(:jl_method_is_macro, Cint, (Any,), m))
+            continue
+        end
+
+        if !isdefined(m, :source)
+            continue
+        end
+
+        # Check if this method has a single compilable specialization
+        if isa(m.sig, DataType) && isa_compileable_sig(m.sig, Core.svec(), m)
+            # Method has a single compilable specialization, e.g. its definition
+            # signature is concrete. in this case we can just hint it.
+            ccall(:jl_compile_method_sig, Cvoid, (Any, Any, Any, Csize_t),
+                  m, m.sig, Core.svec(), world)
+        else
+            # Try to create leaf signatures using union expansion from the signature declaration and compile those
+            compile_all_union(m.sig)
+
+            if all
+                # Also compile fully generic fallback if requested
+                unspec = ccall(:jl_get_unspecialized, Any, (Any,), m)
+                if unspec !== nothing
+                    push!(worklist, unspec)
+                end
+
+            end
+        end
+    end
+end
+
+# This corresponds to precompile_enq_all_specializations_
+function enqueue_specializations!(all::Bool, newmethods, worklist)
+    for method in newmethods
+        method = method::Method
+
+        # Check for special methods that should always be compiled
+        if (method.name === :__init__ || isdefined(method, :ccallable)) && isdispatchtuple(method.sig)
+            # Get method instance for __init__ methods and @ccallable functions
+            mi = specialize_method(method, method.sig, Core.svec())::MethodInstance
+            push!(worklist, mi)
+        else
+            # Process existing specializations
+            specializations = method.specializations
+            if isa(specializations, Core.SimpleVector)
+                for i = 1:length(specializations)
+                    mi = specializations[i]
+                    if mi !== nothing
+                        enqueue_specialization!(all, worklist, mi::Core.MethodInstance)
+                    end
+                end
+            elseif isa(specializations, Core.MethodInstance)
+                enqueue_specialization!(all, worklist, specializations)
+            end
+        end
+
+        # Handle ccallable methods
+        if isdefined(method, :ccallable)
+            push!(worklist, method.ccallable)
+        end
+    end
+end
+
+function enqueue_specialization!(all::Bool, worklist, mi::Core.MethodInstance)
+    # Translation of precompile_enq_specialization_ from C
+    codeinst = isdefined(mi, :cache) ? mi.cache : nothing
+    while codeinst !== nothing
+        do_compile = false
+        if codeinst.owner !== nothing
+            # TODO(vchuravy) native code caching for foreign interpreters
+            # Skip foreign code instances
+        elseif use_const_api(codeinst) # Check if invoke is jl_fptr_const_return
+            do_compile = true
+        elseif codeinst.invoke != C_NULL || codeinst.precompile
+            do_compile = true
+        elseif !do_compile && isdefined(codeinst, :inferred)
+            inferred = codeinst.inferred
+            # Check compilation options and inlining cost
+            if (all || inferred === nothing ||
+                ((isa(inferred, String) || isa(inferred, CodeInfo) || isa(inferred, UInt8)) &&
+                 ccall(:jl_ir_inlining_cost, UInt16, (Any,), inferred) == typemax(UInt16)))
+                do_compile = true
+            end
+        end
+        if do_compile
+            push!(worklist, mi)
+            return true
+        end
+        # Move to the next code instance in the chain
+        codeinst = isdefined(codeinst, :next) ? codeinst.next : nothing
+    end
+    return true
+end
+
+# Main unified compilation and emission function
+# This replaces the functionality from jl_precompile
+function compile_and_emit_native(worlds::Vector{UInt},
+                                 trim_mode::UInt8,
+                                 external_linkage::Bool,
+                                 newmodules, # Vector{Module} or Nothing
+                                 mod_array, # Vector{Module} or Nothing
+                                 all::Bool,
+                                 module_init_order::Vector{Any}) # Vector{Module}
+    latestworld = worlds[end]
+
+    # Step 1: Precompile all __init__ methods that will be required
+    for mod in module_init_order
+        if Core.invoke_in_world(latestworld, isdefined, mod, :__init__)
+            f = Core.invoke_in_world(latestworld, getglobal, mod, :__init__)
+            # Get module compile setting
+            setting = ccall(:jl_get_module_compile, Cint, (Any,), mod)
+            if setting != 0 && setting != 1  # JL_OPTIONS_COMPILE_OFF=0, JL_OPTIONS_COMPILE_MIN=1
+                tt = Tuple{Core.Typeof(f)}
+                compile_hint(tt)
+                trim_mode == 0x00 || add_entrypoint(tt)
+            end
+        end
+    end
+
+    # Step 2: Collect all method definitions, filtered by worklist if provided
+    newmethods = collect_all_method_defs(newmodules, mod_array)
+
+    # Step 3: Collect set of method instances that seem worth compiling
+    specialization_worklist = []
+    if trim_mode == 0x00
+        if newmodules === nothing
+            infer_all_method_defs!(all, newmethods, latestworld, specialization_worklist)
+        else
+            # Compute new_ext_cis using queue_external_cis with global newly_inferred
+            new_ext_cis = ccall(:jl_compute_new_ext_cis, Any, ())
+            if new_ext_cis !== nothing
+                for i in 1:length(new_ext_cis::Vector{Any})
+                    ci = new_ext_cis[i]::CodeInstance
+                    enqueue_specialization!(all, specialization_worklist, get_ci_mi(ci))
+                end
+            end
+        end
+        enqueue_specializations!(all, newmethods, specialization_worklist)
+
+        # Process the specialization worklist and prepare final tocompile worklist
+        tocompile = []
+        for item in specialization_worklist
+            if isa(item, Core.MethodInstance)
+                processed_mi = process_method_instance_for_compilation(item, latestworld)
+                if processed_mi !== nothing
+                    push!(tocompile, processed_mi)
+                end
+            else
+                # Handle SimpleVector (ccallable entries)
+                push!(tocompile, item::Core.SimpleVector)
+            end
+        end
+    else # trimming mode
+        # array of MethodInstances and ccallable aliases to include in the output
+        tocompile = []
+
+        # Process entrypoint method instances
+        for mi in _entrypoint_mis
+            # Add the method instance to compile list
+            push!(tocompile, mi)
+
+            # Check if this method has a ccallable annotation
+            if isdefined(mi.def, :ccallable) && mi.def.ccallable !== nothing
+                push!(tocompile, mi.def.ccallable)
+            end
+        end
+    end
+
+    # Step 4: Perform type inference on tocompile to create codeinfos
+    codeinfos = try
+        typeinf_ext_toplevel(tocompile, worlds, trim_mode)
+    catch exc
+        # Handle trimming failures
+        isa(exc, Core.TrimFailure) || rethrow()
+        # The verification check failed. The error message should already have
+        # been printed, so give up here and exit (w/o a stack trace).
+        invokelatest(exit, 1)
+    end
+
+    return codeinfos
+
+end
+
+# Helper function to process method instances for compilation
+# This corresponds to the logic in jl_precompile_
+function process_method_instance_for_compilation(mi::Core.MethodInstance, world::UInt)
+    method = mi.def::Method
+    if !(isdefined(method, :unspecialized) && mi === method.unspecialized)
+        if !isa_compileable_sig(mi.specTypes, mi.sparam_vals, method)
+            # Try to get a compileable specialization
+            mi = ccall(:jl_get_specialization1, Any, (Any, Csize_t, Cint),
+                         mi.specTypes, world, #= mt_cache =# 0)::Union{Nothing,MethodInstance}
+        end
+    end
+    return mi
+end
+
+const _entrypoint_mis = Vector{Core.MethodInstance}()
+
+# Add a method signature as an entrypoint for compilation.
+function add_entrypoint(types::Type)
+    world = get_world_counter()
+    # Get the method instance for this signature
+    mi = ccall(:jl_get_compile_hint_specialization, Any,
+                   (Any, Csize_t, Cint),
+                   types, world, 1)
+    if mi == nothing
+        return false
+    end
+    push!(_entrypoint_mis, mi::Core.MethodInstance)
+    return true
+end
+
+# This corresponds to jl_add_ccallable_entrypoints
+function add_ccallable_entrypoints!()
+    # Collect all methods with ccallable annotations
+    ccallable_methods = Any[]
+    visit(Core.methodtable) do method
+        method = method::Method
+        if isdefined(method, :ccallable)
+            # Add the ccallable tuple signature
+            ccallable_sig = method.ccallable[2]  # Second element is the signature
+            add_entrypoint(ccallable_sig)
+        end
+        return true
+    end
+end
diff --git a/Compiler/src/reflection_interface.jl b/Compiler/src/reflection_interface.jl
new file mode 100644
index 0000000000000..3fc182685e598
--- /dev/null
+++ b/Compiler/src/reflection_interface.jl
@@ -0,0 +1,58 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+_findall_matches(interp::AbstractInterpreter, @nospecialize(tt)) = findall(tt, method_table(interp))
+_default_interp(world::UInt) = NativeInterpreter(world)
+
+_may_throw_methoderror(matches::MethodLookupResult) =
+    matches.ambig || !any(match::Core.MethodMatch->match.fully_covers, matches.matches)
+
+function _infer_exception_type(interp::AbstractInterpreter, @nospecialize(tt), optimize::Bool)
+    matches = _findall_matches(interp, tt)
+    matches === nothing && return nothing
+    exct = Union{}
+    if _may_throw_methoderror(matches)
+        # account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
+        exct = MethodError
+    end
+    for match in matches.matches
+        match = match::Core.MethodMatch
+        frame = typeinf_frame(interp, match, #=run_optimizer=#optimize)
+        frame === nothing && return Any
+        exct = tmerge(exct, widenconst(frame.result.exc_result))
+    end
+    return exct
+end
+
+function _infer_effects(interp::AbstractInterpreter, @nospecialize(tt), optimize::Bool)
+    matches = _findall_matches(interp, tt)
+    matches === nothing && return nothing
+    effects = EFFECTS_TOTAL
+    if _may_throw_methoderror(matches)
+        # account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
+        effects = Effects(effects; nothrow=false)
+    end
+    for match in matches.matches
+        match = match::Core.MethodMatch
+        frame = typeinf_frame(interp, match, #=run_optimizer=#optimize)
+        frame === nothing && return Effects()
+        effects = merge_effects(effects, frame.result.ipo_effects)
+    end
+    return effects
+end
+
+function statement_costs!(interp::AbstractInterpreter, cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, match::Core.MethodMatch)
+    params = OptimizationParams(interp)
+    sptypes = VarState[VarState(sp, false) for sp in match.sparams]
+    return statement_costs!(cost, body, src, sptypes, params)
+end
+
+function findsup_mt(@nospecialize(tt), world, method_table)
+    if method_table === nothing
+        table = InternalMethodTable(world)
+    elseif method_table isa Core.MethodTable
+        table = OverlayMethodTable(world, method_table)
+    else
+        table = method_table
+    end
+    return findsup(tt, table)
+end
diff --git a/Compiler/src/reinfer.jl b/Compiler/src/reinfer.jl
new file mode 100644
index 0000000000000..a4a1db6324f70
--- /dev/null
+++ b/Compiler/src/reinfer.jl
@@ -0,0 +1,658 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using ..Compiler.Base
+using ..Compiler: _findsup, store_backedges, JLOptions, get_world_counter,
+    _methods_by_ftype, get_methodtable, get_ci_mi, should_instrument,
+    morespecific, RefValue, get_require_world, Vector, IdDict
+using .Core: CodeInstance, MethodInstance
+
+const CI_FLAGS_NATIVE_CACHE_VALID = 0b1000
+const WORLD_AGE_REVALIDATION_SENTINEL::UInt = 1
+const _jl_debug_method_invalidation = RefValue{Union{Nothing,Vector{Any}}}(nothing)
+debug_method_invalidation(onoff::Bool) =
+    _jl_debug_method_invalidation[] = onoff ? Any[] : nothing
+
+# Immutable structs for different categories of state data
+struct VerifyMethodInitialState
+    codeinst::CodeInstance
+    mi::MethodInstance
+    def::Method
+    callees::Core.SimpleVector
+end
+
+struct VerifyMethodWorkState
+    depth::Int
+    cause::CodeInstance
+    recursive_index::Int
+    stage::Symbol
+end
+
+struct VerifyMethodResultState
+    child_cycle::Int
+    result_minworld::UInt
+    result_maxworld::UInt
+end
+
+# Container for all the work arrays
+struct VerifyMethodWorkspace
+    # Arrays of different state categories
+    initial_states::Vector{VerifyMethodInitialState}
+    work_states::Vector{VerifyMethodWorkState}
+    result_states::Vector{VerifyMethodResultState}
+
+    # Tarjan's algorithm working data
+    stack::Vector{CodeInstance}
+    visiting::IdDict{CodeInstance,Int}
+
+    function VerifyMethodWorkspace()
+        new(VerifyMethodInitialState[], VerifyMethodWorkState[], VerifyMethodResultState[],
+            CodeInstance[], IdDict{CodeInstance,Int}())
+    end
+end
+
+# Helper functions to create default states
+function VerifyMethodInitialState(codeinst::CodeInstance)
+    mi = get_ci_mi(codeinst)
+    def = mi.def::Method
+    VerifyMethodInitialState(codeinst, mi, def, codeinst.edges)
+end
+
+function VerifyMethodWorkState(dummy_cause::CodeInstance)
+    VerifyMethodWorkState(0, dummy_cause, 1, :init_and_process_callees)
+end
+
+function VerifyMethodResultState()
+    VerifyMethodResultState(0, 0, 0)
+end
+
+
+# Restore backedges to external targets
+# `edges` = [caller1, ...], the list of worklist-owned code instances internally
+function insert_backedges(internal_methods::Vector{Any})
+    # determine which CodeInstance objects are still valid in our image
+    # to enable any applicable new codes
+    backedges_only = unsafe_load(cglobal(:jl_first_image_replacement_world, UInt)) == typemax(UInt)
+    scan_new_methods!(internal_methods, backedges_only)
+    workspace = VerifyMethodWorkspace()
+    scan_new_code!(internal_methods, workspace)
+    nothing
+end
+
+function scan_new_code!(internal_methods::Vector{Any}, workspace::VerifyMethodWorkspace)
+    for i = 1:length(internal_methods)
+        codeinst = internal_methods[i]
+        codeinst isa CodeInstance || continue
+        # codeinst.owner === nothing || continue
+        validation_world = get_world_counter()
+        verify_method_graph(codeinst, validation_world, workspace)
+        # After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
+        # (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
+        # validity.
+        @ccall jl_promote_ci_to_current(codeinst::Any, validation_world::UInt)::Cvoid
+    end
+end
+
+function verify_method_graph(codeinst::CodeInstance, validation_world::UInt, workspace::VerifyMethodWorkspace)
+    @assert isempty(workspace.stack) "workspace corrupted"
+    @assert isempty(workspace.visiting) "workspace corrupted"
+    @assert isempty(workspace.initial_states) "workspace corrupted"
+    @assert isempty(workspace.work_states) "workspace corrupted"
+    @assert isempty(workspace.result_states) "workspace corrupted"
+    child_cycle, minworld, maxworld = verify_method(codeinst, validation_world, workspace)
+    @assert child_cycle == 0
+    @assert isempty(workspace.stack) "workspace corrupted"
+    @assert isempty(workspace.visiting) "workspace corrupted"
+    @assert isempty(workspace.initial_states) "workspace corrupted"
+    @assert isempty(workspace.work_states) "workspace corrupted"
+    @assert isempty(workspace.result_states) "workspace corrupted"
+    nothing
+end
+
+function gen_staged_sig(def::Method, mi::MethodInstance)
+    isdefined(def, :generator) || return nothing
+    isdispatchtuple(mi.specTypes) || return nothing
+    gen = Core.Typeof(def.generator)
+    return Tuple{gen, UInt, Method, Vararg}
+    ## more precise method lookup, but more costly and likely not actually better?
+    #tts = (mi.specTypes::DataType).parameters
+    #sps = Any[Core.Typeof(mi.sparam_vals[i]) for i in 1:length(mi.sparam_vals)]
+    #if def.isva
+    #    return Tuple{gen, UInt, Method, sps..., tts[1:def.nargs - 1]..., Tuple{tts[def.nargs - 1:end]...}}
+    #else
+    #    return Tuple{gen, UInt, Method, sps..., tts...}
+    #end
+end
+
+function needs_instrumentation(codeinst::CodeInstance, mi::MethodInstance, def::Method, validation_world::UInt)
+    if JLOptions().code_coverage != 0 || JLOptions().malloc_log != 0
+        # test if the code needs to run with instrumentation, in which case we cannot use existing generated code
+        if isdefined(def, :debuginfo) ? # generated_only functions do not have debuginfo, so fall back to considering their codeinst debuginfo though this may be slower and less reliable
+            should_instrument(def.module, def.debuginfo) :
+            isdefined(codeinst, :debuginfo) && should_instrument(def.module, codeinst.debuginfo)
+            return true
+        end
+        gensig = gen_staged_sig(def, mi)
+        if gensig !== nothing
+            # if this is defined by a generator, try to consider forcing re-running the generators too, to add coverage for them
+            minworld = RefValue{UInt}(1)
+            maxworld = RefValue{UInt}(typemax(UInt))
+            has_ambig = RefValue{Int32}(0)
+            result = _methods_by_ftype(gensig, nothing, -1, validation_world, #=ambig=#false, minworld, maxworld, has_ambig)
+            if result !== nothing
+                for k = 1:length(result)
+                    match = result[k]::Core.MethodMatch
+                    genmethod = match.method
+                    # no, I refuse to refuse to recurse into your cursed generated function generators and will only test one level deep here
+                    if isdefined(genmethod, :debuginfo) && should_instrument(genmethod.module, genmethod.debuginfo)
+                        return true
+                    end
+                end
+            end
+        end
+    end
+    return false
+end
+
+# Test all edges relevant to a method:
+# - Visit the entire call graph, starting from edges[idx] to determine if that method is valid
+# - Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
+#   and slightly modified with an early termination option once the computation reaches its minimum
+function verify_method(codeinst::CodeInstance, validation_world::UInt, workspace::VerifyMethodWorkspace)
+    # Initialize root state
+    push!(workspace.initial_states, VerifyMethodInitialState(codeinst))
+    push!(workspace.work_states, VerifyMethodWorkState(codeinst))
+    push!(workspace.result_states, VerifyMethodResultState())
+
+    current_depth = 1 # == length(workspace._states) == end
+    while true
+        # Get current state indices
+        initial = workspace.initial_states[current_depth]
+        work = workspace.work_states[current_depth]
+
+        if work.stage == :init_and_process_callees
+            # Initialize state and handle early returns
+            world = initial.codeinst.min_world
+            let max_valid2 = initial.codeinst.max_world
+                if max_valid2 ≠ WORLD_AGE_REVALIDATION_SENTINEL
+                    workspace.result_states[current_depth] = VerifyMethodResultState(0, world, max_valid2)
+                    workspace.work_states[current_depth] = VerifyMethodWorkState(work.depth, work.cause, work.recursive_index, :return_to_parent)
+                    continue
+                end
+            end
+
+            if needs_instrumentation(initial.codeinst, initial.mi, initial.def, validation_world)
+                workspace.result_states[current_depth] = VerifyMethodResultState(0, world, UInt(0))
+                workspace.work_states[current_depth] = VerifyMethodWorkState(work.depth, work.cause, work.recursive_index, :return_to_parent)
+                continue
+            end
+
+            if haskey(workspace.visiting, initial.codeinst)
+                workspace.result_states[current_depth] = VerifyMethodResultState(workspace.visiting[initial.codeinst], UInt(1), validation_world)
+                workspace.work_states[current_depth] = VerifyMethodWorkState(work.depth, work.cause, work.recursive_index, :return_to_parent)
+                continue
+            end
+
+            push!(workspace.stack, initial.codeinst)
+            depth = length(workspace.stack)
+            workspace.visiting[initial.codeinst] = depth
+
+            # unable to backdate before require_world, since Bindings are not able to track that information
+            minworld, maxworld = get_require_world(), validation_world
+
+            # Check for invalidation of GlobalRef edges
+            if (initial.def.did_scan_source & 0x1) == 0x0
+                backedges_only = unsafe_load(cglobal(:jl_first_image_replacement_world, UInt)) == typemax(UInt)
+                scan_new_method!(initial.def, backedges_only)
+            end
+            if (initial.def.did_scan_source & 0x4) != 0x0
+                maxworld = 0
+                invalidations = _jl_debug_method_invalidation[]
+                if invalidations !== nothing
+                    push!(invalidations, initial.def, "method_globalref", initial.codeinst, nothing)
+                end
+            end
+
+            # Process all non-CodeInstance edges
+            if !isempty(initial.callees) && maxworld != get_require_world()
+                matches = []
+                j = 1
+                while j <= length(initial.callees)
+                    local min_valid2::UInt, max_valid2::UInt
+                    edge = initial.callees[j]
+                    @assert !(edge isa Method) "unexpected Method edge indicates corrupt edges list creation"
+
+                    if edge isa CodeInstance
+                        # Convert CodeInstance to MethodInstance for validation (like original)
+                        edge = get_ci_mi(edge)
+                    end
+
+                    if edge isa MethodInstance
+                        sig = edge.specTypes
+                        min_valid2, max_valid2 = verify_call(sig, initial.callees, j, 1, world, true, matches)
+                        j += 1
+                    elseif edge isa Int
+                        sig = initial.callees[j+1]
+                        nmatches = abs(edge)
+                        fully_covers = edge > 0
+                        min_valid2, max_valid2 = verify_call(sig, initial.callees, j+2, nmatches, world, fully_covers, matches)
+                        j += 2 + nmatches
+                        edge = sig
+                    elseif edge isa Core.Binding
+                        j += 1
+                        min_valid2 = minworld
+                        max_valid2 = maxworld
+                        if !binding_was_invalidated(edge)
+                            if isdefined(edge, :partitions)
+                                min_valid2 = edge.partitions.min_world
+                                max_valid2 = edge.partitions.max_world
+                            end
+                        else
+                            min_valid2 = 1
+                            max_valid2 = 0
+                        end
+                    else
+                        callee = initial.callees[j+1]
+                        if callee isa Core.MethodTable
+                            j += 2
+                            continue
+                        end
+                        if callee isa CodeInstance
+                            callee = get_ci_mi(callee)
+                        end
+                        if callee isa MethodInstance
+                            meth = callee.def::Method
+                        else
+                            meth = callee::Method
+                        end
+                        min_valid2, max_valid2 = verify_invokesig(edge, meth, world, matches)
+                        j += 2
+                    end
+
+                    if minworld < min_valid2
+                        minworld = min_valid2
+                    end
+                    if maxworld > max_valid2
+                        maxworld = max_valid2
+                    end
+                    invalidations = _jl_debug_method_invalidation[]
+                    if max_valid2 ≠ typemax(UInt) && invalidations !== nothing
+                        push!(invalidations, edge, "insert_backedges_callee", initial.codeinst, copy(matches))
+                    end
+                    if max_valid2 == 0 && invalidations === nothing
+                        break
+                    end
+                end
+            end
+
+            # Store computed minworld/maxworld in result state and transition to recursive phase
+            workspace.result_states[current_depth] = VerifyMethodResultState(depth, minworld, maxworld)
+            workspace.work_states[current_depth] = VerifyMethodWorkState(depth, work.cause, 1, :recursive_phase)
+
+        elseif work.stage == :recursive_phase
+            # Find next CodeInstance edge that needs processing
+            recursive_index = work.recursive_index
+            found_child = false
+            while recursive_index ≤ length(initial.callees)
+                edge = initial.callees[recursive_index]
+                recursive_index += 1
+
+                if edge isa CodeInstance
+                    # Create child state and add to stack
+                    workspace.work_states[current_depth] = VerifyMethodWorkState(work.depth, work.cause, recursive_index, :recursive_phase)
+                    push!(workspace.initial_states, VerifyMethodInitialState(edge))
+                    push!(workspace.work_states, VerifyMethodWorkState(edge))
+                    push!(workspace.result_states, VerifyMethodResultState())
+                    current_depth += 1
+                    found_child = true
+                    break
+                end
+            end
+
+            if !found_child
+                workspace.work_states[current_depth] = VerifyMethodWorkState(work.depth, work.cause, recursive_index, :cleanup)
+            end
+
+        elseif work.stage == :cleanup
+            # If we are the top of the current cycle, now mark all other parts of
+            # our cycle with what we found.
+            # Or if we found a failed edge, also mark all of the other parts of the
+            # cycle as also having a failed edge.
+            result = workspace.result_states[current_depth]
+            if result.result_maxworld == 0 || result.child_cycle == work.depth
+                while length(workspace.stack) ≥ work.depth
+                    child = pop!(workspace.stack)
+                    if result.result_maxworld ≠ 0
+                        @atomic :monotonic child.min_world = result.result_minworld
+                        # Finally, if this CI is still valid in some world age and marked as valid in the native cache, poke it in that mi's cache now
+                        if child.flags & CI_FLAGS_NATIVE_CACHE_VALID == CI_FLAGS_NATIVE_CACHE_VALID
+                            @ccall jl_mi_cache_insert(get_ci_mi(child)::Any, child::Any)::Cvoid
+                        end
+                    end
+                    @atomic :monotonic child.max_world = result.result_maxworld
+                    if result.result_maxworld == validation_world && validation_world == get_world_counter() && isdefined(child, :edges)
+                        store_backedges(child, child.edges)
+                    end
+                    @assert workspace.visiting[child] == length(workspace.stack) + 1 "internal error maintaining workspace"
+                    delete!(workspace.visiting, child)
+                    invalidations = _jl_debug_method_invalidation[]
+                    if invalidations !== nothing && result.result_maxworld < validation_world
+                        push!(invalidations, child, "verify_methods", work.cause)
+                    end
+                end
+
+                workspace.result_states[current_depth] = VerifyMethodResultState(0, result.result_minworld, result.result_maxworld)
+            end
+
+            workspace.work_states[current_depth] = VerifyMethodWorkState(work.depth, work.cause, work.recursive_index, :return_to_parent)
+
+        elseif work.stage == :return_to_parent
+            # Pass results to parent and process them
+            pop!(workspace.initial_states)
+            pop!(workspace.work_states)
+            result = pop!(workspace.result_states)
+            current_depth -= 1
+            if current_depth == 0 # Return results from the root call
+                return (result.child_cycle, result.result_minworld, result.result_maxworld)
+            end
+            # Propagate results to parent
+            parent_work = workspace.work_states[current_depth]
+            parent_result = workspace.result_states[current_depth]
+            callee = initial.codeinst
+            child_cycle, min_valid2, max_valid2 = result.child_cycle, result.result_minworld, result.result_maxworld
+            parent_cycle = parent_result.child_cycle
+            parent_minworld = parent_result.result_minworld
+            parent_maxworld = parent_result.result_maxworld
+            parent_cause = parent_work.cause
+            parent_stage = parent_work.stage
+            if parent_minworld < min_valid2
+                parent_minworld = min_valid2
+            end
+            if parent_minworld > max_valid2
+                max_valid2 = 0
+            end
+            if parent_maxworld > max_valid2
+                parent_cause = callee
+                parent_maxworld = max_valid2
+            end
+            if max_valid2 == 0
+                # found what we were looking for, so terminate early
+                # The parent should break out of its loop in :recursive_phase
+                parent_stage = :cleanup
+            elseif child_cycle ≠ 0 && child_cycle < parent_cycle
+                # record the cycle will resolve at depth "cycle"
+                parent_cycle = child_cycle
+            end
+            workspace.work_states[current_depth] = VerifyMethodWorkState(parent_work.depth, parent_cause, parent_work.recursive_index, parent_stage)
+            workspace.result_states[current_depth] = VerifyMethodResultState(parent_cycle, parent_minworld, parent_maxworld)
+        end
+    end
+end
+
+function get_method_from_edge(@nospecialize t)
+    if t isa Method
+        return t
+    else
+        if t isa CodeInstance
+            t = get_ci_mi(t)::MethodInstance
+        else
+            t = t::MethodInstance
+        end
+        return t.def::Method
+    end
+end
+
+# Check if method2 is in method1's interferences set
+# Returns true if method2 is found (meaning !morespecific(method1, method2))
+function method_in_interferences(method2::Method, method1::Method)
+    interferences = method1.interferences
+    for k = 1:length(interferences)
+        isassigned(interferences, k) || break
+        interference_method = interferences[k]::Method
+        if interference_method === method2
+            return true
+        end
+    end
+    return false
+end
+
+# Check if method1 is more specific than method2 via the interference graph
+function method_morespecific_via_interferences(method1::Method, method2::Method)
+    if method1 === method2
+        return false
+    end
+
+    # Check direct interferences first
+    if method_in_interferences(method2, method1)
+        return false
+    end
+    if method_in_interferences(method1, method2)
+        return true
+    end
+
+    visited = Method[]
+    push!(visited, method2)
+
+    workqueue = Method[method2]
+    while !isempty(workqueue)
+        current = pop!(workqueue)
+        interferences = current.interferences
+        for k = 1:length(interferences)
+            isassigned(interferences, k) || break
+            method3 = interferences[k]::Method
+
+            # Check if we're already visiting this interference method (cycle prevention and memoization)
+            method3 in visited && continue
+            push!(visited, method3)
+
+            if method_in_interferences(current, method3)
+                continue # only follow edges to morespecific methods in search of the morespecific target (skip ambiguities)
+            end
+
+            # Check direct interferences for this interference method
+            if method_in_interferences(method3, method1)
+                continue # return false for this path
+            end
+            if method_in_interferences(method1, method3)
+                return true # found method1 in the interference graph
+            end
+
+            push!(workqueue, method3)
+        end
+    end
+
+    # slow check: @assert ms === morespecific(method1, method2) || typeintersect(method1.sig, method2.sig) === Union{} || typeintersect(method2.sig, method1.sig) === Union{}
+    return false
+end
+
+function verify_call(@nospecialize(sig), expecteds::Core.SimpleVector, i::Int, n::Int, world::UInt, fully_covers::Bool, matches::Vector{Any})
+    # verify that these edges intersect with the same methods as before
+    mi = nothing
+    expected_deleted = false
+    for j = 1:n
+        t = expecteds[i+j-1]
+        meth = get_method_from_edge(t)
+        if iszero(meth.dispatch_status & METHOD_SIG_LATEST_WHICH)
+            expected_deleted = true
+            break
+        end
+    end
+    if expected_deleted
+        if _jl_debug_method_invalidation[] === nothing && world == get_world_counter()
+            return UInt(1), UInt(0)
+        end
+    elseif n == 1
+        # first, fast-path a check if the expected method simply dominates its sig anyways
+        # so the result of ml_matches is already simply known
+        let t = expecteds[i], meth, minworld, maxworld
+            meth = get_method_from_edge(t)
+            if !(t isa Method)
+                if t isa CodeInstance
+                    mi = get_ci_mi(t)::MethodInstance
+                else
+                    mi = t::MethodInstance
+                end
+                # Fast path is legal when fully_covers=true
+                if fully_covers && !iszero(mi.dispatch_status & METHOD_SIG_LATEST_ONLY)
+                    minworld = meth.primary_world
+                    @assert minworld ≤ world "expected method not present in verification world"
+                    maxworld = typemax(UInt)
+                    return minworld, maxworld
+                end
+            end
+            # Fast path is legal when fully_covers=true
+            if fully_covers && !iszero(meth.dispatch_status & METHOD_SIG_LATEST_ONLY)
+                minworld = meth.primary_world
+                @assert minworld ≤ world "expected method not present in verification world"
+                maxworld = typemax(UInt)
+                return minworld, maxworld
+            end
+        end
+    elseif n > 1
+        # Try the interference set fast path: check if all interference sets are covered by expecteds
+        interference_fast_path_success = fully_covers
+        # If it didn't fail yet, then check that all interference methods are either expected, or not applicable.
+        if interference_fast_path_success
+            local interference_minworld::UInt = 1
+            for j = 1:n
+                meth = get_method_from_edge(expecteds[i+j-1])
+                if interference_minworld < meth.primary_world
+                    interference_minworld = meth.primary_world
+                end
+                interferences = meth.interferences
+                for k = 1:length(interferences)
+                    isassigned(interferences, k) || break # no more entries
+                    interference_method = interferences[k]::Method
+                    if iszero(interference_method.dispatch_status & METHOD_SIG_LATEST_WHICH)
+                        # detected a deleted interference_method, so need the full lookup to compute minworld
+                        interference_fast_path_success = false
+                        break
+                    end
+                    world < interference_method.primary_world && break # this and later entries are for a future world
+                    local found_in_expecteds = false
+                    for j = 1:n
+                        if interference_method === get_method_from_edge(expecteds[i+j-1])
+                            found_in_expecteds = true
+                            break
+                        end
+                    end
+                    if !found_in_expecteds
+                        ti = typeintersect(sig, interference_method.sig)
+                        if !(ti === Union{})
+                            # try looking for a different expected method that fully covers this interference_method anyways over their intersection
+                            for j = 1:n
+                                meth2 = get_method_from_edge(expecteds[i+j-1])
+                                if method_morespecific_via_interferences(meth2, interference_method) && ti <: meth2.sig
+                                    found_in_expecteds = true
+                                    break
+                                end
+                            end
+                            if !found_in_expecteds
+                                meth2 = get_method_from_edge(expecteds[i])
+                                interference_fast_path_success = false
+                                break
+                            end
+                        end
+                    end
+                end
+                if !interference_fast_path_success
+                    break
+                end
+            end
+            if interference_fast_path_success
+                # All interference sets are covered by expecteds, can return success
+                @assert interference_minworld ≤ world "expected method not present in verification world"
+                maxworld = typemax(UInt)
+                return interference_minworld, maxworld
+            end
+        end
+   end
+    # next, compare the current result of ml_matches to the old result
+    lim = _jl_debug_method_invalidation[] !== nothing ? Int(typemax(Int32)) : n
+    minworld = RefValue{UInt}(1)
+    maxworld = RefValue{UInt}(typemax(UInt))
+    has_ambig = RefValue{Int32}(0)
+    result = _methods_by_ftype(sig, nothing, lim, world, #=ambig=#false, minworld, maxworld, has_ambig)
+    if result === nothing
+        empty!(matches)
+        maxworld[] = 0
+    else
+        # setdiff!(result, expected)
+        if length(result) ≠ n
+            maxworld[] = 0
+        end
+        ins = 0
+        for k = 1:length(result)
+            match = result[k]::Core.MethodMatch
+            local found = false
+            for j = 1:n
+                t = expecteds[i+j-1]
+                if match.method == get_method_from_edge(t)
+                    found = true
+                    break
+                end
+            end
+            if !found
+                # intersection has a new method or a method was
+                # deleted--this is now probably no good, just invalidate
+                # everything about it now
+                maxworld[] = 0
+                if _jl_debug_method_invalidation[] === nothing
+                    break
+                end
+                ins += 1
+                result[ins] = match.method
+            end
+        end
+        if maxworld[] ≠ typemax(UInt) && _jl_debug_method_invalidation[] !== nothing
+            resize!(result, ins)
+            copy!(matches, result)
+        end
+    end
+    if maxworld[] == typemax(UInt) && mi isa MethodInstance
+        ccall(:jl_promote_mi_to_current, Cvoid, (Any, UInt, UInt), mi, minworld[], world)
+    end
+    return minworld[], maxworld[]
+end
+
+# fast-path dispatch_status bit definitions (false indicates unknown)
+# true indicates this method would be returned as the result from `which` when invoking `method.sig` in the current latest world
+const METHOD_SIG_LATEST_WHICH = 0x1
+# true indicates this method would be returned as the only result from `methods` when calling `method.sig` in the current latest world
+const METHOD_SIG_LATEST_ONLY = 0x2
+
+function verify_invokesig(@nospecialize(invokesig), expected::Method, world::UInt, matches::Vector{Any})
+    @assert invokesig isa Type "corrupt edges list"
+    local minworld::UInt, maxworld::UInt
+    empty!(matches)
+    if invokesig === expected.sig && !iszero(expected.dispatch_status & METHOD_SIG_LATEST_WHICH)
+        # the invoke match is `expected` for `expected->sig`, unless `expected` is replaced
+        minworld = expected.primary_world
+        @assert minworld ≤ world "expected method not present in verification world"
+        maxworld = typemax(UInt)
+    else
+        mt = get_methodtable(expected)
+        if mt === nothing
+            minworld = 1
+            maxworld = 0
+        else
+            matched, valid_worlds = _findsup(invokesig, mt, world)
+            minworld, maxworld = valid_worlds.min_world, valid_worlds.max_world
+            if matched === nothing
+                maxworld = 0
+            else
+                matched = matched.method
+                push!(matches, matched)
+                if matched !== expected
+                    maxworld = 0
+                end
+            end
+        end
+    end
+    return minworld, maxworld
+end
+
+# Wrapper to call insert_backedges in typeinf_world for external calls
+function insert_backedges_typeinf(internal_methods::Vector{Any})
+    args = Any[insert_backedges, internal_methods]
+    return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Any}, Cint), args, length(args))
+end
diff --git a/base/compiler/sort.jl b/Compiler/src/sort.jl
similarity index 97%
rename from base/compiler/sort.jl
rename to Compiler/src/sort.jl
index 71d2f8a51cd59..6c8571f6198e6 100644
--- a/base/compiler/sort.jl
+++ b/Compiler/src/sort.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # reference on sorted binary search:
-#   http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary
+#   https://www.tbray.org/ongoing/When/200x/2003/03/22/Binary
 
 # index of the first value of vector a that is greater than or equal to x;
 # returns lastindex(v)+1 if x is greater than all values in v.
diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/Compiler/src/ssair/EscapeAnalysis.jl
similarity index 55%
rename from base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
rename to Compiler/src/ssair/EscapeAnalysis.jl
index 2469507fd3699..75dfc42e5ddcd 100644
--- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
+++ b/Compiler/src/ssair/EscapeAnalysis.jl
@@ -10,59 +10,56 @@ export
     has_thrown_escape,
     has_all_escape
 
-const _TOP_MOD = ccall(:jl_base_relative_to, Any, (Any,), EscapeAnalysis)::Module
+using Base: Base
 
 # imports
-import ._TOP_MOD: ==, getindex, setindex!
+import Base: ==, copy, getindex, setindex!
 # usings
-import Core:
-    MethodInstance, Const, Argument, SSAValue, PiNode, PhiNode, UpsilonNode, PhiCNode,
-    ReturnNode, GotoNode, GotoIfNot, SimpleVector, MethodMatch, CodeInstance,
-    sizeof, ifelse, arrayset, arrayref, arraysize
-import ._TOP_MOD:     # Base definitions
-    @__MODULE__, @eval, @assert, @specialize, @nospecialize, @inbounds, @inline, @noinline,
-    @label, @goto, !, !==, !=, ≠, +, -, *, ≤, <, ≥, >, &, |, <<, error, missing, copy,
-    Vector, BitSet, IdDict, IdSet, UnitRange, Csize_t, Callable, ∪, ⊆, ∩, :, ∈, ∉, =>,
-    in, length, get, first, last, haskey, keys, get!, isempty, isassigned,
-    pop!, push!, pushfirst!, empty!, delete!, max, min, enumerate, unwrap_unionall,
-    ismutabletype
-import Core.Compiler: # Core.Compiler specific definitions
-    Bottom, InferenceResult, IRCode, IR_FLAG_NOTHROW, SimpleInferenceLattice,
-    isbitstype, isexpr, is_meta_expr_head, println, widenconst, argextype, singleton_type,
-    fieldcount_noerror, try_compute_field, try_compute_fieldidx, hasintersect, ⊑,
-    intrinsic_nothrow, array_builtin_common_typecheck, arrayset_typecheck,
-    setfield!_nothrow, alloc_array_ndims, stmt_effect_free, check_effect_free!
-
-include(x) = _TOP_MOD.include(@__MODULE__, x)
-if _TOP_MOD === Core.Compiler
-    include("compiler/ssair/EscapeAnalysis/disjoint_set.jl")
-else
-    include("disjoint_set.jl")
-end
+using Core
+using Core: Builtin, IntrinsicFunction, SimpleVector, ifelse, sizeof
+using Core.IR
+using Base:       # Base definitions
+    @__MODULE__, @assert, @eval, @goto, @inbounds, @inline, @label, @noinline,
+    @nospecialize, @specialize, BitSet, IdDict, IdSet, UnitRange, Vector,
+    delete!, empty!, enumerate, first, get, get!, hasintersect, haskey, isassigned,
+    isempty, length, max, min, missing, println, push!, pushfirst!,
+    !, !==, &, *, +, -, :, <, <<, >, |, ∈, ∉, ∩, ∪, ≠, ≤, ≥, ⊆
+using ..Compiler: # Compiler specific definitions
+    AbstractLattice, Compiler, IRCode, IR_FLAG_NOTHROW,
+    argextype, fieldcount_noerror, has_flag, intrinsic_nothrow, is_meta_expr_head,
+    is_identity_free_argtype, isexpr, setfield!_nothrow, singleton_type, try_compute_field,
+    try_compute_fieldidx, widenconst
+
+function include(x::String)
+    if !isdefined(Base, :end_base_include)
+        # During bootstrap, all includes are relative to `base/`
+        x = ccall(:jl_prepend_string, Ref{String}, (Any, Any), "ssair/", x)
+    end
+    Compiler.include(@__MODULE__, x)
+end
+
+include("disjoint_set.jl")
 
 const AInfo = IdSet{Any}
-const LivenessSet = BitSet
-const 𝕃ₒ = SimpleInferenceLattice.instance
 
 """
     x::EscapeInfo
 
 A lattice for escape information, which holds the following properties:
-- `x.Analyzed::Bool`: not formally part of the lattice, only indicates `x` has not been analyzed or not
+- `x.Analyzed::Bool`: not formally part of the lattice, only indicates whether `x` has been analyzed
 - `x.ReturnEscape::Bool`: indicates `x` can escape to the caller via return
 - `x.ThrownEscape::BitSet`: records SSA statement numbers where `x` can be thrown as exception:
   * `isempty(x.ThrownEscape)`: `x` will never be thrown in this call frame (the bottom)
   * `pc ∈ x.ThrownEscape`: `x` may be thrown at the SSA statement at `pc`
   * `-1 ∈ x.ThrownEscape`: `x` may be thrown at arbitrary points of this call frame (the top)
   This information will be used by `escape_exception!` to propagate potential escapes via exception.
-- `x.AliasInfo::Union{Bool,IndexableFields,IndexableElements,Unindexable}`: maintains all possible values
+- `x.AliasInfo::Union{Bool,IndexableFields,Unindexable}`: maintains all possible values
   that can be aliased to fields or array elements of `x`:
   * `x.AliasInfo === false` indicates the fields/elements of `x` aren't analyzed yet
   * `x.AliasInfo === true` indicates the fields/elements of `x` can't be analyzed,
     e.g. the type of `x` is not known or is not concrete and thus its fields/elements
     can't be known precisely
   * `x.AliasInfo::IndexableFields` records all the possible values that can be aliased to fields of object `x` with precise index information
-  * `x.AliasInfo::IndexableElements` records all the possible values that can be aliased to elements of array `x` with precise index information
   * `x.AliasInfo::Unindexable` records all the possible values that can be aliased to fields/elements of `x` without precise index information
 - `x.Liveness::BitSet`: records SSA statement numbers where `x` should be live, e.g.
   to be used as a call argument, to be returned to a caller, or preserved for `:foreigncall`:
@@ -88,78 +85,74 @@ An abstract state will be initialized with the bottom(-like) elements:
 struct EscapeInfo
     Analyzed::Bool
     ReturnEscape::Bool
-    ThrownEscape::LivenessSet
-    AliasInfo #::Union{IndexableFields,IndexableElements,Unindexable,Bool}
-    Liveness::LivenessSet
+    ThrownEscape::BitSet
+    AliasInfo #::Union{IndexableFields,Unindexable,Bool}
+    Liveness::BitSet
 
     function EscapeInfo(
         Analyzed::Bool,
         ReturnEscape::Bool,
-        ThrownEscape::LivenessSet,
-        AliasInfo#=::Union{IndexableFields,IndexableElements,Unindexable,Bool}=#,
-        Liveness::LivenessSet,
-        )
+        ThrownEscape::BitSet,
+        AliasInfo#=::Union{IndexableFields,Unindexable,Bool}=#,
+        Liveness::BitSet)
         @nospecialize AliasInfo
         return new(
             Analyzed,
             ReturnEscape,
             ThrownEscape,
             AliasInfo,
-            Liveness,
-            )
+            Liveness)
     end
     function EscapeInfo(
         x::EscapeInfo,
         # non-concrete fields should be passed as default arguments
         # in order to avoid allocating non-concrete `NamedTuple`s
-        AliasInfo#=::Union{IndexableFields,IndexableElements,Unindexable,Bool}=# = x.AliasInfo;
+        AliasInfo#=::Union{IndexableFields,Unindexable,Bool}=# = x.AliasInfo;
         Analyzed::Bool = x.Analyzed,
         ReturnEscape::Bool = x.ReturnEscape,
-        ThrownEscape::LivenessSet = x.ThrownEscape,
-        Liveness::LivenessSet = x.Liveness,
-        )
+        ThrownEscape::BitSet = x.ThrownEscape,
+        Liveness::BitSet = x.Liveness)
         @nospecialize AliasInfo
         return new(
             Analyzed,
             ReturnEscape,
             ThrownEscape,
             AliasInfo,
-            Liveness,
-            )
+            Liveness)
     end
 end
 
 # precomputed default values in order to eliminate computations at each callsite
 
-const BOT_THROWN_ESCAPE = LivenessSet()
+const BOT_THROWN_ESCAPE = BitSet()
 # NOTE the lattice operations should try to avoid actual set computations on this top value,
-# and e.g. LivenessSet(0:1000000) should also work without incurring excessive computations
-const TOP_THROWN_ESCAPE = LivenessSet(-1)
+# and e.g. BitSet(0:1000000) should also work without incurring excessive computations
+const TOP_THROWN_ESCAPE = BitSet(-1)
 
-const BOT_LIVENESS = LivenessSet()
+const BOT_LIVENESS = BitSet()
 # NOTE the lattice operations should try to avoid actual set computations on this top value,
-# and e.g. LivenessSet(0:1000000) should also work without incurring excessive computations
-const TOP_LIVENESS = LivenessSet(-1:0)
-const ARG_LIVENESS = LivenessSet(0)
+# and e.g. BitSet(0:1000000) should also work without incurring excessive computations
+const TOP_LIVENESS = BitSet(-1:0)
+const ARG_LIVENESS = BitSet(0)
 
 # the constructors
 NotAnalyzed() = EscapeInfo(false, false, BOT_THROWN_ESCAPE, false, BOT_LIVENESS) # not formally part of the lattice
 NoEscape() = EscapeInfo(true, false, BOT_THROWN_ESCAPE, false, BOT_LIVENESS)
 ArgEscape() = EscapeInfo(true, false, BOT_THROWN_ESCAPE, true, ARG_LIVENESS)
-ReturnEscape(pc::Int) = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, LivenessSet(pc))
+ReturnEscape(pc::Int) = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, BitSet(pc))
 AllReturnEscape() = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, TOP_LIVENESS)
-ThrownEscape(pc::Int) = EscapeInfo(true, false, LivenessSet(pc), false, BOT_LIVENESS)
+ThrownEscape(pc::Int) = EscapeInfo(true, false, BitSet(pc), false, BOT_LIVENESS)
 AllEscape() = EscapeInfo(true, true, TOP_THROWN_ESCAPE, true, TOP_LIVENESS)
 
 const ⊥, ⊤ = NotAnalyzed(), AllEscape()
 
 # Convenience names for some ⊑ₑ queries
 has_no_escape(x::EscapeInfo) = !x.ReturnEscape && isempty(x.ThrownEscape) && 0 ∉ x.Liveness
-has_arg_escape(x::EscapeInfo) = 0 in x.Liveness
+has_arg_escape(x::EscapeInfo) = 0 ∈ x.Liveness
 has_return_escape(x::EscapeInfo) = x.ReturnEscape
-has_return_escape(x::EscapeInfo, pc::Int) = x.ReturnEscape && (-1 ∈ x.Liveness || pc in x.Liveness)
+has_return_escape(x::EscapeInfo, pc::Int) = x.ReturnEscape && (-1 ∈ x.Liveness || pc ∈ x.Liveness)
 has_thrown_escape(x::EscapeInfo) = !isempty(x.ThrownEscape)
-has_thrown_escape(x::EscapeInfo, pc::Int) = -1 ∈ x.ThrownEscape  || pc in x.ThrownEscape
+has_thrown_escape(x::EscapeInfo, pc::Int) = -1 ∈ x.ThrownEscape || pc ∈ x.ThrownEscape
 has_all_escape(x::EscapeInfo) = ⊤ ⊑ₑ x
 
 # utility lattice constructors
@@ -172,14 +165,13 @@ ignore_liveness(x::EscapeInfo) = EscapeInfo(x; Liveness=BOT_LIVENESS)
 struct IndexableFields
     infos::Vector{AInfo}
 end
-struct IndexableElements
-    infos::IdDict{Int,AInfo}
-end
 struct Unindexable
     info::AInfo
 end
 IndexableFields(nflds::Int) = IndexableFields(AInfo[AInfo() for _ in 1:nflds])
 Unindexable() = Unindexable(AInfo())
+copy(AliasInfo::IndexableFields) = IndexableFields(AInfo[copy(info) for info in AliasInfo.infos])
+copy(AliasInfo::Unindexable) = Unindexable(copy(AliasInfo.info))
 
 merge_to_unindexable(AliasInfo::IndexableFields) = Unindexable(merge_to_unindexable(AliasInfo.infos))
 merge_to_unindexable(AliasInfo::Unindexable, AliasInfos::IndexableFields) = Unindexable(merge_to_unindexable(AliasInfo.info, AliasInfos.infos))
@@ -190,15 +182,6 @@ function merge_to_unindexable(info::AInfo, infos::Vector{AInfo})
     end
     return info
 end
-merge_to_unindexable(AliasInfo::IndexableElements) = Unindexable(merge_to_unindexable(AliasInfo.infos))
-merge_to_unindexable(AliasInfo::Unindexable, AliasInfos::IndexableElements) = Unindexable(merge_to_unindexable(AliasInfo.info, AliasInfos.infos))
-merge_to_unindexable(infos::IdDict{Int,AInfo}) = merge_to_unindexable(AInfo(), infos)
-function merge_to_unindexable(info::AInfo, infos::IdDict{Int,AInfo})
-    for idx in keys(infos)
-        info = info ∪ infos[idx]
-    end
-    return info
-end
 
 # we need to make sure this `==` operator corresponds to lattice equality rather than object equality,
 # otherwise `propagate_changes` can't detect the convergence
@@ -221,9 +204,6 @@ x::EscapeInfo == y::EscapeInfo = begin
     elseif isa(xa, IndexableFields)
         isa(ya, IndexableFields) || return false
         xa.infos == ya.infos || return false
-    elseif isa(xa, IndexableElements)
-        isa(ya, IndexableElements) || return false
-        xa.infos == ya.infos || return false
     else
         xa = xa::Unindexable
         isa(ya, Unindexable) || return false
@@ -275,8 +255,6 @@ x::EscapeInfo ⊑ₑ y::EscapeInfo = begin
             for i in 1:xn
                 xinfos[i] ⊆ yinfos[i] || return false
             end
-        elseif isa(ya, IndexableElements)
-            return false
         elseif isa(ya, Unindexable)
             xinfos, yinfo = xa.infos, ya.info
             for i = length(xinfos)
@@ -285,23 +263,6 @@ x::EscapeInfo ⊑ₑ y::EscapeInfo = begin
         else
             ya === true || return false
         end
-    elseif isa(xa, IndexableElements)
-        if isa(ya, IndexableElements)
-            xinfos, yinfos = xa.infos, ya.infos
-            keys(xinfos) ⊆ keys(yinfos) || return false
-            for idx in keys(xinfos)
-                xinfos[idx] ⊆ yinfos[idx] || return false
-            end
-        elseif isa(ya, IndexableFields)
-            return false
-        elseif isa(ya, Unindexable)
-            xinfos, yinfo = xa.infos, ya.info
-            for idx in keys(xinfos)
-                xinfos[idx] ⊆ yinfo || return false
-            end
-        else
-            ya === true || return false
-        end
     else
         xa = xa::Unindexable
         if isa(ya, Unindexable)
@@ -407,33 +368,10 @@ function merge_alias_info(@nospecialize(xa), @nospecialize(ya))
         else
             return true # handle conflicting case conservatively
         end
-    elseif isa(xa, IndexableElements)
-        if isa(ya, IndexableElements)
-            xinfos, yinfos = xa.infos, ya.infos
-            infos = IdDict{Int,AInfo}()
-            for idx in keys(xinfos)
-                if !haskey(yinfos, idx)
-                    infos[idx] = xinfos[idx]
-                else
-                    infos[idx] = xinfos[idx] ∪ yinfos[idx]
-                end
-            end
-            for idx in keys(yinfos)
-                haskey(xinfos, idx) && continue # unioned already
-                infos[idx] = yinfos[idx]
-            end
-            return IndexableElements(infos)
-        elseif isa(ya, Unindexable)
-            return merge_to_unindexable(ya, xa)
-        else
-            return true # handle conflicting case conservatively
-        end
     else
         xa = xa::Unindexable
         if isa(ya, IndexableFields)
             return merge_to_unindexable(xa, ya)
-        elseif isa(ya, IndexableElements)
-            return merge_to_unindexable(xa, ya)
         else
             ya = ya::Unindexable
             xinfo, yinfo = xa.info, ya.info
@@ -445,8 +383,6 @@ end
 
 const AliasSet = IntDisjointSet{Int}
 
-const ArrayInfo = IdDict{Int,Vector{Int}}
-
 """
     estate::EscapeState
 
@@ -457,13 +393,12 @@ struct EscapeState
     escapes::Vector{EscapeInfo}
     aliasset::AliasSet
     nargs::Int
-    arrayinfo::Union{Nothing,ArrayInfo}
 end
-function EscapeState(nargs::Int, nstmts::Int, arrayinfo::Union{Nothing,ArrayInfo})
+function EscapeState(nargs::Int, nstmts::Int)
     escapes = EscapeInfo[
         1 ≤ i ≤ nargs ? ArgEscape() : ⊥ for i in 1:(nargs+nstmts)]
     aliasset = AliasSet(nargs+nstmts)
-    return EscapeState(escapes, aliasset, nargs, arrayinfo)
+    return EscapeState(escapes, aliasset, nargs)
 end
 function getindex(estate::EscapeState, @nospecialize(x))
     xidx = iridx(x, estate)
@@ -480,7 +415,7 @@ end
 """
     iridx(x, estate::EscapeState) -> xidx::Union{Int,Nothing}
 
-Tries to convert analyzable IR element `x::Union{Argument,SSAValue}` to
+Try to convert analyzable IR element `x::Union{Argument,SSAValue}` to
 its unique identifier number `xidx` that is valid in the analysis context of `estate`.
 Returns `nothing` if `x` isn't maintained by `estate` and thus unanalyzable (e.g. `x::GlobalRef`).
 
@@ -509,8 +444,7 @@ that is analyzable in the context of `estate`.
 `iridx(irval(xidx, state), state) === xidx`.
 """
 function irval(xidx::Int, estate::EscapeState)
-    x = xidx > estate.nargs ? SSAValue(xidx-estate.nargs) : Argument(xidx)
-    return x
+    return xidx > estate.nargs ? SSAValue(xidx-estate.nargs) : Argument(xidx)
 end
 
 function getaliases(x::Union{Argument,SSAValue}, estate::EscapeState)
@@ -543,14 +477,14 @@ isaliased(xidx::Int, yidx::Int, estate::EscapeState) =
     in_same_set(estate.aliasset, xidx, yidx)
 
 struct ArgEscapeInfo
-    EscapeBits::UInt8
+    escape_bits::UInt8
 end
 function ArgEscapeInfo(x::EscapeInfo)
     x === ⊤ && return ArgEscapeInfo(ARG_ALL_ESCAPE)
-    EscapeBits = 0x00
-    has_return_escape(x) && (EscapeBits |= ARG_RETURN_ESCAPE)
-    has_thrown_escape(x) && (EscapeBits |= ARG_THROWN_ESCAPE)
-    return ArgEscapeInfo(EscapeBits)
+    escape_bits = 0x00
+    has_return_escape(x) && (escape_bits |= ARG_RETURN_ESCAPE)
+    has_thrown_escape(x) && (escape_bits |= ARG_THROWN_ESCAPE)
+    return ArgEscapeInfo(escape_bits)
 end
 
 const ARG_ALL_ESCAPE    = 0x01 << 0
@@ -558,9 +492,9 @@ const ARG_RETURN_ESCAPE = 0x01 << 1
 const ARG_THROWN_ESCAPE = 0x01 << 2
 
 has_no_escape(x::ArgEscapeInfo)     = !has_all_escape(x) && !has_return_escape(x) && !has_thrown_escape(x)
-has_all_escape(x::ArgEscapeInfo)    = x.EscapeBits & ARG_ALL_ESCAPE    ≠ 0
-has_return_escape(x::ArgEscapeInfo) = x.EscapeBits & ARG_RETURN_ESCAPE ≠ 0
-has_thrown_escape(x::ArgEscapeInfo) = x.EscapeBits & ARG_THROWN_ESCAPE ≠ 0
+has_all_escape(x::ArgEscapeInfo)    = x.escape_bits & ARG_ALL_ESCAPE    ≠ 0
+has_return_escape(x::ArgEscapeInfo) = x.escape_bits & ARG_RETURN_ESCAPE ≠ 0
+has_thrown_escape(x::ArgEscapeInfo) = x.escape_bits & ARG_THROWN_ESCAPE ≠ 0
 
 struct ArgAliasing
     aidx::Int
@@ -570,46 +504,22 @@ end
 struct ArgEscapeCache
     argescapes::Vector{ArgEscapeInfo}
     argaliases::Vector{ArgAliasing}
-end
-
-function ArgEscapeCache(estate::EscapeState)
-    nargs = estate.nargs
-    argescapes = Vector{ArgEscapeInfo}(undef, nargs)
-    argaliases = ArgAliasing[]
-    for i = 1:nargs
-        info = estate.escapes[i]
-        @assert info.AliasInfo === true
-        argescapes[i] = ArgEscapeInfo(info)
-        for j = (i+1):nargs
-            if isaliased(i, j, estate)
-                push!(argaliases, ArgAliasing(i, j))
+    function ArgEscapeCache(estate::EscapeState)
+        nargs = estate.nargs
+        argescapes = Vector{ArgEscapeInfo}(undef, nargs)
+        argaliases = ArgAliasing[]
+        for i = 1:nargs
+            info = estate.escapes[i]
+            @assert info.AliasInfo === true
+            argescapes[i] = ArgEscapeInfo(info)
+            for j = (i+1):nargs
+                if isaliased(i, j, estate)
+                    push!(argaliases, ArgAliasing(i, j))
+                end
             end
         end
+        return new(argescapes, argaliases)
     end
-    return ArgEscapeCache(argescapes, argaliases)
-end
-
-"""
-    is_ipo_profitable(ir::IRCode, nargs::Int) -> Bool
-
-Heuristically checks if there is any profitability to run the escape analysis on `ir`
-and generate IPO escape information cache. Specifically, this function examines
-if any call argument is "interesting" in terms of their escapability.
-"""
-function is_ipo_profitable(ir::IRCode, nargs::Int)
-    for i = 1:nargs
-        t = unwrap_unionall(widenconst(ir.argtypes[i]))
-        t <: IO && return false # bail out IO-related functions
-        is_ipo_profitable_type(t) && return true
-    end
-    return false
-end
-function is_ipo_profitable_type(@nospecialize t)
-    if isa(t, Union)
-        return is_ipo_profitable_type(t.a) && is_ipo_profitable_type(t.b)
-    end
-    (t === String || t === Symbol || t === Module || t === SimpleVector) && return false
-    return ismutabletype(t)
 end
 
 abstract type Change end
@@ -631,68 +541,47 @@ struct LivenessChange <: Change
 end
 const Changes = Vector{Change}
 
-struct AnalysisState{T<:Callable}
+struct AnalysisState{GetEscapeCache, Lattice<:AbstractLattice}
     ir::IRCode
     estate::EscapeState
     changes::Changes
-    get_escape_cache::T
-end
-
-function getinst(ir::IRCode, idx::Int)
-    nstmts = length(ir.stmts)
-    if idx ≤ nstmts
-        return ir.stmts[idx]
-    else
-        return ir.new_nodes.stmts[idx - nstmts]
-    end
+    𝕃ₒ::Lattice
+    get_escape_cache::GetEscapeCache
 end
 
 """
-    analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape_cache::Callable)
-        -> estate::EscapeState
+    analyze_escapes(ir::IRCode, nargs::Int, get_escape_cache) -> estate::EscapeState
 
 Analyzes escape information in `ir`:
 - `nargs`: the number of actual arguments of the analyzed call
-- `call_resolved`: if interprocedural calls are already resolved by `ssa_inlining_pass!`
-- `get_escape_cache(::Union{InferenceResult,MethodInstance}) -> Union{Nothing,ArgEscapeCache}`:
+- `get_escape_cache(::MethodInstance) -> Union{Bool,ArgEscapeCache}`:
   retrieves cached argument escape information
 """
-function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape_cache::T) where T<:Callable
+function analyze_escapes(ir::IRCode, nargs::Int, 𝕃ₒ::AbstractLattice, get_escape_cache)
     stmts = ir.stmts
     nstmts = length(stmts) + length(ir.new_nodes.stmts)
 
-    tryregions, arrayinfo, callinfo = compute_frameinfo(ir, call_resolved)
-    estate = EscapeState(nargs, nstmts, arrayinfo)
+    tryregions = compute_frameinfo(ir)
+    estate = EscapeState(nargs, nstmts)
     changes = Changes() # keeps changes that happen at current statement
-    astate = AnalysisState(ir, estate, changes, get_escape_cache)
+    astate = AnalysisState(ir, estate, changes, 𝕃ₒ, get_escape_cache)
 
     local debug_itr_counter = 0
     while true
         local anyupdate = false
 
         for pc in nstmts:-1:1
-            stmt = getinst(ir, pc)[:inst]
+            stmt = ir[SSAValue(pc)][:stmt]
 
             # collect escape information
             if isa(stmt, Expr)
                 head = stmt.head
                 if head === :call
-                    if callinfo !== nothing
-                        escape_call!(astate, pc, stmt.args, callinfo)
-                    else
-                        escape_call!(astate, pc, stmt.args)
-                    end
+                    escape_call!(astate, pc, stmt.args)
                 elseif head === :invoke
                     escape_invoke!(astate, pc, stmt.args)
                 elseif head === :new || head === :splatnew
                     escape_new!(astate, pc, stmt.args)
-                elseif head === :(=)
-                    lhs, rhs = stmt.args
-                    if isa(lhs, GlobalRef) # global store
-                        add_escape_change!(astate, rhs, ⊤)
-                    else
-                        unexpected_assignment!(ir, pc)
-                    end
                 elseif head === :foreigncall
                     escape_foreigncall!(astate, pc, stmt.args)
                 elseif head === :throw_undef_if_not # XXX when is this expression inserted ?
@@ -700,20 +589,25 @@ function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape
                 elseif is_meta_expr_head(head)
                     # meta expressions doesn't account for any usages
                     continue
-                elseif head === :enter || head === :leave || head === :the_exception || head === :pop_exception
+                elseif head === :leave || head === :the_exception || head === :pop_exception
                     # ignore these expressions since escapes via exceptions are handled by `escape_exception!`
                     # `escape_exception!` conservatively propagates `AllEscape` anyway,
                     # and so escape information imposed on `:the_exception` isn't computed
                     continue
+                elseif head === :gc_preserve_begin
+                    # GC preserve is handled by `escape_gc_preserve!`
+                elseif head === :gc_preserve_end
+                    escape_gc_preserve!(astate, pc, stmt.args)
                 elseif head === :static_parameter ||  # this exists statically, not interested in its escape
-                       head === :copyast ||           # XXX can this account for some escapes?
-                       head === :isdefined ||         # just returns `Bool`, nothing accounts for any escapes
-                       head === :gc_preserve_begin || # `GC.@preserve` expressions themselves won't be used anywhere
-                       head === :gc_preserve_end      # `GC.@preserve` expressions themselves won't be used anywhere
+                       head === :copyast ||           # XXX escape something?
+                       head === :isdefined            # just returns `Bool`, nothing accounts for any escapes
                     continue
                 else
                     add_conservative_changes!(astate, pc, stmt.args)
                 end
+            elseif isa(stmt, EnterNode)
+                # Handled via escape_exception!
+                continue
             elseif isa(stmt, ReturnNode)
                 if isdefined(stmt, :val)
                     add_escape_change!(astate, stmt.val, ReturnEscape(pc))
@@ -758,121 +652,29 @@ function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape
 end
 
 """
-    compute_frameinfo(ir::IRCode, call_resolved::Bool) -> (tryregions, arrayinfo, callinfo)
-
-A preparatory linear scan before the escape analysis on `ir` to find:
-- `tryregions::Union{Nothing,Vector{UnitRange{Int}}}`: regions in which potential `throw`s can be caught (used by `escape_exception!`)
-- `arrayinfo::Union{Nothing,IdDict{Int,Vector{Int}}}`: array allocations whose dimensions are known precisely (with some very simple local analysis)
-- `callinfo::`: when `!call_resolved`, `compute_frameinfo` additionally returns `callinfo::Vector{Union{MethodInstance,InferenceResult}}`,
-  which contains information about statically resolved callsites.
-  The inliner will use essentially equivalent interprocedural information to inline callees as well as resolve static callsites,
-  this additional information won't be required when analyzing post-inlining IR.
-
-!!! note
-    This array dimension analysis to compute `arrayinfo` is very local and doesn't account
-    for flow-sensitivity nor complex aliasing.
-    Ideally this dimension analysis should be done as a part of type inference that
-    propagates array dimensions in a flow sensitive way.
+    compute_frameinfo(ir::IRCode) -> tryregions
+
+A preparatory linear scan before the escape analysis on `ir` to find
+`tryregions::Union{Nothing,Vector{UnitRange{Int}}}`, that represent regions in which
+potential `throw`s can be caught (used by `escape_exception!`)
 """
-function compute_frameinfo(ir::IRCode, call_resolved::Bool)
+function compute_frameinfo(ir::IRCode)
     nstmts, nnewnodes = length(ir.stmts), length(ir.new_nodes.stmts)
-    tryregions, arrayinfo = nothing, nothing
-    if !call_resolved
-        callinfo = Vector{Any}(undef, nstmts+nnewnodes)
-    else
-        callinfo = nothing
-    end
+    tryregions = nothing
     for idx in 1:nstmts+nnewnodes
-        inst = getinst(ir, idx)
-        stmt = inst[:inst]
-        if !call_resolved
-            # TODO don't call `check_effect_free!` in the inlinear
-            check_effect_free!(ir, idx, stmt, inst[:type], 𝕃ₒ)
-        end
-        if callinfo !== nothing && isexpr(stmt, :call)
-            # TODO: pass effects here
-            callinfo[idx] = resolve_call(ir, stmt, inst[:info])
-        elseif isexpr(stmt, :enter)
-            @assert idx ≤ nstmts "try/catch inside new_nodes unsupported"
-            tryregions === nothing && (tryregions = UnitRange{Int}[])
-            leave_block = stmt.args[1]::Int
-            leave_pc = first(ir.cfg.blocks[leave_block].stmts)
-            push!(tryregions, idx:leave_pc)
-        elseif isexpr(stmt, :foreigncall)
-            args = stmt.args
-            name = args[1]
-            nn = normalize(name)
-            isa(nn, Symbol) || @goto next_stmt
-            ndims = alloc_array_ndims(nn)
-            ndims === nothing && @goto next_stmt
-            if ndims ≠ 0
-                length(args) ≥ ndims+6 || @goto next_stmt
-                dims = Int[]
-                for i in 1:ndims
-                    dim = argextype(args[i+6], ir)
-                    isa(dim, Const) || @goto next_stmt
-                    dim = dim.val
-                    isa(dim, Int) || @goto next_stmt
-                    push!(dims, dim)
-                end
-            else
-                length(args) ≥ 7 || @goto next_stmt
-                dims = argextype(args[7], ir)
-                if isa(dims, Const)
-                    dims = dims.val
-                    isa(dims, Tuple{Vararg{Int}}) || @goto next_stmt
-                    dims = collect(Int, dims)
-                else
-                    dims === Tuple{} || @goto next_stmt
-                    dims = Int[]
-                end
-            end
-            if arrayinfo === nothing
-                arrayinfo = ArrayInfo()
-            end
-            arrayinfo[idx] = dims
-        elseif arrayinfo !== nothing
-            # TODO this super limited alias analysis is able to handle only very simple cases
-            # this should be replaced with a proper forward dimension analysis
-            if isa(stmt, PhiNode)
-                values = stmt.values
-                local dims = nothing
-                for i = 1:length(values)
-                    if isassigned(values, i)
-                        val = values[i]
-                        if isa(val, SSAValue) && haskey(arrayinfo, val.id)
-                            if dims === nothing
-                                dims = arrayinfo[val.id]
-                                continue
-                            elseif dims == arrayinfo[val.id]
-                                continue
-                            end
-                        end
-                    end
-                    @goto next_stmt
-                end
-                if dims !== nothing
-                    arrayinfo[idx] = dims
-                end
-            elseif isa(stmt, PiNode)
-                if isdefined(stmt, :val)
-                    val = stmt.val
-                    if isa(val, SSAValue) && haskey(arrayinfo, val.id)
-                        arrayinfo[idx] = arrayinfo[val.id]
-                    end
-                end
+        inst = ir[SSAValue(idx)]
+        stmt = inst[:stmt]
+        if isa(stmt, EnterNode)
+            leave_block = stmt.catch_dest
+            if leave_block ≠ 0
+                @assert idx ≤ nstmts "try/catch inside new_nodes unsupported"
+                tryregions === nothing && (tryregions = UnitRange{Int}[])
+                leave_pc = first(ir.cfg.blocks[leave_block].stmts)
+                push!(tryregions, idx:leave_pc)
             end
         end
-        @label next_stmt
     end
-    return tryregions, arrayinfo, callinfo
-end
-
-# define resolve_call
-if _TOP_MOD === Core.Compiler
-    include("compiler/ssair/EscapeAnalysis/interprocedural.jl")
-else
-    include("interprocedural.jl")
+    return tryregions
 end
 
 # propagate changes, and check convergence
@@ -920,13 +722,13 @@ end
     return false
 end
 
-# propagate Liveness changes separately in order to avoid constructing too many LivenessSet
+# propagate Liveness changes separately in order to avoid constructing too many BitSet
 @inline function propagate_liveness_change!(estate::EscapeState, change::LivenessChange)
     (; xidx, livepc) = change
     info = estate.escapes[xidx]
     Liveness = info.Liveness
     Liveness === TOP_LIVENESS && return false
-    livepc in Liveness && return false
+    livepc ∈ Liveness && return false
     if Liveness === BOT_LIVENESS || Liveness === ARG_LIVENESS
         # if this Liveness is a constant, we shouldn't modify it and propagate this change as a new EscapeInfo
         Liveness = copy(Liveness)
@@ -958,7 +760,7 @@ function add_escape_change!(astate::AnalysisState, @nospecialize(x), xinfo::Esca
     xinfo === ⊥ && return nothing # performance optimization
     xidx = iridx(x, astate.estate)
     if xidx !== nothing
-        if force || !isbitstype(widenconst(argextype(x, astate.ir)))
+        if force || !is_identity_free_argtype(argextype(x, astate.ir))
             push!(astate.changes, EscapeChange(xidx, xinfo))
         end
     end
@@ -968,7 +770,7 @@ end
 function add_liveness_change!(astate::AnalysisState, @nospecialize(x), livepc::Int)
     xidx = iridx(x, astate.estate)
     if xidx !== nothing
-        if !isbitstype(widenconst(argextype(x, astate.ir)))
+        if !is_identity_free_argtype(argextype(x, astate.ir))
             push!(astate.changes, LivenessChange(xidx, livepc))
         end
     end
@@ -1074,25 +876,7 @@ function escape_unanalyzable_obj!(astate::AnalysisState, @nospecialize(obj), obj
     return objinfo
 end
 
-@noinline function unexpected_assignment!(ir::IRCode, pc::Int)
-    @eval Main (ir = $ir; pc = $pc)
-    error("unexpected assignment found: inspect `Main.pc` and `Main.pc`")
-end
-
-is_nothrow(ir::IRCode, pc::Int) = getinst(ir, pc)[:flag] & IR_FLAG_NOTHROW ≠ 0
-
-# NOTE if we don't maintain the alias set that is separated from the lattice state, we can do
-# something like below: it essentially incorporates forward escape propagation in our default
-# backward propagation, and leads to inefficient convergence that requires more iterations
-# # lhs = rhs: propagate escape information of `rhs` to `lhs`
-# function escape_alias!(astate::AnalysisState, @nospecialize(lhs), @nospecialize(rhs))
-#     if isa(rhs, SSAValue) || isa(rhs, Argument)
-#         vinfo = astate.estate[rhs]
-#     else
-#         return
-#     end
-#     add_escape_change!(astate, lhs, vinfo)
-# end
+is_nothrow(ir::IRCode, pc::Int) = has_flag(ir[SSAValue(pc)], IR_FLAG_NOTHROW)
 
 """
     escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{Int}})
@@ -1140,6 +924,7 @@ function escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{I
     # NOTE if `:the_exception` is the only way to access the exception, we can do:
     # exc = SSAValue(pc)
     # excinfo = estate[exc]
+    # TODO? set up a special effect bit that checks the existence of `rethrow` and `current_exceptions` and use it here
     excinfo = ⊤
     escapes = estate.escapes
     for i in 1:length(escapes)
@@ -1148,7 +933,7 @@ function escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{I
         xt === TOP_THROWN_ESCAPE && @goto propagate_exception_escape # fast pass
         for pc in xt
             for region in tryregions
-                pc in region && @goto propagate_exception_escape # early break because of AllEscape
+                pc ∈ region && @goto propagate_exception_escape # early break because of AllEscape
             end
         end
         continue
@@ -1159,25 +944,40 @@ function escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{I
 end
 
 # escape statically-resolved call, i.e. `Expr(:invoke, ::MethodInstance, ...)`
-escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}) =
-    escape_invoke!(astate, pc, args, first(args)::MethodInstance, 2)
-
-function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any},
-    linfo::Linfo, first_idx::Int, last_idx::Int = length(args))
-    if isa(linfo, InferenceResult)
-        cache = astate.get_escape_cache(linfo)
-        linfo = linfo.linfo
+function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any})
+    codeinst = first(args)
+    if codeinst isa MethodInstance
+        mi = codeinst
     else
-        cache = astate.get_escape_cache(linfo)
-    end
-    if cache === nothing
-        return add_conservative_changes!(astate, pc, args, 2)
-    else
-        cache = cache::ArgEscapeCache
+        mi = (codeinst::CodeInstance).def
     end
+    first_idx, last_idx = 2, length(args)
+    add_liveness_changes!(astate, pc, args, first_idx, last_idx)
+    # TODO inspect `astate.ir.stmts[pc][:info]` and use const-prop'ed `InferenceResult` if available
+    cache = astate.get_escape_cache(codeinst)
     ret = SSAValue(pc)
+    if cache isa Bool
+        if cache
+            # This method call is very simple and has good effects, so there's no need to
+            # escape its arguments. However, since the arguments might be returned, we need
+            # to consider the possibility of aliasing between them and the return value.
+            for argidx = first_idx:last_idx
+                arg = args[argidx]
+                if arg isa GlobalRef
+                    continue # :effect_free guarantees that nothings escapes to the global scope
+                end
+                if !is_identity_free_argtype(argextype(arg, astate.ir))
+                    add_alias_change!(astate, ret, arg)
+                end
+            end
+            return nothing
+        else
+            return add_conservative_changes!(astate, pc, args, 2)
+        end
+    end
+    cache = cache::ArgEscapeCache
     retinfo = astate.estate[ret] # escape information imposed on the call statement
-    method = linfo.def::Method
+    method = mi.def::Method
     nargs = Int(method.nargs)
     for (i, argidx) in enumerate(first_idx:last_idx)
         arg = args[argidx]
@@ -1186,46 +986,40 @@ function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any},
             # COMBAK will this be invalid once we take alias information into account?
             i = nargs
         end
-        arginfo = cache.argescapes[i]
-        info = from_interprocedural(arginfo, pc)
-        if has_return_escape(arginfo)
-            # if this argument can be "returned", in addition to propagating
-            # the escape information imposed on this call argument within the callee,
-            # we should also account for possible aliasing of this argument and the returned value
-            add_escape_change!(astate, arg, info)
+        argescape = cache.argescapes[i]
+        info = from_interprocedural(argescape, pc)
+        # propagate the escape information imposed on this call argument by the callee
+        add_escape_change!(astate, arg, info)
+        if has_return_escape(argescape)
+            # if this argument can be "returned", we should also account for possible
+            # aliasing between this argument and the returned value
             add_alias_change!(astate, ret, arg)
-        else
-            # if this is simply passed as the call argument, we can just propagate
-            # the escape information imposed on this call argument within the callee
-            add_escape_change!(astate, arg, info)
         end
     end
     for (; aidx, bidx) in cache.argaliases
-        add_alias_change!(astate, args[aidx-(first_idx-1)], args[bidx-(first_idx-1)])
+        add_alias_change!(astate, args[aidx+(first_idx-1)], args[bidx+(first_idx-1)])
     end
     # we should disable the alias analysis on this newly introduced object
     add_escape_change!(astate, ret, EscapeInfo(retinfo, true))
 end
 
 """
-    from_interprocedural(arginfo::ArgEscapeInfo, pc::Int) -> x::EscapeInfo
+    from_interprocedural(argescape::ArgEscapeInfo, pc::Int) -> x::EscapeInfo
 
-Reinterprets the escape information imposed on the call argument which is cached as `arginfo`
+Reinterprets the escape information imposed on the call argument which is cached as `argescape`
 in the context of the caller frame, where `pc` is the SSA statement number of the return value.
 """
-function from_interprocedural(arginfo::ArgEscapeInfo, pc::Int)
-    has_all_escape(arginfo) && return ⊤
-
-    ThrownEscape = has_thrown_escape(arginfo) ? LivenessSet(pc) : BOT_THROWN_ESCAPE
-
-    return EscapeInfo(
-        #=Analyzed=#true, #=ReturnEscape=#false, ThrownEscape,
-        # FIXME implement interprocedural memory effect-analysis
-        # currently, this essentially disables the entire field analysis
-        # it might be okay from the SROA point of view, since we can't remove the allocation
-        # as far as it's passed to a callee anyway, but still we may want some field analysis
-        # for e.g. stack allocation or some other IPO optimizations
-        #=AliasInfo=#true, #=Liveness=#LivenessSet(pc))
+function from_interprocedural(argescape::ArgEscapeInfo, pc::Int)
+    has_all_escape(argescape) && return ⊤
+    ThrownEscape = has_thrown_escape(argescape) ? BitSet(pc) : BOT_THROWN_ESCAPE
+    # TODO implement interprocedural memory effect-analysis:
+    # currently, this essentially disables the entire field analysis–it might be okay from
+    # the SROA point of view, since we can't remove the allocation as far as it's passed to
+    # a callee anyway, but still we may want some field analysis for e.g. stack allocation
+    # or some other IPO optimizations
+    AliasInfo = true
+    Liveness = BitSet(pc)
+    return EscapeInfo(#=Analyzed=#true, #=ReturnEscape=#false, ThrownEscape, AliasInfo, Liveness)
 end
 
 # escape every argument `(args[6:length(args[3])])` and the name `args[1]`
@@ -1240,30 +1034,13 @@ function escape_foreigncall!(astate::AnalysisState, pc::Int, args::Vector{Any})
     argtypes = args[3]::SimpleVector
     nargs = length(argtypes)
     name = args[1]
-    nn = normalize(name)
-    if isa(nn, Symbol)
-        boundserror_ninds = array_resize_info(nn)
-        if boundserror_ninds !== nothing
-            boundserror, ninds = boundserror_ninds
-            escape_array_resize!(boundserror, ninds, astate, pc, args)
-            return
-        end
-        if is_array_copy(nn)
-            escape_array_copy!(astate, pc, args)
-            return
-        elseif is_array_isassigned(nn)
-            escape_array_isassigned!(astate, pc, args)
-            return
-        end
-        # if nn === :jl_gc_add_finalizer_th
-        #     # TODO add `FinalizerEscape` ?
-        # end
-    end
     # NOTE array allocations might have been proven as nothrow (https://github.com/JuliaLang/julia/pull/43565)
     nothrow = is_nothrow(astate.ir, pc)
     name_info = nothrow ? ⊥ : ThrownEscape(pc)
-    add_escape_change!(astate, name, name_info)
-    add_liveness_change!(astate, name, pc)
+    if !isexpr(name, :tuple)
+        add_escape_change!(astate, name, name_info)
+        add_liveness_change!(astate, name, pc)
+    end
     for i = 1:nargs
         # we should escape this argument if it is directly called,
         # otherwise just impose ThrownEscape if not nothrow
@@ -1282,79 +1059,57 @@ function escape_foreigncall!(astate::AnalysisState, pc::Int, args::Vector{Any})
     end
 end
 
-normalize(@nospecialize x) = isa(x, QuoteNode) ? x.value : x
-
-function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}, callinfo::Vector{Any})
-    info = callinfo[pc]
-    if isa(info, Bool)
-        info && return # known to be no escape
-        # now cascade to the builtin handling
-        escape_call!(astate, pc, args)
-        return
-    elseif isa(info, EACallInfo)
-        for linfo in info.linfos
-            escape_invoke!(astate, pc, args, linfo, 1)
-        end
-        # accounts for a potential escape via MethodError
-        info.nothrow || add_thrown_escapes!(astate, pc, args)
-        return
-    else
-        @assert info === missing
-        # if this call couldn't be analyzed, escape it conservatively
-        add_conservative_changes!(astate, pc, args)
-    end
+function escape_gc_preserve!(astate::AnalysisState, pc::Int, args::Vector{Any})
+    @assert length(args) == 1 "invalid :gc_preserve_end"
+    val = args[1]
+    @assert val isa SSAValue "invalid :gc_preserve_end"
+    beginstmt = astate.ir[val][:stmt]
+    @assert isexpr(beginstmt, :gc_preserve_begin) "invalid :gc_preserve_end"
+    beginargs = beginstmt.args
+    # COMBAK we might need to add liveness for all statements from `:gc_preserve_begin` to `:gc_preserve_end`
+    add_liveness_changes!(astate, pc, beginargs)
 end
 
 function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any})
-    ir = astate.ir
-    ft = argextype(first(args), ir, ir.sptypes, ir.argtypes)
+    ft = argextype(first(args), astate.ir)
     f = singleton_type(ft)
-    if isa(f, Core.IntrinsicFunction)
-        # XXX somehow `:call` expression can creep in here, ideally we should be able to do:
-        # argtypes = Any[argextype(args[i], astate.ir) for i = 2:length(args)]
-        argtypes = Any[]
-        for i = 2:length(args)
-            arg = args[i]
-            push!(argtypes, isexpr(arg, :call) ? Any : argextype(arg, ir))
-        end
-        if intrinsic_nothrow(f, argtypes)
+    if f isa IntrinsicFunction
+        if is_nothrow(astate.ir, pc)
             add_liveness_changes!(astate, pc, args, 2)
         else
             add_fallback_changes!(astate, pc, args, 2)
         end
-        return # TODO accounts for pointer operations?
-    end
-    result = escape_builtin!(f, astate, pc, args)
-    if result === missing
-        # if this call hasn't been handled by any of pre-defined handlers, escape it conservatively
-        add_conservative_changes!(astate, pc, args)
-        return
-    elseif result === true
-        add_liveness_changes!(astate, pc, args, 2)
-        return # ThrownEscape is already checked
-    else
-        # we escape statements with the `ThrownEscape` property using the effect-freeness
-        # computed by `stmt_effect_flags` invoked within inlining
-        # TODO throwness ≠ "effect-free-ness"
-        if is_nothrow(astate.ir, pc)
+        # TODO needs to account for pointer operations?
+    elseif f isa Builtin
+        result = escape_builtin!(f, astate, pc, args)
+        if result === missing
+            # if this call hasn't been handled by any of pre-defined handlers, escape it conservatively
+            add_conservative_changes!(astate, pc, args)
+        elseif result === true
+            add_liveness_changes!(astate, pc, args, 2)
+        elseif is_nothrow(astate.ir, pc)
             add_liveness_changes!(astate, pc, args, 2)
         else
             add_fallback_changes!(astate, pc, args, 2)
         end
-        return
+    else
+        # escape this generic function or unknown function call conservatively
+        add_conservative_changes!(astate, pc, args)
     end
 end
 
-escape_builtin!(@nospecialize(f), _...) = return missing
+escape_builtin!(@nospecialize(f), _...) = missing
 
 # safe builtins
-escape_builtin!(::typeof(isa), _...) = return false
-escape_builtin!(::typeof(typeof), _...) = return false
-escape_builtin!(::typeof(sizeof), _...) = return false
-escape_builtin!(::typeof(===), _...) = return false
+escape_builtin!(::typeof(isa), _...) = false
+escape_builtin!(::typeof(typeof), _...) = false
+escape_builtin!(::typeof(sizeof), _...) = false
+escape_builtin!(::typeof(===), _...) = false
+escape_builtin!(::typeof(Core.donotdelete), _...) = false
 # not really safe, but `ThrownEscape` will be imposed later
-escape_builtin!(::typeof(isdefined), _...) = return false
-escape_builtin!(::typeof(throw), _...) = return false
+escape_builtin!(::typeof(isdefined), _...) = false
+escape_builtin!(::typeof(throw), _...) = false
+escape_builtin!(::typeof(Core.throw_methoderror), _...) = false
 
 function escape_builtin!(::typeof(ifelse), astate::AnalysisState, pc::Int, args::Vector{Any})
     length(args) == 4 || return false
@@ -1400,6 +1155,7 @@ function escape_new!(astate::AnalysisState, pc::Int, args::Vector{Any})
             @goto escape_indexable_def
         end
     elseif isa(AliasInfo, IndexableFields)
+        AliasInfo = copy(AliasInfo)
         @label escape_indexable_def
         # fields are known precisely: propagate escape information imposed on recorded possibilities to the exact field values
         infos = AliasInfo.infos
@@ -1416,6 +1172,7 @@ function escape_new!(astate::AnalysisState, pc::Int, args::Vector{Any})
         end
         add_escape_change!(astate, obj, EscapeInfo(objinfo, AliasInfo)) # update with new AliasInfo
     elseif isa(AliasInfo, Unindexable)
+        AliasInfo = copy(AliasInfo)
         @label escape_unindexable_def
         # fields are known partially: propagate escape information imposed on recorded possibilities to all fields values
         info = AliasInfo.info
@@ -1469,7 +1226,7 @@ function analyze_fields(ir::IRCode, @nospecialize(typ), @nospecialize(fld))
     return IndexableFields(nflds), fidx
 end
 
-function reanalyze_fields(ir::IRCode, AliasInfo::IndexableFields, @nospecialize(typ), @nospecialize(fld))
+function reanalyze_fields(AliasInfo::IndexableFields, ir::IRCode, @nospecialize(typ), @nospecialize(fld))
     nflds = fieldcount_noerror(typ)
     if nflds === nothing
         return merge_to_unindexable(AliasInfo), 0
@@ -1483,6 +1240,7 @@ function reanalyze_fields(ir::IRCode, AliasInfo::IndexableFields, @nospecialize(
     if fidx === nothing
         return merge_to_unindexable(AliasInfo), 0
     end
+    AliasInfo = copy(AliasInfo)
     infos = AliasInfo.infos
     ninfos = length(infos)
     if nflds > ninfos
@@ -1519,12 +1277,13 @@ function escape_builtin!(::typeof(getfield), astate::AnalysisState, pc::Int, arg
             @goto record_unindexable_use
         end
     elseif isa(AliasInfo, IndexableFields)
-        AliasInfo, fidx = reanalyze_fields(ir, AliasInfo, typ, args[3])
+        AliasInfo, fidx = reanalyze_fields(AliasInfo, ir, typ, args[3])
         isa(AliasInfo, Unindexable) && @goto record_unindexable_use
         @label record_indexable_use
         push!(AliasInfo.infos[fidx], LocalUse(pc))
         add_escape_change!(astate, obj, EscapeInfo(objinfo, AliasInfo)) # update with new AliasInfo
     elseif isa(AliasInfo, Unindexable)
+        AliasInfo = copy(AliasInfo)
         @label record_unindexable_use
         push!(AliasInfo.info, LocalUse(pc))
         add_escape_change!(astate, obj, EscapeInfo(objinfo, AliasInfo)) # update with new AliasInfo
@@ -1565,7 +1324,7 @@ function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, ar
         end
     elseif isa(AliasInfo, IndexableFields)
         typ = widenconst(argextype(obj, ir))
-        AliasInfo, fidx = reanalyze_fields(ir, AliasInfo, typ, args[3])
+        AliasInfo, fidx = reanalyze_fields(AliasInfo, ir, typ, args[3])
         isa(AliasInfo, Unindexable) && @goto escape_unindexable_def
         @label escape_indexable_def
         add_alias_escapes!(astate, val, AliasInfo.infos[fidx])
@@ -1575,7 +1334,7 @@ function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, ar
         # propagate the escape information of this object ignoring field information
         add_escape_change!(astate, val, ignore_aliasinfo(objinfo))
     elseif isa(AliasInfo, Unindexable)
-        info = AliasInfo.info
+        AliasInfo = copy(AliasInfo)
         @label escape_unindexable_def
         add_alias_escapes!(astate, val, AliasInfo.info)
         push!(AliasInfo.info, LocalDef(pc))
@@ -1597,10 +1356,10 @@ function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, ar
     add_escape_change!(astate, val, ssainfo)
     # compute the throwness of this setfield! call here since builtin_nothrow doesn't account for that
     @label add_thrown_escapes
-    if length(args) == 4 && setfield!_nothrow(𝕃ₒ,
+    if length(args) == 4 && setfield!_nothrow(astate.𝕃ₒ,
         argextype(args[2], ir), argextype(args[3], ir), argextype(args[4], ir))
         return true
-    elseif length(args) == 3 && setfield!_nothrow(𝕃ₒ,
+    elseif length(args) == 3 && setfield!_nothrow(astate.𝕃ₒ,
         argextype(args[2], ir), argextype(args[3], ir))
         return true
     else
@@ -1609,314 +1368,12 @@ function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, ar
     end
 end
 
-function escape_builtin!(::typeof(arrayref), astate::AnalysisState, pc::Int, args::Vector{Any})
-    length(args) ≥ 4 || return false
-    # check potential thrown escapes from this arrayref call
-    argtypes = Any[argextype(args[i], astate.ir) for i in 2:length(args)]
-    boundcheckt = argtypes[1]
-    aryt = argtypes[2]
-    if !array_builtin_common_typecheck(boundcheckt, aryt, argtypes, 3)
-        add_thrown_escapes!(astate, pc, args, 2)
-    end
-    ary = args[3]
-    inbounds = isa(boundcheckt, Const) && !boundcheckt.val::Bool
-    inbounds || add_escape_change!(astate, ary, ThrownEscape(pc))
-    # we don't track precise index information about this array and thus don't know what values
-    # can be referenced here: directly propagate the escape information imposed on the return
-    # value of this `arrayref` call to the array itself as the most conservative propagation
-    # but also with updated index information
-    estate = astate.estate
-    if isa(ary, SSAValue) || isa(ary, Argument)
-        aryinfo = estate[ary]
-    else
-        # unanalyzable object, so the return value is also unanalyzable
-        add_escape_change!(astate, SSAValue(pc), ⊤)
-        return true
-    end
-    AliasInfo = aryinfo.AliasInfo
-    if isa(AliasInfo, Bool)
-        AliasInfo && @goto conservative_propagation
-        # AliasInfo of this array hasn't been analyzed yet: set AliasInfo now
-        idx = array_nd_index(astate, ary, args[4:end])
-        if isa(idx, Int)
-            AliasInfo = IndexableElements(IdDict{Int,AInfo}())
-            @goto record_indexable_use
-        end
-        AliasInfo = Unindexable()
-        @goto record_unindexable_use
-    elseif isa(AliasInfo, IndexableElements)
-        idx = array_nd_index(astate, ary, args[4:end])
-        if !isa(idx, Int)
-            AliasInfo = merge_to_unindexable(AliasInfo)
-            @goto record_unindexable_use
-        end
-        @label record_indexable_use
-        info = get!(()->AInfo(), AliasInfo.infos, idx)
-        push!(info, LocalUse(pc))
-        add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo
-    elseif isa(AliasInfo, Unindexable)
-        @label record_unindexable_use
-        push!(AliasInfo.info, LocalUse(pc))
-        add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo
-    else
-        # this object has been used as struct, but it is used as array here (thus should throw)
-        # update ary's element information and just handle this case conservatively
-        aryinfo = escape_unanalyzable_obj!(astate, ary, aryinfo)
-        @label conservative_propagation
-        # at the extreme case, an element of `ary` may point to `ary` itself
-        # so add the alias change here as the most conservative propagation
-        add_alias_change!(astate, ary, SSAValue(pc))
-    end
-    return true
-end
-
-function escape_builtin!(::typeof(arrayset), astate::AnalysisState, pc::Int, args::Vector{Any})
-    length(args) ≥ 5 || return false
-    # check potential escapes from this arrayset call
-    # NOTE here we essentially only need to account for TypeError, assuming that
-    # UndefRefError or BoundsError don't capture any of the arguments here
-    argtypes = Any[argextype(args[i], astate.ir) for i in 2:length(args)]
-    boundcheckt = argtypes[1]
-    aryt = argtypes[2]
-    valt = argtypes[3]
-    if !(array_builtin_common_typecheck(boundcheckt, aryt, argtypes, 4) &&
-         arrayset_typecheck(aryt, valt))
-        add_thrown_escapes!(astate, pc, args, 2)
-    end
-    ary = args[3]
-    val = args[4]
-    inbounds = isa(boundcheckt, Const) && !boundcheckt.val::Bool
-    inbounds || add_escape_change!(astate, ary, ThrownEscape(pc))
-    # we don't track precise index information about this array and won't record what value
-    # is being assigned here: directly propagate the escape information of this array to
-    # the value being assigned as the most conservative propagation
-    estate = astate.estate
-    if isa(ary, SSAValue) || isa(ary, Argument)
-        aryinfo = estate[ary]
-    else
-        # unanalyzable object (e.g. obj::GlobalRef): escape field value conservatively
-        add_escape_change!(astate, val, ⊤)
-        return true
-    end
-    AliasInfo = aryinfo.AliasInfo
-    if isa(AliasInfo, Bool)
-        AliasInfo && @goto conservative_propagation
-        # AliasInfo of this array hasn't been analyzed yet: set AliasInfo now
-        idx = array_nd_index(astate, ary, args[5:end])
-        if isa(idx, Int)
-            AliasInfo = IndexableElements(IdDict{Int,AInfo}())
-            @goto escape_indexable_def
-        end
-        AliasInfo = Unindexable()
-        @goto escape_unindexable_def
-    elseif isa(AliasInfo, IndexableElements)
-        idx = array_nd_index(astate, ary, args[5:end])
-        if !isa(idx, Int)
-            AliasInfo = merge_to_unindexable(AliasInfo)
-            @goto escape_unindexable_def
-        end
-        @label escape_indexable_def
-        info = get!(()->AInfo(), AliasInfo.infos, idx)
-        add_alias_escapes!(astate, val, info)
-        push!(info, LocalDef(pc))
-        add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo
-        # propagate the escape information of this array ignoring elements information
-        add_escape_change!(astate, val, ignore_aliasinfo(aryinfo))
-    elseif isa(AliasInfo, Unindexable)
-        @label escape_unindexable_def
-        add_alias_escapes!(astate, val, AliasInfo.info)
-        push!(AliasInfo.info, LocalDef(pc))
-        add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo
-        # propagate the escape information of this array ignoring elements information
-        add_escape_change!(astate, val, ignore_aliasinfo(aryinfo))
-    else
-        # this object has been used as struct, but it is used as array here (thus should throw)
-        # update ary's element information and just handle this case conservatively
-        aryinfo = escape_unanalyzable_obj!(astate, ary, aryinfo)
-        @label conservative_propagation
-        add_alias_change!(astate, val, ary)
-    end
-    # also propagate escape information imposed on the return value of this `arrayset`
-    ssainfo = estate[SSAValue(pc)]
-    add_escape_change!(astate, ary, ssainfo)
-    return true
-end
-
-# NOTE this function models and thus should be synced with the implementation of:
-# size_t array_nd_index(jl_array_t *a, jl_value_t **args, size_t nidxs, ...)
-function array_nd_index(astate::AnalysisState, @nospecialize(ary), args::Vector{Any}, nidxs::Int = length(args))
-    isa(ary, SSAValue) || return nothing
-    aryid = ary.id
-    arrayinfo = astate.estate.arrayinfo
-    isa(arrayinfo, ArrayInfo) || return nothing
-    haskey(arrayinfo, aryid) || return nothing
-    dims = arrayinfo[aryid]
-    local i = 0
-    local k, stride = 0, 1
-    local nd = length(dims)
-    while k < nidxs
-        arg = args[k+1]
-        argval = argextype(arg, astate.ir)
-        isa(argval, Const) || return nothing
-        argval = argval.val
-        isa(argval, Int) || return nothing
-        ii = argval - 1
-        i += ii * stride
-        d = k ≥ nd ? 1 : dims[k+1]
-        k < nidxs - 1 && ii ≥ d && return nothing # BoundsError
-        stride *= d
-        k += 1
-    end
-    while k < nd
-        stride *= dims[k+1]
-        k += 1
-    end
-    i ≥ stride && return nothing # BoundsError
-    return i
-end
-
-function escape_builtin!(::typeof(arraysize), astate::AnalysisState, pc::Int, args::Vector{Any})
-    length(args) == 3 || return false
-    ary = args[2]
-    dim = args[3]
-    if !arraysize_typecheck(ary, dim, astate.ir)
-        add_escape_change!(astate, ary, ThrownEscape(pc))
-        add_escape_change!(astate, dim, ThrownEscape(pc))
-    end
-    # NOTE we may still see "arraysize: dimension out of range", but it doesn't capture anything
-    return true
-end
-
-function arraysize_typecheck(@nospecialize(ary), @nospecialize(dim), ir::IRCode)
-    aryt = argextype(ary, ir)
-    aryt ⊑ Array || return false
-    dimt = argextype(dim, ir)
-    dimt ⊑ Int || return false
-    return true
-end
-
-# returns nothing if this isn't array resizing operation,
-# otherwise returns true if it can throw BoundsError and false if not
-function array_resize_info(name::Symbol)
-    if name === :jl_array_grow_beg || name === :jl_array_grow_end
-        return false, 1
-    elseif name === :jl_array_del_beg || name === :jl_array_del_end
-        return true, 1
-    elseif name === :jl_array_grow_at || name === :jl_array_del_at
-        return true, 2
-    else
-        return nothing
-    end
-end
-
-# NOTE may potentially throw "cannot resize array with shared data" error,
-# but just ignore it since it doesn't capture anything
-function escape_array_resize!(boundserror::Bool, ninds::Int,
-    astate::AnalysisState, pc::Int, args::Vector{Any})
-    length(args) ≥ 6+ninds || return add_fallback_changes!(astate, pc, args)
-    ary = args[6]
-    aryt = argextype(ary, astate.ir)
-    aryt ⊑ Array || return add_fallback_changes!(astate, pc, args)
-    for i in 1:ninds
-        ind = args[i+6]
-        indt = argextype(ind, astate.ir)
-        indt ⊑ Integer || return add_fallback_changes!(astate, pc, args)
-    end
-    if boundserror
-        # this array resizing can potentially throw `BoundsError`, impose it now
-        add_escape_change!(astate, ary, ThrownEscape(pc))
-    end
-    # give up indexing analysis whenever we see array resizing
-    # (since we track array dimensions only globally)
-    mark_unindexable!(astate, ary)
-    add_liveness_changes!(astate, pc, args, 6)
-end
-
-function mark_unindexable!(astate::AnalysisState, @nospecialize(ary))
-    isa(ary, SSAValue) || return
-    aryinfo = astate.estate[ary]
-    AliasInfo = aryinfo.AliasInfo
-    isa(AliasInfo, IndexableElements) || return
-    AliasInfo = merge_to_unindexable(AliasInfo)
-    add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo))
-end
-
-is_array_copy(name::Symbol) = name === :jl_array_copy
-
-# FIXME this implementation is very conservative, improve the accuracy and solve broken test cases
-function escape_array_copy!(astate::AnalysisState, pc::Int, args::Vector{Any})
-    length(args) ≥ 6 || return add_fallback_changes!(astate, pc, args)
-    ary = args[6]
-    aryt = argextype(ary, astate.ir)
-    aryt ⊑ Array || return add_fallback_changes!(astate, pc, args)
-    if isa(ary, SSAValue) || isa(ary, Argument)
-        newary = SSAValue(pc)
-        aryinfo = astate.estate[ary]
-        newaryinfo = astate.estate[newary]
-        add_escape_change!(astate, newary, aryinfo)
-        add_escape_change!(astate, ary, newaryinfo)
-    end
-    add_liveness_changes!(astate, pc, args, 6)
-end
-
-is_array_isassigned(name::Symbol) = name === :jl_array_isassigned
-
-function escape_array_isassigned!(astate::AnalysisState, pc::Int, args::Vector{Any})
-    if !array_isassigned_nothrow(args, astate.ir)
-        add_thrown_escapes!(astate, pc, args)
+function escape_builtin!(::typeof(Core.finalizer), astate::AnalysisState, pc::Int, args::Vector{Any})
+    if length(args) ≥ 3
+        obj = args[3]
+        add_liveness_change!(astate, obj, pc) # TODO setup a proper FinalizerEscape?
     end
-    add_liveness_changes!(astate, pc, args, 6)
-end
-
-function array_isassigned_nothrow(args::Vector{Any}, src::IRCode)
-    # if !validate_foreigncall_args(args,
-    #     :jl_array_isassigned, Cint, svec(Any,Csize_t), 0, :ccall)
-    #     return false
-    # end
-    length(args) ≥ 7 || return false
-    arytype = argextype(args[6], src)
-    arytype ⊑ Array || return false
-    idxtype = argextype(args[7], src)
-    idxtype ⊑ Csize_t || return false
-    return true
-end
-
-# # COMBAK do we want to enable this (and also backport this to Base for array allocations?)
-# import Core.Compiler: Cint, svec
-# function validate_foreigncall_args(args::Vector{Any},
-#     name::Symbol, @nospecialize(rt), argtypes::SimpleVector, nreq::Int, convention::Symbol)
-#     length(args) ≥ 5 || return false
-#     normalize(args[1]) === name || return false
-#     args[2] === rt || return false
-#     args[3] === argtypes || return false
-#     args[4] === vararg || return false
-#     normalize(args[5]) === convention || return false
-#     return true
-# end
-
-if isdefined(Core, :ImmutableArray)
-
-import Core: ImmutableArray, arrayfreeze, mutating_arrayfreeze, arraythaw
-
-escape_builtin!(::typeof(arrayfreeze), astate::AnalysisState, pc::Int, args::Vector{Any}) =
-    is_safe_immutable_array_op(Array, astate, args)
-escape_builtin!(::typeof(mutating_arrayfreeze), astate::AnalysisState, pc::Int, args::Vector{Any}) =
-    is_safe_immutable_array_op(Array, astate, args)
-escape_builtin!(::typeof(arraythaw), astate::AnalysisState, pc::Int, args::Vector{Any}) =
-    is_safe_immutable_array_op(ImmutableArray, astate, args)
-function is_safe_immutable_array_op(@nospecialize(arytype), astate::AnalysisState, args::Vector{Any})
-    length(args) == 2 || return false
-    argextype(args[2], astate.ir) ⊑ arytype || return false
-    return true
-end
-
-end # if isdefined(Core, :ImmutableArray)
-
-if _TOP_MOD !== Core.Compiler
-    # NOTE define fancy package utilities when developing EA as an external package
-    include("EAUtils.jl")
-    using .EAUtils
-    export code_escapes, @code_escapes, __clear_cache!
+    return false
 end
 
 end # baremodule EscapeAnalysis
diff --git a/base/compiler/ssair/basicblock.jl b/Compiler/src/ssair/basicblock.jl
similarity index 100%
rename from base/compiler/ssair/basicblock.jl
rename to Compiler/src/ssair/basicblock.jl
diff --git a/base/compiler/ssair/EscapeAnalysis/disjoint_set.jl b/Compiler/src/ssair/disjoint_set.jl
similarity index 91%
rename from base/compiler/ssair/EscapeAnalysis/disjoint_set.jl
rename to Compiler/src/ssair/disjoint_set.jl
index 915bc214d7c3c..e000d7e8a582f 100644
--- a/base/compiler/ssair/EscapeAnalysis/disjoint_set.jl
+++ b/Compiler/src/ssair/disjoint_set.jl
@@ -3,14 +3,9 @@
 # under the MIT license: https://github.com/JuliaCollections/DataStructures.jl/blob/master/License.md
 
 # imports
-import ._TOP_MOD:
-    length,
-    eltype,
-    union!,
-    push!
+import Base: length, eltype, union!, push!
 # usings
-import ._TOP_MOD:
-    OneTo, collect, zero, zeros, one, typemax
+using Base: OneTo, collect, zero, zeros, one, typemax
 
 # Disjoint-Set
 
@@ -27,7 +22,8 @@ import ._TOP_MOD:
 #
 ############################################################
 
-_intdisjointset_bounds_err_msg(T) = "the maximum number of elements in IntDisjointSet{$T} is $(typemax(T))"
+_intdisjointset_bounds_err_msg(@nospecialize T) =
+    "the maximum number of elements in IntDisjointSet{$T} is $(typemax(T))"
 
 """
     IntDisjointSet{T<:Integer}(n::Integer)
@@ -59,7 +55,7 @@ eltype(::Type{IntDisjointSet{T}}) where {T<:Integer} = T
 # path compression is implemented here
 function find_root_impl!(parents::Vector{T}, x::Integer) where {T<:Integer}
     p = parents[x]
-    @inbounds if parents[p] != p
+    @inbounds if parents[p] ≠ p
         parents[x] = p = _find_root_impl!(parents, p)
     end
     return p
@@ -68,7 +64,7 @@ end
 # unsafe version of the above
 function _find_root_impl!(parents::Vector{T}, x::Integer) where {T<:Integer}
     @inbounds p = parents[x]
-    @inbounds if parents[p] != p
+    @inbounds if parents[p] ≠ p
         parents[x] = p = _find_root_impl!(parents, p)
     end
     return p
@@ -99,7 +95,7 @@ function union!(s::IntDisjointSet{T}, x::T, y::T) where {T<:Integer}
     parents = s.parents
     xroot = find_root_impl!(parents, x)
     yroot = find_root_impl!(parents, y)
-    return xroot != yroot ? root_union!(s, xroot, yroot) : xroot
+    return xroot ≠ yroot ? root_union!(s, xroot, yroot) : xroot
 end
 
 """
diff --git a/base/compiler/ssair/domtree.jl b/Compiler/src/ssair/domtree.jl
similarity index 96%
rename from base/compiler/ssair/domtree.jl
rename to Compiler/src/ssair/domtree.jl
index 1edb8d2d5c6d4..480d38f2bb218 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/Compiler/src/ssair/domtree.jl
@@ -82,6 +82,8 @@ struct DFSTree
     # (preorder number -> preorder number)
     # Storing it this way saves a few lookups in the snca_compress! algorithm
     to_parent_pre::Vector{PreNumber}
+
+    _worklist::Vector{Tuple{BBNumber, PreNumber, Bool}}
 end
 
 function DFSTree(n_blocks::Int)
@@ -89,14 +91,16 @@ function DFSTree(n_blocks::Int)
                    Vector{BBNumber}(undef, n_blocks),
                    zeros(PostNumber, n_blocks),
                    Vector{BBNumber}(undef, n_blocks),
-                   zeros(PreNumber, n_blocks))
+                   zeros(PreNumber, n_blocks),
+                   Vector{Tuple{BBNumber, PreNumber, Bool}}())
 end
 
 copy(D::DFSTree) = DFSTree(copy(D.to_pre),
                            copy(D.from_pre),
                            copy(D.to_post),
                            copy(D.from_post),
-                           copy(D.to_parent_pre))
+                           copy(D.to_parent_pre),
+                           copy(D._worklist))
 
 function copy!(dst::DFSTree, src::DFSTree)
     copy!(dst.to_pre, src.to_pre)
@@ -106,17 +110,26 @@ function copy!(dst::DFSTree, src::DFSTree)
     copy!(dst.to_parent_pre, src.to_parent_pre)
     return dst
 end
+function resize!(D::DFSTree, n::Integer)
+    resize!(D.to_pre, n)
+    resize!(D.from_pre, n)
+    resize!(D.to_post, n)
+    resize!(D.from_post, n)
+    resize!(D.to_parent_pre, n)
+end
 
 length(D::DFSTree) = length(D.from_pre)
 
 function DFS!(D::DFSTree, blocks::Vector{BasicBlock}, is_post_dominator::Bool)
-    copy!(D, DFSTree(length(blocks)))
+    resize!(D, length(blocks))
+    fill!(D.to_pre, 0)
+    to_visit = D._worklist # always starts empty
     if is_post_dominator
         # TODO: We're using -1 as the virtual exit node here. Would it make
         #       sense to actually have a real BB for the exit always?
-        to_visit = Tuple{BBNumber, PreNumber, Bool}[(-1, 0, false)]
+        push!(to_visit, (-1, 0, false))
     else
-        to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)]
+        push!(to_visit, (1, 0, false))
     end
     pre_num = is_post_dominator ? 0 : 1
     post_num = 1
@@ -189,7 +202,7 @@ DFS(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false) = DFS!(DFSTree(0)
 """
 Keeps the per-BB state of the Semi NCA algorithm. In the original formulation,
 there are three separate length `n` arrays, `label`, `semi` and `ancestor`.
-Instead, for efficiency, we use one array in a array-of-structs style setup.
+Instead, for efficiency, we use one array in an array-of-structs style setup.
 """
 struct SNCAData
     semi::PreNumber
@@ -332,10 +345,7 @@ function SNCA!(domtree::GenericDomTree{IsPostDom}, blocks::Vector{BasicBlock}, m
     ancestors = copy(D.to_parent_pre)
     relevant_blocks = IsPostDom ? (1:max_pre) : (2:max_pre)
     for w::PreNumber in reverse(relevant_blocks)
-        # LLVM initializes this to the parent, the paper initializes this to
-        # `w`, but it doesn't really matter (the parent is a predecessor, so at
-        # worst we'll discover it below). Save a memory reference here.
-        semi_w = typemax(PreNumber)
+        semi_w = ancestors[w]
         last_linked = PreNumber(w + 1)
         for v ∈ dom_edges(domtree, blocks, D.from_pre[w])
             # For the purpose of the domtree, ignore virtual predecessors into
@@ -585,9 +595,9 @@ function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber})
 end
 
 """
-    dominates(domtree::DomTree, bb1::Int, bb2::Int) -> Bool
+    dominates(domtree::DomTree, bb1::Int, bb2::Int)::Bool
 
-Checks if `bb1` dominates `bb2`.
+Check if `bb1` dominates `bb2`.
 `bb1` and `bb2` are indexes into the `CFG` blocks.
 `bb1` dominates `bb2` if the only way to enter `bb2` is via `bb1`.
 (Other blocks may be in between, e.g `bb1->bbx->bb2`).
@@ -596,9 +606,9 @@ dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) =
     _dominates(domtree, bb1, bb2)
 
 """
-    postdominates(domtree::DomTree, bb1::Int, bb2::Int) -> Bool
+    postdominates(domtree::PostDomTree, bb1::Int, bb2::Int)::Bool
 
-Checks if `bb1` post-dominates `bb2`.
+Check if `bb1` post-dominates `bb2`.
 `bb1` and `bb2` are indexes into the `CFG` blocks.
 `bb1` post-dominates `bb2` if every pass from `bb2` to the exit is via `bb1`.
 (Other blocks may be in between, e.g `bb2->bbx->bb1->exit`).
@@ -647,6 +657,8 @@ end
 Compute the nearest common (post-)dominator of `a` and `b`.
 """
 function nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber)
+    a == 0 && return a
+    b == 0 && return b
     alevel = domtree.nodes[a].level
     blevel = domtree.nodes[b].level
     # W.l.g. assume blevel <= alevel
diff --git a/base/compiler/ssair/heap.jl b/Compiler/src/ssair/heap.jl
similarity index 99%
rename from base/compiler/ssair/heap.jl
rename to Compiler/src/ssair/heap.jl
index 6e9883bc4ec60..1afb4eb5b2ffc 100644
--- a/base/compiler/ssair/heap.jl
+++ b/Compiler/src/ssair/heap.jl
@@ -3,13 +3,11 @@
 # Heap operations on flat vectors
 # -------------------------------
 
-
 # Binary heap indexing
 heapleft(i::Integer) = 2i
 heapright(i::Integer) = 2i + 1
 heapparent(i::Integer) = div(i, 2)
 
-
 # Binary min-heap percolate down.
 function percolate_down!(xs::Vector, i::Integer, x, o::Ordering, len::Integer=length(xs))
     @inbounds while (l = heapleft(i)) <= len
@@ -60,7 +58,6 @@ function heappush!(xs::Vector, x, o::Ordering)
     return xs
 end
 
-
 """
     heapify!(v, ord::Ordering)
 
diff --git a/base/compiler/ssair/inlining.jl b/Compiler/src/ssair/inlining.jl
similarity index 61%
rename from base/compiler/ssair/inlining.jl
rename to Compiler/src/ssair/inlining.jl
index add390f7d454f..9d477cbff491c 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/Compiler/src/ssair/inlining.jl
@@ -12,19 +12,24 @@ struct InliningTodo
     mi::MethodInstance
     # The IR of the inlinee
     ir::IRCode
+    # The SpecInfo for the inlinee
+    spec_info::SpecInfo
+    # The DebugInfo table for the inlinee
+    di::DebugInfo
     # If the function being inlined is a single basic block we can use a
     # simpler inlining algorithm. This flag determines whether that's allowed
     linear_inline_eligible::Bool
     # Effects of the call statement
     effects::Effects
 end
-function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects)
-    return InliningTodo(mi, ir, linear_inline_eligible(ir), effects)
+function InliningTodo(mi::MethodInstance, ir::IRCode, spec_info::SpecInfo, di::DebugInfo, effects::Effects)
+    return InliningTodo(mi, ir, spec_info, di, linear_inline_eligible(ir), effects)
 end
 
 struct ConstantCase
     val::Any
-    ConstantCase(@nospecialize val) = new(val)
+    edge::CodeInstance
+    ConstantCase(@nospecialize(val), edge::CodeInstance) = new(val, edge)
 end
 
 struct SomeCase
@@ -33,7 +38,7 @@ struct SomeCase
 end
 
 struct InvokeCase
-    invoke::MethodInstance
+    invoke::Union{CodeInstance,MethodInstance}
     effects::Effects
     info::CallInfo
 end
@@ -48,37 +53,30 @@ struct InliningCase
 end
 
 struct UnionSplit
-    fully_covered::Bool
+    handled_all_cases::Bool # All possible dispatches are included in the cases
+    fully_covered::Bool # All handled cases are fully covering
     atype::DataType
     cases::Vector{InliningCase}
     bbs::Vector{Int}
-    UnionSplit(fully_covered::Bool, atype::DataType, cases::Vector{InliningCase}) =
-        new(fully_covered, atype, cases, Int[])
+    UnionSplit(handled_all_cases::Bool, fully_covered::Bool, atype::DataType, cases::Vector{InliningCase}) =
+        new(handled_all_cases, fully_covered, atype, cases, Int[])
 end
 
 struct InliningEdgeTracker
     edges::Vector{Any}
-    invokesig::Union{Nothing,Vector{Any}}
-    InliningEdgeTracker(state::InliningState, invokesig::Union{Nothing,Vector{Any}}=nothing) =
-        new(state.edges, invokesig)
+    InliningEdgeTracker(state::InliningState) = new(state.edges)
 end
 
-function add_inlining_backedge!((; edges, invokesig)::InliningEdgeTracker, mi::MethodInstance)
-    if invokesig === nothing
-        push!(edges, mi)
-    else # invoke backedge
-        push!(edges, invoke_signature(invokesig), mi)
-    end
-    return nothing
-end
+add_inlining_edge!(et::InliningEdgeTracker, edge::CodeInstance) = add_inlining_edge!(et.edges, edge)
+add_inlining_edge!(et::InliningEdgeTracker, edge::MethodInstance) = add_inlining_edge!(et.edges, edge)
 
 function ssa_inlining_pass!(ir::IRCode, state::InliningState, propagate_inbounds::Bool)
     # Go through the function, performing simple inlining (e.g. replacing call by constants
     # and analyzing legality of inlining).
-    @timeit "analysis" todo = assemble_inline_todo!(ir, state)
+    @zone "CC: ANALYSIS" todo = assemble_inline_todo!(ir, state)
     isempty(todo) && return ir
     # Do the actual inlining for every call we identified
-    @timeit "execution" ir = batch_inline!(ir, todo, propagate_inbounds, OptimizationParams(state.interp))
+    @zone "CC: EXECUTION" ir = batch_inline!(ir, todo, propagate_inbounds, state.interp)
     return ir
 end
 
@@ -128,14 +126,15 @@ function cfg_inline_item!(ir::IRCode, idx::Int, todo::InliningTodo, state::CFGIn
     block = block_for_inst(ir, idx)
     inline_into_block!(state, block)
 
-    if !isempty(inlinee_cfg.blocks[1].preds)
+    if length(inlinee_cfg.blocks[1].preds) > 1
         need_split_before = true
+    else
+        @assert inlinee_cfg.blocks[1].preds[1] == 0
     end
-
     last_block_idx = last(state.cfg.blocks[block].stmts)
     if false # TODO: ((idx+1) == last_block_idx && isa(ir[SSAValue(last_block_idx)], GotoNode))
         need_split = false
-        post_bb_id = -ir[SSAValue(last_block_idx)][:inst].label
+        post_bb_id = -ir[SSAValue(last_block_idx)][:stmt].label
     else
         post_bb_id = length(state.new_cfg_blocks) + length(inlinee_cfg.blocks) + (need_split_before ? 1 : 0)
         need_split = true #!(idx == last_block_idx)
@@ -168,12 +167,18 @@ function cfg_inline_item!(ir::IRCode, idx::Int, todo::InliningTodo, state::CFGIn
     end
     new_block_range = (length(state.new_cfg_blocks)-length(inlinee_cfg.blocks)+1):length(state.new_cfg_blocks)
 
-    # Fixup the edges of the newely added blocks
+    # Fixup the edges of the newly added blocks
     for (old_block, new_block) in enumerate(bb_rename_range)
         if old_block != 1 || need_split_before
             p = state.new_cfg_blocks[new_block].preds
             let bb_rename_range = bb_rename_range
                 map!(p, p) do old_pred_block
+                    # the meaning of predecessor 0 depends on the block we encounter it:
+                    #   - in the first block, it represents the function entry and so needs to be re-mapped
+                    if old_block == 1 && old_pred_block == 0
+                        return first(bb_rename_range) - 1
+                    end
+                    #   - elsewhere, it represents external control-flow from a caught exception which is un-affected by inlining
                     return old_pred_block == 0 ? 0 : bb_rename_range[old_pred_block]
                 end
             end
@@ -188,15 +193,11 @@ function cfg_inline_item!(ir::IRCode, idx::Int, todo::InliningTodo, state::CFGIn
         end
     end
 
-    if need_split_before
-        push!(state.new_cfg_blocks[first(bb_rename_range)].preds, first(bb_rename_range)-1)
-    end
-
     any_edges = false
     for (old_block, new_block) in enumerate(bb_rename_range)
         if (length(state.new_cfg_blocks[new_block].succs) == 0)
             terminator_idx = last(inlinee_cfg.blocks[old_block].stmts)
-            terminator = todo.ir[SSAValue(terminator_idx)][:inst]
+            terminator = todo.ir[SSAValue(terminator_idx)][:stmt]
             if isa(terminator, ReturnNode) && isdefined(terminator, :val)
                 any_edges = true
                 push!(state.new_cfg_blocks[new_block].succs, post_bb_id)
@@ -211,10 +212,9 @@ function cfg_inline_item!(ir::IRCode, idx::Int, todo::InliningTodo, state::CFGIn
     return nothing
 end
 
-function cfg_inline_unionsplit!(ir::IRCode, idx::Int,
-                                (; fully_covered, #=atype,=# cases, bbs)::UnionSplit,
-                                state::CFGInliningState,
-                                params::OptimizationParams)
+function cfg_inline_unionsplit!(ir::IRCode, idx::Int, union_split::UnionSplit,
+                                state::CFGInliningState, params::OptimizationParams)
+    (; handled_all_cases, fully_covered, #=atype,=# cases, bbs) = union_split
     inline_into_block!(state, block_for_inst(ir, idx))
     from_bbs = Int[]
     delete!(state.split_targets, length(state.new_cfg_blocks))
@@ -234,9 +234,7 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int,
             end
         end
         push!(from_bbs, length(state.new_cfg_blocks))
-        # TODO: Right now we unconditionally generate a fallback block
-        # in case of subtyping errors - This is probably unnecessary.
-        if i != length(cases) || (!fully_covered)
+        if !(i == length(cases) && (handled_all_cases && fully_covered))
             # This block will have the next condition or the final else case
             push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx)))
             push!(state.new_cfg_blocks[cond_bb].succs, length(state.new_cfg_blocks))
@@ -245,7 +243,10 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int,
         end
     end
     # The edge from the fallback block.
-    fully_covered || push!(from_bbs, length(state.new_cfg_blocks))
+    # NOTE This edge is only required for `!handled_all_cases` and not `!fully_covered`,
+    #      since in the latter case we inline `Core.throw_methoderror` into the fallback
+    #      block, which is must-throw, making the subsequent code path unreachable.
+    !handled_all_cases && push!(from_bbs, length(state.new_cfg_blocks))
     # This block will be the block everyone returns to
     push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx), from_bbs, orig_succs))
     join_bb = length(state.new_cfg_blocks)
@@ -301,79 +302,39 @@ function finish_cfg_inline!(state::CFGInliningState)
     end
 end
 
-# duplicated from IRShow
-function normalize_method_name(m)
-    if m isa Method
-        return m.name
-    elseif m isa MethodInstance
-        return (m.def::Method).name
-    elseif m isa Symbol
-        return m
-    else
-        return Symbol("")
-    end
-end
-@noinline method_name(m::LineInfoNode) = normalize_method_name(m.method)
-
-inline_node_is_duplicate(topline::LineInfoNode, line::LineInfoNode) =
-    topline.module === line.module &&
-    method_name(topline) === method_name(line) &&
-    topline.file === line.file &&
-    topline.line === line.line
-
-function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCode,
-                              inlinee::MethodInstance,
-                              inlined_at::Int32)
-    inlinee_def = inlinee.def::Method
-    coverage = coverage_enabled(inlinee_def.module)
-    linetable_offset::Int32 = length(linetable)
-    # Append the linetable of the inlined function to our line table
-    topline::Int32 = linetable_offset + Int32(1)
-    coverage_by_path = JLOptions().code_coverage == 3
-    push!(linetable, LineInfoNode(inlinee_def.module, inlinee, inlinee_def.file, inlinee_def.line, inlined_at))
-    oldlinetable = inlinee_ir.linetable
-    extra_coverage_line = zero(Int32)
-    for oldline in eachindex(oldlinetable)
-        entry = oldlinetable[oldline]
-        if !coverage && coverage_by_path && is_file_tracked(entry.file)
-            # include topline coverage entry if in path-specific coverage mode, and any file falls under path
-            coverage = true
+# TODO append `inlinee_debuginfo` to inner linetable when `inlined_at[2] ≠ 0`
+function ir_inline_linetable!(debuginfo::DebugInfoStream, inlinee_debuginfo::DebugInfo, inlined_at::NTuple{3,Int32})
+    # Append the linetable of the inlined function to our edges table
+    linetable_offset = 1
+    while true
+        if linetable_offset > length(debuginfo.edges)
+            push!(debuginfo.edges, inlinee_debuginfo)
+            break
+        elseif debuginfo.edges[linetable_offset] === inlinee_debuginfo
+            break
         end
-        newentry = LineInfoNode(entry.module, entry.method, entry.file, entry.line,
-            (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset + (oldline == 1) : inlined_at))
-        if oldline == 1
-            # check for a duplicate on the first iteration (likely true)
-            if inline_node_is_duplicate(linetable[topline], newentry)
-                continue
-            else
-                linetable_offset += 1
-            end
-        end
-        push!(linetable, newentry)
-    end
-    if coverage && inlinee_ir.stmts[1][:line] + linetable_offset != topline
-        extra_coverage_line = topline
+        linetable_offset += 1
     end
-    return linetable_offset, extra_coverage_line
+    return (inlined_at[1], Int32(linetable_offset), Int32(0))
 end
 
 function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact},
-        linetable::Vector{LineInfoNode}, ir′::IRCode, sparam_vals::SimpleVector,
-        mi::MethodInstance, inlined_at::Int32, argexprs::Vector{Any})
+                              ir::IRCode, spec_info::SpecInfo, di::DebugInfo, mi::MethodInstance,
+                              inlined_at::NTuple{3,Int32}, argexprs::Vector{Any})
     def = mi.def::Method
-    topline::Int32 = length(linetable) + Int32(1)
-    linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, ir′, mi, inlined_at)
-    if extra_coverage_line != 0
-        insert_node!(NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line))
+    debuginfo = inline_target isa IRCode ? inline_target.debuginfo : inline_target.ir.debuginfo
+    topline = new_inlined_at = ir_inline_linetable!(debuginfo, di, inlined_at)
+    if should_insert_coverage(def.module, di)
+        insert_node!(NewInstruction(Expr(:code_coverage_effect), Nothing, topline))
     end
-    sp_ssa = nothing
-    if !validate_sparams(sparam_vals)
+    spvals_ssa = nothing
+    if !validate_sparams(mi.sparam_vals)
         # N.B. This works on the caller-side argexprs, (i.e. before the va fixup below)
-        sp_ssa = insert_node!(
-            effect_free_and_nothrow(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline)))
+        spvals_ssa = insert_node!(
+            removable_if_unused(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline)))
     end
-    if def.isva
-        nargs_def = Int(def.nargs::Int32)
+    if spec_info.isva
+        nargs_def = spec_info.nargs
         if nargs_def > 0
             argexprs = fix_va_argexprs!(insert_node!, inline_target, argexprs, nargs_def, topline)
         end
@@ -382,43 +343,46 @@ function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCod
         # Replace the first argument by a load of the capture environment
         argexprs[1] = insert_node!(
             NewInstruction(Expr(:call, GlobalRef(Core, :getfield), argexprs[1], QuoteNode(:captures)),
-            ir′.argtypes[1], topline))
+            ir.argtypes[1], topline))
+    end
+    return SSASubstitute(mi, argexprs, spvals_ssa, new_inlined_at)
+end
+
+function adjust_boundscheck!(inline_compact::IncrementalCompact, idx′::Int, stmt::Expr, boundscheck::Symbol)
+    if boundscheck === :off
+        isempty(stmt.args) && push!(stmt.args, false)
+    elseif boundscheck !== :propagate
+        isempty(stmt.args) && push!(stmt.args, true)
     end
-    return (Pair{Union{Nothing, SSAValue}, Vector{Any}}(sp_ssa, argexprs), linetable_offset)
+    return nothing
 end
 
 function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any},
-                         linetable::Vector{LineInfoNode}, item::InliningTodo,
-                         boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
+                         item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
     # Ok, do the inlining here
-    sparam_vals = item.mi.sparam_vals
     inlined_at = compact.result[idx][:line]
+    ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.spec_info, item.di, item.mi, inlined_at, argexprs)
+    boundscheck = has_flag(compact.result[idx], IR_FLAG_INBOUNDS) ? :off : boundscheck
 
-    ((sp_ssa, argexprs), linetable_offset) = ir_prepare_inlining!(InsertHere(compact),
-        compact, linetable, item.ir, sparam_vals, item.mi, inlined_at, argexprs)
-
-    if boundscheck === :default || boundscheck === :propagate
-        if (compact.result[idx][:flag] & IR_FLAG_INBOUNDS) != 0
-            boundscheck = :off
-        end
-    end
     # If the iterator already moved on to the next basic block,
-    # temporarily re-open in again.
+    # temporarily re-open it again.
     local return_value
-    def = item.mi.def::Method
-    sig = def.sig
     # Special case inlining that maintains the current basic block if there's only one BB in the target
     new_new_offset = length(compact.new_new_nodes)
     late_fixup_offset = length(compact.late_fixup)
     if item.linear_inline_eligible
         #compact[idx] = nothing
         inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx)
-        for ((_, idx′), stmt′) in inline_compact
+        @assert isempty(inline_compact.perm) && isempty(inline_compact.pending_perm) "linetable not in canonical form (missing compact call)"
+        for ((lineidx, idx′), stmt′) in inline_compact
             # This dance is done to maintain accurate usage counts in the
             # face of rename_arguments! mutating in place - should figure out
             # something better eventually.
             inline_compact[idx′] = nothing
-            stmt′ = ssa_substitute!(InsertBefore(inline_compact, SSAValue(idx′)), inline_compact[SSAValue(idx′)], stmt′, argexprs, sig, sparam_vals, sp_ssa, linetable_offset, boundscheck)
+            # alter the line number information for InsertBefore to point to the current instruction in the new linetable
+            inline_compact[SSAValue(idx′)][:line] = (ssa_substitute.inlined_at[1], ssa_substitute.inlined_at[2], Int32(lineidx))
+            insert_node! = InsertBefore(inline_compact, SSAValue(idx′))
+            stmt′ = ssa_substitute_op!(insert_node!, inline_compact[SSAValue(idx′)], stmt′, ssa_substitute)
             if isa(stmt′, ReturnNode)
                 val = stmt′.val
                 return_value = SSAValue(idx′)
@@ -426,8 +390,10 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
                 inline_compact.result[idx′][:type] =
                     argextype(val, isa(val, Argument) || isa(val, Expr) ? compact : inline_compact)
                 # Everything legal in value position is guaranteed to be effect free in stmt position
-                inline_compact.result[idx′][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
+                inline_compact.result[idx′][:flag] = IR_FLAGS_REMOVABLE
                 break
+            elseif isexpr(stmt′, :boundscheck)
+                adjust_boundscheck!(inline_compact, idx′, stmt′, boundscheck)
             end
             inline_compact[idx′] = stmt′
         end
@@ -436,16 +402,19 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
     else
         bb_offset, post_bb_id = popfirst!(todo_bbs)
         # This implements the need_split_before flag above
-        need_split_before = !isempty(item.ir.cfg.blocks[1].preds)
+        need_split_before = length(item.ir.cfg.blocks[1].preds) > 1
         if need_split_before
             finish_current_bb!(compact, 0)
         end
         pn = PhiNode()
         #compact[idx] = nothing
         inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx)
-        for ((_, idx′), stmt′) in inline_compact
+        @assert isempty(inline_compact.perm) && isempty(inline_compact.pending_perm) "linetable not in canonical form (missing compact call)"
+        for ((lineidx, idx′), stmt′) in inline_compact
             inline_compact[idx′] = nothing
-            stmt′ = ssa_substitute!(InsertBefore(inline_compact, SSAValue(idx′)), inline_compact[SSAValue(idx′)], stmt′, argexprs, sig, sparam_vals, sp_ssa, linetable_offset, boundscheck)
+            inline_compact[SSAValue(idx′)][:line] = (ssa_substitute.inlined_at[1], ssa_substitute.inlined_at[2], Int32(lineidx))
+            insert_node! = InsertBefore(inline_compact, SSAValue(idx′))
+            stmt′ = ssa_substitute_op!(insert_node!, inline_compact[SSAValue(idx′)], stmt′, ssa_substitute)
             if isa(stmt′, ReturnNode)
                 if isdefined(stmt′, :val)
                     val = stmt′.val
@@ -456,12 +425,14 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
                 end
             elseif isa(stmt′, GotoNode)
                 stmt′ = GotoNode(stmt′.label + bb_offset)
-            elseif isa(stmt′, Expr) && stmt′.head === :enter
-                stmt′ = Expr(:enter, stmt′.args[1]::Int + bb_offset)
+            elseif isa(stmt′, EnterNode)
+                stmt′ = EnterNode(stmt′, stmt′.catch_dest == 0 ? 0 : stmt′.catch_dest + bb_offset)
             elseif isa(stmt′, GotoIfNot)
                 stmt′ = GotoIfNot(stmt′.cond, stmt′.dest + bb_offset)
             elseif isa(stmt′, PhiNode)
                 stmt′ = PhiNode(Int32[edge+bb_offset for edge in stmt′.edges], stmt′.values)
+            elseif isexpr(stmt′, :boundscheck)
+                adjust_boundscheck!(inline_compact, idx′, stmt′, boundscheck)
             end
             inline_compact[idx′] = stmt′
         end
@@ -479,7 +450,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
 end
 
 function fix_va_argexprs!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact},
-    argexprs::Vector{Any}, nargs_def::Int, line_idx::Int32)
+    argexprs::Vector{Any}, nargs_def::Int, line_idx::NTuple{3,Int32})
     newargexprs = argexprs[1:(nargs_def-1)]
     tuple_call = Expr(:call, TOP_TUPLE)
     tuple_typs = Any[]
@@ -497,50 +468,45 @@ end
 """
     ir_inline_unionsplit!
 
-The core idea of this function is to simulate the dispatch semantics by generating
-(flat) `isa`-checks corresponding to the signatures of union-split dispatch candidates,
-and then inline their bodies into each `isa`-conditional block.
-This `isa`-based virtual dispatch requires few pre-conditions to hold in order to simulate
-the actual semantics correctly.
+The primary purpose of this function is to emulate the dispatch behavior by generating flat
+`isa`-checks that correspond to the signatures of union-split dispatch candidates.
+These checks allow us to inline the method bodies into respective `isa`-conditional blocks.
+
+Note that two pre-conditions are required for this emulation to work correctly:
 
-The first one is that these dispatch candidates need to be processed in order of their specificity,
-and the corresponding `isa`-checks should reflect the method specificities, since now their
-signatures are not necessarily concrete.
-For example, given the following definitions:
+1. Ordered Dispatch Candidates
+
+The dispatch candidates must be processed in order of their specificity.
+The generated `isa`-checks should reflect this order,
+especially since the method signatures may not be concrete.
+For instance, with the methods:
 
     f(x::Int)    = ...
     f(x::Number) = ...
     f(x::Any)    = ...
 
-and a callsite:
-
-    f(x::Any)
-
-then a correct `isa`-based virtual dispatch would be:
+A correct `isa`-based dispatch emulation for the call site `f(x::Any)` would look like:
 
     if isa(x, Int)
         [inlined/resolved f(x::Int)]
     elseif isa(x, Number)
         [inlined/resolved f(x::Number)]
-    else # implies `isa(x, Any)`, which fully covers this call signature,
-         # otherwise we need to insert a fallback dynamic dispatch case also
+    else
         [inlined/resolved f(x::Any)]
     end
 
-Fortunately, `ml_matches` should already sorted them in that way, except cases when there is
-any ambiguity, from which we already bail out at this point.
+`ml_matches` should already sort the matched method candidates correctly,
+except in ambiguous cases, which we've already excluded at this state.
 
-Another consideration is type equality constraint from type variables: the `isa`-checks are
-not enough to simulate the dispatch semantics in cases like:
-Given a definition:
+2. Type Equality Constraints
 
-    g(x::T, y::T) where T<:Integer = ...
-
-transform a callsite:
+Another factor is the type equality constraint imposed by type variables.
+Simple `isa`-checks are insufficient to capture the semantics in some cases.
+For example, given the following method definition:
 
-    g(x::Any, y::Any)
+    g(x::T, y::T) where T<:Integer = ...
 
-into the optimized form:
+it is _invalid_ to optimize a cal site like `g(x::Any, y::Any)` into:
 
     if isa(x, Integer) && isa(y, Integer)
         [inlined/resolved g(x::Integer, y::Integer)]
@@ -548,18 +514,20 @@ into the optimized form:
         g(x, y) # fallback dynamic dispatch
     end
 
-But again, we should already bail out from such cases at this point, essentially by
-excluding cases where `case.sig::UnionAll`.
+since we also need to check that `x` and `y` are equal types.
 
-In short, here we can process the dispatch candidates in order, assuming we haven't changed
-their order somehow somewhere up to this point.
+But, we've already excluded such cases at this point,
+mainly by filtering out `case.sig::UnionAll`,
+so there is no need to worry about type equality at this point.
+
+In essence, we can process the dispatch candidates sequentially,
+assuming their order stays the same post-discovery in `ml_matches`.
 """
-function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
-                               argexprs::Vector{Any}, linetable::Vector{LineInfoNode},
-                               (; fully_covered, atype, cases, bbs)::UnionSplit,
-                               boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}},
-                               params::OptimizationParams)
-    stmt, typ, line = compact.result[idx][:inst], compact.result[idx][:type], compact.result[idx][:line]
+function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any},
+                               union_split::UnionSplit, boundscheck::Symbol,
+                               todo_bbs::Vector{Tuple{Int,Int}}, interp::AbstractInterpreter)
+    (; handled_all_cases, fully_covered, atype, cases, bbs) = union_split
+    stmt, typ, line = compact.result[idx][:stmt], compact.result[idx][:type], compact.result[idx][:line]
     join_bb = bbs[end]
     pn = PhiNode()
     local bb = compact.active_result_bb
@@ -573,22 +541,24 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
         cond = true
         nparams = fieldcount(atype)
         @assert nparams == fieldcount(mtype)
-        if i != ncases || !fully_covered
+        if !(i == ncases && fully_covered && handled_all_cases)
             for i = 1:nparams
-                a, m = fieldtype(atype, i), fieldtype(mtype, i)
+                aft, mft = fieldtype(atype, i), fieldtype(mtype, i)
                 # If this is always true, we don't need to check for it
-                a <: m && continue
+                aft <: mft && continue
                 # Generate isa check
-                isa_expr = Expr(:call, isa, argexprs[i], m)
-                ssa = insert_node_here!(compact, NewInstruction(isa_expr, Bool, line))
+                isa_expr = Expr(:call, isa, argexprs[i], mft)
+                isa_type = isa_tfunc(optimizer_lattice(interp), argextype(argexprs[i], compact), Const(mft))
+                ssa = insert_node_here!(compact, NewInstruction(isa_expr, isa_type, line))
                 if cond === true
                     cond = ssa
                 else
                     and_expr = Expr(:call, and_int, cond, ssa)
-                    cond = insert_node_here!(compact, NewInstruction(and_expr, Bool, line))
+                    and_type = and_int_tfunc(optimizer_lattice(interp), argextype(cond, compact), isa_type)
+                    cond = insert_node_here!(compact, NewInstruction(and_expr, and_type, line))
                 end
             end
-            insert_node_here!(compact, NewInstruction(GotoIfNot(cond, next_cond_bb), Union{}, line))
+            insert_node_here!(compact, NewInstruction(GotoIfNot(cond, next_cond_bb), Any, line))
         end
         bb = next_cond_bb - 1
         finish_current_bb!(compact, 0)
@@ -598,19 +568,21 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
             for i = 1:nparams
                 argex = argexprs[i]
                 (isa(argex, SSAValue) || isa(argex, Argument)) || continue
-                a, m = fieldtype(atype, i), fieldtype(mtype, i)
-                if !(a <: m)
+                aft, mft = fieldtype(atype, i), fieldtype(mtype, i)
+                if !(aft <: mft)
+                    𝕃ₒ = optimizer_lattice(interp)
+                    narrowed_type = tmeet(𝕃ₒ, argextype(argex, compact), mft)
                     argexprs′[i] = insert_node_here!(compact,
-                        NewInstruction(PiNode(argex, m), m, line))
+                        NewInstruction(PiNode(argex, mft), narrowed_type, line))
                 end
             end
         end
         if isa(case, InliningTodo)
-            val = ir_inline_item!(compact, idx, argexprs′, linetable, case, boundscheck, todo_bbs)
+            val = ir_inline_item!(compact, idx, argexprs′, case, boundscheck, todo_bbs)
         elseif isa(case, InvokeCase)
-            inst = Expr(:invoke, case.invoke, argexprs′...)
+            invoke_stmt = Expr(:invoke, case.invoke, argexprs′...)
             flag = flags_for_effects(case.effects)
-            val = insert_node_here!(compact, NewInstruction(inst, typ, case.info, line, flag))
+            val = insert_node_here!(compact, NewInstruction(invoke_stmt, typ, case.info, line, flag))
         else
             case = case::ConstantCase
             val = case.val
@@ -619,7 +591,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
             push!(pn.edges, bb)
             push!(pn.values, val)
             insert_node_here!(compact,
-                NewInstruction(GotoNode(join_bb), Union{}, line))
+                NewInstruction(GotoNode(join_bb), Any, line))
         else
             insert_node_here!(compact,
                 NewInstruction(ReturnNode(), Union{}, line))
@@ -628,19 +600,24 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
     end
     bb += 1
     # We're now in the fall through block, decide what to do
-    if !fully_covered
+    if !handled_all_cases
         ssa = insert_node_here!(compact, NewInstruction(stmt, typ, line))
         push!(pn.edges, bb)
         push!(pn.values, ssa)
-        insert_node_here!(compact, NewInstruction(GotoNode(join_bb), Union{}, line))
+        insert_node_here!(compact, NewInstruction(GotoNode(join_bb), Any, line))
+        finish_current_bb!(compact, 0)
+    elseif !fully_covered
+        insert_node_here!(compact, NewInstruction(Expr(:call, GlobalRef(Core, :throw_methoderror), argexprs...), Union{}, line))
+        insert_node_here!(compact, NewInstruction(ReturnNode(), Union{}, line))
         finish_current_bb!(compact, 0)
+        ncases == 0 && return insert_node_here!(compact, NewInstruction(nothing, Any, line))
     end
-
     # We're now in the join block.
     return insert_node_here!(compact, NewInstruction(pn, typ, line))
 end
 
-function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inbounds::Bool, params::OptimizationParams)
+function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inbounds::Bool, interp::AbstractInterpreter)
+    params = OptimizationParams(interp)
     # Compute the new CFG first (modulo statement ranges, which will be computed below)
     state = CFGInliningState(ir)
     for (idx, item) in todo
@@ -655,10 +632,7 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun
     end
     finish_cfg_inline!(state)
 
-    boundscheck = inbounds_option()
-    if boundscheck === :default && propagate_inbounds
-        boundscheck = :propagate
-    end
+    boundscheck = propagate_inbounds ? :propagate : :default
 
     let compact = IncrementalCompact(ir, CFGTransformState!(state.new_cfg_blocks, false))
         # This needs to be a minimum and is more of a size hint
@@ -685,22 +659,10 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun
                     compact.active_result_bb -= 1
                     refinish = true
                 end
-                # It is possible for GlobalRefs and Exprs to be in argument position
-                # at this point in the IR, though in that case they are required
-                # to be effect-free. However, we must still move them out of argument
-                # position, since `Argument` is allowed in PhiNodes, but `GlobalRef`
-                # and `Expr` are not, so a substitution could anger the verifier.
-                for aidx in 1:length(argexprs)
-                    aexpr = argexprs[aidx]
-                    if isa(aexpr, Expr) || isa(aexpr, GlobalRef)
-                        ninst = effect_free_and_nothrow(NewInstruction(aexpr, argextype(aexpr, compact), compact.result[idx][:line]))
-                        argexprs[aidx] = insert_node_here!(compact, ninst)
-                    end
-                end
                 if isa(item, InliningTodo)
-                    compact.ssa_rename[old_idx] = ir_inline_item!(compact, idx, argexprs, ir.linetable, item, boundscheck, state.todo_bbs)
+                    compact.ssa_rename[old_idx] = ir_inline_item!(compact, idx, argexprs, item, boundscheck, state.todo_bbs)
                 elseif isa(item, UnionSplit)
-                    compact.ssa_rename[old_idx] = ir_inline_unionsplit!(compact, idx, argexprs, ir.linetable, item, boundscheck, state.todo_bbs, params)
+                    compact.ssa_rename[old_idx] = ir_inline_unionsplit!(compact, idx, argexprs, item, boundscheck, state.todo_bbs, interp)
                 end
                 compact[idx] = nothing
                 refinish && finish_current_bb!(compact, 0)
@@ -711,8 +673,8 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun
                 end
             elseif isa(stmt, GotoNode)
                 compact[idx] = GotoNode(state.bb_rename[stmt.label])
-            elseif isa(stmt, Expr) && stmt.head === :enter
-                compact[idx] = Expr(:enter, state.bb_rename[stmt.args[1]::Int])
+            elseif isa(stmt, EnterNode)
+                compact[idx] = EnterNode(stmt, stmt.catch_dest == 0 ? 0 : state.bb_rename[stmt.catch_dest])
             elseif isa(stmt, GotoIfNot)
                 compact[idx] = GotoIfNot(stmt.cond, state.bb_rename[stmt.dest])
             elseif isa(stmt, PhiNode)
@@ -754,7 +716,7 @@ function rewrite_apply_exprargs!(todo::Vector{Pair{Int,Any}},
                         ti = ti.parameters[2]::DataType # checked by `is_valid_type_for_apply_rewrite`
                     end
                     for p in ti.parameters
-                        if isa(p, DataType) && isdefined(p, :instance)
+                        if issingletontype(p)
                             # replace singleton types with their equivalent Const object
                             p = Const(p.instance)
                         elseif isconstType(p)
@@ -805,19 +767,20 @@ function rewrite_apply_exprargs!(todo::Vector{Pair{Int,Any}},
     return new_argtypes
 end
 
-function compileable_specialization(mi::MethodInstance, effects::Effects,
-        et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true)
+function compileable_specialization(code::Union{MethodInstance,CodeInstance}, effects::Effects,
+    et::InliningEdgeTracker, @nospecialize(info::CallInfo), state::InliningState)
+    mi = code isa CodeInstance ? code.def : code
     mi_invoke = mi
-    if compilesig_invokes
-        method, atype, sparams = mi.def::Method, mi.specTypes, mi.sparam_vals
+    method, atype, sparams = mi.def::Method, mi.specTypes, mi.sparam_vals
+    if OptimizationParams(state.interp).compilesig_invokes
         new_atype = get_compileable_sig(method, atype, sparams)
         new_atype === nothing && return nothing
         if atype !== new_atype
             sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), new_atype, method.sig)::SimpleVector
-            if sparams === sp_[2]::SimpleVector
-                mi_invoke = specialize_method(method, new_atype, sparams)
-                mi_invoke === nothing && return nothing
-            end
+            sparams = sp_[2]::SimpleVector
+            mi_invoke = specialize_method(method, new_atype, sparams)
+            mi_invoke === nothing && return nothing
+            code = mi_invoke
         end
     else
         # If this caller does not want us to optimize calls to use their
@@ -827,99 +790,66 @@ function compileable_specialization(mi::MethodInstance, effects::Effects,
             return nothing
         end
     end
-    add_inlining_backedge!(et, mi)
-    return InvokeCase(mi_invoke, effects, info)
-end
-
-function compileable_specialization(match::MethodMatch, effects::Effects,
-        et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true)
-    mi = specialize_method(match)
-    return compileable_specialization(mi, effects, et, info; compilesig_invokes)
+    # prefer using a CodeInstance gotten from the cache, since that is where the invoke target should get compiled to normally
+    # TODO: can this code be gotten directly from inference sometimes?
+    code = get(code_cache(state), mi_invoke, nothing)
+    code isa InferenceResult && (code = code.ci)
+    if !isa(code, CodeInstance)
+        #println("missing code for ", mi_invoke, " for ", mi)
+        code = mi_invoke
+    end
+    add_inlining_edge!(et, code) # to the code and edges
+    return InvokeCase(code, effects, info)
 end
 
-struct CachedResult
-    src::Any
+struct InferredCode
+    src::Any # CodeInfo or IRCode
     effects::Effects
-    CachedResult(@nospecialize(src), effects::Effects) = new(src, effects)
-end
-@inline function get_cached_result(state::InliningState, mi::MethodInstance)
-    code = get(code_cache(state), mi, nothing)
-    if code isa CodeInstance
-        if use_const_api(code)
-            # in this case function can be inlined to a constant
-            return ConstantCase(quoted(code.rettype_const))
-        else
-            src = @atomic :monotonic code.inferred
+    edge::CodeInstance
+    InferredCode(@nospecialize(src), effects::Effects, edge::CodeInstance) = new(src, effects, edge)
+end
+@inline function get_local_code(inf_result::InferenceResult)
+    @assert isdefined(inf_result, :ci_as_edge) "InferenceResult without ci_as_edge"
+    effects = inf_result.ipo_effects
+    if is_foldable_nothrow(effects)
+        res = inf_result.result
+        if isa(res, Const) && is_inlineable_constant(res.val)
+            # use constant calling convention
+            return ConstantCase(quoted(res.val), inf_result.ci_as_edge)
         end
-        effects = decode_effects(code.ipo_purity_bits)
-        return CachedResult(src, effects)
     end
-    return CachedResult(nothing, Effects())
+    return InferredCode(inf_result.src, effects, inf_result.ci_as_edge)
 end
 
 # the general resolver for usual and const-prop'ed calls
-function resolve_todo(mi::MethodInstance, result::Union{MethodMatch,InferenceResult},
-        argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt8,
-        state::InliningState; invokesig::Union{Nothing,Vector{Any}}=nothing)
-    et = InliningEdgeTracker(state, invokesig)
-
-    if isa(result, InferenceResult)
-        src = result.src
-        effects = result.ipo_effects
-        if is_foldable_nothrow(effects)
-            res = result.result
-            if isa(res, Const) && is_inlineable_constant(res.val)
-                # use constant calling convention
-                add_inlining_backedge!(et, mi)
-                return ConstantCase(quoted(res.val))
-            end
-        end
-    else
-        cached_result = get_cached_result(state, mi)
-        if cached_result isa ConstantCase
-            add_inlining_backedge!(et, mi)
-            return cached_result
-        end
-        (; src, effects) = cached_result
-    end
+function resolve_todo(mi::MethodInstance, call_result::Union{Nothing,InferenceResult},
+    @nospecialize(info::CallInfo), flag::UInt32, state::InliningState)
+    et = InliningEdgeTracker(state)
 
-    # the duplicated check might have been done already within `analyze_method!`, but still
-    # we need it here too since we may come here directly using a constant-prop' result
-    if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag)
-        return compileable_specialization(mi, effects, et, info;
-            compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
+    if call_result === nothing
+        # there is no cached source available for this, but there might be code for the compilation sig
+        return compileable_specialization(mi, Effects(), et, info, state)
     end
 
-    src = inlining_policy(state.interp, src, info, flag, mi, argtypes)
-    src === nothing && return compileable_specialization(mi, effects, et, info;
-        compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
-
-    add_inlining_backedge!(et, mi)
-    return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects)
-end
-
-# the special resolver for :invoke-d call
-function resolve_todo(mi::MethodInstance, argtypes::Vector{Any},
-    @nospecialize(info::CallInfo), flag::UInt8, state::InliningState)
-    if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag)
-        return nothing
+    inferred_result = get_local_code(call_result)
+    if inferred_result isa ConstantCase
+        add_inlining_edge!(et, inferred_result.edge)
+        return inferred_result
     end
+    (; src, effects, edge) = inferred_result
 
-    et = InliningEdgeTracker(state)
-
-    cached_result = get_cached_result(state, mi)
-    if cached_result isa ConstantCase
-        add_inlining_backedge!(et, mi)
-        return cached_result
+    # the duplicated check might have been done already within `analyze_method!`, but still
+    # we need it here too since we may come here directly using a constant-prop' result
+    if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag)
+        return compileable_specialization(edge, effects, et, info, state)
     end
-    (; src, effects) = cached_result
-
-    src = inlining_policy(state.interp, src, info, flag, mi, argtypes)
 
-    src === nothing && return nothing
+    src_inlining_policy(state.interp, mi, src, info, flag) ||
+        return compileable_specialization(edge, effects, et, info, state)
 
-    add_inlining_backedge!(et, mi)
-    return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects)
+    add_inlining_edge!(et, edge)
+    ir, spec_info, debuginfo = retrieve_ir_for_inlining(mi, src, true)
+    return InliningTodo(mi, ir, spec_info, debuginfo, effects)
 end
 
 function validate_sparams(sparams::SimpleVector)
@@ -937,11 +867,12 @@ function may_have_fcalls(m::Method)
     return ccall(:jl_ir_flag_has_fcall, Bool, (Any,), src)
 end
 
-function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
-    @nospecialize(info::CallInfo), flag::UInt8, state::InliningState;
-    allow_typevars::Bool, invokesig::Union{Nothing,Vector{Any}}=nothing)
+function analyze_method!(
+        call_result::Union{Nothing,InferenceResult}, match::MethodMatch, argtypes::Vector{Any},
+        @nospecialize(info::CallInfo), flag::UInt32, state::InliningState;
+        allow_typevars::Bool
+    )
     method = match.method
-    spec_types = match.spec_types
 
     # Check that we have the correct number of arguments
     na = Int(method.nargs)
@@ -956,7 +887,9 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
     if !match.fully_covers
         # type-intersection was not able to give us a simple list of types, so
         # ir_inline_unionsplit won't be able to deal with inlining this
-        if !(spec_types isa DataType && length(spec_types.parameters) == length(argtypes) && !isvarargtype(spec_types.parameters[end]))
+        spec_types = match.spec_types
+        if !(spec_types isa DataType && length(spec_types.parameters) == npassedargs &&
+             !isvarargtype(spec_types.parameters[end]))
             return nothing
         end
     end
@@ -968,51 +901,67 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
     # Get the specialization for this method signature
     # (later we will decide what to do with it)
     mi = specialize_method(match)
-    return resolve_todo(mi, match, argtypes, info, flag, state; invokesig)
+    return resolve_todo(mi, call_result, info, flag, state)
 end
 
-function retrieve_ir_for_inlining(mi::MethodInstance, src::String)
-    src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src)::CodeInfo
-    return inflate_ir!(src, mi)
+function retrieve_ir_for_inlining(cached_result::CodeInstance, src::String)
+    src = _uncompressed_ir(cached_result, src)
+    return inflate_ir!(src, cached_result.def), SpecInfo(src), src.debuginfo
 end
-retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo) = inflate_ir(src, mi)
-retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode) = copy(ir)
-
-function flags_for_effects(effects::Effects)
-    flags::UInt8 = 0
-    if is_consistent(effects)
-        flags |= IR_FLAG_CONSISTENT
+function retrieve_ir_for_inlining(cached_result::CodeInstance, src::CodeInfo)
+    return inflate_ir!(copy(src), cached_result.def), SpecInfo(src), src.debuginfo
+end
+function retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo, preserve_local_sources::Bool)
+    if preserve_local_sources
+        src = copy(src)
     end
-    if is_effect_free(effects)
-        flags |= IR_FLAG_EFFECT_FREE
+    return inflate_ir!(src, mi), SpecInfo(src), src.debuginfo
+end
+function retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode, preserve_local_sources::Bool)
+    if preserve_local_sources
+        ir = copy(ir)
     end
-    if is_nothrow(effects)
-        flags |= IR_FLAG_NOTHROW
+    # COMBAK this is not correct, we should make `InferenceResult` propagate `SpecInfo`
+    spec_info = let m = mi.def::Method
+        SpecInfo(Int(m.nargs), m.isva, false, nothing)
     end
-    return flags
+    ir.debuginfo.def = mi
+    return ir, spec_info, DebugInfo(ir.debuginfo, length(ir.stmts))
+end
+function retrieve_ir_for_inlining(mi::MethodInstance, opt::OptimizationState, preserve_local_sources::Bool)
+    result = opt.optresult
+    if result !== nothing
+        !result.simplified && simplify_ir!(result)
+        return retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources)
+    end
+    retrieve_ir_for_inlining(mi, opt.src, preserve_local_sources)
 end
 
 function handle_single_case!(todo::Vector{Pair{Int,Any}},
     ir::IRCode, idx::Int, stmt::Expr, @nospecialize(case),
     isinvoke::Bool = false)
     if isa(case, ConstantCase)
-        ir[SSAValue(idx)][:inst] = case.val
+        ir[SSAValue(idx)][:stmt] = case.val
     elseif isa(case, InvokeCase)
         is_foldable_nothrow(case.effects) && inline_const_if_inlineable!(ir[SSAValue(idx)]) && return nothing
-        isinvoke && rewrite_invoke_exprargs!(stmt)
-        stmt.head = :invoke
-        pushfirst!(stmt.args, case.invoke)
-        ir[SSAValue(idx)][:flag] |= flags_for_effects(case.effects)
+        isinvoke && invoke_rewrite!(stmt)
+        if stmt.head === :invoke
+            stmt.args[1] = case.invoke
+        else
+            stmt.head = :invoke
+            pushfirst!(stmt.args, case.invoke)
+        end
+        add_flag!(ir[SSAValue(idx)], flags_for_effects(case.effects))
     elseif case === nothing
         # Do, well, nothing
     else
-        isinvoke && rewrite_invoke_exprargs!(stmt)
+        isinvoke && invoke_rewrite!(stmt)
         push!(todo, idx=>(case::InliningTodo))
     end
     return nothing
 end
 
-rewrite_invoke_exprargs!(expr::Expr) = (expr.args = invoke_rewrite(expr.args); expr)
+invoke_rewrite!(expr::Expr) = (expr.args = invoke_rewrite(expr.args); expr)
 
 function is_valid_type_for_apply_rewrite(@nospecialize(typ), params::OptimizationParams)
     if isa(typ, Const) && (v = typ.val; isa(v, SimpleVector))
@@ -1112,7 +1061,7 @@ function inline_apply!(todo::Vector{Pair{Int,Any}},
             # e.g. rewrite `((t::Tuple)...,)` to `t`
             nonempty_idx = 0
             𝕃ₒ = optimizer_lattice(state.interp)
-            for i = (arg_start + 1):length(argtypes)
+            for i = (arg_start+1):length(argtypes)
                 ti = argtypes[i]
                 ⊑(𝕃ₒ, ti, Tuple{}) && continue
                 if ⊑(𝕃ₒ, ti, Tuple) && nonempty_idx == 0
@@ -1123,14 +1072,14 @@ function inline_apply!(todo::Vector{Pair{Int,Any}},
                 break
             end
             if nonempty_idx != 0
-                ir.stmts[idx][:inst] = stmt.args[nonempty_idx]
+                ir[SSAValue(idx)][:stmt] = stmt.args[nonempty_idx]
                 return nothing
             end
         end
         # Try to figure out the signature of the function being called
         # and if rewrite_apply_exprargs can deal with this form
         arginfos = MaybeAbstractIterationInfo[]
-        for i = (arg_start + 1):length(argtypes)
+        for i = (arg_start+1):length(argtypes)
             thisarginfo = nothing
             if !is_valid_type_for_apply_rewrite(argtypes[i], OptimizationParams(state.interp))
                 isa(info, ApplyCallInfo) || return nothing
@@ -1163,36 +1112,41 @@ function is_builtin(𝕃ₒ::AbstractLattice, s::Signature)
 end
 
 function handle_invoke_call!(todo::Vector{Pair{Int,Any}},
-    ir::IRCode, idx::Int, stmt::Expr, info::InvokeCallInfo, flag::UInt8,
+    ir::IRCode, idx::Int, stmt::Expr, @nospecialize(info), flag::UInt32,
     sig::Signature, state::InliningState)
-    match = info.match
+    nspl = nsplit(info)
+    nspl == 0 && return nothing # e.g. InvokeCICallInfo
+    @assert nspl == 1
+    mresult = getsplit(info, 1)
+    match = mresult.matches[1]
     if !match.fully_covers
         # TODO: We could union split out the signature check and continue on
         return nothing
     end
-    result = info.result
-    invokesig = sig.argtypes
+    result = getresult(info, 1)
     if isa(result, ConcreteResult)
-        item = concrete_result_item(result, info, state; invokesig)
+        item = concrete_result_item(result, info, state)
+    elseif isa(result, SemiConcreteResult)
+        item = semiconcrete_result_item(result, info, flag, state)
     else
         argtypes = invoke_rewrite(sig.argtypes)
-        if isa(result, ConstPropResult)
-            mi = result.result.linfo
+        if isa(result, InferenceResult)
+            mi = result.linfo
             validate_sparams(mi.sparam_vals) || return nothing
             if Union{} !== argtypes_to_type(argtypes) <: mi.def.sig
-                item = resolve_todo(mi, result.result, argtypes, info, flag, state; invokesig)
+                item = resolve_todo(mi, result, info, flag, state)
                 handle_single_case!(todo, ir, idx, stmt, item, true)
                 return nothing
             end
         end
-        item = analyze_method!(match, argtypes, info, flag, state; allow_typevars=false, invokesig)
+        item = analyze_method!(result, match, argtypes, info, flag, state; allow_typevars=false)
     end
     handle_single_case!(todo, ir, idx, stmt, item, true)
     return nothing
 end
 
 function invoke_signature(argtypes::Vector{Any})
-    ft, argtyps = widenconst(argtypes[2]), instanceof_tfunc(widenconst(argtypes[3]))[1]
+    ft, argtyps = widenconst(argtypes[2]), instanceof_tfunc(widenconst(argtypes[3]), false)[1]
     return rewrap_unionall(Tuple{ft, unwrap_unionall(argtyps).parameters...}, argtyps)
 end
 
@@ -1219,44 +1173,38 @@ end
 # As a matter of convenience, this pass also computes effect-freenes.
 # For primitives, we do that right here. For proper calls, we will
 # discover this when we consult the caches.
-function check_effect_free!(ir::IRCode, idx::Int, @nospecialize(stmt), @nospecialize(rt), state::InliningState)
-    return check_effect_free!(ir, idx, stmt, rt, optimizer_lattice(state.interp))
-end
-function check_effect_free!(ir::IRCode, idx::Int, @nospecialize(stmt), @nospecialize(rt), 𝕃ₒ::AbstractLattice)
-    (consistent, effect_free_and_nothrow, nothrow) = stmt_effect_flags(𝕃ₒ, stmt, rt, ir)
-    if consistent
-        ir.stmts[idx][:flag] |= IR_FLAG_CONSISTENT
-    end
-    if effect_free_and_nothrow
-        ir.stmts[idx][:flag] |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
-    elseif nothrow
-        ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW
-    end
-    return effect_free_and_nothrow
+add_inst_flag!(inst::Instruction, ir::IRCode, state::InliningState) =
+    add_inst_flag!(inst, ir, optimizer_lattice(state.interp))
+function add_inst_flag!(inst::Instruction, ir::IRCode, 𝕃ₒ::AbstractLattice)
+    flags = recompute_effects_flags(𝕃ₒ, inst[:stmt], inst[:type], ir)
+    add_flag!(inst, flags)
+    return !iszero(flags & IR_FLAGS_REMOVABLE)
 end
 
 # Handles all analysis and inlining of intrinsics and builtins. In particular,
 # this method does not access the method table or otherwise process generic
 # functions.
-function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, state::InliningState)
-    stmt = ir.stmts[idx][:inst]
-    rt = ir.stmts[idx][:type]
+function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, flag::UInt32,
+                         state::InliningState)
+    inst = ir[SSAValue(idx)]
+    stmt = inst[:stmt]
     if !(stmt isa Expr)
-        check_effect_free!(ir, idx, stmt, rt, state)
+        add_inst_flag!(inst, ir, state)
         return nothing
     end
+    rt = inst[:type]
     head = stmt.head
     if head !== :call
         if head === :splatnew
             inline_splatnew!(ir, idx, stmt, rt, state)
         elseif head === :new_opaque_closure
-            narrow_opaque_closure!(ir, stmt, ir.stmts[idx][:info], state)
+            narrow_opaque_closure!(ir, stmt, inst[:info], state)
         elseif head === :invoke
             sig = call_sig(ir, stmt)
             sig === nothing && return nothing
             return stmt, sig
         end
-        check_effect_free!(ir, idx, stmt, rt, state)
+        add_inst_flag!(inst, ir, state)
         return nothing
     end
 
@@ -1268,324 +1216,272 @@ function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stat
     sig === nothing && return nothing
 
     # Check if we match any of the early inliners
-    earlyres = early_inline_special_case(ir, stmt, rt, sig, state)
+    earlyres = early_inline_special_case(ir, stmt, flag, rt, sig, state)
     if isa(earlyres, SomeCase)
-        ir.stmts[idx][:inst] = earlyres.val
+        inst[:stmt] = earlyres.val
         return nothing
     end
 
-    if check_effect_free!(ir, idx, stmt, rt, state)
+    if add_inst_flag!(inst, ir, state)
         if sig.f === typeassert || ⊑(optimizer_lattice(state.interp), sig.ft, typeof(typeassert))
             # typeassert is a no-op if effect free
-            ir.stmts[idx][:inst] = stmt.args[2]
+            inst[:stmt] = stmt.args[2]
             return nothing
         end
     end
 
-    if (sig.f !== Core.invoke && sig.f !== Core.finalizer && sig.f !== modifyfield!) &&
-        is_builtin(optimizer_lattice(state.interp), sig)
-        # No inlining for builtins (other invoke/apply/typeassert/finalizer)
-        return nothing
+    if is_builtin(optimizer_lattice(state.interp), sig)
+        let f = sig.f
+            if (f !== Core.invoke &&
+                f !== Core.finalizer &&
+                f !== modifyfield! &&
+                f !== Core.modifyglobal! &&
+                f !== Core.memoryrefmodify! &&
+                f !== atomic_pointermodify)
+                # No inlining defined for most builtins (just invoke/apply/typeassert/finalizer), so attempt an early exit for them
+                return nothing
+            end
+        end
     end
 
     # Special case inliners for regular functions
-    lateres = late_inline_special_case!(ir, idx, stmt, rt, sig, state)
+    lateres = late_inline_special_case!(ir, idx, stmt, flag, rt, sig, state)
     if isa(lateres, SomeCase)
-        ir[SSAValue(idx)][:inst] = lateres.val
-        check_effect_free!(ir, idx, lateres.val, rt, state)
+        inst[:stmt] = lateres.val
+        add_inst_flag!(inst, ir, state)
         return nothing
     end
 
     return stmt, sig
 end
 
-function handle_any_const_result!(cases::Vector{InliningCase},
-    @nospecialize(result), match::MethodMatch, argtypes::Vector{Any},
-    @nospecialize(info::CallInfo), flag::UInt8, state::InliningState;
-    allow_abstract::Bool, allow_typevars::Bool)
-    if isa(result, ConcreteResult)
-        return handle_concrete_result!(cases, result, info, state)
-    end
-    if isa(result, SemiConcreteResult)
-        result = inlining_policy(state.interp, result, info, flag, result.mi, argtypes)
-        if isa(result, SemiConcreteResult)
-            return handle_semi_concrete_result!(cases, result, info, flag, state; allow_abstract)
-        end
-    end
-    if isa(result, ConstPropResult)
-        return handle_const_prop_result!(cases, result, argtypes, info, flag, state; allow_abstract, allow_typevars)
+function handle_any_call_result!(
+        cases::Vector{InliningCase}, @nospecialize(call_result::Union{Nothing,InferredCallResult}),
+        match::MethodMatch, argtypes::Vector{Any}, @nospecialize(info::CallInfo),
+        flag::UInt32, state::InliningState;
+        allow_typevars::Bool
+    )
+    if isa(call_result, ConcreteResult)
+        return handle_concrete_result!(cases, call_result, match, info, state)
+    elseif isa(call_result, SemiConcreteResult)
+        return handle_semi_concrete_result!(cases, call_result, match, info, flag, state)
     else
-        @assert result === nothing
-        return handle_match!(cases, match, argtypes, info, flag, state; allow_abstract, allow_typevars)
+        return handle_call_result!(cases, call_result, match, argtypes, info, flag, state; allow_typevars)
     end
 end
 
-function info_effects(@nospecialize(result), match::MethodMatch, state::InliningState)
-    if isa(result, ConcreteResult)
-        return result.effects
-    elseif isa(result, SemiConcreteResult)
-        return result.effects
-    elseif isa(result, ConstPropResult)
-        return result.result.ipo_effects
-    else
-        mi = specialize_method(match; preexisting=true)
-        if isa(mi, MethodInstance)
-            code = get(code_cache(state), mi, nothing)
-            if code isa CodeInstance
-                return decode_effects(code.ipo_purity_bits)
-            end
-        end
+function info_effects(@nospecialize(call_result::Union{Nothing,InferredCallResult}), match::MethodMatch, state::InliningState)
+    if call_result === nothing
         return Effects()
-    end
+    elseif isa(call_result, InferenceResult)
+        return call_result.ipo_effects
+    elseif isa(call_result, ConcreteResult)
+        return call_result.effects
+    elseif isa(call_result, SemiConcreteResult)
+        return call_result.effects
+    else error("Unknown InferredCallResult type") end
 end
 
-function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt8, sig::Signature,
+function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt32, sig::Signature,
     state::InliningState)
     nunion = nsplit(info)
     nunion === nothing && return nothing
     cases = InliningCase[]
     argtypes = sig.argtypes
-    local handled_all_cases::Bool = true
+    local handled_all_cases = local fully_covered = true
     local revisit_idx = nothing
-    local only_method = nothing
-    local meth::MethodLookupResult
     local all_result_count = 0
-    local joint_effects::Effects = EFFECTS_TOTAL
-    local fully_covered::Bool = true
+    local joint_effects = EFFECTS_TOTAL
     for i = 1:nunion
         meth = getsplit(info, i)
         if meth.ambig
             # Too many applicable methods
             # Or there is a (partial?) ambiguity
             return nothing
-        elseif length(meth) == 0
-            # No applicable methods; try next union split
-            handled_all_cases = false
-            continue
-        else
-            if length(meth) == 1 && only_method !== false
-                if only_method === nothing
-                    only_method = meth[1].method
-                elseif only_method !== meth[1].method
-                    only_method = false
-                end
-            else
-                only_method = false
-            end
         end
-        local split_fully_covered::Bool = false
+        local split_fully_covered = false
         for (j, match) in enumerate(meth)
             all_result_count += 1
-            result = getresult(info, all_result_count)
-            joint_effects = merge_effects(joint_effects, info_effects(result, match, state))
+            call_result = getresult(info, all_result_count)
+            joint_effects = merge_effects(joint_effects, info_effects(call_result, match, state))
             split_fully_covered |= match.fully_covers
             if !validate_sparams(match.sparams)
-                if !match.fully_covers
-                    handled_all_cases = false
-                    continue
-                end
-                if revisit_idx === nothing
-                    revisit_idx = (i, j, all_result_count)
+                if match.fully_covers
+                    if revisit_idx === nothing
+                        revisit_idx = (i, j, all_result_count)
+                    else
+                        handled_all_cases = false
+                        revisit_idx = nothing
+                    end
                 else
                     handled_all_cases = false
-                    revisit_idx = nothing
                 end
+            elseif !(match.spec_types <: match.method.sig) # the requirement for correct union-split
+                handled_all_cases = false
             else
-                handled_all_cases &= handle_any_const_result!(cases,
-                    result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=false)
+                handled_all_cases &= handle_any_call_result!(cases,
+                    call_result, match, argtypes, info, flag, state; allow_typevars=false)
             end
         end
         fully_covered &= split_fully_covered
     end
 
-    fully_covered || (joint_effects = Effects(joint_effects; nothrow=false))
-
-    if handled_all_cases && revisit_idx !== nothing
-        # we handled everything except one match with unmatched sparams,
-        # so try to handle it by bypassing validate_sparams
-        (i, j, k) = revisit_idx
-        match = getsplit(info, i)[j]
-        result = getresult(info, k)
-        handled_all_cases &= handle_any_const_result!(cases,
-            result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=true)
-    elseif length(cases) == 0 && only_method isa Method
-        # if the signature is fully covered and there is only one applicable method,
-        # we can try to inline it even in the presence of unmatched sparams
-        # -- But don't try it if we already tried to handle the match in the revisit_idx
-        # case, because that'll (necessarily) be the same method.
-        if nsplit(info)::Int > 1
-            atype = argtypes_to_type(argtypes)
-            (metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), atype, only_method.sig)::SimpleVector
-            match = MethodMatch(metharg, methsp::SimpleVector, only_method, true)
-            result = nothing
-        else
-            @assert length(meth) == 1
-            match = meth[1]
-            result = getresult(info, 1)
+    (handled_all_cases & fully_covered) || (joint_effects = Effects(joint_effects; nothrow=false))
+
+    if handled_all_cases
+        if revisit_idx !== nothing
+            # we handled everything except one match with unmatched sparams,
+            # so try to handle it by bypassing validate_sparams
+            (i, j, k) = revisit_idx
+            match = getsplit(info, i)[j]
+            call_result = getresult(info, k)
+            handled_all_cases &= handle_any_call_result!(cases,
+                call_result, match, argtypes, info, flag, state; allow_typevars=true)
+        end
+        if !fully_covered
+            # We will emit an inline MethodError in this case, but that info already came inference, so we must already have the uncovered edge for it
         end
-        handle_any_const_result!(cases,
-            result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=true)
-        fully_covered = handled_all_cases = match.fully_covers
-    elseif !handled_all_cases
+    elseif !isempty(cases)
         # if we've not seen all candidates, union split is valid only for dispatch tuples
         filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
     end
-
-    return cases, (handled_all_cases & fully_covered), joint_effects
+    return cases, handled_all_cases, fully_covered, joint_effects
 end
 
 function handle_call!(todo::Vector{Pair{Int,Any}},
-    ir::IRCode, idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt8, sig::Signature,
+    ir::IRCode, idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt32, sig::Signature,
     state::InliningState)
     cases = compute_inlining_cases(info, flag, sig, state)
     cases === nothing && return nothing
-    cases, all_covered, joint_effects = cases
-    handle_cases!(todo, ir, idx, stmt, argtypes_to_type(sig.argtypes), cases,
-        all_covered, joint_effects)
+    cases, handled_all_cases, fully_covered, joint_effects = cases
+    atype = argtypes_to_type(sig.argtypes)
+    atype === Union{} && return nothing # accidentally actually unreachable
+    handle_cases!(todo, ir, idx, stmt, atype, cases, handled_all_cases, fully_covered, joint_effects)
 end
 
-function handle_match!(cases::Vector{InliningCase},
-    match::MethodMatch, argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt8,
-    state::InliningState;
-    allow_abstract::Bool, allow_typevars::Bool)
-    spec_types = match.spec_types
-    allow_abstract || isdispatchtuple(spec_types) || return false
+function handle_call_result!(
+        cases::Vector{InliningCase}, call_result::Union{Nothing,InferenceResult},
+        match::MethodMatch, argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt32,
+        state::InliningState;
+        allow_typevars::Bool
+    )
     # We may see duplicated dispatch signatures here when a signature gets widened
     # during abstract interpretation: for the purpose of inlining, we can just skip
     # processing this dispatch candidate (unless unmatched type parameters are present)
-    !allow_typevars && _any(case->case.sig === spec_types, cases) && return true
-    item = analyze_method!(match, argtypes, info, flag, state; allow_typevars)
-    item === nothing && return false
-    push!(cases, InliningCase(spec_types, item))
-    return true
-end
+    !allow_typevars && any(case::InliningCase->case.sig === match.spec_types, cases) && return true
 
-function handle_const_prop_result!(cases::Vector{InliningCase},
-    result::ConstPropResult, argtypes::Vector{Any}, @nospecialize(info::CallInfo),
-    flag::UInt8, state::InliningState;
-    allow_abstract::Bool, allow_typevars::Bool)
-    mi = result.result.linfo
-    spec_types = mi.specTypes
-    allow_abstract || isdispatchtuple(spec_types) || return false
-    if !validate_sparams(mi.sparam_vals)
-        (allow_typevars && !may_have_fcalls(mi.def::Method)) || return false
-    end
-    item = resolve_todo(mi, result.result, argtypes, info, flag, state)
+    item = analyze_method!(call_result, match, argtypes, info, flag, state; allow_typevars)
     item === nothing && return false
-    push!(cases, InliningCase(spec_types, item))
+    push!(cases, InliningCase(match.spec_types, item))
     return true
 end
 
 function semiconcrete_result_item(result::SemiConcreteResult,
-        @nospecialize(info::CallInfo), flag::UInt8, state::InliningState)
-    mi = result.mi
-    if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag)
-        et = InliningEdgeTracker(state)
-        return compileable_specialization(mi, result.effects, et, info;
-            compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
-    else
-        return InliningTodo(mi, retrieve_ir_for_inlining(mi, result.ir), result.effects)
+        @nospecialize(info::CallInfo), flag::UInt32, state::InliningState)
+    code = result.edge
+    mi = get_ci_mi(code)
+    et = InliningEdgeTracker(state)
+
+    if (!OptimizationParams(state.interp).inlining || is_stmt_noinline(flag) ||
+        # For `NativeInterpreter`, `SemiConcreteResult` may be produced for
+        # a `@noinline`-declared method when it's marked as `@constprop :aggressive`.
+        # Suppress the inlining here (unless inlining is requested at the callsite).
+        (is_declared_noinline(mi.def::Method) && !is_stmt_inline(flag)))
+        return compileable_specialization(code, result.effects, et, info, state)
     end
+    src_inlining_policy(state.interp, mi, result.ir, info, flag) ||
+        return compileable_specialization(code, result.effects, et, info, state)
+
+    add_inlining_edge!(et, result.edge)
+    preserve_local_sources = OptimizationParams(state.interp).preserve_local_sources
+    ir, _, debuginfo = retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources)
+    return InliningTodo(mi, ir, result.spec_info, debuginfo, result.effects)
 end
 
 function handle_semi_concrete_result!(cases::Vector{InliningCase}, result::SemiConcreteResult,
-        @nospecialize(info::CallInfo), flag::UInt8, state::InliningState;
-        allow_abstract::Bool)
-    mi = result.mi
-    spec_types = mi.specTypes
-    allow_abstract || isdispatchtuple(spec_types) || return false
+    match::MethodMatch, @nospecialize(info::CallInfo), flag::UInt32, state::InliningState)
+    mi = result.edge.def
     validate_sparams(mi.sparam_vals) || return false
     item = semiconcrete_result_item(result, info, flag, state)
     item === nothing && return false
-    push!(cases, InliningCase(spec_types, item))
+    push!(cases, InliningCase(match.spec_types, item))
     return true
 end
 
-function handle_concrete_result!(cases::Vector{InliningCase}, result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState)
+function handle_concrete_result!(cases::Vector{InliningCase}, result::ConcreteResult,
+    match::MethodMatch, @nospecialize(info::CallInfo), state::InliningState)
     case = concrete_result_item(result, info, state)
     case === nothing && return false
-    push!(cases, InliningCase(result.mi.specTypes, case))
+    push!(cases, InliningCase(match.spec_types, case))
     return true
 end
 
 may_inline_concrete_result(result::ConcreteResult) =
     isdefined(result, :result) && is_inlineable_constant(result.result)
 
-function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState;
-    invokesig::Union{Nothing,Vector{Any}}=nothing)
+function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState)
     if !may_inline_concrete_result(result)
-        et = InliningEdgeTracker(state, invokesig)
-        return compileable_specialization(result.mi, result.effects, et, info;
-            compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
+        et = InliningEdgeTracker(state)
+        return compileable_specialization(result.edge, result.effects, et, info, state)
     end
     @assert result.effects === EFFECTS_TOTAL
-    return ConstantCase(quoted(result.result))
+    return ConstantCase(quoted(result.result), result.edge)
 end
 
 function handle_cases!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr,
-    @nospecialize(atype), cases::Vector{InliningCase}, fully_covered::Bool,
+    @nospecialize(atype), cases::Vector{InliningCase}, handled_all_cases::Bool, fully_covered::Bool,
     joint_effects::Effects)
     # If we only have one case and that case is fully covered, we may either
     # be able to do the inlining now (for constant cases), or push it directly
     # onto the todo list
-    if fully_covered && length(cases) == 1
+    if fully_covered && handled_all_cases && length(cases) == 1
         handle_single_case!(todo, ir, idx, stmt, cases[1].item)
-    elseif length(cases) > 0
+    elseif length(cases) > 0 || handled_all_cases
         isa(atype, DataType) || return nothing
         for case in cases
             isa(case.sig, DataType) || return nothing
         end
-        push!(todo, idx=>UnionSplit(fully_covered, atype, cases))
+        push!(todo, idx=>UnionSplit(handled_all_cases, fully_covered, atype, cases))
     else
-        ir[SSAValue(idx)][:flag] |= flags_for_effects(joint_effects)
+        add_flag!(ir[SSAValue(idx)], flags_for_effects(joint_effects))
     end
     return nothing
 end
 
 function handle_opaque_closure_call!(todo::Vector{Pair{Int,Any}},
     ir::IRCode, idx::Int, stmt::Expr, info::OpaqueClosureCallInfo,
-    flag::UInt8, sig::Signature, state::InliningState)
+    flag::UInt32, sig::Signature, state::InliningState)
     result = info.result
-    if isa(result, ConstPropResult)
-        mi = result.result.linfo
-        validate_sparams(mi.sparam_vals) || return nothing
-        item = resolve_todo(mi, result.result, sig.argtypes, info, flag, state)
-    elseif isa(result, ConcreteResult)
+    if isa(result, ConcreteResult)
         item = concrete_result_item(result, info, state)
+    elseif isa(result, SemiConcreteResult)
+        item = semiconcrete_result_item(result, info, flag, state)
     else
-        if isa(result, SemiConcreteResult)
-            result = inlining_policy(state.interp, result, info, flag, result.mi, sig.argtypes)
-        end
-        if isa(result, SemiConcreteResult)
-            item = semiconcrete_result_item(result, info, flag, state)
-        else
-            item = analyze_method!(info.match, sig.argtypes, info, flag, state; allow_typevars=false)
-        end
+        item = analyze_method!(result, info.match, sig.argtypes, info, flag, state; allow_typevars=false)
     end
     handle_single_case!(todo, ir, idx, stmt, item)
     return nothing
 end
 
-function handle_modifyfield!_call!(ir::IRCode, idx::Int, stmt::Expr, info::ModifyFieldInfo, state::InliningState)
+function handle_modifyop!_call!(ir::IRCode, idx::Int, stmt::Expr, info::ModifyOpInfo, state::InliningState)
     info = info.info
     info isa MethodResultPure && (info = info.info)
-    info isa ConstCallInfo && (info = info.call)
     info isa MethodMatchInfo || return nothing
-    length(info.results) == 1 || return nothing
+    length(info.edges) == length(info.results) == 1 || return nothing
     match = info.results[1]::MethodMatch
     match.fully_covers || return nothing
-    case = compileable_specialization(match, Effects(), InliningEdgeTracker(state), info;
-        compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
+    edge = info.edges[1]
+    edge === nothing && return nothing
+    case = compileable_specialization(edge, Effects(), InliningEdgeTracker(state), info, state)
     case === nothing && return nothing
     stmt.head = :invoke_modify
     pushfirst!(stmt.args, case.invoke)
-    ir.stmts[idx][:inst] = stmt
+    ir[SSAValue(idx)][:stmt] = stmt
     return nothing
 end
 
 function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::FinalizerInfo,
-    state::InliningState)
-
+                                state::InliningState)
     # Finalizers don't return values, so if their execution is not observable,
     # we can just not register them
     if is_removable_if_unused(info.effects)
@@ -1605,7 +1501,7 @@ function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::Finalize
     argtypes[2] = argextype(stmt.args[3], ir)
     sig = Signature(f, ft, argtypes)
 
-    cases = compute_inlining_cases(info.info, #=flag=#UInt8(0), sig, state)
+    cases = compute_inlining_cases(info.info, #=flag=#UInt32(0), sig, state)
     cases === nothing && return nothing
     cases, all_covered, _ = cases
     if all_covered && length(cases) == 1
@@ -1613,36 +1509,52 @@ function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::Finalize
         # `Core.Compiler` data structure into the global cache
         item1 = cases[1].item
         if isa(item1, InliningTodo)
-            push!(stmt.args, true)
-            push!(stmt.args, item1.mi)
+            code = get(code_cache(state), item1.mi, nothing) # COMBAK: this seems like a bad design, can we use stmt_info instead to store the correct info?
+            code isa InferenceResult && (code = code.ci)
+            if code isa CodeInstance
+                push!(stmt.args, true)
+                push!(stmt.args, code)
+            end
         elseif isa(item1, InvokeCase)
             push!(stmt.args, false)
             push!(stmt.args, item1.invoke)
         elseif isa(item1, ConstantCase)
             push!(stmt.args, nothing)
-            push!(stmt.args, item1.val)
         end
     end
     return nothing
 end
 
-function handle_invoke_expr!(todo::Vector{Pair{Int,Any}},
-    idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt8, sig::Signature, state::InliningState)
-    mi = stmt.args[1]::MethodInstance
-    case = resolve_todo(mi, sig.argtypes, info, flag, state)
-    if case !== nothing
-        push!(todo, idx=>(case::InliningTodo))
+# the special resolver for :invoke-d call
+function handle_invoke_expr!(todo::Vector{Pair{Int,Any}}, ir::IRCode,
+    idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt32, sig::Signature, state::InliningState)
+    edge = stmt.args[1]
+    mi = isa(edge, MethodInstance) ? edge : get_ci_mi(edge::CodeInstance)
+    call_result = nothing
+    let info = info
+        info isa MethodResultPure && (info = info.info)
+        if isa(info, InvokeCallInfo)
+            call_result = info.result
+        elseif isa(info, MethodMatchInfo)
+            # We didn't preserve the converted info when inserting :invoke node so we cannot recover this with accuracy.
+            # Since that info isn't used, but this is enough for the "apply `ssa_inlining_pass` multiple times" test
+            if length(info.edges) == length(info.results) == 1
+                call_result = getresult(info, 1)
+            end
+        end
     end
+    item = resolve_todo(mi, call_result, info, flag, state)
+    handle_single_case!(todo, ir, idx, stmt, item)
     return nothing
 end
 
 function inline_const_if_inlineable!(inst::Instruction)
     rt = inst[:type]
     if rt isa Const && is_inlineable_constant(rt.val)
-        inst[:inst] = quoted(rt.val)
+        inst[:stmt] = quoted(rt.val)
         return true
     end
-    inst[:flag] |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
+    add_flag!(inst, IR_FLAGS_REMOVABLE)
     return false
 end
 
@@ -1650,17 +1562,18 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
     todo = Pair{Int, Any}[]
 
     for idx in 1:length(ir.stmts)
-        simpleres = process_simple!(todo, ir, idx, state)
+        inst = ir.stmts[idx]
+        flag = inst[:flag]
+
+        simpleres = process_simple!(todo, ir, idx, flag, state)
         simpleres === nothing && continue
         stmt, sig = simpleres
-
-        flag = ir.stmts[idx][:flag]
-        info = ir.stmts[idx][:info]
+        info = inst[:info]
 
         # `NativeInterpreter` won't need this, but provide a support for `:invoke` exprs here
         # for external `AbstractInterpreter`s that may run the inlining pass multiple times
         if isexpr(stmt, :invoke)
-            handle_invoke_expr!(todo, idx, stmt, info, flag, sig, state)
+            handle_invoke_expr!(todo, ir, idx, stmt, info, flag, sig, state)
             continue
         end
 
@@ -1677,9 +1590,9 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
         # handle special cased builtins
         if isa(info, OpaqueClosureCallInfo)
             handle_opaque_closure_call!(todo, ir, idx, stmt, info, flag, sig, state)
-        elseif isa(info, ModifyFieldInfo)
-            handle_modifyfield!_call!(ir, idx, stmt, info, state)
-        elseif isa(info, InvokeCallInfo)
+        elseif isa(info, ModifyOpInfo)
+            handle_modifyop!_call!(ir, idx, stmt, info, state)
+        elseif sig.f === Core.invoke
             handle_invoke_call!(todo, ir, idx, stmt, info, flag, sig, state)
         elseif isa(info, FinalizerInfo)
             handle_finalizer_call!(ir, idx, stmt, info, state)
@@ -1694,15 +1607,14 @@ end
 
 function linear_inline_eligible(ir::IRCode)
     length(ir.cfg.blocks) == 1 || return false
-    terminator = ir[SSAValue(last(ir.cfg.blocks[1].stmts))][:inst]
+    terminator = ir[SSAValue(last(ir.cfg.blocks[1].stmts))][:stmt]
     isa(terminator, ReturnNode) || return false
     isdefined(terminator, :val) || return false
     return true
 end
 
-function early_inline_special_case(
-    ir::IRCode, stmt::Expr, @nospecialize(type), sig::Signature,
-    state::InliningState)
+function early_inline_special_case(ir::IRCode, stmt::Expr, flag::UInt32,
+                                   @nospecialize(type), sig::Signature, state::InliningState)
     OptimizationParams(state.interp).inlining || return nothing
     (; f, ft, argtypes) = sig
 
@@ -1710,18 +1622,13 @@ function early_inline_special_case(
         val = type.val
         is_inlineable_constant(val) || return nothing
         if isa(f, IntrinsicFunction)
-            if is_pure_intrinsic_infer(f) && intrinsic_nothrow(f, argtypes[2:end])
+            if is_pure_intrinsic_infer(f) && has_flag(flag, IR_FLAG_NOTHROW)
                 return SomeCase(quoted(val))
             end
         elseif contains_is(_PURE_BUILTINS, f)
             return SomeCase(quoted(val))
         elseif contains_is(_EFFECT_FREE_BUILTINS, f)
-            if _builtin_nothrow(optimizer_lattice(state.interp), f, argtypes[2:end], type)
-                return SomeCase(quoted(val))
-            end
-        elseif f === Core.get_binding_type
-            length(argtypes) == 3 || return nothing
-            if get_binding_type_effect_free(argtypes[2], argtypes[3])
+            if has_flag(flag, IR_FLAG_NOTHROW)
                 return SomeCase(quoted(val))
             end
         end
@@ -1734,7 +1641,9 @@ function early_inline_special_case(
         isa(setting, Const) || return nothing
         setting = setting.val
         isa(setting, Symbol) || return nothing
-        setting === :const || setting === :conditional || setting === :type || return nothing
+        # setting === :const || setting === :type barrier const evaluation,
+        # so they can't be eliminated at IPO time
+        setting === :conditional || return nothing
         # barriered successfully already, eliminate it
         return SomeCase(stmt.args[3])
     elseif f === Core.ifelse && length(argtypes) == 4
@@ -1745,6 +1654,8 @@ function early_inline_special_case(
             elseif cond.val === false
                 return SomeCase(stmt.args[4])
             end
+        elseif ⊑(optimizer_lattice(state.interp), cond, Bool) && stmt.args[3] === stmt.args[4]
+            return SomeCase(stmt.args[3])
         end
     end
     return nothing
@@ -1753,25 +1664,24 @@ end
 # special-case some regular method calls whose results are not folded within `abstract_call_known`
 # (and thus `early_inline_special_case` doesn't handle them yet)
 # NOTE we manually inline the method bodies, and so the logic here needs to precisely sync with their definitions
-function late_inline_special_case!(
-    ir::IRCode, idx::Int, stmt::Expr, @nospecialize(type), sig::Signature,
-    state::InliningState)
+function late_inline_special_case!(ir::IRCode, idx::Int, stmt::Expr, flag::UInt32,
+                                   @nospecialize(type), sig::Signature, state::InliningState)
     OptimizationParams(state.interp).inlining || return nothing
     (; f, ft, argtypes) = sig
-    if length(argtypes) == 3 && istopfunction(f, :!==)
+    if length(argtypes) == 3 && f === Core.:(!==)
         # special-case inliner for !== that precedes _methods_by_ftype union splitting
         # and that works, even though inference generally avoids inferring the `!==` Method
         if isa(type, Const)
             return SomeCase(quoted(type.val))
         end
         cmp_call = Expr(:call, GlobalRef(Core, :(===)), stmt.args[2], stmt.args[3])
-        cmp_call_ssa = insert_node!(ir, idx, effect_free_and_nothrow(NewInstruction(cmp_call, Bool)))
+        cmp_call_ssa = insert_node!(ir, idx, removable_if_unused(NewInstruction(cmp_call, Bool)))
         not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa)
         return SomeCase(not_call)
-    elseif length(argtypes) == 3 && istopfunction(f, :(>:))
+    elseif length(argtypes) == 3 && f === Core.:(>:)
         # special-case inliner for issupertype
         # that works, even though inference generally avoids inferring the `>:` Method
-        if isa(type, Const) && _builtin_nothrow(optimizer_lattice(state.interp), <:, Any[argtypes[3], argtypes[2]], type)
+        if isa(type, Const) && has_flag(flag, IR_FLAG_NOTHROW)
             return SomeCase(quoted(type.val))
         end
         subtype_call = Expr(:call, GlobalRef(Core, :(<:)), stmt.args[3], stmt.args[2])
@@ -1782,7 +1692,7 @@ function late_inline_special_case!(
             length(stmt.args) == 2 ? Any : stmt.args[end])
         return SomeCase(typevar_call)
     elseif f === UnionAll && length(argtypes) == 3 && ⊑(optimizer_lattice(state.interp), argtypes[2], TypeVar)
-        unionall_call = Expr(:foreigncall, QuoteNode(:jl_type_unionall), Any, svec(Any, Any),
+        unionall_call = Expr(:foreigncall, Expr(:tuple, QuoteNode(:jl_type_unionall)), Any, svec(Any, Any),
             0, QuoteNode(:ccall), stmt.args[2], stmt.args[3])
         return SomeCase(unionall_call)
     elseif is_return_type(f)
@@ -1795,50 +1705,44 @@ function late_inline_special_case!(
     return nothing
 end
 
-function ssa_substitute!(insert_node!::Inserter,
-                         subst_inst::Instruction, @nospecialize(val), arg_replacements::Vector{Any},
-                         @nospecialize(spsig), spvals::SimpleVector,
-                         spvals_ssa::Union{Nothing, SSAValue},
-                         linetable_offset::Int32, boundscheck::Symbol)
-    subst_inst[:flag] &= ~IR_FLAG_INBOUNDS
-    subst_inst[:line] += linetable_offset
-    return ssa_substitute_op!(insert_node!, subst_inst,
-        val, arg_replacements, spsig, spvals, spvals_ssa, boundscheck)
+struct SSASubstitute
+    mi::MethodInstance
+    arg_replacements::Vector{Any}
+    spvals_ssa::Union{Nothing,SSAValue}
+    inlined_at::NTuple{3,Int32} # TODO: add a map also, so that ssaidx doesn't need to equal inlined_idx?
 end
 
 function insert_spval!(insert_node!::Inserter, spvals_ssa::SSAValue, spidx::Int, do_isdefined::Bool)
     ret = insert_node!(
-        effect_free_and_nothrow(NewInstruction(Expr(:call, Core._svec_ref, false, spvals_ssa, spidx), Any)))
+        removable_if_unused(NewInstruction(Expr(:call, Core._svec_ref, spvals_ssa, spidx), Any)))
     tcheck_not = nothing
     if do_isdefined
         tcheck = insert_node!(
-            effect_free_and_nothrow(NewInstruction(Expr(:call, Core.isa, ret, Core.TypeVar), Bool)))
+            removable_if_unused(NewInstruction(Expr(:call, Core.isa, ret, Core.TypeVar), Bool)))
         tcheck_not = insert_node!(
-            effect_free_and_nothrow(NewInstruction(Expr(:call, not_int, tcheck), Bool)))
+            removable_if_unused(NewInstruction(Expr(:call, not_int, tcheck), Bool)))
     end
     return (ret, tcheck_not)
 end
 
-function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction,
-                            @nospecialize(val), arg_replacements::Vector{Any},
-                            @nospecialize(spsig), spvals::SimpleVector,
-                            spvals_ssa::Union{Nothing, SSAValue},
-                            boundscheck::Symbol)
+function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction, @nospecialize(val),
+                            ssa_substitute::SSASubstitute)
     if isa(val, Argument)
-        return arg_replacements[val.n]
+        return ssa_substitute.arg_replacements[val.n]
     end
     if isa(val, Expr)
         e = val::Expr
         head = e.head
+        sparam_vals = ssa_substitute.mi.sparam_vals
         if head === :static_parameter
             spidx = e.args[1]::Int
-            val = spvals[spidx]
+            val = sparam_vals[spidx]
             if !isa(val, TypeVar) && val !== Vararg
                 return quoted(val)
             else
                 flag = subst_inst[:flag]
-                maybe_undef = (flag & IR_FLAG_NOTHROW) == 0 && isa(val, TypeVar)
-                (ret, tcheck_not) = insert_spval!(insert_node!, spvals_ssa::SSAValue, spidx, maybe_undef)
+                maybe_undef = !has_flag(flag, IR_FLAG_NOTHROW) && isa(val, TypeVar)
+                (ret, tcheck_not) = insert_spval!(insert_node!, ssa_substitute.spvals_ssa::SSAValue, spidx, maybe_undef)
                 if maybe_undef
                     insert_node!(
                         NewInstruction(Expr(:throw_undef_if_not, val.name, tcheck_not), Nothing))
@@ -1847,44 +1751,38 @@ function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction,
             end
         elseif head === :isdefined && isa(e.args[1], Expr) && e.args[1].head === :static_parameter
             spidx = (e.args[1]::Expr).args[1]::Int
-            val = spvals[spidx]
+            val = sparam_vals[spidx]
             if !isa(val, TypeVar)
                 return true
             else
-                (_, tcheck_not) = insert_spval!(insert_node!, spvals_ssa::SSAValue, spidx, true)
+                (_, tcheck_not) = insert_spval!(insert_node!, ssa_substitute.spvals_ssa::SSAValue, spidx, true)
                 return tcheck_not
             end
-        elseif head === :cfunction && spvals_ssa === nothing
-            @assert !isa(spsig, UnionAll) || !isempty(spvals)
-            e.args[3] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[3], spsig, spvals)
+        elseif head === :cfunction && ssa_substitute.spvals_ssa === nothing
+            msig = (ssa_substitute.mi.def::Method).sig
+            @assert !isa(msig, UnionAll) || !isempty(sparam_vals)
+            e.args[3] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[3], msig, sparam_vals)
             e.args[4] = svec(Any[
-                ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, spsig, spvals)
+                ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, msig, sparam_vals)
                 for argt in e.args[4]::SimpleVector ]...)
-        elseif head === :foreigncall && spvals_ssa === nothing
-            @assert !isa(spsig, UnionAll) || !isempty(spvals)
+        elseif head === :foreigncall && ssa_substitute.spvals_ssa === nothing
+            msig = (ssa_substitute.mi.def::Method).sig
+            @assert !isa(msig, UnionAll) || !isempty(sparam_vals)
             for i = 1:length(e.args)
                 if i == 2
-                    e.args[2] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[2], spsig, spvals)
+                    e.args[2] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[2], msig, sparam_vals)
                 elseif i == 3
                     e.args[3] = svec(Any[
-                        ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, spsig, spvals)
+                        ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, msig, sparam_vals)
                         for argt in e.args[3]::SimpleVector ]...)
                 end
             end
-        elseif head === :boundscheck
-            if boundscheck === :off # inbounds == true
-                return false
-            elseif boundscheck === :propagate
-                return e
-            else # on or default
-                return true
-            end
         end
     end
-    isa(val, Union{SSAValue, NewSSAValue}) && return val # avoid infinite loop
+    isa(val, AnySSAValue) && return val # avoid infinite loop
     urs = userefs(val)
     for op in urs
-        op[] = ssa_substitute_op!(insert_node!, subst_inst, op[], arg_replacements, spsig, spvals, spvals_ssa, boundscheck)
+        op[] = ssa_substitute_op!(insert_node!, subst_inst, op[], ssa_substitute)
     end
     return urs[]
 end
diff --git a/base/compiler/ssair/ir.jl b/Compiler/src/ssair/ir.jl
similarity index 71%
rename from base/compiler/ssair/ir.jl
rename to Compiler/src/ssair/ir.jl
index a1a6bf3b15546..743e26cb230bb 100644
--- a/base/compiler/ssair/ir.jl
+++ b/Compiler/src/ssair/ir.jl
@@ -1,8 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-Core.PhiNode() = Core.PhiNode(Int32[], Any[])
-
-isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode)
+isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) ||
+    isa(stmt, ReturnNode) || isa(stmt, EnterNode) || isexpr(stmt, :leave)
 
 struct CFG
     blocks::Vector{BasicBlock}
@@ -60,16 +59,18 @@ block_for_inst(cfg::CFG, inst::Int) = block_for_inst(cfg.index, inst)
             # This is a fake dest to force the next stmt to start a bb
             idx < length(stmts) && push!(jump_dests, idx+1)
             push!(jump_dests, stmt.label)
+        elseif isa(stmt, EnterNode)
+            # :enter starts/ends a BB
+            push!(jump_dests, idx)
+            push!(jump_dests, idx+1)
+            # The catch block is a jump dest
+            if stmt.catch_dest != 0
+                push!(jump_dests, stmt.catch_dest)
+            end
         elseif isa(stmt, Expr)
             if stmt.head === :leave
                 # :leave terminates a BB
                 push!(jump_dests, idx+1)
-            elseif stmt.head === :enter
-                # :enter starts/ends a BB
-                push!(jump_dests, idx)
-                push!(jump_dests, idx+1)
-                # The catch block is a jump dest
-                push!(jump_dests, stmt.args[1]::Int)
             end
         end
         if isa(stmt, PhiNode)
@@ -80,7 +81,7 @@ block_for_inst(cfg::CFG, inst::Int) = block_for_inst(cfg.index, inst)
             end
         end
     end
-    # and add add one more basic block start after the last statement
+    # and add one more basic block start after the last statement
     for i = length(stmts):-1:1
         if stmts[i] !== nothing
             push!(jump_dests, i+1)
@@ -104,6 +105,9 @@ function compute_basic_blocks(stmts::Vector{Any})
     end
     # Compute successors/predecessors
     for (num, b) in enumerate(blocks)
+        if b.stmts.start == 1
+            push!(b.preds, 0) # the entry block has a virtual predecessor
+        end
         terminator = stmts[last(b.stmts)]
         if isa(terminator, ReturnNode)
             # return never has any successors
@@ -125,14 +129,16 @@ function compute_basic_blocks(stmts::Vector{Any})
                 push!(blocks[block′].preds, num)
                 push!(b.succs, block′)
             end
-        elseif isexpr(terminator, :enter)
+        elseif isa(terminator, EnterNode)
             # :enter gets a virtual edge to the exception handler and
             # the exception handler gets a virtual edge from outside
             # the function.
-            block′ = block_for_inst(basic_block_index, terminator.args[1]::Int)
-            push!(blocks[block′].preds, num)
-            push!(blocks[block′].preds, 0)
-            push!(b.succs, block′)
+            if terminator.catch_dest != 0
+                block′ = block_for_inst(basic_block_index, terminator.catch_dest)
+                push!(blocks[block′].preds, num)
+                push!(blocks[block′].preds, 0)
+                push!(b.succs, block′)
+            end
         end
         # statement fall-through
         if num + 1 <= length(blocks)
@@ -144,16 +150,78 @@ function compute_basic_blocks(stmts::Vector{Any})
 end
 
 # this function assumes insert position exists
+function is_valid_phiblock_stmt(@nospecialize(stmt))
+    isa(stmt, PhiNode) && return true
+    isa(stmt, Union{UpsilonNode, PhiCNode, SSAValue}) && return false
+    isa(stmt, Expr) && return is_value_pos_expr_head(stmt.head)
+    return true
+end
+
 function first_insert_for_bb(code::Vector{Any}, cfg::CFG, block::Int)
-    for idx in cfg.blocks[block].stmts
+    stmts = cfg.blocks[block].stmts
+    lastnonphiidx = first(stmts)
+    for idx in stmts
         stmt = code[idx]
         if !isa(stmt, PhiNode)
-            return idx
+            if !is_valid_phiblock_stmt(stmt)
+                return lastnonphiidx
+            end
+        else
+            lastnonphiidx = idx + 1
+        end
+    end
+    if lastnonphiidx > last(stmts)
+        error("any insert position isn't found")
+    end
+    return lastnonphiidx
+end
+
+# mutable version of the compressed DebugInfo
+mutable struct DebugInfoStream
+    def::Union{MethodInstance,Symbol,Nothing}
+    linetable::Union{Nothing,DebugInfo}
+    edges::Vector{DebugInfo}
+    firstline::Int32 # the starting line for this block (specified by having an index of 0)
+    codelocs::Vector{Int32} # for each statement:
+        # index into linetable (if defined), else a line number (in the file represented by def)
+        # then index into edges
+        # then index into edges[linetable]
+    function DebugInfoStream(codelocs::Vector{Int32})
+        return new(nothing, nothing, DebugInfo[], 0, codelocs)
+    end
+    # DebugInfoStream(def::Union{MethodInstance,Nothing}, di::DebugInfo, nstmts::Int) =
+    #     if debuginfo_file1(di.def) === debuginfo_file1(di.def)
+    #         new(def, di.linetable, Core.svec(di.edges...), getdebugidx(di, 0),
+    #             ccall(:jl_uncompress_codelocs, Any, (Any, Int), di.codelocs, nstmts)::Vector{Int32})
+    #     else
+    function DebugInfoStream(def::Union{MethodInstance,Nothing}, di::DebugInfo, nstmts::Int)
+        codelocs = zeros(Int32, nstmts * 3)
+        for i = 1:nstmts
+            codelocs[3i - 2] = i
         end
+        return new(def, di, DebugInfo[], 0, codelocs)
+    end
+    global copy(di::DebugInfoStream) = new(di.def, di.linetable, di.edges, di.firstline, di.codelocs)
+end
+
+Core.DebugInfo(di::DebugInfoStream, nstmts::Int) =
+    DebugInfo(something(di.def), di.linetable, Core.svec(di.edges...),
+        ccall(:jl_compress_codelocs, Any, (Int32, Any, Int), di.firstline, di.codelocs, nstmts)::String)
+
+getdebugidx(debuginfo::DebugInfo, pc::Int) =
+    ccall(:jl_uncompress1_codeloc, NTuple{3,Int32}, (Any, Int), debuginfo.codelocs, pc)
+
+function getdebugidx(debuginfo::DebugInfoStream, pc::Int)
+    if 3 <= 3pc <= length(debuginfo.codelocs)
+        return (debuginfo.codelocs[3pc-2], debuginfo.codelocs[3pc-1], debuginfo.codelocs[3pc-0])
+    elseif pc == 0
+        return (Int32(debuginfo.firstline), Int32(0), Int32(0))
+    else
+        return (Int32(-1), Int32(0), Int32(0))
     end
-    error("any insert position isn't found")
 end
 
+
 # SSA values that need renaming
 struct OldSSAValue
     id::Int
@@ -183,27 +251,30 @@ end
 
 const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue}
 
-
 # SSA-indexed nodes
 struct InstructionStream
-    inst::Vector{Any}
+    stmt::Vector{Any}
     type::Vector{Any}
     info::Vector{CallInfo}
     line::Vector{Int32}
-    flag::Vector{UInt8}
+    flag::Vector{UInt32}
+    function InstructionStream(stmts::Vector{Any}, type::Vector{Any}, info::Vector{CallInfo}, line::Vector{Int32}, flag::Vector{UInt32})
+        return new(stmts, type, info, line, flag)
+    end
 end
 function InstructionStream(len::Int)
-    insts = Vector{Any}(undef, len)
+    stmts = Vector{Any}(undef, len)
     types = Vector{Any}(undef, len)
     info = Vector{CallInfo}(undef, len)
     fill!(info, NoCallInfo())
-    lines = fill(Int32(0), len)
+    lines = fill(Int32(0), 3len)
     flags = fill(IR_FLAG_NULL, len)
-    return InstructionStream(insts, types, info, lines, flags)
+    return InstructionStream(stmts, types, info, lines, flags)
 end
 InstructionStream() = InstructionStream(0)
-length(is::InstructionStream) = length(is.inst)
-isempty(is::InstructionStream) = isempty(is.inst)
+length(is::InstructionStream) = length(is.stmt)
+iterate(is::Compiler.InstructionStream, st::Int=1) = (st <= Compiler.length(is)) ? (is[st], st + 1) : nothing
+isempty(is::InstructionStream) = isempty(is.stmt)
 function add_new_idx!(is::InstructionStream)
     ninst = length(is) + 1
     resize!(is, ninst)
@@ -211,7 +282,7 @@ function add_new_idx!(is::InstructionStream)
 end
 function copy(is::InstructionStream)
     return InstructionStream(
-        copy_exprargs(is.inst),
+        copy_exprargs(is.stmt),
         copy(is.type),
         copy(is.info),
         copy(is.line),
@@ -219,13 +290,13 @@ function copy(is::InstructionStream)
 end
 function resize!(stmts::InstructionStream, len)
     old_length = length(stmts)
-    resize!(stmts.inst, len)
+    resize!(stmts.stmt, len)
     resize!(stmts.type, len)
     resize!(stmts.info, len)
-    resize!(stmts.line, len)
+    resize!(stmts.line, 3len)
     resize!(stmts.flag, len)
     for i in (old_length + 1):len
-        stmts.line[i] = 0
+        stmts.line[3i-2], stmts.line[3i-1], stmts.line[3i] = NoLineUpdate
         stmts.flag[i] = IR_FLAG_NULL
         stmts.info[i] = NoCallInfo()
     end
@@ -239,25 +310,37 @@ end
 Instruction(is::InstructionStream) = Instruction(is, add_new_idx!(is))
 
 @inline function getindex(node::Instruction, fld::Symbol)
+    (fld === :inst) && (fld = :stmt) # deprecated
     isdefined(node, fld) && return getfield(node, fld)
-    return getfield(getfield(node, :data), fld)[getfield(node, :idx)]
+    fldarray = getfield(getfield(node, :data), fld)
+    fldidx = getfield(node, :idx)
+    (fld === :line) && return (fldarray[3fldidx-2], fldarray[3fldidx-1], fldarray[3fldidx-0])
+    (1 ≤ fldidx ≤ length(fldarray)) || throw(InvalidIRError())
+    return fldarray[fldidx]
 end
 @inline function setindex!(node::Instruction, @nospecialize(val), fld::Symbol)
-    getfield(getfield(node, :data), fld)[getfield(node, :idx)] = val
+    (fld === :inst) && (fld = :stmt) # deprecated
+    fldarray = getfield(getfield(node, :data), fld)
+    fldidx = getfield(node, :idx)
+    if fld === :line
+        (fldarray[3fldidx-2], fldarray[3fldidx-1], fldarray[3fldidx-0]) = val::NTuple{3,Int32}
+    else
+        fldarray[fldidx] = val
+    end
     return node
 end
 
 @inline getindex(is::InstructionStream, idx::Int) = Instruction(is, idx)
 function setindex!(is::InstructionStream, newval::Instruction, idx::Int)
-    is.inst[idx] = newval[:inst]
+    is.stmt[idx] = newval[:stmt]
     is.type[idx] = newval[:type]
     is.info[idx] = newval[:info]
-    is.line[idx] = newval[:line]
+    (is.line[3idx-2], is.line[3idx-1], is.line[3idx-0]) = newval[:line]
     is.flag[idx] = newval[:flag]
     return is
 end
 function setindex!(is::InstructionStream, newval::Union{AnySSAValue, Nothing}, idx::Int)
-    is.inst[idx] = newval
+    is.stmt[idx] = newval
     return is
 end
 function setindex!(node::Instruction, newval::Instruction)
@@ -265,6 +348,10 @@ function setindex!(node::Instruction, newval::Instruction)
     return node
 end
 
+has_flag(inst::Instruction, flag::UInt32) = has_flag(inst[:flag], flag)
+add_flag!(inst::Instruction, flag::UInt32) = inst[:flag] |= flag
+sub_flag!(inst::Instruction, flag::UInt32) = inst[:flag] &= ~flag
+
 struct NewNodeInfo
     # Insertion position (interpretation depends on which array this is in)
     pos::Int
@@ -288,14 +375,15 @@ struct NewInstruction
     stmt::Any
     type::Any
     info::CallInfo
-    line::Union{Int32,Nothing} # if nothing, copy the line from previous statement in the insertion location
-    flag::Union{UInt8,Nothing} # if nothing, IR flags will be recomputed on insertion
+    line::Union{NTuple{3,Int32},Nothing} # if nothing, copy the line from previous statement in the insertion location
+    flag::Union{UInt32,Nothing} # if nothing, IR flags will be recomputed on insertion
     function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo),
-                            line::Union{Int32,Nothing}, flag::Union{UInt8,Nothing})
+                            line::Union{NTuple{3,Int32},Int32,Nothing}, flag::Union{UInt32,Nothing})
+        line isa Int32 && (line = (line, zero(Int32), zero(Int32)))
         return new(stmt, type, info, line, flag)
     end
 end
-function NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Int32,Nothing}=nothing)
+function NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{NTuple{3,Int32},Int32,Nothing}=nothing)
     return NewInstruction(stmt, type, NoCallInfo(), line, nothing)
 end
 @nospecialize
@@ -303,50 +391,67 @@ function NewInstruction(newinst::NewInstruction;
     stmt::Any=newinst.stmt,
     type::Any=newinst.type,
     info::CallInfo=newinst.info,
-    line::Union{Int32,Nothing}=newinst.line,
-    flag::Union{UInt8,Nothing}=newinst.flag)
+    line::Union{NTuple{3,Int32},Int32,Nothing}=newinst.line,
+    flag::Union{UInt32,Nothing}=newinst.flag)
     return NewInstruction(stmt, type, info, line, flag)
 end
 function NewInstruction(inst::Instruction;
-    stmt::Any=inst[:inst],
+    stmt::Any=inst[:stmt],
     type::Any=inst[:type],
     info::CallInfo=inst[:info],
-    line::Union{Int32,Nothing}=inst[:line],
-    flag::Union{UInt8,Nothing}=inst[:flag])
+    line::Union{NTuple{3,Int32},Int32,Nothing}=inst[:line],
+    flag::Union{UInt32,Nothing}=inst[:flag])
     return NewInstruction(stmt, type, info, line, flag)
 end
 @specialize
-effect_free_and_nothrow(newinst::NewInstruction) = NewInstruction(newinst; flag=add_flag(newinst, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW))
-with_flags(newinst::NewInstruction, flags::UInt8) = NewInstruction(newinst; flag=add_flag(newinst, flags))
-without_flags(newinst::NewInstruction, flags::UInt8) = NewInstruction(newinst; flag=sub_flag(newinst, flags))
-function add_flag(newinst::NewInstruction, newflag::UInt8)
+removable_if_unused(newinst::NewInstruction) = add_flag(newinst, IR_FLAGS_REMOVABLE)
+function add_flag(newinst::NewInstruction, newflag::UInt32)
     flag = newinst.flag
-    flag === nothing && return newflag
-    return flag | newflag
+    if flag === nothing
+        flag = newflag
+    else
+        flag |= newflag
+    end
+    return NewInstruction(newinst; flag)
 end
-function sub_flag(newinst::NewInstruction, newflag::UInt8)
+function sub_flag(newinst::NewInstruction, newflag::UInt32)
     flag = newinst.flag
-    flag === nothing && return IR_FLAG_NULL
-    return flag & ~newflag
+    if flag === nothing
+        flag = IR_FLAG_NULL
+    else
+        flag &= ~newflag
+    end
+    return NewInstruction(newinst; flag)
 end
 
 struct IRCode
     stmts::InstructionStream
     argtypes::Vector{Any}
     sptypes::Vector{VarState}
-    linetable::Vector{LineInfoNode}
+    debuginfo::DebugInfoStream
     cfg::CFG
     new_nodes::NewNodeStream
     meta::Vector{Expr}
+    valid_worlds::WorldRange
 
-    function IRCode(stmts::InstructionStream, cfg::CFG, linetable::Vector{LineInfoNode}, argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{VarState})
-        return new(stmts, argtypes, sptypes, linetable, cfg, NewNodeStream(), meta)
+    function IRCode(stmts::InstructionStream, cfg::CFG, debuginfo::DebugInfoStream,
+                    argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{VarState},
+                    valid_worlds=WorldRange(typemin(UInt), typemax(UInt)))
+        return new(stmts, argtypes, sptypes, debuginfo, cfg, NewNodeStream(), meta, valid_worlds)
     end
     function IRCode(ir::IRCode, stmts::InstructionStream, cfg::CFG, new_nodes::NewNodeStream)
-        return new(stmts, ir.argtypes, ir.sptypes, ir.linetable, cfg, new_nodes, ir.meta)
+        di = ir.debuginfo
+        @assert di.codelocs === stmts.line
+        return new(stmts, ir.argtypes, ir.sptypes, di, cfg, new_nodes, ir.meta, ir.valid_worlds)
+    end
+    global function copy(ir::IRCode)
+        di = ir.debuginfo
+        stmts = copy(ir.stmts)
+        di = copy(di)
+        di.edges = copy(di.edges)
+        di.codelocs = stmts.line
+        return new(stmts, copy(ir.argtypes), copy(ir.sptypes), di, copy(ir.cfg), copy(ir.new_nodes), copy(ir.meta), ir.valid_worlds)
     end
-    global copy(ir::IRCode) = new(copy(ir.stmts), copy(ir.argtypes), copy(ir.sptypes),
-        copy(ir.linetable), copy(ir.cfg), copy(ir.new_nodes), copy(ir.meta))
 end
 
 """
@@ -357,26 +462,42 @@ for debugging and unit testing of IRCode APIs. The compiler itself should genera
 from the frontend or one of the caches.
 """
 function IRCode()
-    ir = IRCode(InstructionStream(1), CFG([BasicBlock(1:1, Int[], Int[])], Int[1]), LineInfoNode[], Any[], Expr[], VarState[])
-    ir[SSAValue(1)][:inst] = ReturnNode(nothing)
+    stmts = InstructionStream(1)
+    debuginfo = DebugInfoStream(stmts.line)
+    stmts.line[1] = 1
+    ir = IRCode(stmts, CFG([BasicBlock(1:1, Int[], Int[])], Int[1]), debuginfo, Any[], Expr[], VarState[])
+    ir[SSAValue(1)][:stmt] = ReturnNode(nothing)
     ir[SSAValue(1)][:type] = Nothing
     ir[SSAValue(1)][:flag] = 0x00
-    ir[SSAValue(1)][:line] = Int32(0)
+    ir[SSAValue(1)][:line] = NoLineUpdate
     return ir
 end
 
+construct_domtree(ir::IRCode) = construct_domtree(ir.cfg)
+construct_domtree(cfg::CFG) = construct_domtree(cfg.blocks)
+
+construct_postdomtree(ir::IRCode) = construct_postdomtree(ir.cfg)
+construct_postdomtree(cfg::CFG) = construct_postdomtree(cfg.blocks)
+
 function block_for_inst(ir::IRCode, inst::Int)
     if inst > length(ir.stmts)
         inst = ir.new_nodes.info[inst - length(ir.stmts)].pos
     end
     block_for_inst(ir.cfg, inst)
 end
+block_for_inst(ir::IRCode, ssa::SSAValue) = block_for_inst(ir, ssa.id)
 
-function getindex(x::IRCode, s::SSAValue)
-    if s.id <= length(x.stmts)
-        return x.stmts[s.id]
+function getindex(ir::IRCode, s::SSAValue)
+    id = s.id
+    (id ≥ 1) || throw(InvalidIRError())
+    nstmts = length(ir.stmts)
+    if id <= nstmts
+        return ir.stmts[id]
     else
-        return x.new_nodes.stmts[s.id - length(x.stmts)]
+        id -= nstmts
+        stmts = ir.new_nodes.stmts
+        (id ≤ length(stmts)) || throw(InvalidIRError())
+        return stmts[id]
     end
 end
 
@@ -427,11 +548,15 @@ struct UndefToken end; const UNDEF_TOKEN = UndefToken()
         isdefined(stmt, :val) || return OOB_TOKEN
         op == 1 || return OOB_TOKEN
         return stmt.val
+    elseif isa(stmt, EnterNode)
+        isdefined(stmt, :scope) || return OOB_TOKEN
+        op == 1 || return OOB_TOKEN
+        return stmt.scope
     elseif isa(stmt, PiNode)
         isdefined(stmt, :val) || return OOB_TOKEN
         op == 1 || return OOB_TOKEN
         return stmt.val
-    elseif isa(stmt, Union{SSAValue, NewSSAValue, GlobalRef})
+    elseif isa(stmt, Union{AnySSAValue, GlobalRef})
         op == 1 || return OOB_TOKEN
         return stmt
     elseif isa(stmt, UpsilonNode)
@@ -459,6 +584,7 @@ function is_relevant_expr(e::Expr)
                       :foreigncall, :isdefined, :copyast,
                       :throw_undef_if_not,
                       :cfunction, :method, :pop_exception,
+                      :leave,
                       :new_opaque_closure)
 end
 
@@ -482,16 +608,19 @@ end
         stmt = GotoIfNot(v, stmt.dest)
     elseif isa(stmt, ReturnNode)
         op == 1 || throw(BoundsError())
-        stmt = typeof(stmt)(v)
-    elseif isa(stmt, Union{SSAValue, NewSSAValue, GlobalRef})
+        stmt = ReturnNode(v)
+    elseif isa(stmt, EnterNode)
+        op == 1 || throw(BoundsError())
+        stmt = EnterNode(stmt.catch_dest, v)
+    elseif isa(stmt, Union{AnySSAValue, Argument, GlobalRef})
         op == 1 || throw(BoundsError())
         stmt = v
     elseif isa(stmt, UpsilonNode)
         op == 1 || throw(BoundsError())
-        stmt = typeof(stmt)(v)
+        stmt = UpsilonNode(v)
     elseif isa(stmt, PiNode)
         op == 1 || throw(BoundsError())
-        stmt = typeof(stmt)(v, stmt.typ)
+        stmt = PiNode(v, stmt.typ)
     elseif isa(stmt, PhiNode)
         op > length(stmt.values) && throw(BoundsError())
         isassigned(stmt.values, op) || throw(BoundsError())
@@ -513,8 +642,8 @@ end
 
 function userefs(@nospecialize(x))
     relevant = (isa(x, Expr) && is_relevant_expr(x)) ||
-        isa(x, GotoIfNot) || isa(x, ReturnNode) || isa(x, SSAValue) || isa(x, NewSSAValue) ||
-        isa(x, PiNode) || isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode)
+        isa(x, GotoIfNot) || isa(x, ReturnNode) || isa(x, SSAValue) || isa(x, OldSSAValue) || isa(x, NewSSAValue) ||
+        isa(x, PiNode) || isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode) || isa(x, EnterNode) || isa(x, Argument)
     return UseRefIterator(x, relevant)
 end
 
@@ -554,7 +683,7 @@ function insert_node!(ir::IRCode, pos::SSAValue, newinst::NewInstruction, attach
     end
     node = add_inst!(ir.new_nodes, posid, attach_after)
     newline = something(newinst.line, ir[pos][:line])
-    newflag = recompute_inst_flag(newinst, ir)
+    newflag = recompute_newinst_flag(newinst, ir)
     node = inst_from_newinst!(node, newinst, newline, newflag)
     return SSAValue(length(ir.stmts) + node.idx)
 end
@@ -567,6 +696,7 @@ struct CFGTransformState
     result_bbs::Vector{BasicBlock}
     bb_rename_pred::Vector{Int}
     bb_rename_succ::Vector{Int}
+    domtree::Union{Nothing, DomTree}
 end
 
 # N.B.: Takes ownership of the CFG array
@@ -602,11 +732,14 @@ function CFGTransformState!(blocks::Vector{BasicBlock}, allow_cfg_transforms::Bo
         let blocks = blocks, bb_rename = bb_rename
             result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1]
         end
+        # TODO: This could be done by just renaming the domtree
+        domtree = construct_domtree(result_bbs)
     else
         bb_rename = Vector{Int}()
         result_bbs = blocks
+        domtree = nothing
     end
-    return CFGTransformState(allow_cfg_transforms, allow_cfg_transforms, result_bbs, bb_rename, bb_rename)
+    return CFGTransformState(allow_cfg_transforms, allow_cfg_transforms, result_bbs, bb_rename, bb_rename, domtree)
 end
 
 mutable struct IncrementalCompact
@@ -639,6 +772,7 @@ mutable struct IncrementalCompact
         perm = sort!(collect(eachindex(info)); by=i::Int->(2info[i].pos+info[i].attach_after, i))
         new_len = length(code.stmts) + length(info)
         result = InstructionStream(new_len)
+        code.debuginfo.codelocs = result.line
         used_ssas = fill(0, new_len)
         new_new_used_ssas = Vector{Int}()
         blocks = code.cfg.blocks
@@ -661,7 +795,7 @@ mutable struct IncrementalCompact
         bb_rename = Vector{Int}()
         pending_nodes = NewNodeStream()
         pending_perm = Int[]
-        return new(code, parent.result, CFGTransformState(false, false, parent.cfg_transform.result_bbs, bb_rename, bb_rename),
+        return new(code, parent.result, CFGTransformState(false, false, parent.cfg_transform.result_bbs, bb_rename, bb_rename, nothing),
             ssa_rename, parent.used_ssas,
             parent.late_fixup, perm, 1,
             parent.new_new_nodes, parent.new_new_used_ssas, pending_nodes, pending_perm,
@@ -679,12 +813,13 @@ end
 types(ir::Union{IRCode, IncrementalCompact}) = TypesView(ir)
 
 function getindex(compact::IncrementalCompact, ssa::SSAValue)
-    @assert ssa.id < compact.result_idx
+    (1 ≤ ssa.id < compact.result_idx) || throw(InvalidIRError())
     return compact.result[ssa.id]
 end
 
 function getindex(compact::IncrementalCompact, ssa::OldSSAValue)
     id = ssa.id
+    (id ≥ 1) || throw(InvalidIRError())
     if id < compact.idx
         new_idx = compact.ssa_rename[id]::Int
         return compact.result[new_idx]
@@ -696,12 +831,15 @@ function getindex(compact::IncrementalCompact, ssa::OldSSAValue)
         return compact.ir.new_nodes.stmts[id]
     end
     id -= length(compact.ir.new_nodes)
+    (id ≤ length(compact.pending_nodes.stmts)) || throw(InvalidIRError())
     return compact.pending_nodes.stmts[id]
 end
 
 function getindex(compact::IncrementalCompact, ssa::NewSSAValue)
     if ssa.id < 0
-        return compact.new_new_nodes.stmts[-ssa.id]
+        stmts = compact.new_new_nodes.stmts
+        (-ssa.id ≤ length(stmts)) || throw(InvalidIRError())
+        return stmts[-ssa.id]
     else
         return compact[SSAValue(ssa.id)]
     end
@@ -740,6 +878,16 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV
     xb = block_for_inst(compact, x)
     yb = block_for_inst(compact, y)
     if xb == yb
+        if isa(compact[x][:stmt], PhiNode)
+            if isa(compact[y][:stmt], PhiNode)
+                # A node dominates another only if it dominates all uses of that note.
+                # Usually that is not a distinction. However, for phi nodes, the use
+                # occurs on the edge to the predecessor block. Thus, by definition, for
+                # any other PhiNode in the same BB there must be (at least) one edge
+                # that this phi node does not dominate.
+                return false
+            end
+        end
         xinfo = yinfo = nothing
         if isa(x, OldSSAValue)
             x′ = compact.ssa_rename[x.id]::SSAValue
@@ -765,7 +913,7 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV
         else
             y′ = y
         end
-        if x′.id == y′.id && (xinfo !== nothing || yinfo !== nothing)
+        if x′.id == y′.id
             if xinfo !== nothing && yinfo !== nothing
                 if xinfo.attach_after == yinfo.attach_after
                     return x.id < y.id
@@ -773,8 +921,8 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV
                 return yinfo.attach_after
             elseif xinfo !== nothing
                 return !xinfo.attach_after
-            else
-                return (yinfo::NewNodeInfo).attach_after
+            elseif yinfo !== nothing
+                return yinfo.attach_after
             end
         end
         return x′.id < y′.id
@@ -809,8 +957,8 @@ function add_pending!(compact::IncrementalCompact, pos::Int, attach_after::Bool)
 end
 
 function inst_from_newinst!(node::Instruction, newinst::NewInstruction,
-    newline::Int32=newinst.line::Int32, newflag::UInt8=newinst.flag::UInt8)
-    node[:inst] = newinst.stmt
+    newline::NTuple{3,Int32}=newinst.line::NTuple{3,Int32}, newflag::UInt32=newinst.flag::UInt32)
+    node[:stmt] = newinst.stmt
     node[:type] = newinst.type
     node[:info] = newinst.info
     node[:line] = newline
@@ -818,25 +966,14 @@ function inst_from_newinst!(node::Instruction, newinst::NewInstruction,
     return node
 end
 
-function recompute_inst_flag(newinst::NewInstruction, src::Union{IRCode,IncrementalCompact})
+function recompute_newinst_flag(newinst::NewInstruction, src::Union{IRCode,IncrementalCompact})
     flag = newinst.flag
     flag !== nothing && return flag
-    flag = IR_FLAG_NULL
-    (consistent, effect_free_and_nothrow, nothrow) = stmt_effect_flags(
-        fallback_lattice, newinst.stmt, newinst.type, src)
-    if consistent
-        flag |= IR_FLAG_CONSISTENT
-    end
-    if effect_free_and_nothrow
-        flag |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
-    elseif nothrow
-        flag |= IR_FLAG_NOTHROW
-    end
-    return flag
+    return recompute_effects_flags(fallback_lattice, newinst.stmt, newinst.type, src)
 end
 
 function insert_node!(compact::IncrementalCompact, @nospecialize(before), newinst::NewInstruction, attach_after::Bool=false)
-    newflag = recompute_inst_flag(newinst, compact)
+    newflag = recompute_newinst_flag(newinst, compact)
     if isa(before, SSAValue)
         if before.id < compact.result_idx
             count_added_node!(compact, newinst.stmt)
@@ -894,22 +1031,33 @@ function insert_node!(compact::IncrementalCompact, @nospecialize(before), newins
     end
 end
 
-function insert_node_here!(compact::IncrementalCompact, newinst::NewInstruction, reverse_affinity::Bool=false)
-    newline = newinst.line::Int32
-    refinish = false
+function did_just_finish_bb(compact)
     result_idx = compact.result_idx
     result_bbs = compact.cfg_transform.result_bbs
-    if reverse_affinity &&
-            ((compact.active_result_bb == length(result_bbs) + 1) ||
-             result_idx == first(result_bbs[compact.active_result_bb].stmts))
+    (compact.active_result_bb == length(result_bbs) + 1) ||
+    (result_idx == first(result_bbs[compact.active_result_bb].stmts) &&
+     compact.active_result_bb != 1)
+end
+
+function maybe_reopen_bb!(compact)
+    if did_just_finish_bb(compact)
         compact.active_result_bb -= 1
-        refinish = true
+        return true
     end
+    return false
+end
+
+function insert_node_here!(compact::IncrementalCompact, newinst::NewInstruction, reverse_affinity::Bool=false)
+    newline = newinst.line::NTuple{3,Int32}
+    refinish = false
+    result_idx = compact.result_idx
+    result_bbs = compact.cfg_transform.result_bbs
+    refinish = reverse_affinity && maybe_reopen_bb!(compact)
     if result_idx > length(compact.result)
         @assert result_idx == length(compact.result) + 1
         resize!(compact, result_idx)
     end
-    newflag = recompute_inst_flag(newinst, compact)
+    newflag = recompute_newinst_flag(newinst, compact)
     node = inst_from_newinst!(compact.result[result_idx], newinst, newline, newflag)
     count_added_node!(compact, newinst.stmt) && push!(compact.late_fixup, result_idx)
     compact.result_idx = result_idx + 1
@@ -918,10 +1066,26 @@ function insert_node_here!(compact::IncrementalCompact, newinst::NewInstruction,
     return inst
 end
 
+function delete_inst_here!(compact::IncrementalCompact)
+    # If we already closed this bb, reopen it for our modification
+    refinish = maybe_reopen_bb!(compact)
+
+    # Delete the statement, update refcounts etc
+    compact[SSAValue(compact.result_idx-1)] = nothing
+
+    # Pretend that we never compacted this statement in the first place
+    compact.result_idx -= 1
+
+    refinish && finish_current_bb!(compact, 0)
+
+    return nothing
+end
+
 function getindex(view::TypesView, v::OldSSAValue)
     id = v.id
     ir = view.ir.ir
     stmts = ir.stmts
+    (id ≥ 1) || throw(InvalidIRError())
     if id <= length(stmts)
         return stmts[id][:type]
     end
@@ -930,7 +1094,9 @@ function getindex(view::TypesView, v::OldSSAValue)
         return ir.new_nodes.stmts[id][:type]
     end
     id -= length(ir.new_nodes)
-    return view.ir.pending_nodes.stmts[id][:type]
+    stmts = view.ir.pending_nodes.stmts
+    (id ≤ length(stmts)) || throw(InvalidIRError())
+    return stmts[id][:type]
 end
 
 function kill_current_use!(compact::IncrementalCompact, @nospecialize(val))
@@ -950,14 +1116,13 @@ function kill_current_uses!(compact::IncrementalCompact, @nospecialize(stmt))
     end
 end
 
-function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::SSAValue)
-    @assert idx.id < compact.result_idx
-    (compact.result[idx.id][:inst] === v) && return compact
+function setindex!(compact::IncrementalCompact, @nospecialize(v), ssa::Union{SSAValue, NewSSAValue})
+    (compact[ssa][:stmt] === v) && return compact
     # Kill count for current uses
-    kill_current_uses!(compact, compact.result[idx.id][:inst])
-    compact.result[idx.id][:inst] = v
+    kill_current_uses!(compact, compact[ssa][:stmt])
+    compact[ssa][:stmt] = v
     # Add count for new use
-    count_added_node!(compact, v) && push!(compact.late_fixup, idx.id)
+    count_added_node!(compact, v) && isa(ssa, SSAValue) && push!(compact.late_fixup, ssa.id)
     return compact
 end
 
@@ -965,22 +1130,22 @@ function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::OldSSAVal
     id = idx.id
     if id < compact.idx
         new_idx = compact.ssa_rename[id]::Int
-        (compact.result[new_idx][:inst] === v) && return compact
-        kill_current_uses!(compact, compact.result[new_idx][:inst])
-        compact.result[new_idx][:inst] = v
+        (compact.result[new_idx][:stmt] === v) && return compact
+        kill_current_uses!(compact, compact.result[new_idx][:stmt])
+        compact.result[new_idx][:stmt] = v
         count_added_node!(compact, v) && push!(compact.late_fixup, new_idx)
         return compact
     elseif id <= length(compact.ir.stmts)  # ir.stmts, new_nodes, and pending_nodes uses aren't counted yet, so no need to adjust
-        compact.ir.stmts[id][:inst] = v
+        compact.ir.stmts[id][:stmt] = v
         return compact
     end
     id -= length(compact.ir.stmts)
     if id <= length(compact.ir.new_nodes)
-        compact.ir.new_nodes.stmts[id][:inst] = v
+        compact.ir.new_nodes.stmts[id][:stmt] = v
         return compact
     end
     id -= length(compact.ir.new_nodes)
-    compact.pending_nodes.stmts[id][:inst] = v
+    compact.pending_nodes.stmts[id][:stmt] = v
     return compact
 end
 
@@ -988,7 +1153,7 @@ function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::Int)
     if idx < compact.result_idx
         compact[SSAValue(idx)] = v
     else
-        compact.ir.stmts[idx][:inst] = v
+        compact.ir.stmts[idx][:stmt] = v
     end
     return compact
 end
@@ -1000,7 +1165,7 @@ should_check_ssa_counts() = __check_ssa_counts__[]
 
 # specifically meant to be used with body1 = compact.result and body2 = compact.new_new_nodes, with nvals == length(compact.used_ssas)
 function find_ssavalue_uses1(compact::IncrementalCompact)
-    body1, body2 = compact.result.inst, compact.new_new_nodes.stmts.inst
+    body1, body2 = compact.result.stmt, compact.new_new_nodes.stmts.stmt
     nvals = length(compact.used_ssas)
     nvalsnew = length(compact.new_new_used_ssas)
     nbody1 = compact.result_idx
@@ -1034,7 +1199,7 @@ function find_ssavalue_uses1(compact::IncrementalCompact)
 end
 
 function _oracle_check(compact::IncrementalCompact)
-    (observed_used_ssas, observed_used_newssas) = Core.Compiler.find_ssavalue_uses1(compact)
+    (observed_used_ssas, observed_used_newssas) = find_ssavalue_uses1(compact)
     for i = 1:length(observed_used_ssas)
         if observed_used_ssas[i] != compact.used_ssas[i]
             return (observed_used_ssas, observed_used_newssas, SSAValue(i))
@@ -1058,20 +1223,27 @@ end
 
 getindex(view::TypesView, idx::SSAValue) = getindex(view, idx.id)
 function getindex(view::TypesView, idx::Int)
+    (idx ≥ 1) || throw(InvalidIRError())
     if isa(view.ir, IncrementalCompact) && idx < view.ir.result_idx
         return view.ir.result[idx][:type]
     elseif isa(view.ir, IncrementalCompact) && view.ir.renamed_new_nodes
         if idx <= length(view.ir.result)
             return view.ir.result[idx][:type]
         else
-            return view.ir.new_new_nodes.stmts[idx - length(view.ir.result)][:type]
+            idx -= length(view.ir.result)
+            stmts = view.ir.new_new_nodes.stmts
+            (idx ≤ length(stmts)) || throw(InvalidIRError())
+            return stmts[idx][:type]
         end
     else
         ir = isa(view.ir, IncrementalCompact) ? view.ir.ir : view.ir
         if idx <= length(ir.stmts)
             return ir.stmts[idx][:type]
         else
-            return ir.new_nodes.stmts[idx - length(ir.stmts)][:type]
+            idx -= length(ir.stmts)
+            stmts = ir.new_nodes.stmts
+            (idx ≤ length(stmts)) || throw(InvalidIRError())
+            return stmts[idx][:type]
         end
     end
 end
@@ -1082,13 +1254,13 @@ end
 
 # N.B.: Don't make this <: Function to avoid ::Function deopt
 struct Refiner
-    result_flags::Vector{UInt8}
+    result_flags::Vector{UInt32}
     result_idx::Int
 end
 (this::Refiner)() = (this.result_flags[this.result_idx] |= IR_FLAG_REFINED; nothing)
 
 function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int},
-                                processed_idx::Int, result_idx::Int,
+                                already_inserted, result_idx::Int,
                                 ssa_rename::Vector{Any}, used_ssas::Vector{Int},
                                 new_new_used_ssas::Vector{Int},
                                 do_rename_ssa::Bool,
@@ -1096,37 +1268,48 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int}
     values = Vector{Any}(undef, length(old_values))
     for i = 1:length(old_values)
         isassigned(old_values, i) || continue
-        val = old_values[i]
-        if isa(val, SSAValue)
-            if do_rename_ssa
-                if val.id > processed_idx
-                    push!(late_fixup, result_idx)
-                    val = OldSSAValue(val.id)
-                else
-                    val = renumber_ssa2(val, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)
-                end
-            else
-                used_ssas[val.id] += 1
-            end
-        elseif isa(val, OldSSAValue)
-            if val.id > processed_idx
-                push!(late_fixup, result_idx)
-            else
-                # Always renumber these. do_rename_ssa applies only to actual SSAValues
-                val = renumber_ssa2(SSAValue(val.id), ssa_rename, used_ssas, new_new_used_ssas, true, mark_refined!)
-            end
-        elseif isa(val, NewSSAValue)
-            if val.id < 0
+        values[i] = process_phinode_value(old_values, i, late_fixup, already_inserted, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)
+    end
+    return values
+end
+
+function process_phinode_value(old_values::Vector{Any}, i::Int, late_fixup::Vector{Int},
+                               already_inserted, result_idx::Int,
+                               ssa_rename::Vector{Any}, used_ssas::Vector{Int},
+                               new_new_used_ssas::Vector{Int},
+                               do_rename_ssa::Bool,
+                               mark_refined!::Union{Refiner, Nothing})
+    val = old_values[i]
+    if isa(val, SSAValue)
+        if do_rename_ssa
+            if !already_inserted(i, OldSSAValue(val.id))
                 push!(late_fixup, result_idx)
-                new_new_used_ssas[-val.id] += 1
+                val = OldSSAValue(val.id)
             else
-                @assert do_rename_ssa
-                val = SSAValue(val.id)
+                val = renumber_ssa2(val, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)
             end
+        else
+            used_ssas[val.id] += 1
+        end
+    elseif isa(val, OldSSAValue)
+        if !already_inserted(i, val)
+            push!(late_fixup, result_idx)
+        else
+            # Always renumber these. do_rename_ssa applies only to actual SSAValues
+            val = renumber_ssa2(SSAValue(val.id), ssa_rename, used_ssas, new_new_used_ssas, true, mark_refined!)
+        end
+    elseif isa(val, NewSSAValue)
+        if val.id < 0
+            new_new_used_ssas[-val.id] += 1
+        else
+            @assert do_rename_ssa
+            val = SSAValue(val.id)
         end
-        values[i] = val
     end
-    return values
+    if isa(val, NewSSAValue)
+        push!(late_fixup, result_idx)
+    end
+    return val
 end
 
 function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int},
@@ -1144,6 +1327,9 @@ function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{In
     end
     if isa(val, SSAValue)
         used_ssas[val.id] += 1
+    elseif isa(val, NewSSAValue)
+        @assert val.id < 0
+        new_new_used_ssas[-val.id] += 1
     end
     return val
 end
@@ -1198,26 +1384,32 @@ end
 
 # N.B.: from and to are non-renamed indices
 function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::Int)
-    # Note: We recursively kill as many edges as are obviously dead. However, this
-    # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or
-    # worstcase during codegen).
-    (; bb_rename_pred, bb_rename_succ, result_bbs) = compact.cfg_transform
+    # Note: We recursively kill as many edges as are obviously dead.
+    (; bb_rename_pred, bb_rename_succ, result_bbs, domtree) = compact.cfg_transform
     preds = result_bbs[bb_rename_succ[to]].preds
     succs = result_bbs[bb_rename_pred[from]].succs
     deleteat!(preds, findfirst(x::Int->x==bb_rename_pred[from], preds)::Int)
     deleteat!(succs, findfirst(x::Int->x==bb_rename_succ[to], succs)::Int)
+    if domtree !== nothing
+        domtree_delete_edge!(domtree, result_bbs, bb_rename_pred[from], bb_rename_succ[to])
+    end
     # Check if the block is now dead
-    if length(preds) == 0
-        for succ in copy(result_bbs[bb_rename_succ[to]].succs)
-            kill_edge!(compact, active_bb, to, findfirst(x::Int->x==succ, bb_rename_pred)::Int)
+    if length(preds) == 0 || (domtree !== nothing && bb_unreachable(domtree, bb_rename_succ[to]))
+        to_succs = result_bbs[bb_rename_succ[to]].succs
+        for succ in copy(to_succs)
+            new_succ = findfirst(x::Int->x==succ, bb_rename_pred)
+            new_succ === nothing && continue
+            kill_edge!(compact, active_bb, to, new_succ)
         end
+        empty!(preds)
+        empty!(to_succs)
         if to < active_bb
             # Kill all statements in the block
             stmts = result_bbs[bb_rename_succ[to]].stmts
             for stmt in stmts
-                compact.result[stmt][:inst] = nothing
+                compact.result[stmt][:stmt] = nothing
             end
-            compact.result[last(stmts)][:inst] = ReturnNode()
+            compact.result[last(stmts)][:stmt] = ReturnNode()
         else
             # Tell compaction to not schedule this block. A value of -2 here
             # indicates that the block is not to be scheduled, but there should
@@ -1233,7 +1425,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
             stmts = result_bbs[bb_rename_succ[to]].stmts
             idx = first(stmts)
             while idx <= last(stmts)
-                stmt = compact.result[idx][:inst]
+                stmt = compact.result[idx][:stmt]
                 stmt === nothing && continue
                 isa(stmt, PhiNode) || break
                 i = findfirst(x::Int32->x==bb_rename_pred[from], stmt.edges)
@@ -1246,8 +1438,8 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
         else
             stmts = compact.ir.cfg.blocks[to].stmts
             for stmt in CompactPeekIterator(compact, first(stmts), last(stmts))
-                stmt === nothing && continue
-                isa(stmt, PhiNode) || break
+                is_valid_phiblock_stmt(stmt) || break
+                isa(stmt, PhiNode) || continue
                 i = findfirst(x::Int32->x==from, stmt.edges)
                 if i !== nothing
                     deleteat!(stmt.edges, i)
@@ -1265,37 +1457,39 @@ struct Refined
 end
 
 function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instruction, idx::Int, processed_idx::Int, active_bb::Int, do_rename_ssa::Bool)
-    stmt = inst[:inst]
+    stmt = inst[:stmt]
     (; result, ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact
     (; cfg_transforms_enabled, fold_constant_branches, bb_rename_succ, bb_rename_pred, result_bbs) = compact.cfg_transform
     mark_refined! = Refiner(result.flag, result_idx)
+    already_inserted_phi_arg = already_inserted_ssa(compact, processed_idx)
     if stmt === nothing
         ssa_rename[idx] = stmt
     elseif isa(stmt, OldSSAValue)
         ssa_rename[idx] = ssa_rename[stmt.id]
     elseif isa(stmt, GotoNode) && cfg_transforms_enabled
+        stmt.label < 0 && (println(stmt); println(compact))
         label = bb_rename_succ[stmt.label]
         @assert label > 0
         ssa_rename[idx] = SSAValue(result_idx)
-        result[result_idx][:inst] = GotoNode(label)
+        result[result_idx][:stmt] = GotoNode(label)
         result_idx += 1
     elseif isa(stmt, GlobalRef)
-        total_flags = IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE
+        total_flags = IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
         flag = result[result_idx][:flag]
-        if (flag & total_flags) == total_flags
+        if has_flag(flag, total_flags)
             ssa_rename[idx] = stmt
         else
             ssa_rename[idx] = SSAValue(result_idx)
-            result[result_idx][:inst] = stmt
+            result[result_idx][:stmt] = stmt
             result_idx += 1
         end
     elseif isa(stmt, GotoNode)
         ssa_rename[idx] = SSAValue(result_idx)
-        result[result_idx][:inst] = stmt
+        result[result_idx][:stmt] = stmt
         result_idx += 1
     elseif isa(stmt, GotoIfNot) && cfg_transforms_enabled
         stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::GotoIfNot
-        result[result_idx][:inst] = stmt
+        result[result_idx][:stmt] = stmt
         cond = stmt.cond
         if fold_constant_branches
             if !isa(cond, Bool)
@@ -1307,14 +1501,14 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
             end
             if cond
                 ssa_rename[idx] = nothing
-                result[result_idx][:inst] = nothing
+                result[result_idx][:stmt] = nothing
                 kill_edge!(compact, active_bb, active_bb, stmt.dest)
                 # Don't increment result_idx => Drop this statement
             else
                 label = bb_rename_succ[stmt.dest]
                 @assert label > 0
                 ssa_rename[idx] = SSAValue(result_idx)
-                result[result_idx][:inst] = GotoNode(label)
+                result[result_idx][:stmt] = GotoNode(label)
                 kill_edge!(compact, active_bb, active_bb, active_bb+1)
                 result_idx += 1
             end
@@ -1323,16 +1517,23 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
             label = bb_rename_succ[stmt.dest]
             @assert label > 0
             ssa_rename[idx] = SSAValue(result_idx)
-            result[result_idx][:inst] = GotoIfNot(cond, label)
+            result[result_idx][:stmt] = GotoIfNot(cond, label)
             result_idx += 1
         end
+    elseif cfg_transforms_enabled && isa(stmt, EnterNode)
+        stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::EnterNode
+        if stmt.catch_dest != 0
+            label = bb_rename_succ[stmt.catch_dest]
+            @assert label > 0
+            result[result_idx][:stmt] = EnterNode(stmt, label)
+        else
+            result[result_idx][:stmt] = stmt
+        end
+        ssa_rename[idx] = SSAValue(result_idx)
+        result_idx += 1
     elseif isa(stmt, Expr)
         stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::Expr
-        if cfg_transforms_enabled && isexpr(stmt, :enter)
-            label = bb_rename_succ[stmt.args[1]::Int]
-            @assert label > 0
-            stmt.args[1] = label
-        elseif isexpr(stmt, :throw_undef_if_not)
+        if isexpr(stmt, :throw_undef_if_not)
             cond = stmt.args[2]
             if isa(cond, Bool) && cond === true
                 # cond was folded to true - this statement
@@ -1340,9 +1541,29 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
                 ssa_rename[idx] = nothing
                 return result_idx
             end
+        elseif isexpr(stmt, :leave)
+            let i = 1
+                while i <= length(stmt.args)
+                    if stmt.args[i] === nothing
+                        deleteat!(stmt.args, i)
+                    else
+                        i += 1
+                    end
+                end
+            end
+            if isempty(stmt.args)
+                # This :leave is dead
+                ssa_rename[idx] = nothing
+                return result_idx
+            end
         end
-        ssa_rename[idx] = SSAValue(result_idx)
-        result[result_idx][:inst] = stmt
+        typ = inst[:type]
+        if isa(typ, Const) && is_inlineable_constant(typ.val)
+            ssa_rename[idx] = quoted(typ.val)
+        else
+            ssa_rename[idx] = SSAValue(result_idx)
+        end
+        result[result_idx][:stmt] = stmt
         result_idx += 1
     elseif isa(stmt, PiNode)
         # As an optimization, we eliminate any trivial pinodes. For performance, we use ===
@@ -1362,18 +1583,19 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
                 return result_idx
             end
         elseif !isa(pi_val, AnySSAValue) && !isa(pi_val, GlobalRef)
-            valtyp = isa(pi_val, QuoteNode) ? typeof(pi_val.value) : typeof(pi_val)
-            if valtyp === stmt.typ
+            pi_val′ = isa(pi_val, QuoteNode) ? pi_val.value : pi_val
+            stmttyp = stmt.typ
+            if isa(stmttyp, Const) ? pi_val′ === stmttyp.val : typeof(pi_val′) === stmttyp
                 ssa_rename[idx] = pi_val
                 return result_idx
             end
         end
         ssa_rename[idx] = SSAValue(result_idx)
-        result[result_idx][:inst] = stmt
+        result[result_idx][:stmt] = stmt
         result_idx += 1
-    elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode) || isa(stmt, GotoIfNot)
+    elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode) || isa(stmt, GotoIfNot) || isa(stmt, EnterNode)
         ssa_rename[idx] = SSAValue(result_idx)
-        result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)
+        result[result_idx][:stmt] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)
         result_idx += 1
     elseif isa(stmt, PhiNode)
         # N.B.: For PhiNodes, this needs to be at the top, since PhiNodes
@@ -1382,7 +1604,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
         if cfg_transforms_enabled
             # Rename phi node edges
             let bb_rename_pred=bb_rename_pred
-                map!(i::Int32->bb_rename_pred[i], stmt.edges, stmt.edges)
+                map!(i::Int32->i == 0 ? 0 : bb_rename_pred[i], stmt.edges, stmt.edges)
             end
 
             # Remove edges and values associated with dead blocks. Entries in
@@ -1416,18 +1638,16 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
             values = stmt.values
         end
 
-        values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)
-        # Don't remove the phi node if it is before the definition of its value
-        # because doing so can create forward references. This should only
-        # happen with dead loops, but can cause problems when optimization
-        # passes look at all code, dead or not. This check should be
-        # unnecessary when DCE can remove those dead loops entirely, so this is
-        # just to be safe.
-        before_def = isassigned(values, 1) && (v = values[1]; isa(v, OldSSAValue)) && idx < v.id
-        if length(edges) == 1 && isassigned(values, 1) && !before_def &&
-                length(cfg_transforms_enabled ?
-                    result_bbs[bb_rename_succ[active_bb]].preds :
-                    compact.ir.cfg.blocks[active_bb].preds) == 1
+        values = process_phinode_values(values, late_fixup, already_inserted_phi_arg, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)
+
+        # Quick egality check for PhiNode that may be replaced with its incoming
+        # value without needing to set the `Refined` flag. We can't do the actual
+        # refinement check, because we do not have access to the lattice here.
+        # Users may call `reprocess_phi_node!` inside the compaction loop to
+        # revisit PhiNodes with the proper lattice refinement check.
+        if may_replace_phi(values, cfg_transforms_enabled ?
+                result_bbs[bb_rename_succ[active_bb]] :
+                compact.ir.cfg.blocks[active_bb], idx) && argextype(values[1], compact) === inst[:type]
             # There's only one predecessor left - just replace it
             v = values[1]
             @assert !isa(v, NewSSAValue)
@@ -1436,12 +1656,26 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
             end
             ssa_rename[idx] = v
         else
-            result[result_idx][:inst] = PhiNode(edges, values)
+            result[result_idx][:stmt] = PhiNode(edges, values)
             result_idx += 1
         end
     elseif isa(stmt, PhiCNode)
         ssa_rename[idx] = SSAValue(result_idx)
-        result[result_idx][:inst] = PhiCNode(process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!))
+        values = stmt.values
+        if cfg_transforms_enabled
+            # Filter arguments that come from dead blocks
+            values = Any[]
+            for value in stmt.values
+                if isa(value, SSAValue)
+                    blk = block_for_inst(compact.ir.cfg, value.id)
+                    if bb_rename_pred[blk] < 0
+                        continue
+                    end
+                end
+                push!(values, value)
+            end
+        end
+        result[result_idx][:stmt] = PhiCNode(process_phinode_values(values, late_fixup, already_inserted_phi_arg, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!))
         result_idx += 1
     else
         if isa(stmt, SSAValue)
@@ -1454,7 +1688,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
         else
             # Constant assign, replace uses of this ssa value with its result
         end
-        if (inst[:flag] & IR_FLAG_REFINED) != 0 && !isa(stmt, Refined)
+        if has_flag(inst, IR_FLAG_REFINED) && !isa(stmt, Refined)
             # If we're compacting away an instruction that was marked as refined,
             # leave a marker in the ssa_rename, so we can taint any users.
             stmt = Refined(stmt)
@@ -1464,6 +1698,38 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
     return result_idx
 end
 
+function may_replace_phi(values::Vector{Any}, phi_bb::BasicBlock, idx::Int)
+    length(values) == 1 || return false
+    isassigned(values, 1) || return false
+    length(phi_bb.preds) == 1 || return false
+
+    # Don't remove the phi node if it is before the definition of its value
+    # because doing so can create forward references. This should only
+    # happen with dead loops, but can cause problems when optimization
+    # passes look at all code, dead or not. This check should be
+    # unnecessary when DCE can remove those dead loops entirely, so this is
+    # just to be safe.
+    v = values[1]
+    before_def = isa(v, OldSSAValue) && idx < v.id
+    return !before_def
+end
+
+function reprocess_phi_node!(𝕃ₒ::AbstractLattice, compact::IncrementalCompact, phi::PhiNode, old_idx::Int)
+    phi_bb = compact.active_result_bb
+    did_just_finish_bb(compact) && (phi_bb -= 1)
+    may_replace_phi(phi.values, compact.cfg_transform.result_bbs[phi_bb], compact.idx) || return false
+
+    # There's only one predecessor left - just replace it
+    v = phi.values[1]
+    if !⊑(𝕃ₒ, compact[compact.ssa_rename[old_idx]][:type], argextype(v, compact))
+        v = Refined(v)
+    end
+    compact.ssa_rename[old_idx] = v
+
+    delete_inst_here!(compact)
+    return true
+end
+
 function resize!(compact::IncrementalCompact, nnewnodes::Int)
     old_length = length(compact.result)
     resize!(compact.result, nnewnodes)
@@ -1474,6 +1740,8 @@ function resize!(compact::IncrementalCompact, nnewnodes::Int)
     return compact
 end
 
+const NoLineUpdate = (Int32(0), Int32(0), Int32(0))
+
 function finish_current_bb!(compact::IncrementalCompact, active_bb::Int,
                             old_result_idx::Int=compact.result_idx, unreachable::Bool=false)
     (;result_bbs, cfg_transforms_enabled, bb_rename_succ) = compact.cfg_transform
@@ -1490,9 +1758,9 @@ function finish_current_bb!(compact::IncrementalCompact, active_bb::Int,
             length(compact.result) < old_result_idx && resize!(compact, old_result_idx)
             node = compact.result[old_result_idx]
             if unreachable
-                node[:inst], node[:type], node[:line] = ReturnNode(), Union{}, 0
+                node[:stmt], node[:type], node[:line] = ReturnNode(), Union{}, NoLineUpdate
             else
-                node[:inst], node[:type], node[:line] = nothing, Nothing, 0
+                node[:stmt], node[:type], node[:line], node[:flag] = nothing, Nothing, NoLineUpdate, IR_FLAGS_EFFECTS
             end
             compact.result_idx = old_result_idx + 1
         elseif cfg_transforms_enabled && compact.result_idx - 1 == first(bb.stmts)
@@ -1511,10 +1779,20 @@ function finish_current_bb!(compact::IncrementalCompact, active_bb::Int,
     return skipped
 end
 
-function attach_after_stmt_after(compact::IncrementalCompact, idx::Int)
-    compact.new_nodes_idx > length(compact.perm) && return false
-    entry = compact.ir.new_nodes.info[compact.perm[compact.new_nodes_idx]]
-    return entry.pos == idx && entry.attach_after
+"""
+    stmts_awaiting_insertion(compact::IncrementalCompact, idx::Int)
+
+Returns true if there are new/pending instructions enqueued for insertion into
+`compact` on any instruction in the range `1:idx`. Otherwise, returns false.
+"""
+function stmts_awaiting_insertion(compact::IncrementalCompact, idx::Int)
+
+    new_node_waiting = compact.new_nodes_idx <= length(compact.perm) &&
+        compact.ir.new_nodes.info[compact.perm[compact.new_nodes_idx]].pos <= idx
+    pending_node_waiting = !isempty(compact.pending_perm) &&
+        compact.pending_nodes.info[compact.pending_perm[1]].pos <= idx
+
+    return new_node_waiting || pending_node_waiting
 end
 
 function process_newnode!(compact::IncrementalCompact, new_idx::Int, new_node_entry::Instruction, new_node_info::NewNodeInfo, idx::Int, active_bb::Int, do_rename_ssa::Bool)
@@ -1526,7 +1804,7 @@ function process_newnode!(compact::IncrementalCompact, new_idx::Int, new_node_en
     compact.result_idx = result_idx
     # If this instruction has reverse affinity and we were at the end of a basic block,
     # finish it now.
-    if new_node_info.attach_after && idx == last(bb.stmts)+1 && !attach_after_stmt_after(compact, idx-1)
+    if new_node_info.attach_after && idx == last(bb.stmts)+1 && !stmts_awaiting_insertion(compact, idx-1)
         active_bb += 1
         finish_current_bb!(compact, active_bb, old_result_idx)
     end
@@ -1537,13 +1815,27 @@ struct CompactPeekIterator
     compact::IncrementalCompact
     start_idx::Int
     end_idx::Int
+    include_stmts_before_start::Bool
 end
+CompactPeekIterator(compact::IncrementalCompact, start_idx::Int, end_idx::Int) =
+    CompactPeekIterator(compact, start_idx, end_idx, false)
+
 
 function CompactPeekIterator(compact::IncrementalCompact, start_idx::Int)
     return CompactPeekIterator(compact, start_idx, 0)
 end
 
-entry_at_idx(entry::NewNodeInfo, idx::Int) = entry.attach_after ? entry.pos == idx - 1 : entry.pos == idx
+function entry_at_idx(entry::NewNodeInfo, idx::Int, start_idx::Int, include_stmts_before_start::Bool)
+    if entry.attach_after
+        if !include_stmts_before_start
+            entry.pos >= start_idx || return false
+        end
+        return entry.pos == idx - 1
+    else
+        return entry.pos == idx
+    end
+end
+
 function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it.start_idx, it.compact.new_nodes_idx, 1))
     if it.end_idx > 0 && idx > it.end_idx
         return nothing
@@ -1555,22 +1847,22 @@ function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it.
     if compact.new_nodes_idx <= length(compact.perm)
         new_nodes = compact.ir.new_nodes
         for eidx in aidx:length(compact.perm)
-            if entry_at_idx(new_nodes.info[compact.perm[eidx]], idx)
+            if entry_at_idx(new_nodes.info[compact.perm[eidx]], idx, it.start_idx, it.include_stmts_before_start)
                 entry = new_nodes.stmts[compact.perm[eidx]]
-                return (entry[:inst], (idx, eidx+1, bidx))
+                return (entry[:stmt], (idx, eidx+1, bidx))
             end
         end
     end
     if !isempty(compact.pending_perm)
         for eidx in bidx:length(compact.pending_perm)
-            if entry_at_idx(compact.pending_nodes.info[compact.pending_perm[eidx]], idx)
+            if entry_at_idx(compact.pending_nodes.info[compact.pending_perm[eidx]], idx, it.start_idx, it.include_stmts_before_start)
                 entry = compact.pending_nodes.stmts[compact.pending_perm[eidx]]
-                return (entry[:inst], (idx, aidx, eidx+1))
+                return (entry[:stmt], (idx, aidx, eidx+1))
             end
         end
     end
     idx > length(compact.ir.stmts) && return nothing
-    return (compact.ir.stmts[idx][:inst], (idx + 1, aidx, bidx))
+    return (compact.ir.stmts[idx][:stmt], (idx + 1, aidx, bidx))
 end
 
 # the returned Union{Nothing, Pair{Pair{Int,Int},Any}} cannot be stack allocated,
@@ -1579,7 +1871,7 @@ end
     idxs = iterate_compact(compact)
     idxs === nothing && return nothing
     old_result_idx = idxs[2]
-    return Pair{Pair{Int,Int},Any}(idxs, compact.result[old_result_idx][:inst]), nothing
+    return Pair{Pair{Int,Int},Any}(idxs, compact.result[old_result_idx][:stmt]), nothing
 end
 
 function iterate_compact(compact::IncrementalCompact)
@@ -1656,7 +1948,7 @@ function iterate_compact(compact::IncrementalCompact)
     compact.result[old_result_idx] = compact.ir.stmts[idx]
     result_idx = process_node!(compact, old_result_idx, compact.ir.stmts[idx], idx, idx, active_bb, true)
     compact.result_idx = result_idx
-    if idx == last(bb.stmts) && !attach_after_stmt_after(compact, idx)
+    if idx == last(bb.stmts) && !stmts_awaiting_insertion(compact, idx)
         finish_current_bb!(compact, active_bb, old_result_idx)
         active_bb += 1
     end
@@ -1666,7 +1958,7 @@ function iterate_compact(compact::IncrementalCompact)
         idx += 1
         @goto restart
     end
-    @assert isassigned(compact.result.inst, old_result_idx)
+    @assert isassigned(compact.result.stmt, old_result_idx)
     return Pair{Int,Int}(compact.idx-1, old_result_idx)
 end
 
@@ -1676,11 +1968,13 @@ function maybe_erase_unused!(callback::Function, compact::IncrementalCompact, id
     in_worklist::Bool, extra_worklist::Vector{Int})
     nresult = length(compact.result)
     inst = idx ≤ nresult ? compact.result[idx] : compact.new_new_nodes.stmts[idx-nresult]
-    stmt = inst[:inst]
+    stmt = inst[:stmt]
     stmt === nothing && return false
     inst[:type] === Bottom && return false
-    effect_free = (inst[:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) == IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
-    effect_free || return false
+    if !has_flag(inst, IR_FLAGS_REMOVABLE)
+        add_flag!(inst, IR_FLAG_UNUSED)
+        return false
+    end
     foreachssa(stmt) do val::SSAValue
         if compact.used_ssas[val.id] == 1
             if val.id < idx || in_worklist
@@ -1690,7 +1984,7 @@ function maybe_erase_unused!(callback::Function, compact::IncrementalCompact, id
         compact.used_ssas[val.id] -= 1
         callback(val)
     end
-    inst[:inst] = nothing
+    inst[:stmt] = nothing
     return true
 end
 
@@ -1728,14 +2022,22 @@ function fixup_node(compact::IncrementalCompact, @nospecialize(stmt), reify_new_
             return FixedNode(stmt, true)
         end
     elseif isa(stmt, OldSSAValue)
-        val = compact.ssa_rename[stmt.id]
-        if isa(val, Refined)
-            val = val.val
+        node = compact.ssa_rename[stmt.id]
+        if isa(node, Refined)
+            node = node.val
+        end
+        needs_fixup = false
+        if isa(node, NewSSAValue)
+            (;node, needs_fixup) = fixup_node(compact, node, reify_new_nodes)
         end
-        if isa(val, SSAValue)
-            compact.used_ssas[val.id] += 1
+        if isa(node, SSAValue)
+            compact.used_ssas[node.id] += 1
+        elseif isa(node, NewSSAValue)
+            compact.new_new_used_ssas[-node.id] += 1
+        elseif isa(node, OldSSAValue)
+            return fixup_node(compact, node, reify_new_nodes)
         end
-        return FixedNode(val, false)
+        return FixedNode(node, needs_fixup)
     else
         urs = userefs(stmt)
         fixup = false
@@ -1761,9 +2063,9 @@ function just_fixup!(compact::IncrementalCompact, new_new_nodes_offset::Union{In
     set_off = off
     for i in off:length(compact.late_fixup)
         idx = compact.late_fixup[i]
-        stmt = compact.result[idx][:inst]
+        stmt = compact.result[idx][:stmt]
         (;node, needs_fixup) = fixup_node(compact, stmt, late_fixup_offset === nothing)
-        (stmt === node) || (compact.result[idx][:inst] = node)
+        (stmt === node) || (compact.result[idx][:stmt] = node)
         if needs_fixup
             compact.late_fixup[set_off] = idx
             set_off += 1
@@ -1775,10 +2077,10 @@ function just_fixup!(compact::IncrementalCompact, new_new_nodes_offset::Union{In
     off = new_new_nodes_offset === nothing ? 1 : (new_new_nodes_offset+1)
     for idx in off:length(compact.new_new_nodes)
         new_node = compact.new_new_nodes.stmts[idx]
-        stmt = new_node[:inst]
+        stmt = new_node[:stmt]
         (;node) = fixup_node(compact, stmt, late_fixup_offset === nothing)
         if node !== stmt
-            new_node[:inst] = node
+            new_node[:stmt] = node
         end
     end
 end
@@ -1803,9 +2105,11 @@ function non_dce_finish!(compact::IncrementalCompact)
     result_idx = compact.result_idx
     resize!(compact.result, result_idx - 1)
     just_fixup!(compact)
-    bb = compact.cfg_transform.result_bbs[end]
-    compact.cfg_transform.result_bbs[end] = BasicBlock(bb,
-                StmtRange(first(bb.stmts), result_idx-1))
+    if !did_just_finish_bb(compact)
+        # Finish the bb now
+        finish_current_bb!(compact, 0)
+    end
+    result_bbs = resize!(compact.cfg_transform.result_bbs, compact.active_result_bb-1)
     compact.renamed_new_nodes = true
     nothing
 end
@@ -1817,7 +2121,7 @@ function finish(compact::IncrementalCompact)
 end
 
 function complete(compact::IncrementalCompact)
-    result_bbs = resize!(compact.cfg_transform.result_bbs, compact.active_result_bb-1)
+    result_bbs = compact.cfg_transform.result_bbs
     cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)])
     if should_check_ssa_counts()
         oracle_check(compact)
@@ -1826,7 +2130,7 @@ function complete(compact::IncrementalCompact)
     # trim trailing undefined statements due to copy propagation
     nundef = 0
     for i in length(compact.result):-1:1
-        if isassigned(compact.result.inst, i)
+        if isassigned(compact.result.stmt, i)
             break
         end
         nundef += 1
diff --git a/Compiler/src/ssair/irinterp.jl b/Compiler/src/ssair/irinterp.jl
new file mode 100644
index 0000000000000..7b67a6a3c2880
--- /dev/null
+++ b/Compiler/src/ssair/irinterp.jl
@@ -0,0 +1,468 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+function collect_limitations!(@nospecialize(typ), ::IRInterpretationState)
+    @assert !isa(typ, LimitedAccuracy) "irinterp is unable to handle heavy recursion correctly"
+    return typ
+end
+
+function concrete_eval_invoke(interp::AbstractInterpreter, ci::CodeInstance, argtypes::Vector{Any}, parent::IRInterpretationState)
+    world = get_inference_world(interp)
+    effects = decode_effects(ci.ipo_purity_bits)
+    if (is_foldable(effects) && is_all_const_arg(argtypes, #=start=#1) &&
+        (is_nonoverlayed(interp) || is_nonoverlayed(effects)))
+        args = collect_const_args(argtypes, #=start=#1)
+        value = try
+            Core._call_in_world_total(world, args...)
+        catch
+            return Pair{Any,Tuple{Bool,Bool}}(Bottom, (false, is_noub(effects)))
+        end
+        return Pair{Any,Tuple{Bool,Bool}}(Const(value), (true, true))
+    else
+        mi = get_ci_mi(ci)
+        if is_constprop_edge_recursed(mi, parent)
+            return Pair{Any,Tuple{Bool,Bool}}(nothing, (is_nothrow(effects), is_noub(effects)))
+        end
+        src = ci_get_source(interp, ci)
+        newirsv = IRInterpretationState(interp, ci, mi, argtypes, src)
+        if newirsv !== nothing
+            assign_parentchild!(newirsv, parent)
+            return ir_abstract_constant_propagation(interp, newirsv)
+        end
+        return Pair{Any,Tuple{Bool,Bool}}(nothing, (is_nothrow(effects), is_noub(effects)))
+    end
+end
+
+function abstract_eval_invoke_inst(interp::AbstractInterpreter, inst::Instruction, irsv::IRInterpretationState)
+    stmt = inst[:stmt]::Expr
+    ci = stmt.args[1]
+    if ci isa MethodInstance
+        mi_cache = code_cache(interp)
+        code = get(mi_cache, ci, nothing)
+        code === nothing && return Pair{Any,Tuple{Bool,Bool}}(nothing, (false, false))
+        code isa InferenceResult && (code = code.ci) # COMBAK: we shouldn't discard the src so easily here, as we might not be able to get it back again
+    else
+        code = ci::CodeInstance
+    end
+    argtypes = collect_argtypes(interp, stmt.args[2:end], StatementState(nothing, false), irsv)
+    argtypes === nothing && return Pair{Any,Tuple{Bool,Bool}}(Bottom, (false, false))
+    return concrete_eval_invoke(interp, code, argtypes, irsv)
+end
+
+abstract_eval_ssavalue(s::SSAValue, sv::IRInterpretationState) = abstract_eval_ssavalue(s, sv.ir)
+
+function abstract_eval_phi_stmt(interp::AbstractInterpreter, phi::PhiNode, ::Int, irsv::IRInterpretationState)
+    return abstract_eval_phi(interp, phi, StatementState(nothing, false), irsv)
+end
+
+function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sstate::StatementState, irsv::IRInterpretationState)
+    si = StmtInfo(true, sstate.saw_latestworld) # TODO better job here?
+    call = abstract_call(interp, arginfo, si, irsv)::Future
+    Future{Any}(call, interp, irsv) do call, interp, irsv
+        irsv.ir.stmts[irsv.curridx][:info] = call.info
+        nothing
+    end
+    return call
+end
+
+function kill_block!(ir::IRCode, bb::Int)
+    # Kill the entire block
+    stmts = ir.cfg.blocks[bb].stmts
+    for bidx = stmts
+        inst = ir[SSAValue(bidx)]
+        inst[:stmt] = nothing
+        inst[:type] = Bottom
+        inst[:flag] = IR_FLAGS_REMOVABLE
+    end
+    ir[SSAValue(last(stmts))][:stmt] = ReturnNode()
+    return
+end
+kill_block!(ir::IRCode) = (bb::Int)->kill_block!(ir, bb)
+
+function update_phi!(irsv::IRInterpretationState, from::Int, to::Int)
+    ir = irsv.ir
+    if length(ir.cfg.blocks[to].preds) == 0
+        kill_block!(ir, to)
+    end
+    for sidx = ir.cfg.blocks[to].stmts
+        stmt = ir[SSAValue(sidx)][:stmt]
+        isa(stmt, Nothing) && continue # allowed between `PhiNode`s
+        isa(stmt, PhiNode) || break
+        for (eidx, edge) in enumerate(stmt.edges)
+            if edge == from
+                deleteat!(stmt.edges, eidx)
+                deleteat!(stmt.values, eidx)
+                push!(irsv.ssa_refined, sidx)
+                break
+            end
+        end
+    end
+end
+update_phi!(irsv::IRInterpretationState) = (from::Int, to::Int)->update_phi!(irsv, from, to)
+
+function kill_terminator_edges!(irsv::IRInterpretationState, term_idx::Int, bb::Int=block_for_inst(irsv.ir, term_idx))
+    ir = irsv.ir
+    stmt = ir[SSAValue(term_idx)][:stmt]
+    if isa(stmt, GotoIfNot)
+        kill_edge!(irsv, bb, stmt.dest)
+        kill_edge!(irsv, bb, bb+1)
+    elseif isa(stmt, GotoNode)
+        kill_edge!(irsv, bb, stmt.label)
+    elseif isa(stmt, ReturnNode)
+        # Nothing to do
+    else
+        @assert !isa(stmt, EnterNode)
+        kill_edge!(irsv, bb, bb+1)
+    end
+end
+
+function kill_edge!(irsv::IRInterpretationState, from::Int, to::Int)
+    kill_edge!(get!(irsv.lazyreachability), irsv.ir.cfg, from, to,
+               update_phi!(irsv), kill_block!(irsv.ir))
+end
+
+function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction, idx::Int,
+                                bb::Union{Int,Nothing}, irsv::IRInterpretationState)
+    ir = irsv.ir
+    stmt = inst[:stmt]
+    if isa(stmt, GotoIfNot)
+        cond = stmt.cond
+        condval = maybe_extract_const_bool(argextype(cond, ir))
+        if condval isa Bool
+            if isa(cond, SSAValue)
+                kill_def_use!(irsv.tpdum, cond, idx)
+            end
+            if bb === nothing
+                bb = block_for_inst(ir, idx)
+            end
+            add_flag!(inst, IR_FLAG_NOTHROW)
+            if condval
+                inst[:stmt] = nothing
+                kill_edge!(irsv, bb, stmt.dest)
+            else
+                inst[:stmt] = GotoNode(stmt.dest)
+                kill_edge!(irsv, bb, bb+1)
+            end
+            return true
+        end
+        return false
+    end
+    rt = nothing
+    if isa(stmt, Expr)
+        head = stmt.head
+        if (head === :call || head === :foreigncall || head === :new || head === :splatnew ||
+            head === :static_parameter || head === :isdefined || head === :boundscheck)
+            @assert isempty(irsv.tasks) # TODO: this whole function needs to be converted to a stackless design to be a valid AbsIntState, but this should work here for now
+            result = abstract_eval_statement_expr(interp, stmt, StatementState(nothing, false), irsv)
+            reverse!(irsv.tasks)
+            while true
+                if length(irsv.callstack) > irsv.frameid
+                    typeinf(interp, irsv.callstack[irsv.frameid + 1])
+                elseif !doworkloop(interp, irsv)
+                    break
+                end
+            end
+            @assert length(irsv.callstack) == irsv.frameid && isempty(irsv.tasks)
+            result isa Future && (result = result[])
+            (; rt, effects) = result
+            add_flag!(inst, flags_for_effects(effects))
+        elseif head === :invoke  # COMBAK: || head === :invoke_modifyfield (similar to call, but for args[2:end])
+            rt, (nothrow, noub) = abstract_eval_invoke_inst(interp, inst, irsv)
+            if nothrow
+                add_flag!(inst, IR_FLAG_NOTHROW)
+            end
+            if noub
+                add_flag!(inst, IR_FLAG_NOUB)
+            end
+        elseif head === :throw_undef_if_not
+            condval = maybe_extract_const_bool(argextype(stmt.args[2], ir))
+            condval isa Bool || return false
+            if condval
+                inst[:stmt] = nothing
+                # We simplified the IR, but we did not update the type
+                return false
+            end
+            rt = Union{}
+        elseif head === :gc_preserve_begin ||
+               head === :gc_preserve_end
+            return false
+        elseif head === :leave
+            return false
+        else
+            Core.println(stmt)
+            error("reprocess_instruction!: unhandled expression found")
+        end
+    elseif isa(stmt, PhiNode)
+        rt = abstract_eval_phi_stmt(interp, stmt, idx, irsv)
+    elseif isa(stmt, UpsilonNode)
+        rt = argextype(stmt.val, irsv.ir)
+    elseif isa(stmt, PhiCNode)
+        # Currently not modeled
+        return false
+    elseif isa(stmt, EnterNode)
+        # TODO: Propagate scope type changes
+        return false
+    elseif isa(stmt, ReturnNode)
+        # Handled at the very end
+        return false
+    elseif isa(stmt, PiNode)
+        rt = tmeet(typeinf_lattice(interp), argextype(stmt.val, ir), widenconst(stmt.typ))
+    elseif stmt === nothing
+        return false
+    elseif isa(stmt, GlobalRef)
+        # GlobalRef is not refinable
+    else
+        rt = argextype(stmt, irsv.ir)
+    end
+    @assert !(rt isa LimitedAccuracy)
+    if rt !== nothing
+        if has_flag(inst, IR_FLAG_UNUSED)
+            # Don't bother checking the type if we know it's unused
+            if has_flag(inst, IR_FLAGS_REMOVABLE)
+                inst[:stmt] = nothing
+            end
+            return false
+        end
+        if isa(rt, Const)
+            inst[:type] = rt
+            if is_inlineable_constant(rt.val) && has_flag(inst, IR_FLAGS_REMOVABLE)
+                inst[:stmt] = quoted(rt.val)
+            end
+            return true
+        elseif !⊑(typeinf_lattice(interp), inst[:type], rt)
+            inst[:type] = rt
+            return true
+        end
+    end
+    return false
+end
+
+# Process the terminator and add the successor to `bb_ip`. Returns whether a backedge was seen.
+function process_terminator!(@nospecialize(stmt), bb::Int, bb_ip::BitSetBoundedMinPrioritySet)
+    if isa(stmt, ReturnNode)
+        return false
+    elseif isa(stmt, GotoNode)
+        backedge = stmt.label <= bb
+        backedge || push!(bb_ip, stmt.label)
+        return backedge
+    elseif isa(stmt, GotoIfNot)
+        backedge = stmt.dest <= bb
+        backedge || push!(bb_ip, stmt.dest)
+        push!(bb_ip, bb+1)
+        return backedge
+    elseif isa(stmt, EnterNode)
+        dest = stmt.catch_dest
+        if dest ≠ 0
+            @assert dest > bb
+            push!(bb_ip, dest)
+        end
+        push!(bb_ip, bb+1)
+        return false
+    else
+        push!(bb_ip, bb+1)
+        return false
+    end
+end
+
+struct BBScanner
+    ir::IRCode
+    bb_ip::BitSetBoundedMinPrioritySet
+end
+
+function BBScanner(ir::IRCode)
+    bbs = ir.cfg.blocks
+    bb_ip = BitSetBoundedMinPrioritySet(length(bbs))
+    push!(bb_ip, 1)
+    return BBScanner(ir, bb_ip)
+end
+
+function scan!(callback, scanner::BBScanner, forwards_only::Bool)
+    (; bb_ip, ir) = scanner
+    bbs = ir.cfg.blocks
+    while !isempty(bb_ip)
+        bb = popfirst!(bb_ip)
+        stmts = bbs[bb].stmts
+        lstmt = last(stmts)
+        for idx = stmts
+            inst = ir[SSAValue(idx)]
+            ret = callback(inst, lstmt, bb)
+            ret === nothing && return true
+            ret::Bool || break
+            idx == lstmt && process_terminator!(inst[:stmt], bb, bb_ip) && forwards_only && return false
+        end
+    end
+    return true
+end
+
+function populate_def_use_map!(tpdum::TwoPhaseDefUseMap, scanner::BBScanner)
+    scan!(scanner, false) do inst::Instruction, lstmt::Int, bb::Int
+        for ur in userefs(inst[:stmt])
+            val = ur[]
+            if isa(val, SSAValue)
+                push!(tpdum[val.id], inst.idx)
+            end
+        end
+        return true
+    end
+end
+populate_def_use_map!(tpdum::TwoPhaseDefUseMap, ir::IRCode) =
+    populate_def_use_map!(tpdum, BBScanner(ir))
+
+function is_all_const_call(@nospecialize(stmt), interp::AbstractInterpreter, irsv::IRInterpretationState)
+    isexpr(stmt, :call) || return false
+    @inbounds for i = 2:length(stmt.args)
+        argtype = abstract_eval_value(interp, stmt.args[i], StatementState(nothing, false), irsv)
+        is_const_argtype(argtype) || return false
+    end
+    return true
+end
+
+function ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState;
+        externally_refined::Union{Nothing,BitSet} = nothing)
+    (; ir, tpdum, ssa_refined) = irsv
+
+    @assert isempty(ir.new_nodes) "IRCode should be compacted before irinterp"
+
+    all_rets = Int[]
+    scanner = BBScanner(ir)
+
+    check_ret!(@nospecialize(stmt), idx::Int) = isa(stmt, ReturnNode) && isdefined(stmt, :val) && push!(all_rets, idx)
+
+    # Fast path: Scan both use counts and refinement in one single pass of
+    #            of the instructions. In the absence of backedges, this will
+    #            converge.
+    completed_scan = scan!(scanner, true) do inst::Instruction, lstmt::Int, bb::Int
+        idx = inst.idx
+        irsv.curridx = idx
+        stmt = inst[:stmt]
+        typ = inst[:type]
+        flag = inst[:flag]
+        any_refined = false
+        if has_flag(flag, IR_FLAG_REFINED)
+            any_refined = true
+            sub_flag!(inst, IR_FLAG_REFINED)
+        elseif is_all_const_call(stmt, interp, irsv)
+            # force reinference on calls with all constant arguments
+            any_refined = true
+        end
+        for ur in userefs(stmt)
+            val = ur[]
+            if isa(val, Argument)
+                any_refined |= irsv.argtypes_refined[val.n]
+            elseif isa(val, SSAValue)
+                any_refined |= val.id in ssa_refined
+                count!(tpdum, val)
+            end
+        end
+        if isa(stmt, PhiNode) && idx in ssa_refined
+            any_refined = true
+            delete!(ssa_refined, idx)
+        end
+        check_ret!(stmt, idx)
+        is_terminator_or_phi = (isa(stmt, PhiNode) || stmt === nothing || isterminator(stmt))
+        if typ === Bottom && !(idx == lstmt && is_terminator_or_phi)
+            return true
+        end
+        if (any_refined && reprocess_instruction!(interp, inst, idx, bb, irsv)) ||
+            (externally_refined !== nothing && idx in externally_refined)
+            push!(ssa_refined, idx)
+            stmt = inst[:stmt]
+            typ = inst[:type]
+        end
+        if typ === Bottom && !is_terminator_or_phi
+            kill_terminator_edges!(irsv, lstmt, bb)
+            if idx != lstmt
+                for idx2 in (idx+1:lstmt-1)
+                    ir[SSAValue(idx2)] = nothing
+                end
+                ir[SSAValue(lstmt)][:stmt] = ReturnNode()
+            end
+            return false
+        end
+        return true
+    end
+
+    if !completed_scan
+        # Slow path
+        stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts))
+
+        # Slow Path Phase 1.A: Complete use scanning
+        scan!(scanner, false) do inst::Instruction, lstmt::Int, bb::Int
+            idx = inst.idx
+            irsv.curridx = idx
+            stmt = inst[:stmt]
+            flag = inst[:flag]
+            if has_flag(flag, IR_FLAG_REFINED)
+                sub_flag!(inst, IR_FLAG_REFINED)
+                push!(stmt_ip, idx)
+            end
+            check_ret!(stmt, idx)
+            for ur in userefs(stmt)
+                val = ur[]
+                if isa(val, Argument)
+                    if irsv.argtypes_refined[val.n]
+                        push!(stmt_ip, idx)
+                    end
+                elseif isa(val, SSAValue)
+                    count!(tpdum, val)
+                end
+            end
+            return true
+        end
+
+        # Slow Path Phase 1.B: Assemble def-use map
+        complete!(tpdum); push!(scanner.bb_ip, 1)
+        populate_def_use_map!(tpdum, scanner)
+
+        # Slow Path Phase 2: Use def-use map to converge cycles.
+        # TODO: It would be possible to return to the fast path after converging
+        #       each cycle, but that's somewhat complicated.
+        for val in ssa_refined
+            for use in tpdum[val]
+                if !(use in ssa_refined)
+                    push!(stmt_ip, use)
+                end
+            end
+        end
+        while !isempty(stmt_ip)
+            idx = popfirst!(stmt_ip)
+            irsv.curridx = idx
+            inst = ir[SSAValue(idx)]
+            if reprocess_instruction!(interp, inst, idx, nothing, irsv)
+                append!(stmt_ip, tpdum[idx])
+            end
+        end
+    end
+
+    ultimate_rt = Bottom
+    for idx in all_rets
+        bb = block_for_inst(ir.cfg, idx)
+        if bb != 1 && length(ir.cfg.blocks[bb].preds) == 0
+            # Could have discovered this block is dead after the initial scan
+            continue
+        end
+        inst = ir[SSAValue(idx)][:stmt]::ReturnNode
+        rt = argextype(inst.val, ir)
+        ultimate_rt = tmerge(typeinf_lattice(interp), ultimate_rt, rt)
+    end
+
+    nothrow = noub = true
+    for idx = 1:length(ir.stmts)
+        if ir[SSAValue(idx)][:stmt] === nothing
+            # skip `nothing` statement, which might be inserted as a dummy node,
+            # e.g. by `finish_current_bb!` without explicitly marking it as `:nothrow`
+            continue
+        end
+        flag = ir[SSAValue(idx)][:flag]
+        nothrow &= has_flag(flag, IR_FLAG_NOTHROW)
+        noub &= has_flag(flag, IR_FLAG_NOUB)
+        (nothrow | noub) || break
+    end
+
+    if irsv.frameid != 0
+        callstack = irsv.callstack::Vector{AbsIntState}
+        @assert callstack[end] === irsv && length(callstack) == irsv.frameid
+        pop!(callstack)
+    end
+
+    return Pair{Any,Tuple{Bool,Bool}}(maybe_singleton_const(ultimate_rt), (nothrow, noub))
+end
diff --git a/base/compiler/ssair/legacy.jl b/Compiler/src/ssair/legacy.jl
similarity index 68%
rename from base/compiler/ssair/legacy.jl
rename to Compiler/src/ssair/legacy.jl
index e2c924d60cb83..f7cb7953f88f7 100644
--- a/base/compiler/ssair/legacy.jl
+++ b/Compiler/src/ssair/legacy.jl
@@ -1,16 +1,22 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 """
-    inflate_ir!(ci::CodeInfo, linfo::MethodInstance) -> ir::IRCode
+    inflate_ir!(ci::CodeInfo, mi::MethodInstance) -> ir::IRCode
     inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode
 
 Inflates `ci::CodeInfo`-IR to `ir::IRCode`-format.
 This should be used with caution as it is a in-place transformation where the fields of
 the original `ci::CodeInfo` are modified.
 """
-function inflate_ir!(ci::CodeInfo, linfo::MethodInstance)
-    sptypes = sptypes_from_meth_instance(linfo)
-    argtypes, _ = matching_cache_argtypes(fallback_lattice, linfo)
+function inflate_ir!(ci::CodeInfo, mi::MethodInstance)
+    sptypes = sptypes_from_meth_instance(mi)
+    if ci.slottypes === nothing
+        argtypes = va_process_argtypes(fallback_lattice,
+            matching_cache_argtypes(fallback_lattice, mi),
+            ci.nargs, ci.isva, mi)
+    else
+        argtypes = ci.slottypes[1:ci.nargs]
+    end
     return inflate_ir!(ci, sptypes, argtypes)
 end
 function inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any})
@@ -25,9 +31,8 @@ function inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{A
             code[i] = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest))
         elseif isa(stmt, PhiNode)
             code[i] = PhiNode(Int32[block_for_inst(cfg, Int(edge)) for edge in stmt.edges], stmt.values)
-        elseif isexpr(stmt, :enter)
-            stmt.args[1] = block_for_inst(cfg, stmt.args[1]::Int)
-            code[i] = stmt
+        elseif isa(stmt, EnterNode)
+            code[i] = EnterNode(stmt, stmt.catch_dest == 0 ? 0 : block_for_inst(cfg, stmt.catch_dest))
         end
     end
     nstmts = length(code)
@@ -36,24 +41,23 @@ function inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{A
         ssavaluetypes = Any[ Any for i = 1:ssavaluetypes::Int ]
     end
     info = CallInfo[NoCallInfo() for i = 1:nstmts]
-    stmts = InstructionStream(code, ssavaluetypes, info, ci.codelocs, ci.ssaflags)
-    linetable = ci.linetable
-    if !isa(linetable, Vector{LineInfoNode})
-        linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode}
-    end
+    di = DebugInfoStream(nothing, ci.debuginfo, nstmts)
+    stmts = InstructionStream(code, ssavaluetypes, info, di.codelocs, ci.ssaflags)
     meta = Expr[]
-    return IRCode(stmts, cfg, linetable, argtypes, meta, sptypes)
+    return IRCode(stmts, cfg, di, argtypes, meta, sptypes, world_range(ci))
 end
 
 """
-    inflate_ir(ci::CodeInfo, linfo::MethodInstance) -> ir::IRCode
-    inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode
     inflate_ir(ci::CodeInfo) -> ir::IRCode
+    inflate_ir(ci::CodeInfo, mi::MethodInstance) -> ir::IRCode
+    inflate_ir(ci::CodeInfo, argtypes::Vector{Any}) -> ir::IRCode
+    inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode
 
 Non-destructive version of `inflate_ir!`.
 Mainly used for testing or interactive use.
 """
-inflate_ir(ci::CodeInfo, linfo::MethodInstance) = inflate_ir!(copy(ci), linfo)
+inflate_ir(ci::CodeInfo, mi::MethodInstance) = inflate_ir!(copy(ci), mi)
+inflate_ir(ci::CodeInfo, argtypes::Vector{Any}) = inflate_ir(ci, VarState[], argtypes)
 inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) = inflate_ir!(copy(ci), sptypes, argtypes)
 function inflate_ir(ci::CodeInfo)
     parent = ci.parent
@@ -72,17 +76,19 @@ function replace_code_newstyle!(ci::CodeInfo, ir::IRCode)
     resize!(ci.slotflags, nargs)
     resize!(ci.slottypes, nargs)
     stmts = ir.stmts
-    code = ci.code = stmts.inst
+    code = ci.code = stmts.stmt
     ssavaluetypes = ci.ssavaluetypes = stmts.type
-    codelocs = ci.codelocs = stmts.line
+    codelocs = stmts.line
     ssaflags = ci.ssaflags = stmts.flag
-    linetable = ci.linetable = ir.linetable
+    debuginfo = ir.debuginfo
     for metanode in ir.meta
         push!(code, metanode)
-        push!(codelocs, 1)
+        push!(codelocs, 1, 0, 0)
         push!(ssavaluetypes, Any)
         push!(ssaflags, IR_FLAG_NULL)
     end
+    @assert debuginfo.codelocs === stmts.line "line table not from debuginfo"
+    ci.debuginfo = DebugInfo(debuginfo, length(code))
     # Translate BB Edges to statement edges
     # (and undo normalization for now)
     for i = 1:length(code)
@@ -93,9 +99,8 @@ function replace_code_newstyle!(ci::CodeInfo, ir::IRCode)
             code[i] = GotoIfNot(stmt.cond, first(ir.cfg.blocks[stmt.dest].stmts))
         elseif isa(stmt, PhiNode)
             code[i] = PhiNode(Int32[edge == 0 ? 0 : last(ir.cfg.blocks[edge].stmts) for edge in stmt.edges], stmt.values)
-        elseif isexpr(stmt, :enter)
-            stmt.args[1] = first(ir.cfg.blocks[stmt.args[1]::Int].stmts)
-            code[i] = stmt
+        elseif isa(stmt, EnterNode)
+            code[i] = EnterNode(stmt, stmt.catch_dest == 0 ? 0 : first(ir.cfg.blocks[stmt.catch_dest].stmts))
         end
     end
 end
diff --git a/base/compiler/ssair/passes.jl b/Compiler/src/ssair/passes.jl
similarity index 63%
rename from base/compiler/ssair/passes.jl
rename to Compiler/src/ssair/passes.jl
index cf3a2118743a0..0a739815f4ac9 100644
--- a/base/compiler/ssair/passes.jl
+++ b/Compiler/src/ssair/passes.jl
@@ -6,6 +6,15 @@ function is_known_call(@nospecialize(x), @nospecialize(func), ir::Union{IRCode,I
     return singleton_type(ft) === func
 end
 
+function is_known_invoke_or_call(@nospecialize(x), @nospecialize(func), ir::Union{IRCode,IncrementalCompact})
+    isinvoke = isexpr(x, :invoke)
+    (isinvoke || isexpr(x, :call)) || return false
+    narg = isinvoke ? 2 : 1
+    length(x.args) < narg && return false
+    ft = argextype(x.args[narg], ir)
+    return singleton_type(ft) === func
+end
+
 struct SSAUse
     kind::Symbol
     idx::Int
@@ -64,7 +73,7 @@ function try_compute_field(ir::Union{IncrementalCompact,IRCode}, @nospecialize(f
 end
 
 # assume `stmt` is a call of `getfield`/`setfield!`/`isdefined`
-function try_compute_fieldidx_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr, typ::DataType)
+function try_compute_fieldidx_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr, @nospecialize(typ))
     field = try_compute_field(ir, stmt.args[3])
     return try_compute_fieldidx(typ, field)
 end
@@ -72,19 +81,18 @@ end
 function find_curblock(domtree::DomTree, allblocks::BitSet, curblock::Int)
     # TODO: This can be much faster by looking at current level and only
     # searching for those blocks in a sorted order
-    while !(curblock in allblocks) && curblock !== 0
+    while curblock ∉ allblocks && curblock ≠ 0
         curblock = domtree.idoms_bb[curblock]
     end
     return curblock
 end
 
 function val_for_def_expr(ir::IRCode, def::Int, fidx::Int)
-    ex = ir[SSAValue(def)][:inst]
+    ex = ir[SSAValue(def)][:stmt]
     if isexpr(ex, :new)
         return ex.args[1+fidx]
     else
-        @assert isa(ex, Expr)
-        # The use is whatever the setfield was
+        @assert is_known_call(ex, setfield!, ir)
         return ex.args[4]
     end
 end
@@ -177,15 +185,28 @@ function find_def_for_use(
 end
 
 function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typeconstraint), 𝕃ₒ::AbstractLattice,
-                        predecessors = ((@nospecialize(def), compact::IncrementalCompact) -> isa(def, PhiNode) ? def.values : nothing))
+                        predecessors::Pre = ((@nospecialize(def), compact::IncrementalCompact) -> isa(def, PhiNode) ? def.values : nothing)) where {Pre}
     if isa(val, Union{OldSSAValue, SSAValue})
         val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint)
     end
     return walk_to_defs(compact, val, typeconstraint, predecessors, 𝕃ₒ)
 end
 
-function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#),
-                     callback = (@nospecialize(pi), @nospecialize(idx)) -> false)
+abstract type WalkerCallback end
+
+struct TrivialWalker <: WalkerCallback end
+(::TrivialWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue)) = nothing
+
+struct PiWalker <: WalkerCallback end
+function (::PiWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if isa(def, PiNode)
+        return LiftedValue(def.val)
+    end
+    return nothing
+end
+
+function simple_walk(compact::IncrementalCompact, @nospecialize(defssa::AnySSAValue),
+                     walker_callback::WalkerCallback=TrivialWalker())
     while true
         if isa(defssa, OldSSAValue)
             if already_inserted(compact, defssa)
@@ -200,41 +221,46 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
                 return rename
             end
         end
-        def = compact[defssa][:inst]
-        if isa(def, PiNode)
-            if callback(def, defssa)
-                return defssa
-            end
-            def = def.val
+        def = compact[defssa][:stmt]
+        if isa(def, AnySSAValue)
+            walker_callback(def, defssa)
             if isa(def, SSAValue)
                 is_old(compact, defssa) && (def = OldSSAValue(def.id))
-            else
-                return def
             end
             defssa = def
-        elseif isa(def, AnySSAValue)
-            callback(def, defssa)
-            if isa(def, SSAValue)
-                is_old(compact, defssa) && (def = OldSSAValue(def.id))
-            end
-            defssa = def
-        elseif isa(def, Union{PhiNode, PhiCNode, Expr, GlobalRef})
+        elseif isa(def, Union{PhiNode, PhiCNode, GlobalRef})
             return defssa
         else
-            return def
+            new_def = walker_callback(def, defssa)
+            if new_def === nothing
+                return defssa
+            end
+            new_def = new_def.val
+            if !isa(new_def, AnySSAValue)
+                return new_def
+            elseif isa(new_def, SSAValue)
+                is_old(compact, defssa) && (new_def = OldSSAValue(new_def.id))
+            end
+            defssa = new_def
         end
     end
 end
 
-function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#),
-                                @nospecialize(typeconstraint))
-    callback = function (@nospecialize(pi), @nospecialize(idx))
-        if isa(pi, PiNode)
-            typeconstraint = typeintersect(typeconstraint, widenconst(pi.typ))
-        end
-        return false
+mutable struct TypeConstrainingWalker <: WalkerCallback
+    typeconstraint::Any
+    TypeConstrainingWalker(@nospecialize(typeconstraint::Any)) = new(typeconstraint)
+end
+function (walker_callback::TypeConstrainingWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if isa(def, PiNode)
+        walker_callback.typeconstraint =
+            typeintersect(walker_callback.typeconstraint, widenconst(def.typ))
+        return LiftedValue(def.val)
     end
-    def = simple_walk(compact, defssa, callback)
+    return nothing
+end
+function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(val::AnySSAValue),
+                                @nospecialize(typeconstraint))
+    def = simple_walk(compact, val, TypeConstrainingWalker(typeconstraint))
     return Pair{Any, Any}(def, typeconstraint)
 end
 
@@ -247,10 +273,10 @@ Starting at `val` walk use-def chains to get all the leaves feeding into this `v
 `predecessors(def, compact)` is a callback which should return the set of possible
 predecessors for a "phi-like" node (PhiNode or Core.ifelse) or `nothing` otherwise.
 """
-function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), predecessors, 𝕃ₒ::AbstractLattice)
+function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), predecessors::Pre, 𝕃ₒ::AbstractLattice) where {Pre}
     visited_philikes = AnySSAValue[]
     isa(defssa, AnySSAValue) || return Any[defssa], visited_philikes
-    def = compact[defssa][:inst]
+    def = compact[defssa][:stmt]
     if predecessors(def, compact) === nothing
         return Any[defssa], visited_philikes
     end
@@ -264,10 +290,12 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe
         defssa = pop!(worklist_defs)
         typeconstraint = pop!(worklist_constraints)
         visited_constraints[defssa] = typeconstraint
-        def = compact[defssa][:inst]
+        def = compact[defssa][:stmt]
         values = predecessors(def, compact)
         if values !== nothing
-            push!(visited_philikes, defssa)
+            if isa(def, PhiNode) || length(values) > 1
+                push!(visited_philikes, defssa)
+            end
             possible_predecessors = Int[]
 
             for n in 1:length(values)
@@ -330,17 +358,23 @@ function record_immutable_preserve!(new_preserves::Vector{Any}, def::Expr, compa
 end
 
 function already_inserted(compact::IncrementalCompact, old::OldSSAValue)
-    id = old.id
-    if id < length(compact.ir.stmts)
-        return id < compact.idx
-    end
-    id -= length(compact.ir.stmts)
-    if id < length(compact.ir.new_nodes)
-        return already_inserted(compact, OldSSAValue(compact.ir.new_nodes.info[id].pos))
+    already_inserted_ssa(compact, compact.idx-1)(0, old)
+end
+
+function already_inserted_ssa(compact::IncrementalCompact, processed_idx::Int)
+    return function did_already_insert(phi_arg::Int, old::OldSSAValue)
+        id = old.id
+        if id <= length(compact.ir.stmts)
+            return id <= processed_idx
+        end
+        id -= length(compact.ir.stmts)
+        if id <= length(compact.ir.new_nodes)
+            return did_already_insert(phi_arg, OldSSAValue(compact.ir.new_nodes.info[id].pos))
+        end
+        id -= length(compact.ir.new_nodes)
+        @assert id <= length(compact.pending_nodes)
+        return !(id in compact.pending_perm)
     end
-    id -= length(compact.ir.new_nodes)
-    @assert id <= length(compact.pending_nodes)
-    return !(id in compact.pending_perm)
 end
 
 function is_pending(compact::IncrementalCompact, old::OldSSAValue)
@@ -383,7 +417,9 @@ function lift_leaves(compact::IncrementalCompact, field::Int,
             elseif isexpr(def, :new)
                 typ = unwrap_unionall(widenconst(types(compact)[leaf]))
                 (isa(typ, DataType) && !isabstracttype(typ)) || return nothing
-                @assert !ismutabletype(typ)
+                if ismutabletype(typ)
+                    isconst(typ, field) || return nothing
+                end
                 if length(def.args) < 1+field
                     if field > fieldcount(typ)
                         return nothing
@@ -404,7 +440,7 @@ function lift_leaves(compact::IncrementalCompact, field::Int,
             #      `:new` expressions by the inlinear
             # elseif isexpr(def, :splatnew) && length(def.args) == 2 && isa(def.args[2], AnySSAValue)
             #     tplssa = def.args[2]::AnySSAValue
-            #     tplexpr = compact[tplssa][:inst]
+            #     tplexpr = compact[tplssa][:stmt]
             #     if is_known_call(tplexpr, tuple, compact) && 1 ≤ field < length(tplexpr.args)
             #         lift_arg!(compact, tplssa, cache_key, tplexpr, 1+field, lifted_leaves)
             #         continue
@@ -417,8 +453,8 @@ function lift_leaves(compact::IncrementalCompact, field::Int,
                     ocleaf = simple_walk(compact, ocleaf)
                 end
                 ocdef, _ = walk_to_def(compact, ocleaf)
-                if isexpr(ocdef, :new_opaque_closure) && isa(field, Int) && 1 ≤ field ≤ length(ocdef.args)-4
-                    lift_arg!(compact, leaf, cache_key, ocdef, 4+field, lifted_leaves)
+                if isexpr(ocdef, :new_opaque_closure) && isa(field, Int) && 1 ≤ field ≤ length(ocdef.args)-5
+                    lift_arg!(compact, leaf, cache_key, ocdef, 5+field, lifted_leaves)
                     continue
                 end
                 return nothing
@@ -440,9 +476,9 @@ function lift_leaves(compact::IncrementalCompact, field::Int,
         elseif isa(leaf, QuoteNode)
             leaf = leaf.value
         elseif isa(leaf, GlobalRef)
-            mod, name = leaf.mod, leaf.name
-            if isdefined(mod, name) && isconst(mod, name)
-                leaf = getglobal(mod, name)
+            typ = argextype(leaf, compact)
+            if isa(typ, Const)
+                leaf = typ.val
             else
                 return nothing
             end
@@ -465,19 +501,15 @@ function lift_arg!(
     if is_old(compact, leaf) && isa(lifted, SSAValue)
         lifted = OldSSAValue(lifted.id)
         if already_inserted(compact, lifted)
-            lifted = compact.ssa_rename[lifted.id]
-            if isa(lifted, Refined)
-                lifted = lifted.val
+            new_lifted = compact.ssa_rename[lifted.id]
+            if isa(new_lifted, Refined)
+                new_lifted = new_lifted.val
+            end
+            # Special case: If lifted happens to be the statement we're currently processing,
+            # leave it as old SSAValue in case we decide to handle this in the renamer
+            if !isa(new_lifted, SSAValue) || new_lifted != SSAValue(compact.result_idx-1)
+                lifted = new_lifted
             end
-        end
-    end
-    if isa(lifted, GlobalRef) || isa(lifted, Expr)
-        lifted = insert_node!(compact, leaf, effect_free_and_nothrow(NewInstruction(lifted, argextype(lifted, compact))))
-        compact[leaf] = nothing
-        stmt.args[argidx] = lifted
-        compact[leaf] = stmt
-        if isa(leaf, SSAValue) && leaf.id < compact.result_idx
-            push!(compact.late_fixup, leaf.id)
         end
     end
     lifted_leaves[cache_key] = LiftedValue(lifted)
@@ -494,12 +526,12 @@ function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf))
             leaf = simple_walk(compact, leaf)
         end
         if isa(leaf, AnySSAValue)
-            def = compact[leaf][:inst]
+            def = compact[leaf][:stmt]
         else
             def = leaf
         end
     elseif isa(leaf, AnySSAValue)
-        def = compact[leaf][:inst]
+        def = compact[leaf][:stmt]
     else
         def = leaf
     end
@@ -522,8 +554,7 @@ end
 function lift_comparison! end
 
 function lift_comparison!(::typeof(===), compact::IncrementalCompact,
-    idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue},
-    𝕃ₒ::AbstractLattice)
+    idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice)
     args = stmt.args
     length(args) == 3 || return
     lhs, rhs = args[2], args[3]
@@ -539,28 +570,26 @@ function lift_comparison!(::typeof(===), compact::IncrementalCompact,
     else
         return
     end
-    lift_comparison_leaves!(egal_tfunc, compact, val, cmp, lifting_cache, idx, 𝕃ₒ)
+    lift_comparison_leaves!(egal_tfunc, compact, val, cmp, idx, 𝕃ₒ)
 end
 
 function lift_comparison!(::typeof(isa), compact::IncrementalCompact,
-    idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue},
-    𝕃ₒ::AbstractLattice)
+    idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice)
     args = stmt.args
     length(args) == 3 || return
     cmp = argextype(args[3], compact)
     val = args[2]
-    lift_comparison_leaves!(isa_tfunc, compact, val, cmp, lifting_cache, idx, 𝕃ₒ)
+    lift_comparison_leaves!(isa_tfunc, compact, val, cmp, idx, 𝕃ₒ)
 end
 
 function lift_comparison!(::typeof(isdefined), compact::IncrementalCompact,
-    idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue},
-    𝕃ₒ::AbstractLattice)
+    idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice)
     args = stmt.args
     length(args) == 3 || return
     cmp = argextype(args[3], compact)
     isa(cmp, Const) || return # `isdefined_tfunc` won't return Const
     val = args[2]
-    lift_comparison_leaves!(isdefined_tfunc, compact, val, cmp, lifting_cache, idx, 𝕃ₒ)
+    lift_comparison_leaves!(isdefined_tfunc, compact, val, cmp, idx, 𝕃ₒ)
 end
 
 function phi_or_ifelse_predecessors(@nospecialize(def), compact::IncrementalCompact)
@@ -571,15 +600,13 @@ end
 
 function lift_comparison_leaves!(@specialize(tfunc),
     compact::IncrementalCompact, @nospecialize(val), @nospecialize(cmp),
-    lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, idx::Int,
-    𝕃ₒ::AbstractLattice)
+    idx::Int, 𝕃ₒ::AbstractLattice)
     typeconstraint = widenconst(argextype(val, compact))
     if isa(val, Union{OldSSAValue, SSAValue})
         val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint)
     end
     isa(typeconstraint, Union) || return # bail out if there won't be a good chance for lifting
 
-
     leaves, visited_philikes = collect_leaves(compact, val, typeconstraint, 𝕃ₒ, phi_or_ifelse_predecessors)
     length(leaves) ≤ 1 && return # bail out if we don't have multiple leaves
 
@@ -599,11 +626,12 @@ function lift_comparison_leaves!(@specialize(tfunc),
     end
 
     # perform lifting
-    lifted_val = perform_lifting!(compact,
-        visited_philikes, cmp, lifting_cache, Bool,
-        lifted_leaves::LiftedLeaves, val, nothing)::LiftedValue
+    (lifted_val, nest) = perform_lifting!(compact,
+        visited_philikes, cmp, Bool, lifted_leaves::LiftedLeaves, val, nothing)
 
-    compact[idx] = lifted_val.val
+    compact[idx] = (lifted_val::LiftedValue).val
+
+    finish_phi_nest!(compact, nest)
 end
 
 struct IfElseCall
@@ -620,14 +648,17 @@ end
 
 struct SkipToken end; const SKIP_TOKEN = SkipToken()
 
-function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=::AnySSAValue=#), @nospecialize(old_value),
-                      lifted_philikes::Vector{LiftedPhilike}, lifted_leaves::Union{LiftedLeaves, LiftedDefs}, reverse_mapping::IdDict{AnySSAValue, Int})
+function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa::AnySSAValue),
+                      @nospecialize(old_value), lifted_philikes::Vector{LiftedPhilike},
+                      lifted_leaves::Union{LiftedLeaves, LiftedDefs},
+                      reverse_mapping::IdDict{AnySSAValue, Int},
+                      walker_callback::WalkerCallback)
     val = old_value
     if is_old(compact, old_node_ssa) && isa(val, SSAValue)
         val = OldSSAValue(val.id)
     end
     if isa(val, AnySSAValue)
-        val = simple_walk(compact, val)
+        val = simple_walk(compact, val, LiftedLeaveWalker(lifted_leaves, reverse_mapping, walker_callback))
     end
     if val in keys(lifted_leaves)
         lifted_val = lifted_leaves[val]
@@ -637,8 +668,7 @@ function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=:
         lifted_val === nothing && return UNDEF_TOKEN
         val = lifted_val.val
         if isa(val, AnySSAValue)
-            callback = (@nospecialize(pi), @nospecialize(idx)) -> true
-            val = simple_walk(compact, val, callback)
+            val = simple_walk(compact, val, PiWalker())
         end
         return val
     elseif isa(val, AnySSAValue) && val in keys(reverse_mapping)
@@ -649,95 +679,23 @@ function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=:
 end
 
 function is_old(compact, @nospecialize(old_node_ssa))
-    isa(old_node_ssa, OldSSAValue) &&
-        !is_pending(compact, old_node_ssa) &&
-        !already_inserted(compact, old_node_ssa)
+    isa(old_node_ssa, OldSSAValue) || return false
+    is_pending(compact, old_node_ssa) && return false
+    already_inserted(compact, old_node_ssa) && return false
+    return true
 end
 
-function perform_lifting!(compact::IncrementalCompact,
-        visited_philikes::Vector{AnySSAValue}, @nospecialize(cache_key),
-        lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue},
-        @nospecialize(result_t), lifted_leaves::Union{LiftedLeaves, LiftedDefs}, @nospecialize(stmt_val),
-        lazydomtree::Union{LazyDomtree,Nothing})
-    reverse_mapping = IdDict{AnySSAValue, Int}()
-    for id in 1:length(visited_philikes)
-        reverse_mapping[visited_philikes[id]] = id
-    end
-
-    # Check if all the lifted leaves are the same
-    local the_leaf
-    all_same = true
-    for (_, val) in lifted_leaves
-        if !@isdefined(the_leaf)
-            the_leaf = val
-            continue
-        end
-        if val !== the_leaf
-            all_same = false
-        end
-    end
-
-    the_leaf_val = isa(the_leaf, LiftedValue) ? the_leaf.val : nothing
-    if !isa(the_leaf_val, SSAValue)
-        all_same = false
-    end
-
-    if all_same
-        dominates_all = true
-        if lazydomtree !== nothing
-            domtree = get!(lazydomtree)
-            for item in visited_philikes
-                if !dominates_ssa(compact, domtree, the_leaf_val, item)
-                    dominates_all = false
-                    break
-                end
-            end
-            if dominates_all
-                return the_leaf
-            end
-        end
-    end
-
-    # Insert PhiNodes
-    nphilikes = length(visited_philikes)
-    lifted_philikes = Vector{LiftedPhilike}(undef, nphilikes)
-    for i = 1:nphilikes
-        old_ssa = visited_philikes[i]
-        old_inst = compact[old_ssa]
-        old_node = old_inst[:inst]::Union{PhiNode,Expr}
-        # FIXME this cache is broken somehow
-        # ckey = Pair{AnySSAValue, Any}(old_ssa, cache_key)
-        # cached = ckey in keys(lifting_cache)
-        cached = false
-        if cached
-            ssa = lifting_cache[ckey]
-            if isa(old_node, PhiNode)
-                lifted_philikes[i] = LiftedPhilike(ssa, old_node, false)
-            else
-                lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(old_node), false)
-            end
-            continue
-        end
-        if isa(old_node, PhiNode)
-            new_node = PhiNode()
-            ssa = insert_node!(compact, old_ssa, effect_free_and_nothrow(NewInstruction(new_node, result_t)))
-            lifted_philikes[i] = LiftedPhilike(ssa, new_node, true)
-        else
-            @assert is_known_call(old_node, Core.ifelse, compact)
-            ifelse_func, condition = old_node.args
-            if is_old(compact, old_ssa) && isa(condition, SSAValue)
-                condition = OldSSAValue(condition.id)
-            end
-
-            new_node = Expr(:call, ifelse_func, condition) # Renamed then_result, else_result added below
-            new_inst = NewInstruction(new_node, result_t, NoCallInfo(), old_inst[:line], old_inst[:flag])
-
-            ssa = insert_node!(compact, old_ssa, new_inst, #= attach_after =# true)
-            lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(new_node), true)
-        end
-        # lifting_cache[ckey] = ssa
-    end
+struct PhiNest{C<:WalkerCallback}
+    visited_philikes::Vector{AnySSAValue}
+    lifted_philikes::Vector{LiftedPhilike}
+    lifted_leaves::Union{LiftedLeaves, LiftedDefs}
+    reverse_mapping::IdDict{AnySSAValue, Int}
+    walker_callback::C
+end
 
+function finish_phi_nest!(compact::IncrementalCompact, nest::PhiNest)
+    (;visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping, walker_callback) = nest
+    nphilikes = length(lifted_philikes)
     # Fix up arguments
     for i = 1:nphilikes
         (old_node_ssa, lf) = visited_philikes[i], lifted_philikes[i]
@@ -746,12 +704,12 @@ function perform_lifting!(compact::IncrementalCompact,
 
         lfnode = lf.node
         if isa(lfnode, PhiNode)
-            old_node = compact[old_node_ssa][:inst]::PhiNode
+            old_node = compact[old_node_ssa][:stmt]::PhiNode
             new_node = lfnode
             for i = 1:length(old_node.values)
                 isassigned(old_node.values, i) || continue
                 val = lifted_value(compact, old_node_ssa, old_node.values[i],
-                                   lifted_philikes, lifted_leaves, reverse_mapping)
+                                   lifted_philikes, lifted_leaves, reverse_mapping, walker_callback)
                 val !== SKIP_TOKEN && push!(new_node.edges, old_node.edges[i])
                 if val === UNDEF_TOKEN
                     resize!(new_node.values, length(new_node.values)+1)
@@ -761,13 +719,13 @@ function perform_lifting!(compact::IncrementalCompact,
                 end
             end
         elseif isa(lfnode, IfElseCall)
-            old_node = compact[old_node_ssa][:inst]::Expr
+            old_node = compact[old_node_ssa][:stmt]::Expr
             then_result, else_result = old_node.args[3], old_node.args[4]
 
             then_result = lifted_value(compact, old_node_ssa, then_result,
-                                       lifted_philikes, lifted_leaves, reverse_mapping)
+                                       lifted_philikes, lifted_leaves, reverse_mapping, walker_callback)
             else_result = lifted_value(compact, old_node_ssa, else_result,
-                                       lifted_philikes, lifted_leaves, reverse_mapping)
+                                       lifted_philikes, lifted_leaves, reverse_mapping, walker_callback)
 
             # In cases where the Core.ifelse condition is statically-known, e.g., thanks
             # to a PiNode from a guarding conditional, replace with the remaining branch.
@@ -775,8 +733,7 @@ function perform_lifting!(compact::IncrementalCompact,
                 only_result = (then_result === SKIP_TOKEN) ? else_result : then_result
 
                 # Replace Core.ifelse(%cond, %a, %b) with %a
-                compact[lf.ssa][:inst] = only_result
-                should_count && _count_added_node!(compact, only_result)
+                compact[lf.ssa] = only_result
 
                 # Note: Core.ifelse(%cond, %a, %b) has observable effects (!nothrow), but since
                 # we have not deleted the preceding statement that this was derived from, this
@@ -796,26 +753,175 @@ function perform_lifting!(compact::IncrementalCompact,
             push!(lfnode.call.args, else_result)
         end
     end
+end
+
+struct LiftedLeaveWalker{C<:WalkerCallback} <: WalkerCallback
+    lifted_leaves::Union{LiftedLeaves, LiftedDefs}
+    reverse_mapping::IdDict{AnySSAValue, Int}
+    inner_walker_callback::C
+    function LiftedLeaveWalker(@nospecialize(lifted_leaves::Union{LiftedLeaves, LiftedDefs}),
+                               @nospecialize(reverse_mapping::IdDict{AnySSAValue, Int}),
+                               inner_walker_callback::C) where C<:WalkerCallback
+        return new{C}(lifted_leaves, reverse_mapping, inner_walker_callback)
+    end
+end
+function (walker_callback::LiftedLeaveWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    (; lifted_leaves, reverse_mapping, inner_walker_callback) = walker_callback
+    if defssa in keys(lifted_leaves) || defssa in keys(reverse_mapping)
+        return nothing
+    end
+    isa(def, PiNode) && return LiftedValue(def.val)
+    return inner_walker_callback(def, defssa)
+end
+
+function perform_lifting!(compact::IncrementalCompact,
+        visited_philikes::Vector{AnySSAValue}, @nospecialize(cache_key),
+        @nospecialize(result_t), lifted_leaves::Union{LiftedLeaves, LiftedDefs}, @nospecialize(stmt_val),
+        lazydomtree::Union{LazyDomtree,Nothing}, walker_callback::WalkerCallback = TrivialWalker())
+    reverse_mapping = IdDict{AnySSAValue, Int}()
+    for id in 1:length(visited_philikes)
+        reverse_mapping[visited_philikes[id]] = id
+    end
+
+    # Check if all the lifted leaves are the same
+    local the_leaf
+    all_same = true
+    for (_, val) in lifted_leaves
+        if !@isdefined(the_leaf)
+            the_leaf = val
+            continue
+        end
+        if val !== the_leaf
+            all_same = false
+        end
+    end
+
+    if all_same && isa(the_leaf, LiftedValue)
+        dominates_all = true
+        the_leaf_val = the_leaf.val
+        if isa(the_leaf_val, AnySSAValue)
+            if lazydomtree === nothing
+                # Must conservatively assume this
+                dominates_all = false
+            else
+                # This code guards against the possibility of accidentally forwarding a value from a
+                # previous iteration. Consider for example:
+                #
+                # %p = phi(%arg, %t)
+                # %b = <...>
+                # %c = getfield(%p, 1)
+                # %t = tuple(%b)
+                #
+                # It would be incorrect to replace `%c` by `%b`, because that would read the value of
+                # `%b` in the *current* iteration, while the value of `%b` that comes in via `%p` is
+                # that of the previous iteration.
+                domtree = get!(lazydomtree)
+                for item in visited_philikes
+                    if !dominates_ssa(compact, domtree, the_leaf_val, item)
+                        dominates_all = false
+                        break
+                    end
+                end
+            end
+        end
+        if dominates_all
+            if isa(the_leaf_val, OldSSAValue)
+                the_leaf = LiftedValue(simple_walk(compact, the_leaf_val))
+            end
+            return Pair{Any, PhiNest}(the_leaf, PhiNest(visited_philikes, Vector{LiftedPhilike}(undef, 0), lifted_leaves, reverse_mapping, walker_callback))
+        end
+    end
+
+    # Insert PhiNodes
+    nphilikes = length(visited_philikes)
+    lifted_philikes = Vector{LiftedPhilike}(undef, nphilikes)
+    for i = 1:nphilikes
+        old_ssa = visited_philikes[i]
+        old_inst = compact[old_ssa]
+        old_node = old_inst[:stmt]::Union{PhiNode,Expr}
+        if isa(old_node, PhiNode)
+            new_node = PhiNode()
+            ssa = insert_node!(compact, old_ssa, removable_if_unused(NewInstruction(new_node, result_t)))
+            lifted_philikes[i] = LiftedPhilike(ssa, new_node, true)
+        else
+            @assert is_known_call(old_node, Core.ifelse, compact)
+            ifelse_func, condition = old_node.args
+            if is_old(compact, old_ssa) && isa(condition, SSAValue)
+                condition = OldSSAValue(condition.id)
+            end
+
+            new_node = Expr(:call, ifelse_func, condition) # Renamed then_result, else_result added below
+            new_inst = NewInstruction(new_node, result_t, NoCallInfo(), old_inst[:line], old_inst[:flag])
+
+            ssa = insert_node!(compact, old_ssa, new_inst, #= attach_after =# true)
+            lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(new_node), true)
+        end
+    end
 
     # Fixup the stmt itself
     if isa(stmt_val, Union{SSAValue, OldSSAValue})
-        stmt_val = simple_walk(compact, stmt_val)
+        stmt_val = simple_walk(compact, stmt_val, LiftedLeaveWalker(lifted_leaves, reverse_mapping, walker_callback))
     end
 
     if stmt_val in keys(lifted_leaves)
-        return lifted_leaves[stmt_val]
+        stmt_val = lifted_leaves[stmt_val]
     elseif isa(stmt_val, AnySSAValue) && stmt_val in keys(reverse_mapping)
-        return LiftedValue(lifted_philikes[reverse_mapping[stmt_val]].ssa)
+        stmt_val = LiftedValue(lifted_philikes[reverse_mapping[stmt_val]].ssa)
+    else
+        error()
     end
 
-    return stmt_val # N.B. should never happen
+    return Pair{Any, PhiNest}(stmt_val, PhiNest(visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping, walker_callback))
+end
+
+# Handle _apply_iterate calls: convert arguments to use `Core.svec`.
+# The behavior of `Core.svec` (with boxing) better matches the ABI of codegen.
+function lift_apply_args!(compact::IncrementalCompact, idx::Int, stmt::Expr)
+    compact[idx] = nothing
+    for i in 4:length(stmt.args) # Skip `_apply_iterate`, `iterate`, and the function
+        arg = stmt.args[i]
+        arg_type = widenconst(argextype(arg, compact))
+        if isa(arg_type, DataType) && arg_type.name === Tuple.name
+            svec_args = nothing
+            if isa(arg, SSAValue)
+                arg_stmt = compact[arg][:stmt]
+                if is_known_call(arg_stmt, Core.tuple, compact)
+                    svec_args = copy(arg_stmt.args)
+                end
+            end
+            if svec_args === nothing
+                # Fallback path: generate getfield calls for tuple elements
+                tuple_length = length(arg_type.parameters)
+                if tuple_length > 0 && !isvarargtype(arg_type.parameters[tuple_length])
+                    svec_args = Vector{Any}(undef, tuple_length + 1)
+                    for j in 1:tuple_length
+                        getfield_call = Expr(:call, GlobalRef(Core, :getfield), arg, j)
+                        getfield_type = arg_type.parameters[j]
+                        inst = compact[SSAValue(idx)]
+                        getfield_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(getfield_call, getfield_type, NoCallInfo(), inst[:line], inst[:flag]))
+                        svec_args[j + 1] = getfield_ssa
+                    end
+                end
+            end
+            if svec_args !== nothing
+                svec_args[1] = GlobalRef(Core, :svec)
+                new_svec_call = Expr(:call)
+                new_svec_call.args = svec_args
+                inst = compact[SSAValue(idx)]
+                new_svec_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(new_svec_call, SimpleVector, NoCallInfo(), inst[:line], inst[:flag]))
+                stmt.args[i] = new_svec_ssa
+            end
+        end
+    end
+    compact[idx] = stmt
+    nothing
 end
 
 function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr)
-    length(stmt.args) != 4 && return
+    length(stmt.args) != 3 && return
 
-    vec = stmt.args[3]
-    val = stmt.args[4]
+    vec = stmt.args[2]
+    val = stmt.args[3]
     valT = argextype(val, compact)
     (isa(valT, Const) && isa(valT.val, Int)) || return
     valI = valT.val::Int
@@ -825,7 +931,7 @@ function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr)
         valI <= length(vec) || return
         compact[idx] = quoted(vec[valI])
     elseif isa(vec, SSAValue)
-        def = compact[vec][:inst]
+        def = compact[vec][:stmt]
         if is_known_call(def, Core.svec, compact)
             valI <= length(def.args) - 1 || return
             compact[idx] = def.args[valI+1]
@@ -839,7 +945,116 @@ function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr)
     return
 end
 
-# TODO: We could do the whole lifing machinery here, but really all
+function lift_leaves_keyvalue(compact::IncrementalCompact, @nospecialize(key),
+                             leaves::Vector{Any}, 𝕃ₒ::AbstractLattice)
+    # For every leaf, the lifted value
+    lifted_leaves = LiftedLeaves()
+    for i = 1:length(leaves)
+        leaf = leaves[i]
+        cache_key = leaf
+        if isa(leaf, AnySSAValue)
+            (def, leaf) = walk_to_def(compact, leaf)
+            if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, compact)
+                @assert isexpr(def, :invoke)
+                if length(def.args) in (5, 6)
+                    set_key = def.args[end-1]
+                    set_val_idx = length(def.args)
+                elseif length(def.args) == 4
+                    # Key is deleted
+                    # TODO: Model this
+                    return nothing
+                elseif length(def.args) == 3
+                    # The whole collection is deleted
+                    # TODO: Model this
+                    return nothing
+                else
+                    return nothing
+                end
+                if set_key === key || (egal_tfunc(𝕃ₒ, argextype(key, compact), argextype(set_key, compact)) == Const(true))
+                    lift_arg!(compact, leaf, cache_key, def, set_val_idx, lifted_leaves)
+                    continue
+                end
+            end
+        end
+        return nothing
+    end
+    return lifted_leaves
+end
+
+function keyvalue_predecessors(@nospecialize(key), 𝕃ₒ::AbstractLattice)
+    function(@nospecialize(def), compact::IncrementalCompact)
+        if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, compact)
+            @assert isexpr(def, :invoke)
+            if length(def.args) in (5, 6)
+                collection = def.args[end-2]
+                set_key = def.args[end-1]
+                set_val_idx = length(def.args)
+            elseif length(def.args) == 4
+                collection = def.args[end-1]
+                # Key is deleted
+                # TODO: Model this
+                return nothing
+            elseif length(def.args) == 3
+                collection = def.args[end]
+                # The whole collection is deleted
+                # TODO: Model this
+                return nothing
+            else
+                return nothing
+            end
+            if set_key === key || (egal_tfunc(𝕃ₒ, argextype(key, compact), argextype(set_key, compact)) == Const(true))
+                # This is an actual def
+                return nothing
+            end
+            return Any[collection]
+        end
+        return phi_or_ifelse_predecessors(def, compact)
+    end
+end
+
+struct KeyValueWalker <: WalkerCallback
+    compact::IncrementalCompact
+end
+function (walker_callback::KeyValueWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, walker_callback.compact)
+        @assert length(def.args) in (5, 6)
+        return LiftedValue(def.args[end-2])
+    end
+    return nothing
+end
+
+function lift_keyvalue_get!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice)
+    collection = stmt.args[end-1]
+    key = stmt.args[end]
+
+    leaves, visited_philikes = collect_leaves(compact, collection, Any, 𝕃ₒ, keyvalue_predecessors(key, 𝕃ₒ))
+    isempty(leaves) && return
+
+    lifted_leaves = lift_leaves_keyvalue(compact, key, leaves, 𝕃ₒ)
+    lifted_leaves === nothing && return
+
+    result_t = Union{}
+    for v in values(lifted_leaves)
+        v === nothing && return
+        result_t = tmerge(𝕃ₒ, result_t, argextype(v.val, compact))
+    end
+
+    (lifted_val, nest) = perform_lifting!(compact,
+        visited_philikes, key, result_t, lifted_leaves, collection, nothing,
+        KeyValueWalker(compact))
+
+    compact[idx] = lifted_val === nothing ? nothing : Expr(:call, GlobalRef(Core, :tuple), lifted_val.val)
+    finish_phi_nest!(compact, nest)
+    if lifted_val !== nothing
+        if !⊑(𝕃ₒ, compact[SSAValue(idx)][:type], tuple_tfunc(𝕃ₒ, Any[result_t]))
+            add_flag!(compact[SSAValue(idx)], IR_FLAG_REFINED)
+        end
+    end
+
+    return
+end
+
+# TODO: We could do the whole lifting machinery here, but really all
 # we want to do is clean this up when it got inserted by inlining,
 # which always targets simple `svec` call or `_compute_sparams`,
 # so this specialized lifting would be enough
@@ -857,25 +1072,27 @@ end
     sig = sig.body
     isa(sig, DataType) || return nothing
     sig.name === Tuple.name || return nothing
-    length(sig.parameters) >= 1 || return nothing
+    sig_parameters = sig.parameters::SimpleVector
+    length_sig_parameters = length(sig_parameters)
+    length_sig_parameters >= 1 || return nothing
 
-    i = let sig=sig
-        findfirst(j::Int->has_typevar(sig.parameters[j], tvar), 1:length(sig.parameters))
+    function has_typevar_closure(j::Int)
+        has_typevar(sig_parameters[j], tvar)
     end
+
+    i = findfirst(has_typevar_closure, 1:length_sig_parameters)
     i === nothing && return nothing
-    let sig=sig
-        any(j::Int->has_typevar(sig.parameters[j], tvar), i+1:length(sig.parameters))
-    end && return nothing
+    any(has_typevar_closure, i+1:length_sig_parameters) && return nothing
 
-    arg = sig.parameters[i]
+    arg = sig_parameters[i]
 
     rarg = def.args[2 + i]
     isa(rarg, SSAValue) || return nothing
-    argdef = compact[rarg][:inst]
+    argdef = compact[rarg][:stmt]
     if isexpr(argdef, :new)
         rarg = argdef.args[1]
         isa(rarg, SSAValue) || return nothing
-        argdef = compact[rarg][:inst]
+        argdef = compact[rarg][:stmt]
     else
         isType(arg) || return nothing
         arg = arg.parameters[1]
@@ -924,7 +1141,7 @@ function pattern_match_typeof(compact::IncrementalCompact, typ::DataType, fidx::
                               @nospecialize(Targ), @nospecialize(farg))
     isa(Targ, SSAValue) || return false
 
-    Tdef = compact[Targ][:inst]
+    Tdef = compact[Targ][:stmt]
     is_known_call(Tdef, Core.apply_type, compact) || return false
     length(Tdef.args) ≥ 2 || return false
 
@@ -939,41 +1156,86 @@ function pattern_match_typeof(compact::IncrementalCompact, typ::DataType, fidx::
         push!(tvars, applyTvar)
     end
 
-    applyT.name === typ.name || return false
+    @assert applyT.name === typ.name
     fT = fieldtype(applyT, fidx)
     idx = findfirst(IsEgal(fT), tvars)
     idx === nothing && return false
     checkbounds(Bool, Tdef.args, 2+idx) || return false
     valarg = Tdef.args[2+idx]
     isa(valarg, SSAValue) || return false
-    valdef = compact[valarg][:inst]
+    valdef = compact[valarg][:stmt]
     is_known_call(valdef, typeof, compact) || return false
 
     return valdef.args[2] === farg
 end
 
 function refine_new_effects!(𝕃ₒ::AbstractLattice, compact::IncrementalCompact, idx::Int, stmt::Expr)
-    (consistent, effect_free_and_nothrow, nothrow) = new_expr_effect_flags(𝕃ₒ, stmt.args, compact, pattern_match_typeof)
+    inst = compact[SSAValue(idx)]
+    if has_flag(inst, IR_FLAGS_REMOVABLE)
+        return # already accurate
+    end
+    (consistent, removable, nothrow) = new_expr_effect_flags(𝕃ₒ, stmt.args, compact, pattern_match_typeof)
     if consistent
-        compact[SSAValue(idx)][:flag] |= IR_FLAG_CONSISTENT
+        add_flag!(inst, IR_FLAG_CONSISTENT)
     end
-    if effect_free_and_nothrow
-        compact[SSAValue(idx)][:flag] |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
+    if removable
+        add_flag!(inst, IR_FLAGS_REMOVABLE)
     elseif nothrow
-        compact[SSAValue(idx)][:flag] |= IR_FLAG_NOTHROW
+        add_flag!(inst, IR_FLAG_NOTHROW)
     end
+    return nothing
+end
+
+function fold_ifelse!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice)
+    length(stmt.args) == 4 || return false
+    condarg = stmt.args[2]
+    condtyp = argextype(condarg, compact)
+    if isa(condtyp, Const)
+        if condtyp.val === true
+            compact[idx] = stmt.args[3]
+            return true
+        elseif condtyp.val === false
+            compact[idx] = stmt.args[4]
+            return true
+        end
+    elseif ⊑(𝕃ₒ, condtyp, Bool) && stmt.args[3] === stmt.args[4]
+        compact[idx] = stmt.args[3]
+        return true
+    end
+    return false
 end
 
 # NOTE we use `IdSet{Int}` instead of `BitSet` for in these passes since they work on IR after inlining,
 # which can be very large sometimes, and program counters in question are often very sparse
 const SPCSet = IdSet{Int}
 
-struct IntermediaryCollector
+struct IntermediaryCollector <: WalkerCallback
     intermediaries::SPCSet
 end
-function (this::IntermediaryCollector)(@nospecialize(pi), @nospecialize(ssa))
-    push!(this.intermediaries, ssa.id)
-    return false
+function (walker_callback::IntermediaryCollector)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if !(def isa Expr)
+        push!(walker_callback.intermediaries, defssa.id)
+        if def isa PiNode
+            return LiftedValue(def.val)
+        end
+    end
+    return nothing
+end
+
+function update_scope_mapping!(scope_mapping, bb, val)
+    current_mapping = scope_mapping[bb]
+    if current_mapping != SSAValue(0)
+        if val == SSAValue(0)
+            # Unreachable bbs will have SSAValue(0), but can branch into
+            # try/catch regions. We could validate with the domtree, but that's
+            # quite expensive for a debug check, so simply allow this without
+            # making any changes to mapping.
+            return
+        end
+        @assert current_mapping == val
+        return
+    end
+    scope_mapping[bb] = val
 end
 
 """
@@ -998,13 +1260,61 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
     𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp)
     compact = IncrementalCompact(ir)
     defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations
-    lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}()
-    def_lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}()
     # initialization of domtree is delayed to avoid the expensive computation in many cases
     lazydomtree = LazyDomtree(ir)
-    for ((_, idx), stmt) in compact
+    scope_mapping::Union{Vector{SSAValue}, Nothing} = nothing
+    for ((old_idx, idx), stmt) in compact
+        # If we encounter any EnterNode with set :scope, propagate the current scope for all basic blocks, so
+        # we have easy access for current_scope folding below.
+        if !isa(stmt, Expr)
+            bb = compact.active_result_bb - 1
+            if scope_mapping !== nothing && did_just_finish_bb(compact)
+                this_scope = scope_mapping[bb]
+                if isa(stmt, GotoIfNot)
+                    update_scope_mapping!(scope_mapping, stmt.dest, this_scope)
+                    update_scope_mapping!(scope_mapping, bb+1, this_scope)
+                elseif isa(stmt, GotoNode)
+                    update_scope_mapping!(scope_mapping, stmt.label, this_scope)
+                elseif isa(stmt, EnterNode)
+                    if stmt.catch_dest != 0
+                        update_scope_mapping!(scope_mapping, stmt.catch_dest, this_scope)
+                    end
+                    isdefined(stmt, :scope) || update_scope_mapping!(scope_mapping, bb+1, this_scope)
+                elseif !isa(stmt, ReturnNode)
+                    update_scope_mapping!(scope_mapping, bb+1, this_scope)
+                end
+            end
+            if isa(stmt, EnterNode)
+                if isdefined(stmt, :scope)
+                    if scope_mapping === nothing
+                        scope_mapping = SSAValue[SSAValue(0) for i = 1:length(compact.cfg_transform.result_bbs)]
+                    end
+                    update_scope_mapping!(scope_mapping, bb+1, SSAValue(idx))
+                end
+            end
+            continue
+        end
+        if scope_mapping !== nothing && did_just_finish_bb(compact)
+            bb = compact.active_result_bb - 1
+            bbs = scope_mapping[bb]
+            if isexpr(stmt, :leave) && bbs != SSAValue(0)
+                # Here we want to count the number of scopes that we're leaving,
+                # which is the same as the number of EnterNodes being referenced
+                # by `stmt.args`. Which have :scope set. In practice, the frontend
+                # does emit these in order, so we could simply go to the last one,
+                # but we want to avoid making that semantic assumption.
+                for i = 1:length(stmt.args)
+                    scope = stmt.args[i]
+                    scope === nothing && continue
+                    enter = compact[scope][:inst]
+                    @assert isa(enter, EnterNode)
+                    isdefined(enter, :scope) || continue
+                    bbs = scope_mapping[block_for_inst(compact, bbs)]
+                end
+            end
+            update_scope_mapping!(scope_mapping, bb+1, bbs)
+        end
         # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement)
-        isa(stmt, Expr) || continue
         is_setfield = is_isdefined = is_finalizer = false
         field_ordering = :unspecified
         if is_known_call(stmt, setfield!, compact)
@@ -1037,7 +1347,13 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
                 # Inlining performs legality checks on the finalizer to determine
                 # whether or not we may inline it. If so, it appends extra arguments
                 # at the end of the intrinsic. Detect that here.
-                length(stmt.args) == 5 || continue
+                if length(stmt.args) == 4 && stmt.args[4] === nothing
+                    # constant case
+                elseif length(stmt.args) == 5 && stmt.args[4] isa Bool && stmt.args[5] isa Core.CodeInstance
+                    # inlining case
+                else
+                    continue
+                end
             end
             is_finalizer = true
         elseif isexpr(stmt, :foreigncall)
@@ -1048,14 +1364,10 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
                 preserved_arg = stmt.args[pidx]
                 isa(preserved_arg, SSAValue) || continue
                 let intermediaries = SPCSet()
-                    callback = function (@nospecialize(pi), @nospecialize(ssa))
-                        push!(intermediaries, ssa.id)
-                        return false
-                    end
-                    def = simple_walk(compact, preserved_arg, callback)
+                    def = simple_walk(compact, preserved_arg, IntermediaryCollector(intermediaries))
                     isa(def, SSAValue) || continue
                     defidx = def.id
-                    def = compact[def][:inst]
+                    def = compact[def][:stmt]
                     if is_known_call(def, tuple, compact)
                         record_immutable_preserve!(new_preserves, def, compact)
                         push!(preserved, preserved_arg.id)
@@ -1090,11 +1402,27 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
             elseif is_known_call(stmt, Core._svec_ref, compact)
                 lift_svec_ref!(compact, idx, stmt)
             elseif is_known_call(stmt, (===), compact)
-                lift_comparison!(===, compact, idx, stmt, lifting_cache, 𝕃ₒ)
+                lift_comparison!(===, compact, idx, stmt, 𝕃ₒ)
             elseif is_known_call(stmt, isa, compact)
-                lift_comparison!(isa, compact, idx, stmt, lifting_cache, 𝕃ₒ)
-            elseif isexpr(stmt, :new) && (compact[SSAValue(idx)][:flag] & IR_FLAG_NOTHROW) == 0x00
+                lift_comparison!(isa, compact, idx, stmt, 𝕃ₒ)
+            elseif is_known_call(stmt, Core.ifelse, compact)
+                fold_ifelse!(compact, idx, stmt, 𝕃ₒ)
+            elseif is_known_invoke_or_call(stmt, Core.OptimizedGenerics.KeyValue.get, compact)
+                2 == (length(stmt.args) - (isexpr(stmt, :invoke) ? 2 : 1)) || continue
+                lift_keyvalue_get!(compact, idx, stmt, 𝕃ₒ)
+            elseif is_known_call(stmt, Core.current_scope, compact)
+                length(stmt.args) == 1 || continue
+                scope_mapping !== nothing || continue
+                bb = compact.active_result_bb
+                did_just_finish_bb(compact) && (bb -= 1)
+                enter_ssa = scope_mapping[bb]
+                enter_ssa == SSAValue(0) && continue
+                compact[SSAValue(idx)] = (compact[enter_ssa][:stmt]::EnterNode).scope
+            elseif isexpr(stmt, :new)
                 refine_new_effects!(𝕃ₒ, compact, idx, stmt)
+            elseif is_known_call(stmt, Core._apply_iterate, compact)
+                length(stmt.args) >= 4 || continue
+                lift_apply_args!(compact, idx, stmt)
             end
             continue
         end
@@ -1105,29 +1433,27 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
             # analyze `getfield` / `isdefined` / `setfield!` call
             val = stmt.args[2]
         end
-        struct_typ = unwrap_unionall(widenconst(argextype(val, compact)))
-        if isa(struct_typ, Union) && struct_typ <: Tuple
-            struct_typ = unswitchtupleunion(struct_typ)
-        end
-        if isa(struct_typ, Union) && is_isdefined
-            lift_comparison!(isdefined, compact, idx, stmt, lifting_cache, 𝕃ₒ)
+        struct_typ = widenconst(argextype(val, compact))
+        struct_argtyp = argument_datatype(struct_typ)
+        if struct_argtyp === nothing
+            if isa(struct_typ, Union) && is_isdefined
+                lift_comparison!(isdefined, compact, idx, stmt, 𝕃ₒ)
+            end
             continue
         end
-        isa(struct_typ, DataType) || continue
+        struct_typ_name = struct_argtyp.name
 
-        struct_typ.name.atomicfields == C_NULL || continue # TODO: handle more
+        struct_typ_name.atomicfields == C_NULL || continue # TODO: handle more
         if !((field_ordering === :unspecified) ||
              (field_ordering isa Const && field_ordering.val === :not_atomic))
             continue
         end
 
-
         # analyze this mutable struct here for the later pass
-        if ismutabletype(struct_typ)
+        if ismutabletypename(struct_typ_name)
             isa(val, SSAValue) || continue
             let intermediaries = SPCSet()
-                callback = IntermediaryCollector(intermediaries)
-                def = simple_walk(compact, val, callback)
+                def = simple_walk(compact, val, IntermediaryCollector(intermediaries))
                 # Mutable stuff here
                 isa(def, SSAValue) || continue
                 if defuses === nothing
@@ -1169,8 +1495,25 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
             result_t = tmerge(𝕃ₒ, result_t, argextype(v.val, compact))
         end
 
-        lifted_val = perform_lifting!(compact,
-            visited_philikes, field, lifting_cache, result_t, lifted_leaves, val, lazydomtree)
+        (lifted_val, nest) = perform_lifting!(compact,
+            visited_philikes, field, result_t, lifted_leaves, val, lazydomtree)
+
+        should_delete_node = false
+        line = compact[SSAValue(idx)][:line]
+        if lifted_val !== nothing && !⊑(𝕃ₒ, compact[SSAValue(idx)][:type], result_t)
+            compact[idx] = lifted_val === nothing ? nothing : lifted_val.val
+            add_flag!(compact[SSAValue(idx)], IR_FLAG_REFINED)
+        elseif lifted_val === nothing || isa(lifted_val.val, AnySSAValue)
+            # Save some work in a later compaction, by inserting this into the renamer now,
+            # but only do this if we didn't set the REFINED flag, to save work for irinterp
+            # in revisiting only the renamings that came through *this* idx.
+            compact.ssa_rename[old_idx] = lifted_val === nothing ? nothing : lifted_val.val
+            should_delete_node = true
+        else
+            compact[idx] = lifted_val === nothing ? nothing : lifted_val.val
+        end
+
+        finish_phi_nest!(compact, nest)
 
         # Insert the undef check if necessary
         if any_undef
@@ -1181,23 +1524,29 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
                 for (k, v) in pairs(lifted_leaves)
                     lifted_leaves_def[k] = v === nothing ? false : true
                 end
-                def_val = perform_lifting!(compact,
-                    visited_philikes, field, def_lifting_cache, Bool, lifted_leaves_def, val, lazydomtree).val
+                (def_val, nest) = perform_lifting!(compact,
+                    visited_philikes, field, Bool, lifted_leaves_def, val, lazydomtree)
+                def_val = (def_val::LiftedValue).val
+                finish_phi_nest!(compact, nest)
+            end
+            throw_expr = Expr(:throw_undef_if_not, Symbol("##getfield##"), def_val)
+            if should_delete_node
+                # Replace the node we already have rather than deleting/re-inserting.
+                # This way it is easier to handle BB boundary corner cases.
+                compact[SSAValue(idx)] = throw_expr
+                compact[SSAValue(idx)][:type] = Nothing
+                compact[SSAValue(idx)][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_CONSISTENT | IR_FLAG_NOUB
+                should_delete_node = false
+            else
+                ni = NewInstruction(throw_expr, Nothing, line)
+                insert_node!(compact, SSAValue(idx), ni)
             end
-            insert_node!(compact, SSAValue(idx), NewInstruction(
-                Expr(:throw_undef_if_not, Symbol("##getfield##"), def_val), Nothing))
-
         else
             # val must be defined
             @assert lifted_val !== nothing
         end
 
-        compact[idx] = lifted_val === nothing ? nothing : lifted_val.val
-        if lifted_val !== nothing
-            if !⊑(𝕃ₒ, compact[SSAValue(idx)][:type], result_t)
-                compact[SSAValue(idx)][:flag] |= IR_FLAG_REFINED
-            end
-        end
+        should_delete_node && delete_inst_here!(compact)
     end
 
     non_dce_finish!(compact)
@@ -1212,6 +1561,10 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
             used_ssas[x.id] -= 1
         end
         ir = complete(compact)
+        # remove any use that has been optimized away by the DCE
+        for (intermediaries, defuse) in values(defuses)
+            filter!(x -> ir[SSAValue(x.idx)][:stmt] !== nothing, defuse.uses)
+        end
         sroa_mutables!(ir, defuses, used_ssas, lazydomtree, inlining)
         return ir
     else
@@ -1223,61 +1576,63 @@ end
 # NOTE we resolve the inlining source here as we don't want to serialize `Core.Compiler`
 # data structure into the global cache (see the comment in `handle_finalizer_call!`)
 function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int,
-    mi::MethodInstance, @nospecialize(info::CallInfo), inlining::InliningState,
+    code::CodeInstance, @nospecialize(info::CallInfo), inlining::InliningState,
     attach_after::Bool)
-    code = get(code_cache(inlining), mi, nothing)
+    mi = get_ci_mi(code)
     et = InliningEdgeTracker(inlining)
     if code isa CodeInstance
         if use_const_api(code)
             # No code in the function - Nothing to do
-            add_inlining_backedge!(et, mi)
+            add_inlining_edge!(et, code)
             return true
         end
-        src = @atomic :monotonic code.inferred
+        # COMBAK: this has awkward and unreliable global cache effects, but
+        # this doesn't respect the bottom-up inliner order so we do not have
+        # CallInfo anymore. See `handle_finalizer_call!` too.
+        src = ci_get_source(inlining.interp, code)
     else
-        src = nothing
+        return false
     end
 
-    src = inlining_policy(inlining.interp, src, info, IR_FLAG_NULL, mi, Any[])
-    src === nothing && return false
-    src = retrieve_ir_for_inlining(mi, src)
+    src_inlining_policy(inlining.interp, mi, src, info, IR_FLAG_NULL) || return false
+    src, spec_info, di = retrieve_ir_for_inlining(code, src)
 
     # For now: Require finalizer to only have one basic block
     length(src.cfg.blocks) == 1 || return false
 
     # Ok, we're committed to inlining the finalizer
-    add_inlining_backedge!(et, mi)
+    add_inlining_edge!(et, code)
 
     # TODO: Should there be a special line number node for inlined finalizers?
-    inlined_at = ir[SSAValue(idx)][:line]
-    ((sp_ssa, argexprs), linetable_offset) = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir,
-        ir.linetable, src, mi.sparam_vals, mi, inlined_at, argexprs)
+    inline_at = ir[SSAValue(idx)][:line]
+    ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, spec_info, di, mi, inline_at, argexprs)
 
     # TODO: Use the actual inliner here rather than open coding this special purpose inliner.
-    spvals = mi.sparam_vals
     ssa_rename = Vector{Any}(undef, length(src.stmts))
     for idx′ = 1:length(src.stmts)
         inst = src[SSAValue(idx′)]
-        stmt′ = inst[:inst]
+        stmt′ = inst[:stmt]
         isa(stmt′, ReturnNode) && continue
         stmt′ = ssamap(stmt′) do ssa::SSAValue
             ssa_rename[ssa.id]
         end
-        stmt′ = ssa_substitute_op!(InsertBefore(ir, SSAValue(idx)), inst, stmt′, argexprs, mi.specTypes, mi.sparam_vals, sp_ssa, :default)
+        stmt′ = ssa_substitute_op!(InsertBefore(ir, SSAValue(idx)), inst, stmt′, ssa_substitute)
         ssa_rename[idx′] = insert_node!(ir, idx,
-            NewInstruction(inst; stmt=stmt′, line=inst[:line]+linetable_offset),
+            NewInstruction(inst; stmt=stmt′, line=(ssa_substitute.inlined_at[1], ssa_substitute.inlined_at[2], Int32(idx′))),
             attach_after)
     end
 
     return true
 end
 
-is_nothrow(ir::IRCode, ssa::SSAValue) = (ir[ssa][:flag] & IR_FLAG_NOTHROW) ≠ 0
+is_nothrow(ir::IRCode, ssa::SSAValue) = has_flag(ir[ssa], IR_FLAG_NOTHROW)
 
-function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = nothing)
+function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Int)
     worklist = Int[from_bb]
     visited = BitSet(from_bb)
-    if to_bb !== nothing
+    if to_bb == from_bb
+        return visited
+    else
         push!(visited, to_bb)
     end
     function visit!(bb::Int)
@@ -1292,166 +1647,178 @@ function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = no
     return visited
 end
 
-function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse,
+function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int, defuse::SSADefUse,
         inlining::InliningState, lazydomtree::LazyDomtree,
         lazypostdomtree::LazyPostDomtree, @nospecialize(info::CallInfo))
     # For now, require that:
     # 1. The allocation dominates the finalizer registration
-    # 2. The finalizer registration dominates all uses reachable from the
-    #    finalizer registration.
-    # 3. The insertion block for the finalizer is the post-dominator of all
-    #    uses and the finalizer registration block. The insertion block must
-    #    be dominated by the finalizer registration block.
-    # 4. The path from the finalizer registration to the finalizer inlining
+    # 2. The insertion block for the finalizer is the post-dominator of all
+    #    uses (including the finalizer registration).
+    # 3. The path from the finalizer registration to the finalizer inlining
     #    location is nothrow
     #
-    # TODO: We could relax item 3, by inlining the finalizer multiple times.
+    # TODO: We could relax the check 2, by inlining the finalizer multiple times.
 
     # Check #1: The allocation dominates the finalizer registration
     domtree = get!(lazydomtree)
     finalizer_bb = block_for_inst(ir, finalizer_idx)
-    alloc_bb = block_for_inst(ir, idx)
+    alloc_bb = block_for_inst(ir, alloc_idx)
     dominates(domtree, alloc_bb, finalizer_bb) || return nothing
 
-    bb_insert_block::Int = finalizer_bb
-    bb_insert_idx::Union{Int,Nothing} = finalizer_idx
-    function note_block_use!(usebb::Int, useidx::Int)
-        new_bb_insert_block = nearest_common_dominator(get!(lazypostdomtree),
-            bb_insert_block, usebb)
-        if new_bb_insert_block == bb_insert_block && bb_insert_idx !== nothing
-            bb_insert_idx = max(bb_insert_idx::Int, useidx)
-        elseif new_bb_insert_block == usebb
-            bb_insert_idx = useidx
+    # Check #2: The insertion block for the finalizer is the post-dominator of all uses
+    insert_bb::Int = finalizer_bb
+    insert_idx::Union{Int,Nothing} = finalizer_idx
+    function note_defuse!(x::Union{Int,SSAUse})
+        defuse_idx = x isa SSAUse ? x.idx : x
+        defuse_idx == finalizer_idx && return nothing
+        defuse_bb = block_for_inst(ir, defuse_idx)
+        new_insert_bb = nearest_common_dominator(get!(lazypostdomtree),
+            insert_bb, defuse_bb)
+        if new_insert_bb == insert_bb && insert_idx !== nothing
+            insert_idx = max(insert_idx::Int, defuse_idx)
+        elseif new_insert_bb == defuse_bb
+            insert_idx = defuse_idx
         else
-            bb_insert_idx = nothing
+            insert_idx = nothing
         end
-        bb_insert_block = new_bb_insert_block
+        insert_bb = new_insert_bb
         nothing
     end
-
-    # Collect all reachable blocks between the finalizer registration and the
-    # insertion point
-    blocks = reachable_blocks(ir.cfg, finalizer_bb, alloc_bb)
-
-    # Check #2
-    function check_defuse(x::Union{Int,SSAUse})
-        duidx = x isa SSAUse ? x.idx : x
-        duidx == finalizer_idx && return true
-        bb = block_for_inst(ir, duidx)
-        # Not reachable from finalizer registration - we're ok
-        bb ∉ blocks && return true
-        note_block_use!(bb, duidx)
-        if dominates(domtree, finalizer_bb, bb)
-            return true
-        else
-            return false
-        end
-    end
-    all(check_defuse, defuse.uses) || return nothing
-    all(check_defuse, defuse.defs) || return nothing
-
-    # Check #3
-    dominates(domtree, finalizer_bb, bb_insert_block) || return nothing
+    foreach(note_defuse!, defuse.uses)
+    foreach(note_defuse!, defuse.defs)
+    insert_bb != 0 || return nothing # verify post-dominator of all uses exists
 
     if !OptimizationParams(inlining.interp).assume_fatal_throw
         # Collect all reachable blocks between the finalizer registration and the
         # insertion point
-        blocks = finalizer_bb == bb_insert_block ? Int[finalizer_bb] :
-            reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block)
+        blocks = reachable_blocks(ir.cfg, finalizer_bb, insert_bb)
 
-        # Check #4
-        function check_range_nothrow(ir::IRCode, s::Int, e::Int)
+        # Check #3
+        function check_range_nothrow(s::Int, e::Int)
             return all(s:e) do sidx::Int
                 sidx == finalizer_idx && return true
-                sidx == idx && return true
+                sidx == alloc_idx && return true
                 return is_nothrow(ir, SSAValue(sidx))
             end
         end
         for bb in blocks
             range = ir.cfg.blocks[bb].stmts
             s, e = first(range), last(range)
-            if bb == bb_insert_block
-                bb_insert_idx === nothing && continue
-                e = bb_insert_idx
+            if bb == insert_bb
+                insert_idx === nothing && continue
+                e = insert_idx
             end
             if bb == finalizer_bb
                 s = finalizer_idx
             end
-            check_range_nothrow(ir, s, e) || return nothing
+            check_range_nothrow(s, e) || return nothing
         end
     end
 
     # Ok, legality check complete. Figure out the exact statement where we're
-    # gonna inline the finalizer.
-    loc = bb_insert_idx === nothing ? first(ir.cfg.blocks[bb_insert_block].stmts) : bb_insert_idx::Int
-    attach_after = bb_insert_idx !== nothing
+    # going to inline the finalizer.
+    loc = insert_idx === nothing ? first(ir.cfg.blocks[insert_bb].stmts) : insert_idx::Int
+    attach_after = insert_idx !== nothing
 
-    finalizer_stmt = ir[SSAValue(finalizer_idx)][:inst]
+    finalizer_stmt = ir[SSAValue(finalizer_idx)][:stmt]
     argexprs = Any[finalizer_stmt.args[2], finalizer_stmt.args[3]]
-    flags = info isa FinalizerInfo ? flags_for_effects(info.effects) : IR_FLAG_NULL
+    flag = info isa FinalizerInfo ? flags_for_effects(info.effects) : IR_FLAG_NULL
     if length(finalizer_stmt.args) >= 4
         inline = finalizer_stmt.args[4]
         if inline === nothing
             # No code in the function - Nothing to do
         else
-            mi = finalizer_stmt.args[5]::MethodInstance
-            if inline::Bool && try_inline_finalizer!(ir, argexprs, loc, mi, info, inlining, attach_after)
+            ci = finalizer_stmt.args[5]::CodeInstance
+            if inline::Bool && try_inline_finalizer!(ir, argexprs, loc, ci, info, inlining, attach_after)
                 # the finalizer body has been inlined
             else
-                insert_node!(ir, loc, with_flags(NewInstruction(Expr(:invoke, mi, argexprs...), Nothing), flags), attach_after)
+                newinst = add_flag(NewInstruction(Expr(:invoke, ci, argexprs...), Any), flag)
+                insert_node!(ir, loc, newinst, attach_after)
             end
         end
     else
-        insert_node!(ir, loc, with_flags(NewInstruction(Expr(:call, argexprs...), Nothing), flags), attach_after)
+        newinst = add_flag(NewInstruction(Expr(:call, argexprs...), Nothing), flag)
+        insert_node!(ir, loc, newinst, attach_after)
     end
     # Erase the call to `finalizer`
-    ir[SSAValue(finalizer_idx)][:inst] = nothing
+    ir[SSAValue(finalizer_idx)][:stmt] = nothing
     return nothing
 end
 
-function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState})
+function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing,InliningState})
     𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp)
     lazypostdomtree = LazyPostDomtree(ir)
-    for (idx, (intermediaries, defuse)) in defuses
-        intermediaries = collect(intermediaries)
-        # Check if there are any uses we did not account for. If so, the variable
-        # escapes and we cannot eliminate the allocation. This works, because we're guaranteed
-        # not to include any intermediaries that have dead uses. As a result, missing uses will only ever
-        # show up in the nuses_total count.
-        nleaves = length(defuse.uses) + length(defuse.defs)
-        nuses = 0
-        for idx in intermediaries
-            nuses += used_ssas[idx]
+    function find_finalizer_useidx(defuse::SSADefUse)
+        finalizer_useidx = nothing
+        for (useidx, use) in enumerate(defuse.uses)
+            if use.kind === :finalizer
+                # For now: Only allow one finalizer per allocation
+                finalizer_useidx !== nothing && return false
+                finalizer_useidx = useidx
+            end
+        end
+        if finalizer_useidx === nothing || inlining === nothing
+            return true
         end
-        nuses_total = used_ssas[idx] + nuses - length(intermediaries)
-        nleaves == nuses_total || continue
+        return finalizer_useidx
+    end
+    for (defidx, (intermediaries, defuse)) in defuses
         # Find the type for this allocation
-        defexpr = ir[SSAValue(idx)][:inst]
+        defexpr = ir[SSAValue(defidx)][:stmt]
         isexpr(defexpr, :new) || continue
-        newidx = idx
-        typ = unwrap_unionall(ir.stmts[newidx][:type])
+        typ = unwrap_unionall(ir.stmts[defidx][:type])
         # Could still end up here if we tried to setfield! on an immutable, which would
         # error at runtime, but is not illegal to have in the IR.
         typ = widenconst(typ)
         ismutabletype(typ) || continue
         typ = typ::DataType
-        # First check for any finalizer calls
-        finalizer_idx = nothing
-        for use in defuse.uses
-            if use.kind === :finalizer
-                # For now: Only allow one finalizer per allocation
-                finalizer_idx !== nothing && @goto skip
-                finalizer_idx = use.idx
-            end
+        # Check if there are any uses we did not account for. If so, the variable
+        # escapes and we cannot eliminate the allocation. This works, because we're guaranteed
+        # not to include any intermediaries that have dead uses. As a result, missing uses will only ever
+        # show up in the nuses_total count.
+        nleaves = length(defuse.uses) + length(defuse.defs)
+        nuses = 0
+        for iidx in intermediaries
+            nuses += used_ssas[iidx]
         end
-        if finalizer_idx !== nothing && inlining !== nothing
-            try_resolve_finalizer!(ir, idx, finalizer_idx, defuse, inlining,
-                lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
+        nuses_total = used_ssas[defidx] + nuses - length(intermediaries)
+        all_eliminated = all_forwarded = true
+        if nleaves ≠ nuses_total
+            finalizer_useidx = find_finalizer_useidx(defuse)
+            if finalizer_useidx isa Int
+                nargs = length(ir.argtypes) # COMBAK this might need to be `Int(opt.src.nargs)`
+                estate = EscapeAnalysis.analyze_escapes(ir, nargs, 𝕃ₒ, get_escape_cache(inlining.interp))
+                # disable finalizer inlining when this allocation is aliased to somewhere,
+                # mostly likely to edges of `PhiNode`
+                hasaliases = EscapeAnalysis.getaliases(SSAValue(defidx), estate) !== nothing
+                einfo = estate[SSAValue(defidx)]
+                if !hasaliases && EscapeAnalysis.has_no_escape(einfo)
+                    already = BitSet(use.idx for use in defuse.uses)
+                    for idx = einfo.Liveness
+                        if idx ∉ already
+                            push!(defuse.uses, SSAUse(:EALiveness, idx))
+                        end
+                    end
+                    finalizer_idx = defuse.uses[finalizer_useidx].idx
+                    try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining::InliningState,
+                        lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
+                end
+            end
             continue
+        else
+            finalizer_useidx = find_finalizer_useidx(defuse)
+            if finalizer_useidx isa Int
+                finalizer_idx = defuse.uses[finalizer_useidx].idx
+                try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining::InliningState,
+                    lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
+                deleteat!(defuse.uses, finalizer_useidx)
+                all_eliminated = all_forwarded = false # can't eliminate `setfield!` calls safely
+            elseif !finalizer_useidx
+                continue
+            end
         end
         # Partition defuses by field
         fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
-        all_eliminated = all_forwarded = true
         for use in defuse.uses
             if use.kind === :preserve
                 for du in fielddefuse
@@ -1459,7 +1826,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
                 end
                 continue
             end
-            stmt = ir[SSAValue(use.idx)][:inst] # == `getfield`/`isdefined` call
+            stmt = ir[SSAValue(use.idx)][:stmt] # == `getfield`/`isdefined` call
             # We may have discovered above that this use is dead
             # after the getfield elim of immutables. In that case,
             # it would have been deleted. That's fine, just ignore
@@ -1473,7 +1840,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
             push!(fielddefuse[field].uses, use)
         end
         for def in defuse.defs
-            stmt = ir[SSAValue(def)][:inst]::Expr # == `setfield!` call
+            stmt = ir[SSAValue(def)][:stmt]::Expr # == `setfield!` call
             field = try_compute_fieldidx_stmt(ir, stmt, typ)
             field === nothing && @goto skip
             isconst(typ, field) && @goto skip # we discovered an attempt to mutate a const field, which must error
@@ -1484,11 +1851,11 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
         # but we should come up with semantics for well defined semantics
         # for uninitialized fields first.
         ndefuse = length(fielddefuse)
-        blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# BitSet}}(undef, ndefuse)
+        blocks = Vector{Tuple{#=phiblocks=#Vector{Int},#=allblocks=#BitSet}}(undef, ndefuse)
         for fidx in 1:ndefuse
             du = fielddefuse[fidx]
             isempty(du.uses) && continue
-            push!(du.defs, newidx)
+            push!(du.defs, defidx)
             ldu = compute_live_ins(ir.cfg, du)
             if isempty(ldu.live_in_bbs)
                 phiblocks = Int[]
@@ -1501,8 +1868,8 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
                 for i = 1:length(du.uses)
                     use = du.uses[i]
                     if use.kind === :isdefined
-                        if has_safe_def(ir, get!(lazydomtree), allblocks, du, newidx, use.idx)
-                            ir[SSAValue(use.idx)][:inst] = true
+                        if has_safe_def(ir, get!(lazydomtree), allblocks, du, defidx, use.idx)
+                            ir[SSAValue(use.idx)][:stmt] = true
                         else
                             all_eliminated = false
                         end
@@ -1514,13 +1881,13 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
                             continue
                         end
                     end
-                    has_safe_def(ir, get!(lazydomtree), allblocks, du, newidx, use.idx) || @goto skip
+                    has_safe_def(ir, get!(lazydomtree), allblocks, du, defidx, use.idx) || @goto skip
                 end
             else # always have some definition at the allocation site
                 for i = 1:length(du.uses)
                     use = du.uses[i]
                     if use.kind === :isdefined
-                        ir[SSAValue(use.idx)][:inst] = true
+                        ir[SSAValue(use.idx)][:stmt] = true
                     end
                 end
             end
@@ -1544,9 +1911,10 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
                 # Now go through all uses and rewrite them
                 for use in du.uses
                     if use.kind === :getfield
-                        ir[SSAValue(use.idx)][:inst] = compute_value_for_use(ir, domtree, allblocks,
+                        inst = ir[SSAValue(use.idx)]
+                        inst[:stmt] = compute_value_for_use(ir, domtree, allblocks,
                             du, phinodes, fidx, use.idx)
-                        ir[SSAValue(use.idx)][:flag] |= IR_FLAG_REFINED
+                        add_flag!(inst, IR_FLAG_REFINED)
                     elseif use.kind === :isdefined
                         continue # already rewritten if possible
                     elseif use.kind === :nopreserve
@@ -1565,7 +1933,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
                     end
                 end
                 for b in phiblocks
-                    n = ir[phinodes[b]][:inst]::PhiNode
+                    n = ir[phinodes[b]][:stmt]::PhiNode
                     result_t = Bottom
                     for p in ir.cfg.blocks[b].preds
                         push!(n.edges, p)
@@ -1580,19 +1948,19 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
             # all "usages" (i.e. `getfield` and `isdefined` calls) are eliminated,
             # now eliminate "definitions" (i.e. `setfield!`) calls
             # (NOTE the allocation itself will be eliminated by DCE pass later)
-            for idx in du.defs
-                idx == newidx && continue # this is allocation
+            for didx in du.defs
+                didx == defidx && continue # this is allocation
                 # verify this statement won't throw, otherwise it can't be eliminated safely
-                ssa = SSAValue(idx)
-                if is_nothrow(ir, ssa)
-                    ir[ssa][:inst] = nothing
+                setfield_ssa = SSAValue(didx)
+                if is_nothrow(ir, setfield_ssa)
+                    ir[setfield_ssa][:stmt] = nothing
                 else
                     # We can't eliminate this statement, because it might still
                     # throw an error, but we can mark it as effect-free since we
                     # know we have removed all uses of the mutable allocation.
                     # As a result, if we ever do prove nothrow, we can delete
                     # this statement then.
-                    ir[ssa][:flag] |= IR_FLAG_EFFECT_FREE
+                    add_flag!(ir[setfield_ssa], IR_FLAG_EFFECT_FREE)
                 end
             end
         end
@@ -1601,11 +1969,11 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
             # this means all ccall preserves have been replaced with forwarded loads
             # so we can potentially eliminate the allocation, otherwise we must preserve
             # the whole allocation.
-            push!(intermediaries, newidx)
+            push!(intermediaries, defidx)
         end
         # Insert the new preserves
         for (useidx, new_preserves) in preserve_uses
-            ir[SSAValue(useidx)][:inst] = form_new_preserves(ir[SSAValue(useidx)][:inst]::Expr,
+            ir[SSAValue(useidx)][:stmt] = form_new_preserves(ir[SSAValue(useidx)][:stmt]::Expr,
                 intermediaries, new_preserves)
         end
 
@@ -1613,7 +1981,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
     end
 end
 
-function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preserves::Vector{Any})
+function form_new_preserves(origex::Expr, intermediaries::Union{Vector{Int},SPCSet}, new_preserves::Vector{Any})
     newex = Expr(:foreigncall)
     nccallargs = length(origex.args[3]::SimpleVector)
     for i in 1:(6+nccallargs-1)
@@ -1622,7 +1990,7 @@ function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preser
     for i in (6+nccallargs):length(origex.args)
         x = origex.args[i]
         # don't need to preserve intermediaries
-        if isa(x, SSAValue) && x.id in intermediates
+        if isa(x, SSAValue) && x.id in intermediaries
             continue
         end
         push!(newex.args, x)
@@ -1656,7 +2024,7 @@ end
 
 function adce_erase!(phi_uses::Vector{Int}, extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int, in_worklist::Bool)
     # return whether this made a change
-    if isa(compact.result[idx][:inst], PhiNode)
+    if isa(compact.result[idx][:stmt], PhiNode)
         return maybe_erase_unused!(compact, idx, in_worklist, extra_worklist) do val::SSAValue
             phi_uses[val.id] -= 1
         end
@@ -1671,10 +2039,10 @@ function mark_phi_cycles!(compact::IncrementalCompact, safe_phis::SPCSet, phi::I
     while !isempty(worklist)
         phi = pop!(worklist)
         push!(safe_phis, phi)
-        for ur in userefs(compact.result[phi][:inst])
+        for ur in userefs(compact.result[phi][:stmt])
             val = ur[]
             isa(val, SSAValue) || continue
-            isa(compact[val][:inst], PhiNode) || continue
+            isa(compact[val][:stmt], PhiNode) || continue
             (val.id in safe_phis) && continue
             push!(worklist, val.id)
         end
@@ -1687,7 +2055,7 @@ end
 
 function is_union_phi(compact::IncrementalCompact, idx::Int)
     inst = compact.result[idx]
-    isa(inst[:inst], PhiNode) || return false
+    isa(inst[:stmt], PhiNode) || return false
     return is_some_union(inst[:type])
 end
 
@@ -1739,9 +2107,15 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
     phi_uses = fill(0, length(ir.stmts) + length(ir.new_nodes))
     all_phis = Int[]
     unionphis = Pair{Int,Any}[] # sorted
-    compact = IncrementalCompact(ir)
-    for ((_, idx), stmt) in compact
+    compact = IncrementalCompact(ir, true)
+    made_changes = false
+    for ((old_idx, idx), stmt) in compact
         if isa(stmt, PhiNode)
+            if reprocess_phi_node!(𝕃ₒ, compact, stmt, old_idx)
+                # Phi node has a single predecessor and was deleted
+                made_changes = true
+                continue
+            end
             push!(all_phis, idx)
             if is_some_union(compact.result[idx][:type])
                 push!(unionphis, Pair{Int,Any}(idx, Union{}))
@@ -1759,9 +2133,9 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
         else
             if is_known_call(stmt, typeassert, compact) && length(stmt.args) == 3
                 # nullify safe `typeassert` calls
-                ty, isexact = instanceof_tfunc(argextype(stmt.args[3], compact))
+                ty, isexact = instanceof_tfunc(argextype(stmt.args[3], compact), true)
                 if isexact && ⊑(𝕃ₒ, argextype(stmt.args[2], compact), ty)
-                    compact[idx] = nothing
+                    delete_inst_here!(compact)
                     continue
                 end
             end
@@ -1779,11 +2153,11 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
     non_dce_finish!(compact)
     for phi in all_phis
         inst = compact.result[phi]
-        for ur in userefs(inst[:inst]::PhiNode)
+        for ur in userefs(inst[:stmt]::PhiNode)
             use = ur[]
             if isa(use, SSAValue)
                 phi_uses[use.id] += 1
-                stmt = compact.result[use.id][:inst]
+                stmt = compact.result[use.id][:stmt]
                 if isa(stmt, PhiNode)
                     r = searchsorted(unionphis, use.id; by=first)
                     if !isempty(r)
@@ -1800,17 +2174,19 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
         unionphi = unionphis[i]
         phi = unionphi[1]
         t = unionphi[2]
+        inst = compact.result[phi]
         if t === Union{}
-            stmt = compact[SSAValue(phi)][:inst]::PhiNode
+            stmt = inst[:stmt]::PhiNode
             kill_phi!(compact, phi_uses, 1:length(stmt.values), SSAValue(phi), stmt, true)
+            made_changes = true
             continue
         elseif t === Any
             continue
-        elseif ⊑(𝕃ₒ, compact.result[phi][:type], t)
-            continue
         end
+        ⊏ = strictpartialorder(𝕃ₒ)
+        t ⊏ inst[:type] || continue
         to_drop = Int[]
-        stmt = compact[SSAValue(phi)][:inst]
+        stmt = inst[:stmt]
         stmt === nothing && continue
         stmt = stmt::PhiNode
         for i = 1:length(stmt.values)
@@ -1822,18 +2198,20 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
                 push!(to_drop, i)
             end
         end
-        compact.result[phi][:type] = t
+        inst[:type] = t
+        add_flag!(inst, IR_FLAG_REFINED) # t ⊏ inst[:type]
         kill_phi!(compact, phi_uses, to_drop, SSAValue(phi), stmt, false)
+        made_changes = true
     end
     # Perform simple DCE for unused values
     extra_worklist = Int[]
     for (idx, nused) in Iterators.enumerate(compact.used_ssas)
         idx >= compact.result_idx && break
         nused == 0 || continue
-        adce_erase!(phi_uses, extra_worklist, compact, idx, false)
+        made_changes |= adce_erase!(phi_uses, extra_worklist, compact, idx, false)
     end
     while !isempty(extra_worklist)
-        adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true)
+        made_changes |= adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true)
     end
     # Go back and erase any phi cycles
     changed = true
@@ -1854,16 +2232,18 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
         while !isempty(extra_worklist)
             if adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true)
                 changed = true
+                made_changes = true
             end
         end
     end
-    return complete(compact)
+
+    return Pair{IRCode, Bool}(complete(compact), made_changes)
 end
 
 function is_bb_empty(ir::IRCode, bb::BasicBlock)
     isempty(bb.stmts) && return true
     if length(bb.stmts) == 1
-        stmt = ir[SSAValue(first(bb.stmts))][:inst]
+        stmt = ir[SSAValue(first(bb.stmts))][:stmt]
         return stmt === nothing || isa(stmt, GotoNode)
     end
     return false
@@ -1873,14 +2253,6 @@ end
 function is_legal_bb_drop(ir::IRCode, bbidx::Int, bb::BasicBlock)
     # For the time being, don't drop the first bb, because it has special predecessor semantics.
     bbidx == 1 && return false
-    # If the block we're going to is the same as the fallthrow, it's always legal to drop
-    # the block.
-    length(bb.stmts) == 0 && return true
-    if length(bb.stmts) == 1
-        stmt = ir[SSAValue(first(bb.stmts))][:inst]
-        stmt === nothing && return true
-        ((stmt::GotoNode).label == bbidx + 1) && return true
-    end
     return true
 end
 
@@ -1893,23 +2265,23 @@ function legalize_bb_drop_pred!(ir::IRCode, bb::BasicBlock, bbidx::Int, bbs::Vec
         dbi -= 1
     end
     last_fallthrough_term_ssa = SSAValue(last(bbs[last_fallthrough].stmts))
-    terminator = ir[last_fallthrough_term_ssa][:inst]
+    terminator = ir[last_fallthrough_term_ssa][:stmt]
     if isa(terminator, GotoIfNot)
         if terminator.dest != bbidx
             # The previous terminator's destination matches our fallthrough.
             # If we're also a fallthrough terminator, then we just have
             # to delete the GotoIfNot.
-            our_terminator = ir[SSAValue(last(bb.stmts))][:inst]
+            our_terminator = ir[SSAValue(last(bb.stmts))][:stmt]
             if terminator.dest != (isa(our_terminator, GotoNode) ? our_terminator.label : bbidx + 1)
                 return false
             end
         end
         ir[last_fallthrough_term_ssa] = nothing
         kill_edge!(bbs, last_fallthrough, terminator.dest)
-    elseif isexpr(terminator, :enter)
-        return false
     elseif isa(terminator, GotoNode)
         return true
+    elseif isterminator(terminator)
+        return false
     end
     # Hack, but effective. If we have a predecessor with a fall-through terminator, change the
     # instruction numbering to merge the blocks now such that below processing will properly
@@ -1918,59 +2290,109 @@ function legalize_bb_drop_pred!(ir::IRCode, bb::BasicBlock, bbidx::Int, bbs::Vec
     return true
 end
 
-is_terminator(@nospecialize(inst)) = isa(inst, GotoNode) || isa(inst, GotoIfNot) || isexpr(inst, :enter)
+function follow_map(map::Vector{Int}, idx::Int)
+    while map[idx] ≠ 0
+        idx = map[idx]
+    end
+    return idx
+end
 
-function cfg_simplify!(ir::IRCode)
-    bbs = ir.cfg.blocks
-    merge_into = zeros(Int, length(bbs))
-    merged_succ = zeros(Int, length(bbs))
-    dropped_bbs = Vector{Int}() # sorted
-    function follow_merge_into(idx::Int)
-        while merge_into[idx] != 0
-            idx = merge_into[idx]
-        end
-        return idx
+function ascend_eliminated_preds(bbs::Vector{BasicBlock}, pred::Int)
+    pred == 0 && return pred
+    while pred != 1 && length(bbs[pred].preds) == 1 && length(bbs[pred].succs) == 1
+        pred = bbs[pred].preds[1]
     end
-    function follow_merged_succ(idx::Int)
-        while merged_succ[idx] != 0
-            idx = merged_succ[idx]
+    return pred
+end
+
+# Compute (renamed) successors and predecessors given (renamed) block
+function compute_succs(merged_succ::Vector{Int}, bbs::Vector{BasicBlock}, result_bbs::Vector{Int}, bb_rename_succ::Vector{Int}, i::Int)
+    orig_bb = follow_map(merged_succ, result_bbs[i])
+    return Int[bb_rename_succ[i] for i in bbs[orig_bb].succs]
+end
+
+function compute_preds(bbs::Vector{BasicBlock}, result_bbs::Vector{Int}, bb_rename_pred::Vector{Int}, i::Int)
+    orig_bb = result_bbs[i]
+    preds = copy(bbs[orig_bb].preds)
+    res = Int[]
+    while !isempty(preds)
+        pred = popfirst!(preds)
+        if pred == 0
+            push!(res, 0)
+            continue
+        end
+        r = bb_rename_pred[pred]
+        (r == -2 || r == -1) && continue
+        if r == -3
+            prepend!(preds, bbs[pred].preds)
+        else
+            push!(res, r)
         end
-        return idx
     end
-    function ascend_eliminated_preds(pred)
-        while pred != 1 && length(bbs[pred].preds) == 1 && length(bbs[pred].succs) == 1
-            pred = bbs[pred].preds[1]
+    return res
+end
+
+function add_preds!(all_new_preds::Vector{Int32}, bbs::Vector{BasicBlock}, bb_rename_pred::Vector{Int}, old_edge::Int32)
+    preds = copy(bbs[old_edge].preds)
+    while !isempty(preds)
+        old_edge′ = popfirst!(preds)
+        if old_edge′ == 0
+            push!(all_new_preds, old_edge′)
+            continue
+        end
+        new_edge = bb_rename_pred[old_edge′]
+        if new_edge > 0 && new_edge ∉ all_new_preds
+            push!(all_new_preds, Int32(new_edge))
+        elseif new_edge == -3
+            prepend!(preds, bbs[old_edge′].preds)
         end
-        return pred
     end
+end
+
+function cfg_simplify!(ir::IRCode)
+    bbs = ir.cfg.blocks
+    merge_into = zeros(Int, length(bbs))
+    merged_succ = zeros(Int, length(bbs))
+    dropped_bbs = Vector{Int}() # sorted
 
     # Walk the CFG from the entry block and aggressively combine blocks
     for (idx, bb) in enumerate(bbs)
         if length(bb.succs) == 1
             succ = bb.succs[1]
             if length(bbs[succ].preds) == 1 && succ != 1
-                # Can't merge blocks with :enter terminator even if they
-                # only have one successor.
-                if isexpr(ir[SSAValue(last(bb.stmts))][:inst], :enter)
+                # Can't merge blocks with a non-GotoNode terminator, even if they
+                # only have one successor, because it would not be legal to have that
+                # terminator in the middle of a basic block.
+                terminator = ir[SSAValue(last(bb.stmts))][:stmt]
+                if !isa(terminator, GotoNode) && isterminator(terminator)
                     continue
                 end
                 # Prevent cycles by making sure we don't end up back at `idx`
                 # by following what is to be merged into `succ`
-                if follow_merged_succ(succ) != idx
+                if follow_map(merged_succ, succ) != idx
                     merge_into[succ] = idx
                     merged_succ[idx] = succ
                 end
             elseif merge_into[idx] == 0 && is_bb_empty(ir, bb) && is_legal_bb_drop(ir, idx, bb)
                 # If this BB is empty, we can still merge it as long as none of our successor's phi nodes
                 # reference our predecessors.
+                #
+                # This is for situations like:
+                #   #1 - ...
+                #        goto #3 if not ...
+                #   #2 - (empty)
+                #   #3 - ϕ(#2 => true, #1 => false)
+                #
+                # where we rely on the empty basic block to disambiguate the ϕ-node's value
+
                 found_interference = false
-                preds = Int[ascend_eliminated_preds(pred) for pred in bb.preds]
+                preds = Int[ascend_eliminated_preds(bbs, pred) for pred in bb.preds]
                 for idx in bbs[succ].stmts
-                    stmt = ir[SSAValue(idx)][:inst]
+                    stmt = ir[SSAValue(idx)][:stmt]
                     stmt === nothing && continue
                     isa(stmt, PhiNode) || break
                     for edge in stmt.edges
-                        edge = ascend_eliminated_preds(edge)
+                        edge = ascend_eliminated_preds(bbs, Int(edge))
                         for pred in preds
                             if pred == edge
                                 found_interference = true
@@ -1989,14 +2411,14 @@ function cfg_simplify!(ir::IRCode)
 
     # Assign new BB numbers in DFS order, dropping unreachable blocks
     max_bb_num = 1
-    bb_rename_succ = fill(0, length(bbs))
+    bb_rename_succ = zeros(Int, length(bbs))
     worklist = BitSetBoundedMinPrioritySet(length(bbs))
     push!(worklist, 1)
     while !isempty(worklist)
         i = popfirst!(worklist)
         # Drop blocks that will be merged away
         if merge_into[i] != 0
-            bb_rename_succ[i] = -1
+            bb_rename_succ[i] = typemin(Int)
         end
         # Mark dropped blocks for fixup
         if !isempty(searchsorted(dropped_bbs, i))
@@ -2016,20 +2438,36 @@ function cfg_simplify!(ir::IRCode)
                 # we have to schedule that block next
                 while merged_succ[curr] != 0
                     if bb_rename_succ[curr] == 0
-                        bb_rename_succ[curr] = -1
+                        bb_rename_succ[curr] = typemin(Int)
                     end
                     curr = merged_succ[curr]
                 end
-                terminator = ir.stmts[ir.cfg.blocks[curr].stmts[end]][:inst]
-                if isa(terminator, GotoNode) || isa(terminator, ReturnNode)
-                    break
-                elseif isa(terminator, GotoIfNot)
+                terminator = ir[SSAValue(bbs[curr].stmts[end])][:stmt]
+
+                if isa(terminator, GotoIfNot)
                     if bb_rename_succ[terminator.dest] == 0
                         push!(worklist, terminator.dest)
                     end
-                elseif isexpr(terminator, :enter)
-                    if bb_rename_succ[terminator.args[1]] == 0
-                        push!(worklist, terminator.args[1])
+                elseif isa(terminator, EnterNode)
+                    catchbb = terminator.catch_dest
+                    if catchbb ≠ 0
+                        if bb_rename_succ[catchbb] == 0
+                            push!(worklist, catchbb)
+                        end
+                    end
+                elseif isa(terminator, GotoNode) || isa(terminator, ReturnNode)
+                    # No implicit fall through. Schedule from work list.
+                    break
+                else
+                    is_bottom = ir[SSAValue(bbs[curr].stmts[end])][:type] === Union{}
+                    if is_bottom && !isa(terminator, PhiNode) && terminator !== nothing
+                        # If this is a regular statement (not PhiNode/GotoNode/GotoIfNot
+                        # or the `nothing` special case deletion marker),
+                        # and the type is Union{}, then this may be a terminator.
+                        # Ordinarily we normalize with ReturnNode(), but this is not
+                        # required. In any case, we do not fall through, so we
+                        # do not need to schedule the fall-through block.
+                        break
                     end
                 end
                 ncurr = curr + 1
@@ -2055,9 +2493,9 @@ function cfg_simplify!(ir::IRCode)
         resolved_all = true
         for bb in dropped_bbs
             obb = bb_rename_succ[bb]
-            if obb < -1
+            if obb < 0 && obb != typemin(Int)
                 nsucc = bb_rename_succ[-obb]
-                if nsucc == -1
+                if nsucc == typemin(Int)
                     nsucc = -merge_into[-obb]
                 end
                 bb_rename_succ[bb] = nsucc
@@ -2070,8 +2508,10 @@ function cfg_simplify!(ir::IRCode)
     bb_rename_pred = zeros(Int, length(bbs))
     for i = 1:length(bbs)
         if bb_rename_succ[i] == 0
-            bb_rename_succ[i] = -1
+            bb_rename_succ[i] = -2
             bb_rename_pred[i] = -2
+        elseif bb_rename_succ[i] == typemin(Int)
+            bb_rename_succ[i] = -2
         end
     end
 
@@ -2115,7 +2555,7 @@ function cfg_simplify!(ir::IRCode)
         elseif is_multi
             bb_rename_pred[i] = -3
         else
-            bbnum = follow_merge_into(pred)
+            bbnum = follow_map(merge_into, pred)
             bb_rename_pred[i] = bb_rename_succ[bbnum]
         end
     end
@@ -2137,59 +2577,23 @@ function cfg_simplify!(ir::IRCode)
         bb_starts[i+1] = bb_starts[i] + result_bbs_lengths[i]
     end
 
-    cresult_bbs = let result_bbs = result_bbs,
-                      merged_succ = merged_succ,
-                      merge_into = merge_into,
-                      bbs = bbs,
-                      bb_rename_succ = bb_rename_succ
-
-        # Compute (renamed) successors and predecessors given (renamed) block
-        function compute_succs(i::Int)
-            orig_bb = follow_merged_succ(result_bbs[i])
-            return Int[bb_rename_succ[i] for i in bbs[orig_bb].succs]
-        end
-        function compute_preds(i::Int)
-            orig_bb = result_bbs[i]
-            preds = bbs[orig_bb].preds
-            res = Int[]
-            function scan_preds!(preds::Vector{Int})
-                for pred in preds
-                    if pred == 0
-                        push!(res, 0)
-                        continue
-                    end
-                    r = bb_rename_pred[pred]
-                    (r == -2 || r == -1) && continue
-                    if r == -3
-                        scan_preds!(bbs[pred].preds)
-                    else
-                        push!(res, r)
-                    end
-                end
-            end
-            scan_preds!(preds)
-            return res
-        end
-
-        BasicBlock[
-            BasicBlock(StmtRange(bb_starts[i],
-                                 i+1 > length(bb_starts) ?
-                                    length(compact.result) : bb_starts[i+1]-1),
-                       compute_preds(i),
-                       compute_succs(i))
-            for i = 1:length(result_bbs)]
-    end
+    cresult_bbs = BasicBlock[
+        BasicBlock(StmtRange(bb_starts[i],
+                             i+1 > length(bb_starts) ? length(compact.result) : bb_starts[i+1]-1),
+                   compute_preds(bbs, result_bbs, bb_rename_pred, i),
+                   compute_succs(merged_succ, bbs, result_bbs, bb_rename_succ, i))
+        for i = 1:length(result_bbs)]
 
     # Fixup terminators for any blocks that would have caused double edges
     for (bbidx, (new_bb, old_bb)) in enumerate(zip(cresult_bbs, result_bbs))
         @assert length(new_bb.succs) <= 2
         length(new_bb.succs) <= 1 && continue
         if new_bb.succs[1] == new_bb.succs[2]
-            old_bb2 = findfirst(x::Int->x==bbidx, bb_rename_pred)
+            old_bb2 = findfirst(x::Int->x==bbidx, bb_rename_pred)::Int
             terminator = ir[SSAValue(last(bbs[old_bb2].stmts))]
-            @assert terminator[:inst] isa GotoIfNot
+            @assert terminator[:stmt] isa GotoIfNot
             # N.B.: The dest will be renamed in process_node! below
-            terminator[:inst] = GotoNode(terminator[:inst].dest)
+            terminator[:stmt] = GotoNode(terminator[:stmt].dest::Int)
             pop!(new_bb.succs)
             new_succ = cresult_bbs[new_bb.succs[1]]
             for (i, nsp) in enumerate(new_succ.preds)
@@ -2204,83 +2608,85 @@ function cfg_simplify!(ir::IRCode)
     # Run instruction compaction to produce the result,
     # but we're messing with the CFG
     # so we don't want compaction to do so independently
-    compact = IncrementalCompact(ir, CFGTransformState(true, false, cresult_bbs, bb_rename_pred, bb_rename_succ))
+    compact = IncrementalCompact(ir, CFGTransformState(true, false, cresult_bbs, bb_rename_pred, bb_rename_succ, nothing))
     result_idx = 1
     for (idx, orig_bb) in enumerate(result_bbs)
         ms = orig_bb
         bb_start = true
         while ms != 0
-            for i in bbs[ms].stmts
+            old_bb_stmts = bbs[ms].stmts
+            for i in old_bb_stmts
                 node = ir.stmts[i]
                 compact.result[compact.result_idx] = node
-                if isa(node[:inst], GotoNode) && merged_succ[ms] != 0
+                stmt = node[:stmt]
+                if isa(stmt, GotoNode) && merged_succ[ms] != 0
                     # If we merged a basic block, we need remove the trailing GotoNode (if any)
-                    compact.result[compact.result_idx][:inst] = nothing
-                elseif isa(node[:inst], PhiNode)
-                    phi = node[:inst]
+                    compact.result[compact.result_idx][:stmt] = nothing
+                elseif isa(stmt, PhiNode)
+                    phi = stmt
                     values = phi.values
                     (; ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact
                     ssa_rename[i] = SSAValue(compact.result_idx)
-                    processed_idx = i
-                    renamed_values = process_phinode_values(values, late_fixup, processed_idx, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true, nothing)
+                    already_inserted = function (branch::Int, val::OldSSAValue)
+                        if val.id in old_bb_stmts
+                            return val.id <= i
+                        end
+                        return 0 < bb_rename_pred[phi.edges[branch]] < idx
+                    end
                     edges = Int32[]
                     values = Any[]
-                    sizehint!(edges, length(phi.edges)); sizehint!(values, length(renamed_values))
+                    sizehint!(edges, length(phi.edges)); sizehint!(values, length(phi.values))
                     for old_index in 1:length(phi.edges)
                         old_edge = phi.edges[old_index]
                         new_edge = bb_rename_pred[old_edge]
                         if new_edge > 0
                             push!(edges, new_edge)
-                            if isassigned(renamed_values, old_index)
-                                push!(values, renamed_values[old_index])
+                            if isassigned(phi.values, old_index)
+                                val = process_phinode_value(phi.values, old_index, late_fixup, already_inserted, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true, nothing)
+                                push!(values, val)
                             else
                                 resize!(values, length(values)+1)
                             end
+                        elseif new_edge == -1
+                            @assert length(phi.edges) == 1
+                            if isassigned(phi.values, old_index)
+                                val = process_phinode_value(phi.values, old_index, late_fixup, already_inserted, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true, nothing)
+                                push!(edges, -1)
+                                push!(values, val)
+                            end
                         elseif new_edge == -3
                             # Multiple predecessors, we need to expand out this phi
                             all_new_preds = Int32[]
-                            function add_preds!(old_edge)
-                                for old_edge′ in bbs[old_edge].preds
-                                    new_edge = bb_rename_pred[old_edge′]
-                                    if new_edge > 0 && !in(new_edge, all_new_preds)
-                                        push!(all_new_preds, new_edge)
-                                    elseif new_edge == -3
-                                        add_preds!(old_edge′)
-                                    end
-                                end
-                            end
-                            add_preds!(old_edge)
+                            add_preds!(all_new_preds, bbs, bb_rename_pred, old_edge)
                             append!(edges, all_new_preds)
-                            if isassigned(renamed_values, old_index)
-                                val = renamed_values[old_index]
-                                for _ in 1:length(all_new_preds)
-                                    push!(values, val)
-                                end
-                                length(all_new_preds) == 0 && kill_current_use!(compact, val)
-                                for _ in 2:length(all_new_preds)
-                                    count_added_node!(compact, val)
+                            np = length(all_new_preds)
+                            if np > 0
+                                if isassigned(phi.values, old_index)
+                                    val = process_phinode_value(phi.values, old_index, late_fixup, already_inserted, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true, nothing)
+                                    for p in 1:np
+                                        push!(values, val)
+                                        p > 2 && count_added_node!(compact, val)
+                                    end
+                                else
+                                    resize!(values, length(values)+np)
                                 end
-                            else
-                                resize!(values, length(values)+length(all_new_preds))
                             end
-                        else
-                            isassigned(renamed_values, old_index) && kill_current_use!(compact, renamed_values[old_index])
                         end
                     end
                     if length(edges) == 0 || (length(edges) == 1 && !isassigned(values, 1))
-                        compact.result[compact.result_idx][:inst] = nothing
+                        compact.result[compact.result_idx][:stmt] = nothing
                     elseif length(edges) == 1 && !bb_start
-                        compact.result[compact.result_idx][:inst] = values[1]
+                        compact.result[compact.result_idx][:stmt] = values[1]
                     else
                         @assert bb_start
-                        compact.result[compact.result_idx][:inst] = PhiNode(edges, values)
+                        compact.result[compact.result_idx][:stmt] = PhiNode(edges, values)
                     end
                 else
                     ri = process_node!(compact, compact.result_idx, node, i, i, ms, true)
                     if ri == compact.result_idx
                         # process_node! wanted this statement dropped. We don't do this,
                         # but we still need to erase the node
-                        compact.result[compact.result_idx][:inst] = nothing
+                        compact.result[compact.result_idx][:stmt] = nothing
                     end
                 end
                 # We always increase the result index to ensure a predicatable
diff --git a/base/compiler/ssair/show.jl b/Compiler/src/ssair/show.jl
similarity index 60%
rename from base/compiler/ssair/show.jl
rename to Compiler/src/ssair/show.jl
index b420eb32b1205..2947a381be959 100644
--- a/base/compiler/ssair/show.jl
+++ b/Compiler/src/ssair/show.jl
@@ -1,19 +1,21 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# This file is not loaded into `Core.Compiler` but rather loaded into the context of
-# `Base.IRShow` and thus does not participate in bootstrapping.
+# This file does not participate in bootstrapping, but is included in the system image by
+# being loaded from `base/show.jl`. Compiler.jl as the standard library will simply include
+# this file in the context of `Compiler.IRShow`.
 
-@nospecialize
+using Base, Core.IR
 
-if Pair != Base.Pair
-import Base: Base, IOContext, string, join, sprint
-IOContext(io::IO, KV::Pair) = IOContext(io, Base.Pair(KV[1], KV[2]))
-length(s::String) = Base.length(s)
-^(s::String, i::Int) = Base.:^(s, i)
-end
+import Base: show
+using Base: isexpr, prec_decl, show_unquoted, with_output_color
+using .Compiler: ALWAYS_FALSE, ALWAYS_TRUE, argextype, BasicBlock, block_for_inst,
+    CachedMethodTable, CFG, compute_basic_blocks, DebugInfoStream, Effects,
+    EMPTY_SPTYPES, getdebugidx, IncrementalCompact, InferenceResult, InferenceState,
+    InvalidIRError, IRCode, LimitedAccuracy, NativeInterpreter, scan_ssa_use!,
+    singleton_type, sptypes_from_meth_instance, StmtRange, Timings, VarState, widenconst,
+    get_ci_mi, get_ci_abi
 
-import Base: show_unquoted
-using Base: printstyled, with_output_color, prec_decl, @invoke
+@nospecialize
 
 function Base.show(io::IO, cfg::CFG)
     print(io, "CFG with $(length(cfg.blocks)) blocks:")
@@ -31,7 +33,50 @@ function Base.show(io::IO, cfg::CFG)
     end
 end
 
-function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxlength_idx::Int, color::Bool, show_type::Bool)
+function maybe_argextype(
+    @nospecialize(x),
+    src::Union{IRCode,IncrementalCompact,CodeInfo},
+    sptypes::Vector{VarState},
+)
+    return try
+        argextype(x, src, sptypes)
+    catch err
+        !(err isa InvalidIRError) && rethrow()
+        nothing
+    end
+end
+
+const inlined_apply_iterate_types = Union{Array,Memory,Tuple,NamedTuple,Core.SimpleVector}
+
+function builtin_call_has_dispatch(
+    @nospecialize(f),
+    args::Vector{Any},
+    src::Union{IRCode,IncrementalCompact,CodeInfo},
+    sptypes::Vector{VarState},
+)
+    if f === Core._apply_iterate && length(args) >= 3
+        # The implementation of _apply_iterate has hand-inlined implementations
+        # for <builtin>(v::Union{Tuple,NamedTuple,Memory,Array,SimpleVector}...)
+        # which perform no dynamic dispatch
+        constructort = maybe_argextype(args[3], src, sptypes)
+        if constructort === nothing || !(widenconst(constructort) <: Core.Builtin)
+            return true
+        end
+        for arg in args[4:end]
+            argt = maybe_argextype(arg, src, sptypes)
+            if argt === nothing || !(widenconst(argt) <: inlined_apply_iterate_types)
+                return true
+            end
+        end
+    elseif (f === Core.invoke_in_world || f === Core._call_in_world_total || f === Core.invokelatest)
+        # These apply-like builtins are effectively dynamic calls
+        return true
+    end
+    return false
+end
+
+function print_stmt(io::IO, idx::Int, @nospecialize(stmt), code::Union{IRCode,CodeInfo,IncrementalCompact},
+                    sptypes::Vector{VarState}, used::BitSet, maxlength_idx::Int, color::Bool, show_type::Bool, label_dynamic_calls::Bool)
     if idx in used
         idx_s = string(idx)
         pad = " "^(maxlength_idx - length(idx_s) + 1)
@@ -48,16 +93,42 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng
         print(io, ", ")
         print(io, stmt.typ)
         print(io, ")")
-    elseif isexpr(stmt, :invoke) && length(stmt.args) >= 2 && isa(stmt.args[1], MethodInstance)
+    elseif isexpr(stmt, :invoke) && length(stmt.args) >= 2 && isa(stmt.args[1], Union{MethodInstance,CodeInstance})
         stmt = stmt::Expr
         # TODO: why is this here, and not in Base.show_unquoted
-        print(io, "invoke ")
-        linfo = stmt.args[1]::Core.MethodInstance
-        show_unquoted(io, stmt.args[2], indent)
-        print(io, "(")
+        ci = stmt.args[1]
+        if ci isa Core.CodeInstance
+            printstyled(io, "   invoke "; color = :light_black)
+            mi = get_ci_mi(ci)
+            abi = get_ci_abi(ci)
+        else
+            printstyled(io, "dynamic invoke "; color = :yellow)
+            abi = (ci::Core.MethodInstance).specTypes
+        end
         # XXX: this is wrong if `sig` is not a concretetype method
         # more correct would be to use `fieldtype(sig, i)`, but that would obscure / discard Varargs information in show
-        sig = linfo.specTypes == Tuple ? Core.svec() : Base.unwrap_unionall(linfo.specTypes).parameters::Core.SimpleVector
+        sig = abi == Tuple ? Core.svec() : Base.unwrap_unionall(abi).parameters::Core.SimpleVector
+        f = stmt.args[2]
+        ft = maybe_argextype(f, code, sptypes)
+
+        # We can elide the type for arg0 if it...
+        skip_ftype = (length(sig) == 0) # doesn't exist...
+        skip_ftype = skip_ftype || (
+            # ... or, f prints as a user-accessible value...
+            (f isa GlobalRef) &&
+            # ... and matches the value of the singleton type of the invoked MethodInstance
+            (singleton_type(ft) === singleton_type(sig[1]) !== nothing)
+        )
+        if skip_ftype
+            show_unquoted(io, f, indent)
+        else
+            print(io, "(")
+            show_unquoted(io, f, indent)
+            print(io, "::", sig[1], ")")
+        end
+
+        # Print the remaining arguments (with type annotations from the invoked MethodInstance)
+        print(io, "(")
         print_arg(i) = sprint(; context=io) do io
             show_unquoted(io, stmt.args[i], indent)
             if (i - 1) <= length(sig)
@@ -66,19 +137,42 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng
         end
         join(io, (print_arg(i) for i = 3:length(stmt.args)), ", ")
         print(io, ")")
+        # TODO: if we have a CodeInstance, should we print that rettype info here, which may differ (wider or narrower than the ssavaluetypes)
+    elseif isexpr(stmt, :call) && length(stmt.args) >= 1 && label_dynamic_calls
+        ft = maybe_argextype(stmt.args[1], code, sptypes)
+        f = singleton_type(ft)
+        if isa(f, Core.IntrinsicFunction)
+            printstyled(io, "intrinsic "; color = :light_black)
+        elseif isa(f, Core.Builtin)
+            if builtin_call_has_dispatch(f, stmt.args, code, sptypes)
+                printstyled(io, "dynamic builtin "; color = :yellow)
+            else
+                printstyled(io, "  builtin "; color = :light_black)
+            end
+        elseif ft === nothing
+            # This should only happen when, e.g., printing a call that targets
+            # an out-of-bounds SSAValue or similar
+            # (i.e. under normal circumstances, dead code)
+            printstyled(io, "  unknown "; color = :light_black)
+        elseif widenconst(ft) <: Core.Builtin
+            printstyled(io, "dynamic builtin "; color = :yellow)
+        else
+            printstyled(io, "  dynamic "; color = :yellow)
+        end
+        show_unquoted(io, stmt, indent, show_type ? prec_decl : 0)
     # given control flow information, we prefer to print these with the basic block #, instead of the ssa %
-    elseif isexpr(stmt, :enter) && length((stmt::Expr).args) == 1 && (stmt::Expr).args[1] isa Int
-        print(io, "\$(Expr(:enter, #", (stmt::Expr).args[1]::Int, "))")
+    elseif isa(stmt, EnterNode)
+        print(io, "enter #", stmt.catch_dest, "")
+        if isdefined(stmt, :scope)
+            print(io, " with scope ")
+            show_unquoted(io, stmt.scope, indent)
+        end
     elseif stmt isa GotoNode
         print(io, "goto #", stmt.label)
     elseif stmt isa PhiNode
-        show_unquoted_phinode(io, stmt, indent, "#")
+        Base.show_unquoted_phinode(io, stmt, indent, "#")
     elseif stmt isa GotoIfNot
-        show_unquoted_gotoifnot(io, stmt, indent, "#")
-    elseif stmt isa TypedSlot
-        # call `show` with the type set to Any so it will not be shown, since
-        # we will show the type ourselves.
-        show_unquoted(io, SlotNumber(stmt.id), indent, show_type ? prec_decl : 0)
+        Base.show_unquoted_gotoifnot(io, stmt, indent, "#")
     # everything else in the IR, defer to the generic AST printer
     else
         show_unquoted(io, stmt, indent, show_type ? prec_decl : 0)
@@ -86,83 +180,13 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng
     nothing
 end
 
-show_unquoted(io::IO, val::Argument, indent::Int, prec::Int) = show_unquoted(io, Core.SlotNumber(val.n), indent, prec)
-
-show_unquoted(io::IO, stmt::PhiNode, indent::Int, ::Int) = show_unquoted_phinode(io, stmt, indent, "%")
-function show_unquoted_phinode(io::IO, stmt::PhiNode, indent::Int, prefix::String)
-    args = String[let
-        e = stmt.edges[i]
-        v = !isassigned(stmt.values, i) ? "#undef" :
-            sprint(; context=io) do io′
-                show_unquoted(io′, stmt.values[i], indent)
-            end
-        "$prefix$e => $v"
-        end for i in 1:length(stmt.edges)
-    ]
-    print(io, "φ ", '(')
-    join(io, args, ", ")
-    print(io, ')')
-end
-
-function show_unquoted(io::IO, stmt::PhiCNode, indent::Int, ::Int)
-    print(io, "φᶜ (")
-    first = true
-    for v in stmt.values
-        first ? (first = false) : print(io, ", ")
-        show_unquoted(io, v, indent)
-    end
-    print(io, ")")
-end
-
-function show_unquoted(io::IO, stmt::PiNode, indent::Int, ::Int)
-    print(io, "π (")
-    show_unquoted(io, stmt.val, indent)
-    print(io, ", ")
-    printstyled(io, stmt.typ, color=:cyan)
-    print(io, ")")
-end
-
-function show_unquoted(io::IO, stmt::UpsilonNode, indent::Int, ::Int)
-    print(io, "ϒ (")
-    isdefined(stmt, :val) ?
-        show_unquoted(io, stmt.val, indent) :
-        print(io, "#undef")
-    print(io, ")")
-end
-
-function show_unquoted(io::IO, stmt::ReturnNode, indent::Int, ::Int)
-    if !isdefined(stmt, :val)
-        print(io, "unreachable")
-    else
-        print(io, "return ")
-        show_unquoted(io, stmt.val, indent)
-    end
-end
-
-show_unquoted(io::IO, stmt::GotoIfNot, indent::Int, ::Int) = show_unquoted_gotoifnot(io, stmt, indent, "%")
-function show_unquoted_gotoifnot(io::IO, stmt::GotoIfNot, indent::Int, prefix::String)
-    print(io, "goto ", prefix, stmt.dest, " if not ")
-    show_unquoted(io, stmt.cond, indent)
-end
-
-function compute_inlining_depth(linetable::Vector, iline::Int32)
-    iline == 0 && return 1
-    depth = -1
-    while iline != 0
-        depth += 1
-        lineinfo = linetable[iline]::LineInfoNode
-        iline = lineinfo.inlined_at
-    end
-    return depth
-end
-
 function should_print_ssa_type(@nospecialize node)
     if isa(node, Expr)
-        return !(node.head in (:gc_preserve_begin, :gc_preserve_end, :meta, :enter, :leave))
+        return !(node.head in (:gc_preserve_begin, :gc_preserve_end, :meta, :leave))
     end
     return !isa(node, PiNode)   && !isa(node, GotoIfNot) &&
            !isa(node, GotoNode) && !isa(node, ReturnNode) &&
-           !isa(node, QuoteNode)
+           !isa(node, QuoteNode) && !isa(node, EnterNode)
 end
 
 function default_expr_type_printer(io::IO; @nospecialize(type), used::Bool, show_type::Bool=true, _...)
@@ -171,36 +195,31 @@ function default_expr_type_printer(io::IO; @nospecialize(type), used::Bool, show
     return nothing
 end
 
-function normalize_method_name(m)
+function method_name(@nospecialize m)
+    if m isa LineInfoNode
+        m = m.method
+    end
+    if m isa MethodInstance
+        m = m.def
+    end
     if m isa Method
-        return m.name
-    elseif m isa MethodInstance
-        return (m.def::Method).name
-    elseif m isa Symbol
+        m = m.name
+    end
+    if m isa Module
+        return :var"top-level scope"
+    end
+    if m isa Symbol
         return m
-    else
-        return Symbol("")
     end
+    return :var""
 end
-@noinline method_name(m::LineInfoNode) = normalize_method_name(m.method)
-
-# converts the linetable for line numbers
-# into a list in the form:
-#   1 outer-most-frame
-#   2   inlined-frame
-#   3     innermost-frame
-function compute_loc_stack(linetable::Vector, line::Int32)
-    stack = Int[]
-    while line != 0
-        entry = linetable[line]::LineInfoNode
-        pushfirst!(stack, line)
-        line = entry.inlined_at
-    end
-    return stack
+@noinline function normalize_method_name(@nospecialize m)
+    name = method_name(m)
+    return name === :var"" ? :none : name
 end
 
 """
-    Compute line number annotations for an IRCode
+    Compute line number annotations for an IRCode or CodeInfo.
 
 This functions compute three sets of annotations for each IR line. Take the following
 example (taken from `@code_typed sin(1.0)`):
@@ -254,94 +273,84 @@ We get:
   └──      return %3                      │
 ```
 
-Even though we were in the `f` scope since the first statement, it tooks us two statements
+Even though we were in the `f` scope since the first statement, it took us two statements
 to catch up and print the intermediate scopes. Which scope is printed is indicated both
 by the indentation of the method name and by an increased thickness of the appropriate
 line for the scope.
 """
-function compute_ir_line_annotations(code::IRCode)
+function compute_ir_line_annotations(code::Union{IRCode,CodeInfo})
     loc_annotations = String[]
     loc_methods = String[]
     loc_lineno = String[]
     cur_group = 1
     last_lineno = 0
-    last_stack = Int[]
+    last_stack = LineInfoNode[] # nb. only file, line, and method are populated in this
     last_printed_depth = 0
-    linetable = code.linetable
-    lines = code.stmts.line
-    last_line = zero(eltype(lines))
-    for idx in 1:length(lines)
+    debuginfo = code.debuginfo
+    def = :var"unknown scope"
+    n = isa(code, IRCode) ? length(code.stmts) : length(code.code)
+    for idx in 1:n
         buf = IOBuffer()
-        line = lines[idx]
         print(buf, "│")
-        depth = compute_inlining_depth(linetable, line)
-        iline = line
-        lineno = 0
+        stack = buildLineInfoNode(debuginfo, def, idx)
+        lineno::Int = 0
         loc_method = ""
-        if line != 0
-            stack = compute_loc_stack(linetable, line)
-            lineno = linetable[stack[1]].line
+        isempty(stack) && (stack = last_stack)
+        if !isempty(stack)
+            lineno = stack[1].line
             x = min(length(last_stack), length(stack))
-            if length(stack) != 0
-                # Compute the last depth that was in common
-                first_mismatch = let last_stack=last_stack
-                    findfirst(i->last_stack[i] != stack[i], 1:x)
-                end
-                # If the first mismatch is the last stack frame, that might just
-                # be a line number mismatch in inner most frame. Ignore those
-                if length(last_stack) == length(stack) && first_mismatch == length(stack)
-                    last_entry, entry = linetable[last_stack[end]], linetable[stack[end]]
-                    if method_name(last_entry) === method_name(entry) && last_entry.file === entry.file
-                        first_mismatch = nothing
-                    end
+            depth = length(stack) - 1
+            # Compute the last depth that was in common
+            first_mismatch = let last_stack=last_stack, stack=stack
+                findfirst(i->last_stack[i] != stack[i], 1:x)
+            end
+            # If the first mismatch is the last stack frame, that might just
+            # be a line number mismatch in inner most frame. Ignore those
+            if length(last_stack) == length(stack) && first_mismatch == length(stack)
+                last_entry, entry = last_stack[end], stack[end]
+                if method_name(last_entry) === method_name(entry) && last_entry.file === entry.file
+                    first_mismatch = nothing
                 end
-                last_depth = something(first_mismatch, x+1)-1
-                if min(depth, last_depth) > last_printed_depth
-                    printing_depth = min(depth, last_printed_depth + 1)
-                    last_printed_depth = printing_depth
-                elseif length(stack) > length(last_stack) || first_mismatch !== nothing
-                    printing_depth = min(depth, last_depth + 1)
-                    last_printed_depth = printing_depth
-                else
-                    printing_depth = 0
+            end
+            last_depth = something(first_mismatch, x+1)-1
+            if min(depth, last_depth) > last_printed_depth
+                printing_depth = min(depth, last_printed_depth + 1)
+                last_printed_depth = printing_depth
+            elseif length(stack) > length(last_stack) || first_mismatch !== nothing
+                printing_depth = min(depth, last_depth + 1)
+                last_printed_depth = printing_depth
+            else
+                printing_depth = 0
+            end
+            stole_one = false
+            if printing_depth != 0
+                for _ in 1:(printing_depth-1)
+                    print(buf, "│")
                 end
-                stole_one = false
-                if printing_depth != 0
-                    for _ in 1:(printing_depth-1)
+                if printing_depth <= last_depth-1 && first_mismatch === nothing
+                    print(buf, "┃")
+                    for _ in printing_depth+1:min(depth, last_depth)
                         print(buf, "│")
                     end
-                    if printing_depth <= last_depth-1 && first_mismatch === nothing
-                        print(buf, "┃")
-                        for _ in printing_depth+1:min(depth, last_depth)
-                            print(buf, "│")
-                        end
-                    else
-                        stole_one = true
-                        print(buf, "╻")
-                    end
                 else
-                    for _ in 1:min(depth, last_depth)
-                        print(buf, "│")
-                    end
+                    stole_one = true
+                    print(buf, "╻")
                 end
-                print(buf, "╷"^max(0, depth - last_depth - stole_one))
-                if printing_depth != 0
-                    if length(stack) == printing_depth
-                        loc_method = line
-                    else
-                        loc_method = stack[printing_depth + 1]
-                    end
-                    loc_method = method_name(linetable[loc_method])
+            else
+                for _ in 1:min(depth, last_depth)
+                    print(buf, "│")
                 end
-                loc_method = string(" "^printing_depth, loc_method)
             end
+            print(buf, "╷"^max(0, depth - last_depth - stole_one))
+            if printing_depth != 0
+                loc_method = normalize_method_name(stack[printing_depth + 1])
+            end
+            loc_method = string(" "^printing_depth, loc_method)
             last_stack = stack
-            entry = linetable[line]
         end
-        push!(loc_annotations, String(take!(buf)))
+        push!(loc_annotations, takestring!(buf))
         push!(loc_lineno, (lineno != 0 && lineno != last_lineno) ? string(lineno) : "")
         push!(loc_methods, loc_method)
-        last_line = line
         (lineno != 0) && (last_lineno = lineno)
         nothing
     end
@@ -350,19 +359,87 @@ end
 
 Base.show(io::IO, code::Union{IRCode, IncrementalCompact}) = show_ir(io, code)
 
+# A line_info_preprinter for disabling line info printing
 lineinfo_disabled(io::IO, linestart::String, idx::Int) = ""
 
-function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
+# utility function to extract the file name from a DebugInfo object
+function debuginfo_file1(debuginfo::Union{DebugInfo,DebugInfoStream})
+    def = debuginfo.def
+    if def isa MethodInstance
+        def = def.def
+    end
+    if def isa Method
+        def = def.file
+    end
+    if def isa Symbol
+        return def
+    end
+    return :var"<unknown>"
+end
+
+# utility function to extract the first line number and file of a block of code
+function debuginfo_firstline(debuginfo::Union{DebugInfo,DebugInfoStream})
+    linetable = debuginfo.linetable
+    while linetable != nothing
+        debuginfo = linetable
+        linetable = debuginfo.linetable
+    end
+    codeloc = getdebugidx(debuginfo, 0)
+    return debuginfo_file1(debuginfo), codeloc[1]
+end
+
+struct LineInfoNode
+    method # ::Union{Method,MethodInstance,Symbol}
+    file::Symbol
+    line::Int32
+end
+
+# utility function for converting a debuginfo object a particular pc to list of LineInfoNodes representing the inlining info at that pc for function `def`
+# which is either `nothing` (macro-expand), a module (top-level), a Method (unspecialized code) or a MethodInstance (specialized code)
+# Returns `false` if the line info should not be updated with this info because this
+# statement has no effect on the line numbers. The `scopes` will still be populated however
+# with as much information as was available about the inlining at that statement.
+function append_scopes!(scopes::Vector{LineInfoNode}, pc::Int, debuginfo, @nospecialize(def))
+    doupdate = true
+    while true
+        debuginfo.def isa Symbol || (def = debuginfo.def)
+        codeloc = getdebugidx(debuginfo, pc)
+        line::Int = codeloc[1]
+        inl_to::Int = codeloc[2]
+        doupdate &= line != 0 || inl_to != 0 # disabled debug info--no update
+        if debuginfo.linetable === nothing || pc <= 0 || line < 0
+            line < 0 && (doupdate = false; line = 0) # broken debug info
+            push!(scopes, LineInfoNode(def, debuginfo_file1(debuginfo), Int32(line)))
+        else
+            doupdate = append_scopes!(scopes, line, debuginfo.linetable::DebugInfo, def) && doupdate
+        end
+        inl_to == 0 && return doupdate
+        def = :var"macro expansion"
+        debuginfo = debuginfo.edges[inl_to]
+        pc::Int = codeloc[3]
+    end
+end
+
+# utility wrapper around `append_scopes!` that returns an empty list instead of false
+# when there is no applicable line update
+function buildLineInfoNode(debuginfo, @nospecialize(def), pc::Int)
+    DI = LineInfoNode[]
+    append_scopes!(DI, pc, debuginfo, def) || empty!(DI)
+    return DI
+end
+
+# A default line_info_preprinter for printing accurate line number information
+function DILineInfoPrinter(debuginfo, def, showtypes::Bool=false)
     context = LineInfoNode[]
     context_depth = Ref(0)
     indent(s::String) = s^(max(context_depth[], 1) - 1)
-    function emit_lineinfo_update(io::IO, linestart::String, lineidx::Int32)
+    function emit_lineinfo_update(io::IO, linestart::String, pc::Int)
         # internal configuration options:
         linecolor = :yellow
         collapse = showtypes ? false : true
         indent_all = true
-        # convert lineidx to a vector
-        if lineidx == typemin(Int32)
+        # convert pc to a vector
+        if pc == 0
             # sentinel value: reset internal (and external) state
             pops = indent("└")
             if !isempty(pops)
@@ -372,13 +449,10 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
             end
             empty!(context)
             context_depth[] = 0
-        elseif lineidx > 0 # just skip over lines with no debug info at all
-            DI = LineInfoNode[]
-            while lineidx != 0
-                entry = linetable[lineidx]::LineInfoNode
-                push!(DI, entry)
-                lineidx = entry.inlined_at
-            end
+            return ""
+        end
+        DI = reverse!(buildLineInfoNode(debuginfo, def, pc))
+        if !isempty(DI)
             # FOR DEBUGGING, or if you just like very excessive output:
             # this prints out the context in full for every statement
             #empty!(context)
@@ -460,7 +534,7 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
                 started::Bool = false
                 if !update_line_only && showtypes && !isa(frame.method, Symbol) && nctx != 1
                     print(io, linestart)
-                    Base.with_output_color(linecolor, io) do io
+                    with_output_color(linecolor, io) do io
                         print(io, indent("│"))
                         print(io, "┌ invoke ", frame.method)
                         println(io)
@@ -468,7 +542,7 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
                     started = true
                 end
                 print(io, linestart)
-                Base.with_output_color(linecolor, io) do io
+                with_output_color(linecolor, io) do io
                     print(io, indent("│"))
                     push!(context, frame)
                     if update_line_only
@@ -508,6 +582,7 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
             #end
         end
         indent_all || return ""
+        context_depth[] <= 1 && return ""
         return sprint(io -> printstyled(io, indent("│"), color=linecolor), context=io)
     end
     return emit_lineinfo_update
@@ -524,16 +599,28 @@ end
 - `should_print_stmt(idx::Int) -> Bool`: whether the statement at index `idx` should be
   printed as part of the IR or not
 - `bb_color`: color used for printing the basic block brackets on the left
+- `label_dynamic_calls`: whether to label calls as dynamic / builtin / intrinsic
 """
 struct IRShowConfig
     line_info_preprinter
     line_info_postprinter
     should_print_stmt
     bb_color::Symbol
-    function IRShowConfig(line_info_preprinter, line_info_postprinter=default_expr_type_printer;
-                          should_print_stmt=Returns(true), bb_color::Symbol=:light_black)
-        return new(line_info_preprinter, line_info_postprinter, should_print_stmt, bb_color)
-    end
+    label_dynamic_calls::Bool
+
+    IRShowConfig(
+        line_info_preprinter,
+        line_info_postprinter=default_expr_type_printer;
+        should_print_stmt=Returns(true),
+        bb_color::Symbol=:light_black,
+        label_dynamic_calls=true
+    ) = new(
+        line_info_preprinter,
+        line_info_postprinter,
+        should_print_stmt,
+        bb_color,
+        label_dynamic_calls
+    )
 end
 
 struct _UNDEF
@@ -542,11 +629,11 @@ end
 
 function _stmt(code::IRCode, idx::Int)
     stmts = code.stmts
-    return isassigned(stmts.inst, idx) ? stmts[idx][:inst] : UNDEF
+    return isassigned(stmts.stmt, idx) ? stmts[idx][:stmt] : UNDEF
 end
 function _stmt(compact::IncrementalCompact, idx::Int)
     stmts = compact.result
-    return isassigned(stmts.inst, idx) ? stmts[idx][:inst] : UNDEF
+    return isassigned(stmts.stmt, idx) ? stmts[idx][:stmt] : UNDEF
 end
 function _stmt(code::CodeInfo, idx::Int)
     code = code.code
@@ -569,10 +656,8 @@ end
 
 function statement_indices_to_labels(stmt, cfg::CFG)
     # convert statement index to labels, as expected by print_stmt
-    if stmt isa Expr
-        if stmt.head === :enter && length(stmt.args) == 1 && stmt.args[1] isa Int
-            stmt = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int))
-        end
+    if stmt isa EnterNode
+        stmt = EnterNode(stmt, stmt.catch_dest == 0 ? 0 : block_for_inst(cfg, stmt.catch_dest))
     elseif isa(stmt, GotoIfNot)
         stmt = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest))
     elseif stmt isa GotoNode
@@ -591,13 +676,14 @@ end
 #   at index `idx`. This function is repeatedly called until it returns `nothing`.
 #   to iterate nodes that are to be inserted after the statement, set `attach_after=true`.
 function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, idx::Int, config::IRShowConfig,
-                      used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), only_after::Bool=false)
+                      sptypes::Vector{VarState}, used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), only_after::Bool=false)
     return show_ir_stmt(io, code, idx, config.line_info_preprinter, config.line_info_postprinter,
-                        used, cfg, bb_idx; pop_new_node!, only_after, config.bb_color)
+                        sptypes, used, cfg, bb_idx; pop_new_node!, only_after, config.bb_color, config.label_dynamic_calls)
 end
 
 function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, idx::Int, line_info_preprinter, line_info_postprinter,
-                      used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), only_after::Bool=false, bb_color=:light_black)
+                      sptypes::Vector{VarState}, used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), only_after::Bool=false,
+                      bb_color=:light_black, label_dynamic_calls::Bool=true)
     stmt = _stmt(code, idx)
     type = _type(code, idx)
     max_bb_idx_size = length(string(length(cfg.blocks)))
@@ -656,7 +742,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact},
         show_type = should_print_ssa_type(new_node_inst)
         let maxlength_idx=maxlength_idx, show_type=show_type
             with_output_color(:green, io) do io′
-                print_stmt(io′, node_idx, new_node_inst, used, maxlength_idx, false, show_type)
+                print_stmt(io′, node_idx, new_node_inst, code, sptypes, used, maxlength_idx, false, show_type, label_dynamic_calls)
             end
         end
 
@@ -685,7 +771,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact},
             stmt = statement_indices_to_labels(stmt, cfg)
         end
         show_type = type !== nothing && should_print_ssa_type(stmt)
-        print_stmt(io, idx, stmt, used, maxlength_idx, true, show_type)
+        print_stmt(io, idx, stmt, code, sptypes, used, maxlength_idx, true, show_type, label_dynamic_calls)
         if type !== nothing # ignore types for pre-inference code
             if type === UNDEF
                 # This is an error, but can happen if passes don't update their type information
@@ -717,7 +803,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact},
 end
 
 function _new_nodes_iter(stmts, new_nodes, new_nodes_info, new_nodes_idx)
-    new_nodes_perm = filter(i -> isassigned(new_nodes.inst, i), 1:length(new_nodes))
+    new_nodes_perm = filter(i -> isassigned(new_nodes.stmt, i), 1:length(new_nodes))
     sort!(new_nodes_perm, by = x -> (x = new_nodes_info[x]; (x.pos, x.attach_after)))
 
     # separate iterators for the nodes that are inserted before resp. after each statement
@@ -745,7 +831,7 @@ function _new_nodes_iter(stmts, new_nodes, new_nodes_info, new_nodes_idx)
 
         iter[] += 1
         new_node = new_nodes[node_idx]
-        new_node_inst = isassigned(new_nodes.inst, node_idx) ? new_node[:inst] : UNDEF
+        new_node_inst = isassigned(new_nodes.stmt, node_idx) ? new_node[:stmt] : UNDEF
         new_node_type = isassigned(new_nodes.type, node_idx) ? new_node[:type] : UNDEF
         node_idx += length(stmts)
         return node_idx, new_node_inst, new_node_type
@@ -767,7 +853,7 @@ function new_nodes_iter(compact::IncrementalCompact)
 end
 
 # print only line numbers on the left, some of the method names and nesting depth on the right
-function inline_linfo_printer(code::IRCode)
+function inline_linfo_printer(code::Union{IRCode,CodeInfo})
     loc_annotations, loc_methods, loc_lineno = compute_ir_line_annotations(code)
     max_loc_width = maximum(length, loc_annotations)
     max_lineno_width = maximum(length, loc_lineno)
@@ -805,30 +891,20 @@ end
 
 _strip_color(s::String) = replace(s, r"\e\[\d+m"a => "")
 
-function statementidx_lineinfo_printer(f, code::IRCode)
-    printer = f(code.linetable)
-    function (io::IO, indent::String, idx::Int)
-        printer(io, indent, idx > 0 ? code.stmts[idx][:line] : typemin(Int32))
-    end
-end
-function statementidx_lineinfo_printer(f, code::CodeInfo)
-    printer = f(code.linetable)
-    function (io::IO, indent::String, idx::Int)
-        printer(io, indent, idx > 0 ? code.codelocs[idx] : typemin(Int32))
-    end
-end
+statementidx_lineinfo_printer(f, code::IRCode) = f(code.debuginfo, :var"unknown scope")
+statementidx_lineinfo_printer(f, code::CodeInfo) = f(code.debuginfo, :var"unknown scope")
 statementidx_lineinfo_printer(code) = statementidx_lineinfo_printer(DILineInfoPrinter, code)
 
 function stmts_used(io::IO, code::IRCode, warn_unset_entry=true)
-    stmts = code.stmts
+    insts = code.stmts
     used = BitSet()
-    for stmt in stmts
-        scan_ssa_use!(push!, used, stmt[:inst])
+    for inst in insts
+        scan_ssa_use!(push!, used, inst[:stmt])
     end
     new_nodes = code.new_nodes.stmts
     for nn in 1:length(new_nodes)
-        if isassigned(new_nodes.inst, nn)
-            scan_ssa_use!(push!, used, new_nodes[nn][:inst])
+        if isassigned(new_nodes.stmt, nn)
+            scan_ssa_use!(push!, used, new_nodes[nn][:stmt])
         elseif warn_unset_entry
             printstyled(io, "ERROR: New node array has unset entry\n", color=:red)
             warn_unset_entry = false
@@ -846,18 +922,21 @@ function stmts_used(::IO, code::CodeInfo)
     return used
 end
 
-function default_config(code::IRCode; verbose_linetable=false)
-    return IRShowConfig(verbose_linetable ? statementidx_lineinfo_printer(code)
-                                          : inline_linfo_printer(code);
-                        bb_color=:normal)
+function default_config(code::IRCode; debuginfo = :source_inline)
+    return IRShowConfig(get_debuginfo_printer(code, debuginfo); bb_color=:normal)
+end
+default_config(code::CodeInfo; debuginfo = :source) = IRShowConfig(get_debuginfo_printer(code, debuginfo))
+function default_config(io::IO, src)
+    debuginfo = get(io, :debuginfo, nothing)
+    debuginfo !== nothing && return default_config(src; debuginfo)
+    return default_config(src)
 end
-default_config(code::CodeInfo) = IRShowConfig(statementidx_lineinfo_printer(code))
 
 function show_ir_stmts(io::IO, ir::Union{IRCode, CodeInfo, IncrementalCompact}, inds, config::IRShowConfig,
-                       used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing))
+                       sptypes::Vector{VarState}, used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing))
     for idx in inds
         if config.should_print_stmt(ir, idx, used)
-            bb_idx = show_ir_stmt(io, ir, idx, config, used, cfg, bb_idx; pop_new_node!)
+            bb_idx = show_ir_stmt(io, ir, idx, config, sptypes, used, cfg, bb_idx; pop_new_node!)
         elseif bb_idx <= length(cfg.blocks) && idx == cfg.blocks[bb_idx].stmts.stop
             bb_idx += 1
         end
@@ -871,28 +950,31 @@ function finish_show_ir(io::IO, cfg::CFG, config::IRShowConfig)
     return nothing
 end
 
-function show_ir(io::IO, ir::IRCode, config::IRShowConfig=default_config(ir);
-                 pop_new_node! = new_nodes_iter(ir))
+function show_ir(io::IO, ir::IRCode, config::IRShowConfig=default_config(io, ir); pop_new_node! = new_nodes_iter(ir))
     used = stmts_used(io, ir)
     cfg = ir.cfg
-    maxssaid = length(ir.stmts) + Core.Compiler.length(ir.new_nodes)
+    maxssaid = length(ir.stmts) + length(ir.new_nodes)
     let io = IOContext(io, :maxssaid=>maxssaid)
-        show_ir_stmts(io, ir, 1:length(ir.stmts), config, used, cfg, 1; pop_new_node!)
+        show_ir_stmts(io, ir, 1:length(ir.stmts), config, ir.sptypes, used, cfg, 1; pop_new_node!)
     end
     finish_show_ir(io, cfg, config)
 end
 
-function show_ir(io::IO, ci::CodeInfo, config::IRShowConfig=default_config(ci);
+function show_ir(io::IO, ci::CodeInfo, config::IRShowConfig=default_config(io, ci);
                  pop_new_node! = Returns(nothing))
     used = stmts_used(io, ci)
     cfg = compute_basic_blocks(ci.code)
+    parent = ci.parent
+    sptypes = if parent isa MethodInstance
+        sptypes_from_meth_instance(parent)
+    else EMPTY_SPTYPES end
     let io = IOContext(io, :maxssaid=>length(ci.code))
-        show_ir_stmts(io, ci, 1:length(ci.code), config, used, cfg, 1; pop_new_node!)
+        show_ir_stmts(io, ci, 1:length(ci.code), config, sptypes, used, cfg, 1; pop_new_node!)
     end
     finish_show_ir(io, cfg, config)
 end
 
-function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=default_config(compact.ir))
+function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=default_config(io, compact.ir))
     cfg = compact.ir.cfg
 
 
@@ -928,16 +1010,16 @@ function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=defau
         still_to_be_inserted = (last(input_bb.stmts) - compact.idx) + count
 
         result_bb = result_bbs[compact.active_result_bb]
-        result_bbs[compact.active_result_bb] = Core.Compiler.BasicBlock(result_bb,
-            Core.Compiler.StmtRange(first(result_bb.stmts), compact.result_idx+still_to_be_inserted))
+        result_bbs[compact.active_result_bb] = BasicBlock(result_bb,
+            StmtRange(first(result_bb.stmts), compact.result_idx+still_to_be_inserted))
     end
     compact_cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)])
 
     pop_new_node! = new_nodes_iter(compact)
-    maxssaid = length(compact.result) + Core.Compiler.length(compact.new_new_nodes)
+    maxssaid = length(compact.result) + length(compact.new_new_nodes)
     bb_idx = let io = IOContext(io, :maxssaid=>maxssaid)
-        show_ir_stmts(io, compact, 1:compact.result_idx-1, config, used_compacted,
-                      compact_cfg, 1; pop_new_node!)
+        show_ir_stmts(io, compact, 1:compact.result_idx-1, config, compact.ir.sptypes,
+                      used_compacted, compact_cfg, 1; pop_new_node!)
     end
 
 
@@ -955,8 +1037,8 @@ function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=defau
     inputs_bbs = copy(cfg.blocks)
     for (i, bb) in enumerate(inputs_bbs)
         if bb.stmts.stop < bb.stmts.start
-            inputs_bbs[i] = Core.Compiler.BasicBlock(bb,
-                Core.Compiler.StmtRange(last(bb.stmts), last(bb.stmts)))
+            inputs_bbs[i] = BasicBlock(bb,
+                StmtRange(last(bb.stmts), last(bb.stmts)))
             # this is not entirely correct, and will result in the bb starting again,
             # but is the best we can do without changing how `finish_current_bb!` works.
         end
@@ -964,17 +1046,17 @@ function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=defau
     uncompacted_cfg = CFG(inputs_bbs, Int[first(inputs_bbs[i].stmts) for i in 2:length(inputs_bbs)])
 
     pop_new_node! = new_nodes_iter(compact.ir, compact.new_nodes_idx)
-    maxssaid = length(compact.ir.stmts) + Core.Compiler.length(compact.ir.new_nodes)
+    maxssaid = length(compact.ir.stmts) + length(compact.ir.new_nodes)
     let io = IOContext(io, :maxssaid=>maxssaid)
         # first show any new nodes to be attached after the last compacted statement
         if compact.idx > 1
-            show_ir_stmt(io, compact.ir, compact.idx-1, config, used_uncompacted,
-                        uncompacted_cfg, bb_idx; pop_new_node!, only_after=true)
+            show_ir_stmt(io, compact.ir, compact.idx-1, config, compact.ir.sptypes,
+                         used_uncompacted, uncompacted_cfg, bb_idx; pop_new_node!, only_after=true)
         end
 
         # then show the actual uncompacted IR
-        show_ir_stmts(io, compact.ir, compact.idx:length(stmts), config, used_uncompacted,
-                      uncompacted_cfg, bb_idx; pop_new_node!)
+        show_ir_stmts(io, compact.ir, compact.idx:length(stmts), config, compact.ir.sptypes,
+                      used_uncompacted, uncompacted_cfg, bb_idx; pop_new_node!)
     end
 
     finish_show_ir(io, uncompacted_cfg, config)
@@ -1020,9 +1102,109 @@ function Base.show(io::IO, e::Effects)
     print(io, ',')
     printstyled(io, effectbits_letter(e, :inaccessiblememonly, 'm'); color=effectbits_color(e, :inaccessiblememonly))
     print(io, ',')
-    printstyled(io, effectbits_letter(e, :noinbounds, 'i'); color=effectbits_color(e, :noinbounds))
+    printstyled(io, effectbits_letter(e, :noub, 'u'); color=effectbits_color(e, :noub))
+    print(io, ',')
+    printstyled(io, effectbits_letter(e, :nonoverlayed, 'o'); color=effectbits_color(e, :nonoverlayed))
+    print(io, ',')
+    printstyled(io, effectbits_letter(e, :nortcall, 'r'); color=effectbits_color(e, :nortcall))
     print(io, ')')
-    e.nonoverlayed || printstyled(io, '′'; color=:red)
+end
+
+function Base.show(io::IO, inferred::InferenceResult)
+    mi = inferred.linfo
+    tt = mi.specTypes.parameters[2:end]
+    tts = join(["::$(t)" for t in tt], ", ")
+    rettype = inferred.result
+    if isa(rettype, InferenceState)
+        rettype = rettype.bestguess
+    end
+    if isa(mi.def, Method)
+        print(io, mi.def.name, "(", tts, " => ", rettype, ")")
+    else
+        print(io, "Toplevel MethodInstance thunk from ", mi.def, " => ", rettype)
+    end
+end
+
+Base.show(io::IO, sv::InferenceState) =
+    (print(io, "InferenceState for "); show(io, sv.linfo))
+
+Base.show(io::IO, ::NativeInterpreter) =
+    print(io, "Compiler.NativeInterpreter(...)")
+
+Base.show(io::IO, cache::CachedMethodTable) =
+    print(io, typeof(cache), "(", length(cache.cache), " entries)")
+
+function Base.show(io::IO, limited::LimitedAccuracy)
+    print(io, "LimitedAccuracy(")
+    show(io, limited.typ)
+    print(io, ", #= ", length(limited.causes), " cause(s) =#)")
+end
+
+# These sometimes show up as Const-values in InferenceFrameInfo signatures
+function Base.show(io::IO, mi_info::Timings.InferenceFrameInfo)
+    mi = mi_info.mi
+    def = mi.def
+    if isa(def, Method)
+        if isdefined(def, :generator) && mi === def.generator
+            print(io, "InferenceFrameInfo generator for ")
+            show(io, def)
+        else
+            print(io, "InferenceFrameInfo for ")
+            argnames = [isa(a, Core.Const) ? (isa(a.val, Type) ? "" : a.val) : "" for a in mi_info.slottypes[1:mi_info.nargs]]
+            show_tuple_as_call(io, def.name, mi.specTypes; argnames, qualified=true)
+        end
+    else
+        di = mi.cache.debuginfo
+        file, line = debuginfo_firstline(di)
+        file = string(file)
+        line = isempty(file) || line < 0 ? "<unknown>" : "$file:$line"
+        print(io, "Toplevel InferenceFrameInfo thunk from ", def, " starting at ", line)
+    end
+end
+
+function Base.show(io::IO, tinf::Timings.Timing)
+    print(io, "Compiler.Timings.Timing(", tinf.mi_info, ") with ", length(tinf.children), " children")
 end
 
 @specialize
+
+const __debuginfo = Dict{Symbol, Any}(
+    # :full => src -> statementidx_lineinfo_printer(src), # and add variable slot information
+    :source => src -> statementidx_lineinfo_printer(src),
+    # :oneliner => src -> statementidx_lineinfo_printer(PartialLineInfoPrinter, src),
+    :none => src -> lineinfo_disabled,
+    )
+const default_debuginfo = Ref{Symbol}(:none)
+debuginfo(sym) = sym === :default ? default_debuginfo[] : sym
+
+const __debuginfo = Dict{Symbol, Any}(
+    # :full => src -> statementidx_lineinfo_printer(src), # and add variable slot information
+    :source => src -> statementidx_lineinfo_printer(src),
+    :source_inline => src -> inline_linfo_printer(src),
+    # :oneliner => src -> statementidx_lineinfo_printer(PartialLineInfoPrinter, src),
+    :none => src -> lineinfo_disabled,
+    )
+
+const debuginfo_modes = [:none, :source, :source_inline]
+@assert Set(debuginfo_modes) == Set(keys(__debuginfo))
+
+function validate_debuginfo_mode(mode::Symbol)
+    in(mode, debuginfo_modes) && return true
+    throw(ArgumentError("`debuginfo` must be one of the following: $(join([repr(mode) for mode in debuginfo_modes], ", "))"))
+end
+
+const default_debuginfo_mode = Ref{Symbol}(:none)
+function expand_debuginfo_mode(mode::Symbol, default = default_debuginfo_mode[])
+    if mode === :default
+        mode = default
+    end
+    validate_debuginfo_mode(mode)
+    return mode
+end
+
+function get_debuginfo_printer(mode::Symbol)
+    mode = expand_debuginfo_mode(mode)
+    return __debuginfo[mode]
+end
+
+get_debuginfo_printer(src, mode::Symbol) = get_debuginfo_printer(mode)(src)
diff --git a/base/compiler/ssair/slot2ssa.jl b/Compiler/src/ssair/slot2ssa.jl
similarity index 72%
rename from base/compiler/ssair/slot2ssa.jl
rename to Compiler/src/ssair/slot2ssa.jl
index 73bdb51702ded..16a964b4d72f1 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/Compiler/src/ssair/slot2ssa.jl
@@ -1,13 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-struct TypedSlot
-    id::Int
-    typ
-    TypedSlot(id::Int, @nospecialize(typ)) = new(id, typ)
-end
-
-const UnoptSlot = Union{SlotNumber, TypedSlot}
-
 mutable struct SlotInfo
     defs::Vector{Int}
     uses::Vector{Int}
@@ -16,7 +8,7 @@ end
 SlotInfo() = SlotInfo(Int[], Int[], false)
 
 function scan_entry!(result::Vector{SlotInfo}, idx::Int, @nospecialize(stmt))
-    # NewVarNodes count as defs for the purpose
+    # NewvarNodes count as defs for the purpose
     # of liveness analysis (i.e. they kill use chains)
     if isa(stmt, NewvarNode)
         result[slot_id(stmt.slot)].any_newvar = true
@@ -29,13 +21,13 @@ function scan_entry!(result::Vector{SlotInfo}, idx::Int, @nospecialize(stmt))
         end
         stmt = stmt.args[2]
     end
-    if isa(stmt, UnoptSlot)
+    if isa(stmt, SlotNumber)
         push!(result[slot_id(stmt)].uses, idx)
         return
     end
     for op in userefs(stmt)
         val = op[]
-        if isa(val, UnoptSlot)
+        if isa(val, SlotNumber)
             push!(result[slot_id(val)].uses, idx)
         end
     end
@@ -89,28 +81,26 @@ function new_to_regular(@nospecialize(stmt), new_offset::Int)
     return urs[]
 end
 
-function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, stmt::UnoptSlot, @nospecialize(ssa), @nospecialize(def_ssa))
+function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, @nospecialize(ssa), @nospecialize(def_ssa))
     # We don't really have the information here to get rid of these.
     # We'll do so later
     if ssa === UNDEF_TOKEN
         insert_node!(ir, idx, NewInstruction(
             Expr(:throw_undef_if_not, ci.slotnames[slot], false), Any))
         return UNDEF_TOKEN
+    elseif has_flag(ir.stmts[idx], IR_FLAG_NOTHROW)
+        # if the `isdefined`-ness of this slot is guaranteed by abstract interpretation,
+        # there is no need to form a `:throw_undef_if_not`
     elseif def_ssa !== true
         insert_node!(ir, idx, NewInstruction(
             Expr(:throw_undef_if_not, ci.slotnames[slot], def_ssa), Any))
     end
-    if isa(stmt, SlotNumber)
-        return ssa
-    elseif isa(stmt, TypedSlot)
-        return NewSSAValue(insert_node!(ir, idx, NewInstruction(PiNode(ssa, stmt.typ), stmt.typ)).id - length(ir.stmts))
-    end
-    @assert false # unreachable
+    return ssa
 end
 
 function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt))
-    if isa(stmt, UnoptSlot) && slot_filter(stmt)
-        return fixup_slot!(ir, ci, idx, slot_id(stmt), stmt, rename_slot(stmt)...)
+    if isa(stmt, SlotNumber) && slot_filter(stmt)
+        return fixup_slot!(ir, ci, idx, slot_id(stmt), rename_slot(stmt)...)
     end
     if isexpr(stmt, :(=))
         stmt.args[2] = fixemup!(slot_filter, rename_slot, ir, ci, idx, stmt.args[2])
@@ -120,48 +110,35 @@ function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode
         for i = 1:length(stmt.edges)
             isassigned(stmt.values, i) || continue
             val = stmt.values[i]
-            isa(val, UnoptSlot) || continue
+            isa(val, SlotNumber) || continue
             slot_filter(val) || continue
             bb_idx = block_for_inst(ir.cfg, Int(stmt.edges[i]))
             from_bb_terminator = last(ir.cfg.blocks[bb_idx].stmts)
-            stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), val, rename_slot(val)...)
+            stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), rename_slot(val)...)
         end
         return stmt
     end
     if isexpr(stmt, :isdefined)
         val = stmt.args[1]
-        if isa(val, UnoptSlot)
-            slot = slot_id(val)
-            if (ci.slotflags[slot] & SLOT_USEDUNDEF) == 0
-                return true
-            else
-                ssa, undef_ssa = rename_slot(val)
-                if ssa === UNDEF_TOKEN
-                    return false
-                elseif !isa(ssa, SSAValue) && !isa(ssa, NewSSAValue)
-                    return true
-                end
-                return undef_ssa
-            end
+        if isa(val, SlotNumber)
+            ssa, undef_ssa = rename_slot(val)
+            return undef_ssa
         end
         return stmt
     end
     urs = userefs(stmt)
     for op in urs
         val = op[]
-        if isa(val, UnoptSlot) && slot_filter(val)
-            x = fixup_slot!(ir, ci, idx, slot_id(val), val, rename_slot(val)...)
+        if isa(val, SlotNumber) && slot_filter(val)
+            x = fixup_slot!(ir, ci, idx, slot_id(val), rename_slot(val)...)
             # We inserted an undef error node. Delete subsequent statement
             # to avoid confusing the optimizer
             if x === UNDEF_TOKEN
                 return nothing
             end
             op[] = x
-        elseif isa(val, GlobalRef) && !(isdefined(val.mod, val.name) && isconst(val.mod, val.name))
-            op[] = NewSSAValue(insert_node!(ir, idx,
-                NewInstruction(val, typ_for_val(val, ci, ir.sptypes, idx, Any[]))).id - length(ir.stmts))
         elseif isexpr(val, :static_parameter)
-            ty = typ_for_val(val, ci, ir.sptypes, idx, Any[])
+            ty = typ_for_val(val, ci, ir, idx, Any[])
             if isa(ty, Const)
                 inst = NewInstruction(quoted(ty.val), ty)
             else
@@ -175,70 +152,38 @@ end
 
 function fixup_uses!(ir::IRCode, ci::CodeInfo, code::Vector{Any}, uses::Vector{Int}, slot::Int, @nospecialize(ssa))
     for use in uses
-        code[use] = fixemup!(x::UnoptSlot->slot_id(x)==slot, stmt::UnoptSlot->(ssa, true), ir, ci, use, code[use])
+        code[use] = fixemup!(x::SlotNumber->slot_id(x)==slot, ::SlotNumber->Pair{Any,Any}(ssa, true), ir, ci, use, code[use])
     end
 end
 
 function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Pair{Any, Any}})
-    return fixemup!(stmt::UnoptSlot->true, stmt::UnoptSlot->renames[slot_id(stmt)], ir, ci, idx, stmt)
-end
-
-function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{CallInfo})
-    # Remove `nothing`s at the end, we don't handle them well
-    # (we expect the last instruction to be a terminator)
-    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
-    (; codelocs, ssaflags) = ci
-    for i = length(code):-1:1
-        if code[i] !== nothing
-            resize!(code, i)
-            resize!(ssavaluetypes, i)
-            resize!(codelocs, i)
-            resize!(info, i)
-            resize!(ssaflags, i)
-            break
-        end
-    end
-    # If the last instruction is not a terminator, add one. This can
-    # happen for implicit return on dead branches.
-    term = code[end]
-    if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
-        push!(code, ReturnNode())
-        push!(ssavaluetypes, Union{})
-        push!(codelocs, 0)
-        push!(info, NoCallInfo())
-        push!(ssaflags, IR_FLAG_NOTHROW)
-    end
-    nothing
-end
-
-struct DelayedTyp
-    phi::NewSSAValue
+    return fixemup!(::SlotNumber->true, x::SlotNumber->renames[slot_id(x)], ir, ci, idx, stmt)
 end
 
 # maybe use expr_type?
-function typ_for_val(@nospecialize(x), ci::CodeInfo, sptypes::Vector{VarState}, idx::Int, slottypes::Vector{Any})
+function typ_for_val(@nospecialize(x), ci::CodeInfo, ir::IRCode, idx::Int, slottypes::Vector{Any})
     if isa(x, Expr)
         if x.head === :static_parameter
-            return sptypes[x.args[1]::Int].typ
+            return ir.sptypes[x.args[1]::Int].typ
         elseif x.head === :boundscheck
             return Bool
         elseif x.head === :copyast
-            return typ_for_val(x.args[1], ci, sptypes, idx, slottypes)
+            return typ_for_val(x.args[1], ci, ir, idx, slottypes)
         end
         return (ci.ssavaluetypes::Vector{Any})[idx]
     end
-    isa(x, GlobalRef) && return abstract_eval_globalref(x)
+    isa(x, GlobalRef) && return abstract_eval_globalref_type(x, ci)
     isa(x, SSAValue) && return (ci.ssavaluetypes::Vector{Any})[x.id]
     isa(x, Argument) && return slottypes[x.n]
-    isa(x, NewSSAValue) && return DelayedTyp(x)
+    isa(x, NewSSAValue) && return types(ir)[new_to_regular(x, length(ir.stmts))]
     isa(x, QuoteNode) && return Const(x.value)
-    isa(x, Union{Symbol, PiNode, PhiNode, UnoptSlot}) && error("unexpected val type")
+    isa(x, Union{Symbol, PiNode, PhiNode, SlotNumber}) && error("unexpected val type")
     return Const(x)
 end
 
 struct BlockLiveness
     def_bbs::Vector{Int}
-    live_in_bbs::Vector{Int}
+    live_in_bbs::Union{Vector{Int}, Nothing}
 end
 
 """
@@ -249,7 +194,7 @@ Run iterated dominance frontier.
 The algorithm we have here essentially follows LLVM, which itself is a
 a cleaned up version of the linear-time algorithm described in [^SG95].
 
-The algorithm here, is quite straightforward. Suppose we have a CFG:
+The algorithm here is quite straightforward. Suppose we have a CFG:
 
     A -> B -> D -> F
      \\-> C ------>/
@@ -301,7 +246,8 @@ function iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree:
         push!(worklist, node)
         while !isempty(worklist)
             active = pop!(worklist)
-            for succ in cfg.blocks[active].succs
+            succs = cfg.blocks[active].succs
+            for succ in succs
                 # Check whether the current root (`node`) dominates succ.
                 # We are guaranteed that `node` dominates `active`, since
                 # we've arrived at `active` by following dominator tree edges.
@@ -316,7 +262,7 @@ function iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree:
                 # unless liveness said otherwise.
                 succ in processed && continue
                 push!(processed, succ)
-                if !(succ in liveness.live_in_bbs)
+                if liveness.live_in_bbs !== nothing && !(succ in liveness.live_in_bbs)
                     continue
                 end
                 push!(phiblocks, succ)
@@ -389,43 +335,58 @@ RPO traversal and in particular, any use of an SSA value must come after
 (by linear order) its definition.
 """
 function domsort_ssa!(ir::IRCode, domtree::DomTree)
-    # First compute the new order of basic blocks
+    # Mapping from new → old BB index
+    # An "old" index of 0 means that this was a BB inserted as part of a fixup (see below)
     result_order = Int[]
-    stack = Int[]
+
+    # Mapping from old → new BB index
     bb_rename = fill(-1, length(ir.cfg.blocks))
-    node = 1
-    ncritbreaks = 0
-    nnewfallthroughs = 0
-    while node !== -1
-        push!(result_order, node)
-        bb_rename[node] = length(result_order)
-        cs = domtree.nodes[node].children
-        terminator = ir.stmts[last(ir.cfg.blocks[node].stmts)][:inst]
-        next_node = node + 1
-        node = -1
+
+    # The number of GotoNodes we need to insert to preserve control-flow after sorting
+    nfixupstmts = 0
+
+    # node queued up for scheduling (-1 === nothing)
+    node_to_schedule = 1
+    worklist = Int[]
+    while node_to_schedule !== -1
+        # First assign a new BB index to `node_to_schedule`
+        push!(result_order, node_to_schedule)
+        bb_rename[node_to_schedule] = length(result_order)
+        cs = domtree.nodes[node_to_schedule].children
+        terminator = ir[SSAValue(last(ir.cfg.blocks[node_to_schedule].stmts))][:stmt]
+        fallthrough = node_to_schedule + 1
+        node_to_schedule = -1
+
         # Adding the nodes in reverse sorted order attempts to retain
         # the original source order of the nodes as much as possible.
         # This is not required for correctness, but is easier on the humans
-        for child in Iterators.Reverse(cs)
-            if child == next_node
+        for node in Iterators.Reverse(cs)
+            if node == fallthrough
                 # Schedule the fall through node first,
                 # so we can retain the fall through
-                node = next_node
+                node_to_schedule = node
             else
-                push!(stack, child)
+                push!(worklist, node)
             end
         end
-        if node == -1 && !isempty(stack)
-            node = pop!(stack)
+        if node_to_schedule == -1 && !isempty(worklist)
+            node_to_schedule = pop!(worklist)
         end
-        if node != next_node && !isa(terminator, Union{GotoNode, ReturnNode})
+        # If a fallthrough successor is no longer the fallthrough after sorting, we need to
+        # add a GotoNode (and either extend or split the basic block as necessary)
+        if node_to_schedule != fallthrough && !isa(terminator, Union{GotoNode, ReturnNode})
             if isa(terminator, GotoIfNot)
                 # Need to break the critical edge
-                ncritbreaks += 1
+                push!(result_order, 0)
+            elseif isa(terminator, EnterNode) || isexpr(terminator, :leave)
+                # Cannot extend the BasicBlock with a goto, have to split it
                 push!(result_order, 0)
             else
-                nnewfallthroughs += 1
+                # No need for a new block, just extend
+                @assert !isterminator(terminator)
             end
+            # Reserve space for the fixup goto
+            nfixupstmts += 1
         end
     end
     new_bbs = Vector{BasicBlock}(undef, length(result_order))
@@ -435,7 +396,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
             nstmts += length(ir.cfg.blocks[i].stmts)
         end
     end
-    result = InstructionStream(nstmts + ncritbreaks + nnewfallthroughs)
+    result = InstructionStream(nstmts + nfixupstmts)
     inst_rename = Vector{SSAValue}(undef, length(ir.stmts) + length(ir.new_nodes))
     @inbounds for i = 1:length(ir.stmts)
         inst_rename[i] = SSAValue(-1)
@@ -444,14 +405,13 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
         inst_rename[i + length(ir.stmts)] = SSAValue(i + length(result))
     end
     bb_start_off = 0
-    crit_edge_breaks_fixup = Tuple{Int, Int}[]
     for (new_bb, bb) in pairs(result_order)
         if bb == 0
             nidx = bb_start_off + 1
-            inst = result[nidx][:inst]
-            @assert isa(inst, GotoNode)
+            stmt = result[nidx][:stmt]
+            @assert isa(stmt, GotoNode)
             # N.B.: The .label has already been renamed when it was created.
-            new_bbs[new_bb] = BasicBlock(nidx:nidx, [new_bb - 1], [inst.label])
+            new_bbs[new_bb] = BasicBlock(nidx:nidx, [new_bb - 1], [stmt.label])
             bb_start_off += 1
             continue
         end
@@ -459,44 +419,45 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
         inst_range = (bb_start_off+1):(bb_start_off+length(old_inst_range))
         for (nidx, idx) in zip(inst_range, old_inst_range)
             inst_rename[idx] = SSAValue(nidx)
-            @assert !isassigned(result.inst, nidx)
+            @assert !isassigned(result.stmt, nidx)
             node = result[nidx]
             node[] = ir.stmts[idx]
-            inst = node[:inst]
-            if isa(inst, PhiNode)
-                node[:inst] = rename_phinode_edges(inst, bb, result_order, bb_rename)
+            stmt = node[:stmt]
+            if isa(stmt, PhiNode)
+                node[:stmt] = rename_phinode_edges(stmt, bb, result_order, bb_rename)
             end
         end
         # Now fix up the terminator
-        terminator = result[inst_range[end]][:inst]
+        terminator = result[inst_range[end]][:stmt]
         if isa(terminator, GotoNode)
             # Convert to implicit fall through
             if bb_rename[terminator.label] == new_bb + 1
-                result[inst_range[end]][:inst] = nothing
+                result[inst_range[end]][:stmt] = nothing
             else
-                result[inst_range[end]][:inst] = GotoNode(bb_rename[terminator.label])
+                result[inst_range[end]][:stmt] = GotoNode(bb_rename[terminator.label])
             end
-        elseif isa(terminator, GotoIfNot)
-            # Check if we need to break the critical edge
+        elseif isa(terminator, GotoIfNot) || isa(terminator, EnterNode) || isexpr(terminator, :leave)
+            # Check if we need to break the critical edge or split the block
             if bb_rename[bb + 1] != new_bb + 1
                 @assert result_order[new_bb + 1] == 0
                 # Add an explicit goto node in the next basic block (we accounted for this above)
                 nidx = inst_range[end] + 1
                 node = result[nidx]
-                node[:inst], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, 0
+                node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, NoLineUpdate
             end
-            result[inst_range[end]][:inst] = GotoIfNot(terminator.cond, bb_rename[terminator.dest])
-        elseif !isa(terminator, ReturnNode)
-            if isa(terminator, Expr)
-                if terminator.head === :enter
-                    terminator.args[1] = bb_rename[terminator.args[1]]
-                end
+            if isa(terminator, GotoIfNot)
+                result[inst_range[end]][:stmt] = GotoIfNot(terminator.cond, bb_rename[terminator.dest])
+            elseif isa(terminator, EnterNode)
+                result[inst_range[end]][:stmt] = EnterNode(terminator, terminator.catch_dest == 0 ? 0 : bb_rename[terminator.catch_dest])
+            else
+                @assert isexpr(terminator, :leave)
             end
+        elseif !isa(terminator, ReturnNode)
             if bb_rename[bb + 1] != new_bb + 1
                 # Add an explicit goto node
                 nidx = inst_range[end] + 1
                 node = result[nidx]
-                node[:inst], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, 0
+                node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, NoLineUpdate
                 inst_range = first(inst_range):(last(inst_range) + 1)
             end
         end
@@ -504,12 +465,12 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
         local new_preds, new_succs
         let bb = bb, bb_rename = bb_rename, result_order = result_order
             new_preds = Int[bb for bb in (rename_incoming_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].preds) if bb != -1]
-            new_succs = Int[             rename_outgoing_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].succs]
+            new_succs = Int[              rename_outgoing_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].succs]
         end
         new_bbs[new_bb] = BasicBlock(inst_range, new_preds, new_succs)
     end
     for i in 1:length(result)
-        result[i][:inst] = renumber_ssa!(result[i][:inst], inst_rename, true)
+        result[i][:stmt] = renumber_ssa!(result[i][:stmt], inst_rename, true)
     end
     cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]])
     new_new_nodes = NewNodeStream(length(ir.new_nodes))
@@ -519,12 +480,13 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
         new_new_nodes.info[i] = new_new_info
         new_node = new_new_nodes.stmts[i]
         new_node[] = ir.new_nodes.stmts[i]
-        new_node_inst = new_node[:inst]
+        new_node_inst = new_node[:stmt]
         if isa(new_node_inst, PhiNode)
             new_node_inst = rename_phinode_edges(new_node_inst, block_for_inst(ir.cfg, new_info.pos), result_order, bb_rename)
         end
-        new_node[:inst] = renumber_ssa!(new_node_inst, inst_rename, true)
+        new_node[:stmt] = renumber_ssa!(new_node_inst, inst_rename, true)
     end
+    ir.debuginfo.codelocs = result.line
     new_ir = IRCode(ir, result, cfg, new_new_nodes)
     return new_ir
 end
@@ -569,22 +531,6 @@ function compute_live_ins(cfg::CFG, defs::Vector{Int}, uses::Vector{Int})
     BlockLiveness(bb_defs, bb_uses)
 end
 
-function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode,
-    sptypes::Vector{VarState}, slottypes::Vector{Any}, nstmts::Int, 𝕃ₒ::AbstractLattice)
-    new_typ = Union{}
-    for i = 1:length(node.values)
-        if isa(node, PhiNode) && !isassigned(node.values, i)
-            continue
-        end
-        typ = typ_for_val(node.values[i], ci, sptypes, -1, slottypes)
-        while isa(typ, DelayedTyp)
-            typ = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)]
-        end
-        new_typ = tmerge(𝕃ₒ, new_typ, typ)
-    end
-    return new_typ
-end
-
 struct TryCatchRegion
     enter_block::Int
     leave_block::Int
@@ -592,7 +538,7 @@ end
 struct NewSlotPhi{Phi}
     ssaval::NewSSAValue
     node::Phi
-    undef_ssaval::Union{NewSSAValue, Nothing}
+    undef_ssaval::Union{NewSSAValue, Bool}
     undef_node::Union{Phi, Nothing}
 end
 
@@ -603,45 +549,38 @@ struct NewPhiCNode2
     insert::NewSlotPhi{PhiCNode}
 end
 
-function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
-                        defuses::Vector{SlotInfo}, slottypes::Vector{Any},
+function construct_ssa!(ci::CodeInfo, ir::IRCode, sv::OptimizationState,
+                        domtree::DomTree, defuses::Vector{SlotInfo},
                         𝕃ₒ::AbstractLattice)
-    code = ir.stmts.inst
+    code = ir.stmts.stmt
     cfg = ir.cfg
     catch_entry_blocks = TryCatchRegion[]
     for idx in 1:length(code)
         stmt = code[idx]
-        if isexpr(stmt, :enter)
+        if isa(stmt, EnterNode)
             push!(catch_entry_blocks, TryCatchRegion(
                 block_for_inst(cfg, idx),
-                block_for_inst(cfg, stmt.args[1]::Int)))
+                block_for_inst(cfg, stmt.catch_dest)))
         end
     end
 
-    exc_handlers = IdDict{Int, TryCatchRegion}()
-    # Record the correct exception handler for all cricitcal sections
-    for catch_entry_block in catch_entry_blocks
-        (; enter_block, leave_block) = catch_entry_block
-        exc_handlers[enter_block+1] = catch_entry_block
-        # TODO: Cut off here if the terminator is a leave corresponding to this enter
-        for block in dominated(domtree, enter_block+1)
-            exc_handlers[block] = catch_entry_block
-        end
-    end
+    # Record the correct exception handler for all critical sections
+    handler_info = compute_trycatch(code)
 
     phi_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)]
+    live_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)]
     new_phi_nodes = Vector{NewPhiNode2}[NewPhiNode2[] for _ = 1:length(cfg.blocks)]
     new_phic_nodes = IdDict{Int, Vector{NewPhiCNode2}}()
     for (; leave_block) in catch_entry_blocks
         new_phic_nodes[leave_block] = NewPhiCNode2[]
     end
-    @timeit "idf" for (idx, slot) in Iterators.enumerate(defuses)
+    @zone "CC: IDF" for (idx, slot) in Iterators.enumerate(defuses)
         # No uses => no need for phi nodes
         isempty(slot.uses) && continue
         # TODO: Restore this optimization
         if false # length(slot.defs) == 1 && slot.any_newvar
             if slot.defs[] == 0
-                typ = slottypes[idx]
+                typ = sv.slottypes[idx]
                 ssaval = Argument(idx)
                 fixup_uses!(ir, ci, code, slot.uses, idx, ssaval)
             elseif isa(code[slot.defs[]], NewvarNode)
@@ -654,26 +593,37 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
                 fixup_uses!(ir, ci, code, slot.uses, idx, nothing)
             else
                 val = code[slot.defs[]].args[2]
-                typ = typ_for_val(val, ci, ir.sptypes, slot.defs[], slottypes)
+                typ = typ_for_val(val, ci, ir, slot.defs[], sv.slottypes)
                 ssaval = make_ssa!(ci, code, slot.defs[], typ)
                 fixup_uses!(ir, ci, code, slot.uses, idx, ssaval)
             end
             continue
         end
-        @timeit "liveness" (live = compute_live_ins(cfg, slot))
+
+        @zone "CC: LIVENESS" (live = compute_live_ins(cfg, slot))
         for li in live.live_in_bbs
+            push!(live_slots[li], idx)
             cidx = findfirst(x::TryCatchRegion->x.leave_block==li, catch_entry_blocks)
             if cidx !== nothing
                 # The slot is live-in into this block. We need to
                 # Create a PhiC node in the catch entry block and
                 # an upsilon node in the corresponding enter block
+                varstate = sv.bb_vartables[li]
+                if varstate === nothing
+                    continue
+                end
                 node = PhiCNode(Any[])
                 insertpoint = first_insert_for_bb(code, cfg, li)
+                vt = varstate[idx]
                 phic_ssa = NewSSAValue(
                     insert_node!(ir, insertpoint,
-                        NewInstruction(node, Union{})).id - length(ir.stmts))
+                        NewInstruction(node, vt.typ)).id - length(ir.stmts))
                 undef_node = undef_ssaval = nothing
-                if (ci.slotflags[idx] & SLOT_USEDUNDEF) != 0
+                if vt.typ === Union{}
+                    undef_ssaval = false
+                elseif !vt.undef
+                    undef_ssaval = true
+                else
                     undef_node = PhiCNode(Any[])
                     undef_ssaval = NewSSAValue(insert_node!(ir,
                         insertpoint, NewInstruction(undef_node, Bool)).id - length(ir.stmts))
@@ -690,10 +640,17 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
         for block in phiblocks
             push!(phi_slots[block], idx)
             node = PhiNode()
+            varstate = sv.bb_vartables[block]
+            @assert varstate !== nothing
+            vt = varstate[idx]
             ssaval = NewSSAValue(insert_node!(ir,
-                first_insert_for_bb(code, cfg, block), NewInstruction(node, Union{})).id - length(ir.stmts))
+                first_insert_for_bb(code, cfg, block), NewInstruction(node, vt.typ)).id - length(ir.stmts))
             undef_node = undef_ssaval = nothing
-            if (ci.slotflags[idx] & SLOT_USEDUNDEF) != 0
+            if vt.typ === Union{}
+                undef_ssaval = false
+            elseif !vt.undef
+                undef_ssaval = true
+            else
                 undef_node = PhiNode()
                 undef_ssaval = NewSSAValue(insert_node!(ir,
                     first_insert_for_bb(code, cfg, block), NewInstruction(undef_node, Bool)).id - length(ir.stmts))
@@ -713,10 +670,12 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
     ]
     worklist = Tuple{Int, Int, Vector{Pair{Any, Any}}}[(1, 0, initial_incoming_vals)]
     visited = BitSet()
-    type_refine_phi = BitSet()
     new_nodes = ir.new_nodes
-    @timeit "SSA Rename" while !isempty(worklist)
-        (item::Int, pred, incoming_vals) = pop!(worklist)
+    @zone "CC: SSA_RENAME" while !isempty(worklist)
+        (item, pred, incoming_vals) = pop!(worklist)
+        if sv.bb_vartables[item] === nothing
+            continue
+        end
         # Rename existing phi nodes first, because their uses occur on the edge
         # TODO: This isn't necessary if inlining stops replacing arguments by slots.
         for idx in cfg.blocks[item].stmts
@@ -748,33 +707,38 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
             else
                 push!(node.values, incoming_val)
             end
-            outgoing_def = true
-            if (ci.slotflags[slot] & SLOT_USEDUNDEF) != 0
+            if undef_node !== nothing
                 push!(undef_node.edges, pred)
                 push!(undef_node.values, incoming_def)
-                outgoing_def = undef_ssaval
-            end
-            # TODO: Remove the next line, it shouldn't be necessary
-            push!(type_refine_phi, ssaval.id)
-            if isa(incoming_val, NewSSAValue)
-                push!(type_refine_phi, ssaval.id)
             end
-            typ = incoming_val === UNDEF_TOKEN ? Union{} : typ_for_val(incoming_val, ci, ir.sptypes, -1, slottypes)
-            old_entry = new_nodes.stmts[ssaval.id]
-            if isa(typ, DelayedTyp)
-                push!(type_refine_phi, ssaval.id)
-            end
-            new_typ = isa(typ, DelayedTyp) ? Union{} : tmerge(𝕃ₒ, old_entry[:type], typ)
-            old_entry[:type] = new_typ
-            old_entry[:inst] = node
-            incoming_vals[slot] = Pair{Any, Any}(ssaval, outgoing_def)
+
+            incoming_vals[slot] = Pair{Any, Any}(ssaval, undef_ssaval)
         end
         (item in visited) && continue
         # Record phi_C nodes if necessary
         if haskey(new_phic_nodes, item)
             for (; slot, insert) in new_phic_nodes[item]
                 (; ssaval, undef_ssaval) = insert
-                incoming_vals[slot_id(slot)] = Pair{Any, Any}(ssaval, undef_ssaval === nothing ? true : undef_ssaval)
+                incoming_vals[slot_id(slot)] = Pair{Any, Any}(ssaval, undef_ssaval)
+            end
+        end
+        # Record Pi nodes if necessary
+        has_pinode = fill(false, length(sv.slottypes))
+        for slot in live_slots[item]
+            (ival, idef) = incoming_vals[slot]
+            (ival === SSAValue(-1)) && continue
+            (ival === SSAValue(-2)) && continue
+            (ival === UNDEF_TOKEN) && continue
+
+            varstate = sv.bb_vartables[item]
+            @assert varstate !== nothing
+            typ = varstate[slot].typ
+            if !⊑(𝕃ₒ, sv.slottypes[slot], typ)
+                node = PiNode(ival, typ)
+                ival = NewSSAValue(insert_node!(ir,
+                    first_insert_for_bb(code, cfg, item), NewInstruction(node, typ)).id - length(ir.stmts))
+                incoming_vals[slot] = Pair{Any, Any}(ival, idef)
+                has_pinode[slot] = true
             end
         end
         # Record initial upsilon nodes if necessary
@@ -785,7 +749,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
                 (ival, idef) = incoming_vals[slot_id(slot)]
                 ivalundef = ival === UNDEF_TOKEN
                 Υ = NewInstruction(ivalundef ? UpsilonNode() : UpsilonNode(ival),
-                                   ivalundef ? Union{} : typ_for_val(ival, ci, ir.sptypes, -1, slottypes))
+                                   ivalundef ? Union{} : typ_for_val(ival, ci, ir, -1, sv.slottypes))
                 insertpos = first_insert_for_bb(code, cfg, item)
                 # insert `UpsilonNode` immediately before the `:enter` expression
                 Υssa = insert_node!(ir, insertpos, Υ)
@@ -803,6 +767,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
             (isa(stmt, PhiNode) || (isexpr(stmt, :(=)) && isa(stmt.args[2], PhiNode))) && continue
             if isa(stmt, NewvarNode)
                 incoming_vals[slot_id(stmt.slot)] = Pair{Any, Any}(UNDEF_TOKEN, false)
+                has_pinode[slot_id(stmt.slot)] = false
                 code[idx] = nothing
             else
                 stmt = rename_uses!(ir, ci, idx, stmt, incoming_vals)
@@ -817,7 +782,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
                     if isa(arg1, SlotNumber)
                         id = slot_id(arg1)
                         val = stmt.args[2]
-                        typ = typ_for_val(val, ci, ir.sptypes, idx, slottypes)
+                        typ = typ_for_val(val, ci, ir, idx, sv.slottypes)
                         # Having UNDEF_TOKEN appear on the RHS is possible if we're on a dead branch.
                         # Do something reasonable here, by marking the LHS as undef as well.
                         if val !== UNDEF_TOKEN
@@ -829,10 +794,14 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
                             thisdef = false
                         end
                         incoming_vals[id] = Pair{Any, Any}(thisval, thisdef)
-                        enter_block = item
-                        while haskey(exc_handlers, enter_block)
-                            (; enter_block, leave_block) = exc_handlers[enter_block]
-                            cidx = findfirst((; slot)::NewPhiCNode2->slot_id(slot)==id, new_phic_nodes[leave_block])
+                        has_pinode[id] = false
+                        enter_idx = idx
+                        while (handler = gethandler(handler_info, enter_idx)) !== nothing
+                            enter_idx = get_enter_idx(handler)
+                            enter_node = code[enter_idx]::EnterNode
+                            leave_block = block_for_inst(cfg, enter_node.catch_dest)
+                            cidx = findfirst((; slot)::NewPhiCNode2->slot_id(slot)==id,
+                                new_phic_nodes[leave_block])
                             if cidx !== nothing
                                 node = thisdef ? UpsilonNode(thisval) : UpsilonNode()
                                 if incoming_vals[id] === UNDEF_TOKEN
@@ -852,6 +821,14 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
                 end
             end
         end
+        # Unwrap any PiNodes before continuing, since they weren't considered during our
+        # dominance frontier calculation and so have to be used locally in each BB.
+        for (i, (ival, idef)) in enumerate(incoming_vals)
+            if has_pinode[i]
+                stmt = ir[new_to_regular(ival::NewSSAValue, length(ir.stmts))][:stmt]
+                incoming_vals[i] = Pair{Any, Any}(stmt.val, idef)
+            end
+        end
         for succ in cfg.blocks[item].succs
             push!(worklist, (succ, item, copy(incoming_vals)))
         end
@@ -871,7 +848,6 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
     nstmts = length(ir.stmts)
     new_code = Vector{Any}(undef, nstmts)
     ssavalmap = fill(SSAValue(-1), length(ssavaluetypes) + 1)
-    result_types = Any[Any for _ in 1:nstmts]
     # Detect statement positions for assignments and construct array
     for (bb, idx) in bbidxiter(ir)
         stmt = code[idx]
@@ -886,15 +862,15 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
             else
                 new_code[idx] = GotoIfNot(stmt.cond, new_dest)
             end
-        elseif isexpr(stmt, :enter)
-            new_code[idx] = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int))
+        elseif isa(stmt, EnterNode)
+            except_bb = stmt.catch_dest == 0 ? 0 : block_for_inst(cfg, stmt.catch_dest)
+            new_code[idx] = EnterNode(stmt, except_bb)
             ssavalmap[idx] = SSAValue(idx) # Slot to store token for pop_exception
         elseif isexpr(stmt, :leave) || isexpr(stmt, :(=)) || isa(stmt, ReturnNode) ||
             isexpr(stmt, :meta) || isa(stmt, NewvarNode)
             new_code[idx] = stmt
         else
             ssavalmap[idx] = SSAValue(idx)
-            result_types[idx] = ssavaluetypes[idx]
             if isa(stmt, PhiNode)
                 edges = Int32[edge == 0 ? 0 : block_for_inst(cfg, Int(edge)) for edge in stmt.edges]
                 new_code[idx] = PhiNode(edges, stmt.values)
@@ -903,65 +879,18 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
             end
         end
     end
-    for (_, nodes) in new_phic_nodes
-        for (; insert) in nodes
-            (; node, ssaval) = insert
-            new_typ = Union{}
-            # TODO: This could just be the ones that depend on other phis
-            push!(type_refine_phi, ssaval.id)
-            new_idx = ssaval.id
-            node = new_nodes.stmts[new_idx]
-            phic_values = (node[:inst]::PhiCNode).values
-            for i = 1:length(phic_values)
-                orig_typ = typ = typ_for_val(phic_values[i], ci, ir.sptypes, -1, slottypes)
-                while isa(typ, DelayedTyp)
-                    typ = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)]
-                end
-                new_typ = tmerge(𝕃ₒ, new_typ, typ)
-            end
-            node[:type] = new_typ
-        end
-    end
-    # This is a bit awkward, because it basically duplicates what type
-    # inference does. Ideally, we'd just use this representation earlier
-    # to make sure phi nodes have accurate types
-    changed = true
-    while changed
-        changed = false
-        for new_idx in type_refine_phi
-            node = new_nodes.stmts[new_idx]
-            new_typ = recompute_type(node[:inst]::Union{PhiNode,PhiCNode}, ci, ir, ir.sptypes, slottypes, nstmts, 𝕃ₒ)
-            if !⊑(𝕃ₒ, node[:type], new_typ) || !⊑(𝕃ₒ, new_typ, node[:type])
-                node[:type] = new_typ
-                changed = true
-            end
-        end
-    end
-    for i in 1:length(result_types)
-        rt_i = result_types[i]
-        if rt_i isa DelayedTyp
-            result_types[i] = types(ir)[new_to_regular(rt_i.phi::NewSSAValue, nstmts)]
-        end
-    end
-    for i = 1:length(new_nodes)
-        local node = new_nodes.stmts[i]
-        local typ = node[:type]
-        if isa(typ, DelayedTyp)
-            node[:type] = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)]
-        end
-    end
     # Renumber SSA values
     @assert isempty(ir.stmts.type)
     resize!(ir.stmts.type, nstmts)
     for i in 1:nstmts
         local node = ir.stmts[i]
-        node[:inst] = new_to_regular(renumber_ssa!(new_code[i], ssavalmap), nstmts)
-        node[:type] = result_types[i]
+        node[:stmt] = new_to_regular(renumber_ssa!(new_code[i], ssavalmap), nstmts)
+        node[:type] = ssavaluetypes[i]
     end
     for i = 1:length(new_nodes)
         local node = new_nodes.stmts[i]
-        node[:inst] = new_to_regular(renumber_ssa!(node[:inst], ssavalmap), nstmts)
+        node[:stmt] = new_to_regular(renumber_ssa!(node[:stmt], ssavalmap), nstmts)
     end
-    @timeit "domsort" ir = domsort_ssa!(ir, domtree)
+    @zone "CC: DOMSORT" ir = domsort_ssa!(ir, domtree)
     return ir
 end
diff --git a/Compiler/src/ssair/tarjan.jl b/Compiler/src/ssair/tarjan.jl
new file mode 100644
index 0000000000000..e73039868c367
--- /dev/null
+++ b/Compiler/src/ssair/tarjan.jl
@@ -0,0 +1,313 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using .Compiler: DomTree, CFG, BasicBlock, StmtRange, dominates
+
+struct SCCStackItem
+    v::Int32
+    # which child of `v` to scan
+    child::Int32
+    # the location of `parent` in the stack
+    parent::Int32
+    # the index in the (pre-order traversal of the) DFS tree
+    preorder::Int32
+    # the minimum node (by pre-order index) reachable from any node in the DFS sub-tree rooted at `v`
+    minpreorder::Int32
+    # whether this node is reachable from BasicBlock #1
+    live::Bool
+end
+
+function SCCStackItem(item::SCCStackItem; child=item.child,
+                      minpreorder=item.minpreorder, live=item.live)
+    return SCCStackItem(
+        item.v,        # v
+        child,         # child
+        item.parent,   # parent
+        item.preorder, # preorder
+        minpreorder,   # minpreorder
+        live,          # live
+    )
+end
+
+struct CFGReachability
+    irreducible::BitVector # BBNumber -> Bool
+    scc::Vector{Int}       # BBNumber -> SCCNumber
+    domtree::DomTree
+
+    _worklist::Vector{Int}       # for node removal
+    _stack::Vector{SCCStackItem} # for Tarjan's SCC algorithm
+end
+
+function CFGReachability(cfg::CFG, domtree::DomTree)
+    n_blocks = length(cfg.blocks)
+    reachability = CFGReachability(
+        BitVector(undef, n_blocks), # irreducible
+        zeros(Int, n_blocks),       # scc
+        domtree,                    # domtree
+        Int[],                      # _worklist
+        SCCStackItem[],             # _stack
+    )
+    tarjan!(reachability, cfg;
+        # reducible back-edges don't need to be considered for reachability
+        filter = (from::Int,to::Int)->!dominates(domtree, to, from)
+    )
+    return reachability
+end
+
+bb_unreachable(reach::CFGReachability, bb::Int) = reach.scc[bb] == 0
+
+bb_in_irreducible_loop(reach::CFGReachability, bb::Int) = reach.irreducible[bb]
+
+# Returns `true` if a node is 'rooted' as reachable, i.e. it is has an incoming
+# edge from a resolved SCC other than its own (or it is BasicBlock #1).
+#
+# `tarjan!` takes the transitive closure of this relation in order to detect
+# which BasicBlocks are unreachable.
+function _bb_externally_reachable(reach::CFGReachability, cfg::CFG, bb::Int; filter)
+    (; scc) = reach
+    bb == 1 && return true
+    for pred in cfg.blocks[bb].preds
+        scc[pred] <= 0 && continue
+        !filter(pred, bb) && continue
+        @assert scc[pred] != scc[bb]
+        return true
+    end
+    return false
+end
+
+"""
+    tarjan!(reach::CFGReachability, cfg::CFG, root::Int=1)
+
+Tarjan's strongly-connected components algorithm. Traverses the CFG starting at `root`, ignoring
+nodes with resolved SCC's and updating outputs for all un-resolved nodes.
+
+Returns true if any node was discovered to be unreachable, false otherwise.
+
+Outputs:
+  - `reach.scc`: strongly-connected components, ignoring backedges to (natural) loops
+  - `reach.irreducible`: true iff a BasicBlock is part of a (non-trivial) SCC / irreducible loop
+  - `reach._worklist`: if performing an incremental update (`root != 1`), any traversed nodes that
+    are unreachable from BasicBlock #1 are enqueued to this worklist
+"""
+function tarjan!(reach::CFGReachability, cfg::CFG; root::Int=1,
+    filter = (from::Int,to::Int)->true,
+)
+    (; scc, irreducible) = reach
+    scc[root] != 0 && return scc
+    live = _bb_externally_reachable(reach, cfg, root; filter)
+
+    # the original algorithm has a separate stack and worklist (unrelated to `reach._worklist`)
+    # here we use a single combined stack for improved memory/cache efficiency
+    stack = reach._stack
+    push!(stack, SCCStackItem(
+        root, # v
+        1,    # child
+        0,    # parent
+        1,    # preorder
+        1,    # minpreorder
+        live, # live
+    ))
+    scc[root] = -1
+    cursor = length(stack)
+
+    # worklist length before any new unreachable nodes are added
+    worklist_len = length(reach._worklist)
+
+    # last (pre-order) DFS label assigned to a node
+    preorder_id = 1
+    while true
+        (; v, child, minpreorder, live) = item = stack[cursor]
+
+        bb = cfg.blocks[v]
+        if child <= length(bb.succs) # queue next child
+            stack[cursor] = item = SCCStackItem(item; child=child+1)
+            succ = bb.succs[child]
+
+            # ignore any edges that don't pass the filter
+            !filter(convert(Int, v), succ) && continue
+
+            if scc[succ] < 0
+                # next child is already in DFS tree
+                child_preorder = stack[-scc[succ]].preorder
+
+                # only need to update `minpreorder` for `v`
+                stack[cursor] = item = SCCStackItem(item;
+                                                    minpreorder=min(minpreorder, child_preorder))
+            elseif scc[succ] == 0
+                # next child is a new element in DFS tree
+                preorder_id += 1
+                live = live || _bb_externally_reachable(reach, cfg, succ; filter)
+                push!(stack, SCCStackItem(
+                    succ,        # v
+                    1,           # child
+                    cursor,      # parent (index in stack)
+                    preorder_id, # preorder
+                    preorder_id, # minpreorder
+                    live,        # live
+                ))
+                scc[succ] = -length(stack)
+                cursor = length(stack)
+            else end # next child is a resolved SCC (do nothing)
+        else # v's children are processed, finalize v
+            if item.minpreorder == item.preorder
+                has_one_element = stack[end].v == v
+                while true
+                    item = pop!(stack)
+                    if live
+                        scc[item.v] = v
+                        scan_subgraph!(reach, cfg, convert(Int, item.v),
+                            #= filter =# (pred,x)->(filter(pred, x) && scc[x] > typemax(Int)÷2),
+                            #= action =# (x)->(scc[x] -= typemax(Int)÷2;),
+                        )
+                    else # this offset marks a node as 'maybe-dead'
+                        scc[item.v] = v + typemax(Int)÷2
+                        push!(reach._worklist, item.v)
+                    end
+                    irreducible[item.v] = !has_one_element
+                    (item.v == v) && break
+                end
+                item.parent == 0 && break # all done
+            elseif live
+                stack[item.parent] = SCCStackItem(stack[item.parent]; live=true)
+            end
+
+            # update `minpreorder` for parent
+            parent = stack[item.parent]
+            minpreorder = min(parent.minpreorder, item.minpreorder)
+            stack[item.parent] = SCCStackItem(parent; minpreorder)
+
+            cursor = item.parent
+        end
+    end
+
+    worklist = reach._worklist
+
+    # filter the worklist, leaving any nodes not proven to be reachable from BB #1
+    n_popped = 0
+    for i = (worklist_len + 1):length(worklist)
+        @assert worklist[i] != 1
+        @assert scc[worklist[i]] > 0
+        if scc[worklist[i]] > typemax(Int)÷2
+            # node is unreachable, enqueue it
+            scc[worklist[i]] = 0
+            worklist[i - n_popped] = worklist[i]
+        else
+            n_popped += 1
+        end
+    end
+    resize!(worklist, length(worklist) - n_popped)
+
+    return length(worklist) > worklist_len # if true, a (newly) unreachable node was enqueued
+end
+
+"""
+Scan the subtree rooted at `root`, excluding `root` itself
+
+Note: This function will not detect cycles for you. The `filter` provided must
+      protect against infinite cycle traversal.
+"""
+function scan_subgraph!(reach::CFGReachability, cfg::CFG, root::Int, filter, action)
+    worklist = reach._worklist
+    start_len = length(worklist)
+
+    push!(worklist, root)
+    while length(worklist) > start_len
+        v = pop!(worklist)
+        for succ in cfg.blocks[v].succs
+            !filter(v, succ) && continue
+            action(succ)
+            push!(worklist, succ)
+        end
+    end
+end
+
+function enqueue_if_unreachable!(reach::CFGReachability, cfg::CFG, bb::Int)
+    (; domtree, scc) = reach
+    @assert scc[bb] != 0
+
+    bb == 1 && return false
+    if bb_in_irreducible_loop(reach, bb)
+        # irreducible CFG
+        # this requires a full scan of the irreducible loop
+
+        # any reducible back-edges do not need to be considered as part of reachability
+        # (very important optimization, since it means reducible CFGs will have no SCCs)
+        filter = (from::Int, to::Int)->!dominates(domtree, to, from)
+
+        scc′ = scc[bb]
+        scc[bb] = 0
+        scan_subgraph!(reach, cfg, bb, # set this SCC to 0
+            #= filter =# (pred,x)->(filter(pred, x) && scc[x] == scc′),
+            #= action =# (x)->(scc[x] = 0;),
+        )
+
+        # re-compute the SCC's for this portion of the CFG, adding any freshly
+        # unreachable nodes to `reach._worklist`
+        return tarjan!(reach, cfg; root=bb, filter)
+    else
+        # target is a reducible CFG node
+        # this node lives iff it still has an incoming forward edge
+        for pred in cfg.blocks[bb].preds
+            # virtual edge does not count - if the enter is dead, that edge is
+            # not taken.
+            pred == 0 && continue
+            !dominates(domtree, bb, pred) && return false # forward-edge
+        end
+        scc[bb] = 0
+        push!(reach._worklist, bb)
+        return true
+    end
+end
+
+function kill_cfg_edge!(cfg::CFG, from::Int, to::Int)
+    preds, succs = cfg.blocks[to].preds, cfg.blocks[from].succs
+    deleteat!(preds, findfirst(x::Int->x==from, preds)::Int)
+    deleteat!(succs, findfirst(x::Int->x==to, succs)::Int)
+    return nothing
+end
+
+"""
+Remove from `cfg` and `reach` the edge (from → to), as well as any blocks/edges
+this causes to become unreachable.
+
+Calls:
+  - `block_callback` for every unreachable block.
+  - `edge_callback` for every unreachable edge into a reachable block (may also
+     be called for blocks which are later discovered to be unreachable).
+"""
+function kill_edge!(reach::CFGReachability, cfg::CFG, from::Int, to::Int,
+                    edge_callback=nothing, block_callback=nothing)
+    (reach.scc[from] == 0) && return # source is already unreachable
+    @assert reach.scc[to] != 0
+
+    # delete (from → to) edge
+    kill_cfg_edge!(cfg, from, to)
+
+    # check for unreachable target
+    enqueued = enqueue_if_unreachable!(reach, cfg, to)
+    if !enqueued && edge_callback !== nothing
+        edge_callback(from, to)
+    end
+    while !isempty(reach._worklist)
+        node = convert(Int, pop!(reach._worklist))
+
+        # already marked unreachable, just need to notify
+        @assert reach.scc[node] == 0 && node != 1
+        if block_callback !== nothing
+            block_callback(node)
+        end
+
+        for succ in cfg.blocks[node].succs
+            # delete (node → succ) edge
+            preds = cfg.blocks[succ].preds
+            deleteat!(preds, findfirst(x::Int->x==node, preds)::Int)
+
+            # check for newly unreachable target
+            reach.scc[succ] == 0 && continue
+            enqueued = enqueue_if_unreachable!(reach, cfg, succ)
+            if !enqueued && edge_callback !== nothing
+                edge_callback(node, succ)
+            end
+        end
+        empty!(cfg.blocks[node].succs)
+    end
+end
diff --git a/base/compiler/ssair/verify.jl b/Compiler/src/ssair/verify.jl
similarity index 53%
rename from base/compiler/ssair/verify.jl
rename to Compiler/src/ssair/verify.jl
index 39f56a47e1908..e190ae7a8438f 100644
--- a/base/compiler/ssair/verify.jl
+++ b/Compiler/src/ssair/verify.jl
@@ -1,14 +1,19 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+irshow_was_loaded() = invokelatest(isdefinedglobal, Compiler.IRShow, :debuginfo_firstline)
 function maybe_show_ir(ir::IRCode)
-    if isdefined(Core, :Main)
-        Core.Main.Base.display(ir)
+    if irshow_was_loaded()
+        # ensure we use I/O that does not yield, as this gets called during compilation
+        invokelatest(Core.Main.Base.show, Core.stdout, "text/plain", ir)
+    else
+        Core.show(ir)
     end
+    Core.println(Core.stdout)
 end
 
 if !isdefined(@__MODULE__, Symbol("@verify_error"))
     macro verify_error(arg)
-        arg isa String && return esc(:(print && println(stderr, $arg)))
+        arg isa String && return esc(:(print && println($(GlobalRef(Core, :stderr)), $arg)))
         isexpr(arg, :string) || error("verify_error macro expected a string expression")
         pushfirst!(arg.args, GlobalRef(Core, :stderr))
         pushfirst!(arg.args, :println)
@@ -20,10 +25,17 @@ if !isdefined(@__MODULE__, Symbol("@verify_error"))
     end
 end
 
-is_value_pos_expr_head(head::Symbol) = head === :boundscheck
-function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool)
+is_toplevel_expr_head(head::Symbol) = head === :method || head === :thunk
+is_value_pos_expr_head(head::Symbol) = head === :static_parameter
+function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int,
+    allow_frontend_forms::Bool, @nospecialize(raise_error))
     if isa(op, SSAValue)
+        op.id > 0 || @verify_error "Def ($(op.id)) is invalid in final IR"
         if op.id > length(ir.stmts)
+            if op.id - length(ir.stmts) > length(ir.new_nodes.info)
+                @verify_error "Def ($(op.id)) points to non-existent new node"
+                raise_error()
+            end
             def_bb = block_for_inst(ir.cfg, ir.new_nodes.info[op.id - length(ir.stmts)].pos)
         else
             def_bb = block_for_inst(ir.cfg, op.id)
@@ -34,45 +46,53 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int,
             else
                 if op.id >= use_idx
                     @verify_error "Def ($(op.id)) does not dominate use ($(use_idx)) in same BB"
-                    error("")
+                    raise_error()
                 end
             end
         else
             if !dominates(domtree, def_bb, use_bb) && !(bb_unreachable(domtree, def_bb) && bb_unreachable(domtree, use_bb))
                 # At the moment, we allow GC preserve tokens outside the standard domination notion
                 @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value %$(op.id) at %$(printed_use_idx))"
-                error("")
+                raise_error()
             end
         end
 
         use_inst = ir[op]
-        if isa(use_inst[:inst], Union{GotoIfNot, GotoNode, ReturnNode})
+        if isa(use_inst[:stmt], Union{GotoIfNot, GotoNode, ReturnNode}) && !(isa(use_inst[:stmt], ReturnNode) && !isdefined(use_inst[:stmt], :val))
+            # Allow uses of `unreachable`, which may have been inserted when
+            # an earlier block got deleted, but for some reason we didn't figure
+            # out yet that this entire block is dead also.
             @verify_error "At statement %$use_idx: Invalid use of value statement or terminator %$(op.id)"
-            error("")
+            raise_error()
         end
     elseif isa(op, GlobalRef)
-        if !isdefined(op.mod, op.name) || !isconst(op.mod, op.name)
-            @verify_error "Unbound GlobalRef not allowed in value position"
-            error("")
+        if op.mod !== Core && op.mod !== Base
+            (valid_worlds, alldef) = scan_leaf_partitions(nothing, op, WorldWithRange(min_world(ir.valid_worlds), ir.valid_worlds)) do _, _, bpart
+                is_defined_const_binding(binding_kind(bpart))
+            end
+            if !alldef || max_world(valid_worlds) < max_world(ir.valid_worlds) || min_world(valid_worlds) > min_world(ir.valid_worlds)
+                @verify_error "Unbound or partitioned GlobalRef not allowed in value position"
+                raise_error()
+            end
         end
     elseif isa(op, Expr)
         # Only Expr(:boundscheck) is allowed in value position
-        if isforeigncall && arg_idx == 1 && op.head === :call
-            # Allow a tuple in symbol position for foreigncall - this isn't actually
-            # a real call - it's interpreted in global scope by codegen. However,
-            # we do need to keep this a real use, because it could also be a pointer.
+        if isforeigncall && arg_idx == 1 && op.head === :tuple
+            # Allow a tuple literal in symbol position for foreigncall - this
+            # is syntax for a literal value or globalref - it is interpreted in
+            # global scope by codegen.
         elseif !is_value_pos_expr_head(op.head)
             if !allow_frontend_forms || op.head !== :opaque_closure_method
                 @verify_error "Expr not allowed in value position"
-                error("")
+                raise_error()
             end
         end
     elseif isa(op, Union{OldSSAValue, NewSSAValue})
-        @verify_error "Left over SSA marker"
-        error("")
-    elseif isa(op, UnoptSlot)
+        @verify_error "At statement %$use_idx: Left over SSA marker ($op)"
+        raise_error()
+    elseif isa(op, SlotNumber)
         @verify_error "Left over slot detected in converted IR"
-        error("")
+        raise_error()
     end
 end
 
@@ -86,9 +106,53 @@ function count_int(val::Int, arr::Vector{Int})
     n
 end
 
+_debuginfo_firstline(debuginfo::Union{DebugInfo,DebugInfoStream}) = IRShow.debuginfo_firstline(debuginfo)
 function verify_ir(ir::IRCode, print::Bool=true,
                    allow_frontend_forms::Bool=false,
-                   𝕃ₒ::AbstractLattice = SimpleInferenceLattice.instance)
+                   𝕃ₒ::AbstractLattice = SimpleInferenceLattice.instance,
+                   mi::Union{Nothing,MethodInstance}=nothing)
+    function raise_error()
+        error_args = Any["IR verification failed."]
+        if irshow_was_loaded()
+            # ensure we use I/O that does not yield, as this gets called during compilation
+            firstline = invokelatest(_debuginfo_firstline, ir.debuginfo)
+        else
+            firstline = nothing
+        end
+        if firstline !== nothing
+            file, line = firstline
+            push!(error_args, "\n", "    Code location: ", file, ":", line)
+        end
+        if mi !== nothing
+            push!(error_args, "\n", "  Method instance: ", mi)
+        end
+        invokelatest(error, error_args...)
+    end
+    # Verify CFG graph. Must be well formed to construct domtree
+    if !(length(ir.cfg.blocks) - 1 <= length(ir.cfg.index) <= length(ir.cfg.blocks))
+        @verify_error "CFG index length ($(length(ir.cfg.index))) does not correspond to # of blocks $(length(ir.cfg.blocks))"
+        raise_error()
+    end
+    if length(ir.stmts.stmt) != length(ir.stmts)
+        @verify_error "IR stmt length is invalid $(length(ir.stmts.stmt)) / $(length(ir.stmts))"
+        raise_error()
+    end
+    if length(ir.stmts.type) != length(ir.stmts)
+        @verify_error "IR type length is invalid $(length(ir.stmts.type)) / $(length(ir.stmts))"
+        raise_error()
+    end
+    if length(ir.stmts.info) != length(ir.stmts)
+        @verify_error "IR info length is invalid $(length(ir.stmts.info)) / $(length(ir.stmts))"
+        raise_error()
+    end
+    if length(ir.stmts.line) != length(ir.stmts) * 3
+        @verify_error "IR line length is invalid $(length(ir.stmts.line)) / $(length(ir.stmts) * 3)"
+        raise_error()
+    end
+    if length(ir.stmts.flag) != length(ir.stmts)
+        @verify_error "IR flag length is invalid $(length(ir.stmts.flag)) / $(length(ir.stmts))"
+        raise_error()
+    end
     # For now require compact IR
     # @assert isempty(ir.new_nodes)
     # Verify CFG
@@ -99,32 +163,44 @@ function verify_ir(ir::IRCode, print::Bool=true,
             p == 0 && continue
             if !(1 <= p <= length(ir.cfg.blocks))
                 @verify_error "Predecessor $p of block $idx out of bounds for IR"
-                error("")
+                raise_error()
             end
             c = count_int(idx, ir.cfg.blocks[p].succs)
             if c == 0
                 @verify_error "Predecessor $p of block $idx not in successor list"
-                error("")
+                raise_error()
             elseif c == 2
                 if count_int(p, block.preds) != 2
                     @verify_error "Double edge from $p to $idx not correctly accounted"
-                    error("")
+                    raise_error()
                 end
             end
         end
         for s in block.succs
             if !(1 <= s <= length(ir.cfg.blocks))
                 @verify_error "Successor $s of block $idx out of bounds for IR"
-                error("")
+                raise_error()
             end
             if !(idx in ir.cfg.blocks[s].preds)
                 #Base.@show ir.cfg
                 #Base.@show ir
                 #Base.@show ir.argtypes
                 @verify_error "Successor $s of block $idx not in predecessor list"
-                error("")
+                raise_error()
             end
         end
+        if !(1 <= first(block.stmts) <= length(ir.stmts))
+            @verify_error "First statement of BB $idx ($(first(block.stmts))) out of bounds for IR (length=$(length(ir.stmts)))"
+            raise_error()
+        end
+        if !(1 <= last(block.stmts) <= length(ir.stmts))
+            @verify_error "Last statement of BB $idx ($(last(block.stmts))) out of bounds for IR (length=$(length(ir.stmts)))"
+            raise_error()
+        end
+        if idx <= length(ir.cfg.index) && last(block.stmts) + 1 != ir.cfg.index[idx]
+            @verify_error "End of BB $idx ($(last(block.stmts))) is not one less than CFG index ($(ir.cfg.index[idx]))"
+            raise_error()
+        end
     end
     # Verify statements
     domtree = construct_domtree(ir.cfg.blocks)
@@ -132,44 +208,49 @@ function verify_ir(ir::IRCode, print::Bool=true,
         if first(block.stmts) != last_end + 1
             #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)]
             @verify_error "First statement of BB $idx ($(first(block.stmts))) does not match end of previous ($last_end)"
-            error("")
+            raise_error()
         end
         last_end = last(block.stmts)
-        terminator = ir.stmts[last_end][:inst]
+        terminator = ir[SSAValue(last_end)][:stmt]
 
         bb_unreachable(domtree, idx) && continue
         if isa(terminator, ReturnNode)
             if !isempty(block.succs)
                 @verify_error "Block $idx ends in return or unreachable, but has successors"
-                error("")
+                raise_error()
             end
         elseif isa(terminator, GotoNode)
             if length(block.succs) != 1 || block.succs[1] != terminator.label
-                @verify_error "Block $idx successors ($(block.succs)), does not match GotoNode terminator"
-                error("")
+                @verify_error "Block $idx successors ($(block.succs)), does not match GotoNode terminator ($(terminator.label))"
+                raise_error()
             end
         elseif isa(terminator, GotoIfNot)
             if terminator.dest == idx + 1
                 @verify_error "Block $idx terminator forms a double edge to block $(idx+1)"
-                error("")
+                raise_error()
             end
             if length(block.succs) != 2 || (block.succs != [terminator.dest, idx+1] && block.succs != [idx+1, terminator.dest])
                 @verify_error "Block $idx successors ($(block.succs)), does not match GotoIfNot terminator"
-                error("")
+                raise_error()
             end
-        elseif isexpr(terminator, :enter)
+        elseif isa(terminator, EnterNode)
             @label enter_check
-            if length(block.succs) != 2 || (block.succs != Int[terminator.args[1], idx+1] && block.succs != Int[idx+1, terminator.args[1]])
+            if length(block.succs) == 1
+                if terminator.catch_dest != 0
+                    @verify_error "Block $idx successors ($(block.succs)), does not match :enter terminator"
+                    raise_error()
+                end
+            elseif (block.succs != Int[terminator.catch_dest, idx+1] && block.succs != Int[idx+1, terminator.catch_dest])
                 @verify_error "Block $idx successors ($(block.succs)), does not match :enter terminator"
-                error("")
+                raise_error()
             end
         else
             if length(block.succs) != 1 || block.succs[1] != idx + 1
                 # As a special case, we allow extra statements in the BB of an :enter
                 # statement, until we can do proper CFG manipulations during compaction.
-                for idx in first(block.stmts):last(block.stmts)
-                    stmt = ir.stmts[idx][:inst]
-                    if isexpr(stmt, :enter)
+                for stmt_idx in first(block.stmts):last(block.stmts)
+                    stmt = ir[SSAValue(stmt_idx)][:stmt]
+                    if isa(stmt, EnterNode)
                         terminator = stmt
                         @goto enter_check
                     end
@@ -183,11 +264,15 @@ function verify_ir(ir::IRCode, print::Bool=true,
                     # here, but that isn't always possible.
                 else
                     @verify_error "Block $idx successors ($(block.succs)), does not match fall-through terminator %$termidx ($terminator)::$stmttyp"
-                    error("")
+                    raise_error()
                 end
             end
         end
     end
+    if length(ir.stmts) != last(ir.cfg.blocks[end].stmts)
+        @verify_error "End of last BB $(last(ir.cfg.blocks[end].stmts)) does not match last IR statement $(length(ir.stmts))"
+        raise_error()
+    end
     lastbb = 0
     is_phinode_block = false
     firstidx = 1
@@ -201,12 +286,12 @@ function verify_ir(ir::IRCode, print::Bool=true,
         # We allow invalid IR in dead code to avoid passes having to detect when
         # they're generating dead code.
         bb_unreachable(domtree, bb) && continue
-        stmt = ir.stmts[idx][:inst]
+        stmt = ir[SSAValue(idx)][:stmt]
         stmt === nothing && continue
         if isa(stmt, PhiNode)
             if !is_phinode_block
                 @verify_error "φ node $idx is not at the beginning of the basic block $bb"
-                error("")
+                raise_error()
             end
             lastphi = idx
             @assert length(stmt.edges) == length(stmt.values)
@@ -217,20 +302,20 @@ function verify_ir(ir::IRCode, print::Bool=true,
                     if edge == edge′
                         # TODO: Move `unique` to Core.Compiler. For now we assume the predecessor list is always unique.
                         @verify_error "Edge list φ node $idx in bb $bb not unique (double edge?)"
-                        error("")
+                        raise_error()
                     end
                 end
                 if !(edge == 0 && bb == 1) && !(edge in ir.cfg.blocks[bb].preds)
                     #Base.@show ir.argtypes
                     #Base.@show ir
                     @verify_error "Edge $edge of φ node $idx not in predecessor list"
-                    error("")
+                    raise_error()
                 end
                 edge == 0 && continue
                 if bb_unreachable(domtree, Int(edge))
                     # TODO: Disallow?
                     #@verify_error "Unreachable edge from #$edge should have been cleaned up at idx $idx"
-                    #error("")
+                    #raise_error()
                     continue
                 end
                 isassigned(stmt.values, i) || continue
@@ -243,21 +328,23 @@ function verify_ir(ir::IRCode, print::Bool=true,
                         #    PhiNode type was $phiT
                         #    Value type was $(ir.stmts[val.id][:type])
                         #"""
-                        #error("")
+                        #raise_error()
                     end
                 end
-                check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, idx, print, false, i, allow_frontend_forms)
+                check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, idx, print, false, i,
+                    allow_frontend_forms, raise_error)
             end
             continue
         end
 
-        if is_phinode_block && isa(stmt, Union{Expr, UpsilonNode, PhiCNode, SSAValue})
+        if is_phinode_block && !is_valid_phiblock_stmt(stmt)
             if !isa(stmt, Expr) || !is_value_pos_expr_head(stmt.head)
                 # Go back and check that all non-PhiNodes are valid value-position
                 for validate_idx in firstidx:(lastphi-1)
-                    validate_stmt = ir.stmts[validate_idx][:inst]
+                    validate_stmt = ir[SSAValue(validate_idx)][:stmt]
                     isa(validate_stmt, PhiNode) && continue
-                    check_op(ir, domtree, validate_stmt, bb, idx, idx, print, false, 0, allow_frontend_forms)
+                    check_op(ir, domtree, validate_stmt, bb, idx, idx, print, false, 0,
+                        allow_frontend_forms, raise_error)
                 end
                 is_phinode_block = false
             end
@@ -267,40 +354,44 @@ function verify_ir(ir::IRCode, print::Bool=true,
                 val = stmt.values[i]
                 if !isa(val, SSAValue)
                     @verify_error "Operand $i of PhiC node $idx must be an SSA Value."
-                    error("")
+                    raise_error()
                 end
-                if !isa(ir[val][:inst], UpsilonNode)
+                if !isa(ir[val][:stmt], UpsilonNode)
                     @verify_error "Operand $i of PhiC node $idx must reference an Upsilon node."
-                    error("")
+                    raise_error()
                 end
             end
-        elseif (isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isexpr(stmt, :enter)) && idx != last(ir.cfg.blocks[bb].stmts)
-            @verify_error "Terminator $idx in bb $bb is not the last statement in the block"
-            error("")
-        else
-            if isa(stmt, Expr) || isa(stmt, ReturnNode) # TODO: make sure everything has line info
-                if (stmt isa ReturnNode)
-                    if isdefined(stmt, :val)
-                        # TODO: Disallow unreachable returns?
-                        # bb_unreachable(domtree, Int64(edge))
-                    else
-                        #@verify_error "Missing line number information for statement $idx of $ir"
-                    end
-                end
-                if !(stmt isa ReturnNode && !isdefined(stmt, :val)) # not actually a return node, but an unreachable marker
-                    if ir.stmts[idx][:line] <= 0
-                    end
-                end
+        elseif isterminator(stmt)
+            if idx != last(ir.cfg.blocks[bb].stmts)
+                @verify_error "Terminator $idx in bb $bb is not the last statement in the block"
+                raise_error()
+            end
+            if !isa(stmt, ReturnNode) && ir[SSAValue(idx)][:type] !== Any
+                @verify_error "Explicit terminators (other than ReturnNode) must have `Any` type"
+                raise_error()
             end
+        else
             isforeigncall = false
             if isa(stmt, Expr)
                 if stmt.head === :(=)
-                    if stmt.args[1] isa SSAValue
-                        @verify_error "SSAValue as assignment LHS"
-                        error("")
+                    @verify_error "Assignment should have been removed during SSA conversion"
+                    raise_error()
+                elseif stmt.head === :isdefined
+                    if length(stmt.args) > 2
+                        @verify_error "malformed isdefined"
+                        raise_error()
                     end
-                    if stmt.args[2] isa GlobalRef
-                        # undefined GlobalRef as assignment RHS is OK
+                    if stmt.args[1] isa GlobalRef
+                        # undefined GlobalRef is OK in isdefined
+                        continue
+                    end
+                elseif stmt.head === :throw_undef_if_not
+                    if length(stmt.args) > 3
+                        @verify_error "malformed throw_undef_if_not"
+                        raise_error()
+                    end
+                    if stmt.args[1] isa GlobalRef
+                        # undefined GlobalRef is OK in throw_undef_if_not
                         continue
                     end
                 elseif stmt.head === :gc_preserve_end
@@ -311,7 +402,7 @@ function verify_ir(ir::IRCode, print::Bool=true,
                 elseif stmt.head === :foreigncall
                     isforeigncall = true
                 elseif stmt.head === :isdefined && length(stmt.args) == 1 &&
-                        (stmt.args[1] isa GlobalRef || isexpr(stmt.args[1], :static_parameter))
+                        isexpr(stmt.args[1], :static_parameter)
                     # a GlobalRef or static_parameter isdefined check does not evaluate its argument
                     continue
                 elseif stmt.head === :call
@@ -320,24 +411,39 @@ function verify_ir(ir::IRCode, print::Bool=true,
                         # TODO: these are not yet linearized
                         continue
                     end
+                elseif stmt.head === :leave
+                    for i in 1:length(stmt.args)
+                        arg = stmt.args[i]
+                        if !isa(arg, Union{Nothing, SSAValue})
+                            @verify_error "Malformed :leave - Expected `Nothing` or SSAValue"
+                            raise_error()
+                        elseif isa(arg, SSAValue)
+                            enter_stmt = ir[arg::SSAValue][:stmt]
+                            if !isa(enter_stmt, Nothing) && !isa(enter_stmt, EnterNode)
+                                @verify_error "Malformed :leave - argument ssavalue should point to `nothing` or :enter"
+                                raise_error()
+                            end
+                        end
+                    end
                 end
             end
             n = 1
             for op in userefs(stmt)
                 op = op[]
-                check_op(ir, domtree, op, bb, idx, idx, print, isforeigncall, n, allow_frontend_forms)
+                check_op(ir, domtree, op, bb, idx, idx, print, isforeigncall, n,
+                    allow_frontend_forms, raise_error)
                 n += 1
             end
         end
     end
 end
 
-function verify_linetable(linetable::Vector{LineInfoNode}, print::Bool=true)
-    for i in 1:length(linetable)
-        line = linetable[i]
-        if i <= line.inlined_at
-            @verify_error "Misordered linetable"
-            error("")
+function verify_linetable(di::DebugInfoStream, nstmts::Int, print::Bool=true)
+    @assert 3nstmts == length(di.codelocs)
+    for i in 1:nstmts
+        edge = di.codelocs[3i-1]
+        if !(edge == 0 || get(di.edges, edge, nothing) isa DebugInfo)
+            @verify_error "Malformed debuginfo index into edges"
         end
     end
 end
diff --git a/Compiler/src/stmtinfo.jl b/Compiler/src/stmtinfo.jl
new file mode 100644
index 0000000000000..5a2188060e6e5
--- /dev/null
+++ b/Compiler/src/stmtinfo.jl
@@ -0,0 +1,487 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+@nospecialize
+
+"""
+    call::CallMeta
+
+A simple struct that captures both the return type (`call.rt`)
+and any additional information (`call.info`) for a given generic call.
+"""
+struct CallMeta
+    rt::Any
+    exct::Any
+    effects::Effects
+    info::CallInfo
+    refinements # ::Union{Nothing,SlotRefinement,Vector{Any}}
+    function CallMeta(rt::Any, exct::Any, effects::Effects, info::CallInfo,
+                      refinements=nothing)
+        @nospecialize rt exct info
+        return new(rt, exct, effects, info, refinements)
+    end
+end
+
+struct NoCallInfo <: CallInfo end
+add_edges_impl(::Vector{Any}, ::NoCallInfo) = nothing
+
+# InferredCallResult is defined in types.jl so that InferenceResult can inherit from it
+
+struct ConcreteResult <: InferredCallResult
+    edge::CodeInstance
+    effects::Effects
+    result
+    ConcreteResult(edge::CodeInstance, effects::Effects) = new(edge, effects)
+    ConcreteResult(edge::CodeInstance, effects::Effects, @nospecialize val) = new(edge, effects, val)
+end
+
+struct SemiConcreteResult <: InferredCallResult
+    edge::CodeInstance
+    ir::IRCode
+    effects::Effects
+    spec_info::SpecInfo
+end
+
+"""
+    info::MethodMatchInfo <: CallInfo
+
+Captures the essential arguments and result of a `:jl_matching_methods` lookup
+for the given call (`info.results`). This info may then be used by the
+optimizer, without having to re-consult the method table.
+This info is illegal on any statement that is not a call to a generic function.
+"""
+struct MethodMatchInfo <: CallInfo
+    results::MethodLookupResult
+    mt::MethodTable
+    atype
+    fullmatch::Bool
+    edges::Vector{Union{Nothing,CodeInstance}}
+    call_results::Vector{Union{Nothing,InferredCallResult}}
+    function MethodMatchInfo(
+        results::MethodLookupResult, mt::MethodTable, @nospecialize(atype), fullmatch::Bool)
+        edges = fill!(Vector{Union{Nothing,CodeInstance}}(undef, length(results)), nothing)
+        call_results = fill!(Vector{Union{Nothing,InferredCallResult}}(undef, length(results)), nothing)
+        return new(results, mt, atype, fullmatch, edges, call_results)
+    end
+end
+add_edges_impl(edges::Vector{Any}, info::MethodMatchInfo) = _add_edges_impl(edges, info)
+function _add_edges_impl(edges::Vector{Any}, info::MethodMatchInfo, mi_edge::Bool=false)
+    if !fully_covering(info)
+        exists = false
+        for i in 2:length(edges)
+            if edges[i] === Core.methodtable && edges[i-1] == info.atype
+                exists = true
+                break
+            end
+        end
+        if !exists
+            push!(edges, info.atype)
+            push!(edges, Core.methodtable)
+        end
+    end
+    nmatches = length(info.results)
+    if nmatches == length(info.edges) == 1 && fully_covering(info)
+        # try the optimized format for the representation, if possible and applicable
+        # if this doesn't succeed, the backedge will be less precise,
+        # but the forward edge will maintain the precision
+        edge = info.edges[1]
+        m = info.results[1]
+        if edge === nothing
+            mi = specialize_method(m) # don't allow `Method`-edge for this optimized format
+            edge = mi
+        else
+            mi = edge.def::MethodInstance
+        end
+        if mi.specTypes === m.spec_types
+            add_one_edge!(edges, edge)
+            return nothing
+        end
+    end
+    # add check for whether this lookup already existed in the edges list
+    # encode nmatches as negative if fully_covers is false
+    encoded_nmatches = fully_covering(info) ? nmatches : -nmatches
+    for i in 1:length(edges)
+        if edges[i] === encoded_nmatches && edges[i+1] == info.atype
+            # TODO: must also verify the CodeInstance match too
+            return nothing
+        end
+    end
+    push!(edges, encoded_nmatches, info.atype)
+    for i = 1:nmatches
+        edge = info.edges[i]
+        m = info.results[i]
+        if edge === nothing
+            edge = mi_edge ? specialize_method(m) : m.method
+        else
+            @assert edge.def.def === m.method
+        end
+        push!(edges, edge)
+    end
+    nothing
+end
+function add_one_edge!(edges::Vector{Any}, edge::MethodInstance)
+    i = 1
+    while i <= length(edges)
+        edgeᵢ = edges[i]
+        edgeᵢ isa Int && (i += 2 + abs(edgeᵢ); continue)
+        edgeᵢ isa CodeInstance && (edgeᵢ = get_ci_mi(edgeᵢ))
+        edgeᵢ isa MethodInstance || (i += 1; continue)
+        if edgeᵢ === edge && !(i > 1 && edges[i-1] isa Type)
+            return # found existing covered edge
+        end
+        i += 1
+    end
+    push!(edges, edge)
+    nothing
+end
+function add_one_edge!(edges::Vector{Any}, edge::CodeInstance)
+    i = 1
+    while i <= length(edges)
+        edgeᵢ_orig = edgeᵢ = edges[i]
+        edgeᵢ isa Int && (i += 2 + abs(edgeᵢ); continue)
+        edgeᵢ isa CodeInstance && (edgeᵢ = get_ci_mi(edgeᵢ))
+        edgeᵢ isa MethodInstance || (i += 1; continue)
+        if edgeᵢ === edge.def && !(i > 1 && edges[i-1] isa Type)
+            if edgeᵢ_orig isa MethodInstance
+                # found edge we can upgrade
+                edges[i] = edge
+                return
+            elseif true # XXX compare `CodeInstance` identify?
+                return
+            end
+        end
+        i += 1
+    end
+    push!(edges, edge)
+    nothing
+end
+nsplit_impl(::MethodMatchInfo) = 1
+getsplit_impl(info::MethodMatchInfo, idx::Int) = (@assert idx == 1; info.results)
+getresult_impl(info::MethodMatchInfo, idx::Int) = info.call_results[idx]
+
+"""
+    info::UnionSplitInfo <: CallInfo
+
+If inference decides to partition the method search space by splitting unions,
+it will issue a method lookup query for each such partition. This info indicates
+that such partitioning happened and wraps the corresponding `MethodMatchInfo` for
+each partition (`info.matches::Vector{MethodMatchInfo}`).
+This info is illegal on any statement that is not a call to a generic function.
+"""
+struct UnionSplitInfo <: CallInfo
+    split::Vector{MethodMatchInfo}
+end
+add_edges_impl(edges::Vector{Any}, info::UnionSplitInfo) =
+    _add_edges_impl(edges, info)
+_add_edges_impl(edges::Vector{Any}, info::UnionSplitInfo, mi_edge::Bool=false) =
+    for split in info.split; _add_edges_impl(edges, split, mi_edge); end
+nsplit_impl(info::UnionSplitInfo) = length(info.split)
+getsplit_impl(info::UnionSplitInfo, idx::Int) = getsplit(info.split[idx], 1)
+function getresult_impl(info::UnionSplitInfo, idx::Int)
+    for split in info.split
+        n = length(split.call_results)
+        if idx ≤ n
+            return split.call_results[idx]
+        else
+            idx -= n
+        end
+    end
+end
+
+"""
+    info::MethodResultPure <: CallInfo
+
+This struct represents a method result constant was proven to be effect-free.
+"""
+struct MethodResultPure <: CallInfo
+    info::CallInfo
+end
+let instance = MethodResultPure(NoCallInfo())
+    global MethodResultPure
+    MethodResultPure() = instance
+end
+add_edges_impl(edges::Vector{Any}, info::MethodResultPure) = add_edges!(edges, info.info)
+
+"""
+    ainfo::AbstractIterationInfo
+
+Captures all the information for abstract iteration analysis of a single value.
+Each (abstract) call to `iterate`, corresponds to one entry in `ainfo.each::Vector{CallMeta}`.
+"""
+struct AbstractIterationInfo
+    each::Vector{CallMeta}
+    complete::Bool
+end
+
+const MaybeAbstractIterationInfo = Union{Nothing, AbstractIterationInfo}
+
+"""
+    info::ApplyCallInfo <: CallInfo
+
+This info applies to any call of `_apply_iterate(...)` and captures both the
+info of the actual call being applied and the info for any implicit call
+to the `iterate` function. Note that it is possible for the call itself
+to be yet another `_apply_iterate`, in which case the `info.call` field will
+be another `ApplyCallInfo`. This info is illegal on any statement that is
+not an `_apply_iterate` call.
+"""
+struct ApplyCallInfo <: CallInfo
+    # The info for the call itself
+    call::CallInfo
+    # AbstractIterationInfo for each argument, if applicable
+    arginfo::Vector{MaybeAbstractIterationInfo}
+end
+function add_edges_impl(edges::Vector{Any}, info::ApplyCallInfo)
+    add_edges!(edges, info.call)
+    for arg in info.arginfo
+        arg === nothing && continue
+        for edge in arg.each
+            add_edges!(edges, edge.info)
+        end
+    end
+end
+
+"""
+    info::UnionSplitApplyCallInfo <: CallInfo
+
+Like `UnionSplitInfo`, but for `ApplyCallInfo` rather than `MethodMatchInfo`.
+This info is illegal on any statement that is not an `_apply_iterate` call.
+"""
+struct UnionSplitApplyCallInfo <: CallInfo
+    infos::Vector{ApplyCallInfo}
+end
+add_edges_impl(edges::Vector{Any}, info::UnionSplitApplyCallInfo) =
+    for split in info.infos; add_edges!(edges, split); end
+
+"""
+    info::InvokeCICallInfo
+
+Represents a resolved call to `Core.invoke` targeting a `Core.CodeInstance`
+"""
+struct InvokeCICallInfo <: CallInfo
+    edge::CodeInstance
+end
+add_edges_impl(edges::Vector{Any}, info::InvokeCICallInfo) =
+    add_inlining_edge!(edges, info.edge)
+nsplit_impl(::InvokeCICallInfo) = 0
+
+"""
+    info::InvokeCallInfo
+
+Represents a resolved call to `Core.invoke`, carrying the `info.match::MethodMatch` of
+the method that has been processed.
+Optionally keeps `info.result::InferenceResult` that keeps constant information.
+"""
+struct InvokeCallInfo <: CallInfo
+    edge::Union{Nothing,CodeInstance}
+    match::MethodMatch
+    result::Union{Nothing,InferredCallResult}
+    atype # ::Type
+end
+add_edges_impl(edges::Vector{Any}, info::InvokeCallInfo) =
+    _add_edges_impl(edges, info)
+function _add_edges_impl(edges::Vector{Any}, info::InvokeCallInfo, mi_edge::Bool=false)
+    edge = info.edge
+    if edge === nothing
+        edge = mi_edge ? specialize_method(info.match) : info.match.method
+    end
+    add_invoke_edge!(edges, info.atype, edge)
+    nothing
+end
+function add_invoke_edge!(edges::Vector{Any}, @nospecialize(atype), edge::Union{MethodInstance,Method})
+    for i in 2:length(edges)
+        edgeᵢ = edges[i]
+        edgeᵢ isa CodeInstance && (edgeᵢ = edgeᵢ.def)
+        edgeᵢ isa MethodInstance || edgeᵢ isa Method || continue
+        if edgeᵢ === edge
+            edge_minus_1 = edges[i-1]
+            if edge_minus_1 isa Type && edge_minus_1 == atype
+                return # found existing covered edge
+            end
+        end
+    end
+    push!(edges, atype)
+    push!(edges, edge)
+    nothing
+end
+function add_invoke_edge!(edges::Vector{Any}, @nospecialize(atype), edge::CodeInstance)
+    for i in 2:length(edges)
+        edgeᵢ_orig = edgeᵢ = edges[i]
+        edgeᵢ isa CodeInstance && (edgeᵢ = edgeᵢ.def)
+        if ((edgeᵢ isa MethodInstance && edgeᵢ === edge.def) ||
+            (edgeᵢ isa Method && edgeᵢ === edge.def.def))
+            edge_minus_1 = edges[i-1]
+            if edge_minus_1 isa Type && edge_minus_1 == atype
+                if edgeᵢ_orig isa MethodInstance || edgeᵢ_orig isa Method
+                    # found edge we can upgrade
+                    edges[i] = edge
+                    return
+                elseif true # XXX compare `CodeInstance` identify?
+                    return
+                end
+            end
+        end
+    end
+    push!(edges, atype)
+    push!(edges, edge)
+    nothing
+end
+
+function add_inlining_edge!(edges::Vector{Any}, edge::MethodInstance)
+    # check if we already have an edge to this code
+    i = 1
+    while i <= length(edges)
+        edgeᵢ = edges[i]
+        if edgeᵢ isa Method && edgeᵢ === edge.def
+            # found edge we can upgrade
+            edges[i] = edge
+            return
+        end
+        edgeᵢ isa CodeInstance && (edgeᵢ = edgeᵢ.def)
+        if edgeᵢ isa MethodInstance && edgeᵢ === edge
+            return # found existing covered edge
+        end
+        i += 1
+    end
+    # add_invoke_edge alone
+    push!(edges, (edge.def::Method).sig)
+    push!(edges, edge)
+    nothing
+end
+function add_inlining_edge!(edges::Vector{Any}, edge::CodeInstance)
+    # check if we already have an edge to this code
+    i = 1
+    while i <= length(edges)
+        edgeᵢ = edges[i]
+        if edgeᵢ isa Method && edgeᵢ === edge.def.def
+            # found edge we can upgrade
+            edges[i] = edge
+            return
+        end
+        if edgeᵢ isa MethodInstance && edgeᵢ === edge.def
+            # found edge we can upgrade
+            edges[i] = edge
+            return
+        end
+        if edgeᵢ isa CodeInstance && edgeᵢ.def === edge.def
+            # found existing edge
+            # XXX compare `CodeInstance` identify?
+            return
+        end
+        i += 1
+    end
+    # add_invoke_edge alone
+    push!(edges, (get_ci_mi(edge).def::Method).sig)
+    push!(edges, edge)
+    nothing
+end
+
+nsplit_impl(::InvokeCallInfo) = 1
+getsplit_impl(info::InvokeCallInfo, idx::Int) = (@assert idx == 1; MethodLookupResult(Core.MethodMatch[info.match],
+    WorldRange(typemin(UInt), typemax(UInt)), false))
+getresult_impl(info::InvokeCallInfo, idx::Int) = (@assert idx == 1; info.result)
+
+"""
+    info::OpaqueClosureCallInfo
+
+Represents a resolved call of opaque closure, carrying the `info.match::MethodMatch` of
+the method that has been processed.
+Optionally keeps `info.result::InferenceResult` that keeps constant information.
+"""
+struct OpaqueClosureCallInfo <: CallInfo
+    edge::Union{Nothing,CodeInstance}
+    match::MethodMatch
+    result::Union{Nothing,InferredCallResult}
+end
+function add_edges_impl(edges::Vector{Any}, info::OpaqueClosureCallInfo)
+    edge = info.edge
+    if edge !== nothing
+        add_one_edge!(edges, edge)
+    end
+    nothing
+end
+
+"""
+    info::OpaqueClosureCreateInfo <: CallInfo
+
+This info may be constructed upon opaque closure construction, with `info.unspec::CallMeta`
+carrying out inference result of an unreal, partially specialized call (i.e. specialized on
+the closure environment, but not on the argument types of the opaque closure) in order to
+allow the optimizer to rewrite the return type parameter of the `OpaqueClosure` based on it.
+"""
+struct OpaqueClosureCreateInfo <: CallInfo
+    unspec::CallMeta
+    function OpaqueClosureCreateInfo(unspec::CallMeta)
+        @assert isa(unspec.info, Union{OpaqueClosureCallInfo, NoCallInfo})
+        return new(unspec)
+    end
+end
+# merely creating the object implies edges for OC, unlike normal objects,
+# since calling them doesn't normally have edges in contrast
+add_edges_impl(edges::Vector{Any}, info::OpaqueClosureCreateInfo) = add_edges!(edges, info.unspec.info)
+
+# Stmt infos that are used by external consumers, but not by optimization.
+# These are not produced by default and must be explicitly opted into by
+# the AbstractInterpreter.
+
+"""
+    info::ReturnTypeCallInfo <: CallInfo
+
+Represents a resolved call of `Core.Compiler.return_type`.
+`info.call` wraps the info corresponding to the call that `Core.Compiler.return_type` call
+was supposed to analyze.
+"""
+struct ReturnTypeCallInfo <: CallInfo
+    info::CallInfo
+end
+add_edges_impl(edges::Vector{Any}, info::ReturnTypeCallInfo) = add_edges!(edges, info.info)
+
+"""
+    info::FinalizerInfo <: CallInfo
+
+Represents the information of a potential (later) call to the finalizer on the given
+object type.
+"""
+struct FinalizerInfo <: CallInfo
+    info::CallInfo   # the callinfo for the finalizer call
+    effects::Effects # the effects for the finalizer call
+end
+# merely allocating a finalizer does not imply edges (unless it gets inlined later)
+add_edges_impl(::Vector{Any}, ::FinalizerInfo) = nothing
+
+"""
+    info::ModifyOpInfo <: CallInfo
+
+Represents a resolved call of one of:
+ - `modifyfield!(obj, name, op, x, [order])`
+ - `modifyglobal!(mod, var, op, x, order)`
+ - `memoryrefmodify!(memref, op, x, order, boundscheck)`
+ - `Intrinsics.atomic_pointermodify(ptr, op, x, order)`
+
+`info.info` wraps the call information of `op(getval(), x)`.
+"""
+struct ModifyOpInfo <: CallInfo
+    info::CallInfo # the callinfo for the `op(getval(), x)` call
+end
+add_edges_impl(edges::Vector{Any}, info::ModifyOpInfo) = add_edges!(edges, info.info)
+
+struct VirtualMethodMatchInfo <: CallInfo
+    info::Union{MethodMatchInfo,UnionSplitInfo,InvokeCallInfo}
+end
+add_edges_impl(edges::Vector{Any}, info::VirtualMethodMatchInfo) =
+    _add_edges_impl(edges, info.info, #=mi_edge=#true)
+
+"""
+    info::GlobalAccessInfo <: CallInfo
+
+Represents access to a global through runtime reflection, rather than as a manifest
+`GlobalRef` in the source code. Used for builtins (getglobal/setglobal/etc.) that
+perform such accesses.
+"""
+struct GlobalAccessInfo <: CallInfo
+    b::Core.Binding
+end
+function add_edges_impl(edges::Vector{Any}, info::GlobalAccessInfo)
+    push!(edges, info.b)
+end
+
+@specialize
diff --git a/base/compiler/tfuncs.jl b/Compiler/src/tfuncs.jl
similarity index 59%
rename from base/compiler/tfuncs.jl
rename to Compiler/src/tfuncs.jl
index 117f5288418e1..3c23974c88920 100644
--- a/base/compiler/tfuncs.jl
+++ b/Compiler/src/tfuncs.jl
@@ -42,11 +42,10 @@ macro nospecs(ex)
         push!(names, arg)
     end
     @assert isexpr(body, :block)
-    if !isempty(names)
-        lin = first(body.args)::LineNumberNode
-        nospec = Expr(:macrocall, Symbol("@nospecialize"), lin, names...)
-        insert!(body.args, 2, nospec)
-    end
+    isempty(names) && throw(ArgumentError("no arguments for @nospec"))
+    lin = first(body.args)::LineNumberNode
+    nospec = Expr(:macrocall, GlobalRef(@__MODULE__, :var"@nospecialize"), lin, names...)
+    insert!(body.args, 2, nospec)
     return esc(ex)
 end
 
@@ -89,31 +88,38 @@ function add_tfunc(@nospecialize(f::Builtin), minarg::Int, maxarg::Int, @nospeci
 end
 
 add_tfunc(throw, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0)
+add_tfunc(Core.throw_methoderror, 1, INT_INF, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0)
 
 # the inverse of typeof_tfunc
 # returns (type, isexact, isconcrete, istype)
 # if isexact is false, the actual runtime type may (will) be a subtype of t
 # if isconcrete is true, the actual runtime type is definitely concrete (unreachable if not valid as a typeof)
 # if istype is true, the actual runtime value will definitely be a type (e.g. this is false for Union{Type{Int}, Int})
-function instanceof_tfunc(@nospecialize(t))
+function instanceof_tfunc(@nospecialize(t), astag::Bool=false, @nospecialize(troot) = t)
     if isa(t, Const)
-        if isa(t.val, Type) && valid_as_lattice(t.val)
+        if isa(t.val, Type) && valid_as_lattice(t.val, astag)
             return t.val, true, isconcretetype(t.val), true
         end
         return Bottom, true, false, false # runtime throws on non-Type
     end
     t = widenconst(t)
+    troot = widenconst(troot)
     if t === Bottom
         return Bottom, true, true, false # runtime unreachable
     elseif t === typeof(Bottom) || !hasintersect(t, Type)
         return Bottom, true, false, false # literal Bottom or non-Type
     elseif isType(t)
         tp = t.parameters[1]
-        valid_as_lattice(tp) || return Bottom, true, false, false # runtime unreachable / throws on non-Type
+        valid_as_lattice(tp, astag) || return Bottom, true, false, false # runtime unreachable / throws on non-Type
+        if troot isa UnionAll
+            # Free `TypeVar`s inside `Type` has violated the "diagonal" rule.
+            # Widen them before `UnionAll` rewraping to relax concrete constraint.
+            tp = widen_diagonal(tp, troot)
+        end
         return tp, !has_free_typevars(tp), isconcretetype(tp), true
     elseif isa(t, UnionAll)
         t′ = unwrap_unionall(t)
-        t′′, isexact, isconcrete, istype = instanceof_tfunc(t′)
+        t′′, isexact, isconcrete, istype = instanceof_tfunc(t′, astag, rewrap_unionall(t, troot))
         tr = rewrap_unionall(t′′, t)
         if t′′ isa DataType && t′′.name !== Tuple.name && !has_free_typevars(tr)
             # a real instance must be within the declared bounds of the type,
@@ -128,8 +134,8 @@ function instanceof_tfunc(@nospecialize(t))
         end
         return tr, isexact, isconcrete, istype
     elseif isa(t, Union)
-        ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(t.a)
-        tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(t.b)
+        ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(unwraptv(t.a), astag, troot)
+        tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(unwraptv(t.b), astag, troot)
         isconcrete = isconcrete_a && isconcrete_b
         istype = istype_a && istype_b
         # most users already handle the Union case, so here we assume that
@@ -149,14 +155,14 @@ end
 # ----------
 
 @nospecs bitcast_tfunc(𝕃::AbstractLattice, t, x) = bitcast_tfunc(widenlattice(𝕃), t, x)
-@nospecs bitcast_tfunc(::JLTypeLattice, t, x) = instanceof_tfunc(t)[1]
+@nospecs bitcast_tfunc(::JLTypeLattice, t, x) = instanceof_tfunc(t, true)[1]
 @nospecs conversion_tfunc(𝕃::AbstractLattice, t, x) = conversion_tfunc(widenlattice(𝕃), t, x)
-@nospecs conversion_tfunc(::JLTypeLattice, t, x) = instanceof_tfunc(t)[1]
+@nospecs conversion_tfunc(::JLTypeLattice, t, x) = instanceof_tfunc(t, true)[1]
 
-add_tfunc(bitcast, 2, 2, bitcast_tfunc, 1)
-add_tfunc(sext_int, 2, 2, conversion_tfunc, 1)
-add_tfunc(zext_int, 2, 2, conversion_tfunc, 1)
-add_tfunc(trunc_int, 2, 2, conversion_tfunc, 1)
+add_tfunc(bitcast, 2, 2, bitcast_tfunc, 0)
+add_tfunc(sext_int, 2, 2, conversion_tfunc, 0)
+add_tfunc(zext_int, 2, 2, conversion_tfunc, 0)
+add_tfunc(trunc_int, 2, 2, conversion_tfunc, 0)
 add_tfunc(fptoui, 2, 2, conversion_tfunc, 1)
 add_tfunc(fptosi, 2, 2, conversion_tfunc, 1)
 add_tfunc(uitofp, 2, 2, conversion_tfunc, 1)
@@ -170,41 +176,74 @@ add_tfunc(fpext, 2, 2, conversion_tfunc, 1)
 @nospecs math_tfunc(𝕃::AbstractLattice, args...) = math_tfunc(widenlattice(𝕃), args...)
 @nospecs math_tfunc(::JLTypeLattice, x, xs...) = widenconst(x)
 
-add_tfunc(neg_int, 1, 1, math_tfunc, 1)
+add_tfunc(neg_int, 1, 1, math_tfunc, 0)
 add_tfunc(add_int, 2, 2, math_tfunc, 1)
 add_tfunc(sub_int, 2, 2, math_tfunc, 1)
-add_tfunc(mul_int, 2, 2, math_tfunc, 4)
-add_tfunc(sdiv_int, 2, 2, math_tfunc, 30)
-add_tfunc(udiv_int, 2, 2, math_tfunc, 30)
-add_tfunc(srem_int, 2, 2, math_tfunc, 30)
-add_tfunc(urem_int, 2, 2, math_tfunc, 30)
-add_tfunc(add_ptr, 2, 2, math_tfunc, 1)
-add_tfunc(sub_ptr, 2, 2, math_tfunc, 1)
+add_tfunc(mul_int, 2, 2, math_tfunc, 3)
+add_tfunc(sdiv_int, 2, 2, math_tfunc, 20)
+add_tfunc(udiv_int, 2, 2, math_tfunc, 20)
+add_tfunc(srem_int, 2, 2, math_tfunc, 20)
+add_tfunc(urem_int, 2, 2, math_tfunc, 20)
 add_tfunc(neg_float, 1, 1, math_tfunc, 1)
-add_tfunc(add_float, 2, 2, math_tfunc, 1)
-add_tfunc(sub_float, 2, 2, math_tfunc, 1)
-add_tfunc(mul_float, 2, 2, math_tfunc, 4)
-add_tfunc(div_float, 2, 2, math_tfunc, 4)
-add_tfunc(fma_float, 3, 3, math_tfunc, 5)
-add_tfunc(muladd_float, 3, 3, math_tfunc, 5)
+add_tfunc(add_float, 2, 2, math_tfunc, 2)
+add_tfunc(sub_float, 2, 2, math_tfunc, 2)
+add_tfunc(mul_float, 2, 2, math_tfunc, 8)
+add_tfunc(div_float, 2, 2, math_tfunc, 10)
+add_tfunc(min_float, 2, 2, math_tfunc, 1)
+add_tfunc(max_float, 2, 2, math_tfunc, 1)
+add_tfunc(fma_float, 3, 3, math_tfunc, 8)
+add_tfunc(muladd_float, 3, 3, math_tfunc, 8)
 
 # fast arithmetic
 add_tfunc(neg_float_fast, 1, 1, math_tfunc, 1)
-add_tfunc(add_float_fast, 2, 2, math_tfunc, 1)
-add_tfunc(sub_float_fast, 2, 2, math_tfunc, 1)
-add_tfunc(mul_float_fast, 2, 2, math_tfunc, 2)
-add_tfunc(div_float_fast, 2, 2, math_tfunc, 2)
+add_tfunc(add_float_fast, 2, 2, math_tfunc, 2)
+add_tfunc(sub_float_fast, 2, 2, math_tfunc, 2)
+add_tfunc(mul_float_fast, 2, 2, math_tfunc, 8)
+add_tfunc(div_float_fast, 2, 2, math_tfunc, 10)
+add_tfunc(min_float_fast, 2, 2, math_tfunc, 1)
+add_tfunc(max_float_fast, 2, 2, math_tfunc, 1)
 
 # bitwise operators
 # -----------------
 
+@nospecs and_int_tfunc(𝕃::AbstractLattice, x, y) = and_int_tfunc(widenlattice(𝕃), x, y)
+@nospecs function and_int_tfunc(𝕃::ConstsLattice, x, y)
+    if isa(x, Const) && x.val === false && widenconst(y) === Bool
+        return Const(false)
+    elseif isa(y, Const) && y.val === false && widenconst(x) === Bool
+        return Const(false)
+    end
+    return and_int_tfunc(widenlattice(𝕃), x, y)
+end
+@nospecs and_int_tfunc(::JLTypeLattice, x, y) = widenconst(x)
+
+@nospecs or_int_tfunc(𝕃::AbstractLattice, x, y) = or_int_tfunc(widenlattice(𝕃), x, y)
+@nospecs function or_int_tfunc(𝕃::ConstsLattice, x, y)
+    if isa(x, Const) && x.val === true && widenconst(y) === Bool
+        return Const(true)
+    elseif isa(y, Const) && y.val === true && widenconst(x) === Bool
+        return Const(true)
+    end
+    return or_int_tfunc(widenlattice(𝕃), x, y)
+end
+@nospecs or_int_tfunc(::JLTypeLattice, x, y) = widenconst(x)
+
 @nospecs shift_tfunc(𝕃::AbstractLattice, x, y) = shift_tfunc(widenlattice(𝕃), x, y)
 @nospecs shift_tfunc(::JLTypeLattice, x, y) = widenconst(x)
 
-add_tfunc(and_int, 2, 2, math_tfunc, 1)
-add_tfunc(or_int, 2, 2, math_tfunc, 1)
+function not_tfunc(𝕃::AbstractLattice, @nospecialize(b))
+    if isa(b, Conditional)
+        return Conditional(b.slot, b.elsetype, b.thentype)
+    elseif isa(b, Const)
+        return Const(not_int(b.val))
+    end
+    return math_tfunc(𝕃, b)
+end
+
+add_tfunc(and_int, 2, 2, and_int_tfunc, 1)
+add_tfunc(or_int, 2, 2, or_int_tfunc, 1)
 add_tfunc(xor_int, 2, 2, math_tfunc, 1)
-add_tfunc(not_int, 1, 1, math_tfunc, 0) # usually used as not_int(::Bool) to negate a condition
+add_tfunc(not_int, 1, 1, not_tfunc, 0) # usually used as not_int(::Bool) to negate a condition
 add_tfunc(shl_int, 2, 2, shift_tfunc, 1)
 add_tfunc(lshr_int, 2, 2, shift_tfunc, 1)
 add_tfunc(ashr_int, 2, 2, shift_tfunc, 1)
@@ -258,12 +297,12 @@ add_tfunc(le_float_fast, 2, 2, cmp_tfunc, 1)
 @nospecs chk_tfunc(𝕃::AbstractLattice, x, y) = chk_tfunc(widenlattice(𝕃), x, y)
 @nospecs chk_tfunc(::JLTypeLattice, x, y) = Tuple{widenconst(x), Bool}
 
-add_tfunc(checked_sadd_int, 2, 2, chk_tfunc, 10)
-add_tfunc(checked_uadd_int, 2, 2, chk_tfunc, 10)
-add_tfunc(checked_ssub_int, 2, 2, chk_tfunc, 10)
-add_tfunc(checked_usub_int, 2, 2, chk_tfunc, 10)
-add_tfunc(checked_smul_int, 2, 2, chk_tfunc, 10)
-add_tfunc(checked_umul_int, 2, 2, chk_tfunc, 10)
+add_tfunc(checked_sadd_int, 2, 2, chk_tfunc, 2)
+add_tfunc(checked_uadd_int, 2, 2, chk_tfunc, 2)
+add_tfunc(checked_ssub_int, 2, 2, chk_tfunc, 2)
+add_tfunc(checked_usub_int, 2, 2, chk_tfunc, 2)
+add_tfunc(checked_smul_int, 2, 2, chk_tfunc, 5)
+add_tfunc(checked_umul_int, 2, 2, chk_tfunc, 5)
 
 # other, misc
 # -----------
@@ -281,7 +320,6 @@ end
 add_tfunc(Core.Intrinsics.cglobal, 1, 2, cglobal_tfunc, 5)
 
 add_tfunc(Core.Intrinsics.have_fma, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Bool), 1)
-add_tfunc(Core.Intrinsics.arraylen, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Int), 4)
 
 # builtin functions
 # =================
@@ -304,7 +342,7 @@ end
 add_tfunc(Core.ifelse, 3, 3, ifelse_tfunc, 1)
 
 @nospecs function ifelse_nothrow(𝕃::AbstractLattice, cond, x, y)
-    ⊑ = Core.Compiler.:⊑(𝕃)
+    ⊑ = partialorder(𝕃)
     return cond ⊑ Bool
 end
 
@@ -353,7 +391,7 @@ function isdefined_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any})
     return isdefined_nothrow(𝕃, argtypes[1], argtypes[2])
 end
 @nospecs function isdefined_nothrow(𝕃::AbstractLattice, x, name)
-    ⊑ = Core.Compiler.:⊑(𝕃)
+    ⊑ = partialorder(𝕃)
     isvarargtype(x) && return false
     isvarargtype(name) && return false
     if hasintersect(widenconst(x), Module)
@@ -367,22 +405,15 @@ end
     return isdefined_tfunc(𝕃, arg1, sym)
 end
 @nospecs function isdefined_tfunc(𝕃::AbstractLattice, arg1, sym)
-    if isa(arg1, Const)
-        arg1t = typeof(arg1.val)
-    else
-        arg1t = widenconst(arg1)
-    end
-    if isType(arg1t)
-        return Bool
+    if arg1 isa MustAlias
+        arg1 = widenmustalias(arg1)
     end
+    arg1t = arg1 isa Const ? typeof(arg1.val) : isconstType(arg1) ? typeof(arg1.parameters[1]) : widenconst(arg1)
     a1 = unwrap_unionall(arg1t)
     if isa(a1, DataType) && !isabstracttype(a1)
         if a1 === Module
             hasintersect(widenconst(sym), Symbol) || return Bottom
-            if isa(sym, Const) && isa(sym.val, Symbol) && isa(arg1, Const) &&
-               isdefined(arg1.val::Module, sym.val::Symbol)
-                return Const(true)
-            end
+            # isa(sym, Const) case intercepted in abstract interpretation
         elseif isa(sym, Const)
             val = sym.val
             if isa(val, Symbol)
@@ -392,7 +423,7 @@ end
             else
                 return Bottom
             end
-            if 1 <= idx <= datatype_min_ninitialized(a1)
+            if 1 ≤ idx ≤ datatype_min_ninitialized(a1)
                 return Const(true)
             elseif a1.name === _NAMEDTUPLE_NAME
                 if isconcretetype(a1)
@@ -400,15 +431,21 @@ end
                 else
                     ns = a1.parameters[1]
                     if isa(ns, Tuple)
-                        return Const(1 <= idx <= length(ns))
+                        return Const(1 ≤ idx ≤ length(ns))
                     end
                 end
-            elseif idx <= 0 || (!isvatuple(a1) && idx > fieldcount(a1))
+            elseif idx ≤ 0 || (!isvatuple(a1) && idx > fieldcount(a1))
                 return Const(false)
             elseif isa(arg1, Const)
-                arg1v = (arg1::Const).val
-                if !ismutable(arg1v) || isdefined(arg1v, idx) || isconst(typeof(arg1v), idx)
-                    return Const(isdefined(arg1v, idx))
+                if !ismutabletype(a1) || isconst(a1, idx)
+                    return Const(isdefined(arg1.val, idx))
+                end
+            elseif isa(arg1, PartialStruct)
+                if !isvarargtype(arg1.fields[end])
+                    aundefᵢ = _getundefs(arg1)[idx]
+                    if aundefᵢ isa Bool
+                        return Const(!aundefᵢ)
+                    end
                 end
             elseif !isvatuple(a1)
                 fieldT = fieldtype(a1, idx)
@@ -416,6 +453,10 @@ end
                     return Const(true)
                 end
             end
+        # datatype_fieldcount is what `fieldcount` uses internally
+        # and returns nothing (!==0) for non-definite field counts.
+        elseif datatype_fieldcount(a1) === 0
+            return Const(false)
         end
     elseif isa(a1, Union)
         # Results can only be `Const` or `Bool`
@@ -439,7 +480,7 @@ function sizeof_nothrow(@nospecialize(x))
         return sizeof_nothrow(rewrap_unionall(xu.a, x)) &&
                sizeof_nothrow(rewrap_unionall(xu.b, x))
     end
-    t, exact, isconcrete = instanceof_tfunc(x)
+    t, exact, isconcrete = instanceof_tfunc(x, false)
     if t === Bottom
         # x must be an instance (not a Type) or is the Bottom type object
         x = widenconst(x)
@@ -466,8 +507,8 @@ function sizeof_nothrow(@nospecialize(x))
 end
 
 function _const_sizeof(@nospecialize(x))
-    # Constant Vector does not have constant size
-    isa(x, Vector) && return Int
+    # Constant GenericMemory does not have constant size
+    isa(x, GenericMemory) && return Int
     size = try
             Core.sizeof(x)
         catch ex
@@ -491,7 +532,7 @@ end
     end
     # Core.sizeof operates on either a type or a value. First check which
     # case we're in.
-    t, exact = instanceof_tfunc(x)
+    t, exact = instanceof_tfunc(x, false)
     if t !== Bottom
         # The value corresponding to `x` at runtime could be a type.
         # Normalize the query to ask about that type.
@@ -530,16 +571,39 @@ add_tfunc(Core.sizeof, 1, 1, sizeof_tfunc, 1)
         end
     end
     if isa(x, Union)
-        na = nfields_tfunc(𝕃, x.a)
+        na = nfields_tfunc(𝕃, unwraptv(x.a))
         na === Int && return Int
-        return tmerge(na, nfields_tfunc(𝕃, x.b))
+        return tmerge(𝕃, na, nfields_tfunc(𝕃, unwraptv(x.b)))
     end
     return Int
 end
 add_tfunc(nfields, 1, 1, nfields_tfunc, 1)
 add_tfunc(Core._expr, 1, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->Expr), 100)
 add_tfunc(svec, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->SimpleVector), 20)
-@nospecs function typevar_tfunc(𝕃::AbstractLattice, n, lb_arg, ub_arg)
+
+@nospecs function _svec_len_tfunc(::AbstractLattice, s)
+    if isa(s, Const) && isa(s.val, SimpleVector)
+        return Const(length(s.val))
+    end
+    return Int
+end
+add_tfunc(Core._svec_len, 1, 1, _svec_len_tfunc, 1)
+@nospecs function _svec_len_nothrow(𝕃::AbstractLattice, s)
+    ⊑ = partialorder(𝕃)
+    return s ⊑ SimpleVector
+end
+
+@nospecs function _svec_ref_tfunc(::AbstractLattice, s, i)
+    if isa(s, Const) && isa(i, Const)
+        s, i = s.val, i.val
+        if isa(s, SimpleVector) && isa(i, Int)
+            return 1 ≤ i ≤ length(s) ? Const(s[i]) : Bottom
+        end
+    end
+    return Any
+end
+add_tfunc(Core._svec_ref, 2, 2, _svec_ref_tfunc, 1)
+@nospecs function typevar_tfunc(::AbstractLattice, n, lb_arg, ub_arg)
     lb = Union{}
     ub = Any
     ub_certain = lb_certain = true
@@ -568,12 +632,21 @@ add_tfunc(svec, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->SimpleVec
                 return TypeVar
             end
         end
-        tv = TypeVar(nval, lb, ub)
-        return PartialTypeVar(tv, lb_certain, ub_certain)
+        lb_valid = lb isa Type || lb isa TypeVar
+        ub_valid = ub isa Type || ub isa TypeVar
+        if lb_valid && ub_valid
+            tv = TypeVar(nval, lb, ub)
+            return PartialTypeVar(tv, lb_certain, ub_certain)
+        elseif !lb_valid && lb_certain
+            return Union{}
+        elseif !ub_valid && ub_certain
+            return Union{}
+        end
     end
     return TypeVar
 end
-@nospecs function typebound_nothrow(b)
+@nospecs function typebound_nothrow(𝕃::AbstractLattice, b)
+    ⊑ = partialorder(𝕃)
     b = widenconst(b)
     (b ⊑ TypeVar) && return true
     if isType(b)
@@ -582,30 +655,14 @@ end
     return false
 end
 @nospecs function typevar_nothrow(𝕃::AbstractLattice, n, lb, ub)
-    ⊑ = Core.Compiler.:⊑(𝕃)
+    ⊑ = partialorder(𝕃)
     n ⊑ Symbol || return false
-    typebound_nothrow(lb) || return false
-    typebound_nothrow(ub) || return false
+    typebound_nothrow(𝕃, lb) || return false
+    typebound_nothrow(𝕃, ub) || return false
     return true
 end
 add_tfunc(Core._typevar, 3, 3, typevar_tfunc, 100)
 
-@nospecs function arraysize_tfunc(𝕃::AbstractLattice, ary, dim)
-    hasintersect(widenconst(ary), Array) || return Bottom
-    hasintersect(widenconst(dim), Int) || return Bottom
-    return Int
-end
-add_tfunc(arraysize, 2, 2, arraysize_tfunc, 4)
-
-@nospecs function arraysize_nothrow(ary, dim)
-    ary ⊑ Array || return false
-    if isa(dim, Const)
-        dimval = dim.val
-        return isa(dimval, Int) && dimval > 0
-    end
-    return false
-end
-
 struct MemoryOrder x::Cint end
 const MEMORY_ORDER_UNSPECIFIED = MemoryOrder(-2)
 const MEMORY_ORDER_INVALID     = MemoryOrder(-1)
@@ -643,13 +700,16 @@ function pointer_eltype(@nospecialize(ptr))
         unw = unwrap_unionall(a)
         if isa(unw, DataType) && unw.name === Ptr.body.name
             T = unw.parameters[1]
-            valid_as_lattice(T) || return Bottom
+            valid_as_lattice(T, true) || return Bottom
             return rewrap_unionall(T, a)
         end
     end
     return Any
 end
 
+@nospecs function pointerarith_tfunc(𝕃::AbstractLattice, ptr, offset)
+    return widenconst(ptr)
+end
 @nospecs function pointerref_tfunc(𝕃::AbstractLattice, a, i, align)
     return pointer_eltype(a)
 end
@@ -675,7 +735,7 @@ end
         if isa(unw, DataType) && unw.name === Ptr.body.name
             T = unw.parameters[1]
             # note: we could sometimes refine this to a PartialStruct if we analyzed `op(T, T)::T`
-            valid_as_lattice(T) || return Bottom
+            valid_as_lattice(T, true) || return Bottom
             return rewrap_unionall(Pair{T, T}, a)
         end
     end
@@ -693,6 +753,8 @@ end
     end
     return ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
 end
+add_tfunc(add_ptr, 2, 2, pointerarith_tfunc, 1)
+add_tfunc(sub_ptr, 2, 2, pointerarith_tfunc, 1)
 add_tfunc(pointerref, 3, 3, pointerref_tfunc, 4)
 add_tfunc(pointerset, 4, 4, pointerset_tfunc, 5)
 add_tfunc(atomic_fence, 1, 1, atomic_fence_tfunc, 4)
@@ -731,8 +793,12 @@ function typeof_concrete_vararg(t::DataType)
     for i = 1:np
         p = t.parameters[i]
         if i == np && isvarargtype(p)
-            if isdefined(p, :T) && !isdefined(p, :N) && isconcretetype(p.T)
-                return Type{Tuple{t.parameters[1:np-1]..., Vararg{p.T, N}}} where N
+            if isdefined(p, :T) && isconcretetype(p.T)
+                t = Type{Tuple{t.parameters[1:np-1]..., Vararg{p.T, N}}} where N
+                if isdefined(p, :N)
+                    return t{p.N}
+                end
+                return t
             end
         elseif !isconcretetype(p)
             break
@@ -791,15 +857,15 @@ end
 add_tfunc(typeof, 1, 1, typeof_tfunc, 1)
 
 @nospecs function typeassert_tfunc(𝕃::AbstractLattice, v, t)
-    t = instanceof_tfunc(t)[1]
+    t = instanceof_tfunc(t, true)[1]
     t === Any && return v
     return tmeet(𝕃, v, t)
 end
 add_tfunc(typeassert, 2, 2, typeassert_tfunc, 4)
 
 @nospecs function typeassert_nothrow(𝕃::AbstractLattice, v, t)
-    ⊑ = Core.Compiler.:⊑(𝕃)
-    # ty, exact = instanceof_tfunc(t)
+    ⊑ = partialorder(𝕃)
+    # ty, exact = instanceof_tfunc(t, true)
     # return exact && v ⊑ ty
     if (isType(t) && !has_free_typevars(t) && v ⊑ t.parameters[1]) ||
         (isa(t, Const) && isa(t.val, Type) && v ⊑ t.val)
@@ -809,7 +875,7 @@ add_tfunc(typeassert, 2, 2, typeassert_tfunc, 4)
 end
 
 @nospecs function isa_tfunc(𝕃::AbstractLattice, v, tt)
-    t, isexact = instanceof_tfunc(tt)
+    t, isexact = instanceof_tfunc(tt, true)
     if t === Bottom
         # check if t could be equivalent to typeof(Bottom), since that's valid in `isa`, but the set of `v` is empty
         # if `t` cannot have instances, it's also invalid on the RHS of isa
@@ -844,13 +910,13 @@ end
 add_tfunc(isa, 2, 2, isa_tfunc, 1)
 
 @nospecs function isa_nothrow(𝕃::AbstractLattice, obj, typ)
-    ⊑ = Core.Compiler.:⊑(𝕃)
+    ⊑ = partialorder(𝕃)
     return typ ⊑ Type
 end
 
 @nospecs function subtype_tfunc(𝕃::AbstractLattice, a, b)
-    a, isexact_a = instanceof_tfunc(a)
-    b, isexact_b = instanceof_tfunc(b)
+    a, isexact_a = instanceof_tfunc(a, false)
+    b, isexact_b = instanceof_tfunc(b, false)
     if !has_free_typevars(a) && !has_free_typevars(b)
         if a <: b
             if isexact_b || a === Bottom
@@ -867,42 +933,13 @@ end
 add_tfunc(<:, 2, 2, subtype_tfunc, 10)
 
 @nospecs function subtype_nothrow(𝕃::AbstractLattice, lty, rty)
-    ⊑ = Core.Compiler.:⊑(𝕃)
+    ⊑ = partialorder(𝕃)
     return lty ⊑ Type && rty ⊑ Type
 end
 
-function fieldcount_noerror(@nospecialize t)
-    if t isa UnionAll || t isa Union
-        t = argument_datatype(t)
-        if t === nothing
-            return nothing
-        end
-        t = t::DataType
-    elseif t === Union{}
-        return 0
-    end
-    if !(t isa DataType)
-        return nothing
-    end
-    if t.name === _NAMEDTUPLE_NAME
-        names, types = t.parameters
-        if names isa Tuple
-            return length(names)
-        end
-        if types isa DataType && types <: Tuple
-            return fieldcount_noerror(types)
-        end
-        abstr = true
-    else
-        abstr = isabstracttype(t) || (t.name === Tuple.name && isvatuple(t))
-    end
-    if abstr
-        return nothing
-    end
-    return isdefined(t, :types) ? length(t.types) : length(t.name.names)
-end
-
-function try_compute_fieldidx(typ::DataType, @nospecialize(field))
+function try_compute_fieldidx(@nospecialize(typ), @nospecialize(field))
+    typ = argument_datatype(typ)
+    typ === nothing && return nothing
     if isa(field, Symbol)
         field = fieldindex(typ, field, false)
         field == 0 && return nothing
@@ -917,47 +954,43 @@ function try_compute_fieldidx(typ::DataType, @nospecialize(field))
     return field
 end
 
-function getfield_boundscheck((; fargs, argtypes)::ArgInfo) # Symbol
-    farg = nothing
-    if length(argtypes) == 3
+function getfield_boundscheck(argtypes::Vector{Any})
+    if length(argtypes) == 2
+        isvarargtype(argtypes[2]) && return :unsafe
         return :on
-    elseif length(argtypes) == 4
-        fargs !== nothing && (farg = fargs[4])
-        boundscheck = argtypes[4]
-        isvarargtype(boundscheck) && return :unknown
+    elseif length(argtypes) == 3
+        boundscheck = argtypes[3]
+        isvarargtype(boundscheck) && return :unsafe
         if widenconst(boundscheck) === Symbol
             return :on
         end
-    elseif length(argtypes) == 5
-        fargs !== nothing && (farg = fargs[5])
-        boundscheck = argtypes[5]
+    elseif length(argtypes) == 4
+        boundscheck = argtypes[4]
+        isvarargtype(boundscheck) && return :unsafe
     else
-        return :unknown
+        return :unsafe
     end
-    isvarargtype(boundscheck) && return :unknown
     boundscheck = widenconditional(boundscheck)
     if widenconst(boundscheck) === Bool
         if isa(boundscheck, Const)
             return boundscheck.val::Bool ? :on : :off
-        elseif farg !== nothing && isexpr(farg, :boundscheck)
-            return :boundscheck
         end
+        return :unknown # including a case when specified as `:boundscheck`
     end
-    return :unknown
+    return :unsafe
 end
 
-function getfield_nothrow(𝕃::AbstractLattice, arginfo::ArgInfo, boundscheck::Symbol=getfield_boundscheck(arginfo))
-    (;argtypes) = arginfo
-    boundscheck === :unknown && return false
+function getfield_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, boundscheck::Symbol=getfield_boundscheck(argtypes))
+    boundscheck === :unsafe && return false
     ordering = Const(:not_atomic)
-    if length(argtypes) == 4
-        isvarargtype(argtypes[4]) && return false
-        if widenconst(argtypes[4]) !== Bool
-            ordering = argtypes[4]
-        end
-    elseif length(argtypes) == 5
-        ordering = argtypes[5]
-    elseif length(argtypes) != 3
+    if length(argtypes) == 3
+        isvarargtype(argtypes[3]) && return false
+        if widenconst(argtypes[3]) !== Bool
+            ordering = argtypes[3]
+        end
+    elseif length(argtypes) == 4
+        ordering = argtypes[3]
+    elseif length(argtypes) ≠ 2
         return false
     end
     isa(ordering, Const) || return false
@@ -966,7 +999,7 @@ function getfield_nothrow(𝕃::AbstractLattice, arginfo::ArgInfo, boundscheck::
     if ordering !== :not_atomic # TODO: this is assuming not atomic
         return false
     end
-    return getfield_nothrow(𝕃, argtypes[2], argtypes[3], !(boundscheck === :off))
+    return getfield_nothrow(𝕃, argtypes[1], argtypes[2], !(boundscheck === :off))
 end
 @nospecs function getfield_nothrow(𝕃::AbstractLattice, s00, name, boundscheck::Bool)
     # If we don't have boundscheck off and don't know the field, don't even bother
@@ -974,30 +1007,43 @@ end
         isa(name, Const) || return false
     end
 
-    ⊑ = Core.Compiler.:⊑(𝕃)
+    ⊑ = partialorder(𝕃)
 
     # If we have s00 being a const, we can potentially refine our type-based analysis above
-    if isa(s00, Const) || isconstType(s00)
-        if !isa(s00, Const)
-            sv = s00.parameters[1]
-        else
+    if isa(s00, Const) || isconstType(s00) || isa(s00, PartialStruct)
+        if isa(s00, Const)
             sv = s00.val
+            sty = typeof(sv)
+            nflds = nfields(sv)
+            ismod = sv isa Module
+        elseif isa(s00, PartialStruct)
+            sty = unwrap_unionall(s00.typ)
+            nflds = fieldcount_noerror(sty)
+            ismod = false
+        else
+            sv = (s00::DataType).parameters[1]
+            sty = typeof(sv)
+            nflds = nfields(sv)
+            ismod = sv isa Module
         end
         if isa(name, Const)
             nval = name.val
             if !isa(nval, Symbol)
-                isa(sv, Module) && return false
+                ismod && return false
                 isa(nval, Int) || return false
             end
-            return isdefined(sv, nval)
+            return isdefined_tfunc(𝕃, s00, name) === Const(true)
         end
-        boundscheck && return false
+
         # If bounds checking is disabled and all fields are assigned,
         # we may assume that we don't throw
-        isa(sv, Module) && return false
+        @assert !boundscheck
+        ismod && return false
         name ⊑ Int || name ⊑ Symbol || return false
-        for i = 1:fieldcount(typeof(sv))
-            isdefined(sv, i) || return false
+        sty.name.n_uninitialized == 0 && return true
+        nflds === nothing && return false
+        for i = (datatype_min_ninitialized(sty)+1):nflds
+            isdefined_tfunc(𝕃, s00, Const(i)) === Const(true) || return false
         end
         return true
     end
@@ -1033,17 +1079,15 @@ end
 end
 
 @nospecs function getfield_tfunc(𝕃::AbstractLattice, s00, name, boundscheck_or_order)
-    t = isvarargtype(boundscheck_or_order) ? unwrapva(boundscheck_or_order) :
-        widenconst(boundscheck_or_order)
-    hasintersect(t, Symbol) || hasintersect(t, Bool) || return Bottom
+    if !isvarargtype(boundscheck_or_order)
+        t = widenconst(boundscheck_or_order)
+        hasintersect(t, Symbol) || hasintersect(t, Bool) || return Bottom
+    end
     return getfield_tfunc(𝕃, s00, name)
 end
 @nospecs function getfield_tfunc(𝕃::AbstractLattice, s00, name, order, boundscheck)
     hasintersect(widenconst(order), Symbol) || return Bottom
-    if isvarargtype(boundscheck)
-        t = unwrapva(boundscheck)
-        hasintersect(t, Symbol) || hasintersect(t, Bool) || return Bottom
-    else
+    if !isvarargtype(boundscheck)
         hasintersect(widenconst(boundscheck), Bool) || return Bottom
     end
     return getfield_tfunc(𝕃, s00, name)
@@ -1069,7 +1113,7 @@ function _getfield_tfunc_const(@nospecialize(sv), name::Const)
     if isa(sv, DataType) && nv == DATATYPE_TYPES_FIELDINDEX && isdefined(sv, nv)
         return Const(getfield(sv, nv))
     end
-    if isconst(typeof(sv), nv)
+    if !isa(sv, Module) && isconst(typeof(sv), nv)
         if isdefined(sv, nv)
             return Const(getfield(sv, nv))
         end
@@ -1094,7 +1138,7 @@ end
 end
 
 @nospecs function _getfield_tfunc(𝕃::AnyMustAliasesLattice, s00, name, setfield::Bool)
-    return _getfield_tfunc(widenlattice(𝕃), widenmustalias(s00), name, setfield)
+    return _getfield_tfunc(widenlattice(𝕃), widenmustalias(s00), widenmustalias(name), setfield)
 end
 
 @nospecs function _getfield_tfunc(𝕃::PartialsLattice, s00, name, setfield::Bool)
@@ -1103,8 +1147,12 @@ end
         sty = unwrap_unionall(s)::DataType
         if isa(name, Const)
             nv = _getfield_fieldindex(sty, name)
-            if isa(nv, Int) && 1 <= nv <= length(s00.fields)
-                return unwrapva(s00.fields[nv])
+            if isa(nv, Int)
+                if nv < 1
+                    return Bottom
+                elseif nv ≤ length(s00.fields)
+                    return unwrapva(s00.fields[nv])
+                end
             end
         end
         s00 = s
@@ -1120,7 +1168,9 @@ end
             if isa(sv, Module)
                 setfield && return Bottom
                 if isa(nv, Symbol)
-                    return abstract_eval_global(sv, nv)
+                    # In ordinary inference, this case is intercepted early and
+                    # re-routed to `getglobal`.
+                    return Any
                 end
                 return Bottom
             end
@@ -1202,98 +1252,40 @@ end
             return Bottom
         end
         if nf == 1
-            return rewrap_unionall(unwrapva(ftypes[1]), s00)
-        end
-        # union together types of all fields
-        t = Bottom
-        for i in 1:nf
-            _ft = ftypes[i]
-            setfield && isconst(s, i) && continue
-            t = tmerge(t, rewrap_unionall(unwrapva(_ft), s00))
-            t === Any && break
+            fld = 1
+        else
+            # union together types of all fields
+            t = Bottom
+            for i in 1:nf
+                _ft = unwrapva(ftypes[i])
+                valid_as_lattice(_ft, true) || continue
+                setfield && isconst(s, i) && continue
+                t = tmerge(t, rewrap_unionall(_ft, s00))
+                t === Any && break
+            end
+            return t
         end
-        return t
+    else
+        fld = _getfield_fieldindex(s, name)
+        fld === nothing && return Bottom
     end
-    fld = _getfield_fieldindex(s, name)
-    fld === nothing && return Bottom
     if s <: Tuple && fld >= nf && isvarargtype(ftypes[nf])
-        return rewrap_unionall(unwrapva(ftypes[nf]), s00)
-    end
-    if fld < 1 || fld > nf
-        return Bottom
-    elseif setfield && isconst(s, fld)
-        return Bottom
-    end
-    R = ftypes[fld]
-    if isempty(s.parameters)
-        return R
+        R = unwrapva(ftypes[nf])
+    else
+        if fld < 1 || fld > nf
+            return Bottom
+        elseif setfield && isconst(s, fld)
+            return Bottom
+        end
+        R = ftypes[fld]
+        valid_as_lattice(R, true) || return Bottom
+        if isempty(s.parameters)
+            return R
+        end
     end
     return rewrap_unionall(R, s00)
 end
 
-@nospecs function getfield_notundefined(typ0, name)
-    if isa(typ0, Const) && isa(name, Const)
-        typv = typ0.val
-        namev = name.val
-        isa(typv, Module) && return true
-        if isa(namev, Symbol) || isa(namev, Int)
-            # Fields are not allowed to transition from defined to undefined, so
-            # even if the field is not const, all we need to check here is that
-            # it is defined here.
-            return isdefined(typv, namev)
-        end
-    end
-    typ0 = widenconst(typ0)
-    typ = unwrap_unionall(typ0)
-    if isa(typ, Union)
-        return getfield_notundefined(rewrap_unionall(typ.a, typ0), name) &&
-               getfield_notundefined(rewrap_unionall(typ.b, typ0), name)
-    end
-    isa(typ, DataType) || return false
-    if typ.name === Tuple.name || typ.name === _NAMEDTUPLE_NAME
-        # tuples and named tuples can't be instantiated with undefined fields,
-        # so we don't need to be conservative here
-        return true
-    end
-    if !isa(name, Const)
-        isvarargtype(name) && return false
-        if !hasintersect(widenconst(name), Union{Int,Symbol})
-            return true # no undefined behavior if thrown
-        end
-        # field isn't known precisely, but let's check if all the fields can't be
-        # initialized with undefined value so to avoid being too conservative
-        fcnt = fieldcount_noerror(typ)
-        fcnt === nothing && return false
-        all(i::Int->is_undefref_fieldtype(fieldtype(typ,i)), (datatype_min_ninitialized(typ)+1):fcnt) && return true
-        return false
-    end
-    name = name.val
-    if isa(name, Symbol)
-        fidx = fieldindex(typ, name, false)
-        fidx === nothing && return true # no undefined behavior if thrown
-    elseif isa(name, Int)
-        fidx = name
-    else
-        return true # no undefined behavior if thrown
-    end
-    fcnt = fieldcount_noerror(typ)
-    fcnt === nothing && return false
-    0 < fidx ≤ fcnt || return true # no undefined behavior if thrown
-    ftyp = fieldtype(typ, fidx)
-    is_undefref_fieldtype(ftyp) && return true
-    return fidx ≤ datatype_min_ninitialized(typ)
-end
-# checks if a field of this type will not be initialized with undefined value
-# and the access to that uninitialized field will cause and `UndefRefError`, e.g.,
-# - is_undefref_fieldtype(String) === true
-# - is_undefref_fieldtype(Integer) === true
-# - is_undefref_fieldtype(Any) === true
-# - is_undefref_fieldtype(Int) === false
-# - is_undefref_fieldtype(Union{Int32,Int64}) === false
-function is_undefref_fieldtype(@nospecialize ftyp)
-    return !has_free_typevars(ftyp) && !allocatedinline(ftyp)
-end
-
 @nospecs function setfield!_tfunc(𝕃::AbstractLattice, o, f, v, order)
     if !isvarargtype(order)
         hasintersect(widenconst(order), Symbol) || return Bottom
@@ -1326,7 +1318,6 @@ end
     return setfield!_nothrow(𝕃, s00, name, v)
 end
 @nospecs function setfield!_nothrow(𝕃::AbstractLattice, s00, name, v)
-    ⊑ = Core.Compiler.:⊑(𝕃)
     s0 = widenconst(s00)
     s = unwrap_unionall(s0)
     if isa(s, Union)
@@ -1343,81 +1334,128 @@ end
         isconst(s, field) && return false
         isfieldatomic(s, field) && return false # TODO: currently we're only testing for ordering === :not_atomic
         v_expected = fieldtype(s0, field)
+        ⊑ = partialorder(𝕃)
         return v ⊑ v_expected
     end
     return false
 end
 
-@nospecs function swapfield!_tfunc(𝕃::AbstractLattice, o, f, v, order)
+@nospecs function swapfield!_tfunc(𝕃::AbstractLattice, o, f, v, order=Symbol)
+    setfield!_tfunc(𝕃, o, f, v) === Bottom && return Bottom
     return getfield_tfunc(𝕃, o, f)
 end
-@nospecs function swapfield!_tfunc(𝕃::AbstractLattice, o, f, v)
-    return getfield_tfunc(𝕃, o, f)
-end
-@nospecs function modifyfield!_tfunc(𝕃::AbstractLattice, o, f, op, v, order)
-    return modifyfield!_tfunc(𝕃, o, f, op, v)
-end
-@nospecs function modifyfield!_tfunc(𝕃::AbstractLattice, o, f, op, v)
-    T = _fieldtype_tfunc(𝕃, o, f, isconcretetype(o))
+@nospecs function modifyfield!_tfunc(𝕃::AbstractLattice, o, f, op, v, order=Symbol)
+    o′ = widenconst(o)
+    T = _fieldtype_tfunc(𝕃, o′, f, isconcretetype(o′))
     T === Bottom && return Bottom
     PT = Const(Pair)
-    return instanceof_tfunc(apply_type_tfunc(𝕃, PT, T, T))[1]
+    return instanceof_tfunc(apply_type_tfunc(𝕃, Any[PT, T, T]), true)[1]
+end
+@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v, success_order=Symbol, failure_order=Symbol)
+    o′ = widenconst(o)
+    T = _fieldtype_tfunc(𝕃, o′, f, isconcretetype(o′))
+    T === Bottom && return Bottom
+    PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T)
+    return instanceof_tfunc(apply_type_tfunc(𝕃, Any[PT, T]), true)[1]
+end
+@nospecs function setfieldonce!_tfunc(𝕃::AbstractLattice, o, f, v, success_order=Symbol, failure_order=Symbol)
+    setfield!_tfunc(𝕃, o, f, v) === Bottom && return Bottom
+    isdefined_tfunc(𝕃, o, f) === Const(true) && return Const(false)
+    return Bool
 end
-function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState)
+
+@nospecs function abstract_modifyop!(interp::AbstractInterpreter, ff, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState)
+    if ff === modifyfield!
+        minargs = 5
+        maxargs = 6
+        op_argi = 4
+        v_argi = 5
+    elseif ff === Core.modifyglobal!
+        minargs = 5
+        maxargs = 6
+        op_argi = 4
+        v_argi = 5
+    elseif ff === Core.memoryrefmodify!
+        minargs = 6
+        maxargs = 6
+        op_argi = 3
+        v_argi = 4
+    elseif ff === atomic_pointermodify
+        minargs = 5
+        maxargs = 5
+        op_argi = 3
+        v_argi = 4
+    else
+        @assert false "unreachable"
+    end
+
     nargs = length(argtypes)
     if !isempty(argtypes) && isvarargtype(argtypes[nargs])
-        nargs - 1 <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
-        nargs > 3 || return CallMeta(Any, Effects(), NoCallInfo())
+        nargs - 1 <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+        nargs + 1 >= op_argi || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     else
-        5 <= nargs <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
+        minargs <= nargs <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
     end
     𝕃ᵢ = typeinf_lattice(interp)
-    o = unwrapva(argtypes[2])
-    f = unwrapva(argtypes[3])
-    RT = modifyfield!_tfunc(𝕃ᵢ, o, f, Any, Any)
+    if ff === modifyfield!
+        o = unwrapva(argtypes[2])
+        f = unwrapva(argtypes[3])
+        RT = modifyfield!_tfunc(𝕃ᵢ, o, f, Any, Any, Symbol)
+        TF = getfield_tfunc(𝕃ᵢ, o, f)
+    elseif ff === Core.modifyglobal!
+        o = unwrapva(argtypes[2])
+        f = unwrapva(argtypes[3])
+        GT = abstract_eval_get_binding_type(interp, sv, o, f).rt
+        RT = isa(GT, Const) ? Pair{GT.val, GT.val} : Pair
+        TF = isa(GT, Const) ? GT.val : Any
+    elseif ff === Core.memoryrefmodify!
+        o = unwrapva(argtypes[2])
+        RT = memoryrefmodify!_tfunc(𝕃ᵢ, o, Any, Any, Symbol, Bool)
+        TF = memoryrefget_tfunc(𝕃ᵢ, o, Symbol, Bool)
+    elseif ff === atomic_pointermodify
+        o = unwrapva(argtypes[2])
+        RT = atomic_pointermodify_tfunc(𝕃ᵢ, o, Any, Any, Symbol)
+        TF = atomic_pointerref_tfunc(𝕃ᵢ, o, Symbol)
+    else
+        @assert false "unreachable"
+    end
     info = NoCallInfo()
-    if nargs >= 5 && RT !== Bottom
+    if nargs >= v_argi && RT !== Bottom
         # we may be able to refine this to a PartialStruct by analyzing `op(o.f, v)::T`
         # as well as compute the info for the method matches
-        op = unwrapva(argtypes[4])
-        v = unwrapva(argtypes[5])
-        TF = getfield_tfunc(𝕃ᵢ, o, f)
-        callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=#1)
-        TF2 = tmeet(callinfo.rt, widenconst(TF))
-        if TF2 === Bottom
-            RT = Bottom
-        elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct
-            RT = PartialStruct(RT, Any[TF, TF2])
+        op = unwrapva(argtypes[op_argi])
+        v = unwrapva(argtypes[v_argi])
+        callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true, si.saw_latestworld), sv, #=max_methods=#1)
+        TF = Core.Box(TF)
+        RT = Core.Box(RT)
+        return Future{CallMeta}(callinfo, interp, sv) do callinfo, interp, sv
+            TF = TF.contents
+            RT = RT.contents
+            TF2 = tmeet(ipo_lattice(interp), callinfo.rt, widenconst(TF))
+            if TF2 === Bottom
+                RT = Bottom
+            elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct
+                RT = PartialStruct(fallback_lattice, RT, Union{Nothing,Bool}[false,false], Any[TF, TF2])
+            end
+            info = ModifyOpInfo(callinfo.info)
+            return CallMeta(RT, Any, Effects(), info)
         end
-        info = ModifyFieldInfo(callinfo.info)
     end
-    return CallMeta(RT, Effects(), info)
-end
-@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v, success_order, failure_order)
-    return replacefield!_tfunc(𝕃, o, f, x, v)
-end
-@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v, success_order)
-    return replacefield!_tfunc(𝕃, o, f, x, v)
-end
-@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v)
-    T = _fieldtype_tfunc(𝕃, o, f, isconcretetype(o))
-    T === Bottom && return Bottom
-    PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T)
-    return instanceof_tfunc(apply_type_tfunc(𝕃, PT, T))[1]
+    return Future(CallMeta(RT, Any, Effects(), info))
 end
 
 # we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial
 
 add_tfunc(getfield, 2, 4, getfield_tfunc, 1)
 add_tfunc(setfield!, 3, 4, setfield!_tfunc, 3)
-
 add_tfunc(swapfield!, 3, 4, swapfield!_tfunc, 3)
 add_tfunc(modifyfield!, 4, 5, modifyfield!_tfunc, 3)
 add_tfunc(replacefield!, 4, 6, replacefield!_tfunc, 3)
+add_tfunc(setfieldonce!, 3, 5, setfieldonce!_tfunc, 3)
 
 @nospecs function fieldtype_nothrow(𝕃::AbstractLattice, s0, name)
     s0 === Bottom && return true # unreachable
-    ⊑ = Core.Compiler.:⊑(𝕃)
+    ⊑ = partialorder(𝕃)
     if s0 === Any || s0 === Type || DataType ⊑ s0 || UnionAll ⊑ s0
         # We have no idea
         return false
@@ -1435,7 +1473,7 @@ add_tfunc(replacefield!, 4, 6, replacefield!_tfunc, 3)
                fieldtype_nothrow(𝕃, rewrap_unionall(su.b, s0), name)
     end
 
-    s, exact = instanceof_tfunc(s0)
+    s, exact = instanceof_tfunc(s0, false)
     s === Bottom && return false # always
     return _fieldtype_nothrow(s, exact, name)
 end
@@ -1500,7 +1538,7 @@ end
                       fieldtype_tfunc(𝕃, rewrap_unionall(su.b, s0), name))
     end
 
-    s, exact = instanceof_tfunc(s0)
+    s, exact = instanceof_tfunc(s0, false)
     s === Bottom && return Bottom
     return _fieldtype_tfunc(𝕃, s, name, exact)
 end
@@ -1513,8 +1551,8 @@ end
         tb0 = _fieldtype_tfunc(𝕃, rewrap_unionall(u.b, s), name, exact)
         ta0 ⊑ tb0 && return tb0
         tb0 ⊑ ta0 && return ta0
-        ta, exacta, _, istypea = instanceof_tfunc(ta0)
-        tb, exactb, _, istypeb = instanceof_tfunc(tb0)
+        ta, exacta, _, istypea = instanceof_tfunc(ta0, false)
+        tb, exactb, _, istypeb = instanceof_tfunc(tb0, false)
         if exact && exacta && exactb
             return Const(Union{ta, tb})
         end
@@ -1629,7 +1667,7 @@ function apply_type_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospe
     (headtype === Union) && return true
     isa(rt, Const) && return true
     u = headtype
-    # TODO: implement optimization for isvarargtype(u) and istuple occurences (which are valid but are not UnionAll)
+    # TODO: implement optimization for isvarargtype(u) and istuple occurrences (which are valid but are not UnionAll)
     for i = 2:length(argtypes)
         isa(u, UnionAll) || return false
         ai = widenconditional(argtypes[i])
@@ -1648,7 +1686,7 @@ function apply_type_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospe
                 return false
             end
         else
-            T, exact, _, istype = instanceof_tfunc(ai)
+            T, exact, _, istype = instanceof_tfunc(ai, false)
             if T === Bottom
                 if !(u.var.lb === Union{} && u.var.ub === Any)
                     return false
@@ -1674,8 +1712,12 @@ end
 const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, :_L, :_M,
                           :_N, :_O, :_P, :_Q, :_R, :_S, :_T, :_U, :_V, :_W, :_X, :_Y, :_Z]
 
-# TODO: handle e.g. apply_type(T, R::Union{Type{Int32},Type{Float64}})
-@nospecs function apply_type_tfunc(𝕃::AbstractLattice, headtypetype, args...)
+function apply_type_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any};
+                          max_union_splitting::Int=InferenceParams().max_union_splitting)
+    if isempty(argtypes)
+        return Bottom
+    end
+    headtypetype = argtypes[1]
     headtypetype = widenslotwrapper(headtypetype)
     if isa(headtypetype, Const)
         headtype = headtypetype.val
@@ -1684,15 +1726,15 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
     else
         return Any
     end
-    if !isempty(args) && isvarargtype(args[end])
+    largs = length(argtypes)
+    if largs > 1 && isvarargtype(argtypes[end])
         return isvarargtype(headtype) ? TypeofVararg : Type
     end
-    largs = length(args)
     if headtype === Union
-        largs == 0 && return Const(Bottom)
+        largs == 1 && return Const(Bottom)
         hasnonType = false
-        for i = 1:largs
-            ai = args[i]
+        for i = 2:largs
+            ai = argtypes[i]
             if isa(ai, Const)
                 if !isa(ai.val, Type)
                     if isa(ai.val, TypeVar)
@@ -1711,16 +1753,14 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
                 end
             end
         end
-        if largs == 1 # Union{T} --> T
-            u1 = typeintersect(widenconst(args[1]), Union{Type,TypeVar})
-            valid_as_lattice(u1) || return Bottom
-            return u1
+        if largs == 2 # Union{T} --> T
+            return tmeet(widenconst(argtypes[2]), Union{Type,TypeVar})
         end
         hasnonType && return Type
         ty = Union{}
         allconst = true
-        for i = 1:largs
-            ai = args[i]
+        for i = 2:largs
+            ai = argtypes[i]
             if isType(ai)
                 aty = ai.parameters[1]
                 allconst &= hasuniquerep(aty)
@@ -1731,7 +1771,19 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
         end
         return allconst ? Const(ty) : Type{ty}
     end
-    istuple = isa(headtype, Type) && (headtype == Tuple)
+    if 1 < unionsplitcost(𝕃, argtypes) ≤ max_union_splitting
+        rt = Bottom
+        for split_argtypes = switchtupleunion(𝕃, argtypes)
+            this_rt = widenconst(_apply_type_tfunc(𝕃, headtype, split_argtypes))
+            rt = Union{rt, this_rt}
+        end
+        return rt
+    end
+    return _apply_type_tfunc(𝕃, headtype, argtypes)
+end
+@nospecs function _apply_type_tfunc(𝕃::AbstractLattice, headtype, argtypes::Vector{Any})
+    largs = length(argtypes)
+    istuple = headtype === Tuple
     if !istuple && !isa(headtype, UnionAll) && !isvarargtype(headtype)
         return Union{}
     end
@@ -1744,20 +1796,20 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
     # first push the tailing vars from headtype into outervars
     outer_start, ua = 0, headtype
     while isa(ua, UnionAll)
-        if (outer_start += 1) > largs
+        if (outer_start += 1) > largs - 1
             push!(outervars, ua.var)
         end
         ua = ua.body
     end
-    if largs > outer_start && isa(headtype, UnionAll) # e.g. !isvarargtype(ua) && !istuple
+    if largs - 1 > outer_start && isa(headtype, UnionAll) # e.g. !isvarargtype(ua) && !istuple
         return Bottom # too many arguments
     end
-    outer_start = outer_start - largs + 1
+    outer_start = outer_start - largs + 2
 
     varnamectr = 1
     ua = headtype
-    for i = 1:largs
-        ai = widenslotwrapper(args[i])
+    for i = 2:largs
+        ai = widenslotwrapper(argtypes[i])
         if isType(ai)
             aip1 = ai.parameters[1]
             canconst &= !has_free_typevars(aip1)
@@ -1799,7 +1851,7 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
             elseif !isT
                 # if we didn't have isType to compute ub directly, try to use instanceof_tfunc to refine this guess
                 ai_w = widenconst(ai)
-                ub = ai_w isa Type && ai_w <: Type ? instanceof_tfunc(ai)[1] : Any
+                ub = ai_w isa Type && ai_w <: Type ? instanceof_tfunc(ai, false)[1] : Any
             end
             if istuple
                 # in the last parameter of a Tuple type, if the upper bound is Any
@@ -1831,7 +1883,7 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
                     # If the names are known, keep the upper bound, but otherwise widen to Tuple.
                     # This is a widening heuristic to avoid keeping type information
                     # that's unlikely to be useful.
-                    if !(uw.parameters[1] isa Tuple || (i == 2 && tparams[1] isa Tuple))
+                    if !(uw.parameters[1] isa Tuple || (i == 3 && tparams[1] isa Tuple))
                         ub = Any
                     end
                 else
@@ -1853,6 +1905,7 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
     try
         appl = apply_type(headtype, tparams...)
     catch ex
+        ex isa InterruptException && rethrow()
         # type instantiation might fail if one of the type parameters doesn't
         # match, which could happen only if a type estimate is too coarse
         # and might guess a concrete value while the actual type for it is Bottom
@@ -1872,7 +1925,7 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
         # throwing errors.
         appl = headtype
         if isa(appl, UnionAll)
-            for _ = 1:largs
+            for _ = 2:largs
                 appl = appl::UnionAll
                 push!(outervars, appl.var)
                 appl = appl.body
@@ -1892,6 +1945,8 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
     end
     return ans
 end
+@nospecs apply_type_tfunc(𝕃::AbstractLattice, headtypetype, args...) =
+    apply_type_tfunc(𝕃, Any[i == 0 ? headtypetype : args[i] for i in 0:length(args)])
 add_tfunc(apply_type, 1, INT_INF, apply_type_tfunc, 10)
 
 # convert the dispatch tuple type argtype to the real (concrete) type of
@@ -1906,15 +1961,8 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any})
         # UnionAll context is missing around this.
         pop!(argtypes)
     end
-    all_are_const = true
-    for i in 1:length(argtypes)
-        if !isa(argtypes[i], Const)
-            all_are_const = false
-            break
-        end
-    end
-    if all_are_const
-        return Const(ntuple(i::Int->argtypes[i].val, length(argtypes)))
+    if is_all_const_arg(argtypes, 1) # repeated from builtin_tfunction for the benefit of callers that use this tfunc directly
+        return Const(tuple(collect_const_args(argtypes, 1)...))
     end
     params = Vector{Any}(undef, length(argtypes))
     anyinfo = false
@@ -1958,54 +2006,142 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any})
     typ = Tuple{params...}
     # replace a singleton type with its equivalent Const object
     issingletontype(typ) && return Const(typ.instance)
-    return anyinfo ? PartialStruct(typ, argtypes) : typ
+    return anyinfo ? PartialStruct(𝕃, typ, partialstruct_init_undefs(typ, argtypes)::Vector, argtypes) : typ
 end
 
-@nospecs function arrayref_tfunc(𝕃::AbstractLattice, boundscheck, ary, idxs...)
-    return _arrayref_tfunc(𝕃, boundscheck, ary, idxs)
+@nospecs function memorynew_tfunc(𝕃::AbstractLattice, memtype, memlen)
+    hasintersect(widenconst(memlen), Int) || return Bottom
+    memt = tmeet(𝕃, instanceof_tfunc(memtype, true)[1], GenericMemory)
+    memt == Union{} && return memt
+    # PartialStruct so that loads of Const `length` get inferred
+    return PartialStruct(𝕃, memt, Union{Nothing,Bool}[false,false], Any[memlen, Ptr{Nothing}])
+end
+add_tfunc(Core.memorynew, 2, 2, memorynew_tfunc, 10)
+
+@nospecs function memoryrefget_tfunc(𝕃::AbstractLattice, mem, order, boundscheck)
+    memoryref_builtin_common_errorcheck(mem, order, boundscheck) || return Bottom
+    return memoryref_elemtype(mem)
+end
+@nospecs function memoryrefset!_tfunc(𝕃::AbstractLattice, mem, item, order, boundscheck)
+    hasintersect(widenconst(item), memoryrefget_tfunc(𝕃, mem, order, boundscheck)) || return Bottom
+    return item
 end
-@nospecs function _arrayref_tfunc(𝕃::AbstractLattice, boundscheck, ary, @nospecialize idxs::Tuple)
-    isempty(idxs) && return Bottom
-    array_builtin_common_errorcheck(boundscheck, ary, idxs) || return Bottom
-    return array_elmtype(ary)
+@nospecs function memoryrefswap!_tfunc(𝕃::AbstractLattice, mem, v, order, boundscheck)
+    memoryrefset!_tfunc(𝕃, mem, v, order, boundscheck) === Bottom && return Bottom
+    return memoryrefget_tfunc(𝕃, mem, order, boundscheck)
 end
-add_tfunc(arrayref, 3, INT_INF, arrayref_tfunc, 20)
-add_tfunc(const_arrayref, 3, INT_INF, arrayref_tfunc, 20)
+@nospecs function memoryrefmodify!_tfunc(𝕃::AbstractLattice, mem, op, v, order, boundscheck)
+    memoryrefget_tfunc(𝕃, mem, order, boundscheck) === Bottom && return Bottom
+    T = _memoryref_elemtype(mem)
+    T === Bottom && return Bottom
+    PT = Const(Pair)
+    return instanceof_tfunc(apply_type_tfunc(𝕃, Any[PT, T, T]), true)[1]
+end
+@nospecs function memoryrefreplace!_tfunc(𝕃::AbstractLattice, mem, x, v, success_order, failure_order, boundscheck)
+    memoryrefset!_tfunc(𝕃, mem, v, success_order, boundscheck) === Bottom && return Bottom
+    hasintersect(widenconst(failure_order), Symbol) || return Bottom
+    T = _memoryref_elemtype(mem)
+    T === Bottom && return Bottom
+    PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T)
+    return instanceof_tfunc(apply_type_tfunc(𝕃, Any[PT, T]), true)[1]
+end
+@nospecs function memoryrefsetonce!_tfunc(𝕃::AbstractLattice, mem, v, success_order, failure_order, boundscheck)
+    memoryrefset!_tfunc(𝕃, mem, v, success_order, boundscheck) === Bottom && return Bottom
+    hasintersect(widenconst(failure_order), Symbol) || return Bottom
+    return Bool
+end
+
+add_tfunc(Core.memoryrefget, 3, 3, memoryrefget_tfunc, 20)
+add_tfunc(Core.memoryrefset!, 4, 4, memoryrefset!_tfunc, 20)
+add_tfunc(Core.memoryrefswap!, 4, 4, memoryrefswap!_tfunc, 20)
+add_tfunc(Core.memoryrefmodify!, 5, 5, memoryrefmodify!_tfunc, 20)
+add_tfunc(Core.memoryrefreplace!, 6, 6, memoryrefreplace!_tfunc, 20)
+add_tfunc(Core.memoryrefsetonce!, 5, 5, memoryrefsetonce!_tfunc, 20)
 
-@nospecs function arrayset_tfunc(𝕃::AbstractLattice, boundscheck, ary, item, idxs...)
-    hasintersect(widenconst(item), _arrayref_tfunc(𝕃, boundscheck, ary, idxs)) || return Bottom
-    return ary
+@nospecs function memoryref_isassigned_tfunc(𝕃::AbstractLattice, mem, order, boundscheck)
+    return _memoryref_isassigned_tfunc(𝕃, mem, order, boundscheck)
+end
+@nospecs function _memoryref_isassigned_tfunc(𝕃::AbstractLattice, mem, order, boundscheck)
+    memoryref_builtin_common_errorcheck(mem, order, boundscheck) || return Bottom
+    return Bool
 end
-add_tfunc(arrayset, 4, INT_INF, arrayset_tfunc, 20)
+add_tfunc(memoryref_isassigned, 3, 3, memoryref_isassigned_tfunc, 20)
 
-@nospecs function array_builtin_common_errorcheck(boundscheck, ary, @nospecialize idxs::Tuple)
-    hasintersect(widenconst(boundscheck), Bool) || return false
-    hasintersect(widenconst(ary), Array) || return false
-    for i = 1:length(idxs)
-        idx = getfield(idxs, i)
-        idx = isvarargtype(idx) ? unwrapva(idx) : widenconst(idx)
-        hasintersect(idx, Int) || return false
+@nospecs function memoryref_tfunc(𝕃::AbstractLattice, mem)
+    a = widenconst(unwrapva(mem))
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === GenericMemory.body.body.body.name
+            A = unw.parameters[1]
+            T = unw.parameters[2]
+            AS = unw.parameters[3]
+            T isa Type || T isa TypeVar || return Bottom
+            return rewrap_unionall(GenericMemoryRef{A, T, AS}, a)
+        end
     end
+    return GenericMemoryRef
+end
+@nospecs function memoryref_tfunc(𝕃::AbstractLattice, ref, idx)
+    if isvarargtype(idx)
+        idx = unwrapva(idx)
+    end
+    return memoryref_tfunc(𝕃, ref, idx, Const(true))
+end
+@nospecs function memoryref_tfunc(𝕃::AbstractLattice, ref, idx, boundscheck)
+    memoryref_builtin_common_errorcheck(ref, Const(:not_atomic), boundscheck) || return Bottom
+    hasintersect(widenconst(idx), Int) || return Bottom
+    hasintersect(widenconst(ref), GenericMemory) && return memoryref_tfunc(𝕃, ref)
+    return ref
+end
+add_tfunc(memoryrefnew, 1, 3, memoryref_tfunc, 1)
+
+@nospecs function memoryrefoffset_tfunc(𝕃::AbstractLattice, mem)
+    hasintersect(widenconst(mem), GenericMemoryRef) || return Bottom
+    return Int
+end
+add_tfunc(memoryrefoffset, 1, 1, memoryrefoffset_tfunc, 5)
+
+@nospecs function memoryref_builtin_common_errorcheck(mem, order, boundscheck)
+    hasintersect(widenconst(mem), Union{GenericMemory, GenericMemoryRef}) || return false
+    hasintersect(widenconst(order), Symbol) || return false
+    hasintersect(widenconst(unwrapva(boundscheck)), Bool) || return false
     return true
 end
 
-function array_elmtype(@nospecialize ary)
-    a = widenconst(ary)
-    if !has_free_typevars(a) && a <: Array
-        a0 = a
-        if isa(a, UnionAll)
-            a = unwrap_unionall(a0)
+@nospecs function memoryref_elemtype(mem)
+    m = widenconst(mem)
+    if !has_free_typevars(m) && m <: GenericMemoryRef
+        m0 = m
+        if isa(m, UnionAll)
+            m = unwrap_unionall(m0)
         end
-        if isa(a, DataType)
-            T = a.parameters[1]
-            valid_as_lattice(T) || return Bottom
-            return rewrap_unionall(T, a0)
+        if isa(m, DataType)
+            T = m.parameters[2]
+            valid_as_lattice(T, true) || return Bottom
+            return rewrap_unionall(T, m0)
         end
     end
     return Any
 end
 
-@nospecs function opaque_closure_tfunc(𝕃::AbstractLattice, arg, lb, ub, source, env::Vector{Any}, linfo::MethodInstance)
+@nospecs function _memoryref_elemtype(mem)
+    m = widenconst(mem)
+    if !has_free_typevars(m) && m <: GenericMemoryRef
+        m0 = m
+        if isa(m, UnionAll)
+            m = unwrap_unionall(m0)
+        end
+        if isa(m, DataType)
+            T = m.parameters[2]
+            valid_as_lattice(T, true) || return Bottom
+            has_free_typevars(T) || return Const(T)
+            return rewrap_unionall(Type{T}, m0)
+        end
+    end
+    return Type
+end
+
+@nospecs function opaque_closure_tfunc(𝕃::AbstractLattice, arg, lb, ub, source, env::Vector{Any}, mi::MethodInstance)
     argt, argt_exact = instanceof_tfunc(arg)
     lbt, lb_exact = instanceof_tfunc(lb)
     if !lb_exact
@@ -2019,91 +2155,138 @@ end
 
     (isa(source, Const) && isa(source.val, Method)) || return t
 
-    return PartialOpaque(t, tuple_tfunc(𝕃, env), linfo, source.val)
+    return PartialOpaque(t, tuple_tfunc(𝕃, env), mi, source.val)
 end
 
 # whether getindex for the elements can potentially throw UndefRef
-function array_type_undefable(@nospecialize(arytype))
+@nospecs function array_type_undefable(arytype)
+    arytype = unwrap_unionall(arytype)
     if isa(arytype, Union)
         return array_type_undefable(arytype.a) || array_type_undefable(arytype.b)
-    elseif isa(arytype, UnionAll)
-        return true
+    elseif arytype isa DataType
+        elmtype = memoryref_elemtype(arytype)
+        # TODO: use arraytype layout instead to derive this
+        return !((elmtype isa DataType && isbitstype(elmtype)) || (elmtype isa Union && isbitsunion(elmtype)))
+    end
+    return true
+end
+
+@nospecs function memoryset_typecheck(𝕃::AbstractLattice, memtype, elemtype)
+    # Check that we can determine the element type
+    isa(memtype, DataType) || return false
+    elemtype_expected = memoryref_elemtype(memtype)
+    elemtype_expected === Union{} && return false
+    # Check that the element type is compatible with the element we're assigning
+    ⊑ = partialorder(𝕃)
+    elemtype ⊑ elemtype_expected || return false
+    return true
+end
+
+function memoryref_builtin_common_nothrow(argtypes::Vector{Any})
+    if length(argtypes) == 1
+        memtype = widenconst(argtypes[1])
+        return memtype ⊑ GenericMemory
     else
-        elmtype = (arytype::DataType).parameters[1]
-        return !(elmtype isa Type && (isbitstype(elmtype) || isbitsunion(elmtype)))
+        if length(argtypes) == 2
+            boundscheck = Const(true)
+        elseif length(argtypes) == 3
+            boundscheck = argtypes[3]
+        else
+            return false
+        end
+        memtype = widenconst(argtypes[1])
+        idx = widenconst(argtypes[2])
+        idx ⊑ Int || return false
+        boundscheck ⊑ Bool || return false
+        memtype ⊑ Union{GenericMemory, GenericMemoryRef} || return false
+        # If we have @inbounds (last argument is false), we're allowed to assume
+        # we don't throw bounds errors.
+        if isa(boundscheck, Const)
+            boundscheck.val::Bool || return true
+        end
+        # Else we can't really say anything here
+        # TODO: In the future we may be able to track the minimum length though inference.
+        return false
     end
 end
 
-function array_builtin_common_nothrow(argtypes::Vector{Any}, isarrayref::Bool)
-    first_idx_idx = isarrayref ? 3 : 4
-    length(argtypes) ≥ first_idx_idx || return false
-    boundscheck = argtypes[1]
-    arytype = argtypes[2]
-    array_builtin_common_typecheck(boundscheck, arytype, argtypes, first_idx_idx) || return false
-    if isarrayref
+function memoryrefop_builtin_common_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospecialize f)
+    ismemoryset = f === memoryrefset!
+    nargs = ismemoryset ? 4 : 3
+    length(argtypes) == nargs || return false
+    order = argtypes[2 + ismemoryset]
+    boundscheck = argtypes[3 + ismemoryset]
+    memtype = widenconst(argtypes[1])
+    memoryref_builtin_common_typecheck(𝕃, boundscheck, memtype, order) || return false
+    if ismemoryset
+        # Additionally check element type compatibility
+        memoryset_typecheck(𝕃, memtype, argtypes[2]) || return false
+    elseif f === memoryrefget
         # If we could potentially throw undef ref errors, bail out now.
-        arytype = widenconst(arytype)
-        array_type_undefable(arytype) && return false
+        array_type_undefable(memtype) && return false
     end
-    # If we have @inbounds (first argument is false), we're allowed to assume
+    # If we have @inbounds (last argument is false), we're allowed to assume
     # we don't throw bounds errors.
     if isa(boundscheck, Const)
         boundscheck.val::Bool || return true
     end
     # Else we can't really say anything here
-    # TODO: In the future we may be able to track the shapes of arrays though
-    # inference.
+    # TODO: In the future we may be able to track the minimum length though inference.
     return false
 end
 
-@nospecs function array_builtin_common_typecheck(boundscheck, arytype,
-    argtypes::Vector{Any}, first_idx_idx::Int)
-    (boundscheck ⊑ Bool && arytype ⊑ Array) || return false
-    for i = first_idx_idx:length(argtypes)
-        argtypes[i] ⊑ Int || return false
-    end
-    return true
+@nospecs function memoryref_builtin_common_typecheck(𝕃::AbstractLattice, boundscheck, memtype, order)
+    ⊑ = partialorder(𝕃)
+    return boundscheck ⊑ Bool && memtype ⊑ GenericMemoryRef && order ⊑ Symbol
 end
 
-@nospecs function arrayset_typecheck(arytype, elmtype)
-    # Check that we can determine the element type
-    arytype = widenconst(arytype)
-    isa(arytype, DataType) || return false
-    elmtype_expected = arytype.parameters[1]
-    isa(elmtype_expected, Type) || return false
-    # Check that the element type is compatible with the element we're assigning
-    elmtype ⊑ elmtype_expected || return false
-    return true
+function memorynew_nothrow(argtypes::Vector{Any})
+    if !(argtypes[1] isa Const && argtypes[2] isa Const)
+        return false
+    end
+    MemT = argtypes[1].val
+    if !(isconcretetype(MemT) && MemT <: GenericMemory)
+        return false
+    end
+    len = argtypes[2].val
+    if !(len isa Int && 0 <= len < typemax(Int))
+        return false
+    end
+    elsz = datatype_layoutsize(MemT)
+    overflows = checked_smul_int(len, elsz)[2]
+    return !overflows
 end
 
-# Query whether the given builtin is guaranteed not to throw given the argtypes
-@nospecs function _builtin_nothrow(𝕃::AbstractLattice, f, argtypes::Vector{Any}, rt)
-    ⊑ = Core.Compiler.:⊑(𝕃)
-    if f === arrayset
-        array_builtin_common_nothrow(argtypes, #=isarrayref=#false) || return false
-        # Additionally check element type compatibility
-        return arrayset_typecheck(argtypes[2], argtypes[3])
-    elseif f === arrayref || f === const_arrayref
-        return array_builtin_common_nothrow(argtypes, #=isarrayref=#true)
+# Query whether the given builtin is guaranteed not to throw given the `argtypes`.
+# `argtypes` can be assumed not to contain varargs.
+function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argtypes::Vector{Any},
+                          @nospecialize(rt))
+    ⊑ = partialorder(𝕃)
+    na = length(argtypes)
+    if f === Core.memorynew
+        return memorynew_nothrow(argtypes)
+    elseif f === memoryrefnew
+        return memoryref_builtin_common_nothrow(argtypes)
+    elseif f === memoryrefoffset
+        length(argtypes) == 1 || return false
+        memtype = widenconst(argtypes[1])
+        return memtype ⊑ GenericMemoryRef
+    elseif f === memoryrefset!
+        return memoryrefop_builtin_common_nothrow(𝕃, argtypes, f)
+    elseif f === memoryrefget
+        return memoryrefop_builtin_common_nothrow(𝕃, argtypes, f)
+    elseif f === memoryref_isassigned
+        return memoryrefop_builtin_common_nothrow(𝕃, argtypes, f)
     elseif f === Core._expr
         length(argtypes) >= 1 || return false
         return argtypes[1] ⊑ Symbol
-    end
-
-    # These builtins are not-vararg, so if we have varars, here, we can't guarantee
-    # the correct number of arguments.
-    na = length(argtypes)
-    (na ≠ 0 && isvarargtype(argtypes[end])) && return false
-    if f === arraysize
-        na == 2 || return false
-        return arraysize_nothrow(argtypes[1], argtypes[2])
     elseif f === Core._typevar
         na == 3 || return false
         return typevar_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3])
     elseif f === invoke
         return false
     elseif f === getfield
-        return getfield_nothrow(𝕃, ArgInfo(nothing, Any[Const(f), argtypes...]))
+        return getfield_nothrow(𝕃, argtypes)
     elseif f === setfield!
         if na == 3
             return setfield!_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3])
@@ -2122,8 +2305,6 @@ end
     elseif f === (<:)
         na == 2 || return false
         return subtype_nothrow(𝕃, argtypes[1], argtypes[2])
-    elseif f === UnionAll
-        return na == 2 && (argtypes[1] ⊑ TypeVar && argtypes[2] ⊑ Type)
     elseif f === isdefined
         return isdefined_nothrow(𝕃, argtypes)
     elseif f === Core.sizeof
@@ -2135,20 +2316,6 @@ end
     elseif f === typeassert
         na == 2 || return false
         return typeassert_nothrow(𝕃, argtypes[1], argtypes[2])
-    elseif f === getglobal
-        if na == 2
-            return getglobal_nothrow(argtypes[1], argtypes[2])
-        elseif na == 3
-            return getglobal_nothrow(argtypes[1], argtypes[2], argtypes[3])
-        end
-        return false
-    elseif f === setglobal!
-        if na == 3
-            return setglobal!_nothrow(argtypes[1], argtypes[2], argtypes[3])
-        elseif na == 4
-            return setglobal!_nothrow(argtypes[1], argtypes[2], argtypes[3], argtypes[4])
-        end
-        return false
     elseif f === Core.get_binding_type
         na == 2 || return false
         return get_binding_type_nothrow(𝕃, argtypes[1], argtypes[2])
@@ -2161,11 +2328,17 @@ end
     elseif f === Core.compilerbarrier
         na == 2 || return false
         return compilerbarrier_nothrow(argtypes[1], nothing)
+    elseif f === Core._svec_len
+        na == 1 || return false
+        return _svec_len_nothrow(𝕃, argtypes[1])
+    elseif f === Core._svec_ref
+        na == 2 || return false
+        return _svec_ref_tfunc(𝕃, argtypes[1], argtypes[2]) isa Const
     end
     return false
 end
 
-# known to be always effect-free (in particular nothrow)
+# known to be always effect-free (in particular also nothrow)
 const _PURE_BUILTINS = Any[
     tuple,
     svec,
@@ -2189,8 +2362,13 @@ const _CONSISTENT_BUILTINS = Any[
     (<:),
     typeassert,
     throw,
+    Core.throw_methoderror,
     setfield!,
-    donotdelete
+    donotdelete,
+    memoryrefnew,
+    memoryrefoffset,
+    Core._svec_len,
+    Core._svec_ref,
 ]
 
 # known to be effect-free (but not necessarily nothrow)
@@ -2200,9 +2378,11 @@ const _EFFECT_FREE_BUILTINS = [
     isa,
     UnionAll,
     getfield,
-    arrayref,
-    arraysize,
-    const_arrayref,
+    Core.memorynew,
+    memoryrefnew,
+    memoryrefoffset,
+    memoryrefget,
+    memoryref_isassigned,
     isdefined,
     Core.sizeof,
     Core.ifelse,
@@ -2210,8 +2390,11 @@ const _EFFECT_FREE_BUILTINS = [
     (<:),
     typeassert,
     throw,
+    Core.throw_methoderror,
     getglobal,
     compilerbarrier,
+    Core._svec_len,
+    Core._svec_ref,
 ]
 
 const _INACCESSIBLEMEM_BUILTINS = Any[
@@ -2223,33 +2406,35 @@ const _INACCESSIBLEMEM_BUILTINS = Any[
     svec,
     fieldtype,
     isa,
-    isdefined,
     nfields,
     throw,
+    Core.throw_methoderror,
     tuple,
     typeassert,
     typeof,
     compilerbarrier,
     Core._typevar,
-    donotdelete
+    donotdelete,
+    Core.memorynew,
 ]
 
 const _ARGMEM_BUILTINS = Any[
-    arrayref,
-    arrayset,
-    arraysize,
+    memoryrefnew,
+    memoryrefoffset,
+    memoryrefget,
+    memoryref_isassigned,
+    memoryrefset!,
     modifyfield!,
     replacefield!,
     setfield!,
     swapfield!,
+    Core._svec_len,
+    Core._svec_ref,
 ]
 
 const _INCONSISTENT_INTRINSICS = Any[
-    Intrinsics.pointerref,      # this one is volatile
-    Intrinsics.sqrt_llvm_fast,  # this one may differ at runtime (by a few ulps)
-    Intrinsics.have_fma,        # this one depends on the runtime environment
-    Intrinsics.cglobal,         # cglobal lookup answer changes at runtime
-    # ... and list fastmath intrinsics:
+    # all is_pure_intrinsic_infer plus
+    # ... all the unsound fastmath functions which should have been in is_pure_intrinsic_infer
     # join(string.("Intrinsics.", sort(filter(endswith("_fast")∘string, names(Core.Intrinsics)))), ",\n")
     Intrinsics.add_float_fast,
     Intrinsics.div_float_fast,
@@ -2266,33 +2451,60 @@ const _INCONSISTENT_INTRINSICS = Any[
     # Intrinsics.muladd_float,    # this is not interprocedurally consistent
 ]
 
-const _SPECIAL_BUILTINS = Any[
-    Core._apply_iterate,
+# Intrinsics that require all arguments to be floats
+const _FLOAT_INTRINSICS = Any[
+    Intrinsics.neg_float,
+    Intrinsics.add_float,
+    Intrinsics.sub_float,
+    Intrinsics.mul_float,
+    Intrinsics.div_float,
+    Intrinsics.min_float,
+    Intrinsics.max_float,
+    Intrinsics.fma_float,
+    Intrinsics.muladd_float,
+    Intrinsics.neg_float_fast,
+    Intrinsics.add_float_fast,
+    Intrinsics.sub_float_fast,
+    Intrinsics.mul_float_fast,
+    Intrinsics.div_float_fast,
+    Intrinsics.min_float_fast,
+    Intrinsics.max_float_fast,
+    Intrinsics.eq_float,
+    Intrinsics.ne_float,
+    Intrinsics.lt_float,
+    Intrinsics.le_float,
+    Intrinsics.eq_float_fast,
+    Intrinsics.ne_float_fast,
+    Intrinsics.lt_float_fast,
+    Intrinsics.le_float_fast,
+    Intrinsics.fpiseq,
+    Intrinsics.abs_float,
+    Intrinsics.copysign_float,
+    Intrinsics.ceil_llvm,
+    Intrinsics.floor_llvm,
+    Intrinsics.trunc_llvm,
+    Intrinsics.rint_llvm,
+    Intrinsics.sqrt_llvm,
+    Intrinsics.sqrt_llvm_fast
 ]
 
+# Types compatible with fpext/fptrunc
+const CORE_FLOAT_TYPES = Union{Core.BFloat16, Float16, Float32, Float64}
+
 function isdefined_effects(𝕃::AbstractLattice, argtypes::Vector{Any})
     # consistent if the first arg is immutable
     na = length(argtypes)
     2 ≤ na ≤ 3 || return EFFECTS_THROWS
-    obj, sym = argtypes
-    wobj = unwrapva(obj)
+    wobj, sym = argtypes
+    wobj = unwrapva(wobj)
+    sym = unwrapva(sym)
     consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
     if is_immutable_argtype(wobj)
         consistent = ALWAYS_TRUE
-    else
-        # Bindings/fields are not allowed to transition from defined to undefined, so even
-        # if the object is not immutable, we can prove `:consistent`-cy if it is defined:
-        if isa(wobj, Const) && isa(sym, Const)
-            objval = wobj.val
-            symval = sym.val
-            if isa(objval, Module)
-                if isa(symval, Symbol) && isdefined(objval, symval)
-                    consistent = ALWAYS_TRUE
-                end
-            elseif (isa(symval, Symbol) || isa(symval, Int)) && isdefined(objval, symval)
-                consistent = ALWAYS_TRUE
-            end
-        end
+    elseif isdefined_tfunc(𝕃, wobj, sym) isa Const
+        # Some bindings/fields are not allowed to transition from defined to undefined or the reverse, so even
+        # if the object is not immutable, we can prove `:consistent`-cy of this:
+        consistent = ALWAYS_TRUE
     end
     nothrow = isdefined_nothrow(𝕃, argtypes)
     if hasintersect(widenconst(wobj), Module)
@@ -2305,105 +2517,181 @@ function isdefined_effects(𝕃::AbstractLattice, argtypes::Vector{Any})
     return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly)
 end
 
-function getfield_effects(𝕃::AbstractLattice, arginfo::ArgInfo, @nospecialize(rt))
-    (;argtypes) = arginfo
-    # consistent if the argtype is immutable
-    length(argtypes) < 3 && return EFFECTS_THROWS
-    obj = argtypes[2]
-    isvarargtype(obj) && return Effects(EFFECTS_THROWS; consistent=ALWAYS_FALSE)
+function getfield_effects(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospecialize(rt))
+    length(argtypes) < 2 && return EFFECTS_THROWS
+    obj = argtypes[1]
+    if isvarargtype(obj)
+        return Effects(EFFECTS_TOTAL;
+            consistent=CONSISTENT_IF_INACCESSIBLEMEMONLY,
+            nothrow=false,
+            inaccessiblememonly=ALWAYS_FALSE,
+            noub=ALWAYS_FALSE)
+    end
+    # :consistent if the argtype is immutable
     consistent = (is_immutable_argtype(obj) || is_mutation_free_argtype(obj)) ?
         ALWAYS_TRUE : CONSISTENT_IF_INACCESSIBLEMEMONLY
-    # access to `isbitstype`-field initialized with undefined value leads to undefined behavior
-    # so should taint `:consistent`-cy while access to uninitialized non-`isbitstype` field
-    # throws `UndefRefError` so doesn't need to taint it
-    # NOTE `getfield_notundefined` conservatively checks if this field is never initialized
-    # with undefined value so that we don't taint `:consistent`-cy too aggressively here
-    if !(length(argtypes) ≥ 3 && getfield_notundefined(obj, argtypes[3]))
-        consistent = ALWAYS_FALSE
-    end
-    bcheck = getfield_boundscheck(arginfo)
-    nothrow = getfield_nothrow(𝕃, arginfo, bcheck)
+    noub = ALWAYS_TRUE
+    bcheck = getfield_boundscheck(argtypes)
+    nothrow = getfield_nothrow(𝕃, argtypes, bcheck)
     if !nothrow
-        if !(bcheck === :on || bcheck === :boundscheck)
-            # If we cannot independently prove inboundsness, taint consistency.
-            # The inbounds-ness assertion requires dynamic reachability, while
-            # :consistent needs to be true for all input values.
+        if bcheck !== :on
+            # If we cannot independently prove inboundsness, taint `:noub`.
+            # The inbounds-ness assertion requires dynamic reachability,
+            # while `:noub` needs to be true for all input values.
             # However, as a special exception, we do allow literal `:boundscheck`.
-            # `:consistent`-cy will be tainted in any caller using `@inbounds` based
-            # on the `:noinbounds` effect.
-            # N.B. We do not taint for `--check-bounds=no` here. That is handled
-            # in concrete evaluation.
-            consistent = ALWAYS_FALSE
+            # `:noub` will be tainted in any caller using `@inbounds`
+            # based on the `:noinbounds` effect.
+            # N.B. We do not taint for `--check-bounds=no` here.
+            # That is handled in concrete evaluation.
+            noub = ALWAYS_FALSE
         end
     end
     if hasintersect(widenconst(obj), Module)
-        inaccessiblememonly = getglobal_effects(argtypes[2:end], rt).inaccessiblememonly
+        # Modeled more precisely in abstract_eval_getglobal
+        inaccessiblememonly = ALWAYS_FALSE
     elseif is_mutation_free_argtype(obj)
         inaccessiblememonly = ALWAYS_TRUE
     else
         inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY
     end
-    return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly)
+    return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly, noub)
 end
 
-function getglobal_effects(argtypes::Vector{Any}, @nospecialize(rt))
-    consistent = inaccessiblememonly = ALWAYS_FALSE
-    nothrow = false
-    if length(argtypes) ≥ 2
-        M, s = argtypes[1], argtypes[2]
-        if getglobal_nothrow(M, s)
-            nothrow = true
-            # typeasserts below are already checked in `getglobal_nothrow`
-            Mval, sval = (M::Const).val::Module, (s::Const).val::Symbol
-            if isconst(Mval, sval)
-                consistent = ALWAYS_TRUE
-                if is_mutation_free_argtype(rt)
-                    inaccessiblememonly = ALWAYS_TRUE
-                end
-            end
-        end
-    end
-    return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly)
-end
+# add a new builtin function to this list only after making sure that
+# `builtin_effects` is properly implemented for it
+const _EFFECTS_KNOWN_BUILTINS = Any[
+    <:,
+    ===,
+    # Core._abstracttype,
+    # _apply_iterate,
+    # Core._call_in_world_total,
+    # Core._compute_sparams,
+    # Core._defaultctors,
+    # Core._equiv_typedef,
+    Core._expr,
+    # Core._primitivetype,
+    # Core._setsuper!,
+    # Core._structtype,
+    Core._svec_len,
+    Core._svec_ref,
+    # Core._typebody!,
+    Core._typevar,
+    apply_type,
+    compilerbarrier,
+    Core.current_scope,
+    donotdelete,
+    Core.finalizer,
+    Core.get_binding_type,
+    Core.ifelse,
+    # Core.invoke_in_world,
+    # invokelatest,
+    Core.memorynew,
+    memoryref_isassigned,
+    memoryrefget,
+    # Core.memoryrefmodify!,
+    memoryrefnew,
+    memoryrefoffset,
+    # Core.memoryrefreplace!,
+    memoryrefset!,
+    # Core.memoryrefsetonce!,
+    # Core.memoryrefswap!,
+    Core.sizeof,
+    svec,
+    Core.throw_methoderror,
+    applicable,
+    fieldtype,
+    getfield,
+    getglobal,
+    # invoke,
+    isa,
+    isdefined,
+    # isdefinedglobal,
+    modifyfield!,
+    # modifyglobal!,
+    nfields,
+    replacefield!,
+    # replaceglobal!,
+    setfield!,
+    # setfieldonce!,
+    # setglobal!,
+    # setglobalonce!,
+    swapfield!,
+    # swapglobal!,
+    throw,
+    tuple,
+    typeassert,
+    typeof
+]
+
+"""
+    builtin_effects(𝕃::AbstractLattice, f::Builtin, argtypes::Vector{Any}, rt)::Effects
 
-function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), arginfo::ArgInfo, @nospecialize(rt))
+Compute the effects of a builtin function call. `argtypes` should not include `f` itself.
+"""
+function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argtypes::Vector{Any}, @nospecialize(rt))
     if isa(f, IntrinsicFunction)
-        return intrinsic_effects(f, arginfo.argtypes[2:end])
+        return intrinsic_effects(f, argtypes)
     end
 
-    @assert !contains_is(_SPECIAL_BUILTINS, f)
+    if !(f in _EFFECTS_KNOWN_BUILTINS)
+        return Effects()
+    end
 
     if f === getfield
-        return getfield_effects(𝕃, arginfo, rt)
+        return getfield_effects(𝕃, argtypes, rt)
     end
-    argtypes = arginfo.argtypes[2:end]
+
+    # if this builtin call deterministically throws,
+    # don't bother to taint the other effects other than :nothrow:
+    # note this is safe only if we accounted for :noub already
+    rt === Bottom && return EFFECTS_THROWS
 
     if f === isdefined
         return isdefined_effects(𝕃, argtypes)
     elseif f === getglobal
-        return getglobal_effects(argtypes, rt)
+        2 ≤ length(argtypes) ≤ 3 || return EFFECTS_THROWS
+        # Modeled more precisely in abstract_eval_getglobal
+        return generic_getglobal_effects
     elseif f === Core.get_binding_type
         length(argtypes) == 2 || return EFFECTS_THROWS
-        effect_free = get_binding_type_effect_free(argtypes[1], argtypes[2]) ? ALWAYS_TRUE : ALWAYS_FALSE
-        return Effects(EFFECTS_TOTAL; effect_free)
+        # Modeled more precisely in abstract_eval_get_binding_type
+        return Effects(EFFECTS_TOTAL; nothrow=get_binding_type_nothrow(𝕃, argtypes[1], argtypes[2]))
+    elseif f === compilerbarrier
+        length(argtypes) == 2 || return Effects(EFFECTS_THROWS; consistent=ALWAYS_FALSE)
+        setting = argtypes[1]
+        return Effects(EFFECTS_TOTAL;
+            consistent = (isa(setting, Const) && setting.val === :conditional) ? ALWAYS_TRUE : ALWAYS_FALSE,
+            nothrow = compilerbarrier_nothrow(setting, nothing))
+    elseif f === Core.current_scope
+        nothrow = true
+        if length(argtypes) != 0
+            if length(argtypes) != 1 || !isvarargtype(argtypes[1])
+                return EFFECTS_THROWS
+            end
+            nothrow = false
+        end
+        return Effects(EFFECTS_TOTAL;
+            consistent = ALWAYS_FALSE,
+            notaskstate = false,
+            nothrow)
     else
         if contains_is(_CONSISTENT_BUILTINS, f)
             consistent = ALWAYS_TRUE
-        elseif f === arrayref || f === arrayset || f === arraysize
+        elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned || f === Core._svec_len || f === Core._svec_ref
             consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
-        elseif f === Core._typevar
+        elseif f === Core._typevar || f === Core.memorynew
             consistent = CONSISTENT_IF_NOTRETURNED
         else
             consistent = ALWAYS_FALSE
         end
-        if f === setfield! || f === arrayset
+        if f === setfield! || f === memoryrefset!
             effect_free = EFFECT_FREE_IF_INACCESSIBLEMEMONLY
         elseif contains_is(_EFFECT_FREE_BUILTINS, f) || contains_is(_PURE_BUILTINS, f)
             effect_free = ALWAYS_TRUE
         else
             effect_free = ALWAYS_FALSE
         end
-        nothrow = (isempty(argtypes) || !isvarargtype(argtypes[end])) && builtin_nothrow(𝕃, f, argtypes, rt)
+        nothrow = builtin_nothrow(𝕃, f, argtypes, rt)
         if contains_is(_INACCESSIBLEMEM_BUILTINS, f)
             inaccessiblememonly = ALWAYS_TRUE
         elseif contains_is(_ARGMEM_BUILTINS, f)
@@ -2411,37 +2699,132 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argin
         else
             inaccessiblememonly = ALWAYS_FALSE
         end
-        return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow, inaccessiblememonly)
+        if f === memoryrefnew || f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned
+            noub = memoryop_noub(f, argtypes) ? ALWAYS_TRUE : ALWAYS_FALSE
+        else
+            noub = ALWAYS_TRUE
+        end
+        return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow, inaccessiblememonly, noub)
+    end
+end
+
+function memoryop_noub(@nospecialize(f), argtypes::Vector{Any})
+    nargs = length(argtypes)
+    nargs == 0 && return true # must throw and noub
+    lastargtype = argtypes[end]
+    isva = isvarargtype(lastargtype)
+    if f === memoryrefnew
+        if nargs == 1 && !isva
+            return true
+        elseif nargs == 2 && !isva
+            return true
+        end
+        expected_nargs = 3
+    elseif f === memoryrefget || f === memoryref_isassigned
+        expected_nargs = 3
+    else
+        @assert f === memoryrefset! "unexpected memoryop is given"
+        expected_nargs = 4
+    end
+    if nargs == expected_nargs && !isva
+        boundscheck = widenconditional(lastargtype)
+        hasintersect(widenconst(boundscheck), Bool) || return true # must throw and noub
+        boundscheck isa Const && boundscheck.val === true && return true
+    elseif nargs > expected_nargs + 1
+        return true # must throw and noub
+    elseif !isva
+        return true # must throw and noub
+    end
+    return false
+end
+
+function current_scope_tfunc(::AbstractInterpreter, sv::InferenceState)
+    pc = sv.currpc
+    while true
+        pchandler = gethandler(sv, pc)
+        if pchandler === nothing
+            # No local scope available - inherited from the outside
+            return Any
+        end
+        # Remember that we looked at this handler, so we get re-scheduled
+        # if the scope information changes
+        isdefined(pchandler, :scope_uses) || (pchandler.scope_uses = Int[])
+        pcbb = block_for_inst(sv.cfg, pc)
+        if findfirst(==(pcbb), pchandler.scope_uses) === nothing
+            push!(pchandler.scope_uses, pcbb)
+        end
+        scope = pchandler.scopet
+        if scope !== nothing
+            # Found the scope - forward it
+            return scope
+        end
+        pc = pchandler.enter_idx
     end
 end
+current_scope_tfunc(::AbstractInterpreter, ::IRInterpretationState) = Any
+
+hasvarargtype(argtypes::Vector{Any}) = !isempty(argtypes) && isvarargtype(argtypes[end])
 
+"""
+    builtin_nothrow(𝕃::AbstractLattice, f::Builtin, argtypes::Vector{Any}, rt)::Bool
+
+Compute throw-ness of a builtin function call. `argtypes` should not include `f` itself.
+"""
 function builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f), argtypes::Vector{Any}, @nospecialize(rt))
     rt === Bottom && return false
-    contains_is(_PURE_BUILTINS, f) && return true
+    if f === tuple || f === svec
+        return true
+    elseif hasvarargtype(argtypes)
+        return false
+    elseif contains_is(_PURE_BUILTINS, f)
+        return true
+    end
     return _builtin_nothrow(𝕃, f, argtypes, rt)
 end
 
 function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any},
                            sv::Union{AbsIntState, Nothing})
     𝕃ᵢ = typeinf_lattice(interp)
-    if f === tuple
-        return tuple_tfunc(𝕃ᵢ, argtypes)
-    end
-    if isa(f, IntrinsicFunction)
-        if is_pure_intrinsic_infer(f) && all(@nospecialize(a) -> isa(a, Const), argtypes)
-            argvals = anymap(@nospecialize(a) -> (a::Const).val, argtypes)
+    # Early constant evaluation for foldable builtins with all const args
+    if isa(f, IntrinsicFunction) ? is_pure_intrinsic_infer(f) : (f in _PURE_BUILTINS || (f in _CONSISTENT_BUILTINS && f in _EFFECT_FREE_BUILTINS))
+        if is_all_const_arg(argtypes, 1)
+            argvals = collect_const_args(argtypes, 1)
             try
+                # unroll a few common cases for better codegen
+                if length(argvals) == 1
+                    return Const(f(argvals[1]))
+                elseif length(argvals) == 2
+                    return Const(f(argvals[1], argvals[2]))
+                elseif length(argvals) == 3
+                    return Const(f(argvals[1], argvals[2], argvals[3]))
+                end
                 return Const(f(argvals...))
-            catch
+            catch ex # expected ErrorException, TypeError, ConcurrencyViolationError, DivideError etc.
+                ex isa InterruptException && rethrow()
+                return Bottom
             end
         end
-        iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
+    end
+    if isa(f, IntrinsicFunction)
+        iidx = Int(reinterpret(Int32, f)) + 1
         if iidx < 0 || iidx > length(T_IFUNC)
-            # invalid intrinsic
+            # unknown intrinsic
             return Any
         end
         tf = T_IFUNC[iidx]
     else
+        if f === tuple
+            return tuple_tfunc(𝕃ᵢ, argtypes)
+        elseif f === Core.current_scope
+            if length(argtypes) != 0
+                if length(argtypes) != 1 || !isvarargtype(argtypes[1])
+                    return Bottom
+                end
+            end
+            return current_scope_tfunc(interp, sv)
+        elseif f === Core.apply_type
+            return apply_type_tfunc(𝕃ᵢ, argtypes; max_union_splitting=InferenceParams(interp).max_union_splitting)
+        end
         fidx = find_tfunc(f)
         if fidx === nothing
             # unknown/unhandled builtin function
@@ -2449,8 +2832,8 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
         end
         tf = T_FFUNC_VAL[fidx]
     end
-    tf = tf::Tuple{Int, Int, Any}
-    if !isempty(argtypes) && isvarargtype(argtypes[end])
+
+    if hasvarargtype(argtypes)
         if length(argtypes) - 1 > tf[2]
             # definitely too many arguments
             return Bottom
@@ -2480,123 +2863,218 @@ _iszero(@nospecialize x) = x === Intrinsics.xor_int(x, x)
 _isneg1(@nospecialize x) = _iszero(Intrinsics.not_int(x))
 _istypemin(@nospecialize x) = !_iszero(x) && Intrinsics.neg_int(x) === x
 
-function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Vector{Any})
+function builtin_exct(𝕃::AbstractLattice, @nospecialize(f::Builtin), argtypes::Vector{Any}, @nospecialize(rt))
+    if isa(f, IntrinsicFunction)
+        return intrinsic_exct(𝕃, f, argtypes)
+    elseif f === Core._svec_ref
+        return BoundsError
+    end
+    return Any
+end
+
+function div_nothrow(f::IntrinsicFunction, @nospecialize(arg1), @nospecialize(arg2))
+    isa(arg2, Const) || return false
+    den_val = arg2.val
+    _iszero(den_val) && return false
+    f !== Intrinsics.checked_sdiv_int && return true
+    # Nothrow as long as we additionally don't do typemin(T)/-1
+    return !_isneg1(den_val) || (isa(arg1, Const) && !_istypemin(arg1.val))
+end
+
+function known_is_valid_intrinsic_elptr(𝕃::AbstractLattice, @nospecialize(ptr))
+    ptrT = typeof_tfunc(𝕃, ptr)
+    isa(ptrT, Const) || return false
+    return is_valid_intrinsic_elptr(ptrT.val)
+end
+
+function intrinsic_exct(𝕃::AbstractLattice, f::IntrinsicFunction, argtypes::Vector{Any})
+    if hasvarargtype(argtypes)
+        return Any
+    end
+
     # First check that we have the correct number of arguments
-    iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
+    iidx = Int(reinterpret(Int32, f)) + 1
     if iidx < 1 || iidx > length(T_IFUNC)
-        # invalid intrinsic
-        return false
+        # invalid intrinsic (system will crash)
+        return Any
     end
     tf = T_IFUNC[iidx]
-    tf = tf::Tuple{Int, Int, Any}
     if !(tf[1] <= length(argtypes) <= tf[2])
         # wrong # of args
-        return false
+        return ArgumentError
     end
+
     # TODO: We could do better for cglobal
-    f === Intrinsics.cglobal && return false
+    f === Intrinsics.cglobal && return Any
     # TODO: We can't know for sure, but the user should have a way to assert
     # that it won't
-    f === Intrinsics.llvmcall && return false
-    if f === Intrinsics.checked_udiv_int || f === Intrinsics.checked_urem_int || f === Intrinsics.checked_srem_int || f === Intrinsics.checked_sdiv_int
+    f === Intrinsics.llvmcall && return Any
+
+    if (f === Intrinsics.checked_udiv_int || f === Intrinsics.checked_urem_int ||
+        f === Intrinsics.checked_srem_int || f === Intrinsics.checked_sdiv_int)
         # Nothrow as long as the second argument is guaranteed not to be zero
-        arg2 = argtypes[2]
-        isa(arg2, Const) || return false
         arg1 = argtypes[1]
+        arg2 = argtypes[2]
         warg1 = widenconst(arg1)
         warg2 = widenconst(arg2)
-        (warg1 === warg2 && isprimitivetype(warg1)) || return false
-        den_val = arg2.val
-        _iszero(den_val) && return false
-        f !== Intrinsics.checked_sdiv_int && return true
-        # Nothrow as long as we additionally don't do typemin(T)/-1
-        return !_isneg1(den_val) || (isa(arg1, Const) && !_istypemin(arg1.val))
+        if !(warg1 === warg2 && isprimitivetype(warg1))
+            return Union{TypeError, DivideError}
+        end
+        if !div_nothrow(f, arg1, arg2)
+            return DivideError
+        end
+        return Union{}
     end
+
     if f === Intrinsics.pointerref
         # Nothrow as long as the types are ok. N.B.: dereferencability is not
         # modeled here, but can cause errors (e.g. ReadOnlyMemoryError). We follow LLVM here
         # in that it is legal to remove unused non-volatile loads.
-        length(argtypes) == 3 || return false
-        return argtypes[1] ⊑ Ptr && argtypes[2] ⊑ Int && argtypes[3] ⊑ Int
+        if !(argtypes[1] ⊑ Ptr && argtypes[2] ⊑ Int && argtypes[3] ⊑ Int)
+            return Union{TypeError, ErrorException}
+        end
+        if !known_is_valid_intrinsic_elptr(𝕃, argtypes[1])
+            return ErrorException
+        end
+        return Union{}
     end
+
     if f === Intrinsics.pointerset
         eT = pointer_eltype(argtypes[1])
-        isprimitivetype(eT) || return false
-        return argtypes[2] ⊑ eT && argtypes[3] ⊑ Int && argtypes[4] ⊑ Int
-    end
-    if f === Intrinsics.arraylen
-        return argtypes[1] ⊑ Array
+        if !known_is_valid_intrinsic_elptr(𝕃, argtypes[1])
+            return Union{TypeError, ErrorException}
+        end
+        if !(argtypes[2] ⊑ eT && argtypes[3] ⊑ Int && argtypes[4] ⊑ Int)
+            return TypeError
+        end
+        return Union{}
     end
+
     if f === Intrinsics.bitcast
-        ty, isexact, isconcrete = instanceof_tfunc(argtypes[1])
+        ty, _, isconcrete, _ = instanceof_tfunc(argtypes[1], true)
         xty = widenconst(argtypes[2])
-        return isconcrete && isprimitivetype(ty) && isprimitivetype(xty) && Core.sizeof(ty) === Core.sizeof(xty)
+        if !isconcrete
+            return Union{ErrorException, TypeError}
+        end
+        if !(isprimitivetype(ty) && isprimitivetype(xty) && Core.sizeof(ty) === Core.sizeof(xty))
+            return ErrorException
+        end
+        return Union{}
     end
+
     if f in (Intrinsics.sext_int, Intrinsics.zext_int, Intrinsics.trunc_int,
              Intrinsics.fptoui, Intrinsics.fptosi, Intrinsics.uitofp,
              Intrinsics.sitofp, Intrinsics.fptrunc, Intrinsics.fpext)
         # If !isconcrete, `ty` may be Union{} at runtime even if we have
         # isprimitivetype(ty).
-        ty, isexact, isconcrete = instanceof_tfunc(argtypes[1])
+        ty, _, isconcrete, _ = instanceof_tfunc(argtypes[1], true)
+        if !isconcrete
+            return Union{ErrorException, TypeError}
+        end
         xty = widenconst(argtypes[2])
-        return isconcrete && isprimitivetype(ty) && isprimitivetype(xty)
+        if !(isprimitivetype(ty) && isprimitivetype(xty))
+            return ErrorException
+        end
+
+        # fpext, fptrunc, fptoui, fptosi, uitofp, and sitofp have further
+        # restrictions on the allowed types.
+        if f === Intrinsics.fpext &&
+            !(ty <: CORE_FLOAT_TYPES && xty <: CORE_FLOAT_TYPES && Core.sizeof(ty) > Core.sizeof(xty))
+            return ErrorException
+        end
+        if f === Intrinsics.fptrunc &&
+            !(ty <: CORE_FLOAT_TYPES && xty <: CORE_FLOAT_TYPES && Core.sizeof(ty) < Core.sizeof(xty))
+            return ErrorException
+        end
+        if (f === Intrinsics.fptoui || f === Intrinsics.fptosi) && !(xty <: CORE_FLOAT_TYPES)
+            return ErrorException
+        end
+        if (f === Intrinsics.uitofp || f === Intrinsics.sitofp) && !(ty <: CORE_FLOAT_TYPES)
+            return ErrorException
+        end
+
+        return Union{}
     end
+
     if f === Intrinsics.have_fma
-        ty, isexact, isconcrete = instanceof_tfunc(argtypes[1])
-        return isconcrete && isprimitivetype(ty)
+        ty, _, isconcrete, _ = instanceof_tfunc(argtypes[1], true)
+        if !(isconcrete && isprimitivetype(ty))
+            return TypeError
+        end
+        return Union{}
     end
-    # The remaining intrinsics are math/bits/comparison intrinsics. They work on all
-    # primitive types of the same type.
+
+    if f === Intrinsics.add_ptr || f === Intrinsics.sub_ptr
+        if !(argtypes[1] ⊑ Ptr && argtypes[2] ⊑ UInt)
+            return TypeError
+        end
+        return Union{}
+    end
+
+    # The remaining intrinsics are math/bits/comparison intrinsics.
+    # All the non-floating point intrinsics work on primitive values of the same type.
     isshift = f === shl_int || f === lshr_int || f === ashr_int
     argtype1 = widenconst(argtypes[1])
-    isprimitivetype(argtype1) || return false
+    isprimitivetype(argtype1) || return ErrorException
+    if contains_is(_FLOAT_INTRINSICS, f)
+        argtype1 <: CORE_FLOAT_TYPES || return ErrorException
+    end
+
     for i = 2:length(argtypes)
         argtype = widenconst(argtypes[i])
         if isshift ? !isprimitivetype(argtype) : argtype !== argtype1
-            return false
+            return ErrorException
         end
     end
-    return true
+    return Union{}
 end
 
-# whether `f` is pure for inference
-function is_pure_intrinsic_infer(f::IntrinsicFunction)
-    return !(f === Intrinsics.pointerref || # this one is volatile
-             f === Intrinsics.pointerset || # this one is never effect-free
-             f === Intrinsics.llvmcall ||   # this one is never effect-free
-             f === Intrinsics.arraylen ||   # this one is volatile
-             f === Intrinsics.sqrt_llvm_fast ||  # this one may differ at runtime (by a few ulps)
-             f === Intrinsics.have_fma ||  # this one depends on the runtime environment
-             f === Intrinsics.cglobal)  # cglobal lookup answer changes at runtime
+function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Vector{Any})
+    return intrinsic_exct(SimpleInferenceLattice.instance, f, argtypes) === Union{}
+end
+
+function _is_effect_free_infer(f::IntrinsicFunction)
+     return !(f === Intrinsics.pointerset ||
+              f === Intrinsics.atomic_pointerref ||
+              f === Intrinsics.atomic_pointerset ||
+              f === Intrinsics.atomic_pointerswap ||
+              # f === Intrinsics.atomic_pointermodify ||
+              f === Intrinsics.atomic_pointerreplace ||
+              f === Intrinsics.atomic_fence)
 end
 
-# whether `f` is effect free if nothrow
-function intrinsic_effect_free_if_nothrow(@nospecialize f)
-    return f === Intrinsics.pointerref ||
-           f === Intrinsics.have_fma ||
-           is_pure_intrinsic_infer(f)
+# whether `f` is pure for inference
+function is_pure_intrinsic_infer(f::IntrinsicFunction, is_effect_free::Union{Nothing,Bool}=nothing)
+    if is_effect_free === nothing
+        is_effect_free = _is_effect_free_infer(f)
+    end
+    return is_effect_free && !(
+            f === Intrinsics.llvmcall ||              # can do arbitrary things
+            f === Intrinsics.atomic_pointermodify ||  # can do arbitrary things
+            f === Intrinsics.pointerref ||            # this one is volatile
+            f === Intrinsics.sqrt_llvm_fast ||        # this one may differ at runtime (by a few ulps)
+            f === Intrinsics.have_fma ||              # this one depends on the runtime environment
+            f === Intrinsics.cglobal)                 # cglobal lookup answer changes at runtime
 end
 
 function intrinsic_effects(f::IntrinsicFunction, argtypes::Vector{Any})
     if f === Intrinsics.llvmcall
         # llvmcall can do arbitrary things
         return Effects()
+    elseif f === atomic_pointermodify
+        # atomic_pointermodify has memory effects, plus any effects from the ModifyOpInfo
+        return Effects()
     end
-
-    if contains_is(_INCONSISTENT_INTRINSICS, f)
-        consistent = ALWAYS_FALSE
-    elseif f === arraylen
-        consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
-    else
+    is_effect_free = _is_effect_free_infer(f)
+    effect_free = is_effect_free ? ALWAYS_TRUE : ALWAYS_FALSE
+    if ((is_pure_intrinsic_infer(f, is_effect_free) && !contains_is(_INCONSISTENT_INTRINSICS, f)) ||
+        f === Intrinsics.pointerset || f === Intrinsics.atomic_pointerset || f === Intrinsics.atomic_fence)
         consistent = ALWAYS_TRUE
-    end
-    effect_free = !(f === Intrinsics.pointerset) ? ALWAYS_TRUE : ALWAYS_FALSE
-    nothrow = (isempty(argtypes) || !isvarargtype(argtypes[end])) && intrinsic_nothrow(f, argtypes)
-    if f === arraylen
-        inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY
     else
-        inaccessiblememonly = ALWAYS_TRUE
+        consistent = ALWAYS_FALSE
     end
+    nothrow = intrinsic_nothrow(f, argtypes)
+    inaccessiblememonly = is_effect_free && !(f === Intrinsics.pointerref) ? ALWAYS_TRUE : ALWAYS_FALSE
     return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow, inaccessiblememonly)
 end
 
@@ -2604,34 +3082,40 @@ end
 # since abstract_call_gf_by_type is a very inaccurate model of _method and of typeinf_type,
 # while this assumes that it is an absolutely precise and accurate and exact model of both
 function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState)
-    UNKNOWN = CallMeta(Type, EFFECTS_THROWS, NoCallInfo())
+    UNKNOWN = CallMeta(Type, Any, Effects(EFFECTS_THROWS; nortcall=false), NoCallInfo())
     if !(2 <= length(argtypes) <= 3)
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
     tt = widenslotwrapper(argtypes[end])
     if !isa(tt, Const) && !(isType(tt) && !has_free_typevars(tt))
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
     af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1]
     if !isa(af_argtype, DataType) || !(af_argtype <: Tuple)
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
     if length(argtypes) == 3
         aft = widenslotwrapper(argtypes[2])
-        if !isa(aft, Const) && !(isType(aft) && !has_free_typevars(aft)) &&
-                !(isconcretetype(aft) && !(aft <: Builtin))
-            return UNKNOWN
-        end
         argtypes_vec = Any[aft, af_argtype.parameters...]
     else
         argtypes_vec = Any[af_argtype.parameters...]
+        isempty(argtypes_vec) && push!(argtypes_vec, Union{})
+        aft = argtypes_vec[1]
+    end
+    if !(isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) ||
+            (isconcretetype(aft) && !(aft <: Builtin) && !iskindtype(aft)))
+        return Future(UNKNOWN)
     end
 
+    # effects are not an issue if we know this statement will get removed, but if it does not get removed,
+    # then this could be recursively re-entering inference (via concrete-eval), which will not terminate
+    RT_CALL_EFFECTS = Effects(EFFECTS_TOTAL; nortcall=false)
+
     if contains_is(argtypes_vec, Union{})
-        return CallMeta(Const(Union{}), EFFECTS_TOTAL, NoCallInfo())
+        return Future(CallMeta(Const(Union{}), Union{}, RT_CALL_EFFECTS, NoCallInfo()))
     end
 
     # Run the abstract_call without restricting abstract call
@@ -2640,82 +3124,72 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
     if isa(sv, InferenceState)
         old_restrict = sv.restrict_abstract_call_sites
         sv.restrict_abstract_call_sites = false
-        call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
-        sv.restrict_abstract_call_sites = old_restrict
-    else
-        call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
-    end
-    info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure()
-    rt = widenslotwrapper(call.rt)
-    if isa(rt, Const)
-        # output was computed to be constant
-        return CallMeta(Const(typeof(rt.val)), EFFECTS_TOTAL, info)
-    end
-    rt = widenconst(rt)
-    if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
-        # output cannot be improved so it is known for certain
-        return CallMeta(Const(rt), EFFECTS_TOTAL, info)
-    elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
-        # conservatively express uncertainty of this result
-        # in two ways: both as being a subtype of this, and
-        # because of LimitedAccuracy causes
-        return CallMeta(Type{<:rt}, EFFECTS_TOTAL, info)
-    elseif (isa(tt, Const) || isconstType(tt)) &&
-        (isa(aft, Const) || isconstType(aft))
-        # input arguments were known for certain
-        # XXX: this doesn't imply we know anything about rt
-        return CallMeta(Const(rt), EFFECTS_TOTAL, info)
-    elseif isType(rt)
-        return CallMeta(Type{rt}, EFFECTS_TOTAL, info)
-    else
-        return CallMeta(Type{<:rt}, EFFECTS_TOTAL, info)
+    end
+    call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
+    tt = Core.Box(tt)
+    return Future{CallMeta}(call, interp, sv) do call, _, sv
+        if isa(sv, InferenceState)
+            sv.restrict_abstract_call_sites = old_restrict
+        end
+        info = MethodResultPure(ReturnTypeCallInfo(call.info))
+        rt = widenslotwrapper(call.rt)
+        if isa(rt, Const)
+            # output was computed to be constant
+            return CallMeta(Const(typeof(rt.val)), Union{}, RT_CALL_EFFECTS, info)
+        end
+        rt = widenconst(rt)
+        if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
+            # output cannot be improved so it is known for certain
+            return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
+        elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
+            # conservatively express uncertainty of this result
+            # in two ways: both as being a subtype of this, and
+            # because of LimitedAccuracy causes
+            return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
+        elseif isa(tt.contents, Const) || isconstType(tt.contents)
+            # input arguments were known for certain
+            # XXX: this doesn't imply we know anything about rt
+            return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
+        elseif isType(rt)
+            return CallMeta(Type{rt}, Union{}, RT_CALL_EFFECTS, info)
+        else
+            return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
+        end
     end
 end
 
 # a simplified model of abstract_call_gf_by_type for applicable
 function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any},
                              sv::AbsIntState, max_methods::Int)
-    length(argtypes) < 2 && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
-    isvarargtype(argtypes[2]) && return CallMeta(Bool, EFFECTS_UNKNOWN, NoCallInfo())
+    length(argtypes) < 2 && return Future(CallMeta(Bottom, ArgumentError, EFFECTS_THROWS, NoCallInfo()))
+    isvarargtype(argtypes[2]) && return Future(CallMeta(Bool, ArgumentError, EFFECTS_THROWS, NoCallInfo()))
     argtypes = argtypes[2:end]
     atype = argtypes_to_type(argtypes)
-    matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp),
-        InferenceParams(interp).max_union_splitting, max_methods)
-    if isa(matches, FailedMethodMatch)
-        rt = Bool # too many matches to analyze
+    if atype === Union{}
+        rt = Union{} # accidentally unreachable code
     else
-        (; valid_worlds, applicable) = matches
-        update_valid_age!(sv, valid_worlds)
-
-        # also need an edge to the method table in case something gets
-        # added that did not intersect with any existing method
-        if isa(matches, MethodMatches)
-            matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype)
-        else
-            for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts)
-                thisfullmatch || add_mt_backedge!(sv, mt, atype)
-            end
-        end
-
-        napplicable = length(applicable)
-        if napplicable == 0
-            rt = Const(false) # never any matches
+        matches = find_method_matches(interp, argtypes, atype; max_methods)
+        info = NoCallInfo()
+        if isa(matches, FailedMethodMatch)
+            rt = Bool # too many matches to analyze
         else
-            rt = Const(true) # has applicable matches
-            for i in 1:napplicable
-                match = applicable[i]::MethodMatch
-                edge = specialize_method(match)::MethodInstance
-                add_backedge!(sv, edge)
-            end
-
-            if isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) :
-                    (!all(matches.fullmatches) || any_ambig(matches))
+            (; valid_worlds, applicable) = matches
+            update_valid_age!(sv, get_inference_world(interp), valid_worlds)
+            napplicable = length(applicable)
+            if napplicable == 0
+                rt = Const(false) # never any matches
+            elseif !fully_covering(matches) || any_ambig(matches)
                 # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
                 rt = Bool
+            else
+                rt = Const(true) # has applicable matches
+            end
+            if rt !== Bool
+                info = VirtualMethodMatchInfo(matches.info)
             end
         end
     end
-    return CallMeta(rt, EFFECTS_TOTAL, NoCallInfo())
+    return Future(CallMeta(rt, Union{}, EFFECTS_TOTAL, info))
 end
 add_tfunc(applicable, 1, INT_INF, @nospecs((𝕃::AbstractLattice, f, args...)->Bool), 40)
 
@@ -2724,41 +3198,38 @@ function _hasmethod_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, sv
     if length(argtypes) == 3 && !isvarargtype(argtypes[3])
         ft′ = argtype_by_index(argtypes, 2)
         ft = widenconst(ft′)
-        ft === Bottom && return CallMeta(Bool, EFFECTS_THROWS, NoCallInfo())
+        ft === Bottom && return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo())
         typeidx = 3
     elseif length(argtypes) == 2 && !isvarargtype(argtypes[2])
         typeidx = 2
     else
-        return CallMeta(Any, Effects(), NoCallInfo())
+        return CallMeta(Any, Any, Effects(), NoCallInfo())
     end
-    (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, typeidx))
-    isexact || return CallMeta(Bool, Effects(), NoCallInfo())
+    (types, isexact, _, _) = instanceof_tfunc(argtype_by_index(argtypes, typeidx), false)
+    isexact || return CallMeta(Bool, Any, Effects(), NoCallInfo())
     unwrapped = unwrap_unionall(types)
     if types === Bottom || !(unwrapped isa DataType) || unwrapped.name !== Tuple.name
-        return CallMeta(Bool, EFFECTS_THROWS, NoCallInfo())
+        return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo())
     end
     if typeidx == 3
-        isdispatchelem(ft) || return CallMeta(Bool, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
+        isdispatchelem(ft) || return CallMeta(Bool, Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
         types = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type
     end
-    mt = ccall(:jl_method_table_for, Any, (Any,), types)
-    if !isa(mt, MethodTable)
-        return CallMeta(Bool, EFFECTS_THROWS, NoCallInfo())
-    end
-    match, valid_worlds, overlayed = findsup(types, method_table(interp))
-    update_valid_age!(sv, valid_worlds)
+    match, valid_worlds = findsup(types, method_table(interp))
+    update_valid_age!(sv, get_inference_world(interp), valid_worlds)
     if match === nothing
         rt = Const(false)
-        add_mt_backedge!(sv, mt, types) # this should actually be an invoke-type backedge
+        vresults = MethodLookupResult(Any[], valid_worlds, true)
+        mt = Core.methodtable
+        vinfo = MethodMatchInfo(vresults, mt, types, false) # XXX: this should actually be an info with invoke-type edge
     else
         rt = Const(true)
-        edge = specialize_method(match)::MethodInstance
-        add_invoke_backedge!(sv, types, edge)
+        vinfo = InvokeCallInfo(nothing, match, nothing, types)
     end
-    return CallMeta(rt, EFFECTS_TOTAL, NoCallInfo())
+    info = VirtualMethodMatchInfo(vinfo)
+    return CallMeta(rt, Union{}, EFFECTS_TOTAL, info)
 end
 
-
 # N.B.: typename maps type equivalence classes to a single value
 function typename_static(@nospecialize(t))
     t isa Const && return _typename(t.val)
@@ -2767,92 +3238,42 @@ function typename_static(@nospecialize(t))
     return isType(t) ? _typename(t.parameters[1]) : Core.TypeName
 end
 
-function global_order_nothrow(@nospecialize(o), loading::Bool, storing::Bool)
-    o isa Const || return false
+function global_order_exct(@nospecialize(o), loading::Bool, storing::Bool)
+    if !(o isa Const)
+        if o === Symbol
+            return ConcurrencyViolationError
+        elseif !hasintersect(o, Symbol)
+            return TypeError
+        else
+            return Union{ConcurrencyViolationError, TypeError}
+        end
+    end
     sym = o.val
     if sym isa Symbol
         order = get_atomic_order(sym, loading, storing)
-        return order !== MEMORY_ORDER_INVALID && order !== MEMORY_ORDER_NOTATOMIC
-    end
-    return false
-end
-@nospecs function getglobal_nothrow(M, s, o)
-    global_order_nothrow(o, #=loading=#true, #=storing=#false) || return false
-    return getglobal_nothrow(M, s)
-end
-@nospecs function getglobal_nothrow(M, s)
-    if M isa Const && s isa Const
-        M, s = M.val, s.val
-        if M isa Module && s isa Symbol
-            return isdefined(M, s)
-        end
-    end
-    return false
-end
-@nospecs function getglobal_tfunc(𝕃::AbstractLattice, M, s, order=Symbol)
-    if M isa Const && s isa Const
-        M, s = M.val, s.val
-        if M isa Module && s isa Symbol
-            return abstract_eval_global(M, s)
-        end
-        return Bottom
-    elseif !(hasintersect(widenconst(M), Module) && hasintersect(widenconst(s), Symbol))
-        return Bottom
-    end
-    return Any
-end
-@nospecs function setglobal!_tfunc(𝕃::AbstractLattice, M, s, v, order=Symbol)
-    if !(hasintersect(widenconst(M), Module) && hasintersect(widenconst(s), Symbol))
-        return Bottom
-    end
-    return v
-end
-add_tfunc(getglobal, 2, 3, getglobal_tfunc, 1)
-add_tfunc(setglobal!, 3, 4, setglobal!_tfunc, 3)
-@nospecs function setglobal!_nothrow(M, s, newty, o)
-    global_order_nothrow(o, #=loading=#false, #=storing=#true) || return false
-    return setglobal!_nothrow(M, s, newty)
-end
-@nospecs function setglobal!_nothrow(M, s, newty)
-    if M isa Const && s isa Const
-        M, s = M.val, s.val
-        if isa(M, Module) && isa(s, Symbol)
-            return global_assignment_nothrow(M, s, newty)
-        end
-    end
-    return false
-end
-
-function global_assignment_nothrow(M::Module, s::Symbol, @nospecialize(newty))
-    if isdefined(M, s) && !isconst(M, s)
-        ty = ccall(:jl_get_binding_type, Any, (Any, Any), M, s)
-        return ty === nothing || newty ⊑ ty
-    end
-    return false
-end
-
-@nospecs function get_binding_type_effect_free(M, s)
-    if M isa Const && s isa Const
-        M, s = M.val, s.val
-        if M isa Module && s isa Symbol
-            return ccall(:jl_get_binding_type, Any, (Any, Any), M, s) !== nothing
+        if order !== MEMORY_ORDER_INVALID && order !== MEMORY_ORDER_NOTATOMIC
+            return Union{}
+        else
+            return ConcurrencyViolationError
         end
+    else
+        return TypeError
     end
-    return false
 end
-@nospecs function get_binding_type_tfunc(𝕃::AbstractLattice, M, s)
-    if get_binding_type_effect_free(M, s)
-        return Const(Core.get_binding_type((M::Const).val, (s::Const).val))
-    end
-    return Type
-end
-add_tfunc(Core.get_binding_type, 2, 2, get_binding_type_tfunc, 0)
 
 @nospecs function get_binding_type_nothrow(𝕃::AbstractLattice, M, s)
-    ⊑ = Core.Compiler.:⊑(𝕃)
+    ⊑ = partialorder(𝕃)
     return M ⊑ Module && s ⊑ Symbol
 end
 
+add_tfunc(getglobal, 2, 3, @nospecs((𝕃::AbstractLattice, args...)->Any), 1)
+add_tfunc(setglobal!, 3, 4, @nospecs((𝕃::AbstractLattice, args...)->Any), 3)
+add_tfunc(swapglobal!, 3, 4, @nospecs((𝕃::AbstractLattice, args...)->Any), 3)
+add_tfunc(modifyglobal!, 4, 5, @nospecs((𝕃::AbstractLattice, args...)->Any), 3)
+add_tfunc(replaceglobal!, 4, 6, @nospecs((𝕃::AbstractLattice, args...)->Any), 3)
+add_tfunc(setglobalonce!, 3, 5, @nospecs((𝕃::AbstractLattice, args...)->Bool), 3)
+add_tfunc(Core.get_binding_type, 2, 2, @nospecs((𝕃::AbstractLattice, args...)->Type), 0)
+
 # foreigncall
 # ===========
 
@@ -2861,94 +3282,31 @@ end
 
 const FOREIGNCALL_ARG_START = 6
 
-function foreigncall_effects(@specialize(abstract_eval), e::Expr)
-    args = e.args
-    name = args[1]
-    isa(name, QuoteNode) && (name = name.value)
-    isa(name, Symbol) || return EFFECTS_UNKNOWN
-    ndims = alloc_array_ndims(name)
-    if ndims !== nothing
-        if ndims ≠ 0
-            return alloc_array_effects(abstract_eval, args, ndims)
-        else
-            return new_array_effects(abstract_eval, args)
-        end
-    end
-    if is_array_resize(name)
-        return array_resize_effects()
-    end
+function foreigncall_effects(@nospecialize(abstract_eval), ::Expr)
+    # `:foreigncall` can potentially perform all sorts of operations, including calling
+    # overlay methods, but the `:foreigncall` itself is not dispatched, and there is no
+    # concern that the method calls that potentially occur within the `:foreigncall` will
+    # be executed using the wrong method table due to concrete evaluation, so using
+    # `EFFECTS_UNKNOWN` here and not tainting with `:nonoverlayed` is fine
     return EFFECTS_UNKNOWN
 end
 
-function is_array_resize(name::Symbol)
-    return name === :jl_array_grow_beg || name === :jl_array_grow_end ||
-           name === :jl_array_del_beg || name === :jl_array_del_end ||
-           name === :jl_array_grow_at || name === :jl_array_del_at
-end
-
-function array_resize_effects()
-    return Effects(EFFECTS_TOTAL;
-        effect_free = EFFECT_FREE_IF_INACCESSIBLEMEMONLY,
-        nothrow = false,
-        inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY)
-end
-
-function alloc_array_ndims(name::Symbol)
-    if name === :jl_alloc_array_1d
-        return 1
-    elseif name === :jl_alloc_array_2d
-        return 2
-    elseif name === :jl_alloc_array_3d
-        return 3
-    elseif name === :jl_new_array
-        return 0
-    end
-    return nothing
-end
-
-function alloc_array_effects(@specialize(abstract_eval), args::Vector{Any}, ndims::Int)
-    nothrow = alloc_array_nothrow(abstract_eval, args, ndims)
-    return Effects(EFFECTS_TOTAL; consistent=CONSISTENT_IF_NOTRETURNED, nothrow)
-end
-
-function alloc_array_nothrow(@specialize(abstract_eval), args::Vector{Any}, ndims::Int)
-    length(args) ≥ ndims+FOREIGNCALL_ARG_START || return false
-    atype = instanceof_tfunc(abstract_eval(args[FOREIGNCALL_ARG_START]))[1]
-    dims = Csize_t[]
-    for i in 1:ndims
-        dim = abstract_eval(args[i+FOREIGNCALL_ARG_START])
-        isa(dim, Const) || return false
-        dimval = dim.val
-        isa(dimval, Int) || return false
-        push!(dims, reinterpret(Csize_t, dimval))
-    end
-    return _new_array_nothrow(atype, ndims, dims)
-end
-
-function new_array_effects(@specialize(abstract_eval), args::Vector{Any})
-    nothrow = new_array_nothrow(abstract_eval, args)
-    return Effects(EFFECTS_TOTAL; consistent=CONSISTENT_IF_NOTRETURNED, nothrow)
-end
-
-function new_array_nothrow(@specialize(abstract_eval), args::Vector{Any})
-    length(args) ≥ FOREIGNCALL_ARG_START+1 || return false
-    atype = instanceof_tfunc(abstract_eval(args[FOREIGNCALL_ARG_START]))[1]
-    dims = abstract_eval(args[FOREIGNCALL_ARG_START+1])
-    isa(dims, Const) || return dims === Tuple{}
-    dimsval = dims.val
-    isa(dimsval, Tuple{Vararg{Int}}) || return false
-    ndims = nfields(dimsval)
-    isa(ndims, Int) || return false
-    dims = Csize_t[reinterpret(Csize_t, dimval) for dimval in dimsval]
-    return _new_array_nothrow(atype, ndims, dims)
-end
-
-function _new_array_nothrow(@nospecialize(atype), ndims::Int, dims::Vector{Csize_t})
-    isa(atype, DataType) || return false
-    eltype = atype.parameters[1]
-    iskindtype(typeof(eltype)) || return false
-    elsz = aligned_sizeof(eltype)
-    return ccall(:jl_array_validate_dims, Cint,
-        (Ptr{Csize_t}, Ptr{Csize_t}, UInt32, Ptr{Csize_t}, Csize_t),
-        #=nel=#RefValue{Csize_t}(), #=tot=#RefValue{Csize_t}(), ndims, dims, elsz) == 0
+function new_genericmemory_nothrow(@nospecialize(abstract_eval), args::Vector{Any})
+    length(args) ≥ 1+FOREIGNCALL_ARG_START || return false
+    mtype = instanceof_tfunc(abstract_eval(args[FOREIGNCALL_ARG_START]))[1]
+    isa(mtype, DataType) || return false
+    isdefined(mtype, :instance) || return false
+    elsz = Int(datatype_layoutsize(mtype))
+    arrayelem = datatype_arrayelem(mtype)
+    dim = abstract_eval(args[1+FOREIGNCALL_ARG_START])
+    isa(dim, Const) || return false
+    dimval = dim.val
+    isa(dimval, Int) || return false
+    0 < dimval < typemax(Int) || return false
+    tot, ovflw = Intrinsics.checked_smul_int(dimval, elsz)
+    ovflw && return false
+    isunion = 2
+    tot, ovflw = Intrinsics.checked_sadd_int(tot, arrayelem == isunion ? 1 + dimval : 1)
+    ovflw && return false
+    return true
 end
diff --git a/Compiler/src/timing.jl b/Compiler/src/timing.jl
new file mode 100644
index 0000000000000..64d2ab53d0e32
--- /dev/null
+++ b/Compiler/src/timing.jl
@@ -0,0 +1,38 @@
+if ccall(:jl_timing_enabled, Cint, ()) != 0
+    function getzonedexpr(name::Union{Symbol, String}, ex::Expr, func::Symbol, file::Symbol, line::Integer, color::Integer)
+        event = RefValue{Ptr{Cvoid}}(C_NULL)
+        name = QuoteNode(Symbol(name))
+        func = QuoteNode(func)
+        file = QuoteNode(file)
+
+        # XXX: This buffer must be large enough to store any jl_timing_block_t (runtime-checked)
+        buffer = (0, 0, 0, 0, 0, 0, 0)
+        buffer_size = Core.sizeof(buffer)
+        return quote
+            if $event[] === C_NULL
+                $event[] = ccall(:_jl_timing_event_create, Ptr{Cvoid},
+                                 (Ptr{UInt8}, Ptr{UInt8}, Ptr{UInt8}, Ptr{UInt8}, Cint, Cint),
+                                 :CORE_COMPILER, $name, $func, $file, $line, $color)
+            end
+            timing_block = RefValue($buffer)
+            block_ptr = pointer_from_objref(timing_block)
+            $(Expr(:gc_preserve, quote
+                ccall(:_jl_timing_block_init, Cvoid, (Ptr{Cvoid}, Csize_t, Ptr{Cvoid}), block_ptr, $buffer_size, $event[])
+                ccall(:_jl_timing_block_start, Cvoid, (Ptr{Cvoid},), block_ptr)
+                $(Expr(:tryfinally,
+                    :($(Expr(:escape, ex))),
+                    quote
+                        ccall(:_jl_timing_block_end, Cvoid, (Ptr{Cvoid},), block_ptr)
+                    end
+                ))
+            end, :timing_block))
+        end
+    end
+    macro zone(name, ex::Expr)
+        return getzonedexpr(name, ex, :unknown_julia_function, __source__.file, __source__.line, 0)
+    end
+else
+    macro zone(_, ex::Expr)
+        return esc(ex)
+    end
+end
diff --git a/Compiler/src/typeinfer.jl b/Compiler/src/typeinfer.jl
new file mode 100644
index 0000000000000..2ae0c627b1b4d
--- /dev/null
+++ b/Compiler/src/typeinfer.jl
@@ -0,0 +1,1871 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+"""
+The module `Core.Compiler.Timings` provides a simple implementation of nested timers that
+can be used to measure the exclusive time spent inferring each method instance that is
+recursively inferred during type inference.
+
+This is meant to be internal to the compiler, and makes some specific assumptions about
+being used for this purpose alone.
+"""
+module Timings
+
+using ..Core
+using ..Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline,
+    @inbounds, copy, backtrace, _time_ns
+
+# What we record for any given frame we infer during type inference.
+struct InferenceFrameInfo
+    mi::Core.MethodInstance
+    sptypes::Vector{Compiler.VarState}
+    slottypes::Vector{Any}
+    nargs::Int
+end
+
+function _typeinf_identifier(frame::Compiler.InferenceState)
+    mi_info = InferenceFrameInfo(
+        frame.linfo,
+        copy(frame.sptypes),
+        copy(frame.slottypes),
+        length(frame.result.argtypes),
+    )
+    return mi_info
+end
+
+_typeinf_identifier(frame::InferenceFrameInfo) = frame
+
+"""
+    Compiler.Timing(mi_info, start_time, ...)
+
+Internal type containing the timing result for running type inference on a single
+MethodInstance.
+"""
+struct Timing
+    mi_info::InferenceFrameInfo
+    start_time::UInt64
+    cur_start_time::UInt64
+    time::UInt64
+    children::Core.Array{Timing,1}
+    bt         # backtrace collected upon initial entry to typeinf
+end
+Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
+Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])
+
+# We keep a stack of the Timings for each of the MethodInstances currently being timed.
+# Since type inference currently operates via a depth-first search (during abstract
+# evaluation), this vector operates like a call stack. The last node in _timings is the
+# node currently being inferred, and its parent is directly before it, etc.
+# Each Timing also contains its own vector for all of its children, so that the tree
+# call structure through type inference is recorded. (It's recorded as a tree, not a graph,
+# because we create a new node for duplicates.)
+const _timings = Timing[]
+# ROOT() is an empty function used as the top-level Timing node to measure all time spent
+# *not* in type inference during a given recording trace. It is used as a "dummy" node.
+function ROOT() end
+const ROOTmi = Compiler.specialize_method(
+    first(Compiler.methods(ROOT)), Tuple{typeof(ROOT)}, Core.svec())
+"""
+    Compiler.reset_timings()
+
+Empty out the previously recorded type inference timings (`Compiler._timings`), and
+start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference.
+"""
+function reset_timings() end
+push!(_timings, Timing(
+    # The MethodInstance for ROOT(), and default empty values for other fields.
+    InferenceFrameInfo(ROOTmi, Compiler.VarState[], Any[Core.Const(ROOT)], 1),
+    _time_ns()))
+function close_current_timer() end
+function enter_new_timer(frame) end
+function exit_current_timer(_expected_frame_) end
+
+end  # module Timings
+
+"""
+    Compiler.__set_measure_typeinf(onoff::Bool)
+
+If set to `true`, record per-method-instance timings within type inference in the Compiler.
+"""
+__set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
+const __measure_typeinf__ = RefValue{Bool}(false)
+
+function result_edges(::AbstractInterpreter, caller::InferenceState)
+    result = caller.result
+    opt = result.src
+    if isa(opt, OptimizationState)
+        return Core.svec(opt.inlining.edges...)
+    else
+        return Core.svec(caller.edges...)
+    end
+end
+
+function finish!(interp::AbstractInterpreter, caller::InferenceState, validation_world::UInt, time_before::UInt64)
+    result = caller.result
+    edges = result_edges(interp, caller)
+    valid_worlds = caller.valid_worlds
+    min_world, max_world = first(valid_worlds), last(valid_worlds)
+    result.valid_worlds = valid_worlds
+    caller.src.min_world = min_world
+    caller.src.max_world = max_world
+    #@assert max_world <= get_world_counter() || isempty(edges)
+    if isdefined(result, :ci)
+        ci = result.ci
+        mi = result.linfo
+        result_type = result.result
+        result_type isa LimitedAccuracy && (result_type = result_type.typ)
+        @assert !(result_type === nothing)
+        const_flag = is_result_constabi_eligible(result)
+        if isa(result_type, Const)
+            rettype_const = result_type.val
+            const_flags = const_flag ? 0x3 : 0x2
+        elseif isa(result_type, PartialOpaque)
+            rettype_const = result_type
+            const_flags = 0x2
+        elseif isconstType(result_type)
+            rettype_const = result_type.parameters[1]
+            const_flags = 0x2
+        elseif isa(result_type, PartialStruct)
+            rettype_const = (_getundefs(result_type), result_type.fields)
+            const_flags = 0x2
+        elseif isa(result_type, InterConditional)
+            rettype_const = result_type
+            const_flags = 0x2
+        elseif isa(result_type, InterMustAlias)
+            rettype_const = result_type
+            const_flags = 0x2
+        else
+            rettype_const = nothing
+            const_flags = 0x0
+        end
+        inferred_result = nothing
+        debuginfo = nothing
+        const_flag = is_result_constabi_eligible(result)
+        discard_src = caller.cache_mode === CACHE_MODE_NULL || const_flag
+        if !discard_src
+            inferred_result = transform_result_for_cache(interp, result, edges)
+            if inferred_result !== nothing
+                result.src = inferred_result
+                debuginfo = get_debuginfo(inferred_result)
+                # Inlining may fast-path the global cache via InferenceResult, so store it back here
+                result.src = inferred_result
+            else
+                if isa(result.src, OptimizationState)
+                    debuginfo = get_debuginfo(ir_to_codeinf!(result.src))
+                elseif isa(result.src, CodeInfo)
+                    debuginfo = get_debuginfo(result.src)
+                end
+            end
+            # TODO: do we want to augment edges here with any :invoke targets that we got from inlining (such that we didn't have a direct edge to it already)?
+            if inferred_result isa CodeInfo
+                if may_compress(interp)
+                    nslots = length(inferred_result.slotflags)
+                    resize!(inferred_result.slottypes::Vector{Any}, nslots)
+                    resize!(inferred_result.slotnames, nslots)
+                end
+                inferred_result = maybe_compress_codeinfo(interp, mi, inferred_result)
+            elseif ci.owner === nothing
+                # The global cache can only handle objects that codegen understands
+                inferred_result = nothing
+            end
+        else
+            result.src = nothing
+        end
+        if debuginfo === nothing
+            debuginfo = DebugInfo(mi)
+        end
+        # if we aren't cached, we don't need this edge
+        # but our caller might, so let's just make it anyways
+        if max_world >= validation_world
+            # if we can record all of the backedges in the global reverse-cache,
+            # we can now widen our applicability in the global cache too
+            store_backedges(ci, edges)
+        end
+        ipo_effects = encode_effects(result.ipo_effects)
+        time_now = _time_ns()
+        time_self_ns = caller.time_self_ns + (time_now - time_before)
+        time_total = (time_now - caller.time_start - caller.time_paused) * 1e-9
+        ccall(:jl_fill_codeinst, Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
+            ci, widenconst(result_type), widenconst(result.exc_result), rettype_const, const_flags,
+            min_world, max_world,
+            ipo_effects, result.analysis_results, debuginfo, edges)
+        ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
+            ci, inferred_result, const_flag, min_world, max_world, ipo_effects,
+            result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, debuginfo, edges)
+    elseif caller.cache_mode === CACHE_MODE_LOCAL
+        result.src = transform_result_for_local_cache(interp, result)
+    end
+    nothing
+end
+
+function promotecache!(interp::AbstractInterpreter, caller::InferenceState)
+    result = caller.result
+    if isdefined(result, :ci)
+        ci = result.ci
+        mi = result.linfo
+        if is_already_cached(interp, result)
+            # convert to a local cache or insert it now globally
+            engine_reject(interp, ci)
+            caller.cache_mode = CACHE_MODE_LOCAL
+        end
+        if !iszero(caller.cache_mode & CACHE_MODE_GLOBAL)
+            code_cache(interp)[mi] = ci
+        end
+        engine_reject(interp, ci)
+        codegen = codegen_cache(interp)
+        if codegen !== nothing
+            uncompressed = result.src
+            if isa(uncompressed, OptimizationState)
+                uncompressed = ir_to_codeinf!(uncompressed, ci.edges)
+                result.src = uncompressed
+            end
+            if isa(uncompressed, CodeInfo)
+                # record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work
+                codegen[ci] = uncompressed
+                if bootstrapping_compiler && !(ci.inferred isa MaybeCompressed)
+                    # This is necessary to get decent bootstrapping performance
+                    # when compiling the compiler to inject everything eagerly
+                    # where codegen can start finding and using it right away
+                    if mi.def isa Method && isa_compileable_sig(mi) && is_cached(caller)
+                        ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), ci, uncompressed)
+                    end
+                end
+            end
+        end
+    end
+    if !iszero(caller.cache_mode & CACHE_MODE_LOCAL)
+        push!(get_inference_cache(interp), result)
+    end
+    nothing
+end
+
+function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstance, src::CodeInfo)
+    user_edges = src.edges
+    edges = user_edges isa SimpleVector ? user_edges : user_edges === nothing ? Core.svec() : Core.svec(user_edges...)
+    const_flag = false
+    di = src.debuginfo
+    rettype = Any
+    exctype = Any
+    const_flags = 0x0
+    ipo_effects = zero(UInt32)
+    min_world = src.min_world
+    max_world = src.max_world
+    if max_world >= get_world_counter()
+        max_world = typemax(UInt)
+    end
+    if max_world == typemax(UInt)
+        # if we can record all of the backedges in the global reverse-cache,
+        # we can now widen our applicability in the global cache too
+        store_backedges(ci, edges)
+    end
+    ccall(:jl_fill_codeinst, Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
+        ci, rettype, exctype, nothing, const_flags, min_world, max_world, ipo_effects, nothing, di, edges)
+    ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
+        ci, nothing, const_flag, min_world, max_world, ipo_effects, nothing, 0.0, 0.0, 0.0, di, edges)
+    code_cache(interp)[mi] = ci
+    codegen = codegen_cache(interp)
+    if codegen !== nothing
+        codegen[ci] = src
+    end
+    engine_reject(interp, ci)
+    return nothing
+end
+
+function finish_nocycle(interp::AbstractInterpreter, frame::InferenceState, time_before::UInt64)
+    opt_cache = IdDict{MethodInstance,CodeInstance}()
+    finishinfer!(frame, interp, frame.cycleid, opt_cache)
+    opt = frame.result.src
+    if opt isa OptimizationState # implies `may_optimize(interp) === true`
+        optimize(interp, opt, frame.result)
+        # check the valid_worlds hasn't been narrowed by added :invoke edges
+        valid_worlds = intersect(frame.valid_worlds, compute_recursive_worlds(opt.inlining.edges))
+        update_valid_age!(frame, get_inference_world(interp), valid_worlds)
+    end
+    empty!(opt_cache)
+    validation_world = get_world_counter()
+    finish!(interp, frame, validation_world, time_before)
+    promotecache!(interp, frame)
+    if isdefined(frame.result, :ci)
+        # After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
+        # (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
+        # validity.
+        ccall(:jl_promote_ci_to_current, Cvoid, (Any, UInt), frame.result.ci, validation_world)
+    end
+    if frame.cycleid != 0
+        frames = frame.callstack::Vector{AbsIntState}
+        @assert frames[end] === frame
+        pop!(frames)
+    end
+    return nothing
+end
+
+function finish_cycle(interp::AbstractInterpreter, frames::Vector{AbsIntState}, cycleid::Int, time_before::UInt64)
+    world = get_inference_world(interp)
+    cycle_valid_worlds = WorldRange()
+    cycle_valid_effects = EFFECTS_TOTAL
+    for frameid = cycleid:length(frames)
+        caller = frames[frameid]::InferenceState
+        @assert caller.cycleid == cycleid
+        # converge the world age range and effects for this cycle here:
+        # all frames in the cycle should have the same bits of `valid_worlds` and `effects`
+        # that are simply the intersection of each partial computation, without having
+        # dependencies on each other (unlike rt and exct)
+        cycle_valid_worlds = intersect(cycle_valid_worlds, caller.valid_worlds)
+        cycle_valid_effects = merge_effects(cycle_valid_effects, caller.ipo_effects)
+    end
+    opt_cache = IdDict{MethodInstance,CodeInstance}()
+    for frameid = cycleid:length(frames)
+        caller = frames[frameid]::InferenceState
+        adjust_cycle_frame!(caller, world, cycle_valid_worlds, cycle_valid_effects)
+        finishinfer!(caller, caller.interp, cycleid, opt_cache)
+        time_now = _time_ns()
+        caller.time_self_ns += (time_now - time_before)
+        time_before = time_now
+    end
+    time_caches = 0.0 # the total and adjusted time of every entry in the cycle are the same
+    time_paused = UInt64(0)
+    for frameid = cycleid:length(frames)
+        caller = frames[frameid]::InferenceState
+        opt = caller.result.src
+        if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
+            optimize(caller.interp, opt, caller.result)
+            cycle_valid_worlds = intersect(cycle_valid_worlds, compute_recursive_worlds(opt.inlining.edges))
+            time_now = _time_ns()
+            caller.time_self_ns += (time_now - time_before)
+            time_before = time_now
+        end
+        time_caches += caller.time_caches
+        time_paused += caller.time_paused
+        caller.time_paused = UInt64(0)
+        caller.time_caches = 0.0
+    end
+    empty!(opt_cache)
+    cycletop = frames[cycleid]::InferenceState
+    time_start = cycletop.time_start
+    validation_world = get_world_counter()
+    cis = CodeInstance[]
+    for frameid = cycleid:length(frames)
+        caller = frames[frameid]::InferenceState
+        caller.time_start = time_start
+        caller.time_caches = time_caches
+        caller.time_paused = time_paused
+        update_valid_age!(caller, world, cycle_valid_worlds)
+        finish!(caller.interp, caller, validation_world, time_before)
+        if isdefined(caller.result, :ci)
+            push!(cis, caller.result.ci)
+        end
+    end
+    if cycletop.parentid != 0
+        parent = frames[cycletop.parentid]
+        parent.time_caches += time_caches
+        parent.time_paused += time_paused
+    end
+    # After everything is finished, promote the work into visible caches
+    for frameid = cycleid:length(frames)
+        caller = frames[frameid]::InferenceState
+        promotecache!(caller.interp, caller)
+    end
+    # After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
+    # (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
+    # validity.
+    ccall(:jl_promote_cis_to_current, Cvoid, (Ptr{CodeInstance}, Csize_t, UInt), cis, length(cis), validation_world)
+    resize!(frames, cycleid - 1)
+    return nothing
+end
+
+function adjust_cycle_frame!(sv::InferenceState, world::UInt, cycle_valid_worlds::WorldRange, cycle_valid_effects::Effects)
+    update_valid_age!(sv, world, cycle_valid_worlds)
+    sv.ipo_effects = cycle_valid_effects
+    # traverse the callees of this cycle that are tracked within `sv.cycle_backedges`
+    # and adjust their statements so that they are consistent with the new `cycle_valid_effects`
+    new_flags = flags_for_effects(cycle_valid_effects)
+    for (callee, pc) in sv.cycle_backedges
+        old_currpc = callee.currpc
+        callee.currpc = pc
+        set_curr_ssaflag!(callee, new_flags, IR_FLAGS_EFFECTS)
+        callee.currpc = old_currpc
+    end
+    return nothing
+end
+
+function get_debuginfo(src)
+    isa(src, CodeInfo) && return src.debuginfo
+    isa(src, OptimizationState) && return src.src.debuginfo
+    return nothing
+end
+
+function is_result_constabi_eligible(result::InferenceResult)
+    result_type = result.result
+    return isa(result_type, Const) && is_foldable_nothrow(result.ipo_effects) && is_inlineable_constant(result_type.val)
+end
+
+function compute_inlining_cost(interp::AbstractInterpreter, result::InferenceResult)
+    src = result.src
+    isa(src, OptimizationState) || return MAX_INLINE_COST
+    compute_inlining_cost(interp, result, src.optresult)
+end
+
+function compute_inlining_cost(interp::AbstractInterpreter, result::InferenceResult, optresult#=::OptimizationResult=#)
+    return inline_cost_model(interp, result, optresult.inline_flag, optresult.ir)
+end
+
+function inline_cost_model(interp::AbstractInterpreter, result::InferenceResult,
+        inline_flag::UInt8, ir::IRCode)
+
+    inline_flag === SRC_FLAG_DECLARED_NOINLINE && return MAX_INLINE_COST
+
+    mi = result.linfo
+    (; def, specTypes) = mi
+    if !isa(def, Method)
+        return MAX_INLINE_COST
+    end
+
+    declared_inline = inline_flag === SRC_FLAG_DECLARED_INLINE
+
+    rt = result.result
+    @assert !(rt isa LimitedAccuracy)
+    rt = widenslotwrapper(rt)
+
+    sig = unwrap_unionall(specTypes)
+    if !(isa(sig, DataType) && sig.name === Tuple.name)
+        return MAX_INLINE_COST
+    end
+    if !declared_inline && rt === Bottom
+        return MAX_INLINE_COST
+    end
+
+    if declared_inline && isdispatchtuple(specTypes)
+        # obey @inline declaration if a dispatch barrier would not help
+        return MIN_INLINE_COST
+    else
+        # compute the cost (size) of inlining this code
+        params = OptimizationParams(interp)
+        cost_threshold = default = params.inline_cost_threshold
+        if ⊑(optimizer_lattice(interp), rt, Tuple) && !isconcretetype(widenconst(rt))
+            cost_threshold += params.inline_tupleret_bonus
+        end
+        # if the method is declared as `@inline`, increase the cost threshold 20x
+        if declared_inline
+            cost_threshold += 19*default
+        end
+        # a few functions get special treatment
+        if def.module === _topmod(def.module)
+            name = def.name
+            if name === :iterate || name === :unsafe_convert || name === :cconvert
+                cost_threshold += 4*default
+            end
+        end
+        return inline_cost_model(ir, params, cost_threshold)
+    end
+end
+
+function transform_result_for_local_cache(interp::AbstractInterpreter, result::InferenceResult)
+    ## XXX: this must perform the exact same operations as transform_result_for_cache to avoid introducing soundness bugs
+    if is_result_constabi_eligible(result)
+        return nothing
+    end
+    src = result.src
+    if isa(src, OptimizationState)
+        # Compute and store any information required to determine the inlineability of the callee.
+        opt = src
+        opt.src.inlining_cost = compute_inlining_cost(interp, result)
+    end
+    return src
+end
+
+function transform_result_for_cache(interp::AbstractInterpreter, result::InferenceResult, edges::SimpleVector)
+    inlining_cost = nothing
+    src = result.src
+    if isa(src, OptimizationState)
+        opt = src
+        inlining_cost = compute_inlining_cost(interp, result, opt.optresult)
+        discard_optimized_result(interp, inlining_cost) && return nothing
+        src = ir_to_codeinf!(opt)
+    end
+    if isa(src, CodeInfo)
+        src.edges = edges
+        if inlining_cost !== nothing
+            src.inlining_cost = inlining_cost
+        elseif may_optimize(interp)
+            src.inlining_cost = compute_inlining_cost(interp, result)
+        end
+    end
+    return src
+end
+
+function discard_optimized_result(interp::AbstractInterpreter, inlining_cost::InlineCostType)
+    may_discard_trees(interp) || return false
+    return inlining_cost == MAX_INLINE_COST
+end
+
+function maybe_compress_codeinfo(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInfo)
+    def = mi.def
+    isa(def, Method) || return ci # don't compress toplevel code
+    can_discard_trees = may_discard_trees(interp)
+    cache_the_tree = !can_discard_trees || is_inlineable(ci)
+    cache_the_tree || return nothing
+    # TODO: do we want to augment edges here with any :invoke targets that we got from inlining (such that we didn't have a direct edge to it already)?
+    may_compress(interp) && return ccall(:jl_compress_ir, String, (Any, Any), def, ci)
+    return ci
+end
+
+function cycle_fix_limited(@nospecialize(typ), sv::InferenceState, cycleid::Int)
+    if typ isa LimitedAccuracy
+        frames = sv.callstack::Vector{AbsIntState}
+        causes = typ.causes
+        for frameid = cycleid:length(frames)
+            caller = frames[frameid]::InferenceState
+            caller in causes || continue
+            causes === typ.causes && (causes = copy(causes))
+            pop!(causes, caller)
+            if isempty(causes)
+                return typ.typ
+            end
+        end
+        @assert sv.parentid != 0
+        if causes !== typ.causes
+            return LimitedAccuracy(typ.typ, causes)
+        end
+    end
+    return typ
+end
+
+function adjust_effects(ipo_effects::Effects, def::Method)
+    # override the analyzed effects using manually annotated effect settings
+    override = decode_effects_override(def.purity)
+    if is_effect_overridden(override, :consistent)
+        ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE)
+    end
+    if is_effect_overridden(override, :effect_free)
+        ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_TRUE)
+    end
+    if is_effect_overridden(override, :nothrow)
+        ipo_effects = Effects(ipo_effects; nothrow=true)
+    end
+    if is_effect_overridden(override, :terminates_globally)
+        ipo_effects = Effects(ipo_effects; terminates=true)
+    end
+    if is_effect_overridden(override, :notaskstate)
+        ipo_effects = Effects(ipo_effects; notaskstate=true)
+    end
+    if is_effect_overridden(override, :inaccessiblememonly)
+        ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE)
+    end
+    if is_effect_overridden(override, :noub)
+        ipo_effects = Effects(ipo_effects; noub=ALWAYS_TRUE)
+    elseif is_effect_overridden(override, :noub_if_noinbounds) && ipo_effects.noub !== ALWAYS_TRUE
+        ipo_effects = Effects(ipo_effects; noub=NOUB_IF_NOINBOUNDS)
+    end
+    if is_effect_overridden(override, :consistent_overlay)
+        ipo_effects = Effects(ipo_effects; nonoverlayed=CONSISTENT_OVERLAY)
+    end
+    if is_effect_overridden(override, :nortcall)
+        ipo_effects = Effects(ipo_effects; nortcall=true)
+    end
+    return ipo_effects
+end
+
+function adjust_effects(sv::InferenceState)
+    ipo_effects = sv.ipo_effects
+
+    # refine :consistent-cy effect using the return type information
+    # TODO this adjustment tries to compromise imprecise :consistent-cy information,
+    # that is currently modeled in a flow-insensitive way: ideally we want to model it
+    # with a proper dataflow analysis instead
+    rt = sv.bestguess
+    if rt === Bottom
+        # always throwing an error counts or never returning both count as consistent
+        ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE)
+    end
+    if sv.exc_bestguess === Bottom
+        # if the exception type of this frame is known to be `Bottom`,
+        # this frame is known to be safe
+        ipo_effects = Effects(ipo_effects; nothrow=true)
+    end
+    if is_inaccessiblemem_or_argmemonly(ipo_effects) && all(1:narguments(sv, #=include_va=#true)) do i::Int
+            return is_mutation_free_argtype(sv.slottypes[i])
+        end
+        ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE)
+    end
+    if is_consistent_if_notreturned(ipo_effects) && is_identity_free_argtype(rt)
+        # in a case when the :consistent-cy here is only tainted by mutable allocations
+        # (indicated by `CONSISTENT_IF_NOTRETURNED`), we may be able to refine it if the return
+        # type guarantees that the allocations are never returned
+        consistent = ipo_effects.consistent & ~CONSISTENT_IF_NOTRETURNED
+        ipo_effects = Effects(ipo_effects; consistent)
+    end
+    if is_consistent_if_inaccessiblememonly(ipo_effects)
+        if is_inaccessiblememonly(ipo_effects)
+            consistent = ipo_effects.consistent & ~CONSISTENT_IF_INACCESSIBLEMEMONLY
+            ipo_effects = Effects(ipo_effects; consistent)
+        elseif is_inaccessiblemem_or_argmemonly(ipo_effects)
+        else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this
+            ipo_effects = Effects(ipo_effects; consistent=ALWAYS_FALSE)
+        end
+    end
+    if is_effect_free_if_inaccessiblememonly(ipo_effects)
+        if is_inaccessiblememonly(ipo_effects)
+            effect_free = ipo_effects.effect_free & ~EFFECT_FREE_IF_INACCESSIBLEMEMONLY
+            ipo_effects = Effects(ipo_effects; effect_free)
+        elseif is_inaccessiblemem_or_argmemonly(ipo_effects)
+        else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this
+            ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_FALSE)
+        end
+    end
+
+    # override the analyzed effects using manually annotated effect settings
+    def = sv.linfo.def
+    if isa(def, Method)
+        ipo_effects = adjust_effects(ipo_effects, def)
+    end
+
+    return ipo_effects
+end
+
+function refine_exception_type(@nospecialize(exc_bestguess), ipo_effects::Effects)
+    ipo_effects.nothrow && return Bottom
+    return exc_bestguess
+end
+
+const empty_edges = Core.svec()
+
+# inference completed on `me`
+# update the MethodInstance
+function finishinfer!(me::InferenceState, interp::AbstractInterpreter, cycleid::Int,
+                      opt_cache::IdDict{MethodInstance, CodeInstance})
+    # prepare to run optimization passes on fulltree
+    @assert isempty(me.ip)
+    # inspect whether our inference had a limited result accuracy,
+    # else it may be suitable to cache
+    bestguess = me.bestguess = cycle_fix_limited(me.bestguess, me, cycleid)
+    exc_bestguess = me.exc_bestguess = cycle_fix_limited(me.exc_bestguess, me, cycleid)
+    limited_ret = bestguess isa LimitedAccuracy || exc_bestguess isa LimitedAccuracy
+    limited_src = false
+    if limited_ret
+        @assert me.parentid != 0
+    else
+        gt = me.ssavaluetypes
+        for j = 1:length(gt)
+            gt[j] = gtj = cycle_fix_limited(gt[j], me, cycleid)
+            if gtj isa LimitedAccuracy
+                @assert me.parentid != 0
+                limited_src = true
+                break
+            end
+        end
+    end
+    result = me.result
+    result.result = bestguess
+    ipo_effects = result.ipo_effects = me.ipo_effects = adjust_effects(me)
+    result.exc_result = me.exc_bestguess = refine_exception_type(me.exc_bestguess, ipo_effects)
+    src = me.src
+    src.rettype = widenconst(ignorelimited(bestguess))
+    src.ssaflags = me.ssaflags
+    valid_worlds = me.valid_worlds
+    min_world, max_world = first(valid_worlds), last(valid_worlds)
+    src.min_world = min_world
+    src.max_world = max_world
+    istoplevel = !(me.linfo.def isa Method)
+    istoplevel || compute_edges!(me) # don't add backedges to toplevel method instance
+
+    if limited_ret || limited_src
+        # A parent may be cached still, but not this intermediate work:
+        # we can throw everything else away now. Caching anything can confuse later
+        # heuristics to consider it worth trying to pursue compiling this further and
+        # finding infinite work as a result. Avoiding caching helps to ensure there is only
+        # a finite amount of work that can be discovered later (although potentially still a
+        # large multiplier on it).
+        result.src = nothing
+        result.tombstone = true
+        me.cache_mode = CACHE_MODE_NULL
+        set_inlineable!(src, false)
+    else
+        # annotate fulltree with type information,
+        # either because we are the outermost code, or we might use this later
+        type_annotate!(interp, me)
+        mayopt = may_optimize(interp)
+        doopt = mayopt &&
+                # disable optimization if we don't use this later (because it is not cached)
+                me.cache_mode != CACHE_MODE_NULL &&
+                # disable optimization if we've already obtained very accurate result
+                !result_is_constabi(interp, result)
+        if doopt
+            result.src = OptimizationState(me, interp, opt_cache)
+        else
+            result.src = src # for reflection etc.
+        end
+    end
+
+    maybe_validate_code(me.linfo, src, "inferred")
+
+    # check global cache again for :invoke use, and put in the opt_cache if it wasn't there at this time
+    if isdefined(result, :ci)
+        ci = result.ci
+        ipo_effects = encode_effects(result.ipo_effects)
+        # populate a few fields that won't change again (and are inspected by optimization)
+        @atomic :monotonic ci.ipo_purity_bits = ipo_effects
+        ci.analysis_results = result.analysis_results
+        if !iszero(me.cache_mode & CACHE_MODE_GLOBAL)
+            ci = result.ci
+            if is_already_cached(me.interp, result)
+                # convert to a local cache
+                engine_reject(interp, ci)
+                me.cache_mode = CACHE_MODE_LOCAL
+            else
+                opt_cache[result.linfo] = ci
+            end
+        end
+    end
+    nothing
+end
+
+function is_already_cached(interp::AbstractInterpreter, result::InferenceResult)
+    # check if the existing linfo metadata is also sufficient to describe the current inference result
+    # to decide if it is worth caching this right now
+    mi = result.linfo
+    cache = code_cache(interp, result.valid_worlds)
+    if haskey(cache, mi)
+        # n.b.: accurate edge representation might cause the CodeInstance for this to be constructed later
+        @assert isdefined(cache[mi], :inferred)
+        return true
+    end
+    return false
+end
+
+# Iterate a series of back-edges that need registering, based on the provided forward edge list.
+# Back-edges are returned as (invokesig, item), where the item is a Binding, MethodInstance, or
+# MethodTable.
+struct ForwardToBackedgeIterator
+    forward_edges::SimpleVector
+end
+
+function Base.iterate(it::ForwardToBackedgeIterator, i::Int = 1)
+    edges = it.forward_edges
+    i > length(edges) && return nothing
+    while i ≤ length(edges)
+        item = edges[i]
+        if item isa Int
+            i += 2
+            continue # ignore the query information if present but process the contents
+        elseif isa(item, Method)
+            # ignore `Method`-edges (from e.g. failed `abstract_call_method`)
+            i += 1
+            continue
+        elseif isa(item, Core.Binding)
+            return ((nothing, item), i + 1)
+        end
+        if isa(item, CodeInstance)
+            item = get_ci_mi(item)
+            return ((nothing, item), i + 1)
+        elseif isa(item, MethodInstance) # regular dispatch
+            return ((nothing, item), i + 1)
+        else
+            invokesig = item
+            callee = edges[i+1]
+            isa(callee, Method) && (i += 2; continue) # ignore `Method`-edges (from e.g. failed `abstract_call_method`)
+            if isa(callee, MethodTable)
+                # abstract dispatch (legacy style edges)
+                return ((invokesig, callee), i + 2)
+            else
+                # `invoke` edge
+                callee = isa(callee, CodeInstance) ? get_ci_mi(callee) : callee::MethodInstance
+                return ((invokesig, callee), i + 2)
+            end
+        end
+    end
+    return nothing
+end
+
+# record the backedges
+
+function maybe_add_binding_backedge!(b::Core.Binding, edge::Union{Method, CodeInstance})
+    meth = isa(edge, Method) ? edge : get_ci_mi(edge).def
+    ccall(:jl_maybe_add_binding_backedge, Cint, (Any, Any, Any), b, edge, meth)
+    return nothing
+end
+
+function store_backedges(caller::CodeInstance, edges::SimpleVector)
+    isa(get_ci_mi(caller).def, Method) || return # don't add backedges to toplevel method instance
+
+    backedges = ForwardToBackedgeIterator(edges)
+    for (i, (invokesig, item)) in enumerate(backedges)
+        # check for any duplicate edges we've already registered
+        duplicate_found = false
+        for (i′, (invokesig′, item′)) in enumerate(backedges)
+            i == i′ && break
+            if item′ === item && invokesig′ == invokesig
+                duplicate_found = true
+                break
+            end
+        end
+
+        if !duplicate_found
+            if item isa Core.Binding
+                maybe_add_binding_backedge!(item, caller)
+            elseif item isa MethodTable
+                ccall(:jl_method_table_add_backedge, Cvoid, (Any, Any), invokesig, caller)
+            else
+                item::MethodInstance
+                ccall(:jl_method_instance_add_backedge, Cvoid, (Any, Any, Any), item, invokesig, caller)
+            end
+        end
+    end
+    nothing
+end
+
+function compute_edges!(sv::InferenceState)
+    edges = sv.edges
+    for i in 1:length(sv.stmt_info)
+        add_edges!(edges, sv.stmt_info[i])
+    end
+    user_edges = sv.src.edges
+    if user_edges !== nothing && user_edges !== empty_edges
+        append!(edges, user_edges)
+    end
+    nothing
+end
+
+function compute_recursive_worlds(edges::Vector{Any})
+    range = WorldRange(typemin(UInt), typemax(UInt))
+    for edge in edges
+        if edge isa CodeInstance
+            wr = WorldRange(edge.min_world, edge.max_world)
+            iszero(last(wr.max_world)) && continue # part of the current cycle, not yet valid
+            range = intersect(range, wr)
+        end
+    end
+    return range
+end
+
+function record_slot_assign!(sv::InferenceState)
+    # look at all assignments to slots
+    # and union the set of types stored there
+    # to compute a lower bound on the storage required
+    body = sv.src.code::Vector{Any}
+    slottypes = sv.slottypes::Vector{Any}
+    ssavaluetypes = sv.ssavaluetypes
+    for i = 1:length(body)
+        expr = body[i]
+        # find all reachable assignments to locals
+        if was_reached(sv, i) && isexpr(expr, :(=))
+            lhs = expr.args[1]
+            if isa(lhs, SlotNumber)
+                typ = ssavaluetypes[i]
+                @assert typ !== NOT_FOUND "active slot in unreached region"
+                vt = widenconst(typ)
+                if vt !== Bottom
+                    id = slot_id(lhs)
+                    otherTy = slottypes[id]
+                    if otherTy === Bottom
+                        slottypes[id] = vt
+                    elseif otherTy === Any
+                        slottypes[id] = Any
+                    else
+                        slottypes[id] = tmerge(otherTy, vt)
+                    end
+                end
+            end
+        end
+    end
+    sv.src.slottypes = slottypes
+    return nothing
+end
+
+# find the dominating assignment to the slot `id` in the block containing statement `idx`,
+# returns `nothing` otherwise
+function find_dominating_assignment(id::Int, idx::Int, sv::InferenceState)
+    block = block_for_inst(sv.cfg, idx)
+    for pc in reverse(sv.cfg.blocks[block].stmts) # N.B. reverse since the last assignment is dominating this block
+        pc < idx || continue # N.B. needs pc ≠ idx as `id` can be assigned at `idx`
+        stmt = sv.src.code[pc]
+        isexpr(stmt, :(=)) || continue
+        lhs = stmt.args[1]
+        isa(lhs, SlotNumber) || continue
+        slot_id(lhs) == id || continue
+        return pc
+    end
+    return nothing
+end
+
+# annotate types of all symbols in AST, preparing for optimization
+function type_annotate!(::AbstractInterpreter, sv::InferenceState)
+    # widen `Conditional`s from `slottypes`
+    slottypes = sv.slottypes
+    for i = 1:length(slottypes)
+        slottypes[i] = widenconditional(slottypes[i])
+    end
+
+    # compute the required type for each slot
+    # to hold all of the items assigned into it
+    record_slot_assign!(sv)
+
+    # annotate variables load types
+    src = sv.src
+    stmts = src.code
+    nstmt = length(stmts)
+    ssavaluetypes = sv.ssavaluetypes
+    nslots = length(src.slotflags)
+
+    # widen slot wrappers (`Conditional` and `MustAlias`) and remove `NOT_FOUND` from `ssavaluetypes`
+    # and mark any unreachable statements by wrapping them in Const(...), to distinguish them from
+    # must-throw statements which also have type Bottom
+    for i = 1:nstmt
+        expr = stmts[i]
+        if was_reached(sv, i)
+            ssavaluetypes[i] = widenslotwrapper(ssavaluetypes[i]) # 3
+        else # i.e. any runtime execution will never reach this statement
+            push!(sv.unreachable, i)
+            if is_meta_expr(expr) # keep any lexically scoped expressions
+                ssavaluetypes[i] = Any # 3
+            else
+                ssavaluetypes[i] = Bottom # 3
+                # annotate that this statement actually is dead
+                stmts[i] = Const(expr)
+            end
+        end
+    end
+
+    # widen slot wrappers (`Conditional` and `MustAlias`) in `bb_vartables`
+    for varstate in sv.bb_vartables
+        if varstate !== nothing
+            for slot in 1:nslots
+                vt = varstate[slot]
+                widened_type = widenslotwrapper(ignorelimited(vt.typ))
+                varstate[slot] = VarState(widened_type, vt.undef)
+            end
+        end
+    end
+
+    return nothing
+end
+
+function merge_call_chain!(::AbstractInterpreter, parent::InferenceState, child::InferenceState)
+    # update all cycleid to be in the same group
+    frames = parent.callstack::Vector{AbsIntState}
+    @assert child.callstack === frames
+    ancestorid = child.cycleid
+    # ensure that walking the callstack has the same cycleid (DAG)
+    for frameid = reverse(ancestorid:length(frames))
+        frame = frames[frameid]::InferenceState
+        frame.cycleid == ancestorid && break
+        @assert frame.cycleid > ancestorid
+        frame.cycleid = ancestorid
+    end
+end
+
+function add_cycle_backedge!(caller::InferenceState, frame::InferenceState)
+    backedge = (caller, caller.currpc)
+    contains_is(frame.cycle_backedges, backedge) || push!(frame.cycle_backedges, backedge)
+    return frame
+end
+
+function is_same_frame(interp::AbstractInterpreter, mi::MethodInstance, frame::InferenceState)
+    return mi === frame_instance(frame) && cache_owner(interp) === cache_owner(frame.interp)
+end
+
+function poison_callstack!(infstate::InferenceState, topmost::InferenceState)
+    push!(infstate.pclimitations, topmost)
+    nothing
+end
+
+# Walk through `mi`'s upstream call chain, starting at `parent`. If a parent
+# frame matching `mi` is encountered, then there is a cycle in the call graph
+# (i.e. `mi` is a descendant callee of itself). Upon encountering this cycle,
+# we "resolve" it by merging the call chain, which entails updating each intermediary
+# frame's `cycleid` field. Finally, we return `mi`'s pre-existing frame.
+# If no cycles are found, `nothing` is returned instead.
+function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState)
+    # TODO (#48913) implement a proper recursion handling for irinterp:
+    # This works most of the time currently just because the irinterp code doesn't get used much with
+    # `@assume_effects`, so it never sees a cycle normally, but that may not be a sustainable solution.
+    parent isa InferenceState || return false
+    frames = parent.callstack::Vector{AbsIntState}
+    uncached = false
+    for frameid = reverse(1:length(frames))
+        frame = frames[frameid]
+        isa(frame, InferenceState) || break
+        uncached |= !is_cached(frame) # ensure we never add a (globally) uncached frame to a cycle
+        if is_same_frame(interp, mi, frame)
+            if uncached
+                # our attempt to speculate into a constant call lead to an undesired self-cycle
+                # that cannot be converged: if necessary, poison our call-stack (up to the discovered duplicate frame)
+                # with the limited flag and abort (set return type to Any) now
+                poison_callstack!(parent, frame)
+                return true
+            end
+            merge_call_chain!(interp, parent, frame)
+            return frame
+        end
+    end
+    return false
+end
+
+ipo_effects(code::CodeInstance) = decode_effects(code.ipo_purity_bits)
+
+# return cached result of regular inference
+function return_cached_result(interp::AbstractInterpreter, method::Method, codeinst::CodeInstance, @nospecialize(src), caller::AbsIntState, edgecycle::Bool, edgelimited::Bool)
+    rt = cached_return_type(codeinst)
+    exct = codeinst.exctype
+    effects = ipo_effects(codeinst)
+    valid_worlds = WorldRange(min_world(codeinst), max_world(codeinst))
+    if src !== nothing
+        # Create an InferenceResult to preserve cached source lookup
+        inf_result = InferenceResult(codeinst.def, typeinf_lattice(interp))
+        inf_result.result = rt
+        inf_result.exc_result = exct
+        inf_result.src = src::CodeInfo
+        inf_result.ipo_effects = effects
+        inf_result.ci_as_edge = inf_result.ci = codeinst
+        inf_result.valid_worlds = valid_worlds
+        push!(get_inference_cache(interp), inf_result)
+    else
+        inf_result = nothing
+    end
+    update_valid_age!(caller, get_inference_world(interp), valid_worlds)
+    caller.time_caches += reinterpret(Float16, codeinst.time_infer_total)
+    caller.time_caches += reinterpret(Float16, codeinst.time_infer_cache_saved)
+    return Future(MethodCallResult(interp, caller, method, rt, exct, effects, codeinst, edgecycle, edgelimited, inf_result))
+end
+
+function return_cached_result(interp::AbstractInterpreter, method::Method, inf_result::InferenceResult, @nospecialize(src), caller::AbsIntState, edgecycle::Bool, edgelimited::Bool)
+    rt = inf_result.result
+    exct = inf_result.exc_result
+    if src !== nothing
+        inf_result.src = src::CodeInfo
+    end
+    effects = inf_result.ipo_effects
+    codeinst = inf_result.ci
+    update_valid_age!(caller, get_inference_world(interp), inf_result.valid_worlds)
+    caller.time_caches += reinterpret(Float16, codeinst.time_infer_total)
+    caller.time_caches += reinterpret(Float16, codeinst.time_infer_cache_saved)
+    return Future(MethodCallResult(interp, caller, method, rt, exct, effects, codeinst, edgecycle, edgelimited, inf_result))
+end
+
+
+function MethodCallResult(::AbstractInterpreter, sv::AbsIntState, method::Method,
+                          @nospecialize(rt), @nospecialize(exct), effects::Effects,
+                          edge::Union{Nothing,CodeInstance}, edgecycle::Bool, edgelimited::Bool,
+                          call_result::Union{Nothing,InferredCallResult} = nothing)
+    if edge === nothing
+        edgecycle = edgelimited = true
+    end
+
+    # we look for the termination effect override here as well, since the :terminates effect
+    # may have been tainted due to recursion at this point even if it's overridden
+    if is_effect_overridden(sv, :terminates_globally)
+        # this frame is known to terminate
+        effects = Effects(effects, terminates=true)
+    elseif is_effect_overridden(method, :terminates_globally)
+        # this edge is known to terminate
+        effects = Effects(effects; terminates=true)
+    elseif edgecycle
+        # Some sort of recursion was detected.
+        if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv)
+            # no `MethodInstance` cycles -- don't taint :terminate
+        else
+            # we cannot guarantee that the call will terminate
+            effects = Effects(effects; terminates=false)
+        end
+    end
+
+    return MethodCallResult(rt, exct, effects, edge, edgecycle, edgelimited, call_result)
+end
+
+function codeinst_edges_sub(existing_edge::CodeInstance, min_world::UInt, max_world::UInt, edges::SimpleVector)
+    # return if the existing edge has more restrictions than the other arguments (more edges and narrower worlds)
+    if existing_edge.min_world >= min_world &&
+       existing_edge.max_world <= max_world &&
+       existing_edge.edges == edges
+        return true
+    end
+    return false
+end
+
+# allocate a dummy `edge::CodeInstance` to be added by `add_edges!`, reusing an existing_edge if possible
+# TODO: fill this in fully correctly (currently IPO info such as effects and return types are lost)
+function codeinst_as_edge(interp::AbstractInterpreter, sv::InferenceState, @nospecialize existing_edge)
+    edges = Core.svec(sv.edges...)
+    min_world, max_world = first(sv.valid_worlds), last(sv.valid_worlds)
+    if max_world >= get_world_counter()
+        max_world = typemax(UInt)
+    end
+    if existing_edge isa CodeInstance && codeinst_edges_sub(existing_edge, min_world, max_world, edges)
+        return existing_edge
+    end
+    mi = sv.linfo
+    ci = CodeInstance(mi, cache_owner(interp), Any, Any, nothing, nothing, zero(Int32),
+        min_world, max_world, zero(UInt32), nothing, nothing, edges)
+    if max_world == typemax(UInt)
+        # if we can record all of the backedges in the global reverse-cache,
+        # we can now widen our applicability in the global cache too
+        # XXX: this should come after we decide this edge is even useful
+        # (e.g. this is the job of jl_promote_ci_to_current)
+        store_backedges(ci, edges)
+    end
+    return ci
+end
+
+# compute (and cache) an inferred AST and return the current best estimate of the result type
+function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool)
+    mi = specialize_method(method, atype, sparams)
+    cache_mode = CACHE_MODE_GLOBAL # cache edge targets globally by default
+    force_inline = is_stmt_inline(get_curr_ssaflag(caller))
+    edge_ci = nothing
+    let code = get(code_cache(interp), mi, nothing)
+        codeinst = code
+        if code isa InferenceResult
+            inferred = code.src
+            codeinst = code.ci
+        elseif code isa CodeInstance # return existing rettype if the code is already inferred
+            inferred = @atomic :monotonic code.inferred
+        else
+            inferred = nothing
+        end
+        if codeinst isa CodeInstance
+            need_inlineable_code = may_optimize(interp) && (force_inline || is_inlineable(inferred))
+            if need_inlineable_code
+                src = ci_get_source(interp, codeinst, inferred)
+                if src === nothing
+                    # Re-infer to get the appropriate source representation
+                    cache_mode = CACHE_MODE_LOCAL
+                    edge_ci = codeinst
+                else # no reinference needed
+                    @assert codeinst.def === mi "MethodInstance for cached edge does not match"
+                    return return_cached_result(interp, method, code, src, caller, edgecycle, edgelimited)
+                end
+            else # no reinference needed
+                @assert codeinst.def === mi "MethodInstance for cached edge does not match"
+                return return_cached_result(interp, method, code, nothing, caller, edgecycle, edgelimited)
+            end
+        end
+    end
+    if !InferenceParams(interp).force_enable_inference && ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0
+        add_remark!(interp, caller, "[typeinf_edge] Inference is disabled for the target module")
+        return Future(MethodCallResult(interp, caller, method, Any, Any, Effects(), nothing, edgecycle, edgelimited))
+    end
+    if !is_cached(caller) && frame_parent(caller) === nothing
+        # this caller exists to return to the user
+        # (if we asked resolve_call_cycle!, it might instead detect that there is a cycle that it can't merge)
+        frame = false
+    else
+        frame = resolve_call_cycle!(interp, mi, caller)
+    end
+    if frame === false
+        # completely new, but check again after reserving in the engine
+        if cache_mode == CACHE_MODE_GLOBAL
+            reserve_start = _time_ns() # subtract engine_reserve (thread-synchronization) time from callers to avoid double-counting
+            ci_from_engine = engine_reserve(interp, mi)
+            caller.time_paused += (_time_ns() - reserve_start)
+            code = get(code_cache(interp), mi, nothing)
+            codeinst = code
+            if code isa InferenceResult
+                inferred = code.src
+                codeinst = code.ci
+            elseif code isa CodeInstance # return existing rettype if the code is already inferred
+                inferred = @atomic :monotonic code.inferred
+            else
+                inferred = nothing
+            end
+            if codeinst isa CodeInstance # return existing rettype if the code is already inferred
+                engine_reject(interp, ci_from_engine)
+                ci_from_engine = nothing
+                need_inlineable_code = may_optimize(interp) && (force_inline || is_inlineable(inferred))
+                if need_inlineable_code
+                    src = ci_get_source(interp, codeinst, inferred)
+                    if src === nothing
+                        cache_mode = CACHE_MODE_LOCAL
+                        edge_ci = codeinst
+                    else
+                        @assert codeinst.def === mi "MethodInstance for cached edge does not match"
+                        return return_cached_result(interp, method, code, src, caller, edgecycle, edgelimited)
+                    end
+                else
+                    @assert codeinst.def === mi "MethodInstance for cached edge does not match"
+                    return return_cached_result(interp, method, code, nothing, caller, edgecycle, edgelimited)
+                end
+            end
+        else
+            ci_from_engine = nothing
+        end
+        result = InferenceResult(mi, typeinf_lattice(interp))
+        result.ci = if ci_from_engine !== nothing
+                ci_from_engine
+            else
+                ccall(:jl_new_codeinst_uninit, Any, (Any, Any), mi, cache_owner(interp))::CodeInstance
+            end
+        frame = InferenceState(result, cache_mode, interp) # always use the cache for edge targets
+        if frame === nothing
+            add_remark!(interp, caller, "[typeinf_edge] Failed to retrieve source")
+            # can't get the source for this, so we know nothing
+            if ci_from_engine !== nothing
+                engine_reject(interp, ci_from_engine)
+            end
+            return Future(MethodCallResult(interp, caller, method, Any, Any, Effects(), nothing, edgecycle, edgelimited))
+        end
+        assign_parentchild!(frame, caller)
+        # the actual inference task for this edge is going to be scheduled within `typeinf_local` via the callstack queue
+        # while splitting off the rest of the work for this caller into a separate workq thunk
+        let mresult = Future{MethodCallResult}()
+            push!(caller.tasks, function get_infer_result(interp, caller)
+                update_valid_age!(caller, get_inference_world(interp), frame.valid_worlds)
+                local isinferred = is_inferred(frame)
+                local effects
+                local edge = nothing
+                local call_result = nothing
+                if isinferred
+                    edge = result.ci
+                    if edge_ci isa CodeInstance && codeinst_edges_sub(edge_ci, edge.min_world, edge.max_world, edge.edges)
+                        edge = edge_ci # override the edge for tracking invalidation
+                    end
+                    result.ci_as_edge = edge # override the edge for tracking purposes
+                    effects = result.ipo_effects # effects are adjusted already within `finish` for ipo_effects
+                    call_result = result
+                else
+                    effects = adjust_effects(effects_for_cycle(frame.ipo_effects), method)
+                    add_cycle_backedge!(caller, frame)
+                end
+                local bestguess = frame.bestguess
+                local exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
+                # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization:
+                # note that this result is cached globally exclusively, so we can use this local result destructively
+                mresult[] = MethodCallResult(interp, caller, method, bestguess, exc_bestguess, effects,
+                    edge, edgecycle, edgelimited, call_result)
+                return true
+            end)
+            return mresult
+        end
+    elseif frame === true
+        # unresolvable cycle
+        add_remark!(interp, caller, "[typeinf_edge] Unresolvable cycle")
+        return Future(MethodCallResult(interp, caller, method, Any, Any, Effects(), nothing, edgecycle, edgelimited))
+    end
+    # return the current knowledge about this cycle
+    frame = frame::InferenceState
+    update_valid_age!(caller, get_inference_world(interp), frame.valid_worlds)
+    effects = adjust_effects(effects_for_cycle(frame.ipo_effects), method)
+    bestguess = frame.bestguess
+    exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
+    add_cycle_backedge!(caller, frame)
+    return Future(MethodCallResult(interp, caller, method, bestguess, exc_bestguess, effects, nothing, edgecycle, edgelimited))
+end
+
+# The `:terminates` effect bit must be conservatively tainted unless recursion cycle has
+# been fully resolved. As for other effects, there's no need to taint them at this moment
+# because they will be tainted as we try to resolve the cycle.
+effects_for_cycle(effects::Effects) = Effects(effects; terminates=false)
+
+function cached_return_type(code::CodeInstance)
+    rettype = code.rettype
+    isdefined(code, :rettype_const) || return rettype
+    rettype_const = code.rettype_const
+    # the second subtyping/egal conditions are necessary to distinguish usual cases
+    # from rare cases when `Const` wrapped those extended lattice type objects
+    if isa(rettype_const, Tuple{Vector{Union{Nothing,Bool}}, Vector{Any}}) && !(Tuple{Vector{Union{Nothing,Bool}}, Vector{Any}} <: rettype)
+        undefs, fields = rettype_const
+        return PartialStruct(fallback_lattice, rettype, undefs, fields)
+    elseif isa(rettype_const, PartialOpaque) && rettype <: Core.OpaqueClosure
+        return rettype_const
+    elseif isa(rettype_const, InterConditional) && rettype !== InterConditional
+        return rettype_const
+    elseif isa(rettype_const, InterMustAlias) && rettype !== InterMustAlias
+        return rettype_const
+    else
+        return Const(rettype_const)
+    end
+end
+
+#### entry points for inferring a MethodInstance given a type signature ####
+
+"""
+    codeinfo_for_const(interp::AbstractInterpreter, mi::MethodInstance, worlds::WorldRange, edges::SimpleVector, @nospecialize(val))
+
+Return a fake CodeInfo that just contains `return \$val`. This function is used in various reflection APIs when asking
+for the code of a function that inference has found to just return a constant. For such functions, no code is actually
+stored - the constant is used directly. However, because this is an ABI implementation detail, it is nice to maintain
+consistency and just synthesize a CodeInfo when the reflection APIs ask for them - this function does that.
+"""
+function codeinfo_for_const(::AbstractInterpreter, mi::MethodInstance, worlds::WorldRange, edges::SimpleVector, @nospecialize(val))
+    method = mi.def::Method
+    tree = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
+    tree.code = Any[ ReturnNode(quoted(val)) ]
+    nargs = Int(method.nargs)
+    tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms)
+    tree.slotflags = fill(0x00, nargs)
+    tree.ssavaluetypes = 1
+    tree.debuginfo = DebugInfo(mi)
+    tree.ssaflags = [IR_FLAG_NULL]
+    tree.rettype = Core.Typeof(val)
+    tree.min_world = first(worlds)
+    tree.max_world = last(worlds)
+    tree.edges = edges
+    tree.nargs = UInt(nargs)
+    tree.isva = method.isva
+    set_inlineable!(tree, true)
+    tree.parent = mi
+    return tree
+end
+
+result_is_constabi(interp::AbstractInterpreter, result::InferenceResult) =
+    may_discard_trees(interp) && is_result_constabi_eligible(result)
+
+# compute an inferred AST and return type
+typeinf_code(interp::AbstractInterpreter, match::MethodMatch, run_optimizer::Bool) =
+    typeinf_code(interp, specialize_method(match), run_optimizer)
+typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector,
+             run_optimizer::Bool) =
+    typeinf_code(interp, specialize_method(method, atype, sparams), run_optimizer)
+function typeinf_code(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool)
+    frame = typeinf_frame(interp, mi, run_optimizer)
+    frame === nothing && return nothing
+    return frame.src
+end
+
+"""
+    typeinf_ircode(interp::AbstractInterpreter, match::MethodMatch,
+                   optimize_until::Union{Int,String,Nothing}) -> (ir::Union{IRCode,Nothing}, returntype::Type)
+    typeinf_ircode(interp::AbstractInterpreter,
+                   method::Method, atype, sparams::SimpleVector,
+                   optimize_until::Union{Int,String,Nothing}) -> (ir::Union{IRCode,Nothing}, returntype::Type)
+    typeinf_ircode(interp::AbstractInterpreter, mi::MethodInstance,
+                   optimize_until::Union{Int,String,Nothing}) -> (ir::Union{IRCode,Nothing}, returntype::Type)
+
+Infer a `method` and return an `IRCode` with inferred `returntype` on success.
+"""
+typeinf_ircode(interp::AbstractInterpreter, match::MethodMatch,
+               optimize_until::Union{Int,String,Nothing}) =
+    typeinf_ircode(interp, specialize_method(match), optimize_until)
+typeinf_ircode(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector,
+               optimize_until::Union{Int,String,Nothing}) =
+    typeinf_ircode(interp, specialize_method(method, atype, sparams), optimize_until)
+function typeinf_ircode(interp::AbstractInterpreter, mi::MethodInstance,
+                        optimize_until::Union{Int,String,Nothing})
+    frame = typeinf_frame(interp, mi, false)
+    if frame === nothing
+        return nothing, Any
+    end
+    (; result) = frame
+    opt = OptimizationState(frame, interp)
+    ir = run_passes_ipo_safe(opt.src, opt, optimize_until)
+    rt = widenconst(ignorelimited(result.result))
+    return ir, rt
+end
+
+# compute an inferred frame
+typeinf_frame(interp::AbstractInterpreter, match::MethodMatch, run_optimizer::Bool) =
+    typeinf_frame(interp, specialize_method(match), run_optimizer)
+typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector,
+              run_optimizer::Bool) =
+    typeinf_frame(interp, specialize_method(method, atype, sparams), run_optimizer)
+function typeinf_frame(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool)
+    result = InferenceResult(mi, typeinf_lattice(interp))
+    frame = InferenceState(result, #=cache_mode=#:no, interp)
+    frame === nothing && return nothing
+    typeinf(interp, frame)
+    is_inferred(frame) || return nothing
+    if run_optimizer
+        if result_is_constabi(interp, frame.result)
+            rt = frame.result.result::Const
+            src = codeinfo_for_const(interp, frame.linfo, frame.valid_worlds, Core.svec(frame.edges...), rt.val)
+        else
+            opt = OptimizationState(frame, interp)
+            optimize(interp, opt, frame.result)
+            src = ir_to_codeinf!(opt, frame, Core.svec(opt.inlining.edges...))
+        end
+        result.src = frame.src = src
+    end
+    return frame
+end
+
+# N.B.: These need to be aligned with the C side headers
+"""
+    SOURCE_MODE_NOT_REQUIRED
+
+Indicates to inference that the source is not required and the only fields of
+the resulting `CodeInstance` that the caller is interested in are return or
+exception types and IPO effects. Inference is still free to create source for
+it or add it to the JIT even, but is not required or expected to do so.
+"""
+const SOURCE_MODE_NOT_REQUIRED = 0x0
+
+"""
+    SOURCE_MODE_ABI
+
+Indicates to inference that it should return a CodeInstance that can
+be `->invoke`'d (because it has already been compiled).
+"""
+const SOURCE_MODE_ABI = 0x1
+
+"""
+    SOURCE_MODE_GET_SOURCE
+
+Indicates to inference that it should return a CodeInstance after it has
+prepared interp to be able to provide source code for it.
+"""
+const SOURCE_MODE_GET_SOURCE = 0xf
+
+"""
+    ci_has_abi(interp::AbstractInterpreter, code::CodeInstance)
+
+Determine whether this CodeInstance is something that could be invoked if
+interp gave it to the runtime system (either because it already has an ->invoke
+ptr, or because interp has source that could be compiled).
+"""
+function ci_has_abi(interp::AbstractInterpreter, code::CodeInstance)
+    (@atomic :acquire code.invoke) !== C_NULL && return true
+    return ci_has_source(interp, code)
+end
+
+"""
+    ci_has_source(interp::AbstractInterpreter, code::CodeInstance)
+
+Determine whether this CodeInstance is something that will return something
+compileable by ci_get_source.
+"""
+function ci_has_source(interp::AbstractInterpreter, code::CodeInstance)
+    codegen = codegen_cache(interp)
+    codegen === nothing && return false
+    use_const_api(code) && return true
+    inf = get(codegen, code, nothing)
+    inf === nothing || return true
+    inf = @atomic :monotonic code.inferred
+    if isa(inf, String)
+        inf = _uncompressed_ir(code, inf)
+    end
+    if isa(inf, CodeInfo)
+        codegen[code] = inf
+        return true
+    end
+    return false
+end
+
+# Get source if available for inlining, otherwise return nothing
+# populates codegen cache for code, if successful
+function ci_get_source(interp::AbstractInterpreter, code::CodeInstance, @nospecialize src)
+    codegen = codegen_cache(interp)
+    if codegen !== nothing
+        inf = get(codegen, code, nothing)
+        inf === nothing || return inf
+    end
+    if use_const_api(code)
+        return codeinfo_for_const(interp, get_ci_mi(code), WorldRange(code.min_world, code.max_world), code.edges, code.rettype_const)
+    end
+    if isa(src, String)
+        src = _uncompressed_ir(code, src)
+    end
+    if isa(src, CodeInfo)
+        if codegen !== nothing
+            codegen[code] = src
+        end
+        return src
+    elseif isa(src, IRCode)
+        error("IRCode is unexpected")
+    end
+    return nothing
+end
+
+function ci_get_source(interp::AbstractInterpreter, code::CodeInstance)
+    return ci_get_source(interp, code, isdefined(code, :inferred) ? code.inferred : nothing)
+end
+
+function ci_has_invoke(code::CodeInstance)
+    return (@atomic :monotonic code.invoke) !== C_NULL
+end
+
+function ci_meets_requirement(interp::AbstractInterpreter, code::CodeInstance, source_mode::UInt8)
+    source_mode == SOURCE_MODE_NOT_REQUIRED && return true
+    source_mode == SOURCE_MODE_ABI && return ci_has_abi(interp, code)
+    source_mode == SOURCE_MODE_GET_SOURCE && return ci_has_source(interp, code)
+    return false
+end
+
+# compute (and cache) an inferred AST and return type
+function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mode::UInt8)
+    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
+    let code = get(code_cache(interp), mi, nothing)
+        code isa InferenceResult && (code = code.ci)
+        if code isa CodeInstance
+            # see if this code already exists in the cache
+            if ci_meets_requirement(interp, code, source_mode)
+                ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+                return code
+            end
+        end
+    end
+    def = mi.def
+    ci = engine_reserve(interp, mi)
+    # check cache again if it is still new after reserving in the engine
+    let code = get(code_cache(interp), mi, nothing)
+        code isa InferenceResult && (code = code.ci)
+        if code isa CodeInstance
+            # see if this code already exists in the cache
+            if ci_meets_requirement(interp, code, source_mode)
+                engine_reject(interp, ci)
+                ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+                return code
+            end
+        end
+    end
+    if !InferenceParams(interp).force_enable_inference
+        if isa(def, Method) && ccall(:jl_get_module_infer, Cint, (Any,), def.module) == 0
+            src = retrieve_code_info(mi, get_inference_world(interp))
+            if src isa CodeInfo
+                finish!(interp, mi, ci, src)
+            else
+                engine_reject(interp, ci)
+            end
+            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+            return ci
+        end
+    end
+    result = InferenceResult(mi, typeinf_lattice(interp))
+    result.ci = result.ci_as_edge = ci
+    frame = InferenceState(result, #=cache_mode=#:global, interp)
+    if frame === nothing
+        engine_reject(interp, ci)
+        ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+        return nothing
+    end
+    typeinf(interp, frame)
+    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+
+    ci = result.ci # reload from result in case it changed
+    codegen = codegen_cache(interp)
+    @assert frame.cache_mode != CACHE_MODE_NULL
+    @assert is_result_constabi_eligible(result) || codegen === nothing || haskey(codegen, ci)
+    @assert is_result_constabi_eligible(result) == use_const_api(ci)
+    @assert isdefined(ci, :inferred) "interpreter did not fulfill our expectations"
+    return ci
+end
+
+# compute (and cache) an inferred AST and return the inferred return type
+function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector)
+    if contains_is(unwrap_unionall(atype).parameters, Union{})
+        return Union{} # don't ask: it does weird and unnecessary things, if it occurs during bootstrap
+    end
+    return typeinf_type(interp, specialize_method(method, atype, sparams))
+end
+typeinf_type(interp::AbstractInterpreter, match::MethodMatch) =
+    typeinf_type(interp, specialize_method(match))
+function typeinf_type(interp::AbstractInterpreter, mi::MethodInstance)
+    ci = typeinf_ext(interp, mi, SOURCE_MODE_NOT_REQUIRED)
+    ci isa CodeInstance || return nothing
+    return ci.rettype
+end
+
+# Resolve a call, as described by `argtype` to a single matching
+# Method and return a compilable MethodInstance for the call, if
+# it will be runtime-dispatched to exactly that MethodInstance
+function compileable_specialization_for_call(interp::AbstractInterpreter, @nospecialize(argtype))
+    mt = ccall(:jl_method_table_for, Any, (Any,), argtype)
+    if mt === nothing
+        # this would require scanning all method tables, so give up instead
+        return nothing
+    end
+
+    matches = findall(argtype, method_table(interp); limit = 1)
+    matches === nothing && return nothing
+    length(matches.matches) == 0 && return nothing
+    match = only(matches.matches)
+
+    compileable_atype = get_compileable_sig(match.method, match.spec_types, match.sparams)
+    compileable_atype === nothing && return nothing
+    if match.spec_types !== compileable_atype
+        sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), compileable_atype, match.method.sig)::SimpleVector
+        sparams = sp_[2]::SimpleVector
+        mi = specialize_method(match.method, compileable_atype, sparams)
+    else
+        mi = specialize_method(match.method, compileable_atype, match.sparams)
+    end
+
+    return mi
+end
+
+const QueueItems = Union{CodeInstance,MethodInstance,SimpleVector}
+
+struct CompilationQueue
+    tocompile::Vector{QueueItems}
+    inspected::IdSet{QueueItems}
+    interp::Union{AbstractInterpreter,Nothing}
+
+    CompilationQueue(;
+        interp::Union{AbstractInterpreter,Nothing}
+    ) = new(QueueItems[], IdSet{QueueItems}(), interp)
+
+    CompilationQueue(queue::CompilationQueue;
+        interp::Union{AbstractInterpreter,Nothing}
+    ) = new(empty!(queue.tocompile), empty!(queue.inspected), interp)
+end
+
+Base.push!(queue::CompilationQueue, item) = push!(queue.tocompile, item)
+Base.append!(queue::CompilationQueue, items) = append!(queue.tocompile, items)
+Base.pop!(queue::CompilationQueue) = pop!(queue.tocompile)
+Base.empty!(queue::CompilationQueue) = (empty!(queue.tocompile); empty!(queue.inspected))
+markinspected!(queue::CompilationQueue, item) = push!(queue.inspected, item)
+isinspected(queue::CompilationQueue, item) = item in queue.inspected
+Base.isempty(queue::CompilationQueue) = isempty(queue.tocompile)
+
+# collect a list of all code that is needed along with CodeInstance to codegen it fully
+function collectinvokes!(workqueue::CompilationQueue, ci::CodeInfo, sptypes::Vector{VarState};
+                         invokelatest_queue::Union{CompilationQueue,Nothing} = nothing)
+    src = ci.code
+    for i = 1:length(src)
+        stmt = src[i]
+        isexpr(stmt, :(=)) && (stmt = stmt.args[2])
+        if isexpr(stmt, :invoke) || isexpr(stmt, :invoke_modify)
+            edge = stmt.args[1]
+            edge isa CodeInstance && isdefined(edge, :inferred) && push!(workqueue, edge)
+        end
+
+        invokelatest_queue === nothing && continue
+        if isexpr(stmt, :call)
+            farg = stmt.args[1]
+            !applicable(argextype, farg, ci, sptypes) && continue # TODO: Why is this failing during bootstrap
+            ftyp = widenconst(argextype(farg, ci, sptypes))
+
+            if ftyp === typeof(Core.finalizer) && length(stmt.args) == 3
+                finalizer = argextype(stmt.args[2], ci, sptypes)
+                obj = argextype(stmt.args[3], ci, sptypes)
+                atype = argtypes_to_type(Any[finalizer, obj])
+            else
+                # No dynamic dispatch to resolve / enqueue
+                continue
+            end
+        elseif isexpr(stmt, :cfunction) && length(stmt.args) == 5
+            (_, f, _, at, _) = stmt.args
+            linfo = ci.parent
+
+            linfo isa MethodInstance || continue
+            at isa SimpleVector || continue
+
+            ft = argextype(f, ci, sptypes)
+            argtypes = Any[ft]
+            for i = 1:length(at)
+                push!(argtypes, sp_type_rewrap(at[i], linfo, #= isreturn =# false))
+            end
+            atype = argtypes_to_type(argtypes)
+        else
+            # TODO: handle other StmtInfo like OpaqueClosure?
+            continue
+        end
+        let workqueue = invokelatest_queue
+            # make a best-effort attempt to enqueue the relevant code for the dynamic invokelatest call
+            mi = compileable_specialization_for_call(workqueue.interp, atype)
+            mi === nothing && continue
+
+            push!(workqueue, mi)
+        end
+    end
+end
+
+function add_codeinsts_to_jit!(interp::AbstractInterpreter, ci, source_mode::UInt8)
+    source_mode == SOURCE_MODE_ABI || return ci
+    ci isa CodeInstance && !ci_has_invoke(ci) || return ci
+    codegen = codegen_cache(interp)
+    codegen === nothing && return ci
+    workqueue = CompilationQueue(; interp)
+    push!(workqueue, ci)
+    while !isempty(workqueue)
+        # ci_has_real_invoke(ci) && return ci # optimization: cease looping if ci happens to get compiled (not just jl_fptr_wait_for_compiled, but fully jl_is_compiled_codeinst)
+        callee = pop!(workqueue)
+        ci_has_invoke(callee) && continue
+        isinspected(workqueue, callee) && continue
+        src = ci_get_source(interp, callee)
+        if !isa(src, CodeInfo)
+            newcallee = typeinf_ext(workqueue.interp, callee.def, source_mode) # always SOURCE_MODE_ABI
+            if newcallee isa CodeInstance
+                callee === ci && (ci = newcallee) # ci stopped meeting the requirements after typeinf_ext last checked, try again with newcallee
+                push!(workqueue, newcallee)
+            end
+            if newcallee !== callee
+                markinspected!(workqueue, callee)
+            end
+            continue
+        end
+        markinspected!(workqueue, callee)
+        mi = get_ci_mi(callee)
+        sptypes = sptypes_from_meth_instance(mi)
+        collectinvokes!(workqueue, src, sptypes)
+        if iszero(ccall(:jl_mi_cache_has_ci, Cint, (Any, Any), mi, callee))
+            cached = ccall(:jl_get_ci_equiv, Any, (Any, UInt), callee, get_inference_world(workqueue.interp))::CodeInstance
+            if cached === callee
+                # make sure callee is gc-rooted and cached, as required by jl_add_codeinst_to_jit
+                code_cache(workqueue.interp)[mi] = callee
+            else
+                # use an existing CI from the cache, if there is available one that is compatible
+                callee === ci && (ci = cached)
+                callee = cached
+            end
+        end
+        ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), callee, src)
+    end
+    return ci
+end
+
+function typeinf_ext_toplevel(interp::AbstractInterpreter, mi::MethodInstance, source_mode::UInt8)
+    ci = typeinf_ext(interp, mi, source_mode)
+    ci = add_codeinsts_to_jit!(interp, ci, source_mode)
+    return ci
+end
+
+# This is a bridge for the C code calling `jl_typeinf_func()` on a single Method match
+function typeinf_ext_toplevel(mi::MethodInstance, world::UInt, source_mode::UInt8, trim_mode::UInt8)
+    inf_params = InferenceParams(; force_enable_inference = trim_mode != TRIM_NO)
+    interp = NativeInterpreter(world; inf_params)
+    return typeinf_ext_toplevel(interp, mi, source_mode)
+end
+
+function compile!(codeinfos::Vector{Any}, workqueue::CompilationQueue;
+    invokelatest_queue::Union{CompilationQueue,Nothing} = nothing,
+)
+    interp = workqueue.interp
+    world = get_inference_world(interp)
+    while !isempty(workqueue)
+        item = pop!(workqueue)
+        # each item in this list is either a MethodInstance indicating something
+        # to compile, or an svec(rettype, sig) describing a C-callable alias to create.
+        if item isa MethodInstance
+            isinspected(workqueue, item) && continue
+            # if this method is generally visible to the current compilation world,
+            # and this is either the primary world, or not applicable in the primary world
+            # then we want to compile and emit this
+            if item.def.primary_world <= world
+                ci = typeinf_ext(interp, item, SOURCE_MODE_GET_SOURCE)
+                ci isa CodeInstance && push!(workqueue, ci)
+            end
+            markinspected!(workqueue, item)
+        elseif item isa SimpleVector
+            invokelatest_queue === nothing && continue
+            (rt::Type, sig::Type) = item
+            # make a best-effort attempt to enqueue the relevant code for the ccallable
+            mi = ccall(:jl_get_specialization1, Any,
+                        (Any, Csize_t, Cint),
+                        sig, world, #= mt_cache =# 0)
+            if mi !== nothing
+                mi = mi::MethodInstance
+                ci = typeinf_ext(interp, mi, SOURCE_MODE_GET_SOURCE)
+                ci isa CodeInstance && push!(invokelatest_queue, ci)
+            end
+            # additionally enqueue the ccallable entrypoint / adapter, which implicitly
+            # invokes the above ci
+            push!(codeinfos, item)
+        elseif item isa CodeInstance
+            callee = item
+            isinspected(workqueue, callee) && continue
+            mi = get_ci_mi(callee)
+            # now make sure everything has source code, if desired
+            if use_const_api(callee)
+                src = codeinfo_for_const(interp, mi, WorldRange(callee.min_world, callee.max_world), callee.edges, callee.rettype_const)
+            else
+                src = get(interp.codegen, callee, nothing)
+                if src === nothing
+                    newcallee = typeinf_ext(interp, mi, SOURCE_MODE_GET_SOURCE)
+                    if newcallee isa CodeInstance
+                        @assert use_const_api(newcallee) || haskey(interp.codegen, newcallee)
+                        push!(workqueue, newcallee)
+                    end
+                    if newcallee !== callee
+                        markinspected!(workqueue, callee)
+                    end
+                    continue
+                end
+            end
+            markinspected!(workqueue, callee)
+            if src isa CodeInfo
+                sptypes = sptypes_from_meth_instance(mi)
+                collectinvokes!(workqueue, src, sptypes; invokelatest_queue)
+                # try to reuse an existing CodeInstance from before to avoid making duplicates in the cache
+                if iszero(ccall(:jl_mi_cache_has_ci, Cint, (Any, Any), mi, callee))
+                    cached = ccall(:jl_get_ci_equiv, Any, (Any, UInt), callee, world)::CodeInstance
+                    if cached === callee
+                        code_cache(interp)[mi] = callee
+                    else
+                        # Use an existing CI from the cache, if there is available one that is compatible
+                        callee = cached
+                    end
+                end
+                push!(codeinfos, callee)
+                push!(codeinfos, src)
+            end
+        else @assert false "unexpected item in queue" end
+    end
+    return codeinfos
+end
+
+# This is a bridge for the C code calling `jl_typeinf_func()` on set of Method matches
+# The trim_mode can be any of:
+const TRIM_NO = 0x0
+const TRIM_SAFE = 0x1
+const TRIM_UNSAFE = 0x2
+const TRIM_UNSAFE_WARN = 0x3
+function typeinf_ext_toplevel(methods::Vector{Any}, worlds::Vector{UInt}, trim_mode::UInt8)
+    inf_params = InferenceParams(; force_enable_inference = trim_mode != TRIM_NO)
+
+    # Create an "invokelatest" queue to enable eager compilation of speculative
+    # invokelatest calls such as from `Core.finalizer` and `ccallable`
+    invokelatest_queue = CompilationQueue(;
+        interp = NativeInterpreter(get_world_counter(); inf_params)
+    )
+
+    codeinfos = []
+    workqueue = CompilationQueue(; interp = nothing)
+    for this_world in reverse!(sort!(worlds))
+        workqueue = CompilationQueue(workqueue;
+            interp = NativeInterpreter(this_world; inf_params)
+        )
+
+        append!(workqueue, methods)
+        compile!(codeinfos, workqueue; invokelatest_queue)
+    end
+
+    if invokelatest_queue !== nothing
+        # This queue is intentionally aliased, to handle e.g. a `finalizer` calling `Core.finalizer`
+        # (it will enqueue into itself and immediately drain)
+        compile!(codeinfos, invokelatest_queue; invokelatest_queue)
+    end
+
+    if trim_mode != TRIM_NO && trim_mode != TRIM_UNSAFE
+        verify_typeinf_trim(codeinfos, trim_mode == TRIM_UNSAFE_WARN)
+    end
+    return codeinfos
+end
+
+const _verify_trim_world_age = RefValue{UInt}(typemax(UInt))
+verify_typeinf_trim(codeinfos::Vector{Any}, onlywarn::Bool) = Core._call_in_world(_verify_trim_world_age[], verify_typeinf_trim, stdout, codeinfos, onlywarn)
+
+function return_type(@nospecialize(f), t::DataType) # this method has a special tfunc
+    world = tls_world_age()
+    args = Any[_return_type, NativeInterpreter(world), Tuple{Core.Typeof(f), t.parameters...}]
+    return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Any}, Cint), args, length(args))
+end
+
+function return_type(@nospecialize(f), t::DataType, world::UInt)
+    return return_type(Tuple{Core.Typeof(f), t.parameters...}, world)
+end
+
+function return_type(t::DataType)
+    world = tls_world_age()
+    return return_type(t, world)
+end
+
+function return_type(t::DataType, world::UInt)
+    args = Any[_return_type, NativeInterpreter(world), t]
+    return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Any}, Cint), args, length(args))
+end
+
+function _return_type(interp::AbstractInterpreter, t::DataType)
+    rt = Union{}
+    f = singleton_type(t.parameters[1])
+    if isa(f, Builtin)
+        args = Any[t.parameters...]
+        popfirst!(args)
+        rt = builtin_tfunction(interp, f, args, nothing)
+        rt = widenconst(rt)
+    else
+        for match in _methods_by_ftype(t, -1, get_inference_world(interp))::Vector
+            ty = typeinf_type(interp, match::MethodMatch)
+            ty === nothing && return Any
+            rt = tmerge(rt, ty)
+            rt === Any && break
+        end
+    end
+    return rt
+end
diff --git a/base/compiler/typelattice.jl b/Compiler/src/typelattice.jl
similarity index 80%
rename from base/compiler/typelattice.jl
rename to Compiler/src/typelattice.jl
index 324f2b600cc44..f4c3b051d3e3f 100644
--- a/base/compiler/typelattice.jl
+++ b/Compiler/src/typelattice.jl
@@ -6,22 +6,19 @@
 
 # N.B.: Const/PartialStruct/InterConditional are defined in Core, to allow them to be used
 # inside the global code cache.
-#
-# # The type of a value might be constant
-# struct Const
-#     val
-# end
-#
-# struct PartialStruct
-#     typ
-#     fields::Vector{Any} # elements are other type lattice members
-# end
-import Core: Const, PartialStruct
-function PartialStruct(@nospecialize(typ), fields::Vector{Any})
-    for i = 1:length(fields)
-        assert_nested_slotwrapper(fields[i])
+import Core: Const, InterConditional, PartialStruct
+
+function may_form_limited_typ(@nospecialize(aty), @nospecialize(bty), @nospecialize(xty))
+    if aty isa LimitedAccuracy
+        if bty isa LimitedAccuracy
+            return LimitedAccuracy(xty, union!(copy(aty.causes), bty.causes))
+        else
+            return LimitedAccuracy(xty, copy(aty.causes))
+        end
+    elseif bty isa LimitedAccuracy
+        return LimitedAccuracy(xty, copy(bty.causes))
     end
-    return Core._PartialStruct(typ, fields)
+    return nothing
 end
 
 """
@@ -48,34 +45,21 @@ struct Conditional
     slot::Int
     thentype
     elsetype
-    function Conditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype))
+    # `isdefined` indicates this `Conditional` is from `@isdefined slot`, implying that
+    # the `undef` information of `slot` can be improved in the then branch.
+    # Since this is only beneficial for local inference, it is not translated into `InterConditional`.
+    isdefined::Bool
+    function Conditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype);
+                         isdefined::Bool=false)
         assert_nested_slotwrapper(thentype)
         assert_nested_slotwrapper(elsetype)
-        return new(slot, thentype, elsetype)
+        limited = may_form_limited_typ(thentype, elsetype, Bool)
+        limited !== nothing && return limited
+        return new(slot, thentype, elsetype, isdefined)
     end
 end
-Conditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) =
-    Conditional(slot_id(var), thentype, elsetype)
-
-"""
-    cnd::InterConditional
-
-Similar to `Conditional`, but conveys inter-procedural constraints imposed on call arguments.
-This is separate from `Conditional` to catch logic errors: the lattice element name is `InterConditional`
-while processing a call, then `Conditional` everywhere else. Thus `InterConditional` does not appear in
-`CompilerTypes`—these type's usages are disjoint—though we define the lattice for `InterConditional`.
-"""
-:(InterConditional)
-import Core: InterConditional
-# struct InterConditional
-#     slot::Int
-#     thentype
-#     elsetype
-#     InterConditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype)) =
-#         new(slot, thentype, elsetype)
-# end
-InterConditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) =
-    InterConditional(slot_id(var), thentype, elsetype)
+Conditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype); isdefined::Bool=false) =
+    Conditional(slot_id(var), thentype, elsetype; isdefined)
 
 const AnyConditional = Union{Conditional,InterConditional}
 Conditional(cnd::InterConditional) = Conditional(cnd.slot, cnd.thentype, cnd.elsetype)
@@ -114,14 +98,14 @@ struct MustAlias
         assert_nested_slotwrapper(fldtyp)
         # @assert !isalreadyconst(vartyp) "vartyp is already const"
         # @assert !isalreadyconst(fldtyp) "fldtyp is already const"
+        limited = may_form_limited_typ(vartyp, fldtyp, fldtyp)
+        limited !== nothing && return limited
         return new(slot, vartyp, fldidx, fldtyp)
     end
 end
 MustAlias(var::SlotNumber, @nospecialize(vartyp), fldidx::Int, @nospecialize(fldtyp)) =
     MustAlias(slot_id(var), vartyp, fldidx, fldtyp)
 
-_uniontypes(x::MustAlias, ts) = _uniontypes(widenconst(x), ts)
-
 """
     alias::InterMustAlias
 
@@ -137,6 +121,8 @@ struct InterMustAlias
         assert_nested_slotwrapper(fldtyp)
         # @assert !isalreadyconst(vartyp) "vartyp is already const"
         # @assert !isalreadyconst(fldtyp) "fldtyp is already const"
+        limited = may_form_limited_typ(vartyp, fldtyp, fldtyp)
+        limited !== nothing && return limited
         return new(slot, vartyp, fldidx, fldtyp)
     end
 end
@@ -159,8 +145,8 @@ end
 struct StateUpdate
     var::SlotNumber
     vtype::VarState
-    state::VarTable
     conditional::Bool
+    StateUpdate(var::SlotNumber, vtype::VarState, conditional::Bool=false) = new(var, vtype, conditional)
 end
 
 """
@@ -224,11 +210,6 @@ struct NotFound end
 
 const NOT_FOUND = NotFound()
 
-const CompilerTypes = Union{Const, Conditional, MustAlias, NotFound, PartialStruct}
-==(x::CompilerTypes, y::CompilerTypes) = x === y
-==(x::Type, y::CompilerTypes) = false
-==(x::CompilerTypes, y::Type) = false
-
 #################
 # lattice logic #
 #################
@@ -286,11 +267,17 @@ end
 
 # `Conditional` and `InterConditional` are valid in opposite contexts
 # (i.e. local inference and inter-procedural call), as such they will never be compared
-@nospecializeinfer function issubconditional(lattice::AbstractLattice, a::C, b::C) where {C<:AnyConditional}
+@nospecializeinfer issubconditional(𝕃::AbstractLattice, a::Conditional, b::Conditional) =
+    _issubconditional(𝕃, a, b, #=check_isdefined=#true)
+@nospecializeinfer issubconditional(𝕃::AbstractLattice, a::InterConditional, b::InterConditional) =
+    _issubconditional(𝕃, a, b, #=check_isdefined=#false)
+@nospecializeinfer function _issubconditional(𝕃::AbstractLattice, a::C, b::C, check_isdefined::Bool) where C<:AnyConditional
     if is_same_conditionals(a, b)
-        if ⊑(lattice, a.thentype, b.thentype)
-            if ⊑(lattice, a.elsetype, b.elsetype)
-                return true
+        if ⊑(𝕃, a.thentype, b.thentype)
+            if ⊑(𝕃, a.elsetype, b.elsetype)
+                if !check_isdefined || a.isdefined ≥ b.isdefined
+                    return true
+                end
             end
         end
     end
@@ -323,7 +310,7 @@ end
 
 @nospecializeinfer function isalreadyconst(@nospecialize t)
     isa(t, Const) && return true
-    isa(t, DataType) && isdefined(t, :instance) && return true
+    issingletontype(t) && return true
     return isconstType(t)
 end
 
@@ -350,15 +337,15 @@ end
         fields = vartyp.fields
         thenfields = thentype === Bottom ? nothing : copy(fields)
         elsefields = elsetype === Bottom ? nothing : copy(fields)
-        for i in 1:length(fields)
-            if i == fldidx
-                thenfields === nothing || (thenfields[i] = thentype)
-                elsefields === nothing || (elsefields[i] = elsetype)
-            end
+        undefs = copy(_getundefs(vartyp))
+        if 1 ≤ fldidx ≤ length(fields)
+            thenfields === nothing || (thenfields[fldidx] = thentype)
+            elsefields === nothing || (elsefields[fldidx] = elsetype)
+            undefs[fldidx] = false
         end
         return Conditional(slot,
-            thenfields === nothing ? Bottom : PartialStruct(vartyp.typ, thenfields),
-            elsefields === nothing ? Bottom : PartialStruct(vartyp.typ, elsefields))
+            thenfields === nothing ? Bottom : PartialStruct(fallback_lattice, vartyp.typ, undefs, thenfields),
+            elsefields === nothing ? Bottom : PartialStruct(fallback_lattice, vartyp.typ, undefs, elsefields))
     else
         vartyp_widened = widenconst(vartyp)
         thenfields = thentype === Bottom ? nothing : Any[]
@@ -374,8 +361,8 @@ end
             end
         end
         return Conditional(slot,
-            thenfields === nothing ? Bottom : PartialStruct(vartyp_widened, thenfields),
-            elsefields === nothing ? Bottom : PartialStruct(vartyp_widened, elsefields))
+            thenfields === nothing ? Bottom : PartialStruct(fallback_lattice, vartyp_widened, thenfields),
+            elsefields === nothing ? Bottom : PartialStruct(fallback_lattice, vartyp_widened, elsefields))
     end
 end
 
@@ -394,8 +381,8 @@ ignorelimited(typ::LimitedAccuracy) = typ.typ
 # =============
 
 @nospecializeinfer function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b))
-    r = ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b))
-    r || return false
+    ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b)) || return false
+
     isa(b, LimitedAccuracy) || return true
 
     # We've found that ignorelimited(a) ⊑ ignorelimited(b).
@@ -427,6 +414,13 @@ end
         end
         a = Bool
     elseif isa(b, ConditionalT)
+        if isa(a, Const) && isa(a.val, Bool)
+           if (a.val === true && b.thentype === Any && b.elsetype === Bottom) ||
+              (a.val === false && b.elsetype === Any && b.thentype === Bottom)
+               # this Conditional contains distinctly no lattice information, and is simply an alternative representation of the Const Bool used for internal tracking purposes
+               return true
+           end
+        end
         return false
     end
     return ⊑(widenlattice(lattice), a, b)
@@ -448,13 +442,16 @@ end
 @nospecializeinfer function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b))
     if isa(a, PartialStruct)
         if isa(b, PartialStruct)
-            if !(length(a.fields) == length(b.fields) && a.typ <: b.typ)
-                return false
-            end
-            for i in 1:length(b.fields)
+            a.typ <: b.typ || return false
+            nflds = length(a.fields)
+            nflds == length(b.fields) || return false
+            for i in 1:nflds
+                if !(_getundefs(b)[i] === nothing || _getundefs(a)[i] === _getundefs(b)[i])
+                    return false
+                end
                 af = a.fields[i]
                 bf = b.fields[i]
-                if i == length(b.fields)
+                if i == nflds
                     if isvarargtype(af)
                         # If `af` is vararg, so must bf by the <: above
                         @assert isvarargtype(bf)
@@ -472,19 +469,31 @@ end
         return isa(b, Type) && a.typ <: b
     elseif isa(b, PartialStruct)
         if isa(a, Const)
-            nf = nfields(a.val)
-            nf == length(b.fields) || return false
             widea = widenconst(a)::DataType
             wideb = widenconst(b)
             wideb′ = unwrap_unionall(wideb)::DataType
             widea.name === wideb′.name || return false
-            # We can skip the subtype check if b is a Tuple, since in that
-            # case, the ⊑ of the elements is sufficient.
-            if wideb′.name !== Tuple.name && !(widea <: wideb)
-                return false
+            if wideb′.name === Tuple.name
+                # We can skip the subtype check if b is a Tuple, since in that
+                # case, the ⊑ of the elements is sufficient.
+                # But for tuple comparisons, we need their lengths to be the same for now.
+                # TODO improve accuracy for cases when `b` contains vararg element
+                nfields(a.val) == length(b.fields) || return false
+            else
+                widea <: wideb || return false
+                # for structs we need to check that `a` does not have less information than `b` that may be partially initialized
+                n_initialized(a) ≥ n_initialized(b) || return false
             end
+            nf = nfields(a.val)
             for i in 1:nf
-                isdefined(a.val, i) || continue # since ∀ T Union{} ⊑ T
+                if !isdefined(a.val, i)
+                    _getundefs(b)[i] === false && return false # conflicting defined-ness information
+                    continue # since ∀ T Union{} ⊑ T
+                end
+                i > length(b.fields) && break # `a` has more information than `b` that is partially initialized struct
+                if _getundefs(b)[i] === true
+                    return false # conflicting defined-ness information
+                end
                 bfᵢ = b.fields[i]
                 if i == nf
                     bfᵢ = unwrapva(bfᵢ)
@@ -555,6 +564,7 @@ end
     if isa(a, PartialStruct)
         isa(b, PartialStruct) || return false
         length(a.fields) == length(b.fields) || return false
+        _getundefs(a) == _getundefs(b) || return false
         widenconst(a) == widenconst(b) || return false
         a.fields === b.fields && return true # fast path
         for i in 1:length(a.fields)
@@ -607,7 +617,7 @@ end
         if ti === widev
             return v
         end
-        valid_as_lattice(ti) || return Bottom
+        valid_as_lattice(ti, true) || return Bottom
         if widev <: Tuple
             new_fields = Vector{Any}(undef, length(v.fields))
             for i = 1:length(new_fields)
@@ -631,7 +641,7 @@ end
             return v
         end
         ti = typeintersect(widev, t)
-        valid_as_lattice(ti) || return Bottom
+        valid_as_lattice(ti, true) || return Bottom
         return PartialOpaque(ti, v.env, v.parent, v.source)
     end
     return tmeet(widenlattice(lattice), v, t)
@@ -692,7 +702,7 @@ widenconst(::AnyConditional) = Bool
 widenconst(a::AnyMustAlias) = widenconst(widenmustalias(a))
 widenconst(c::Const) = (v = c.val; isa(v, Type) ? Type{v} : typeof(v))
 widenconst(::PartialTypeVar) = TypeVar
-widenconst(t::PartialStruct) = t.typ
+widenconst(t::Core.PartialStruct) = t.typ
 widenconst(t::PartialOpaque) = t.typ
 @nospecializeinfer widenconst(@nospecialize t::Type) = t
 widenconst(::TypeVar) = error("unhandled TypeVar")
@@ -724,28 +734,6 @@ function invalidate_slotwrapper(vt::VarState, changeid::Int, ignore_conditional:
     return nothing
 end
 
-function stupdate!(lattice::AbstractLattice, state::VarTable, changes::StateUpdate)
-    changed = false
-    changeid = slot_id(changes.var)
-    for i = 1:length(state)
-        if i == changeid
-            newtype = changes.vtype
-        else
-            newtype = changes.state[i]
-        end
-        invalidated = invalidate_slotwrapper(newtype, changeid, changes.conditional)
-        if invalidated !== nothing
-            newtype = invalidated
-        end
-        oldtype = state[i]
-        if schanged(lattice, newtype, oldtype)
-            state[i] = smerge(lattice, oldtype, newtype)
-            changed = true
-        end
-    end
-    return changed
-end
-
 function stupdate!(lattice::AbstractLattice, state::VarTable, changes::VarTable)
     changed = false
     for i = 1:length(state)
@@ -759,24 +747,6 @@ function stupdate!(lattice::AbstractLattice, state::VarTable, changes::VarTable)
     return changed
 end
 
-function stupdate1!(lattice::AbstractLattice, state::VarTable, change::StateUpdate)
-    changeid = slot_id(change.var)
-    for i = 1:length(state)
-        invalidated = invalidate_slotwrapper(state[i], changeid, change.conditional)
-        if invalidated !== nothing
-            state[i] = invalidated
-        end
-    end
-    # and update the type of it
-    newtype = change.vtype
-    oldtype = state[changeid]
-    if schanged(lattice, newtype, oldtype)
-        state[changeid] = smerge(lattice, oldtype, newtype)
-        return true
-    end
-    return false
-end
-
 function stoverwrite!(state::VarTable, newstate::VarTable)
     for i = 1:length(state)
         state[i] = newstate[i]
@@ -797,3 +767,25 @@ function stoverwrite1!(state::VarTable, change::StateUpdate)
     state[changeid] = newtype
     return state
 end
+
+# The ::AbstractLattice argument is unused and simply serves to disambiguate
+# different instances of the compiler that may share the `Core.PartialStruct`
+# type.
+
+# Legacy constructor
+function Core.PartialStruct(𝕃::AbstractLattice, @nospecialize(typ), fields::Vector{Any})
+    undefs = partialstruct_init_undefs(typ, fields)
+    undefs === nothing && error("This object never exists at runtime")
+    return PartialStruct(𝕃, typ, undefs, fields)
+end
+
+function Core.PartialStruct(::AbstractLattice, @nospecialize(typ), undefs::Vector{Union{Nothing,Bool}}, fields::Vector{Any})
+    for i = 1:length(fields)
+        assert_nested_slotwrapper(fields[i])
+    end
+    return PartialStruct(typ, undefs, fields)
+end
+
+# a special getter for `PartialStruct` to achieve better type stability:
+# `(x::PartialStruct).undefs` will be lowered to `getfield(x, :undefs)::Any` otherwise
+_getundefs(p::PartialStruct) = Base.getproperty(p, :undefs)
diff --git a/base/compiler/typelimits.jl b/Compiler/src/typelimits.jl
similarity index 77%
rename from base/compiler/typelimits.jl
rename to Compiler/src/typelimits.jl
index b648144ea3bd1..ce4a2a1ccbe91 100644
--- a/base/compiler/typelimits.jl
+++ b/Compiler/src/typelimits.jl
@@ -116,15 +116,31 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
             return Union{a, b}
         end
     elseif isa(t, DataType)
-        if isType(t) # see equivalent case in type_more_complex
-            tt = unwrap_unionall(t.parameters[1])
-            if isa(tt, Union) || isa(tt, TypeVar) || isType(tt)
-                is_derived_type_from_any(tt, sources, depth + 1) && return t
+        if isType(t)
+            # Type is fairly important, so do not widen it as fast as other types if avoidable
+            tt = t.parameters[1]
+            ttu = unwrap_unionall(tt) # TODO: use argument_datatype(tt) after #50692 fixed
+            # must forbid nesting through this if we detect that potentially occurring
+            # we already know !is_derived_type_from_any so refuse to recurse here
+            if !isa(ttu, DataType)
+                return Type
+            elseif isType(ttu)
+                return Type{<:Type}
+            end
+            # try to peek into c to get a comparison object, but if we can't perhaps t is already simple enough on its own
+            if isType(c)
+                ct = c.parameters[1]
             else
-                isType(c) && (c = unwrap_unionall(c.parameters[1]))
-                type_more_complex(tt, c, sources, depth, 0, 0) || return t
+                ct = Union{}
             end
-            return Type
+            Qt = __limit_type_size(tt, ct, sources, depth + 1, 0)
+            Qt === tt && return t
+            Qt === Any && return Type
+            # Can't form Type{<:Qt} just yet, without first make sure we limited the depth
+            # enough, since this moves Qt outside of Type for is_derived_type_from_any
+            Qt = __limit_type_size(tt, ct, sources, depth + 2, 0)
+            Qt === Any && return Type
+            return Type{<:Qt}
         elseif isa(c, DataType)
             tP = t.parameters
             cP = c.parameters
@@ -157,10 +173,11 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
             end
         end
         if allowed_tuplelen < 1 && t.name === Tuple.name
+            # forbid nesting Tuple{Tuple{Tuple...}} through this
             return Any
         end
         widert = t.name.wrapper
-        if !(t <: widert)
+        if !(t <: widert) # XXX: we should call has_free_typevars(t) here, but usually t does not have those wrappers by the time it got here
             # This can happen when a typevar has bounds too wide for its context, e.g.
             # `Complex{T} where T` is not a subtype of `Complex`. In that case widen even
             # faster to something safe to ensure the result is a supertype of the input.
@@ -211,20 +228,22 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
         return false # Bottom is as simple as they come
     elseif isa(t, DataType) && isempty(t.parameters)
         return false # fastpath: unparameterized types are always finite
-    elseif tupledepth > 0 && is_derived_type_from_any(unwrap_unionall(t), sources, depth)
+    elseif is_derived_type_from_any(unwrap_unionall(t), sources, depth)
         return false # t isn't something new
     end
     # peel off wrappers
     isvarargtype(t) && (t = unwrapva(t))
     isvarargtype(c) && (c = unwrapva(c))
     if isa(c, UnionAll)
-        # allow wrapping type with fewer UnionAlls than comparison if in a covariant context
+        # allow wrapping type with fewer UnionAlls than comparison only if in a covariant context
         if !isa(t, UnionAll) && tupledepth == 0
             return true
         end
-        t = unwrap_unionall(t)
         c = unwrap_unionall(c)
     end
+    if isa(t, UnionAll)
+        t = unwrap_unionall(t)
+    end
     # rules for various comparison types
     if isa(c, TypeVar)
         tupledepth = 1
@@ -248,16 +267,22 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
     if isa(t, DataType)
         tP = t.parameters
         if isType(t)
-            # Treat Type{T} and T as equivalent to allow taking typeof any
-            # source type (DataType) anywhere as Type{...}, as long as it isn't
-            # nesting as Type{Type{...}}
-            tt = unwrap_unionall(t.parameters[1])
-            if isa(tt, Union) || isa(tt, TypeVar) || isType(tt)
-                return !is_derived_type_from_any(tt, sources, depth + 1)
+            # Type is fairly important, so do not widen it as fast as other types if avoidable
+            tt = tP[1]
+            # ttu = unwrap_unionall(tt) # TODO: use argument_datatype(tt) after #50692 fixed
+            if isType(c)
+                ct = c.parameters[1]
             else
-                isType(c) && (c = unwrap_unionall(c.parameters[1]))
-                return type_more_complex(tt, c, sources, depth, 0, 0)
+                ct = Union{}
+                tupledepth == 0 && return true # cannot allow nesting
             end
+            # allow creating variation within a nested Type, but not very deep
+            if tupledepth > 1
+                tupledepth = 1
+            else
+                tupledepth = 0
+            end
+            return type_more_complex(tt, ct, sources, depth + 1, tupledepth, 0)
         elseif isa(c, DataType) && t.name === c.name
             cP = c.parameters
             length(cP) < length(tP) && return true
@@ -270,22 +295,9 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
             else
                 tupledepth = 0
             end
-            isgenerator = (t.name.name === :Generator && t.name.module === _topmod(t.name.module))
             for i = 1:length(tP)
                 tPi = tP[i]
                 cPi = cP[i + ntail]
-                if isgenerator
-                    let tPi = unwrap_unionall(tPi),
-                        cPi = unwrap_unionall(cPi)
-                        if isa(tPi, DataType) && isa(cPi, DataType) &&
-                            !isabstracttype(tPi) && !isabstracttype(cPi) &&
-                                sym_isless(cPi.name.name, tPi.name.name)
-                            # allow collect on (anonymous) Generators to nest, provided that their functions are appropriately ordered
-                            # TODO: is there a better way?
-                            continue
-                        end
-                    end
-                end
                 type_more_complex(tPi, cPi, sources, depth + 1, tupledepth, 0) && return true
             end
             return false
@@ -298,10 +310,27 @@ union_count_abstract(x::Union) = union_count_abstract(x.a) + union_count_abstrac
 union_count_abstract(@nospecialize(x)) = !isdispatchelem(x)
 
 function issimpleenoughtype(@nospecialize t)
-    return unionlen(t) + union_count_abstract(t) <= MAX_TYPEUNION_LENGTH &&
+    ut = unwrap_unionall(t)
+    ut isa DataType && ut.name.wrapper == t && return true
+    return max(unionlen(t), union_count_abstract(t) + 1) <= MAX_TYPEUNION_LENGTH &&
            unioncomplexity(t) <= MAX_TYPEUNION_COMPLEXITY
 end
 
+# We may want to apply a stricter limit than issimpleenoughtype to
+# tupleelements individually, to try to keep the whole tuple under the limit,
+# even after complicated recursion and other operations on it elsewhere
+const issimpleenoughtupleelem = issimpleenoughtype
+
+function n_initialized(t::Const)
+    nf = nfields(t.val)
+    return something(findfirst(i::Int->!isdefined(t.val,i), 1:nf), nf+1)-1
+end
+function n_initialized(pstruct::PartialStruct)
+    undefs = _getundefs(pstruct)
+    nf = length(undefs)
+    return something(findfirst(i::Int->undefs[i]!==false, 1:nf), nf+1)-1
+end
+
 # A simplified type_more_complex query over the extended lattice
 # (assumes typeb ⊑ typea)
 @nospecializeinfer function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb))
@@ -309,6 +338,14 @@ end
     typea === typeb && return true
     if typea isa PartialStruct
         aty = widenconst(typea)
+        if typeb isa Const || typeb isa PartialStruct
+            @assert n_initialized(typea) ≤ n_initialized(typeb) "typeb ⊑ typea is assumed"
+        elseif typeb isa PartialStruct
+            @assert n_initialized(typea) ≤ n_initialized(typeb) &&
+                all(b === nothing || a === b for (a, b) in zip(_getundefs(typea), _getundefs(typeb))) "typeb ⊑ typea is assumed"
+        else
+            return false
+        end
         for i = 1:length(typea.fields)
             ai = unwrapva(typea.fields[i])
             bi = fieldtype(aty, i)
@@ -553,34 +590,39 @@ end
 
 # N.B. This can also be called with both typea::Const and typeb::Const to
 # to recover PartialStruct from `Const`s with overlapping fields.
-@nospecializeinfer function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb))
+@nospecializeinfer function tmerge_partial_struct(𝕃::PartialsLattice, @nospecialize(typea), @nospecialize(typeb))
     aty = widenconst(typea)
     bty = widenconst(typeb)
-    if aty === bty
-        # must have egal here, since we do not create PartialStruct for non-concrete types
-        typea_nfields = nfields_tfunc(lattice, typea)
-        typeb_nfields = nfields_tfunc(lattice, typeb)
-        isa(typea_nfields, Const) || return nothing
-        isa(typeb_nfields, Const) || return nothing
-        type_nfields = typea_nfields.val::Int
-        type_nfields === typeb_nfields.val::Int || return nothing
-        type_nfields == 0 && return nothing
-        fields = Vector{Any}(undef, type_nfields)
-        anyrefine = false
-        for i = 1:type_nfields
-            ai = getfield_tfunc(lattice, typea, Const(i))
-            bi = getfield_tfunc(lattice, typeb, Const(i))
-            # N.B.: We're assuming here that !isType(aty), because that case
-            # only arises when typea === typeb, which should have been caught
-            # before calling this.
+    if aty === bty && !isType(aty)
+        if typea isa PartialStruct
+            if typeb isa PartialStruct
+                nflds = length(typea.fields)
+                @assert nflds == length(typeb.fields)
+            else
+                nflds = length(typea.fields)
+            end
+        elseif typeb isa PartialStruct
+            nflds = length(typeb.fields)
+        else
+            nflds = fieldcount(aty)
+        end
+        nflds == 0 && return nothing
+        undefs = Union{Nothing,Bool}[nothing for _ in 1:nflds]
+        fields = Vector{Any}(undef, nflds)
+        fldmin = datatype_min_ninitialized(aty)
+        n_initialized_merged = min(n_initialized(typea), n_initialized(typeb))
+        anyrefine = n_initialized_merged > fldmin
+        for i = 1:nflds
+            ai = getfield_tfunc(𝕃, typea, Const(i))
+            bi = getfield_tfunc(𝕃, typeb, Const(i))
             ft = fieldtype(aty, i)
-            if is_lattice_equal(lattice, ai, bi) || is_lattice_equal(lattice, ai, ft)
+            if is_lattice_equal(𝕃, ai, bi) || is_lattice_equal(𝕃, ai, ft)
                 # Since ai===bi, the given type has no restrictions on complexity.
                 # and can be used to refine ft
                 tyi = ai
-            elseif is_lattice_equal(lattice, bi, ft)
+            elseif is_lattice_equal(𝕃, bi, ft)
                 tyi = bi
-            elseif (tyi′ = tmerge_field(lattice, ai, bi); tyi′ !== nothing)
+            elseif (tyi′ = tmerge_field(𝕃, ai, bi); tyi′ !== nothing)
                 # allow external lattice implementation to provide a custom field-merge strategy
                 tyi = tyi′
             else
@@ -601,12 +643,34 @@ end
                 end
             end
             fields[i] = tyi
+            if typea isa PartialStruct
+                aundefᵢ = _getundefs(typea)[i]
+                if typeb isa PartialStruct
+                    if aundefᵢ === _getundefs(typeb)[i]
+                        undefs[i] = aundefᵢ
+                    end
+                else
+                    if aundefᵢ === !isdefined(typeb.val, i)
+                        undefs[i] = aundefᵢ
+                    end
+                end
+            elseif typeb isa PartialStruct
+                bundefᵢ = _getundefs(typeb)[i]
+                if !isdefined(typea.val, i) === bundefᵢ
+                    undefs[i] = bundefᵢ
+                end
+            else
+                aundefᵢ = isdefined(typea.val, i)
+                if aundefᵢ === isdefined(typeb.val, i)
+                    undefs[i] = !aundefᵢ
+                end
+            end
             if !anyrefine
-                anyrefine = has_nontrivial_extended_info(lattice, tyi) || # extended information
-                            ⋤(lattice, tyi, ft) # just a type-level information, but more precise than the declared type
+                anyrefine = has_nontrivial_extended_info(𝕃, tyi) || # extended information
+                            ⋤(𝕃, tyi, ft) # just a type-level information, but more precise than the declared type
             end
         end
-        anyrefine && return PartialStruct(aty, fields)
+        anyrefine && return PartialStruct(𝕃, aty, undefs, fields)
     end
     return nothing
 end
@@ -669,7 +733,7 @@ end
     return tmerge(wl, typea, typeb)
 end
 
-@nospecializeinfer function tmerge(::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb::Type))
+@nospecializeinfer function tmerge(lattice::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb::Type))
     # it's always ok to form a Union of two concrete types
     act = isconcretetype(typea)
     bct = isconcretetype(typeb)
@@ -680,11 +744,38 @@ end
     if (act || isType(typea)) && (bct || isType(typeb))
         return Union{typea, typeb}
     end
-    typea <: typeb && return typeb
-    typeb <: typea && return typea
+    u = tmerge_fast_path(lattice, typea, typeb)
+    u === nothing || return u
     return tmerge_types_slow(typea, typeb)
 end
 
+@nospecializeinfer @noinline function tname_intersect(aname::Core.TypeName, bname::Core.TypeName)
+    aname === bname && return aname
+    if !isabstracttype(aname.wrapper) && !isabstracttype(bname.wrapper)
+        return nothing # fast path
+    end
+    Any.name === aname && return aname
+    a = unwrap_unionall(aname.wrapper)
+    heighta = 0
+    while a !== Any
+        heighta += 1
+        a = a.super
+    end
+    b = unwrap_unionall(bname.wrapper)
+    heightb = 0
+    while b !== Any
+        b.name === aname && return aname
+        heightb += 1
+        b = b.super
+    end
+    a = unwrap_unionall(aname.wrapper)
+    while heighta > heightb
+        a = a.super
+        heighta -= 1
+    end
+    return a.name === bname ? bname : nothing
+end
+
 @nospecializeinfer @noinline function tmerge_types_slow(@nospecialize(typea::Type), @nospecialize(typeb::Type))
     # collect the list of types from past tmerge calls returning Union
     # and then reduce over that list
@@ -708,73 +799,97 @@ end
     # see if any of the union elements have the same TypeName
     # in which case, simplify this tmerge by replacing it with
     # the widest possible version of itself (the wrapper)
+    simplify = falses(length(types))
     for i in 1:length(types)
+        typenames[i] === Any.name && continue
         ti = types[i]
         for j in (i + 1):length(types)
-            if typenames[i] === typenames[j]
+            typenames[j] === Any.name && continue
+            ijname = tname_intersect(typenames[i], typenames[j])
+            if !(ijname === nothing)
                 tj = types[j]
                 if ti <: tj
                     types[i] = Union{}
                     typenames[i] = Any.name
+                    simplify[i] = false
+                    simplify[j] = true
                     break
                 elseif tj <: ti
                     types[j] = Union{}
                     typenames[j] = Any.name
+                    simplify[j] = false
+                    simplify[i] = true
                 else
-                    if typenames[i] === Tuple.name
+                    if ijname === Tuple.name
                         # try to widen Tuple slower: make a single non-concrete Tuple containing both
                         # converge the Tuple element-wise if they are the same length
                         # see 4ee2b41552a6bc95465c12ca66146d69b354317b, be59686f7613a2ccfd63491c7b354d0b16a95c05,
                         widen = tuplemerge(unwrap_unionall(ti)::DataType, unwrap_unionall(tj)::DataType)
                         widen = rewrap_unionall(rewrap_unionall(widen, ti), tj)
+                        simplify[j] = false
                     else
-                        wr = typenames[i].wrapper
+                        wr = ijname.wrapper
                         uw = unwrap_unionall(wr)::DataType
                         ui = unwrap_unionall(ti)::DataType
+                        while ui.name !== ijname
+                            ui = ui.super
+                        end
                         uj = unwrap_unionall(tj)::DataType
-                        merged = wr
+                        while uj.name !== ijname
+                            uj = uj.super
+                        end
+                        p = Vector{Any}(undef, length(uw.parameters))
+                        usep = true
+                        widen = wr
                         for k = 1:length(uw.parameters)
                             ui_k = ui.parameters[k]
                             if ui_k === uj.parameters[k] && !has_free_typevars(ui_k)
-                                merged = merged{ui_k}
+                                p[k] = ui_k
+                                usep = true
                             else
-                                merged = merged{uw.parameters[k]}
+                                p[k] = uw.parameters[k]
                             end
                         end
-                        widen = rewrap_unionall(merged, wr)
+                        if usep
+                            widen = rewrap_unionall(wr{p...}, wr)
+                            widen <: wr || (widen = wr) # sometimes there are cross-constraints on wr that we may lose in this process, but that would cause future calls to this to need to return Any, which is undesirable
+                        end
+                        simplify[j] = !usep
                     end
                     types[i] = Union{}
                     typenames[i] = Any.name
+                    simplify[i] = false
                     types[j] = widen
+                    typenames[j] = ijname
                     break
                 end
             end
         end
     end
-    u = Union{types...}
-    # don't let type unions get too big, if the above didn't reduce it enough
-    if issimpleenoughtype(u)
-        return u
-    end
-    # don't let the slow widening of Tuple cause the whole type to grow too fast
+    # don't let elements of the union get too big, if the above didn't reduce something enough
+    # Specifically widen Tuple{..., Union{lots of stuff}...} to Tuple{..., Any, ...}
+    # Don't let Val{<:Val{<:Val}} keep nesting abstract levels either
     for i in 1:length(types)
+        simplify[i] || continue
+        ti = types[i]
+        issimpleenoughtype(ti) && continue
         if typenames[i] === Tuple.name
-            widen = unwrap_unionall(types[i])
-            if isa(widen, DataType) && !isvatuple(widen)
-                widen = NTuple{length(widen.parameters), Any}
-            else
-                widen = Tuple
+            # otherwise we need to do a simple version of tuplemerge for one element now
+            tip = (unwrap_unionall(ti)::DataType).parameters
+            lt = length(tip)
+            p = Vector{Any}(undef, lt)
+            for j = 1:lt
+                ui = tip[j]
+                p[j] = issimpleenoughtupleelem(unwrapva(ui)) ? ui : isvarargtype(ui) ? Vararg : Any
             end
-            types[i] = widen
-            u = Union{types...}
-            if issimpleenoughtype(u)
-                return u
-            end
-            break
+            types[i] = rewrap_unionall(Tuple{p...}, ti)
+        else
+            # this element is not simple enough yet, make it so now
+            types[i] = typenames[i].wrapper
         end
     end
-    # finally, just return the widest possible type
-    return Any
+    u = Union{types...}
+    return u
 end
 
 # the inverse of switchtupleunion, with limits on max element union size
@@ -796,7 +911,7 @@ function tuplemerge(a::DataType, b::DataType)
     p = Vector{Any}(undef, lt + vt)
     for i = 1:lt
         ui = Union{ap[i], bp[i]}
-        p[i] = issimpleenoughtype(ui) ? ui : Any
+        p[i] = issimpleenoughtupleelem(ui) ? ui : Any
     end
     # merge the remaining tail into a single, simple Tuple{Vararg{T}} (#22120)
     if vt
@@ -814,8 +929,10 @@ function tuplemerge(a::DataType, b::DataType)
                 #   or (equivalently?) iteratively took super-types until reaching a common wrapper
                 #   e.g. consider the results of `tuplemerge(Tuple{Complex}, Tuple{Number, Int})` and of
                 #   `tuplemerge(Tuple{Int}, Tuple{String}, Tuple{Int, String})`
-                if !(ti <: tail)
-                    if tail <: ti
+                #   c.f. tname_intersect in the algorithm above
+                hasfree = has_free_typevars(ti)
+                if hasfree || !(ti <: tail)
+                    if !hasfree && tail <: ti
                         tail = ti # widen to ti
                     else
                         uw = unwrap_unionall(tail)
@@ -843,11 +960,16 @@ function tuplemerge(a::DataType, b::DataType)
                         end
                     end
                 end
-                tail === Any && return Tuple # short-circuit loop
+                tail === Any && return Tuple # short-circuit loops
             end
         end
         @assert !(tail === Union{})
-        p[lt + 1] = Vararg{tail}
+        if !issimpleenoughtupleelem(tail) || tail === Any
+            p[lt + 1] = Vararg
+            lt == 0 && return Tuple
+        else
+            p[lt + 1] = Vararg{tail}
+        end
     end
     return Tuple{p...}
 end
diff --git a/base/compiler/types.jl b/Compiler/src/types.jl
similarity index 58%
rename from base/compiler/types.jl
rename to Compiler/src/types.jl
index c53256c61ace9..6f0cb51456209 100644
--- a/base/compiler/types.jl
+++ b/Compiler/src/types.jl
@@ -1,5 +1,18 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+const WorkThunk = Any
+# #@eval struct WorkThunk
+#    thunk::Core.OpaqueClosure{Tuple{Vector{Tasks}}, Bool}
+#    WorkThunk(work) = new($(Expr(:opaque_closure, :(Tuple{Vector{Tasks}}), :Bool, :Bool, :((tasks) -> work(tasks))))) # @opaque Vector{Tasks}->Bool (tasks)->work(tasks)
+# end
+# (p::WorkThunk)() = p.thunk()
+
+# This corresponds to the type of `CodeInfo`'s `inlining_cost` field
+const InlineCostType = UInt16
+const MAX_INLINE_COST = typemax(InlineCostType)
+const MIN_INLINE_COST = InlineCostType(10)
+const MaybeCompressed = Union{CodeInfo, String}
+
 """
     AbstractInterpreter
 
@@ -12,14 +25,20 @@ If `interp::NewInterpreter` is an `AbstractInterpreter`, it is expected to provi
 the following methods to satisfy the `AbstractInterpreter` API requirement:
 - `InferenceParams(interp::NewInterpreter)` - return an `InferenceParams` instance
 - `OptimizationParams(interp::NewInterpreter)` - return an `OptimizationParams` instance
-- `get_world_counter(interp::NewInterpreter)` - return the world age for this interpreter
+- `get_inference_world(interp::NewInterpreter)` - return the world age for this interpreter
 - `get_inference_cache(interp::NewInterpreter)` - return the local inference cache
-- `code_cache(interp::NewInterpreter)` - return the global inference cache
+- `cache_owner(interp::NewInterpreter)` - return the owner of any new cache entries
+
+If `CodeInstance`s compiled using `interp::NewInterpreter` are meant to be executed with `invoke`,
+a method `codegen_cache(interp::NewInterpreter) -> IdDict{CodeInstance, CodeInfo}` must be defined,
+and inference must be triggered via `typeinf_ext_toplevel` with source mode `SOURCE_MODE_ABI`.
 """
-:(AbstractInterpreter)
+abstract type AbstractInterpreter end
 
 abstract type AbstractLattice end
 
+struct InvalidIRError <: Exception end
+
 struct ArgInfo
     fargs::Union{Nothing,Vector{Any}}
     argtypes::Vector{Any}
@@ -31,13 +50,17 @@ struct StmtInfo
     need thus not be computed.
     """
     used::Bool
+    saw_latestworld::Bool
 end
 
-struct MethodInfo
+struct SpecInfo
+    nargs::Int
+    isva::Bool
     propagate_inbounds::Bool
     method_for_inference_limit_heuristics::Union{Nothing,Method}
 end
-MethodInfo(src::CodeInfo) = MethodInfo(
+SpecInfo(src::CodeInfo) = SpecInfo(
+    Int(src.nargs), src.isva,
     src.propagate_inbounds,
     src.method_for_inference_limit_heuristics::Union{Nothing,Method})
 
@@ -57,38 +80,77 @@ struct VarState
     VarState(@nospecialize(typ), undef::Bool) = new(typ, undef)
 end
 
-abstract type ForwardableArgtypes end
+struct AnalysisResults
+    result
+    next::AnalysisResults
+    AnalysisResults(@nospecialize(result), next::AnalysisResults) = new(result, next)
+    AnalysisResults(@nospecialize(result)) = new(result)
+    # NullAnalysisResults() = new(nothing)
+    # global const NULL_ANALYSIS_RESULTS = NullAnalysisResults()
+end
+const NULL_ANALYSIS_RESULTS = AnalysisResults(nothing)
+
+# Abstract type for call inference results that can be stored in CallInfo
+# This is defined here so that InferenceResult can inherit from it
+abstract type InferredCallResult end
 
 """
-    InferenceResult(linfo::MethodInstance, [argtypes::ForwardableArgtypes, 𝕃::AbstractLattice])
+    result::InferenceResult
 
 A type that represents the result of running type inference on a chunk of code.
-
-See also [`matching_cache_argtypes`](@ref).
+There are two constructor available:
+- `InferenceResult(mi::MethodInstance, [𝕃::AbstractLattice])` for regular inference,
+  without extended lattice information included in `result.argtypes`.
+- `InferenceResult(mi::MethodInstance, argtypes::Vector{Any}, overridden_by_const::BitVector)`
+  for constant inference, with extended lattice information included in `result.argtypes`.
 """
-mutable struct InferenceResult
+mutable struct InferenceResult <: InferredCallResult
+    #=== constant fields ===#
     const linfo::MethodInstance
     const argtypes::Vector{Any}
-    const overridden_by_const::BitVector
-    result                   # extended lattice element if inferred, nothing otherwise
-    src                      # ::Union{CodeInfo, IRCode, OptimizationState} if inferred copy is available, nothing otherwise
-    valid_worlds::WorldRange # if inference and optimization is finished
-    ipo_effects::Effects     # if inference is finished
-    effects::Effects         # if optimization is finished
-    argescapes               # ::ArgEscapeCache if optimized, nothing otherwise
-    must_be_codeinf::Bool    # if this must come out as CodeInfo or leaving it as IRCode is ok
-    function InferenceResult(linfo::MethodInstance, cache_argtypes::Vector{Any}, overridden_by_const::BitVector)
-        # def = linfo.def
-        # nargs = def isa Method ? Int(def.nargs) : 0
-        # @assert length(cache_argtypes) == nargs
-        return new(linfo, cache_argtypes, overridden_by_const, nothing, nothing,
-            WorldRange(), Effects(), Effects(), nothing, true)
+    const overridden_by_const::Union{Nothing,BitVector}
+
+    #=== mutable fields ===#
+    result                            # extended lattice element if inferred, nothing otherwise
+    exc_result                        # like `result`, but for the thrown value
+    src                               # ::Union{CodeInfo, IRCode, OptimizationState} if inferred copy is available, nothing otherwise
+    valid_worlds::WorldRange          # if inference and optimization is finished
+    ipo_effects::Effects              # if inference is finished
+    effects::Effects                  # if optimization is finished
+    analysis_results::AnalysisResults # AnalysisResults with e.g. result::ArgEscapeCache if optimized, otherwise NULL_ANALYSIS_RESULTS
+    tombstone::Bool
+
+    #=== uninitialized fields ===#
+    ci::CodeInstance                  # CodeInstance that will contain the result in full
+    ci_as_edge::CodeInstance          # CodeInstance, that is preferred just for use when representing the result as the edge
+    function InferenceResult(mi::MethodInstance, argtypes::Vector{Any}, overridden_by_const::Union{Nothing,BitVector})
+        result = exc_result = src = nothing
+        valid_worlds = WorldRange()
+        ipo_effects = effects = Effects()
+        analysis_results = NULL_ANALYSIS_RESULTS
+        return new(mi, argtypes, overridden_by_const, result, exc_result, src,
+            valid_worlds, ipo_effects, effects, analysis_results, false)
     end
 end
-InferenceResult(linfo::MethodInstance, 𝕃::AbstractLattice=fallback_lattice) =
-    InferenceResult(linfo, matching_cache_argtypes(𝕃, linfo)...)
-InferenceResult(linfo::MethodInstance, argtypes::ForwardableArgtypes, 𝕃::AbstractLattice=fallback_lattice) =
-    InferenceResult(linfo, matching_cache_argtypes(𝕃, linfo, argtypes)...)
+function InferenceResult(mi::MethodInstance, 𝕃::AbstractLattice=fallback_lattice)
+    argtypes = matching_cache_argtypes(𝕃, mi)
+    return InferenceResult(mi, argtypes, #=overridden_by_const=#nothing)
+end
+
+function stack_analysis_result!(inf_result::InferenceResult, @nospecialize(result))
+    return inf_result.analysis_results = AnalysisResults(result, inf_result.analysis_results)
+end
+
+function traverse_analysis_results(callback, (;analysis_results)::Union{InferenceResult,CodeInstance})
+    analysis_results isa AnalysisResults || return nothing
+    while isdefined(analysis_results, :next)
+        if (result = callback(analysis_results.result)) !== nothing
+            return result
+        end
+        analysis_results = analysis_results.next
+    end
+    return nothing
+end
 
 """
     inf_params::InferenceParams
@@ -131,17 +193,16 @@ Parameters that control abstract interpretation-based type inference operation.
   information available. [`Base.@constprop :aggressive`](@ref Base.@constprop) can have a
   more fine-grained control on this configuration with per-method annotation basis.
 ---
-- `inf_params.unoptimize_throw_blocks::Bool = true`\\
-  If `true`, skips inferring calls that are in a block that is known to `throw`.
-  It may improve the compiler latency without sacrificing the runtime performance
-  in common situations.
----
 - `inf_params.assume_bindings_static::Bool = false`\\
   If `true`, assumes that no new bindings will be added, i.e. a non-existing binding at
   inference time can be assumed to always not exist at runtime (and thus e.g. any access to
   it will `throw`). Defaults to `false` since this assumption does not hold in Julia's
   semantics for native code execution.
 ---
+- `inf_params.force_enable_inference::Bool = false`\\
+  If `true`, inference will be performed on functions regardless of whether it was disabled
+  at the module level via `Base.Experimental.@compiler_options`.
+---
 """
 struct InferenceParams
     max_methods::Int
@@ -151,9 +212,9 @@ struct InferenceParams
     tuple_complexity_limit_depth::Int
     ipo_constant_propagation::Bool
     aggressive_constant_propagation::Bool
-    unoptimize_throw_blocks::Bool
     assume_bindings_static::Bool
     ignore_recursion_hardlimit::Bool
+    force_enable_inference::Bool
 
     function InferenceParams(
         max_methods::Int,
@@ -163,9 +224,10 @@ struct InferenceParams
         tuple_complexity_limit_depth::Int,
         ipo_constant_propagation::Bool,
         aggressive_constant_propagation::Bool,
-        unoptimize_throw_blocks::Bool,
         assume_bindings_static::Bool,
-        ignore_recursion_hardlimit::Bool)
+        ignore_recursion_hardlimit::Bool,
+        force_enable_inference::Bool,
+    )
         return new(
             max_methods,
             max_union_splitting,
@@ -174,23 +236,25 @@ struct InferenceParams
             tuple_complexity_limit_depth,
             ipo_constant_propagation,
             aggressive_constant_propagation,
-            unoptimize_throw_blocks,
             assume_bindings_static,
-            ignore_recursion_hardlimit)
+            ignore_recursion_hardlimit,
+            force_enable_inference,
+        )
     end
 end
 function InferenceParams(
     params::InferenceParams = InferenceParams( # default constructor
-        #=max_methods::Int=# 3,
+        #=max_methods::Int=# BuildSettings.MAX_METHODS,
         #=max_union_splitting::Int=# 4,
         #=max_apply_union_enum::Int=# 8,
         #=max_tuple_splat::Int=# 32,
         #=tuple_complexity_limit_depth::Int=# 3,
         #=ipo_constant_propagation::Bool=# true,
         #=aggressive_constant_propagation::Bool=# false,
-        #=unoptimize_throw_blocks::Bool=# true,
         #=assume_bindings_static::Bool=# false,
-        #=ignore_recursion_hardlimit::Bool=# false);
+        #=ignore_recursion_hardlimit::Bool=# false,
+        #=force_enable_inference::Bool=# false
+    );
     max_methods::Int = params.max_methods,
     max_union_splitting::Int = params.max_union_splitting,
     max_apply_union_enum::Int = params.max_apply_union_enum,
@@ -198,9 +262,10 @@ function InferenceParams(
     tuple_complexity_limit_depth::Int = params.tuple_complexity_limit_depth,
     ipo_constant_propagation::Bool = params.ipo_constant_propagation,
     aggressive_constant_propagation::Bool = params.aggressive_constant_propagation,
-    unoptimize_throw_blocks::Bool = params.unoptimize_throw_blocks,
     assume_bindings_static::Bool = params.assume_bindings_static,
-    ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit)
+    ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit,
+    force_enable_inference::Bool = params.force_enable_inference,
+)
     return InferenceParams(
         max_methods,
         max_union_splitting,
@@ -209,9 +274,10 @@ function InferenceParams(
         tuple_complexity_limit_depth,
         ipo_constant_propagation,
         aggressive_constant_propagation,
-        unoptimize_throw_blocks,
         assume_bindings_static,
-        ignore_recursion_hardlimit)
+        ignore_recursion_hardlimit,
+        force_enable_inference,
+    )
 end
 
 """
@@ -234,10 +300,6 @@ Parameters that control optimizer operation.
   tuple return types (in hopes of splitting it up). `opt_params.inline_tupleret_bonus` will
   be added to `opt_params.inline_cost_threshold` when making inlining decision.
 ---
-- `opt_params.inline_error_path_cost::Int = 20`\\
-  Specifies the penalty cost for an un-optimized dynamic call in a block that is known to
-  `throw`. See also [`(inf_params::InferenceParams).unoptimize_throw_blocks`](@ref InferenceParams).
----
 - `opt_params.max_tuple_splat::Int = 32`\\
   When attempting to inline `Core._apply_iterate`, abort the optimization if the tuple
   contains more than this many elements.
@@ -253,35 +315,40 @@ Parameters that control optimizer operation.
   optimizer license to move side effects (that are proven not observed within a particular
   code path) across a throwing call. Defaults to `false`.
 ---
+- `opt_params.preserve_local_sources::Bool = false`\\
+  If `true`, the inliner is restricted from modifying locally-cached sources that are
+  retained in `CallInfo` objects and always makes their copies before inlining them into
+  caller context. Defaults to `false`.
+---
 """
 struct OptimizationParams
     inlining::Bool
     inline_cost_threshold::Int
     inline_nonleaf_penalty::Int
     inline_tupleret_bonus::Int
-    inline_error_path_cost::Int
     max_tuple_splat::Int
     compilesig_invokes::Bool
     assume_fatal_throw::Bool
+    preserve_local_sources::Bool
 
     function OptimizationParams(
         inlining::Bool,
         inline_cost_threshold::Int,
         inline_nonleaf_penalty::Int,
         inline_tupleret_bonus::Int,
-        inline_error_path_cost::Int,
         max_tuple_splat::Int,
         compilesig_invokes::Bool,
-        assume_fatal_throw::Bool)
+        assume_fatal_throw::Bool,
+        preserve_local_sources::Bool)
         return new(
             inlining,
             inline_cost_threshold,
             inline_nonleaf_penalty,
             inline_tupleret_bonus,
-            inline_error_path_cost,
             max_tuple_splat,
             compilesig_invokes,
-            assume_fatal_throw)
+            assume_fatal_throw,
+            preserve_local_sources)
     end
 end
 function OptimizationParams(
@@ -290,27 +357,27 @@ function OptimizationParams(
         #=inline_cost_threshold::Int=# 100,
         #=inline_nonleaf_penalty::Int=# 1000,
         #=inline_tupleret_bonus::Int=# 250,
-        #=inline_error_path_cost::Int=# 20,
         #=max_tuple_splat::Int=# 32,
         #=compilesig_invokes::Bool=# true,
-        #=assume_fatal_throw::Bool=# false);
+        #=assume_fatal_throw::Bool=# false,
+        #=preserve_local_sources::Bool=# false);
     inlining::Bool = params.inlining,
     inline_cost_threshold::Int = params.inline_cost_threshold,
     inline_nonleaf_penalty::Int = params.inline_nonleaf_penalty,
     inline_tupleret_bonus::Int = params.inline_tupleret_bonus,
-    inline_error_path_cost::Int = params.inline_error_path_cost,
     max_tuple_splat::Int = params.max_tuple_splat,
     compilesig_invokes::Bool = params.compilesig_invokes,
-    assume_fatal_throw::Bool = params.assume_fatal_throw)
+    assume_fatal_throw::Bool = params.assume_fatal_throw,
+    preserve_local_sources::Bool = params.preserve_local_sources)
     return OptimizationParams(
         inlining,
         inline_cost_threshold,
         inline_nonleaf_penalty,
         inline_tupleret_bonus,
-        inline_error_path_cost,
         max_tuple_splat,
         compilesig_invokes,
-        assume_fatal_throw)
+        assume_fatal_throw,
+        preserve_local_sources)
 end
 
 """
@@ -327,82 +394,46 @@ struct NativeInterpreter <: AbstractInterpreter
 
     # Cache of inference results for this particular interpreter
     inf_cache::Vector{InferenceResult}
+    codegen::IdDict{CodeInstance,CodeInfo}
 
     # Parameters for inference and optimization
     inf_params::InferenceParams
     opt_params::OptimizationParams
-
-    # a boolean flag to indicate if this interpreter is performing semi concrete interpretation
-    irinterp::Bool
 end
 
 function NativeInterpreter(world::UInt = get_world_counter();
                            inf_params::InferenceParams = InferenceParams(),
                            opt_params::OptimizationParams = OptimizationParams())
+    curr_max_world = get_world_counter()
     # Sometimes the caller is lazy and passes typemax(UInt).
     # we cap it to the current world age for correctness
     if world == typemax(UInt)
-        world = get_world_counter()
+        world = curr_max_world
     end
-
     # If they didn't pass typemax(UInt) but passed something more subtly
     # incorrect, fail out loudly.
-    @assert world <= get_world_counter()
-
+    @assert world <= curr_max_world
     method_table = CachedMethodTable(InternalMethodTable(world))
-
     inf_cache = Vector{InferenceResult}() # Initially empty cache
-
-    return NativeInterpreter(world, method_table, inf_cache, inf_params, opt_params, #=irinterp=#false)
-end
-
-function NativeInterpreter(interp::NativeInterpreter;
-                           world::UInt = interp.world,
-                           method_table::CachedMethodTable{InternalMethodTable} = interp.method_table,
-                           inf_cache::Vector{InferenceResult} = interp.inf_cache,
-                           inf_params::InferenceParams = interp.inf_params,
-                           opt_params::OptimizationParams = interp.opt_params,
-                           irinterp::Bool = interp.irinterp)
-    return NativeInterpreter(world, method_table, inf_cache, inf_params, opt_params, irinterp)
+    codegen = IdDict{CodeInstance,CodeInfo}()
+    return NativeInterpreter(world, method_table, inf_cache, codegen, inf_params, opt_params)
 end
 
 # Quickly and easily satisfy the AbstractInterpreter API contract
 InferenceParams(interp::NativeInterpreter) = interp.inf_params
 OptimizationParams(interp::NativeInterpreter) = interp.opt_params
-get_world_counter(interp::NativeInterpreter) = interp.world
+get_inference_world(interp::NativeInterpreter) = interp.world
 get_inference_cache(interp::NativeInterpreter) = interp.inf_cache
-code_cache(interp::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, get_world_counter(interp))
-
-"""
-    already_inferred_quick_test(::AbstractInterpreter, ::MethodInstance)
-
-For the `NativeInterpreter`, we don't need to do an actual cache query to know if something
-was already inferred. If we reach this point, but the inference flag has been turned off,
-then it's in the cache. This is purely for a performance optimization.
-"""
-already_inferred_quick_test(interp::NativeInterpreter, mi::MethodInstance) = !mi.inInference
-already_inferred_quick_test(interp::AbstractInterpreter, mi::MethodInstance) = false
+cache_owner(::NativeInterpreter) = nothing
 
-"""
-    lock_mi_inference(::AbstractInterpreter, mi::MethodInstance)
-
-Hint that `mi` is in inference to help accelerate bootstrapping.
-This is particularly used by `NativeInterpreter` and helps us limit the amount of wasted
-work we might do when inference is working on initially inferring itself by letting us
-detect when inference is already in progress and not running a second copy on it.
-This creates a data-race, but the entry point into this code from C (`jl_type_infer`)
-already includes detection and restriction on recursion, so it is hopefully mostly a
-benign problem, since it should really only happen during the first phase of bootstrapping
-that we encounter this flag.
-"""
-lock_mi_inference(::NativeInterpreter, mi::MethodInstance) = (mi.inInference = true; nothing)
-lock_mi_inference(::AbstractInterpreter, ::MethodInstance) = return
+engine_reserve(interp::AbstractInterpreter, mi::MethodInstance) = engine_reserve(mi, cache_owner(interp))
+engine_reserve(mi::MethodInstance, @nospecialize owner) = ccall(:jl_engine_reserve, Any, (Any, Any), mi, owner)::CodeInstance
+# engine_fulfill(::AbstractInterpreter, ci::CodeInstance, src::CodeInfo) = ccall(:jl_engine_fulfill, Cvoid, (Any, Any), ci, src) # currently the same as engine_reject, so just use that one
+engine_reject(::AbstractInterpreter, ci::CodeInstance) = ccall(:jl_engine_fulfill, Cvoid, (Any, Ptr{Cvoid}), ci, C_NULL)
 
-"""
-See `lock_mi_inference`.
-"""
-unlock_mi_inference(::NativeInterpreter, mi::MethodInstance) = (mi.inInference = false; nothing)
-unlock_mi_inference(::AbstractInterpreter, ::MethodInstance) = return
+function already_inferred_quick_test end
+function lock_mi_inference end
+function unlock_mi_inference end
 
 """
     add_remark!(::AbstractInterpreter, sv::InferenceState, remark)
@@ -416,18 +447,30 @@ function add_remark! end
 may_optimize(::AbstractInterpreter) = true
 may_compress(::AbstractInterpreter) = true
 may_discard_trees(::AbstractInterpreter) = true
-verbose_stmt_info(::AbstractInterpreter) = false
 
 """
-    method_table(interp::AbstractInterpreter) -> MethodTableView
+    method_table(interp::AbstractInterpreter)::MethodTableView
 
 Returns a method table this `interp` uses for method lookup.
 External `AbstractInterpreter` can optionally return `OverlayMethodTable` here
 to incorporate customized dispatches for the overridden methods.
 """
-method_table(interp::AbstractInterpreter) = InternalMethodTable(get_world_counter(interp))
+method_table(interp::AbstractInterpreter) = InternalMethodTable(get_inference_world(interp))
 method_table(interp::NativeInterpreter) = interp.method_table
 
+"""
+    codegen_cache(interp::AbstractInterpreter) -> Union{Nothing, IdDict{CodeInstance, CodeInfo}}
+
+Optionally return a cache associating a `CodeInfo` to a `CodeInstance` that should be added to the JIT
+for future execution via `invoke(f, ::CodeInstance, args...)`. This cache is used during `typeinf_ext_toplevel`,
+and may be safely discarded between calls to this function.
+
+By default, a value of `nothing` is returned indicating that `CodeInstance`s should not be added to the JIT.
+Attempting to execute them via `invoke` will result in an error.
+"""
+codegen_cache(::AbstractInterpreter) = nothing
+codegen_cache(interp::NativeInterpreter) = interp.codegen
+
 """
 By default `AbstractInterpreter` implements the following inference bail out logic:
 - `bail_out_toplevel_call(::AbstractInterpreter, sig, ::InferenceState)`: bail out from
@@ -457,44 +500,70 @@ typeinf_lattice(::AbstractInterpreter) = InferenceLattice(BaseInferenceLattice.i
 ipo_lattice(::AbstractInterpreter) = InferenceLattice(IPOResultLattice.instance)
 optimizer_lattice(::AbstractInterpreter) = SimpleInferenceLattice.instance
 
-typeinf_lattice(interp::NativeInterpreter) = interp.irinterp ?
-    InferenceLattice(SimpleInferenceLattice.instance) :
-    InferenceLattice(BaseInferenceLattice.instance)
-ipo_lattice(interp::NativeInterpreter) = interp.irinterp ?
-    InferenceLattice(SimpleInferenceLattice.instance) :
-    InferenceLattice(IPOResultLattice.instance)
-optimizer_lattice(interp::NativeInterpreter) = SimpleInferenceLattice.instance
+struct OverlayCodeCache{Cache}
+    globalcache::Cache
+    localcache::Vector{InferenceResult}
+end
 
-"""
-    switch_to_irinterp(interp::AbstractInterpreter) -> irinterp::AbstractInterpreter
+setindex!(cache::OverlayCodeCache, ci::CodeInstance, mi::MethodInstance) = (setindex!(cache.globalcache, ci, mi); cache)
 
-This interface allows `ir_abstract_constant_propagation` to convert `interp` to a new
-`irinterp::AbstractInterpreter` to perform semi-concrete interpretation.
-`NativeInterpreter` uses this interface to switch its lattice to `optimizer_lattice` during
-semi-concrete interpretation on `IRCode`.
-"""
-switch_to_irinterp(interp::AbstractInterpreter) = interp
-switch_to_irinterp(interp::NativeInterpreter) = NativeInterpreter(interp; irinterp=true)
+haskey(cache::OverlayCodeCache, mi::MethodInstance) = get(cache, mi, nothing) !== nothing
 
-"""
-    switch_from_irinterp(irinterp::AbstractInterpreter) -> interp::AbstractInterpreter
+function get(cache::OverlayCodeCache, mi::MethodInstance, default)
+    for cached_result in Iterators.reverse(cache.localcache)
+        cached_result.tombstone && continue # ignore deleted entries (due to LimitedAccuracy)
+        cached_result.linfo === mi || continue
+        cached_result.overridden_by_const === nothing || continue
+        isdefined(cached_result, :ci) || continue
+        ci = cached_result.ci
+        isdefined(ci, :inferred) || continue
+        return cached_result
+    end
+    return get(cache.globalcache, mi, default)
+end
 
-The inverse operation of `switch_to_irinterp`, allowing `typeinf` to convert `irinterp` back
-to a new `interp::AbstractInterpreter` to perform ordinary abstract interpretation.
-"""
-switch_from_irinterp(irinterp::AbstractInterpreter) = irinterp
-switch_from_irinterp(irinterp::NativeInterpreter) = NativeInterpreter(irinterp; irinterp=false)
+function getindex(cache::OverlayCodeCache, mi::MethodInstance)
+    r = get(cache, mi, nothing)
+    r === nothing && throw(KeyError(mi))
+    return r
+end
+
+code_cache(interp::AbstractInterpreter, #=extended_range=#::WorldRange) = code_cache(interp)
+
+function code_cache(interp::AbstractInterpreter)
+    cache = InternalCodeCache(cache_owner(interp), get_inference_world(interp))
+    return OverlayCodeCache(cache, get_inference_cache(interp))
+end
+
+function code_cache(interp::NativeInterpreter, extended_range::WorldRange)
+    @assert get_inference_world(interp) in extended_range
+    cache = InternalCodeCache(cache_owner(interp), extended_range)
+    return OverlayCodeCache(cache, get_inference_cache(interp))
+end
+
+get_escape_cache(interp::AbstractInterpreter) = GetNativeEscapeCache(interp)
 
 abstract type CallInfo end
 
 @nospecialize
 
+function add_edges!(edges::Vector{Any}, info::CallInfo)
+    if info === NoCallInfo()
+        return nothing # just a minor optimization to avoid dynamic dispatch
+    end
+    add_edges_impl(edges, info)
+    nothing
+end
 nsplit(info::CallInfo) = nsplit_impl(info)::Union{Nothing,Int}
 getsplit(info::CallInfo, idx::Int) = getsplit_impl(info, idx)::MethodLookupResult
-getresult(info::CallInfo, idx::Int) = getresult_impl(info, idx)
+getresult(info::CallInfo, idx::Int) = getresult_impl(info, idx)#=::Union{Nothing,InferenceResult}=#
 
+add_edges_impl(::Vector{Any}, ::CallInfo) = error("""
+    All `CallInfo` is required to implement `add_edges_impl(::Vector{Any}, ::CallInfo)`""")
 nsplit_impl(::CallInfo) = nothing
-getsplit_impl(::CallInfo, ::Int) = error("unexpected call into `getsplit`")
+getsplit_impl(::CallInfo, ::Int) = error("""
+    A `info::CallInfo` that implements `nsplit_impl(info::CallInfo)::Int` must implement `getsplit_impl(info::CallInfo, idx::Int)::MethodLookupResult`
+    in order to correctly opt in to inlining""")
 getresult_impl(::CallInfo, ::Int) = nothing
 
 @specialize
diff --git a/base/compiler/typeutils.jl b/Compiler/src/typeutils.jl
similarity index 81%
rename from base/compiler/typeutils.jl
rename to Compiler/src/typeutils.jl
index 2ecc077228264..b2ffd0ef0b57b 100644
--- a/base/compiler/typeutils.jl
+++ b/Compiler/src/typeutils.jl
@@ -18,13 +18,13 @@ function hasuniquerep(@nospecialize t)
     iskindtype(typeof(t)) || return true # non-types are always compared by egal in the type system
     isconcretetype(t) && return true # these are also interned and pointer comparable
     if isa(t, DataType) && t.name !== Tuple.name && !isvarargtype(t) # invariant DataTypes
-        return _all(hasuniquerep, t.parameters)
+        return all(hasuniquerep, t.parameters)
     end
     return false
 end
 
 """
-    isTypeDataType(@nospecialize t) -> Bool
+    isTypeDataType(@nospecialize t)::Bool
 
 For a type `t` test whether ∀S s.t. `isa(S, rewrap_unionall(Type{t}, ...))`,
 we have `isa(S, DataType)`. In particular, if a statement is typed as `Type{t}`
@@ -36,14 +36,8 @@ function isTypeDataType(@nospecialize t)
     isType(t) && return false
     # Could be Union{} at runtime
     t === Core.TypeofBottom && return false
-    if t.name === Tuple.name
-        # If we have a Union parameter, could have been redistributed at runtime,
-        # e.g. `Tuple{Union{Int, Float64}, Int}` is a DataType, but
-        # `Union{Tuple{Int, Int}, Tuple{Float64, Int}}` is typeequal to it and
-        # is not.
-        return all(isTypeDataType, t.parameters)
-    end
-    return true
+    # Return true if `t` is not covariant
+    return t.name !== Tuple.name
 end
 
 has_extended_info(@nospecialize x) = (!isa(x, Type) && !isvarargtype(x)) || isType(x)
@@ -54,7 +48,12 @@ has_extended_info(@nospecialize x) = (!isa(x, Type) && !isvarargtype(x)) || isTy
 # certain combinations of `a` and `b` where one/both isa/are `Union`/`UnionAll` type(s)s.
 isnotbrokensubtype(@nospecialize(a), @nospecialize(b)) = (!iskindtype(b) || !isType(a) || hasuniquerep(a.parameters[1]) || b <: a)
 
-argtypes_to_type(argtypes::Array{Any,1}) = Tuple{anymap(@nospecialize(a) -> isvarargtype(a) ? a : widenconst(a), argtypes)...}
+function argtypes_to_type(argtypes::Vector{Any})
+    argtypes = anymap(@nospecialize(a) -> isvarargtype(a) ? a : widenconst(a), argtypes)
+    filter!(@nospecialize(x) -> !isvarargtype(x) || valid_as_lattice(unwrapva(x), true), argtypes)
+    all(@nospecialize(x) -> isvarargtype(x) || valid_as_lattice(x, true), argtypes) || return Bottom
+    return Tuple{argtypes...}
+end
 
 function isknownlength(t::DataType)
     isvatuple(t) || return true
@@ -62,45 +61,15 @@ function isknownlength(t::DataType)
     return isdefined(va, :N) && va.N isa Int
 end
 
-# Compute the minimum number of initialized fields for a particular datatype
-# (therefore also a lower bound on the number of fields)
-function datatype_min_ninitialized(t::DataType)
-    isabstracttype(t) && return 0
-    if t.name === _NAMEDTUPLE_NAME
-        names, types = t.parameters[1], t.parameters[2]
-        if names isa Tuple
-            return length(names)
-        end
-        t = argument_datatype(types)
-        t isa DataType || return 0
-        t.name === Tuple.name || return 0
-    end
-    if t.name === Tuple.name
-        n = length(t.parameters)
-        n == 0 && return 0
-        va = t.parameters[n]
-        if isvarargtype(va)
-            n -= 1
-            if isdefined(va, :N)
-                va = va.N
-                if va isa Int
-                    n += va
-                end
-            end
-        end
-        return n
-    end
-    return length(t.name.names) - t.name.n_uninitialized
-end
-
 has_concrete_subtype(d::DataType) = d.flags & 0x0020 == 0x0020 # n.b. often computed only after setting the type and layout fields
 
-# determine whether x is a valid lattice element tag
+# determine whether x is a valid lattice element
 # For example, Type{v} is not valid if v is a value
-# Accepts TypeVars also, since it assumes the user will rewrap it correctly
-function valid_as_lattice(@nospecialize(x))
+# Accepts TypeVars and has_free_typevar also, since it assumes the user will rewrap it correctly
+# If astag is true, then also requires that it be a possible type tag for a valid object
+function valid_as_lattice(@nospecialize(x), astag::Bool=false)
     x === Bottom && false
-    x isa TypeVar && return valid_as_lattice(x.ub)
+    x isa TypeVar && return valid_as_lattice(x.ub, astag)
     x isa UnionAll && (x = unwrap_unionall(x))
     if x isa Union
         # the Union constructor ensures this (and we'll recheck after
@@ -111,6 +80,9 @@ function valid_as_lattice(@nospecialize(x))
         if isType(x)
             p = x.parameters[1]
             p isa Type || p isa TypeVar || return false
+        elseif astag && isstructtype(x)
+            datatype_fieldtypes(x) # force computation of has_concrete_subtype to be updated now
+            return has_concrete_subtype(x)
         end
         return true
     end
@@ -149,6 +121,7 @@ function compatible_vatuple(a::DataType, b::DataType)
 end
 
 # return an upper-bound on type `a` with type `b` removed
+# and also any contents that are not valid type tags on any objects
 # such that `return <: a` && `Union{return, b} == Union{a, b}`
 function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::Int)
     if a <: b && isnotbrokensubtype(a, b)
@@ -158,8 +131,8 @@ function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::I
     if isa(ua, Union)
         uua = typesubtract(rewrap_unionall(ua.a, a), b, max_union_splitting)
         uub = typesubtract(rewrap_unionall(ua.b, a), b, max_union_splitting)
-        return Union{valid_as_lattice(uua) ? uua : Union{},
-                     valid_as_lattice(uub) ? uub : Union{}}
+        return Union{valid_as_lattice(uua, true) ? uua : Union{},
+                     valid_as_lattice(uub, true) ? uub : Union{}}
     elseif a isa DataType
         ub = unwrap_unionall(b)
         if ub isa DataType
@@ -197,10 +170,8 @@ function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::I
     return a # TODO: improve this bound?
 end
 
-hasintersect(@nospecialize(a), @nospecialize(b)) = typeintersect(a, b) !== Bottom
-
 _typename(@nospecialize a) = Union{}
-_typename(a::TypeVar) = Core.TypeName
+_typename(::TypeVar) = Core.TypeName
 function _typename(a::Union)
     ta = _typename(a.a)
     tb = _typename(a.b)
@@ -291,7 +262,7 @@ end
 unioncomplexity(@nospecialize x) = _unioncomplexity(x)::Int
 function _unioncomplexity(@nospecialize x)
     if isa(x, DataType)
-        x.name === Tuple.name || isvarargtype(x) || return 0
+        x.name === Tuple.name || return 0
         c = 0
         for ti in x.parameters
             c = max(c, unioncomplexity(ti))
@@ -302,7 +273,7 @@ function _unioncomplexity(@nospecialize x)
     elseif isa(x, UnionAll)
         return max(unioncomplexity(x.body), unioncomplexity(x.var.ub))
     elseif isa(x, TypeofVararg)
-        return isdefined(x, :T) ? unioncomplexity(x.T) : 0
+        return isdefined(x, :T) ? unioncomplexity(x.T) + 1 : 1
     else
         return 0
     end
@@ -317,24 +288,6 @@ function unionall_depth(@nospecialize ua) # aka subtype_env_size
     return depth
 end
 
-# convert a Union of Tuple types to a Tuple of Unions
-function unswitchtupleunion(u::Union)
-    ts = uniontypes(u)
-    n = -1
-    for t in ts
-        if t isa DataType && t.name === Tuple.name && length(t.parameters) != 0 && !isvarargtype(t.parameters[end])
-            if n == -1
-                n = length(t.parameters)
-            elseif n != length(t.parameters)
-                return u
-            end
-        else
-            return u
-        end
-    end
-    Tuple{Any[ Union{Any[(t::DataType).parameters[i] for t in ts]...} for i in 1:n ]...}
-end
-
 function unwraptv_ub(@nospecialize t)
     while isa(t, TypeVar)
         t = t.ub
@@ -350,7 +303,7 @@ end
 const unwraptv = unwraptv_ub
 
 """
-    is_identity_free_argtype(argtype) -> Bool
+    is_identity_free_argtype(argtype)::Bool
 
 Return `true` if the `argtype` object is identity free in the sense that this type or any
 reachable through its fields has non-content-based identity (see `Base.isidentityfree`).
@@ -363,7 +316,7 @@ is_identity_free_argtype(@nospecialize ty) = is_identity_free_type(widenconst(ig
 is_identity_free_type(@nospecialize ty) = isidentityfree(ty)
 
 """
-    is_immutable_argtype(argtype) -> Bool
+    is_immutable_argtype(argtype)::Bool
 
 Return `true` if the `argtype` object is known to be immutable.
 This query is specifically designed for `getfield_effects` and `isdefined_effects`, allowing
@@ -381,7 +334,7 @@ function _is_immutable_type(@nospecialize ty)
 end
 
 """
-    is_mutation_free_argtype(argtype) -> Bool
+    is_mutation_free_argtype(argtype)::Bool
 
 Return `true` if `argtype` object is mutation free in the sense that no mutable memory
 is reachable from this type (either in the type itself) or through any fields
diff --git a/Compiler/src/utilities.jl b/Compiler/src/utilities.jl
new file mode 100644
index 0000000000000..865d76eda9e84
--- /dev/null
+++ b/Compiler/src/utilities.jl
@@ -0,0 +1,364 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+###########
+# generic #
+###########
+
+if !@isdefined(var"@zone")
+    # This is designed to allow inserting timers when loading a second copy
+    # of inference for performing performance experiments.
+    macro timeit(args...)
+        esc(args[end])
+    end
+end
+
+function contains_is(itr, @nospecialize(x))
+    for y in itr
+        if y === x
+            return true
+        end
+    end
+    return false
+end
+
+anymap(f::Function, a::Vector{Any}) = Any[ f(a[i]) for i in 1:length(a) ]
+
+############
+# inlining #
+############
+
+const MAX_INLINE_CONST_SIZE = 256
+
+function count_const_size(@nospecialize(x), count_self::Bool = true)
+    (x isa Type || x isa Core.TypeName || x isa Symbol) && return 0
+    if ismutable(x)
+        # No definite size
+        (isa(x, GenericMemory) || isa(x, String) || isa(x, SimpleVector)) &&
+            return MAX_INLINE_CONST_SIZE + 1
+        if isa(x, Module) || isa(x, Method) || isa(x, CodeInstance)
+            # We allow modules, methods and CodeInstance, because we already assume they are externally
+            # rooted, so we count their contents as 0 size.
+            return sizeof(Ptr{Cvoid})
+        end
+        # We allow mutable types with no mutable fields (i.e. those mutable
+        # types used for identity only). The intent of this function is to
+        # prevent the rooting of large amounts of data that may have been
+        # speculatively computed. If the struct can get mutated later, we
+        # cannot assess how much data we might end up rooting. However, if
+        # the struct is mutable only for identity, the query still works.
+        for i = 1:nfields(x)
+            if !isconst(typeof(x), i)
+                return MAX_INLINE_CONST_SIZE + 1
+            end
+        end
+    end
+    isbits(x) && return Core.sizeof(x)
+    dt = typeof(x)
+    sz = count_self ? sizeof(dt) : 0
+    sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1
+    dtfd = DataTypeFieldDesc(dt)
+    for i = 1:Int(datatype_nfields(dt))
+        isdefined(x, i) || continue
+        f = getfield(x, i)
+        if !dtfd[i].isptr && datatype_pointerfree(typeof(f))
+            continue
+        end
+        sz += count_const_size(f, dtfd[i].isptr)
+        sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1
+    end
+    return sz
+end
+
+function is_inlineable_constant(@nospecialize(x))
+    return count_const_size(x) <= MAX_INLINE_CONST_SIZE
+end
+
+###########################
+# MethodInstance/CodeInfo #
+###########################
+
+invoke_api(li::CodeInstance) = ccall(:jl_invoke_api, Cint, (Any,), li)
+use_const_api(li::CodeInstance) = invoke_api(li) == 2
+
+function get_staged(mi::MethodInstance, world::UInt)
+    may_invoke_generator(mi) || return nothing
+    cache_ci = (mi.def::Method).generator isa Core.CachedGenerator ?
+        RefValue{CodeInstance}() : nothing
+    try
+        return call_get_staged(mi, world, cache_ci)
+    catch # user code might throw errors – ignore them
+        return nothing
+    end
+end
+
+# enable caching of unoptimized generated code if the generator is `CachedGenerator`
+function call_get_staged(mi::MethodInstance, world::UInt, cache_ci::RefValue{CodeInstance})
+    token = @_gc_preserve_begin cache_ci
+    cache_ci_ptr = pointer_from_objref(cache_ci)
+    src = ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{CodeInstance}), mi, world, cache_ci_ptr)
+    @_gc_preserve_end token
+    return src
+end
+function call_get_staged(mi::MethodInstance, world::UInt, ::Nothing)
+    return ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{Cvoid}), mi, world, C_NULL)
+end
+
+function get_cached_uninferred(mi::MethodInstance, world::UInt)
+    ccall(:jl_cached_uninferred, Any, (Any, UInt), mi.cache, world)::CodeInstance
+end
+
+function retrieve_code_info(mi::MethodInstance, world::UInt)
+    def = mi.def
+    if !isa(def, Method)
+        ci = get_cached_uninferred(mi, world)
+        src = ci.inferred
+        # Inference may corrupt the src, which is fine, because this is a
+        # (short-lived) top-level thunk, but set it to NULL anyway, so we
+        # can catch it if somebody tries to read it again by accident.
+        # @atomic ci.inferred = C_NULL
+        return src
+    end
+    c = hasgenerator(def) ? get_staged(mi, world) : nothing
+    if c === nothing && isdefined(def, :source)
+        src = def.source
+        if src === nothing
+            # can happen in images built with --strip-ir
+            return nothing
+        elseif isa(src, String)
+            c = ccall(:jl_uncompress_ir, Ref{CodeInfo}, (Any, Ptr{Cvoid}, Any), def, C_NULL, src)
+        else
+            c = copy(src::CodeInfo)
+        end
+        if (def.did_scan_source & 0x1) == 0x0
+            # This scan must happen:
+            #   1. After method definition
+            #   2. Before any code instances that may have relied on information
+            #      from implicit GlobalRefs for this method are added to the cache
+            #   3. Preferably while the IR is already uncompressed
+            #   4. As late as possible, as early adding of the backedges may cause
+            #      spurious invalidations.
+            #
+            # At the moment we do so here, because
+            #  1. It's reasonably late
+            #  2. It has easy access to the uncompressed IR
+            #  3. We necessarily pass through here before relying on any
+            #     information obtained from implicit GlobalRefs.
+            #
+            # However, the exact placement of this scan is not as important as
+            # long as the above conditions are met.
+            ccall(:jl_scan_method_source_now, Cvoid, (Any, Any), def, c)
+        end
+    end
+    if c isa CodeInfo
+        c.parent = mi
+        return c
+    end
+    return nothing
+end
+
+function get_compileable_sig(method::Method, @nospecialize(atype), sparams::SimpleVector)
+    isa(atype, DataType) || return nothing
+    return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Cint),
+        atype, sparams, method, #=int return_if_compileable=#1)
+end
+
+
+isa_compileable_sig(@nospecialize(atype), sparams::SimpleVector, method::Method) =
+    !iszero(ccall(:jl_isa_compileable_sig, Int32, (Any, Any, Any), atype, sparams, method))
+
+isa_compileable_sig(m::MethodInstance) = (def = m.def; !isa(def, Method) || isa_compileable_sig(m.specTypes, m.sparam_vals, def))
+isa_compileable_sig(::ABIOverride) = false
+
+has_typevar(@nospecialize(t), v::TypeVar) = ccall(:jl_has_typevar, Cint, (Any, Any), t, v) != 0
+
+"""
+    is_declared_inline(method::Method)::Bool
+
+Check if `method` is declared as `@inline`.
+"""
+is_declared_inline(method::Method) = _is_declared_inline(method, true)
+
+"""
+    is_declared_noinline(method::Method)::Bool
+
+Check if `method` is declared as `@noinline`.
+"""
+is_declared_noinline(method::Method) = _is_declared_inline(method, false)
+
+function _is_declared_inline(method::Method, inline::Bool)
+    isdefined(method, :source) || return false
+    src = method.source
+    isa(src, MaybeCompressed) || return false
+    return (inline ? is_declared_inline : is_declared_noinline)(src)
+end
+
+"""
+    is_aggressive_constprop(method::Union{Method,CodeInfo})::Bool
+
+Check if `method` is declared as `Base.@constprop :aggressive`.
+"""
+is_aggressive_constprop(method::Union{Method,CodeInfo}) = method.constprop == 0x01
+
+"""
+    is_no_constprop(method::Union{Method,CodeInfo})::Bool
+
+Check if `method` is declared as `Base.@constprop :none`.
+"""
+is_no_constprop(method::Union{Method,CodeInfo}) = method.constprop == 0x02
+
+#########
+# types #
+#########
+
+@nospecializeinfer function singleton_type(@nospecialize(ft))
+    ft = widenslotwrapper(ft)
+    if isa(ft, Const)
+        return ft.val
+    elseif isconstType(ft)
+        return ft.parameters[1]
+    elseif issingletontype(ft)
+        return ft.instance
+    end
+    return nothing
+end
+
+@nospecializeinfer function maybe_singleton_const(@nospecialize(t))
+    if isa(t, DataType)
+        if issingletontype(t)
+            return Const(t.instance)
+        elseif isconstType(t)
+            return Const(t.parameters[1])
+        end
+    end
+    return t
+end
+
+###################
+# SSAValues/Slots #
+###################
+
+function ssamap(f, @nospecialize(stmt))
+    urs = userefs(stmt)
+    for op in urs
+        val = op[]
+        if isa(val, SSAValue)
+            op[] = f(val)
+        end
+    end
+    return urs[]
+end
+
+function foreachssa(@specialize(f), @nospecialize(stmt))
+    urs = userefs(stmt)
+    for op in urs
+        val = op[]
+        if isa(val, SSAValue)
+            f(val)
+        end
+    end
+end
+
+function foreach_anyssa(@specialize(f), @nospecialize(stmt))
+    urs = userefs(stmt)
+    for op in urs
+        val = op[]
+        if isa(val, AnySSAValue)
+            f(val)
+        end
+    end
+end
+
+function find_ssavalue_uses(body::Vector{Any}, nvals::Int)
+    uses = BitSet[ BitSet() for _ = 1:nvals ]
+    for line in 1:length(body)
+        e = body[line]
+        if isa(e, ReturnNode)
+            isdefined(e, :val) || continue
+            e = e.val
+        elseif isa(e, GotoIfNot)
+            e = e.cond
+        end
+        if isa(e, SSAValue)
+            push!(uses[e.id], line)
+        elseif isa(e, Expr)
+            find_ssavalue_uses!(uses, e, line)
+        elseif isa(e, PhiNode)
+            find_ssavalue_uses!(uses, e, line)
+        end
+    end
+    return uses
+end
+
+function find_ssavalue_uses!(uses::Vector{BitSet}, e::Expr, line::Int)
+    head = e.head
+    is_meta_expr_head(head) && return
+    skiparg = (head === :(=))
+    for a in e.args
+        if skiparg
+            skiparg = false
+        elseif isa(a, SSAValue)
+            push!(uses[a.id], line)
+        elseif isa(a, Expr)
+            find_ssavalue_uses!(uses, a, line)
+        end
+    end
+end
+
+function find_ssavalue_uses!(uses::Vector{BitSet}, e::PhiNode, line::Int)
+    values = e.values
+    for i = 1:length(values)
+        isassigned(values, i) || continue
+        val = values[i]
+        if isa(val, SSAValue)
+            push!(uses[val.id], line)
+        end
+    end
+end
+
+# using a function to ensure we can infer this
+@inline function slot_id(s)
+    isa(s, SlotNumber) && return s.id
+    return (s::Argument).n
+end
+
+###########
+# options #
+###########
+
+inlining_enabled() = (JLOptions().can_inline == 1)
+
+function instrumentation_enabled(m::Module, only_if_affects_optimizer::Bool)
+    generating_output() && return false # don't alter caches
+    cov = JLOptions().code_coverage
+    if cov == 1 # user
+        m = moduleroot(m)
+        m === Core && return false
+        isdefined(Main, :Base) && m === Main.Base && return false
+        return true
+    elseif cov == 2 # all
+        return true
+    end
+    if !only_if_affects_optimizer
+        log = JLOptions().malloc_log
+        if log == 1 # user
+            m = moduleroot(m)
+            m === Core && return false
+            isdefined(Main, :Base) && m === Main.Base && return false
+            return true
+        elseif log == 2 # all
+            return true
+        end
+    end
+    return false
+end
+
+function inbounds_option()
+    opt_check_bounds = JLOptions().check_bounds
+    opt_check_bounds == 0 && return :default
+    opt_check_bounds == 1 && return :on
+    return :off
+end
+
+is_asserts() = ccall(:jl_is_assertsbuild, Cint, ()) == 1
+
+_time_ns() = ccall(:jl_hrtime, UInt64, ())
diff --git a/base/compiler/validation.jl b/Compiler/src/validation.jl
similarity index 84%
rename from base/compiler/validation.jl
rename to Compiler/src/validation.jl
index 68eb2ab15c59d..3fce8da4e6dd0 100644
--- a/base/compiler/validation.jl
+++ b/Compiler/src/validation.jl
@@ -9,23 +9,20 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}(
     :(&) => 1:1,
     :(=) => 2:2,
     :method => 1:4,
-    :const => 1:1,
     :new => 1:typemax(Int),
     :splatnew => 2:2,
     :the_exception => 0:0,
-    :enter => 1:1,
-    :leave => 1:1,
+    :leave => 1:typemax(Int),
     :pop_exception => 1:1,
     :inbounds => 1:1,
     :inline => 1:1,
     :noinline => 1:1,
-    :boundscheck => 0:0,
+    :boundscheck => 0:1,
     :copyast => 1:1,
     :meta => 0:typemax(Int),
-    :global => 1:1,
-    :foreigncall => 5:typemax(Int), # name, RT, AT, nreq, (cconv, effects), args..., roots...
+    :foreigncall => 5:typemax(Int), # name, RT, AT, nreq, (cconv, effects, gc_safe), args..., roots...
     :cfunction => 5:5,
-    :isdefined => 1:1,
+    :isdefined => 1:2,
     :code_coverage_effect => 0:0,
     :loopinfo => 0:typemax(Int),
     :gc_preserve_begin => 0:typemax(Int),
@@ -34,7 +31,10 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}(
     :throw_undef_if_not => 2:2,
     :aliasscope => 0:0,
     :popaliasscope => 0:0,
-    :new_opaque_closure => 4:typemax(Int)
+    :new_opaque_closure => 5:typemax(Int),
+    :export => 1:typemax(Int),
+    :public => 1:typemax(Int),
+    :latestworld => 0:0,
 )
 
 # @enum isn't defined yet, otherwise I'd use it for this
@@ -50,7 +50,6 @@ const SSAVALUETYPES_MISMATCH = "not all SSAValues in AST have a type in ssavalue
 const SSAVALUETYPES_MISMATCH_UNINFERRED = "uninferred CodeInfo ssavaluetypes field does not equal the number of present SSAValues"
 const SSAFLAGS_MISMATCH = "not all SSAValues have a corresponding `ssaflags`"
 const NON_TOP_LEVEL_METHOD = "encountered `Expr` head `:method` in non-top-level code (i.e. `nargs` > 0)"
-const NON_TOP_LEVEL_GLOBAL = "encountered `Expr` head `:global` in non-top-level code (i.e. `nargs` > 0)"
 const SIGNATURE_NARGS_MISMATCH = "method signature does not match number of method arguments"
 const SLOTNAMES_NARGS_MISMATCH = "CodeInfo for method contains fewer slotnames than the number of method arguments"
 const INVALID_SIGNATURE_OPAQUE_CLOSURE = "invalid signature of method for opaque closure - `sig` field must always be set to `Tuple`"
@@ -61,20 +60,22 @@ struct InvalidCodeError <: Exception
 end
 InvalidCodeError(kind::AbstractString) = InvalidCodeError(kind, nothing)
 
-function validate_code_in_debug_mode(linfo::MethodInstance, src::CodeInfo, kind::String)
-    if JLOptions().debug_level == 2
-        # this is a debug build of julia, so let's validate linfo
-        errors = validate_code(linfo, src)
+function maybe_validate_code(mi::MethodInstance, src::CodeInfo, kind::String)
+    if is_asserts()
+        errors = validate_code(mi, src)
         if !isempty(errors)
             for e in errors
-                if linfo.def isa Method
-                    println(stderr, "WARNING: Encountered invalid ", kind, " code for method ",
-                            linfo.def, ": ", e)
+                if mi.def isa Method
+                    println(Core.stderr,
+                            "WARNING: Encountered invalid ", kind,
+                            " code for method ", mi.def, ": ", e)
                 else
-                    println(stderr, "WARNING: Encountered invalid ", kind, " code for top level expression in ",
-                            linfo.def, ": ", e)
+                    println(Core.stderr,
+                            "WARNING: Encountered invalid ", kind,
+                            " code for top level expression in ", mi.def, ": ", e)
                 end
             end
+            error("")
         end
     end
 end
@@ -120,7 +121,6 @@ function validate_code!(errors::Vector{InvalidCodeError}, c::CodeInfo, is_top_le
             head = x.head
             if !is_top_level
                 head === :method && push!(errors, InvalidCodeError(NON_TOP_LEVEL_METHOD))
-                head === :global && push!(errors, InvalidCodeError(NON_TOP_LEVEL_GLOBAL))
             end
             narg_bounds = get(VALID_EXPR_HEADS, head, -1:-1)
             nargs = length(x.args)
@@ -144,8 +144,8 @@ function validate_code!(errors::Vector{InvalidCodeError}, c::CodeInfo, is_top_le
             elseif head === :call || head === :invoke || x.head === :invoke_modify ||
                 head === :gc_preserve_end || head === :meta ||
                 head === :inbounds || head === :foreigncall || head === :cfunction ||
-                head === :const || head === :enter || head === :leave || head === :pop_exception ||
-                head === :method || head === :global || head === :static_parameter ||
+                head === :leave || head === :pop_exception ||
+                head === :method || head === :static_parameter ||
                 head === :new || head === :splatnew || head === :thunk || head === :loopinfo ||
                 head === :throw_undef_if_not || head === :code_coverage_effect || head === :inline || head === :noinline
                 validate_val!(x)
@@ -160,6 +160,13 @@ function validate_code!(errors::Vector{InvalidCodeError}, c::CodeInfo, is_top_le
                 push!(errors, InvalidCodeError(INVALID_CALL_ARG, x.cond))
             end
             validate_val!(x.cond)
+        elseif isa(x, EnterNode)
+            if isdefined(x, :scope)
+                if !is_valid_argument(x.scope)
+                    push!(errors, InvalidCodeError(INVALID_CALL_ARG, x.scope))
+                end
+                validate_val!(x.scope)
+            end
         elseif isa(x, ReturnNode)
             if isdefined(x, :val)
                 if !is_valid_return(x.val)
@@ -213,7 +220,7 @@ function validate_code!(errors::Vector{InvalidCodeError}, mi::Core.MethodInstanc
         mnargs = 0
     else
         m = mi.def::Method
-        mnargs = m.nargs
+        mnargs = Int(m.nargs)
         n_sig_params = length((unwrap_unionall(m.sig)::DataType).parameters)
         if m.is_for_opaque_closure
             m.sig === Tuple || push!(errors, InvalidCodeError(INVALID_SIGNATURE_OPAQUE_CLOSURE, (m.sig, m.isva)))
@@ -222,6 +229,7 @@ function validate_code!(errors::Vector{InvalidCodeError}, mi::Core.MethodInstanc
         end
     end
     if isa(c, CodeInfo)
+        mnargs = Int(c.nargs)
         mnargs > length(c.slotnames) && push!(errors, InvalidCodeError(SLOTNAMES_NARGS_MISMATCH))
         validate_code!(errors, c, is_top_level)
     end
@@ -230,11 +238,11 @@ end
 
 validate_code(args...) = validate_code!(Vector{InvalidCodeError}(), args...)
 
-is_valid_lvalue(@nospecialize(x)) = isa(x, UnoptSlot) || isa(x, GlobalRef)
+is_valid_lvalue(@nospecialize(x)) = isa(x, SlotNumber) || isa(x, GlobalRef)
 
 function is_valid_argument(@nospecialize(x))
-    if isa(x, UnoptSlot) || isa(x, Argument) || isa(x, SSAValue) ||
-       isa(x, GlobalRef) || isa(x, QuoteNode) || isexpr(x, (:static_parameter, :boundscheck)) ||
+    if isa(x, SlotNumber) || isa(x, Argument) || isa(x, SSAValue) ||
+       isa(x, GlobalRef) || isa(x, QuoteNode) || (isa(x, Expr) && is_value_pos_expr_head(x.head))  ||
        isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) ||
        isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing
         return true
@@ -246,12 +254,12 @@ end
 
 function is_valid_rvalue(@nospecialize(x))
     is_valid_argument(x) && return true
-    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
+    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call,
+        :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast,
+        :new_opaque_closure)
         return true
     end
     return false
 end
 
 is_valid_return(@nospecialize(x)) = is_valid_argument(x) || (isa(x, Expr) && x.head === :lambda)
-
-is_flag_set(byte::UInt8, flag::UInt8) = (byte & flag) == flag
diff --git a/Compiler/src/verifytrim.jl b/Compiler/src/verifytrim.jl
new file mode 100644
index 0000000000000..eef2f15d43e86
--- /dev/null
+++ b/Compiler/src/verifytrim.jl
@@ -0,0 +1,409 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+import ..Compiler: verify_typeinf_trim, NativeInterpreter, argtypes_to_type, compileable_specialization_for_call
+
+using ..Compiler:
+     # operators
+     !, !=, !==, +, :, <, <=, ==, =>, >, >=, ∈, ∉,
+     # types
+     Array, Builtin, Callable, Cint, CodeInfo, CodeInstance, Csize_t, Exception,
+     GenericMemory, GlobalRef, IdDict, IdSet, IntrinsicFunction, Method, MethodInstance,
+     NamedTuple, Pair, PhiCNode, PhiNode, PiNode, QuoteNode, SSAValue, SimpleVector, String,
+     Tuple, VarState, Vector,
+     # functions
+     argextype, empty!, error, get, get_ci_mi, get_world_counter, getindex, getproperty,
+     hasintersect, haskey, in, isdispatchelem, isempty, isexpr, iterate, length, map!, max,
+     pop!, popfirst!, push!, pushfirst!, reinterpret, reverse!, reverse, setindex!,
+     setproperty!, similar, singleton_type, sptypes_from_meth_instance, sp_type_rewrap,
+     unsafe_pointer_to_objref, widenconst, isconcretetype,
+     # misc
+     @nospecialize, @assert, C_NULL
+using ..IRShow: LineInfoNode, print, show, println, append_scopes!, IOContext, IO, normalize_method_name
+using ..Base: Base, sourceinfo_slotnames
+using ..Base.StackTraces: StackFrame
+
+## declarations ##
+
+struct CallMissing <: Exception
+    codeinst::CodeInstance
+    codeinfo::CodeInfo
+    sptypes::Vector{VarState}
+    stmtidx::Int
+    desc::String
+end
+
+struct CCallableMissing <: Exception
+    rt
+    sig
+    desc
+end
+
+const ParentMap = IdDict{CodeInstance,Tuple{CodeInstance,Int}}
+const ErrorList = Vector{Pair{Bool,Any}} # severity => exception
+
+const runtime_functions = Symbol[
+    # a denylist of any runtime functions which someone might ccall which can call jl_apply or access reflection state
+    # which might not be captured by the trim output
+    :jl_apply,
+]
+
+## code for pretty printing ##
+
+# wrap a statement in a typeassert for printing clarity, unless that info seems already obvious
+function mapssavaluetypes(codeinfo::CodeInfo, sptypes::Vector{VarState}, stmt)
+    @nospecialize stmt
+    newstmt = mapssavalues(codeinfo, sptypes, stmt)
+    typ = widenconst(argextype(stmt, codeinfo, sptypes))
+    if newstmt isa Expr
+        if newstmt.head ∈ (:quote, :inert)
+            return newstmt
+        end
+    elseif newstmt isa GlobalRef && isdispatchelem(typ)
+        return newstmt
+    elseif newstmt isa Union{Int, UInt8, UInt16, UInt32, UInt64, Float16, Float32, Float64, String, QuoteNode}
+        return newstmt
+    elseif newstmt isa Callable
+        return newstmt
+    end
+    return Expr(:(::), newstmt, typ)
+end
+
+# map the ssavalues in a (value-producing) statement to the expression they came from, summarizing some things to avoid excess printing
+function mapssavalues(codeinfo::CodeInfo, sptypes::Vector{VarState}, stmt)
+    @nospecialize stmt
+    if stmt isa SSAValue
+        return mapssavalues(codeinfo, sptypes, codeinfo.code[stmt.id])
+    elseif stmt isa PiNode
+        return mapssavalues(codeinfo, sptypes, stmt.val)
+    elseif stmt isa Expr
+        stmt.head ∈ (:quote, :inert) && return stmt
+        newstmt = Expr(stmt.head)
+        if stmt.head === :foreigncall
+            return Expr(:call, :ccall, mapssavalues(codeinfo, sptypes, stmt.args[1]))
+        elseif stmt.head ∉ (:new, :method, :toplevel, :thunk)
+            newstmt.args = map!(similar(stmt.args), stmt.args) do arg
+                @nospecialize arg
+                return mapssavaluetypes(codeinfo, sptypes, arg)
+            end
+            if newstmt.head === :invoke
+                # why is the fancy printing for this not in show_unquoted?
+                popfirst!(newstmt.args)
+                newstmt.head = :call
+            end
+        end
+        return newstmt
+    elseif stmt isa PhiNode
+        return PhiNode()
+    elseif stmt isa PhiCNode
+        return PhiNode()
+    end
+    return stmt
+end
+
+function verify_print_stmt(io::IOContext{IO}, codeinfo::CodeInfo, sptypes::Vector{VarState}, stmtidx::Int)
+    if codeinfo.slotnames !== nothing
+        io = IOContext(io, :SOURCE_SLOTNAMES => sourceinfo_slotnames(codeinfo))
+    end
+    print(io, mapssavaluetypes(codeinfo, sptypes, SSAValue(stmtidx)))
+end
+
+function verify_print_error(io::IOContext{IO}, desc::CallMissing, parents::ParentMap)
+    (; codeinst, codeinfo, sptypes, stmtidx, desc) = desc
+    frames = verify_create_stackframes(codeinst, stmtidx, parents)
+    print(io, desc, " from statement ")
+    verify_print_stmt(io, codeinfo, sptypes, stmtidx)
+    Base.show_backtrace(io, frames)
+    print(io, "\n\n")
+    nothing
+end
+
+function verify_print_error(io::IOContext{IO}, desc::CCallableMissing, ::ParentMap)
+    print(io, desc.desc, " for ", desc.sig, " => ", desc.rt, "\n\n")
+    nothing
+end
+
+function verify_create_stackframes(codeinst::CodeInstance, stmtidx::Int, parents::ParentMap)
+    scopes = LineInfoNode[]
+    frames = StackFrame[]
+    parent = (codeinst, stmtidx)
+    while parent !== nothing
+        codeinst, stmtidx = parent
+        di = codeinst.debuginfo
+        append_scopes!(scopes, stmtidx, di, :var"unknown scope")
+        for i in reverse(1:length(scopes))
+            lno = scopes[i]
+            inlined = i != 1
+            def = lno.method
+            def isa Union{Method,Core.CodeInstance,MethodInstance} || (def = nothing)
+            sf = StackFrame(normalize_method_name(lno.method), lno.file, lno.line, def, false, inlined, 0)
+            push!(frames, sf)
+        end
+        empty!(scopes)
+        parent = get(parents, codeinst, nothing)
+    end
+    return frames
+end
+
+## code for analysis ##
+
+function may_dispatch(@nospecialize ftyp)
+    if ftyp <: IntrinsicFunction
+        return true
+    elseif ftyp <: Builtin
+        # other builtins (including the IntrinsicFunctions) are good
+        return Core._apply isa ftyp ||
+               Core._apply_iterate isa ftyp ||
+               Core._call_in_world_total isa ftyp ||
+               Core.invoke isa ftyp ||
+               Core.invoke_in_world isa ftyp ||
+               Core.invokelatest isa ftyp ||
+               Core.finalizer isa ftyp ||
+               Core.modifyfield! isa ftyp ||
+               Core.modifyglobal! isa ftyp ||
+               Core.memoryrefmodify! isa ftyp
+    else
+        return true
+    end
+end
+
+function verify_codeinstance!(interp::NativeInterpreter, codeinst::CodeInstance, codeinfo::CodeInfo, inspected::IdSet{CodeInstance}, caches::IdDict{MethodInstance,CodeInstance}, parents::ParentMap, errors::ErrorList)
+    mi = get_ci_mi(codeinst)
+    sptypes = sptypes_from_meth_instance(mi)
+    src = codeinfo.code
+    for i = 1:length(src)
+        stmt = src[i]
+        isexpr(stmt, :(=)) && (stmt = stmt.args[2])
+        error = ""
+        warn = false
+        if isexpr(stmt, :invoke) || isexpr(stmt, :invoke_modify)
+            error = "unresolved invoke"
+            edge = stmt.args[1]
+            if edge isa CodeInstance
+                haskey(parents, edge) || (parents[edge] = (codeinst, i))
+                edge in inspected && continue
+                edge_mi = get_ci_mi(edge)
+                if edge_mi === edge.def
+                    ci = get(caches, edge_mi, nothing)
+                    ci isa CodeInstance && continue # assume that only this_world matters for trim
+                end
+            end
+            # TODO: check for calls to Base.atexit?
+        elseif isexpr(stmt, :call)
+            error = "unresolved call"
+            farg = stmt.args[1]
+            ftyp = widenconst(argextype(farg, codeinfo, sptypes))
+            if ftyp <: IntrinsicFunction
+                #TODO: detect if f !== Core.Intrinsics.atomic_pointermodify (see statement_cost), otherwise error
+                continue
+            elseif ftyp <: Builtin
+                if !may_dispatch(ftyp)
+                    continue
+                end
+                if !isconcretetype(ftyp)
+                    error = "unresolved call to (unknown) builtin"
+                elseif Core._apply_iterate isa ftyp
+                    if length(stmt.args) >= 3
+                        # args[1] is _apply_iterate object
+                        # args[2] is invoke object
+                        farg = stmt.args[3]
+                        ftyp = widenconst(argextype(farg, codeinfo, sptypes))
+                        if may_dispatch(ftyp)
+                            error = "unresolved call to function"
+                        else
+                            for i in 4:length(stmt.args)
+                                atyp = widenconst(argextype(stmt.args[i], codeinfo, sptypes))
+                                if !(atyp <: Union{SimpleVector, GenericMemory, Array, Tuple, NamedTuple})
+                                    error = "unresolved argument to call"
+                                    break
+                                end
+                            end
+                        end
+                    end
+                elseif Core.finalizer isa ftyp
+                    if length(stmt.args) == 3
+                        finalizer = argextype(stmt.args[2], codeinfo, sptypes)
+                        obj = argextype(stmt.args[3], codeinfo, sptypes)
+                        atype = argtypes_to_type(Any[finalizer, obj])
+
+                        mi = compileable_specialization_for_call(interp, atype)
+                        if mi !== nothing
+                            ci = get(caches, mi, nothing)
+                            ci isa CodeInstance && continue
+                        end
+
+                        error = "unresolved finalizer registered"
+                    end
+                elseif Core._apply isa ftyp
+                    error = "trim verification not yet implemented for builtin `Core._apply`"
+                elseif Core._call_in_world_total isa ftyp
+                    error = "trim verification not yet implemented for builtin `Core._call_in_world_total`"
+                elseif Core.invoke isa ftyp
+                    error = "trim verification not yet implemented for builtin `Core.invoke`"
+                elseif Core.invoke_in_world isa ftyp
+                    error = "trim verification not yet implemented for builtin `Core.invoke_in_world`"
+                elseif Core.invokelatest isa ftyp
+                    error = "trim verification not yet implemented for builtin `Core.invokelatest`"
+                elseif Core.modifyfield! isa ftyp
+                    error = "trim verification not yet implemented for builtin `Core.modifyfield!`"
+                elseif Core.modifyglobal! isa ftyp
+                    error = "trim verification not yet implemented for builtin `Core.modifyglobal!`"
+                elseif Core.memoryrefmodify! isa ftyp
+                    error = "trim verification not yet implemented for builtin `Core.memoryrefmodify!`"
+                else @assert false "unexpected builtin" end
+            end
+            extyp = argextype(SSAValue(i), codeinfo, sptypes)
+            if extyp === Union{}
+                warn = true # downgrade must-throw calls to be only a warning
+            end
+        elseif isexpr(stmt, :cfunction)
+            length(stmt.args) != 5 && continue # required by IR legality
+            f, at = stmt.args[2], stmt.args[4]
+
+            at isa SimpleVector || continue  # required by IR legality
+            ft = argextype(f, codeinfo, sptypes)
+            argtypes = Any[ft]
+            for i = 1:length(at)
+                push!(argtypes, sp_type_rewrap(at[i], get_ci_mi(codeinst), #= isreturn =# false))
+            end
+            atype = argtypes_to_type(argtypes)
+
+            mi = compileable_specialization_for_call(interp, atype)
+            if mi !== nothing
+                # n.b.: Codegen may choose unpredictably to emit this `@cfunction` as a dynamic invoke or a full
+                # dynamic call, but in either case it guarantees that the required adapter(s) are emitted. All
+                # that we are required to verify here is that the callee CodeInstance is covered.
+                ci = get(caches, mi, nothing)
+                ci isa CodeInstance && continue
+            end
+
+            error = "unresolved cfunction"
+        elseif isexpr(stmt, :foreigncall)
+            foreigncall = stmt.args[1]
+            if isexpr(foreigncall, :tuple, 1)
+                foreigncall = foreigncall.args[1]
+                if foreigncall isa String
+                    foreigncall = QuoteNode(Symbol(foreigncall))
+                end
+                if foreigncall isa QuoteNode
+                    if foreigncall.value in runtime_functions
+                        error = "disallowed ccall into a runtime function"
+                    end
+                else
+                    error = "disallowed ccall with non-constant name and no library"
+                end
+            end
+        elseif isexpr(stmt, :new_opaque_closure)
+            error = "unresolved opaque closure"
+            # TODO: check that this opaque closure has a valid signature for possible codegen and code defined for it
+            warn = true
+        end
+        if !isempty(error)
+            push!(errors, warn => CallMissing(codeinst, codeinfo, sptypes, i, error))
+        end
+    end
+end
+
+## entry-point ##
+
+function get_verify_typeinf_trim(codeinfos::Vector{Any})
+    this_world = get_world_counter()
+    interp = NativeInterpreter(this_world)
+    inspected = IdSet{CodeInstance}()
+    caches = IdDict{MethodInstance,CodeInstance}()
+    errors = ErrorList()
+    parents = ParentMap()
+    for i = 1:length(codeinfos)
+        item = codeinfos[i]
+        if item isa CodeInstance
+            push!(inspected, item)
+            if item.owner === nothing && item.min_world <= this_world <= item.max_world
+                mi = get_ci_mi(item)
+                if mi === item.def
+                    caches[mi] = item
+                end
+            end
+        end
+    end
+    for i = 1:length(codeinfos)
+        item = codeinfos[i]
+        if item isa CodeInstance
+            src = codeinfos[i + 1]::CodeInfo
+            verify_codeinstance!(interp, item, src, inspected, caches, parents, errors)
+        elseif item isa SimpleVector
+            rt = item[1]::Type
+            sig = item[2]::Type
+            mi = ccall(:jl_get_specialization1, Any,
+                        (Any, Csize_t, Cint),
+                        sig, this_world, #= mt_cache =# 0)
+            asrt = Any
+            valid = if mi !== nothing
+                mi = mi::MethodInstance
+                ci = get(caches, mi, nothing)
+                if ci isa CodeInstance
+                    # TODO: should we find a way to indicate to the user that this gets called via ccallable?
+                    # parent[ci] = something
+                    asrt = ci.rettype
+                    true
+                else
+                    false
+                end
+            else
+                false
+            end
+            if !valid
+                warn = false
+                push!(errors, warn => CCallableMissing(rt, sig, "unresolved ccallable"))
+            elseif !(asrt <: rt)
+                warn = hasintersect(asrt, rt)
+                push!(errors, warn => CCallableMissing(asrt, sig, "ccallable declared return type does not match inference"))
+            end
+        end
+    end
+    return (errors, parents)
+end
+
+# It is unclear if this file belongs in Compiler itself, or should instead be a codegen
+# driver / verifier implemented by juliac-buildscript.jl for the purpose of extensibility.
+# For now, it is part of Base.Compiler, but executed with invokelatest so that packages
+# could provide hooks to change, customize, or tweak its behavior and heuristics.
+function verify_typeinf_trim(io::IO, codeinfos::Vector{Any}, onlywarn::Bool)
+    errors, parents = get_verify_typeinf_trim(codeinfos)
+
+    # count up how many messages we printed, of each severity
+    counts = [0, 0] # errors, warnings
+    io = IOContext{IO}(io)
+    # print all errors afterwards, when the parents map is fully constructed
+    for desc in errors
+        warn, desc = desc
+        severity = warn ? 2 : 1
+        no = (counts[severity] += 1)
+        print(io, warn ? "Verifier warning #" : "Verifier error #", no, ": ")
+        # TODO: should we coalesce any of these stacktraces to minimize spew?
+        verify_print_error(io, desc, parents)
+    end
+
+    ## TODO: compute and display the minimum and/or full call graph instead of merely the first parent stacktrace?
+    #for i = 1:length(codeinfos)
+    #    item = codeinfos[i]
+    #    if item isa CodeInstance
+    #        println(item, "::", item.rettype)
+    #    end
+    #end
+
+    let severity = 0
+        if counts[1] > 0 || counts[2] > 0
+            print("Trim verify finished with ")
+            print(counts[1], counts[1] == 1 ? " error" : " errors")
+            print(", ")
+            print(counts[2], counts[2] == 1 ? " warning" : " warnings")
+            print(".\n")
+            severity = 2
+        end
+        if counts[1] > 0
+            severity = 1
+        end
+        # messages classified as errors are fatal, warnings are not
+        0 < severity <= 1 && !onlywarn && throw(Core.TrimFailure())
+    end
+    nothing
+end
diff --git a/Compiler/test/AbstractInterpreter.jl b/Compiler/test/AbstractInterpreter.jl
new file mode 100644
index 0000000000000..fb0ddbdce7a96
--- /dev/null
+++ b/Compiler/test/AbstractInterpreter.jl
@@ -0,0 +1,567 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+
+include("setup_Compiler.jl")
+include("irutils.jl")
+include("newinterp.jl")
+
+# interpreter that performs abstract interpretation only
+# (semi-concrete interpretation should be disabled automatically)
+@newinterp AbsIntOnlyInterp1
+Compiler.may_optimize(::AbsIntOnlyInterp1) = false
+@test Base.infer_return_type(Base.init_stdio, (Ptr{Cvoid},); interp=AbsIntOnlyInterp1()) >: IO
+
+# it should work even if the interpreter discards inferred source entirely
+@newinterp AbsIntOnlyInterp2
+Compiler.may_optimize(::AbsIntOnlyInterp2) = false
+Compiler.transform_result_for_cache(::AbsIntOnlyInterp2, ::Compiler.InferenceResult, edges::Core.SimpleVector) = nothing
+@test Base.infer_return_type(Base.init_stdio, (Ptr{Cvoid},); interp=AbsIntOnlyInterp2()) >: IO
+
+# OverlayMethodTable
+# ==================
+
+using Base.Experimental: @MethodTable, @overlay, @consistent_overlay
+
+# @overlay method with return type annotation
+@MethodTable RT_METHOD_DEF
+@overlay RT_METHOD_DEF Base.sin(x::Float64)::Float64 = cos(x)
+@overlay RT_METHOD_DEF function Base.sin(x::T)::T where T<:AbstractFloat
+    cos(x)
+end
+
+@newinterp MTOverlayInterp
+@MethodTable OVERLAY_MT
+Compiler.method_table(interp::MTOverlayInterp) = Compiler.OverlayMethodTable(Compiler.get_inference_world(interp), OVERLAY_MT)
+
+function Compiler.add_remark!(interp::MTOverlayInterp, ::Compiler.InferenceState, remark)
+    if interp.meta !== nothing
+        # Core.println(remark)
+        push!(interp.meta, remark)
+    end
+    return nothing
+end
+
+struct StrangeSinError end
+strangesin(x) = sin(x)
+@overlay OVERLAY_MT strangesin(x::Float64) =
+    iszero(x) ? throw(StrangeSinError()) : x < 0 ? nothing : cos(x)
+
+# inference should use the overlayed method table
+@test Base.return_types((Float64,); interp=MTOverlayInterp()) do x
+    strangesin(x)
+end |> only === Union{Float64,Nothing}
+@test Base.return_types((Any,); interp=MTOverlayInterp()) do x
+    @invoke strangesin(x::Float64)
+end |> only === Union{Float64,Nothing}
+@test only(Base.return_types(strangesin, (Float64,); interp=MTOverlayInterp())) === Union{Float64,Nothing}
+@test Base.infer_exception_type(strangesin, (Float64,); interp=MTOverlayInterp()) === Union{StrangeSinError,DomainError}
+@test only(Base.infer_exception_types(strangesin, (Float64,); interp=MTOverlayInterp())) === Union{StrangeSinError,DomainError}
+@test last(only(code_typed(strangesin, (Float64,); interp=MTOverlayInterp()))) === Union{Float64,Nothing}
+@test last(only(Base.code_ircode(strangesin, (Float64,); interp=MTOverlayInterp()))) === Union{Float64,Nothing}
+
+# effect analysis should figure out that the overlayed method is used
+@test Base.infer_effects((Float64,); interp=MTOverlayInterp()) do x
+    strangesin(x)
+end |> !Compiler.is_nonoverlayed
+@test Base.infer_effects((Any,); interp=MTOverlayInterp()) do x
+    @invoke strangesin(x::Float64)
+end |> !Compiler.is_nonoverlayed
+
+# account for overlay possibility in unanalyzed matching method
+callstrange(::Float64) = strangesin(x)
+callstrange(::Number) = Core.compilerbarrier(:type, nothing) # trigger inference bail out
+callstrange(::Any) = 1.0
+callstrange_entry(x) = callstrange(x) # needs to be defined here because of world age
+let interp = MTOverlayInterp(Set{Any}())
+    matches = Compiler.findall(Tuple{typeof(callstrange),Any}, Compiler.method_table(interp))
+    @test matches !== nothing
+    @test Compiler.length(matches) == 3
+    @test Base.infer_effects(callstrange_entry, (Any,); interp) |> !Compiler.is_nonoverlayed
+    @test "Call inference reached maximally imprecise information: bailing on doing more abstract inference." in interp.meta
+end
+
+# but it should never apply for the native compilation
+@test Base.infer_effects((Float64,)) do x
+    strangesin(x)
+end |> Compiler.is_nonoverlayed
+@test Base.infer_effects((Any,)) do x
+    @invoke strangesin(x::Float64)
+end |> Compiler.is_nonoverlayed
+
+# fallback to the internal method table
+@test Base.return_types((Int,); interp=MTOverlayInterp()) do x
+    cos(x)
+end |> only === Float64
+@test Base.return_types((Any,); interp=MTOverlayInterp()) do x
+    @invoke cos(x::Float64)
+end |> only === Float64
+
+# not fully covered overlay method match
+overlay_match(::Any) = nothing
+@overlay OVERLAY_MT overlay_match(::Int) = missing
+@test Base.return_types((Any,); interp=MTOverlayInterp()) do x
+    overlay_match(x)
+end |> only === Union{Nothing,Missing}
+
+# partial concrete evaluation
+@test Base.return_types(; interp=MTOverlayInterp()) do
+    isbitstype(Int) ? nothing : missing
+end |> only === Nothing
+Base.@assume_effects :terminates_locally function issue41694(x)
+    res = 1
+    0 ≤ x < 20 || error("bad fact")
+    while x > 1
+        res *= x
+        x -= 1
+    end
+    return res
+end
+@test Base.return_types(; interp=MTOverlayInterp()) do
+    issue41694(3) == 6 ? nothing : missing
+end |> only === Nothing
+
+# disable partial concrete evaluation when tainted by any overlayed call
+Base.@assume_effects :total totalcall(f, args...) = f(args...)
+@test Base.return_types(; interp=MTOverlayInterp()) do
+    if totalcall(strangesin, 1.0) == cos(1.0)
+        return nothing
+    else
+        return missing
+    end
+end |> only === Nothing
+
+# override `:native_executable` to allow concrete-eval for overlay-ed methods
+function myfactorial(x::Int, raise)
+    res = 1
+    0 ≤ x < 20 || raise("x is too big")
+    Base.@assume_effects :terminates_locally while x > 1
+        res *= x
+        x -= 1
+    end
+    return res
+end
+raise_on_gpu1(x) = error(x)
+@overlay OVERLAY_MT @noinline raise_on_gpu1(x) = #=do something with GPU=# error(x)
+raise_on_gpu2(x) = error(x)
+@consistent_overlay OVERLAY_MT @noinline raise_on_gpu2(x) = #=do something with GPU=# error(x)
+raise_on_gpu3(x) = error(x)
+@consistent_overlay OVERLAY_MT @noinline Base.@assume_effects :foldable raise_on_gpu3(x) = #=do something with GPU=# error_on_gpu(x)
+cpu_factorial(x::Int) = myfactorial(x, error)
+gpu_factorial1(x::Int) = myfactorial(x, raise_on_gpu1)
+gpu_factorial2(x::Int) = myfactorial(x, raise_on_gpu2)
+gpu_factorial3(x::Int) = myfactorial(x, raise_on_gpu3)
+
+@test Base.infer_effects(cpu_factorial, (Int,); interp=MTOverlayInterp()) |> Compiler.is_nonoverlayed
+@test Base.infer_effects(gpu_factorial1, (Int,); interp=MTOverlayInterp()) |> !Compiler.is_nonoverlayed
+@test Base.infer_effects(gpu_factorial2, (Int,); interp=MTOverlayInterp()) |> Compiler.is_consistent_overlay
+let effects = Base.infer_effects(gpu_factorial3, (Int,); interp=MTOverlayInterp())
+    # check if `@consistent_overlay` together works with `@assume_effects`
+    # N.B. the overlaid `raise_on_gpu3` is not :foldable otherwise since `error_on_gpu` is (intetionally) undefined.
+    @test Compiler.is_consistent_overlay(effects)
+    @test Compiler.is_foldable(effects)
+end
+@test Base.infer_return_type(; interp=MTOverlayInterp()) do
+    Val(gpu_factorial2(3))
+end == Val{6}
+@test Base.infer_return_type(; interp=MTOverlayInterp()) do
+    Val(gpu_factorial3(3))
+end == Val{6}
+
+# GPUCompiler needs accurate inference through kwfunc with the overlay of `Core.throw_inexacterror`
+# https://github.com/JuliaLang/julia/issues/48097
+@newinterp Issue48097Interp
+@MethodTable ISSUE_48097_MT
+Compiler.method_table(interp::Issue48097Interp) = Compiler.OverlayMethodTable(Compiler.get_inference_world(interp), ISSUE_48097_MT)
+function Compiler.concrete_eval_eligible(interp::Issue48097Interp,
+    @nospecialize(f), result::Compiler.MethodCallResult, arginfo::Compiler.ArgInfo, sv::Compiler.AbsIntState)
+    ret = @invoke Compiler.concrete_eval_eligible(interp::Compiler.AbstractInterpreter,
+        f::Any, result::Compiler.MethodCallResult, arginfo::Compiler.ArgInfo, sv::Compiler.AbsIntState)
+    if ret === :semi_concrete_eval
+        # disable semi-concrete interpretation
+        return :none
+    end
+    return ret
+end
+@overlay ISSUE_48097_MT @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = return
+issue48097(; kwargs...) = return 42
+@test fully_eliminated(; interp=Issue48097Interp(), retval=42) do
+    issue48097(; a=1f0, b=1.0)
+end
+
+# https://github.com/JuliaLang/julia/issues/52938
+@newinterp Issue52938Interp
+@MethodTable ISSUE_52938_MT
+Compiler.method_table(interp::Issue52938Interp) = Compiler.OverlayMethodTable(Compiler.get_inference_world(interp), ISSUE_52938_MT)
+inner52938(x, types::Type, args...; kwargs...) = x
+outer52938(x) = @inline inner52938(x, Tuple{}; foo=Ref(42), bar=1)
+@test fully_eliminated(outer52938, (Any,); interp=Issue52938Interp(), retval=Argument(2))
+
+# https://github.com/JuliaGPU/CUDA.jl/issues/2241
+@newinterp Cuda2241Interp
+@MethodTable CUDA_2241_MT
+Compiler.method_table(interp::Cuda2241Interp) = Compiler.OverlayMethodTable(Compiler.get_inference_world(interp), CUDA_2241_MT)
+inner2241(f, types::Type, args...; kwargs...) = nothing
+function outer2241(f)
+    @inline inner2241(f, Tuple{}; foo=Ref(42), bar=1)
+    return nothing
+end
+# NOTE CUDA.jl overlays `throw_boundserror` in a way that causes effects, but these effects
+#      are ignored for this call graph at the `@assume_effects` annotation on `typejoin`.
+#      Here it's important to use `@consistent_overlay` to avoid tainting the `:nonoverlayed` bit.
+const cuda_kernel_state = Ref{Any}()
+@consistent_overlay CUDA_2241_MT @inline Base.throw_boundserror(A, I) =
+    (cuda_kernel_state[] = (A, I); error())
+@test fully_eliminated(outer2241, (Nothing,); interp=Cuda2241Interp(), retval=nothing)
+
+# Should not concrete-eval overlayed methods in semi-concrete interpretation
+@newinterp OverlaySinInterp
+@MethodTable OVERLAY_SIN_MT
+Compiler.method_table(interp::OverlaySinInterp) = Compiler.OverlayMethodTable(Compiler.get_inference_world(interp), OVERLAY_SIN_MT)
+overlay_sin1(x) = error("Not supposed to be called.")
+@overlay OVERLAY_SIN_MT overlay_sin1(x) = cos(x)
+@overlay OVERLAY_SIN_MT Base.sin(x::Union{Float32,Float64}) = overlay_sin1(x)
+let ir = Base.code_ircode(; interp=OverlaySinInterp()) do
+        sin(0.)
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir)
+    @test oc() == cos(0.)
+end
+@overlay OVERLAY_SIN_MT Base.sin(x::Union{Float32,Float64}) = @noinline overlay_sin1(x)
+let ir = Base.code_ircode(; interp=OverlaySinInterp()) do
+        sin(0.)
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir)
+    @test oc() == cos(0.)
+end
+_overlay_sin2(x) = error("Not supposed to be called.")
+@overlay OVERLAY_SIN_MT _overlay_sin2(x) = cos(x)
+overlay_sin2(x) = _overlay_sin2(x)
+@overlay OVERLAY_SIN_MT Base.sin(x::Union{Float32,Float64}) = @noinline overlay_sin2(x)
+let ir = Base.code_ircode(; interp=OverlaySinInterp()) do
+        sin(0.)
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir)
+    @test oc() == cos(0.)
+end
+
+# AbstractLattice
+# ===============
+
+using Core: SlotNumber, Argument
+using .Compiler: slot_id, tmerge_fast_path
+import .Compiler:
+    AbstractLattice, BaseInferenceLattice, IPOResultLattice, InferenceLattice,
+    widenlattice, is_valid_lattice_norec, typeinf_lattice, ipo_lattice, optimizer_lattice,
+    widenconst, tmeet, tmerge, ⊑, abstract_eval_special_value, widenreturn
+
+@newinterp TaintInterpreter
+struct TaintLattice{PL<:AbstractLattice} <: Compiler.AbstractLattice
+    parent::PL
+end
+Compiler.widenlattice(𝕃::TaintLattice) = 𝕃.parent
+Compiler.is_valid_lattice_norec(::TaintLattice, @nospecialize(elm)) = isa(elm, Taint)
+
+struct InterTaintLattice{PL<:AbstractLattice} <: Compiler.AbstractLattice
+    parent::PL
+end
+Compiler.widenlattice(𝕃::InterTaintLattice) = 𝕃.parent
+Compiler.is_valid_lattice_norec(::InterTaintLattice, @nospecialize(elm)) = isa(elm, InterTaint)
+
+const AnyTaintLattice{L} = Union{TaintLattice{L},InterTaintLattice{L}}
+
+Compiler.typeinf_lattice(::TaintInterpreter) = InferenceLattice(TaintLattice(BaseInferenceLattice.instance))
+Compiler.ipo_lattice(::TaintInterpreter) = InferenceLattice(InterTaintLattice(IPOResultLattice.instance))
+Compiler.optimizer_lattice(::TaintInterpreter) = InterTaintLattice(SimpleInferenceLattice.instance)
+
+struct Taint
+    typ
+    slots::BitSet
+    function Taint(@nospecialize(typ), slots::BitSet)
+        if typ isa Taint
+            slots = typ.slots ∪ slots
+            typ = typ.typ
+        end
+        return new(typ, slots)
+    end
+end
+Taint(@nospecialize(typ), id::Int) = Taint(typ, push!(BitSet(), id))
+function Base.:(==)(a::Taint, b::Taint)
+    return a.typ == b.typ && a.slots == b.slots
+end
+
+struct InterTaint
+    typ
+    slots::BitSet
+    function InterTaint(@nospecialize(typ), slots::BitSet)
+        if typ isa InterTaint
+            slots = typ.slots ∪ slots
+            typ = typ.typ
+        end
+        return new(typ, slots)
+    end
+end
+InterTaint(@nospecialize(typ), id::Int) = InterTaint(typ, push!(BitSet(), id))
+function Base.:(==)(a::InterTaint, b::InterTaint)
+    return a.typ == b.typ && a.slots == b.slots
+end
+
+const AnyTaint = Union{Taint, InterTaint}
+
+function Compiler.tmeet(𝕃::AnyTaintLattice, @nospecialize(v), @nospecialize(t::Type))
+    T = isa(𝕃, TaintLattice) ? Taint : InterTaint
+    if isa(v, T)
+        v = v.typ
+    end
+    return tmeet(widenlattice(𝕃), v, t)
+end
+function Compiler.tmerge(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(typeb))
+    r = tmerge_fast_path(𝕃, typea, typeb)
+    r !== nothing && return r
+    # type-lattice for Taint
+    T = isa(𝕃, TaintLattice) ? Taint : InterTaint
+    if isa(typea, T)
+        if isa(typeb, T)
+            return T(
+                tmerge(widenlattice(𝕃), typea.typ, typeb.typ),
+                typea.slots ∪ typeb.slots)
+        else
+            typea = typea.typ
+        end
+    elseif isa(typeb, T)
+        typeb = typeb.typ
+    end
+    return tmerge(widenlattice(𝕃), typea, typeb)
+end
+function Compiler.:⊑(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(typeb))
+    T = isa(𝕃, TaintLattice) ? Taint : InterTaint
+    if isa(typea, T)
+        if isa(typeb, T)
+            typea.slots ⊆ typeb.slots || return false
+            return ⊑(widenlattice(𝕃), typea.typ, typeb.typ)
+        end
+        typea = typea.typ
+    elseif isa(typeb, T)
+        return false
+    end
+    return ⊑(widenlattice(𝕃), typea, typeb)
+end
+Compiler.widenconst(taint::AnyTaint) = widenconst(taint.typ)
+
+function Compiler.abstract_eval_special_value(interp::TaintInterpreter,
+    @nospecialize(e), sstate::Compiler.StatementState, sv::Compiler.InferenceState)
+    ret = @invoke Compiler.abstract_eval_special_value(interp::Compiler.AbstractInterpreter,
+        e::Any, sstate::Compiler.StatementState, sv::Compiler.InferenceState)
+    if isa(e, SlotNumber) || isa(e, Argument)
+        return Taint(ret, slot_id(e))
+    end
+    return ret
+end
+
+function Compiler.widenreturn(𝕃::InferenceLattice{<:InterTaintLattice}, @nospecialize(rt), @nospecialize(bestguess), nargs::Int, slottypes::Vector{Any}, changes::Compiler.VarTable)
+    if isa(rt, Taint)
+        return InterTaint(rt.typ, BitSet((id for id in rt.slots if id ≤ nargs)))
+    end
+    return Compiler.widenreturn(widenlattice(𝕃), rt, bestguess, nargs, slottypes, changes)
+end
+
+@test Compiler.tmerge(typeinf_lattice(TaintInterpreter()), Taint(Int, 1), Taint(Int, 2)) == Taint(Int, BitSet(1:2))
+
+# code_typed(ifelse, (Bool, Int, Int); interp=TaintInterpreter())
+
+# External lattice without `Conditional`
+
+import .Compiler:
+    AbstractLattice, ConstsLattice, PartialsLattice, InferenceLattice,
+    typeinf_lattice, ipo_lattice, optimizer_lattice
+
+@newinterp NonconditionalInterpreter
+Compiler.typeinf_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice()))
+Compiler.ipo_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice()))
+Compiler.optimizer_lattice(::NonconditionalInterpreter) = PartialsLattice(ConstsLattice())
+
+@test Base.return_types((Any,); interp=NonconditionalInterpreter()) do x
+    c = isa(x, Int) || isa(x, Float64)
+    if c
+        return x
+    else
+        return nothing
+    end
+end |> only === Any
+
+# CallInfo × inlining
+# ===================
+
+@newinterp NoinlineInterpreter
+noinline_modules(interp::NoinlineInterpreter) = interp.meta::Set{Module}
+
+import .Compiler: CallInfo
+
+struct NoinlineCallInfo <: CallInfo
+    info::CallInfo # wrapped call
+end
+Compiler.add_edges_impl(edges::Vector{Any}, info::NoinlineCallInfo) = Compiler.add_edges!(edges, info.info)
+Compiler.nsplit_impl(info::NoinlineCallInfo) = Compiler.nsplit(info.info)
+Compiler.getsplit_impl(info::NoinlineCallInfo, idx::Int) = Compiler.getsplit(info.info, idx)
+Compiler.getresult_impl(info::NoinlineCallInfo, idx::Int) = Compiler.getresult(info.info, idx)
+
+function Compiler.abstract_call(interp::NoinlineInterpreter,
+    arginfo::Compiler.ArgInfo, si::Compiler.StmtInfo, sv::Compiler.InferenceState, max_methods::Int)
+    ret = @invoke Compiler.abstract_call(interp::Compiler.AbstractInterpreter,
+        arginfo::Compiler.ArgInfo, si::Compiler.StmtInfo, sv::Compiler.InferenceState, max_methods::Int)
+    return Compiler.Future{Compiler.CallMeta}(ret, interp, sv) do ret, interp, sv
+        if sv.mod in noinline_modules(interp)
+            (;rt, exct, effects, info) = ret
+            return Compiler.CallMeta(rt, exct, effects, NoinlineCallInfo(info))
+        end
+        return ret
+    end
+end
+function Compiler.src_inlining_policy(interp::NoinlineInterpreter,
+    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
+    if isa(info, NoinlineCallInfo)
+        return false
+    end
+    return @invoke Compiler.src_inlining_policy(interp::Compiler.AbstractInterpreter,
+        src::Any, info::CallInfo, stmt_flag::UInt32)
+end
+
+@inline function inlined_usually(x, y, z)
+    return x * y + z
+end
+foo_split(x::Float64) = 1
+foo_split(x::Int) = 2
+
+# check if the inlining algorithm works as expected
+let src = code_typed1((Float64,Float64,Float64)) do x, y, z
+        inlined_usually(x, y, z)
+    end
+    @test count(isinvoke(:inlined_usually), src.code) == 0
+    @test count(iscall((src, inlined_usually)), src.code) == 0
+end
+let NoinlineModule = Module()
+    OtherModule = Module()
+    main_func(x, y, z) = inlined_usually(x, y, z)
+    @eval NoinlineModule noinline_func(x, y, z) = $inlined_usually(x, y, z)
+    @eval OtherModule other_func(x, y, z) = $inlined_usually(x, y, z)
+    @eval NoinlineModule bar_split_error() = $foo_split(Core.compilerbarrier(:type, nothing))
+
+    interp = NoinlineInterpreter(Set((NoinlineModule,)))
+
+    # this anonymous function's context is Main -- it should be inlined as usual
+    let src = code_typed1(main_func, (Float64,Float64,Float64); interp)
+        @test count(isinvoke(:inlined_usually), src.code) == 0
+        @test count(iscall((src, inlined_usually)), src.code) == 0
+    end
+
+    # it should work for cached results
+    method = only(methods(inlined_usually, (Float64,Float64,Float64,)))
+    mi = Compiler.specialize_method(method, Tuple{typeof(inlined_usually),Float64,Float64,Float64}, Core.svec())
+    @test Compiler.haskey(Compiler.code_cache(interp), mi)
+    let src = code_typed1(main_func, (Float64,Float64,Float64); interp)
+        @test count(isinvoke(:inlined_usually), src.code) == 0
+        @test count(iscall((src, inlined_usually)), src.code) == 0
+    end
+
+    # now the context module is `NoinlineModule` -- it should not be inlined
+    let src = code_typed1(NoinlineModule.noinline_func, (Float64,Float64,Float64); interp)
+        @test count(isinvoke(:inlined_usually), src.code) == 1
+        @test count(iscall((src, inlined_usually)), src.code) == 0
+    end
+
+    # the context module is totally irrelevant -- it should be inlined as usual
+    let src = code_typed1(OtherModule.other_func, (Float64,Float64,Float64); interp)
+        @test count(isinvoke(:inlined_usually), src.code) == 0
+        @test count(iscall((src, inlined_usually)), src.code) == 0
+    end
+
+    let src = code_typed1(NoinlineModule.bar_split_error)
+        @test count(iscall((src, foo_split)), src.code) == 0
+        @test count(iscall((src, Core.throw_methoderror)), src.code) > 0
+    end
+end
+
+# custom inferred data
+# ====================
+
+@newinterp CustomDataInterp
+struct CustomDataInterpToken end
+Compiler.cache_owner(::CustomDataInterp) = CustomDataInterpToken()
+struct CustomData
+    inferred
+    CustomData(@nospecialize inferred) = new(inferred)
+end
+function Compiler.transform_result_for_cache(
+    interp::CustomDataInterp, result::Compiler.InferenceResult, edges::Core.SimpleVector)
+    inferred_result = @invoke Compiler.transform_result_for_cache(
+        interp::Compiler.AbstractInterpreter, result::Compiler.InferenceResult, edges::Core.SimpleVector)
+    return CustomData(inferred_result)
+end
+function Compiler.src_inlining_policy(
+    interp::CustomDataInterp, @nospecialize(src), @nospecialize(info::Compiler.CallInfo),
+    stmt_flag::UInt32)
+    if src isa CustomData
+        src = src.inferred
+    end
+    return @invoke Compiler.src_inlining_policy(
+        interp::Compiler.AbstractInterpreter, src::Any, info::Compiler.CallInfo,
+        stmt_flag::UInt32)
+end
+Compiler.retrieve_ir_for_inlining(cached_result::CodeInstance, src::CustomData) =
+    Compiler.retrieve_ir_for_inlining(cached_result, src.inferred)
+Compiler.retrieve_ir_for_inlining(mi::MethodInstance, src::CustomData, preserve_local_sources::Bool) =
+    Compiler.retrieve_ir_for_inlining(mi, src.inferred, preserve_local_sources)
+let src = code_typed((Int,); interp=CustomDataInterp()) do x
+        return (@noinline sin(x)) + (@noinline cos(x))
+    end |> only |> first
+    @test count(isinvoke(:sin), src.code) == 1
+    @test count(isinvoke(:cos), src.code) == 1
+    @test_broken count(isinvoke(:+), src.code) == 0
+end
+
+# ephemeral cache mode
+@newinterp DebugInterp #=ephemeral_cache=#true
+func_ext_cache1(a) = func_ext_cache2(a) * cos(a)
+func_ext_cache2(a) = sin(a)
+let interp = DebugInterp()
+    @test Base.infer_return_type(func_ext_cache1, (Float64,); interp) === Float64
+    @test isdefined(interp, :global_cache)
+    found = false
+    for (mi, codeinst) in interp.global_cache.dict
+        if mi.def.name === :func_ext_cache2
+            found = true
+            break
+        end
+    end
+    @test found
+end
+
+@newinterp InvokeInterp
+struct InvokeOwner end
+global codegen::IdDict{CodeInstance, CodeInfo} = IdDict{CodeInstance, CodeInfo}()
+Compiler.cache_owner(::InvokeInterp) = InvokeOwner()
+Compiler.codegen_cache(::InvokeInterp) = codegen
+let interp = InvokeInterp()
+    source_mode = Compiler.SOURCE_MODE_ABI
+    f = (+)
+    args = (1, 1)
+    mi = @ccall jl_method_lookup(Any[f, args...]::Ptr{Any}, (1+length(args))::Csize_t, Base.tls_world_age()::Csize_t)::Ref{Core.MethodInstance}
+    ci = Compiler.typeinf_ext_toplevel(interp, mi, source_mode)
+    @test invoke(f, ci, args...) == 2
+
+    f = error
+    args = "test"
+    mi = @ccall jl_method_lookup(Any[f, args...]::Ptr{Any}, (1+length(args))::Csize_t, Base.tls_world_age()::Csize_t)::Ref{Core.MethodInstance}
+    ci = Compiler.typeinf_ext_toplevel(interp, mi, source_mode)
+    result = nothing
+    try
+        invoke(f, ci, args...)
+    catch e
+        result = sprint(Base.show_backtrace, catch_backtrace())
+    end
+    @test isa(result, String)
+    @test contains(result, "[1] error(::Char, ::Char, ::Char, ::Char)")
+end
diff --git a/Compiler/test/CompilerLoadingTest/Manifest.toml b/Compiler/test/CompilerLoadingTest/Manifest.toml
new file mode 100644
index 0000000000000..7fb3452a61017
--- /dev/null
+++ b/Compiler/test/CompilerLoadingTest/Manifest.toml
@@ -0,0 +1,16 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.12.0-DEV"
+manifest_format = "2.0"
+project_hash = "10c2816629fed766649b89eb6670e7001df6ea18"
+
+[[deps.Compiler]]
+path = "../.."
+uuid = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1"
+version = "0.0.1"
+
+[[deps.CompilerLoadingTest]]
+deps = ["Compiler"]
+path = "."
+uuid = "95defb8a-f82d-44d7-b2c9-37d658f648c1"
+version = "0.0.0"
diff --git a/Compiler/test/CompilerLoadingTest/Project.toml b/Compiler/test/CompilerLoadingTest/Project.toml
new file mode 100644
index 0000000000000..5dca932dc7997
--- /dev/null
+++ b/Compiler/test/CompilerLoadingTest/Project.toml
@@ -0,0 +1,5 @@
+name = "CompilerLoadingTest"
+uuid = "95defb8a-f82d-44d7-b2c9-37d658f648c1"
+
+[deps]
+Compiler = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1"
diff --git a/Compiler/test/CompilerLoadingTest/compiler_loading_test.jl b/Compiler/test/CompilerLoadingTest/compiler_loading_test.jl
new file mode 100644
index 0000000000000..a09f7751912b8
--- /dev/null
+++ b/Compiler/test/CompilerLoadingTest/compiler_loading_test.jl
@@ -0,0 +1,12 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, UUIDs
+
+# This file is loaded as part of special_loading.jl
+Base.compilecache(Base.PkgId(UUID(0x95defb8a_f82d_44d7_b2c9_37d658f648c1), "CompilerLoadingTest"))
+
+using CompilerLoadingTest
+@test Base.maybe_loaded_precompile(Base.PkgId(UUID(0x807dbc54_b67e_4c79_8afb_eafe4df6f2e1), "Compiler"), Base.module_build_id(Base.Compiler)) !== nothing
+
+using Compiler
+@test CompilerLoadingTest.Compiler === Compiler === Base.Compiler
diff --git a/stdlib/LinearAlgebra/test/runtests.jl b/Compiler/test/CompilerLoadingTest/src/CompilerLoadingTest.jl
similarity index 50%
rename from stdlib/LinearAlgebra/test/runtests.jl
rename to Compiler/test/CompilerLoadingTest/src/CompilerLoadingTest.jl
index 29581313c18d5..61f8417a23251 100644
--- a/stdlib/LinearAlgebra/test/runtests.jl
+++ b/Compiler/test/CompilerLoadingTest/src/CompilerLoadingTest.jl
@@ -1,5 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-for file in readlines(joinpath(@__DIR__, "testgroups"))
-    include(file * ".jl")
+module CompilerLoadingTest
+    using Compiler
 end
diff --git a/Compiler/test/EAUtils.jl b/Compiler/test/EAUtils.jl
new file mode 100644
index 0000000000000..5d7dc01b2a9ef
--- /dev/null
+++ b/Compiler/test/EAUtils.jl
@@ -0,0 +1,359 @@
+module EAUtils
+
+export code_escapes, @code_escapes, __clear_cache!
+
+include("setup_Compiler.jl")
+
+using .Compiler: EscapeAnalysis as EA
+
+# AbstractInterpreter
+# -------------------
+
+# imports
+import .Compiler:
+    AbstractInterpreter, InferenceParams, OptimizationParams,
+    get_world_counter, get_inference_cache, ipo_dataflow_analysis!
+# usings
+using Core.IR
+using .Compiler: InferenceResult, InferenceState, OptimizationState, IRCode
+using .EA: analyze_escapes, ArgEscapeCache, ArgEscapeInfo, EscapeInfo, EscapeState
+
+mutable struct EscapeAnalyzerCacheToken end
+global GLOBAL_EA_CACHE_TOKEN::EscapeAnalyzerCacheToken = EscapeAnalyzerCacheToken()
+
+struct EscapeResultForEntry
+    ir::IRCode
+    estate::EscapeState
+    mi::MethodInstance
+end
+
+mutable struct EscapeAnalyzer <: AbstractInterpreter
+    const world::UInt
+    const inf_params::InferenceParams
+    const opt_params::OptimizationParams
+    const inf_cache::Vector{InferenceResult}
+    const token::EscapeAnalyzerCacheToken
+    const entry_mi::Union{Nothing,MethodInstance}
+    result::EscapeResultForEntry
+    function EscapeAnalyzer(world::UInt, cache_token::EscapeAnalyzerCacheToken;
+                            entry_mi::Union{Nothing,MethodInstance}=nothing)
+        inf_params = InferenceParams()
+        opt_params = OptimizationParams()
+        inf_cache = InferenceResult[]
+        return new(world, inf_params, opt_params, inf_cache, cache_token, entry_mi)
+    end
+end
+
+Compiler.InferenceParams(interp::EscapeAnalyzer) = interp.inf_params
+Compiler.OptimizationParams(interp::EscapeAnalyzer) = interp.opt_params
+Compiler.get_inference_world(interp::EscapeAnalyzer) = interp.world
+Compiler.get_inference_cache(interp::EscapeAnalyzer) = interp.inf_cache
+Compiler.cache_owner(interp::EscapeAnalyzer) = interp.token
+Compiler.get_escape_cache(::EscapeAnalyzer) = GetEscapeCache()
+
+function Compiler.ipo_dataflow_analysis!(interp::EscapeAnalyzer, opt::OptimizationState,
+                                         ir::IRCode, caller::InferenceResult)
+    # run EA on all frames that have been optimized
+    nargs = Int(opt.src.nargs)
+    𝕃ₒ = Compiler.optimizer_lattice(interp)
+    estate = try
+        analyze_escapes(ir, nargs, 𝕃ₒ, GetEscapeCache())
+    catch err
+        @error "error happened within EA, inspect `Main.failedanalysis`"
+        failedanalysis = FailedAnalysis(caller, ir, nargs)
+        Core.eval(Main, :(failedanalysis = $failedanalysis))
+        rethrow(err)
+    end
+    if caller.linfo === interp.entry_mi
+        # return back the result
+        interp.result = EscapeResultForEntry(Compiler.copy(ir), estate, caller.linfo)
+    end
+    record_escapes!(caller, estate, ir)
+
+    @invoke Compiler.ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState,
+                                            ir::IRCode, caller::InferenceResult)
+end
+
+# cache entire escape state for inspection and debugging
+struct EscapeCacheInfo
+    argescapes::ArgEscapeCache
+    state::EscapeState # preserved just for debugging purpose
+    ir::IRCode         # preserved just for debugging purpose
+end
+
+function record_escapes!(caller::InferenceResult, estate::EscapeState, ir::IRCode)
+    argescapes = ArgEscapeCache(estate)
+    ecacheinfo = EscapeCacheInfo(argescapes, estate, ir)
+    return Compiler.stack_analysis_result!(caller, ecacheinfo)
+end
+
+struct GetEscapeCache end
+function (::GetEscapeCache)(codeinst::Union{CodeInstance,MethodInstance})
+    codeinst isa CodeInstance || return false
+    ecacheinfo = Compiler.traverse_analysis_results(codeinst) do @nospecialize result
+        return result isa EscapeCacheInfo ? result : nothing
+    end
+    return ecacheinfo === nothing ? false : ecacheinfo.argescapes
+end
+
+struct FailedAnalysis
+    caller::InferenceResult
+    ir::IRCode
+    nargs::Int
+end
+
+# printing
+# --------
+
+using Core: Argument, SSAValue
+using .Compiler: widenconst, singleton_type
+
+function get_name_color(x::EscapeInfo, symbol::Bool = false)
+    getname(x) = string(nameof(x))
+    if x === EA.⊥
+        name, color = (getname(EA.NotAnalyzed), "◌"), :plain
+    elseif EA.has_no_escape(EA.ignore_argescape(x))
+        if EA.has_arg_escape(x)
+            name, color = (getname(EA.ArgEscape), "✓"), :cyan
+        else
+            name, color = (getname(EA.NoEscape), "✓"), :green
+        end
+    elseif EA.has_all_escape(x)
+        name, color = (getname(EA.AllEscape), "X"), :red
+    elseif EA.has_return_escape(x)
+        name = (getname(EA.ReturnEscape), "↑")
+        color = EA.has_thrown_escape(x) ? :yellow : :blue
+    else
+        name = (nothing, "*")
+        color = EA.has_thrown_escape(x) ? :yellow : :bold
+    end
+    name = symbol ? last(name) : first(name)
+    if name !== nothing && !isa(x.AliasInfo, Bool)
+        name = string(name, "′")
+    end
+    return name, color
+end
+
+# pcs = sprint(show, collect(x.EscapeSites); context=:limit=>true)
+function Base.show(io::IO, x::EscapeInfo)
+    name, color = get_name_color(x)
+    if isnothing(name)
+        @invoke show(io::IO, x::Any)
+    else
+        printstyled(io, name; color)
+    end
+end
+
+function get_sym_color(x::ArgEscapeInfo)
+    escape_bits = x.escape_bits
+    if escape_bits == EA.ARG_ALL_ESCAPE
+        color, sym = :red, "X"
+    elseif escape_bits == 0x00
+        color, sym = :green, "✓"
+    else
+        color, sym = :bold, "*"
+        if !iszero(escape_bits & EA.ARG_RETURN_ESCAPE)
+            color, sym = :blue, "↑"
+        end
+        if !iszero(escape_bits & EA.ARG_THROWN_ESCAPE)
+            color = :yellow
+        end
+    end
+    return sym, color
+end
+
+function Base.show(io::IO, x::ArgEscapeInfo)
+    escape_bits = x.escape_bits
+    if escape_bits == EA.ARG_ALL_ESCAPE
+        color, sym = :red, "X"
+    elseif escape_bits == 0x00
+        color, sym = :green, "✓"
+    else
+        color, sym = :bold, "*"
+        if !iszero(escape_bits & EA.ARG_RETURN_ESCAPE)
+            color, sym = :blue, "↑"
+        end
+        if !iszero(escape_bits & EA.ARG_THROWN_ESCAPE)
+            color = :yellow
+        end
+    end
+    printstyled(io, "ArgEscapeInfo(", sym, ")"; color)
+end
+
+struct EscapeResult
+    ir::IRCode
+    state::EscapeState
+    mi::Union{Nothing,MethodInstance}
+    slotnames::Union{Nothing,Vector{Symbol}}
+    source::Bool
+    interp::Union{Nothing,EscapeAnalyzer}
+    function EscapeResult(ir::IRCode, state::EscapeState,
+                          mi::Union{Nothing,MethodInstance}=nothing,
+                          slotnames::Union{Nothing,Vector{Symbol}}=nothing,
+                          source::Bool=false,
+                          interp::Union{Nothing,EscapeAnalyzer}=nothing)
+        return new(ir, state, mi, slotnames, source, interp)
+    end
+end
+Base.show(io::IO, result::EscapeResult) = print_with_info(io, result)
+@eval Base.iterate(res::EscapeResult, state=1) =
+    return state > $(fieldcount(EscapeResult)) ? nothing : (getfield(res, state), state+1)
+
+Base.show(io::IO, ecacheinfo::EscapeCacheInfo) = show(io, EscapeResult(ecacheinfo.ir, ecacheinfo.state))
+
+# adapted from https://github.com/JuliaDebug/LoweredCodeUtils.jl/blob/4612349432447e868cf9285f647108f43bd0a11c/src/codeedges.jl#L881-L897
+function print_with_info(io::IO, result::EscapeResult)
+    (; ir, state, mi, slotnames, source) = result
+    # print escape information on SSA values
+    function preprint(io::IO)
+        ft = ir.argtypes[1]
+        f = singleton_type(ft)
+        if f === nothing
+            f = widenconst(ft)
+        end
+        print(io, f, '(')
+        for i in 1:state.nargs
+            arg = state[Argument(i)]
+            i == 1 && continue
+            c, color = get_name_color(arg, true)
+            slot = isnothing(slotnames) ? "_$i" : slotnames[i]
+            printstyled(io, c, ' ', slot, "::", ir.argtypes[i]; color)
+            i ≠ state.nargs && print(io, ", ")
+        end
+        print(io, ')')
+        if !isnothing(mi)
+            def = mi.def
+            printstyled(io, " in ", (isa(def, Module) ? (def,) : (def.module, " at ", def.file, ':', def.line))...; color=:bold)
+        end
+        println(io)
+    end
+
+    # print escape information on SSA values
+    # nd = ndigits(length(ssavalues))
+    function preprint(io::IO, idx::Int)
+        c, color = get_name_color(state[SSAValue(idx)], true)
+        # printstyled(io, lpad(idx, nd), ' ', c, ' '; color)
+        printstyled(io, rpad(c, 2), ' '; color)
+    end
+
+    print_with_info(preprint, (args...)->nothing, io, ir, source)
+end
+
+function print_with_info(preprint, postprint, io::IO, ir::IRCode, source::Bool)
+    io = IOContext(io, :displaysize=>displaysize(io))
+    used = Compiler.IRShow.stmts_used(io, ir)
+    if source
+        line_info_preprinter = function (io::IO, indent::String, idx::Int)
+            r = Compiler.IRShow.inline_linfo_printer(ir)(io, indent, idx)
+            idx ≠ 0 && preprint(io, idx)
+            return r
+        end
+    else
+        line_info_preprinter = Compiler.IRShow.lineinfo_disabled
+    end
+    line_info_postprinter = Compiler.IRShow.default_expr_type_printer
+    preprint(io)
+    bb_idx_prev = bb_idx = 1
+    for idx = 1:length(ir.stmts)
+        preprint(io, idx)
+        bb_idx = Compiler.IRShow.show_ir_stmt(io, ir, idx, line_info_preprinter, line_info_postprinter, ir.sptypes, used, ir.cfg, bb_idx)
+        postprint(io, idx, bb_idx != bb_idx_prev)
+        bb_idx_prev = bb_idx
+    end
+    max_bb_idx_size = ndigits(length(ir.cfg.blocks))
+    line_info_preprinter(io, " "^(max_bb_idx_size + 2), 0)
+    postprint(io)
+    return nothing
+end
+
+# entries
+# -------
+
+using InteractiveUtils: gen_call_with_extracted_types_and_kwargs
+
+"""
+    @code_escapes [options...] f(args...)
+
+Evaluates the arguments to the function call, determines its types, and then calls
+[`code_escapes`](@ref) on the resulting expression.
+As with `@code_typed` and its family, any of `code_escapes` keyword arguments can be given
+as the optional arguments like `@code_escapes optimize=false myfunc(myargs...)`.
+"""
+macro code_escapes(ex0...)
+    return gen_call_with_extracted_types_and_kwargs(__module__, :code_escapes, ex0)
+end
+
+"""
+    code_escapes(f, argtypes=Tuple{}; [world::UInt], [debuginfo::Symbol]) -> result::EscapeResult
+    code_escapes(mi::MethodInstance; [world::UInt], [interp::EscapeAnalyzer], [debuginfo::Symbol]) -> result::EscapeResult
+
+Runs the escape analysis on optimized IR of a generic function call with the given type signature,
+while caching the analysis results.
+
+# Keyword Arguments
+
+- `world::UInt = Base.get_world_counter()`:
+  controls the world age to use when looking up methods, use current world age if not specified.
+- `cache_token::EscapeAnalyzerCacheToken = GLOBAL_EA_CACHE_TOKEN`:
+  specifies the cache token to use, by default a global token is used so that the analysis
+  can use the caches from previous invocations. If you with to use a fresh cache and perform
+  a new analysis, specify a new `EscapeAnalyzerCacheToken` instance.
+- `interp::EscapeAnalyzer = EscapeAnalyzer(world, cache_token)`:
+  specifies the escape analyzer to use.
+- `debuginfo::Symbol = :none`:
+  controls the amount of code metadata present in the output, possible options are `:none` or `:source`.
+"""
+function code_escapes(@nospecialize(f), @nospecialize(types=Base.default_tt(f));
+                      world::UInt = get_world_counter(),
+                      cache_token::EscapeAnalyzerCacheToken = GLOBAL_EA_CACHE_TOKEN,
+                      debuginfo::Symbol = :none)
+    tt = Base.signature_type(f, types)
+    match = Base._which(tt; world, raise=true)
+    mi = Compiler.specialize_method(match)
+    return code_escapes(mi; world, cache_token, debuginfo)
+end
+
+function code_escapes(mi::MethodInstance;
+                      world::UInt = get_world_counter(),
+                      cache_token::EscapeAnalyzerCacheToken = GLOBAL_EA_CACHE_TOKEN,
+                      interp::EscapeAnalyzer=EscapeAnalyzer(world, cache_token; entry_mi=mi),
+                      debuginfo::Symbol = :none)
+    frame = Compiler.typeinf_frame(interp, mi, #=run_optimizer=#true)
+    isdefined(interp, :result) || error("optimization didn't happen: maybe everything has been constant folded?")
+    slotnames = let src = frame.src
+        src isa CodeInfo ? src.slotnames : nothing
+    end
+    return EscapeResult(interp.result.ir, interp.result.estate, interp.result.mi,
+                        slotnames, debuginfo === :source, interp)
+end
+
+"""
+    code_escapes(ir::IRCode, nargs::Int; [world::UInt], [interp::AbstractInterpreter]) -> result::EscapeResult
+
+Runs the escape analysis on `ir::IRCode`.
+`ir` is supposed to be optimized already, specifically after inlining has been applied.
+Note that this version does not cache the analysis results.
+
+# Keyword Arguments
+
+- `world::UInt = Base.get_world_counter()`:
+  controls the world age to use when looking up methods, use current world age if not specified.
+- `cache_token::EscapeAnalyzerCacheToken = GLOBAL_EA_CACHE_TOKEN`:
+  specifies the cache token to use, by default a global token is used so that the analysis
+  can use the caches from previous invocations. If you with to use a fresh cache and perform
+  a new analysis, specify a new `EscapeAnalyzerCacheToken` instance.
+- `interp::AbstractInterpreter = EscapeAnalyzer(world, cache_token)`:
+  specifies the abstract interpreter to use, by default a new `EscapeAnalyzer` with an empty cache is created.
+"""
+function code_escapes(ir::IRCode, nargs::Int;
+                      world::UInt = get_world_counter(),
+                      cache_token::EscapeAnalyzerCacheToken = GLOBAL_EA_CACHE_TOKEN,
+                      interp::AbstractInterpreter=EscapeAnalyzer(world, cache_token))
+    estate = analyze_escapes(ir, nargs, Compiler.optimizer_lattice(interp), Compiler.get_escape_cache(interp))
+    return EscapeResult(ir, estate) # return back the result
+end
+
+# in order to run a whole analysis from ground zero (e.g. for benchmarking, etc.)
+__clear_cache!() = empty!(GLOBAL_EA_CODE_CACHE)
+
+end # module EAUtils
diff --git a/Compiler/test/EscapeAnalysis.jl b/Compiler/test/EscapeAnalysis.jl
new file mode 100644
index 0000000000000..07855d3362881
--- /dev/null
+++ b/Compiler/test/EscapeAnalysis.jl
@@ -0,0 +1,1712 @@
+module test_EA
+
+include("setup_Compiler.jl")
+include("irutils.jl")
+
+const EscapeAnalysis = Compiler.EscapeAnalysis
+
+include("EAUtils.jl")
+
+using Test, .EscapeAnalysis, .EAUtils
+using .EscapeAnalysis: ignore_argescape
+
+let utils_ex = quote
+        mutable struct SafeRef{T}
+            x::T
+        end
+        Base.getindex(s::SafeRef) = getfield(s, 1)
+        Base.setindex!(s::SafeRef, x) = setfield!(s, 1, x)
+
+        mutable struct SafeRefs{S,T}
+            x1::S
+            x2::T
+        end
+        Base.getindex(s::SafeRefs, idx::Int) = getfield(s, idx)
+        Base.setindex!(s::SafeRefs, x, idx::Int) = setfield!(s, idx, x)
+
+        global GV::Any
+        const global GR = Ref{Any}()
+    end
+    global function EATModule(utils_ex = utils_ex)
+        M = Module()
+        Core.eval(M, utils_ex)
+        return M
+    end
+    Core.eval(@__MODULE__, utils_ex)
+end
+
+using .EscapeAnalysis: EscapeInfo, IndexableFields
+
+isϕ(@nospecialize x) = isa(x, Core.PhiNode)
+"""
+    is_load_forwardable(x::EscapeInfo) -> Bool
+
+Queries if `x` is elibigle for store-to-load forwarding optimization.
+"""
+function is_load_forwardable(x::EscapeInfo)
+    AliasInfo = x.AliasInfo
+    # NOTE technically we also need to check `!has_thrown_escape(x)` here as well,
+    # but we can also do equivalent check during forwarding
+    return isa(AliasInfo, IndexableFields)
+end
+
+@testset "EAUtils" begin
+    @test_throws "everything has been constant folded" code_escapes() do; sin(42); end
+    @test code_escapes(sin, (Int,)) isa EAUtils.EscapeResult
+    @test code_escapes(sin, (Int,)) isa EAUtils.EscapeResult
+end
+
+@testset "basics" begin
+    let # arg return
+        result = code_escapes((Any,)) do a # return to caller
+            println("prevent ConstABI")
+            return nothing
+        end
+        @test has_arg_escape(result.state[Argument(2)])
+        # return
+        result = code_escapes((Any,)) do a
+            println("prevent ConstABI")
+            return a
+        end
+        i = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_arg_escape(result.state[Argument(1)]) # self
+        @test !has_return_escape(result.state[Argument(1)], i) # self
+        @test has_arg_escape(result.state[Argument(2)]) # a
+        @test has_return_escape(result.state[Argument(2)], i) # a
+    end
+    let # global store
+        result = code_escapes((Any,)) do a
+            global GV = a
+            nothing
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let # global load
+        result = code_escapes() do
+            global GV
+            return GV
+        end
+        i = only(findall(has_return_escape, map(i->result.state[SSAValue(i)], 1:length(result.ir.stmts))))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # global store / load (https://github.com/aviatesk/EscapeAnalysis.jl/issues/56)
+        result = code_escapes((Any,)) do s
+            global GV
+            GV = s
+            return GV
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+    end
+    let # :gc_preserve_begin / :gc_preserve_end
+        result = code_escapes((String,)) do s
+            m = SafeRef(s)
+            GC.@preserve m begin
+                println(s)
+                return nothing
+            end
+        end
+        i = findfirst(==(SafeRef{String}), result.ir.stmts.type) # find allocation statement
+        @test !isnothing(i)
+        @test has_no_escape(result.state[SSAValue(i)])
+    end
+    let # :isdefined
+        result = code_escapes((String, Bool,)) do a, b
+            if b
+                s = Ref(a)
+            end
+            return @isdefined(s)
+        end
+        i = findfirst(==(Base.RefValue{String}), result.ir.stmts.type) # find allocation statement
+        @test isnothing(i) || has_no_escape(result.state[SSAValue(i)])
+    end
+    let # ϕ-node
+        result = code_escapes((Bool,Any,Any)) do cond, a, b
+            c = cond ? a : b # ϕ(a, b)
+            return c
+        end
+        @assert any(@nospecialize(x)->isa(x, Core.PhiNode), result.ir.stmts.stmt)
+        i = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(3)], i) # a
+        @test has_return_escape(result.state[Argument(4)], i) # b
+    end
+    let # π-node
+        result = code_escapes((Any,)) do a
+            if isa(a, Regex) # a::π(Regex)
+                return a
+            end
+            return nothing
+        end
+        @assert any(@nospecialize(x)->isa(x, Core.PiNode), result.ir.stmts.stmt)
+        @test any(findall(isreturn, result.ir.stmts.stmt)) do i
+            has_return_escape(result.state[Argument(2)], i)
+        end
+    end
+    let # φᶜ-node / ϒ-node
+        result = code_escapes((Any,String)) do a, b
+            local x::String
+            try
+                x = a
+            catch err
+                x = b
+            end
+            return x
+        end
+        @assert any(@nospecialize(x)->isa(x, Core.PhiCNode), result.ir.stmts.stmt)
+        @assert any(@nospecialize(x)->isa(x, Core.UpsilonNode), result.ir.stmts.stmt)
+        i = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], i)
+        @test has_return_escape(result.state[Argument(3)], i)
+    end
+    let # branching
+        result = code_escapes((Any,Bool,)) do a, c
+            if c
+                return nothing # a doesn't escape in this branch
+            else
+                return a # a escapes to a caller
+            end
+        end
+        @test has_return_escape(result.state[Argument(2)])
+    end
+    let # loop
+        result = code_escapes((Int,)) do n
+            c = SafeRef{Bool}(false)
+            while n > 0
+                rand(Bool) && return c
+            end
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[SSAValue(i)])
+    end
+    let # try/catch
+        result = code_escapes((Any,)) do a
+            try
+                println("prevent ConstABI")
+                nothing
+            catch err
+                return a # return escape
+            end
+        end
+        @test has_return_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do a
+            try
+                println("prevent ConstABI")
+                nothing
+            finally
+                return a # return escape
+            end
+        end
+        @test has_return_escape(result.state[Argument(2)])
+    end
+    let # :foreigncall
+        result = code_escapes((Any,)) do x
+            ccall(:some_ccall, Any, (Any,), x)
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+end
+
+@testset "builtins" begin
+    let # throw
+        r = code_escapes((Any,)) do a
+            throw(a)
+        end
+        @test has_thrown_escape(r.state[Argument(2)])
+    end
+
+    let # implicit throws
+        r = code_escapes((Any,)) do a
+            getfield(a, :may_not_field)
+        end
+        @test has_thrown_escape(r.state[Argument(2)])
+
+        r = code_escapes((Any,)) do a
+            sizeof(a)
+        end
+        @test has_thrown_escape(r.state[Argument(2)])
+    end
+
+    let # :===
+        result = code_escapes((Bool, SafeRef{String})) do cond, s
+            m = cond ? s : nothing
+            c = m === nothing
+            return c
+        end
+        @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+    end
+
+    let # sizeof
+        result = code_escapes((Vector{Any},)) do xs
+            sizeof(xs)
+        end
+        @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+    end
+
+    let # ifelse
+        result = code_escapes((Bool,)) do c
+            r = ifelse(c, Ref("yes"), Ref("no"))
+            return r
+        end
+        inds = findall(isnew, result.ir.stmts.stmt)
+        @assert !isempty(inds)
+        for i in inds
+            @test has_return_escape(result.state[SSAValue(i)])
+        end
+    end
+    let # ifelse (with constant condition)
+        result = code_escapes() do
+            r = ifelse(true, Ref("yes"), Ref(nothing))
+            return r
+        end
+        for i in 1:length(result.ir.stmts)
+            if isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Base.RefValue{String}
+                @test has_return_escape(result.state[SSAValue(i)])
+            elseif isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Base.RefValue{Nothing}
+                @test has_no_escape(result.state[SSAValue(i)])
+            end
+        end
+    end
+
+    let # typeassert
+        result = code_escapes((Any,)) do x
+            y = x::Base.RefValue{Any}
+            return y
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test !has_all_escape(result.state[Argument(2)])
+    end
+
+    let # isdefined
+        result = code_escapes((Any,)) do x
+            isdefined(x, :foo) ? x : throw("undefined")
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test !has_all_escape(result.state[Argument(2)])
+    end
+end
+
+@testset "flow-sensitivity" begin
+    # ReturnEscape
+    let result = code_escapes((Bool,)) do cond
+            r = Ref("foo")
+            if cond
+                return cond
+            end
+            return r
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        rts = findall(isreturn, result.ir.stmts.stmt)
+        @assert length(rts) == 2
+        @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 1
+    end
+    let result = code_escapes((Bool,)) do cond
+            r = Ref("foo")
+            cnt = 0
+            while rand(Bool)
+                cnt += 1
+                rand(Bool) && return r
+            end
+            rand(Bool) && return r
+            return cnt
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        rts = findall(isreturn, result.ir.stmts.stmt) # return statement
+        @assert length(rts) == 3
+        @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 2
+    end
+end
+
+@testset "escape through exceptions" begin
+    M = @eval Module() begin
+        unsafeget(x) = isassigned(x) ? x[] : throw(x)
+        @noinline function escape_rethrow!()
+            try
+                rethrow()
+            catch err
+                GR[] = err
+            end
+        end
+        @noinline function escape_current_exceptions!()
+            excs = Base.current_exceptions()
+            GR[] = excs
+        end
+        const GR = Ref{Any}()
+        @__MODULE__
+    end
+
+    let # simple: return escape
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            catch err
+                ret = err
+            end
+            return ret
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[SSAValue(i)])
+    end
+
+    let # simple: global escape
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret # prevent DCE
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            catch err
+                global GV = err
+            end
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+
+    let # account for possible escapes via nested throws
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            try
+                try
+                    unsafeget(r)
+                catch err1
+                    throw(err1)
+                end
+            catch err2
+                GR[] = err2
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # account for possible escapes via `rethrow`
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            try
+                try
+                    unsafeget(r)
+                catch err1
+                    rethrow(err1)
+                end
+            catch err2
+                GR[] = err2
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # account for possible escapes via `rethrow`
+        result = @eval M $code_escapes() do
+            try
+                r = Ref{String}()
+                unsafeget(r)
+            catch
+                escape_rethrow!()
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # account for possible escapes via `rethrow`
+        result = @eval M $code_escapes() do
+            local t
+            try
+                r = Ref{String}()
+                t = unsafeget(r)
+            catch err
+                t = typeof(err)
+                escape_rethrow!()
+            end
+            return t
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # account for possible escapes via `Base.current_exceptions`
+        result = @eval M $code_escapes() do
+            try
+                r = Ref{String}()
+                unsafeget(r)
+            catch
+                GR[] = Base.current_exceptions()
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # account for possible escapes via `Base.current_exceptions`
+        result = @eval M $code_escapes() do
+            try
+                r = Ref{String}()
+                unsafeget(r)
+            catch
+                escape_current_exceptions!()
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+
+    let # contextual: escape information imposed on `err` shouldn't propagate to `r2`, but only to `r1`
+        result = @eval M $code_escapes() do
+            r1 = Ref{String}()
+            r2 = Ref{String}()
+            local ret
+            try
+                s1 = unsafeget(r1)
+                ret = sizeof(s1)
+            catch err
+                global GV = err
+            end
+            s2 = unsafeget(r2)
+            return s2, r2
+        end
+        is = findall(isnew, result.ir.stmts.stmt)
+        @test length(is) == 2
+        i1, i2 = is
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i1)])
+        @test !has_all_escape(result.state[SSAValue(i2)])
+        @test has_return_escape(result.state[SSAValue(i2)], r)
+    end
+
+    # XXX test cases below are currently broken because of the technical reason described in `escape_exception!`
+
+    let # limited propagation: exception is caught within a frame => doesn't escape to a caller
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            catch
+                ret = nothing
+            end
+            return ret
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test_broken !has_return_escape(result.state[SSAValue(i)], r) # TODO? see `escape_exception!`
+    end
+    let # sequential: escape information imposed on `err1` and `err2 should propagate separately
+        result = @eval M $code_escapes() do
+            r1 = Ref{String}()
+            r2 = Ref{String}()
+            local ret
+            try
+                s1 = unsafeget(r1)
+                ret = sizeof(s1)
+            catch err1
+                global GV = err1
+            end
+            try
+                s2 = unsafeget(r2)
+                ret = sizeof(s2)
+            catch err2
+                ret = err2
+            end
+            return ret
+        end
+        is = findall(isnew, result.ir.stmts.stmt)
+        @test length(is) == 2
+        i1, i2 = is
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i1)])
+        @test has_return_escape(result.state[SSAValue(i2)], r)
+        @test_broken !has_all_escape(result.state[SSAValue(i2)]) # TODO? see `escape_exception!`
+    end
+    let # nested: escape information imposed on `inner` shouldn't propagate to `s`
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret
+            try
+                s = unsafeget(r)
+                try
+                    ret = sizeof(s)
+                catch inner
+                    return inner
+                end
+            catch outer
+                ret = nothing
+            end
+            return ret
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test_broken !has_return_escape(result.state[SSAValue(i)])
+    end
+    let # merge: escape information imposed on `err1` and `err2 should be merged
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            catch err1
+                return err1
+            end
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            catch err2
+                return err2
+            end
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        rs = findall(isreturn, result.ir.stmts.stmt)
+        @test_broken !has_all_escape(result.state[SSAValue(i)])
+        for r in rs
+            @test has_return_escape(result.state[SSAValue(i)], r)
+        end
+    end
+    let # no exception handling: should keep propagating the escape
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            finally
+                if !@isdefined(ret)
+                    ret = 42
+                end
+            end
+            return ret
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test_broken !has_return_escape(result.state[SSAValue(i)], r)
+    end
+end
+
+@testset "field analysis / alias analysis" begin
+    # escaped allocations
+    # -------------------
+
+    # escaped object should escape its fields as well
+    let result = code_escapes((Any,)) do a
+            global GV = SafeRef{Any}(a)
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do a
+            global GV = (a,)
+            nothing
+        end
+        i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do a
+            o0 = SafeRef{Any}(a)
+            global GV = SafeRef(o0)
+            nothing
+        end
+        is = findall(isnew, result.ir.stmts.stmt)
+        @test length(is) == 2
+        i0, i1 = is
+        @test has_all_escape(result.state[SSAValue(i0)])
+        @test has_all_escape(result.state[SSAValue(i1)])
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do a
+            t0 = (a,)
+            global GV = (t0,)
+            nothing
+        end
+        inds = findall(iscall((result.ir, tuple)), result.ir.stmts.stmt)
+        @assert length(inds) == 2
+        for i in inds; @test has_all_escape(result.state[SSAValue(i)]); end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    # global escape through `setfield!`
+    let result = code_escapes((Any,)) do a
+            r = SafeRef{Any}(:init)
+            global GV = r
+            r[] = a
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,Any)) do a, b
+            r = SafeRef{Any}(a)
+            global GV = r
+            r[] = b
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[SSAValue(i)])
+        @test has_all_escape(result.state[Argument(2)]) # a
+        @test has_all_escape(result.state[Argument(3)]) # b
+    end
+    let result = @eval EATModule() begin
+            const Rx = SafeRef(Ref(""))
+            $code_escapes((Base.RefValue{String},)) do s
+                Rx[] = s
+                Core.sizeof(Rx[])
+            end
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let result = @eval EATModule() begin
+            const Rx = SafeRef{Any}(nothing)
+            $code_escapes((Base.RefValue{String},)) do s
+                setfield!(Rx, :x, s)
+                Core.sizeof(Rx[])
+            end
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let M = EATModule()
+        @eval M module ___xxx___
+            import ..SafeRef
+            const Rx = SafeRef("Rx")
+        end
+        result = @eval M begin
+            $code_escapes((String,)) do s
+                rx = getfield(___xxx___, :Rx)
+                rx[] = s
+                nothing
+            end
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+
+    # field escape
+    # ------------
+
+    # field escape should propagate to :new arguments
+    let result = code_escapes((Base.RefValue{String},)) do a
+            o = SafeRef(a)
+            Core.donotdelete(o)
+            return o[]
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    let result = code_escapes((Base.RefValue{String},)) do a
+            t = SafeRef((a,))
+            f = t[][1]
+            return f
+        end
+        i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    let result = code_escapes((Base.RefValue{String}, Base.RefValue{String})) do a, b
+            obj = SafeRefs(a, b)
+            Core.donotdelete(obj)
+            fld1 = obj[1]
+            fld2 = obj[2]
+            return (fld1, fld2)
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test has_return_escape(result.state[Argument(3)], r) # b
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+
+    # field escape should propagate to `setfield!` argument
+    let result = code_escapes((Base.RefValue{String},)) do a
+            o = SafeRef(Ref("foo"))
+            Core.donotdelete(o)
+            o[] = a
+            return o[]
+        end
+        i = last(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    # propagate escape information imposed on return value of `setfield!` call
+    let result = code_escapes((Base.RefValue{String},)) do a
+            obj = SafeRef(Ref("foo"))
+            Core.donotdelete(obj)
+            return (obj[] = a)
+        end
+        i = last(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+
+    # nested allocations
+    let result = code_escapes((Base.RefValue{String},)) do a
+            o1 = SafeRef(a)
+            o2 = SafeRef(o1)
+            return o2[]
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        for i in 1:length(result.ir.stmts)
+            if isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == SafeRef{String}
+                @test has_return_escape(result.state[SSAValue(i)], r)
+            elseif isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == SafeRef{SafeRef{String}}
+                @test is_load_forwardable(result.state[SSAValue(i)])
+            end
+        end
+    end
+    let result = code_escapes((Base.RefValue{String},)) do a
+            o1 = (a,)
+            o2 = (o1,)
+            return o2[1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        for i in 1:length(result.ir.stmts)
+            if isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Tuple{String}
+                @test has_return_escape(result.state[SSAValue(i)], r)
+            elseif isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Tuple{Tuple{String}}
+                @test is_load_forwardable(result.state[SSAValue(i)])
+            end
+        end
+    end
+    let result = code_escapes((Base.RefValue{String},)) do a
+            o1  = SafeRef(a)
+            o2  = SafeRef(o1)
+            o1′ = o2[]
+            a′  = o1′[]
+            return a′
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        for i in findall(isnew, result.ir.stmts.stmt)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+    end
+    let result = code_escapes() do
+            o1 = SafeRef("foo")
+            o2 = SafeRef(o1)
+            return o2
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        for i in findall(isnew, result.ir.stmts.stmt)
+            @test has_return_escape(result.state[SSAValue(i)], r)
+        end
+    end
+    let result = code_escapes() do
+            o1   = SafeRef("foo")
+            o2′  = SafeRef(nothing)
+            o2   = SafeRef{SafeRef}(o2′)
+            o2[] = o1
+            return o2
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        findall(1:length(result.ir.stmts)) do i
+            if isnew(result.ir.stmts[i][:stmt])
+                t = result.ir.stmts[i][:type]
+                return t === SafeRef{String}  || # o1
+                       t === SafeRef{SafeRef}    # o2
+            end
+            return false
+        end |> x->foreach(x) do i
+            @test has_return_escape(result.state[SSAValue(i)], r)
+        end
+    end
+    let result = code_escapes((Base.RefValue{String},)) do x
+            o = Ref(x)
+            Core.donotdelete(o)
+            broadcast(identity, o)
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+
+    # ϕ-node allocations
+    let result = code_escapes((Bool,Any,Any)) do cond, x, y
+            if cond
+                ϕ = SafeRef{Any}(x)
+            else
+                ϕ = SafeRef{Any}(y)
+            end
+            return ϕ[]
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(3)], r) # x
+        @test has_return_escape(result.state[Argument(4)], r) # y
+        i = only(findall(isϕ, result.ir.stmts.stmt))
+        @test is_load_forwardable(result.state[SSAValue(i)])
+        for i in findall(isnew, result.ir.stmts.stmt)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+    end
+    let result = code_escapes((Bool,Any,Any)) do cond, x, y
+            if cond
+                ϕ2 = ϕ1 = SafeRef{Any}(x)
+            else
+                ϕ2 = ϕ1 = SafeRef{Any}(y)
+            end
+            return ϕ1[], ϕ2[]
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(3)], r) # x
+        @test has_return_escape(result.state[Argument(4)], r) # y
+        for i in findall(isϕ, result.ir.stmts.stmt)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+        for i in findall(isnew, result.ir.stmts.stmt)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+    end
+    # when ϕ-node merges values with different types
+    let result = code_escapes((Bool,Base.RefValue{String},Base.RefValue{String},Base.RefValue{String})) do cond, x, y, z
+            local out
+            if cond
+                ϕ = SafeRef(x)
+                out = ϕ[]
+            else
+                ϕ = SafeRefs(z, y)
+            end
+            return @isdefined(out) ? out : throw(ϕ)
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        t = only(findall(iscall((result.ir, throw)), result.ir.stmts.stmt))
+        ϕ = only(findall(==(Union{SafeRef{Base.RefValue{String}},SafeRefs{Base.RefValue{String},Base.RefValue{String}}}), result.ir.stmts.type))
+        @test has_return_escape(result.state[Argument(3)], r) # x
+        @test !has_return_escape(result.state[Argument(4)], r) # y
+        @test has_return_escape(result.state[Argument(5)], r) # z
+        @test has_thrown_escape(result.state[SSAValue(ϕ)], t)
+    end
+
+    # alias analysis
+    # --------------
+
+    # alias via getfield & Expr(:new)
+    let result = code_escapes((String,)) do s
+            r = SafeRef(s)
+            Core.donotdelete(r)
+            return r[]
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test isaliased(Argument(2), val, result.state)
+        @test !isaliased(Argument(2), SSAValue(i), result.state)
+    end
+    let result = code_escapes((String,)) do s
+            r1 = SafeRef(s)
+            r2 = SafeRef(r1)
+            Core.donotdelete(r1, r2)
+            return r2[]
+        end
+        i1, i2 = findall(isnew, result.ir.stmts.stmt)
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test !isaliased(SSAValue(i1), SSAValue(i2), result.state)
+        @test isaliased(SSAValue(i1), val, result.state)
+        @test !isaliased(SSAValue(i2), val, result.state)
+    end
+    let result = code_escapes((String,)) do s
+            r1 = SafeRef(s)
+            r2 = SafeRef(r1)
+            Core.donotdelete(r1, r2)
+            return r2[][]
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test isaliased(Argument(2), val, result.state)
+        for i in findall(isnew, result.ir.stmts.stmt)
+            @test !isaliased(SSAValue(i), val, result.state)
+        end
+    end
+    let result = @eval EATModule() begin
+            const Rx = SafeRef("Rx")
+            $code_escapes((String,)) do s
+                r = SafeRef(Rx)
+                Core.donotdelete(r)
+                rx = r[] # rx aliased to Rx
+                rx[] = s
+                nothing
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test has_all_escape(result.state[Argument(2)])
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    # alias via getfield & setfield!
+    let result = code_escapes((String,)) do s
+            r = Ref{String}()
+            Core.donotdelete(r)
+            r[] = s
+            return r[]
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test isaliased(Argument(2), val, result.state)
+        @test !isaliased(Argument(2), SSAValue(i), result.state)
+    end
+    let result = code_escapes((String,)) do s
+            r1 = Ref(s)
+            r2 = Ref{Base.RefValue{String}}()
+            Core.donotdelete(r1, r2)
+            r2[] = r1
+            return r2[]
+        end
+        i1, i2 = findall(isnew, result.ir.stmts.stmt)
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test !isaliased(SSAValue(i1), SSAValue(i2), result.state)
+        @test isaliased(SSAValue(i1), val, result.state)
+        @test !isaliased(SSAValue(i2), val, result.state)
+    end
+    let result = code_escapes((String,)) do s
+            r1 = Ref{String}()
+            r2 = Ref{Base.RefValue{String}}()
+            Core.donotdelete(r1, r2)
+            r2[] = r1
+            r1[] = s
+            return r2[][]
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test isaliased(Argument(2), val, result.state)
+        for i in findall(isnew, result.ir.stmts.stmt)
+            @test !isaliased(SSAValue(i), val, result.state)
+        end
+        result = code_escapes((String,)) do s
+            r1 = Ref{String}()
+            r2 = Ref{Base.RefValue{String}}()
+            r1[] = s
+            r2[] = r1
+            return r2[][]
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test isaliased(Argument(2), val, result.state)
+        for i in findall(isnew, result.ir.stmts.stmt)
+            @test !isaliased(SSAValue(i), val, result.state)
+        end
+    end
+    let result = @eval EATModule() begin
+            const Rx = SafeRef("Rx")
+            $code_escapes((SafeRef{String}, String,)) do _rx, s
+                r = SafeRef(_rx)
+                Core.donotdelete(r)
+                r[] = Rx
+                rx = r[] # rx aliased to Rx
+                rx[] = s
+                nothing
+            end
+        end
+        i = findfirst(isnew, result.ir.stmts.stmt)
+        @test has_all_escape(result.state[Argument(3)])
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    # alias via typeassert
+    let result = code_escapes((Any,)) do a
+            r = a::Base.RefValue{String}
+            return r
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test isaliased(Argument(2), val, result.state)       # a <-> r
+    end
+    let result = code_escapes((Any,)) do a
+            global GV
+            (g::SafeRef{Any})[] = a
+            nothing
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    # alias via ifelse
+    let result = code_escapes((Bool,Any,Any)) do c, a, b
+            r = ifelse(c, a, b)
+            return r
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test has_return_escape(result.state[Argument(3)], r) # a
+        @test has_return_escape(result.state[Argument(4)], r) # b
+        @test !isaliased(Argument(2), val, result.state)      # c <!-> r
+        @test isaliased(Argument(3), val, result.state)       # a <-> r
+        @test isaliased(Argument(4), val, result.state)       # b <-> r
+    end
+    let result = @eval EATModule() begin
+            const Lx, Rx = SafeRef("Lx"), SafeRef("Rx")
+            $code_escapes((Bool,String,)) do c, a
+                r = ifelse(c, Lx, Rx)
+                r[] = a
+                nothing
+            end
+        end
+        @test has_all_escape(result.state[Argument(3)]) # a
+    end
+    # alias via ϕ-node
+    let result = code_escapes((Bool,Base.RefValue{String})) do cond, x
+            if cond
+                ϕ2 = ϕ1 = SafeRef(Ref("foo"))
+            else
+                ϕ2 = ϕ1 = SafeRef(Ref("bar"))
+            end
+            ϕ2[] = x
+            return ϕ1[]
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test has_return_escape(result.state[Argument(3)], r) # x
+        @test isaliased(Argument(3), val, result.state) # x
+        for i in findall(isϕ, result.ir.stmts.stmt)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+        for i in findall(isnew, result.ir.stmts.stmt)
+            if result.ir[SSAValue(i)][:type] <: SafeRef
+                @test is_load_forwardable(result.state[SSAValue(i)])
+            end
+        end
+    end
+    let result = code_escapes((Bool,Bool,Base.RefValue{String})) do cond1, cond2, x
+            if cond1
+                ϕ2 = ϕ1 = SafeRef(Ref("foo"))
+            else
+                ϕ2 = ϕ1 = SafeRef(Ref("bar"))
+            end
+            cond2 && (ϕ2[] = x)
+            return ϕ1[]
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test has_return_escape(result.state[Argument(4)], r) # x
+        @test isaliased(Argument(4), val, result.state) # x
+        for i in findall(isϕ, result.ir.stmts.stmt)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+        for i in findall(isnew, result.ir.stmts.stmt)
+            if result.ir[SSAValue(i)][:type] <: SafeRef
+                @test is_load_forwardable(result.state[SSAValue(i)])
+            end
+        end
+    end
+    # alias via π-node
+    let result = code_escapes((Any,)) do x
+            if isa(x, Base.RefValue{String})
+                return x
+            end
+            throw("error!")
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        rval = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue
+        @test has_return_escape(result.state[Argument(2)], r) # x
+        @test isaliased(Argument(2), rval, result.state)
+    end
+    let result = code_escapes((String,)) do x
+            global GV
+            l = g
+            if isa(l, SafeRef{String})
+                l[] = x
+            end
+            nothing
+        end
+        @test has_all_escape(result.state[Argument(2)]) # x
+    end
+    # circular reference
+    let result = code_escapes() do
+            x = Ref{Any}()
+            x[] = x
+            return x[]
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[SSAValue(i)], r)
+    end
+    let result = @eval Module() begin
+            const Rx = Ref{Any}()
+            Rx[] = Rx
+            $code_escapes() do
+                r = Rx[]::Base.RefValue{Any}
+                return r[]
+            end
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        for i in findall(iscall((result.ir, getfield)), result.ir.stmts.stmt)
+            @test has_return_escape(result.state[SSAValue(i)], r)
+        end
+    end
+    let result = @eval Module() begin
+            @noinline function genr()
+                r = Ref{Any}()
+                r[] = r
+                return r
+            end
+            $code_escapes() do
+                x = genr()
+                return x[]
+            end
+        end
+        i = only(findall(isinvoke(:genr), result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[SSAValue(i)], r)
+    end
+
+    # dynamic semantics
+    # -----------------
+
+    # conservatively handle untyped objects
+    let result = @eval code_escapes((Any,Any,)) do T, x
+            obj = $(Expr(:new, :T, :x))
+        end
+        t = only(findall(isnew, result.ir.stmts.stmt))
+        @test #=T=# has_thrown_escape(result.state[Argument(2)], t) # T
+        @test #=x=# has_thrown_escape(result.state[Argument(3)], t) # x
+    end
+    let result = @eval code_escapes((Any,Any,Any,Any)) do T, x, y, z
+            obj = $(Expr(:new, :T, :x, :y))
+            return getfield(obj, :x)
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test #=x=# has_return_escape(result.state[Argument(3)], r)
+        @test #=y=# has_return_escape(result.state[Argument(4)], r)
+        @test #=z=# !has_return_escape(result.state[Argument(5)], r)
+    end
+    let result = @eval code_escapes((Any,Any,Any,Any)) do T, x, y, z
+            obj = $(Expr(:new, :T, :x))
+            setfield!(obj, :x, y)
+            return getfield(obj, :x)
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test #=x=# has_return_escape(result.state[Argument(3)], r)
+        @test #=y=# has_return_escape(result.state[Argument(4)], r)
+        @test #=z=# !has_return_escape(result.state[Argument(5)], r)
+    end
+
+    # conservatively handle unknown field:
+    # all fields should be escaped, but the allocation itself doesn't need to be escaped
+    let result = code_escapes((Base.RefValue{String}, Symbol)) do a, fld
+            obj = SafeRef(a)
+            return getfield(obj, fld)
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+    let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Symbol)) do a, b, fld
+            obj = SafeRefs(a, b)
+            return getfield(obj, fld) # should escape both `a` and `b`
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test has_return_escape(result.state[Argument(3)], r) # b
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+    let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Int)) do a, b, idx
+            obj = SafeRefs(a, b)
+            return obj[idx] # should escape both `a` and `b`
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test has_return_escape(result.state[Argument(3)], r) # b
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+    let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Symbol)) do a, b, fld
+            obj = SafeRefs(Ref("a"), Ref("b"))
+            setfield!(obj, fld, a)
+            return obj[2] # should escape `a`
+        end
+        i = last(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test !has_return_escape(result.state[Argument(3)], r) # b
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+    let result = code_escapes((Base.RefValue{String}, Symbol)) do a, fld
+            obj = SafeRefs(Ref("a"), Ref("b"))
+            setfield!(obj, fld, a)
+            return obj[1] # this should escape `a`
+        end
+        i = last(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+    let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Int)) do a, b, idx
+        obj = SafeRefs(Ref("a"), Ref("b"))
+            obj[idx] = a
+            return obj[2] # should escape `a`
+        end
+        i = last(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test !has_return_escape(result.state[Argument(3)], r) # b
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+
+    # interprocedural
+    # ---------------
+
+    let result = @eval EATModule() begin
+            @noinline getx(obj) = obj[]
+            $code_escapes((Base.RefValue{String},)) do a
+                obj = SafeRef(a)
+                fld = getx(obj)
+                return fld
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(2)], r)
+        # NOTE we can't scalar replace `obj`, but still we may want to stack allocate it
+        @test_broken is_load_forwardable(result.state[SSAValue(i)])
+    end
+
+    # TODO interprocedural alias analysis
+    let result = code_escapes((SafeRef{Base.RefValue{String}},)) do s
+            s[] = Ref("bar")
+            global GV = s[]
+            nothing
+        end
+        @test_broken !has_all_escape(result.state[Argument(2)])
+    end
+
+    # aliasing between arguments
+    let result = @eval EATModule() begin
+            @noinline setxy!(x, y) = x[] = y
+            $code_escapes((String,)) do y
+                x = SafeRef("init")
+                setxy!(x, y)
+                return x
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[SSAValue(i)], r)
+        @test has_return_escape(result.state[Argument(2)], r) # y
+    end
+    let result = @eval EATModule() begin
+            @noinline setxy!(x, y) = x[] = y
+            $code_escapes((String,)) do y
+                x1 = SafeRef("init")
+                x2 = SafeRef(y)
+                Core.donotdelete(x1, x2)
+                setxy!(x1, x2[])
+                return x1
+            end
+        end
+        i1, i2 = findall(isnew, result.ir.stmts.stmt)
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[SSAValue(i1)], r)
+        @test !has_return_escape(result.state[SSAValue(i2)], r)
+        @test has_return_escape(result.state[Argument(2)], r) # y
+    end
+    let result = @eval EATModule() begin
+            @noinline mysetindex!(x, a) = x[1] = a
+            const Ax = Vector{Any}(undef, 1)
+            $code_escapes((Base.RefValue{String},)) do s
+                mysetindex!(Ax, s)
+            end
+        end
+        @test has_all_escape(result.state[Argument(2)]) # s
+    end
+
+    # TODO flow-sensitivity?
+    # ----------------------
+
+    let result = code_escapes((Any,Any)) do a, b
+            r = SafeRef{Any}(a)
+            Core.donotdelete(r)
+            r[] = b
+            return r[]
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test_broken !has_return_escape(result.state[Argument(2)], r) # a
+        @test has_return_escape(result.state[Argument(3)], r) # b
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    let result = code_escapes((Any,Any)) do a, b
+            r = SafeRef{Any}(:init)
+            Core.donotdelete(r)
+            r[] = a
+            r[] = b
+            return r[]
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test_broken !has_return_escape(result.state[Argument(2)], r) # a
+        @test has_return_escape(result.state[Argument(3)], r) # b
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    let result = code_escapes((Any,Any,Bool)) do a, b, cond
+            r = SafeRef{Any}(:init)
+            Core.donotdelete(r)
+            if cond
+                r[] = a
+                return r[]
+            else
+                r[] = b
+                return nothing
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        @test is_load_forwardable(result.state[SSAValue(i)])
+        r = only(findall(result.ir.stmts.stmt) do @nospecialize x
+            isreturn(x) && isa(x.val, Core.SSAValue)
+        end)
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test_broken !has_return_escape(result.state[Argument(3)], r) # b
+    end
+
+    # handle conflicting field information correctly
+    let result = code_escapes((Bool,Base.RefValue{String},Base.RefValue{String},)) do cnd, baz, qux
+            if cnd
+                o = SafeRef(Ref("foo"))
+            else
+                o = SafeRefs(Ref("bar"), baz)
+                r = getfield(o, 2)
+            end
+            if cnd
+                o = o::SafeRef
+                setfield!(o, 1, qux)
+                r = getfield(o, 1)
+            end
+            r
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(3)], r) # baz
+        @test has_return_escape(result.state[Argument(4)], r) # qux
+        for new in findall(isnew, result.ir.stmts.stmt)
+            if !(result.ir[SSAValue(new)][:type] <: Base.RefValue)
+                @test is_load_forwardable(result.state[SSAValue(new)])
+            end
+        end
+    end
+    let result = code_escapes((Bool,Base.RefValue{String},Base.RefValue{String},)) do cnd, baz, qux
+            if cnd
+                o = SafeRefs(Ref("foo"), Ref("bar"))
+                r = setfield!(o, 2, baz)
+            else
+                o = SafeRef(qux)
+            end
+            if !cnd
+                o = o::SafeRef
+                r = getfield(o, 1)
+            end
+            r
+        end
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test has_return_escape(result.state[Argument(3)], r) # baz
+        @test has_return_escape(result.state[Argument(4)], r) # qux
+    end
+
+    # foreigncall should disable field analysis
+    let result = code_escapes((Any,Nothing,Int,UInt)) do t, mt, lim, world
+            ambig = false
+            min = Ref{UInt}(typemin(UInt))
+            max = Ref{UInt}(typemax(UInt))
+            has_ambig = Ref{Int32}(0)
+            mt = ccall(:jl_matching_methods, Any,
+                (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ref{Int32}),
+                t, mt, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool}
+            return mt, has_ambig[]
+        end
+        for i in findall(isnew, result.ir.stmts.stmt)
+            @test !is_load_forwardable(result.state[SSAValue(i)])
+        end
+    end
+end
+
+# demonstrate the power of our field / alias analysis with a realistic end to end example
+abstract type AbstractPoint{T} end
+mutable struct MPoint{T} <: AbstractPoint{T}
+    x::T
+    y::T
+end
+add(a::P, b::P) where P<:AbstractPoint = P(a.x + b.x, a.y + b.y)
+function compute(T, ax, ay, bx, by)
+    a = T(ax, ay)
+    b = T(bx, by)
+    for i in 0:(100000000-1)
+        c = add(a, b) # replaceable
+        a = add(c, b) # replaceable
+    end
+    a.x, a.y
+end
+let result = @code_escapes compute(MPoint, 1+.5im, 2+.5im, 2+.25im, 4+.75im)
+    for i in findall(1:length(result.ir.stmts)) do idx
+                 inst = result.ir[SSAValue(idx)]
+                 stmt = inst[:stmt]
+                 return (isnew(stmt) || isϕ(stmt)) && inst[:type] <: MPoint
+             end
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+end
+function compute(a, b)
+    for i in 0:(100000000-1)
+        c = add(a, b) # replaceable
+        a = add(c, b) # unreplaceable (aliased to the call argument `a`)
+    end
+    a.x, a.y
+end
+# let result = @code_escapes compute(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im))
+#     idxs = findall(1:length(result.ir.stmts)) do idx
+#         inst = result.ir[SSAValue(idx)]
+#         stmt = inst[:stmt]
+#         return isnew(stmt) && inst[:type] <: MPoint
+#     end
+#     @assert length(idxs) == 2
+#     @test count(i->is_load_forwardable(result.state[SSAValue(i)]), idxs) == 1
+# end
+function compute!(a, b)
+    for i in 0:(100000000-1)
+        c = add(a, b)  # replaceable
+        a′ = add(c, b) # replaceable
+        a.x = a′.x
+        a.y = a′.y
+    end
+end
+let result = @code_escapes compute!(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im))
+    for i in findall(1:length(result.ir.stmts)) do idx
+                 inst = result.ir[SSAValue(idx)]
+                 stmt = inst[:stmt]
+                 return isnew(stmt) && inst[:type] <: MPoint
+             end
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+end
+
+# demonstrate a simple type level analysis can sometimes improve the analysis accuracy
+# by compensating the lack of yet unimplemented analyses
+@testset "special-casing bitstype" begin
+    let result = code_escapes((Nothing,)) do a
+            global GV = a
+        end
+        @test !(has_all_escape(result.state[Argument(2)]))
+    end
+
+    let result = code_escapes((Int,)) do a
+            o = SafeRef(a)
+            Core.donotdelete(o)
+            return o[]
+        end
+        i = only(findall(isnew, result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test !has_return_escape(result.state[SSAValue(i)], r)
+    end
+
+    # an escaped tuple stmt will not propagate to its Int argument (since `Int` is of bitstype)
+    let result = code_escapes((Int,Any,)) do a, b
+            t = tuple(a, b)
+            return t
+        end
+        i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.stmt))
+        r = only(findall(isreturn, result.ir.stmts.stmt))
+        @test !has_return_escape(result.state[Argument(2)], r)
+        @test has_return_escape(result.state[Argument(3)], r)
+    end
+end
+
+# interprocedural analysis
+# ========================
+
+# propagate escapes imposed on call arguments
+@noinline broadcast_noescape2(b) = broadcast(identity, b)
+let result = code_escapes() do
+        broadcast_noescape2(Ref(Ref("Hi")))
+    end
+    i = last(findall(isnew, result.ir.stmts.stmt))
+    @test_broken !has_return_escape(result.state[SSAValue(i)]) # TODO interprocedural alias analysis
+    @test_broken !has_thrown_escape(result.state[SSAValue(i)]) # IDEA embed const-prop'ed `CodeInstance` for `:invoke`?
+end
+let result = code_escapes((Base.RefValue{Base.RefValue{String}},)) do x
+        out1 = broadcast_noescape2(Ref(Ref("Hi")))
+        out2 = broadcast_noescape2(x)
+        return out1, out2
+    end
+    i = last(findall(isnew, result.ir.stmts.stmt))
+    @test_broken !has_return_escape(result.state[SSAValue(i)]) # TODO interprocedural alias analysis
+    @test_broken !has_thrown_escape(result.state[SSAValue(i)]) # IDEA embed const-prop'ed `CodeInstance` for `:invoke`?
+    @test has_thrown_escape(result.state[Argument(2)])
+end
+@noinline allescape_argument(a) = (global GV = a) # obvious escape
+let result = code_escapes() do
+        allescape_argument(Ref("Hi"))
+    end
+    i = only(findall(isnew, result.ir.stmts.stmt))
+    @test has_all_escape(result.state[SSAValue(i)])
+end
+# if we can't determine the matching method statically, we should be conservative
+let result = code_escapes((Ref{Any},)) do a
+        may_exist(a)
+    end
+    @test has_all_escape(result.state[Argument(2)])
+end
+let result = code_escapes((Ref{Any},)) do a
+        Base.@invokelatest broadcast_noescape1(a)
+    end
+    @test has_all_escape(result.state[Argument(2)])
+end
+
+# handling of simple union-split (just exploit the inliner's effort)
+@noinline unionsplit_noescape(a)      = string(nothing)
+@noinline unionsplit_noescape(a::Int) = a + 10
+let result = code_escapes((Union{Int,Nothing},)) do x
+        s = SafeRef{Union{Int,Nothing}}(x)
+        unionsplit_noescape(s[])
+        return nothing
+    end
+    inds = findall(isnew, result.ir.stmts.stmt) # find allocation statement
+    @assert !isempty(inds)
+    for i in inds
+        @test has_no_escape(result.state[SSAValue(i)])
+    end
+end
+
+@noinline unused_argument(a) = (println("prevent inlining"); nothing)
+let result = code_escapes() do
+        a = Ref("foo") # shouldn't be "return escape"
+        b = unused_argument(a)
+        nothing
+    end
+    i = only(findall(isnew, result.ir.stmts.stmt))
+    @test has_no_escape(result.state[SSAValue(i)])
+
+    result = code_escapes() do
+        a = Ref("foo") # still should be "return escape"
+        b = unused_argument(a)
+        return a
+    end
+    i = only(findall(isnew, result.ir.stmts.stmt))
+    r = only(findall(isreturn, result.ir.stmts.stmt))
+    @test has_return_escape(result.state[SSAValue(i)], r)
+end
+
+# should propagate escape information imposed on return value to the aliased call argument
+@noinline returnescape_argument(a) = (println("prevent inlining"); a)
+let result = code_escapes() do
+        obj = Ref("foo")           # should be "return escape"
+        ret = returnescape_argument(obj)
+        return ret                 # alias of `obj`
+    end
+    i = only(findall(isnew, result.ir.stmts.stmt))
+    r = only(findall(isreturn, result.ir.stmts.stmt))
+    @test has_return_escape(result.state[SSAValue(i)], r)
+end
+@noinline noreturnescape_argument(a) = (println("prevent inlining"); identity("hi"))
+let result = code_escapes() do
+        obj = Ref("foo")              # better to not be "return escape"
+        ret = noreturnescape_argument(obj)
+        return ret                    # must not alias to `obj`
+    end
+    i = only(findall(isnew, result.ir.stmts.stmt))
+    @test has_no_escape(result.state[SSAValue(i)])
+end
+
+function with_self_aliased(from_bb::Int, succs::Vector{Int})
+    worklist = Int[from_bb]
+    visited = BitSet(from_bb)
+    function visit!(bb::Int)
+        if bb ∉ visited
+            push!(visited, bb)
+            push!(worklist, bb)
+        end
+    end
+    while !isempty(worklist)
+        foreach(visit!, succs)
+    end
+    return visited
+end
+@test code_escapes(with_self_aliased) isa EAUtils.EscapeResult
+
+# accounts for ThrownEscape via potential MethodError
+
+# no method error
+@noinline identity_if_string(x::SafeRef{<:AbstractString}) = (println("preventing inlining"); nothing)
+let result = code_escapes((SafeRef{String},)) do x
+        identity_if_string(x)
+    end
+    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+end
+let result = code_escapes((SafeRef,)) do x
+        identity_if_string(x)
+    end
+    i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.stmt))
+    r = only(findall(isreturn, result.ir.stmts.stmt))
+    @test has_thrown_escape(result.state[Argument(2)], i)
+    @test_broken !has_return_escape(result.state[Argument(2)], r)
+end
+let result = code_escapes((SafeRef{String},)) do x
+        try
+            identity_if_string(x)
+        catch err
+            global GV = err
+        end
+        return nothing
+    end
+    @test !has_all_escape(result.state[Argument(2)])
+end
+let result = code_escapes((Union{SafeRef{String},Vector{String}},)) do x
+        try
+            identity_if_string(x)
+        catch err
+            global GV = err
+        end
+        return nothing
+    end
+    @test has_all_escape(result.state[Argument(2)])
+end
+# method ambiguity error
+@noinline ambig_error_test(a::SafeRef, b) = (println("preventing inlining"); nothing)
+@noinline ambig_error_test(a, b::SafeRef) = (println("preventing inlining"); nothing)
+@noinline ambig_error_test(a, b) = (println("preventing inlining"); nothing)
+let result = code_escapes((SafeRef{String},Any)) do x, y
+        ambig_error_test(x, y)
+    end
+    i = only(findall(iscall((result.ir, ambig_error_test)), result.ir.stmts.stmt))
+    r = only(findall(isreturn, result.ir.stmts.stmt))
+    @test has_thrown_escape(result.state[Argument(2)], i)  # x
+    @test has_thrown_escape(result.state[Argument(3)], i)  # y
+    @test_broken !has_return_escape(result.state[Argument(2)], r)  # x
+    @test_broken !has_return_escape(result.state[Argument(3)], r)  # y
+end
+let result = code_escapes((SafeRef{String},Any)) do x, y
+        try
+            ambig_error_test(x, y)
+        catch err
+            global GV = err
+        end
+    end
+    @test has_all_escape(result.state[Argument(2)])  # x
+    @test has_all_escape(result.state[Argument(3)])  # y
+end
+
+@eval function scope_folding()
+    $(Expr(:tryfinally,
+        Expr(:block,
+            Expr(:tryfinally, :(), :(), 2),
+            :(return Core.current_scope())),
+    :(), 1))
+end
+@eval function scope_folding_opt()
+    $(Expr(:tryfinally,
+        Expr(:block,
+            Expr(:tryfinally, :(), :(), :(Base.inferencebarrier(2))),
+            :(return Core.current_scope())),
+    :(), :(Base.inferencebarrier(1))))
+end
+@test (@code_escapes scope_folding()) isa EAUtils.EscapeResult
+@test (@code_escapes scope_folding_opt()) isa EAUtils.EscapeResult
+
+end # module test_EA
diff --git a/Compiler/test/abioverride.jl b/Compiler/test/abioverride.jl
new file mode 100644
index 0000000000000..feb992b27ee43
--- /dev/null
+++ b/Compiler/test/abioverride.jl
@@ -0,0 +1,61 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Base.Meta
+include("irutils.jl")
+
+# In this test, we will manually construct a CodeInstance that specializes the `myplus`
+# method on a constant for the second argument and test various, interfaces surrounding
+# CodeInstances with ABI overrides.
+myplus(x::Int, y::Int) = x + y
+
+struct SecondArgConstOverride
+    arg2::Int
+end
+
+function is_known_call(@nospecialize(x), @nospecialize(func), src::Core.CodeInfo)
+    isexpr(x, :call) || return false
+    ft = Compiler.argextype(x.args[1], src, Compiler.VarState[])
+    return Compiler.singleton_type(ft) === func
+end
+
+
+# Construct a CodeInstance with an ABI override
+let world = Base.tls_world_age()
+    # Get some inferred source code to give to the compiler
+    # Do not look at a CodeInstance here, since those fields are only valid to
+    # use while attached to a cache, and are thus invalid to make copies of
+    # (since you'd have to have made the copy to insert into the cache before
+    # making the original CodeInstance to copy from, which is obviously
+    # rather temporally-challenged)
+    new_source = only(code_typed(myplus, (Int, Int)))[1]
+    mi = new_source.parent
+    ## Sanity check
+    @assert length(new_source.code) == 2
+    add = new_source.code[1]
+    @assert is_known_call(add, Core.Intrinsics.add_int, new_source) && add.args[3] == Core.Argument(3)
+
+    ## Replace x + y by x + 1
+    add.args[3] = 1
+
+    ## Remove the argument
+    resize!(new_source.slotnames, 2)
+    resize!(new_source.slotflags, 2)
+    new_source.nargs = 2
+
+    # Construct the CodeInstance from the modified CodeInfo data
+    global new_ci = Core.CodeInstance(Core.ABIOverride(Tuple{typeof(myplus), Int}, mi),
+        #=owner=#SecondArgConstOverride(1), new_source.rettype, Any#=new_source.exctype is missing=#,
+        #=inferred_const=#nothing, #=code=#nothing, #=const_flags=#Int32(0),
+        new_source.min_world, typemax(UInt), #=new_source.ipo_purity_bits is missing=#UInt32(0),
+        #=analysis_results=#nothing, new_source.debuginfo, new_source.edges)
+
+    # Poke the CI into the global cache
+    # This isn't necessary, but does conveniently give it the mandatory permanent GC-root before calling `invoke`
+    ccall(:jl_mi_cache_insert, Cvoid, (Any, Any), mi, new_ci)
+
+    # Poke the source code into the JIT for it
+    ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), new_ci, new_source)
+end
+
+@test contains(repr(new_ci), "ABI Overridden")
+@test invoke(myplus, new_ci, 10) == 11
diff --git a/test/compiler/codegen.jl b/Compiler/test/codegen.jl
similarity index 67%
rename from test/compiler/codegen.jl
rename to Compiler/test/codegen.jl
index 85013ce30d2ca..f90ab7dff6655 100644
--- a/test/compiler/codegen.jl
+++ b/Compiler/test/codegen.jl
@@ -4,7 +4,11 @@
 
 using Random
 using InteractiveUtils
+using InteractiveUtils: code_llvm, code_native
 using Libdl
+using Test
+
+include("setup_Compiler.jl")
 
 const opt_level = Base.JLOptions().opt_level
 const coverage = (Base.JLOptions().code_coverage > 0) || (Base.JLOptions().malloc_log > 0)
@@ -17,14 +21,19 @@ end
 
 # The tests below assume a certain format and safepoint_on_entry=true breaks that.
 function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true)
-    params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false)
-    d = InteractiveUtils._dump_function(f, t, false, false, raw, dump_module, :att, optimize, :none, false, params)
+    params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false, debug_info_level=Cint(2))
+    d = InteractiveUtils._dump_function(InteractiveUtils.ArgInfo(f, t), false, false, raw, dump_module, :att, optimize, :none, false, params)
     sprint(print, d)
 end
 
+# Some tests assume calls should be stripped out,
+# so strip out the calls to debug intrinsics that
+# are not actually materialized as call instructions.
+strip_debug_calls(ir) = replace(ir, r"call void @llvm\.dbg\.declare.*\n" => "", r"call void @llvm\.dbg\.value.*\n" => "")
+
 if !is_debug_build && opt_level > 0
     # Make sure getptls call is removed at IR level with optimization on
-    @test !occursin(" call ", get_llvm(identity, Tuple{String}))
+    @test !occursin(" call ", strip_debug_calls(get_llvm(identity, Tuple{String})))
 end
 
 jl_string_ptr(s::String) = ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s)
@@ -114,22 +123,27 @@ end
 
 if !is_debug_build && opt_level > 0
     # Make sure `jl_string_ptr` is inlined
-    @test !occursin(" call ", get_llvm(jl_string_ptr, Tuple{String}))
+    @test !occursin(" call ", strip_debug_calls(get_llvm(jl_string_ptr, Tuple{String})))
     # Make sure `Core.sizeof` call is inlined
     s = "aaa"
     @test jl_string_ptr(s) == pointer_from_objref(s) + sizeof(Int)
     # String
-    test_loads_no_call(get_llvm(core_sizeof, Tuple{String}), [Iptr])
+    test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{String})), [Iptr])
     # String
-    test_loads_no_call(get_llvm(core_sizeof, Tuple{Core.SimpleVector}), [Iptr])
+    test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Core.SimpleVector})), [Iptr])
     # Array
-    test_loads_no_call(get_llvm(core_sizeof, Tuple{Vector{Int}}), [Iptr])
+    test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Vector{Int}})), [Iptr])
+    # As long as the eltype is known we don't need to load the elsize, but do need to check isvector
+    @test_skip test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Array{Any}})), ["atomic volatile $Iptr", "ptr", "ptr", Iptr, Iptr, "ptr",  Iptr])
+    # Memory
+    test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory{Int}})), [Iptr])
     # As long as the eltype is known we don't need to load the elsize
-    test_loads_no_call(get_llvm(core_sizeof, Tuple{Array{Any}}), [Iptr])
-    # Check that we load the elsize
-    test_loads_no_call(get_llvm(core_sizeof, Tuple{Vector}), [Iptr, "i16"])
+    test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory{Any}})), [Iptr])
+    # Check that we load the elsize and isunion from the typeof layout
+    test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic volatile $Iptr", "ptr", "i32", "i16"])
+    test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic volatile $Iptr", "ptr", "i32", "i16"])
     # Primitive Type size should be folded to a constant
-    test_loads_no_call(get_llvm(core_sizeof, Tuple{Ptr}), String[])
+    test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Ptr})), String[])
 
     test_jl_dump_compiles()
     test_jl_dump_compiles_toplevel_thunks()
@@ -171,15 +185,15 @@ end
 breakpoint_mutable(a::MutableStruct) = ccall(:jl_breakpoint, Cvoid, (Ref{MutableStruct},), a)
 
 # Allocation with uninitialized field as gcroot
-mutable struct BadRef
+mutable struct BadRefMutableStruct
     x::MutableStruct
     y::MutableStruct
-    BadRef(x) = new(x)
+    BadRefMutableStruct(x) = new(x)
 end
-Base.cconvert(::Type{Ptr{BadRef}}, a::MutableStruct) = BadRef(a)
-Base.unsafe_convert(::Type{Ptr{BadRef}}, ar::BadRef) = Ptr{BadRef}(pointer_from_objref(ar.x))
+Base.cconvert(::Type{Ptr{BadRefMutableStruct}}, a::MutableStruct) = BadRefMutableStruct(a)
+Base.unsafe_convert(::Type{Ptr{BadRefMutableStruct}}, ar::BadRefMutableStruct) = Ptr{BadRefMutableStruct}(pointer_from_objref(ar.x))
 
-breakpoint_badref(a::MutableStruct) = ccall(:jl_breakpoint, Cvoid, (Ptr{BadRef},), a)
+breakpoint_badref(a::MutableStruct) = ccall(:jl_breakpoint, Cvoid, (Ptr{BadRefMutableStruct},), a)
 
 struct PtrStruct
     a::Ptr{Cvoid}
@@ -212,18 +226,18 @@ if opt_level > 0
     @test occursin("call i32 @memcmp(", compare_large_struct_ir) || occursin("call i32 @bcmp(", compare_large_struct_ir)
     @test !occursin("%gcframe", compare_large_struct_ir)
 
-    @test occursin("jl_gc_pool_alloc", get_llvm(MutableStruct, Tuple{}))
+    @test occursin("jl_gc_small_alloc", get_llvm(MutableStruct, Tuple{}))
     breakpoint_mutable_ir = get_llvm(breakpoint_mutable, Tuple{MutableStruct})
     @test !occursin("%gcframe", breakpoint_mutable_ir)
-    @test !occursin("jl_gc_pool_alloc", breakpoint_mutable_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_mutable_ir)
 
     breakpoint_badref_ir = get_llvm(breakpoint_badref, Tuple{MutableStruct})
     @test !occursin("%gcframe", breakpoint_badref_ir)
-    @test !occursin("jl_gc_pool_alloc", breakpoint_badref_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_badref_ir)
 
     breakpoint_ptrstruct_ir = get_llvm(breakpoint_ptrstruct, Tuple{RealStruct})
     @test !occursin("%gcframe", breakpoint_ptrstruct_ir)
-    @test !occursin("jl_gc_pool_alloc", breakpoint_ptrstruct_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_ptrstruct_ir)
 end
 
 function two_breakpoint(a::Float64)
@@ -241,22 +255,22 @@ end
 if opt_level > 0
     breakpoint_f64_ir = get_llvm((a)->ccall(:jl_breakpoint, Cvoid, (Ref{Float64},), a),
                                  Tuple{Float64})
-    @test !occursin("jl_gc_pool_alloc", breakpoint_f64_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_f64_ir)
     breakpoint_any_ir = get_llvm((a)->ccall(:jl_breakpoint, Cvoid, (Ref{Any},), a),
                                  Tuple{Float64})
-    @test occursin("jl_gc_pool_alloc", breakpoint_any_ir)
+    @test occursin("jl_gc_small_alloc", breakpoint_any_ir)
     two_breakpoint_ir = get_llvm(two_breakpoint, Tuple{Float64})
-    @test !occursin("jl_gc_pool_alloc", two_breakpoint_ir)
+    @test !occursin("jl_gc_small_alloc", two_breakpoint_ir)
     @test occursin("llvm.lifetime.end", two_breakpoint_ir)
 
     @test load_dummy_ref(1234) === 1234
     load_dummy_ref_ir = get_llvm(load_dummy_ref, Tuple{Int})
-    @test !occursin("jl_gc_pool_alloc", load_dummy_ref_ir)
+    @test !occursin("jl_gc_small_alloc", load_dummy_ref_ir)
     # Hopefully this is reliable enough. LLVM should be able to optimize this to a direct return.
-    @test occursin("ret $Iptr %0", load_dummy_ref_ir)
+    @test occursin("ret $Iptr %\"x::$(Int)\"", load_dummy_ref_ir)
 end
 
-# Issue 22770
+# Issue JuliaLang/julia#22770
 let was_gced = false
     @noinline make_tuple(x) = tuple(x)
     @noinline use(x) = ccall(:jl_breakpoint, Cvoid, ())
@@ -304,8 +318,8 @@ end
 
 # PR #23595
 @generated f23595(g, args...) = Expr(:call, :g, Expr(:(...), :args))
-x23595 = rand(1)
-@test f23595(Core.arrayref, true, x23595, 1) == x23595[]
+x23595 = rand(1).ref
+@test f23595(Core.memoryrefget, x23595, :not_atomic, true) == x23595[]
 
 # Issue #22421
 @noinline f22421_1(x) = x[] + 1
@@ -362,26 +376,9 @@ mktemp() do f_22330, _
 end
 
 # Alias scope
-macro aliasscope(body)
-    sym = gensym()
-    esc(quote
-        $(Expr(:aliasscope))
-        $sym = $body
-        $(Expr(:popaliasscope))
-        $sym
-    end)
-end
-
-struct ConstAliasScope{T<:Array}
-    a::T
-end
-
-@eval Base.getindex(A::ConstAliasScope, i1::Int) = Core.const_arrayref($(Expr(:boundscheck)), A.a, i1)
-@eval Base.getindex(A::ConstAliasScope, i1::Int, i2::Int, I::Int...) =  (@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
-
 function foo31018!(a, b)
-    @aliasscope for i in eachindex(a, b)
-        a[i] = ConstAliasScope(b)[i]
+    @Base.Experimental.aliasscope for i in eachindex(a, b)
+        a[i] = Base.Experimental.Const(b)[i]
     end
 end
 io = IOBuffer()
@@ -413,7 +410,7 @@ function g_dict_hash_alloc()
 end
 # Warm up
 f_dict_hash_alloc(); g_dict_hash_alloc();
-@test (@allocated f_dict_hash_alloc()) == (@allocated g_dict_hash_alloc())
+@test abs((@allocated f_dict_hash_alloc()) / (@allocated g_dict_hash_alloc()) - 1) < 0.3
 
 # returning an argument shouldn't alloc a new box
 @noinline f33829(x) = (global called33829 = true; x)
@@ -446,7 +443,7 @@ function f1_30093(r)
     end
 end
 
-@test f1_30093(Ref(0)) == nothing
+@test f1_30093(Ref(0)) === nothing
 
 # issue 33590
 function f33590(b, x)
@@ -493,20 +490,23 @@ function f37262(x)
     catch
         GC.safepoint()
     end
+    local a
     try
         GC.gc()
-        return g37262(x)
+        a = g37262(x)
+        Base.inferencebarrier(false) && error()
+        return a
     catch ex
         GC.gc()
     finally
+        @isdefined(a) && Base.donotdelete(a)
         GC.gc()
     end
 end
 @testset "#37262" begin
-    str = "store volatile { i8, {}*, {}*, {}*, {}* } zeroinitializer, { i8, {}*, {}*, {}*, {}* }* %phic"
-    str_opaque = "store volatile { i8, ptr, ptr, ptr, ptr } zeroinitializer, ptr %phic"
+    str_opaque = "getelementptr inbounds i8, ptr %.roots.phic, i32 8\n  store volatile ptr null"
     llvmstr = get_llvm(f37262, (Bool,), false, false, false)
-    @test (contains(llvmstr, str) || contains(llvmstr, str_opaque)) || llvmstr
+    @test contains(llvmstr, str_opaque)
     @test f37262(Base.inferencebarrier(true)) === nothing
 end
 
@@ -564,6 +564,7 @@ end
     function f1(cond)
         val = [1]
         GC.@preserve val begin end
+        return cond
     end
     @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f1, Tuple{Bool}, true, false, false))
 
@@ -571,19 +572,22 @@ end
     function f3(cond)
         val = ([1],)
         GC.@preserve val begin end
+        return cond
     end
     @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f3, Tuple{Bool}, true, false, false))
 
-    # unions of immutables (JuliaLang/julia#39501)
+    # PhiNode of unions of immutables (JuliaLang/julia#39501)
     function f2(cond)
-        val = cond ? 1 : 1f0
+        val = cond ? 1 : ""
         GC.@preserve val begin end
+        return cond
     end
-    @test !occursin("llvm.julia.gc_preserve_begin", get_llvm(f2, Tuple{Bool}, true, false, false))
+    @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f2, Tuple{Bool}, true, false, false))
     # make sure the fix for the above doesn't regress #34241
     function f4(cond)
         val = cond ? ([1],) : ([1f0],)
         GC.@preserve val begin end
+        return cond
     end
     @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f4, Tuple{Bool}, true, false, false))
 end
@@ -619,10 +623,10 @@ g40612(a, b) = a[]|a[] === b[]|b[]
 
 # issue #41438
 struct A41438{T}
-  x::Ptr{T}
+    x::Ptr{T}
 end
 struct B41438{T}
-  x::T
+    x::T
 end
 f41438(y) = y[].x
 @test A41438.body.layout != C_NULL
@@ -695,7 +699,7 @@ mktempdir() do pfx
         libs_deleted += 1
     end
     @test libs_deleted > 0
-    @test readchomp(`$pfx/bin/$(Base.julia_exename()) -e 'print("no codegen!\n")'`) == "no codegen!"
+    @test readchomp(`$pfx/bin/$(Base.julia_exename()) --startup-file=no -e 'print("no codegen!\n")'`) == "no codegen!"
 
     # PR #47343
     libs_emptied = 0
@@ -720,14 +724,14 @@ mutable struct A42645{T}
     end
 end
 mutable struct B42645{T}
-  y::A42645{T}
+    y::A42645{T}
 end
 x42645 = 1
 function f42645()
-  res = B42645(A42645([x42645]))
-  res.y = A42645([x42645])
-  res.y.x = true
-  res
+    res = B42645(A42645([x42645]))
+    res.y = A42645([x42645])
+    res.y.x = true
+    res
 end
 @test ((f42645()::B42645).y::A42645{Int}).x
 
@@ -787,11 +791,11 @@ f47247(a::Ref{Int}, b::Nothing) = setfield!(a, :x, b)
 @test_throws TypeError f47247(Ref(5), nothing)
 
 f48085(@nospecialize x...) = length(x)
-@test Core.Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Vararg{Int}}, Core.svec()) === nothing
-@test Core.Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Int, Vararg{Int}}, Core.svec()) === Tuple{typeof(f48085), Any, Vararg{Any}}
+@test Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Vararg{Int}}, Core.svec()) === nothing
+@test Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Int, Vararg{Int}}, Core.svec()) === Tuple{typeof(f48085), Any, Vararg{Any}}
 
 # Make sure that the bounds check is elided in tuple iteration
-@test !occursin("call void @", get_llvm(iterate, Tuple{NTuple{4, Float64}, Int}))
+@test !occursin("call void @", strip_debug_calls(get_llvm(iterate, Tuple{NTuple{4, Float64}, Int})))
 
 # issue #34459
 function f34459(args...)
@@ -832,3 +836,242 @@ let res = @timed issue50317()
     @test res.bytes == 0
     return res # must return otherwise the compiler may eliminate the result entirely
 end
+struct Wrapper50317_2
+    lock::ReentrantLock
+    fun::Vector{Int}
+end
+const MONITOR50317_2 = Wrapper50317_2(ReentrantLock(),[1])
+issue50317_2() = @noinline MONITOR50317.lock
+issue50317_2()
+let res = @timed issue50317_2()
+    @test res.bytes == 0
+    return res
+end
+const a50317 = (b=3,)
+let res = @timed a50317[:b]
+    @test res.bytes == 0
+    return res
+end
+
+# https://github.com/JuliaLang/julia/issues/50964
+@noinline bar50964(x::Core.Const) = Base.inferencebarrier(1)
+@noinline bar50964(x::DataType) = Base.inferencebarrier(2)
+foo50964(x) = bar50964(Base.inferencebarrier(Core.Const(x)))
+foo50964(1) # Shouldn't assert!
+
+# https://github.com/JuliaLang/julia/issues/51233
+obj51233 = (1,)
+@test_throws FieldError obj51233.x
+
+# Very specific test for multiversioning
+if Sys.ARCH === :x86_64
+    foo52079() = Core.Intrinsics.have_fma(Float64)
+    if foo52079() == true
+        let io = IOBuffer()
+            code_native(io,Base.Math.exp_impl,(Float64,Float64,Val{:ℯ}), dump_module=false)
+            str = String(take!(io))
+            @test !occursin("fma_emulated", str)
+            @test occursin("vfmadd", str)
+        end
+    end
+end
+
+#Check if we aren't emitting the store with the wrong TBAA metadata
+
+foo54166(x,i,y) = x[i] = y
+let io = IOBuffer()
+    code_llvm(io,foo54166, (Vector{Union{Missing,Int}}, Int, Int), dump_module=true, raw=true)
+    str = String(take!(io))
+    @test !occursin("jtbaa_unionselbyte", str)
+    @test occursin("jtbaa_arrayselbyte", str)
+end
+
+ex54166 = Union{Missing, Int64}[missing -2; missing -2];
+dims54166 = (1,2)
+@test (minimum(ex54166; dims=dims54166)[1] === missing)
+
+bar54599() = Base.inferencebarrier(true) ? (Base.PkgId(Main),1) : nothing
+
+function foo54599()
+    pkginfo = @noinline bar54599()
+    pkgid = pkginfo !== nothing ? pkginfo[1] : nothing
+    @noinline println(devnull, pkgid)
+    pkgid.uuid !== nothing ? pkgid.uuid : false
+end
+
+#this function used to crash allocopt due to a no predecessors bug
+barnopreds() = Base.inferencebarrier(true) ? (Base.PkgId(Test),1) : nothing
+function foonopreds()
+    pkginfo = @noinline barnopreds()
+    pkgid = pkginfo !== nothing ? pkginfo[1] : nothing
+    pkgid.uuid !== nothing ? pkgid.uuid : false
+end
+@test foonopreds() !== nothing
+
+# issue 55396
+struct Incomplete55396
+  x::Tuple{Int}
+  y::Int
+  @noinline Incomplete55396(x::Int) = new((x,))
+end
+let x = Incomplete55396(55396)
+    @test x.x === (55396,)
+end
+
+# Core.getptls() special handling
+@test !occursin("call ptr @jlplt", get_llvm(Core.getptls, Tuple{})) #It should lower to a direct load of the ptls and not a ccall
+
+# issue 55208
+@noinline function f55208(x, i)
+    z = (i == 0 ? x[1] : x[i])
+    return z isa Core.TypeofBottom
+end
+@test f55208((Union{}, 5, 6, 7), 0)
+
+@noinline function g55208(x, i)
+    z = (i == 0 ? x[1] : x[i])
+    typeof(z)
+end
+@test g55208((Union{}, true, true), 0) === typeof(Union{})
+
+@test string((Core.Union{}, true, true, true)) == "(Union{}, true, true, true)"
+
+# Issue #55558
+for (T, StructName) in ((Int128, :Issue55558), (UInt128, :UIssue55558))
+    @eval begin
+        struct $(StructName)
+            a::$(T)
+            b::Int64
+            c::$(T)
+        end
+        local broken_i128 = Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "powerpc64le"
+        @test fieldoffset($(StructName), 2) == 16
+        @test fieldoffset($(StructName), 3) == 32 broken=broken_i128
+        @test sizeof($(StructName)) == 48 broken=broken_i128
+    end
+end
+
+# Issue #42326
+primitive type PadAfter64_42326 448 end
+mutable struct CheckPadAfter64_42326
+    a::UInt64
+    pad::PadAfter64_42326
+    b::UInt64
+end
+@test fieldoffset(CheckPadAfter64_42326, 3) == 80
+@test sizeof(CheckPadAfter64_42326) == 96
+
+@noinline Base.@nospecializeinfer f55768(@nospecialize z::UnionAll) = z === Vector
+@test f55768(Vector)
+@test f55768(Vector{T} where T)
+@test !f55768(Vector{S} where S)
+
+# test that values get rooted correctly over throw
+for a in ((@noinline Ref{Int}(2)),
+          (@noinline Ref{Int}(3)),
+          5,
+          (@noinline Ref{Int}(4)),
+          6)
+    @test a[] != 0
+    try
+        b = (@noinline Ref{Int}(5),
+             @noinline Ref{Int}(6),
+             @noinline Ref{Int}(7),
+             @noinline Ref{Int}(8),
+             @noinline Ref{Int}(9),
+             @noinline Ref{Int}(10),
+             @noinline Ref{Int}(11))
+        GC.gc(true)
+        GC.@preserve b throw(a)
+    catch ex
+        @test ex === a
+    end
+end
+
+# Make sure that code that has unbound sparams works
+#https://github.com/JuliaLang/julia/issues/56739
+
+@test_warn r"declares type variable T but does not use it" @eval f56739(a) where {T} = a
+
+@test f56739(1) == 1
+g56739(x) = @noinline f56739(x)
+@test g56739(1) == 1
+
+struct Vec56937 x::NTuple{8, VecElement{Int}} end
+
+x56937 = Ref(Vec56937(ntuple(_->VecElement(1),8)))
+@test x56937[].x[1] == VecElement{Int}(1) # shouldn't crash
+
+# issue #56996
+let
+   ()->() # trigger various heuristics
+   Base.Experimental.@force_compile
+   default_rng_orig = [] # make a value in a Slot
+   try
+       # overwrite the gc-slots in the exception branch
+       throw(ErrorException("This test is supposed to throw an error"))
+   catch ex
+       # destroy any values that aren't referenced
+       GC.gc()
+       # make sure that default_rng_orig value is still valid
+       @noinline copy!([], default_rng_orig)
+   end
+   nothing
+end
+
+# Test that turning an implicit import into an explicit one doesn't pessimize codegen
+module TurnedIntoExplicit
+    using Test
+    import ..get_llvm
+
+    module ReExportBitCast
+        export bitcast
+        import Base: bitcast
+    end
+    using .ReExportBitCast
+
+    f(x::UInt) = bitcast(Float64, x)
+
+    @test !occursin("jl_apply_generic", get_llvm(f, Tuple{UInt}))
+
+    import Base: bitcast
+
+    @test !occursin("jl_apply_generic", get_llvm(f, Tuple{UInt}))
+end
+
+# Test codegen for `isdefinedglobal` of constant (#57872)
+const x57872 = "Hello"
+f57872() = (Core.isdefinedglobal(@__MODULE__, Base.compilerbarrier(:const, :x57872)), x57872) # Extra globalref here to force world age bounds
+@test f57872() == (true, "Hello")
+
+@noinline f_mutateany(@nospecialize x) = x[] = 1
+g_mutateany() = (y = Ref(0); f_mutateany(y); y[])
+@test g_mutateany() === 1
+
+# 58470 tbaa for unionselbyte of heap allocated mutables
+mutable struct Wrapper58470
+    x::Union{Nothing,Int}
+end
+
+function findsomething58470(dict, inds)
+    default = Wrapper58470(nothing)
+    for i in inds
+        x = get(dict, i, default).x
+        if !isnothing(x)
+            return x
+        end
+    end
+    return nothing
+end
+
+let io = IOBuffer()
+    code_llvm(io, findsomething58470, Tuple{Dict{Int64, Wrapper58470}, Vector{Int}}, dump_module=true, raw=true, optimize=false)
+    str = String(take!(io))
+    @test !occursin("jtbaa_unionselbyte", str)
+end
+
+let io = IOBuffer()
+    code_llvm(io, (x, y) -> (@atomic x[1] = y; nothing), (AtomicMemory{Pair{Any,Any}}, Pair{Any,Any},), raw=true, optimize=false)
+    str = String(take!(io))
+    @test occursin("julia.write_barrier", str)
+end
diff --git a/Compiler/test/compact.jl b/Compiler/test/compact.jl
new file mode 100644
index 0000000000000..5b19dc68811fc
--- /dev/null
+++ b/Compiler/test/compact.jl
@@ -0,0 +1,58 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+
+include("setup_Compiler.jl")
+include("irutils.jl")
+
+using .Compiler: IncrementalCompact, insert_node_here!, finish,
+    NewInstruction, verify_ir, ReturnNode, SSAValue
+
+foo_test_function(i) = i == 1 ? 1 : 2
+
+@testset "IncrementalCompact statefulness" begin
+    ir = only(Base.code_ircode(foo_test_function, (Int,)))[1]
+    compact = IncrementalCompact(ir)
+
+    # set up first iterator
+    x = Compiler.iterate(compact)
+    x = Compiler.iterate(compact, x[2])
+
+    # set up second iterator
+    x = Compiler.iterate(compact)
+
+    # consume remainder
+    while x !== nothing
+        x = Compiler.iterate(compact, x[2])
+    end
+
+    ir = finish(compact)
+    @test Compiler.verify_ir(ir) === nothing
+end
+
+# Test early finish of IncrementalCompact
+@testset "IncrementalCompact early finish" begin
+    ir = only(Base.code_ircode(foo_test_function, (Int,)))[1]
+    compact = IncrementalCompact(ir)
+
+    insert_node_here!(compact, NewInstruction(ReturnNode(1), Union{}, ir[SSAValue(1)][:line]))
+    new_ir = finish(compact)
+    # TODO: Should IncrementalCompact be doing this internally?
+    empty!(new_ir.cfg.blocks[1].succs)
+    verify_ir(new_ir)
+    @test length(new_ir.cfg.blocks) == 1
+end
+
+# Test reverse affinity insert at start of compact
+@testset "IncrementalCompact reverse affinity insert" begin
+    ir = only(Base.code_ircode(foo_test_function, (Int,)))[1]
+    compact = IncrementalCompact(ir)
+    @test !Compiler.did_just_finish_bb(compact)
+
+    insert_node_here!(compact, NewInstruction(ReturnNode(1), Union{}, ir[SSAValue(1)][:line]), true)
+    new_ir = finish(compact)
+    # TODO: Should IncrementalCompact be doing this internally?
+    empty!(new_ir.cfg.blocks[1].succs)
+    verify_ir(new_ir)
+    @test length(new_ir.cfg.blocks) == 1
+end
diff --git a/Compiler/test/contextual.jl b/Compiler/test/contextual.jl
new file mode 100644
index 0000000000000..941ce172d41e2
--- /dev/null
+++ b/Compiler/test/contextual.jl
@@ -0,0 +1,152 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module contextual
+
+# N.B.: This file is also run from interpreter.jl, so needs to be standalone-executable
+using Test
+
+# Cassette
+# ========
+
+# TODO Use CassetteBase.jl instead of this mini-cassette?
+
+module MiniCassette
+    # A minimal demonstration of the cassette mechanism. Doesn't support all the
+    # fancy features, but sufficient to exercise this code path in the compiler.
+
+    using Core: SimpleVector
+    using Core.IR
+    using Base: Compiler as CC
+    using .CC: retrieve_code_info, quoted, anymap
+    using Base.Meta: isexpr
+
+    export Ctx, overdub
+
+    struct Ctx; end
+
+    # A no-op cassette-like transform
+    function transform_expr(expr, map_slot_number, map_ssa_value, sparams::SimpleVector)
+        @nospecialize expr
+        transform(@nospecialize expr) = transform_expr(expr, map_slot_number, map_ssa_value, sparams)
+        if isexpr(expr, :call)
+            return Expr(:call, overdub, SlotNumber(2), anymap(transform, expr.args)...)
+        elseif isa(expr, GotoIfNot)
+            return GotoIfNot(transform(expr.cond), map_ssa_value(SSAValue(expr.dest)).id)
+        elseif isexpr(expr, :static_parameter)
+            return quoted(sparams[expr.args[1]])
+        elseif isa(expr, ReturnNode)
+            return ReturnNode(transform(expr.val))
+        elseif isa(expr, Expr)
+            return Expr(expr.head, anymap(transform, expr.args)...)
+        elseif isa(expr, GotoNode)
+            return GotoNode(map_ssa_value(SSAValue(expr.label)).id)
+        elseif isa(expr, SlotNumber)
+            return map_slot_number(expr.id)
+        elseif isa(expr, SSAValue)
+            return map_ssa_value(expr)
+        else
+            return expr
+        end
+    end
+
+    function transform!(mi::MethodInstance, ci::CodeInfo, nargs::Int, sparams::SimpleVector)
+        code = ci.code
+        di = CC.DebugInfoStream(mi, ci.debuginfo, length(code))
+        ci.slotnames = Symbol[Symbol("#self#"), :ctx, :f, :args, ci.slotnames[nargs+2:end]...]
+        ci.slotflags = UInt8[(0x00 for i = 1:4)..., ci.slotflags[nargs+2:end]...]
+        # Insert one SSAValue for every argument statement
+        prepend!(code, Any[Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs])
+        prepend!(di.codelocs, fill(Int32(0), 3nargs))
+        prepend!(ci.ssaflags, fill(0x00, nargs))
+        ci.debuginfo = Core.DebugInfo(di, length(code))
+        ci.ssavaluetypes += nargs
+        function map_slot_number(slot::Int)
+            if slot == 1
+                # self in the original function is now `f`
+                return SlotNumber(3)
+            elseif 2 <= slot <= nargs + 1
+                # Arguments get inserted as ssa values at the top of the function
+                return SSAValue(slot - 1)
+            else
+                # The first non-argument slot will be 5
+                return SlotNumber(slot - (nargs + 1) + 4)
+            end
+        end
+        map_ssa_value(ssa::SSAValue) = SSAValue(ssa.id + nargs)
+        for i = (nargs+1:length(code))
+            code[i] = transform_expr(code[i], map_slot_number, map_ssa_value, sparams)
+        end
+    end
+
+    function overdub_generator(world::UInt, source, self, ctx, f, args)
+        @nospecialize
+        argnames = Core.svec(:overdub, :ctx, :f, :args)
+        spnames = Core.svec()
+
+        if !Base.issingletontype(f)
+            # (c, f, args..) -> f(args...)
+            return generate_lambda_ex(world, source, argnames, spnames, :(return f(args...)))
+        end
+
+        tt = Tuple{f, args...}
+        match = Base._which(tt; world)
+        mi = Base.specialize_method(match)
+        # Unsupported in this mini-cassette
+        !mi.def.isva ||
+            return generate_lambda_ex(world, source, argnames, spnames, :(error("Unsupported vararg method")))
+        src = retrieve_code_info(mi, world)
+        isa(src, CodeInfo) ||
+            return generate_lambda_ex(world, source, argnames, spnames, :(error("Unexpected code transformation")))
+        src = copy(src)
+        src.edges === Core.svec() ||
+            return generate_lambda_ex(world, source, argnames, spnames, :(error("Unexpected code transformation")))
+        src.edges = Any[mi]
+        transform!(mi, src, length(args), match.sparams)
+        # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[])
+        # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[])
+        # Match the generator, since that's what our transform! does
+        src.nargs = 4
+        src.isva = true
+        errors = CC.validate_code(mi, src)
+        if !isempty(errors)
+            foreach(Core.println, errors)
+            return generate_lambda_ex(world, source, argnames, spnames, :(error("Found errors in generated code")))
+        end
+        return src
+    end
+
+    function generate_lambda_ex(world::UInt, source::Method,
+                                argnames::SimpleVector, spnames::SimpleVector,
+                                body::Expr)
+        stub = Core.GeneratedFunctionStub(identity, argnames, spnames)
+        return stub(world, source, body)
+    end
+
+    @inline overdub(::Ctx, f::Union{Core.Builtin, Core.IntrinsicFunction}, args...) = f(args...)
+
+    @eval function overdub(ctx::Ctx, f, args...)
+        $(Expr(:meta, :generated_only))
+        $(Expr(:meta, :generated, overdub_generator))
+    end
+end
+
+using .MiniCassette
+
+# Test #265 for Cassette
+f() = 1
+@test overdub(Ctx(), f) === 1
+f() = 2
+@test overdub(Ctx(), f) === 2
+
+# Test that MiniCassette is at least somewhat capable by overdubbing gcd
+@test overdub(Ctx(), gcd, 10, 20) === gcd(10, 20)
+
+@generated bar(::Val{align}) where {align} = :(42)
+foo(i) = i+bar(Val(1))
+
+@test @inferred(overdub(Ctx(), foo, 1)) == 43
+
+morethan4args(a, b, c, d, e) = (((a + b) + c) + d) + e
+@test overdub(Ctx(), morethan4args, 1, 2, 3, 4, 5) == 15
+
+end # module contextual
diff --git a/Compiler/test/datastructures.jl b/Compiler/test/datastructures.jl
new file mode 100644
index 0000000000000..608e4e770998a
--- /dev/null
+++ b/Compiler/test/datastructures.jl
@@ -0,0 +1,115 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+include("setup_Compiler.jl")
+
+@testset "CachedMethodTable" begin
+    # cache result should be separated per `limit` and `sig`
+    # https://github.com/JuliaLang/julia/pull/46799
+    interp = Compiler.NativeInterpreter()
+    table = Compiler.method_table(interp)
+    sig = Tuple{typeof(*), Any, Any}
+    result1 = Compiler.findall(sig, table; limit=-1)
+    result2 = Compiler.findall(sig, table; limit=Compiler.InferenceParams().max_methods)
+    @test result1 !== nothing && !Compiler.isempty(result1)
+    @test result2 === nothing
+end
+
+@testset "BitSetBoundedMinPrioritySet" begin
+    bsbmp = Compiler.BitSetBoundedMinPrioritySet(5)
+    Compiler.push!(bsbmp, 2)
+    Compiler.push!(bsbmp, 2)
+    iterateok = true
+    cnt = 0
+    @eval Compiler for v in $bsbmp
+        if cnt == 0
+            iterateok &= v == 2
+        elseif cnt == 1
+            iterateok &= v == 5
+        else
+            iterateok = false
+        end
+        cnt += 1
+    end
+    @test iterateok
+    @test Compiler.popfirst!(bsbmp) == 2
+    Compiler.push!(bsbmp, 1)
+    @test Compiler.popfirst!(bsbmp) == 1
+    @test Compiler.isempty(bsbmp)
+end
+
+@testset "basic heap functionality" begin
+    v = [2,3,1]
+    @test Compiler.heapify!(v, Compiler.Forward) === v
+    @test Compiler.heappop!(v, Compiler.Forward) === 1
+    @test Compiler.heappush!(v, 4, Compiler.Forward) === v
+    @test Compiler.heappop!(v, Compiler.Forward) === 2
+    @test Compiler.heappop!(v, Compiler.Forward) === 3
+    @test Compiler.heappop!(v, Compiler.Forward) === 4
+end
+
+@testset "randomized heap correctness tests" begin
+    order = Compiler.By(x -> -x[2])
+    for i in 1:6
+        heap = Tuple{Int, Int}[(rand(1:i), rand(1:i)) for _ in 1:2i]
+        mock = copy(heap)
+        @test Compiler.heapify!(heap, order) === heap
+        sort!(mock, by=last)
+
+        for _ in 1:6i
+            if rand() < .5 && !isempty(heap)
+                # The first entries may differ because heaps are not stable
+                @test last(Compiler.heappop!(heap, order)) === last(pop!(mock))
+            else
+                new = (rand(1:i), rand(1:i))
+                Compiler.heappush!(heap, new, order)
+                push!(mock, new)
+                sort!(mock, by=last)
+            end
+        end
+    end
+end
+
+@testset "searchsorted" begin
+    @test Compiler.searchsorted([1, 1, 2, 2, 3, 3], 0) === Compiler.UnitRange(1, 0)
+    @test Compiler.searchsorted([1, 1, 2, 2, 3, 3], 1) === Compiler.UnitRange(1, 2)
+    @test Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2) === Compiler.UnitRange(3, 4)
+    @test Compiler.searchsorted([1, 1, 2, 2, 3, 3], 4) === Compiler.UnitRange(7, 6)
+    @test Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2.5; lt=<) === Compiler.UnitRange(5, 4)
+
+    @test Compiler.searchsorted(Compiler.UnitRange(1, 3), 0) === Compiler.UnitRange(1, 0)
+    @test Compiler.searchsorted(Compiler.UnitRange(1, 3), 1) === Compiler.UnitRange(1, 1)
+    @test Compiler.searchsorted(Compiler.UnitRange(1, 3), 2) === Compiler.UnitRange(2, 2)
+    @test Compiler.searchsorted(Compiler.UnitRange(1, 3), 4) === Compiler.UnitRange(4, 3)
+
+    @test Compiler.searchsorted([1:10;], 1, by=(x -> x >= 5)) === Compiler.UnitRange(1, 4)
+    @test Compiler.searchsorted([1:10;], 10, by=(x -> x >= 5)) === Compiler.UnitRange(5, 10)
+    @test Compiler.searchsorted([1:5; 1:5; 1:5], 1, 6, 10, Compiler.Forward) === Compiler.UnitRange(6, 6)
+    @test Compiler.searchsorted(fill(1, 15), 1, 6, 10, Compiler.Forward) === Compiler.UnitRange(6, 10)
+
+    for (rg,I) in Any[(Compiler.UnitRange(49, 57),   47:59),
+                      (Compiler.StepRange(1, 2, 17), -1:19)]
+        rg_r = Compiler.reverse(rg)
+        rgv, rgv_r = Compiler.collect(rg), Compiler.collect(rg_r)
+        for i = I
+            @test Compiler.searchsorted(rg,i) === Compiler.searchsorted(rgv,i)
+            @test Compiler.searchsorted(rg_r,i,rev=true) === Compiler.searchsorted(rgv_r,i,rev=true)
+        end
+    end
+end
+
+@testset "basic sort" begin
+    v = [3,1,2]
+    @test v == [3,1,2]
+    @test Compiler.sort!(v) === v == [1,2,3]
+    @test Compiler.sort!(v, by = x -> -x) === v == [3,2,1]
+    @test Compiler.sort!(v, by = x -> -x, < = >) === v == [1,2,3]
+end
+
+@testset "randomized sorting tests" begin
+    for n in [0, 1, 3, 10, 30, 100, 300], k in [0, 30, 2n]
+        v = rand(-1:k, n)
+        for by in [identity, x -> -x, x -> x^2 + .1x], lt in [<, >]
+            @test sort(v; by, lt) == Compiler.sort!(copy(v); by, < = lt)
+        end
+    end
+end
diff --git a/Compiler/test/effects.jl b/Compiler/test/effects.jl
new file mode 100644
index 0000000000000..fa97c0c94c58d
--- /dev/null
+++ b/Compiler/test/effects.jl
@@ -0,0 +1,1498 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+
+include("setup_Compiler.jl")
+include("irutils.jl")
+
+# Test that the Core._apply_iterate bail path taints effects
+function f_apply_bail(f)
+    f(()...)
+    return nothing
+end
+@test !Compiler.is_removable_if_unused(Base.infer_effects(f_apply_bail))
+@test !fully_eliminated((Function,)) do f
+    f_apply_bail(f)
+    nothing
+end
+
+# Test that effect modeling for return_type doesn't incorrectly pick
+# up the effects of the function being analyzed
+f_throws() = error()
+@noinline function return_type_unused(x)
+    Compiler.return_type(f_throws, Tuple{})
+    return x+1
+end
+@test Compiler.is_removable_if_unused(Base.infer_effects(return_type_unused, (Int,)))
+@test fully_eliminated((Int,)) do x
+    return_type_unused(x)
+    return nothing
+end
+
+# Test that ambiguous calls don't accidentally get nothrow effect
+ambig_effects_test(a::Int, b) = 1
+ambig_effects_test(a, b::Int) = 1
+ambig_effects_test(a, b) = 1
+@test !Compiler.is_nothrow(Base.infer_effects(ambig_effects_test, (Int, Any)))
+global ambig_unknown_type_global::Any = 1
+@noinline function conditionally_call_ambig(b::Bool, a)
+    if b
+        ambig_effects_test(a, ambig_unknown_type_global)
+    end
+    return 0
+end
+@test !fully_eliminated((Bool,)) do b
+    conditionally_call_ambig(b, 1)
+    return nothing
+end
+
+# Test that a missing methtable identification gets tainted
+# appropriately
+struct FCallback; f::Union{Nothing, Function}; end
+f_invoke_callback(fc) = let f=fc.f; (f !== nothing && f(); nothing); end
+@test !Compiler.is_removable_if_unused(Base.infer_effects(f_invoke_callback, (FCallback,)))
+@test !fully_eliminated((FCallback,)) do fc
+    f_invoke_callback(fc)
+    return nothing
+end
+
+# @assume_effects override
+const ___CONST_DICT___ = Dict{Any,Any}(Symbol(c) => i for (i, c) in enumerate('a':'z'))
+Base.@assume_effects :foldable concrete_eval(
+    f, args...; kwargs...) = f(args...; kwargs...)
+@test fully_eliminated() do
+    concrete_eval(getindex, ___CONST_DICT___, :a)
+end
+
+# :removable override
+Base.@assume_effects :removable removable_call(
+    f, args...; kwargs...) = f(args...; kwargs...)
+@test fully_eliminated() do
+    @noinline removable_call(getindex, ___CONST_DICT___, :a)
+    nothing
+end
+
+# terminates_globally override
+# https://github.com/JuliaLang/julia/issues/41694
+Base.@assume_effects :terminates_globally function issue41694(x)
+    res = 1
+    0 ≤ x < 20 || error("bad fact")
+    while x > 1
+        res *= x
+        x -= 1
+    end
+    return res
+end
+@test Compiler.is_foldable(Base.infer_effects(issue41694, (Int,)))
+@test fully_eliminated() do
+    issue41694(2)
+end
+
+Base.@assume_effects :terminates_globally function recur_termination1(x)
+    x == 0 && return 1
+    0 ≤ x < 20 || error("bad fact")
+    return x * recur_termination1(x-1)
+end
+@test Compiler.is_foldable(Base.infer_effects(recur_termination1, (Int,)))
+@test Compiler.is_terminates(Base.infer_effects(recur_termination1, (Int,)))
+function recur_termination2()
+    Base.@assume_effects :total !:terminates_globally
+    recur_termination1(12)
+end
+@test fully_eliminated(recur_termination2)
+@test fully_eliminated() do; recur_termination2(); end
+
+Base.@assume_effects :terminates_globally function recur_termination21(x)
+    x == 0 && return 1
+    0 ≤ x < 20 || error("bad fact")
+    return recur_termination22(x)
+end
+recur_termination22(x) = x * recur_termination21(x-1)
+@test Compiler.is_foldable(Base.infer_effects(recur_termination21, (Int,)))
+@test Compiler.is_foldable(Base.infer_effects(recur_termination22, (Int,)))
+@test Compiler.is_terminates(Base.infer_effects(recur_termination21, (Int,)))
+@test Compiler.is_terminates(Base.infer_effects(recur_termination22, (Int,)))
+function recur_termination2x()
+    Base.@assume_effects :total !:terminates_globally
+    recur_termination21(12) + recur_termination22(12)
+end
+@test fully_eliminated(recur_termination2x)
+@test fully_eliminated() do; recur_termination2x(); end
+
+# anonymous function support for `@assume_effects`
+@test fully_eliminated() do
+    map((2,3,4)) do x
+        # this :terminates_locally allows this anonymous function to be constant-folded
+        Base.@assume_effects :terminates_locally
+        res = 1
+        0 ≤ x < 20 || error("bad fact")
+        while x > 1
+            res *= x
+            x -= 1
+        end
+        return res
+    end
+end
+
+# control flow backedge should taint `terminates`
+@test Base.infer_effects((Int,)) do n
+    for i = 1:n; end
+end |> !Compiler.is_terminates
+
+# interprocedural-recursion should taint `terminates` **appropriately**
+function sumrecur(a, x)
+    isempty(a) && return x
+    return sumrecur(Base.tail(a), x + first(a))
+end
+@test Base.infer_effects(sumrecur, (Tuple{Int,Int,Int},Int)) |> Compiler.is_terminates
+@test Base.infer_effects(sumrecur, (Tuple{Int,Int,Int,Vararg{Int}},Int)) |> !Compiler.is_terminates
+
+# https://github.com/JuliaLang/julia/issues/45781
+@test Base.infer_effects((Float32,)) do a
+    out1 = promote_type(Irrational{:π}, Bool)
+    out2 = sin(a)
+    out1, out2
+end |> Compiler.is_terminates
+
+# refine :consistent-cy effect inference using the return type information
+@test Base.infer_effects((Any,)) do x
+    taint = Ref{Any}(x) # taints :consistent-cy, but will be adjusted
+    throw(taint)
+end |> Compiler.is_consistent
+@test Base.infer_effects((Int,)) do x
+    if x < 0
+        taint = Ref(x) # taints :consistent-cy, but will be adjusted
+        throw(DomainError(x, taint))
+    end
+    return nothing
+end |> Compiler.is_consistent
+@test Base.infer_effects((Int,)) do x
+    if x < 0
+        taint = Ref(x) # taints :consistent-cy, but will be adjusted
+        throw(DomainError(x, taint))
+    end
+    return x == 0 ? nothing : x # should `Union` of isbitstype objects nicely
+end |> Compiler.is_consistent
+@test Base.infer_effects((Symbol,Any)) do s, x
+    if s === :throw
+        taint = Ref{Any}(":throw option given") # taints :consistent-cy, but will be adjusted
+        throw(taint)
+    end
+    return s # should handle `Symbol` nicely
+end |> Compiler.is_consistent
+@test Base.infer_effects((Int,)) do x
+    return Ref(x)
+end |> !Compiler.is_consistent
+@test Base.infer_effects((Int,)) do x
+    return x < 0 ? Ref(x) : nothing
+end |> !Compiler.is_consistent
+@test Base.infer_effects((Int,)) do x
+    if x < 0
+        throw(DomainError(x, lazy"$x is negative"))
+    end
+    return nothing
+end |> Compiler.is_foldable
+
+# :the_exception expression should taint :consistent-cy
+global inconsistent_var::Int = 42
+function throw_inconsistent() # this is still :consistent
+    throw(inconsistent_var)
+end
+function catch_inconsistent()
+    try
+        throw_inconsistent()
+    catch err
+        err
+    end
+end
+@test !Compiler.is_consistent(Base.infer_effects(catch_inconsistent))
+cache_inconsistent() = catch_inconsistent()
+function compare_inconsistent()
+    a = cache_inconsistent()
+    global inconsistent_var = 0
+    b = cache_inconsistent()
+    global inconsistent_var = 42
+    return a === b
+end
+@test !compare_inconsistent()
+# return type information shouldn't be able to refine it also
+function catch_inconsistent(x::T) where T
+    v = x
+    try
+        throw_inconsistent()
+    catch err
+        v = err::T
+    end
+    return v
+end
+@test !Compiler.is_consistent(Base.infer_effects(catch_inconsistent, (Int,)))
+cache_inconsistent(x) = catch_inconsistent(x)
+function compare_inconsistent(x::T) where T
+    x = one(T)
+    a = cache_inconsistent(x)
+    global inconsistent_var = 0
+    b = cache_inconsistent(x)
+    global inconsistent_var = 42
+    return a === b
+end
+@test !compare_inconsistent(3)
+
+# Effect modeling for Core.compilerbarrier
+@test Base.infer_effects(Base.inferencebarrier, Tuple{Any}) |> Compiler.is_removable_if_unused
+
+# effects modeling for allocation/access of uninitialized fields
+struct Maybe{T}
+    x::T
+    Maybe{T}() where T = new{T}()
+    Maybe{T}(x) where T = new{T}(x)
+    Maybe(x::T) where T = new{T}(x)
+end
+Base.getindex(x::Maybe) = x.x
+struct SyntacticallyDefined{T}
+    x::T
+end
+@test Base.infer_effects() do
+    Maybe{Int}()
+end |> !Compiler.is_consistent
+@test Base.infer_effects() do
+    Maybe{Int}()[]
+end |> !Compiler.is_consistent
+@test !fully_eliminated() do
+    Maybe{Int}()[]
+end
+@test Base.infer_effects() do
+    Maybe{String}()
+end |> Compiler.is_consistent
+@test Base.infer_effects() do
+    Maybe{String}()[]
+end |> Compiler.is_consistent
+@test Base.infer_effects() do
+    Maybe{Some{Base.RefValue{Int}}}()
+end |> Compiler.is_consistent
+let f() = Maybe{String}()[]
+    @test Base.return_types() do
+        f() # this call should be concrete evaluated
+    end |> only === Union{}
+end
+@test Base.infer_effects() do
+    Ref{Int}()
+end |> !Compiler.is_consistent
+@test Base.infer_effects() do
+    Ref{Int}()[]
+end |> !Compiler.is_consistent
+@test !fully_eliminated() do
+    Ref{Int}()[]
+end
+@test Base.infer_effects() do
+    Ref{String}()[]
+end |> Compiler.is_consistent
+let f() = Ref{String}()[]
+    @test Base.return_types() do
+        f() # this call should be concrete evaluated
+    end |> only === Union{}
+end
+@test Base.infer_effects((SyntacticallyDefined{Float64}, Symbol)) do w, s
+    getfield(w, s)
+end |> Compiler.is_foldable
+
+# effects propagation for `Core.invoke` calls
+# https://github.com/JuliaLang/julia/issues/44763
+global x44763::Int = 0
+increase_x44763!(n) = (global x44763; x44763 += n)
+invoke44763(x) = @invoke increase_x44763!(x)
+@test Base.return_types() do
+    invoke44763(42)
+end |> only === Int
+@test x44763 == 0
+
+# `@inbounds`/`@boundscheck` expression should taint :consistent correctly
+# https://github.com/JuliaLang/julia/issues/48099
+function A1_inbounds()
+    r = 0
+    @inbounds begin
+        @boundscheck r += 1
+    end
+    return r
+end
+@test !Compiler.is_consistent(Base.infer_effects(A1_inbounds))
+
+# Test that purity doesn't try to accidentally run unreachable code due to
+# boundscheck elimination
+function f_boundscheck_elim(n)
+    # Inbounds here assumes that this is only ever called with `n==0`, but of
+    # course the compiler has no way of knowing that, so it must not attempt
+    # to run the `@inbounds getfield(sin, 1)` that `ntuple` generates.
+    ntuple(x->(@inbounds ()[x]), n)
+end
+@test !Compiler.is_noub(Base.infer_effects(f_boundscheck_elim, (Int,)))
+@test Tuple{} <: only(Base.return_types(f_boundscheck_elim, (Int,)))
+
+# Test that purity modeling doesn't accidentally introduce new world age issues
+f_redefine_me(x) = x+1
+f_call_redefine() = f_redefine_me(0)
+f_mk_opaque() = Base.Experimental.@opaque ()->Base.inferencebarrier(f_call_redefine)()
+const op_capture_world = f_mk_opaque()
+f_redefine_me(x) = x+2
+@test op_capture_world() == 1
+@test f_mk_opaque()() == 2
+
+# backedge insertion for Any-typed, effect-free frame
+const CONST_DICT = let d = Dict()
+    for c in 'A':'z'
+        push!(d, c => Int(c))
+    end
+    d
+end
+Base.@assume_effects :foldable getcharid(c) = CONST_DICT[c]
+@noinline callf(f, args...) = f(args...)
+function entry_to_be_invalidated(c)
+    return callf(getcharid, c)
+end
+@test Base.infer_effects((Char,)) do x
+    entry_to_be_invalidated(x)
+end |> Compiler.is_foldable
+@test fully_eliminated(; retval=97) do
+    entry_to_be_invalidated('a')
+end
+getcharid(c) = CONST_DICT[c] # now this is not eligible for concrete evaluation
+@test Base.infer_effects((Char,)) do x
+    entry_to_be_invalidated(x)
+end |> !Compiler.is_foldable
+@test !fully_eliminated() do
+    entry_to_be_invalidated('a')
+end
+
+@test !Compiler.builtin_nothrow(Compiler.fallback_lattice, Core.get_binding_type, Any[Rational{Int}, Core.Const(:foo)], Any)
+
+# effects modeling for assignment to globals
+global glob_assign_int::Int = 0
+f_glob_assign_int() = global glob_assign_int = 1
+let effects = Base.infer_effects(f_glob_assign_int, (); optimize=false)
+    @test Compiler.is_consistent(effects)
+    @test !Compiler.is_effect_free(effects)
+    @test Compiler.is_nothrow(effects)
+end
+# effects modeling for for setglobal!
+global SETGLOBAL!_NOTHROW::Int = 0
+let effects = Base.infer_effects(; optimize=false) do
+        setglobal!(@__MODULE__, :SETGLOBAL!_NOTHROW, 42)
+    end
+    @test Compiler.is_consistent(effects)
+    @test !Compiler.is_effect_free(effects)
+    @test Compiler.is_nothrow(effects)
+end
+
+# we should taint `nothrow` if the binding doesn't exist and isn't fixed yet,
+setglobal!_nothrow_undefinedyet() = setglobal!(@__MODULE__, :UNDEFINEDYET, 42)
+let effects = Base.infer_effects(setglobal!_nothrow_undefinedyet)
+    @test !Compiler.is_nothrow(effects)
+end
+@test_throws ErrorException setglobal!_nothrow_undefinedyet()
+# This declares the binding as ::Any
+@eval global_assignment_undefinedyet() = $(GlobalRef(@__MODULE__, :UNDEFINEDYET)) = 42
+let effects = Base.infer_effects(global_assignment_undefinedyet)
+    @test Compiler.is_nothrow(effects)
+end
+# Again with type mismatch
+global UNDEFINEDYET2::String = "0"
+setglobal!_nothrow_undefinedyet2() = setglobal!(@__MODULE__, :UNDEFINEDYET2, 42)
+@eval global_assignment_undefinedyet2() = $(GlobalRef(@__MODULE__, :UNDEFINEDYET2)) = 42
+let effects = Base.infer_effects(global_assignment_undefinedyet2)
+    @test !Compiler.is_nothrow(effects)
+end
+let effects = Base.infer_effects(setglobal!_nothrow_undefinedyet2)
+    @test !Compiler.is_nothrow(effects)
+end
+@test_throws TypeError setglobal!_nothrow_undefinedyet2()
+
+module ExportMutableGlobal
+    global mutable_global_for_setglobal_test::Int = 0
+    export mutable_global_for_setglobal_test
+end
+using .ExportMutableGlobal: mutable_global_for_setglobal_test
+f_assign_imported() = global mutable_global_for_setglobal_test = 42
+let effects = Base.infer_effects(f_assign_imported)
+    @test !Compiler.is_nothrow(effects)
+end
+@test_throws ErrorException f_assign_imported()
+
+# Nothrow for setfield!
+mutable struct SetfieldNothrow
+    x::Int
+end
+f_setfield_nothrow() = SetfieldNothrow(0).x = 1
+let effects = Base.infer_effects(f_setfield_nothrow, ())
+    @test Compiler.is_nothrow(effects)
+    @test Compiler.is_effect_free(effects) # see EFFECT_FREE_IF_INACCESSIBLEMEMONLY
+end
+
+# even if 2-arg `getfield` may throw, it should be still `:consistent`
+@test Compiler.is_consistent(Base.infer_effects(getfield, (NTuple{5, Float64}, Int)))
+
+# SimpleVector allocation is consistent
+@test Compiler.is_consistent(Base.infer_effects(Core.svec))
+@test Base.infer_effects() do
+    Core.svec(nothing, 1, "foo")
+end |> Compiler.is_consistent
+
+# fastmath operations are in-`:consistent`
+@test !Compiler.is_consistent(Base.infer_effects((a,b)->@fastmath(a+b), (Float64,Float64)))
+
+# issue 46122: @assume_effects for @ccall
+@test Base.infer_effects((Vector{Int},)) do a
+    Base.@assume_effects :effect_free @ccall this_call_does_not_really_exist(a::Any)::Ptr{Int}
+end |> Compiler.is_effect_free
+
+# `getfield_effects` handles access to union object nicely
+let 𝕃 = Compiler.fallback_lattice
+    getfield_effects = Compiler.getfield_effects
+    @test Compiler.is_consistent(getfield_effects(𝕃, Any[Some{String}, Core.Const(:value)], String))
+    @test Compiler.is_consistent(getfield_effects(𝕃, Any[Some{Symbol}, Core.Const(:value)], Symbol))
+    @test Compiler.is_consistent(getfield_effects(𝕃, Any[Union{Some{Symbol},Some{String}}, Core.Const(:value)], Union{Symbol,String}))
+end
+@test Base.infer_effects((Bool,)) do c
+    obj = c ? Some{String}("foo") : Some{Symbol}(:bar)
+    return getfield(obj, :value)
+end |> Compiler.is_consistent
+
+# getfield is nothrow when bounds checking is turned off
+@test Base.infer_effects((Tuple{Int,Int},Int)) do t, i
+    getfield(t, i, false)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Tuple{Int,Int},Symbol)) do t, i
+    getfield(t, i, false)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Tuple{Int,Int},String)) do t, i
+    getfield(t, i, false) # invalid name type
+end |> !Compiler.is_nothrow
+
+@test Base.infer_effects((Some{Any},)) do some
+    getfield(some, 1, :not_atomic)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Some{Any},)) do some
+    getfield(some, 1, :invalid_atomic_spec)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Some{Any},Bool)) do some, boundscheck
+    getfield(some, 1, boundscheck)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Some{Any},Bool)) do some, boundscheck
+    getfield(some, 1, :not_atomic, boundscheck)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Some{Any},Bool)) do some, boundscheck
+    getfield(some, 1, :invalid_atomic_spec, boundscheck)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Some{Any},Any)) do some, boundscheck
+    getfield(some, 1, :not_atomic, boundscheck)
+end |> !Compiler.is_nothrow
+
+@test Compiler.is_consistent(Base.infer_effects(setindex!, (Base.RefValue{Int}, Int)))
+
+# :inaccessiblememonly effect
+const global constant_global::Int = 42
+const global ConstantType = Ref
+global nonconstant_global::Int = 42
+const global constant_mutable_global = Ref(0)
+const global constant_global_nonisbits = Some(:foo)
+@test Base.infer_effects() do
+    constant_global
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    ConstantType
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    ConstantType{Any}()
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    constant_global_nonisbits
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    getglobal(@__MODULE__, :constant_global)
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    nonconstant_global
+end |> !Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    getglobal(@__MODULE__, :nonconstant_global)
+end |> !Compiler.is_inaccessiblememonly
+@test Base.infer_effects((Symbol,)) do name
+    getglobal(@__MODULE__, name)
+end |> !Compiler.is_inaccessiblememonly
+@test Base.infer_effects((Int,)) do v
+    global nonconstant_global = v
+end |> !Compiler.is_inaccessiblememonly
+@test Base.infer_effects((Int,)) do v
+    setglobal!(@__MODULE__, :nonconstant_global, v)
+end |> !Compiler.is_inaccessiblememonly
+@test Base.infer_effects((Int,)) do v
+    constant_mutable_global[] = v
+end |> !Compiler.is_inaccessiblememonly
+module ConsistentModule
+const global constant_global::Int = 42
+const global ConstantType = Ref
+end # module
+@test Base.infer_effects() do
+    ConsistentModule.constant_global
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    ConsistentModule.ConstantType
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    ConsistentModule.ConstantType{Any}()
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    getglobal(@__MODULE__, :ConsistentModule).constant_global
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    getglobal(@__MODULE__, :ConsistentModule).ConstantType
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do
+    getglobal(@__MODULE__, :ConsistentModule).ConstantType{Any}()
+end |> Compiler.is_inaccessiblememonly
+@test Base.infer_effects((Module,)) do M
+    M.constant_global
+end |> !Compiler.is_inaccessiblememonly
+@test Base.infer_effects((Module,)) do M
+    M.ConstantType
+end |> !Compiler.is_inaccessiblememonly
+@test Base.infer_effects() do M
+    M.ConstantType{Any}()
+end |> !Compiler.is_inaccessiblememonly
+
+# the `:inaccessiblememonly` helper effect allows us to prove `:consistent`-cy of frames
+# including `getfield` / `isdefined` accessing to local mutable object
+
+mutable struct SafeRef{T}
+    x::T
+end
+Base.getindex(x::SafeRef) = x.x;
+Base.setindex!(x::SafeRef, v) = x.x = v;
+Base.isassigned(x::SafeRef) = true;
+
+function mutable_consistent(s)
+    SafeRef(s)[]
+end
+@test Compiler.is_inaccessiblememonly(Base.infer_effects(mutable_consistent, (Symbol,)))
+@test fully_eliminated(; retval=:foo) do
+    mutable_consistent(:foo)
+end
+
+function nested_mutable_consistent(s)
+    SafeRef(SafeRef(SafeRef(SafeRef(SafeRef(s)))))[][][][][]
+end
+@test Compiler.is_inaccessiblememonly(Base.infer_effects(nested_mutable_consistent, (Symbol,)))
+@test fully_eliminated(; retval=:foo) do
+    nested_mutable_consistent(:foo)
+end
+
+const consistent_global = Some(:foo)
+@test Base.infer_effects() do
+    consistent_global.value
+end |> Compiler.is_consistent
+const inconsistent_global = SafeRef(:foo)
+@test Base.infer_effects() do
+    inconsistent_global[]
+end |> !Compiler.is_consistent
+const inconsistent_condition_ref = Ref{Bool}(false)
+@test Base.infer_effects() do
+    if inconsistent_condition_ref[]
+        return 0
+    else
+        return 1
+    end
+end |> !Compiler.is_consistent
+
+# should handle va-method properly
+callgetfield1(xs...) = getfield(getfield(xs, 1), 1)
+@test !Compiler.is_inaccessiblememonly(Base.infer_effects(callgetfield1, (Base.RefValue{Symbol},)))
+const GLOBAL_XS = Ref(:julia)
+global_getfield() = callgetfield1(GLOBAL_XS)
+@test let
+    Base.Experimental.@force_compile
+    global_getfield()
+end === :julia
+GLOBAL_XS[] = :julia2
+@test let
+    Base.Experimental.@force_compile
+    global_getfield()
+end === :julia2
+
+# the `:inaccessiblememonly` helper effect allows us to prove `:effect_free`-ness of frames
+# including `setfield!` modifying local mutable object
+
+const global_ref = Ref{Any}()
+global const global_bit::Int = 42
+makeref() = Ref{Any}()
+setref!(ref, @nospecialize v) = ref[] = v
+
+@noinline function removable_if_unused1()
+    x = makeref()
+    setref!(x, 42)
+    x
+end
+@noinline function removable_if_unused2()
+    x = makeref()
+    setref!(x, global_bit)
+    x
+end
+for f = Any[removable_if_unused1, removable_if_unused2]
+    effects = Base.infer_effects(f)
+    @test Compiler.is_inaccessiblememonly(effects)
+    @test Compiler.is_effect_free(effects)
+    @test Compiler.is_removable_if_unused(effects)
+    @test @eval fully_eliminated() do
+        $f()
+        nothing
+    end
+end
+@noinline function removable_if_unused3(v)
+    x = makeref()
+    setref!(x, v)
+    x
+end
+let effects = Base.infer_effects(removable_if_unused3, (Int,))
+    @test Compiler.is_inaccessiblememonly(effects)
+    @test Compiler.is_effect_free(effects)
+    @test Compiler.is_removable_if_unused(effects)
+end
+@test fully_eliminated((Int,)) do v
+    removable_if_unused3(v)
+    nothing
+end
+
+@noinline function unremovable_if_unused1!(x)
+    setref!(x, 42)
+end
+@test !Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused1!, (typeof(global_ref),)))
+@test !Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused1!, (Any,)))
+
+@noinline function unremovable_if_unused2!()
+    setref!(global_ref, 42)
+end
+@test !Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused2!))
+
+@noinline function unremovable_if_unused3!()
+    getfield(@__MODULE__, :global_ref)[] = nothing
+end
+@test !Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused3!))
+
+# array ops
+# =========
+
+# allocation
+# ----------
+
+# low-level constructor
+@noinline construct_array(@nospecialize(T), args...) = Array{T}(undef, args...)
+# should eliminate safe but dead allocations
+let good_dims = [1, 2, 3, 4, 10]
+    Ns = [1, 2, 3, 4, 10]
+    for dim = good_dims, N = Ns
+        Int64(dim)^N > typemax(Int) && continue
+        dims = ntuple(i->dim, N)
+        @test @eval Base.infer_effects() do
+            construct_array(Int, $(dims...))
+        end |> Compiler.is_removable_if_unused
+        @test @eval fully_eliminated() do
+            construct_array(Int, $(dims...))
+            nothing
+        end
+    end
+end
+# should analyze throwness correctly
+let bad_dims = [-1, typemax(Int)]
+    for dim in bad_dims, N in [1, 2, 3, 4, 10]
+        for T in Any[Int, Union{Missing,Nothing}, Missing, Any]
+            dims = ntuple(i->dim, N)
+            @test @eval Base.infer_effects() do
+                construct_array($T, $(dims...))
+            end |> !Compiler.is_removable_if_unused
+            @test @eval !fully_eliminated() do
+                construct_array($T, $(dims...))
+                nothing
+            end
+            @test_throws "invalid " @eval construct_array($T, $(dims...))
+        end
+    end
+end
+
+# high-level interfaces
+# getindex
+for safesig = Any[
+        (Type{Int},)
+        (Type{Int}, Int)
+        (Type{Int}, Int, Int)
+        (Type{Number},)
+        (Type{Number}, Number)
+        (Type{Number}, Int)
+        (Type{Any},)
+        (Type{Any}, Any,)
+        (Type{Any}, Any, Any)
+    ]
+    let effects = Base.infer_effects(getindex, safesig)
+        @test Compiler.is_consistent_if_notreturned(effects)
+        @test Compiler.is_removable_if_unused(effects)
+    end
+end
+for unsafesig = Any[
+        (Type{Int}, String)
+        (Type{Int}, Any)
+        (Type{Number}, AbstractString)
+        (Type{Number}, Any)
+    ]
+    let effects = Base.infer_effects(getindex, unsafesig)
+        @test !Compiler.is_nothrow(effects)
+    end
+end
+# vect
+for safesig = Any[
+        ()
+        (Int,)
+        (Int, Int)
+    ]
+    let effects = Base.infer_effects(Base.vect, safesig)
+        @test Compiler.is_consistent_if_notreturned(effects)
+        @test Compiler.is_removable_if_unused(effects)
+    end
+end
+
+# array getindex
+let tt = (MemoryRef{Any},Symbol,Bool)
+    @testset let effects = Base.infer_effects(Core.memoryrefget, tt)
+        @test Compiler.is_consistent_if_inaccessiblememonly(effects)
+        @test Compiler.is_effect_free(effects)
+        @test !Compiler.is_nothrow(effects)
+        @test Compiler.is_terminates(effects)
+    end
+end
+
+# array setindex!
+let tt = (MemoryRef{Any},Any,Symbol,Bool)
+    @testset let effects = Base.infer_effects(Core.memoryrefset!, tt)
+        @test Compiler.is_consistent_if_inaccessiblememonly(effects)
+        @test Compiler.is_effect_free_if_inaccessiblememonly(effects)
+        @test !Compiler.is_nothrow(effects)
+        @test Compiler.is_terminates(effects)
+    end
+end
+# nothrow for arrayset
+@test Base.infer_effects((MemoryRef{Int},Int)) do a, v
+    Core.memoryrefset!(a, v, :not_atomic, true)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((MemoryRef{Int},Int)) do a, v
+    a[] = v # may throw
+end |> !Compiler.is_nothrow
+# when bounds checking is turned off, it should be safe
+@test Base.infer_effects((MemoryRef{Int},Int)) do a, v
+    Core.memoryrefset!(a, v, :not_atomic, false)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((MemoryRef{Number},Number)) do a, v
+    Core.memoryrefset!(a, v, :not_atomic, false)
+end |> Compiler.is_nothrow
+
+# arraysize
+# ---------
+
+let effects = Base.infer_effects(size, (Array,Int))
+    @test Compiler.is_consistent_if_inaccessiblememonly(effects)
+    @test Compiler.is_effect_free(effects)
+    @test !Compiler.is_nothrow(effects)
+    @test Compiler.is_terminates(effects)
+end
+# Test that arraysize has proper effect modeling
+@test fully_eliminated(M->(size(M, 2); nothing), (Matrix{Float64},))
+
+# arraylen
+# --------
+
+let effects = Base.infer_effects(length, (Vector{Any},))
+    @test Compiler.is_consistent_if_inaccessiblememonly(effects)
+    @test Compiler.is_effect_free(effects)
+    @test Compiler.is_nothrow(effects)
+    @test Compiler.is_terminates(effects)
+end
+
+# resize
+# ------
+
+#for op = Any[
+#        Base._growbeg!,
+#        Base._growend!,
+#        Base._deletebeg!,
+#        Base._deleteend!,
+#    ]
+#    let effects = Base.infer_effects(op, (Vector, Int))
+#        @test Compiler.is_effect_free_if_inaccessiblememonly(effects)
+#        @test Compiler.is_terminates(effects)
+#        @test !Compiler.is_nothrow(effects)
+#    end
+#end
+
+@test Compiler.is_noub(Base.infer_effects(Base._growbeg!, (Vector{Int}, Int)))
+@test Compiler.is_noub(Base.infer_effects(Base._growbeg!, (Vector{Any}, Int)))
+@test Compiler.is_noub(Base.infer_effects(Base._growend!, (Vector{Int}, Int)))
+@test Compiler.is_noub(Base.infer_effects(Base._growend!, (Vector{Any}, Int)))
+
+# tuple indexing
+# --------------
+
+@test Compiler.is_foldable(Base.infer_effects(iterate, Tuple{Tuple{Int, Int}, Int}))
+
+# end to end
+# ----------
+
+#function simple_vec_ops(T, op!, op, xs...)
+#    a = T[]
+#    op!(a, xs...)
+#    return op(a)
+#end
+#for T = Any[Int,Any], op! = Any[push!,pushfirst!], op = Any[length,size],
+#    xs = Any[(Int,), (Int,Int,)]
+#    let effects = Base.infer_effects(simple_vec_ops, (Type{T},typeof(op!),typeof(op),xs...))
+#        @test Compiler.is_foldable(effects)
+#    end
+#end
+
+# Test that builtin_effects handles vararg correctly
+@test !Compiler.is_nothrow(Compiler.builtin_effects(Compiler.fallback_lattice, Core.isdefined,
+    Any[String, Vararg{Any}], Bool))
+
+# Test that :new can be eliminated even if an sparam is unknown
+struct SparamUnused{T}
+    x
+    SparamUnused(x::T) where {T} = new{T}(x)
+end
+mksparamunused(x) = (SparamUnused(x); nothing)
+let src = code_typed1(mksparamunused, (Any,))
+    @test count(isnew, src.code) == 0
+end
+
+struct WrapperOneField{T}
+    x::T
+end
+
+# Effects for getfield of type instance
+@test Base.infer_effects(Tuple{Nothing}) do x
+    WrapperOneField{typeof(x)}.instance
+end |> Compiler.is_foldable_nothrow
+@test Base.infer_effects(Tuple{WrapperOneField{Float64}, Symbol}) do w, s
+    getfield(w, s)
+end |> Compiler.is_foldable
+@test Base.infer_effects(Tuple{WrapperOneField{Symbol}, Symbol}) do w, s
+    getfield(w, s)
+end |> Compiler.is_foldable
+
+# Flow-sensitive consistent for _typevar
+@test Base.infer_effects() do
+    return WrapperOneField == (WrapperOneField{T} where T)
+end |> Compiler.is_foldable_nothrow
+
+# Test that dead `@inbounds` does not taint consistency
+# https://github.com/JuliaLang/julia/issues/48243
+@test Base.infer_effects(Tuple{Int64}) do i
+    false && @inbounds (1,2,3)[i]
+    return 1
+end |> Compiler.is_foldable_nothrow
+
+@test Base.infer_effects(Tuple{Int64}) do i
+    @inbounds (1,2,3)[i]
+end |> !Compiler.is_noub
+
+@test Base.infer_effects(Tuple{Tuple{Int64}}) do x
+    @inbounds x[1]
+end |> Compiler.is_foldable_nothrow
+
+# Test that :new of non-concrete, but otherwise known type
+# does not taint consistency.
+@eval struct ImmutRef{T}
+    x::T
+    ImmutRef(x) = $(Expr(:new, :(ImmutRef{typeof(x)}), :x))
+end
+@test Compiler.is_foldable(Base.infer_effects(ImmutRef, Tuple{Any}))
+
+@test Compiler.is_foldable_nothrow(Base.infer_effects(typejoin, ()))
+
+# nothrow-ness of subtyping operations
+# https://github.com/JuliaLang/julia/pull/48566
+@test !Compiler.is_nothrow(Base.infer_effects((A,B)->A<:B, (Any,Any)))
+@test !Compiler.is_nothrow(Base.infer_effects((A,B)->A>:B, (Any,Any)))
+
+# GotoIfNot should properly mark itself as throwing when given a non-Bool
+# https://github.com/JuliaLang/julia/pull/48583
+gotoifnot_throw_check_48583(x) = x ? x : 0
+@test !Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Missing,)))
+@test !Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Any,)))
+@test Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Bool,)))
+
+# unknown :static_parameter should taint :nothrow
+# https://github.com/JuliaLang/julia/issues/46771
+unknown_sparam_throw(::Union{Nothing, Type{T}}) where T = (T; nothing)
+unknown_sparam_nothrow1(x::Ref{T}) where T = (T; nothing)
+unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = (T; nothing)
+@test Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{Int},)))
+@test Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{<:Integer},)))
+@test !Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type,)))
+@test !Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Nothing,)))
+@test !Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Union{Type{Int},Nothing},)))
+@test !Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Any,)))
+@test Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow1, (Ref,)))
+@test Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow2, (Ref{Ref{T}} where T,)))
+
+# purely abstract recursion should not taint :terminates
+# https://github.com/JuliaLang/julia/issues/48983
+abstractly_recursive1() = abstractly_recursive2()
+abstractly_recursive2() = (Base._return_type(abstractly_recursive1, Tuple{}); 1)
+abstractly_recursive3() = abstractly_recursive2()
+@test_broken Compiler.is_terminates(Base.infer_effects(abstractly_recursive3, ()))
+actually_recursive1(x) = actually_recursive2(x)
+actually_recursive2(x) = (x <= 0) ? 1 : actually_recursive1(x - 1)
+actually_recursive3(x) = actually_recursive2(x)
+@test !Compiler.is_terminates(Base.infer_effects(actually_recursive3, (Int,)))
+
+# `isdefined` effects
+struct MaybeSome{T}
+    value::T
+    MaybeSome(x::T) where T = new{T}(x)
+    MaybeSome{T}(x::T) where T = new{T}(x)
+    MaybeSome{T}() where T = new{T}()
+end
+const undefined_ref = Ref{String}()
+const defined_ref = Ref{String}("julia")
+const undefined_some = MaybeSome{String}()
+const defined_some = MaybeSome{String}("julia")
+let effects = Base.infer_effects() do
+        isdefined(undefined_ref, :x)
+    end
+    @test !Compiler.is_consistent(effects)
+    @test Compiler.is_nothrow(effects)
+end
+let effects = Base.infer_effects() do
+        isdefined(defined_ref, :x)
+    end
+    @test !Compiler.is_consistent(effects)
+    @test Compiler.is_nothrow(effects)
+end
+let effects = Base.infer_effects() do
+        isdefined(undefined_some, :value)
+    end
+    @test Compiler.is_consistent(effects)
+    @test Compiler.is_nothrow(effects)
+end
+let effects = Base.infer_effects() do
+        isdefined(defined_some, :value)
+    end
+    @test Compiler.is_consistent(effects)
+    @test Compiler.is_nothrow(effects)
+end
+# high-level interface test
+isassigned_effects(s) = isassigned(Ref(s))
+@test Compiler.is_consistent(Base.infer_effects(isassigned_effects, (Symbol,)))
+@test fully_eliminated(; retval=true) do
+    isassigned_effects(:foo)
+end
+
+# inference on throw block should be disabled only when the effects are already known to be
+# concrete-eval ineligible:
+function optimize_throw_block_for_effects(x)
+    a = [x]
+    if x < 0
+        throw(ArgumentError(lazy"negative number given: $x"))
+    end
+    return a
+end
+let effects = Base.infer_effects(optimize_throw_block_for_effects, (Int,))
+    @test Compiler.is_consistent_if_notreturned(effects)
+    @test Compiler.is_effect_free(effects)
+    @test !Compiler.is_nothrow(effects)
+    @test Compiler.is_terminates(effects)
+end
+
+# :isdefined effects
+@test @eval Base.infer_effects() do
+    @isdefined($(gensym("some_undef_symbol")))
+end |> !Compiler.is_consistent
+
+# Effects of Base.hasfield (#50198)
+hf50198(s) = hasfield(typeof((;x=1, y=2)), s)
+f50198() = (hf50198(Ref(:x)[]); nothing)
+@test fully_eliminated(f50198)
+
+# Effects properly applied to flags by irinterp (#50311)
+f50311(x, s) = Symbol(s)
+g50311(x) = Val{f50311((1.0, x), "foo")}()
+@test fully_eliminated(g50311, Tuple{Float64})
+
+# getglobal effects
+const my_defined_var = 42
+@test Base.infer_effects() do
+    getglobal(@__MODULE__, :my_defined_var, :monotonic)
+end |> Compiler.is_foldable_nothrow
+@test Base.infer_effects() do
+    getglobal(@__MODULE__, :my_defined_var, :foo)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects() do
+    getglobal(@__MODULE__, :my_defined_var, :foo, nothing)
+end |> !Compiler.is_nothrow
+
+# irinterp should refine `:nothrow` information only if profitable
+Base.@assume_effects :nothrow function irinterp_nothrow_override(x, y)
+    z = sin(y)
+    if x
+        return "julia"
+    end
+    return z
+end
+@test Base.infer_effects((Float64,)) do y
+    isinf(y) && return zero(y)
+    irinterp_nothrow_override(true, y)
+end |> Compiler.is_nothrow
+
+# Effects for :compilerbarrier
+f1_compilerbarrier(b) = Base.compilerbarrier(:type, b)
+f2_compilerbarrier(b) = Base.compilerbarrier(:conditional, b)
+
+@test !Compiler.is_consistent(Base.infer_effects(f1_compilerbarrier, (Bool,)))
+@test Compiler.is_consistent(Base.infer_effects(f2_compilerbarrier, (Bool,)))
+
+# Optimizer-refined effects
+function f1_optrefine(b)
+    if Base.inferencebarrier(b)
+        error()
+    end
+    return b
+end
+@test !Compiler.is_consistent(Base.infer_effects(f1_optrefine, (Bool,)))
+
+function f2_optrefine()
+    if Ref(false)[]
+        error()
+    end
+    return true
+end
+@test !Compiler.is_nothrow(Base.infer_effects(f2_optrefine; optimize=false))
+@test Compiler.is_nothrow(Base.infer_effects(f2_optrefine))
+
+function f3_optrefine(x)
+    @fastmath sqrt(x)
+    return x
+end
+@test !Compiler.is_consistent(Base.infer_effects(f3_optrefine; optimize=false))
+@test Compiler.is_consistent(Base.infer_effects(f3_optrefine, (Float64,)))
+
+# Check that :consistent is properly modeled for throwing statements
+const GLOBAL_MUTABLE_SWITCH = Ref{Bool}(false)
+
+check_switch(switch::Base.RefValue{Bool}) = (switch[] && error(); return nothing)
+check_switch2() = check_switch(GLOBAL_MUTABLE_SWITCH)
+
+@test (Base.return_types(check_switch2) |> only) === Nothing
+GLOBAL_MUTABLE_SWITCH[] = true
+# Check that flipping the switch doesn't accidentally change the return type
+@test (Base.return_types(check_switch2) |> only) === Nothing
+
+@test !Compiler.is_consistent(Base.infer_effects(check_switch, (Base.RefValue{Bool},)))
+
+# post-opt IPO analysis refinement of `:effect_free`-ness
+function post_opt_refine_effect_free(y, c=true)
+    x = Ref(c)
+    if x[]
+        return true
+    else
+        r = y[] isa Number
+        y[] = nothing
+    end
+    return r
+end
+@test Compiler.is_effect_free(Base.infer_effects(post_opt_refine_effect_free, (Base.RefValue{Any},)))
+@test Base.infer_effects((Base.RefValue{Any},)) do y
+    post_opt_refine_effect_free(y, true)
+end |> Compiler.is_effect_free
+
+# Check EA-based refinement of :effect_free
+Base.@assume_effects :nothrow @noinline _noinline_set!(x) = (x[] = 1; nothing)
+
+function set_ref_with_unused_arg_1(_)
+    x = Ref(0)
+    _noinline_set!(x)
+    return nothing
+end
+function set_ref_with_unused_arg_2(_)
+    x = @noinline Ref(0)
+    _noinline_set!(x)
+    return nothing
+end
+function set_arg_ref!(x)
+    _noinline_set!(x)
+    y = Ref(false)
+    y[] && (Main.x = x)
+    return nothing
+end
+
+function set_arr_with_unused_arg_1(_)
+    x = Int[0]
+    _noinline_set!(x)
+    return nothing
+end
+function set_arr_with_unused_arg_2(_)
+    x = @noinline Int[0]
+    _noinline_set!(x)
+    return nothing
+end
+function set_arg_arr!(x)
+    _noinline_set!(x)
+    y = Bool[false]
+    y[] && (Main.x = x)
+    return nothing
+end
+
+# This is inferable by type analysis only since the arguments have no mutable memory
+@test Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(_noinline_set!, (Base.RefValue{Int},)))
+@test Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(_noinline_set!, (Vector{Int},)))
+for func in (set_ref_with_unused_arg_1, set_ref_with_unused_arg_2,
+             set_arr_with_unused_arg_1, set_arr_with_unused_arg_2)
+    effects = Base.infer_effects(func, (Nothing,))
+    @test Compiler.is_inaccessiblememonly(effects)
+    @test Compiler.is_effect_free(effects)
+end
+
+# These need EA
+@test Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_1, (Base.RefValue{Int},)))
+@test Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_2, (Base.RefValue{Int},)))
+@test Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(set_arg_ref!, (Base.RefValue{Int},)))
+@test_broken Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_1, (Vector{Int},)))
+@test_broken Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_2, (Vector{Int},)))
+@test_broken Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(set_arg_arr!, (Vector{Int},)))
+
+# EA-based refinement of :effect_free
+function f_EA_refine(ax, b)
+    bx = Ref{Any}()
+    @noinline bx[] = b
+    return ax[] + b
+end
+@test Compiler.is_effect_free(Base.infer_effects(f_EA_refine, (Base.RefValue{Int},Int)))
+
+function issue51837(; openquotechar::Char, newlinechar::Char)
+    ncodeunits(openquotechar) == 1 || throw(ArgumentError("`openquotechar` must be a single-byte character"))
+    if !isnothing(newlinechar)
+        ncodeunits(newlinechar) > 1 && throw(ArgumentError("`newlinechar` must be a single-byte character."))
+    end
+    return nothing
+end
+@test Base.infer_effects() do openquotechar::Char, newlinechar::Char
+    issue51837(; openquotechar, newlinechar)
+end |> !Compiler.is_nothrow
+@test_throws ArgumentError issue51837(; openquotechar='α', newlinechar='\n')
+
+# idempotency of effects derived by post-opt analysis
+callgetfield(x, f) = getfield(x, f, Base.@_boundscheck)
+@test Base.infer_effects(callgetfield, (Some{Any},Symbol)).noub === Compiler.NOUB_IF_NOINBOUNDS
+callgetfield1(x, f) = getfield(x, f, Base.@_boundscheck)
+callgetfield_simple(x, f) = callgetfield1(x, f)
+@test Base.infer_effects(callgetfield_simple, (Some{Any},Symbol)).noub ===
+      Base.infer_effects(callgetfield_simple, (Some{Any},Symbol)).noub ===
+      Compiler.ALWAYS_TRUE
+callgetfield2(x, f) = getfield(x, f, Base.@_boundscheck)
+callgetfield_inbounds(x, f) = @inbounds callgetfield2(x, f)
+@test Base.infer_effects(callgetfield_inbounds, (Some{Any},Symbol)).noub ===
+      Base.infer_effects(callgetfield_inbounds, (Some{Any},Symbol)).noub ===
+      Compiler.ALWAYS_FALSE
+
+# noub modeling for memory ops
+let (memoryrefnew, memoryrefget, memoryref_isassigned, memoryrefset!) =
+        (Core.memoryrefnew, Core.memoryrefget, Core.memoryref_isassigned, Core.memoryrefset!)
+    function builtin_effects(@nospecialize xs...)
+        interp = Compiler.NativeInterpreter()
+        𝕃 = Compiler.typeinf_lattice(interp)
+        rt = Compiler.builtin_tfunction(interp, xs..., nothing)
+        return Compiler.builtin_effects(𝕃, xs..., rt)
+    end
+    @test Compiler.is_noub(builtin_effects(memoryrefnew, Any[Memory,]))
+    @test Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int]))
+    @test Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Core.Const(true)]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Core.Const(false)]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Bool]))
+    @test Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Int]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Vararg{Bool}]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Vararg{Any}]))
+    @test Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Core.Const(true)]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Core.Const(false)]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Bool]))
+    @test Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Int]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Vararg{Bool}]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Vararg{Any}]))
+    @test Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Core.Const(true)]))
+    @test !Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Core.Const(false)]))
+    @test !Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Bool]))
+    @test Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Int]))
+    @test !Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Vararg{Bool}]))
+    @test !Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Vararg{Any}]))
+    @test Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Core.Const(true)]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Core.Const(false)]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Bool]))
+    @test Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Int]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Vararg{Bool}]))
+    @test !Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Vararg{Any}]))
+    # `:boundscheck` taint should be refined by post-opt analysis
+    @test Base.infer_effects() do xs::Vector{Any}, i::Int
+        memoryrefget(memoryrefnew(getfield(xs, :ref), i, Base.@_boundscheck), :not_atomic, Base.@_boundscheck)
+    end |> Compiler.is_noub_if_noinbounds
+end
+
+# high level tests
+@test Compiler.is_noub_if_noinbounds(Base.infer_effects(getindex, (Vector{Int},Int)))
+@test Compiler.is_noub_if_noinbounds(Base.infer_effects(getindex, (Vector{Any},Int)))
+@test Compiler.is_noub_if_noinbounds(Base.infer_effects(setindex!, (Vector{Int},Int,Int)))
+@test Compiler.is_noub_if_noinbounds(Base.infer_effects(Base._setindex!, (Vector{Any},Any,Int)))
+@test Compiler.is_noub_if_noinbounds(Base.infer_effects(isassigned, (Vector{Int},Int)))
+@test Compiler.is_noub_if_noinbounds(Base.infer_effects(isassigned, (Vector{Any},Int)))
+@test Base.infer_effects((Vector{Int},Int)) do xs, i
+    xs[i]
+end |> Compiler.is_noub
+@test Base.infer_effects((Vector{Any},Int)) do xs, i
+    xs[i]
+end |> Compiler.is_noub
+@test Base.infer_effects((Vector{Int},Int,Int)) do xs, x, i
+    xs[i] = x
+end |> Compiler.is_noub
+@test Base.infer_effects((Vector{Any},Any,Int)) do xs, x, i
+    xs[i] = x
+end |> Compiler.is_noub
+@test Base.infer_effects((Vector{Int},Int)) do xs, i
+    @inbounds xs[i]
+end |> !Compiler.is_noub
+@test Base.infer_effects((Vector{Any},Int)) do xs, i
+    @inbounds xs[i]
+end |> !Compiler.is_noub
+Base.@propagate_inbounds getindex_propagate(xs, i) = xs[i]
+getindex_dont_propagate(xs, i) = xs[i]
+@test Compiler.is_noub_if_noinbounds(Base.infer_effects(getindex_propagate, (Vector{Any},Int)))
+@test Compiler.is_noub(Base.infer_effects(getindex_dont_propagate, (Vector{Any},Int)))
+@test Base.infer_effects((Vector{Any},Int)) do xs, i
+    @inbounds getindex_propagate(xs, i)
+end |> !Compiler.is_noub
+@test Base.infer_effects((Vector{Any},Int)) do xs, i
+    @inbounds getindex_dont_propagate(xs, i)
+end |> Compiler.is_noub
+
+# refine `:nothrow` when `exct` is known to be `Bottom`
+@test Base.infer_exception_type(getindex, (Vector{Int},Int)) == BoundsError
+function getindex_nothrow(xs::Vector{Int}, i::Int)
+    try
+        return xs[i]
+    catch err
+        err isa BoundsError && return nothing
+        rethrow(err)
+    end
+end
+@test Compiler.is_nothrow(Base.infer_effects(getindex_nothrow, (Vector{Int}, Int)))
+
+# callsite `@assume_effects` annotation
+let ast = code_lowered((Int,)) do x
+        Base.@assume_effects :total identity(x)
+    end |> only
+    ssaflag = ast.ssaflags[findfirst(!iszero, ast.ssaflags)::Int]
+    override = Compiler.decode_statement_effects_override(ssaflag)
+    # if this gets broken, check if this is synced with expr.jl
+    @test override.consistent && override.effect_free && override.nothrow &&
+          override.terminates_globally && !override.terminates_locally &&
+          override.notaskstate && override.inaccessiblememonly &&
+          override.noub && !override.noub_if_noinbounds
+end
+@test Base.infer_effects((Float64,)) do x
+    isinf(x) && return 0.0
+    return Base.@assume_effects :nothrow sin(x)
+end |> Compiler.is_nothrow
+let effects = Base.infer_effects((Vector{Float64},)) do xs
+        isempty(xs) && return 0.0
+        Base.@assume_effects :nothrow begin
+            x = Base.@assume_effects :noub @inbounds xs[1]
+            isinf(x) && return 0.0
+            return sin(x)
+        end
+    end
+    # all nested overrides should be applied
+    @test Compiler.is_nothrow(effects)
+    @test Compiler.is_noub(effects)
+end
+@test Base.infer_effects((Int,)) do x
+    res = 1
+    0 ≤ x < 20 || error("bad fact")
+    Base.@assume_effects :terminates_locally while x > 1
+        res *= x
+        x -= 1
+    end
+    return res
+end |> Compiler.is_terminates
+
+# https://github.com/JuliaLang/julia/issues/52531
+const a52531 = Core.Ref(1)
+@eval getref52531() = $(QuoteNode(a52531)).x
+@test !Compiler.is_consistent(Base.infer_effects(getref52531))
+let
+    global set_a52531!, get_a52531
+    _a::Int             = -1
+    set_a52531!(a::Int) = (_a = a; return get_a52531())
+    get_a52531()        = _a
+end
+@test !Compiler.is_consistent(Base.infer_effects(set_a52531!, (Int,)))
+@test !Compiler.is_consistent(Base.infer_effects(get_a52531, ()))
+@test get_a52531() == -1
+@test set_a52531!(1) == 1
+@test get_a52531() == 1
+
+let
+    global is_initialized52531, set_initialized52531!
+    _is_initialized                   = false
+    set_initialized52531!(flag::Bool) = (_is_initialized = flag)
+    is_initialized52531()             = _is_initialized
+end
+top_52531(_) = (set_initialized52531!(true); nothing)
+@test !Compiler.is_consistent(Base.infer_effects(is_initialized52531))
+@test !Compiler.is_removable_if_unused(Base.infer_effects(set_initialized52531!, (Bool,)))
+@test !is_initialized52531()
+top_52531(0)
+@test is_initialized52531()
+
+const ref52843 = Ref{Int}()
+@eval func52843() = ($ref52843[] = 1; nothing)
+@test !Compiler.is_foldable(Base.infer_effects(func52843))
+let; Base.Experimental.@force_compile; func52843(); end
+@test ref52843[] == 1
+
+@test Compiler.is_inaccessiblememonly(Base.infer_effects(identity∘identity, Tuple{Any}))
+@test Compiler.is_inaccessiblememonly(Base.infer_effects(()->Vararg, Tuple{}))
+
+# pointerref nothrow for invalid pointer
+@test !Compiler.intrinsic_nothrow(Core.Intrinsics.pointerref, Any[Type{Ptr{Vector{Int64}}}, Int, Int])
+@test !Compiler.intrinsic_nothrow(Core.Intrinsics.pointerref, Any[Type{Ptr{T}} where T, Int, Int])
+
+# post-opt :consistent-cy analysis correctness
+# https://github.com/JuliaLang/julia/issues/53508
+@test !Compiler.is_consistent(Base.infer_effects(getindex, (UnitRange{Int},Int)))
+@test !Compiler.is_consistent(Base.infer_effects(getindex, (Base.OneTo{Int},Int)))
+
+@noinline f53613() = @assert isdefined(@__MODULE__, :v53613)
+g53613() = f53613()
+h53613() = g53613()
+@test !Compiler.is_consistent(Base.infer_effects(f53613))
+@test !Compiler.is_consistent(Base.infer_effects(g53613))
+@test_throws AssertionError f53613()
+@test_throws AssertionError g53613()
+@test_throws AssertionError h53613()
+global v53613 = nothing
+@test f53613() === nothing
+@test g53613() === nothing
+@test h53613() === nothing
+
+# tuple/svec effects
+@test Base.infer_effects((Vector{Any},)) do xs
+    Core.tuple(xs...)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Vector{Any},)) do xs
+    Core.svec(xs...)
+end |> Compiler.is_nothrow
+
+# effects for unknown `:foreigncall`s
+@test Base.infer_effects() do
+    @ccall unsafecall()::Cvoid
+end == Compiler.EFFECTS_UNKNOWN
+
+# fpext
+@test Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float32}, Float16])
+@test Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float64}, Float16])
+@test Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float64}, Float32])
+@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float16}, Float16])
+@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float16}, Float32])
+@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float32}, Float32])
+@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float32}, Float64])
+@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Int32}, Float16])
+@test !Compiler.intrinsic_nothrow(Core.Intrinsics.fpext, Any[Type{Float32}, Int16])
+
+# Float intrinsics require float arguments
+@test Base.infer_effects((Int16,)) do x
+    return Core.Intrinsics.abs_float(x)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Int32, Int32)) do x, y
+    return Core.Intrinsics.add_float(x, y)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Int32, Int32)) do x, y
+    return Core.Intrinsics.add_float(x, y)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Int64, Int64, Int64)) do x, y, z
+    return Core.Intrinsics.fma_float(x, y, z)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Int64,)) do x
+    return Core.Intrinsics.fptoui(UInt32, x)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Int64,)) do x
+    return Core.Intrinsics.fptosi(Int32, x)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Int64,)) do x
+    return Core.Intrinsics.sitofp(Int64, x)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((UInt64,)) do x
+    return Core.Intrinsics.uitofp(Int64, x)
+end |> !Compiler.is_nothrow
+
+# effects modeling for pointer-related intrinsics
+let effects = Base.infer_effects(Core.Intrinsics.pointerref, Tuple{Vararg{Any}})
+    @test !Compiler.is_consistent(effects)
+    @test Compiler.is_effect_free(effects)
+    @test !Compiler.is_inaccessiblememonly(effects)
+end
+let effects = Base.infer_effects(Core.Intrinsics.pointerset, Tuple{Vararg{Any}})
+    @test Compiler.is_consistent(effects)
+    @test !Compiler.is_effect_free(effects)
+end
+@test Compiler.intrinsic_nothrow(Core.Intrinsics.add_ptr, Any[Ptr{Int}, UInt])
+@test Compiler.intrinsic_nothrow(Core.Intrinsics.sub_ptr, Any[Ptr{Int}, UInt])
+@test !Compiler.intrinsic_nothrow(Core.Intrinsics.add_ptr, Any[UInt, UInt])
+@test !Compiler.intrinsic_nothrow(Core.Intrinsics.sub_ptr, Any[UInt, UInt])
+@test Compiler.is_nothrow(Base.infer_effects(+, Tuple{Ptr{UInt8}, UInt}))
+# effects modeling for atomic intrinsics
+# these functions especially need to be marked !effect_free since they imply synchronization
+for atomicfunc = Any[
+        Core.Intrinsics.atomic_pointerref,
+        Core.Intrinsics.atomic_pointerset,
+        Core.Intrinsics.atomic_pointerswap,
+        Core.Intrinsics.atomic_pointerreplace,
+        Core.Intrinsics.atomic_fence]
+    @test !Compiler.is_effect_free(Base.infer_effects(atomicfunc, Tuple{Vararg{Any}}))
+end
+
+# effects modeling for intrinsics that can do arbitrary things
+let effects = Base.infer_effects(Core.Intrinsics.llvmcall, Tuple{Vararg{Any}})
+    @test effects == Compiler.Effects()
+end
+let effects = Base.infer_effects(Core.Intrinsics.atomic_pointermodify, Tuple{Vararg{Any}})
+    @test effects == Compiler.Effects()
+end
+
+# JuliaLang/julia#57780
+let effects = Base.infer_effects(Base._unsetindex!, (MemoryRef{String},))
+    @test !Compiler.is_effect_free(effects)
+end
+
+# builtin functions that can do arbitrary things should have the top effects
+@test Base.infer_effects(Core._call_in_world_total, Tuple{Vararg{Any}}) == Compiler.Effects()
+@test Base.infer_effects(Core.invoke_in_world, Tuple{Vararg{Any}}) == Compiler.Effects()
+@test Base.infer_effects(invokelatest, Tuple{Vararg{Any}}) == Compiler.Effects()
+@test Base.infer_effects(invoke, Tuple{Vararg{Any}}) == Compiler.Effects()
+
+# Core._svec_ref effects modeling (required for external abstract interpreter that doesn't run optimization)
+let effects = Base.infer_effects((Core.SimpleVector,Int); optimize=false) do svec, i
+        Core._svec_ref(svec, i)
+    end
+    @test Compiler.is_consistent(effects)
+    @test Compiler.is_effect_free(effects)
+    @test !Compiler.is_nothrow(effects)
+    @test Compiler.is_terminates(effects)
+end
+
+@test Compiler.is_nothrow(Base.infer_effects(length, (Core.SimpleVector,)))
+
+
+# https://github.com/JuliaLang/julia/issues/60009
+function null_offset(offset)
+    Ptr{UInt8}(C_NULL) + offset
+end
+@test null_offset(Int(100)) == Ptr{UInt8}(UInt(100))
diff --git a/test/compiler/inference.jl b/Compiler/test/inference.jl
similarity index 65%
rename from test/compiler/inference.jl
rename to Compiler/test/inference.jl
index ded9438037733..941f9d6605206 100644
--- a/test/compiler/inference.jl
+++ b/Compiler/test/inference.jl
@@ -1,14 +1,19 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# tests for Core.Compiler correctness and precision
-import Core.Compiler: Const, Conditional, ⊑, ReturnNode, GotoIfNot
-isdispatchelem(@nospecialize x) = !isa(x, Type) || Core.Compiler.isdispatchelem(x)
+module inference
 
-using Random, Core.IR
-using InteractiveUtils: code_llvm
+using Test
 
+include("setup_Compiler.jl")
 include("irutils.jl")
 
+# tests for Compiler correctness and precision
+using .Compiler: Conditional, ⊑
+isdispatchelem(@nospecialize x) = !isa(x, Type) || Compiler.isdispatchelem(x)
+
+using Random, Core.IR
+using InteractiveUtils
+
 f39082(x::Vararg{T}) where {T <: Number} = x[1]
 let ast = only(code_typed(f39082, Tuple{Vararg{Rational}}))[1]
     @test ast.slottypes == Any[Const(f39082), Tuple{Vararg{Rational}}]
@@ -18,161 +23,245 @@ let ast = only(code_typed(f39082, Tuple{Rational, Vararg{Rational}}))[1]
 end
 
 # demonstrate some of the type-size limits
-@test Core.Compiler.limit_type_size(Ref{Complex{T} where T}, Ref, Ref, 100, 0) == Ref
-@test Core.Compiler.limit_type_size(Ref{Complex{T} where T}, Ref{Complex{T} where T}, Ref, 100, 0) == Ref{Complex{T} where T}
+@test Compiler.limit_type_size(Ref{Complex{T} where T}, Ref, Ref, 100, 0) == Ref
+@test Compiler.limit_type_size(Ref{Complex{T} where T}, Ref{Complex{T} where T}, Ref, 100, 0) == Ref{Complex{T} where T}
 
 let comparison = Tuple{X, X} where X<:Tuple
     sig = Tuple{X, X} where X<:comparison
     ref = Tuple{X, X} where X
-    @test Core.Compiler.limit_type_size(sig, comparison, comparison, 100, 100) == Tuple{Tuple, Tuple}
-    @test Core.Compiler.limit_type_size(sig, ref, comparison, 100, 100) == Tuple{Any, Any}
-    @test Core.Compiler.limit_type_size(Tuple{sig}, Tuple{ref}, comparison, 100, 100) == Tuple{Tuple{Any, Any}}
-    @test Core.Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref
+    @test Compiler.limit_type_size(sig, comparison, comparison, 100, 100) == Tuple{Tuple, Tuple}
+    @test Compiler.limit_type_size(sig, ref, comparison, 100, 100) == Tuple{Any, Any}
+    @test Compiler.limit_type_size(Tuple{sig}, Tuple{ref}, comparison, 100, 100) == Tuple{Tuple{Any, Any}}
+    @test Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref
 end
 
 let ref = Tuple{T, Val{T}} where T<:Val
     sig = Tuple{T, Val{T}} where T<:(Val{T} where T<:Val)
-    @test Core.Compiler.limit_type_size(sig, ref, Union{}, 100, 100) == Tuple{Val, Val}
-    @test Core.Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref
+    @test Compiler.limit_type_size(sig, ref, Union{}, 100, 100) == Tuple{Val, Val}
+    @test Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref
 end
 let ref = Tuple{T, Val{T}} where T<:(Val{T} where T<:(Val{T} where T<:(Val{T} where T<:Val)))
     sig = Tuple{T, Val{T}} where T<:(Val{T} where T<:(Val{T} where T<:(Val{T} where T<:(Val{T} where T<:Val))))
-    @test Core.Compiler.limit_type_size(sig, ref, Union{}, 100, 100) == Tuple{Val, Val}
-    @test Core.Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref
+    @test Compiler.limit_type_size(sig, ref, Union{}, 100, 100) == Tuple{Val, Val}
+    @test Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref
 end
 
 let t = Tuple{Ref{T},T,T} where T, c = Tuple{Ref, T, T} where T # #36407
-    @test t <: Core.Compiler.limit_type_size(t, c, Union{}, 1, 100)
+    @test t <: Compiler.limit_type_size(t, c, Union{}, 1, 100)
 end
 
 # obtain Vararg with 2 undefined fields
 let va = ccall(:jl_type_intersection_with_env, Any, (Any, Any), Tuple{Tuple}, Tuple{Tuple{Vararg{Any, N}}} where N)[2][1]
-    @test Core.Compiler.__limit_type_size(Tuple, va, Core.svec(va, Union{}), 2, 2) === Tuple
+    @test Compiler.__limit_type_size(Tuple, va, Core.svec(va, Union{}), 2, 2) === Tuple
 end
 
 mutable struct TS14009{T}; end
 let A = TS14009{TS14009{TS14009{TS14009{TS14009{T}}}}} where {T},
     B = Base.rewrap_unionall(TS14009{Base.unwrap_unionall(A)}, A)
 
-    @test Core.Compiler.Compiler.limit_type_size(B, A, A, 2, 2) == TS14009
+    @test Compiler.Compiler.limit_type_size(B, A, A, 2, 2) == TS14009
 end
 
 # issue #42835
-@test !Core.Compiler.type_more_complex(Int, Any, Core.svec(), 1, 1, 1)
-@test !Core.Compiler.type_more_complex(Int, Type{Int}, Core.svec(), 1, 1, 1)
-@test !Core.Compiler.type_more_complex(Type{Int}, Any, Core.svec(), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Type{Int}}, Type{Int}, Core.svec(Type{Int}), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Type{Int}}, Int, Core.svec(Type{Int}), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Type{Int}}, Any, Core.svec(), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Type{Type{Int}}}, Type{Type{Int}}, Core.svec(Type{Type{Int}}), 1, 1, 1)
-
-@test  Core.Compiler.type_more_complex(ComplexF32, Any, Core.svec(), 1, 1, 1)
-@test !Core.Compiler.type_more_complex(ComplexF32, Any, Core.svec(Type{ComplexF32}), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(ComplexF32, Type{ComplexF32}, Core.svec(), 1, 1, 1)
-@test !Core.Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(Type{Type{ComplexF32}}), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{ComplexF32}, Type{Type{ComplexF32}}, Core.svec(), 1, 1, 1)
-@test !Core.Compiler.type_more_complex(Type{ComplexF32}, ComplexF32, Core.svec(), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Type{ComplexF32}}, Type{ComplexF32}, Core.svec(Type{ComplexF32}), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Type{ComplexF32}}, ComplexF32, Core.svec(ComplexF32), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Type{Type{ComplexF32}}}, Type{Type{ComplexF32}}, Core.svec(Type{ComplexF32}), 1, 1, 1)
+@test !Compiler.type_more_complex(Int, Any, Core.svec(), 1, 1, 1)
+@test !Compiler.type_more_complex(Int, Type{Int}, Core.svec(), 1, 1, 1)
+@test !Compiler.type_more_complex(Type{Int}, Any, Core.svec(), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{Int}}, Any, Core.svec(), 1, 1, 1)
+@test  Compiler.limit_type_size(Type{Int}, Any, Union{}, 0, 0) == Type{Int}
+@test  Compiler.type_more_complex(Type{Type{Int}}, Type{Int}, Core.svec(Type{Int}), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{Int}}, Int, Core.svec(Type{Int}), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{Int}}, Any, Core.svec(), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{Type{Int}}}, Type{Type{Int}}, Core.svec(Type{Type{Int}}), 1, 1, 1)
+
+@test  Compiler.type_more_complex(ComplexF32, Any, Core.svec(), 1, 1, 1)
+@test !Compiler.type_more_complex(ComplexF32, Any, Core.svec(Type{ComplexF32}), 1, 1, 1)
+@test  Compiler.type_more_complex(ComplexF32, Type{ComplexF32}, Core.svec(), 1, 1, 1)
+@test !Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(Type{Type{ComplexF32}}), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{ComplexF32}, Type{Type{ComplexF32}}, Core.svec(), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{ComplexF32}, ComplexF32, Core.svec(), 1, 1, 1)
+@test  Compiler.limit_type_size(Type{ComplexF32}, ComplexF32, Union{}, 1, 1) == Type{<:Complex}
+@test  Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{ComplexF32}}, Type{ComplexF32}, Core.svec(Type{ComplexF32}), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{ComplexF32}}, ComplexF32, Core.svec(ComplexF32), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{Type{ComplexF32}}}, Type{Type{ComplexF32}}, Core.svec(Type{ComplexF32}), 1, 1, 1)
 
 # n.b. Type{Type{Union{}} === Type{Core.TypeofBottom}
-@test !Core.Compiler.type_more_complex(Type{Union{}}, Any, Core.svec(), 1, 1, 1)
-@test !Core.Compiler.type_more_complex(Type{Type{Union{}}}, Any, Core.svec(), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Any, Core.svec(), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Type{Type{Union{}}}, Core.svec(Type{Type{Union{}}}), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Type{Type{Type{Union{}}}}}, Type{Type{Type{Union{}}}}, Core.svec(Type{Type{Type{Union{}}}}), 1, 1, 1)
-
-@test !Core.Compiler.type_more_complex(Type{1}, Type{2}, Core.svec(), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
-@test !Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 0, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Type{Union{Float32,Float64}}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
-@test  Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Any, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+@test !Compiler.type_more_complex(Type{Union{}}, Any, Core.svec(), 1, 1, 1)
+@test !Compiler.type_more_complex(Type{Type{Union{}}}, Any, Core.svec(), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Any, Core.svec(), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Type{Type{Union{}}}, Core.svec(Type{Type{Union{}}}), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{Type{Type{Union{}}}}}, Type{Type{Type{Union{}}}}, Core.svec(Type{Type{Type{Union{}}}}), 1, 1, 1)
+
+@test !Compiler.type_more_complex(Type{1}, Type{2}, Core.svec(), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{Union{Float32,Float64}}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{Type{Union{Float32,Float64}}}, Type{Union{Float32,Float64}}, Core.svec(Type{Union{Float32,Float64}}), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Type{Union{Float32,Float64}}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+@test  Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Any, Core.svec(Union{Float32,Float64}), 1, 1, 1)
 
 # issue #49287
-@test !Core.Compiler.type_more_complex(Tuple{Vararg{Tuple{}}}, Tuple{Vararg{Tuple}}, Core.svec(), 0, 0, 0)
-@test  Core.Compiler.type_more_complex(Tuple{Vararg{Tuple}}, Tuple{Vararg{Tuple{}}}, Core.svec(), 0, 0, 0)
+@test !Compiler.type_more_complex(Tuple{Vararg{Tuple{}}}, Tuple{Vararg{Tuple}}, Core.svec(), 0, 0, 0)
+@test  Compiler.type_more_complex(Tuple{Vararg{Tuple}}, Tuple{Vararg{Tuple{}}}, Core.svec(), 0, 0, 0)
+
+# issue #51694
+@test Compiler.type_more_complex(
+       Base.Generator{Base.Iterators.Flatten{Vector{Bool}}, typeof(identity)},
+       Base.Generator{Vector{Bool}, typeof(identity)},
+       Core.svec(), 0, 0, 0)
+@test Compiler.type_more_complex(
+       Base.Generator{Base.Iterators.Flatten{Base.Generator{Vector{Bool}, typeof(identity)}}, typeof(identity)},
+       Base.Generator{Vector{Bool}, typeof(identity)},
+       Core.svec(), 0, 0, 0)
 
 let # 40336
-    t = Type{Type{Int}}
-    c = Type{Int}
-    r = Core.Compiler.limit_type_size(t, c, c, 100, 100)
+    t = Type{Type{Type{Int}}}
+    c = Type{Type{Int}}
+    r = Compiler.limit_type_size(t, c, c, 100, 100)
     @test t !== r && t <: r
 end
 
-@test Core.Compiler.unionlen(Union{}) == 1
-@test Core.Compiler.unionlen(Int8) == 1
-@test Core.Compiler.unionlen(Union{Int8, Int16}) == 2
-@test Core.Compiler.unionlen(Union{Int8, Int16, Int32, Int64}) == 4
-@test Core.Compiler.unionlen(Tuple{Union{Int8, Int16, Int32, Int64}}) == 1
-@test Core.Compiler.unionlen(Union{Int8, Int16, Int32, T} where T) == 1
-
-@test Core.Compiler.unioncomplexity(Union{}) == 0
-@test Core.Compiler.unioncomplexity(Int8) == 0
-@test Core.Compiler.unioncomplexity(Val{Union{Int8, Int16, Int32, Int64}}) == 0
-@test Core.Compiler.unioncomplexity(Union{Int8, Int16}) == 1
-@test Core.Compiler.unioncomplexity(Union{Int8, Int16, Int32, Int64}) == 3
-@test Core.Compiler.unioncomplexity(Tuple{Union{Int8, Int16, Int32, Int64}}) == 3
-@test Core.Compiler.unioncomplexity(Union{Int8, Int16, Int32, T} where T) == 3
-@test Core.Compiler.unioncomplexity(Tuple{Val{T}, Union{Int8, Int16}, Int8} where T<:Union{Int8, Int16, Int32, Int64}) == 3
-@test Core.Compiler.unioncomplexity(Tuple{Vararg{Tuple{Union{Int8, Int16}}}}) == 1
-@test Core.Compiler.unioncomplexity(Tuple{Vararg{Symbol}}) == 0
-@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}) == 1
-@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}) == 2
-@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}}}}) == 3
+@test Compiler.limit_type_size(Type{Type{Type{Int}}}, Type, Union{}, 0, 0) == Type{<:Type}
+@test Compiler.limit_type_size(Type{Type{Int}}, Type, Union{}, 0, 0) == Type{<:Type}
+@test Compiler.limit_type_size(Type{Int}, Type, Union{}, 0, 0) == Type{Int}
+@test Compiler.limit_type_size(Type{<:Int}, Type, Union{}, 0, 0) == Type{<:Int}
+@test Compiler.limit_type_size(Type{ComplexF32}, ComplexF32, Union{}, 0, 0) == Type{<:Complex} # added nesting
+@test Compiler.limit_type_size(Type{ComplexF32}, Type{ComplexF64}, Union{}, 0, 0) == Type{ComplexF32} # base matches
+@test Compiler.limit_type_size(Type{ComplexF32}, Type, Union{}, 0, 0) == Type{<:Complex}
+@test_broken  Compiler.limit_type_size(Type{<:ComplexF64}, Type, Union{}, 0, 0) == Type{<:Complex}
+@test Compiler.limit_type_size(Type{<:ComplexF64}, Type, Union{}, 0, 0) == Type #50692
+@test Compiler.limit_type_size(Type{Union{ComplexF32,ComplexF64}}, Type, Union{}, 0, 0) == Type
+@test_broken Compiler.limit_type_size(Type{Union{ComplexF32,ComplexF64}}, Type, Union{}, 0, 0) == Type{<:Complex} #50692
+@test Compiler.limit_type_size(Type{Union{Float32,Float64}}, Type, Union{}, 0, 0) == Type
+@test Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Type{Type{Int}}, Union{}, 0, 0) == Type
+@test Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Union{Type{Int},Type{Type{Int}}}, Union{}, 0, 0) == Type
+@test Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Type{Union{Type{Int},Type{Type{Int}}}}, Union{}, 0, 0) == Type{Union{Int, Type{Int}}}
+@test Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Type{Type{Int}}, Union{}, 0, 0) == Type
+
+
+@test Compiler.limit_type_size(Type{Any}, Union{}, Union{}, 0, 0) ==
+      Compiler.limit_type_size(Type{Any}, Any, Union{}, 0, 0) ==
+      Compiler.limit_type_size(Type{Any}, Type, Union{}, 0, 0) ==
+      Type{Any}
+
+# issue #43296
+struct C43296{t,I} end
+r43296(b) = r43296(typeof(b))
+r43296(::Type) = nothing
+r43296(::Nothing) = nonexistent
+r43296(::Type{C43296{c,d}}) where {c,d} = f43296(r43296(c), e)
+f43296(::Nothing, :) = nothing
+f43296(g, :) = h
+k43296(b, j, :) = l
+k43296(b, j, ::Nothing) = b
+i43296(b, j) = k43296(b, j, r43296(j))
+@test only(Base.return_types(i43296, (Int, C43296{C43296{C43296{Val, Tuple}}}))) <: Int
+@test only(Base.return_types(i43296, (Int, C43296{C43296{C43296{Val, <:Tuple}}}))) <: Int
+
+abstract type e43296{a, j} <: AbstractArray{a, j} end
+abstract type b43296{a, j, c, d} <: e43296{a, j} end
+struct h43296{a, j, f, d, i} <: b43296{a, j, f, d} end
+Base.ndims(::Type{f}) where {f<:e43296} = ndims(supertype(f))
+Base.ndims(g::e43296) = ndims(typeof(g))
+@test only(Base.return_types(ndims, (h43296{Any, 0, Any, Int, Any},))) == Int
+
+@test Compiler.unionlen(Union{}) == 1
+@test Compiler.unionlen(Int8) == 1
+@test Compiler.unionlen(Union{Int8, Int16}) == 2
+@test Compiler.unionlen(Union{Int8, Int16, Int32, Int64}) == 4
+@test Compiler.unionlen(Tuple{Union{Int8, Int16, Int32, Int64}}) == 1
+@test Compiler.unionlen(Union{Int8, Int16, Int32, T} where T) == 1
+
+@test Compiler.unioncomplexity(Union{}) == 0
+@test Compiler.unioncomplexity(Int8) == 0
+@test Compiler.unioncomplexity(Val{Union{Int8, Int16, Int32, Int64}}) == 0
+@test Compiler.unioncomplexity(Union{Int8, Int16}) == 1
+@test Compiler.unioncomplexity(Union{Int8, Int16, Int32, Int64}) == 3
+@test Compiler.unioncomplexity(Tuple{Union{Int8, Int16, Int32, Int64}}) == 3
+@test Compiler.unioncomplexity(Union{Int8, Int16, Int32, T} where T) == 3
+@test Compiler.unioncomplexity(Tuple{Val{T}, Union{Int8, Int16}, Int8} where T<:Union{Int8, Int16, Int32, Int64}) == 3
+@test Compiler.unioncomplexity(Tuple{Vararg{Tuple{Union{Int8, Int16}}}}) == 2
+@test Compiler.unioncomplexity(Tuple{Vararg{Symbol}}) == 1
+@test Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}) == 3
+@test Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}) == 5
+@test Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}}}}) == 7
 
 
 # PR 22120
-function tmerge_test(a, b, r, commutative=true)
-    @test r == Core.Compiler.tuplemerge(a, b)
-    if commutative
-        @test r == Core.Compiler.tuplemerge(b, a)
-    else
-        @test_broken r == Core.Compiler.tuplemerge(b, a)
-    end
-end
-tmerge_test(Tuple{Int}, Tuple{String}, Tuple{Union{Int, String}})
-tmerge_test(Tuple{Int}, Tuple{String, String}, Tuple)
-tmerge_test(Tuple{Vararg{Int}}, Tuple{String}, Tuple)
-tmerge_test(Tuple{Int}, Tuple{Int, Int},
+function tuplemerge_test(a, b, r, commutative=true)
+    @test r == Compiler.tuplemerge(a, b)
+    @test r == Compiler.tuplemerge(b, a) broken=!commutative
+end
+tuplemerge_test(Tuple{Int}, Tuple{String}, Tuple{Union{Int, String}})
+tuplemerge_test(Tuple{Int}, Tuple{String, String}, Tuple)
+tuplemerge_test(Tuple{Vararg{Int}}, Tuple{String}, Tuple)
+tuplemerge_test(Tuple{Int}, Tuple{Int, Int},
     Tuple{Vararg{Int}})
-tmerge_test(Tuple{Integer}, Tuple{Int, Int},
+tuplemerge_test(Tuple{Integer}, Tuple{Int, Int},
     Tuple{Vararg{Integer}})
-tmerge_test(Tuple{}, Tuple{Int, Int},
+tuplemerge_test(Tuple{}, Tuple{Int, Int},
     Tuple{Vararg{Int}})
-tmerge_test(Tuple{}, Tuple{Complex},
+tuplemerge_test(Tuple{}, Tuple{Complex},
     Tuple{Vararg{Complex}})
-tmerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF64},
+tuplemerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF64},
     Tuple{Vararg{Complex}})
-tmerge_test(Tuple{Vararg{ComplexF32}}, Tuple{Vararg{ComplexF64}},
+tuplemerge_test(Tuple{Vararg{ComplexF32}}, Tuple{Vararg{ComplexF64}},
     Tuple{Vararg{Complex}})
-tmerge_test(Tuple{}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}},
+tuplemerge_test(Tuple{}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}},
     Tuple{Vararg{Union{ComplexF32, ComplexF64}}})
-tmerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}},
+tuplemerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}},
     Tuple{Vararg{Union{ComplexF32, ComplexF64}}})
-tmerge_test(Tuple{ComplexF32, ComplexF32, ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}},
+tuplemerge_test(Tuple{ComplexF32, ComplexF32, ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}},
     Tuple{Vararg{Union{ComplexF32, ComplexF64}}})
-tmerge_test(Tuple{}, Tuple{Union{ComplexF64, ComplexF32}, Vararg{Union{ComplexF32, ComplexF64}}},
+tuplemerge_test(Tuple{}, Tuple{Union{ComplexF64, ComplexF32}, Vararg{Union{ComplexF32, ComplexF64}}},
     Tuple{Vararg{Union{ComplexF32, ComplexF64}}})
-tmerge_test(Tuple{ComplexF64, ComplexF64, ComplexF32}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}},
+tuplemerge_test(Tuple{ComplexF64, ComplexF64, ComplexF32}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}},
     Tuple{Vararg{Complex}}, false)
-tmerge_test(Tuple{}, Tuple{Complex, Vararg{Union{ComplexF32, ComplexF64}}},
+tuplemerge_test(Tuple{}, Tuple{Complex, Vararg{Union{ComplexF32, ComplexF64}}},
     Tuple{Vararg{Complex}})
-@test Core.Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) ==
+@test Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) ==
     Union{Nothing, Tuple{}, Tuple{ComplexF32, ComplexF32}}
-@test Core.Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF32}}) ==
+@test Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF32}}) ==
     Union{Nothing, Tuple{Vararg{ComplexF32}}}
-@test Core.Compiler.tmerge(Union{Nothing, Tuple{ComplexF32}}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) ==
+@test Compiler.tmerge(Union{Nothing, Tuple{ComplexF32}}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) ==
     Union{Nothing, Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF32}}
-@test Core.Compiler.tmerge(Union{Nothing, Tuple{}, Tuple{ComplexF32}}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) ==
+@test Compiler.tmerge(Union{Nothing, Tuple{}, Tuple{ComplexF32}}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) ==
     Union{Nothing, Tuple{Vararg{ComplexF32}}}
-@test Core.Compiler.tmerge(Vector{Int}, Core.Compiler.tmerge(Vector{String}, Vector{Bool})) ==
+@test Compiler.tmerge(Vector{Int}, Compiler.tmerge(Vector{String}, Vector{Bool})) ==
     Union{Vector{Bool}, Vector{Int}, Vector{String}}
-@test Core.Compiler.tmerge(Vector{Int}, Core.Compiler.tmerge(Vector{String}, Union{Vector{Bool}, Vector{Symbol}})) == Vector
-@test Core.Compiler.tmerge(Base.BitIntegerType, Union{}) === Base.BitIntegerType
-@test Core.Compiler.tmerge(Union{}, Base.BitIntegerType) === Base.BitIntegerType
-@test Core.Compiler.tmerge(Core.Compiler.fallback_ipo_lattice, Core.Compiler.InterConditional(1, Int, Union{}), Core.Compiler.InterConditional(2, String, Union{})) === Core.Compiler.Const(true)
+@test Compiler.tmerge(Vector{Int}, Compiler.tmerge(Vector{String}, Union{Vector{Bool}, Vector{Symbol}})) == Vector
+@test Compiler.tmerge(Base.BitIntegerType, Union{}) === Base.BitIntegerType
+@test Compiler.tmerge(Union{}, Base.BitIntegerType) === Base.BitIntegerType
+@test Compiler.tmerge(Compiler.fallback_ipo_lattice, Compiler.InterConditional(1, Int, Union{}), Compiler.InterConditional(2, String, Union{})) === Compiler.Const(true)
+# test issue behind https://github.com/JuliaLang/julia/issues/50458
+@test Compiler.tmerge(Nothing, Tuple{Base.BitInteger, Int}) == Union{Nothing, Tuple{Base.BitInteger, Int}}
+@test Compiler.tmerge(Union{Nothing, Tuple{Int, Int}}, Tuple{Base.BitInteger, Int}) == Union{Nothing, Tuple{Any, Int}}
+@test Compiler.tmerge(Nothing, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}) == Union{Nothing, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}}
+@test Compiler.tmerge(Union{Nothing, Tuple{Char, Int}}, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}) == Union{Nothing, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}}
+@test Compiler.tmerge(Nothing, Tuple{Integer, Int}) == Union{Nothing, Tuple{Integer, Int}}
+@test Compiler.tmerge(Union{Nothing, Tuple{Int, Int}}, Tuple{Integer, Int}) == Union{Nothing, Tuple{Integer, Int}}
+@test Compiler.tmerge(Union{Nothing, Int, AbstractVector{Int}}, Vector) == Union{Nothing, Int, AbstractVector}
+@test Compiler.tmerge(Union{Nothing, Int, AbstractVector{Int}}, Matrix) == Union{Nothing, Int, AbstractArray}
+@test Compiler.tmerge(Union{Nothing, Int, AbstractVector{Int}}, Matrix{Int}) == Union{Nothing, Int, AbstractArray{Int}}
+@test Compiler.tmerge(Union{Nothing, Int, AbstractVector{Int}}, Array) == Union{Nothing, Int, AbstractArray}
+@test Compiler.tmerge(Union{Nothing, Int, AbstractArray{Int}}, Vector) == Union{Nothing, Int, AbstractArray}
+@test Compiler.tmerge(Union{Nothing, Int, AbstractVector}, Matrix{Int}) == Union{Nothing, Int, AbstractArray}
+@test Compiler.tmerge(Union{Nothing, AbstractFloat}, Integer) == Union{Nothing, AbstractFloat, Integer}
+@test Compiler.tmerge(AbstractVector, AbstractMatrix) == Union{AbstractVector, AbstractMatrix}
+@test Compiler.tmerge(Union{AbstractVector, Nothing}, AbstractMatrix) == Union{Nothing, AbstractVector, AbstractMatrix}
+@test Compiler.tmerge(Union{AbstractVector, Int}, AbstractMatrix) == Union{Int, AbstractVector, AbstractMatrix}
+@test Compiler.tmerge(Union{AbstractVector, Integer}, AbstractMatrix) == Union{Integer, AbstractArray}
+@test Compiler.tmerge(Union{AbstractVector, Nothing, Int}, AbstractMatrix) == Union{Nothing, Int, AbstractArray}
+
+# test that recursively more complicated types don't widen all the way to Any when there is a useful valid type upper bound
+# Specifically test with base types of a trivial type, a simple union, a complicated union, and a tuple.
+for T in (Nothing, Base.BitInteger, Union{Int, Int32, Int16, Int8}, Tuple{Int, Int})
+    Ta, Tb = T, T
+    for i in 1:10
+        Ta = Union{Tuple{Ta}, Nothing}
+        Tb = Compiler.tmerge(Tuple{Tb}, Nothing)
+        @test Ta <: Tb <: Union{Nothing, Tuple}
+    end
+end
 
 struct SomethingBits
     x::Base.BitIntegerType
@@ -282,9 +371,9 @@ barTuple2() = fooTuple{tuple(:y)}()
 @test Base.return_types(barTuple1,Tuple{})[1] == Base.return_types(barTuple2,Tuple{})[1] == fooTuple{(:y,)}
 
 # issue #6050
-@test Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice,
+@test Compiler.getfield_tfunc(Compiler.fallback_lattice,
           Dict{Int64,Tuple{UnitRange{Int64},UnitRange{Int64}}},
-          Core.Compiler.Const(:vals)) == Array{Tuple{UnitRange{Int64},UnitRange{Int64}},1}
+          Compiler.Const(:vals)) == Memory{Tuple{UnitRange{Int64},UnitRange{Int64}}}
 
 # assert robustness of `getfield_tfunc`
 struct GetfieldRobustness
@@ -330,7 +419,7 @@ end
 
 # issue #12826
 f12826(v::Vector{I}) where {I<:Integer} = v[1]
-@test Base.return_types(f12826,Tuple{Array{I,1} where I<:Integer})[1] == Integer
+@test Base.return_types(f12826,Tuple{Vector{I} where I<:Integer})[1] == Integer
 
 
 # non-terminating inference, issue #14009
@@ -344,8 +433,7 @@ code_llvm(devnull, f14009, (Int,))
 mutable struct B14009{T}; end
 g14009(a) = g14009(B14009{a})
 code_typed(g14009, (Type{Int},))
-code_llvm(devnull, f14009, (Int,))
-
+code_llvm(devnull, g14009, (Type{Int},))
 
 # issue #9232
 arithtype9232(::Type{T},::Type{T}) where {T<:Real} = arithtype9232(T)
@@ -396,7 +484,7 @@ end
 @test f15259(1,2) == (1,2,1,2)
 # check that error cases are still correct
 @eval g15259(x,y) = (a = $(Expr(:new, :A15259, :x, :y)); a.z)
-@test_throws ErrorException g15259(1,1)
+@test_throws FieldError g15259(1,1)
 @eval h15259(x,y) = (a = $(Expr(:new, :A15259, :x, :y)); getfield(a, 3))
 @test_throws BoundsError h15259(1,1)
 
@@ -564,7 +652,7 @@ f18450() = ifelse(true, Tuple{Vararg{Int}}, Tuple{Vararg})
 @test f18450() == Tuple{Vararg{Int}}
 
 # issue #18569
-@test !Core.Compiler.isconstType(Type{Tuple})
+@test !Compiler.isconstType(Type{Tuple})
 
 # issue #10880
 function cat10880(a, b)
@@ -593,7 +681,6 @@ end
 function test_inferred_static(arrow::Pair, all_ssa)
     code, rt = arrow
     @test isdispatchelem(rt)
-    @test code.inferred
     for i = 1:length(code.code)
         e = code.code[i]
         test_inferred_static(e)
@@ -649,7 +736,7 @@ for (codetype, all_ssa) in Any[
     test_inferred_static(codetype, all_ssa)
 end
 @test f18679() === ()
-@test_throws UndefVarError(:any_undef_global) g18679()
+@test_throws UndefVarError(:any_undef_global, @__MODULE__) g18679()
 @test h18679() === nothing
 
 
@@ -696,9 +783,9 @@ end
 f_infer_abstract_fieldtype() = fieldtype(HasAbstractlyTypedField, :x)
 @test Base.return_types(f_infer_abstract_fieldtype, ()) == Any[Type{Union{Int,String}}]
 let fieldtype_tfunc(@nospecialize args...) =
-        Core.Compiler.fieldtype_tfunc(Core.Compiler.fallback_lattice, args...),
-    fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) = Core.Compiler.fieldtype_nothrow(
-        Core.Compiler.SimpleInferenceLattice.instance, s0, name)
+        Compiler.fieldtype_tfunc(Compiler.fallback_lattice, args...),
+    fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) = Compiler.fieldtype_nothrow(
+        Compiler.SimpleInferenceLattice.instance, s0, name)
     @test fieldtype_tfunc(Union{}, :x) == Union{}
     @test fieldtype_tfunc(Union{Type{Int32}, Int32}, Const(:x)) == Union{}
     @test fieldtype_tfunc(Union{Type{Base.RefValue{T}}, Type{Int32}} where {T<:Array}, Const(:x)) == Type{<:Array}
@@ -741,7 +828,7 @@ end
 
 # Issue 19641
 foo19641() = let a = 1.0
-    Core.Compiler.return_type(x -> x + a, Tuple{Float64})
+    Base._return_type(x -> x + a, Tuple{Float64})
 end
 @inferred foo19641()
 
@@ -895,15 +982,15 @@ test_no_apply(::Any) = true
 
 # issue #20033
 # check return_type_tfunc for calls where no method matches
-bcast_eltype_20033(f, A) = Core.Compiler.return_type(f, Tuple{eltype(A)})
+bcast_eltype_20033(f, A) = Base._return_type(f, Tuple{eltype(A)})
 err20033(x::Float64...) = prod(x)
 @test bcast_eltype_20033(err20033, [1]) === Union{}
 @test Base.return_types(bcast_eltype_20033, (typeof(err20033), Vector{Int},)) == Any[Type{Union{}}]
 # return_type on builtins
-@test Core.Compiler.return_type(tuple, Tuple{Int,Int8,Int}) === Tuple{Int,Int8,Int}
+@test Base._return_type(tuple, Tuple{Int,Int8,Int}) === Tuple{Int,Int8,Int}
 
 # issue #21088
-@test Core.Compiler.return_type(typeof, Tuple{Int}) == Type{Int}
+@test Base._return_type(typeof, Tuple{Int}) == Type{Int}
 
 # Inference of constant svecs
 @eval fsvecinf() = $(QuoteNode(Core.svec(Tuple{Int,Int}, Int)))[1]
@@ -983,7 +1070,7 @@ gl_17003 = [1, 2, 3]
 f2_17003(item::AVector_17003) = nothing
 f2_17003(::Any) = f2_17003(NArray_17003(gl_17003))
 
-@test f2_17003(1) == nothing
+@test f2_17003(1) === nothing
 
 # issue #20847
 function segfaultfunction_20847(A::Vector{NTuple{N, T}}) where {N, T}
@@ -994,7 +1081,7 @@ end
 tuplevec_20847 = Tuple{Float64, Float64}[(0.0,0.0), (1.0,0.0)]
 
 for A in (1,)
-    @test segfaultfunction_20847(tuplevec_20847) == nothing
+    @test segfaultfunction_20847(tuplevec_20847) === nothing
 end
 
 # Issue #20902, check that this doesn't error.
@@ -1015,7 +1102,7 @@ f21771(::Val{U}) where {U} = Tuple{g21771(U)}
 
 # PR #28284, check that constants propagate through calls to new
 struct t28284
-  x::Int
+    x::Int
 end
 f28284() = Val(t28284(1))
 @inferred f28284()
@@ -1078,7 +1165,7 @@ end
 struct UnionIsdefinedA; x; end
 struct UnionIsdefinedB; x; end
 let isdefined_tfunc(@nospecialize xs...) =
-        Core.Compiler.isdefined_tfunc(Core.Compiler.fallback_lattice, xs...)
+        Compiler.isdefined_tfunc(Compiler.fallback_lattice, xs...)
     @test isdefined_tfunc(typeof(NamedTuple()), Const(0)) === Const(false)
     @test isdefined_tfunc(typeof(NamedTuple()), Const(1)) === Const(false)
     @test isdefined_tfunc(typeof((a=1,b=2)), Const(:a)) === Const(true)
@@ -1102,14 +1189,9 @@ let isdefined_tfunc(@nospecialize xs...) =
     @test isdefined_tfunc(ComplexF32, Const(0)) === Const(false)
     @test isdefined_tfunc(SometimesDefined, Const(:x)) == Bool
     @test isdefined_tfunc(SometimesDefined, Const(:y)) === Const(false)
-    @test isdefined_tfunc(Const(Base), Const(:length)) === Const(true)
-    @test isdefined_tfunc(Const(Base), Symbol) == Bool
-    @test isdefined_tfunc(Const(Base), Const(:NotCurrentlyDefinedButWhoKnows)) == Bool
     @test isdefined_tfunc(Core.SimpleVector, Const(1)) === Const(false)
     @test Const(false) ⊑ isdefined_tfunc(Const(:x), Symbol)
     @test Const(false) ⊑ isdefined_tfunc(Const(:x), Const(:y))
-    @test isdefined_tfunc(Vector{Int}, Const(1)) == Const(false)
-    @test isdefined_tfunc(Vector{Any}, Const(1)) == Const(false)
     @test isdefined_tfunc(Module, Int) === Union{}
     @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(0)) === Const(false)
     @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(1)) === Const(true)
@@ -1128,6 +1210,7 @@ let isdefined_tfunc(@nospecialize xs...) =
     @test isdefined_tfunc(Union{UnionIsdefinedA,UnionIsdefinedB}, Const(:x)) === Const(true)
     @test isdefined_tfunc(Union{UnionIsdefinedA,UnionIsdefinedB}, Const(:y)) === Const(false)
     @test isdefined_tfunc(Union{UnionIsdefinedA,Nothing}, Const(:x)) === Bool
+    @test isdefined_tfunc(Nothing, Any) === Const(false)
 end
 
 # https://github.com/aviatesk/JET.jl/issues/379
@@ -1180,18 +1263,18 @@ function get_linfo(@nospecialize(f), @nospecialize(t))
     # get the MethodInstance for the method match
     match = Base._which(Base.signature_type(f, t))
     precompile(match.spec_types)
-    return Core.Compiler.specialize_method(match)
+    return Compiler.specialize_method(match)
 end
 
 function test_const_return(@nospecialize(f), @nospecialize(t), @nospecialize(val))
-    interp = Core.Compiler.NativeInterpreter()
-    linfo = Core.Compiler.getindex(Core.Compiler.code_cache(interp), get_linfo(f, t))
+    interp = Compiler.NativeInterpreter()
+    linfo = Compiler.getindex(Compiler.code_cache(interp), get_linfo(f, t))
     # If coverage is not enabled, make the check strict by requiring constant ABI
     # Otherwise, check the typed AST to make sure we return a constant.
     if Base.JLOptions().code_coverage == 0
-        @test Core.Compiler.invoke_api(linfo) == 2
+        @test Compiler.invoke_api(linfo) == 2
     end
-    if Core.Compiler.invoke_api(linfo) == 2
+    if Compiler.invoke_api(linfo) == 2
         @test linfo.rettype_const == val
         return
     end
@@ -1211,7 +1294,7 @@ function test_const_return(@nospecialize(f), @nospecialize(t), @nospecialize(val
             @test ret === val || (isa(ret, QuoteNode) && (ret::QuoteNode).value === val)
             continue
         elseif isa(ex, Expr)
-            if Core.Compiler.is_meta_expr_head(ex.head)
+            if Compiler.is_meta_expr_head(ex.head)
                 continue
             end
         end
@@ -1231,7 +1314,7 @@ function find_call(code::Core.CodeInfo, @nospecialize(func), narg)
                     farg = typeof(getfield(farg.mod, farg.name))
                 end
             elseif isa(farg, Core.SSAValue)
-                farg = Core.Compiler.widenconst(code.ssavaluetypes[farg.id])
+                farg = Compiler.widenconst(code.ssavaluetypes[farg.id])
             else
                 farg = typeof(farg)
             end
@@ -1249,33 +1332,36 @@ test_const_return(()->sizeof(1), Tuple{}, sizeof(Int))
 test_const_return(()->sizeof(DataType), Tuple{}, sizeof(DataType))
 test_const_return(()->sizeof(1 < 2), Tuple{}, 1)
 test_const_return(()->fieldtype(Dict{Int64,Nothing}, :age), Tuple{}, UInt)
-test_const_return(@eval(()->Core.sizeof($(Array{Int,0}(undef)))), Tuple{}, sizeof(Int))
-test_const_return(@eval(()->Core.sizeof($(Matrix{Float32}(undef, 2, 2)))), Tuple{}, 4 * 2 * 2)
+test_const_return(@eval(()->Core.sizeof($(Array{Int,0}(undef)))), Tuple{}, 2 * sizeof(Int))
+test_const_return(@eval(()->Core.sizeof($(Matrix{Float32}(undef, 2, 2)))), Tuple{}, 4 * sizeof(Int))
+# TODO: do we want to implement these?
+# test_const_return(@eval(()->sizeof($(Array{Int,0}(undef)))), Tuple{}, sizeof(Int))
+# test_const_return(@eval(()->sizeof($(Matrix{Float32}(undef, 2, 2)))), Tuple{}, 4 * 2 * 2)
+# test_const_return(@eval(()->Core.sizeof($(Memory{Int}(undef, 0)))), Tuple{}, 0)
 
 # Make sure Core.sizeof with a ::DataType as inferred input type is inferred but not constant.
 function sizeof_typeref(typeref)
     return Core.sizeof(typeref[])
 end
 @test @inferred(sizeof_typeref(Ref{DataType}(Int))) == sizeof(Int)
-@test find_call(first(code_typed(sizeof_typeref, (Ref{DataType},))[1]), Core.sizeof, 2)
+@test find_call(only(code_typed(sizeof_typeref, (Ref{DataType},)))[1], Core.sizeof, 2)
 # Constant `Vector` can be resized and shouldn't be optimized to a constant.
 const constvec = [1, 2, 3]
 @eval function sizeof_constvec()
-    return Core.sizeof($constvec)
+    return sizeof($constvec)
 end
 @test @inferred(sizeof_constvec()) == sizeof(Int) * 3
-@test find_call(first(code_typed(sizeof_constvec, ())[1]), Core.sizeof, 2)
 push!(constvec, 10)
-@test @inferred(sizeof_constvec()) == sizeof(Int) * 4
+@test sizeof_constvec() == sizeof(Int) * 4
 
 test_const_return(x->isdefined(x, :re), Tuple{ComplexF64}, true)
 
 isdefined_f3(x) = isdefined(x, 3)
 @test @inferred(isdefined_f3(())) == false
-@test find_call(first(code_typed(isdefined_f3, Tuple{Tuple{Vararg{Int}}})[1]), isdefined, 3)
+@test find_call(only(code_typed(isdefined_f3, Tuple{Tuple{Vararg{Int}}}))[1], isdefined, 3)
 
 let isa_tfunc(@nospecialize xs...) =
-        Core.Compiler.isa_tfunc(Core.Compiler.fallback_lattice, xs...)
+        Compiler.isa_tfunc(Compiler.fallback_lattice, xs...)
     @test isa_tfunc(Array, Const(AbstractArray)) === Const(true)
     @test isa_tfunc(Array, Type{AbstractArray}) === Const(true)
     @test isa_tfunc(Array, Type{AbstractArray{Int}}) == Bool
@@ -1315,7 +1401,7 @@ let isa_tfunc(@nospecialize xs...) =
 end
 
 let subtype_tfunc(@nospecialize xs...) =
-        Core.Compiler.subtype_tfunc(Core.Compiler.fallback_lattice, xs...)
+        Compiler.subtype_tfunc(Compiler.fallback_lattice, xs...)
     @test subtype_tfunc(Type{<:Array}, Const(AbstractArray)) === Const(true)
     @test subtype_tfunc(Type{<:Array}, Type{AbstractArray}) === Const(true)
     @test subtype_tfunc(Type{<:Array}, Type{AbstractArray{Int}}) == Bool
@@ -1367,9 +1453,9 @@ end
 
 let egal_tfunc
     function egal_tfunc(a, b)
-        𝕃 = Core.Compiler.fallback_lattice
-        r = Core.Compiler.egal_tfunc(𝕃, a, b)
-        @test r === Core.Compiler.egal_tfunc(𝕃, b, a)
+        𝕃 = Compiler.fallback_lattice
+        r = Compiler.egal_tfunc(𝕃, a, b)
+        @test r === Compiler.egal_tfunc(𝕃, b, a)
         return r
     end
     @test egal_tfunc(Const(12345.12345), Const(12344.12345 + 1)) == Const(true)
@@ -1379,8 +1465,8 @@ let egal_tfunc
     @test egal_tfunc(Array, Array) == Bool
     @test egal_tfunc(Array, AbstractArray{Int}) == Bool
     @test egal_tfunc(Array{Real}, AbstractArray{Int}) === Const(false)
-    @test egal_tfunc(Array{Real, 2}, AbstractArray{Real, 2}) === Bool
-    @test egal_tfunc(Array{Real, 2}, AbstractArray{Int, 2}) === Const(false)
+    @test egal_tfunc(Matrix{Real}, AbstractMatrix{Real}) === Bool
+    @test egal_tfunc(Matrix{Real}, AbstractMatrix{Int}) === Const(false)
     @test egal_tfunc(DataType, Int) === Const(false)
     @test egal_tfunc(DataType, Const(Int)) === Bool
     @test egal_tfunc(DataType, Const(Array)) === Const(false)
@@ -1438,11 +1524,11 @@ egal_conditional_lattice3(x, y) = x === y + y ? "" : 1
 @test Base.return_types(egal_conditional_lattice3, (Int32, Int64)) == Any[Int]
 
 let nfields_tfunc(@nospecialize xs...) =
-        Core.Compiler.nfields_tfunc(Core.Compiler.fallback_lattice, xs...)
+        Compiler.nfields_tfunc(Compiler.fallback_lattice, xs...)
     sizeof_tfunc(@nospecialize xs...) =
-        Core.Compiler.sizeof_tfunc(Core.Compiler.fallback_lattice, xs...)
+        Compiler.sizeof_tfunc(Compiler.fallback_lattice, xs...)
     sizeof_nothrow(@nospecialize xs...) =
-        Core.Compiler.sizeof_nothrow(xs...)
+        Compiler.sizeof_nothrow(xs...)
     @test sizeof_tfunc(Const(Ptr)) === sizeof_tfunc(Union{Ptr, Int, Type{Ptr{Int8}}, Type{Int}}) === Const(Sys.WORD_SIZE ÷ 8)
     @test sizeof_tfunc(Type{Ptr}) === Const(sizeof(Ptr))
     @test sizeof_nothrow(Union{Ptr, Int, Type{Ptr{Int8}}, Type{Int}})
@@ -1450,12 +1536,12 @@ let nfields_tfunc(@nospecialize xs...) =
     @test sizeof_nothrow(Type{Ptr})
     @test sizeof_nothrow(Type{Union{Ptr{Int}, Int}})
     @test !sizeof_nothrow(Const(Tuple))
-    @test !sizeof_nothrow(Type{Vector{Int}})
+    @test sizeof_nothrow(Type{Vector{Int}})
     @test !sizeof_nothrow(Type{Union{Int, String}})
     @test sizeof_nothrow(String)
     @test !sizeof_nothrow(Type{String})
     @test sizeof_tfunc(Type{Union{Int64, Int32}}) == Const(Core.sizeof(Union{Int64, Int32}))
-    let PT = Core.Compiler.PartialStruct(Tuple{Int64,UInt64}, Any[Const(10), UInt64])
+    let PT = Core.PartialStruct(Compiler.fallback_lattice, Tuple{Int64,UInt64}, Any[Const(10), UInt64])
         @test sizeof_tfunc(PT) === Const(16)
         @test nfields_tfunc(PT) === Const(2)
         @test sizeof_nothrow(PT)
@@ -1483,7 +1569,7 @@ let nfields_tfunc(@nospecialize xs...) =
 end
 
 let typeof_tfunc(@nospecialize xs...) =
-        Core.Compiler.typeof_tfunc(Core.Compiler.fallback_lattice, xs...)
+        Compiler.typeof_tfunc(Compiler.fallback_lattice, xs...)
     @test typeof_tfunc(Tuple{Vararg{Int}}) == Type{Tuple{Vararg{Int,N}}} where N
     @test typeof_tfunc(Tuple{Any}) == Type{<:Tuple{Any}}
     @test typeof_tfunc(Type{Array}) === DataType
@@ -1496,41 +1582,80 @@ end
 f_typeof_tfunc(x) = typeof(x)
 @test Base.return_types(f_typeof_tfunc, (Union{<:T, Int} where T<:Complex,)) == Any[Union{Type{Int}, Type{Complex{T}} where T<:Real}]
 
-# arrayref / arrayset / arraysize
-import Core.Compiler: Const
-let arrayref_tfunc(@nospecialize xs...) = Core.Compiler.arrayref_tfunc(Core.Compiler.fallback_lattice, xs...)
-    arrayset_tfunc(@nospecialize xs...) = Core.Compiler.arrayset_tfunc(Core.Compiler.fallback_lattice, xs...)
-    arraysize_tfunc(@nospecialize xs...) = Core.Compiler.arraysize_tfunc(Core.Compiler.fallback_lattice, xs...)
-    @test arrayref_tfunc(Const(true), Vector{Int}, Int) === Int
-    @test arrayref_tfunc(Const(true), Vector{<:Integer}, Int) === Integer
-    @test arrayref_tfunc(Const(true), Vector, Int) === Any
-    @test arrayref_tfunc(Const(true), Vector{Int}, Int, Vararg{Int}) === Int
-    @test arrayref_tfunc(Const(true), Vector{Int}, Vararg{Int}) === Int
-    @test arrayref_tfunc(Const(true), Vector{Int}) === Union{}
-    @test arrayref_tfunc(Const(true), String, Int) === Union{}
-    @test arrayref_tfunc(Const(true), Vector{Int}, Float64) === Union{}
-    @test arrayref_tfunc(Int, Vector{Int}, Int) === Union{}
-    @test arrayset_tfunc(Const(true), Vector{Int}, Int, Int) === Vector{Int}
-    let ua = Vector{<:Integer}
-        @test arrayset_tfunc(Const(true), ua, Int, Int) === ua
-    end
-    @test arrayset_tfunc(Const(true), Vector, Int, Int) === Vector
-    @test arrayset_tfunc(Const(true), Any, Int, Int) === Any
-    @test arrayset_tfunc(Const(true), Vector{String}, String, Int, Vararg{Int}) === Vector{String}
-    @test arrayset_tfunc(Const(true), Vector{String}, String, Vararg{Int}) === Vector{String}
-    @test arrayset_tfunc(Const(true), Vector{String}, String) === Union{}
-    @test arrayset_tfunc(Const(true), String, Char, Int) === Union{}
-    @test arrayset_tfunc(Const(true), Vector{Int}, Int, Float64) === Union{}
-    @test arrayset_tfunc(Int, Vector{Int}, Int, Int) === Union{}
-    @test arrayset_tfunc(Const(true), Vector{Int}, Float64, Int) === Union{}
-    @test arraysize_tfunc(Vector, Int) === Int
-    @test arraysize_tfunc(Vector, Float64) === Union{}
-    @test arraysize_tfunc(String, Int) === Union{}
+# memoryref_tfunc, memoryrefget_tfunc, memoryrefset!_tfunc, memoryref_isassigned, memoryrefoffset_tfunc
+let memoryref_tfunc(@nospecialize xs...) = Compiler.memoryref_tfunc(Compiler.fallback_lattice, xs...)
+    memoryrefget_tfunc(@nospecialize xs...) = Compiler.memoryrefget_tfunc(Compiler.fallback_lattice, xs...)
+    memoryref_isassigned_tfunc(@nospecialize xs...) = Compiler.memoryref_isassigned_tfunc(Compiler.fallback_lattice, xs...)
+    memoryrefset!_tfunc(@nospecialize xs...) = Compiler.memoryrefset!_tfunc(Compiler.fallback_lattice, xs...)
+    memoryrefoffset_tfunc(@nospecialize xs...) = Compiler.memoryrefoffset_tfunc(Compiler.fallback_lattice, xs...)
+    interp = Compiler.NativeInterpreter()
+    builtin_tfunction(@nospecialize xs...) = Compiler.builtin_tfunction(interp, xs..., nothing)
+    @test memoryref_tfunc(Memory{Int}) == MemoryRef{Int}
+    @test memoryref_tfunc(Memory{Integer}) == MemoryRef{Integer}
+    @test memoryref_tfunc(MemoryRef{Int}, Int) == MemoryRef{Int}
+    @test memoryref_tfunc(MemoryRef{Int}, Vararg{Int}) == MemoryRef{Int}
+    @test memoryref_tfunc(MemoryRef{Int}, Int, Symbol) == Union{}
+    @test memoryref_tfunc(MemoryRef{Int}, Int, Bool) == MemoryRef{Int}
+    @test memoryref_tfunc(MemoryRef{Int}, Int, Vararg{Bool}) == MemoryRef{Int}
+    @test memoryref_tfunc(Memory{Int}, Int) == MemoryRef{Int}
+    @test memoryref_tfunc(Memory{Int}, Int, Symbol) == Union{}
+    @test memoryref_tfunc(Memory{Int}, Int, Bool) == MemoryRef{Int}
+    @test memoryref_tfunc(Memory{Int}, Int, Vararg{Bool}) == MemoryRef{Int}
+    @test memoryref_tfunc(Any, Any, Any) == GenericMemoryRef
+    @test memoryref_tfunc(Any, Any) == GenericMemoryRef
+    @test memoryref_tfunc(Any) == GenericMemoryRef
+    @test memoryrefget_tfunc(MemoryRef{Int}, Symbol, Bool) === Int
+    @test memoryrefget_tfunc(MemoryRef{Int}, Any, Any) === Int
+    @test memoryrefget_tfunc(MemoryRef{<:Integer}, Symbol, Bool) === Integer
+    @test memoryrefget_tfunc(GenericMemoryRef, Symbol, Bool) === Any
+    @test memoryrefget_tfunc(GenericMemoryRef{:not_atomic}, Symbol, Bool) === Any
+    @test memoryrefget_tfunc(Vector{Int}, Symbol, Bool) === Union{}
+    @test memoryrefget_tfunc(String, Symbol, Bool) === Union{}
+    @test memoryrefget_tfunc(MemoryRef{Int}, String, Bool) === Union{}
+    @test memoryrefget_tfunc(MemoryRef{Int}, Symbol, String) === Union{}
+    @test memoryrefget_tfunc(Any, Any, Any) === Any
+    @test builtin_tfunction(Core.memoryrefget, Any[MemoryRef{Int}, Vararg{Any}]) == Int
+    @test builtin_tfunction(Core.memoryrefget, Any[MemoryRef{Int}, Symbol, Bool, Vararg{Bool}]) == Int
+    @test memoryref_isassigned_tfunc(MemoryRef{Any}, Symbol, Bool) === Bool
+    @test memoryref_isassigned_tfunc(MemoryRef{Any}, Any, Any) === Bool
+    @test memoryref_isassigned_tfunc(MemoryRef{<:Integer}, Symbol, Bool) === Bool
+    @test memoryref_isassigned_tfunc(GenericMemoryRef, Symbol, Bool) === Bool
+    @test memoryref_isassigned_tfunc(GenericMemoryRef{:not_atomic}, Symbol, Bool) === Bool
+    @test memoryref_isassigned_tfunc(Vector{Int}, Symbol, Bool) === Union{}
+    @test memoryref_isassigned_tfunc(String, Symbol, Bool) === Union{}
+    @test memoryref_isassigned_tfunc(MemoryRef{Int}, String, Bool) === Union{}
+    @test memoryref_isassigned_tfunc(MemoryRef{Int}, Symbol, String) === Union{}
+    @test memoryref_isassigned_tfunc(Any, Any, Any) === Bool
+    @test builtin_tfunction(Core.memoryref_isassigned, Any[MemoryRef{Int}, Vararg{Any}]) == Bool
+    @test builtin_tfunction(Core.memoryref_isassigned, Any[MemoryRef{Int}, Symbol, Bool, Vararg{Bool}]) == Bool
+    @test memoryrefset!_tfunc(MemoryRef{Int}, Int, Symbol, Bool) === Int
+    let ua = MemoryRef{<:Integer}
+        @test memoryrefset!_tfunc(ua, Int, Symbol, Bool) === Int
+    end
+    @test memoryrefset!_tfunc(GenericMemoryRef, Int, Symbol, Bool) === Int
+    @test memoryrefset!_tfunc(GenericMemoryRef{:not_atomic}, Int, Symbol, Bool) === Int
+    @test memoryrefset!_tfunc(Any, Int, Symbol, Bool) === Int
+    @test memoryrefset!_tfunc(MemoryRef{String}, Int, Symbol, Bool) === Union{}
+    @test memoryrefset!_tfunc(String, Char, Symbol, Bool) === Union{}
+    @test memoryrefset!_tfunc(MemoryRef{Int}, Any, Symbol, Bool) === Any # could improve this to Int
+    @test memoryrefset!_tfunc(MemoryRef{Int}, Any, Any, Any) === Any # could improve this to Int
+    @test memoryrefset!_tfunc(GenericMemoryRef{:not_atomic}, Any, Any, Any) === Any
+    @test memoryrefset!_tfunc(GenericMemoryRef, Any, Any, Any) === Any
+    @test memoryrefset!_tfunc(Any, Any, Any, Any) === Any
+    @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Vararg{Any}]) == Any
+    @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Vararg{Symbol}]) == Union{}
+    @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Any, Symbol, Vararg{Bool}]) === Any # could improve this to Int
+    @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Any, Symbol, Bool, Vararg{Any}]) === Any # could improve this to Int
+    @test memoryrefoffset_tfunc(MemoryRef) == memoryrefoffset_tfunc(GenericMemoryRef) == Int
+    @test memoryrefoffset_tfunc(Memory) == memoryrefoffset_tfunc(GenericMemory) == Union{}
+    @test builtin_tfunction(Core.memoryrefoffset, Any[Vararg{MemoryRef}]) == Int
+    @test builtin_tfunction(Core.memoryrefoffset, Any[Vararg{Any}]) == Int
+    @test builtin_tfunction(Core.memoryrefoffset, Any[Vararg{Memory}]) == Union{}
 end
 
 let tuple_tfunc(@nospecialize xs...) =
-        Core.Compiler.tuple_tfunc(Core.Compiler.fallback_lattice, Any[xs...])
-    @test Core.Compiler.widenconst(tuple_tfunc(Type{Int})) === Tuple{DataType}
+        Compiler.tuple_tfunc(Compiler.fallback_lattice, Any[xs...])
+    @test Compiler.widenconst(tuple_tfunc(Type{Int})) === Tuple{DataType}
     # https://github.com/JuliaLang/julia/issues/44705
     @test tuple_tfunc(Union{Type{Int32},Type{Int64}}) === Tuple{Type}
     @test tuple_tfunc(DataType) === Tuple{DataType}
@@ -1546,8 +1671,8 @@ g23024(TT::Tuple{DataType}) = f23024(TT[1], v23024)
 @test Base.return_types(g23024, (Tuple{DataType},)) == Any[Int]
 @test g23024((UInt8,)) === 2
 
-@test !Core.Compiler.isconstType(Type{typeof(Union{})}) # could be Core.TypeofBottom or Type{Union{}} at runtime
-@test !isa(Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, Type{Core.TypeofBottom}, Core.Compiler.Const(:name)), Core.Compiler.Const)
+@test !Compiler.isconstType(Type{typeof(Union{})}) # could be Core.TypeofBottom or Type{Union{}} at runtime
+@test !isa(Compiler.getfield_tfunc(Compiler.fallback_lattice, Type{Core.TypeofBottom}, Compiler.Const(:name)), Compiler.Const)
 @test Base.return_types(supertype, (Type{typeof(Union{})},)) == Any[Any]
 
 # issue #23685
@@ -1573,19 +1698,18 @@ gg13183(x::X...) where {X} = (_false13183 ? gg13183(x, x) : 0)
 # test the external OptimizationState constructor
 let linfo = get_linfo(Base.convert, Tuple{Type{Int64}, Int32}),
     world = UInt(23) # some small-numbered world that should be valid
-    interp = Core.Compiler.NativeInterpreter()
-    opt = Core.Compiler.OptimizationState(linfo, interp)
+    interp = Compiler.NativeInterpreter()
+    opt = Compiler.OptimizationState(linfo, interp)
     # make sure the state of the properties look reasonable
     @test opt.src !== linfo.def.source
     @test length(opt.src.slotflags) == linfo.def.nargs <= length(opt.src.slotnames)
     @test opt.src.ssavaluetypes isa Vector{Any}
-    @test !opt.src.inferred
     @test opt.mod === Base
 end
 
 # approximate static parameters due to unions
 let T1 = Array{Float64}, T2 = Array{_1,2} where _1
-    inference_test_copy(a::T) where {T<:Array} = ccall(:jl_array_copy, Ref{T}, (Any,), a)
+    inference_test_copy(a::T) where {T<:Array} = ccall(:array_copy_like, Ref{T}, (Any,), a)
     rt = Base.return_types(inference_test_copy, (Union{T1,T2},))[1]
     @test rt >: T1 && rt >: T2
 
@@ -1606,12 +1730,12 @@ g_test_constant() = (f_constant(3) == 3 && f_constant(4) == 4 ? true : "BAD")
 f_pure_add() = (1 + 1 == 2) ? true : "FAIL"
 @test @inferred f_pure_add()
 
-import Core: Const
+using Core: Const
 mutable struct ARef{T}
     @atomic x::T
 end
 let getfield_tfunc(@nospecialize xs...) =
-        Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, xs...)
+        Compiler.getfield_tfunc(Compiler.fallback_lattice, xs...)
 
     # inference of `T.mutable`
     @test getfield_tfunc(Const(Int.name), Const(:flags)) == Const(0x4)
@@ -1647,7 +1771,7 @@ let getfield_tfunc(@nospecialize xs...) =
     @test getfield_tfunc(ARef{Int},Const(:x),Bool,Bool) === Union{}
 end
 
-import Core.Compiler: Const
+using Core: Const
 mutable struct XY{X,Y}
     x::X
     y::Y
@@ -1659,7 +1783,7 @@ mutable struct ABCDconst
     const d::Union{Int,Nothing}
 end
 let setfield!_tfunc(@nospecialize xs...) =
-        Core.Compiler.setfield!_tfunc(Core.Compiler.fallback_lattice, xs...)
+        Compiler.setfield!_tfunc(Compiler.fallback_lattice, xs...)
     @test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Int) === Int
     @test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Int, Symbol) === Int
     @test setfield!_tfunc(Base.RefValue{Int}, Const(1), Int) === Int
@@ -1719,7 +1843,7 @@ let setfield!_tfunc(@nospecialize xs...) =
     @test setfield!_tfunc(ABCDconst, Const(4), Any) === Union{}
 end
 let setfield!_nothrow(@nospecialize xs...) =
-        Core.Compiler.setfield!_nothrow(Core.Compiler.SimpleInferenceLattice.instance, xs...)
+        Compiler.setfield!_nothrow(Compiler.SimpleInferenceLattice.instance, xs...)
     @test setfield!_nothrow(Base.RefValue{Int}, Const(:x), Int)
     @test setfield!_nothrow(Base.RefValue{Int}, Const(1), Int)
     @test setfield!_nothrow(Base.RefValue{Any}, Const(:x), Int)
@@ -1819,6 +1943,8 @@ function f24852_kernel_cinfo(world::UInt, source, fsig::Type)
     end
     pushfirst!(code_info.slotnames, Symbol("#self#"))
     pushfirst!(code_info.slotflags, 0x00)
+    code_info.nargs = 4
+    code_info.isva = false
     # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[])
     # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[])
     return match.method, code_info
@@ -1915,7 +2041,7 @@ function foo25261()
         next = f25261(Core.getfield(next, 2))
     end
 end
-let opt25261 = code_typed(foo25261, Tuple{}, optimize=false)[1].first.code
+let opt25261 = code_typed(foo25261, Tuple{}, optimize=true)[1].first.code
     i = 1
     # Skip to after the branch
     while !isa(opt25261[i], GotoIfNot)
@@ -1923,7 +2049,7 @@ let opt25261 = code_typed(foo25261, Tuple{}, optimize=false)[1].first.code
     end
     foundslot = false
     for expr25261 in opt25261[i:end]
-        if expr25261 isa Core.Compiler.TypedSlot && expr25261.typ === Tuple{Int, Int}
+        if expr25261 isa Core.PiNode && expr25261.typ === Tuple{Int, Int}
             # This should be the assignment to the SSAValue into the getfield
             # call - make sure it's a TypedSlot
             foundslot = true
@@ -2014,12 +2140,12 @@ end
 
     # handle edge case
     @test (@eval Module() begin
-        edgecase(_) = $(Core.Compiler.InterConditional(2, Int, Any))
+        edgecase(_) = $(Compiler.InterConditional(2, Int, Any))
         Base.return_types(edgecase, (Any,)) # create cache
         Base.return_types((Any,)) do x
             edgecase(x)
         end
-    end) == Any[Core.Compiler.InterConditional]
+    end) == Any[Compiler.InterConditional]
 
     # a tricky case: if constant inference derives `Const` while non-constant inference has
     # derived `InterConditional`, we should not discard that constant information
@@ -2031,78 +2157,75 @@ end
 
 @testset "branching on conditional object" begin
     # simple
-    @test Base.return_types((Union{Nothing,Int},)) do a
+    @test Base.infer_return_type((Union{Nothing,Int},)) do a
         b = a === nothing
         return b ? 0 : a # ::Int
-    end == Any[Int]
+    end == Int
 
     # can use multiple times (as far as the subject of condition hasn't changed)
-    @test Base.return_types((Union{Nothing,Int},)) do a
+    @test Base.infer_return_type((Union{Nothing,Int},)) do a
         b = a === nothing
         c = b ? 0 : a # c::Int
         d = !b ? a : 0 # d::Int
         return c, d # ::Tuple{Int,Int}
-    end == Any[Tuple{Int,Int}]
+    end == Tuple{Int,Int}
 
     # should invalidate old constraint when the subject of condition has changed
-    @test Base.return_types((Union{Nothing,Int},)) do a
+    @test Base.infer_return_type((Union{Nothing,Int},)) do a
         cond = a === nothing
         r1 = cond ? 0 : a # r1::Int
         a = 0
         r2 = cond ? a : 1 # r2::Int, not r2::Union{Nothing,Int}
         return r1, r2 # ::Tuple{Int,Int}
-    end == Any[Tuple{Int,Int}]
+    end == Tuple{Int,Int}
 end
 
 # https://github.com/JuliaLang/julia/issues/42090#issuecomment-911824851
 # `PartialStruct` shouldn't wrap `Conditional`
-let M = Module()
-    @eval M begin
-        struct BePartialStruct
-            val::Int
-            cond
-        end
-    end
-
-    rt = @eval M begin
-        Base.return_types((Union{Nothing,Int},)) do a
-            cond = a === nothing
-            obj = $(Expr(:new, M.BePartialStruct, 42, :cond))
-            r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional)
-            a = $(gensym(:anyvar))::Any
-            r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here)
-            return r1, r2 # ::Tuple{Union{Nothing,Int},Any}
-        end |> only
-    end
-    @test rt == Tuple{Union{Nothing,Int},Any}
+struct BePartialStruct
+    val::Int
+    cond
+end
+@test Tuple{Union{Nothing,Int},Any} == @eval Base.infer_return_type((Union{Nothing,Int},)) do a
+    cond = a === nothing
+    obj = $(Expr(:new, BePartialStruct, 42, :cond))
+    r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional)
+    a = $(gensym(:anyvar))::Any
+    r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here)
+    return r1, r2 # ::Tuple{Union{Nothing,Int},Any}
 end
 
 # make sure we never form nested `Conditional` (https://github.com/JuliaLang/julia/issues/46207)
-@test Base.return_types((Any,)) do a
+@test Base.infer_return_type((Any,)) do a
     c = isa(a, Integer)
     42 === c ? :a : "b"
-end |> only === String
-@test Base.return_types((Any,)) do a
+end == String
+@test Base.infer_return_type((Any,)) do a
     c = isa(a, Integer)
     c === 42 ? :a : "b"
-end |> only === String
-
-@testset "conditional constraint propagation from non-`Conditional` object" begin
-    @test Base.return_types((Bool,)) do b
-        if b
-            return !b ? nothing : 1 # ::Int
-        else
-            return 0
-        end
-    end == Any[Int]
+end == String
 
-    @test Base.return_types((Any,)) do b
-        if b
-            return b # ::Bool
-        else
-            return nothing
-        end
-    end == Any[Union{Bool,Nothing}]
+function condition_object_update1(cond)
+    if cond # `cond` is known to be `Const(true)` within this branch
+        return !cond ? nothing : 1 # ::Int
+    else
+        return  cond ? nothing : 1 # ::Int
+    end
+end
+function condition_object_update2(x)
+    cond = x isa Int
+    if cond # `cond` is known to be `Const(true)` within this branch
+        return !cond ? nothing : x # ::Int
+    else
+        return  cond ? nothing : 1 # ::Int
+    end
+end
+@testset "state update for condition object" begin
+    # refine the type of condition object into constant boolean values on branching
+    @test Base.infer_return_type(condition_object_update1, (Bool,)) == Int
+    @test Base.infer_return_type(condition_object_update1, (Any,)) == Int
+    # refine even when their original type is `Conditional`
+    @test Base.infer_return_type(condition_object_update2, (Any,)) == Int
 end
 
 @testset "`from_interprocedural!`: translate inter-procedural information" begin
@@ -2121,7 +2244,7 @@ end
     end |> only == Int
     # the `fargs = nothing` edge case
     @test Base.return_types((Any,)) do a
-        Core.Compiler.return_type(invoke, Tuple{typeof(ispositive), Type{Tuple{Any}}, Any})
+        Base._return_type(invoke, Tuple{typeof(ispositive), Type{Tuple{Any}}, Any})
     end |> only == Type{Bool}
 
     # `InterConditional` handling: `abstract_call_opaque_closure`
@@ -2145,32 +2268,36 @@ struct AliasableFields{S,T}
     f1::S
     f2::T
 end
+struct NullableAliasableFields{S,T}
+    f1::S
+    f2::T
+    NullableAliasableFields(f1::S, f2::T) where {S,T} = new{S,T}(f1, f2)
+    NullableAliasableFields(f1::S) where {S} = new{S,Union{}}(f1)
+end
 mutable struct AliasableConstField{S,T}
     const f1::S
     f2::T
 end
 
-import Core.Compiler:
+using .Compiler:
     InferenceLattice, MustAliasesLattice, InterMustAliasesLattice,
     BaseInferenceLattice, SimpleInferenceLattice, IPOResultLattice, typeinf_lattice, ipo_lattice, optimizer_lattice
 
 include("newinterp.jl")
 @newinterp MustAliasInterpreter
-let CC = Core.Compiler
-    CC.typeinf_lattice(::MustAliasInterpreter) = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance))
-    CC.ipo_lattice(::MustAliasInterpreter) = InferenceLattice(InterMustAliasesLattice(IPOResultLattice.instance))
-    CC.optimizer_lattice(::MustAliasInterpreter) = SimpleInferenceLattice.instance
-end
+Compiler.typeinf_lattice(::MustAliasInterpreter) = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance))
+Compiler.ipo_lattice(::MustAliasInterpreter) = InferenceLattice(InterMustAliasesLattice(IPOResultLattice.instance))
+Compiler.optimizer_lattice(::MustAliasInterpreter) = SimpleInferenceLattice.instance
 
 # lattice
 # -------
 
-import Core.Compiler: MustAlias, Const, PartialStruct, ⊑, tmerge
+using .Compiler: MustAlias, Const, PartialStruct, ⊑, tmerge
 let 𝕃ᵢ = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance))
-    ⊑(@nospecialize(a), @nospecialize(b)) = Core.Compiler.:⊑(𝕃ᵢ, a, b)
-    tmerge(@nospecialize(a), @nospecialize(b)) = Core.Compiler.tmerge(𝕃ᵢ, a, b)
-    isa_tfunc(@nospecialize xs...) = Core.Compiler.isa_tfunc(𝕃ᵢ, xs...)
-    ifelse_tfunc(@nospecialize xs...) = Core.Compiler.ifelse_tfunc(𝕃ᵢ, xs...)
+    ⊑(@nospecialize(a), @nospecialize(b)) = Compiler.:⊑(𝕃ᵢ, a, b)
+    tmerge(@nospecialize(a), @nospecialize(b)) = Compiler.tmerge(𝕃ᵢ, a, b)
+    isa_tfunc(@nospecialize xs...) = Compiler.isa_tfunc(𝕃ᵢ, xs...)
+    ifelse_tfunc(@nospecialize xs...) = Compiler.ifelse_tfunc(𝕃ᵢ, xs...)
 
     @test (MustAlias(2, AliasableField{Any}, 1, Int) ⊑ Int)
     @test !(Int ⊑ MustAlias(2, AliasableField{Any}, 1, Int))
@@ -2181,6 +2308,7 @@ let 𝕃ᵢ = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance)
     @test tmerge(MustAlias(2, AliasableField{Any}, 1, Int), Const(nothing)) === Union{Int,Nothing}
     @test tmerge(Const(nothing), MustAlias(2, AliasableField{Any}, 1, Any)) === Any
     @test tmerge(Const(nothing), MustAlias(2, AliasableField{Any}, 1, Int)) === Union{Int,Nothing}
+    tmerge(Const(AbstractVector{<:Any}), Const(AbstractVector{T} where {T}))  # issue #56913
     @test isa_tfunc(MustAlias(2, AliasableField{Any}, 1, Bool), Const(Bool)) === Const(true)
     @test isa_tfunc(MustAlias(2, AliasableField{Any}, 1, Bool), Type{Bool}) === Const(true)
     @test isa_tfunc(MustAlias(2, AliasableField{Any}, 1, Int), Type{Bool}) === Const(false)
@@ -2331,7 +2459,7 @@ isaint(a) = isa(a, Int)
     end
     return 0
 end |> only === Int
-# handle multiple call-site refinment targets
+# handle multiple call-site refinement targets
 isasome(_) = true
 isasome(::Nothing) = false
 @test_broken Base.return_types((AliasableField{Union{Int,Nothing}},); interp=MustAliasInterpreter()) do a
@@ -2396,6 +2524,15 @@ jet509_hasitems(list) = length(list) >= 1
     error("list is empty")
 end |> only == Vector{Int}
 
+# don't form nested slot wrappers
+@test Base.infer_return_type((NullableAliasableFields{NullableAliasableFields},); interp=MustAliasInterpreter()) do x
+    y = getfield(x, :f1)
+    if isdefined(y, :f2) && isa(getfield(y, :f2), Int)
+        return getfield(y, :f2)
+    end
+    return 0
+end == Int
+
 # === constraint
 # --------------
 
@@ -2439,11 +2576,32 @@ end |> only === Int
 end |> only === Some{Int}
 
 # handle the edge case
-@eval intermustalias_edgecase(_) = $(Core.Compiler.InterMustAlias(2, Some{Any}, 1, Int))
+@eval intermustalias_edgecase(_) = $(Compiler.InterMustAlias(2, Some{Any}, 1, Int))
 Base.return_types(intermustalias_edgecase, (Any,); interp=MustAliasInterpreter()) # create cache
 @test Base.return_types((Any,); interp=MustAliasInterpreter()) do x
     intermustalias_edgecase(x)
-end |> only === Core.Compiler.InterMustAlias
+end |> only === Compiler.InterMustAlias
+
+@test Base.infer_return_type((AliasableField,Integer,); interp=MustAliasInterpreter()) do a, x
+    s = (;x)
+    if getfield(a, :f) isa Symbol
+        return getfield(s, getfield(a, :f))
+    end
+    return 0
+end == Integer
+
+# `isdefined` accuracy for `MustAlias`
+@test Base.infer_return_type((Any,); interp=MustAliasInterpreter()) do x
+    xx = Ref{Any}(x)
+    xxx = Some{Any}(xx)
+    Val(isdefined(xxx.value, :x))
+end == Val{true}
+
+@testset "issue #56913: `BoundsError` in type inference" begin
+    R = UnitRange{Int}
+    @test Type{AbstractVector} == Base.infer_return_type(Base.promote_typeof, Tuple{R, R, Vector{Any}, Vararg{R}})
+    @test Type{AbstractVector} == Base.infer_return_type(Base.promote_typeof, Tuple{R, R, Vector{Any}, R, Vararg{R}})
+end
 
 function f25579(g)
     h = g[]
@@ -2471,7 +2629,7 @@ function h25579(g)
     return t ? typeof(h) : typeof(h)
 end
 @test Base.return_types(h25579, (Base.RefValue{Union{Nothing, Int}},)) ==
-        Any[Union{Type{Float64}, Type{Int}, Type{Nothing}}]
+        Any[Type{Float64}]
 
 f26172(v) = Val{length(Base.tail(ntuple(identity, v)))}() # Val(M-1)
 g26172(::Val{0}) = ()
@@ -2522,7 +2680,7 @@ g26826(x) = getfield26826(x, :a, :b)
 # If this test is broken (especially if inference is getting a correct, but loose result,
 # like a Union) then it's potentially an indication that the optimizer isn't hitting the
 # InferenceResult cache properly for varargs methods.
-let ct = Core.Compiler.code_typed(f26826, (Float64,))[1]
+let ct = code_typed(f26826, (Float64,))[1]
     typed_code, retty = ct.first, ct.second
     found_poorly_typed_getfield_call = false
     for i = 1:length(typed_code.code)
@@ -2605,6 +2763,26 @@ vacond(cnd, va...) = cnd ? va : 0
     vacond(isa(x, Tuple{Int,Int}), x, x)
 end |> only == Union{Int,Tuple{Any,Any}}
 
+let A = Core.Const(true)
+    B = Core.InterConditional(2, Tuple, Union{})
+    C = Core.InterConditional(2, Any, Union{})
+    L = ipo_lattice(Compiler.NativeInterpreter())
+    @test !⊑(L, A, B)
+    @test ⊑(L, B, A)
+    @test tmerge(L, A, B) == C
+    @test ⊑(L, A, C)
+end
+function tail_is_ntuple((@nospecialize t::Tuple))
+    if unknown
+        t isa Tuple
+    else
+        tail_is_ntuple(t)
+    end
+end
+tail_is_ntuple_val((@nospecialize t::Tuple)) = Val(tail_is_ntuple(t))
+@test Base.return_types(tail_is_ntuple, (Tuple,)) |> only === Bool
+@test Base.return_types(tail_is_ntuple_val, (Tuple,)) |> only === Val{true}
+
 # https://github.com/JuliaLang/julia/issues/47435
 is_closed_ex(e::InvalidStateException) = true
 is_closed_ex(e) = false
@@ -2645,10 +2823,10 @@ end |> only === Int
 
 # `apply_type_tfunc` accuracy for constrained type construction
 # https://github.com/JuliaLang/julia/issues/47089
-import Core: Const
-import Core.Compiler: apply_type_tfunc
 struct Issue47089{A<:Number,B<:Number} end
-let 𝕃 = Core.Compiler.fallback_lattice
+let apply_type_tfunc = Compiler.apply_type_tfunc
+    𝕃 = Compiler.fallback_lattice
+    Const = Core.Const
     A = Type{<:Integer}
     @test apply_type_tfunc(𝕃, Const(Issue47089), A, A) <: (Type{Issue47089{A,B}} where {A<:Integer, B<:Integer})
     @test apply_type_tfunc(𝕃, Const(Issue47089), Const(Int), Const(Int), Const(Int)) === Union{}
@@ -2667,7 +2845,7 @@ end
 @test only(Base.return_types(Base.afoldl, (typeof((m, n) -> () -> Returns(nothing)(m, n)), Function, Function, Vararg{Function}))) === Function
 
 let A = Tuple{A,B,C,D,E,F,G,H} where {A,B,C,D,E,F,G,H}
-    B = Core.Compiler.rename_unionall(A)
+    B = Compiler.rename_unionall(A)
     for i in 1:8
         @test A.var != B.var && (i == 1 ? A == B : A != B)
         A, B = A.body, B.body
@@ -2831,7 +3009,7 @@ end
 # issue #27316 - inference shouldn't hang on these
 f27316(::Vector) = nothing
 f27316(::Any) = f27316(Any[][1]), f27316(Any[][1])
-let expected = NTuple{2, Union{Nothing, NTuple{2, Union{Nothing, Tuple{Any, Any}}}}}
+let expected = NTuple{2, Union{Nothing, Tuple{Any, Any}}}
     @test Tuple{Nothing, Nothing} <: only(Base.return_types(f27316, Tuple{Int})) == expected # we may be able to improve this bound in the future
 end
 function g27316()
@@ -2908,13 +3086,15 @@ end
 @test ig27907(Int, Int, 1, 0) == 0
 
 # issue #28279
-function f28279(b::Bool)
-    i = 1
-    while i > b
-        i -= 1
+# ensure that lowering doesn't move these into statement position, which would require renumbering
+@eval function f28279(b::Bool)
+    let i = 1
+        while $(>)(i, b)
+            i = $(-)(i, 1)
+        end
+        if b end
+        return $(+)(i, 1)
     end
-    if b end
-    return i + 1
 end
 code28279 = code_lowered(f28279, (Bool,))[1].code
 oldcode28279 = deepcopy(code28279)
@@ -2931,7 +3111,7 @@ let i
         end
     end
 end
-Core.Compiler.renumber_ir_elements!(code28279, ssachangemap, labelchangemap)
+Compiler.renumber_ir_elements!(code28279, ssachangemap, labelchangemap)
 @test length(code28279) === length(oldcode28279)
 offset = 1
 let i
@@ -2954,11 +3134,11 @@ end
 # issue #28356
 # unit test to make sure countunionsplit overflows gracefully
 # we don't care what number is returned as long as it's large
-@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int32, Int64} for i=1:80]) > 100000
-@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}]) == 2
-@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32, Int64}, Int8]) == 8
-@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32}, Int8]) == 6
-@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32}, Union{Int8, Int16, Int32, Int64}, Int8]) == 6
+@test Compiler.unionsplitcost(Compiler.JLTypeLattice(), Any[Union{Int32, Int64} for i=1:80]) > 100000
+@test Compiler.unionsplitcost(Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}]) == 2
+@test Compiler.unionsplitcost(Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32, Int64}, Int8]) == 8
+@test Compiler.unionsplitcost(Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32}, Int8]) == 6
+@test Compiler.unionsplitcost(Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32}, Union{Int8, Int16, Int32, Int64}, Int8]) == 6
 
 # make sure compiler doesn't hang in union splitting
 
@@ -3201,8 +3381,8 @@ _rttf_test(::Int16) = 0
 _rttf_test(::Int32) = 0
 _rttf_test(::Int64) = 0
 _rttf_test(::Int128) = 0
-_call_rttf_test() = Core.Compiler.return_type(_rttf_test, Tuple{Any})
-@test Core.Compiler.return_type(_rttf_test, Tuple{Any}) === Int
+_call_rttf_test() = Base._return_type(_rttf_test, Tuple{Any})
+@test Base._return_type(_rttf_test, Tuple{Any}) === Int
 @test _call_rttf_test() === Int
 
 f_with_Type_arg(::Type{T}) where {T} = T
@@ -3225,7 +3405,10 @@ end
 call_ntuple(a, b) = my_ntuple(i->(a+b; i), Val(4))
 @test Base.return_types(call_ntuple, Tuple{Any,Any}) == [NTuple{4, Int}]
 @test length(code_typed(my_ntuple, Tuple{Any, Val{4}})) == 1
-@test_throws ErrorException code_typed(my_ntuple, Tuple{Any, Val})
+let (src, rt) = only(code_typed(my_ntuple, Tuple{Any, Val}))
+    @test src isa CodeInfo
+    @test rt == Tuple
+end
 
 @generated unionall_sig_generated(::Vector{T}, b::Vector{S}) where {T, S} = :($b)
 @test length(code_typed(unionall_sig_generated, Tuple{Any, Vector{Int}})) == 1
@@ -3247,15 +3430,15 @@ end
 @test @inferred(foo30783(2)) == Val(1)
 
 # PartialStruct tmerge
-using Core.Compiler: PartialStruct, tmerge, Const, ⊑
+using .Compiler: PartialStruct, tmerge, Const, ⊑
 struct FooPartial
     a::Int
     b::Int
     c::Int
 end
-let PT1 = PartialStruct(FooPartial, Any[Const(1), Const(2), Int]),
-    PT2 = PartialStruct(FooPartial, Any[Const(1), Int, Int]),
-    PT3 = PartialStruct(FooPartial, Any[Const(1), Int, Const(3)])
+let PT1 = PartialStruct(Compiler.fallback_lattice, FooPartial, Any[Const(1), Const(2), Int]),
+    PT2 = PartialStruct(Compiler.fallback_lattice, FooPartial, Any[Const(1), Int, Int]),
+    PT3 = PartialStruct(Compiler.fallback_lattice, FooPartial, Any[Const(1), Int, Const(3)])
 
     @test PT1 ⊑ PT2
     @test !(PT1 ⊑ PT3) && !(PT2 ⊑ PT1)
@@ -3326,11 +3509,11 @@ end
 struct MixedKeyDict{T<:Tuple} #<: AbstractDict{Any,Any}
     dicts::T
 end
-Base.merge(f::Function, d::MixedKeyDict, others::MixedKeyDict...) = _merge(f, (), d.dicts, (d->d.dicts).(others)...)
-Base.merge(f, d::MixedKeyDict, others::MixedKeyDict...) = _merge(f, (), d.dicts, (d->d.dicts).(others)...)
+Base.mergewith(f::Function, d::MixedKeyDict, others::MixedKeyDict...) = _merge(f, (), d.dicts, (d->d.dicts).(others)...)
+Base.mergewith(f, d::MixedKeyDict, others::MixedKeyDict...) = _merge(f, (), d.dicts, (d->d.dicts).(others)...)
 function _merge(f, res, d, others...)
     ofsametype, remaining = _alloftype(Base.heads(d), ((),), others...)
-    return _merge(f, (res..., merge(f, ofsametype...)), Base.tail(d), remaining...)
+    return _merge(f, (res..., mergewith(f, ofsametype...)), Base.tail(d), remaining...)
 end
 _merge(f, res, ::Tuple{}, others...) = _merge(f, res, others...)
 _merge(f, res, d) = MixedKeyDict((res..., d...))
@@ -3354,9 +3537,9 @@ _alloftype(ofdesiredtype, accumulated) = ofdesiredtype, Base.front(accumulated)
 let
     d = MixedKeyDict((Dict(1 => 3), Dict(4. => 2)))
     e = MixedKeyDict((Dict(1 => 7), Dict(5. => 9)))
-    @test merge(+, d, e).dicts == (Dict(1 => 10), Dict(4.0 => 2, 5.0 => 9))
+    @test mergewith(+, d, e).dicts == (Dict(1 => 10), Dict(4.0 => 2, 5.0 => 9))
     f = MixedKeyDict((Dict(2 => 7), Dict(5. => 11)))
-    @test merge(+, d, e, f).dicts == (Dict(1 => 10, 2 => 7), Dict(4.0 => 2, 5.0 => 20))
+    @test mergewith(+, d, e, f).dicts == (Dict(1 => 10, 2 => 7), Dict(4.0 => 2, 5.0 => 20))
 end
 
 # Issue #31974
@@ -3366,8 +3549,14 @@ f31974(n::Int) = f31974(1:n)
 # call cycles.
 @test code_typed(f31974, Tuple{Int}) !== nothing
 
-f_overly_abstract_complex() = Complex(Ref{Number}(1)[])
-@test Base.return_types(f_overly_abstract_complex, Tuple{}) == [Complex]
+# Issue #33472
+struct WrapperWithUnionall33472{T<:Real}
+    x::T
+end
+
+f_overly_abstract33472() = WrapperWithUnionall33472(Base.inferencebarrier(1)::Number)
+# Check that this doesn't infer as `WrapperWithUnionall33472{T<:Number}`.
+@test Base.return_types(f_overly_abstract33472, Tuple{}) == [WrapperWithUnionall33472]
 
 # Issue 26724
 const IntRange = AbstractUnitRange{<:Integer}
@@ -3381,14 +3570,14 @@ const DenseIdx = Union{IntRange,Integer}
 @test @inferred(foo_26724((), 1:4, 1:5, 1:6)) === (4, 5, 6)
 
 # Non uniformity in expressions with PartialTypeVar
-@test Core.Compiler.:⊑(Core.Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar)
+@test Compiler.:⊑(Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar)
 let N = TypeVar(:N)
-    𝕃 = Core.Compiler.SimpleInferenceLattice.instance
-    argtypes = Any[Core.Compiler.Const(NTuple),
-        Core.Compiler.PartialTypeVar(N, true, true),
-        Core.Compiler.Const(Any)]
+    𝕃 = Compiler.SimpleInferenceLattice.instance
+    argtypes = Any[Compiler.Const(NTuple),
+        Compiler.PartialTypeVar(N, true, true),
+        Compiler.Const(Any)]
     rt = Type{Tuple{Vararg{Any,N}}}
-    @test Core.Compiler.apply_type_nothrow(𝕃, argtypes, rt)
+    @test Compiler.apply_type_nothrow(𝕃, argtypes, rt)
 end
 
 # issue #33768
@@ -3410,8 +3599,12 @@ end
 @test Base.return_types(h33768, ()) == Any[Union{}]
 
 # constant prop of `Symbol("")`
-f_getf_computed_symbol(p) = getfield(p, Symbol("first"))
-@test Base.return_types(f_getf_computed_symbol, Tuple{Pair{Int8,String}}) == [Int8]
+@test Base.return_types() do
+    Val(Symbol("julia"))
+end |> only == Val{:julia}
+@test Base.return_types() do p::Pair{Int8,String}
+    getfield(p, Symbol("first"))
+end |> only == Int8
 
 # issue #33954
 struct X33954
@@ -3442,8 +3635,20 @@ function pickvarnames(x::Vector{Any})
 end
 @test pickvarnames(:a) === :a
 @test pickvarnames(Any[:a, :b]) === (:a, :b)
-@test only(Base.return_types(pickvarnames, (Vector{Any},))) == Tuple{Vararg{Union{Symbol, Tuple}}}
-@test only(Base.code_typed(pickvarnames, (Vector{Any},), optimize=false))[2] == Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple}}}}}}
+@test only(Base.return_types(pickvarnames, (Vector{Any},))) == Tuple
+@test only(Base.code_typed(pickvarnames, (Vector{Any},), optimize=false))[2] == Tuple{Vararg{Union{Symbol, Tuple}}}
+
+# make sure this converges in a reasonable amount of time
+function pickvarnames2(x::Vector{Any})
+    varnames = ()
+    for a in x
+        varnames = (varnames..., pickvarnames(a) )
+    end
+    return varnames
+end
+@test only(Base.return_types(pickvarnames2, (Vector{Any},))) == Tuple{Vararg{Union{Symbol, Tuple}}}
+@test only(Base.code_typed(pickvarnames2, (Vector{Any},), optimize=false))[2] == Tuple{Vararg{Union{Symbol, Tuple}}}
+
 
 @test map(>:, [Int], [Int]) == [true]
 
@@ -3485,29 +3690,29 @@ end
 
 f() = _foldl_iter(step, (Missing[],), [0.0], 1)
 end
-@test Core.Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 0) == Tuple{Int}
-@test Core.Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 1) == Tuple{Int}
-@test Core.Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 2) == Tuple{Int}
-@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, Tuple{Char, Any, Any}, 0) ==
+@test Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 0) == Tuple{Int}
+@test Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 1) == Tuple{Int}
+@test Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 2) == Tuple{Int}
+@test Compiler.typesubtract(NTuple{3, Union{Int, Char}}, Tuple{Char, Any, Any}, 0) ==
         Tuple{Int, Union{Char, Int}, Union{Char, Int}}
-@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, Tuple{Char, Any, Any}, 10) ==
+@test Compiler.typesubtract(NTuple{3, Union{Int, Char}}, Tuple{Char, Any, Any}, 10) ==
         Union{Tuple{Int, Char, Char}, Tuple{Int, Char, Int}, Tuple{Int, Int, Char}, Tuple{Int, Int, Int}}
-@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, NTuple{3, Char}, 0) ==
+@test Compiler.typesubtract(NTuple{3, Union{Int, Char}}, NTuple{3, Char}, 0) ==
         NTuple{3, Union{Int, Char}}
-@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, NTuple{3, Char}, 10) ==
+@test Compiler.typesubtract(NTuple{3, Union{Int, Char}}, NTuple{3, Char}, 10) ==
         Union{Tuple{Char, Char, Int}, Tuple{Char, Int, Char}, Tuple{Char, Int, Int}, Tuple{Int, Char, Char},
               Tuple{Int, Char, Int}, Tuple{Int, Int, Char}, Tuple{Int, Int, Int}}
 # Test that these don't throw
-@test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Char}}, 0) == Tuple{Vararg{Int}}
-@test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Int}}, 0) == Union{}
-@test Core.Compiler.typesubtract(Tuple{String,Int}, Tuple{String,Vararg{Int}}, 0) == Union{}
-@test Core.Compiler.typesubtract(Tuple{String,Vararg{Int}}, Tuple{String,Int}, 0) == Tuple{String,Vararg{Int}}
-@test Core.Compiler.typesubtract(NTuple{3, Real}, NTuple{3, Char}, 0) == NTuple{3, Real}
-@test Core.Compiler.typesubtract(NTuple{3, Union{Real, Char}}, NTuple{2, Char}, 0) == NTuple{3, Union{Real, Char}}
+@test Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Char}}, 0) == Tuple{Vararg{Int}}
+@test Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Int}}, 0) == Union{}
+@test Compiler.typesubtract(Tuple{String,Int}, Tuple{String,Vararg{Int}}, 0) == Union{}
+@test Compiler.typesubtract(Tuple{String,Vararg{Int}}, Tuple{String,Int}, 0) == Tuple{String,Vararg{Int}}
+@test Compiler.typesubtract(NTuple{3, Real}, NTuple{3, Char}, 0) == NTuple{3, Real}
+@test Compiler.typesubtract(NTuple{3, Union{Real, Char}}, NTuple{2, Char}, 0) == NTuple{3, Union{Real, Char}}
 
-@test Core.Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Vararg{Int}})
-@test !Core.Compiler.compatible_vatuple(Tuple{String,Int}, Tuple{String,Vararg{Int}})
-@test !Core.Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Int})
+@test Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Vararg{Int}})
+@test !Compiler.compatible_vatuple(Tuple{String,Int}, Tuple{String,Vararg{Int}})
+@test !Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Int})
 
 @test Base.return_types(Issue35566.f) == [Val{:expected}]
 
@@ -3664,8 +3869,8 @@ f_generator_splat(t::Tuple) = tuple((identity(l) for l in t)...)
 
 # Issue #36710 - sizeof(::UnionAll) tfunc correctness
 @test (sizeof(Ptr),) == sizeof.((Ptr,)) == sizeof.((Ptr{Cvoid},))
-@test Core.Compiler.sizeof_tfunc(Core.Compiler.fallback_lattice, UnionAll) === Int
-@test !Core.Compiler.sizeof_nothrow(UnionAll)
+@test Compiler.sizeof_tfunc(Compiler.fallback_lattice, UnionAll) === Int
+@test !Compiler.sizeof_nothrow(UnionAll)
 
 @test only(Base.return_types(Core._expr)) === Expr
 @test only(Base.return_types(Core.svec, (Any,))) === Core.SimpleVector
@@ -3734,119 +3939,12 @@ f_apply_cglobal(args...) = cglobal(args...)
 @test only(Base.return_types(f_apply_cglobal, Tuple{Any, Type{Int}, Type{Int}, Vararg{Type{Int}}})) == Union{}
 
 # issue #37532
-@test Core.Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{Int}}, Int])
-@test Core.Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{T}} where T, Ptr])
-@test !Core.Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr}, Ptr])
+@test Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{Int}}, Int])
+@test Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{T}} where T, Ptr])
+@test !Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr}, Ptr])
 f37532(T, x) = (Core.bitcast(Ptr{T}, x); x)
 @test Base.return_types(f37532, Tuple{Any, Int}) == Any[Int]
 
-# PR #37749
-# Helper functions for Core.Compiler.Timings. These are normally accessed via a package -
-# usually (SnoopCompileCore).
-function time_inference(f)
-    Core.Compiler.Timings.reset_timings()
-    Core.Compiler.__set_measure_typeinf(true)
-    f()
-    Core.Compiler.__set_measure_typeinf(false)
-    Core.Compiler.Timings.close_current_timer()
-    return Core.Compiler.Timings._timings[1]
-end
-function depth(t::Core.Compiler.Timings.Timing)
-    maximum(depth.(t.children), init=0) + 1
-end
-function flatten_times(t::Core.Compiler.Timings.Timing)
-    collect(Iterators.flatten([(t.time => t.mi_info,), flatten_times.(t.children)...]))
-end
-# Some very limited testing of timing the type inference (#37749).
-@testset "Core.Compiler.Timings" begin
-    # Functions that call each other
-    @eval module M1
-        i(x) = x+5
-        i2(x) = x+2
-        h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
-        g(y::Integer, x) = h(Any[y]) + Int(x)
-    end
-    timing1 = time_inference() do
-        @eval M1.g(2, 3.0)
-    end
-    @test occursin(r"Core.Compiler.Timings.Timing\(InferenceFrameInfo for Core.Compiler.Timings.ROOT\(\)\) with \d+ children", sprint(show, timing1))
-    # The last two functions to be inferred should be `i` and `i2`, inferred at runtime with
-    # their concrete types.
-    @test sort([mi_info.mi.def.name for (time,mi_info) in flatten_times(timing1)[end-1:end]]) == [:i, :i2]
-    @test all(child->isa(child.bt, Vector), timing1.children)
-    @test all(child->child.bt===nothing, timing1.children[1].children)
-    # Test the stacktrace
-    @test isa(stacktrace(timing1.children[1].bt), Vector{Base.StackTraces.StackFrame})
-    # Test that inference has cached some of the Method Instances
-    timing2 = time_inference() do
-        @eval M1.g(2, 3.0)
-    end
-    @test length(flatten_times(timing2)) < length(flatten_times(timing1))
-    # Printing of InferenceFrameInfo for mi.def isa Module
-    @eval module M2
-        i(x) = x+5
-        i2(x) = x+2
-        h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
-        g(y::Integer, x) = h(Any[y]) + Int(x)
-    end
-    # BEGIN LINE NUMBER SENSITIVITY (adjust the line offset below as needed)
-    timingmod = time_inference() do
-        @eval @testset "Outer" begin
-            @testset "Inner" begin
-                for i = 1:2 M2.g(2, 3.0) end
-            end
-        end
-    end
-    @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 6)", string(timingmod.children))
-    # END LINE NUMBER SENSITIVITY
-
-    # Recursive function
-    @eval module _Recursive f(n::Integer) = n == 0 ? 0 : f(n-1) + 1 end
-    timing = time_inference() do
-        @eval _Recursive.f(Base.inferencebarrier(5))
-    end
-    @test 2 <= depth(timing) <= 3  # root -> f (-> +)
-    @test 2 <= length(flatten_times(timing)) <= 3  # root, f, +
-
-    # Functions inferred with multiple constants
-    @eval module C
-        i(x) = x === 0 ? 0 : 1 / x
-        a(x) = i(0) * i(x)
-        b() = i(0) * i(1) * i(0)
-        function loopc(n)
-            s = 0
-            for i = 1:n
-                s += i
-            end
-            return s
-        end
-        call_loopc() = loopc(5)
-        myfloor(::Type{T}, x) where T = floor(T, x)
-        d(x) = myfloor(Int16, x)
-    end
-    timing = time_inference() do
-        @eval C.a(2)
-        @eval C.b()
-        @eval C.call_loopc()
-        @eval C.d(3.2)
-    end
-    ft = flatten_times(timing)
-    @test !isempty(ft)
-    str = sprint(show, ft)
-    @test occursin("InferenceFrameInfo for /(1::$Int, ::$Int)", str)  # inference constants
-    @test occursin("InferenceFrameInfo for Core.Compiler.Timings.ROOT()", str) # qualified
-    # loopc has internal slots, check constant printing in this case
-    sel = filter(ti -> ti.second.mi.def.name === :loopc, ft)
-    ifi = sel[end].second
-    @test length(ifi.slottypes) > ifi.nargs
-    str = sprint(show, sel)
-    @test occursin("InferenceFrameInfo for $(@__MODULE__).C.loopc(5::$Int)", str)
-    # check that types aren't double-printed as `T::Type{T}`
-    sel = filter(ti -> ti.second.mi.def.name === :myfloor, ft)
-    str = sprint(show, sel)
-    @test occursin("InferenceFrameInfo for $(@__MODULE__).C.myfloor(::Type{Int16}, ::Float64)", str)
-end
-
 # issue #37638
 @test only(Base.return_types(() -> (nothing, Any[]...)[2])) isa Type
 
@@ -3887,16 +3985,16 @@ Base.@constprop :aggressive @noinline f_constprop_aggressive_noinline(f, x) = (f
 Base.@constprop :none f_constprop_none(f, x) = (f(x); Val{x}())
 Base.@constprop :none @inline f_constprop_none_inline(f, x) = (f(x); Val{x}())
 
-@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_simple)))
-@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_simple)))
-@test Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive)))
-@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_aggressive)))
-@test Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive_noinline)))
-@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_aggressive_noinline)))
-@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_none)))
-@test Core.Compiler.is_no_constprop(only(methods(f_constprop_none)))
-@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_none_inline)))
-@test Core.Compiler.is_no_constprop(only(methods(f_constprop_none_inline)))
+@test !Compiler.is_aggressive_constprop(only(methods(f_constprop_simple)))
+@test !Compiler.is_no_constprop(only(methods(f_constprop_simple)))
+@test Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive)))
+@test !Compiler.is_no_constprop(only(methods(f_constprop_aggressive)))
+@test Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive_noinline)))
+@test !Compiler.is_no_constprop(only(methods(f_constprop_aggressive_noinline)))
+@test !Compiler.is_aggressive_constprop(only(methods(f_constprop_none)))
+@test Compiler.is_no_constprop(only(methods(f_constprop_none)))
+@test !Compiler.is_aggressive_constprop(only(methods(f_constprop_none_inline)))
+@test Compiler.is_no_constprop(only(methods(f_constprop_none_inline)))
 
 # make sure that improvements to the compiler don't render the annotation effectless.
 @test Base.return_types((Function,)) do f
@@ -3952,12 +4050,12 @@ end
 @testset "switchtupleunion" begin
     # signature tuple
     let
-        tunion = Core.Compiler.switchtupleunion(Tuple{Union{Int32,Int64}, Nothing})
+        tunion = Compiler.switchtupleunion(Tuple{Union{Int32,Int64}, Nothing})
         @test Tuple{Int32, Nothing} in tunion
         @test Tuple{Int64, Nothing} in tunion
     end
     let
-        tunion = Core.Compiler.switchtupleunion(Tuple{Union{Int32,Int64}, Union{Float32,Float64}, Nothing})
+        tunion = Compiler.switchtupleunion(Tuple{Union{Int32,Int64}, Union{Float32,Float64}, Nothing})
         @test Tuple{Int32, Float32, Nothing} in tunion
         @test Tuple{Int32, Float64, Nothing} in tunion
         @test Tuple{Int64, Float32, Nothing} in tunion
@@ -3966,13 +4064,13 @@ end
 
     # argtypes
     let
-        tunion = Core.Compiler.switchtupleunion(Core.Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Core.Const(nothing)])
+        tunion = Compiler.switchtupleunion(Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Core.Const(nothing)])
         @test length(tunion) == 2
         @test Any[Int32, Core.Const(nothing)] in tunion
         @test Any[Int64, Core.Const(nothing)] in tunion
     end
     let
-        tunion = Core.Compiler.switchtupleunion(Core.Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Union{Float32,Float64}, Core.Const(nothing)])
+        tunion = Compiler.switchtupleunion(Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Union{Float32,Float64}, Core.Const(nothing)])
         @test length(tunion) == 4
         @test Any[Int32, Float32, Core.Const(nothing)] in tunion
         @test Any[Int32, Float64, Core.Const(nothing)] in tunion
@@ -4053,6 +4151,126 @@ end
     end
 end == [Union{Some{Float64}, Some{Int}, Some{UInt8}}]
 
+@testset "constraint back-propagation from typeassert" begin
+    @test Base.infer_return_type((Any,)) do a
+        typeassert(a, Int)
+        return a
+    end == Int
+
+    @test Base.infer_return_type((Any,Bool)) do a, b
+        if b
+            typeassert(a, Int64)
+        else
+            typeassert(a, Int32)
+        end
+        return a
+    end == Union{Int32,Int64}
+
+    @test Base.infer_return_type((Vector{Any},)) do args
+        codeinst = first(args)
+        if codeinst isa Core.MethodInstance
+            mi = codeinst
+        else
+            codeinst::Core.CodeInstance
+            def = codeinst.def
+            if isa(def, Core.ABIOverride)
+                mi = def.def
+            else
+                mi = def::Core.MethodInstance
+            end
+        end
+        return mi
+    end == Core.MethodInstance
+end
+
+callsig_backprop_basic(::Int) = nothing
+callsig_backprop_unionsplit(::Int32) = nothing
+callsig_backprop_unionsplit(::Int64) = nothing
+callsig_backprop_multi(::Int32, ::Int64) = nothing
+callsig_backprop_any(::Any) = nothing
+callsig_backprop_lhs(::Int) = nothing
+callsig_backprop_bailout(::Val{0}) = 0
+callsig_backprop_bailout(::Val{1}) = undefvar # undefvar::Any triggers `bail_out_call`
+callsig_backprop_bailout(::Val) = 2
+callsig_backprop_addinteger(a::Integer, b::Integer) = a + b # results in too many matching methods and triggers `bail_out_call`)
+@test Base.infer_return_type(callsig_backprop_addinteger) == Any
+let effects = Base.infer_effects(callsig_backprop_addinteger)
+    @test !Compiler.is_consistent(effects)
+    @test !Compiler.is_effect_free(effects)
+    @test !Compiler.is_nothrow(effects)
+    @test !Compiler.is_terminates(effects)
+end
+callsig_backprop_anti(::Any) = :any
+callsig_backprop_anti(::Int) = :int
+
+@testset "constraint back-propagation from call signature" begin
+    # basic case
+    @test Base.infer_return_type(a->(callsig_backprop_basic(a); return a), (Any,)) == Int
+
+    # union-split case
+    @test Base.infer_return_type(a->(callsig_backprop_unionsplit(a); return a), (Any,)) == Union{Int32,Int64}
+
+    # multiple arguments updates
+    @test Base.infer_return_type((Any,Any)) do a, b
+        callsig_backprop_multi(a, b)
+        return a, b
+    end == Tuple{Int32,Int64}
+
+    # refinement should happen only when it's worthwhile
+    @test Base.infer_return_type(a->(callsig_backprop_any(a); return a), (Integer,)) == Integer
+
+    # state update on lhs slot (assignment effect should have the precedence)
+    @test Base.infer_return_type((Any,)) do a
+        a = callsig_backprop_lhs(a)
+        return a
+    end == Nothing
+
+    # make sure to throw away an intermediate refinement information when we bail out early
+    # (inference would bail out on `callsig_backprop_bailout(::Val{1})`)
+    @test Base.infer_return_type(a->(callsig_backprop_bailout(a); return a), (Any,)) == Any
+
+    # if we see all the matching methods, we don't need to throw away refinement information
+    # even if it's caught by `bail_out_call` check
+    @test Base.infer_return_type((Any,Any)) do a, b
+        callsig_backprop_addinteger(a, b)
+        return a, b
+    end == Tuple{Integer,Integer}
+
+    # anti case
+    @test Base.infer_return_type((Any,)) do x
+        callsig_backprop_anti(x)
+        return x
+    end == Any
+end
+
+# make sure to add backedges when we use call signature constraint
+function callsig_backprop_invalidation_outer(a)
+    callsig_backprop_invalidation_inner!(a)
+    return a
+end
+@eval callsig_backprop_invalidation_inner!(::Int) = $(gensym(:undefvar)) # ::Any
+@test Base.infer_return_type((Any,)) do a
+    callsig_backprop_invalidation_outer(a)
+end == Int
+# new definition of `callsig_backprop_invalidation_inner!` should invalidate `callsig_backprop_invalidation_outer`
+# (even if the previous return type is annotated as `Any`)
+@eval callsig_backprop_invalidation_inner!(::Nothing) = $(gensym(:undefvar)) # ::Any
+@test Base.infer_return_type((Any,)) do a
+    # since inference will bail out at the first matched `_inner!` and so call signature constraint won't be available
+    callsig_backprop_invalidation_outer(a)
+end ≠ Int
+
+# https://github.com/JuliaLang/julia/issues/37866
+function issue37866(v::Vector{Union{Nothing,Float64}})
+    for x in v
+        if x > 5.0
+            return x # x > 5.0 is MethodError for Nothing so can assume ::Float64
+        end
+    end
+    return 0.0
+end
+@test Base.infer_return_type(issue37866, (Vector{Union{Nothing,Float64}},)) == Float64
+
 # make sure inference on a recursive call graph with nested `Type`s terminates
 # https://github.com/JuliaLang/julia/issues/40336
 f40336(@nospecialize(t)) = f40336(Type{t})
@@ -4108,22 +4326,22 @@ end
 let # Test the presence of PhiNodes in lowered IR by taking the above function,
     # running it through SSA conversion and then putting it into an opaque
     # closure.
-    mi = Core.Compiler.specialize_method(first(methods(f_convert_me_to_ir)),
+    mi = Compiler.specialize_method(first(methods(f_convert_me_to_ir)),
         Tuple{Bool, Float64}, Core.svec())
     ci = Base.uncompressed_ast(mi.def)
     ci.slottypes = Any[ Any for i = 1:length(ci.slotflags) ]
     ci.ssavaluetypes = Any[Any for i = 1:ci.ssavaluetypes]
-    sv = Core.Compiler.OptimizationState(mi, Core.Compiler.NativeInterpreter())
-    ir = Core.Compiler.convert_to_ircode(ci, sv)
-    ir = Core.Compiler.slot2reg(ir, ci, sv)
-    ir = Core.Compiler.compact!(ir)
-    Core.Compiler.replace_code_newstyle!(ci, ir)
+    sv = Compiler.OptimizationState(mi, Compiler.NativeInterpreter())
+    ir = Compiler.convert_to_ircode(ci, sv)
+    ir = Compiler.slot2reg(ir, ci, sv)
+    ir = Compiler.compact!(ir)
+    Compiler.replace_code_newstyle!(ci, ir)
     ci.ssavaluetypes = length(ci.ssavaluetypes)
     @test any(x->isa(x, Core.PhiNode), ci.code)
-    oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any,
+    oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any, true,
         Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(b, 1.0)
     @test Base.return_types(oc, Tuple{Bool}) == Any[Float64]
-    oc = @eval ()->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any,
+    oc = @eval ()->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any, true,
         Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(true, 1.0)
     @test Base.return_types(oc, Tuple{}) == Any[Float64]
 end
@@ -4275,30 +4493,28 @@ g41908() = f41908(Any[1][1])
 # issue #42022
 let x = Tuple{Int,Any}[
         #= 1=# (0, Expr(:(=), Core.SlotNumber(3), 1))
-        #= 2=# (0, Expr(:enter, 18))
+        #= 2=# (0, EnterNode(17))
         #= 3=# (2, Expr(:(=), Core.SlotNumber(3), 2.0))
-        #= 4=# (2, Expr(:enter, 12))
+        #= 4=# (2, EnterNode(12))
         #= 5=# (4, Expr(:(=), Core.SlotNumber(3), '3'))
         #= 6=# (4, Core.GotoIfNot(Core.SlotNumber(2), 9))
-        #= 7=# (4, Expr(:leave, 2))
+        #= 7=# (4, Expr(:leave, Core.SSAValue(4), Core.SSAValue(2)))
         #= 8=# (0, Core.ReturnNode(1))
         #= 9=# (4, Expr(:call, GlobalRef(Main, :throw)))
-        #=10=# (4, Expr(:leave, 1))
-        #=11=# (2, Core.GotoNode(16))
-        #=12=# (4, Expr(:leave, 1))
-        #=13=# (2, Expr(:(=), Core.SlotNumber(4), Expr(:the_exception)))
-        #=14=# (2, Expr(:call, GlobalRef(Main, :rethrow)))
-        #=15=# (2, Expr(:pop_exception, Core.SSAValue(4)))
-        #=16=# (2, Expr(:leave, 1))
-        #=17=# (0, Core.GotoNode(22))
-        #=18=# (2, Expr(:leave, 1))
-        #=19=# (0, Expr(:(=), Core.SlotNumber(5), Expr(:the_exception)))
-        #=20=# (0, nothing)
-        #=21=# (0, Expr(:pop_exception, Core.SSAValue(2)))
-        #=22=# (0, Core.ReturnNode(Core.SlotNumber(3)))
+        #=10=# (4, Expr(:leave, Core.SSAValue(4)))
+        #=11=# (2, Core.GotoNode(15))
+        #=12=# (2, Expr(:(=), Core.SlotNumber(4), Expr(:the_exception)))
+        #=13=# (2, Expr(:call, GlobalRef(Main, :rethrow)))
+        #=14=# (2, Expr(:pop_exception, Core.SSAValue(4)))
+        #=15=# (2, Expr(:leave, Core.SSAValue(2)))
+        #=16=# (0, Core.GotoNode(20))
+        #=17=# (0, Expr(:(=), Core.SlotNumber(5), Expr(:the_exception)))
+        #=18=# (0, nothing)
+        #=19=# (0, Expr(:pop_exception, Core.SSAValue(2)))
+        #=20=# (0, Core.ReturnNode(Core.SlotNumber(3)))
     ]
-    handler_at = Core.Compiler.compute_trycatch(last.(x), Core.Compiler.BitSet())
-    @test handler_at == first.(x)
+    (;handler_at, handlers) = Compiler.compute_trycatch(last.(x))
+    @test map(x->x[1] == 0 ? 0 : Compiler.get_enter_idx(handlers[x[1]]), handler_at) == first.(x)
 end
 
 @test only(Base.return_types((Bool,)) do y
@@ -4315,7 +4531,7 @@ end
             nothing
         end
         return x
-    end) === Union{Int, Float64, Char}
+    end) === Union{Int, Char}
 
 # issue #42097
 struct Foo42097{F} end
@@ -4347,8 +4563,10 @@ let
                # Vararg
         #=va=# Bound, unbound,         # => Tuple{Integer,Integer} (invalid `TypeVar` widened beforehand)
         } where Bound<:Integer
-    argtypes = Core.Compiler.most_general_argtypes(method, specTypes, true)
+    argtypes = Compiler.most_general_argtypes(method, specTypes)
     popfirst!(argtypes)
+    # N.B.: `argtypes` do not have va processing applied yet
+    @test length(argtypes) == 12
     @test argtypes[1] == Integer
     @test argtypes[2] == Integer
     @test argtypes[3] == Type{Bound} where Bound<:Integer
@@ -4359,7 +4577,8 @@ let
     @test argtypes[8] == Any
     @test argtypes[9] == Union{Nothing,Bound} where Bound<:Integer
     @test argtypes[10] == Any
-    @test argtypes[11] == Tuple{Integer,Integer}
+    @test argtypes[11] == Integer
+    @test argtypes[12] == Integer
 end
 
 # make sure not to call `widenconst` on `TypeofVararg` objects
@@ -4414,7 +4633,8 @@ end |> only == Tuple{Int,Int}
 end |> only == Int
 
 # form PartialStruct for mutables with `const` field
-import Core.Compiler: Const, ⊑
+using Core: Const
+using .Compiler: ⊑
 mutable struct PartialMutable{S,T}
     const s::S
     t::T
@@ -4491,32 +4711,82 @@ end
 
 # issue #43784
 @testset "issue #43784" begin
-    init = Base.ImmutableDict{Any,Any}()
-    a = Const(init)
-    b = Core.PartialStruct(typeof(init), Any[Const(init), Any, Any])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c)
-    @test ⊑(b, c)
-
-    init = Base.ImmutableDict{Number,Number}()
-    a = Const(init)
-    b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), Any, ComplexF64])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c) && ⊑(b, c)
-    @test c === typeof(init)
-
-    a = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c) && ⊑(b, c)
-    @test c.fields[2] === Any # or Number
-    @test c.fields[3] === ComplexF64
-
-    b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c)
-    @test ⊑(b, c)
-    @test c.fields[2] === Complex
-    @test c.fields[3] === Complex
+    ⊑ = Compiler.partialorder(Compiler.fallback_lattice)
+    ⊔ = Compiler.join(Compiler.fallback_lattice)
+    𝕃 = Compiler.fallback_lattice
+    Const, PartialStruct = Core.Const, Core.PartialStruct
+    alldefined = Union{Nothing,Bool}[false, false, false]
+    defined1 = Union{Nothing,Bool}[false, nothing, nothing]
+    let init = Base.ImmutableDict{Any,Any}()
+        a = Const(init)
+        b = PartialStruct(𝕃, typeof(init), alldefined, Any[Const(init), Any, Any])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === typeof(init)
+    end
+    let init = Base.ImmutableDict{Any,Any}(1,2)
+        a = Const(init)
+        b = PartialStruct(𝕃, typeof(init), alldefined, Any[Const(getfield(init,1)), Any, Any])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test length(c.fields) == 3
+    end
+    let init = Base.ImmutableDict{Number,Number}()
+        a = Const(init)
+        b = PartialStruct(𝕃, typeof(init), alldefined, Any[Const(init), Number, ComplexF64])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === typeof(init)
+    end
+    let init = Base.ImmutableDict{Number,Number}()
+        a = PartialStruct(𝕃, typeof(init), alldefined, Any[Const(init), ComplexF64, ComplexF64])
+        b = PartialStruct(𝕃, typeof(init), alldefined, Any[Const(init), Number, ComplexF64])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test c.fields[2] === Number
+        @test c.fields[3] === ComplexF64
+    end
+    let init = Base.ImmutableDict{Number,Number}()
+        a = PartialStruct(𝕃, typeof(init), alldefined, Any[Const(init), ComplexF64, ComplexF64])
+        b = PartialStruct(𝕃, typeof(init), alldefined, Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test c.fields[2] === Complex
+        @test c.fields[3] === Complex
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = PartialStruct(𝕃, T, defined1, Any[T, Number, Number])
+        b = PartialStruct(𝕃, T, alldefined, Any[T, Number, Number])
+        @test b ⊑ a
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test length(c.fields) == 3 && c.undefs == defined1
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = PartialStruct(𝕃, T, defined1, Any[T, Number, Number])
+        b = Const(T())
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === T
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = Const(T())
+        b = PartialStruct(𝕃, T, defined1, Any[T, Number, Number])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === T
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = Const(T())
+        b = Const(T(1,2))
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === T
+    end
 
     global const ginit43784 = Base.ImmutableDict{Any,Any}()
     @test Base.return_types() do
@@ -4525,6 +4795,238 @@ end
                 g = Base.ImmutableDict(g, 1=>2)
             end
         end |> only === Union{}
+
+    a = Val{Union{}}
+    a = Compiler.tmerge(Union{a, Val{a}}, a)
+    @test a == Union{Val{Union{}}, Val{Val{Union{}}}}
+    a = Compiler.tmerge(Union{a, Val{a}}, a)
+    @test a == Union{Val{Union{}}, Val{Val{Union{}}}, Val{Union{Val{Union{}}, Val{Val{Union{}}}}}}
+    a = Compiler.tmerge(Union{a, Val{a}}, a)
+    @test a == Val
+
+    a = Val{Union{}}
+    a = Compiler.tmerge(Compiler.JLTypeLattice(), Val{<:a}, a)
+    @test_broken a != Val{<:Val{Union{}}}
+    @test_broken a == Val{<:Val} || a == Val
+
+    a = Tuple{Vararg{Tuple{}}}
+    a = Compiler.tmerge(Compiler.JLTypeLattice(), Tuple{a}, a)
+    @test a == Union{Tuple{Tuple{Vararg{Tuple{}}}}, Tuple{Vararg{Tuple{}}}}
+    a = Compiler.tmerge(Compiler.JLTypeLattice(), Tuple{a}, a)
+    @test a == Tuple{Vararg{Union{Tuple{Tuple{Vararg{Tuple{}}}}, Tuple{Vararg{Tuple{}}}}}}
+    a = Compiler.tmerge(Compiler.JLTypeLattice(), Tuple{a}, a)
+    @test a == Tuple
+    a = Compiler.tmerge(Compiler.JLTypeLattice(), Tuple{a}, a)
+    @test a == Tuple
+end
+
+module _Partials_inference
+    mutable struct Partial
+        x::String
+        y::Integer
+        z::Any
+        Partial() = new()
+    end
+
+    struct Partial2
+        x::String
+        y::Integer
+        z::Any
+        Partial2(x) = new(x)
+    end
+
+    struct Partial3
+        x::Int
+        y::String
+        z::Float64
+        Partial3(x, y) = new(x, y)
+    end
+
+    struct Partial4
+        x::Int
+        y::String
+        z::Float64
+        Partial4(x) = new(x)
+    end
+end
+
+let ⊑ = Compiler.partialorder(Compiler.fallback_lattice)
+    ⋢ = !⊑
+    ⊔ = Compiler.join(Compiler.fallback_lattice)
+    𝕃 = Compiler.fallback_lattice
+    Const, PartialStruct = Core.Const, Core.PartialStruct
+    form_partially_defined_struct = Compiler.form_partially_defined_struct
+    M = _Partials_inference
+    Partial, Partial2, Partial3, Partial4 = M.Partial, M.Partial2, M.Partial3, M.Partial4
+
+    @test  (Const((1,2)) ⊑ PartialStruct(𝕃, Tuple{Int,Int}, Any[Const(1),Int]))
+    @test !(Const((1,2)) ⊑ PartialStruct(𝕃, Tuple{Int,Int,Int}, Any[Const(1),Int,Int]))
+    @test !(Const((1,2,3)) ⊑ PartialStruct(𝕃, Tuple{Int,Int}, Any[Const(1),Int]))
+    @test  (Const((1,2,3)) ⊑ PartialStruct(𝕃, Tuple{Int,Int,Int}, Any[Const(1),Int,Int]))
+    @test  (Const((1,2)) ⊑ PartialStruct(𝕃, Tuple{Int,Vararg{Int}}, Any[Const(1),Vararg{Int}]))
+    @test  (Const((1,2)) ⊑ PartialStruct(𝕃, Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}])) broken=true
+    @test  (Const((1,2,3)) ⊑ PartialStruct(𝕃, Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]))
+    @test !(PartialStruct(𝕃, Tuple{Int,Int}, Any[Const(1),Int]) ⊑ Const((1,2)))
+    @test !(PartialStruct(𝕃, Tuple{Int,Int,Int}, Any[Const(1),Int,Int]) ⊑ Const((1,2)))
+    @test !(PartialStruct(𝕃, Tuple{Int,Int}, Any[Const(1),Int]) ⊑ Const((1,2,3)))
+    @test !(PartialStruct(𝕃, Tuple{Int,Int,Int}, Any[Const(1),Int,Int]) ⊑ Const((1,2,3)))
+    @test !(PartialStruct(𝕃, Tuple{Int,Vararg{Int}}, Any[Const(1),Vararg{Int}]) ⊑ Const((1,2)))
+    @test !(PartialStruct(𝕃, Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]) ⊑ Const((1,2)))
+    @test !(PartialStruct(𝕃, Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]) ⊑ Const((1,2,3)))
+    # test comparison between conflicting elements
+    let a = PartialStruct(M.Partial, Union{Nothing,Bool}[false,false,false], Any[Int,Int,Any])
+        b = Const(M.Partial())
+        @test a ⋢ b && b ⋢ a
+    end
+    let a = PartialStruct(M.Partial, Union{Nothing,Bool}[false,nothing,nothing], Any[Int,Int,Any])
+        b = Const(M.Partial())
+        @test a ⋢ b && b ⋢ a
+    end
+    let a = PartialStruct(M.Partial, Union{Nothing,Bool}[nothing,nothing,nothing], Any[Int,Int,Any])
+        b = Const(M.Partial())
+        @test a ⋢ b && b ⊑ a
+    end
+
+    let t = Const((false, false)) ⊔ Const((false, true))
+        @test t isa PartialStruct && length(t.fields) == 2 && t.fields[1] === Const(false)
+        t = t ⊔ Const((false, false, 0))
+        @test t ⊑ Union{Tuple{Bool,Bool},Tuple{Bool,Bool,Int}}
+    end
+
+    let t = PartialStruct(𝕃, Tuple{Int, Int}, Any[Const(1),Int])
+        @test Compiler.n_initialized(t) == 2
+        @test t ⊑ t && t ⊔ t === t
+        t = PartialStruct(𝕃, Partial, Union{Nothing,Bool}[false,false,nothing], Any[String, Const(2), Any])
+        @test Compiler.n_initialized(t) == 2
+        @test t ⊑ t && t ⊔ t === t
+    end
+
+    let t1 = PartialStruct(𝕃, Partial, Union{Nothing,Bool}[false,false,nothing], Any[String, Const(3), Any])
+        t2 = PartialStruct(𝕃, Partial, Union{Nothing,Bool}[false,nothing,nothing], Any[Const("x"), Integer, Any])
+        @test t1 ⋢ t2 && t2 ⋢ t1
+        t3 = t1 ⊔ t2
+        @test t3.fields == Any[String, Integer, Any]
+    end
+
+    let t1 = PartialStruct(𝕃, Partial, Union{Nothing,Bool}[nothing,false,false], Any[String, Int, Const(3)])
+        @test Compiler.n_initialized(t1) == 0
+        @test t1 ⊑ t1 && t1 ⊔ t1 === t1
+        t2 = PartialStruct(𝕃, Partial, Union{Nothing,Bool}[false,nothing,false], Any[Const("x"), Int, Any])
+        @test Compiler.n_initialized(t2) == 1
+        @test t1 ⊔ t2 isa PartialStruct
+    end
+
+    let t1 = PartialStruct(𝕃, Tuple{Int,String,Vararg}, Any[Int, String, Vararg])
+        @test t1 ⊑ t1 && t1 ⊔ t1 == t1
+        t2 = PartialStruct(𝕃, Tuple{Int,String}, Any[Int, String])
+        @test t1 ⋢ t2 && t2 ⋢ t1
+        t3 = t1 ⊔ t2
+        @test_broken t3 isa PartialStruct && Compiler.n_initialized(t3) == 2
+    end
+
+    let t = PartialStruct(𝕃, Partial, Union{Nothing,Bool}[false,nothing,nothing], Any[Const("x"),Integer,Any])
+        @test form_partially_defined_struct(𝕃, t, Const(:x)) === nothing
+        t′ = form_partially_defined_struct(𝕃, t, Const(:z))
+        @test t′ == PartialStruct(𝕃, Partial, Union{Nothing,Bool}[false,nothing,false], Any[Const("x"), Integer, Any])
+    end
+    let t = PartialStruct(𝕃, Partial, Union{Nothing,Bool}[false,nothing,nothing], Any[String,Integer,Const(2)])
+        @test form_partially_defined_struct(𝕃, t, Const(:x)) === nothing
+        t′ = form_partially_defined_struct(𝕃, t, Const(:z))
+        @test t′ == PartialStruct(𝕃, Partial, Union{Nothing,Bool}[false,nothing,false], Any[String,Integer,Const(2)])
+    end
+
+    @test form_partially_defined_struct(𝕃, Partial2, Const(:x)) === nothing
+    let t = PartialStruct(𝕃, Partial2, Any[String, Const(2), Any])
+        @test form_partially_defined_struct(𝕃, t, Const(:x)) === nothing
+        t′ = form_partially_defined_struct(𝕃, t, Const(:z))
+        @test t′ == PartialStruct(𝕃, Partial2, Union{Nothing,Bool}[false,nothing,false], Any[String, Const(2), Any])
+    end
+
+    @test form_partially_defined_struct(𝕃, Partial3, Const(:x)) === nothing
+    @test form_partially_defined_struct(𝕃, Partial3, Const(:y)) === nothing
+    let t = form_partially_defined_struct(𝕃, Partial3, Const(:z))
+        @test t == PartialStruct(𝕃, Partial3, Union{Nothing,Bool}[false,false,false], Any[Int, String, Float64])
+    end
+    let t = PartialStruct(𝕃, Partial3, Any[Int, String, Float64])
+        t′ = form_partially_defined_struct(𝕃, t, Const(:z))
+        @test t′ == PartialStruct(𝕃, Partial3, Union{Nothing,Bool}[false,false,false], Any[Int, String, Float64])
+    end
+
+    let t1 = PartialStruct(𝕃, Partial4, Union{Nothing,Bool}[false,false,nothing], Any[Int, String, Float64])
+        t2 = PartialStruct(𝕃, Partial4, Union{Nothing,Bool}[false,nothing,false], Any[Const(1), String, Float64])
+        @test t1 ⋢ t2 && t2 ⋢ t1
+        c = Const(Partial4(1))
+        @test c ⋢ t1 && t1 ⋢ c && c ⊑ t2 && t2 ⋢ c
+        t3 = PartialStruct(𝕃, Partial4, Union{Nothing,Bool}[false,false,nothing], Any[Const(1), Const("x"), Float64])
+        @test c ⋢ t3 && t3 ⋢ c
+    end
+
+    let c = Const(Ref{Any}(1))
+        t = PartialStruct(Base.RefValue{Any}, Union{Nothing,Bool}[true], Any[String])
+        @test c ⋢ t && t ⋢ c
+    end
+
+    let a = PartialStruct(𝕃, Base.RefValue{Any}, Union{Nothing,Bool}[false], Any[Int])
+        b = PartialStruct(𝕃, Base.RefValue{Any}, Union{Nothing,Bool}[true], Any[Int])
+        @test a ⊔ b == b ⊔ a
+        c = a ⊔ b
+        @test c isa PartialStruct && Compiler.n_initialized(c) == 0
+    end
+    let a = PartialStruct(𝕃, Base.RefValue{Any}, Union{Nothing,Bool}[false], Any[Int])
+        b = PartialStruct(𝕃, Base.RefValue{Any}, Union{Nothing,Bool}[nothing], Any[Int])
+        @test a ⊔ b == b ⊔ a
+        c = a ⊔ b
+        @test c isa PartialStruct && Compiler.n_initialized(c) == 0
+    end
+    let a = PartialStruct(𝕃, Base.RefValue{Int}, Union{Nothing,Bool}[false], Any[Int])
+        b = Const(Base.RefValue{Int}(42))
+        @test a == a ⊔ b == b ⊔ a
+    end
+    let a = Const(Base.RefValue{Int}(1))
+        b = Const(Base.RefValue{Int}(2))
+        @test a ⊔ b == b ⊔ a
+        c = a ⊔ b
+        @test c isa PartialStruct && Compiler.n_initialized(c) == 1
+    end
+end
+
+# strict undef information of `PartialStruct`
+struct StrictUndefXY1{X,Y}
+    x::X
+    y::Y
+    StrictUndefXY1{Y}(x::X) where {X,Y} = new{X,Y}(x)
+    StrictUndefXY1(x::X,y::Y) where {X,Y} = new{X,Y}(x,y)
+end
+@test Base.infer_return_type() do
+    Val(isdefined(StrictUndefXY1{Union{}}(42), :y))
+end == Val{false}
+@test Base.infer_return_type() do
+    Val(isdefined(StrictUndefXY1{Int}(42), :y))
+end == Val{true}
+@test Base.infer_return_type() do
+    Val(isdefined(StrictUndefXY1(42,nothing), :y))
+end == Val{true}
+
+mutable struct StrictUndefXY2{X,Y}
+    const x::X
+    y::Y
+    StrictUndefXY2{Y}(x::X) where {X,Y} = new{X,Y}(x)
+    StrictUndefXY2(x::X,y::Y) where {X,Y} = new{X,Y}(x,y)
+end
+@test Base.infer_return_type() do
+    Val(isdefined(StrictUndefXY2{Union{}}(42), :y))
+end == Val{false}
+@test Base.infer_return_type() do
+    Val(isdefined(StrictUndefXY2{Bool}(42), :y))
+end == Val{true}
+let rt = Base.infer_return_type((Bool,)) do b
+        xy = StrictUndefXY2{Any}(42)
+        if b
+            xy.y = nothing
+        end
+        Val(isdefined(xy, :y))
+    end
+    @test rt >: Val{false} && rt >: Val{true}
 end
 
 # Test that a function-wise `@max_methods` works as expected
@@ -4550,18 +5052,18 @@ end
 # at top level.
 @test let
     Base.Experimental.@force_compile
-    Core.Compiler.return_type(+, NTuple{2, Rational})
+    Base._return_type(+, NTuple{2, Rational})
 end == Rational
 
 # vararg-tuple comparison within `Compiler.PartialStruct`
 # https://github.com/JuliaLang/julia/issues/44965
-let 𝕃ᵢ = Core.Compiler.fallback_lattice
-    t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Any}])
-    @test Core.Compiler.issimplertype(𝕃ᵢ, t, t)
+let 𝕃ᵢ = Compiler.fallback_lattice
+    t = Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Any}])
+    @test Compiler.issimplertype(𝕃ᵢ, t, t)
 
-    t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Union{}}])
+    t = Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Union{}}])
     @test t === Const((42,))
-    t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Int, Vararg{Union{}}])
+    t = Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Int, Vararg{Union{}}])
     @test t.typ === Tuple{Int, Int}
     @test t.fields == Any[Const(42), Int]
 end
@@ -4633,7 +5135,7 @@ end |> only === Union{Int,Nothing}
 @test Base.return_types((Symbol,Int)) do setting, val
     compilerbarrier(setting, val)
 end |> only === Any # XXX we may want to have "compile-time" error for this instead
-for setting = (:type, :const, :conditional)
+for setting = (#=:type, :const,=# :conditional,)
     # a successful barrier on abstract interpretation should be eliminated at the optimization
     @test @eval fully_eliminated((Int,)) do a
         compilerbarrier($(QuoteNode(setting)), 42)
@@ -4661,7 +5163,7 @@ let src = code_typed1() do
 end
 
 # Test that Const ⊑ PartialStruct respects vararg
-@test Const((1,2)) ⊑ PartialStruct(Tuple{Vararg{Int}}, [Const(1), Vararg{Int}])
+@test Const((1,2)) ⊑ PartialStruct(Compiler.fallback_lattice, Tuple{Int,Vararg{Int}}, Union{Nothing,Bool}[false,nothing], [Const(1), Vararg{Int}])
 
 # Test that semi-concrete interpretation doesn't break on functions with while loops in them.
 Base.@assume_effects :consistent :effect_free :terminates_globally function pure_annotated_loop(x::Int, y::Int)
@@ -4687,7 +5189,7 @@ invoke_concretized1(a::Integer) = a > 0 ? "integer" : nothing
 # check if `invoke(invoke_concretized1, Tuple{Integer}, ::Int)` is foldable
 @test Base.infer_effects((Int,)) do a
     @invoke invoke_concretized1(a::Integer)
-end |> Core.Compiler.is_foldable
+end |> Compiler.is_foldable
 @test Base.return_types() do
     @invoke invoke_concretized1(42::Integer)
 end |> only === String
@@ -4697,7 +5199,7 @@ invoke_concretized2(a::Integer) = a > 0 ? :integer : nothing
 # check if `invoke(invoke_concretized2, Tuple{Integer}, ::Int)` is foldable
 @test Base.infer_effects((Int,)) do a
     @invoke invoke_concretized2(a::Integer)
-end |> Core.Compiler.is_foldable
+end |> Compiler.is_foldable
 @test let
     Base.Experimental.@force_compile
     @invoke invoke_concretized2(42::Integer)
@@ -4717,11 +5219,11 @@ end
 @testset "#45956: non-linearized cglobal needs special treatment for stmt effects" begin
     function foo()
         cglobal((a, ))
-        ccall(0, Cvoid, (Nothing,), b)
+        ccall(C_NULL, Cvoid, (Nothing,), b)
     end
     @test only(code_typed() do
         cglobal((a, ))
-        ccall(0, Cvoid, (Nothing,), b)
+        ccall(C_NULL, Cvoid, (Nothing,), b)
     end)[2] === Nothing
 end
 
@@ -4775,34 +5277,32 @@ g() = empty_nt_values(Base.inferencebarrier(Tuple{}))
 # This is somewhat sensitive to the exact recursion level that inference is willing to do, but the intention
 # is to test the case where inference limited a recursion, but then a forced constprop nevertheless managed
 # to terminate the call.
+@newinterp RecurseInterpreter
+function Compiler.const_prop_rettype_heuristic(
+    interp::RecurseInterpreter, result::Compiler.MethodCallResult,
+    si::Compiler.StmtInfo, sv::Compiler.AbsIntState, force::Bool)
+    if result.rt isa Compiler.LimitedAccuracy
+        return force # allow forced constprop to recurse into unresolved cycles
+    end
+    return @invoke Compiler.const_prop_rettype_heuristic(
+        interp::Compiler.AbstractInterpreter, result::Compiler.MethodCallResult,
+        si::Compiler.StmtInfo, sv::Compiler.AbsIntState, force::Bool)
+end
 Base.@constprop :aggressive type_level_recurse1(x...) = x[1] == 2 ? 1 : (length(x) > 100 ? x : type_level_recurse2(x[1] + 1, x..., x...))
 Base.@constprop :aggressive type_level_recurse2(x...) = type_level_recurse1(x...)
 type_level_recurse_entry() = Val{type_level_recurse1(1)}()
-@test Base.return_types(type_level_recurse_entry, ()) |> only == Val{1}
+@test Base.infer_return_type(type_level_recurse_entry, (); interp=RecurseInterpreter()) == Val{1}
 
 # Test that inference doesn't give up if it can potentially refine effects,
 # even if the return type is Any.
 f_no_bail_effects_any(x::Any) = x
 f_no_bail_effects_any(x::NamedTuple{(:x,), Tuple{Any}}) = getfield(x, 1)
 g_no_bail_effects_any(x::Any) = f_no_bail_effects_any(x)
-@test Core.Compiler.is_foldable_nothrow(Base.infer_effects(g_no_bail_effects_any, Tuple{Any}))
+@test Compiler.is_foldable_nothrow(Base.infer_effects(g_no_bail_effects_any, Tuple{Any}))
 
 # issue #48374
 @test (() -> Union{<:Nothing})() == Nothing
 
-# :static_parameter accuracy
-unknown_sparam_throw(::Union{Nothing, Type{T}}) where T = @isdefined(T) ? T::Type : nothing
-unknown_sparam_nothrow1(x::Ref{T}) where T = @isdefined(T) ? T::Type : nothing
-unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = @isdefined(T) ? T::Type : nothing
-@test only(Base.return_types(unknown_sparam_throw, (Type{Int},))) == Type{Int}
-@test only(Base.return_types(unknown_sparam_throw, (Type{<:Integer},))) == Type{<:Integer}
-@test only(Base.return_types(unknown_sparam_throw, (Type,))) == Union{Nothing, Type}
-@test_broken only(Base.return_types(unknown_sparam_throw, (Nothing,))) === Nothing
-@test_broken only(Base.return_types(unknown_sparam_throw, (Union{Type{Int},Nothing},))) === Union{Nothing,Type{Int}}
-@test only(Base.return_types(unknown_sparam_throw, (Any,))) === Union{Nothing,Type}
-@test only(Base.return_types(unknown_sparam_nothrow1, (Ref,))) === Type
-@test only(Base.return_types(unknown_sparam_nothrow2, (Ref{Ref{T}} where T,))) === Type
-
 struct Issue49027{Ty<:Number}
     x::Ty
 end
@@ -4950,9 +5450,9 @@ end |> only === Tuple{Int,Symbol}
     end
 end) == Type{Nothing}
 
-# Test that Core.Compiler.return_type inference works for the 1-arg version
+# Test that Base._return_type inference works for the 1-arg version
 @test Base.return_types() do
-    Core.Compiler.return_type(Tuple{typeof(+), Int, Int})
+    Base._return_type(Tuple{typeof(+), Int, Int})
 end |> only == Type{Int}
 
 # Test that NamedTuple abstract iteration works for PartialStruct/Const
@@ -5002,15 +5502,1025 @@ let src = code_typed1((Bool,Base.RefValue{String}, Base.RefValue{Any},Int,)) do
 end
 
 struct Issue49785{S, T<:S} end
-let 𝕃 = Core.Compiler.SimpleInferenceLattice.instance
-    argtypes = Any[Core.Compiler.Const(Issue49785),
+let 𝕃 = Compiler.SimpleInferenceLattice.instance
+    argtypes = Any[Compiler.Const(Issue49785),
         Union{Type{String},Type{Int}},
         Union{Type{String},Type{Int}}]
     rt = Type{Issue49785{<:Any, Int}}
     # the following should not throw
-    @test !Core.Compiler.apply_type_nothrow(𝕃, argtypes, rt)
+    @test !Compiler.apply_type_nothrow(𝕃, argtypes, rt)
     @test code_typed() do
         S = Union{Type{String},Type{Int}}[Int][1]
         map(T -> Issue49785{S,T}, (a = S,))
     end isa Vector
 end
+
+# `getindex(::SimpleVector, ::Int)` should be concrete-evaluated
+@eval Base.return_types() do
+    $(Core.svec(1,Int,nothing))[2]
+end |> only == Type{Int}
+# https://github.com/JuliaLang/julia/issues/50544
+struct Issue50544{T<:Tuple}
+    t::T
+end
+Base.@propagate_inbounds f_issue50544(x, i, ii...) = f_issue50544(f_issue50544(x, i), ii...)
+Base.@propagate_inbounds f_issue50544(::Type{Issue50544{T}}, i) where T = T.parameters[i]
+g_issue50544(T...) = Issue50544{Tuple{T...}}
+h_issue50544(x::T) where T = g_issue50544(f_issue50544(T, 1), f_issue50544(T, 2, 1))
+let x = Issue50544((1, Issue50544((2.0, 'x'))))
+    @test only(Base.return_types(h_issue50544, (typeof(x),))) == Type{Issue50544{Tuple{Int,Float64}}}
+end
+
+# refine const-prop'ed `PartialStruct` with declared method signature type
+Base.@constprop :aggressive function refine_partial_struct1((a, b)::Tuple{String,Int})
+    if iszero(b)
+        println("b=0") # to prevent semi-concrete eval
+        return nothing
+    else
+        return a
+    end
+end
+@test Base.return_types() do s::AbstractString
+    refine_partial_struct1((s, 42))
+end |> only === String
+
+function refine_partial_struct2(xs::Union{Int,String,Symbol}...)
+    first(xs) isa Int && iszero(first(xs)) && return nothing
+    for x in xs[2:end]
+        if x isa String
+            continue
+        else
+            return nothing
+        end
+    end
+    return string(length(xs))
+end
+@test Base.return_types() do s::AbstractString
+    refine_partial_struct2(42, s)
+end |> only === String
+# JET.test_call(s::AbstractString->Base._string(s, 'c'))
+
+# issue #45759 #46557
+g45759(x::Tuple{Any,Vararg}) = x[1] + _g45759(x[2:end])
+g45759(x::Tuple{}) = 0
+_g45759(x) = g45759(x)
+@test only(Base.return_types(g45759, Tuple{Tuple{Int,Int,Int,Int,Int,Int,Int}})) == Int
+
+h45759(x::Tuple{Any,Vararg}; kwargs...) = x[1] + h45759(x[2:end]; kwargs...)
+h45759(x::Tuple{}; kwargs...) = 0
+@test only(Base.return_types(h45759, Tuple{Tuple{Int,Int,Int,Int,Int,Int,Int}})) == Int
+
+# issue #50709
+@test Base.code_typed_by_type(Tuple{Type{Vector{S}} where {T, S<:AbstractVector{T}}, UndefInitializer, Int})[1][2] == Vector{<:AbstractVector{T}} where T
+
+@test only(Base.return_types((typeof([[[1]]]),)) do x
+    sum(x) do v
+        sum(length, v)
+    end
+end) == Int
+
+struct FunctionSum{Tf}
+    functions::Tf
+end
+(F::FunctionSum)(x) = sum(f -> f(x), F.functions)
+F = FunctionSum((x -> sqrt(x), FunctionSum((x -> x^2, x -> x^3))))
+@test @inferred(F(1.)) === 3.0
+
+f31485(arr::AbstractArray{T, 0}) where {T} = arr
+indirect31485(arr) = f31485(arr)
+f31485(arr::AbstractArray{T, N}) where {T, N} = indirect31485(view(arr, 1, ntuple(i -> :, Val(N-1))...))
+@test @inferred(f31485(zeros(3,3,3,3,3),)) == fill(0.0)
+
+# override const-prop' return type with the concrete-eval result
+# if concrete-eval returns non-inlineable constant
+Base.@assume_effects :foldable function continue_const_prop(i, j)
+    chars = map(Char, i:j)
+    String(chars)
+end
+@test Base.return_types() do
+    Val(length(continue_const_prop(1, 5)))
+end |> only === Val{5}
+@test fully_eliminated() do
+    length(continue_const_prop(1, 5))
+end
+
+# issue #51090
+@noinline function bar51090(b)
+    b == 0 && return
+    r = foo51090(b - 1)
+    Base.donotdelete(b)
+    return r
+end
+foo51090(b) = return bar51090(b)
+@test !fully_eliminated(foo51090, (Int,))
+
+Base.@assume_effects :terminates_globally @noinline function bar51090_terminates(b)
+    b == 0 && return
+    r = foo51090_terminates(b - 1)
+    Base.donotdelete(b)
+    return r
+end
+foo51090_terminates(b) = return bar51090_terminates(b)
+@test !fully_eliminated(foo51090_terminates, (Int,))
+
+# exploit throwness from concrete eval for intrinsics
+@test Base.return_types() do
+    Base.or_int(true, 1)
+end |> only === Union{}
+
+# [add|or]_int tfuncs
+@test Base.return_types((Bool,)) do b
+    Val(Core.Intrinsics.and_int(b, false))
+end |> only == Val{false}
+@test Base.return_types((Bool,)) do b
+    Val(Core.Intrinsics.or_int(true, b))
+end |> only == Val{true}
+
+# https://github.com/JuliaLang/julia/issues/51310
+@test code_typed() do
+    b{c} = d...
+end |> only |> first isa Core.CodeInfo
+
+abstract_call_unionall_vararg(some::Some{Any}) = UnionAll(some.value...)
+@test only(Base.return_types(abstract_call_unionall_vararg)) !== Union{}
+let TV = TypeVar(:T)
+    t = Vector{TV}
+    some = Some{Any}((TV, t))
+    @test abstract_call_unionall_vararg(some) isa UnionAll
+end
+
+# use `Vararg` type constraints
+use_vararg_constraint1(args::Vararg{T,N}) where {T,N} = Val(T), Val(N)
+@test only(Base.return_types(use_vararg_constraint1, Tuple{Int,Int})) == Tuple{Val{Int},Val{2}}
+use_vararg_constraint2(args::Vararg{T,N}) where {T,N} = Val(T), N
+@test only(Base.return_types(use_vararg_constraint2, Tuple{Vararg{Int}})) == Tuple{Val{Int},Int}
+use_vararg_constraint3(args::NTuple{N,T}) where {T,N} = Val(T), Val(N)
+@test only(Base.return_types(use_vararg_constraint3, Tuple{Tuple{Int,Int}})) == Tuple{Val{Int},Val{2}}
+use_vararg_constraint4(args::NTuple{N,T}) where {T,N} = Val(T), N
+@test only(Base.return_types(use_vararg_constraint4, Tuple{NTuple{N,Int}} where N)) == Tuple{Val{Int},Int}
+
+# issue 51228
+global whatever_unknown_value51228
+f51228() = f51228(whatever_unknown_value51228)
+f51228(x) = 1
+f51228(::Vararg{T,T}) where {T} = "2"
+@test only(Base.return_types(f51228, ())) == Int
+
+struct A51317
+    b::Tuple{1}
+    A1() = new()
+end
+struct An51317
+    a::Int
+    b::Tuple{1}
+    An51317() = new()
+end
+@test only(Base.return_types((x,f) -> getfield(x, f), (A51317, Symbol))) === Union{}
+@test only(Base.return_types((x,f) -> getfield(x, f), (An51317, Symbol))) === Int
+@test only(Base.return_types(x -> getfield(x, :b), (A51317,))) === Union{}
+@test only(Base.return_types(x -> getfield(x, :b), (An51317,))) === Union{}
+
+# Don't visit the catch block for empty try/catch
+function completely_dead_try_catch()
+    try
+    catch
+        return 2.0
+    end
+    return 1
+end
+@test Base.return_types(completely_dead_try_catch) |> only === Int
+@test fully_eliminated(completely_dead_try_catch)
+
+function nothrow_try_catch()
+    try
+        1+1
+    catch
+        return 2.0
+    end
+    return 1
+end
+@test Base.return_types(nothrow_try_catch) |> only === Int
+@test fully_eliminated(nothrow_try_catch)
+
+may_error(b) = Base.inferencebarrier(b) && error()
+function phic_type1()
+    a = 1
+    try
+        may_error(false)
+        a = 1.0
+    catch
+        return a
+    end
+    return 2
+end
+@test Base.return_types(phic_type1) |> only === Int
+@test phic_type1() === 2
+
+function phic_type2()
+    a = 1
+    try
+        may_error(false)
+        a = 1.0
+        may_error(false)
+    catch
+        return a
+    end
+    return 2
+end
+@test Base.return_types(phic_type2) |> only === Union{Int, Float64}
+@test phic_type2() === 2
+
+function phic_type3()
+    a = 1
+    try
+        may_error(false)
+        a = 1.0
+        may_error(false)
+        if Base.inferencebarrier(false)
+            a = Ref(1)
+        elseif Base.inferencebarrier(false)
+            a = nothing
+        end
+    catch
+        return a
+    end
+    return 2
+end
+@test Base.return_types(phic_type3) |> only === Union{Int, Float64}
+@test phic_type3() === 2
+
+# Issue #51852
+function phic_type4()
+    a = (;progress = "a")
+    try
+        may_error(false)
+        let b = Base.inferencebarrier(true) ? (;progress = 1.0) : a
+            a = b
+        end
+    catch
+    end
+    GC.gc()
+    return a
+end
+@test Base.return_types(phic_type4) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::String}}
+@test phic_type4() === (;progress = 1.0)
+
+function phic_type5()
+    a = (;progress = "a")
+    try
+        vals = (a, (progress=1.0,))
+        may_error(false)
+        a = vals[Base.inferencebarrier(false) ? 1 : 2]
+    catch
+    end
+    GC.gc()
+    return a
+end
+@test Base.return_types(phic_type5) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::String}}
+@test phic_type5() === (;progress = 1.0)
+
+function phic_type6()
+    a = Base.inferencebarrier(true) ? (;progress = "a") : (;progress = Ref{Any}(0))
+    try
+        may_error(false)
+        let b = Base.inferencebarrier(true) ? (;progress = 1.0) : a
+            a = b
+        end
+    catch
+    end
+    GC.gc()
+    return a
+end
+@test Base.return_types(phic_type6) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::Base.RefValue{Any}}, @NamedTuple{progress::String}}
+@test phic_type6() === (;progress = 1.0)
+
+function phic_type7()
+    a = Base.inferencebarrier(true) ? (;progress = "a") : (;progress = Ref{Any}(0))
+    try
+        vals = (a, (progress=1.0,))
+        may_error(false)
+        a = vals[Base.inferencebarrier(false) ? 1 : 2]
+    catch
+    end
+    GC.gc()
+    return a
+end
+@test Base.return_types(phic_type7) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::Base.RefValue{Any}}, @NamedTuple{progress::String}}
+@test phic_type7() === (;progress = 1.0)
+
+function phic_type8()
+    local a
+    try
+        may_error(true)
+        a = Base.inferencebarrier(1)
+    catch
+    end
+
+    try
+        a = 2
+        may_error(true)
+    catch
+    end
+    GC.gc()
+    return a
+end
+@test Base.return_types(phic_type8) |> only === Int
+@test phic_type8() === 2
+
+function phic_type9()
+    local a
+    try
+        may_error(false)
+        a = Base.inferencebarrier(false) ? 1 : nothing
+    catch
+    end
+
+    try
+        a = 2
+        may_error(true)
+    catch
+    end
+    GC.gc()
+    return a
+end
+@test Base.return_types(phic_type9) |> only === Int
+@test phic_type9() === 2
+
+function phic_type10()
+    local a
+    try
+        may_error(false)
+        a = Base.inferencebarrier(true) ? missing : nothing
+    catch
+    end
+
+    try
+        Base.inferencebarrier(true) && (a = 2)
+        may_error(true)
+    catch
+    end
+    GC.gc()
+    return a::Int
+end
+@test Base.return_types(phic_type10) |> only === Int
+@test phic_type10() === 2
+
+undef_trycatch() = try (a_undef_trycatch = a_undef_trycatch, b = 2); return 1 catch end
+# `global a_undef_trycatch` could be defined dynamically, so both paths must be allowed
+@test Base.return_types(undef_trycatch) |> only === Union{Nothing, Int}
+@test undef_trycatch() === nothing
+
+# Test that `exit` returns `Union{}` (issue #51856)
+function test_exit_bottom(s)
+    n = tryparse(Int, s)
+    isnothing(n) && exit()
+    n
+end
+@test only(Base.return_types(test_exit_bottom, Tuple{String})) == Int
+
+function foo_typed_throw_error()
+    try
+        error()
+    catch e
+        if isa(e, ErrorException)
+            return 1.0
+        end
+    end
+    return 1
+end
+@test Base.return_types(foo_typed_throw_error) |> only === Float64
+
+will_throw_no_method(x::Int) = 1
+function foo_typed_throw_metherr()
+    try
+        will_throw_no_method(1.0)
+    catch e
+        if isa(e, MethodError)
+            return 1.0
+        end
+    end
+    return 1
+end
+@test Base.return_types(foo_typed_throw_metherr) |> only === Float64
+
+# refine `exct` when `:nothrow` is proven
+Base.@assume_effects :nothrow function sin_nothrow(x::Float64)
+    x == Inf && return zero(x)
+    return sin(x)
+end
+@test Base.infer_exception_type(sin_nothrow, (Float64,)) == Union{}
+@test Base.return_types((Float64,)) do x
+    try
+        return sin_nothrow(x)
+    catch err
+        return err
+    end
+end |> only === Float64
+# for semi-concrete interpretation result too
+Base.@constprop :aggressive function sin_maythrow(x::Float64, maythrow::Bool)
+    if maythrow
+        return sin(x)
+    else
+        return @noinline sin_nothrow(x)
+    end
+end
+@test Base.return_types((Float64,)) do x
+    try
+        return sin_maythrow(x, false)
+    catch err
+        return err
+    end
+end |> only === Float64
+
+# exception type from GotoIfNot
+@test Base.infer_exception_type(c::Bool -> c ? 1 : 2) == Union{}
+@test Base.infer_exception_type(c::Missing -> c ? 1 : 2) == TypeError
+@test Base.infer_exception_type(c::Any -> c ? 1 : 2) == TypeError
+
+# exception type inference for `:new`
+struct NewExctInference
+    a::Int
+    @eval NewExctInference(a) = $(Expr(:new, :NewExctInference, :a))
+end
+@test Base.infer_exception_type(NewExctInference, (Float64,)) == TypeError
+
+# semi-concrete interpretation accuracy
+# https://github.com/JuliaLang/julia/issues/50037
+@inline countvars50037(bitflags::Int, var::Int) = bitflags >> 0
+@test Base.infer_return_type() do var::Int
+    Val(countvars50037(1, var))
+end == Val{1}
+
+# Issue #52168
+f52168(x, t::Type) = x::NTuple{2, Base.inferencebarrier(t)::Type}
+@test f52168((1, 2.), Any) === (1, 2.)
+
+# Issue #27031
+let x = 1, _Any = Any
+    @noinline bar27031(tt::Tuple{T,T}, ::Type{Val{T}}) where {T} = notsame27031(tt)
+    @noinline notsame27031(tt::Tuple{T, T}) where {T} = error()
+    @noinline notsame27031(tt::Tuple{T, S}) where {T, S} = "OK"
+    foo27031() = bar27031((x, 1.0), Val{_Any})
+    @test foo27031() == "OK"
+end
+
+# Issue #51927
+let apply_type_tfunc = Compiler.apply_type_tfunc
+    𝕃 = Compiler.fallback_lattice
+    @test apply_type_tfunc(𝕃, Const(Tuple{Vararg{Any,N}} where N), Int) == Type{NTuple{_A, Any}} where _A
+end
+
+# Issue #52613
+@test (code_typed((Any,)) do x; TypeVar(x...); end)[1][2] === TypeVar
+
+# https://github.com/JuliaLang/julia/issues/53590
+func53590(b) = b ? Int : Float64
+function issue53590(b1, b2)
+    T1 = func53590(b1)
+    T2 = func53590(b2)
+    return typejoin(T1, T2)
+end
+@test issue53590(true, true) == Int
+@test issue53590(true, false) == Real
+@test issue53590(false, false) == Float64
+@test issue53590(false, true) == Real
+
+# Expr(:throw_undef_if_not) handling
+@eval function has_tuin()
+    $(Expr(:throw_undef_if_not, :x, false))
+end
+@test Base.infer_return_type(has_tuin, Tuple{}) === Union{}
+@test_throws UndefVarError has_tuin()
+
+function gen_tuin_from_arg(world::UInt, source, _, _)
+    ci = make_codeinfo(Any[
+        Expr(:throw_undef_if_not, :x, Core.Argument(2)),
+        ReturnNode(true),
+    ]; slottypes=Any[Any, Bool])
+    ci.slotnames = Symbol[:var"#self#", :def]
+    ci.nargs = 2
+    ci.isva = false
+    ci
+end
+
+@eval function has_tuin2(def)
+    $(Expr(:meta, :generated, gen_tuin_from_arg))
+    $(Expr(:meta, :generated_only))
+end
+@test_throws UndefVarError has_tuin2(false)
+@test has_tuin2(true)
+
+# issue #53585
+let t = ntuple(i -> i % 8 == 1 ? Int64 : Float64, 4000)
+    @test only(Base.return_types(Base.promote_typeof, t)) == Type{Float64}
+    @test only(Base.return_types(vcat, t)) == Vector{Float64}
+end
+
+# Infinite loop in inference on SSA assignment
+const stop_infinite_loop::Base.Threads.Atomic{Bool} = Base.Threads.Atomic{Bool}(false)
+function gen_infinite_loop_ssa_generator(world::UInt, source, _)
+    ci = make_codeinfo(Any[
+        # Block 1
+        (),
+        # Block 2
+        PhiNode(Int32[1, 5], Any[SSAValue(1), SSAValue(3)]),
+        Expr(:call, tuple, SSAValue(2)),
+        Expr(:call, getindex, GlobalRef(@__MODULE__, :stop_infinite_loop)),
+        GotoIfNot(SSAValue(4), 2),
+        # Block 3
+        ReturnNode(SSAValue(2))
+    ]; slottypes=Any[Any])
+    ci.slotnames = Symbol[:var"#self#"]
+    ci.nargs = 1
+    ci.isva = false
+    ci
+end
+
+@eval function gen_infinite_loop_ssa()
+    $(Expr(:meta, :generated, gen_infinite_loop_ssa_generator))
+    $(Expr(:meta, :generated_only))
+    #= no body =#
+end
+
+# We want to make sure that both this returns `Tuple` and that
+# it doesn't infinite loop inside inference.
+@test Base.infer_return_type(gen_infinite_loop_ssa, Tuple{}) === Tuple
+
+# inference local cache lookup with extended lattice elements that may be transformed
+# by `matching_cache_argtypes`
+@newinterp CachedConditionalInterp
+Base.@constprop :aggressive function func_cached_conditional(x, y)
+    if x
+        @noinline sin(y)
+    else
+        0.0
+    end
+end;
+function test_func_cached_conditional(y)
+    y₁ = func_cached_conditional(isa(y, Float64), y)
+    y₂ = func_cached_conditional(isa(y, Float64), y)
+    return y₁, y₂
+end;
+let interp = CachedConditionalInterp();
+    @test Base.infer_return_type(test_func_cached_conditional, (Any,); interp) == Tuple{Float64, Float64}
+    @test count(interp.inf_cache) do result
+        result.linfo.def.name === :func_cached_conditional
+    end == 1
+end
+
+# fieldcount on `Tuple` should constant fold, even though `.fields` not const
+@test fully_eliminated(Base.fieldcount, Tuple{Type{Tuple{Nothing, Int, Int}}})
+
+# Vararg-constprop regression from MutableArithmetics (#54341)
+global SIDE_EFFECT54341::Int
+function foo54341(a, b, c, d, args...)
+    # Side effect to force constprop rather than semi-concrete
+    global SIDE_EFFECT54341 = a + b + c + d
+    return SIDE_EFFECT54341
+end
+bar54341(args...) = foo54341(4, args...)
+
+@test Base.infer_return_type(bar54341, Tuple{Vararg{Int}}) === Int
+
+# `PartialStruct` for partially initialized structs:
+struct PartiallyInitialized1
+    a; b; c
+    PartiallyInitialized1(a) = (@nospecialize; new(a))
+    PartiallyInitialized1(a, b) = (@nospecialize; new(a, b))
+    PartiallyInitialized1(a, b, c) = (@nospecialize; new(a, b, c))
+end
+mutable struct PartiallyInitialized2
+    a; b; c
+    PartiallyInitialized2(a) = (@nospecialize; new(a))
+    PartiallyInitialized2(a, b) = (@nospecialize; new(a, b))
+    PartiallyInitialized2(a, b, c) = (@nospecialize; new(a, b, c))
+end
+
+# 1. isdefined modeling for partial struct
+@test Base.infer_return_type((Any,Any)) do a, b
+    Val(isdefined(PartiallyInitialized1(a, b), :b))
+end == Val{true}
+@test Base.infer_return_type((Any,Any,)) do a, b
+    Val(isdefined(PartiallyInitialized1(a, b), :c))
+end >: Val{false}
+@test Base.infer_return_type((PartiallyInitialized1,)) do x
+    @assert isdefined(x, :a)
+    return Val(isdefined(x, :c))
+end == Val
+@test Base.infer_return_type((Any,Any,Any)) do a, b, c
+    Val(isdefined(PartiallyInitialized1(a, b, c), :c))
+end == Val{true}
+@test Base.infer_return_type((Any,Any)) do a, b
+    Val(isdefined(PartiallyInitialized2(a, b), :b))
+end == Val{true}
+@test Base.infer_return_type((Any,Any,)) do a, b
+    Val(isdefined(PartiallyInitialized2(a, b), :c))
+end >: Val{false}
+@test Base.infer_return_type((Any,Any,Any)) do a, b, c
+    s = PartiallyInitialized2(a, b)
+    s.c = c
+    Val(isdefined(s, :c))
+end >: Val{true}
+@test Base.infer_return_type((Any,Any,Any)) do a, b, c
+    Val(isdefined(PartiallyInitialized2(a, b, c), :c))
+end == Val{true}
+@test Base.infer_return_type((Vector{Int},)) do xs
+    Val(isdefined(tuple(1, xs...), 1))
+end == Val{true}
+@test Base.infer_return_type((Vector{Int},)) do xs
+    Val(isdefined(tuple(1, xs...), 2))
+end == Val
+
+# 2. getfield modeling for partial struct
+@test Base.infer_effects((Any,Any); optimize=false) do a, b
+    getfield(PartiallyInitialized1(a, b), :b)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Symbol,); optimize=false) do a, b, f
+    getfield(PartiallyInitialized1(a, b), f, #=boundscheck=#false)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any); optimize=false) do a, b, c
+    getfield(PartiallyInitialized1(a, b, c), :c)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any,Symbol); optimize=false) do a, b, c, f
+    getfield(PartiallyInitialized1(a, b, c), f, #=boundscheck=#false)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Any,Any); optimize=false) do a, b
+    getfield(PartiallyInitialized2(a, b), :b)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Symbol,); optimize=false) do a, b, f
+    getfield(PartiallyInitialized2(a, b), f, #=boundscheck=#false)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any); optimize=false) do a, b, c
+    getfield(PartiallyInitialized2(a, b, c), :c)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any,Symbol); optimize=false) do a, b, c, f
+    getfield(PartiallyInitialized2(a, b, c), f, #=boundscheck=#false)
+end |> Compiler.is_nothrow
+
+# isdefined-Conditionals
+@test Base.infer_effects((Base.RefValue{Any},)) do x
+    if isdefined(x, :x)
+        return getfield(x, :x)
+    end
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Base.RefValue{Any},)) do x
+    if isassigned(x)
+        return x[]
+    end
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Any,Any); optimize=false) do a, c
+    x = PartiallyInitialized2(a)
+    x.c = c
+    if isdefined(x, :c)
+        return x.b
+    end
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((PartiallyInitialized2,); optimize=false) do x
+    if isdefined(x, :b)
+        if isdefined(x, :c)
+            return x.c
+        end
+        return x.b
+    end
+    return nothing
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Bool,Int,); optimize=false) do c, b
+    x = c ? PartiallyInitialized1(true) : PartiallyInitialized1(true, b)
+    if isdefined(x, :b)
+        return Val(x.a), x.b
+    end
+    return nothing
+end |> Compiler.is_nothrow
+
+# refine `undef` information from `@isdefined` check
+function isdefined_nothrow(c, x)
+    local val
+    if c
+        val = x
+    end
+    if @isdefined val
+        return val
+    end
+    return zero(Int)
+end
+@test Compiler.is_nothrow(Base.infer_effects(isdefined_nothrow, (Bool,Int)))
+@test !any(first(only(code_typed(isdefined_nothrow, (Bool,Int)))).code) do @nospecialize x
+    Meta.isexpr(x, :throw_undef_if_not)
+end
+
+# End to end test case for the partially initialized struct with `PartialStruct`
+@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing)
+@test fully_eliminated() do
+    broadcast_noescape1(Ref("x"))
+end
+
+# InterConditional rt with Vararg argtypes
+fcondvarargs(a, b, c, d) = isa(d, Int64)
+gcondvarargs(a, x...) = return fcondvarargs(a, x...) ? isa(a, Int64) : !isa(a, Int64)
+@test Base.infer_return_type(gcondvarargs, Tuple{Vararg{Any}}) === Bool
+
+# JuliaLang/julia#55627: argtypes check in `abstract_call_opaque_closure`
+issue55627_make_oc() = Base.Experimental.@opaque (x::Int) -> 2x
+@test Base.infer_return_type() do
+    f = issue55627_make_oc()
+    return f(1), f()
+end == Union{}
+@test Base.infer_return_type((Vector{Int},)) do xs
+    f = issue55627_make_oc()
+    return f(1), f(xs...)
+end == Tuple{Int,Int}
+@test Base.infer_exception_type() do
+    f = issue55627_make_oc()
+    return f(1), f()
+end >: MethodError
+@test Base.infer_exception_type() do
+    f = issue55627_make_oc()
+    return f(1), f('1')
+end >: TypeError
+
+# `exct` modeling for opaque closure
+oc_exct_1() = Base.Experimental.@opaque (x) -> x < 0 ? throw(x) : x
+@test Base.infer_exception_type((Int,)) do x
+    oc_exct_1()(x)
+end == Int
+oc_exct_2() = Base.Experimental.@opaque Tuple{Number}->Number (x) -> '1'
+@test Base.infer_exception_type((Int,)) do x
+    oc_exct_2()(x)
+end == TypeError
+
+# nothrow modeling for `invoke` calls
+f_invoke_nothrow(::Number) = :number
+f_invoke_nothrow(::Int) = :int
+@test Base.infer_effects((Int,)) do x
+    @invoke f_invoke_nothrow(x::Number)
+end |> Compiler.is_nothrow
+@test Base.infer_effects((Char,)) do x
+    @invoke f_invoke_nothrow(x::Number)
+end |> !Compiler.is_nothrow
+@test Base.infer_effects((Union{Nothing,Int},)) do x
+    @invoke f_invoke_nothrow(x::Number)
+end |> !Compiler.is_nothrow
+
+# `exct` modeling for `invoke` calls
+f_invoke_exct(x::Number) = x < 0 ? throw(x) : x
+f_invoke_exct(x::Int) = x
+@test Base.infer_exception_type((Int,)) do x
+    @invoke f_invoke_exct(x::Number)
+end == Int
+@test Base.infer_exception_type() do
+    @invoke f_invoke_exct(42::Number)
+end == Union{}
+@test Base.infer_exception_type((Union{Nothing,Int},)) do x
+    @invoke f_invoke_exct(x::Number)
+end == Union{Int,TypeError}
+@test Base.infer_exception_type((Int,)) do x
+    invoke(f_invoke_exct, Number, x)
+end == TypeError
+@test Base.infer_exception_type((Char,)) do x
+    invoke(f_invoke_exct, Tuple{Number}, x)
+end == TypeError
+
+@test Base.infer_exception_type((Vector{Any},)) do args
+    Core.throw_methoderror(args...)
+end == Union{MethodError,ArgumentError}
+
+# Issue https://github.com/JuliaLang/julia/issues/55751
+
+abstract type AbstractGrid55751{T, N} <: AbstractArray{T, N} end
+struct Grid55751{T, N, AT} <: AbstractGrid55751{T, N}
+    axes::AT
+end
+
+t155751 = Union{AbstractArray{UInt8, 4}, Array{Float32, 4}, Grid55751{Float32, 3, _A} where _A}
+t255751 = Array{Float32, 3}
+@test Compiler.tmerge_types_slow(t155751,t255751) == AbstractArray # shouldn't hang
+
+issue55882_nfields(x::Union{T,Nothing}) where T<:Number = nfields(x)
+@test Base.infer_return_type(issue55882_nfields) <: Int
+
+# issue #55916
+f55916(x) = 1
+f55916(::Vararg{T,T}) where {T} = "2"
+g55916(x) = f55916(x)
+# this shouldn't error
+@test only(code_typed(g55916, (Any,); optimize=false))[2] == Int
+
+# JuliaLang/julia#56248
+@test Base.infer_return_type() do
+    TypeVar(:Issue56248, 1)
+end === Union{}
+@test Base.infer_return_type() do
+    TypeVar(:Issue56248, Any, 1)
+end === Union{}
+
+@test Base.infer_return_type((Nothing,)) do x
+    @atomic x.count += 1
+end == Union{}
+@test Base.infer_return_type((Nothing,)) do x
+    @atomicreplace x.count 0 => 1
+end == Union{}
+mutable struct AtomicModifySafety
+    @atomic count::Int
+end
+let src = code_typed((Union{Nothing,AtomicModifySafety},)) do x
+        @atomic x.count += 1
+    end |> only |> first
+    @test any(@nospecialize(x)->Meta.isexpr(x, :invoke_modify), src.code)
+end
+
+function issue56387(nt::NamedTuple, field::Symbol=:a)
+    NT = typeof(nt)
+    names = fieldnames(NT)
+    types = fieldtypes(NT)
+    index = findfirst(==(field), names)
+    if index === nothing
+        throw(ArgumentError("Field $field not found"))
+    end
+    types[index]
+end
+@test Base.infer_return_type(issue56387, (typeof((;a=1)),)) == Type{Int}
+
+# `apply_type_tfunc` with `Union` in its arguments
+let apply_type_tfunc = Compiler.apply_type_tfunc
+    𝕃 = Compiler.fallback_lattice
+    Const = Core.Const
+    @test apply_type_tfunc(𝕃, Any[Const(Vector), Union{Type{Int},Type{Nothing}}]) == Union{Type{Vector{Int}},Type{Vector{Nothing}}}
+end
+
+@test Base.infer_return_type((Bool,Int,)) do b, y
+    x = b ? 1 : missing
+    inner = y -> x + y
+    return inner(y)
+end == Union{Int,Missing}
+
+function issue31909(ys)
+    x = if @noinline rand(Bool)
+        1
+    else
+        missing
+    end
+    map(y -> x + y, ys)
+end
+@test Base.infer_return_type(issue31909, (Vector{Int},)) == Union{Vector{Int},Vector{Missing}}
+
+global setglobal!_refine::Int
+@test Base.infer_return_type((Integer,)) do x
+    setglobal!(@__MODULE__, :setglobal!_refine, x)
+end === Int
+global setglobal!_must_throw::Int = 42
+@test Base.infer_return_type((String,)) do x
+    setglobal!(@__MODULE__, :setglobal!_must_throw, x)
+end === Union{}
+
+global swapglobal!_xxx::Int = 42
+@test Base.infer_return_type((Int,)) do x
+    swapglobal!(@__MODULE__, :swapglobal!_xxx, x)
+end === Int
+@test Base.infer_return_type((String,)) do x
+    swapglobal!(@__MODULE__, :swapglobal!_xxx, x)
+end === Union{}
+
+@newinterp AssumeBindingsStaticInterp
+Compiler.InferenceParams(::AssumeBindingsStaticInterp) = Compiler.InferenceParams(; assume_bindings_static=true)
+
+eval(Expr(:const, :swapglobal!_must_throw))
+function func_swapglobal!_must_throw(x)
+    swapglobal!(@__MODULE__, :swapglobal!_must_throw, x)
+end
+@test Base.infer_return_type(func_swapglobal!_must_throw, (Int,); interp=AssumeBindingsStaticInterp()) === Union{}
+@test !Compiler.is_effect_free(Base.infer_effects(func_swapglobal!_must_throw, (Int,); interp=AssumeBindingsStaticInterp()) )
+
+global global_decl_defined
+global_decl_defined = 42
+@test Base.infer_effects(; interp=AssumeBindingsStaticInterp()) do
+    global global_decl_defined
+    return global_decl_defined
+end |> Compiler.is_nothrow
+global global_decl_defined2::Int
+global_decl_defined2 = 42
+@test Base.infer_effects(; interp=AssumeBindingsStaticInterp()) do
+    global global_decl_defined2
+    return global_decl_defined2
+end |> Compiler.is_nothrow
+
+@eval get_exception() = $(Expr(:the_exception))
+@test Base.infer_return_type() do
+    get_exception()
+end <: Any
+@test @eval Base.infer_return_type((Float64,)) do x
+    out = $(Expr(:the_exception))
+    try
+        out = sin(x)
+    catch
+        out = $(Expr(:the_exception))
+    end
+    return out
+end == Union{Float64,DomainError}
+
+# issue #56628
+@test Compiler.argtypes_to_type(Any[ Int, UnitRange{Int}, Vararg{Pair{Any, Union{}}} ]) === Tuple{Int, UnitRange{Int}}
+@test Compiler.argtypes_to_type(Any[ Int, UnitRange{Int}, Vararg{Pair{Any, Union{}}}, Float64 ]) === Tuple{Int, UnitRange{Int}, Float64}
+@test Compiler.argtypes_to_type(Any[ Int, UnitRange{Int}, Vararg{Pair{Any, Union{}}}, Float64, Memory{2} ]) === Union{}
+@test Base.return_types(Tuple{Tuple{Int, Vararg{Pair{Any, Union{}}}}},) do x; Returns(true)(x...); end |> only === Bool
+
+# issue #57292
+f57292(xs::Union{Tuple{String}, Int}...) = getfield(xs...)
+g57292(xs::String...) = getfield(("abc",), 1, :not_atomic, xs...)
+@test Base.infer_return_type(f57292) == String
+@test Base.infer_return_type(g57292) == String
+
+mutable struct Issue57673{C<:Union{Int,Float64}}
+    c::C
+    d
+    Issue57673(c::C, d) where C = new{C}(c, d)
+    Issue57673(c::C) where C = new{C}(c)
+end
+@test Base.infer_return_type((Issue57673,)) do a::Issue57673{<:String}
+    setfield!(a, :d, nothing)
+    a
+end === Union{} # `setfield!` tfunc should be able to figure out this object is runtime invalid
+
+# only refine with `PartialStruct` on `setfield!` when we have full argument type information
+let src = code_typed1((Base.RefValue{String}, String)) do x, val
+        setfield!(x, :x, val)
+        isdefined(x, :x)
+    end
+    retval = src.code[end].val
+    @test retval === true
+    src = code_typed1((Base.RefValue{String}, String)) do x, args...
+        setfield!(x, :x, args...)
+        isdefined(x, :x)
+    end
+    retval = src.code[end].val
+    @test isa(retval, Core.SSAValue)
+end
+
+global invalid_setglobal!_exct_modeling::Int
+@test Base.infer_exception_type((Float64,)) do x
+    setglobal!(@__MODULE__, :invalid_setglobal!_exct_modeling, x)
+end == TypeError
+
+# Issue #58257 - Hang in inference during BindingPartition resolution
+module A58257
+    module B58257
+        const age = Base.get_world_counter()
+        using ..A58257
+        # World age here is N
+    end
+    using .B58257
+    # World age here is N+1
+    @eval f() = $(GlobalRef(B58257, :get!))
+end
+
+## The sequence of events is critical here.
+A58257.get!      # Creates binding partition in A, N+1:∞
+A58257.B58257.get!    # Creates binding partition in A.B, N+1:∞
+Base.invoke_in_world(A58257.B58257.age, getglobal, A58257, :get!) # Expands binding partition in A through <N
+@test Base.infer_return_type(A58257.f) == typeof(Base.get!) # Attempt to lookup A.B in world age N hangs
+
+function tt57873(a::Vector{String}, pref)
+    ret = String[]
+    for j in a
+        append!(ret, tt57873(a[2:end], (pref..., "")))
+    end
+    return ret
+end
+let code = Compiler.typeinf_ext_toplevel(Any[Core.svec(Any,Tuple{typeof(tt57873),Vector{String},Tuple{String}})], [Base.get_world_counter()], Base.Compiler.TRIM_NO)
+    @test !isempty(code)
+    ## If we were to run trim here, we should fail with:
+    #    Verifier error #1: unresolved invoke from statement tt57873(::Vector{String}, ::Tuple{String, String})::Vector{String}
+    #Stacktrace:
+    # [1] tt57873(a::Vector{String}, pref::Tuple{String})
+    #   @ Main REPL[1]:4
+end
+
+function ss57873(a::Vector{String}, pref)
+    ret = String[]
+    for j in a
+        append!(ret, ss57873(a[2:end], (pref..., "")))
+    end
+    return ret
+end
+@test ss57873(["a", "b", "c"], ("",)) == String[]
+
+@test Base.infer_return_type((Module,Symbol,Vector{Any})) do m, n, xs
+    getglobal(m, n, xs...)
+end <: Any
+@test Base.infer_return_type((Module,Symbol,Any,Vector{Any})) do m, n, v, xs
+    setglobal!(m, n, v, xs...)
+end <: Any
+@test Base.infer_return_type((Module,Symbol,Vector{Any})) do m, n, xs
+    isdefinedglobal(m, n, xs...)
+end <: Bool
+@test Base.infer_return_type((Module,Symbol,Vector{Any})) do m, n, xs
+    Core.get_binding_type(m, n, xs...)
+end <: Type
+
+# issue #59269
+function haskey_inference_test()
+    kwargs = Core.compilerbarrier(:const, Base.pairs((; item = false)))
+    return haskey(kwargs, :item) ? nothing : Any[]
+end
+@inferred haskey_inference_test()
+
+end # module inference
diff --git a/test/compiler/inline.jl b/Compiler/test/inline.jl
similarity index 77%
rename from test/compiler/inline.jl
rename to Compiler/test/inline.jl
index be821a88f00cc..db60d2a924a5b 100644
--- a/test/compiler/inline.jl
+++ b/Compiler/test/inline.jl
@@ -1,9 +1,12 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+module inline_tests
+
 using Test
 using Base.Meta
 using Core: ReturnNode
 
+include("setup_Compiler.jl")
 include("irutils.jl")
 include("newinterp.jl")
 
@@ -71,7 +74,7 @@ function bar12620()
         foo_inl(i==1)
     end
 end
-@test_throws UndefVarError(:y) bar12620()
+@test_throws UndefVarError(:y, :local) bar12620()
 
 # issue #16165
 @inline f16165(x) = (x = UInt(x) + 1)
@@ -121,7 +124,7 @@ f29083(;μ,σ) = μ + σ*randn()
 g29083() = f29083(μ=2.0,σ=0.1)
 let c = code_typed(g29083, ())[1][1].code
     # make sure no call to kwfunc remains
-    @test !any(e->(isa(e,Expr) && (e.head === :invoke && e.args[1].def.name === :kwfunc)), c)
+    @test !any(e->(isa(e,Expr) && (e.head === :invoke && e.args[1].def.def.name === :kwfunc)), c)
 end
 
 @testset "issue #19122: [no]inline of short func. def. with return type annotation" begin
@@ -147,8 +150,10 @@ end
         s
     end
 
-    (src, _) = code_typed(sum27403, Tuple{Vector{Int}})[1]
-    @test !any(x -> x isa Expr && x.head === :invoke, src.code)
+    (src, _) = only(code_typed(sum27403, Tuple{Vector{Int}}))
+    @test !any(src.code) do x
+        x isa Expr && x.head === :invoke && !(x.args[2] in (Core.GlobalRef(Base, :throw_boundserror), Base.throw_boundserror))
+    end
 end
 
 # check that ismutabletype(type) can be fully eliminated
@@ -252,7 +257,7 @@ let code = code_typed(f_pointerref, Tuple{Type{Int}})[1][1].code
     @test !any_ptrref
 end
 
-# Test that inlining can inline _applys of builtins/_applys on SimpleVectors
+# Test that inlining can inline _apply_iterate of builtins/_apply_iterate on SimpleVectors
 function foo_apply_apply_type_svec()
     A = (Tuple, Float32)
     B = Tuple{Float32, Float32}
@@ -274,7 +279,7 @@ f34900(x, y::Int) = y
 f34900(x::Int, y::Int) = invoke(f34900, Tuple{Int, Any}, x, y)
 @test fully_eliminated(f34900, Tuple{Int, Int}; retval=Core.Argument(2))
 
-using Core.Compiler: is_declared_inline, is_declared_noinline
+using .Compiler: is_declared_inline, is_declared_noinline
 
 @testset "is_declared_[no]inline" begin
     @test is_declared_inline(only(methods(@inline x -> x)))
@@ -295,7 +300,7 @@ using Core.Compiler: is_declared_inline, is_declared_noinline
     @test !is_declared_noinline(only(methods() do x x end))
 end
 
-using Core.Compiler: is_inlineable, set_inlineable!
+using .Compiler: is_inlineable, set_inlineable!
 
 @testset "basic set_inlineable! functionality" begin
     ci = code_typed1() do
@@ -311,7 +316,7 @@ end
 const _a_global_array = [1]
 f_inline_global_getindex() = _a_global_array[1]
 let ci = code_typed(f_inline_global_getindex, Tuple{})[1].first
-    @test any(x->(isexpr(x, :call) && x.args[1] === GlobalRef(Base, :arrayref)), ci.code)
+    @test any(x->(isexpr(x, :call) && x.args[1] in (GlobalRef(Base, :memoryrefget), Base.memoryrefget)), ci.code)
 end
 
 # Issue #29114 & #36087 - Inlining of non-tuple splats
@@ -343,8 +348,8 @@ struct NonIsBitsDimsUndef
     dims::NTuple{N, Int} where N
     NonIsBitsDimsUndef() = new()
 end
-@test Core.Compiler.is_inlineable_constant(NonIsBitsDimsUndef())
-@test !Core.Compiler.is_inlineable_constant((("a"^1000, "b"^1000), nothing))
+@test Compiler.is_inlineable_constant(NonIsBitsDimsUndef())
+@test !Compiler.is_inlineable_constant((("a"^1000, "b"^1000), nothing))
 
 # More nothrow modeling for apply_type
 f_apply_type_typeof(x) = (Ref{typeof(x)}; nothing)
@@ -505,6 +510,17 @@ end
         Base.@constprop :aggressive noinlined_constprop_implicit(a) = a+g
         force_inline_constprop_implicit() = @inline noinlined_constprop_implicit(0)
 
+        function force_inline_constprop_cached1()
+            r1 =         noinlined_constprop_implicit(0)
+            r2 = @inline noinlined_constprop_implicit(0)
+            return (r1, r2)
+        end
+        function force_inline_constprop_cached2()
+            r1 = @inline noinlined_constprop_implicit(0)
+            r2 =         noinlined_constprop_implicit(0)
+            return (r1, r2)
+        end
+
         @inline Base.@constprop :aggressive inlined_constprop_explicit(a) = a+g
         force_noinline_constprop_explicit() = @noinline inlined_constprop_explicit(0)
         @inline Base.@constprop :aggressive inlined_constprop_implicit(a) = a+g
@@ -555,6 +571,12 @@ end
     let code = get_code(M.force_inline_constprop_implicit)
         @test all(!isinvoke(:noinlined_constprop_implicit), code)
     end
+    let code = get_code(M.force_inline_constprop_cached1)
+        @test count(isinvoke(:noinlined_constprop_implicit), code) == 1
+    end
+    let code = get_code(M.force_inline_constprop_cached2)
+        @test count(isinvoke(:noinlined_constprop_implicit), code) == 1
+    end
 
     let code = get_code(M.force_noinline_constprop_explicit)
         @test any(isinvoke(:inlined_constprop_explicit), code)
@@ -568,6 +590,18 @@ end
     end
 end
 
+@noinline fresh_edge_noinlined(a::Integer) = unresolvable(a)
+let src = code_typed1((Integer,)) do x
+        @inline fresh_edge_noinlined(x)
+    end
+    @test count(iscall((src, fresh_edge_noinlined)), src.code) == 0
+end
+let src = code_typed1((Integer,)) do x
+        @inline fresh_edge_noinlined(x)
+    end
+    @test count(iscall((src, fresh_edge_noinlined)), src.code) == 0 # should be idempotent
+end
+
 # force constant-prop' for `setproperty!`
 # https://github.com/JuliaLang/julia/pull/41882
 let code = @eval Module() begin
@@ -597,10 +631,12 @@ g41299(f::Tf, args::Vararg{Any,N}) where {Tf,N} = f(args...)
 
 # https://github.com/JuliaLang/julia/issues/42078
 # idempotency of callsite inlining
-function getcache(mi::Core.MethodInstance)
-    cache = Core.Compiler.code_cache(Core.Compiler.NativeInterpreter())
-    codeinf = Core.Compiler.get(cache, mi, nothing)
-    return isnothing(codeinf) ? nothing : codeinf
+function getcacheci(mi::Core.MethodInstance)
+    cache = Compiler.code_cache(Compiler.NativeInterpreter())
+    codeinst = Compiler.get(cache, mi, nothing)
+    codeinst === nothing && return nothing
+    codeinst isa Compiler.InferenceResult && (codeinst = codeinst.ci)
+    return codeinst
 end
 @noinline f42078(a) = sum(sincos(a))
 let
@@ -618,8 +654,8 @@ let
     end
     let # make sure to discard the inferred source
         mi = only(methods(f42078)).specializations::Core.MethodInstance
-        codeinf = getcache(mi)::Core.CodeInstance
-        @atomic codeinf.inferred = nothing
+        codeinst = getcacheci(mi)::Core.CodeInstance
+        @atomic codeinst.inferred = nothing
     end
 
     let # inference should re-infer `f42078(::Int)` and we should get the same code
@@ -692,7 +728,7 @@ mktempdir() do dir
                 ci, rt = only(code_typed(issue42246))
                 if any(ci.code) do stmt
                        Meta.isexpr(stmt, :invoke) &&
-                       stmt.args[1].def.name === nameof(IOBuffer)
+                       stmt.args[1].def.def.name === nameof(IOBuffer)
                    end
                     exit(0)
                 else
@@ -730,7 +766,7 @@ end
 let f(x) = (x...,)
     # Test splatting with a Union of non-{Tuple, SimpleVector} types that require creating new `iterate` calls
     # in inlining. For this particular case, we're relying on `iterate(::CaretesianIndex)` throwing an error, such
-    # the the original apply call is not union-split, but the inserted `iterate` call is.
+    # that the original apply call is not union-split, but the inserted `iterate` call is.
     @test code_typed(f, Tuple{Union{Int64, CartesianIndex{1}, CartesianIndex{3}}})[1][2] == Tuple{Int64}
 end
 
@@ -777,8 +813,8 @@ end
 let src = code_typed((Union{Tuple{Int,Int,Int}, Vector{Int}},)) do xs
         g42840(xs, 2)
     end |> only |> first
-    # `(xs::Vector{Int})[a::Const(2)]` => `Base.arrayref(true, xs, 2)`
-    @test count(iscall((src, Base.arrayref)), src.code) == 1
+    # `(xs::Vector{Int})[a::Const(2)]`
+    @test count(iscall((src, Base.memoryrefget)), src.code) == 1
     @test count(isinvoke(:g42840), src.code) == 1
 end
 
@@ -845,7 +881,7 @@ let src = code_typed1((Any,)) do x
         abstract_unionsplit_fallback(x)
     end
     @test count(isinvoke(:abstract_unionsplit_fallback), src.code) == 2
-    @test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch
+    @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error
 end
 let src = code_typed1((Union{Type,Number},)) do x
         abstract_unionsplit_fallback(x)
@@ -881,7 +917,7 @@ let src = code_typed1((Any,)) do x
     @test count(iscall((src, typeof)), src.code) == 2
     @test count(isinvoke(:println), src.code) == 0
     @test count(iscall((src, println)), src.code) == 0
-    @test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch
+    @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error
 end
 let src = code_typed1((Union{Type,Number},)) do x
         abstract_unionsplit_fallback(false, x)
@@ -920,34 +956,34 @@ end
 end
 
 # issue 43104
-
+_has_free_typevars(t) = ccall(:jl_has_free_typevars, Cint, (Any,), t) != 0
 @inline isGoodType(@nospecialize x::Type) =
-    x !== Any && !(@noinline Base.has_free_typevars(x))
+    x !== Any && !(@noinline _has_free_typevars(x))
 let # aggressive inlining of single, abstract method match
     src = code_typed((Type, Any,)) do x, y
         isGoodType(x), isGoodType(y)
     end |> only |> first
     # both callsites should be inlined
-    @test count(isinvoke(:has_free_typevars), src.code) == 2
-    # `isGoodType(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
-    @test count(iscall((src,isGoodType)), src.code) == 1
+    @test count(isinvoke(:_has_free_typevars), src.code) == 2
+    # `isGoodType(y::Any)` isn't fully covered, so the fallback is a method error
+    @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error
 end
 
 @inline isGoodType2(cnd, @nospecialize x::Type) =
-    x !== Any && !(@noinline (cnd ? Core.Compiler.isType : Base.has_free_typevars)(x))
+    x !== Any && !(@noinline (cnd ? Compiler.isType : _has_free_typevars)(x))
 let # aggressive inlining of single, abstract method match (with constant-prop'ed)
     src = code_typed((Type, Any,)) do x, y
         isGoodType2(true, x), isGoodType2(true, y)
     end |> only |> first
     # both callsite should be inlined with constant-prop'ed result
     @test count(isinvoke(:isType), src.code) == 2
-    @test count(isinvoke(:has_free_typevars), src.code) == 0
-    # `isGoodType(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
-    @test count(iscall((src,isGoodType2)), src.code) == 1
+    @test count(isinvoke(:_has_free_typevars), src.code) == 0
+    # `isGoodType(y::Any)` isn't fully covered, thus a MethodError gets inserted
+    @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error
 end
 
 @noinline function checkBadType!(@nospecialize x::Type)
-    if x === Any || Base.has_free_typevars(x)
+    if x === Any || _has_free_typevars(x)
         println(x)
     end
     return nothing
@@ -958,8 +994,8 @@ let # aggressive static dispatch of single, abstract method match
     end |> only |> first
     # both callsites should be resolved statically
     @test count(isinvoke(:checkBadType!), src.code) == 2
-    # `checkBadType!(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
-    @test count(iscall((src,checkBadType!)), src.code) == 1
+    # `checkBadType!(y::Any)` isn't fully covered, thus a MethodError gets inserted
+    @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error
 end
 
 @testset "late_inline_special_case!" begin
@@ -974,6 +1010,14 @@ end
         end |> only |> first
         @test count(iscall((src,UnionAll)), src.code) == 0
     end
+    # test >:
+    let src = code_typed((Any,Any)) do x, y
+            x >: y
+        end |> only |> first
+        idx = findfirst(iscall((src,<:)), src.code)
+        @test idx !== nothing
+        @test src.code[idx].args[2:3] == Any[#=y=#Argument(3), #=x=#Argument(2)]
+    end
 end
 
 # have_fma elimination inside ^
@@ -1130,7 +1174,7 @@ function f44200()
     x44200
 end
 let src = code_typed1(f44200)
-    @test_broken count(x -> isa(x, Core.PiNode), src.code) == 0
+    @test count(x -> isa(x, Core.PiNode), src.code) == 0
 end
 
 # Test that peeling off one case from (::Any) doesn't introduce
@@ -1164,7 +1208,7 @@ end
 end
 
 # Test that inlining doesn't accidentally delete a bad return_type call
-f_bad_return_type() = Core.Compiler.return_type(+, 1, 2)
+f_bad_return_type() = Compiler.return_type(+, 1, 2)
 @test_throws MethodError f_bad_return_type()
 
 # Test that inlining doesn't leave useless globalrefs around
@@ -1179,7 +1223,7 @@ end
 # Test that we can inline a finalizer for a struct that does not otherwise escape
 @noinline nothrow_side_effect(x) =
     Base.@assume_effects :total !:effect_free @ccall jl_(x::Any)::Cvoid
-@test Core.Compiler.is_finalizer_inlineable(Base.infer_effects(nothrow_side_effect, (Nothing,)))
+@test Compiler.is_finalizer_inlineable(Base.infer_effects(nothrow_side_effect, (Nothing,)))
 
 mutable struct DoAllocNoEscape
     function DoAllocNoEscape()
@@ -1364,7 +1408,7 @@ init_finalization_count!() = FINALIZATION_COUNT[] = 0
 get_finalization_count() = FINALIZATION_COUNT[]
 @noinline add_finalization_count!(x) = FINALIZATION_COUNT[] += x
 @noinline Base.@assume_effects :nothrow safeprint(io::IO, x...) = (@nospecialize; print(io, x...))
-@test Core.Compiler.is_finalizer_inlineable(Base.infer_effects(add_finalization_count!, (Int,)))
+@test Compiler.is_finalizer_inlineable(Base.infer_effects(add_finalization_count!, (Int,)))
 
 mutable struct DoAllocWithField
     x::Int
@@ -1531,7 +1575,6 @@ let
     @test get_finalization_count() == 1000
 end
 
-
 function cfg_finalization7(io)
     for i = -999:1000
         o = DoAllocWithField(0)
@@ -1558,24 +1601,51 @@ let
     @test get_finalization_count() == 1000
 end
 
+# Load forwarding with `finalizer` elision
+let src = code_typed1((Int,)) do x
+        xs = finalizer(Ref(x)) do obj
+            @noinline
+            Base.@assume_effects :nothrow :notaskstate
+            Core.println("finalizing: ", obj[])
+        end
+        Base.@assume_effects :nothrow @noinline println("xs[] = ", @inline xs[])
+        return xs[]
+    end
+    @test count(iscall((src, getfield)), src.code) == 0
+end
+let src = code_typed1((Int,)) do x
+        xs = finalizer(Ref(x)) do obj
+            @noinline
+            Base.@assume_effects :nothrow :notaskstate
+            Core.println("finalizing: ", obj[])
+        end
+        Base.@assume_effects :nothrow @noinline println("xs[] = ", @inline xs[])
+        xs[] += 1
+        return xs[]
+    end
+    @test count(iscall((src, getfield)), src.code) == 0
+    @test count(iscall((src, setfield!)), src.code) == 1
+end
 
 # optimize `[push!|pushfirst!](::Vector{Any}, x...)`
 @testset "optimize `$f(::Vector{Any}, x...)`" for f = Any[push!, pushfirst!]
     @eval begin
-        let src = code_typed1((Vector{Any}, Any)) do xs, x
-                $f(xs, x)
+        for T in [Int, Any]
+            let src = code_typed1((Vector{T}, T)) do xs, x
+                    $f(xs, x)
+                end
+                @test count(iscall((src, $f)), src.code) == 0
             end
-            @test count(iscall((src, $f)), src.code) == 0
-            @test count(src.code) do @nospecialize x
-                isa(x, Core.GotoNode) ||
-                isa(x, Core.GotoIfNot) ||
-                iscall((src, getfield))(x)
-            end == 0 # no loop should be involved for the common single arg case
-        end
-        let src = code_typed1((Vector{Any}, Any, Any)) do xs, x, y
-                $f(xs, x, y)
+            let effects = Base.infer_effects((Vector{T}, T)) do xs, x
+                    $f(xs, x)
+                end
+                @test Compiler.Compiler.is_terminates(effects)
+            end
+            let src = code_typed1((Vector{T}, T, T)) do xs, x, y
+                    $f(xs, x, y)
+                end
+                @test count(iscall((src, $f)), src.code) == 0
             end
-            @test count(iscall((src, $f)), src.code) == 0
         end
         let xs = Any[]
             $f(xs, :x, "y", 'z')
@@ -1586,7 +1656,7 @@ end
     end
 end
 
-using Core.Compiler: is_declared_inline, is_declared_noinline
+using .Compiler: is_declared_inline, is_declared_noinline
 
 # https://github.com/JuliaLang/julia/issues/45050
 @testset "propagate :meta annotations to keyword sorter methods" begin
@@ -1600,12 +1670,12 @@ using Core.Compiler: is_declared_inline, is_declared_noinline
         @test is_declared_noinline(only(methods(Core.kwcall, (Any, typeof(f), Vararg))))
     end
     let Base.@constprop :aggressive f(::Any; x::Int=1) = 2x
-        @test Core.Compiler.is_aggressive_constprop(only(methods(f)))
-        @test Core.Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg))))
+        @test Compiler.is_aggressive_constprop(only(methods(f)))
+        @test Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg))))
     end
     let Base.@constprop :none f(::Any; x::Int=1) = 2x
-        @test Core.Compiler.is_no_constprop(only(methods(f)))
-        @test Core.Compiler.is_no_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg))))
+        @test Compiler.is_no_constprop(only(methods(f)))
+        @test Compiler.is_no_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg))))
     end
     # @nospecialize
     let f(@nospecialize(A::Any); x::Int=1) = 2x
@@ -1618,19 +1688,19 @@ using Core.Compiler: is_declared_inline, is_declared_noinline
     end
     # Base.@assume_effects
     let Base.@assume_effects :notaskstate f(::Any; x::Int=1) = 2x
-        @test Core.Compiler.decode_effects_override(only(methods(f)).purity).notaskstate
-        @test Core.Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate
+        @test Compiler.decode_effects_override(only(methods(f)).purity).notaskstate
+        @test Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate
     end
     # propagate multiple metadata also
     let @inline Base.@assume_effects :notaskstate Base.@constprop :aggressive f(::Any; x::Int=1) = (@nospecialize; 2x)
         @test is_declared_inline(only(methods(f)))
-        @test Core.Compiler.is_aggressive_constprop(only(methods(f)))
+        @test Compiler.is_aggressive_constprop(only(methods(f)))
         @test is_declared_inline(only(methods(Core.kwcall, (Any, typeof(f), Vararg))))
-        @test Core.Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg))))
+        @test Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg))))
         @test only(methods(f)).nospecialize == -1
         @test only(methods(Core.kwcall, (Any, typeof(f), Vararg))).nospecialize == -1
-        @test Core.Compiler.decode_effects_override(only(methods(f)).purity).notaskstate
-        @test Core.Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate
+        @test Compiler.decode_effects_override(only(methods(f)).purity).notaskstate
+        @test Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate
     end
 end
 
@@ -1642,13 +1712,12 @@ function oc_capture_oc(z)
 end
 @test fully_eliminated(oc_capture_oc, (Int,))
 
+# inlining with unmatched type parameters
 @eval struct OldVal{T}
-    x::T
     (OV::Type{OldVal{T}})() where T = $(Expr(:new, :OV))
 end
-with_unmatched_typeparam1(x::OldVal{i}) where {i} = i
-with_unmatched_typeparam2() = [ Base.donotdelete(OldVal{i}()) for i in 1:10000 ]
-function with_unmatched_typeparam3()
+@test OldVal{0}() === OldVal{0}.instance
+function with_unmatched_typeparam()
     f(x::OldVal{i}) where {i} = i
     r = 0
     for i = 1:10000
@@ -1656,17 +1725,15 @@ function with_unmatched_typeparam3()
     end
     return r
 end
-
-@testset "Inlining with unmatched type parameters" begin
-    let src = code_typed1(with_unmatched_typeparam1, (Any,))
-        @test !any(@nospecialize(x) -> isexpr(x, :call) && length(x.args) == 1, src.code)
-    end
-    let src = code_typed1(with_unmatched_typeparam2)
-        @test !any(@nospecialize(x) -> isexpr(x, :call) && length(x.args) == 1, src.code)
-    end
-    let src = code_typed1(with_unmatched_typeparam3)
-        @test !any(@nospecialize(x) -> isexpr(x, :call) && length(x.args) == 1, src.code)
+let src = code_typed1(with_unmatched_typeparam)
+    found = nothing
+    for x in src.code
+        if isexpr(x, :call) && length(x.args) == 1
+            found = x
+            break
+        end
     end
+    @test isnothing(found) || (source=src, statement=found)
 end
 
 function twice_sitofp(x::Int, y::Int)
@@ -1704,13 +1771,14 @@ end
 
 # Test getfield modeling of Type{Ref{_A}} where _A
 let getfield_tfunc(@nospecialize xs...) =
-        Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, xs...)
+        Compiler.getfield_tfunc(Compiler.fallback_lattice, xs...)
     @test getfield_tfunc(Type, Core.Const(:parameters)) !== Union{}
     @test !isa(getfield_tfunc(Type{Tuple{Union{Int, Float64}, Int}}, Core.Const(:name)), Core.Const)
+    @test !isa(getfield_tfunc(Type{Tuple{Any}}, Core.Const(:name)), Core.Const)
 end
 @test fully_eliminated(Base.ismutable, Tuple{Base.RefValue})
 
-# TODO: Remove compute sparams for vararg_retrival
+# TODO: Remove compute sparams for vararg_retrieval
 fvarargN_inline(x::Tuple{Vararg{Int, N}}) where {N} = N
 fvarargN_inline(args...) = fvarargN_inline(args)
 let src = code_typed1(fvarargN_inline, (Tuple{Vararg{Int}},))
@@ -1735,7 +1803,7 @@ end
 
 isinvokemodify(y) = @nospecialize(x) -> isinvokemodify(y, x)
 isinvokemodify(sym::Symbol, @nospecialize(x)) = isinvokemodify(mi->mi.def.name===sym, x)
-isinvokemodify(pred::Function, @nospecialize(x)) = isexpr(x, :invoke_modify) && pred(x.args[1]::MethodInstance)
+isinvokemodify(pred::Function, @nospecialize(x)) = isexpr(x, :invoke_modify) && pred((x.args[1]::CodeInstance).def)
 
 mutable struct Atomic{T}
     @atomic x::T
@@ -1763,22 +1831,62 @@ let src = code_typed1((Atomic{Int},Union{Int,Float64})) do a, b
     end
     @test count(isinvokemodify(:mymax), src.code) == 2
 end
+global x_global_inc::Int = 1
+let src = code_typed1(()) do
+        @atomic (@__MODULE__).x_global_inc += 1
+    end
+    @test count(isinvokemodify(:+), src.code) == 1
+end
+let src = code_typed1((Ptr{Int},)) do a
+        unsafe_modify!(a, +, 1)
+    end
+    @test count(isinvokemodify(:+), src.code) == 1
+end
+let src = code_typed1((AtomicMemoryRef{Int},)) do a
+        Core.memoryrefmodify!(a, +, 1, :sequentially_consistent, true)
+    end
+    @test count(isinvokemodify(:+), src.code) == 1
+end
 
 # apply `ssa_inlining_pass` multiple times
-let interp = Core.Compiler.NativeInterpreter()
+func_mul_int(a::Int, b::Int) = Core.Intrinsics.mul_int(a, b)
+multi_inlining1(a::Int, b::Int) = @noinline func_mul_int(a, b)
+let i::Int, continue_::Bool
+    interp = Compiler.NativeInterpreter()
     # check if callsite `@noinline` annotation works
-    ir, = Base.code_ircode((Int,Int); optimize_until="inlining", interp) do a, b
-        @noinline a*b
-    end |> only
-    i = findfirst(isinvoke(:*), ir.stmts.inst)
+    ir, = only(Base.code_ircode(multi_inlining1, (Int,Int); optimize_until="CC: INLINING", interp))
+    i = findfirst(isinvoke(:func_mul_int), ir.stmts.stmt)
     @test i !== nothing
-
-    # ok, now delete the callsite flag, and see the second inlining pass can inline the call
-    @eval Core.Compiler $ir.stmts[$i][:flag] &= ~IR_FLAG_NOINLINE
-    inlining = Core.Compiler.InliningState(interp)
-    ir = Core.Compiler.ssa_inlining_pass!(ir, inlining, false)
-    @test count(isinvoke(:*), ir.stmts.inst) == 0
-    @test count(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.inst) == 1
+    # now delete the callsite flag, and see the second inlining pass can inline the call
+    ir.stmts[i][:flag] &= ~Compiler.IR_FLAG_NOINLINE
+    inlining = Compiler.InliningState(interp)
+    ir = Compiler.ssa_inlining_pass!(ir, inlining, false)
+    @test findfirst(isinvoke(:func_mul_int), ir.stmts.stmt) === nothing
+    @test (i = findfirst(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.stmt)) !== nothing
+    lins = Compiler.IRShow.buildLineInfoNode(ir.debuginfo, nothing, i)
+    @test (continue_ = length(lins) == 2) # :multi_inlining1 -> :func_mul_int
+    if continue_
+        def1 = lins[1].method
+        @test def1 isa Core.MethodInstance && def1.def.name === :multi_inlining1
+        def2 = lins[2].method
+        @test def2 isa Core.MethodInstance && def2.def.name === :func_mul_int
+    end
+end
+
+call_func_mul_int(a::Int, b::Int) = @noinline func_mul_int(a, b)
+multi_inlining2(a::Int, b::Int) = call_func_mul_int(a, b)
+let i::Int, continue_::Bool
+    interp = Compiler.NativeInterpreter()
+    # check if callsite `@noinline` annotation works
+    ir, = only(Base.code_ircode(multi_inlining2, (Int,Int); optimize_until="CC: INLINING", interp))
+    i = findfirst(isinvoke(:func_mul_int), ir.stmts.stmt)
+    @test i !== nothing
+    # now delete the callsite flag, and see the second inlining pass does not inline the call, since inference recorded it should not
+    ir.stmts[i][:flag] &= ~Compiler.IR_FLAG_NOINLINE
+    inlining = Compiler.InliningState(interp)
+    ir = Compiler.ssa_inlining_pass!(ir, inlining, false)
+    @test findfirst(isinvoke(:func_mul_int), ir.stmts.stmt) !== nothing
+    @test findfirst(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.stmt) === nothing
 end
 
 # Test special purpose inliner for Core.ifelse
@@ -1803,30 +1911,30 @@ end
 
 # optimize away `NamedTuple`s used for handling `@nospecialize`d keyword-argument
 # https://github.com/JuliaLang/julia/pull/47059
-abstract type CallInfo end
-struct NewInstruction
+abstract type TestCallInfo end
+struct TestNewInstruction
     stmt::Any
     type::Any
-    info::CallInfo
+    info::TestCallInfo
     line::Int32
     flag::UInt8
-    function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo),
+    function TestNewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::TestCallInfo),
                             line::Int32, flag::UInt8)
         return new(stmt, type, info, line, flag)
     end
 end
 @nospecialize
-function NewInstruction(newinst::NewInstruction;
+function TestNewInstruction(newinst::TestNewInstruction;
     stmt=newinst.stmt,
     type=newinst.type,
-    info::CallInfo=newinst.info,
+    info::TestCallInfo=newinst.info,
     line::Int32=newinst.line,
     flag::UInt8=newinst.flag)
-    return NewInstruction(stmt, type, info, line, flag)
+    return TestNewInstruction(stmt, type, info, line, flag)
 end
 @specialize
-let src = code_typed1((NewInstruction,Any,Any,CallInfo)) do newinst, stmt, type, info
-        NewInstruction(newinst; stmt, type, info)
+let src = code_typed1((TestNewInstruction,Any,Any,TestCallInfo)) do newinst, stmt, type, info
+        TestNewInstruction(newinst; stmt, type, info)
     end
     @test count(issplatnew, src.code) == 0
     @test count(iscall((src,NamedTuple)), src.code) == 0
@@ -1836,26 +1944,16 @@ end
 # Test that inlining can still use nothrow information from concrete-eval
 # even if the result itself is too big to be inlined, and nothrow is not
 # known without concrete-eval
-const THE_BIG_TUPLE = ntuple(identity, 1024)
+const THE_BIG_TUPLE = ntuple(identity, 1024);
 function return_the_big_tuple(err::Bool)
     err && error("BAD")
     return THE_BIG_TUPLE
 end
-@noinline function return_the_big_tuple_noinline(err::Bool)
-    err && error("BAD")
-    return THE_BIG_TUPLE
+@test fully_eliminated() do
+    return_the_big_tuple(false)[1]
 end
-big_tuple_test1() = return_the_big_tuple(false)[1]
-big_tuple_test2() = return_the_big_tuple_noinline(false)[1]
-
-@test fully_eliminated(big_tuple_test2, Tuple{})
-# Currently we don't run these cleanup passes, but let's make sure that
-# if we did, inlining would be able to remove this
-let ir = Base.code_ircode(big_tuple_test1, Tuple{})[1][1]
-    ir = Core.Compiler.compact!(ir, true)
-    ir = Core.Compiler.cfg_simplify!(ir)
-    ir = Core.Compiler.compact!(ir, true)
-    @test length(ir.stmts) == 1
+@test fully_eliminated() do
+    @inline return_the_big_tuple(false)[1]
 end
 
 # inlineable but removable call should be eligible for DCE
@@ -1926,7 +2024,7 @@ f48397(::Tuple{String,String}) = :ok
 let src = code_typed1((Union{Bool,Tuple{String,Any}},)) do x
         f48397(x)
     end
-    @test any(iscall((src, f48397)), src.code)
+    @test any(iscall((src, Core.throw_methoderror)), src.code) # fallback method error)
 end
 g48397::Union{Bool,Tuple{String,Any}} = ("48397", 48397)
 let res = @test_throws MethodError let
@@ -2009,7 +2107,7 @@ for run_finalizer_escape_test in (run_finalizer_escape_test1, run_finalizer_esca
     global finalizer_escape::Int = 0
 
     let src = code_typed1(run_finalizer_escape_test, Tuple{Bool, Bool})
-        @test any(x->isexpr(x, :(=)), src.code)
+        @test any(iscall((src, Core.setglobal!)), src.code)
     end
 
     let
@@ -2020,8 +2118,8 @@ end
 
 # `compilesig_invokes` inlining option
 @newinterp NoCompileSigInvokes
-Core.Compiler.OptimizationParams(::NoCompileSigInvokes) =
-    Core.Compiler.OptimizationParams(; compilesig_invokes=false)
+Compiler.OptimizationParams(::NoCompileSigInvokes) =
+    Compiler.OptimizationParams(; compilesig_invokes=false)
 @noinline no_compile_sig_invokes(@nospecialize x) = (x !== Any && !Base.has_free_typevars(x))
 # test the single dispatch candidate case
 let src = code_typed1((Type,)) do x
@@ -2029,7 +2127,7 @@ let src = code_typed1((Type,)) do x
     end
     @test count(src.code) do @nospecialize x
         isinvoke(:no_compile_sig_invokes, x) &&
-        (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Any}
+        (x.args[1]::Core.CodeInstance).def.specTypes == Tuple{typeof(no_compile_sig_invokes),Any}
     end == 1
 end
 let src = code_typed1((Type,); interp=NoCompileSigInvokes()) do x
@@ -2037,7 +2135,7 @@ let src = code_typed1((Type,); interp=NoCompileSigInvokes()) do x
     end
     @test count(src.code) do @nospecialize x
         isinvoke(:no_compile_sig_invokes, x) &&
-        (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Type}
+        (x.args[1]::Core.CodeInstance).def.specTypes == Tuple{typeof(no_compile_sig_invokes),Type}
     end == 1
 end
 # test the union split case
@@ -2046,7 +2144,7 @@ let src = code_typed1((Union{DataType,UnionAll},)) do x
     end
     @test count(src.code) do @nospecialize x
         isinvoke(:no_compile_sig_invokes, x) &&
-        (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Any}
+        (x.args[1]::Core.CodeInstance).def.specTypes == Tuple{typeof(no_compile_sig_invokes),Any}
     end == 2
 end
 let src = code_typed1((Union{DataType,UnionAll},); interp=NoCompileSigInvokes()) do x
@@ -2054,10 +2152,197 @@ let src = code_typed1((Union{DataType,UnionAll},); interp=NoCompileSigInvokes())
     end
     @test count(src.code) do @nospecialize x
         isinvoke(:no_compile_sig_invokes, x) &&
-        (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),DataType}
+        (x.args[1]::Core.CodeInstance).def.specTypes == Tuple{typeof(no_compile_sig_invokes),DataType}
     end == 1
     @test count(src.code) do @nospecialize x
         isinvoke(:no_compile_sig_invokes, x) &&
-        (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),UnionAll}
+        (x.args[1]::Core.CodeInstance).def.specTypes == Tuple{typeof(no_compile_sig_invokes),UnionAll}
     end == 1
 end
+
+# https://github.com/JuliaLang/julia/issues/50612
+f50612(x) = UInt32(x)
+@test all(!isinvoke(:UInt32),get_code(f50612,Tuple{Char}))
+
+# move inlineable constant values into statement position during `compact!`-ion
+# so that we don't inline DCE-eligibile calls
+Base.@assume_effects :nothrow function erase_before_inlining(x, y)
+    z = sin(y)
+    if x
+        return "julia"
+    end
+    return z
+end
+@test fully_eliminated((Float64,); retval=5) do y
+    length(erase_before_inlining(true, y))
+end
+@test fully_eliminated((Float64,); retval=(5,5)) do y
+    z = erase_before_inlining(true, y)
+    return length(z), length(z)
+end
+
+# continue const-prop' when concrete-eval result is too big
+const THE_BIG_TUPLE_2 = ntuple(identity, 1024)
+return_the_big_tuple2(a) = (a, THE_BIG_TUPLE_2)
+let src = code_typed1() do
+        return return_the_big_tuple2(42)[2]
+    end
+    @test count(isinvoke(:return_the_big_tuple2), src.code) == 0
+end
+let src = code_typed1() do
+        return iterate(("1", '2'), 1)
+    end
+    @test count(isinvoke(:iterate), src.code) == 0
+end
+
+function issue53062(cond)
+    x = Ref{Int}(0)
+    if cond
+        x[] = x
+    else
+        return -1
+    end
+end
+@test !Compiler.is_nothrow(Base.infer_effects(issue53062, (Bool,)))
+@test issue53062(false) == -1
+@test_throws MethodError issue53062(true)
+
+struct Issue52644
+    tuple::Type{<:Tuple}
+end
+issue52644(::DataType) = :DataType
+issue52644(::UnionAll) = :UnionAll
+let ir = Base.code_ircode((Issue52644,); optimize_until="CC: INLINING") do t
+        issue52644(t.tuple)
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    irfunc = Core.OpaqueClosure(ir)
+    @test irfunc(Issue52644(Tuple{})) === :DataType
+    @test irfunc(Issue52644(Tuple{<:Integer})) === :UnionAll
+end
+issue52644_single(x::DataType) = :DataType
+let ir = Base.code_ircode((Issue52644,); optimize_until="CC: INLINING") do t
+        issue52644_single(t.tuple)
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    irfunc = Core.OpaqueClosure(ir)
+    @test irfunc(Issue52644(Tuple{})) === :DataType
+    @test_throws MethodError irfunc(Issue52644(Tuple{<:Integer}))
+end
+
+foo_split(x::Float64) = 1
+foo_split(x::Int) = 2
+bar_inline_error() = foo_split(nothing)
+bar_split_error() = foo_split(Core.compilerbarrier(:type,nothing))
+
+let src = code_typed1(bar_inline_error, Tuple{})
+    # Should inline method errors
+    @test count(iscall((src, foo_split)), src.code) == 0
+    @test count(iscall((src, Core.throw_methoderror)), src.code) > 0
+end
+let src = code_typed1(bar_split_error, Tuple{})
+    # Should inline method errors
+    @test count(iscall((src, foo_split)), src.code) == 0
+    @test count(iscall((src, Core.throw_methoderror)), src.code) > 0
+end
+
+# finalizer inlining with EA
+mutable struct ForeignBuffer{T}
+    const ptr::Ptr{T}
+end
+mutable struct ForeignBufferChecker
+    @atomic finalized::Bool
+end
+const foreign_buffer_checker = ForeignBufferChecker(false)
+function foreign_alloc(::Type{T}, length) where T
+    ptr = Libc.malloc(sizeof(T) * length)
+    ptr = Base.unsafe_convert(Ptr{T}, ptr)
+    obj = ForeignBuffer{T}(ptr)
+    return finalizer(obj) do obj
+        Base.@assume_effects :notaskstate :nothrow
+        @atomic foreign_buffer_checker.finalized = true
+        Libc.free(obj.ptr)
+    end
+end
+function f_EA_finalizer(N::Int)
+    workspace = foreign_alloc(Float64, N)
+    GC.@preserve workspace begin
+        (;ptr) = workspace
+        Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr)
+    end
+end
+let src = code_typed1(foreign_alloc, (Type{Float64},Int,))
+    @test count(iscall((src, Core.finalizer)), src.code) == 1
+end
+let src = code_typed1(f_EA_finalizer, (Int,))
+    @test count(iscall((src, Core.finalizer)), src.code) == 0
+end
+let;Base.Experimental.@force_compile
+    f_EA_finalizer(42000)
+    @test foreign_buffer_checker.finalized
+end
+
+# JuliaLang/julia#56422:
+# EA-based finalizer inlining should not result in an invalid IR in the existence of `PhiNode`s
+function issue56422(cnd::Bool, N::Int)
+    if cnd
+        workspace = foreign_alloc(Float64, N)
+    else
+        workspace = foreign_alloc(Float64, N+1)
+    end
+    GC.@preserve workspace begin
+        (;ptr) = workspace
+        Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr)
+    end
+end
+let src = code_typed1(issue56422, (Bool,Int,))
+    @test_broken count(iscall((src, Core.finalizer)), src.code) == 0
+end
+
+# Test that inlining doesn't unnecessarily move things to statement position
+@noinline f_noinline_invoke(x::Union{Symbol,Nothing}=nothing) = Core.donotdelete(x)
+g_noinline_invoke(x) = f_noinline_invoke(x)
+let src = code_typed1(g_noinline_invoke, (Union{Symbol,Nothing},))
+    @test !any(@nospecialize(x)->isa(x,GlobalRef), src.code)
+end
+
+path = Ref{Symbol}(:unknown)
+function f59018_generator(x)
+    if @generated
+        if x isa DataType && x.name === Type.body.name
+            path[] = :generator
+            return Core.sizeof(x.parameters[1])
+        end
+    else
+        path[] = :fallback
+        return Core.sizeof(x.parameters[1])
+    end
+end
+f59018() = f59018_generator(Base.inferencebarrier(Int64))
+let src = code_typed1(f59018, ())
+    # We should hit a dynamic dispatch, because not enough information
+    # is available to expand the generator during compilation.
+    @test iscall((src, f59018_generator), src.code[end - 1])
+    @test path[] === :unknown
+    @test f59018() === 8
+    @test path[] === :generator
+end
+
+# https://github.com/JuliaLang/julia/issues/58915
+f58915(nt) = @inline Base.setindex(nt, 2, :next)
+# This function should fully-inline, i.e. it should have only built-in / intrinsic calls
+# and no invokes or dynamic calls of user code
+let src = code_typed1(f58915, Tuple{@NamedTuple{next::UInt32,prev::UInt32}})
+    # Any calls should be built-in calls
+    @test count(iscall(f->!isa(singleton_type(argextype(f, src)), Core.Builtin)), src.code) == 0
+    # There should be no invoke at all
+    @test count(isinvoke(Returns(true)), src.code) == 0
+end
+
+# https://github.com/JuliaLang/julia/issues/58915#issuecomment-3061421895
+let src = code_typed1(Base.setindex, (@NamedTuple{next::UInt32,prev::UInt32}, Int, Symbol))
+    @test count(isinvoke(:merge_fallback), src.code) == 0
+    @test count(iscall((src, Base.merge_fallback)), src.code) == 0
+end
+
+end # module inline_tests
diff --git a/Compiler/test/interpreter_exec.jl b/Compiler/test/interpreter_exec.jl
new file mode 100644
index 0000000000000..b1d450f8f4286
--- /dev/null
+++ b/Compiler/test/interpreter_exec.jl
@@ -0,0 +1,114 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# tests that interpreter matches codegen
+include("setup_Compiler.jl")
+
+using Test
+using Core.IR
+
+# test that interpreter correctly handles PhiNodes (#29262)
+let m = Meta.@lower 1 + 1
+    @assert Meta.isexpr(m, :thunk)
+    src = m.args[1]::CodeInfo
+    src.code = Any[
+        # block 1
+        QuoteNode(:a),
+        QuoteNode(:b),
+        GlobalRef(@__MODULE__, :test29262),
+        GotoIfNot(SSAValue(3), 6),
+        # block 2
+        PhiNode(Int32[4], Any[SSAValue(1)]),
+        PhiNode(Int32[4, 5], Any[SSAValue(2), SSAValue(5)]),
+        ReturnNode(SSAValue(6)),
+    ]
+    nstmts = length(src.code)
+    src.ssavaluetypes = nstmts
+    src.ssaflags = fill(zero(UInt32), nstmts)
+    src.debuginfo = Core.DebugInfo(:none)
+    Compiler.verify_ir(Compiler.inflate_ir(src))
+    global test29262 = true
+    @test :a === @eval $m
+    global test29262 = false
+    @test :b === @eval $m
+end
+
+let m = Meta.@lower 1 + 1
+    @assert Meta.isexpr(m, :thunk)
+    src = m.args[1]::CodeInfo
+    src.code = Any[
+        # block 1
+        QuoteNode(:a),
+        QuoteNode(:b),
+        QuoteNode(:c),
+        GlobalRef(@__MODULE__, :test29262),
+        # block 2
+        PhiNode(Int32[4, 16], Any[false, true]), # false, true
+        PhiNode(Int32[4, 16], Any[SSAValue(1), SSAValue(2)]), # :a, :b
+        PhiNode(Int32[4, 16], Any[SSAValue(3), SSAValue(6)]), # :c, :a
+        PhiNode(Int32[16], Any[SSAValue(7)]), # NULL, :c
+        # block 3
+        PhiNode(Int32[], Any[]), # NULL, NULL
+        PhiNode(Int32[17, 8], Any[true, SSAValue(4)]), # test29262, test29262, [true]
+        PhiNode(Int32[17], Vector{Any}(undef, 1)), # NULL, NULL
+        PhiNode(Int32[8], Vector{Any}(undef, 1)), # NULL, NULL
+        PhiNode(Int32[], Any[]), # NULL, NULL
+        PhiNode(Int32[17, 8], Any[SSAValue(2), SSAValue(8)]), # NULL, :c, [:b]
+        PhiNode(Int32[], Any[]), # NULL, NULL
+        GotoIfNot(SSAValue(5), 5),
+        # block 4
+        GotoIfNot(SSAValue(10), 9),
+        # block 5
+        Expr(:call, GlobalRef(Core, :tuple), SSAValue(6), SSAValue(7), SSAValue(8), SSAValue(14)),
+        ReturnNode(SSAValue(18)),
+    ]
+    nstmts = length(src.code)
+    src.ssavaluetypes = nstmts
+    src.ssaflags = fill(zero(UInt32), nstmts)
+    src.debuginfo = Core.DebugInfo(:none)
+    m.args[1] = copy(src)
+    Compiler.verify_ir(Compiler.inflate_ir(src))
+    global test29262 = true
+    @test (:b, :a, :c, :c) === @eval $m
+    m.args[1] = copy(src)
+    global test29262 = false
+    @test (:b, :a, :c, :b) === @eval $m
+end
+
+let m = Meta.@lower 1 + 1
+    @assert Meta.isexpr(m, :thunk)
+    src = m.args[1]::CodeInfo
+    src.code = Any[
+        # block 1
+        QuoteNode(:a),
+        QuoteNode(:b),
+        GlobalRef(@__MODULE__, :test29262),
+        # block 2
+        EnterNode(12),
+        # block 3
+        UpsilonNode(),
+        UpsilonNode(),
+        UpsilonNode(SSAValue(2)),
+        GotoIfNot(SSAValue(3), 10),
+        # block 4
+        UpsilonNode(SSAValue(1)),
+        # block 5
+        Expr(:throw_undef_if_not, :expected, false),
+        ReturnNode(), # unreachable
+        # block 6
+        PhiCNode(Any[SSAValue(5), SSAValue(7), SSAValue(9)]), # NULL, :a, :b
+        PhiCNode(Any[SSAValue(6)]), # NULL
+        Expr(:pop_exception, SSAValue(4)),
+        # block 7
+        ReturnNode(SSAValue(12)),
+    ]
+    nstmts = length(src.code)
+    src.ssavaluetypes = nstmts
+    src.ssaflags = fill(zero(UInt32), nstmts)
+    src.debuginfo = Core.DebugInfo(:none)
+    Compiler.verify_ir(Compiler.inflate_ir(src))
+    global test29262 = true
+    @test :a === @eval $m
+    global test29262 = false
+    @test :b === @eval $m
+    @test isempty(current_exceptions())
+end
diff --git a/Compiler/test/invalidation.jl b/Compiler/test/invalidation.jl
new file mode 100644
index 0000000000000..233204a8a13e4
--- /dev/null
+++ b/Compiler/test/invalidation.jl
@@ -0,0 +1,363 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# setup
+# -----
+
+include("setup_Compiler.jl")
+include("irutils.jl")
+
+using Test
+
+struct InvalidationTesterToken end
+
+struct InvalidationTester <: Compiler.AbstractInterpreter
+    world::UInt
+    inf_params::Compiler.InferenceParams
+    opt_params::Compiler.OptimizationParams
+    inf_cache::Vector{Compiler.InferenceResult}
+    function InvalidationTester(;
+                                world::UInt = Base.get_world_counter(),
+                                inf_params::Compiler.InferenceParams = Compiler.InferenceParams(),
+                                opt_params::Compiler.OptimizationParams = Compiler.OptimizationParams(),
+                                inf_cache::Vector{Compiler.InferenceResult} = Compiler.InferenceResult[])
+        return new(world, inf_params, opt_params, inf_cache)
+    end
+end
+
+Compiler.InferenceParams(interp::InvalidationTester) = interp.inf_params
+Compiler.OptimizationParams(interp::InvalidationTester) = interp.opt_params
+Compiler.get_inference_world(interp::InvalidationTester) = interp.world
+Compiler.get_inference_cache(interp::InvalidationTester) = interp.inf_cache
+Compiler.cache_owner(::InvalidationTester) = InvalidationTesterToken()
+
+# basic functionality test
+# ------------------------
+
+basic_callee(x) = x
+basic_caller(x) = basic_callee(x)
+
+# run inference and check that cache exist
+@test Base.return_types((Float64,); interp=InvalidationTester()) do x
+    basic_caller(x)
+end |> only === Float64
+
+let mi = Base.method_instance(basic_callee, (Float64,))
+    ci = mi.cache
+    @test !isdefined(ci, :next)
+    @test ci.owner === InvalidationTesterToken()
+    @test ci.max_world == typemax(UInt)
+end
+
+let mi = Base.method_instance(basic_caller, (Float64,))
+    ci = mi.cache
+    @test !isdefined(ci, :next)
+    @test ci.owner === InvalidationTesterToken()
+    @test ci.max_world == typemax(UInt)
+end
+
+# this redefinition below should invalidate the cache
+const BASIC_CALLER_WORLD = Base.get_world_counter()+1
+basic_callee(x) = x, x
+@test !isdefined(Base.method_instance(basic_callee, (Float64,)), :cache)
+let mi = Base.method_instance(basic_caller, (Float64,))
+    ci = mi.cache
+    @test !isdefined(ci, :next)
+    @test ci.owner === InvalidationTesterToken()
+    @test ci.max_world == BASIC_CALLER_WORLD
+end
+
+# re-run inference and check the result is updated (and new cache exists)
+@test Base.return_types((Float64,); interp=InvalidationTester()) do x
+    basic_caller(x)
+end |> only === Tuple{Float64,Float64}
+let mi = Base.method_instance(basic_callee, (Float64,))
+    ci = mi.cache
+    @test !isdefined(ci, :next)
+    @test ci.owner === InvalidationTesterToken()
+    @test ci.max_world == typemax(UInt)
+end
+
+let mi = Base.method_instance(basic_caller, (Float64,))
+    ci = mi.cache
+    @test isdefined(ci, :next)
+    @test ci.owner === InvalidationTesterToken()
+    @test ci.max_world == typemax(UInt)
+    ci = ci.next
+    @test !isdefined(ci, :next)
+    @test ci.owner === InvalidationTesterToken()
+    @test ci.max_world != typemax(UInt)
+end
+
+
+# backedge optimization
+# ---------------------
+
+const GLOBAL_BUFFER = IOBuffer()
+
+# test backedge optimization when the callee's type and effects information are maximized
+begin
+    take!(GLOBAL_BUFFER)
+
+    pr48932_callee(x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x))
+    pr48932_caller(x) = pr48932_callee(Base.inferencebarrier(x))
+
+    # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top
+    let rt = only(Base.return_types(pr48932_callee, (Any,)))
+        @test rt === Any
+        effects = Base.infer_effects(pr48932_callee, (Any,))
+        @test effects == Compiler.Effects()
+    end
+
+    # run inference on both `pr48932_caller` and `pr48932_callee`
+    let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x
+            @inline pr48932_caller(x)
+        end |> only
+        @test rt === Any
+        @test any(iscall((src, pr48932_callee)), src.code)
+    end
+
+    let mi = only(Base.specializations(Base.only(Base.methods(pr48932_callee))))
+        # Base.method_instance(pr48932_callee, (Any,))
+        ci = mi.cache
+        @test isdefined(ci, :next)
+        @test ci.owner === nothing
+        @test ci.max_world == typemax(UInt)
+
+        # In cache due to Base.return_types(pr48932_callee, (Any,))
+        ci = ci.next
+        @test !isdefined(ci, :next)
+        @test ci.owner === InvalidationTesterToken()
+        @test ci.max_world == typemax(UInt)
+    end
+    let mi = Base.method_instance(pr48932_caller, (Int,))
+        ci = mi.cache
+        @test !isdefined(ci, :next)
+        @test ci.owner === InvalidationTesterToken()
+        @test ci.max_world == typemax(UInt)
+    end
+
+    @test 42 == pr48932_caller(42)
+    @test "42" == String(take!(GLOBAL_BUFFER))
+
+    # test that we didn't add the backedge from `pr48932_callee` to `pr48932_caller`:
+    # this redefinition below should invalidate the cache of `pr48932_callee` but not that of `pr48932_caller`
+    pr48932_callee(x) = (print(GLOBAL_BUFFER, x); nothing)
+
+    @test length(Base.methods(pr48932_callee)) == 1
+    @test Base.only(Base.methods(pr48932_callee, Tuple{Any})) === only(Base.methods(pr48932_callee))
+    @test isempty(Base.specializations(Base.only(Base.methods(pr48932_callee, Tuple{Any}))))
+    let mi = only(Base.specializations(Base.only(Base.methods(pr48932_caller))))
+        # Base.method_instance(pr48932_callee, (Any,))
+        ci = mi.cache
+        @test isdefined(ci, :next)
+        @test ci.owner === InvalidationTesterToken()
+        @test_broken ci.max_world == typemax(UInt)
+        ci = ci.next
+        @test !isdefined(ci, :next)
+        @test ci.owner === nothing
+        @test_broken ci.max_world == typemax(UInt)
+    end
+
+    @test isnothing(pr48932_caller(42))
+    @test "42" == String(take!(GLOBAL_BUFFER))
+end
+
+begin
+    deduped_callee(x::Int) = @noinline rand(Int)
+    deduped_caller1(x::Int) = @noinline deduped_callee(x)
+    deduped_caller2(x::Int) = @noinline deduped_callee(x)
+
+    # run inference on both `deduped_callerx` and `deduped_callee`
+    let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x
+            @inline deduped_caller1(x)
+            @inline deduped_caller2(x)
+        end |> only
+        @test rt === Int
+        @test any(isinvoke(:deduped_callee), src.code)
+    end
+
+    # Verify that adding the backedge again does not actually add a new backedge
+    let mi = Base.method_instance(deduped_caller1, (Int,)),
+        ci = mi.cache
+
+        callee_mi = Base.method_instance(deduped_callee, (Int,))
+
+        # Inference should have added the callers to the callee's backedges
+        @test ci in callee_mi.backedges
+
+        # In practice, inference will never end up calling `store_backedges`
+        # twice on the same CodeInstance like this - we only need to check
+        # that de-duplication works for a single invocation
+        N = length(callee_mi.backedges)
+        Core.Compiler.store_backedges(ci, Core.svec(callee_mi, callee_mi))
+        N′ = length(callee_mi.backedges)
+
+        # A single `store_backedges` invocation should de-duplicate any of the
+        # edges it is adding.
+        @test N′ - N == 1
+    end
+end
+
+# we can avoid adding backedge even if the callee's return type is not the top
+# when the return value is not used within the caller
+begin take!(GLOBAL_BUFFER)
+    pr48932_callee_inferable(x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(1)::Int)
+    pr48932_caller_unuse(x) = (pr48932_callee_inferable(Base.inferencebarrier(x)); nothing)
+
+    # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top
+    let rt = only(Base.return_types(pr48932_callee_inferable, (Any,)))
+        @test rt === Int
+        effects = Base.infer_effects(pr48932_callee_inferable, (Any,))
+        @test effects == Compiler.Effects()
+    end
+
+    # run inference on both `pr48932_caller` and `pr48932_callee`:
+    # we don't need to add backedge to `pr48932_callee` from `pr48932_caller`
+    # since the inference result of `pr48932_callee` is maximized and it's not inlined
+    let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x
+            @inline pr48932_caller_unuse(x)
+        end |> only
+        @test rt === Nothing
+        @test any(iscall((src, pr48932_callee_inferable)), src.code)
+    end
+
+    let mi = only(Base.specializations(Base.only(Base.methods(pr48932_callee_inferable))))
+        ci = mi.cache
+        @test isdefined(ci, :next)
+        @test ci.owner === nothing
+        @test ci.max_world == typemax(UInt)
+        ci = ci.next
+        @test !isdefined(ci, :next)
+        @test ci.owner === InvalidationTesterToken()
+        @test ci.max_world == typemax(UInt)
+    end
+    let mi = Base.method_instance(pr48932_caller_unuse, (Int,))
+        ci = mi.cache
+        @test !isdefined(ci, :next)
+        @test ci.owner === InvalidationTesterToken()
+        @test ci.max_world == typemax(UInt)
+    end
+
+    @test isnothing(pr48932_caller_unuse(42))
+    @test "42" == String(take!(GLOBAL_BUFFER))
+
+    # test that we didn't add the backedge from `pr48932_callee_inferable` to `pr48932_caller_unuse`:
+    # this redefinition below should invalidate the cache of `pr48932_callee_inferable` but not that of `pr48932_caller_unuse`
+    pr48932_callee_inferable(x) = (print(GLOBAL_BUFFER, "foo"); x)
+
+    @test isempty(Base.specializations(Base.only(Base.methods(pr48932_callee_inferable, Tuple{Any}))))
+    let mi = Base.method_instance(pr48932_caller_unuse, (Int,))
+        ci = mi.cache
+        @test isdefined(ci, :next)
+        @test ci.owner === InvalidationTesterToken()
+        @test_broken ci.max_world == typemax(UInt)
+        ci = ci.next
+        @test !isdefined(ci, :next)
+        @test ci.owner === nothing
+        @test_broken ci.max_world == typemax(UInt)
+    end
+    @test isnothing(pr48932_caller_unuse(42))
+    @test "foo" == String(take!(GLOBAL_BUFFER))
+end
+
+# we need to add backedge when the callee is inlined
+begin take!(GLOBAL_BUFFER)
+
+    @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x))
+    pr48932_caller_inlined(x) = pr48932_callee_inlined(Base.inferencebarrier(x))
+
+    # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top
+    let rt = only(Base.return_types(pr48932_callee_inlined, (Any,)))
+        @test rt === Any
+        effects = Base.infer_effects(pr48932_callee_inlined, (Any,))
+        @test effects == Compiler.Effects()
+    end
+
+    # run inference on `pr48932_caller_inlined` and `pr48932_callee_inlined`
+    let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x
+            @inline pr48932_caller_inlined(x)
+        end |> only
+        @test rt === Any
+        @test any(isinvoke(:pr48932_callee_inlined), src.code)
+    end
+
+    let mi = Base.method_instance(pr48932_callee_inlined, (Int,))
+        ci = mi.cache
+        @test isdefined(ci, :next)
+        @test ci.owner === nothing
+        @test ci.max_world == typemax(UInt)
+        ci = ci.next
+        @test !isdefined(ci, :next)
+        @test ci.owner === InvalidationTesterToken()
+        @test ci.max_world == typemax(UInt)
+    end
+    let mi = Base.method_instance(pr48932_caller_inlined, (Int,))
+        ci = mi.cache
+        @test !isdefined(ci, :next)
+        @test ci.owner === InvalidationTesterToken()
+        @test ci.max_world == typemax(UInt)
+    end
+
+    @test 42 == pr48932_caller_inlined(42)
+    @test "42" == String(take!(GLOBAL_BUFFER))
+
+    # test that we added the backedge from `pr48932_callee_inlined` to `pr48932_caller_inlined`:
+    # this redefinition below should invalidate the cache of `pr48932_callee_inlined` but not that of `pr48932_caller_inlined`
+    @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); nothing)
+
+    @test isempty(Base.specializations(Base.only(Base.methods(pr48932_callee_inlined, Tuple{Any}))))
+    let mi = Base.method_instance(pr48932_caller_inlined, (Int,))
+        ci = mi.cache
+        @test isdefined(ci, :next)
+        @test ci.owner === InvalidationTesterToken()
+        @test ci.max_world != typemax(UInt)
+        ci = ci.next
+        @test !isdefined(ci, :next)
+        @test ci.owner === nothing
+        @test ci.max_world != typemax(UInt)
+    end
+
+    @test isnothing(pr48932_caller_inlined(42))
+    @test "42" == String(take!(GLOBAL_BUFFER))
+end
+
+# Issue #57696
+# This test checks for invalidation of recursive backedges. However, unfortunately, the original failure
+# manifestation was an unreliable segfault or an assertion failure, so we don't have a more compact test.
+@test success(`$(Base.julia_cmd()) -e 'Base.typejoin(x, ::Type) = 0; exit()'`)
+
+# Test drop_all_caches functionality
+@testset "drop_all_caches" begin
+    # Run in subprocess to avoid disrupting the main test process
+    script = """
+        # Define test functions
+        drop_cache_test_f(x) = x + 1
+        drop_cache_test_g(x) = drop_cache_test_f(x) * 2
+
+        # Compile the functions and capture stderr
+        drop_cache_test_g(5) == 12 || error("failure")
+
+        println(stderr, "==DROPPING ALL CACHES==")
+
+        # Drop all caches
+        Base.drop_all_caches()
+
+        # Functions should still work (but will be recompiled on next call)
+        drop_cache_test_g(5) == 12 || error("failure")
+
+        println(stderr, "SUCCESS: drop_all_caches test passed")
+        exit(0)
+    """
+
+    io = Pipe()
+    # Run the test in a subprocess because Base.drop_all_caches() is extreme
+    result = run(pipeline(`$(Base.julia_cmd()[1]) --startup-file=no --trace-compile=stderr -e "$script"`, stderr=io))
+    close(io.in)
+    err = read(io, String)
+    # println(err)
+    @test success(result)
+    err_before, err_after = split(err, "==DROPPING ALL CACHES==")
+    @test occursin("SUCCESS: drop_all_caches test passed", err_after)
+    @test occursin("precompile(Tuple{typeof(Main.drop_cache_test_g), $Int})", err_before)
+    @test occursin("precompile(Tuple{typeof(Main.drop_cache_test_g), $Int}) # recompile", err_after)
+end
diff --git a/test/compiler/irpasses.jl b/Compiler/test/irpasses.jl
similarity index 52%
rename from test/compiler/irpasses.jl
rename to Compiler/test/irpasses.jl
index 68eb2e7137796..86567440e9fb4 100644
--- a/test/compiler/irpasses.jl
+++ b/Compiler/test/irpasses.jl
@@ -2,20 +2,16 @@
 
 using Test
 using Base.Meta
-import Core:
-    CodeInfo, Argument, SSAValue, GotoNode, GotoIfNot, PiNode, PhiNode,
-    QuoteNode, ReturnNode
+using Core.IR
 
-include(normpath(@__DIR__, "irutils.jl"))
+include("setup_Compiler.jl")
+include("irutils.jl")
 
 # domsort
 # =======
 
 ## Test that domsort doesn't mangle single-argument phis (#29262)
-let m = Meta.@lower 1 + 1
-    @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::CodeInfo
-    src.code = Any[
+let code = Any[
         # block 1
         Expr(:call, :opaque),
         GotoIfNot(Core.SSAValue(1), 10),
@@ -33,24 +29,16 @@ let m = Meta.@lower 1 + 1
         Core.PhiNode(Int32[2, 8], Any[0, Core.SSAValue(7)]),
         ReturnNode(Core.SSAValue(10)),
     ]
-    nstmts = length(src.code)
-    src.ssavaluetypes = nstmts
-    src.codelocs = fill(Int32(1), nstmts)
-    src.ssaflags = fill(Int32(0), nstmts)
-    ir = Core.Compiler.inflate_ir(src)
-    Core.Compiler.verify_ir(ir)
-    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
-    ir = Core.Compiler.domsort_ssa!(ir, domtree)
-    Core.Compiler.verify_ir(ir)
-    phi = ir.stmts.inst[3]
+    ir = make_ircode(code)
+    domtree = Compiler.construct_domtree(ir)
+    ir = Compiler.domsort_ssa!(ir, domtree)
+    Compiler.verify_ir(ir)
+    phi = ir.stmts.stmt[3]
     @test isa(phi, Core.PhiNode) && length(phi.edges) == 1
 end
 
 # test that we don't stack-overflow in SNCA with large functions.
-let m = Meta.@lower 1 + 1
-    @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::CodeInfo
-    code = Any[]
+let code = Any[]
     N = 2^15
     for i in 1:2:N
         push!(code, Expr(:call, :opaque))
@@ -59,23 +47,16 @@ let m = Meta.@lower 1 + 1
     # all goto here
     push!(code, Expr(:call, :opaque))
     push!(code, ReturnNode(nothing))
-    src.code = code
-
-    nstmts = length(src.code)
-    src.ssavaluetypes = nstmts
-    src.codelocs = fill(Int32(1), nstmts)
-    src.ssaflags = fill(Int32(0), nstmts)
-    ir = Core.Compiler.inflate_ir(src)
-    Core.Compiler.verify_ir(ir)
-    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
-    ir = Core.Compiler.domsort_ssa!(ir, domtree)
-    Core.Compiler.verify_ir(ir)
+    ir = make_ircode(code)
+    domtree = Compiler.construct_domtree(ir)
+    ir = Compiler.domsort_ssa!(ir, domtree)
+    Compiler.verify_ir(ir)
 end
 
 # SROA
 # ====
 
-import Core.Compiler: widenconst
+using .Compiler: widenconst
 
 is_load_forwarded(src::CodeInfo) = !any(iscall((src, getfield)), src.code)
 is_scalar_replaced(src::CodeInfo) =
@@ -456,7 +437,7 @@ let src = code_typed1() do
     @test count(isnew, src.code) == 1
 end
 
-# should eliminate allocation whose address isn't taked even if it has uninitialized field(s)
+# should eliminate allocation whose address isn't taken even if it has uninitialized field(s)
 mutable struct BadRef
     x::String
     y::String
@@ -505,7 +486,7 @@ function isdefined_elim()
     return arr
 end
 let src = code_typed1(isdefined_elim)
-    @test is_scalar_replaced(src)
+    @test count(isisdefined, src.code) == 0
 end
 @test isdefined_elim() == Any[]
 
@@ -596,7 +577,6 @@ let # lifting `isa` through Core.ifelse
     @test count(iscall((src, isa)), src.code) == 0
 end
 
-
 let # lifting `isdefined` through PhiNode
     src = code_typed1((Bool,Some{Int},)) do c, x
         y = c ? x : nothing
@@ -688,7 +668,7 @@ let nt = (a=1, b=2)
 end
 
 # Expr(:new) annotated as PartialStruct
-struct FooPartial
+struct FooPartialNew
     x
     y
     global f_partial
@@ -697,10 +677,7 @@ end
 @test fully_eliminated(f_partial, Tuple{Float64})
 
 # A SSAValue after the compaction line
-let m = Meta.@lower 1 + 1
-    @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::CodeInfo
-    src.code = Any[
+let code = Any[
         # block 1
         nothing,
         # block 2
@@ -719,7 +696,7 @@ let m = Meta.@lower 1 + 1
         # block 5
         ReturnNode(Core.SSAValue(2)),
     ]
-    src.ssavaluetypes = Any[
+    ssavaluetypes = Any[
         Nothing,
         Any,
         Bool,
@@ -732,21 +709,14 @@ let m = Meta.@lower 1 + 1
         Any,
         Any
     ]
-    nstmts = length(src.code)
-    src.codelocs = fill(one(Int32), nstmts)
-    src.ssaflags = fill(one(Int32), nstmts)
-    src.slotflags = fill(zero(UInt8), 3)
-    ir = Core.Compiler.inflate_ir(src)
-    @test Core.Compiler.verify_ir(ir) === nothing
-    ir = @test_nowarn Core.Compiler.sroa_pass!(ir)
-    @test Core.Compiler.verify_ir(ir) === nothing
+    slottypes = Any[Any, Any, Any]
+    ir = make_ircode(code; ssavaluetypes, slottypes)
+    ir = @test_nowarn Compiler.sroa_pass!(ir)
+    @test Compiler.verify_ir(ir) === nothing
 end
 
 # A lifted Core.ifelse with an eliminated branch (#50276)
-let m = Meta.@lower 1 + 1
-    @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::CodeInfo
-    src.code = Any[
+let code = Any[
         # block 1
         #=  %1: =# Core.Argument(2),
         # block 2
@@ -770,7 +740,7 @@ let m = Meta.@lower 1 + 1
         # block 5
         #= %11: =# ReturnNode(false),
     ]
-    src.ssavaluetypes = Any[
+    ssavaluetypes = Any[
         Any,
         Union{Missing, Bool},
         Any,
@@ -783,14 +753,10 @@ let m = Meta.@lower 1 + 1
         Any,
         Any
     ]
-    nstmts = length(src.code)
-    src.codelocs = fill(one(Int32), nstmts)
-    src.ssaflags = fill(one(Int32), nstmts)
-    src.slotflags = fill(zero(UInt8), 3)
-    ir = Core.Compiler.inflate_ir(src)
-    @test Core.Compiler.verify_ir(ir) === nothing
-    ir = @test_nowarn Core.Compiler.sroa_pass!(ir)
-    @test Core.Compiler.verify_ir(ir) === nothing
+    slottypes = Any[Any, Any, Any]
+    ir = make_ircode(code; ssavaluetypes, slottypes)
+    ir = @test_nowarn Compiler.sroa_pass!(ir)
+    @test Compiler.verify_ir(ir) === nothing
 end
 
 # Issue #31546 - missing widenconst in SROA
@@ -805,51 +771,44 @@ end
 # Tests for cfg simplification
 let src = code_typed(gcd, Tuple{Int, Int})[1].first
     # Test that cfg_simplify doesn't mangle IR on code with loops
-    ir = Core.Compiler.inflate_ir(src)
-    Core.Compiler.verify_ir(ir)
-    ir = Core.Compiler.cfg_simplify!(ir)
-    Core.Compiler.verify_ir(ir)
-end
-
-let m = Meta.@lower 1 + 1
-    # Test that CFG simplify combines redundant basic blocks
-    @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::CodeInfo
-    src.code = Any[
-        Core.Compiler.GotoNode(2),
-        Core.Compiler.GotoNode(3),
-        Core.Compiler.GotoNode(4),
-        Core.Compiler.GotoNode(5),
-        Core.Compiler.GotoNode(6),
-        Core.Compiler.GotoNode(7),
+    ir = Compiler.inflate_ir(src)
+    Compiler.verify_ir(ir)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
+end
+
+let # Test that CFG simplify combines redundant basic blocks
+    code = Any[
+        Compiler.GotoNode(2),
+        Compiler.GotoNode(3),
+        Compiler.GotoNode(4),
+        Compiler.GotoNode(5),
+        Compiler.GotoNode(6),
+        Compiler.GotoNode(7),
         ReturnNode(2)
     ]
-    nstmts = length(src.code)
-    src.ssavaluetypes = nstmts
-    src.codelocs = fill(Int32(1), nstmts)
-    src.ssaflags = fill(Int32(0), nstmts)
-    ir = Core.Compiler.inflate_ir(src)
-    Core.Compiler.verify_ir(ir)
-    ir = Core.Compiler.cfg_simplify!(ir)
-    Core.Compiler.verify_ir(ir)
-    ir = Core.Compiler.compact!(ir)
-    @test length(ir.cfg.blocks) == 1 && Core.Compiler.length(ir.stmts) == 1
+    ir = make_ircode(code)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
+    ir = Compiler.compact!(ir)
+    @test length(ir.cfg.blocks) == 1 && Compiler.length(ir.stmts) == 1
 end
 
 # Test cfg_simplify in complicated sequences of dropped and merged bbs
-using Core.Compiler: Argument, IRCode, GotoNode, GotoIfNot, ReturnNode, NoCallInfo, BasicBlock, StmtRange, SSAValue
-bb_term(ir, bb) = Core.Compiler.getindex(ir, SSAValue(Core.Compiler.last(ir.cfg.blocks[bb].stmts)))[:inst]
+using .Compiler: Argument, IRCode, GotoNode, GotoIfNot, ReturnNode, NoCallInfo, BasicBlock, StmtRange, SSAValue
+bb_term(ir, bb) = Compiler.getindex(ir, SSAValue(Compiler.last(ir.cfg.blocks[bb].stmts)))[:stmt]
 
 function each_stmt_a_bb(stmts, preds, succs)
     ir = IRCode()
-    empty!(ir.stmts.inst)
-    append!(ir.stmts.inst, stmts)
-    empty!(ir.stmts.type); append!(ir.stmts.type, [Nothing for _ = 1:length(stmts)])
+    empty!(ir.stmts.stmt)
+    append!(ir.stmts.stmt, stmts)
+    empty!(ir.stmts.type); append!(ir.stmts.type, [Any for _ = 1:length(stmts)])
     empty!(ir.stmts.flag); append!(ir.stmts.flag, [0x0 for _ = 1:length(stmts)])
-    empty!(ir.stmts.line); append!(ir.stmts.line, [Int32(0) for _ = 1:length(stmts)])
+    empty!(ir.stmts.line); append!(ir.stmts.line, [Int32(0) for _ = 1:3length(stmts)])
     empty!(ir.stmts.info); append!(ir.stmts.info, [NoCallInfo() for _ = 1:length(stmts)])
     empty!(ir.cfg.blocks); append!(ir.cfg.blocks, [BasicBlock(StmtRange(i, i), preds[i], succs[i]) for i = 1:length(stmts)])
-    Core.Compiler.verify_ir(ir)
+    empty!(ir.cfg.index);  append!(ir.cfg.index,  [i for i = 2:length(stmts)])
+    Compiler.verify_ir(ir)
     return ir
 end
 
@@ -885,8 +844,8 @@ for gotoifnot in (false, true)
     preds = Vector{Int}[Int[], [1], [2], [2], [4], [5], [6], [1], [3], [4, 9], [5, 10], gotoifnot ? [6,11] : [6], [7, 11]]
     succs = Vector{Int}[[2, 8], [3, 4], [9], [5, 10], [6, 11], [7, 12], [13], Int[], [10], [11], gotoifnot ? [12, 13] : [13], Int[], Int[]]
     ir = each_stmt_a_bb(stmts, preds, succs)
-    ir = Core.Compiler.cfg_simplify!(ir)
-    Core.Compiler.verify_ir(ir)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
 
     if gotoifnot
         let term4 = bb_term(ir, 4), term5 = bb_term(ir, 5)
@@ -916,78 +875,62 @@ let stmts = [
     preds = Vector{Int}[Int[], [1], [2], [1], [2, 3]]
     succs = Vector{Int}[[2, 4], [3, 5], [5], Int[], Int[]]
     ir = each_stmt_a_bb(stmts, preds, succs)
-    ir = Core.Compiler.cfg_simplify!(ir)
-    Core.Compiler.verify_ir(ir)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
 
     @test length(ir.cfg.blocks) == 4
     terms = map(i->bb_term(ir, i), 1:length(ir.cfg.blocks))
     @test Set(term.val for term in terms if isa(term, ReturnNode)) == Set([1,2])
 end
 
-let m = Meta.@lower 1 + 1
-    # Test that CFG simplify doesn't mess up when chaining past return blocks
-    @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::CodeInfo
-    src.code = Any[
-        Core.Compiler.GotoIfNot(Core.Compiler.Argument(2), 3),
-        Core.Compiler.GotoNode(4),
+let # Test that CFG simplify doesn't mess up when chaining past return blocks
+    code = Any[
+        Compiler.GotoIfNot(Compiler.Argument(2), 3),
+        Compiler.GotoNode(4),
         ReturnNode(1),
-        Core.Compiler.GotoNode(5),
-        Core.Compiler.GotoIfNot(Core.Compiler.Argument(2), 7),
+        Compiler.GotoNode(5),
+        Compiler.GotoIfNot(Compiler.Argument(2), 7),
         # This fall through block of the previous GotoIfNot
         # must be moved up along with it, when we merge it
         # into the goto 4 block.
         ReturnNode(2),
         ReturnNode(3)
     ]
-    nstmts = length(src.code)
-    src.ssavaluetypes = nstmts
-    src.codelocs = fill(Int32(1), nstmts)
-    src.ssaflags = fill(Int32(0), nstmts)
-    ir = Core.Compiler.inflate_ir(src)
-    Core.Compiler.verify_ir(ir)
-    ir = Core.Compiler.cfg_simplify!(ir)
-    Core.Compiler.verify_ir(ir)
+    ir = make_ircode(code)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
     @test length(ir.cfg.blocks) == 5
-    ret_2 = ir.stmts.inst[ir.cfg.blocks[3].stmts[end]]
-    @test isa(ret_2, Core.Compiler.ReturnNode) && ret_2.val == 2
+    ret_2 = ir.stmts.stmt[ir.cfg.blocks[3].stmts[end]]
+    @test isa(ret_2, Compiler.ReturnNode) && ret_2.val == 2
 end
 
-let m = Meta.@lower 1 + 1
-    # Test that CFG simplify doesn't try to merge every block in a loop into
+let # Test that CFG simplify doesn't try to merge every block in a loop into
     # its predecessor
-    @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::CodeInfo
-    src.code = Any[
+    code = Any[
         # Block 1
-        Core.Compiler.GotoNode(2),
+        Compiler.GotoNode(2),
         # Block 2
-        Core.Compiler.GotoNode(3),
+        Compiler.GotoNode(3),
         # Block 3
-        Core.Compiler.GotoNode(1)
+        Compiler.GotoNode(1)
     ]
-    nstmts = length(src.code)
-    src.ssavaluetypes = nstmts
-    src.codelocs = fill(Int32(1), nstmts)
-    src.ssaflags = fill(Int32(0), nstmts)
-    ir = Core.Compiler.inflate_ir(src)
-    Core.Compiler.verify_ir(ir)
-    ir = Core.Compiler.cfg_simplify!(ir)
-    Core.Compiler.verify_ir(ir)
+    ir = make_ircode(code)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
     @test length(ir.cfg.blocks) == 1
 end
 
 # `cfg_simplify!` shouldn't error in a presence of `try/catch` block
-let ir = Base.code_ircode(; optimize_until="slot2ssa") do
+let ir = Base.code_ircode(; optimize_until="CC: SLOT2REG") do
         v = try
         catch
         end
         v
     end |> only |> first
-    Core.Compiler.verify_ir(ir)
+    Compiler.verify_ir(ir)
     nb = length(ir.cfg.blocks)
-    ir = Core.Compiler.cfg_simplify!(ir)
-    Core.Compiler.verify_ir(ir)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
     na = length(ir.cfg.blocks)
     @test na < nb
 end
@@ -1092,8 +1035,7 @@ exc39508 = ErrorException("expected")
 end
 @test test39508() === exc39508
 
-let
-    # `typeassert` elimination after SROA
+let # `typeassert` elimination after SROA
     # NOTE we can remove this optimization once inference is able to reason about memory-effects
     src = @eval Module() begin
         mutable struct Foo; x; end
@@ -1108,8 +1050,7 @@ let
     @test count(iscall((src, typeassert)), src.code) == 0
 end
 
-let
-    # Test for https://github.com/JuliaLang/julia/issues/43402
+let # Test for https://github.com/JuliaLang/julia/issues/43402
     # Ensure that structs required not used outside of the ccall,
     # still get listed in the ccall_preserves
 
@@ -1130,7 +1071,7 @@ let
     end
 
     refs = map(Core.SSAValue, findall(@nospecialize(x)->Meta.isexpr(x, :new), src.code))
-    some_ccall = findfirst(@nospecialize(x) -> Meta.isexpr(x, :foreigncall) && x.args[1] == :(:some_ccall), src.code)
+    some_ccall = findfirst(@nospecialize(x) -> Meta.isexpr(x, :foreigncall) && x.args[1] == Expr(:tuple, :(:some_ccall)), src.code)
     @assert some_ccall !== nothing
     stmt = src.code[some_ccall]
     nccallargs = length(stmt.args[3]::Core.SimpleVector)
@@ -1140,15 +1081,17 @@ let
     @test all(alloc -> alloc in preserves, refs)
 end
 
-# test `stmt_effect_free` and DCE
-# ===============================
+# test `flags_for_effects` and DCE
+# ================================
 
-let # effect-freeness computation for array allocation
+@testset "effect-freeness computation for array allocation" begin
 
     # should eliminate dead allocations
-    good_dims = @static Int === Int64 ? (1:10) : (1:8)
-    Ns = @static Int === Int64 ? (1:10) : (1:8)
-    for dim = good_dims, N = Ns
+    good_dims = [1, 2, 3, 4, 10]
+    Ns = [1, 2, 3, 4, 10]
+    Ts = Any[Int, Union{Missing,Nothing}, Nothing, Any]
+    @testset "$dim, $N" for dim in good_dims, N in Ns
+        Int64(dim)^N > typemax(Int) && continue
         dims = ntuple(i->dim, N)
         @test @eval fully_eliminated() do
             Array{Int,$N}(undef, $(dims...))
@@ -1158,14 +1101,14 @@ let # effect-freeness computation for array allocation
 
     # shouldn't eliminate erroneous dead allocations
     bad_dims = [-1, typemax(Int)]
-    for dim in bad_dims, N in 1:10
+    @testset "$dim, $N, $T" for dim in bad_dims, N in Ns, T in Ts
         dims = ntuple(i->dim, N)
         @test @eval !fully_eliminated() do
-            Array{Int,$N}(undef, $(dims...))
+            Array{$T,$N}(undef, $(dims...))
             nothing
         end
-        @test_throws "invalid Array" @eval let
-            Array{Int,$N}(undef, $(dims...))
+        @test_throws "invalid " @eval let
+            Array{$T,$N}(undef, $(dims...))
             nothing
         end
     end
@@ -1194,10 +1137,10 @@ let ci = code_typed1(optimize=false) do
             gcd(64, 128)
         end
     end
-    ir = Core.Compiler.inflate_ir(ci)
-    @test count(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.inst) == 1
-    ir = Core.Compiler.compact!(ir, true)
-    @test count(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.inst) == 0
+    ir = Compiler.inflate_ir(ci)
+    @test any(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.stmt)
+    ir = Compiler.compact!(ir, true)
+    @test !any(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.stmt)
 end
 
 # Test that adce_pass! can drop phi node uses that can be concluded unused
@@ -1223,26 +1166,29 @@ function foo_cfg_empty(b)
         @goto x
     end
     @label x
-    return 1
+    return b
 end
 let ci = code_typed(foo_cfg_empty, Tuple{Bool}, optimize=true)[1][1]
-    ir = Core.Compiler.inflate_ir(ci)
+    ir = Compiler.inflate_ir(ci)
     @test length(ir.stmts) == 3
     @test length(ir.cfg.blocks) == 3
-    Core.Compiler.verify_ir(ir)
-    ir = Core.Compiler.cfg_simplify!(ir)
-    Core.Compiler.verify_ir(ir)
+    Compiler.verify_ir(ir)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
     @test length(ir.cfg.blocks) <= 2
-    @test isa(ir.stmts[length(ir.stmts)][:inst], ReturnNode)
+    @test isa(ir.stmts[length(ir.stmts)][:stmt], ReturnNode)
 end
 
-@test Core.Compiler.is_effect_free(Base.infer_effects(getfield, (Complex{Int}, Symbol)))
-@test Core.Compiler.is_effect_free(Base.infer_effects(getglobal, (Module, Symbol)))
+@test Compiler.is_effect_free(Base.infer_effects(getfield, (Complex{Int}, Symbol)))
+
+# We consider a potential deprecatio warning an effect, so for completely unknown getglobal,
+# we taint the effect_free bit.
+@test !Compiler.is_effect_free(Base.infer_effects(getglobal, (Module, Symbol)))
 
 # Test that UseRefIterator gets SROA'd inside of new_to_regular (#44557)
 # expression and new_to_regular offset are arbitrary here, we just want to see the UseRefIterator erased
 let e = Expr(:call, Core.GlobalRef(Base, :arrayset), false, Core.SSAValue(4), Core.SSAValue(9), Core.SSAValue(8))
-    new_to_reg(expr) = Core.Compiler.new_to_regular(expr, 1)
+    new_to_reg(expr) = Compiler.new_to_regular(expr, 1)
     @allocated new_to_reg(e) # warmup call
     @test (@allocated new_to_reg(e)) == 0
 end
@@ -1371,3 +1317,821 @@ struct TParamTypeofTest2{S,T}
 end
 tparam_typeof_test_elim2(x, y) = TParamTypeofTest2(x, y).x
 @test fully_eliminated(tparam_typeof_test_elim2, Tuple{Any,Any})
+
+# Test that sroa doesn't get confused by free type parameters in struct types
+struct Wrap1{T}
+    x::T
+    @eval @inline (T::Type{Wrap1{X}} where X)(x) = $(Expr(:new, :T, :x))
+end
+Wrap1(x) = Wrap1{typeof(x)}(x)
+
+function wrap1_wrap1_ifelse(b, x, w1)
+    w2 = Wrap1(Wrap1(x))
+    w3 = Wrap1(typeof(w1)(w1.x))
+    Core.ifelse(b, w3, w2).x.x
+end
+function wrap1_wrap1_wrapper(b, x, y)
+    w1 = Base.inferencebarrier(Wrap1(y))::Wrap1{<:Union{Int, Float64}}
+    wrap1_wrap1_ifelse(b, x, w1)
+end
+@test wrap1_wrap1_wrapper(true, 1, 1.0) === 1.0
+@test wrap1_wrap1_wrapper(false, 1, 1.0) === 1
+
+# Test unswitching-union optimization within SRO Apass
+function sroaunswitchuniontuple(c, x1, x2)
+    t = c ? (x1,) : (x2,)
+    return getfield(t, 1)
+end
+struct SROAUnswitchUnion1{T}
+    x::T
+end
+struct SROAUnswitchUnion2{S,T}
+    x::T
+    @inline SROAUnswitchUnion2{S}(x::T) where {S,T} = new{S,T}(x)
+end
+function sroaunswitchunionstruct1(c, x1, x2)
+    x = c ? SROAUnswitchUnion1(x1) : SROAUnswitchUnion1(x2)
+    return getfield(x, :x)
+end
+function sroaunswitchunionstruct2(c, x1, x2)
+    x = c ? SROAUnswitchUnion2{:a}(x1) : SROAUnswitchUnion2{:a}(x2)
+    return getfield(x, :x)
+end
+let src = code_typed1(sroaunswitchuniontuple, Tuple{Bool, Int, Float64})
+    @test count(isnew, src.code) == 0
+    @test count(iscall((src, getfield)), src.code) == 0
+end
+let src = code_typed1(sroaunswitchunionstruct1, Tuple{Bool, Int, Float64})
+    @test count(isnew, src.code) == 0
+    @test count(iscall((src, getfield)), src.code) == 0
+end
+@test sroaunswitchunionstruct2(true, 1, 1.0) === 1
+@test sroaunswitchunionstruct2(false, 1, 1.0) === 1.0
+
+# Test SROA of union into getfield
+struct SingleFieldStruct1
+    x::Int
+end
+struct SingleFieldStruct2
+    x::Int
+end
+function foo(b, x)
+    if b
+        f = SingleFieldStruct1(x)
+    else
+        f = SingleFieldStruct2(x)
+    end
+    getfield(f, :x) + 1
+end
+@test foo(true, 1) == 2
+
+# ifelse folding
+@test Compiler.is_removable_if_unused(Base.infer_effects(exp, (Float64,)))
+@test !Compiler.is_inlineable(code_typed1(exp, (Float64,)))
+@test fully_eliminated(; retval=Core.Argument(2)) do x::Float64
+    return Core.ifelse(true, x, exp(x))
+end
+@test fully_eliminated(; retval=Core.Argument(2)) do x::Float64
+    return ifelse(true, x, exp(x)) # the optimization should be applied to post-inlining IR too
+end
+@test fully_eliminated(; retval=Core.Argument(2)) do x::Float64
+    return ifelse(isa(x, Float64), x, exp(x))
+end
+func_coreifelse(c, x) = Core.ifelse(c, x, x)
+func_ifelse(c, x) = ifelse(c, x, x)
+@test fully_eliminated(func_coreifelse, (Bool,Float64); retval=Core.Argument(3))
+@test !fully_eliminated(func_coreifelse, (Any,Float64))
+@test fully_eliminated(func_ifelse, (Bool,Float64); retval=Core.Argument(3))
+@test !fully_eliminated(func_ifelse, (Any,Float64))
+
+# PhiC fixup of compact! with cfg modification
+@inline function big_dead_throw_catch()
+    x = 1
+    try
+        x = 2
+        if Ref{Bool}(false)[]
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            Base.donotdelete(x)
+            x = 3
+        end
+    catch
+        return x
+    end
+end
+
+function call_big_dead_throw_catch()
+    if Ref{Bool}(false)[]
+        return big_dead_throw_catch()
+    end
+    return 4
+end
+
+# Issue #51159 - Unreachable reached in try-catch block
+function f_with_early_try_catch_exit()
+    result = false
+    for i in 3
+        x = try
+        catch
+            # This introduces an early Expr(:leave) that we must respect when building
+            # φᶜ-nodes in slot2ssa. In particular, we have to ignore the `result = x`
+            # assignment that occurs outside of this try-catch block
+            continue
+        end
+        result = x
+    end
+    result
+end
+
+let ir = first(only(Base.code_ircode(f_with_early_try_catch_exit, (); optimize_until="CC: SLOT2REG")))
+    for i = 1:length(ir.stmts)
+        expr = ir.stmts[i][:stmt]
+        if isa(expr, PhiCNode)
+            # The φᶜ should only observe the value of `result` at the try-catch :enter
+            # (from the `result = false` assignment), since `result = x` assignment is
+            # dominated by an Expr(:leave).
+            @test length(expr.values) == 1
+        end
+    end
+end
+
+@test isnothing(f_with_early_try_catch_exit())
+
+# Issue #51144 - UndefRefError during compaction
+let code = Any[
+        # block 1  → 2, 3
+        #=  %1: =# Expr(:(=), Core.SlotNumber(4), Core.Argument(2)),
+        #=  %2: =# Expr(:call, :(===), Core.SlotNumber(4), nothing),
+        #=  %3: =# GotoIfNot(Core.SSAValue(1), 5),
+        # block 2
+        #=  %4: =# ReturnNode(nothing),
+        # block 3  → 4, 5
+        #=  %5: =# Expr(:(=), Core.SlotNumber(4), false),
+        #=  %6: =# GotoIfNot(Core.Argument(2), 8),
+        # block 4  → 5
+        #=  %7: =# Expr(:(=), Core.SlotNumber(4), true),
+        # block 5
+        #=  %8: =# ReturnNode(nothing), # Must not insert a π-node here
+    ]
+    slottypes = Any[Any, Union{Bool, Nothing}, Bool, Union{Bool, Nothing}]
+    src = make_codeinfo(code; slottypes)
+
+    mi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ());
+    mi.specTypes = Tuple{}
+    mi.def = Module()
+
+    # Simulate the important results from inference
+    interp = Compiler.NativeInterpreter()
+    sv = Compiler.OptimizationState(mi, src, interp)
+    slot_id = 4
+    for block_id = 3:5
+        # (_4 !== nothing) conditional narrows the type, triggering PiNodes
+        sv.bb_vartables[block_id][slot_id] = VarState(Bool, #= maybe_undef =# false)
+    end
+
+    ir = Compiler.convert_to_ircode(src, sv)
+    ir = Compiler.slot2reg(ir, src, sv)
+    ir = Compiler.compact!(ir)
+
+    Compiler.verify_ir(ir)
+end
+
+function f_with_merge_to_entry_block()
+    while true
+        i = @noinline rand(Int)
+        if @noinline isodd(i)
+            return i
+        end
+    end
+end
+
+let (ir, _) = only(Base.code_ircode(f_with_merge_to_entry_block))
+    Compiler.verify_ir(ir)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
+end
+
+# Test that CFG simplify doesn't leave an un-renamed SSA Value
+let # Test that CFG simplify doesn't try to merge every block in a loop into
+    # its predecessor
+    code = Any[
+        # Block 1
+        GotoIfNot(Argument(1), 3),
+        # Block 2
+        GotoNode(5),
+        # Block 3
+        Expr(:call, Base.inferencebarrier, 1),
+        GotoNode(6),
+        # Block 4
+        Expr(:call, Base.inferencebarrier, 2), # fallthrough
+        # Block 5
+        PhiNode(Int32[4, 5], Any[SSAValue(3), SSAValue(5)]),
+        ReturnNode(1)
+    ]
+    ir = make_ircode(code)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
+    @test length(ir.cfg.blocks) == 4
+end
+
+# JET.test_opt(Compiler.cfg_simplify!, (Compiler.IRCode,))
+
+# Test support for Core.OptimizedGenerics.KeyValue protocol
+function persistent_dict_elim()
+    a = Base.PersistentDict(:a => 1)
+    return a[:a]
+end
+
+# Ideally we would be able to fully eliminate this,
+# but currently this would require an extra round of constprop
+@test_broken fully_eliminated(persistent_dict_elim)
+@test code_typed(persistent_dict_elim)[1][1].code[end] == Core.ReturnNode(1)
+
+function persistent_dict_elim_multiple()
+    a = Base.PersistentDict(:a => 1)
+    b = Base.PersistentDict(a, :b => 2)
+    return b[:a]
+end
+@test_broken fully_eliminated(persistent_dict_elim_multiple)
+let code = code_typed(persistent_dict_elim_multiple)[1][1].code
+    @test count(x->isexpr(x, :invoke), code) == 0
+    @test code[end] == Core.ReturnNode(1)
+end
+
+function persistent_dict_elim_multiple_phi(c::Bool)
+    if c
+        a = Base.PersistentDict(:a => 1)
+    else
+        a = Base.PersistentDict(:a => 1)
+    end
+    b = Base.PersistentDict(a, :b => 2)
+    return b[:a]
+end
+@test_broken fully_eliminated(persistent_dict_elim_multiple_phi)
+@test code_typed(persistent_dict_elim_multiple_phi)[1][1].code[end] == Core.ReturnNode(1)
+
+function persistent_dict_elim_multiple_phi2(c::Bool)
+    z = Base.inferencebarrier(1)::Int
+    if c
+        a = Base.PersistentDict(:a => z)
+    else
+        a = Base.PersistentDict(:a => z)
+    end
+    b = Base.PersistentDict(a, :b => 2)
+    return b[:a]
+end
+@test persistent_dict_elim_multiple_phi2(true) == 1
+
+# Test CFG simplify with try/catch blocks
+let code = Any[
+        # Block 1
+        GotoIfNot(Argument(1), 5),
+        # Block 2
+        EnterNode(4),
+        # Block 3
+        Expr(:leave, SSAValue(2)),
+        # Block 4
+        GotoNode(5),
+        # Block 5
+        ReturnNode(1)
+    ]
+    ir = make_ircode(code)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
+    @test length(ir.cfg.blocks) <= 5
+end
+
+# Test CFG simplify with single predecessor phi node
+let code = Any[
+        # Block 1
+        GotoNode(3),
+        # Block 2
+        nothing,
+        # Block 3
+        Expr(:call, Base.inferencebarrier, 1),
+        GotoNode(5),
+        # Block 4
+        PhiNode(Int32[4], Any[SSAValue(3)]),
+        ReturnNode(SSAValue(5))
+    ]
+    ir = make_ircode(code)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
+    @test length(ir.cfg.blocks) <= 2
+    ir = Compiler.compact!(ir)
+    @test length(ir.stmts) <= 3
+    @test (ir[SSAValue(length(ir.stmts))][:stmt]::ReturnNode).val !== nothing
+end
+
+let code = Any[
+    Expr(:call, Base.inferencebarrier, Argument(1)), # ::Bool
+    Expr(:call, Core.tuple, 1), # ::Tuple{Int}
+    Expr(:call, Core.tuple, 1.0), # ::Tuple{Float64}
+    Expr(:call, Core.ifelse, SSAValue(1), SSAValue(2), SSAValue(3)), # ::Tuple{Int} (e.g. from inlining)
+    Expr(:call, Core.getfield, SSAValue(4), 1), # ::Int
+    ReturnNode(SSAValue(5))
+]
+    try
+        argtypes = Any[Bool]
+        ssavaluetypes = Any[Bool, Tuple{Int}, Tuple{Float64}, Tuple{Int}, Int, Any]
+        ir = make_ircode(code; slottypes=argtypes, ssavaluetypes, verify=true)
+        Compiler.__set_check_ssa_counts(true)
+        ir = Compiler.sroa_pass!(ir)
+        Compiler.verify_ir(ir)
+    finally
+        Compiler.__set_check_ssa_counts(false)
+    end
+end
+
+# Test SROA all_same on NewNode
+let code = Any[
+    # Block 1
+    Expr(:call, tuple, Argument(1)),
+    GotoIfNot(Argument(4), 5),
+    # Block 2
+    Expr(:call, tuple, Argument(2)),
+    GotoIfNot(Argument(4), 9),
+    # Block 3
+    PhiNode(Int32[2, 4], Any[SSAValue(1), SSAValue(3)]),
+    Expr(:call, getfield, SSAValue(5), 1),
+    Expr(:call, tuple, SSAValue(6), Argument(2)), # ::Tuple{Int, Int}
+    Expr(:call, tuple, SSAValue(7), Argument(3)), # ::Tuple{Tuple{Int, Int}, Int}
+    # Block 4
+    PhiNode(Int32[4, 8], Any[nothing, SSAValue(8)]),
+    Expr(:call, Core.Intrinsics.not_int, Argument(4)),
+    GotoIfNot(SSAValue(10), 13),
+    # Block 5
+    ReturnNode(1),
+    # Block 6
+    PiNode(SSAValue(9), Tuple{Tuple{Int, Int}, Int}),
+    Expr(:call, getfield, SSAValue(13), 1),
+    Expr(:call, getfield, SSAValue(14), 1),
+    ReturnNode(SSAValue(15))
+]
+
+    argtypes = Any[Int, Int, Int, Bool]
+    ssavaluetypes = Any[Tuple{Int}, Any, Tuple{Int}, Any, Tuple{Int}, Int, Tuple{Int, Int}, Tuple{Tuple{Int, Int}, Int},
+                        Union{Nothing, Tuple{Tuple{Int, Int}, Int}}, Bool, Any, Any,
+                        Tuple{Tuple{Int, Int}, Int},
+                        Tuple{Int, Int}, Int, Any]
+    ir = make_ircode(code; slottypes=argtypes, ssavaluetypes, verify=true)
+    ir = Compiler.sroa_pass!(ir)
+    Compiler.verify_ir(ir)
+    ir = Compiler.compact!(ir)
+    Compiler.verify_ir(ir)
+end
+
+# Test correctness of current_scope folding
+@eval function scope_folding()
+    $(Expr(:tryfinally,
+        Expr(:block,
+            Expr(:tryfinally, :(), :(), 2),
+            :(return Core.current_scope())),
+    :(), 1))
+end
+
+@eval function scope_folding_opt()
+    $(Expr(:tryfinally,
+        Expr(:block,
+            Expr(:tryfinally, :(), :(), :(Base.inferencebarrier(2))),
+            :(return Core.current_scope())),
+    :(), :(Base.inferencebarrier(1))))
+end
+
+@test scope_folding() == 1
+@test scope_folding_opt() == 1
+@test_broken fully_eliminated(scope_folding)
+@test_broken fully_eliminated(scope_folding_opt)
+let ir = first(only(Base.code_ircode(scope_folding, ())))
+    @test Compiler.compute_trycatch(ir) isa Compiler.HandlerInfo
+end
+let ir = first(only(Base.code_ircode(scope_folding_opt, ())))
+    @test Compiler.compute_trycatch(ir) isa Compiler.HandlerInfo
+end
+
+# Function that happened to have lots of sroa that
+# happened to trigger a bad case in the renamer. We
+# just want to check this doesn't crash in inference.
+function f52610()
+    slots_dict = IdDict()
+    for () in Base.inferencebarrier(1)
+       for x in 1
+           if Base.inferencebarrier(true)
+               slots_dict[x] = 0
+           end
+       end
+    end
+    return nothing
+end
+@test code_typed(f52610)[1][2] === Nothing
+
+# Issue #52703
+@eval function f52703()
+    try
+        $(Expr(:tryfinally,
+            Expr(:block,
+                Expr(:tryfinally, :(), :(), 2),
+                :(return Base.inferencebarrier(Core.current_scope)()::Int)),
+        :(), 1))
+    catch
+        return 1
+    end
+    return 0
+end
+@test code_typed(f52703)[1][2] === Int
+
+# Issue #52858 - compaction gets confused by pending node
+let code = Any[
+    # Block 1
+    GotoIfNot(true, 6),
+    # Block 2
+    Expr(:call, println, 1),
+    Expr(:call, Base.inferencebarrier, true),
+    GotoIfNot(SSAValue(3), 6),
+    # Block 3
+    nothing,
+    # Block 4
+    PhiNode(Int32[1, 4, 5], Any[1, 2, 3]),
+    ReturnNode(SSAValue(6))
+]
+    ir = make_ircode(code)
+    Compiler.insert_node!(ir, SSAValue(5),
+        Compiler.NewInstruction(
+            Expr(:call, println, 2), Nothing, Int32(1)),
+            #= attach_after = =# true)
+    ir = Compiler.compact!(ir, true)
+    @test Compiler.verify_ir(ir) === nothing
+    @test count(x->isa(x, GotoIfNot), ir.stmts.stmt) == 1
+end
+
+# Issue #52857 - Affinity of sroa definedness check
+let code = Any[
+    Expr(:new, ImmutableRef{Any}),
+    GotoIfNot(Argument(1), 4),
+    Expr(:call, GlobalRef(Base, :getfield), SSAValue(1), 1), # Will throw
+    ReturnNode(1)
+]
+    ir = make_ircode(code; ssavaluetypes = Any[ImmutableRef{Any}, Any, Any, Any], slottypes=Any[Bool], verify=true)
+    ir = Compiler.sroa_pass!(ir)
+    @test Compiler.verify_ir(ir) === nothing
+    @test !any(iscall((ir, getfield)), ir.stmts.stmt)
+    @test length(ir.cfg.blocks[end].stmts) == 1
+end
+
+# https://github.com/JuliaLang/julia/issues/47065
+# `Compiler.sort!` should be able to handle a big list
+let n = 1000
+    ex = :(return 1)
+    for _ in 1:n
+        ex = :(rand() < .1 && $(ex))
+    end
+    @eval global function f_1000_blocks()
+        $ex
+        return 0
+    end
+end
+@test f_1000_blocks() == 0
+
+# https://github.com/JuliaLang/julia/issues/53521
+# Incorrect scope counting in :leave
+using Base.ScopedValues
+function f53521()
+    VALUE = ScopedValue(1)
+    @with VALUE => 2 begin
+        for i = 1
+            @with VALUE => 3 begin
+                try
+                    foo()
+                catch
+                    nothing
+                end
+            end
+        end
+    end
+end
+let (ir,rt) = only(Base.code_ircode(f53521, ()))
+    @test rt == Nothing
+    Compiler.verify_ir(ir)
+    Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
+end
+
+Base.@assume_effects :foldable Base.@constprop :aggressive function f53521(x::Int, ::Int)
+    VALUE = ScopedValue(x)
+    @with VALUE => 2 begin
+        for i = 1
+            @with VALUE => 3 begin
+                local v
+                try
+                    v = sin(VALUE[])
+                catch
+                    v = nothing
+                end
+                return v
+            end
+        end
+    end
+end
+let (ir,rt) = only(Base.code_ircode((Int,)) do y
+        f53521(1, y)
+    end)
+    @test rt == Union{Nothing,Float64}
+end
+
+# Test that adce_pass! sets Refined on PhiNode values
+let code = Any[
+    # Basic Block 1
+    GotoIfNot(false, 3)
+    # Basic Block 2
+    nothing
+    # Basic Block 3
+    PhiNode(Int32[1, 2], Any[1.0, 1])
+    ReturnNode(Core.SSAValue(3))
+]
+    ir = make_ircode(code; ssavaluetypes=Any[Any, Nothing, Union{Int64, Float64}, Any])
+    (ir, made_changes) = Compiler.adce_pass!(ir)
+    @test made_changes
+    @test (ir[Core.SSAValue(length(ir.stmts))][:flag] & Compiler.IR_FLAG_REFINED) != 0
+end
+
+# JuliaLang/julia#52991: statements that may not :terminate should not be deleted
+@noinline Base.@assume_effects :effect_free :nothrow function issue52991(n)
+    local s = 0
+    try
+        while true
+            yield()
+            if n - rand(1:10) > 0
+                s += 1
+            else
+                break
+            end
+        end
+    catch
+    end
+    return s
+end
+@test !Compiler.is_removable_if_unused(Base.infer_effects(issue52991, (Int,)))
+let src = code_typed1((Int,)) do x
+        issue52991(x)
+        nothing
+    end
+    @test count(isinvoke(:issue52991), src.code) == 1
+end
+let t = @async begin
+        issue52991(11) # this call never terminates
+        nothing
+    end
+    sleep(1)
+    if istaskdone(t)
+        ok = false
+    else
+        ok = true
+        schedule(t, InterruptException(); error=true)
+    end
+    @test ok
+end
+
+# JuliaLang/julia47664
+@test !fully_eliminated() do
+    any(isone, Iterators.repeated(0))
+end
+@test !fully_eliminated() do
+    all(iszero, Iterators.repeated(0))
+end
+
+## Test that cfg_simplify respects implicit `unreachable` terminators
+let code = Any[
+        # block 1
+        GotoIfNot(Core.Argument(2), 4),
+        # block 2
+        Expr(:call, Base.throw, "error"), # an implicit `unreachable` terminator
+        # block 3
+        Expr(:call, :opaque),
+        # block 4
+        ReturnNode(nothing),
+    ]
+    ir = make_ircode(code; ssavaluetypes=Any[Any, Union{}, Any, Union{}])
+
+    # Unfortunately `compute_basic_blocks` does not notice the `throw()` so it gives us
+    # a slightly imprecise CFG. Instead manually construct the CFG we need for this test:
+    empty!(ir.cfg.blocks)
+    push!(ir.cfg.blocks, BasicBlock(StmtRange(1,1), [], [2,4]))
+    push!(ir.cfg.blocks, BasicBlock(StmtRange(2,2), [1], []))
+    push!(ir.cfg.blocks, BasicBlock(StmtRange(3,3), [], []))
+    push!(ir.cfg.blocks, BasicBlock(StmtRange(4,4), [1], []))
+    empty!(ir.cfg.index)
+    append!(ir.cfg.index, Int[2,3,4])
+    ir.stmts.stmt[1] = GotoIfNot(Core.Argument(2), 4)
+
+    Compiler.verify_ir(ir)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
+    @test length(ir.cfg.blocks) == 3 # should have removed block 3
+end
+
+let code = Any[
+        # block 1
+        EnterNode(4, 1),
+        # block 2
+        GotoNode(3), # will be turned into nothing
+        # block 3
+        GotoNode(5),
+        # block 4
+        ReturnNode(),
+        # block 5
+        Expr(:leave, SSAValue(1)),
+        # block 6
+        GotoIfNot(Core.Argument(1), 8),
+        # block 7
+        ReturnNode(1),
+        # block 8
+        ReturnNode(2),
+    ]
+    ir = make_ircode(code; ssavaluetypes=Any[Any, Any, Any, Any, Any, Any, Union{}, Union{}], verify=true)
+    @test length(ir.cfg.blocks) == 8
+
+    # Union typed deletion marker in basic block 2
+    Compiler.setindex!(ir, nothing, SSAValue(2))
+
+    # Test cfg_simplify
+    Compiler.verify_ir(ir)
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
+    @test length(ir.cfg.blocks) == 6
+    gotoifnot = Compiler.last(ir.cfg.blocks[3].stmts)
+    inst = ir[SSAValue(gotoifnot)]
+    @test isa(inst[:stmt], GotoIfNot)
+    # Make sure we didn't accidentally schedule the unreachable block as
+    # fallthrough
+    @test isdefined(ir[SSAValue(gotoifnot+1)][:inst]::ReturnNode, :val)
+end
+
+# Make sure that PhiNode values containing forward references are eventually updated.
+let code = Any[
+             # block 1
+    #= %1 =# Argument(2),
+    #= %2 =# GotoNode(4),
+             # block 2
+    #= %3 =# GotoNode(4), # will be removed, shifting SSA indices by 1
+             # block 3
+    #= %4 =# PhiNode(Int32[1, 9, 13], Any[SSAValue(1), SSAValue(6), SSAValue(6)]),
+    #= %5 =# GotoNode(6),
+             # block 4
+    #= %6 =# Expr(:call, :add_int, Argument(2), 1),
+    #= %7 =# GotoIfNot(Argument(3), 9),
+             # block 5
+    #= %8 =# ReturnNode(Argument(3)),
+             # block 6
+    #= %9 =# GotoIfNot(Argument(3), 4),
+             # block 7
+    #= %10=# GotoIfNot(Argument(3), 12),
+             # block 8
+    #= %11=# GotoNode(13),
+             # block 9
+    #= %12=# GotoNode(13),
+             # block 10
+    #= %13=# GotoNode(4),
+    ]
+    ssavaluetypes = Any[Int64, Any, Any, Int64, Any, Int64, Any, Int64, Any, Any, Any, Any, Any]
+    slottypes = Any[Any, Int, Bool]
+    ir = make_ircode(code; ssavaluetypes, slottypes, verify=true)
+    @test length(ir.cfg.blocks) == 10
+    ir = Compiler.cfg_simplify!(ir)
+    Compiler.verify_ir(ir)
+    @test length(ir.cfg.blocks) == 6
+    phistmt = ir.cfg.blocks[2].stmts[1]
+    phinode = ir[SSAValue(phistmt)][:stmt]
+    @test isa(phinode, PhiNode)
+    @test phinode.values[2] == phinode.values[3] == SSAValue(5)
+end
+
+# https://github.com/JuliaLang/julia/issues/54596
+# finalized object's uses have no postdominator
+let f = (x)->nothing, mi = Base.method_instance(f, (Base.RefValue{Nothing},)), code = Any[
+   # Basic Block 1
+   Expr(:new, Base.RefValue{Nothing}, nothing)
+   Expr(:call, Core.finalizer, f, SSAValue(1), true, mi)
+   GotoIfNot(false, 6)
+   # Basic Block 2
+   Expr(:call, Base.getfield, SSAValue(1), :x)
+   ReturnNode(SSAValue(4))
+   # Basic Block 3
+   Expr(:call, Base.getfield, SSAValue(1), :x)
+   ReturnNode(SSAValue(6))
+]
+   ir = make_ircode(code; ssavaluetypes=Any[Base.RefValue{Nothing}, Nothing, Any, Nothing, Any, Nothing, Any], verify=true)
+   inlining = Compiler.InliningState(Compiler.NativeInterpreter())
+   ir = Compiler.sroa_pass!(ir, inlining)
+   Compiler.verify_ir(ir)
+end
+
+let code = Any[
+        # block 1
+        GotoNode(4), # skip
+        # block 2
+        Expr(:leave, SSAValue(1)), # not domsorted - make sure we move it correctly
+        # block 3
+        ReturnNode(2),
+        # block 4
+        EnterNode(7),
+        # block 5
+        GotoIfNot(Argument(1), 2),
+        # block 6
+        Expr(:leave, SSAValue(1)),
+        # block 7
+        ReturnNode(1),
+        # block 8
+        ReturnNode(nothing),
+    ]
+    ir = make_ircode(code; ssavaluetypes=Any[Any, Any, Union{}, Any, Any, Any, Union{}, Union{}], verify=true)
+    @test length(ir.cfg.blocks) == 8
+
+    # The IR should remain valid after domsorting
+    # (esp. including the insertion of new BasicBlocks for any fix-ups)
+    domtree = Compiler.construct_domtree(ir)
+    ir = Compiler.domsort_ssa!(ir, domtree)
+    Compiler.verify_ir(ir)
+end
+
+# https://github.com/JuliaLang/julia/issues/57141
+# don't eliminate `setfield!` when the field is to be used
+let src = code_typed1(()) do
+        ref = Ref{Any}()
+        ref[] = 0
+        @assert isdefined(ref, :x)
+        inner() = ref[] + 1
+        (inner(), ref[])
+    end
+    @test count(iscall((src, setfield!)), src.code) == 1
+end
+
+module _Partials_irpasses
+    mutable struct Partial
+        x::String
+        y::Integer
+        z::Any
+        Partial() = new()
+    end
+end
+
+# once `isdefined(p, name)` holds, this information should be kept
+# as a `PartialStruct` over `p` for subsequent constant propagation.
+let src = code_typed1(()) do
+        p = _Partials_irpasses.Partial()
+        invokelatest(identity, p)
+        isdefined(p, :z) && isdefined(p, :x) || return nothing
+        isdefined(p, :x) & isdefined(p, :z)
+    end
+    @test count(iscall((src, isdefined)), src.code) == 2
+end
+
+# optimize `isdefined` away in the presence of a dominating `setfield!`
+let src = code_typed1(()) do
+        a = Ref{Any}()
+        setfield!(a, :x, 2)
+        invokelatest(identity, a)
+        isdefined(a, :x) && return 1.0
+        a[]
+    end
+    @test count(iscall((src, isdefined)), src.code) == 0
+end
+# We should successfully fold the default values of a ScopedValue
+const svalconstprop = ScopedValue(1)
+foosvalconstprop() = svalconstprop[]
+
+let src = code_typed1(foosvalconstprop, ())
+    function is_constfield_load(expr)
+        iscall((src, getfield))(expr) && expr.args[3] in (:(:has_default), :(:default))
+    end
+    @test count(is_constfield_load, src.code) == 0
+end
+
+# JuliaLang/julia #59548
+# Rewrite `Core._apply_iterate` to use `Core.svec` instead of `tuple` to better match
+# the codegen ABI
+let src = code_typed1((Vector{Any},)) do xs
+        println(stdout, xs...)
+    end
+    @test count(iscall((src, Core.svec)), src.code) == 1
+end
+let src = code_typed1((Vector{Any},)) do xs
+        println(stdout, 1, xs...) # convert tuples represented by `PartialStruct`
+    end
+    @test count(iscall((src, Core.svec)), src.code) == 1
+end
diff --git a/Compiler/test/irutils.jl b/Compiler/test/irutils.jl
new file mode 100644
index 0000000000000..e50491420c338
--- /dev/null
+++ b/Compiler/test/irutils.jl
@@ -0,0 +1,103 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+include("setup_Compiler.jl")
+
+using Core.IR
+using .Compiler: IRCode, IncrementalCompact, singleton_type, VarState
+using Base.Meta: isexpr
+using InteractiveUtils: gen_call_with_extracted_types_and_kwargs
+
+argextype(@nospecialize args...) = Compiler.argextype(args..., VarState[])
+code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::CodeInfo
+macro code_typed1(ex0...)
+    return gen_call_with_extracted_types_and_kwargs(__module__, :code_typed1, ex0)
+end
+get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code
+macro get_code(ex0...)
+    return gen_call_with_extracted_types_and_kwargs(__module__, :get_code, ex0; is_source_reflection = false)
+end
+
+# check if `x` is a statement with a given `head`
+isnew(@nospecialize x) = isexpr(x, :new)
+issplatnew(@nospecialize x) = isexpr(x, :splatnew)
+isreturn(@nospecialize x) = isa(x, ReturnNode) && isdefined(x, :val)
+isisdefined(@nospecialize x) = isexpr(x, :isdefined)
+
+# check if `x` is a dynamic call of a given function
+iscall(y) = @nospecialize(x) -> iscall(y, x)
+function iscall((src, f)::Tuple{IR,Base.Callable}, @nospecialize(x)) where IR<:Union{CodeInfo,IRCode,IncrementalCompact}
+    return iscall(x) do @nospecialize x
+        singleton_type(argextype(x, src)) === f
+    end
+end
+function iscall(pred::Base.Callable, @nospecialize(x))
+    if isexpr(x, :(=))
+        x = x.args[2]
+    end
+    return isexpr(x, :call) && pred(x.args[1])
+end
+
+# check if `x` is a statically-resolved call of a function whose name is `sym`
+isinvoke(y) = @nospecialize(x) -> isinvoke(y, x)
+isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x)
+isinvoke(pred::Function, @nospecialize(x)) = isexpr(x, :invoke) && pred((x.args[1]::CodeInstance).def)
+
+fully_eliminated(@nospecialize args...; retval=(@__FILE__), kwargs...) =
+    fully_eliminated(code_typed1(args...; kwargs...); retval)
+fully_eliminated(src::CodeInfo; retval=(@__FILE__)) = fully_eliminated(src.code; retval)
+fully_eliminated(ir::IRCode; retval=(@__FILE__)) = fully_eliminated(ir.stmts.stmt; retval)
+function fully_eliminated(code::Vector{Any}; retval=(@__FILE__), kwargs...)
+    length(code) == 1 || return false
+    retstmt = only(code)
+    isreturn(retstmt) || return false
+    retval === (@__FILE__) && return true
+    retval′ = retstmt.val
+    if retval′ isa QuoteNode
+        retval′ = retval′.value
+    end
+    return retval′ == retval
+end
+macro fully_eliminated(ex0...)
+    return gen_call_with_extracted_types_and_kwargs(__module__, :fully_eliminated, ex0; is_source_reflection = false)
+end
+
+let m = Meta.@lower 1 + 1
+    @assert isexpr(m, :thunk)
+    orig_src = m.args[1]::CodeInfo
+    global function make_codeinfo(code::Vector{Any};
+                                  ssavaluetypes::Union{Nothing,Vector{Any}}=nothing,
+                                  slottypes::Union{Nothing,Vector{Any}}=nothing,
+                                  slotnames::Union{Nothing,Vector{Symbol}}=nothing)
+        src = copy(orig_src)
+        src.code = code
+        nstmts = length(src.code)
+        if ssavaluetypes === nothing
+            src.ssavaluetypes = nstmts
+        else
+            src.ssavaluetypes = ssavaluetypes
+        end
+        src.debuginfo = Core.DebugInfo(:none)
+        src.ssaflags = fill(zero(UInt32), nstmts)
+        if slottypes !== nothing
+            src.slottypes = slottypes
+            src.slotflags = fill(zero(UInt8), length(slottypes))
+        end
+        if slotnames !== nothing
+            src.slotnames = slotnames
+        end
+        return src
+    end
+    global function make_ircode(code::Vector{Any};
+                                slottypes::Union{Nothing,Vector{Any}}=nothing,
+                                verify::Bool=true,
+                                kwargs...)
+        src = make_codeinfo(code; slottypes, kwargs...)
+        if slottypes !== nothing
+            ir = Compiler.inflate_ir(src, slottypes)
+        else
+            ir = Compiler.inflate_ir(src)
+        end
+        verify && Compiler.verify_ir(ir)
+        return ir
+    end
+end
diff --git a/Compiler/test/newinterp.jl b/Compiler/test/newinterp.jl
new file mode 100644
index 0000000000000..964912e8a6013
--- /dev/null
+++ b/Compiler/test/newinterp.jl
@@ -0,0 +1,64 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# TODO set up a version who defines new interpreter with persistent cache?
+
+include("setup_Compiler.jl")
+
+"""
+    @newinterp NewInterpreter [ephemeral_cache::Bool=false]
+
+Defines new `NewInterpreter <: AbstractInterpreter` whose cache is separated
+from the native code cache, satisfying the minimum interface requirements.
+
+When the `ephemeral_cache=true` option is specified, `NewInterpreter` will hold
+`CodeInstance` in an ephemeral non-integrated cache, rather than in the integrated
+`Compiler.InternalCodeCache`.
+Keep in mind that ephemeral cache lacks support for invalidation and doesn't persist across
+sessions. However it is an usual Julia object of the type `global_cache::IdDict{MethodInstance,CodeInstance}`,
+making it easier for debugging and inspecting the compiler behavior.
+"""
+macro newinterp(InterpName, ephemeral_cache::Bool=false)
+    InterpCacheName = esc(Symbol(string(InterpName, "Cache")))
+    InterpName = esc(InterpName)
+    C = Core
+    quote
+        $(ephemeral_cache && quote
+        struct $InterpCacheName
+            dict::IdDict{$C.MethodInstance,$C.CodeInstance}
+        end
+        $InterpCacheName() = $InterpCacheName(IdDict{$C.MethodInstance,$C.CodeInstance}())
+        end)
+        struct $InterpName <: $Compiler.AbstractInterpreter
+            meta # additional information
+            world::UInt
+            inf_params::$Compiler.InferenceParams
+            opt_params::$Compiler.OptimizationParams
+            inf_cache::Vector{$Compiler.InferenceResult}
+            $(ephemeral_cache && :(global_cache::$InterpCacheName))
+            function $InterpName(meta = nothing;
+                                 world::UInt = Base.get_world_counter(),
+                                 inf_params::$Compiler.InferenceParams = $Compiler.InferenceParams(),
+                                 opt_params::$Compiler.OptimizationParams = $Compiler.OptimizationParams(),
+                                 inf_cache::Vector{$Compiler.InferenceResult} = $Compiler.InferenceResult[],
+                                 $(ephemeral_cache ?
+                                    Expr(:kw, :(global_cache::$InterpCacheName), :($InterpCacheName())) :
+                                    Expr(:kw, :_, :nothing)))
+                return $(ephemeral_cache ?
+                    :(new(meta, world, inf_params, opt_params, inf_cache, global_cache)) :
+                    :(new(meta, world, inf_params, opt_params, inf_cache)))
+            end
+        end
+        $Compiler.InferenceParams(interp::$InterpName) = interp.inf_params
+        $Compiler.OptimizationParams(interp::$InterpName) = interp.opt_params
+        $Compiler.get_inference_world(interp::$InterpName) = interp.world
+        $Compiler.get_inference_cache(interp::$InterpName) = interp.inf_cache
+        $Compiler.cache_owner(::$InterpName) = $InterpName
+        $(ephemeral_cache && quote
+        $Compiler.code_cache(interp::$InterpName) = $Compiler.OverlayCodeCache(interp.global_cache, interp.inf_cache)
+        $Compiler.get(cache::$InterpCacheName, mi::$C.MethodInstance, default) = get(cache.dict, mi, default)
+        $Compiler.getindex(cache::$InterpCacheName, mi::$C.MethodInstance) = getindex(cache.dict, mi)
+        $Compiler.haskey(cache::$InterpCacheName, mi::$C.MethodInstance) = haskey(cache.dict, mi)
+        $Compiler.setindex!(cache::$InterpCacheName, ci::$C.CodeInstance, mi::$C.MethodInstance) = setindex!(cache.dict, ci, mi)
+        end)
+    end
+end
diff --git a/Compiler/test/runtests.jl b/Compiler/test/runtests.jl
new file mode 100644
index 0000000000000..8a35d8e71102f
--- /dev/null
+++ b/Compiler/test/runtests.jl
@@ -0,0 +1,15 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+Base.runtests(["Compiler"]; propagate_project=true)
+
+#=
+# To run serially
+using Test, Compiler
+@testset "Compiler.jl" begin
+    for file in readlines(joinpath(@__DIR__, "testgroups"))
+        file == "special_loading" && continue # Only applicable to Base.Compiler
+        testfile = file * ".jl"
+        @eval @testset $testfile include($testfile)
+    end
+end
+=#
diff --git a/Compiler/test/setup_Compiler.jl b/Compiler/test/setup_Compiler.jl
new file mode 100644
index 0000000000000..83cc42e2d0936
--- /dev/null
+++ b/Compiler/test/setup_Compiler.jl
@@ -0,0 +1,16 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using InteractiveUtils: @activate
+
+if Base.identify_package("Compiler") !== nothing && !isdefined(Main, :__custom_compiler_active)
+    Base.eval(Main, :(__custom_compiler_active=true))
+    @activate Compiler
+end
+
+if !@isdefined(Compiler)
+    if Base.REFLECTION_COMPILER[] === nothing
+        using Base.Compiler: Compiler
+    else
+        const Compiler = Base.REFLECTION_COMPILER[]
+    end
+end
diff --git a/Compiler/test/special_loading.jl b/Compiler/test/special_loading.jl
new file mode 100644
index 0000000000000..ca29618a44d17
--- /dev/null
+++ b/Compiler/test/special_loading.jl
@@ -0,0 +1,13 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Only run when testing Base compiler
+if Base.identify_package("Compiler") === nothing
+    include(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test", "tempdepot.jl"))
+    mkdepottempdir() do dir
+        withenv("JULIA_DEPOT_PATH" => dir * (Sys.iswindows() ? ";" : ":"), "JULIA_LOAD_PATH" => nothing) do
+            cd(joinpath(@__DIR__, "CompilerLoadingTest")) do
+                @test success(pipeline(`$(Base.julia_cmd()[1]) --startup-file=no --project=. compiler_loading_test.jl`; stdout, stderr))
+            end
+        end
+    end
+end
diff --git a/Compiler/test/ssair.jl b/Compiler/test/ssair.jl
new file mode 100644
index 0000000000000..7aca2b8977a4e
--- /dev/null
+++ b/Compiler/test/ssair.jl
@@ -0,0 +1,847 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+include("setup_Compiler.jl")
+include("irutils.jl")
+
+using Test
+
+using .Compiler: CFG, BasicBlock, NewSSAValue
+
+make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs)
+
+# TODO: this test is broken
+#let code = Any[
+#        GotoIfNot(SlotNumber(2), 4),
+#        Expr(:(=), SlotNumber(3), 2),
+#        # Test a SlotNumber as a value of a PhiNode
+#        PhiNode(Int32[2,3], Any[1, SlotNumber(3)]),
+#        ReturnNode(SSAValue(3))
+#    ]
+#
+#    ci = eval(Expr(:new, CodeInfo,
+#        code,
+#        nothing,
+#        Any[Any, Any, Any],
+#        Any[Any],
+#        UInt8[0, 0, 0],
+#        Any[Symbol("Self"), :arg, :slot],
+#        false, false, false, false
+#    ))
+#
+#    Compiler.run_passes(ci, 1, [NullLineInfo])
+#    # XXX: missing @test
+#end
+
+# Issue #31121
+
+# We have the following CFG and corresponding DFS numbering:
+#
+#     CFG     DFS
+#
+#      A       1
+#      | \     | \
+#      B C     2 5
+#     /|/     /|/
+#    | D     | 3
+#     \|      \|
+#      E       4
+#
+# In the bug `E` got the wrong dominator (`B` instead of `A`), because the DFS
+# tree had the wrong parent (i.e. we recorded the parent of `4` as `2` rather
+# than `3`, so the idom search missed that `1` is `3`'s semi-dominator). Here
+# we manually construct that CFG and verify that the DFS records the correct
+# parent.
+let cfg = CFG(BasicBlock[
+    make_bb([]     , [2, 3]),
+    make_bb([1]    , [4, 5]),
+    make_bb([1]    , [4]   ),
+    make_bb([2, 3] , [5]   ),
+    make_bb([2, 4] , []    ),
+], Int[])
+    dfs = Compiler.DFS(cfg.blocks)
+    @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4
+    let correct_idoms = Compiler.naive_idoms(cfg.blocks),
+        correct_pidoms = Compiler.naive_idoms(cfg.blocks, true)
+        @test Compiler.construct_domtree(cfg).idoms_bb == correct_idoms
+        @test Compiler.construct_postdomtree(cfg).idoms_bb == correct_pidoms
+        # For completeness, reverse the order of pred/succ in the CFG and verify
+        # the answer doesn't change (it does change the which node is chosen
+        # as the semi-dominator, since it changes the DFS numbering).
+        for (a, b, c, d) in Iterators.product(((true, false) for _ = 1:4)...)
+            let blocks = copy(cfg.blocks)
+                a && (blocks[1] = make_bb(blocks[1].preds, reverse(blocks[1].succs)))
+                b && (blocks[2] = make_bb(blocks[2].preds, reverse(blocks[2].succs)))
+                c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs))
+                d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs))
+                cfg′ = CFG(blocks, cfg.index)
+                @test Compiler.construct_domtree(cfg′).idoms_bb == correct_idoms
+                @test Compiler.construct_postdomtree(cfg′).idoms_bb == correct_pidoms
+            end
+        end
+    end
+end
+
+# test code execution with the default compile-mode
+module CompilerExecTest
+include("interpreter_exec.jl")
+end
+
+# test code execution with the interpreter mode (compile=min)
+module InterpreterExecTest
+Base.Experimental.@compiler_options compile=min
+include("interpreter_exec.jl")
+end
+
+# PR #32145
+# Make sure IncrementalCompact can handle blocks with predecessors of index 0
+# while removing blocks with no predecessors.
+let cfg = CFG(BasicBlock[
+    make_bb([]        , [2, 4]),
+    make_bb([1]       , [4, 5]),
+    make_bb([]        , [4]   ), # should be removed
+    make_bb([0, 1, 2] , [5]   ), # 0 predecessor should be preserved
+    make_bb([2, 3]    , []    ),
+], Int[])
+    insts = Compiler.InstructionStream([], [], Compiler.CallInfo[], Int32[], UInt32[])
+    di = Compiler.DebugInfoStream(insts.line)
+    ir = Compiler.IRCode(insts, cfg, di, Any[], Expr[], Compiler.VarState[])
+    compact = Compiler.IncrementalCompact(ir, true)
+    @test length(compact.cfg_transform.result_bbs) == 4 && 0 in compact.cfg_transform.result_bbs[3].preds
+end
+
+# Issue #32579 - Optimizer bug involving type constraints
+function f32579(x::Int, b::Bool)
+    if b
+        x = nothing
+    end
+    if isa(x, Int)
+        y = x
+    else
+        y = x
+    end
+    if isa(y, Nothing)
+        z = y
+    else
+        z = y
+    end
+    return z === nothing
+end
+@test f32579(0, true) === true
+@test f32579(0, false) === false
+
+# Test for bug caused by renaming blocks improperly, related to PR #32145
+let code = Any[
+        # block 1
+        Expr(:boundscheck),
+        Compiler.GotoIfNot(SSAValue(1), 6),
+        # block 2
+        Expr(:call, size, Compiler.Argument(3)),
+        Compiler.ReturnNode(),
+        # block 3
+        Core.PhiNode(),
+        Compiler.ReturnNode(),
+        # block 4
+        GlobalRef(Main, :something),
+        GlobalRef(Main, :somethingelse),
+        Expr(:call, Core.SSAValue(7), Core.SSAValue(8)),
+        Compiler.GotoIfNot(Core.SSAValue(9), 12),
+        # block 5
+        Compiler.ReturnNode(Core.SSAValue(9)),
+        # block 6
+        Compiler.ReturnNode(Core.SSAValue(9))
+    ]
+    ir = make_ircode(code)
+    ir = Compiler.compact!(ir, true)
+    @test Compiler.verify_ir(ir) === nothing
+end
+
+# Test that the verifier doesn't choke on cglobals (which aren't linearized)
+let code = Any[
+        Expr(:call, GlobalRef(Main, :cglobal),
+                    Expr(:call, Core.tuple, :(:c)), Nothing),
+                    Compiler.ReturnNode()
+    ]
+    ir = make_ircode(code)
+    @test Compiler.verify_ir(ir) === nothing
+end
+
+# Test that GlobalRef in value position is non-canonical
+let code = Any[
+        Expr(:call, GlobalRef(Main, :something_not_defined_please))
+        ReturnNode(SSAValue(1))
+    ]
+    ir = make_ircode(code; verify=false)
+    ir = Compiler.compact!(ir, true)
+    @test_throws ["IR verification failed.", "Code location: "] Compiler.verify_ir(ir, false)
+end
+
+# Issue #29107
+let code = Any[
+        # Block 1
+        Compiler.GotoNode(6),
+        # Block 2
+        # The following phi node gets deleted because it only has one edge, so
+        # the call to `something` is made to use the value of `something2()`,
+        # even though this value is defined after it. We don't want this to
+        # happen even though this block is dead because subsequent optimization
+        # passes may look at all code, dead or not.
+        Core.PhiNode(Int32[2], Any[Core.SSAValue(4)]),
+        Expr(:call, :something, Core.SSAValue(2)),
+        Expr(:call, :something2),
+        Compiler.GotoNode(2),
+        # Block 3
+        Compiler.ReturnNode(1000)
+    ]
+    ir = make_ircode(code)
+    ir = Compiler.compact!(ir, true)
+    # Make sure that if there is a call to `something` (block 2 should be
+    # removed entirely with working DCE), it doesn't use any SSA values that
+    # come after it.
+    for i in 1:length(ir.stmts)
+        s = ir.stmts[i]
+        if Meta.isexpr(s, :call) && s.args[1] === :something
+            if isa(s.args[2], SSAValue)
+                @test s.args[2].id <= i
+            end
+        end
+    end
+end
+
+# Make sure dead blocks that are removed are not still referenced in live phi nodes
+let code = Any[
+        # Block 1
+        Compiler.GotoNode(3),
+        # Block 2 (no predecessors)
+        Compiler.ReturnNode(3),
+        # Block 3
+        Core.PhiNode(Int32[1, 2], Any[100, 200]),
+        Compiler.ReturnNode(Core.SSAValue(3))
+    ]
+    ir = make_ircode(code; verify=false)
+    ir = Compiler.compact!(ir, true)
+    @test Compiler.verify_ir(ir) === nothing
+end
+
+# issue #37919
+let ci = only(code_lowered(()->@isdefined(_not_def_37919_), ()))
+    ir = Compiler.inflate_ir(ci)
+    @test Compiler.verify_ir(ir) === nothing
+end
+
+let code = Any[
+        # block 1
+        GotoIfNot(Argument(2), 4)
+        # block 2
+        Expr(:call, throw, "potential throw")
+        ReturnNode() # unreachable
+        # block 3
+        ReturnNode(Argument(3))
+    ]
+    ir = make_ircode(code; slottypes=Any[Any,Bool,Int])
+    visited = BitSet()
+    @test !Compiler.visit_conditional_successors(ir, #=bb=#1) do succ::Int
+        push!(visited, succ)
+        return false
+    end
+    @test 2 ∈ visited
+    @test 3 ∈ visited
+    oc = Core.OpaqueClosure(ir)
+    @test oc(false, 1) == 1
+    @test_throws "potential throw" oc(true, 1)
+end
+
+let code = Any[
+        # block 1
+        GotoIfNot(Argument(2), 3)
+        # block 2
+        ReturnNode(Argument(3))
+        # block 3
+        Expr(:call, throw, "potential throw")
+        ReturnNode() # unreachable
+    ]
+    ir = make_ircode(code; slottypes=Any[Any,Bool,Int])
+    visited = BitSet()
+    @test !Compiler.visit_conditional_successors(ir, #=bb=#1) do succ::Int
+        push!(visited, succ)
+        return false
+    end
+    @test 2 ∈ visited
+    @test 3 ∈ visited
+    oc = Core.OpaqueClosure(ir)
+    @test oc(true, 1) == 1
+    @test_throws "potential throw" oc(false, 1)
+end
+
+let code = Any[
+        # block 1
+        GotoIfNot(Argument(2), 5)
+        # block 2
+        GotoNode(3)
+        # block 3
+        Expr(:call, throw, "potential throw")
+        ReturnNode()
+        # block 4
+        Expr(:call, Core.Intrinsics.add_int, Argument(3), Argument(4))
+        GotoNode(7)
+        # block 5
+        ReturnNode(SSAValue(5))
+    ]
+    ir = make_ircode(code; slottypes=Any[Any,Bool,Int,Int])
+    visited = BitSet()
+    @test !Compiler.visit_conditional_successors(ir, #=bb=#1) do succ::Int
+        push!(visited, succ)
+        return false
+    end
+    @test 2 ∈ visited
+    @test 3 ∈ visited
+    @test 4 ∈ visited
+    @test 5 ∈ visited
+    oc = Core.OpaqueClosure(ir)
+    @test oc(false, 1, 1) == 2
+    @test_throws "potential throw" oc(true, 1, 1)
+
+    let buf = IOBuffer()
+        oc = Core.OpaqueClosure(ir; slotnames=Symbol[:ocfunc, :x, :y, :z])
+        try
+            oc(true, 1, 1)
+        catch
+            Base.show_backtrace(buf, catch_backtrace())
+        end
+        s = String(take!(buf))
+        @test occursin("(x::Bool, y::$Int, z::$Int)", s)
+    end
+end
+
+# Test dynamic update of domtree with edge insertions and deletions in the
+# following CFG:
+#
+#     1,1
+#     |  \
+#     |   \
+#     |    3,4 <
+#     |    |    \
+#     2,2  4,5   |
+#     |    |    /
+#     |    6,6 /
+#     |   /
+#     |  /
+#     5,3
+#
+# Nodes indicate BB number, preorder number
+# Edges point down, except the arrow that points up
+let cfg = CFG(BasicBlock[
+        make_bb([],     [3, 2]), # the order of the successors is deliberate
+        make_bb([1],    [5]),    # and is to determine the preorder numbers
+        make_bb([1, 6], [4]),
+        make_bb([3],    [6]),
+        make_bb([2, 6], []),
+        make_bb([4],    [5, 3]),
+    ], Int[])
+    domtree = Compiler.construct_domtree(cfg)
+    @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6]
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+
+    # Test removal of edge between a parent and child in the DFS tree, which
+    # should trigger complete recomputation of domtree (first case in algorithm
+    # for removing edge from domtree dynamically)
+    Compiler.cfg_delete_edge!(cfg, 2, 5)
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 2, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4]
+    # Add edge back (testing first case for insertion)
+    Compiler.cfg_insert_edge!(cfg, 2, 5)
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 2, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+
+    # Test second case in algorithm for removing edges from domtree, in which
+    # `from` is on a semidominator path from the semidominator of `to` to `to`
+    Compiler.cfg_delete_edge!(cfg, 6, 5)
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4]
+    # Add edge back (testing second case for insertion)
+    Compiler.cfg_insert_edge!(cfg, 6, 5)
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+
+    # Test last case for removing edges, in which edge does not satisfy either
+    # of the above conditions
+    Compiler.cfg_delete_edge!(cfg, 6, 3)
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+    # Add edge back (testing second case for insertion)
+    Compiler.cfg_insert_edge!(cfg, 6, 3)
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+
+    # Try removing all edges from root
+    Compiler.cfg_delete_edge!(cfg, 1, 2)
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 2)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4]
+    Compiler.cfg_delete_edge!(cfg, 1, 3)
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0]
+    # Add edges back
+    Compiler.cfg_insert_edge!(cfg, 1, 2)
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 2)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0]
+    Compiler.cfg_insert_edge!(cfg, 1, 3)
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+end
+
+# Issue #41975 - SSA conversion drops type check
+f_if_typecheck() = (if nothing; end; unsafe_load(Ptr{Int}(0)))
+@test_throws TypeError f_if_typecheck()
+
+let # https://github.com/JuliaLang/julia/issues/42258
+    code = """
+        using Base: Compiler
+
+        function foo()
+            a = @noinline rand(rand(0:10))
+            if isempty(a)
+                err = BoundsError(a)
+                throw(err)
+                return nothing
+            end
+            return a
+        end
+        code_typed(foo; optimize=true)
+
+        code_typed(Compiler.setindex!, (Compiler.UseRef,Compiler.NewSSAValue); optimize=true)
+        """
+    cmd = `$(Base.julia_cmd()) -g 2 -e $code`
+    stderr = IOBuffer()
+    @test success(pipeline(Cmd(cmd); stdout, stderr))
+    @test readchomp(stderr) == ""
+end
+
+@testset "code_ircode" begin
+    @test first(only(Base.code_ircode(+, (Float64, Float64)))) isa Compiler.IRCode
+    @test first(only(Base.code_ircode(+, (Float64, Float64); optimize_until = 3))) isa
+          Compiler.IRCode
+    @test first(only(Base.code_ircode(+, (Float64, Float64); optimize_until = "CC: SROA"))) isa
+          Compiler.IRCode
+
+    function demo(f)
+        f()
+        f()
+        f()
+    end
+    @test first(only(Base.code_ircode(demo))) isa Compiler.IRCode
+    @test first(only(Base.code_ircode(demo; optimize_until = 3))) isa Compiler.IRCode
+    @test first(only(Base.code_ircode(demo; optimize_until = "CC: SROA"))) isa Compiler.IRCode
+end
+
+# slots after SSA conversion
+function f_with_slots(a, b)
+    # `c` and `d` are local variables
+    c = a + b
+    d = c > 0
+    return (c, d)
+end
+let # #self#, a, b, c, d
+    unopt = code_typed1(f_with_slots, (Int,Int); optimize=false)
+    @test length(unopt.slotnames) == length(unopt.slotflags) == length(unopt.slottypes) == 5
+    ir_withslots = first(only(Base.code_ircode(f_with_slots, (Int,Int); optimize_until="CC: CONVERT")))
+    @test length(ir_withslots.argtypes) == 5
+    # #self#, a, b
+    opt = code_typed1(f_with_slots, (Int,Int); optimize=true)
+    @test length(opt.slotnames) == length(opt.slotflags) == length(opt.slottypes) == 3
+    ir_ssa = first(only(Base.code_ircode(f_with_slots, (Int,Int); optimize_until="CC: SLOT2REG")))
+    @test length(ir_ssa.argtypes) == 3
+end
+
+let
+    function test_useref(stmt, v, op)
+        if isa(stmt, Expr)
+            @test stmt.args[op] === v
+        elseif isa(stmt, GotoIfNot)
+            @test stmt.cond === v
+        elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode)
+            @test stmt.val === v
+        elseif isa(stmt, SSAValue) || isa(stmt, NewSSAValue) || isa(stmt, Argument)
+            @test stmt === v
+        elseif isa(stmt, PiNode)
+            @test stmt.val === v && stmt.typ === typeof(stmt)
+        elseif isa(stmt, PhiNode) || isa(stmt, PhiCNode)
+            @test stmt.values[op] === v
+        end
+    end
+
+    function _test_userefs(@nospecialize stmt)
+        ex = Expr(:call, :+, Core.SSAValue(3), 1)
+        urs = Compiler.userefs(stmt)::Compiler.UseRefIterator
+        it = Compiler.iterate(urs)
+        while it !== nothing
+            ur = getfield(it, 1)::Compiler.UseRef
+            op = getfield(it, 2)::Int
+            v1 = Compiler.getindex(ur)
+            # set to dummy expression and then back to itself to test `_useref_setindex!`
+            v2 = Compiler.setindex!(ur, ex)
+            test_useref(v2, ex, op)
+            Compiler.setindex!(ur, v1)
+            @test Compiler.getindex(ur) === v1
+            it = Compiler.iterate(urs, op)
+        end
+    end
+
+    function test_userefs(body)
+        for stmt in body
+            _test_userefs(stmt)
+        end
+    end
+
+    # this isn't valid code, we just care about looking at a variety of IR nodes
+    body = Any[
+        EnterNode(11),
+        Expr(:call, :+, SSAValue(3), 1),
+        Expr(:throw_undef_if_not, :expected, false),
+        Expr(:leave, Core.SSAValue(1)),
+        Expr(:(=), SSAValue(1), Expr(:call, :+, SSAValue(3), 1)),
+        UpsilonNode(),
+        UpsilonNode(SSAValue(2)),
+        PhiCNode(Any[SSAValue(5), SSAValue(7), SSAValue(9)]),
+        PhiCNode(Any[SSAValue(6)]),
+        PhiNode(Int32[8], Any[SSAValue(7)]),
+        PiNode(SSAValue(6), GotoNode),
+        GotoIfNot(SSAValue(3), 10),
+        GotoNode(5),
+        SSAValue(7),
+        NewSSAValue(9),
+        Argument(1),
+        ReturnNode(SSAValue(11)),
+    ]
+
+    test_userefs(body)
+end
+
+let ir = Base.code_ircode((Bool,Any)) do c, x
+        println(x, 1) #1
+        if c
+            println(x, 2) #2
+        else
+            println(x, 3) #3
+        end
+        println(x, 4) #4
+    end |> only |> first
+    # IR legality check
+    @test length(ir.cfg.blocks) == 4
+    for i = 1:4
+        @test any(ir.cfg.blocks[i].stmts) do j
+            inst = ir.stmts[j][:stmt]
+            iscall((ir, println), inst) &&
+            inst.args[3] == i
+        end
+    end
+    # domination analysis
+    domtree = Compiler.construct_domtree(ir)
+    @test Compiler.dominates(domtree, 1, 2)
+    @test Compiler.dominates(domtree, 1, 3)
+    @test Compiler.dominates(domtree, 1, 4)
+    for i = 2:4
+        for j = 1:4
+            i == j && continue
+            @test !Compiler.dominates(domtree, i, j)
+        end
+    end
+    # post domination analysis
+    post_domtree = Compiler.construct_postdomtree(ir)
+    @test Compiler.postdominates(post_domtree, 4, 1)
+    @test Compiler.postdominates(post_domtree, 4, 2)
+    @test Compiler.postdominates(post_domtree, 4, 3)
+    for i = 1:3
+        for j = 1:4
+            i == j && continue
+            @test !Compiler.postdominates(post_domtree, i, j)
+        end
+    end
+end
+
+@testset "issue #46967: undef stmts introduced by compaction" begin
+    # generate some IR
+    function foo(i)
+        j = i+42
+        j == 1 ? 1 : 2
+    end
+    ir = only(Base.code_ircode(foo, (Int,)))[1]
+    instructions = length(ir.stmts)
+
+    # get the addition instruction
+    add_stmt = ir.stmts[1]
+    @test Meta.isexpr(add_stmt[:stmt], :call) && add_stmt[:stmt].args[3] == 42
+
+    # replace the addition with a slightly different one
+    inst = Compiler.NewInstruction(Expr(:call, add_stmt[:stmt].args[1], add_stmt[:stmt].args[2], 999), Int)
+    node = Compiler.insert_node!(ir, 1, inst)
+    Compiler.setindex!(add_stmt, node, :stmt)
+
+    # perform compaction (not by calling compact! because with DCE the bug doesn't trigger)
+    compact = Compiler.IncrementalCompact(ir)
+    state = Compiler.iterate(compact)
+    while state !== nothing
+        state = Compiler.iterate(compact, state[2])
+    end
+    ir = Compiler.complete(compact)
+
+    # test that the inserted node was compacted
+    @test Compiler.length(ir.new_nodes) == 0
+
+    # test that we performed copy propagation, but that the undef node was trimmed
+    @test length(ir.stmts) == instructions
+
+    @test show(devnull, ir) === nothing
+end
+
+# insert_node! operations
+# =======================
+
+import Core: SSAValue
+import .Compiler: NewInstruction, insert_node!
+
+# insert_node! for pending node
+let ir = Base.code_ircode((Int,Int); optimize_until="CC: INLINING") do a, b
+        a^b
+    end |> only |> first
+    ir = Compiler.compact!(ir)
+    nstmts = length(ir.stmts)
+    invoke_idx = findfirst(@nospecialize(stmt)->Meta.isexpr(stmt, :invoke), ir.stmts.stmt)
+    @test invoke !== nothing
+
+    invoke_ssa = SSAValue(invoke_idx)
+    newssa = insert_node!(ir, invoke_ssa, NewInstruction(Expr(:call, println, invoke_ssa), Nothing), #=attach_after=#true)
+    newssa = insert_node!(ir, newssa, NewInstruction(Expr(:call, println, newssa), Nothing), #=attach_after=#true)
+
+    ir = Compiler.compact!(ir)
+    @test length(ir.stmts) == nstmts + 2
+    @test Meta.isexpr(ir.stmts[invoke_idx][:stmt], :invoke)
+    call1 = ir.stmts[invoke_idx+1][:stmt]
+    @test iscall((ir,println), call1)
+    @test call1.args[2] === invoke_ssa
+    call2 = ir.stmts[invoke_idx+2][:stmt]
+    @test iscall((ir,println), call2)
+    @test call2.args[2] === SSAValue(invoke_idx+1)
+end
+
+# Issue #50379 - insert_node!(::IncrementalCompact, ...) at end of basic block
+let code = Any[
+        # block 1
+        #= %1: =# Expr(:boundscheck),
+        #= %2: =# Compiler.GotoIfNot(SSAValue(1), 4),
+        # block 2
+        #= %3: =# Expr(:call, println, Argument(1)),
+        # block 3
+        #= %4: =# Core.PhiNode(),
+        #= %5: =# Compiler.ReturnNode(),
+    ]
+    ir = make_ircode(code)
+
+    # Insert another call at end of "block 2"
+    compact = Compiler.IncrementalCompact(ir)
+    new_inst = NewInstruction(Expr(:call, println, Argument(1)), Nothing)
+    insert_node!(compact, SSAValue(3), new_inst, #= attach_after =# true)
+
+    # Complete iteration
+    x = Compiler.iterate(compact)
+    while x !== nothing
+        x = Compiler.iterate(compact, x[2])
+    end
+    ir = Compiler.complete(compact)
+
+    @test Compiler.verify_ir(ir) === nothing
+end
+
+# compact constant PiNode
+let code = Any[
+        PiNode(0.0, Const(0.0))
+        ReturnNode(SSAValue(1))
+    ]
+    ir = make_ircode(code)
+    ir = Compiler.compact!(ir)
+    @test fully_eliminated(ir)
+end
+
+# insert_node! with new instruction with flag computed
+let ir = Base.code_ircode((Int,Int); optimize_until="CC: INLINING") do a, b
+        a^b
+    end |> only |> first
+    ir = Compiler.compact!(ir)
+    invoke_idx = findfirst(@nospecialize(stmt)->Meta.isexpr(stmt, :invoke), ir.stmts.stmt)
+    @test invoke_idx !== nothing
+    invoke_expr = ir.stmts.stmt[invoke_idx]
+    invoke_ssa = SSAValue(invoke_idx)
+
+    # effect-ful node
+    let compact = Compiler.IncrementalCompact(Compiler.copy(ir))
+        insert_node!(compact, invoke_ssa, NewInstruction(Expr(:call, println, invoke_ssa), Nothing), #=attach_after=#true)
+        state = Compiler.iterate(compact)
+        while state !== nothing
+            state = Compiler.iterate(compact, state[2])
+        end
+        ir = Compiler.finish(compact)
+        new_invoke_idx = findfirst(@nospecialize(stmt)->stmt==invoke_expr, ir.stmts.stmt)
+        @test new_invoke_idx !== nothing
+        new_call_idx = findfirst(ir.stmts.stmt) do @nospecialize(stmt)
+            iscall((ir,println), stmt) && stmt.args[2] === SSAValue(new_invoke_idx)
+        end
+        @test new_call_idx !== nothing
+        @test new_call_idx == new_invoke_idx+1
+    end
+
+    # effect-free node
+    let compact = Compiler.IncrementalCompact(Compiler.copy(ir))
+        insert_node!(compact, invoke_ssa, NewInstruction(Expr(:call, GlobalRef(Base, :add_int), invoke_ssa, invoke_ssa), Int), #=attach_after=#true)
+        state = Compiler.iterate(compact)
+        while state !== nothing
+            state = Compiler.iterate(compact, state[2])
+        end
+        ir = Compiler.finish(compact)
+
+        ir = Compiler.finish(compact)
+        new_invoke_idx = findfirst(@nospecialize(stmt)->stmt==invoke_expr, ir.stmts.stmt)
+        @test new_invoke_idx !== nothing
+        new_call_idx = findfirst(ir.stmts.stmt) do @nospecialize(x)
+            iscall((ir,Base.add_int), x) && x.args[2] === SSAValue(new_invoke_idx)
+        end
+        @test new_call_idx === nothing # should be deleted during the compaction
+    end
+end
+
+@testset "GotoIfNot folding" begin
+    # After IRCode conversion, following the targets of a GotoIfNot should never lead to
+    # statically unreachable code.
+    function f_with_maybe_nonbool_cond(a::Int, r::Bool)
+        a = r ? true : a
+        if a
+            # The following conditional can be resolved statically, since `a === true`
+            # This test checks that it becomes a static `goto` despite its wide slottype.
+            x = a ? 1 : 2.
+        else
+            x = a ? 1 : 2.
+        end
+        return x
+    end
+    let
+        # At least some statements should have been found to be statically unreachable and wrapped in Const(...)::Union{}
+        unopt = code_typed1(f_with_maybe_nonbool_cond, (Int, Bool); optimize=false)
+        @test any(j -> isa(unopt.code[j], Core.Const) && unopt.ssavaluetypes[j] == Union{}, 1:length(unopt.code))
+
+        # Any GotoIfNot destinations after IRCode conversion should not be statically unreachable
+        ircode = first(only(Base.code_ircode(f_with_maybe_nonbool_cond, (Int, Bool); optimize_until="CC: CONVERT")))
+        for i = 1:length(ircode.stmts)
+            expr = ircode.stmts[i][:stmt]
+            if isa(expr, GotoIfNot)
+                # If this statement is Core.Const(...)::Union{}, that means this code was not reached
+                @test !(isa(ircode.stmts[i+1][:stmt], Core.Const) && (unopt.ssavaluetypes[i+1] === Union{}))
+                @test !(isa(ircode.stmts[expr.dest][:stmt], Core.Const) && (unopt.ssavaluetypes[expr.dest] === Union{}))
+            end
+        end
+    end
+end
+
+# Test that things don't break if one branch of the frontend PhiNode becomes unreachable
+const global_error_switch_const1::Bool = false
+function gen_unreachable_phinode_edge1(world::UInt, source, args...)
+    ci = make_codeinfo(Any[
+        # block 1
+        GlobalRef(@__MODULE__, :global_error_switch_const1),
+        GotoIfNot(SSAValue(1), 4),
+        # block 2
+        Expr(:call, identity, Argument(3)),
+        # block 3
+        PhiNode(Int32[2, 3], Any[Argument(2), SSAValue(3)]),
+        ReturnNode(SSAValue(4))
+    ]; slottypes=Any[Any,Int,Int])
+    ci.slotnames = Symbol[:var"#self#", :x, :y]
+    ci.nargs = 3
+    ci.isva = false
+    return ci
+end
+@eval function f_unreachable_phinode_edge1(x, y)
+    $(Expr(:meta, :generated, gen_unreachable_phinode_edge1))
+    $(Expr(:meta, :generated_only))
+    #= no body =#
+end
+@test f_unreachable_phinode_edge1(1, 2) == 1
+
+const global_error_switch_const2::Bool = true
+function gen_unreachable_phinode_edge2(world::UInt, source, args...)
+    ci = make_codeinfo(Any[
+        # block 1
+        GlobalRef(@__MODULE__, :global_error_switch_const2),
+        GotoIfNot(SSAValue(1), 4),
+        # block 2
+        Expr(:call, identity, Argument(3)),
+        # block 3
+        PhiNode(Int32[2, 3], Any[Argument(2), SSAValue(3)]),
+        ReturnNode(SSAValue(4))
+    ]; slottypes=Any[Any,Int,Int])
+    ci.slotnames = Symbol[:var"#self#", :x, :y]
+    ci.nargs = 3
+    ci.isva = false
+    return ci
+end
+@eval function f_unreachable_phinode_edge2(x, y)
+    $(Expr(:meta, :generated, gen_unreachable_phinode_edge2))
+    $(Expr(:meta, :generated_only))
+    #= no body =#
+end
+@test f_unreachable_phinode_edge2(1, 2) == 2
+
+global global_error_switch::Bool = true
+function gen_must_throw_phinode_edge(world::UInt, source, _)
+    ci = make_codeinfo(Any[
+        # block 1
+        GlobalRef(@__MODULE__, :global_error_switch),
+        GotoIfNot(SSAValue(1), 4),
+        # block 2
+        Expr(:call, error, "This error is expected"),
+        # block 3
+        PhiNode(Int32[2, 3], Any[1, 2]),
+        ReturnNode(SSAValue(4))
+    ]; slottypes=Any[Any])
+    ci.slotnames = Symbol[:var"#self#"]
+    ci.nargs = 1
+    ci.isva = false
+    return ci
+end
+@eval function f_must_throw_phinode_edge()
+    $(Expr(:meta, :generated, gen_must_throw_phinode_edge))
+    $(Expr(:meta, :generated_only))
+    #= no body =#
+end
+let ir = first(only(Base.code_ircode(f_must_throw_phinode_edge)))
+    @test !any(@nospecialize(x)->isa(x,PhiNode), ir.stmts.stmt)
+end
+@test_throws ErrorException f_must_throw_phinode_edge()
+global global_error_switch = false
+@test f_must_throw_phinode_edge() == 1
+
+# Test roundtrip of debuginfo compression
+let cl = Int32[32, 1, 1, 1000, 240, 230]
+    str = ccall(:jl_compress_codelocs, Any, (Int32, Any, Int), 378, cl, 2)::String;
+    cl2 = ccall(:jl_uncompress_codelocs, Any, (Any, Int), str, 2)
+    @test cl == cl2
+end
+
+@test_throws ErrorException Base.code_ircode(+, (Float64, Float64); optimize_until = "nonexisting pass name")
+@test_throws ErrorException Base.code_ircode(+, (Float64, Float64); optimize_until = typemax(Int))
+
+#57153 check that the CFG has a #0 block predecessor and that we don't fail to compile code that observes that
+function _worker_task57153()
+    while true
+        r = let
+        try
+            if @noinline rand(Bool)
+                return nothing
+            end
+            q, m
+        finally
+            missing
+        end
+        end
+        r[1]::Bool
+    end
+end
+let ir = Base.code_ircode(_worker_task57153, (), optimize_until="CC: COMPACT_2")[1].first
+    @test findfirst(x->x==0, ir.cfg.blocks[1].preds) !== nothing
+end
diff --git a/Compiler/test/tarjan.jl b/Compiler/test/tarjan.jl
new file mode 100644
index 0000000000000..8fe940463b558
--- /dev/null
+++ b/Compiler/test/tarjan.jl
@@ -0,0 +1,170 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+
+include("setup_Compiler.jl")
+include("irutils.jl")
+
+using .Compiler: CFGReachability, DomTree, CFG, BasicBlock, StmtRange, dominates,
+                     bb_unreachable, kill_edge!
+
+function reachable(g::CFG, a::Int, b::Int; domtree=nothing)
+    visited = BitVector(false for _ = 1:length(g.blocks))
+    worklist = Int[a]
+    while !isempty(worklist)
+        node = pop!(worklist)
+        node == b && return true
+        visited[node] = true
+        for child in g.blocks[node].succs
+            if domtree !== nothing && dominates(domtree, child, node)
+                continue # if provided `domtree`, ignore back-edges
+            end
+
+            !visited[child] && push!(worklist, child)
+        end
+    end
+    return false
+end
+
+function rand_cfg(V, E)
+    bbs = [BasicBlock(StmtRange(0,0), Int[], Int[]) for _ = 1:V]
+
+    reachable = BitVector(false for _ = 1:V)
+    reachable[1] = true
+
+    targets = BitVector(false for _ = 1:V)
+
+    for _ = 1:E
+        # Pick any source (with at least 1 missing edge)
+        source, dest = 0, 0
+        while true
+            source = rand(findall(reachable))
+            for v = 1:V
+                targets[v] = !in(v, bbs[source].succs)
+            end
+            any(targets) && break
+        end
+
+        # Pick any new target for source
+        dest = rand(findall(targets))
+
+        # Add edge to graph
+        push!(bbs[source].succs, dest)
+        push!(bbs[dest].preds, source)
+
+        reachable[dest] = true
+    end
+
+    return CFG(bbs, zeros(Int, V + 1))
+end
+
+function get_random_edge(cfg::CFG, V)
+    has_edge = [length(cfg.blocks[bb].succs) != 0 for bb in 1:V]
+    source = rand(findall(has_edge))
+    target = rand(cfg.blocks[source].succs)
+    return source, target
+end
+
+# Generate a random CFG with the requested number of vertices and edges, then simulate
+# `deletions` edge removals and verify that reachability is maintained correctly.
+#
+# If `all_checks` is true, verify internal data structures as well with O(E^2) checks.
+function test_reachability(V, E; deletions = 2E ÷ 3, all_checks=false)
+
+    function check_reachability(reachability, cfg, domtree, all_checks)
+        for i = 1:V
+            # All nodes should be reported as unreachable only if we cannot reach them from BB #1.
+            @test reachable(cfg, 1, i) == !bb_unreachable(reachability, i)
+
+            # All predecessors of a reachable block should be reachable.
+            if !bb_unreachable(reachability, i)
+                for pred in cfg.blocks[i].preds
+                    @test !bb_unreachable(reachability, pred)
+                end
+            end
+        end
+
+        if all_checks # checks for internal data structures - O(E^2)
+
+            # Nodes should be mutually reachable iff they are in the same SCompiler.
+            scc = reachability.scc
+            reachable_nodes = BitSet(v for v = 1:V if !bb_unreachable(reachability, v))
+            for i ∈ reachable_nodes
+                for j ∈ reachable_nodes
+                    @test (reachable(cfg, i, j; domtree) && reachable(cfg, j, i; domtree)) == (scc[i] == scc[j])
+                end
+            end
+
+            # Nodes in any non-trivial SCC (ignoring backedges) should be marked irreducible.
+            irreducible = reachability.irreducible
+            for i ∈ reachable_nodes
+                in_nontrivial_scc = any(v != i && scc[v] == scc[i] for v = 1:V)
+                @test Compiler.getindex(irreducible, i) == in_nontrivial_scc
+            end
+        end
+    end
+
+    cfg = rand_cfg(V, E)
+    domtree = Compiler.construct_domtree(cfg)
+    reachability = CFGReachability(cfg, domtree)
+    check_reachability(reachability, cfg, domtree, all_checks)
+
+    # track the reachable blocks/edges so that we can verify callbacks below
+    blocks = Set{Int}()
+    edges = Set{Tuple{Int,Int}}()
+    for bb in 1:V
+        !bb_unreachable(reachability, bb) && push!(blocks, bb)
+        for succ in cfg.blocks[bb].succs
+            push!(edges, (bb, succ))
+        end
+    end
+
+    killed_edges = Tuple{Int,Int}[]
+    killed_blocks = Int[]
+    for k = 1:deletions
+        length(blocks) == 1 && break # no more reachable blocks
+
+        from, to = get_random_edge(cfg, V)
+        kill_edge!(reachability, cfg, from, to,
+            (from::Int, to::Int) -> push!(killed_edges, (from, to)),
+            (bb::Int) -> push!(killed_blocks, bb),
+        )
+
+        # If these nodes are still reachable, to and from edges should have been removed.
+        @test !reachable(cfg, 1, from) || !in(to, cfg.blocks[from].succs)
+        @test !reachable(cfg, 1, to)   || !in(from, cfg.blocks[to].preds)
+
+        check_reachability(reachability, cfg, domtree, all_checks)
+
+        for bb in 1:V
+            if bb_unreachable(reachability, bb) && in(bb, blocks)
+                # If the block changed from reachable -> unreachable, we should have gotten a callback.
+                @test bb in killed_blocks
+                delete!(blocks, bb)
+            end
+        end
+        for (from, to) in edges
+            if !in(from, cfg.blocks[to].preds) && !bb_unreachable(reachability, to)
+                # If the edge changed from reachable -> unreachable and feeds into a reachable BasicBlock,
+                # we should have gotten a callback.
+                @test (from, to) in killed_edges
+                delete!(edges, (from, to))
+            end
+        end
+
+        empty!(killed_edges)
+        empty!(killed_blocks)
+    end
+end
+
+@testset "CFGReachability tests" begin
+    test_reachability(1, 0; all_checks=true)
+
+    test_reachability(10, 15; all_checks=true)
+    test_reachability(10, 15; all_checks=true)
+    test_reachability(10, 15; all_checks=true)
+
+    test_reachability(100, 150; all_checks=false)
+    test_reachability(100, 150; all_checks=false)
+    test_reachability(100, 1000; all_checks=false)
+end
diff --git a/Compiler/test/testgroups b/Compiler/test/testgroups
new file mode 100644
index 0000000000000..4656448016cd3
--- /dev/null
+++ b/Compiler/test/testgroups
@@ -0,0 +1,19 @@
+AbstractInterpreter
+EscapeAnalysis
+codegen
+compact
+contextual
+datastructures
+effects
+inference
+inline
+interpreter_exec
+invalidation
+irpasses
+newinterp
+ssair
+tarjan
+validation
+special_loading
+abioverride
+verifytrim
diff --git a/test/compiler/validation.jl b/Compiler/test/validation.jl
similarity index 50%
rename from test/compiler/validation.jl
rename to Compiler/test/validation.jl
index 5fd074fee73ae..766f887c05860 100644
--- a/test/compiler/validation.jl
+++ b/Compiler/test/validation.jl
@@ -2,6 +2,17 @@
 
 using Test, Core.IR
 
+include("setup_Compiler.jl")
+
+@testset "stdio validation" begin
+    for s in (:stdout, :stderr, :print, :println, :write)
+        @test getglobal(Compiler, s) === getglobal(Core, s)
+        @test isconst(Compiler, s)
+    end
+    @test Compiler.stdin === devnull
+    @test isconst(Compiler, :stdin)
+end
+
 function f22938(a, b, x...)
     nothing
     nothing
@@ -18,20 +29,22 @@ function f22938(a, b, x...)
     return i * a
 end
 
-msig = Tuple{typeof(f22938),Int,Int,Int,Int}
-world = Base.get_world_counter()
-match = only(Base._methods_by_ftype(msig, -1, world))
-mi = Core.Compiler.specialize_method(match)
-c0 = Core.Compiler.retrieve_code_info(mi, world)
-
-@test isempty(Core.Compiler.validate_code(mi, c0))
+const c0 = let
+    msig = Tuple{typeof(f22938),Int,Int,Int,Int}
+    world = Base.get_world_counter()
+    match = only(Base._methods_by_ftype(msig, -1, world))
+    mi = Compiler.specialize_method(match)
+    c0 = Compiler.retrieve_code_info(mi, world)
+    @test isempty(Compiler.validate_code(mi, c0))
+    c0
+end
 
 @testset "INVALID_EXPR_HEAD" begin
     c = copy(c0)
     c.code[1] = Expr(:invalid, 1)
-    errors = Core.Compiler.validate_code(c)
+    errors = Compiler.validate_code(c)
     @test length(errors) == 1
-    @test errors[1].kind === Core.Compiler.INVALID_EXPR_HEAD
+    @test errors[1].kind === Compiler.INVALID_EXPR_HEAD
 end
 
 @testset "INVALID_LVALUE" begin
@@ -39,9 +52,9 @@ end
     c.code[1] = Expr(:(=), GotoNode(1), 1)
     c.code[2] = Expr(:(=), :x, 1)
     c.code[3] = Expr(:(=), 3, 1)
-    errors = Core.Compiler.validate_code(c)
+    errors = Compiler.validate_code(c)
     @test length(errors) == 3
-    @test all(e.kind === Core.Compiler.INVALID_LVALUE for e in errors)
+    @test all(e.kind === Compiler.INVALID_LVALUE for e in errors)
 end
 
 @testset "INVALID_RVALUE" begin
@@ -52,9 +65,9 @@ end
     for h in (:line, :const, :meta)
         c.code[i+=1] = Expr(:(=), SlotNumber(2), Expr(h))
     end
-    errors = Core.Compiler.validate_code(c)
+    errors = Compiler.validate_code(c)
     @test length(errors) == 5
-    @test count(e.kind === Core.Compiler.INVALID_RVALUE for e in errors) == 5
+    @test count(e.kind === Compiler.INVALID_RVALUE for e in errors) == 5
 end
 
 @testset "INVALID_CALL_ARG" begin
@@ -66,74 +79,56 @@ end
     for h in (:line, :const, :meta)
         c.code[i+=1] = Expr(:call, GlobalRef(@__MODULE__,:f), Expr(h))
     end
-    errors = Core.Compiler.validate_code(c)
+    errors = Compiler.validate_code(c)
     @test length(errors) == 6
-    @test count(e.kind === Core.Compiler.INVALID_CALL_ARG for e in errors) == 6
+    @test count(e.kind === Compiler.INVALID_CALL_ARG for e in errors) == 6
 end
 
 @testset "EMPTY_SLOTNAMES" begin
     c = copy(c0)
     empty!(c.slotnames)
-    errors = Core.Compiler.validate_code(c)
+    errors = Compiler.validate_code(c)
     @test length(errors) == 2
-    @test any(e.kind === Core.Compiler.EMPTY_SLOTNAMES for e in errors)
-    @test any(e.kind === Core.Compiler.SLOTFLAGS_MISMATCH for e in errors)
+    @test any(e.kind === Compiler.EMPTY_SLOTNAMES for e in errors)
+    @test any(e.kind === Compiler.SLOTFLAGS_MISMATCH for e in errors)
 end
 
 @testset "SLOTFLAGS_MISMATCH" begin
     c = copy(c0)
     push!(c.slotflags, 0x00)
-    errors = Core.Compiler.validate_code(c)
+    errors = Compiler.validate_code(c)
     @test length(errors) == 1
-    @test errors[1].kind === Core.Compiler.SLOTFLAGS_MISMATCH
+    @test errors[1].kind === Compiler.SLOTFLAGS_MISMATCH
 end
 
 @testset "SSAVALUETYPES_MISMATCH" begin
     c = code_typed(f22938, (Int,Int,Int,Int))[1][1]
     empty!(c.ssavaluetypes)
-    errors = Core.Compiler.validate_code(c)
+    errors = Compiler.validate_code(c)
     @test length(errors) == 1
-    @test errors[1].kind === Core.Compiler.SSAVALUETYPES_MISMATCH
+    @test errors[1].kind === Compiler.SSAVALUETYPES_MISMATCH
 end
 
 @testset "SSAVALUETYPES_MISMATCH_UNINFERRED" begin
     c = copy(c0)
     c.ssavaluetypes -= 1
-    errors = Core.Compiler.validate_code(c)
+    errors = Compiler.validate_code(c)
     @test length(errors) == 1
-    @test errors[1].kind === Core.Compiler.SSAVALUETYPES_MISMATCH_UNINFERRED
+    @test errors[1].kind === Compiler.SSAVALUETYPES_MISMATCH_UNINFERRED
 end
 
 @testset "SSAFLAGS_MISMATCH" begin
     c = copy(c0)
     empty!(c.ssaflags)
-    errors = Core.Compiler.validate_code(c)
+    errors = Compiler.validate_code(c)
     @test length(errors) == 1
-    @test errors[1].kind === Core.Compiler.SSAFLAGS_MISMATCH
-end
-
-@testset "SIGNATURE_NARGS_MISMATCH" begin
-    old_sig = mi.def.sig
-    mi.def.sig = Tuple{1,2}
-    errors = Core.Compiler.validate_code(mi, nothing)
-    mi.def.sig = old_sig
-    @test length(errors) == 1
-    @test errors[1].kind === Core.Compiler.SIGNATURE_NARGS_MISMATCH
+    @test errors[1].kind === Compiler.SSAFLAGS_MISMATCH
 end
 
 @testset "NON_TOP_LEVEL_METHOD" begin
     c = copy(c0)
     c.code[1] = Expr(:method, :dummy)
-    errors = Core.Compiler.validate_code(c)
+    errors = Compiler.validate_code(c)
     @test length(errors) == 1
-    @test errors[1].kind === Core.Compiler.NON_TOP_LEVEL_METHOD
-end
-
-@testset "SLOTNAMES_NARGS_MISMATCH" begin
-    mi.def.nargs += 20
-    errors = Core.Compiler.validate_code(mi, c0)
-    mi.def.nargs -= 20
-    @test length(errors) == 2
-    @test count(e.kind === Core.Compiler.SLOTNAMES_NARGS_MISMATCH for e in errors) == 1
-    @test count(e.kind === Core.Compiler.SIGNATURE_NARGS_MISMATCH for e in errors) == 1
+    @test errors[1].kind === Compiler.NON_TOP_LEVEL_METHOD
 end
diff --git a/Compiler/test/verifytrim.jl b/Compiler/test/verifytrim.jl
new file mode 100644
index 0000000000000..a84afd6933266
--- /dev/null
+++ b/Compiler/test/verifytrim.jl
@@ -0,0 +1,123 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+include("setup_Compiler.jl")
+
+# revise: Core.include(Compiler.TrimVerifier, joinpath(@__DIR__, "../src/verifytrim.jl"))
+
+using Test
+using .Compiler: typeinf_ext_toplevel, TrimVerifier, TRIM_SAFE, TRIM_UNSAFE
+using .TrimVerifier: get_verify_typeinf_trim, verify_print_error, CallMissing, CCallableMissing
+
+function sprint(f, args...)
+    return Base.sprint((io, f, args...) -> f(IOContext{IO}(io), args...), f, args...)
+end
+
+let infos = Any[]
+    errors, parents = get_verify_typeinf_trim(infos)
+    @test isempty(errors)
+    @test isempty(parents)
+end
+
+finalizer(@nospecialize(f), @nospecialize(o)) = Core.finalizer(f, o)
+
+let infos = typeinf_ext_toplevel(Any[Core.svec(Nothing, Tuple{typeof(finalizer), typeof(identity), Any})], [Base.get_world_counter()], TRIM_UNSAFE)
+    errors, parents = get_verify_typeinf_trim(infos)
+    @test !isempty(errors) # unresolvable finalizer
+
+    # the only error should be a CallMissing error for the Core.finalizer builtin
+    (warn, desc) = only(errors)
+    @test !warn
+    @test desc isa CallMissing
+    @test occursin("finalizer", desc.desc)
+    repr = sprint(verify_print_error, desc, parents)
+    @test occursin(
+        r"""^unresolved finalizer registered from statement \(Core.finalizer\)\(f::Any, o::Any\)::Nothing
+            Stacktrace:
+             \[1\] finalizer\(f::Any, o::Any\)""", repr)
+end
+
+# test that basic `cfunction` generation is allowed, when the dispatch target can be resolved
+make_cfunction() = @cfunction(+, Float64, (Int64,Int64))
+let infos = typeinf_ext_toplevel(Any[Core.svec(Ptr{Cvoid}, Tuple{typeof(make_cfunction)})], [Base.get_world_counter()], TRIM_UNSAFE)
+    errors, parents = get_verify_typeinf_trim(infos)
+    @test isempty(errors)
+end
+
+# use TRIM_UNSAFE to bypass verifier inside typeinf_ext_toplevel
+make_cfunction_bad(@nospecialize(f::Any)) = @cfunction($f, Float64, (Int64,Int64))::Base.CFunction
+let infos = typeinf_ext_toplevel(Any[Core.svec(Base.CFunction, Tuple{typeof(make_cfunction_bad), Any})], [Base.get_world_counter()], TRIM_UNSAFE)
+    errors, parents = get_verify_typeinf_trim(infos)
+    @test !isempty(errors) # missing cfunction
+
+    (is_warning, desc) = only(errors)
+    @test !is_warning
+    @test desc isa CallMissing
+    @test occursin("cfunction", desc.desc)
+    repr = sprint(verify_print_error, desc, parents)
+    @test occursin(r"""^unresolved cfunction from statement \$\(Expr\(:cfunction, Base.CFunction, :\(f::Any\), Float64, :\(svec\(Int64, Int64\)::Core.SimpleVector\), :\(:ccall\)\)\)::Base.CFunction
+            Stacktrace:
+             \[1\] make_cfunction_bad\(f::Any\)""", repr)
+    resize!(infos, 1)
+    @test infos[1] isa Core.SimpleVector && infos[1][1] isa Type && infos[1][2] isa Type
+    errors, parents = get_verify_typeinf_trim(infos)
+    desc = only(errors)
+    @test !desc.first
+    desc = desc.second
+    @test desc isa CCallableMissing
+    @test desc.rt == Base.CFunction
+    @test desc.sig == Tuple{typeof(make_cfunction_bad), Any}
+    @test occursin("unresolved ccallable", desc.desc)
+    repr = sprint(verify_print_error, desc, parents)
+    @test repr == "unresolved ccallable for Tuple{$(typeof(make_cfunction_bad)), Any} => Base.CFunction\n\n"
+end
+
+let infos = typeinf_ext_toplevel(Any[Core.svec(Base.SecretBuffer, Tuple{Type{Base.SecretBuffer}})], [Base.get_world_counter()], TRIM_UNSAFE)
+    @test length(infos) > 4
+    errors, parents = get_verify_typeinf_trim(infos)
+    @test isempty(errors)
+
+    resize!(infos, 1)
+    @test infos[1] isa Core.SimpleVector && infos[1][1] isa Type && infos[1][2] isa Type
+    errors, parents = get_verify_typeinf_trim(infos)
+    desc = only(errors)
+    @test !desc.first
+    desc = desc.second
+    @test desc isa CCallableMissing
+    @test desc.rt == Base.SecretBuffer
+    @test desc.sig == Tuple{Type{Base.SecretBuffer}}
+    @test occursin("unresolved ccallable", desc.desc)
+    repr = sprint(verify_print_error, desc, parents)
+    @test repr == "unresolved ccallable for Tuple{Type{Base.SecretBuffer}} => Base.SecretBuffer\n\n"
+end
+
+let infos = typeinf_ext_toplevel(Any[Core.svec(Float64, Tuple{typeof(+), Int32, Int64})], [Base.get_world_counter()], TRIM_UNSAFE)
+    errors, parents = get_verify_typeinf_trim(infos)
+    desc = only(errors)
+    @test !desc.first
+    desc = desc.second
+    @test desc isa CCallableMissing
+    @test desc.rt == Int64
+    @test desc.sig == Tuple{typeof(+), Int32, Int64}
+    @test occursin("ccallable declared return type", desc.desc)
+    repr = sprint(verify_print_error, desc, parents)
+    @test repr == "ccallable declared return type does not match inference for Tuple{typeof(+), Int32, Int64} => Int64\n\n"
+end
+
+let infos = typeinf_ext_toplevel(Any[Core.svec(Int64, Tuple{typeof(ifelse), Bool, Int64, UInt64})], [Base.get_world_counter()], TRIM_UNSAFE)
+    errors, parents = get_verify_typeinf_trim(infos)
+    desc = only(errors)
+    @test desc.first
+    desc = desc.second
+    @test desc isa CCallableMissing
+    @test occursin("ccallable declared return type", desc.desc)
+    repr = sprint(verify_print_error, desc, parents)
+    @test repr == "ccallable declared return type does not match inference for Tuple{typeof(ifelse), Bool, Int64, UInt64} => Union{Int64, UInt64}\n\n"
+end
+
+let infos = typeinf_ext_toplevel(Any[Core.svec(Union{Int64,UInt64}, Tuple{typeof(ifelse), Bool, Int64, UInt64})], [Base.get_world_counter()], TRIM_SAFE)
+    errors, parents = get_verify_typeinf_trim(infos)
+    @test isempty(errors)
+    infos = typeinf_ext_toplevel(Any[Core.svec(Real, Tuple{typeof(ifelse), Bool, Int64, UInt64})], [Base.get_world_counter()], TRIM_SAFE)
+    errors, parents = get_verify_typeinf_trim(infos)
+    @test isempty(errors)
+end
diff --git a/HISTORY.md b/HISTORY.md
index a4448497380c5..45667158be1f0 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,13 +1,594 @@
+Julia v1.12 Release Notes
+=========================
+
+New language features
+---------------------
+
+* New experimental option `--trim` that creates smaller binaries by removing code not proven to be reachable from
+  entry points. Entry points can be marked using `Base.Experimental.entrypoint` ([#55047]). Not all
+  code is expected to work with this option, and since it is experimental you may encounter problems.
+* Redefinition of constants is now well defined and follows world age semantics ([#57253]). Additional redefinitions
+  (e.g. of types) are now allowed. See [the new manual chapter on world age](https://docs.julialang.org/en/v1.13-dev/manual/worldage/).
+* A new keyword argument `usings::Bool` has been added to `names`, returning all names visible
+  via `using` ([#54609]).
+* The `@atomic` macro family now supports reference assignment syntax, e.g. `@atomic :monotonic v[3] += 4`,
+  which modifies `v[3]` atomically with monotonic ordering semantics ([#54707]).
+  The supported syntax allows
+  * atomic fetch (`x = @atomic v[3]`),
+  * atomic set (`@atomic v[3] = 4`),
+  * atomic modify (`@atomic v[3] += 2`),
+  * atomic set once (`@atomiconce v[3] = 2`),
+  * atomic swap (`x = @atomicswap v[3] = 2`), and
+  * atomic replace (`x = @atomicreplace v[3] 2=>5`).
+* New option `--task-metrics=yes` to enable the collection of per-task timing information,
+  which can also be enabled/disabled at runtime with `Base.Experimental.task_metrics(::Bool)` ([#56320]).
+  The available metrics are:
+  * actual running time for the task (`Base.Experimental.task_running_time_ns`), and
+  * wall-time for the task (`Base.Experimental.task_wall_time_ns`).
+* Support for Unicode 16 ([#56925]).
+* `Threads.@spawn` now takes a `:samepool` argument to specify the same threadpool as the caller.
+  `Threads.@spawn :samepool foo()` which is shorthand for `Threads.@spawn Threads.threadpool() foo()` ([#57109]).
+* The `@ccall` macro can now take a `gc_safe` argument, that if set to true allows the runtime to run garbage collection concurrently to the `ccall` ([#49933]).
+* A single method covering multiple functions is now allowed in more cases. See issue #54620. ([#58131]).
+
+Language changes
+----------------
+
+* When a method is replaced with an exactly equivalent one, the old method is not deleted. Instead, the
+  new method takes priority and becomes more specific than the old method. Thus if the new method is deleted
+  later, the old method will resume operating. This can be useful in mocking frameworks (as in SparseArrays,
+  Pluto, and Mocking, among others), as they do not need to explicitly restore the old method.
+  At this time, inference and compilation must be repeated in this situation, but we may eventually be
+  able to re-use the old results ([#53415]).
+* Macro expansion will no longer eagerly recurse into `Expr(:toplevel)` expressions returned from macros.
+  Instead, macro expansion of `:toplevel` expressions will be delayed until evaluation time. This allows a
+  later expression within a given `:toplevel` expression to make use of macros defined earlier in the same
+  `:toplevel` expression ([#53515]).
+* Trivial infinite loops (like `while true; end`) are no longer undefined behavior. Infinite loops that
+  do things (e.g. have side effects or sleep) were never and are still not undefined behavior ([#52999]).
+* It is now an error to mark a binding as both `public` and `export`ed ([#53664]).
+* Errors during `getfield` now raise a new `FieldError` exception type instead of the generic
+  `ErrorException` ([#54504]).
+* Macros in function-signature-position no longer require parentheses. E.g. `function @main(args) ... end` is now permitted, whereas `function (@main)(args) ... end` was required in prior Julia versions.
+* Calling `using` on a package name inside of that package of that name (especially relevant
+  for a submodule) now explicitly uses that package without examining the Manifest and
+  environment, which is identical to the behavior of `..Name`. This appears to better match
+  how users expect this to behave in the wild ([#57727]).
+
+Compiler/Runtime improvements
+-----------------------------
+
+* Generated LLVM IR now uses pointer types instead of passing pointers as integers.
+  This affects `llvmcall`: Inline LLVM IR should be updated to use `i8*` or `ptr` instead of
+  `i32` or `i64`, and remove unneeded `ptrtoint`/`inttoptr` conversions. For compatibility,
+  IR with integer pointers is still supported, but generates a deprecation warning ([#53687]).
+
+Command-line option changes
+---------------------------
+
+* The `-m/--module` flag can be passed to run the `main` function inside a package with a set of arguments.
+  This `main` function should be declared using `@main` to indicate that it is an entry point ([#52103]).
+* Enabling or disabling color text in Julia can now be controlled with the
+  [`NO_COLOR`](https://no-color.org/) or [`FORCE_COLOR`](https://force-color.org/) environment
+  variables. These variables are also honored by Julia's build system ([#53742], [#56346]).
+* `--project=@temp` starts Julia with a temporary environment ([#51149]).
+* New `--trace-compile-timing` option to report how long each method reported by `--trace-compile` took
+  to compile, in ms ([#54662]).
+* `--trace-compile` now prints recompiled methods in yellow or with a trailing comment if color is not
+  supported ([#55763]).
+* New `--trace-dispatch` option to report methods that are dynamically dispatched ([#55848]).
+
+Multi-threading changes
+-----------------------
+
+* Julia now defaults to 1 "interactive" thread, in addition to the 1 default "worker" thread. i.e. `-t1,1`.
+  This means in default configuration the main task and repl (when in interactive mode), which both run on
+  thread 1, now run within the `interactive` threadpool. The libuv IO loop also runs on thread 1,
+  helping efficient utilization of the worker threadpool used by `Threads.@spawn`. Asking for specifically 1 thread
+  (`-t1`/`JULIA_NUM_THREADS=1`) or `0` interactive threads will disable the interactive thread i.e. `-t1,0` or `JULIA_NUM_THREADS=1,0`
+  , or `-tauto,0` etc. Asking for more than 1 thread will enable the interactive thread so
+  `-t2` will set the equivalent of `-t2,1`. As a reminder, buffers
+  [should not be managed based on `threadid()`](https://docs.julialang.org/en/v1/manual/multi-threading/#Using-@threads-without-data-races) ([#57087]).
+* New types are defined to handle the pattern of code that must run once per process, called
+  a `OncePerProcess{T}` type, which allows defining a function that should be run exactly once
+  the first time it is called, and then always return the same result value of type `T`
+  every subsequent time afterwards. There are also `OncePerThread{T}` and `OncePerTask{T}` types for
+  similar usage with threads or tasks ([#55793]).
+
+Build system changes
+--------------------
+
+* There are new `Makefile`s to build Julia and LLVM using the Binary Optimization and Layout Tool (BOLT).
+  See `contrib/bolt` and `contrib/pgo-lto-bolt` ([#54107]).
+
+New library functions
+---------------------
+
+* `logrange(start, stop; length)` makes a range of constant ratio, instead of constant step ([#39071]).
+* The new `isfull(c::Channel)` function can be used to check if `put!(c, some_value)` will block ([#53159]).
+* `waitany(tasks; throw=false)` and `waitall(tasks; failfast=false, throw=false)` which wait for multiple tasks
+  at once ([#53341]).
+* `uuid7()` creates an RFC 9562 compliant UUID with version 7 ([#54834]).
+* `insertdims(array; dims)` inserts singleton dimensions into an array --- the inverse operation of
+  `dropdims` ([#45793]).
+* A new `Fix` type generalizes `Fix1/Fix2` for fixing a single argument ([#54653]).
+* `Sys.detectwsl()` tests whether Julia is running inside WSL at runtime ([#57069]).
+
+New library features
+--------------------
+
+* `escape_string` takes additional keyword arguments `ascii=true` (to escape all non-ASCII characters) and
+  `fullhex=true` (to require full 4/8-digit hex numbers for u/U escapes, e.g. for C compatibility) ([#55099]).
+* `tempname` can now take a suffix string to allow the file name to include a suffix and include that suffix in
+  the uniquing checking ([#53474]).
+* `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988]).
+* `Lockable` is now exported ([#54595]).
+* `Base.require_one_based_indexing` and `Base.has_offset_axes` are now public ([#56196]).
+* New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for
+  char widths ([#55351]).
+* `isless` (and thus `cmp`, sorting, etc.) is now supported for zero-dimensional `AbstractArray`s ([#55772]).
+* `invoke` now supports passing a `Method` instead of a type signature ([#56692]).
+* `invoke` now supports passing a `CodeInstance` instead of a type, which can enable certain compiler plugin
+  workflows ([#56660]).
+* `Timer(f, ...)` will now match the stickiness of the parent task when creating timer tasks, which can be
+  overridden by the new `spawn` keyword argument. This avoids the issue where sticky tasks (i.e. `@async`)
+  make their parent sticky ([#56745]).
+* `Timer` now has readable `timeout` and `interval` properties, and a more descriptive `show` method ([#57081]).
+* `sort` now supports `NTuple`s ([#54494]).
+* `map!(f, A)` now stores the results in `A`, like `map!(f, A, A)` or `A .= f.(A)` ([#40632]).
+* `setprecision` with a function argument (typically a `do` block) is now thread safe. Other forms
+  should be avoided, and types should switch to an implementation using `ScopedValue` ([#51362]).
+
+Standard library changes
+------------------------
+
+* `gcdx(0, 0)` now returns `(0, 0, 0)` instead of `(0, 1, 0)` ([#40989]).
+* `fd` returns a `RawFD` instead of an `Int` ([#55080]).
+
+#### JuliaSyntaxHighlighting
+
+* A new standard library for applying syntax highlighting to Julia code, this uses `JuliaSyntax` and
+  `StyledStrings` to implement a `highlight` function that creates an `AnnotatedString` with syntax highlighting
+  applied ([#51810]).
+
+#### LinearAlgebra
+
+* `rank` can now take a `QRPivoted` matrix to allow rank estimation via QR factorization ([#54283]).
+* Added keyword argument `alg` to `eigen`, `eigen!`, `eigvals` and `eigvals!` for self-adjoint matrix types
+  (i.e., the type union `RealHermSymComplexHerm`) that allows one to switch between different eigendecomposition
+  algorithms ([#49355]).
+* Added a generic version of the (unblocked) pivoted Cholesky decomposition (callable via
+  `cholesky[!](A, RowMaximum())`) ([#54619]).
+* The number of default BLAS threads now respects process affinity, instead of using the total number of logical
+  threads available on the system ([#55574]).
+* A new function `zeroslike` is added that generates the zero elements for matrix-valued banded matrices.
+  Custom array types may specialize this function to return an appropriate result ([#55252]).
+* The matrix multiplication `A * B` calls `matprod_dest(A, B, T::Type)` to generate the destination.
+  This function is now public ([#55537]).
+* The function `haszero(T::Type)` is used to check if a type `T` has a unique zero element defined as `zero(T)`.
+  This is now public ([#56223]).
+* A new function `diagview` is added that returns a view into a specific band of an `AbstractMatrix` ([#56175]).
+
+#### Profile
+
+* `Profile.take_heap_snapshot` takes a new keyword argument, `redact_data::Bool`, which is `true` by default.
+  When set, the contents of Julia objects are not emitted in the heap snapshot. This currently only applies to
+  strings ([#55326]).
+* `Profile.print()` now colors Base/Core/Package modules similarly to how they are in stacktraces.
+  Also paths, even if truncated, are now clickable in terminals that support URI links
+  to take you to the specified `JULIA_EDITOR` for the given file & line number ([#55335]).
+
+#### REPL
+
+* Using the new `usings=true` feature of the `names()` function, REPL completions can now
+  complete names visible via `using` ([#54610]).
+* REPL completions can now complete input lines like `[import|using] Mod: xxx|` e.g.
+  complete `using Base.Experimental: @op` to `using Base.Experimental: @opaque` ([#54719]).
+* The REPL will now warn if it detects a name is being accessed via a module which does not define it (nor has
+  a submodule which defines it), and for which the name is not public in that module. For example, `map` is
+  defined in Base, and executing `LinearAlgebra.map` in the REPL will now issue a warning the first time it
+  occurs ([#54872]).
+* When the result of a REPL input is printed, the output is now truncated to 20 KiB.
+  This does not affect manual calls to `show`, `print`, etc. ([#53959]).
+* Backslash completions now print the respective glyph or emoji next to each matching backslash shortcode ([#54800]).
+
+#### Test
+
+* A failing `DefaultTestSet` now prints to screen the random number generator (RNG) of the failed test, to help
+  reproducing a stochastic failure which only depends on the state of the RNG.
+  It is also possible seed a test set by passing the `rng` keyword argument to `@testset`:
+  ```julia
+  using Test, Random
+  @testset rng=Xoshiro(0x2e026445595ed28e, 0x07bb81ac4c54926d, 0x83d7d70843e8bad6, 0xdbef927d150af80b, 0xdbf91ddf2534f850) begin
+      @test rand() == 0.559472630416976
+  end
+  ```
+
+#### InteractiveUtils
+
+* New macros `@trace_compile` and `@trace_dispatch` for running an expression with
+  `--trace-compile=stderr --trace-compile-timing` and `--trace-dispatch=stderr` respectively enabled ([#55915]).
+
+External dependencies
+---------------------
+
+* The terminal info database, `terminfo`, is now vendored by default, providing a better
+  REPL user experience when `terminfo` is not available on the system. Julia can be built
+  without vendoring the database using the Makefile option `WITH_TERMINFO=0` ([#55411]).
+
+Tooling Improvements
+--------------------
+
+* A wall-time profiler is now available for users who need a sampling profiler that captures tasks regardless
+  of their scheduling or running state. This type of profiler enables profiling of I/O-heavy tasks and helps
+  detect areas of heavy contention in the system ([#55889]).
+
+<!--- generated by NEWS-update.jl: -->
+[#39071]: https://github.com/JuliaLang/julia/issues/39071
+[#40632]: https://github.com/JuliaLang/julia/issues/40632
+[#40989]: https://github.com/JuliaLang/julia/issues/40989
+[#45793]: https://github.com/JuliaLang/julia/issues/45793
+[#49355]: https://github.com/JuliaLang/julia/issues/49355
+[#50988]: https://github.com/JuliaLang/julia/issues/50988
+[#51149]: https://github.com/JuliaLang/julia/issues/51149
+[#51810]: https://github.com/JuliaLang/julia/issues/51810
+[#52103]: https://github.com/JuliaLang/julia/issues/52103
+[#52999]: https://github.com/JuliaLang/julia/issues/52999
+[#53159]: https://github.com/JuliaLang/julia/issues/53159
+[#53341]: https://github.com/JuliaLang/julia/issues/53341
+[#53415]: https://github.com/JuliaLang/julia/issues/53415
+[#53474]: https://github.com/JuliaLang/julia/issues/53474
+[#53515]: https://github.com/JuliaLang/julia/issues/53515
+[#53664]: https://github.com/JuliaLang/julia/issues/53664
+[#53687]: https://github.com/JuliaLang/julia/issues/53687
+[#53742]: https://github.com/JuliaLang/julia/issues/53742
+[#53959]: https://github.com/JuliaLang/julia/issues/53959
+[#54107]: https://github.com/JuliaLang/julia/issues/54107
+[#54283]: https://github.com/JuliaLang/julia/issues/54283
+[#54494]: https://github.com/JuliaLang/julia/issues/54494
+[#54504]: https://github.com/JuliaLang/julia/issues/54504
+[#54595]: https://github.com/JuliaLang/julia/issues/54595
+[#54609]: https://github.com/JuliaLang/julia/issues/54609
+[#54610]: https://github.com/JuliaLang/julia/issues/54610
+[#54619]: https://github.com/JuliaLang/julia/issues/54619
+[#54653]: https://github.com/JuliaLang/julia/issues/54653
+[#54662]: https://github.com/JuliaLang/julia/issues/54662
+[#54707]: https://github.com/JuliaLang/julia/issues/54707
+[#54719]: https://github.com/JuliaLang/julia/issues/54719
+[#54800]: https://github.com/JuliaLang/julia/issues/54800
+[#54834]: https://github.com/JuliaLang/julia/issues/54834
+[#54872]: https://github.com/JuliaLang/julia/issues/54872
+[#55047]: https://github.com/JuliaLang/julia/issues/55047
+[#55080]: https://github.com/JuliaLang/julia/issues/55080
+[#55099]: https://github.com/JuliaLang/julia/issues/55099
+[#55252]: https://github.com/JuliaLang/julia/issues/55252
+[#55326]: https://github.com/JuliaLang/julia/issues/55326
+[#55335]: https://github.com/JuliaLang/julia/issues/55335
+[#55351]: https://github.com/JuliaLang/julia/issues/55351
+[#55411]: https://github.com/JuliaLang/julia/issues/55411
+[#55537]: https://github.com/JuliaLang/julia/issues/55537
+[#55574]: https://github.com/JuliaLang/julia/issues/55574
+[#55763]: https://github.com/JuliaLang/julia/issues/55763
+[#55772]: https://github.com/JuliaLang/julia/issues/55772
+[#55793]: https://github.com/JuliaLang/julia/issues/55793
+[#55848]: https://github.com/JuliaLang/julia/issues/55848
+[#55889]: https://github.com/JuliaLang/julia/issues/55889
+[#55915]: https://github.com/JuliaLang/julia/issues/55915
+[#56175]: https://github.com/JuliaLang/julia/issues/56175
+[#56196]: https://github.com/JuliaLang/julia/issues/56196
+[#56223]: https://github.com/JuliaLang/julia/issues/56223
+[#56320]: https://github.com/JuliaLang/julia/issues/56320
+[#56346]: https://github.com/JuliaLang/julia/issues/56346
+[#56660]: https://github.com/JuliaLang/julia/issues/56660
+[#56692]: https://github.com/JuliaLang/julia/issues/56692
+[#56745]: https://github.com/JuliaLang/julia/issues/56745
+[#56925]: https://github.com/JuliaLang/julia/issues/56925
+[#57069]: https://github.com/JuliaLang/julia/issues/57069
+[#57081]: https://github.com/JuliaLang/julia/issues/57081
+[#57087]: https://github.com/JuliaLang/julia/issues/57087
+[#57109]: https://github.com/JuliaLang/julia/issues/57109
+[#57253]: https://github.com/JuliaLang/julia/issues/57253
+
+Julia v1.11 Release Notes
+=========================
+
+New language features
+---------------------
+* `public` is a new keyword. Symbols marked with `public` are considered public
+  API. Symbols marked with `export` are now also treated as public API. The
+  difference between `public` and `export` is that `public` names do not become
+  available when `using` a package/module ([#50105]).
+* `ScopedValue` implements dynamic scope with inheritance across tasks ([#50958]).
+* The new macro `Base.Cartesian.@ncallkw` is analogous to `Base.Cartesian.@ncall`,
+  but allows to add keyword arguments to the function call ([#51501]).
+* Support for Unicode 15.1 ([#51799]).
+* `Manifest.toml` files can now be renamed in the format `Manifest-v{major}.{minor}.toml`
+  to be preferentially picked up by the given julia version. i.e. in the same folder,
+  a `Manifest-v1.11.toml` would be used by v1.11 and `Manifest.toml` by every other julia
+  version. This makes managing environments for multiple julia versions at the same time
+  easier ([#43845]).
+* `@time` now reports a count of any lock conflicts where a `ReentrantLock` had to wait, plus a new macro
+  `@lock_conflicts` which returns that count ([#52883]).
+
+Language changes
+----------------
+* During precompilation, the `atexit` hooks now run before saving the output file. This
+  allows users to safely tear down background state (such as closing Timers and sending
+  disconnect notifications to heartbeat tasks) and cleanup other resources when the program
+  wants to begin exiting.
+* Code coverage and malloc tracking is no longer generated during the package precompilation stage.
+  Further, during these modes pkgimage caches are now used for packages that are not being tracked.
+  This means that coverage testing (the default for `julia-actions/julia-runtest`) will by default use
+  pkgimage caches for all other packages than the package being tested, likely meaning faster test
+  execution. ([#52123])
+
+* Specifying a path in `JULIA_DEPOT_PATH` now results in the expansion of empty strings to
+  omit the default user depot ([#51448]).
+
+Compiler/Runtime improvements
+-----------------------------
+* Updated GC heuristics to count allocated pages instead of individual objects ([#50144]).
+* A new `LazyLibrary` type is exported from `Libdl` for use in building chained lazy library
+  loads, primarily to be used within JLLs ([#50074]).
+* Added support for annotating `Base.@assume_effects` on code blocks ([#52400]).
+* The libuv library has been updated from a base of v1.44.2 to v1.48.0 ([#49937]).
+
+Command-line option changes
+---------------------------
+
+* The entry point for Julia has been standardized to `Main.main(Base.ARGS)`. This must be explicitly opted into using the `@main` macro
+(see the docstring for further details). When opted-in, and julia is invoked to run a script or expression
+(i.e. using `julia script.jl` or `julia -e expr`), julia will subsequently run the `Main.main` function automatically.
+This is intended to unify script and compilation workflows, where code loading may happen
+in the compiler and execution of `Main.main` may happen in the resulting executable. For interactive use, there is no semantic
+difference between defining a `main` function and executing the code directly at the end of the script ([50974]).
+* The `--compiled-modules` and `--pkgimages` flags can now be set to `existing`, which will
+  cause Julia to consider loading existing cache files, but not to create new ones ([#50586]
+  and [#52573]).
+* The `--project` argument now accepts `@script` to give a path to a directory with a Project.toml relative to the passed script file. `--project=@script/foo` for the `foo` subdirectory. If no path is given after (i.e. `--project=@script`) then (like `--project=@.`) the directory and its parents are searched for a Project.toml ([#50864] and [#53352])
+
+Multi-threading changes
+-----------------------
+
+* `Threads.@threads` now supports the `:greedy` scheduler, intended for non-uniform workloads ([#52096]).
+* A new public (but unexported) struct `Base.Lockable{T, L<:AbstractLock}` makes it easy to bundle a resource and its lock together ([#52898]).
+
+Build system changes
+--------------------
+
+* There is a new `Makefile` to build Julia and LLVM using the profile-guided and link-time optimizations (PGO and LTO) strategies, see `contrib/pgo-lto/Makefile` ([#45641]).
+
+New library functions
+---------------------
+
+* `in!(x, s::AbstractSet)` will return whether `x` is in `s`, and insert `x` in `s` if not.
+* The new `Libc.mkfifo` function wraps the `mkfifo` C function on Unix platforms ([#34587]).
+* `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]).
+* `eachrsplit(string, pattern)` iterates split substrings right to left.
+* `Sys.username()` can be used to return the current user's username ([#51897]).
+* `GC.logging_enabled()` can be used to test whether GC logging has been enabled via `GC.enable_logging` ([#51647]).
+* `IdSet` is now exported from Base and considered public ([#53262]).
+
+New library features
+--------------------
+
+* `invmod(n, T)` where `T` is a native integer type now computes the modular inverse of `n` in the modular integer ring that `T` defines ([#52180]).
+* `invmod(n)` is an abbreviation for `invmod(n, typeof(n))` for native integer types ([#52180]).
+* `replace(string, pattern...)` now supports an optional `IO` argument to
+  write the output to a stream rather than returning a string ([#48625]).
+* New methods `allequal(f, itr)` and `allunique(f, itr)` taking a predicate function ([#47679]).
+* `sizehint!(s, n)` now supports an optional `shrink` argument to disable shrinking ([#51929]).
+* New function `Docs.hasdoc(module, symbol)` tells whether a name has a docstring ([#52139]).
+* New function `Docs.undocumented_names(module)` returns a module's undocumented public names ([#52413]).
+* Passing an `IOBuffer` as a stdout argument for `Process` spawn now works as
+  expected, synchronized with `wait` or `success`, so a `Base.BufferStream` is
+  no longer required there for correctness to avoid data races ([#52461]).
+* After a process exits, `closewrite` will no longer be automatically called on
+  the stream passed to it. Call `wait` on the process instead to ensure the
+  content is fully written, then call `closewrite` manually to avoid
+  data-races. Or use the callback form of `open` to have all that handled
+  automatically.
+* `@timed` now additionally returns the elapsed compilation and recompilation time ([#52889])
+* `filter` can now act on a `NamedTuple` ([#50795]).
+* `Iterators.cycle(iter, n)` runs over `iter` a fixed number of times, instead of forever ([#47354])
+* `zero(::AbstractArray)` now applies recursively, so `zero([[1,2],[3,4,5]])` now produces the additive identity `[[0,0],[0,0,0]]` rather than erroring ([#38064]).
+
+Standard library changes
+------------------------
+
+* It's not possible to define `length` for stateful iterators in a generally consistent manner. The
+  potential for silently incorrect results for `Stateful` iterators is addressed by deleting the
+  `length(::Stateful)` method. The last type parameter of `Stateful` is gone, too. Issue: ([#47790]),
+  PR: ([#51747]).
+
+#### StyledStrings
+
+* A new experimental standard library for handling styling in a more comprehensive and structured way ([#49586]).
+* Three new types around the idea of text with "annotations" (`Pair{Symbol, Any}`
+  entries, e.g. `:lang => "en"` or `:face => :magenta`). These annotations
+  are preserved across operations (e.g. string concatenation with `*`) when
+  possible.
+  * `AnnotatedString` is a new `AbstractString` type. It wraps an underlying
+    string and allows for annotations to be attached to regions of the string.
+    This type is used extensively to hold styling information.
+  * `AnnotatedChar` is a new `AbstractChar` type. It wraps another char and
+    holds a list of annotations that apply to it.
+  * `AnnotatedIOBuffer` is a new `IO` type that mimics an `IOBuffer`, but has
+    specialised `read`/`write` methods for annotated content. This can be
+    thought of both as a "string builder" of sorts and also as glue between
+    annotated and unannotated content.
+* The new `Faces` struct serves as a container for text styling information
+  (think typeface, as well as color and decoration), and comes with a framework
+  to provide a convenient, extensible (via `addface!`), and customisable (with a
+  user's `Faces.toml` and `loadfaces!`) approach to
+  styled content ([#49586]).
+* The new `@styled_str` string macro provides a convenient way of creating a
+  `AnnotatedString` with various faces or other attributes applied ([#49586]).
+
+#### Package Manager
+* It is now possible to specify "sources" for packages in a `[sources]` section in Project.toml.
+  This can be used to add non-registered normal or test dependencies.
+* Pkg now obeys `[compat]` bounds for `julia` and raises an error if the version of the running Julia binary is incompatible with the bounds in `Project.toml`.
+  Pkg has always obeyed this compat when working with Registry packages. This change affects mostly local packages
+* `pkg> add` and `Pkg.add` will now add compat entries for new direct dependencies if the active environment is a
+  package (has a `name` and `uuid` entry).
+* Dependencies can now be directly added as weak deps or extras via the `pkg> add --weak/extra Foo` or
+  `Pkg.add("Foo", target=:weakdeps/:extras)` forms.
+
+#### LinearAlgebra
+* `cbrt(::AbstractMatrix{<:Real})` is now defined and returns real-valued matrix cube roots of real-valued matrices ([#50661]).
+* `eigvals/eigen(A, bunchkaufman(B))` and `eigvals/eigen(A, lu(B))`, which utilize the Bunchkaufman (LDL) and LU decomposition of `B`,
+   respectively, now efficiently compute the generalized eigenvalues (`eigen`: and eigenvectors) of `A` and `B`. Note: The second
+   argument is the output of `bunchkaufman` or `lu` ([#50471]).
+* There is now a specialized dispatch for `eigvals/eigen(::Hermitian{<:Tridiagonal})` which performs a similarity transformation to create a real symmetrix triagonal matrix, and solve that using the LAPACK routines ([#49546]).
+* Structured matrices now retain either the axes of the parent (for `Symmetric`/`Hermitian`/`AbstractTriangular`/`UpperHessenberg`), or that of the principal diagonal (for banded matrices) ([#52480]).
+* `bunchkaufman` and `bunchkaufman!` now work for any `AbstractFloat`, `Rational` and their complex variants. `bunchkaufman` now supports `Integer` types, by making an internal conversion to `Rational{BigInt}`. Added new function `inertia` that computes the inertia of the diagonal factor given by the `BunchKaufman` factorization object of a real symmetric or Hermitian matrix. For complex symmetric matrices, `inertia` only computes the number of zero eigenvalues of the diagonal factor ([#51487]).
+* Packages that specialize matrix-matrix `mul!` with a method signature of the form `mul!(::AbstractMatrix, ::MyMatrix, ::AbstractMatrix, ::Number, ::Number)` no longer encounter method ambiguities when interacting with `LinearAlgebra`. Previously, ambiguities used to arise when multiplying a `MyMatrix` with a structured matrix type provided by LinearAlgebra, such as `AbstractTriangular`, which used to necessitate additional methods to resolve such ambiguities. Similar sources of ambiguities have also been removed for matrix-vector `mul!` operations ([#52837]).
+* `lu` and `issuccess(::LU)` now accept an `allowsingular` keyword argument. When set to `true`, a valid factorization with rank-deficient U factor will be treated as success instead of throwing an error. Such factorizations are now shown by printing the factors together with a "rank-deficient" note rather than printing a "Failed Factorization" message ([#52957]).
+
+#### Logging
+
+#### Printf
+
+#### Profile
+
+#### Random
+* `rand` now supports sampling over `Tuple` types ([#35856], [#50251]).
+* `rand` now supports sampling over `Pair` types ([#28705]).
+* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]).
+* Seedable random number generators from `Random` can now be seeded by a string, e.g.
+  `seed!(rng, "a random seed")` ([#51527]).
+
+#### REPL
+
+* Tab complete hints now show in lighter text while typing in the repl. To disable
+  set `Base.active_repl.options.hint_tab_completes = false` interactively, or in startup.jl:
+  ```
+  if VERSION >= v"1.11.0-0"
+    atreplinit() do repl
+        repl.options.hint_tab_completes = false
+    end
+  end
+  ``` ([#51229]).
+* Meta-M with an empty prompt now toggles the contextual module between the previous non-Main
+  contextual module and Main so that switching back and forth is simple. ([#51616], [#52670])
+
+#### SuiteSparse
+
+
+#### SparseArrays
+
+#### Test
+
+#### Dates
+
+The undocumented function `adjust` is no longer exported but is now documented
+
+#### Statistics
+
+* Statistics is now an upgradeable standard library ([#46501]).
+
+#### Distributed
+
+* `pmap` now defaults to using a `CachingPool` ([#33892]).
+
+#### Unicode
+
+
+#### DelimitedFiles
+
+
+#### InteractiveUtils
+
+Deprecated or removed
+---------------------
+
+* `Base.map`, `Iterators.map`, and `foreach` lost their single-argument methods ([#52631]).
+
+
+External dependencies
+---------------------
+* `tput` is no longer called to check terminal capabilities, it has been replaced with a pure-Julia terminfo parser ([#50797]).
+
+Tooling Improvements
+--------------------
+
+* CI now performs limited automatic typo detection on all PRs. If you merge a PR with a
+  failing typo CI check, then the reported typos will be automatically ignored in future CI
+  runs on PRs that edit those same files ([#51704]).
+
+<!--- generated by NEWS-update.jl: -->
+[#28705]: https://github.com/JuliaLang/julia/issues/28705
+[#33892]: https://github.com/JuliaLang/julia/issues/33892
+[#34587]: https://github.com/JuliaLang/julia/issues/34587
+[#35856]: https://github.com/JuliaLang/julia/issues/35856
+[#38064]: https://github.com/JuliaLang/julia/issues/38064
+[#43845]: https://github.com/JuliaLang/julia/issues/43845
+[#45641]: https://github.com/JuliaLang/julia/issues/45641
+[#46501]: https://github.com/JuliaLang/julia/issues/46501
+[#47354]: https://github.com/JuliaLang/julia/issues/47354
+[#47679]: https://github.com/JuliaLang/julia/issues/47679
+[#47790]: https://github.com/JuliaLang/julia/issues/47790
+[#48273]: https://github.com/JuliaLang/julia/issues/48273
+[#48625]: https://github.com/JuliaLang/julia/issues/48625
+[#49546]: https://github.com/JuliaLang/julia/issues/49546
+[#49586]: https://github.com/JuliaLang/julia/issues/49586
+[#49937]: https://github.com/JuliaLang/julia/issues/49937
+[#50074]: https://github.com/JuliaLang/julia/issues/50074
+[#50105]: https://github.com/JuliaLang/julia/issues/50105
+[#50144]: https://github.com/JuliaLang/julia/issues/50144
+[#50251]: https://github.com/JuliaLang/julia/issues/50251
+[#50471]: https://github.com/JuliaLang/julia/issues/50471
+[#50586]: https://github.com/JuliaLang/julia/issues/50586
+[#50661]: https://github.com/JuliaLang/julia/issues/50661
+[#50795]: https://github.com/JuliaLang/julia/issues/50795
+[#50797]: https://github.com/JuliaLang/julia/issues/50797
+[#50864]: https://github.com/JuliaLang/julia/issues/50864
+[#50958]: https://github.com/JuliaLang/julia/issues/50958
+[#51229]: https://github.com/JuliaLang/julia/issues/51229
+[#51416]: https://github.com/JuliaLang/julia/issues/51416
+[#51448]: https://github.com/JuliaLang/julia/issues/51448
+[#51487]: https://github.com/JuliaLang/julia/issues/51487
+[#51501]: https://github.com/JuliaLang/julia/issues/51501
+[#51527]: https://github.com/JuliaLang/julia/issues/51527
+[#51616]: https://github.com/JuliaLang/julia/issues/51616
+[#51647]: https://github.com/JuliaLang/julia/issues/51647
+[#51704]: https://github.com/JuliaLang/julia/issues/51704
+[#51747]: https://github.com/JuliaLang/julia/issues/51747
+[#51799]: https://github.com/JuliaLang/julia/issues/51799
+[#51897]: https://github.com/JuliaLang/julia/issues/51897
+[#51929]: https://github.com/JuliaLang/julia/issues/51929
+[#52096]: https://github.com/JuliaLang/julia/issues/52096
+[#52123]: https://github.com/JuliaLang/julia/issues/52123
+[#52139]: https://github.com/JuliaLang/julia/issues/52139
+[#52180]: https://github.com/JuliaLang/julia/issues/52180
+[#52400]: https://github.com/JuliaLang/julia/issues/52400
+[#52413]: https://github.com/JuliaLang/julia/issues/52413
+[#52461]: https://github.com/JuliaLang/julia/issues/52461
+[#52480]: https://github.com/JuliaLang/julia/issues/52480
+[#52573]: https://github.com/JuliaLang/julia/issues/52573
+[#52631]: https://github.com/JuliaLang/julia/issues/52631
+[#52670]: https://github.com/JuliaLang/julia/issues/52670
+[#52837]: https://github.com/JuliaLang/julia/issues/52837
+[#52883]: https://github.com/JuliaLang/julia/issues/52883
+[#52889]: https://github.com/JuliaLang/julia/issues/52889
+[#52898]: https://github.com/JuliaLang/julia/issues/52898
+[#52957]: https://github.com/JuliaLang/julia/issues/52957
+[#53262]: https://github.com/JuliaLang/julia/issues/53262
+[#53352]: https://github.com/JuliaLang/julia/issues/53352
+
+
 Julia v1.10 Release Notes
 =========================
+
 New language features
 ---------------------
 
 * JuliaSyntax.jl is now used as the default parser, providing better diagnostics and faster
-  parsing. Set environment variable `JULIA_USE_NEW_PARSER` to `0` to switch back to the old
+  parsing. Set environment variable `JULIA_USE_FLISP_PARSER` to `1` to switch back to the old
   parser if necessary (and if you find this necessary, please file an issue) ([#46372]).
-* `⥺` (U+297A, `\leftarrowsubset`) and `⥷` (U+2977, `\leftarrowless`)
-  may now be used as binary operators with arrow precedence. ([#45962])
+* `⥺` (U+297A, `\leftarrowsubset`) and `⥷` (U+2977, `\leftarrowless`) may now be used as
+  binary operators with arrow precedence ([#45962]).
 
 Language changes
 ----------------
@@ -20,58 +601,73 @@ Language changes
   that significantly improves load and inference times for heavily overloaded methods that
   dispatch on Types (such as traits and constructors).
 * The "h bar" `ℏ` (`\hslash` U+210F) character is now treated as equivalent to `ħ` (`\hbar` U+0127).
-* The `@simd` macro now has a more limited and clearer semantics, it only enables reordering and contraction
+* The `@simd` macro now has more limited and clearer semantics: it only enables reordering and contraction
   of floating-point operations, instead of turning on all "fastmath" optimizations.
   If you observe performance regressions due to this change, you can recover previous behavior with `@fastmath @simd`,
-  if you are OK with all the optimizations enabled by the `@fastmath` macro. ([#49405])
+  if you are OK with all the optimizations enabled by the `@fastmath` macro ([#49405]).
 * When a method with keyword arguments is displayed in the stack trace view, the textual
-  representation of the keyword arguments' types is simplified using the new
+  representation of the keyword arguments' type is simplified using the new
   `@Kwargs{key1::Type1, ...}` macro syntax ([#49959]).
 
 Compiler/Runtime improvements
 -----------------------------
 
-* The `@pure` macro is now deprecated. Use `Base.@assume_effects :foldable` instead ([#48682]).
-* The mark phase of the Garbage Collector is now multi-threaded ([#48600]).
+* Updated GC heuristics to count allocated pages instead of object sizes ([#50144]). This should help
+  some programs that consumed excessive memory before.
+* The mark phase of the garbage collector is now multi-threaded ([#48600]).
 * [JITLink](https://llvm.org/docs/JITLink.html) is enabled by default on Linux aarch64 when Julia is linked to LLVM 15 or later versions ([#49745]).
   This should resolve many segmentation faults previously observed on this platform.
+* The precompilation process now uses pidfile locks and orchestrates multiple julia processes to only have one process
+  spend effort precompiling while the others wait. Previously all would do the work and race to overwrite the cache files.
+  ([#49052])
 
 Command-line option changes
 ---------------------------
 
-* New option `--gcthreads` to set how many threads will be used by the Garbage Collector ([#48600]).
-  The default is set to `N/2` where `N` is the amount of worker threads (`--threads`) used by Julia.
+* New option `--gcthreads` to set how many threads will be used by the garbage collector ([#48600]).
+  The default is `N/2` where `N` is the number of worker threads (`--threads`) used by Julia.
+
+Build system changes
+--------------------
+
+* SparseArrays and SuiteSparse are no longer included in the default system image, so the core
+  language no longer contains GPL libraries. However, these libraries are still included
+  alongside the language in the standard binary distribution ([#44247], [#48979], [#49266]).
 
 New library functions
 ---------------------
-* `tanpi` is now defined. It computes tan(πx) more accurately than `tan(pi*x)` ([#48575]).
+
+* `tanpi` is now defined. It computes tan(π*x) more accurately than `tan(pi*x)` ([#48575]).
 * `fourthroot(x)` is now defined in `Base.Math` and can be used to compute the fourth root of `x`.
    It can also be accessed using the unicode character `∜`, which can be typed by `\fourthroot<tab>` ([#48899]).
 * `Libc.memmove`, `Libc.memset`, and `Libc.memcpy` are now defined, whose functionality matches that of their respective C calls.
-* `Base.isprecompiled(pkg::PkgId)` to identify whether a package has already been precompiled ([#50218]).
+* `Base.isprecompiled(pkg::PkgId)` has been added, to identify whether a package has already been precompiled ([#50218]).
 
 New library features
 --------------------
-* The `initialized=true` keyword assignment for `sortperm!` and `partialsortperm!`
-  is now a no-op ([#47979]). It previously exposed unsafe behavior ([#47977]).
+
 * `binomial(x, k)` now supports non-integer `x` ([#48124]).
 * A `CartesianIndex` is now treated as a "scalar" for broadcasting ([#47044]).
 * `printstyled` now supports italic output ([#45164]).
-* `parent` and `parentindices` support `SubString`s
+* `parent` and `parentindices` support `SubString`s.
+* `replace(string, pattern...)` now supports an optional `IO` argument to
+  write the output to a stream rather than returning a string ([#48625]).
+* `startswith` now supports seekable `IO` streams ([#43055]).
 
 Standard library changes
 ------------------------
 
-* `startswith` now supports seekable `IO` streams ([#43055])
-* printing integral `Rational`s will skip the denominator in `Rational`-typed IO context (e.g. in `Arrays`) ([#45396])
+* The `initialized=true` keyword assignment for `sortperm!` and `partialsortperm!`
+  is now a no-op ([#47979]). It previously exposed unsafe behavior ([#47977]).
+* Printing integral `Rational`s will skip the denominator in `Rational`-typed IO context (e.g. in arrays) ([#45396]).
 
 #### Package Manager
 
-* `Pkg.precompile` now accepts `timing` as a keyword argument which displays per package timing information for precompilation (e.g. `Pkg.precompile(timing=true)`)
+* `Pkg.precompile` now accepts `timing` as a keyword argument which displays per package timing information for precompilation (e.g. `Pkg.precompile(timing=true)`).
 
 #### LinearAlgebra
 
-* `AbstractQ` no longer subtypes to `AbstractMatrix`. Moreover, `adjoint(Q::AbstractQ)`
+* `AbstractQ` no longer subtypes `AbstractMatrix`. Moreover, `adjoint(Q::AbstractQ)`
   no longer wraps `Q` in an `Adjoint` type, but instead in an `AdjointQ`, that itself
   subtypes `AbstractQ`. This change accounts for the fact that typically `AbstractQ`
   instances behave like function-based, matrix-backed linear operators, and hence don't
@@ -82,19 +678,20 @@ Standard library changes
   ([#46196]).
 * Adjoints and transposes of `Factorization` objects are no longer wrapped in `Adjoint`
   and `Transpose` wrappers, respectively. Instead, they are wrapped in
-  `AdjointFactorization` and `TranposeFactorization` types, which themselves subtype
+  `AdjointFactorization` and `TransposeFactorization` types, which themselves subtype
   `Factorization` ([#46874]).
 * New functions `hermitianpart` and `hermitianpart!` for extracting the Hermitian
   (real symmetric) part of a matrix ([#31836]).
 * The `norm` of the adjoint or transpose of an `AbstractMatrix` now returns the norm of the
   parent matrix by default, matching the current behaviour for `AbstractVector`s ([#49020]).
 * `eigen(A, B)` and `eigvals(A, B)`, where one of `A` or `B` is symmetric or Hermitian,
-  are now fully supported ([#49533])
+  are now fully supported ([#49533]).
 * `eigvals/eigen(A, cholesky(B))` now computes the generalized eigenvalues (`eigen`: and eigenvectors)
   of `A` and `B` via Cholesky decomposition for positive definite `B`. Note: The second argument is
   the output of `cholesky`.
 
 #### Printf
+
 * Format specifiers now support dynamic width and precision, e.g. `%*s` and `%*.*g` ([#40105]).
 
 #### REPL
@@ -105,13 +702,54 @@ Standard library changes
 #### Test
 
 * The `@test_broken` macro (or `@test` with `broken=true`) now complains if the test expression returns a
-  non-boolean value in the same way as a non-broken test. ([#47804])
-* When a call to `@test` fails or errors inside a function, a larger stacktrace is now printed such that the location of the test within a `@testset` can be retrieved ([#49451])
+  non-boolean value in the same way as a non-broken test ([#47804]).
+* When a call to `@test` fails or errors inside a function, a larger stacktrace is now printed such that the location of the  test within a `@testset` can be retrieved ([#49451]).
 
 #### InteractiveUtils
 
- * `code_native` and `@code_native` now default to intel syntax instead of AT&T.
- * `@time_imports` now shows the timing of any module `__init__()`s that are run ([#49529])
+* `code_native` and `@code_native` now default to intel syntax instead of AT&T.
+* `@time_imports` now shows the timing of any module `__init__()`s that are run ([#49529]).
+
+Deprecated or removed
+---------------------
+
+* The `@pure` macro is now deprecated. Use `Base.@assume_effects :foldable` instead ([#48682]).
+
+<!--- generated by NEWS-update.jl: -->
+[#31836]: https://github.com/JuliaLang/julia/issues/31836
+[#40105]: https://github.com/JuliaLang/julia/issues/40105
+[#43055]: https://github.com/JuliaLang/julia/issues/43055
+[#44247]: https://github.com/JuliaLang/julia/issues/44247
+[#45164]: https://github.com/JuliaLang/julia/issues/45164
+[#45396]: https://github.com/JuliaLang/julia/issues/45396
+[#45962]: https://github.com/JuliaLang/julia/issues/45962
+[#46196]: https://github.com/JuliaLang/julia/issues/46196
+[#46372]: https://github.com/JuliaLang/julia/issues/46372
+[#46874]: https://github.com/JuliaLang/julia/issues/46874
+[#47044]: https://github.com/JuliaLang/julia/issues/47044
+[#47804]: https://github.com/JuliaLang/julia/issues/47804
+[#47977]: https://github.com/JuliaLang/julia/issues/47977
+[#47979]: https://github.com/JuliaLang/julia/issues/47979
+[#48124]: https://github.com/JuliaLang/julia/issues/48124
+[#48575]: https://github.com/JuliaLang/julia/issues/48575
+[#48600]: https://github.com/JuliaLang/julia/issues/48600
+[#48625]: https://github.com/JuliaLang/julia/issues/48625
+[#48682]: https://github.com/JuliaLang/julia/issues/48682
+[#48899]: https://github.com/JuliaLang/julia/issues/48899
+[#48979]: https://github.com/JuliaLang/julia/issues/48979
+[#49020]: https://github.com/JuliaLang/julia/issues/49020
+[#49052]: https://github.com/JuliaLang/julia/issues/49052
+[#49110]: https://github.com/JuliaLang/julia/issues/49110
+[#49266]: https://github.com/JuliaLang/julia/issues/49266
+[#49405]: https://github.com/JuliaLang/julia/issues/49405
+[#49451]: https://github.com/JuliaLang/julia/issues/49451
+[#49529]: https://github.com/JuliaLang/julia/issues/49529
+[#49533]: https://github.com/JuliaLang/julia/issues/49533
+[#49745]: https://github.com/JuliaLang/julia/issues/49745
+[#49795]: https://github.com/JuliaLang/julia/issues/49795
+[#49959]: https://github.com/JuliaLang/julia/issues/49959
+[#50144]: https://github.com/JuliaLang/julia/issues/50144
+[#50218]: https://github.com/JuliaLang/julia/issues/50218
 
 Julia v1.9 Release Notes
 ========================
@@ -245,8 +883,7 @@ Standard library changes
 
 #### REPL
 
-* `Alt-e` now opens the current input in an editor. The content (if modified) will be executed
-  upon exiting the editor ([#33759]).
+* `Alt-e` now opens the current input in an editor ([#33759]).
 * The contextual module which is active in the REPL can be changed (it is `Main` by default),
   via the `REPL.activate(::Module)` function or via typing the module in the REPL and pressing
   the keybinding Alt-m ([#33872]).
@@ -325,11 +962,13 @@ Tooling Improvements
 [#42902]: https://github.com/JuliaLang/julia/issues/42902
 [#43270]: https://github.com/JuliaLang/julia/issues/43270
 [#43334]: https://github.com/JuliaLang/julia/issues/43334
+[#43536]: https://github.com/JuliaLang/julia/issues/43536
 [#44137]: https://github.com/JuliaLang/julia/issues/44137
 [#44266]: https://github.com/JuliaLang/julia/issues/44266
 [#44358]: https://github.com/JuliaLang/julia/issues/44358
 [#44360]: https://github.com/JuliaLang/julia/issues/44360
 [#44512]: https://github.com/JuliaLang/julia/issues/44512
+[#44527]: https://github.com/JuliaLang/julia/issues/44527
 [#44534]: https://github.com/JuliaLang/julia/issues/44534
 [#44571]: https://github.com/JuliaLang/julia/issues/44571
 [#44714]: https://github.com/JuliaLang/julia/issues/44714
@@ -359,6 +998,8 @@ Tooling Improvements
 [#46609]: https://github.com/JuliaLang/julia/issues/46609
 [#46862]: https://github.com/JuliaLang/julia/issues/46862
 [#46976]: https://github.com/JuliaLang/julia/issues/46976
+[#47117]: https://github.com/JuliaLang/julia/issues/47117
+[#47184]: https://github.com/JuliaLang/julia/issues/47184
 [#47367]: https://github.com/JuliaLang/julia/issues/47367
 [#47392]: https://github.com/JuliaLang/julia/issues/47392
 
@@ -653,6 +1294,7 @@ Tooling Improvements
 [#43919]: https://github.com/JuliaLang/julia/issues/43919
 [#44080]: https://github.com/JuliaLang/julia/issues/44080
 [#44136]: https://github.com/JuliaLang/julia/issues/44136
+[#45064]: https://github.com/JuliaLang/julia/issues/45064
 
 Julia v1.7 Release Notes
 ========================
@@ -778,7 +1420,7 @@ Standard library changes
 * `lpad/rpad` are now defined in terms of `textwidth` ([#39044]).
 * `Test.@test` now accepts `broken` and `skip` boolean keyword arguments, which
   mimic `Test.@test_broken` and `Test.@test_skip` behavior, but allows skipping
-  tests failing only under certain conditions.  For example
+  tests failing only under certain conditions. For example
   ```julia
   if T == Float64
       @test_broken isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
@@ -1202,7 +1844,7 @@ Standard library changes
 * The `Pkg.BinaryPlatforms` module has been moved into `Base` as `Base.BinaryPlatforms` and heavily reworked.
   Applications that want to be compatible with the old API should continue to import `Pkg.BinaryPlatforms`,
   however new users should use `Base.BinaryPlatforms` directly ([#37320]).
-* The `Pkg.Artifacts` module has been imported as a separate standard library.  It is still available as
+* The `Pkg.Artifacts` module has been imported as a separate standard library. It is still available as
   `Pkg.Artifacts`, however starting from Julia v1.6+, packages may import simply `Artifacts` without importing
   all of `Pkg` alongside ([#37320]).
 
@@ -1242,7 +1884,7 @@ Standard library changes
 * The `AbstractMenu` extension interface of `REPL.TerminalMenus` has been extensively
   overhauled. The new interface does not rely on global configuration variables, is more
   consistent in delegating printing of the navigation/selection markers, and provides
-  improved support for dynamic menus.  These changes are compatible with the previous
+  improved support for dynamic menus. These changes are compatible with the previous
   (deprecated) interface, so are non-breaking.
 
   The new API offers several enhancements:
@@ -1380,9 +2022,9 @@ Tooling Improvements
 [#37753]: https://github.com/JuliaLang/julia/issues/37753
 [#37829]: https://github.com/JuliaLang/julia/issues/37829
 [#37844]: https://github.com/JuliaLang/julia/issues/37844
+[#37928]: https://github.com/JuliaLang/julia/issues/37928
 [#37973]: https://github.com/JuliaLang/julia/issues/37973
 [#38042]: https://github.com/JuliaLang/julia/issues/38042
-[#38062]: https://github.com/JuliaLang/julia/issues/38062
 [#38168]: https://github.com/JuliaLang/julia/issues/38168
 [#38449]: https://github.com/JuliaLang/julia/issues/38449
 [#38475]: https://github.com/JuliaLang/julia/issues/38475
@@ -1497,8 +2139,8 @@ New library functions
   `Base.Experimental.show_error_hints` from their `showerror` method ([#35094]).
 * The `@ccall` macro has been added to Base. It is a near drop-in replacement for `ccall` with more Julia-like syntax. It also wraps the new `foreigncall` API for varargs of different types, though it lacks the capability to specify an LLVM calling convention ([#32748]).
 * New functions `mergewith` and `mergewith!` supersede `merge` and `merge!` with `combine`
-  argument.  They don't have the restriction for `combine` to be a `Function` and also
-  provide one-argument method that returns a closure.  The old methods of `merge` and
+  argument. They don't have the restriction for `combine` to be a `Function` and also
+  provide one-argument method that returns a closure. The old methods of `merge` and
   `merge!` are still available for backward compatibility ([#34296]).
 * The new `isdisjoint` function indicates whether two collections are disjoint ([#34427]).
 * Add function `ismutable` and deprecate `isimmutable` to check whether something is mutable ([#34652]).
@@ -1625,6 +2267,7 @@ Tooling Improvements
 [#25930]: https://github.com/JuliaLang/julia/issues/25930
 [#26872]: https://github.com/JuliaLang/julia/issues/26872
 [#28789]: https://github.com/JuliaLang/julia/issues/28789
+[#28811]: https://github.com/JuliaLang/julia/issues/28811
 [#29240]: https://github.com/JuliaLang/julia/issues/29240
 [#29333]: https://github.com/JuliaLang/julia/issues/29333
 [#29411]: https://github.com/JuliaLang/julia/issues/29411
@@ -1640,6 +2283,7 @@ Tooling Improvements
 [#33864]: https://github.com/JuliaLang/julia/issues/33864
 [#33886]: https://github.com/JuliaLang/julia/issues/33886
 [#33937]: https://github.com/JuliaLang/julia/issues/33937
+[#34126]: https://github.com/JuliaLang/julia/issues/34126
 [#34149]: https://github.com/JuliaLang/julia/issues/34149
 [#34199]: https://github.com/JuliaLang/julia/issues/34199
 [#34200]: https://github.com/JuliaLang/julia/issues/34200
@@ -1666,9 +2310,12 @@ Tooling Improvements
 [#34896]: https://github.com/JuliaLang/julia/issues/34896
 [#34953]: https://github.com/JuliaLang/julia/issues/34953
 [#35001]: https://github.com/JuliaLang/julia/issues/35001
+[#35057]: https://github.com/JuliaLang/julia/issues/35057
 [#35078]: https://github.com/JuliaLang/julia/issues/35078
+[#35085]: https://github.com/JuliaLang/julia/issues/35085
 [#35094]: https://github.com/JuliaLang/julia/issues/35094
 [#35108]: https://github.com/JuliaLang/julia/issues/35108
+[#35113]: https://github.com/JuliaLang/julia/issues/35113
 [#35124]: https://github.com/JuliaLang/julia/issues/35124
 [#35132]: https://github.com/JuliaLang/julia/issues/35132
 [#35138]: https://github.com/JuliaLang/julia/issues/35138
@@ -1979,6 +2626,7 @@ Tooling Improvements
 [#32534]: https://github.com/JuliaLang/julia/issues/32534
 [#32600]: https://github.com/JuliaLang/julia/issues/32600
 [#32628]: https://github.com/JuliaLang/julia/issues/32628
+[#32651]: https://github.com/JuliaLang/julia/issues/32651
 [#32653]: https://github.com/JuliaLang/julia/issues/32653
 [#32729]: https://github.com/JuliaLang/julia/issues/32729
 [#32814]: https://github.com/JuliaLang/julia/issues/32814
@@ -1988,6 +2636,7 @@ Tooling Improvements
 [#32851]: https://github.com/JuliaLang/julia/issues/32851
 [#32872]: https://github.com/JuliaLang/julia/issues/32872
 [#32875]: https://github.com/JuliaLang/julia/issues/32875
+[#32918]: https://github.com/JuliaLang/julia/issues/32918
 
 Julia v1.2 Release Notes
 ========================
@@ -2130,6 +2779,7 @@ External dependencies
 [#31009]: https://github.com/JuliaLang/julia/issues/31009
 [#31125]: https://github.com/JuliaLang/julia/issues/31125
 [#31211]: https://github.com/JuliaLang/julia/issues/31211
+[#31223]: https://github.com/JuliaLang/julia/issues/31223
 [#31230]: https://github.com/JuliaLang/julia/issues/31230
 [#31235]: https://github.com/JuliaLang/julia/issues/31235
 [#31310]: https://github.com/JuliaLang/julia/issues/31310
@@ -2378,7 +3028,7 @@ Standard Library Changes
 
 * The `Libdl` module's methods `dlopen()` and `dlsym()` have gained a
   `throw_error` keyword argument, replacing the now-deprecated `dlopen_e()`
-  and `dlsym_e()` methods.  When `throw_error` is `false`, failure to locate
+  and `dlsym_e()` methods. When `throw_error` is `false`, failure to locate
   a shared library or symbol will return `nothing` rather than `C_NULL`.
   ([#28888])
 
@@ -2640,7 +3290,7 @@ This section lists changes that do not have deprecation warnings.
     "Code Loading" and "Pkg" for documentation.
 
   * `replace(s::AbstractString, pat=>repl)` for function `repl` arguments formerly
-    passed a substring to `repl` in all cases.  It now passes substrings for
+    passed a substring to `repl` in all cases. It now passes substrings for
     string patterns `pat`, but a `Char` for character patterns (when `pat` is a
     `Char`, collection of `Char`, or a character predicate) ([#25815]).
 
@@ -2825,7 +3475,7 @@ This section lists changes that do not have deprecation warnings.
 
   * The logging system has been redesigned - `info` and `warn` are deprecated
     and replaced with the logging macros `@info`, `@warn`, `@debug` and
-    `@error`.  The `logging` function is also deprecated and replaced with
+    `@error`. The `logging` function is also deprecated and replaced with
     `AbstractLogger` and the functions from the new standard `Logging` library.
     ([#24490])
 
@@ -2981,7 +3631,7 @@ Library improvements
     For example, `x^-1` is now essentially a synonym for `inv(x)`, and works
     in a type-stable way even if `typeof(x) != typeof(inv(x))` ([#24240]).
 
-  * New `Iterators.reverse(itr)` for reverse-order iteration ([#24187]).  Iterator
+  * New `Iterators.reverse(itr)` for reverse-order iteration ([#24187]). Iterator
     types `T` can implement `start` etc. for `Iterators.Reverse{T}` to support this.
 
   * The functions `nextind` and `prevind` now accept `nchar` argument that indicates
@@ -3120,7 +3770,7 @@ Library improvements
     cartesian indices to linear indices using the normal indexing operation.
     ([#24715], [#26775]).
 
-  * `IdDict{K,V}` replaces `ObjectIdDict`.  It has type parameters
+  * `IdDict{K,V}` replaces `ObjectIdDict`. It has type parameters
     like other `AbstractDict` subtypes and its constructors mirror the
     ones of `Dict`. ([#25210])
 
@@ -3331,8 +3981,8 @@ Deprecated or removed
     should add offset axis support to the function `f` directly ([#26733]).
 
   * The functions `ones` and `zeros` used to accept any objects as dimensional arguments,
-    implicitly converting them to `Int`s.  This is now deprecated; only `Integer`s or
-    `AbstractUnitRange`s are accepted as arguments.  Instead, convert the arguments before
+    implicitly converting them to `Int`s. This is now deprecated; only `Integer`s or
+    `AbstractUnitRange`s are accepted as arguments. Instead, convert the arguments before
     calling `ones` or `zeros` ([#26733]).
 
   * The variadic `size(A, dim1, dim2, dims...)` method to return a tuple of multiple
@@ -4097,6 +4747,7 @@ Command-line option changes
 [#26932]: https://github.com/JuliaLang/julia/issues/26932
 [#26935]: https://github.com/JuliaLang/julia/issues/26935
 [#26980]: https://github.com/JuliaLang/julia/issues/26980
+[#26991]: https://github.com/JuliaLang/julia/issues/26991
 [#26997]: https://github.com/JuliaLang/julia/issues/26997
 [#27067]: https://github.com/JuliaLang/julia/issues/27067
 [#27071]: https://github.com/JuliaLang/julia/issues/27071
@@ -4131,6 +4782,7 @@ Command-line option changes
 [#28155]: https://github.com/JuliaLang/julia/issues/28155
 [#28266]: https://github.com/JuliaLang/julia/issues/28266
 [#28302]: https://github.com/JuliaLang/julia/issues/28302
+[#28310]: https://github.com/JuliaLang/julia/issues/28310
 
 Julia v0.6.0 Release Notes
 ==========================
@@ -4197,8 +4849,8 @@ Language changes
     Previously, this syntax parsed as an implicit multiplication ([#18690]).
 
   * For every binary operator `⨳`, `a .⨳ b` is now automatically equivalent to
-    the `broadcast` call `(⨳).(a, b)`.  Hence, one no longer defines methods
-    for `.*` etcetera.  This also means that "dot operations" automatically
+    the `broadcast` call `(⨳).(a, b)`. Hence, one no longer defines methods
+    for `.*` etcetera. This also means that "dot operations" automatically
     fuse into a single loop, along with other dot calls `f.(x)` ([#17623]).
     Similarly for unary operators ([#20249]).
 
@@ -4251,11 +4903,11 @@ This section lists changes that do not have deprecation warnings.
     or an array as a "scalar" ([#16986]).
 
   * `broadcast` now produces a `BitArray` instead of `Array{Bool}` for
-    functions yielding a boolean result.  If you want `Array{Bool}`, use
+    functions yielding a boolean result. If you want `Array{Bool}`, use
     `broadcast!` or `.=` ([#17623]).
 
   * Broadcast `A[I...] .= X` with entirely scalar indices `I` is deprecated as
-    its behavior will change in the future.  Use `A[I...] = X` instead.
+    its behavior will change in the future. Use `A[I...] = X` instead.
 
   * Operations like `.+` and `.*` on `Range` objects are now generic
     `broadcast` calls (see [above](#language-changes)) and produce an `Array`.
@@ -4301,7 +4953,7 @@ This section lists changes that do not have deprecation warnings.
     now tab-completes to U+03B5 (greek small letter epsilon) ([#19464]).
 
   * `retry` now inputs the keyword arguments `delays` and `check` instead of
-    `n` and `max_delay`.  The previous functionality can be achieved setting
+    `n` and `max_delay`. The previous functionality can be achieved setting
     `delays` to `ExponentialBackOff` ([#19331]).
 
   * `transpose(::AbstractVector)` now always returns a `RowVector` view of the input (which is a
@@ -4342,7 +4994,7 @@ This section lists changes that do not have deprecation warnings.
       using the values and types of `a` and `step` as given, whereas
       `range(a, step, len)` will attempt to match inputs `a::FloatNN`
       and `step::FloatNN` to rationals and construct a `StepRangeLen`
-      that internally uses twice-precision arithmetic.  These two
+      that internally uses twice-precision arithmetic. These two
       outcomes exhibit differences in both precision and speed.
 
   * `A=>B` expressions are now parsed as calls instead of using `=>` as the
@@ -4362,7 +5014,7 @@ This section lists changes that do not have deprecation warnings.
     trigamma, and polygamma special functions have been moved from Base to
     the
     [SpecialFunctions.jl package](https://github.com/JuliaMath/SpecialFunctions.jl)
-    ([#20427]).  Note that `airy`, `airyx` and `airyprime` have been deprecated
+    ([#20427]). Note that `airy`, `airyx` and `airyprime` have been deprecated
     in favor of more specific functions (`airyai`, `airybi`, `airyaiprime`,
     `airybiprimex`, `airyaix`, `airybix`, `airyaiprimex`, `airybiprimex`)
     ([#18050]).
@@ -4447,7 +5099,7 @@ Library improvements
     for more information.
 
   * The default color for info messages has been changed from blue to cyan
-    ([#18442]), and for warning messages from red to yellow ([#18453]).  This
+    ([#18442]), and for warning messages from red to yellow ([#18453]). This
     can be changed back to the original colors by setting the environment
     variables `JULIA_INFO_COLOR` to `"blue"` and `JULIA_WARN_COLOR` to `"red"`.
 
@@ -4791,10 +5443,10 @@ New language features
   * Function return type syntax `function f()::T` has been added ([#1090]). Values returned
     from a function with such a declaration will be converted to the specified type `T`.
 
-  * Many more operators now support `.` prefixes (e.g. `.≤`) ([#17393]).  However,
+  * Many more operators now support `.` prefixes (e.g. `.≤`) ([#17393]). However,
     users are discouraged from overloading these, since they are mainly parsed
     in order to implement backwards compatibility with planned automatic
-    broadcasting of dot operators in Julia 0.6 ([#16285]).  Explicitly qualified
+    broadcasting of dot operators in Julia 0.6 ([#16285]). Explicitly qualified
     operator names like `Base.≤` should now use `Base.:≤` (prefixed by `@compat`
     if you need 0.4 compatibility via the `Compat` package).
 
@@ -4927,7 +5579,7 @@ Library improvements
   * Strings ([#16107]):
 
     * The `UTF8String` and `ASCIIString` types have been merged into a single
-      `String` type ([#16058]).  Use `isascii(s)` to check whether
+      `String` type ([#16058]). Use `isascii(s)` to check whether
       a string contains only ASCII characters. The `ascii(s)` function now
       converts `s` to `String`, raising an `ArgumentError` exception if `s` is
       not pure ASCII.
@@ -5235,7 +5887,7 @@ Language tooling improvements
      talk](https://www.youtube.com/watch?v=e6-hcOHO0tc&list=PLP8iPy9hna6SQPwZUDtAM59-wPzCPyD_S&index=5)
      on Gallium shows off various features of the debugger.
 
-   * The [Juno IDE](http://junolab.org) has matured significantly, and now
+   * The [Juno IDE](https://junolab.org) has matured significantly, and now
      also includes support for plotting and debugging.
 
    * [Cxx.jl](https://github.com/Keno/Cxx.jl) provides a convenient FFI for
@@ -5249,7 +5901,7 @@ New language features
 
   * Function call overloading: for arbitrary objects `x` (not of type
     `Function`), `x(...)` is transformed into `call(x, ...)`, and `call`
-    can be overloaded as desired.  Constructors are now a special case of
+    can be overloaded as desired. Constructors are now a special case of
     this mechanism, which allows e.g. constructors for abstract types.
     `T(...)` falls back to `convert(T, x)`, so all `convert` methods implicitly
     define a constructor ([#8712], [#2403]).
@@ -5277,13 +5929,13 @@ New language features
     `~/.julia/lib/v0.4` ([#8745]).
 
       * See manual section on `Module initialization and precompilation` (under `Modules`) for
-        details and errata.  In particular, to be safely precompilable a module may need an
+        details and errata. In particular, to be safely precompilable a module may need an
         `__init__` function to separate code that must be executed at runtime rather than precompile
-        time.  Modules that are *not* precompilable should call `__precompile__(false)`.
+        time. Modules that are *not* precompilable should call `__precompile__(false)`.
 
       * The precompiled `.ji` file includes a list of dependencies (modules and files that
         were imported/included at precompile-time), and the module is automatically recompiled
-        upon `import` when any of its dependencies have changed.  Explicit dependencies
+        upon `import` when any of its dependencies have changed. Explicit dependencies
         on other files can be declared with `include_dependency(path)` ([#12458]).
 
       * New option `--output-incremental={yes|no}` added to invoke the equivalent of `Base.compilecache`
@@ -5487,7 +6139,7 @@ Library improvements
     * New `vecdot` function, analogous to `vecnorm`, for Euclidean inner products over any iterable container ([#11067]).
 
     * `p = plan_fft(x)` and similar functions now return a `Base.DFT.Plan` object, rather
-    than an anonymous function.  Calling it via `p(x)` is deprecated in favor of
+    than an anonymous function. Calling it via `p(x)` is deprecated in favor of
     `p * x` or `p \ x` (for the inverse), and it can also be used with `A_mul_B!`
     to employ pre-allocated output arrays ([#12087]).
 
@@ -5633,7 +6285,7 @@ Library improvements
 
   * Other improvements
 
-    * You can now tab-complete emoji via their [short names](http://www.emoji-cheat-sheet.com/), using `\:name:<tab>` ([#10709]).
+    * You can now tab-complete emoji via their [short names](https://www.emoji-cheat-sheet.com/), using `\:name:<tab>` ([#10709]).
 
     * `gc_enable` subsumes `gc_disable`, and also returns the previous GC state.
 
@@ -5977,7 +6629,7 @@ Library improvements
     * New string type, `UTF16String` ([#4930]), constructed by
       `utf16(s)` from another string, a `Uint16` array or pointer, or
       a byte array (possibly prefixed by a byte-order marker to
-      indicate endian-ness).  Its data is internally `NULL`-terminated
+      indicate endian-ness). Its data is internally `NULL`-terminated
       for passing to C ([#7016]).
 
     * `CharString` is renamed to `UTF32String` ([#4943]), and its data
@@ -6012,7 +6664,7 @@ Library improvements
 
       * New `vecnorm(itr, p=2)` function that computes the norm of
         any iterable collection of numbers as if it were a vector of
-        the same length.  This generalizes and replaces `normfro` ([#6057]),
+        the same length. This generalizes and replaces `normfro` ([#6057]),
         and `norm` is now type-stable ([#6056]).
 
       * New `UniformScaling` matrix type and identity `I` constant ([#5810]).
diff --git a/JuliaLowering/.gitignore b/JuliaLowering/.gitignore
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/JuliaLowering/LICENSE b/JuliaLowering/LICENSE
new file mode 100644
index 0000000000000..62a00d053a428
--- /dev/null
+++ b/JuliaLowering/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 JuliaHub and contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/JuliaLowering/Manifest.toml b/JuliaLowering/Manifest.toml
new file mode 100644
index 0000000000000..1a5f6860b185c
--- /dev/null
+++ b/JuliaLowering/Manifest.toml
@@ -0,0 +1,16 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.14.0-DEV"
+manifest_format = "2.1"
+project_hash = "16f6f8d58c46fe20d68a941bfaddb4590471548a"
+
+[[deps.JuliaLowering]]
+deps = ["JuliaSyntax"]
+path = "."
+uuid = "f3c80556-a63f-4383-b822-37d64f81a311"
+version = "1.0.0-DEV"
+
+[[deps.JuliaSyntax]]
+path = "../JuliaSyntax"
+uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4"
+version = "2.0.0-DEV"
diff --git a/JuliaLowering/Project.toml b/JuliaLowering/Project.toml
new file mode 100644
index 0000000000000..4add188a89eef
--- /dev/null
+++ b/JuliaLowering/Project.toml
@@ -0,0 +1,22 @@
+name = "JuliaLowering"
+uuid = "f3c80556-a63f-4383-b822-37d64f81a311"
+authors = ["Claire Foster <aka.c42f@gmail.com> and contributors"]
+version = "1.0.0-DEV"
+
+[deps]
+JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4"
+
+[sources]
+JuliaSyntax = {path = "../JuliaSyntax"}
+
+[compat]
+julia = "1"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+FileWatching = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[targets]
+test = ["Test", "FileWatching", "Markdown", "REPL"]
diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md
new file mode 100644
index 0000000000000..5ebe02b95688d
--- /dev/null
+++ b/JuliaLowering/README.md
@@ -0,0 +1,954 @@
+# JuliaLowering
+
+[![Build Status](https://github.com/c42f/JuliaLowering.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/c42f/JuliaLowering.jl/actions/workflows/CI.yml?query=branch%3Amain)
+
+JuliaLowering.jl is an experimental port of Julia's code lowering compiler
+passes, written in Julia itself. "Code lowering" is the set of compiler passes
+which *symbolically* transform and simplify Julia's syntax prior to type
+inference.
+
+## Goals
+
+This work is intended to
+* Bring precise code provenance to Julia's lowered form (and eventually
+  downstream in type inference, stack traces, etc). This has many benefits
+    - Talk to users precisely about their code via character-precise error and
+      diagnostic messages from lowering
+    - Greatly simplify the implementation of critical tools like Revise.jl
+      which rely on analyzing how the user's source maps to the compiler's data
+      structures
+    - Allow tools like JuliaInterpreter to use type-inferred and optimized
+      code, with the potential for huge speed improvements.
+* Bring improvements for macro authors
+    - Prototype "automatic hygiene" (no more need for `esc()`!)
+    - Precise author-defined error reporting from macros
+    - Sketch better interfaces for syntax trees (hopefully!)
+
+## Trying it out
+
+Note this is a work in progress; many types of syntax are not yet handled.
+
+1. You need a 1.13.0-DEV build of Julia: At least 1.13.0-DEV.880. Commit `5ebc5b463ea` is currently known to work. Note that JuliaLowering relies on Julia internals and may be broken on the latest Julia dev version from time to time.
+2. Use commit `e02f29f` of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl)
+3. Get the latest version of [JuliaSyntaxFormatter](https://github.com/c42f/JuliaSyntaxFormatter.jl)
+4. Run the demo `include("test/demo.jl")`
+
+# Design notes
+
+## Syntax trees
+
+Want something something better than `JuliaSyntax.SyntaxNode`! `SyntaxTree` and
+`SyntaxGraph` provide this. Some future version of these should end up in
+`JuliaSyntax`.
+
+We want to allow arbitrary attributes to be attached to tree nodes by analysis
+passes. This separates the analysis pass implementation from the data
+structure, allowing passes which don't know about each other to act on a shared
+data structure.
+
+Design and implementation inspiration comes in several analogies:
+
+Analogy 1: the ECS (Entity-Component-System) pattern for computer game design.
+This pattern is highly successful because it separates game logic (systems)
+from game objects (entities) by providing flexible storage
+* Compiler passes are "systems"
+* AST tree nodes are "entities"
+* Node attributes are "components"
+
+Analogy 2: The AoS to SoA transformation. But here we've got a kind of
+tree-of-structs-with-optional-attributes to struct-of-Dicts transformation.
+The data alignment / packing efficiency and concrete type safe storage benefits
+are similar.
+
+Analogy 3: Graph algorithms which represent graphs as a compact array of node
+ids and edges with integer indices, rather than using a linked data structure.
+
+### References
+
+Sander Mertens, the author of the Flecs ECS has a blog post series discussing
+ECS data structures and the many things that may be done with them. We may want
+to use some of these tricks to make `SyntaxTree` faster, eventually. See, for
+example,
+[Building Games in ECS with Entity Relationships](https://ajmmertens.medium.com/building-games-in-ecs-with-entity-relationships-657275ba2c6c)
+
+### Structural assertions / checking validity of syntax trees
+
+Syntax trees in Julia `Expr` form are very close to lisp lists: a symbol at the
+`head` of the list which specifies the syntactic form, and a sequence of
+children in the syntax tree. This is a representation which `JuliaSyntax` and
+`JuliaLowering` follow but it does come with certain disadvantages. One of the
+most problematic is that the number of children affects the validity (and
+sometimes semantics) of an AST node, as much as the `head` symbol does.
+
+In `JuliaSyntax` we've greatly reduced the overloading of `head` in order to
+simplify the interpretation of child structures in the tree. For example,
+broadcast calls like `f.(x,y)` use the `K"dotcall"` kind rather than being a
+node with `head == Symbol(".")` and a tuple as children.
+
+However, there's still many ways for lowering to encounter invalid expressions
+of type `SyntaxTree` and these must be checked. In JuliaSyntax we have several
+levels of effort corresponding to the type of errors conditions we desire to
+check and report:
+
+* For invalid syntax which is accepted by the `JuliaSyntax`
+  parser but is invalid in lowering we use manual `if` blocks followed by
+  throwing a `LoweringError`. This is more programming effort but allows for
+  the highest quality error messages for the typical end user.
+* For invalid syntax which can only be produced by macros (ie, not by the
+  parser) we mostly use the `@chk` macro. This is a quick tool for validating
+  input but gives lesser quality error messages.
+* For JuliaLowering's internal invariants we just use `@assert` - these should
+  never be hit and can be compiled out in principle.
+
+## Provenance tracking
+
+Expression provenance is tracked through lowering by attaching provenance
+information in the `source` attribute to every expression as it is generated.
+For example when parsing a source file we have
+
+```julia
+julia> ex = parsestmt(SyntaxTree, "a + b", filename="foo.jl")
+SyntaxTree with attributes kind,value,name_val,syntax_flags,source
+[call-i]                                │
+  a                                     │
+  +                                     │
+  b                                     │
+
+julia> ex[3].source
+a + b
+#   ╙ ── these are the bytes you're looking for 😊
+```
+
+The `provenance` function should be used to look up the `source` attribute and
+the `showprov` function used to inspect the content (this is preferred because
+the encoding of `source` is an implementation detail). For example:
+
+```julia
+julia> showprov(ex[3])
+a + b
+#   ╙ ── in source
+# @ foo.jl:1
+```
+
+During macro expansion and lowering provenance gets more complicated because an
+expression can arise from multiple sources. For example, we want to keep track
+of the entire stack of macro expansions an expression was generated by, while
+also recording where it occurred in the original source file.
+
+For this, we use a tree data structure. Let's look at the following pair of
+macros
+
+```julia
+julia> JuliaLowering.include_string(Main, raw"""
+       module M
+           macro inner()
+               :(2)
+           end
+
+           macro outer()
+               :((1, @inner))
+           end
+       end
+       """, "some_macros.jl")
+```
+
+The tree which arises from macro expanding this is pretty simple:
+
+```julia
+julia> expanded = JuliaLowering.macroexpand(Main, parsestmt(SyntaxTree, "M.@outer()"))
+SyntaxTree with attributes scope_layer,kind,value,var_id,name_val,syntax_flags,source
+[tuple-p]                               │
+  1                                     │
+  2                                     │
+```
+
+but the provenance information recorded for the second element `2` of this
+tuple is not trivial; it includes the macro call expressions for `@inner` and
+`@outer`. We can show this in tree form:
+
+```julia
+julia> showprov(expanded[2], tree=true)
+2
+├─ 2
+│  └─ @ some_macros.jl:3
+└─ (macrocall @inner)
+   ├─ (macrocall @inner)
+   │  └─ @ some_macros.jl:7
+   └─ (macrocall-p (. M @outer))
+      └─ @ foo.jl:1
+```
+
+or as a more human readable flattened list highlighting of source ranges:
+
+```julia
+module M
+    macro inner()
+        :(2)
+#         ╙ ── in source
+    end
+
+# @ some_macros.jl:3
+
+
+    macro outer()
+        :((1, @inner))
+#             └────┘ ── in macro expansion
+    end
+end
+# @ some_macros.jl:7
+
+M.@outer()
+└────────┘ ── in macro expansion
+# @ foo.jl:1
+```
+
+## Problems with Hygiene in Julia's exiting macro system
+
+To write correct hygienic macros in Julia (as of 2024), macro authors must use
+`esc()` on any any syntax passed to the macro so that passed identifiers escape
+to the macro caller scope. However
+
+* This is not automatic and the correct use of `esc()` is one of the things
+  that new macro authors find most confusing. (My impression, based on various
+  people complaining about how confusing `esc()` is.)
+* `esc()` wraps expressions in `Expr(:escape)`, but this doesn't work well when
+  macros pass such escaped syntax to an inner macro call. As discussed in
+  [Julia issue #37691](https://github.com/JuliaLang/julia/issues/37691), macros
+  in Julia's existing system are not composable by default. Writing
+  composable macros in the existing system would require preserving the escape
+  nesting depth when recursing into any macro argument nested expressions.
+  Almost no macro author knows how to do this and is prepared to pay for the
+  complexity of getting it right.
+
+The requirement to use `esc()` stems from Julia's pervasive use of the simple
+`Expr` data structure which represents a unadorned AST in which names are plain
+symbols. For example, a macro call `@foo x` gets passed the  symbol `:x`
+which is just a name without any information attached to indicate that it came
+from the scope where `@foo` was called.
+
+### Hygiene References
+
+* [Toward Fearless Macros](https://lambdaland.org/posts/2023-10-17_fearless_macros) -
+  a blog post by Ashton Wiersdorf
+* [Towards the Essence of Hygiene](https://michaeldadams.org/papers/hygiene/hygiene-2015-popl-authors-copy.pdf) - a paper by Michael Adams
+* [Bindings as sets of scopes](https://www-old.cs.utah.edu/plt/scope-sets/) - a description of Racket's scope set mechanism by Matthew Flatt
+
+# Overview of lowering passes
+
+JuliaLowering uses six symbolic transformation passes:
+
+1. Macro expansion - expanding user-defined syntactic constructs by running the
+   user's macros. This pass also includes a small amount of other symbolic
+   simplification.
+2. Syntax desugaring - simplifying Julia's rich surface syntax down to a small
+   number of syntactic forms.
+3. Scope analysis - analyzing identifier names used in the code to discover
+   local variables, closure captures, and associate global variables to the
+   appropriate module. Transform all names (kind `K"Identifier"`) into binding
+   IDs (kind `K"BindingId"`) which can be looked up in a table of bindings.
+4. Closure conversion - convert closures to types and deal with captured
+   variables efficiently where possible.
+5. Flattening to untyped IR - convert code in hierarchical tree form to a
+   flat array of statements; convert control flow into gotos.
+6. Convert untyped IR to `CodeInfo` form for integration with the Julia runtime.
+
+## Pass 1: Macro expansion
+
+This pass expands macros and quoted syntax, and does some very light conversion
+of a few syntax `Kind`s in preparation for syntax desugaring.
+
+### Hygiene in JuliaLowering
+
+In JuliaLowering we make hygiene automatic and remove `esc()` by combining names
+with scope information. In the language of the paper [*Towards the Essence of
+Hygiene*](https://michaeldadams.org/papers/hygiene/hygiene-2015-popl-authors-copy.pdf)
+by Michael Adams, this combination is called a "syntax object". In
+JuliaLowering our representation is the tuple `(name,scope_layer)`, also called
+`VarId` in the scope resolution pass.
+
+JuliaLowering's macro expander attaches a unique *scope layer* to each
+identifier in a piece of syntax. A "scope layer" is an integer identifier
+combined with the module in which the syntax was created.
+
+When expanding macros,
+
+* Any identifiers passed to the macro are tagged with the scope layer they were
+  defined within.
+* A new unique scope layer is generated for the macro invocation, and any names
+  in the syntax produced by the macro are tagged with this layer.
+
+Subsequently, the `(name,scope_layer)` pairs are used when resolving bindings.
+This ensures that, by default, we satisfy the basic rules for hygienic macros
+discussed in Adams' paper:
+
+1. A macro can't insert a binding that can capture references other than those
+   inserted by the macro.
+2. A macro can't insert a reference that can be captured by bindings other than
+   those inserted by the macro.
+
+TODO: Write more here...
+
+
+### Compatibility with `Expr` macros
+
+In order to have compatibility with old-style macros which expect an `Expr`-based
+data structure as input, we convert `SyntaxTree` to `Expr`, call the old-style
+macro, then convert `SyntaxTree` back to `Expr` and continue with the expansion
+process. This involves some loss of provenance precision but allows full
+interoperability in the package ecosystem without a need to make breaking
+changes.
+
+Let's look at an example. Suppose a manually escaped old-style macro
+`@oldstyle` is implemented as
+
+```julia
+macro oldstyle(a, b)
+    quote
+        x = "x in @oldstyle"
+        @newstyle $(esc(a)) $(esc(b)) x
+    end
+end
+```
+
+along with two correctly escaped new-style macros:
+
+```julia
+macro call_oldstyle_macro(y)
+    quote
+        x = "x in call_oldstyle_macro"
+        @oldstyle $y x
+    end
+end
+
+macro newstyle(x, y, z)
+    quote
+        x = "x in @newstyle"
+        ($x, $y, $z, x)
+    end
+end
+```
+
+Then want some code like the following to "just work" with respect to hygiene
+
+```julia
+let
+    x = "x in outer ctx"
+    @call_oldstyle_macro x
+end
+```
+
+When calling `@oldstyle`, we must convert `SyntaxTree` into `Expr`, but we need
+to preserve the scope layer of the `x` from the outer context as it is passed
+into `@oldstyle` as a macro argument. To do this, we use `Expr(:scope_layer,
+:x, outer_layer_id)`. (In the old system, this would be `Expr(:escape, :x)`
+instead, presuming that `@call_oldstyle_macro` was implemented using `esc()`.)
+
+When receiving output from old style macro invocations, we preserve the escape
+handling of the existing system for any symbols which aren't tagged with a
+scope layer.
+
+## Pass 2: Syntax desugaring
+
+This pass recursively converts many special surface syntax forms to a smaller
+set of syntax `Kind`s, following the AST's hierarchical tree structure. Some
+such as `K"scope_block"` are internal to lowering and removed during later
+passes. See `kinds.jl` for a list of these internal forms.
+
+This pass is implemented in `desugaring.jl`. It's quite large because Julia has
+many special syntax features.
+
+### Desugaring of function definitions
+
+Desugaring of function definitions is particularly complex because of the cross
+product of features which need to work together consistently:
+
+* Positional arguments (with and without defaults, with and without types)
+* Keyword arguments (with and without defaults, with and without types)
+* Type parameters with `where` syntax
+* Argument slurping syntax with `...`
+* Fancy arguments (argument destructuring)
+
+The combination of positional arguments with defaults and keyword arguments is
+particularly complex. Here's an example.  Suppose we're given the function
+definition
+
+```julia
+function f(a::A=a_default, b::B=b_default; x::X=x_default,y::Y=y_default)
+    body
+end
+```
+
+This generates
+* One method of `f` for each number of positional arguments which can be
+  called when `f` is called without keyword args
+* One overload of `Core.kwcall(kws, ::typeof(f), ...)` for each number of
+  positional arguments (when called with a nonzero number of keyword args; the
+  tuple `kws` being constructed by the caller)
+* One internal method for the body of the function (we can call it `f_kw`
+  though it will be named something like `#f#18`)
+
+First, partially expanding the kw definitions this roughly looks like
+
+```julia
+function f_kw(x::X, y::X, f_self::typeof(f), a::A, b::B)
+    body
+end
+
+function f(a::A=a_default, b::B=b_default)
+    f_kw(x_default, y_default, var"#self#", a, b)
+end
+
+function Core.kwcall(kws::NamedTuple, self::typeof(f), a::A=a_default, b::B=b_default)
+    if Core.isdefined(kws, :x)
+        x_tmp = Core.getfield(kws, :x)
+        if x_tmp isa X
+            nothing
+        else
+            Core.throw($(Expr(:new, Core.TypeError, Symbol("keyword argument"), :x, X, x_tmp)))
+        end
+        x = x_tmp
+    else
+        x = 1
+    end
+    if Core.isdefined(kws, :y)
+        y_tmp = Core.getfield(kws, :y)
+        if y_tmp isa Y
+            nothing
+        else
+            Core.throw($(Expr(:new, Core.TypeError, Symbol("keyword argument"), :y, Y, y_tmp)))
+        end
+        y = y_tmp
+    else
+        y = 2
+    end
+    if Base.isempty(Base.diff_names(Base.keys(kws), (:x, :y)))
+        nothing
+    else
+        # Else unsupported kws
+        Base.kwerr(kws, self, a, b)
+    end
+    f_kw(x, y, self, a, b)
+end
+```
+
+We can then pass this to function expansion for default arguments which expands
+each of the above into three more methods. For example, for the first
+definition we conceptually expand `f(a::A=a_default, b::B=b_default)` into the
+methods
+
+```julia
+# The body
+function f(a::A, b::B)
+    f_kw(x_default, y_default, var"#self#", a, b)
+end
+
+# And two methods for the different numbers of default args
+function f(a::A)
+    var"#self#"(a, b_default)
+end
+
+function f()
+    var"#self#"(a_default, b_default)
+end
+```
+
+In total, this expands a single "function definition" into seven methods.
+
+Note that the above is only a sketch! There's more fiddly details when `where`
+syntax comes in
+
+### Desugaring of generated functions
+
+A brief description of how this works. Let's consider the generated function
+
+```julia
+function gen(x::NTuple{N}, y) where {N,T}
+    shared = :shared
+    # Unnecessary use of @generated, but it shows what's going on.
+    if @generated
+        quote
+            maybe_gen = ($x, $N)
+        end
+    else
+        maybe_gen = (typeof(x), N)
+    end
+    (shared, maybe_gen)
+end
+```
+
+This is desugared into the following two function definitions. First, a code
+generator which will generate code for the body of the function, given the
+static parameters `N`, `T` and the positional arguments `x`, `y`.
+(`var"#self#"::Type{typeof(gen)}` is also provided by the Julia runtime to
+complete the full signature of `gen`, though the user won't normally use this.)
+
+```julia
+function var"#gen@generator#0"(__context__::JuilaSyntax.MacroContext, N, T, var"#self#", x, y)
+    gen_stuff = quote
+        maybe_gen = ($x, $N)
+    end
+    quote
+        shared = :shared
+        $gen_stuff
+        (shared, maybe_gen)
+    end
+end
+```
+
+Second, the non-generated version, using the `if @generated` else branches, and
+containing mostly normal code.
+
+```julia
+function gen(x::NTuple{N}, y) where {N,T}
+    $(Expr(:meta, :generated,
+        Expr(:call, JuliaLowering.GeneratedFunctionStub,
+             :var"#gen@generator#0", sourceref_of_gen,
+             :(Core.svec(:var"#self", :x, :y))
+             :(Core.svec(:N, :T)))))
+    shared = :shared
+    maybe_gen = (typeof(x), N)
+    (shared, maybe_gen)
+end
+```
+
+The one extra thing added here is the `Expr(:meta, :generated)` which is an
+expression creating a callable wrapper for the user's generator, to be
+evaluated at top level. This wrapper will then be invoked by the runtime
+whenever the user calls `gen` with a new signature and it's expected that a
+`CodeInfo` be returned from it. `JuliaLowering.GeneratedFunctionStub` differs
+from `Core.GeneratedFunctionStub` in that it contains extra provenance
+information (the `sourcref_of_gen`) and expects a `SyntaxTree` to be returned
+by the user's generator code.
+
+## Pass 3: Scope analysis / binding resolution
+
+This pass replaces variables with bindings of kind `K"BindingId"`,
+disambiguating variables when the same name is used in different scopes. It
+also fills in the list of non-global bindings within each lambda and metadata
+about such bindings as will be used later during closure conversion.
+
+Scopes are documented in the Juila documentation on
+[Scope of Variables](https://docs.julialang.org/en/v1/manual/variables-and-scoping/)
+
+During scope resolution, we maintain a stack of `ScopeInfo` data structures.
+
+When a new `lambda` or `scope_block` is discovered, we create a new `ScopeInfo` by
+1. Find all identifiers bound or used within a scope. New *bindings* may be
+   introduced by one of the `local`, `global` keywords, implicitly by
+   assignment, as function arguments to a `lambda`, or as type arguments in a
+   method ("static parameters"). Identifiers are *used* when they are
+   referenced.
+2. Infer which bindings are newly introduced local or global variables (and
+   thus require a distinct identity from names already in the stack)
+3. Assign a `BindingId` (unique integer) to each new binding
+
+We then push this `ScopeInfo` onto the stack and traverse the expressions
+within the scope translating each `K"Identifier"` into the associated
+`K"BindingId"`. While we're doing this we also resolve some special forms like
+`islocal` by making use of the scope stack.
+
+The detailed rules for whether assignment introduces a new variable depend on
+the `scope_block`'s `scope_type` attribute when we are processing top-level
+code.
+* `scope_type == :hard` (as for bindings inside a `let` block) means an
+  assignment always introduces a new binding
+* `scope_type == :neutral` - inherit soft or hard scope from the parent scope.
+* `scope_type == :soft` - assignments are to globals if the variable
+  exists in global module scope. Soft scope doesn't have surface syntax and is
+  introduced for top-level code by REPL-like environments.
+
+## Pass 4: Closure conversion / lower bindings
+
+The main goal of this pass is closure conversion, but it's also used for
+lowering typed bindings and global assignments. Roughly, this is passes 3 and 4
+in the original `julia-syntax.scm`. In JuliaLowering it also comes in two steps:
+
+The first step (part of `scope_resolution.jl`) is to compute metadata related
+to bindings, both per-binding and per-binding-per-closure-scope.
+
+Properties which are computed per-binding which can help with symbolic
+optimizations include:
+* Type is declared (`x::T` syntax in a statement): type conversions must be
+  inserted at every assignment of `x`.
+* Never undefined: value is always assigned to the binding before being read
+  hence this binding doesn't require the use of `Core.NewvarNode`.
+* Single assignment: (TODO how is this defined, what is it for and does it go
+  here or below?)
+
+Properties of non-globals which are computed per-binding-per-closure include:
+* Read: the value of the binding is used.
+* Write: the binding is assigned to.
+* Captured: Bindings defined outside the closure which are either Read or Write
+  within the closure are "captured" and need to be one of the closure's fields.
+* Called: the binding is called as a function, ie, `x()`. (TODO - what is this
+  for?)
+
+The second step uses this metadata to
+* Convert closures into `struct` types
+* Lower bindings captured by closures into references to boxes as necessary
+* Deal with typed bindings (`K"decl"`) and their assignments
+* Lower const and non-const global assignments
+* TODO: probably more here.
+
+
+### Q&A
+
+#### When does `function` introduce a closure?
+
+Closures are just functions where the name of the function is *local* in scope.
+How does the function name become a local? The `function` keyword acts like an
+assignment to the function name for the purposes of scope resolution. Thus
+`function f() body end` is rather like `f = ()->body` and may result in the
+symbol `f` being either `local` or `global`. Like other assignments, `f` may be
+declared global or local explicitly, but if not `f` is subject to the usual
+rules for assignments inside scopes. For example, inside a `let` scope
+`function f() ...` would result in the symbol `f` being local.
+
+Examples:
+
+```julia
+begin
+    # f is global because `begin ... end` does not introduce a scope
+    function f()
+        body
+    end
+
+    # g is a closure because `g` is explicitly declared local
+    local g
+    function g()
+        body
+    end
+end
+
+let
+    # f is local so this is a closure because `let ... end` introduces a scope
+    function f()
+        body
+    end
+
+    # g is not a closure because `g` is declared global
+    global g
+    function g()
+        body
+    end
+end
+```
+
+#### How do captures work with non-closures?
+
+Yes it's true, you can capture local variables into global methods. For example:
+
+```julia
+begin
+    local x = 1
+    function f(y)
+        x + y
+    end
+    x = 2
+end
+```
+
+The way this works is to put `x` in a `Box` and interpolate it into the AST of
+`f` (the `Box` can be eliminated in some cases, but not here). Essentially this
+lowers to code which is almost-equivalent to the following:
+
+```julia
+begin
+    local x = Core.Box(1)
+    @eval function f(y)
+        $(x.contents) + y
+    end
+    x.contents = 2
+end
+```
+
+#### How do captures work with closures with multiple methods?
+
+Sometimes you might want a closure with multiple methods, but those methods
+might capture different local variables. For example,
+
+```julia
+let
+    x = 1
+    y = 1.5
+    function f(xx::Int)
+        xx + x
+    end
+    function f(yy::Float64)
+        yy + y
+    end
+
+    f(42)
+end
+```
+
+In this case, the closure type must capture both `x` and `y` and the generated
+code looks rather like this:
+
+```julia
+struct TheClosureType
+    x
+    y
+end
+
+let
+    x = 1
+    y = 1.5
+    f = TheClosureType(x,y)
+    function (self::TheClosureType)(xx::Int)
+        xx + self.x
+    end
+    function (self::TheClosureType)(yy::Int)
+        yy + self.y
+    end
+
+    f(42)
+end
+```
+
+#### When are `method` defs lifted to top level?
+
+Closure method definitions must be lifted to top level whenever the definitions
+appear inside a function. This is allow efficient compilation and avoid world
+age issues.
+
+Conversely, when method defs appear in top level code, they are executed
+inline.
+
+## Pass 5: Convert to untyped IR
+
+This pass is implemented in `linear_ir.jl`.
+
+### Untyped IR (JuliaLowering form)
+
+JuliaLowering's untyped IR is very close to the runtime's `CodeInfo` form (see
+below), but is more concretely typed as `JuliaLowering.SyntaxTree`.
+
+Metadata is generally represented differently:
+* The statements retain full code provenance information as `SyntaxTree`
+  objects. See `kinds.jl` for a list of which `Kind`s occur in the output IR
+  but not in surface syntax.
+* The list of slots is `Vector{Slot}`, including `@nospecialize` metadata
+
+### Lowering of exception handlers
+
+Exception handling involves a careful interplay between lowering and the Julia
+runtime. The forms `enter`, `leave` and `pop_exception` dynamically modify the
+exception-related state on the `Task`; lowering and the runtime work together
+to maintain correct invariants for this state.
+
+Lowering of exception handling must ensure that
+
+* Each `enter` is matched with a `leave` on every possible non-exceptional
+  program path (including implicit returns generated in tail position).
+* Each `catch` block which is entered and handles the exception - by exiting
+  via a non-exceptional program path - is matched with a `pop_exception`
+* Each `finally` block runs, regardless of the way it's entered - either by
+  normal program flow, an exception, early `return` or a jump out of an inner
+  context via `break`/`continue`/`goto` etc.
+
+The following special forms are emitted into the IR:
+
+* `(= tok (enter catch_label dynscope))` -
+  push exception handler with catch block at `catch_label` and dynamic
+  scope `dynscope`, yielding a token which is used by `leave` and
+  `pop_exception`. `dynscope` is only used in the special `tryfinally` form
+  without associated source level syntax (see the `@with` macro)
+* `(leave tok)` -
+    pop exception handler back to the state of the `tok` from the associated
+    `enter`. Multiple tokens can be supplied to pop multiple handlers using
+    `(leave tok1 tok2 ...)`.
+* `(pop_exception tok)` - pop exception stack back to state of associated enter
+
+When an `enter` is encountered, the runtime pushes a new handler onto the
+`Task`'s exception handler stack which will jump to `catch_label` when an
+exception occurs.
+
+There are two ways that the exception-related task state can be restored
+
+1. By encountering a `leave` which will restore the handler state with `tok`.
+2. By throwing an exception. In this case the runtime will pop one handler
+   automatically and jump to the catch label with the new exception pushed
+   onto the exception stack. On this path the exception stack state must be
+   restored back to the associated `enter` by encountering `pop_exception`.
+
+Note that the handler and exception stack represent two distinct types of
+exception-related state restoration which need to happen. Note also that the
+"handler state restoration" actually includes several pieces of runtime state
+including GC flags - see `jl_eh_restore_state` in the runtime for that.
+
+#### Lowering finally code paths
+
+When lowering `finally` blocks we want to emit the user's finally code once but
+multiple code paths may traverse the finally block. For example, consider the
+code
+
+```julia
+function foo(x)
+    while true
+        try
+            if x == 1
+                return f(x)
+            elseif x == 2
+                g(x)
+                continue
+            else
+                break
+            end
+        finally
+            h()
+        end
+    end
+end
+```
+
+In this situation there's four distinct code paths through the finally block:
+1. `return f(x)` needs to call `val = f(x)`, leave the `try` block, run `h()` then
+   return `val`.
+2. `continue` needs to call `h()` then jump to the start of the while loop
+3. `break` needs to call `h()` then jump to the exit of the while loop
+4. If an exception occurs in `f(x)` or `g(x)`, we need to call `h()` before
+   falling back into the while loop.
+
+To deal with these we create a `finally_tag` variable to dynamically track
+which action to take after the finally block exits. Before jumping to the block
+we set this variable to a unique integer tag identifying the incoming code
+path. At the exit of the user's code (`h()` in this case) we perform the jump
+appropriate to the `break`, `continue` or `return` as necessary based on the tag.
+
+(TODO - these are the only four cases which can occur, but, for example,
+multiple `return`s create multiple tags rather than assigning to a single
+variable. Collapsing these into a single case might be worth considering? But
+also might be worse for type inference in some cases?)
+
+## Pass 6: Convert IR to `CodeInfo` representation
+
+This pass convert's JuliaLowering's internal representation of untyped IR into
+a form the Julia runtime understands. This is a necessary decoupling which
+separates the development of JuliaLowering.jl from the evolution of the Julia
+runtime itself.
+
+### Untyped IR (`CodeInfo` form)
+
+The final lowered IR is expressed as `CodeInfo` objects which are a sequence of
+`code` statements containing
+* Literals
+* Restricted forms of `Expr` (with semantics different from surface syntax,
+  even for the same `head`! for example the arguments to `Expr(:call)` in IR
+  must be "simple" and aren't evaluated in order)
+* `Core.SlotNumber`
+* Other special forms from `Core` like `Core.ReturnNode`, `Core.EnterNode`, etc.
+* `Core.SSAValue`, indexing any value generated from a statement in the `code`
+  array.
+* Etc (todo)
+
+The IR obeys certain invariants which are checked by the downstream code in
+base/compiler/validation.jl.
+
+See also https://docs.julialang.org/en/v1/devdocs/ast/#Lowered-form
+
+CodeInfo layout (as of early 1.12-DEV):
+
+```julia
+mutable struct CodeInfo
+    code::Vector{Any}             # IR statements
+    codelocs::Vector{Int32}       # `length(code)` Vector of indices into `linetable`
+    ssavaluetypes::Any            # `length(code)` or Vector of inferred types after opt
+    ssaflags::Vector{UInt32}      # flag for every statement in `code`
+                                  #   0 if meta statement
+                                  #   inbounds_flag - 1 bit (LSB)
+                                  #   inline_flag   - 1 bit
+                                  #   noinline_flag - 1 bit
+                                  #   ... other 8 flags which are defined in compiler/optimize.jl
+                                  #   effects_flags - 9 bits
+    method_for_inference_limit_heuristics::Any
+    linetable::Any
+    slotnames::Vector{Symbol}     # names of parameters and local vars used in the code
+    slotflags::Vector{UInt8}      # vinfo flags from flisp
+    slottypes::Any                # nothing (used by typeinf)
+    rettype::Any                  # Any (used by typeinf)
+    parent::Any                   # nothing (used by typeinf)
+    edges::Any
+    min_world::UInt64
+    max_world::UInt64
+    inferred::Bool
+    propagate_inbounds::Bool
+    has_fcall::Bool
+    nospecializeinfer::Bool
+    inlining::UInt8
+    constprop::UInt8
+    purity::UInt16
+    inlining_cost::UInt16
+end
+```
+
+## Notes on toplevel-only forms and eval-related functions
+
+In the current Julia runtime,
+
+`Base.eval()`
+- Uses `jl_toplevel_eval_in` which calls `jl_toplevel_eval_flex`
+
+`jl_toplevel_eval_flex(mod, ex)`
+- Lowers if necessary
+- Evaluates certain blessed top level forms
+  * `:.`
+  * `:module`
+  * `:using`
+  * `:import`
+  * `:public`
+  * `:export`
+  * `:toplevel`
+  * `:error`
+  * `:incomplete`
+  * Identifier and literals
+- Otherwise expects `Expr(:thunk)`
+  * Use codegen "where necessary/profitable" (eg ccall, has_loops etc)
+  * Otherwise interpret via `jl_interpret_toplevel_thunk`
+
+Should we lower the above blessed top level forms to julia runtime calls?
+Pros:
+- Semantically sound. Lowering should do syntax checking in things like
+  `Expr(:using)` rather than doing this in the runtime support functions.
+- Precise lowering error messages
+- Replaces more Expr usage
+- Replaces a whole pile of C code with significantly less Julia code
+- Lowering output becomes more consistently imperative
+Cons:
+- Lots more code to write
+- May need to invent intermediate data structures to replace `Expr`
+- Bootstrap?
+- Some forms require creating toplevel thunks
+
+In general, we'd be replacing current *declarative* lowering targets like
+`Expr(:using)` with an *imperative* call to a `Core` API instead. The call and
+the setup of its arguments would need to go in a thunk. We've currently got an
+odd mixture of imperative and declarative lowered code.
+
+## Bugs in Julia's lowering
+
+Subset of bugs which exist in upstream in flisp implementation, but which are fixed here
+* `f()[begin]` has the side effect `f()` twice.
+* `a[(begin=1; a=2)]` gives a weird error
+* `function A.ccall() ; end` allows `ccall` as a name but it's not allowed without the `A.`
+* `a .< b .< c` expands to `(a .< b) .& (b .< c)` where the scope of the `&` is
+  the expansion module but should be `top.&` to avoid scope-dependence
+  (especially in the presence of macros)
+
+## Notes on Racket's hygiene
+
+People look at [Racket](https://racket-lang.org/) as an example of a very
+complete system of hygienic macros. We should learn from them, but keeping in
+mind that Racket's macro system is inherently more complicated. Racket's
+current approach to hygiene is described in an [accessible talk](https://www.youtube.com/watch?v=Or_yKiI3Ha4)
+and in more depth in [a paper](https://www-old.cs.utah.edu/plt/publications/popl16-f.pdf).
+
+Some differences which makes Racket's macro expander different from Julia:
+
+* Racket allows *local* definitions of macros. Macro code can be embedded in an
+  inner lexical scope and capture locals from that scope, but still needs to be
+  executed at compile time. Julia supports macros at top level scope only.
+* Racket goes to great lengths to execute the minimal package code necessary to
+  expand macros; the "pass system". Julia just executes all top level
+  statements in order when precompiling a package.
+* As a lisp, Racket's surface syntax is dramatically simpler and more uniform
diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl
new file mode 100644
index 0000000000000..d8ff05dd013e8
--- /dev/null
+++ b/JuliaLowering/src/JuliaLowering.jl
@@ -0,0 +1,47 @@
+# Use a baremodule because we're implementing `include` and `eval`
+baremodule JuliaLowering
+
+using Base
+# We define a separate _include() for use in this module to avoid mixing method
+# tables with the public `JuliaLowering.include()` API
+const _include = Base.IncludeInto(JuliaLowering)
+
+if parentmodule(JuliaLowering) === Base
+    using Base.JuliaSyntax
+else
+    using JuliaSyntax
+end
+
+using .JuliaSyntax: highlight, Kind, @KSet_str, is_leaf, children, numchildren,
+    head, kind, flags, has_flags, numeric_flags, filename, first_byte,
+    last_byte, byte_range, sourcefile, source_location, span, sourcetext,
+    is_literal, is_number, is_operator, is_prec_assignment, is_prefix_call,
+    is_infix_op_call, is_postfix_op_call, is_error
+
+_include("kinds.jl")
+_register_kinds()
+
+_include("syntax_graph.jl")
+_include("ast.jl")
+_include("bindings.jl")
+_include("utils.jl")
+
+_include("macro_expansion.jl")
+_include("desugaring.jl")
+_include("scope_analysis.jl")
+_include("closure_conversion.jl")
+_include("linear_ir.jl")
+_include("runtime.jl")
+_include("syntax_macros.jl")
+
+_include("eval.jl")
+_include("compat.jl")
+_include("hooks.jl")
+
+function __init__()
+    _register_kinds()
+end
+
+_include("precompile.jl")
+
+end
diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl
new file mode 100644
index 0000000000000..3f5533f8b773a
--- /dev/null
+++ b/JuliaLowering/src/ast.jl
@@ -0,0 +1,741 @@
+#-------------------------------------------------------------------------------
+# @chk: Basic AST structure checking tool
+#
+# Check a condition involving an expression, throwing a LoweringError if it
+# doesn't evaluate to true. Does some very simple pattern matching to attempt
+# to extract the expression variable from the left hand side.
+#
+# Forms:
+# @chk pred(ex)
+# @chk pred(ex) msg
+# @chk pred(ex) (msg_display_ex, msg)
+macro chk(cond, msg=nothing)
+    if Meta.isexpr(msg, :tuple)
+        ex = msg.args[1]
+        msg = msg.args[2]
+    else
+        ex = cond
+        while true
+            if ex isa Symbol
+                break
+            elseif ex.head == :call
+                ex = ex.args[2]
+            elseif ex.head == :ref
+                ex = ex.args[1]
+            elseif ex.head == :.
+                ex = ex.args[1]
+            elseif ex.head in (:(==), :(in), :<, :>)
+                ex = ex.args[1]
+            else
+                error("Can't analyze $cond")
+            end
+        end
+    end
+    quote
+        ex = $(esc(ex))
+        @assert ex isa SyntaxTree
+        ok = try
+            $(esc(cond))
+        catch
+            false
+        end
+        if !ok
+            throw(LoweringError(ex, $(isnothing(msg) ? "expected `$cond`" : esc(msg))))
+        end
+    end
+end
+
+#-------------------------------------------------------------------------------
+abstract type AbstractLoweringContext end
+
+"""
+Bindings for the current lambda being processed.
+
+Lowering passes prior to scope resolution return `nothing` and bindings are
+collected later.
+"""
+current_lambda_bindings(ctx::AbstractLoweringContext) = nothing
+
+function syntax_graph(ctx::AbstractLoweringContext)
+    ctx.graph
+end
+
+"""
+Unique symbolic identity for a variable, constant, label, or other entity
+"""
+const IdTag = Int
+
+"""
+Id for scope layers in macro expansion
+"""
+const LayerId = Int
+
+"""
+A `ScopeLayer` is a mechanism for automatic hygienic macros; every identifier
+is assigned to a particular layer and can only match against bindings which are
+themselves part of that layer.
+
+Normal code contains a single scope layer, whereas each macro expansion
+generates a new layer.
+"""
+struct ScopeLayer
+    id::LayerId
+    mod::Module
+    parent_layer::LayerId # Index of parent layer in a macro expansion. Equal to 0 for no parent
+    is_macro_expansion::Bool # FIXME
+end
+
+#-------------------------------------------------------------------------------
+# AST creation utilities
+_node_id(graph::SyntaxGraph, ex::SyntaxTree) = (check_compatible_graph(graph, ex); ex._id)
+function _node_id(graph::SyntaxGraph, ex)
+    # Fallback to give a comprehensible error message for use with the @ast macro
+    error("Attempt to use `$(repr(ex))` of type `$(typeof(ex))` as an AST node. Try annotating with `::K\"your_intended_kind\"?`")
+end
+function _node_id(graph::SyntaxGraph, ex::AbstractVector{<:SyntaxTree})
+    # Fallback to give a comprehensible error message for use with the @ast macro
+    error("Attempt to use vector as an AST node. Did you mean to splat this? (content: `$(repr(ex))`)")
+end
+
+_node_ids(graph::SyntaxGraph) = ()
+_node_ids(graph::SyntaxGraph, ::Nothing, cs...) = _node_ids(graph, cs...)
+_node_ids(graph::SyntaxGraph, c, cs...) = (_node_id(graph, c), _node_ids(graph, cs...)...)
+_node_ids(graph::SyntaxGraph, cs::SyntaxList, cs1...) = (_node_ids(graph, cs...)..., _node_ids(graph, cs1...)...)
+function _node_ids(graph::SyntaxGraph, cs::SyntaxList)
+    check_compatible_graph(graph, cs)
+    cs.ids
+end
+
+_unpack_srcref(graph, srcref::SyntaxTree) = _node_id(graph, srcref)
+_unpack_srcref(graph, srcref::Tuple)      = _node_ids(graph, srcref...)
+_unpack_srcref(graph, srcref)             = srcref
+
+function _push_nodeid!(graph::SyntaxGraph, ids::Vector{NodeId}, val)
+    push!(ids, _node_id(graph, val))
+end
+function _push_nodeid!(graph::SyntaxGraph, ids::Vector{NodeId}, val::Nothing)
+    nothing
+end
+function _append_nodeids!(graph::SyntaxGraph, ids::Vector{NodeId}, vals)
+    for v in vals
+        _push_nodeid!(graph, ids, v)
+    end
+end
+function _append_nodeids!(graph::SyntaxGraph, ids::Vector{NodeId}, vals::SyntaxList)
+    check_compatible_graph(graph, vals)
+    append!(ids, vals.ids)
+end
+
+function makeleaf(graph::SyntaxGraph, srcref, proto; attrs...)
+    id = newnode!(graph)
+    ex = SyntaxTree(graph, id)
+    copy_attrs!(ex, proto, true)
+    setattr!(graph, id; source=_unpack_srcref(graph, srcref), attrs...)
+    return ex
+end
+
+function _makenode(graph::SyntaxGraph, srcref, proto, children; attrs...)
+    id = newnode!(graph)
+    setchildren!(graph, id, children)
+    ex = SyntaxTree(graph, id)
+    copy_attrs!(ex, proto, true)
+    setattr!(graph, id; source=_unpack_srcref(graph, srcref), attrs...)
+    return SyntaxTree(graph, id)
+end
+function _makenode(ctx, srcref, proto, children; attrs...)
+    _makenode(syntax_graph(ctx), srcref, proto, children; attrs...)
+end
+
+function makenode(ctx, srcref, proto, children...; attrs...)
+    _makenode(ctx, srcref, proto, _node_ids(syntax_graph(ctx), children...); attrs...)
+end
+
+function makeleaf(ctx, srcref, proto; kws...)
+    makeleaf(syntax_graph(ctx), srcref, proto; kws...)
+end
+
+function makeleaf(ctx, srcref, k::Kind, value; kws...)
+    graph = syntax_graph(ctx)
+    if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" ||
+            k == K"globalref" || k == K"Placeholder" ||
+            k == K"StrMacroName" || k == K"CmdMacroName"
+        makeleaf(graph, srcref, k; name_val=value, kws...)
+    elseif k == K"BindingId"
+        makeleaf(graph, srcref, k; var_id=value, kws...)
+    elseif k == K"label"
+        makeleaf(graph, srcref, k; id=value, kws...)
+    elseif k == K"symbolic_label"
+        makeleaf(graph, srcref, k; name_val=value, kws...)
+    elseif k in KSet"TOMBSTONE SourceLocation latestworld latestworld_if_toplevel"
+        makeleaf(graph, srcref, k; kws...)
+    else
+        val = k == K"Integer" ? convert(Int,     value) :
+              k == K"Float"   ? convert(Float64, value) :
+              k == K"String"  ? convert(String,  value) :
+              k == K"Char"    ? convert(Char,    value) :
+              k == K"Value"   ? value                   :
+              k == K"Bool"    ? value                   :
+              k == K"VERSION" ? value                   :
+              error("Unexpected leaf kind `$k`")
+        makeleaf(graph, srcref, k; value=val, kws...)
+    end
+end
+
+# TODO: Replace this with makeleaf variant?
+function mapleaf(ctx, src, kind)
+    ex = makeleaf(syntax_graph(ctx), src, kind)
+    # TODO: Value coercion might be broken here due to use of `name_val` vs
+    # `value` vs ... ?
+    copy_attrs!(ex, src)
+    ex
+end
+
+# Convenience functions to create leaf nodes referring to identifiers within
+# the Core and Top modules.
+core_ref(ctx, ex, name) = makeleaf(ctx, ex, K"core", name)
+svec_type(ctx, ex) = core_ref(ctx, ex, "svec")
+nothing_(ctx, ex) = core_ref(ctx, ex, "nothing")
+
+top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name)
+
+# Assign `ex` to an SSA variable.
+# Return (variable, assignment_node)
+function assign_tmp(ctx::AbstractLoweringContext, ex, name="tmp")
+    var = ssavar(ctx, ex, name)
+    assign_var = makenode(ctx, ex, K"=", var, ex)
+    var, assign_var
+end
+
+function emit_assign_tmp(stmts::SyntaxList, ctx, ex, name="tmp")
+    if is_ssa(ctx, ex)
+        return ex
+    end
+    var = ssavar(ctx, ex, name)
+    push!(stmts, makenode(ctx, ex, K"=", var, ex))
+    var
+end
+
+#-------------------------------------------------------------------------------
+# @ast macro
+function _match_srcref(ex)
+    if Meta.isexpr(ex, :macrocall) && ex.args[1] == Symbol("@HERE")
+        QuoteNode(ex.args[2])
+    else
+        esc(ex)
+    end
+end
+
+function _match_kind(f::Function, srcref, ex)
+    kws = []
+    if Meta.isexpr(ex, :call)
+        kind = esc(ex.args[1])
+        args = ex.args[2:end]
+        if Meta.isexpr(args[1], :parameters)
+            kws = map(esc, args[1].args)
+            popfirst!(args)
+        end
+        while length(args) >= 1 && Meta.isexpr(args[end], :kw)
+            pushfirst!(kws, esc(pop!(args)))
+        end
+        if length(args) == 1
+            srcref_tmp = gensym("srcref")
+            return quote
+                $srcref_tmp = $(_match_srcref(args[1]))
+                $(f(kind, srcref_tmp, kws))
+            end
+        elseif length(args) > 1
+            error("Unexpected: extra srcref argument in `$ex`?")
+        end
+    else
+        kind = esc(ex)
+    end
+    f(kind, srcref, kws)
+end
+
+function _expand_ast_tree(ctx, srcref, tree)
+    if Meta.isexpr(tree, :(::))
+        # Leaf node
+        if length(tree.args) == 2
+            val = esc(tree.args[1])
+            kindspec = tree.args[2]
+        else
+            val = nothing
+            kindspec = tree.args[1]
+        end
+        _match_kind(srcref, kindspec) do kind, srcref, kws
+            :(makeleaf($ctx, $srcref, $kind, $(val), $(kws...)))
+        end
+    elseif Meta.isexpr(tree, :call) && tree.args[1] === :(=>)
+        # Leaf node with copied attributes
+        kind = esc(tree.args[3])
+        srcref = esc(tree.args[2])
+        :(mapleaf($ctx, $srcref, $kind))
+    elseif Meta.isexpr(tree, (:vcat, :hcat, :vect))
+        # Interior node
+        flatargs = []
+        for a in tree.args
+            if Meta.isexpr(a, :row)
+                append!(flatargs, a.args)
+            else
+                push!(flatargs, a)
+            end
+        end
+        children_ex = :(let child_ids = Vector{NodeId}(), graph = syntax_graph($ctx)
+        end)
+        child_stmts = children_ex.args[2].args
+        for a in flatargs[2:end]
+            child = _expand_ast_tree(ctx, srcref, a)
+            if Meta.isexpr(child, :(...))
+                push!(child_stmts, :(_append_nodeids!(graph, child_ids, $(child.args[1]))))
+            else
+                push!(child_stmts, :(_push_nodeid!(graph, child_ids, $child)))
+            end
+        end
+        push!(child_stmts, :(child_ids))
+        _match_kind(srcref, flatargs[1]) do kind, srcref, kws
+            :(_makenode($ctx, $srcref, $kind, $children_ex; $(kws...)))
+        end
+    elseif Meta.isexpr(tree, :(:=))
+        lhs = tree.args[1]
+        rhs = _expand_ast_tree(ctx, srcref, tree.args[2])
+        ssadef = gensym("ssadef")
+        quote
+            ($(esc(lhs)), $ssadef) = assign_tmp($ctx, $rhs, $(string(lhs)))
+            $ssadef
+        end
+    elseif Meta.isexpr(tree, :macrocall)
+        esc(tree)
+    elseif tree isa Expr
+        Expr(tree.head, map(a->_expand_ast_tree(ctx, srcref, a), tree.args)...)
+    else
+        esc(tree)
+    end
+end
+
+"""
+    @ast ctx srcref tree
+
+Syntactic s-expression shorthand for constructing a `SyntaxTree` AST.
+
+* `ctx` - SyntaxGraph context
+* `srcref` - Reference to the source code from which this AST was derived.
+
+The `tree` contains syntax of the following forms:
+* `[kind child₁ child₂]` - construct an interior node with children
+* `value :: kind`        - construct a leaf node
+* `ex => kind`           - convert a leaf node to the given `kind`, copying attributes
+                           from it and also using `ex` as the source reference.
+* `var := ex`            - Set `var=ssavar(...)` and return an assignment node `\$var=ex`.
+                           `var` may be used outside `@ast`
+* `cond ? ex1 : ex2`     - Conditional; `ex1` and `ex2` will be recursively expanded.
+                           `if ... end` and `if ... else ... end` also work with this.
+
+Any `kind` can be replaced with an expression of the form
+* `kind(srcref)` - override the source reference for this node and its children
+* `kind(attr=val)` - set an additional attribute
+* `kind(srcref; attr₁=val₁, attr₂=val₂)` - the general form
+
+In any place `srcref` is used, the special form `@HERE()` can be used to instead
+to indicate that the "primary" location of the source is the location where
+`@HERE` occurs.
+
+
+# Examples
+
+```
+@ast ctx srcref [
+   K"toplevel"
+   [K"using"
+       [K"importpath"
+           "Base"       ::K"Identifier"(src)
+       ]
+   ]
+   [K"function"
+       [K"call"
+           "eval"       ::K"Identifier"
+           "x"          ::K"Identifier"
+       ]
+       [K"call"
+           "eval"       ::K"core"
+           mn           =>K"Identifier"
+           "x"          ::K"Identifier"
+       ]
+   ]
+]
+```
+"""
+macro ast(ctx, srcref, tree)
+    quote
+        ctx = $(esc(ctx))
+        srcref = $(_match_srcref(srcref))
+        $(_expand_ast_tree(:ctx, :srcref, tree))
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Mapping and copying of AST nodes
+function copy_attrs!(dest, src, all=false)
+    # TODO: Make this faster?
+    for (name, attr) in pairs(src._graph.attributes)
+        if (all || (name !== :source && name !== :kind && name !== :syntax_flags)) &&
+                haskey(attr, src._id)
+            dest_attr = getattr(dest._graph, name, nothing)
+            if !isnothing(dest_attr)
+                dest_attr[dest._id] = attr[src._id]
+            end
+        end
+    end
+end
+
+function copy_attrs!(dest, head::Union{Kind,JuliaSyntax.SyntaxHead}, all=false)
+    if all
+        sethead!(dest._graph, dest._id, head)
+    end
+end
+
+function mapchildren(f::Function, ctx, ex::SyntaxTree, do_map_child::Function;
+                     extra_attrs...)
+    if is_leaf(ex)
+        return ex
+    end
+    orig_children = children(ex)
+    cs = isempty(extra_attrs) ? nothing : SyntaxList(ctx)
+    for (i,e) in enumerate(orig_children)
+        newchild = do_map_child(i) ? f(e) : e
+        if isnothing(cs)
+            if newchild == e
+                continue
+            else
+                cs = SyntaxList(ctx)
+                append!(cs, orig_children[1:i-1])
+            end
+        end
+        push!(cs::SyntaxList, newchild)
+    end
+    if isnothing(cs)
+        # This function should be allocation-free if no children were changed
+        # by the mapping and there's no extra_attrs
+        return ex
+    end
+    cs::SyntaxList
+    ex2 = makenode(ctx, ex, head(ex), cs)
+    copy_attrs!(ex2, ex)
+    setattr!(ex2; extra_attrs...)
+    return ex2
+end
+
+function mapchildren(f::Function, ctx, ex::SyntaxTree, mapped_children::AbstractVector{<:Integer};
+                     extra_attrs...)
+    j = Ref(firstindex(mapped_children))
+    function do_map_child(i)
+        ind = j[]
+        if ind <= lastindex(mapped_children) && mapped_children[ind] == i
+            j[] += 1
+            true
+        else
+            false
+        end
+    end
+    mapchildren(f, ctx, ex, do_map_child; extra_attrs...)
+end
+
+function mapchildren(f::Function, ctx, ex::SyntaxTree; extra_attrs...)
+    mapchildren(f, ctx, ex, i->true; extra_attrs...)
+end
+
+
+"""
+Recursively copy AST `ex` into `ctx`.
+
+Special provenance handling: If `copy_source` is true, treat the `.source`
+attribute as a reference and recurse on its contents.  Otherwise, treat it like
+any other attribute.
+"""
+function copy_ast(ctx, ex::SyntaxTree; copy_source=true)
+    graph1 = syntax_graph(ex)
+    graph2 = syntax_graph(ctx)
+    !copy_source && check_same_graph(graph1, graph2)
+    id2 = _copy_ast(graph2, graph1, ex._id, Dict{NodeId, NodeId}(), copy_source)
+    return SyntaxTree(graph2, id2)
+end
+
+function _copy_ast(graph2::SyntaxGraph, graph1::SyntaxGraph,
+                   id1::NodeId, seen, copy_source)
+    let copied = get(seen, id1, nothing)
+        isnothing(copied) || return copied
+    end
+    id2 = newnode!(graph2)
+    seen[id1] = id2
+    src1 = get(SyntaxTree(graph1, id1), :source, nothing)
+    src2 = if !copy_source
+        src1
+    elseif src1 isa NodeId
+        _copy_ast(graph2, graph1, src1, seen, copy_source)
+    elseif src1 isa Tuple
+        map(i->_copy_ast(graph2, graph1, i, seen, copy_source), src1)
+    else
+        src1
+    end
+    copy_attrs!(SyntaxTree(graph2, id2), SyntaxTree(graph1, id1), true)
+    setattr!(graph2, id2; source=src2)
+    if !is_leaf(graph1, id1)
+        cs = NodeId[]
+        for cid in children(graph1, id1)
+            push!(cs, _copy_ast(graph2, graph1, cid, seen, copy_source))
+        end
+        setchildren!(graph2, id2, cs)
+    end
+    return id2
+end
+
+#-------------------------------------------------------------------------------
+function set_scope_layer(ctx, ex, layer_id, force)
+    k = kind(ex)
+    scope_layer = force ? layer_id : get(ex, :scope_layer, layer_id)
+    if k == K"module" || k == K"toplevel" || k == K"inert"
+        makenode(ctx, ex, ex, children(ex);
+                 scope_layer=scope_layer)
+    elseif k == K"."
+        makenode(ctx, ex, ex, set_scope_layer(ctx, ex[1], layer_id, force), ex[2],
+                 scope_layer=scope_layer)
+    elseif !is_leaf(ex)
+        mapchildren(e->set_scope_layer(ctx, e, layer_id, force), ctx, ex;
+                    scope_layer=scope_layer)
+    else
+        makeleaf(ctx, ex, ex;
+                 scope_layer=scope_layer)
+    end
+end
+
+"""
+    adopt_scope(ex, ref)
+
+Copy `ex`, adopting the scope layer of `ref`.
+"""
+function adopt_scope(ex::SyntaxTree, scope_layer::LayerId)
+    set_scope_layer(ex, ex, scope_layer, true)
+end
+
+function adopt_scope(ex::SyntaxTree, layer::ScopeLayer)
+    adopt_scope(ex, layer.id)
+end
+
+function adopt_scope(ex::SyntaxTree, ref::SyntaxTree)
+    adopt_scope(ex, ref.scope_layer)
+end
+
+function adopt_scope(exs::SyntaxList, ref)
+    out = SyntaxList(syntax_graph(exs))
+    for e in exs
+        push!(out, adopt_scope(e, ref))
+    end
+    return out
+end
+
+# Type for `meta` attribute, to replace `Expr(:meta)`.
+# It's unclear how much flexibility we need here - is a dict good, or could we
+# just use a struct? Likely this will be sparse. Alternatively we could just
+# use individual attributes but those aren't easy to add on an ad-hoc basis in
+# the middle of a pass.
+const CompileHints = Base.ImmutableDict{Symbol,Any}
+
+function setmeta!(ex::SyntaxTree; kws...)
+    @assert length(kws) == 1 # todo relax later ?
+    key = first(keys(kws))
+    value = first(values(kws))
+    meta = begin
+        m = get(ex, :meta, nothing)
+        isnothing(m) ? CompileHints(key, value) : CompileHints(m, key, value)
+    end
+    setattr!(ex; meta=meta)
+    ex
+end
+
+setmeta(ex::SyntaxTree; kws...) = setmeta!(copy_node(ex); kws...)
+
+function getmeta(ex::SyntaxTree, name::Symbol, default)
+    meta = get(ex, :meta, nothing)
+    isnothing(meta) ? default : get(meta, name, default)
+end
+
+name_hint(name) = CompileHints(:name_hint, name)
+
+#-------------------------------------------------------------------------------
+# Predicates and accessors working on expression trees
+
+# For historical reasons, `cglobal` and `ccall` are their own special
+# quasi-identifier-like syntax but with special handling inside lowering which
+# means they can't be used as normal identifiers.
+function is_ccall_or_cglobal(name::AbstractString)
+    return name == "ccall" || name == "cglobal"
+end
+
+function is_quoted(ex)
+    kind(ex) in KSet"Symbol quote top core globalref break inert
+                     meta inbounds inline noinline loopinfo"
+end
+
+function extension_type(ex)
+    @assert kind(ex) == K"extension" || kind(ex) == K"assert"
+    @chk numchildren(ex) >= 1
+    @chk kind(ex[1]) == K"Symbol"
+    ex[1].name_val
+end
+
+function is_sym_decl(x)
+    k = kind(x)
+    k == K"Identifier" || k == K"::"
+end
+
+function is_eventually_call(ex::SyntaxTree)
+    k = kind(ex)
+    return k == K"call" || ((k == K"where" || k == K"::") && is_eventually_call(ex[1]))
+end
+
+function find_parameters_ind(exs)
+    i = length(exs)
+    while i >= 1
+        k = kind(exs[i])
+        if k == K"parameters"
+            return i
+        elseif k != K"do"
+            break
+        end
+        i -= 1
+    end
+    return 0
+end
+
+function has_parameters(ex::SyntaxTree)
+    find_parameters_ind(children(ex)) != 0
+end
+
+function has_parameters(args::AbstractVector)
+    find_parameters_ind(args) != 0
+end
+
+function any_assignment(exs)
+    any(kind(e) == K"=" for e in exs)
+end
+
+function is_valid_modref(ex)
+    return kind(ex) == K"." && kind(ex[2]) == K"Symbol" &&
+           (kind(ex[1]) == K"Identifier" || is_valid_modref(ex[1]))
+end
+
+function is_core_ref(ex, name)
+    kind(ex) == K"core" && ex.name_val == name
+end
+
+function is_core_nothing(ex)
+    is_core_ref(ex, "nothing")
+end
+
+function is_core_Any(ex)
+    is_core_ref(ex, "Any")
+end
+
+function is_simple_atom(ctx, ex)
+    k = kind(ex)
+    # TODO thismodule
+    is_literal(k) || k == K"Symbol" || k == K"Value" || is_ssa(ctx, ex) || is_core_nothing(ex)
+end
+
+function is_identifier_like(ex)
+    k = kind(ex)
+    k == K"Identifier" || k == K"BindingId" || k == K"Placeholder"
+end
+
+function decl_var(ex)
+    kind(ex) == K"::" ? ex[1] : ex
+end
+
+# Given the signature of a `function`, return the symbol that will ultimately
+# be assigned to in local/global scope, if any.
+function assigned_function_name(ex)
+    while kind(ex) == K"where"
+        # f() where T
+        ex = ex[1]
+    end
+    if kind(ex) == K"::" && numchildren(ex) == 2
+        # f()::T
+        ex = ex[1]
+    end
+    if kind(ex) != K"call"
+        throw(LoweringError(ex, "Expected call syntax in function signature"))
+    end
+    ex = ex[1]
+    if kind(ex) == K"curly"
+        # f{T}()
+        ex = ex[1]
+    end
+    if kind(ex) == K"::" || kind(ex) == K"."
+        # (obj::CallableType)(args)
+        # A.b.c(args)
+        nothing
+    elseif is_identifier_like(ex)
+        ex
+    else
+        throw(LoweringError(ex, "Unexpected name in function signature"))
+    end
+end
+
+# Remove empty parameters block, eg, in the arg list of `f(x, y;)`
+function remove_empty_parameters(args)
+    i = length(args)
+    while i > 0 && kind(args[i]) == K"parameters" && numchildren(args[i]) == 0
+        i -= 1
+    end
+    args[1:i]
+end
+
+function to_symbol(ctx, ex)
+    @ast ctx ex ex=>K"Symbol"
+end
+
+function new_scope_layer(ctx, mod_ref::Module=ctx.mod)
+    new_layer = ScopeLayer(length(ctx.scope_layers)+1, ctx.mod, 0, false)
+    push!(ctx.scope_layers, new_layer)
+    new_layer.id
+end
+
+function new_scope_layer(ctx, mod_ref::SyntaxTree)
+    @assert kind(mod_ref) == K"Identifier"
+    new_scope_layer(ctx, ctx.scope_layers[mod_ref.scope_layer].mod)
+end
+
+#-------------------------------------------------------------------------------
+# Context wrapper which helps to construct a list of statements to be executed
+# prior to some expression. Useful when we need to use subexpressions multiple
+# times.
+struct StatementListCtx{Ctx, GraphType} <: AbstractLoweringContext
+    ctx::Ctx
+    stmts::SyntaxList{GraphType}
+end
+
+function Base.getproperty(ctx::StatementListCtx, field::Symbol)
+    if field === :ctx
+        getfield(ctx, :ctx)
+    elseif field === :stmts
+        getfield(ctx, :stmts)
+    else
+        getproperty(getfield(ctx, :ctx), field)
+    end
+end
+
+function emit(ctx::StatementListCtx, ex)
+    push!(ctx.stmts, ex)
+end
+
+function emit_assign_tmp(ctx::StatementListCtx, ex, name="tmp")
+    emit_assign_tmp(ctx.stmts, ctx.ctx, ex, name)
+end
+
+with_stmts(ctx, stmts) = StatementListCtx(ctx, stmts)
+with_stmts(ctx::StatementListCtx, stmts) = StatementListCtx(ctx.ctx, stmts)
+
+function with_stmts(ctx)
+    StatementListCtx(ctx, SyntaxList(ctx))
+end
+
+with_stmts(ctx::StatementListCtx) = StatementListCtx(ctx.ctx)
diff --git a/JuliaLowering/src/bindings.jl b/JuliaLowering/src/bindings.jl
new file mode 100644
index 0000000000000..286e67ecbeb14
--- /dev/null
+++ b/JuliaLowering/src/bindings.jl
@@ -0,0 +1,249 @@
+"""
+Metadata about a binding
+"""
+struct BindingInfo
+    id::IdTag                 # Unique integer identifying this binding
+    name::String
+    kind::Symbol              # :local :global :argument :static_parameter
+    node_id::Int              # ID of associated K"BindingId" node in the syntax graph
+    mod::Union{Nothing,Module} # Set when `kind === :global`
+    type::Union{Nothing,SyntaxTree} # Type, for bindings declared like x::T = 10
+    n_assigned::Int32         # Number of times variable is assigned to
+    is_const::Bool            # Constant, cannot be reassigned
+    is_ssa::Bool              # Single assignment, defined before use
+    is_captured::Bool         # Variable is captured by some lambda
+    is_always_defined::Bool   # A local that we know has an assignment that dominates all usages (is never undef)
+    is_internal::Bool         # True for internal bindings generated by the compiler
+    is_ambiguous_local::Bool  # Local, but would be global in soft scope (ie, the REPL)
+    is_nospecialize::Bool     # @nospecialize on this argument (only valid for kind == :argument)
+end
+
+function BindingInfo(id::IdTag, name::AbstractString, kind::Symbol, node_id::Integer;
+                     mod::Union{Nothing,Module} = nothing,
+                     type::Union{Nothing,SyntaxTree} = nothing,
+                     n_assigned::Integer = 0,
+                     is_const::Bool = false,
+                     is_ssa::Bool = false,
+                     is_captured::Bool = false,
+                     is_always_defined::Bool = is_ssa,
+                     is_internal::Bool = false,
+                     is_ambiguous_local::Bool = false,
+                     is_nospecialize::Bool = false)
+    BindingInfo(id, name, kind, node_id, mod, type, n_assigned, is_const,
+                is_ssa, is_captured, is_always_defined,
+                is_internal, is_ambiguous_local, is_nospecialize)
+end
+
+function Base.show(io::IO, binfo::BindingInfo)
+    print(io, "BindingInfo(", binfo.id, ", ",
+          repr(binfo.name), ", ",
+          repr(binfo.kind), ", ",
+          binfo.node_id)
+    if !isnothing(binfo.mod)
+        print(io, ", mod=", binfo.mod)
+    end
+    if !isnothing(binfo.type)
+        print(io, ", type=", binfo.type)
+    end
+    if binfo.n_assigned != 0
+        print(io, ", n_assigned=", binfo.n_assigned)
+    end
+    if binfo.is_const
+        print(io, ", is_const=", binfo.is_const)
+    end
+    if binfo.is_ssa
+        print(io, ", is_ssa=", binfo.is_ssa)
+    end
+    if binfo.is_captured
+        print(io, ", is_captured=", binfo.is_captured)
+    end
+    if binfo.is_always_defined != binfo.is_ssa
+        print(io, ", is_always_defined=", binfo.is_always_defined)
+    end
+    if binfo.is_internal
+        print(io, ", is_internal=", binfo.is_internal)
+    end
+    if binfo.is_ambiguous_local
+        print(io, ", is_ambiguous_local=", binfo.is_ambiguous_local)
+    end
+    if binfo.is_nospecialize
+        print(io, ", is_nospecialize=", binfo.is_nospecialize)
+    end
+    print(io, ")")
+end
+
+"""
+Metadata about "entities" (variables, constants, etc) in the program. Each
+entity is associated to a unique integer id, the BindingId. A binding will be
+inferred for each *name* in the user's source program by symbolic analysis of
+the source.
+
+However, bindings can also be introduced programmatically during lowering or
+macro expansion: the primary key for bindings is the `BindingId` integer, not
+a name.
+"""
+struct Bindings
+    info::Vector{BindingInfo}
+end
+
+Bindings() = Bindings(Vector{BindingInfo}())
+
+next_binding_id(bindings::Bindings) = length(bindings.info) + 1
+
+function add_binding(bindings::Bindings, binding)
+    if next_binding_id(bindings) != binding.id
+        error("Use next_binding_id() to create a valid binding id")
+    end
+    push!(bindings.info, binding)
+end
+
+function _binding_id(id::Integer)
+    id
+end
+
+function _binding_id(ex::SyntaxTree)
+    @chk kind(ex) == K"BindingId"
+    ex.var_id
+end
+
+function update_binding!(bindings::Bindings, x;
+        type=nothing, is_const=nothing, add_assigned=0,
+        is_always_defined=nothing, is_captured=nothing)
+    id = _binding_id(x)
+    b = lookup_binding(bindings, id)
+    bindings.info[id] = BindingInfo(
+        b.id,
+        b.name,
+        b.kind,
+        b.node_id,
+        b.mod,
+        isnothing(type) ? b.type : type,
+        b.n_assigned + add_assigned,
+        isnothing(is_const) ? b.is_const : is_const,
+        b.is_ssa,
+        isnothing(is_captured) ? b.is_captured : is_captured,
+        isnothing(is_always_defined) ? b.is_always_defined : is_always_defined,
+        b.is_internal,
+        b.is_ambiguous_local,
+        b.is_nospecialize
+    )
+end
+
+function lookup_binding(bindings::Bindings, x)
+    bindings.info[_binding_id(x)]
+end
+
+function lookup_binding(ctx::AbstractLoweringContext, x)
+    lookup_binding(ctx.bindings, x)
+end
+
+function update_binding!(ctx::AbstractLoweringContext, x; kws...)
+    update_binding!(ctx.bindings, x; kws...)
+end
+
+function new_binding(ctx::AbstractLoweringContext, srcref::SyntaxTree,
+                     name::AbstractString, kind::Symbol; kws...)
+    binding_id = next_binding_id(ctx.bindings)
+    ex = @ast ctx srcref binding_id::K"BindingId"
+    add_binding(ctx.bindings, BindingInfo(binding_id, name, kind, ex._id; kws...))
+    ex
+end
+
+# Create a new SSA binding
+function ssavar(ctx::AbstractLoweringContext, srcref, name="tmp")
+    nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name)
+    new_binding(ctx, nameref, name, :local; is_ssa=true, is_internal=true)
+end
+
+# Create a new local mutable binding or lambda argument
+function new_local_binding(ctx::AbstractLoweringContext, srcref, name; kind=:local, kws...)
+    @assert kind === :local || kind === :argument
+    nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name)
+    ex = new_binding(ctx, nameref, name, kind; is_internal=true, kws...)
+    lbindings = current_lambda_bindings(ctx)
+    if !isnothing(lbindings)
+        init_lambda_binding(lbindings, ex.var_id)
+    end
+    ex
+end
+
+function new_global_binding(ctx::AbstractLoweringContext, srcref, name, mod; kws...)
+    nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name)
+    new_binding(ctx, nameref, name, :global; is_internal=true, mod=mod, kws...)
+end
+
+function binding_ex(ctx::AbstractLoweringContext, id::IdTag)
+    # Reconstruct the SyntaxTree for this binding. We keep only the node_id
+    # here, because that's got a concrete type. Whereas if we stored SyntaxTree
+    # that would contain the type of the graph used in the pass where the
+    # bindings were created and we'd need to call reparent(), etc.
+    SyntaxTree(syntax_graph(ctx), lookup_binding(ctx, id).node_id)
+end
+
+
+#-------------------------------------------------------------------------------
+"""
+Metadata about how a binding is used within some enclosing lambda
+"""
+struct LambdaBindingInfo
+    is_captured::Bool
+    is_read::Bool
+    is_assigned::Bool
+    # Binding was the function name in a call. Used for specialization
+    # heuristics in the optimizer.
+    is_called::Bool
+end
+
+LambdaBindingInfo() = LambdaBindingInfo(false, false, false, false)
+
+function LambdaBindingInfo(parent::LambdaBindingInfo;
+                           is_captured = nothing,
+                           is_read     = nothing,
+                           is_assigned = nothing,
+                           is_called   = nothing)
+    LambdaBindingInfo(
+        isnothing(is_captured) ? parent.is_captured : is_captured,
+        isnothing(is_read)     ? parent.is_read     : is_read,
+        isnothing(is_assigned) ? parent.is_assigned : is_assigned,
+        isnothing(is_called)   ? parent.is_called   : is_called,
+    )
+end
+
+struct LambdaBindings
+    # Bindings used within the lambda
+    self::IdTag
+    bindings::Dict{IdTag,LambdaBindingInfo}
+end
+
+LambdaBindings(self::IdTag = 0) = LambdaBindings(self, Dict{IdTag,LambdaBindings}())
+
+function init_lambda_binding(bindings::LambdaBindings, id; kws...)
+    @assert !haskey(bindings.bindings, id)
+    bindings.bindings[id] = LambdaBindingInfo(LambdaBindingInfo(); kws...)
+end
+
+function update_lambda_binding!(bindings::LambdaBindings, x; kws...)
+    id = _binding_id(x)
+    binfo = bindings.bindings[id]
+    bindings.bindings[id] = LambdaBindingInfo(binfo; kws...)
+end
+
+function update_lambda_binding!(ctx::AbstractLoweringContext, x; kws...)
+    update_lambda_binding!(current_lambda_bindings(ctx), x; kws...)
+end
+
+function lookup_lambda_binding(bindings::LambdaBindings, x)
+    get(bindings.bindings, _binding_id(x), nothing)
+end
+
+function lookup_lambda_binding(ctx::AbstractLoweringContext, x)
+    lookup_lambda_binding(current_lambda_bindings(ctx), x)
+end
+
+function has_lambda_binding(bindings::LambdaBindings, x)
+    haskey(bindings.bindings, _binding_id(x))
+end
+
+function has_lambda_binding(ctx::AbstractLoweringContext, x)
+    has_lambda_binding(current_lambda_bindings(ctx), x)
+end
diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl
new file mode 100644
index 0000000000000..1dc4ca2bc8d9d
--- /dev/null
+++ b/JuliaLowering/src/closure_conversion.jl
@@ -0,0 +1,633 @@
+struct ClosureInfo{GraphType}
+    # Global name of the type of the closure
+    type_name::SyntaxTree{GraphType}
+    # Names of fields for use with getfield, in order
+    field_names::SyntaxList{GraphType}
+    # Map from the original BindingId of closed-over vars to the index of the
+    # associated field in the closure type.
+    field_inds::Dict{IdTag,Int}
+end
+
+struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext
+    graph::GraphType
+    bindings::Bindings
+    mod::Module
+    closure_bindings::Dict{IdTag,ClosureBindings}
+    capture_rewriting::Union{Nothing,ClosureInfo{GraphType},SyntaxList{GraphType}}
+    lambda_bindings::LambdaBindings
+    # True if we're in a section of code which preserves top-level sequencing
+    # such that closure types can be emitted inline with other code.
+    is_toplevel_seq_point::Bool
+    # True if this expression should not have toplevel effects, namely, it
+    # should not declare the globals it references.  This allows generated
+    # functions to refer to globals that have already been declared, without
+    # triggering the "function body AST not pure" error.
+    toplevel_pure::Bool
+    toplevel_stmts::SyntaxList{GraphType}
+    closure_infos::Dict{IdTag,ClosureInfo{GraphType}}
+end
+
+function ClosureConversionCtx(graph::GraphType, bindings::Bindings,
+                              mod::Module, closure_bindings::Dict{IdTag,ClosureBindings},
+                              lambda_bindings::LambdaBindings) where {GraphType}
+    ClosureConversionCtx{GraphType}(
+        graph, bindings, mod, closure_bindings, nothing,
+        lambda_bindings, false, true, SyntaxList(graph),
+        Dict{IdTag,ClosureInfo{GraphType}}())
+end
+
+function current_lambda_bindings(ctx::ClosureConversionCtx)
+    ctx.lambda_bindings
+end
+
+# Access captured variable from inside a closure
+function captured_var_access(ctx, ex)
+    cap_rewrite = ctx.capture_rewriting
+    if cap_rewrite isa ClosureInfo
+        field_sym = cap_rewrite.field_names[cap_rewrite.field_inds[ex.var_id]]
+        @ast ctx ex [K"call"
+            "getfield"::K"core"
+            binding_ex(ctx, current_lambda_bindings(ctx).self)
+            field_sym
+        ]
+    else
+        interpolations = cap_rewrite
+        @assert !isnothing(cap_rewrite)
+        if isempty(interpolations) || !is_same_identifier_like(interpolations[end], ex)
+            push!(interpolations, ex)
+        end
+        @ast ctx ex [K"captured_local" length(interpolations)::K"Integer"]
+    end
+end
+
+function get_box_contents(ctx::ClosureConversionCtx, var, box_ex)
+    undef_var = new_local_binding(ctx, var, lookup_binding(ctx, var.var_id).name)
+    @ast ctx var [K"block"
+        box := box_ex
+        # Lower in an UndefVar check to a similarly named variable
+        # (ref #20016) so that closure lowering Box introduction
+        # doesn't impact the error message and the compiler is expected
+        # to fold away the extraneous null check
+        #
+        # TODO: Ideally the runtime would rely on provenance info for
+        # this error and we can remove the isdefined check.
+        [K"if" [K"call"
+                "isdefined"::K"core"
+                box
+                "contents"::K"Symbol"
+            ]
+            ::K"TOMBSTONE"
+            [K"block"
+                 [K"newvar" undef_var]
+                 undef_var
+            ]
+        ]
+        [K"call"
+            "getfield"::K"core"
+            box
+            "contents"::K"Symbol"
+        ]
+    ]
+end
+
+# Convert `ex` to `type` by calling `convert(type, ex)` when necessary.
+#
+# Used for converting the right hand side of an assignment to a typed local or
+# global and for converting the return value of a function call to the declared
+# return type.
+function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert)
+    # Use a slot to permit union-splitting this in inference
+    tmp = new_local_binding(ctx, srcref, "tmp", is_always_defined=true)
+
+    @ast ctx srcref [K"block"
+        type_tmp := type
+        # [K"=" type_ssa renumber_assigned_ssavalues(type)]
+        [K"=" tmp ex]
+        [K"if"
+            [K"call" "isa"::K"core" tmp type_tmp]
+            "nothing"::K"core"
+            [K"="
+                tmp
+                if do_typeassert
+                    [K"call"
+                        "typeassert"::K"core"
+                        [K"call" "convert"::K"top" type_tmp tmp]
+                        type_tmp
+                    ]
+                else
+                    [K"call" "convert"::K"top" type_tmp tmp]
+                end
+            ]
+        ]
+        tmp
+    ]
+end
+
+# TODO: Avoid producing redundant calls to declare_global
+function make_globaldecl(ctx, src_ex, mod, name, strong=false, type=nothing; ret_nothing=false)
+    if !ctx.toplevel_pure
+        decl = @ast ctx src_ex [K"block"
+            [K"call"
+                "declare_global"::K"core"
+                mod::K"Value" name::K"Symbol" strong::K"Bool"
+                type
+            ]
+            [K"latestworld"]
+            "nothing"::K"core"
+        ]
+        if ctx.is_toplevel_seq_point
+            return decl
+        else
+            push!(ctx.toplevel_stmts, decl)
+        end
+    end
+    if ret_nothing
+        nothing
+    else
+        @ast ctx src_ex "nothing"::K"core"
+    end
+end
+
+function convert_global_assignment(ctx, ex, var, rhs0)
+    binfo = lookup_binding(ctx, var)
+    @assert binfo.kind == :global
+    stmts = SyntaxList(ctx)
+    decl = make_globaldecl(ctx, ex, binfo.mod, binfo.name, true; ret_nothing=true)
+    decl !== nothing && push!(stmts, decl)
+    rhs1 = if is_simple_atom(ctx, rhs0)
+        rhs0
+    else
+        tmp = ssavar(ctx, rhs0)
+        push!(stmts, @ast ctx rhs0 [K"=" tmp rhs0])
+        tmp
+    end
+    rhs = if binfo.is_const && isnothing(binfo.type)
+        # const global assignments without a type declaration don't need us to
+        # deal with the binding type at all.
+        rhs1
+    else
+        type_var = ssavar(ctx, ex, "binding_type")
+        push!(stmts, @ast ctx ex [K"="
+            type_var
+            [K"call"
+                "get_binding_type"::K"core"
+                binfo.mod::K"Value"
+                binfo.name::K"Symbol"
+            ]
+        ])
+        do_typeassert = false # Global assignment type checking is done by the runtime
+        convert_for_type_decl(ctx, ex, rhs1, type_var, do_typeassert)
+    end
+    push!(stmts, @ast ctx ex [K"=" var rhs])
+    @ast ctx ex [K"block"
+        stmts...
+        rhs1
+    ]
+end
+
+# Convert assignment to a closed variable to a `setfield!` call and generate
+# `convert` calls for variables with declared types.
+#
+# When doing this, the original value needs to be preserved, to ensure the
+# expression `a=b` always returns exactly `b`.
+function convert_assignment(ctx, ex)
+    var = ex[1]
+    rhs0 = _convert_closures(ctx, ex[2])
+    if kind(var) == K"Placeholder"
+        return @ast ctx ex [K"=" var rhs0]
+    end
+    @chk kind(var) == K"BindingId"
+    binfo = lookup_binding(ctx, var)
+    if binfo.kind == :global
+        convert_global_assignment(ctx, ex, var, rhs0)
+    else
+        @assert binfo.kind == :local || binfo.kind == :argument
+        boxed = is_boxed(binfo)
+        if isnothing(binfo.type) && !boxed
+            @ast ctx ex [K"=" var rhs0]
+        else
+            # Typed local
+            tmp_rhs0 = ssavar(ctx, rhs0)
+            rhs = isnothing(binfo.type) ? tmp_rhs0 :
+                  convert_for_type_decl(ctx, ex, tmp_rhs0, _convert_closures(ctx, binfo.type), true)
+            assignment = if boxed
+                @ast ctx ex [K"call"
+                    "setfield!"::K"core"
+                    is_self_captured(ctx, var) ? captured_var_access(ctx, var) : var
+                    "contents"::K"Symbol"
+                    rhs
+                ]
+            else
+                @ast ctx ex [K"=" var rhs]
+            end
+            @ast ctx ex [K"block"
+                [K"=" tmp_rhs0 rhs0]
+                assignment
+                tmp_rhs0
+            ]
+        end
+    end
+end
+
+# Compute fields for a closure type, one field for each captured variable.
+function closure_type_fields(ctx, srcref, closure_binds, is_opaque)
+    capture_ids = Vector{IdTag}()
+    for lambda_bindings in closure_binds.lambdas
+        for (id, lbinfo) in lambda_bindings.bindings
+            if lbinfo.is_captured
+                push!(capture_ids, id)
+            end
+        end
+    end
+    # sort here to avoid depending on undefined Dict iteration order.
+    capture_ids = sort!(unique(capture_ids))
+
+    field_syms = SyntaxList(ctx)
+    if is_opaque
+        field_orig_bindings = capture_ids
+        # For opaque closures we don't try to generate sensible names for the
+        # fields as there's no closure type to generate.
+        for (i,id) in enumerate(field_orig_bindings)
+            push!(field_syms, @ast ctx srcref i::K"Integer")
+        end
+    else
+        field_names = Dict{String,IdTag}()
+        for id in capture_ids
+            binfo = lookup_binding(ctx, id)
+            # We name each field of the closure after the variable which was closed
+            # over, for clarity. Adding a suffix can be necessary when collisions
+            # occur due to macro expansion and generated bindings
+            name0 = binfo.name
+            name = name0
+            i = 1
+            while haskey(field_names, name)
+                name = "$name0#$i"
+                i += 1
+            end
+            field_names[name] = id
+        end
+        field_orig_bindings = Vector{IdTag}()
+        for (name,id) in sort!(collect(field_names))
+            push!(field_syms, @ast ctx srcref name::K"Symbol")
+            push!(field_orig_bindings, id)
+        end
+    end
+    field_inds = Dict{IdTag,Int}()
+    field_is_box = Vector{Bool}()
+    for (i,id) in enumerate(field_orig_bindings)
+        push!(field_is_box, is_boxed(ctx, id))
+        field_inds[id] = i
+    end
+
+    return field_syms, field_orig_bindings, field_inds, field_is_box
+end
+
+# Return a thunk which creates a new type for a closure with `field_syms` named
+# fields. The new type will be named `name_str` which must be an unassigned
+# name in the module.
+function type_for_closure(ctx::ClosureConversionCtx, srcref, name_str, field_syms, field_is_box)
+    # New closure types always belong to the module we're expanding into - they
+    # need to be serialized there during precompile.
+    mod = ctx.mod
+    type_binding = new_global_binding(ctx, srcref, name_str, mod)
+    type_ex = @ast ctx srcref [K"call"
+        #"_call_latest"::K"core"
+        eval_closure_type::K"Value"
+        ctx.mod::K"Value"
+        name_str::K"Symbol"
+        [K"call" "svec"::K"core" field_syms...]
+        [K"call" "svec"::K"core" [f::K"Bool" for f in field_is_box]...]
+    ]
+    type_ex, type_binding
+end
+
+function is_boxed(binfo::BindingInfo)
+    # True for
+    # * :argument when it's not reassigned
+    # * :static_parameter (these can't be reassigned)
+    defined_but_not_assigned = binfo.is_always_defined && binfo.n_assigned == 0
+    # For now, we box almost everything but later we'll want to do dominance
+    # analysis on the untyped IR.
+    return binfo.is_captured && !defined_but_not_assigned
+end
+
+function is_boxed(ctx, x)
+    is_boxed(lookup_binding(ctx, x))
+end
+
+# Is captured in the closure's `self` argument
+function is_self_captured(ctx, x)
+    lbinfo = lookup_lambda_binding(ctx, x)
+    !isnothing(lbinfo) && lbinfo.is_captured
+end
+
+# Map the children of `ex` through _convert_closures, lifting any toplevel
+# closure definition statements to occur before the other content of `ex`.
+function map_cl_convert(ctx::ClosureConversionCtx, ex, toplevel_preserving)
+    if ctx.is_toplevel_seq_point && !toplevel_preserving
+        toplevel_stmts = SyntaxList(ctx)
+        ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod,
+                                    ctx.closure_bindings, ctx.capture_rewriting, ctx.lambda_bindings,
+                                    false, ctx.toplevel_pure, toplevel_stmts, ctx.closure_infos)
+        res = mapchildren(e->_convert_closures(ctx2, e), ctx2, ex)
+        if isempty(toplevel_stmts)
+            res
+        else
+            @ast ctx ex [K"block"
+                toplevel_stmts...
+                res
+            ]
+        end
+    else
+        mapchildren(e->_convert_closures(ctx, e), ctx, ex)
+    end
+end
+
+function _convert_closures(ctx::ClosureConversionCtx, ex)
+    k = kind(ex)
+    if k == K"BindingId"
+        access = is_self_captured(ctx, ex) ? captured_var_access(ctx, ex) : ex
+        if is_boxed(ctx, ex)
+            get_box_contents(ctx, ex, access)
+        else
+            access
+        end
+    elseif is_leaf(ex) || k == K"inert" || k == K"static_eval"
+        ex
+    elseif k == K"="
+        convert_assignment(ctx, ex)
+    elseif k == K"isdefined"
+        # Convert isdefined expr to function for closure converted variables
+        var = ex[1]
+        binfo = lookup_binding(ctx, var)
+        if is_boxed(binfo)
+            access = is_self_captured(ctx, var) ? captured_var_access(ctx, var) : var
+            @ast ctx ex [K"call"
+                "isdefined"::K"core"
+                access
+                "contents"::K"Symbol"
+            ]
+        elseif binfo.is_always_defined || is_self_captured(ctx, var)
+            # Captured but unboxed vars are always defined
+            @ast ctx ex true::K"Bool"
+        elseif binfo.kind == :global
+            # Normal isdefined won't work for globals (#56985)
+            @ast ctx ex [K"call"
+                "isdefinedglobal"::K"core"
+                ctx.mod::K"Value"
+                binfo.name::K"Symbol"
+                false::K"Bool"]
+        else
+            ex
+        end
+    elseif k == K"decl"
+        @assert kind(ex[1]) == K"BindingId"
+        binfo = lookup_binding(ctx, ex[1])
+        if binfo.kind == :global
+            # flisp has this, but our K"assert" handling is in a previous pass
+            # [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]]
+            make_globaldecl(ctx, ex, binfo.mod, binfo.name, true, _convert_closures(ctx, ex[2]))
+        else
+            makeleaf(ctx, ex, K"TOMBSTONE")
+        end
+    elseif k == K"global"
+        # Leftover `global` forms become weak globals.
+        mod, name = if kind(ex[1]) == K"BindingId"
+            binfo = lookup_binding(ctx, ex[1])
+            @assert binfo.kind == :global
+            binfo.mod, binfo.name
+        else
+            # See note about using eval on Expr(:global/:const, GlobalRef(...))
+            @assert ex[1].value isa GlobalRef
+            ex[1].value.mod, String(ex[1].value.name)
+        end
+        @ast ctx ex [K"unused_only" make_globaldecl(ctx, ex, mod, name, false)]
+    elseif k == K"local"
+        var = ex[1]
+        binfo = lookup_binding(ctx, var)
+        if binfo.is_captured
+            @ast ctx ex [K"=" var [K"call" "Box"::K"core"]]
+        elseif !binfo.is_always_defined
+            @ast ctx ex [K"newvar" var]
+        else
+            makeleaf(ctx, ex, K"TOMBSTONE")
+        end
+    elseif k == K"lambda"
+        closure_convert_lambda(ctx, ex)
+    elseif k == K"function_decl"
+        func_name = ex[1]
+        @assert kind(func_name) == K"BindingId"
+        func_name_id = func_name.var_id
+        if haskey(ctx.closure_bindings, func_name_id)
+            closure_info = get(ctx.closure_infos, func_name_id, nothing)
+            needs_def = isnothing(closure_info)
+            if needs_def
+                closure_binds = ctx.closure_bindings[func_name_id]
+                field_syms, field_orig_bindings, field_inds, field_is_box =
+                    closure_type_fields(ctx, ex, closure_binds, false)
+                name_str = reserve_module_binding_i(ctx.mod,
+                    "#$(join(closure_binds.name_stack, "#"))##")
+                closure_type_def, closure_type_ =
+                    type_for_closure(ctx, ex, name_str, field_syms, field_is_box)
+                if !ctx.is_toplevel_seq_point
+                    push!(ctx.toplevel_stmts, closure_type_def)
+                    push!(ctx.toplevel_stmts, @ast ctx ex (::K"latestworld_if_toplevel"))
+                    closure_type_def = nothing
+                end
+                closure_info = ClosureInfo(closure_type_, field_syms, field_inds)
+                ctx.closure_infos[func_name_id] = closure_info
+                type_params = SyntaxList(ctx)
+                init_closure_args = SyntaxList(ctx)
+                for (id, boxed) in zip(field_orig_bindings, field_is_box)
+                    field_val = binding_ex(ctx, id)
+                    if is_self_captured(ctx, field_val)
+                        # Access from outer closure if necessary but do not
+                        # unbox to feed into the inner nested closure.
+                        field_val = captured_var_access(ctx, field_val)
+                    end
+                    push!(init_closure_args, field_val)
+                    if !boxed
+                        push!(type_params, @ast ctx ex [K"call"
+                              # TODO: Update to use _typeof_captured_variable (#40985)
+                              #"_typeof_captured_variable"::K"core"
+                              "typeof"::K"core"
+                              field_val])
+                    end
+                end
+                @ast ctx ex [K"block"
+                    closure_type_def
+                    (::K"latestworld_if_toplevel")
+                    closure_type := if isempty(type_params)
+                        closure_type_
+                    else
+                        [K"call" "apply_type"::K"core" closure_type_ type_params...]
+                    end
+                    closure_val := [K"new"
+                        closure_type
+                        init_closure_args...
+                    ]
+                    convert_assignment(ctx, [K"=" func_name closure_val])
+                    ::K"TOMBSTONE"
+                ]
+            else
+                @ast ctx ex (::K"TOMBSTONE")
+            end
+        else
+            # Single-arg K"method" has the side effect of creating a global
+            # binding for `func_name` if it doesn't exist.
+            @ast ctx ex [K"block"
+                [K"method" func_name]
+                ::K"TOMBSTONE" # <- function_decl should not be used in value position
+            ]
+        end
+    elseif k == K"function_type"
+        func_name = ex[1]
+        if kind(func_name) == K"BindingId" && lookup_binding(ctx, func_name).kind === :local
+            ctx.closure_infos[func_name.var_id].type_name
+        else
+            @ast ctx ex [K"call" "Typeof"::K"core" func_name]
+        end
+    elseif k == K"method_defs"
+        name = ex[1]
+        is_closure = kind(name) == K"BindingId" && lookup_binding(ctx, name).kind === :local
+        cap_rewrite = is_closure ? ctx.closure_infos[name.var_id] : nothing
+        ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod,
+                                    ctx.closure_bindings, cap_rewrite, ctx.lambda_bindings,
+                                    ctx.is_toplevel_seq_point, ctx.toplevel_pure, ctx.toplevel_stmts,
+                                    ctx.closure_infos)
+        body = map_cl_convert(ctx2, ex[2], false)
+        if is_closure
+            if ctx.is_toplevel_seq_point
+                body
+            else
+                # Move methods out to a top-level sequence point.
+                push!(ctx.toplevel_stmts, body)
+                @ast ctx ex (::K"TOMBSTONE")
+            end
+        else
+            @ast ctx ex [K"block"
+                body
+                ::K"TOMBSTONE"
+            ]
+        end
+    elseif k == K"_opaque_closure"
+        closure_binds = ctx.closure_bindings[ex[1].var_id]
+        field_syms, field_orig_bindings, field_inds, field_is_box =
+            closure_type_fields(ctx, ex, closure_binds, true)
+
+        capture_rewrites = ClosureInfo(ex #=unused=#, field_syms, field_inds)
+
+        ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod,
+                                    ctx.closure_bindings, capture_rewrites, ctx.lambda_bindings,
+                                    false, ctx.toplevel_pure, ctx.toplevel_stmts, ctx.closure_infos)
+
+        init_closure_args = SyntaxList(ctx)
+        for id in field_orig_bindings
+            push!(init_closure_args, binding_ex(ctx, id))
+        end
+        @ast ctx ex [K"new_opaque_closure"
+            ex[2] # arg type tuple
+            ex[3] # return_lower_bound
+            ex[4] # return_upper_bound
+            ex[5] # allow_partial
+            [K"opaque_closure_method"
+                "nothing"::K"core"
+                ex[6] # nargs
+                ex[7] # is_va
+                ex[8] # functionloc
+                closure_convert_lambda(ctx2, ex[9])
+            ]
+            init_closure_args...
+        ]
+    else
+        # A small number of kinds are toplevel-preserving in terms of closure
+        # closure definitions will be lifted out into `toplevel_stmts` if they
+        # occur inside `ex`.
+        toplevel_seq_preserving = k == K"if" || k == K"elseif" || k == K"block" ||
+                              k == K"tryfinally" || k == K"trycatchelse"
+        map_cl_convert(ctx, ex, toplevel_seq_preserving)
+    end
+end
+
+function closure_convert_lambda(ctx, ex)
+    @assert kind(ex) == K"lambda"
+    lambda_bindings = ex.lambda_bindings
+    interpolations = nothing
+    if isnothing(ctx.capture_rewriting)
+        # Global method which may capture locals
+        interpolations = SyntaxList(ctx)
+        cap_rewrite = interpolations
+    else
+        cap_rewrite = ctx.capture_rewriting
+    end
+    ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod,
+                                ctx.closure_bindings, cap_rewrite, lambda_bindings,
+                                ex.is_toplevel_thunk, ctx.toplevel_pure && ex.toplevel_pure,
+                                ctx.toplevel_stmts, ctx.closure_infos)
+    lambda_children = SyntaxList(ctx)
+    args = ex[1]
+    push!(lambda_children, args)
+    push!(lambda_children, ex[2])
+
+    # Add box initializations for arguments which are captured by an inner lambda
+    body_stmts = SyntaxList(ctx)
+    for arg in children(args)
+        kind(arg) != K"Placeholder" || continue
+        if is_boxed(ctx, arg)
+            push!(body_stmts, @ast ctx arg [K"="
+                arg
+                [K"call" "Box"::K"core" arg]
+            ])
+        end
+    end
+    # Convert body.
+    input_body_stmts = kind(ex[3]) != K"block" ? ex[3:3] : ex[3][1:end]
+    for e in input_body_stmts
+        push!(body_stmts, _convert_closures(ctx2, e))
+    end
+    push!(lambda_children, @ast ctx2 ex[3] [K"block" body_stmts...])
+
+    if numchildren(ex) > 3
+        # Convert return type
+        @assert numchildren(ex) == 4
+        push!(lambda_children, _convert_closures(ctx2, ex[4]))
+    end
+
+    lam = makenode(ctx, ex, ex, lambda_children; lambda_bindings=lambda_bindings)
+    if !isnothing(interpolations) && !isempty(interpolations)
+        @ast ctx ex [K"call"
+            replace_captured_locals!::K"Value"
+            lam
+            [K"call"
+                "svec"::K"core"
+                interpolations...
+            ]
+        ]
+    else
+        lam
+    end
+end
+
+
+"""
+Closure conversion and lowering of bindings
+
+This pass does a few things things:
+* Deal with typed variables (K"decl") and their assignments
+* Deal with const and non-const global assignments
+* Convert closures into types
+* Lower variables captured by closures into boxes, etc, as necessary
+
+Invariants:
+* This pass must not introduce new K"Identifier" - only K"BindingId".
+* Any new binding IDs must be added to the enclosing lambda locals
+"""
+@fzone "JL: closures" function convert_closures(ctx::VariableAnalysisContext, ex)
+    ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod,
+                               ctx.closure_bindings, ex.lambda_bindings)
+    ex1 = closure_convert_lambda(ctx, ex)
+    if !isempty(ctx.toplevel_stmts)
+        throw(LoweringError(first(ctx.toplevel_stmts), "Top level code was found outside any top level context. `@generated` functions may not contain closures, including `do` syntax and generators/comprehension"))
+    end
+    ctx, ex1
+end
diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl
new file mode 100644
index 0000000000000..391c99a624d93
--- /dev/null
+++ b/JuliaLowering/src/compat.jl
@@ -0,0 +1,610 @@
+const JS = JuliaSyntax
+
+function _insert_tree_node(graph::SyntaxGraph, k::Kind, src::SourceAttrType,
+                           flags::UInt16=0x0000; attrs...)
+    id = newnode!(graph)
+    sethead!(graph, id, k)
+    flags !== 0 && setflags!(graph, id, flags)
+    setattr!(graph, id; source=src, attrs...)
+    return id
+end
+
+"""
+An Expr -> SyntaxTree transformation that should preserve semantics, but will
+have low-quality provenance info (namely, each tree node will be associated with
+the last seen LineNumberNode in the pre-order expr traversal).
+
+Last-resort option so that, for example, we can lower the output of old
+Expr-producing macros.  Always prefer re-parsing source text over using this.
+
+Supports parsed and/or macro-expanded exprs, but not lowered exprs
+"""
+function expr_to_syntaxtree(@nospecialize(e), lnn::Union{LineNumberNode, Nothing}=nothing)
+    graph = ensure_attributes!(
+        SyntaxGraph(),
+        kind=Kind, syntax_flags=UInt16,
+        source=SourceAttrType, var_id=Int, value=Any,
+        name_val=String, is_toplevel_thunk=Bool,
+        scope_layer=LayerId, meta=CompileHints,
+        toplevel_pure=Bool)
+    expr_to_syntaxtree(graph, e, lnn)
+end
+
+@fzone "JL: expr_to_syntaxtree" function expr_to_syntaxtree(ctx, @nospecialize(e), lnn::Union{LineNumberNode, Nothing})
+    graph = syntax_graph(ctx)
+    toplevel_src = if isnothing(lnn)
+        # Provenance sinkhole for all nodes until we hit a linenode
+        dummy_src = SourceRef(
+            SourceFile("No source for expression"),
+            1, JS.GreenNode(K"None", 0))
+        _insert_tree_node(graph, K"None", dummy_src)
+    else
+        lnn
+    end
+    st_id, _ = _insert_convert_expr(e, graph, toplevel_src)
+    out = SyntaxTree(graph, st_id)
+    return out
+end
+
+function _expr_replace!(@nospecialize(e), replace_pred::Function, replacer!::Function,
+                        recurse_pred=(@nospecialize e)->true)
+    if replace_pred(e)
+        replacer!(e)
+    end
+    if e isa Expr && recurse_pred(e)
+        for a in e.args
+            _expr_replace!(a, replace_pred, replacer!, recurse_pred)
+        end
+    end
+end
+
+function _to_iterspec(exs::Vector, is_generator::Bool)
+    if length(exs) === 1 && exs[1].head === :filter
+        @assert length(exs[1].args) >= 2
+        return Expr(:filter, _to_iterspec(exs[1].args[2:end], true), exs[1].args[1])
+    end
+    outex = Expr(:iteration)
+    for e in exs
+        if e.head === :block && !is_generator
+            for iter in e.args
+                push!(outex.args, Expr(:in, iter.args...))
+            end
+        elseif e.head === :(=)
+            push!(outex.args, Expr(:in, e.args...))
+        else
+            @assert false "unknown iterspec in $e"
+        end
+    end
+    return outex
+end
+
+"""
+Return `e.args`, but with any parameters in SyntaxTree (flattened, source) order.
+Parameters are expected to be at `e.args[pos]`.
+
+e.g. orderings of (a,b,c;d;e;f):
+  Expr:       (tuple (parameters (parameters (parameters f) e) d) a b c)
+  SyntaxTree: (tuple a b c (parameters d) (parameters e) (parameters f))
+"""
+function collect_expr_parameters(e::Expr, pos::Int)
+    params = expr_parameters(e, pos)
+    isnothing(params) && return copy(e.args)
+    args = Any[e.args[1:pos-1]..., e.args[pos+1:end]...]
+    return _flatten_params!(args, params)
+end
+function _flatten_params!(out::Vector{Any}, params::Expr)
+    p,p_esc = unwrap_esc(params)
+    p1 = expr_parameters(p, 1)
+    if !isnothing(p1)
+        push!(out, p_esc(Expr(:parameters, p.args[2:end]...)))
+        _flatten_params!(out, p_esc(p1))
+    else
+        push!(out, params::Any)
+    end
+    return out
+end
+function expr_parameters(p::Expr, pos::Int)
+    if pos <= length(p.args)
+        e,_ = unwrap_esc(p.args[pos])
+        if e isa Expr && e.head === :parameters
+            return p.args[pos]
+        end
+    end
+    return nothing
+end
+
+"""
+If `b` (usually a block) has exactly one non-LineNumberNode argument, unwrap it.
+"""
+function maybe_unwrap_arg(b)
+    if !(b isa Expr)
+        return b
+    end
+    e1 = findfirst(c -> !isa(c, LineNumberNode), b.args)
+    isnothing(e1) && return b
+    e2 = findfirst(c -> !isa(c, LineNumberNode), b.args[e1+1:end])
+    !isnothing(e2) && return b
+    return b.args[e1]
+end
+
+function maybe_extract_lnn(b, default)
+    !(b isa Expr) && return default
+    lnn_i = findfirst(a->isa(a, LineNumberNode), b.args)
+    return isnothing(lnn_i) ? default : b.args[lnn_i]
+end
+
+# Get kind by string if exists.  TODO relies on internals
+function find_kind(s::String)
+    out = get(JS._kind_str_to_int, s, nothing)
+    return isnothing(out) ? nothing : JS.Kind(out)
+end
+
+function is_dotted_operator(s::AbstractString)
+    return length(s) >= 2 &&
+        s[1] === '.' &&
+        JS.is_operator(something(find_kind(s[2:end]), K"None"))
+end
+
+function is_eventually_call(e)
+    return e isa Expr && (e.head === :call ||
+        e.head in (:escape, :where, :(::)) && is_eventually_call(e.args[1]))
+end
+
+function rewrap_escapes(hyg, ex)
+    if hyg isa Expr && hyg.head in (:escape, :var"hygienic-scope")
+        ex = Expr(hyg.head, rewrap_escapes(hyg.args[1], ex))
+        if hyg.head === :var"hygienic-scope"
+            append!(ex.args, @view hyg.args[2:end])
+        end
+    end
+    return ex
+end
+
+# Unwrap Expr(:escape) and Expr(:hygienic-scope). Return the unwrapped
+# expression and a function which will rewrap a derived expression in the
+# correct hygiene wrapper.
+function unwrap_esc(ex)
+    orig_ex = ex
+    while ex isa Expr && ex.head in (:escape, :var"hygienic-scope")
+        @assert length(ex.args) >= 1
+        ex = ex.args[1]
+    end
+    return ex, e->rewrap_escapes(orig_ex, e)
+end
+
+function unwrap_esc_(e)
+    unwrap_esc(e)[1]
+end
+
+"""
+Insert `e` converted to a syntaxtree into graph and recurse on children.  Return
+a pair (my_node_id, last_srcloc).  Should not mutate `e`.
+
+`src` is the latest location found in the pre-order traversal, and is the line
+number node to be associated with `e`.
+"""
+function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceAttrType)
+    #---------------------------------------------------------------------------
+    # Non-expr types
+    if isnothing(e)
+        st_id = _insert_tree_node(graph, K"core", src; name_val="nothing")
+        return st_id, src
+    elseif e isa LineNumberNode
+        # A LineNumberNode in value position evaluates to nothing
+        st_id = _insert_tree_node(graph, K"core", src; name_val="nothing")
+        return st_id, e
+    elseif e isa Symbol
+        st_id = _insert_tree_node(graph, K"Identifier", src; name_val=String(e))
+        return st_id, src
+    elseif e isa QuoteNode
+        if e.value isa Symbol
+            return _insert_convert_expr(Expr(:quoted_symbol, e.value), graph, src)
+        elseif e.value isa Expr
+            return _insert_convert_expr(Expr(:inert, e.value), graph, src)
+        elseif e.value isa LineNumberNode
+            return _insert_tree_node(graph, K"Value", src; value=e.value), src
+        else
+            return _insert_convert_expr(e.value, graph, src)
+        end
+    elseif e isa String
+        st_id = _insert_tree_node(graph, K"string", src)
+        id_inner = _insert_tree_node(graph, K"String", src; value=e)
+        setchildren!(graph, st_id, [id_inner])
+        return st_id, src
+    elseif e isa VersionNumber
+        st_id = _insert_tree_node(graph, K"VERSION", src, JS.set_numeric_flags(e.minor*10); value=e)
+        return st_id, src
+    elseif !(e isa Expr)
+        # There are other kinds we could potentially back-convert (e.g. Float),
+        # but Value should work fine.
+        st_k = e isa Bool ? K"Bool" :
+            e isa Integer ? K"Integer" :
+            find_kind(string(typeof(e)))
+        st_id = _insert_tree_node(graph, isnothing(st_k) ? K"Value" : st_k, src; value=e)
+        return st_id, src
+    end
+
+    #---------------------------------------------------------------------------
+    # `e` is an expr.  In many cases, it suffices to
+    # - guess that the kind name is the same as the expr head
+    # - add no syntax flags or attrs
+    # - map e.args to syntax tree children one-to-one
+    e::Expr
+    nargs = length(e.args)
+    maybe_kind = find_kind(string(e.head))
+    st_k = isnothing(maybe_kind) ? K"None" : maybe_kind
+    st_flags = 0x0000
+    st_attrs = Dict{Symbol, Any}()
+    # Note that SyntaxTree/Node differentiate 0-child non-terminals and leaves
+    child_exprs::Union{Nothing, Vector{Any}} = copy(e.args)
+
+    # However, the following are (many) special cases where the kind, flags,
+    # children, or attributes are different from what we guessed above
+    if Base.isoperator(e.head) && st_k === K"None"
+        # e.head is an updating assignment operator (+=, .-=, etc).  Non-=
+        # dotted ops are wrapped in a call, so we don't reach this.
+        s = string(e.head)
+        @assert s[end] === '=' && nargs === 2
+        if s[1] === '.'
+            st_k = K".op="
+            op = s[2:end-1]
+        else
+            st_k = K"op="
+            op = s[1:end-1]
+        end
+        child_exprs = Any[e.args[1], Symbol(op), e.args[2]]
+    elseif e.head === :comparison
+        for i = 2:2:length(child_exprs)
+            op,op_esc = unwrap_esc(child_exprs[i])
+            @assert op isa Symbol
+            op_s = string(op)
+            if is_dotted_operator(op_s)
+                child_exprs[i] = Expr(:., op_esc(Symbol(op_s[2:end])))
+            end
+        end
+    elseif e.head === :macrocall
+        @assert nargs >= 2
+        a1,a1_esc = unwrap_esc(e.args[1])
+        child_exprs = collect_expr_parameters(e, 3)
+        if child_exprs[2] isa LineNumberNode
+            src = child_exprs[2]
+        end
+        deleteat!(child_exprs, 2)
+        if a1 isa Symbol
+            child_exprs[1] = a1_esc(Expr(:macro_name, a1))
+        elseif a1 isa Expr && a1.head === :(.)
+            a12,a12_esc = unwrap_esc(a1.args[2])
+            if a12 isa QuoteNode
+                child_exprs[1] = a1_esc(Expr(:(.), a1.args[1],
+                                             Expr(:macro_name, a12_esc(a12.value))))
+            end
+        elseif a1 isa GlobalRef && a1.mod === Core
+            # Syntax-introduced macrocalls are listed here for reference.  We
+            # probably don't need to convert these.
+            if a1.name === Symbol("@cmd")
+            elseif a1.name === Symbol("@doc") && nargs === 4 # two macro args only
+                # Single-arg @doc is a lookup not corresponding to K"doc"
+                # Revise sometimes calls @doc with three args, but probably shouldn't
+                st_k = K"doc"
+                child_exprs = child_exprs[2:3]
+            elseif a1.name === Symbol("@int128_str")
+            elseif a1.name === Symbol("@int128_str")
+            elseif a1.name === Symbol("@big_str")
+            end
+        end
+    elseif e.head === Symbol("'")
+        @assert nargs === 1
+        st_k = K"call"
+        child_exprs = Any[e.head, e.args[1]]
+    elseif e.head === :. && nargs === 2
+        a2, a2_esc = unwrap_esc(e.args[2])
+        if a2 isa Expr && a2.head === :tuple
+            st_k = K"dotcall"
+            tuple_exprs = collect_expr_parameters(a2_esc(a2), 1)
+            child_exprs = pushfirst!(tuple_exprs, e.args[1])
+        elseif a2 isa QuoteNode
+            child_exprs[2] = a2_esc(a2.value)
+        end
+    elseif e.head === :for
+        @assert nargs === 2
+        child_exprs = Any[_to_iterspec(Any[e.args[1]], false), e.args[2]]
+    elseif e.head === :where
+        @assert nargs >= 2
+        e2,_ = unwrap_esc(e.args[2])
+        if !(e2 isa Expr && e2.head === :braces)
+            child_exprs = Any[e.args[1], Expr(:braces, e.args[2:end]...)]
+        end
+    elseif e.head in (:tuple, :vect, :braces)
+        child_exprs = collect_expr_parameters(e, 1)
+    elseif e.head in (:curly, :ref)
+        child_exprs = collect_expr_parameters(e, 2)
+    elseif e.head === :try
+        child_exprs = Any[e.args[1]]
+        # Expr:
+        # (try (block ...) var       (block ...) [block ...] [block ...])
+        # #     try        catch_var  catch       finally     else
+        # SyntaxTree:
+        #   (try (block ...)
+        #        [catch var (block ...)]
+        #        [else (block ...)]
+        #        [finally (block ...)])
+        e2 = unwrap_esc_(e.args[2])
+        e3 = unwrap_esc_(e.args[3])
+        if e2 !== false || e3 !== false
+            push!(child_exprs,
+                  Expr(:catch,
+                       e2 === false ? Expr(:catch_var_placeholder) : e.args[2],
+                       e3 === false ? nothing : e.args[3]))
+        end
+        if nargs >= 5
+            push!(child_exprs, Expr(:else, e.args[5]))
+        end
+        if nargs >= 4 && unwrap_esc_(e.args[4]) !== false
+            push!(child_exprs, Expr(:finally, e.args[4]))
+        end
+    elseif e.head === :flatten || e.head === :generator
+        st_k = K"generator"
+        child_exprs = Any[]
+        next = e
+        while next.head === :flatten
+            @assert next.args[1].head === :generator
+            push!(child_exprs, _to_iterspec(next.args[1].args[2:end], true))
+            next = next.args[1].args[1]
+        end
+        @assert next.head === :generator
+        push!(child_exprs, _to_iterspec(next.args[2:end], true))
+        pushfirst!(child_exprs, next.args[1])
+    elseif e.head === :ncat || e.head === :nrow
+        dim = unwrap_esc_(popfirst!(child_exprs))
+        st_flags |= JS.set_numeric_flags(dim)
+    elseif e.head === :typed_ncat
+        st_flags |= JS.set_numeric_flags(unwrap_esc_(e.args[2]))
+        deleteat!(child_exprs, 2)
+    elseif e.head === :(->)
+        @assert nargs === 2
+        a1, a1_esc = unwrap_esc(e.args[1])
+        if a1 isa Expr && a1.head === :block
+            # Expr parsing fails to make :parameters here...
+            lam_args = Any[]
+            lam_eqs = Any[]
+            for a in a1.args
+                a isa LineNumberNode && continue
+                a isa Expr && a.head === :(=) ? push!(lam_eqs, a) : push!(lam_args, a)
+            end
+            !isempty(lam_eqs) && push!(lam_args, Expr(:parameters, lam_eqs...))
+            child_exprs[1] = a1_esc(Expr(:tuple, lam_args...))
+        elseif !(a1 isa Expr && (a1.head in (:tuple, :where)))
+            child_exprs[1] = a1_esc(Expr(:tuple, a1))
+        end
+        src = maybe_extract_lnn(e.args[2], src)
+        child_exprs[2] = maybe_unwrap_arg(e.args[2])
+    elseif e.head === :call
+        child_exprs = collect_expr_parameters(e, 2)
+        a1,a1_esc = unwrap_esc(child_exprs[1])
+        if a1 isa Symbol
+            a1s = string(a1)
+            if is_dotted_operator(a1s)
+                # non-assigning dotop like .+ or .==
+                st_k = K"dotcall"
+                child_exprs[1] = a1_esc(Symbol(a1s[2:end]))
+            end
+        end
+    elseif e.head === :function
+        if nargs >= 2
+            src = maybe_extract_lnn(e.args[2], src)
+        end
+    elseif e.head === :(=)
+        if is_eventually_call(e.args[1])
+            st_k = K"function"
+            st_flags |= JS.SHORT_FORM_FUNCTION_FLAG
+            src = maybe_extract_lnn(e.args[2], src)
+            child_exprs[2] = maybe_unwrap_arg(e.args[2])
+        end
+    elseif e.head === :module
+        @assert nargs in (3, 4)
+        has_version = !isa(e.args[1], Bool)
+        if !e.args[1+has_version]
+            st_flags |= JS.BARE_MODULE_FLAG
+        end
+        child_exprs = has_version ?
+            Any[e.args[1], e.args[2+has_version], e.args[3+has_version]] :
+            Any[e.args[2+has_version], e.args[3+has_version]]
+    elseif e.head === :do
+        # Expr:
+        # (do (call f args...) (-> (tuple lam_args...) (block ...)))
+        # SyntaxTree:
+        # (call f args... (do (tuple lam_args...) (block ...)))
+        callargs = collect_expr_parameters(e.args[1], 2)
+        if e.args[1].head === :macrocall
+            st_k = K"macrocall"
+            if callargs[2] isa LineNumberNode
+                src = callargs[2]
+            end
+            deleteat!(callargs, 2)
+            c1,c1_esc = unwrap_esc(callargs[1])
+            callargs[1] = c1_esc(Expr(:macro_name, c1))
+        else
+            st_k = K"call"
+        end
+        child_exprs = Any[callargs..., Expr(:do_lambda, e.args[2].args...)]
+    elseif e.head === :let
+        if nargs >= 1
+            a1,_ = unwrap_esc(e.args[1])
+            if !(a1 isa Expr && a1.head === :block)
+                child_exprs[1] = Expr(:block, e.args[1])
+            end
+        end
+    elseif e.head === :struct
+        e.args[1] && (st_flags |= JS.MUTABLE_FLAG)
+        child_exprs = child_exprs[2:end]
+        # TODO handle docstrings after refactor
+    elseif (e.head === :using || e.head === :import)
+        _expr_replace!(e,
+                       (e)->(e isa Expr && e.head === :.),
+                       (e)->(e.head = :importpath))
+    elseif e.head === :kw
+        st_k = K"="
+    elseif e.head in (:local, :global) && nargs > 1
+        # Possible normalization
+        # child_exprs = Any[Expr(:tuple, child_exprs...)]
+    elseif e.head === :error
+        # Zero-child errors from parsing are leaf nodes.  We could change this
+        # upstream for consistency.
+        if nargs === 0
+            child_exprs = nothing
+            st_attrs[:value] = JS.ErrorVal()
+            st_flags |= JS.TRIVIA_FLAG
+        end
+    end
+
+    #---------------------------------------------------------------------------
+    # The following heads are not emitted from parsing, but old macros could
+    # produce these and they would historically be accepted by flisp lowering.
+    if e.head === Symbol("latestworld-if-toplevel")
+        st_k = K"latestworld_if_toplevel"
+    elseif e.head === Symbol("hygienic-scope")
+        st_k = K"hygienic_scope"
+    elseif e.head === :meta
+        # Messy and undocumented.  Only sometimes we want a K"meta".
+        if e.args[1] isa Expr && e.args[1].head === :purity
+            st_k = K"meta"
+            child_exprs = [Expr(:quoted_symbol, :purity), Base.EffectsOverride(e.args[1].args...)]
+        elseif nargs === 0
+            # pass
+        elseif e.args[1] === :nospecialize
+            if nargs === 1
+                child_exprs[1] = Expr(:quoted_symbol, :nospecialize)
+            elseif nargs > 2
+                st_k = K"block"
+                # Kick the can down the road (should only be simple atoms?)
+                child_exprs = map(c->Expr(:meta, :nospecialize, c), child_exprs[2:end])
+            elseif nargs === 2
+                st_id, src = _insert_convert_expr(e.args[2], graph, src)
+                setmeta!(SyntaxTree(graph, st_id); nospecialize=true)
+                return st_id, src
+            end
+        elseif e.args[1] in (:inline, :noinline, :generated, :generated_only,
+                             :max_methods, :optlevel, :toplevel, :push_loc, :pop_loc,
+                             :no_constprop, :aggressive_constprop, :specialize, :compile, :infer,
+                             :nospecializeinfer, :force_compile, :propagate_inbounds, :doc)
+            # TODO: Some need to be handled in lowering
+            for (i, ma) in enumerate(e.args)
+                if ma isa Symbol
+                    # @propagate_inbounds becomes (meta inline propagate_inbounds)
+                    child_exprs[i] = Expr(:quoted_symbol, e.args[i])
+                end
+            end
+        else
+            # Can't throw a hard error; it is explicitly tested that meta can take arbitrary keys.
+            @error("Unknown meta form at $src: `$e`\n$(sprint(dump, e))")
+            child_exprs[1] = Expr(:quoted_symbol, e.args[1])
+        end
+    elseif e.head === :scope_layer
+        @assert nargs === 2
+        @assert e.args[1] isa Symbol
+        @assert e.args[2] isa LayerId
+        st_id, src = _insert_convert_expr(e.args[1], graph, src)
+        setattr!(graph, st_id, scope_layer=e.args[2])
+        return st_id, src
+    elseif e.head === :symbolicgoto || e.head === :symboliclabel
+        @assert nargs === 1
+        st_k = e.head === :symbolicgoto ? K"symbolic_label" : K"symbolic_goto"
+        st_attrs[:name_val] = string(e.args[1])
+        child_exprs = nothing
+    elseif e.head in (:inline, :noinline)
+        @assert nargs === 1 && e.args[1] isa Bool
+        # TODO: JuliaLowering doesn't accept this (non-:meta) form yet
+        st_k = K"TOMBSTONE"
+        child_exprs = nothing
+    elseif e.head === :inbounds
+        @assert nargs === 1 && typeof(e.args[1]) in (Symbol, Bool)
+        # TODO: JuliaLowering doesn't accept this form yet
+        st_k = K"TOMBSTONE"
+        child_exprs = nothing
+    elseif e.head === :core
+        @assert nargs === 1
+        @assert e.args[1] isa Symbol
+        st_attrs[:name_val] = string(e.args[1])
+        child_exprs = nothing
+    elseif e.head === :islocal || e.head === :isglobal
+        st_k = K"extension"
+        child_exprs = [Expr(:quoted_symbol, e.head), e.args[1]]
+    elseif e.head === :block && nargs >= 1 &&
+        e.args[1] isa Expr && e.args[1].head === :softscope
+        # (block (softscope true) ex) produced with every REPL prompt.
+        # :hardscope exists too, but should just be a let, and appears to be
+        # unused in the wild.
+        ensure_attributes!(graph; scope_type=Symbol)
+        st_k = K"scope_block"
+        st_attrs[:scope_type] = :soft
+        child_exprs = e.args[2:end]
+    end
+
+    #---------------------------------------------------------------------------
+    # Possibly-temporary heads introduced by us converting the parent expr
+    if e.head === :macro_name
+        @assert nargs === 1
+        # Trim `@` for a correct SyntaxTree, although we need to add it back
+        # later for finding the macro
+        if e.args[1] === :(.)
+            mac_name = string(e.args[1][2])
+            mac_name = mac_name == "@__dot__" ? "." : mac_name[2:end]
+            child_exprs[1] = Expr(:(.), e.args[1][1], Symbol(mac_name))
+        else
+            mac_name = string(e.args[1])
+            mac_name = mac_name == "@__dot__" ? "." : mac_name[2:end]
+            child_exprs[1] = Symbol(mac_name)
+        end
+    elseif e.head === :catch_var_placeholder
+        st_k = K"Placeholder"
+        st_attrs[:name_val] = ""
+        child_exprs = nothing
+    elseif e.head === :quoted_symbol
+        st_k = K"Symbol"
+        st_attrs[:name_val] = String(e.args[1])
+        child_exprs = nothing
+    elseif e.head === :do_lambda
+        st_k = K"do"
+    end
+
+    #---------------------------------------------------------------------------
+    # Throw if this function isn't complete.  Finally, insert a new node into the
+    # graph and recurse on child_exprs
+    if st_k === K"None"
+        error("Unknown expr head at $src: `$(e.head)`\n$(sprint(dump, e))")
+    elseif st_k === K"TOMBSTONE"
+        return nothing, src
+    end
+
+    st_id = _insert_tree_node(graph, st_k, src, st_flags; st_attrs...)
+
+    # child_exprs === nothing means we want a leaf.  Note that setchildren! with
+    # an empty list makes a node non-leaf.
+    if isnothing(child_exprs)
+        return st_id, src
+    else
+        st_child_ids, last_src = _insert_child_exprs(e.head, child_exprs, graph, src)
+        setchildren!(graph, st_id, st_child_ids)
+        return st_id, last_src
+    end
+end
+
+function _insert_child_exprs(head::Symbol, child_exprs::Vector{Any},
+                             graph::SyntaxGraph, src::SourceAttrType)
+    st_child_ids = NodeId[]
+    last_src = src
+    for (i, c) in enumerate(child_exprs)
+        c_unwrapped, _ = unwrap_esc(c)
+        # If c::LineNumberNode is anywhere in a block OR c is not in tail
+        # position, we don't need to insert `nothing` here
+        if c_unwrapped isa LineNumberNode && (head === :block || head === :toplevel && i != length(child_exprs))
+            last_src = c_unwrapped
+        else
+            (c_id, last_src) = _insert_convert_expr(c, graph, last_src)
+            if !isnothing(c_id)
+                push!(st_child_ids, c_id)
+            end
+        end
+    end
+    return st_child_ids, last_src
+end
diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl
new file mode 100644
index 0000000000000..9c77473830335
--- /dev/null
+++ b/JuliaLowering/src/desugaring.jl
@@ -0,0 +1,4643 @@
+# Lowering Pass 2 - syntax desugaring
+
+struct DesugaringContext{GraphType} <: AbstractLoweringContext
+    graph::GraphType
+    bindings::Bindings
+    scope_layers::Vector{ScopeLayer}
+    mod::Module
+    expr_compat_mode::Bool
+end
+
+function DesugaringContext(ctx, expr_compat_mode::Bool)
+    graph = ensure_attributes(syntax_graph(ctx),
+                              kind=Kind, syntax_flags=UInt16,
+                              source=SourceAttrType,
+                              value=Any, name_val=String,
+                              scope_type=Symbol, # :hard or :soft
+                              var_id=IdTag,
+                              is_toplevel_thunk=Bool,
+                              toplevel_pure=Bool)
+    DesugaringContext(graph,
+                      ctx.bindings,
+                      ctx.scope_layers,
+                      current_layer(ctx).mod,
+                      expr_compat_mode)
+end
+
+#-------------------------------------------------------------------------------
+
+# Return true when `x` and `y` are "the same identifier", but also works with
+# bindings (and hence ssa vars). See also `is_identifier_like()`
+function is_same_identifier_like(ex::SyntaxTree, y::SyntaxTree)
+    return (kind(ex) == K"Identifier" && kind(y) == K"Identifier" && NameKey(ex) == NameKey(y)) ||
+           (kind(ex) == K"BindingId"  && kind(y) == K"BindingId"  && ex.var_id   == y.var_id)
+end
+
+function is_same_identifier_like(ex::SyntaxTree, name::AbstractString)
+    return kind(ex) == K"Identifier" && ex.name_val == name
+end
+
+function contains_identifier(ex::SyntaxTree, idents::AbstractVector{<:SyntaxTree})
+    contains_unquoted(ex) do e
+        any(is_same_identifier_like(e, id) for id in idents)
+    end
+end
+
+function contains_identifier(ex::SyntaxTree, idents...)
+    contains_unquoted(ex) do e
+        any(is_same_identifier_like(e, id) for id in idents)
+    end
+end
+
+function contains_ssa_binding(ctx, ex)
+    contains_unquoted(ex) do e
+        kind(e) == K"BindingId" && lookup_binding(ctx, e).is_ssa
+    end
+end
+
+# Return true if `f(e)` is true for any unquoted child of `ex`, recursively.
+function contains_unquoted(f::Function, ex::SyntaxTree)
+    if f(ex)
+        return true
+    elseif !is_leaf(ex) && !(kind(ex) in KSet"quote inert meta")
+        return any(contains_unquoted(f, e) for e in children(ex))
+    else
+        return false
+    end
+end
+
+# Identify some expressions that are safe to repeat
+#
+# TODO: Can we use this in more places?
+function is_effect_free(ex)
+    k = kind(ex)
+    # TODO: metas
+    is_literal(k) || is_identifier_like(ex) || k == K"Symbol" ||
+        k == K"inert" || k == K"top" || k == K"core" || k == K"Value"
+    # flisp also includes `a.b` with simple `a`, but this seems like a bug
+    # because this calls the user-defined getproperty?
+end
+
+function check_no_parameters(ex::SyntaxTree, msg)
+    i = find_parameters_ind(children(ex))
+    if i > 0
+        throw(LoweringError(ex[i], msg))
+    end
+end
+
+function check_no_assignment(exs, msg="misplaced assignment statement in `[ ... ]`")
+    i = findfirst(kind(e) == K"=" for e in exs)
+    if !isnothing(i)
+        throw(LoweringError(exs[i], msg))
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Destructuring
+
+# Convert things like `(x,y,z) = (a,b,c)` to assignments, eliminating the
+# tuple. Includes support for slurping/splatting. This function assumes that
+# `_tuple_sides_match` returns true, so the following have already been
+# checked:
+#   * There's max one `...` on the left hand side
+#   * There's max one `...` on the right hand side, in the last place, or
+#     matched with an lhs... in the last place. (required so that
+#     pairwise-matching terms from the right is valid)
+#   * Neither side has any key=val terms or parameter blocks
+#
+# Tuple elimination must act /as if/ the right hand side tuple was first
+# constructed followed by destructuring. In particular, any side effects due to
+# evaluating the individual terms in the right hand side tuple must happen in
+# order.
+function tuple_to_assignments(ctx, ex, is_const)
+    lhs = ex[1]
+    rhs = ex[2]
+    wrap(asgn) = is_const ? (@ast ctx ex [K"const" asgn]) : asgn
+
+    # Tuple elimination aims to turn assignments between tuples into lists of assignments.
+    #
+    # However, there's a complex interplay of side effects due to the
+    # individual assignments and these can be surprisingly complicated to
+    # model. For example `(x[i], y) = (f(), g)` can contain the following
+    # surprises:
+    # * `tmp = f()` calls `f` which might throw, or modify the bindings for
+    #   `x` or `y`.
+    # * `x[i] = tmp` is lowered to `setindex!` which might throw or modify the
+    #   bindings for `x` or `y`.
+    # * `g` might throw an `UndefVarError`
+    #
+    # Thus for correctness we introduce temporaries for all right hand sides
+    # with observable side effects and ensure they're evaluated in order.
+    n_lhs = numchildren(lhs)
+    n_rhs = numchildren(rhs)
+    stmts = SyntaxList(ctx)
+    rhs_tmps = SyntaxList(ctx)
+    for i in 1:n_rhs
+        rh = rhs[i]
+        r = if kind(rh) == K"..."
+            rh[1]
+        else
+            rh
+        end
+        k = kind(r)
+        if is_literal(k) || k == K"Symbol" || k == K"inert" || k == K"top" ||
+                k == K"core" || k == K"Value"
+            # Effect-free and nothrow right hand sides do not need a temporary
+            # (we require nothrow because the order of rhs terms is observable
+            #  due to sequencing, thus identifiers are not allowed)
+        else
+            # Example rhs which need a temporary
+            # * `f()` - arbitrary side effects to any binding
+            # * `z`   - might throw UndefVarError
+            tmp = emit_assign_tmp(stmts, ctx, r)
+            rh = kind(rh) == K"..." ? @ast(ctx, rh, [K"..." tmp]) : tmp
+        end
+        push!(rhs_tmps, rh)
+    end
+
+    il = 0
+    ir = 0
+    while il < n_lhs
+        il += 1
+        ir += 1
+        lh = lhs[il]
+        if kind(lh) == K"..."
+            # Exactly one lhs `...` occurs in the middle somewhere, with a
+            # general rhs which has at least as many non-`...` terms or one
+            # `...` term at the end.
+            # Examples:
+            #   (x, ys..., z) = (a, b, c, d)
+            #   (x, ys..., z) = (a, bs...)
+            #   (xs..., y)    = (a, bs...)
+            #   (xs...) = (a, b, c)
+            # in this case we can pairwise-match arguments from the end
+            # backward and emit a general tuple assignment for the middle.
+            jl = n_lhs
+            jr = n_rhs
+            while jl > il && jr > ir
+                if kind(lhs[jl]) == K"..." || kind(rhs_tmps[jr]) == K"..."
+                    break
+                end
+                jl -= 1
+                jr -= 1
+            end
+            middle = emit_assign_tmp(stmts, ctx,
+                @ast(ctx, rhs, [K"tuple" rhs_tmps[ir:jr]...]),
+                "rhs_tmp"
+            )
+            if il == jl
+                # (x, ys...) = (a,b,c)
+                # (x, ys...) = (a,bs...)
+                # (ys...)    = ()
+                push!(stmts, wrap(@ast ctx ex [K"=" lh[1] middle]))
+            else
+                # (x, ys..., z) = (a, b, c, d)
+                # (x, ys..., z) = (a, bs...)
+                # (xs..., y)    = (a, bs...)
+                push!(stmts, wrap(@ast ctx ex [K"=" [K"tuple" lhs[il:jl]...] middle]))
+            end
+            # Continue with the remainder of the list of non-splat terms
+            il = jl
+            ir = jr
+        else
+            rh = rhs_tmps[ir]
+            if kind(rh) == K"..."
+                push!(stmts, wrap(@ast ctx ex [K"=" [K"tuple" lhs[il:end]...] rh[1]]))
+                break
+            else
+                push!(stmts, wrap(@ast ctx ex [K"=" lh rh]))
+            end
+        end
+    end
+
+    @ast ctx ex [K"block"
+        stmts...
+        [K"removable" [K"tuple" rhs_tmps...]]
+    ]
+end
+
+# Create an assignment `$lhs = $rhs` where `lhs` must be "simple". If `rhs` is
+# a block, sink the assignment into the last statement of the block to keep
+# more expressions at top level. `rhs` should already be expanded.
+#
+# flisp: sink-assignment
+function sink_assignment(ctx, srcref, lhs, rhs)
+    @assert is_identifier_like(lhs)
+    if kind(rhs) == K"block"
+        @ast ctx srcref [K"block"
+            rhs[1:end-1]...
+            [K"=" lhs rhs[end]]
+        ]
+    else
+        @ast ctx srcref [K"=" lhs rhs]
+    end
+end
+
+function _tuple_sides_match(lhs, rhs)
+    N = max(length(lhs), length(rhs))
+    for i = 1:N+1
+        if i > length(lhs)
+            # (x, y)        = (a, b)      # match
+            # (x,)          = (a, b)      # no match
+            return i > length(rhs)
+        elseif kind(lhs[i]) == K"..."
+            # (x, ys..., z) = (a, b)      # match
+            # (x, ys...)    = (a,)        # match
+            return true
+        elseif i > length(rhs)
+            # (x, y)        = (a,)        # no match
+            # (x, y, zs...) = (a,)        # no match
+            return false
+        elseif kind(rhs[i]) == K"..."
+            # (x, y)        = (as...,)    # match
+            # (x, y, z)     = (a, bs...)  # match
+            # (x, y)        = (as..., b)  # no match
+            return i == length(rhs)
+        end
+    end
+end
+
+# Lower `(lhss...) = rhs` in contexts where `rhs` must be a tuple at runtime
+# by assuming that `getfield(rhs, i)` works and is efficient.
+function lower_tuple_assignment(ctx, assignment_srcref, lhss, rhs)
+    stmts = SyntaxList(ctx)
+    tmp = emit_assign_tmp(stmts, ctx, rhs, "rhs_tmp")
+    for (i, lh) in enumerate(lhss)
+        push!(stmts, @ast ctx assignment_srcref [K"="
+            lh
+            [K"call" "getfield"::K"core" tmp i::K"Integer"]
+        ])
+    end
+    makenode(ctx, assignment_srcref, K"block", stmts)
+end
+
+# Implement destructuring with `lhs` a tuple expression (possibly with
+# slurping) and `rhs` a general expression.
+#
+# Destructuring in this context is done via the iteration interface, though
+# calls `Base.indexed_iterate()` to allow for a fast path in cases where the
+# right hand side is directly indexable.
+function _destructure(ctx, assignment_srcref, stmts, lhs, rhs, is_const)
+    n_lhs = numchildren(lhs)
+    if n_lhs > 0
+        iterstate = new_local_binding(ctx, rhs, "iterstate")
+    end
+
+    end_stmts = SyntaxList(ctx)
+    wrap(asgn) = is_const ? (@ast ctx assignment_srcref [K"const" asgn]) : asgn
+
+    i = 0
+    for lh in children(lhs)
+        i += 1
+        if kind(lh) == K"..."
+            lh1 = if is_identifier_like(lh[1]) && !is_const
+                lh[1]
+            else
+                lhs_tmp = ssavar(ctx, lh[1], "lhs_tmp")
+                push!(end_stmts, expand_forms_2(ctx, wrap(@ast ctx lh[1] [K"=" lh[1] lhs_tmp])))
+                lhs_tmp
+            end
+            if i == n_lhs
+                # Slurping as last lhs, eg, for `zs` in
+                #   (x, y, zs...) = rhs
+                if kind(lh1) != K"Placeholder"
+                    push!(stmts, expand_forms_2(ctx,
+                        @ast ctx assignment_srcref [K"="
+                            lh1
+                            [K"call"
+                                "rest"::K"top"
+                                rhs
+                                if i > 1
+                                    iterstate
+                                end
+                            ]
+                        ]
+                    ))
+                end
+            else
+                # Slurping before last lhs. Eg, for `xs` in
+                #   (xs..., y, z) = rhs
+                # For this we call
+                #   (xs, tail) = Base.split_rest(...)
+                # then continue iteration with `tail` as new rhs.
+                tail = ssavar(ctx, lh, "tail")
+                push!(stmts,
+                    expand_forms_2(ctx,
+                        lower_tuple_assignment(ctx,
+                            assignment_srcref,
+                            (lh1, tail),
+                            @ast ctx assignment_srcref [K"call"
+                                "split_rest"::K"top"
+                                rhs
+                                (n_lhs - i)::K"Integer"
+                                if i > 1
+                                    iterstate
+                                end
+                            ]
+                        )
+                    )
+                )
+                rhs = tail
+                n_lhs = n_lhs - i
+                i = 0
+            end
+        else
+            # Normal case, eg, for `y` in
+            #   (x, y, z) = rhs
+            lh1 = if is_identifier_like(lh) && !is_const
+                lh
+            # elseif is_eventually_call(lh) (TODO??)
+            else
+                lhs_tmp = ssavar(ctx, lh, "lhs_tmp")
+                push!(end_stmts, expand_forms_2(ctx, wrap(@ast ctx lh [K"=" lh lhs_tmp])))
+                lhs_tmp
+            end
+            push!(stmts,
+                expand_forms_2(ctx,
+                    lower_tuple_assignment(ctx,
+                        assignment_srcref,
+                        i == n_lhs ? (lh1,) : (lh1, iterstate),
+                        @ast ctx assignment_srcref [K"call"
+                            "indexed_iterate"::K"top"
+                            rhs
+                            i::K"Integer"
+                            if i > 1
+                                iterstate
+                            end
+                        ]
+                    )
+                )
+            )
+        end
+    end
+    # Actual assignments must happen after the whole iterator is desctructured
+    # (https://github.com/JuliaLang/julia/issues/40574)
+    append!(stmts, end_stmts)
+    stmts
+end
+
+# Expands cases of property destructuring
+function expand_property_destruct(ctx, ex, is_const)
+    @assert numchildren(ex) == 2
+    lhs = ex[1]
+    @assert kind(lhs) == K"tuple"
+    if numchildren(lhs) != 1
+        throw(LoweringError(lhs, "Property destructuring must use a single `;` before the property names, eg `(; a, b) = rhs`"))
+    end
+    params = lhs[1]
+    @assert kind(params) == K"parameters"
+    rhs = ex[2]
+    stmts = SyntaxList(ctx)
+    rhs1 = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs))
+    for prop in children(params)
+        propname = kind(prop) == K"Identifier"                           ? prop    :
+                   kind(prop) == K"::" && kind(prop[1]) == K"Identifier" ? prop[1] :
+                   throw(LoweringError(prop, "invalid assignment location"))
+        push!(stmts, expand_forms_2(ctx, @ast ctx rhs1 [K"="
+            prop
+            [K"call"
+                "getproperty"::K"top"
+                rhs1
+                propname=>K"Symbol"
+            ]
+        ]))
+    end
+    push!(stmts, @ast ctx rhs1 [K"removable" rhs1])
+    makenode(ctx, ex, K"block", stmts)
+end
+
+# Expands all cases of general tuple destructuring, eg
+#   (x,y) = (a,b)
+function expand_tuple_destruct(ctx, ex, is_const)
+    lhs = ex[1]
+    @assert kind(lhs) == K"tuple"
+    rhs = ex[2]
+
+    num_slurp = 0
+    for lh in children(lhs)
+        num_slurp += (kind(lh) == K"...")
+        if num_slurp > 1
+            throw(LoweringError(lh, "multiple `...` in destructuring assignment are ambiguous"))
+        end
+    end
+
+    if kind(rhs) == K"tuple"
+        num_splat = sum(kind(rh) == K"..." for rh in children(rhs); init=0)
+        if num_splat == 0 && (numchildren(lhs) - num_slurp) > numchildren(rhs)
+            throw(LoweringError(ex, "More variables on left hand side than right hand in tuple assignment"))
+        end
+
+        if !any_assignment(children(rhs)) && !has_parameters(rhs) &&
+                _tuple_sides_match(children(lhs), children(rhs))
+            return expand_forms_2(ctx, tuple_to_assignments(ctx, ex, is_const))
+        end
+    end
+
+    stmts = SyntaxList(ctx)
+    rhs1 = if is_ssa(ctx, rhs) ||
+            (is_identifier_like(rhs) &&
+             !any(is_same_identifier_like(kind(l) == K"..." ? l[1] : l, rhs)
+                  for l in children(lhs)))
+        rhs
+    else
+        emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs))
+    end
+    _destructure(ctx, ex, stmts, lhs, rhs1, is_const)
+    push!(stmts, @ast ctx rhs1 [K"removable" rhs1])
+    makenode(ctx, ex, K"block", stmts)
+end
+
+#-------------------------------------------------------------------------------
+# Expand comparison chains
+
+function expand_scalar_compare_chain(ctx, srcref, terms, i)
+    comparisons = nothing
+    while i + 2 <= length(terms)
+        lhs = terms[i]
+        op = terms[i+1]
+        rhs = terms[i+2]
+        if kind(op) == K"."
+            break
+        end
+        comp = @ast ctx op [K"call"
+            op
+            lhs
+            rhs
+        ]
+        if isnothing(comparisons)
+            comparisons = comp
+        else
+            comparisons = @ast ctx srcref [K"&&"
+                comparisons
+                comp
+            ]
+        end
+        i += 2
+    end
+    (comparisons, i)
+end
+
+# Expanding comparison chains: (comparison a op b op c ...)
+#
+# We use && to combine pairs of adjacent scalar comparisons and .& to combine
+# vector-vector and vector-scalar comparisons. Combining scalar comparisons are
+# treated as having higher precedence than vector comparisons, thus:
+#
+# a < b < c   ==>  (a < b) && (b < c)
+# a .< b .< c   ==>  (a .< b) .& (b .< c)
+# a < b < c .< d .< e   ==>  (a < b && b < c) .& (c .< d) .& (d .< e)
+# a .< b .< c < d < e   ==>  (a .< b) .& (b .< c) .& (c < d && d < e)
+function expand_compare_chain(ctx, ex)
+    @assert kind(ex) == K"comparison"
+    terms = children(ex)
+    @chk numchildren(ex) >= 3
+    @chk isodd(numchildren(ex))
+    i = 1
+    comparisons = nothing
+    # Combine any number of dotted comparisons
+    while i + 2 <= length(terms)
+        if kind(terms[i+1]) != K"."
+            (comp, i) = expand_scalar_compare_chain(ctx, ex, terms, i)
+        else
+            lhs = terms[i]
+            op = terms[i+1]
+            rhs = terms[i+2]
+            i += 2
+            comp = @ast ctx op [K"dotcall"
+                op[1]
+                lhs
+                rhs
+            ]
+        end
+        if isnothing(comparisons)
+            comparisons = comp
+        else
+            comparisons = @ast ctx ex [K"dotcall"
+                "&"::K"top"
+                # ^^ NB: Flisp bug. Flisp lowering essentially does
+                #     adopt_scope("&"::K"Identifier", ctx.mod)
+                # here which seems wrong if the comparison chain arose from
+                # a macro in a different module. One fix would be to use
+                #     adopt_scope("&"::K"Identifier", ex)
+                # to get the module of the comparison expression for the
+                # `&` operator. But a simpler option is probably to always
+                # use `Base.&` so we do that.
+                comparisons
+                comp
+            ]
+        end
+    end
+    comparisons
+end
+
+#-------------------------------------------------------------------------------
+# Expansion of array indexing
+function _arg_to_temp(ctx, stmts, ex, eq_is_kw=false)
+    k = kind(ex)
+    if is_effect_free(ex)
+        ex
+    elseif k == K"..."
+        @ast ctx ex [k _arg_to_temp(ctx, stmts, ex[1])]
+    elseif k == K"=" && eq_is_kw
+        @ast ctx ex [K"=" ex[1] _arg_to_temp(ctx, stmts, ex[2], false)]
+    elseif k == K"parameters"
+        mapchildren(ctx, ex) do e
+            _arg_to_temp(ctx, stmts, e, true)
+        end
+    else
+        emit_assign_tmp(stmts, ctx, ex)
+    end
+end
+
+# Make the *arguments* of an expression safe for multiple evaluation, for
+# example
+#
+#   a[f(x)] => (temp=f(x); a[temp])
+#
+# Any assignments are added to `stmts` and a result expression returned which
+# may be used in further desugaring.
+function remove_argument_side_effects(ctx, stmts, ex)
+    if is_literal(ex) || is_identifier_like(ex)
+        ex
+    else
+        k = kind(ex)
+        if k == K"let"
+            emit_assign_tmp(stmts, ctx, ex)
+        else
+            args = SyntaxList(ctx)
+            eq_is_kw = ((k == K"call" || k == K"dotcall") && is_prefix_call(ex)) || k == K"ref"
+            for (i,e) in enumerate(children(ex))
+                push!(args, _arg_to_temp(ctx, stmts, e, eq_is_kw && i > 1))
+            end
+            # TODO: Copy attributes?
+            @ast ctx ex [k args...]
+        end
+    end
+end
+
+# Replace any `begin` or `end` symbols with an expression indexing the array
+# `arr` in the `n`th index. `splats` are a list of the splatted arguments that
+# precede index `n` `is_last` is true when this is this
+# last index
+function replace_beginend(ctx, ex, arr, n, splats, is_last)
+    k = kind(ex)
+    if k == K"Identifier" && ex.name_val in ("begin", "end")
+        indexfunc = @ast ctx ex (ex.name_val == "begin" ? "firstindex" : "lastindex")::K"top"
+        if length(splats) == 0
+            if is_last && n == 1
+                @ast ctx ex [K"call" indexfunc arr]
+            else
+                @ast ctx ex [K"call" indexfunc arr n::K"Integer"]
+            end
+        else
+            splat_lengths = SyntaxList(ctx)
+            for splat in splats
+                push!(splat_lengths, @ast ctx ex [K"call" "length"::K"top" splat])
+            end
+            @ast ctx ex [K"call"
+                indexfunc
+                arr
+                [K"call"
+                    "+"::K"top"
+                    (n - length(splats))::K"Integer"
+                    splat_lengths...
+                ]
+            ]
+        end
+    elseif is_leaf(ex) || is_quoted(ex)
+        ex
+    elseif k == K"ref"
+        # inside ref, only replace within the first argument
+        @ast ctx ex [k
+            replace_beginend(ctx, ex[1], arr, n, splats, is_last)
+            ex[2:end]...
+        ]
+    # elseif k == K"kw" - keyword args - what does this mean here?
+    #   # note from flisp
+    #   # TODO: this probably should not be allowed since keyword args aren't
+    #   # positional, but in this context we have just used their positions anyway
+    else
+        mapchildren(e->replace_beginend(ctx, e, arr, n, splats, is_last), ctx, ex)
+    end
+end
+
+# Go through indices and replace the `begin` or `end` symbol
+# `arr` - array being indexed
+# `idxs` - list of indices
+# returns the expanded indices. Any statements that need to execute first are
+# added to ctx.stmts.
+function process_indices(sctx::StatementListCtx, arr, idxs)
+    has_splats = any(kind(i) == K"..." for i in idxs)
+    idxs_out = SyntaxList(sctx)
+    splats = SyntaxList(sctx)
+    for (n, idx0) in enumerate(idxs)
+        is_splat = kind(idx0) == K"..."
+        val = replace_beginend(sctx, is_splat ? idx0[1] : idx0,
+                               arr, n, splats, n == length(idxs))
+        # TODO: kwarg?
+        idx = !has_splats || is_simple_atom(sctx, val) ? val : emit_assign_tmp(sctx, val)
+        if is_splat
+            push!(splats, idx)
+        end
+        push!(idxs_out, is_splat ? @ast(sctx, idx0, [K"..." idx]) : idx)
+    end
+    return idxs_out
+end
+
+# Expand things like `f()[i,end]`, add to `sctx.stmts` (temporaries for
+# computing indices) and return
+# * `arr` -  The array (may be a temporary ssa value)
+# * `idxs` - List of indices
+function expand_ref_components(sctx::StatementListCtx, ex)
+    check_no_parameters(ex, "unexpected semicolon in array expression")
+    @assert kind(ex) == K"ref"
+    @chk numchildren(ex) >= 1
+    arr = ex[1]
+    idxs = ex[2:end]
+    if any(contains_identifier(e, "begin", "end") for e in idxs)
+        arr = emit_assign_tmp(sctx, arr)
+    end
+    new_idxs = process_indices(sctx, arr, idxs)
+    return (arr, new_idxs)
+end
+
+function expand_setindex(ctx, ex)
+    @assert kind(ex) == K"=" && numchildren(ex) == 2
+    lhs = ex[1]
+    sctx = with_stmts(ctx)
+    (arr, idxs) = expand_ref_components(sctx, lhs)
+    rhs = emit_assign_tmp(sctx, ex[2])
+    @ast ctx ex [K"block"
+        sctx.stmts...
+        expand_forms_2(ctx, [K"call"
+            "setindex!"::K"top"
+            arr
+            rhs
+            idxs...
+        ])
+        [K"removable" rhs]
+    ]
+end
+
+#-------------------------------------------------------------------------------
+# Expansion of broadcast notation `f.(x .+ y)`
+
+function expand_dotcall(ctx, ex)
+    k = kind(ex)
+    if k == K"dotcall"
+        @chk numchildren(ex) >= 1
+        farg = ex[1]
+        args = SyntaxList(ctx)
+        append!(args, ex[2:end])
+        kws = remove_kw_args!(ctx, args)
+        @ast ctx ex [K"call"
+            (isnothing(kws) ? "broadcasted" : "broadcasted_kwsyntax")::K"top"
+            farg    # todo: What about (z=f).(x,y) ?
+            (expand_dotcall(ctx, arg) for arg in args)...
+            if !isnothing(kws)
+                [K"parameters"
+                    kws...
+                ]
+            end
+        ]
+    elseif k == K"comparison"
+        expand_dotcall(ctx, expand_compare_chain(ctx, ex))
+    elseif k == K".&&" || k == K".||"
+        @ast ctx ex [K"call"
+            "broadcasted"::K"top"
+            (k == K".&&" ? "andand" : "oror")::K"top"
+            (expand_dotcall(ctx, arg) for arg in children(ex))...
+        ]
+    else
+        ex
+    end
+end
+
+function expand_fuse_broadcast(ctx, ex)
+    if kind(ex) == K".=" || kind(ex) == K".op="
+        @chk numchildren(ex) == 2
+        lhs = ex[1]
+        kl = kind(lhs)
+        rhs = expand_dotcall(ctx, ex[2])
+        @ast ctx ex [K"call"
+            "materialize!"::K"top"
+            if kl == K"ref"
+                sctx = with_stmts(ctx)
+                (arr, idxs) = expand_ref_components(sctx, lhs)
+                [K"block"
+                    sctx.stmts...
+                    [K"call"
+                        "dotview"::K"top"
+                        arr
+                        idxs...
+                    ]
+                ]
+            elseif kl == K"." && numchildren(lhs) == 2
+                [K"call"
+                    "dotgetproperty"::K"top"
+                    children(lhs)...
+                ]
+            else
+                lhs
+            end
+            if !(kind(rhs) == K"call" && kind(rhs[1]) == K"top" && rhs[1].name_val == "broadcasted")
+                # Ensure the rhs of .= is always wrapped in a call to `broadcasted()`
+                [K"call"(rhs)
+                    "broadcasted"::K"top"
+                    "identity"::K"top"
+                    rhs
+                ]
+            else
+                rhs
+            end
+        ]
+    else
+        @ast ctx ex [K"call"
+            "materialize"::K"top"
+            expand_dotcall(ctx, ex)
+        ]
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Expansion of generators and comprehensions
+
+# Return any subexpression which is a 'return` statement, not including any
+# inside quoted sections or method bodies.
+function find_return(ex::SyntaxTree)
+    if kind(ex) == K"return"
+        return ex
+    elseif !is_leaf(ex) && !(kind(ex) in KSet"quote inert meta function ->")
+        for e in children(ex)
+            r = find_return(e)
+            if !isnothing(r)
+                return r
+            end
+        end
+    else
+        return nothing
+    end
+end
+
+function check_no_return(ex)
+    r = find_return(ex)
+    if !isnothing(r)
+        throw(LoweringError(r, "`return` not allowed inside comprehension or generator"))
+    end
+end
+
+# Return true for nested tuples of the same identifiers
+function similar_tuples_or_identifiers(a, b)
+    if kind(a) == K"tuple" && kind(b) == K"tuple"
+        return numchildren(a) == numchildren(b) &&
+            all( ((x,y),)->similar_tuples_or_identifiers(x,y),
+                zip(children(a), children(b)))
+    else
+        is_same_identifier_like(a,b)
+    end
+end
+
+# Return the anonymous function taking an iterated value, for use with the
+# first argument to `Base.Generator`
+function func_for_generator(ctx, body, iter_value_destructuring)
+    if similar_tuples_or_identifiers(iter_value_destructuring, body)
+        # Use Base.identity for generators which are filters such as
+        # `(x for x in xs if f(x))`. This avoids creating a new type.
+        @ast ctx body "identity"::K"top"
+    else
+        @ast ctx body [K"->"
+            [K"tuple"
+                iter_value_destructuring
+            ]
+            [K"block"
+                body
+            ]
+        ]
+    end
+end
+
+function expand_generator(ctx, ex)
+    @chk numchildren(ex) >= 2
+    body = ex[1]
+    check_no_return(body)
+    if numchildren(ex) > 2
+        # Uniquify outer vars by NameKey
+        outervars_by_key = Dict{NameKey,typeof(ex)}()
+        for iterspecs in ex[2:end-1]
+            for iterspec in children(iterspecs)
+                foreach_lhs_name(iterspec[1]) do var
+                    @assert kind(var) == K"Identifier" # Todo: K"BindingId"?
+                    outervars_by_key[NameKey(var)] = var
+                end
+            end
+        end
+        outervar_assignments = SyntaxList(ctx)
+        for (k,v) in sort(collect(pairs(outervars_by_key)), by=first)
+            push!(outervar_assignments, @ast ctx v [K"=" v v])
+        end
+        body = @ast ctx ex [K"let"
+            [K"block"
+                outervar_assignments...
+            ]
+            [K"block"
+                body
+            ]
+        ]
+    end
+    for iterspecs_ind in numchildren(ex):-1:2
+        iterspecs = ex[iterspecs_ind]
+        filter_test = nothing
+        if kind(iterspecs) == K"filter"
+            filter_test = iterspecs[2]
+            iterspecs = iterspecs[1]
+        end
+        if kind(iterspecs) != K"iteration"
+            throw(LoweringError("""Expected `K"iteration"` iteration specification in generator"""))
+        end
+        iter_ranges = SyntaxList(ctx)
+        iter_lhss = SyntaxList(ctx)
+        for iterspec in children(iterspecs)
+            @chk kind(iterspec) == K"in"
+            @chk numchildren(iterspec) == 2
+            push!(iter_lhss, iterspec[1])
+            push!(iter_ranges, iterspec[2])
+        end
+        iter_value_destructuring = if numchildren(iterspecs) == 1
+            iterspecs[1][1]
+        else
+            iter_lhss = SyntaxList(ctx)
+            for iterspec in children(iterspecs)
+                push!(iter_lhss, iterspec[1])
+            end
+            @ast ctx iterspecs [K"tuple" iter_lhss...]
+        end
+        iter = if length(iter_ranges) > 1
+            @ast ctx iterspecs [K"call"
+                "product"::K"top"
+                iter_ranges...
+            ]
+        else
+            iter_ranges[1]
+        end
+        if !isnothing(filter_test)
+            iter = @ast ctx ex [K"call"
+                "Filter"::K"top"
+                func_for_generator(ctx, filter_test, iter_value_destructuring)
+                iter
+            ]
+        end
+        body = @ast ctx ex [K"call"
+            "Generator"::K"top"
+            func_for_generator(ctx, body, iter_value_destructuring)
+            iter
+        ]
+        if iterspecs_ind < numchildren(ex)
+            body = @ast ctx ex [K"call"
+                "Flatten"::K"top"
+                body
+            ]
+        end
+    end
+    body
+end
+
+function expand_comprehension_to_loops(ctx, ex)
+    @assert kind(ex) == K"typed_comprehension"
+    element_type = ex[1]
+    gen = ex[2]
+    @assert kind(gen) == K"generator"
+    body = gen[1]
+    check_no_return(body)
+    # TODO: check_no_break_continue
+    iterspecs = gen[2]
+    @assert kind(iterspecs) == K"iteration"
+    new_iterspecs = SyntaxList(ctx)
+    iters = SyntaxList(ctx)
+    iter_defs = SyntaxList(ctx)
+    for iterspec in children(iterspecs)
+        iter = emit_assign_tmp(iter_defs, ctx, iterspec[2], "iter")
+        push!(iters, iter)
+        push!(new_iterspecs, @ast ctx iterspec [K"in" iterspec[1] iter])
+    end
+    # Lower to nested for loops
+    idx = new_local_binding(ctx, iterspecs, "idx")
+    @ast ctx ex [K"block"
+        iter_defs...
+        full_iter := if length(iters) == 1
+            iters[1]
+        else
+            [K"call"
+                "product"::K"top"
+                iters...
+            ]
+        end
+        iter_size := [K"call" "IteratorSize"::K"top" full_iter]
+        size_unknown := [K"call" "isa"::K"core" iter_size "SizeUnknown"::K"top"]
+        result    := [K"call" "_array_for"::K"top" element_type full_iter iter_size]
+        [K"=" idx [K"call" "first"::K"top" [K"call" "LinearIndices"::K"top" result]]]
+        [K"for" [K"iteration" Iterators.reverse(new_iterspecs)...]
+            [K"block"
+                val := body
+                # TODO: inbounds setindex
+                [K"if" size_unknown
+                    [K"call" "push!"::K"top" result val]
+                    [K"call" "setindex!"::K"top" result val idx]
+                ]
+                #[K"call" "println"::K"top" [K"call" "typeof"::K"core" idx]]
+                [K"=" idx [K"call" "add_int"::K"top" idx 1::K"Integer"]]
+            ]
+        ]
+        result
+    ]
+end
+
+# Mimics native lowerer's tuple-wrap function (julia-syntax.scm:2723-2736)
+# Unwraps only ONE layer of `...` and wraps sequences of non-splat args in tuples.
+# Example: `[a, b, xs..., c]` -> `[tuple(a, b), xs, tuple(c)]`
+function _wrap_unsplatted_args(ctx, call_ex, args)
+    result = SyntaxList(ctx)
+    non_splat_run = SyntaxList(ctx)
+    for arg in args
+        if kind(arg) == K"..."
+            # Flush any accumulated non-splat args
+            if !isempty(non_splat_run)
+                push!(result, @ast ctx call_ex [K"call" "tuple"::K"core" non_splat_run...])
+                non_splat_run = SyntaxList(ctx)
+            end
+            # Unwrap only ONE layer of `...` (corresponds to (cadr x) in native lowerer)
+            push!(result, arg[1])
+        else
+            # Accumulate non-splat args
+            push!(non_splat_run, arg)
+        end
+    end
+    # Flush any remaining non-splat args
+    if !isempty(non_splat_run)
+        push!(result, @ast ctx call_ex [K"call" "tuple"::K"core" non_splat_run...])
+    end
+    result
+end
+
+function expand_splat(ctx, ex, topfunc, args)
+    # Matches native lowerer's algorithm
+    # https://github.com/JuliaLang/julia/blob/f362f47338de099cdeeb1b2d81b3ec1948443274/src/julia-syntax.scm#L2761-2762:
+    # 1. Unwrap one layer of `...` from each argument (via _wrap_unsplatted_args)
+    # 2. Create `_apply_iterate(iterate, f, wrapped_args...)` WITHOUT expanding args yet
+    # 3. Recursively expand the entire call - if any wrapped_arg still contains `...`,
+    #    the recursive expansion will handle it, naturally building nested structure
+    #
+    # Example: tuple((xs...)...) recursion:
+    #   Pass 1: unwrap outer `...` -> _apply_iterate(iterate, tuple, (xs...))
+    #   Pass 2: expand sees (xs...) in call context, unwraps again
+    #           -> _apply_iterate(iterate, _apply_iterate, tuple(iterate, tuple), xs)
+
+    wrapped_args = _wrap_unsplatted_args(ctx, ex, args)
+
+    # Construct the unevaluated _apply_iterate call
+    result = @ast ctx ex [K"call"
+        "_apply_iterate"::K"core"
+        "iterate"::K"top"
+        topfunc
+        wrapped_args...
+    ]
+
+    # Recursively expand the entire call (matching native's expand-forms)
+    return expand_forms_2(ctx, result)
+end
+
+function expand_array(ctx, ex, topfunc)
+    args = children(ex)
+    check_no_assignment(args)
+    topfunc = @ast ctx ex topfunc::K"top"
+    if any(kind(arg) == K"..." for arg in args)
+        expand_splat(ctx, ex, topfunc, args)
+    else
+        @ast ctx ex [K"call"
+            topfunc
+            expand_forms_2(ctx, args)...
+        ]
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Expansion of array concatenation notation `[a b ; c d]` etc
+
+function expand_vcat(ctx, ex)
+    check_no_parameters(ex, "unexpected semicolon in array expression")
+    check_no_assignment(children(ex))
+    had_row = false
+    had_row_splat = false
+    is_typed = kind(ex) == K"typed_vcat"
+    eltype   = is_typed ? ex[1]     : nothing
+    elements = is_typed ? ex[2:end] : ex[1:end]
+    for e in elements
+        k = kind(e)
+        if k == K"row"
+            had_row = true
+            had_row_splat = had_row_splat || any(kind(e1) == K"..." for e1 in children(e))
+        end
+    end
+    if had_row_splat
+        # In case there is splatting inside `hvcat`, collect each row as a
+        # separate tuple and pass those to `hvcat_rows` instead (ref #38844)
+        rows = SyntaxList(ctx)
+        for e in elements
+            if kind(e) == K"row"
+                push!(rows, @ast ctx e [K"tuple" children(e)...])
+            else
+                push!(rows, @ast ctx e [K"tuple" e])
+            end
+        end
+        fname = is_typed ? "typed_hvcat_rows" : "hvcat_rows"
+        @ast ctx ex [K"call"
+            fname::K"top"
+            eltype
+            rows...
+        ]
+    else
+        row_sizes = SyntaxList(ctx)
+        flat_elems = SyntaxList(ctx)
+        for e in elements
+            if kind(e) == K"row"
+                rowsize = numchildren(e)
+                append!(flat_elems, children(e))
+            else
+                rowsize = 1
+                push!(flat_elems, e)
+            end
+            push!(row_sizes, @ast ctx e rowsize::K"Integer")
+        end
+        if had_row
+            fname = is_typed ? "typed_hvcat" : "hvcat"
+            @ast ctx ex [K"call"
+                fname::K"top"
+                eltype
+                [K"tuple" row_sizes...]
+                flat_elems...
+            ]
+        else
+            fname = is_typed ? "typed_vcat" : "vcat"
+            @ast ctx ex [K"call"
+                fname::K"top"
+                eltype
+                flat_elems...
+            ]
+        end
+    end
+end
+
+function ncat_contains_row(ex)
+    k = kind(ex)
+    if k == K"row"
+        return true
+    elseif k == K"nrow"
+        return any(ncat_contains_row(e) for e in children(ex))
+    else
+        return false
+    end
+end
+
+# flip first and second dimension for row major layouts
+function nrow_flipdim(row_major, d)
+    return !row_major ? d :
+           d == 1     ? 2 :
+           d == 2     ? 1 : d
+end
+
+function flatten_ncat_rows!(flat_elems, nrow_spans, row_major, parent_layout_dim, ex)
+    # Note that most of the checks for valid nesting here are also checked in
+    # the parser - they can only fail when nrcat is constructed
+    # programmatically (eg, by a macro).
+    k = kind(ex)
+    if k == K"row"
+        layout_dim = 1
+        @chk parent_layout_dim != 1 (ex,"Badly nested rows in `ncat`")
+    elseif k == K"nrow"
+        dim = numeric_flags(ex)
+        @chk dim > 0                (ex,"Unsupported dimension $dim in ncat")
+        @chk !row_major || dim != 2 (ex,"2D `nrow` cannot be mixed with `row` in `ncat`")
+        layout_dim = nrow_flipdim(row_major, dim)
+    elseif kind(ex) == K"..."
+        throw(LoweringError(ex, "Splatting ... in an `ncat` with multiple dimensions is not supported"))
+    else
+        push!(flat_elems, ex)
+        for ld in parent_layout_dim-1:-1:1
+            push!(nrow_spans, (ld, 1))
+        end
+        return
+    end
+    row_start = length(flat_elems)
+    @chk parent_layout_dim > layout_dim (ex, "Badly nested rows in `ncat`")
+    for e in children(ex)
+        if layout_dim == 1
+            @chk kind(e) ∉ KSet"nrow row" (e,"Badly nested rows in `ncat`")
+        end
+        flatten_ncat_rows!(flat_elems, nrow_spans, row_major, layout_dim, e)
+    end
+    n_elems_in_row = length(flat_elems) - row_start
+    for ld in parent_layout_dim-1:-1:layout_dim
+        push!(nrow_spans, (ld, n_elems_in_row))
+    end
+end
+
+# ncat comes in various layouts which we need to lower to special cases
+# - one dimensional along some dimension
+# - balanced column first or row first
+# - ragged column first or row first
+function expand_ncat(ctx, ex)
+    is_typed = kind(ex) == K"typed_ncat"
+    outer_dim = numeric_flags(ex)
+    @chk outer_dim > 0 (ex,"Unsupported dimension in ncat")
+    eltype      = is_typed ? ex[1]     : nothing
+    elements    = is_typed ? ex[2:end] : ex[1:end]
+    hvncat_name = is_typed ? "typed_hvncat" : "hvncat"
+    if !any(kind(e) in KSet"row nrow" for e in elements)
+        # One-dimensional ncat along some dimension
+        #   [a ;;; b ;;; c]
+        return @ast ctx ex [K"call"
+            hvncat_name::K"top"
+            eltype
+            outer_dim::K"Integer"
+            elements...
+        ]
+    end
+    # N-dimensional case. May be
+    # * column first or row first:
+    #   [a;b ;;; c;d]
+    #   [a b ;;; c d]
+    # * balanced or ragged:
+    #   [a ; b ;;; c ; d]
+    #   [a ; b ;;; c]
+    row_major = any(ncat_contains_row, elements)
+    @chk !row_major || outer_dim != 2 (ex,"2D `nrow` cannot be mixed with `row` in `ncat`")
+    flat_elems = SyntaxList(ctx)
+    # `ncat` syntax nests lower dimensional `nrow` inside higher dimensional
+    # ones (with the exception of K"row" when `row_major` is true). Each nrow
+    # spans a number of elements and we first extract that.
+    nrow_spans = Vector{Tuple{Int,Int}}()
+    for e in elements
+        flatten_ncat_rows!(flat_elems, nrow_spans, row_major,
+                           nrow_flipdim(row_major, outer_dim), e)
+    end
+    push!(nrow_spans, (outer_dim, length(flat_elems)))
+    # Construct the shape specification by postprocessing the flat list of
+    # spans.
+    sort!(nrow_spans, by=first) # depends on a stable sort
+    is_balanced = true
+    i = 1
+    dim_lengths = zeros(outer_dim)
+    prev_dimspan = 1
+    while i <= length(nrow_spans)
+        layout_dim, dimspan = nrow_spans[i]
+        while i <= length(nrow_spans) && nrow_spans[i][1] == layout_dim
+            if dimspan != nrow_spans[i][2]
+                is_balanced = false
+                break
+            end
+            i += 1
+        end
+        is_balanced || break
+        @assert dimspan % prev_dimspan == 0
+        dim_lengths[layout_dim] = dimspan ÷ prev_dimspan
+        prev_dimspan = dimspan
+    end
+    shape_spec = SyntaxList(ctx)
+    if is_balanced
+        if row_major
+            dim_lengths[1], dim_lengths[2] = dim_lengths[2], dim_lengths[1]
+        end
+        # For balanced concatenations, the shape is specified by the length
+        # along each dimension.
+        for dl in dim_lengths
+            push!(shape_spec, @ast ctx ex dl::K"Integer")
+        end
+    else
+        # For unbalanced/ragged concatenations, the shape is specified by the
+        # number of elements in each N-dimensional slice of the array, from layout
+        # dimension 1 to N. See the documentation for `hvncat` for details.
+        i = 1
+        while i <= length(nrow_spans)
+            groups_for_dim = Int[]
+            layout_dim = nrow_spans[i][1]
+            while i <= length(nrow_spans) && nrow_spans[i][1] == layout_dim
+                push!(groups_for_dim, nrow_spans[i][2])
+                i += 1
+            end
+            push!(shape_spec,
+                @ast ctx ex [K"tuple"
+                    [i::K"Integer" for i in groups_for_dim]...
+                ]
+            )
+        end
+    end
+    @ast ctx ex [K"call"
+        hvncat_name::K"top"
+        eltype
+        [K"tuple" shape_spec...]
+        row_major::K"Bool"
+        flat_elems...
+    ]
+end
+
+#-------------------------------------------------------------------------------
+# Expand assignments
+
+# Expand UnionAll definitions, eg `X{T} = Y{T,T}`
+function expand_unionall_def(ctx, srcref, lhs, rhs, is_const=true)
+    if numchildren(lhs) <= 1
+        throw(LoweringError(lhs, "empty type parameter list in type alias"))
+    end
+    name = lhs[1]
+    expand_forms_2(
+        ctx,
+        @ast ctx srcref [K"block"
+            rr := [K"where" rhs lhs[2:end]...]
+            [is_const ? K"constdecl" : K"assign_or_constdecl_if_global" name rr]
+            [K"removable" rr]
+        ]
+    )
+end
+
+# Expand general assignment syntax, including
+#   * UnionAll definitions
+#   * Chained assignments
+#   * Setting of structure fields
+#   * Assignments to array elements
+#   * Destructuring
+#   * Typed variable declarations
+function expand_assignment(ctx, ex, is_const=false)
+    @chk numchildren(ex) == 2
+    lhs = ex[1]
+    rhs = ex[2]
+    kl = kind(lhs)
+    if kind(ex) == K"function"
+        # `const f() = ...` - The `const` here is inoperative, but the syntax
+        # happened to work in earlier versions, so simply strip `const`.
+        expand_forms_2(ctx, ex[1])
+    elseif kl == K"curly"
+        expand_unionall_def(ctx, ex, lhs, rhs, is_const)
+    elseif kind(rhs) == K"="
+        # Expand chains of assignments
+        # a = b = c  ==>  b=c; a=c
+        stmts = SyntaxList(ctx)
+        push!(stmts, lhs)
+        while kind(rhs) == K"="
+            push!(stmts, rhs[1])
+            rhs = rhs[2]
+        end
+        if is_identifier_like(rhs)
+            tmp_rhs = nothing
+            rr = rhs
+        else
+            tmp_rhs = ssavar(ctx, rhs, "rhs")
+            rr = tmp_rhs
+        end
+        # In const a = b = c, only a is const
+        stmts[1] = @ast ctx ex [(is_const ? K"constdecl" : K"=") stmts[1] rr]
+        for i in 2:length(stmts)
+            stmts[i] = @ast ctx ex [K"=" stmts[i] rr]
+        end
+        if !isnothing(tmp_rhs)
+            pushfirst!(stmts, @ast ctx ex [K"=" tmp_rhs rhs])
+        end
+        expand_forms_2(ctx,
+            @ast ctx ex [K"block"
+                stmts...
+                [K"removable" rr]
+            ]
+        )
+    elseif is_identifier_like(lhs)
+        if is_const
+            @ast ctx ex [K"block"
+                rr := expand_forms_2(ctx, rhs)
+                [K"constdecl" lhs rr]
+                [K"removable" rr]
+            ]
+        else
+            sink_assignment(ctx, ex, lhs, expand_forms_2(ctx, rhs))
+        end
+    elseif kl == K"."
+        # a.b = rhs  ==>  setproperty!(a, :b, rhs)
+        @chk !is_const (ex, "cannot declare `.` form const")
+        @chk numchildren(lhs) == 2
+        a = lhs[1]
+        b = lhs[2]
+        stmts = SyntaxList(ctx)
+        # TODO: Do we need these first two temporaries?
+        if !is_identifier_like(a)
+            a = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, a), "a_tmp")
+        end
+        if kind(b) != K"Symbol"
+            b = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, b), "b_tmp")
+        end
+        if !is_identifier_like(rhs) && !is_literal(rhs)
+            rhs = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs), "rhs_tmp")
+        end
+        @ast ctx ex [K"block"
+            stmts...
+            [K"call" "setproperty!"::K"top" a b rhs]
+            [K"removable" rhs]
+        ]
+    elseif kl == K"tuple"
+        if has_parameters(lhs)
+            expand_property_destruct(ctx, ex, is_const)
+        else
+            expand_tuple_destruct(ctx, ex, is_const)
+        end
+    elseif kl == K"ref"
+        # a[i1, i2] = rhs
+        @chk !is_const (ex, "cannot declare ref form const")
+        expand_forms_2(ctx, expand_setindex(ctx, ex))
+    elseif kl == K"::" && numchildren(lhs) == 2
+        x = lhs[1]
+        T = lhs[2]
+        res = if is_const
+            expand_forms_2(ctx, @ast ctx ex [K"const"
+                [K"="
+                     lhs[1]
+                     convert_for_type_decl(ctx, ex, rhs, T, true)
+                 ]])
+        elseif is_identifier_like(x)
+            # Identifier in lhs[1] is a variable type declaration, eg
+            # x::T = rhs
+            @ast ctx ex [K"block"
+                if kind(x) !== K"Placeholder"
+                     [K"decl" x T]
+                end
+                is_const ? [K"const" [K"=" x rhs]] : [K"=" x rhs]
+            ]
+        else
+            # Otherwise just a type assertion, eg
+            # a[i]::T = rhs  ==>  (a[i]::T; a[i] = rhs)
+            # a[f(x)]::T = rhs  ==>  (tmp = f(x); a[tmp]::T; a[tmp] = rhs)
+            stmts = SyntaxList(ctx)
+            l1 = remove_argument_side_effects(ctx, stmts, lhs[1])
+            # TODO: What about (f(z),y)::T = rhs? That's broken syntax and
+            # needs to be detected somewhere but won't be detected here. Maybe
+            # it shows that remove_argument_side_effects() is not the ideal
+            # solution here?
+            # TODO: handle underscore?
+            @ast ctx ex [K"block"
+                stmts...
+                [K"::" l1 lhs[2]]
+                [K"=" l1 rhs]
+            ]
+        end
+        expand_forms_2(ctx, res)
+    elseif kl == K"dotcall"
+        throw(LoweringError(lhs, "invalid dot call syntax on left hand side of assignment"))
+    elseif kl == K"typed_hcat"
+        throw(LoweringError(lhs, "invalid spacing in left side of indexed assignment"))
+    elseif kl == K"typed_vcat" || kl == K"typed_ncat"
+        throw(LoweringError(lhs, "unexpected `;` in left side of indexed assignment"))
+    elseif kl == K"vect" || kl == K"hcat" || kl == K"vcat" || kl == K"ncat"
+        throw(LoweringError(lhs, "use `(a, b) = ...` to assign multiple values"))
+    else
+        throw(LoweringError(lhs, "invalid assignment location"))
+    end
+end
+
+function expand_update_operator(ctx, ex)
+    k = kind(ex)
+    dotted = k == K".op="
+
+    @chk numchildren(ex) == 3
+    lhs = ex[1]
+    op = ex[2]
+    rhs = ex[3]
+
+    stmts = SyntaxList(ctx)
+
+    declT = nothing
+    if kind(lhs) == K"::"
+        # eg `a[i]::T += 1`
+        declT = lhs[2]
+        decl_lhs = lhs
+        lhs = lhs[1]
+    end
+
+    if kind(lhs) == K"ref"
+        # eg `a[end] = rhs`
+        sctx = with_stmts(ctx, stmts)
+        (arr, idxs) = expand_ref_components(sctx, lhs)
+        lhs = @ast ctx lhs [K"ref" arr idxs...]
+    end
+
+    lhs = remove_argument_side_effects(ctx, stmts, lhs)
+
+    if dotted
+        if !(kind(lhs) == K"ref" || (kind(lhs) == K"." && numchildren(lhs) == 2))
+            # `f() .+= rhs`
+            lhs = emit_assign_tmp(stmts, ctx, lhs)
+        end
+    else
+        if kind(lhs) == K"tuple" && contains_ssa_binding(ctx, lhs)
+            # If remove_argument_side_effects needed to replace an expression
+            # with an ssavalue, then it can't be updated by assignment
+            # (JuliaLang/julia#30062)
+            throw(LoweringError(lhs, "invalid multiple assignment location"))
+        end
+    end
+
+    @ast ctx ex [K"block"
+        stmts...
+        [(dotted ? K".=" : K"=")
+            lhs
+            [(dotted ? K"dotcall" : K"call")
+                op
+                if isnothing(declT)
+                    lhs
+                else
+                    [K"::"(decl_lhs) lhs declT]
+                end
+                rhs
+            ]
+        ]
+    ]
+end
+
+#-------------------------------------------------------------------------------
+# Expand logical conditional statements
+
+# Flatten nested && or || nodes and expand their children
+function expand_cond_children(ctx, ex, cond_kind=kind(ex), flat_children=SyntaxList(ctx))
+    for e in children(ex)
+        if kind(e) == cond_kind
+            expand_cond_children(ctx, e, cond_kind, flat_children)
+        else
+            push!(flat_children, expand_forms_2(ctx, e))
+        end
+    end
+    flat_children
+end
+
+# Expand condition in, eg, `if` or `while`
+function expand_condition(ctx, ex)
+    isblock = kind(ex) == K"block"
+    test = isblock ? ex[end] : ex
+    k = kind(test)
+    if k == K"&&" || k == K"||"
+        # `||` and `&&` get special lowering so that they compile directly to
+        # jumps rather than first computing a bool and then jumping.
+        cs = expand_cond_children(ctx, test)
+        @assert length(cs) > 1
+        test = makenode(ctx, test, k, cs)
+    else
+        test = expand_forms_2(ctx, test)
+    end
+    if isblock
+        # Special handling so that the rules for `&&` and `||` can be applied
+        # to the last statement of a block
+        @ast ctx ex [K"block" map(e->expand_forms_2(ctx,e), ex[1:end-1])... test]
+    else
+        test
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Expand let blocks
+
+function expand_let(ctx, ex)
+    @chk numchildren(ex) == 2
+    bindings = ex[1]
+    @chk kind(bindings) == K"block"
+    blk = ex[2]
+    scope_type = get(ex, :scope_type, :hard)
+    if numchildren(bindings) == 0
+        return @ast ctx ex [K"scope_block"(scope_type=scope_type) blk]
+    end
+    for binding in Iterators.reverse(children(bindings))
+        kb = kind(binding)
+        if is_sym_decl(kb)
+            blk = @ast ctx ex [K"scope_block"(scope_type=scope_type)
+                [K"local" binding]
+                blk
+            ]
+        elseif kb == K"=" && numchildren(binding) == 2
+            lhs = binding[1]
+            rhs = binding[2]
+            kl = kind(lhs)
+            if kl == K"Identifier" || kl == K"BindingId"
+                blk = @ast ctx binding [K"block"
+                    tmp := rhs
+                    [K"scope_block"(ex, scope_type=scope_type)
+                        [K"local"(lhs) lhs]
+                        [K"always_defined" lhs]
+                        [K"="(binding) lhs tmp]
+                        blk
+                    ]
+                ]
+            elseif kl == K"::"
+                var = lhs[1]
+                if !(kind(var) in KSet"Identifier BindingId")
+                    throw(LoweringError(var, "Invalid assignment location in let syntax"))
+                end
+                blk = @ast ctx binding [K"block"
+                    tmp := rhs
+                    type := lhs[2]
+                    [K"scope_block"(ex, scope_type=scope_type)
+                        [K"local"(lhs) [K"::" var type]]
+                        [K"always_defined" var]
+                        [K"="(binding) var tmp]
+                        blk
+                    ]
+                ]
+            elseif kind(lhs) == K"tuple"
+                lhs_locals = SyntaxList(ctx)
+                foreach_lhs_name(lhs) do var
+                    push!(lhs_locals, @ast ctx var [K"local" var])
+                    push!(lhs_locals, @ast ctx var [K"always_defined" var])
+                end
+                blk = @ast ctx binding [K"block"
+                    tmp := rhs
+                    [K"scope_block"(ex, scope_type=scope_type)
+                        lhs_locals...
+                        [K"="(binding) lhs tmp]
+                        blk
+                    ]
+                ]
+            else
+                throw(LoweringError(lhs, "Invalid assignment location in let syntax"))
+            end
+        elseif kind(binding) == K"function"
+            sig = binding[1]
+            func_name = assigned_function_name(sig)
+            if isnothing(func_name)
+                # Some valid function syntaxes define methods on existing types and
+                # don't really make sense with let:
+                #    let A.f() = 1 ... end
+                #    let (obj::Callable)() = 1 ... end
+                throw(LoweringError(sig, "Function signature does not define a local function name"))
+            end
+            blk = @ast ctx binding [K"block"
+                [K"scope_block"(ex, scope_type=scope_type)
+                    [K"local"(func_name) func_name]
+                    [K"always_defined" func_name]
+                    binding
+                    [K"scope_block"(ex, scope_type=scope_type)
+                        # The inside of the block is isolated from the closure,
+                        # which itself can only capture values from the outside.
+                        blk
+                    ]
+                ]
+            ]
+        else
+            throw(LoweringError(binding, "Invalid binding in let"))
+            continue
+        end
+    end
+    return blk
+end
+
+#-------------------------------------------------------------------------------
+# Expand named tuples
+
+function _named_tuple_expr(ctx, srcref, names, values)
+    if isempty(names)
+        @ast ctx srcref [K"call" "NamedTuple"::K"core"]
+    else
+        @ast ctx srcref [K"call"
+            [K"curly" "NamedTuple"::K"core" [K"tuple" names...]]
+            # NOTE: don't use `tuple` head, so an assignment expression as a value
+            # doesn't turn this into another named tuple.
+            [K"call" "tuple"::K"core" values...]
+        ]
+    end
+end
+
+function _merge_named_tuple(ctx, srcref, old, new)
+    if isnothing(old)
+        new
+    else
+        @ast ctx srcref [K"call" "merge"::K"top" old new]
+    end
+end
+
+function expand_named_tuple(ctx, ex, kws;
+                            field_name="named tuple field",
+                            element_name="named tuple element")
+    name_strs = Set{String}()
+    names = SyntaxList(ctx)
+    values = SyntaxList(ctx)
+    current_nt = nothing
+    for (i,kw) in enumerate(kws)
+        k = kind(kw)
+        appended_nt = nothing
+        name = nothing
+        if kind(k) == K"Identifier"
+            # x  ==>  x = x
+            name = to_symbol(ctx, kw)
+            value = kw
+        elseif k == K"="
+            # x = a
+            if kind(kw[1]) != K"Identifier" && kind(kw[1]) != K"Placeholder"
+                throw(LoweringError(kw[1], "invalid $field_name name"))
+            end
+            if kind(kw[2]) == K"..."
+                throw(LoweringError(kw[2], "`...` cannot be used in a value for a $field_name"))
+            end
+            name = to_symbol(ctx, kw[1])
+            value = kw[2]
+        elseif k == K"."
+            # a.x ==> x=a.x
+            if kind(kw[2]) != K"Symbol"
+                throw(LoweringError(kw, "invalid $element_name"))
+            end
+            name = to_symbol(ctx, kw[2])
+            value = kw
+        elseif k == K"call" && is_infix_op_call(kw) && numchildren(kw) == 3 &&
+                is_same_identifier_like(kw[1], "=>")
+            # a=>b   ==>  $a=b
+            appended_nt = _named_tuple_expr(ctx, kw, (kw[2],), (kw[3],))
+            nothing, nothing
+        elseif k == K"..."
+            # args...  ==> splat pairs
+            appended_nt = kw[1]
+            if isnothing(current_nt) && isempty(names)
+                # Must call merge to create NT from an initial splat
+                current_nt = _named_tuple_expr(ctx, ex, (), ())
+            end
+            nothing, nothing
+        else
+            throw(LoweringError(kw, "Invalid $element_name"))
+        end
+        if !isnothing(name)
+            if kind(name) == K"Symbol"
+                name_str = name.name_val
+                if name_str in name_strs
+                    throw(LoweringError(name, "Repeated $field_name name"))
+                end
+                push!(name_strs, name_str)
+            end
+            push!(names, name)
+            push!(values, value)
+        end
+        if !isnothing(appended_nt)
+            if !isempty(names)
+                current_nt = _merge_named_tuple(ctx, ex, current_nt,
+                                                _named_tuple_expr(ctx, ex, names, values))
+                empty!(names)
+                empty!(values)
+            end
+            current_nt = _merge_named_tuple(ctx, ex, current_nt, appended_nt)
+        end
+    end
+    if !isempty(names) || isnothing(current_nt)
+        current_nt = _merge_named_tuple(ctx, ex, current_nt,
+                                        _named_tuple_expr(ctx, ex, names, values))
+    end
+    @assert !isnothing(current_nt)
+    current_nt
+end
+
+#-------------------------------------------------------------------------------
+# Call expansion
+
+function expand_kw_call(ctx, srcref, farg, args, kws)
+    @ast ctx srcref [K"block"
+        func := farg
+        kw_container := expand_named_tuple(ctx, srcref, kws;
+                                           field_name="keyword argument",
+                                           element_name="keyword argument")
+        if all(kind(kw) == K"..." for kw in kws)
+            # In this case need to check kws nonempty at runtime
+            [K"if"
+                [K"call" "isempty"::K"top" kw_container]
+                [K"call" func args...]
+                [K"call" "kwcall"::K"core" kw_container func args...]
+            ]
+        else
+            [K"call" "kwcall"::K"core" kw_container func args...]
+        end
+    ]
+end
+
+# Special rule: Any becomes core.Any regardless of the module
+# scope, and don't need GC roots.
+function expand_ccall_argtype(ctx, ex)
+    if is_same_identifier_like(ex, "Any")
+        @ast ctx ex "Any"::K"core"
+    else
+        expand_forms_2(ctx, ex)
+    end
+end
+
+# Expand the (sym,lib) argument to ccall/cglobal
+function expand_C_library_symbol(ctx, ex)
+    expanded = expand_forms_2(ctx, ex)
+    if kind(ex) == K"tuple"
+        expanded = @ast ctx ex [K"static_eval"(meta=name_hint("function name and library expression"))
+            expanded
+        ]
+    end
+    return expanded
+end
+
+function expand_ccall(ctx, ex)
+    @assert kind(ex) == K"call" && is_core_ref(ex[1], "ccall")
+    if numchildren(ex) < 4
+        throw(LoweringError(ex, "too few arguments to ccall"))
+    end
+    cfunc_name = ex[2]
+    # Detect calling convention if present.
+    known_conventions = ("cdecl", "stdcall", "fastcall", "thiscall", "llvmcall")
+    cconv = if any(is_same_identifier_like(ex[3], id) for id in known_conventions)
+        ex[3]
+    end
+    if isnothing(cconv)
+        rt_idx = 3
+    else
+        rt_idx = 4
+        if numchildren(ex) < 5
+            throw(LoweringError(ex, "too few arguments to ccall with calling convention specified"))
+        end
+    end
+    return_type = ex[rt_idx]
+    arg_type_tuple = ex[rt_idx+1]
+    args = ex[rt_idx+2:end]
+    if kind(arg_type_tuple) != K"tuple"
+        msg = "ccall argument types must be a tuple; try `(T,)`"
+        if kind(return_type) == K"tuple"
+            throw(LoweringError(return_type, msg*" and check if you specified a correct return type"))
+        else
+            throw(LoweringError(arg_type_tuple, msg))
+        end
+    end
+    arg_types = children(arg_type_tuple)
+    vararg_type = nothing
+    if length(arg_types) >= 1
+        va = arg_types[end]
+        if kind(va) == K"..."
+            @chk numchildren(va) == 1
+            # Ok: vararg function
+            vararg_type = expand_ccall_argtype(ctx, va[1])
+            arg_types = arg_types[1:end-1]
+            if length(arg_types) === 0
+                throw(LoweringError(va, "C ABI prohibits vararg without one required argument"))
+            end
+        end
+    end
+    # todo: use multi-range errors here
+    if length(args) < length(arg_types)
+        throw(LoweringError(ex, "Too few arguments in ccall compared to argument types"))
+    elseif length(args) > length(arg_types) && isnothing(vararg_type)
+        throw(LoweringError(ex, "More arguments than types in ccall"))
+    end
+    sctx = with_stmts(ctx)
+    expanded_types = SyntaxList(ctx)
+    for argt in arg_types
+        if kind(argt) == K"..."
+            throw(LoweringError(argt, "only the trailing ccall argument type should have `...`"))
+        end
+        push!(expanded_types, expand_ccall_argtype(ctx, argt))
+    end
+    for _ in length(arg_types)+1:length(args)
+        push!(expanded_types, vararg_type)
+    end
+
+    # An improvement might be wrap the use of types in cconvert in a special
+    # K"global_scope" expression which modifies the scope resolution. This
+    # would at least make the rules self consistent if not pretty.
+    #
+    # One small improvement we make here is to emit temporaries for all the
+    # types used during expansion so at least we don't have their side effects
+    # more than once.
+    types_for_conv = SyntaxList(ctx)
+    for argt in expanded_types
+        push!(types_for_conv, emit_assign_tmp(sctx, argt))
+    end
+    gc_roots = SyntaxList(ctx)
+    unsafe_args  = SyntaxList(ctx)
+    for (i,arg) in enumerate(args)
+        if i > length(expanded_types)
+            raw_argt = expanded_types[end]
+            push!(expanded_types, raw_argt)
+            argt = types_for_conv[end]
+        else
+            raw_argt = expanded_types[i]
+            argt = types_for_conv[i]
+        end
+        exarg = expand_forms_2(ctx, arg)
+        if is_core_Any(raw_argt)
+            push!(unsafe_args, exarg)
+        else
+            cconverted_arg = emit_assign_tmp(sctx,
+                @ast ctx argt [K"call"
+                    "cconvert"::K"top"
+                    argt
+                    exarg
+                ]
+            )
+            push!(gc_roots, cconverted_arg)
+            push!(unsafe_args,
+                @ast ctx argt [K"call"
+                    "unsafe_convert"::K"top"
+                    argt
+                    cconverted_arg
+                ]
+            )
+        end
+    end
+    @ast ctx ex [K"block"
+        sctx.stmts...
+        [K"foreigncall"
+            expand_C_library_symbol(ctx, cfunc_name)
+            [K"static_eval"(meta=name_hint("ccall return type"))
+                expand_forms_2(ctx, return_type)
+            ]
+            [K"static_eval"(meta=name_hint("ccall argument type"))
+                [K"call"
+                    "svec"::K"core"
+                    expanded_types...
+                ]
+            ]
+            (isnothing(vararg_type) ? 0 : length(arg_types))::K"Integer"
+            if isnothing(cconv)
+                "ccall"::K"Symbol"
+            else
+                cconv=>K"Symbol"
+            end
+            unsafe_args...
+            gc_roots... # GC roots
+        ]
+    ]
+end
+
+function remove_kw_args!(ctx, args::SyntaxList)
+    kws = nothing
+    j = 0
+    num_parameter_blocks = 0
+    for i in 1:length(args)
+        arg = args[i]
+        k = kind(arg)
+        if k == K"="
+            if isnothing(kws)
+                kws = SyntaxList(ctx)
+            end
+            push!(kws, arg)
+        elseif k == K"parameters"
+            num_parameter_blocks += 1
+            if num_parameter_blocks > 1
+                throw(LoweringError(arg, "Cannot have more than one group of keyword arguments separated with `;`"))
+            end
+            if numchildren(arg) == 0
+                continue # ignore empty parameters (issue #18845)
+            end
+            if isnothing(kws)
+                kws = SyntaxList(ctx)
+            end
+            append!(kws, children(arg))
+        else
+            j += 1
+            if j < i
+                args[j] = args[i]
+            end
+        end
+    end
+    resize!(args, j)
+    return kws
+end
+
+function expand_call(ctx, ex)
+    farg = ex[1]
+    if is_core_ref(farg, "ccall")
+        return expand_ccall(ctx, ex)
+    elseif is_core_ref(farg, "cglobal")
+        @chk numchildren(ex) in 2:3  (ex, "cglobal must have one or two arguments")
+        return @ast ctx ex [K"call"
+            ex[1]
+            expand_C_library_symbol(ctx, ex[2])
+            if numchildren(ex) == 3
+                expand_forms_2(ctx, ex[3])
+            end
+        ]
+    end
+    args = copy(ex[2:end])
+    kws = remove_kw_args!(ctx, args)
+    if !isnothing(kws)
+        return expand_forms_2(ctx, expand_kw_call(ctx, ex, farg, args, kws))
+    end
+    if any(kind(arg) == K"..." for arg in args)
+        # Splatting, eg, `f(a, xs..., b)`
+        expand_splat(ctx, ex, expand_forms_2(ctx, farg), args)
+    elseif kind(farg) == K"Identifier" && farg.name_val == "include"
+        # world age special case
+        r = ssavar(ctx, ex)
+        @ast ctx ex [K"block"
+            [K"=" r [K"call"
+                expand_forms_2(ctx, farg)
+                expand_forms_2(ctx, args)...
+            ]]
+            (::K"latestworld_if_toplevel")
+            r
+        ]
+    else
+        @ast ctx ex [K"call"
+            expand_forms_2(ctx, farg)
+            expand_forms_2(ctx, args)...
+        ]
+    end
+end
+
+#-------------------------------------------------------------------------------
+
+function expand_dot(ctx, ex)
+    @chk numchildren(ex) in (1,2)  (ex, "`.` form requires either one or two children")
+
+    if numchildren(ex) == 1
+        # eg, `f = .+`
+        # Upstream TODO: Remove the (. +) representation and replace with use
+        # of DOTOP_FLAG? This way, `K"."` will be exclusively used for
+        # getproperty.
+        @ast ctx ex [K"call"
+            "BroadcastFunction"::K"top"
+            ex[1]
+        ]
+    elseif numchildren(ex) == 2
+        # eg, `x.a` syntax
+        rhs = ex[2]
+        # Required to support the possibly dubious syntax `a."b"`. See
+        # https://github.com/JuliaLang/julia/issues/26873
+        # Syntax edition TODO: reconsider this; possibly restrict to only K"String"?
+        if !(kind(rhs) == K"string" || is_leaf(rhs))
+            throw(LoweringError(rhs, "Unrecognized field access syntax"))
+        end
+        @ast ctx ex [K"call"
+            "getproperty"::K"top"
+            ex[1]
+            rhs
+        ]
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Expand for loops
+
+function expand_for(ctx, ex)
+    iterspecs = ex[1]
+
+    @chk kind(iterspecs) == K"iteration"
+
+    # Loop variables not declared `outer` are reassigned for each iteration of
+    # the innermost loop in case the user assigns them to something else.
+    # (Maybe we should filter these to remove vars not assigned in the loop?
+    # But that would ideally happen after the variable analysis pass, not
+    # during desugaring.)
+    copied_vars = SyntaxList(ctx)
+    for iterspec in iterspecs[1:end-1]
+        @chk kind(iterspec) == K"in"
+        lhs = iterspec[1]
+        if kind(lhs) != K"outer"
+            foreach_lhs_name(lhs) do var
+                push!(copied_vars, @ast ctx var [K"=" var var])
+            end
+        end
+    end
+
+    loop = ex[2]
+    for i in numchildren(iterspecs):-1:1
+        iterspec = iterspecs[i]
+        lhs = iterspec[1]
+
+        outer = kind(lhs) == K"outer"
+        lhs_local_defs = SyntaxList(ctx)
+        lhs_outer_defs = SyntaxList(ctx)
+        if outer
+            lhs = lhs[1]
+        end
+        foreach_lhs_name(lhs) do var
+            if outer
+                push!(lhs_outer_defs, @ast ctx var var)
+            else
+                push!(lhs_local_defs, @ast ctx var [K"local" var])
+            end
+        end
+
+        iter_ex = iterspec[2]
+        next = new_local_binding(ctx, iterspec, "next")
+        state = ssavar(ctx, iterspec, "state")
+        collection = ssavar(ctx, iter_ex, "collection")
+
+        # Assign iteration vars and next state
+        body = @ast ctx iterspec [K"block"
+            lhs_local_defs...
+            lower_tuple_assignment(ctx, iterspec, (lhs, state), next)
+            loop
+        ]
+
+        body = if i == numchildren(iterspecs)
+            # Innermost loop gets the continue label and copied vars
+            @ast ctx ex [K"break_block"
+                "loop_cont"::K"symbolic_label"
+                [K"let"(scope_type=:neutral)
+                     [K"block"
+                         copied_vars...
+                     ]
+                     body
+                ]
+            ]
+        else
+            # Outer loops get a scope block to contain the iteration vars
+            @ast ctx ex [K"scope_block"(scope_type=:neutral)
+                body
+            ]
+        end
+
+        loop = @ast ctx ex [K"block"
+            if outer
+                [K"assert"
+                    "require_existing_locals"::K"Symbol"
+                    lhs_outer_defs...
+                ]
+            end
+            [K"="(iter_ex) collection iter_ex]
+            # First call to iterate is unrolled
+            #   next = top.iterate(collection)
+            [K"="(iterspec) next [K"call" "iterate"::K"top" collection]]
+            [K"if"(iterspec) # if next !== nothing
+                [K"call"(iterspec)
+                    "not_int"::K"top"
+                    [K"call" "==="::K"core" next "nothing"::K"core"]
+                ]
+                [K"_do_while"(ex)
+                    [K"block"
+                        body
+                        # Advance iterator
+                        [K"="(iterspec) next [K"call" "iterate"::K"top" collection state]]
+                    ]
+                    [K"call"(iterspec)
+                        "not_int"::K"top"
+                        [K"call" "==="::K"core" next "nothing"::K"core"]
+                    ]
+                ]
+            ]
+        ]
+    end
+
+    @ast ctx ex [K"break_block" "loop_exit"::K"symbolic_label"
+        loop
+    ]
+end
+
+#-------------------------------------------------------------------------------
+# Expand try/catch/finally
+
+function match_try(ex)
+    @chk numchildren(ex) > 1 "Invalid `try` form"
+    try_ = ex[1]
+    catch_ = nothing
+    finally_ = nothing
+    else_ = nothing
+    for e in ex[2:end]
+        k = kind(e)
+        if k == K"catch" && isnothing(catch_)
+            @chk numchildren(e) == 2 "Invalid `catch` form"
+            catch_ = e
+        elseif k == K"else" && isnothing(else_)
+            @chk numchildren(e) == 1
+            else_ = e[1]
+        elseif k == K"finally" && isnothing(finally_)
+            @chk numchildren(e) == 1
+            finally_ = e[1]
+        else
+            throw(LoweringError(ex, "Invalid clause in `try` form"))
+        end
+    end
+    (try_, catch_, else_, finally_)
+end
+
+function expand_try(ctx, ex)
+    (try_, catch_, else_, finally_) = match_try(ex)
+
+    if !isnothing(finally_)
+        # TODO: check unmatched symbolic gotos in try.
+    end
+
+    try_body = @ast ctx try_ [K"scope_block"(scope_type=:neutral) try_]
+
+    if isnothing(catch_)
+        try_block = try_body
+    else
+        exc_var = catch_[1]
+        catch_block = catch_[2]
+        if !is_identifier_like(exc_var)
+            throw(LoweringError(exc_var, "Expected an identifier as exception variable"))
+        end
+        try_block = @ast ctx ex [K"trycatchelse"
+            try_body
+            [K"scope_block"(catch_, scope_type=:neutral)
+                if kind(exc_var) != K"Placeholder"
+                    [K"block"
+                        [K"="(exc_var) exc_var [K"call" current_exception::K"Value"]]
+                        catch_block
+                    ]
+                else
+                    catch_block
+                end
+            ]
+            if !isnothing(else_)
+                [K"scope_block"(else_, scope_type=:neutral) else_]
+            end
+        ]
+    end
+
+    if isnothing(finally_)
+        try_block
+    else
+        @ast ctx ex [K"tryfinally"
+            try_block
+            [K"scope_block"(finally_, scope_type=:neutral) finally_]
+        ]
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Expand local/global/const declarations
+
+# Create local/global declarations, and possibly type declarations for each name
+# on an assignment LHS.  Works recursively with complex left hand side
+# assignments containing tuple destructuring.  Eg, given
+#   (x::T, (y::U, z))
+#   strip out stmts = (local x) (decl x T) (local x) (decl y U) (local z)
+#   and return (x, (y, z))
+function make_lhs_decls(ctx, stmts, declkind, declmeta, ex, type_decls=true)
+    k = kind(ex)
+    if k == K"Identifier" || k == K"Value" && ex.value isa GlobalRef
+        # TODO: consider removing support for Expr(:global, GlobalRef(...)) and
+        # other Exprs that cannot be produced by the parser (tested by
+        # test/precompile.jl #50538).
+        if !isnothing(declmeta)
+            push!(stmts, makenode(ctx, ex, declkind, ex; meta=declmeta))
+        else
+            push!(stmts, makenode(ctx, ex, declkind, ex))
+        end
+    elseif k == K"Placeholder"
+        nothing
+    elseif (k === K"::" && numchildren(ex) === 2) || k in KSet"call curly where"
+        if type_decls
+            @chk numchildren(ex) == 2
+            name = ex[1]
+            if kind(name) == K"Identifier"
+                push!(stmts, makenode(ctx, ex, K"decl", name, ex[2]))
+            else
+                # TODO: Currently, this ignores the LHS in `_::T = val`.
+                # We should probably do one of the following:
+                # - Throw a LoweringError if that's not too breaking
+                # - `convert(T, rhs)::T` and discard the result which is what
+                #   `x::T = rhs` would do if x is never used again.
+                @chk kind(name) == K"Placeholder"
+                return
+            end
+        end
+        make_lhs_decls(ctx, stmts, declkind, declmeta, ex[1], type_decls)
+    elseif k == K"tuple" || k == K"parameters"
+        for e in children(ex)
+            make_lhs_decls(ctx, stmts, declkind, declmeta, e, type_decls)
+        end
+    else
+        throw(LoweringError(ex, "invalid kind $k in $declkind declaration"))
+    end
+end
+
+# Separate decls and assignments (which require re-expansion)
+# local x, (y=2), z ==> local x; local z; y = 2
+function expand_decls(ctx, ex)
+    declkind = kind(ex)
+    @assert declkind in KSet"local global"
+    declmeta = get(ex, :meta, nothing)
+    bindings = children(ex)
+    stmts = SyntaxList(ctx)
+    for binding in bindings
+        if is_prec_assignment(kind(binding))
+            @chk numchildren(binding) == 2
+            # expand_assignment will create the type decls
+            make_lhs_decls(ctx, stmts, declkind, declmeta, binding[1], false)
+            push!(stmts, expand_assignment(ctx, binding))
+        elseif is_sym_decl(binding) || kind(binding) in (K"Value", K"Placeholder")
+            make_lhs_decls(ctx, stmts, declkind, declmeta, binding, true)
+        elseif kind(binding) == K"function"
+            make_lhs_decls(ctx, stmts, declkind, declmeta, binding[1], false)
+            push!(stmts, expand_forms_2(ctx, binding))
+        else
+            throw(LoweringError(ex, "invalid syntax in variable declaration"))
+        end
+    end
+    makenode(ctx, ex, K"block", stmts)
+end
+
+# Iterate over the variable names assigned to from a "fancy assignment left hand
+# side" such as nested tuple destructuring, curlies, and calls.
+function foreach_lhs_name(f::Function, ex)
+    k = kind(ex)
+    if k == K"Placeholder"
+        # Ignored
+    elseif is_identifier_like(ex)
+        f(ex)
+    elseif (k === K"::" && numchildren(ex) === 2) || k in KSet"call curly where"
+        foreach_lhs_name(f, ex[1])
+    elseif k in KSet"tuple parameters"
+        for c in children(ex)
+            foreach_lhs_name(f, c)
+        end
+    end
+    return nothing
+end
+
+function expand_const_decl(ctx, ex)
+    k = kind(ex[1])
+    if k == K"global"
+        asgn = ex[1][1]
+        @chk (kind(asgn) == K"=" || kind(asgn) == K"function") (ex, "expected assignment after `const`")
+        globals = SyntaxList(ctx)
+        foreach_lhs_name(asgn[1]) do x
+            push!(globals, @ast ctx ex [K"global" x])
+        end
+        @ast ctx ex [K"block"
+            globals...
+            expand_assignment(ctx, asgn, true)
+        ]
+    elseif k == K"=" || k == K"function"
+        expand_assignment(ctx, ex[1], true)
+    elseif k == K"local"
+        throw(LoweringError(ex, "unsupported `const local` declaration"))
+    elseif k == K"Identifier" || k == K"Value"
+        # Expr(:const, v) where v is a Symbol or a GlobalRef is an unfortunate
+        # remnant from the days when const-ness was a flag that could be set on
+        # any global.  It creates a binding with kind PARTITION_KIND_UNDEF_CONST.
+        # TODO: deprecate and delete this "feature"
+        @chk numchildren(ex) == 1
+        @ast ctx ex [K"constdecl" ex[1]]
+    else
+        throw(LoweringError(ex, "expected assignment after `const`"))
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Expansion of function definitions
+
+function expand_function_arg(ctx, body_stmts, arg, is_last_arg, is_kw, arg_id)
+    ex = arg
+
+    if kind(ex) == K"="
+        default = ex[2]
+        ex = ex[1]
+    else
+        default = nothing
+    end
+
+    if kind(ex) == K"..."
+        if !is_last_arg
+            typmsg = is_kw ? "keyword" : "positional"
+            throw(LoweringError(arg, "`...` may only be used for the last $typmsg argument"))
+        end
+        @chk numchildren(ex) == 1
+        slurp_ex = ex
+        ex = ex[1]
+    else
+        slurp_ex = nothing
+    end
+
+    if kind(ex) == K"::"
+        @chk numchildren(ex) in (1,2)
+        if numchildren(ex) == 1
+            type = ex[1]
+            ex = @ast ctx ex "_"::K"Placeholder"
+        else
+            type = ex[2]
+            ex = ex[1]
+        end
+        if is_kw && !isnothing(slurp_ex)
+            throw(LoweringError(slurp_ex, "keyword argument with `...` may not be given a type"))
+        end
+    else
+        type = @ast ctx ex "Any"::K"core"
+    end
+    if !isnothing(slurp_ex)
+        type = @ast ctx slurp_ex [K"curly" "Vararg"::K"core" type]
+    end
+
+    k = kind(ex)
+    if k == K"tuple" && !is_kw
+        # Argument destructuring
+        is_nospecialize = getmeta(arg, :nospecialize, false)
+        name = new_local_binding(ctx, ex, "destructured_arg";
+                                 kind=:argument, is_nospecialize=is_nospecialize)
+        push!(body_stmts, @ast ctx ex [
+            K"local"(meta=CompileHints(:is_destructured_arg, true))
+            [K"=" ex name]
+        ])
+    elseif k == K"Placeholder"
+        # Lowering should be able to use placeholder args as rvalues internally,
+        # e.g. for kw method dispatch.  Duplicate positional placeholder names
+        # should be allowed.
+        name = if is_kw
+            @ast ctx ex ex=>K"Identifier"
+        else
+            new_local_binding(ctx, ex, "#arg$(string(arg_id))#"; kind=:argument)
+        end
+    elseif k == K"Identifier"
+        name = ex
+    else
+        throw(LoweringError(ex, is_kw ? "Invalid keyword name" : "Invalid function argument"))
+    end
+
+    return (name, type, default, !isnothing(slurp_ex))
+end
+
+# Expand `where` clause(s) of a function into (typevar_names, typevar_stmts) where
+# - `typevar_names` are the names of the type's type parameters
+# - `typevar_stmts` are a list of statements to define a `TypeVar` for each parameter
+#   name in `typevar_names`, with exactly one per `typevar_name`. Some of these
+#   may already have been emitted.
+# - `new_typevar_stmts` is the list of statements which needs to to be emitted
+#   prior to uses of `typevar_names`.
+function _split_wheres!(ctx, typevar_names, typevar_stmts, new_typevar_stmts, ex)
+    if kind(ex) == K"where" && numchildren(ex) == 2
+        vars_kind = kind(ex[2])
+        if vars_kind == K"_typevars"
+            append!(typevar_names, children(ex[2][1]))
+            append!(typevar_stmts, children(ex[2][2]))
+        else
+            params = vars_kind == K"braces" ? ex[2][1:end] : ex[2:2]
+            n_existing = length(new_typevar_stmts)
+            expand_typevars!(ctx, typevar_names, new_typevar_stmts, params)
+            append!(typevar_stmts, view(new_typevar_stmts, n_existing+1:length(new_typevar_stmts)))
+        end
+        _split_wheres!(ctx, typevar_names, typevar_stmts, new_typevar_stmts, ex[1])
+    else
+        ex
+    end
+end
+
+function method_def_expr(ctx, srcref, callex_srcref, method_table,
+                         typevar_names, arg_names, arg_types, body, ret_var=nothing)
+    @ast ctx srcref [K"block"
+        # metadata contains svec(types, sparms, location)
+        method_metadata := [K"call"(callex_srcref)
+            "svec"              ::K"core"
+            [K"call"
+                "svec"          ::K"core"
+                arg_types...
+            ]
+            [K"call"
+                "svec"          ::K"core"
+                typevar_names...
+            ]
+            ::K"SourceLocation"(callex_srcref)
+        ]
+        [K"method"
+            isnothing(method_table) ? "nothing"::K"core" : method_table
+            method_metadata
+            [K"lambda"(body, is_toplevel_thunk=false, toplevel_pure=false)
+                [K"block" arg_names...]
+                [K"block" typevar_names...]
+                body
+                ret_var  # might be `nothing` and hence removed
+            ]
+        ]
+        [K"removable" method_metadata]
+    ]
+end
+
+# Select static parameters which are used in function arguments `arg_types`, or
+# transitively used.
+#
+# The transitive usage check probably doesn't guarantee that the types are
+# inferable during dispatch as they may only be part of the bounds of another
+# type. Thus we might get false positives here but we shouldn't get false
+# negatives.
+function select_used_typevars(arg_types, typevar_names, typevar_stmts)
+    n_typevars = length(typevar_names)
+    @assert n_typevars == length(typevar_stmts)
+    # Filter typevar names down to those which are directly used in the arg list
+    typevar_used = Bool[any(contains_identifier(argtype, tn) for argtype in arg_types)
+                        for tn in typevar_names]
+    # _Or_ used transitively via other typevars. The following code
+    # computes this by incrementally coloring the graph of dependencies
+    # between type vars.
+    found_used = true
+    while found_used
+        found_used = false
+        for (i,tn) in enumerate(typevar_names)
+            if typevar_used[i]
+                continue
+            end
+            for j = i+1:n_typevars
+                if typevar_used[j] && contains_identifier(typevar_stmts[j], tn)
+                    found_used = true
+                    typevar_used[i] = true
+                    break
+                end
+            end
+        end
+    end
+    typevar_used
+end
+
+function check_all_typevars_used(arg_types, typevar_names, typevar_stmts)
+    selected = select_used_typevars(arg_types, typevar_names, typevar_stmts)
+    unused_typevar = findfirst(s->!s, selected)
+    if !isnothing(unused_typevar)
+        # Type variables which may be statically determined to be unused in
+        # any function argument and therefore can't be inferred during
+        # dispatch.
+        throw(LoweringError(typevar_names[unused_typevar],
+                            "Method definition declares type variable but does not use it in the type of any function parameter"))
+    end
+end
+
+# Return `typevar_names` which are used directly or indirectly in `arg_types`.
+function trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts)
+    typevar_used = select_used_typevars(arg_types, typevar_names, typevar_stmts)
+    trimmed_typevar_names = SyntaxList(ctx)
+    for (used,tn) in zip(typevar_used, typevar_names)
+        if used
+            push!(trimmed_typevar_names, tn)
+        end
+    end
+    return trimmed_typevar_names
+end
+
+function is_if_generated(ex)
+    kind(ex) == K"if" && kind(ex[1]) == K"generated"
+end
+
+# Return true if a function body contains a code generator from `@generated` in
+# the form `[K"if" [K"generated"] ...]`
+function is_generated(ex)
+    if is_if_generated(ex)
+        return true
+    elseif is_quoted(ex) || kind(ex) == K"function"
+        return false
+    else
+        return any(is_generated, children(ex))
+    end
+end
+
+function split_generated(ctx, ex, gen_part)
+    if is_leaf(ex)
+        ex
+    elseif is_if_generated(ex)
+        gen_part ? @ast(ctx, ex, [K"$" ex[2]]) : ex[3]
+    else
+        mapchildren(e->split_generated(ctx, e, gen_part), ctx, ex)
+    end
+end
+
+# Split @generated function body into two parts:
+# * The code generator
+# * The non-generated function body
+function expand_function_generator(ctx, srcref, callex_srcref, func_name, func_name_str, body, arg_names, typevar_names)
+    gen_body = if is_if_generated(body)
+        body[2] # Simple case - don't need interpolation when the whole body is generated
+    else
+        expand_quote(ctx, @ast ctx body [K"block" split_generated(ctx, body, true)])
+    end
+    gen_name_str = reserve_module_binding_i(ctx.mod,
+                        "#$(isnothing(func_name_str) ? "_" : func_name_str)@generator#")
+    gen_name = new_global_binding(ctx, body, gen_name_str, ctx.mod)
+
+    # Set up the arguments for the code generator
+    gen_arg_names = SyntaxList(ctx)
+    gen_arg_types = SyntaxList(ctx)
+    # Self arg
+    push!(gen_arg_names, new_local_binding(ctx, callex_srcref, "#self#"; kind=:argument))
+    push!(gen_arg_types, @ast ctx callex_srcref [K"function_type" gen_name])
+    # Macro expansion context arg
+    if kind(func_name) != K"Identifier"
+        TODO(func_name, "Which scope do we adopt for @generated generator `__context__` in this case?")
+    end
+    push!(gen_arg_names, adopt_scope(@ast(ctx, callex_srcref, "__context__"::K"Identifier"), func_name))
+    push!(gen_arg_types, @ast(ctx, callex_srcref, MacroContext::K"Value"))
+    # Trailing arguments to the generator are provided by the Julia runtime. They are:
+    # static_parameters... parent_function arg_types...
+    first_trailing_arg = length(gen_arg_names) + 1
+    append!(gen_arg_names, typevar_names)
+    append!(gen_arg_names, arg_names)
+    # Apply nospecialize to all arguments to prevent so much codegen and add
+    # Core.Any type for them
+    for i in first_trailing_arg:length(gen_arg_names)
+        gen_arg_names[i] = setmeta(gen_arg_names[i]; nospecialize=true)
+        push!(gen_arg_types, @ast ctx gen_arg_names[i] "Any"::K"core")
+    end
+    # Code generator definition
+    gen_func_method_defs = @ast ctx srcref [K"block"
+        [K"function_decl" gen_name]
+        [K"scope_block"(scope_type=:hard)
+            [K"method_defs"
+                gen_name
+                [K"block"
+                    method_def_expr(ctx, srcref, callex_srcref, nothing, SyntaxList(ctx),
+                                    gen_arg_names, gen_arg_types, gen_body, nothing)
+                ]
+            ]
+        ]
+    ]
+
+    function stub_argname(n::SyntaxTree, i)
+        if kind(n) == K"Identifier"
+            return n.name_val::String
+        elseif kind(n) == K"BindingId"
+            # flisp lowering calls these unnamed arguments "#unused#", but JL does
+            # not accept that as a repeated argument name
+            return "#arg" * string(i) * "#"
+        else @assert false "Unexpected argument kind: $(kind(n))" end
+    end
+
+    # Extract non-generated body
+    nongen_body = @ast ctx body [K"block"
+        # The Julia runtime associates the code generator with the
+        # non-generated method by adding this meta to the body. This feels like
+        # a hack though since the generator ultimately gets attached to the
+        # method rather than the CodeInfo which we're putting it inside.
+        [K"meta"
+            "generated"::K"Symbol"
+            # The following is code to be evaluated at top level and will wrap
+            # whatever code comes from the user's generator into an appropriate
+            # K"lambda" (+ K"with_static_parameters") suitable for lowering
+            # into a CodeInfo.
+            #
+            # todo: As isolated top-level code, we don't actually want to apply
+            # the normal scope rules of the surrounding function ... it should
+            # technically have scope resolved at top level.
+            [K"new"
+                GeneratedFunctionStub::K"Value" # Use stub type from JuliaLowering
+                ctx.expr_compat_mode::K"Value"
+                gen_name
+                # Truncate provenance to just the source file range, as this
+                # will live permanently in the IR and we probably don't want
+                # the full provenance tree and intermediate expressions
+                # (TODO: More truncation. We certainly don't want to store the
+                #  source file either.)
+                sourceref(srcref)::K"Value"
+                [K"call"
+                    "svec"::K"core"
+                    "#self#"::K"Symbol"
+                    (stub_argname(n,i)::K"Symbol"(n) for (i,n) in enumerate(arg_names[2:end]))...
+                ]
+                [K"call"
+                    "svec"::K"core"
+                    (n.name_val::K"Symbol"(n) for n in typevar_names)...
+                ]
+            ]
+        ]
+        split_generated(ctx, body, false)
+    ]
+
+    return gen_func_method_defs, nongen_body
+end
+
+# Generate a method for every number of allowed optional arguments
+# For example for `f(x, y=1, z=2)` we generate two additional methods
+# f(x) = f(x, 1, 2)
+# f(x, y) = f(x, y, 2)
+function optional_positional_defs!(ctx, method_stmts, srcref, callex,
+                                   method_table, typevar_names, typevar_stmts,
+                                   arg_names, arg_types, first_default,
+                                   arg_defaults)
+    # Replace placeholder arguments with variables - we need to pass them to
+    # the inner method for dispatch even when unused in the inner method body
+    def_arg_names = map(arg_names) do arg
+        kind(arg) == K"Placeholder" ?
+            new_local_binding(ctx, arg, arg.name_val; kind=:argument) :
+            arg
+    end
+    for def_idx = 1:length(arg_defaults)
+        first_omitted = first_default + def_idx - 1
+        trimmed_arg_names = def_arg_names[1:first_omitted-1]
+        # Call the full method directly if no arguments are reused in
+        # subsequent defaults. Otherwise conservatively call the function with
+        # only one additional default argument supplied and let the chain of
+        # function calls eventually lead to the full method.
+        any_args_in_trailing_defaults =
+            any(arg_defaults[def_idx+1:end]) do defaultval
+                contains_identifier(defaultval, def_arg_names[first_omitted:end])
+            end
+        last_used_default = any_args_in_trailing_defaults ?
+            def_idx : lastindex(arg_defaults)
+        body = @ast ctx callex [K"block"
+            [K"call"
+                trimmed_arg_names...
+                arg_defaults[def_idx:last_used_default]...
+            ]
+        ]
+        trimmed_arg_types = arg_types[1:first_omitted-1]
+        trimmed_typevar_names = trim_used_typevars(ctx, trimmed_arg_types,
+                                                   typevar_names, typevar_stmts)
+        # TODO: Ensure we preserve @nospecialize metadata in args
+        push!(method_stmts,
+              method_def_expr(ctx, srcref, callex, method_table,
+                              trimmed_typevar_names, trimmed_arg_names, trimmed_arg_types,
+                              body))
+    end
+end
+
+function scope_nest(ctx, names, values, body)
+    for (name, value) in Iterators.reverse(zip(names, values))
+        body = @ast ctx name [K"let" [K"block" [K"=" name value]]
+            body
+        ]
+    end
+    body
+end
+
+# Generate body function and `Core.kwcall` overloads for functions taking keywords.
+function keyword_function_defs(ctx, srcref, callex_srcref, name_str, typevar_names,
+                               typevar_stmts, new_typevar_stmts, arg_names,
+                               arg_types, has_slurp, first_default, arg_defaults,
+                               keywords, body, ret_var)
+    mangled_name = let n = isnothing(name_str) ? "_" : name_str
+        reserve_module_binding_i(ctx.mod, string(startswith(n, '#') ? "" : "#", n, "#"))
+    end
+    # TODO: Is the layer correct here? Which module should be the parent module
+    # of this body function?
+    layer = new_scope_layer(ctx)
+    body_func_name = adopt_scope(@ast(ctx, callex_srcref, mangled_name::K"Identifier"), layer)
+
+    kwcall_arg_names = SyntaxList(ctx)
+    kwcall_arg_types = SyntaxList(ctx)
+
+    push!(kwcall_arg_names, new_local_binding(ctx, callex_srcref, "#self#"; kind=:argument))
+    push!(kwcall_arg_types,
+        @ast ctx callex_srcref [K"call"
+            "typeof"::K"core"
+            "kwcall"::K"core"
+        ]
+    )
+    kws_arg = new_local_binding(ctx, keywords, "kws"; kind=:argument)
+    push!(kwcall_arg_names, kws_arg)
+    push!(kwcall_arg_types, @ast ctx keywords "NamedTuple"::K"core")
+
+    body_arg_names = SyntaxList(ctx)
+    body_arg_types = SyntaxList(ctx)
+    push!(body_arg_names, new_local_binding(ctx, body_func_name, "#self#"; kind=:argument))
+    push!(body_arg_types, @ast ctx body_func_name [K"function_type" body_func_name])
+
+    non_positional_typevars = typevar_names[map(!,
+        select_used_typevars(arg_types, typevar_names, typevar_stmts))]
+
+    kw_values = SyntaxList(ctx)
+    kw_defaults = SyntaxList(ctx)
+    kw_names = SyntaxList(ctx)
+    kw_name_syms = SyntaxList(ctx)
+    has_kw_slurp = false
+    kwtmp = new_local_binding(ctx, keywords, "kwtmp")
+    for (i,arg) in enumerate(children(keywords))
+        (aname, atype, default, is_slurp) =
+            expand_function_arg(ctx, nothing, arg, i == numchildren(keywords), true, i)
+        push!(kw_names, aname)
+        name_sym = @ast ctx aname aname=>K"Symbol"
+        push!(body_arg_names, aname)
+
+        if is_slurp
+            if !isnothing(default)
+                throw(LoweringError(arg, "keyword argument with `...` cannot have a default value"))
+            end
+            has_kw_slurp = true
+            push!(body_arg_types, @ast ctx arg [K"call" "pairs"::K"top" "NamedTuple"::K"core"])
+            push!(kw_defaults, @ast ctx arg [K"call" "pairs"::K"top" [K"call" "NamedTuple"::K"core"]])
+            continue
+        else
+            push!(body_arg_types, atype)
+        end
+
+        if isnothing(default)
+            default = @ast ctx arg [K"call"
+                "throw"::K"core"
+                [K"call"
+                    "UndefKeywordError"::K"core"
+                    name_sym
+                ]
+            ]
+        end
+        push!(kw_defaults, default)
+
+        # Extract the keyword argument value and check the type
+        push!(kw_values, @ast ctx arg [K"block"
+            [K"if"
+                [K"call" "isdefined"::K"core" kws_arg name_sym]
+                [K"block"
+                    kwval := [K"call" "getfield"::K"core" kws_arg name_sym]
+                    if is_core_Any(atype) || contains_identifier(atype, non_positional_typevars)
+                        # <- Do nothing in this branch because `atype` includes
+                        # something from the typevars and those static
+                        # parameters don't have values yet. Instead, the type
+                        # will be picked up when the body method is called and
+                        # result in a MethodError during dispatch rather than
+                        # the `TypeError` below.
+                        #
+                        # In principle we could probably construct the
+                        # appropriate UnionAll here in some simple cases but
+                        # the fully general case probably requires simulating
+                        # the runtime's dispatch machinery.
+                    else
+                        [K"if" [K"call" "isa"::K"core" kwval atype]
+                            "nothing"::K"core"
+                            [K"call"
+                                "throw"::K"core"
+                                [K"new" "TypeError"::K"core"
+                                    "keyword argument"::K"Symbol"
+                                    name_sym
+                                    atype
+                                    kwval
+                                ]
+                            ]
+                        ]
+                    end
+                    # Compiler performance hack: we reuse the kwtmp slot in all
+                    # keyword if blocks rather than using the if block in value
+                    # position. This cuts down on the number of slots required
+                    # https://github.com/JuliaLang/julia/pull/44333
+                    [K"=" kwtmp kwval]
+                ]
+                [K"=" kwtmp default]
+            ]
+            kwtmp
+        ])
+
+        push!(kw_name_syms, name_sym)
+    end
+    append!(body_arg_names, arg_names)
+    append!(body_arg_types, arg_types)
+
+    first_default += length(kwcall_arg_names)
+    append!(kwcall_arg_names, arg_names)
+    append!(kwcall_arg_types, arg_types)
+
+    kwcall_mtable = @ast(ctx, srcref, "nothing"::K"core")
+
+    kwcall_method_defs = SyntaxList(ctx)
+    if !isempty(arg_defaults)
+        # Construct kwcall overloads which forward default positional args on
+        # to the main kwcall overload.
+        optional_positional_defs!(ctx, kwcall_method_defs, srcref, callex_srcref,
+                                  kwcall_mtable, typevar_names, typevar_stmts,
+                                  kwcall_arg_names, kwcall_arg_types, first_default, arg_defaults)
+    end
+
+    positional_forwarding_args = if has_slurp
+        a = copy(arg_names)
+        a[end] = @ast ctx a[end] [K"..." a[end]]
+        a
+    else
+        arg_names
+    end
+
+    #--------------------------------------------------
+    # Construct the "main kwcall overload" which unpacks keywords and checks
+    # their consistency before dispatching to the user's code in the body
+    # method.
+    defaults_depend_on_kw_names = any(val->contains_identifier(val, kw_names), kw_defaults)
+    defaults_have_assign = any(val->contains_unquoted(e->kind(e) == K"=", val), kw_defaults)
+    use_ssa_kw_temps = !defaults_depend_on_kw_names && !defaults_have_assign
+
+    if use_ssa_kw_temps
+        kw_val_stmts = SyntaxList(ctx)
+        for n in kw_names
+            # If not using slots for the keyword argument values, still declare
+            # them for reflection purposes.
+            push!(kw_val_stmts, @ast ctx n [K"local" n])
+        end
+        kw_val_vars = SyntaxList(ctx)
+        for val in kw_values
+            v = emit_assign_tmp(kw_val_stmts, ctx, val, "kwval")
+            push!(kw_val_vars, v)
+        end
+    else
+        kw_val_vars = kw_names
+    end
+
+    kwcall_body_tail = @ast ctx keywords [K"block"
+        if has_kw_slurp
+            # Slurp remaining keywords into last arg
+            remaining_kws := [K"call"
+                "pairs"::K"top"
+                if isempty(kw_name_syms)
+                    kws_arg
+                else
+                    [K"call"
+                        "structdiff"::K"top"
+                        kws_arg
+                        [K"curly"
+                            "NamedTuple"::K"core"
+                            [K"tuple" kw_name_syms...]
+                        ]
+                    ]
+                end
+            ]
+        else
+            # Check that there's no unexpected keywords
+            [K"if"
+                [K"call"
+                    "isempty"::K"top"
+                    [K"call"
+                        "diff_names"::K"top"
+                        [K"call" "keys"::K"top" kws_arg]
+                        [K"tuple" kw_name_syms...]
+                    ]
+                ]
+                "nothing"::K"core"
+                [K"call"
+                    "kwerr"::K"top"
+                    kws_arg
+                    positional_forwarding_args...
+                ]
+            ]
+        end
+        [K"call"
+            body_func_name
+            kw_val_vars...
+            if has_kw_slurp
+                remaining_kws
+            end
+            positional_forwarding_args...
+        ]
+    ]
+    kwcall_body = if use_ssa_kw_temps
+        @ast ctx keywords [K"block"
+            kw_val_stmts...
+            kwcall_body_tail
+        ]
+    else
+        scope_nest(ctx, kw_names, kw_values, kwcall_body_tail)
+    end
+    main_kwcall_typevars = trim_used_typevars(ctx, kwcall_arg_types, typevar_names, typevar_stmts)
+    push!(kwcall_method_defs,
+          method_def_expr(ctx, srcref, callex_srcref, kwcall_mtable,
+                          main_kwcall_typevars, kwcall_arg_names, kwcall_arg_types, kwcall_body))
+
+    # Check kws of body method
+    check_all_typevars_used(body_arg_types, typevar_names, typevar_stmts)
+
+    kw_func_method_defs = @ast ctx srcref [K"block"
+        [K"function_decl" body_func_name]
+        [K"scope_block"(scope_type=:hard)
+            [K"method_defs"
+                body_func_name
+                [K"block"
+                    new_typevar_stmts...
+                    method_def_expr(ctx, srcref, callex_srcref, "nothing"::K"core",
+                                    typevar_names, body_arg_names, body_arg_types,
+                                    [K"block"
+                                        [K"meta" "nkw"::K"Symbol" numchildren(keywords)::K"Integer"]
+                                        body
+                                    ],
+                                    ret_var)
+                ]
+            ]
+        ]
+        [K"scope_block"(scope_type=:hard)
+            [K"method_defs"
+                "nothing"::K"core"
+                [K"block"
+                    new_typevar_stmts...
+                    kwcall_method_defs...
+                ]
+            ]
+        ]
+    ]
+
+    #--------------------------------------------------
+    # Body for call with no keywords
+    body_for_positional_args_only = if defaults_depend_on_kw_names
+        scope_nest(ctx, kw_names, kw_defaults,
+            @ast ctx srcref [K"call" body_func_name
+                kw_names...
+                positional_forwarding_args...
+            ]
+        )
+    else
+        @ast ctx srcref [K"call" body_func_name
+            kw_defaults...
+            positional_forwarding_args...
+        ]
+    end
+
+    kw_func_method_defs, body_for_positional_args_only
+end
+
+# Check valid identifier/function names
+function is_invalid_func_name(ex)
+    k = kind(ex)
+    if k == K"Identifier"
+        name = ex.name_val
+    elseif k == K"." && numchildren(ex) == 2 && kind(ex[2]) == K"Symbol"
+        # `function A.f(x,y) ...`
+        name = ex[2].name_val
+    else
+        return true
+    end
+    return is_ccall_or_cglobal(name)
+end
+
+function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body=identity; doc_only=false)
+    @chk numchildren(ex) in (1,2)
+    name = ex[1]
+    if numchildren(ex) == 1 && is_identifier_like(name)
+        # Function declaration with no methods
+        if is_invalid_func_name(name)
+            throw(LoweringError(name, "Invalid function name"))
+        end
+        return @ast ctx ex [K"block"
+            [K"function_decl" name]
+            name
+        ]
+    end
+
+    typevar_names = SyntaxList(ctx)
+    typevar_stmts = SyntaxList(ctx)
+    new_typevar_stmts = SyntaxList(ctx)
+    if kind(name) == K"where"
+        # `where` vars end up in two places
+        # 1. Argument types - the `T` in `x::T` becomes a `TypeVar` parameter in
+        #    the method sig, eg, `function f(x::T) where T ...`.  These define the
+        #    static parameters of the method.
+        # 2. In the method body - either explicitly or implicitly via the method
+        #    return type or default arguments - where `T` turns up as the *name* of
+        #    a special slot of kind ":static_parameter"
+        name = _split_wheres!(ctx, typevar_names, typevar_stmts, new_typevar_stmts, name)
+    end
+
+    return_type = nothing
+    if kind(name) == K"::"
+        @chk numchildren(name) == 2
+        return_type = name[2]
+        name = name[1]
+    end
+
+    callex = if kind(name) == K"call"
+        name
+    elseif kind(name) == K"tuple"
+        # Anonymous function syntax `function (x,y) ... end`
+        @ast ctx name [K"call"
+            "#anon#"::K"Placeholder"
+            children(name)...
+        ]
+    elseif kind(name) == K"dotcall"
+        throw(LoweringError(name, "Cannot define function using `.` broadcast syntax"))
+    else
+        throw(LoweringError(name, "Bad function definition"))
+    end
+
+    # Fixup for `new` constructor sigs if necessary
+    callex = rewrite_call(callex)
+
+    # Construct method argument lists of names and types.
+    #
+    # First, match the "self" argument: In the method signature, each function
+    # gets a self argument name+type. For normal generic functions, this is a
+    # singleton and subtype of `Function`. But objects of any type can be made
+    # callable when the self argument is explicitly given using `::` syntax in
+    # the function name.
+    name = callex[1]
+    bare_func_name = nothing
+    name_str = nothing
+    doc_obj = nothing
+    self_name = nothing
+    if kind(name) == K"::"
+        # Self argument is specified by user
+        if numchildren(name) == 1
+            # function (::T)() ...
+            self_type = name[1]
+        else
+            # function (f::T)() ...
+            @chk numchildren(name) == 2
+            self_name = name[1]
+            self_type = name[2]
+        end
+        doc_obj = self_type
+    elseif kind(name) == K"curly"
+        @chk numchildren(name) >= 2
+        self_type = @ast ctx ex [K"function_type"
+                                 expand_forms_2(ctx, expand_curly(ctx, name))]
+        name = name[1]
+        is_invalid_func_name(name) && throw(LoweringError(name, "Invalid function name"))
+        doc_obj = name
+        name_str = get(kind(name) == K"." ? name[2] : name, :name_val, nothing)
+    else
+        if kind(name) == K"Placeholder"
+            # Anonymous function. In this case we may use an ssavar for the
+            # closure's value.
+            name_str = name.name_val
+            name = ssavar(ctx, name, name.name_val)
+            bare_func_name = name
+        elseif is_invalid_func_name(name)
+            throw(LoweringError(name, "Invalid function name"))
+        elseif is_identifier_like(name)
+            # Add methods to a global `Function` object, or local closure
+            # type function f() ...
+            name_str = name.name_val
+            bare_func_name = name
+        else
+            # Add methods to an existing Function
+            # function A.B.f() ...
+            if kind(name) == K"." && kind(name[2]) == K"Symbol"
+                name_str = name[2].name_val
+            end
+        end
+        doc_obj = name # todo: can closures be documented?
+        self_type = @ast ctx name [K"function_type" name]
+    end
+    # Add self argument
+    if isnothing(self_name)
+        # TODO: #self# should be symbolic rather than a binding for the cases
+        # where it's reused in `optional_positional_defs!` because it's
+        # probably unsafe to reuse bindings for multiple different methods in
+        # the presence of closure captures or other global binding properties.
+        #
+        # This is reminiscent of the need to renumber SSA vars in certain cases
+        # in the flisp implementation.
+        self_name = new_local_binding(ctx, name, "#self#"; kind=:argument)
+    end
+
+    # Expand remaining argument names and types
+    arg_names = SyntaxList(ctx)
+    arg_types = SyntaxList(ctx)
+    push!(arg_names, self_name)
+    push!(arg_types, self_type)
+    args = callex[2:end]
+    keywords = nothing
+    if !isempty(args) && kind(args[end]) == K"parameters"
+        keywords = args[end]
+        args = args[1:end-1]
+        if numchildren(keywords) == 0
+            keywords = nothing
+        end
+    end
+    body_stmts = SyntaxList(ctx)
+    has_slurp = false
+    first_default = 0 # index into arg_names/arg_types
+    arg_defaults = SyntaxList(ctx)
+    for (i,arg) in enumerate(args)
+        (aname, atype, default, is_slurp) =
+            expand_function_arg(ctx, body_stmts, arg, i == length(args), false, i)
+        has_slurp |= is_slurp
+        push!(arg_names, aname)
+
+        # TODO: Ideally, ensure side effects of evaluating arg_types only
+        # happen once - we should create an ssavar if there's any following
+        # defaults. (flisp lowering doesn't ensure this either). Beware if
+        # fixing this that optional_positional_defs! depends on filtering the
+        # *symbolic* representation of arg_types.
+        push!(arg_types, atype)
+
+        if isnothing(default)
+            if !isempty(arg_defaults) && !is_slurp
+                # TODO: Referring to multiple pieces of syntax in one error message is necessary.
+                # TODO: Poison ASTs with error nodes and continue rather than immediately throwing.
+                #
+                # We should make something like the following kind of thing work!
+                # arg_defaults[1] = @ast_error ctx arg_defaults[1] """
+                #     Positional arguments with defaults must occur at the end.
+                #
+                #     We found a [non-optional position argument]($arg) *after*
+                #     one with a [default value]($(first(arg_defaults)))
+                # """
+                #
+                throw(LoweringError(args[first_default-1], "optional positional arguments must occur at end"))
+            end
+        else
+            if isempty(arg_defaults)
+                first_default = i + 1 # Offset for self argument
+            end
+            push!(arg_defaults, default)
+        end
+    end
+
+    if doc_only
+        # The (doc str (call ...)) form requires method signature lowering, but
+        # does not execute or define any method, so we can't use function_type.
+        # This is a bit of a messy case in the docsystem which we'll hopefully
+        # be able to delete at some point.
+        sig_stmts = SyntaxList(ctx)
+        @assert first_default != 1 && length(arg_types) >= 1
+        last_required = first_default === 0 ? length(arg_types) : first_default - 1
+        for i in last_required:length(arg_types)
+            push!(sig_stmts, @ast(ctx, ex, [K"curly" "Tuple"::K"core" arg_types[2:i]...]))
+        end
+        sig_type = @ast ctx ex [K"where"
+            [K"curly" "Union"::K"core" sig_stmts...]
+            [K"_typevars" [K"block" typevar_names...] [K"block"]]
+        ]
+        out = @ast ctx docs [K"block"
+            typevar_stmts...
+            [K"call"
+                bind_static_docs!::K"Value"
+                (kind(name) == K"." ? name[1] : ctx.mod::K"Value")
+                name_str::K"Symbol"
+                docs[1]
+                ::K"SourceLocation"(ex)
+                sig_type
+            ]
+        ]
+        return expand_forms_2(ctx, out)
+    end
+
+    if !isnothing(return_type)
+        ret_var = ssavar(ctx, return_type, "return_type")
+        push!(body_stmts, @ast ctx return_type [K"=" ret_var return_type])
+    else
+        ret_var = nothing
+    end
+
+    body = rewrite_body(ex[2])
+    if !isempty(body_stmts)
+        body = @ast ctx body [
+            K"block"
+            body_stmts...
+            body
+        ]
+    end
+
+    gen_func_method_defs = nothing
+    if is_generated(body)
+        gen_func_method_defs, body =
+            expand_function_generator(ctx, ex, callex, name, name_str, body, arg_names, typevar_names)
+
+    end
+
+    if isnothing(keywords)
+        kw_func_method_defs = nothing
+        # NB: The following check seems good as it statically catches any useless
+        # static parameters which can't be bound during method invocation.
+        # However it wasn't previously an error so we might need to reduce it
+        # to a warning?
+        check_all_typevars_used(arg_types, typevar_names, typevar_stmts)
+        main_typevar_names = typevar_names
+    else
+        # Rewrite `body` here so that the positional-only versions dispatch there.
+        kw_func_method_defs, body =
+            keyword_function_defs(ctx, ex, callex, name_str, typevar_names, typevar_stmts,
+                                  new_typevar_stmts, arg_names, arg_types, has_slurp,
+                                  first_default, arg_defaults, keywords, body, ret_var)
+        # The main function (but without keywords) needs its typevars trimmed,
+        # as some of them may be for the keywords only.
+        main_typevar_names = trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts)
+        # ret_var is used only in the body method
+        ret_var = nothing
+    end
+
+    method_table_val = nothing # TODO: method overlays
+    method_table = isnothing(method_table_val)            ?
+                   @ast(ctx, callex, "nothing"::K"core")  :
+                   ssavar(ctx, ex, "method_table")
+    method_stmts = SyntaxList(ctx)
+
+    if !isempty(arg_defaults)
+        optional_positional_defs!(ctx, method_stmts, ex, callex,
+                                  method_table, typevar_names, typevar_stmts,
+                                  arg_names, arg_types, first_default, arg_defaults)
+    end
+
+    # The method with all non-default arguments
+    push!(method_stmts,
+          method_def_expr(ctx, ex, callex, method_table, main_typevar_names, arg_names,
+                          arg_types, body, ret_var))
+
+    if !isnothing(docs)
+        method_stmts[end] = @ast ctx docs [K"block"
+            method_metadata := method_stmts[end]
+            [K"call"
+                bind_docs!::K"Value"
+                doc_obj
+                docs[1]
+                method_metadata
+            ]
+        ]
+    end
+
+    @ast ctx ex [K"block"
+        if !isnothing(bare_func_name)
+            # Need the main function type created here before running any code
+            # in kw_func_method_defs
+            [K"function_decl"(bare_func_name) bare_func_name]
+        end
+        gen_func_method_defs
+        kw_func_method_defs
+        [K"scope_block"(scope_type=:hard)
+            [K"method_defs"
+                isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name
+                [K"block"
+                    new_typevar_stmts...
+                    if !isnothing(method_table_val)
+                        [K"=" method_table method_table_val]
+                    end
+                    method_stmts...
+                ]
+            ]
+        ]
+        [K"removable"
+            isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name
+        ]
+    ]
+end
+
+#-------------------------------------------------------------------------------
+# Anon function syntax
+function expand_arrow_arglist(ctx, arglist, arrowname)
+    k = kind(arglist)
+    if k == K"where"
+        @ast ctx arglist [K"where"
+            expand_arrow_arglist(ctx, arglist[1], arrowname)
+            arglist[2]
+        ]
+    else
+        # The arglist can sometimes be parsed as a block, or something else, and
+        # fixing this is extremely awkward when nested inside `where`. See
+        # https://github.com/JuliaLang/JuliaSyntax.jl/pull/522
+        if k == K"block"
+            @chk numchildren(arglist) == 2
+            arglist = @ast ctx arglist [K"tuple"
+                arglist[1]
+                [K"parameters" arglist[2]]
+            ]
+        elseif k != K"tuple"
+            arglist = @ast ctx arglist [K"tuple"
+                arglist[1]
+            ]
+        end
+        @ast ctx arglist [K"call"
+            arrowname::K"Placeholder"
+            children(arglist)...
+        ]
+    end
+end
+
+function expand_arrow(ctx, ex)
+    @chk numchildren(ex) == 2
+    expand_forms_2(ctx,
+        @ast ctx ex [K"function"
+            expand_arrow_arglist(ctx, ex[1], string(kind(ex)))
+            ex[2]
+        ]
+    )
+end
+
+function expand_opaque_closure(ctx, ex)
+    arg_types_spec = ex[1]
+    return_lower_bound = ex[2]
+    return_upper_bound = ex[3]
+    allow_partial = ex[4]
+    func_expr = ex[5]
+    @chk kind(func_expr) == K"->"
+    @chk numchildren(func_expr) == 2
+    args = func_expr[1]
+    @chk kind(args) == K"tuple"
+    check_no_parameters(ex, args)
+
+    arg_names = SyntaxList(ctx)
+    arg_types = SyntaxList(ctx)
+    push!(arg_names, new_local_binding(ctx, args, "#self#"; kind=:argument))
+    body_stmts = SyntaxList(ctx)
+    is_va = false
+    for (i, arg) in enumerate(children(args))
+        (aname, atype, default, is_slurp) =
+            expand_function_arg(ctx, body_stmts, arg, i == numchildren(args), false, i)
+        is_va |= is_slurp
+        push!(arg_names, aname)
+        push!(arg_types, atype)
+        if !isnothing(default)
+            throw(LoweringError(default, "Default positional arguments cannot be used in an opaque closure"))
+        end
+    end
+
+    nargs = length(arg_names) - 1 # ignoring #self#
+
+    @ast ctx ex [K"_opaque_closure"
+        ssavar(ctx, ex, "opaque_closure_id") # only a placeholder. Must be :local
+        if is_core_nothing(arg_types_spec)
+            [K"curly"
+                "Tuple"::K"core"
+                arg_types...
+            ]
+        else
+            arg_types_spec
+        end
+        is_core_nothing(return_lower_bound) ? [K"curly" "Union"::K"core"] : return_lower_bound
+        is_core_nothing(return_upper_bound) ? "Any"::K"core" : return_upper_bound
+        allow_partial
+        nargs::K"Integer"
+        is_va::K"Bool"
+        ::K"SourceLocation"(func_expr)
+        [K"lambda"(func_expr, is_toplevel_thunk=false, toplevel_pure=false)
+            [K"block" arg_names...]
+            [K"block"]
+            [K"block"
+                body_stmts...
+                func_expr[2]
+            ]
+        ]
+    ]
+end
+
+#-------------------------------------------------------------------------------
+# Expand macro definitions
+
+function _make_macro_name(ctx, ex)
+    k = kind(ex)
+    if k == K"Identifier" || k == K"Symbol"
+        name = mapleaf(ctx, ex, k)
+        name.name_val = "@$(ex.name_val)"
+        name
+    elseif is_valid_modref(ex)
+        @chk numchildren(ex) == 2
+        @ast ctx ex [K"." ex[1] _make_macro_name(ctx, ex[2])]
+    else
+        throw(LoweringError(ex, "invalid macro name"))
+    end
+end
+
+# flisp: expand-macro-def
+function expand_macro_def(ctx, ex)
+    @chk numchildren(ex) >= 1 (ex,"invalid macro definition")
+    if numchildren(ex) == 1
+        name = ex[1]
+        # macro with zero methods
+        # `macro m end`
+        return @ast ctx ex [K"function" _make_macro_name(ctx, name)]
+    end
+    # TODO: Making this manual pattern matching robust is such a pain!!!
+    sig = ex[1]
+    @chk (kind(sig) == K"call" && numchildren(sig) >= 1) (sig, "invalid macro signature")
+    name = sig[1]
+    args = remove_empty_parameters(children(sig))
+    @chk kind(args[end]) != K"parameters" (args[end], "macros cannot accept keyword arguments")
+    scope_ref = kind(name) == K"." ? name[1] : name
+    if ctx.expr_compat_mode
+        @ast ctx ex [K"function"
+            [K"call"(sig)
+                _make_macro_name(ctx, name)
+                [K"::"
+                    # TODO: should we be adopting the scope of the K"macro" expression itself?
+                    adopt_scope(@ast(ctx, sig, "__source__"::K"Identifier"), scope_ref)
+                    LineNumberNode::K"Value"
+                ]
+                [K"::"
+                    adopt_scope(@ast(ctx, sig, "__module__"::K"Identifier"), scope_ref)
+                    Module::K"Value"
+                ]
+                map(e->_apply_nospecialize(ctx, e), args[2:end])...
+            ]
+            ex[2]
+        ]
+    else
+        @ast ctx ex [K"function"
+            [K"call"(sig)
+                _make_macro_name(ctx, name)
+                [K"::"
+                    adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), scope_ref)
+                    MacroContext::K"Value"
+                ]
+                # flisp: We don't mark these @nospecialize because all arguments to
+                # new macros will be of type SyntaxTree
+                args[2:end]...
+            ]
+            ex[2]
+        ]
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Expand type definitions
+
+# Match `x<:T<:y` etc, returning `(name, lower_bound, upper_bound)`
+# A bound is `nothing` if not specified
+function analyze_typevar(ctx, ex)
+    k = kind(ex)
+    if k == K"Identifier"
+        (ex, nothing, nothing)
+    elseif k == K"comparison" && numchildren(ex) == 5
+        kind(ex[3]) == K"Identifier" || throw(LoweringError(ex[3], "expected type name"))
+        if !((kind(ex[2]) == K"Identifier" && ex[2].name_val == "<:") &&
+             (kind(ex[4]) == K"Identifier" && ex[4].name_val == "<:"))
+            throw(LoweringError(ex, "invalid type bounds"))
+        end
+        # a <: b <: c
+        (ex[3], ex[1], ex[5])
+    elseif k == K"<:" && numchildren(ex) == 2
+        kind(ex[1]) == K"Identifier" || throw(LoweringError(ex[1], "expected type name"))
+        (ex[1], nothing, ex[2])
+    elseif k == K">:" && numchildren(ex) == 2
+        kind(ex[2]) == K"Identifier" || throw(LoweringError(ex[2], "expected type name"))
+        (ex[1], ex[2], nothing)
+    else
+        throw(LoweringError(ex, "expected type name or type bounds"))
+    end
+end
+
+function bounds_to_TypeVar(ctx, srcref, bounds)
+    name, lb, ub = bounds
+    # Generate call to one of
+    # TypeVar(name)
+    # TypeVar(name, ub)
+    # TypeVar(name, lb, ub)
+    @ast ctx srcref [K"call"
+        "TypeVar"::K"core"
+        name=>K"Symbol"
+        lb
+        if isnothing(ub) && !isnothing(lb)
+            "Any"::K"core"
+        else
+            ub
+        end
+    ]
+end
+
+# Analyze type signatures such as `A{C} <: B where C`
+#
+# Return (name, typevar_names, typevar_stmts, supertype) where
+# - `name` is the name of the type
+# - `supertype` is the super type of the type
+function analyze_type_sig(ctx, ex)
+    k = kind(ex)
+    if k == K"Identifier"
+        name = ex
+        type_params = ()
+        supertype = @ast ctx ex "Any"::K"core"
+    elseif k == K"curly" && numchildren(ex) >= 1 && kind(ex[1]) == K"Identifier"
+        # name{type_params}
+        name = ex[1]
+        type_params = ex[2:end]
+        supertype = @ast ctx ex "Any"::K"core"
+    elseif k == K"<:" && numchildren(ex) == 2
+        if kind(ex[1]) == K"Identifier"
+            name = ex[1]
+            type_params = ()
+            supertype = ex[2]
+        elseif kind(ex[1]) == K"curly" && numchildren(ex[1]) >= 1 && kind(ex[1][1]) == K"Identifier"
+            name = ex[1][1]
+            type_params = ex[1][2:end]
+            supertype = ex[2]
+        end
+    end
+    @isdefined(name) || throw(LoweringError(ex, "invalid type signature"))
+
+    return (name, type_params, supertype)
+end
+
+# Expand type_params into (typevar_names, typevar_stmts) where
+# - `typevar_names` are the names of the type's type parameters
+# - `typevar_stmts` are a list of statements to define a `TypeVar` for each parameter
+#   name in `typevar_names`, to be emitted prior to uses of `typevar_names`.
+#   There is exactly one statement from each typevar.
+function expand_typevars!(ctx, typevar_names, typevar_stmts, type_params)
+    for param in type_params
+        bounds = analyze_typevar(ctx, param)
+        n = bounds[1]
+        push!(typevar_names, n)
+        push!(typevar_stmts, @ast ctx param [K"block"
+            [K"local" n]
+            [K"=" n bounds_to_TypeVar(ctx, param, bounds)]
+        ])
+    end
+    return nothing
+end
+
+function expand_typevars(ctx, type_params)
+    typevar_names = SyntaxList(ctx)
+    typevar_stmts = SyntaxList(ctx)
+    expand_typevars!(ctx, typevar_names, typevar_stmts, type_params)
+    return (typevar_names, typevar_stmts)
+end
+
+function expand_abstract_or_primitive_type(ctx, ex)
+    is_abstract = kind(ex) == K"abstract"
+    if is_abstract
+        @chk numchildren(ex) == 1
+    else
+        @assert kind(ex) == K"primitive"
+        @chk numchildren(ex) == 2
+        nbits = ex[2]
+    end
+    name, type_params, supertype = analyze_type_sig(ctx, ex[1])
+    typevar_names, typevar_stmts = expand_typevars(ctx, type_params)
+    newtype_var = ssavar(ctx, ex, "new_type")
+    @ast ctx ex [K"block"
+        [K"scope_block"(scope_type=:hard)
+            [K"block"
+                [K"local" name]
+                [K"always_defined" name]
+                typevar_stmts...
+                [K"="
+                    newtype_var
+                    [K"call"
+                        (is_abstract ? "_abstracttype" : "_primitivetype")::K"core"
+                        ctx.mod::K"Value"
+                        name=>K"Symbol"
+                        [K"call" "svec"::K"core" typevar_names...]
+                        if !is_abstract
+                            nbits
+                        end
+                    ]
+                ]
+                [K"=" name newtype_var]
+                [K"call" "_setsuper!"::K"core" newtype_var supertype]
+                [K"call" "_typebody!"::K"core" false::K"Bool" name]
+            ]
+        ]
+        [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ]
+        [K"global" name]
+        [K"if"
+            [K"&&"
+                [K"call"
+                   "isdefinedglobal"::K"core"
+                   ctx.mod::K"Value"
+                   name=>K"Symbol"
+                   false::K"Bool"]
+                [K"call" "_equiv_typedef"::K"core" name newtype_var]
+            ]
+            nothing_(ctx, ex)
+            [K"constdecl" name newtype_var]
+        ]
+        nothing_(ctx, ex)
+    ]
+end
+
+function _match_struct_field(x0)
+    type=nothing
+    docs=nothing
+    atomic=false
+    _const=false
+    x = x0
+    while true
+        k = kind(x)
+        if k == K"Identifier"
+            return (name=x, type=type, atomic=atomic, _const=_const, docs=docs)
+        elseif k == K"::" && numchildren(x) == 2
+            isnothing(type) || throw(LoweringError(x0, "multiple types in struct field"))
+            type = x[2]
+            x = x[1]
+        elseif k == K"atomic"
+            atomic = true
+            x = x[1]
+        elseif k == K"const"
+            _const = true
+            x = x[1]
+        elseif k == K"doc"
+            docs = x[1]
+            x = x[2]
+        else
+            return nothing
+        end
+    end
+end
+
+function _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, inner_defs, exs)
+    for e in exs
+        if kind(e) == K"block"
+            _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs,
+                                   inner_defs, children(e))
+        elseif kind(e) == K"="
+            throw(LoweringError(e, "assignment syntax in structure fields is reserved"))
+        else
+            m = _match_struct_field(e)
+            if !isnothing(m)
+                # Struct field
+                push!(field_names, m.name)
+                n = length(field_names)
+                push!(field_types, isnothing(m.type) ? @ast(ctx, e, "Any"::K"core") : m.type)
+                if m.atomic
+                    push!(field_attrs, @ast ctx e n::K"Integer")
+                    push!(field_attrs, @ast ctx e "atomic"::K"Symbol")
+                end
+                if m._const
+                    push!(field_attrs, @ast ctx e n::K"Integer")
+                    push!(field_attrs, @ast ctx e "const"::K"Symbol")
+                end
+                if !isnothing(m.docs)
+                    push!(field_docs, @ast ctx e n::K"Integer")
+                    push!(field_docs, @ast ctx e m.docs)
+                end
+            else
+                # Inner constructors and inner functions
+                # TODO: Disallow arbitrary expressions inside `struct`?
+                push!(inner_defs, e)
+            end
+        end
+    end
+end
+
+# generate call to `convert()` for `(call new ...)` expressions
+function _new_call_convert_arg(ctx, full_struct_type, field_type, field_index, val)
+    if is_core_Any(field_type)
+        return val
+    end
+    # kt = kind(field_type)
+    # TODO: Allow kt == K"Identifier" && kt in static_params to avoid fieldtype call?
+    @ast ctx field_type [K"block"
+        tmp_type := [K"call"
+            "fieldtype"::K"core"
+            full_struct_type
+            field_index::K"Integer"
+        ]
+        convert_for_type_decl(ctx, field_type, val, tmp_type, false)
+    ]
+end
+
+function default_inner_constructors(ctx, srcref, global_struct_name,
+                                    typevar_names, typevar_stmts, field_names, field_types)
+    # TODO: Consider using srcref = @HERE ?
+    exact_ctor = if isempty(typevar_names)
+        # Definition with exact types for all arguments
+        field_decls = SyntaxList(ctx)
+        @ast ctx srcref [K"function"
+            [K"call"
+                [K"::" [K"curly" "Type"::K"core" global_struct_name]]
+                [[K"::" n t] for (n,t) in zip(field_names, field_types)]...
+            ]
+            [K"new"
+                global_struct_name
+                field_names...
+            ]
+        ]
+    end
+    maybe_non_Any_field_types = filter(!is_core_Any, field_types)
+    converting_ctor = if !isempty(typevar_names) || !isempty(maybe_non_Any_field_types)
+        # Definition which takes `Any` for all arguments and uses
+        # `Base.convert()` to convert those to the exact field type. Only
+        # defined if at least one field type is not Any.
+        ctor_self = new_local_binding(ctx, srcref, "#ctor-self#"; kind=:argument)
+        @ast ctx srcref [K"function"
+            [K"call"
+                 [K"::"
+                     ctor_self
+                     if isempty(typevar_names)
+                         [K"curly" "Type"::K"core" global_struct_name]
+                     else
+                         [K"where"
+                             [K"curly"
+                                 "Type"::K"core"
+                                 [K"curly"
+                                     global_struct_name
+                                     typevar_names...
+                                 ]
+                             ]
+                             [K"_typevars" [K"block" typevar_names...] [K"block" typevar_stmts...]]
+                         ]
+                     end
+                ]
+                field_names...
+            ]
+            [K"block"
+                [K"new"
+                    ctor_self
+                    [_new_call_convert_arg(ctx, ctor_self, type, i, name)
+                     for (i, (name,type)) in enumerate(zip(field_names, field_types))]...
+                ]
+            ]
+        ]
+    end
+    if isnothing(exact_ctor)
+        converting_ctor
+    else
+        if isnothing(converting_ctor)
+            exact_ctor
+        else
+            @ast ctx srcref [K"block"
+                [K"if"
+                    # Only define converting_ctor if at least one field type is not Any.
+                    mapfoldl(t     -> [K"call" "==="::K"core" "Any"::K"core" t],
+                             (t,u) -> [K"&&" u t],
+                             maybe_non_Any_field_types)
+                    [K"block"]
+                    converting_ctor
+                ]
+                exact_ctor
+            ]
+        end
+    end
+end
+
+# Generate outer constructor for structs with type parameters. Eg, for
+#     struct X{U,V}
+#         x::U
+#         y::V
+#     end
+#
+# We basically generate
+#     function (::Type{X})(x::U, y::V) where {U,V}
+#         new(X{U,V}, x, y)
+#     end
+#
+function default_outer_constructor(ctx, srcref, global_struct_name,
+                                   typevar_names, typevar_stmts, field_names, field_types)
+    @ast ctx srcref [K"function"
+        [K"where"
+            [K"call"
+                # We use `::Type{$global_struct_name}` here rather than just
+                # `struct_name` because global_struct_name is a binding to a
+                # type - we know we're not creating a new `Function` and
+                # there's no reason to emit the 1-arg `Expr(:method, name)` in
+                # the next phase of expansion.
+                [K"::" [K"curly" "Type"::K"core" global_struct_name]]
+                [[K"::" n t] for (n,t) in zip(field_names, field_types)]...
+            ]
+            [K"_typevars" [K"block" typevar_names...] [K"block" typevar_stmts...]]
+        ]
+        [K"new" [K"curly" global_struct_name typevar_names...] field_names...]
+    ]
+end
+
+function _is_new_call(ex)
+    kind(ex) == K"call" &&
+        ((kind(ex[1]) == K"Identifier" && ex[1].name_val == "new") ||
+         (kind(ex[1]) == K"curly" && kind(ex[1][1]) == K"Identifier" && ex[1][1].name_val == "new"))
+end
+
+# Rewrite inner constructor signatures for struct `X` from `X(...)`
+# to `(ctor_self::Type{X})(...)`
+function _rewrite_ctor_sig(ctx, callex, struct_name, global_struct_name, struct_typevars, ctor_self)
+    @assert kind(callex) == K"call"
+    name = callex[1]
+    if is_same_identifier_like(struct_name, name)
+        # X(x,y)  ==>  (#ctor-self#::Type{X})(x,y)
+        ctor_self[] = new_local_binding(ctx, callex, "#ctor-self#"; kind=:argument)
+        @ast ctx callex [K"call"
+            [K"::"
+                ctor_self[]
+                [K"curly" "Type"::K"core" global_struct_name]
+            ]
+            callex[2:end]...
+        ]
+    elseif kind(name) == K"curly" && is_same_identifier_like(struct_name, name[1])
+        # X{T}(x,y)  ==>  (#ctor-self#::Type{X{T}})(x,y)
+        self = new_local_binding(ctx, callex, "#ctor-self#"; kind=:argument)
+        if numchildren(name) - 1 == length(struct_typevars)
+            # Self fully parameterized - can be used as the full type to
+            # rewrite new() calls in constructor body.
+            ctor_self[] = self
+        end
+        @ast ctx callex [K"call"
+            [K"::"
+                self
+                [K"curly"
+                    "Type"::K"core"
+                    [K"curly"
+                        global_struct_name
+                        name[2:end]...
+                    ]
+                ]
+            ]
+            callex[2:end]...
+        ]
+    else
+        callex
+    end
+end
+
+# Rewrite calls to `new` in bodies of inner constructors and inner functions
+# into `new` or `splatnew` expressions.  For example:
+#
+#     struct X{T,S}
+#         X() = new()
+#         X() = new{A,B}()
+#         X{T,S}() where {T,S} = new()
+#         X{A,B}() = new()
+#         X{A}() = new()
+#         (t::Type{X})() = new{A,B}()
+#         f() = new()
+#         f() = new{A,B}()
+#         f() = new{Ts...}()
+#     end
+#
+# Map to the following
+#
+#     X() = ERROR
+#     (#ctor-self#::Type{X})() = (new X{A,B})
+#     (Type{X{T,S}}() where {T,S} = (new #ctor-self#)
+#     X{A,B}() = (new #ctor-self#)
+#     X{A}() = ERROR
+#     (t::Type{X})() = (new X{A,B})
+#     f() = ERROR
+#     f() = (new X{A,B})
+#     f() = (new X{Ts...})
+#
+# TODO: Arguably the following "could also work", but any symbolic match of
+# this case would be heuristic and rely on assuming Type == Core.Type. So
+# runtime checks would really be required and flisp lowering doesn't catch
+# this case either.
+#
+#     (t::Type{X{A,B}})() = new()
+function _rewrite_ctor_new_calls(ctx, ex, struct_name, global_struct_name, ctor_self,
+                                 struct_typevars, field_types)
+    if is_leaf(ex)
+        return ex
+    elseif !_is_new_call(ex)
+        return mapchildren(
+            e->_rewrite_ctor_new_calls(ctx, e, struct_name, global_struct_name,
+                                       ctor_self, struct_typevars, field_types),
+            ctx, ex
+        )
+    end
+    # Rewrite a call to new()
+    kw_arg_i = findfirst(e->(k = kind(e); k == K"=" || k == K"parameters"), children(ex))
+    if !isnothing(kw_arg_i)
+        throw(LoweringError(ex[kw_arg_i], "`new` does not accept keyword arguments"))
+    end
+    full_struct_type = if kind(ex[1]) == K"curly"
+        # new{A,B}(...)
+        new_type_params = ex[1][2:end]
+        n_type_splat = sum(kind(t) == K"..." for t in new_type_params; init=0)
+        n_type_nonsplat = length(new_type_params) - n_type_splat
+        if n_type_splat == 0 && n_type_nonsplat < length(struct_typevars)
+            throw(LoweringError(ex[1], "too few type parameters specified in `new{...}`"))
+        elseif n_type_nonsplat > length(struct_typevars)
+            throw(LoweringError(ex[1], "too many type parameters specified in `new{...}`"))
+        end
+        @ast ctx ex[1] [K"curly" global_struct_name new_type_params...]
+    elseif !isnothing(ctor_self)
+        # new(...) in constructors
+        ctor_self
+    else
+        # new(...) inside non-constructor inner functions
+        if isempty(struct_typevars)
+            global_struct_name
+        else
+            throw(LoweringError(ex[1], "too few type parameters specified in `new`"))
+        end
+    end
+    new_args = ex[2:end]
+    n_splat = sum(kind(t) == K"..." for t in new_args; init=0)
+    n_nonsplat = length(new_args) - n_splat
+    n_fields = length(field_types)
+    function throw_n_fields_error(desc)
+        @ast ctx ex [K"call"
+            "throw"::K"core"
+            [K"call"
+                "ArgumentError"::K"top"
+                "too $desc arguments in `new` (expected $n_fields)"::K"String"
+            ]
+        ]
+    end
+    if n_nonsplat > n_fields
+        return throw_n_fields_error("many")
+    else
+        # "Too few" args are allowed in partially initialized structs
+    end
+    if n_splat == 0
+        @ast ctx ex [K"block"
+            struct_type := full_struct_type
+            [K"new"
+                struct_type
+                [_new_call_convert_arg(ctx, struct_type, type, i, name)
+                 for (i, (name,type)) in enumerate(zip(ex[2:end], field_types))]...
+            ]
+        ]
+    else
+        fields_all_Any = all(is_core_Any, field_types)
+        if fields_all_Any
+            @ast ctx ex [K"block"
+                struct_type := full_struct_type
+                [K"splatnew"
+                    struct_type
+                    # Note: `jl_new_structt` ensures length of this tuple is
+                    # exactly the number of fields.
+                    [K"call" "tuple"::K"core" ex[2:end]...]
+                ]
+            ]
+        else
+            # `new` with splatted args which are symbolically not `Core.Any`
+            # (might be `Any` at runtime but we can't know that here.)
+            @ast ctx ex [K"block"
+                args := [K"call" "tuple"::K"core" ex[2:end]...]
+                n_args := [K"call" "nfields"::K"core" args]
+                [K"if"
+                    [K"call" "ult_int"::K"top" n_args n_fields::K"Integer"]
+                    throw_n_fields_error("few")
+                ]
+                [K"if"
+                    [K"call" "ult_int"::K"top" n_fields::K"Integer" n_args]
+                    throw_n_fields_error("many")
+                ]
+                struct_type := full_struct_type
+                [K"new"
+                    struct_type
+                    [_new_call_convert_arg(ctx, struct_type, type, i,
+                         [K"call" "getfield"::K"core" args i::K"Integer"])
+                     for (i, type) in enumerate(field_types)]...
+                ]
+            ]
+        end
+    end
+end
+
+# Rewrite calls to `new( ... )` to `new` expressions on the appropriate
+# type, determined by the containing type and constructor definitions.
+#
+# This is mainly for constructors, but also needs to work for inner functions
+# which may call new() but are not constructors.
+function rewrite_new_calls(ctx, ex, struct_name, global_struct_name,
+                           typevar_names, field_names, field_types)
+    if kind(ex) == K"doc"
+        docs = ex[1]
+        ex = ex[2]
+    else
+        docs = nothing
+    end
+    if kind(ex) != K"function"
+        return ex
+    end
+    if !(numchildren(ex) == 2 && is_eventually_call(ex[1]))
+        throw(LoweringError(ex, "Expected constructor or named inner function"))
+    end
+
+    ctor_self = Ref{Union{Nothing,SyntaxTree}}(nothing)
+    expand_function_def(ctx, ex, docs,
+        callex->_rewrite_ctor_sig(ctx, callex, struct_name,
+                                  global_struct_name, typevar_names, ctor_self),
+        body->_rewrite_ctor_new_calls(ctx, body, struct_name, global_struct_name,
+                                      ctor_self[], typevar_names, field_types)
+    )
+end
+
+function _constructor_min_initialized(ex::SyntaxTree)
+    if _is_new_call(ex)
+        if any(kind(e) == K"..." for e in ex[2:end])
+            # Lowering ensures new with splats always inits all fields
+            # or in the case of splatnew this is enforced by the runtime.
+            typemax(Int)
+        else
+            numchildren(ex) - 1
+        end
+    elseif !is_leaf(ex)
+        minimum((_constructor_min_initialized(e) for e in children(ex)), init=typemax(Int))
+    else
+        typemax(Int)
+    end
+end
+
+# Let S be a struct we're defining in module M.  Below is a hack to allow its
+# field types to refer to S as M.S.  See #56497.
+function insert_struct_shim(ctx, fieldtypes, name)
+    function replace_type(ex)
+        if kind(ex) == K"." &&
+            numchildren(ex) == 2 &&
+            kind(ex[2]) == K"Symbol" &&
+            ex[2].name_val == name.name_val
+            @ast ctx ex [K"call" "struct_name_shim"::K"core" ex[1] ex[2] ctx.mod::K"Value" name]
+        elseif numchildren(ex) > 0
+            mapchildren(replace_type, ctx, ex)
+        else
+            ex
+        end
+    end
+    map(replace_type, fieldtypes)
+end
+
+function expand_struct_def(ctx, ex, docs)
+    @chk numchildren(ex) == 2
+    type_sig = ex[1]
+    type_body = ex[2]
+    if kind(type_body) != K"block"
+        throw(LoweringError(type_body, "expected block for `struct` fields"))
+    end
+    struct_name, type_params, supertype = analyze_type_sig(ctx, type_sig)
+    typevar_names, typevar_stmts = expand_typevars(ctx, type_params)
+    field_names = SyntaxList(ctx)
+    field_types = SyntaxList(ctx)
+    field_attrs = SyntaxList(ctx)
+    field_docs = SyntaxList(ctx)
+    inner_defs = SyntaxList(ctx)
+    _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs,
+                           inner_defs, children(type_body))
+    is_mutable = has_flags(ex, JuliaSyntax.MUTABLE_FLAG)
+    min_initialized = minimum((_constructor_min_initialized(e) for e in inner_defs),
+                              init=length(field_names))
+    newtype_var = ssavar(ctx, ex, "struct_type")
+    hasprev = ssavar(ctx, ex, "hasprev")
+    prev = ssavar(ctx, ex, "prev")
+    newdef = ssavar(ctx, ex, "newdef")
+    layer = new_scope_layer(ctx, struct_name)
+    global_struct_name = adopt_scope(struct_name, layer)
+    if !isempty(typevar_names)
+        # Generate expression like `prev_struct.body.body.parameters`
+        prev_typevars = global_struct_name
+        for _ in 1:length(typevar_names)
+            prev_typevars = @ast ctx type_sig [K"." prev_typevars "body"::K"Symbol"]
+        end
+        prev_typevars = @ast ctx type_sig [K"." prev_typevars "parameters"::K"Symbol"]
+    end
+
+    # New local variable names for constructor args to avoid clashing with any
+    # type names
+    if isempty(inner_defs)
+        field_names_2 = adopt_scope(field_names, layer)
+    end
+
+    need_outer_constructor = false
+    if isempty(inner_defs) && !isempty(typevar_names)
+        # To generate an outer constructor each struct type parameter must be
+        # able to be inferred from the list of fields passed as constructor
+        # arguments.
+        #
+        # More precisely, it must occur in a field type, or in the bounds of a
+        # subsequent type parameter. For example the following won't work
+        #     struct X{T}
+        #         a::Int
+        #     end
+        #     X(a::Int) where T = #... construct X{T} ??
+        #
+        # But the following does
+        #     struct X{T}
+        #         a::T
+        #     end
+        #     X(a::T) where {T} = # construct X{typeof(a)}(a)
+        need_outer_constructor = true
+        for i in 1:length(typevar_names)
+            typevar_name = typevar_names[i]
+            typevar_in_fields = any(contains_identifier(ft, typevar_name) for ft in field_types)
+            if !typevar_in_fields
+                typevar_in_bounds = any(type_params[i+1:end]) do param
+                    # Check the bounds of subsequent type params
+                    (_,lb,ub) = analyze_typevar(ctx, param)
+                    # todo: flisp lowering tests `lb` here so we also do. But
+                    # in practice this doesn't seem to constrain `typevar_name`
+                    # and the generated constructor doesn't work?
+                    (!isnothing(ub) && contains_identifier(ub, typevar_name)) ||
+                    (!isnothing(lb) && contains_identifier(lb, typevar_name))
+                end
+                if !typevar_in_bounds
+                    need_outer_constructor = false
+                    break
+                end
+            end
+        end
+    end
+
+    # The following lowering covers several subtle issues in the ordering of
+    # typevars when "redefining" structs.
+    # See https://github.com/JuliaLang/julia/pull/36121
+    @ast ctx ex [K"block"
+        [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ]
+        [K"scope_block"(scope_type=:hard)
+            # Needed for later constdecl to work, though plain global form may be removed soon.
+            [K"global" global_struct_name]
+            [K"block"
+                [K"local" struct_name]
+                [K"always_defined" struct_name]
+                typevar_stmts...
+                [K"="
+                    newtype_var
+                    [K"call"
+                        "_structtype"::K"core"
+                        ctx.mod::K"Value"
+                        struct_name=>K"Symbol"
+                        [K"call"(type_sig) "svec"::K"core" typevar_names...]
+                        [K"call"(type_body) "svec"::K"core" [n=>K"Symbol" for n in field_names]...]
+                        [K"call"(type_body) "svec"::K"core" field_attrs...]
+                        is_mutable::K"Bool"
+                        min_initialized::K"Integer"
+                    ]
+                ]
+                [K"=" struct_name newtype_var]
+                [K"call"(supertype) "_setsuper!"::K"core" newtype_var supertype]
+                [K"=" hasprev
+                      [K"&&" [K"call" "isdefinedglobal"::K"core"
+                              ctx.mod::K"Value"
+                              struct_name=>K"Symbol"
+                              false::K"Bool"]
+                             [K"call" "_equiv_typedef"::K"core" global_struct_name newtype_var]
+                       ]]
+                [K"=" prev [K"if" hasprev global_struct_name false::K"Bool"]]
+                [K"if" hasprev
+                   [K"block"
+                    # if this is compatible with an old definition, use the old parameters, but the
+                    # new object. This will fail to capture recursive cases, but the call to typebody!
+                    # below is permitted to choose either type definition to put into the binding table
+                    if !isempty(typevar_names)
+                        # And resassign the typevar_names - these may be
+                        # referenced in the definition of the field
+                        # types below
+                        [K"=" [K"tuple" typevar_names...] prev_typevars]
+                    end
+                    ]
+                ]
+                [K"=" newdef
+                   [K"call"(type_body)
+                      "_typebody!"::K"core"
+                      prev
+                      newtype_var
+                      [K"call" "svec"::K"core" insert_struct_shim(ctx, field_types, struct_name)...]
+                   ]]
+                [K"constdecl"
+                    global_struct_name
+                    newdef
+                 ]
+                # Default constructors
+                if isempty(inner_defs)
+                    default_inner_constructors(ctx, ex, global_struct_name,
+                                               typevar_names, typevar_stmts, field_names_2, field_types)
+                else
+                    map!(inner_defs, inner_defs) do def
+                        rewrite_new_calls(ctx, def, struct_name, global_struct_name,
+                                          typevar_names, field_names, field_types)
+                    end
+                    [K"block" inner_defs...]
+                end
+                if need_outer_constructor
+                    default_outer_constructor(ctx, ex, global_struct_name,
+                                              typevar_names, typevar_stmts, field_names_2, field_types)
+                end
+            ]
+        ]
+
+        # Documentation
+        if !isnothing(docs) || !isempty(field_docs)
+            [K"call"(isnothing(docs) ? ex : docs)
+                bind_docs!::K"Value"
+                struct_name
+                isnothing(docs) ? nothing_(ctx, ex) : docs[1]
+                ::K"SourceLocation"(ex)
+                [K"="
+                    "field_docs"::K"Identifier"
+                    [K"call" "svec"::K"core" field_docs...]
+                ]
+            ]
+        end
+        nothing_(ctx, ex)
+    ]
+end
+
+#-------------------------------------------------------------------------------
+# Expand `where` syntax
+
+function expand_where(ctx, srcref, lhs, rhs)
+    bounds = analyze_typevar(ctx, rhs)
+    v = bounds[1]
+    @ast ctx srcref [K"let"
+        [K"block" [K"=" v bounds_to_TypeVar(ctx, srcref, bounds)]]
+        [K"call" "UnionAll"::K"core" v lhs]
+    ]
+end
+
+function expand_wheres(ctx, ex)
+    body = ex[1]
+    rhs = ex[2]
+    if kind(rhs) == K"braces"
+        # S{X,Y} where {X,Y}
+        for r in reverse(children(rhs))
+            body = expand_where(ctx, ex, body, r)
+        end
+    elseif kind(rhs) == K"_typevars"
+        # Eg, `S{X,Y} where {X, Y}` but with X and Y
+        # already allocated `TypeVar`s
+        for r in reverse(children(rhs[1]))
+            body = @ast ctx ex [K"call" "UnionAll"::K"core" r body]
+        end
+    else
+        # S{X} where X
+        body = expand_where(ctx, ex, body, rhs)
+    end
+    body
+end
+
+# Match implicit where parameters for `Foo{<:Bar}` ==> `Foo{T} where T<:Bar`
+function expand_curly(ctx, ex)
+    @assert kind(ex) == K"curly"
+    check_no_parameters(ex, "unexpected semicolon in type parameter list")
+    check_no_assignment(children(ex), "misplace assignment in type parameter list")
+
+    typevar_stmts = SyntaxList(ctx)
+    type_args = SyntaxList(ctx)
+    implicit_typevars = SyntaxList(ctx)
+
+    i = 1
+    for e in children(ex)
+        k = kind(e)
+        if (k == K"<:" || k == K">:") && numchildren(e) == 1
+            # `X{<:A}` and `X{>:A}`
+            name = @ast ctx e "#T$i"::K"Placeholder"
+            i += 1
+            typevar = k == K"<:" ?
+                bounds_to_TypeVar(ctx, e, (name, nothing, e[1])) :
+                bounds_to_TypeVar(ctx, e, (name, e[1], nothing))
+            arg = emit_assign_tmp(typevar_stmts, ctx, typevar)
+            push!(implicit_typevars, arg)
+        else
+            arg = e
+        end
+        push!(type_args, arg)
+    end
+
+    type = @ast ctx ex [K"call" "apply_type"::K"core" type_args...]
+    if !isempty(implicit_typevars)
+        type = @ast ctx ex [K"block"
+            typevar_stmts...
+            [K"where" type [K"_typevars" [K"block" implicit_typevars...] [K"block" typevar_stmts...]]]
+        ]
+    end
+
+    return type
+end
+
+#-------------------------------------------------------------------------------
+# Expand import / using / export
+
+function expand_importpath(path)
+    @chk kind(path) == K"importpath"
+    path_spec = Expr(:.)
+    prev_was_dot = true
+    for component in children(path)
+        k = kind(component)
+        if k == K"quote"
+            # Permit quoted path components as in
+            # import A.(:b).:c
+            component = component[1]
+        end
+        @chk kind(component) in (K"Identifier", K".")
+        name = component.name_val
+        is_dot = kind(component) == K"."
+        if is_dot && !prev_was_dot
+            throw(LoweringError(component, "invalid import path: `.` in identifier path"))
+        end
+        prev_was_dot = is_dot
+        push!(path_spec.args, Symbol(name))
+    end
+    return path_spec
+end
+
+function expand_import_or_using(ctx, ex)
+    if kind(ex[1]) == K":"
+        # import M: x.y as z, w
+        # (import (: (importpath M) (as (importpath x y) z) (importpath w)))
+        # =>
+        # (call module_import
+        #  false
+        #  (call core.svec "M")
+        #  (call core.svec  2 "x" "y" "z"  1 "w" "w"))
+        @chk numchildren(ex[1]) >= 2
+        from = ex[1][1]
+        from_path = @ast ctx from QuoteNode(expand_importpath(from))::K"Value"
+        paths = ex[1][2:end]
+    else
+        # import A.B
+        # (using (importpath A B))
+        # (call eval_import true nothing (call core.svec 1 "w"))
+        @chk numchildren(ex) >= 1
+        from_path = nothing
+        paths = children(ex)
+    end
+    # Here we represent the paths as quoted `Expr` data structures
+    path_specs = SyntaxList(ctx)
+    for spec in paths
+        as_name = nothing
+        if kind(spec) == K"as"
+            @chk numchildren(spec) == 2
+            @chk kind(spec[2]) == K"Identifier"
+            as_name = Symbol(spec[2].name_val)
+            path = QuoteNode(Expr(:as, expand_importpath(spec[1]), as_name))
+        else
+            path = QuoteNode(expand_importpath(spec))
+        end
+        push!(path_specs, @ast ctx spec path::K"Value")
+    end
+    is_using = kind(ex) == K"using"
+    stmts = SyntaxList(ctx)
+    if isnothing(from_path)
+        for spec in path_specs
+            if is_using
+                push!(stmts,
+                    @ast ctx spec [K"call"
+                        eval_using   ::K"Value"
+                        ctx.mod      ::K"Value"
+                        spec
+                    ]
+                )
+            else
+                push!(stmts,
+                    @ast ctx spec [K"call"
+                        eval_import   ::K"Value"
+                        (!is_using)   ::K"Bool"
+                        ctx.mod       ::K"Value"
+                        "nothing"     ::K"top"
+                        spec
+                    ]
+                )
+            end
+            # latestworld required between imports so that previous symbols
+            # become visible
+            push!(stmts, @ast ctx spec (::K"latestworld"))
+        end
+    else
+        push!(stmts, @ast ctx ex [K"call"
+            eval_import   ::K"Value"
+            (!is_using)   ::K"Bool"
+            ctx.mod       ::K"Value"
+            from_path
+            path_specs...
+        ])
+        push!(stmts, @ast ctx ex (::K"latestworld"))
+    end
+    @ast ctx ex [K"block"
+        [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]]
+        stmts...
+        [K"removable" "nothing"::K"core"]
+    ]
+end
+
+# Expand `public` or `export`
+function expand_public(ctx, ex)
+    identifiers = String[]
+    for e in children(ex)
+        @chk kind(e) == K"Identifier" (ex, "Expected identifier")
+        push!(identifiers, e.name_val)
+    end
+    (e.name_val::K"String" for e in children(ex))
+    @ast ctx ex [K"call"
+        eval_public::K"Value"
+        ctx.mod::K"Value"
+        (kind(ex) == K"export")::K"Bool"
+        identifiers::K"Value"
+    ]
+end
+
+#-------------------------------------------------------------------------------
+# Expand docstring-annotated expressions
+
+function expand_doc(ctx, ex, docex, mod=ctx.mod)
+    if kind(ex) in (K"Identifier", K".")
+        expand_forms_2(ctx, @ast ctx docex [K"call"
+            bind_static_docs!::K"Value"
+            (kind(ex) === K"." ? ex[1] : ctx.mod::K"Value")
+            (kind(ex) === K"." ? ex[2] : ex).name_val::K"Symbol"
+            docex[1]
+            ::K"SourceLocation"(ex)
+            Union{}::K"Value"
+        ])
+    elseif is_eventually_call(ex)
+        expand_function_def(ctx, @ast(ctx, ex, [K"function" ex [K"block"]]),
+                            docex; doc_only=true)
+    else
+        expand_forms_2(ctx, ex, docex)
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Desugaring's "big switch": expansion of some simple forms; dispatch to other
+# expansion functions for the rest.
+
+"""
+Lowering pass 2 - desugaring
+
+This pass simplifies expressions by expanding complicated syntax sugar into a
+small set of core syntactic forms. For example, field access syntax `a.b` is
+expanded to a function call `getproperty(a, :b)`.
+"""
+function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing)
+    k = kind(ex)
+    if k == K"atomic"
+        throw(LoweringError(ex, "unimplemented or unsupported atomic declaration"))
+    elseif k == K"call"
+        expand_call(ctx, ex)
+    elseif k == K"dotcall" || k == K".&&" || k == K".||" || k == K".="
+        expand_forms_2(ctx, expand_fuse_broadcast(ctx, ex))
+    elseif k == K"."
+        expand_forms_2(ctx, expand_dot(ctx, ex))
+    elseif k == K"?"
+        @chk numchildren(ex) == 3
+        expand_forms_2(ctx, @ast ctx ex [K"if" children(ex)...])
+    elseif k == K"&&" || k == K"||"
+        @chk numchildren(ex) > 1
+        cs = expand_cond_children(ctx, ex)
+        # Attributing correct provenance for `cs[1:end-1]` is tricky in cases
+        # like `a && (b && c)` because the expression constructed here arises
+        # from the source fragment `a && (b` which doesn't follow the tree
+        # structure. For now we attribute to the parent node.
+        cond = length(cs) == 2 ?
+            cs[1] :
+            makenode(ctx, ex, k, cs[1:end-1])
+        # This transformation assumes the type assertion `cond::Bool` will be
+        # added by a later compiler pass (currently done in codegen)
+        if k == K"&&"
+            @ast ctx ex [K"if" cond cs[end] false::K"Bool"]
+        else
+            @ast ctx ex [K"if" cond true::K"Bool" cs[end]]
+        end
+    elseif k == K"::"
+        @chk numchildren(ex) == 2 "`::` must be written `value::type` outside function argument lists"
+        @ast ctx ex [K"call"
+            "typeassert"::K"core"
+            expand_forms_2(ctx, ex[1])
+            expand_forms_2(ctx, ex[2])
+        ]
+    elseif k == K"<:" || k == K">:" || k == K"-->"
+        expand_forms_2(ctx, @ast ctx ex [K"call"
+            adopt_scope(string(k)::K"Identifier", ex)
+            children(ex)...
+        ])
+    elseif k == K"op=" || k == K".op="
+        expand_forms_2(ctx, expand_update_operator(ctx, ex))
+    elseif k == K"="
+        expand_assignment(ctx, ex)
+    elseif k == K"break"
+        numchildren(ex) > 0 ? ex :
+            @ast ctx ex [K"break" "loop_exit"::K"symbolic_label"]
+    elseif k == K"continue"
+        @ast ctx ex [K"break" "loop_cont"::K"symbolic_label"]
+    elseif k == K"comparison"
+        expand_forms_2(ctx, expand_compare_chain(ctx, ex))
+    elseif k == K"doc"
+        @chk numchildren(ex) == 2
+        expand_doc(ctx, ex[2], ex)
+    elseif k == K"for"
+        expand_forms_2(ctx, expand_for(ctx, ex))
+    elseif k == K"comprehension"
+        @chk numchildren(ex) == 1
+        @chk kind(ex[1]) == K"generator"
+        @ast ctx ex [K"call"
+            "collect"::K"top"
+            expand_forms_2(ctx, ex[1])
+        ]
+    elseif k == K"typed_comprehension"
+        @chk numchildren(ex) == 2
+        @chk kind(ex[2]) == K"generator"
+        if numchildren(ex[2]) == 2 && kind(ex[2][2]) == K"iteration"
+            # Hack to lower simple typed comprehensions to loops very early,
+            # greatly reducing the number of functions and load on the compiler
+            expand_forms_2(ctx, expand_comprehension_to_loops(ctx, ex))
+        else
+            @ast ctx ex [K"call"
+                "collect"::K"top"
+                expand_forms_2(ctx, ex[1])
+                expand_forms_2(ctx, ex[2])
+            ]
+        end
+    elseif k == K"generator"
+        expand_forms_2(ctx, expand_generator(ctx, ex))
+    elseif k == K"->" || k == K"do"
+        expand_forms_2(ctx, expand_arrow(ctx, ex))
+    elseif k == K"function"
+        expand_forms_2(ctx, expand_function_def(ctx, ex, docs))
+    elseif k == K"macro"
+        @ast ctx ex [K"block"
+            [K"assert"
+                "global_toplevel_only"::K"Symbol"
+                [K"inert" ex]
+            ]
+            expand_forms_2(ctx, expand_macro_def(ctx, ex))
+        ]
+    elseif k == K"if" || k == K"elseif"
+        @chk numchildren(ex) >= 2
+        @ast ctx ex [k
+            expand_condition(ctx, ex[1])
+            expand_forms_2(ctx, ex[2:end])...
+        ]
+    elseif k == K"let"
+        expand_forms_2(ctx, expand_let(ctx, ex))
+    elseif k == K"const"
+        expand_const_decl(ctx, ex)
+    elseif k == K"local" || k == K"global"
+        if k == K"global" && kind(ex[1]) == K"const"
+            # Normalize `global const` to `const global`
+            expand_const_decl(ctx, @ast ctx ex [K"const" [K"global" ex[1][1]]])
+        else
+            expand_decls(ctx, ex)
+        end
+    elseif k == K"where"
+        expand_forms_2(ctx, expand_wheres(ctx, ex))
+    elseif k == K"braces" || k == K"bracescat"
+        throw(LoweringError(ex, "{ } syntax is reserved for future use"))
+    elseif k == K"string"
+        if numchildren(ex) == 1 && kind(ex[1]) == K"String"
+            ex[1]
+        else
+            @ast ctx ex [K"call"
+                "string"::K"top"
+                expand_forms_2(ctx, children(ex))...
+            ]
+        end
+    elseif k == K"try"
+        expand_forms_2(ctx, expand_try(ctx, ex))
+    elseif k == K"tuple"
+        if has_parameters(ex)
+            if numchildren(ex) > 1
+                throw(LoweringError(ex[end], "unexpected semicolon in tuple - use `,` to separate tuple elements"))
+            end
+            expand_forms_2(ctx, expand_named_tuple(ctx, ex, children(ex[1])))
+        elseif any_assignment(children(ex))
+            expand_forms_2(ctx, expand_named_tuple(ctx, ex, children(ex)))
+        else
+            expand_forms_2(ctx, @ast ctx ex [K"call"
+                "tuple"::K"core"
+                children(ex)...
+            ])
+        end
+    elseif k == K"$"
+        throw(LoweringError(ex, "`\$` expression outside string or quote block"))
+    elseif k == K"module"
+        throw(LoweringError(ex, "`module` is only allowed at top level"))
+    elseif k == K"import" || k == K"using"
+        expand_import_or_using(ctx, ex)
+    elseif k == K"export" || k == K"public"
+        expand_public(ctx, ex)
+    elseif k == K"abstract" || k == K"primitive"
+        expand_forms_2(ctx, expand_abstract_or_primitive_type(ctx, ex))
+    elseif k == K"struct"
+        expand_forms_2(ctx, expand_struct_def(ctx, ex, docs))
+    elseif k == K"ref"
+        sctx = with_stmts(ctx)
+        (arr, idxs) = expand_ref_components(sctx, ex)
+        expand_forms_2(ctx,
+            @ast ctx ex [K"block"
+                sctx.stmts...
+                [K"call"
+                    "getindex"::K"top"
+                    arr
+                    idxs...
+                ]
+            ]
+        )
+    elseif k == K"curly"
+        expand_forms_2(ctx, expand_curly(ctx, ex))
+    elseif k == K"toplevel"
+        # The toplevel form can't be lowered here - it needs to just be quoted
+        # and passed through to a call to eval.
+        ex2 = @ast ctx ex [K"block"
+            [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]]
+            [K"call"
+                eval                  ::K"Value"
+                ctx.mod               ::K"Value"
+                [K"inert" ex]
+                [K"parameters"
+                    [K"="
+                        "expr_compat_mode"::K"Identifier"
+                        ctx.expr_compat_mode::K"Bool"
+                    ]
+                ]
+            ]
+        ]
+        expand_forms_2(ctx, ex2)
+    elseif k == K"vect"
+        check_no_parameters(ex, "unexpected semicolon in array expression")
+        expand_array(ctx, ex, "vect")
+    elseif k == K"hcat"
+        expand_array(ctx, ex, "hcat")
+    elseif k == K"typed_hcat"
+        expand_array(ctx, ex, "typed_hcat")
+    elseif k == K"opaque_closure"
+        expand_forms_2(ctx, expand_opaque_closure(ctx, ex))
+    elseif k == K"vcat" || k == K"typed_vcat"
+        expand_forms_2(ctx, expand_vcat(ctx, ex))
+    elseif k == K"ncat" || k == K"typed_ncat"
+        expand_forms_2(ctx, expand_ncat(ctx, ex))
+    elseif k == K"while"
+        @chk numchildren(ex) == 2
+        @ast ctx ex [K"break_block" "loop_exit"::K"symbolic_label"
+            [K"_while"
+                expand_condition(ctx, ex[1])
+                [K"break_block" "loop_cont"::K"symbolic_label"
+                    [K"scope_block"(scope_type=:neutral)
+                         expand_forms_2(ctx, ex[2])
+                    ]
+                ]
+            ]
+        ]
+    elseif k == K"inert"
+        ex
+    elseif k == K"gc_preserve"
+        s = ssavar(ctx, ex)
+        r = ssavar(ctx, ex)
+        @ast ctx ex [K"block"
+            s := [K"gc_preserve_begin" children(ex)[2:end]...]
+            r := expand_forms_2(ctx, children(ex)[1])
+            [K"gc_preserve_end" s]
+            r
+        ]
+    elseif k == K"&"
+        throw(LoweringError(ex, "invalid syntax"))
+    elseif k == K"$"
+        throw(LoweringError(ex, "`\$` expression outside string or quote"))
+    elseif k == K"..."
+        throw(LoweringError(ex, "`...` expression outside call"))
+    elseif is_leaf(ex)
+        ex
+    elseif k == K"return"
+        if numchildren(ex) == 0
+            @ast ctx ex [K"return" "nothing"::K"core"]
+        elseif numchildren(ex) == 1
+            mapchildren(e->expand_forms_2(ctx,e), ctx, ex)
+        else
+            throw(LoweringError(ex, "More than one argument to return"))
+        end
+    else
+        mapchildren(e->expand_forms_2(ctx,e), ctx, ex)
+    end
+end
+
+function expand_forms_2(ctx::DesugaringContext, exs::Union{Tuple,AbstractVector})
+    res = SyntaxList(ctx)
+    for e in exs
+        push!(res, expand_forms_2(ctx, e))
+    end
+    res
+end
+
+function expand_forms_2(ctx::StatementListCtx, args...)
+    expand_forms_2(ctx.ctx, args...)
+end
+
+@fzone "JL: desugar" function expand_forms_2(ctx::MacroExpansionContext, ex::SyntaxTree)
+    ctx1 = DesugaringContext(ctx, ctx.expr_compat_mode)
+    ex1 = expand_forms_2(ctx1, reparent(ctx1, ex))
+    ctx1, ex1
+end
diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl
new file mode 100644
index 0000000000000..933f4c31f385f
--- /dev/null
+++ b/JuliaLowering/src/eval.jl
@@ -0,0 +1,530 @@
+# Non-incremental lowering API for non-toplevel non-module expressions.
+# May be removed?
+
+function lower(mod::Module, ex0; expr_compat_mode=false, world=Base.get_world_counter())
+    ctx1, ex1 = expand_forms_1(  mod,  ex0, expr_compat_mode, world)
+    ctx2, ex2 = expand_forms_2(  ctx1, ex1)
+    ctx3, ex3 = resolve_scopes(  ctx2, ex2)
+    ctx4, ex4 = convert_closures(ctx3, ex3)
+    ctx5, ex5 = linearize_ir(    ctx4, ex4)
+    ex5
+end
+
+function macroexpand(mod::Module, ex; expr_compat_mode=false, world=Base.get_world_counter())
+    ctx1, ex1 = expand_forms_1(mod, ex, expr_compat_mode, world)
+    ex1
+end
+
+# Incremental lowering API which can manage toplevel and module expressions.
+#
+# This iteration API is oddly bespoke and arguably somewhat non-Julian for two
+# reasons:
+#
+# * Lowering knows when new modules are required, and may request them with
+#   `:begin_module`. However `eval()` generates those modules so they need to
+#   be passed back into lowering. So we can't just use `Base.iterate()`. (Put a
+#   different way, we have a situation which is suited to coroutines but we
+#   don't want to use full Julia `Task`s for this.)
+# * We might want to implement this `eval()` in Julia's C runtime code or early
+#   in bootstrap. Hence using SimpleVector and Symbol as the return values of
+#   `lower_step()`
+#
+# We might consider changing at least the second of these choices, depending on
+# how we end up putting this into Base.
+
+struct LoweringIterator{GraphType}
+    expr_compat_mode::Bool # later stored in module?
+    todo::Vector{Tuple{SyntaxTree{GraphType}, Bool, Int}}
+end
+
+function lower_init(ex::SyntaxTree{T};
+                    expr_compat_mode::Bool=false) where {T}
+    LoweringIterator{T}(expr_compat_mode, [(ex, false, 0)])
+end
+
+function lower_step(iter, mod, world=Base.get_world_counter())
+    if isempty(iter.todo)
+        return Core.svec(:done)
+    end
+
+    top_ex, is_module_body, child_idx = pop!(iter.todo)
+    if child_idx > 0
+        if child_idx <= numchildren(top_ex)
+            push!(iter.todo, (top_ex, is_module_body, child_idx + 1))
+            ex = top_ex[child_idx]
+        elseif is_module_body
+            return Core.svec(:end_module)
+        else
+            return lower_step(iter, mod)
+        end
+    else
+        ex = top_ex
+    end
+
+    k = kind(ex)
+    if !(k in KSet"toplevel module")
+        ctx1, ex = expand_forms_1(mod, ex, iter.expr_compat_mode, world)
+        k = kind(ex)
+    end
+    if k == K"toplevel"
+        push!(iter.todo, (ex, false, 1))
+        return lower_step(iter, mod)
+    elseif k == K"module"
+        name_or_version = ex[1]
+        version = nothing
+        if kind(name_or_version) == K"VERSION"
+            version = name_or_version.value
+            name = ex[2]
+        else
+            name = name_or_version
+        end
+        if kind(name) != K"Identifier"
+            throw(LoweringError(name, "Expected module name"))
+        end
+        newmod_name = Symbol(name.name_val)
+        body = ex[end]
+        if kind(body) != K"block"
+            throw(LoweringError(body, "Expected block in module body"))
+        end
+        std_defs = !has_flags(ex, JuliaSyntax.BARE_MODULE_FLAG)
+        loc = source_location(LineNumberNode, ex)
+        push!(iter.todo, (body, true, 1))
+        return Core.svec(:begin_module, version, newmod_name, std_defs, loc)
+    else
+        # Non macro expansion parts of lowering
+        ctx2, ex2 = expand_forms_2(ctx1, ex)
+        ctx3, ex3 = resolve_scopes(ctx2, ex2)
+        ctx4, ex4 = convert_closures(ctx3, ex3)
+        ctx5, ex5 = linearize_ir(ctx4, ex4)
+        thunk = to_lowered_expr(ex5)
+        return Core.svec(:thunk, thunk)
+    end
+end
+
+
+#-------------------------------------------------------------------------------
+
+function codeinfo_has_image_globalref(@nospecialize(e))
+    if e isa GlobalRef
+        return 0x00 !== @ccall jl_object_in_image(e.mod::Any)::UInt8
+    elseif e isa Core.CodeInfo
+        return any(codeinfo_has_image_globalref, e.code)
+    else
+        return false
+    end
+end
+
+_CodeInfo_need_ver = v"1.12.0-DEV.512"
+if VERSION < _CodeInfo_need_ver
+    function _CodeInfo(args...)
+        error("Constructing a CodeInfo using JuliaLowering currently requires Julia version $_CodeInfo_need_ver or greater")
+    end
+else
+    # debuginfo changed completely as of https://github.com/JuliaLang/julia/pull/52415
+    # nargs / isva was added as of       https://github.com/JuliaLang/julia/pull/54341
+    # field rettype added in             https://github.com/JuliaLang/julia/pull/54655
+    # field has_image_globalref added in https://github.com/JuliaLang/julia/pull/57433
+    # CodeInfo constructor. TODO: Should be in Core
+    let
+        fns = fieldnames(Core.CodeInfo)
+        fts = fieldtypes(Core.CodeInfo)
+        conversions = [:(convert($t, $n)) for (t,n) in zip(fts, fns)]
+
+        expected_fns = (:code, :debuginfo, :ssavaluetypes, :ssaflags, :slotnames, :slotflags, :slottypes, :rettype, :parent, :edges, :min_world, :max_world, :method_for_inference_limit_heuristics, :nargs, :propagate_inbounds, :has_fcall, :has_image_globalref, :nospecializeinfer, :isva, :inlining, :constprop, :purity, :inlining_cost)
+        expected_fts = (Vector{Any}, Core.DebugInfo, Any, Vector{UInt32}, Vector{Symbol}, Vector{UInt8}, Any, Any, Any, Any, UInt, UInt, Any, UInt, Bool, Bool, Bool, Bool, Bool, UInt8, UInt8, UInt16, UInt16)
+
+        code = if fns != expected_fns
+            unexpected_fns = collect(setdiff(Set(fns), Set(expected_fns)))
+            missing_fns = collect(setdiff(Set(expected_fns), Set(fns)))
+            :(function _CodeInfo(args...)
+                  error("Unrecognized CodeInfo fields: Maybe version $VERSION is too new for this version of JuliaLowering?"
+                         * isempty(unexpected_fns) ? "" : "\nUnexpected fields found: $($unexpected_fns)"
+                         * isempty(missing_fns)    ? "" : "\nMissing fields:          $($missing_fns)")
+              end)
+        elseif fts != expected_fts
+            :(function _CodeInfo(args...)
+                  error("Unrecognized CodeInfo field types: Maybe version $VERSION is too new for this version of JuliaLowering?")
+              end)
+        else
+            :(function _CodeInfo($(fns...))
+                $(Expr(:new, :(Core.CodeInfo), conversions...))
+            end)
+        end
+
+        Core.eval(@__MODULE__, code)
+    end
+end
+
+function _compress_debuginfo(info)
+    filename, edges, codelocs = info
+    edges = Core.svec(map(_compress_debuginfo, edges)...)
+    codelocs = @ccall jl_compress_codelocs((-1)::Int32, codelocs::Any,
+                                           div(length(codelocs),3)::Csize_t)::String
+    Core.DebugInfo(Symbol(filename), nothing, edges, codelocs)
+end
+
+function ir_debug_info_state(ex)
+    e1 = first(flattened_provenance(ex))
+    topfile = filename(e1)
+    [(topfile, [], Vector{Int32}())]
+end
+
+function add_ir_debug_info!(current_codelocs_stack, stmt)
+    locstk = [(filename(e), source_location(e)[1]) for e in flattened_provenance(stmt)]
+    for j in 1:length(locstk)
+        if j === 1 && current_codelocs_stack[j][1] != locstk[j][1]
+            # dilemma: the filename stack here shares no prefix with that of the
+            # previous statement, where differing filenames usually (j > 1) mean
+            # a different macro expansion has started at this statement.  guess
+            # that both files are the same, and inherit the previous filename.
+            locstk[j] = (current_codelocs_stack[j][1], locstk[j][2])
+        end
+        if j < length(current_codelocs_stack) && (j === length(locstk) ||
+                current_codelocs_stack[j+1][1] != locstk[j+1][1])
+            while j < length(current_codelocs_stack)
+                info = pop!(current_codelocs_stack)
+                push!(last(current_codelocs_stack)[2], info)
+            end
+        elseif j > length(current_codelocs_stack)
+            push!(current_codelocs_stack, (locstk[j][1], [], Vector{Int32}()))
+        end
+    end
+    @assert length(locstk) === length(current_codelocs_stack)
+    for (j, (file,line)) in enumerate(locstk)
+        fn, edges, codelocs = current_codelocs_stack[j]
+        @assert fn == file
+        if j < length(locstk)
+            edge_index = length(edges) + 1
+            edge_codeloc_index = fld1(length(current_codelocs_stack[j+1][3]) + 1, 3)
+        else
+            edge_index = 0
+            edge_codeloc_index = 0
+        end
+        push!(codelocs, line)
+        push!(codelocs, edge_index)
+        push!(codelocs, edge_codeloc_index)
+    end
+end
+
+function finish_ir_debug_info!(current_codelocs_stack)
+    while length(current_codelocs_stack) > 1
+        info = pop!(current_codelocs_stack)
+        push!(last(current_codelocs_stack)[2], info)
+    end
+
+    _compress_debuginfo(only(current_codelocs_stack))
+end
+
+# Convert SyntaxTree to the CodeInfo+Expr data structures understood by the
+# Julia runtime
+function to_code_info(ex::SyntaxTree, slots::Vector{Slot}, meta::CompileHints)
+    stmts = Any[]
+
+    current_codelocs_stack = ir_debug_info_state(ex)
+
+    nargs = sum((s.kind==:argument for s in slots), init=0)
+    slotnames = Vector{Symbol}(undef, length(slots))
+    slot_rename_inds = Dict{String,Int}()
+    slotflags = Vector{UInt8}(undef, length(slots))
+    for (i, slot) in enumerate(slots)
+        name = slot.name
+        # TODO: Do we actually want unique names here? The C code in
+        # `jl_new_code_info_from_ir` has logic to simplify gensym'd names and
+        # use the empty string for compiler-generated bindings.
+        ni = get(slot_rename_inds, name, 0)
+        slot_rename_inds[name] = ni + 1
+        if ni > 0
+            name = "$name@$ni"
+        end
+        sname = Symbol(name)
+        slotnames[i] = sname
+        slotflags[i] =                   # Inference          | Codegen
+            slot.is_read          << 3 | # SLOT_USED          | jl_vinfo_sa
+            slot.is_single_assign << 4 | # SLOT_ASSIGNEDONCE  | -
+            slot.is_maybe_undef   << 5 | # SLOT_USEDUNDEF     | jl_vinfo_usedundef
+            slot.is_called        << 6   # SLOT_CALLED        | -
+        if slot.is_nospecialize
+            # Ideally this should be a slot flag instead
+            add_ir_debug_info!(current_codelocs_stack, ex)
+            push!(stmts, Expr(:meta, :nospecialize, Core.SlotNumber(i)))
+        end
+    end
+
+    stmt_offset = length(stmts)
+    for stmt in children(ex)
+        push!(stmts, _to_lowered_expr(stmt, stmt_offset))
+        add_ir_debug_info!(current_codelocs_stack, stmt)
+    end
+
+    debuginfo = finish_ir_debug_info!(current_codelocs_stack)
+
+    has_image_globalref = any(codeinfo_has_image_globalref, stmts)
+
+    # TODO: Set ssaflags based on call site annotations:
+    # - @inbounds annotations
+    # - call site @inline / @noinline
+    # - call site @assume_effects
+    ssaflags = zeros(UInt32, length(stmts))
+
+    propagate_inbounds =
+        get(meta, :propagate_inbounds, false)
+    # TODO: Set true if there's a foreigncall
+    has_fcall = false
+    nospecializeinfer =
+        get(meta, :nospecializeinfer, false)
+    inlining =
+        get(meta, :inline, false) ? 0x01 :
+        get(meta, :noinline, false) ? 0x02 : 0x00
+    constprop =
+        get(meta, :aggressive_constprop, false) ? 0x01 :
+        get(meta, :no_constprop, false) ? 0x02 : 0x00
+    purity =
+        let eo = get(meta, :purity, nothing)
+            isnothing(eo) ? 0x0000 : Base.encode_effects_override(eo)
+        end
+
+    # The following CodeInfo fields always get their default values for
+    # uninferred code.
+    ssavaluetypes      = length(stmts) # Why does the runtime code do this?
+    slottypes          = nothing
+    parent             = nothing
+    method_for_inference_limit_heuristics = nothing
+    edges               = nothing
+    min_world           = Csize_t(1)
+    max_world           = typemax(Csize_t)
+    isva                = false
+    inlining_cost       = 0xffff
+    rettype             = Any
+
+    _CodeInfo(
+        stmts,
+        debuginfo,
+        ssavaluetypes,
+        ssaflags,
+        slotnames,
+        slotflags,
+        slottypes,
+        rettype,
+        parent,
+        edges,
+        min_world,
+        max_world,
+        method_for_inference_limit_heuristics,
+        nargs,
+        propagate_inbounds,
+        has_fcall,
+        has_image_globalref,
+        nospecializeinfer,
+        isva,
+        inlining,
+        constprop,
+        purity,
+        inlining_cost
+    )
+end
+
+@fzone "JL: to_lowered_expr" function to_lowered_expr(ex::SyntaxTree)
+    _to_lowered_expr(ex, 0)
+end
+
+function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int)
+    k = kind(ex)
+    if is_literal(k)
+        ex.value
+    elseif k == K"core"
+        name = ex.name_val
+        if name == "cglobal"
+            # Inference expects cglobal as call argument to be `GlobalRef`,
+            # so we resolve that name as a symbol of `Core.Intrinsics` here.
+            # https://github.com/JuliaLang/julia/blob/7a8cd6e202f1d1216a6c0c0b928fb43a123cada8/Compiler/src/validation.jl#L87
+            GlobalRef(Core.Intrinsics, :cglobal)
+        elseif name == "nothing"
+            # Translate Core.nothing into literal `nothing`s (flisp uses a
+            # special form (null) for this during desugaring, etc)
+            nothing
+        else
+            GlobalRef(Core, Symbol(name))
+        end
+    elseif k == K"top"
+        GlobalRef(Base, Symbol(ex.name_val))
+    elseif k == K"globalref"
+        GlobalRef(ex.mod, Symbol(ex.name_val))
+    elseif k == K"Identifier"
+        # Implicitly refers to name in parent module
+        # TODO: Should we even have plain identifiers at this point or should
+        # they all effectively be resolved into GlobalRef earlier?
+        Symbol(ex.name_val)
+    elseif k == K"SourceLocation"
+        QuoteNode(source_location(LineNumberNode, ex))
+    elseif k == K"Symbol"
+        QuoteNode(Symbol(ex.name_val))
+    elseif k == K"slot"
+        Core.SlotNumber(ex.var_id)
+    elseif k == K"static_parameter"
+        Expr(:static_parameter, ex.var_id)
+    elseif k == K"SSAValue"
+        Core.SSAValue(ex.var_id + stmt_offset)
+    elseif k == K"return"
+        Core.ReturnNode(_to_lowered_expr(ex[1], stmt_offset))
+    elseif k == K"inert"
+        e1 = ex[1]
+        getmeta(ex, :as_Expr, false) ? QuoteNode(Expr(e1)) : e1
+    elseif k == K"code_info"
+        ir = to_code_info(ex[1], ex.slots, ex.meta)
+        if ex.is_toplevel_thunk
+            Expr(:thunk, ir) # TODO: Maybe nice to just return a CodeInfo here?
+        else
+            ir
+        end
+    elseif k == K"Value"
+        ex.value isa LineNumberNode ? QuoteNode(ex.value) : ex.value
+    elseif k == K"goto"
+        Core.GotoNode(ex[1].id + stmt_offset)
+    elseif k == K"gotoifnot"
+        Core.GotoIfNot(_to_lowered_expr(ex[1], stmt_offset), ex[2].id + stmt_offset)
+    elseif k == K"enter"
+        catch_idx = ex[1].id
+        numchildren(ex) == 1 ?
+            Core.EnterNode(catch_idx) :
+            Core.EnterNode(catch_idx, _to_lowered_expr(ex[2], stmt_offset))
+    elseif k == K"method"
+        cs = map(e->_to_lowered_expr(e, stmt_offset), children(ex))
+        # Ad-hoc unwrapping to satisfy `Expr(:method)` expectations
+        cs1 = cs[1]
+        c1 = cs1 isa QuoteNode ? cs1.value : cs1
+        Expr(:method, c1, cs[2:end]...)
+    elseif k == K"newvar"
+        Core.NewvarNode(_to_lowered_expr(ex[1], stmt_offset))
+    elseif k == K"opaque_closure_method"
+        args = map(e->_to_lowered_expr(e, stmt_offset), children(ex))
+        # opaque_closure_method has special non-evaluated semantics for the
+        # `functionloc` line number node so we need to undo a level of quoting
+        arg4 = args[4]
+        @assert arg4 isa QuoteNode
+        args[4] = arg4.value
+        Expr(:opaque_closure_method, args...)
+    elseif k == K"meta"
+        args = Any[_to_lowered_expr(e, stmt_offset) for e in children(ex)]
+        # Unpack K"Symbol" QuoteNode as `Expr(:meta)` requires an identifier here.
+        arg1 = args[1]
+        @assert arg1 isa QuoteNode
+        args[1] = arg1.value
+        Expr(:meta, args...)
+    elseif k == K"static_eval"
+        @assert numchildren(ex) == 1
+        _to_lowered_expr(ex[1], stmt_offset)
+    elseif k == K"cfunction"
+        args = Any[_to_lowered_expr(e, stmt_offset) for e in children(ex)]
+        if kind(ex[2]) == K"static_eval"
+            args[2] = QuoteNode(args[2])
+        end
+        Expr(:cfunction, args...)
+    else
+        # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/
+        #
+        # call invoke static_parameter `=` method struct_type abstract_type
+        # primitive_type global const new splatnew isdefined
+        # enter leave pop_exception inbounds boundscheck loopinfo copyast meta
+        # lambda
+        head = k == K"call"      ? :call       :
+               k == K"new"       ? :new        :
+               k == K"splatnew"  ? :splatnew   :
+               k == K"="         ? :(=)        :
+               k == K"leave"     ? :leave      :
+               k == K"isdefined" ? :isdefined  :
+               k == K"latestworld"       ? :latestworld       :
+               k == K"pop_exception"     ? :pop_exception     :
+               k == K"captured_local"    ? :captured_local    :
+               k == K"gc_preserve_begin" ? :gc_preserve_begin :
+               k == K"gc_preserve_end"   ? :gc_preserve_end   :
+               k == K"foreigncall"       ? :foreigncall       :
+               k == K"new_opaque_closure" ? :new_opaque_closure :
+               nothing
+        if isnothing(head)
+            throw(LoweringError(ex, "Unhandled form for kind $k"))
+        end
+        ret = Expr(head)
+        for e in children(ex)
+            push!(ret.args, _to_lowered_expr(e, stmt_offset))
+        end
+        return ret
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Our version of eval - should be upstreamed though?
+@fzone "JL: eval" function eval(mod::Module, ex::SyntaxTree;
+                                macro_world::UInt=Base.get_world_counter(),
+                                opts...)
+    iter = lower_init(ex; opts...)
+    _eval(mod, iter)
+end
+
+# Version of eval() taking `Expr` (or Expr tree leaves of any type)
+function eval(mod::Module, ex; opts...)
+    eval(mod, expr_to_syntaxtree(ex); opts...)
+end
+
+function _eval(mod, iter)
+    modules = Module[mod]
+    result = nothing
+    while true
+        thunk = lower_step(iter, modules[end])::Core.SimpleVector
+        type = thunk[1]::Symbol
+        if type == :done
+            break
+        elseif type == :begin_module
+            filename = something(thunk[5].file, :none)
+            mod = @ccall jl_begin_new_module(
+                modules[end]::Any, thunk[3]::Symbol, thunk[2]::Any, thunk[4]::Cint,
+                filename::Cstring, thunk[5].line::Cint)::Module
+            push!(modules, mod)
+        elseif type == :end_module
+            @ccall jl_end_new_module(modules[end]::Module)::Cvoid
+            result = pop!(modules)
+        else
+            @assert type == :thunk
+            result = Core.eval(modules[end], thunk[2])
+        end
+    end
+    @assert length(modules) === 1
+    return result
+end
+
+"""
+    include(mod::Module, path::AbstractString)
+
+Evaluate the contents of the input source file in the global scope of module
+`mod`. Every module (except those defined with baremodule) has its own
+definition of `include()` omitting the `mod` argument, which evaluates the file
+in that module. Returns the result of the last evaluated expression of the
+input file. During including, a task-local include path is set to the directory
+containing the file. Nested calls to include will search relative to that path.
+This function is typically used to load source interactively, or to combine
+files in packages that are broken into multiple source files.
+"""
+function include(mod::Module, path::AbstractString)
+    path, prev = Base._include_dependency(mod, path)
+    code = read(path, String)
+    tls = task_local_storage()
+    tls[:SOURCE_PATH] = path
+    try
+        return include_string(mod, code, path)
+    finally
+        if prev === nothing
+            delete!(tls, :SOURCE_PATH)
+        else
+            tls[:SOURCE_PATH] = prev
+        end
+    end
+end
+
+"""
+    include_string(mod::Module, code::AbstractString, filename::AbstractString="string")
+
+Like `include`, except reads code from the given string rather than from a file.
+"""
+function include_string(mod::Module, code::AbstractString, filename::AbstractString="string";
+                        expr_compat_mode=false)
+    eval(mod, parseall(SyntaxTree, code; filename=filename); expr_compat_mode)
+end
diff --git a/JuliaLowering/src/hooks.jl b/JuliaLowering/src/hooks.jl
new file mode 100644
index 0000000000000..ca7ba9a0c3de1
--- /dev/null
+++ b/JuliaLowering/src/hooks.jl
@@ -0,0 +1,62 @@
+"""
+Becomes `Core._lower()` upon activating JuliaLowering.
+
+Returns an svec with the lowered code (usually expr) as its first element, and
+(until integration is less experimental) whatever we want after it
+"""
+function core_lowering_hook(@nospecialize(code), mod::Module,
+                            file="none", line=0, world=typemax(Csize_t), warn=false)
+    if !(code isa SyntaxTree || code isa Expr)
+        # e.g. LineNumberNode, integer...
+        return Core.svec(code)
+    end
+
+    # TODO: fix in base
+    file = file isa Ptr{UInt8} ? unsafe_string(file) : file
+    line = !(line isa Int) ? Int(line) : line
+
+    local st0 = nothing
+    try
+        st0 = code isa Expr ? expr_to_syntaxtree(code, LineNumberNode(line, file)) : code
+        if kind(st0) in KSet"toplevel module"
+            return Core.svec(code)
+        elseif kind(st0) === K"doc" && numchildren(st0) >= 2 && kind(st0[2]) === K"module"
+            # TODO: this ignores module docstrings for now
+            return Core.svec(Expr(st0[2]))
+        end
+        ctx1, st1 = expand_forms_1(  mod,  st0, true, world)
+        ctx2, st2 = expand_forms_2(  ctx1, st1)
+        ctx3, st3 = resolve_scopes(  ctx2, st2)
+        ctx4, st4 = convert_closures(ctx3, st3)
+        ctx5, st5 = linearize_ir(    ctx4, st4)
+        ex = to_lowered_expr(st5)
+        return Core.svec(ex, st5, ctx5)
+    catch exc
+        @info("JuliaLowering threw given input:", code=code, st0=st0, file=file, line=line, mod=mod)
+        rethrow(exc)
+
+        # TODO: Re-enable flisp fallback once we're done collecting errors
+        # @error("JuliaLowering failed — falling back to flisp!",
+        #        exception=(exc,catch_backtrace()),
+        #        code=code, file=file, line=line, mod=mod)
+        # return Base.fl_lower(code, mod, file, line, world, warn)
+    end
+end
+
+# TODO: Write a parser hook here.  The input to `core_lowering_hook` should
+# eventually be a (convertible to) SyntaxTree, but we need to make updates to
+# the parsing API to include a parameter for AST type.
+
+const _has_v1_13_hooks = isdefined(Core, :_lower)
+
+function activate!(enable=true)
+    if !_has_v1_13_hooks
+        error("Cannot use JuliaLowering without `Core._lower` binding or in $VERSION < 1.13")
+    end
+
+    if enable
+        Core._setlowerer!(core_lowering_hook)
+    else
+        Core._setlowerer!(Base.fl_lower)
+    end
+end
diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl
new file mode 100644
index 0000000000000..be88e1b0e41e4
--- /dev/null
+++ b/JuliaLowering/src/kinds.jl
@@ -0,0 +1,170 @@
+# The following kinds are used in intermediate forms by lowering but are not
+# part of the surface syntax
+function _register_kinds()
+    JuliaSyntax.register_kinds!(JuliaLowering, 1, [
+        # "Syntax extensions" - expression kinds emitted by macros or macro
+        # expansion, and known to lowering. These are part of the AST API but
+        # without having surface syntax.
+        "BEGIN_EXTENSION_KINDS"
+            # atomic fields or accesses (see `@atomic`)
+            "atomic"
+            # Flag for @generated parts of a function
+            "generated"
+            # Temporary rooting of identifiers (GC.@preserve)
+            "gc_preserve"
+            "gc_preserve_begin"
+            "gc_preserve_end"
+            # A literal Julia value of any kind, as might be inserted into the
+            # AST during macro expansion
+            "Value"
+            # A (quoted) `Symbol`
+            "Symbol"
+            # QuoteNode; not quasiquote
+            "inert"
+            # TODO: Use `meta` for inbounds and loopinfo etc?
+            "inbounds"
+            "boundscheck"
+            "inline"
+            "noinline"
+            "loopinfo"
+            # Call into foreign code. Emitted by `@ccall`
+            "foreigncall"
+            # Special form for constructing a function callable from C
+            "cfunction"
+            # Special form emitted by `Base.Experimental.@opaque`
+            "opaque_closure"
+            # Test whether a variable is defined
+            "isdefined"
+            # [K"throw_undef_if_not" var cond]
+            # This form is used internally in Core.Compiler but might be
+            # emitted by packages such as Diffractor. In principle it needs to
+            # be passed through lowering in a similar way to `isdefined`
+            "throw_undef_if_not"
+            # named labels for `@label` and `@goto`
+            "symbolic_label"
+            # Goto named label
+            "symbolic_goto"
+            # Internal initializer for struct types, for inner constructors/functions
+            "new"
+            "splatnew"
+            # Used for converting `esc()`'d expressions arising from old macro
+            # invocations during macro expansion (gone after macro expansion)
+            "escape"
+            # Used for converting the old-style macro hygienic-scope form (gone
+            # after macro expansion)
+            "hygienic_scope"
+            # An expression which will eventually be evaluated "statically" in
+            # the context of a CodeInfo and thus allows access only to globals
+            # and static parameters. Used for ccall, cfunction, cglobal
+            # TODO: Use this for GeneratedFunctionStub also?
+            "static_eval"
+            # Catch-all for additional syntax extensions without the need to
+            # extend `Kind`. Known extensions include:
+            #   locals, islocal, isglobal
+            # The content of an assertion is not considered to be quoted, so
+            # use K"Symbol" or K"inert" inside where necessary.
+            "extension"
+        "END_EXTENSION_KINDS"
+
+        # The following kinds are internal to lowering
+        "BEGIN_LOWERING_KINDS"
+            # Semantic assertions used by lowering. The content of an assertion
+            # is not considered to be quoted, so use K"Symbol" etc inside where necessary.
+            "assert"
+            # Unique identifying integer for bindings (of variables, constants, etc)
+            "BindingId"
+            # Various heads harvested from flisp lowering.
+            # (TODO: May or may not need all these - assess later)
+            "break_block"
+            # Like block, but introduces a lexical scope; used during scope resolution.
+            "scope_block"
+            # [K"always_defined" x] is an assertion that variable `x` is assigned before use
+            # ('local-def in flisp implementation is K"local" plus K"always_defined"
+            "always_defined"
+            "_while"
+            "_do_while"
+            "_typevars" # used for supplying already-allocated `TypeVar`s to `where`
+            "with_static_parameters"
+            "top"
+            "core"
+            "lambda"
+            # "A source location literal" - a node which exists only to record
+            # a sourceref
+            "SourceLocation"
+            # [K"function_decl" name]
+            # Declare a zero-method generic function with global `name` or
+            # creates a closure object and assigns it to the local `name`.
+            "function_decl"
+            # [K"function_type name]
+            # Evaluates to the type of the function or closure with given `name`
+            "function_type"
+            # [K"method_defs" name block]
+            # The code in `block` defines methods for generic function `name`
+            "method_defs"
+            # The code in `block` defines methods for generic function `name`
+            "_opaque_closure"
+            # The enclosed statements must be executed at top level
+            "toplevel_butfirst"
+            "assign_or_constdecl_if_global"
+            "moved_local"
+            "label"
+            "trycatchelse"
+            "tryfinally"
+            # The contained block of code causes no side effects and can be
+            # removed by a later lowering pass if its value isn't used.
+            # (That is, it's removable in the same sense as
+            #  `@assume_effects :removable`.)
+            "removable"
+            # Variable type declaration; `x::T = rhs` will be temporarily
+            # desugared to include `(decl x T)`
+            "decl"
+            # [K"captured_local" index]
+            # A local variable captured into a global method. Contains the
+            # `index` of the associated `Box` in the rewrite list.
+            "captured_local"
+            # Causes the linearization pass to conditionally emit a world age increment
+            "latestworld_if_toplevel"
+            # This has two forms:
+            #   [K"constdecl" var val] => declare and assign constant
+            #   [K"constdecl" var]     => declare undefined constant
+            #                             var is GlobalRef Value or Identifier
+            "constdecl"
+            # Returned from statements that should error if the result is used.
+            "unused_only"
+        "END_LOWERING_KINDS"
+
+        # The following kinds are emitted by lowering and used in Julia's untyped IR
+        "BEGIN_IR_KINDS"
+            # Identifier for a value which is only assigned once
+            "SSAValue"
+            # Local variable in a `CodeInfo` code object (including lambda arguments)
+            "slot"
+            # Static parameter to a `CodeInfo` code object ("type parameters" to methods)
+            "static_parameter"
+            # References/declares a global variable within a module
+            "globalref"
+            # Unconditional goto
+            "goto"
+            # Conditional goto
+            "gotoifnot"
+            # Exception handling
+            "enter"
+            "leave"
+            "pop_exception"
+            # Lowering targets for method definitions arising from `function` etc
+            "method"
+            # (re-)initialize a slot to undef
+            # See Core.NewvarNode
+            "newvar"
+            # Result of lowering a `K"lambda"` after bindings have been
+            # converted to slot/globalref/SSAValue.
+            "code_info"
+            # Internal initializer for opaque closures
+            "new_opaque_closure"
+            # Wrapper for the lambda of around opaque closure methods
+            "opaque_closure_method"
+            # World age increment (TODO: use top level assertion and only one latestworld kind)
+            "latestworld"
+        "END_IR_KINDS"
+    ])
+end
diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl
new file mode 100644
index 0000000000000..974c51902caf5
--- /dev/null
+++ b/JuliaLowering/src/linear_ir.jl
@@ -0,0 +1,1144 @@
+#-------------------------------------------------------------------------------
+# Lowering pass 5: Flatten to linear IR
+
+function is_valid_ir_argument(ctx, ex)
+    k = kind(ex)
+    if is_simple_atom(ctx, ex) || k in KSet"inert top core quote static_eval"
+        true
+    elseif k == K"BindingId"
+        binfo = lookup_binding(ctx, ex)
+        bk = binfo.kind
+        bk === :slot
+        # TODO: We should theoretically be able to allow `bk ===
+        # :static_parameter` for slightly more compact IR, but it's uncertain
+        # what the compiler is built to tolerate.  Notably, flisp allows
+        # static_parameter, but doesn't produce this form until a later pass, so
+        # it doesn't end up in the IR.
+    else
+        false
+    end
+end
+
+function is_ssa(ctx, ex)
+    kind(ex) == K"BindingId" && lookup_binding(ctx, ex).is_ssa
+end
+
+# Target to jump to, including info on try handler nesting and catch block
+# nesting
+struct JumpTarget{GraphType}
+    label::SyntaxTree{GraphType}
+    handler_token_stack::SyntaxList{GraphType, Vector{NodeId}}
+    catch_token_stack::SyntaxList{GraphType, Vector{NodeId}}
+end
+
+function JumpTarget(label::SyntaxTree{GraphType}, ctx) where {GraphType}
+    JumpTarget{GraphType}(label, copy(ctx.handler_token_stack), copy(ctx.catch_token_stack))
+end
+
+struct JumpOrigin{GraphType}
+    goto::SyntaxTree{GraphType}
+    index::Int
+    handler_token_stack::SyntaxList{GraphType, Vector{NodeId}}
+    catch_token_stack::SyntaxList{GraphType, Vector{NodeId}}
+end
+
+function JumpOrigin(goto::SyntaxTree{GraphType}, index, ctx) where {GraphType}
+    JumpOrigin{GraphType}(goto, index, copy(ctx.handler_token_stack), copy(ctx.catch_token_stack))
+end
+
+struct FinallyHandler{GraphType}
+    tagvar::SyntaxTree{GraphType}
+    target::JumpTarget{GraphType}
+    exit_actions::Vector{Tuple{Symbol,Union{Nothing,SyntaxTree{GraphType}}}}
+end
+
+function FinallyHandler(tagvar::SyntaxTree{GraphType}, target::JumpTarget) where {GraphType}
+    FinallyHandler{GraphType}(tagvar, target,
+        Vector{Tuple{Symbol, Union{Nothing,SyntaxTree{GraphType}}}}())
+end
+
+
+"""
+Context for creating linear IR.
+
+One of these is created per lambda expression to flatten the body down to
+a sequence of statements (linear IR), which eventually becomes one CodeInfo.
+"""
+struct LinearIRContext{GraphType} <: AbstractLoweringContext
+    graph::GraphType
+    code::SyntaxList{GraphType, Vector{NodeId}}
+    bindings::Bindings
+    next_label_id::Ref{Int}
+    is_toplevel_thunk::Bool
+    lambda_bindings::LambdaBindings
+    return_type::Union{Nothing, SyntaxTree{GraphType}}
+    break_targets::Dict{String, JumpTarget{GraphType}}
+    handler_token_stack::SyntaxList{GraphType, Vector{NodeId}}
+    catch_token_stack::SyntaxList{GraphType, Vector{NodeId}}
+    finally_handlers::Vector{FinallyHandler{GraphType}}
+    symbolic_jump_targets::Dict{String,JumpTarget{GraphType}}
+    symbolic_jump_origins::Vector{JumpOrigin{GraphType}}
+    meta::Dict{Symbol, Any}
+    mod::Module
+end
+
+function LinearIRContext(ctx, is_toplevel_thunk, lambda_bindings, return_type)
+    graph = syntax_graph(ctx)
+    rett = isnothing(return_type) ? nothing : reparent(graph, return_type)
+    GraphType = typeof(graph)
+    LinearIRContext(graph, SyntaxList(ctx), ctx.bindings, Ref(0),
+                    is_toplevel_thunk, lambda_bindings, rett,
+                    Dict{String,JumpTarget{GraphType}}(), SyntaxList(ctx), SyntaxList(ctx),
+                    Vector{FinallyHandler{GraphType}}(), Dict{String,JumpTarget{GraphType}}(),
+                    Vector{JumpOrigin{GraphType}}(), Dict{Symbol, Any}(), ctx.mod)
+end
+
+function current_lambda_bindings(ctx::LinearIRContext)
+    ctx.lambda_bindings
+end
+
+function is_valid_body_ir_argument(ctx, ex)
+    if is_valid_ir_argument(ctx, ex)
+        true
+    elseif kind(ex) == K"BindingId"
+        binfo = lookup_binding(ctx, ex)
+        # Arguments are always defined
+        # TODO: use equiv of vinfo:never-undef when we have it
+        binfo.kind == :argument
+    else
+        false
+    end
+end
+
+function is_simple_arg(ctx, ex)
+    k = kind(ex)
+    return is_simple_atom(ctx, ex) || k == K"BindingId" || k == K"quote" || k == K"inert" ||
+           k == K"top" || k == K"core" || k == K"globalref" || k == K"static_eval"
+end
+
+function is_single_assign_var(ctx::LinearIRContext, ex)
+    kind(ex) == K"BindingId" || return false
+    binfo = lookup_binding(ctx, ex)
+    # Arguments are always single-assign
+    # TODO: Use equiv of vinfo:sa when we have it
+    return binfo.kind == :argument
+end
+
+function is_const_read_arg(ctx, ex)
+    k = kind(ex)
+    # Even if we have side effects, we know that singly-assigned
+    # locals cannot be affected by them so we can inline them anyway.
+    # TODO from flisp: "We could also allow const globals here"
+    return k == K"inert" || k == K"top" || k == K"core" || k == K"static_eval" ||
+        is_simple_atom(ctx, ex) || is_single_assign_var(ctx, ex)
+end
+
+function is_valid_ir_rvalue(ctx, lhs, rhs)
+    return is_ssa(ctx, lhs) ||
+           is_valid_ir_argument(ctx, rhs) ||
+           (kind(lhs) == K"BindingId" &&
+            # FIXME: add: invoke ?
+            kind(rhs) in KSet"new splatnew cfunction isdefined call foreigncall gc_preserve_begin foreigncall new_opaque_closure")
+end
+
+function check_no_local_bindings(ctx, ex, msg)
+    contains_nonglobal_binding = contains_unquoted(ex) do e
+        kind(e) == K"BindingId" && lookup_binding(ctx, e).kind !== :global
+    end
+    if contains_nonglobal_binding
+        throw(LoweringError(ex, msg))
+    end
+end
+
+# evaluate the arguments of a call, creating temporary locations as needed
+function compile_args(ctx, args)
+    # First check if all the arguments are simple (and therefore side-effect free).
+    # Otherwise, we need to use ssa values for all arguments to ensure proper
+    # left-to-right evaluation semantics.
+    all_simple = all(a->is_simple_arg(ctx, a), args)
+    args_out = SyntaxList(ctx)
+    for arg in args
+        arg_val = compile(ctx, arg, true, false)
+        if (all_simple || is_const_read_arg(ctx, arg_val)) && is_valid_body_ir_argument(ctx, arg_val)
+            push!(args_out, arg_val)
+        else
+            push!(args_out, emit_assign_tmp(ctx, arg_val))
+        end
+    end
+    return args_out
+end
+
+function emit(ctx::LinearIRContext, ex)
+    push!(ctx.code, ex)
+    return ex
+end
+
+function emit(ctx::LinearIRContext, srcref, k, args...)
+    emit(ctx, makenode(ctx, srcref, k, args...))
+end
+
+# Emit computation of ex, assigning the result to an ssavar and returning that
+function emit_assign_tmp(ctx::LinearIRContext, ex, name="tmp")
+    tmp = ssavar(ctx, ex, name)
+    emit(ctx, @ast ctx ex [K"=" tmp ex])
+    return tmp
+end
+
+function compile_pop_exception(ctx, srcref, src_tokens, dest_tokens)
+    # It's valid to leave the context of src_tokens for the context of
+    # dest_tokens when src_tokens is the same or nested within dest_tokens.
+    # It's enough to check the token on the top of the dest stack.
+    n = length(dest_tokens)
+    jump_ok = n == 0 || (n <= length(src_tokens) && dest_tokens[n].var_id == src_tokens[n].var_id)
+    jump_ok || throw(LoweringError(srcref, "Attempt to jump into catch block"))
+    if n < length(src_tokens)
+        @ast ctx srcref [K"pop_exception" src_tokens[n+1]]
+    else
+        nothing
+    end
+end
+
+function compile_leave_handler(ctx, srcref, src_tokens, dest_tokens)
+    n = length(dest_tokens)
+    jump_ok = n == 0 || (n <= length(src_tokens) && dest_tokens[n].var_id == src_tokens[n].var_id)
+    jump_ok || throw(LoweringError(srcref, "Attempt to jump into try block"))
+    if n < length(src_tokens)
+        @ast ctx srcref [K"leave" src_tokens[n+1:end]...]
+    else
+        nothing
+    end
+end
+
+function emit_pop_exception(ctx::LinearIRContext, srcref, dest_tokens)
+    pexc = compile_pop_exception(ctx, srcref, ctx.catch_token_stack, dest_tokens)
+    if !isnothing(pexc)
+        emit(ctx, pexc)
+    end
+end
+
+function emit_leave_handler(ctx::LinearIRContext, srcref, dest_tokens)
+    ex = compile_leave_handler(ctx, srcref, ctx.handler_token_stack, dest_tokens)
+    if !isnothing(ex)
+        emit(ctx, ex)
+    end
+end
+
+function emit_jump(ctx, srcref, target::JumpTarget)
+    emit_pop_exception(ctx, srcref, target.catch_token_stack)
+    emit_leave_handler(ctx, srcref, target.handler_token_stack)
+    emit(ctx, @ast ctx srcref [K"goto" target.label])
+end
+
+# Enter the current finally block, either through the landing pad (on_exit ==
+# :rethrow) or via a jump (on_exit ∈ (:return, :break)).
+#
+# An integer tag is created to identify the current code path and select the
+# on_exit action to be taken at finally handler exit.
+function enter_finally_block(ctx, srcref, on_exit, value)
+    @assert on_exit ∈ (:rethrow, :break, :return)
+    handler = last(ctx.finally_handlers)
+    push!(handler.exit_actions, (on_exit, value))
+    tag = length(handler.exit_actions)
+    emit(ctx, @ast ctx srcref [K"=" handler.tagvar tag::K"Integer"])
+    if on_exit != :rethrow
+        emit_jump(ctx, srcref, handler.target)
+    end
+end
+
+# Helper function for emit_return
+function _actually_return(ctx, ex)
+    # TODO: Handle the implicit return coverage hack for #53354 ?
+    rett = ctx.return_type
+    if !isnothing(rett)
+        ex = compile(ctx, convert_for_type_decl(ctx, rett, ex, rett, true), true, false)
+    end
+    simple_ret_val = isempty(ctx.catch_token_stack) ?
+        # returning lambda directly is needed for @generated
+        (is_valid_ir_argument(ctx, ex) || kind(ex) == K"lambda") :
+        is_simple_atom(ctx, ex)
+    if !simple_ret_val
+        ex = emit_assign_tmp(ctx, ex, "return_tmp")
+    end
+    emit_pop_exception(ctx, ex, ())
+    emit(ctx, @ast ctx ex [K"return" ex])
+    return nothing
+end
+
+function emit_return(ctx, srcref, ex)
+    # todo: Mark implicit returns
+    if isnothing(ex)
+        return
+    elseif isempty(ctx.handler_token_stack)
+        _actually_return(ctx, ex)
+        return
+    end
+    # TODO: What's this !is_ssa(ctx, ex) here about?
+    x = if is_simple_atom(ctx, ex) && !(is_ssa(ctx, ex) && !isempty(ctx.finally_handlers))
+        ex
+    elseif !isempty(ctx.finally_handlers)
+        # todo: Why does flisp lowering create a mutable variable here even
+        # though we don't mutate it?
+        # tmp = ssavar(ctx, srcref, "returnval_via_finally") # <- can we use this?
+        tmp = new_local_binding(ctx, srcref, "returnval_via_finally")
+        emit(ctx, @ast ctx srcref [K"=" tmp ex])
+        tmp
+    else
+        emit_assign_tmp(ctx, ex, "returnval_via_finally")
+    end
+    if !isempty(ctx.finally_handlers)
+        enter_finally_block(ctx, srcref, :return, x)
+    else
+        emit(ctx, @ast ctx srcref [K"leave" ctx.handler_token_stack...])
+        _actually_return(ctx, x)
+    end
+    return nothing
+end
+
+function emit_return(ctx, ex)
+    emit_return(ctx, ex, ex)
+end
+
+function emit_break(ctx, ex)
+    name = ex[1].name_val
+    target = get(ctx.break_targets, name, nothing)
+    if isnothing(target)
+        ty = name == "loop_exit" ? "break" : "continue"
+        throw(LoweringError(ex, "$ty must be used inside a `while` or `for` loop"))
+    end
+    if !isempty(ctx.finally_handlers)
+        handler = last(ctx.finally_handlers)
+        if length(target.handler_token_stack) < length(handler.target.handler_token_stack)
+            enter_finally_block(ctx, ex, :break, ex)
+            return
+        end
+    end
+    emit_jump(ctx, ex, target)
+end
+
+# `op` may be either K"=" (where global assignments are converted to setglobal!)
+# or K"constdecl".  flisp: emit-assignment-or-setglobal
+function emit_simple_assignment(ctx, srcref, lhs, rhs, op=K"=")
+    binfo = lookup_binding(ctx, lhs.var_id)
+    if binfo.kind == :global
+        emit(ctx, @ast ctx srcref [
+            K"call"
+            op == K"constdecl" ? "declare_const"::K"core" : "setglobal!"::K"core"
+            binfo.mod::K"Value"
+            binfo.name::K"Symbol"
+            rhs
+        ])
+    else
+        emit(ctx, srcref, op, lhs, rhs)
+    end
+end
+
+function emit_assignment(ctx, srcref, lhs, rhs, op=K"=")
+    if !isnothing(rhs)
+        if is_valid_ir_rvalue(ctx, lhs, rhs)
+            emit_simple_assignment(ctx, srcref, lhs, rhs, op)
+        else
+            r = emit_assign_tmp(ctx, rhs)
+            emit_simple_assignment(ctx, srcref, lhs, r, op)
+        end
+    else
+        # in unreachable code (such as after return); still emit the assignment
+        # so that the structure of those uses is preserved
+        emit_simple_assignment(ctx, srcref, lhs, nothing_(ctx, srcref), op)
+        nothing
+    end
+end
+
+function make_label(ctx, srcref)
+    id = ctx.next_label_id[]
+    ctx.next_label_id[] += 1
+    makeleaf(ctx, srcref, K"label", id=id)
+end
+
+# flisp: make&mark-label
+function emit_label(ctx, srcref)
+    if !isempty(ctx.code)
+        # Use current label if available
+        e = ctx.code[end]
+        if kind(e) == K"label"
+            return e
+        end
+    end
+    l = make_label(ctx, srcref)
+    emit(ctx, l)
+    l
+end
+
+function emit_latestworld(ctx, srcref)
+    (isempty(ctx.code) || kind(last(ctx.code)) != K"latestworld") &&
+        emit(ctx, makeleaf(ctx, srcref, K"latestworld"))
+end
+
+function compile_condition_term(ctx, ex)
+    cond = compile(ctx, ex, true, false)
+    if !is_valid_body_ir_argument(ctx, cond)
+        cond = emit_assign_tmp(ctx, cond)
+    end
+    return cond
+end
+
+# flisp: emit-cond
+function compile_conditional(ctx, ex, false_label)
+    if kind(ex) == K"block"
+        for i in 1:numchildren(ex)-1
+            compile(ctx, ex[i], false, false)
+        end
+        test = ex[end]
+    else
+        test = ex
+    end
+    k = kind(test)
+    if k == K"||"
+        true_label = make_label(ctx, test)
+        for (i,e) in enumerate(children(test))
+            c = compile_condition_term(ctx, e)
+            if i < numchildren(test)
+                next_term_label = make_label(ctx, test)
+                # Jump over short circuit
+                emit(ctx, @ast ctx e [K"gotoifnot" c next_term_label])
+                # Short circuit to true
+                emit(ctx, @ast ctx e [K"goto" true_label])
+                emit(ctx, next_term_label)
+            else
+                emit(ctx, @ast ctx e [K"gotoifnot" c false_label])
+            end
+        end
+        emit(ctx, true_label)
+    elseif k == K"&&"
+        for e in children(test)
+            c = compile_condition_term(ctx, e)
+            emit(ctx, @ast ctx e [K"gotoifnot" c false_label])
+        end
+    else
+        c = compile_condition_term(ctx, test)
+        emit(ctx, @ast ctx test [K"gotoifnot" c false_label])
+    end
+end
+
+# Lowering of exception handling must ensure that
+#
+# * Each `enter` is matched with a `leave` on every possible non-exceptional
+#   program path (including implicit returns generated in tail position).
+# * Each catch block which is entered and handles the exception - by exiting
+#   via a non-exceptional program path - leaves the block with `pop_exception`.
+# * Each `finally` block runs, regardless of any early `return` or jumps
+#   via `break`/`continue`/`goto` etc.
+#
+# These invariants are upheld by tracking the nesting using
+# `handler_token_stack` and `catch_token_stack` and using these when emitting
+# any control flow (return / goto) which leaves the associated block.
+#
+# The following special forms are emitted into the IR:
+#
+#   (= tok (enter catch_label dynscope))
+#     push exception handler with catch block at `catch_label` and dynamic
+#     scope `dynscope`, yielding a token which is used by `leave` and
+#     `pop_exception`. `dynscope` is only used in the special `tryfinally` form
+#     without associated source level syntax (see the `@with` macro)
+#
+#   (leave tok)
+#     pop exception handler back to the state of the `tok` from the associated
+#     `enter`. Multiple tokens can be supplied to pop multiple handlers using
+#     `(leave tok1 tok2 ...)`.
+#
+#   (pop_exception tok) - pop exception stack back to state of associated enter
+#
+# See the devdocs for further discussion.
+function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos)
+    @chk numchildren(ex) <= 3
+    try_block = ex[1]
+    if kind(ex) == K"trycatchelse"
+        catch_block = ex[2]
+        else_block = numchildren(ex) == 2 ? nothing : ex[3]
+        finally_block = nothing
+        catch_label = make_label(ctx, catch_block)
+    else
+        catch_block = nothing
+        else_block = nothing
+        finally_block = ex[2]
+        catch_label = make_label(ctx, finally_block)
+    end
+
+    end_label = !in_tail_pos || !isnothing(finally_block) ? make_label(ctx, ex) : nothing
+    try_result = needs_value && !in_tail_pos ? new_local_binding(ctx, ex, "try_result") : nothing
+
+    # Exception handler block prefix
+    handler_token = ssavar(ctx, ex, "handler_token")
+    emit(ctx, @ast ctx ex [K"="
+        handler_token
+        [K"enter" catch_label]  # TODO: dynscope
+    ])
+    if !isnothing(finally_block)
+        # TODO: Trivial finally block optimization from JuliaLang/julia#52593 (or
+        # support a special form for @with)?
+        finally_handler = FinallyHandler(new_local_binding(ctx, finally_block, "finally_tag"),
+                                         JumpTarget(end_label, ctx))
+        push!(ctx.finally_handlers, finally_handler)
+        emit(ctx, @ast ctx finally_block [K"=" finally_handler.tagvar (-1)::K"Integer"])
+    end
+    push!(ctx.handler_token_stack, handler_token)
+
+    # Try block code.
+    try_val = compile(ctx, try_block, needs_value, false)
+    # Exception handler block postfix
+    if isnothing(else_block)
+        if in_tail_pos
+            if !isnothing(try_val)
+                emit_return(ctx, try_val)
+            end
+        else
+            if needs_value && !isnothing(try_val)
+                emit_assignment(ctx, ex, try_result, try_val)
+            end
+            emit(ctx, @ast ctx ex [K"leave" handler_token])
+        end
+        pop!(ctx.handler_token_stack)
+    else
+        if !isnothing(try_val) && (in_tail_pos || needs_value)
+            emit(ctx, try_val) # TODO: Only for any side effects ?
+        end
+        emit(ctx, @ast ctx ex [K"leave" handler_token])
+        pop!(ctx.handler_token_stack)
+        # Else block code
+        else_val = compile(ctx, else_block, needs_value, in_tail_pos)
+        if !in_tail_pos
+            if needs_value && !isnothing(else_val)
+                emit_assignment(ctx, ex, try_result, else_val)
+            end
+        end
+    end
+    if !in_tail_pos
+        emit(ctx, @ast ctx ex [K"goto" end_label])
+    end
+
+    # Catch pad
+    # Emit either catch or finally block. A combined try/catch/finally block
+    # was split into separate trycatchelse and tryfinally blocks earlier.
+    emit(ctx, catch_label) # <- Exceptional control flow enters here
+    if !isnothing(finally_block)
+        # Attribute the postfix and prefix to the finally block as a whole.
+        srcref = finally_block
+        enter_finally_block(ctx, srcref, :rethrow, nothing)
+        emit(ctx, end_label) # <- Non-exceptional control flow enters here
+        pop!(ctx.finally_handlers)
+        compile(ctx, finally_block, false, false)
+        # Finally block postfix: Emit a branch for every code path which enters
+        # the block to dynamically decide which return/break/rethrow exit action to take
+        for (tag, (on_exit, value)) in Iterators.reverse(enumerate(finally_handler.exit_actions))
+            next_action_label = !in_tail_pos || tag != 1 || on_exit != :return ?
+                make_label(ctx, srcref) : nothing
+            if !isnothing(next_action_label)
+                next_action_label = make_label(ctx, srcref)
+                tmp = ssavar(ctx, srcref, "do_finally_action")
+                emit(ctx, @ast ctx srcref [K"=" tmp
+                    [K"call"
+                        "==="::K"core"
+                        finally_handler.tagvar
+                        tag::K"Integer"
+                    ]
+                ])
+                emit(ctx, @ast ctx srcref [K"gotoifnot" tmp next_action_label])
+            end
+            if on_exit === :return
+                emit_return(ctx, value)
+            elseif on_exit === :break
+                emit_break(ctx, value)
+            elseif on_exit === :rethrow
+                emit(ctx, @ast ctx srcref [K"call" "rethrow"::K"top"])
+            else
+                @assert false
+            end
+            if !isnothing(next_action_label)
+                emit(ctx, next_action_label)
+            end
+        end
+    else
+        push!(ctx.catch_token_stack, handler_token)
+        catch_val = compile(ctx, catch_block, needs_value, in_tail_pos)
+        if !isnothing(try_result) && !isnothing(catch_val)
+            emit_assignment(ctx, ex, try_result, catch_val)
+        end
+        if !in_tail_pos
+            emit(ctx, @ast ctx ex [K"pop_exception" handler_token])
+            emit(ctx, end_label)
+        else
+            # (pop_exception done in emit_return)
+        end
+        pop!(ctx.catch_token_stack)
+    end
+    try_result
+end
+
+# This pass behaves like an interpreter on the given code.
+# To perform stateful operations, it calls `emit` to record that something
+# needs to be done. In value position, it returns an expression computing
+# the needed value.
+function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos)
+    k = kind(ex)
+    if k == K"BindingId" || is_literal(k) || k == K"quote" || k == K"inert" ||
+            k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" ||
+            k == K"SourceLocation" || k == K"static_eval"
+        if in_tail_pos
+            emit_return(ctx, ex)
+        elseif needs_value
+            ex
+        else
+            if k == K"BindingId" && !is_ssa(ctx, ex)
+                emit(ctx, ex) # keep identifiers for undefined-var checking
+            end
+            nothing
+        end
+    elseif k == K"Placeholder"
+        if needs_value
+            throw(LoweringError(ex, "all-underscore identifiers are write-only and their values cannot be used in expressions"))
+        end
+        nothing
+    elseif k == K"TOMBSTONE"
+        @chk !needs_value (ex,"TOMBSTONE encountered in value position")
+        nothing
+    elseif k == K"call" || k == K"new" || k == K"splatnew" || k == K"foreigncall" ||
+            k == K"new_opaque_closure" || k == K"cfunction"
+        callex = makenode(ctx, ex, k, compile_args(ctx, children(ex)))
+        if in_tail_pos
+            emit_return(ctx, ex, callex)
+        elseif needs_value
+            callex
+        else
+            emit(ctx, callex)
+            nothing
+        end
+    elseif k == K"=" || k == K"constdecl"
+        lhs = ex[1]
+        res = if kind(lhs) == K"Placeholder"
+            compile(ctx, ex[2], needs_value, in_tail_pos)
+        elseif k == K"constdecl" && numchildren(ex) == 1
+            # No RHS - make undefined constant
+            mod, name = if kind(ex[1]) == K"BindingId"
+                binfo = lookup_binding(ctx, ex[1])
+                binfo.mod, binfo.name
+            else
+                @assert kind(ex[1]) == K"Value" && typeof(ex[1].value) === GlobalRef
+                gr = ex[1].value
+                gr.mod, String(gr.name)
+            end
+            emit(ctx, @ast ctx ex [K"call" "declare_const"::K"core"
+                                   mod::K"Value" name::K"Symbol"])
+        else
+            rhs = compile(ctx, ex[2], true, false)
+            # TODO look up arg-map for renaming if lhs was reassigned
+            if needs_value && !isnothing(rhs)
+                r = emit_assign_tmp(ctx, rhs)
+                emit_simple_assignment(ctx, ex, lhs, r, k)
+                if in_tail_pos
+                    emit_return(ctx, ex, r)
+                else
+                    r
+                end
+            else
+                emit_assignment(ctx, ex, lhs, rhs, k)
+            end
+        end
+        k == K"constdecl" && emit_latestworld(ctx, ex)
+        res
+    elseif k == K"block" || k == K"scope_block"
+        nc = numchildren(ex)
+        if nc == 0
+            if in_tail_pos
+                emit_return(ctx, nothing_(ctx, ex))
+            elseif needs_value
+                nothing_(ctx, ex)
+            else
+                nothing
+            end
+        else
+            res = nothing
+            for i in 1:nc
+                islast = i == nc
+                res = compile(ctx, ex[i], islast && needs_value, islast && in_tail_pos)
+            end
+            res
+        end
+    elseif k == K"break_block"
+        end_label = make_label(ctx, ex)
+        name = ex[1].name_val
+        outer_target = get(ctx.break_targets, name, nothing)
+        ctx.break_targets[name] = JumpTarget(end_label, ctx)
+        compile(ctx, ex[2], false, false)
+        if isnothing(outer_target)
+            delete!(ctx.break_targets, name)
+        else
+            ctx.break_targets[name] = outer_target
+        end
+        emit(ctx, end_label)
+        if needs_value
+            compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos)
+        end
+    elseif k == K"break"
+        emit_break(ctx, ex)
+    elseif k == K"symbolic_label"
+        label = emit_label(ctx, ex)
+        name = ex.name_val
+        if haskey(ctx.symbolic_jump_targets, name)
+            throw(LoweringError(ex, "Label `$name` defined multiple times"))
+        end
+        push!(ctx.symbolic_jump_targets, name=>JumpTarget(label, ctx))
+        if in_tail_pos
+            emit_return(ctx, ex, nothing_(ctx, ex))
+        elseif needs_value
+            throw(LoweringError(ex, "misplaced label in value position"))
+        end
+    elseif k == K"symbolic_goto"
+        push!(ctx.symbolic_jump_origins, JumpOrigin(ex, length(ctx.code)+1, ctx))
+        emit(ctx, makeleaf(ctx, ex, K"TOMBSTONE")) # ? pop_exception
+        emit(ctx, makeleaf(ctx, ex, K"TOMBSTONE")) # ? leave
+        emit(ctx, makeleaf(ctx, ex, K"TOMBSTONE")) # ? goto
+        nothing
+    elseif k == K"return"
+        compile(ctx, ex[1], true, true)
+        nothing
+    elseif k == K"removable"
+        if needs_value
+            compile(ctx, ex[1], needs_value, in_tail_pos)
+        else
+            nothing
+        end
+    elseif k == K"if" || k == K"elseif"
+        @chk numchildren(ex) <= 3
+        has_else = numchildren(ex) > 2
+        else_label = make_label(ctx, ex)
+        compile_conditional(ctx, ex[1], else_label)
+        if in_tail_pos
+            compile(ctx, ex[2], needs_value, in_tail_pos)
+            emit(ctx, else_label)
+            if has_else
+                compile(ctx, ex[3], needs_value, in_tail_pos)
+            else
+                emit_return(ctx, ex, nothing_(ctx, ex))
+            end
+            nothing
+        else
+            val = needs_value && new_local_binding(ctx, ex, "if_val")
+            v1 = compile(ctx, ex[2], needs_value, in_tail_pos)
+            if needs_value
+                emit_assignment(ctx, ex, val, v1)
+            end
+            if has_else || needs_value
+                end_label = make_label(ctx, ex)
+                emit(ctx, @ast ctx ex [K"goto" end_label])
+            else
+                end_label = nothing
+            end
+            emit(ctx, else_label)
+            v2 = if has_else
+                compile(ctx, ex[3], needs_value, in_tail_pos)
+            elseif needs_value
+                nothing_(ctx, ex)
+            end
+            if needs_value
+                emit_assignment(ctx, ex, val, v2)
+            end
+            if !isnothing(end_label)
+                emit(ctx, end_label)
+            end
+            val
+        end
+    elseif k == K"trycatchelse" || k == K"tryfinally"
+        compile_try(ctx, ex, needs_value, in_tail_pos)
+    elseif k == K"method"
+        # TODO
+        # throw(LoweringError(ex,
+        #     "Global method definition needs to be placed at the top level, or use `eval`"))
+        res = if numchildren(ex) == 1
+            if in_tail_pos
+                emit_return(ctx, ex)
+            elseif needs_value
+                ex
+            else
+                emit(ctx, ex)
+            end
+        else
+            @chk numchildren(ex) == 3
+            fname = ex[1]
+            sig = compile(ctx, ex[2], true, false)
+            if !is_valid_ir_argument(ctx, sig)
+                sig = emit_assign_tmp(ctx, sig)
+            end
+            lam = ex[3]
+            if kind(lam) == K"lambda"
+                lam = compile_lambda(ctx, lam)
+            else
+                lam = emit_assign_tmp(ctx, compile(ctx, lam, true, false))
+            end
+            emit(ctx, ex, K"method", fname, sig, lam)
+            @assert !needs_value && !in_tail_pos
+            nothing
+        end
+        emit_latestworld(ctx, ex)
+        res
+    elseif k == K"opaque_closure_method"
+        @ast ctx ex [K"opaque_closure_method"
+            ex[1]
+            ex[2]
+            ex[3]
+            ex[4]
+            compile_lambda(ctx, ex[5])
+        ]
+    elseif k == K"lambda"
+        lam = compile_lambda(ctx, ex)
+        if in_tail_pos
+            emit_return(ctx, lam)
+        elseif needs_value
+            lam
+        else
+            emit(ctx, lam)
+        end
+    elseif k == K"gc_preserve_begin"
+        makenode(ctx, ex, k, compile_args(ctx, children(ex)))
+    elseif k == K"gc_preserve_end"
+        if needs_value
+            throw(LoweringError(ex, "misplaced kind $k in value position"))
+        end
+        emit(ctx, ex)
+        nothing
+    elseif k == K"meta"
+        @chk numchildren(ex) >= 1
+        if ex[1].name_val in ("inline", "noinline", "propagate_inbounds",
+                              "nospecializeinfer", "aggressive_constprop", "no_constprop")
+            for c in children(ex)
+                ctx.meta[Symbol(c.name_val)] = true
+            end
+        elseif ex[1].name_val === "purity"
+            ctx.meta[Symbol(ex[1].name_val)] = ex[2].value::Base.EffectsOverride
+        else
+            emit(ctx, ex)
+        end
+        if needs_value
+            val = @ast ctx ex "nothing"::K"core"
+            if in_tail_pos
+                emit_return(ctx, val)
+            else
+                val
+            end
+        end
+    elseif k == K"_while"
+        end_label = make_label(ctx, ex)
+        top_label = emit_label(ctx, ex)
+        compile_conditional(ctx, ex[1], end_label)
+        compile(ctx, ex[2], false, false)
+        emit(ctx, @ast ctx ex [K"goto" top_label])
+        emit(ctx, end_label)
+        if needs_value
+            compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos)
+        end
+    elseif k == K"_do_while"
+        end_label = make_label(ctx, ex)
+        top_label = emit_label(ctx, ex)
+        compile(ctx, ex[1], false, false)
+        compile_conditional(ctx, ex[2], end_label)
+        emit(ctx, @ast ctx ex [K"goto" top_label])
+        emit(ctx, end_label)
+        if needs_value
+            compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos)
+        end
+    elseif k == K"isdefined" || k == K"captured_local" || k == K"throw_undef_if_not" ||
+            k == K"boundscheck"
+        if in_tail_pos
+            emit_return(ctx, ex)
+        elseif needs_value
+            ex
+        end
+    elseif k == K"newvar"
+        @assert !needs_value
+        is_duplicate = !isempty(ctx.code) &&
+            (e = last(ctx.code); kind(e) == K"newvar" && e[1].var_id == ex[1].var_id)
+        if !is_duplicate
+            # TODO: also exclude deleted vars
+            emit(ctx, ex)
+        end
+    elseif k == K"latestworld"
+        if needs_value
+            throw(LoweringError(ex, "misplaced latestsworld"))
+        end
+        emit_latestworld(ctx, ex)
+    elseif k == K"latestworld_if_toplevel"
+        ctx.is_toplevel_thunk && emit_latestworld(ctx, ex)
+    elseif k == K"unused_only"
+        if needs_value && !(in_tail_pos && ctx.is_toplevel_thunk)
+            throw(LoweringError(ex,
+                "global declaration doesn't read the variable and can't return a value"))
+        end
+        compile(ctx, ex[1], needs_value, in_tail_pos)
+    else
+        throw(LoweringError(ex, "Invalid syntax; $(repr(k))"))
+    end
+end
+
+function _remove_vars_with_isdefined_check!(vars, ex)
+    if is_leaf(ex) || is_quoted(ex) || kind(ex) == K"static_eval"
+        return
+    elseif kind(ex) == K"isdefined"
+        delete!(vars, ex[1].var_id)
+    else
+        for e in children(ex)
+            _remove_vars_with_isdefined_check!(vars, e)
+        end
+    end
+end
+
+# Find newvar nodes that are unnecessary because
+# 1. The variable is not captured and
+# 2. The variable is assigned before any branches.
+#
+# This is used to remove newvar nodes that are not needed for re-initializing
+# variables to undefined (see Julia issue #11065). It doesn't look for variable
+# *uses*, because any variables used-before-def that also pass this test are
+# *always* used undefined, and therefore don't need to be reinitialized. The
+# one exception to that is `@isdefined`, which can observe an undefined
+# variable without throwing an error.
+function unnecessary_newvar_ids(ctx, stmts)
+    vars = Set{IdTag}()
+    ids_assigned_before_branch = Set{IdTag}()
+    for ex in stmts
+        _remove_vars_with_isdefined_check!(vars, ex)
+        k = kind(ex)
+        if k == K"newvar"
+            id = ex[1].var_id
+            if !lookup_binding(ctx, id).is_captured
+                push!(vars, id)
+            end
+        elseif k == K"goto" || k == K"gotoifnot" || (k == K"=" && kind(ex[2]) == K"enter")
+            empty!(vars)
+        elseif k == K"="
+            id = ex[1].var_id
+            if id in vars
+                delete!(vars, id)
+                push!(ids_assigned_before_branch, id)
+            end
+        end
+    end
+    ids_assigned_before_branch
+end
+
+# flisp: compile-body
+function compile_body(ctx, ex)
+    compile(ctx, ex, true, true)
+
+    # Fix up any symbolic gotos. (We can't do this earlier because the goto
+    # might precede the label definition in unstructured control flow.)
+    for origin in ctx.symbolic_jump_origins
+        name = origin.goto.name_val
+        target = get(ctx.symbolic_jump_targets, name, nothing)
+        if isnothing(target)
+            throw(LoweringError(origin.goto, "label `$name` referenced but not defined"))
+        end
+        i = origin.index
+        pop_ex = compile_pop_exception(ctx, origin.goto, origin.catch_token_stack,
+                                     target.catch_token_stack)
+        if !isnothing(pop_ex)
+            @assert kind(ctx.code[i]) == K"TOMBSTONE"
+            ctx.code[i] = pop_ex
+            i += 1
+        end
+        leave_ex = compile_leave_handler(ctx, origin.goto, origin.handler_token_stack,
+                                         target.handler_token_stack)
+        if !isnothing(leave_ex)
+            @assert kind(ctx.code[i]) == K"TOMBSTONE"
+            ctx.code[i] = leave_ex
+            i += 1
+        end
+        @assert kind(ctx.code[i]) == K"TOMBSTONE"
+        ctx.code[i] = @ast ctx origin.goto [K"goto" target.label]
+    end
+
+    # Filter out unnecessary newvar nodes
+    ids_assigned_before_branch = unnecessary_newvar_ids(ctx, ctx.code)
+    filter!(ctx.code) do ex
+        !(kind(ex) == K"newvar" && ex[1].var_id in ids_assigned_before_branch)
+    end
+end
+
+#-------------------------------------------------------------------------------
+
+# Recursively renumber an expression within linear IR
+# flisp: renumber-stuff
+function _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex)
+    k = kind(ex)
+    if k == K"BindingId"
+        id = ex.var_id
+        if haskey(ssa_rewrites, id)
+            makeleaf(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[id])
+        else
+            new_id = get(slot_rewrites, id, nothing)
+            binfo = lookup_binding(ctx, id)
+            if !isnothing(new_id)
+                sk = binfo.kind == :local || binfo.kind == :argument ? K"slot"             :
+                     binfo.kind == :static_parameter                 ? K"static_parameter" :
+                     throw(LoweringError(ex, "Found unexpected binding of kind $(binfo.kind)"))
+                makeleaf(ctx, ex, sk; var_id=new_id)
+            else
+                if binfo.kind !== :global
+                    throw(LoweringError(ex, "Found unexpected binding of kind $(binfo.kind)"))
+                end
+                makeleaf(ctx, ex, K"globalref", binfo.name, mod=binfo.mod)
+            end
+        end
+    elseif k == K"meta" || k == K"static_eval"
+        # Somewhat-hack for Expr(:meta, :generated, gen) which has
+        # weird top-level semantics for `gen`, but we still need to translate
+        # the binding it contains to a globalref. (TODO: use
+        # static_eval for this meta, somehow)
+        mapchildren(ctx, ex) do e
+            _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, e)
+        end
+    elseif is_literal(k) || is_quoted(k)
+        ex
+    elseif k == K"label"
+        @ast ctx ex label_table[ex.id]::K"label"
+    elseif k == K"code_info"
+        ex
+    else
+        mapchildren(ctx, ex) do e
+            _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, e)
+        end
+    end
+end
+
+# flisp: renumber-lambda, compact-ir
+function renumber_body(ctx, input_code, slot_rewrites)
+    # Step 1: Remove any assignments to SSA variables, record the indices of labels
+    ssa_rewrites = Dict{IdTag,IdTag}()
+    label_table = Dict{Int,Int}()
+    code = SyntaxList(ctx)
+    for ex in input_code
+        k = kind(ex)
+        ex_out = nothing
+        if k == K"=" && is_ssa(ctx, ex[1])
+            lhs_id = ex[1].var_id
+            if is_ssa(ctx, ex[2])
+                # For SSA₁ = SSA₂, record that all uses of SSA₁ should be replaced by SSA₂
+                ssa_rewrites[lhs_id] = ssa_rewrites[ex[2].var_id]
+            else
+                # Otherwise, record which `code` index this SSA value refers to
+                ssa_rewrites[lhs_id] = length(code) + 1
+                ex_out = ex[2]
+            end
+        elseif k == K"label"
+            label_table[ex.id] = length(code) + 1
+        elseif k == K"TOMBSTONE"
+            # remove statement
+        else
+            ex_out = ex
+        end
+        if !isnothing(ex_out)
+            push!(code, ex_out)
+        end
+    end
+
+    # Step 2:
+    # * Translate any SSA uses and labels into indices in the code table
+    # * Translate locals into slot indices
+    for i in 1:length(code)
+        code[i] = _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, code[i])
+    end
+    code
+end
+
+struct Slot
+    name::String
+    kind::Symbol
+    is_nospecialize::Bool
+    is_read::Bool
+    is_single_assign::Bool
+    is_maybe_undef::Bool
+    is_called::Bool
+end
+
+function compile_lambda(outer_ctx, ex)
+    lambda_args = ex[1]
+    static_parameters = ex[2]
+    ret_var = numchildren(ex) == 4 ? ex[4] : nothing
+    # TODO: Add assignments for reassigned arguments to body
+    lambda_bindings = ex.lambda_bindings
+    ctx = LinearIRContext(outer_ctx, ex.is_toplevel_thunk, lambda_bindings, ret_var)
+    compile_body(ctx, ex[3])
+    slots = Vector{Slot}()
+    slot_rewrites = Dict{IdTag,Int}()
+    for arg in children(lambda_args)
+        if kind(arg) == K"Placeholder"
+            # Unused functions arguments like: `_` or `::T`
+            push!(slots, Slot(arg.name_val, :argument, false, false, false, false, false))
+        else
+            @assert kind(arg) == K"BindingId"
+            id = arg.var_id
+            binfo = lookup_binding(ctx, id)
+            lbinfo = lookup_lambda_binding(ctx, id)
+            @assert binfo.kind == :local || binfo.kind == :argument
+            # FIXME: is_single_assign, is_maybe_undef
+            push!(slots, Slot(binfo.name, :argument, binfo.is_nospecialize,
+                              lbinfo.is_read, false, false, lbinfo.is_called))
+            slot_rewrites[id] = length(slots)
+        end
+    end
+    # Sorting the lambda locals is required to remove dependence on Dict iteration order.
+    for (id, lbinfo) in sort(collect(pairs(lambda_bindings.bindings)), by=first)
+        if !lbinfo.is_captured
+            binfo = lookup_binding(ctx.bindings, id)
+            if binfo.kind == :local
+                # FIXME: is_single_assign, is_maybe_undef
+                push!(slots, Slot(binfo.name, :local, false,
+                                  lbinfo.is_read, false, false, lbinfo.is_called))
+                slot_rewrites[id] = length(slots)
+            end
+        end
+    end
+    for (i,arg) in enumerate(children(static_parameters))
+        @assert kind(arg) == K"BindingId"
+        id = arg.var_id
+        info = lookup_binding(ctx.bindings, id)
+        @assert info.kind == :static_parameter
+        slot_rewrites[id] = i
+    end
+    code = renumber_body(ctx, ctx.code, slot_rewrites)
+    meta = CompileHints()
+    for (k, v) in ctx.meta
+        meta = CompileHints(meta, k, v)
+    end
+    @ast ctx ex [K"code_info"(is_toplevel_thunk=ex.is_toplevel_thunk,
+                              slots=slots, meta=meta)
+        [K"block"(ex[3])
+            code...
+        ]
+    ]
+end
+
+"""
+This pass converts nested ASTs in the body of a lambda into a list of
+statements (ie, Julia's linear/untyped IR).
+
+Most of the compliexty of this pass is in lowering structured control flow (if,
+loops, etc) to gotos and exception handling to enter/leave. We also convert
+`K"BindingId"` into K"slot", `K"globalref"` or `K"SSAValue` as appropriate.
+"""
+@fzone "JL: linearize" function linearize_ir(ctx, ex)
+    graph = ensure_attributes(ctx.graph,
+                              slots=Vector{Slot},
+                              mod=Module,
+                              id=Int)
+    # TODO: Cleanup needed - `_ctx` is just a dummy context here. But currently
+    # required to call reparent() ...
+    GraphType = typeof(graph)
+    _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.bindings,
+                           Ref(0), false, LambdaBindings(), nothing,
+                           Dict{String,JumpTarget{typeof(graph)}}(),
+                           SyntaxList(graph), SyntaxList(graph),
+                           Vector{FinallyHandler{GraphType}}(),
+                           Dict{String, JumpTarget{GraphType}}(),
+                           Vector{JumpOrigin{GraphType}}(),
+                           Dict{Symbol, Any}(), ctx.mod)
+    res = compile_lambda(_ctx, reparent(_ctx, ex))
+    _ctx, res
+end
diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl
new file mode 100644
index 0000000000000..708aed4e14fce
--- /dev/null
+++ b/JuliaLowering/src/macro_expansion.jl
@@ -0,0 +1,592 @@
+# Lowering pass 1: Macro expansion, simple normalizations and quote expansion
+
+struct MacroExpansionContext{GraphType} <: AbstractLoweringContext
+    graph::GraphType
+    bindings::Bindings
+    scope_layers::Vector{ScopeLayer}
+    scope_layer_stack::Vector{LayerId}
+    expr_compat_mode::Bool
+    macro_world::UInt
+end
+
+function MacroExpansionContext(graph::SyntaxGraph, mod::Module, expr_compat_mode::Bool, world::UInt)
+    layers = ScopeLayer[ScopeLayer(1, mod, 0, false)]
+    MacroExpansionContext(graph, Bindings(), layers, LayerId[length(layers)], expr_compat_mode, world)
+end
+
+function push_layer!(ctx::MacroExpansionContext, mod::Module, is_macro_expansion::Bool)
+    new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod,
+                           current_layer_id(ctx), is_macro_expansion)
+    push!(ctx.scope_layers, new_layer)
+    push!(ctx.scope_layer_stack, new_layer.id)
+end
+function pop_layer!(ctx::MacroExpansionContext)
+    pop!(ctx.scope_layer_stack)
+end
+
+current_layer(ctx::MacroExpansionContext) = ctx.scope_layers[last(ctx.scope_layer_stack)]
+current_layer_id(ctx::MacroExpansionContext) = last(ctx.scope_layer_stack)
+
+#--------------------------------------------------
+# Expansion of quoted expressions
+function collect_unquoted!(ctx, unquoted, ex, depth)
+    if kind(ex) == K"$" && depth == 0
+        # children(ex) is usually length 1, but for double interpolation it may
+        # be longer and the children may contain K"..." expressions. Wrapping
+        # in a tuple groups the arguments together correctly in those cases.
+        push!(unquoted, @ast ctx ex [K"tuple" children(ex)...])
+    else
+        inner_depth = kind(ex) == K"quote" ? depth + 1 :
+                      kind(ex) == K"$"     ? depth - 1 :
+                      depth
+        for e in children(ex)
+            collect_unquoted!(ctx, unquoted, e, inner_depth)
+        end
+    end
+    return unquoted
+end
+
+function expand_quote(ctx, ex)
+    unquoted = SyntaxList(ctx)
+    collect_unquoted!(ctx, unquoted, ex, 0)
+    # Unlike user-defined macro expansion, we don't call append_sourceref for
+    # the entire expression produced by `quote` expansion. We could, but it
+    # seems unnecessary for `quote` because the surface syntax is a transparent
+    # representation of the expansion process. However, it's useful to add the
+    # extra srcref in a more targeted way for $ interpolations inside
+    # interpolate_ast, so we do that there.
+    #
+    # In principle, particular user-defined macros could opt into a similar
+    # mechanism.
+    #
+    # TODO: Should we try adding a srcref to the `quote` node only for the
+    # extra syntax generated by expand_quote so srcref essentially becomes
+    # (ex, @HERE) ?
+    @ast ctx ex [K"call"
+        interpolate_ast::K"Value"
+        (ctx.expr_compat_mode ? Expr : SyntaxTree)::K"Value"
+        [K"inert"(meta=CompileHints(:as_Expr, ctx.expr_compat_mode)) ex]
+        unquoted...
+    ]
+end
+
+#--------------------------------------------------
+struct MacroContext <: AbstractLoweringContext
+    graph::SyntaxGraph
+    macrocall::Union{SyntaxTree,LineNumberNode,SourceRef}
+    scope_layer::ScopeLayer
+    expr_compat_mode::Bool
+end
+
+function adopt_scope(ex, ctx::MacroContext)
+    adopt_scope(ex, ctx.scope_layer.id)
+end
+
+struct MacroExpansionError <: Exception
+    context::Union{Nothing,MacroContext}
+    ex::SyntaxTree
+    msg::String
+    "The source position relative to the node - may be `:begin` or `:end` or `:all`"
+    position::Symbol
+    "Error that occurred inside the macro function call (`nothing` if no inner exception)"
+    err
+    MacroExpansionError(
+        context::Union{Nothing,MacroContext}, ex::SyntaxTree, msg::AbstractString, position::Symbol,
+        @nospecialize err = nothing
+    ) = new(context, ex, msg, position, err)
+end
+
+function MacroExpansionError(ex::SyntaxTree, msg::AbstractString; position=:all)
+    MacroExpansionError(nothing, ex, msg, position)
+end
+
+function Base.showerror(io::IO, exc::MacroExpansionError)
+    print(io, "MacroExpansionError")
+    ctx = exc.context
+    if !isnothing(ctx)
+        # Use `Expr` formatting to pretty print the macro name for now -
+        # there's quite a lot of special cases. We could alternatively consider
+        # calling sourcetext() though that won't work well if it's a
+        # synthetically-generated macro name path.
+        macname_str = string(Expr(:macrocall, Expr(ctx.macrocall[1]), nothing))
+        print(io, " while expanding ", macname_str,
+              " in module ", ctx.scope_layer.mod)
+    end
+    print(io, ":\n")
+    # TODO: Display niceties:
+    # * Show the full provenance tree somehow, in addition to the primary
+    #   source location we're showing here?
+    # * What if the expression doesn't arise from a source file?
+    # * How to deal with highlighting trivia? Could provide a token kind or
+    #   child position within the raw tree? How to abstract this??
+    src = sourceref(exc.ex)
+    if src isa LineNumberNode
+        highlight(io, src, note=exc.msg)
+    else
+        fb = first_byte(src)
+        lb = last_byte(src)
+        pos = exc.position
+        byterange = pos == :all     ? (fb:lb)   :
+            pos == :begin   ? (fb:fb-1) :
+            pos == :end     ? (lb+1:lb) :
+            error("Unknown position $pos")
+        highlight(io, src.file, byterange, note=exc.msg)
+    end
+    if !isnothing(exc.err)
+        print(io, "\nCaused by:\n")
+        showerror(io, exc.err)
+    end
+end
+
+function fixup_macro_name(ctx::MacroExpansionContext, ex::SyntaxTree)
+    k = kind(ex)
+    if k == K"StrMacroName" || k == K"CmdMacroName"
+        layerid = get(ex, :scope_layer, current_layer_id(ctx))
+        newname = JuliaSyntax.lower_identifier_name(ex.name_val, k)
+        makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid, name_val=newname)
+    elseif k == K"macro_name"
+        @chk numchildren(ex) === 1
+        if kind(ex[1]) === K"."
+            @ast ctx ex [K"." ex[1][1] [K"macro_name" ex[1][2]]]
+        else
+            layerid = get(ex, :scope_layer, current_layer_id(ctx))
+            newname = JuliaSyntax.lower_identifier_name(ex[1].name_val, K"macro_name")
+            makeleaf(ctx, ex[1], ex[1], kind=kind(ex[1]), name_val=newname)
+        end
+    else
+        mapchildren(e->fixup_macro_name(ctx,e), ctx, ex)
+    end
+end
+
+function _eval_dot(world::UInt, mod, ex::SyntaxTree)
+    if kind(ex) === K"."
+        mod = _eval_dot(world, mod, ex[1])
+        ex = ex[2]
+    end
+    kind(ex) in KSet"Identifier Symbol" && mod isa Module ?
+        Base.invoke_in_world(world, getproperty, mod, Symbol(ex.name_val)) :
+        nothing
+end
+
+# If macroexpand(ex[1]) is an identifier or dot-expression, we can simply grab
+# it from the scope layer's module in ctx.macro_world.  Otherwise, we need to
+# eval arbitrary code (which, TODO: does not use the correct world age, and it
+# isn't clear the language is meant to support this).
+function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex0::SyntaxTree)
+    mod = current_layer(ctx).mod
+    ex = fixup_macro_name(ctx, expand_forms_1(ctx, ex0))
+    try
+        if kind(ex) === K"Value"
+            !(ex.value isa GlobalRef) ? ex.value :
+                Base.invoke_in_world(ctx.macro_world, getglobal,
+                                     ex.value.mod, ex.value.name)
+        elseif kind(ex) === K"Identifier"
+            layer = get(ex, :scope_layer, nothing)
+            if !isnothing(layer)
+                mod = ctx.scope_layers[layer].mod
+            end
+            Base.invoke_in_world(ctx.macro_world, getproperty,
+                                 mod, Symbol(ex.name_val))
+        elseif kind(ex) === K"." &&
+                (ed = _eval_dot(ctx.macro_world, mod, ex); !isnothing(ed))
+            ed
+        else
+            # `ex` might contain a nontrivial mix of scope layers so we can't
+            # just `eval()` it, as it's already been partially lowered by this
+            # point.  Instead, we repeat the latter parts of `lower()` here.
+            ctx2, ex2 = expand_forms_2(ctx, ex)
+            ctx3, ex3 = resolve_scopes(ctx2, ex2)
+            ctx4, ex4 = convert_closures(ctx3, ex3)
+            ctx5, ex5 = linearize_ir(ctx4, ex4)
+            expr_form = to_lowered_expr(ex5)
+            ccall(:jl_toplevel_eval, Any, (Any, Any), mod, expr_form)
+        end
+    catch err
+        throw(MacroExpansionError(mctx, ex, "Macro not found", :all, err))
+    end
+end
+
+# Record scope layer information for symbols passed to a macro by setting
+# scope_layer for each expression and also processing any K"escape" arising
+# from previous expansion of old-style macros.
+#
+# See also set_scope_layer()
+function set_macro_arg_hygiene(ctx, ex, layer_ids, layer_idx)
+    k = kind(ex)
+    scope_layer = get(ex, :scope_layer, layer_ids[layer_idx])
+    if is_leaf(ex)
+        makeleaf(ctx, ex, ex; scope_layer=scope_layer)
+    else
+        inner_layer_idx = layer_idx
+        if k == K"escape"
+            inner_layer_idx = layer_idx - 1
+            if inner_layer_idx < 1
+                # If we encounter too many escape nodes, there's probably been
+                # an error in the previous macro expansion.
+                # todo: The error here isn't precise about that - maybe we
+                # should record that macro call expression with the scope layer
+                # if we want to report the error against the macro call?
+                throw(MacroExpansionError(ex, "`escape` node in outer context"))
+            end
+        end
+        mapchildren(e->set_macro_arg_hygiene(ctx, e, layer_ids, inner_layer_idx),
+                    ctx, ex; scope_layer=scope_layer)
+    end
+end
+
+function prepare_macro_args(ctx, mctx, raw_args)
+    macro_args = Any[mctx]
+    for arg in raw_args
+        # Add hygiene information to be carried along with macro arguments.
+        #
+        # Macro call arguments may be either
+        # * Unprocessed by the macro expansion pass
+        # * Previously processed, but spliced into a further macro call emitted by
+        #   a macro expansion.
+        # In either case, we need to set scope layers before passing the
+        # arguments to the macro call.
+        push!(macro_args, set_macro_arg_hygiene(ctx, arg, ctx.scope_layer_stack,
+                                                length(ctx.scope_layer_stack)))
+    end
+    return macro_args
+end
+
+# TODO: Do we need to handle :scope_layer or multiple escapes here?
+# See https://github.com/c42f/JuliaLowering.jl/issues/39
+"""
+Insert a hygienic-scope around each arg of K"toplevel" returned from a macro.
+
+It isn't correct for macro expansion to recurse into a K"toplevel" expression
+since one child may define a macro and the next may use it.  However, not
+recursing now means we lose some important context: the module of the macro we
+just expanded, which is necessary for resolving the identifiers in the
+K"toplevel" AST.  The solution implemented in JuliaLang/julia#53515 was to save
+our place and expand later using `Expr(:hygienic-scope toplevel_child mod)`.
+
+Of course, these hygienic-scopes are also necessary because existing user code
+contains the corresponding escaping, which would otherwise cause errors. We
+already consumed the hygienic-scope that comes with every expansion, but won't
+be looking for escapes under :toplevel, so push hygienic-scope under toplevel
+"""
+function fix_toplevel_expansion(ctx, ex::SyntaxTree, mod::Module, lnn::LineNumberNode)
+    if kind(ex) === K"toplevel"
+        mapchildren(ctx, ex) do e
+            @ast ctx ex [K"hygienic_scope" e mod::K"Value" lnn::K"Value"]
+        end
+    else
+        mapchildren(e->fix_toplevel_expansion(ctx, e, mod, lnn), ctx, ex)
+    end
+end
+
+function expand_macro(ctx, ex)
+    @assert kind(ex) == K"macrocall"
+
+    macname = ex[1]
+    mctx = MacroContext(ctx.graph, ex, current_layer(ctx), ctx.expr_compat_mode)
+    macfunc = eval_macro_name(ctx, mctx, macname)
+    raw_args = ex[2:end]
+    macro_loc = let loc = source_location(LineNumberNode, ex)
+        # Some macros, e.g. @cmd, don't play nicely with file == nothing
+        isnothing(loc.file) ? LineNumberNode(loc.line, :none) : loc
+    end
+    # We use a specific well defined world age for the next checks and macro
+    # expansion invocations. This avoids inconsistencies if the latest world
+    # age changes concurrently.
+    #
+    # TODO: Allow this to be passed in
+    # TODO: hasmethod always returns false for our `typemax(UInt)` meaning
+    # "latest world," which we shouldn't be using.
+    has_new_macro = ctx.macro_world === typemax(UInt) ?
+        hasmethod(macfunc, Tuple{typeof(mctx), typeof.(raw_args)...}) :
+        hasmethod(macfunc, Tuple{typeof(mctx), typeof.(raw_args)...}; world=ctx.macro_world)
+
+    if has_new_macro
+        macro_args = prepare_macro_args(ctx, mctx, raw_args)
+        expanded = try
+            Base.invoke_in_world(ctx.macro_world, macfunc, macro_args...)
+        catch exc
+            newexc = exc isa MacroExpansionError ?
+                MacroExpansionError(mctx, exc.ex, exc.msg, exc.position, exc.err) :
+                MacroExpansionError(mctx, ex, "Error expanding macro", :all, exc)
+            # TODO: We can delete this rethrow when we move to AST-based error propagation.
+            rethrow(newexc)
+        end
+        if expanded isa SyntaxTree
+            if !is_compatible_graph(ctx, expanded)
+                # If the macro has produced syntax outside the macro context,
+                # copy it over. TODO: Do we expect this always to happen?  What
+                # is the API for access to the macro expansion context?
+                expanded = copy_ast(ctx, expanded)
+            end
+        else
+            expanded = @ast ctx ex expanded::K"Value"
+        end
+    else
+        # Compat: attempt to invoke an old-style macro if there's no applicable
+        # method for new-style macro arguments.
+        macro_args = Any[macro_loc, ctx.scope_layers[1].mod]
+        for arg in raw_args
+            # For hygiene in old-style macros, we omit any additional scope
+            # layer information from macro arguments. Old-style macros will
+            # handle that using manual escaping in the macro itself.
+            #
+            # Note that there's one slight incompatibility here for identifiers
+            # interpolated into the `raw_args` from outer macro expansions of
+            # new-style macros which call old-style macros. Instead of seeing
+            # `Expr(:escape)` in such situations, old-style macros will now see
+            # `Expr(:scope_layer)` inside `macro_args`.
+            push!(macro_args, Expr(arg))
+        end
+        expanded = try
+            Base.invoke_in_world(ctx.macro_world, macfunc, macro_args...)
+        catch exc
+            if exc isa MethodError && exc.f === macfunc
+                if !isempty(methods_in_world(macfunc, Tuple{typeof(mctx), Vararg{Any}}, ctx.macro_world))
+                    # If the macro has at least some methods implemented in the
+                    # new style, assume the user meant to call one of those
+                    # rather than any old-style macro methods which might exist
+                    exc = MethodError(macfunc, (prepare_macro_args(ctx, mctx, raw_args)..., ), ctx.macro_world)
+                end
+            end
+            rethrow(MacroExpansionError(mctx, ex, "Error expanding macro", :all, exc))
+        end
+        expanded = expr_to_syntaxtree(ctx, expanded, macro_loc)
+    end
+
+    if kind(expanded) != K"Value"
+        expanded = append_sourceref(ctx, expanded, ex)
+        # Module scope for the returned AST is the module where this particular
+        # method was defined (may be different from `parentmodule(macfunc)`)
+        mod_for_ast = lookup_method_instance(macfunc, macro_args,
+                                             ctx.macro_world).def.module
+        new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod_for_ast,
+                               current_layer_id(ctx), true)
+        push_layer!(ctx, mod_for_ast, true)
+        expanded = expand_forms_1(ctx, expanded)
+        pop_layer!(ctx)
+    end
+    return expanded
+end
+
+# Add a secondary source of provenance to each expression in the tree `ex`.
+function append_sourceref(ctx, ex, secondary_prov)
+    srcref = (ex, secondary_prov)
+    if !is_leaf(ex)
+        if kind(ex) == K"macrocall"
+            makenode(ctx, srcref, ex, children(ex)...)
+        else
+            makenode(ctx, srcref, ex,
+                     map(e->append_sourceref(ctx, e, secondary_prov), children(ex))...)
+        end
+    else
+        makeleaf(ctx, srcref, ex)
+    end
+end
+
+function remove_scope_layer!(ex)
+    if !is_leaf(ex)
+        for c in children(ex)
+            remove_scope_layer!(c)
+        end
+    end
+    deleteattr!(ex, :scope_layer)
+    ex
+end
+
+function remove_scope_layer(ctx, ex)
+    remove_scope_layer!(copy_ast(ctx, ex))
+end
+
+"""
+Lowering pass 1
+
+This pass contains some simple expansion to make the rest of desugaring easier
+to write and expands user defined macros. Macros see the surface syntax, so
+need to be dealt with before other lowering.
+
+* Does identifier normalization
+* Strips semantically irrelevant "container" nodes like parentheses
+* Expands macros
+* Processes quoted syntax turning `K"quote"` into `K"inert"` (eg, expanding
+  interpolations)
+"""
+function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree)
+    k = kind(ex)
+    if k == K"Identifier"
+        name_str = ex.name_val
+        if is_ccall_or_cglobal(name_str)
+            # Lower special identifiers `cglobal` and `ccall` to `K"core"`
+            # pseudo-refs very early so that cglobal and ccall can never be
+            # turned into normal bindings (eg, assigned to)
+            @ast ctx ex name_str::K"core"
+        else
+            k = all(==('_'), name_str) ? K"Placeholder" : K"Identifier"
+            scope_layer = get(ex, :scope_layer, current_layer_id(ctx))
+            makeleaf(ctx, ex, ex; kind=k, scope_layer)
+        end
+    elseif k == K"StrMacroName" || k == K"CmdMacroName" || k == K"macro_name"
+        # These can appear outside of a macrocall, e.g. in `import`
+        e2 = fixup_macro_name(ctx, ex)
+        expand_forms_1(ctx, e2)
+    elseif k == K"var" || k == K"char" || k == K"parens"
+        # Strip "container" nodes
+        @chk numchildren(ex) == 1
+        expand_forms_1(ctx, ex[1])
+    elseif k == K"escape"
+        # For processing of old-style macros
+        @chk numchildren(ex) >= 1 "`escape` requires an argument"
+        if length(ctx.scope_layer_stack) === 1
+            throw(MacroExpansionError(ex, "`escape` node in outer context"))
+        end
+        top_layer = pop!(ctx.scope_layer_stack)
+        escaped_ex = expand_forms_1(ctx, ex[1])
+        push!(ctx.scope_layer_stack, top_layer)
+        escaped_ex
+    elseif k == K"hygienic_scope"
+        @chk numchildren(ex) >= 2 && ex[2].value isa Module (ex,"`hygienic_scope` requires an AST and a module")
+        new_layer = ScopeLayer(length(ctx.scope_layers)+1, ex[2].value,
+                               current_layer_id(ctx), true)
+        push!(ctx.scope_layers, new_layer)
+        push!(ctx.scope_layer_stack, new_layer.id)
+        hyg_ex = expand_forms_1(ctx, ex[1])
+        pop!(ctx.scope_layer_stack)
+        hyg_ex
+    elseif k == K"juxtapose"
+        layerid = get(ex, :scope_layer, current_layer_id(ctx))
+        @chk numchildren(ex) == 2
+        @ast ctx ex [K"call"
+            "*"::K"Identifier"(scope_layer=layerid)
+            expand_forms_1(ctx, ex[1])
+            expand_forms_1(ctx, ex[2])
+        ]
+    elseif k == K"quote"
+        @chk numchildren(ex) == 1
+        # TODO: Upstream should set a general flag for detecting parenthesized
+        # expressions so we don't need to dig into `green_tree` here. Ugh!
+        plain_symbol = has_flags(ex, JuliaSyntax.COLON_QUOTE) &&
+                       kind(ex[1]) == K"Identifier" &&
+                       (sr = sourceref(ex); sr isa SourceRef && kind(sr.green_tree[2]) != K"parens")
+        if plain_symbol
+            # As a compromise for compatibility, we treat non-parenthesized
+            # colon quoted identifiers like `:x` as plain Symbol literals
+            # because these are ubiquitiously used in Julia programs as ad hoc
+            # enum-like entities rather than pieces of AST.
+            @ast ctx ex[1] ex[1]=>K"Symbol"
+        else
+            expand_forms_1(ctx, expand_quote(ctx, ex[1]))
+        end
+    elseif k == K"macrocall"
+        expand_macro(ctx, ex)
+    elseif k == K"toplevel" && length(ctx.scope_layer_stack) > 1
+        fix_toplevel_expansion(ctx, ex, current_layer(ctx).mod,
+                               source_location(LineNumberNode, ex))
+    elseif k == K"module" || k == K"toplevel" || k == K"inert"
+        # Remove scope layer information from any inert syntax which survives
+        # macro expansion so that it doesn't contaminate lowering passes which
+        # are later run against the quoted code. TODO: This works as a first
+        # approximation but is incorrect in general. We need to revisit such
+        # "deferred hygiene" situations (see https://github.com/c42f/JuliaLowering.jl/issues/111)
+        remove_scope_layer(ctx, ex)
+    elseif k == K"." && numchildren(ex) == 2
+        # Handle quoted property access like `x.:(foo)` or `Core.:(!==)`
+        # Unwrap the quote to get the identifier before expansion
+        rhs = ex[2]
+        if kind(rhs) == K"quote" && numchildren(rhs) == 1
+            rhs = rhs[1]
+        end
+        e2 = expand_forms_1(ctx, rhs)
+        if kind(e2) == K"Identifier" || kind(e2) == K"Placeholder"
+            # FIXME: Do the K"Symbol" transformation in the parser??
+            e2 = @ast ctx e2 e2=>K"Symbol"
+        end
+        @ast ctx ex [K"." expand_forms_1(ctx, ex[1]) e2]
+    elseif k == K"cmdstring"
+        @chk numchildren(ex) == 1
+        e2 = @ast ctx ex [K"macrocall" [K"macro_name" "cmd"::K"core"] ex[1]]
+        expand_macro(ctx, e2)
+    elseif (k == K"call" || k == K"dotcall")
+        # Do some initial desugaring of call and dotcall here to simplify
+        # the later desugaring pass
+        args = SyntaxList(ctx)
+        if is_infix_op_call(ex) || is_postfix_op_call(ex)
+            @chk numchildren(ex) >= 2 "Postfix/infix operators must have at least two positional arguments"
+            farg = ex[2]
+            push!(args, ex[1])
+            append!(args, ex[3:end])
+        else
+            @chk numchildren(ex) > 0 "Call expressions must have a function name"
+            farg = ex[1]
+            append!(args, ex[2:end])
+        end
+        if !isempty(args)
+            if kind(args[end]) == K"do"
+                # move do block into first argument location
+                pushfirst!(args, pop!(args))
+            end
+        end
+        if length(args) == 2 && is_same_identifier_like(farg, "^") && kind(args[2]) == K"Integer"
+            # Do literal-pow expansion here as it's later used in both call and
+            # dotcall expansion.
+            @ast ctx ex [k
+                "literal_pow"::K"top"
+                expand_forms_1(ctx, farg)
+                expand_forms_1(ctx, args[1])
+                [K"call"
+                    [K"call"
+                        "apply_type"::K"core"
+                        "Val"::K"top"
+                        args[2]
+                    ]
+                ]
+            ]
+        else
+            if kind(farg) == K"." && numchildren(farg) == 1
+                # (.+)(x,y) is treated as a dotcall
+                k = K"dotcall"
+                farg = farg[1]
+            end
+            # Preserve call type flags (mostly ignored in the next pass as
+            # we've already reordered arguments.)
+            callflags = JuliaSyntax.call_type_flags(ex)
+            @ast ctx ex [k(syntax_flags=(callflags == 0 ? nothing : callflags))
+                expand_forms_1(ctx, farg)
+                (expand_forms_1(ctx, a) for a in args)...
+            ]
+        end
+    elseif is_leaf(ex)
+        ex
+    elseif k == K"<:" || k == K">:" || k == K"-->"
+        # TODO: Should every form get layerid systematically? Or only the ones
+        # which expand_forms_2 needs?
+        layerid = get(ex, :scope_layer, current_layer_id(ctx))
+        mapchildren(e->expand_forms_1(ctx,e), ctx, ex; scope_layer=layerid)
+    else
+        mapchildren(e->expand_forms_1(ctx,e), ctx, ex)
+    end
+end
+
+function ensure_macro_attributes(graph)
+    ensure_attributes(graph,
+                      var_id=IdTag,
+                      scope_layer=LayerId,
+                      __macro_ctx__=Nothing,
+                      meta=CompileHints)
+end
+
+@fzone "JL: macroexpand" function expand_forms_1(mod::Module, ex::SyntaxTree, expr_compat_mode::Bool, macro_world::UInt)
+    if kind(ex) == K"local"
+        # This error assumes we're expanding the body of a top level thunk but
+        # we might want to make that more explicit in the pass system.
+        throw(LoweringError(ex, "local declarations have no effect outside a scope"))
+    end
+    graph = ensure_macro_attributes(syntax_graph(ex))
+    ctx = MacroExpansionContext(graph, mod, expr_compat_mode, macro_world)
+    ex2 = expand_forms_1(ctx, reparent(ctx, ex))
+    graph2 = delete_attributes(graph, :__macro_ctx__)
+    # TODO: Returning the context with pass-specific mutable data is a bad way
+    # to carry state into the next pass. We might fix this by attaching such
+    # data to the graph itself as global attributes?
+    ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, ctx.scope_layer_stack,
+                                 expr_compat_mode, macro_world)
+    return ctx2, reparent(ctx2, ex2)
+end
diff --git a/JuliaLowering/src/precompile.jl b/JuliaLowering/src/precompile.jl
new file mode 100644
index 0000000000000..7a5fccaded4b5
--- /dev/null
+++ b/JuliaLowering/src/precompile.jl
@@ -0,0 +1,27 @@
+# exercise the whole lowering pipeline
+if Base.get_bool_env("JULIA_LOWERING_PRECOMPILE", true)
+    thunks = String[
+        """
+        function foo(xxx, yyy)
+            @nospecialize xxx
+            return Pair{Any,Any}(typeof(xxx), typeof(yyy))
+        end
+        """
+
+        """
+        struct Foo
+            x::Int
+            Foo(x::Int) = new(x)
+            # Foo() = new()
+        end
+        """
+    ]
+    for thunk in thunks
+        stream = JuliaSyntax.ParseStream(thunk)
+        JuliaSyntax.parse!(stream; rule=:all)
+        st0 = JuliaSyntax.build_tree(SyntaxTree, stream; filename=@__FILE__)
+        lwrst = lower(@__MODULE__, st0[1])
+        lwr = to_lowered_expr(lwrst)
+        @assert Meta.isexpr(lwr, :thunk) && only(lwr.args) isa Core.CodeInfo
+    end
+end
diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl
new file mode 100644
index 0000000000000..377934a234b2e
--- /dev/null
+++ b/JuliaLowering/src/runtime.jl
@@ -0,0 +1,459 @@
+# Runtime support for
+# 1. Functions called by the code emitted from lowering
+# 2. Introspecting Julia's state during lowering
+#
+# These should probably all move to `Core` at some point.
+
+#-------------------------------------------------------------------------------
+# Functions/types used by code emitted from lowering, but not called by it directly
+
+# Return the current exception. In JuliaLowering we use this rather than the
+# special form `K"the_exception"` to reduces the number of special forms.
+Base.@assume_effects :removable function current_exception()
+    @ccall jl_current_exception(current_task()::Any)::Any
+end
+
+#--------------------------------------------------
+# Supporting functions for AST interpolation (`quote`)
+struct InterpolationContext{Graph} <: AbstractLoweringContext
+    graph::Graph
+    values::Tuple
+    current_index::Ref{Int}
+end
+
+# Context for `Expr`-based AST interpolation in compat mode
+struct ExprInterpolationContext <: AbstractLoweringContext
+    values::Tuple
+    current_index::Ref{Int}
+end
+
+# Helper functions to make shared interpolation code which works with both
+# SyntaxTree and Expr data structures.
+_interp_kind(ex::SyntaxTree) = kind(ex)
+function _interp_kind(@nospecialize(ex))
+    return (ex isa Expr && ex.head === :quote) ? K"quote" :
+           (ex isa Expr && ex.head === :$)     ? K"$"     :
+           K"None" # Other cases irrelevant to interpolation
+end
+
+_children(ex::SyntaxTree) = children(ex)
+_children(@nospecialize(ex)) = ex isa Expr ? ex.args : ()
+
+_numchildren(ex::SyntaxTree) = numchildren(ex)
+_numchildren(@nospecialize(ex)) = ex isa Expr ? length(ex.args) : 0
+
+_syntax_list(ctx::InterpolationContext) = SyntaxList(ctx)
+_syntax_list(ctx::ExprInterpolationContext) = Any[]
+
+_interp_makenode(ctx::InterpolationContext, ex, args) = makenode(ctx, ex, ex, args)
+_interp_makenode(ctx::ExprInterpolationContext, ex, args) = Expr((ex::Expr).head, args...)
+
+_is_leaf(ex::SyntaxTree) = is_leaf(ex)
+_is_leaf(ex::Expr) = false
+_is_leaf(@nospecialize(ex)) = true
+
+# Produce interpolated node for `$x` syntax
+function _interpolated_value(ctx::InterpolationContext, srcref, ex)
+    if ex isa SyntaxTree
+        if !is_compatible_graph(ctx, ex)
+            ex = copy_ast(ctx, ex)
+        end
+        append_sourceref(ctx, ex, srcref)
+    elseif ex isa Symbol
+        # Plain symbols become identifiers. This is an accommodation for
+        # compatibility to allow `:x` (a Symbol) and `:(x)` (a SyntaxTree) to
+        # be used interchangeably in macros.
+        makeleaf(ctx, srcref, K"Identifier", string(ex))
+    else
+        makeleaf(ctx, srcref, K"Value", ex)
+    end
+end
+
+function _interpolated_value(::ExprInterpolationContext, _, ex)
+    ex
+end
+
+function _interpolate_ast(ctx::ExprInterpolationContext, ex::QuoteNode, depth)
+    out = _interpolate_ast(ctx, Expr(:inert, ex.value), depth)
+    QuoteNode(only(out.args))
+end
+
+function _interpolate_ast(ctx, @nospecialize(ex), depth)
+    _is_leaf(ex) && return ex
+    k = _interp_kind(ex)
+    inner_depth = k == K"quote" ? depth + 1 :
+                  k == K"$"     ? depth - 1 :
+                  depth
+    expanded_children = _syntax_list(ctx)
+
+    for e in _children(ex)
+        if _interp_kind(e) == K"$" && inner_depth == 0
+            vals = ctx.values[ctx.current_index[]]::Tuple
+            ctx.current_index[] += 1
+            for (i,v) in enumerate(vals)
+                srcref = _numchildren(e) == 1 ? e : _children(e)[i]
+                push!(expanded_children, _interpolated_value(ctx, srcref, v))
+            end
+        else
+            push!(expanded_children, _interpolate_ast(ctx, e, inner_depth))
+        end
+    end
+
+    _interp_makenode(ctx, ex, expanded_children)
+end
+
+# Produced by expanding K"quote".  Must create a copy of the AST.  Note that
+# wrapping `ex` in an extra node handles the edge case where the root `ex` is
+# `$` (our recursion is one step removed due to forms like `($ a b)`.)
+function interpolate_ast(::Type{SyntaxTree}, ex::SyntaxTree, values...)
+    # Construct graph for interpolation context. We inherit this from the macro
+    # context where possible by detecting it using __macro_ctx__. This feels
+    # hacky though.
+    #
+    # Perhaps we should use a ScopedValue for this instead or get it from
+    # the macro __context__? None of the options feel great here.
+    graph = nothing
+    for vals in values
+        for v in vals
+            if v isa SyntaxTree && hasattr(syntax_graph(v), :__macro_ctx__)
+                graph = syntax_graph(v)
+                break
+            end
+        end
+    end
+    if isnothing(graph)
+        graph = ensure_attributes(
+            SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType,
+            value=Any, name_val=String, scope_layer=LayerId)
+    end
+    ctx = InterpolationContext(graph, values, Ref(1))
+
+    # We must copy the AST into our context to use it as the source reference of
+    # generated expressions.
+    ex1 = copy_ast(ctx, ex)
+    out = _interpolate_ast(ctx, @ast(ctx, ex1, [K"None" ex1]), 0)
+    length(children(out)) === 1 || throw(
+        LoweringError(ex1, "More than one value in bare `\$` expression"))
+    return only(children(out))
+end
+
+function interpolate_ast(::Type{Expr}, @nospecialize(ex), values...)
+    ctx = ExprInterpolationContext(values, Ref(1))
+    if ex isa Expr && ex.head === :$
+        @assert length(values) === 1
+        if length(ex.args) !== 1
+            throw(LoweringError(
+                expr_to_syntaxtree(ex), "More than one value in bare `\$` expression"))
+        end
+        only(values[1])
+    else
+        _interpolate_ast(ctx, ex, 0)
+    end
+end
+
+#--------------------------------------------------
+# Functions called by closure conversion
+function eval_closure_type(mod::Module, closure_type_name::Symbol, field_names, field_is_box)
+    type_params = Core.TypeVar[]
+    field_types = []
+    for (name, isbox) in zip(field_names, field_is_box)
+        if !isbox
+            T = Core.TypeVar(Symbol(name, "_type"))
+            push!(type_params, T)
+            push!(field_types, T)
+        else
+            push!(field_types, Core.Box)
+        end
+    end
+    type = Core._structtype(mod, closure_type_name,
+                            Core.svec(type_params...),
+                            Core.svec(field_names...),
+                            Core.svec(),
+                            false,
+                            length(field_names))
+    Core._setsuper!(type, Core.Function)
+    Core.declare_const(mod, closure_type_name, type)
+    Core._typebody!(false, type, Core.svec(field_types...))
+    type
+end
+
+# Interpolate captured local variables into the CodeInfo for a global method
+function replace_captured_locals!(codeinfo::Core.CodeInfo, locals::Core.SimpleVector)
+    for (i, ex) in enumerate(codeinfo.code)
+        if Meta.isexpr(ex, :captured_local)
+            codeinfo.code[i] = locals[ex.args[1]::Int]
+        end
+    end
+    codeinfo
+end
+
+#--------------------------------------------------
+# Functions which create modules or mutate their bindings
+
+const _Base_has_eval_import = isdefined(Base, :_eval_import)
+
+function eval_import(imported::Bool, to::Module, from::Union{Expr, Nothing}, paths::Expr...)
+    if _Base_has_eval_import
+        Base._eval_import(imported, to, from, paths...)
+    else
+        head = imported ? :import : :using
+        ex = isnothing(from) ?
+            Expr(head, paths...) :
+            Expr(head, Expr(Symbol(":"), from, paths...))
+        Core.eval(to, ex)
+    end
+end
+
+function eval_using(to::Module, path::Expr)
+    if _Base_has_eval_import
+        Base._eval_using(to, path)
+    else
+        Core.eval(to, Expr(:using, path))
+    end
+end
+
+function eval_public(mod::Module, is_exported::Bool, identifiers)
+    # symbol jl_module_public is no longer exported as of #57765
+    Core.eval(mod, Expr((is_exported ? :export : :public), map(Symbol, identifiers)...))
+end
+
+#--------------------------------------------------
+# Docsystem integration
+function _bind_func_docs!(f, docstr, method_metadata::Core.SimpleVector)
+    mod = parentmodule(f)
+    bind = Base.Docs.Binding(mod, nameof(f))
+    full_sig = method_metadata[1]
+    arg_sig = Tuple{full_sig[2:end]...}
+    lineno = method_metadata[3]
+    metadata = Dict{Symbol, Any}(
+        :linenumber => lineno.line,
+        :module => mod,
+    )
+    if !isnothing(lineno.file)
+        push!(metadata, :path => string(lineno.file))
+    end
+    Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), arg_sig)
+end
+
+function bind_docs!(f::Function, docstr, method_metadata::Core.SimpleVector)
+    _bind_func_docs!(f, docstr, method_metadata)
+end
+
+# Document constructors
+function bind_docs!(::Type{Type{T}}, docstr, method_metadata::Core.SimpleVector) where T
+    _bind_func_docs!(T, docstr, method_metadata)
+end
+
+function bind_docs!(type::Type, docstr, method_metadata::Core.SimpleVector)
+    _bind_func_docs!(type, docstr, method_metadata)
+end
+
+function bind_docs!(type::Type, docstr, lineno::LineNumberNode; field_docs=Core.svec())
+    mod = parentmodule(type)
+    bind = Base.Docs.Binding(mod, nameof(type))
+    metadata = Dict{Symbol, Any}(
+        :linenumber => lineno,
+        :module => mod,
+    )
+    if !isnothing(lineno.file)
+        push!(metadata, :path => string(lineno.file))
+    end
+    if !isempty(field_docs)
+        fd = Dict{Symbol, Any}()
+        fns = fieldnames(type)
+        for i = 1:2:length(field_docs)
+            fd[fns[field_docs[i]]] = field_docs[i+1]
+        end
+        metadata[:fields] = fd
+    end
+    Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), Union{})
+end
+
+"""
+Called in the unfortunate cases (K"call", K".", K"Identifier") where docstrings
+change the semantics of the expressions they annotate, no longer requiring the
+expression to execute.
+"""
+function bind_static_docs!(mod::Module, name::Symbol, docstr, lnn::LineNumberNode, sigtypes::Type)
+    metadata = Dict{Symbol, Any}(
+        :linenumber => lnn.line,
+        :module => mod,
+        :path => something(lnn.file, "none"),
+    )
+    bind = Base.Docs.Binding(mod, name)
+    Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), sigtypes)
+end
+
+#--------------------------------------------------
+# Runtime support infrastructure for `@generated`
+
+# An alternative to Core.GeneratedFunctionStub which works on SyntaxTree rather
+# than Expr.
+struct GeneratedFunctionStub
+    expr_compat_mode::Bool
+    gen::Function
+    srcref::Union{SyntaxTree,LineNumberNode,SourceRef}
+    argnames::Core.SimpleVector
+    spnames::Core.SimpleVector
+end
+
+# Call the `@generated` code generator function and wrap the results of the
+# expression into a CodeInfo.
+#
+# `args` passed into stub by the Julia runtime are (parent_func, static_params..., arg_types...)
+function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize args...)
+    # Some of the lowering pipeline from lower() and the pass-specific setup is
+    # re-implemented here because generated functions are very much (but not
+    # entirely) like macro expansion.
+    #
+    # TODO: Reduce duplication where possible.
+
+    # Attributes from parsing
+    graph = ensure_attributes(SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType,
+                              value=Any, name_val=String)
+    # Attributes for macro expansion
+    graph = ensure_attributes(ensure_macro_attributes(graph),
+                              # Additional attribute for resolve_scopes, for
+                              # adding our custom lambda below
+                              is_toplevel_thunk=Bool,
+                              toplevel_pure=Bool,
+                              )
+
+    __module__ = source.module
+
+    # Macro expansion. Note that we expand in `tls_world_age()` (see
+    # Core.GeneratedFunctionStub)
+    macro_world = Base.tls_world_age()
+    ctx1 = MacroExpansionContext(graph, __module__, g.expr_compat_mode, macro_world)
+
+    layer = only(ctx1.scope_layers)
+
+    # Run code generator - this acts like a macro expander and like a macro
+    # expander it gets a MacroContext.
+    mctx = MacroContext(syntax_graph(ctx1), g.srcref, layer, g.expr_compat_mode)
+    ex0 = g.gen(mctx, args...)
+    if ex0 isa Expr
+        ex0 = expr_to_syntaxtree(ctx1, ex0, source_location(LineNumberNode, g.srcref))
+    end
+    if ex0 isa SyntaxTree
+        if !is_compatible_graph(ctx1, ex0)
+            # If the macro has produced syntax outside the macro context, copy it over.
+            # TODO: Do we expect this always to happen?  What is the API for access
+            # to the macro expansion context?
+            ex0 = copy_ast(ctx1, ex0)
+        end
+    else
+        ex0 = @ast ctx1 g.srcref ex0::K"Value"
+    end
+    # Expand any macros emitted by the generator
+    ex1 = expand_forms_1(ctx1, reparent(ctx1, ex0))
+    ctx1 = MacroExpansionContext(delete_attributes(graph, :__macro_ctx__),
+                                 ctx1.bindings, ctx1.scope_layers,
+                                 ctx1.scope_layer_stack, g.expr_compat_mode,
+                                 macro_world)
+    ex1 = reparent(ctx1, ex1)
+
+    # Desugaring
+    ctx2, ex2 = expand_forms_2(ctx1, ex1)
+
+    # Wrap expansion in a non-toplevel lambda and run scope resolution
+    ex2 = @ast ctx2 ex0 [K"lambda"(is_toplevel_thunk=false, toplevel_pure=true)
+        [K"block"
+            (adopt_scope(string(n)::K"Identifier", layer) for n in g.argnames)...
+        ]
+        [K"block"
+            (adopt_scope(string(n)::K"Identifier", layer) for n in g.spnames)...
+        ]
+        ex2
+    ]
+    ctx3, ex3 = resolve_scopes(ctx2, ex2)
+
+    # Rest of lowering
+    ctx4, ex4 = convert_closures(ctx3, ex3)
+    ctx5, ex5 = linearize_ir(ctx4, ex4)
+    ci = to_lowered_expr(ex5)
+    @assert ci isa Core.CodeInfo
+
+    # See GeneratedFunctionStub code in base/expr.jl
+    ci.isva = source.isva
+    code = ci.code
+    bindings = IdSet{Core.Binding}()
+    for i = 1:length(code)
+        stmt = code[i]
+        if isa(stmt, GlobalRef)
+            push!(bindings, convert(Core.Binding, stmt))
+        end
+    end
+    if !isempty(bindings)
+        ci.edges = Core.svec(bindings...)
+    end
+
+    return ci
+end
+
+
+#-------------------------------------------------------------------------------
+# The following functions are called directly by lowering to inspect Julia's state.
+
+# Get the binding for `name` if one is already resolved in module `mod`. Note
+# that we cannot use `isdefined(::Module, ::Symbol)` here, because that causes
+# binding resolution which is a massive side effect we must avoid in lowering.
+function _get_module_binding(mod, name; create=false)
+    b = @ccall jl_get_module_binding(mod::Module, name::Symbol, create::Cint)::Ptr{Core.Binding}
+    b == C_NULL ? nothing : unsafe_pointer_to_objref(b)
+end
+
+# Return true if a `name` is defined in and *by* the module `mod`.
+# Has no side effects, unlike isdefined()
+#
+# (This should do what fl_defined_julia_global does for flisp lowering)
+function is_defined_and_owned_global(mod, name)
+    Base.binding_kind(mod, name) === Base.PARTITION_KIND_GLOBAL
+end
+
+# "Reserve" a binding: create the binding if it doesn't exist but do not assign
+# to it.
+function reserve_module_binding(mod, name)
+    # TODO: Fix the race condition here: We should really hold the Module's
+    # binding lock during this test-and-set type operation. But the binding
+    # lock is only accessible from C. See also the C code in
+    # `fl_module_unique_name`.
+    if _get_module_binding(mod, name; create=false) === nothing
+        _get_module_binding(mod, name; create=true) !== nothing
+    else
+        return false
+    end
+end
+
+# Reserve a global binding named "$basename#$i" in module `mod` for the
+# smallest `i` starting at `0`.
+#
+# TODO: Remove the use of this where possible. Currently this is used within
+# lowering to create unique global names for keyword function bodies and
+# closure types as a more local alternative to current-julia-module-counter.
+# However, we should ideally defer it to eval-time to make lowering itself
+# completely non-mutating.
+function reserve_module_binding_i(mod, basename)
+    i = 0
+    while true
+        name = "$basename$i"
+        if reserve_module_binding(mod, Symbol(name))
+            return name
+        end
+        i += 1
+    end
+end
+
+function lookup_method_instance(func, args, world::Integer)
+    allargs = Vector{Any}(undef, length(args) + 1)
+    allargs[1] = func
+    allargs[2:end] = args
+    mi = @ccall jl_method_lookup(allargs::Ptr{Any}, length(allargs)::Csize_t,
+                                 world::Csize_t)::Ptr{Cvoid}
+    return mi == C_NULL ? nothing : unsafe_pointer_to_objref(mi)
+end
+
+# Like `Base.methods()` but with world age support
+function methods_in_world(func, arg_sig, world)
+    Base._methods(func, arg_sig, -1, world)
+end
diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl
new file mode 100644
index 0000000000000..ce3f0fba23b76
--- /dev/null
+++ b/JuliaLowering/src/scope_analysis.jl
@@ -0,0 +1,814 @@
+# Lowering pass 3: scope and variable analysis
+
+"""
+Key to use when transforming names into bindings
+"""
+struct NameKey
+    name::String
+    layer::LayerId
+end
+
+function Base.isless(a::NameKey, b::NameKey)
+    (a.name, a.layer) < (b.name, b.layer)
+end
+
+function NameKey(ex::SyntaxTree)
+    @chk kind(ex) == K"Identifier"
+    NameKey(ex.name_val, ex.scope_layer)
+end
+
+#-------------------------------------------------------------------------------
+_insert_if_not_present!(dict, key, val) = get!(dict, key, val)
+
+function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, ex)
+    k = kind(ex)
+    if k == K"Identifier"
+        _insert_if_not_present!(used_names, NameKey(ex), ex)
+    elseif k == K"BindingId"
+        push!(used_bindings, ex.var_id)
+    elseif is_leaf(ex) || is_quoted(k) ||
+            k in KSet"scope_block lambda module toplevel"
+        return
+    elseif k == K"local"
+        if getmeta(ex, :is_destructured_arg, false)
+            push!(destructured_args, ex[1])
+        else
+            _insert_if_not_present!(locals, NameKey(ex[1]), ex)
+        end
+    elseif k == K"global"
+        if !(kind(ex[1]) == K"Value" && ex[1].value isa GlobalRef)
+            _insert_if_not_present!(globals, NameKey(ex[1]), ex)
+        end
+    elseif k == K"assign_or_constdecl_if_global"
+        # like v = val, except that if `v` turns out global(either implicitly or
+        # by explicit `global`), it gains an implicit `const`
+        _insert_if_not_present!(assignments, NameKey(ex[1]), ex)
+    elseif k == K"=" || k == K"constdecl"
+        v = decl_var(ex[1])
+        if !(kind(v) in KSet"BindingId globalref Value Placeholder")
+            _insert_if_not_present!(assignments, NameKey(v), v)
+        end
+        if k != K"constdecl" || numchildren(ex) == 2
+            _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, ex[2])
+        end
+    elseif k == K"function_decl"
+        v = ex[1]
+        kv = kind(v)
+        if kv == K"Identifier"
+            _insert_if_not_present!(assignments, NameKey(v), v)
+        elseif kv == K"BindingId"
+            binfo = lookup_binding(ctx, v)
+            if !binfo.is_ssa && binfo.kind != :global
+                @assert false "allow local BindingId as function name?"
+            end
+        else
+            @assert false
+        end
+    else
+        for e in children(ex)
+            _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, e)
+        end
+    end
+end
+
+# Find names of all identifiers used in the given expression, grouping them
+# into sets by type of usage.
+#
+# NB: This only works properly after desugaring
+function find_scope_vars(ctx, ex)
+    ExT = typeof(ex)
+    assignments = Dict{NameKey,ExT}()
+    locals = Dict{NameKey,ExT}()
+    destructured_args = Vector{ExT}()
+    globals = Dict{NameKey,ExT}()
+    used_names = Dict{NameKey,ExT}()
+    used_bindings = Set{IdTag}()
+    for e in children(ex)
+        _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, e)
+    end
+
+    # Sort by key so that id generation is deterministic
+    assignments = sort!(collect(pairs(assignments)), by=first)
+    locals      = sort!(collect(pairs(locals)),      by=first)
+    globals     = sort!(collect(pairs(globals)),     by=first)
+    used_names  = sort!(collect(pairs(used_names)),  by=first)
+    used_bindings = sort!(collect(used_bindings))
+
+    return assignments, locals, destructured_args, globals, used_names, used_bindings
+end
+
+struct ScopeInfo
+    # True if scope is the global top level scope
+    is_toplevel_global_scope::Bool
+    # True if scope is part of top level code, or a non-lambda scope nested
+    # inside top level code. Thus requiring special scope resolution rules.
+    in_toplevel_thunk::Bool
+    # Soft/hard scope. For top level thunks only
+    is_soft::Bool
+    is_hard::Bool
+    # Map from variable names to IDs which appear in this scope but not in the
+    # parent scope
+    # TODO: Rename to `locals` or local_bindings?
+    var_ids::Dict{NameKey,IdTag}
+    # Bindings used by the enclosing lambda
+    lambda_bindings::LambdaBindings
+end
+
+struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext
+    graph::GraphType
+    bindings::Bindings
+    mod::Module
+    scope_layers::Vector{ScopeLayer}
+    # name=>id mappings for all discovered global vars
+    global_vars::Dict{NameKey,IdTag}
+    # Stack of name=>id mappings for each scope, innermost scope last.
+    scope_stack::Vector{ScopeInfo}
+    # Variables which were implicitly global due to being assigned to in top
+    # level code
+    implicit_toplevel_globals::Set{NameKey}
+end
+
+function ScopeResolutionContext(ctx)
+    graph = ensure_attributes(ctx.graph, lambda_bindings=LambdaBindings)
+    ScopeResolutionContext(graph,
+                           ctx.bindings,
+                           ctx.mod,
+                           ctx.scope_layers,
+                           Dict{NameKey,IdTag}(),
+                           Vector{ScopeInfo}(),
+                           Set{NameKey}())
+end
+
+function current_lambda_bindings(ctx::ScopeResolutionContext)
+    last(ctx.scope_stack).lambda_bindings
+end
+
+function lookup_var(ctx, varkey::NameKey, exclude_toplevel_globals=false)
+    for i in lastindex(ctx.scope_stack):-1:1
+        ids = ctx.scope_stack[i].var_ids
+        id = get(ids, varkey, nothing)
+        if !isnothing(id) && (!exclude_toplevel_globals ||
+                              i > 1 || lookup_binding(ctx, id).kind != :global)
+            return id
+        end
+    end
+    return exclude_toplevel_globals ? nothing : get(ctx.global_vars, varkey, nothing)
+end
+
+function var_kind(ctx, id::IdTag)
+    lookup_binding(ctx, id).kind
+end
+
+function var_kind(ctx, varkey::NameKey, exclude_toplevel_globals=false)
+    id = lookup_var(ctx, varkey, exclude_toplevel_globals)
+    isnothing(id) ? nothing : lookup_binding(ctx, id).kind
+end
+
+function init_binding(ctx, srcref, varkey::NameKey, kind::Symbol; kws...)
+    id = kind === :global ? get(ctx.global_vars, varkey, nothing) : nothing
+    if isnothing(id)
+        mod = kind === :global ? ctx.scope_layers[varkey.layer].mod : nothing
+        ex = new_binding(ctx, srcref, varkey.name, kind; mod=mod, kws...)
+        id = ex.var_id
+    end
+    if kind === :global
+        ctx.global_vars[varkey] = id
+    end
+    id
+end
+
+# Add lambda arguments and static parameters
+function add_lambda_args(ctx, var_ids, args, args_kind)
+    for arg in args
+        ka = kind(arg)
+        if ka == K"Identifier"
+            varkey = NameKey(arg)
+            if haskey(var_ids, varkey)
+                vk = lookup_binding(ctx, var_ids[varkey]).kind
+                _is_arg(k) = k == :argument || k == :local
+                msg = _is_arg(vk) && _is_arg(args_kind) ? "function argument name not unique"         :
+                      vk == :static_parameter && args_kind == :static_parameter ? "function static parameter name not unique" :
+                      "static parameter name not distinct from function argument"
+                throw(LoweringError(arg, msg))
+            end
+            is_always_defined = args_kind == :argument
+            id = init_binding(ctx, arg, varkey, args_kind;
+                              is_nospecialize=getmeta(arg, :nospecialize, false),
+                              is_always_defined=is_always_defined)
+            var_ids[varkey] = id
+        elseif ka != K"BindingId" && ka != K"Placeholder"
+            throw(LoweringError(arg, "Unexpected lambda arg kind"))
+        end
+    end
+end
+
+# Analyze identifier usage within a scope
+# * Allocate a new binding for each identifier which the scope introduces.
+# * Record the identifier=>binding mapping in a lookup table
+# * Return a `ScopeInfo` with the mapping plus additional scope metadata
+function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false,
+                       lambda_args=nothing, lambda_static_parameters=nothing)
+    parentscope = isempty(ctx.scope_stack) ? nothing : ctx.scope_stack[end]
+    is_outer_lambda_scope = kind(ex) == K"lambda"
+    in_toplevel_thunk = is_toplevel_global_scope ||
+        (!is_outer_lambda_scope && parentscope.in_toplevel_thunk)
+
+    assignments, locals, destructured_args, globals,
+        used_names, used_bindings = find_scope_vars(ctx, ex)
+
+    # Construct a mapping from identifiers to bindings
+    #
+    # This will contain a binding ID for each variable which is introduced by
+    # the scope, including
+    # * Explicit locals
+    # * Explicit globals
+    # * Implicit locals created by assignment
+    var_ids = Dict{NameKey,IdTag}()
+
+    if !isnothing(lambda_args)
+        add_lambda_args(ctx, var_ids, lambda_args, :argument)
+        add_lambda_args(ctx, var_ids, lambda_static_parameters, :static_parameter)
+        add_lambda_args(ctx, var_ids, destructured_args, :local)
+    end
+
+    # Add explicit locals
+    for (varkey,e) in locals
+        if haskey(var_ids, varkey)
+            vk = lookup_binding(ctx, var_ids[varkey]).kind
+            if vk === :argument && is_outer_lambda_scope
+                throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with an argument"))
+            elseif vk === :static_parameter
+                throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter"))
+            end
+        elseif var_kind(ctx, varkey) === :static_parameter
+            throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter"))
+        else
+            var_ids[varkey] = init_binding(ctx, e[1], varkey, :local)
+        end
+    end
+
+    # Add explicit globals
+    for (varkey,e) in globals
+        if haskey(var_ids, varkey)
+            vk = lookup_binding(ctx, var_ids[varkey]).kind
+            if vk === :local
+                throw(LoweringError(e, "Variable `$(varkey.name)` declared both local and global"))
+            elseif vk === :argument && is_outer_lambda_scope
+                throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with an argument"))
+            elseif vk === :static_parameter
+                throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter"))
+            end
+        elseif var_kind(ctx, varkey) === :static_parameter
+            throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter"))
+        end
+        var_ids[varkey] = init_binding(ctx, e[1], varkey, :global)
+    end
+
+    # Compute implicit locals and globals
+    if is_toplevel_global_scope
+        is_hard_scope = false
+        is_soft_scope = false
+
+        # Assignments are implicitly global at top level, unless they come from
+        # a macro expansion
+        for (varkey,e) in assignments
+            vk = haskey(var_ids, varkey) ?
+                 lookup_binding(ctx, var_ids[varkey]).kind :
+                 var_kind(ctx, varkey, true)
+            if vk === nothing
+                if ctx.scope_layers[varkey.layer].is_macro_expansion
+                    var_ids[varkey] = init_binding(ctx, e, varkey, :local)
+                else
+                    init_binding(ctx, e, varkey, :global)
+                    push!(ctx.implicit_toplevel_globals, varkey)
+                end
+            end
+        end
+    else
+        is_hard_scope = in_toplevel_thunk && (parentscope.is_hard || scope_type === :hard)
+        is_soft_scope = in_toplevel_thunk && !is_hard_scope &&
+                        (scope_type === :neutral ? parentscope.is_soft : scope_type === :soft)
+
+        # Outside top level code, most assignments create local variables implicitly
+        for (varkey,e) in assignments
+            vk = haskey(var_ids, varkey) ?
+                 lookup_binding(ctx, var_ids[varkey]).kind :
+                 var_kind(ctx, varkey, true)
+            if vk === :static_parameter
+                throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter"))
+            elseif vk !== nothing
+                continue
+            end
+            # Assignment is to a newly discovered variable name
+            is_ambiguous_local = false
+            if in_toplevel_thunk && !is_hard_scope
+                # In a top level thunk but *inside* a nontrivial scope
+                layer = ctx.scope_layers[varkey.layer]
+                if !layer.is_macro_expansion && (varkey in ctx.implicit_toplevel_globals ||
+                        is_defined_and_owned_global(layer.mod, Symbol(varkey.name)))
+                    # Special scope rules to make assignments to globals work
+                    # like assignments to locals do inside a function.
+                    if is_soft_scope
+                        # Soft scope (eg, for loop in REPL) => treat as a global
+                        init_binding(ctx, e, varkey, :global)
+                        continue
+                    else
+                        # Ambiguous case (eg, nontrivial scopes in package top level code)
+                        # => Treat as local but generate warning when assigned to
+                        is_ambiguous_local = true
+                    end
+                end
+            end
+            var_ids[varkey] = init_binding(ctx, e, varkey, :local;
+                                           is_ambiguous_local=is_ambiguous_local)
+        end
+    end
+
+    #--------------------------------------------------
+    # At this point we've discovered all the bindings defined in this scope and
+    # added them to `var_ids`.
+    #
+    # Next we record information about how the new bindings relate to the
+    # enclosing lambda
+    # * All non-globals are recorded (kind :local and :argument will later be turned into slots)
+    # * Captured variables are detected and recorded
+    #
+    # TODO: Move most or-all of this to the VariableAnalysis sub-pass
+    lambda_bindings = if is_outer_lambda_scope
+        if isempty(lambda_args)
+            LambdaBindings()
+        else
+            selfarg = first(lambda_args)
+            selfid = kind(selfarg) == K"BindingId" ?
+                     selfarg.var_id : var_ids[NameKey(selfarg)]
+            LambdaBindings(selfid)
+        end
+    else
+        parentscope.lambda_bindings
+    end
+
+    for id in values(var_ids)
+        binfo = lookup_binding(ctx, id)
+        if !binfo.is_ssa && binfo.kind !== :global
+            init_lambda_binding(lambda_bindings, id)
+        end
+    end
+
+    # FIXME: This assumes used bindings are internal to the lambda and cannot
+    # be from the environment, and also assumes they are assigned. That's
+    # correct for now but in general we should go by the same code path that
+    # identifiers do.
+    for id in used_bindings
+        binfo = lookup_binding(ctx, id)
+        if (binfo.kind === :local && !binfo.is_ssa) || binfo.kind === :argument ||
+                binfo.kind === :static_parameter
+            if !has_lambda_binding(lambda_bindings, id)
+                init_lambda_binding(lambda_bindings, id)
+            end
+        end
+    end
+
+    for (varkey, e) in used_names
+        id = haskey(var_ids, varkey) ? var_ids[varkey] : lookup_var(ctx, varkey)
+        if id === nothing
+            # Identifiers which are used but not defined in some scope are
+            # newly discovered global bindings
+            init_binding(ctx, e, varkey, :global)
+        elseif !in_toplevel_thunk
+            binfo = lookup_binding(ctx, id)
+            if binfo.kind !== :global
+                if !has_lambda_binding(lambda_bindings, id)
+                    # Used vars from a scope *outside* the current lambda are captured
+                    init_lambda_binding(lambda_bindings, id, is_captured=true)
+                    update_binding!(ctx, id; is_captured=true)
+                end
+            end
+        end
+    end
+
+    if !in_toplevel_thunk
+        for (varkey,_) in assignments
+            id = haskey(var_ids, varkey) ? var_ids[varkey] : lookup_var(ctx, varkey)
+            binfo = lookup_binding(ctx, id)
+            if binfo.kind !== :global
+                if !has_lambda_binding(lambda_bindings, id)
+                    # Assigned vars from a scope *outside* the current lambda are captured
+                    init_lambda_binding(lambda_bindings, id, is_captured=true)
+                    update_binding!(ctx, id; is_captured=true)
+                end
+            end
+        end
+    end
+
+    return ScopeInfo(is_toplevel_global_scope, in_toplevel_thunk, is_soft_scope,
+                     is_hard_scope, var_ids, lambda_bindings)
+end
+
+function add_local_decls!(ctx, stmts, srcref, scope)
+    # Add local decls to start of block so that closure conversion can
+    # initialize if necessary.
+    for id in sort!(collect(values(scope.var_ids)))
+        binfo = lookup_binding(ctx, id)
+        if binfo.kind == :local
+            push!(stmts, @ast ctx srcref [K"local" binding_ex(ctx, id)])
+        end
+    end
+end
+
+function _resolve_scopes(ctx, ex::SyntaxTree)
+    k = kind(ex)
+    if k == K"Identifier"
+        @ast ctx ex lookup_var(ctx, NameKey(ex))::K"BindingId"
+    elseif is_leaf(ex) || is_quoted(ex) || k == K"toplevel"
+        ex
+    # elseif k == K"global"
+    #     ex
+    elseif k == K"local"
+        # Local declarations have a value of `nothing` according to flisp
+        # lowering.
+        # TODO: Should local decls be disallowed in value position?
+        @ast ctx ex "nothing"::K"core"
+    elseif k == K"decl"
+        ex_out = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex)
+        name = ex_out[1]
+        if kind(name) != K"Placeholder"
+            binfo = lookup_binding(ctx, name)
+            if binfo.kind == :global && !ctx.scope_stack[end].in_toplevel_thunk
+                throw(LoweringError(ex, "type declarations for global variables must be at top level, not inside a function"))
+            end
+        end
+        id = ex_out[1]
+        if kind(id) != K"Placeholder"
+            binfo = lookup_binding(ctx, id)
+            if !isnothing(binfo.type)
+                throw(LoweringError(ex, "multiple type declarations found for `$(binfo.name)`"))
+            end
+            update_binding!(ctx, id; type=ex_out[2])
+        end
+        ex_out
+    elseif k == K"always_defined"
+        id = lookup_var(ctx, NameKey(ex[1]))
+        update_binding!(ctx, id; is_always_defined=true)
+        makeleaf(ctx, ex, K"TOMBSTONE")
+    elseif k == K"lambda"
+        is_toplevel_thunk = ex.is_toplevel_thunk
+        scope = analyze_scope(ctx, ex, nothing, is_toplevel_thunk,
+                              children(ex[1]), children(ex[2]))
+
+        push!(ctx.scope_stack, scope)
+        arg_bindings = _resolve_scopes(ctx, ex[1])
+        sparm_bindings = _resolve_scopes(ctx, ex[2])
+        body_stmts = SyntaxList(ctx)
+        add_local_decls!(ctx, body_stmts, ex, scope)
+        body = _resolve_scopes(ctx, ex[3])
+        if kind(body) == K"block"
+            append!(body_stmts, children(body))
+        else
+            push!(body_stmts, body)
+        end
+        ret_var = numchildren(ex) == 4 ? _resolve_scopes(ctx, ex[4]) : nothing
+        pop!(ctx.scope_stack)
+
+        @ast ctx ex [K"lambda"(lambda_bindings=scope.lambda_bindings,
+                               is_toplevel_thunk=is_toplevel_thunk,
+                               toplevel_pure=false)
+            arg_bindings
+            sparm_bindings
+            [K"block"
+                body_stmts...
+            ]
+            ret_var
+        ]
+    elseif k == K"scope_block"
+        scope = analyze_scope(ctx, ex, ex.scope_type)
+        push!(ctx.scope_stack, scope)
+        stmts = SyntaxList(ctx)
+        add_local_decls!(ctx, stmts, ex, scope)
+        for e in children(ex)
+            push!(stmts, _resolve_scopes(ctx, e))
+        end
+        pop!(ctx.scope_stack)
+        @ast ctx ex [K"block" stmts...]
+    elseif k == K"extension"
+        etype = extension_type(ex)
+        if etype == "islocal"
+            id = lookup_var(ctx, NameKey(ex[2]))
+            islocal = !isnothing(id) && var_kind(ctx, id) != :global
+            @ast ctx ex islocal::K"Bool"
+        elseif etype == "isglobal"
+            e2 = ex[2]
+            @chk kind(e2) in KSet"Identifier Placeholder"
+            isglobal = if kind(e2) == K"Identifier"
+                id = lookup_var(ctx, NameKey(e2))
+                isnothing(id) || var_kind(ctx, id) == :global
+            else
+                false
+            end
+            @ast ctx ex isglobal::K"Bool"
+        elseif etype == "locals"
+            stmts = SyntaxList(ctx)
+            locals_dict = ssavar(ctx, ex, "locals_dict")
+            push!(stmts, @ast ctx ex [K"="
+                locals_dict
+                [K"call"
+                    [K"call"
+                        "apply_type"::K"core"
+                        "Dict"::K"top"
+                        "Symbol"::K"core"
+                        "Any"::K"core"
+                    ]
+                ]
+            ])
+            for scope in ctx.scope_stack
+                for id in values(scope.var_ids)
+                    binfo = lookup_binding(ctx, id)
+                    if binfo.kind == :global || binfo.is_internal
+                        continue
+                    end
+                    binding = binding_ex(ctx, id)
+                    push!(stmts, @ast ctx ex [K"if"
+                        [K"isdefined" binding]
+                        [K"call"
+                            "setindex!"::K"top"
+                            locals_dict
+                            binding
+                            binfo.name::K"Symbol"
+                        ]
+                    ])
+                end
+            end
+            push!(stmts, locals_dict)
+            makenode(ctx, ex, K"block", stmts)
+        end
+    elseif k == K"assert"
+        etype = extension_type(ex)
+        if etype == "require_existing_locals"
+            for v in ex[2:end]
+                vk = var_kind(ctx, NameKey(v))
+                if vk !== :local
+                    throw(LoweringError(v, "`outer` annotations must match with a local variable in an outer scope but no such variable was found"))
+                end
+            end
+        elseif etype == "global_toplevel_only"
+            if !ctx.scope_stack[end].is_toplevel_global_scope
+                e = ex[2][1]
+                throw(LoweringError(e, "$(kind(e)) is only allowed in global scope"))
+            end
+        elseif etype == "toplevel_only"
+            if !ctx.scope_stack[end].in_toplevel_thunk
+                e = ex[2][1]
+                throw(LoweringError(e, "this syntax is only allowed in top level code"))
+            end
+        else
+            throw(LoweringError(ex, "Unknown syntax assertion"))
+        end
+        makeleaf(ctx, ex, K"TOMBSTONE")
+    elseif k == K"function_decl"
+        resolved = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex)
+        name = resolved[1]
+        if kind(name) == K"BindingId"
+            bk = lookup_binding(ctx, name).kind
+            if bk == :argument
+                throw(LoweringError(name, "Cannot add method to a function argument"))
+            elseif bk == :global && !ctx.scope_stack[end].in_toplevel_thunk
+                throw(LoweringError(name,
+                    "Global method definition needs to be placed at the top level, or use `eval()`"))
+            end
+        end
+        resolved
+    elseif k == K"assign_or_constdecl_if_global"
+        id = _resolve_scopes(ctx, ex[1])
+        bk = lookup_binding(ctx, id).kind
+        @assert numchildren(ex) === 2
+        assignment_kind = bk == :global ? K"constdecl" : K"="
+        @ast ctx ex _resolve_scopes(ctx, [assignment_kind ex[1] ex[2]])
+    else
+        mapchildren(e->_resolve_scopes(ctx, e), ctx, ex)
+    end
+end
+
+function _resolve_scopes(ctx, exs::AbstractVector)
+    out = SyntaxList(ctx)
+    for e in exs
+        push!(out, _resolve_scopes(ctx, e))
+    end
+    out
+end
+
+#-------------------------------------------------------------------------------
+# Sub-pass to compute additional information about variable usage as required
+# by closure conversion, etc
+struct ClosureBindings
+    name_stack::Vector{String}      # Names of functions the closure is nested within
+    lambdas::Vector{LambdaBindings} # Bindings for each method of the closure
+end
+
+ClosureBindings(name_stack) = ClosureBindings(name_stack, Vector{LambdaBindings}())
+
+struct VariableAnalysisContext{GraphType} <: AbstractLoweringContext
+    graph::GraphType
+    bindings::Bindings
+    mod::Module
+    lambda_bindings::LambdaBindings
+    # Stack of method definitions for closure naming
+    method_def_stack::SyntaxList{GraphType}
+    # Collection of information about each closure, principally which methods
+    # are part of the closure (and hence captures).
+    closure_bindings::Dict{IdTag,ClosureBindings}
+end
+
+function VariableAnalysisContext(graph, bindings, mod, lambda_bindings)
+    VariableAnalysisContext(graph, bindings, mod, lambda_bindings,
+                            SyntaxList(graph), Dict{IdTag,ClosureBindings}())
+end
+
+function current_lambda_bindings(ctx::VariableAnalysisContext)
+    ctx.lambda_bindings
+end
+
+function init_closure_bindings!(ctx, fname)
+    func_name_id = fname.var_id
+    @assert lookup_binding(ctx, func_name_id).kind === :local
+    get!(ctx.closure_bindings, func_name_id) do
+        name_stack = Vector{String}()
+        for parentname in ctx.method_def_stack
+            if kind(parentname) == K"BindingId"
+                push!(name_stack, lookup_binding(ctx, parentname).name)
+            end
+        end
+        push!(name_stack, lookup_binding(ctx, func_name_id).name)
+        ClosureBindings(name_stack)
+    end
+end
+
+function find_any_local_binding(ctx, ex)
+    k = kind(ex)
+    if k == K"BindingId"
+        bkind = lookup_binding(ctx, ex.var_id).kind
+        if bkind != :global && bkind != :static_parameter
+            return ex
+        end
+    elseif !is_leaf(ex) && !is_quoted(ex)
+        for e in children(ex)
+            r = find_any_local_binding(ctx, e)
+            if !isnothing(r)
+                return r
+            end
+        end
+    end
+    return nothing
+end
+
+# Update ctx.bindings and ctx.lambda_bindings metadata based on binding usage
+function analyze_variables!(ctx, ex)
+    k = kind(ex)
+    if k == K"BindingId"
+        if has_lambda_binding(ctx, ex)
+            # TODO: Move this after closure conversion so that we don't need
+            # to model the closure conversion transformations here.
+            update_lambda_binding!(ctx, ex, is_read=true)
+        else
+            binfo = lookup_binding(ctx, ex.var_id)
+            if !binfo.is_ssa && binfo.kind != :global
+                # The type of typed locals is invisible in the previous pass,
+                # but is filled in here.
+                init_lambda_binding(ctx.lambda_bindings, ex.var_id, is_captured=true, is_read=true)
+                update_binding!(ctx, ex, is_captured=true)
+            end
+        end
+    elseif is_leaf(ex) || is_quoted(ex)
+        return
+    elseif k == K"static_eval"
+        badvar = find_any_local_binding(ctx, ex[1])
+        if !isnothing(badvar)
+            name_hint = getmeta(ex, :name_hint, "syntax")
+            throw(LoweringError(badvar, "$(name_hint) cannot reference local variable"))
+        end
+        return
+    elseif k == K"local" || k == K"global"
+        # Presence of BindingId within local/global is ignored.
+        return
+    elseif k == K"="
+        lhs = ex[1]
+        if kind(lhs) != K"Placeholder"
+            update_binding!(ctx, lhs, add_assigned=1)
+            if has_lambda_binding(ctx, lhs)
+                update_lambda_binding!(ctx, lhs, is_assigned=true)
+            end
+            lhs_binfo = lookup_binding(ctx, lhs)
+            if !isnothing(lhs_binfo.type)
+                # Assignments introduce a variable's type later during closure
+                # conversion, but we must model that explicitly here.
+                analyze_variables!(ctx, lhs_binfo.type)
+            end
+        end
+        analyze_variables!(ctx, ex[2])
+    elseif k == K"function_decl"
+        name = ex[1]
+        if lookup_binding(ctx, name.var_id).kind === :local
+            init_closure_bindings!(ctx, name)
+        end
+        update_binding!(ctx, name, add_assigned=1)
+        if has_lambda_binding(ctx, name)
+            update_lambda_binding!(ctx, name, is_assigned=true)
+        end
+    elseif k == K"function_type"
+        if kind(ex[1]) != K"BindingId" || lookup_binding(ctx, ex[1]).kind !== :local
+            analyze_variables!(ctx, ex[1])
+        end
+    elseif k == K"constdecl"
+        id = ex[1]
+        if kind(id) == K"BindingId"
+            if lookup_binding(ctx, id).kind == :local
+                throw(LoweringError(ex, "unsupported `const` declaration on local variable"))
+            end
+            update_binding!(ctx, id; is_const=true)
+        end
+    elseif k == K"call"
+        name = ex[1]
+        if kind(name) == K"BindingId"
+            id = name.var_id
+            if has_lambda_binding(ctx, id)
+                # TODO: Move this after closure conversion so that we don't need
+                # to model the closure conversion transformations.
+                update_lambda_binding!(ctx, id, is_called=true)
+            end
+        end
+        foreach(e->analyze_variables!(ctx, e), children(ex))
+    elseif k == K"method_defs"
+        push!(ctx.method_def_stack, ex[1])
+        analyze_variables!(ctx, ex[2])
+        pop!(ctx.method_def_stack)
+    elseif k == K"_opaque_closure"
+        name = ex[1]
+        init_closure_bindings!(ctx, name)
+        push!(ctx.method_def_stack, name)
+        analyze_variables!(ctx, ex[2])
+        analyze_variables!(ctx, ex[3])
+        analyze_variables!(ctx, ex[4])
+        analyze_variables!(ctx, ex[9])
+        pop!(ctx.method_def_stack)
+    elseif k == K"lambda"
+        lambda_bindings = ex.lambda_bindings
+        if !ex.is_toplevel_thunk && !isempty(ctx.method_def_stack)
+            # Record all lambdas for the same closure type in one place
+            func_name = last(ctx.method_def_stack)
+            if kind(func_name) == K"BindingId"
+                func_name_id = func_name.var_id
+                if lookup_binding(ctx, func_name_id).kind === :local
+                    push!(ctx.closure_bindings[func_name_id].lambdas, lambda_bindings)
+                end
+            end
+        end
+        ctx2 = VariableAnalysisContext(ctx.graph, ctx.bindings, ctx.mod, lambda_bindings,
+                                       ctx.method_def_stack, ctx.closure_bindings)
+        foreach(e->analyze_variables!(ctx2, e), ex[3:end]) # body & return type
+        for (id,lbinfo) in pairs(lambda_bindings.bindings)
+            if lbinfo.is_captured
+                # Add any captured bindings to the enclosing lambda, if necessary.
+                outer_lbinfo = lookup_lambda_binding(ctx.lambda_bindings, id)
+                if isnothing(outer_lbinfo)
+                    # Inner lambda captures a variable. If it's not yet present
+                    # in the outer lambda, the outer lambda must capture it as
+                    # well so that the closure associated to the inner lambda
+                    # can be initialized when `function_decl` is hit.
+                    init_lambda_binding(ctx.lambda_bindings, id, is_captured=true, is_read=true)
+                end
+            end
+        end
+    else
+        foreach(e->analyze_variables!(ctx, e), children(ex))
+    end
+    nothing
+end
+
+function resolve_scopes(ctx::ScopeResolutionContext, ex)
+    if kind(ex) != K"lambda"
+        # Wrap in a top level thunk if we're not already expanding a lambda.
+        # (Maybe this should be done elsewhere?)
+        ex = @ast ctx ex [K"lambda"(is_toplevel_thunk=true, toplevel_pure=false)
+            [K"block"]
+            [K"block"]
+            ex
+        ]
+    end
+    _resolve_scopes(ctx, ex)
+end
+
+"""
+This pass analyzes scopes and the names (locals/globals etc) used within them.
+
+Names of kind `K"Identifier"` are transformed into binding identifiers of
+kind `K"BindingId"`. The associated `Bindings` table in the context records
+metadata about each binding.
+
+This pass also records the set of binding IDs used locally within the
+enclosing lambda form and information about variables captured by closures.
+"""
+@fzone "JL: resolve_scopes" function resolve_scopes(ctx::DesugaringContext, ex)
+    ctx2 = ScopeResolutionContext(ctx)
+    ex2 = resolve_scopes(ctx2, reparent(ctx2, ex))
+    ctx3 = VariableAnalysisContext(ctx2.graph, ctx2.bindings, ctx2.mod, ex2.lambda_bindings)
+    analyze_variables!(ctx3, ex2)
+    ctx3, ex2
+end
diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl
new file mode 100644
index 0000000000000..af569d731cf27
--- /dev/null
+++ b/JuliaLowering/src/syntax_graph.jl
@@ -0,0 +1,831 @@
+const NodeId = Int
+
+"""
+Directed graph with arbitrary attributes on nodes. Used here for representing
+one or several syntax trees.
+
+TODO: Global attributes!
+"""
+mutable struct SyntaxGraph{Attrs}
+    edge_ranges::Vector{UnitRange{Int}}
+    edges::Vector{NodeId}
+    attributes::Attrs
+end
+
+SyntaxGraph() = SyntaxGraph{Dict{Symbol,Any}}(Vector{UnitRange{Int}}(),
+                                              Vector{NodeId}(), Dict{Symbol,Any}())
+
+# "Freeze" attribute names and types, encoding them in the type of the returned
+# SyntaxGraph.
+function freeze_attrs(graph::SyntaxGraph)
+    frozen_attrs = (; pairs(graph.attributes)...)
+    SyntaxGraph(graph.edge_ranges, graph.edges, frozen_attrs)
+end
+
+# Create a copy of `graph` where the attribute list is mutable
+function unfreeze_attrs(graph::SyntaxGraph)
+    unfrozen_attrs = Dict{Symbol,Any}(pairs(graph.attributes)...)
+    SyntaxGraph(graph.edge_ranges, graph.edges, unfrozen_attrs)
+end
+
+function _show_attrs(io, attributes::Dict)
+    show(io, MIME("text/plain"), attributes)
+end
+function _show_attrs(io, attributes::NamedTuple)
+    show(io, MIME("text/plain"), Dict(pairs(attributes)...))
+end
+
+function attrnames(graph::SyntaxGraph)
+    keys(graph.attributes)
+end
+
+function attrdefs(graph::SyntaxGraph)
+    [(k=>typeof(v).parameters[2]) for (k, v) in pairs(graph.attributes)]
+end
+
+function Base.show(io::IO, ::MIME"text/plain", graph::SyntaxGraph)
+    print(io, typeof(graph),
+          " with $(length(graph.edge_ranges)) vertices, $(length(graph.edges)) edges, and attributes:\n")
+    _show_attrs(io, graph.attributes)
+end
+
+function ensure_attributes!(graph::SyntaxGraph; kws...)
+    for (k,v) in pairs(kws)
+        @assert k isa Symbol
+        @assert v isa Type
+        if haskey(graph.attributes, k)
+            v0 = valtype(graph.attributes[k])
+            v == v0 || throw(ErrorException("Attribute type mismatch $v != $v0"))
+        elseif graph.attributes isa NamedTuple
+            throw(ErrorException("""
+                ensure_attributes!: $k is not an existing attribute, and the graph's attributes are frozen. \
+                Consider calling non-mutating `ensure_attributes` instead."""))
+        else
+            graph.attributes[k] = Dict{NodeId,v}()
+        end
+    end
+    graph
+end
+
+function ensure_attributes(graph::SyntaxGraph{<:Dict}; kws...)
+    g = unfreeze_attrs(graph)
+    ensure_attributes!(g; kws...)
+end
+
+function ensure_attributes(graph::SyntaxGraph{<:NamedTuple}; kws...)
+    g = unfreeze_attrs(graph)
+    ensure_attributes!(g; kws...)
+    freeze_attrs(g)
+end
+
+function delete_attributes!(graph::SyntaxGraph{<:Dict}, attr_names::Symbol...)
+    for name in attr_names
+        delete!(graph.attributes, name)
+    end
+    graph
+end
+
+function delete_attributes(graph::SyntaxGraph{<:Dict}, attr_names::Symbol...)
+    delete_attributes!(unfreeze_attrs(graph), attr_names...)
+end
+
+function delete_attributes(graph::SyntaxGraph{<:NamedTuple}, attr_names::Symbol...)
+    g = delete_attributes!(unfreeze_attrs(graph), attr_names...)
+    freeze_attrs(g)
+end
+
+function newnode!(graph::SyntaxGraph)
+    push!(graph.edge_ranges, 0:-1) # Invalid range start => leaf node
+    return length(graph.edge_ranges)
+end
+
+function setchildren!(graph::SyntaxGraph, id, children::NodeId...)
+    setchildren!(graph, id, children)
+end
+
+function setchildren!(graph::SyntaxGraph, id, children)
+    n = length(graph.edges)
+    graph.edge_ranges[id] = n+1:(n+length(children))
+    # TODO: Reuse existing edges if possible
+    append!(graph.edges, children)
+end
+
+function JuliaSyntax.is_leaf(graph::SyntaxGraph, id)
+    first(graph.edge_ranges[id]) == 0
+end
+
+function JuliaSyntax.numchildren(graph::SyntaxGraph, id)
+    length(graph.edge_ranges[id])
+end
+
+function JuliaSyntax.children(graph::SyntaxGraph, id)
+    @view graph.edges[graph.edge_ranges[id]]
+end
+
+function JuliaSyntax.children(graph::SyntaxGraph, id, r::UnitRange)
+    @view graph.edges[graph.edge_ranges[id][r]]
+end
+
+function child(graph::SyntaxGraph, id::NodeId, i::Integer)
+    graph.edges[graph.edge_ranges[id][i]]
+end
+
+function getattr(graph::SyntaxGraph{<:Dict}, name::Symbol)
+    getfield(graph, :attributes)[name]
+end
+
+function getattr(graph::SyntaxGraph{<:NamedTuple}, name::Symbol)
+    getfield(getfield(graph, :attributes), name)
+end
+
+function getattr(graph::SyntaxGraph, name::Symbol, default)
+    get(getfield(graph, :attributes), name, default)
+end
+
+function hasattr(graph::SyntaxGraph, name::Symbol)
+    getattr(graph, name, nothing) !== nothing
+end
+
+# TODO: Probably terribly non-inferable?
+function setattr!(graph::SyntaxGraph, id; attrs...)
+    for (k,v) in pairs(attrs)
+        if !isnothing(v)
+            getattr(graph, k)[id] = v
+        end
+    end
+end
+
+function deleteattr!(graph::SyntaxGraph, id::NodeId, name::Symbol)
+    delete!(getattr(graph, name), id)
+end
+
+function Base.getproperty(graph::SyntaxGraph, name::Symbol)
+    # TODO: Remove access to internals?
+    name === :edge_ranges && return getfield(graph, :edge_ranges)
+    name === :edges       && return getfield(graph, :edges)
+    name === :attributes  && return getfield(graph, :attributes)
+    return getattr(graph, name)
+end
+
+function sethead!(graph, id::NodeId, h::JuliaSyntax.SyntaxHead)
+    sethead!(graph, id, kind(h))
+    setflags!(graph, id, flags(h))
+end
+
+function sethead!(graph, id::NodeId, k::Kind)
+    graph.kind[id] = k
+end
+
+function setflags!(graph, id::NodeId, f::UInt16)
+    graph.syntax_flags[id] = f
+end
+
+function _convert_nodes(graph::SyntaxGraph, node::SyntaxNode)
+    id = newnode!(graph)
+    sethead!(graph, id, head(node))
+    if !isnothing(node.val)
+        v = node.val
+        if v isa Symbol
+            # TODO: Fixes in JuliaSyntax to avoid ever converting to Symbol
+            setattr!(graph, id, name_val=string(v))
+        else
+            setattr!(graph, id, value=v)
+        end
+    end
+    setattr!(graph, id, source=SourceRef(node.source, node.position, node.raw))
+    if !is_leaf(node)
+        cs = map(children(node)) do n
+            _convert_nodes(graph, n)
+        end
+        setchildren!(graph, id, cs)
+    end
+    return id
+end
+
+"""
+    syntax_graph(ctx)
+
+Return `SyntaxGraph` associated with `ctx`
+"""
+syntax_graph(graph::SyntaxGraph) = graph
+
+function check_same_graph(x, y)
+    if syntax_graph(x) !== syntax_graph(y)
+        error("Mismatching syntax graphs")
+    end
+end
+
+function check_compatible_graph(x, y)
+    if !is_compatible_graph(x, y)
+        error("Incompatible syntax graphs")
+    end
+end
+
+function is_compatible_graph(x, y)
+    syntax_graph(x).edges === syntax_graph(y).edges
+end
+
+#-------------------------------------------------------------------------------
+struct SyntaxTree{GraphType}
+    _graph::GraphType
+    _id::NodeId
+end
+
+function Base.getproperty(ex::SyntaxTree, name::Symbol)
+    name === :_graph && return getfield(ex, :_graph)
+    name === :_id  && return getfield(ex, :_id)
+    _id = getfield(ex, :_id)
+    return get(getproperty(getfield(ex, :_graph), name), _id) do
+        attrstr = join(["\n    $n = $(getproperty(ex, n))"
+                        for n in attrnames(ex)], ",")
+        error("Property `$name[$_id]` not found. Available attributes:$attrstr")
+    end
+end
+
+function Base.setproperty!(ex::SyntaxTree, name::Symbol, val)
+    return setattr!(ex._graph, ex._id; name=>val)
+end
+
+function Base.propertynames(ex::SyntaxTree)
+    attrnames(ex)
+end
+
+function Base.get(ex::SyntaxTree, name::Symbol, default)
+    attr = getattr(getfield(ex, :_graph), name, nothing)
+    return isnothing(attr) ? default :
+           get(attr, getfield(ex, :_id), default)
+end
+
+function Base.getindex(ex::SyntaxTree, i::Integer)
+    SyntaxTree(ex._graph, child(ex._graph, ex._id, i))
+end
+
+function Base.getindex(ex::SyntaxTree, r::UnitRange)
+    SyntaxList(ex._graph, children(ex._graph, ex._id, r))
+end
+
+Base.firstindex(ex::SyntaxTree) = 1
+Base.lastindex(ex::SyntaxTree) = numchildren(ex)
+
+function hasattr(ex::SyntaxTree, name::Symbol)
+    attr = getattr(ex._graph, name, nothing)
+    return !isnothing(attr) && haskey(attr, ex._id)
+end
+
+function attrnames(ex::SyntaxTree)
+    attrs = ex._graph.attributes
+    [name for (name, value) in pairs(attrs) if haskey(value, ex._id)]
+end
+
+function copy_node(ex::SyntaxTree)
+    graph = syntax_graph(ex)
+    id = newnode!(graph)
+    if !is_leaf(ex)
+        setchildren!(graph, id, _node_ids(graph, children(ex)...))
+    end
+    ex2 = SyntaxTree(graph, id)
+    copy_attrs!(ex2, ex, true)
+    ex2
+end
+
+function setattr(ex::SyntaxTree; extra_attrs...)
+    ex2 = copy_node(ex)
+    setattr!(ex2; extra_attrs...)
+    ex2
+end
+
+function setattr!(ex::SyntaxTree; attrs...)
+    setattr!(ex._graph, ex._id; attrs...)
+end
+
+function deleteattr!(ex::SyntaxTree, name::Symbol)
+    deleteattr!(ex._graph, ex._id, name)
+end
+
+# JuliaSyntax tree API
+
+function JuliaSyntax.is_leaf(ex::SyntaxTree)
+    is_leaf(ex._graph, ex._id)
+end
+
+function JuliaSyntax.numchildren(ex::SyntaxTree)
+    numchildren(ex._graph, ex._id)
+end
+
+function JuliaSyntax.children(ex::SyntaxTree)
+    SyntaxList(ex._graph, children(ex._graph, ex._id))
+end
+
+function JuliaSyntax.head(ex::SyntaxTree)
+    JuliaSyntax.SyntaxHead(kind(ex), flags(ex))
+end
+
+function JuliaSyntax.kind(ex::SyntaxTree)
+    ex.kind::JuliaSyntax.Kind
+end
+
+function JuliaSyntax.flags(ex::SyntaxTree)
+    get(ex, :syntax_flags, 0x0000)
+end
+
+
+# Reference to bytes within a source file
+struct SourceRef
+    file::SourceFile
+    first_byte::Int
+    # TODO: Do we need the green node, or would last_byte suffice?
+    green_tree::JuliaSyntax.GreenNode
+end
+
+JuliaSyntax.sourcefile(src::SourceRef) = src.file
+JuliaSyntax.byte_range(src::SourceRef) = src.first_byte:(src.first_byte + span(src.green_tree) - 1)
+
+# TODO: Adding these methods to support LineNumberNode is kind of hacky but we
+# can remove these after JuliaLowering becomes self-bootstrapping for macros
+# and we a proper SourceRef for @ast's @HERE form.
+JuliaSyntax.byte_range(src::LineNumberNode) = 0:0
+JuliaSyntax.source_location(src::LineNumberNode) = (src.line, 0)
+JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode) = src
+JuliaSyntax.source_line(src::LineNumberNode) = src.line
+# The follow somewhat strange cases are for where LineNumberNode is standing in
+# for SourceFile because we've only got Expr-based provenance info
+JuliaSyntax.sourcefile(src::LineNumberNode) = src
+JuliaSyntax.sourcetext(src::LineNumberNode) = SubString("")
+JuliaSyntax.source_location(src::LineNumberNode, byte_index::Integer) = (src.line, 0)
+JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode, byte_index::Integer) = src
+JuliaSyntax.filename(src::LineNumberNode) = string(src.file)
+
+function JuliaSyntax.highlight(io::IO, src::LineNumberNode; note="")
+    print(io, src, " - ", note)
+end
+
+function JuliaSyntax.highlight(io::IO, src::SourceRef; kws...)
+    highlight(io, src.file, first_byte(src):last_byte(src); kws...)
+end
+
+function Base.show(io::IO, ::MIME"text/plain", src::SourceRef)
+    highlight(io, src; note="these are the bytes you're looking for 😊", context_lines_inner=20)
+end
+
+
+function provenance(ex::SyntaxTree)
+    s = ex.source
+    if s isa NodeId
+        return (SyntaxTree(ex._graph, s),)
+    elseif s isa Tuple
+        return SyntaxTree.((ex._graph,), s)
+    else
+        return (s,)
+    end
+end
+
+
+function _sourceref(sources, id)
+    i = 1
+    while true
+        i += 1
+        s = sources[id]
+        if s isa NodeId
+            id = s
+        else
+            return s, id
+        end
+    end
+end
+
+function sourceref(ex::SyntaxTree)
+    sources = ex._graph.source
+    id::NodeId = ex._id
+    while true
+        s, _ = _sourceref(sources, id)
+        if s isa Tuple
+            s = s[1]
+        end
+        if s isa NodeId
+            id = s
+        else
+            return s
+        end
+    end
+end
+
+function _flattened_provenance(refs, graph, sources, id)
+    # TODO: Implement in terms of `provenance()`?
+    s, id2 = _sourceref(sources, id)
+    if s isa Tuple
+        for i in s
+            _flattened_provenance(refs, graph, sources, i)
+        end
+    else
+        push!(refs, SyntaxTree(graph, id2))
+    end
+end
+
+function flattened_provenance(ex::SyntaxTree)
+    refs = SyntaxList(ex)
+    _flattened_provenance(refs, ex._graph, ex._graph.source, ex._id)
+    return reverse(refs)
+end
+
+
+function is_ancestor(ex, ancestor)
+    if !is_compatible_graph(ex, ancestor)
+        return false
+    end
+    sources = ex._graph.source
+    id::NodeId = ex._id
+    while true
+        s = get(sources, id, nothing)
+        if s isa NodeId
+            id = s
+            if id == ancestor._id
+                return true
+            end
+        else
+            return false
+        end
+    end
+end
+
+const SourceAttrType = Union{SourceRef,LineNumberNode,NodeId,Tuple}
+
+function SyntaxTree(graph::SyntaxGraph, node::SyntaxNode)
+    ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType,
+                       value=Any, name_val=String)
+    id = _convert_nodes(graph, node)
+    return SyntaxTree(graph, id)
+end
+
+function SyntaxTree(node::SyntaxNode)
+    return SyntaxTree(SyntaxGraph(), node)
+end
+
+attrsummary(name, value) = string(name)
+attrsummary(name, value::Number) = "$name=$value"
+
+function _value_string(ex)
+    k = kind(ex)
+    str = k in KSet"Identifier StrMacroName CmdMacroName" || is_operator(k) ? ex.name_val :
+          k == K"Placeholder" ? ex.name_val           :
+          k == K"SSAValue"    ? "%"                   :
+          k == K"BindingId"   ? "#"                   :
+          k == K"label"       ? "label"               :
+          k == K"core"        ? "core.$(ex.name_val)" :
+          k == K"top"         ? "top.$(ex.name_val)"  :
+          k == K"Symbol"      ? ":$(ex.name_val)" :
+          k == K"globalref"   ? "$(ex.mod).$(ex.name_val)" :
+          k == K"slot"        ? "slot" :
+          k == K"latestworld" ? "latestworld" :
+          k == K"static_parameter" ? "static_parameter" :
+          k == K"symbolic_label" ? "label:$(ex.name_val)" :
+          k == K"symbolic_goto" ? "goto:$(ex.name_val)" :
+          k == K"SourceLocation" ?
+              "SourceLocation:$(JuliaSyntax.filename(ex)):$(join(source_location(ex), ':'))" :
+          k == K"Value" && ex.value isa SourceRef ?
+              "SourceRef:$(JuliaSyntax.filename(ex)):$(join(source_location(ex), ':'))" :
+          repr(get(ex, :value, nothing))
+    id = get(ex, :var_id, nothing)
+    if isnothing(id)
+        id = get(ex, :id, nothing)
+    end
+    if !isnothing(id)
+        idstr = subscript_str(id)
+        str = "$(str)$idstr"
+    end
+    if k == K"slot" || k == K"BindingId"
+        p = provenance(ex)[1]
+        while p isa SyntaxTree
+            if kind(p) == K"Identifier"
+                str = "$(str)/$(p.name_val)"
+                break
+            end
+            p = provenance(p)[1]
+        end
+    end
+    return str
+end
+
+function _show_syntax_tree(io, ex, indent, show_kinds)
+    val = get(ex, :value, nothing)
+    nodestr = !is_leaf(ex) ? "[$(untokenize(head(ex)))]" : _value_string(ex)
+
+    treestr = rpad(string(indent, nodestr), 40)
+    if show_kinds && is_leaf(ex)
+        treestr = treestr*" :: "*string(kind(ex))
+    end
+
+    std_attrs = Set([:name_val,:value,:kind,:syntax_flags,:source,:var_id])
+    attrstr = join([attrsummary(n, getproperty(ex, n))
+                    for n in attrnames(ex) if n ∉ std_attrs], ",")
+    treestr = string(rpad(treestr, 60), " │ $attrstr")
+
+    println(io, treestr)
+    if !is_leaf(ex)
+        new_indent = indent*"  "
+        for n in children(ex)
+            _show_syntax_tree(io, n, new_indent, show_kinds)
+        end
+    end
+end
+
+function Base.show(io::IO, ::MIME"text/plain", ex::SyntaxTree, show_kinds=true)
+    anames = join(string.(attrnames(syntax_graph(ex))), ",")
+    println(io, "SyntaxTree with attributes $anames")
+    _show_syntax_tree(io, ex, "", show_kinds)
+end
+
+function _show_syntax_tree_sexpr(io, ex)
+    if is_leaf(ex)
+        if is_error(ex)
+            print(io, "(", untokenize(head(ex)), ")")
+        else
+            print(io, _value_string(ex))
+        end
+    else
+        print(io, "(", untokenize(head(ex)))
+        first = true
+        for n in children(ex)
+            print(io, ' ')
+            _show_syntax_tree_sexpr(io, n)
+            first = false
+        end
+        print(io, ')')
+    end
+end
+
+function Base.show(io::IO, ::MIME"text/x.sexpression", node::SyntaxTree)
+    _show_syntax_tree_sexpr(io, node)
+end
+
+function Base.show(io::IO, node::SyntaxTree)
+    _show_syntax_tree_sexpr(io, node)
+end
+
+function reparent(ctx, ex::SyntaxTree)
+    # Ensure `ex` has the same parent graph, in a somewhat loose sense.
+    # Could relax by copying if necessary?
+    # In that case, would we copy all the attributes? That would have slightly
+    # different semantics.
+    graph = syntax_graph(ctx)
+    @assert graph.edge_ranges === ex._graph.edge_ranges
+    SyntaxTree(graph, ex._id)
+end
+
+function ensure_attributes(ex::SyntaxTree; kws...)
+    reparent(ensure_attributes(syntax_graph(ex); kws...), ex)
+end
+
+syntax_graph(ex::SyntaxTree) = ex._graph
+
+function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStream; kws...)
+    SyntaxTree(JuliaSyntax.build_tree(SyntaxNode, stream; kws...))
+end
+
+JuliaSyntax.sourcefile(ex::SyntaxTree) = sourcefile(sourceref(ex))
+JuliaSyntax.byte_range(ex::SyntaxTree) = byte_range(sourceref(ex))
+
+function JuliaSyntax._expr_leaf_val(ex::SyntaxTree, _...)
+    name = get(ex, :name_val, nothing)
+    if !isnothing(name)
+        n = Symbol(name)
+        if kind(ex) === K"Symbol"
+            return QuoteNode(n)
+        elseif hasattr(ex, :scope_layer)
+            Expr(:scope_layer, n, ex.scope_layer)
+        else
+            n
+        end
+    else
+        val = get(ex, :value, nothing)
+        if kind(ex) == K"Value" && val isa Expr || val isa LineNumberNode
+            # Expr AST embedded in a SyntaxTree should be quoted rather than
+            # becoming part of the output AST.
+            QuoteNode(val)
+        else
+            val
+        end
+    end
+end
+
+Base.Expr(ex::SyntaxTree) = JuliaSyntax.to_expr(ex)
+
+#--------------------------------------------------
+function _find_SyntaxTree_macro(ex, line)
+    @assert !is_leaf(ex)
+    for c in children(ex)
+        rng = byte_range(c)
+        firstline = JuliaSyntax.source_line(sourcefile(c), first(rng))
+        lastline = JuliaSyntax.source_line(sourcefile(c), last(rng))
+        if line < firstline || lastline < line
+            continue
+        end
+        # We're in the line range. Either
+        if firstline == line && kind(c) == K"macrocall" && begin
+                    name = c[1]
+                    if kind(name) == K"macro_name"
+                        name = name[1]
+                    end
+                    if kind(name) == K"."
+                        name = name[2]
+                        if kind(name) == K"macro_name"
+                            name = name[1]
+                        end
+                    end
+                    @assert kind(name) == K"Identifier"
+                    name.name_val == "SyntaxTree"
+                end
+            # We find the node we're looking for. NB: Currently assuming a max
+            # of one @SyntaxTree invocation per line. Though we could relax
+            # this with more heuristic matching of the Expr-AST...
+            @assert numchildren(c) == 2
+            return c[2]
+        elseif !is_leaf(c)
+            # Recurse
+            ex1 = _find_SyntaxTree_macro(c, line)
+            if !isnothing(ex1)
+                return ex1
+            end
+        end
+    end
+    return nothing # Will get here if multiple children are on the same line.
+end
+
+# Translate JuliaLowering hygiene to esc() for use in @SyntaxTree
+function _scope_layer_1_to_esc!(ex)
+    if ex isa Expr
+        if ex.head == :scope_layer
+            @assert ex.args[2] === 1
+            return esc(_scope_layer_1_to_esc!(ex.args[1]))
+        else
+            map!(_scope_layer_1_to_esc!, ex.args, ex.args)
+            return ex
+        end
+    else
+        return ex
+    end
+end
+
+"""
+Macro to construct quoted SyntaxTree literals (instead of quoted Expr literals)
+in normal Julia source code.
+
+Example:
+
+```julia
+tree1 = @SyntaxTree :(some_unique_identifier)
+tree2 = @SyntaxTree quote
+    x = 1
+    \$tree1 = x
+end
+```
+"""
+macro SyntaxTree(ex_old)
+    # The implementation here is hilarious and arguably very janky: we
+    # 1. Briefly check but throw away the Expr-AST
+    if !(Meta.isexpr(ex_old, :quote) || ex_old isa QuoteNode)
+        throw(ArgumentError("@SyntaxTree expects a `quote` block or `:`-quoted expression"))
+    end
+    # 2. Re-parse the current source file as SyntaxTree instead
+    fname = isnothing(__source__.file) ? error("No current file") : String(__source__.file)
+    if occursin(r"REPL\[\d+\]", fname)
+        # Assume we should look at last history entry in REPL
+        try
+            # Wow digging in like this is an awful hack but `@SyntaxTree` is
+            # already a hack so let's go for it I guess 😆
+            text = Base.active_repl.mistate.interface.modes[1].hist.history[end]
+            if !occursin("@SyntaxTree", text)
+                error("Text not found in last REPL history line")
+            end
+        catch
+            error("Text not found in REPL history")
+        end
+    else
+        text = read(fname, String)
+    end
+    full_ex = parseall(SyntaxTree, text)
+    # 3. Using the current file and line number, dig into the re-parsed tree and
+    # discover the piece of AST which should be returned.
+    ex = _find_SyntaxTree_macro(full_ex, __source__.line)
+    isnothing(ex) && error("_find_SyntaxTree_macro failed")
+    # 4. Do the first step of JuliaLowering's syntax lowering to get
+    # syntax interpolations to work
+    _, ex1 = expand_forms_1(__module__, ex, false, Base.tls_world_age())
+    @assert kind(ex1) == K"call" && ex1[1].value == interpolate_ast
+    Expr(:call, :interpolate_ast, SyntaxTree, ex1[3][1],
+         map(e->_scope_layer_1_to_esc!(Expr(e)), ex1[4:end])...)
+end
+
+#-------------------------------------------------------------------------------
+# Lightweight vector of nodes ids with associated pointer to graph stored separately.
+mutable struct SyntaxList{GraphType, NodeIdVecType} <: AbstractVector{SyntaxTree}
+    graph::GraphType
+    ids::NodeIdVecType
+end
+
+function SyntaxList(graph::SyntaxGraph, ids::AbstractVector{NodeId})
+    SyntaxList{typeof(graph), typeof(ids)}(graph, ids)
+end
+
+SyntaxList(graph::SyntaxGraph) = SyntaxList(graph, Vector{NodeId}())
+SyntaxList(ctx) = SyntaxList(syntax_graph(ctx))
+
+syntax_graph(lst::SyntaxList) = lst.graph
+
+Base.size(v::SyntaxList) = size(v.ids)
+
+Base.IndexStyle(::Type{<:SyntaxList}) = IndexLinear()
+
+Base.getindex(v::SyntaxList, i::Int) = SyntaxTree(v.graph, v.ids[i])
+
+function Base.getindex(v::SyntaxList, r::UnitRange)
+    SyntaxList(v.graph, view(v.ids, r))
+end
+
+function Base.setindex!(v::SyntaxList, ex::SyntaxTree, i::Int)
+    check_compatible_graph(v, ex)
+    v.ids[i] = ex._id
+end
+
+function Base.setindex!(v::SyntaxList, id::NodeId, i::Int)
+    v.ids[i] = id
+end
+
+function Base.push!(v::SyntaxList, ex::SyntaxTree)
+    check_compatible_graph(v, ex)
+    push!(v.ids, ex._id)
+end
+
+function Base.pushfirst!(v::SyntaxList, ex::SyntaxTree)
+    check_compatible_graph(v, ex)
+    pushfirst!(v.ids, ex._id)
+end
+
+function Base.similar(v::SyntaxList, size::Tuple=Base.size(v.ids))
+    SyntaxList(v.graph, zeros(NodeId, size))
+end
+
+function Base.isassigned(v::SyntaxList, i::Integer)
+    v.ids[i] > 0
+end
+
+function Base.append!(v::SyntaxList, exs)
+    for e in exs
+        push!(v, e)
+    end
+    v
+end
+
+function Base.append!(v::SyntaxList, exs::SyntaxList)
+    check_compatible_graph(v, exs)
+    append!(v.ids, exs.ids)
+    v
+end
+
+function Base.push!(v::SyntaxList, id::NodeId)
+    push!(v.ids, id)
+end
+
+function Base.pop!(v::SyntaxList)
+    SyntaxTree(v.graph, pop!(v.ids))
+end
+
+function Base.resize!(v::SyntaxList, n)
+    resize!(v.ids, n)
+    v
+end
+
+function Base.empty!(v::SyntaxList)
+    empty!(v.ids)
+    v
+end
+
+function Base.deleteat!(v::SyntaxList, inds)
+    deleteat!(v.ids, inds)
+    v
+end
+
+function Base.copy(v::SyntaxList)
+    SyntaxList(v.graph, copy(v.ids))
+end
+
+function Base.filter(f, exs::SyntaxList)
+    out = SyntaxList(syntax_graph(exs))
+    for ex in exs
+        if f(ex)
+            push!(out, ex)
+        end
+    end
+    out
+end
+
+# Would like the following to be an overload of Base.map() ... but need
+# somewhat arcane trickery to ensure that this only tries to collect into a
+# SyntaxList when `f` yields a SyntaxTree.
+#
+# function mapsyntax(f, exs::SyntaxList)
+#     out = SyntaxList(syntax_graph(exs))
+#     for ex in exs
+#         push!(out, f(ex))
+#     end
+#     out
+# end
diff --git a/JuliaLowering/src/syntax_macros.jl b/JuliaLowering/src/syntax_macros.jl
new file mode 100644
index 0000000000000..cd6599a3fbbb8
--- /dev/null
+++ b/JuliaLowering/src/syntax_macros.jl
@@ -0,0 +1,387 @@
+# The following are versions of macros from Base which act as "standard syntax
+# extensions":
+#
+# * They emit syntactic forms with special `Kind`s and semantics known to
+#   lowering
+# * There is no other Julia surface syntax for these `Kind`s.
+
+# In order to implement these here without getting into bootstrapping problems,
+# we just write them as plain old macro-named functions and add the required
+# __context__ argument ourselves.
+#
+# TODO: @inline, @noinline, @inbounds, @simd, @ccall, @assume_effects
+#
+# TODO: Eventually move these to proper `macro` definitions and use
+# `JuliaLowering.include()` or something. Then we'll be in the fun little world
+# of bootstrapping but it shouldn't be too painful :)
+
+function _apply_nospecialize(ctx, ex)
+    k = kind(ex)
+    if k == K"Identifier" || k == K"Placeholder" || k == K"tuple"
+        setmeta(ex; nospecialize=true)
+    elseif k == K"..." || k == K"::" || k == K"="
+        if k == K"::" && numchildren(ex) == 1
+            ex = @ast ctx ex [K"::" "_"::K"Placeholder" ex[1]]
+        end
+        mapchildren(c->_apply_nospecialize(ctx, c), ctx, ex, 1:1)
+    else
+        throw(LoweringError(ex, "Invalid function argument"))
+    end
+end
+
+function Base.var"@nospecialize"(__context__::MacroContext, ex, exs...)
+    # TODO support multi-arg version properly
+    _apply_nospecialize(__context__, ex)
+end
+
+# TODO: support all forms that the original supports
+# function Base.var"@atomic"(__context__::MacroContext, ex)
+#     @chk kind(ex) == K"Identifier" || kind(ex) == K"::" (ex, "Expected identifier or declaration")
+#     @ast __context__ __context__.macrocall [K"atomic" ex]
+# end
+
+function Base.var"@label"(__context__::MacroContext, ex)
+    @chk kind(ex) == K"Identifier"
+    @ast __context__ ex ex=>K"symbolic_label"
+end
+
+function Base.var"@goto"(__context__::MacroContext, ex)
+    @chk kind(ex) == K"Identifier"
+    @ast __context__ ex ex=>K"symbolic_goto"
+end
+
+function Base.var"@locals"(__context__::MacroContext)
+    @ast __context__ __context__.macrocall [K"extension" "locals"::K"Symbol"]
+end
+
+function Base.var"@isdefined"(__context__::MacroContext, ex)
+    @ast __context__ __context__.macrocall [K"isdefined" ex]
+end
+
+function Base.var"@generated"(__context__::MacroContext)
+    @ast __context__ __context__.macrocall [K"generated"]
+end
+function Base.var"@generated"(__context__::MacroContext, ex)
+    if kind(ex) != K"function"
+        throw(LoweringError(ex, "Expected a function argument to `@generated`"))
+    end
+    @ast __context__ __context__.macrocall [K"function"
+        ex[1]
+        [K"if" [K"generated"]
+            ex[2]
+            [K"block"
+                [K"meta" "generated_only"::K"Symbol"]
+                [K"return"]
+            ]
+        ]
+    ]
+end
+
+function Base.var"@cfunction"(__context__::MacroContext, callable, return_type, arg_types)
+    if kind(arg_types) != K"tuple"
+        throw(MacroExpansionError(arg_types, "@cfunction argument types must be a literal tuple"))
+    end
+    arg_types_svec = @ast __context__ arg_types [K"call"
+        "svec"::K"core"
+        children(arg_types)...
+    ]
+    if kind(callable) == K"$"
+        fptr = callable[1]
+        typ = Base.CFunction
+    else
+        # Kinda weird semantics here - without `$`, the callable is a top level
+        # expression which will be evaluated by `jl_resolve_globals_in_ir`,
+        # implicitly within the module where the `@cfunction` is expanded into.
+        fptr = @ast __context__ callable [K"static_eval"(
+                meta=name_hint("cfunction function name"))
+            callable
+        ]
+        typ = Ptr{Cvoid}
+    end
+    @ast __context__ __context__.macrocall [K"cfunction"
+        typ::K"Value"
+        fptr
+        [K"static_eval"(meta=name_hint("cfunction return type"))
+            return_type
+        ]
+        [K"static_eval"(meta=name_hint("cfunction argument type"))
+            arg_types_svec
+        ]
+        "ccall"::K"Symbol"
+    ]
+end
+
+function ccall_macro_parse(ctx, ex, opts)
+    gc_safe=false
+    for opt in opts
+        if kind(opt) != K"=" || numchildren(opt) != 2 ||
+                kind(opt[1]) != K"Identifier"
+            throw(MacroExpansionError(opt, "Bad option to ccall"))
+        else
+            optname = opt[1].name_val
+            if optname == "gc_safe"
+                if kind(opt[2]) == K"Bool"
+                    gc_safe = opt[2].value::Bool
+                else
+                    throw(MacroExpansionError(opt[2], "gc_safe must be true or false"))
+                end
+            else
+                throw(MacroExpansionError(opt[1], "Unknown option name for ccall"))
+            end
+        end
+    end
+
+    if kind(ex) != K"::"
+        throw(MacroExpansionError(ex, "Expected a return type annotation `::SomeType`", position=:end))
+    end
+
+    rettype = ex[2]
+    call = ex[1]
+    if kind(call) != K"call"
+        throw(MacroExpansionError(call, "Expected function call syntax `f()`"))
+    end
+
+    func = call[1]
+    varargs = numchildren(call) > 1 && kind(call[end]) == K"parameters" ?
+        children(call[end]) : nothing
+
+    # collect args and types
+    args = SyntaxList(ctx)
+    types = SyntaxList(ctx)
+    function pusharg!(arg)
+        if kind(arg) != K"::"
+            throw(MacroExpansionError(arg, "argument needs a type annotation"))
+        end
+        push!(args, arg[1])
+        push!(types, arg[2])
+    end
+
+    for e in call[2:(isnothing(varargs) ? end : end-1)]
+        kind(e) != K"parameters" || throw(MacroExpansionError(call[end], "Multiple parameter blocks not allowed"))
+        pusharg!(e)
+    end
+
+    if !isnothing(varargs)
+        num_required_args = length(args)
+        if num_required_args == 0
+            throw(MacroExpansionError(call[end], "C ABI prohibits varargs without one required argument"))
+        end
+        for e in varargs
+            pusharg!(e)
+        end
+    else
+        num_required_args = 0 # Non-vararg call
+    end
+
+    return func, rettype, types, args, gc_safe, num_required_args
+end
+
+function ccall_macro_lower(ctx, ex, convention, func, rettype, types, args, gc_safe, num_required_args)
+    statements = SyntaxTree[]
+    kf = kind(func)
+    if kf == K"Identifier"
+        lowered_func = @ast ctx func func=>K"Symbol"
+    elseif kf == K"."
+        lowered_func = @ast ctx func [K"tuple"
+            func[2]=>K"Symbol"
+            [K"static_eval"(meta=name_hint("@ccall library name"))
+                func[1]
+            ]
+        ]
+    elseif kf == K"$"
+        fid = @ast ctx func[1] "func"::K"Identifier"
+        check = @ast ctx func [K"block"
+            [K"=" fid func[1]]
+            [K"if"
+                [K"call" (!isa)::K"Value" fid [K"curly" Ptr::K"Value" Cvoid::K"Value"]]
+                [K"block"
+                    [K"=" "name"::K"Identifier" [K"quote" func[1]]]
+                    [K"call" throw::K"Value"
+                        [K"call" ArgumentError::K"Value"
+                            [K"string"
+                                "interpolated function `"::K"String"
+                                "name"::K"Identifier"
+                                "` was not a `Ptr{Cvoid}`, but "::K"String"
+                                [K"call" typeof::K"Value" fid]]]]]]]
+        push!(statements, check)
+        lowered_func = check[1][1]
+    else
+        throw(MacroExpansionError(func,
+            "Function name must be a symbol like `foo`, a library and function name like `libc.printf` or an interpolated function pointer like `\$ptr`"))
+    end
+
+    roots = SyntaxTree[]
+    cargs = SyntaxTree[]
+    for (i, (type, arg)) in enumerate(zip(types, args))
+        argi = @ast ctx arg "arg$i"::K"Identifier"
+        # TODO: Does it help to emit ssavar() here for the `argi`?
+        push!(statements,
+              @ast ctx arg [K"local"
+                  [K"=" argi [K"call" Base.cconvert::K"Value" type arg]]])
+        push!(roots, argi)
+        push!(cargs, @ast ctx ex [K"call" Base.unsafe_convert::K"Value" type argi])
+    end
+    effect_flags = UInt16(0)
+    push!(statements, @ast ctx ex [K"foreigncall"
+        lowered_func
+        [K"static_eval"(meta=name_hint("@ccall return type"))
+            rettype
+        ]
+        [K"static_eval"(meta=name_hint("@ccall argument type"))
+            [K"call"
+                "svec"::K"core"
+                types...
+            ]
+        ]
+        num_required_args::K"Integer"
+        QuoteNode((convention, effect_flags, gc_safe))::K"Value"
+        cargs...
+        roots...
+    ])
+
+    @ast ctx ex [K"block"
+        statements...
+    ]
+end
+
+function Base.var"@ccall"(ctx::MacroContext, ex, opts...)
+    ccall_macro_lower(ctx, ex, :ccall, ccall_macro_parse(ctx, ex, opts)...)
+end
+
+function Base.GC.var"@preserve"(__context__::MacroContext, exs...)
+    idents = exs[1:end-1]
+    for e in idents
+        if kind(e) != K"Identifier"
+            throw(MacroExpansionError(e, "Preserved variable must be a symbol"))
+        end
+    end
+    @ast __context__ __context__.macrocall [K"block"
+        [K"="
+            "s"::K"Identifier"
+            [K"gc_preserve_begin"
+                idents...
+            ]
+        ]
+        [K"="
+            "r"::K"Identifier"
+            exs[end]
+        ]
+        [K"gc_preserve_end" "s"::K"Identifier"]
+        "r"::K"Identifier"
+    ]
+end
+
+function Base.Experimental.var"@opaque"(__context__::MacroContext, ex)
+    @chk kind(ex) == K"->"
+    @ast __context__ __context__.macrocall [K"opaque_closure"
+        "nothing"::K"core"
+        "nothing"::K"core"
+        "nothing"::K"core"
+        true::K"Bool"
+        ex
+    ]
+end
+
+function _at_eval_code(ctx, srcref, mod, ex)
+    @ast ctx srcref [K"block"
+        [K"local"
+            [K"="
+                "eval_result"::K"Identifier"
+                [K"call"
+                    # TODO: Call "eval"::K"core" here
+                    JuliaLowering.eval::K"Value"
+                    mod
+                    [K"quote" ex]
+                    [K"parameters"
+                        [K"="
+                            "expr_compat_mode"::K"Identifier"
+                            ctx.expr_compat_mode::K"Bool"
+                        ]
+                    ]
+                ]
+            ]
+        ]
+        (::K"latestworld_if_toplevel")
+        "eval_result"::K"Identifier"
+    ]
+end
+
+function Base.var"@eval"(__context__::MacroContext, ex)
+    mod = @ast __context__ __context__.macrocall __context__.scope_layer.mod::K"Value"
+    _at_eval_code(__context__, __context__.macrocall, mod, ex)
+end
+
+function Base.var"@eval"(__context__::MacroContext, mod, ex)
+    _at_eval_code(__context__, __context__.macrocall, mod, ex)
+end
+
+#--------------------------------------------------------------------------------
+# The following `@islocal` and `@inert` are macros for special syntax known to
+# lowering which don't exist in Base but arguably should.
+#
+# For now we have our own versions
+function var"@islocal"(__context__::MacroContext, ex)
+    @chk kind(ex) == K"Identifier"
+    @ast __context__ __context__.macrocall [K"extension"
+        "islocal"::K"Symbol"
+        ex
+    ]
+end
+
+"""
+A non-interpolating quoted expression.
+
+For example,
+
+```julia
+@inert quote
+    \$x
+end
+```
+
+does not take `x` from the surrounding scope - instead it leaves the
+interpolation `\$x` intact as part of the expression tree.
+
+TODO: What is the correct way for `@inert` to work? ie which of the following
+should work?
+
+```julia
+@inert quote
+   body
+end
+
+@inert begin
+   body
+end
+
+@inert x
+
+@inert \$x
+```
+
+The especially tricky cases involve nested interpolation ...
+```julia
+quote
+    @inert \$x
+end
+
+@inert quote
+    quote
+        \$x
+    end
+end
+
+@inert quote
+    quote
+        \$\$x
+    end
+end
+```
+
+etc. Needs careful thought - we should probably just copy what lisp does with
+quote+quasiquote 😅
+"""
+function var"@inert"(__context__::MacroContext, ex)
+    @chk kind(ex) == K"quote"
+    @ast __context__ __context__.macrocall [K"inert" ex]
+end
diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl
new file mode 100644
index 0000000000000..2803da01e30dc
--- /dev/null
+++ b/JuliaLowering/src/utils.jl
@@ -0,0 +1,186 @@
+# Error handling
+
+TODO(msg::AbstractString) = throw(ErrorException("Lowering TODO: $msg"))
+TODO(ex::SyntaxTree, msg="") = throw(LoweringError(ex, "Lowering TODO: $msg"))
+
+# Errors found during lowering will result in LoweringError being thrown to
+# indicate the syntax causing the error.
+struct LoweringError <: Exception
+    ex::SyntaxTree
+    msg::String
+end
+
+function Base.showerror(io::IO, exc::LoweringError; show_detail=true)
+    print(io, "LoweringError:\n")
+    src = sourceref(exc.ex)
+    highlight(io, src; note=exc.msg)
+
+    if show_detail
+        print(io, "\n\nDetailed provenance:\n")
+        showprov(io, exc.ex, tree=true)
+    end
+end
+
+#-------------------------------------------------------------------------------
+function _show_provtree(io::IO, ex::SyntaxTree, indent)
+    print(io, ex, "\n")
+    prov = provenance(ex)
+    for (i, e) in enumerate(prov)
+        islast = i == length(prov)
+        printstyled(io, "$indent$(islast ? "└─ " : "├─ ")", color=:light_black)
+        inner_indent = indent * (islast ? "   " : "│  ")
+        _show_provtree(io, e, inner_indent)
+    end
+end
+
+function _show_provtree(io::IO, prov, indent)
+    fn = filename(prov)
+    line, _ = source_location(prov)
+    printstyled(io, "@ $fn:$line\n", color=:light_black)
+end
+
+function showprov(io::IO, exs::AbstractVector;
+                  note=nothing, include_location::Bool=true, highlight_kwargs...)
+    for (i,ex) in enumerate(Iterators.reverse(exs))
+        sr = sourceref(ex)
+        if i > 1
+            print(io, "\n\n")
+        end
+        k = kind(ex)
+        ex_note = !isnothing(note) ? note :
+            i > 1 && k == K"macrocall"  ? "in macro expansion" :
+            i > 1 && k == K"$"          ? "interpolated here"  :
+            "in source"
+        highlight(io, sr; note=ex_note, highlight_kwargs...)
+
+        if include_location
+            line, _ = source_location(sr)
+            locstr = "$(filename(sr)):$line"
+            JuliaSyntax._printstyled(io, "\n# @ $locstr", fgcolor=:light_black)
+        end
+    end
+end
+
+function showprov(io::IO, ex::SyntaxTree; tree::Bool=false, showprov_kwargs...)
+    if tree
+        _show_provtree(io, ex, "")
+    else
+        showprov(io, flattened_provenance(ex); showprov_kwargs...)
+    end
+end
+
+function showprov(x; kws...)
+    showprov(stdout, x; kws...)
+end
+
+function subscript_str(i)
+     replace(string(i),
+             "0"=>"₀", "1"=>"₁", "2"=>"₂", "3"=>"₃", "4"=>"₄",
+             "5"=>"₅", "6"=>"₆", "7"=>"₇", "8"=>"₈", "9"=>"₉")
+end
+
+function _deref_ssa(stmts, ex)
+    while kind(ex) == K"SSAValue"
+        ex = stmts[ex.var_id]
+    end
+    ex
+end
+
+function _find_method_lambda(ex, name)
+    @assert kind(ex) == K"code_info"
+    # Heuristic search through outer thunk for the method in question.
+    method_found = false
+    stmts = children(ex[1])
+    for e in stmts
+        if kind(e) == K"method" && numchildren(e) >= 2
+            sig = _deref_ssa(stmts, e[2])
+            @assert kind(sig) == K"call"
+            arg_types = _deref_ssa(stmts, sig[2])
+            @assert kind(arg_types) == K"call"
+            self_type = _deref_ssa(stmts, arg_types[2])
+            if kind(self_type) == K"globalref" && occursin(name, self_type.name_val)
+                return e[3]
+            end
+        end
+    end
+end
+
+function print_ir(io::IO, ex, method_filter=nothing)
+    @assert kind(ex) == K"code_info"
+    if !isnothing(method_filter)
+        filtered = _find_method_lambda(ex, method_filter)
+        if isnothing(filtered)
+            @warn "Method not found with method filter $method_filter"
+        else
+            ex = filtered
+        end
+    end
+    _print_ir(io, ex, "")
+end
+
+# TODO: JuliaLowering-the-module should always print the same way, ignoring parent modules
+function _print_ir(io::IO, ex, indent)
+    added_indent = "    "
+    @assert (kind(ex) == K"lambda" || kind(ex) == K"code_info") && kind(ex[1]) == K"block"
+    if !ex.is_toplevel_thunk && kind(ex) == K"code_info"
+        slots = ex.slots
+        print(io, indent, "slots: [")
+        for (i,slot) in enumerate(slots)
+            print(io, "slot$(subscript_str(i))/$(slot.name)")
+            flags = String[]
+            slot.is_nospecialize   && push!(flags, "nospecialize")
+            !slot.is_read          && push!(flags, "!read")
+            slot.is_single_assign  && push!(flags, "single_assign")
+            slot.is_maybe_undef    && push!(flags, "maybe_undef")
+            slot.is_called         && push!(flags, "called")
+            if !isempty(flags)
+                print(io, "($(join(flags, ",")))")
+            end
+            if i < length(slots)
+                print(io, " ")
+            end
+        end
+        println(io, "]")
+    end
+    stmts = children(ex[1])
+    for (i, e) in enumerate(stmts)
+        lno = rpad(i, 3)
+        if kind(e) == K"method" && numchildren(e) == 3
+            print(io, indent, lno, " --- method ", string(e[1]), " ", string(e[2]))
+            if kind(e[3]) == K"lambda" || kind(e[3]) == K"code_info"
+                println(io)
+                _print_ir(io, e[3], indent*added_indent)
+            else
+                println(io, " ", string(e[3]))
+            end
+        elseif kind(e) == K"opaque_closure_method"
+            @assert numchildren(e) == 5
+            print(io, indent, lno, " --- opaque_closure_method ")
+            for i=1:4
+                print(io, " ", e[i])
+            end
+            println(io)
+            _print_ir(io, e[5], indent*added_indent)
+        elseif kind(e) == K"code_info"
+            println(io, indent, lno, " --- ", e.is_toplevel_thunk ? "thunk" : "code_info")
+            _print_ir(io, e, indent*added_indent)
+        else
+            code = string(e)
+            println(io, indent, lno, " ", code)
+        end
+    end
+end
+
+# Wrap a function body in Base.Compiler.@zone for profiling
+if isdefined(Base.Compiler, Symbol("@zone"))
+    macro fzone(str, f)
+        @assert f isa Expr && f.head === :function && length(f.args) === 2 && str isa String
+        esc(Expr(:function, f.args[1],
+                 # Use source of our caller, not of this macro.
+                 Expr(:macrocall, :(Base.Compiler.var"@zone"), __source__, str, f.args[2])))
+    end
+else
+    macro fzone(str, f)
+        esc(f)
+    end
+end
diff --git a/JuliaLowering/test/arrays.jl b/JuliaLowering/test/arrays.jl
new file mode 100644
index 0000000000000..bc3e43af0b089
--- /dev/null
+++ b/JuliaLowering/test/arrays.jl
@@ -0,0 +1,148 @@
+using Test, JuliaLowering
+
+@testset "Array syntax" begin
+
+test_mod = Module()
+
+# Test that two array element types are equal and that they are also equal
+# elementwise
+function ≅(a, b)
+    eltype(a) == eltype(b) && a == b
+end
+
+# vect
+@test JuliaLowering.include_string(test_mod, """
+[1,2,3]
+""") ≅ [1,2,3]
+
+# hcat
+@test JuliaLowering.include_string(test_mod, """
+[1 2 3]
+""") ≅ [1 2 3]
+
+# typed_hcat
+@test JuliaLowering.include_string(test_mod, """
+Int[1.0 2.0 3.0]
+""") ≅ [1 2 3]
+
+# splat with vect/hcat/typed_hcat
+@test JuliaLowering.include_string(test_mod, """
+let xs = [1,2,3]
+    [0, xs...]
+end
+""") ≅ [0,1,2,3]
+@test JuliaLowering.include_string(test_mod, """
+let xs = [1,2,3]
+    [0 xs...]
+end
+""") ≅ [0 1 2 3]
+@test JuliaLowering.include_string(test_mod, """
+let xs = [1,2,3]
+    Int[0 xs...]
+end
+""") ≅ Int[0 1 2 3]
+
+# vcat
+@test JuliaLowering.include_string(test_mod, """
+[1;2;3]
+""") ≅ [1; 2; 3]
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    xs = (1,2)
+    [xs...; xs...]
+end
+""") ≅ [1,2,1,2]
+
+# hvcat
+@test JuliaLowering.include_string(test_mod, """
+[1 2 3; 4 5 6]
+""") ≅ [1 2 3;
+        4 5 6]
+
+# hvcat_rows
+@test JuliaLowering.include_string(test_mod, """
+let
+    xs = (1,2)
+    [xs... 3; 4 xs...]
+end
+""") ≅ [1 2 3;
+        4 1 2]
+
+# typed_vcat
+@test JuliaLowering.include_string(test_mod, """
+Int[1.0; 2.0; 3.0]
+""") ≅ [1; 2; 3]
+
+# typed_hvcat
+@test JuliaLowering.include_string(test_mod, """
+Int[1.0 2.0 3.0; 4.0 5.0 6.0]
+""") ≅ [1 2 3;
+        4 5 6]
+
+# typed_hvcat_rows
+@test JuliaLowering.include_string(test_mod, """
+let
+    xs = (1.0,2.0)
+    Int[xs... 3; 4 xs...]
+end
+""") ≅ [1 2 3;
+        4 1 2]
+
+# ncat with a single dimension
+@test JuliaLowering.include_string(test_mod, """
+[1 ;;; 2 ;;; 3]
+""") ≅ [1 ;;; 2 ;;; 3]
+
+@test JuliaLowering.include_string(test_mod, """
+Int[1.0 ;;; 2.0 ;;; 3.0]
+""") ≅ [1 ;;; 2 ;;; 3]
+
+# Lowering of ref to setindex
+@test JuliaLowering.include_string(test_mod, """
+let
+    as = [0,0,0,0]
+    as[begin] = 1
+    as[2] = 2
+    as[end] = 4
+    as
+end
+""") == [1, 2, 0, 4]
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    as = zeros(Int, 2,3)
+    as[begin, end] = 1
+    as[end, begin] = 2
+    js = (2,)
+    as[js..., end] = 3
+    as
+end
+""") == [0 0 1;
+         2 0 3]
+
+# getindex
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = [1 2;
+         3 4]
+    (x[end,begin], x[begin,end])
+end
+""") == (3, 2)
+
+# getindex with splats
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = [1 2;
+         3 4
+         ;;;
+         5 6;
+         7 8]
+    inds = (2,1)
+    ind1 = (1,)
+    (x[inds..., begin], x[inds..., end], x[1, inds...],
+     x[ind1..., ind1..., end])
+end
+""") == (3, 7, 2, 5)
+
+end # @testset "Array syntax" begin
diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl
new file mode 100644
index 0000000000000..4595603e4b79d
--- /dev/null
+++ b/JuliaLowering/test/arrays_ir.jl
@@ -0,0 +1,498 @@
+########################################
+# vect syntax
+[10, 20, 30]
+#---------------------
+1   (call top.vect 10 20 30)
+2   (return %₁)
+
+########################################
+# vect with splat
+[x, xs...]
+#---------------------
+1   TestMod.x
+2   (call core.tuple %₁)
+3   TestMod.xs
+4   (call core._apply_iterate top.iterate top.vect %₂ %₃)
+5   (return %₄)
+
+########################################
+# vect with splats
+[x, xs..., y, ys...]
+#---------------------
+1   TestMod.x
+2   (call core.tuple %₁)
+3   TestMod.xs
+4   TestMod.y
+5   (call core.tuple %₄)
+6   TestMod.ys
+7   (call core._apply_iterate top.iterate top.vect %₂ %₃ %₅ %₆)
+8   (return %₇)
+
+########################################
+# Error: vect syntax with parameters
+[10, 20; 30]
+#---------------------
+LoweringError:
+[10, 20; 30]
+#      └──┘ ── unexpected semicolon in array expression
+
+########################################
+# Error: vect syntax with embedded assignments
+[a=20, 30]
+#---------------------
+LoweringError:
+[a=20, 30]
+#└──┘ ── misplaced assignment statement in `[ ... ]`
+
+########################################
+# hcat syntax
+[10 20 30]
+#---------------------
+1   (call top.hcat 10 20 30)
+2   (return %₁)
+
+########################################
+# hcat with splat
+[x xs...]
+#---------------------
+1   TestMod.x
+2   (call core.tuple %₁)
+3   TestMod.xs
+4   (call core._apply_iterate top.iterate top.hcat %₂ %₃)
+5   (return %₄)
+
+########################################
+# typed hcat syntax
+T[10 20 30]
+#---------------------
+1   TestMod.T
+2   (call top.typed_hcat %₁ 10 20 30)
+3   (return %₂)
+
+########################################
+# typed hcat syntax with splat
+T[x xs...]
+#---------------------
+1   TestMod.T
+2   TestMod.x
+3   (call core.tuple %₁ %₂)
+4   TestMod.xs
+5   (call core._apply_iterate top.iterate top.typed_hcat %₃ %₄)
+6   (return %₅)
+
+########################################
+# Error: hcat syntax with embedded assignments
+[10 20 a=40]
+#---------------------
+LoweringError:
+[10 20 a=40]
+#     └───┘ ── misplaced assignment statement in `[ ... ]`
+
+########################################
+# vcat syntax
+[10; 20; 30]
+#---------------------
+1   (call top.vcat 10 20 30)
+2   (return %₁)
+
+########################################
+# vcat with splats
+[a...; 20; 30]
+#---------------------
+1   TestMod.a
+2   (call core.tuple 20 30)
+3   (call core._apply_iterate top.iterate top.vcat %₁ %₂)
+4   (return %₃)
+
+########################################
+# hvcat syntax
+[10; 20 30; 40 e f]
+#---------------------
+1   (call core.tuple 1 2 3)
+2   TestMod.e
+3   TestMod.f
+4   (call top.hvcat %₁ 10 20 30 40 %₂ %₃)
+5   (return %₄)
+
+########################################
+# hvcat with splats nested within rows
+[10; 20 a...]
+#---------------------
+1   (call core.tuple 10)
+2   (call core.tuple 20)
+3   TestMod.a
+4   (call core._apply_iterate top.iterate core.tuple %₂ %₃)
+5   (call top.hvcat_rows %₁ %₄)
+6   (return %₅)
+
+########################################
+# Error: vcat syntax with assignments
+[a=20; 30]
+#---------------------
+LoweringError:
+[a=20; 30]
+#└──┘ ── misplaced assignment statement in `[ ... ]`
+
+########################################
+# typed_vcat syntax
+T[10; 20; 30]
+#---------------------
+1   TestMod.T
+2   (call top.typed_vcat %₁ 10 20 30)
+3   (return %₂)
+
+########################################
+# typed_hvcat syntax
+T[10; 20 30; 40 50 60]
+#---------------------
+1   TestMod.T
+2   (call core.tuple 1 2 3)
+3   (call top.typed_hvcat %₁ %₂ 10 20 30 40 50 60)
+4   (return %₃)
+
+########################################
+# typed_hvcat with splats nested within rows
+T[10; 20 a...]
+#---------------------
+1   TestMod.T
+2   (call core.tuple 10)
+3   (call core.tuple 20)
+4   TestMod.a
+5   (call core._apply_iterate top.iterate core.tuple %₃ %₄)
+6   (call top.typed_hvcat_rows %₁ %₂ %₅)
+7   (return %₆)
+
+########################################
+# ncat with a single dimension
+[10 ;;; 20 ;;; 30]
+#---------------------
+1   (call top.hvncat 3 10 20 30)
+2   (return %₁)
+
+########################################
+# typed_ncat with a single dimension
+T[10 ;;; 20 ;;; 30]
+#---------------------
+1   TestMod.T
+2   (call top.typed_hvncat %₁ 3 10 20 30)
+3   (return %₂)
+
+########################################
+# ncat with balanced column major element layout
+[10 ; 20 ; 30 ;;; 40 ; 50 ; 60]
+#---------------------
+1   (call core.tuple 3 1 2)
+2   (call top.hvncat %₁ false 10 20 30 40 50 60)
+3   (return %₂)
+
+########################################
+# typed multidimensional ncat
+T[10 ; 20 ; 30 ;;; 40 ; 50 ; 60]
+#---------------------
+1   TestMod.T
+2   (call core.tuple 3 1 2)
+3   (call top.typed_hvncat %₁ %₂ false 10 20 30 40 50 60)
+4   (return %₃)
+
+########################################
+# ncat with balanced row major element layout
+[10 20 30 ; 40 50 60 ;;;]
+#---------------------
+1   (call core.tuple 2 3 1)
+2   (call top.hvncat %₁ true 10 20 30 40 50 60)
+3   (return %₂)
+
+########################################
+# ncat of 3D array with balanced layout
+[10 ; 20 ;; 30 ; 40 ;;; 50 ; 60 ;; 70 ; 80]
+#---------------------
+1   (call core.tuple 2 2 2)
+2   (call top.hvncat %₁ false 10 20 30 40 50 60 70 80)
+3   (return %₂)
+
+########################################
+# ncat with unbalanced column major layout
+[10 ; 20 ;; 30 ;;; 40 ;;;;]
+#---------------------
+1   (call core.tuple 2 1 1)
+2   (call core.tuple 3 1)
+3   (call core.tuple 4)
+4   (call core.tuple 4)
+5   (call core.tuple %₁ %₂ %₃ %₄)
+6   (call top.hvncat %₅ false 10 20 30 40)
+7   (return %₆)
+
+########################################
+# ncat with unbalanced row major layout
+[10 20 ; 30 40 ; 50 60 ;;; 70 ;;; 80 ;;;;]
+#---------------------
+1   (call core.tuple 2 2 2 1 1)
+2   (call core.tuple 6 1 1)
+3   (call core.tuple 8)
+4   (call core.tuple 8)
+5   (call core.tuple %₁ %₂ %₃ %₄)
+6   (call top.hvncat %₅ true 10 20 30 40 50 60 70 80)
+7   (return %₆)
+
+########################################
+# Splatting with 1D ncat
+[xs ;;; ys... ;;; zs]
+#---------------------
+1   TestMod.xs
+2   (call core.tuple 3 %₁)
+3   TestMod.ys
+4   TestMod.zs
+5   (call core.tuple %₄)
+6   (call core._apply_iterate top.iterate top.hvncat %₂ %₃ %₅)
+7   (return %₆)
+
+########################################
+# Error: splatting with multi-dimensional ncat
+[xs ; ys ;;; zs...]
+#---------------------
+LoweringError:
+[xs ; ys ;;; zs...]
+#            └───┘ ── Splatting ... in an `ncat` with multiple dimensions is not supported
+
+########################################
+# Error: bad nrow nesting
+@ast_ [K"ncat"(syntax_flags=set_numeric_flags(3))
+    [K"nrow"(syntax_flags=set_numeric_flags(1))
+        [K"nrow"(syntax_flags=set_numeric_flags(1))
+            1::K"Integer"
+        ]
+    ]
+]
+#---------------------
+LoweringError:
+#= line 1 =# - Badly nested rows in `ncat`
+
+########################################
+# Error: bad nrow nesting
+@ast_ [K"ncat"(syntax_flags=set_numeric_flags(3))
+    [K"nrow"(syntax_flags=set_numeric_flags(2))
+        [K"row"
+            1::K"Integer"
+        ]
+    ]
+]
+#---------------------
+LoweringError:
+#= line 1 =# - 2D `nrow` cannot be mixed with `row` in `ncat`
+
+########################################
+# Error: bad nrow nesting
+@ast_ [K"ncat"(syntax_flags=set_numeric_flags(3))
+    [K"row"
+        [K"row"
+            1::K"Integer"
+        ]
+    ]
+]
+#---------------------
+LoweringError:
+#= line 1 =# - Badly nested rows in `ncat`
+
+########################################
+# Simple getindex
+a[i]
+#---------------------
+1   TestMod.a
+2   TestMod.i
+3   (call top.getindex %₁ %₂)
+4   (return %₃)
+
+########################################
+# simple 1D getindex with begin
+a[begin]
+#---------------------
+1   TestMod.a
+2   (call top.firstindex %₁)
+3   (call top.getindex %₁ %₂)
+4   (return %₃)
+
+########################################
+# simple 1D getindex with end
+a[end]
+#---------------------
+1   TestMod.a
+2   (call top.lastindex %₁)
+3   (call top.getindex %₁ %₂)
+4   (return %₃)
+
+########################################
+# multidimensional getindex with begin
+a[i, begin]
+#---------------------
+1   TestMod.a
+2   TestMod.i
+3   (call top.firstindex %₁ 2)
+4   (call top.getindex %₁ %₂ %₃)
+5   (return %₄)
+
+########################################
+# multidimensional getindex with end
+a[i, end]
+#---------------------
+1   TestMod.a
+2   TestMod.i
+3   (call top.lastindex %₁ 2)
+4   (call top.getindex %₁ %₂ %₃)
+5   (return %₄)
+
+########################################
+# multidimensional getindex with begin/end and splats
+a[is..., end, js..., begin]
+#---------------------
+1   TestMod.a
+2   TestMod.is
+3   (call top.length %₂)
+4   (call top.+ 1 %₃)
+5   (call top.lastindex %₁ %₄)
+6   TestMod.js
+7   (call top.length %₂)
+8   (call top.length %₆)
+9   (call top.+ 2 %₇ %₈)
+10  (call top.firstindex %₁ %₉)
+11  (call core.tuple %₁)
+12  (call core.tuple %₅)
+13  (call core.tuple %₁₀)
+14  (call core._apply_iterate top.iterate top.getindex %₁₁ %₂ %₁₂ %₆ %₁₃)
+15  (return %₁₄)
+
+########################################
+# getindex with nontrivial array expression and begin/end
+f()[end]
+#---------------------
+1   TestMod.f
+2   (call %₁)
+3   (call top.lastindex %₂)
+4   (call top.getindex %₂ %₃)
+5   (return %₄)
+
+########################################
+# nested refs with getindex and begin/end
+b[a[begin, end], begin, end]
+#---------------------
+1   TestMod.b
+2   TestMod.a
+3   (call top.firstindex %₂ 1)
+4   (call top.lastindex %₂ 2)
+5   (call top.getindex %₂ %₃ %₄)
+6   (call top.firstindex %₁ 2)
+7   (call top.lastindex %₁ 3)
+8   (call top.getindex %₁ %₅ %₆ %₇)
+9   (return %₈)
+
+########################################
+# Error: parameters in array ref
+a[i, j; w=1]
+#---------------------
+LoweringError:
+a[i, j; w=1]
+#     └───┘ ── unexpected semicolon in array expression
+
+########################################
+# simple setindex!
+a[i] = x
+#---------------------
+1   TestMod.x
+2   TestMod.a
+3   TestMod.i
+4   (call top.setindex! %₂ %₁ %₃)
+5   (return %₁)
+
+########################################
+# simple setindex! with begin
+a[begin] = x
+#---------------------
+1   TestMod.a
+2   TestMod.x
+3   (call top.firstindex %₁)
+4   (call top.setindex! %₁ %₂ %₃)
+5   (return %₂)
+
+########################################
+# simple setindex! with end
+a[end] = x
+#---------------------
+1   TestMod.a
+2   TestMod.x
+3   (call top.lastindex %₁)
+4   (call top.setindex! %₁ %₂ %₃)
+5   (return %₂)
+
+########################################
+# multidimensional setindex! with begin
+a[i, begin] = x
+#---------------------
+1   TestMod.a
+2   TestMod.x
+3   TestMod.i
+4   (call top.firstindex %₁ 2)
+5   (call top.setindex! %₁ %₂ %₃ %₄)
+6   (return %₂)
+
+########################################
+# multidimensional setindex! with end
+a[i, end] = x
+#---------------------
+1   TestMod.a
+2   TestMod.x
+3   TestMod.i
+4   (call top.lastindex %₁ 2)
+5   (call top.setindex! %₁ %₂ %₃ %₄)
+6   (return %₂)
+
+########################################
+# multidimensional setindex! with begin/end and splats
+a[is..., end, js..., begin] = x
+#---------------------
+1   TestMod.a
+2   TestMod.is
+3   (call top.length %₂)
+4   (call top.+ 1 %₃)
+5   (call top.lastindex %₁ %₄)
+6   TestMod.js
+7   (call top.length %₂)
+8   (call top.length %₆)
+9   (call top.+ 2 %₇ %₈)
+10  (call top.firstindex %₁ %₉)
+11  TestMod.x
+12  (call core.tuple %₁ %₁₁)
+13  (call core.tuple %₅)
+14  (call core.tuple %₁₀)
+15  (call core._apply_iterate top.iterate top.setindex! %₁₂ %₂ %₁₃ %₆ %₁₄)
+16  (return %₁₁)
+
+########################################
+# setindex! with nontrivial array expression and begin/end
+f()[end] = x
+#---------------------
+1   TestMod.f
+2   (call %₁)
+3   TestMod.x
+4   (call top.lastindex %₂)
+5   (call top.setindex! %₂ %₃ %₄)
+6   (return %₃)
+
+########################################
+# nested refs
+b[a[begin]] = x
+#---------------------
+1   TestMod.b
+2   TestMod.x
+3   TestMod.a
+4   (call top.firstindex %₃)
+5   (call top.getindex %₃ %₄)
+6   (call top.setindex! %₁ %₂ %₅)
+7   (return %₂)
+
+########################################
+# empty ref and setindex!
+a[] = rhs
+#---------------------
+1   TestMod.rhs
+2   TestMod.a
+3   (call top.setindex! %₂ %₁)
+4   (return %₁)
diff --git a/JuliaLowering/test/assignments.jl b/JuliaLowering/test/assignments.jl
new file mode 100644
index 0000000000000..7bd37e6f14f46
--- /dev/null
+++ b/JuliaLowering/test/assignments.jl
@@ -0,0 +1,112 @@
+@testset "assignments" begin
+
+test_mod = Module()
+
+Base.include_string(test_mod,
+"""
+mutable struct X
+    a
+    b
+end
+""")
+
+# TODO: Desugaring of assignment done, but needs `where` lowering
+JuliaLowering.include_string(test_mod, """
+MyVector{T} = Array{1,T}
+""")
+@test test_mod.MyVector{Int} == Array{1,Int}
+
+# Chained assignment
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = b = 42
+end
+""") == 42
+
+# Assignment in value but not tail position
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = begin
+        y = 42
+    end
+    x
+end
+""") == 42
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = []
+    a = b = (push!(x, 1); 42)
+    (a,b,x)
+end
+""") == (42,42,[1])
+
+# setproperty!
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = X(1,2)
+    x.a = 10
+    (x.a, x.b)
+end
+""") == (10,2)
+
+# Declarations
+@test JuliaLowering.include_string(test_mod, """
+let
+    x::Int = 1
+    x = 10.0
+    x
+end
+""") === 10
+
+# Updating assignments
+@test JuliaLowering.include_string(test_mod, """
+let x = "hi"
+    x *= " ho"
+    x
+end
+""") == "hi ho"
+
+@test JuliaLowering.include_string(test_mod, """
+let x = [1,3]
+    x .-= [0,1]
+    x
+end
+""") == [1,2]
+
+@test JuliaLowering.include_string(test_mod, """
+let x = [1 2; 3 4]
+    x[begin, 1:end] .-= 1
+    x
+end
+""") == [0 1 ; 3 4]
+
+# Test that side effects of computing indices in left hand side only occur
+# once.
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = [1, 2]
+    n_calls = 0
+    the_index() = (n_calls = n_calls + 1; 1)
+    x[the_index()] += 1
+    x[the_index()]::Int += 1
+    x[the_index():end] .+= 1
+    n_calls
+end
+""") == 3
+
+# removing argument side effect in kwcall lhs
+@eval test_mod f60152(v, pa; kw) = copy(v)
+@test JuliaLowering.include_string(test_mod, """
+    f60152([1, 2, 3], 0; kw=0) .*= 2
+""") == [2,4,6]
+@test JuliaLowering.include_string(test_mod, """
+let
+    pa_execs = 0
+    kw_execs = 0
+    out = f60152([1, 2, 3], (pa_execs+=1); kw=(kw_execs+=1)) .*= 2
+    (out, pa_execs, kw_execs)
+end
+""") == ([2,4,6], 1, 1)
+
+end
diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl
new file mode 100644
index 0000000000000..2b002fbcef61e
--- /dev/null
+++ b/JuliaLowering/test/assignments_ir.jl
@@ -0,0 +1,361 @@
+########################################
+# chain of assignments
+let
+    a = b = c = 1
+end
+#---------------------
+1   1
+2   (= slot₁/a %₁)
+3   (= slot₂/b %₁)
+4   (= slot₃/c %₁)
+5   (return %₁)
+
+########################################
+# chain of assignments with nontrivial rhs
+let
+    a = b = c = f()
+end
+#---------------------
+1   TestMod.f
+2   (call %₁)
+3   (= slot₁/a %₂)
+4   (= slot₂/b %₂)
+5   (= slot₃/c %₂)
+6   (return %₂)
+
+########################################
+# Assignment in value but not tail position
+let
+    x = begin
+        y = 42
+    end
+    x
+end
+#---------------------
+1   42
+2   (= slot₂/y %₁)
+3   (= slot₁/x %₁)
+4   slot₁/x
+5   (return %₄)
+
+########################################
+# short form function def, not chain of assignments
+begin
+    local a
+    a = b() = c = d
+end
+#---------------------
+1   (method TestMod.b)
+2   latestworld
+3   TestMod.b
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::3:9
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/c(!read)]
+    1   TestMod.d
+    2   (= slot₂/c %₁)
+    3   (return %₁)
+10  latestworld
+11  TestMod.b
+12  (= slot₁/a %₁₁)
+13  (return %₁₁)
+
+########################################
+# a.b = ... => setproperty! assignment
+let
+    a.b = c
+end
+#---------------------
+1   TestMod.a
+2   TestMod.c
+3   (call top.setproperty! %₁ :b %₂)
+4   TestMod.c
+5   (return %₄)
+
+########################################
+# a.b.c = f() => setproperty! assignment, complex case
+let
+    a.b.c = f()
+end
+#---------------------
+1   TestMod.a
+2   (call top.getproperty %₁ :b)
+3   TestMod.f
+4   (call %₃)
+5   (call top.setproperty! %₂ :c %₄)
+6   (return %₄)
+
+########################################
+# declarations of typed locals
+let
+    x::T = f()
+    x
+end
+#---------------------
+1   (newvar slot₁/x)
+2   TestMod.f
+3   (call %₂)
+4   TestMod.T
+5   (= slot₂/tmp %₃)
+6   slot₂/tmp
+7   (call core.isa %₆ %₄)
+8   (gotoifnot %₇ label₁₀)
+9   (goto label₁₃)
+10  slot₂/tmp
+11  (call top.convert %₄ %₁₀)
+12  (= slot₂/tmp (call core.typeassert %₁₁ %₄))
+13  slot₂/tmp
+14  (= slot₁/x %₁₃)
+15  slot₁/x
+16  (return %₁₅)
+
+########################################
+# "complex lhs" of `::T` => type-assert, not decl
+let
+    a.b::T = f()
+    x
+end
+#---------------------
+1   TestMod.a
+2   (call top.getproperty %₁ :b)
+3   TestMod.T
+4   (call core.typeassert %₂ %₃)
+5   TestMod.f
+6   (call %₅)
+7   TestMod.a
+8   (call top.setproperty! %₇ :b %₆)
+9   TestMod.x
+10  (return %₉)
+
+########################################
+# UnionAll expansion at global scope results in const decl
+X{T} = Y{T,T}
+#---------------------
+1   (call core.TypeVar :T)
+2   (= slot₁/T %₁)
+3   slot₁/T
+4   TestMod.Y
+5   slot₁/T
+6   slot₁/T
+7   (call core.apply_type %₄ %₅ %₆)
+8   (call core.UnionAll %₃ %₇)
+9   (call core.declare_const TestMod :X %₈)
+10  latestworld
+11  (return %₈)
+
+########################################
+# UnionAll expansion in local scope
+let
+    X{T} = Y{T,T}
+end
+#---------------------
+1   (call core.TypeVar :T)
+2   (= slot₂/T %₁)
+3   slot₂/T
+4   TestMod.Y
+5   slot₂/T
+6   slot₂/T
+7   (call core.apply_type %₄ %₅ %₆)
+8   (call core.UnionAll %₃ %₇)
+9   (= slot₁/X %₈)
+10  (return %₈)
+
+########################################
+# Error: Invalid lhs in `=`
+a.(b) = rhs
+#---------------------
+LoweringError:
+a.(b) = rhs
+└───┘ ── invalid dot call syntax on left hand side of assignment
+
+########################################
+# Error: Invalid lhs in `=`
+T[x y] = rhs
+#---------------------
+LoweringError:
+T[x y] = rhs
+└────┘ ── invalid spacing in left side of indexed assignment
+
+########################################
+# Error: Invalid lhs in `=`
+T[x; y] = rhs
+#---------------------
+LoweringError:
+T[x; y] = rhs
+└─────┘ ── unexpected `;` in left side of indexed assignment
+
+########################################
+# Error: Invalid lhs in `=`
+T[x ;;; y] = rhs
+#---------------------
+LoweringError:
+T[x ;;; y] = rhs
+└────────┘ ── unexpected `;` in left side of indexed assignment
+
+########################################
+# Error: Invalid lhs in `=`
+[x, y] = rhs
+#---------------------
+LoweringError:
+[x, y] = rhs
+└────┘ ── use `(a, b) = ...` to assign multiple values
+
+########################################
+# Error: Invalid lhs in `=`
+[x y] = rhs
+#---------------------
+LoweringError:
+[x y] = rhs
+└───┘ ── use `(a, b) = ...` to assign multiple values
+
+########################################
+# Error: Invalid lhs in `=`
+[x; y] = rhs
+#---------------------
+LoweringError:
+[x; y] = rhs
+└────┘ ── use `(a, b) = ...` to assign multiple values
+
+########################################
+# Error: Invalid lhs in `=`
+[x ;;; y] = rhs
+#---------------------
+LoweringError:
+[x ;;; y] = rhs
+└───────┘ ── use `(a, b) = ...` to assign multiple values
+
+########################################
+# Error: Invalid lhs in `=`
+1 = rhs
+#---------------------
+LoweringError:
+1 = rhs
+╙ ── invalid assignment location
+
+########################################
+# Basic updating assignment
+begin
+    local x
+    x += y
+end
+#---------------------
+1   TestMod.+
+2   slot₁/x
+3   TestMod.y
+4   (call %₁ %₂ %₃)
+5   (= slot₁/x %₄)
+6   (return %₄)
+
+########################################
+# Broadcasted updating assignment
+begin
+    local x
+    x .+= y
+end
+#---------------------
+1   (newvar slot₁/x)
+2   slot₁/x
+3   TestMod.+
+4   TestMod.y
+5   (call top.broadcasted %₃ %₂ %₄)
+6   (call top.materialize! %₂ %₅)
+7   (return %₆)
+
+########################################
+# Broadcasted updating assignment with general left hand side permitted
+f() .+= y
+#---------------------
+1   TestMod.f
+2   (call %₁)
+3   TestMod.+
+4   TestMod.y
+5   (call top.broadcasted %₃ %₂ %₄)
+6   (call top.materialize! %₂ %₅)
+7   (return %₆)
+
+########################################
+# Updating assignment with basic ref as left hand side
+x[i] += y
+#---------------------
+1   TestMod.+
+2   TestMod.x
+3   TestMod.i
+4   (call top.getindex %₂ %₃)
+5   TestMod.y
+6   (call %₁ %₄ %₅)
+7   TestMod.x
+8   TestMod.i
+9   (call top.setindex! %₇ %₆ %₈)
+10  (return %₆)
+
+########################################
+# Updating assignment with complex ref as left hand side
+g()[f(), end] += y
+#---------------------
+1   TestMod.g
+2   (call %₁)
+3   TestMod.f
+4   (call %₃)
+5   (call top.lastindex %₂ 2)
+6   TestMod.+
+7   (call top.getindex %₂ %₄ %₅)
+8   TestMod.y
+9   (call %₆ %₇ %₈)
+10  (call top.setindex! %₂ %₉ %₄ %₅)
+11  (return %₉)
+
+########################################
+# Updating assignment with type assert on left hand side
+begin
+    local x
+    x::T += y
+end
+#---------------------
+1   TestMod.+
+2   slot₁/x
+3   TestMod.T
+4   (call core.typeassert %₂ %₃)
+5   TestMod.y
+6   (call %₁ %₄ %₅)
+7   (= slot₁/x %₆)
+8   (return %₆)
+
+########################################
+# Updating assignment with ref and type assert on left hand side
+begin
+    local x
+    x[f()]::T += y
+end
+#---------------------
+1   (newvar slot₁/x)
+2   TestMod.f
+3   (call %₂)
+4   TestMod.+
+5   slot₁/x
+6   (call top.getindex %₅ %₃)
+7   TestMod.T
+8   (call core.typeassert %₆ %₇)
+9   TestMod.y
+10  (call %₄ %₈ %₉)
+11  slot₁/x
+12  (call top.setindex! %₁₁ %₁₀ %₃)
+13  (return %₁₀)
+
+########################################
+# Error: Updating assignment with invalid left hand side
+f() += y
+#---------------------
+LoweringError:
+f() += y
+└─┘ ── invalid assignment location
+
+########################################
+# Error: Updating assignment with invalid tuple destructuring on left hand side
+(if false end, b) += 2
+#---------------------
+LoweringError:
+(if false end, b) += 2
+└───────────────┘ ── invalid multiple assignment location
diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl
new file mode 100644
index 0000000000000..2b7eac29f348c
--- /dev/null
+++ b/JuliaLowering/test/branching.jl
@@ -0,0 +1,317 @@
+# Branching
+
+@testset "branching" begin
+
+test_mod = Module()
+
+#-------------------------------------------------------------------------------
+@testset "Tail position" begin
+
+@test JuliaLowering.include_string(test_mod, """
+let a = true
+    if a
+        1
+    end
+end
+""") === 1
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false
+    if a
+        1
+    end
+end
+""") === nothing
+
+@test JuliaLowering.include_string(test_mod, """
+let a = true
+    if a
+        1
+    else
+        2
+    end
+end
+""") === 1
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false
+    if a
+        1
+    else
+        2
+    end
+end
+""") === 2
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false, b = true
+    if a
+        1
+    elseif b
+        2
+    else
+        3
+    end
+end
+""") === 2
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false, b = false
+    if a
+        1
+    elseif b
+        2
+    else
+        3
+    end
+end
+""") === 3
+
+end
+
+#-------------------------------------------------------------------------------
+@testset "Value required but not tail position" begin
+
+@test JuliaLowering.include_string(test_mod, """
+let a = true
+    x = if a
+        1
+    end
+    x
+end
+""") === 1
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false
+    x = if a
+        1
+    end
+    x
+end
+""") === nothing
+
+@test JuliaLowering.include_string(test_mod, """
+let a = true
+    x = if a
+        1
+    else
+        2
+    end
+    x
+end
+""") === 1
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false
+    x = if a
+        1
+    else
+        2
+    end
+    x
+end
+""") === 2
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false, b = true
+    x = if a
+        1
+    elseif b
+        2
+    else
+        3
+    end
+    x
+end
+""") === 2
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false, b = false
+    x = if a
+        1
+    elseif b
+        2
+    else
+        3
+    end
+    x
+end
+""") === 3
+
+end
+
+#-------------------------------------------------------------------------------
+@testset "Side effects (not value or tail position)" begin
+
+@test JuliaLowering.include_string(test_mod, """
+let a = true
+    x = nothing
+    if a
+        x = 1
+    end
+    x
+end
+""") === 1
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false
+    x = nothing
+    if a
+        x = 1
+    end
+    x
+end
+""") === nothing
+
+@test JuliaLowering.include_string(test_mod, """
+let a = true
+    x = nothing
+    if a
+        x = 1
+    else
+        x = 2
+    end
+    x
+end
+""") === 1
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false
+    x = nothing
+    if a
+        x = 1
+    else
+        x = 2
+    end
+    x
+end
+""") === 2
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false, b = true
+    x = nothing
+    if a
+        x = 1
+    elseif b
+        x = 2
+    else
+        x = 3
+    end
+    x
+end
+""") === 2
+
+@test JuliaLowering.include_string(test_mod, """
+let a = false, b = false
+    x = nothing
+    if a
+        x = 1
+    elseif b
+        x = 2
+    else
+        x = 3
+    end
+    x
+end
+""") === 3
+
+end
+#-------------------------------------------------------------------------------
+# Block condition
+@test JuliaLowering.include_string(test_mod, """
+let a = true
+    if begin; x = 2; a; end
+        x
+    end
+end
+""") === 2
+
+#-------------------------------------------------------------------------------
+@testset "`&&` and `||` chains" begin
+
+@test JuliaLowering.include_string(test_mod, """
+true && "hi"
+""") == "hi"
+
+@test JuliaLowering.include_string(test_mod, """
+true && true && "hi"
+""") == "hi"
+
+@test JuliaLowering.include_string(test_mod, """
+false && "hi"
+""") == false
+
+@test JuliaLowering.include_string(test_mod, """
+true && false && "hi"
+""") == false
+
+@test JuliaLowering.include_string(test_mod, """
+begin
+    z = true && "hi"
+    z
+end
+""") == "hi"
+
+@test JuliaLowering.include_string(test_mod, """
+begin
+    z = false && "hi"
+    z
+end
+""") == false
+
+
+@test JuliaLowering.include_string(test_mod, """
+true || "hi"
+""") == true
+
+@test JuliaLowering.include_string(test_mod, """
+true || true || "hi"
+""") == true
+
+@test JuliaLowering.include_string(test_mod, """
+false || "hi"
+""") == "hi"
+
+@test JuliaLowering.include_string(test_mod, """
+false || true || "hi"
+""") == true
+
+@test JuliaLowering.include_string(test_mod, """
+false || false || "hi"
+""") == "hi"
+
+@test JuliaLowering.include_string(test_mod, """
+begin
+    z = false || "hi"
+    z
+end
+""") == "hi"
+
+@test JuliaLowering.include_string(test_mod, """
+begin
+    z = true || "hi"
+    z
+end
+""") == true
+
+end
+
+@testset "symbolic goto/label" begin
+
+JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    i = 1
+    @label foo
+    push!(a, i)
+    i = i + 1
+    if i <= 2
+        @goto foo
+    end
+    a
+end
+""") == [1,2]
+
+end
+
+end
diff --git a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl
new file mode 100644
index 0000000000000..f7a63f40291e6
--- /dev/null
+++ b/JuliaLowering/test/branching_ir.jl
@@ -0,0 +1,239 @@
+########################################
+# Basic branching tail && value
+begin
+    local a, b
+    if a
+        b
+    end
+end
+#---------------------
+1   (newvar slot₁/a)
+2   (newvar slot₂/b)
+3   slot₁/a
+4   (gotoifnot %₃ label₇)
+5   slot₂/b
+6   (return %₅)
+7   (return core.nothing)
+
+########################################
+# Branching, !tail && !value
+begin
+    local a, b, c
+    if a
+        b
+    end
+    c
+end
+#---------------------
+1   (newvar slot₁/a)
+2   (newvar slot₂/b)
+3   (newvar slot₃/c)
+4   slot₁/a
+5   (gotoifnot %₄ label₇)
+6   slot₂/b
+7   slot₃/c
+8   (return %₇)
+
+########################################
+# Branching with else
+begin
+    local a, b, c
+    if a
+        b
+    else
+        c
+    end
+end
+#---------------------
+1   (newvar slot₁/a)
+2   (newvar slot₂/b)
+3   (newvar slot₃/c)
+4   slot₁/a
+5   (gotoifnot %₄ label₈)
+6   slot₂/b
+7   (return %₆)
+8   slot₃/c
+9   (return %₈)
+
+########################################
+# Branching with else, !tail && !value
+begin
+    local a, b, c, d
+    if a
+        b
+    else
+        c
+    end
+    d
+end
+#---------------------
+1   (newvar slot₁/a)
+2   (newvar slot₂/b)
+3   (newvar slot₃/c)
+4   (newvar slot₄/d)
+5   slot₁/a
+6   (gotoifnot %₅ label₉)
+7   slot₂/b
+8   (goto label₁₀)
+9   slot₃/c
+10  slot₄/d
+11  (return %₁₀)
+
+########################################
+# Blocks compile directly to branches
+begin
+   local a, b, c, d
+   if (a; b && c)
+       d
+   end
+end
+#---------------------
+1   (newvar slot₁/a)
+2   (newvar slot₂/b)
+3   (newvar slot₃/c)
+4   (newvar slot₄/d)
+5   slot₁/a
+6   slot₂/b
+7   (gotoifnot %₆ label₁₂)
+8   slot₃/c
+9   (gotoifnot %₈ label₁₂)
+10  slot₄/d
+11  (return %₁₀)
+12  (return core.nothing)
+
+########################################
+# symbolic goto forward jump
+begin
+    a
+    @goto foo
+    b
+    @label foo
+end
+#---------------------
+1   TestMod.a
+2   (goto label₄)
+3   TestMod.b
+4   (return core.nothing)
+
+########################################
+# symbolic goto backward jump
+begin
+    a
+    @label foo
+    b
+    @goto foo
+end
+#---------------------
+1   TestMod.a
+2   TestMod.b
+3   (goto label₂)
+
+########################################
+# Jumping out of try and catch blocks using @goto
+begin
+    try
+        a
+        @goto lab
+        b
+    catch
+        c
+        @goto lab
+        d
+    end
+    @label lab
+end
+#---------------------
+1   (enter label₈)
+2   TestMod.a
+3   (leave %₁)
+4   (goto label₁₃)
+5   TestMod.b
+6   (leave %₁)
+7   (goto label₁₃)
+8   TestMod.c
+9   (pop_exception %₁)
+10  (goto label₁₃)
+11  TestMod.d
+12  (pop_exception %₁)
+13  (return core.nothing)
+
+########################################
+# Jumping out of nested try/catch and catch/try
+begin
+    try
+        try
+            a
+        catch
+            b
+            @goto lab
+            c
+        end
+    catch
+        try
+            d
+            @goto lab
+            e
+        catch
+        end
+    end
+    @label lab
+end
+#---------------------
+1   (enter label₁₄)
+2   (enter label₆)
+3   TestMod.a
+4   (leave %₂)
+5   (goto label₁₂)
+6   TestMod.b
+7   (pop_exception %₂)
+8   (leave %₁)
+9   (goto label₂₄)
+10  TestMod.c
+11  (pop_exception %₂)
+12  (leave %₁)
+13  (goto label₂₄)
+14  (enter label₂₂)
+15  TestMod.d
+16  (pop_exception %₁)
+17  (leave %₁₄)
+18  (goto label₂₄)
+19  TestMod.e
+20  (leave %₁₄)
+21  (goto label₂₃)
+22  (pop_exception %₁₄)
+23  (pop_exception %₁)
+24  (return core.nothing)
+
+########################################
+# Error: no symbolic label
+begin
+    @goto foo
+end
+#---------------------
+LoweringError:
+begin
+    @goto foo
+#         └─┘ ── label `foo` referenced but not defined
+end
+
+########################################
+# Error: duplicate symbolic label
+begin
+    @label foo
+    @label foo
+end
+#---------------------
+LoweringError:
+begin
+    @label foo
+    @label foo
+#          └─┘ ── Label `foo` defined multiple times
+end
+
+########################################
+# Error: using value of symbolic label
+x = @label foo
+#---------------------
+LoweringError:
+x = @label foo
+#          └─┘ ── misplaced label in value position
diff --git a/JuliaLowering/test/closures.jl b/JuliaLowering/test/closures.jl
new file mode 100644
index 0000000000000..3999b1a2c486a
--- /dev/null
+++ b/JuliaLowering/test/closures.jl
@@ -0,0 +1,244 @@
+@testset "Closures" begin
+
+test_mod = Module()
+
+# Capture assigned before closure
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = 1
+    f(y) = x+y
+    f(2), f(3)
+end
+""") == (3,4)
+
+# Capture assigned after closure
+@test JuliaLowering.include_string(test_mod, """
+let
+    f(y) = x+y
+    x = 1
+    f(2)
+end
+""") == 3
+
+# Capture assigned inside closure
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = 1
+    function f(y)
+        x = y
+    end
+    f(100)
+    x
+end
+""") == 100
+
+Base.eval(test_mod, :(call_it(f, args...) = f(args...)))
+
+# Closure where a local `x` is captured but not boxed
+@test JuliaLowering.include_string(test_mod, """
+begin
+    function f_unboxed_test(x)
+        z = 0
+        function g()
+            y = x  # x will not be boxed
+            (y + 1, z)
+        end
+        z = 2 # will be boxed
+        (x, g())
+    end
+    f_unboxed_test(10)
+end
+""") == (10,(11,2))
+
+# Use of isdefined
+@test JuliaLowering.include_string(test_mod, """
+begin
+    function f_isdefined(x)
+        local w
+        function g()
+            z = 3
+            (@isdefined(x), # unboxed, always defined capture
+             @isdefined(y), # boxed capture
+             @isdefined(z), # normal local var
+             @isdefined(w)) # boxed undefined var
+        end
+        y = 2
+        (@isdefined(y), @isdefined(w), g())
+    end
+    f_isdefined(1)
+end
+""") == (true, false, (true, true, true, false))
+
+# Mutually recursive closures (closure capturing a closure)
+@test JuliaLowering.include_string(test_mod, """
+let
+    function recursive_a(n)
+        here = (:a, n)
+        n <= 0 ? here  : (here, recursive_b(n-1))
+    end
+    function recursive_b(n)
+        ((:b, n), recursive_a(n-1))
+    end
+    recursive_a(2)
+end
+""") == ((:a, 2), ((:b, 1), (:a, 0)))
+
+# Global method capturing local variables
+JuliaLowering.include_string(test_mod, """
+begin
+    local x = 1
+    function f_global_method_capturing_local()
+        x = x + 1
+    end
+end
+""")
+@test test_mod.f_global_method_capturing_local() == 2
+@test test_mod.f_global_method_capturing_local() == 3
+
+# Closure with multiple methods depending on local variables
+f_closure_local_var_types = JuliaLowering.include_string(test_mod, """
+let T=Int, S=Float64
+    function f_closure_local_var_types(::T)
+        1
+    end
+    function f_closure_local_var_types(::S)
+        1.0
+    end
+end
+""")
+@test f_closure_local_var_types(2) == 1
+@test f_closure_local_var_types(2.0) == 1.0
+@test_throws MethodError f_closure_local_var_types("hi")
+
+# Multiply nested closures. In this case g_nest needs to capture `x` in order
+# to construct an instance of `h_nest()` inside it.
+@test JuliaLowering.include_string(test_mod, """
+begin
+    function f_nest(x)
+        function g_nest(y)
+            function h_nest(z)
+                (x,y,z)
+            end
+        end
+    end
+
+    f_nest(1)(2)(3)
+end
+""") === (1,2,3)
+
+# Closure with return type must capture the return type
+@test JuliaLowering.include_string(test_mod, """
+let T = Int
+    function f_captured_return_type()::T
+        2.0
+    end
+    f_captured_return_type()
+end
+""") === 2
+
+# Capturing a typed local
+@test JuliaLowering.include_string(test_mod, """
+let T = Int
+    x::T = 1.0
+    function f_captured_typed_local()
+        x = 2.0
+    end
+    f_captured_typed_local()
+    x
+end
+""") === 2
+
+# Capturing a typed local where the type is a nontrivial expression
+@test begin
+    res = JuliaLowering.include_string(test_mod, """
+    let T = Int, V=Vector
+        x::V{T} = [1,2]
+        function f_captured_typed_local_composite()
+            x = [100.0, 200.0]
+        end
+        f_captured_typed_local_composite()
+        x
+    end
+    """)
+    res == [100, 200] && eltype(res) == Int
+end
+
+# Evil case where we mutate `T` which is the type of `x`, such that x is
+# eventually set to a Float64.
+#
+# Completely dynamic types for variables should be disallowed somehow?? For
+# example, by emitting the expression computing the type of `x` alongside the
+# newvar node. However, for now we verify that this potentially evil behavior
+# is compatible with the existing implementation :)
+@test JuliaLowering.include_string(test_mod, """
+let T = Int
+    x::T = 1.0
+    function f_captured_mutating_typed_local()
+        x = 2
+    end
+    T = Float64
+    f_captured_mutating_typed_local()
+    x
+end
+""") === 2.0
+
+# Anon function syntax
+@test JuliaLowering.include_string(test_mod, """
+begin
+    local y = 2
+    call_it(x->x+y, 3)
+end
+""") == 5
+
+# Anon function syntax with `where`
+@test JuliaLowering.include_string(test_mod, """
+begin
+    local y = 2
+    call_it((x::T where {T<:Integer})->x+y, 3)
+end
+""") == 5
+
+# Do block syntax
+@test JuliaLowering.include_string(test_mod, """
+begin
+    local y = 2
+    call_it(3) do x
+        x + y
+    end
+end
+""") == 5
+
+# Attempt to reference capture which is not assigned
+@test_throws UndefVarError(:x, :local) JuliaLowering.include_string(test_mod, """
+let
+    function f()
+        x
+    end
+    f()
+    x = 1
+end
+""")
+
+# Opaque closure
+@test JuliaLowering.include_string(test_mod, """
+let y = 1
+    oc = Base.Experimental.@opaque x->2x + y
+    oc(3)
+end
+""") == 7
+
+# Opaque closure with `...`
+@test JuliaLowering.include_string(test_mod, """
+let
+    oc = Base.Experimental.@opaque (xs...)->xs
+    oc(3,4,5)
+end
+""") == (3,4,5)
+
+# opaque_closure_method internals
+method_ex = lower_str(test_mod, "Base.Experimental.@opaque x -> 2x").args[1].code[3]
+@test method_ex.head === :opaque_closure_method
+@test method_ex.args[1] === nothing
+@test method_ex.args[4] isa LineNumberNode
+
+end
diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl
new file mode 100644
index 0000000000000..d1173d5273ca2
--- /dev/null
+++ b/JuliaLowering/test/closures_ir.jl
@@ -0,0 +1,765 @@
+########################################
+# Simple closure
+# (FIXME: #self# should have `read` flag set)
+let
+    x = 1
+    function f(y)
+        x + y
+    end
+end
+#---------------------
+1   (= slot₂/x (call core.Box))
+2   1
+3   slot₂/x
+4   (call core.setfield! %₃ :contents %₂)
+5   (call core.svec :x)
+6   (call core.svec true)
+7   (call JuliaLowering.eval_closure_type TestMod :#f##0 %₅ %₆)
+8   latestworld
+9   TestMod.#f##0
+10  slot₂/x
+11  (new %₉ %₁₀)
+12  (= slot₁/f %₁₁)
+13  TestMod.#f##0
+14  (call core.svec %₁₃ core.Any)
+15  (call core.svec)
+16  SourceLocation::3:14
+17  (call core.svec %₁₄ %₁₅ %₁₆)
+18  --- method core.nothing %₁₇
+    slots: [slot₁/#self#(!read) slot₂/y slot₃/x(!read)]
+    1   TestMod.+
+    2   (call core.getfield slot₁/#self# :x)
+    3   (call core.isdefined %₂ :contents)
+    4   (gotoifnot %₃ label₆)
+    5   (goto label₈)
+    6   (newvar slot₃/x)
+    7   slot₃/x
+    8   (call core.getfield %₂ :contents)
+    9   (call %₁ %₈ slot₂/y)
+    10  (return %₉)
+19  latestworld
+20  slot₁/f
+21  (return %₂₀)
+
+########################################
+# Closure declaration with no methods
+begin
+    local no_method_f
+    function no_method_f
+    end
+end
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :#no_method_f##0 %₁ %₂)
+4   latestworld
+5   TestMod.#no_method_f##0
+6   (new %₅)
+7   (= slot₁/no_method_f %₆)
+8   slot₁/no_method_f
+9   (return %₈)
+
+########################################
+# Closure which sets the value of a captured variable
+let
+    x = 1
+    function f(y)
+        x = 2
+    end
+end
+#---------------------
+1   (= slot₂/x (call core.Box))
+2   1
+3   slot₂/x
+4   (call core.setfield! %₃ :contents %₂)
+5   (call core.svec :x)
+6   (call core.svec true)
+7   (call JuliaLowering.eval_closure_type TestMod :#f##1 %₅ %₆)
+8   latestworld
+9   TestMod.#f##1
+10  slot₂/x
+11  (new %₉ %₁₀)
+12  (= slot₁/f %₁₁)
+13  TestMod.#f##1
+14  (call core.svec %₁₃ core.Any)
+15  (call core.svec)
+16  SourceLocation::3:14
+17  (call core.svec %₁₄ %₁₅ %₁₆)
+18  --- method core.nothing %₁₇
+    slots: [slot₁/#self#(!read) slot₂/y(!read)]
+    1   2
+    2   (call core.getfield slot₁/#self# :x)
+    3   (call core.setfield! %₂ :contents %₁)
+    4   (return %₁)
+19  latestworld
+20  slot₁/f
+21  (return %₂₀)
+
+########################################
+# Function where arguments are captured into a closure and assigned
+function f(x)
+    function g()
+        x = 10
+    end
+    g()
+    x
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   (call core.svec :x)
+4   (call core.svec true)
+5   (call JuliaLowering.eval_closure_type TestMod :#f#g##0 %₃ %₄)
+6   latestworld
+7   TestMod.#f#g##0
+8   (call core.svec %₇)
+9   (call core.svec)
+10  SourceLocation::2:14
+11  (call core.svec %₈ %₉ %₁₀)
+12  --- method core.nothing %₁₁
+    slots: [slot₁/#self#(!read)]
+    1   10
+    2   (call core.getfield slot₁/#self# :x)
+    3   (call core.setfield! %₂ :contents %₁)
+    4   (return %₁)
+13  latestworld
+14  TestMod.f
+15  (call core.Typeof %₁₄)
+16  (call core.svec %₁₅ core.Any)
+17  (call core.svec)
+18  SourceLocation::1:10
+19  (call core.svec %₁₆ %₁₇ %₁₈)
+20  --- method core.nothing %₁₉
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/g(called) slot₄/x(!read)]
+    1   (= slot₂/x (call core.Box slot₂/x))
+    2   TestMod.#f#g##0
+    3   (new %₂ slot₂/x)
+    4   (= slot₃/g %₃)
+    5   slot₃/g
+    6   (call %₅)
+    7   slot₂/x
+    8   (call core.isdefined %₇ :contents)
+    9   (gotoifnot %₈ label₁₁)
+    10  (goto label₁₃)
+    11  (newvar slot₄/x)
+    12  slot₄/x
+    13  (call core.getfield %₇ :contents)
+    14  (return %₁₃)
+21  latestworld
+22  TestMod.f
+23  (return %₂₂)
+
+########################################
+# Closure where a local `x` is captured but not boxed
+function f(x)
+    function g()
+        y = x
+    end
+    z = x
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   (call core.svec :x)
+4   (call core.svec false)
+5   (call JuliaLowering.eval_closure_type TestMod :#f#g##1 %₃ %₄)
+6   latestworld
+7   TestMod.#f#g##1
+8   (call core.svec %₇)
+9   (call core.svec)
+10  SourceLocation::2:14
+11  (call core.svec %₈ %₉ %₁₀)
+12  --- method core.nothing %₁₁
+    slots: [slot₁/#self#(!read) slot₂/y(!read)]
+    1   (call core.getfield slot₁/#self# :x)
+    2   (= slot₂/y %₁)
+    3   (return %₁)
+13  latestworld
+14  TestMod.f
+15  (call core.Typeof %₁₄)
+16  (call core.svec %₁₅ core.Any)
+17  (call core.svec)
+18  SourceLocation::1:10
+19  (call core.svec %₁₆ %₁₇ %₁₈)
+20  --- method core.nothing %₁₉
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/z(!read)]
+    1   TestMod.#f#g##1
+    2   (call core.typeof slot₂/x)
+    3   (call core.apply_type %₁ %₂)
+    4   (new %₃ slot₂/x)
+    5   (= slot₃/g %₄)
+    6   slot₂/x
+    7   (= slot₄/z %₆)
+    8   (return %₆)
+21  latestworld
+22  TestMod.f
+23  (return %₂₂)
+
+########################################
+# Closure where a static parameter of an outer function is captured
+function f(::T) where T
+    function g()
+        use(T)
+    end
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   (call core.svec :T)
+4   (call core.svec true)
+5   (call JuliaLowering.eval_closure_type TestMod :#f#g##2 %₃ %₄)
+6   latestworld
+7   TestMod.#f#g##2
+8   (call core.svec %₇)
+9   (call core.svec)
+10  SourceLocation::2:14
+11  (call core.svec %₈ %₉ %₁₀)
+12  --- method core.nothing %₁₁
+    slots: [slot₁/#self#(!read) slot₂/T(!read)]
+    1   TestMod.use
+    2   (call core.getfield slot₁/#self# :T)
+    3   (call core.isdefined %₂ :contents)
+    4   (gotoifnot %₃ label₆)
+    5   (goto label₈)
+    6   (newvar slot₂/T)
+    7   slot₂/T
+    8   (call core.getfield %₂ :contents)
+    9   (call %₁ %₈)
+    10  (return %₉)
+13  latestworld
+14  (= slot₁/T (call core.TypeVar :T))
+15  TestMod.f
+16  (call core.Typeof %₁₅)
+17  slot₁/T
+18  (call core.svec %₁₆ %₁₇)
+19  slot₁/T
+20  (call core.svec %₁₉)
+21  SourceLocation::1:10
+22  (call core.svec %₁₈ %₂₀ %₂₁)
+23  --- method core.nothing %₂₂
+    slots: [slot₁/#self#(!read) slot₂/#arg1#(!read) slot₃/g]
+    1   TestMod.#f#g##2
+    2   static_parameter₁
+    3   (new %₁ %₂)
+    4   (= slot₃/g %₃)
+    5   slot₃/g
+    6   (return %₅)
+24  latestworld
+25  TestMod.f
+26  (return %₂₅)
+
+########################################
+# Closure captures with `isdefined`
+function f(x)
+    function g()
+        z = 3
+        (@isdefined(x), # unboxed, always defined capture
+         @isdefined(y), # boxed capture
+         @isdefined(z)) # normal local var
+    end
+    y = 2
+    (@isdefined(y), # boxed local
+     @isdefined(x)) # always defined local (function arg)
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   (call core.svec :x :y)
+4   (call core.svec false true)
+5   (call JuliaLowering.eval_closure_type TestMod :#f#g##3 %₃ %₄)
+6   latestworld
+7   TestMod.#f#g##3
+8   (call core.svec %₇)
+9   (call core.svec)
+10  SourceLocation::2:14
+11  (call core.svec %₈ %₉ %₁₀)
+12  --- method core.nothing %₁₁
+    slots: [slot₁/#self#(!read) slot₂/z]
+    1   (= slot₂/z 3)
+    2   (call core.getfield slot₁/#self# :y)
+    3   (call core.isdefined %₂ :contents)
+    4   (isdefined slot₂/z)
+    5   (call core.tuple true %₃ %₄)
+    6   (return %₅)
+13  latestworld
+14  TestMod.f
+15  (call core.Typeof %₁₄)
+16  (call core.svec %₁₅ core.Any)
+17  (call core.svec)
+18  SourceLocation::1:10
+19  (call core.svec %₁₆ %₁₇ %₁₈)
+20  --- method core.nothing %₁₉
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/y]
+    1   (= slot₄/y (call core.Box))
+    2   TestMod.#f#g##3
+    3   (call core.typeof slot₂/x)
+    4   (call core.apply_type %₂ %₃)
+    5   slot₄/y
+    6   (new %₄ slot₂/x %₅)
+    7   (= slot₃/g %₆)
+    8   2
+    9   slot₄/y
+    10  (call core.setfield! %₉ :contents %₈)
+    11  slot₄/y
+    12  (call core.isdefined %₁₁ :contents)
+    13  (call core.tuple %₁₂ true)
+    14  (return %₁₃)
+21  latestworld
+22  TestMod.f
+23  (return %₂₂)
+
+########################################
+# Nested captures - here `g` captures `x` because it is needed to initialize
+# the closure `h` which captures both `x` and `y`.
+# [method_filter: #f_nest#g_nest##0]
+function f_nest(x)
+    function g_nest(y)
+        function h_nest(z)
+            (x,y,z)
+        end
+    end
+end
+#---------------------
+slots: [slot₁/#self#(!read) slot₂/y(!read) slot₃/h_nest]
+1   TestMod.#f_nest#g_nest#h_nest##0
+2   (call core.getfield slot₁/#self# :x)
+3   (call core.typeof %₂)
+4   (call core.typeof slot₂/y)
+5   (call core.apply_type %₁ %₃ %₄)
+6   (call core.getfield slot₁/#self# :x)
+7   (new %₅ %₆ slot₂/y)
+8   (= slot₃/h_nest %₇)
+9   slot₃/h_nest
+10  (return %₉)
+
+########################################
+# Global method capturing local variables
+begin
+    local x = 1
+    function f()
+        x = x + 1
+    end
+end
+#---------------------
+1   (= slot₁/x (call core.Box))
+2   1
+3   slot₁/x
+4   (call core.setfield! %₃ :contents %₂)
+5   (method TestMod.f)
+6   latestworld
+7   TestMod.f
+8   (call core.Typeof %₇)
+9   (call core.svec %₈)
+10  (call core.svec)
+11  SourceLocation::3:14
+12  (call core.svec %₉ %₁₀ %₁₁)
+13  --- code_info
+    slots: [slot₁/#self#(!read) slot₂/x(!read)]
+    1   TestMod.+
+    2   (captured_local 1)
+    3   (call core.isdefined %₂ :contents)
+    4   (gotoifnot %₃ label₆)
+    5   (goto label₈)
+    6   (newvar slot₂/x)
+    7   slot₂/x
+    8   (call core.getfield %₂ :contents)
+    9   (call %₁ %₈ 1)
+    10  (captured_local 1)
+    11  (call core.setfield! %₁₀ :contents %₉)
+    12  (return %₉)
+14  slot₁/x
+15  (call core.svec %₁₄)
+16  (call JuliaLowering.replace_captured_locals! %₁₃ %₁₅)
+17  --- method core.nothing %₁₂ %₁₆
+18  latestworld
+19  TestMod.f
+20  (return %₁₉)
+
+########################################
+# Anonymous function syntax with ->
+x -> x*x
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :#->##0 %₁ %₂)
+4   latestworld
+5   TestMod.#->##0
+6   (new %₅)
+7   TestMod.#->##0
+8   (call core.svec %₇ core.Any)
+9   (call core.svec)
+10  SourceLocation::1:1
+11  (call core.svec %₈ %₉ %₁₀)
+12  --- method core.nothing %₁₁
+    slots: [slot₁/#self#(!read) slot₂/x]
+    1   TestMod.*
+    2   (call %₁ slot₂/x slot₂/x)
+    3   (return %₂)
+13  latestworld
+14  (return %₆)
+
+########################################
+# Anonymous function syntax with `function`
+function (x)
+    x*x
+end
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :##anon###0 %₁ %₂)
+4   latestworld
+5   TestMod.##anon###0
+6   (new %₅)
+7   TestMod.##anon###0
+8   (call core.svec %₇ core.Any)
+9   (call core.svec)
+10  SourceLocation::1:10
+11  (call core.svec %₈ %₉ %₁₀)
+12  --- method core.nothing %₁₁
+    slots: [slot₁/#self#(!read) slot₂/x]
+    1   TestMod.*
+    2   (call %₁ slot₂/x slot₂/x)
+    3   (return %₂)
+13  latestworld
+14  (return %₆)
+
+########################################
+# `do` blocks
+f(x; a=1) do y
+    y + 2
+end
+#---------------------
+1   TestMod.f
+2   (call core.tuple :a)
+3   (call core.apply_type core.NamedTuple %₂)
+4   (call core.tuple 1)
+5   (call %₃ %₄)
+6   (call core.svec)
+7   (call core.svec)
+8   (call JuliaLowering.eval_closure_type TestMod :#do##0 %₆ %₇)
+9   latestworld
+10  TestMod.#do##0
+11  (call core.svec %₁₀ core.Any)
+12  (call core.svec)
+13  SourceLocation::1:13
+14  (call core.svec %₁₁ %₁₂ %₁₃)
+15  --- method core.nothing %₁₄
+    slots: [slot₁/#self#(!read) slot₂/y]
+    1   TestMod.+
+    2   (call %₁ slot₂/y 2)
+    3   (return %₂)
+16  latestworld
+17  TestMod.#do##0
+18  (new %₁₇)
+19  TestMod.x
+20  (call core.kwcall %₅ %₁ %₁₈ %₁₉)
+21  (return %₂₀)
+
+########################################
+# Error: Static parameter clashing with closure name
+function f(::g) where {g}
+    function g()
+    end
+end
+#---------------------
+LoweringError:
+function f(::g) where {g}
+    function g()
+#            ╙ ── local variable name `g` conflicts with a static parameter
+    end
+end
+
+########################################
+# Opaque closure
+let y = 1
+    Base.Experimental.@opaque (x, z::T)->2x + y - z
+end
+#---------------------
+1   1
+2   (= slot₁/y (call core.Box))
+3   slot₁/y
+4   (call core.setfield! %₃ :contents %₁)
+5   TestMod.T
+6   (call core.apply_type core.Tuple core.Any %₅)
+7   (call core.apply_type core.Union)
+8   --- opaque_closure_method  core.nothing 2 false SourceLocation::2:31
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/z slot₄/y(!read)]
+    1   TestMod.-
+    2   TestMod.+
+    3   TestMod.*
+    4   (call %₃ 2 slot₂/x)
+    5   (call core.getfield slot₁/#self# 1)
+    6   (call core.isdefined %₅ :contents)
+    7   (gotoifnot %₆ label₉)
+    8   (goto label₁₁)
+    9   (newvar slot₄/y)
+    10  slot₄/y
+    11  (call core.getfield %₅ :contents)
+    12  (call %₂ %₄ %₁₁)
+    13  (call %₁ %₁₂ slot₃/z)
+    14  (return %₁₃)
+9   slot₁/y
+10  (new_opaque_closure %₆ %₇ core.Any true %₈ %₉)
+11  (return %₁₀)
+
+########################################
+# Opaque closure with `...`
+let
+    Base.Experimental.@opaque (x, ys...)->ys
+end
+#---------------------
+1   (call core.apply_type core.Vararg core.Any)
+2   (call core.apply_type core.Tuple core.Any %₁)
+3   (call core.apply_type core.Union)
+4   --- opaque_closure_method  core.nothing 2 true SourceLocation::2:31
+    slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys]
+    1   slot₃/ys
+    2   (return %₁)
+5   (new_opaque_closure %₂ %₃ core.Any true %₄)
+6   (return %₅)
+
+########################################
+# Error: Opaque closure with default args
+Base.Experimental.@opaque (x=1)->2x
+#---------------------
+LoweringError:
+Base.Experimental.@opaque (x=1)->2x
+#                            ╙ ── Default positional arguments cannot be used in an opaque closure
+
+########################################
+# Mutually recursive closures
+let
+    function recursive_a()
+        recursive_b()
+    end
+    function recursive_b()
+        recursive_a()
+    end
+end
+#---------------------
+1   (= slot₁/recursive_a (call core.Box))
+2   (= slot₂/recursive_b (call core.Box))
+3   (call core.svec :recursive_b)
+4   (call core.svec true)
+5   (call JuliaLowering.eval_closure_type TestMod :#recursive_a##0 %₃ %₄)
+6   latestworld
+7   TestMod.#recursive_a##0
+8   slot₂/recursive_b
+9   (new %₇ %₈)
+10  slot₁/recursive_a
+11  (call core.setfield! %₁₀ :contents %₉)
+12  TestMod.#recursive_a##0
+13  (call core.svec %₁₂)
+14  (call core.svec)
+15  SourceLocation::2:14
+16  (call core.svec %₁₃ %₁₄ %₁₅)
+17  --- method core.nothing %₁₆
+    slots: [slot₁/#self#(!read) slot₂/recursive_b(!read)]
+    1   (call core.getfield slot₁/#self# :recursive_b)
+    2   (call core.isdefined %₁ :contents)
+    3   (gotoifnot %₂ label₅)
+    4   (goto label₇)
+    5   (newvar slot₂/recursive_b)
+    6   slot₂/recursive_b
+    7   (call core.getfield %₁ :contents)
+    8   (call %₇)
+    9   (return %₈)
+18  latestworld
+19  (call core.svec :recursive_a)
+20  (call core.svec true)
+21  (call JuliaLowering.eval_closure_type TestMod :#recursive_b##0 %₁₉ %₂₀)
+22  latestworld
+23  TestMod.#recursive_b##0
+24  slot₁/recursive_a
+25  (new %₂₃ %₂₄)
+26  slot₂/recursive_b
+27  (call core.setfield! %₂₆ :contents %₂₅)
+28  TestMod.#recursive_b##0
+29  (call core.svec %₂₈)
+30  (call core.svec)
+31  SourceLocation::5:14
+32  (call core.svec %₂₉ %₃₀ %₃₁)
+33  --- method core.nothing %₃₂
+    slots: [slot₁/#self#(!read) slot₂/recursive_a(!read)]
+    1   (call core.getfield slot₁/#self# :recursive_a)
+    2   (call core.isdefined %₁ :contents)
+    3   (gotoifnot %₂ label₅)
+    4   (goto label₇)
+    5   (newvar slot₂/recursive_a)
+    6   slot₂/recursive_a
+    7   (call core.getfield %₁ :contents)
+    8   (call %₇)
+    9   (return %₈)
+34  latestworld
+35  slot₂/recursive_b
+36  (call core.isdefined %₃₅ :contents)
+37  (gotoifnot %₃₆ label₃₉)
+38  (goto label₄₁)
+39  (newvar slot₄/recursive_b)
+40  slot₄/recursive_b
+41  (call core.getfield %₃₅ :contents)
+42  (return %₄₁)
+
+########################################
+# Closure with keywords
+let y = y_init
+    function f_kw_closure(; x::X=x_default)
+        x + y
+    end
+end
+#---------------------
+1   TestMod.y_init
+2   (= slot₁/y (call core.Box))
+3   (= slot₂/#f_kw_closure#0 (call core.Box))
+4   slot₁/y
+5   (call core.setfield! %₄ :contents %₁)
+6   (call core.svec :#f_kw_closure#0)
+7   (call core.svec true)
+8   (call JuliaLowering.eval_closure_type TestMod :#f_kw_closure##0 %₆ %₇)
+9   latestworld
+10  TestMod.#f_kw_closure##0
+11  slot₂/#f_kw_closure#0
+12  (new %₁₀ %₁₁)
+13  (= slot₃/f_kw_closure %₁₂)
+14  (call core.svec :y)
+15  (call core.svec true)
+16  (call JuliaLowering.eval_closure_type TestMod :##f_kw_closure#0##0 %₁₄ %₁₅)
+17  latestworld
+18  TestMod.##f_kw_closure#0##0
+19  slot₁/y
+20  (new %₁₈ %₁₉)
+21  slot₂/#f_kw_closure#0
+22  (call core.setfield! %₂₁ :contents %₂₀)
+23  TestMod.##f_kw_closure#0##0
+24  TestMod.X
+25  TestMod.#f_kw_closure##0
+26  (call core.svec %₂₃ %₂₄ %₂₅)
+27  (call core.svec)
+28  SourceLocation::2:14
+29  (call core.svec %₂₆ %₂₇ %₂₈)
+30  --- method core.nothing %₂₉
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/#self#(!read) slot₄/y(!read)]
+    1   (meta :nkw 1)
+    2   TestMod.+
+    3   (call core.getfield slot₁/#self# :y)
+    4   (call core.isdefined %₃ :contents)
+    5   (gotoifnot %₄ label₇)
+    6   (goto label₉)
+    7   (newvar slot₄/y)
+    8   slot₄/y
+    9   (call core.getfield %₃ :contents)
+    10  (call %₂ slot₂/x %₉)
+    11  (return %₁₀)
+31  latestworld
+32  (call core.typeof core.kwcall)
+33  TestMod.#f_kw_closure##0
+34  (call core.svec %₃₂ core.NamedTuple %₃₃)
+35  (call core.svec)
+36  SourceLocation::2:14
+37  (call core.svec %₃₄ %₃₅ %₃₆)
+38  --- code_info
+    slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/x(!read) slot₆/#f_kw_closure#0(!read)]
+    1   (newvar slot₅/x)
+    2   (call core.isdefined slot₂/kws :x)
+    3   (gotoifnot %₂ label₁₄)
+    4   (call core.getfield slot₂/kws :x)
+    5   TestMod.X
+    6   (call core.isa %₄ %₅)
+    7   (gotoifnot %₆ label₉)
+    8   (goto label₁₂)
+    9   TestMod.X
+    10  (new core.TypeError :keyword argument :x %₉ %₄)
+    11  (call core.throw %₁₀)
+    12  (= slot₄/kwtmp %₄)
+    13  (goto label₁₆)
+    14  TestMod.x_default
+    15  (= slot₄/kwtmp %₁₄)
+    16  slot₄/kwtmp
+    17  (call top.keys slot₂/kws)
+    18  (call core.tuple :x)
+    19  (call top.diff_names %₁₇ %₁₈)
+    20  (call top.isempty %₁₉)
+    21  (gotoifnot %₂₀ label₂₃)
+    22  (goto label₂₄)
+    23  (call top.kwerr slot₂/kws slot₃/#self#)
+    24  (captured_local 1)
+    25  (call core.isdefined %₂₄ :contents)
+    26  (gotoifnot %₂₅ label₂₈)
+    27  (goto label₃₀)
+    28  (newvar slot₆/#f_kw_closure#0)
+    29  slot₆/#f_kw_closure#0
+    30  (call core.getfield %₂₄ :contents)
+    31  (call %₃₀ %₁₆ slot₃/#self#)
+    32  (return %₃₁)
+39  slot₂/#f_kw_closure#0
+40  (call core.svec %₃₉)
+41  (call JuliaLowering.replace_captured_locals! %₃₈ %₄₀)
+42  --- method core.nothing %₃₇ %₄₁
+43  latestworld
+44  TestMod.#f_kw_closure##0
+45  (call core.svec %₄₄)
+46  (call core.svec)
+47  SourceLocation::2:14
+48  (call core.svec %₄₅ %₄₆ %₄₇)
+49  --- method core.nothing %₄₈
+    slots: [slot₁/#self# slot₂/#f_kw_closure#0(!read)]
+    1   (call core.getfield slot₁/#self# :#f_kw_closure#0)
+    2   (call core.isdefined %₁ :contents)
+    3   (gotoifnot %₂ label₅)
+    4   (goto label₇)
+    5   (newvar slot₂/#f_kw_closure#0)
+    6   slot₂/#f_kw_closure#0
+    7   (call core.getfield %₁ :contents)
+    8   TestMod.x_default
+    9   (call %₇ %₈ slot₁/#self#)
+    10  (return %₉)
+50  latestworld
+51  slot₃/f_kw_closure
+52  (return %₅₁)
+
+########################################
+# Closure capturing a typed local must also capture the type expression
+# [method_filter: #f_captured_typed_local##0]
+let T=Blah
+    x::T = 1.0
+    function f_captured_typed_local()
+        x = 2.0
+    end
+    f_captured_typed_local()
+    x
+end
+#---------------------
+slots: [slot₁/#self#(!read) slot₂/T(!read) slot₃/tmp(!read)]
+1   2.0
+2   (call core.getfield slot₁/#self# :x)
+3   (call core.getfield slot₁/#self# :T)
+4   (call core.isdefined %₃ :contents)
+5   (gotoifnot %₄ label₇)
+6   (goto label₉)
+7   (newvar slot₂/T)
+8   slot₂/T
+9   (call core.getfield %₃ :contents)
+10  (= slot₃/tmp %₁)
+11  slot₃/tmp
+12  (call core.isa %₁₁ %₉)
+13  (gotoifnot %₁₂ label₁₅)
+14  (goto label₁₈)
+15  slot₃/tmp
+16  (call top.convert %₉ %₁₅)
+17  (= slot₃/tmp (call core.typeassert %₁₆ %₉))
+18  slot₃/tmp
+19  (call core.setfield! %₂ :contents %₁₈)
+20  (return %₁)
+
+########################################
+# Error: Closure outside any top level context
+# (Should only happen in a user-visible way when lowering code emitted
+#  from a `@generated` function code generator.)
+@ast_ [K"lambda"(is_toplevel_thunk=false, toplevel_pure=false)
+    [K"block"]
+    [K"block"]
+    [K"->" [K"tuple"] [K"block"]]
+]
+#---------------------
+LoweringError:
+#= line 1 =# - Top level code was found outside any top level context. `@generated` functions may not contain closures, including `do` syntax and generators/comprehension
diff --git a/JuliaLowering/test/compat.jl b/JuliaLowering/test/compat.jl
new file mode 100644
index 0000000000000..a7fce558e9f40
--- /dev/null
+++ b/JuliaLowering/test/compat.jl
@@ -0,0 +1,629 @@
+using Test
+const JS = JuliaSyntax
+const JL = JuliaLowering
+
+@testset "expr->syntaxtree" begin
+    @testset "semantics only" begin
+        # Test that `s` evaluates to the same thing both under normal parsing
+        # and with the expr->tree->expr transformation
+
+        programs = [
+            "let x = 2; x += 5; x -= 1; [1] .*= 1; end",
+            "let var\"x\" = 123; x; end",
+            "try; 1; catch e; e; else; 2; finally; 3; end",
+            "for x in 1:2, y in 3:4; x + y; end",
+            "[x+y for x in 1:2, y in 3:4]",
+            "Int[x+y for x in 1:2, y in 3:4 if true]",
+            "for x in 1; x+=1\n if true\n continue \n elseif false \n break\n end\n end",
+            "Base.Meta.@lower 1",
+            "function foo(x, y=1; z, what::Int=5); x + y + z + what; end; foo(1,2;z=3)",
+            "(()->1)()",
+            "((x)->2)(3)",
+            "((x,y)->4)(5,6)",
+            "filter([1,2,3]) do x; x > 1; end",
+            """
+            struct X
+                f1::Int # hi
+                "foo"
+                f2::Int
+                f3::Int
+                X(y) = new(y,y,y)
+            end
+            """,
+            "global x,y",
+            "global (x,y)",
+            "999999999999999999999999999999999999999",
+            "0x00000000000000001",
+            "(0x00000000000000001)",
+            "let x = 1; 2x; end",
+            "let x = 1; (2)(3)x; end",
+            "if false\n1\nelseif true\n 3\nend",
+            "\"str\"",
+            "\"\$(\"str\")\"",
+            "'a'",
+            "'α'",
+            "'\\xce\\xb1'",
+            "let x = 1; \"\"\"\n  a\n  \$x\n  b\n  c\"\"\"; end",
+            "try throw(0) catch e; 1 end",
+            "try 0 finally 1 end",
+            "try throw(0) catch e; 1 finally 2 end",
+            "try throw(0) catch e; 1 else 2 end",
+            "try throw(0) catch e; 1 else 2 finally 3 end",
+            "try throw(0) finally 1 catch e; 2 end",
+            ":.+",
+            ":.=",
+            ":(.=)",
+            ":+=",
+            ":(+=)",
+            ":.+=",
+            ":(.+=)",
+        ]
+
+        test_mod_1 = Module()
+        test_mod_2 = Module()
+
+        for p in programs
+            @testset "`$p`" begin
+                local good_expr, good_out, test_st, test_expr, test_out
+                try
+                    good_expr = JS.parseall(Expr, p; ignore_errors=true)
+                    good_out = Core.eval(test_mod_1, good_expr)
+                catch e
+                    @error "Couldn't eval the reference expression---fix your test"
+                    rethrow(e)
+                end
+
+                test_st = JuliaLowering.expr_to_syntaxtree(good_expr)
+                test_expr = Expr(test_st)
+                test_out = Core.eval(test_mod_2, test_expr)
+
+                @test good_out == test_out
+            end
+        end
+    end
+
+    # Remove any information that can't be recovered from an Expr
+    function normalize_st!(st)
+        k = JS.kind(st)
+        args = JS.children(st)
+
+        if JS.is_infix_op_call(st) && (k === K"call" || k === K"dotcall")
+            # Infix calls are not preserved in Expr; we need to re-order the children
+            pre_st_args = JL.NodeId[st[2]._id, st[1]._id]
+            for c in st[3:end]
+                push!(pre_st_args, c._id)
+            end
+            pre_st_flags = (JS.flags(st) & ~JS.INFIX_FLAG) | JS.PREFIX_CALL_FLAG
+            JL.setchildren!(st._graph, st._id, pre_st_args)
+            JL.setflags!(st._graph, st._id, pre_st_flags)
+        elseif JS.is_postfix_op_call(st) && (k === K"call" || k === K"dotcall")
+            pre_st_args = JL.NodeId[st[end]._id]
+            for c in st[1:end-1]
+                push!(pre_st_args, c._id)
+            end
+            pre_st_flags = (JS.flags(st) & ~JS.POSTFIX_OP_FLAG) | JS.PREFIX_CALL_FLAG
+            JL.setchildren!(st._graph, st._id, pre_st_args)
+            JL.setflags!(st._graph, st._id, pre_st_flags)
+        elseif k in JS.KSet"tuple block macrocall"
+            JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.PARENS_FLAG)
+        elseif k === K"toplevel"
+            JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.TOPLEVEL_SEMICOLONS_FLAG)
+        end
+
+        if k in JS.KSet"tuple call dotcall macrocall vect curly braces <: >:"
+            JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.TRAILING_COMMA_FLAG)
+        end
+
+        k === K"quote" && JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.COLON_QUOTE)
+        k === K"wrapper" && JL.sethead!(st._graph, st._id, K"block")
+
+        # All ops are prefix ops in an expr.
+        # Ignore trivia (shows up on some K"error"s)
+        JL.setflags!(st._graph, st._id, JS.flags(st) &
+            ~JS.PREFIX_OP_FLAG & ~JS.INFIX_FLAG & ~JS.TRIVIA_FLAG & ~JS.NON_TERMINAL_FLAG)
+
+        for c in JS.children(st)
+            normalize_st!(c)
+        end
+        return st
+    end
+
+    function st_roughly_equal(; st_good, st_test)
+        normalize_st!(st_good)
+
+        if kind(st_good) === kind(st_test) === K"error"
+            # We could consider some sort of equivalence later, but we would
+            # need to specify within JS what the error node contains.
+            return true
+        end
+
+        out = kind(st_good) === kind(st_test) &&
+            JS.flags(st_good) === JS.flags(st_test) &&
+            JS.numchildren(st_good) === JS.numchildren(st_test) &&
+            JS.is_leaf(st_good) === JS.is_leaf(st_test) &&
+            get(st_good, :value, nothing) === get(st_test, :value, nothing) &&
+            get(st_good, :name_val, nothing) === get(st_test, :name_val, nothing) &&
+            all(map((cg, ct)->st_roughly_equal(;st_good=cg, st_test=ct),
+                    JS.children(st_good), JS.children(st_test)))
+
+        !out && @warn("!st_roughly_equal (normalized_reference, st_test):",
+                      JS.sourcetext(st_good), st_good, st_test)
+        return out
+    end
+
+    @testset "SyntaxTree equivalence (tests taken from JuliaSyntax expr.jl)" begin
+        # test that string->tree->expr->tree ~= string->tree
+        #                             ^^
+        programs = [
+            "begin a\nb\n\nc\nend",
+            "(a;b;c)",
+            "begin end",
+            "(;;)",
+            "a;b",
+            "module A\n\nbody\nend",
+            "function f()\na\n\nb\nend",
+            "f() = 1",
+            "macro f()\na\nend",
+            "function f end",
+            "macro f end",
+            "function (f() where {T}) end",
+            "function (f()::S) end",
+            "a -> b",
+            "(a,) -> b",
+            "(a where {T}) -> b",
+            "a -> (\nb;c)",
+            "a -> begin\nb\nc\nend",
+            "(a;b=1) -> c",
+            "(a...;b...) -> c",
+            "(;) -> c",
+            "a::T -> b",
+            "let i=is, j=js\nbody\nend",
+            "for x=xs\n\nend",
+            "for x=xs\ny\nend",
+            "while cond\n\nend",
+            "while cond\ny\nend",
+            "f() = xs",
+            "f() =\n(a;b)",
+            "f() =\nbegin\na\nb\nend",
+            "let f(x) =\ng(x)=1\nend",
+            "f() .= xs",
+            "for i=is body end",
+            "for i=is, j=js\nbody\nend",
+            "f(x) do y\n body end",
+            "@f(x) do y body end",
+            "f(x; a=1) do y body end",
+            "g(f(x) do y\n body end)",
+            "f(a=1)",
+            "f(; b=2)",
+            "f(a=1; b=2)",
+            "f(a; b; c)",
+            "+(a=1,)",
+            "(a=1)()",
+            "(x=1) != 2",
+            "+(a=1)",
+            "(a=1)'",
+            "f.(a=1; b=2)",
+            "(a=1,)",
+            "(a=1,; b=2)",
+            "(a=1,; b=2; c=3)",
+            "x[i=j]",
+            "(i=j)[x]",
+            "x[a, b; i=j]",
+            "(i=j){x}",
+            "x{a, b; i=j}",
+            "[a=1,; b=2]",
+            "{a=1,; b=2}",
+            "f(a .= 1)",
+            "f(((a = 1)))",
+            "(((a = 1)),)",
+            "(;((a = 1)),)",
+            "a.b",
+            "a.@b x",
+            "f.(x,y)",
+            "f.(x=1)",
+            "f.(a=1; b=2)",
+            "(a=1).()",
+            "x .+ y",
+            "(x=1) .+ y",
+            "a .< b .< c",
+            "a .< (.<) .< c",
+            "quote .+ end",
+            ".+(x)",
+            ".+x",
+            "f(.+)",
+            "(a, .+)",
+            "x += y",
+            "x .+= y",
+            "x \u2212= y",
+            "let x=1\n end",
+            "let x=1 ; end",
+            "let x ; end",
+            "let x::1 ; end",
+            "let x=1,y=2 end",
+            "let x+=1 ; end",
+            "let ; end",
+            "let ; body end",
+            "let\na\nb\nend",
+            "A where {T}",
+            "A where {S, T}",
+            "A where {X, Y; Z}",
+            "@m\n",
+            "\n@m",
+            "@m(x; a)",
+            "@m(a=1; b=2)",
+            "@S[a,b]",
+            "@S[a b]",
+            "@S[a; b]",
+            "@S[a ;; b]",
+            "[x,y ; z]",
+            "[a ;;; b ;;;; c]",
+            "[a b ; c d]",
+            "[a\nb]",
+            "[a b]",
+            "[a b ; c d]",
+            "T[a ;;; b ;;;; c]",
+            "T[a b ; c d]",
+            "T[a\nb]",
+            "T[a b]",
+            "T[a b ; c d]",
+            "(x for a in as for b in bs)",
+            "(x for a in as, b in bs)",
+            "(x for a in as, b in bs if z)",
+            "(x for a in as, b in bs for c in cs, d in ds)",
+            "(x for a in as for b in bs if z)",
+            "(x for a in as if z for b in bs)",
+            "[x for a = as for b = bs if cond1 for c = cs if cond2]" ,
+            "[x for a = as if begin cond2 end]" ,
+            "(x for a in as if z)",
+            "return x",
+            "struct A end",
+            "mutable struct A end",
+            "struct A <: B \n a::X \n end",
+            "struct A \n a \n b \n end",
+            "struct A const a end",
+            "export a",
+            "export +, ==",
+            "export \n a",
+            "global x",
+            "local x",
+            "global x,y",
+            "const x,y = 1,2",
+            "const x = 1",
+            "global x ~ 1",
+            "global x += 1",
+            "(;)",
+            "(; a=1)",
+            "(; a=1; b=2)",
+            "(a; b; c,d)",
+            "module A end",
+            "baremodule A end",
+            "import A",
+            "A.x",
+            "A.\$x",
+            "try x catch e; y end",
+            "try x finally y end",
+            "try x catch e; y finally z end",
+            "try x catch e; y else z end",
+            "try x catch e; y else z finally w end",
+        ]
+
+        for p in programs
+            @testset "`$(repr(p))`" begin
+                st_good = JS.parsestmt(JL.SyntaxTree, p; ignore_errors=true)
+                st_test = JL.expr_to_syntaxtree(Expr(st_good))
+                @test st_roughly_equal(;st_good, st_test)
+            end
+        end
+
+        # toplevel has a special parsing mode where docstrings and a couple of
+        # other things are enabled
+        toplevel_programs = [
+            "\"docstr\"\nthing_to_be_documented",
+        ]
+        for p in toplevel_programs
+            @testset "`$(repr(p))`" begin
+                st_good = JS.parseall(JL.SyntaxTree, p; ignore_errors=true)
+                st_test = JL.expr_to_syntaxtree(Expr(st_good))
+                @test st_roughly_equal(;st_good, st_test)
+            end
+        end
+    end
+
+    @testset "provenance via scavenging for LineNumberNodes" begin
+        # Provenenance of a node should be the last seen LineNumberNode in the
+        # depth-first traversal of the Expr, or the initial line given if none
+        # have been seen yet.  If none have been seen and no initial line was
+        # given, .source should still be defined on all nodes (of unspecified
+        # value, but hopefully a helpful value for the user.)
+        ex = Expr(:block,
+                  LineNumberNode(123),
+                  Expr(:block,
+                       Expr(:block, LineNumberNode(456)),
+                       Expr(:block)),
+                  Expr(:block,
+                       Expr(:block),
+                       Expr(:block)))
+
+        # No initial line provided
+        st = JuliaLowering.expr_to_syntaxtree(ex)
+        for i in length(st._graph.edge_ranges)
+            @test !isnothing(get(SyntaxTree(st._graph, i), :source, nothing))
+        end
+        @test let lnn = st[1].source;    lnn isa LineNumberNode && lnn.line === 123; end
+        @test let lnn = st[1][1].source; lnn isa LineNumberNode && lnn.line === 123; end
+        @test let lnn = st[1][2].source; lnn isa LineNumberNode && lnn.line === 456; end
+        @test let lnn = st[2].source;    lnn isa LineNumberNode && lnn.line === 456; end
+        @test let lnn = st[2][1].source; lnn isa LineNumberNode && lnn.line === 456; end
+        @test let lnn = st[2][2].source; lnn isa LineNumberNode && lnn.line === 456; end
+
+        # Same tree, but provide an initial line
+        st = JuliaLowering.expr_to_syntaxtree(ex, LineNumberNode(789))
+        @test let lnn = st.source;       lnn isa LineNumberNode && lnn.line === 789; end
+        @test let lnn = st[1].source;    lnn isa LineNumberNode && lnn.line === 123; end
+        @test let lnn = st[1][1].source; lnn isa LineNumberNode && lnn.line === 123; end
+        @test let lnn = st[1][2].source; lnn isa LineNumberNode && lnn.line === 456; end
+        @test let lnn = st[2].source;    lnn isa LineNumberNode && lnn.line === 456; end
+        @test let lnn = st[2][1].source; lnn isa LineNumberNode && lnn.line === 456; end
+        @test let lnn = st[2][2].source; lnn isa LineNumberNode && lnn.line === 456; end
+
+        ex = parsestmt(Expr, """
+        begin
+            try
+                maybe
+                lots
+                of
+                lines
+            catch exc
+                y
+            end
+        end""")
+        st = JuliaLowering.expr_to_syntaxtree(ex, LineNumberNode(1))
+
+        # sanity: ensure we're testing the tree we expect
+        @test st ≈ @ast_ [K"block"
+            [K"try"
+                [K"block"
+                    "maybe"::K"Identifier"
+                    "lots"::K"Identifier"
+                    "of"::K"Identifier"
+                    "lines"::K"Identifier"
+                ]
+                [K"catch"
+                    "exc"::K"Identifier"
+                    [K"block"
+                        "y"::K"Identifier"
+                    ]
+                ]
+            ]
+        ]
+
+        @test let lnn = st.source;             lnn isa LineNumberNode && lnn.line === 1; end
+        @test let lnn = st[1].source;          lnn isa LineNumberNode && lnn.line === 2; end
+        @test let lnn = st[1][1].source;       lnn isa LineNumberNode && lnn.line === 2; end
+        @test let lnn = st[1][1][1].source;    lnn isa LineNumberNode && lnn.line === 3; end
+        @test let lnn = st[1][1][2].source;    lnn isa LineNumberNode && lnn.line === 4; end
+        @test let lnn = st[1][1][3].source;    lnn isa LineNumberNode && lnn.line === 5; end
+        @test let lnn = st[1][1][4].source;    lnn isa LineNumberNode && lnn.line === 6; end
+        @test let lnn = st[1][2].source;       lnn isa LineNumberNode && lnn.line === 6; end
+        @test let lnn = st[1][2][1].source;    lnn isa LineNumberNode && lnn.line === 6; end
+        @test let lnn = st[1][2][2].source;    lnn isa LineNumberNode && lnn.line === 6; end
+        @test let lnn = st[1][2][2][1].source; lnn isa LineNumberNode && lnn.line === 8; end
+
+        st_shortfunc = JuliaLowering.expr_to_syntaxtree(
+            Expr(:block,
+                 LineNumberNode(11),
+                 Expr(:(=),
+                      Expr(:call, :f),
+                      :body))
+        )
+        @test st_shortfunc ≈ @ast_ [K"block"
+            [K"function"
+                [K"call" "f"::K"Identifier"]
+                "body"::K"Identifier"
+            ]
+        ]
+        @test let lnn = st_shortfunc[1][1].source; lnn isa LineNumberNode && lnn.line === 11; end
+
+        st_shortfunc_2 = JuliaLowering.expr_to_syntaxtree(
+            Expr(:block,
+                 LineNumberNode(11),
+                 Expr(:(=),
+                      Expr(:call, :f),
+                      Expr(:block,
+                         LineNumberNode(22),
+                         :body)))
+        )
+        @test st_shortfunc_2 ≈ @ast_ [K"block"
+            [K"function"
+                [K"call" "f"::K"Identifier"]
+                "body"::K"Identifier"
+            ]
+        ]
+        @test let lnn = st_shortfunc_2[1][1].source; lnn isa LineNumberNode && lnn.line === 22; end
+    end
+
+    @testset "`Expr(:escape)` handling" begin
+        # `x.y` with quoted y escaped (this esc does nothing, but is permitted by
+        # the existing expander)
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:(.), :x, esc(QuoteNode(:y)))) ≈
+            @ast_ [K"."
+                "x"::K"Identifier"
+                [K"escape"
+                    "y"::K"Identifier"
+                ]
+            ]
+
+        # `f(x; y)` with parameters escaped
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:call, :f, esc(Expr(:parameters, :y)), :x)) ≈
+            @ast_ [K"call"
+                "f"::K"Identifier"
+                "x"::K"Identifier"
+                [K"escape"
+                    [K"parameters"
+                        "y"::K"Identifier"
+                    ]
+                ]
+            ]
+
+        # `.+(x)` with operator escaped
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:call, esc(Symbol(".+")), :x)) ≈
+            @ast_ [K"dotcall"
+                [K"escape" "+"::K"Identifier"]
+                "x"::K"Identifier"
+            ]
+
+        # `let x \n end` with binding escaped
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:let, esc(:x), Expr(:block))) ≈
+            @ast_ [K"let"
+                [K"block" [K"escape" "x"::K"Identifier"]]
+                [K"block"]
+            ]
+
+        # `x .+ y` with .+ escaped
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:comparison, :x, esc(Symbol(".+")), :y)) ≈
+            @ast_ [K"comparison"
+                "x"::K"Identifier"
+                [K"."
+                    [K"escape" "+"::K"Identifier"]
+                ]
+                "y"::K"Identifier"
+            ]
+
+        # `@mac x` with macro name escaped
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:macrocall, esc(Symbol("@mac")), nothing, :x)) ≈
+            @ast_ [K"macrocall"
+                [K"escape" [K"macro_name" "mac"::K"Identifier"]]
+                "x"::K"Identifier"
+            ]
+
+        # `@mac x` with macro name escaped
+        @test JuliaLowering.expr_to_syntaxtree(
+            Expr(:macrocall, esc(Expr(:(.), :A, QuoteNode(Symbol("@mac")))), nothing, :x)
+        ) ≈ @ast_ [K"macrocall"
+            [K"escape"
+                [K"."
+                    "A"::K"Identifier"
+                    [K"macro_name" "mac"::K"Identifier"]
+                ]
+            ]
+            "x"::K"Identifier"
+        ]
+
+        # `x where y`
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:where, :x, esc(:y))) ≈
+            @ast_ [K"where"
+                "x"::K"Identifier"
+                [K"braces"
+                    [K"escape" "y"::K"Identifier"]
+                ]
+            ]
+
+        # Some weirdly placed esc's in try-catch
+        # `try body1 catch exc \n end`
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, :exc, esc(false))) ≈
+            @ast_ [K"try"
+                "body1"::K"Identifier"
+                [K"catch"
+                    "exc"::K"Identifier"
+                    "nothing"::K"core"
+                ]
+            ]
+        # `try body1 catch \n body2 \n end`
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, esc(false), :body2)) ≈
+            @ast_ [K"try"
+                "body1"::K"Identifier"
+                [K"catch"
+                    ""::K"Placeholder"
+                    "body2"::K"Identifier"
+                ]
+            ]
+        # `try body1 finally body2 end`
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, esc(false), esc(false), :body2)) ≈
+            @ast_ [K"try"
+                "body1"::K"Identifier"
+                [K"finally"
+                    "body2"::K"Identifier"
+                ]
+            ]
+
+        # `try body1 finally body2 end`
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, esc(false), esc(false), esc(false), :body2)) ≈
+            @ast_ [K"try"
+                "body1"::K"Identifier"
+                [K"else"
+                    "body2"::K"Identifier"
+                ]
+            ]
+
+        # [x ;;; y] with dim escaped
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:ncat, esc(3), :x, :y)) ≈
+            @ast_ [K"ncat"(syntax_flags=JuliaSyntax.set_numeric_flags(3))
+                "x"::K"Identifier"
+                "y"::K"Identifier"
+            ]
+
+        # T[x ;;; y] with dim escaped
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:typed_ncat, :T, esc(3), :x, :y)) ≈
+            @ast_ [K"typed_ncat"(syntax_flags=JuliaSyntax.set_numeric_flags(3))
+                "T"::K"Identifier"
+                "x"::K"Identifier"
+                "y"::K"Identifier"
+            ]
+
+        # One example of hygienic-scope (handled with the same mechanism as escape)
+        @test JuliaLowering.expr_to_syntaxtree(
+            Expr(:macrocall, Expr(:var"hygienic-scope", Symbol("@mac"), :other, :args), nothing, :x)) ≈
+            @ast_ [K"macrocall"
+                [K"hygienic_scope"
+                    [K"macro_name" "mac"::K"Identifier"]
+                    "other"::K"Identifier" # (<- normally a Module)
+                    "args"::K"Identifier" # (<- normally a LineNumberNode)
+                ]
+                "x"::K"Identifier"
+            ]
+
+        # One example of double escaping
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:macrocall, esc(esc(Symbol("@mac"))), nothing, :x)) ≈
+            @ast_ [K"macrocall"
+                [K"escape" [K"escape" [K"macro_name" "mac"::K"Identifier"]]]
+                "x"::K"Identifier"
+            ]
+
+        # One example of nested escape and hygienic-scope
+        @test JuliaLowering.expr_to_syntaxtree(
+            Expr(:macrocall,
+                 Expr(:var"hygienic-scope", esc(Symbol("@mac")), :other, :args),
+                 nothing,
+                 :x)) ≈
+            @ast_ [K"macrocall"
+                [K"hygienic_scope"
+                    [K"escape"
+                        [K"macro_name" "mac"::K"Identifier"]
+                    ]
+                    "other"::K"Identifier" # (<- normally a Module)
+                    "args"::K"Identifier" # (<- normally a LineNumberNode)
+                ]
+                "x"::K"Identifier"
+            ]
+
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:block, LineNumberNode(1))) ≈
+            @ast_ [K"block"]
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:block, esc(LineNumberNode(1)))) ≈
+            @ast_ [K"block"]
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:block, QuoteNode(LineNumberNode(1)))) ≈
+            @ast_ [K"block" LineNumberNode(1)::K"Value"]
+
+        # toplevel (and all other non-block forms) keep LineNumberNodes in value position
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:toplevel, esc(LineNumberNode(1)))) ≈
+            @ast_ [K"toplevel"  [K"escape" "nothing"::K"core"]]
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:toplevel, LineNumberNode(1))) ≈
+            @ast_ [K"toplevel" "nothing"::K"core"]
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:toplevel, QuoteNode(LineNumberNode(1)))) ≈
+            @ast_ [K"toplevel" LineNumberNode(1)::K"Value"]
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:call, :identity, LineNumberNode(1))) ≈
+            @ast_ [K"call" "identity"::K"Identifier" "nothing"::K"core"]
+        @test JuliaLowering.expr_to_syntaxtree(Expr(:call, :identity, QuoteNode(LineNumberNode(1)))) ≈
+            @ast_ [K"call" "identity"::K"Identifier" LineNumberNode(1)::K"Value"]
+
+    end
+end
diff --git a/JuliaLowering/test/decls.jl b/JuliaLowering/test/decls.jl
new file mode 100644
index 0000000000000..a0e3aaf3e2fa6
--- /dev/null
+++ b/JuliaLowering/test/decls.jl
@@ -0,0 +1,129 @@
+@testset "Declarations" begin
+
+test_mod = Module()
+
+@test JuliaLowering.include_string(test_mod, """
+begin
+    local x::Int = 1.0
+    x
+end
+""") === 1
+
+# In value position, yield the right hand side, not `x`
+@test JuliaLowering.include_string(test_mod, """
+begin
+    local x::Int = 1.0
+end
+""") === 1.0
+
+# Global decl in value position without assignment returns nothing
+@test JuliaLowering.include_string(test_mod, "global x_no_assign") === nothing
+
+# Unadorned declarations
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = 0.0
+    x::Int = a
+    x
+end
+""") === 0
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    local x::Int = 1
+    x1 = x
+    x = 20.0
+    x2 = x
+    (x1,x2)
+end
+""") === (1, 20)
+
+# Global const mixes
+@test JuliaLowering.include_string(test_mod, "global x_g = 1") === 1
+@test Base.isdefinedglobal(test_mod, :x_g)
+@test !Base.isconst(test_mod, :x_g)
+@test test_mod.x_g === 1
+
+@test JuliaLowering.include_string(test_mod, "const x_c = 1") === 1
+@test Base.isdefinedglobal(test_mod, :x_c)
+@test Base.isconst(test_mod, :x_c)
+@test test_mod.x_c === 1
+
+@test JuliaLowering.include_string(test_mod, "global const x_gc = 1") === 1
+@test Base.isdefinedglobal(test_mod, :x_gc)
+@test Base.isconst(test_mod, :x_gc)
+@test test_mod.x_gc === 1
+
+@test JuliaLowering.include_string(test_mod, "const global x_cg = 1") === 1
+@test Base.isdefinedglobal(test_mod, :x_cg)
+@test Base.isconst(test_mod, :x_cg)
+@test test_mod.x_cg === 1
+# Possibly worth testing excessive global/const keywords or invalid combinations
+# (local + global/const) once we decide whether that's a parse error or a
+# lowering error
+
+# Global decls with types
+@test JuliaLowering.include_string(test_mod, """
+global a_typed_global::Int = 10.0
+""") === 10.0
+@test Core.get_binding_type(test_mod, :a_typed_global) === Int
+@test test_mod.a_typed_global === 10
+
+# Also allowed in nontrivial scopes in a top level thunk
+@test JuliaLowering.include_string(test_mod, """
+let
+    global a_typed_global_2::Int = 10.0
+end
+""") === 10.0
+@test Core.get_binding_type(test_mod, :a_typed_global_2) === Int
+@test test_mod.a_typed_global_2 === 10
+
+@test JuliaLowering.include_string(test_mod, "const x_c_T::Int = 9") === 9
+@test Base.isdefinedglobal(test_mod, :x_c_T)
+@test Base.isconst(test_mod, :x_c_T)
+
+@testset "typed const redeclaration" begin
+    # redeclaration of the same value used to be allowed
+    @test_throws ErrorException JuliaLowering.include_string(test_mod, "x_c_T = 9")
+    @test_throws ErrorException JuliaLowering.include_string(test_mod, "x_c_T = 10")
+    # redeclaration with const should be OK
+    @test JuliaLowering.include_string(test_mod, "const x_c_T::Int = 0") === 0
+end
+
+# Tuple/destructuring assignments
+@test JuliaLowering.include_string(test_mod, "(a0, a1, a2) = [1,2,3]") == [1,2,3]
+@test JuliaLowering.include_string(test_mod, "const a,b,c = 1,2,3") === (1, 2, 3)
+
+@testset "Placeholder decls" begin
+    @test JuliaLowering.include_string(test_mod, "global _ = 1") === 1
+    @test JuliaLowering.include_string(test_mod, "global _::Int = 1") === 1
+    @test JuliaLowering.include_string(test_mod, "let; local _; _ = 1; end") === 1
+    @test JuliaLowering.include_string(test_mod, "let; local _::Int = 1; end") === 1
+    @test JuliaLowering.include_string(test_mod, "let; local (a0, _, a2) = [1,2,3]; end") == [1,2,3]
+    @test JuliaLowering.include_string(test_mod, "let; local (a0, _::Int, a2) = [1,2,3]; end") == [1,2,3]
+end
+
+test_mod_2 = Module()
+@testset "toplevel-preserving syntax" begin
+    JuliaLowering.include_string(test_mod_2, "if true; global v1::Bool; else const v1 = 1; end")
+    @test !isdefined(test_mod_2, :v1)
+    @test Base.binding_kind(test_mod_2, :v1) == Base.PARTITION_KIND_GLOBAL
+    @test Core.get_binding_type(test_mod_2, :v1) == Bool
+
+    JuliaLowering.include_string(test_mod_2, "if false; global v2::Bool; else const v2 = 2; end")
+    @test test_mod_2.v2 === 2
+    @test Base.binding_kind(test_mod_2, :v2) == Base.PARTITION_KIND_CONST
+
+    JuliaLowering.include_string(test_mod_2, "v3 = if true; global v4::Bool; 4 else const v4 = 5; 6; end")
+    @test test_mod_2.v3 == 4
+    @test !isdefined(test_mod_2, :v4)
+    @test Base.binding_kind(test_mod_2, :v4) == Base.PARTITION_KIND_GLOBAL
+    @test Core.get_binding_type(test_mod_2, :v4) == Bool
+
+    JuliaLowering.include_string(test_mod_2, "v5 = if false; global v6::Bool; 4 else const v6 = 5; 6; end")
+    @test test_mod_2.v5 === 6
+    @test test_mod_2.v6 === 5
+    @test Base.binding_kind(test_mod_2, :v6) == Base.PARTITION_KIND_CONST
+end
+
+end
diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl
new file mode 100644
index 0000000000000..1092b4d70d3f5
--- /dev/null
+++ b/JuliaLowering/test/decls_ir.jl
@@ -0,0 +1,299 @@
+########################################
+# Local declaration with type
+begin
+    local x::T = 1
+end
+#---------------------
+1   (newvar slot₁/x)
+2   1
+3   TestMod.T
+4   (= slot₂/tmp %₂)
+5   slot₂/tmp
+6   (call core.isa %₅ %₃)
+7   (gotoifnot %₆ label₉)
+8   (goto label₁₂)
+9   slot₂/tmp
+10  (call top.convert %₃ %₉)
+11  (= slot₂/tmp (call core.typeassert %₁₀ %₃))
+12  slot₂/tmp
+13  (= slot₁/x %₁₂)
+14  (return %₂)
+
+########################################
+# Error: Local declarations outside a scope are disallowed
+# See https://github.com/JuliaLang/julia/issues/57483
+local x
+#---------------------
+LoweringError:
+local x
+└─────┘ ── local declarations have no effect outside a scope
+
+########################################
+# Local declaration allowed in tail position
+begin
+    local x
+end
+#---------------------
+1   (newvar slot₁/x)
+2   (return core.nothing)
+
+########################################
+# Local declaration allowed in value position
+# TODO: This may be a bug in flisp lowering - should we reconsider this?
+let
+    y = local x
+end
+#---------------------
+1   (newvar slot₁/x)
+2   core.nothing
+3   (= slot₂/y %₂)
+4   (return %₂)
+
+########################################
+# Global declaration allowed in tail position
+global x
+#---------------------
+1   (call core.declare_global TestMod :x false)
+2   latestworld
+3   (return core.nothing)
+
+########################################
+# Global declaration allowed in tail position, nested
+begin
+    global x
+end
+#---------------------
+1   (call core.declare_global TestMod :x false)
+2   latestworld
+3   (return core.nothing)
+
+########################################
+# Error: Global declaration not allowed in tail position in functions
+function f()
+    global x
+end
+#---------------------
+LoweringError:
+function f()
+    global x
+#          ╙ ── global declaration doesn't read the variable and can't return a value
+end
+
+########################################
+# Error: Global declaration not allowed in value position
+y = global x
+#---------------------
+LoweringError:
+y = global x
+#          ╙ ── global declaration doesn't read the variable and can't return a value
+
+########################################
+# const
+const xx = 10
+#---------------------
+1   10
+2   (call core.declare_const TestMod :xx %₁)
+3   latestworld
+4   (return %₁)
+
+########################################
+# Typed const
+const xx::T = 10
+#---------------------
+1   TestMod.T
+2   (= slot₁/tmp 10)
+3   slot₁/tmp
+4   (call core.isa %₃ %₁)
+5   (gotoifnot %₄ label₇)
+6   (goto label₁₀)
+7   slot₁/tmp
+8   (call top.convert %₁ %₇)
+9   (= slot₁/tmp (call core.typeassert %₈ %₁))
+10  slot₁/tmp
+11  (call core.declare_const TestMod :xx %₁₀)
+12  latestworld
+13  (return %₁₀)
+
+########################################
+# Const tuple
+const xxx,xxxx,xxxxx = 10,20,30
+#---------------------
+1   10
+2   (call core.declare_const TestMod :xxx %₁)
+3   latestworld
+4   20
+5   (call core.declare_const TestMod :xxxx %₄)
+6   latestworld
+7   30
+8   (call core.declare_const TestMod :xxxxx %₇)
+9   latestworld
+10  (call core.tuple 10 20 30)
+11  (return %₁₀)
+
+########################################
+# Const in chain: only first is const
+const c0 = v0 = v1 = 123
+#---------------------
+1   123
+2   (call core.declare_const TestMod :c0 %₁)
+3   latestworld
+4   (call core.declare_global TestMod :v0 true)
+5   latestworld
+6   (call core.get_binding_type TestMod :v0)
+7   (= slot₁/tmp %₁)
+8   slot₁/tmp
+9   (call core.isa %₈ %₆)
+10  (gotoifnot %₉ label₁₂)
+11  (goto label₁₄)
+12  slot₁/tmp
+13  (= slot₁/tmp (call top.convert %₆ %₁₂))
+14  slot₁/tmp
+15  (call core.setglobal! TestMod :v0 %₁₄)
+16  (call core.declare_global TestMod :v1 true)
+17  latestworld
+18  (call core.get_binding_type TestMod :v1)
+19  (= slot₂/tmp %₁)
+20  slot₂/tmp
+21  (call core.isa %₂₀ %₁₈)
+22  (gotoifnot %₂₁ label₂₄)
+23  (goto label₂₆)
+24  slot₂/tmp
+25  (= slot₂/tmp (call top.convert %₁₈ %₂₄))
+26  slot₂/tmp
+27  (call core.setglobal! TestMod :v1 %₂₆)
+28  (return %₁)
+
+########################################
+# Global assignment
+xx = 10
+#---------------------
+1   (call core.declare_global TestMod :xx true)
+2   latestworld
+3   (call core.get_binding_type TestMod :xx)
+4   (= slot₁/tmp 10)
+5   slot₁/tmp
+6   (call core.isa %₅ %₃)
+7   (gotoifnot %₆ label₉)
+8   (goto label₁₁)
+9   slot₁/tmp
+10  (= slot₁/tmp (call top.convert %₃ %₉))
+11  slot₁/tmp
+12  (call core.setglobal! TestMod :xx %₁₁)
+13  (return 10)
+
+########################################
+# Typed global assignment
+global xx::T = 10
+#---------------------
+1   (call core.declare_global TestMod :xx false)
+2   latestworld
+3   TestMod.T
+4   (call core.declare_global TestMod :xx true %₃)
+5   latestworld
+6   (call core.declare_global TestMod :xx true)
+7   latestworld
+8   (call core.get_binding_type TestMod :xx)
+9   (= slot₁/tmp 10)
+10  slot₁/tmp
+11  (call core.isa %₁₀ %₈)
+12  (gotoifnot %₁₁ label₁₄)
+13  (goto label₁₆)
+14  slot₁/tmp
+15  (= slot₁/tmp (call top.convert %₈ %₁₄))
+16  slot₁/tmp
+17  (call core.setglobal! TestMod :xx %₁₆)
+18  (return 10)
+
+########################################
+# Error: x declared twice
+begin
+    local x::T = 1
+    local x::S = 1
+end
+#---------------------
+LoweringError:
+begin
+    local x::T = 1
+    local x::S = 1
+#        └───────┘ ── multiple type declarations found for `x`
+end
+
+########################################
+# Error: Const not supported on locals
+const local x = 1
+#---------------------
+LoweringError:
+const local x = 1
+└───────────────┘ ── unsupported `const local` declaration
+
+########################################
+# Error: Const not supported on locals
+let
+    const x = 1
+end
+#---------------------
+LoweringError:
+let
+    const x = 1
+#        └────┘ ── unsupported `const` declaration on local variable
+end
+
+########################################
+# Type decl on function argument
+function f(x)
+    x::Int = 1
+    x = 2.0
+    x
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄ core.Any)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read) slot₄/tmp(!read)]
+    1   1
+    2   TestMod.Int
+    3   (= slot₃/tmp %₁)
+    4   slot₃/tmp
+    5   (call core.isa %₄ %₂)
+    6   (gotoifnot %₅ label₈)
+    7   (goto label₁₁)
+    8   slot₃/tmp
+    9   (call top.convert %₂ %₈)
+    10  (= slot₃/tmp (call core.typeassert %₉ %₂))
+    11  slot₃/tmp
+    12  (= slot₂/x %₁₁)
+    13  2.0
+    14  TestMod.Int
+    15  (= slot₄/tmp %₁₃)
+    16  slot₄/tmp
+    17  (call core.isa %₁₆ %₁₄)
+    18  (gotoifnot %₁₇ label₂₀)
+    19  (goto label₂₃)
+    20  slot₄/tmp
+    21  (call top.convert %₁₄ %₂₀)
+    22  (= slot₄/tmp (call core.typeassert %₂₁ %₁₄))
+    23  slot₄/tmp
+    24  (= slot₂/x %₂₃)
+    25  slot₂/x
+    26  (return %₂₅)
+10  latestworld
+11  TestMod.f
+12  (return %₁₁)
+
+########################################
+# Error: global type decls only allowed at top level
+function f()
+    global x::Int = 1
+end
+#---------------------
+LoweringError:
+function f()
+    global x::Int = 1
+#         └─────────┘ ── type declarations for global variables must be at top level, not inside a function
+end
diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl
new file mode 100644
index 0000000000000..0b2fe25a82cc2
--- /dev/null
+++ b/JuliaLowering/test/demo.jl
@@ -0,0 +1,910 @@
+# Just some hacking
+
+using JuliaSyntax
+using JuliaLowering
+
+using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, is_leaf, @ast, numchildren, children, child, setattr!, sourceref, makenode, sourcetext, showprov, lookup_binding
+
+using JuliaSyntaxFormatter
+
+# Extract variable kind for highlighting purposes
+function var_kind(ctx, ex)
+    id = get(ex, :var_id, nothing)
+    if isnothing(id)
+        return nothing
+    end
+    binfo = lookup_binding(ctx, id)
+    return binfo.kind == :local ?
+        (binfo.is_captured ? :local_captured : :local) :
+        binfo.kind
+end
+
+# Extract module of globals for highlighting
+function var_mod(ctx, ex)
+    id = get(ex, :var_id, nothing)
+    if isnothing(id)
+        return nothing
+    end
+    return lookup_binding(ctx, id).mod
+end
+
+function formatsrc(ex; kws...)
+    Text(JuliaSyntaxFormatter.formatsrc(ex; kws...))
+end
+
+function debug_lower(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false, verbose::Bool=false, do_eval::Bool=false)
+    ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex, expr_compat_mode, Base.get_world_counter())
+
+    verbose && @info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer)
+
+    ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand)
+    verbose && @info "Desugared" formatsrc(ex_desugar, color_by=:scope_layer)
+
+    ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar)
+    verbose && @info "Resolved scopes" formatsrc(ex_scoped, color_by=e->var_kind(ctx2,e))
+
+    ctx4, ex_converted = JuliaLowering.convert_closures(ctx3, ex_scoped)
+    verbose && @info "Closure converted" formatsrc(ex_converted, color_by=:var_id)
+
+    ctx5, ex_compiled = JuliaLowering.linearize_ir(ctx4, ex_converted)
+    verbose && @info "Linear IR" formatsrc(ex_compiled, color_by=:var_id) Text(sprint(JuliaLowering.print_ir, ex_compiled))
+
+    ex_expr = JuliaLowering.to_lowered_expr(ex_compiled)
+    verbose && @info "CodeInfo" ex_expr
+
+    if do_eval
+        eval_result = Base.eval(mod, ex_expr)
+        verbose && @info "Eval" eval_result
+    else
+        eval_result = nothing
+    end
+
+    (ctx1, ex_macroexpand, ctx2, ex_desugar, ctx3, ex_scoped, ctx4, ex_converted, ctx5, ex_compiled, ex_expr, eval_result)
+end
+
+
+# Currently broken - need to push info back onto src
+# function annotate_scopes(mod, ex)
+#     ex = ensure_attributes(ex, var_id=Int)
+#     ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex, false)
+#     ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand)
+#     ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar)
+#     ex
+# end
+
+#-------------------------------------------------------------------------------
+# Module containing macros used in the demo.
+define_macros = false
+if !define_macros
+    eval(:(module M end))
+else
+eval(JuliaLowering.@SyntaxTree :(baremodule M
+    using Base
+
+    using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope, MacroExpansionError, makenode
+    using JuliaSyntax
+    using JuliaLowering: @inert, @label, @goto, @islocal
+    using Base: @locals
+
+    macro K_str(str)
+        JuliaSyntax.Kind(str)
+    end
+
+    # Introspection
+    macro __MODULE__()
+        __context__.scope_layer.mod
+    end
+
+    macro __FILE__()
+        JuliaLowering.filename(__context__.macrocall)
+    end
+
+    macro __LINE__()
+        JuliaLowering.source_location(__context__.macrocall)[1]
+    end
+
+    # Macro with local variables
+    module A
+        another_global = "global in A"
+
+        macro bar(ex)
+            quote
+                x = "`x` in @bar"
+                (x, another_global, $ex)
+            end
+        end
+    end
+
+    someglobal = "global in module M"
+
+    # Macro with local variables
+    macro foo(ex)
+        quote
+            x = "`x` from @foo"
+            (x, someglobal, A.@bar $ex)
+            #(x, someglobal, $ex, A.@bar($ex), A.@bar(x))
+        end
+    end
+
+    macro call_show(x)
+        quote
+            z = "z in @call_show"
+            @show z $x
+        end
+    end
+
+    macro call_info(x)
+        quote
+            z = "z in @call_info"
+            @info "hi" z $x
+        end
+    end
+
+    macro call_oldstyle_macro(y)
+        quote
+            x = "x in call_oldstyle_macro"
+            @oldstyle $y x
+        end
+    end
+
+    macro newstyle(x, y, z)
+        quote
+            x = "x in @newstyle"
+            ($x, $y, $z, x)
+        end
+    end
+
+    macro set_a_global(val)
+        quote
+            global a_global = $val
+        end
+    end
+
+    macro set_global_in_parent(ex)
+        e1 = adopt_scope(:(sym_introduced_from_M), __context__)
+        quote
+            $e1 = $ex
+        end
+    end
+
+    macro baz(ex)
+        quote
+            let $ex = 10
+                $ex
+            end
+        end
+    end
+
+    macro make_module()
+        :(module X
+              blah = 10
+          end)
+    end
+
+    macro return_a_value()
+        42
+    end
+
+    macro nested_return_a_value()
+        :(
+            @return_a_value
+        )
+    end
+
+    macro inner()
+        :(2)
+    end
+
+    macro outer()
+        :((1, @inner))
+    end
+
+    macro K_str(str)
+        JuliaSyntax.Kind(str[1].value)
+    end
+
+    # Recursive macro call
+    macro recursive(N)
+        Nval = if kind(N) == K"Integer" || kind(N) == K"Value"
+            N.value
+        end
+        if !(Nval isa Integer)
+            throw(MacroExpansionError(N, "argument must be an integer"))
+        end
+        if Nval < 1
+            return N
+        end
+        quote
+            x = $N
+            (@recursive($(Nval-1)), x)
+        end
+    end
+
+    xx = "xx in M"
+
+    macro test_inert_quote()
+        println(xx)
+        @inert quote
+            ($xx, xx)
+        end
+    end
+
+    macro mmm(ex)
+        :(let
+              local x
+              function f()
+                  (x, $ex)
+              end
+              f()
+          end)
+    end
+
+end))
+end
+
+Base.eval(M, :(
+macro oldstyle(a, b)
+    quote
+        x = "x in @oldstyle"
+        @newstyle $(esc(a)) $(esc(b)) x
+    end
+end
+))
+
+#
+#-------------------------------------------------------------------------------
+# Demos of the prototype
+
+# src = """
+# let
+#     local x, (y = 2), (w::T = ww), q::S
+# end
+# """
+
+# src = """
+# function foo(x::f(T), y::w(let ; S end))
+#     "a \$("b \$("c")")"
+# end
+# """
+
+src = """
+begin
+    function f(x)
+        nothing
+    end
+
+    f(1)
+end
+"""
+
+# src = """
+#     x + y
+# """
+
+# src = """
+# module A
+#     function f(x)::Int
+#         x + 1
+#     end
+#
+#     b = f(2)
+# end
+# """
+
+# src = """
+# function f()
+# end
+# """
+#
+# src = """
+# # import A.B: C.c as d, E.e as f
+# # import JuliaLowering
+# using JuliaLowering
+# """
+#
+# src = """
+# module A
+#     z = 1 + 1
+# end
+# """
+
+src = raw"""
+begin
+    x = 10
+    y = :(g(z))
+    quote
+        f($(x+1), $y)
+    end
+end
+"""
+
+function wrapscope(ex, scope_type)
+    makenode(ex, ex, K"scope_block", ex; scope_type=scope_type)
+end
+
+function softscope_test(ex)
+    g = ensure_attributes(ex._graph, scope_type=Symbol)
+    wrapscope(wrapscope(JuliaLowering.reparent(g, ex), :neutral), :soft)
+end
+
+# src = """
+# M.@test_inert_quote()
+# """
+
+# src = """
+# macro mmm(a; b=2)
+# end
+# macro A.b(ex)
+# end
+# """
+
+# src = """
+# M.@set_global_in_parent "bent hygiene!"
+# """
+
+# src = """
+# begin
+# M.@__LINE__
+# end
+# """
+
+# src = """@foo z"""
+
+src = """
+M.@recursive 3
+"""
+
+# src = """
+# M.@set_global_in_parent "bent hygiene!"
+# """
+
+# src = """
+# begin
+#    x = 10
+#    y = 20
+#    let x = y + x
+#        z = "some string \$x \$y"
+#
+#        function f(y)
+#            a = M.@foo z
+#            "\$z \$y \$a \$x"
+#        end
+#        print(x)
+#    end
+#    print(x)
+# end
+# """
+
+# src = """
+# begin
+#     x = -1
+#     M.@baz x
+# end
+# """
+
+# src = """
+#     _ = -1
+# """
+
+# src = """
+# M.@make_module
+# """
+
+# src = """
+# M.@nested_return_a_value
+# """
+
+# src = """
+# function f(y)
+#     x = 42 + y
+#     M.@foo error(x)
+# end
+# """
+
+src = """
+let
+    y = 0
+    x = 1
+    let x = x + 1
+        y = x
+    end
+    (x, y)
+end
+"""
+
+#src = """M.@outer"""
+
+src = """
+begin
+    local a, b, c
+    if a
+        b
+    else
+        c
+    end
+end
+"""
+
+src = """
+begin
+    local i = 0
+    while i < 10
+        i = i + 1
+        if isodd(i)
+            continue
+        end
+        println(i)
+    end
+end
+"""
+
+src = """
+for i in [3,1,2]
+    println("i = ", i, ", j = ", j)
+end
+"""
+
+# src = """
+# @ccall f()::T
+# """
+#
+# src = """
+# begin
+#     a = 1
+#     xs = [:(a),]
+#     x = :(:(\$(\$(xs...))))
+# end
+# """
+
+# src = """
+# try
+#     a
+# catch exc
+#     b
+# end
+# """
+
+src = """
+let
+    a = []
+    for i = 1:2, j = 3:4
+        push!(a, (i,j))
+        i = 100
+    end
+    a
+end
+"""
+
+src = """
+begin
+    function f(x)
+        y = x + 1
+        "hi", x, y
+    end
+
+    f(1)
+end
+"""
+
+src = """
+let
+    x = try
+        error("hi")
+        1
+    catch exc
+        current_exceptions()
+    else
+        3
+    end
+    x
+end
+"""
+
+src = """
+function f(y)
+    x =
+    try
+        try
+            error("hi")
+            1
+        catch exc
+            if y
+                return 2
+            end
+            3
+        else
+            4
+        end
+    catch
+        5
+    end
+    x
+end
+"""
+
+src = """
+function f(x)::Int
+    if x
+        42.0
+    end
+    0xff
+end
+"""
+
+src = """
+let x = 10
+    global a = []
+    try
+        try
+            return 100
+        finally
+            push!(a, 1)
+        end
+    finally
+        push!(a, 2)
+    end
+    x
+end
+"""
+
+src = """
+let
+    for outer i = 1:2
+        body
+    end
+end
+"""
+
+src = """
+let
+    i = "hi"
+    j = 1
+    M.@label foo
+    try
+        println("i = ", i)
+        i = i + 1
+        if i <= 2
+            M.@goto foo
+        end
+    catch exc
+        println("Caught exception ", exc)
+        j = j + 1
+        if j <= 2
+            println("Trying again ", exc)
+            M.@goto foo
+        end
+    end
+end
+"""
+
+src = """
+let
+    M.@goto foo
+    M.@label foo
+end
+"""
+
+src = """
+x = M.@label foo
+"""
+
+src = """
+begin
+    local x::T = 1
+    local x::S = 1
+end
+"""
+
+src = """
+begin
+    local a, b
+    if a
+        b
+    end
+end
+"""
+
+src = """
+let
+    A{S} = B{S}
+end
+"""
+
+src = """
+let
+    a = b = c = sin(1)
+    (a,b,c)
+end
+"""
+
+src = """
+a.b = c
+"""
+
+src = """
+a[i j] = c
+"""
+
+src = """
+let
+    as = [1,2,3,4]
+    (x,ys...,z) = as
+    (x,ys,z)
+end
+"""
+
+src = """
+let
+    x = (1,2)
+    (y,x) = x
+    (x,y)
+end
+"""
+
+src = """
+let
+    a = b = c = sin(1)
+    (a,b,c)
+end
+"""
+
+src = """
+begin
+    as = [(1,2), (3,4)]
+    ((x,y), (z,w)) = as
+end
+"""
+
+src = """
+let
+(x, y) = (y,x)
+end
+"""
+
+src = """
+let x = 1
+    M.@islocal x
+end
+"""
+
+src = """
+let x = 1
+    local y
+    M.@locals
+end
+"""
+
+src = """
+let
+    (a, bs...,) = (1,2,3)
+    bs
+end
+"""
+
+src = """
+(; a=1, a=2)
+"""
+
+src = """
+begin
+    kws = (c=3, d=4)
+    xs = 1:3
+    f(xs...; kws..., a=1, b=2)
+end
+"""
+
+src = """
+"some docs"
+function f()
+    println("hi")
+end
+"""
+
+src = """
+function f(::T, ::U, ::S) where T where {U,S}
+    println(T)
+    println(U)
+    println(S)
+end
+"""
+
+src = """
+function (x::XXX)(y)
+    println("hi", " ", x, " ", y)
+end
+"""
+
+src = """
+struct X
+    x
+    y::String
+end
+"""
+
+src = """
+struct X{U,V}
+    x::U
+    y::V
+end
+"""
+
+src = """
+struct S9{T}
+    x
+    y
+
+    "Docs for S9"
+    S9{Int}(xs) = new(xs...)
+end
+"""
+
+# Default positional args with missing arg names
+src = """
+function f(::Int, y=1, z=2)
+    (y, z)
+end
+"""
+
+# Default positional args with placeholders
+src = """
+function f(_::Int, x=1)
+    x
+end
+"""
+
+# Positional args and type parameters with transitive dependencies
+# Bug in flisp lowering - see https://github.com/JuliaLang/julia/issues/49275
+src = """
+function f(x, y::S=[1], z) where {T, S<:AbstractVector{T}}
+    (x, y, z, T)
+end
+"""
+
+# Default positional args before trailing slurp are allowed
+src = """
+function f(x=1, ys...)
+    ys
+end
+"""
+
+# Default positional args after a slurp is an error
+src = """
+function f(x=1, ys..., z=2)
+    ys
+end
+"""
+
+# Positional arg with slurp and default
+src = """
+function f(x=1, ys...="hi")
+    ys
+end
+"""
+
+# Positional arg with slurp and splat
+src = """
+function f(x=1, ys...=(1,2)...)
+    ys
+end
+"""
+
+src = """
+let
+    x = 10
+    function f(y)
+        x + y
+    end
+end
+"""
+
+src = """
+begin
+    local f, set_x
+    local x = 10
+    local y = 100
+    function f()
+        z = 1 + y - x
+        z
+    end
+    function set_x()
+        x = 1
+    end
+    println("f = ", f())
+    set_x()
+    y = 10
+    println("f = ", f())
+end
+"""
+
+# TODO: fix this - it's interpreted in a bizarre way as a kw call.
+# src = """
+# function f(x=y=1)
+#     x
+# end
+# """
+
+function gen_stuff(ctx, N, x)
+    JuliaLowering.@ast ctx ctx.macrocall [K"tuple"
+        (i::K"Integer" for i in 1:N)...
+    ]
+end
+
+src = raw"""
+function gen(x::NTuple{N}) where {N}
+    nongen_stuff = :nongen
+    if @generated
+        quote
+            maybe_gen_stuff = ($N, $x)
+        end
+    else
+        maybe_gen_stuff = :nongen_2
+    end
+    (nongen_stuff, maybe_gen_stuff)
+end
+"""
+
+src = raw"""
+begin
+    function partially_gen(x::NTuple{N,T}) where {N,T}
+        shared = :shared_stuff
+        if @generated
+            quote
+                unshared = ($x, $N, $T)
+            end
+        else
+            # Uuuum. How do we test both sides of this branch??
+            unshared = :nongen # (typeof(x), N, T)
+        end
+        (shared, unshared)
+    end
+
+    partially_gen((1,2,3,4,5))
+end
+"""
+
+src = """
+let
+    z = "z in outer ctx"
+    @call_show z
+end
+"""
+
+src = """
+let
+    x = "x in outer ctx"
+    @call_oldstyle_macro x
+end
+"""
+
+src = """
+let
+    z = "z in outer ctx"
+    @call_info z
+end
+"""
+
+ex = parsestmt(SyntaxTree, src, filename="foo.jl")
+#ex = ensure_attributes(ex, var_id=Int)
+#ex = softscope_test(ex)
+@info "Input code" formatsrc(ex)
+
+(ctx1, ex_macroexpand,
+ ctx2, ex_desugar,
+ ctx3, ex_scoped,
+ ctx4, ex_converted,
+ ctx5, ex_compiled,
+ ex_expr, eval_result) = debug_lower(M, ex; verbose=true, do_eval=true)
+
+# Automatic test reduction
+# bad = reduce_any_failing_toplevel(JuliaLowering, joinpath(@__DIR__, "../src/desugaring.jl"))
+# if !isnothing(bad)
+#     @error "Reduced expression as code" formatsrc(bad)
+#     write("bad.jl", JuliaSyntaxFormatter.formatsrc(bad))
+# end
+
+# Old lowering
+# text = read(joinpath(@__DIR__, "../src/desugaring.jl"), String)
+# ex = parseall(SyntaxTree, text, filename="desugaring.jl")
+# for e in Meta.parseall(text).args
+#     Meta.lower(JuliaLowering, e)
+# end
diff --git a/JuliaLowering/test/destructuring.jl b/JuliaLowering/test/destructuring.jl
new file mode 100644
index 0000000000000..6158d8bc28ebf
--- /dev/null
+++ b/JuliaLowering/test/destructuring.jl
@@ -0,0 +1,225 @@
+@testset "Destructuring" begin
+
+test_mod = Module()
+
+@testset "Destructuring via iteration" begin
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    as = [1,2,3]
+    (x,y) = as
+    (x,y)
+end
+""") == (1,2)
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    as = [1,2,3]
+    (x,ys...) = as
+    (x,ys)
+end
+""") == (1, [2,3])
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    as = [1,2,3,4]
+    (x,ys...,z) = as
+    (x,ys,z)
+end
+""") == (1, [2, 3], 4)
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    as = [1,2,3,4]
+    (xs...,y) = as
+    (xs,y)
+end
+""") == ([1, 2, 3], 4)
+
+# Case where indexed_iterate is just iteration
+@test JuliaLowering.include_string(test_mod, """
+let
+    (x,ys...,z) = "aβcδe"
+    (x,ys,z)
+end
+""") == ('a', "βcδ", 'e')
+
+
+# Use in value position yields rhs
+@test JuliaLowering.include_string(test_mod, """
+let
+    as = [1,2]
+    zs = begin
+        (x,y) = as
+    end
+    (x,y, as === zs)
+end
+""") == (1, 2, true)
+
+# lhs variable name in rhs
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = (1,2)
+    (x,y) = x
+    (x,y)
+end
+""") == (1, 2)
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = (1,2)
+    (x...,y) = x
+    (x,y)
+end
+""") == ((1,), 2)
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    zs = [(1,2), (3,(4,5))]
+    ((a,b), (c,(d,e))) = zs
+    (a,b,c,d,e)
+end
+""") == (1,2,3,4,5)
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    zs = [[1,2,3], 4]
+    ((a,bs...), c) = zs
+    (a, bs, c)
+end
+""") == (1, [2,3], 4)
+
+end
+
+
+@testset "Tuple elimination with tuples on both sides" begin
+
+# Simple case
+@test JuliaLowering.include_string(test_mod, """
+let a = 1, b = 2
+    (x,y) = (a,b)
+    (x,y)
+end
+""") == (1, 2)
+
+# lhs variable name in rhs
+@test JuliaLowering.include_string(test_mod, """
+let x = 1, y = 2
+    (x,y) = (y,x)
+    (x,y)
+end
+""") == (2, 1)
+
+# Slurps and splats
+
+@test JuliaLowering.include_string(test_mod, """
+let a = 1, b = 2, c = 3
+    (x, ys..., z) = (a, b, c)
+    (x, ys, z)
+end
+""") == (1, (2,), 3)
+
+@test JuliaLowering.include_string(test_mod, """
+let a = 1, b = 2, cs = (3,4)
+    (x, ys...) = (a, b, cs...)
+    (x, ys)
+end
+""") == (1, (2,3,4))
+
+@test JuliaLowering.include_string(test_mod, """
+let a = 1, bs = (2,3), c = 4
+    (x, ys...) = (a, bs..., c)
+    (x, ys)
+end
+""") == (1, (2,3,4))
+
+@test JuliaLowering.include_string(test_mod, """
+let a = 1, b = 2, cs = (3,4)
+    (x, ys..., z) = (a, b, cs...)
+    (x, ys, z)
+end
+""") == (1, (2,3), 4)
+
+@test JuliaLowering.include_string(test_mod, """
+let a = 1
+    (x, ys...) = (a,)
+    (x, ys)
+end
+""") == (1, ())
+
+# dotted rhs in last place
+@test JuliaLowering.include_string(test_mod, """
+let
+    rh = (2, 3)
+    (x,y,z) = (1,rh...)
+    (x,y,z)
+end
+""") == (1, 2, 3)
+
+# in value position
+@test JuliaLowering.include_string(test_mod, """
+let
+    rh = (2, 3)
+    (x,y) = (1,rh...)
+end
+""") == (1, 2, 3)
+
+# Side effects in the right hand tuple can affect the previous left hand side
+# bindings, for example, `x`, below. In this case we need to ensure `f()` is
+# called before `x` is assigned the value from the right hand side.
+# (the flisp implementation fails this test.)
+@test JuliaLowering.include_string(test_mod, """
+let
+   function f()
+       x=100
+       2
+   end
+   (x,y) = (1,f())
+   x,y
+end
+""") == (1,2)
+
+# `x` is not assigned and no side effect from `f()` happens when the right hand
+# side throws an UndefVarError
+@test JuliaLowering.include_string(test_mod, """
+let x=1, y=2, z=3, side_effect=false, a
+    exc = try
+        function f()
+            side_effect=true
+        end
+        (x,y,z) = (100, a, f())
+    catch e
+        e
+    end
+    (x, y, z, side_effect, exc.var)
+end
+""") == (1, 2, 3, false, :a)
+
+# Require that rhs is evaluated before any assignments, thus `x` is not defined
+# here because accessing `a` first throws an UndefVarError
+@test JuliaLowering.include_string(test_mod, """
+let x, y, a
+    try
+        (x, y) = (1, a)
+    catch
+    end
+    @isdefined(x)
+end
+""") == false
+
+end
+
+
+@testset "Property destructuring" begin
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    ab = (a=1, b=2)
+    (; a, b) = ab
+    (a, b)
+end
+""") == (1, 2)
+
+end
+
+end
diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl
new file mode 100644
index 0000000000000..990096a87e916
--- /dev/null
+++ b/JuliaLowering/test/destructuring_ir.jl
@@ -0,0 +1,387 @@
+########################################
+# Simple destructuring
+let
+    (x,y) = as
+end
+#---------------------
+1   TestMod.as
+2   (call top.indexed_iterate %₁ 1)
+3   (= slot₂/x (call core.getfield %₂ 1))
+4   (= slot₁/iterstate (call core.getfield %₂ 2))
+5   TestMod.as
+6   slot₁/iterstate
+7   (call top.indexed_iterate %₅ 2 %₆)
+8   (= slot₃/y (call core.getfield %₇ 1))
+9   TestMod.as
+10  (return %₉)
+
+########################################
+# Trivial slurping
+let
+    (xs...,) = as
+end
+#---------------------
+1   TestMod.as
+2   (= slot₁/xs (call top.rest %₁))
+3   TestMod.as
+4   (return %₃)
+
+########################################
+# Slurping last arg
+let
+    (x, ys...) = as
+end
+#---------------------
+1   TestMod.as
+2   (call top.indexed_iterate %₁ 1)
+3   (= slot₂/x (call core.getfield %₂ 1))
+4   (= slot₁/iterstate (call core.getfield %₂ 2))
+5   TestMod.as
+6   slot₁/iterstate
+7   (= slot₃/ys (call top.rest %₅ %₆))
+8   TestMod.as
+9   (return %₈)
+
+########################################
+# Slurping, first arg
+let
+    (xs..., y, z) = as
+end
+#---------------------
+1   TestMod.as
+2   (call top.split_rest %₁ 2)
+3   (= slot₂/xs (call core.getfield %₂ 1))
+4   (call core.getfield %₂ 2)
+5   (call top.indexed_iterate %₄ 1)
+6   (= slot₃/y (call core.getfield %₅ 1))
+7   (= slot₁/iterstate (call core.getfield %₅ 2))
+8   slot₁/iterstate
+9   (call top.indexed_iterate %₄ 2 %₈)
+10  (= slot₄/z (call core.getfield %₉ 1))
+11  TestMod.as
+12  (return %₁₁)
+
+########################################
+# Slurping, middle arg
+let
+    (x, ys..., z) = as
+end
+#---------------------
+1   TestMod.as
+2   (call top.indexed_iterate %₁ 1)
+3   (= slot₂/x (call core.getfield %₂ 1))
+4   (= slot₁/iterstate (call core.getfield %₂ 2))
+5   TestMod.as
+6   slot₁/iterstate
+7   (call top.split_rest %₅ 1 %₆)
+8   (= slot₃/ys (call core.getfield %₇ 1))
+9   (call core.getfield %₇ 2)
+10  (call top.indexed_iterate %₉ 1)
+11  (= slot₄/z (call core.getfield %₁₀ 1))
+12  TestMod.as
+13  (return %₁₂)
+
+########################################
+# Error: Slurping multiple args
+(xs..., ys...) = x
+#---------------------
+LoweringError:
+(xs..., ys...) = x
+#      └────┘ ── multiple `...` in destructuring assignment are ambiguous
+
+########################################
+# Recursive destructuring
+let
+    ((x,y), (z,w)) = as
+end
+#---------------------
+1   TestMod.as
+2   (call top.indexed_iterate %₁ 1)
+3   (call core.getfield %₂ 1)
+4   (= slot₁/iterstate (call core.getfield %₂ 2))
+5   TestMod.as
+6   slot₁/iterstate
+7   (call top.indexed_iterate %₅ 2 %₆)
+8   (call core.getfield %₇ 1)
+9   (call top.indexed_iterate %₃ 1)
+10  (= slot₅/x (call core.getfield %₉ 1))
+11  (= slot₂/iterstate (call core.getfield %₉ 2))
+12  slot₂/iterstate
+13  (call top.indexed_iterate %₃ 2 %₁₂)
+14  (= slot₆/y (call core.getfield %₁₃ 1))
+15  (call top.indexed_iterate %₈ 1)
+16  (= slot₇/z (call core.getfield %₁₅ 1))
+17  (= slot₃/iterstate (call core.getfield %₁₅ 2))
+18  slot₃/iterstate
+19  (call top.indexed_iterate %₈ 2 %₁₈)
+20  (= slot₄/w (call core.getfield %₁₉ 1))
+21  TestMod.as
+22  (return %₂₁)
+
+########################################
+# Recursive destructuring with slurping
+let
+    ((x,ys...), z) = as
+end
+#---------------------
+1   TestMod.as
+2   (call top.indexed_iterate %₁ 1)
+3   (call core.getfield %₂ 1)
+4   (= slot₁/iterstate (call core.getfield %₂ 2))
+5   TestMod.as
+6   slot₁/iterstate
+7   (call top.indexed_iterate %₅ 2 %₆)
+8   (= slot₅/z (call core.getfield %₇ 1))
+9   (call top.indexed_iterate %₃ 1)
+10  (= slot₃/x (call core.getfield %₉ 1))
+11  (= slot₂/iterstate (call core.getfield %₉ 2))
+12  slot₂/iterstate
+13  (= slot₄/ys (call top.rest %₃ %₁₂))
+14  TestMod.as
+15  (return %₁₄)
+
+########################################
+# Destructuring with simple tuple elimination
+let
+    (x, y) = (a, b)
+end
+#---------------------
+1   TestMod.a
+2   TestMod.b
+3   (= slot₁/x %₁)
+4   (= slot₂/y %₂)
+5   (call core.tuple %₁ %₂)
+6   (return %₅)
+
+########################################
+# Destructuring with tuple elimination where variables are repeated
+let
+    (x, y, z) = (y, a, x)
+end
+#---------------------
+1   slot₂/y
+2   TestMod.a
+3   slot₁/x
+4   (= slot₁/x %₁)
+5   (= slot₂/y %₂)
+6   (= slot₃/z %₃)
+7   (call core.tuple %₁ %₂ %₃)
+8   (return %₇)
+
+########################################
+# Destructuring with simple tuple elimination and rhs with side effects
+let
+    (x, y) = (f(), b)
+end
+#---------------------
+1   TestMod.f
+2   (call %₁)
+3   TestMod.b
+4   (= slot₁/x %₂)
+5   (= slot₂/y %₃)
+6   (call core.tuple %₂ %₃)
+7   (return %₆)
+
+########################################
+# Destructuring with simple tuple elimination and lhs with side effects
+let
+    (x[10], y[20]) = (1,2)
+end
+#---------------------
+1   1
+2   TestMod.x
+3   (call top.setindex! %₂ %₁ 10)
+4   2
+5   TestMod.y
+6   (call top.setindex! %₅ %₄ 20)
+7   (call core.tuple 1 2)
+8   (return %₇)
+
+########################################
+# Destructuring with tuple elimination and trailing rhs ...
+let
+    (x, y) = (a, rhs...)
+end
+#---------------------
+1   TestMod.a
+2   TestMod.rhs
+3   (= slot₁/x %₁)
+4   (call top.indexed_iterate %₂ 1)
+5   (= slot₂/y (call core.getfield %₄ 1))
+6   (call core.tuple %₁)
+7   (call core._apply_iterate top.iterate core.tuple %₆ %₂)
+8   (return %₇)
+
+########################################
+# Destructuring with with non-trailing rhs `...` does not use tuple elimination
+# (though we could do it for the `x = a` part here)
+let
+    (x, y, z) = (a, rhs..., b)
+end
+#---------------------
+1   TestMod.a
+2   (call core.tuple %₁)
+3   TestMod.rhs
+4   TestMod.b
+5   (call core.tuple %₄)
+6   (call core._apply_iterate top.iterate core.tuple %₂ %₃ %₅)
+7   (call top.indexed_iterate %₆ 1)
+8   (= slot₂/x (call core.getfield %₇ 1))
+9   (= slot₁/iterstate (call core.getfield %₇ 2))
+10  slot₁/iterstate
+11  (call top.indexed_iterate %₆ 2 %₁₀)
+12  (= slot₃/y (call core.getfield %₁₁ 1))
+13  (= slot₁/iterstate (call core.getfield %₁₁ 2))
+14  slot₁/iterstate
+15  (call top.indexed_iterate %₆ 3 %₁₄)
+16  (= slot₄/z (call core.getfield %₁₅ 1))
+17  (return %₆)
+
+########################################
+# Destructuring with tuple elimination and final ... on lhs
+let
+    (x, ys...) = (a,b,c)
+end
+#---------------------
+1   TestMod.a
+2   TestMod.b
+3   TestMod.c
+4   (= slot₁/x %₁)
+5   (call core.tuple %₂ %₃)
+6   (= slot₂/ys %₅)
+7   (call core.tuple %₁ %₂ %₃)
+8   (return %₇)
+
+########################################
+# Destructuring with tuple elimination, slurping, and completely effect free right hand sides
+let
+    (x, ys...) = (1,2,3)
+end
+#---------------------
+1   (= slot₁/x 1)
+2   (call core.tuple 2 3)
+3   (= slot₂/ys %₂)
+4   (call core.tuple 1 2 3)
+5   (return %₄)
+
+########################################
+# Destructuring with tuple elimination and non-final ... on lhs
+let
+    (x, ys..., z) = (a,b,c)
+end
+#---------------------
+1   TestMod.a
+2   TestMod.b
+3   TestMod.c
+4   (= slot₁/x %₁)
+5   (call core.tuple %₂)
+6   (= slot₂/ys %₅)
+7   (= slot₃/z %₃)
+8   (call core.tuple %₁ %₂ %₃)
+9   (return %₈)
+
+########################################
+# Error: Destructuring with tuple elimination and too few RHS elements
+(x,) = ()
+#---------------------
+LoweringError:
+(x,) = ()
+└───────┘ ── More variables on left hand side than right hand in tuple assignment
+
+########################################
+# Error: Destructuring with tuple elimination, slurping, and too few RHS elements
+(x,y,ys...) = (1,)
+#---------------------
+LoweringError:
+(x,y,ys...) = (1,)
+└────────────────┘ ── More variables on left hand side than right hand in tuple assignment
+
+########################################
+# Destructuring with tuple elimination but not in value position never creates
+# the tuple
+let
+    (x, ys...) = (a,b,c)
+    nothing
+end
+#---------------------
+1   TestMod.a
+2   TestMod.b
+3   TestMod.c
+4   (= slot₁/x %₁)
+5   (call core.tuple %₂ %₃)
+6   (= slot₂/ys %₅)
+7   TestMod.nothing
+8   (return %₇)
+
+########################################
+# Property destructuring
+let
+    (; x, y) = rhs
+end
+#---------------------
+1   TestMod.rhs
+2   (= slot₁/x (call top.getproperty %₁ :x))
+3   (= slot₂/y (call top.getproperty %₁ :y))
+4   (return %₁)
+
+########################################
+# Property destructuring with colliding symbolic lhs/rhs
+let
+    local x
+    (; x, y) = x
+end
+#---------------------
+1   slot₁/x
+2   (= slot₁/x (call top.getproperty %₁ :x))
+3   (= slot₂/y (call top.getproperty %₁ :y))
+4   (return %₁)
+
+########################################
+# Property destructuring with nontrivial rhs
+let
+    (; x, y) = f()
+end
+#---------------------
+1   TestMod.f
+2   (call %₁)
+3   (= slot₁/x (call top.getproperty %₂ :x))
+4   (= slot₂/y (call top.getproperty %₂ :y))
+5   (return %₂)
+
+########################################
+# Property destructuring with type decl
+let
+    (; x::T) = rhs
+end
+#---------------------
+1   (newvar slot₁/x)
+2   TestMod.rhs
+3   (call top.getproperty %₂ :x)
+4   TestMod.T
+5   (= slot₂/tmp %₃)
+6   slot₂/tmp
+7   (call core.isa %₆ %₄)
+8   (gotoifnot %₇ label₁₀)
+9   (goto label₁₃)
+10  slot₂/tmp
+11  (call top.convert %₄ %₁₀)
+12  (= slot₂/tmp (call core.typeassert %₁₁ %₄))
+13  slot₂/tmp
+14  (= slot₁/x %₁₃)
+15  (return %₂)
+
+########################################
+# Error: Property destructuring with frankentuple
+(x ; a, b) = rhs
+#---------------------
+LoweringError:
+(x ; a, b) = rhs
+└────────┘ ── Property destructuring must use a single `;` before the property names, eg `(; a, b) = rhs`
+
+########################################
+# Error: Property destructuring with values for properties
+(; a=1, b) = rhs
+#---------------------
+LoweringError:
+(; a=1, b) = rhs
+#  └─┘ ── invalid assignment location
diff --git a/JuliaLowering/test/desugaring.jl b/JuliaLowering/test/desugaring.jl
new file mode 100644
index 0000000000000..66a1766b342cb
--- /dev/null
+++ b/JuliaLowering/test/desugaring.jl
@@ -0,0 +1,57 @@
+@testset "Desugaring" begin
+
+test_mod = Module(:TestMod)
+
+# @test desugar(test_mod, """
+# let
+#     y = 0
+#     x = 1
+#     let x = x + 1
+#         y = x
+#     end
+#     (x, y)
+# end
+# """) ≈ @ast_ [K"block"
+#     [K"block"
+#         [K"="
+#             "y"::K"Identifier"
+#             0::K"Integer"
+#         ]
+#         [K"="
+#             "x"::K"Identifier"
+#             1::K"Integer"
+#         ]
+#         [K"block"
+#             [K"="
+#                 1::K"BindingId"
+#                 [K"call"
+#                     "+"::K"Identifier"
+#                     "x"::K"Identifier"
+#                     1::K"Integer"
+#                 ]
+#             ]
+#             [K"block"
+#                 [K"local_def"
+#                     "x"::K"Identifier"
+#                 ]
+#                 [K"="
+#                     "x"::K"Identifier"
+#                     1::K"BindingId"
+#                 ]
+#                 [K"block"
+#                     [K"="
+#                         "y"::K"Identifier"
+#                         "x"::K"Identifier"
+#                     ]
+#                 ]
+#             ]
+#         ]
+#         [K"call"
+#             "tuple"::K"core"
+#             "x"::K"Identifier"
+#             "y"::K"Identifier"
+#         ]
+#     ]
+# ]
+
+end
diff --git a/JuliaLowering/test/exceptions.jl b/JuliaLowering/test/exceptions.jl
new file mode 100644
index 0000000000000..e270ae38944f7
--- /dev/null
+++ b/JuliaLowering/test/exceptions.jl
@@ -0,0 +1,338 @@
+@testset "try/catch" begin
+
+test_mod = Module()
+
+@test isempty(current_exceptions())
+
+@testset "tail position" begin
+
+    @test JuliaLowering.include_string(test_mod, """
+    try
+        1
+    catch
+        2
+    end
+    """) == 1
+
+    @test JuliaLowering.include_string(test_mod, """
+    try
+        error("hi")
+        1
+    catch
+        2
+    end
+    """) == 2
+
+    @test JuliaLowering.include_string(test_mod, """
+    try
+        error("hi")
+    catch exc
+        exc
+    end
+    """) == ErrorException("hi")
+
+
+    @test JuliaLowering.include_string(test_mod, """
+    try
+        1
+    catch
+        2
+    else
+        3
+    end
+    """) == 3
+
+    @test JuliaLowering.include_string(test_mod, """
+    try
+        error("hi")
+        1
+    catch
+        2
+    else
+        3
+    end
+    """) == 2
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f()
+            try
+                return 1
+            catch
+            end
+            return 2
+        end
+        f()
+    end
+    """) == 1
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function g()
+            try
+                return 1
+            catch
+            end
+        end
+        g()
+    end
+    """) == 1
+
+    @test JuliaLowering.include_string(test_mod, """
+    let x = -1
+        while true
+            try
+                error("hi")
+            catch
+                x = 2
+                break
+            end
+        end
+        x
+    end
+    """) == 2
+
+    @test JuliaLowering.include_string(test_mod, """
+    let x = -1
+        while true
+            try
+                x = 2
+                break
+            catch
+            end
+        end
+        x
+    end
+    """) == 2
+end
+
+@testset "value position" begin
+
+    @test JuliaLowering.include_string(test_mod, """
+    let
+        x = try
+            1
+        catch
+            2
+        end
+        x
+    end
+    """) == 1
+
+    @test JuliaLowering.include_string(test_mod, """
+    let
+        x = try
+            error("hi")
+            1
+        catch
+            2
+        end
+        x
+    end
+    """) == 2
+
+    @test JuliaLowering.include_string(test_mod, """
+    let
+        x = try
+            error("hi")
+        catch exc
+            exc
+        end
+        x
+    end
+    """) == ErrorException("hi")
+
+
+    @test JuliaLowering.include_string(test_mod, """
+    let
+        x = try
+            1
+        catch
+            2
+        else
+            3
+        end
+        x
+    end
+    """) == 3
+
+    @test JuliaLowering.include_string(test_mod, """
+    let
+        x = try
+            error("hi")
+            1
+        catch
+            2
+        else
+            3
+        end
+        x
+    end
+    """) == 2
+
+end
+
+@testset "not value/tail position" begin
+
+    @test JuliaLowering.include_string(test_mod, """
+    let x = -1
+        try
+            x = 1
+        catch
+            x = 2
+        end
+        x
+    end
+    """) == 1
+
+    @test JuliaLowering.include_string(test_mod, """
+    let x = -1
+        try
+            error("hi")
+            x = 1
+        catch
+            x = 2
+        end
+        x
+    end
+    """) == 2
+
+    @test JuliaLowering.include_string(test_mod, """
+    let x = -1
+        try
+            x = error("hi")
+        catch exc
+            x = exc
+        end
+        x
+    end
+    """) == ErrorException("hi")
+
+
+    @test JuliaLowering.include_string(test_mod, """
+    let x = -1
+        try
+            x = 1
+        catch
+            x = 2
+        else
+            x = 3
+        end
+        x
+    end
+    """) == 3
+
+    @test JuliaLowering.include_string(test_mod, """
+    let x = -1
+        try
+            error("hi")
+            x = 1
+        catch
+            x = 2
+        else
+            x = 3
+        end
+        x
+    end
+    """) == 2
+
+end
+
+@testset "exception stack" begin
+
+    @test JuliaLowering.include_string(test_mod, """
+    try
+        try
+            error("hi")
+        catch
+            error("ho")
+        end
+    catch
+        a = []
+        for x in current_exceptions()
+            push!(a, x.exception)
+        end
+        a
+    end
+    """) == [ErrorException("hi"), ErrorException("ho")]
+
+end
+
+@test isempty(current_exceptions())
+
+end
+
+#-------------------------------------------------------------------------------
+@testset "try/finally" begin
+
+test_mod = Module()
+
+@test JuliaLowering.include_string(test_mod, """
+let x = -1
+    try
+        x = 1
+    finally
+        x = 2
+    end
+    x
+end
+""") == 2
+
+@test JuliaLowering.include_string(test_mod, """
+let x = -1
+    try
+        try
+            error("hi")
+            x = 1
+        finally
+            x = 2
+        end
+    catch
+    end
+    x
+end
+""") == 2
+
+JuliaLowering.include_string(test_mod, """
+begin
+    function nested_finally(a, x, b, c)
+        try
+            try
+                if x
+                    return b
+                end
+                c
+            finally
+                push!(a, 1)
+            end
+        finally
+            push!(a, 2)
+        end
+    end
+end
+""")
+@test (a = []; res = test_mod.nested_finally(a, true, 100, 200); (a, res)) == ([1,2], 100)
+@test (a = []; res = test_mod.nested_finally(a, false, 100, 200); (a, res)) == ([1,2], 200)
+
+@test JuliaLowering.include_string(test_mod, """
+try
+    1
+catch
+    2
+finally
+    3
+end
+""") == 1
+
+@test JuliaLowering.include_string(test_mod, """
+try
+    error("hi")
+    1
+catch
+    2
+finally
+    3
+end
+""") == 2
+
+end
diff --git a/JuliaLowering/test/exceptions_ir.jl b/JuliaLowering/test/exceptions_ir.jl
new file mode 100644
index 0000000000000..8cf423258f0c5
--- /dev/null
+++ b/JuliaLowering/test/exceptions_ir.jl
@@ -0,0 +1,357 @@
+########################################
+# Return from inside try/catch
+try
+    f
+    return x
+catch
+    g
+    return y
+end
+#---------------------
+1   (enter label₆)
+2   TestMod.f
+3   TestMod.x
+4   (leave %₁)
+5   (return %₃)
+6   TestMod.g
+7   TestMod.y
+8   (pop_exception %₁)
+9   (return %₇)
+
+########################################
+# Return from inside try/catch with simple return vals
+try
+    f
+    return 10
+catch
+    g
+    return 20
+end
+#---------------------
+1   (enter label₅)
+2   TestMod.f
+3   (leave %₁)
+4   (return 10)
+5   TestMod.g
+6   (pop_exception %₁)
+7   (return 20)
+
+########################################
+# Return from multiple try + try/catch
+try
+    try
+        return 10
+    catch
+        return 20
+    end
+catch
+end
+#---------------------
+1   (enter label₁₄)
+2   (enter label₇)
+3   (leave %₁ %₂)
+4   (return 10)
+5   (leave %₂)
+6   (goto label₁₁)
+7   (leave %₁)
+8   (pop_exception %₂)
+9   (return 20)
+10  (pop_exception %₂)
+11  slot₁/try_result
+12  (leave %₁)
+13  (return %₁₁)
+14  (pop_exception %₁)
+15  (return core.nothing)
+
+########################################
+# Return from multiple catch + try/catch
+try
+catch
+    try
+        return 10
+    catch
+        return 20
+    end
+end
+#---------------------
+1   (enter label₄)
+2   (leave %₁)
+3   (return core.nothing)
+4   (enter label₈)
+5   (leave %₄)
+6   (pop_exception %₁)
+7   (return 10)
+8   (pop_exception %₁)
+9   (return 20)
+
+########################################
+# try/catch/else, tail position
+try
+    a
+catch
+    b
+else
+    c
+end
+#---------------------
+1   (enter label₆)
+2   TestMod.a
+3   (leave %₁)
+4   TestMod.c
+5   (return %₄)
+6   TestMod.b
+7   (pop_exception %₁)
+8   (return %₆)
+
+########################################
+# try/catch/else, value position
+let
+    z = try
+        a
+    catch
+        b
+    else
+        c
+    end
+end
+#---------------------
+1   (newvar slot₁/z)
+2   (enter label₈)
+3   TestMod.a
+4   (leave %₂)
+5   TestMod.c
+6   (= slot₂/try_result %₅)
+7   (goto label₁₁)
+8   TestMod.b
+9   (= slot₂/try_result %₈)
+10  (pop_exception %₂)
+11  slot₂/try_result
+12  (= slot₁/z %₁₁)
+13  (return %₁₁)
+
+########################################
+# try/catch/else, not value/tail
+begin
+    try
+        a
+    catch
+        b
+    else
+        c
+    end
+    z
+end
+#---------------------
+1   (enter label₆)
+2   TestMod.a
+3   (leave %₁)
+4   TestMod.c
+5   (goto label₈)
+6   TestMod.b
+7   (pop_exception %₁)
+8   TestMod.z
+9   (return %₈)
+
+########################################
+# basic try/finally, tail position
+try
+    a
+finally
+    b
+end
+#---------------------
+1   (enter label₇)
+2   (= slot₁/finally_tag -1)
+3   (= slot₂/returnval_via_finally TestMod.a)
+4   (= slot₁/finally_tag 1)
+5   (leave %₁)
+6   (goto label₈)
+7   (= slot₁/finally_tag 2)
+8   TestMod.b
+9   (call core.=== slot₁/finally_tag 2)
+10  (gotoifnot %₉ label₁₂)
+11  (call top.rethrow)
+12  slot₂/returnval_via_finally
+13  (return %₁₂)
+
+########################################
+# basic try/finally, value position
+let
+    z = try
+        a
+    finally
+        b
+    end
+end
+#---------------------
+1   (newvar slot₁/z)
+2   (enter label₈)
+3   (= slot₃/finally_tag -1)
+4   TestMod.a
+5   (= slot₂/try_result %₄)
+6   (leave %₂)
+7   (goto label₉)
+8   (= slot₃/finally_tag 1)
+9   TestMod.b
+10  (call core.=== slot₃/finally_tag 1)
+11  (gotoifnot %₁₀ label₁₃)
+12  (call top.rethrow)
+13  slot₂/try_result
+14  (= slot₁/z %₁₃)
+15  (return %₁₃)
+
+########################################
+# basic try/finally, not value/tail
+begin
+    try
+        a
+    finally
+        b
+    end
+    z
+end
+#---------------------
+1   (enter label₆)
+2   (= slot₁/finally_tag -1)
+3   TestMod.a
+4   (leave %₁)
+5   (goto label₇)
+6   (= slot₁/finally_tag 1)
+7   TestMod.b
+8   (call core.=== slot₁/finally_tag 1)
+9   (gotoifnot %₈ label₁₁)
+10  (call top.rethrow)
+11  TestMod.z
+12  (return %₁₁)
+
+########################################
+# try/finally + break
+while true
+    try
+        a
+        break
+    finally
+        b
+    end
+end
+#---------------------
+1   (gotoifnot true label₁₅)
+2   (enter label₉)
+3   (= slot₁/finally_tag -1)
+4   TestMod.a
+5   (leave %₂)
+6   (goto label₁₅)
+7   (leave %₂)
+8   (goto label₁₀)
+9   (= slot₁/finally_tag 1)
+10  TestMod.b
+11  (call core.=== slot₁/finally_tag 1)
+12  (gotoifnot %₁₁ label₁₄)
+13  (call top.rethrow)
+14  (goto label₁)
+15  (return core.nothing)
+
+########################################
+# try/catch/finally
+try
+    a
+catch
+    b
+finally
+    c
+end
+#---------------------
+1   (enter label₁₅)
+2   (= slot₁/finally_tag -1)
+3   (enter label₈)
+4   TestMod.a
+5   (= slot₂/try_result %₄)
+6   (leave %₃)
+7   (goto label₁₁)
+8   TestMod.b
+9   (= slot₂/try_result %₈)
+10  (pop_exception %₃)
+11  (= slot₃/returnval_via_finally slot₂/try_result)
+12  (= slot₁/finally_tag 1)
+13  (leave %₁)
+14  (goto label₁₆)
+15  (= slot₁/finally_tag 2)
+16  TestMod.c
+17  (call core.=== slot₁/finally_tag 2)
+18  (gotoifnot %₁₇ label₂₀)
+19  (call top.rethrow)
+20  slot₃/returnval_via_finally
+21  (return %₂₀)
+
+########################################
+# Nested finally blocks
+try
+    try
+        if x
+            return a
+        end
+        b
+    finally
+        c
+    end
+finally
+    d
+end
+#---------------------
+1   (enter label₃₀)
+2   (= slot₁/finally_tag -1)
+3   (enter label₁₅)
+4   (= slot₃/finally_tag -1)
+5   TestMod.x
+6   (gotoifnot %₅ label₁₁)
+7   (= slot₄/returnval_via_finally TestMod.a)
+8   (= slot₃/finally_tag 1)
+9   (leave %₃)
+10  (goto label₁₆)
+11  TestMod.b
+12  (= slot₂/try_result %₁₁)
+13  (leave %₃)
+14  (goto label₁₆)
+15  (= slot₃/finally_tag 2)
+16  TestMod.c
+17  (call core.=== slot₃/finally_tag 2)
+18  (gotoifnot %₁₇ label₂₀)
+19  (call top.rethrow)
+20  (call core.=== slot₃/finally_tag 1)
+21  (gotoifnot %₂₀ label₂₆)
+22  (= slot₅/returnval_via_finally slot₄/returnval_via_finally)
+23  (= slot₁/finally_tag 1)
+24  (leave %₁)
+25  (goto label₃₁)
+26  (= slot₆/returnval_via_finally slot₂/try_result)
+27  (= slot₁/finally_tag 2)
+28  (leave %₁)
+29  (goto label₃₁)
+30  (= slot₁/finally_tag 3)
+31  TestMod.d
+32  (call core.=== slot₁/finally_tag 3)
+33  (gotoifnot %₃₂ label₃₅)
+34  (call top.rethrow)
+35  (call core.=== slot₁/finally_tag 2)
+36  (gotoifnot %₃₅ label₃₉)
+37  slot₆/returnval_via_finally
+38  (return %₃₇)
+39  slot₅/returnval_via_finally
+40  (return %₃₉)
+
+########################################
+# Access to the exception object
+try
+    a
+catch exc
+    b
+end
+#---------------------
+1   (enter label₅)
+2   TestMod.a
+3   (leave %₁)
+4   (return %₂)
+5   (= slot₁/exc (call JuliaLowering.current_exception))
+6   TestMod.b
+7   (pop_exception %₁)
+8   (return %₆)
diff --git a/JuliaLowering/test/function_calls_ir.jl b/JuliaLowering/test/function_calls_ir.jl
new file mode 100644
index 0000000000000..1426ed228ddc8
--- /dev/null
+++ b/JuliaLowering/test/function_calls_ir.jl
@@ -0,0 +1,664 @@
+########################################
+# Simple call
+f(x, y)
+#---------------------
+1   TestMod.f
+2   TestMod.x
+3   TestMod.y
+4   (call %₁ %₂ %₃)
+5   (return %₄)
+
+########################################
+# Keyword calls
+f(x; a=1, b=2)
+#---------------------
+1   TestMod.f
+2   (call core.tuple :a :b)
+3   (call core.apply_type core.NamedTuple %₂)
+4   (call core.tuple 1 2)
+5   (call %₃ %₄)
+6   TestMod.x
+7   (call core.kwcall %₅ %₁ %₆)
+8   (return %₇)
+
+########################################
+# Keyword call with only splats for kws
+f(; ks1..., ks2...)
+#---------------------
+1   TestMod.f
+2   (call core.NamedTuple)
+3   TestMod.ks1
+4   (call top.merge %₂ %₃)
+5   TestMod.ks2
+6   (call top.merge %₄ %₅)
+7   (call top.isempty %₆)
+8   (gotoifnot %₇ label₁₁)
+9   (call %₁)
+10  (return %₉)
+11  (call core.kwcall %₆ %₁)
+12  (return %₁₁)
+
+########################################
+# Error: Call with repeated keywords
+f(x; a=1, a=2)
+#---------------------
+LoweringError:
+f(x; a=1, a=2)
+#         ╙ ── Repeated keyword argument name
+
+########################################
+# literal_pow lowering
+x^42
+#---------------------
+1   TestMod.^
+2   TestMod.x
+3   (call core.apply_type top.Val 42)
+4   (call %₃)
+5   (call top.literal_pow %₁ %₂ %₄)
+6   (return %₅)
+
+########################################
+# almost but not quite literal_pow lowering :)
+x^42.0
+#---------------------
+1   TestMod.^
+2   TestMod.x
+3   (call %₁ %₂ 42.0)
+4   (return %₃)
+
+########################################
+# Error: infix call without enough arguments
+@ast_ [K"call"(syntax_flags=JuliaSyntax.INFIX_FLAG)
+    "x"::K"Identifier"
+]
+#---------------------
+LoweringError:
+#= line 1 =# - Postfix/infix operators must have at least two positional arguments
+
+########################################
+# Error: postfix call without enough arguments
+@ast_ [K"call"(syntax_flags=JuliaSyntax.POSTFIX_OP_FLAG)
+    "x"::K"Identifier"
+]
+#---------------------
+LoweringError:
+#= line 1 =# - Postfix/infix operators must have at least two positional arguments
+
+########################################
+# Error: Call with no function name
+@ast_ [K"call"]
+#---------------------
+LoweringError:
+#= line 1 =# - Call expressions must have a function name
+
+########################################
+# Simple broadcast
+x .* y .+ f.(z)
+#---------------------
+1   TestMod.+
+2   TestMod.*
+3   TestMod.x
+4   TestMod.y
+5   (call top.broadcasted %₂ %₃ %₄)
+6   TestMod.f
+7   TestMod.z
+8   (call top.broadcasted %₆ %₇)
+9   (call top.broadcasted %₁ %₅ %₈)
+10  (call top.materialize %₉)
+11  (return %₁₀)
+
+########################################
+# Broadcast with unary function calls
+.+x
+#---------------------
+1   TestMod.+
+2   TestMod.x
+3   (call top.broadcasted %₁ %₂)
+4   (call top.materialize %₃)
+5   (return %₄)
+
+########################################
+# Broadcast with short circuit operators
+x .&& y .|| z
+#---------------------
+1   TestMod.x
+2   TestMod.y
+3   (call top.broadcasted top.andand %₁ %₂)
+4   TestMod.z
+5   (call top.broadcasted top.oror %₃ %₄)
+6   (call top.materialize %₅)
+7   (return %₆)
+
+########################################
+# Scalar comparison chain
+x < y < z
+#---------------------
+1   TestMod.<
+2   TestMod.x
+3   TestMod.y
+4   (call %₁ %₂ %₃)
+5   (gotoifnot %₄ label₁₁)
+6   TestMod.<
+7   TestMod.y
+8   TestMod.z
+9   (call %₆ %₇ %₈)
+10  (return %₉)
+11  (return false)
+
+########################################
+# Broadcasted comparison chain
+x .< y .< z
+#---------------------
+1   TestMod.<
+2   TestMod.x
+3   TestMod.y
+4   (call top.broadcasted %₁ %₂ %₃)
+5   TestMod.<
+6   TestMod.y
+7   TestMod.z
+8   (call top.broadcasted %₅ %₆ %₇)
+9   (call top.broadcasted top.& %₄ %₈)
+10  (call top.materialize %₉)
+11  (return %₁₀)
+
+########################################
+# Mixed scalar / broadcasted comparison chain
+a < b < c .< d .< e
+#---------------------
+1   TestMod.<
+2   TestMod.a
+3   TestMod.b
+4   (call %₁ %₂ %₃)
+5   (gotoifnot %₄ label₁₁)
+6   TestMod.<
+7   TestMod.b
+8   TestMod.c
+9   (= slot₁/if_val (call %₆ %₇ %₈))
+10  (goto label₁₂)
+11  (= slot₁/if_val false)
+12  slot₁/if_val
+13  TestMod.<
+14  TestMod.c
+15  TestMod.d
+16  (call top.broadcasted %₁₃ %₁₄ %₁₅)
+17  (call top.broadcasted top.& %₁₂ %₁₆)
+18  TestMod.<
+19  TestMod.d
+20  TestMod.e
+21  (call top.broadcasted %₁₈ %₁₉ %₂₀)
+22  (call top.broadcasted top.& %₁₇ %₂₁)
+23  (call top.materialize %₂₂)
+24  (return %₂₃)
+
+########################################
+# Mixed scalar / broadcasted comparison chain
+a .< b .< c < d < e
+#---------------------
+1   TestMod.<
+2   TestMod.a
+3   TestMod.b
+4   (call top.broadcasted %₁ %₂ %₃)
+5   TestMod.<
+6   TestMod.b
+7   TestMod.c
+8   (call top.broadcasted %₅ %₆ %₇)
+9   (call top.broadcasted top.& %₄ %₈)
+10  TestMod.<
+11  TestMod.c
+12  TestMod.d
+13  (call %₁₀ %₁₁ %₁₂)
+14  (gotoifnot %₁₃ label₂₀)
+15  TestMod.<
+16  TestMod.d
+17  TestMod.e
+18  (= slot₁/if_val (call %₁₅ %₁₆ %₁₇))
+19  (goto label₂₁)
+20  (= slot₁/if_val false)
+21  slot₁/if_val
+22  (call top.broadcasted top.& %₉ %₂₁)
+23  (call top.materialize %₂₂)
+24  (return %₂₃)
+
+########################################
+# Comparison chain fused with other broadcasting
+x .+ (a .< b .< c)
+#---------------------
+1   TestMod.+
+2   TestMod.x
+3   TestMod.<
+4   TestMod.a
+5   TestMod.b
+6   (call top.broadcasted %₃ %₄ %₅)
+7   TestMod.<
+8   TestMod.b
+9   TestMod.c
+10  (call top.broadcasted %₇ %₈ %₉)
+11  (call top.broadcasted top.& %₆ %₁₀)
+12  (call top.broadcasted %₁ %₂ %₁₁)
+13  (call top.materialize %₁₂)
+14  (return %₁₃)
+
+########################################
+# Broadcast with literal_pow
+x.^3
+#---------------------
+1   TestMod.^
+2   TestMod.x
+3   (call core.apply_type top.Val 3)
+4   (call %₃)
+5   (call top.broadcasted top.literal_pow %₁ %₂ %₄)
+6   (call top.materialize %₅)
+7   (return %₆)
+
+########################################
+# Broadcast with keywords
+f.(x, y, z = 1; w = 2)
+#---------------------
+1   top.broadcasted_kwsyntax
+2   (call core.tuple :z :w)
+3   (call core.apply_type core.NamedTuple %₂)
+4   (call core.tuple 1 2)
+5   (call %₃ %₄)
+6   TestMod.f
+7   TestMod.x
+8   TestMod.y
+9   (call core.kwcall %₅ %₁ %₆ %₇ %₈)
+10  (call top.materialize %₉)
+11  (return %₁₀)
+
+########################################
+# Broadcast with unary dot syntax
+(.+)(x,y)
+#---------------------
+1   TestMod.+
+2   TestMod.x
+3   TestMod.y
+4   (call top.broadcasted %₁ %₂ %₃)
+5   (call top.materialize %₄)
+6   (return %₅)
+
+########################################
+# Trivial in-place broadcast update
+x .= y
+#---------------------
+1   TestMod.x
+2   TestMod.y
+3   (call top.broadcasted top.identity %₂)
+4   (call top.materialize! %₁ %₃)
+5   (return %₄)
+
+########################################
+# Fused in-place broadcast update
+x .= y .+ z
+#---------------------
+1   TestMod.x
+2   TestMod.+
+3   TestMod.y
+4   TestMod.z
+5   (call top.broadcasted %₂ %₃ %₄)
+6   (call top.materialize! %₁ %₅)
+7   (return %₆)
+
+########################################
+# In-place broadcast update with property assignment on left hand side
+x.prop .= y
+#---------------------
+1   TestMod.x
+2   (call top.dotgetproperty %₁ :prop)
+3   TestMod.y
+4   (call top.broadcasted top.identity %₃)
+5   (call top.materialize! %₂ %₄)
+6   (return %₅)
+
+########################################
+# In-place broadcast update with ref on left hand side
+x[i,end] .= y
+#---------------------
+1   TestMod.x
+2   TestMod.i
+3   (call top.lastindex %₁ 2)
+4   (call top.dotview %₁ %₂ %₃)
+5   TestMod.y
+6   (call top.broadcasted top.identity %₅)
+7   (call top.materialize! %₄ %₆)
+8   (return %₇)
+
+########################################
+# <: as a function call
+x <: y
+#---------------------
+1   TestMod.<:
+2   TestMod.x
+3   TestMod.y
+4   (call %₁ %₂ %₃)
+5   (return %₄)
+
+########################################
+# >: as a function call
+x >: y
+#---------------------
+1   TestMod.>:
+2   TestMod.x
+3   TestMod.y
+4   (call %₁ %₂ %₃)
+5   (return %₄)
+
+########################################
+# --> as a function call
+x --> y
+#---------------------
+1   TestMod.-->
+2   TestMod.x
+3   TestMod.y
+4   (call %₁ %₂ %₃)
+5   (return %₄)
+
+########################################
+# basic ccall
+ccall(:strlen, Csize_t, (Cstring,), "asdfg")
+#---------------------
+1   TestMod.Cstring
+2   (call top.cconvert %₁ "asdfg")
+3   (call top.unsafe_convert %₁ %₂)
+4   (foreigncall :strlen (static_eval TestMod.Csize_t) (static_eval (call core.svec TestMod.Cstring)) 0 :ccall %₃ %₂)
+5   (return %₄)
+
+########################################
+# ccall with library name as a global var
+ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg")
+#---------------------
+1   TestMod.Cstring
+2   (call top.cconvert %₁ "asdfg")
+3   (call top.unsafe_convert %₁ %₂)
+4   (foreigncall (static_eval (call core.tuple :strlen TestMod.libc)) (static_eval TestMod.Csize_t) (static_eval (call core.svec TestMod.Cstring)) 0 :ccall %₃ %₂)
+5   (return %₄)
+
+########################################
+# ccall with a calling convention
+ccall(:foo, stdcall, Csize_t, ())
+#---------------------
+1   (foreigncall :foo (static_eval TestMod.Csize_t) (static_eval (call core.svec)) 0 :stdcall)
+2   (return %₁)
+
+########################################
+# ccall with Any args become core.Any and don't need conversion or GC roots
+ccall(:foo, stdcall, Csize_t, (Any,), x)
+#---------------------
+1   core.Any
+2   TestMod.x
+3   (foreigncall :foo (static_eval TestMod.Csize_t) (static_eval (call core.svec core.Any)) 0 :stdcall %₂)
+4   (return %₃)
+
+########################################
+# ccall with variable as function name (must eval to a pointer)
+ccall(ptr, Csize_t, (Cstring,), "asdfg")
+#---------------------
+1   TestMod.Cstring
+2   (call top.cconvert %₁ "asdfg")
+3   TestMod.ptr
+4   (call top.unsafe_convert %₁ %₂)
+5   (foreigncall %₃ (static_eval TestMod.Csize_t) (static_eval (call core.svec TestMod.Cstring)) 0 :ccall %₄ %₂)
+6   (return %₅)
+
+########################################
+# ccall with varargs
+ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5")
+#---------------------
+1   TestMod.Cstring
+2   TestMod.Cstring
+3   TestMod.Cstring
+4   (call top.cconvert %₁ "%s = %s\n")
+5   (call top.cconvert %₂ "2 + 2")
+6   (call top.cconvert %₃ "5")
+7   (call top.unsafe_convert %₁ %₄)
+8   (call top.unsafe_convert %₂ %₅)
+9   (call top.unsafe_convert %₃ %₆)
+10  (foreigncall :printf (static_eval TestMod.Cint) (static_eval (call core.svec TestMod.Cstring TestMod.Cstring TestMod.Cstring)) 1 :ccall %₇ %₈ %₉ %₄ %₅ %₆)
+11  (return %₁₀)
+
+########################################
+# Error: ccall with too few arguments
+ccall(:foo, Csize_t)
+#---------------------
+LoweringError:
+ccall(:foo, Csize_t)
+└──────────────────┘ ── too few arguments to ccall
+
+########################################
+# Error: ccall with calling conv and too few arguments
+ccall(:foo, thiscall, Csize_t)
+#---------------------
+LoweringError:
+ccall(:foo, thiscall, Csize_t)
+└────────────────────────────┘ ── too few arguments to ccall with calling convention specified
+
+########################################
+# Error: ccall without tuple for argument types
+ccall(:foo, Csize_t, Cstring)
+#---------------------
+LoweringError:
+ccall(:foo, Csize_t, Cstring)
+#                    └─────┘ ── ccall argument types must be a tuple; try `(T,)`
+
+########################################
+# Error: ccall without tuple for argument types
+ccall(:foo, (Csize_t,), "arg")
+#---------------------
+LoweringError:
+ccall(:foo, (Csize_t,), "arg")
+#           └────────┘ ── ccall argument types must be a tuple; try `(T,)` and check if you specified a correct return type
+
+########################################
+# Error: ccall with library name which is a local variable
+let libc = "libc"
+    ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg")
+end
+#---------------------
+LoweringError:
+let libc = "libc"
+    ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg")
+#                   └──┘ ── function name and library expression cannot reference local variable
+end
+
+########################################
+# Error: ccall with return type which is a local variable
+let Csize_t = 1
+    ccall(:strlen, Csize_t, (Cstring,), "asdfg")
+end
+#---------------------
+LoweringError:
+let Csize_t = 1
+    ccall(:strlen, Csize_t, (Cstring,), "asdfg")
+#                  └─────┘ ── ccall return type cannot reference local variable
+end
+
+########################################
+# Error: ccall with argument type which is a local variable
+let Cstring = 1
+    ccall(:strlen, Csize_t, (Cstring,), "asdfg")
+end
+#---------------------
+LoweringError:
+let Cstring = 1
+    ccall(:strlen, Csize_t, (Cstring,), "asdfg")
+#                            └─────┘ ── ccall argument type cannot reference local variable
+end
+
+########################################
+# Error: ccall with too few arguments
+ccall(:strlen, Csize_t, (Cstring,))
+#---------------------
+LoweringError:
+ccall(:strlen, Csize_t, (Cstring,))
+└─────────────────────────────────┘ ── Too few arguments in ccall compared to argument types
+
+########################################
+# Error: ccall with too many arguments
+ccall(:strlen, Csize_t, (Cstring,), "asdfg", "blah")
+#---------------------
+LoweringError:
+ccall(:strlen, Csize_t, (Cstring,), "asdfg", "blah")
+└──────────────────────────────────────────────────┘ ── More arguments than types in ccall
+
+########################################
+# Error: ccall varargs with too few args
+ccall(:foo, Csize_t, (Cstring...,), "asdfg")
+#---------------------
+LoweringError:
+ccall(:foo, Csize_t, (Cstring...,), "asdfg")
+#                     └────────┘ ── C ABI prohibits vararg without one required argument
+
+########################################
+# Error: ccall with multiple varargs
+ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah")
+#---------------------
+LoweringError:
+ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah")
+#                     └────────┘ ── only the trailing ccall argument type should have `...`
+
+########################################
+# cglobal special support for (sym, lib) tuple
+cglobal((:sym, lib), Int)
+#---------------------
+1   TestMod.Int
+2   (call core.cglobal (static_eval (call core.tuple :sym TestMod.lib)) %₁)
+3   (return %₂)
+
+########################################
+# cglobal - non-tuple expressions in first arg are lowered as normal
+cglobal(f(), Int)
+#---------------------
+1   TestMod.f
+2   (call %₁)
+3   TestMod.Int
+4   (call core.cglobal %₂ %₃)
+5   (return %₄)
+
+########################################
+# Error: cglobal with library name referencing local variable
+let func="myfunc"
+    cglobal((func, "somelib"), Int)
+end
+#---------------------
+LoweringError:
+let func="myfunc"
+    cglobal((func, "somelib"), Int)
+#            └──┘ ── function name and library expression cannot reference local variable
+end
+
+########################################
+# Error: cglobal too many arguments
+cglobal(:sym, Int, blah)
+#---------------------
+LoweringError:
+cglobal(:sym, Int, blah)
+└──────────────────────┘ ── cglobal must have one or two arguments
+
+########################################
+# Error: assigning to `cglobal`
+cglobal = 10
+#---------------------
+LoweringError:
+cglobal = 10
+└─────┘ ── invalid assignment location
+
+########################################
+# Error: assigning to `ccall`
+ccall = 10
+#---------------------
+LoweringError:
+ccall = 10
+└───┘ ── invalid assignment location
+
+########################################
+# Error: assigning to `var"ccall"`
+var"ccall" = 10
+#---------------------
+LoweringError:
+var"ccall" = 10
+#   └───┘ ── invalid assignment location
+
+########################################
+# Error: Invalid function name ccall
+function ccall()
+end
+#---------------------
+LoweringError:
+function ccall()
+#        └───┘ ── Invalid function name
+end
+
+########################################
+# Error: Invalid function name ccall
+function A.ccall()
+end
+#---------------------
+LoweringError:
+function A.ccall()
+#        └─────┘ ── Invalid function name
+end
+
+########################################
+# Error: Invalid function name ccall
+function ccall{<:T}()
+end
+#---------------------
+LoweringError:
+function ccall{<:T}()
+#        └───┘ ── Invalid function name
+end
+
+########################################
+# Nested splat: simple case
+tuple((xs...)...)
+#---------------------
+1   TestMod.tuple
+2   (call core.tuple top.iterate %₁)
+3   TestMod.xs
+4   (call core._apply_iterate top.iterate core._apply_iterate %₂ %₃)
+5   (return %₄)
+
+########################################
+# Nested splat: with mixed arguments
+tuple(a, (xs...)..., b)
+#---------------------
+1   TestMod.tuple
+2   TestMod.a
+3   (call core.tuple %₂)
+4   (call core.tuple top.iterate %₁ %₃)
+5   TestMod.xs
+6   TestMod.b
+7   (call core.tuple %₆)
+8   (call core.tuple %₇)
+9   (call core._apply_iterate top.iterate core._apply_iterate %₄ %₅ %₈)
+10  (return %₉)
+
+########################################
+# Nested splat: multiple nested splats
+tuple((xs...)..., (ys...)...)
+#---------------------
+1   TestMod.tuple
+2   (call core.tuple top.iterate %₁)
+3   TestMod.xs
+4   TestMod.ys
+5   (call core._apply_iterate top.iterate core._apply_iterate %₂ %₃ %₄)
+6   (return %₅)
+
+########################################
+# Nested splat: triple nesting
+tuple(((xs...)...)...)
+#---------------------
+1   TestMod.tuple
+2   (call core.tuple top.iterate %₁)
+3   (call core.tuple top.iterate core._apply_iterate %₂)
+4   TestMod.xs
+5   (call core._apply_iterate top.iterate core._apply_iterate %₃ %₄)
+6   (return %₅)
+
+########################################
+# Error: Standalone splat expression
+(xs...)
+#---------------------
+LoweringError:
+(xs...)
+#└───┘ ── `...` expression outside call
diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl
new file mode 100644
index 0000000000000..ccf856d5f5305
--- /dev/null
+++ b/JuliaLowering/test/functions.jl
@@ -0,0 +1,605 @@
+@testset "Functions" begin
+
+test_mod = Module()
+
+# Function calls
+# Splatting
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = 1
+    y = 2
+    zs = (3,4)
+    w = 5
+    (tuple(zs...),
+     tuple(zs..., w),
+     tuple(y, zs...),
+     tuple(x, y, zs..., w))
+end
+""") == ((3,4),
+         (3,4,5),
+         (2,3,4),
+         (1,2,3,4,5))
+
+# Nested splatting
+@test JuliaLowering.include_string(test_mod, """
+let
+    xs = [[1, 2], [3, 4]]
+    tuple((xs...)...)
+end
+""") == (1, 2, 3, 4)
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    xs = [[1, 2]]
+    ys = [[3, 4]]
+    tuple((xs...)..., (ys...)...)
+end
+""") == (1, 2, 3, 4)
+
+# Multiple (>2) nested splat
+@test JuliaLowering.include_string(test_mod, """
+let
+    xs = [[[1, 2]]]
+    tuple(((xs...)...)...)
+end
+""") == (1, 2)
+@test JuliaLowering.include_string(test_mod, """
+let
+    xs = [[[1, 2]]]
+    ys = [[[3, 4]]]
+    tuple(((xs...)...)..., ((ys...)...)...)
+end
+""") == (1, 2, 3, 4)
+@test JuliaLowering.include_string(test_mod, """
+let
+    xs = [[[1, 2]]]
+    ys = [[[3, 4]]]
+    tuple(((xs...)...)..., ((ys...)...))
+end
+""") == (1, 2, [3, 4])
+
+# Trailing comma case should still work (different semantics)
+@test JuliaLowering.include_string(test_mod, """
+let
+    xs = [[1, 2], [3, 4]]
+    tuple((xs...,)...)
+end
+""") == ([1, 2], [3, 4])
+
+# Keyword calls
+Base.eval(test_mod, :(
+begin
+    function kwtest(; kws...)
+        values(kws)
+    end
+
+    # Note this definition generates an arguably-spurious warning when run via
+    # `Pkg.test()` due to the use of `--warn-override=true` in the test
+    # harness.
+    function kwtest()
+        "non-kw version of kwtest"
+    end
+end
+))
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    kws = (c=3,d=4)
+    kwtest(; kws..., a=1, d=0, e=5)
+end
+""") == (c=3, d=0, a=1, e=5)
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    kws = (;)
+    kwtest(; kws..., kws...)
+end
+""") == "non-kw version of kwtest"
+
+# literal_pow
+@test JuliaLowering.include_string(test_mod, """
+2^4
+""") == 16
+
+#-------------------------------------------------------------------------------
+# Arrow syntax
+@test JuliaLowering.include_string(test_mod, """
+let
+    f = ((x::T, y::T) where T) -> x + y
+    f(1, 2)
+end
+""") === 3
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    f = ((x::T; y=2) where T) -> x + y
+    f(1)
+end
+""") === 3
+
+# Passes desugaring, but T is detected as unused and throws an error.
+# Is it clear whether this should be `f(x::T) where T` or `f(x::T where T)`?
+@test_broken JuliaLowering.include_string(test_mod, """
+let
+    f = ((x::T) where T) -> x
+    f(1)
+end
+""") === 1
+
+#-------------------------------------------------------------------------------
+# Function definitions
+@test JuliaLowering.include_string(test_mod, """
+begin
+    function f(x)
+        y = x + 1
+        "hi", x, y
+    end
+
+    f(1)
+end
+""") == ("hi", 1, 2)
+
+@test JuliaLowering.include_string(test_mod, """
+begin
+    function unused_arg(x, _, y)
+        x + y
+    end
+    unused_arg(1,2,3)
+end
+""") == 4
+
+@test JuliaLowering.include_string(test_mod, """
+begin
+    function g(x)::Int
+        if x == 1
+            return 42.0
+        end
+        0xff
+    end
+    (g(1), g(2))
+end
+""") === (42, 255)
+
+# static parameters
+@test JuliaLowering.include_string(test_mod, """
+begin
+    function h(x, y)
+        "fallback"
+    end
+    function h(::Vector{T}, ::S) where {T, S <: T}
+        T, S
+    end
+    (h(1, 2), h(Number[0xff], 1.0), h(Int[1], 1), h(Int[1], 1.0))
+end
+""") === ("fallback", (Number, Float64), (Int, Int), "fallback")
+
+Base.eval(test_mod,
+:(struct X1{T} end)
+)
+
+# `where` params used in function obj type
+@test JuliaLowering.include_string(test_mod, """
+begin
+    function (x::X1{T})() where T
+        T
+    end
+    X1{Int}()()
+end
+""") === Int
+
+Base.include_string(test_mod,
+"""
+    struct X end
+
+    # Erroneous `convert` to test type assert in function return values
+    Base.convert(::Type{X}, y) = y
+""")
+
+@test_throws TypeError JuliaLowering.include_string(test_mod, """
+begin
+    function h()::X
+        return nothing
+    end
+    h()
+end
+""")
+
+@test JuliaLowering.include_string(test_mod, """
+x = 0
+function f_return_in_value_pos()
+    global x
+    x = return 42
+end
+
+(f_return_in_value_pos(), x)
+""") === (42, 0)
+
+@testset "Default positional arguments" begin
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f_def_simple(x=1, y=2, z=x)
+            (x,y,z)
+        end
+
+        (f_def_simple(), f_def_simple(10), f_def_simple(10,20), f_def_simple(10,20,30))
+    end
+    """) == ((1,2,1), (10,2,10), (10,20,10), (10,20,30))
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f_def_placeholders(::T=1, _::S=1.0) where {T,S}
+            (T,S)
+        end
+
+        (f_def_placeholders(), f_def_placeholders(1.0), f_def_placeholders(1.0, 1))
+    end
+    """) == ((Int,Float64), (Float64,Float64), (Float64,Int))
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f_def_typevars(x, y::S=[1], z::U=2) where {T, S<:AbstractVector{T}, U}
+            (x, y, z, T, S, U)
+        end
+
+        (f_def_typevars(1), f_def_typevars(1,[1.0]), f_def_typevars(1,[1.0],-1.0))
+    end
+    """) == ((1, [1], 2, Int, Vector{Int}, Int),
+             (1, [1.0], 2, Float64, Vector{Float64}, Int),
+             (1, [1.0], -1.0, Float64, Vector{Float64}, Float64))
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f_def_typevar_vararg_undef(x::T, y::Vararg{S}) where {T,S}
+            (x, y, @isdefined S)
+        end
+
+        (f_def_typevar_vararg_undef(1), f_def_typevar_vararg_undef(1,2), f_def_typevar_vararg_undef(1,2,3))
+    end
+    """) === ((1, (), false), (1, (2,), true), (1, (2, 3), true))
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        f_def_typevar_with_lowerbound(x::T) where {T>:Int} =
+            (x, @isdefined(T))
+        (f_def_typevar_with_lowerbound(1), f_def_typevar_with_lowerbound(1.0))
+    end
+    """) == ((1, true), (1.0, false))
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f_def_slurp(x=1, ys...)
+            (x, ys)
+        end
+
+        (f_def_slurp(), f_def_slurp(2), f_def_slurp(2,3))
+    end
+    """) == ((1, ()),
+             (2, ()),
+             (2, (3,)))
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f_def_ret_type(x=1.0)::Int
+            x
+        end
+
+        (f_def_ret_type(), f_def_ret_type(10.0))
+    end
+    """) === (1,10)
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f_def_slurp_splat(ys...=(1,2)...)
+            ys
+        end
+
+        (f_def_slurp_splat(), f_def_slurp_splat(10,20))
+    end
+    """) == ((1,2),
+             (10,20))
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f_def_destructure(x, (y,z)::Tuple{Int,Int}, (w,)...=(4,)...)
+            (x,y,z,w)
+        end
+
+        f_def_destructure(1, (2,3))
+    end
+    """) == (1,2,3,4)
+
+end
+
+@testset "Slot flags" begin
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f_nospecialize(u, v, @nospecialize(x), y, @nospecialize(z))
+            (u, v, x, y, z)
+        end
+
+        f_nospecialize(1,2,3,4,5)
+    end
+    """) == (1,2,3,4,5)
+    # We dig into the internal of `Method` here to check which slots have been
+    # flagged as nospecialize.
+    @test only(methods(test_mod.f_nospecialize)).nospecialize == 0b10100
+
+    JuliaLowering.include_string(test_mod, """
+    function f_slotflags(x, y, f, z)
+        f() + x + y
+    end
+    """)
+    @test only(methods(test_mod.f_slotflags)).called == 0b0100
+
+    # Branching combined with nospecialize meta in CodeInfo
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        function f_branch_meta(@nospecialize(x), cond)
+            if cond
+                x + 1
+            else
+                x + 2
+            end
+        end
+
+        (f_branch_meta(10, false), f_branch_meta(20, true))
+    end
+    """) == (12, 21)
+end
+
+@testset "Keyword functions" begin
+    JuliaLowering.include_string(test_mod, """
+    function f_kw_simple(a::Int=1, b::Float64=1.0; x::Char='a', y::Bool=true)
+        (a, b, x, y)
+    end
+    """)
+
+    @test test_mod.f_kw_simple()               === (1, 1.0, 'a', true)
+    @test test_mod.f_kw_simple(x='b')          === (1, 1.0, 'b', true)
+    @test test_mod.f_kw_simple(y=false)        === (1, 1.0, 'a', false)
+    @test test_mod.f_kw_simple(x='b', y=false) === (1, 1.0, 'b', false)
+
+    @test test_mod.f_kw_simple(20)                 === (20, 1.0, 'a', true)
+    @test test_mod.f_kw_simple(20; x='b')          === (20, 1.0, 'b', true)
+    @test test_mod.f_kw_simple(20; y=false)        === (20, 1.0, 'a', false)
+    @test test_mod.f_kw_simple(20; x='b', y=false) === (20, 1.0, 'b', false)
+
+    @test test_mod.f_kw_simple(20, 2.0)                 === (20, 2.0, 'a', true)
+    @test test_mod.f_kw_simple(20, 2.0; x='b')          === (20, 2.0, 'b', true)
+    @test test_mod.f_kw_simple(20, 2.0; y=false)        === (20, 2.0, 'a', false)
+    @test test_mod.f_kw_simple(20, 2.0; x='b', y=false) === (20, 2.0, 'b', false)
+
+    # Bad types for keyword args throw a type error
+    @test_throws(TypeError(Symbol("keyword argument"), :x, Char, 100),
+                 test_mod.f_kw_simple(x=100))
+    @test_throws(TypeError(Symbol("keyword argument"), :y, Bool, 100),
+                 test_mod.f_kw_simple(y=100))
+
+    # Keywords which aren't present throw an error
+    try
+        test_mod.f_kw_simple(20; not_present=100)
+        @test false
+    catch exc
+        @test exc isa MethodError
+        @test exc.f == Core.kwcall
+        @test exc.args == ((; not_present=100), test_mod.f_kw_simple, 20, 1.0)
+    end
+
+    # Slurping of positional args with keywords
+    JuliaLowering.include_string(test_mod, """
+    function f_pos_slurp_with_kws(z, args...; x=1,y=2)
+        args
+    end
+    """)
+    @test test_mod.f_pos_slurp_with_kws(3, 2, 1; x = 100) === (2,1)
+    @test test_mod.f_pos_slurp_with_kws(3, 2, 1) === (2,1)
+
+    # Slurping of keyword args
+    JuliaLowering.include_string(test_mod, """
+    function f_kw_slurp_all(; kws...)
+        kws
+    end
+    """)
+    @test values(test_mod.f_kw_slurp_all(x = 1, y = 2)) === (x=1, y=2)
+    @test values(test_mod.f_kw_slurp_all()) === (;)
+
+    # Slurping of keyword args
+    JuliaLowering.include_string(test_mod, """
+    function f_kw_slurp_some(; x=1, y=2, kws...)
+        kws
+    end
+    """)
+    @test values(test_mod.f_kw_slurp_some(z=3, x = 1, y = 2, w=4)) === (z=3, w=4)
+    @test values(test_mod.f_kw_slurp_some(x = 1)) === (;)
+    @test values(test_mod.f_kw_slurp_some()) === (;)
+
+    # Keyword defaults which depend on other keywords.
+    JuliaLowering.include_string(test_mod, """
+    begin
+        aaa = :outer
+        function f_kw_default_dependencies(; x=1, y=x, bbb=aaa, aaa=:aaa_kw, ccc=aaa)
+            (x, y, bbb, aaa, ccc)
+        end
+    end
+    """)
+    @test values(test_mod.f_kw_default_dependencies()) === (1, 1, :outer, :aaa_kw, :aaa_kw)
+    @test values(test_mod.f_kw_default_dependencies(x = 10)) === (10, 10, :outer, :aaa_kw, :aaa_kw)
+    @test values(test_mod.f_kw_default_dependencies(x = 10, aaa=:blah)) === (10, 10, :outer, :blah, :blah)
+
+    # Keywords with static parameters
+    JuliaLowering.include_string(test_mod, """
+    function f_kw_sparams(x::X, y::Y; a::A, b::B) where {X,Y,A,B}
+        (X,Y,A,B)
+    end
+    """)
+    @test values(test_mod.f_kw_sparams(1, 1.0; a="a", b='b')) === (Int, Float64, String, Char)
+
+    # Keywords with static parameters, where some keyword types can be inferred
+    # based on the positional parameters and others cannot.
+    JuliaLowering.include_string(test_mod, """
+    function f_kw_type_errors(x::X; a::F, b::X) where {X<:Integer,F<:AbstractFloat}
+        (X,F)
+    end
+    """)
+    @test values(test_mod.f_kw_type_errors(1; a=1.0, b=10)) === (Int, Float64)
+    # The following is a keyword TypeError because we can infer `X` based on
+    # the positional parameters and use that to check the type of `b`.
+    @test_throws TypeError values(test_mod.f_kw_type_errors(1; a=1.0, b="str"))
+    # The following is only a method error as we can't infer `F` prior to
+    # dispatching to the body function.
+    @test_throws MethodError values(test_mod.f_kw_type_errors(1; a="str", b=10))
+
+    # Throwing of UndefKeywordError
+    JuliaLowering.include_string(test_mod, """
+    function f_kw_no_default(; x)
+        x
+    end
+    """)
+    @test test_mod.f_kw_no_default(x = 10) == 10
+    @test_throws UndefKeywordError(:x) test_mod.f_kw_no_default() == 10
+
+    # Closure with keywords
+    cl = JuliaLowering.include_string(test_mod, """
+    let y = 1
+        function f_kw_closure(; x=10)
+            x + y
+        end
+    end
+    """)
+    @test cl() == 11
+    @test cl(x = 20) == 21
+end
+
+@testset "Write-only placeholder function arguments" begin
+    # positional arguments may be duplicate placeholders.  keyword arguments can
+    # contain placeholders, but they must be unique
+    params_req = [""
+                  "_"
+                  "::Int"
+                  "_, _"]
+    params_opt = [""
+                  "::Int=2"
+                  "_=2"]
+    params_va  = ["", "_..."]
+    params_kw  = [""
+                  "; _"
+                  "; _::Int"
+                  "; _::Int=1"
+                  "; _=1, __=2"
+                  "; _..."
+                  "; _=1, __..."]
+    local i = 0
+    for req in params_req, opt in params_opt, va in params_va, kw in params_kw
+        arg_str = join(filter(!isempty, (req, opt, va, kw)), ", ")
+        f_str = "function f_placeholders$i($arg_str); end"
+        i += 1
+        @testset "$f_str" begin
+            @test JuliaLowering.include_string(test_mod, f_str) isa Function
+        end
+    end
+end
+
+@testset "Generated functions" begin
+    for expr_compat_mode in (false, true)
+    @test JuliaLowering.include_string(test_mod, raw"""
+    begin
+        @generated function f_gen(x::NTuple{N,T}) where {N,T}
+            quote
+                ($x, $N, $T)
+            end
+        end
+
+        f_gen((1,2,3,4,5))
+    end
+    """; expr_compat_mode) == (NTuple{5,Int}, 5, Int)
+
+    @test JuliaLowering.include_string(test_mod, """
+    begin
+        @generated function f_gen_unnamed_args(::Type{T}, y, ::Type{U}) where {T, U}
+            return (T, y, U)
+        end
+
+        f_gen_unnamed_args(Int, UInt8(3), Float64)
+    end
+    """; expr_compat_mode) == (Int, UInt8, Float64)
+
+    @test JuliaLowering.include_string(test_mod, raw"""
+    begin
+        function f_partially_gen(x::NTuple{N,T}) where {N,T}
+            shared = :shared_stuff
+            if @generated
+                if N == 2
+                    error("intentionally broken codegen (will trigger nongen branch)")
+                end
+                quote
+                    unshared = (:gen, ($x, $N, $T))
+                end
+            else
+                unshared = (:nongen, (typeof(x), N, T))
+            end
+            (shared, unshared)
+        end
+
+        (f_partially_gen((1,2)), f_partially_gen((1,2,3,4,5)))
+    end
+    """; expr_compat_mode) ==
+        ((:shared_stuff, (:nongen, (NTuple{2,Int}, 2, Int))),
+         (:shared_stuff, (:gen, (NTuple{5,Int}, 5, Int))))
+
+    @test JuliaLowering.include_string(test_mod, raw"""
+    begin
+        @generated function f_gen_calls_macros(x::T) where {T}
+            s = @raw_str "foo"
+            :(@raw_str $s)
+        end
+        f_gen_calls_macros(1)
+    end
+    """; expr_compat_mode) === "foo"
+    end
+
+    # Test generated function edges to bindings
+    # (see also https://github.com/JuliaLang/julia/pull/57230)
+    JuliaLowering.include_string(test_mod, raw"""
+    const delete_me = 4
+    @generated f_generated_return_delete_me() = return :(delete_me)
+    """)
+    @test test_mod.f_generated_return_delete_me() == 4
+    Base.delete_binding(test_mod, :delete_me)
+    @test_throws UndefVarError test_mod.f_generated_return_delete_me()
+end
+
+@testset "Broadcast" begin
+    @test JuliaLowering.include_string(test_mod, """
+    let x = [1,2], y = [3,4], z = [5,6]
+        x .* y .+ z
+    end
+    """) == [8, 14]
+
+    @test JuliaLowering.include_string(test_mod, """
+    let nums = [1, 2, 3]
+        string.(nums, base=2; pad=2)
+    end
+    """) == ["01", "10", "11"]
+
+    @test JuliaLowering.include_string(test_mod, """
+    let lhs = [0,0], x = [1,2], y = [3,4], z = [5,6]
+        lhs .= x .* y .+ z
+        lhs
+    end
+    """) == [8, 14]
+
+    @test JuliaLowering.include_string(test_mod, """
+    [1,2] .+ ([3,4] .< [5,6] .< [7,1])
+    """) == [2, 2]
+
+    @test JuliaLowering.include_string(test_mod, """
+    let
+        x = [0,0,0,0]
+        x[begin+1:end-1] .= [1,2] .+ [3,4]
+        x
+    end
+    """) == [0,4,6,0]
+end
+
+end
diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl
new file mode 100644
index 0000000000000..c6de753de71ca
--- /dev/null
+++ b/JuliaLowering/test/functions_ir.jl
@@ -0,0 +1,1619 @@
+########################################
+# Function declaration with no methods
+function f
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (return %₃)
+
+########################################
+# Functions with placeholder arg
+function f(x, _, y)
+    x + y
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄ core.Any core.Any core.Any)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/#arg2#(!read) slot₄/y]
+    1   TestMod.+
+    2   (call %₁ slot₂/x slot₄/y)
+    3   (return %₂)
+10  latestworld
+11  TestMod.f
+12  (return %₁₁)
+
+########################################
+# Functions with argument types only, no name
+function f(::T, x)
+    x
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   TestMod.T
+6   (call core.svec %₄ %₅ core.Any)
+7   (call core.svec)
+8   SourceLocation::1:10
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(!read) slot₂/#arg1#(!read) slot₃/x]
+    1   slot₃/x
+    2   (return %₁)
+11  latestworld
+12  TestMod.f
+13  (return %₁₂)
+
+########################################
+# Functions argument types
+function f(x, y::T)
+    body
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   TestMod.T
+6   (call core.svec %₄ core.Any %₅)
+7   (call core.svec)
+8   SourceLocation::1:10
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)]
+    1   TestMod.body
+    2   (return %₁)
+11  latestworld
+12  TestMod.f
+13  (return %₁₂)
+
+########################################
+# Functions with slurp of Any
+function f(x, ys...)
+    body
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.apply_type core.Vararg core.Any)
+6   (call core.svec %₄ core.Any %₅)
+7   (call core.svec)
+8   SourceLocation::1:10
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys(!read)]
+    1   TestMod.body
+    2   (return %₁)
+11  latestworld
+12  TestMod.f
+13  (return %₁₂)
+
+########################################
+# Functions with slurp of T
+function f(x, ys::T...)
+    body
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   TestMod.T
+6   (call core.apply_type core.Vararg %₅)
+7   (call core.svec %₄ core.Any %₆)
+8   (call core.svec)
+9   SourceLocation::1:10
+10  (call core.svec %₇ %₈ %₉)
+11  --- method core.nothing %₁₀
+    slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys(!read)]
+    1   TestMod.body
+    2   (return %₁)
+12  latestworld
+13  TestMod.f
+14  (return %₁₃)
+
+########################################
+# Error: Function with slurp not in last position arg
+function f(xs..., y)
+    body
+end
+#---------------------
+LoweringError:
+function f(xs..., y)
+#          └───┘ ── `...` may only be used for the last positional argument
+    body
+end
+
+########################################
+# Basic static parameters
+function f(::T, ::U, ::V) where T where {U,V}
+    (T,U,V)
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   (= slot₂/U (call core.TypeVar :U))
+4   (= slot₃/V (call core.TypeVar :V))
+5   (= slot₁/T (call core.TypeVar :T))
+6   TestMod.f
+7   (call core.Typeof %₆)
+8   slot₁/T
+9   slot₂/U
+10  slot₃/V
+11  (call core.svec %₇ %₈ %₉ %₁₀)
+12  slot₂/U
+13  slot₃/V
+14  slot₁/T
+15  (call core.svec %₁₂ %₁₃ %₁₄)
+16  SourceLocation::1:10
+17  (call core.svec %₁₁ %₁₅ %₁₆)
+18  --- method core.nothing %₁₇
+    slots: [slot₁/#self#(!read) slot₂/#arg1#(!read) slot₃/#arg2#(!read) slot₄/#arg3#(!read)]
+    1   static_parameter₃
+    2   static_parameter₁
+    3   static_parameter₂
+    4   (call core.tuple %₁ %₂ %₃)
+    5   (return %₄)
+19  latestworld
+20  TestMod.f
+21  (return %₂₀)
+
+########################################
+# Static parameter with bounds and used with apply_type in argument
+function f(::S{T}) where X <: T <: Y
+    T
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.X
+4   TestMod.Y
+5   (= slot₁/T (call core.TypeVar :T %₃ %₄))
+6   TestMod.f
+7   (call core.Typeof %₆)
+8   TestMod.S
+9   slot₁/T
+10  (call core.apply_type %₈ %₉)
+11  (call core.svec %₇ %₁₀)
+12  slot₁/T
+13  (call core.svec %₁₂)
+14  SourceLocation::1:10
+15  (call core.svec %₁₁ %₁₃ %₁₄)
+16  --- method core.nothing %₁₅
+    slots: [slot₁/#self#(!read) slot₂/#arg1#(!read)]
+    1   static_parameter₁
+    2   (return %₁)
+17  latestworld
+18  TestMod.f
+19  (return %₁₈)
+
+########################################
+# Static parameter with lower bound
+function f(::S{T}) where T >: X
+    T
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.X
+4   (= slot₁/T (call core.TypeVar :T %₃ core.Any))
+5   TestMod.f
+6   (call core.Typeof %₅)
+7   TestMod.S
+8   slot₁/T
+9   (call core.apply_type %₇ %₈)
+10  (call core.svec %₆ %₉)
+11  slot₁/T
+12  (call core.svec %₁₁)
+13  SourceLocation::1:10
+14  (call core.svec %₁₀ %₁₂ %₁₃)
+15  --- method core.nothing %₁₄
+    slots: [slot₁/#self#(!read) slot₂/#arg1#(!read)]
+    1   static_parameter₁
+    2   (return %₁)
+16  latestworld
+17  TestMod.f
+18  (return %₁₇)
+
+########################################
+# Static parameter which is used only in the bounds of another static parameter
+# See https://github.com/JuliaLang/julia/issues/49275
+function f(x, y::S) where {T, S<:AbstractVector{T}}
+    (T,S)
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   (= slot₂/T (call core.TypeVar :T))
+4   TestMod.AbstractVector
+5   slot₂/T
+6   (call core.apply_type %₄ %₅)
+7   (= slot₁/S (call core.TypeVar :S %₆))
+8   TestMod.f
+9   (call core.Typeof %₈)
+10  slot₁/S
+11  (call core.svec %₉ core.Any %₁₀)
+12  slot₂/T
+13  slot₁/S
+14  (call core.svec %₁₂ %₁₃)
+15  SourceLocation::1:10
+16  (call core.svec %₁₁ %₁₄ %₁₅)
+17  --- method core.nothing %₁₆
+    slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)]
+    1   static_parameter₁
+    2   static_parameter₂
+    3   (call core.tuple %₁ %₂)
+    4   (return %₃)
+18  latestworld
+19  TestMod.f
+20  (return %₁₉)
+
+########################################
+# Error: Static parameter which is unused
+function f(::T) where {T,S}
+    (T,S)
+end
+#---------------------
+LoweringError:
+function f(::T) where {T,S}
+#                        ╙ ── Method definition declares type variable but does not use it in the type of any function parameter
+    (T,S)
+end
+
+########################################
+# Return types
+function f(x)::Int
+    if x
+        42.0
+    end
+    0xff
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄ core.Any)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read)]
+    1   TestMod.Int
+    2   (gotoifnot slot₂/x label₃)
+    3   (= slot₃/tmp 0xff)
+    4   slot₃/tmp
+    5   (call core.isa %₄ %₁)
+    6   (gotoifnot %₅ label₈)
+    7   (goto label₁₁)
+    8   slot₃/tmp
+    9   (call top.convert %₁ %₈)
+    10  (= slot₃/tmp (call core.typeassert %₉ %₁))
+    11  slot₃/tmp
+    12  (return %₁₁)
+10  latestworld
+11  TestMod.f
+12  (return %₁₁)
+
+########################################
+# Callable type
+function (::T)(x)
+    x
+end
+#---------------------
+1   TestMod.T
+2   (call core.svec %₁ core.Any)
+3   (call core.svec)
+4   SourceLocation::1:10
+5   (call core.svec %₂ %₃ %₄)
+6   --- method core.nothing %₅
+    slots: [slot₁/#self#(!read) slot₂/x]
+    1   slot₂/x
+    2   (return %₁)
+7   latestworld
+8   (return core.nothing)
+
+########################################
+# Callable type with instance
+function (y::T)(x)
+    (y, x)
+end
+#---------------------
+1   TestMod.T
+2   (call core.svec %₁ core.Any)
+3   (call core.svec)
+4   SourceLocation::1:10
+5   (call core.svec %₂ %₃ %₄)
+6   --- method core.nothing %₅
+    slots: [slot₁/y slot₂/x]
+    1   (call core.tuple slot₁/y slot₂/x)
+    2   (return %₁)
+7   latestworld
+8   (return core.nothing)
+
+########################################
+# `where` params used in callable object type
+function (x::X1{T})() where T
+    T
+end
+#---------------------
+1   (= slot₁/T (call core.TypeVar :T))
+2   TestMod.X1
+3   slot₁/T
+4   (call core.apply_type %₂ %₃)
+5   (call core.svec %₄)
+6   slot₁/T
+7   (call core.svec %₆)
+8   SourceLocation::1:10
+9   (call core.svec %₅ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/x(!read)]
+    1   static_parameter₁
+    2   (return %₁)
+11  latestworld
+12  (return core.nothing)
+
+########################################
+# Function with module ref in name
+function A.f()
+end
+#---------------------
+1   TestMod.A
+2   (call top.getproperty %₁ :f)
+3   (call core.Typeof %₂)
+4   (call core.svec %₃)
+5   (call core.svec)
+6   SourceLocation::1:10
+7   (call core.svec %₄ %₅ %₆)
+8   --- method core.nothing %₇
+    slots: [slot₁/#self#(!read)]
+    1   (return core.nothing)
+9   latestworld
+10  (return core.nothing)
+
+########################################
+# Error: Invalid dotop function name
+function (.+)(x,y)
+end
+#---------------------
+LoweringError:
+function (.+)(x,y)
+#        └───────┘ ── Cannot define function using `.` broadcast syntax
+end
+
+########################################
+# Error: Invalid function name
+function f[](x,y)
+end
+#---------------------
+LoweringError:
+function f[](x,y)
+#        └─┘ ── Invalid function name
+end
+
+########################################
+# Simple positional args with defaults
+function f(x::T, y::S=1, z::U=2)
+    (x,y)
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   TestMod.T
+6   (call core.svec %₄ %₅)
+7   (call core.svec)
+8   SourceLocation::1:10
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(called) slot₂/x]
+    1   (call slot₁/#self# slot₂/x 1 2)
+    2   (return %₁)
+11  latestworld
+12  TestMod.f
+13  (call core.Typeof %₁₂)
+14  TestMod.T
+15  TestMod.S
+16  (call core.svec %₁₃ %₁₄ %₁₅)
+17  (call core.svec)
+18  SourceLocation::1:10
+19  (call core.svec %₁₆ %₁₇ %₁₈)
+20  --- method core.nothing %₁₉
+    slots: [slot₁/#self#(called) slot₂/x slot₃/y]
+    1   (call slot₁/#self# slot₂/x slot₃/y 2)
+    2   (return %₁)
+21  latestworld
+22  TestMod.f
+23  (call core.Typeof %₂₂)
+24  TestMod.T
+25  TestMod.S
+26  TestMod.U
+27  (call core.svec %₂₃ %₂₄ %₂₅ %₂₆)
+28  (call core.svec)
+29  SourceLocation::1:10
+30  (call core.svec %₂₇ %₂₈ %₂₉)
+31  --- method core.nothing %₃₀
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z(!read)]
+    1   (call core.tuple slot₂/x slot₃/y)
+    2   (return %₁)
+32  latestworld
+33  TestMod.f
+34  (return %₃₃)
+
+########################################
+# Default positional args which depend on other args
+function f(x=1, y=x)
+    (x,y)
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(called)]
+    1   (call slot₁/#self# 1)
+    2   (return %₁)
+10  latestworld
+11  TestMod.f
+12  (call core.Typeof %₁₁)
+13  (call core.svec %₁₂ core.Any)
+14  (call core.svec)
+15  SourceLocation::1:10
+16  (call core.svec %₁₃ %₁₄ %₁₅)
+17  --- method core.nothing %₁₆
+    slots: [slot₁/#self#(called) slot₂/x]
+    1   (call slot₁/#self# slot₂/x slot₂/x)
+    2   (return %₁)
+18  latestworld
+19  TestMod.f
+20  (call core.Typeof %₁₉)
+21  (call core.svec %₂₀ core.Any core.Any)
+22  (call core.svec)
+23  SourceLocation::1:10
+24  (call core.svec %₂₁ %₂₂ %₂₃)
+25  --- method core.nothing %₂₄
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/y]
+    1   (call core.tuple slot₂/x slot₃/y)
+    2   (return %₁)
+26  latestworld
+27  TestMod.f
+28  (return %₂₇)
+
+########################################
+# Default positional args with missing arg names (implicit placeholders)
+function f(::Int, y=1, z=2)
+    (y, z)
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   TestMod.Int
+6   (call core.svec %₄ %₅)
+7   (call core.svec)
+8   SourceLocation::1:10
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(called) slot₂/#arg1#]
+    1   (call slot₁/#self# slot₂/#arg1# 1 2)
+    2   (return %₁)
+11  latestworld
+12  TestMod.f
+13  (call core.Typeof %₁₂)
+14  TestMod.Int
+15  (call core.svec %₁₃ %₁₄ core.Any)
+16  (call core.svec)
+17  SourceLocation::1:10
+18  (call core.svec %₁₅ %₁₆ %₁₇)
+19  --- method core.nothing %₁₈
+    slots: [slot₁/#self#(called) slot₂/#arg1# slot₃/y]
+    1   (call slot₁/#self# slot₂/#arg1# slot₃/y 2)
+    2   (return %₁)
+20  latestworld
+21  TestMod.f
+22  (call core.Typeof %₂₁)
+23  TestMod.Int
+24  (call core.svec %₂₂ %₂₃ core.Any core.Any)
+25  (call core.svec)
+26  SourceLocation::1:10
+27  (call core.svec %₂₄ %₂₅ %₂₆)
+28  --- method core.nothing %₂₇
+    slots: [slot₁/#self#(!read) slot₂/#arg1#(!read) slot₃/y slot₄/z]
+    1   (call core.tuple slot₃/y slot₄/z)
+    2   (return %₁)
+29  latestworld
+30  TestMod.f
+31  (return %₃₀)
+
+########################################
+# Default positional args with placeholders
+function f(_::Int, x=1)
+    x
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   TestMod.Int
+6   (call core.svec %₄ %₅)
+7   (call core.svec)
+8   SourceLocation::1:10
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(called) slot₂/#arg1#]
+    1   (call slot₁/#self# slot₂/#arg1# 1)
+    2   (return %₁)
+11  latestworld
+12  TestMod.f
+13  (call core.Typeof %₁₂)
+14  TestMod.Int
+15  (call core.svec %₁₃ %₁₄ core.Any)
+16  (call core.svec)
+17  SourceLocation::1:10
+18  (call core.svec %₁₅ %₁₆ %₁₇)
+19  --- method core.nothing %₁₈
+    slots: [slot₁/#self#(!read) slot₂/#arg1#(!read) slot₃/x]
+    1   slot₃/x
+    2   (return %₁)
+20  latestworld
+21  TestMod.f
+22  (return %₂₁)
+
+########################################
+# Positional args with defaults and `where` clauses
+function f(x::T, y::S=1, z::U=2) where {T,S<:T,U<:S}
+    (x,y,z)
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   (= slot₂/T (call core.TypeVar :T))
+4   slot₂/T
+5   (= slot₁/S (call core.TypeVar :S %₄))
+6   slot₁/S
+7   (= slot₃/U (call core.TypeVar :U %₆))
+8   TestMod.f
+9   (call core.Typeof %₈)
+10  slot₂/T
+11  (call core.svec %₉ %₁₀)
+12  slot₂/T
+13  (call core.svec %₁₂)
+14  SourceLocation::1:10
+15  (call core.svec %₁₁ %₁₃ %₁₄)
+16  --- method core.nothing %₁₅
+    slots: [slot₁/#self#(called) slot₂/x]
+    1   (call slot₁/#self# slot₂/x 1 2)
+    2   (return %₁)
+17  latestworld
+18  TestMod.f
+19  (call core.Typeof %₁₈)
+20  slot₂/T
+21  slot₁/S
+22  (call core.svec %₁₉ %₂₀ %₂₁)
+23  slot₂/T
+24  slot₁/S
+25  (call core.svec %₂₃ %₂₄)
+26  SourceLocation::1:10
+27  (call core.svec %₂₂ %₂₅ %₂₆)
+28  --- method core.nothing %₂₇
+    slots: [slot₁/#self#(called) slot₂/x slot₃/y]
+    1   (call slot₁/#self# slot₂/x slot₃/y 2)
+    2   (return %₁)
+29  latestworld
+30  TestMod.f
+31  (call core.Typeof %₃₀)
+32  slot₂/T
+33  slot₁/S
+34  slot₃/U
+35  (call core.svec %₃₁ %₃₂ %₃₃ %₃₄)
+36  slot₂/T
+37  slot₁/S
+38  slot₃/U
+39  (call core.svec %₃₆ %₃₇ %₃₈)
+40  SourceLocation::1:10
+41  (call core.svec %₃₅ %₃₉ %₄₀)
+42  --- method core.nothing %₄₁
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z]
+    1   (call core.tuple slot₂/x slot₃/y slot₄/z)
+    2   (return %₁)
+43  latestworld
+44  TestMod.f
+45  (return %₄₄)
+
+########################################
+# Positional args and type parameters with transitive dependencies
+# See https://github.com/JuliaLang/julia/issues/49275 - the first method
+# generated here for only `x` should contain zero type parameters.
+function f(x, y::S=[1], z::U=2) where {T, S<:AbstractVector{T}, U}
+    (x, y, z, T, S, U)
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   (= slot₂/T (call core.TypeVar :T))
+4   TestMod.AbstractVector
+5   slot₂/T
+6   (call core.apply_type %₄ %₅)
+7   (= slot₁/S (call core.TypeVar :S %₆))
+8   (= slot₃/U (call core.TypeVar :U))
+9   TestMod.f
+10  (call core.Typeof %₉)
+11  (call core.svec %₁₀ core.Any)
+12  (call core.svec)
+13  SourceLocation::1:10
+14  (call core.svec %₁₁ %₁₂ %₁₃)
+15  --- method core.nothing %₁₄
+    slots: [slot₁/#self#(called) slot₂/x]
+    1   (call top.vect 1)
+    2   (call slot₁/#self# slot₂/x %₁ 2)
+    3   (return %₂)
+16  latestworld
+17  TestMod.f
+18  (call core.Typeof %₁₇)
+19  slot₁/S
+20  (call core.svec %₁₈ core.Any %₁₉)
+21  slot₂/T
+22  slot₁/S
+23  (call core.svec %₂₁ %₂₂)
+24  SourceLocation::1:10
+25  (call core.svec %₂₀ %₂₃ %₂₄)
+26  --- method core.nothing %₂₅
+    slots: [slot₁/#self#(called) slot₂/x slot₃/y]
+    1   (call slot₁/#self# slot₂/x slot₃/y 2)
+    2   (return %₁)
+27  latestworld
+28  TestMod.f
+29  (call core.Typeof %₂₈)
+30  slot₁/S
+31  slot₃/U
+32  (call core.svec %₂₉ core.Any %₃₀ %₃₁)
+33  slot₂/T
+34  slot₁/S
+35  slot₃/U
+36  (call core.svec %₃₃ %₃₄ %₃₅)
+37  SourceLocation::1:10
+38  (call core.svec %₃₂ %₃₆ %₃₇)
+39  --- method core.nothing %₃₈
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z]
+    1   static_parameter₁
+    2   static_parameter₂
+    3   static_parameter₃
+    4   (call core.tuple slot₂/x slot₃/y slot₄/z %₁ %₂ %₃)
+    5   (return %₄)
+40  latestworld
+41  TestMod.f
+42  (return %₄₁)
+
+########################################
+# Default positional args are allowed before trailing slurp with no default
+function f(x=1, ys...)
+    ys
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(called)]
+    1   (call slot₁/#self# 1)
+    2   (return %₁)
+10  latestworld
+11  TestMod.f
+12  (call core.Typeof %₁₁)
+13  (call core.apply_type core.Vararg core.Any)
+14  (call core.svec %₁₂ core.Any %₁₃)
+15  (call core.svec)
+16  SourceLocation::1:10
+17  (call core.svec %₁₄ %₁₅ %₁₆)
+18  --- method core.nothing %₁₇
+    slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys]
+    1   slot₃/ys
+    2   (return %₁)
+19  latestworld
+20  TestMod.f
+21  (return %₂₀)
+
+########################################
+# Error: Default positional args before non-default arg
+function f(x=1, ys, z=2)
+    ys
+end
+#---------------------
+LoweringError:
+function f(x=1, ys, z=2)
+#          └─┘ ── optional positional arguments must occur at end
+    ys
+end
+
+########################################
+# Positional arg with slurp and default
+function f(xs...=1)
+    xs
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(called)]
+    1   (call slot₁/#self# 1)
+    2   (return %₁)
+10  latestworld
+11  TestMod.f
+12  (call core.Typeof %₁₁)
+13  (call core.apply_type core.Vararg core.Any)
+14  (call core.svec %₁₂ %₁₃)
+15  (call core.svec)
+16  SourceLocation::1:10
+17  (call core.svec %₁₄ %₁₅ %₁₆)
+18  --- method core.nothing %₁₇
+    slots: [slot₁/#self#(!read) slot₂/xs]
+    1   slot₂/xs
+    2   (return %₁)
+19  latestworld
+20  TestMod.f
+21  (return %₂₀)
+
+########################################
+# Positional arg with slurp and splatted default value
+function f(xs...=(1,2)...)
+    xs
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#]
+    1   (call core.tuple 1 2)
+    2   (call core._apply_iterate top.iterate slot₁/#self# %₁)
+    3   (return %₂)
+10  latestworld
+11  TestMod.f
+12  (call core.Typeof %₁₁)
+13  (call core.apply_type core.Vararg core.Any)
+14  (call core.svec %₁₂ %₁₃)
+15  (call core.svec)
+16  SourceLocation::1:10
+17  (call core.svec %₁₄ %₁₅ %₁₆)
+18  --- method core.nothing %₁₇
+    slots: [slot₁/#self#(!read) slot₂/xs]
+    1   slot₂/xs
+    2   (return %₁)
+19  latestworld
+20  TestMod.f
+21  (return %₂₀)
+
+########################################
+# Trivial function argument destructuring
+function f(x, (y,z), w)
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄ core.Any core.Any core.Any)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/destructured_arg slot₄/w(!read) slot₅/iterstate slot₆/y(!read) slot₇/z(!read)]
+    1   (call top.indexed_iterate slot₃/destructured_arg 1)
+    2   (= slot₆/y (call core.getfield %₁ 1))
+    3   (= slot₅/iterstate (call core.getfield %₁ 2))
+    4   slot₅/iterstate
+    5   (call top.indexed_iterate slot₃/destructured_arg 2 %₄)
+    6   (= slot₇/z (call core.getfield %₅ 1))
+    7   (return core.nothing)
+10  latestworld
+11  TestMod.f
+12  (return %₁₁)
+
+########################################
+# Function argument destructuring combined with splats, types and and defaults
+function f((x,)::T...=rhs)
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(called)]
+    1   TestMod.rhs
+    2   (call slot₁/#self# %₁)
+    3   (return %₂)
+10  latestworld
+11  TestMod.f
+12  (call core.Typeof %₁₁)
+13  TestMod.T
+14  (call core.apply_type core.Vararg %₁₃)
+15  (call core.svec %₁₂ %₁₄)
+16  (call core.svec)
+17  SourceLocation::1:10
+18  (call core.svec %₁₅ %₁₆ %₁₇)
+19  --- method core.nothing %₁₈
+    slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/x(!read)]
+    1   (call top.indexed_iterate slot₂/destructured_arg 1)
+    2   (= slot₃/x (call core.getfield %₁ 1))
+    3   (return core.nothing)
+20  latestworld
+21  TestMod.f
+22  (return %₂₁)
+
+########################################
+# Function argument destructuring combined with splats, types and and defaults
+function f(x=default_x)::T
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(called)]
+    1   TestMod.default_x
+    2   (call slot₁/#self# %₁)
+    3   (return %₂)
+10  latestworld
+11  TestMod.f
+12  (call core.Typeof %₁₁)
+13  (call core.svec %₁₂ core.Any)
+14  (call core.svec)
+15  SourceLocation::1:10
+16  (call core.svec %₁₃ %₁₄ %₁₅)
+17  --- method core.nothing %₁₆
+    slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/tmp(!read)]
+    1   TestMod.T
+    2   (= slot₃/tmp core.nothing)
+    3   slot₃/tmp
+    4   (call core.isa %₃ %₁)
+    5   (gotoifnot %₄ label₇)
+    6   (goto label₁₀)
+    7   slot₃/tmp
+    8   (call top.convert %₁ %₇)
+    9   (= slot₃/tmp (call core.typeassert %₈ %₁))
+    10  slot₃/tmp
+    11  (return %₁₀)
+18  latestworld
+19  TestMod.f
+20  (return %₁₉)
+
+########################################
+# Duplicate positional placeholders ok
+function f(_, _); end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄ core.Any core.Any)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/#arg1#(!read) slot₃/#arg2#(!read)]
+    1   (return core.nothing)
+10  latestworld
+11  TestMod.f
+12  (return %₁₁)
+
+########################################
+# Duplicate destructured placeholders ok
+function f((_,), (_,))
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄ core.Any core.Any)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/destructured_arg]
+    1   (call top.indexed_iterate slot₂/destructured_arg 1)
+    2   (call core.getfield %₁ 1)
+    3   (call top.indexed_iterate slot₃/destructured_arg 1)
+    4   (call core.getfield %₃ 1)
+    5   (return core.nothing)
+10  latestworld
+11  TestMod.f
+12  (return %₁₁)
+
+########################################
+# Slot flags
+function f(@nospecialize(x), g, y)
+    g() + y
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄ core.Any core.Any core.Any)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/x(nospecialize,!read) slot₃/g(called) slot₄/y]
+    1   TestMod.+
+    2   (call slot₃/g)
+    3   (call %₁ %₂ slot₄/y)
+    4   (return %₃)
+10  latestworld
+11  TestMod.f
+12  (return %₁₁)
+
+########################################
+# Function return without arguments
+function f()
+    return
+    after_return # <- distinguish output from implicit return
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read)]
+    1   (return core.nothing)
+    2   TestMod.after_return
+    3   (return %₂)
+10  latestworld
+11  TestMod.f
+12  (return %₁₁)
+
+########################################
+# Function return in value position is allowed
+function f()
+    x = return 1
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/x(!read)]
+    1   (return 1)
+    2   (= slot₂/x core.nothing)
+10  latestworld
+11  TestMod.f
+12  (return %₁₁)
+
+########################################
+# Binding docs to functions
+"""
+some docs
+"""
+function f()
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::4:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read)]
+    1   (return core.nothing)
+10  latestworld
+11  TestMod.f
+12  (call JuliaLowering.bind_docs! %₁₁ "some docs\n" %₈)
+13  TestMod.f
+14  (return %₁₃)
+
+########################################
+# Binding docs to callable type
+"""
+some docs
+"""
+function (x::T)()
+end
+#---------------------
+1   TestMod.T
+2   (call core.svec %₁)
+3   (call core.svec)
+4   SourceLocation::4:10
+5   (call core.svec %₂ %₃ %₄)
+6   --- method core.nothing %₅
+    slots: [slot₁/x(!read)]
+    1   (return core.nothing)
+7   latestworld
+8   TestMod.T
+9   (call JuliaLowering.bind_docs! %₈ "some docs\n" %₅)
+10  (return core.nothing)
+
+########################################
+# Keyword function with defaults.
+# Order of methods
+# 1. #f_kw_simple#0(x, y, ::typeof(f_kw_simple), a, b)  (body)
+# 2. Core.kwcall(kws, ::typeof(f_kw_simple))
+# 3. Core.kwcall(kws, ::typeof(f_kw_simple), a)
+# 4. Core.kwcall(kws, ::typeof(f_kw_simple), a, b)      (kwcall body)
+# 5. f_kw_simple()
+# 6. f_kw_simple(a)
+# 7. f_kw_simple(a, b)
+function f_kw_simple(a::Int=1, b::Float64=1.0; x::Char='a', y::Bool=true)
+    (a, b, x, y)
+end
+#---------------------
+1   (method TestMod.f_kw_simple)
+2   latestworld
+3   (method TestMod.#f_kw_simple#0)
+4   latestworld
+5   TestMod.#f_kw_simple#0
+6   (call core.Typeof %₅)
+7   TestMod.Char
+8   TestMod.Bool
+9   TestMod.f_kw_simple
+10  (call core.Typeof %₉)
+11  TestMod.Int
+12  TestMod.Float64
+13  (call core.svec %₆ %₇ %₈ %₁₀ %₁₁ %₁₂)
+14  (call core.svec)
+15  SourceLocation::1:10
+16  (call core.svec %₁₃ %₁₄ %₁₅)
+17  --- method core.nothing %₁₆
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/#self#(!read) slot₅/a slot₆/b]
+    1   (meta :nkw 2)
+    2   (call core.tuple slot₅/a slot₆/b slot₂/x slot₃/y)
+    3   (return %₂)
+18  latestworld
+19  (call core.typeof core.kwcall)
+20  TestMod.f_kw_simple
+21  (call core.Typeof %₂₀)
+22  (call core.svec %₁₉ core.NamedTuple %₂₁)
+23  (call core.svec)
+24  SourceLocation::1:10
+25  (call core.svec %₂₂ %₂₃ %₂₄)
+26  --- method core.nothing %₂₅
+    slots: [slot₁/#self#(called) slot₂/kws slot₃/#self#]
+    1   (call slot₁/#self# slot₂/kws slot₃/#self# 1 1.0)
+    2   (return %₁)
+27  latestworld
+28  (call core.typeof core.kwcall)
+29  TestMod.f_kw_simple
+30  (call core.Typeof %₂₉)
+31  TestMod.Int
+32  (call core.svec %₂₈ core.NamedTuple %₃₀ %₃₁)
+33  (call core.svec)
+34  SourceLocation::1:10
+35  (call core.svec %₃₂ %₃₃ %₃₄)
+36  --- method core.nothing %₃₅
+    slots: [slot₁/#self#(called) slot₂/kws slot₃/#self# slot₄/a]
+    1   (call slot₁/#self# slot₂/kws slot₃/#self# slot₄/a 1.0)
+    2   (return %₁)
+37  latestworld
+38  (call core.typeof core.kwcall)
+39  TestMod.f_kw_simple
+40  (call core.Typeof %₃₉)
+41  TestMod.Int
+42  TestMod.Float64
+43  (call core.svec %₃₈ core.NamedTuple %₄₀ %₄₁ %₄₂)
+44  (call core.svec)
+45  SourceLocation::1:10
+46  (call core.svec %₄₃ %₄₄ %₄₅)
+47  --- method core.nothing %₄₆
+    slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/a slot₅/b slot₆/kwtmp slot₇/x(!read) slot₈/y(!read)]
+    1   (newvar slot₇/x)
+    2   (newvar slot₈/y)
+    3   (call core.isdefined slot₂/kws :x)
+    4   (gotoifnot %₃ label₁₅)
+    5   (call core.getfield slot₂/kws :x)
+    6   TestMod.Char
+    7   (call core.isa %₅ %₆)
+    8   (gotoifnot %₇ label₁₀)
+    9   (goto label₁₃)
+    10  TestMod.Char
+    11  (new core.TypeError :keyword argument :x %₁₀ %₅)
+    12  (call core.throw %₁₁)
+    13  (= slot₆/kwtmp %₅)
+    14  (goto label₁₆)
+    15  (= slot₆/kwtmp 'a')
+    16  slot₆/kwtmp
+    17  (call core.isdefined slot₂/kws :y)
+    18  (gotoifnot %₁₇ label₂₉)
+    19  (call core.getfield slot₂/kws :y)
+    20  TestMod.Bool
+    21  (call core.isa %₁₉ %₂₀)
+    22  (gotoifnot %₂₁ label₂₄)
+    23  (goto label₂₇)
+    24  TestMod.Bool
+    25  (new core.TypeError :keyword argument :y %₂₄ %₁₉)
+    26  (call core.throw %₂₅)
+    27  (= slot₆/kwtmp %₁₉)
+    28  (goto label₃₀)
+    29  (= slot₆/kwtmp true)
+    30  slot₆/kwtmp
+    31  (call top.keys slot₂/kws)
+    32  (call core.tuple :x :y)
+    33  (call top.diff_names %₃₁ %₃₂)
+    34  (call top.isempty %₃₃)
+    35  (gotoifnot %₃₄ label₃₇)
+    36  (goto label₃₈)
+    37  (call top.kwerr slot₂/kws slot₃/#self# slot₄/a slot₅/b)
+    38  TestMod.#f_kw_simple#0
+    39  (call %₃₈ %₁₆ %₃₀ slot₃/#self# slot₄/a slot₅/b)
+    40  (return %₃₉)
+48  latestworld
+49  TestMod.f_kw_simple
+50  (call core.Typeof %₄₉)
+51  (call core.svec %₅₀)
+52  (call core.svec)
+53  SourceLocation::1:10
+54  (call core.svec %₅₁ %₅₂ %₅₃)
+55  --- method core.nothing %₅₄
+    slots: [slot₁/#self#(called)]
+    1   (call slot₁/#self# 1 1.0)
+    2   (return %₁)
+56  latestworld
+57  TestMod.f_kw_simple
+58  (call core.Typeof %₅₇)
+59  TestMod.Int
+60  (call core.svec %₅₈ %₅₉)
+61  (call core.svec)
+62  SourceLocation::1:10
+63  (call core.svec %₆₀ %₆₁ %₆₂)
+64  --- method core.nothing %₆₃
+    slots: [slot₁/#self#(called) slot₂/a]
+    1   (call slot₁/#self# slot₂/a 1.0)
+    2   (return %₁)
+65  latestworld
+66  TestMod.f_kw_simple
+67  (call core.Typeof %₆₆)
+68  TestMod.Int
+69  TestMod.Float64
+70  (call core.svec %₆₇ %₆₈ %₆₉)
+71  (call core.svec)
+72  SourceLocation::1:10
+73  (call core.svec %₇₀ %₇₁ %₇₂)
+74  --- method core.nothing %₇₃
+    slots: [slot₁/#self# slot₂/a slot₃/b]
+    1   TestMod.#f_kw_simple#0
+    2   (call %₁ 'a' true slot₁/#self# slot₂/a slot₃/b)
+    3   (return %₂)
+75  latestworld
+76  TestMod.f_kw_simple
+77  (return %₇₆)
+
+########################################
+# Error: Duplicate keyword placeholder name
+function f_kw_placeholders(; _=1, _=2); end
+#---------------------
+LoweringError:
+function f_kw_placeholders(; _=1, _=2); end
+#                                 ╙ ── function argument name not unique
+
+########################################
+# Keyword slurping - simple forwarding of all kws
+function f_kw_slurp_simple(; all_kws...)
+    all_kws
+end
+#---------------------
+1   (method TestMod.f_kw_slurp_simple)
+2   latestworld
+3   (method TestMod.#f_kw_slurp_simple#0)
+4   latestworld
+5   TestMod.#f_kw_slurp_simple#0
+6   (call core.Typeof %₅)
+7   (call top.pairs core.NamedTuple)
+8   TestMod.f_kw_slurp_simple
+9   (call core.Typeof %₈)
+10  (call core.svec %₆ %₇ %₉)
+11  (call core.svec)
+12  SourceLocation::1:10
+13  (call core.svec %₁₀ %₁₁ %₁₂)
+14  --- method core.nothing %₁₃
+    slots: [slot₁/#self#(!read) slot₂/all_kws slot₃/#self#(!read)]
+    1   (meta :nkw 1)
+    2   slot₂/all_kws
+    3   (return %₂)
+15  latestworld
+16  (call core.typeof core.kwcall)
+17  TestMod.f_kw_slurp_simple
+18  (call core.Typeof %₁₇)
+19  (call core.svec %₁₆ core.NamedTuple %₁₈)
+20  (call core.svec)
+21  SourceLocation::1:10
+22  (call core.svec %₁₉ %₂₀ %₂₁)
+23  --- method core.nothing %₂₂
+    slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/all_kws(!read)]
+    1   (newvar slot₄/all_kws)
+    2   (call top.pairs slot₂/kws)
+    3   TestMod.#f_kw_slurp_simple#0
+    4   (call %₃ %₂ slot₃/#self#)
+    5   (return %₄)
+24  latestworld
+25  TestMod.f_kw_slurp_simple
+26  (call core.Typeof %₂₅)
+27  (call core.svec %₂₆)
+28  (call core.svec)
+29  SourceLocation::1:10
+30  (call core.svec %₂₇ %₂₈ %₂₉)
+31  --- method core.nothing %₃₀
+    slots: [slot₁/#self#]
+    1   TestMod.#f_kw_slurp_simple#0
+    2   (call core.NamedTuple)
+    3   (call top.pairs %₂)
+    4   (call %₁ %₃ slot₁/#self#)
+    5   (return %₄)
+32  latestworld
+33  TestMod.f_kw_slurp_simple
+34  (return %₃₃)
+
+########################################
+# Keyword slurping
+function f_kw_slurp(; x=x_default, non_x_kws...)
+    all_kws
+end
+#---------------------
+1   (method TestMod.f_kw_slurp)
+2   latestworld
+3   (method TestMod.#f_kw_slurp#0)
+4   latestworld
+5   TestMod.#f_kw_slurp#0
+6   (call core.Typeof %₅)
+7   (call top.pairs core.NamedTuple)
+8   TestMod.f_kw_slurp
+9   (call core.Typeof %₈)
+10  (call core.svec %₆ core.Any %₇ %₉)
+11  (call core.svec)
+12  SourceLocation::1:10
+13  (call core.svec %₁₀ %₁₁ %₁₂)
+14  --- method core.nothing %₁₃
+    slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/non_x_kws(!read) slot₄/#self#(!read)]
+    1   (meta :nkw 2)
+    2   TestMod.all_kws
+    3   (return %₂)
+15  latestworld
+16  (call core.typeof core.kwcall)
+17  TestMod.f_kw_slurp
+18  (call core.Typeof %₁₇)
+19  (call core.svec %₁₆ core.NamedTuple %₁₈)
+20  (call core.svec)
+21  SourceLocation::1:10
+22  (call core.svec %₁₉ %₂₀ %₂₁)
+23  --- method core.nothing %₂₂
+    slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/non_x_kws(!read) slot₆/x(!read)]
+    1   (newvar slot₅/non_x_kws)
+    2   (newvar slot₆/x)
+    3   (call core.isdefined slot₂/kws :x)
+    4   (gotoifnot %₃ label₈)
+    5   (call core.getfield slot₂/kws :x)
+    6   (= slot₄/kwtmp %₅)
+    7   (goto label₁₀)
+    8   TestMod.x_default
+    9   (= slot₄/kwtmp %₈)
+    10  slot₄/kwtmp
+    11  (call core.tuple :x)
+    12  (call core.apply_type core.NamedTuple %₁₁)
+    13  (call top.structdiff slot₂/kws %₁₂)
+    14  (call top.pairs %₁₃)
+    15  TestMod.#f_kw_slurp#0
+    16  (call %₁₅ %₁₀ %₁₄ slot₃/#self#)
+    17  (return %₁₆)
+24  latestworld
+25  TestMod.f_kw_slurp
+26  (call core.Typeof %₂₅)
+27  (call core.svec %₂₆)
+28  (call core.svec)
+29  SourceLocation::1:10
+30  (call core.svec %₂₇ %₂₈ %₂₉)
+31  --- method core.nothing %₃₀
+    slots: [slot₁/#self#]
+    1   TestMod.#f_kw_slurp#0
+    2   TestMod.x_default
+    3   (call core.NamedTuple)
+    4   (call top.pairs %₃)
+    5   (call %₁ %₂ %₄ slot₁/#self#)
+    6   (return %₅)
+32  latestworld
+33  TestMod.f_kw_slurp
+34  (return %₃₃)
+
+########################################
+# Static parameters used in keywords, with and without the static parameter
+# being present in positional argument types.
+#
+# Here the wrong type for `b` will get a `TypeError` but `A` will need to rely
+# on a MethodError.
+function f_kw_sparams(x::X; a::A=a_def, b::X=b_def) where {X,A}
+    (X,A)
+end
+#---------------------
+1   (method TestMod.f_kw_sparams)
+2   latestworld
+3   (method TestMod.#f_kw_sparams#0)
+4   latestworld
+5   (= slot₂/X (call core.TypeVar :X))
+6   (= slot₁/A (call core.TypeVar :A))
+7   TestMod.#f_kw_sparams#0
+8   (call core.Typeof %₇)
+9   slot₁/A
+10  slot₂/X
+11  TestMod.f_kw_sparams
+12  (call core.Typeof %₁₁)
+13  slot₂/X
+14  (call core.svec %₈ %₉ %₁₀ %₁₂ %₁₃)
+15  slot₂/X
+16  slot₁/A
+17  (call core.svec %₁₅ %₁₆)
+18  SourceLocation::1:10
+19  (call core.svec %₁₄ %₁₇ %₁₈)
+20  --- method core.nothing %₁₉
+    slots: [slot₁/#self#(!read) slot₂/a(!read) slot₃/b(!read) slot₄/#self#(!read) slot₅/x(!read)]
+    1   (meta :nkw 2)
+    2   static_parameter₁
+    3   static_parameter₂
+    4   (call core.tuple %₂ %₃)
+    5   (return %₄)
+21  latestworld
+22  (= slot₄/X (call core.TypeVar :X))
+23  (= slot₃/A (call core.TypeVar :A))
+24  (call core.typeof core.kwcall)
+25  TestMod.f_kw_sparams
+26  (call core.Typeof %₂₅)
+27  slot₄/X
+28  (call core.svec %₂₄ core.NamedTuple %₂₆ %₂₇)
+29  slot₄/X
+30  (call core.svec %₂₉)
+31  SourceLocation::1:10
+32  (call core.svec %₂₈ %₃₀ %₃₁)
+33  --- method core.nothing %₃₂
+    slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/x slot₅/kwtmp slot₆/a(!read) slot₇/b(!read)]
+    1   (newvar slot₆/a)
+    2   (newvar slot₇/b)
+    3   (call core.isdefined slot₂/kws :a)
+    4   (gotoifnot %₃ label₈)
+    5   (call core.getfield slot₂/kws :a)
+    6   (= slot₅/kwtmp %₅)
+    7   (goto label₁₀)
+    8   TestMod.a_def
+    9   (= slot₅/kwtmp %₈)
+    10  slot₅/kwtmp
+    11  (call core.isdefined slot₂/kws :b)
+    12  (gotoifnot %₁₁ label₂₃)
+    13  (call core.getfield slot₂/kws :b)
+    14  static_parameter₁
+    15  (call core.isa %₁₃ %₁₄)
+    16  (gotoifnot %₁₅ label₁₈)
+    17  (goto label₂₁)
+    18  static_parameter₁
+    19  (new core.TypeError :keyword argument :b %₁₈ %₁₃)
+    20  (call core.throw %₁₉)
+    21  (= slot₅/kwtmp %₁₃)
+    22  (goto label₂₅)
+    23  TestMod.b_def
+    24  (= slot₅/kwtmp %₂₃)
+    25  slot₅/kwtmp
+    26  (call top.keys slot₂/kws)
+    27  (call core.tuple :a :b)
+    28  (call top.diff_names %₂₆ %₂₇)
+    29  (call top.isempty %₂₈)
+    30  (gotoifnot %₂₉ label₃₂)
+    31  (goto label₃₃)
+    32  (call top.kwerr slot₂/kws slot₃/#self# slot₄/x)
+    33  TestMod.#f_kw_sparams#0
+    34  (call %₃₃ %₁₀ %₂₅ slot₃/#self# slot₄/x)
+    35  (return %₃₄)
+34  latestworld
+35  (= slot₆/X (call core.TypeVar :X))
+36  (= slot₅/A (call core.TypeVar :A))
+37  TestMod.f_kw_sparams
+38  (call core.Typeof %₃₇)
+39  slot₆/X
+40  (call core.svec %₃₈ %₃₉)
+41  slot₆/X
+42  (call core.svec %₄₁)
+43  SourceLocation::1:10
+44  (call core.svec %₄₀ %₄₂ %₄₃)
+45  --- method core.nothing %₄₄
+    slots: [slot₁/#self# slot₂/x]
+    1   TestMod.#f_kw_sparams#0
+    2   TestMod.a_def
+    3   TestMod.b_def
+    4   (call %₁ %₂ %₃ slot₁/#self# slot₂/x)
+    5   (return %₄)
+46  latestworld
+47  TestMod.f_kw_sparams
+48  (return %₄₇)
+
+########################################
+# Error: Static parameter which is unused in keyword body arg types
+function f_kw_sparams(x::X; a::A) where {X,Y,A}
+    (X,A)
+end
+#---------------------
+LoweringError:
+function f_kw_sparams(x::X; a::A) where {X,Y,A}
+#                                          ╙ ── Method definition declares type variable but does not use it in the type of any function parameter
+    (X,A)
+end
+
+########################################
+# Error: argument unpacking in keywords
+function f_kw_destruct(; (x,y)=10)
+end
+#---------------------
+LoweringError:
+function f_kw_destruct(; (x,y)=10)
+#                        └───┘ ── Invalid keyword name
+end
+
+########################################
+# Error: keyword slurping combined with a default
+function f_kw_slurp_default(; kws...=def)
+end
+#---------------------
+LoweringError:
+function f_kw_slurp_default(; kws...=def)
+#                             └────────┘ ── keyword argument with `...` cannot have a default value
+end
+
+########################################
+# Error: keyword slurping combined with type
+function f_kw_slurp_type(; kws::T...)
+end
+#---------------------
+LoweringError:
+function f_kw_slurp_type(; kws::T...)
+#                          └───────┘ ── keyword argument with `...` may not be given a type
+end
+
+########################################
+# Error: keyword slurping on non-final argument
+function f_kw_slurp_not_last(; kws..., x=1)
+end
+#---------------------
+LoweringError:
+function f_kw_slurp_not_last(; kws..., x=1)
+#                              └────┘ ── `...` may only be used for the last keyword argument
+end
+
+########################################
+# Fully generated function
+@generated function f_only_generated(x, y)
+    generator_code(x,y)
+end
+#---------------------
+1   (method TestMod.f_only_generated)
+2   latestworld
+3   (method TestMod.#f_only_generated@generator#0)
+4   latestworld
+5   TestMod.#f_only_generated@generator#0
+6   (call core.Typeof %₅)
+7   (call core.svec %₆ JuliaLowering.MacroContext core.Any core.Any core.Any)
+8   (call core.svec)
+9   SourceLocation::1:21
+10  (call core.svec %₇ %₈ %₉)
+11  --- method core.nothing %₁₀
+    slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/#self#(!read) slot₄/x(nospecialize) slot₅/y(nospecialize)]
+    1   TestMod.generator_code
+    2   (call %₁ slot₄/x slot₅/y)
+    3   (return %₂)
+12  latestworld
+13  TestMod.f_only_generated
+14  (call core.Typeof %₁₃)
+15  (call core.svec %₁₄ core.Any core.Any)
+16  (call core.svec)
+17  SourceLocation::1:21
+18  (call core.svec %₁₅ %₁₆ %₁₇)
+19  --- method core.nothing %₁₈
+    slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)]
+    1   (meta :generated (new JuliaLowering.GeneratedFunctionStub false TestMod.#f_only_generated@generator#0 SourceRef::1:1 (call core.svec :#self# :x :y) (call core.svec)))
+    2   (meta :generated_only)
+    3   (return core.nothing)
+20  latestworld
+21  TestMod.f_only_generated
+22  (return %₂₁)
+
+########################################
+# Partially generated function with `if @generated`
+function f_partially_generated(x, y)
+    nongen_stuff = bothgen(x, y)
+    if @generated
+        quote
+            maybe_gen_stuff = some_gen_stuff(x, y)
+        end
+    else
+        maybe_gen_stuff = some_nongen_stuff(x, y)
+    end
+    (nongen_stuff, maybe_gen_stuff)
+end
+#---------------------
+1   (method TestMod.f_partially_generated)
+2   latestworld
+3   (method TestMod.#f_partially_generated@generator#0)
+4   latestworld
+5   TestMod.#f_partially_generated@generator#0
+6   (call core.Typeof %₅)
+7   (call core.svec %₆ JuliaLowering.MacroContext core.Any core.Any core.Any)
+8   (call core.svec)
+9   SourceLocation::1:10
+10  (call core.svec %₇ %₈ %₉)
+11  --- method core.nothing %₁₀
+    slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/#self#(!read) slot₄/x(nospecialize,!read) slot₅/y(nospecialize,!read)]
+    1   (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (= maybe_gen_stuff (call some_gen_stuff x y)))))
+    2   (call core.tuple %₁)
+    3   (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (block (= nongen_stuff (call bothgen x y)) ($ (block (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))))) (tuple-p nongen_stuff maybe_gen_stuff)))) %₂)
+    4   (return %₃)
+12  latestworld
+13  TestMod.f_partially_generated
+14  (call core.Typeof %₁₃)
+15  (call core.svec %₁₄ core.Any core.Any)
+16  (call core.svec)
+17  SourceLocation::1:10
+18  (call core.svec %₁₅ %₁₆ %₁₇)
+19  --- method core.nothing %₁₈
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/maybe_gen_stuff slot₅/nongen_stuff]
+    1   (meta :generated (new JuliaLowering.GeneratedFunctionStub false TestMod.#f_partially_generated@generator#0 SourceRef::1:37 (call core.svec :#self# :x :y) (call core.svec)))
+    2   TestMod.bothgen
+    3   (= slot₅/nongen_stuff (call %₂ slot₂/x slot₃/y))
+    4   TestMod.some_nongen_stuff
+    5   (= slot₄/maybe_gen_stuff (call %₄ slot₂/x slot₃/y))
+    6   slot₅/nongen_stuff
+    7   slot₄/maybe_gen_stuff
+    8   (call core.tuple %₆ %₇)
+    9   (return %₈)
+20  latestworld
+21  TestMod.f_partially_generated
+22  (return %₂₁)
diff --git a/JuliaLowering/test/generators.jl b/JuliaLowering/test/generators.jl
new file mode 100644
index 0000000000000..7dce6236afe20
--- /dev/null
+++ b/JuliaLowering/test/generators.jl
@@ -0,0 +1,76 @@
+@testset "Generators" begin
+
+test_mod = Module()
+
+@test JuliaLowering.include_string(test_mod, """
+collect(x^2 for x in 1:3)
+""") == [1,4,9]
+
+@test JuliaLowering.include_string(test_mod, """
+collect(x for x in 1:5 if isodd(x))
+""") == [1,3,5]
+
+@test JuliaLowering.include_string(test_mod, """
+collect((y,x) for (x,y) in zip(1:3, 2:4) if y != 3)
+""") == [(2,1), (4,3)]
+
+# product iterator
+@test JuliaLowering.include_string(test_mod, """
+collect((x,y) for x in 1:3, y in 1:2)
+""") == [(1,1)  (1,2)
+         (2,1)  (2,2)
+         (3,1)  (3,2)]
+
+# flattened iterator
+@test JuliaLowering.include_string(test_mod, """
+collect((x,y,z) for x in 1:3, y in 4:5 for z in 6:7)
+""") == [
+    (1,4,6)
+    (1,4,7)
+    (2,4,6)
+    (2,4,7)
+    (3,4,6)
+    (3,4,7)
+    (1,5,6)
+    (1,5,7)
+    (2,5,6)
+    (2,5,7)
+    (3,5,6)
+    (3,5,7)
+]
+
+# Duplicate iteration variables - body sees only innermost
+@test JuliaLowering.include_string(test_mod, """
+collect(x for x in 1:3 for x in 1:2)
+""") == [1, 2, 1, 2, 1, 2]
+
+# Outer iteration variables are protected from mutation
+@test JuliaLowering.include_string(test_mod, """
+collect((z=y; y=100; z) for y in 1:3 for x in 1:2)
+""") == [1, 1, 2, 2, 3, 3]
+
+# Simple typed comprehension lowered to for loops
+@test JuliaLowering.include_string(test_mod, """
+Tuple{Int,Int}[(x,y) for x in 1:2, y in 1:3]
+""") == [(1,1) (1,2) (1,3)
+         (2,1) (2,2) (2,3)]
+
+# Triply nested comprehension
+@test JuliaLowering.include_string(test_mod, """
+[(x,y,z) for x in 1:3 for y in 4:5 for z in 6:7]
+""") == [
+    (1, 4, 6)
+    (1, 4, 7)
+    (1, 5, 6)
+    (1, 5, 7)
+    (2, 4, 6)
+    (2, 4, 7)
+    (2, 5, 6)
+    (2, 5, 7)
+    (3, 4, 6)
+    (3, 4, 7)
+    (3, 5, 6)
+    (3, 5, 7)
+]
+
+end
diff --git a/JuliaLowering/test/generators_ir.jl b/JuliaLowering/test/generators_ir.jl
new file mode 100644
index 0000000000000..2ff78581522a3
--- /dev/null
+++ b/JuliaLowering/test/generators_ir.jl
@@ -0,0 +1,299 @@
+########################################
+# Simple 1D generator
+(x+1 for x in xs)
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :#->##0 %₁ %₂)
+4   latestworld
+5   TestMod.#->##0
+6   (call core.svec %₅ core.Any)
+7   (call core.svec)
+8   SourceLocation::1:2
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(!read) slot₂/x]
+    1   TestMod.+
+    2   (call %₁ slot₂/x 1)
+    3   (return %₂)
+11  latestworld
+12  TestMod.#->##0
+13  (new %₁₂)
+14  TestMod.xs
+15  (call top.Generator %₁₃ %₁₄)
+16  (return %₁₅)
+
+########################################
+# Product iteration
+(x+y for x in xs, y in ys)
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :#->##1 %₁ %₂)
+4   latestworld
+5   TestMod.#->##1
+6   (call core.svec %₅ core.Any)
+7   (call core.svec)
+8   SourceLocation::1:2
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x slot₅/y]
+    1   (call top.indexed_iterate slot₂/destructured_arg 1)
+    2   (= slot₄/x (call core.getfield %₁ 1))
+    3   (= slot₃/iterstate (call core.getfield %₁ 2))
+    4   slot₃/iterstate
+    5   (call top.indexed_iterate slot₂/destructured_arg 2 %₄)
+    6   (= slot₅/y (call core.getfield %₅ 1))
+    7   TestMod.+
+    8   slot₄/x
+    9   slot₅/y
+    10  (call %₇ %₈ %₉)
+    11  (return %₁₀)
+11  latestworld
+12  TestMod.#->##1
+13  (new %₁₂)
+14  TestMod.xs
+15  TestMod.ys
+16  (call top.product %₁₄ %₁₅)
+17  (call top.Generator %₁₃ %₁₆)
+18  (return %₁₇)
+
+########################################
+# Use `identity` as the Generator function when possible eg in filters
+((x,y) for (x,y) in iter if f(x))
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :#->##2 %₁ %₂)
+4   latestworld
+5   TestMod.#->##2
+6   (call core.svec %₅ core.Any)
+7   (call core.svec)
+8   SourceLocation::1:29
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x slot₅/y(!read)]
+    1   (call top.indexed_iterate slot₂/destructured_arg 1)
+    2   (= slot₄/x (call core.getfield %₁ 1))
+    3   (= slot₃/iterstate (call core.getfield %₁ 2))
+    4   slot₃/iterstate
+    5   (call top.indexed_iterate slot₂/destructured_arg 2 %₄)
+    6   (= slot₅/y (call core.getfield %₅ 1))
+    7   TestMod.f
+    8   slot₄/x
+    9   (call %₇ %₈)
+    10  (return %₉)
+11  latestworld
+12  TestMod.#->##2
+13  (new %₁₂)
+14  TestMod.iter
+15  (call top.Filter %₁₃ %₁₄)
+16  (call top.Generator top.identity %₁₅)
+17  (return %₁₆)
+
+########################################
+# Use of placeholders in iteration vars
+(1 for _ in xs)
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :#->##3 %₁ %₂)
+4   latestworld
+5   TestMod.#->##3
+6   (call core.svec %₅ core.Any)
+7   (call core.svec)
+8   SourceLocation::1:2
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(!read) slot₂/#arg1#(!read)]
+    1   (return 1)
+11  latestworld
+12  TestMod.#->##3
+13  (new %₁₂)
+14  TestMod.xs
+15  (call top.Generator %₁₃ %₁₄)
+16  (return %₁₅)
+
+########################################
+# Error: Use of placeholders in body
+(_ for _ in xs)
+#---------------------
+LoweringError:
+(_ for _ in xs)
+#╙ ── all-underscore identifiers are write-only and their values cannot be used in expressions
+
+########################################
+# 1D generator with destructuring
+(body for (x,_,y) in iter)
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :#->##5 %₁ %₂)
+4   latestworld
+5   TestMod.#->##5
+6   (call core.svec %₅ core.Any)
+7   (call core.svec)
+8   SourceLocation::1:2
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x(!read) slot₅/y(!read)]
+    1   (call top.indexed_iterate slot₂/destructured_arg 1)
+    2   (= slot₄/x (call core.getfield %₁ 1))
+    3   (= slot₃/iterstate (call core.getfield %₁ 2))
+    4   slot₃/iterstate
+    5   (call top.indexed_iterate slot₂/destructured_arg 2 %₄)
+    6   (call core.getfield %₅ 1)
+    7   (= slot₃/iterstate (call core.getfield %₅ 2))
+    8   slot₃/iterstate
+    9   (call top.indexed_iterate slot₂/destructured_arg 3 %₈)
+    10  (= slot₅/y (call core.getfield %₉ 1))
+    11  TestMod.body
+    12  (return %₁₁)
+11  latestworld
+12  TestMod.#->##5
+13  (new %₁₂)
+14  TestMod.iter
+15  (call top.Generator %₁₃ %₁₄)
+16  (return %₁₅)
+
+########################################
+# return permitted in quoted syntax in generator
+(:(return x) for _ in iter)
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :#->##6 %₁ %₂)
+4   latestworld
+5   TestMod.#->##6
+6   (call core.svec %₅ core.Any)
+7   (call core.svec)
+8   SourceLocation::1:4
+9   (call core.svec %₆ %₇ %₈)
+10  --- method core.nothing %₉
+    slots: [slot₁/#self#(!read) slot₂/#arg1#(!read)]
+    1   (call JuliaLowering.interpolate_ast SyntaxTree (inert (return x)))
+    2   (return %₁)
+11  latestworld
+12  TestMod.#->##6
+13  (new %₁₂)
+14  TestMod.iter
+15  (call top.Generator %₁₃ %₁₄)
+16  (return %₁₅)
+
+########################################
+# Error: `return` not permitted in generator body
+((return x) + y for x in iter)
+#---------------------
+LoweringError:
+((return x) + y for x in iter)
+# └──────┘ ── `return` not allowed inside comprehension or generator
+
+########################################
+# Nested case with duplicate iteration variables
+(x for x in 1:3 for x in 1:2)
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :#->##7 %₁ %₂)
+4   latestworld
+5   (call core.svec)
+6   (call core.svec)
+7   (call JuliaLowering.eval_closure_type TestMod :#->#->##0 %₅ %₆)
+8   latestworld
+9   TestMod.#->#->##0
+10  (call core.svec %₉ core.Any)
+11  (call core.svec)
+12  SourceLocation::1:2
+13  (call core.svec %₁₀ %₁₁ %₁₂)
+14  --- method core.nothing %₁₃
+    slots: [slot₁/#self#(!read) slot₂/x slot₃/x]
+    1   slot₂/x
+    2   (= slot₃/x %₁)
+    3   slot₃/x
+    4   (return %₃)
+15  latestworld
+16  TestMod.#->##7
+17  (call core.svec %₁₆ core.Any)
+18  (call core.svec)
+19  SourceLocation::1:2
+20  (call core.svec %₁₇ %₁₈ %₁₉)
+21  --- method core.nothing %₂₀
+    slots: [slot₁/#self#(!read) slot₂/x(!read)]
+    1   TestMod.#->#->##0
+    2   (new %₁)
+    3   TestMod.:
+    4   (call %₃ 1 2)
+    5   (call top.Generator %₂ %₄)
+    6   (return %₅)
+22  latestworld
+23  TestMod.#->##7
+24  (new %₂₃)
+25  TestMod.:
+26  (call %₂₅ 1 3)
+27  (call top.Generator %₂₄ %₂₆)
+28  (call top.Flatten %₂₇)
+29  (return %₂₈)
+
+########################################
+# Comprehension lowers to generator with collect
+[x for x in xs]
+#---------------------
+1   TestMod.xs
+2   (call top.Generator top.identity %₁)
+3   (call top.collect %₂)
+4   (return %₃)
+
+########################################
+# Simple typed comprehension lowers to for loop
+T[(x,y) for x in xs, y in ys]
+#---------------------
+1   TestMod.xs
+2   TestMod.ys
+3   (call top.product %₁ %₂)
+4   (call top.IteratorSize %₃)
+5   (call core.isa %₄ top.SizeUnknown)
+6   TestMod.T
+7   (call top._array_for %₆ %₃ %₄)
+8   (call top.LinearIndices %₇)
+9   (= slot₁/idx (call top.first %₈))
+10  (= slot₃/next (call top.iterate %₂))
+11  slot₃/next
+12  (call core.=== %₁₁ core.nothing)
+13  (call top.not_int %₁₂)
+14  (gotoifnot %₁₃ label₅₀)
+15  slot₃/next
+16  (= slot₄/y (call core.getfield %₁₅ 1))
+17  (call core.getfield %₁₅ 2)
+18  (= slot₂/next (call top.iterate %₁))
+19  slot₂/next
+20  (call core.=== %₁₉ core.nothing)
+21  (call top.not_int %₂₀)
+22  (gotoifnot %₂₁ label₄₄)
+23  slot₄/y
+24  (= slot₆/y %₂₃)
+25  slot₂/next
+26  (= slot₅/x (call core.getfield %₂₅ 1))
+27  (call core.getfield %₂₅ 2)
+28  slot₅/x
+29  slot₆/y
+30  (call core.tuple %₂₈ %₂₉)
+31  (gotoifnot %₅ label₃₄)
+32  (call top.push! %₇ %₃₀)
+33  (goto label₃₆)
+34  slot₁/idx
+35  (call top.setindex! %₇ %₃₀ %₃₄)
+36  slot₁/idx
+37  (= slot₁/idx (call top.add_int %₃₆ 1))
+38  (= slot₂/next (call top.iterate %₁ %₂₇))
+39  slot₂/next
+40  (call core.=== %₃₉ core.nothing)
+41  (call top.not_int %₄₀)
+42  (gotoifnot %₄₁ label₄₄)
+43  (goto label₂₃)
+44  (= slot₃/next (call top.iterate %₂ %₁₇))
+45  slot₃/next
+46  (call core.=== %₄₅ core.nothing)
+47  (call top.not_int %₄₆)
+48  (gotoifnot %₄₇ label₅₀)
+49  (goto label₁₅)
+50  (return %₇)
diff --git a/JuliaLowering/test/hooks.jl b/JuliaLowering/test/hooks.jl
new file mode 100644
index 0000000000000..5fd017c70e0f4
--- /dev/null
+++ b/JuliaLowering/test/hooks.jl
@@ -0,0 +1,85 @@
+const JL = JuliaLowering
+
+@testset "hooks" begin
+    test_mod = Module()
+
+    @testset "`core_lowering_hook`" begin
+        # Non-AST types are often sent through lowering
+        stuff = Any[LineNumberNode(1), 123, 123.123, true, "foo", test_mod]
+        for s in stuff
+            @test JL.core_lowering_hook(s, test_mod) == Core.svec(s)
+        end
+
+        for ast_type in (Expr, JL.SyntaxTree)
+            ex = parsestmt(ast_type, "[1,2,3] .+= 1")
+            out = JL.core_lowering_hook(ex, test_mod)
+            @test out isa Core.SimpleVector && out[1] isa Expr
+            val = Core.eval(test_mod, out[1])
+            @test val == [2,3,4]
+        end
+
+        # file argument mismatch with embedded linenumbernodes shouldn't crash
+        ex = Expr(:block, LineNumberNode(111), :(x = 1), LineNumberNode(222), :(x + 1))
+        lwr = JuliaLowering.core_lowering_hook(ex, test_mod, "foo.jl", 333)[1]
+        @test Core.eval(test_mod, lwr) === 2
+    end
+
+    function jeval(str)
+        prog = parseall(Expr, str)
+        local out
+        try
+            JL.activate!()
+            out = Core.eval(test_mod, prog)
+        finally
+            JL.activate!(false)
+        end
+    end
+    @testset "integration: `JuliaLowering.activate!`" begin
+        out = jeval("global asdf = 1")
+        @test out === 1
+        @test isdefined(test_mod, :asdf)
+
+        out = jeval("module M; x = 1; end")
+        @test out isa Module
+        @test isdefined(test_mod, :M)
+        @test isdefined(test_mod.M, :x)
+
+        @test jeval("@ccall jl_value_ptr(nothing::Any)::Ptr{Cvoid}") isa Ptr{Cvoid}
+
+        # Tricky cases with symbols
+        out = jeval("""module M2
+                Base.@constprop :aggressive function f(x); x; end
+                const what = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), Core.nothing)
+            end""")
+        @test out isa Module
+        @test isdefined(test_mod, :M2)
+        @test isdefined(test_mod.M2, :f)
+        @test isdefined(test_mod.M2, :what)
+
+        out = jeval(""" "docstring" module M3 end """)
+        @test out isa Module
+        @test isdefined(test_mod, :M3)
+
+        # Macros may produce toplevel expressions.  Note that julia handles
+        # this case badly (macro expansion replaces M5_inner with a
+        # globalref) and we handle esc(:M5_inner) badly
+        out = jeval("""module M5
+            macro newmod()
+                return quote
+                    let a = 1
+                        $(Expr(:toplevel,
+                               Expr(:module, true, :M5_inner,
+                                    Expr(:block, :(global asdf = 1)))))
+                    end
+                end
+            end
+            @newmod()
+            end""")
+        @test out isa Module
+        @test isdefined(test_mod, :M5)
+        @test isdefined(test_mod.M5, :M5_inner)
+        @test isdefined(test_mod.M5.M5_inner, :asdf)
+
+        @test jeval("Base.@propagate_inbounds @inline meta_double_quote_issue(x) = x") isa Function
+    end
+end
diff --git a/JuliaLowering/test/import.jl b/JuliaLowering/test/import.jl
new file mode 100644
index 0000000000000..74cdd9260149e
--- /dev/null
+++ b/JuliaLowering/test/import.jl
@@ -0,0 +1,80 @@
+@testset "using / import" begin
+
+test_mod = Module()
+
+# Test attributes are correctly set for export/public
+JuliaLowering.include_string(test_mod, """
+x = 1
+y = 2
+export x
+public y
+""")
+@test Base.isexported(test_mod, :x)
+@test Base.ispublic(test_mod, :x)
+@test Base.ispublic(test_mod, :y)
+@test !Base.isexported(test_mod, :y)
+
+# Test various forms of `using`
+C = JuliaLowering.include_string(test_mod, """
+module C
+    module D
+        export x
+        public y, f
+        x = [101]
+        y = [202]
+
+        function f()
+            "hi"
+        end
+    end
+    module E
+        using ..D: f
+        using ..D
+        using .D: y as D_y
+        using .D: x as D_x_2, y as D_y_2
+        import .D.y as D_y_3
+    end
+end
+""")
+@test C.D.f === C.E.f
+@test C.D.x === C.E.x
+@test C.D.y === C.E.D_y
+@test C.D.x === C.E.D_x_2
+@test C.D.y === C.E.D_y_2
+@test C.D.y === C.E.D_y_3
+
+# Test that using F brings in the exported symbol G immediately and that it can
+# be used next in the import list.
+F = JuliaLowering.include_string(test_mod, """
+module F
+    export G
+    module G
+        export G_global
+        G_global = "exported from G"
+    end
+end
+""")
+JuliaLowering.include_string(test_mod, """
+using .F, .G
+""")
+@test test_mod.F === F
+@test test_mod.G === F.G
+@test test_mod.G_global === "exported from G"
+
+# Similarly, that import makes symbols available immediately
+H = JuliaLowering.include_string(test_mod, """
+module H
+    module I
+        module J
+        end
+    end
+end
+""")
+JuliaLowering.include_string(test_mod, """
+import .H.I, .I.J
+""")
+@test test_mod.I === H.I
+@test test_mod.J === H.I.J
+@test test_mod.G_global === "exported from G"
+
+end
diff --git a/JuliaLowering/test/import_ir.jl b/JuliaLowering/test/import_ir.jl
new file mode 100644
index 0000000000000..8f34f5f0c4939
--- /dev/null
+++ b/JuliaLowering/test/import_ir.jl
@@ -0,0 +1,69 @@
+########################################
+# Basic import
+import A: b
+#---------------------
+1   (call JuliaLowering.eval_import true TestMod :($(QuoteNode(:($(Expr(:., :A)))))) :($(QuoteNode(:($(Expr(:., :b)))))))
+2   latestworld
+3   (return core.nothing)
+
+########################################
+# Import with paths and `as`
+import A.B.C: b, c.d as e
+#---------------------
+1   (call JuliaLowering.eval_import true TestMod :($(QuoteNode(:($(Expr(:., :A, :B, :C)))))) :($(QuoteNode(:($(Expr(:., :b)))))) :($(QuoteNode(:(c.d as e)))))
+2   latestworld
+3   (return core.nothing)
+
+########################################
+# Imports without `from` module need separating with latestworld
+import A, B
+#---------------------
+1   (call JuliaLowering.eval_import true TestMod top.nothing :($(QuoteNode(:($(Expr(:., :A)))))))
+2   latestworld
+3   (call JuliaLowering.eval_import true TestMod top.nothing :($(QuoteNode(:($(Expr(:., :B)))))))
+4   latestworld
+5   (return core.nothing)
+
+########################################
+# Multiple usings need separating with latestworld
+using A, B
+#---------------------
+1   (call JuliaLowering.eval_using TestMod :($(QuoteNode(:($(Expr(:., :A)))))))
+2   latestworld
+3   (call JuliaLowering.eval_using TestMod :($(QuoteNode(:($(Expr(:., :B)))))))
+4   latestworld
+5   (return core.nothing)
+
+########################################
+# Using with paths and `as`
+using A.B.C: b, c.d as e
+#---------------------
+1   (call JuliaLowering.eval_import false TestMod :($(QuoteNode(:($(Expr(:., :A, :B, :C)))))) :($(QuoteNode(:($(Expr(:., :b)))))) :($(QuoteNode(:(c.d as e)))))
+2   latestworld
+3   (return core.nothing)
+
+########################################
+# Error: Import not at top level
+function f()
+    import A: b
+end
+#---------------------
+LoweringError:
+function f()
+    import A: b
+#   └─────────┘ ── this syntax is only allowed in top level code
+end
+
+########################################
+# Export
+export a, b, c
+#---------------------
+1   (call JuliaLowering.eval_public TestMod true ["a", "b", "c"])
+2   (return %₁)
+
+########################################
+# Public
+public a, b, c
+#---------------------
+1   (call JuliaLowering.eval_public TestMod false ["a", "b", "c"])
+2   (return %₁)
diff --git a/JuliaLowering/test/ir_tests.jl b/JuliaLowering/test/ir_tests.jl
new file mode 100644
index 0000000000000..3035a6f3a7bc4
--- /dev/null
+++ b/JuliaLowering/test/ir_tests.jl
@@ -0,0 +1,10 @@
+@testset "IR tests" begin
+    testdir = @__DIR__
+    for filename in readdir(testdir)
+        if endswith(filename, "_ir.jl")
+            @testset "$filename" begin
+                test_ir_cases(joinpath(testdir, filename))
+            end
+        end
+    end
+end
diff --git a/JuliaLowering/test/loops.jl b/JuliaLowering/test/loops.jl
new file mode 100644
index 0000000000000..6f63b28cc2b0e
--- /dev/null
+++ b/JuliaLowering/test/loops.jl
@@ -0,0 +1,279 @@
+
+@testset "while loops" begin
+
+test_mod = Module()
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    i = 0
+    while i < 5
+        i = i + 1
+        push!(a, i)
+    end
+    a
+end
+""") == [1,2,3,4,5]
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    i = 0
+    while i < 5
+        i = i + 1
+        if i == 3
+            break
+        end
+        push!(a, i)
+    end
+    a
+end
+""") == [1,2]
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    i = 0
+    while i < 5
+        i = i + 1
+        if isodd(i)
+            continue
+        end
+        push!(a, i)
+    end
+    a
+end
+""") == [2,4]
+
+end
+
+@testset "for loops" begin
+
+test_mod = Module()
+
+# iteration
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i = 1:3
+        push!(a, i)
+    end
+    a
+end
+""") == [1,2,3]
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i = 1:0
+        push!(a, i)
+    end
+    a
+end
+""") == []
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for _ = 1:3
+        push!(a, 1)
+    end
+    a
+end
+""") == [1, 1, 1]
+
+# break
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i = 1:6
+        if i == 3
+            break
+        end
+        push!(a, i)
+    end
+    a
+end
+""") == [1, 2]
+# Break from inner nested loop
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i in 1:2
+       for j in 3:4
+           push!(a, (i, j))
+           j == 6 && break
+       end
+    end
+    a
+end
+""") == [(1, 3), (1, 4), (2, 3), (2, 4)]
+
+# continue
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i = 1:6
+        if isodd(i)
+            continue
+        end
+        push!(a, i)
+    end
+    a
+end
+""") == [2, 4, 6]
+
+# Loop variable scope
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i = 1:3
+        push!(a, i)
+        i = 100
+    end
+    a
+end
+""") == [1,2,3]
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    i = 100
+    for i = 1:3
+    end
+    i
+end
+""") == 100
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    i = 100
+    for outer i = 1:2
+        nothing
+    end
+    i
+end
+""") == 2
+
+# Fancy for loop left hand side - unpacking and scoping
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    i = 100
+    j = 200
+    for (i,j) in [('a', 'b'), (1,2)]
+        push!(a, (i,j))
+    end
+    (a, i, j)
+end
+""") == ([('a', 'b'), (1,2)], 100, 200)
+
+end
+
+
+@testset "multidimensional for loops" begin
+
+test_mod = Module()
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i = 1:2, j = 3:4
+        push!(a, (i,j))
+    end
+    a
+end
+""") == [(1,3), (1,4), (2,3), (2,4)]
+
+@testset "break/continue" begin
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i = 1:2, j = 3:4
+        push!(a, (i,j))
+        break
+    end
+    a
+end
+""") == [(1,3)]
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i = 1:4, j = 3:4
+        if isodd(i)
+            continue
+        end
+        push!(a, (i,j))
+    end
+    a
+end
+""") == [(2,3), (2,4), (4,3), (4,4)]
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i = 1:2, j = 1:4
+        if isodd(j)
+            continue
+        end
+        push!(a, (i,j))
+    end
+    a
+end
+""") == [(1,2), (1,4), (2,2), (2,4)]
+
+
+end
+
+
+@testset "Loop variable scope" begin
+
+# Test that `i` is copied in the inner loop
+@test JuliaLowering.include_string(test_mod, """
+let
+    a = []
+    for i = 1:2, j = 3:4
+        push!(a, (i,j))
+        i = 100
+    end
+    a
+end
+""") == [(1,3), (1,4), (2,3), (2,4)]
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    i = 100
+    j = 200
+    for i = 1:2, j = 3:4
+        nothing
+    end
+    (i,j)
+end
+""") == (100,200)
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    i = 100
+    j = 200
+    for outer i = 1:2, j = 3:4
+        nothing
+    end
+    (i,j)
+end
+""") == (2,200)
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    i = 100
+    j = 200
+    for i = 1:2, outer j = 3:4
+        nothing
+    end
+    (i,j)
+end
+""") == (100,4)
+
+end
+
+end
diff --git a/JuliaLowering/test/loops_ir.jl b/JuliaLowering/test/loops_ir.jl
new file mode 100644
index 0000000000000..709322a084c68
--- /dev/null
+++ b/JuliaLowering/test/loops_ir.jl
@@ -0,0 +1,146 @@
+########################################
+# Basic while loop
+while f(a)
+    body1
+    body2
+end
+#---------------------
+1   TestMod.f
+2   TestMod.a
+3   (call %₁ %₂)
+4   (gotoifnot %₃ label₈)
+5   TestMod.body1
+6   TestMod.body2
+7   (goto label₁)
+8   (return core.nothing)
+
+########################################
+# While loop with short circuit condition
+while a && b
+    body
+end
+#---------------------
+1   TestMod.a
+2   (gotoifnot %₁ label₇)
+3   TestMod.b
+4   (gotoifnot %₃ label₇)
+5   TestMod.body
+6   (goto label₁)
+7   (return core.nothing)
+
+########################################
+# While loop with with break and continue
+while cond
+    body1
+    break
+    body2
+    continue
+    body3
+end
+#---------------------
+1   TestMod.cond
+2   (gotoifnot %₁ label₉)
+3   TestMod.body1
+4   (goto label₉)
+5   TestMod.body2
+6   (goto label₈)
+7   TestMod.body3
+8   (goto label₁)
+9   (return core.nothing)
+
+########################################
+# Basic for loop
+for x in xs
+    body
+end
+#---------------------
+1   TestMod.xs
+2   (= slot₁/next (call top.iterate %₁))
+3   slot₁/next
+4   (call core.=== %₃ core.nothing)
+5   (call top.not_int %₄)
+6   (gotoifnot %₅ label₁₇)
+7   slot₁/next
+8   (= slot₂/x (call core.getfield %₇ 1))
+9   (call core.getfield %₇ 2)
+10  TestMod.body
+11  (= slot₁/next (call top.iterate %₁ %₉))
+12  slot₁/next
+13  (call core.=== %₁₂ core.nothing)
+14  (call top.not_int %₁₃)
+15  (gotoifnot %₁₄ label₁₇)
+16  (goto label₇)
+17  (return core.nothing)
+
+########################################
+# Syntax sugar for nested for loop
+for x in xs, y in ys
+    x = 10 # Copy of x; does not overwrite x iteration var
+end
+#---------------------
+1   TestMod.xs
+2   (= slot₂/next (call top.iterate %₁))
+3   slot₂/next
+4   (call core.=== %₃ core.nothing)
+5   (call top.not_int %₄)
+6   (gotoifnot %₅ label₃₄)
+7   slot₂/next
+8   (= slot₃/x (call core.getfield %₇ 1))
+9   (call core.getfield %₇ 2)
+10  TestMod.ys
+11  (= slot₁/next (call top.iterate %₁₀))
+12  slot₁/next
+13  (call core.=== %₁₂ core.nothing)
+14  (call top.not_int %₁₃)
+15  (gotoifnot %₁₄ label₂₈)
+16  slot₃/x
+17  (= slot₄/x %₁₆)
+18  slot₁/next
+19  (= slot₅/y (call core.getfield %₁₈ 1))
+20  (call core.getfield %₁₈ 2)
+21  (= slot₄/x 10)
+22  (= slot₁/next (call top.iterate %₁₀ %₂₀))
+23  slot₁/next
+24  (call core.=== %₂₃ core.nothing)
+25  (call top.not_int %₂₄)
+26  (gotoifnot %₂₅ label₂₈)
+27  (goto label₁₆)
+28  (= slot₂/next (call top.iterate %₁ %₉))
+29  slot₂/next
+30  (call core.=== %₂₉ core.nothing)
+31  (call top.not_int %₃₀)
+32  (gotoifnot %₃₁ label₃₄)
+33  (goto label₇)
+34  (return core.nothing)
+
+########################################
+# Error: break outside for/while
+break
+#---------------------
+LoweringError:
+break
+└───┘ ── break must be used inside a `while` or `for` loop
+
+########################################
+# Error: continue outside for/while
+continue
+#---------------------
+LoweringError:
+continue
+└──────┘ ── continue must be used inside a `while` or `for` loop
+
+########################################
+# Error: `outer` without outer local variable
+let
+    for outer i = 1:2
+        nothing
+    end
+    i
+end
+#---------------------
+LoweringError:
+let
+    for outer i = 1:2
+#             ╙ ── `outer` annotations must match with a local variable in an outer scope but no such variable was found
+        nothing
+    end
diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl
new file mode 100644
index 0000000000000..84c80499722f7
--- /dev/null
+++ b/JuliaLowering/test/macros.jl
@@ -0,0 +1,542 @@
+@testset "macro tests" begin
+
+test_mod = Module(:macro_test)
+Base.eval(test_mod, :(const var"@ast" = $(JuliaLowering.var"@ast")))
+Base.eval(test_mod, :(const var"@K_str" = $(JuliaLowering.var"@K_str")))
+
+# These libraries may either be packages or vendored into Base - need to pull
+# them in via relative paths in the `using` statements below.
+Base.eval(test_mod, :(const JuliaLowering = $(JuliaLowering)))
+Base.eval(test_mod, :(const JuliaSyntax = $(JuliaSyntax)))
+
+JuliaLowering.include_string(test_mod, raw"""
+module M
+    using ..JuliaLowering: JuliaLowering, adopt_scope
+    using ..JuliaSyntax
+
+    # Introspection
+    macro __MODULE__()
+        __context__.scope_layer.mod
+    end
+
+    macro __FILE__()
+        JuliaLowering.filename(__context__.macrocall)
+    end
+
+    macro __LINE__()
+        JuliaLowering.source_location(__context__.macrocall)[1]
+    end
+
+    someglobal = "global in module M"
+
+    # Macro with local variables
+    macro foo(ex)
+        :(begin
+            x = "`x` from @foo"
+            (x, someglobal, $ex)
+        end)
+    end
+
+    # Set `a_global` in M
+    macro set_a_global(val)
+        :(begin
+            global a_global = $val
+        end)
+    end
+
+    macro set_other_global(ex, val)
+        :(begin
+            global $ex = $val
+        end)
+    end
+
+    macro set_global_in_parent(ex)
+        e1 = adopt_scope(:(sym_introduced_from_M), __context__)
+        quote
+            $e1 = $ex
+            nothing
+        end
+    end
+
+    macro inner()
+        :(y)
+    end
+
+    macro outer()
+        :((x, @inner))
+    end
+
+    macro recursive(N)
+        Nval = N.value::Int
+        if Nval < 1
+            return N
+        end
+        quote
+            x = $N
+            (x, @recursive $(Nval-1))
+        end
+    end
+end
+""")
+
+@test JuliaLowering.include_string(test_mod, """
+let
+    x = "`x` from outer scope"
+    M.@foo x
+end
+""") == ("`x` from @foo", "global in module M", "`x` from outer scope")
+@test !isdefined(test_mod.M, :x)
+
+
+@test JuliaLowering.include_string(test_mod, """
+#line1
+(M.@__MODULE__(), M.@__FILE__(), M.@__LINE__())
+""", "foo.jl") == (test_mod, "foo.jl", 2)
+
+@test !isdefined(test_mod.M, :a_global)
+@test JuliaLowering.include_string(test_mod, """
+begin
+    M.@set_a_global 42
+    M.a_global
+end
+""") == 42
+
+JuliaLowering.include_string(test_mod, """
+M.@set_global_in_parent "bent hygiene!"
+""")
+@test test_mod.sym_introduced_from_M == "bent hygiene!"
+
+JuliaLowering.include_string(test_mod, "M.@set_other_global global_in_test_mod 100")
+@test !isdefined(test_mod.M, :global_in_test_mod)
+@test test_mod.global_in_test_mod == 100
+
+@test JuliaLowering.include_string(test_mod, """
+M.@recursive 3
+""") == (3, (2, (1, 0)))
+
+ex = JuliaLowering.parsestmt(JuliaLowering.SyntaxTree, "M.@outer()", filename="foo.jl")
+ctx, expanded = JuliaLowering.expand_forms_1(test_mod, ex, false, Base.get_world_counter())
+@test JuliaLowering.sourcetext.(JuliaLowering.flattened_provenance(expanded[2])) == [
+    "M.@outer()"
+    "@inner"
+    "y"
+]
+
+# World age support for macro expansion
+JuliaLowering.include_string(test_mod, raw"""
+macro world_age_test()
+    :(world1)
+end
+""")
+world1 = Base.get_world_counter()
+JuliaLowering.include_string(test_mod, raw"""
+macro world_age_test()
+    :(world2)
+end
+""")
+world2 = Base.get_world_counter()
+
+call_world_arg_test = JuliaLowering.parsestmt(JuliaLowering.SyntaxTree, "@world_age_test()")
+@test JuliaLowering.expand_forms_1(test_mod, call_world_arg_test, false, world1)[2] ≈
+    @ast_ "world1"::K"Identifier"
+@test JuliaLowering.expand_forms_1(test_mod, call_world_arg_test, false, world2)[2] ≈
+    @ast_ "world2"::K"Identifier"
+
+# Layer parenting
+@test expanded[1].scope_layer == 2
+@test expanded[2].scope_layer == 3
+@test getfield.(ctx.scope_layers, :parent_layer) == [0,1,2]
+
+JuliaLowering.include_string(test_mod, """
+f_throw(x) = throw(x)
+macro m_throw(x)
+    :(\$(f_throw(x)))
+end
+""")
+let (err, st) = try
+        JuliaLowering.include_string(test_mod, "_never_exist = @m_throw 42")
+    catch e
+        e, stacktrace(catch_backtrace())
+    end
+    @test err isa JuliaLowering.MacroExpansionError
+    @test !isnothing(err.err)
+    # Check that `catch_backtrace` can capture the stacktrace of the macro functions
+    @test any(sf->sf.func===:f_throw, st)
+    @test any(sf->sf.func===Symbol("@m_throw"), st)
+end
+
+let err = try
+        JuliaLowering.include_string(test_mod, "_never_exist = @m_not_exist 42")
+    catch e
+        e
+    end
+    @test err isa JuliaLowering.MacroExpansionError
+    @test err.msg == "Macro not found"
+    @test err.err isa UndefVarError
+end
+
+@test JuliaLowering.include_string(test_mod, "@ccall strlen(\"foo\"::Cstring)::Csize_t") == 3
+@test JuliaLowering.include_string(test_mod, "@ccall strlen(\"asdf\"::Cstring)::Csize_t gc_safe=true") == 4
+@test JuliaLowering.include_string(test_mod, """
+begin
+    buf = zeros(UInt8, 20)
+    @ccall sprintf(buf::Ptr{UInt8}, "num:%d str:%s"::Cstring; 42::Cint, "hello"::Cstring)::Cint
+    String(buf)
+end
+""") == "num:42 str:hello\0\0\0\0"
+
+let (err, st) = try
+        JuliaLowering.include_string(test_mod, "@ccall strlen(\"foo\"::Cstring)")
+    catch e
+        e, stacktrace(catch_backtrace())
+    end
+    @test err isa JuliaLowering.MacroExpansionError
+    @test err.msg == "Expected a return type annotation `::SomeType`"
+    @test isnothing(err.err)
+    # Check that `catch_backtrace` can capture the stacktrace of the macro function
+    @test any(sf->sf.func===:ccall_macro_parse, st)
+end
+
+# Tests for interop between old and new-style macros
+
+# Hygiene interop
+JuliaLowering.include_string(test_mod, raw"""
+    macro call_oldstyle_macro(a)
+        quote
+            x = "x in call_oldstyle_macro"
+            @oldstyle $a x
+        end
+    end
+
+    macro newstyle(a, b, c)
+        quote
+            x = "x in @newstyle"
+            ($a, $b, $c, x)
+        end
+    end
+""")
+# TODO: Make this macro lowering go via JuliaSyntax rather than the flisp code
+# (JuliaSyntax needs support for old-style quasiquote processing)
+Base.eval(test_mod, :(
+macro oldstyle(a, b)
+    quote
+        x = "x in @oldstyle"
+        @newstyle $(esc(a)) $(esc(b)) x
+    end
+end
+))
+@test JuliaLowering.include_string(test_mod, """
+let x = "x in outer scope"
+    @call_oldstyle_macro x
+end
+""") == ("x in outer scope",
+         "x in call_oldstyle_macro",
+         "x in @oldstyle",
+         "x in @newstyle")
+
+# Old style unhygenic escaping with esc()
+Base.eval(test_mod, :(
+macro oldstyle_unhygenic()
+    esc(:x)
+end
+))
+@test JuliaLowering.include_string(test_mod, """
+let x = "x in outer scope"
+    @oldstyle_unhygenic
+end
+""") == "x in outer scope"
+
+# Exceptions in old style macros
+Base.eval(test_mod, :(
+macro oldstyle_error()
+    error("Some error in old style macro")
+end
+))
+@test try
+    JuliaLowering.include_string(test_mod, """
+    @oldstyle_error
+    """)
+catch exc
+    sprint(showerror, exc)
+end == """
+MacroExpansionError while expanding @oldstyle_error in module Main.macro_test:
+@oldstyle_error
+└─────────────┘ ── Error expanding macro
+Caused by:
+Some error in old style macro"""
+
+@test sprint(
+    showerror,
+    JuliaLowering.MacroExpansionError(
+        JuliaLowering.expr_to_syntaxtree(:(foo), LineNumberNode(1)),
+        "fake error")) ==
+            "MacroExpansionError:\n#= line 1 =# - fake error"
+
+# Old-style macros returning non-Expr values
+Base.eval(test_mod, :(
+macro oldstyle_non_Expr()
+    42
+end
+))
+@test JuliaLowering.include_string(test_mod, """
+@oldstyle_non_Expr
+""") === 42
+
+# New-style macros called with the wrong arguments
+JuliaLowering.include_string(test_mod, raw"""
+macro method_error_test(a)
+end
+""")
+Base.eval(test_mod, :(
+macro method_error_test()
+end
+))
+try
+    JuliaLowering.include_string(test_mod, raw"""
+    @method_error_test x y
+    """)
+    @test false
+catch exc
+    @test exc isa JuliaLowering.MacroExpansionError
+    mexc = exc.err
+    @test mexc isa MethodError
+    @test mexc.args isa Tuple{JuliaLowering.MacroContext, JuliaLowering.SyntaxTree, JuliaLowering.SyntaxTree}
+end
+
+@testset "calling with old/new macro signatures" begin
+    # Old defined with 1 arg, new with 2 args, both with 3 (but with different values)
+    Base.eval(test_mod, :(macro sig_mismatch(x); x; end))
+    Base.eval(test_mod, :(macro sig_mismatch(x, y, z); z; end))
+    JuliaLowering.include_string(test_mod, "macro sig_mismatch(x, y); x; end")
+    JuliaLowering.include_string(test_mod, "macro sig_mismatch(x, y, z); x; end")
+
+    @test JuliaLowering.include_string(test_mod, "@sig_mismatch(1)") === 1
+    @test JuliaLowering.include_string(test_mod, "@sig_mismatch(1, 2)") === 1
+    @test JuliaLowering.include_string(test_mod, "@sig_mismatch(1, 2, 3)") === 1 # 3 if we prioritize old sig
+    err = try
+        JuliaLowering.include_string(test_mod, "@sig_mismatch(1, 2, 3, 4)") === 1
+    catch exc
+        sprint(showerror, exc, context=:module=>test_mod)
+    end
+    @test startswith(err, """
+    MacroExpansionError while expanding @sig_mismatch in module Main.macro_test:
+    @sig_mismatch(1, 2, 3, 4)
+    └───────────────────────┘ ── Error expanding macro
+    Caused by:
+    MethodError: no method matching var"@sig_mismatch"(::JuliaLowering.MacroContext, ::JuliaLowering.SyntaxTree""")
+end
+
+@testset "old macros producing exotic expr heads" begin
+    @test JuliaLowering.include_string(test_mod, """
+    let # example from @preserve docstring
+        x = Ref{Int}(101)
+        p = Base.unsafe_convert(Ptr{Int}, x)
+        GC.@preserve x unsafe_load(p)
+    end""") === 101 # Expr(:gc_preserve)
+
+    # only invokelatest produces :isglobal now, so MWE here
+    Base.eval(test_mod, :(macro isglobal(x); esc(Expr(:isglobal, x)); end))
+    @test JuliaLowering.include_string(test_mod, """
+    some_global = 1
+    function isglobal_chk(some_arg)
+       local some_local = 1
+       (@isglobal(some_undefined), @isglobal(some_global), @isglobal(some_arg), @isglobal(some_local))
+    end
+    isglobal_chk(1)
+    """) === (true, true, false, false)
+    # with K"Placeholder"s
+    @test JuliaLowering.include_string(test_mod, """
+    __ = 1
+    function isglobal_chk(___)
+       local ____ = 1
+       (@isglobal(_), @isglobal(__), @isglobal(___), @isglobal(____))
+    end
+    isglobal_chk(1)
+    """) === (false, false, false, false)
+
+    # @test appears to be the only macro in base to use :inert
+    test_result = JuliaLowering.include_string(test_mod, """
+    using Test
+    @test identity(123) === 123
+    """; expr_compat_mode=true)
+    @test test_result.value === true
+
+    # @enum produces Expr(:toplevel)
+    JuliaLowering.include_string(test_mod, """
+    @enum SOME_ENUM X1 X2 X3
+    """; expr_compat_mode=true)
+    @test test_mod.SOME_ENUM <: Enum
+    @test test_mod.X1 isa Enum
+end
+
+@testset "macros producing meta forms" begin
+    function find_method_ci(thunk)
+        ci = thunk.args[1]::Core.CodeInfo
+        m = findfirst(x->(x isa Expr && x.head === :method && length(x.args) === 3), ci.code)
+        ci.code[m].args[3]
+    end
+    jlower_e(s) = JuliaLowering.to_lowered_expr(
+        JuliaLowering.lower(
+            test_mod, JuliaLowering.parsestmt(
+                JuliaLowering.SyntaxTree, s);
+            expr_compat_mode=true))
+
+    prog = "Base.@assume_effects :foldable function foo(); end"
+    ref = Meta.lower(test_mod, Meta.parse(prog))
+    our = jlower_e(prog)
+    @test find_method_ci(ref).purity === find_method_ci(our).purity
+
+    prog = "Base.@inline function foo(); end"
+    ref = Meta.lower(test_mod, Meta.parse(prog))
+    our = jlower_e(prog)
+    @test find_method_ci(ref).inlining === find_method_ci(our).inlining
+
+    prog = "Base.@noinline function foo(); end"
+    ref = Meta.lower(test_mod, Meta.parse(prog))
+    our = jlower_e(prog)
+    @test find_method_ci(ref).inlining === find_method_ci(our).inlining
+
+    prog = "Base.@constprop :none function foo(); end"
+    ref = Meta.lower(test_mod, Meta.parse(prog))
+    our = jlower_e(prog)
+    @test find_method_ci(ref).constprop === find_method_ci(our).constprop
+
+    prog = "Base.@nospecializeinfer function foo(); end"
+    ref = Meta.lower(test_mod, Meta.parse(prog))
+    our = jlower_e(prog)
+    @test find_method_ci(ref).nospecializeinfer === find_method_ci(our).nospecializeinfer
+
+    prog = "Base.@propagate_inbounds function foo(); end"
+    ref = Meta.lower(test_mod, Meta.parse(prog))
+    our = jlower_e(prog)
+    @test find_method_ci(ref).propagate_inbounds === find_method_ci(our).propagate_inbounds
+
+end
+
+@testset "scope layers for normally-inert ASTs" begin
+    # Right hand side of `.`
+    @test JuliaLowering.include_string(test_mod, raw"""
+    let x = :(hi)
+        :(A.$x)
+    end
+    """) ≈ @ast_ [K"."
+        "A"::K"Identifier"
+        "hi"::K"Identifier"
+    ]
+    # module
+    @test JuliaLowering.include_string(test_mod, raw"""
+    let x = :(AA)
+        :(module $x
+        end
+        )
+    end
+    """) ≈ @ast_ [K"module"
+        v"1.14.0"::K"VERSION"
+        "AA"::K"Identifier"
+        [K"block"
+        ]
+    ]
+
+    # In macro expansion, require that expressions passed in as macro
+    # *arguments* get the lexical scope of the calling context, even for the
+    # `x` in `M.$x` where the right hand side of `.` is normally quoted.
+    @test JuliaLowering.include_string(test_mod, raw"""
+        let x = :(someglobal)
+            @eval M.$x
+        end
+    """) == "global in module M"
+
+    JuliaLowering.include_string(test_mod, raw"""
+        let y = 101
+            @eval module AA
+                x = $y
+            end
+        end
+    """)
+    @test test_mod.AA.x == 101
+
+    # "Deferred hygiene" in macros which emit quoted code currently doesn't
+    # work as might be expected.
+    #
+    # The old macro system also doesn't handle this - here's the equivalent
+    # implementation
+    # macro make_quoted_code(init, y)
+    #     QuoteNode(:(let
+    #         x = "inner x"
+    #         $(esc(init))
+    #         ($(esc(y)), x)
+    #     end))
+    # end
+    #
+    # TODO: The following should throw an error rather than producing a
+    # surprising value, or work "as expected" whatever that is!
+    JuliaLowering.include_string(test_mod, raw"""
+    macro make_quoted_code(init, y)
+        q = :(let
+            x = "inner x"
+            $init
+            ($y, x)
+        end)
+        @ast q q [K"inert" q]
+    end
+    """)
+    code = JuliaLowering.include_string(test_mod, """@make_quoted_code(x="outer x", x)""")
+    @test_broken JuliaLowering.eval(test_mod, code) == ("outer x", "inner x")
+end
+
+@testset "toplevel macro hygiene" begin
+    @eval test_mod global mod = $test_mod
+    @eval test_mod module MacroMod
+    global mod = MacroMod
+    macro escaped_toplevel()
+        esc(Expr(:toplevel, :(mod)))
+    end
+    macro inner_escaped_toplevel()
+        Expr(:toplevel, esc(:(mod)))
+    end
+    macro unescaped_toplevel()
+        Expr(:toplevel, :(mod))
+    end
+    end
+    @test JuliaLowering.include_string(test_mod, "MacroMod.@escaped_toplevel") === test_mod
+    @test JuliaLowering.include_string(test_mod, "MacroMod.@inner_escaped_toplevel") === test_mod
+    @test JuliaLowering.include_string(test_mod, "MacroMod.@unescaped_toplevel") === test_mod.MacroMod
+end
+
+# JuliaLang/JuliaLowering.jl#120
+#
+# `__module__` should be expanded as the lexical module containing the expanded
+# code, not the module corresponding to the current hygienic scope
+JuliaLowering.include_string(test_mod, raw"""
+module Mod1
+macro indirect_MODULE()
+    return :(@__MODULE__())
+end
+end
+""")
+code = JuliaLowering.include_string(test_mod, """Mod1.@indirect_MODULE()""")
+@test JuliaLowering.eval(test_mod, code) === test_mod # !== test_mod.Mod1
+# the lowering/eval iterator needs to expand in the correct world age (currently
+# the only way to hit this from user code is macros producing toplevel)
+
+@testset "macros defining macros" begin
+    @eval test_mod macro make_and_use_macro_toplevel()
+        Expr(:toplevel,
+             esc(:(macro from_toplevel_expansion()
+                   :(123)
+               end)),
+             esc(:(@from_toplevel_expansion())))
+    end
+
+    @test JuliaLowering.include_string(
+        test_mod, "@make_and_use_macro_toplevel()"; expr_compat_mode=true) === 123
+
+    if isdefined(test_mod, Symbol("@from_toplevel_expansion"))
+        Base.delete_binding(test_mod, Symbol("@from_toplevel_expansion"))
+    end
+
+    @test JuliaLowering.include_string(
+        test_mod, "@make_and_use_macro_toplevel()"; expr_compat_mode=false) === 123
+end
+
+end
diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl
new file mode 100644
index 0000000000000..f5f7fd41f8744
--- /dev/null
+++ b/JuliaLowering/test/macros_ir.jl
@@ -0,0 +1,210 @@
+module MacroMethods
+    macro some_macro()
+        quote
+            some_global
+        end
+    end
+
+    module ExtraMacroMethods
+        using ..MacroMethods
+        macro MacroMethods.some_macro(ex)
+            quote
+                some_global
+            end
+        end
+    end
+end
+
+macro strmac_str(ex, suff=nothing)
+    s = "$(ex[1].value) from strmac"
+    if !isnothing(suff)
+        s = "$s with suffix $(suff.value)"
+    end
+    s
+end
+
+macro cmdmac_cmd(ex, suff=nothing)
+    s = "$(ex[1].value) from cmdmac"
+    if !isnothing(suff)
+        s = "$s with suffix $(suff.value)"
+    end
+    s
+end
+
+#*******************************************************************************
+########################################
+# Simple macro
+macro add_one(ex)
+    quote
+        $ex + 1
+    end
+end
+#---------------------
+1   (method TestMod.@add_one)
+2   latestworld
+3   TestMod.@add_one
+4   (call core.Typeof %₃)
+5   (call core.svec %₄ JuliaLowering.MacroContext core.Any)
+6   (call core.svec)
+7   SourceLocation::1:7
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/ex]
+    1   (call core.tuple slot₃/ex)
+    2   (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (call-i ($ ex) + 1))) %₁)
+    3   (return %₂)
+10  latestworld
+11  TestMod.@add_one
+12  (return %₁₁)
+
+########################################
+# Macro using `__context__`
+macro foo(ex)
+    ctx = __context__
+end
+#---------------------
+1   (method TestMod.@foo)
+2   latestworld
+3   TestMod.@foo
+4   (call core.Typeof %₃)
+5   (call core.svec %₄ JuliaLowering.MacroContext core.Any)
+6   (call core.svec)
+7   SourceLocation::1:7
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/__context__ slot₃/ex(!read) slot₄/ctx(!read)]
+    1   slot₂/__context__
+    2   (= slot₄/ctx %₁)
+    3   (return %₁)
+10  latestworld
+11  TestMod.@foo
+12  (return %₁₁)
+
+########################################
+# Scope for symbols emitted by macros is the module where the method was
+# defined, thus two different modules in this case, even though `@some_macro`
+# belongs to the MacroMethods module.
+(MacroMethods.@some_macro(), MacroMethods.@some_macro(unused))
+#---------------------
+1   TestMod.MacroMethods.some_global
+2   TestMod.MacroMethods.ExtraMacroMethods.some_global
+3   (call core.tuple %₁ %₂)
+4   (return %₃)
+
+########################################
+# Error: Macro with kw args
+macro mmm(a; b=2)
+end
+#---------------------
+LoweringError:
+macro mmm(a; b=2)
+#          └───┘ ── macros cannot accept keyword arguments
+end
+
+########################################
+# Error: Bad macro name
+macro mmm[](ex)
+end
+#---------------------
+LoweringError:
+macro mmm[](ex)
+#     └───┘ ── invalid macro name
+end
+
+########################################
+# Error: Macros not allowed in local scope
+let
+    macro foo(ex)
+    end
+end
+#---------------------
+LoweringError:
+let
+#   ┌────────────
+    macro foo(ex)
+    end
+#─────┘ ── macro is only allowed in global scope
+end
+
+########################################
+# Error: Macros not allowed in local scope
+function f()
+    macro foo()
+    end
+end
+#---------------------
+LoweringError:
+function f()
+#   ┌──────────
+    macro foo()
+    end
+#─────┘ ── macro is only allowed in global scope
+end
+
+########################################
+# Error: Macros not found
+_never_exist = @m_not_exist 42
+#---------------------
+MacroExpansionError while expanding @m_not_exist in module Main.TestMod:
+_never_exist = @m_not_exist 42
+#               └─────────┘ ── Macro not found
+Caused by:
+UndefVarError: `@m_not_exist` not defined in `Main.TestMod`
+Suggestion: check for spelling errors or missing imports.
+
+########################################
+# Simple cmdstring
+`echo 1`
+#---------------------
+1   Base.cmd_gen
+2   (call core.tuple "echo")
+3   (call core.tuple "1")
+4   (call core.tuple %₂ %₃)
+5   (call %₁ %₄)
+6   (return %₅)
+
+########################################
+# Simple string macro
+strmac"hello"
+#---------------------
+1   (return "hello from strmac")
+
+########################################
+# String macro with suffix
+strmac"hello"blah
+#---------------------
+1   (return "hello from strmac with suffix blah")
+
+########################################
+# Simple cmd macro
+cmdmac`hello`
+#---------------------
+1   (return "hello from cmdmac")
+
+########################################
+# Cmd macro with suffix
+cmdmac`hello`12345
+#---------------------
+1   (return "hello from cmdmac with suffix 12345")
+
+########################################
+# @nospecialize (zero args)
+function foo()
+    @nospecialize
+end
+#---------------------
+1   (method TestMod.foo)
+2   latestworld
+3   TestMod.foo
+4   (call core.Typeof %₃)
+5   (call core.svec %₄)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read)]
+    1   (meta :nospecialize)
+    2   (return core.nothing)
+10  latestworld
+11  TestMod.foo
+12  (return %₁₁)
diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl
new file mode 100644
index 0000000000000..75bb26e9a6f92
--- /dev/null
+++ b/JuliaLowering/test/misc.jl
@@ -0,0 +1,216 @@
+@testset "Miscellaneous" begin
+
+test_mod = Module()
+
+# Blocks
+@test JuliaLowering.include_string(test_mod, """
+begin
+end
+""") == nothing
+
+# Placeholders
+@test JuliaLowering.include_string(test_mod, """_ = 10""") == 10
+
+# GC.@preserve
+@test JuliaLowering.include_string(test_mod, """
+let x = [1,2]
+    GC.@preserve x begin
+        x
+    end
+end
+""") == [1,2]
+
+@test JuliaLowering.include_string(test_mod, raw"""
+let
+    x = 10
+    @eval $x + 2
+end
+""") == 12
+
+@test JuliaLowering.include_string(test_mod, raw"""
+module EvalTest
+    _some_var = 2
+end
+let
+    x = 10
+    @eval EvalTest $x + _some_var
+end
+""") == 12
+
+@test JuliaLowering.include_string(test_mod, """
+let x=11
+    20x
+end
+""") == 220
+
+# ccall
+@test JuliaLowering.include_string(test_mod, """
+ccall(:strlen, Csize_t, (Cstring,), "asdfg")
+""") == 5
+@test JuliaLowering.include_string(test_mod, """
+function cvarargs_0()
+    strp = Ref{Ptr{Cchar}}(0)
+    fmt = "hi"
+    len = ccall(:asprintf, Cint, (Ptr{Ptr{Cchar}}, Cstring, Cfloat...), strp, fmt)
+    str = unsafe_string(strp[], len)
+    Libc.free(strp[])
+    return str
+end
+""") isa Function
+@test test_mod.cvarargs_0() == "hi"
+@test JuliaLowering.include_string(test_mod, """
+function cvarargs_2(arg1::Float64, arg2::Float64)
+    strp = Ref{Ptr{Cchar}}(0)
+    fmt = "%3.1f %3.1f"
+    len = ccall(:asprintf, Cint, (Ptr{Ptr{Cchar}}, Cstring, Cfloat...), strp, fmt, arg1, arg2)
+    str = unsafe_string(strp[], len)
+    Libc.free(strp[])
+    return str
+end
+""") isa Function
+@test test_mod.cvarargs_2(1.1, 2.2) == "1.1 2.2"
+
+# cfunction
+JuliaLowering.include_string(test_mod, """
+function f_ccallable(x, y)
+    x + y * 10
+end
+""")
+cf_int = JuliaLowering.include_string(test_mod, """
+@cfunction(f_ccallable, Int, (Int,Int))
+""")
+@test @ccall($cf_int(2::Int, 3::Int)::Int) == 32
+cf_float = JuliaLowering.include_string(test_mod, """
+@cfunction(f_ccallable, Float64, (Float64,Float64))
+""")
+@test @ccall($cf_float(2::Float64, 3::Float64)::Float64) == 32.0
+
+# Test that hygiene works with @ccallable function names (this is broken in
+# Base)
+JuliaLowering.include_string(test_mod, raw"""
+f_ccallable_hygiene() = 1
+
+module Nested
+    f_ccallable_hygiene() = 2
+    macro cfunction_hygiene()
+        :(@cfunction(f_ccallable_hygiene, Int, ()))
+    end
+end
+""")
+cf_hygiene = JuliaLowering.include_string(test_mod, """
+Nested.@cfunction_hygiene
+""")
+@test @ccall($cf_hygiene()::Int) == 2
+
+# Test that ccall can be passed static parameters in type signatures.
+#
+# Note that the cases where this works are extremely limited and tend to look
+# like `Ptr{T}` or `Ref{T}` (`T` doesn't work!?) because of the compilation
+# order in which the runtime inspects the arguments to ccall (`Ptr{T}` has a
+# well defined C ABI even when `T` is not yet determined). See also
+# https://github.com/JuliaLang/julia/issues/29400
+# https://github.com/JuliaLang/julia/pull/40947
+JuliaLowering.include_string(test_mod, raw"""
+function sparam_ccallable(x::Ptr{T}) where {T}
+    unsafe_store!(x, one(T))
+    nothing
+end
+
+function ccall_with_sparams(::Type{T}) where {T}
+    x = T[zero(T)]
+    cf = @cfunction(sparam_ccallable, Cvoid, (Ptr{T},))
+    @ccall $cf(x::Ptr{T})::Cvoid
+    x[1]
+end
+""")
+@test test_mod.ccall_with_sparams(Int) === 1
+@test test_mod.ccall_with_sparams(Float64) === 1.0
+
+# FIXME Currently JL cannot handle `@generated` functions, so the following test cases are commented out.
+# # Test that ccall can be passed static parameters in the function name
+# # Note that this only works with `@generated` functions from 1.13 onwards,
+# # where the function name can be evaluated at code generation time.
+# JuliaLowering.include_string(test_mod, raw"""
+# # In principle, may add other strlen-like functions here for different string
+# # types
+# ccallable_sptest_name(::Type{String}) = :strlen
+#
+# @generated function ccall_with_sparams_in_name(s::T) where {T}
+#     name = QuoteNode(ccallable_sptest_name(T))
+#     :(ccall($name, Csize_t, (Cstring,), s))
+# end
+# """)
+# @test test_mod.ccall_with_sparams_in_name("hii") == 3
+
+@testset "CodeInfo: has_image_globalref" begin
+    @test lower_str(test_mod, "x + y").args[1].has_image_globalref === false
+    @test lower_str(Main, "x + y").args[1].has_image_globalref === true
+end
+
+@testset "docstrings: doc-only expressions" begin
+    local jeval(mod, str) = JuliaLowering.include_string(mod, str; expr_compat_mode=true)
+    jeval(test_mod, "function fun_exists(x); x; end")
+    jeval(test_mod, "module M end; module M2 end")
+    # TODO: return values are to be determined, currently Base.Docs.Binding for
+    # both lowering implementations.  We can't return the value of the
+    # expression in these special cases.
+    jeval(test_mod, "\"docstr1\" sym_noexist")
+    jeval(test_mod, "\"docstr2\" fun_noexist()")
+    jeval(test_mod, "\"docstr3\" fun_exists(sym_noexist)")
+    jeval(test_mod, "\"docstr4\" M.sym_noexist")
+    jeval(test_mod, "\"docstr5\" M.fun_noexist()")
+    jeval(test_mod, "\"docstr6\" M.fun_exists(sym_noexist)")
+    @test jeval(test_mod, "@doc sym_noexist")               |> string === "docstr1\n"
+    @test jeval(test_mod, "@doc fun_noexist()")             |> string === "docstr2\n"
+    @test jeval(test_mod, "@doc fun_exists(sym_noexist)")   |> string === "docstr3\n"
+    @test jeval(test_mod, "@doc M.sym_noexist")             |> string === "docstr4\n"
+    @test jeval(test_mod, "@doc M.fun_noexist()")           |> string === "docstr5\n"
+    @test jeval(test_mod, "@doc M.fun_exists(sym_noexist)") |> string === "docstr6\n"
+    @test jeval(test_mod.M, "@doc M.sym_noexist")             |> string === "docstr4\n"
+    @test jeval(test_mod.M, "@doc M.fun_noexist()")           |> string === "docstr5\n"
+    @test jeval(test_mod.M, "@doc M.fun_exists(sym_noexist)") |> string === "docstr6\n"
+
+    jeval(test_mod.M2, "\"docstr7\" M2.M2.sym_noexist")
+    jeval(test_mod.M2, "\"docstr8\" M2.M2.fun_noexist()")
+    jeval(test_mod.M2, "\"docstr9\" M2.M2.fun_exists(sym_noexist)")
+    @test jeval(test_mod, "@doc M2.M2.sym_noexist")             |> string === "docstr7\n"
+    @test jeval(test_mod, "@doc M2.M2.fun_noexist()")           |> string === "docstr8\n"
+    @test jeval(test_mod, "@doc M2.M2.fun_exists(sym_noexist)") |> string === "docstr9\n"
+    @test jeval(test_mod.M2, "@doc M2.M2.sym_noexist")             |> string === "docstr7\n"
+    @test jeval(test_mod.M2, "@doc M2.M2.fun_noexist()")           |> string === "docstr8\n"
+    @test jeval(test_mod.M2, "@doc M2.M2.fun_exists(sym_noexist)") |> string === "docstr9\n"
+
+    # Try with signatures and type variables
+    jeval(test_mod, "abstract type T_exists end")
+
+    jeval(test_mod, "\"docstr10\" f10(x::Int, y, z::T_exists)")
+    d = jeval(test_mod, "@doc f10")
+    @test d |> string === "docstr10\n"
+    # TODO: Is there a better way of accessing this? Feel free to change tests
+    # if docsystem storage changes.
+    @test d.meta[:results][1].data[:typesig] === Tuple{Int, Any, test_mod.T_exists}
+
+    jeval(test_mod, "\"docstr11\" f11(x::T_exists, y::U, z::T) where {T, U<:Number}")
+    d = jeval(test_mod, "@doc f11")
+    @test d |> string === "docstr11\n"
+    @test d.meta[:results][1].data[:typesig] === Tuple{test_mod.T_exists, U, T} where {T, U<:Number}
+
+    jeval(test_mod, "\"docstr12\" f12(x::Int, y::U, z::T=1) where {T, U<:Number}")
+    d = jeval(test_mod, "@doc f12")
+    @test d |> string === "docstr12\n"
+    @test d.meta[:results][1].data[:typesig] === Union{Tuple{Int, U, T}, Tuple{Int, U}} where {T, U<:Number}
+
+end
+
+# SyntaxTree @eval should pass along expr_compat_mode
+@test JuliaLowering.include_string(test_mod, "@eval quote x end";
+                                   expr_compat_mode=false) isa SyntaxTree
+@test JuliaLowering.include_string(test_mod, "@eval quote x end";
+                                   expr_compat_mode=true) isa Expr
+@test JuliaLowering.include_string(test_mod, raw"""
+    let T = :foo
+        @eval @doc $"This is a $T" $T = 1
+    end
+"""; expr_compat_mode=true) === 1
+
+end
diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl
new file mode 100644
index 0000000000000..0b9ab2ee7e78c
--- /dev/null
+++ b/JuliaLowering/test/misc_ir.jl
@@ -0,0 +1,574 @@
+module JuxtuposeTest
+    macro emit_juxtupose()
+        :(10x)
+    end
+end
+
+#*******************************************************************************
+########################################
+# Getproperty syntax
+x.a
+#---------------------
+1   TestMod.x
+2   (call top.getproperty %₁ :a)
+3   (return %₂)
+
+########################################
+# Getproperty syntax with a string on right hand side
+x."b"
+#---------------------
+1   TestMod.x
+2   (call top.getproperty %₁ "b")
+3   (return %₂)
+
+########################################
+# Standalone dot syntax
+.*
+#---------------------
+1   TestMod.*
+2   (call top.BroadcastFunction %₁)
+3   (return %₂)
+
+########################################
+# Error: Wrong number of children in `.`
+@ast_ [K"." "x"::K"Identifier" "a"::K"Identifier" 3::K"Integer"]
+#---------------------
+LoweringError:
+#= line 1 =# - `.` form requires either one or two children
+
+########################################
+# Error: Placeholder value used
+_ + 1
+#---------------------
+LoweringError:
+_ + 1
+╙ ── all-underscore identifiers are write-only and their values cannot be used in expressions
+
+########################################
+# Named tuple
+(a=1, b=2)
+#---------------------
+1   (call core.tuple :a :b)
+2   (call core.apply_type core.NamedTuple %₁)
+3   (call core.tuple 1 2)
+4   (call %₂ %₃)
+5   (return %₄)
+
+########################################
+# Named tuple with parameters
+(; a=1, b=2)
+#---------------------
+1   (call core.tuple :a :b)
+2   (call core.apply_type core.NamedTuple %₁)
+3   (call core.tuple 1 2)
+4   (call %₂ %₃)
+5   (return %₄)
+
+########################################
+# Empty named tuple
+(;)
+#---------------------
+1   (call core.NamedTuple)
+2   (return %₁)
+
+########################################
+# Named tuple with implicit field names
+(;x, a.b.c, y._)
+#---------------------
+1   (call core.tuple :x :c :_)
+2   (call core.apply_type core.NamedTuple %₁)
+3   TestMod.x
+4   TestMod.a
+5   (call top.getproperty %₄ :b)
+6   (call top.getproperty %₅ :c)
+7   TestMod.y
+8   (call top.getproperty %₇ :_)
+9   (call core.tuple %₃ %₆ %₈)
+10  (call %₂ %₉)
+11  (return %₁₀)
+
+########################################
+# Named tuple with splats
+(; a=1, b=2, bs..., c=3, ds...)
+#---------------------
+1   (call core.tuple :a :b)
+2   (call core.apply_type core.NamedTuple %₁)
+3   (call core.tuple 1 2)
+4   (call %₂ %₃)
+5   TestMod.bs
+6   (call top.merge %₄ %₅)
+7   (call core.tuple :c)
+8   (call core.apply_type core.NamedTuple %₇)
+9   (call core.tuple 3)
+10  (call %₈ %₉)
+11  (call top.merge %₆ %₁₀)
+12  TestMod.ds
+13  (call top.merge %₁₁ %₁₂)
+14  (return %₁₃)
+
+########################################
+# Named tuple with only splats
+(; as..., bs...)
+#---------------------
+1   (call core.NamedTuple)
+2   TestMod.as
+3   (call top.merge %₁ %₂)
+4   TestMod.bs
+5   (call top.merge %₃ %₄)
+6   (return %₅)
+
+########################################
+# Named tuple with dynamic names
+(; a=1, b=2, c=>d)
+#---------------------
+1   (call core.tuple :a :b)
+2   (call core.apply_type core.NamedTuple %₁)
+3   (call core.tuple 1 2)
+4   (call %₂ %₃)
+5   TestMod.c
+6   (call core.tuple %₅)
+7   (call core.apply_type core.NamedTuple %₆)
+8   TestMod.d
+9   (call core.tuple %₈)
+10  (call %₇ %₉)
+11  (call top.merge %₄ %₁₀)
+12  (return %₁₁)
+
+########################################
+# Error: Named tuple with repeated fields
+(; a=1, bs..., c=3, a=2)
+#---------------------
+LoweringError:
+(; a=1, bs..., c=3, a=2)
+#                   ╙ ── Repeated named tuple field name
+
+########################################
+# Error: Named tuple frankentuple
+(a=1; b=2, c=3)
+#---------------------
+LoweringError:
+(a=1; b=2, c=3)
+#   └────────┘ ── unexpected semicolon in tuple - use `,` to separate tuple elements
+
+########################################
+# Error: Named tuple field dots in rhs
+(; a=xs...)
+#---------------------
+LoweringError:
+(; a=xs...)
+#    └───┘ ── `...` cannot be used in a value for a named tuple field
+
+########################################
+# Error: Named tuple field invalid lhs
+(; a[]=1)
+#---------------------
+LoweringError:
+(; a[]=1)
+#  └─┘ ── invalid named tuple field name
+
+########################################
+# Error: Named tuple element with weird dot syntax
+(; a."b")
+#---------------------
+LoweringError:
+(; a."b")
+#  └───┘ ── invalid named tuple element
+
+########################################
+# Error: Named tuple element without valid name
+(; a=1, f())
+#---------------------
+LoweringError:
+(; a=1, f())
+#       └─┘ ── Invalid named tuple element
+
+########################################
+# Error: Modules not allowed inside blocks
+begin
+    module C
+    end
+end
+#---------------------
+LoweringError:
+begin
+#   ┌───────
+    module C
+    end
+#─────┘ ── `module` is only allowed at top level
+end
+
+########################################
+# Error: Modules not allowed in local scope
+function f()
+    module C
+    end
+end
+#---------------------
+LoweringError:
+function f()
+#   ┌───────
+    module C
+    end
+#─────┘ ── `module` is only allowed at top level
+end
+
+########################################
+# Basic type assert
+x::T
+#---------------------
+1   TestMod.x
+2   TestMod.T
+3   (call core.typeassert %₁ %₂)
+4   (return %₃)
+
+########################################
+# Error: Invalid :: syntax outside function arg list
+::T
+#---------------------
+LoweringError:
+::T
+└─┘ ── `::` must be written `value::type` outside function argument lists
+
+########################################
+# Error: braces vector syntax
+{x, y}
+#---------------------
+LoweringError:
+{x, y}
+└────┘ ── { } syntax is reserved for future use
+
+########################################
+# Error: braces matrix syntax
+{x y; y z}
+#---------------------
+LoweringError:
+{x y; y z}
+└────────┘ ── { } syntax is reserved for future use
+
+########################################
+# Error: Test AST which has no source form and thus must have been constructed
+# programmatically (eg, a malformed if)
+@ast_ [K"if"]
+#---------------------
+LoweringError:
+#= line 1 =# - expected `numchildren(ex) >= 2`
+
+########################################
+# Error: @atomic in wrong position
+let
+    @atomic x
+end
+#---------------------
+LoweringError:
+#= none:2 =# - unimplemented or unsupported atomic declaration
+
+########################################
+# GC.@preserve support
+GC.@preserve a b begin
+    f(a,b)
+end
+#---------------------
+1   TestMod.a
+2   TestMod.b
+3   (= slot₂/s (gc_preserve_begin %₁ %₂))
+4   TestMod.f
+5   TestMod.a
+6   TestMod.b
+7   (= slot₁/r (call %₄ %₅ %₆))
+8   (gc_preserve_end slot₂/s)
+9   slot₁/r
+10  (return %₉)
+
+########################################
+# Error: GC.@preserve bad args
+GC.@preserve a b g() begin
+    body
+end
+#---------------------
+MacroExpansionError while expanding GC.@preserve in module Main.TestMod:
+GC.@preserve a b g() begin
+#                └─┘ ── Preserved variable must be a symbol
+    body
+end
+
+########################################
+# @eval without module
+@eval $f(x, y)
+#---------------------
+1   JuliaLowering.eval
+2   (call core.tuple :expr_compat_mode)
+3   (call core.apply_type core.NamedTuple %₂)
+4   (call core.tuple false)
+5   (call %₃ %₄)
+6   TestMod.f
+7   (call core.tuple %₆)
+8   (call JuliaLowering.interpolate_ast SyntaxTree (inert (call ($ f) x y)) %₇)
+9   (= slot₁/eval_result (call core.kwcall %₅ %₁ TestMod %₈))
+10  latestworld
+11  slot₁/eval_result
+12  (return %₁₁)
+
+########################################
+# @eval with module
+@eval mod $f(x, y)
+#---------------------
+1   JuliaLowering.eval
+2   (call core.tuple :expr_compat_mode)
+3   (call core.apply_type core.NamedTuple %₂)
+4   (call core.tuple false)
+5   (call %₃ %₄)
+6   TestMod.mod
+7   TestMod.f
+8   (call core.tuple %₇)
+9   (call JuliaLowering.interpolate_ast SyntaxTree (inert (call ($ f) x y)) %₈)
+10  (= slot₁/eval_result (call core.kwcall %₅ %₁ %₆ %₉))
+11  latestworld
+12  slot₁/eval_result
+13  (return %₁₂)
+
+########################################
+# Juxtaposition
+20x
+#---------------------
+1   TestMod.*
+2   TestMod.x
+3   (call %₁ 20 %₂)
+4   (return %₃)
+
+########################################
+# Juxtaposition - check the juxtapose multiply is resolved to `JuxtuposeTest.*` when
+# emitted by the macro in the JuxtuposeTest module.
+#
+# This is consistent with Julia's existing system but it's not entirely clear
+# this is good - perhaps we should resolve to Base.* instead? Resolving to the
+# module-local version makes it exactly equivalent to `*`. But one might argue
+# this is confusing because the symbol `*` appears nowhere in the user's source
+# code.
+JuxtuposeTest.@emit_juxtupose
+#---------------------
+1   TestMod.JuxtuposeTest.*
+2   TestMod.JuxtuposeTest.x
+3   (call %₁ 10 %₂)
+4   (return %₃)
+
+########################################
+# @cfunction expansion with global generic function as function argument
+@cfunction(callable, Int, (Int, Float64))
+#---------------------
+1   (cfunction Ptr{Nothing} (static_eval TestMod.callable) (static_eval TestMod.Int) (static_eval (call core.svec TestMod.Int TestMod.Float64)) :ccall)
+2   (return %₁)
+
+########################################
+# @cfunction expansion with closed-over callable argument
+@cfunction($close_over, Int, (Int, Float64))
+#---------------------
+1   TestMod.close_over
+2   (cfunction Base.CFunction %₁ (static_eval TestMod.Int) (static_eval (call core.svec TestMod.Int TestMod.Float64)) :ccall)
+3   (return %₂)
+
+########################################
+# Error: Bad arg types to @cfunction
+@cfunction(f, Int, NotATuple)
+#---------------------
+MacroExpansionError while expanding @cfunction in module Main.TestMod:
+@cfunction(f, Int, NotATuple)
+#                  └───────┘ ── @cfunction argument types must be a literal tuple
+
+########################################
+# Error: Locals used in @cfunction return type
+let T=Float64
+    @cfunction(f, T, (Float64,))
+end
+#---------------------
+LoweringError:
+let T=Float64
+    @cfunction(f, T, (Float64,))
+#                 ╙ ── cfunction return type cannot reference local variable
+end
+
+########################################
+# Error: Locals used in @cfunction arg type
+let T=Float64
+    @cfunction(f, Float64, (Float64,T))
+end
+#---------------------
+LoweringError:
+let T=Float64
+    @cfunction(f, Float64, (Float64,T))
+#                                   ╙ ── cfunction argument type cannot reference local variable
+end
+
+########################################
+# Basic @ccall lowering
+@ccall foo(x::X, y::Y)::R
+#---------------------
+1   TestMod.X
+2   TestMod.x
+3   (= slot₁/arg1 (call Base.cconvert %₁ %₂))
+4   TestMod.Y
+5   TestMod.y
+6   (= slot₂/arg2 (call Base.cconvert %₄ %₅))
+7   TestMod.X
+8   slot₁/arg1
+9   (call Base.unsafe_convert %₇ %₈)
+10  TestMod.Y
+11  slot₂/arg2
+12  (call Base.unsafe_convert %₁₀ %₁₁)
+13  slot₁/arg1
+14  slot₂/arg2
+15  (foreigncall :foo (static_eval TestMod.R) (static_eval (call core.svec TestMod.X TestMod.Y)) 0 :($(QuoteNode((:ccall, 0x0000, false)))) %₉ %₁₂ %₁₃ %₁₄)
+16  (return %₁₅)
+
+########################################
+# @ccall lowering with gc_safe
+@ccall foo(x::X; y::Y)::R gc_safe=true
+#---------------------
+1   TestMod.X
+2   TestMod.x
+3   (= slot₁/arg1 (call Base.cconvert %₁ %₂))
+4   TestMod.Y
+5   TestMod.y
+6   (= slot₂/arg2 (call Base.cconvert %₄ %₅))
+7   TestMod.X
+8   slot₁/arg1
+9   (call Base.unsafe_convert %₇ %₈)
+10  TestMod.Y
+11  slot₂/arg2
+12  (call Base.unsafe_convert %₁₀ %₁₁)
+13  slot₁/arg1
+14  slot₂/arg2
+15  (foreigncall :foo (static_eval TestMod.R) (static_eval (call core.svec TestMod.X TestMod.Y)) 1 :($(QuoteNode((:ccall, 0x0000, true)))) %₉ %₁₂ %₁₃ %₁₄)
+16  (return %₁₅)
+
+########################################
+# non-macro ccall with vararg in signature, but none provided
+ccall(:fcntl, Cint, (RawFD, Cint, Cint...), s, F_GETFL)
+#---------------------
+1   TestMod.RawFD
+2   TestMod.Cint
+3   TestMod.s
+4   (call top.cconvert %₁ %₃)
+5   TestMod.F_GETFL
+6   (call top.cconvert %₂ %₅)
+7   (call top.unsafe_convert %₁ %₄)
+8   (call top.unsafe_convert %₂ %₆)
+9   (foreigncall :fcntl (static_eval TestMod.Cint) (static_eval (call core.svec TestMod.RawFD TestMod.Cint)) 2 :ccall %₇ %₈ %₄ %₆)
+10  (return %₉)
+
+########################################
+# Error: No return annotation on @ccall
+@ccall strlen("foo"::Cstring)
+#---------------------
+MacroExpansionError while expanding @ccall in module Main.TestMod:
+@ccall strlen("foo"::Cstring)
+#                            └ ── Expected a return type annotation `::SomeType`
+
+########################################
+# Error: No argument type on @ccall
+@ccall foo("blah"::Cstring, "bad")::Int
+#---------------------
+MacroExpansionError while expanding @ccall in module Main.TestMod:
+@ccall foo("blah"::Cstring, "bad")::Int
+#                           └───┘ ── argument needs a type annotation
+
+########################################
+# Error: @ccall varags without one fixed argument
+@ccall foo(; x::Int)::Int
+#---------------------
+MacroExpansionError while expanding @ccall in module Main.TestMod:
+@ccall foo(; x::Int)::Int
+#          └──────┘ ── C ABI prohibits varargs without one required argument
+
+########################################
+# Error: Multiple varargs blocks
+@ccall foo(; x::Int; y::Float64)::Int
+#---------------------
+MacroExpansionError while expanding @ccall in module Main.TestMod:
+@ccall foo(; x::Int; y::Float64)::Int
+#                  └──────────┘ ── Multiple parameter blocks not allowed
+
+########################################
+# Error: Bad @ccall option
+@ccall foo(x::Int)::Int bad_opt
+#---------------------
+MacroExpansionError while expanding @ccall in module Main.TestMod:
+@ccall foo(x::Int)::Int bad_opt
+#                       └─────┘ ── Bad option to ccall
+
+########################################
+# Error: Unknown @ccall option name
+@ccall foo(x::Int)::Int bad_opt=true
+#---------------------
+MacroExpansionError while expanding @ccall in module Main.TestMod:
+@ccall foo(x::Int)::Int bad_opt=true
+#                       └─────┘ ── Unknown option name for ccall
+
+########################################
+# Error: Unknown option type
+@ccall foo(x::Int)::Int gc_safe="hi"
+#---------------------
+MacroExpansionError while expanding @ccall in module Main.TestMod:
+@ccall foo(x::Int)::Int gc_safe="hi"
+#                               └──┘ ── gc_safe must be true or false
+
+########################################
+# Error: unary & syntax
+&x
+#---------------------
+LoweringError:
+&x
+└┘ ── invalid syntax
+
+########################################
+# Error: $ outside quote/string
+$x
+#---------------------
+LoweringError:
+$x
+└┘ ── `$` expression outside string or quote block
+
+########################################
+# Error: splat outside call
+x...
+#---------------------
+LoweringError:
+x...
+└──┘ ── `...` expression outside call
+
+########################################
+# `include` should increment world age
+include("hi.jl")
+#---------------------
+1   TestMod.include
+2   (call %₁ "hi.jl")
+3   latestworld
+4   (return %₂)
+
+########################################
+# Const function assignment syntax (legacy)
+const f(x::Int)::Int = x+1
+#---------------------
+1   TestMod.f
+2   TestMod.x
+3   TestMod.Int
+4   (call core.typeassert %₂ %₃)
+5   (call %₁ %₄)
+6   TestMod.Int
+7   (call core.typeassert %₅ %₆)
+8   (return %₇)
+
+########################################
+# Error: Destructuring assignment method definitions (broken, legacy)
+f(x)::Int, g() = [1.0, 2.0]
+#---------------------
+LoweringError:
+f(x)::Int, g() = [1.0, 2.0]
+└──┘ ── invalid assignment location
+
+########################################
+# Error: Destructuring assignment typedef, variable, and function (broken, legacy)
+T{U}, (x::Float64, g()) = [Bool, (1, 2)]
+#---------------------
+LoweringError:
+T{U}, (x::Float64, g()) = [Bool, (1, 2)]
+#                  └─┘ ── invalid assignment location
diff --git a/JuliaLowering/test/modules.jl b/JuliaLowering/test/modules.jl
new file mode 100644
index 0000000000000..a68c5f8a8b6e2
--- /dev/null
+++ b/JuliaLowering/test/modules.jl
@@ -0,0 +1,54 @@
+@testset "modules" begin
+
+test_mod = Module()
+
+A = JuliaLowering.include_string(test_mod, """
+module A
+    function g()
+        return "hi"
+    end
+end
+""", "module_test")
+@test A isa Module
+@test A.g() == "hi"
+@test A.include isa Base.IncludeInto
+@test A.eval isa Core.EvalInto
+@test A.Base === Base
+@test A.eval(:(x = -2)) == -2
+@test A.x == -2
+
+B = JuliaLowering.include_string(test_mod, """
+baremodule B
+end
+""", "baremodule_test")
+@test B.Core === Core
+@test !isdefined(B, :include)
+@test !isdefined(B, :eval)
+@test !isdefined(B, :Base)
+
+# Module init order
+Amod = JuliaLowering.include_string(test_mod, """
+module A
+    init_order = []
+    __init__() = push!(init_order, "A")
+    module B
+        using ..A
+        __init__() = push!(A.init_order, "B")
+    end
+    module C
+        using ..A
+        __init__() = push!(A.init_order, "C")
+        module D
+            using ...A
+            __init__() = push!(A.init_order, "D")
+        end
+        module E
+            using ...A
+            __init__() = push!(A.init_order, "E")
+        end
+    end
+end
+""")
+@test Amod.init_order == ["B", "D", "E", "C", "A"]
+
+end
diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl
new file mode 100644
index 0000000000000..93ace74e948f2
--- /dev/null
+++ b/JuliaLowering/test/quoting.jl
@@ -0,0 +1,283 @@
+@testset "Syntax quoting & interpolation" begin
+
+test_mod = Module()
+
+ex = JuliaLowering.include_string(test_mod, """
+begin
+    x = 10
+    y = :(g(z))
+    quote
+        f(\$(x+1), \$y)
+    end
+end
+""")
+@test ex ≈ @ast_ [K"block"
+    [K"call"
+        "f"::K"Identifier"
+        11::K"Value"
+        [K"call"
+            "g"::K"Identifier"
+            "z"::K"Identifier"
+        ]
+    ]
+]
+@test sourcetext(ex[1]) == "f(\$(x+1), \$y)"
+@test sourcetext(ex[1][2]) == "\$(x+1)"
+@test sourcetext.(flattened_provenance(ex[1][3])) == ["\$y", "g(z)"]
+@test sprint(io->showprov(io, ex[1][3], tree=true)) == raw"""
+    (call g z)
+    ├─ (call g z)
+    │  └─ (call g z)
+    │     └─ @ string:3
+    └─ ($ y)
+       └─ @ string:5
+    """
+@test sprint(io->showprov(io, ex[1][3])) == raw"""
+    begin
+        x = 10
+        y = :(g(z))
+    #         └──┘ ── in source
+        quote
+            f($(x+1), $y)
+    # @ string:3
+
+        y = :(g(z))
+        quote
+            f($(x+1), $y)
+    #                 └┘ ── interpolated here
+        end
+    end
+    # @ string:5"""
+@test sprint(io->showprov(io, ex[1][3]; note="foo")) == raw"""
+    begin
+        x = 10
+        y = :(g(z))
+    #         └──┘ ── foo
+        quote
+            f($(x+1), $y)
+    # @ string:3
+
+        y = :(g(z))
+        quote
+            f($(x+1), $y)
+    #                 └┘ ── foo
+        end
+    end
+    # @ string:5"""
+
+
+# Test expression flags are preserved during interpolation
+@test JuliaSyntax.is_infix_op_call(JuliaLowering.include_string(test_mod, """
+let
+    x = 1
+    :(\$x + \$x)
+end
+"""))
+
+# Test that trivial interpolation without any nesting works.
+ex = JuliaLowering.include_string(test_mod, """
+let
+    x = 123
+    :(\$x)
+end
+""")
+@test kind(ex) == K"Value"
+@test ex.value == 123
+
+# Test that interpolation with field access works
+# (the field name can be interpolated into
+ex = JuliaLowering.include_string(test_mod, """
+let
+    field_name = :(a)
+    :(x.\$field_name)
+end
+""")
+@test kind(ex[2]) == K"Identifier"
+@test ex[2].name_val == "a"
+
+# Test quoted property access syntax like `Core.:(foo)` and `Core.:(!==)`
+@test JuliaLowering.include_string(test_mod, """
+    x = (a=1, b=2)
+    x.:(a)
+""") == 1
+@test JuliaLowering.include_string(test_mod, """
+    Core.:(!==)
+""") === (!==)
+
+# Test quoted operator function definitions (issue #20)
+@test JuliaLowering.include_string(test_mod, """
+begin
+    struct Issue20
+        x::Int
+    end
+    Base.:(==)(a::Issue20, b::Issue20) = a.x == b.x
+    Issue20(1) == Issue20(1)
+end
+""") === true
+
+@test JuliaLowering.include_string(test_mod, """
+begin
+    Base.:(<)(a::Issue20, b::Issue20) = a.x < b.x
+    Issue20(1) < Issue20(2)
+end
+""") === true
+
+# interpolations at multiple depths
+ex = JuliaLowering.include_string(test_mod, raw"""
+let
+    args = (:(x),:(y))
+    quote
+        x = 1
+        y = 2
+        quote
+            f($$(args...))
+        end
+    end
+end
+""")
+@test ex ≈ @ast_ [K"block"
+    [K"="
+        "x"::K"Identifier"
+        1::K"Integer"
+    ]
+    [K"="
+        "y"::K"Identifier"
+        2::K"Integer"
+    ]
+    [K"quote"
+        [K"block"
+            [K"call"
+                "f"::K"Identifier"
+                [K"$"
+                    "x"::K"Identifier"
+                    "y"::K"Identifier"
+                ]
+            ]
+        ]
+    ]
+]
+@test sourcetext(ex[3][1][1][2]) == "\$\$(args...)"
+@test sourcetext(ex[3][1][1][2][1]) == "x"
+@test sourcetext(ex[3][1][1][2][2]) == "y"
+
+ex2 = JuliaLowering.eval(test_mod, ex)
+@test sourcetext(ex2[1][2]) == "x"
+@test sourcetext(ex2[1][3]) == "y"
+
+@test JuliaLowering.include_string(test_mod, ":x") isa Symbol
+@test JuliaLowering.include_string(test_mod, ":(x)") isa SyntaxTree
+
+# Double interpolation
+double_interp_ex = JuliaLowering.include_string(test_mod, raw"""
+let
+    args = (:(xxx),)
+    :(:($$(args...)))
+end
+""")
+Base.eval(test_mod, :(xxx = 111))
+dinterp_eval = JuliaLowering.eval(test_mod, double_interp_ex)
+@test kind(dinterp_eval) == K"Value"
+@test dinterp_eval.value == 111
+
+multi_interp_ex = JuliaLowering.include_string(test_mod, raw"""
+let
+    args = (:(x), :(y))
+    :(:($$(args...)))
+end
+""")
+@test try
+    JuliaLowering.eval(test_mod, multi_interp_ex)
+    nothing
+catch exc
+    @test exc isa LoweringError
+    sprint(io->Base.showerror(io, exc, show_detail=false))
+end == raw"""
+LoweringError:
+let
+    args = (:(x), :(y))
+    :(:($$(args...)))
+#       └─────────┘ ── More than one value in bare `$` expression
+end"""
+
+@test try
+    JuliaLowering.eval(test_mod, multi_interp_ex, expr_compat_mode=true)
+    nothing
+catch exc
+    @test exc isa LoweringError
+    sprint(io->Base.showerror(io, exc, show_detail=false))
+end == raw"""
+LoweringError:
+No source for expression
+└ ── More than one value in bare `$` expression"""
+# ^ TODO: Improve error messages involving expr_to_syntaxtree!
+
+# Interpolation of SyntaxTree Identifier vs plain Symbol
+symbol_interp = JuliaLowering.include_string(test_mod, raw"""
+let
+    x = :xx    # Plain Symbol
+    y = :(yy)  # SyntaxTree K"Identifier"
+    :(f($x, $y, z))
+end
+""")
+@test symbol_interp ≈ @ast_ [K"call"
+    "f"::K"Identifier"
+    "xx"::K"Identifier"
+    "yy"::K"Identifier"
+    "z"::K"Identifier"
+]
+@test sourcetext(symbol_interp[2]) == "\$x" # No provenance for plain Symbol
+@test sourcetext(symbol_interp[3]) == "yy"
+
+# Mixing Expr into a SyntaxTree doesn't graft it onto the SyntaxTree AST but
+# treats it as a plain old value. (This is the conservative API choice and also
+# encourages ASTs to be written in the new form. However we may choose to
+# change this if necessary for compatibility.)
+expr_interp_is_value = JuliaLowering.include_string(test_mod, raw"""
+let
+    x = Expr(:call, :f, :x)
+    :(g($x))
+end
+""")
+@test expr_interp_is_value ≈ @ast_ [K"call"
+    "g"::K"Identifier"
+    Expr(:call, :f, :x)::K"Value"
+    # ^^ NB not [K"call" "f"::K"Identifier" "x"::K"Identifier"]
+]
+@test Expr(expr_interp_is_value) == Expr(:call, :g, QuoteNode(Expr(:call, :f, :x)))
+
+@testset "Interpolation in Expr compat mode" begin
+    expr_interp = JuliaLowering.include_string(test_mod, raw"""
+    let
+        x = :xx
+        :(f($x, z))
+    end
+    """, expr_compat_mode=true)
+    @test expr_interp == Expr(:call, :f, :xx, :z)
+
+    double_interp_expr = JuliaLowering.include_string(test_mod, raw"""
+    let
+        x = :xx
+        :(:(f($$x, $y)))
+    end
+    """, expr_compat_mode=true)
+    @test double_interp_expr == Expr(:quote, Expr(:call, :f, Expr(:$, :xx), Expr(:$, :y)))
+
+    # Test that ASTs are copied before they're seen by the user
+    @test JuliaLowering.include_string(test_mod, raw"""
+    exs = []
+    for i = 1:2
+        push!(exs, :(f(x,y)))
+        push!(exs[end].args, :z)
+    end
+    exs
+    """, expr_compat_mode=true) == Any[Expr(:call, :f, :x, :y, :z), Expr(:call, :f, :x, :y, :z)]
+
+    # Test interpolation into QuoteNode
+    @test JuliaLowering.include_string(test_mod, raw"""
+    let x = :push!
+        @eval Base.$x
+    end
+    """; expr_compat_mode=true) == Base.push!
+end
+
+end
diff --git a/JuliaLowering/test/quoting_ir.jl b/JuliaLowering/test/quoting_ir.jl
new file mode 100644
index 0000000000000..ccc61be3cf796
--- /dev/null
+++ b/JuliaLowering/test/quoting_ir.jl
@@ -0,0 +1,90 @@
+########################################
+# Simple interpolation
+quote
+    $x + 1
+end
+#---------------------
+1   TestMod.x
+2   (call core.tuple %₁)
+3   (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (call-i ($ x) + 1))) %₂)
+4   (return %₃)
+
+########################################
+# Trivial interpolation
+:($x)
+#---------------------
+1   TestMod.x
+2   (call core.tuple %₁)
+3   (call JuliaLowering.interpolate_ast SyntaxTree (inert ($ x)) %₂)
+4   (return %₃)
+
+########################################
+# Double escape
+quote
+    quote
+        $$x + 1
+    end
+end
+#---------------------
+1   TestMod.x
+2   (call core.tuple %₁)
+3   (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (quote (block (call-i ($ ($ x)) + 1))))) %₂)
+4   (return %₃)
+
+########################################
+# Symbols on `.` right hand side need to be scoped correctly
+let x = 1
+    :(A.$x)
+end
+#---------------------
+1   1
+2   (= slot₁/x %₁)
+3   slot₁/x
+4   (call core.tuple %₃)
+5   (call JuliaLowering.interpolate_ast SyntaxTree (inert (. A ($ x))) %₄)
+6   (return %₅)
+
+########################################
+# Error: Double escape
+quote
+    $$x + 1
+end
+#---------------------
+LoweringError:
+quote
+    $$x + 1
+#    └┘ ── `$` expression outside string or quote block
+end
+
+########################################
+# Quoted property access with identifier
+Core.:(foo)
+#---------------------
+1   TestMod.Core
+2   (call top.getproperty %₁ :foo)
+3   (return %₂)
+
+########################################
+# Quoted property access with operator
+Core.:(!==)
+#---------------------
+1   TestMod.Core
+2   (call top.getproperty %₁ :!==)
+3   (return %₂)
+
+########################################
+# Quoted operator function definition (issue #20)
+function Base.:(==)() end
+#---------------------
+1   TestMod.Base
+2   (call top.getproperty %₁ :==)
+3   (call core.Typeof %₂)
+4   (call core.svec %₃)
+5   (call core.svec)
+6   SourceLocation::1:10
+7   (call core.svec %₄ %₅ %₆)
+8   --- method core.nothing %₇
+    slots: [slot₁/#self#(!read)]
+    1   (return core.nothing)
+9   latestworld
+10  (return core.nothing)
diff --git a/JuliaLowering/test/repl_mode.jl b/JuliaLowering/test/repl_mode.jl
new file mode 100644
index 0000000000000..cf85717c03cbf
--- /dev/null
+++ b/JuliaLowering/test/repl_mode.jl
@@ -0,0 +1,84 @@
+# JuliaLowering REPL mode: an interactive test utility for lowering code (not
+# part of the unit tests)
+
+module JuliaLoweringREPL
+
+import ReplMaker
+import REPL
+
+using JuliaLowering: JuliaLowering, SyntaxTree, children
+using JuliaSyntax
+
+function is_incomplete(prompt_state)
+    str = String(take!(copy(REPL.LineEdit.buffer(prompt_state))))
+    stream = JuliaSyntax.ParseStream(str)
+    JuliaSyntax.parse!(stream, rule=:all)
+    if JuliaSyntax.any_error(stream)
+        tree = JuliaSyntax.build_tree(SyntaxNode, stream)
+        tag = JuliaSyntax._incomplete_tag(tree, 1)
+        return tag != :none
+    else
+        return false
+    end
+end
+
+function eval_ish(mod::Module, ex::SyntaxTree, do_eval::Bool, do_print_ir::Bool)
+    k = kind(ex)
+    if k == K"toplevel"
+        x = nothing
+        for e in children(ex)
+            x = eval_ish(mod, e, do_eval, do_print_ir)
+        end
+        return x
+    end
+    linear_ir = JuliaLowering.lower(mod, ex)
+    if do_print_ir
+        JuliaLowering.print_ir(stdout, linear_ir)
+    end
+    if do_eval
+        println(stdout, "#----------------------")
+        expr_form = JuliaLowering.to_lowered_expr(linear_ir)
+        Base.eval(mod, expr_form)
+    end
+end
+
+PRINT_IR::Bool = true
+DO_EVAL::Bool = false
+function opts(; do_eval=false, print_ir=false)
+    global DO_EVAL = do_eval
+    global PRINT_IR = print_ir
+end
+
+function handle_input(str)
+    global DO_EVAL, PRINT_IR
+    if str == "DO_EVAL"
+        DO_EVAL = true
+        return
+    elseif str == "!DO_EVAL"
+        DO_EVAL = false
+        return
+    elseif str == "PRINT_IR"
+        PRINT_IR = true
+        return
+    elseif str == "!PRINT_IR"
+        PRINT_IR = false
+        return
+    end
+    ex = parseall(SyntaxTree, str; filename="REPL")
+    eval_ish(Main, ex, DO_EVAL, PRINT_IR)
+end
+
+function init()
+    ReplMaker.initrepl(handle_input,
+                       valid_input_checker = !is_incomplete,
+                       prompt_text="Lowering> ",
+                       prompt_color = :blue,
+                       start_key=")",
+                       mode_name=:JuliaLowering)
+end
+
+function __init__()
+    init()
+end
+
+end
diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl
new file mode 100644
index 0000000000000..7451ecb5c179f
--- /dev/null
+++ b/JuliaLowering/test/runtests.jl
@@ -0,0 +1,28 @@
+include("utils.jl")
+
+@testset "JuliaLowering.jl" begin
+    include("syntax_graph.jl")
+
+    include("ir_tests.jl")
+
+    include("arrays.jl")
+    include("assignments.jl")
+    include("branching.jl")
+    include("closures.jl")
+    include("decls.jl")
+    include("destructuring.jl")
+    include("desugaring.jl")
+    include("exceptions.jl")
+    include("functions.jl")
+    include("generators.jl")
+    include("import.jl")
+    include("loops.jl")
+    @testset "macros" include("macros.jl")
+    include("misc.jl")
+    include("modules.jl")
+    include("quoting.jl")
+    include("scopes.jl")
+    include("typedefs.jl")
+    include("compat.jl")
+    include("hooks.jl")
+end
diff --git a/JuliaLowering/test/runtests_vendored.jl b/JuliaLowering/test/runtests_vendored.jl
new file mode 100644
index 0000000000000..77ce44a1bdff5
--- /dev/null
+++ b/JuliaLowering/test/runtests_vendored.jl
@@ -0,0 +1,19 @@
+old_active_project = Base.active_project()
+try
+    # test local (dev) copy of JuliaLowering, not yet vendored into Base
+    Base.set_active_project(joinpath(@__DIR__, "..", "Project.toml"))
+    manifest_path = joinpath(@__DIR__, "..", "Manifest.toml")
+
+    # restore error hints (emptied by `testdefs.jl`) so that errors print as
+    # JuliaLowering expects them to
+    Base.Experimental.register_error_hint(Base.UndefVarError_hint, UndefVarError)
+
+    # n.b.: these must be run in `Main`, so that type-printing is equivalent
+    # when running via Pkg.test() (e.g. "SyntaxGraph" should be printed instead
+    # of "JuliaLowering.SyntaxGraph")
+    @eval Main using JuliaLowering
+    Core.include(Main, joinpath(@__DIR__, "runtests.jl")) # run the actual tests
+finally
+    # Restore original load path and active project
+    Base.set_active_project(old_active_project)
+end
diff --git a/JuliaLowering/test/scopes.jl b/JuliaLowering/test/scopes.jl
new file mode 100644
index 0000000000000..e327343eb03e6
--- /dev/null
+++ b/JuliaLowering/test/scopes.jl
@@ -0,0 +1,80 @@
+@testset "Scopes" begin
+
+test_mod = Module()
+
+#-------------------------------------------------------------------------------
+# Scopes
+@test JuliaLowering.include_string(test_mod,
+"""
+let
+    y = 0
+    x = 1
+    let x = x + 1
+        y = x
+    end
+    (x, y)
+end
+""") == (1, 2)
+
+JuliaLowering.include_string(test_mod, """
+x = 101
+y = 202
+""")
+@test test_mod.x == 101
+@test test_mod.y == 202
+@test JuliaLowering.include_string(test_mod, "x + y") == 303
+
+@test JuliaLowering.include_string(test_mod, """
+begin
+    local x = 1
+    local x = 2
+    let (x,y) = (:x,:y)
+        (y,x)
+    end
+end
+""") === (:y,:x)
+
+# Types on left hand side of type decls refer to the outer scope
+# (In the flisp implementation they refer to the inner scope, but this seems
+# like a bug.)
+@test JuliaLowering.include_string(test_mod, """
+let x::Int = 10.0
+    local Int = Float64
+    x
+end
+""") === 10
+
+# Closures in let syntax can only capture values from the outside
+# (In the flisp implementation it captures from inner scope, but this is
+# inconsistent with let assignment where the rhs refers to the outer scope and
+# thus seems like a bug.)
+@test JuliaLowering.include_string(test_mod, """
+begin
+    local y = :outer_y
+    let f() = y
+        local y = :inner_y
+        f()
+    end
+end
+""") === :outer_y
+
+# wrap expression in scope block of `scope_type`
+function wrapscope(ex, scope_type)
+    g = JuliaLowering.ensure_attributes(ex._graph, scope_type=Symbol)
+    ex = JuliaLowering.reparent(g, ex)
+    makenode(ex, ex, K"scope_block", ex; scope_type=scope_type)
+end
+
+assign_z_2 = parsestmt(SyntaxTree, "begin z = 2 end", filename="foo.jl")
+Base.eval(test_mod, :(z=1))
+@test test_mod.z == 1
+# neutral (eg, for loops) and hard (eg, let) scopes create a new binding for z
+JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :neutral))
+@test test_mod.z == 1
+JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :hard))
+@test test_mod.z == 1
+# but wrapping neutral scope in soft scope uses the existing binding in test_mod
+JuliaLowering.eval(test_mod, wrapscope(wrapscope(assign_z_2, :neutral), :soft))
+@test test_mod.z == 2
+
+end
diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl
new file mode 100644
index 0000000000000..fc00174f144ff
--- /dev/null
+++ b/JuliaLowering/test/scopes_ir.jl
@@ -0,0 +1,532 @@
+using .JuliaLowering: @islocal
+using Base: @locals
+
+#*******************************************************************************
+########################################
+# let syntax with decl in binding list
+let x::T = rhs
+    local T = 1
+    T # <- This is a different `T` from the T in `x::T`
+end
+#---------------------
+1   TestMod.rhs
+2   TestMod.T
+3   (newvar slot₁/T)
+4   (= slot₃/tmp %₁)
+5   slot₃/tmp
+6   (call core.isa %₅ %₂)
+7   (gotoifnot %₆ label₉)
+8   (goto label₁₂)
+9   slot₃/tmp
+10  (call top.convert %₂ %₉)
+11  (= slot₃/tmp (call core.typeassert %₁₀ %₂))
+12  slot₃/tmp
+13  (= slot₂/x %₁₂)
+14  (= slot₁/T 1)
+15  slot₁/T
+16  (return %₁₅)
+
+########################################
+# let syntax with tuple on lhs
+let (x,y) = rhs
+end
+#---------------------
+1   TestMod.rhs
+2   (call top.indexed_iterate %₁ 1)
+3   (= slot₂/x (call core.getfield %₂ 1))
+4   (= slot₁/iterstate (call core.getfield %₂ 2))
+5   slot₁/iterstate
+6   (call top.indexed_iterate %₁ 2 %₅)
+7   (= slot₃/y (call core.getfield %₆ 1))
+8   (return core.nothing)
+
+########################################
+# let syntax with named tuple on lhs creates locals for the unpacked vars
+let (; x,y) = rhs
+end
+#---------------------
+1   TestMod.rhs
+2   (= slot₁/x (call top.getproperty %₁ :x))
+3   (= slot₂/y (call top.getproperty %₁ :y))
+4   (return core.nothing)
+
+########################################
+# Let syntax with the same name creates nested bindings
+let x = f(x), x = g(x)
+end
+#---------------------
+1   TestMod.f
+2   TestMod.x
+3   (call %₁ %₂)
+4   (= slot₁/x %₃)
+5   TestMod.g
+6   slot₁/x
+7   (call %₅ %₆)
+8   (= slot₂/x %₇)
+9   (return core.nothing)
+
+########################################
+# let syntax with a function definition in the binding list creates a closure
+let f() = body
+end
+#---------------------
+1   (call core.svec)
+2   (call core.svec)
+3   (call JuliaLowering.eval_closure_type TestMod :#f##0 %₁ %₂)
+4   latestworld
+5   TestMod.#f##0
+6   (new %₅)
+7   (= slot₁/f %₆)
+8   TestMod.#f##0
+9   (call core.svec %₈)
+10  (call core.svec)
+11  SourceLocation::1:5
+12  (call core.svec %₉ %₁₀ %₁₁)
+13  --- method core.nothing %₁₂
+    slots: [slot₁/#self#(!read)]
+    1   TestMod.body
+    2   (return %₁)
+14  latestworld
+15  (return core.nothing)
+
+########################################
+# Error: Invalid `let` var with K"::"
+let f[]::T = rhs
+end
+#---------------------
+LoweringError:
+let f[]::T = rhs
+#   └─┘ ── Invalid assignment location in let syntax
+end
+
+########################################
+# Error: Invalid `let` var
+let f[] = rhs
+end
+#---------------------
+LoweringError:
+let f[] = rhs
+#   └─┘ ── Invalid assignment location in let syntax
+end
+
+########################################
+# Error: Invalid function def in `let`
+let (obj::Callable)() = rhs
+end
+#---------------------
+LoweringError:
+let (obj::Callable)() = rhs
+#   └───────────────┘ ── Function signature does not define a local function name
+end
+
+########################################
+# @islocal with locals and undefined vars
+let x = 1
+    @islocal(a), @islocal(x)
+end
+#---------------------
+1   1
+2   (= slot₁/x %₁)
+3   (call core.tuple false true)
+4   (return %₃)
+
+########################################
+# @islocal with function arguments
+begin
+    local y = 2
+    function f(x)
+        @islocal(a), @islocal(x), @islocal(y)
+    end
+end
+#---------------------
+1   (= slot₁/y (call core.Box))
+2   2
+3   slot₁/y
+4   (call core.setfield! %₃ :contents %₂)
+5   (method TestMod.f)
+6   latestworld
+7   TestMod.f
+8   (call core.Typeof %₇)
+9   (call core.svec %₈ core.Any)
+10  (call core.svec)
+11  SourceLocation::3:14
+12  (call core.svec %₉ %₁₀ %₁₁)
+13  --- method core.nothing %₁₂
+    slots: [slot₁/#self#(!read) slot₂/x(!read)]
+    1   (call core.tuple false true true)
+    2   (return %₁)
+14  latestworld
+15  TestMod.f
+16  (return %₁₅)
+
+########################################
+# @islocal with global
+begin
+    global x
+    @islocal(x)
+end
+#---------------------
+1   (call core.declare_global TestMod :x false)
+2   latestworld
+3   (return false)
+
+########################################
+# @locals with local and global
+begin
+    global x
+    local y
+    @locals
+end
+#---------------------
+1   (newvar slot₁/y)
+2   (call core.declare_global TestMod :x false)
+3   latestworld
+4   (call core.apply_type top.Dict core.Symbol core.Any)
+5   (call %₄)
+6   (isdefined slot₁/y)
+7   (gotoifnot %₆ label₁₀)
+8   slot₁/y
+9   (call top.setindex! %₅ %₈ :y)
+10  (return %₅)
+
+########################################
+# @locals with function args (TODO: static parameters)
+function f(z)
+    @locals
+end
+#---------------------
+1   (method TestMod.f)
+2   latestworld
+3   TestMod.f
+4   (call core.Typeof %₃)
+5   (call core.svec %₄ core.Any)
+6   (call core.svec)
+7   SourceLocation::1:10
+8   (call core.svec %₅ %₆ %₇)
+9   --- method core.nothing %₈
+    slots: [slot₁/#self#(!read) slot₂/z]
+    1   (call core.apply_type top.Dict core.Symbol core.Any)
+    2   (call %₁)
+    3   (gotoifnot true label₅)
+    4   (call top.setindex! %₂ slot₂/z :z)
+    5   (return %₂)
+10  latestworld
+11  TestMod.f
+12  (return %₁₁)
+
+########################################
+# Error: Duplicate function argument names
+function f(x, x)
+end
+#---------------------
+LoweringError:
+function f(x, x)
+#             ╙ ── function argument name not unique
+end
+
+########################################
+# Error: Duplicate function argument with destructured arg
+function f(x, (x,))
+end
+#---------------------
+LoweringError:
+function f(x, (x,))
+#              ╙ ── function argument name not unique
+end
+
+########################################
+# Error: Static parameter name not unique
+function f(::T) where T where T
+end
+#---------------------
+LoweringError:
+function f(::T) where T where T
+#                     ╙ ── function static parameter name not unique
+end
+
+########################################
+# Error: static parameter colliding with argument names
+function f(x::x) where x
+end
+#---------------------
+LoweringError:
+function f(x::x) where x
+#                      ╙ ── static parameter name not distinct from function argument
+end
+
+########################################
+# Error: duplicate destructure args
+function f((x,), (x,))
+end
+#---------------------
+LoweringError:
+function f((x,), (x,))
+#                 ╙ ── function argument name not unique
+end
+
+########################################
+# Error: Conflicting local and global decls
+let
+    local x
+    global x
+end
+#---------------------
+LoweringError:
+let
+    local x
+    global x
+#          ╙ ── Variable `x` declared both local and global
+end
+
+########################################
+# Error: Conflicting argument and local
+function f(x)
+    local x
+end
+#---------------------
+LoweringError:
+function f(x)
+    local x
+#         ╙ ── local variable name `x` conflicts with an argument
+end
+
+########################################
+# Error: Conflicting argument and global
+function f(x)
+    global x
+end
+#---------------------
+LoweringError:
+function f(x)
+    global x
+#          ╙ ── global variable name `x` conflicts with an argument
+end
+
+########################################
+# Error: Conflicting destructured argument and global
+# TODO: The error could probably be a bit better here
+function f((x,))
+    global x
+end
+#---------------------
+LoweringError:
+function f((x,))
+    global x
+#          ╙ ── Variable `x` declared both local and global
+end
+
+########################################
+# Error: Conflicting static parameter and local
+function f(::T) where T
+    local T
+end
+#---------------------
+LoweringError:
+function f(::T) where T
+    local T
+#         ╙ ── local variable name `T` conflicts with a static parameter
+end
+
+########################################
+# Error: Conflicting static parameter and global
+function f(::T) where T
+    global T
+end
+#---------------------
+LoweringError:
+function f(::T) where T
+    global T
+#          ╙ ── global variable name `T` conflicts with a static parameter
+end
+
+########################################
+# Error: Conflicting static parameter and local in nested scope
+function f(::T) where T
+    let
+        local T
+    end
+end
+#---------------------
+LoweringError:
+function f(::T) where T
+    let
+        local T
+#             ╙ ── local variable name `T` conflicts with a static parameter
+    end
+end
+
+########################################
+# Error: Conflicting static parameter and global in nested scope
+function f(::T) where T
+    let
+        global T
+    end
+end
+#---------------------
+LoweringError:
+function f(::T) where T
+    let
+        global T
+#              ╙ ── global variable name `T` conflicts with a static parameter
+    end
+end
+
+########################################
+# Error: Conflicting static parameter and implicit local
+function f(::T) where T
+    let
+        T = rhs
+    end
+end
+#---------------------
+LoweringError:
+function f(::T) where T
+    let
+        T = rhs
+#       ╙ ── local variable name `T` conflicts with a static parameter
+    end
+end
+
+########################################
+# Error: Attempt to add methods to a function argument
+function f(g)
+    function g()
+    end
+end
+#---------------------
+LoweringError:
+function f(g)
+    function g()
+#            ╙ ── Cannot add method to a function argument
+    end
+end
+
+########################################
+# Error: Global method definition inside function scope
+function f()
+    global global_method
+    function global_method()
+    end
+end
+#---------------------
+LoweringError:
+function f()
+    global global_method
+    function global_method()
+#            └───────────┘ ── Global method definition needs to be placed at the top level, or use `eval()`
+    end
+end
+
+########################################
+# @isdefined with defined variables
+let x = 1
+    @isdefined x
+    @isdefined y
+end
+#---------------------
+1   1
+2   (= slot₁/x %₁)
+3   (call core.isdefinedglobal TestMod :y false)
+4   (return %₃)
+
+########################################
+# Global function defined inside let (let over lambda)
+let x = 1
+    global f(y) = x = y
+    global g() = x
+end
+#---------------------
+1   1
+2   (= slot₁/x (call core.Box))
+3   slot₁/x
+4   (call core.setfield! %₃ :contents %₁)
+5   (call core.declare_global TestMod :f false)
+6   latestworld
+7   (method TestMod.f)
+8   latestworld
+9   TestMod.f
+10  (call core.Typeof %₉)
+11  (call core.svec %₁₀ core.Any)
+12  (call core.svec)
+13  SourceLocation::2:12
+14  (call core.svec %₁₁ %₁₂ %₁₃)
+15  --- code_info
+    slots: [slot₁/#self#(!read) slot₂/y]
+    1   slot₂/y
+    2   (captured_local 1)
+    3   (call core.setfield! %₂ :contents %₁)
+    4   (return %₁)
+16  slot₁/x
+17  (call core.svec %₁₆)
+18  (call JuliaLowering.replace_captured_locals! %₁₅ %₁₇)
+19  --- method core.nothing %₁₄ %₁₈
+20  latestworld
+21  (call core.declare_global TestMod :g false)
+22  latestworld
+23  (method TestMod.g)
+24  latestworld
+25  TestMod.g
+26  (call core.Typeof %₂₅)
+27  (call core.svec %₂₆)
+28  (call core.svec)
+29  SourceLocation::3:12
+30  (call core.svec %₂₇ %₂₈ %₂₉)
+31  --- code_info
+    slots: [slot₁/#self#(!read) slot₂/x(!read)]
+    1   (captured_local 1)
+    2   (call core.isdefined %₁ :contents)
+    3   (gotoifnot %₂ label₅)
+    4   (goto label₇)
+    5   (newvar slot₂/x)
+    6   slot₂/x
+    7   (call core.getfield %₁ :contents)
+    8   (return %₇)
+32  slot₁/x
+33  (call core.svec %₃₂)
+34  (call JuliaLowering.replace_captured_locals! %₃₁ %₃₃)
+35  --- method core.nothing %₃₀ %₃₄
+36  latestworld
+37  TestMod.g
+38  (return %₃₇)
+
+########################################
+# Modify assignment operator on closure variable
+let x = 1
+    global f() = x += 1
+end
+#---------------------
+1   1
+2   (= slot₁/x (call core.Box))
+3   slot₁/x
+4   (call core.setfield! %₃ :contents %₁)
+5   (call core.declare_global TestMod :f false)
+6   latestworld
+7   (method TestMod.f)
+8   latestworld
+9   TestMod.f
+10  (call core.Typeof %₉)
+11  (call core.svec %₁₀)
+12  (call core.svec)
+13  SourceLocation::2:12
+14  (call core.svec %₁₁ %₁₂ %₁₃)
+15  --- code_info
+    slots: [slot₁/#self#(!read) slot₂/x(!read)]
+    1   TestMod.+
+    2   (captured_local 1)
+    3   (call core.isdefined %₂ :contents)
+    4   (gotoifnot %₃ label₆)
+    5   (goto label₈)
+    6   (newvar slot₂/x)
+    7   slot₂/x
+    8   (call core.getfield %₂ :contents)
+    9   (call %₁ %₈ 1)
+    10  (captured_local 1)
+    11  (call core.setfield! %₁₀ :contents %₉)
+    12  (return %₉)
+16  slot₁/x
+17  (call core.svec %₁₆)
+18  (call JuliaLowering.replace_captured_locals! %₁₅ %₁₇)
+19  --- method core.nothing %₁₄ %₁₈
+20  latestworld
+21  TestMod.f
+22  (return %₂₁)
diff --git a/JuliaLowering/test/syntax_graph.jl b/JuliaLowering/test/syntax_graph.jl
new file mode 100644
index 0000000000000..60fd10dde0c20
--- /dev/null
+++ b/JuliaLowering/test/syntax_graph.jl
@@ -0,0 +1,110 @@
+@testset "SyntaxGraph attrs" begin
+    st = parsestmt(SyntaxTree, "function foo end")
+    g_init = JuliaLowering.unfreeze_attrs(st._graph)
+    gf1 = JuliaLowering.freeze_attrs(g_init)
+    gu1 = JuliaLowering.unfreeze_attrs(gf1)
+
+    # Check that freeze/unfreeze do their jobs
+    @test gf1.attributes isa NamedTuple
+    @test gu1.attributes isa Dict
+    @test Set(keys(gf1.attributes)) == Set(keys(gu1.attributes))
+
+    # ensure_attributes
+    gf2 = JuliaLowering.ensure_attributes(gf1, test_attr=Symbol, foo=Type)
+    gu2 = JuliaLowering.ensure_attributes(gu1, test_attr=Symbol, foo=Type)
+    # returns a graph with the same attribute storage
+    @test gf2.attributes isa NamedTuple
+    @test gu2.attributes isa Dict
+    # does its job
+    @test (:test_attr=>Symbol) in JuliaLowering.attrdefs(gf2)
+    @test (:foo=>Type) in JuliaLowering.attrdefs(gf2)
+    @test Set(keys(gf2.attributes)) == Set(keys(gu2.attributes))
+    # no mutation
+    @test !((:test_attr=>Symbol) in JuliaLowering.attrdefs(gf1))
+    @test !((:foo=>Type) in JuliaLowering.attrdefs(gf1))
+    @test Set(keys(gf1.attributes)) == Set(keys(gu1.attributes))
+
+    # delete_attributes
+    gf3 = JuliaLowering.delete_attributes(gf2, :test_attr, :foo)
+    gu3 = JuliaLowering.delete_attributes(gu2, :test_attr, :foo)
+    # returns a graph with the same attribute storage
+    @test gf3.attributes isa NamedTuple
+    @test gu3.attributes isa Dict
+    # does its job
+    @test !((:test_attr=>Symbol) in JuliaLowering.attrdefs(gf3))
+    @test !((:foo=>Type) in JuliaLowering.attrdefs(gf3))
+    @test Set(keys(gf3.attributes)) == Set(keys(gu3.attributes))
+    # no mutation
+    @test (:test_attr=>Symbol) in JuliaLowering.attrdefs(gf2)
+    @test (:foo=>Type) in JuliaLowering.attrdefs(gf2)
+    @test Set(keys(gf2.attributes)) == Set(keys(gu2.attributes))
+end
+
+@testset "SyntaxTree" begin
+    # Expr conversion
+    @test Expr(parsestmt(SyntaxTree, "begin a + b ; c end", filename="none")) ==
+        Meta.parse("begin a + b ; c end")
+
+    tree1 = JuliaLowering.@SyntaxTree :(some_unique_identifier)
+    @test tree1 isa SyntaxTree
+    @test kind(tree1) == K"Identifier"
+    @test tree1.name_val == "some_unique_identifier"
+
+    tree2 = JuliaLowering.@SyntaxTree quote
+        x
+        $tree1
+    end
+    @test tree2 isa SyntaxTree
+    @test kind(tree2) == K"block"
+    @test kind(tree2[1]) == K"Identifier" && tree2[1].name_val == "x"
+    @test kind(tree2[2]) == K"Identifier" && tree2[2].name_val == "some_unique_identifier"
+
+    "For filling required attrs in graphs created by hand"
+    function testgraph(edge_ranges, edges, more_attrs...)
+        kinds = Dict(map(i->(i=>K"block"), eachindex(edge_ranges)))
+        sources = Dict(map(i->(i=>LineNumberNode(i)), eachindex(edge_ranges)))
+        SyntaxGraph(
+            edge_ranges,
+            edges,
+            Dict(:kind => kinds, :source => sources, more_attrs...))
+    end
+
+    @testset "copy_ast" begin
+        # 1 --> 2 --> 3     src(7-9) = line 7-9
+        # 4 --> 5 --> 6     src(i) = i + 3
+        # 7 --> 8 --> 9
+        g = testgraph([1:1, 2:2, 0:-1, 3:3, 4:4, 0:-1, 5:5, 6:6, 0:-1],
+                      [2, 3, 5, 6, 8, 9],
+                      :source => Dict(enumerate([
+                          map(i->i+3, 1:6)...
+                          map(LineNumberNode, 7:9)...])))
+        st = SyntaxTree(g, 1)
+        stcopy = JuliaLowering.copy_ast(g, st)
+        # Each node should be copied once
+        @test length(g.edge_ranges) === 18
+        @test st._id != stcopy._id
+        @test st ≈ stcopy
+        @test st.source !== stcopy.source
+        @test st.source[1] !== stcopy.source[1]
+        @test st.source[1][1] !== stcopy.source[1][1]
+
+        stcopy2 = JuliaLowering.copy_ast(g, st; copy_source=false)
+        # Only nodes 1-3 should be copied
+        @test length(g.edge_ranges) === 21
+        @test st._id != stcopy2._id
+        @test st ≈ stcopy2
+        @test st.source === stcopy2.source
+        @test st.source[1] === stcopy2.source[1]
+        @test st.source[1][1] === stcopy2.source[1][1]
+
+        # Copy into a new graph
+        new_g = ensure_attributes!(SyntaxGraph(); JuliaLowering.attrdefs(g)...)
+        stcopy3 = JuliaLowering.copy_ast(new_g, st)
+        @test length(new_g.edge_ranges) === 9
+        @test st ≈ stcopy3
+
+        new_g = ensure_attributes!(SyntaxGraph(); JuliaLowering.attrdefs(g)...)
+        # Disallow for now, since we can't prevent dangling sourcerefs
+        @test_throws ErrorException JuliaLowering.copy_ast(new_g, st; copy_source=false)
+    end
+end
diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl
new file mode 100644
index 0000000000000..f995d0ed1c9b5
--- /dev/null
+++ b/JuliaLowering/test/typedefs.jl
@@ -0,0 +1,316 @@
+@testset "Type definitions" begin
+
+test_mod = Module(:TestMod)
+
+Base.eval(test_mod, :(struct XX{S,T,U,W} end))
+
+@test JuliaLowering.include_string(test_mod, """
+XX{Int, <:Integer, Float64, >:AbstractChar}
+""") == (test_mod.XX{Int, T, Float64, S} where {T <: Integer, S >: AbstractChar})
+
+@test JuliaLowering.include_string(test_mod, """
+abstract type A end
+""") === nothing
+@test supertype(test_mod.A) === Any
+@test isabstracttype(test_mod.A)
+
+@test JuliaLowering.include_string(test_mod, """
+abstract type B <: A end
+""") === nothing
+@test supertype(test_mod.B) === test_mod.A
+
+@test JuliaLowering.include_string(test_mod, """
+abstract type C{X} end
+""") === nothing
+
+@test JuliaLowering.include_string(test_mod, """
+abstract type D{X<:A} end
+""") === nothing
+@test test_mod.D{test_mod.B} isa Type
+@test_throws Exception test_mod.D{Int}
+
+@test JuliaLowering.include_string(test_mod, """
+abstract type E <: C{E} end
+""") === nothing
+@test test_mod.E isa Type
+
+@test JuliaLowering.include_string(test_mod, """
+primitive type P <: A 16 end
+""") === nothing
+@test isconcretetype(test_mod.P)
+@test supertype(test_mod.P) === test_mod.A
+@test reinterpret(test_mod.P, 0x0001) isa test_mod.P
+@test reinterpret(UInt16, reinterpret(test_mod.P, 0x1337)) === 0x1337
+
+@test JuliaLowering.include_string(test_mod, """
+struct S1{X,Y} <: A
+    x::X
+    y::Y
+    z
+end
+""") === nothing
+@test !isconcretetype(test_mod.S1)
+@test fieldnames(test_mod.S1) == (:x, :y, :z)
+@test fieldtypes(test_mod.S1) == (Any, Any, Any)
+@test isconcretetype(test_mod.S1{Int,String})
+@test fieldtypes(test_mod.S1{Int,String}) == (Int, String, Any)
+@test supertype(test_mod.S1) == test_mod.A
+
+# Inner constructors: one field non-Any
+@test JuliaLowering.include_string(test_mod, """
+struct S2
+    x::Int
+    y
+end
+""") === nothing
+@test length(methods(test_mod.S2)) == 2
+let s = test_mod.S2(42, "hi")
+    # exact types
+    @test s.x === 42
+    @test s.y == "hi"
+end
+let s = test_mod.S2(42.0, "hi")
+    # converted types
+    @test s.x === 42
+    @test s.y == "hi"
+end
+
+# Constructors: All fields Any
+@test JuliaLowering.include_string(test_mod, """
+struct S3
+    x
+    y
+end
+""") === nothing
+@test length(methods(test_mod.S3)) == 1
+let s = test_mod.S3(42, "hi")
+    @test s.x === 42
+    @test s.y == "hi"
+end
+
+# Inner constructors: All fields Any; dynamically tested against whatever
+# S4_Field resolves to
+@test JuliaLowering.include_string(test_mod, """
+S4_Field = Any # actually Any!
+
+struct S4
+    x::S4_Field
+    y
+end
+""") === nothing
+@test length(methods(test_mod.S4)) == 1
+let s = test_mod.S4(42, "hi")
+    @test s.x === 42
+    @test s.y == "hi"
+end
+
+# Inner & outer constructors; parameterized types
+@test JuliaLowering.include_string(test_mod, """
+struct S5{U}
+    x::U
+    y
+end
+""") === nothing
+@test length(methods(test_mod.S5)) == 1
+let s = test_mod.S5(42, "hi")
+    @test s isa test_mod.S5{Int}
+    @test s.x === 42
+    @test s.y == "hi"
+end
+@test length(methods(test_mod.S5{Int})) == 1
+let s = test_mod.S5{Int}(42.0, "hi")
+    @test s isa test_mod.S5{Int}
+    @test s.x === 42
+    @test s.y == "hi"
+end
+let s = test_mod.S5{Any}(42.0, "hi")
+    @test s isa test_mod.S5{Any}
+    @test s.x === 42.0
+    @test s.y == "hi"
+end
+@test JuliaLowering.include_string(test_mod, """
+function S5{Int}(x::Int)
+    S5(x, x)
+end
+""") === nothing
+let s = test_mod.S5{Int}(1)
+    @test s.x === 1
+    @test s.y === 1
+    @test s isa test_mod.S5{Int}
+end
+@test_throws MethodError test_mod.S5{Int}(1.1)
+@test JuliaLowering.include_string(test_mod, """
+function S5{T}(x, y, z) where {T<:AbstractFloat}
+    S5(x, x)
+end
+""") === nothing
+let s = test_mod.S5{Float64}(Float64(1.1), 0, 0)
+    @test s.x === 1.1
+    @test s.y === 1.1
+    @test s isa test_mod.S5{Float64}
+end
+@test JuliaLowering.include_string(test_mod, """
+S5{<:AbstractFloat}(x) = S5(x, x)
+""") === nothing
+let s = test_mod.S5{<:AbstractFloat}(Float64(1.1))
+    @test s.x === 1.1
+    @test s.y === 1.1
+    @test s isa test_mod.S5{Float64}
+end
+@test JuliaLowering.include_string(test_mod, """
+S5{T}(x::T) where {T<:Real} = S5(x, x)
+""") === nothing
+let s = test_mod.S5{Real}(pi)
+    @test s.x === pi
+    @test s.y === pi
+    @test s isa test_mod.S5{<:Real}
+end
+outer_mod = Module()
+@test JuliaLowering.include_string(test_mod, """
+Base.Vector{T}(x::T) where {S5<:T<:S5} = T[x]
+""") === nothing
+let v = Base.Vector{test_mod.S5}(test_mod.S5(1,1))
+    @test v isa Vector{test_mod.S5}
+    @test v[1] === test_mod.S5(1,1)
+end
+
+# User defined inner constructors and helper functions for structs without type params
+@test JuliaLowering.include_string(test_mod, """
+struct S6
+    x
+    S6_f() = new(42)
+
+    "some docs"
+    S6() = S6_f()
+    S6(x) = new(x)
+end
+""") === nothing
+let s = test_mod.S6()
+    @test s isa test_mod.S6
+    @test s.x === 42
+end
+let s = test_mod.S6(2)
+    @test s isa test_mod.S6
+    @test s.x === 2
+end
+@test docstrings_equal(@doc(test_mod.S6), Markdown.doc"some docs")
+
+# User defined inner constructors and helper functions for structs with type params
+@test JuliaLowering.include_string(test_mod, """
+struct S7{S,T}
+    x::S
+    y
+
+    # Cases where full struct type may be deduced and used in body
+    S7{Int,String}() = new(10.0, "y1")
+    S7{S,T}() where {S,T} = new(10.0, "y2")
+    S7{Int,T}() where {T} = new(10.0, "y3")
+    (::Type{S7{Int,UInt8}})() = new{Int,UInt8}(10.0, "y4")
+
+    # Cases where new{...} is called
+    S7() = new{Int,Int}(10.0, "y5")
+    S7{UInt8}() = S7_f()
+    S7_f() = new{UInt8,UInt8}(10.0, "y6")
+end
+""") === nothing
+let s = test_mod.S7{Int,String}()
+    @test s isa test_mod.S7{Int,String}
+    @test s.x === 10
+    @test s.y === "y1"
+end
+let s = test_mod.S7{UInt16,UInt16}()
+    @test s isa test_mod.S7{UInt16,UInt16}
+    @test s.x === UInt16(10)
+    @test s.y === "y2"
+end
+let s = test_mod.S7{Int,UInt16}()
+    @test s isa test_mod.S7{Int,UInt16}
+    @test s.x === 10
+    @test s.y === "y3"
+end
+let s = test_mod.S7{Int,UInt8}()
+    @test s isa test_mod.S7{Int,UInt8}
+    @test s.x === 10
+    @test s.y === "y4"
+end
+let s = test_mod.S7()
+    @test s isa test_mod.S7{Int,Int}
+    @test s.x === 10
+    @test s.y === "y5"
+end
+let s = test_mod.S7{UInt8}()
+    @test s isa test_mod.S7{UInt8,UInt8}
+    @test s.x === UInt8(10)
+    @test s.y === "y6"
+end
+
+# new() with splats and typed fields
+@test JuliaLowering.include_string(test_mod, """
+struct S8
+    x::Int
+    y::Float64
+
+    S8(xs, ys) = new(xs..., ys...)
+end
+""") === nothing
+let s = test_mod.S8((10.0,), (20,))
+    @test s isa test_mod.S8
+    @test s.x === 10
+    @test s.y === 20.0
+end
+# Wrong number of args checked by lowering
+@test_throws ArgumentError test_mod.S8((1,), ())
+@test_throws ArgumentError test_mod.S8((1,2,3), ())
+
+# new() with splats and untyped fields
+@test JuliaLowering.include_string(test_mod, """
+struct S9
+    x
+    y
+
+    S9(xs) = new(xs...)
+end
+""") === nothing
+let s = test_mod.S9((10.0,20))
+    @test s isa test_mod.S9
+    @test s.x === 10.0
+    @test s.y === 20
+end
+# Wrong number of args checked by the runtime
+@test_throws ArgumentError test_mod.S9((1,))
+@test_throws ArgumentError test_mod.S9((1,2,3))
+
+# Test cases from
+# https://github.com/JuliaLang/julia/issues/36104
+# https://github.com/JuliaLang/julia/pull/36121
+JuliaLowering.include_string(test_mod, """
+# issue #36104
+module M36104
+struct T36104
+    v::Vector{M36104.T36104}
+end
+struct T36104   # check that redefining it works, issue #21816
+    v::Vector{T36104}
+end
+end
+""")
+@test fieldtypes(test_mod.M36104.T36104) == (Vector{test_mod.M36104.T36104},)
+@test_throws ErrorException("expected") JuliaLowering.include_string(test_mod, """struct X36104; x::error("expected"); end""")
+@test !isdefined(test_mod, :X36104)
+JuliaLowering.include_string(test_mod, "struct X36104; x::Int; end")
+@test fieldtypes(test_mod.X36104) == (Int,)
+JuliaLowering.include_string(test_mod, "primitive type P36104 8 end")
+JuliaLowering.include_string(test_mod, "const orig_P36104 = P36104")
+JuliaLowering.include_string(test_mod, "primitive type P36104 16 end")
+@test test_mod.P36104 !== test_mod.orig_P36104
+
+# Struct with outer constructor where one typevar is constrained by the other
+# See https://github.com/JuliaLang/julia/issues/27269)
+@test JuliaLowering.include_string(test_mod, """
+struct X27269{T, S <: Vector{T}}
+    v::Vector{S}
+end
+""") === nothing
+@test test_mod.X27269([[1,2]]) isa test_mod.X27269{Int, Vector{Int}}
+
+end
diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl
new file mode 100644
index 0000000000000..280f2719d6d6c
--- /dev/null
+++ b/JuliaLowering/test/typedefs_ir.jl
@@ -0,0 +1,1370 @@
+########################################
+# where expression without type bounds
+A where X
+#---------------------
+1   (call core.TypeVar :X)
+2   (= slot₁/X %₁)
+3   slot₁/X
+4   TestMod.A
+5   (call core.UnionAll %₃ %₄)
+6   (return %₅)
+
+########################################
+# where expression with upper bound
+A where X <: UB
+#---------------------
+1   TestMod.UB
+2   (call core.TypeVar :X %₁)
+3   (= slot₁/X %₂)
+4   slot₁/X
+5   TestMod.A
+6   (call core.UnionAll %₄ %₅)
+7   (return %₆)
+
+########################################
+# where expression with lower bound
+A where X >: LB
+#---------------------
+1   TestMod.LB
+2   (call core.TypeVar :X %₁ core.Any)
+3   (= slot₁/X %₂)
+4   slot₁/X
+5   TestMod.A
+6   (call core.UnionAll %₄ %₅)
+7   (return %₆)
+
+########################################
+# where expression with both bounds
+A where LB <: X <: UB
+#---------------------
+1   TestMod.LB
+2   TestMod.UB
+3   (call core.TypeVar :X %₁ %₂)
+4   (= slot₁/X %₃)
+5   slot₁/X
+6   TestMod.A
+7   (call core.UnionAll %₅ %₆)
+8   (return %₇)
+
+########################################
+# where expression with braces
+A where {X, Y<:X}
+#---------------------
+1   (call core.TypeVar :X)
+2   (= slot₁/X %₁)
+3   slot₁/X
+4   slot₁/X
+5   (call core.TypeVar :Y %₄)
+6   (= slot₂/Y %₅)
+7   slot₂/Y
+8   TestMod.A
+9   (call core.UnionAll %₇ %₈)
+10  (call core.UnionAll %₃ %₉)
+11  (return %₁₀)
+
+########################################
+# Equivalent nested where expression without braces
+A where Y<:X where X
+#---------------------
+1   (call core.TypeVar :X)
+2   (= slot₁/X %₁)
+3   slot₁/X
+4   slot₁/X
+5   (call core.TypeVar :Y %₄)
+6   (= slot₂/Y %₅)
+7   slot₂/Y
+8   TestMod.A
+9   (call core.UnionAll %₇ %₈)
+10  (call core.UnionAll %₃ %₉)
+11  (return %₁₀)
+
+########################################
+# Error: bad type bounds
+A where f()
+#---------------------
+LoweringError:
+A where f()
+#       └─┘ ── expected type name or type bounds
+
+########################################
+# Error: bad type bounds
+A where X < Y < Z
+#---------------------
+LoweringError:
+A where X < Y < Z
+#       └───────┘ ── invalid type bounds
+
+########################################
+# Error: bad type bounds
+A where X <: f() <: Z
+#---------------------
+LoweringError:
+A where X <: f() <: Z
+#            └─┘ ── expected type name
+
+########################################
+# Error: bad type bounds
+A where f() <: Y
+#---------------------
+LoweringError:
+A where f() <: Y
+#       └─┘ ── expected type name
+
+########################################
+# Error: bad type bounds
+A where Y >: f()
+#---------------------
+LoweringError:
+A where Y >: f()
+#            └─┘ ── expected type name
+
+########################################
+# Simple type application
+X{A,B,C}
+#---------------------
+1   TestMod.X
+2   TestMod.A
+3   TestMod.B
+4   TestMod.C
+5   (call core.apply_type %₁ %₂ %₃ %₄)
+6   (return %₅)
+
+########################################
+# Type with implicit where param upper bound
+X{<:A}
+#---------------------
+1   TestMod.A
+2   (call core.TypeVar :#T1 %₁)
+3   TestMod.X
+4   (call core.apply_type %₃ %₂)
+5   (call core.UnionAll %₂ %₄)
+6   (return %₅)
+
+########################################
+# Type with implicit where param lower bound
+X{>:A}
+#---------------------
+1   TestMod.A
+2   (call core.TypeVar :#T1 %₁ core.Any)
+3   TestMod.X
+4   (call core.apply_type %₃ %₂)
+5   (call core.UnionAll %₂ %₄)
+6   (return %₅)
+
+########################################
+# Type with several implicit where params
+X{S, <:A, T, >:B}
+#---------------------
+1   TestMod.A
+2   (call core.TypeVar :#T1 %₁)
+3   TestMod.B
+4   (call core.TypeVar :#T2 %₃ core.Any)
+5   TestMod.X
+6   TestMod.S
+7   TestMod.T
+8   (call core.apply_type %₅ %₆ %₂ %₇ %₄)
+9   (call core.UnionAll %₄ %₈)
+10  (call core.UnionAll %₂ %₉)
+11  (return %₁₀)
+
+########################################
+# Error: parameters in type application
+X{S, T; W}
+#---------------------
+LoweringError:
+X{S, T; W}
+#     └─┘ ── unexpected semicolon in type parameter list
+
+########################################
+# Error: assignment in type application
+X{S, T=w}
+#---------------------
+LoweringError:
+X{S, T=w}
+#   └──┘ ── misplace assignment in type parameter list
+
+########################################
+# Simple abstract type definition
+abstract type A end
+#---------------------
+1   (call core.svec)
+2   (call core._abstracttype TestMod :A %₁)
+3   (= slot₁/A %₂)
+4   (call core._setsuper! %₂ core.Any)
+5   slot₁/A
+6   (call core._typebody! false %₅)
+7   (call core.declare_global TestMod :A false)
+8   latestworld
+9   (call core.isdefinedglobal TestMod :A false)
+10  (gotoifnot %₉ label₁₅)
+11  TestMod.A
+12  (call core._equiv_typedef %₁₁ %₂)
+13  (gotoifnot %₁₂ label₁₅)
+14  (goto label₁₇)
+15  (call core.declare_const TestMod :A %₂)
+16  latestworld
+17  (return core.nothing)
+
+########################################
+# Abstract type definition with supertype
+abstract type A <: B end
+#---------------------
+1   (call core.svec)
+2   (call core._abstracttype TestMod :A %₁)
+3   (= slot₁/A %₂)
+4   TestMod.B
+5   (call core._setsuper! %₂ %₄)
+6   slot₁/A
+7   (call core._typebody! false %₆)
+8   (call core.declare_global TestMod :A false)
+9   latestworld
+10  (call core.isdefinedglobal TestMod :A false)
+11  (gotoifnot %₁₀ label₁₆)
+12  TestMod.A
+13  (call core._equiv_typedef %₁₂ %₂)
+14  (gotoifnot %₁₃ label₁₆)
+15  (goto label₁₈)
+16  (call core.declare_const TestMod :A %₂)
+17  latestworld
+18  (return core.nothing)
+
+########################################
+# Abstract type definition with multiple typevars
+abstract type A{X, Y <: X} end
+#---------------------
+1   (= slot₂/X (call core.TypeVar :X))
+2   slot₂/X
+3   (= slot₃/Y (call core.TypeVar :Y %₂))
+4   slot₂/X
+5   slot₃/Y
+6   (call core.svec %₄ %₅)
+7   (call core._abstracttype TestMod :A %₆)
+8   (= slot₁/A %₇)
+9   (call core._setsuper! %₇ core.Any)
+10  slot₁/A
+11  (call core._typebody! false %₁₀)
+12  (call core.declare_global TestMod :A false)
+13  latestworld
+14  (call core.isdefinedglobal TestMod :A false)
+15  (gotoifnot %₁₄ label₂₀)
+16  TestMod.A
+17  (call core._equiv_typedef %₁₆ %₇)
+18  (gotoifnot %₁₇ label₂₀)
+19  (goto label₂₂)
+20  (call core.declare_const TestMod :A %₇)
+21  latestworld
+22  (return core.nothing)
+
+########################################
+# Error: Abstract type definition with bad signature
+abstract type A() end
+#---------------------
+LoweringError:
+abstract type A() end
+#             └─┘ ── invalid type signature
+
+########################################
+# Error: Abstract type definition with bad signature
+abstract type A(){T} end
+#---------------------
+LoweringError:
+abstract type A(){T} end
+#             └────┘ ── invalid type signature
+
+########################################
+# Error: Abstract type definition with bad signature
+abstract type A() <: B end
+#---------------------
+LoweringError:
+abstract type A() <: B end
+#            └───────┘ ── invalid type signature
+
+########################################
+# Error: Abstract type definition in function scope
+function f()
+    abstract type A end
+end
+#---------------------
+LoweringError:
+function f()
+    abstract type A end
+#   └─────────────────┘ ── this syntax is only allowed in top level code
+end
+
+########################################
+# Simple primitive type definition
+primitive type P 8 end
+#---------------------
+1   (call core.svec)
+2   (call core._primitivetype TestMod :P %₁ 8)
+3   (= slot₁/P %₂)
+4   (call core._setsuper! %₂ core.Any)
+5   slot₁/P
+6   (call core._typebody! false %₅)
+7   (call core.declare_global TestMod :P false)
+8   latestworld
+9   (call core.isdefinedglobal TestMod :P false)
+10  (gotoifnot %₉ label₁₅)
+11  TestMod.P
+12  (call core._equiv_typedef %₁₁ %₂)
+13  (gotoifnot %₁₂ label₁₅)
+14  (goto label₁₇)
+15  (call core.declare_const TestMod :P %₂)
+16  latestworld
+17  (return core.nothing)
+
+########################################
+# Complex primitive type definition
+primitive type P{X,Y} <: Z 32 end
+#---------------------
+1   (= slot₂/X (call core.TypeVar :X))
+2   (= slot₃/Y (call core.TypeVar :Y))
+3   slot₂/X
+4   slot₃/Y
+5   (call core.svec %₃ %₄)
+6   (call core._primitivetype TestMod :P %₅ 32)
+7   (= slot₁/P %₆)
+8   TestMod.Z
+9   (call core._setsuper! %₆ %₈)
+10  slot₁/P
+11  (call core._typebody! false %₁₀)
+12  (call core.declare_global TestMod :P false)
+13  latestworld
+14  (call core.isdefinedglobal TestMod :P false)
+15  (gotoifnot %₁₄ label₂₀)
+16  TestMod.P
+17  (call core._equiv_typedef %₁₆ %₆)
+18  (gotoifnot %₁₇ label₂₀)
+19  (goto label₂₂)
+20  (call core.declare_const TestMod :P %₆)
+21  latestworld
+22  (return core.nothing)
+
+########################################
+# Primitive type definition with computed size (should this be allowed??)
+primitive type P P_nbits() end
+#---------------------
+1   (call core.svec)
+2   TestMod.P_nbits
+3   (call %₂)
+4   (call core._primitivetype TestMod :P %₁ %₃)
+5   (= slot₁/P %₄)
+6   (call core._setsuper! %₄ core.Any)
+7   slot₁/P
+8   (call core._typebody! false %₇)
+9   (call core.declare_global TestMod :P false)
+10  latestworld
+11  (call core.isdefinedglobal TestMod :P false)
+12  (gotoifnot %₁₁ label₁₇)
+13  TestMod.P
+14  (call core._equiv_typedef %₁₃ %₄)
+15  (gotoifnot %₁₄ label₁₇)
+16  (goto label₁₉)
+17  (call core.declare_const TestMod :P %₄)
+18  latestworld
+19  (return core.nothing)
+
+########################################
+# Empty struct
+struct X
+end
+#---------------------
+1   (call core.declare_global TestMod :X false)
+2   latestworld
+3   (call core.svec)
+4   (call core.svec)
+5   (call core.svec)
+6   (call core._structtype TestMod :X %₃ %₄ %₅ false 0)
+7   (= slot₁/X %₆)
+8   (call core._setsuper! %₆ core.Any)
+9   (call core.isdefinedglobal TestMod :X false)
+10  (gotoifnot %₉ label₁₄)
+11  TestMod.X
+12  (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆))
+13  (goto label₁₅)
+14  (= slot₂/if_val false)
+15  slot₂/if_val
+16  (gotoifnot %₁₅ label₂₀)
+17  TestMod.X
+18  (= slot₃/if_val %₁₇)
+19  (goto label₂₁)
+20  (= slot₃/if_val false)
+21  slot₃/if_val
+22  (gotoifnot %₁₅ label₂₃)
+23  (call core.svec)
+24  (call core._typebody! %₂₁ %₆ %₂₃)
+25  (call core.declare_const TestMod :X %₂₄)
+26  latestworld
+27  TestMod.X
+28  (call core.apply_type core.Type %₂₇)
+29  (call core.svec %₂₈)
+30  (call core.svec)
+31  SourceLocation::1:1
+32  (call core.svec %₂₉ %₃₀ %₃₁)
+33  --- method core.nothing %₃₂
+    slots: [slot₁/#self#(!read)]
+    1   TestMod.X
+    2   (new %₁)
+    3   (return %₂)
+34  latestworld
+35  (return core.nothing)
+
+########################################
+# Empty struct with empty ctor
+struct X
+    X() = new()
+end
+#---------------------
+1   (call core.declare_global TestMod :X false)
+2   latestworld
+3   (call core.svec)
+4   (call core.svec)
+5   (call core.svec)
+6   (call core._structtype TestMod :X %₃ %₄ %₅ false 0)
+7   (= slot₁/X %₆)
+8   (call core._setsuper! %₆ core.Any)
+9   (call core.isdefinedglobal TestMod :X false)
+10  (gotoifnot %₉ label₁₄)
+11  TestMod.X
+12  (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆))
+13  (goto label₁₅)
+14  (= slot₂/if_val false)
+15  slot₂/if_val
+16  (gotoifnot %₁₅ label₂₀)
+17  TestMod.X
+18  (= slot₃/if_val %₁₇)
+19  (goto label₂₁)
+20  (= slot₃/if_val false)
+21  slot₃/if_val
+22  (gotoifnot %₁₅ label₂₃)
+23  (call core.svec)
+24  (call core._typebody! %₂₁ %₆ %₂₃)
+25  (call core.declare_const TestMod :X %₂₄)
+26  latestworld
+27  TestMod.X
+28  (call core.apply_type core.Type %₂₇)
+29  (call core.svec %₂₈)
+30  (call core.svec)
+31  SourceLocation::2:5
+32  (call core.svec %₂₉ %₃₀ %₃₁)
+33  --- method core.nothing %₃₂
+    slots: [slot₁/#ctor-self#]
+    1   slot₁/#ctor-self#
+    2   (new %₁)
+    3   (return %₂)
+34  latestworld
+35  (return core.nothing)
+
+########################################
+# Basic struct
+struct X
+    a
+    b::T
+    c
+end
+#---------------------
+1   (call core.declare_global TestMod :X false)
+2   latestworld
+3   (call core.svec)
+4   (call core.svec :a :b :c)
+5   (call core.svec)
+6   (call core._structtype TestMod :X %₃ %₄ %₅ false 3)
+7   (= slot₁/X %₆)
+8   (call core._setsuper! %₆ core.Any)
+9   (call core.isdefinedglobal TestMod :X false)
+10  (gotoifnot %₉ label₁₄)
+11  TestMod.X
+12  (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆))
+13  (goto label₁₅)
+14  (= slot₂/if_val false)
+15  slot₂/if_val
+16  (gotoifnot %₁₅ label₂₀)
+17  TestMod.X
+18  (= slot₃/if_val %₁₇)
+19  (goto label₂₁)
+20  (= slot₃/if_val false)
+21  slot₃/if_val
+22  (gotoifnot %₁₅ label₂₃)
+23  TestMod.T
+24  (call core.svec core.Any %₂₃ core.Any)
+25  (call core._typebody! %₂₁ %₆ %₂₄)
+26  (call core.declare_const TestMod :X %₂₅)
+27  latestworld
+28  TestMod.T
+29  (call core.=== core.Any %₂₈)
+30  (gotoifnot %₂₉ label₃₂)
+31  (goto label₄₀)
+32  TestMod.X
+33  (call core.apply_type core.Type %₃₂)
+34  (call core.svec %₃₃ core.Any core.Any core.Any)
+35  (call core.svec)
+36  SourceLocation::1:1
+37  (call core.svec %₃₄ %₃₅ %₃₆)
+38  --- method core.nothing %₃₇
+    slots: [slot₁/#ctor-self# slot₂/a slot₃/b slot₄/c slot₅/tmp]
+    1   (call core.fieldtype slot₁/#ctor-self# 2)
+    2   slot₃/b
+    3   (= slot₅/tmp %₂)
+    4   slot₅/tmp
+    5   (call core.isa %₄ %₁)
+    6   (gotoifnot %₅ label₈)
+    7   (goto label₁₀)
+    8   slot₅/tmp
+    9   (= slot₅/tmp (call top.convert %₁ %₈))
+    10  slot₅/tmp
+    11  (new slot₁/#ctor-self# slot₂/a %₁₀ slot₄/c)
+    12  (return %₁₁)
+39  latestworld
+40  TestMod.X
+41  (call core.apply_type core.Type %₄₀)
+42  TestMod.T
+43  (call core.svec %₄₁ core.Any %₄₂ core.Any)
+44  (call core.svec)
+45  SourceLocation::1:1
+46  (call core.svec %₄₃ %₄₄ %₄₅)
+47  --- method core.nothing %₄₆
+    slots: [slot₁/#self#(!read) slot₂/a slot₃/b slot₄/c]
+    1   TestMod.X
+    2   (new %₁ slot₂/a slot₃/b slot₄/c)
+    3   (return %₂)
+48  latestworld
+49  (return core.nothing)
+
+########################################
+# Struct with supertype and type params
+struct X{U, S <: V <: T} <: Z
+end
+#---------------------
+1   (call core.declare_global TestMod :X false)
+2   latestworld
+3   (= slot₂/U (call core.TypeVar :U))
+4   TestMod.S
+5   TestMod.T
+6   (= slot₃/V (call core.TypeVar :V %₄ %₅))
+7   slot₂/U
+8   slot₃/V
+9   (call core.svec %₇ %₈)
+10  (call core.svec)
+11  (call core.svec)
+12  (call core._structtype TestMod :X %₉ %₁₀ %₁₁ false 0)
+13  (= slot₄/X %₁₂)
+14  TestMod.Z
+15  (call core._setsuper! %₁₂ %₁₄)
+16  (call core.isdefinedglobal TestMod :X false)
+17  (gotoifnot %₁₆ label₂₁)
+18  TestMod.X
+19  (= slot₅/if_val (call core._equiv_typedef %₁₈ %₁₂))
+20  (goto label₂₂)
+21  (= slot₅/if_val false)
+22  slot₅/if_val
+23  (gotoifnot %₂₂ label₂₇)
+24  TestMod.X
+25  (= slot₆/if_val %₂₄)
+26  (goto label₂₈)
+27  (= slot₆/if_val false)
+28  slot₆/if_val
+29  (gotoifnot %₂₂ label₄₀)
+30  TestMod.X
+31  (call top.getproperty %₃₀ :body)
+32  (call top.getproperty %₃₁ :body)
+33  (call top.getproperty %₃₂ :parameters)
+34  (call top.indexed_iterate %₃₃ 1)
+35  (= slot₂/U (call core.getfield %₃₄ 1))
+36  (= slot₁/iterstate (call core.getfield %₃₄ 2))
+37  slot₁/iterstate
+38  (call top.indexed_iterate %₃₃ 2 %₃₇)
+39  (= slot₃/V (call core.getfield %₃₈ 1))
+40  (call core.svec)
+41  (call core._typebody! %₂₈ %₁₂ %₄₀)
+42  (call core.declare_const TestMod :X %₄₁)
+43  latestworld
+44  slot₂/U
+45  slot₃/V
+46  TestMod.X
+47  slot₂/U
+48  slot₃/V
+49  (call core.apply_type %₄₆ %₄₇ %₄₈)
+50  (call core.apply_type core.Type %₄₉)
+51  (call core.UnionAll %₄₅ %₅₀)
+52  (call core.UnionAll %₄₄ %₅₁)
+53  (call core.svec %₅₂)
+54  (call core.svec)
+55  SourceLocation::1:1
+56  (call core.svec %₅₃ %₅₄ %₅₅)
+57  --- method core.nothing %₅₆
+    slots: [slot₁/#ctor-self#]
+    1   (new slot₁/#ctor-self#)
+    2   (return %₁)
+58  latestworld
+59  (return core.nothing)
+
+########################################
+# Struct with const and atomic fields
+struct X
+    const a
+    @atomic b
+    const @atomic c
+end
+#---------------------
+1   (call core.declare_global TestMod :X false)
+2   latestworld
+3   (call core.svec)
+4   (call core.svec :a :b :c)
+5   (call core.svec 1 :const 2 :atomic 3 :atomic 3 :const)
+6   (call core._structtype TestMod :X %₃ %₄ %₅ false 3)
+7   (= slot₁/X %₆)
+8   (call core._setsuper! %₆ core.Any)
+9   (call core.isdefinedglobal TestMod :X false)
+10  (gotoifnot %₉ label₁₄)
+11  TestMod.X
+12  (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆))
+13  (goto label₁₅)
+14  (= slot₂/if_val false)
+15  slot₂/if_val
+16  (gotoifnot %₁₅ label₂₀)
+17  TestMod.X
+18  (= slot₃/if_val %₁₇)
+19  (goto label₂₁)
+20  (= slot₃/if_val false)
+21  slot₃/if_val
+22  (gotoifnot %₁₅ label₂₃)
+23  (call core.svec core.Any core.Any core.Any)
+24  (call core._typebody! %₂₁ %₆ %₂₃)
+25  (call core.declare_const TestMod :X %₂₄)
+26  latestworld
+27  TestMod.X
+28  (call core.apply_type core.Type %₂₇)
+29  (call core.svec %₂₈ core.Any core.Any core.Any)
+30  (call core.svec)
+31  SourceLocation::1:1
+32  (call core.svec %₂₉ %₃₀ %₃₁)
+33  --- method core.nothing %₃₂
+    slots: [slot₁/#self#(!read) slot₂/a slot₃/b slot₄/c]
+    1   TestMod.X
+    2   (new %₁ slot₂/a slot₃/b slot₄/c)
+    3   (return %₂)
+34  latestworld
+35  (return core.nothing)
+
+########################################
+# Documented struct
+"""
+X docs
+"""
+struct X
+    "field a docs"
+    a
+    "field b docs"
+    b
+end
+#---------------------
+1   (call core.declare_global TestMod :X false)
+2   latestworld
+3   (call core.svec)
+4   (call core.svec :a :b)
+5   (call core.svec)
+6   (call core._structtype TestMod :X %₃ %₄ %₅ false 2)
+7   (= slot₁/X %₆)
+8   (call core._setsuper! %₆ core.Any)
+9   (call core.isdefinedglobal TestMod :X false)
+10  (gotoifnot %₉ label₁₄)
+11  TestMod.X
+12  (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆))
+13  (goto label₁₅)
+14  (= slot₂/if_val false)
+15  slot₂/if_val
+16  (gotoifnot %₁₅ label₂₀)
+17  TestMod.X
+18  (= slot₃/if_val %₁₇)
+19  (goto label₂₁)
+20  (= slot₃/if_val false)
+21  slot₃/if_val
+22  (gotoifnot %₁₅ label₂₃)
+23  (call core.svec core.Any core.Any)
+24  (call core._typebody! %₂₁ %₆ %₂₃)
+25  (call core.declare_const TestMod :X %₂₄)
+26  latestworld
+27  TestMod.X
+28  (call core.apply_type core.Type %₂₇)
+29  (call core.svec %₂₈ core.Any core.Any)
+30  (call core.svec)
+31  SourceLocation::4:1
+32  (call core.svec %₂₉ %₃₀ %₃₁)
+33  --- method core.nothing %₃₂
+    slots: [slot₁/#self#(!read) slot₂/a slot₃/b]
+    1   TestMod.X
+    2   (new %₁ slot₂/a slot₃/b)
+    3   (return %₂)
+34  latestworld
+35  JuliaLowering.bind_docs!
+36  (call core.tuple :field_docs)
+37  (call core.apply_type core.NamedTuple %₃₆)
+38  (call core.svec 1 "field a docs" 2 "field b docs")
+39  (call core.tuple %₃₈)
+40  (call %₃₇ %₃₉)
+41  TestMod.X
+42  SourceLocation::4:1
+43  (call core.kwcall %₄₀ %₃₅ %₄₁ "X docs\n" %₄₂)
+44  (return core.nothing)
+
+########################################
+# Struct with outer constructor
+struct X{U}
+    x::U
+end
+#---------------------
+1   (call core.declare_global TestMod :X false)
+2   latestworld
+3   (= slot₁/U (call core.TypeVar :U))
+4   slot₁/U
+5   (call core.svec %₄)
+6   (call core.svec :x)
+7   (call core.svec)
+8   (call core._structtype TestMod :X %₅ %₆ %₇ false 1)
+9   (= slot₂/X %₈)
+10  (call core._setsuper! %₈ core.Any)
+11  (call core.isdefinedglobal TestMod :X false)
+12  (gotoifnot %₁₁ label₁₆)
+13  TestMod.X
+14  (= slot₃/if_val (call core._equiv_typedef %₁₃ %₈))
+15  (goto label₁₇)
+16  (= slot₃/if_val false)
+17  slot₃/if_val
+18  (gotoifnot %₁₇ label₂₂)
+19  TestMod.X
+20  (= slot₄/if_val %₁₉)
+21  (goto label₂₃)
+22  (= slot₄/if_val false)
+23  slot₄/if_val
+24  (gotoifnot %₁₇ label₃₀)
+25  TestMod.X
+26  (call top.getproperty %₂₅ :body)
+27  (call top.getproperty %₂₆ :parameters)
+28  (call top.indexed_iterate %₂₇ 1)
+29  (= slot₁/U (call core.getfield %₂₈ 1))
+30  slot₁/U
+31  (call core.svec %₃₀)
+32  (call core._typebody! %₂₃ %₈ %₃₁)
+33  (call core.declare_const TestMod :X %₃₂)
+34  latestworld
+35  slot₁/U
+36  TestMod.X
+37  slot₁/U
+38  (call core.apply_type %₃₆ %₃₇)
+39  (call core.apply_type core.Type %₃₈)
+40  (call core.UnionAll %₃₅ %₃₉)
+41  (call core.svec %₄₀ core.Any)
+42  (call core.svec)
+43  SourceLocation::1:1
+44  (call core.svec %₄₁ %₄₂ %₄₃)
+45  --- method core.nothing %₄₄
+    slots: [slot₁/#ctor-self# slot₂/x slot₃/tmp]
+    1   (call core.fieldtype slot₁/#ctor-self# 1)
+    2   slot₂/x
+    3   (= slot₃/tmp %₂)
+    4   slot₃/tmp
+    5   (call core.isa %₄ %₁)
+    6   (gotoifnot %₅ label₈)
+    7   (goto label₁₀)
+    8   slot₃/tmp
+    9   (= slot₃/tmp (call top.convert %₁ %₈))
+    10  slot₃/tmp
+    11  (new slot₁/#ctor-self# %₁₀)
+    12  (return %₁₁)
+46  latestworld
+47  TestMod.X
+48  (call core.apply_type core.Type %₄₇)
+49  slot₁/U
+50  (call core.svec %₄₈ %₄₉)
+51  slot₁/U
+52  (call core.svec %₅₁)
+53  SourceLocation::1:1
+54  (call core.svec %₅₀ %₅₂ %₅₃)
+55  --- method core.nothing %₅₄
+    slots: [slot₁/#self#(!read) slot₂/x]
+    1   TestMod.X
+    2   static_parameter₁
+    3   (call core.apply_type %₁ %₂)
+    4   (new %₃ slot₂/x)
+    5   (return %₄)
+56  latestworld
+57  (return core.nothing)
+
+########################################
+# Struct with outer constructor where one typevar is constrained by the other
+# See https://github.com/JuliaLang/julia/issues/27269)
+struct X{T, S <: Vector{T}}
+    v::Vector{S}
+end
+#---------------------
+1   (call core.declare_global TestMod :X false)
+2   latestworld
+3   (= slot₃/T (call core.TypeVar :T))
+4   TestMod.Vector
+5   slot₃/T
+6   (call core.apply_type %₄ %₅)
+7   (= slot₂/S (call core.TypeVar :S %₆))
+8   slot₃/T
+9   slot₂/S
+10  (call core.svec %₈ %₉)
+11  (call core.svec :v)
+12  (call core.svec)
+13  (call core._structtype TestMod :X %₁₀ %₁₁ %₁₂ false 1)
+14  (= slot₄/X %₁₃)
+15  (call core._setsuper! %₁₃ core.Any)
+16  (call core.isdefinedglobal TestMod :X false)
+17  (gotoifnot %₁₆ label₂₁)
+18  TestMod.X
+19  (= slot₅/if_val (call core._equiv_typedef %₁₈ %₁₃))
+20  (goto label₂₂)
+21  (= slot₅/if_val false)
+22  slot₅/if_val
+23  (gotoifnot %₂₂ label₂₇)
+24  TestMod.X
+25  (= slot₆/if_val %₂₄)
+26  (goto label₂₈)
+27  (= slot₆/if_val false)
+28  slot₆/if_val
+29  (gotoifnot %₂₂ label₄₀)
+30  TestMod.X
+31  (call top.getproperty %₃₀ :body)
+32  (call top.getproperty %₃₁ :body)
+33  (call top.getproperty %₃₂ :parameters)
+34  (call top.indexed_iterate %₃₃ 1)
+35  (= slot₃/T (call core.getfield %₃₄ 1))
+36  (= slot₁/iterstate (call core.getfield %₃₄ 2))
+37  slot₁/iterstate
+38  (call top.indexed_iterate %₃₃ 2 %₃₇)
+39  (= slot₂/S (call core.getfield %₃₈ 1))
+40  TestMod.Vector
+41  slot₂/S
+42  (call core.apply_type %₄₀ %₄₁)
+43  (call core.svec %₄₂)
+44  (call core._typebody! %₂₈ %₁₃ %₄₃)
+45  (call core.declare_const TestMod :X %₄₄)
+46  latestworld
+47  slot₃/T
+48  slot₂/S
+49  TestMod.X
+50  slot₃/T
+51  slot₂/S
+52  (call core.apply_type %₄₉ %₅₀ %₅₁)
+53  (call core.apply_type core.Type %₅₂)
+54  (call core.UnionAll %₄₈ %₅₃)
+55  (call core.UnionAll %₄₇ %₅₄)
+56  (call core.svec %₅₅ core.Any)
+57  (call core.svec)
+58  SourceLocation::1:1
+59  (call core.svec %₅₆ %₅₇ %₅₈)
+60  --- method core.nothing %₅₉
+    slots: [slot₁/#ctor-self# slot₂/v slot₃/tmp]
+    1   (call core.fieldtype slot₁/#ctor-self# 1)
+    2   slot₂/v
+    3   (= slot₃/tmp %₂)
+    4   slot₃/tmp
+    5   (call core.isa %₄ %₁)
+    6   (gotoifnot %₅ label₈)
+    7   (goto label₁₀)
+    8   slot₃/tmp
+    9   (= slot₃/tmp (call top.convert %₁ %₈))
+    10  slot₃/tmp
+    11  (new slot₁/#ctor-self# %₁₀)
+    12  (return %₁₁)
+61  latestworld
+62  TestMod.X
+63  (call core.apply_type core.Type %₆₂)
+64  TestMod.Vector
+65  slot₂/S
+66  (call core.apply_type %₆₄ %₆₅)
+67  (call core.svec %₆₃ %₆₆)
+68  slot₃/T
+69  slot₂/S
+70  (call core.svec %₆₈ %₆₉)
+71  SourceLocation::1:1
+72  (call core.svec %₆₇ %₇₀ %₇₁)
+73  --- method core.nothing %₇₂
+    slots: [slot₁/#self#(!read) slot₂/v]
+    1   TestMod.X
+    2   static_parameter₁
+    3   static_parameter₂
+    4   (call core.apply_type %₁ %₂ %₃)
+    5   (new %₄ slot₂/v)
+    6   (return %₅)
+74  latestworld
+75  (return core.nothing)
+
+########################################
+# User defined inner constructors and helper functions for structs without type params
+struct X
+    x
+    f() = new(1)
+    X() = f() # this X() captures `f` (in flisp, as a Box :-/ )
+    X(x) = new(x)
+    X(y,z)::ReallyXIPromise = new(y+z)
+    """
+    Docs for X constructor
+    """
+    X(a,b,c) = new(a)
+end
+#---------------------
+1   (= slot₂/f (call core.Box))
+2   (call core.declare_global TestMod :X false)
+3   latestworld
+4   (call core.svec)
+5   (call core.svec :x)
+6   (call core.svec)
+7   (call core._structtype TestMod :X %₄ %₅ %₆ false 1)
+8   (= slot₁/X %₇)
+9   (call core._setsuper! %₇ core.Any)
+10  (call core.isdefinedglobal TestMod :X false)
+11  (gotoifnot %₁₀ label₁₅)
+12  TestMod.X
+13  (= slot₄/if_val (call core._equiv_typedef %₁₂ %₇))
+14  (goto label₁₆)
+15  (= slot₄/if_val false)
+16  slot₄/if_val
+17  (gotoifnot %₁₆ label₂₁)
+18  TestMod.X
+19  (= slot₅/if_val %₁₈)
+20  (goto label₂₂)
+21  (= slot₅/if_val false)
+22  slot₅/if_val
+23  (gotoifnot %₁₆ label₂₄)
+24  (call core.svec core.Any)
+25  (call core._typebody! %₂₂ %₇ %₂₄)
+26  (call core.declare_const TestMod :X %₂₅)
+27  latestworld
+28  (call core.svec)
+29  (call core.svec)
+30  (call JuliaLowering.eval_closure_type TestMod :#f##0 %₂₈ %₂₉)
+31  latestworld
+32  TestMod.#f##0
+33  (new %₃₂)
+34  slot₂/f
+35  (call core.setfield! %₃₄ :contents %₃₃)
+36  TestMod.#f##0
+37  (call core.svec %₃₆)
+38  (call core.svec)
+39  SourceLocation::3:5
+40  (call core.svec %₃₇ %₃₈ %₃₉)
+41  --- method core.nothing %₄₀
+    slots: [slot₁/#self#(!read)]
+    1   TestMod.X
+    2   (new %₁ 1)
+    3   (return %₂)
+42  latestworld
+43  TestMod.X
+44  (call core.apply_type core.Type %₄₃)
+45  (call core.svec %₄₄)
+46  (call core.svec)
+47  SourceLocation::4:5
+48  (call core.svec %₄₅ %₄₆ %₄₇)
+49  --- code_info
+    slots: [slot₁/#ctor-self#(!read) slot₂/f(!read)]
+    1   (captured_local 1)
+    2   (call core.isdefined %₁ :contents)
+    3   (gotoifnot %₂ label₅)
+    4   (goto label₇)
+    5   (newvar slot₂/f)
+    6   slot₂/f
+    7   (call core.getfield %₁ :contents)
+    8   (call %₇)
+    9   (return %₈)
+50  slot₂/f
+51  (call core.svec %₅₀)
+52  (call JuliaLowering.replace_captured_locals! %₄₉ %₅₁)
+53  --- method core.nothing %₄₈ %₅₂
+54  latestworld
+55  TestMod.X
+56  (call core.apply_type core.Type %₅₅)
+57  (call core.svec %₅₆ core.Any)
+58  (call core.svec)
+59  SourceLocation::5:5
+60  (call core.svec %₅₇ %₅₈ %₅₉)
+61  --- method core.nothing %₆₀
+    slots: [slot₁/#ctor-self# slot₂/x]
+    1   slot₁/#ctor-self#
+    2   (new %₁ slot₂/x)
+    3   (return %₂)
+62  latestworld
+63  TestMod.X
+64  (call core.apply_type core.Type %₆₃)
+65  (call core.svec %₆₄ core.Any core.Any)
+66  (call core.svec)
+67  SourceLocation::6:5
+68  (call core.svec %₆₅ %₆₆ %₆₇)
+69  --- method core.nothing %₆₈
+    slots: [slot₁/#ctor-self# slot₂/y slot₃/z slot₄/tmp(!read)]
+    1   TestMod.ReallyXIPromise
+    2   slot₁/#ctor-self#
+    3   TestMod.+
+    4   (call %₃ slot₂/y slot₃/z)
+    5   (= slot₄/tmp (new %₂ %₄))
+    6   slot₄/tmp
+    7   (call core.isa %₆ %₁)
+    8   (gotoifnot %₇ label₁₀)
+    9   (goto label₁₃)
+    10  slot₄/tmp
+    11  (call top.convert %₁ %₁₀)
+    12  (= slot₄/tmp (call core.typeassert %₁₁ %₁))
+    13  slot₄/tmp
+    14  (return %₁₃)
+70  latestworld
+71  TestMod.X
+72  (call core.apply_type core.Type %₇₁)
+73  (call core.svec %₇₂ core.Any core.Any core.Any)
+74  (call core.svec)
+75  SourceLocation::10:5
+76  (call core.svec %₇₃ %₇₄ %₇₅)
+77  --- method core.nothing %₇₆
+    slots: [slot₁/#ctor-self# slot₂/a slot₃/b(!read) slot₄/c(!read)]
+    1   slot₁/#ctor-self#
+    2   (new %₁ slot₂/a)
+    3   (return %₂)
+78  latestworld
+79  TestMod.X
+80  (call core.apply_type core.Type %₇₉)
+81  (call JuliaLowering.bind_docs! %₈₀ "Docs for X constructor\n" %₇₆)
+82  (return core.nothing)
+
+########################################
+# User defined inner constructors and helper functions for structs with type params
+struct X{S,T}
+    x
+    X{A,B}() = new(1)
+    X{U,V}() where {U,V} = new(1)
+    f() = new{A,B}(1)
+end
+#---------------------
+1   (newvar slot₅/f)
+2   (call core.declare_global TestMod :X false)
+3   latestworld
+4   (= slot₂/S (call core.TypeVar :S))
+5   (= slot₃/T (call core.TypeVar :T))
+6   slot₂/S
+7   slot₃/T
+8   (call core.svec %₆ %₇)
+9   (call core.svec :x)
+10  (call core.svec)
+11  (call core._structtype TestMod :X %₈ %₉ %₁₀ false 1)
+12  (= slot₄/X %₁₁)
+13  (call core._setsuper! %₁₁ core.Any)
+14  (call core.isdefinedglobal TestMod :X false)
+15  (gotoifnot %₁₄ label₁₉)
+16  TestMod.X
+17  (= slot₈/if_val (call core._equiv_typedef %₁₆ %₁₁))
+18  (goto label₂₀)
+19  (= slot₈/if_val false)
+20  slot₈/if_val
+21  (gotoifnot %₂₀ label₂₅)
+22  TestMod.X
+23  (= slot₉/if_val %₂₂)
+24  (goto label₂₆)
+25  (= slot₉/if_val false)
+26  slot₉/if_val
+27  (gotoifnot %₂₀ label₃₈)
+28  TestMod.X
+29  (call top.getproperty %₂₈ :body)
+30  (call top.getproperty %₂₉ :body)
+31  (call top.getproperty %₃₀ :parameters)
+32  (call top.indexed_iterate %₃₁ 1)
+33  (= slot₂/S (call core.getfield %₃₂ 1))
+34  (= slot₁/iterstate (call core.getfield %₃₂ 2))
+35  slot₁/iterstate
+36  (call top.indexed_iterate %₃₁ 2 %₃₅)
+37  (= slot₃/T (call core.getfield %₃₆ 1))
+38  (call core.svec core.Any)
+39  (call core._typebody! %₂₆ %₁₁ %₃₈)
+40  (call core.declare_const TestMod :X %₃₉)
+41  latestworld
+42  TestMod.X
+43  TestMod.A
+44  TestMod.B
+45  (call core.apply_type %₄₂ %₄₃ %₄₄)
+46  (call core.apply_type core.Type %₄₅)
+47  (call core.svec %₄₆)
+48  (call core.svec)
+49  SourceLocation::3:5
+50  (call core.svec %₄₇ %₄₈ %₄₉)
+51  --- method core.nothing %₅₀
+    slots: [slot₁/#ctor-self#]
+    1   slot₁/#ctor-self#
+    2   (new %₁ 1)
+    3   (return %₂)
+52  latestworld
+53  (= slot₆/U (call core.TypeVar :U))
+54  (= slot₇/V (call core.TypeVar :V))
+55  TestMod.X
+56  slot₆/U
+57  slot₇/V
+58  (call core.apply_type %₅₅ %₅₆ %₅₇)
+59  (call core.apply_type core.Type %₅₈)
+60  (call core.svec %₅₉)
+61  slot₆/U
+62  slot₇/V
+63  (call core.svec %₆₁ %₆₂)
+64  SourceLocation::4:5
+65  (call core.svec %₆₀ %₆₃ %₆₄)
+66  --- method core.nothing %₆₅
+    slots: [slot₁/#ctor-self#]
+    1   slot₁/#ctor-self#
+    2   (new %₁ 1)
+    3   (return %₂)
+67  latestworld
+68  (call core.svec)
+69  (call core.svec)
+70  (call JuliaLowering.eval_closure_type TestMod :#f##1 %₆₈ %₆₉)
+71  latestworld
+72  TestMod.#f##1
+73  (new %₇₂)
+74  (= slot₅/f %₇₃)
+75  TestMod.#f##1
+76  (call core.svec %₇₅)
+77  (call core.svec)
+78  SourceLocation::5:5
+79  (call core.svec %₇₆ %₇₇ %₇₈)
+80  --- method core.nothing %₇₉
+    slots: [slot₁/#self#(!read)]
+    1   TestMod.X
+    2   TestMod.A
+    3   TestMod.B
+    4   (call core.apply_type %₁ %₂ %₃)
+    5   (new %₄ 1)
+    6   (return %₅)
+81  latestworld
+82  (return core.nothing)
+
+########################################
+# new() calls with splats; `Any` fields
+struct X
+    x
+    y
+    X(xs) = new(xs...)
+end
+#---------------------
+1   (call core.declare_global TestMod :X false)
+2   latestworld
+3   (call core.svec)
+4   (call core.svec :x :y)
+5   (call core.svec)
+6   (call core._structtype TestMod :X %₃ %₄ %₅ false 2)
+7   (= slot₁/X %₆)
+8   (call core._setsuper! %₆ core.Any)
+9   (call core.isdefinedglobal TestMod :X false)
+10  (gotoifnot %₉ label₁₄)
+11  TestMod.X
+12  (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆))
+13  (goto label₁₅)
+14  (= slot₂/if_val false)
+15  slot₂/if_val
+16  (gotoifnot %₁₅ label₂₀)
+17  TestMod.X
+18  (= slot₃/if_val %₁₇)
+19  (goto label₂₁)
+20  (= slot₃/if_val false)
+21  slot₃/if_val
+22  (gotoifnot %₁₅ label₂₃)
+23  (call core.svec core.Any core.Any)
+24  (call core._typebody! %₂₁ %₆ %₂₃)
+25  (call core.declare_const TestMod :X %₂₄)
+26  latestworld
+27  TestMod.X
+28  (call core.apply_type core.Type %₂₇)
+29  (call core.svec %₂₈ core.Any)
+30  (call core.svec)
+31  SourceLocation::4:5
+32  (call core.svec %₂₉ %₃₀ %₃₁)
+33  --- method core.nothing %₃₂
+    slots: [slot₁/#ctor-self# slot₂/xs]
+    1   slot₁/#ctor-self#
+    2   (call core._apply_iterate top.iterate core.tuple slot₂/xs)
+    3   (splatnew %₁ %₂)
+    4   (return %₃)
+34  latestworld
+35  (return core.nothing)
+
+########################################
+# new() calls with splats; typed fields
+struct X{T}
+    x::T
+    y::A
+    X{T}(xs) where {T} = new(xs...)
+end
+#---------------------
+1   (call core.declare_global TestMod :X false)
+2   latestworld
+3   (= slot₁/T (call core.TypeVar :T))
+4   slot₁/T
+5   (call core.svec %₄)
+6   (call core.svec :x :y)
+7   (call core.svec)
+8   (call core._structtype TestMod :X %₅ %₆ %₇ false 2)
+9   (= slot₂/X %₈)
+10  (call core._setsuper! %₈ core.Any)
+11  (call core.isdefinedglobal TestMod :X false)
+12  (gotoifnot %₁₁ label₁₆)
+13  TestMod.X
+14  (= slot₄/if_val (call core._equiv_typedef %₁₃ %₈))
+15  (goto label₁₇)
+16  (= slot₄/if_val false)
+17  slot₄/if_val
+18  (gotoifnot %₁₇ label₂₂)
+19  TestMod.X
+20  (= slot₅/if_val %₁₉)
+21  (goto label₂₃)
+22  (= slot₅/if_val false)
+23  slot₅/if_val
+24  (gotoifnot %₁₇ label₃₀)
+25  TestMod.X
+26  (call top.getproperty %₂₅ :body)
+27  (call top.getproperty %₂₆ :parameters)
+28  (call top.indexed_iterate %₂₇ 1)
+29  (= slot₁/T (call core.getfield %₂₈ 1))
+30  slot₁/T
+31  TestMod.A
+32  (call core.svec %₃₀ %₃₁)
+33  (call core._typebody! %₂₃ %₈ %₃₂)
+34  (call core.declare_const TestMod :X %₃₃)
+35  latestworld
+36  (= slot₃/T (call core.TypeVar :T))
+37  TestMod.X
+38  slot₃/T
+39  (call core.apply_type %₃₇ %₃₈)
+40  (call core.apply_type core.Type %₃₉)
+41  (call core.svec %₄₀ core.Any)
+42  slot₃/T
+43  (call core.svec %₄₂)
+44  SourceLocation::4:5
+45  (call core.svec %₄₁ %₄₃ %₄₄)
+46  --- method core.nothing %₄₅
+    slots: [slot₁/#ctor-self# slot₂/xs slot₃/tmp slot₄/tmp]
+    1   (call core._apply_iterate top.iterate core.tuple slot₂/xs)
+    2   (call core.nfields %₁)
+    3   (call top.ult_int %₂ 2)
+    4   (gotoifnot %₃ label₇)
+    5   (call top.ArgumentError "too few arguments in `new` (expected 2)")
+    6   (call core.throw %₅)
+    7   (call top.ult_int 2 %₂)
+    8   (gotoifnot %₇ label₁₁)
+    9   (call top.ArgumentError "too many arguments in `new` (expected 2)")
+    10  (call core.throw %₉)
+    11  slot₁/#ctor-self#
+    12  (call core.fieldtype %₁₁ 1)
+    13  (= slot₃/tmp (call core.getfield %₁ 1))
+    14  slot₃/tmp
+    15  (call core.isa %₁₄ %₁₂)
+    16  (gotoifnot %₁₅ label₁₈)
+    17  (goto label₂₀)
+    18  slot₃/tmp
+    19  (= slot₃/tmp (call top.convert %₁₂ %₁₈))
+    20  slot₃/tmp
+    21  (call core.fieldtype %₁₁ 2)
+    22  (= slot₄/tmp (call core.getfield %₁ 2))
+    23  slot₄/tmp
+    24  (call core.isa %₂₃ %₂₁)
+    25  (gotoifnot %₂₄ label₂₇)
+    26  (goto label₂₉)
+    27  slot₄/tmp
+    28  (= slot₄/tmp (call top.convert %₂₁ %₂₇))
+    29  slot₄/tmp
+    30  (new %₁₁ %₂₀ %₂₉)
+    31  (return %₃₀)
+47  latestworld
+48  (return core.nothing)
+
+########################################
+# Error: new doesn't accept keywords
+struct X
+    X() = new(a=1)
+end
+#---------------------
+LoweringError:
+struct X
+    X() = new(a=1)
+#             └─┘ ── `new` does not accept keyword arguments
+end
+
+########################################
+# Error: new doesn't accept keywords (params block)
+struct X
+    X() = new(; a=1)
+end
+#---------------------
+LoweringError:
+struct X
+    X() = new(; a=1)
+#             └───┘ ── `new` does not accept keyword arguments
+end
+
+########################################
+# Error: User defined inner constructors without enough type params
+struct X{S,T}
+    X() = new{A}()
+end
+#---------------------
+LoweringError:
+struct X{S,T}
+    X() = new{A}()
+#         └────┘ ── too few type parameters specified in `new{...}`
+end
+
+########################################
+# Error: User defined inner constructors without enough type params
+struct X{S,T}
+    X{A}() = new()
+end
+#---------------------
+LoweringError:
+struct X{S,T}
+    X{A}() = new()
+#            └─┘ ── too few type parameters specified in `new`
+end
+
+########################################
+# Error: User defined inner constructors with too many type params
+struct X{S,T}
+    X() = new{A,B,C}()
+end
+#---------------------
+LoweringError:
+struct X{S,T}
+    X() = new{A,B,C}()
+#         └────────┘ ── too many type parameters specified in `new{...}`
+end
+
+########################################
+# Error: Struct not at top level
+function f()
+    struct X
+    end
+end
+#---------------------
+LoweringError:
+function f()
+#   ┌───────
+    struct X
+    end
+#─────┘ ── this syntax is only allowed in top level code
+end
+
+########################################
+# Constructor with type parameter
+A{<:Real}() = A(1)
+#---------------------
+1   TestMod.Real
+2   (call core.TypeVar :#T1 %₁)
+3   TestMod.A
+4   (call core.apply_type %₃ %₂)
+5   (call core.UnionAll %₂ %₄)
+6   (call core.Typeof %₅)
+7   (call core.svec %₆)
+8   (call core.svec)
+9   SourceLocation::1:1
+10  (call core.svec %₇ %₈ %₉)
+11  --- method core.nothing %₁₀
+    slots: [slot₁/#self#(!read)]
+    1   TestMod.A
+    2   (call %₁ 1)
+    3   (return %₂)
+12  latestworld
+13  (return core.nothing)
diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl
new file mode 100644
index 0000000000000..16f2f30294ffe
--- /dev/null
+++ b/JuliaLowering/test/utils.jl
@@ -0,0 +1,392 @@
+# Shared testing code which should be included before running individual test files.
+using Test
+
+using JuliaLowering
+using JuliaSyntax
+
+import FileWatching
+
+# The following are for docstrings testing. We need to load the REPL module
+# here for `Base.@doc` lookup to work at all. Yes this does seem really,
+# really, REALLY messed up.
+using Markdown
+import REPL
+
+using .JuliaSyntax: sourcetext, set_numeric_flags
+
+using .JuliaLowering:
+    SyntaxGraph, newnode!, ensure_attributes!,
+    Kind, SourceRef, SyntaxTree, NodeId,
+    makenode, makeleaf, setattr!, sethead!,
+    is_leaf, numchildren, children,
+    @ast, flattened_provenance, showprov, LoweringError, MacroExpansionError,
+    syntax_graph, Bindings, ScopeLayer, mapchildren
+
+function _ast_test_graph()
+    graph = SyntaxGraph()
+    ensure_attributes!(graph,
+                       kind=Kind, syntax_flags=UInt16,
+                       source=Union{SourceRef,NodeId,Tuple,LineNumberNode},
+                       var_id=Int, value=Any, name_val=String, is_toplevel_thunk=Bool,
+                       toplevel_pure=Bool)
+end
+
+function _source_node(graph, src)
+    id = newnode!(graph)
+    sethead!(graph, id, K"None")
+    setattr!(graph, id, source=src)
+    SyntaxTree(graph, id)
+end
+
+macro ast_(tree)
+    # TODO: Implement this in terms of new-style macros.
+    quote
+        graph = _ast_test_graph()
+        srcref = _source_node(graph, $(QuoteNode(__source__)))
+        @ast graph srcref $tree
+    end
+end
+
+function ≈(ex1, ex2)
+    if kind(ex1) != kind(ex2) || is_leaf(ex1) != is_leaf(ex2)
+        return false
+    end
+    if is_leaf(ex1)
+        return get(ex1, :value,    nothing) == get(ex2, :value,    nothing) &&
+               get(ex1, :name_val, nothing) == get(ex2, :name_val, nothing)
+    else
+        if numchildren(ex1) != numchildren(ex2)
+            return false
+        end
+        return all(c1 ≈ c2 for (c1,c2) in zip(children(ex1), children(ex2)))
+    end
+end
+
+
+#-------------------------------------------------------------------------------
+function _format_as_ast_macro(io, ex, indent)
+    k = kind(ex)
+    kind_str = repr(k)
+    if !is_leaf(ex)
+        println(io, indent, "[", kind_str)
+        ind2 = indent*"    "
+        for c in children(ex)
+            _format_as_ast_macro(io, c, ind2)
+        end
+        println(io, indent, "]")
+    else
+        val_str = if k == K"Identifier" || k == K"core" || k == K"top"
+            repr(ex.name_val)
+        elseif k == K"BindingId"
+            repr(ex.var_id)
+        else
+            repr(get(ex, :value, nothing))
+        end
+        println(io, indent, val_str, "::", kind_str)
+    end
+end
+
+function format_as_ast_macro(io::IO, ex)
+    print(io, "@ast_ ")
+    _format_as_ast_macro(io, ex, "")
+end
+
+"""
+    format_as_ast_macro(ex)
+
+Format AST `ex` as a Juila source code call to the `@ast_` macro for generating
+test case comparisons with the `≈` function.
+"""
+format_as_ast_macro(ex) = format_as_ast_macro(stdout, ex)
+
+#-------------------------------------------------------------------------------
+
+# Test tools
+
+function desugar(mod::Module, src::String)
+    ex = parsestmt(SyntaxTree, src, filename="foo.jl")
+    ctx = JuliaLowering.DesugaringContext(syntax_graph(ex), Bindings(), ScopeLayer[], mod)
+    JuliaLowering.expand_forms_2(ctx, ex)
+end
+
+function uncomment_description(desc)
+    replace(desc, r"^# ?"m=>"")
+end
+
+function comment_description(desc)
+    lines = replace(split(desc, '\n')) do line
+        strip("# " * line)
+    end
+    join(lines, '\n')
+end
+
+function match_ir_test_case(case_str)
+    m = match(r"(^#(?:.|\n)*?)^([^#](?:.|\n)*)"m, strip(case_str))
+    if isnothing(m)
+        error("Malformatted IR test case:\n$(repr(case_str))")
+    end
+    description = uncomment_description(m[1])
+    inout = split(m[2], r"#----*")
+    input, output = length(inout) == 2 ? inout          :
+                    length(inout) == 1 ? (inout[1], "") :
+                    error("Too many sections in IR test case")
+    expect_error = startswith(description, "Error")
+    is_broken = startswith(description, "FIXME")
+    method_filter = begin
+        mf = match(r"\[method_filter: *(.*)\]", description)
+        isnothing(mf) ? nothing : strip(mf[1])
+    end
+    (; expect_error=expect_error, is_broken=is_broken,
+     description=strip(description),
+     method_filter=method_filter,
+     input=strip(input), output=strip(output))
+end
+
+function read_ir_test_cases(filename)
+    str = read(filename, String)
+    parts = split(str, r"#\*+")
+    if length(parts) == 2
+        preamble_str = strip(parts[1])
+        cases_str = parts[2]
+    else
+        preamble_str = ""
+        cases_str = only(parts)
+    end
+    (preamble_str,
+     [match_ir_test_case(s) for s in split(cases_str, r"######*") if strip(s) != ""])
+end
+
+function setup_ir_test_module(preamble)
+    test_mod = Module(:TestMod)
+    Base.eval(test_mod, :(const JuliaLowering = $JuliaLowering))
+    Base.eval(test_mod, :(const var"@ast_" = $(var"@ast_")))
+    JuliaLowering.include_string(test_mod, preamble)
+    test_mod
+end
+
+function format_ir_for_test(mod, case)
+    ex = parsestmt(SyntaxTree, case.input)
+    try
+        if kind(ex) == K"macrocall" && kind(ex[1]) == K"macro_name" && ex[1][1].name_val == "ast_"
+            # Total hack, until @ast_ can be implemented in terms of new-style
+            # macros.
+            ex = Base.eval(mod, Expr(ex))
+        end
+        x = JuliaLowering.lower(mod, ex)
+        if case.expect_error
+            error("Expected a lowering error in test case \"$(case.description)\"")
+        end
+        ir = strip(sprint(JuliaLowering.print_ir, x, case.method_filter))
+        return replace(ir, string(mod)=>"TestMod")
+    catch exc
+        if exc isa InterruptException
+            rethrow()
+        elseif case.expect_error && (exc isa LoweringError)
+            return sprint(io->Base.showerror(io, exc, show_detail=false))
+        elseif case.expect_error && (exc isa MacroExpansionError)
+            return sprint(io->Base.showerror(io, exc))
+        elseif case.is_broken
+            return sprint(io->Base.showerror(io, exc))
+        else
+            throw("Error in test case \"$(case.description)\"")
+        end
+    end
+end
+
+function test_ir_cases(filename::AbstractString)
+    preamble, cases = read_ir_test_cases(filename)
+    test_mod = setup_ir_test_module(preamble)
+    for case in cases
+        if case.is_broken
+            continue
+        end
+        output = format_ir_for_test(test_mod, case)
+        @testset "$(case.description)" begin
+            if output != case.output
+                # Do additional error dumping, as @test will not format errors in a nice way
+                @error "Test \"$(case.description)\" failed" output=Text(output) ref=Text(case.output)
+            end
+            @test output == case.output
+        end
+    end
+end
+
+"""
+Update all IR test cases in `filename` when the IR format has changed.
+
+When `pattern` is supplied, update only those tests where
+`occursin(pattern, description)` is true.
+"""
+function refresh_ir_test_cases(filename, pattern=nothing)
+    preamble, cases = read_ir_test_cases(filename)
+    test_mod = setup_ir_test_module(preamble)
+    io = IOBuffer()
+    if !isempty(preamble)
+        println(io, preamble, "\n")
+        println(io, "#*******************************************************************************")
+    end
+    for case in cases
+        if isnothing(pattern) || occursin(pattern, case.description)
+            ir = format_ir_for_test(test_mod, case)
+            if rstrip(ir) != case.output
+                @info "Refreshing test case $(repr(case.description)) in $filename"
+            end
+        else
+            ir = case.output
+        end
+        (case == cases[end] ? print : println)(io,
+            """
+            ########################################
+            $(comment_description(case.description))
+            $(strip(case.input))
+            #---------------------
+            $ir
+            """
+        )
+    end
+    # Write only at the end to ensure we don't write rubbish if we crash!
+    write(filename, take!(io))
+    nothing
+end
+
+function refresh_all_ir_test_cases(test_dir=".")
+    foreach(refresh_ir_test_cases, filter(fn->endswith(fn, "ir.jl"), readdir(test_dir, join=true)))
+end
+
+function watch_ir_tests(dir, delay=0.5)
+    dir = abspath(dir)
+    while true
+        (name, event) = FileWatching.watch_folder(dir)
+        if endswith(name, "_ir.jl") && (event.changed || event.renamed)
+            FileWatching.unwatch_folder(dir)
+            sleep(delay)
+            try
+                refresh_ir_test_cases(joinpath(dir, name))
+            catch
+                @error "Error refreshing test case" exception=current_exceptions()
+            end
+        end
+    end
+end
+
+function lower_str(mod::Module, s::AbstractString)
+    ex = parsestmt(JuliaLowering.SyntaxTree, s)
+    return JuliaLowering.to_lowered_expr(JuliaLowering.lower(mod, ex))
+end
+
+# See Julia Base tests in "test/docs.jl"
+function docstrings_equal(d1, d2; debug=true)
+    io1 = IOBuffer()
+    io2 = IOBuffer()
+    show(io1, MIME"text/markdown"(), d1)
+    show(io2, MIME"text/markdown"(), d2)
+    s1 = String(take!(io1))
+    s2 = String(take!(io2))
+    if debug && s1 != s2
+        print(s1)
+        println("--------------------------------------------------------------------------------")
+        print(s2)
+        println("================================================================================")
+    end
+    return s1 == s2
+end
+docstrings_equal(d1::Docs.DocStr, d2) = docstrings_equal(Docs.parsedoc(d1), d2)
+
+#-------------------------------------------------------------------------------
+# Tools for test case reduction
+
+function block_reduction_1(is_lowering_error::Function, orig_ex::ST, ex::ST,
+                           curr_path = Int[]) where {ST <: SyntaxTree}
+    if !is_leaf(ex)
+        if kind(ex) == K"block"
+            for i in 1:numchildren(ex)
+                trial_ex = delete_block_child(orig_ex, orig_ex, curr_path, i)
+                if is_lowering_error(trial_ex)
+                    # @info "Reduced expression" curr_path i
+                    return trial_ex
+                end
+            end
+        end
+        for (i,e) in enumerate(children(ex))
+            push!(curr_path, i)
+            res = block_reduction_1(is_lowering_error, orig_ex, e, curr_path)
+            if !isnothing(res)
+                return res
+            end
+            pop!(curr_path)
+        end
+    end
+    return nothing
+end
+
+# Find children of all `K"block"`s in an expression and try deleting them while
+# preserving the invariant `is_lowering_error(reduced) == true`.
+function block_reduction(is_lowering_error, ex)
+    reduced = ex
+    was_reduced = false
+    while true
+        r = block_reduction_1(is_lowering_error, reduced, reduced)
+        if isnothing(r)
+            return (reduced, was_reduced)
+        end
+        reduced = r
+        was_reduced = true
+    end
+end
+
+function delete_block_child(ctx, ex, block_path, child_idx, depth=1)
+    if depth > length(block_path)
+        cs = copy(children(ex))
+        deleteat!(cs, child_idx)
+        @ast ctx ex [ex cs...]
+    else
+        j = block_path[depth]
+        mapchildren(ctx, ex, j:j) do e
+            delete_block_child(ctx, e, block_path, child_idx, depth+1)
+        end
+    end
+end
+
+function throws_lowering_exc(mod, ex)
+    try
+        debug_lower(mod, ex)
+        return false
+    catch exc
+        if exc isa LoweringError
+            return true
+        else
+            rethrow()
+        end
+    end
+end
+
+# Parse a file and lower the top level expression one child at a time, finding
+# any top level statement that fails lowering and producing a partially reduced
+# test case.
+function reduce_any_failing_toplevel(mod::Module, filename::AbstractString; do_eval::Bool=false)
+    text = read(filename, String)
+    ex0 = parseall(SyntaxTree, text; filename)
+    for ex in children(ex0)
+        try
+            ex_compiled = JuliaLowering.lower(mod, ex)
+            ex_expr = JuliaLowering.to_lowered_expr(ex_compiled)
+            if do_eval
+                Base.eval(mod, ex_expr)
+            end
+        catch exc
+            @error "Failure lowering code" ex
+            if !(exc isa LoweringError)
+                rethrow()
+            end
+            (reduced,was_reduced) = block_reduction(e->throws_lowering_exc(mod,e), ex)
+            if !was_reduced
+                @info "No reduction possible"
+                return ex
+            else
+                @info "Reduced code" reduced
+                return reduced
+            end
+        end
+    end
+    nothing
+end
diff --git a/JuliaSyntax/.gitignore b/JuliaSyntax/.gitignore
new file mode 100644
index 0000000000000..8a934c09e33c3
--- /dev/null
+++ b/JuliaSyntax/.gitignore
@@ -0,0 +1,7 @@
+/Manifest.toml
+/tools/pkgs
+/tools/pkg_tars
+/tools/logs.txt
+/docs/build
+*.cov
+/.vscode/settings.json
diff --git a/JuliaSyntax/.mailmap b/JuliaSyntax/.mailmap
new file mode 100644
index 0000000000000..a16a55c644b31
--- /dev/null
+++ b/JuliaSyntax/.mailmap
@@ -0,0 +1,2 @@
+Claire Foster <aka.c42f@gmail.com> <chris42f@gmail.com>
+Claire Foster <aka.c42f@gmail.com> <aka.c42f@gmail.com>
diff --git a/JuliaSyntax/LICENSE.md b/JuliaSyntax/LICENSE.md
new file mode 100644
index 0000000000000..7efd19088a06f
--- /dev/null
+++ b/JuliaSyntax/LICENSE.md
@@ -0,0 +1,45 @@
+The JuliaSyntax.jl package is licensed under the MIT "Expat" License:
+
+> Copyright (c) 2021 Julia Computing and contributors
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy
+> of this software and associated documentation files (the "Software"), to deal
+> in the Software without restriction, including without limitation the rights
+> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+> copies of the Software, and to permit persons to whom the Software is
+> furnished to do so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+
+The code in src/tokenize*.jl and test/tokenize.jl is derived from the Tokenize.jl
+package and is also licensed under the MIT "Expat" License:
+
+> Copyright (c) 2016: Kristoffer Carlsson.
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy
+> of this software and associated documentation files (the "Software"), to deal
+> in the Software without restriction, including without limitation the rights
+> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+> copies of the Software, and to permit persons to whom the Software is
+> furnished to do so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+>
diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml
new file mode 100644
index 0000000000000..6ab84fdfb4733
--- /dev/null
+++ b/JuliaSyntax/Project.toml
@@ -0,0 +1,16 @@
+name = "JuliaSyntax"
+uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4"
+authors = ["Claire Foster <aka.c42f@gmail.com> and contributors"]
+version = "2.0.0-DEV"
+
+[compat]
+Serialization = "1.0"
+julia = "1.0"
+
+[extras]
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test", "Serialization", "Logging"]
diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md
new file mode 100644
index 0000000000000..ae9b2b9760b36
--- /dev/null
+++ b/JuliaSyntax/README.md
@@ -0,0 +1,46 @@
+# JuliaSyntax
+
+[![Build Status](https://github.com/c42f/JuliaSyntax.jl/workflows/CI/badge.svg)](https://github.com/c42f/JuliaSyntax.jl/actions)
+[![codecov.io](https://codecov.io/github/JuliaLang/JuliaSyntax.jl/coverage.svg?branch=main)](https://codecov.io/github/JuliaLang/JuliaSyntax.jl?branch=main)
+
+A Julia compiler frontend, written in Julia.
+
+Read the [documentation](https://JuliaLang.github.io/JuliaSyntax.jl/dev) for
+more information.
+
+### Status
+
+JuliaSyntax.jl is used as the new default Julia parser in Julia 1.10.
+It's highly compatible with Julia's older
+[femtolisp-based parser](https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm) -
+It parses all of Base, the standard libraries and General registry. Some minor
+difference remain where we've decided to fix bugs or strange behaviors in the
+reference parser.
+
+The AST and tree data structures are usable but their APIs will evolve as we
+try out various use cases. Parsing to the standard `Expr` AST is always
+possible and will be stable.
+
+The intention is to extend this library over time to cover more of the Julia
+compiler frontend.
+
+# Getting involved
+
+For people who want to help improve Julia's error messages by contributing to
+JuliaSyntax, I'd suggest looking through the issue list at
+https://github.com/JuliaLang/JuliaSyntax.jl/issues and choosing a small issue
+or two to work on to familiarize yourself with the code. Anything marked with
+the labels `intro issue` or `bug` might be a good place to start.
+
+Also watching the [2022 JuliaCon talk](https://www.youtube.com/watch?v=CIiGng9Brrk)
+and reading the [design](https://julialang.github.io/JuliaSyntax.jl/dev/design/) and
+[reference](https://julialang.github.io/JuliaSyntax.jl/dev/reference/)
+documentation should be good for an overview.
+
+As of May 2023, we've got really good positional tracking within the source,
+but JuliaSyntax really needs a better system for parser recovery before the
+errors are really nice. This requires some research. For example, you could
+read up on how rust-analyzer does recovery, or rslint - both these are
+event-based recursive decent parsers with similar structure to JuliaSyntax
+(though in Rust). I also want to investigate whether we can do data-driven
+parser recovery using an ML technique. But again, this is a research project.
diff --git a/JuliaSyntax/docs/Manifest.toml b/JuliaSyntax/docs/Manifest.toml
new file mode 100644
index 0000000000000..ce4d6bed870e8
--- /dev/null
+++ b/JuliaSyntax/docs/Manifest.toml
@@ -0,0 +1,254 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.10.0"
+manifest_format = "2.0"
+project_hash = "46b5b82f24e4b5d97afc2843032730b022086b31"
+
+[[deps.ANSIColoredPrinters]]
+git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
+uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9"
+version = "0.0.1"
+
+[[deps.AbstractTrees]]
+git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c"
+uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
+version = "0.4.4"
+
+[[deps.ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+version = "1.1.1"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+
+[[deps.Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+
+[[deps.DocStringExtensions]]
+deps = ["LibGit2"]
+git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d"
+uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+version = "0.9.3"
+
+[[deps.Documenter]]
+deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "Test", "Unicode"]
+git-tree-sha1 = "2613dbec8f4748273bbe30ba71fd5cb369966bac"
+uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+version = "1.2.1"
+
+[[deps.Downloads]]
+deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+version = "1.6.0"
+
+[[deps.Expat_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c"
+uuid = "2e619515-83b5-522b-bb60-26c02a35a201"
+version = "2.5.0+0"
+
+[[deps.FileWatching]]
+uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+
+[[deps.Git]]
+deps = ["Git_jll"]
+git-tree-sha1 = "51764e6c2e84c37055e846c516e9015b4a291c7d"
+uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2"
+version = "1.3.0"
+
+[[deps.Git_jll]]
+deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"]
+git-tree-sha1 = "bb8f7cc77ec1152414b2af6db533d9471cfbb2d1"
+uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb"
+version = "2.42.0+0"
+
+[[deps.IOCapture]]
+deps = ["Logging", "Random"]
+git-tree-sha1 = "d75853a0bdbfb1ac815478bacd89cd27b550ace6"
+uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
+version = "0.2.3"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+
+[[deps.JLLWrappers]]
+deps = ["Artifacts", "Preferences"]
+git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca"
+uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
+version = "1.5.0"
+
+[[deps.JSON]]
+deps = ["Dates", "Mmap", "Parsers", "Unicode"]
+git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a"
+uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+version = "0.21.4"
+
+[[deps.LazilyInitializedFields]]
+git-tree-sha1 = "8f7f3cabab0fd1800699663533b6d5cb3fc0e612"
+uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf"
+version = "1.2.2"
+
+[[deps.LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+version = "0.6.4"
+
+[[deps.LibCURL_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+version = "8.4.0+0"
+
+[[deps.LibGit2]]
+deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+
+[[deps.LibGit2_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"]
+uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+version = "1.6.4+0"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.11.0+1"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[[deps.Libiconv_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175"
+uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
+version = "1.17.0+0"
+
+[[deps.Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[deps.Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+
+[[deps.MarkdownAST]]
+deps = ["AbstractTrees", "Markdown"]
+git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899"
+uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391"
+version = "0.1.2"
+
+[[deps.MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+version = "2.28.2+1"
+
+[[deps.Mmap]]
+uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+
+[[deps.MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+version = "2023.1.10"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.2.0"
+
+[[deps.OpenSSL_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f"
+uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
+version = "3.0.12+0"
+
+[[deps.PCRE2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
+version = "10.42.0+1"
+
+[[deps.Parsers]]
+deps = ["Dates", "PrecompileTools", "UUIDs"]
+git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821"
+uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+version = "2.8.1"
+
+[[deps.Pkg]]
+deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+version = "1.10.0"
+
+[[deps.PrecompileTools]]
+deps = ["Preferences"]
+git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f"
+uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
+version = "1.2.0"
+
+[[deps.Preferences]]
+deps = ["TOML"]
+git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e"
+uuid = "21216c6a-2e73-6563-6e65-726566657250"
+version = "1.4.1"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+
+[[deps.REPL]]
+deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[[deps.Random]]
+deps = ["SHA"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[deps.RegistryInstances]]
+deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"]
+git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51"
+uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3"
+version = "0.1.0"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+
+[[deps.Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+
+[[deps.TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+version = "1.0.3"
+
+[[deps.Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+version = "1.10.0"
+
+[[deps.Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[[deps.UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+
+[[deps.Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+version = "1.2.13+1"
+
+[[deps.nghttp2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+version = "1.52.0+1"
+
+[[deps.p7zip_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+version = "17.4.0+2"
diff --git a/JuliaSyntax/docs/Project.toml b/JuliaSyntax/docs/Project.toml
new file mode 100644
index 0000000000000..1814eb3304f3c
--- /dev/null
+++ b/JuliaSyntax/docs/Project.toml
@@ -0,0 +1,5 @@
+[deps]
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+
+[compat]
+Documenter = "1"
diff --git a/JuliaSyntax/docs/make.jl b/JuliaSyntax/docs/make.jl
new file mode 100644
index 0000000000000..5c3a094ba8691
--- /dev/null
+++ b/JuliaSyntax/docs/make.jl
@@ -0,0 +1,26 @@
+using Documenter, JuliaSyntax
+
+makedocs(;
+    modules=[JuliaSyntax],
+    format=Documenter.HTML(
+        repolink="https://github.com/JuliaLang/JuliaSyntax.jl"
+    ),
+    pages=[
+        "Overview" => "index.md"
+        "How To" => "howto.md"
+        "Reference" => [
+            "reference.md"
+            "api.md"
+        ]
+        "Design Discussion" => "design.md"
+    ],
+    repo="https://github.com/JuliaLang/JuliaSyntax.jl/blob/{commit}{path}#L{line}",
+    sitename="JuliaSyntax.jl",
+    authors = "Claire Foster and contributors: https://github.com/JuliaLang/JuliaSyntax.jl/graphs/contributors",
+    warnonly = true
+)
+
+deploydocs(;
+    repo="github.com/JuliaLang/JuliaSyntax.jl",
+    push_preview=true
+)
diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md
new file mode 100644
index 0000000000000..5dfbec6e4fcc3
--- /dev/null
+++ b/JuliaSyntax/docs/src/api.md
@@ -0,0 +1,170 @@
+# API Reference
+
+## Parsing
+
+```@docs
+JuliaSyntax.parsestmt
+JuliaSyntax.parseall
+JuliaSyntax.parseatom
+```
+
+### Low level parsing API
+
+The `ParseStream` interface which provides a low-level stream-like I/O
+interface for writing the parser. The parser does not depend on or produce any
+concrete tree data structure as part of the parsing phase but the output spans
+can be post-processed into various tree data structures as required using
+[`JuliaSyntax.build_tree`](@ref).
+
+```@docs
+JuliaSyntax.parse!
+JuliaSyntax.ParseStream
+JuliaSyntax.build_tree
+```
+
+## Tokenization
+
+```@docs
+JuliaSyntax.tokenize
+JuliaSyntax.untokenize
+JuliaSyntax.Token
+```
+
+## Source code handling
+
+This section describes the generic functions for source text, source location
+computation and formatting functions.
+
+Contiguous syntax objects like nodes in the syntax tree should implement the
+following where possible:
+
+```@docs
+JuliaSyntax.sourcefile
+JuliaSyntax.byte_range
+```
+
+This will provide implementations of the following which include range
+information, line numbers, and fancy highlighting of source ranges:
+
+```@docs
+JuliaSyntax.first_byte
+JuliaSyntax.last_byte
+JuliaSyntax.filename
+JuliaSyntax.source_line
+JuliaSyntax.source_location
+JuliaSyntax.char_range
+JuliaSyntax.sourcetext
+JuliaSyntax.highlight
+```
+
+`SourceFile`-specific functions:
+
+```@docs
+JuliaSyntax.SourceFile
+JuliaSyntax.source_line_range
+```
+
+## Expression predicates, kinds and flags
+
+Expressions are tagged with a kind - like a type, but represented as an integer
+tag rather than a full Julia type for efficiency. (Very like the tag of a "sum
+type".) `Kind`s are constructed with the `@K_str` macro.
+
+```@docs
+JuliaSyntax.@K_str
+JuliaSyntax.Kind
+```
+
+The kind of an expression `ex` in a tree should be accessed with `kind(ex)`
+
+```@docs
+JuliaSyntax.kind
+```
+
+In addition to the `kind`, a small integer set of "flags" is included to
+further distinguish details of each expression, accessed with the `flags`
+function. The kind and flags can be wrapped into a `SyntaxHead` which is
+accessed with the `head` function.
+
+```@docs
+JuliaSyntax.flags
+JuliaSyntax.SyntaxHead
+JuliaSyntax.head
+```
+
+Details about the flags may be extracted using various predicates:
+
+```@docs
+JuliaSyntax.is_trivia
+JuliaSyntax.is_prefix_call
+JuliaSyntax.is_infix_op_call
+JuliaSyntax.is_prefix_op_call
+JuliaSyntax.is_postfix_op_call
+JuliaSyntax.is_dotted
+JuliaSyntax.is_suffixed
+JuliaSyntax.is_decorated
+JuliaSyntax.numeric_flags
+```
+
+Some of the more unusual predicates are accessed merely with `has_flags(x,
+flag_bits)`, where any of the following uppercase constants may be used for
+`flag_bits` after checking that the `kind` is correct.
+
+```@docs
+JuliaSyntax.has_flags
+JuliaSyntax.TRIPLE_STRING_FLAG
+JuliaSyntax.RAW_STRING_FLAG
+JuliaSyntax.PARENS_FLAG
+JuliaSyntax.TRAILING_COMMA_FLAG
+JuliaSyntax.COLON_QUOTE
+JuliaSyntax.TOPLEVEL_SEMICOLONS_FLAG
+JuliaSyntax.MUTABLE_FLAG
+JuliaSyntax.BARE_MODULE_FLAG
+JuliaSyntax.SHORT_FORM_FUNCTION_FLAG
+```
+
+## Syntax trees
+
+Access to the children of a tree node is provided by the functions
+
+```@docs
+JuliaSyntax.is_leaf
+JuliaSyntax.numchildren
+JuliaSyntax.children
+```
+
+For convenient access to the children, we also provide `node[i]`, `node[i:j]`
+and `node[begin:end]` by implementing `Base.getindex()`, `Base.firstindex()` and
+`Base.lastindex()`. We choose to return a view from `node[i:j]` to make it
+non-allocating.
+
+Tree traversal is supported by using these functions along with the predicates
+such as [`kind`](@ref) listed above.
+
+### Trees referencing the source
+
+```@docs
+JuliaSyntax.SyntaxNode
+```
+
+Functions applicable to `SyntaxNode` include everything in the sections on
+heads/kinds as well as the accessor functions in the source code handling
+section.
+
+### Relocatable syntax trees
+
+[`GreenNode`](@ref) is a special low level syntax tree: it's "relocatable" in
+the sense that it doesn't carry an absolute position in the source code or even
+a reference to the source text. This allows it to be reused for incremental
+parsing, but does make it a pain to work with directly!
+
+```@docs
+JuliaSyntax.GreenNode
+```
+
+Green nodes only have a relative position so implement `span()` instead of
+`byte_range()`:
+
+```@docs
+JuliaSyntax.span
+```
diff --git a/JuliaSyntax/docs/src/design.md b/JuliaSyntax/docs/src/design.md
new file mode 100644
index 0000000000000..a11d1b64140ed
--- /dev/null
+++ b/JuliaSyntax/docs/src/design.md
@@ -0,0 +1,850 @@
+# Design discussion and developer documentation
+
+## Goals
+
+* Lossless parsing of Julia code with precise source mapping
+* Production quality error recovery, reporting and unit testing
+* Parser structure similar to Julia's flisp-based parser
+* Speedy enough for interactive editing
+* "Compilation as an API" to support all sorts of tooling
+* Grow to encompass the rest of the compiler frontend: macro expansion,
+  desugaring and other lowering steps.
+* Replace Julia's flisp-based reference frontend
+
+## Design Opinions
+
+* Parser implementation should be independent from tree data structures. So
+  we have the `ParseStream` interface.
+* Tree data structures should be *layered* to balance losslessness with
+  abstraction and generality. So we have `SyntaxNode` (an AST) layered on top
+  of `GreenNode` (a lossless parse tree). We might need other tree types later.
+* Fancy parser generators still seem marginal for production compilers. We use
+  a boring but flexible recursive descent parser.
+
+# Parser implementation
+
+Our goal is to losslessly represent the source text with a tree; this may be
+called a "lossless syntax tree". (This is sometimes called a "concrete syntax
+tree", but that term has also been used for the parse tree of the full formal
+grammar for a language including any grammar hacks required to solve
+ambiguities, etc. So we avoid this term.)
+
+`JuliaSyntax` uses a mostly recursive descent parser which closely
+follows the high level structure of the flisp reference parser. This makes the
+code familiar and reduces porting bugs. It also gives a lot of flexibility for
+designing the diagnostics, tree data structures, compatibility with different
+Julia versions, etc. I didn't choose a parser generator as they still seem
+marginal for production compilers — for the parsing itself they don't seem
+*greatly* more expressive and they can be less flexible for the important
+"auxiliary" code which needs to be written in either case.
+
+### Lexing
+
+We use a hand-written lexer (a heavily modified version of
+[Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl))
+* Newline-containing whitespace is emitted as a separate kind
+* Tokens inside string interpolations are emitted separately from the string
+* Strings delimiters are separate tokens and the actual string always has the
+  `String` kind
+* Additional contextual keywords (`as`, `var`, `doc`) have been added and
+  moved to a subcategory of keywords.
+* Nonterminal kinds were added (though these should probably be factored out again)
+* Various bugs fixed and additions for newer Julia versions
+
+### Parsing with ParseStream
+
+The main parser innovation is the `ParseStream` interface which provides a
+stream-like I/O interface for writing the parser. The parser does not
+depend on or produce any concrete tree data structure as part of the parsing
+phase but the output nodes can be post-processed into various tree data
+structures as required. This is like the design of rust-analyzer though with a
+simpler implementation.
+
+Parsing proceeds by recursive descent;
+
+* The parser consumes a flat list of lexed tokens as *input* using `peek()` to
+  examine tokens and `bump()` to consume them.
+* The parser produces a flat list of `RawGreenNode`s as *output* using `bump()` to
+  transfer tokens to the output and `position()`/`emit()` for nonterminal nodes.
+* Diagnostics are emitted as separate text spans
+* Whitespace and comments are automatically `bump()`ed and don't need to be
+  handled explicitly. The exception is syntactically relevant newlines in space
+  sensitive mode.
+* Parser modes are passed down the call tree using `ParseState`.
+
+The output nodes track the byte range, a syntax "kind" stored as an integer
+tag, and some flags. Each node also stores either the number of child nodes
+(for non-terminals) or the original token kind (for terminals). The kind tag
+makes the nodes a [sum type](https://blog.waleedkhan.name/union-vs-sum-types/)
+but where the type is tracked explicitly outside of Julia's type system.
+
+For lossless parsing the output nodes must cover the entire input text. Using
+`bump()`, `position()` and `emit()` in a natural way also ensures that:
+* Nodes are cleanly nested with children contained entirely within their parents
+* Sibling nodes are emitted in source order
+* Parent nodes are emitted after all their children.
+
+These properties make the output nodes a post-order traversal of a
+["green tree"](#raw-syntax-tree--green-tree)
+in the terminology of C#'s Roslyn compiler, with the tree structure
+implicit in the node spans.
+
+### Tree construction
+
+The `build_tree` function uses the implicit tree structure in the `ParseStream`
+output to assemble concrete tree data structures. Since the output is already
+a post-order traversal of `RawGreenNode`s with node spans encoding parent-child
+relationships, tree construction is straightforward. We build on top of this to
+define `build_tree` for various tree types including `GreenNode`, the AST type
+`SyntaxNode`, and for normal Julia `Expr`.
+
+### Error recovery
+
+The goal of the parser is to produce well-formed hierarchical structure from
+the source text. For interactive tools we need this to work even when the
+source text contains errors; it's the job of the parser to include the recovery
+heuristics to make this work.
+
+Concretely, the parser in `JuliaSyntax` should always produce a green tree
+which is *well formed* in the sense that `GreenNode`s of a given `Kind` have
+well-defined layout of children. This means the `GreenNode` to `SyntaxNode`
+transformation is deterministic and tools can assume they're working with a
+"mostly valid" AST.
+
+What does "mostly valid" mean? We allow the tree to contain the following types
+of error nodes:
+
+* Missing tokens or nodes may be **added** as placeholders when they're needed
+  to complete a piece of syntax. For example, we could parse `a + (b *` as
+  `(call-i a + (call-i * b XXX))` where `XXX` is a placeholder error node.
+* A sequence of unexpected tokens may be **removed** by collecting
+  them as children of an error node and treating them as syntax trivia during
+  AST construction. For example, `a + b end * c` could be parsed as the green
+  tree `(call-i a + b (error-t end * c))`, and turned into the AST `(call + a b)`.
+
+We want to encode both these cases in a way which is simplest for downstream
+tools to use. This is an open question, but for now we use `K"error"` as the
+kind, with the `TRIVIA_FLAG` set for unexpected syntax.
+
+# Syntax trees
+
+Julia's `Expr` abstract syntax tree can't store precise source locations or
+deal with syntax trivia like whitespace or comments. So we need some new tree
+types in `JuliaSyntax`.
+
+JuliaSyntax currently deals in three types of trees:
+* `GreenNode` is a minimal *lossless syntax tree* where
+  - Nodes store a kind and length in bytes, but no text
+  - Syntax trivia are included in the list of children
+  - Children are strictly in source order
+* `SyntaxNode` is an *abstract syntax tree* which has
+  - An absolute position and pointer to the source text
+  - Children strictly in source order
+  - Leaf nodes store values, not text
+  - Trivia are ignored, but there is a 1:1 mapping of non-trivia nodes to the
+    associated `GreenTree` nodes.
+* `Expr` is used as a conversion target for compatibility
+
+## More about syntax kinds
+
+We generally track the type of syntax nodes with a syntax "kind", stored
+explicitly in each node an integer tag. This effectively makes the node type a
+[sum type](https://blog.waleedkhan.name/union-vs-sum-types/) in the type system
+sense, but with the type tracked explicitly outside of Julia's type system.
+
+Managing the type explicitly brings a few benefits:
+* Code and data structures for manipulating syntax nodes is always concretely
+  typed from the point of view of the compiler.
+* We control the data layout and can pack the kind into very few bits along
+  with other flags bits, as desired.
+* Predicates such as `is_operator` can be extremely efficient, given that we
+  know the meaning of the kind's bits.
+* The kind can be applied to several different tree data structures, or
+  manipulated by itself.
+* Pattern matching code is efficient when the full set of kinds is closed and
+  known during compilation.
+
+There's arguably a few downsides:
+* Normal Julia dispatch can't express dispatch over syntax kind. Luckily,
+  a pattern matching macro can provide a very elegant way of expressing such
+  algorithms over a non-extensible set of kinds, so this is not a big problem.
+* Different node kinds could come with different data fields, but a syntax
+  tree must have generic fields to cater for all kinds. (Consider as an analogy
+  the normal Julia AST `QuoteNode` with a single field vs `Expr` with generic
+  `head` and `args` fields.) This could be a disadvantage for code which
+  processes one specific kind but for generic code processing many kinds
+  having a generic but *concrete* data layout should be faster.
+
+# Differences from the flisp parser
+
+_See also the [§ Comparisons to other packages](#comparisons-to-other-packages) section._
+
+Practically the flisp parser is not quite a classic [recursive descent
+parser](https://en.wikipedia.org/wiki/Recursive_descent_parser), because it
+often looks back and modifies the output tree it has already produced. We've
+tried to eliminate this pattern in favor of lookahead where possible because
+
+* It works poorly when the parser is emitting a stream of node spans with
+  strict source ordering constraints.
+* It's confusing to reason about this kind of code
+
+However, on occasion it seems to solve genuine ambiguities where Julia code
+can't be parsed top-down with finite lookahead. Eg for the `kw` vs `=`
+ambiguity within parentheses. In these cases we put up with using the
+functions `look_behind` and `reset_node!()`.
+
+## Code structure
+
+Large structural changes were generally avoided while porting. In particular,
+nearly all function names for parsing productions are the same with `-`
+replaced by `_` and predicates prefixed by `is_`.
+
+Some notable differences:
+
+* `parse-arglist` and a parts of `parse-paren-` have been combined into a
+  general function `parse_brackets`. This function deals with all the odd
+  corner cases of how the AST is emitted when mixing `,` and `;` within
+  parentheses. In particular regard to:
+  - Determining whether `;` are block syntax separators or keyword parameters
+  - Determining whether to emit `parameter` sections based on context
+  - Emitting key-value pairs either as `kw` or `=` depending on context
+* The way that `parse-resword` is entered has been rearranged to avoid parsing
+  reserved words with `parse-atom` inside `parse-unary-prefix`. Instead, we
+  detect reserved words and enter `parse_resword` earlier.
+
+## Flisp parser bugs
+
+Here's some behaviors which seem to be bugs. (Some of these we replicate in the
+name of compatibility, perhaps with a warning.)
+
+* Macro module paths allow calls which gives weird stateful semantics!
+  ```julia
+  b() = rand() > 0.5 ? Base : Core
+  b().@info "hi"
+  ```
+* Misplaced `@` in macro module paths like `A.@B.x` is parsed as odd
+  broken-looking AST like `(macrocall (. A (quote (. B @x))))`.  It should
+  probably be rejected.
+* Operator prefix call syntax doesn't work in the cases like `+(a;b,c)` where
+  keyword parameters are separated by commas. A tuple is produced instead.
+* `const` and `global` allow chained assignment, but the right hand side is not
+  constant. `a` const here but not `b`.
+  ```julia
+  const a = b = 1
+  ```
+* Parsing the `ncat` array concatenation syntax within braces gives
+  strange AST: `{a ;; b}` parses to `(bracescat 2 a b)` which is the same as
+  `{2 ; a ; b}`, but should probably be `(bracescat (nrow 2 a b))` in analogy
+  to how `{a b}` produces `(bracescat (row a b))`.
+* `export a, \n $b` is rejected, but `export a, \n b` parses fine.
+* In try-catch-finally, the `finally` clause is allowed before the `catch`, but
+  always executes afterward. (Presumably was this a mistake? It seems pretty awful!)
+* When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is
+  correctly parsed as `Expr(:vect)` (maybe fixed in 1.7?)
+* `f(x for x in in xs)` is accepted, and parsed very strangely.
+* Octal escape sequences saturate rather than being reported as errors. Eg,
+  `"\777"` results in `"\xff"`.  This is inconsistent with
+  `Base.parse(::Type{Int}, ...)`
+* Leading dots in import paths with operator-named modules are parsed into
+  dotted operators rather than a relative path. Ie, we have `import .⋆` parsing
+  to `(import (. .⋆))` whereas it should be `(import (. . ⋆))` for consistency
+  with the parsing of `import .A`.
+* Looking back on the output disregards grouping parentheses which can lead to
+  odd results in some cases. For example, `f(((((x=1)))))` parses as a keyword
+  call to function `f` with the keyword `x=1`, but arguably it should be an
+  assignment.
+* Hexfloat literals can have a trailing `f` for example, `0x1p1f`
+  but this doesn't do anything. In the `flisp` C code such cases are treated as
+  Float32 literals and this was intentional https://github.com/JuliaLang/julia/pull/2925
+  but this has never been officially supported in Julia. It seems this bug
+  arises from `(set! pred char-hex?)` in `parse-number` accepting hex exponent
+  digits, all of which are detected as invalid except for a trailing `f` when
+  processed by `isnumtok_base`.
+* `begin` and `end` are not parsed as keywords when indexing. Typed comprehensions
+  initially look the same, but can be distinguished from indexing once we handle
+  a `for` token; it is safe to treat `begin` and `end` as keywords afterwards. The
+  reference parser *only* handles this well when there's a newline before `for`:
+  ```julia
+  Any[foo(i)
+      for i in x if begin
+          true
+      end
+  ]
+  ```
+  works, while
+  ```julia
+  Any[foo(i) for i in x if begin
+          true
+      end
+  ]
+  ```
+  does not. JuliaSyntax handles both cases.
+
+## Parsing / AST oddities and warts
+
+### Questionable allowed forms
+
+There's various allowed syntaxes which are fairly easily detected in the
+parser, but which will be rejected later during lowering. To allow building
+DSLs this is fine and good but some such allowed syntaxes don't seem very
+useful, even for DSLs:
+
+* `macro (x) end` is allowed but there are no anonymous macros.
+* `abstract type A < B end` and other subtype comparisons are allowed, but
+  only `A <: B` makes sense.
+* `x where {S T}` produces `(where x (bracescat (row S T)))`. This seems pretty weird!
+* `[x for outer x in xs]` parses, but `outer` makes no real sense in this
+  context (and using this form is a lowering error)
+
+### `kw` and `=` inconsistencies
+
+There's many apparent inconsistencies between how `kw` and `=` are used when
+parsing `key=val` pairs inside parentheses.
+
+* Inconsistent parsing of tuple keyword args inside vs outside of dot calls
+  ```julia
+  (a=1,)           # (tuple (= a 1))
+  f.(a=1)          # (tuple (kw a 1))
+  ```
+* Mixtures of `,` and `;` in calls give nested parameter AST which parses
+  strangely, and is kind-of-horrible to use.
+  ```julia
+  # (tuple (parameters (parameters e f) c d) a b)
+  (a,b; c,d; e,f)
+  ```
+* Long-form anonymous functions have argument lists which are parsed
+  as tuples (or blocks!) rather than argument lists and this mess appears to be
+  papered over as part of lowering. For example, in `function (a;b) end` the
+  `(a;b)` is parsed as a block! This leads to more inconsistency in the use of
+  `kw` for keywords.
+
+
+### Other oddities
+
+* Operators with suffixes don't seem to always be parsed consistently as the
+  same operator without a suffix. Unclear whether this is by design or mistake.
+  For example, `[x +y] ==> (hcat x (+ y))`, but `[x +₁y] ==> (hcat (call +₁ x y))`
+
+* `global const x=1` is normalized by the parser into `(const (global (= x 1)))`.
+  I suppose this is somewhat useful for AST consumers, but reversing the source
+  order is pretty weird and inconvenient when moving to a lossless parser.
+
+* `let` bindings might be stored in a block, or they might not be, depending on
+  special cases:
+  ```julia
+  # Special cases not in a block
+  let x=1 ; end   # ==>  (let (= x 1) (block))
+  let x::1 ; end  # ==>  (let (:: x 1) (block))
+  let x ; end     # ==>  (let x (block))
+
+  # In a block
+  let x=1,y=2 ; end  # ==>  (let (block (= x 1) (= y 2) (block)))
+  let x+=1 ; end     # ==>  (let (block (+= x 1)) (block))
+  ```
+
+* The `elseif` condition is always in a block but not the `if` condition.
+  Presumably because of the need to add a line number node in the flisp parser
+  `if a xx elseif b yy end   ==>  (if a (block xx) (elseif (block b) (block yy)))`
+
+* Spaces are allowed between import dots — `import . .A` is allowed, and
+  parsed the same as `import ..A`
+
+* `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.`
+  can't be a normal identifier.
+
+* The raw string escaping rules are *super* confusing for backslashes near
+  the end of the string: `raw"\\\\ "` contains four backslashes, whereas
+  `raw"\\\\"` contains only two. However this was an intentional feature to
+  allow all strings to be represented and it's unclear whether the situation
+  can be improved.
+
+* In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and
+  `@S {a b}` parse. Conversely, `@S[a b]` parses.
+
+* Macro names and invocations are post-processed from the output of
+  `parse-atom` / `parse-call`, which leads to some surprising and questionable
+  constructs which "work":
+  - Absurdities like `@(((((a))))) x ==> (macrocall @a x)`
+  - Infix macros!? `@(x + y)  ==>  (macrocall @+ x y)` (ok, kinda cute and has
+    some weird logic to it... but what?)
+  - Similarly additional parentheses are allowed `@(f(x)) ==> (macrocall @f x)`
+
+* Allowing `@` first in macro module paths (eg `@A.B.x` instead of `A.B.@x`)
+  seems like unnecessary variation in syntax. It makes parsing valid macro
+  module paths more complex and leads to oddities like `@$.x y ==> (macrocall
+  ($ (quote x)) y` where the `$` is first parsed as a macro name, but turns out
+  to be the module name after the `.` is parsed. But `$` can never be a valid
+  module name in normal Julia code so this makes no sense.
+
+* Triple quoted `var"""##"""` identifiers are allowed. But it's not clear these
+  are required or desired given that they come with the complex triple-quoted
+  string deindentation rules.
+
+* Deindentation of triple quoted strings with mismatched whitespace is weird
+  when there's nothing but whitespace. For example, we have
+  `"\"\"\"\n  \n \n  \"\"\"" ==> "\n \n"` so the middle line of whitespace
+  here isn't dedented but the other two longer lines are?? Here it seems more
+  consistent that either (a) the middle line should be deindented completely,
+  or (b) all lines should be dedented only one character, as that's the
+  matching prefix.
+
+* Parsing of anonymous function arguments is somewhat inconsistent.
+  `function (xs...) \n body end` parses the argument list as `(... xs)`, whereas
+  `function (x) \n body end` parses the argument list as `(tuple x)`.
+
+* The difference between multidimensional vs flattened iterators is subtle, and
+  perhaps too syntactically permissive.  For example,
+  - `[(x,y) for x * in 1:10, y in 1:10]` is a multidimensional iterator
+  - `[(x,y) for x * in 1:10 for y in 1:10]` is a flattened iterator
+  - `[(x,y) for x in 1:10, y in 1:10 if y < x]` is a flattened iterator
+
+  It's this last case which seems problematic (why not *require* the second
+  form as a more explicit way to indicate flattening?). It's not even pretty
+  printed correctly:
+  ```julia-repl
+  julia> :([(x,y) for x in 1:10, y in 1:10 if y < x])
+  :([(x, y) for $(Expr(:filter, :(y < x), :(x = 1:10), :(y = 1:10)))])
+  ```
+
+* The character `'` may be written without escaping as `'''` rather than
+  requiring the form `'\''`.
+
+# Comparisons to other packages
+
+### Official Julia compiler
+
+_See also the [§ Differences from the flisp parser](#differences-from-the-flisp-parser) section._
+
+The official Julia compiler frontend lives in the Julia source tree. It's
+mostly contained in just a few files:
+* The parser in [src/julia-parser.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-parser.scm)
+* Macro expansion in [src/ast.c](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/ast.c) and [src/macroexpand.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/macroexpand.scm)
+* Syntax lowering in [src/julia-syntax.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-syntax.scm)
+* The flisp runtime and C extensions for Julia in [src/flisp](https://github.com/JuliaLang/julia/tree/master/src/flisp)
+* Supporting utility functions in a few other `.scm` and `.c` files.
+
+There's two issues with the official reference frontend which suggest a rewrite.
+
+First, there's no support for precise source locations and the existing data
+structures (bare flisp lists) can't easily be extended to add these. Fixing
+this would require changes to nearly all of the code.
+
+Second, it's written in flisp: an aesthetically pleasing, minimal but obscure
+implementation of Scheme. Learning Scheme is actually a good way to appreciate
+some of Julia's design inspiration, but it's quite a barrier for developers of
+Julia language tooling. (Flisp has no user-level documentation but non-schemers
+can refer to the [Racket documentation](https://docs.racket-lang.org) which is
+quite compatible for basic things.) In addition to the social factors, having
+the embedded flisp interpreter and runtime with its own separate data
+structures and FFI is complex and inefficient.
+
+### JuliaParser.jl
+
+[JuliaParser.jl](https://github.com/JuliaLang/JuliaParser.jl)
+was a direct port of Julia's flisp reference parser, but was abandoned around
+Julia 0.5 or so. Furthermore, it doesn't support lossless parsing, and adding
+that feature would amount to a full rewrite. Given its divergence with the flisp
+reference parser since Julia-0.5, it seemed better just to start anew from the
+reference parser instead.
+
+### Tokenize.jl
+
+[Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl)
+is a fast lexer for Julia code. The code from Tokenize has been
+imported and used in JuliaSyntax, with some major modifications as discussed in
+the [lexer implementation](#lexing) section.
+
+### CSTParser.jl
+
+[CSTParser.jl](https://github.com/julia-vscode/CSTParser.jl)
+is a ([mostly?](https://github.com/domluna/JuliaFormatter.jl/issues/52#issuecomment-529945126))
+lossless parser with goals quite similar to JuliaParser. It is used extensively
+in the VSCode / LanguageServer / JuliaFormatter ecosystem. CSTParser is very
+useful, but I do find the implementation hard to understand, and I wanted to try
+a fresh approach with a focus on:
+
+* "Production readiness": Good docs, tests, diagnostics and maximum similarity
+  with the flisp parser, with the goal of getting the new parser into `Core`.
+* Learning from the latest ideas about composable parsing and data structures
+  from outside Julia. In particular the implementation of `rust-analyzer` is
+  very clean, well documented, and was a great source of inspiration.
+* Composability of tree data structures — I feel like the trees should be
+  layered somehow with a really lightweight [green tree](#raw-syntax-tree--green-tree)
+  at the most basic level, similar to Roslyn or rust-analyzer. In comparison,
+  CSTParser uses a more heavyweight non-layered data structure. Alternatively or
+  additionally, have a common tree API with many concrete task-specific
+  implementations.
+
+A big benefit of the JuliaSyntax parser is that it separates the parser code
+from the tree data structures entirely, which should give a lot of flexibility
+in experimenting with various tree representations.
+
+I also want JuliaSyntax to tackle macro expansion and other lowering steps, and
+provide APIs for this which can be used by both the core language and the
+editor tooling.
+
+### tree-sitter-julia
+
+Using a modern production-ready parser generator like `tree-sitter` is an
+interesting option and some progress has already been made in
+[tree-sitter-julia](https://github.com/tree-sitter/tree-sitter-julia).
+But I feel like the grammars for parser generators are only marginally more
+expressive than writing the parser by hand, after accounting for the effort
+spent on the weird edge cases of a real language and writing the parser's tests
+and "supporting code".
+
+On the other hand, a hand-written parser is completely flexible and can be
+mutually understood with the reference implementation, so I chose that approach
+for JuliaSyntax.
+
+# Resources
+
+## Julia issues
+
+Here's a few links to relevant Julia issues.
+
+#### Macro expansion
+
+* Automatic hygiene for macros https://github.com/JuliaLang/julia/pull/6910 —
+  would be interesting to implement this in a new frontend.
+
+#### Lowering
+
+* A partial implementation of lowering in Julia https://github.com/JuliaLang/julia/pull/32201 —
+  some of this should be ported. (Last commit at https://github.com/JuliaLang/julia/tree/df61138fcf97d03dcbbba10e962571af9700db56/ )
+* The closure capture problem https://github.com/JuliaLang/julia/issues/15276 —
+  would be interesting to see whether we can tackle some of the harder cases in
+  a new implementation.
+
+## C# Roslyn
+
+[Persistence, façades and Roslyn’s red-green trees](https://ericlippert.com/2012/06/08/red-green-trees/)
+* [Roslyn optimization overview](https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees)
+* [Literate C# Usage Example](https://johtela.github.io/LiterateCS/LiterateCS/BlockBuilder.html)
+
+
+## Rust-analyzer
+
+`rust-analyzer` seems to be very close to what I'm building here, and has come
+to the same conclusions on green tree layout with explicit trivia nodes.  Their
+document on internals
+[here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md)
+is great. Points of note:
+
+* They have *three* trees!
+  1. Green trees exactly like mine (pretty much all the same design
+     decisions, including trivia storage). Though note that the team are still
+     [toying with](https://github.com/rust-analyzer/rust-analyzer/issues/6584)
+     the idea of using the Roslyn model of trivia.
+  2. Untyped red syntax trees somewhat like mine, but much more minimal. For
+     example, these don't attempt to reorder children.
+  3. A typed AST layer with a type for each expression head. The AST searches
+     for children by dynamically traversing the child list each time, rather
+     than having a single canonical ordering or remembering the placement of
+     children which the parser knew.
+* "Parser does not see whitespace nodes. Instead, they are attached to the
+  tree in the TreeSink layer." This may be relevant to us - it's a pain to
+  attach whitespace to otherwise significant tokens, and inefficient to
+  allocate and pass around a dynamic list of whitespace trivia.
+* "In practice, incremental reparsing doesn't actually matter much for IDE
+  use-cases, parsing from scratch seems to be fast enough." (I wonder why
+  they've implemented incremental parsing then?)
+* There's various comments about macros... Rust macro expansion seems quite
+  different from Julia (it appears it may be interleaved with parsing??)
+
+In general I think it's unclear whether we want typed ASTs in Julia and we
+particularly need to deal with the fact that `Expr` is the existing public
+interface. Could we have `Expr2` wrap `SyntaxNode`?
+
+* A related very useful set of blog posts which discuss using the rust syntax
+  tree library (rowan) for representing of a non-rust toy language is here
+  https://dev.to/cad97/lossless-syntax-trees-280c
+
+Not all the design decisions in `rust-analyzer` are finalized but the
+[architecture document](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/architecture.md)
+is a fantastic source of design inspiration.
+
+Highlights:
+* "The parser is independent of the particular tree structure and particular
+  representation of the tokens. It transforms one flat stream of events into
+  another flat stream of events."  This seems great, let's adopt it!
+* TODO
+
+## RSLint
+
+[RSLint](https://rslint.org/dev) is a linter for javascript, built in Rust. It
+uses the same parsing infrastructure and green tree libraries `rust-analyzer`.
+There's an excellent and friendly high level overview of how all this works in
+the rslint [parsing devdocs](https://rslint.org/dev/parsing.html).
+
+Points of note:
+
+* Backtracking and restarting the parser on error is actually quite simple in
+  the architecture we (mostly) share with `rust-analyzer`:
+  > ... events allow us to cheaply backtrack the parser by simply draining
+  > the events and resetting the token source cursor back to some place.
+
+* The section on [error
+  recovery](https://rslint.org/dev/parsing.html#error-recovery) is interesting;
+  they talk about various error recovery strategies.
+
+## Diagnostics
+
+The paper [P2429 - Concepts Error Messages for
+Humans](https://wg21.tartanllama.xyz/P2429%20-%20Concepts%20Error%20Messages%20for%20Humans.pdf)
+is C++ centric, but has a nice review of quality error reporting in various
+compilers including Elm, ReasonML, Flow, D and Rust.
+
+Some Rust-specific resources:
+* [rustc_errors::Diagnostic](https://doc.rust-lang.org/stable/nightly-rustc/rustc_errors/struct.Diagnostic.html)
+* The source of the Rust compiler's diagnostics system:
+  - The [`println!` macro](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_builtin_macros/src/format.rs)
+    shows how these can be emitted from macros
+  - The parser's [diagnostics.rs](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_parse/src/parser/diagnostics.rs)
+
+## General resources about parsing
+
+* [Modern parser generator](https://matklad.github.io/2018/06/06/modern-parser-generator.html)
+  has a lot of practical notes on writing parsers. Highlights:
+  - Encourages writing tests for handwritten parsers as inline comments
+  - Mentions Pratt parsers for simple operator precedence parsing. Good articles:
+    - [From Aleksey Kladov (matklad - the main rust-analyzer author, etc)](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html)
+    - [From Bob Nystrom (munificent - one of the Dart devs, etc](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/)
+  - Some discussion of error recovery
+
+* Some notes about stateful lexers for parsing shell-like string interpolations:
+  http://www.oilshell.org/blog/2017/12/17.html
+
+
+# Design notes
+
+The following are some fairly disorganized design notes covering a mixture of
+things which have already been done and musings about further work.
+
+## Prototyping approach
+
+The tree datastructure design here is tricky:
+
+1. The symbolic part of compilation (the compiler frontend) incrementally
+   abstracts and transforms the source text, but errors along the way should
+   refer back to the source.
+  - The tree must be a lossless representation of the source text
+  - Some aspects of the source text (comments, most whitespace) are irrelevant
+    to parsing.
+  - More aspects of the source text are irrelevant after we have an abstract
+    syntax tree of the surface syntax. Some good examples here are the
+    parentheses in `2*(x + y)` and the explicit vs implicit multiplication
+    symbol in `2*x` vs `2x`.
+
+2. There's various type of *analyses*
+- There's many useful ways to augment a syntax tree depending on use case.
+- Analysis algorithms should be able to act on any tree type, ignoring
+  but carrying augmentations which they don't know about.
+
+Having so many use cases suggests it might be best to have several different
+tree types with a common interface rather than one main abstract syntax tree
+type. But it seems useful to figure this out by prototyping several important
+work flows:
+
+* Syntax transformations
+  - Choose some macros to implement. This is a basic test of mixing source
+    trees from different files while preserving precise source locations.
+    (Done in <test/syntax_interpolation.jl>.)
+* Formatting
+  - Re-indent a file. This tests the handling of syntax trivia.
+* Refactoring
+  - A pass to rename local variables. This tests how information from further
+    down the compilation pipeline can be attached to the syntax tree and used
+    to modify the source code.
+* Precise error reporting in lowering
+  - Syntax desugaring `[a, b] = (c, d)` should report "invalid assignment
+    location `[a, b]`". But at a precise source location.
+  - Try something several layers deeper inside lowering? For example "macro
+    definition not allowed inside a local scope"
+* Incremental reparsing
+  - Reparse a source file, given a byte range replacement
+
+
+## Tree design
+
+### Raw syntax tree / Green tree
+
+Raw syntax tree (or ["Green tree"](https://ericlippert.com/2012/06/08/red-green-trees/)
+in the terminology from Roslyn)
+
+We want GreenNode to be
+* *structurally minimal* — For efficiency and generality
+* *immutable*            — For efficiency (& thread safety)
+* *complete*             — To preserve parser knowledge
+* *token agnostic*       — To allow use with any source language
+
+The simplest idea possible is to have:
+* Leaf nodes are a single token
+* Children are in source order
+
+Call represents a challenge for the AST vs Green tree in terms of node
+placement / iteration for infix operators vs normal prefix function calls.
+
+- The normal problem of `a + 1` vs `+(a, 1)`
+- Or worse, `a + 1 + 2` vs `+(a, 1, 2)`
+
+Clearly in the AST's *interface* we need to abstract over this placement. For
+example with something like the normal Julia AST's iteration order.
+
+### Abstract syntax tree
+
+By pointing to green tree nodes, AST nodes become traceable back to the original
+source.
+
+Unlike most languages, designing a new AST is tricky because the existing
+`Expr` is a very public API used in every macro expansion. User-defined
+macro expansions interpose between the source text and lowering, and using
+`Expr` looses source information in many ways.
+
+There seems to be a few ways forward:
+* Maybe we can give `Expr` some new semi-hidden fields to point back to the
+  green tree nodes that the `Expr` or its `args` list came from?
+* We can use the existing `Expr` during macro expansion and try to recover
+  source information after macro expansion using heuristics. Likely the
+  presence of correct hygiene can help with this.
+* Introducing a new AST would be possible if it were opt-in for some
+  hypothetical "new-style macros" only. Fixing hygiene should go along with
+  this. Design challenge: How do we make manipulating expressions reasonable
+  when literals need to carry source location?
+
+One option which may help bridge between locationless ASTs and something new
+may be to have wrappers for the small number of literal types we need to cover.
+For example:
+
+```julia
+SourceSymbol <: AbstractSymbol
+SourceInt    <: Integer
+SourceString <: AbstractString
+```
+
+Having source location attached to symbols would potentially solve most of the
+hygiene problem. There's still the problem of macro helper functions which use
+symbol literals; we can't very well be changing the meaning of `:x`! Perhaps
+the trick there is to try capturing the current module at the location of the
+interpolation syntax. Eg, if you do `:(y + $x)`, lowering expands this to
+`Core._expr(:call, :+, :y, x)`, but it could expand it to something like
+`Core._expr(:call, :+, :y, _add_source_symbol(_module_we_are_lowering_into, x))`?
+
+## Parsing
+
+### Error recovery
+
+Some disorganized musings about error recovery
+
+Different types of errors seem to occur...
+
+* Disallowed syntax (such as lack of spaces in conditional expressions)
+  where we can reasonably just continue parsing and emit the node with an error
+  flag which is otherwise fully formed. In some cases like parsing infix
+  expressions with a missing tail, emitting a zero width error token can lead
+  to a fully formed parse tree without the productions up the stack needing to
+  participate in recovery.
+* A token which is disallowed in current context. Eg, `=` in parse_atom, or a
+  closing token inside an infix expression. Here we can emit a `K"error"`, but
+  we can't descend further into the parse tree; we must pop several recursive
+  frames off. Seems tricky!
+
+A typical structure is as follows:
+
+```julia
+function parse_foo(ps)
+    mark = position(ps)
+    parse_bar(ps)  # What if this fails?
+    if peek(ps) == K"some-token"
+        bump(ps)
+        parse_baz(ps)  # What if this fails?
+        emit(ps, mark, K"foo")
+    end
+end
+```
+
+Emitting plain error tokens are good in unfinished infix expressions:
+
+```julia
+begin
+    a = x +
+end
+```
+
+The "missing end" problem is tricky, as the intermediate syntax is valid; the
+problem is often only obvious until we get to EOF.
+
+Missing end
+```julia
+function f()
+    begin
+        a = 10
+end
+
+# <-- Indentation would be wrong if g() was an inner function of f.
+function g()
+end
+```
+
+It seems like ideal error recovery would need to backtrack in this case. For
+example:
+
+- Pop back to the frame which was parsing `f()`
+- Backtrack through the parse events until we find a function with indentation
+  mismatched to the nesting of the parent.
+- Reset ParseStream to a parsing checkpoint before `g()` was called
+- Emit error and exit the function parsing `f()`
+- Restart parsing
+- Somehow make sure all of this can't result in infinite recursion 😅
+
+Missing commas or closing brackets in nested structures also present the
+existing parser with a problem.
+
+```julia
+f(a,
+  g(b,
+    c    # -- missing comma?
+    d),
+  e)
+```
+
+Again the local indentation might tell a story
+
+```julia
+f(a,
+  g(b,
+    c    # -- missing closing `)` ?
+  d)
+```
+
+But not always!
+
+```julia
+f(a,
+  g(b,
+    c    # -- missing closing `,` ?
+  d))
+```
+
+Another particularly difficult problem for diagnostics in the current system is
+broken parentheses or double quotes in string interpolations, especially when
+nested.
+
+# Fun research questions
+
+### Parser Recovery
+
+Can we learn fast and reasonably accurate recovery heuristics for when the
+parser encounters broken syntax, rather than hand-coding these? How would we
+set the parser up so that training works and injecting the model is
+nonintrusive? If the model is embedded in and works together with the parser,
+can it be made compact enough that training is fast and the model itself is
+tiny?
+
+### Formatting
+
+Given source and syntax tree, can we regress/learn a generative model of
+indentation from the syntax tree?  Source formatting involves a big pile of
+heuristics to get something which "looks nice"... and ML systems have become
+very good at heuristics. Also, we've got huge piles of training data — just
+choose some high quality, tastefully hand-formatted libraries.
diff --git a/JuliaSyntax/docs/src/howto.md b/JuliaSyntax/docs/src/howto.md
new file mode 100644
index 0000000000000..c8bd0503591d5
--- /dev/null
+++ b/JuliaSyntax/docs/src/howto.md
@@ -0,0 +1,37 @@
+# How-To
+
+This section contains brief recipes for particular tasks
+
+## Use JuliaSyntax as the default parser
+
+To use JuliaSyntax as the default Julia parser for the REPL and to `include()`
+files, parse code with `Meta.parse()`, etc, put the following in your
+startup.jl file:
+
+```julia
+using JuliaSyntax
+JuliaSyntax.enable_in_core!()
+```
+
+This works well in Julia 1.9 but in Julia 1.8 will cause some startup latency.
+To reduce that you can create a custom system image by running the code in
+`./sysimage/compile.jl` as a Julia script (or directly using the shell, on
+unix). Then use `julia -J $resulting_sysimage`.
+
+Using a custom sysimage has the advantage that package precompilation will also
+go through the JuliaSyntax parser.
+
+### VSCode
+
+To use JuliaSyntax as the default parser for Julia within VSCode, add the
+following to your `startup.jl` file:
+
+```julia
+import JuliaSyntax
+JuliaSyntax.enable_in_core!()
+```
+
+To reduce startup latency you can combine with a custom system as described in
+the [Julia VScode docs](https://www.julia-vscode.org/docs/dev/userguide/compilesysimage/#Creating-a-sysimage-for-the-active-environment),
+combined with the precompile execution file in `sysimage/precompile_exec.jl` in the source tree.
+For additional detail see the discussion in [issue #128](https://github.com/JuliaLang/JuliaSyntax.jl/issues/128).
diff --git a/JuliaSyntax/docs/src/index.md b/JuliaSyntax/docs/src/index.md
new file mode 100644
index 0000000000000..79b8d83b62e6a
--- /dev/null
+++ b/JuliaSyntax/docs/src/index.md
@@ -0,0 +1,79 @@
+# JuliaSyntax.jl
+
+A Julia compiler frontend, written in Julia.
+
+A [talk from JuliaCon 2022](https://youtu.be/CIiGng9Brrk) covered some aspects
+of this package.
+
+## Examples
+
+Here's what parsing of a small piece of code currently looks like in various
+forms. We'll use the `JuliaSyntax.parsestmt` function to demonstrate, there's also
+`JuliaSyntax.parse!` offering more fine-grained control.
+
+First, a source-ordered AST with `SyntaxNode` (`call-i` in the dump here means
+the `call` has the infix `-i` flag):
+
+```julia
+julia> using JuliaSyntax
+
+julia> parsestmt(SyntaxNode, "(x + y)*z", filename="foo.jl")
+line:col│ tree                                   │ file_name
+   1:1  │[call-i]                                │foo.jl
+   1:1  │  [parens]
+   1:2  │    [call-i]
+   1:2  │      x
+   1:4  │      +
+   1:6  │      y
+   1:8  │  *
+   1:9  │  z
+```
+
+Internally this has a full representation of all syntax trivia (whitespace and
+comments) as can be seen with the more raw ["green tree"](#raw-syntax-tree--green-tree)
+representation with `GreenNode`. Here ranges on the left are byte ranges, and
+`✔` flags nontrivia tokens. Note that the parentheses are trivia in the tree
+representation, despite being important for parsing.
+
+```julia
+julia> text = "(x + y)*z"
+       greentree = parsestmt(JuliaSyntax.GreenNode, text)
+     1:9      │[call]
+     1:7      │  [parens]
+     1:1      │    (
+     2:6      │    [call]
+     2:2      │      Identifier         ✔
+     3:3      │      Whitespace
+     4:4      │      +                  ✔
+     5:5      │      Whitespace
+     6:6      │      Identifier         ✔
+     7:7      │    )
+     8:8      │  *                      ✔
+     9:9      │  Identifier             ✔
+```
+
+`GreenNode` stores only byte ranges, but the token strings can be shown by
+supplying the source text string:
+
+```julia
+julia> show(stdout, MIME"text/plain"(), greentree, text)
+     1:9      │[call]
+     1:7      │  [parens]
+     1:1      │    (                        "("
+     2:6      │    [call]
+     2:2      │      Identifier         ✔   "x"
+     3:3      │      Whitespace             " "
+     4:4      │      +                  ✔   "+"
+     5:5      │      Whitespace             " "
+     6:6      │      Identifier         ✔   "y"
+     7:7      │    )                        ")"
+     8:8      │  *                      ✔   "*"
+     9:9      │  Identifier             ✔   "z"
+```
+
+Julia `Expr` can also be produced:
+
+```julia
+julia> JuliaSyntax.parsestmt(Expr, "(x + y)*z")
+:((x + y) * z)
+```
diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md
new file mode 100644
index 0000000000000..086bc57ad8224
--- /dev/null
+++ b/JuliaSyntax/docs/src/reference.md
@@ -0,0 +1,326 @@
+# Syntax Trees
+
+This section describes the syntax trees produced by JuliaSyntax, mainly in
+terms of their similarities and differences with the `Expr` tree data
+structures used since Julia 0.1.
+
+## JuliaSyntax trees vs `Expr`
+
+The tree structure of `GreenNode`/`SyntaxNode` is similar to Julia's `Expr`
+data structure but there are various differences:
+
+### Source ordered children
+
+The children of our trees are strictly in source order. This has many
+consequences in places where `Expr` reorders child expressions.
+
+* Infix and postfix operator calls have the operator name in the *second* child position. `a + b` is parsed as `(call-i a + b)` - where the infix `-i` flag indicates infix child position - rather than `Expr(:call, :+, :a, :b)`.
+* Generators are represented in source order as a single node rather than multiple nested flatten and generator expressions.
+
+### No `LineNumberNode`s
+
+Our syntax nodes inherently stores source position, so there's no need for the
+`LineNumberNode`s used by `Expr`.
+
+### More consistent / less redundant `block`s
+
+Sometimes `Expr` needs redundant block constructs to store `LineNumberNode`s,
+but we don't need these. Also in cases which do use blocks we try to use them
+consistently.
+
+* No block is used on the right hand side of short form function syntax
+* No block is used for the conditional in `elseif`
+* No block is used for the body of anonymous functions after the `->`
+* `let` argument lists always use a block regardless of number or form of bindings
+
+### Faithful representation of the source text / avoid premature lowering
+
+Some cases of "premature lowering" have been removed, preferring to represent
+the source text more closely.
+
+* `K"macrocall"` - allow users to easily distinguish macrocalls with parentheses from those without them (#218)
+* Grouping parentheses are represented with a node of kind `K"parens"` (#222)
+* The right hand side of `x where {T}` retains the `K"braces"` node around the `T` to distinguish it from `x where T`.
+* Ternary syntax is not immediately lowered to an `if` node: `a ? b : c` parses as `(? a b c)` rather than `Expr(:if, :a, :b, :c)` (#85)
+* `global const` and `const global` are not normalized by the parser. This is done in `Expr` conversion (#130)
+* [`do` syntax](#Do-blocks) is nested as the last child of the call which the `do` lambda will be passed to (#98, #322)
+* `@.` is not lowered to `@__dot__` inside the parser (#146)
+* Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217)
+* Juxtaposition uses the `K"juxtapose"` kind rather than lowering immediately to `*` (#220)
+* `return` without a value has zero children, rather than lowering to `return nothing` (#220)
+* Command syntax `` `foo` `` parses into a `cmdstring` tree node wrapping the string, as `(cmdstring "foo")` (#438). These are lowered to a macro call later rather than by the parser.
+
+### Containers for string-like constructs
+
+String-like constructs always come within a container node, not as a single
+token. These are useful for tooling which works with the tokens of the source
+text. Also separating the delimiters from the text they delimit removes a whole
+class of tokenization errors and lets the parser deal with them.
+
+* string always use `K"string"` to wrap strings, even when they only contain a single string chunk (#94)
+* char literals are wrapped in the `K"char"` kind, containing the character literal string along with their delimiters (#121)
+* backticks use the `K"cmdstring"` kind
+* `var""` syntax uses `K"var"` as the head (#127)
+* The parser splits triple quoted strings into string chunks interspersed with whitespace trivia
+
+### Improvements for AST inconsistencies
+
+* Field access syntax like `a.b` is parsed as `(. a b)` rather than `(. a (quote b))` to avoid the inconsistency between this and actual quoted syntax literals like `:(b)` and `quote b end` ([#342](https://github.com/JuliaLang/JuliaSyntax.jl/issues/324))
+* Dotted call syntax like `f.(a,b)` and `a .+ b` has been made consistent with the `K"dotcall"` head (#90)
+* Standalone dotted operators are always parsed as `(. op)`. For example `.*(x,y)` is parsed as `(call (. *) x y)` (#240)
+* The `K"="` kind is used for keyword syntax rather than `kw`, to avoid various inconsistencies and ambiguities (#103)
+* Unadorned postfix adjoint is parsed as `call` rather than as a syntactic operator for consistency with suffixed versions like `x'ᵀ` (#124)
+* The argument list in the left hand side of `->` is always a tuple. For example, `x->y` parses as `(-> (tuple x) y)` rather than `(-> x y)` (#522)
+
+### Improvements to awkward AST forms
+
+* `FrankenTuple`s with multiple parameter blocks like `(a=1, b=2; c=3; d=4)` are flattened into the parent tuple instead of using nested `K"parameters"` nodes (#133)
+* Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234)
+* The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244)
+* We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220)
+* Iterations are represented with the `iteration` and `in` heads rather than `=` within the header of a `for`. Thus `for i=is ; body end` parses to `(for (iteration (in i is)) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` are represented with a nested `(iteration (in a as) (in b bs))` rather than a `block` containing `=` because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. Generators also use the `iteration` head - see information on that below.
+* Short form functions like `f(x) = x + 1` are represented with the `function` head rather than the `=` head. In this case the `SHORT_FORM_FUNCTION_FLAG` flag is set to allow the surface syntactic form to be easily distinguished from long form functions.
+* All kinds of updating assignment operators like `+=` are represented with a single `K"op="` head, with the operator itself in infix position. For example, `x += 1` is `(op= x + 1)`, where the plus token is of kind `K"Identifier"`. This greatly reduces the number of distinct forms here from a rather big list (`$=` `%=` `&=` `*=` `+=` `-=` `//=` `/=` `<<=` `>>=` `>>>=` `\=` `^=` `|=` `÷=` `⊻=`) and makes the operator itself appear in the AST as kind `K"Identifier"`, as it should. It also makes it possible to add further unicode updating operators while keeping the AST stable.
+
+## More detail on tree differences
+
+### Generators
+
+Flattened generators are uniquely problematic because the Julia AST doesn't
+respect a key rule we normally expect: that the children of an AST node are a
+*contiguous* range in the source text. For example, the `for`s in
+`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop to
+mean
+
+```
+for x in xs
+    for y in ys
+        push!(xy, collection)
+    end
+end
+```
+
+so the `xy` prefix is in the *body* of the innermost for loop. Following this,
+the standard Julia AST is like so:
+
+```
+(flatten
+  (generator
+    (generator
+      xy
+      (= y ys))
+    (= x xs)))
+```
+
+however, note that if this tree were flattened, the order would be
+`(xy) (y in ys) (x in xs)` and the `x` and `y` iterations are *opposite* of the
+source order.
+
+However, our green tree is strictly source-ordered, so we must deviate from the
+Julia AST. We deal with this by grouping cartesian products of iterators
+(separated by commas) within `iteration` blocks as in `for` loops, and
+use the length of the `iteration` block rather than the `flatten` head to
+distinguish flattened iterators. The nested flattens and generators of `Expr`
+forms are reconstructed later. In this form the tree structure resembles the
+source much more closely. For example, `(xy for x in xs for y in ys)` is parsed as
+
+```
+(generator
+  xy
+  (iteration (in x xs))
+  (iteration (in y ys)))
+```
+
+And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as
+
+```
+(generator
+  xy
+  (iteration (in x xs) (in y ys)))
+```
+
+### Whitespace trivia inside strings
+
+For triple quoted strings, the indentation isn't part of the string data so
+should also be excluded from the string content within the green tree. That is,
+it should be treated as separate whitespace trivia tokens. With this separation
+things like formatting should be much easier. The same reasoning goes for
+escaping newlines and following whitespace with backslashes in normal strings.
+
+Detecting string trivia during parsing means that string content is split over
+several tokens. Here we wrap these in the K"string" kind (as is already used
+for interpolations). The individual chunks can then be reassembled during Expr
+construction. (A possible alternative might be to reuse the K"String" and
+K"CmdString" kinds for groups of string chunks (without interpolation).)
+
+Take as an example the following Julia fragment.
+
+```julia
+x = """
+    $a
+    b"""
+```
+
+Here this is parsed as `(= x (string-s a "\n" "b"))` (the `-s` flag in
+`string-s` means "triple quoted string")
+
+Looking at the green tree, we see the indentation before the `$a` and `b` are
+marked as trivia:
+
+```
+julia> text = "x = \"\"\"\n    \$a\n    b\"\"\""
+       show(stdout, MIME"text/plain"(), parseall(GreenNode, text, rule=:statement), text)
+     1:23     │[=]
+     1:1      │  Identifier             ✔   "x"
+     2:2      │  Whitespace                 " "
+     3:3      │  =                          "="
+     4:4      │  Whitespace                 " "
+     5:23     │  [string]
+     5:7      │    """                      "\"\"\""
+     8:8      │    String                   "\n"
+     9:12     │    Whitespace               "    "
+    13:13     │    $                        "\$"
+    14:14     │    Identifier           ✔   "a"
+    15:15     │    String               ✔   "\n"
+    16:19     │    Whitespace               "    "
+    20:20     │    String               ✔   "b"
+    21:23     │    """                      "\"\"\""
+```
+
+### String nodes always wrapped in `K"string"` or `K"cmdstring"`
+
+All strings are surrounded by a node of kind `K"string"`, even non-interpolated
+literals, so `"x"` parses as `(string "x")`. This makes string handling simpler
+and more systematic because interpolations and triple strings with embedded
+trivia don't need to be treated differently. It also gives a container in which
+to attach the delimiting quotes.
+
+The same goes for command strings which are always wrapped in `K"cmdstring"`
+regardless of whether they have multiple pieces (due to triple-quoted
+dedenting) or otherwise.
+
+### Do blocks
+
+`do` syntax is represented in the `Expr` AST with the `do` outside the call.
+This makes some sense syntactically (do appears as "an operator" after the
+function call).
+
+However semantically this nesting is awkward because the lambda represented by
+the do block is passed to the call. This same problem occurs for the macro form
+`@f(x) do \n body end` where the macro expander needs a special rule to expand
+nestings of the form `Expr(:do, Expr(:macrocall ...), ...)`, rearranging the
+expression which are passed to this macro call rather than passing the
+expressions up the tree.
+
+The implied closure is also lowered to a nested `Expr(:->)` expression, though
+it this somewhat premature to do this during parsing.
+
+To resolve these problems we parse
+
+    @f(x, y) do a, b\n body\n end
+    f(x, y) do a, b\n body\n end
+
+by tacking the `do` onto the end of the call argument list:
+
+    (macrocall @f x y (do (tuple a b) body))
+    (call f x y (do (tuple a b) body))
+
+This achieves the following desirable properties
+1. Content of `do` is nested inside the call which improves the match between AST and semantics
+2. Macro can be passed the syntax as-is rather than the macro expander rearranging syntax before passing it to the macro
+3. In the future, a macro can detect when it's being passed do syntax rather than lambda syntax
+4. `do` head is used uniformly for both call and macrocall
+5. We preserve the source ordering properties we need for the green tree.
+
+## Tree structure reference
+
+This section may eventually contain a full description of the Julia AST. For
+now, we describe a few of the more subtle features.
+
+### Concatenation syntax
+
+Concatenation syntax comes in two syntax forms:
+* The traditional `hcat`/`vcat`/`row` which deal with concatenation or matrix
+  construction along dimensions one and two.
+* The new `ncat`/`nrow` syntax which deals with concatenation or array
+  construction along arbitrary dimensions.
+
+We write `ncat-3` for concatenation along the third dimension. (The `3` is
+stored in the head flags for `SyntaxNode` trees, and in the first `arg` for
+`Expr` trees.) Semantically the new syntax can work like the old:
+* `ncat-1` is the same as `vcat`
+* `ncat-2` is the same as `hcat`
+* `row` is the same as `nrow-2`
+
+#### Vertical concatenation (dimension 1)
+
+Vertical concatenation along dimension 1 can be done with semicolons or newlines
+
+```julia-repl
+julia> print_tree(:([a
+                     b]))
+Expr(:vcat)
+├─ :a
+└─ :b
+
+julia> print_tree(:([a ; b]))
+Expr(:vcat)
+├─ :a
+└─ :b
+```
+
+#### Horizontal concatenation (dimension 2)
+
+For horizontal concatenation along dimension 2, use spaces or double semicolons
+
+```julia-repl
+julia> print_tree(:([a b]))
+Expr(:hcat)
+├─ :a
+└─ :b
+
+julia> print_tree(:([a ;; b]))
+Expr(:ncat)
+├─ 2
+├─ :a
+└─ :b
+```
+
+#### Mixed concatenation
+
+Concatenation along dimensions 1 and 2 can be done with spaces and single
+semicolons or newlines, producing a mixture of `vcat` and `row` expressions:
+
+```julia-repl
+julia> print_tree(:([a b
+                     c d]))
+# OR
+julia> print_tree(:([a b ; c d]))
+Expr(:vcat)
+├─ Expr(:row)
+│  ├─ :a
+│  └─ :b
+└─ Expr(:row)
+   ├─ :c
+   └─ :d
+```
+
+General n-dimensional concatenation results in nested `ncat` and `nrow`, for
+example
+
+```julia-repl
+julia> print_tree(:([a ; b ;; c ; d ;;; x]))
+Expr(:ncat)
+├─ 3
+├─ Expr(:nrow)
+│  ├─ 2
+│  ├─ Expr(:nrow)
+│  │  ├─ 1
+│  │  ├─ :a
+│  │  └─ :b
+│  └─ Expr(:nrow)
+│     ├─ 1
+│     ├─ :c
+│     └─ :d
+└─ :x
+```
diff --git a/JuliaSyntax/prototypes/simple_parser.jl b/JuliaSyntax/prototypes/simple_parser.jl
new file mode 100644
index 0000000000000..06a408a26860a
--- /dev/null
+++ b/JuliaSyntax/prototypes/simple_parser.jl
@@ -0,0 +1,174 @@
+# Example parser for a very basic Julia-like language of expressions, calls and
+# function definitions.
+
+using JuliaSyntax: @K_str, is_literal, is_keyword, is_operator
+
+function parse_toplevel(st)
+    mark = position(st)
+    while true
+        bump_trivia(st, skip_newlines=true)
+        if peek(st) == K"EndMarker"
+            break
+        end
+        parse_statement(st)
+    end
+    emit(st, mark, K"toplevel")
+end
+
+function parse_statement(st)
+    mark = position(st)
+    if peek(st) == K"function"
+        parse_function_def(st)
+    else
+        parse_assignment(st)
+    end
+end
+
+function parse_function_def(st)
+    mark = position(st)
+    @assert peek(st) == K"function"
+    bump(st, TRIVIA_FLAG)
+    parse_call(st)
+    parse_block(st, K"end")
+    emit(st, mark, K"function")
+end
+
+function parse_block(st, closing_kind, mark=position(st))
+    while true
+        bump_trivia(st, skip_newlines=true)
+        if peek(st) == closing_kind
+            bump(st, TRIVIA_FLAG)
+            break
+        elseif peek(st) == K"EndMarker"
+            emit_diagnostic(st, error="Unexpected end of input")
+            break
+        end
+        parse_assignment(st)
+    end
+    emit(st, mark, K"block")
+end
+
+function parse_assignment(st)
+    mark = position(st)
+    parse_expression(st)
+    if peek(st) == K"="
+        bump(st, TRIVIA_FLAG)
+        parse_expression(st)
+        emit(st, mark, K"=")
+    end
+end
+
+function parse_expression(st)
+    mark = position(st)
+    parse_term(st)
+    while peek(st) in (K"+", K"-")
+        bump(st)
+        parse_term(st)
+        emit(st, mark, K"call", INFIX_FLAG)
+    end
+end
+
+function parse_term(st)
+    mark = position(st)
+    parse_call(st)
+    while peek(st) in (K"*", K"/")
+        bump(st)
+        parse_call(st)
+        emit(st, mark, K"call", INFIX_FLAG)
+    end
+end
+
+function parse_call(st)
+    mark = position(st)
+    parse_atom(st)
+    if peek(st) == K"("
+        bump(st, TRIVIA_FLAG)
+        need_comma = false
+        while true
+            k = peek(st)
+            if need_comma && k == K","
+                bump(st, TRIVIA_FLAG)
+                k = peek(st)
+                need_comma = false
+            end
+            if k == K")"
+                bump(st, TRIVIA_FLAG)
+                break
+            elseif k == K"EndMarker"
+                emit_diagnostic(st, error="Unexpected end of input")
+                break
+            elseif need_comma
+                bump_invisible(st, K"error", TRIVIA_TOKEN, error="Expected a `,`")
+            end
+            parse_expression(st)
+            need_comma = true
+        end
+        emit(st, mark, K"call")
+    end
+end
+
+function parse_atom(st)
+    bump_trivia(st, skip_newlines=true)
+    mark = position(st)
+    k = peek(st)
+    if k == K"Identifier" || is_literal(k)
+        bump(st)
+    elseif k in (K"-", K"+")
+        bump(st)
+        parse_atom(st)
+        emit(st, mark, K"call")
+    elseif k == K"("
+        bump(st, TRIVIA_FLAG)
+        parse_expression(st)
+        if peek(st) == K")"
+            bump(st, TRIVIA_FLAG)
+            # emit(st, mark, K"(")
+        else
+            bump_invisible(st, K"error", TRIVIA_FLAG,
+                           error="Expected `)` following expression")
+        end
+    elseif k == K"begin"
+        bump(st, TRIVIA_FLAG)
+        parse_block(st, K"end", mark)
+    else
+        bump(st)
+        emit(st, mark, K"error",
+             error="Expected literal, identifier or opening parenthesis")
+    end
+end
+
+function parse_and_show(production::Function, code)
+    st = ParseStream(code)
+    production(st)
+    t = JuliaSyntax.build_tree(GreenNode, st)
+    show(stdout, MIME"text/plain"(), t, code, show_trivia=true)
+    if !isempty(st.diagnostics)
+        println()
+        for d in st.diagnostics
+            JuliaSyntax.show_diagnostic(stdout, d, code)
+        end
+    end
+    t
+end
+
+println()
+println("Example good parse:")
+parse_and_show(parse_toplevel,
+               """
+               function f(x, y)
+                   z = x - y
+                   begin
+                       a
+                       b
+                   end
+                   z * z
+               end
+
+               f(1,2)
+               """)
+
+println()
+println("Example diagnostics:")
+parse_and_show(parse_expression, "(x + a*y) * (b")
+
+nothing
diff --git a/JuliaSyntax/prototypes/syntax_interpolation.jl b/JuliaSyntax/prototypes/syntax_interpolation.jl
new file mode 100644
index 0000000000000..eddf6748bd423
--- /dev/null
+++ b/JuliaSyntax/prototypes/syntax_interpolation.jl
@@ -0,0 +1,56 @@
+# # Macros and expression interpolation
+
+using JuliaSyntax: SourceFile, SyntaxNode, parseall, child, setchild!
+
+# The following shows that SyntaxNode works nicely for simple macros which
+# just interpolate expressions into one another. In particular it shows how
+# precise source information from multiple files can coexist within the same
+# syntax tree.
+
+# First, here's the functionality that we're going to implement as a normal
+# Julia macro. It's similar to the standard @show macro.
+macro show2(ex)
+    name = sprint(Base.show_unquoted, ex)
+    quote
+        value = $(esc(ex))
+        println($name, " = ", value)
+        value
+    end
+end
+
+# Now, let's implement the same expression interpolation but using SyntaxNode
+# (and with a normal Julia function which we need to use, absent any deeper
+# integration with the Julia runtime)
+function at_show2(ex::SyntaxNode)
+    name = sprint(show, MIME"text/x.sexpression"(), ex)
+    quote
+        value = $(esc(ex))
+        println($name, " = ", value)
+        value
+    end
+    # The following emulates the expression interpolation lowering which is
+    # usually done by the compiler.
+    # 1. Extract the expression literal as `block`
+    tree = parseall(SyntaxNode, String(read(@__FILE__)), filename=@__FILE__)
+    block = child(tree, 3, 2, 2, 1)
+    # 2. Interpolate local variables into the block at positions of $'s
+    # Interpolating a SyntaxNode `ex` is simple:
+    setchild!(block, (1, 2), ex)
+    # The interpolation of a Julia *value* should inherit the source location
+    # of the $ interpolation expression. This is different to when substituting
+    # in a SyntaxNode which should just be inserted as-is.
+    setchild!(block, (2, 2),
+              JuliaSyntax.interpolate_literal(child(block, 2, 2), name))
+    block
+end
+
+# Usage of at_show2()
+
+# Let's have some simple expression to pass to at_show2. This will be
+# attributed to a different file foo.jl
+s2 = parseall(SyntaxNode, "foo +\n42", filename="foo.jl", rule=:statement)
+
+# Calling at_show2, we see that the precise source information is preserved for
+# both the surrounding expression and the interpolated fragments.
+println("\nInterpolation example")
+s3 = at_show2(s2)
diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl
new file mode 100644
index 0000000000000..da5861c0d5b62
--- /dev/null
+++ b/JuliaSyntax/src/JuliaSyntax.jl
@@ -0,0 +1,108 @@
+module JuliaSyntax
+
+macro _public(syms)
+    if VERSION >= v"1.11"
+        names = syms isa Symbol ? [syms] : syms.args
+        esc(Expr(:public, names...))
+    else
+        nothing
+    end
+end
+
+# Public API, in the order of docs/src/api.md
+
+# Parsing.
+export parsestmt,
+    parseall,
+    parseatom
+
+@_public parse!,
+    ParseStream,
+    build_tree
+
+# Tokenization
+export tokenize,
+    Token,
+    untokenize
+
+# Source file handling
+@_public sourcefile,
+    byte_range,
+    char_range,
+    first_byte,
+    last_byte,
+    filename,
+    source_line,
+    source_location,
+    sourcetext,
+    highlight
+
+export SourceFile
+@_public source_line_range
+
+# Expression predicates, kinds and flags
+export @K_str, kind
+@_public Kind
+
+@_public flags,
+    SyntaxHead,
+    head,
+    is_trivia,
+    is_prefix_call,
+    is_infix_op_call,
+    is_prefix_op_call,
+    is_postfix_op_call,
+    is_dotted,
+    is_suffixed,
+    is_decorated,
+    numeric_flags,
+    has_flags,
+    TRIPLE_STRING_FLAG,
+    RAW_STRING_FLAG,
+    PARENS_FLAG,
+    COLON_QUOTE,
+    TOPLEVEL_SEMICOLONS_FLAG,
+    MUTABLE_FLAG,
+    BARE_MODULE_FLAG,
+    SHORT_FORM_FUNCTION_FLAG
+
+# Syntax trees
+@_public is_leaf,
+    numchildren,
+    children
+
+export SyntaxNode
+
+@_public GreenNode, RedTreeCursor, GreenTreeCursor,
+    span
+
+# Helper utilities
+include("utils.jl")
+
+include("julia/kinds.jl")
+
+# Lexing uses a significantly modified version of Tokenize.jl
+include("julia/tokenize.jl")
+
+# Source and diagnostics
+include("core/source_files.jl")
+include("core/diagnostics.jl")
+
+# Parsing
+include("core/parse_stream.jl")
+include("core/tree_cursors.jl")
+include("julia/julia_parse_stream.jl")
+include("julia/parser.jl")
+include("julia/parser_api.jl")
+include("julia/literal_parsing.jl")
+
+# Tree data structures
+include("porcelain/green_node.jl")
+include("porcelain/syntax_tree.jl")
+include("integration/expr.jl")
+
+# Hooks to integrate the parser with Base
+include("integration/hooks.jl")
+include("precompile.jl")
+
+end
diff --git a/JuliaSyntax/src/core/diagnostics.jl b/JuliaSyntax/src/core/diagnostics.jl
new file mode 100644
index 0000000000000..39fa473fed2f9
--- /dev/null
+++ b/JuliaSyntax/src/core/diagnostics.jl
@@ -0,0 +1,110 @@
+"""
+    Diagnostic(first_byte, last_byte; [error="msg" | warning="msg"])
+
+A diagnostic message, referring to the source code byte range
+first_byte:last_byte, with a `warning` or `error` message.
+
+Messages should be concise, matter-of-fact and not include decorations:
+
+* Concise: "Show don't tell". Where possible, let's show the user what's wrong
+  by annotating their original source code via the byte range.
+* Matter-of-fact: Admonishing the user isn't helpful. Let's gently show them
+  what's wrong instead, using a neutral tone.
+* Decorations: Capitalization, punctuation and diagnostic class ("error" /
+  "warning") should be omitted. These decorations will be added by the
+  formatting code.
+
+TODO: At some point we should enhance Diagnostic to allow multiple sub-ranges
+for better annotation. Let's follow the excellent precedent set by Rust's
+[rustc_errors::Diagnostic](https://doc.rust-lang.org/stable/nightly-rustc/rustc_errors/struct.Diagnostic.html).
+
+TODO: We should cater for extended descriptions containing multiple sentences
+via a diagnostic code which can be used to look up detailed information. Again,
+Rust does this well.
+"""
+struct Diagnostic
+    first_byte::Int
+    last_byte::Int
+    level::Symbol
+    message::String
+end
+
+function Diagnostic(first_byte, last_byte; error=nothing, warning=nothing)
+    message = !isnothing(error)   ? error :
+              !isnothing(warning) ? warning :
+              Base.error("No message in diagnostic")
+    level = !isnothing(error) ? :error : :warning
+    Diagnostic(first_byte, last_byte, level, message)
+end
+
+byte_range(d::Diagnostic) = d.first_byte:d.last_byte
+is_error(d::Diagnostic)   = d.level === :error
+
+# Make relative path into a file URL
+function _file_url(filename)
+    try
+        @static if Sys.iswindows()
+            # TODO: Test this with windows terminal
+            path = replace(abspath(filename), '\\'=>'/')
+        else
+            path = abspath(filename)
+        end
+        return "file://$(path)"
+    catch exc
+        # abspath may fail if working directory doesn't exist
+        # TODO: It seems rather non-ideal to have the behavior here depend on
+        # the state of the local filesystem. And yet links in diagnostics seem
+        # useful.
+        #
+        # Ideally it'd be up to the caller to provide some notion of the
+        # "absolute location" of the source code resource when SourceFile is
+        # constructed. This is often not related to the local filesystem - it
+        # could be in memory, a fragment embedded in another file, etc etc.
+        return nothing
+    end
+end
+
+function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile)
+    color,prefix = diagnostic.level === :error   ? (:light_red, "Error")      :
+                   diagnostic.level === :warning ? (:light_yellow, "Warning") :
+                   diagnostic.level === :note    ? (:light_blue, "Note")      :
+                   (:normal, "Info")
+    line, col = source_location(source, first_byte(diagnostic))
+    linecol = "$line:$col"
+    fname = filename(source)
+    file_href = nothing
+    if !isempty(fname)
+        locstr = "$fname:$linecol"
+        if !startswith(fname, "REPL[") && get(io, :color, false)
+            url = _file_url(fname)
+            if !isnothing(url)
+                file_href = url*"#$linecol"
+            end
+        end
+    else
+        locstr = "line $linecol"
+    end
+    _printstyled(io, "# $prefix @ ", fgcolor=:light_black)
+    _printstyled(io, "$locstr", fgcolor=:light_black, href=file_href)
+    print(io, "\n")
+    highlight(io, source, byte_range(diagnostic),
+              note=diagnostic.message, notecolor=color,
+              context_lines_before=1, context_lines_after=0)
+end
+
+function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, source::SourceFile)
+    first = true
+    for d in diagnostics
+        first || println(io)
+        first = false
+        show_diagnostic(io, d, source)
+    end
+end
+
+function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, text::AbstractString)
+    show_diagnostics(io, diagnostics, SourceFile(text))
+end
+
+function any_error(diagnostics::AbstractVector{Diagnostic})
+    any(is_error(d) for d in diagnostics)
+end
diff --git a/JuliaSyntax/src/core/parse_stream.jl b/JuliaSyntax/src/core/parse_stream.jl
new file mode 100644
index 0000000000000..7bcd5745b8a48
--- /dev/null
+++ b/JuliaSyntax/src/core/parse_stream.jl
@@ -0,0 +1,999 @@
+#-------------------------------------------------------------------------------
+# Flags hold auxiliary information about tokens/nonterminals which the Kind
+# doesn't capture in a nice way.
+#
+# TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias?
+const RawFlags = UInt16
+const EMPTY_FLAGS = RawFlags(0)
+
+# Set for tokens or ranges which are syntax trivia after parsing
+const TRIVIA_FLAG = RawFlags(1<<0)
+
+"""
+Set for nodes that are non-terminals
+"""
+const NON_TERMINAL_FLAG = RawFlags(1<<7)
+
+function remove_flags(n::RawFlags, fs...)
+    RawFlags(n & ~(RawFlags((|)(fs...))))
+end
+
+"""
+    has_flags(x, test_flags)
+
+Return true if any of `test_flags` are set.
+"""
+has_flags(flags::RawFlags, test_flags) = (flags & test_flags) != 0
+
+#-------------------------------------------------------------------------------
+"""
+    SyntaxHead(kind, flags)
+
+A `SyntaxHead` combines the [`Kind`](@ref) of a syntactic construct with a set
+of flags. The kind defines the broad "type" of the syntactic construct, while
+the flag bits compactly store more detailed information about the construct.
+"""
+struct SyntaxHead
+    kind::Kind
+    flags::RawFlags
+end
+
+kind(head::SyntaxHead) = head.kind
+
+"""
+    flags(x)
+
+Return the flag bits of a syntactic construct. Prefer to query these with the
+predicates `is_trivia`, `is_prefix_call`, `is_infix_op_call`,
+`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`, `is_suffixed`,
+`is_decorated`.
+
+Or extract numeric portion of the flags with `numeric_flags`.
+"""
+flags(head::SyntaxHead) = head.flags
+
+function Base.summary(head::SyntaxHead)
+    untokenize(head, unique=false, include_flag_suff=false)
+end
+
+#-------------------------------------------------------------------------------
+# Generic interface for types `T` which have kind and flags. Either:
+# 1. Define kind(::T) and flags(::T), or
+# 2. Define head(::T) to return a type like `SyntaxKind` for which `kind` and
+#    `flags` are defined
+kind(x)  = kind(head(x))
+flags(x) = flags(head(x))
+
+# Predicates based on flags()
+has_flags(x, test_flags) = has_flags(flags(x), test_flags)
+call_type_flags(x) = call_type_flags(flags(x))
+
+"""
+    is_trivia(x)
+
+Return true for "syntax trivia": tokens in the tree which are either largely
+invisible to the parser (eg, whitespace) or implied by the structure of the AST
+(eg, reserved words).
+"""
+is_trivia(x) = has_flags(x, TRIVIA_FLAG)
+
+#-------------------------------------------------------------------------------
+"""
+`SyntaxToken` is a token covering a contiguous byte range in the input text.
+
+We record only the `next_byte` here (the index of the next byte *after* the
+token) to avoid duplication of data between neighbouring tokens. This is more
+useful than recording the first byte, as it allows an initial fixed sentinel
+token to be used for recording the first byte of the first real token.
+"""
+struct SyntaxToken
+    head::SyntaxHead
+    orig_kind::Kind
+    preceding_whitespace::Bool
+    next_byte::UInt32
+end
+
+function Base.show(io::IO, tok::SyntaxToken)
+    print(io, rpad(untokenize(tok.head, unique=false), 15), " |", tok.next_byte)
+end
+
+head(tok::SyntaxToken) = tok.head
+preceding_whitespace(tok::SyntaxToken) = tok.preceding_whitespace
+
+
+#-------------------------------------------------------------------------------
+
+"""
+    RawGreenNode(head::SyntaxHead, byte_span::UInt32, orig_kind::Kind) # Terminal
+    RawGreenNode(head::SyntaxHead, byte_span::UInt32, nchildren::UInt32) # Non-terminal
+
+A "green tree" is a lossless syntax tree which overlays all the source text.
+The most basic properties of a green tree are that:
+
+* Nodes cover a contiguous span of bytes in the text
+* Sibling nodes are ordered in the same order as the text
+
+As implementation choices, we choose that:
+
+* Nodes are immutable and don't know their parents or absolute position, so can
+  be cached and reused
+* Nodes are homogeneously typed at the language level so they can be stored
+  concretely, with the `head` defining the node type. Normally this would
+  include a "syntax kind" enumeration, but it can also include flags and record
+  information the parser knew about the layout of the child nodes.
+* For simplicity and uniformity, leaf nodes cover a single token in the source.
+  This is like rust-analyzer, but different from Roslyn where leaves can
+  include syntax trivia.
+* The parser produces a single buffer of `RawGreenNode` which encodes the tree.
+  There are higher level accessors, which make working with this tree easier.
+"""
+struct RawGreenNode
+    head::SyntaxHead                  # Kind,flags
+    byte_span::UInt32                 # Number of bytes covered by this range
+    # If NON_TERMINAL_FLAG is set, this is the total number of child nodes
+    # Otherwise this is a terminal node (i.e. a token) and this is orig_kind
+    node_span_or_orig_kind::UInt32
+
+    # Constructor for terminal nodes (tokens)
+    function RawGreenNode(head::SyntaxHead, byte_span::Integer, orig_kind::Kind)
+        @assert (flags(head) & NON_TERMINAL_FLAG) == 0
+        new(head, UInt32(byte_span), UInt32(reinterpret(UInt16, orig_kind)))
+    end
+
+    # Constructor for non-terminal nodes - automatically sets NON_TERMINAL_FLAG
+    function RawGreenNode(head::SyntaxHead, byte_span::Integer, node_span::Integer)
+        h = SyntaxHead(kind(head), flags(head) | NON_TERMINAL_FLAG)
+        new(h, UInt32(byte_span), UInt32(node_span))
+    end
+
+    global reset_node
+    function reset_node(node::RawGreenNode, kind, flags)
+        new(_reset_node_head(node, kind, flags),
+            getfield(node, :byte_span),
+            getfield(node, :node_span_or_orig_kind))
+    end
+end
+
+function _reset_node_head(node, k, f)
+    if !isnothing(f)
+        f = RawFlags(f)
+        @assert (f & NON_TERMINAL_FLAG) == 0
+        f |= flags(node) & NON_TERMINAL_FLAG
+    else
+        f = flags(node)
+    end
+    h = SyntaxHead(isnothing(k) ? kind(node) : k, f)
+end
+
+Base.summary(node::RawGreenNode) = summary(node.head)
+function Base.show(io::IO, node::RawGreenNode)
+    print(io, summary(node), " (", node.byte_span, " bytes,")
+    if is_terminal(node)
+        print(io, " orig_kind=", node.orig_kind, ")")
+    else
+        print(io, " ", node.node_span, " children)")
+    end
+end
+
+function Base.getproperty(rgn::RawGreenNode, name::Symbol)
+    if name === :node_span
+        has_flags(getfield(rgn, :head), NON_TERMINAL_FLAG) || return UInt32(0) # Leaf nodes have no children
+        return getfield(rgn, :node_span_or_orig_kind)
+    elseif name === :orig_kind
+        has_flags(getfield(rgn, :head), NON_TERMINAL_FLAG) && error("Cannot access orig_kind for non-terminal node")
+        return Kind(getfield(rgn, :node_span_or_orig_kind))
+    end
+    getfield(rgn, name)
+end
+
+head(range::RawGreenNode) = range.head
+
+# Helper functions for unified output
+is_terminal(node::RawGreenNode) = !has_flags(node.head, NON_TERMINAL_FLAG)
+is_non_terminal(node::RawGreenNode) = has_flags(node.head, NON_TERMINAL_FLAG)
+
+#-------------------------------------------------------------------------------
+struct ParseStreamPosition
+    """
+    The current position in the byte stream, i.e. the byte at `byte_index` is
+    the first byte of the next token to be parsed.
+    """
+    byte_index::UInt32
+    """
+    The total number of nodes (terminal + non-terminal) in the output so far.
+    """
+    node_index::UInt32
+end
+
+const NO_POSITION = ParseStreamPosition(0, 0)
+
+#-------------------------------------------------------------------------------
+"""
+    ParseStream(text::AbstractString,          index::Integer=1; version=VERSION)
+    ParseStream(text::IO;                                        version=VERSION)
+    ParseStream(text::Vector{UInt8},           index::Integer=1; version=VERSION)
+    ParseStream(ptr::Ptr{UInt8}, len::Integer, index::Integer=1; version=VERSION)
+
+Construct a `ParseStream` from input which may come in various forms:
+* An string (zero copy for `String` and `SubString`)
+* An `IO` object (zero copy for `IOBuffer`). The `IO` object must be seekable.
+* A buffer of bytes (zero copy). The caller is responsible for preserving
+  buffers passed as `(ptr,len)`.
+
+A byte `index` may be provided as the position to start parsing.
+
+ParseStream provides an IO interface for the parser which provides lexing of
+the source text input into tokens, manages insignificant whitespace tokens on
+behalf of the parser, and stores output tokens and tree nodes in a pair of
+output arrays.
+
+`version` (default `VERSION`) may be used to set the syntax version to
+any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been
+added after v"1.0", emitting an error if it's not compatible with the requested
+`version`.
+"""
+mutable struct ParseStream
+    # `textbuf` is a buffer of UTF-8 encoded text of the source code. This is a
+    # natural representation as we desire random access and zero-copy parsing
+    # of UTF-8 text from various containers, and unsafe_wrap(Vector{UInt8},
+    # ...) allows us to use a Vector here.
+    #
+    # We want `ParseStream` to be concrete so that all `parse_*` functions only
+    # need to be compiled once. Thus `textbuf` must not be parameterized here.
+    textbuf::Vector{UInt8}
+    # GC root for the object which owns the memory in `textbuf`. `nothing` if
+    # the `textbuf` owner was unknown (eg, ptr,length was passed)
+    text_root::Any
+    # Lexer, transforming the input bytes into a token stream
+    lexer::Tokenize.Lexer{IOBuffer}
+    # Lookahead buffer for already lexed tokens
+    lookahead::Vector{SyntaxToken}
+    lookahead_index::Int
+    # Pool of stream positions for use as working space in parsing
+    position_pool::Vector{Vector{ParseStreamPosition}}
+    output::Vector{RawGreenNode}
+    # Current byte position in the output (the next byte to be written)
+    next_byte::Int
+    # Parsing diagnostics (errors/warnings etc)
+    diagnostics::Vector{Diagnostic}
+    # Counter for number of peek()s we've done without making progress via a bump()
+    peek_count::Int
+    # (major,minor) version of Julia we're parsing this code for.
+    # May be different from VERSION!
+    version::Tuple{Int,Int}
+
+    function ParseStream(text_buf::Vector{UInt8}, text_root, next_byte::Integer,
+                         version::VersionNumber)
+        io = IOBuffer(text_buf)
+        seek(io, next_byte-1)
+        lexer = Tokenize.Lexer(io)
+        # To avoid keeping track of the exact Julia development version where new
+        # features were added or comparing prerelease strings, we treat prereleases
+        # or dev versions as the release version using only major and minor version
+        # numbers. This means we're inexact for old dev versions but that seems
+        # like an acceptable tradeoff.
+        ver = (version.major, version.minor)
+        # Initial sentinel node (covering all ignored bytes before the first token)
+        sentinel = RawGreenNode(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), next_byte-1, K"TOMBSTONE")
+        new(text_buf,
+            text_root,
+            lexer,
+            Vector{SyntaxToken}(),
+            1,
+            Vector{Vector{ParseStreamPosition}}(),
+            RawGreenNode[sentinel],
+            next_byte,  # Initialize next_byte from the parameter
+            Vector{Diagnostic}(),
+            0,
+            ver)
+    end
+end
+
+function ParseStream(text::Vector{UInt8}, index::Integer=1; version=VERSION)
+    ParseStream(text, text, index, version)
+end
+
+# Buffer with unknown owner. Not exactly recommended, but good for C interop
+function ParseStream(ptr::Ptr{UInt8}, len::Integer, index::Integer=1; version=VERSION)
+    ParseStream(unsafe_wrap(Vector{UInt8}, ptr, len), nothing, index, version)
+end
+
+# Buffers originating from strings
+function ParseStream(text::String, index::Integer=1; version=VERSION)
+    ParseStream(unsafe_wrap(Vector{UInt8}, text),
+                text, index, version)
+end
+function ParseStream(text::SubString{String}, index::Integer=1; version=VERSION)
+    # See also IOBuffer(SubString("x"))
+    ParseStream(unsafe_wrap(Vector{UInt8}, pointer(text), sizeof(text)),
+                text, index, version)
+end
+function ParseStream(text::AbstractString, index::Integer=1; version=VERSION)
+    ParseStream(String(text), index; version=version)
+end
+
+# IO-based cases
+# TODO: switch ParseStream to use a Memory internally on newer versions of Julia
+VERSION < v"1.11.0-DEV.753" && function ParseStream(io::IOBuffer; version=VERSION)
+    ParseStream(io.data, io, position(io)+1, version)
+end
+function ParseStream(io::Base.GenericIOBuffer; version=VERSION)
+    textbuf = unsafe_wrap(Vector{UInt8}, pointer(io.data), length(io.data))
+    ParseStream(textbuf, io, position(io)+1, version)
+end
+function ParseStream(io::IO; version=VERSION)
+    textbuf = read(io)
+    ParseStream(textbuf, textbuf, 1, version)
+end
+
+function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream)
+    println(io, "ParseStream at position $(stream.next_byte)")
+end
+
+function show_diagnostics(io::IO, stream::ParseStream)
+    show_diagnostics(io, stream.diagnostics, SourceFile(stream))
+end
+
+# We manage a pool of stream positions as parser working space
+function acquire_positions(stream)
+    if isempty(stream.position_pool)
+        return Vector{ParseStreamPosition}()
+    end
+    pop!(stream.position_pool)
+end
+
+function release_positions(stream, positions)
+    empty!(positions)
+    push!(stream.position_pool, positions)
+end
+
+#-------------------------------------------------------------------------------
+# Return true when a terminal (token) was emitted last at stream position `pos`
+function token_is_last(stream, pos)
+    # In the unified structure, check if the node at pos is a terminal
+    return pos.node_index > 0 && pos.node_index <= length(stream.output) &&
+           is_terminal(stream.output[pos.node_index])
+end
+
+function lookahead_token_first_byte(stream, i)
+    i == 1 ? _next_byte(stream) : stream.lookahead[i-1].next_byte
+end
+
+function lookahead_token_last_byte(stream, i)
+    stream.lookahead[i].next_byte - 1
+end
+
+#-------------------------------------------------------------------------------
+# Stream input interface - the peek_* family of functions
+
+# Buffer several tokens ahead
+function _buffer_lookahead_tokens(lexer, lookahead)
+    had_whitespace = false
+    token_count = 0
+    while true
+        raw = Tokenize.next_token(lexer)
+        k = kind(raw)
+        was_whitespace = is_whitespace(k)
+        had_whitespace |= was_whitespace
+        f = EMPTY_FLAGS
+        raw.suffix     && (f |= SUFFIXED_FLAG)
+        push!(lookahead, SyntaxToken(SyntaxHead(k, f), k,
+                                     had_whitespace, raw.endbyte + 2))
+        token_count += 1
+        if k == K"EndMarker"
+            break
+        end
+        if !was_whitespace
+            # Buffer tokens in batches for lookahead. Generally we want a
+            # moderate-size buffer to make sure we hit the fast path of peek(),
+            # but not too large to avoid (a) polluting the processor cache and
+            # (b) doing unnecessary work when not parsing the whole input.
+            had_whitespace = false
+            if token_count > 100
+                break
+            end
+        end
+    end
+end
+
+# Return the index of the next byte of the input
+function _next_byte(stream)
+    stream.next_byte
+end
+
+# Find the index of the next nontrivia token
+@inline function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool)
+    # Much of the time we'll be peeking ahead a single token and have one or
+    # zero whitespace tokens before the next token. The following code is an
+    # unrolled optimized version for that fast path. Empirically it seems we
+    # only hit the slow path about 5% of the time here.
+    i = stream.lookahead_index
+    @inbounds if n == 1 && i+2 <= length(stream.lookahead)
+        if skip_newlines
+            k = kind(stream.lookahead[i])
+            if !(k == K"Whitespace" || k == K"Comment" || k == K"NewlineWs")
+                return i
+            end
+            i += 1
+            k = kind(stream.lookahead[i])
+            if !(k == K"Whitespace" || k == K"Comment" || k == K"NewlineWs")
+                return i
+            end
+        else
+            k = kind(stream.lookahead[i])
+            if !(k == K"Whitespace" || k == K"Comment")
+                return i
+            end
+            i += 1
+            k = kind(stream.lookahead[i])
+            if !(k == K"Whitespace" || k == K"Comment")
+                return i
+            end
+        end
+    end
+    # Fall through to the general case
+    return __lookahead_index(stream, n, skip_newlines)
+end
+
+@noinline function __lookahead_index(stream, n, skip_newlines)
+    i = stream.lookahead_index
+    while true
+        if i+1 > length(stream.lookahead)
+            n_to_delete = stream.lookahead_index-1
+            if n_to_delete > 0.9*length(stream.lookahead)
+                Base._deletebeg!(stream.lookahead, n_to_delete)
+                i -= n_to_delete
+                stream.lookahead_index = 1
+            end
+            _buffer_lookahead_tokens(stream.lexer, stream.lookahead)
+            continue
+        end
+        k = @inbounds kind(stream.lookahead[i])
+        if !((k == K"Whitespace" || k == K"Comment") ||
+             (k == K"NewlineWs" && skip_newlines))
+            if n == 1
+                return i
+            end
+            n -= 1
+        end
+        i += 1
+    end
+end
+
+@noinline function _parser_stuck_error(stream)
+    # Optimization: emit unlikely errors in a separate function
+    error("The parser seems stuck at byte $(stream.next_byte)")
+end
+
+"""
+    peek(stream::ParseStream [, n=1]; skip_newlines=false)
+
+Look ahead in the stream `n` tokens, returning the token kind. Comments and
+non-newline whitespace are skipped automatically. Whitespace containing a
+single newline is returned as kind `K"NewlineWs"` unless `skip_newlines` is
+true.
+"""
+function Base.peek(stream::ParseStream, n::Integer=1;
+                   skip_newlines::Bool=false, skip_whitespace=true)
+    kind(peek_token(stream, n; skip_newlines=skip_newlines, skip_whitespace=skip_whitespace))
+end
+
+"""
+    peek_token(stream [, n=1])
+
+Like `peek`, but return the full token information rather than just the kind.
+"""
+function peek_token(stream::ParseStream, n::Integer=1;
+                    skip_newlines=false, skip_whitespace=true)
+    stream.peek_count += 1
+    if stream.peek_count > 100_000
+        _parser_stuck_error(stream)
+    end
+    i = _lookahead_index(stream, n, skip_newlines)
+    if !skip_whitespace
+        i = stream.lookahead_index
+    end
+    return @inbounds stream.lookahead[i]
+end
+
+
+struct FullToken
+    head::SyntaxHead
+    first_byte::UInt32
+    last_byte::UInt32
+end
+
+head(t::FullToken) = t.head
+byte_range(t::FullToken) = t.first_byte:t.last_byte
+span(t::FullToken) = 1 + last_byte(t) - first_byte(t)
+
+function peek_full_token(stream::ParseStream, n::Integer=1;
+                         skip_newlines=false, skip_whitespace=true)
+    stream.peek_count += 1
+    if stream.peek_count > 100_000
+        _parser_stuck_error(stream)
+    end
+    i = _lookahead_index(stream, n, skip_newlines)
+    if !skip_whitespace
+        i = stream.lookahead_index
+    end
+    t = stream.lookahead[i]
+
+    FullToken(head(t), lookahead_token_first_byte(stream, i),
+              lookahead_token_last_byte(stream, i))
+end
+
+"""
+    peek_behind(ps; skip_trivia=true, skip_parens=true)
+    peek_behind(ps, pos::ParseStreamPosition)
+
+Return information about a span which was previously inserted into the output,
+defaulting to the most previous nontrivia node when `skip_trivia` is true, or
+at the provided position `pos`.
+
+Retroactively inspecting or modifying the parser's output can be confusing, so
+using this function should be avoided where possible.
+"""
+function peek_behind(stream::ParseStream, pos::ParseStreamPosition)
+    if pos.node_index > 0 && pos.node_index <= length(stream.output)
+        node = stream.output[pos.node_index]
+        if is_terminal(node)
+            return (kind=kind(node),
+                    flags=flags(node),
+                    orig_kind=node.orig_kind,
+                    is_leaf=true)
+        else
+            return (kind=kind(node),
+                    flags=flags(node),
+                    orig_kind=K"None",
+                    is_leaf=false)
+        end
+    else
+        return (kind=K"None",
+                flags=EMPTY_FLAGS,
+                orig_kind=K"None",
+                is_leaf=true)
+    end
+end
+
+"""
+    first_child_position(stream::ParseStream, pos::ParseStreamPosition)
+
+Find the first non-trivia child of this node (in the GreenTree/RedTree sense) and return
+its position.
+"""
+function first_child_position(stream::ParseStream, pos::ParseStreamPosition)
+    output = stream.output
+    @assert pos.node_index > 0
+    cursor = RedTreeCursor(GreenTreeCursor(output, pos.node_index), pos.byte_index-UInt32(1))
+    candidate = nothing
+    for child in reverse(cursor)
+        is_trivia(child) && continue
+        candidate = child
+    end
+
+    candidate !== nothing && return ParseStreamPosition(candidate.byte_end+UInt32(1), candidate.green.position)
+
+    # No children found - return the first non-trivia *token* (even if it
+    # is the child of a non-terminal trivia node (e.g. an error)).
+    byte_end = pos.byte_index
+    for i in pos.node_index-1:-1:(pos.node_index - treesize(cursor))
+        node = output[i]
+        if is_terminal(node)
+            if !is_trivia(node)
+                return ParseStreamPosition(byte_end, i)
+            end
+            byte_end -= node.byte_span
+        end
+    end
+
+    # Still none found. Return a sentinel value
+    return ParseStreamPosition(0, 0)
+end
+
+"""
+        first_child_position(stream::ParseStream, pos::ParseStreamPosition)
+
+    Find the last non-trivia child of this node (in the GreenTree/RedTree sense) and
+    return its position (i.e. the position as if that child had been the last thing parsed).
+"""
+function last_child_position(stream::ParseStream, pos::ParseStreamPosition)
+    output = stream.output
+    @assert pos.node_index > 0
+    cursor = RedTreeCursor(GreenTreeCursor(output, pos.node_index), pos.byte_index-1)
+    candidate = nothing
+    for child in reverse(cursor)
+        is_trivia(child) && continue
+        return ParseStreamPosition(child.byte_end+UInt32(1), child.green.position)
+    end
+    return ParseStreamPosition(0, 0)
+end
+
+# Get last position in stream "of interest", skipping
+# * parens nodes
+# * deleted tokens (TOMBSTONE)
+# * whitespace (if skip_trivia=true)
+function peek_behind_pos(stream::ParseStream; skip_trivia::Bool=true,
+                         skip_parens::Bool=true)
+    # Work backwards through the output
+    node_idx = length(stream.output)
+    byte_idx = stream.next_byte
+
+    # Skip parens nodes if requested
+    if skip_parens
+        while node_idx > 0
+            node = stream.output[node_idx]
+            if is_non_terminal(node) && kind(node) == K"parens"
+                node_idx -= 1
+            else
+                break
+            end
+        end
+    end
+
+    # Skip trivia if requested
+    while node_idx > 0
+        node = stream.output[node_idx]
+        if kind(node) == K"TOMBSTONE" || (skip_trivia && is_trivia(node))
+            byte_idx -= node.byte_span
+            # If this is a non-terminal node, skip its children without
+            # subtracting their byte_spans, as they're already included in the parent
+            if is_non_terminal(node)
+                node_idx -= (1 + node.node_span)
+            else
+                node_idx -= 1
+            end
+        else
+            break
+        end
+    end
+
+    return ParseStreamPosition(byte_idx, node_idx)
+end
+
+function peek_behind(stream::ParseStream; kws...)
+    peek_behind(stream, peek_behind_pos(stream; kws...))
+end
+
+#-------------------------------------------------------------------------------
+# Stream output interface - the `bump_*` and `emit_*` family of functions
+#
+# Though note bump() really does both input and output
+
+# Bump up until the `n`th token
+# flags and remap_kind are applied to any non-trivia tokens
+function _bump_until_n(stream::ParseStream, n::Integer, new_flags, remap_kind=K"None")
+    if n < stream.lookahead_index
+        return
+    end
+    for i in stream.lookahead_index:n
+        tok = stream.lookahead[i]
+        k = kind(tok)
+        if k == K"EndMarker"
+            break
+        end
+        f = new_flags | flags(tok)
+        is_trivia = is_whitespace(k)
+        is_trivia && (f |= TRIVIA_FLAG)
+        outk = (is_trivia || remap_kind == K"None") ? k : remap_kind
+        h = SyntaxHead(outk, f)
+
+        # Calculate byte span for this token
+        if i == stream.lookahead_index
+            # First token in this batch - calculate span from current stream position
+            prev_byte = stream.next_byte
+        else
+            # Subsequent tokens - use previous token's next_byte
+            prev_byte = stream.lookahead[i-1].next_byte
+        end
+        byte_span = Int(tok.next_byte) - Int(prev_byte)
+
+        # Create terminal RawGreenNode
+        node = RawGreenNode(h, byte_span, kind(tok))
+        push!(stream.output, node)
+
+        # Update next_byte
+        stream.next_byte += byte_span
+    end
+    stream.lookahead_index = n + 1
+    # Defuse the time bomb
+    stream.peek_count = 0
+end
+
+"""
+    bump(stream [, flags=EMPTY_FLAGS];
+         skip_newlines=false, error, remap_kind)
+
+Copy the current token from the input stream to the output. Adds the given
+flags to the output token (normally this would be the default `EMPTY_FLAGS` or
+`TRIVIA_FLAG`).
+
+Keyword arguments:
+* `skip_newlines` - if `true`, newlines are treated as whitespace.
+* `error` - if set, emit an error for this token
+* `remap_kind` - the kind of the token in the output token stream if it needs
+                 to be modified.
+"""
+function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false,
+              error=nothing, remap_kind::Kind=K"None")
+    emark = position(stream)
+    _bump_until_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, remap_kind)
+    if !isnothing(error)
+        emit(stream, emark, K"error", flags, error=error)
+    end
+    # Return last token location in output if needed for reset_node!
+    return position(stream)
+end
+
+"""
+Bump comments and whitespace tokens preceding the next token
+
+**Skips newlines** by default.  Set skip_newlines=false to avoid that.
+"""
+function bump_trivia(stream::ParseStream, flags=EMPTY_FLAGS;
+                     skip_newlines=true, error=nothing)
+    emark = position(stream)
+    _bump_until_n(stream, _lookahead_index(stream, 1, skip_newlines) - 1, EMPTY_FLAGS)
+    if !isnothing(error)
+        emit(stream, emark, K"error", flags, error=error)
+    end
+    return position(stream)
+end
+
+"""
+Bump an invisible zero-width token into the output
+
+This is useful when surrounding syntax implies the presence of a token.  For
+example, `2x` means `2*x` via the juxtaposition rules.
+"""
+function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS;
+                        error=nothing)
+    b = stream.next_byte
+    h = SyntaxHead(kind, flags)
+    # Zero-width token
+    node = RawGreenNode(h, 0, kind)
+    push!(stream.output, node)
+    # No need to update next_byte for zero-width token
+    if !isnothing(error)
+        emit_diagnostic(stream, b:b-1, error=error)
+    end
+    stream.peek_count = 0
+    return position(stream)
+end
+
+"""
+Bump several tokens, gluing them together into a single token
+
+This is for use in special circumstances where the parser needs to resolve
+lexing ambiguities. There's no special whitespace handling — bump any
+whitespace if necessary with bump_trivia.
+"""
+function bump_glue(stream::ParseStream, kind, flags)
+    i = stream.lookahead_index
+    h = SyntaxHead(kind, flags)
+    # Calculate byte span for glued tokens
+    start_byte = stream.next_byte
+    end_byte = stream.lookahead[i+1].next_byte
+    byte_span = end_byte - start_byte
+
+    node = RawGreenNode(h, byte_span, kind)
+    push!(stream.output, node)
+    stream.next_byte += byte_span
+    stream.lookahead_index += 2
+    stream.peek_count = 0
+    return position(stream)
+end
+
+"""
+Reset kind or flags of an existing node in the output stream
+
+This is a hack, but in some limited occasions the trailing syntax may change
+the kind or flags of a token in a way which would require unbounded lookahead
+in a recursive descent parser. Modifying the output with reset_node! is useful
+in those cases.
+"""
+function reset_node!(stream::ParseStream, pos::ParseStreamPosition;
+                     kind=nothing, flags=nothing)
+    node = stream.output[pos.node_index]
+    stream.output[pos.node_index] = reset_node(node, kind, flags)
+end
+
+"""
+Move `numbytes` from the range at output position `pos+1` to the output
+position `pos`. If the donor range becomes empty, mark it dead with
+K"TOMBSTONE" and return `true`, otherwise return `false`.
+
+Hack alert! This is used only for managing the complicated rules related to
+dedenting triple quoted strings.
+"""
+function steal_token_bytes!(stream::ParseStream, pos::ParseStreamPosition, numbytes)
+    i = pos.node_index
+    t1 = stream.output[i]
+    t2 = stream.output[i+1]
+    @assert is_terminal(t1) && is_terminal(t2)
+
+    stream.output[i] = RawGreenNode(t1.head, t1.byte_span + numbytes,
+                                    t1.orig_kind)
+
+    t2_is_empty = t2.byte_span == numbytes
+    head2 = t2_is_empty ? SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : t2.head
+    stream.output[i+1] = RawGreenNode(head2, t2.byte_span - numbytes,
+                                      t2.orig_kind)
+    return t2_is_empty
+end
+
+# Get position of last item emitted into the output stream
+function Base.position(stream::ParseStream)
+    byte_idx = stream.next_byte
+    node_idx = length(stream.output)
+
+    ParseStreamPosition(byte_idx, node_idx)
+end
+
+"""
+    emit(stream, mark, kind, flags = EMPTY_FLAGS; error=nothing)
+
+Emit a new non-terminal node into the output which covers source bytes from `mark` to
+the end of the most recent token which was `bump()`'ed. The starting `mark`
+should be a previous return value of `position()`. The emitted node will have
+its `node_span` set to the number of nodes emitted since `mark`.
+"""
+function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind,
+              flags::RawFlags = EMPTY_FLAGS; error=nothing)
+    # Calculate byte span from mark position to current
+    mark_byte = mark.byte_index
+    current_byte = stream.next_byte
+    byte_span = current_byte - mark_byte
+
+    # Calculate node span (number of children, exclusive of the node itself)
+    node_span = length(stream.output) - mark.node_index
+
+    # Create non-terminal RawGreenNode
+    node = RawGreenNode(SyntaxHead(kind, flags), byte_span, node_span)
+
+    if !isnothing(error)
+        emit_diagnostic(stream, mark_byte:current_byte-1, error=error)
+    end
+
+    push!(stream.output, node)
+    # Note: emit() for non-terminals doesn't advance next_byte
+    # because it's a range over already-emitted tokens
+    return position(stream)
+end
+
+function emit_diagnostic(stream::ParseStream, byterange::AbstractUnitRange; kws...)
+    emit_diagnostic(stream.diagnostics, byterange; kws...)
+    return nothing
+end
+
+"""
+Emit a diagnostic at the position of the next token
+
+If `whitespace` is true, the diagnostic is positioned on the whitespace before
+the next token. Otherwise it's positioned at the next token as returned by `peek()`.
+"""
+function emit_diagnostic(stream::ParseStream; whitespace=false, kws...)
+    i = _lookahead_index(stream, 1, true)
+    begin_tok_i = i
+    end_tok_i = i
+    if whitespace
+        # It's the whitespace which is the error. Find the range of the current
+        # whitespace.
+        begin_tok_i = stream.lookahead_index
+        end_tok_i = is_whitespace(stream.lookahead[i]) ?
+                    i : max(stream.lookahead_index, i - 1)
+    end
+    fbyte = lookahead_token_first_byte(stream, begin_tok_i)
+    lbyte = lookahead_token_last_byte(stream, end_tok_i)
+    emit_diagnostic(stream, fbyte:lbyte; kws...)
+    return nothing
+end
+
+function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; trim_whitespace=true, kws...)
+    # Find the byte range from mark to current position
+    start_byte = mark.byte_index
+    end_byte = stream.next_byte - 1
+
+    if trim_whitespace
+        # TODO: Implement whitespace trimming for unified output
+        # This would require scanning the output array
+    end
+
+    emit_diagnostic(stream, start_byte:end_byte; kws...)
+end
+
+function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition,
+                         end_mark::ParseStreamPosition; kws...)
+    emit_diagnostic(stream, mark.byte_index:end_mark.byte_index-1; kws...)
+end
+
+function emit_diagnostic(diagnostics::AbstractVector{Diagnostic},
+                         byterange::AbstractUnitRange; kws...)
+    push!(diagnostics, Diagnostic(first(byterange), last(byterange); kws...))
+end
+
+# Tree construction from the list of text ranges held by ParseStream
+
+# API for extracting results from ParseStream
+
+function sourcetext(stream::ParseStream; steal_textbuf=false)
+    Base.depwarn("Use of `sourcetext(::ParseStream)` is deprecated. Use `SourceFile(stream)` instead", :sourcetext)
+    root = stream.text_root
+    # The following kinda works but makes the return type of this method type
+    # unstable. (Also codeunit(root) == UInt8 doesn't imply UTF-8 encoding?)
+    # if root isa AbstractString && codeunit(root) == UInt8
+    #     return root
+    str = if root isa String || root isa SubString
+        root
+    elseif steal_textbuf
+        String(stream.textbuf)
+    else
+        # Safe default for other cases is to copy the buffer. Technically this
+        # could possibly be avoided in some situations, but might have side
+        # effects such as mutating stream.text_root or stealing the storage of
+        # stream.textbuf
+        String(copy(stream.textbuf))
+    end
+    SubString(str, first_byte(stream), thisind(str, last_byte(stream)))
+end
+
+function SourceFile(stream::ParseStream; kws...)
+    fbyte = first_byte(stream)
+    lbyte = last_byte(stream)
+    if !isempty(stream.diagnostics)
+        lbyte = max(lbyte, maximum(last_byte(d) for d in stream.diagnostics))
+    end
+    # See also sourcetext()
+    srcroot = stream.text_root
+    str = if srcroot isa String
+        SubString(srcroot, fbyte, thisind(srcroot, lbyte))
+    elseif srcroot isa SubString{String}
+        SubString(srcroot, fbyte, thisind(srcroot, lbyte))
+    else
+        SubString(String(stream.textbuf[fbyte:lbyte]))
+    end
+    return SourceFile(str; first_index=first_byte(stream), kws...)
+end
+
+"""
+    unsafe_textbuf(stream)
+
+Return the `Vector{UInt8}` text buffer being parsed by this `ParseStream`.
+
+!!! warning
+    The caller must hold a reference to `stream` while using textbuf
+"""
+unsafe_textbuf(stream) = stream.textbuf
+
+first_byte(stream::ParseStream) = first(stream.output).byte_span + 1 # After sentinel
+last_byte(stream::ParseStream) = stream.next_byte - 1
+any_error(stream::ParseStream) = any_error(stream.diagnostics)
+
+# Return last non-whitespace byte which was parsed
+function last_non_whitespace_byte(stream::ParseStream)
+    byte_pos = stream.next_byte
+    for i = length(stream.output):-1:1
+        node = stream.output[i]
+        if is_terminal(node)
+            if kind(node) in KSet"Comment Whitespace NewlineWs ErrorEofMultiComment" || kind(node) == K"error" && node.byte_span == 0
+                byte_pos -= node.byte_span
+            else
+                return byte_pos - 1
+            end
+        end
+    end
+    return first_byte(stream) - 1
+end
+
+function Base.empty!(stream::ParseStream)
+    # Keep only the sentinel
+    if !isempty(stream.output) && kind(stream.output[1]) == K"TOMBSTONE"
+        resize!(stream.output, 1)
+    else
+        empty!(stream.output)
+        # Restore sentinel node
+        push!(stream.output, RawGreenNode(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), 0, K"TOMBSTONE"))
+    end
+    # Reset next_byte to initial position
+    stream.next_byte = 1
+end
diff --git a/JuliaSyntax/src/core/source_files.jl b/JuliaSyntax/src/core/source_files.jl
new file mode 100644
index 0000000000000..87019aa81e594
--- /dev/null
+++ b/JuliaSyntax/src/core/source_files.jl
@@ -0,0 +1,392 @@
+#-------------------------------------------------------------------------------
+# Generic functions for source text, source location computation and formatting
+# functions
+
+"""
+    sourcefile(x)
+
+Get the source file object (usually `SourceFile`) for a given syntax object
+`x`. The source file along with a byte range may be used to compute
+`source_line()`, `source_location()`, `filename()`, etc.
+"""
+function sourcefile
+end
+
+"""
+    byte_range(x)
+
+Return the range of bytes which `x` covers in the source text. See also
+[`char_range`](@ref).
+"""
+function byte_range
+end
+
+"""
+    char_range(x)
+
+Compute the range in *character indices* over the source text for syntax object
+`x`. If you want to index the source string you need this, rather than
+[`byte_range`](@ref).
+"""
+function char_range(x)
+    br = byte_range(x)
+    first(br):thisind(sourcefile(x), last(br))
+end
+
+"""
+    first_byte(x)
+
+Return the first byte of `x` in the source text.
+"""
+first_byte(x) = first(byte_range(x))
+
+"""
+    last_byte(x)
+
+Return the last byte of `x` in the source text.
+"""
+last_byte(x) = last(byte_range(x))
+
+"""
+    filename(x)
+
+Get file name associated with `source`, or an empty string if one didn't exist.
+
+For objects `x` such as syntax trees, defers to `filename(sourcefile(x))` by
+default.
+"""
+function filename(x)
+    source = sourcefile(x)
+    isnothing(source) ? "" : filename(source)
+end
+
+"""
+    source_line(x)
+    source_line(source::SourceFile, byte_index::Integer)
+
+Get the line number of the first line on which object `x` appears. In the
+second form, get the line number at the given `byte_index` within `source`.
+"""
+source_line(x) = source_line(sourcefile(x), first_byte(x))
+
+"""
+    source_location(x)
+    source_location(source::SourceFile, byte_index::Integer)
+
+    source_location(LineNumberNode, x)
+    source_location(LineNumberNode, source, byte_index)
+
+Get `(line,column)` of the first byte where object `x` appears in the source.
+The second form allows one to be more precise with the `byte_index`, given the
+source file.
+
+Providing `LineNumberNode` as the first argument will return the line and file
+name in a line number node object.
+"""
+source_location(x) = source_location(sourcefile(x), first_byte(x))
+
+"""
+    sourcetext(x)
+
+Get the full source text syntax object `x`
+"""
+function sourcetext(x)
+    view(sourcefile(x), byte_range(x))
+end
+
+"""
+    highlight(io, x; color, note, notecolor,
+              context_lines_before, context_lines_inner, context_lines_after)
+
+    highlight(io::IO, source::SourceFile, range::UnitRange; kws...)
+
+Print the lines of source code surrounding `x` which is highlighted with
+background `color` and underlined with markers in the text. A `note` in
+`notecolor` may be provided as annotation. By default, `x` should be an object
+with `sourcefile(x)` and `byte_range(x)` implemented.
+
+The context arguments `context_lines_before`, etc, refer to the number of
+lines of code which will be printed as context before and after, with `inner`
+referring to context lines inside a multiline region.
+
+The second form shares the keywords of the first but allows an explicit source
+file and byte range to be supplied.
+"""
+function highlight(io::IO, x; kws...)
+    highlight(io, sourcefile(x), byte_range(x); kws...)
+end
+
+
+#-------------------------------------------------------------------------------
+"""
+    SourceFile(code [; filename=nothing, first_line=1, first_index=1])
+
+UTF-8 source text with associated file name and line number, storing the
+character indices of the start of each line. `first_line` and `first_index`
+can be used to specify the line number and index of the first character of
+`code` within a larger piece of source text.
+
+`SourceFile` may be indexed via `getindex` or `view` to get a string.  Line
+information for a byte offset can be looked up via the `source_line`,
+`source_location` and `source_line_range` functions.
+"""
+struct SourceFile
+    # TODO: Rename SourceFile -> SourceText / SourceChunk / SourceIndex / SourceLineIndex ?
+    # See https://github.com/JuliaLang/JuliaSyntax.jl/issues/190
+    code::SubString{String}
+    # Offset of `code` within a larger chunk of source text
+    byte_offset::Int
+    filename::Union{Nothing,String}
+    # first_column::Int ??
+    first_line::Int
+    # String index of start of every line
+    line_starts::Vector{Int}
+end
+
+Base.hash(s::SourceFile, h::UInt) =
+    hash(s.code, hash(s.byte_offset, hash(s.filename, hash(s.first_line, hash(s.line_starts, h)))))
+function Base.:(==)(a::SourceFile, b::SourceFile)
+    a.code == b.code && a.byte_offset == b.byte_offset && a.filename == b.filename &&
+    a.first_line == b.first_line && a.line_starts == b.line_starts
+end
+
+function SourceFile(code::AbstractString; filename=nothing, first_line=1,
+                    first_index=1)
+    line_starts = Int[1]
+    for i in eachindex(code)
+        # The line is considered to start after the `\n`
+        code[i] == '\n' && push!(line_starts, i+1)
+    end
+    SourceFile(code, first_index-1, filename, first_line, line_starts)
+end
+
+function SourceFile(; filename, kwargs...)
+    SourceFile(read(filename, String); filename=filename, kwargs...)
+end
+
+# Get line number of the given byte within the code
+function _source_line_index(source::SourceFile, byte_index)
+    searchsortedlast(source.line_starts, byte_index - source.byte_offset)
+end
+_source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1
+
+function source_location(::Type{LineNumberNode}, x)
+    source_location(LineNumberNode, sourcefile(x), first_byte(x))
+end
+
+source_line(source::SourceFile, byte_index::Integer) =
+    _source_line(source, _source_line_index(source, byte_index))
+
+function filename(source::SourceFile)
+    f = source.filename
+    !isnothing(f) ? f : ""
+end
+
+function source_location(source::SourceFile, byte_index::Integer)
+    lineidx = _source_line_index(source, byte_index)
+    i = source.line_starts[lineidx]
+    column = 1
+    while i < byte_index - source.byte_offset
+        i = nextind(source.code, i)
+        column += 1
+    end
+    _source_line(source, lineidx), column
+end
+
+"""
+Get byte range of the source line at byte_index, buffered by
+`context_lines_before` and `context_lines_after` before and after.
+"""
+function source_line_range(source::SourceFile, byte_index::Integer;
+                           context_lines_before=0, context_lines_after=0)
+    lineidx = _source_line_index(source, byte_index)
+    fbyte = source.line_starts[max(lineidx-context_lines_before, 1)]
+    lline = lineidx + context_lines_after
+    lbyte = lline >= lastindex(source.line_starts) ?
+        ncodeunits(source.code) : source.line_starts[lline + 1] - 1
+
+    return (fbyte + source.byte_offset,
+            lbyte + source.byte_offset)
+end
+
+function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index::Integer)
+    fn = filename(source)
+    LineNumberNode(source_line(source, byte_index), isempty(fn) ? nothing : Symbol(fn))
+end
+
+function Base.show(io::IO, ::MIME"text/plain", source::SourceFile)
+    fn = filename(source)
+    header = "## SourceFile$(isempty(fn) ? "" : " ")$fn ##"
+    print(io, header, "\n")
+    heightlim = displaysize(io)[1] ÷ 2
+    if !get(io, :limit, false) || length(source.line_starts) <= heightlim
+        print(io, source.code)
+    else
+        r1 = source_line_range(source, 1, context_lines_after=heightlim-3)
+        print(io, view(source, r1[1]:r1[2]))
+        println(io, "⋮")
+    end
+end
+
+function Base.getindex(source::SourceFile, rng::AbstractUnitRange)
+    i = first(rng) - source.byte_offset
+    # Convert byte range into unicode String character range.
+    # Assumes valid unicode! (SubString doesn't give us a reliable way to opt
+    # out of the valid unicode check. The SubString{String} inner constructor
+    # has some @boundscheck, but using @inbounds depends on inlining choices.)
+    j = prevind(source.code, last(rng) + 1 - source.byte_offset)
+    source.code[i:j]
+end
+
+# TODO: Change view() here to `sourcetext` ?
+function Base.view(source::SourceFile, rng::AbstractUnitRange)
+    i = first(rng) - source.byte_offset
+    j = prevind(source.code, last(rng) + 1 - source.byte_offset)
+    SubString(source.code, i, j)
+end
+
+function Base.getindex(source::SourceFile, i::Integer)
+    source.code[i - source.byte_offset]
+end
+
+function Base.thisind(source::SourceFile, i::Integer)
+    thisind(source.code, i - source.byte_offset) + source.byte_offset
+end
+
+function Base.nextind(source::SourceFile, i::Integer)
+    nextind(source.code, i - source.byte_offset) + source.byte_offset
+end
+
+Base.firstindex(source::SourceFile) = firstindex(source.code) + source.byte_offset
+Base.lastindex(source::SourceFile)  = lastindex(source.code)  + source.byte_offset
+
+"""
+    sourcetext(source::SourceFile)
+
+Get the full source text of a `SourceFile` as a string.
+"""
+function sourcetext(source::SourceFile)
+    return source.code
+end
+
+
+#-------------------------------------------------------------------------------
+# Tools for highlighting source ranges
+function _print_marker_line(io, prefix_str, str, underline, singleline, color,
+                            note, notecolor)
+    # Whitespace equivalent in length to `prefix_str`
+    # Getting exactly the same width of whitespace as `str` is tricky.
+    # Especially for mixtures of tabs and spaces.
+    # tabs are zero width according to textwidth
+    indent = join(isspace(c) ? c : repeat(' ', textwidth(c)) for c in prefix_str)
+
+    # Assume tabs are 4 wide rather than 0. (fixme: implement tab alignment?)
+    w = textwidth(str) + 4*count(c->c=='\t', str)
+    if !isempty(indent)
+        indent = "#" * (first(indent) == '\t' ? indent : indent[nextind(indent,1):end])
+    end
+
+    midchar = '─'
+    startstr, endstr, singlestart = underline ? ("└","┘","╙") : ("┌","┐","╓")
+
+    markline =
+    if singleline
+        w == 0 ? string(indent, startstr)    :
+        w == 1 ? string(indent, singlestart) :
+                 string(indent, startstr, repeat('─', w-2), endstr)
+    else
+        if underline && isempty(indent) && w > 1
+             string('#', repeat('─', w-2), endstr)
+        else
+            s,e = underline ? ("", endstr) : (startstr, "")
+            w == 0 ? string(indent, s, e) :
+                     string(indent, s, repeat('─', w-1), e)
+        end
+    end
+    if note isa AbstractString
+        markline *= " ── "
+    end
+    _printstyled(io, markline; fgcolor=color)
+    if !isnothing(note)
+        if note isa AbstractString
+            _printstyled(io, note, fgcolor=notecolor)
+        else
+            note(io, indent, w)
+        end
+    end
+end
+
+function highlight(io::IO, source::SourceFile, range::UnitRange;
+                   color=(120,70,70), context_lines_before=2,
+                   context_lines_inner=1, context_lines_after=2,
+                   note=nothing, notecolor=nothing)
+    p = first(range)
+    q = last(range)
+
+    x,y = source_line_range(source, p;
+                            context_lines_before=context_lines_before,
+                            context_lines_after=context_lines_inner)
+    a,b = source_line_range(source, p)
+    q1 = max(q, p) # Ignore q for empty ranges
+    c,d = source_line_range(source, q1)
+    z,w = source_line_range(source, q1;
+                            context_lines_before=context_lines_inner,
+                            context_lines_after=context_lines_after)
+
+    p_line = source_line(source, p)
+    q_line = source_line(source, q)
+
+    marker_line_color = :light_black
+
+    if p_line >= q_line
+        # x-----------------
+        # a---p-------q----b
+        # #   └───────┘ ── note
+        # -----------------w
+
+        hitext = source[p:q]
+        print(io, source[x:p-1])
+        _printstyled(io, hitext; bgcolor=color)
+        #print(io, source[q+1:d])
+        print(io, source[nextind(source,q):d])
+        if d >= firstindex(source) && source[thisind(source, d)] != '\n'
+            print(io, "\n")
+        end
+        _print_marker_line(io, source[a:p-1], hitext, true, true, marker_line_color, note, notecolor)
+    else
+        # x   --------------
+        # #   ┌─────
+        # a---p----b
+        # --------------y
+        # ---------------
+        # z--------------
+        # c   --------q----d
+        # #───────────┘ ── note
+        # -----------------w
+
+        prefix1 = source[a:p-1]
+        print(io, source[x:a-1])
+        _print_marker_line(io, prefix1, source[p:b], false, false, marker_line_color, nothing, notecolor)
+        print(io, '\n')
+        print(io, prefix1)
+        if q_line - p_line - 1 <= 2*context_lines_inner
+            # The diagnostic range is compact and we show the whole thing
+            _printstyled(io, source[p:q]; bgcolor=color)
+        else
+            # Or large and we truncate the code to show only the region around the
+            # start and end of the error.
+            _printstyled(io, source[p:y]; bgcolor=color)
+            print(io, "⋮\n")
+            _printstyled(io, source[z:q]; bgcolor=color)
+        end
+        print(io, source[nextind(source, q):d])
+        source[thisind(source, d)] == '\n' || print(io, "\n")
+        qline = source[c:q]
+        _print_marker_line(io, "", qline, true, false, marker_line_color, note, notecolor)
+    end
+    if context_lines_after > 0 && d+1 <= lastindex(source)
+        print(io, '\n')
+        w1 = source[thisind(source, w)] == '\n' ? w - 1 : w
+        print(io, source[d+1:w1])
+    end
+end
diff --git a/JuliaSyntax/src/core/tree_cursors.jl b/JuliaSyntax/src/core/tree_cursors.jl
new file mode 100644
index 0000000000000..75a5c0e44008f
--- /dev/null
+++ b/JuliaSyntax/src/core/tree_cursors.jl
@@ -0,0 +1,175 @@
+using Base.Iterators: Reverse
+
+"""
+    prev_sibling_assumed(cursor::GreenTreeCursor)::Union{Nothing, GreenTreeCursor}
+    prev_sibling_assumed(cursor::RedTreeCursor)::Union{Nothing, RedTreeCursor}
+
+Gives the previous sibling of the current node, but makes the assumption that
+there is one or that we are at the top level.
+Without knowing the parent, we cannot otherwise know which the last sibling is,
+unless we are at the top level in which case `nothing` is returned.
+"""
+function prev_sibling_assumed end
+
+"""
+    GreenTreeCursor
+
+Represents a cursors into a ParseStream output buffer that makes it easy to
+work with the green tree representation.
+"""
+struct GreenTreeCursor
+    parser_output::Vector{RawGreenNode}
+    position::UInt32
+end
+GreenTreeCursor(stream::ParseStream) = GreenTreeCursor(stream.output, length(stream.output))
+this(node::GreenTreeCursor) = node.parser_output[node.position]
+
+const SENTINEL_INDEX = UInt32(1)
+function prev_sibling_assumed(cursor::GreenTreeCursor)
+    next_idx = cursor.position - this(cursor).node_span - UInt32(1)
+    next_idx == SENTINEL_INDEX && return nothing
+    GreenTreeCursor(cursor.parser_output, next_idx)
+end
+
+function Base.in(child::GreenTreeCursor, parent::GreenTreeCursor)
+    @assert child.parser_output === parent.parser_output
+    child.position < parent.position || return false
+    return child.position >= parent.position - this(parent).node_span
+end
+
+# Debug printing
+function Base.show(io::IO, node::GreenTreeCursor)
+    print(io, Base.summary(this(node)), " @", node.position)
+end
+
+# Reverse iterator interface
+Base.reverse(node::GreenTreeCursor) = Base.Iterators.Reverse(node)
+Base.IteratorSize(::Type{Reverse{GreenTreeCursor}}) = Base.SizeUnknown()
+@inline function Base.iterate(node::Reverse{GreenTreeCursor},
+                              (next_idx, final)::NTuple{2, UInt32} =
+                              (node.itr.position-UInt32(1), node.itr.position - this(node.itr).node_span - UInt32(1)))::Union{Nothing, Tuple{GreenTreeCursor, NTuple{2, UInt32}}}
+    node = node.itr
+    while true
+        next_idx == final && return nothing
+        next_node = GreenTreeCursor(node.parser_output, next_idx)
+        nrgn = this(next_node)
+        if getfield(nrgn, :head).kind == K"TOMBSTONE"
+            # TOMBSTONED nodes are counted as part of the size of the tree, but
+            # do not contribute either byte ranges or children.
+            next_idx -= UInt32(1)
+            continue
+        end
+        # Inlined prev_sibling_assumed
+        new_next_idx = next_idx - nrgn.node_span - UInt32(1)
+        return (next_node, (new_next_idx, final))
+    end
+end
+
+# Accessors / predicates
+is_leaf(node::GreenTreeCursor)     = !is_non_terminal(this(node))
+head(node::GreenTreeCursor)        = this(node).head
+treesize(node::GreenTreeCursor)    = this(node).node_span
+is_non_terminal(node::GreenTreeCursor) = is_non_terminal(this(node))
+
+"""
+    span(node)
+
+Get the number of bytes this node covers in the source text.
+"""
+span(node::GreenTreeCursor) = this(node).byte_span
+
+"""
+    RedTreeCursor
+
+Wraps a `GreenTreeCursor` to keep track of the absolute position of the node
+in the original source text.
+"""
+struct RedTreeCursor
+    green::GreenTreeCursor
+    # The last byte that is still part of the node
+    byte_end::UInt32
+end
+RedTreeCursor(stream::ParseStream) = RedTreeCursor(
+    GreenTreeCursor(stream), stream.next_byte - UInt32(1))
+
+function prev_sibling_assumed(cursor::RedTreeCursor)
+    prevgreen = prev_sibling_assumed(cursor.green)
+    if prevgreen === nothing
+        return nothing
+    end
+    return RedTreeCursor(prevgreen, cursor.byte_end - span(cursor))
+end
+
+
+Base.reverse(node::RedTreeCursor) = Base.Iterators.Reverse(node)
+Base.IteratorSize(::Type{Reverse{RedTreeCursor}}) = Base.SizeUnknown()
+@inline function Base.iterate(node::Reverse{RedTreeCursor})::Union{Nothing, Tuple{RedTreeCursor, NTuple{3, UInt32}}}
+    r = iterate(Reverse(node.itr.green))
+    return _iterate_red_cursor(r, node.itr.byte_end)
+end
+
+@inline function Base.iterate(node::Reverse{RedTreeCursor}, state::NTuple{3, UInt32})::Union{Nothing, Tuple{RedTreeCursor, NTuple{3, UInt32}}}
+    r = iterate(Reverse(node.itr.green), Base.tail(state))
+    return _iterate_red_cursor(r, first(state))
+end
+
+@inline function _iterate_red_cursor(r, byte_end)
+    r === nothing && return nothing
+    next_node, next_idx = r
+    return RedTreeCursor(next_node, byte_end),
+           (byte_end - span(next_node), next_idx...)
+end
+
+is_leaf(node::RedTreeCursor)     = is_leaf(node.green)
+head(node::RedTreeCursor)        = head(node.green)
+span(node::RedTreeCursor)        = span(node.green)
+byte_range(node::RedTreeCursor)  = (node.byte_end - span(node.green) + UInt32(1)):node.byte_end
+treesize(node::RedTreeCursor)    = treesize(node.green)
+is_non_terminal(node::RedTreeCursor) = is_non_terminal(node.green)
+
+function Base.show(io::IO, node::RedTreeCursor)
+    print(io, node.green, " [", byte_range(node), "]")
+end
+
+has_toplevel_siblings(cursor::GreenTreeCursor) =
+    treesize(cursor)+1 != length(cursor.parser_output)-1
+has_toplevel_siblings(cursor::RedTreeCursor) =
+    has_toplevel_siblings(cursor.green)
+struct TopLevelSiblingIterator{C}
+    cursor::C
+end
+
+function reverse_toplevel_siblings(cursor::RedTreeCursor)
+    @assert cursor.green.position == length(cursor.green.parser_output)
+    TopLevelSiblingIterator(cursor)
+end
+
+function reverse_toplevel_siblings(cursor::GreenTreeCursor)
+    @assert cursor.position == length(cursor.parser_output)
+    TopLevelSiblingIterator(cursor)
+end
+
+function Base.iterate(tsi::TopLevelSiblingIterator)
+    return (tsi.cursor, tsi.cursor)
+end
+function Base.iterate(cursor::TopLevelSiblingIterator{C}, last::C) where {C}
+    this = prev_sibling_assumed(last)
+    this === nothing && return nothing
+    return (this, this)
+end
+
+# HACK: Force inlining of `filter` for our cursors to avoid significant perf
+# degradation.
+@inline function Base.iterate(f::Iterators.Filter{<:Any, Iterators.Reverse{T}}, state...) where {T<:Union{RedTreeCursor, GreenTreeCursor}}
+    y = iterate(f.itr, state...)
+    while y !== nothing
+        if f.flt(y[1])
+            return y
+        end
+        y = iterate(f.itr, y[2])
+    end
+    nothing
+end
+
+Base.in(child::GreenTreeCursor, parent::RedTreeCursor) =
+    in(child, parent.green)
diff --git a/JuliaSyntax/src/integration/expr.jl b/JuliaSyntax/src/integration/expr.jl
new file mode 100644
index 0000000000000..dafc8bb3fba49
--- /dev/null
+++ b/JuliaSyntax/src/integration/expr.jl
@@ -0,0 +1,692 @@
+#-------------------------------------------------------------------------------
+# Conversion to Base.Expr
+
+"""
+    @isexpr(ex, head)
+    @isexpr(ex, head, nargs)
+
+Type inference friendly replacement for `Meta.isexpr`.
+
+When using the pattern
+```julia
+if @isexpr(ex, headsym)
+    body
+end
+```
+Julia's type inference knows `ex isa Expr` inside `body`. But `Meta.isexpr`
+hides this information from the compiler, for whatever reason.
+"""
+macro isexpr(ex, head)
+    ex isa Symbol || error("First argument to `@isexpr` must be a variable name")
+    :($(esc(ex)) isa Expr && $(esc(ex)).head == $(esc(head)))
+end
+
+macro isexpr(ex, head, nargs)
+    ex isa Symbol || error("First argument to `@isexpr` must be a variable name")
+    :($(esc(ex)) isa Expr &&
+      $(esc(ex)).head == $(esc(head)) &&
+      length($(esc(ex)).args) == $(esc(nargs)))
+end
+
+function _reorder_parameters!(args::Vector{Any}, params_pos::Int)
+    p = 0
+    for i = length(args):-1:1
+        ai = args[i]
+        if !@isexpr(ai, :parameters)
+            break
+        end
+        p = i
+    end
+    if p == 0
+        return
+    end
+    # nest frankentuples parameters sections
+    for i = length(args)-1:-1:p
+        pushfirst!((args[i]::Expr).args, pop!(args))
+    end
+    # Move parameters to args[params_pos]
+    insert!(args, params_pos, pop!(args))
+end
+
+function _strip_parens(ex::Expr)
+    while true
+        if @isexpr(ex, :parens)
+            if length(ex.args) == 1
+                ex = ex.args[1]
+            else
+                # Only for error cases
+                return Expr(:block, ex.args...)
+            end
+        else
+            return ex
+        end
+    end
+end
+
+
+reverse_nontrivia_children(cursor::RedTreeCursor) = Iterators.filter(should_include_node, Iterators.reverse(cursor))
+reverse_nontrivia_children(cursor) = Iterators.filter(should_include_node, Iterators.reverse(children(cursor)))
+
+# Julia string literals in a `K"string"` node may be split into several chunks
+# interspersed with trivia in two situations:
+# 1. Triple quoted string indentation is trivia
+# 2. An \ before newline removes the newline and any following indentation
+#
+# This function concatenating adjacent string chunks together as done in the
+# reference parser.
+function _string_to_Expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32)
+    ret = Expr(:string)
+    args2 = Any[]
+    i = 1
+    it = reverse_nontrivia_children(cursor)
+    r = iterate(it)
+    while r !== nothing
+        (child, state) = r
+        ex = node_to_expr(child, source, txtbuf, txtbuf_offset)
+        if isa(ex, String)
+            # This branch combines consequent string chunks together.
+            # It's unrolled once to avoid unnecessary allocations.
+            r = iterate(it, state)
+            if r === nothing
+                pushfirst!(ret.args, ex)
+                continue
+            end
+            (child, state) = r
+            ex2 = node_to_expr(child, source, txtbuf, txtbuf_offset)
+            if !isa(ex2, String)
+                pushfirst!(ret.args, ex)
+                ex = ex2
+                # Fall through to process `ex` (!::String)
+            else
+                strings = String[ex2, ex]  # Note: reversed order since we're iterating backwards
+                r = iterate(it, state)
+                while r !== nothing
+                    (child, state) = r
+                    ex = node_to_expr(child, source, txtbuf, txtbuf_offset)
+                    isa(ex, String) || break
+                    pushfirst!(strings, ex)
+                    r = iterate(it, state)
+                end
+                buf = IOBuffer()
+                for s in strings
+                    write(buf, s)
+                end
+                pushfirst!(ret.args, String(take!(buf)))
+                r === nothing && break
+                # Fall through to process `ex` (!::String)
+            end
+        end
+        # ex not a string
+        if @isexpr(ex, :parens, 1)
+            ex = _strip_parens(ex)
+            if ex isa String
+                # Wrap interpolated literal strings in (string) so we can
+                # distinguish them from the surrounding text (issue #38501)
+                # Ie, "$("str")"  vs  "str"
+                # https://github.com/JuliaLang/julia/pull/38692
+                ex = Expr(:string, ex)
+            end
+        end
+        @assert ex !== nothing
+        pushfirst!(ret.args, ex)
+        r = iterate(it, state)
+    end
+
+    if length(ret.args) == 1 && ret.args[1] isa String
+        # If there's a single string remaining after joining, we unwrap
+        # to give a string literal.
+        #   """\n  a\n  b""" ==>  "a\nb"
+        return only(ret.args)
+    else
+        # This only happens when the kind is K"string" or when an error has occurred.
+        return ret
+    end
+end
+
+# Shared fixups for Expr children in cases where the type of the parent node
+# affects the child layout.
+function fixup_Expr_child(head::SyntaxHead, @nospecialize(arg), first::Bool)
+    isa(arg, Expr) || return arg
+    k = kind(head)
+    eq_to_kw_in_call = ((k == K"call" || k == K"dotcall") &&
+                        is_prefix_call(head)) || k == K"ref"
+    eq_to_kw_in_params = k != K"vect"   && k != K"curly" &&
+                         k != K"braces" && k != K"ref"
+    coalesce_dot = k in KSet"call dotcall curly" ||
+                   (k == K"quote" && has_flags(head, COLON_QUOTE))
+    was_parens = @isexpr(arg, :parens)
+    arg = _strip_parens(arg)
+    if @isexpr(arg, :(=)) && eq_to_kw_in_call && !first
+        arg = Expr(:kw, arg.args...)
+    elseif k != K"parens" && @isexpr(arg, :., 1) && arg.args[1] isa Tuple
+        # This undoes the "Hack" below"
+        h, a = arg.args[1]::Tuple{SyntaxHead,Any}
+        arg = ((!was_parens && coalesce_dot && first) ||
+                is_syntactic_operator(h)) ?
+            Symbol(".", a) : Expr(:., a)
+    elseif @isexpr(arg, :parameters) && eq_to_kw_in_params
+        pargs = arg.args
+        for j = 1:length(pargs)
+            pj = pargs[j]
+            if @isexpr(pj, :(=))
+                pargs[j] = Expr(:kw, pj.args...)
+            end
+        end
+    end
+    return arg
+end
+
+# Remove the `do` block from the final position in a function/macro call arg list
+function _extract_do_lambda!(args::Vector{Any})
+    if length(args) > 1 && Meta.isexpr(args[end], :do_lambda)
+        do_ex = pop!(args)::Expr
+        return Expr(:->, do_ex.args...)
+    else
+        return nothing
+    end
+end
+
+function _append_iterspec!(args::Vector{Any}, @nospecialize(ex))
+    if @isexpr(ex, :iteration)
+        for iter in ex.args::Vector{Any}
+            push!(args, Expr(:(=), iter.args...))
+        end
+    else
+        push!(args, ex)
+    end
+    return args
+end
+
+function parseargs!(retexpr::Expr, loc::LineNumberNode, cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32)
+    args = retexpr.args
+    firstchildhead = secondchildhead = head(cursor)
+    firstchildrange::UnitRange{UInt32} = byte_range(cursor)
+    itr = reverse_nontrivia_children(cursor)
+    r = iterate(itr)
+    while r !== nothing
+        (child, state) = r
+        r = iterate(itr, state)
+        expr = node_to_expr(child, source, txtbuf, txtbuf_offset)
+        @assert expr !== nothing
+        secondchildhead = firstchildhead
+        firstchildhead = head(child)
+        firstchildrange = byte_range(child)
+        pushfirst!(args, fixup_Expr_child(head(cursor), expr, r === nothing))
+    end
+    return (firstchildhead, secondchildhead, firstchildrange)
+end
+
+_expr_leaf_val(node::SyntaxNode, _...) = node.val
+_expr_leaf_val(cursor::RedTreeCursor, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32) =
+    parse_julia_literal(txtbuf, head(cursor), byte_range(cursor) .+ txtbuf_offset)
+# Extended in JuliaLowering to support `node_to_expr(::SyntaxTree, ...)`
+
+# Convert `cursor` (SyntaxNode or RedTreeCursor) to an Expr
+# `source` is a SourceFile, or if node was an Expr originally, a LineNumberNode
+function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32=UInt32(0))
+    if !should_include_node(cursor)
+        return nothing
+    end
+
+    nodehead = head(cursor)
+    k = kind(cursor)
+    srcrange::UnitRange{UInt32} = byte_range(cursor)
+    if is_leaf(cursor)
+        if is_error(k)
+            return k == K"error" ?
+                Expr(:error) :
+                Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`")
+        elseif k == K"VERSION"
+            nv = numeric_flags(flags(nodehead))
+            return VersionNumber(1, nv ÷ 10, nv % 10)
+        else
+            scoped_val = _expr_leaf_val(cursor, txtbuf, txtbuf_offset)
+            val = @isexpr(scoped_val, :scope_layer) ? scoped_val.args[1] : scoped_val
+            if val isa Union{Int128,UInt128,BigInt}
+                # Ignore the values of large integers and convert them back to
+                # symbolic/textural form for compatibility with the Expr
+                # representation of these.
+                str = replace(source[srcrange], '_'=>"")
+                macname = val isa Int128  ? Symbol("@int128_str")  :
+                        val isa UInt128 ? Symbol("@uint128_str") :
+                        Symbol("@big_str")
+                return Expr(:macrocall, GlobalRef(Core, macname), nothing, str)
+            elseif is_identifier(k)
+                val2 = lower_identifier_name(val, k)
+                return @isexpr(scoped_val, :scope_layer) ?
+                    Expr(:scope_layer, val2, scoped_val.args[2]) : val2
+            else
+                return scoped_val
+            end
+        end
+    end
+
+    if k == K"string"
+        return _string_to_Expr(cursor, source, txtbuf, txtbuf_offset)
+    end
+
+    loc = source_location(LineNumberNode, source, first(srcrange))
+
+    if k == K"cmdstring"
+        return Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), loc,
+            _string_to_Expr(cursor, source, txtbuf, txtbuf_offset))
+    end
+
+    headstr = untokenize(nodehead, include_flag_suff=false)
+    headsym = !isnothing(headstr) ?
+              Symbol(headstr) :
+              error("Can't untokenize head of kind $(k)")
+    retexpr = Expr(headsym)
+
+    # Block gets special handling for extra line number nodes
+    if k == K"block" || (k == K"toplevel" && !has_flags(nodehead, TOPLEVEL_SEMICOLONS_FLAG))
+        args = retexpr.args
+        for child in reverse_nontrivia_children(cursor)
+            expr = node_to_expr(child, source, txtbuf, txtbuf_offset)
+            @assert expr !== nothing
+            # K"block" does not have special first-child handling, so we do not need to keep track of that here
+            pushfirst!(args, fixup_Expr_child(head(cursor), expr, false))
+            pushfirst!(args, source_location(LineNumberNode, source, first(byte_range(child))))
+        end
+        isempty(args) && push!(args, loc)
+        if k == K"block" && has_flags(nodehead, PARENS_FLAG)
+            popfirst!(args)
+        end
+        return retexpr
+    end
+
+    # Now recurse to parse all arguments
+    (firstchildhead, secondchildhead, firstchildrange) =
+        parseargs!(retexpr, loc, cursor, source, txtbuf, txtbuf_offset)
+
+    return _node_to_expr(retexpr, loc, srcrange,
+                         firstchildhead, secondchildhead, firstchildrange,
+                         nodehead, source)
+end
+
+function adjust_macro_name!(retexpr::Union{Expr, Symbol})
+    if retexpr isa Symbol
+        return lower_identifier_name(retexpr, K"macro_name")
+    else
+        retexpr::Expr
+        if length(retexpr.args) == 2 && retexpr.head == :(.)
+            arg2 = retexpr.args[2]
+            if isa(arg2, QuoteNode) && arg2.value isa Symbol
+                retexpr.args[2] = QuoteNode(lower_identifier_name(arg2.value, K"macro_name"))
+            end
+        end
+        return retexpr
+    end
+end
+
+# Split out from `node_to_expr` for codesize reasons, to avoid specialization on multiple
+# tree types.
+@noinline function _node_to_expr(retexpr::Expr, loc::LineNumberNode,
+                                 srcrange::UnitRange{UInt32},
+                                 firstchildhead::SyntaxHead, secondchildhead::SyntaxHead,
+                                 firstchildrange::UnitRange{UInt32},
+                                 nodehead::SyntaxHead,
+                                 source)
+    args = retexpr.args
+    k = kind(nodehead)
+    endloc = source_location(LineNumberNode, source, last(srcrange))
+    if (k == K"var" || k == K"char") && length(retexpr.args) == 1
+        # `var` and `char` nodes have a single argument which is the value.
+        # However, errors can add additional errors tokens which we represent
+        # as e.g. `Expr(:var, ..., Expr(:error))`.
+        return retexpr.args[1]
+    elseif k == K"macro_name"
+        return adjust_macro_name!(retexpr.args[1])
+    elseif k == K"?"
+        retexpr.head = :if
+    elseif k == K"op=" && length(args) == 3
+        lhs = args[1]
+        op = args[2]
+        rhs = args[3]
+        headstr = string(args[2], '=')
+        retexpr.head = Symbol(headstr)
+        retexpr.args = Any[lhs, rhs]
+    elseif k == K".op=" && length(args) == 3
+        lhs = args[1]
+        op = args[2]
+        rhs = args[3]
+        headstr = '.' * string(args[2], '=')
+        retexpr.head = Symbol(headstr)
+        retexpr.args = Any[lhs, rhs]
+    elseif k == K"macrocall"
+        if length(args) >= 2
+            a2 = args[2]
+            if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"CmdMacroName"
+                # Fix up for custom cmd macros like foo`x`
+                args[2] = a2.args[3]
+            end
+            if kind(secondchildhead) == K"VERSION"
+                # Encode the syntax version into `loc` so that the argument order
+                # matches what ordinary macros expect.
+                loc = Core.MacroSource(loc, popat!(args, 2))
+            end
+        end
+        do_lambda = _extract_do_lambda!(args)
+        _reorder_parameters!(args, 2)
+        insert!(args, 2, loc)
+        if do_lambda isa Expr
+            return Expr(:do, retexpr, do_lambda)
+        end
+    elseif k == K"doc"
+        retexpr.head = :macrocall
+        retexpr.args = [GlobalRef(Core, Symbol("@doc")), loc, args...]
+    elseif k == K"dotcall" || k == K"call"
+        # Julia's standard `Expr` ASTs have children stored in a canonical
+        # order which is often not always source order. We permute the children
+        # here as necessary to get the canonical order.
+        if is_infix_op_call(nodehead) || is_postfix_op_call(nodehead)
+            args[2], args[1] = args[1], args[2]
+        end
+        # Lower (call x ') to special ' head
+        if is_postfix_op_call(nodehead) && args[1] == Symbol("'")
+            popfirst!(args)
+            retexpr.head = Symbol("'")
+        end
+        do_lambda = _extract_do_lambda!(args)
+        # Move parameters blocks to args[2]
+        _reorder_parameters!(args, 2)
+        if retexpr.head === :dotcall
+            funcname = args[1]
+            if is_prefix_call(nodehead)
+                retexpr.head = :.
+                retexpr.args = Any[funcname, Expr(:tuple, args[2:end]...)]
+            else
+                # operator calls
+                retexpr.head = :call
+                if funcname isa Symbol
+                    args[1] = Symbol(:., funcname)
+                end # else funcname could be an Expr(:error), just propagate it
+            end
+        end
+        if do_lambda isa Expr
+            return Expr(:do, retexpr, do_lambda)
+        end
+    elseif k == K"."
+        if length(args) == 2
+            a2 = args[2]
+            if !@isexpr(a2, :quote) && !(a2 isa QuoteNode)
+                args[2] = QuoteNode(a2)
+            end
+        elseif length(args) == 1
+            # Hack: Here we preserve the head of the operator to determine whether
+            # we need to coalesce it with the dot into a single symbol later on.
+            args[1] = (firstchildhead, args[1])
+        end
+    elseif k == K"ref" || k == K"curly"
+        # Move parameters blocks to args[2]
+        _reorder_parameters!(args, 2)
+    elseif k == K"for"
+        iters = _append_iterspec!([], args[1])
+        args[1] = length(iters) == 1 ? only(iters) : Expr(:block, iters...)
+        # Add extra line number node for the `end` of the block. This may seem
+        # useless but it affects code coverage.
+        push!(args[2].args, endloc)
+    elseif k == K"while"
+        # Line number node for the `end` of the block as in `for` loops.
+        push!(args[2].args, endloc)
+    elseif k in KSet"tuple vect braces"
+        # Move parameters blocks to args[1]
+        _reorder_parameters!(args, 1)
+    elseif k == K"where"
+        if length(args) == 2
+            a2 = args[2]
+            if @isexpr(a2, :braces)
+                a2a = a2.args
+                _reorder_parameters!(a2a, 2)
+                retexpr.args = Any[args[1], a2a...]
+            end
+        end
+    elseif k == K"catch"
+        if kind(firstchildhead) == K"Placeholder"
+            args[1] = false
+        end
+    elseif k == K"try"
+        # Try children in source order:
+        #   try_block catch_var catch_block else_block finally_block
+        # Expr ordering:
+        #   try_block catch_var catch_block [finally_block] [else_block]
+        try_ = args[1]
+        catch_var = false
+        catch_ = false
+        else_ = false
+        finally_ = false
+        for i in 2:length(args)
+            a = args[i]
+            if @isexpr(a, :catch)
+                catch_var = a.args[1]
+                catch_ = a.args[2]
+            elseif @isexpr(a, :else)
+                else_ = only(a.args)
+            elseif @isexpr(a, :finally)
+                finally_ = only(a.args)
+            elseif @isexpr(a, :error)
+                finally_ = Expr(:block, a) # Unclear where to put this but here will do?
+            else
+                @assert false "Illegal $a subclause in `try`"
+            end
+        end
+        empty!(args)
+        push!(args, try_, catch_var, catch_)
+        if finally_ !== false || else_ !== false
+            push!(args, finally_)
+            if else_ !== false
+                push!(args, else_)
+            end
+        end
+    elseif k == K"generator"
+        # Reconstruct the nested Expr form for generator from our flatter
+        # source-ordered `generator` format.
+        gen = args[1]
+        for j = length(args):-1:2
+            gen = Expr(:generator, gen)
+            _append_iterspec!(gen.args, args[j])
+            if j < length(args)
+                # Additional `for`s flatten the inner generator
+                gen = Expr(:flatten, gen)
+            end
+        end
+        return gen
+    elseif k == K"filter"
+        @assert length(args) == 2
+        retexpr.args = _append_iterspec!(Any[args[2]], args[1])
+    elseif k == K"nrow" || k == K"ncat"
+        # For lack of a better place, the dimension argument to nrow/ncat
+        # is stored in the flags
+        pushfirst!(args, numeric_flags(flags(nodehead)))
+    elseif k == K"typed_ncat"
+        insert!(args, 2, numeric_flags(flags(nodehead)))
+    elseif k == K"elseif"
+        # Block for conditional's source location
+        args[1] = Expr(:block, loc, args[1])
+    elseif k == K"->"
+        a1 = args[1]
+        if @isexpr(a1, :tuple)
+            # TODO: This makes the Expr form objectively worse for the sake of
+            # compatibility. We should consider deleting this special case in
+            # the future as a minor change.
+            if length(a1.args) == 1 &&
+                    (!has_flags(firstchildhead, PARENS_FLAG) ||
+                     !has_flags(firstchildhead, TRAILING_COMMA_FLAG)) &&
+                    !Meta.isexpr(a1.args[1], :parameters)
+                # `(a) -> c` is parsed without tuple on lhs in Expr form
+                args[1] = a1.args[1]
+            elseif length(a1.args) == 2 && (a11 = a1.args[1]; @isexpr(a11, :parameters) &&
+                                            length(a11.args) <= 1 && !Meta.isexpr(a1.args[2], :(...)))
+                # `(a; b=1) -> c`  parses args as `block` in Expr form :-(
+                if length(a11.args) == 0
+                    args[1] = Expr(:block, a1.args[2])
+                else
+                    a111 = only(a11.args)
+                    assgn = @isexpr(a111, :kw) ? Expr(:(=), a111.args...) : a111
+                    argloc = source_location(LineNumberNode, source, last(firstchildrange))
+                    args[1] = Expr(:block, a1.args[2], argloc, assgn)
+                end
+            end
+        end
+        a2 = args[2]
+        # Add function source location to rhs; add block if necessary
+        if @isexpr(a2, :block)
+            pushfirst!(a2.args, loc)
+        else
+            args[2] = Expr(:block, loc, args[2])
+        end
+    elseif k == K"function"
+        if length(args) > 1
+            if has_flags(nodehead, SHORT_FORM_FUNCTION_FLAG)
+                a1 = args[1]
+                a2 = args[2]
+                if !@isexpr(a2, :block) && !@isexpr(a1, Symbol("'"))
+                    args[2] = Expr(:block, a2)
+                end
+                retexpr.head = :(=)
+            else
+                a1 = args[1]
+                if @isexpr(a1, :tuple)
+                    # Convert to weird Expr forms for long-form anonymous functions.
+                    #
+                    # (function (tuple (... xs)) body) ==> (function (... xs) body)
+                    if length(a1.args) == 1 && (a11 = a1.args[1]; @isexpr(a11, :...))
+                        # function (xs...) \n body end
+                        args[1] = a11
+                    end
+                end
+            end
+            arg2 = args[2]
+            # Only push if this is an Expr - could be an ErrorVal
+            isa(arg2, Expr) && pushfirst!(arg2.args, loc)
+        end
+    elseif k == K"macro"
+        if length(args) > 1
+            pushfirst!((args[2]::Expr).args, loc)
+        end
+    elseif k == K"module"
+        insert!(args, kind(firstchildhead) == K"VERSION" ? 2 : 1, !has_flags(nodehead, BARE_MODULE_FLAG))
+        pushfirst!((args[end]::Expr).args, loc)
+    elseif k == K"quote"
+        if length(args) == 1
+            a1 = only(args)
+            if !(a1 isa Expr || a1 isa QuoteNode || a1 isa Bool)
+                # Flisp parser does an optimization here: simple values are stored
+                # as inert QuoteNode rather than in `Expr(:quote)` quasiquote
+                return QuoteNode(a1)
+            end
+        end
+    elseif k == K"do"
+        # Temporary head which is picked up by _extract_do_lambda
+        retexpr.head = :do_lambda
+    elseif k == K"let"
+        a1 = args[1]
+        if @isexpr(a1, :block)
+            a1a = (args[1]::Expr).args
+            filter!(a -> !(a isa LineNumberNode), a1a)
+            # Ugly logic to strip the Expr(:block) in certain cases for compatibility
+            if length(a1a) == 1
+                a = a1a[1]
+                if a isa Symbol || @isexpr(a, :(=)) || @isexpr(a, :(::))
+                    args[1] = a
+                end
+            end
+        end
+    elseif k == K"local" || k === K"global"
+        if length(args) == 1
+            a1 = args[1]
+            if @isexpr(a1, :const)
+                # Normalize `local const` to `const local`
+                args[1] = Expr(retexpr.head, (a1::Expr).args...)
+                retexpr.head = :const
+            elseif @isexpr(a1, :tuple)
+                # Normalize `global (x, y)` to `global x, y`
+                retexpr.args = a1.args
+            end
+        end
+    elseif k == K"return" && isempty(args)
+        push!(args, nothing)
+    elseif k == K"juxtapose"
+        retexpr.head = :call
+        pushfirst!(args, :*)
+    elseif k == K"struct"
+        @assert args[2].head == :block
+        orig_fields = args[2].args
+        fields = Expr(:block)
+        for field in orig_fields
+            if @isexpr(field, :macrocall) && field.args[1] == GlobalRef(Core, Symbol("@doc"))
+                # @doc macro calls don't occur within structs, in Expr form.
+                push!(fields.args, field.args[3])
+                push!(fields.args, field.args[4])
+            else
+                push!(fields.args, field)
+            end
+        end
+        args[2] = fields
+        pushfirst!(args, has_flags(nodehead, MUTABLE_FLAG))
+    elseif k == K"importpath"
+        retexpr.head = :.
+        for i = 1:length(args)
+            ai = args[i]
+            if ai isa QuoteNode
+                # Permit nonsense additional quoting such as
+                # import A.(:b).:c
+                args[i] = ai.value
+            end
+        end
+    elseif k == K"wrapper"
+        # This should only happen for errors wrapped next to what should have
+        # been single statements or atoms - represent these as blocks.
+        retexpr.head = :block
+    elseif k == K"comparison"
+        for i = 2:2:length(args)
+            arg = args[i]
+            if @isexpr(arg, :., 1)
+                args[i] = Symbol(".", arg.args[1])
+            end
+        end
+    elseif k == K"meta"
+        # Expr uses plain identifiers, but JuliaSyntax uses quoted (Symbol) identifiers
+        for (i, a) in enumerate(args)
+            if a isa QuoteNode && a.value isa Symbol
+                args[i] = a.value
+            end
+        end
+    end
+
+    return retexpr
+end
+
+function build_tree(::Type{Expr}, stream::ParseStream;
+                    filename=nothing, first_line=1,
+                    # unused, but required since `_parse` is written generic
+                    keep_parens=false)
+    source = SourceFile(stream, filename=filename, first_line=first_line)
+    return build_tree(Expr, stream, source)
+end
+
+function build_tree(::Type{Expr}, stream::ParseStream, source::SourceFile)
+    txtbuf = unsafe_textbuf(stream)
+    cursor = RedTreeCursor(stream)
+    wrapper_head = SyntaxHead(K"wrapper",EMPTY_FLAGS)
+    if has_toplevel_siblings(cursor)
+        entry = Expr(:block)
+        for child in
+                Iterators.filter(should_include_node, reverse_toplevel_siblings(cursor))
+            pushfirst!(entry.args, fixup_Expr_child(wrapper_head, node_to_expr(child, source, txtbuf), false))
+        end
+        length(entry.args) == 1 && (entry = only(entry.args))
+    else
+        entry = fixup_Expr_child(wrapper_head, node_to_expr(cursor, source, txtbuf), false)
+    end
+    return entry
+end
+
+function to_expr(node)
+    source = sourcefile(node)
+    txtbuf_offset, txtbuf = _unsafe_wrap_substring(sourcetext(source))
+    wrapper_head = SyntaxHead(K"wrapper",EMPTY_FLAGS)
+    return fixup_Expr_child(wrapper_head, node_to_expr(node, source, txtbuf, UInt32(txtbuf_offset)), false)
+end
+
+Base.Expr(node::SyntaxNode) = to_expr(node)
diff --git a/JuliaSyntax/src/integration/hooks.jl b/JuliaSyntax/src/integration/hooks.jl
new file mode 100644
index 0000000000000..7d5f1781af877
--- /dev/null
+++ b/JuliaSyntax/src/integration/hooks.jl
@@ -0,0 +1,430 @@
+# This file provides an adaptor to match the API expected by the Julia runtime
+# code in the binding Core._parse
+
+const _has_v1_6_hooks  = VERSION >= v"1.6"
+const _has_v1_10_hooks = isdefined(Core, :_setparser!)
+
+struct ErrorSpec
+    child_idx::Int
+    node::RedTreeCursor
+    parent_kind::Kind
+end
+
+function first_error_cursor(stream::ParseStream)
+    output = stream.output
+    for i = 2:length(output)
+        is_error(output[i]) && return GreenTreeCursor(output, i)
+    end
+end
+
+# Find the first error in a SyntaxNode tree, returning the index of the error
+# within its parent and the node itself.
+function first_tree_error(c::RedTreeCursor, error_cursor::GreenTreeCursor)
+    @assert !is_leaf(c) && !is_error(c)
+    first_child = first_error = nothing
+    it = reverse_nontrivia_children(c)
+    r = iterate(it)
+    local child
+    while r !== nothing
+        (child, state) = r
+        r = iterate(it, state)
+        (error_cursor in child || error_cursor == child.green) || continue
+        is_error(child) && break
+        return first_tree_error(child, error_cursor)
+    end
+    i = 1 # count node index
+    while r !== nothing
+        i += 1
+        (_, state) = r
+        r = iterate(it, state)
+    end
+    return ErrorSpec(i, child, kind(c))
+end
+
+function first_tree_error(stream::ParseStream)
+    c = RedTreeCursor(stream)
+    err = first_error_cursor(stream)
+    for c in reverse_toplevel_siblings(c)
+        is_error(c) && return ErrorSpec(0, c, K"wrapper")
+        is_leaf(c) && continue
+        return first_tree_error(c, err)
+    end
+end
+
+# Classify an incomplete expression, returning a Symbol compatible with
+# Base.incomplete_tag().
+#
+# Roughly, the intention here is to classify which expression head is expected
+# next if the incomplete stream was to continue. (Though this is just rough. In
+# practice several categories are combined for the purposes of the REPL -
+# perhaps we can/should do something more precise in the future.)
+function _incomplete_tag(theerror::ErrorSpec, codelen)
+    i = theerror.child_idx
+    c = theerror.node
+    kp = theerror.parent_kind
+    if isnothing(c) || last_byte(c) < codelen || codelen == 0
+        if kind(c) == K"ErrorEofMultiComment"
+            # This is the one weird case where the token itself is an
+            # incomplete error
+            return :comment
+        else
+            return :none
+        end
+    elseif first_byte(c) <= codelen && kind(c) != K"ErrorInvalidEscapeSequence"
+        # "ErrorInvalidEscapeSequence" may be incomplete, so we don't include it
+        # here as a hard error.
+        return :none
+    end
+    if kind(c) == K"error" && is_non_terminal(c)
+        for cc in reverse_nontrivia_children(c)
+            if kind(cc) == K"error"
+                return :other
+            end
+        end
+    end
+    if kp == K"wrapper"
+        return :other
+    elseif kp == K"string" || kp == K"var"
+        return :string
+    elseif kp == K"cmdstring"
+        return :cmd
+    elseif kp == K"char"
+        return :char
+    elseif kp in KSet"block quote let try"
+        return :block
+    elseif kp in KSet"for while function if"
+        return i == 1 ? :other : :block
+    elseif kp in KSet"module struct"
+        return i == 1 ? :other : :block
+    elseif kp == K"do"
+        return i < 3  ? :other : :block
+    else
+        return :other
+    end
+end
+
+#-------------------------------------------------------------------------------
+function _set_core_parse_hook(parser)
+    @static if _has_v1_10_hooks
+        Core._setparser!(parser)
+    else
+        # HACK! Fool the runtime into allowing us to set Core._parse, even during
+        # incremental compilation. (Ideally we'd just arrange for Core._parse to be
+        # set to the JuliaSyntax parser. But how do we signal that to the dumping
+        # code outside of the initial creation of Core?)
+        i = Base.fieldindex(Base.JLOptions, :incremental)
+        ptr = convert(Ptr{fieldtype(Base.JLOptions, i)},
+                      cglobal(:jl_options, Base.JLOptions) + fieldoffset(Base.JLOptions, i))
+        incremental = unsafe_load(ptr)
+        if incremental != 0
+            unsafe_store!(ptr, 0)
+        end
+
+        Base.eval(Core, :(_parse = $parser))
+
+        if incremental != 0
+            unsafe_store!(ptr, incremental)
+        end
+    end
+end
+
+
+# Wrap the function `f` so that it's always invoked in the given `world_age`
+#
+# NB: We need an inference barrier of one type or another here to prevent
+# invalidations. The invokes provide this currently.
+function fix_world_age(f, world_age::UInt)
+    if world_age == typemax(UInt)
+        function invoke_latestworld(args...; kws...)
+            Base.invokelatest(f, args...; kws...)
+        end
+    else
+        function invoke_fixedworld(args...; kws...)
+            Base.invoke_in_world(world_age, f, args...; kws...)
+        end
+    end
+end
+
+function _has_nested_error(ex)
+    if ex isa Expr
+        if ex.head == :error
+            return true
+        else
+            return any(_has_nested_error(e) for e in ex.args)
+        end
+    elseif ex isa QuoteNode
+        return _has_nested_error(ex.value)
+    else
+        return false
+    end
+end
+
+# Debug log file for dumping parsed code
+const _debug_log = Ref{Union{Nothing,IO}}(nothing)
+
+function core_parser_hook(code, filename::String, lineno::Int, offset::Int, options::Symbol; syntax_version = v"1.13")
+    try
+        # TODO: Check that we do all this input wrangling without copying the
+        # code buffer
+        if code isa Core.SimpleVector
+            # The C entry points will pass us this form.
+            (ptr,len) = code
+            code = String(unsafe_wrap(Array, ptr, len))
+        elseif !(code isa String || code isa SubString || code isa Vector{UInt8})
+            # For non-Base string types, convert to UTF-8 encoding, using an
+            # invokelatest to avoid world age issues.
+            code = Base.invokelatest(String, code)
+        end
+        if !isnothing(_debug_log[])
+            print(_debug_log[], """
+                  #-#-#-------------------------------
+                  # ENTER filename=$filename, lineno=$lineno, offset=$offset, options=$options"
+                  #-#-#-------------------------------
+                  """)
+            write(_debug_log[], code)
+        end
+
+        stream = ParseStream(code, offset+1; version = syntax_version)
+        if options === :statement || options === :atom
+            # To copy the flisp parser driver:
+            # * Parsing atoms      consumes leading trivia
+            # * Parsing statements consumes leading+trailing trivia
+            bump_trivia(stream)
+            if peek(stream) == K"EndMarker"
+                # If we're at the end of stream after skipping whitespace, just
+                # return `nothing` to indicate this rather than attempting to
+                # parse a statement or atom and failing.
+                return Core.svec(nothing, last_byte(stream))
+            end
+        end
+        parse!(stream; rule=options)
+        if options === :statement
+            bump_trivia(stream; skip_newlines=false)
+            if peek(stream) == K"NewlineWs"
+                bump(stream)
+            end
+        end
+
+        if any_error(stream)
+            pos_before_comments = last_non_whitespace_byte(stream)
+            errspec = first_tree_error(stream)
+            tag = _incomplete_tag(errspec, pos_before_comments)
+            if _has_v1_10_hooks
+                exc = ParseError(stream, filename=filename, first_line=lineno,
+                                 incomplete_tag=tag)
+                msg = sprint(showerror, exc)
+                error_ex = Expr(tag === :none ? :error : :incomplete,
+                                Meta.ParseError(msg, exc))
+            elseif tag !== :none
+                # Hack: For older Julia versions, replicate the messages which
+                # Base.incomplete_tag() will match
+                msg =
+                    tag === :string  ? "incomplete: invalid string syntax"     :
+                    tag === :comment ? "incomplete: unterminated multi-line comment #= ... =#" :
+                    tag === :block   ? "incomplete: construct requires end"    :
+                    tag === :cmd     ? "incomplete: invalid \"`\" syntax"      :
+                    tag === :char    ? "incomplete: invalid character literal" :
+                                       "incomplete: premature end of input"
+                error_ex = Expr(:incomplete, msg)
+            else
+                # In the flisp parser errors are normally `Expr(:error, msg)` where
+                # `msg` is a String. By using a JuliaSyntax.ParseError for msg
+                # we can do fancy error reporting instead.
+                error_ex = Expr(:error, ParseError(stream, filename=filename, first_line=lineno))
+            end
+            ex = if options === :all
+                # When encountering a toplevel error, the reference parser
+                # * truncates the top level expression arg list before that error
+                # * includes the last line number
+                # * appends the error message
+                source = SourceFile(stream, filename=filename, first_line=lineno)
+                topex = build_tree(Expr, stream, source)
+                @assert topex.head == :toplevel
+                i = findfirst(_has_nested_error, topex.args)
+                if i > 1 && topex.args[i-1] isa LineNumberNode
+                    i -= 1
+                end
+                resize!(topex.args, i-1)
+                push!(topex.args, LineNumberNode(source_line(source, first_byte(errspec.node)), filename))
+                push!(topex.args, error_ex)
+                topex
+            else
+                error_ex
+            end
+        else
+            # TODO: Figure out a way to show warnings. Meta.parse() has no API
+            # to communicate this, and we also can't show them to stdout as
+            # this is too side-effectful and can result in double-reporting in
+            # the REPL.
+            #
+            # show_diagnostics(stdout, stream.diagnostics, code)
+            #
+            ex = build_tree(Expr, stream; filename=filename, first_line=lineno)
+        end
+
+        # Note the next byte in 1-based indexing is `last_byte(stream) + 1` but
+        # the Core hook must return an offset (ie, it's 0-based) so the factors
+        # of one cancel here.
+        last_offset = last_byte(stream)
+
+        if !isnothing(_debug_log[])
+            println(_debug_log[], """
+                    #-#-#-
+                    # EXIT last_offset=$last_offset
+                    #-#-#-
+                    """)
+            flush(_debug_log[])
+        end
+
+        # Rewrap result in an svec for use by the C code
+        return Core.svec(ex, last_offset)
+    catch exc
+        if !isnothing(_debug_log[])
+            println(_debug_log[], """
+                    #-#-#-
+                    # ERROR EXIT
+                    # $exc
+                    #-#-#-
+                    """)
+            flush(_debug_log[])
+        end
+        @error("""JuliaSyntax parser failed — falling back to flisp!
+                  This is not your fault. Please submit a bug report to https://github.com/JuliaLang/JuliaSyntax.jl/issues""",
+               exception=(exc,catch_backtrace()),
+               offset=offset,
+               code=code)
+
+        _fl_parse_hook(code, filename, lineno, offset, options)
+    end
+end
+
+# Core._parse gained a `lineno` argument in
+# https://github.com/JuliaLang/julia/pull/43876
+# Prior to this, the following signature was needed:
+function core_parser_hook(code, filename, offset, options)
+    core_parser_hook(code, filename, 1, offset, options)
+end
+
+if _has_v1_10_hooks
+    Base.incomplete_tag(e::JuliaSyntax.ParseError) = e.incomplete_tag
+else
+    # Hack: Meta.parse() attempts to construct a ParseError from a string if it
+    # receives `Expr(:error)`. Add an override to the ParseError constructor to
+    # prevent this.
+    Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e
+end
+
+_default_system_parser = _has_v1_6_hooks ? Core._parse : nothing
+
+# hook into InteractiveUtils.@activate
+activate!(enable=true) = enable_in_core!(enable)
+
+"""
+    enable_in_core!([enable=true; freeze_world_age=true, debug_filename=nothing])
+
+Connect the JuliaSyntax parser to the Julia runtime so that it replaces the
+flisp parser for all parsing work. That is, JuliaSyntax will be used for
+`include()`, `Meta.parse()`, the REPL, etc. To reset to the reference parser,
+use `enable_in_core!(false)`.
+
+Keyword arguments:
+* `freeze_world_age` - Use a fixed world age for the parser to prevent
+  recompilation of the parser due to any user-defined methods (default `true`).
+* `debug_filename` - File name of parser debug log (defaults to `nothing` or
+  the value of `ENV["JULIA_SYNTAX_DEBUG_FILE"]`).
+"""
+function enable_in_core!(enable=true; freeze_world_age = true,
+        debug_filename   = get(ENV, "JULIA_SYNTAX_DEBUG_FILE", nothing))
+    if !_has_v1_6_hooks
+        error("Cannot use JuliaSyntax as the main Julia parser in Julia version $VERSION < 1.6")
+    end
+    if enable && !isnothing(debug_filename)
+        _debug_log[] = open(debug_filename, "w")
+    elseif !enable && !isnothing(_debug_log[])
+        close(_debug_log[])
+        _debug_log[] = nothing
+    end
+    if enable
+        world_age = freeze_world_age ? Base.get_world_counter() : typemax(UInt)
+        _set_core_parse_hook(fix_world_age(core_parser_hook, world_age))
+    else
+        @assert !isnothing(_default_system_parser)
+        _set_core_parse_hook(_default_system_parser)
+    end
+    nothing
+end
+
+
+#-------------------------------------------------------------------------------
+# Tools to call the reference flisp parser
+#
+# Call the flisp parser
+function _fl_parse_hook(code, filename, lineno, offset, options)
+    @static if VERSION >= v"1.8.0-DEV.1370" # https://github.com/JuliaLang/julia/pull/43876
+        return Core.Compiler.fl_parse(code, filename, lineno, offset, options)
+    elseif _has_v1_6_hooks
+        return Core.Compiler.fl_parse(code, filename, offset, options)
+    else
+        if options === :all
+            ex = Base.parse_input_line(String(code), filename=filename, depwarn=false)
+            if !@isexpr(ex, :toplevel)
+                ex = Expr(:toplevel, ex)
+            end
+            return ex, sizeof(code)
+        elseif options === :statement || options === :atom
+            ex, pos = Meta.parse(code, offset+1, greedy=options==:statement, raise=false)
+            return ex, pos-1
+        else
+            error("Unknown parse options $options")
+        end
+    end
+end
+
+#------------------------------------------------
+# Copy of the Meta.parse() API, but ensuring that we call the flisp parser
+# rather than using Meta.parse() which may be using the JuliaSyntax parser.
+
+"""
+Like Meta.parse() but always call the flisp reference parser.
+"""
+function fl_parse(str::AbstractString; raise::Bool=true, depwarn::Bool=true)
+    ex, pos = fl_parse(str, 1, greedy=true, raise=raise, depwarn=depwarn)
+    if isa(ex,Expr) && ex.head === :error
+        return ex
+    end
+    if pos <= ncodeunits(str)
+        raise && throw(Meta.ParseError("extra token after end of expression"))
+        return Expr(:error, "extra token after end of expression")
+    end
+    return ex
+end
+
+function fl_parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true,
+                  depwarn::Bool=true)
+    ex, pos = _fl_parse_string(str, "none", 1, pos, greedy ? :statement : :atom)
+    if raise && isa(ex,Expr) && ex.head === :error
+        throw(Meta.ParseError(ex.args[1]))
+    end
+    return ex, pos
+end
+
+"""
+Like Meta.parseall() but always call the flisp reference parser.
+"""
+function fl_parseall(text::AbstractString; filename="none", lineno=1)
+    ex,_ = _fl_parse_string(text, String(filename), lineno, 1, :all)
+    return ex
+end
+
+function _fl_parse_string(text::AbstractString, filename::AbstractString,
+                          lineno::Integer, index::Integer, options)
+    if index < 1 || index > ncodeunits(text) + 1
+        throw(BoundsError(text, index))
+    end
+    ex, offset::Int = _fl_parse_hook(text, filename, lineno, index-1, options)
+    ex, offset+1
+end
+
+# Convenience functions to mirror `JuliaSyntax.parsestmt(Expr, ...)` in simple cases.
+fl_parse(::Type{Expr}, args...; kws...) = fl_parse(args...; kws...)
+fl_parseall(::Type{Expr}, args...; kws...) = fl_parseall(args...; kws...)
diff --git a/JuliaSyntax/src/julia/julia_parse_stream.jl b/JuliaSyntax/src/julia/julia_parse_stream.jl
new file mode 100644
index 0000000000000..87ad038699a77
--- /dev/null
+++ b/JuliaSyntax/src/julia/julia_parse_stream.jl
@@ -0,0 +1,331 @@
+# Token flags - may be set for operator kinded tokens
+# Operator has a suffix
+const SUFFIXED_FLAG = RawFlags(1<<2)
+
+# Set for K"call", K"dotcall" or any syntactic operator heads
+# Distinguish various syntaxes which are mapped to K"call"
+const PREFIX_CALL_FLAG = RawFlags(0<<3)
+const INFIX_FLAG       = RawFlags(1<<3)
+const PREFIX_OP_FLAG   = RawFlags(2<<3)
+const POSTFIX_OP_FLAG  = RawFlags(3<<3)
+
+# The following flags are quite head-specific and may overlap with numeric flags
+
+"""
+Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ```
+"""
+const TRIPLE_STRING_FLAG = RawFlags(1<<8)
+
+"""
+Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping
+"""
+const RAW_STRING_FLAG = RawFlags(1<<9)
+
+"""
+Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses
+"""
+const PARENS_FLAG = RawFlags(1<<8)
+
+"""
+Set for various delimited constructs when they contains a trailing comma. For
+example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where
+this applies are: `tuple call dotcall macrocall vect curly braces <: >:`.
+"""
+const TRAILING_COMMA_FLAG = RawFlags(1<<9)
+
+"""
+Set for K"quote" for the short form `:x` as opposed to long form `quote x end`
+"""
+const COLON_QUOTE = RawFlags(1<<8)
+
+"""
+Set for K"toplevel" which is delimited by parentheses
+"""
+const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<8)
+
+"""
+Set for K"function" in short form definitions such as `f() = 1`
+"""
+const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<8)
+
+"""
+Set for K"struct" when mutable
+"""
+const MUTABLE_FLAG = RawFlags(1<<8)
+
+"""
+Set for K"module" when it's not bare (`module`, not `baremodule`)
+"""
+const BARE_MODULE_FLAG = RawFlags(1<<8)
+
+# Flags holding the dimension of an nrow or other UInt8 not held in the source
+# TODO: Given this is only used for nrow/ncat, we could actually use all the flags?
+const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8)
+
+function set_numeric_flags(n::Integer)
+    f = RawFlags((n << 8) & NUMERIC_FLAGS)
+    if numeric_flags(f) != n
+        error("Numeric flags unable to hold large integer $n")
+    end
+    f
+end
+
+function call_type_flags(f::RawFlags)
+    f & 0b11000
+end
+
+function numeric_flags(f::RawFlags)
+    Int((f >> 8) % UInt8)
+end
+
+flags(tok::SyntaxToken) = remove_flags(flags(tok.head), NUMERIC_FLAGS)
+
+"""
+    is_prefix_call(x)
+
+Return true for normal prefix function call syntax such as the `f` call node
+parsed from `f(x)`.
+"""
+is_prefix_call(x)     = call_type_flags(x) == PREFIX_CALL_FLAG
+
+"""
+    is_infix_op_call(x)
+
+Return true for infix operator calls such as the `+` call node parsed from
+`x + y`.
+"""
+is_infix_op_call(x)   = call_type_flags(x) == INFIX_FLAG
+
+"""
+    is_prefix_op_call(x)
+
+Return true for prefix operator calls such as the `+` call node parsed from `+x`.
+"""
+is_prefix_op_call(x)  = call_type_flags(x) == PREFIX_OP_FLAG
+
+"""
+    is_postfix_op_call(x)
+
+Return true for postfix operator calls such as the `'ᵀ` call node parsed from `x'ᵀ`.
+"""
+is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG
+
+
+"""
+    is_suffixed(x)
+
+Return true for operators which have suffixes, such as `+₁`
+"""
+is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)
+
+
+"""
+    numeric_flags(x)
+
+Return the number attached to a `SyntaxHead`. This is only for kinds `K"nrow"`
+and `K"ncat"`, for now.
+"""
+numeric_flags(x) = numeric_flags(flags(x))
+
+function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
+    str = (is_error(kind(head)) ? untokenize(kind(head); unique=false) :
+           untokenize(kind(head); unique=unique))::String
+    if include_flag_suff
+        is_trivia(head)  && (str = str*"-t")
+        is_infix_op_call(head)   && (str = str*"-i")
+        is_prefix_op_call(head)  && (str = str*"-pre")
+        is_postfix_op_call(head) && (str = str*"-post")
+
+        k = kind(head)
+        # Handle numeric flags for nrow/ncat nodes
+        if k in KSet"nrow ncat typed_ncat"
+            n = numeric_flags(head)
+            n != 0 && (str = str*"-"*string(n))
+        else
+            # Handle head-specific flags that overlap with numeric flags
+            if k in KSet"string cmdstring Identifier"
+                has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s")
+                has_flags(head, RAW_STRING_FLAG) && (str = str*"-r")
+            elseif k in KSet"tuple block macrocall"
+                has_flags(head, PARENS_FLAG) && (str = str*"-p")
+            elseif k == K"quote"
+                has_flags(head, COLON_QUOTE) && (str = str*"-:")
+            elseif k == K"toplevel"
+                has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;")
+            elseif k == K"function"
+                has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=")
+            elseif k == K"struct"
+                has_flags(head, MUTABLE_FLAG) && (str = str*"-mut")
+            elseif k == K"module"
+                has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare")
+            end
+            if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" &&
+                    has_flags(head, TRAILING_COMMA_FLAG)
+                str *= "-,"
+            end
+        end
+        is_suffixed(head) && (str = str*"-suf")
+    end
+    str
+end
+
+
+#-------------------------------------------------------------------------------
+# ParseStream Post-processing
+
+function validate_tokens(stream::ParseStream)
+    txtbuf = unsafe_textbuf(stream)
+    charbuf = IOBuffer()
+
+    # Process terminal nodes in the output
+    fbyte = stream.output[1].byte_span+1  # Start after sentinel
+    for i = 2:length(stream.output)
+        node = stream.output[i]
+        if !is_terminal(node) || kind(node) == K"TOMBSTONE"
+            continue
+        end
+
+        k = kind(node)
+        nbyte = fbyte + node.byte_span
+        tokrange = fbyte:nbyte-1
+        error_kind = K"None"
+
+        if k in KSet"Integer BinInt OctInt HexInt"
+            # The following shouldn't be able to error...
+            # parse_int_literal
+            # parse_uint_literal
+        elseif k == K"Float" || k == K"Float32"
+            underflow0 = false
+            if k == K"Float"
+                x, code = parse_float_literal(Float64, txtbuf, fbyte, nbyte)
+                # jl_strtod_c can return "underflow" even for valid cases such
+                # as `5e-324` where the source is an exact representation of
+                # `x`. So only warn when underflowing to zero.
+                underflow0 = code === :underflow && x == 0
+            else
+                x, code = parse_float_literal(Float32, txtbuf, fbyte, nbyte)
+                underflow0 = code === :underflow && x == 0
+            end
+            if code === :ok
+                # pass
+            elseif code === :overflow
+                emit_diagnostic(stream, tokrange,
+                                error="overflow in floating point literal")
+                error_kind = K"ErrorNumericOverflow"
+            elseif underflow0
+                emit_diagnostic(stream, tokrange,
+                                warning="underflow to zero in floating point literal")
+            end
+        elseif k == K"Char"
+            @assert fbyte < nbyte # Already handled in the parser
+            truncate(charbuf, 0)
+            had_error = unescape_julia_string(charbuf, txtbuf, fbyte,
+                                              nbyte, stream.diagnostics)
+            if had_error
+                error_kind = K"ErrorInvalidEscapeSequence"
+            else
+                seek(charbuf,0)
+                read(charbuf, Char)
+                if !eof(charbuf)
+                    error_kind = K"ErrorOverLongCharacter"
+                    emit_diagnostic(stream, tokrange,
+                                    error="character literal contains multiple characters")
+                end
+            end
+        elseif k == K"String" && !has_flags(node, RAW_STRING_FLAG)
+            had_error = unescape_julia_string(devnull, txtbuf, fbyte,
+                                              nbyte, stream.diagnostics)
+            if had_error
+                error_kind = K"ErrorInvalidEscapeSequence"
+            end
+        elseif is_error(k) && k != K"error"
+            # Emit messages for non-generic token errors
+            tokstr = String(txtbuf[tokrange])
+            msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter ErrorIdentifierStart"
+                "$(_token_error_descriptions[k]) $(repr(tokstr[1]))"
+            elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting"
+                "$(_token_error_descriptions[k]) $(repr(tokstr))"
+            else
+                _token_error_descriptions[k]
+            end
+            emit_diagnostic(stream, tokrange, error=msg)
+        end
+
+        if error_kind != K"None"
+            # Update the node with new error kind
+            stream.output[i] = RawGreenNode(SyntaxHead(error_kind, EMPTY_FLAGS),
+                                          node.byte_span, node.orig_kind)
+        end
+
+        fbyte = nbyte
+    end
+    sort!(stream.diagnostics, by=first_byte)
+end
+
+"""
+    bump_split(stream, token_spec1, [token_spec2 ...])
+
+Bump the next token, splitting it into several pieces
+
+Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`.
+If all `nbyte` are positive, the sum must equal the token length. If one
+`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of
+all `nbyte` must equal zero.
+
+This is a hack which helps resolves the occasional lexing ambiguity. For
+example
+* Whether .+ should be a single token or the composite (. +) which is used for
+  standalone operators.
+* Whether ... is splatting (most of the time) or three . tokens in import paths
+
+TODO: Are these the only cases?  Can we replace this general utility with a
+simpler one which only splits preceding dots?
+"""
+function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
+    tok = stream.lookahead[stream.lookahead_index]
+    stream.lookahead_index += 1
+    start_b = _next_byte(stream)
+    toklen = tok.next_byte - start_b
+    prev_b = start_b
+    for (i, (nbyte, k, f)) in enumerate(split_spec)
+        h = SyntaxHead(k, f)
+        actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte
+        orig_k = k == K"." ? K"." : kind(tok)
+        node = RawGreenNode(h, actual_nbyte, orig_k)
+        push!(stream.output, node)
+        prev_b += actual_nbyte
+        stream.next_byte += actual_nbyte
+    end
+    @assert tok.next_byte == prev_b
+    stream.peek_count = 0
+    return position(stream)
+end
+
+function peek_dotted_op_token(ps, allow_whitespace=false)
+    # Peek the next token, but if it is a dot, peek the next one as well
+    t = peek_token(ps)
+    isdotted = kind(t) == K"."
+    if isdotted
+        t2 = peek_token(ps, 2)
+        if !is_operator(t2) || (!allow_whitespace && preceding_whitespace(t2))
+            isdotted = false
+        else
+            t = t2
+        end
+    end
+    return (isdotted, t)
+end
+
+function bump_dotted(ps, isdot, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
+    if isdot
+        if emit_dot_node
+            dotmark = position(ps)
+            bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
+        else
+            bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
+        end
+    end
+    pos = bump(ps, flags, remap_kind=remap_kind)
+    isdot && emit_dot_node && (pos = emit(ps, dotmark, K"."))
+    return pos
+end
diff --git a/JuliaSyntax/src/julia/kinds.jl b/JuliaSyntax/src/julia/kinds.jl
new file mode 100644
index 0000000000000..7bd9bdb9f0c01
--- /dev/null
+++ b/JuliaSyntax/src/julia/kinds.jl
@@ -0,0 +1,1232 @@
+# Definition of Kind type - mapping from token string identifiers to
+# enumeration values as used in @K_str
+
+"""
+    K"name"
+    Kind(namestr)
+
+`Kind` is a type tag for specifying the type of tokens and interior nodes of
+a syntax tree. Abstractly, this tag is used to define our own *sum types* for
+syntax tree nodes. We do this explicitly outside the Julia type system because
+(a) Julia doesn't have sum types and (b) we want concrete data structures which
+are unityped from the Julia compiler's point of view, for efficiency.
+
+Naming rules:
+* Kinds which correspond to exactly one textural form are represented with that
+  text. This includes keywords like K"for" and operators like K"*".
+* Kinds which represent many textural forms have UpperCamelCase names. This
+  includes kinds like K"Identifier" and K"Comment".
+* Kinds which exist merely as delimiters are all uppercase
+"""
+primitive type Kind 16 end
+
+# The implementation of Kind here is basically similar to @enum. However we use
+# the K_str macro to self-name these kinds with their literal representation,
+# rather than needing to invent a new name for each.
+
+const _kind_str_to_int = Dict{String,UInt16}()
+const _kind_int_to_str = Dict{UInt16,String}()
+const _kind_modules = Dict{Int,Union{Symbol,Module}}(
+    0=>nameof(@__MODULE__),
+    1=>:JuliaLowering,
+    2=>:JuliaSyntaxFormatter
+)
+# Number of bits reserved for kind id's belonging to a single module
+const _kind_nbits = 10
+const _kind_module_id_max = typemax(UInt16) >> _kind_nbits
+
+function Kind(x::Integer)
+    if x < 0 || x > typemax(UInt16)
+        throw(ArgumentError("Kind out of range: $x"))
+    end
+    return Base.bitcast(Kind, convert(UInt16, x))
+end
+
+function Kind(s::AbstractString)
+    i = get(_kind_str_to_int, s) do
+        error("unknown Kind name $(repr(s))")
+    end
+    Kind(i)
+end
+
+Base.string(x::Kind) = get(_kind_int_to_str, reinterpret(UInt16, x), "<error: unknown kind>")
+Base.print(io::IO, x::Kind) = print(io, string(x))
+
+Base.isless(x::Kind, y::Kind) = reinterpret(UInt16, x) < reinterpret(UInt16, y)
+
+function Base.show(io::IO, k::Kind)
+    print(io, "K\"", k, "\"")
+end
+
+# Save the string representation rather than the bit pattern so that kinds
+# can be serialized and deserialized across different JuliaSyntax versions.
+function Base.write(io::IO, k::Kind)
+    str = string(k)
+    write(io, UInt8(sizeof(str))) + write(io, str)
+end
+function Base.read(io::IO, ::Type{Kind})
+    len = read(io, UInt8)
+    str = String(read(io, len))
+    Kind(str)
+end
+
+function Base.parentmodule(k::Kind)
+    mod_id = reinterpret(UInt16, k) >> _kind_nbits
+    _kind_modules[mod_id]::Module
+end
+
+function _register_kinds!(kind_modules, int_to_kindstr, kind_str_to_int, mod, module_id, names)
+    if module_id > _kind_module_id_max
+        error("Kind module id $module_id is out of range")
+    elseif length(names) >= 1 << _kind_nbits
+        error("Too many kind names")
+    elseif !haskey(kind_modules, module_id)
+        kind_modules[module_id] = mod
+    else
+        m = kind_modules[module_id]
+        if m == nameof(mod)
+            # Ok: known kind module, but not loaded until now
+            kind_modules[module_id] = mod
+        elseif m == mod
+            existing_kinds = Union{Nothing, Kind}[(i = get(kind_str_to_int, n, nothing);
+                               isnothing(i) ? nothing : Kind(i)) for n in names]
+            if any(isnothing, existing_kinds) ||
+                    !issorted(existing_kinds) ||
+                    any(k->parentmodule(k) != mod, existing_kinds)
+                error("Error registering kinds for module $mod (register_kinds() called more than once inconsistently, or conflict with existing module kinds?)")
+            else
+                # Assume we're re-registering kinds as in top level vs `__init__`
+                return
+            end
+        else
+            error("Kind module ID $module_id already claimed by module $m")
+        end
+    end
+    _register_kinds_names!(int_to_kindstr, kind_str_to_int, module_id, names)
+end
+
+# This function is separated from `_register_kinds!` to prevent sharing of the variable `i`
+# here and in the closure in `_register_kinds!`, which causes boxing and bad inference.
+function _register_kinds_names!(int_to_kindstr, kind_str_to_int, module_id, names)
+    # Process names to conflate category BEGIN/END markers with the first/last
+    # in the category.
+    i = 0
+    for name in names
+        normal_kind = false
+        if startswith(name, "BEGIN_")
+            j = i
+        elseif startswith(name, "END_")
+            j = i - 1
+        else
+            normal_kind = true
+            j = i
+            i += 1
+        end
+        kind_int = (module_id << _kind_nbits) | j
+        push!(kind_str_to_int, name=>kind_int)
+        if normal_kind
+            push!(int_to_kindstr, kind_int=>name)
+        end
+    end
+end
+
+"""
+    register_kinds!(mod, module_id, names)
+
+Register custom `Kind`s with the given `names`, belonging to a module `mod`.
+`names` is an array of arbitrary strings.
+
+In order for kinds to be represented by a small number of bits, some nontrivial
+cooperation is required between modules using custom kinds:
+* The integer `module_id` is globally unique for each `mod` which will be used
+  together, and not larger than $_kind_module_id_max.
+* No two modules register the same `name`. The semantics of a given `kind` name
+  should be defined by the module which owns it.
+
+To allow ranges of kinds to be delimited and quickly tested for, some special
+names are allowed: `BEGIN_section` and `END_section` pairs are detected, and
+alias the next and previous kind id's respectively so that kinds in `section`
+can be tested with `BEGIN_section <= k <= END_section`.
+"""
+function register_kinds!(mod, module_id, names)
+    _register_kinds!(_kind_modules, _kind_int_to_str, _kind_str_to_int, mod, module_id, names)
+end
+
+#-------------------------------------------------------------------------------
+
+"""
+    K"s"
+
+The kind of a token or AST internal node with string "s".
+
+For example
+* K")" is the kind of the right parenthesis token
+* K"block" is the kind of a block of code (eg, statements within a begin-end).
+"""
+macro K_str(s)
+    Kind(s)
+end
+
+"""
+A set of kinds which can be used with the `in` operator.  For example
+
+    k in KSet"+ - *"
+"""
+macro KSet_str(str)
+    kinds = [Kind(s) for s in split(str)]
+
+    quote
+        ($(kinds...),)
+    end
+end
+
+"""
+    kind(x)
+
+Return the `Kind` of `x`.
+"""
+kind(k::Kind) = k
+
+
+#-------------------------------------------------------------------------------
+# Kinds used by JuliaSyntax
+register_kinds!(JuliaSyntax, 0, [
+    # Whitespace
+    "Comment"
+    "Whitespace"
+    "NewlineWs"    # newline-containing whitespace
+
+    # Identifiers
+    "BEGIN_IDENTIFIERS"
+        "Identifier"
+        "Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering
+        # String and command macro names are modeled as a special kind of
+        # identifier as they need to be mangled before lookup.
+        "StrMacroName"
+        "CmdMacroName"
+    "END_IDENTIFIERS"
+
+    "BEGIN_KEYWORDS"
+        "baremodule"
+        "begin"
+        "break"
+        "const"
+        "continue"
+        "do"
+        "export"
+        "for"
+        "function"
+        "global"
+        "if"
+        "import"
+        "let"
+        "local"
+        "macro"
+        "module"
+        "quote"
+        "return"
+        "struct"
+        "try"
+        "using"
+        "while"
+        "BEGIN_BLOCK_CONTINUATION_KEYWORDS"
+            "catch"
+            "finally"
+            "else"
+            "elseif"
+            "end"
+        "END_BLOCK_CONTINUATION_KEYWORDS"
+        "BEGIN_CONTEXTUAL_KEYWORDS"
+            # contextual keywords
+            "abstract"
+            "as"
+            "doc"
+            "mutable"
+            "outer"
+            "primitive"
+            "public"
+            "type"
+            "var"
+            "VERSION"
+        "END_CONTEXTUAL_KEYWORDS"
+    "END_KEYWORDS"
+
+    "BEGIN_LITERAL"
+        "BEGIN_NUMBERS"
+            "Bool"
+            "Integer"
+            "BinInt"
+            "HexInt"
+            "OctInt"
+            "Float"
+            "Float32"
+        "END_NUMBERS"
+        "String"
+        "Char"
+        "CmdString"
+    "END_LITERAL"
+
+    "BEGIN_DELIMITERS"
+        # Punctuation
+        "@"
+        ","
+        ";"
+
+        # Paired delimiters
+        "["
+        "]"
+        "{"
+        "}"
+        "("
+        ")"
+        "\""
+        "\"\"\""
+        "`"
+        "```"
+    "END_DELIMITERS"
+
+    "BEGIN_OPS"
+    "ErrorInvalidOperator"
+    "Error**"
+
+    "..."
+
+    # Level 1
+    "BEGIN_ASSIGNMENTS"
+        "BEGIN_SYNTACTIC_ASSIGNMENTS"
+        "="
+        ".="
+        "op="  # Updating assignment operator ( $= %= &= *= += -= //= /= <<= >>= >>>= \= ^= |= ÷= ⊻= )
+        ".op="
+        ":="
+        "END_SYNTACTIC_ASSIGNMENTS"
+        "~"
+        "≔"
+        "⩴"
+        "≕"
+    "END_ASSIGNMENTS"
+
+    "BEGIN_PAIRARROW"
+        "=>"
+    "END_PAIRARROW"
+
+    # Level 2
+    "BEGIN_CONDITIONAL"
+    "?"
+    "END_CONDITIONAL"
+
+    # Level 3
+    "BEGIN_ARROW"
+        "-->"
+        "<--"
+        "<-->"
+        "←"
+        "→"
+        "↔"
+        "↚"
+        "↛"
+        "↞"
+        "↠"
+        "↢"
+        "↣"
+        "↤"
+        "↦"
+        "↮"
+        "⇎"
+        "⇍"
+        "⇏"
+        "⇐"
+        "⇒"
+        "⇔"
+        "⇴"
+        "⇶"
+        "⇷"
+        "⇸"
+        "⇹"
+        "⇺"
+        "⇻"
+        "⇼"
+        "⇽"
+        "⇾"
+        "⇿"
+        "⟵"
+        "⟶"
+        "⟷"
+        "⟹"
+        "⟺"
+        "⟻"
+        "⟼"
+        "⟽"
+        "⟾"
+        "⟿"
+        "⤀"
+        "⤁"
+        "⤂"
+        "⤃"
+        "⤄"
+        "⤅"
+        "⤆"
+        "⤇"
+        "⤌"
+        "⤍"
+        "⤎"
+        "⤏"
+        "⤐"
+        "⤑"
+        "⤔"
+        "⤕"
+        "⤖"
+        "⤗"
+        "⤘"
+        "⤝"
+        "⤞"
+        "⤟"
+        "⤠"
+        "⥄"
+        "⥅"
+        "⥆"
+        "⥇"
+        "⥈"
+        "⥊"
+        "⥋"
+        "⥎"
+        "⥐"
+        "⥒"
+        "⥓"
+        "⥖"
+        "⥗"
+        "⥚"
+        "⥛"
+        "⥞"
+        "⥟"
+        "⥢"
+        "⥤"
+        "⥦"
+        "⥧"
+        "⥨"
+        "⥩"
+        "⥪"
+        "⥫"
+        "⥬"
+        "⥭"
+        "⥰"
+        "⧴"
+        "⬱"
+        "⬰"
+        "⬲"
+        "⬳"
+        "⬴"
+        "⬵"
+        "⬶"
+        "⬷"
+        "⬸"
+        "⬹"
+        "⬺"
+        "⬻"
+        "⬼"
+        "⬽"
+        "⬾"
+        "⬿"
+        "⭀"
+        "⭁"
+        "⭂"
+        "⭃"
+        "⥷"
+        "⭄"
+        "⥺"
+        "⭇"
+        "⭈"
+        "⭉"
+        "⭊"
+        "⭋"
+        "⭌"
+        "￩"
+        "￫"
+        "⇜"
+        "⇝"
+        "↜"
+        "↝"
+        "↩"
+        "↪"
+        "↫"
+        "↬"
+        "↼"
+        "↽"
+        "⇀"
+        "⇁"
+        "⇄"
+        "⇆"
+        "⇇"
+        "⇉"
+        "⇋"
+        "⇌"
+        "⇚"
+        "⇛"
+        "⇠"
+        "⇢"
+        "↷"
+        "↶"
+        "↺"
+        "↻"
+        "🢲"
+    "END_ARROW"
+
+    # Level 4
+    "BEGIN_LAZYOR"
+        "||"
+        ".||"
+    "END_LAZYOR"
+
+    # Level 5
+    "BEGIN_LAZYAND"
+        "&&"
+        ".&&"
+    "END_LAZYAND"
+
+    # Level 6
+    "BEGIN_COMPARISON"
+        "<:"
+        ">:"
+        ">"
+        "<"
+        ">="
+        "≥"
+        "<="
+        "≤"
+        "=="
+        "==="
+        "≡"
+        "!="
+        "≠"
+        "!=="
+        "≢"
+        "∈"
+        "in"
+        "isa"
+        "∉"
+        "∋"
+        "∌"
+        "⊆"
+        "⊈"
+        "⊂"
+        "⊄"
+        "⊊"
+        "∝"
+        "∊"
+        "∍"
+        "∥"
+        "∦"
+        "∷"
+        "∺"
+        "∻"
+        "∽"
+        "∾"
+        "≁"
+        "≃"
+        "≂"
+        "≄"
+        "≅"
+        "≆"
+        "≇"
+        "≈"
+        "≉"
+        "≊"
+        "≋"
+        "≌"
+        "≍"
+        "≎"
+        "≐"
+        "≑"
+        "≒"
+        "≓"
+        "≖"
+        "≗"
+        "≘"
+        "≙"
+        "≚"
+        "≛"
+        "≜"
+        "≝"
+        "≞"
+        "≟"
+        "≣"
+        "≦"
+        "≧"
+        "≨"
+        "≩"
+        "≪"
+        "≫"
+        "≬"
+        "≭"
+        "≮"
+        "≯"
+        "≰"
+        "≱"
+        "≲"
+        "≳"
+        "≴"
+        "≵"
+        "≶"
+        "≷"
+        "≸"
+        "≹"
+        "≺"
+        "≻"
+        "≼"
+        "≽"
+        "≾"
+        "≿"
+        "⊀"
+        "⊁"
+        "⊃"
+        "⊅"
+        "⊇"
+        "⊉"
+        "⊋"
+        "⊏"
+        "⊐"
+        "⊑"
+        "⊒"
+        "⊜"
+        "⊩"
+        "⊬"
+        "⊮"
+        "⊰"
+        "⊱"
+        "⊲"
+        "⊳"
+        "⊴"
+        "⊵"
+        "⊶"
+        "⊷"
+        "⋍"
+        "⋐"
+        "⋑"
+        "⋕"
+        "⋖"
+        "⋗"
+        "⋘"
+        "⋙"
+        "⋚"
+        "⋛"
+        "⋜"
+        "⋝"
+        "⋞"
+        "⋟"
+        "⋠"
+        "⋡"
+        "⋢"
+        "⋣"
+        "⋤"
+        "⋥"
+        "⋦"
+        "⋧"
+        "⋨"
+        "⋩"
+        "⋪"
+        "⋫"
+        "⋬"
+        "⋭"
+        "⋲"
+        "⋳"
+        "⋴"
+        "⋵"
+        "⋶"
+        "⋷"
+        "⋸"
+        "⋹"
+        "⋺"
+        "⋻"
+        "⋼"
+        "⋽"
+        "⋾"
+        "⋿"
+        "⟈"
+        "⟉"
+        "⟒"
+        "⦷"
+        "⧀"
+        "⧁"
+        "⧡"
+        "⧣"
+        "⧤"
+        "⧥"
+        "⩦"
+        "⩧"
+        "⩪"
+        "⩫"
+        "⩬"
+        "⩭"
+        "⩮"
+        "⩯"
+        "⩰"
+        "⩱"
+        "⩲"
+        "⩳"
+        "⩵"
+        "⩶"
+        "⩷"
+        "⩸"
+        "⩹"
+        "⩺"
+        "⩻"
+        "⩼"
+        "⩽"
+        "⩾"
+        "⩿"
+        "⪀"
+        "⪁"
+        "⪂"
+        "⪃"
+        "⪄"
+        "⪅"
+        "⪆"
+        "⪇"
+        "⪈"
+        "⪉"
+        "⪊"
+        "⪋"
+        "⪌"
+        "⪍"
+        "⪎"
+        "⪏"
+        "⪐"
+        "⪑"
+        "⪒"
+        "⪓"
+        "⪔"
+        "⪕"
+        "⪖"
+        "⪗"
+        "⪘"
+        "⪙"
+        "⪚"
+        "⪛"
+        "⪜"
+        "⪝"
+        "⪞"
+        "⪟"
+        "⪠"
+        "⪡"
+        "⪢"
+        "⪣"
+        "⪤"
+        "⪥"
+        "⪦"
+        "⪧"
+        "⪨"
+        "⪩"
+        "⪪"
+        "⪫"
+        "⪬"
+        "⪭"
+        "⪮"
+        "⪯"
+        "⪰"
+        "⪱"
+        "⪲"
+        "⪳"
+        "⪴"
+        "⪵"
+        "⪶"
+        "⪷"
+        "⪸"
+        "⪹"
+        "⪺"
+        "⪻"
+        "⪼"
+        "⪽"
+        "⪾"
+        "⪿"
+        "⫀"
+        "⫁"
+        "⫂"
+        "⫃"
+        "⫄"
+        "⫅"
+        "⫆"
+        "⫇"
+        "⫈"
+        "⫉"
+        "⫊"
+        "⫋"
+        "⫌"
+        "⫍"
+        "⫎"
+        "⫏"
+        "⫐"
+        "⫑"
+        "⫒"
+        "⫓"
+        "⫔"
+        "⫕"
+        "⫖"
+        "⫗"
+        "⫘"
+        "⫙"
+        "⫷"
+        "⫸"
+        "⫹"
+        "⫺"
+        "⊢"
+        "⊣"
+        "⟂"
+        # ⫪,⫫ see https://github.com/JuliaLang/julia/issues/39350
+        "⫪"
+        "⫫"
+    "END_COMPARISON"
+
+    # Level 7
+    "BEGIN_PIPE"
+        "<|"
+        "|>"
+    "END_PIPE"
+
+    # Level 8
+    "BEGIN_COLON"
+        ":"
+        ".."
+        "…"
+        "⁝"
+        "⋮"
+        "⋱"
+        "⋰"
+        "⋯"
+    "END_COLON"
+
+    # Level 9
+    "BEGIN_PLUS"
+        "\$"
+        "+"
+        "-" # also used for "−"
+        "++"
+        "⊕"
+        "⊖"
+        "⊞"
+        "⊟"
+        "|"
+        "∪"
+        "∨"
+        "⊔"
+        "±"
+        "∓"
+        "∔"
+        "∸"
+        "≏"
+        "⊎"
+        "⊻"
+        "⊽"
+        "⋎"
+        "⋓"
+        "⟇"
+        "⧺"
+        "⧻"
+        "⨈"
+        "⨢"
+        "⨣"
+        "⨤"
+        "⨥"
+        "⨦"
+        "⨧"
+        "⨨"
+        "⨩"
+        "⨪"
+        "⨫"
+        "⨬"
+        "⨭"
+        "⨮"
+        "⨹"
+        "⨺"
+        "⩁"
+        "⩂"
+        "⩅"
+        "⩊"
+        "⩌"
+        "⩏"
+        "⩐"
+        "⩒"
+        "⩔"
+        "⩖"
+        "⩗"
+        "⩛"
+        "⩝"
+        "⩡"
+        "⩢"
+        "⩣"
+        "¦"
+    "END_PLUS"
+
+    # Level 10
+    "BEGIN_TIMES"
+        "*"
+        "/"
+        "÷"
+        "%"
+        "⋅" # also used for lookalikes "·" and "·"
+        "∘"
+        "×"
+        "\\"
+        "&"
+        "∩"
+        "∧"
+        "⊗"
+        "⊘"
+        "⊙"
+        "⊚"
+        "⊛"
+        "⊠"
+        "⊡"
+        "⊓"
+        "∗"
+        "∙"
+        "∤"
+        "⅋"
+        "≀"
+        "⊼"
+        "⋄"
+        "⋆"
+        "⋇"
+        "⋉"
+        "⋊"
+        "⋋"
+        "⋌"
+        "⋏"
+        "⋒"
+        "⟑"
+        "⦸"
+        "⦼"
+        "⦾"
+        "⦿"
+        "⧶"
+        "⧷"
+        "⨇"
+        "⨰"
+        "⨱"
+        "⨲"
+        "⨳"
+        "⨴"
+        "⨵"
+        "⨶"
+        "⨷"
+        "⨸"
+        "⨻"
+        "⨼"
+        "⨽"
+        "⩀"
+        "⩃"
+        "⩄"
+        "⩋"
+        "⩍"
+        "⩎"
+        "⩑"
+        "⩓"
+        "⩕"
+        "⩘"
+        "⩚"
+        "⩜"
+        "⩞"
+        "⩟"
+        "⩠"
+        "⫛"
+        "⊍"
+        "▷"
+        "⨝"
+        "⟕"
+        "⟖"
+        "⟗"
+        "⌿"
+        "⨟"
+    "END_TIMES"
+
+    # Level 11
+    "BEGIN_RATIONAL"
+        "//"
+    "END_RATIONAL"
+
+    # Level 12
+    "BEGIN_BITSHIFTS"
+        "<<"
+        ">>"
+        ">>>"
+    "END_BITSHIFTS"
+
+    # Level 13
+    "BEGIN_POWER"
+        "^"
+        "↑"
+        "↓"
+        "⇵"
+        "⟰"
+        "⟱"
+        "⤈"
+        "⤉"
+        "⤊"
+        "⤋"
+        "⤒"
+        "⤓"
+        "⥉"
+        "⥌"
+        "⥍"
+        "⥏"
+        "⥑"
+        "⥔"
+        "⥕"
+        "⥘"
+        "⥙"
+        "⥜"
+        "⥝"
+        "⥠"
+        "⥡"
+        "⥣"
+        "⥥"
+        "⥮"
+        "⥯"
+        "￪"
+        "￬"
+    "END_POWER"
+
+    # Level 14
+    "BEGIN_DECL"
+        "::"
+    "END_DECL"
+
+    # Level 15
+    "BEGIN_WHERE"
+        "where"
+    "END_WHERE"
+
+    # Level 16
+    "BEGIN_DOT"
+        "."
+    "END_DOT"
+
+    "!"
+    "'"
+    ".'"
+    "->"
+
+    "BEGIN_UNICODE_OPS"
+        "¬"
+        "√"
+        "∛"
+        "∜"
+    "END_UNICODE_OPS"
+    "END_OPS"
+
+    # 2. Nonterminals which are exposed in the AST, but where the surface
+    #    syntax doesn't have a token corresponding to the node type.
+    "BEGIN_SYNTAX_KINDS"
+        "block"
+        "call"
+        "dotcall"
+        "comparison"
+        "curly"
+        "juxtapose"      # Numeric juxtaposition like 2x
+        "string"         # A string interior node (possibly containing interpolations)
+        "cmdstring"      # A cmd string node (containing delimiters plus string)
+        "char"           # A char string node (containing delims + char data)
+        "macrocall"
+        "parameters"     # the list after ; in f(; a=1)
+        "toplevel"
+        "tuple"
+        "ref"
+        "vect"
+        "parens"
+        "importpath"
+        "meta"
+        # Concatenation syntax
+        "braces"
+        "bracescat"
+        "hcat"
+        "vcat"
+        "ncat"
+        "typed_hcat"
+        "typed_vcat"
+        "typed_ncat"
+        "row"
+        "nrow"
+        # Comprehensions
+        "generator"
+        "filter"
+        "iteration"
+        "comprehension"
+        "typed_comprehension"
+        "macro_name"
+        # Container for a single statement/atom plus any trivia and errors
+        "wrapper"
+    "END_SYNTAX_KINDS"
+
+    # Special tokens
+    "TOMBSTONE"    # Empty placeholder for kind to be filled later
+    "None"         # Never emitted by lexer/parser
+    "EndMarker"    # EOF
+
+    "BEGIN_ERRORS"
+        # Tokenization errors
+        "ErrorEofMultiComment"
+        "ErrorInvalidNumericConstant"
+        "ErrorHexFloatMustContainP"
+        "ErrorAmbiguousNumericConstant"
+        "ErrorAmbiguousNumericDotMultiply"
+        "ErrorInvalidInterpolationTerminator"
+        "ErrorNumericOverflow"
+        "ErrorInvalidEscapeSequence"
+        "ErrorOverLongCharacter"
+        "ErrorInvalidUTF8"
+        "ErrorInvisibleChar"
+        "ErrorIdentifierStart"
+        "ErrorUnknownCharacter"
+        "ErrorBidiFormatting"
+        # Generic error
+        "error"
+    "END_ERRORS"
+])
+
+#-------------------------------------------------------------------------------
+const _nonunique_kind_names = Set([
+    K"Comment"
+    K"Whitespace"
+    K"NewlineWs"
+    K"Identifier"
+    K"Placeholder"
+
+    K"ErrorEofMultiComment"
+    K"ErrorInvalidNumericConstant"
+    K"ErrorHexFloatMustContainP"
+    K"ErrorAmbiguousNumericConstant"
+    K"ErrorAmbiguousNumericDotMultiply"
+    K"ErrorInvalidInterpolationTerminator"
+    K"ErrorNumericOverflow"
+    K"ErrorInvalidEscapeSequence"
+    K"ErrorOverLongCharacter"
+    K"ErrorInvalidUTF8"
+    K"ErrorInvisibleChar"
+    K"ErrorUnknownCharacter"
+    K"ErrorBidiFormatting"
+    K"ErrorInvalidOperator"
+
+    K"Bool"
+    K"Integer"
+    K"BinInt"
+    K"HexInt"
+    K"OctInt"
+    K"Float"
+    K"Float32"
+    K"String"
+    K"Char"
+    K"CmdString"
+
+    K"StrMacroName"
+    K"CmdMacroName"
+])
+
+"""
+Return the string representation of a token kind, or `nothing` if the kind
+represents a class of tokens like K"Identifier".
+
+When `unique=true` only return a string when the kind uniquely defines the
+corresponding input token, otherwise return `nothing`.  When `unique=false`,
+return the name of the kind.
+
+TODO: Replace `untokenize()` with `Base.string()`?
+"""
+function untokenize(k::Kind; unique=true)
+    if unique && k in _nonunique_kind_names
+        return nothing
+    else
+        return string(k)
+    end
+end
+
+# Error kind => description
+const _token_error_descriptions = Dict{Kind, String}(
+    K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#",
+    K"ErrorInvalidNumericConstant" => "invalid numeric constant",
+    K"ErrorHexFloatMustContainP" => "hex float literal must contain `p` or `P`",
+    K"ErrorAmbiguousNumericConstant" => "ambiguous `.` syntax; add whitespace to clarify (eg `1.+2` might be `1.0+2` or `1 .+ 2`)",
+    K"ErrorAmbiguousNumericDotMultiply" => "numeric constant cannot be implicitly multiplied because it ends with `.`",
+    K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead",
+    K"ErrorNumericOverflow"=>"overflow in numeric literal",
+    K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence",
+    K"ErrorOverLongCharacter"=>"character literal contains multiple characters",
+    K"ErrorInvalidUTF8"=>"invalid UTF-8 sequence",
+    K"ErrorInvisibleChar"=>"invisible character",
+    K"ErrorIdentifierStart" => "identifier cannot begin with character",
+    K"ErrorUnknownCharacter"=>"unknown unicode character",
+    K"ErrorBidiFormatting"=>"unbalanced bidirectional unicode formatting",
+    K"ErrorInvalidOperator" => "invalid operator",
+    K"Error**" => "use `x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting",
+    K"error" => "unknown error token",
+)
+
+#-------------------------------------------------------------------------------
+# Predicates
+is_identifier(k::Kind) = K"BEGIN_IDENTIFIERS" <= k <= K"END_IDENTIFIERS"
+is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" <= k <= K"END_CONTEXTUAL_KEYWORDS"
+is_error(k::Kind) = K"BEGIN_ERRORS" <= k <= K"END_ERRORS" || k == K"ErrorInvalidOperator" || k == K"Error**"
+is_keyword(k::Kind) = K"BEGIN_KEYWORDS" <= k <= K"END_KEYWORDS"
+is_block_continuation_keyword(k::Kind) = K"BEGIN_BLOCK_CONTINUATION_KEYWORDS" <= k <= K"END_BLOCK_CONTINUATION_KEYWORDS"
+is_literal(k::Kind) = K"BEGIN_LITERAL" <= k <= K"END_LITERAL"
+is_number(k::Kind)  = K"BEGIN_NUMBERS" <= k <= K"END_NUMBERS"
+is_operator(k::Kind) = K"BEGIN_OPS" <= k <= K"END_OPS"
+is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where")
+
+is_identifier(x) = is_identifier(kind(x))
+is_contextual_keyword(x) = is_contextual_keyword(kind(x))
+is_error(x) = is_error(kind(x))
+is_keyword(x) = is_keyword(kind(x))
+is_literal(x) = is_literal(kind(x))
+is_number(x)  = is_number(kind(x))
+is_operator(x) = is_operator(kind(x))
+is_word_operator(x) = is_word_operator(kind(x))
+
+# Predicates for operator precedence
+# FIXME: Review how precedence depends on dottedness, eg
+# https://github.com/JuliaLang/julia/pull/36725
+is_prec_assignment(x)  = K"BEGIN_ASSIGNMENTS" <= kind(x) <= K"END_ASSIGNMENTS"
+is_prec_pair(x)        = K"BEGIN_PAIRARROW"   <= kind(x) <= K"END_PAIRARROW"
+is_prec_conditional(x) = K"BEGIN_CONDITIONAL" <= kind(x) <= K"END_CONDITIONAL"
+is_prec_arrow(x)       = K"BEGIN_ARROW"       <= kind(x) <= K"END_ARROW"
+is_prec_lazy_or(x)     = K"BEGIN_LAZYOR"      <= kind(x) <= K"END_LAZYOR"
+is_prec_lazy_and(x)    = K"BEGIN_LAZYAND"     <= kind(x) <= K"END_LAZYAND"
+is_prec_comparison(x)  = K"BEGIN_COMPARISON"  <= kind(x) <= K"END_COMPARISON"
+is_prec_pipe(x)        = K"BEGIN_PIPE"        <= kind(x) <= K"END_PIPE"
+is_prec_colon(x)       = K"BEGIN_COLON"       <= kind(x) <= K"END_COLON"
+is_prec_plus(x)        = K"BEGIN_PLUS"        <= kind(x) <= K"END_PLUS"
+is_prec_bitshift(x)    = K"BEGIN_BITSHIFTS"   <= kind(x) <= K"END_BITSHIFTS"
+is_prec_times(x)       = K"BEGIN_TIMES"       <= kind(x) <= K"END_TIMES"
+is_prec_rational(x)    = K"BEGIN_RATIONAL"    <= kind(x) <= K"END_RATIONAL"
+is_prec_power(x)       = K"BEGIN_POWER"       <= kind(x) <= K"END_POWER"
+is_prec_decl(x)        = K"BEGIN_DECL"        <= kind(x) <= K"END_DECL"
+is_prec_where(x)       = K"BEGIN_WHERE"       <= kind(x) <= K"END_WHERE"
+is_prec_dot(x)         = K"BEGIN_DOT"         <= kind(x) <= K"END_DOT"
+is_prec_unicode_ops(x) = K"BEGIN_UNICODE_OPS" <= kind(x) <= K"END_UNICODE_OPS"
+is_prec_pipe_lt(x)     = kind(x) == K"<|"
+is_prec_pipe_gt(x)     = kind(x) == K"|>"
+is_syntax_kind(x)      = K"BEGIN_SYNTAX_KINDS"<= kind(x) <= K"END_SYNTAX_KINDS"
+is_syntactic_assignment(x) = K"BEGIN_SYNTACTIC_ASSIGNMENTS" <= kind(x) <= K"END_SYNTACTIC_ASSIGNMENTS"
+
+function is_string_delim(x)
+    kind(x) in (K"\"", K"\"\"\"")
+end
+
+function is_radical_op(x)
+    kind(x) in (K"√", K"∛", K"∜")
+end
+
+"""
+Return true if `x` has whitespace or comment kind
+"""
+function is_whitespace(x)
+    k = kind(x)
+    return k == K"Whitespace" || k == K"NewlineWs" || k == K"Comment"
+end
+
+function is_syntactic_operator(x)
+    k = kind(x)
+    # TODO: Do we need to disallow dotted and suffixed forms when this is used
+    # in the parser? The lexer itself usually disallows such tokens, so it's
+    # not clear whether we need to handle them. (Though note `.->` is a
+    # token...)
+    return k in KSet"&& || . ... ->" || is_syntactic_assignment(k)
+end
diff --git a/JuliaSyntax/src/julia/literal_parsing.jl b/JuliaSyntax/src/julia/literal_parsing.jl
new file mode 100644
index 0000000000000..f0713b513cbcb
--- /dev/null
+++ b/JuliaSyntax/src/julia/literal_parsing.jl
@@ -0,0 +1,476 @@
+"""
+Nontrivia tokens (leaf nodes / literals) which are malformed are parsed into
+ErrorVal when `ignore_errors=true` during parsing.
+"""
+struct ErrorVal
+end
+
+Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red)
+
+#-------------------------------------------------------------------------------
+# This file contains utility functions for converting undecorated source
+# strings into Julia values.  For example, string->number, string unescaping, etc.
+
+function parse_int_literal(str::AbstractString)
+    # TODO: A specialized code path here can be a lot faster and also
+    # allocation free
+    str = replace(replace(str, '_'=>""), '−'=>'-')
+    x = Base.tryparse(Int, str)
+    if Int === Int32 && isnothing(x)
+        x = Base.tryparse(Int64, str)
+    end
+    if isnothing(x)
+        x = Base.tryparse(Int128, str)
+        if x === nothing
+            x = Base.parse(BigInt, str)
+        end
+    end
+    return x
+end
+
+function parse_uint_literal(str::AbstractString, k)
+    str = replace(str, '_'=>"")
+    if startswith(str, '+')
+        str = str[2:end]
+    end
+    ndigits = length(str)-2
+    if k == K"HexInt"
+        return ndigits <= 2  ? Base.parse(UInt8, str)   :
+               ndigits <= 4  ? Base.parse(UInt16, str)  :
+               ndigits <= 8  ? Base.parse(UInt32, str)  :
+               ndigits <= 16 ? Base.parse(UInt64, str)  :
+               ndigits <= 32 ? Base.parse(UInt128, str) :
+               Base.parse(BigInt, str)
+    elseif k == K"BinInt"
+        ndigits = length(str)-2
+        return ndigits <= 8   ? Base.parse(UInt8, str)   :
+               ndigits <= 16  ? Base.parse(UInt16, str)  :
+               ndigits <= 32  ? Base.parse(UInt32, str)  :
+               ndigits <= 64  ? Base.parse(UInt64, str)  :
+               ndigits <= 128 ? Base.parse(UInt128, str) :
+               Base.parse(BigInt, str)
+    elseif k == K"OctInt"
+        x = Base.tryparse(UInt64, str)
+        if isnothing(x)
+            x = Base.tryparse(UInt128, str)
+            if isnothing(x)
+                x = Base.parse(BigInt, str)
+            elseif ndigits > 43
+                x = BigInt(x)
+            end
+        else
+            x = ndigits <= 3  && x <= typemax(UInt8)  ? UInt8(x)   :
+                ndigits <= 6  && x <= typemax(UInt16) ? UInt16(x)  :
+                ndigits <= 11 && x <= typemax(UInt32) ? UInt32(x)  :
+                ndigits <= 22                         ? x          :
+                ndigits <= 43                         ? UInt128(x) :
+                BigInt(x)
+        end
+        return x
+    end
+end
+
+#-------------------------------------------------------------------------------
+"""
+Like `Base.parse(Union{Float64,Float32}, str)`, but permits float underflow
+
+Parse a Float64. str[firstind:lastind] must be a valid floating point literal
+string. If the value is outside Float64 range.
+"""
+function parse_float_literal(::Type{T}, str::Union{String,SubString,Vector{UInt8}},
+        firstind::Integer, endind::Integer) where {T} # force specialize with where {T}
+    strsize = endind - firstind
+    bufsz = 50
+    if strsize < bufsz
+        buf = Ref{NTuple{bufsz, UInt8}}()
+        ptr = Base.unsafe_convert(Ptr{UInt8}, pointer_from_objref(buf))
+        GC.@preserve str buf begin
+            n = _copy_normalize_number!(ptr, pointer(str, firstind), strsize)
+            _unsafe_parse_float(T, ptr, n)
+        end
+    else
+        # Slower path with allocation.
+        buf = Vector{UInt8}(undef, strsize+1)
+        ptr = pointer(buf)
+        GC.@preserve str buf begin
+            n = _copy_normalize_number!(ptr, pointer(str, firstind), strsize)
+            _unsafe_parse_float(T, ptr, n)
+        end
+    end
+end
+
+# Like replace(replace(str, '_'=>""), '−'=>'-')
+# dest must be of size at least srcsize+1
+function _copy_normalize_number!(dest, src, srcsize)
+    i = 0
+    j = 0
+    while i < srcsize
+        b = unsafe_load(src + i)
+        if b == UInt8('_')
+            i += 1
+            continue
+        elseif b == 0xe2 && i+2 < srcsize &&
+                unsafe_load(src + i + 1) == 0x88 &&
+                unsafe_load(src + i + 2) == 0x92
+            # src at i,i+1,i+2 is UTF-8 code for unicode minus sign '−'
+            b = UInt8('-')
+            i += 2
+        end
+        unsafe_store!(dest+j, b)
+        i += 1
+        j += 1
+    end
+    unsafe_store!(dest+j, 0x00)
+    return j
+end
+
+# Internals of parse_float_literal, split into a separate function to avoid some
+# apparent codegen issues https://github.com/JuliaLang/julia/issues/46509
+# (perhaps we don't want the `buf` in `GC.@preserve buf` to be stack allocated
+# on one branch and heap allocated in another?)
+@inline function _unsafe_parse_float(::Type{Float64}, ptr, strsize)
+    Libc.errno(0)
+    endptr = Ref{Ptr{UInt8}}(C_NULL)
+    x = ccall(:jl_strtod_c, Cdouble, (Ptr{UInt8}, Ptr{Ptr{UInt8}}), ptr, endptr)
+    @check endptr[] == ptr + strsize
+    status = :ok
+    if Libc.errno() == Libc.ERANGE
+        # strtod man page:
+        # * If  the  correct  value  would cause overflow, plus or
+        #   minus HUGE_VAL, HUGE_VALF, or HUGE_VALL is returned and
+        #   ERANGE is stored in errno.
+        # * If the correct value would cause underflow, a value with
+        #   magnitude no larger than DBL_MIN, FLT_MIN, or LDBL_MIN is
+        #   returned and ERANGE is stored in errno.
+        status = abs(x) < 1 ? :underflow : :overflow
+    end
+    return (x, status)
+end
+
+@inline function _unsafe_parse_float(::Type{Float32}, ptr, strsize)
+    # Convert float exponent 'f' to 'e' for strtof, eg, 1.0f0 => 1.0e0
+    # Presumes we can modify the data in ptr!
+    for p in ptr+strsize-1:-1:ptr
+        if unsafe_load(p) == UInt8('f')
+            unsafe_store!(p, UInt8('e'))
+            break
+        end
+    end
+    Libc.errno(0)
+    endptr = Ref{Ptr{UInt8}}(C_NULL)
+    status = :ok
+    @static if Sys.iswindows()
+        # Call strtod here and convert to Float32 on the Julia side because
+        # strtof seems buggy on windows and doesn't set ERANGE correctly on
+        # overflow. See also
+        # https://github.com/JuliaLang/julia/issues/46544
+        x = Float32(ccall(:jl_strtod_c, Cdouble, (Ptr{UInt8}, Ptr{Ptr{UInt8}}), ptr, endptr))
+        if isinf(x)
+            status = :overflow
+            # Underflow not detected, but that will only be a warning elsewhere.
+        end
+    else
+        x = ccall(:jl_strtof_c, Cfloat, (Ptr{UInt8}, Ptr{Ptr{UInt8}}), ptr, endptr)
+    end
+    @check endptr[] == ptr + strsize
+    if Libc.errno() == Libc.ERANGE
+        status = abs(x) < 1 ? :underflow : :overflow
+    end
+    return (x, status)
+end
+
+
+#-------------------------------------------------------------------------------
+"""
+Process Julia source code escape sequences for raw strings
+"""
+function unescape_raw_string(io::IO, txtbuf::Vector{UInt8},
+                             firstind, endind, is_cmd::Bool)
+    delim = is_cmd ? u8"`" : u8"\""
+    i = firstind
+    while i < endind
+        c = txtbuf[i]
+        if c != u8"\\"
+            if c == u8"\r"
+                # convert literal \r and \r\n in strings to \n (issue #11988)
+                if i+1 < endind && txtbuf[i+1] == u8"\n"
+                    i += 1
+                end
+                c = u8"\n"
+            end
+            write(io, c)
+            i += 1
+            continue
+        end
+        # Process \ escape sequences
+        j = i
+        while j < endind && txtbuf[j] == u8"\\"
+            j += 1
+        end
+        nbackslash = j - i
+        if (j < endind && txtbuf[j] == delim) || j >= endind
+            # Backslashes before a delimiter must also be escaped
+            nbackslash = div(nbackslash,2)
+        end
+        for k = 1:nbackslash
+            write(io, u8"\\")
+        end
+        i = j
+        if i < endind
+            write(io, txtbuf[i])
+            i += 1
+        end
+    end
+end
+
+"""
+Process Julia source code escape sequences for non-raw strings.
+`txtbuf` should be passed without delimiting quotes.
+"""
+function unescape_julia_string(io::IO, txtbuf::Vector{UInt8},
+                               firstind, endind, diagnostics)
+    had_error = false
+    i = firstind
+    while i < endind
+        c = txtbuf[i]
+        if c != u8"\\"
+            if c == u8"\r"
+                # convert literal \r and \r\n in strings to \n (issue #11988)
+                if i+1 < endind && txtbuf[i+1] == u8"\n"
+                    i += 1
+                end
+                c = u8"\n"
+            end
+            write(io, c)
+            i = nextind(txtbuf, i)
+            continue
+        end
+        # Process \ escape sequences.  See also Base.unescape_string which some
+        # of this code derives from (but which disallows \` \' \$)
+        escstart = i
+        i += 1
+        if i >= endind
+            emit_diagnostic(diagnostics, escstart:endind-1,
+                            error="invalid escape sequence")
+            had_error = true
+            break
+        end
+        c = txtbuf[i]
+        if c == u8"x" || c == u8"u" || c == u8"U"
+            n = k = 0
+            m = c == u8"x" ? 2 :
+                c == u8"u" ? 4 : 8
+            while (k += 1) <= m && i+1 < endind
+                nc = txtbuf[i+1]
+                n = u8"0" <= nc <= u8"9" ? n<<4 + (nc-u8"0") :
+                    u8"a" <= nc <= u8"f" ? n<<4 + (nc-u8"a"+10) :
+                    u8"A" <= nc <= u8"F" ? n<<4 + (nc-u8"A"+10) : break
+                i += 1
+            end
+            if k == 1 || n > 0x10ffff
+                u = m == 4 ? u8"u" : u8"U"
+                msg = (m == 2) ? "invalid hex escape sequence" :
+                                 "invalid unicode escape sequence"
+                emit_diagnostic(diagnostics, escstart:i, error=msg)
+                had_error = true
+            else
+                if m == 2 # \x escape sequence
+                    write(io, UInt8(n))
+                else
+                    print(io, Char(n))
+                end
+            end
+        elseif u8"0" <= c <= u8"7"
+            k = 1
+            n = Int(c - u8"0")
+            while (k += 1) <= 3 && i+1 < endind
+                c = txtbuf[i+1]
+                n = (u8"0" <= c <= u8"7") ? n<<3 + c-u8"0" : break
+                i += 1
+            end
+            if n > 255
+                emit_diagnostic(diagnostics, escstart:i,
+                                error="invalid octal escape sequence")
+                had_error = true
+            else
+                write(io, UInt8(n))
+            end
+        else
+            u = # C escapes
+                c == u8"n" ? u8"\n" :
+                c == u8"t" ? u8"\t" :
+                c == u8"r" ? u8"\r" :
+                c == u8"e" ? u8"\e" :
+                c == u8"b" ? u8"\b" :
+                c == u8"f" ? u8"\f" :
+                c == u8"v" ? u8"\v" :
+                c == u8"a" ? u8"\a" :
+                # Literal escapes allowed in Julia source
+                c == u8"\\" ? u8"\\" :
+                c == u8"'" ? u8"'" :
+                c == u8"\"" ? u8"\"" :
+                c == u8"$" ? u8"$" :
+                c == u8"`" ? u8"`" :
+                nothing
+            if isnothing(u)
+                emit_diagnostic(diagnostics, escstart:i,
+                                error="invalid escape sequence")
+                had_error = true
+            else
+                write(io, u)
+            end
+        end
+        # For non-ascii characters we may not be in the middle of the UTF-8
+        # encoding for that char, but this doesn't matter because unescaping
+        # only relies on the ascii subset.
+        i += 1
+    end
+    return had_error
+end
+
+#-------------------------------------------------------------------------------
+# Unicode normalization. As of Julia 1.8, this is part of Base and the Unicode
+# stdlib under the name `Unicode.julia_chartransform`. See
+# https://github.com/JuliaLang/julia/pull/42561
+#
+# To allow use on older Julia versions and to workaround the bug
+# https://github.com/JuliaLang/julia/issues/45716
+# we reproduce a specialized version of that logic here.
+
+# static wrapper around user callback function
+function utf8proc_custom_func(codepoint::UInt32, ::Ptr{Cvoid})::UInt32
+    (codepoint == 0x025B ? 0x03B5 :  # 'ɛ' => 'ε'
+    codepoint == 0x00B5 ? 0x03BC :   # 'µ' => 'μ'
+    codepoint == 0x00B7 ? 0x22C5 :   # '·' => '⋅'
+    codepoint == 0x0387 ? 0x22C5 :   # '·' => '⋅'
+    codepoint == 0x2212 ? 0x002D :   # '−' (\minus) => '-'
+    codepoint == 0x210F ? 0x0127 :   # 'ℏ' (\hslash) => 'ħ' \hbar
+    codepoint)
+end
+
+function utf8proc_decompose(str, options, buffer, nwords)
+    ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ptr{Cvoid}),
+                str, sizeof(str), buffer, nwords, options,
+                @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ptr{Cvoid})), C_NULL)
+    ret < 0 && Base.Unicode.utf8proc_error(ret)
+    return ret
+end
+
+function utf8proc_map(str::Union{String,SubString{String}}, options::Integer)
+    nwords = utf8proc_decompose(str, options, C_NULL, 0)
+    buffer = Base.StringVector(nwords*4)
+    nwords = utf8proc_decompose(str, options, buffer, nwords)
+    nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options)
+    nbytes < 0 && Base.Unicode.utf8proc_error(nbytes)
+    return String(resize!(buffer, nbytes))
+end
+
+function normalize_identifier(str)
+    flags = Base.Unicode.UTF8PROC_STABLE | Base.Unicode.UTF8PROC_COMPOSE
+    return isascii(str) ? str : utf8proc_map(str, flags)
+end
+
+
+#-------------------------------------------------------------------------------
+function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
+    k = kind(head)
+    # Any errors parsing literals are represented as ErrorVal() - this can
+    # happen when the user sets `ignore_errors=true` during parsing.
+    if k == K"Float"
+        v, code = parse_float_literal(Float64, txtbuf, first(srcrange),
+                                      last(srcrange)+1)
+        return (code === :ok || code === :underflow) ? v : ErrorVal()
+    elseif k == K"Float32"
+        v, code = parse_float_literal(Float32, txtbuf, first(srcrange),
+                                      last(srcrange)+1)
+        return (code === :ok || code === :underflow) ? v : ErrorVal()
+    elseif k == K"Char"
+        io = IOBuffer()
+        had_error = unescape_julia_string(io, txtbuf, first(srcrange),
+                                          last(srcrange)+1, Diagnostic[])
+        if had_error
+            return ErrorVal()
+        else
+            seek(io, 0)
+            c = read(io, Char)
+            return eof(io) ? c : ErrorVal()
+        end
+    elseif k in KSet"String CmdString"
+        io = IOBuffer()
+        had_error = false
+        if has_flags(head, RAW_STRING_FLAG)
+            unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1,
+                                k == K"CmdString")
+        else
+            had_error = unescape_julia_string(io, txtbuf, first(srcrange),
+                                              last(srcrange)+1, Diagnostic[])
+        end
+        return had_error ? ErrorVal() : String(take!(io))
+    elseif k == K"Bool"
+        return txtbuf[first(srcrange)] == u8"t"
+    elseif k == K"VERSION"
+        nv = numeric_flags(head)
+        return VersionNumber(1, nv ÷ 10, nv % 10)
+    end
+
+    # TODO: Avoid allocating temporary String here
+    val_str = String(txtbuf[srcrange])
+    if k == K"Integer"
+        parse_int_literal(val_str)
+    elseif k in KSet"BinInt OctInt HexInt"
+        parse_uint_literal(val_str, k)
+    elseif is_identifier(k)
+        if has_flags(head, RAW_STRING_FLAG)
+            io = IOBuffer()
+            unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false)
+            Symbol(normalize_identifier(String(take!(io))))
+        else
+            Symbol(normalize_identifier(val_str))
+        end
+    elseif is_operator(k)
+        isempty(srcrange)  ?
+            Symbol(untokenize(k)) : # synthetic invisible tokens
+            Symbol(normalize_identifier(val_str))
+    elseif k == K"error"
+        ErrorVal()
+    elseif is_syntax_kind(head)
+        nothing
+    elseif is_keyword(k)
+        # This should only happen for tokens nested inside errors
+        Symbol(val_str)
+    else
+        # Other kinds should only happen for tokens nested inside errors
+        # TODO: Consolidate this with the is_keyword() above? Something else?
+        ErrorVal()
+    end
+end
+
+"""
+    lower_identifier_name(name, kind)
+
+Lower a Julia identifier `name` of given `kind` to the name used by the Julia
+runtime. (In particular, this handles the name mangling of macros.)
+
+This is a lowering (rather than parsing) step, but is needed for `Expr`
+conversion and is also used for pretty printing.
+"""
+function lower_identifier_name(name::AbstractString, k::Kind)
+    # Replicate eager lowering done by the flisp parser
+    if k == K"macro_name"
+        name == "." ? "@__dot__" : "@$name"
+    elseif k == K"StrMacroName"
+        "@$(name)_str"
+    elseif k == K"CmdMacroName"
+        "@$(name)_cmd"
+    else
+        name
+    end
+end
+
+function lower_identifier_name(name::Symbol, k::Kind)
+    if k == K"Identifier"
+        name # avoid unnecessary conversion
+    else
+        Symbol(lower_identifier_name(string(name), k))
+    end
+end
diff --git a/JuliaSyntax/src/julia/parser.jl b/JuliaSyntax/src/julia/parser.jl
new file mode 100644
index 0000000000000..75c806caaa60a
--- /dev/null
+++ b/JuliaSyntax/src/julia/parser.jl
@@ -0,0 +1,3716 @@
+"""
+    ParseState(stream::ParseStream)
+
+ParseState is an internal data structure wrapping `ParseStream` to carry parser
+context as we recursively descend into the parse tree. For example, normally
+`x -y` means `(x) - (y)`, but when parsing matrix literals we're in
+`space_sensitive` mode, and `[x -y]` means [(x) (-y)].
+"""
+struct ParseState
+    stream::ParseStream
+
+    # Disable range colon for parsing ternary conditional operator
+    range_colon_enabled::Bool
+    # In space-sensitive mode "x -y" is 2 expressions, not a subtraction
+    space_sensitive::Bool
+    # Seeing `for` stops parsing macro arguments and makes a generator
+    for_generator::Bool
+    # Treat 'end' like a normal symbol instead of a reserved word
+    end_symbol::Bool
+    # Treat newline like ordinary whitespace instead of as a potential separator
+    whitespace_newline::Bool
+    # Enable parsing `where` with high precedence
+    where_enabled::Bool
+end
+
+# Normal context
+function ParseState(stream::ParseStream)
+    ParseState(stream, true, false, false, false, false, true)
+end
+
+function ParseState(ps::ParseState; range_colon_enabled=nothing,
+                    space_sensitive=nothing, for_generator=nothing,
+                    end_symbol=nothing, whitespace_newline=nothing,
+                    where_enabled=nothing)
+    ParseState(ps.stream,
+        range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled,
+        space_sensitive === nothing ? ps.space_sensitive : space_sensitive,
+        for_generator === nothing ? ps.for_generator : for_generator,
+        end_symbol === nothing ? ps.end_symbol : end_symbol,
+        whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline,
+        where_enabled === nothing ? ps.where_enabled : where_enabled)
+end
+
+# Functions to change parse state
+
+function normal_context(ps::ParseState)
+    ParseState(ps,
+               range_colon_enabled=true,
+               space_sensitive=false,
+               where_enabled=true,
+               for_generator=false,
+               end_symbol=false,
+               whitespace_newline=false)
+end
+
+function with_space_sensitive(ps::ParseState)
+    ParseState(ps,
+               space_sensitive=true,
+               whitespace_newline=false)
+end
+
+# Convenient wrappers for ParseStream
+
+function Base.peek(ps::ParseState, n=1; skip_newlines=nothing)
+    skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines
+    peek(ps.stream, n; skip_newlines=skip_nl)
+end
+
+function peek_token(ps::ParseState, n=1; skip_newlines=nothing)
+    skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines
+    peek_token(ps.stream, n, skip_newlines=skip_nl)
+end
+
+function peek_full_token(ps::ParseState, n=1; skip_newlines=nothing, kws...)
+    skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines
+    peek_full_token(ps.stream, n; skip_newlines=skip_nl, kws...)
+end
+
+function peek_behind(ps::ParseState, args...; kws...)
+    peek_behind(ps.stream, args...; kws...)
+end
+
+function peek_behind_pos(ps::ParseState, args...; kws...)
+    peek_behind_pos(ps.stream, args...; kws...)
+end
+
+function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...)
+    skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines
+    bump(ps.stream, flags; skip_newlines=skip_nl, kws...)
+end
+
+function bump_trivia(ps::ParseState, args...; kws...)
+    bump_trivia(ps.stream, args...; kws...)
+end
+
+function bump_invisible(ps::ParseState, args...; kws...)
+    bump_invisible(ps.stream, args...; kws...)
+end
+
+function bump_glue(ps::ParseState, args...; kws...)
+    bump_glue(ps.stream, args...; kws...)
+end
+
+function bump_split(ps::ParseState, args...; kws...)
+    bump_split(ps.stream, args...; kws...)
+end
+
+function reset_node!(ps::ParseState, args...; kws...)
+    reset_node!(ps.stream, args...; kws...)
+end
+
+function steal_token_bytes!(ps::ParseState, args...)
+    steal_token_bytes!(ps.stream, args...)
+end
+
+function Base.position(ps::ParseState, args...)
+    position(ps.stream, args...)
+end
+
+function emit(ps::ParseState, args...; kws...)
+    emit(ps.stream, args...; kws...)
+end
+
+function emit_diagnostic(ps::ParseState, args...; kws...)
+    emit_diagnostic(ps.stream, args...; kws...)
+end
+
+function unsafe_textbuf(ps::ParseState)
+    unsafe_textbuf(ps.stream)
+end
+
+function first_child_position(ps::ParseState, pos::ParseStreamPosition)
+    first_child_position(ps.stream, pos)
+end
+
+function last_child_position(ps::ParseState, pos::ParseStreamPosition)
+    last_child_position(ps.stream, pos)
+end
+#-------------------------------------------------------------------------------
+# Parser Utils
+
+# Bump an expected closing token.  If not found, discard unexpected tokens
+# until we find it or another closing token.
+#
+# Crude recovery heuristic: bump any tokens which aren't block or bracket
+# closing tokens.
+function bump_closing_token(ps, closing_kind, alternative_closer_hint=nothing)
+    # todo: Refactor with recover() ?
+    if peek(ps) == closing_kind
+        bump_trivia(ps)
+        bump(ps, TRIVIA_FLAG)
+        return
+    end
+    errmsg = "Expected `$(untokenize(closing_kind))`"
+    if !isnothing(alternative_closer_hint)
+        errmsg *= alternative_closer_hint
+    end
+    # We didn't find the closing token. Read ahead in the stream
+    mark = position(ps)
+    emit_diagnostic(ps, mark, mark, error=errmsg)
+    while true
+        k = peek(ps)
+        if is_closing_token(ps, k) && !(k in KSet", ;")
+            break
+        end
+        bump(ps)
+    end
+    # mark as trivia => ignore in AST.
+    emit(ps, mark, K"error", TRIVIA_FLAG)
+    if peek(ps) == closing_kind
+        bump(ps, TRIVIA_FLAG)
+    end
+end
+
+# Read tokens until we find an expected closing token.
+# Bump the big pile of resulting tokens as a single nontrivia error token
+function recover(is_closer::Function, ps, flags=EMPTY_FLAGS; mark = position(ps), error="unexpected tokens")
+    while true
+        k = peek(ps)
+        if k == K"EndMarker"
+            bump_invisible(ps, K"error", TRIVIA_FLAG,
+                           error="premature end of input")
+            break
+        elseif is_closer(ps, k)
+            break
+        end
+        bump(ps)
+    end
+    emit(ps, mark, K"error", flags, error=error)
+end
+
+@noinline function min_supported_version_err(ps, mark, message, min_ver)
+    major, minor = ps.stream.version
+    msg = "$message not supported in Julia version $major.$minor < $(min_ver.major).$(min_ver.minor)"
+    emit(ps, mark, K"error", error=msg)
+end
+
+# Emit an error if the version is less than `min_ver`
+function min_supported_version(min_ver, ps, mark, message)
+    if ps.stream.version < (min_ver.major, min_ver.minor)
+        min_supported_version_err(ps, mark, message, min_ver)
+    end
+end
+
+# flisp: disallow-space
+function bump_disallowed_space(ps)
+    if preceding_whitespace(peek_token(ps))
+        bump_trivia(ps, TRIVIA_FLAG, skip_newlines=false,
+                    error="whitespace is not allowed here")
+    end
+end
+
+function bump_semicolon_trivia(ps)
+    while peek(ps) in KSet"; NewlineWs"
+        bump(ps, TRIVIA_FLAG)
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Parsing-specific predicates on tokens/kinds
+#
+# All these take either a raw kind or a token.
+
+function is_plain_equals(t)
+    kind(t) == K"=" && !is_suffixed(t)
+end
+
+function is_closing_token(ps::ParseState, k)
+    k = kind(k)
+    return k in KSet"else elseif catch finally , ) ] } ; EndMarker" ||
+        (k == K"end" && !ps.end_symbol)
+end
+
+function is_block_continuation_keyword(ps::ParseState, k)
+    is_block_continuation_keyword(k) && !(ps.end_symbol && k == K"end")
+end
+
+function is_closer_or_newline(ps::ParseState, k)
+    is_closing_token(ps,k) || k == K"NewlineWs"
+end
+
+function is_initial_reserved_word(ps::ParseState, k)
+    k = kind(k)
+    is_iresword = k in KSet"begin while if for try return break continue function
+                            macro quote let local global const do struct module
+                            baremodule using import export"
+    # `begin` means firstindex(a) inside a[...]
+    return is_iresword && !(k == K"begin" && ps.end_symbol)
+end
+
+function is_reserved_word(k)
+    k = kind(k)
+    is_keyword(k) && !is_contextual_keyword(k)
+end
+
+# Return true if the next word (or word pair) is reserved, introducing a
+# syntactic structure.
+function peek_initial_reserved_words(ps::ParseState)
+    k = peek(ps)
+    if is_initial_reserved_word(ps, k)
+        return true
+    elseif is_contextual_keyword(k)
+        k2 = peek(ps, 2, skip_newlines=false)
+        return (k == K"mutable"   && k2 == K"struct") ||
+               (k == K"primitive" && k2 == K"type")   ||
+               (k == K"abstract"  && k2 == K"type")
+    else
+        return false
+    end
+end
+
+function is_block_form(k)
+    kind(k) in KSet"block quote if for while let function macro
+                    abstract primitive struct try module"
+end
+
+function is_syntactic_unary_op(k)
+    kind(k) in KSet"$ & ::"
+end
+
+function is_type_operator(t, isdot)
+    kind(t) in KSet"<: >:" && !isdot
+end
+
+function is_unary_op(t, isdot)
+    k = kind(t)
+    !is_suffixed(t) && (
+        (k in KSet"<: >:" && !isdot) ||
+        k in KSet"+ - ! ~ ¬ √ ∛ ∜ ⋆ ± ∓" # dotop allowed
+    )
+end
+
+# Operators that are both unary and binary
+function is_both_unary_and_binary(t, isdot)
+    k = kind(t)
+    # Preventing is_suffixed here makes this consistent with the flisp parser.
+    # But is this by design or happenstance?
+    !is_suffixed(t) && (
+        k in KSet"+ - ⋆ ± ∓" || (k in KSet"$ & ~" && !isdot)
+    )
+end
+
+function is_string_macro_suffix(k)
+    k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k)
+end
+
+# flisp: invalid-identifier?
+function is_valid_identifier(k)
+    k = kind(k)
+    !(is_syntactic_operator(k) || k in KSet"? .'")
+end
+
+# The expression is a call after stripping `where` and `::`
+function was_eventually_call(ps::ParseState)
+    stream = ps.stream
+    p = peek_behind_pos(ps)
+    while true
+        b = peek_behind(stream, p)
+        if b.kind == K"call"
+            return true
+        elseif b.kind == K"where" || b.kind == K"parens" ||
+                (b.kind == K"::" && has_flags(b.flags, INFIX_FLAG))
+            if b.kind == K"::"
+                p_last = last_child_position(ps, p)
+                if p == p_last
+                    return false
+                end
+            end
+            p = first_child_position(ps, p)
+        else
+            return false
+        end
+    end
+end
+
+
+#-------------------------------------------------------------------------------
+# Parser
+#
+# The definitions and top-level comments here were copied to match the
+# structure of Julia's previous flisp-based parser to make both codebases
+# mutually understandable and make porting changes simple.
+#
+# The `parse_*` functions are listed here roughly in order of increasing
+# precedence (lowest to highest binding power). A few helper functions are
+# interspersed.
+
+# parse left-to-right binary operator
+# produces structures like (+ (+ (+ 2 3) 4) 5)
+#
+# flisp: parse-LtoR
+function parse_LtoR(ps::ParseState, down, is_op)
+    mark = position(ps)
+    down(ps)
+    while true
+        isdot, tk = peek_dotted_op_token(ps)
+        is_op(tk) || break
+        isdot && bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
+        bump(ps, remap_kind=K"Identifier")
+        down(ps)
+        emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
+    end
+end
+
+# parse right-to-left binary operator
+# produces structures like (=> a (=> b (=> c d)))
+#
+# flisp: parse-RtoL
+function parse_RtoL(ps::ParseState, down, is_op, self)
+    mark = position(ps)
+    down(ps)
+    isdot, tk = peek_dotted_op_token(ps)
+    if is_op(tk)
+        bump_dotted(ps, isdot, remap_kind=K"Identifier")
+        self(ps)
+        emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
+    end
+end
+
+# parse block-like structures
+#
+# `delimiters` are a set of token kinds acting as delimiters; `closing_tokens`
+# stop the parsing.
+#
+# Returns true if the block was nontrivial and a node needs to be emitted by
+# the caller.
+#
+# flisp: parse-Nary
+function parse_Nary(ps::ParseState, down, delimiters, closing_tokens)
+    bump_trivia(ps)
+    k = peek(ps)
+    if k in closing_tokens
+        return true
+    end
+    n_delims = 0
+    if k in delimiters
+        # allow leading delimiters
+        # ; a  ==>  (block a)
+    else
+        # a ; b  ==>  (block a b)
+        down(ps)
+    end
+    while peek(ps) in delimiters
+        bump(ps, TRIVIA_FLAG)
+        n_delims += 1
+        k = peek(ps)
+        if k == K"EndMarker" || k in closing_tokens
+            break
+        elseif k in delimiters
+            # ignore empty delimited sections
+            # a;;;b  ==>  (block a b)
+            continue
+        end
+        down(ps)
+    end
+    return n_delims != 0
+end
+
+# Parse a sequence of top level statements separated by newlines, all wrapped
+# in a toplevel node.
+#
+#   a \n b ==>  (toplevel a b)
+#
+# Note that parse_stmts can also emit toplevel nodes for semicolon-separated
+# statements, so it's possible for these to be nested one level deep.
+#
+#   a;b \n c;d  ==>  (toplevel (toplevel a b) (toplevel c d))
+function parse_toplevel(ps::ParseState)
+    mark = position(ps)
+    while true
+        if peek(ps, skip_newlines=true) == K"EndMarker"
+            # Allow end of input if there is nothing left but whitespace
+            # a \n \n ==> (toplevel a)
+            # Empty files
+            #  ==> (toplevel)
+            bump_trivia(ps)
+            break
+        else
+            bump_trivia(ps)
+            parse_stmts(ps)
+        end
+    end
+    emit(ps, mark, K"toplevel")
+    nothing
+end
+
+# Parse a newline or semicolon-delimited list of expressions.
+# Repeated delimiters are allowed but ignored
+# a;b;c     ==>  (block a b c)
+# a;;;b;;   ==>  (block a b)
+# ;a        ==>  (block a)
+# \n a      ==>  (block a)
+# a \n b    ==>  (block a b)
+#
+# flisp: parse-block
+function parse_block(ps::ParseState, down=parse_eq, mark=position(ps))
+    parse_block_inner(ps::ParseState, down)
+    emit(ps, mark, K"block")
+end
+
+# Parse a block, but leave emitting the block up to the caller.
+function parse_block_inner(ps::ParseState, down::F) where {F <: Function}
+    parse_Nary(ps, down, KSet"NewlineWs ;", KSet"end else elseif catch finally")
+end
+
+# ";" at the top level produces a sequence of top level expressions
+#
+# a;b;c   ==>  (toplevel a b c)
+# a;;;b;; ==>  (toplevel a b)
+# "x" a ; "y" b ==>  (toplevel (doc (string "x") a) (doc (string "y") b))
+#
+# flisp: parse-stmts
+function parse_stmts(ps::ParseState)
+    mark = position(ps)
+    do_emit = parse_Nary(ps, parse_public, (K";",), (K"NewlineWs",))
+    # check for unparsed junk after an expression
+    junk_mark = position(ps)
+    while peek(ps) ∉ KSet"EndMarker NewlineWs"
+        # Error recovery
+        bump(ps)
+    end
+    if junk_mark != position(ps)
+        # x y  ==>  x (error-t y)
+        emit(ps, junk_mark, K"error", TRIVIA_FLAG,
+             error="extra tokens after end of expression")
+    end
+    if do_emit
+        emit(ps, mark, K"toplevel", TOPLEVEL_SEMICOLONS_FLAG)
+    end
+end
+
+# Parse `public foo, bar`
+#
+# We *only* call this from toplevel contexts (file and module level) for
+# compatibility. In the future we should probably make public a full fledged
+# keyword like `export`.
+function parse_public(ps::ParseState)
+    if ps.stream.version >= (1, 11) && peek(ps) == K"public"
+        if peek(ps, 2) ∈ KSet"( = ["
+            # this branch is for compatibility with use of public as a non-keyword.
+            # it should be removed at some point.
+            emit_diagnostic(ps, warning="using public as an identifier is deprecated")
+        else
+            return parse_resword(ps)
+        end
+    end
+    parse_docstring(ps)
+end
+
+# Parse docstrings attached by a space or single newline
+#
+# flisp: parse-docstring
+function parse_docstring(ps::ParseState, down=parse_eq)
+    mark = position(ps)
+    down(ps)
+    if peek_behind(ps).kind == K"string"
+        is_doc = true
+        k = peek(ps)
+        if is_closing_token(ps, k)
+            # "notdoc" ] ==> (string "notdoc")
+            is_doc = false
+        elseif k == K"NewlineWs"
+            k2 = peek(ps, 2)
+            if is_closing_token(ps, k2) || k2 == K"NewlineWs"
+                # "notdoc" \n]      ==> (string "notdoc")
+                # "notdoc" \n\n foo ==> (string "notdoc")
+                is_doc = false
+            else
+                # Allow a single newline
+                # "doc" \n foo ==> (doc (string "doc") foo)
+                bump(ps, TRIVIA_FLAG) # NewlineWs
+            end
+        else
+            # "doc" foo    ==> (doc (string "doc") foo)
+            # "doc $x" foo ==> (doc (string "doc " x) foo)
+            # Allow docstrings with embedded trailing whitespace trivia
+            # """\n doc\n """ foo ==> (doc (string-s "doc\n") foo)
+        end
+        if is_doc
+            down(ps)
+            emit(ps, mark, K"doc")
+        end
+    end
+end
+
+# Parse assignments with comma separated lists on each side
+# a = b         ==>  (= a b)
+# a .= b        ==>  (.= a b)
+# a += b        ==>  (+= a b)
+# a .+= b       ==>  (.+= a b)
+# a, b = c, d   ==>  (= (tuple a b) (tuple c d))
+# x, = xs       ==>  (= (tuple x) xs)
+#
+# flisp: parse-eq
+function parse_eq(ps::ParseState)
+    parse_assignment(ps, parse_comma)
+end
+
+# parse_eq_star is used where commas are special, for example in an argument list
+#
+# flisp: parse-eq*
+function parse_eq_star(ps::ParseState)
+    k = peek(ps)
+    k2 = peek(ps,2)
+    if (is_literal(k) || k == K"Identifier") && k2 in KSet", ) } ]"
+        # optimization: skip checking the whole precedence stack if we have a
+        # simple token followed by a common closing token
+        bump(ps)
+    else
+        parse_assignment(ps, parse_pair)
+    end
+end
+
+# a = b  ==>  (= a b)
+#
+# flisp: parse-assignment
+function parse_assignment(ps::ParseState, down)
+    mark = position(ps)
+    down(ps)
+    parse_assignment_with_initial_ex(ps, mark, down)
+end
+
+function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {T} # where => specialize on `down`
+    isdot, t = peek_dotted_op_token(ps)
+    k = kind(t)
+    if !is_prec_assignment(k)
+        return
+    end
+    if k == K"~"
+        if ps.space_sensitive && preceding_whitespace(t) && !preceding_whitespace(peek_token(ps, 2))
+            # Unary ~ in space sensitive context is not assignment precedence
+            # [a ~b]  ==>  (hcat a (call-pre ~ b))
+            return
+        end
+        # ~ is currently the only assignment-precedence operator which is parsed as a call.
+        # TODO: Make the other non-syntactic assignments such as `≔ ⩴ ≕` into calls as well?
+        # a ~ b      ==>  (call-i a ~ b)
+        # a .~ b     ==>  (dotcall-i a ~ b)
+        # [a ~ b c]  ==>  (hcat (call-i a ~ b) c)
+        # [a~b]      ==>  (vect (call-i a ~ b))
+        bump_dotted(ps, isdot, remap_kind=K"Identifier")
+        bump_trivia(ps)
+        parse_assignment(ps, down)
+        emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
+    else
+        # f() = 1  ==>  (function-= (call f) 1)
+        # f() .= 1 ==>  (.= (call f) 1)
+        # a += b   ==>  (+= a b)
+        # a .= b   ==>  (.= a b)
+        is_short_form_func = k == K"=" && !isdot && was_eventually_call(ps)
+        if k == K"op="
+            # x += y   ==>  (op= x + y)
+            # x .+= y  ==>  (.op= x + y)
+            bump_trivia(ps)
+            isdot && bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
+            bump_split(ps,
+                        (-1, K"Identifier", EMPTY_FLAGS),  # op
+                        (1, K"=", TRIVIA_FLAG))
+        else
+            bump_dotted(ps, isdot, TRIVIA_FLAG)
+        end
+        bump_trivia(ps)
+        # Syntax Edition TODO: We'd like to call `down` here when
+        # is_short_form_func is true, to prevent `f() = 1 = 2` from parsing.
+        parse_assignment(ps, down)
+        emit(ps, mark,
+             is_short_form_func ? K"function" : (isdot ? dotted(k) : k),
+             is_short_form_func ? SHORT_FORM_FUNCTION_FLAG : flags(t))
+    end
+end
+
+# parse_comma is needed for commas outside parens, for example a = b,c
+#
+# flisp: parse-comma
+function parse_comma(ps::ParseState, do_emit=true)
+    mark = position(ps)
+    n_commas = 0
+    parse_pair(ps)
+    while true
+        if peek(ps) != K","
+            if do_emit && n_commas >= 1
+                emit(ps, mark, K"tuple")
+            end
+            return n_commas
+        end
+        bump(ps, TRIVIA_FLAG)
+        n_commas += 1
+        if is_plain_equals(peek_token(ps))
+            # Allow trailing comma before `=`
+            # x, = xs  ==>  (tuple x)
+            continue
+        end
+        parse_pair(ps)
+    end
+end
+
+# flisp: parse-pair
+# a => b  ==>  (call-i a => b)
+# a .=> b ==>  (dotcall-i a => b)
+function parse_pair(ps::ParseState)
+    parse_RtoL(ps, parse_cond, is_prec_pair, parse_pair)
+end
+
+# Parse short form conditional expression
+# a ? b : c ==> (? a b c)
+#
+# flisp: parse-cond
+function parse_cond(ps::ParseState)
+    mark = position(ps)
+    parse_arrow(ps)
+    t = peek_token(ps)
+    if kind(t) != K"?"
+        return
+    end
+    if !preceding_whitespace(t)
+        # a? b : c  => (? a (error-t) b c)
+        bump_invisible(ps, K"error", TRIVIA_FLAG,
+                       error="space required before `?` operator")
+    end
+    bump(ps, TRIVIA_FLAG) # ?
+    t = peek_token(ps)
+    if !preceding_whitespace(t)
+        # a ?b : c  ==>  (? a (error-t) b c)
+        bump_invisible(ps, K"error", TRIVIA_FLAG,
+                       error="space required after `?` operator")
+    end
+    parse_eq_star(ParseState(ps, range_colon_enabled=false))
+    t = peek_token(ps)
+    if !preceding_whitespace(t)
+        # a ? b: c  ==>  (? a b (error-t) c)
+        bump_invisible(ps, K"error", TRIVIA_FLAG,
+                       error="space required before `:` in `?` expression")
+    end
+    if kind(t) == K":"
+        bump(ps, TRIVIA_FLAG)
+    else
+        # a ? b c  ==>  (? a b (error-t) c)
+        bump_invisible(ps, K"error", TRIVIA_FLAG, error="`:` expected in `?` expression")
+    end
+    t = peek_token(ps; skip_newlines = true)
+    if !preceding_whitespace(t)
+        # a ? b :c  ==>  (? a b (error-t) c)
+        bump_invisible(ps, K"error", TRIVIA_FLAG,
+                       error="space required after `:` in `?` expression")
+    end
+
+    # FIXME: This is a very specific case. Error recovery should be handled more
+    # generally elsewhere.
+    if is_block_continuation_keyword(ps, kind(t))
+        # a "continuation keyword" is likely to belong to the surrounding code, so
+        # we abort early
+
+        # if true; x ? true elseif true end  ==> (if true (block (if x true (error-t) (error-t))) (elseif true (block)))
+        # if true; x ? true end  ==> (if true (block (if x true (error-t) (error-t))))
+        # if true; x ? true\n end  ==> (if true (block (if x true (error-t) (error-t))))
+        # if true; x ? true : elseif true end  ==> (if true (block (if x true (error-t))) (elseif true (block)))
+        bump_invisible(ps, K"error", TRIVIA_FLAG, error="unexpected `$(kind(t))`")
+        emit(ps, mark, K"if")
+        return
+    else
+        # A[x ? y : end] ==> (ref A (? x y end))
+    end
+    parse_eq_star(ps)
+    emit(ps, mark, K"?")
+end
+
+# Parse arrows.  Like parse_RtoL, but specialized for --> syntactic operator
+#
+# flisp: parse-arrow
+function parse_arrow(ps::ParseState)
+    mark = position(ps)
+    parse_or(ps)
+    isdot, t = peek_dotted_op_token(ps)
+    k = kind(t)
+    if is_prec_arrow(k)
+        if kind(t) == K"-->" && !isdot && !is_suffixed(t)
+            # x --> y   ==>  (--> x y)           # The only syntactic arrow
+            bump(ps, TRIVIA_FLAG)
+            parse_arrow(ps)
+            emit(ps, mark, k, flags(t))
+        else
+            # x → y     ==>  (call-i x → y)
+            # x <--> y  ==>  (call-i x <--> y)
+            # x .--> y  ==>  (dotcall-i x --> y)
+            # x -->₁ y  ==>  (call-i x -->₁ y)
+            bump_dotted(ps, isdot, remap_kind=K"Identifier")
+            parse_arrow(ps)
+            emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
+        end
+    end
+end
+
+function dotted(k)
+    if k == K"||"
+        return K".||"
+    elseif k == K"&&"
+        return K".&&"
+    elseif k == K"="
+        return K".="
+    elseif k == K"op="
+        return K".op="
+    else
+        error("Unexpected dotted operator: $k")
+    end
+end
+
+# Like parse_RtoL, but specialized for the version test of dotted operators.
+function parse_lazy_cond(ps::ParseState, down, is_op, self)
+    mark = position(ps)
+    down(ps)
+    (isdot, t) = peek_dotted_op_token(ps)
+    k = kind(t)
+    if is_op(k)
+        bump_dotted(ps, isdot, TRIVIA_FLAG)
+        self(ps)
+        emit(ps, mark, isdot ? dotted(k) : k, flags(t))
+        if isdot
+            min_supported_version(v"1.7", ps, mark, "dotted operators `.||` and `.&&`")
+        end
+    end
+end
+
+# x || y || z   ==>   (|| x (|| y z))
+#v1.6: x .|| y  ==>   (error (.|| x y))
+#v1.7: x .|| y  ==>   (.|| x y)
+#
+# flisp: parse-or
+function parse_or(ps::ParseState)
+    parse_lazy_cond(ps, parse_and, is_prec_lazy_or, parse_or)
+end
+
+# x && y && z   ==>   (&& x (&& y z))
+#v1.6: x .&& y  ==>   (error (.&& x y))
+#v1.7: x .&& y  ==>   (.&& x y)
+#
+# flisp: parse-and
+function parse_and(ps::ParseState)
+    parse_lazy_cond(ps, parse_comparison, is_prec_lazy_and, parse_and)
+end
+
+# Parse binary comparisons and comparison chains
+#
+# flisp: parse-comparison
+function parse_comparison(ps::ParseState, subtype_comparison=false)
+    mark = position(ps)
+    if subtype_comparison && is_reserved_word(peek(ps))
+        # Recovery
+        # struct try end  ==>  (struct (error (try)) (block))
+        name = untokenize(peek(ps))
+        bump(ps)
+        emit(ps, mark, K"error", error="Invalid type name `$name`")
+    else
+        parse_pipe_lt(ps)
+    end
+    n_comparisons = 0
+    op_pos = NO_POSITION
+    op_dotted = false
+    (initial_dot, initial_tok) = peek_dotted_op_token(ps)
+    while ((isdot, t) = peek_dotted_op_token(ps); is_prec_comparison(t))
+        n_comparisons += 1
+        op_dotted = isdot
+        op_pos = bump_dotted(ps, isdot, emit_dot_node=true, remap_kind=K"Identifier")
+        parse_pipe_lt(ps)
+    end
+    if n_comparisons == 1
+        if is_type_operator(initial_tok, initial_dot)
+            # Type comparisons are syntactic
+            # x <: y  ==>  (<: x y)
+            # x >: y  ==>  (>: x y)
+            reset_node!(ps, op_pos, flags=TRIVIA_FLAG)
+            emit(ps, mark, kind(initial_tok))
+        else
+            # Normal binary comparisons
+            # x < y    ==>  (call-i x < y)
+            # x .< y   ==>  (dotcall-i x < y)
+            if op_dotted
+                # Reset the extra (non-terminal) K"." (e.g. in `(. <)`) node to just `. <`
+                reset_node!(ps, op_pos, kind=K"TOMBSTONE", flags=TRIVIA_FLAG)
+            end
+            emit(ps, mark, op_dotted ? K"dotcall" : K"call", INFIX_FLAG)
+        end
+    elseif n_comparisons > 1
+        # Comparison chains
+        # x < y < z    ==> (comparison x < y < z)
+        # x == y < z   ==> (comparison x == y < z)
+        # x .< y .< z  ==> (comparison x (. <) y (. <) z)
+        # x .< y < z   ==> (comparison x (. <) y < z)
+        emit(ps, mark, K"comparison")
+    end
+end
+
+# x <| y <| z  ==>  (call-i x <| (call-i y <| z))
+# flisp: parse-pipe<
+function parse_pipe_lt(ps::ParseState)
+    parse_RtoL(ps, parse_pipe_gt, is_prec_pipe_lt, parse_pipe_lt)
+end
+
+# x |> y |> z  ==>  (call-i (call-i x |> y) |> z)
+# x .|> y      ==>  (dotcall-i x |> y)
+# flisp: parse-pipe>
+function parse_pipe_gt(ps::ParseState)
+    parse_LtoR(ps, parse_range, is_prec_pipe_gt)
+end
+
+# parse ranges and postfix ...
+# colon is strange; 3 arguments with 2 colons yields one call:
+# 1:2       ==> (call-i 1 : 2)
+# 1:2:3     ==> (call-i 1 : 2 3)
+# Chaining gives
+# a:b:c:d:e ==> (call-i (call-i a : b c) : d e)
+#
+# flisp: parse-range
+function parse_range(ps::ParseState)
+    mark = position(ps)
+    parse_invalid_ops(ps)
+    (initial_dot, initial_tok) = peek_dotted_op_token(ps)
+    initial_kind = kind(initial_tok)
+    if initial_kind != K":" && is_prec_colon(initial_kind)
+        # a..b     ==>   (call-i a .. b)
+        # a … b    ==>   (call-i a … b)
+        # a .… b    ==>  (dotcall-i a … b)
+        bump_dotted(ps, initial_dot, remap_kind=K"Identifier")
+        parse_invalid_ops(ps)
+        emit(ps, mark, initial_dot ? K"dotcall" : K"call", INFIX_FLAG)
+    elseif initial_kind == K":" && ps.range_colon_enabled
+        # a ? b : c:d   ==>   (? a b (call-i c : d))
+        n_colons = 0
+        while peek(ps) == K":"
+            if ps.space_sensitive &&
+                    preceding_whitespace(peek_token(ps)) &&
+                    !preceding_whitespace(peek_token(ps, 2))
+                # Tricky cases in space sensitive mode
+                # [1 :a]      ==>  (hcat 1 (quote-: a))
+                # [1 2:3 :a]  ==>  (hcat 1 (call-i 2 : 3) (quote-: a))
+                break
+            end
+            t2 = peek_token(ps,2)
+            if kind(t2) in KSet"< >" && !preceding_whitespace(t2)
+                # Error heuristic: we found `:>` or `:<` which are invalid lookalikes
+                # for `<:` and `>:`. Attempt to recover by treating them as a
+                # comparison operator.
+                # a :> b   ==>  (call-i a (error : >) b)
+                bump_trivia(ps, skip_newlines=false)
+                emark = position(ps)
+                bump(ps, remap_kind=K"Identifier") # K":"
+                ks = untokenize(peek(ps))
+                bump(ps, remap_kind=K"Identifier") # K"<" or K">"
+                emit(ps, emark, K"error",
+                     error="Invalid `:$ks` found, maybe replace with `$ks:`")
+                parse_invalid_ops(ps)
+                emit(ps, mark, K"call", INFIX_FLAG)
+                break
+            end
+            n_colons += 1
+            bump(ps, n_colons == 1 ? EMPTY_FLAGS : TRIVIA_FLAG; remap_kind=K"Identifier")
+            had_newline = peek(ps) == K"NewlineWs"
+            t = peek_token(ps)
+            if is_closing_token(ps, kind(t))
+                # 1: }    ==>  (call-i 1 : (error))
+                # 1:2: }  ==>  (call-i 1 : 2 (error))
+                bump_invisible(ps, K"error",
+                               error="missing last argument in range expression")
+                emit(ps, mark, K"call", INFIX_FLAG)
+                emit_diagnostic(ps, error="found unexpected closing token")
+                return
+            end
+            if had_newline
+                # Error message for people coming from python
+                # 1:\n2   ==> (call-i 1 : (error))
+                # (1:\n2) ==> (parens (call-i 1 : 2))
+                emit_diagnostic(ps, whitespace=true,
+                                error="line break after `:` in range expression")
+                bump_invisible(ps, K"error")
+                emit(ps, mark, K"call", INFIX_FLAG)
+                return
+            end
+            parse_invalid_ops(ps)
+            if n_colons == 2
+                emit(ps, mark, K"call", INFIX_FLAG)
+                n_colons = 0
+            end
+        end
+        if n_colons > 0
+            emit(ps, mark, K"call", INFIX_FLAG)
+        end
+    end
+
+    # x...     ==>  (... x)
+    # x:y...   ==>  (... (call-i x : y))
+    # x..y...  ==>  (... (call-i x .. y))   # flisp parser fails here
+    if peek(ps) == K"..."
+        bump(ps, TRIVIA_FLAG)
+        emit(ps, mark, K"...")
+    end
+end
+
+# Parse invalid binary operators
+#
+# Having this is unnecessary, but it improves error messages and the
+# error-containing parse tree.
+#
+# a--b  ==>  (call-i a (error) b)
+function parse_invalid_ops(ps::ParseState)
+    mark = position(ps)
+    parse_expr(ps)
+    while ((isdot, t) = peek_dotted_op_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**")
+        bump_trivia(ps)
+        bump_dotted(ps, isdot)
+        parse_expr(ps)
+        emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
+    end
+end
+
+# a - b - c  ==>  (call-i (call-i a - b) - c)
+# a + b + c  ==>  (call-i a + b c)
+# a .+ b     ==>  (dotcall-i a + b)
+#
+# flisp: parse-expr
+function parse_expr(ps::ParseState)
+    parse_with_chains(ps, parse_term, is_prec_plus, KSet"+ ++")
+end
+
+# a * b * c  ==>  (call-i a * b c)
+#
+# flisp: parse-term
+function parse_term(ps::ParseState)
+    parse_with_chains(ps, parse_rational, is_prec_times, KSet"*")
+end
+
+# Parse left to right, combining any of `chain_ops` into one call
+#
+# flisp: parse-with-chains
+function parse_with_chains(ps::ParseState, down, is_op, chain_ops)
+    mark = position(ps)
+    down(ps)
+    while ((isdot, t) = peek_dotted_op_token(ps); is_op(kind(t)))
+        if ps.space_sensitive && preceding_whitespace(t) &&
+                is_both_unary_and_binary(t, isdot) &&
+                !preceding_whitespace(peek_token(ps, 2))
+            # The following is two elements of a hcat
+            # [x +y]     ==>  (hcat x (call-pre + y))
+            # [x+y +z]   ==>  (hcat (call-i x + y) (call-pre + z))
+            # Conversely the following are infix calls
+            # [x +₁y]    ==>  (vect (call-i x +₁ y))
+            # [x+y+z]    ==>  (vect (call-i x + y z))
+            # [x+y + z]  ==>  (vect (call-i x + y z))
+            break
+        end
+        bump_dotted(ps, isdot, remap_kind=K"Identifier")
+        down(ps)
+        if kind(t) in chain_ops && !is_suffixed(t) && !isdot
+            # a + b + c    ==>  (call-i a + b c)
+            # a + b .+ c   ==>  (dotcall-i (call-i a + b) + c)
+            parse_chain(ps, down, kind(t))
+        end
+        # a +₁ b +₁ c  ==>  (call-i (call-i a +₁ b) +₁ c)
+        # a .+ b .+ c  ==>  (dotcall-i (dotcall-i a + b) + c)
+        emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
+    end
+end
+
+# parse left to right chains of a given binary operator
+#
+# flisp: parse-chain
+function parse_chain(ps::ParseState, down, op_kind)
+    while true
+        isdot, t = peek_dotted_op_token(ps)
+        if kind(t) != op_kind || is_suffixed(t) || isdot
+            break
+        end
+        if ps.space_sensitive && preceding_whitespace(t) &&
+            is_both_unary_and_binary(t, false) &&
+            !preceding_whitespace(peek_token(ps, 2))
+            # [x +y]  ==>  (hcat x (call-pre + y))
+            break
+        end
+        bump(ps, TRIVIA_FLAG)
+        down(ps)
+    end
+end
+
+# flisp: parse-rational
+# x // y // z  ==>  (call-i (call-i x // y) // z)
+function parse_rational(ps::ParseState)
+    parse_LtoR(ps, parse_shift, is_prec_rational)
+end
+
+# flisp: parse-shift
+# x >> y >> z  ==>  (call-i (call-i x >> y) >> z)
+function parse_shift(ps::ParseState)
+    parse_LtoR(ps, parse_unary_subtype, is_prec_bitshift)
+end
+
+# parse `<: A where B` as `<: (A where B)` (issue #21545)
+#
+# flisp: parse-unary-subtype
+function parse_unary_subtype(ps::ParseState)
+    t = peek_token(ps)
+    if is_type_operator(t, false)
+        k2 = peek(ps, 2)
+        if is_closing_token(ps, k2) || k2 in KSet"NewlineWs ="
+            # return operator by itself
+            # <: )  ==>  <:
+            # <: \n ==>  <:
+            # <: =  ==>  <:
+            bump(ps)
+        elseif k2 in KSet"{ ("
+            # parse <:{T}(x::T) or <:(x::T) like other unary operators
+            # <:{T}(x::T)  ==>  (call (curly <: T) (:: x T))
+            # <:(x::T)     ==>  (<:-pre (parens (:: x T)))
+            parse_where(ps, parse_juxtapose)
+        else
+            # <: x          ==>  (<:-pre x)
+            # <: A where B  ==>  (<:-pre (where A B))
+            # <: <: x       ==>  (<:-pre (<:-pre x))
+            mark = position(ps)
+            bump(ps, TRIVIA_FLAG)
+            parse_unary_subtype(ps)
+            emit(ps, mark, kind(t), PREFIX_OP_FLAG)
+        end
+    else
+        parse_where(ps, parse_juxtapose)
+    end
+end
+
+# flisp: parse-where-chain
+function parse_where_chain(ps0::ParseState, mark)
+    ps = ParseState(ps0, where_enabled=false)
+    while peek(ps) == K"where"
+        bump(ps, TRIVIA_FLAG) # where
+        bump_trivia(ps)
+        k = peek(ps)
+        if k == K"{"
+            # x where \n {T}  ==>  (where x (braces T))
+            # x where {T,S}  ==>  (where x (braces T S))
+            # Also various nonsensical forms permitted
+            # x where {T S}  ==>  (where x (bracescat (row T S)))
+            # x where {y for y in ys}  ==>  (where x (braces (generator y (iteration (in y ys)))))
+            m = position(ps)
+            bump(ps, TRIVIA_FLAG)
+            ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol)
+            emit_braces(ps, m, ckind, cflags, dim)
+            emit(ps, mark, K"where")
+        else
+            # x where T     ==>  (where x T)
+            # x where \n T  ==>  (where x T)
+            # x where T<:S  ==>  (where x (<: T S))
+            parse_comparison(ps)
+            emit(ps, mark, K"where")
+        end
+    end
+end
+
+# flisp: parse-where
+function parse_where(ps::ParseState, down)
+    # `where` needs to be below unary for the following to work
+    # +(x::T,y::T) where {T} = x
+    mark = position(ps)
+    down(ps)
+    if ps.where_enabled && peek(ps) == K"where"
+        parse_where_chain(ps, mark)
+    end
+end
+
+# Juxtaposition. Kinda ugh but soo useful for units and Field identities like `im`
+#
+# flisp: parse-juxtapose
+function parse_juxtapose(ps::ParseState)
+    mark = position(ps)
+    parse_unary(ps)
+    n_terms = 1
+    while true
+        t = peek_token(ps)
+        k = kind(t)
+        prev_k = peek_behind(ps).kind
+        is_juxtapose = false
+        if !preceding_whitespace(t) &&
+                (is_number(prev_k) ||
+                    (!is_number(k) &&  # disallow "x.3" and "f(2)2"
+                     k != K"@"     &&  # disallow "x@y"
+                     !(is_block_form(prev_k)         ||
+                       is_syntactic_unary_op(prev_k) ||
+                       is_initial_reserved_word(ps, prev_k) )))  &&
+                (!is_operator(k) || is_radical_op(k))            &&
+                !is_closing_token(ps, k)
+            if prev_k == K"string" || is_string_delim(t)
+                bump_invisible(ps, K"error", TRIVIA_FLAG,
+                               error="cannot juxtapose string literal")
+                # JuliaLang/julia#20575
+                # Error, but assume juxtapose for recovery
+                # "a""b"  ==>  (juxtapose (string "a") (error-t) (string "b"))
+                # "a"x    ==>  (juxtapose (string "a") (error-t) x)
+                # "$y"x   ==>  (juxtapose (string y) (error-t) x)
+                # "a"begin end  ==> (juxtapose (string \"a\") (error-t) (block))
+                is_juxtapose = true
+            elseif !is_initial_reserved_word(ps, k)
+                # 2x       ==>  (juxtapose 2 x)
+                # 2(x)     ==>  (juxtapose 2 (parens x))
+                # (2)(3)x  ==>  (juxtapose (parens 2) (parens 3) x)
+                # (x-1)y   ==>  (juxtapose (parens (call-i x - 1)) y)
+                # x'y      ==>  (juxtapose (call-post x ') y)
+                # 1√x      ==>  (juxtapose 1 (call-pre √ x))
+                is_juxtapose = true
+            end
+        end
+        if !is_juxtapose
+            # x.3       ==>  x
+            # f(2)2     ==>  (call f 2)
+            # x' y      ==>  (call-post x ')
+            # x 'y      ==>  x
+            # x@y       ==>  x
+            break
+        end
+        if is_radical_op(t)
+            parse_unary(ps)
+        else
+            parse_factor(ps)
+        end
+        n_terms += 1
+    end
+    if n_terms > 1
+        emit(ps, mark, K"juxtapose")
+    end
+end
+
+# Parse numeric literal prefixes, calls to unary operators and prefix
+# calls involving arbitrary operators with bracketed arglists (as opposed to
+# infix notation)
+#
+# flisp: parse-unary, parse-unary-call
+function parse_unary(ps::ParseState)
+    mark = position(ps)
+    bump_trivia(ps)
+    (op_dotted, op_t) = peek_dotted_op_token(ps)
+    op_k = kind(op_t)
+    if (
+            !is_operator(op_k)           ||
+            is_word_operator(op_k)       ||
+            (op_k in KSet": ' .'")       ||
+            (is_syntactic_unary_op(op_k) && !op_dotted) ||
+            is_syntactic_operator(op_k)
+        )
+        # `op_t` is not an initial operator
+        # :T      ==>  (quote-: T)
+        # in::T   ==>  (:: in T)
+        # isa::T  ==>  (:: isa T)
+        parse_factor(ps)
+        return
+    end
+    t2 = peek_token(ps, 2+op_dotted)
+    k2 = kind(t2)
+    if op_k in KSet"- +" && !is_suffixed(op_t) && !op_dotted
+        if !preceding_whitespace(t2) && (k2 in KSet"Integer Float Float32" ||
+                                         (op_k == K"+" && k2 in KSet"BinInt HexInt OctInt"))
+
+            k3 = peek(ps, 3)
+            if is_prec_power(k3) || k3 in KSet"[ {"
+                # `[`, `{` (issue #18851) and `^` have higher precedence than
+                # unary negation
+                # -2^x      ==>  (call-pre - (call-i 2 ^ x))
+                # -2[1, 3]  ==>  (call-pre - (ref 2 1 3))
+                bump(ps, remap_kind=K"Identifier")
+                parse_factor(ps)
+                emit(ps, mark, K"call", PREFIX_OP_FLAG)
+            else
+                # We have a signed numeric literal. Glue the operator to the
+                # next token to create a signed literal:
+                # -2      ==>  -2
+                # +2.0    ==>  2.0
+                # -1.0f0  ==>  -1.0f0
+                # -2*x    ==>  (call-i -2 * x)
+                # +0xff   ==>  0xff
+                bump_glue(ps, kind(t2), EMPTY_FLAGS)
+                parse_factor_with_initial_ex(ps, mark)
+            end
+            return
+        end
+    end
+    if is_closing_token(ps, k2) || k2 in KSet"NewlineWs ="
+        # Standalone operators parsed as `op` or `(. op)`
+        # +)   ==>  +
+        # +\n  ==>  +
+        # + =  ==>  +
+        # .+   ==>  (. +)
+        # .&   ==>  (. &)
+        parse_atom(ps)
+    elseif k2 == K"{" || (!is_unary_op(op_t, op_dotted) && k2 == K"(")
+        # Call with type parameters or non-unary prefix call
+        # +{T}(x::T)  ==>  (call (curly + T) (:: x T))
+        # *(x)  ==>  (call * x)
+        # .*(x) ==>  (call .* x)
+        parse_factor(ps)
+    elseif k2 == K"("
+        # Cases like +(a;b) are ambiguous: are they prefix calls to + with b as
+        # a keyword argument, or is `a;b` a block?  We resolve this with a
+        # simple heuristic: if there were any commas (or an initial splat), it
+        # was a function call.
+        #
+        # (The flisp parser only considers commas before `;` and thus gets this
+        # last case wrong)
+        op_pos = bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier")
+
+        space_before_paren = preceding_whitespace(t2)
+        if space_before_paren
+            # Setup possible whitespace error between operator and (
+            ws_mark = position(ps)
+            bump_trivia(ps)
+            ws_error_pos = emit(ps, ws_mark, K"TOMBSTONE")
+            ws_mark_end = position(ps)
+        end
+
+        mark_before_paren = position(ps)
+        bump(ps, TRIVIA_FLAG) # (
+        initial_semi = peek(ps, skip_newlines=true) == K";"
+        opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs
+            is_paren_call = had_commas || had_splat               ||
+                            (initial_semi && num_subexprs > 0)    ||
+                            (initial_semi && num_semis == 1)      ||
+                            (num_semis == 0 && num_subexprs == 0)
+            return (needs_parameters=is_paren_call,
+                    is_paren_call=is_paren_call,
+                    is_block=!is_paren_call && num_semis > 0)
+        end
+
+        # The precedence between unary + and any following infix ^ depends on
+        # whether the parens are a function call or not
+        if opts.is_paren_call
+            if space_before_paren
+                # Whitespace not allowed before prefix function call bracket
+                # + (a,b)   ==> (call + (error) a b)
+                reset_node!(ps, ws_error_pos, kind=K"error")
+                emit_diagnostic(ps, ws_mark, ws_mark_end,
+                                error="whitespace not allowed between prefix function call and argument list")
+            end
+            # Prefix function calls for operators which are both binary and unary
+            # +(a,b)    ==>  (call + a b)
+            # +(a=1,)   ==>  (call + (= a 1))
+            # +(a...)   ==>  (call + (... a))
+            # +(a;b,c)  ==>  (call + a (parameters b c))
+            # +(;a)     ==>  (call + (parameters a))
+            # +()       ==>  (call +)
+            # Prefix calls have higher precedence than ^
+            # +(a,b)^2  ==>  (call-i (call + a b) ^ 2)
+            # +(a,b)(x)^2  ==>  (call-i (call (call + a b) x) ^ 2)
+            if is_type_operator(op_t, op_dotted)
+                # <:(a,)  ==>  (<: a)
+                emit(ps, mark, op_k, opts.delim_flags)
+                reset_node!(ps, op_pos, flags=TRIVIA_FLAG, kind=op_k)
+            else
+                emit(ps, mark, K"call", opts.delim_flags)
+            end
+            parse_call_chain(ps, mark)
+            parse_factor_with_initial_ex(ps, mark)
+        else
+            # Unary function calls with brackets as grouping, not an arglist
+            # .+(a)    ==>  (dotcall-pre + (parens a))
+            if opts.is_block
+                # +(a;b)   ==>  (call-pre + (block-p a b))
+                emit(ps, mark_before_paren, K"block", PARENS_FLAG)
+            else
+                emit(ps, mark_before_paren, K"parens")
+            end
+            # Not a prefix operator call but a block; `=` is not `kw`
+            # +(a=1)  ==>  (call-pre + (parens (= a 1)))
+            # Unary operators have lower precedence than ^
+            # +(a)^2  ==>  (call-pre + (call-i (parens a) ^ 2))
+            # .+(a)^2  ==>  (dotcall-pre + (call-i (parens a) ^ 2))
+            # +(a)(x,y)^2  ==>  (call-pre + (call-i (call (parens a) x y) ^ 2))
+            parse_call_chain(ps, mark_before_paren)
+            parse_factor_with_initial_ex(ps, mark_before_paren)
+            if is_type_operator(op_t, op_dotted)
+                # <:(a)  ==>  (<:-pre (parens a))
+                emit(ps, mark, op_k, PREFIX_OP_FLAG)
+                reset_node!(ps, op_pos, flags=TRIVIA_FLAG, kind=op_k)
+            else
+                if op_dotted
+                    emit(ps, mark, K"dotcall", PREFIX_OP_FLAG)
+                    # Reset the extra (non-terminal) K"." (e.g. in `(. +)`) node to just `. +`
+                    reset_node!(ps, op_pos, kind=K"TOMBSTONE")
+                else
+                    emit(ps, mark, K"call", PREFIX_OP_FLAG)
+                end
+            end
+        end
+    else
+        if is_unary_op(op_t, op_dotted)
+            # Normal unary calls
+            # +x  ==>  (call-pre + x)
+            # √x  ==>  (call-pre √ x)
+            # .~x ==>  (dotcall-pre ~ x)
+            # Things which are not quite negative literals
+            # -0x1 ==> (call-pre - 0x01)
+            # - 2  ==> (call-pre - 2)
+            # .-2  ==> (dotcall-pre - 2)
+            op_pos = bump_dotted(ps, op_dotted, remap_kind=K"Identifier")
+        else
+            # /x     ==>  (call-pre (error /) x)
+            # +₁ x   ==>  (call-pre (error +₁) x)
+            # .<: x  ==>  (dotcall-pre (error (. <:)) x)
+            bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier")
+            op_pos = emit(ps, mark, K"error", error="not a unary operator")
+        end
+        parse_unary(ps)
+        if is_type_operator(op_t, op_dotted)
+            reset_node!(ps, op_pos, flags=TRIVIA_FLAG)
+            emit(ps, mark, op_k, PREFIX_OP_FLAG)
+        else
+            emit(ps, mark, op_dotted ? K"dotcall" : K"call", PREFIX_OP_FLAG)
+        end
+    end
+end
+
+# handle ^ and .^
+#
+# x^y    ==>  (call-i x ^ y)
+# x^y^z  ==>  (call-i x ^ (call-i y ^ z))
+# x .^ y ==>  (dotcall-i x ^ y)
+# begin x end::T  ==>  (:: (block x) T)
+#
+# flisp: parse-factor
+function parse_factor(ps::ParseState)
+    mark = position(ps)
+    parse_call(ps)
+    parse_factor_with_initial_ex(ps, mark)
+end
+
+# flisp: parse-factor-with-initial-ex
+function parse_factor_with_initial_ex(ps::ParseState, mark)
+    parse_decl_with_initial_ex(ps, mark)
+    if ((isdot, t) = peek_dotted_op_token(ps); is_prec_power(kind(t)))
+        bump_dotted(ps, isdot, remap_kind=K"Identifier")
+        parse_factor_after(ps)
+        emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
+    end
+end
+
+# flisp: parse-factor-after
+function parse_factor_after(ps::ParseState)
+    parse_RtoL(ps, parse_juxtapose, is_prec_power, parse_factor_after)
+end
+
+# Parse type declarations and lambda syntax
+# a::b      ==>   (::-i a b)
+# a->b      ==>   (-> a b)
+#
+# flisp: parse-decl-with-initial-ex
+function parse_decl_with_initial_ex(ps::ParseState, mark)
+    while peek(ps) == K"::"
+        # a::b::c   ==>   (::-i (::-i a b) c)
+        bump(ps, TRIVIA_FLAG)
+        parse_where(ps, parse_call)
+        emit(ps, mark, K"::", INFIX_FLAG)
+    end
+    if peek(ps) == K"->"
+        kb = peek_behind(ps).kind
+        if kb == K"tuple"
+            # (x,y) -> z
+            # (x) -> y
+            # (x; a=1) -> y
+        elseif kb == K"where"
+            # `where` and `->` have the "wrong" precedence when writing anon functions.
+            # So ignore this case to allow use of grouping brackets with `where`.
+            # This needs to worked around in lowering :-(
+            # (x where T) -> y  ==>  (-> (x where T) y)
+        else
+            # x -> y    ==>  (-> (tuple x) y)
+            # a::b->c   ==>  (-> (tuple (::-i a b)) c)
+            emit(ps, mark, K"tuple")
+        end
+        bump(ps, TRIVIA_FLAG)
+        # -> is unusual: it binds tightly on the left and loosely on the right.
+        parse_eq_star(ps)
+        emit(ps, mark, K"->")
+    end
+end
+
+# parse function call, indexing, dot, and transpose expressions
+# also handles looking for syntactic reserved words
+#
+# flisp: parse-call
+function parse_call(ps::ParseState)
+    if peek_initial_reserved_words(ps)
+        parse_resword(ps)
+    else
+        mark = position(ps)
+        # f(x)   ==>  (call f x)
+        # $f(x)  ==>  (call ($ f) x)
+        parse_unary_prefix(ps)
+        parse_call_chain(ps, mark)
+    end
+end
+
+# parse syntactic unary operators
+#
+# &a   ==>  (& a)
+# ::a  ==>  (::-pre a)
+# $a   ==>  ($ a)
+#
+# flisp: parse-unary-prefix
+function parse_unary_prefix(ps::ParseState, has_unary_prefix=false)
+    mark = position(ps)
+    (isdot, t) = peek_dotted_op_token(ps)
+    k = kind(t)
+    if is_syntactic_unary_op(k) && !isdot
+        k2 = peek(ps, 2)
+        if k in KSet"& $" && (is_closing_token(ps, k2) || k2 == K"NewlineWs")
+            # &)   ==>  &
+            # $\n  ==>  $
+            bump(ps)
+        else
+            bump(ps, TRIVIA_FLAG)
+            if k in KSet"& ::"
+                # &a   ==>  (& a)
+                parse_where(ps, parse_call)
+            else
+                # $a   ==>  ($ a)
+                # $$a  ==>  ($ ($ a))
+                # $&a  ==>  ($ (& a))
+                parse_unary_prefix(ps, true)
+            end
+            # Only need PREFIX_OP_FLAG for ::
+            f = k == K"::" ? PREFIX_OP_FLAG : EMPTY_FLAGS
+            emit(ps, mark, k, f)
+        end
+    else
+        # .&(x,y)  ==>  (call .& x y)
+        parse_atom(ps, true, has_unary_prefix)
+    end
+end
+
+function maybe_parsed_macro_name(ps, processing_macro_name, last_identifier_orig_kind, mark)
+    if processing_macro_name
+        emit(ps, mark, K"macro_name")
+        maybe_parsed_special_macro(ps, last_identifier_orig_kind)
+    end
+    return false
+end
+
+function maybe_parsed_special_macro(ps, last_identifier_orig_kind)
+    is_syntax_version_macro = last_identifier_orig_kind == K"VERSION"
+    if is_syntax_version_macro && ps.stream.version >= (1, 14)
+        # Encode the current parser version into an invisible token
+        bump_invisible(ps, K"VERSION",
+            set_numeric_flags(ps.stream.version[2] * 10))
+    end
+end
+
+# Parses a chain of suffixes at function call precedence, leftmost binding
+# tightest. This handles
+#  * Bracketed calls like a() b[] c{}
+#  * Field access like a.b.c
+#    - Various dotted syntax like f.() and f.:x
+#  * Adjoint suffix like a'
+#  * String macros like a"str" b"""str""" c`str` d```str```
+#
+# f(a).g(b) ==> (call (. (call f a) g) b)
+#
+# flisp: parse-call-chain, parse-call-with-initial-ex
+function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
+    if is_number(peek_behind(ps).kind) && peek(ps) == K"("
+        # juxtaposition with numbers is multiply, not call
+        # 2(x) ==> (* 2 x)
+        return
+    end
+    processing_macro_name = is_macrocall
+    saw_misplaced_atsym = false
+    misplaced_atsym_mark = nothing
+    # source range of the @-prefixed part of a macro
+    macro_atname_range = nothing
+    # $A.@x  ==>  (macrocall (. ($ A) (macro_name x)))
+    maybe_strmac = true
+    last_identifier_pos = peek_behind_pos(ps)
+    last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind
+    while true
+        maybe_strmac_1 = false
+        t = peek_token(ps)
+        k = kind(t)
+        if !is_macrocall && ps.space_sensitive && preceding_whitespace(t) &&
+                k in KSet"( [ { \" \"\"\" ` ```"
+            # [f (x)]  ==>  (hcat f (parens x))
+            # [f x]    ==>  (hcat f x)
+            break
+        elseif is_macrocall && (preceding_whitespace(t) || !(k in KSet"( [ { ' ."))
+            # Macro calls with space-separated arguments
+            # @foo a b    ==> (macrocall (macro_name foo) a b)
+            # @foo (x)    ==> (macrocall (macro_name foo) (parens x))
+            # @foo (x,y)  ==> (macrocall (macro_name foo) (tuple-p x y))
+            # [@foo x]    ==> (vect (macrocall (macro_name foo) x))
+            # [@foo]      ==> (vect (macrocall (macro_name foo)))
+            # @var"#" a   ==> (macrocall (macro_name (var #)) a)
+            # A.@x y      ==> (macrocall (. A (macro_name x)) y)
+            # A.@var"#" a ==> (macrocall (. A (macro_name (var #))) a)
+            # @+x y       ==> (macrocall (macro_name +) x y)
+            # A.@.x       ==> (macrocall (. A (macro_name .)) x)
+            processing_macro_name = maybe_parsed_macro_name(
+                ps, processing_macro_name, last_identifier_orig_kind, mark)
+            let ps = with_space_sensitive(ps)
+                # Space separated macro arguments
+                # A.@foo a b    ==> (macrocall (. A (macro_name foo)) a b)
+                # @A.foo a b    ==> (macrocall (macro_name (. A foo)) a b)
+                n_args = parse_space_separated_exprs(ps)
+                is_doc_macro = last_identifier_orig_kind == K"doc"
+                if is_doc_macro && n_args == 1
+                    # Parse extended @doc args on next line
+                    # @doc x\ny      ==>  (macrocall (macro_name doc) x y)
+                    # A.@doc x\ny    ==>  (macrocall (. A (macro_name doc)) x y)
+                    # @A.doc x\ny    ==>  (macrocall (macro_name (. A doc)) x y)
+                    # @doc x y\nz    ==>  (macrocall (macro_name doc) x y)
+                    #
+                    # Excluded cases
+                    # @doc x\n\ny    ==>  (macrocall (macro_name doc) x)
+                    # @doc x\nend    ==>  (macrocall (macro_name doc) x)
+                    k2 = peek(ps, 2)
+                    if peek(ps) == K"NewlineWs" && !is_closing_token(ps, k2) &&
+                            k2 != K"NewlineWs"
+                        bump(ps) # newline
+                        parse_eq(ps)
+                    end
+                end
+                emit(ps, mark, K"macrocall")
+            end
+            break
+        elseif k == K"("
+            # f(a,b)  ==>  (call f a b)
+            # f(a=1; b=2) ==> (call f (= a 1) (parameters (= b 2)))
+            # f(a; b; c)  ==> (call f a (parameters b) (parameters c))
+            # (a=1)()  ==>  (call (parens (= a 1)))
+            # f (a)    ==>  (call f (error-t) a)
+            processing_macro_name = maybe_parsed_macro_name(
+                ps, processing_macro_name, last_identifier_orig_kind, mark)
+            bump_disallowed_space(ps)
+            bump(ps, TRIVIA_FLAG)
+            opts = parse_call_arglist(ps, K")")
+            if peek(ps) == K"do"
+                # f(x) do y body end  ==>  (call f x (do (tuple y) (block body)))
+                parse_do(ps)
+            end
+            emit(ps, mark, is_macrocall ? K"macrocall" : K"call",
+                 # TODO: Add PARENS_FLAG to all calls which use them?
+                 (is_macrocall ? PARENS_FLAG : EMPTY_FLAGS)|opts.delim_flags)
+            if is_macrocall
+                # @x(a, b)   ==>  (macrocall-p (macro_name x) a b)
+                # A.@x(y)    ==>  (macrocall-p (. A (macro_name x)) y)
+                # A.@x(y).z  ==>  (. (macrocall-p (. A (macro_name x)) y) z)
+                is_macrocall = false
+                # @f()()     ==>  (call (macrocall-p (macro_name f)))
+                macro_atname_range = nothing
+            end
+        elseif k == K"["
+            processing_macro_name = maybe_parsed_macro_name(
+                ps, processing_macro_name, last_identifier_orig_kind, mark)
+            m = position(ps)
+            # a [i]  ==>  (ref a (error-t) i)
+            bump_disallowed_space(ps)
+            bump(ps, TRIVIA_FLAG)
+            ckind, cflags, dim = parse_cat(ParseState(ps, end_symbol=true),
+                                      K"]", ps.end_symbol)
+            if is_macrocall
+                # @S[a,b]  ==>  (macrocall (macro_name S) (vect a b))
+                # @S[a b]  ==>  (macrocall (macro_name S) (hcat a b))
+                # @S[a; b] ==>  (macrocall (macro_name S) (vcat a b))
+                # A.@S[a]  ==>  (macrocall (. A (macro_name S)) (vect a))
+                # @S[a].b  ==>  (. (macrocall (macro_name S) (vect a)) b)
+                #v1.7: @S[a ;; b]  ==>  (macrocall (macro_name S) (ncat-2 a b))
+                #v1.6: @S[a ;; b]  ==>  (macrocall (macro_name S) (error (ncat-2 a b)))
+                emit(ps, m, ckind, cflags | set_numeric_flags(dim))
+                check_ncat_compat(ps, m, ckind)
+                emit(ps, mark, K"macrocall")
+                is_macrocall = false
+                macro_atname_range = nothing
+            else
+                # a[i]    ==>  (ref a i)
+                # a[i,j]  ==>  (ref a i j)
+                # (a=1)[] ==>  (ref (parens (= a 1)))
+                # a[end]  ==>  (ref a end)
+                # T[x   y]  ==>  (typed_hcat T x y)
+                # T[x ; y]  ==>  (typed_vcat T x y)
+                # T[a b; c d]  ==>  (typed_vcat T (row a b) (row c d))
+                # T[x for x in xs]  ==>  (typed_comprehension T (generator x (iteration (in x xs))))
+                #v1.8: T[a ; b ;; c ; d]  ==>  (typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))
+                outk = ckind == K"vect"          ? K"ref"                  :
+                       ckind == K"hcat"          ? K"typed_hcat"           :
+                       ckind == K"vcat"          ? K"typed_vcat"           :
+                       ckind == K"comprehension" ? K"typed_comprehension"  :
+                       ckind == K"ncat"          ? K"typed_ncat"           :
+                       internal_error("unrecognized kind in parse_cat ", string(ckind))
+                emit(ps, mark, outk, cflags | set_numeric_flags(dim))
+                check_ncat_compat(ps, mark, ckind)
+            end
+        elseif k == K"."
+            # Check if this is a dotted operator, not field access
+            k2 = peek(ps, 2)
+            if is_operator(k2) && !is_word_operator(k2) && k2 != K":" && k2 != K"$" && k2 != K"'" && k2 != K"?"
+                # This is a dotted operator like .=, .+, etc., not field access
+                # Let the appropriate parser handle it
+                break
+            end
+            # x .y  ==>  (. x (error-t) y)
+            bump_disallowed_space(ps)
+            emark = position(ps)
+            if !isnothing(macro_atname_range)
+                # Allow `@` in macrocall only in first and last position
+                # A.B.@x  ==>  (macrocall (. (. A B) (macro_name x)))
+                # @A.B.x  ==>  (macrocall (macro_name (. (. A B) x)))
+                # A.@B.x  ==>  (macrocall (. (. A (error-t) B) (macro_name (error-t) x)))
+                emit_diagnostic(ps, macro_atname_range...,
+                    error="`@` must appear on first or last macro name component")
+                # Recover by treating the `@` as if it had been on the last identifier
+                saw_misplaced_atsym = true
+                reset_node!(ps, macro_atname_range[2], kind=K"TOMBSTONE")
+                reset_node!(ps, macro_atname_range[1], kind=K"error")
+            end
+            bump(ps, TRIVIA_FLAG)
+            k = peek(ps)
+            if k == K"("
+                if is_macrocall
+                    # Recover by pretending we do have the syntax
+                    processing_macro_name = maybe_parsed_macro_name(
+                        ps, processing_macro_name, last_identifier_orig_kind, mark)
+                    # @M.(x)  ==> (macrocall (dotcall (macro_name M) (error-t) x))
+                    bump_invisible(ps, K"error", TRIVIA_FLAG)
+                    emit_diagnostic(ps, mark,
+                                    error="dot call syntax not supported for macros")
+                end
+                # f.(a,b)   ==>  (dotcall f a b)
+                # f. (x)    ==>  (dotcall f (error-t) x)
+                bump_disallowed_space(ps)
+                bump(ps, TRIVIA_FLAG)
+                opts = parse_call_arglist(ps, K")")
+                emit(ps, mark, K"dotcall", opts.delim_flags)
+            elseif k == K":"
+                # A.:+  ==>  (. A (quote-: +))
+                # A.: +  ==>  (. A (error-t) (quote-: +))
+                m = position(ps)
+                bump(ps, TRIVIA_FLAG)
+                bump_disallowed_space(ps)
+                parse_atom(ps, false)
+                emit(ps, m, K"quote", COLON_QUOTE)
+                emit(ps, mark, K".")
+            elseif k == K"$"
+                # f.$x      ==>  (. f ($ x))
+                # f.$(x+y)  ==>  (. f ($ (call + x y)))
+                # A.$B.@x   ==>  (macrocall (. (. A ($ B)) (macro_name x)))
+                # @A.$x a   ==>  (macrocall (macro_name (. A (error x))) a)
+                m = position(ps)
+                bump(ps, TRIVIA_FLAG)
+                parse_atom(ps)
+                if is_macrocall
+                    emit(ps, m, K"error", error="invalid macro name")
+                else
+                    emit(ps, m, K"$")
+                end
+                last_identifier_orig_kind = K"$"
+                emit(ps, mark, K".")
+            elseif k == K"@"
+                # A macro call after some prefix A has been consumed
+                # A.@x    ==>  (macrocall (. A (macro_name x)))
+                # A.@x a  ==>  (macrocall (. A (macro_name x)) a)
+                m = position(ps)
+                if is_macrocall
+                    # @A.B.@x a ==> (macrocall (. (. A B) (error-t) (macro_name x)) a)
+                    bump(ps, TRIVIA_FLAG, error="repeated `@` in macro module path")
+                else
+                    bump(ps, TRIVIA_FLAG)
+                end
+                parse_macro_name(ps)
+                last_identifier_pos = peek_behind_pos(ps)
+                last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind
+                !is_macrocall && emit(ps, m, K"macro_name")
+                macro_atname_range = (m, position(ps))
+                is_macrocall = true
+                emit(ps, mark, K".")
+                maybe_parsed_special_macro(ps, last_identifier_orig_kind)
+            elseif k == K"'"
+                # f.'  =>  (dotcall-post f (error '))
+                bump(ps, remap_kind=K"Identifier")  # bump '
+                # TODO: Reclaim dotted postfix operators :-)
+                emit(ps, emark, K"error",
+                     error="the .' operator for transpose is discontinued")
+                emit(ps, mark, K"dotcall", POSTFIX_OP_FLAG)
+            else
+                if saw_misplaced_atsym
+                    # If we saw a misplaced `@` earlier, this might be the place
+                    # where it should have been. Opportunistically bump the
+                    # zero-width error token here. If that's not right, we'll
+                    # reset it later.
+                    if misplaced_atsym_mark !== nothing
+                        reset_node!(ps, misplaced_atsym_mark[1], kind=K"TOMBSTONE")
+                        reset_node!(ps, misplaced_atsym_mark[2], kind=K"TOMBSTONE")
+                    end
+                    macro_name_mark = position(ps)
+                    bump_invisible(ps, K"error", TRIVIA_FLAG)
+                    aterror_mark = position(ps)
+                end
+                # Field/property syntax
+                # f.x.y ==> (. (. f x) y)
+                parse_atom(ps, false)
+                if saw_misplaced_atsym
+                    emit(ps, macro_name_mark, K"macro_name")
+                    misplaced_atsym_mark = (aterror_mark, position(ps))
+                end
+                last_identifier_pos = peek_behind_pos(ps)
+                last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind
+                maybe_strmac_1 = true
+                emit(ps, mark, K".")
+            end
+        elseif k == K"'" && !preceding_whitespace(t)
+            # f'  ==> (call-post f ')
+            # f'ᵀ ==> (call-post f 'ᵀ)
+            bump(ps, remap_kind=K"Identifier")
+            emit(ps, mark, K"call", POSTFIX_OP_FLAG)
+        elseif k == K"{"
+            processing_macro_name = maybe_parsed_macro_name(
+                ps, processing_macro_name, last_identifier_orig_kind, mark)
+            # Type parameter curlies and macro calls
+            m = position(ps)
+            # S {a} ==> (curly S (error-t) a)
+            bump_disallowed_space(ps)
+            bump(ps, TRIVIA_FLAG)
+            opts = parse_call_arglist(ps, K"}")
+            if is_macrocall
+                # @S{a,b} ==> (macrocall (macro_name S) (braces a b))
+                # A.@S{a}  ==> (macrocall (. A (macro_name S)) (braces a))
+                # @S{a}.b  ==> (. (macrocall (macro_name S) (braces a)) b)
+                emit(ps, m, K"braces", opts.delim_flags)
+                emit(ps, mark, K"macrocall")
+                min_supported_version(v"1.6", ps, mark, "macro call without space before `{}`")
+                is_macrocall = false
+                macro_atname_range = nothing
+            else
+                # S{a,b} ==> (curly S a b)
+                emit(ps, mark, K"curly", opts.delim_flags)
+            end
+        elseif k in KSet" \" \"\"\" ` ``` " &&
+                !preceding_whitespace(t) && maybe_strmac &&
+                (# Must mirror the logic in lex_quote() for consistency
+                 origk = last_identifier_orig_kind;
+                 origk == K"Identifier" || is_contextual_keyword(origk) || is_word_operator(origk))
+            # Custom string and command literals
+            # x"str" ==> (macrocall @x_str (string-r "str"))
+            # x`str` ==> (macrocall @x_cmd (cmdstring-r "str"))
+            # x""    ==> (macrocall @x_str (string-r ""))
+            # x``    ==> (macrocall @x_cmd (cmdstring-r ""))
+            # Triple quoted processing for custom strings
+            # r"""\nx"""          ==> (macrocall @r_str (string-s-r "x"))
+            # r"""\n x\n y"""     ==> (macrocall @r_str (string-s-r "x\n" "y"))
+            # r"""\n x\\n y"""    ==> (macrocall @r_str (string-s-r "x\\\n" "y"))
+            #
+            # Use a special token kind for string and cmd macro names so the
+            # names can be expanded later as necessary.
+            name_kind = is_string_delim(k) ? K"StrMacroName" : K"CmdMacroName"
+            reset_node!(ps, last_identifier_pos, kind=name_kind)
+            parse_string(ps, true)
+            t = peek_token(ps)
+            k = kind(t)
+            if !preceding_whitespace(t) && is_string_macro_suffix(k)
+                # Macro suffixes can include keywords and numbers
+                # x"s"y    ==> (macrocall @x_str (string-r "s") "y")
+                # x"s"end  ==> (macrocall @x_str (string-r "s") "end")
+                # x"s"in   ==> (macrocall @x_str (string-r "s") "in")
+                # x"s"2    ==> (macrocall @x_str (string-r "s") 2)
+                # x"s"10.0 ==> (macrocall @x_str (string-r "s") 10.0)
+                suffix_kind = (k == K"Identifier" || is_keyword(k) ||
+                               is_word_operator(k)) ? K"String" : k
+                bump(ps, remap_kind=suffix_kind)
+            end
+            emit(ps, mark, K"macrocall")
+        else
+            break
+        end
+        maybe_strmac = maybe_strmac_1
+    end
+end
+
+# Parse the `A<:B` part of type definitions like `struct A<:B end`
+#
+# flisp: parse-subtype-spec
+function parse_subtype_spec(ps::ParseState)
+    # Wart: why isn't the flisp parser more strict here?
+    # <: is the only operator which isn't a syntax error, but
+    # parse_comparison allows all sorts of things.
+    parse_comparison(ps, true)
+end
+
+# flisp: parse-struct-field
+function parse_struct_field(ps::ParseState)
+    mark = position(ps)
+    const_field = peek(ps) == K"const"
+    if const_field
+        bump(ps, TRIVIA_FLAG)
+    end
+    parse_eq(ps)
+    if const_field
+        # Const fields https://github.com/JuliaLang/julia/pull/43305
+        #v1.8: struct A const a end  ==>  (struct A (block (const x)))
+        #v1.7: struct A const a end  ==>  (struct A (block (error (const x))))
+        emit(ps, mark, K"const")
+        min_supported_version(v"1.8", ps, mark, "`const` struct field")
+    end
+end
+
+# parse expressions or blocks introduced by syntactic reserved words.
+#
+# The caller should use peek_initial_reserved_words to determine whether
+# to call parse_resword, or whether contextual keywords like `mutable` are
+# simple identifiers.
+#
+# flisp: parse-resword
+function parse_resword(ps::ParseState)
+    # In normal_context
+    # begin f() where T = x end  ==>  (block (= (where (call f) T) x))
+    ps = normal_context(ps)
+    bump_trivia(ps)
+    mark = position(ps)
+    word = peek(ps)
+    if word in KSet"begin quote"
+        # begin end         ==>  (block)
+        # begin a ; b end   ==>  (block a b)
+        # begin\na\nb\nend  ==>  (block a b)
+        bump(ps, TRIVIA_FLAG)
+        parse_block_inner(ps, parse_docstring)
+        bump_closing_token(ps, K"end")
+        emit(ps, mark, K"block")
+        if word == K"quote"
+            # quote end       ==>  (quote (block))
+            # quote body end  ==>  (quote (block body))
+            emit(ps, mark, K"quote")
+        end
+    elseif word == K"while"
+        # while cond body end  ==>  (while cond (block body))
+        # while x < y \n a \n b \n end ==> (while (call-i x < y) (block a b))
+        bump(ps, TRIVIA_FLAG)
+        parse_cond(ps)
+        parse_block(ps)
+        bump_closing_token(ps, K"end")
+        emit(ps, mark, K"while")
+    elseif word == K"for"
+        # for x in xs end  ==>  (for (iteration (in x xs)) (block))
+        # for x in xs, y in ys \n a \n end ==> (for (iteration (in x xs) (in y ys)) (block a))
+        bump(ps, TRIVIA_FLAG)
+        parse_iteration_specs(ps)
+        parse_block(ps)
+        bump_closing_token(ps, K"end")
+        emit(ps, mark, K"for")
+    elseif word == K"let"
+        bump(ps, TRIVIA_FLAG)
+        m = position(ps)
+        if peek(ps) in KSet"NewlineWs ;"
+            # let end           ==>  (let (block) (block))
+            # let ; end         ==>  (let (block) (block))
+            # let ; body end    ==>  (let (block) (block body))
+        else
+            # let x=1\n end     ==>  (let (block (= x 1)) (block))
+            # let x=1 ; end     ==>  (let (block (= x 1)) (block))
+            # let x::1 ; end    ==>  (let (block (::-i x 1)) (block))
+            # let x ; end       ==>  (let (block x) (block))
+            # let x=1,y=2 ; end ==>  (let (block (= x 1) (= y 2) (block)))
+            # let x+=1 ; end    ==>  (let (block (op= x + 1)) (block))
+            parse_comma_separated(ps, parse_eq_star)
+        end
+        emit(ps, m, K"block")
+        k = peek(ps)
+        if k in KSet"NewlineWs ;"
+            bump(ps, TRIVIA_FLAG)
+        elseif k == K"end"
+            # pass
+        else
+            recover(is_closer_or_newline, ps, TRIVIA_FLAG,
+                    error="let variables should end in `;` or newline")
+        end
+        # let\na\nb\nend    ==>  (let (block) (block a b))
+        parse_block(ps)
+        bump_closing_token(ps, K"end")
+        emit(ps, mark, K"let")
+    elseif word == K"if"
+        parse_if_elseif(ps)
+    elseif word in KSet"global local"
+        # global x   ==>  (global x)
+        # local x    ==>  (local x)
+        bump(ps, TRIVIA_FLAG)
+        const_mark = nothing
+        if peek(ps) == K"const"
+            const_mark = position(ps)
+            bump(ps, TRIVIA_FLAG)
+        end
+        had_assignment = parse_global_local_const_vars(ps)
+        if !isnothing(const_mark)
+            # global const x = 1  ==>  (global (const (= x 1)))
+            # local const x = 1   ==>  (local (const (= x 1)))
+            emit(ps, const_mark, K"const")
+            if !had_assignment
+                # global const x  ==>  (global (error (const x)))
+                emit(ps, mark, K"error", error="expected assignment after `const`")
+            end
+        end
+        emit(ps, mark, word)
+    elseif word == K"const"
+        # const x = 1  ==>  (const (= x 1))
+        bump(ps, TRIVIA_FLAG)
+        scope_mark = nothing
+        scope_k = peek(ps)
+        if scope_k in KSet"local global"
+            scope_mark = position(ps)
+            bump(ps, TRIVIA_FLAG)
+        end
+        had_assignment = parse_global_local_const_vars(ps)
+        if !isnothing(scope_mark)
+            # const global x = 1  ==>  (const (global (= x 1)))
+            # const local x = 1   ==>  (const (local (= x 1)))
+            emit(ps, scope_mark, scope_k)
+        end
+        emit(ps, mark, K"const")
+        if !had_assignment
+            # const x .= 1  ==>  (error (const (.= x 1)))
+            emit(ps, mark, K"error", error="expected assignment after `const`")
+        end
+    elseif word in KSet"function macro"
+        bump(ps, TRIVIA_FLAG)
+        bump_trivia(ps)
+        has_body = parse_function_signature(ps, word == K"function")
+        if has_body
+            # The function body
+            # function f() \n a \n b end  ==> (function (call f) (block a b))
+            # function f() end            ==> (function (call f) (block))
+            parse_block(ps)
+            bump_closing_token(ps, K"end")
+            emit(ps, mark, word)
+        else
+            # Function/macro definition with no methods
+            # function f end       ==> (function f)
+            # (function f \n end)  ==> (parens (function f))
+            # function f \n\n end  ==> (function f)
+            # function $f end      ==> (function ($ f))
+            # macro f end          ==> (macro f)
+            bump(ps, TRIVIA_FLAG, skip_newlines=true)
+            emit(ps, mark, word)
+        end
+    elseif word == K"abstract"
+        # Abstract type definitions
+        # abstract type A end             ==>  (abstract A)
+        # abstract type A ; end             ==>  (abstract A)
+        # abstract type \n\n A \n\n end   ==>  (abstract A)
+        # abstract type A <: B end        ==>  (abstract (<: A B))
+        # abstract type A <: B{T,S} end   ==>  (abstract (<: A (curly B T S)))
+        # Oddities allowed by parser
+        # abstract type A < B end         ==>  (abstract (call-i A < B))
+        bump(ps, TRIVIA_FLAG)
+        @check peek(ps) == K"type"
+        bump(ps, TRIVIA_FLAG)
+        parse_subtype_spec(ps)
+        bump_semicolon_trivia(ps)
+        bump_closing_token(ps, K"end")
+        emit(ps, mark, K"abstract")
+    elseif word in KSet"struct mutable"
+        # struct A <: B \n a::X \n end  ==>  (struct (<: A B) (block (::-i a X)))
+        # struct A \n a \n b \n end  ==>  (struct A (block a b))
+        #v1.7: struct A const a end  ==>  (struct A (block (error (const a))))
+        #v1.8: struct A const a end  ==>  (struct A (block (const a)))
+        is_mut = word == K"mutable"
+        if is_mut
+            # mutable struct A end  ==>  (struct-mut A (block))
+            bump(ps, TRIVIA_FLAG)
+        else
+            # struct A end  ==>  (struct A (block))
+        end
+        @check peek(ps) == K"struct"
+        bump(ps, TRIVIA_FLAG)
+        parse_subtype_spec(ps)
+        parse_block(ps, ps1->parse_docstring(ps1, parse_struct_field))
+        bump_closing_token(ps, K"end")
+        emit(ps, mark, K"struct", is_mut ? MUTABLE_FLAG : EMPTY_FLAGS)
+    elseif word == K"primitive"
+        # primitive type A 32 end             ==> (primitive A 32)
+        # primitive type A 32 ; end           ==> (primitive A 32)
+        # primitive type A $N end             ==> (primitive A ($ N))
+        # primitive type A <: B \n 8 \n end   ==> (primitive (<: A B) 8)
+        bump(ps, TRIVIA_FLAG)
+        @check peek(ps) == K"type"
+        bump(ps, TRIVIA_FLAG)
+        let ps = with_space_sensitive(ps)
+            parse_subtype_spec(ps)
+            parse_cond(ps)
+        end
+        bump_semicolon_trivia(ps)
+        bump_closing_token(ps, K"end")
+        emit(ps, mark, K"primitive")
+    elseif word == K"try"
+        parse_try(ps)
+    elseif word == K"return"
+        bump(ps, TRIVIA_FLAG)
+        k = peek(ps)
+        if k == K"NewlineWs" || is_closing_token(ps, k)
+            # return\nx   ==>  (return)
+            # return)     ==>  (return)
+        else
+            # return x    ==>  (return x)
+            # return x,y  ==>  (return (tuple x y))
+            parse_eq(ps)
+        end
+        emit(ps, mark, K"return")
+    elseif word in KSet"break continue"
+        # break     ==>  (break)
+        # continue  ==>  (continue)
+        bump(ps, TRIVIA_FLAG)
+        emit(ps, mark, word)
+        k = peek(ps)
+        if !(k in KSet"NewlineWs ; ) : EndMarker" || (k == K"end" && !ps.end_symbol))
+            recover(is_closer_or_newline, ps, TRIVIA_FLAG,
+                    error="unexpected token after $(untokenize(word))")
+        end
+    elseif word in KSet"module baremodule"
+        # module A end  ==> (module A (block))
+        # baremodule A end ==> (module-bare A (block))
+        bump(ps, TRIVIA_FLAG)
+        if is_reserved_word(peek(ps))
+            # module do \n end  ==>  (module (error do) (block))
+            bump(ps, error="Invalid module name")
+        else
+            if ps.stream.version >= (1, 14)
+                # Encode the parser version that parsed this module - the runtime
+                # will use this to set the same parser version for runtime `include`
+                # etc into this module.
+                bump_invisible(ps, K"VERSION",
+                    set_numeric_flags(ps.stream.version[2] * 10))
+            end
+            # module $A end  ==>  (module ($ A) (block))
+            parse_unary_prefix(ps)
+        end
+        # module A \n a \n b \n end  ==>  (module A (block a b))
+        # module A \n "x"\na \n end  ==>  (module A (block (doc (string "x") a)))
+        parse_block(ps, parse_public)
+        bump_closing_token(ps, K"end")
+        emit(ps, mark, K"module",
+             word == K"baremodule" ? BARE_MODULE_FLAG : EMPTY_FLAGS)
+    elseif word in KSet"export public"
+        # export a         ==>  (export a)
+        # export @a        ==>  (export (macro_name a))
+        # export a, \n @b  ==>  (export a (macro_name b))
+        # export +, ==     ==>  (export + ==)
+        # export \n a      ==>  (export a)
+        # export \$a, \$(a*b) ==> (export (\$ a) (\$ (parens (call-i a * b))))
+        bump(ps, TRIVIA_FLAG)
+        parse_comma_separated(ps, x->parse_import_atsym(x, false))
+        emit(ps, mark, word)
+    elseif word in KSet"import using"
+        parse_imports(ps)
+    elseif word == K"do"
+        bump(ps, TRIVIA_FLAG, error="invalid `do` syntax")
+    else
+        internal_error("unhandled reserved word ", string(word))
+    end
+end
+
+# Parse if-elseif-else-end expressions
+#
+# if a xx elseif b yy else zz end   ==>  (if a (block xx) (elseif b (block yy) (block zz)))
+function parse_if_elseif(ps, is_elseif=false, is_elseif_whitespace_err=false)
+    mark = position(ps)
+    word = peek(ps)
+    if is_elseif_whitespace_err
+        # Only get here on recovery from error case - pretend we're parsing elseif.
+        word = K"elseif"
+    else
+        bump(ps, TRIVIA_FLAG)
+    end
+    cond_mark = position(ps)
+    if peek(ps) in KSet"NewlineWs end"
+        # if end      ==>  (if (error) (block))
+        # if \n end   ==>  (if (error) (block))
+        bump_trivia(ps, error="missing condition in `$(untokenize(word))`")
+    else
+        # if a end      ==>  (if a (block))
+        # if a xx end   ==>  (if a (block xx))
+        parse_cond(ps)
+    end
+    # if a \n\n xx \n\n end   ==>  (if a (block xx))
+    parse_block(ps)
+    bump_trivia(ps)
+    k = peek(ps)
+    if k == K"elseif"
+        # if a xx elseif b yy end   ==>  (if a (block xx) (elseif b (block yy)))
+        parse_if_elseif(ps, true)
+    elseif k == K"else"
+        emark = position(ps)
+        bump(ps, TRIVIA_FLAG)
+        if peek(ps) == K"if"
+            # Recovery: User wrote `else if` by mistake ?
+            # if a xx else if b yy end  ==>  (if a (block xx) (error-t) (elseif b (block yy)))
+            bump(ps, TRIVIA_FLAG)
+            emit(ps, emark, K"error", TRIVIA_FLAG,
+                 error="use `elseif` instead of `else if`")
+            parse_if_elseif(ps, true, true)
+        else
+            # if a xx else yy end   ==>  (if a (block xx) (block yy))
+            parse_block(ps)
+        end
+    end
+    if !is_elseif
+        bump_closing_token(ps, K"end")
+    end
+    emit(ps, mark, word)
+end
+
+# Like parse_assignment, but specialized so that we can omit the
+# tuple when there's commas but no assignment.
+function parse_global_local_const_vars(ps)
+    mark = position(ps)
+    n_commas = parse_comma(ps, false)
+    (isdot, t) = peek_dotted_op_token(ps)
+    if is_prec_assignment(t)
+        if n_commas >= 1
+            # const x,y = 1,2  ==>  (const (= (tuple x y) (tuple 1 2)))
+            emit(ps, mark, K"tuple")
+        end
+        # const x = 1   ==>  (const (= x 1))
+        # global x ~ 1  ==>  (global (call-i x ~ 1))
+        # global x += 1 ==>  (global (+= x 1))
+        parse_assignment_with_initial_ex(ps, mark, parse_comma)
+    else
+        # global x,y   ==>  (global x y)
+    end
+    return kind(t) == K"=" && !isdot
+end
+
+# Parse function and macro definitions
+function parse_function_signature(ps::ParseState, is_function::Bool)
+    is_anon_func = false
+    parsed_call = false
+    needs_parse_call = true
+
+    mark = position(ps)
+    if !is_function
+        # Parse macro name
+        parse_unary_prefix(ps)
+        kb = peek_behind(ps).orig_kind
+        if is_initial_reserved_word(ps, kb)
+            # macro while(ex) end  ==> (macro (call (error while) ex) (block))
+            emit(ps, mark, K"error", error="invalid macro name")
+        else
+            # macro f()     end  ==>  (macro (call f) (block))
+            # macro (:)(ex) end  ==>  (macro (call (parens :) ex) (block))
+            # macro (type)(ex) end  ==>  (macro (call (parens type) ex) (block))
+            # macro $f()    end  ==>  (macro (call ($ f)) (block))
+            # macro ($f)()  end  ==>  (macro (call (parens ($ f))) (block))
+        end
+    else
+        if peek(ps) != K"("
+            # function f() end  ==> (function (call f))
+            parse_unary_prefix(ps)
+        else
+            # When an initial parenthesis is present, we need to distinguish
+            # between
+            # * The function name in parens, followed by (args...)
+            # * An anonymous function argument list in parens
+            # * The whole function declaration, in parens
+            bump(ps, TRIVIA_FLAG)
+            is_empty_tuple = peek(ps, skip_newlines=true) == K")"
+            opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs
+                _parsed_call = was_eventually_call(ps)
+                _maybe_grouping_parens = !had_commas && !had_splat && num_semis == 0 && num_subexprs == 1
+                # Skip intervening newlines only when the parentheses hold a single
+                # expression, which is the ambiguous case between a name like (::T)
+                # and an anonymous function parameter list.
+                next_kind = peek(ps, 2, skip_newlines=_maybe_grouping_parens)
+                _needs_parse_call = next_kind ∈ KSet"( ."
+                _is_anon_func = (!_needs_parse_call && !_parsed_call) || had_commas
+                return (needs_parameters      = _is_anon_func,
+                        is_anon_func          = _is_anon_func,
+                        parsed_call           = _parsed_call,
+                        needs_parse_call      = _needs_parse_call,
+                        maybe_grouping_parens = _maybe_grouping_parens)
+            end
+            is_anon_func = opts.is_anon_func
+            parsed_call = opts.parsed_call
+            needs_parse_call = opts.needs_parse_call
+            if is_anon_func
+                # function (x) body end ==>  (function (tuple-p x) (block body))
+                # function (x::f()) end ==>  (function (tuple-p (::-i x (call f))) (block))
+                # function (x,y) end    ==>  (function (tuple-p x y) (block))
+                # function (x=1) end    ==>  (function (tuple-p (= x 1)) (block))
+                # function (;x=1) end   ==>  (function (tuple-p (parameters (= x 1))) (block))
+                # function (f(x),) end  ==>  (function (tuple-p (call f x)) (block))
+                ambiguous_parens = opts.maybe_grouping_parens &&
+                                   peek_behind(ps).kind in KSet"macrocall $"
+                emit(ps, mark, K"tuple", PARENS_FLAG|opts.delim_flags)
+                if ambiguous_parens
+                    # Got something like `(@f(x))`. Is it anon `(@f(x),)` or named sig `@f(x)` ??
+                    emit(ps, mark, K"error", error="Ambiguous signature. Add a trailing comma if this is a 1-argument anonymous function; remove parentheses if this is a macro call acting as function signature.")
+                end
+            elseif is_empty_tuple
+                # Weird case which is consistent with parse_paren but will be
+                # rejected in lowering
+                # function ()(x) end  ==> (function (call (tuple-p) x) (block))
+                emit(ps, mark, K"tuple", PARENS_FLAG)
+            else
+                # function (A).f() end  ==> (function (call (. (parens A) f)) (block))
+                # function (:)() end    ==> (function (call (parens :)) (block))
+                # function (x::T)() end ==> (function (call (parens (::-i x T))) (block))
+                # function (::T)() end  ==> (function (call (parens (::-pre T))) (block))
+                # function (:*=(f))() end  ==> (function (call (parens (call (quote-: *=) f))) (block))
+                emit(ps, mark, K"parens", PARENS_FLAG)
+            end
+        end
+        if !is_anon_func
+            kb = peek_behind(ps).orig_kind
+            if is_reserved_word(kb)
+                # function begin() end  ==>  (function (call (error begin)) (block))
+                emit(ps, mark, K"error", error="invalid function name")
+            else
+                # function f() end     ==>  (function (call f) (block))
+                # function type() end  ==>  (function (call type) (block))
+                # function \n f() end  ==>  (function (call f) (block))
+                # function $f() end    ==>  (function (call ($ f)) (block))
+                # function (::Type{T})(x) end ==> (function (call (parens (::-pre (curly Type T))) x) (block))
+            end
+        end
+    end
+    if needs_parse_call
+        # Parse function argument list
+        # function f(x,y)  end    ==>  (function (call f x y) (block))
+        # function f{T}()  end    ==>  (function (call (curly f T)) (block))
+        # function A.f()   end    ==>  (function (call (. A f)) (block))
+        parse_call_chain(ps, mark)
+        sig_kind = peek_behind(ps).kind
+        if sig_kind in KSet"Identifier var $" && peek(ps, skip_newlines=true) == K"end"
+            # function f end ==> (function f)
+            # function $f end ==> (function $f)
+            return false
+        elseif sig_kind == K"macrocall"
+            min_supported_version(v"1.12", ps, mark, "macro call as function signature")
+        elseif sig_kind != K"call"
+            # function f body end  ==>  (function (error f) (block body))
+            emit(ps, mark, K"error",
+                error="Invalid signature in $(is_function ? "function" : "macro") definition")
+        end
+    end
+    if is_function && peek(ps) == K"::"
+        # Function return type
+        # function f()::T    end   ==>  (function (::-i (call f) T) (block))
+        # function f()::g(T) end   ==>  (function (::-i (call f) (call g T)) (block))
+        bump(ps, TRIVIA_FLAG)
+        parse_call(ps)
+        emit(ps, mark, K"::", INFIX_FLAG)
+    end
+    if peek(ps) == K"where"
+        # Function signature where syntax
+        # function f() where {T} end   ==>  (function (where (call f) (braces T)) (block))
+        # function f() where T   end   ==>  (function (where (call f) T) (block))
+        parse_where_chain(ps, mark)
+    end
+    # function f()::S where T end ==> (function (where (::-i (call f) S) T) (block))
+    #
+    # Ugly cases for compat where extra parentheses existed and we've
+    # already parsed at least the call part of the signature
+    #
+    # function (f() where T) end         ==> (function (where (call f) T) (block))
+    # function (f()) where T end         ==> (function (where (call f) T) (block))
+    # function (f() where T) where U end ==> (function (where (where (call f) T) U) (block))
+    # function (f()::S) end              ==> (function (parens (::-i (call f) S)) (block))
+    # function ((f()::S) where T) end    ==> (function (where (parens (::-i (call f) S)) T) (block))
+    #
+    # TODO: Warn for use of parens? The precedence of `::` and
+    # `where` don't work inside parens so this is a bit of a syntax
+    # oddity/aberration.
+    return true
+end
+
+# Parse a try block
+#
+# try \n x \n catch e \n y \n finally \n z end  ==>  (try (block x) (catch e (block y)) (finally (block z)))
+#v1.8: try \n x \n catch e \n y \n else z finally \n w end  ==>  (try (block x) (catch e (block y)) (else (block z)) (finally (block w)))
+#
+# flisp: embedded in parse_resword
+function parse_try(ps)
+    mark = position(ps)
+    bump(ps, TRIVIA_FLAG)
+    parse_block(ps)
+    has_catch = false
+    has_finally = false
+    bump_trivia(ps)
+    if peek(ps) == K"catch"
+        has_catch = true
+        parse_catch(ps)
+    end
+    bump_trivia(ps)
+    if peek(ps) == K"else"
+        # catch-else syntax: https://github.com/JuliaLang/julia/pull/42211
+        #
+        #v1.8: try catch ; else end ==> (try (block) (catch □ (block)) (else (block)))
+        else_mark = position(ps)
+        bump(ps, TRIVIA_FLAG)
+        parse_block(ps)
+        if !has_catch
+            #v1.8: try else x finally y end ==> (try (block) (else (error (block x))) (finally (block y)))
+            emit(ps, else_mark, K"error", error="Expected `catch` before `else`")
+        end
+        #v1.7: try catch ; else end ==> (try (block) (catch □ (block)) (else (error (block))))
+        min_supported_version(v"1.8", ps, else_mark, "`else` after `catch`")
+        emit(ps, else_mark, K"else")
+    end
+    bump_trivia(ps)
+    if peek(ps) == K"finally"
+        finally_mark = position(ps)
+        # try x finally y end  ==>  (try (block x) (finally (block y)))
+        has_finally = true
+        bump(ps, TRIVIA_FLAG)
+        parse_block(ps)
+        emit(ps, finally_mark, K"finally")
+    end
+    # Wart: the flisp parser allows finally before catch, the *opposite* order
+    # in which these blocks execute.
+    bump_trivia(ps)
+    if !has_catch && peek(ps) == K"catch"
+        # try x finally y catch e z end  ==>  (try (block x) (finally (block y)) (catch e (block z)))
+        m = position(ps)
+        parse_catch(ps)
+        emit_diagnostic(ps, m,
+            warning="`catch` after `finally` will execute out of order")
+    end
+    missing_recovery = !has_catch && !has_finally
+    if missing_recovery
+        # try x end  ==>  (try (block x) (error-t))
+        bump_invisible(ps, K"error", TRIVIA_FLAG)
+    end
+    bump_closing_token(ps, K"end")
+    emit(ps, mark, K"try")
+    if missing_recovery
+        emit_diagnostic(ps, mark, error="try without catch or finally")
+    end
+end
+
+function parse_catch(ps::ParseState)
+    mark = position(ps)
+    bump(ps, TRIVIA_FLAG)
+    k = peek(ps)
+    if k in KSet"NewlineWs ;" || is_closing_token(ps, k)
+        # try x catch end      ==>  (try (block x) (catch □ (block)))
+        # try x catch ; y end  ==>  (try (block x) (catch □ (block y)))
+        # try x catch \n y end ==>  (try (block x) (catch □ (block y)))
+        bump_invisible(ps, K"Placeholder")
+    else
+        # try x catch e y end   ==>  (try (block x) (catch e (block y)))
+        # try x catch $e y end  ==>  (try (block x) (catch ($ e) (block y)))
+        m = position(ps)
+        parse_eq_star(ps)
+        if !(peek_behind(ps).kind in KSet"Identifier var $")
+            # try x catch e+3 y end  ==>  (try (block x) (catch (error (call-i e + 3)) (block y)))
+            emit(ps, m, K"error", error="a variable name is expected after `catch`")
+        end
+    end
+    parse_block(ps)
+    emit(ps, mark, K"catch")
+end
+
+# flisp: parse-do
+function parse_do(ps::ParseState)
+    mark = position(ps)
+    bump(ps, TRIVIA_FLAG) # do
+    ps = normal_context(ps)
+    m = position(ps)
+    if peek(ps) in KSet"NewlineWs ;"
+        # f() do\nend        ==>  (call f (do (tuple) (block)))
+        # f() do ; body end  ==>  (call f (do (tuple) (block body)))
+        # this trivia needs to go into the tuple due to the way position()
+        # works.
+        bump(ps, TRIVIA_FLAG)
+    else
+        # f() do x, y\n body end  ==>  (call f (do (tuple x y) (block body)))
+        parse_comma_separated(ps, parse_range)
+    end
+    emit(ps, m, K"tuple")
+    parse_block(ps)
+    bump_closing_token(ps, K"end")
+    emit(ps, mark, K"do")
+end
+
+function _is_valid_macro_name(peektok)
+    return !is_error(peektok.kind) && (peektok.is_leaf || peektok.kind == K"var")
+end
+
+# flisp: parse-macro-name
+function parse_macro_name(ps::ParseState)
+    # @! x   ==>  (macrocall @! x)
+    # @.. x  ==>  (macrocall (macro_name ..) x)
+    # @$ x   ==>  (macrocall (macro_name $) x)
+    # @var"#" x   ==>  (macrocall (macro_name (var #)) x)
+    bump_disallowed_space(ps)
+    mark = position(ps)
+    parse_atom(ps, false)
+    b = peek_behind(ps, skip_parens=false)
+    if b.kind == K"parens"
+        emit_diagnostic(ps, mark,
+            warning="parenthesizing macro names is unnecessary")
+    elseif !_is_valid_macro_name(b)
+        # @[x] y z  ==>  (macrocall (macro_name (error (vect x))) y z)
+        emit(ps, mark, K"error", error="invalid macro name")
+    end
+end
+
+# Parse an identifier, interpolation or @-prefixed symbol
+#
+# flisp: parse-atsym
+function parse_import_atsym(ps::ParseState, allow_quotes=true)
+    bump_trivia(ps)
+    if peek(ps) == K"@"
+        mark = position(ps)
+        # export @a       ==>  (export (macro_name a))
+        # export @var"'"  ==>  (export (macro_name (var ')))
+        # export a, \n @b ==>  (export a (macro_name b))
+        bump(ps, TRIVIA_FLAG)
+        parse_macro_name(ps)
+        emit(ps, mark, K"macro_name")
+    else
+        # export a  ==>  (export a)
+        # export \n a  ==>  (export a)
+        # export $a, $(a*b)  ==>  (export ($ a) (parens ($ (call * a b))))
+        # export (x::T) ==> (export (error (parens (::-i x T))))
+        # export outer  ==> (export outer)
+        # export ($f)   ==> (export ($ f))
+        mark = position(ps)
+        # Syntax Edition TODO: make all the various ways to quote things inside
+        # import paths an error and require `var""` in the few remaining cases.
+        if allow_quotes && peek(ps) == K":" && !is_closing_token(ps, peek(ps,2))
+            # import A.:+  ==>  (import (importpath A (quote-: +)))
+            emit_diagnostic(ps, warning="quoting with `:` is not required here")
+        end
+        parse_unary_prefix(ps)
+        pos = position(ps)
+        warn_parens = false
+        if peek_behind(ps, pos).kind == K"parens"
+            # import A.(:+)  ==>  (import (importpath A (parens (quote-: +))))
+            pos = first_child_position(ps, pos)
+            warn_parens = true
+        end
+        if allow_quotes && peek_behind(ps, pos).kind == K"quote"
+            pos = first_child_position(ps, pos)
+            if peek_behind(ps, pos).kind == K"parens"
+                # import A.:(+)  ==>  (import (importpath A (quote-: (parens +))))
+                pos = first_child_position(ps, pos)
+                warn_parens = true
+            end
+        end
+        b = peek_behind(ps, pos)
+        if warn_parens && b.orig_kind != K".."
+            emit_diagnostic(ps, mark, warning="parentheses are not required here")
+        end
+        ok = (b.is_leaf  && (b.kind == K"Identifier" || is_operator(b.kind))) ||
+             (!b.is_leaf && b.kind in KSet"$ var")
+        if !ok
+            emit(ps, mark, K"error", error="expected identifier")
+        end
+    end
+end
+
+# Parse import and using syntax
+#
+# flisp: parse-imports
+function parse_imports(ps::ParseState)
+    mark = position(ps)
+    word = peek(ps)
+    @check word in KSet"import using"
+    bump(ps, TRIVIA_FLAG)
+    emark = position(ps)
+    initial_as = parse_import(ps, word, false)
+    t = peek_token(ps)
+    k = kind(t)
+    has_import_prefix = false  # true if we have `prefix:` in `import prefix: stuff`
+    has_comma = false
+    if k == K":" && !preceding_whitespace(t)
+        bump(ps, TRIVIA_FLAG)
+        has_import_prefix = true
+        if initial_as
+            # import A as B: x  ==>  (import (: (error (as (importpath A) B)) (importpath x)))
+            emit(ps, emark, K"error", error="`as` before `:` in import/using")
+        end
+    elseif k == K","
+        bump(ps, TRIVIA_FLAG)
+        has_comma = true
+    end
+    if has_import_prefix || has_comma
+        # import A, y      ==>  (import (importpath A) (importpath y))
+        # import A: x, y   ==>  (import (: (importpath A) (importpath x) (importpath y)))
+        # import A: +, ==  ==>  (import (: (importpath A) (importpath +) (importpath ==)))
+        has_import_prefix_ = has_import_prefix
+        parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix_))
+        if peek(ps) == K":"
+            # Error recovery
+            # import A: x, B: y ==> (import (: (importpath A) (importpath x) (importpath B) (error-t (importpath y))))
+            emark = position(ps)
+            bump(ps, TRIVIA_FLAG)
+            parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix_))
+            emit(ps, emark, K"error", TRIVIA_FLAG,
+                 error="`:` can only be used when importing a single module. Split imports into multiple lines")
+        end
+    end
+    if has_import_prefix
+        # import A: x  ==>  (import (: (importpath A) (importpath x)))
+        emit(ps, mark, K":")
+    end
+    # using  A  ==>  (using (importpath A))
+    # import A  ==>  (import (importpath A))
+    emit(ps, mark, word)
+end
+
+# Parse individual module path and renaming with `as`
+#
+# flisp: parse-import
+function parse_import(ps::ParseState, word, has_import_prefix)
+    mark = position(ps)
+    parse_import_path(ps)
+    # import A: x, y   ==>  (import (: (importpath A) (importpath x) (importpath y)))
+    if peek(ps) == K"as"
+        # import A as B     ==>  (import (as (importpath A) B))
+        # import A: x as y  ==>  (import (: (importpath A) (as (importpath x) y)))
+        # using  A: x as y  ==>  (using (: (importpath A) (as (importpath x) y)))
+        bump(ps, TRIVIA_FLAG)
+        parse_import_atsym(ps, false)
+        emit(ps, mark, K"as")
+        if word == K"using" && !has_import_prefix
+            # using A as B     ==>  (using (error (as (importpath A) B)))
+            # using A, B as C  ==>  (using (importpath A) (error (as (importpath B) C)))
+            emit(ps, mark, K"error",
+                 error="`using` with `as` renaming requires a `:` and context module")
+        end
+        #v1.5: import A as B     ==>  (import (error (as (importpath A) B)))
+        min_supported_version(v"1.6", ps, mark, "`import ... as`")
+        return true
+    else
+        return false
+    end
+end
+
+# flisp: parse-import-path
+function parse_import_path(ps::ParseState)
+    mark = position(ps)
+    bump_trivia(ps)
+    # The tokenizer produces conjoined dotted tokens .. and ...
+    # When parsing import we must split these into single dots
+    # import .A     ==> (import (importpath . A))
+    # import ..A    ==> (import (importpath . . A))
+    # import ...A   ==> (import (importpath . . . A))
+    # import ....A  ==> (import (importpath . . . . A))
+    # Dots with spaces are allowed (a misfeature?)
+    # import . .A    ==> (import (importpath . . A))
+    # Modules with operator symbol names
+    # import .⋆  ==>  (import (importpath . ⋆))
+    first_dot = true
+    while true
+        t = peek_token(ps)
+        k = kind(t)
+        if !first_dot && preceding_whitespace(t)
+            emit_diagnostic(ps, whitespace=true,
+                            warning="space between dots in import path")
+        end
+        if k == K"."
+            bump(ps)
+        elseif k == K".."
+            bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS))
+        elseif k == K"..."
+            bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS))
+        else
+            break
+        end
+        first_dot = false
+    end
+    # import @x     ==>  (import (importpath (macro_name x)))
+    # import $A     ==>  (import (importpath ($ A)))
+    parse_import_atsym(ps, false)
+    while true
+        t = peek_token(ps)
+        k = kind(t)
+        if k == K"."
+            # import A.B    ==>  (import (importpath A B))
+            # import $A.@x  ==>  (import (importpath ($ A) (macro_name x)))
+            # import A.B.C  ==>  (import (importpath A B C))
+            # import A.⋆.f  ==>  (import (importpath A ⋆ f))
+            next_tok = peek_token(ps, 2)
+            if is_operator(kind(next_tok))
+                if preceding_whitespace(t)
+                    # Whitespace in import path allowed but discouraged
+                    # import A .==  ==>  (import (importpath A ==))
+                    emit_diagnostic(ps, whitespace=true,
+                                    warning="space between dots in import path")
+                end
+                bump_trivia(ps)
+            else
+                bump_disallowed_space(ps)
+            end
+            bump(ps, TRIVIA_FLAG)
+            parse_import_atsym(ps)
+        elseif k == K"..."
+            # Import the .. operator
+            # import A...  ==>  (import (importpath A ..))
+            bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS))
+        elseif k in KSet"NewlineWs ; , : EndMarker"
+            # import A; B  ==>  (import (importpath A))
+            break
+        else
+            # Could we emit a more comprehensible error here?
+            break
+        end
+    end
+    emit(ps, mark, K"importpath")
+end
+
+# parse comma-separated assignments, like "i=1:n,j=1:m,..."
+#
+# flisp: parse-comma-separated
+function parse_comma_separated(ps::ParseState, down)
+    n_subexprs = 0
+    while true
+        down(ps)
+        n_subexprs += 1
+        if peek(ps) == K","
+            bump(ps, TRIVIA_FLAG)
+        else
+            break
+        end
+    end
+    return n_subexprs
+end
+
+# FIXME(sschaub): for backwards compatibility, allows newline before =/in/∈
+# in generator expressions. See issue #37393
+function peek_skip_newline_in_gen(ps::ParseState, n=1)
+    k = peek(ps, n)
+    if ps.for_generator && k == K"NewlineWs"
+        k = peek(ps, n+1)
+    end
+    return k
+end
+
+# parse comma-separated "assignment" but allowing `in` and `∈` as assignment operators
+#
+# i = rhs   ==>  (= i rhs)
+# i in rhs  ==>  (= i rhs)
+# i ∈ rhs   ==>  (= i rhs)
+#
+# i = 1:10       ==>  (= i (call : 1 10))
+# (i,j) in iter  ==>  (= (tuple-p i j) iter)
+#
+# flisp: parse-iteration-spec
+function parse_iteration_spec(ps::ParseState)
+    mark = position(ps)
+    k = peek(ps)
+    # Handle `outer` contextual keyword
+    parse_pipe_lt(with_space_sensitive(ps))
+    if peek_behind(ps).orig_kind == K"outer"
+        if peek_skip_newline_in_gen(ps) in KSet"= in ∈"
+            # Not outer keyword
+            # outer = rhs        ==>  (iteration (in outer rhs))
+            # outer <| x = rhs   ==>  (iteration (in (call-i outer <| x) rhs))
+        else
+            # outer i = rhs      ==>  (iteration (in (outer i) rhs))
+            # outer (x,y) = rhs  ==>  (iteration (in (outer (tuple-p x y)) rhs))
+            reset_node!(ps, position(ps), kind=K"outer", flags=TRIVIA_FLAG)
+            parse_pipe_lt(ps)
+            emit(ps, mark, K"outer")
+        end
+    end
+    if peek_skip_newline_in_gen(ps) in KSet"= in ∈"
+        bump(ps, TRIVIA_FLAG)
+        parse_pipe_lt(ps)
+    else
+        # Recovery heuristic
+        recover(ps, error="invalid iteration spec: expected one of `=` `in` or `∈`") do ps, k
+            k in KSet", NewlineWs" || is_closing_token(ps, k)
+        end
+        # Or try parse_pipe_lt ???
+    end
+    emit(ps, mark, K"in")
+end
+
+# Parse an iteration spec, or a comma separate list of such for for loops and
+# generators
+function parse_iteration_specs(ps::ParseState)
+    mark = position(ps)
+    n_iters = parse_comma_separated(ps, parse_iteration_spec)
+    emit(ps, mark, K"iteration")
+end
+
+# flisp: parse-space-separated-exprs
+function parse_space_separated_exprs(ps::ParseState)
+    ps = with_space_sensitive(ps)
+    n_sep = 0
+    while true
+        k = peek(ps)
+        if is_closing_token(ps, k) || k == K"NewlineWs" ||
+                (ps.for_generator && k == K"for")
+            break
+        end
+        parse_eq(ps)
+        n_sep += 1
+    end
+    return n_sep
+end
+
+# like parse-arglist, but with `for` parsed as a generator
+#
+# flisp: parse-call-arglist
+function parse_call_arglist(ps::ParseState, closer)
+    ps = ParseState(ps, for_generator=true)
+
+    parse_brackets(ps, closer, false) do _, _, _, _
+        return (needs_parameters=true,)
+    end
+end
+
+# Parse the suffix of comma-separated array expressions such as
+# [x, suffix].  Consumes `closer`, but does not emit the AST node for the
+# surrounding brackets.
+#
+# flisp: parse-vect
+function parse_vect(ps::ParseState, closer, prefix_trailing_comma)
+    # [x, y]        ==>  (vect x y)
+    # [x, y]        ==>  (vect x y)
+    # [x,y ; z]     ==>  (vect x y (parameters z))
+    # [x=1, y=2]    ==>  (vect (= x 1) (= y 2))
+    # [x=1, ; y=2]  ==>  (vect (= x 1) (parameters (= y 2)))
+    opts = parse_brackets(ps, closer) do _, _, _, num_subexprs
+        return (needs_parameters=true,
+                num_subexprs=num_subexprs)
+    end
+    delim_flags = opts.delim_flags
+    if opts.num_subexprs == 0 && prefix_trailing_comma
+        delim_flags |= TRAILING_COMMA_FLAG
+    end
+    return (K"vect", delim_flags)
+end
+
+# Parse generators
+#
+# We represent generators quite differently from `Expr`:
+# * Iteration variables and their iterators are grouped within K"iteration"
+#   nodes, as in the short form of `for` loops.
+# * The `generator` kind is used for both cartesian and flattened generators
+#
+# (x for a in as for b in bs) ==> (parens (generator x (iteration (in a as)) (iteration (in b bs))))
+# (x for a in as, b in bs) ==> (parens (generator x (iteration (in a as) (in b bs))))
+# (x for a in as, b in bs if z)  ==> (parens (generator x (filter (iteration (in a as) (in b bs)) z)))
+#
+# flisp: parse-generator
+function parse_generator(ps::ParseState, mark)
+    while (t = peek_token(ps); kind(t) == K"for")
+        if !preceding_whitespace(t)
+            # ((x)for x in xs)  ==>  (parens (generator (parens x) (error) (iteration (in x xs))))
+            bump_invisible(ps, K"error", TRIVIA_FLAG,
+                           error="Expected space before `for` in generator")
+        end
+        bump(ps, TRIVIA_FLAG)
+        iter_mark = position(ps)
+        parse_iteration_specs(ps)
+        if peek(ps) == K"if"
+            # (x for a in as if z) ==> (parens (generator x (filter (iteration (in a as)) z)))
+            bump(ps, TRIVIA_FLAG)
+            parse_cond(ps)
+            emit(ps, iter_mark, K"filter")
+        end
+    end
+    emit(ps, mark, K"generator")
+end
+
+# flisp: parse-comprehension
+function parse_comprehension(ps::ParseState, mark, closer)
+    # [x for a in as] ==> (comprehension (generator x (iteration (in a as))))
+    ps = ParseState(ps, whitespace_newline=true,
+                    space_sensitive=false,
+                    end_symbol=false)
+    parse_generator(ps, mark)
+    bump_closing_token(ps, closer)
+    return (K"comprehension", EMPTY_FLAGS)
+end
+
+# Parse array concatenation syntax with multiple semicolons
+#
+# Normal matrix construction syntax
+# [x y ; z w]     ==>  (vcat (row x y) (row z w))
+# [x y ; z w ; a b]  ==>  (vcat (row x y) (row z w) (row a b))
+# [x ; y ; z]     ==>  (vcat x y z)
+# [x;]            ==>  (vcat x)
+# [x y]           ==>  (hcat x y)
+#
+# Mismatched rows
+# [x y ; z]     ==>  (vcat (row x y) z)
+#
+# Single elements in rows
+#v1.7: [x ; y ;; z ]  ==>  (ncat-2 (nrow-1 x y) z)
+#v1.7: [x  y ;;; z ]  ==>  (ncat-3 (row x y) z)
+#
+# Higher dimensional ncat
+# Row major
+#v1.7: [x y ; z w ;;; a b ; c d]  ==>
+#     (ncat-3 (nrow-1 (row x y) (row z w)) (nrow-1 (row a b) (row c d)))
+# Column major
+#v1.7: [x ; y ;; z ; w ;;; a ; b ;; c ; d]  ==>
+#     (ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))
+#
+# flisp: parse-array
+function parse_array(ps::ParseState, mark, closer, end_is_symbol)
+    ps = ParseState(ps, end_symbol=end_is_symbol)
+
+    array_order = Ref(:unknown)
+    # Outer array parsing loop - parse chain of separators with descending
+    # precedence such as
+    #v1.7: [a ; b ;; c ;;; d ;;;; e] ==> (ncat-4 (ncat-3 (ncat-2 (ncat-1 a b) c) d) e)
+    #
+    # Ascending and equal precedence is handled by parse_array_inner.
+    #
+    # This is a variant of a Pratt parser, but we have a separate outer loop
+    # because there's no minimum precedence/binding power - you can always get
+    # a lower binding power by adding more semicolons.
+    #
+    # For an excellent overview of Pratt parsing, see
+    # https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
+    (dim, binding_power) = parse_array_separator(ps, array_order)
+    if binding_power == typemin(Int)
+        # [x@y  ==>  (hcat x (error-t ✘ y))
+        bump_closing_token(ps, closer)
+        return (K"hcat", 0)
+    end
+    while true
+        (next_dim, next_bp) = parse_array_inner(ps, binding_power, array_order)
+        if next_bp == typemin(Int)
+            break
+        end
+        if binding_power == 0
+            emit(ps, mark, K"row")
+        else
+            emit(ps, mark, K"nrow", set_numeric_flags(dim))
+        end
+        dim = next_dim
+        binding_power = next_bp
+    end
+    bump_closing_token(ps, closer)
+    return binding_power == -1 ? (K"vcat", 0) :
+           binding_power ==  0 ? (K"hcat", 0) :
+           (K"ncat", dim)
+end
+
+# Parse equal and ascending precedence chains of array concatenation operators -
+# semicolons, newlines and whitespace. Invariants:
+#
+# * The caller must have already consumed
+#   - The left hand side
+#   - The concatenation operator, providing `binding_power`.
+#   So eg, we're here in the input stream, either at an element or closing token
+#                |
+#          [a ;; b ; c ]
+#          [a ;; ]
+#
+# * The caller must call emit() to delimit the AST node for this binding power.
+#
+function parse_array_inner(ps, binding_power, array_order)
+    mark = NO_POSITION
+    dim = -1
+    bp = binding_power
+    while true
+        if bp < binding_power
+            return (dim, bp)
+        end
+        # Allow trailing separators
+        # [a ;] ==> (vcat a)
+        # [a ; b;;] ==> (ncat-2 (nrow-1 a b))
+        if is_closing_token(ps, peek(ps))
+            return (typemin(Int), typemin(Int))
+        end
+        if bp == binding_power
+            # Parse one expression
+            mark = position(ps)
+            parse_eq_star(ps)
+            (next_dim, next_bp) = parse_array_separator(ps, array_order)
+        else # bp > binding_power
+            # Recurse to parse a separator with greater binding power. Eg:
+            # [a ;; b ; c ]
+            #       |   ^------ the next input is here
+            #       '---------- the mark is here
+            (next_dim, next_bp) = parse_array_inner(ps, bp, array_order)
+            if bp == 0
+                emit(ps, mark, K"row")
+            else
+                emit(ps, mark, K"nrow", set_numeric_flags(dim))
+            end
+        end
+        dim, bp = next_dim, next_bp
+    end
+end
+
+# Parse a separator in an array concatenation
+#
+# Here we return a tuple (dim, binding_power) containing
+# * Dimension on which the next separator acts
+# * Binding power (precedence) of the separator, where whitespace binds
+#   tightest:  ... < `;;;` < `;;` < `;`,`\n` < whitespace. We choose binding
+#   power of 0 for whitespace and negative numbers for other separators.
+#
+function parse_array_separator(ps, array_order)
+    sep_mismatch_err = "cannot mix space and ;; separators in an array expression, except to wrap a line"
+    mark = position(ps)
+    t = peek_token(ps, skip_newlines=true)
+    if kind(t) == K";"
+        # Newlines before semicolons are not significant
+        # [a \n ;]     ==> (vcat a)
+        bump_trivia(ps)
+        n_semis = 1
+        while true
+            bump(ps, TRIVIA_FLAG)
+            t = peek_token(ps)
+            if kind(t) != K";"
+                break
+            end
+            if preceding_whitespace(t)
+                bump_disallowed_space(ps)
+            end
+            n_semis += 1
+        end
+        had_newline = peek(ps) == K"NewlineWs"
+        # Newlines after semicolons are not significant
+        # [a ; \n]     ==> (vcat a)
+        # [a ; \n\n b] ==> (vcat a b)
+        #v1.7: [a ;; \n b]  ==> (ncat-2 a b)
+        bump_trivia(ps)
+        if n_semis == 2
+            if array_order[] === :row_major
+                if had_newline
+                    # In hcat with spaces as separators, `;;` is a line
+                    # continuation character
+                    #v1.7: [a b ;; \n c]  ==>  (hcat a b c)
+                    #v1.7: [a b \n ;; c]  ==>  (ncat-2 (row a b (error-t)) c)
+                    return (2, 0)
+                else
+                    # Can't mix spaces and multiple ;;
+                    #v1.7:  [a b ;; c]  ==>  (ncat-2 (row a b (error-t)) c)
+                    emit(ps, mark, K"error", TRIVIA_FLAG, error=sep_mismatch_err)
+                end
+            else
+                array_order[] = :column_major
+            end
+        end
+        return (n_semis, -n_semis)
+    end
+    t = peek_token(ps)
+    k = kind(t)
+    if k == K"NewlineWs"
+        bump_trivia(ps)
+        if peek(ps) == K"]"
+            # Linebreaks not significant before closing `]`
+            # [a b\n\n]  ==>  (hcat a b)
+            return (typemin(Int), typemin(Int))
+        else
+            # Treat a linebreak prior to a value as a semicolon (ie, separator
+            # for the first dimension) if no previous semicolons observed
+            # [a \n b]  ==> (vcat a b)
+            return (1, -1)
+        end
+    elseif k == K","
+        # Treat `,` as semicolon for the purposes of recovery
+        # [a; b, c] ==> (vcat a b (error-t) c)
+        bump(ps, TRIVIA_FLAG, error="unexpected comma in array expression")
+        return (1, -1)
+    else
+        if preceding_whitespace(t) && !is_closing_token(ps, k)
+            if array_order[] === :column_major
+                # Can't mix multiple ;'s and spaces
+                #v1.7:  [a ;; b c]  ==>  (ncat-2 a (row b (error-t) c))
+                bump_trivia(ps, TRIVIA_FLAG, error=sep_mismatch_err)
+            else
+                array_order[] = :row_major
+            end
+            return (2, 0)
+        else
+            # Something else; use typemin to exit array parsing
+            return (typemin(Int), typemin(Int))
+        end
+    end
+end
+
+# Parse array concatenation/construction/indexing syntax inside of `[]` or `{}`.
+# The opening bracket has been consumed.
+#
+# flisp: parse-cat
+function parse_cat(ps::ParseState, closer, end_is_symbol)
+    ps = ParseState(ps, range_colon_enabled=true,
+                    space_sensitive=true,
+                    where_enabled=true,
+                    whitespace_newline=false,
+                    for_generator=true)
+    k = peek(ps, skip_newlines=true)
+    mark = position(ps)
+    if k == closer
+        # []  ==>  (vect)
+        ckind, cflags = parse_vect(ps, closer, false)
+        return (ckind, cflags, 0)
+    elseif k == K";"
+        #v1.8: [;]           ==>  (ncat-1)
+        #v1.8: [;;]          ==>  (ncat-2)
+        #v1.8: [\n  ;; \n ]  ==>  (ncat-2)
+        #v1.7: [;;]          ==>  (ncat-2 (error))
+        bump_trivia(ps)
+        dim, _ = parse_array_separator(ps, Ref(:unknown))
+        min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax")
+        bump_closing_token(ps, closer)
+        return (K"ncat", EMPTY_FLAGS, dim)
+    end
+    parse_eq_star(ps)
+    k = peek(ps, skip_newlines=true)
+    if k == K"," || (is_closing_token(ps, k) && k != K";")
+        prefix_trailing_comma = k == K","
+        if prefix_trailing_comma
+            # [x,]  ==>  (vect x)
+            bump(ps, TRIVIA_FLAG; skip_newlines = true)
+        end
+        # [x]      ==>  (vect x)
+        # [x \n ]  ==>  (vect x)
+        # [x       ==>  (vect x (error-t))
+        ckind, cflags = parse_vect(ps, closer, prefix_trailing_comma)
+        return (ckind, cflags, 0)
+    elseif k == K"for"
+        # [x for a in as]  ==>  (comprehension (generator x (iteration (in a as))))
+        # [x \n\n for a in as]  ==>  (comprehension (generator x (iteration (in a as))))
+        ckind, cflags = parse_comprehension(ps, mark, closer)
+        return (ckind, cflags, 0)
+    else
+        # [x y]  ==>  (hcat x y)
+        # and other forms; See parse_array.
+        ckind, dim = parse_array(ps, mark, closer, end_is_symbol)
+        return (ckind, EMPTY_FLAGS, dim)
+    end
+end
+
+function check_ncat_compat(ps, mark, k)
+    # https://github.com/JuliaLang/julia/pull/33697
+    if k == K"ncat"
+        min_supported_version(v"1.7", ps, mark, "multidimensional array syntax")
+    end
+end
+
+# Parse un-prefixed parenthesized syntax. This is hard because parentheses are
+# *very* overloaded!
+#
+# flisp: parse-paren / parse-paren-
+function parse_paren(ps::ParseState, check_identifiers=true, has_unary_prefix=false)
+    ps = ParseState(ps, range_colon_enabled=true,
+                    space_sensitive=false,
+                    where_enabled=true,
+                    whitespace_newline=true)
+    mark = position(ps)
+    @check peek(ps) == K"("
+    bump(ps, TRIVIA_FLAG) # K"("
+    after_paren_mark = position(ps)
+    (isdot, tok) = peek_dotted_op_token(ps)
+    k = kind(tok)
+    if k == K")"
+        # ()  ==>  (tuple-p)
+        bump(ps, TRIVIA_FLAG)
+        emit(ps, mark, K"tuple", PARENS_FLAG)
+    elseif is_syntactic_operator(k)
+        # allow :(=) etc in unchecked contexts, eg quotes
+        # :(=)  ==>  (quote-: (parens =))
+        parse_atom(ps, check_identifiers)
+        bump_closing_token(ps, K")")
+        emit(ps, mark, K"parens")
+    elseif !check_identifiers && k == K"::" &&
+            peek(ps, 2, skip_newlines=true) == K")"
+        # allow :(::) as a special case
+        # :(::)  ==>  (quote-: (parens ::))
+        bump(ps)
+        bump(ps, TRIVIA_FLAG, skip_newlines=true)
+        emit(ps, mark, K"parens")
+    else
+        # Deal with all other cases of tuple or block syntax via the generic
+        # parse_brackets
+        initial_semi = peek(ps) == K";"
+        opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs
+            is_tuple = had_commas || (had_splat && num_semis >= 1) ||
+                       (initial_semi && (num_semis == 1 || num_subexprs > 0)) ||
+                       (peek(ps, 2) == K"->" && (peek_behind(ps).kind != K"where" && !has_unary_prefix))
+            return (needs_parameters=is_tuple,
+                    is_tuple=is_tuple,
+                    is_block=num_semis > 0)
+        end
+        if opts.is_tuple
+            # Tuple syntax with commas
+            # (x,)        ==>  (tuple-p x)
+            # (x,y)       ==>  (tuple-p x y)
+            # (x=1, y=2)  ==>  (tuple-p (= x 1) (= y 2))
+            #
+            # Named tuple with initial semicolon
+            # (;)         ==>  (tuple-p (parameters))
+            # (; a=1)     ==>  (tuple-p (parameters (= a 1)))
+            #
+            # Extra credit: nested parameters and frankentuples
+            # (x...;)         ==> (tuple-p (... x) (parameters))
+            # (x...; y)       ==> (tuple-p (... x) (parameters y))
+            # (; a=1; b=2)    ==> (tuple-p (parameters (= a 1)) (parameters (= b 2)))
+            # (a; b; c,d)     ==> (tuple-p a (parameters b) (parameters c d))
+            # (a=1, b=2; c=3) ==> (tuple-p (= a 1) (= b 2) (parameters (= c 3)))
+            emit(ps, mark, K"tuple", PARENS_FLAG|opts.delim_flags)
+        elseif opts.is_block
+            # Blocks
+            # (;;)        ==>  (block-p)
+            # (a=1;)      ==>  (block-p (= a 1))
+            # (a;b;;c)    ==>  (block-p a b c)
+            # (a=1; b=2)  ==>  (block-p (= a 1) (= b 2))
+            emit(ps, mark, K"block", PARENS_FLAG)
+        else
+            # Parentheses used for grouping
+            # (a * b)     ==>  (parens (call-i * a b))
+            # (a=1)       ==>  (parens (= a 1))
+            # (x)         ==>  (parens x)
+            # (a...)      ==>  (parens (... a))
+            emit(ps, mark, K"parens")
+        end
+    end
+end
+
+# Handle bracketed syntax inside any of () [] or {} where there's a mixture
+# of commas and semicolon delimiters.
+#
+# For parentheses this is tricky because there's various cases to disambiguate,
+# depending on outside context and the content of the brackets (number of
+# semicolons, presence of commas or splats). The `after_parse` function must be
+# provided by the caller to disambiguate these cases.
+#
+# Expressions (X; Y; Z) with more semicolons are also allowed by the flisp
+# parser and generally parse as nested parameters blocks. This is invalid Julia
+# syntax so the parse tree is pretty strange in these cases!  Some macros
+# probably use it though.  Example:
+#
+# (a,b=1; c,d=2; e,f=3)  ==>  (tuple-p a (= b 1) (parameters c (= d 2)) (parameters e (= f 3)))
+#
+# flisp: parts of parse-paren- and parse-arglist
+function parse_brackets(after_parse::Function,
+                        ps::ParseState, closing_kind, generator_is_last=true)
+    ps = ParseState(ps, range_colon_enabled=true,
+                    space_sensitive=false,
+                    where_enabled=true,
+                    whitespace_newline=true)
+    params_positions = acquire_positions(ps.stream)
+    last_eq_before_semi = 0
+    num_subexprs = 0
+    num_semis = 0
+    had_commas = false
+    had_splat = false
+    param_start = nothing
+    trailing_comma = false
+    while true
+        k = peek(ps)
+        if k == closing_kind
+            break
+        elseif k == K";"
+            # Start of parameters list
+            # a, b; c d  ==>  a b (parameters c d)
+            if !isnothing(param_start)
+                push!(params_positions, emit(ps, param_start, K"TOMBSTONE"))
+            end
+            num_semis += 1
+            param_start = position(ps)
+            bump(ps, TRIVIA_FLAG)
+            bump_trivia(ps)
+        elseif is_closing_token(ps, k)
+            trailing_comma = false
+            # Error; handled below in bump_closing_token
+            break
+        else
+            mark = position(ps)
+            parse_eq_star(ps)
+            trailing_comma = false
+            num_subexprs += 1
+            if num_subexprs == 1
+                had_splat = peek_behind(ps).kind == K"..."
+            end
+            k = peek(ps, skip_newlines=true)
+            if k == K"for"
+                # Generator syntax
+                # (x for a in as)  ==>  (parens (generator x (iteration (in a as))))
+                parse_generator(ps, mark)
+                if generator_is_last
+                    break
+                end
+                k = peek(ps, skip_newlines=true)
+            end
+            if k == K","
+                had_commas = true
+                bump(ps, TRIVIA_FLAG)
+                trailing_comma = true
+            elseif k == K";" || k == closing_kind
+                # Handled above
+                continue
+            else
+                # Error - recovery done when consuming closing_kind
+                break
+            end
+        end
+    end
+    if !isnothing(param_start) && position(ps) != param_start
+        push!(params_positions, emit(ps, param_start, K"TOMBSTONE"))
+    end
+    opts = after_parse(had_commas, had_splat, num_semis, num_subexprs)
+    # Emit nested parameter nodes if necessary
+    if opts.needs_parameters
+        for pos in params_positions
+            reset_node!(ps, pos, kind=K"parameters")
+        end
+    end
+    release_positions(ps.stream, params_positions)
+    bump_closing_token(ps, closing_kind, " or `,`")
+    return (; opts..., delim_flags=trailing_comma ? TRAILING_COMMA_FLAG : EMPTY_FLAGS)
+end
+
+_is_indentation(b::UInt8) = (b == u8" " || b == u8"\t")
+
+# Parse a string, embedded interpolations and deindent triple quoted strings
+# by marking indentation characters as whitespace trivia.
+#
+# flisp: parse-string-literal-, parse-interpolate
+function parse_string(ps::ParseState, raw::Bool)
+    mark = position(ps)
+    delim_k = peek(ps)
+    triplestr = delim_k in KSet"\"\"\" ```"
+    string_chunk_kind = delim_k in KSet"\" \"\"\"" ? K"String" : K"CmdString"
+    indent_ref_i = 0
+    indent_ref_len = typemax(Int)
+    indent_chunks = acquire_positions(ps.stream)
+    txtbuf = unsafe_textbuf(ps)
+    chunk_flags = raw ? RAW_STRING_FLAG : EMPTY_FLAGS
+    bump(ps, TRIVIA_FLAG)
+    first_chunk = true
+    n_nontrivia_chunks = 0
+    removed_initial_newline = false
+    had_interpolation = false
+    prev_chunk_newline = false
+    while true
+        t = peek_full_token(ps)
+        k = kind(t)
+        if k == K"$"
+            if raw
+                # FIXME: This case is actually a tokenization error:
+                # The `K"$"` token should not occur when a raw string
+                # is being parsed, but this would require the lexer to know
+                # about the parse state. (see also parse_atom)
+                break
+            end
+            if prev_chunk_newline
+                # """\n$x\n a"""  ==>  (string-s x "\n" " a")
+                indent_ref_i = first_byte(t)
+                indent_ref_len = 0
+            end
+            bump(ps, TRIVIA_FLAG)
+            k = peek(ps)
+            if k == K"("
+                # "a $(x + y) b"  ==>  (string "a " (parens (call-i x + y)) " b")
+                # "hi$("ho")"     ==>  (string "hi" (parens (string "ho")))
+                m = position(ps)
+                bump(ps, TRIVIA_FLAG)
+                opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs
+                    return (needs_parameters=false,
+                            simple_interp=!had_commas && num_semis == 0 && num_subexprs == 1)
+                end
+                if !opts.simple_interp || peek_behind(ps, skip_parens=false).kind == K"generator"
+                    # "$(x,y)" ==> (string (parens (error x y)))
+                    emit(ps, m, K"error", error="invalid interpolation syntax")
+                end
+                emit(ps, m, K"parens")
+            elseif k == K"var"
+                # var identifiers disabled in strings
+                # "$var"  ==>  (string var)
+                bump(ps, remap_kind=K"Identifier")
+            elseif k == K"Identifier" || is_keyword(k) || is_word_operator(k)
+                # "a $foo b"  ==> (string "a " foo " b")
+                # "$outer"    ==> (string outer)
+                # "$in"       ==> (string in)
+                parse_atom(ps)
+            else
+                bump_invisible(ps, K"error",
+                    error="identifier or parenthesized expression expected after \$ in string")
+            end
+            first_chunk = false
+            n_nontrivia_chunks += 1
+            had_interpolation = true
+            prev_chunk_newline = false
+        elseif k == string_chunk_kind
+            if triplestr && first_chunk && span(t) <= 2 &&
+                    begin
+                        s = span(t)
+                        b = txtbuf[last_byte(t)]
+                        # Test whether the string is a single logical newline
+                        (s == 1 && (b == u8"\n" || b == u8"\r")) ||
+                        (s == 2 && (txtbuf[first_byte(t)] == u8"\r" && b == u8"\n"))
+                    end
+                # First line of triple string is a newline only: mark as trivia.
+                # """\nx"""    ==> (string-s "x")
+                # """\n\nx"""  ==> (string-s "\n" "x")
+                bump(ps, TRIVIA_FLAG)
+                first_chunk = false
+                prev_chunk_newline = true
+            else
+                if triplestr
+                    # Triple-quoted dedenting:
+                    # Various newlines (\n \r \r\n) and whitespace (' ' \t)
+                    # """\n x\n y"""      ==> (string-s "x\n" "y")
+                    # ```\n x\n y```      ==> (macrocall :(Core.var"@cmd") (cmdstring-s-r "x\n" "y"))
+                    # """\r x\r y"""      ==> (string-s "x\n" "y")
+                    # """\r\n x\r\n y"""  ==> (string-s "x\n" "y")
+                    # Spaces or tabs or mixtures acceptable
+                    # """\n\tx\n\ty"""    ==> (string-s "x\n" "y")
+                    # """\n \tx\n \ty"""  ==> (string-s "x\n" "y")
+                    #
+                    # Mismatched tab vs space not deindented
+                    # Find minimum common prefix in mismatched whitespace
+                    # """\n\tx\n y"""     ==> (string-s "\tx\n" " y")
+                    # """\n x\n  y"""   ==> (string-s "x\n" " y")
+                    # """\n  x\n y"""   ==> (string-s " x\n" "y")
+                    # """\n \tx\n  y""" ==> (string-s "\tx\n" " y")
+                    # """\n  x\n \ty""" ==> (string-s " x\n" "\ty")
+                    #
+                    # Empty lines don't affect dedenting
+                    # """\n x\n\n y"""    ==> (string-s "x\n" "\n" "y")
+                    # Non-empty first line doesn't participate in deindentation
+                    # """ x\n y"""    ==> (string-s " x\n" "y")
+                    #
+                    # Dedenting and interpolations
+                    # """\n  $a\n  $b"""    ==> (string-s a "\n" b)
+                    # """\n  $a \n  $b"""   ==> (string-s a " \n" b)
+                    # """\n  $a\n  $b\n"""  ==> (string-s "  " a "\n" "  " b "\n")
+                    #
+                    if prev_chunk_newline && (b = txtbuf[first_byte(t)];
+                                              b != u8"\n" && b != u8"\r")
+                        # Compute length of longest common prefix of mixed
+                        # spaces and tabs, in bytes
+                        #
+                        # Initial whitespace is never regarded as indentation
+                        # in any triple quoted string chunk, as it's always
+                        # preceded in the source code by a visible token of
+                        # some kind; either a """ delimiter or $()
+                        # interpolation.
+                        if indent_ref_i == 0
+                            # No indentation found yet. Find indentation we'll
+                            # use as a reference
+                            i = first_byte(t) - 1
+                            while i < last_byte(t) && _is_indentation(txtbuf[i+1])
+                                i += 1
+                            end
+                            indent_ref_i = first_byte(t)
+                            indent_ref_len = i - first_byte(t) + 1
+                        else
+                            # Matching the current indentation with reference,
+                            # shortening length if necessary.
+                            j = 0
+                            while j < span(t) && j < indent_ref_len
+                                if txtbuf[j + first_byte(t)] != txtbuf[j + indent_ref_i]
+                                    break
+                                end
+                                j += 1
+                            end
+                            indent_ref_len = min(indent_ref_len, j)
+                        end
+                        # Prepare a place for indentiation trivia, if necessary
+                        push!(indent_chunks, bump_invisible(ps, K"TOMBSTONE"))
+                    end
+                    b = txtbuf[last_byte(t)]
+                    prev_chunk_newline = b == UInt8('\n') || b == UInt8('\r')
+                end
+                bump(ps, chunk_flags)
+                first_chunk = false
+                n_nontrivia_chunks += 1
+            end
+        elseif  k == K"ErrorInvalidInterpolationTerminator" ||
+                k == K"ErrorBidiFormatting" ||
+                k == K"ErrorInvalidUTF8"
+            # Treat these errors as string chunks
+            bump(ps)
+            n_nontrivia_chunks += 1
+        else
+            break
+        end
+    end
+    had_end_delim = peek(ps) == delim_k
+    if triplestr && prev_chunk_newline && had_end_delim
+        # Newline at end of string
+        # """\n x\n y\n"""    ==> (string-s " x\n" " y\n")
+        indent_ref_len = 0
+    end
+    if triplestr && indent_ref_len > 0
+        for pos in indent_chunks
+            reset_node!(ps, pos, kind=K"Whitespace", flags=TRIVIA_FLAG)
+            rhs_empty = steal_token_bytes!(ps, pos, indent_ref_len)
+            if rhs_empty
+                # Empty chunks after dedent are removed
+                # """\n \n """        ==> (string-s "\n")
+                n_nontrivia_chunks -= 1
+            end
+        end
+    end
+    release_positions(ps.stream, indent_chunks)
+    if had_end_delim
+        if n_nontrivia_chunks == 0
+            # Empty strings, or empty after triple quoted processing
+            # "" ==> (string "")
+            # """\n  """ ==> (string-s "")
+            bump_invisible(ps, string_chunk_kind, chunk_flags)
+        end
+        bump(ps, TRIVIA_FLAG)
+    else
+        # Missing delimiter recovery
+        # "str   ==> (string "str" (error-t))
+        bump_invisible(ps, K"error", TRIVIA_FLAG, error="unterminated string literal")
+    end
+    # String interpolations
+    # "$x$y$z"  ==> (string x y z)
+    # "$(x)"    ==> (string (parens x))
+    # "$x"      ==> (string x)
+    # """$x"""  ==> (string-s x)
+    #
+    # Strings with embedded whitespace trivia
+    # "a\\\nb"      ==> (string "a" "b")
+    # "a\\\rb"      ==> (string "a" "b")
+    # "a\\\r\nb"    ==> (string "a" "b")
+    # "a\\\n \tb"   ==> (string "a" "b")
+    #
+    # Strings with only a single valid string chunk
+    # "str"     ==> (string "str")
+    # "a\\\n"   ==> (string "a")
+    # "a\\\r"   ==> (string "a")
+    # "a\\\r\n" ==> (string "a")
+    string_kind = delim_k in KSet"\" \"\"\"" ? K"string" : K"cmdstring"
+    str_flags = (triplestr ? TRIPLE_STRING_FLAG : EMPTY_FLAGS) |
+                (raw       ? RAW_STRING_FLAG : EMPTY_FLAGS)
+    emit(ps, mark, string_kind, str_flags)
+end
+
+function emit_braces(ps, mark, ckind, cflags, dim=0)
+    if ckind == K"hcat"
+        # {x y}  ==>  (bracescat (row x y))
+        emit(ps, mark, K"row", cflags & ~TRAILING_COMMA_FLAG)
+    elseif ckind == K"ncat"
+        # {x ;;; y}  ==>  (bracescat (nrow-3 x y))
+        emit(ps, mark, K"nrow", set_numeric_flags(dim))
+    end
+    check_ncat_compat(ps, mark, ckind)
+    outk = ckind in KSet"vect comprehension" ? K"braces" : K"bracescat"
+    delim_flags = outk == K"braces" ? (cflags & TRAILING_COMMA_FLAG) : EMPTY_FLAGS
+    emit(ps, mark, outk, delim_flags)
+end
+
+# parse numbers, identifiers, parenthesized expressions, lists, vectors, etc.
+#
+# If `check_identifiers` is true, identifiers are disallowed from being one of
+# the syntactic operators or closing tokens.
+#
+# flisp: parse-atom
+function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=false)
+    bump_trivia(ps)
+    mark = position(ps)
+    (leading_dot, leading_tok) = peek_dotted_op_token(ps)
+    leading_kind = kind(leading_tok)
+    # todo: Reorder to put most likely tokens first?
+    if leading_dot
+        is_operator(leading_kind) && @goto is_operator
+        bump(ps, remap_kind=K"Identifier")
+        if check_identifiers
+            # .   ==> (error .)
+            emit(ps, mark, K"error", error="invalid identifier")
+        end
+    elseif is_error(leading_kind)
+        # Errors for bad tokens are emitted in validate_tokens() rather than
+        # here.
+        bump(ps)
+    elseif leading_kind == K"'"
+        # char literal
+        bump(ps, TRIVIA_FLAG)
+        k = peek(ps)
+        if k == K"'"
+            # ''  ==>  (char (error))
+            bump_invisible(ps, K"error", error="empty character literal")
+            bump(ps, TRIVIA_FLAG)
+        elseif k == K"EndMarker"
+            # '   ==>  (char (error))
+            bump_invisible(ps, K"error", error="unterminated character literal")
+        else
+            if k == K"Char"
+                bump(ps)
+            elseif is_error(k)
+                bump(ps)
+            else
+                # FIXME: This case is actually a tokenization error.
+                # Make a best-effort attempt to workaround this for now by
+                # remapping the kind. This needs to be fixed by rewinding the
+                # tokenizer's buffer and re-tokenizing the next token as a
+                # char. (A lot of work for a very obscure edge case)
+                #
+                # x in'c'  ==>  (call-i x in (char 'c'))
+                bump(ps, remap_kind=K"Char")
+            end
+            if peek(ps) == K"'"
+                # 'a'         ==>  (char 'a')
+                # 'α'         ==>  (char 'α')
+                # '\xce\xb1'  ==>  (char 'α')
+                bump(ps, TRIVIA_FLAG)
+            else
+                # 'a  ==>  (char 'a' (error-t))
+                bump_invisible(ps, K"error", TRIVIA_FLAG,
+                               error="unterminated character literal")
+            end
+        end
+        emit(ps, mark, K"char")
+    elseif leading_kind == K"Char"
+        # FIXME: This is a tokenization error and should be preceded with
+        # K"'". However this workaround is better than emitting a bare Char.
+        bump(ps, remap_kind=K"Identifier")
+    elseif leading_kind == K":"
+        # symbol/expression quote
+        # :foo  ==>  (quote-: foo)
+        t = peek_token(ps, 2)
+        k = kind(t)
+        if is_closing_token(ps, k) && (!is_keyword(k) || preceding_whitespace(t))
+            # : is a literal colon in some circumstances
+            # :)     ==>  :
+            # : end  ==>  :
+            bump(ps) # K":"
+            return
+        end
+        bump(ps, TRIVIA_FLAG) # K":"
+        if preceding_whitespace(t)
+            # : foo   ==>  (quote-: (error-t) foo)
+            # :\nfoo  ==>  (quote-: (error-t) foo)
+            bump_trivia(ps, TRIVIA_FLAG,
+                        error="whitespace not allowed after `:` used for quoting")
+        end
+        # Being inside quote makes keywords into identifiers at the
+        # first level of nesting
+        # :end ==> (quote-: end)
+        # :(end) ==> (quote-: (parens (error-t)))
+        # Being inside quote makes end non-special again (issue #27690)
+        # a[:(end)]  ==>  (ref a (quote-: (error-t end)))
+        parse_atom(ParseState(ps, end_symbol=false), false)
+        emit(ps, mark, K"quote", COLON_QUOTE)
+    elseif check_identifiers && leading_kind == K"=" && is_plain_equals(peek_token(ps)) && !leading_dot
+        # =   ==> (error =)
+        bump(ps, error="unexpected `=`")
+    elseif leading_kind == K"Identifier"
+        # xx  ==>  xx
+        # x₁  ==>  x₁
+        bump(ps)
+    elseif is_word_operator(leading_kind)
+        # where=1 ==> (= where 1)
+        bump(ps, remap_kind=K"Identifier")
+    elseif is_operator(leading_kind)
+@label is_operator
+        # +     ==>  +
+        # .+    ==>  (. +)
+        bump_dotted(ps, leading_dot, emit_dot_node=true, remap_kind=
+                      is_syntactic_operator(leading_kind) ? leading_kind : K"Identifier")
+        if check_identifiers && !is_valid_identifier(leading_kind)
+            # +=   ==>  (error (op= +))
+            # ?    ==>  (error ?)
+            # .+=  ==>  (error (. (op= +)))
+            emit(ps, mark, K"error", error="invalid identifier")
+        else
+            # Quoted syntactic operators allowed
+            # :+=  ==>  (quote-: (op= +))
+        end
+    elseif is_keyword(leading_kind)
+        if leading_kind == K"var" && (t = peek_token(ps,2);
+                                      kind(t) == K"\"" && !preceding_whitespace(t))
+            # var"x"     ==> (var x)
+            # Raw mode unescaping
+            # var""     ==> (var )
+            # var"\""   ==> (var ")
+            # var"\\""  ==> (var \")
+            # var"\\x"  ==> (var \\x)
+            #
+            # NB: Triple quoted var identifiers are not implemented, but with
+            # the complex deindentation rules they seem like a misfeature
+            # anyway, maybe?
+            # var"""x""" !=> x
+            bump(ps, TRIVIA_FLAG)
+            bump(ps, TRIVIA_FLAG)
+            if peek(ps) == K"String"
+                bump(ps, RAW_STRING_FLAG; remap_kind=K"Identifier")
+            else
+                bump_invisible(ps, K"Identifier", RAW_STRING_FLAG)
+            end
+            if peek(ps) == K"\""
+                bump(ps, TRIVIA_FLAG)
+            else
+                bump_invisible(ps, K"error", TRIVIA_FLAG,
+                               error="unterminated `var\"\"` identifier")
+            end
+            t = peek_token(ps)
+            k = kind(t)
+            if preceding_whitespace(t) || is_operator(k) ||
+                    k in KSet"( ) [ ] { } , ; @ EndMarker"
+                # var"x"+  ==>  x
+                # var"x")  ==>  x
+                # var"x"(  ==>  x
+            elseif is_string_macro_suffix(k)
+                # var"x"end  ==>  (var x (error-t))
+                # var"x"1    ==>  (var x (error-t))
+                # var"x"y    ==>  (var x (error-t))
+                bump(ps, TRIVIA_FLAG, error="suffix not allowed after `var\"...\"` syntax")
+            elseif k == K"`" || k == K"\"" || k == K"\"\"\"" || k == K"```"
+                # Disallow `var"#""str". To allow this we'd need to fix `raw`
+                # detection in lex_quote to be consistent with the parser.
+                bump_invisible(ps, K"error", TRIVIA_FLAG,
+                               error="`var\"...\"` syntax not supported as string macro name")
+            end
+            emit(ps, mark, K"var")
+        elseif check_identifiers && is_closing_token(ps, leading_kind)
+            # :(end)  ==>  (quote-: (error end))
+            bump(ps, error="invalid identifier")
+        else
+            # Remap keywords to identifiers.
+            # :end  ==>  (quote-: end)
+            # :<:   ==> (quote-: <:)
+            bump(ps, remap_kind=K"Identifier")
+        end
+    elseif leading_kind == K"(" # parens or tuple
+        parse_paren(ps, check_identifiers, has_unary_prefix)
+    elseif leading_kind == K"[" # cat expression
+        bump(ps, TRIVIA_FLAG)
+        ckind, cflags, dim = parse_cat(ps, K"]", ps.end_symbol)
+        emit(ps, mark, ckind, cflags | set_numeric_flags(dim))
+        check_ncat_compat(ps, mark, ckind)
+    elseif leading_kind == K"{" # cat expression
+        bump(ps, TRIVIA_FLAG)
+        ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol)
+        emit_braces(ps, mark, ckind, cflags, dim)
+    elseif leading_kind == K"@" # macro call
+        # Macro names can be keywords
+        # @end x  ==> (macrocall (macro_name end) x)
+        bump(ps, TRIVIA_FLAG)
+        parse_macro_name(ps)
+        parse_call_chain(ps, mark, true)
+    elseif is_string_delim(leading_kind)
+        parse_string(ps, false)
+    elseif leading_kind in KSet"` ```"
+        # ``          ==>  (cmdstring-r "")
+        # `cmd`       ==>  (cmdstring-r "cmd")
+        # ```cmd```   ==>  (cmdstring-s-r "cmd")
+        parse_string(ps, true)
+    elseif is_literal(leading_kind)
+        # 42   ==>  42
+        bump(ps)
+    elseif is_closing_token(ps, leading_kind)
+        # Leave closing token in place for other productions to
+        # recover with
+        # )  ==>  error
+        msg = leading_kind == K"EndMarker" ?
+              "premature end of input" :
+              "unexpected `$(untokenize(leading_kind))`"
+        emit_diagnostic(ps, error=msg)
+        bump_invisible(ps, K"error")
+    else
+        bump(ps, error="invalid syntax atom")
+    end
+end
diff --git a/JuliaSyntax/src/julia/parser_api.jl b/JuliaSyntax/src/julia/parser_api.jl
new file mode 100644
index 0000000000000..a3e2162bc985b
--- /dev/null
+++ b/JuliaSyntax/src/julia/parser_api.jl
@@ -0,0 +1,223 @@
+# The main parser API.
+#
+# This is defined separately from parser.jl so that:
+# * parser.jl doesn't need to refer to any tree data structures
+# * It's clear which parts are the public API
+
+struct ParseError <: Exception
+    source::SourceFile
+    diagnostics::Vector{Diagnostic}
+    incomplete_tag::Symbol # Used only for Base Expr(:incomplete) support
+end
+
+function ParseError(stream::ParseStream; incomplete_tag=:none, kws...)
+    source = SourceFile(stream; kws...)
+    ParseError(source, stream.diagnostics, incomplete_tag)
+end
+
+function Base.showerror(io::IO, err::ParseError)
+    # Only show the first parse error for now - later errors are often
+    # misleading due to the way recovery works
+    i = findfirst(is_error, err.diagnostics)
+    if isnothing(i)
+        i = lastindex(err.diagnostics)
+        level_info = " some warnings detected:"
+    else
+        level_info = ""
+    end
+    println(io, "ParseError:", level_info)
+    show_diagnostics(io, err.diagnostics[1:i], err.source)
+end
+
+sourcefile(err::ParseError) = err.source
+
+"""
+    parse!(stream::ParseStream; rule=:all)
+
+Parse Julia source code from a [`ParseStream`](@ref) object. Output tree data
+structures may be extracted from `stream` with the [`build_tree`](@ref) function.
+
+`rule` may be any of
+* `:all` (default) — parse a whole "file" of top level statements. In this
+  mode, the parser expects to fully consume the input.
+* `:statement` — parse a single statement, or statements separated by semicolons.
+* `:atom` — parse a single syntax "atom": a literal, identifier, or
+  parenthesized expression.
+"""
+function parse!(stream::ParseStream; rule::Symbol=:all)
+    if rule == :toplevel
+        Base.depwarn("Use of rule == :toplevel in parse!() is deprecated. use `rule=:all` instead.", :parse!)
+        rule = :all
+    end
+    ps = ParseState(stream)
+    if rule === :all
+        parse_toplevel(ps)
+    elseif rule === :statement
+        parse_stmts(ps)
+    elseif rule === :atom
+        parse_atom(ps)
+    else
+        throw(ArgumentError("Unknown grammar rule $rule"))
+    end
+    validate_tokens(stream)
+    stream
+end
+
+"""
+    parse!(TreeType, io::IO; rule=:all, version=VERSION)
+
+Parse Julia source code from a seekable `IO` object. The output is a tuple
+`(tree, diagnostics)`. When `parse!` returns, the stream `io` is positioned
+directly after the last byte which was consumed during parsing.
+"""
+function parse!(::Type{TreeType}, io::IO;
+                rule::Symbol=:all, version=VERSION, kws...) where {TreeType}
+    stream = ParseStream(io; version=version)
+    parse!(stream; rule=rule)
+    tree = build_tree(TreeType, stream; kws...)
+    seek(io, last_byte(stream))
+    tree, stream.diagnostics
+end
+
+function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=VERSION,
+                ignore_trivia=true, filename=nothing, first_line=1, ignore_errors=false,
+                ignore_warnings=ignore_errors, kws...) where {T}
+    stream = ParseStream(text, index; version=version)
+    if ignore_trivia && rule != :all
+        bump_trivia(stream, skip_newlines=true)
+    end
+    parse!(stream; rule=rule)
+    if need_eof
+        if (ignore_trivia  && peek(stream, skip_newlines=true) != K"EndMarker") ||
+           (!ignore_trivia && (peek(stream, skip_newlines=false, skip_whitespace=false) != K"EndMarker"))
+            emit_diagnostic(stream, error="unexpected text after parsing $rule")
+        end
+    end
+    if (!ignore_errors && any_error(stream.diagnostics)) ||
+          (!ignore_warnings && !isempty(stream.diagnostics))
+        throw(ParseError(stream, filename=filename, first_line=first_line))
+    end
+    tree = build_tree(T, stream; filename=filename, first_line=first_line, kws...)
+    tree, last_byte(stream) + 1
+end
+
+_parse_docs = """
+    # Parse a single expression/statement
+    parsestmt(TreeType, text, [index];
+              version=VERSION,
+              ignore_trivia=true,
+              filename=nothing,
+              ignore_errors=false,
+              ignore_warnings=ignore_errors)
+
+    # Parse all statements at top level (file scope)
+    parseall(...)
+
+    # Parse a single syntax atom
+    parseatom(...)
+
+Parse Julia source code string `text` into a data structure of type `TreeType`.
+`parsestmt` parses a single Julia statement, `parseall` parses top level statements
+at file scope and `parseatom` parses a single Julia identifier or other "syntax
+atom".
+
+If `text` is passed without `index`, all the input text must be consumed and a
+tree data structure is returned. When an integer byte `index` is passed, a
+tuple `(tree, next_index)` will be returned containing the next index in `text`
+to resume parsing. By default whitespace and comments before and after valid
+code are ignored but you can turn this off by setting `ignore_trivia=false`.
+
+`version` (default `VERSION`) may be used to set the syntax version to
+any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been
+added after v"1.0", emitting an error if it's not compatible with the requested
+`version`.
+
+Pass `filename` to set any file name information embedded within the output
+tree, if applicable. This will also annotate errors and warnings with the
+source file name.
+
+A `ParseError` will be thrown if any errors or warnings occurred during
+parsing. To avoid exceptions due to warnings, use `ignore_warnings=true`. To
+also avoid exceptions due to errors, use `ignore_errors=true`.
+"""
+
+"$_parse_docs"
+parsestmt(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:statement, true, T, text; kws...)[1]
+
+"$_parse_docs"
+parseall(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:all, true, T, text; kws...)[1]
+
+"$_parse_docs"
+parseatom(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:atom, true, T, text; kws...)[1]
+
+parsestmt(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:statement, false, T, text, index; kws...)
+parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:all, false, T, text, index; kws...)
+parseatom(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:atom, false, T, text, index; kws...)
+
+#-------------------------------------------------------------------------------
+# Tokens interface
+"""
+Token type resulting from calling `tokenize(text)`
+
+Use
+* `kind(tok)` to get the token kind
+* `untokenize(tok, text)` to retrieve the text
+* Predicates like `is_error(tok)` to query token categories and flags
+"""
+struct Token
+    head::SyntaxHead
+    range::UnitRange{UInt32}
+end
+
+Token() = Token(SyntaxHead(K"None", EMPTY_FLAGS), 0:0)
+
+head(t::Token) = t.head
+
+"""
+    tokenize(text; operators_as_identifiers=true)
+
+Returns the tokenized UTF-8 encoded `text` as a vector of `Token`s. The
+text for the token can be retrieved by using `untokenize()`. The full text can be
+reconstructed with, for example, `join(untokenize.(tokenize(text), text))`.
+
+This interface works on UTF-8 encoded string or buffer data only.
+
+The keyword `operators_as_identifiers` specifies whether operators in
+identifier-position should have `K"Identifier"` as their kind, or be emitted as
+more specific operator kinds. For example, whether the `+` in `a + b` should be
+emitted as `K"Identifier"` (the default) or as `K"+"`.
+"""
+function tokenize(text; operators_as_identifiers=true)
+    ps = ParseStream(text)
+    parse!(ps, rule=:all)
+    ts = ps.output
+    output_tokens = Token[]
+    byte_start::UInt32 = ps.output[1].byte_span + 1
+    for i = 2:length(ts)
+        if kind(ts[i]) == K"TOMBSTONE" || is_non_terminal(ts[i])
+            continue
+        end
+        r = byte_start:(byte_start+ts[i].byte_span - 1)
+        byte_start = last(r) + 1
+        k = kind(ts[i])
+        if k == K"Identifier" && !operators_as_identifiers
+            orig_k = ts[i].orig_kind
+            if is_operator(orig_k) && !is_word_operator(orig_k)
+                k = orig_k
+            end
+        end
+        f = flags(ts[i])
+        push!(output_tokens, Token(SyntaxHead(k,f), r))
+    end
+    output_tokens
+end
+
+function untokenize(token::Token, text::AbstractString)
+    text[first(token.range):thisind(text, last(token.range))]
+end
+
+function untokenize(token::Token, text::Vector{UInt8})
+    text[token.range]
+end
+
+@deprecate parse parsestmt
diff --git a/JuliaSyntax/src/julia/tokenize.jl b/JuliaSyntax/src/julia/tokenize.jl
new file mode 100644
index 0000000000000..1f279af7c4921
--- /dev/null
+++ b/JuliaSyntax/src/julia/tokenize.jl
@@ -0,0 +1,1323 @@
+module Tokenize
+
+export tokenize, untokenize
+
+using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str, @callsite_inline
+
+import ..JuliaSyntax: kind,
+    is_literal, is_contextual_keyword, is_word_operator
+
+#-------------------------------------------------------------------------------
+# Character-based predicates for tokenization
+import Base.Unicode
+
+const EOF_CHAR = typemax(Char)
+
+function is_identifier_char(c::Char)
+    c == EOF_CHAR && return false
+    isvalid(c) || return false
+    return Base.is_id_char(c)
+end
+
+function is_identifier_start_char(c::Char)
+    c == EOF_CHAR && return false
+    isvalid(c) || return false
+    c == '🢲' && return false  # First divergence from Base.is_id_start_char
+    return Base.is_id_start_char(c)
+end
+
+function is_invisible_char(c::Char)
+    # These are the chars considered invisible by the reference parser.
+    # TODO: There's others we could add? See for example
+    # https://invisible-characters.com/
+    return c == '\u00ad' || # soft hyphen
+           c == '\u200b' || # zero width space
+           c == '\u200c' || # zero width non-joiner
+           c == '\u200d' || # zero width joiner
+           c == '\u200e' || # left-to-right mark
+           c == '\u200f' || # right-to-left mark
+           c == '\u2060' || # word joiner
+           c == '\u2061'    # function application
+    # https://github.com/JuliaLang/julia/issues/49850
+    # c == '\u115f' || # Hangul Choseong filler
+end
+
+# Chars that we will never allow to be part of a valid non-operator identifier
+function is_never_id_char(ch::Char)
+    isvalid(ch) || return true
+    cat = Unicode.category_code(ch)
+    c = UInt32(ch)
+    return (
+        # spaces and control characters:
+        (cat >= Unicode.UTF8PROC_CATEGORY_ZS && cat <= Unicode.UTF8PROC_CATEGORY_CS) ||
+
+        # ASCII and Latin1 non-connector punctuation
+        (c < 0xff &&
+         cat >= Unicode.UTF8PROC_CATEGORY_PD && cat <= Unicode.UTF8PROC_CATEGORY_PO) ||
+
+        c == UInt32('`') ||
+
+        # mathematical brackets
+        (c >= 0x27e6 && c <= 0x27ef) ||
+        # angle, corner, and lenticular brackets
+        (c >= 0x3008 && c <= 0x3011) ||
+        # tortoise shell, square, and more lenticular brackets
+        (c >= 0x3014 && c <= 0x301b) ||
+        # fullwidth parens
+        (c == 0xff08 || c == 0xff09) ||
+        # fullwidth square brackets
+        (c == 0xff3b || c == 0xff3d)
+    )
+end
+
+readchar(io::IO) = eof(io) ? EOF_CHAR : read(io, Char)
+
+# Some unicode operators are normalized by the tokenizer into their equivalent
+# kinds. See also normalize_identifier()
+const _ops_with_unicode_aliases = [
+    # \minus '−' is normalized into K"-",
+    '−' => K"-"
+    # Lookalikes which are normalized into K"⋅",
+    # https://github.com/JuliaLang/julia/pull/25157,
+    '\u00b7' => K"⋅" # '·' Middle Dot,,
+    '\u0387' => K"⋅" # '·' Greek Ano Teleia,,
+]
+
+function _nondot_symbolic_operator_kinds()
+    op_range = reinterpret(UInt16, K"BEGIN_OPS"):reinterpret(UInt16, K"END_OPS")
+    setdiff(reinterpret.(Kind, op_range), [
+        K"ErrorInvalidOperator"
+        K"Error**"
+        K"..."
+        K"."
+        K"where"
+        K"isa"
+        K"in"
+        K".'"
+        K"op="
+    ])
+end
+
+function _char_in_set_expr(varname, firstchars)
+    codes = sort!(UInt32.(unique(firstchars)))
+    terms = []
+    i = 1
+    while i <= length(codes)
+        j = i
+        while j < length(codes) && codes[j+1] == codes[j]+1
+            j += 1
+        end
+        if i == j
+            push!(terms, :($varname == $(codes[i])))
+        else
+            push!(terms, :($(codes[i]) <= $varname <= $(codes[j])))
+        end
+        i = j+1
+    end
+    foldr((t1,t2)->:($t1 || $t2), terms)
+end
+
+@eval function is_operator_start_char(c)
+   if c == EOF_CHAR || !isvalid(c)
+       return false
+   end
+   u = UInt32(c)
+   return $(_char_in_set_expr(:u,
+       append!(first.(string.(_nondot_symbolic_operator_kinds())),
+               first.(_ops_with_unicode_aliases))))
+end
+
+# Checks whether a Char is an operator which can be prefixed with a dot `.`
+function is_dottable_operator_start_char(c)
+    return c != '?' && c != '$' && c != ':' && c != '\'' && is_operator_start_char(c)
+end
+
+@eval function isopsuffix(c::Char)
+    c == EOF_CHAR && return false
+    isvalid(c) || return false
+    u = UInt32(c)
+    if (u < 0xa1 || u > 0x10ffff)
+        return false
+    end
+    cat = Base.Unicode.category_code(u)
+    if (cat == Base.Unicode.UTF8PROC_CATEGORY_MN ||
+        cat == Base.Unicode.UTF8PROC_CATEGORY_MC ||
+        cat == Base.Unicode.UTF8PROC_CATEGORY_ME)
+        return true
+    end
+    # Additional allowed cases
+    return $(_char_in_set_expr(:u,
+        collect("²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ′″‴‵‶‷⁗⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽꜛꜜꜝ")))
+end
+
+function optakessuffix(k)
+    (K"BEGIN_OPS" <= k <= K"END_OPS") &&
+    !(
+        k == K"..." ||
+        K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" ||
+        k == K"?"   ||
+        k == K"<:"  ||
+        k == K">:"  ||
+        k == K"&&"  ||
+        k == K"||"  ||
+        k == K"in"  ||
+        k == K"isa" ||
+        k == K"≔"   ||
+        k == K"⩴"   ||
+        k == K":"   ||
+        k == K".."  ||
+        k == K"$"   ||
+        k == K"::"  ||
+        k == K"where" ||
+        k == K"."   ||
+        k == K"!"   ||
+        k == K".'"  ||
+        k == K"->"  ||
+        K"BEGIN_UNICODE_OPS" <= k <= K"END_UNICODE_OPS"
+    )
+end
+
+const _unicode_ops = let
+    ks = _nondot_symbolic_operator_kinds()
+    ss = string.(ks)
+
+    ops = Dict{Char, Kind}([first(s)=>k for (k,s) in zip(ks,ss)
+                            if length(s) == 1 && !isascii(s[1])])
+    for ck in _ops_with_unicode_aliases
+        push!(ops, ck)
+    end
+    ops
+end
+
+#-------------------------------------------------------------------------------
+# Tokens
+
+struct RawToken
+    kind::Kind
+    # Offsets into a string or buffer
+    startbyte::Int # The byte where the token start in the buffer
+    endbyte::Int # The byte where the token ended in the buffer
+    suffix::Bool
+end
+function RawToken(kind::Kind, startbyte::Int, endbyte::Int)
+    RawToken(kind, startbyte, endbyte, false)
+end
+RawToken() = RawToken(K"error", 0, 0, false)
+
+const EMPTY_TOKEN = RawToken()
+
+kind(t::RawToken) = t.kind
+
+startbyte(t::RawToken) = t.startbyte
+endbyte(t::RawToken) = t.endbyte
+
+
+function untokenize(t::RawToken, str::String)
+    String(codeunits(str)[1 .+ (t.startbyte:t.endbyte)])
+end
+
+function Base.show(io::IO, t::RawToken)
+    print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " "))
+    print(io, rpad(kind(t), 15, " "))
+end
+
+#-------------------------------------------------------------------------------
+# Lexer
+
+@inline ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F')
+@inline isbinary(c::Char) = c == '0' || c == '1'
+@inline isoctal(c::Char) =  '0' ≤ c ≤ '7'
+@inline iswhitespace(c::Char) = (isvalid(c) && Base.isspace(c)) || c === '\ufeff'
+
+struct StringState
+    triplestr::Bool
+    raw::Bool
+    delim::Char
+    paren_depth::Int
+end
+
+"""
+`Lexer` reads from an input stream and emits a single token each time
+`next_token` is called.
+
+Ideally a lexer is stateless but some state is needed here for:
+* Disambiguating cases like x' (adjoint) vs 'x' (character literal)
+* Tokenizing code within string interpolations
+"""
+mutable struct Lexer{IO_t <: IO}
+    io::IO_t
+
+    token_startpos::Int
+
+    last_token::Kind
+    string_states::Vector{StringState}
+    chars::Tuple{Char,Char,Char,Char}
+    charspos::Tuple{Int,Int,Int,Int}
+end
+
+function Lexer(io::IO)
+    c1 = ' '
+    p1 = position(io)
+    if eof(io)
+        c2, p2 = EOF_CHAR, p1
+        c3, p3 = EOF_CHAR, p1
+        c4, p4 = EOF_CHAR, p1
+    else
+        c2 = read(io, Char)
+        p2 = position(io)
+        if eof(io)
+            c3, p3 = EOF_CHAR, p2
+            c4, p4 = EOF_CHAR, p2
+        else
+            c3 = read(io, Char)
+            p3 = position(io)
+            if eof(io)
+                c4, p4 = EOF_CHAR, p3
+            else
+                c4 = read(io, Char)
+                p4 = position(io)
+            end
+        end
+    end
+    Lexer(io, position(io),
+                  K"error", Vector{StringState}(),
+                  (c1,c2,c3,c4), (p1,p2,p3,p4))
+end
+Lexer(str::AbstractString) = Lexer(IOBuffer(str))
+
+"""
+    tokenize(x)
+
+Returns an `Iterable` containing the tokenized input. Can be reverted by e.g.
+`join(untokenize.(tokenize(x)))`.
+"""
+tokenize(x) = Lexer(x)
+
+# Iterator interface
+Base.IteratorSize(::Type{<:Lexer}) = Base.SizeUnknown()
+Base.IteratorEltype(::Type{<:Lexer}) = Base.HasEltype()
+Base.eltype(::Type{<:Lexer}) = RawToken
+
+
+function Base.iterate(l::Lexer)
+    l.token_startpos = position(l)
+
+    t = next_token(l)
+    return t, t.kind == K"EndMarker"
+end
+
+function Base.iterate(l::Lexer, isdone::Any)
+    isdone && return nothing
+    t = next_token(l)
+    return t, t.kind == K"EndMarker"
+end
+
+function Base.show(io::IO, l::Lexer)
+    print(io, typeof(l), " at position: ", position(l))
+end
+
+"""
+    startpos(l::Lexer)
+
+Return the latest `RawToken`'s starting position.
+"""
+startpos(l::Lexer) = l.token_startpos
+
+"""
+    startpos!(l::Lexer, i::Integer)
+
+Set a new starting position.
+"""
+startpos!(l::Lexer, i::Integer) = l.token_startpos = i
+
+"""
+    peekchar(l::Lexer)
+
+Returns the next character without changing the lexer's state.
+"""
+peekchar(l::Lexer) = l.chars[2]
+
+"""
+dpeekchar(l::Lexer)
+
+Returns the next two characters without changing the lexer's state.
+"""
+dpeekchar(l::Lexer) = l.chars[2], l.chars[3]
+
+"""
+peekchar3(l::Lexer)
+
+Returns the next three characters without changing the lexer's state.
+"""
+peekchar3(l::Lexer) = l.chars[2], l.chars[3], l.chars[4]
+
+"""
+    position(l::Lexer)
+
+Returns the current position.
+"""
+Base.position(l::Lexer) = l.charspos[1]
+
+"""
+    eof(l::Lexer)
+
+Determine whether the end of the lexer's underlying buffer has been reached.
+"""
+Base.eof(l::Lexer) = eof(l.io)
+
+Base.seek(l::Lexer, pos) = seek(l.io, pos)
+
+"""
+    start_token!(l::Lexer)
+
+Updates the lexer's state such that the next  `RawToken` will start at the current
+position.
+"""
+function start_token!(l::Lexer)
+    l.token_startpos = l.charspos[1]
+end
+
+"""
+    readchar(l::Lexer)
+
+Returns the next character and increments the current position.
+"""
+function readchar(l::Lexer)
+    c = readchar(l.io)
+    l.chars = (l.chars[2], l.chars[3], l.chars[4], c)
+    l.charspos = (l.charspos[2], l.charspos[3], l.charspos[4], position(l.io))
+    return l.chars[1]
+end
+
+"""
+    accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String})
+
+Consumes the next character `c` if either `f::Function(c)` returns true, `c == f`
+for `c::Char` or `c in f` otherwise. Returns `true` if a character has been
+consumed and `false` otherwise.
+"""
+@inline function accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String})
+    c = peekchar(l)
+    if isa(f, Function)
+        ok = f(c)
+    elseif isa(f, Char)
+        ok = c == f
+    else
+        ok = c in f
+    end
+    ok && readchar(l)
+    return ok
+end
+
+"""
+    accept_batch(l::Lexer, f)
+
+Consumes all following characters until `accept(l, f)` is `false`.
+"""
+@inline function accept_batch(l::Lexer, f)
+    ok = false
+    while accept(l, f)
+        ok = true
+    end
+    return ok
+end
+
+"""
+    emit(l::Lexer, kind::Kind)
+
+Returns a `RawToken` of kind `kind` with contents `str` and starts a new `RawToken`.
+"""
+function emit(l::Lexer, kind::Kind, maybe_op=true)
+    suffix = false
+    if optakessuffix(kind) && maybe_op
+        while isopsuffix(peekchar(l))
+            readchar(l)
+            suffix = true
+        end
+    end
+
+    tok = RawToken(kind, startpos(l), position(l) - 1, suffix)
+
+    l.last_token = kind
+    return tok
+end
+
+"""
+    next_token(l::Lexer)
+
+Returns the next `RawToken`.
+"""
+function next_token(l::Lexer, start = true)
+    start && start_token!(l)
+    if !isempty(l.string_states)
+        lex_string_chunk(l)
+    else
+        _next_token(l, readchar(l))
+    end
+end
+
+function _next_token(l::Lexer, c)
+    if c == EOF_CHAR
+        return emit(l, K"EndMarker")
+    elseif iswhitespace(c)
+        return lex_whitespace(l, c)
+    elseif c == '['
+        return emit(l, K"[")
+    elseif c == ']'
+        return emit(l, K"]")
+    elseif c == '{'
+        return emit(l, K"{")
+    elseif c == ';'
+        return emit(l, K";")
+    elseif c == '}'
+        return emit(l, K"}")
+    elseif c == '('
+        return emit(l, K"(")
+    elseif c == ')'
+        return emit(l, K")")
+    elseif c == ','
+        return emit(l, K",")
+    elseif c == '*'
+        return lex_star(l);
+    elseif c == '^'
+        return lex_circumflex(l);
+    elseif c == '@'
+        return emit(l, K"@")
+    elseif c == '?'
+        return emit(l, K"?")
+    elseif c == '$'
+        return lex_dollar(l);
+    elseif c == '⊻'
+        return lex_xor(l);
+    elseif c == '~'
+        return emit(l, K"~")
+    elseif c == '#'
+        return lex_comment(l)
+    elseif c == '='
+        return lex_equal(l)
+    elseif c == '!'
+        return lex_exclaim(l)
+    elseif c == '>'
+        return lex_greater(l)
+    elseif c == '<'
+        return lex_less(l)
+    elseif c == ':'
+        return lex_colon(l)
+    elseif c == '|'
+        return lex_bar(l)
+    elseif c == '&'
+        return lex_amper(l)
+    elseif c == '\''
+        return lex_prime(l)
+    elseif c == '÷'
+        return lex_division(l)
+    elseif c == '"'
+        return lex_quote(l);
+    elseif c == '%'
+        return lex_percent(l);
+    elseif c == '/'
+        return lex_forwardslash(l);
+    elseif c == '\\'
+        return lex_backslash(l);
+    elseif c == '.'
+        return lex_dot(l);
+    elseif c == '+'
+        return lex_plus(l);
+    elseif c == '-'
+        return lex_minus(l);
+    elseif c == '−' # \minus '−' treated as hyphen '-'
+        return emit(l, accept(l, '=') ? K"op=" : K"-")
+    elseif c == '`'
+        return lex_backtick(l);
+    elseif is_identifier_start_char(c)
+        return lex_identifier(l, c)
+    elseif isdigit(c)
+        return lex_digit(l, K"Integer")
+    elseif (k = get(_unicode_ops, c, K"None")) != K"None"
+        return emit(l, k)
+    else
+        emit(l,
+            !isvalid(c)           ? K"ErrorInvalidUTF8"   :
+            is_invisible_char(c)  ? K"ErrorInvisibleChar" :
+            is_identifier_char(c) ? K"ErrorIdentifierStart" :
+            K"ErrorUnknownCharacter")
+    end
+end
+
+# UAX #9: Unicode Bidirectional Algorithm
+# https://unicode.org/reports/tr9/
+# Very partial implementation - just enough to check correct nesting in strings
+# and multiline comments.
+function update_bidi_state((embedding_nesting, isolate_nesting), c)
+    if c == '\n'
+        embedding_nesting = 0
+        isolate_nesting = 0
+    elseif c == '\U202A' || c == '\U202B' || c == '\U202D' || c == '\U202E' # LRE RLE LRO RLO
+        embedding_nesting += 1
+    elseif c == '\U202C' # PDF
+        embedding_nesting -= 1
+    elseif c == '\U2066' || c == '\U2067' || c == '\U2068' # LRI RLI FSI
+        isolate_nesting += 1
+    elseif c == '\U2069' # PDI
+        isolate_nesting -= 1
+    end
+    return (embedding_nesting, isolate_nesting)
+end
+
+# We're inside a string; possibly reading the string characters, or maybe in
+# Julia code within an interpolation.
+function lex_string_chunk(l)
+    state = last(l.string_states)
+    if state.paren_depth > 0
+        # Read normal Julia code inside an interpolation but track nesting of
+        # parentheses.
+        # TODO: This stateful tracking should probably, somehow, be done by the
+        # parser instead? Especially for recovery of unbalanced parens inside
+        # interpolations?
+        c = readchar(l)
+        if c == '('
+            l.string_states[end] = StringState(state.triplestr, state.raw, state.delim,
+                                               state.paren_depth + 1)
+            return emit(l, K"(")
+        elseif c == ')'
+            l.string_states[end] = StringState(state.triplestr, state.raw, state.delim,
+                                               state.paren_depth - 1)
+            return emit(l, K")")
+        else
+            return _next_token(l, c)
+        end
+    end
+    pc = peekchar(l)
+    if l.last_token == K"$"
+        pc = peekchar(l)
+        # Interpolated symbol or expression
+        if pc == '('
+            readchar(l)
+            l.string_states[end] = StringState(state.triplestr, state.raw, state.delim,
+                                               state.paren_depth + 1)
+            return emit(l, K"(")
+        elseif is_identifier_start_char(pc)
+            return lex_identifier(l, readchar(l))
+        else
+            # Getting here is a syntax error - fall through to reading string
+            # characters and let the parser deal with it.
+        end
+    elseif l.last_token == K"Identifier" &&
+            !(pc == EOF_CHAR || is_operator_start_char(pc) || is_never_id_char(pc))
+        # Only allow certain characters after interpolated vars
+        # https://github.com/JuliaLang/julia/pull/25234
+        readchar(l)
+        return emit(l, K"ErrorInvalidInterpolationTerminator")
+    end
+    if pc == EOF_CHAR
+        return emit(l, K"EndMarker")
+    elseif !state.raw && pc == '$'
+        # Start interpolation
+        readchar(l)
+        return emit(l, K"$")
+    elseif !state.raw && pc == '\\' && (pc2 = dpeekchar(l)[2];
+                                        pc2 == '\r' || pc2 == '\n')
+        # Process escaped newline as whitespace
+        readchar(l)
+        readchar(l)
+        if pc2 == '\r' && peekchar(l) == '\n'
+            readchar(l)
+        end
+        while (pc = peekchar(l); pc == ' ' || pc == '\t')
+            readchar(l)
+        end
+        return emit(l, K"Whitespace")
+    elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr)
+        if state.delim == '\'' && l.last_token == K"'" && dpeekchar(l)[2] == '\''
+            # Handle '''
+            readchar(l)
+            return emit(l, K"Char")
+        end
+        # Terminate string
+        pop!(l.string_states)
+        readchar(l)
+        if state.triplestr
+            readchar(l); readchar(l)
+            return emit(l, state.delim == '"' ?
+                        K"\"\"\"" : K"```")
+        else
+            return emit(l, state.delim == '"' ? K"\"" :
+                           state.delim == '`' ? K"`"  : K"'", false)
+        end
+    end
+    # Read a chunk of string characters
+    init_bidi_state = (0,0)
+    bidi_state = init_bidi_state
+    valid = true
+    if state.raw
+        # Raw strings treat all characters as literals with the exception that
+        # the closing quotes can be escaped with an odd number of \ characters.
+        while true
+            pc = peekchar(l)
+            if string_terminates(l, state.delim, state.triplestr) || pc == EOF_CHAR
+                break
+            elseif state.triplestr && (pc == '\n' || pc == '\r')
+                # triple quoted newline splitting
+                readchar(l)
+                if pc == '\n'
+                    bidi_state = init_bidi_state
+                elseif pc == '\r' && peekchar(l) == '\n'
+                    bidi_state = init_bidi_state
+                    readchar(l)
+                end
+                break
+            end
+            c = readchar(l)
+            if c == '\\'
+                n = 1
+                while peekchar(l) == '\\'
+                    readchar(l)
+                    n += 1
+                end
+                if peekchar(l) == state.delim && !iseven(n)
+                    readchar(l)
+                end
+            end
+            bidi_state = update_bidi_state(bidi_state, c)
+            valid &= isvalid(c)
+        end
+    else
+        while true
+            pc = peekchar(l)
+            if pc == '$' || pc == EOF_CHAR
+                break
+            elseif state.triplestr && (pc == '\n' || pc == '\r')
+                # triple quoted newline splitting
+                readchar(l)
+                if pc == '\n'
+                    bidi_state = init_bidi_state
+                elseif pc == '\r' && peekchar(l) == '\n'
+                    readchar(l)
+                    bidi_state = init_bidi_state
+                end
+                break
+            elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr)
+                break
+            elseif pc == '\\'
+                # Escaped newline
+                _, pc2, pc3 = peekchar3(l)
+                if pc2 == '\r' || pc2 == '\n'
+                    if pc2 == '\n' || pc3 == '\n'
+                        bidi_state = init_bidi_state
+                    end
+                    break
+                end
+            end
+            c = readchar(l)
+            if c == '\\'
+                c = readchar(l)
+                c == EOF_CHAR && break
+            end
+            bidi_state = update_bidi_state(bidi_state, c)
+            valid &= isvalid(c)
+        end
+    end
+    outk = !valid                        ? K"ErrorInvalidUTF8"    :
+           state.delim == '\''           ? K"Char"                :
+           bidi_state != init_bidi_state ? K"ErrorBidiFormatting" :
+           state.delim == '"'            ? K"String"              :
+           state.delim == '`'            ? K"CmdString"           :
+           (@assert(state.delim in KSet"' \" `"); K"error")
+    return emit(l, outk)
+end
+
+# Lex whitespace, a whitespace char `c` has been consumed
+function lex_whitespace(l::Lexer, c)
+    k = K"Whitespace"
+    while true
+        if c == '\n'
+            k = K"NewlineWs"
+        end
+        pc, ppc = dpeekchar(l)
+        # stop on non whitespace and limit to a single newline in a token
+        if !iswhitespace(pc) ||
+                (k == K"NewlineWs" && (pc == '\n' || (pc == '\r' && ppc == '\n')))
+            break
+        end
+        c = readchar(l)
+    end
+    return emit(l, k)
+end
+
+function lex_comment(l::Lexer)
+    if peekchar(l) != '='
+        valid = true
+        while true
+            pc, ppc = dpeekchar(l)
+            if pc == '\n' || (pc == '\r' && ppc == '\n') || pc == EOF_CHAR
+                return emit(l, valid ? K"Comment" : K"ErrorInvalidUTF8")
+            end
+            valid &= isvalid(pc)
+            readchar(l)
+        end
+    else
+        c = readchar(l) # consume the '='
+        init_bidi_state = (0,0)
+        bidi_state = init_bidi_state
+        skip = true  # true => c was part of the prev comment marker pair
+        nesting = 1
+        valid = true
+        while true
+            if c == EOF_CHAR
+                return emit(l, K"ErrorEofMultiComment")
+            end
+            nc = readchar(l)
+            bidi_state = update_bidi_state(bidi_state, nc)
+            valid &= isvalid(nc)
+            if skip
+                skip = false
+            else
+                if c == '#' && nc == '='
+                    nesting += 1
+                    skip = true
+                elseif c == '=' && nc == '#'
+                    nesting -= 1
+                    skip = true
+                    if nesting == 0
+                        outk = !valid ? K"ErrorInvalidUTF8" :
+                               bidi_state != init_bidi_state ? K"ErrorBidiFormatting" :
+                               K"Comment"
+                        return emit(l, outk)
+                    end
+                end
+            end
+            c = nc
+        end
+    end
+end
+
+# Lex a greater char, a '>' has been consumed
+function lex_greater(l::Lexer)
+    if accept(l, '>')
+        if accept(l, '>')
+            if accept(l, '=')
+                return emit(l, K"op=")
+            else # >>>?, ? not a =
+                return emit(l, K">>>")
+            end
+        elseif accept(l, '=')
+            return emit(l, K"op=")
+        else
+            return emit(l, K">>")
+        end
+    elseif accept(l, '=')
+        return emit(l, K">=")
+    elseif accept(l, ':')
+        return emit(l, K">:")
+    else
+        return emit(l, K">")
+    end
+end
+
+# Lex a less char, a '<' has been consumed
+function lex_less(l::Lexer)
+    if accept(l, '<')
+        if accept(l, '=')
+            return emit(l, K"op=")
+        else # '<<?', ? not =, ' '
+            return emit(l, K"<<")
+        end
+    elseif accept(l, '=')
+        return emit(l, K"<=")
+    elseif accept(l, ':')
+        return emit(l, K"<:")
+    elseif accept(l, '|')
+        return emit(l, K"<|")
+    elseif dpeekchar(l) == ('-', '-')
+        readchar(l); readchar(l)
+        if accept(l, '-')
+            return emit(l, K"ErrorInvalidOperator")
+        else
+            if accept(l, '>')
+                return emit(l, K"<-->")
+            elseif accept(l, '-')
+                return emit(l, K"ErrorInvalidOperator")
+            else
+                return emit(l, K"<--")
+            end
+        end
+    else
+        return emit(l, K"<")
+    end
+end
+
+# Lex all tokens that start with an = character.
+# An '=' char has been consumed
+function lex_equal(l::Lexer)
+    if accept(l, '=')
+        if accept(l, '=')
+            emit(l, K"===")
+        else
+            emit(l, K"==")
+        end
+    elseif accept(l, '>')
+        emit(l, K"=>")
+    else
+        emit(l, K"=")
+    end
+end
+
+# Lex a colon, a ':' has been consumed
+function lex_colon(l::Lexer)
+    if accept(l, ':')
+        return emit(l, K"::")
+    elseif accept(l, '=')
+        return emit(l, K":=")
+    else
+        return emit(l, K":")
+    end
+end
+
+function lex_exclaim(l::Lexer)
+    if accept(l, '=')
+        if accept(l, '=')
+            return emit(l, K"!==")
+        else
+            return emit(l, K"!=")
+        end
+    else
+        return emit(l, K"!")
+    end
+end
+
+function lex_percent(l::Lexer)
+    if accept(l, '=')
+        return emit(l, K"op=")
+    else
+        return emit(l, K"%")
+    end
+end
+
+function lex_bar(l::Lexer)
+    if accept(l, '=')
+        return emit(l, K"op=")
+    elseif accept(l, '>')
+        return emit(l, K"|>")
+    elseif accept(l, '|')
+        return emit(l, K"||")
+    else
+        emit(l, K"|")
+    end
+end
+
+function lex_plus(l::Lexer)
+    if accept(l, '+')
+        return emit(l, K"++")
+    elseif accept(l, '=')
+        return emit(l, K"op=")
+    end
+    return emit(l, K"+")
+end
+
+function lex_minus(l::Lexer)
+    if accept(l, '-')
+        if accept(l, '>')
+            return emit(l, K"-->")
+        else
+            return emit(l, K"ErrorInvalidOperator") # "--" is an invalid operator
+        end
+    elseif l.last_token != K"." && accept(l, '>')
+        return emit(l, K"->")
+    elseif accept(l, '=')
+        return emit(l, K"op=")
+    end
+    return emit(l, K"-")
+end
+
+function lex_star(l::Lexer)
+    if accept(l, '*')
+        return emit(l, K"Error**") # "**" is an invalid operator use ^
+    elseif accept(l, '=')
+        return emit(l, K"op=")
+    end
+    return emit(l, K"*")
+end
+
+function lex_circumflex(l::Lexer)
+    if accept(l, '=')
+        return emit(l, K"op=")
+    end
+    return emit(l, K"^")
+end
+
+function lex_division(l::Lexer)
+    if accept(l, '=')
+        return emit(l, K"op=")
+    end
+    return emit(l, K"÷")
+end
+
+function lex_dollar(l::Lexer)
+    if accept(l, '=')
+        return emit(l, K"op=")
+    end
+    return emit(l, K"$")
+end
+
+function lex_xor(l::Lexer)
+    if accept(l, '=')
+        return emit(l, K"op=")
+    end
+    return emit(l, K"⊻")
+end
+
+function accept_number(l::Lexer, f::F) where {F}
+    lexed_number = false
+    while true
+        pc, ppc = dpeekchar(l)
+        if pc == '_' && !f(ppc)
+            return lexed_number
+        elseif f(pc) || pc == '_'
+            readchar(l)
+        else
+            return lexed_number
+        end
+        lexed_number = true
+    end
+end
+
+# A digit has been consumed
+function lex_digit(l::Lexer, kind)
+    accept_number(l, isdigit)
+    pc,ppc = dpeekchar(l)
+    if pc == '.'
+        if ppc == '.'
+            # Number followed by K".." or K"..."
+            return emit(l, kind)
+        elseif kind === K"Float"
+            # If we enter the function with kind == K"Float" then a '.' has been parsed.
+            readchar(l)
+            return emit(l, K"ErrorInvalidNumericConstant")
+        elseif is_dottable_operator_start_char(ppc)
+            readchar(l)
+            return emit(l, K"ErrorAmbiguousNumericConstant") # `1.+`
+        end
+        readchar(l)
+
+        kind = K"Float"
+        accept(l, '_') && return emit(l, K"ErrorInvalidNumericConstant") # `1._`
+        had_fraction_digs = accept_number(l, isdigit)
+        pc, ppc = dpeekchar(l)
+        if (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−')
+            kind = pc == 'f' ? K"Float32" : K"Float"
+            readchar(l)
+            accept(l, "+-−")
+            if accept_batch(l, isdigit)
+                pc,ppc = dpeekchar(l)
+                if pc === '.' && !is_dottable_operator_start_char(ppc)
+                    readchar(l)
+                    return emit(l, K"ErrorInvalidNumericConstant") # `1.e1.`
+                end
+            else
+                return emit(l, K"ErrorInvalidNumericConstant") # `1.e`
+            end
+        elseif pc == '.' && ppc != '.' && !is_dottable_operator_start_char(ppc)
+            readchar(l)
+            return emit(l, K"ErrorInvalidNumericConstant") # `1.1.`
+        elseif !had_fraction_digs && (is_identifier_start_char(pc) ||
+                                      pc == '(' || pc == '[' || pc == '{' ||
+                                      pc == '@' || pc == '`' || pc == '"')
+            return emit(l, K"ErrorAmbiguousNumericDotMultiply") # `1.(` `1.x`
+        end
+    elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−')
+        kind = pc == 'f' ? K"Float32" : K"Float"
+        readchar(l)
+        accept(l, "+-−")
+        if accept_batch(l, isdigit)
+            pc,ppc = dpeekchar(l)
+            if pc === '.' && !is_dottable_operator_start_char(ppc)
+                accept(l, '.')
+                return emit(l, K"ErrorInvalidNumericConstant") # `1e1.`
+            end
+        else
+            return emit(l, K"ErrorInvalidNumericConstant") # `1e+`
+        end
+    elseif position(l) - startpos(l) == 1 && l.chars[1] == '0'
+        kind == K"Integer"
+        is_bin_oct_hex_int = false
+        if pc == 'x'
+            kind = K"HexInt"
+            isfloat = false
+            readchar(l)
+            had_digits = accept_number(l, ishex)
+            pc,ppc = dpeekchar(l)
+            if pc == '.' && ppc != '.'
+                readchar(l)
+                had_digits |= accept_number(l, ishex)
+                isfloat = true
+            end
+            if accept(l, "pP")
+                kind = K"Float"
+                accept(l, "+-−")
+                if !accept_number(l, isdigit) || !had_digits
+                    return emit(l, K"ErrorInvalidNumericConstant") # `0x1p` `0x.p0`
+                end
+                # Check for invalid trailing decimal point
+                # https://github.com/JuliaLang/julia/issues/60189
+                pc = peekchar(l)
+                if pc == '.'
+                    accept_batch(l, c->(c == '.' || isdigit(c)))
+                    # `0x1p3.` `0x1p3.2` `0x1.5p2.3`
+                    return emit(l, K"ErrorInvalidNumericConstant")
+                end
+            elseif isfloat
+                return emit(l, K"ErrorHexFloatMustContainP") # `0x.` `0x1.0`
+            end
+            is_bin_oct_hex_int = !isfloat
+        elseif pc == 'b'
+            readchar(l)
+            had_digits = accept_number(l, isbinary)
+            kind = K"BinInt"
+            is_bin_oct_hex_int = true
+        elseif pc == 'o'
+            readchar(l)
+            had_digits = accept_number(l, isoctal)
+            kind = K"OctInt"
+            is_bin_oct_hex_int = true
+        end
+        if is_bin_oct_hex_int
+            pc = peekchar(l)
+            if !had_digits || isdigit(pc) || is_identifier_start_char(pc)
+                accept_batch(l, c->isdigit(c) || is_identifier_start_char(c))
+                # `0x` `0xg` `0x_` `0x-`
+                # `0b123` `0o78p` `0xenomorph` `0xaα`
+                return emit(l, K"ErrorInvalidNumericConstant")
+            end
+        end
+    end
+    return emit(l, kind)
+end
+
+function lex_prime(l)
+    if l.last_token == K"Identifier"         ||
+         is_contextual_keyword(l.last_token) ||
+         is_word_operator(l.last_token)      ||
+         l.last_token == K"."                ||
+         l.last_token ==  K")"               ||
+         l.last_token ==  K"]"               ||
+         l.last_token ==  K"}"               ||
+         l.last_token == K"'"                ||
+         l.last_token == K"end"              ||
+         is_literal(l.last_token)
+        # FIXME ^ This doesn't cover all cases - probably needs involvement
+        # from the parser state.
+        return emit(l, K"'")
+    else
+        push!(l.string_states, StringState(false, true, '\'', 0))
+        return emit(l, K"'", false)
+    end
+end
+
+function lex_amper(l::Lexer)
+    if accept(l, '&')
+        return emit(l, K"&&")
+    elseif accept(l, '=')
+        return emit(l, K"op=")
+    else
+        return emit(l, K"&")
+    end
+end
+
+# Parse a token starting with a quote.
+# A '"' has been consumed
+function lex_quote(l::Lexer)
+    raw = l.last_token == K"Identifier" ||
+          is_contextual_keyword(l.last_token) ||
+          is_word_operator(l.last_token)
+    pc, dpc = dpeekchar(l)
+    triplestr = pc == '"' && dpc == '"'
+    push!(l.string_states, StringState(triplestr, raw, '"', 0))
+    if triplestr
+        readchar(l)
+        readchar(l)
+        emit(l, K"\"\"\"")
+    else
+        emit(l, K"\"")
+    end
+end
+
+function string_terminates(l, delim::Char, triplestr::Bool)
+    if triplestr
+        c1, c2, c3 = peekchar3(l)
+        c1 === delim && c2 === delim && c3 === delim
+    else
+        peekchar(l) === delim
+    end
+end
+
+# Parse a token starting with a forward slash.
+# A '/' has been consumed
+function lex_forwardslash(l::Lexer)
+    if accept(l, '/')
+        if accept(l, '=')
+            return emit(l, K"op=")
+        else
+            return emit(l, K"//")
+        end
+    elseif accept(l, '=')
+        return emit(l, K"op=")
+    else
+        return emit(l, K"/")
+    end
+end
+
+function lex_backslash(l::Lexer)
+    if accept(l, '=')
+        return emit(l, K"op=")
+    end
+    return emit(l, K"\\")
+end
+
+function lex_dot(l::Lexer)
+    if accept(l, '.')
+        if accept(l, '.')
+            l.last_token == K"@" && return emit(l, K"Identifier")
+            return emit(l, K"...")
+        else
+            if is_dottable_operator_start_char(peekchar(l))
+                readchar(l)
+                return emit(l, K"ErrorInvalidOperator")
+            else
+                l.last_token == K"@" && return emit(l, K"Identifier")
+                return emit(l, K"..")
+            end
+        end
+    elseif Base.isdigit(peekchar(l))
+        return lex_digit(l, K"Float")
+    else
+        l.last_token == K"@" && return emit(l, K"Identifier")
+        return emit(l, K".")
+    end
+end
+
+# A ` has been consumed
+function lex_backtick(l::Lexer)
+    pc, dpc = dpeekchar(l)
+    triplestr = pc == '`' && dpc == '`'
+    # Backticks always contain raw strings only. See discussion on bug
+    # https://github.com/JuliaLang/julia/issues/3150
+    raw = true
+    push!(l.string_states, StringState(triplestr, raw, '`', 0))
+    if triplestr
+        readchar(l)
+        readchar(l)
+        emit(l, K"```")
+    else
+        emit(l, K"`")
+    end
+end
+
+const MAX_KW_LENGTH = 10
+const ascii_is_identifier_char = Bool[is_identifier_char(Char(b)) for b=0x00:0x7f]
+function lex_identifier(l::Lexer, c)
+    h = simple_hash(c, UInt64(0))
+    n = 1
+    ascii = isascii(c)
+    graphemestate = Ref(Int32(ascii)) # all ASCII id chars are UTF8PROC_BOUNDCLASS_OTHER
+    graphemestate_peek = Ref(zero(Int32))
+    while true
+        pc, ppc = dpeekchar(l)
+        ascii = ascii && isascii(pc)
+        if ascii # fast path
+            pc_byte = pc % UInt8
+            @inbounds if (pc_byte == UInt8('!') && ppc == '=') || !ascii_is_identifier_char[pc_byte+1]
+                break
+            end
+        elseif @callsite_inline Unicode.isgraphemebreak!(graphemestate, c, pc)
+            if (pc == '!' && ppc == '=') || !is_identifier_char(pc)
+                break
+            end
+        elseif pc in ('\u200c','\u200d') # ZWNJ/ZWJ control characters
+            # ZWJ/ZWNJ only within grapheme sequences, not at end
+            graphemestate_peek[] = graphemestate[]
+            if @callsite_inline Unicode.isgraphemebreak!(graphemestate_peek, pc, ppc)
+                break
+            end
+        end
+        c = readchar(l)
+        h = simple_hash(c, h)
+        n += 1
+    end
+
+    if n > MAX_KW_LENGTH
+        emit(l, K"Identifier")
+    elseif h == _true_hash || h == _false_hash
+        emit(l, K"Bool")
+    else
+        emit(l, get(_kw_hash, h, K"Identifier"))
+    end
+end
+
+# This creates a hash for chars in [A-z] using 6 bit per char.
+# Requires an additional input-length check somewhere, because
+# this only works up to ~10 chars.
+@inline function simple_hash(c::Char, h::UInt64)
+    bytehash = (clamp(c - 'A' + 1, -1, 60) % UInt8) & 0x3f
+    h << 6 + bytehash
+end
+
+function simple_hash(str)
+    ind = 1
+    h = UInt64(0)
+    L = min(lastindex(str), MAX_KW_LENGTH)
+    while ind <= L
+        h = simple_hash(str[ind], h)
+        ind = nextind(str, ind)
+    end
+    h
+end
+
+kws = [
+K"baremodule",
+K"begin",
+K"break",
+K"catch",
+K"const",
+K"continue",
+K"do",
+K"else",
+K"elseif",
+K"end",
+K"export",
+K"finally",
+K"for",
+K"function",
+K"global",
+K"if",
+K"import",
+K"let",
+K"local",
+K"macro",
+K"module",
+K"public",
+K"quote",
+K"return",
+K"struct",
+K"try",
+K"using",
+K"while",
+K"in",
+K"isa",
+K"where",
+
+K"abstract",
+K"as",
+K"doc",
+K"mutable",
+K"outer",
+K"primitive",
+K"type",
+K"var",
+K"VERSION"
+]
+
+const _true_hash = simple_hash("true")
+const _false_hash = simple_hash("false")
+const _kw_hash = Dict(simple_hash(string(kw)) => kw for kw in kws)
+
+end # module
diff --git a/JuliaSyntax/src/porcelain/green_node.jl b/JuliaSyntax/src/porcelain/green_node.jl
new file mode 100644
index 0000000000000..7838ff733c0bf
--- /dev/null
+++ b/JuliaSyntax/src/porcelain/green_node.jl
@@ -0,0 +1,176 @@
+"""
+    struct GreenNode
+
+An explicit pointer-y representation of the green tree produced by the parser.
+See [`RawGreenNode`](@ref) for documentation on working with the implicit green
+tree directly. However, this representation is useful for introspection as it
+provides O(1) access to the children (as well as forward iteration).
+"""
+struct GreenNode{Head}
+    head::Head
+    span::UInt32
+    children::Union{Nothing,Vector{GreenNode{Head}}}
+end
+
+function GreenNode(head::Head, span::Integer, children=nothing) where {Head}
+    GreenNode{Head}(head, span, children)
+end
+
+# Accessors / predicates
+is_leaf(node::GreenNode)     = isnothing(node.children)
+children(node::GreenNode)    = node.children
+numchildren(node::GreenNode) = isnothing(node.children) ? 0 : length(node.children)
+head(node::GreenNode)        = node.head
+
+"""
+    span(node)
+
+Get the number of bytes this node covers in the source text.
+"""
+span(node::GreenNode) = node.span
+
+Base.getindex(node::GreenNode, i::Int) = children(node)[i]
+Base.getindex(node::GreenNode, rng::UnitRange) = view(children(node), rng)
+Base.firstindex(node::GreenNode) = 1
+Base.lastindex(node::GreenNode) = children(node) === nothing ? 0 : length(children(node))
+
+"""
+Get absolute position and span of the child of `node` at the given tree `path`.
+"""
+function child_position_span(node::GreenNode, path::Int...)
+    n = node
+    p = 1
+    for index in path
+        cs = children(n)
+        for i = 1:index-1
+            p += span(cs[i])
+        end
+        n = cs[index]
+    end
+    return n, p, n.span
+end
+
+function highlight(io::IO, source::SourceFile, node::GreenNode, path::Int...; kws...)
+    _, p, span = child_position_span(node, path...)
+    q = p + span - 1
+    highlight(io, source, p:q; kws...)
+end
+
+Base.summary(node::GreenNode) = summary(node.head)
+
+function Base.hash(node::GreenNode, h::UInt)
+    children = node.children
+    if children === nothing
+        h = hash(nothing, h)
+    else # optimization - avoid extra allocations from `hash(::AbstractVector, ::UInt)`
+        for child in children
+            h = hash(child, h)
+        end
+    end
+    hash(node.head, hash(node.span, h))
+end
+function Base.:(==)(n1::GreenNode, n2::GreenNode)
+    n1.head == n2.head && n1.span == n2.span && n1.children == n2.children
+end
+
+# Pretty printing
+function _show_green_node(io, node, indent, pos, str, show_trivia)
+    if !show_trivia && is_trivia(node)
+        return
+    end
+    posstr = "$(lpad(pos, 6)):$(rpad(pos+span(node)-1, 6)) │"
+    leaf = is_leaf(node)
+    if leaf
+        line = string(posstr, indent, summary(node))
+    else
+        line = string(posstr, indent, '[', summary(node), ']')
+    end
+    if !is_trivia(node) && leaf
+        line = rpad(line, 40) * "✔"
+    end
+    if is_error(node)
+        line = rpad(line, 41) * "✘"
+    end
+    if leaf && !isnothing(str)
+        line = string(rpad(line, 43), ' ', repr(str[pos:prevind(str, pos + span(node))]))
+    end
+    line = line*"\n"
+    if is_error(node)
+        printstyled(io, line, color=:light_red)
+    else
+        print(io, line)
+    end
+    if !leaf
+        new_indent = indent*"  "
+        p = pos
+        for x in children(node)
+            _show_green_node(io, x, new_indent, p, str, show_trivia)
+            p += x.span
+        end
+    end
+end
+
+function Base.show(io::IO, ::MIME"text/plain", node::GreenNode)
+    _show_green_node(io, node, "", 1, nothing, true)
+end
+
+function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::AbstractString; show_trivia=true)
+    _show_green_node(io, node, "", 1, str, show_trivia)
+end
+
+function _show_green_node_sexpr(io, node::GreenNode, position)
+    if is_leaf(node)
+        print(io, position, "-", position+node.span-1, "::", untokenize(head(node); unique=false))
+    else
+        print(io, "(", untokenize(head(node); unique=false))
+        p = position
+        for n in children(node)
+            print(io, ' ')
+            _show_green_node_sexpr(io, n, p)
+            p += n.span
+        end
+        print(io, ')')
+    end
+end
+
+function Base.show(io::IO, node::GreenNode)
+    _show_green_node_sexpr(io, node, 1)
+end
+
+function GreenNode(cursor::GreenTreeCursor)
+    chead = head(cursor)
+    T = typeof(chead)
+    if is_leaf(cursor)
+        return GreenNode{T}(head(cursor), span(cursor), nothing)
+    else
+        children = GreenNode{T}[]
+        for child in reverse(cursor)
+            pushfirst!(children, GreenNode(child))
+        end
+        return GreenNode{T}(head(cursor), span(cursor), children)
+    end
+end
+
+function build_tree(::Type{GreenNode}, stream::ParseStream;
+                    # unused, but required since `_parse` is written generic
+                    filename=nothing, first_line=1, keep_parens=false)
+    cursor = GreenTreeCursor(stream)
+    if has_toplevel_siblings(cursor)
+        # There are multiple toplevel nodes, e.g. because we're using this
+        # to test a partial parse. Wrap everything in K"wrapper"
+        all_processed = 0
+        local cs
+        for child in reverse_toplevel_siblings(cursor)
+            c = GreenNode(child)
+            if !@isdefined(cs)
+                cs = GreenNode{SyntaxHead}[c]
+            else
+                pushfirst!(cs, c)
+            end
+        end
+        @assert length(cs) != 1
+        return GreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG), stream.next_byte-1, cs)
+    else
+        return GreenNode(cursor)
+    end
+end
diff --git a/JuliaSyntax/src/porcelain/syntax_tree.jl b/JuliaSyntax/src/porcelain/syntax_tree.jl
new file mode 100644
index 0000000000000..ad08b25e6a1df
--- /dev/null
+++ b/JuliaSyntax/src/porcelain/syntax_tree.jl
@@ -0,0 +1,346 @@
+#-------------------------------------------------------------------------------
+# AST interface, built on top of raw tree
+
+abstract type AbstractSyntaxData end
+
+mutable struct TreeNode{NodeData}   # ? prevent others from using this with NodeData <: AbstractSyntaxData?
+    parent::Union{Nothing,TreeNode{NodeData}}
+    children::Union{Nothing,Vector{TreeNode{NodeData}}}
+    data::Union{Nothing,NodeData}
+
+    # Use this constructor rather than the automatically generated one to pass
+    # Test.detect_unbound_args() test in Base.
+    function TreeNode{NodeData}(parent::Union{Nothing,TreeNode{NodeData}},
+                                children::Union{Nothing,Vector{TreeNode{NodeData}}},
+                                data::Union{Nothing,NodeData}) where {NodeData}
+        new{NodeData}(parent, children, data)
+    end
+end
+
+# Exclude parent from hash and equality checks. This means that subtrees can compare equal.
+function Base.hash(node::TreeNode, h::UInt)
+    h = hash(node.data, h)
+    children = node.children
+    if children === nothing
+        return hash(nothing, h)
+    else # optimization - avoid extra allocations from `hash(::AbstractVector, ::UInt)`
+        for child in children
+            h = hash(child, h)
+        end
+        return h
+    end
+end
+function Base.:(==)(a::TreeNode{T}, b::TreeNode{T}) where T
+    a.children == b.children && a.data == b.data
+end
+
+# Implement "pass-through" semantics for field access: access fields of `data`
+# as if they were part of `TreeNode`
+function Base.getproperty(node::TreeNode, name::Symbol)
+    name === :parent && return getfield(node, :parent)
+    name === :children && return getfield(node, :children)
+    d = getfield(node, :data)
+    name === :data && return d
+    return getproperty(d, name)
+end
+
+function Base.setproperty!(node::TreeNode, name::Symbol, x)
+    name === :parent && return setfield!(node, :parent, x)
+    name === :children && return setfield!(node, :children, x)
+    name === :data && return setfield!(node, :data, x)
+    d = getfield(node, :data)
+    return setfield!(d, name, x)
+end
+
+const AbstractSyntaxNode = TreeNode{<:AbstractSyntaxData}
+
+struct SyntaxData <: AbstractSyntaxData
+    source::SourceFile
+    raw::GreenNode{SyntaxHead}
+    byte_end::UInt32
+    val::Any
+end
+function Base.getproperty(data::SyntaxData, name::Symbol)
+    if name === :position
+        # Previous versions of JuliaSyntax had `position::Int`.
+        # Allow access for compatibility. It was renamed (with changed) semantics
+        # to `byte_end::UInt32` to match the rest of the code base, which identified
+        # nodes, by their last byte.
+        return Int(getfield(data, :byte_end) - getfield(data, :raw).span + UInt32(1))
+    end
+    return getfield(data, name)
+end
+
+Base.hash(data::SyntaxData, h::UInt) =
+    hash(data.source, hash(data.raw, hash(data.byte_end,
+        # Avoid dynamic dispatch:
+        # This does not support custom `hash` implementation that may be defined for `typeof(data.val)`,
+        # However, such custom user types should not generally appear in the AST.
+        Core.invoke(hash, Tuple{Any,UInt}, data.val, h))))
+function Base.:(==)(a::SyntaxData, b::SyntaxData)
+    a.source == b.source && a.raw == b.raw && a.byte_end == b.byte_end && a.val === b.val
+end
+
+"""
+    SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead};
+               keep_parens=false, position::Integer=1)
+
+An AST node with a similar layout to `Expr`. Typically constructed from source
+text by calling one of the parser API functions such as [`parseall`](@ref)
+"""
+const SyntaxNode = TreeNode{SyntaxData}
+
+function SyntaxNode(source::SourceFile, cursor::RedTreeCursor;
+                    keep_parens=false)
+    # Build the full GreenNode tree once upfront (including trivia)
+    green = GreenNode(cursor.green)
+
+    GC.@preserve source begin
+        raw_offset, txtbuf = _unsafe_wrap_substring(source.code)
+        offset = raw_offset - source.byte_offset
+        _to_SyntaxNode(source, txtbuf, offset, cursor, green, keep_parens)
+    end
+end
+
+function SyntaxNode(source::SourceFile, cursor::RedTreeCursor, green::GreenNode{SyntaxHead};
+                    keep_parens=false)
+    GC.@preserve source begin
+        raw_offset, txtbuf = _unsafe_wrap_substring(source.code)
+        offset = raw_offset - source.byte_offset
+        _to_SyntaxNode(source, txtbuf, offset, cursor, green, keep_parens)
+    end
+end
+
+should_include_node(child) = !is_trivia(child) || is_error(child)
+
+function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int,
+                        cursor::RedTreeCursor, green::GreenNode{SyntaxHead}, keep_parens::Bool)
+    if is_leaf(cursor)
+        # Here we parse the values eagerly rather than representing them as
+        # strings. Maybe this is good. Maybe not.
+        valrange = byte_range(cursor)
+        val = parse_julia_literal(txtbuf, head(cursor), valrange .+ offset)
+        return SyntaxNode(nothing, nothing, SyntaxData(source, green, cursor.byte_end, val))
+    else
+        cs = SyntaxNode[]
+        green_children = children(green)
+
+        # We need to match up the filtered SyntaxNode children with the unfiltered GreenNode children
+        # Both cursor and green children need to be traversed in the same order
+        # Since cursor iterates in reverse, we need to match from the end of green_children
+        green_idx = green_children === nothing ? 0 : length(green_children)
+
+        for (i, child_cursor) in enumerate(reverse(cursor))
+            if should_include_node(child_cursor)
+                pushfirst!(cs, _to_SyntaxNode(source, txtbuf, offset, child_cursor, green[end-i+1], keep_parens))
+            end
+        end
+
+        if !keep_parens && kind(cursor) == K"parens" && length(cs) == 1
+            return cs[1]
+        end
+        node = SyntaxNode(nothing, cs, SyntaxData(source, green, cursor.byte_end, nothing))
+        for c in cs
+            c.parent = node
+        end
+        return node
+    end
+end
+
+"""
+    is_leaf(node)
+
+Determine whether the node is a leaf of the tree. In our trees a "leaf"
+corresponds to a single token in the source text.
+"""
+is_leaf(node::TreeNode) = node.children === nothing
+
+"""
+    children(node)
+
+Return an iterable list of children for the node. For leaves, return `nothing`.
+"""
+children(node::TreeNode) = node.children
+
+"""
+    numchildren(node)
+
+Return `length(children(node))` but possibly computed in a more efficient way.
+"""
+numchildren(node::TreeNode) = (isnothing(node.children) ? 0 : length(node.children))
+
+Base.getindex(node::AbstractSyntaxNode, i::Int) = children(node)[i]
+Base.getindex(node::AbstractSyntaxNode, rng::UnitRange) = view(children(node), rng)
+Base.firstindex(node::AbstractSyntaxNode) = 1
+Base.length(node::AbstractSyntaxNode) = length(children(node))
+Base.lastindex(node::AbstractSyntaxNode) = length(node)
+
+function Base.setindex!(node::SN, x::SN, i::Int) where {SN<:AbstractSyntaxNode}
+    children(node)[i] = x
+end
+
+"""
+    head(x)
+
+Get the [`SyntaxHead`](@ref) of a node of a tree or other syntax-related data
+structure.
+"""
+head(node::AbstractSyntaxNode) = head(node.raw)
+
+span(node::AbstractSyntaxNode) = node.raw.span
+
+byte_range(node::AbstractSyntaxNode) = (node.byte_end - span(node) + 1):node.byte_end
+
+first_byte(node::AbstractSyntaxNode) = first(byte_range(node))
+last_byte(node::AbstractSyntaxNode) = last(byte_range(node))
+
+sourcefile(node::AbstractSyntaxNode) = node.source
+
+function leaf_string(ex)
+    if !is_leaf(ex)
+        throw(ArgumentError("leaf_string should be used for leaf nodes only"))
+    end
+    k = kind(ex)
+    value = ex.val
+    # TODO: Dispatch on kind extension module (??)
+    return k == K"Placeholder" ? "□"*string(value) :
+           is_identifier(k)    ? string(value)     :
+           value isa Symbol    ? string(value)     : # see parse_julia_literal for other cases which go here
+           repr(value)
+end
+
+function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
+                           indent, show_location, show_kind)
+    line, col = source_location(node)
+    if show_location
+        fname = filename(node)
+        # Add filename if it's changed from the previous node
+        if fname != current_filename[]
+            println(io, indent, " -file- │ ", repr(fname))
+            current_filename[] = fname
+        end
+        posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│"
+    else
+        posstr = ""
+    end
+    val = node.val
+    nodestr = is_leaf(node) ? leaf_string(node) : "[$(untokenize(head(node)))]"
+    treestr = string(indent, nodestr)
+    if show_kind && is_leaf(node)
+        treestr = rpad(treestr, 40)*" :: "*string(kind(node))
+    end
+    println(io, posstr, treestr)
+    if !is_leaf(node)
+        new_indent = indent*"  "
+        for n in children(node)
+            _show_syntax_node(io, current_filename, n, new_indent, show_location, show_kind)
+        end
+    end
+end
+
+function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode, show_kind)
+    if is_leaf(node)
+        if is_error(node)
+            print(io, "(", untokenize(head(node)), ")")
+        else
+            str = leaf_string(node)
+            k = kind(node)
+            if is_identifier(k) && !show_kind
+                str = lower_identifier_name(str, k)
+            end
+            print(io, str)
+            if show_kind
+                print(io, "::", kind(node))
+            end
+        end
+    else
+        print(io, "(", untokenize(head(node)))
+        for n in children(node)
+            print(io, ' ')
+            _show_syntax_node_sexpr(io, n, show_kind)
+        end
+        print(io, ')')
+    end
+end
+
+function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode; show_location=false, show_kind=true)
+    println(io, "SyntaxNode:")
+    if show_location
+        println(io, "line:col│ byte_range  │ tree")
+    end
+    _show_syntax_node(io, Ref(""), node, "", show_location, show_kind)
+end
+
+function Base.show(io::IO, ::MIME"text/x.sexpression", node::AbstractSyntaxNode; show_kind=false)
+    _show_syntax_node_sexpr(io, node, show_kind)
+end
+
+function Base.show(io::IO, node::AbstractSyntaxNode)
+    _show_syntax_node_sexpr(io, node, false)
+end
+
+function Base.push!(node::SN, child::SN) where SN<:AbstractSyntaxNode
+    if is_leaf(node)
+        error("Cannot add children")
+    end
+    args = children(node)
+    push!(args, child)
+end
+
+function Base.copy(node::TreeNode)
+    # copy the container but not the data (ie, deep copy the tree, shallow copy the data). copy(::Expr) is similar
+    # copy "un-parents" the top-level `node` that you're copying
+    newnode = typeof(node)(nothing, is_leaf(node) ? nothing : typeof(node)[], copy(node.data))
+    if !is_leaf(node)
+        for child in children(node)
+            newchild = copy(child)
+            newchild.parent = newnode
+            push!(newnode, newchild)
+        end
+    end
+    return newnode
+end
+
+# shallow-copy the data
+Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.byte_end, data.val)
+
+function build_tree(::Type{SyntaxNode}, stream::ParseStream;
+                    filename=nothing, first_line=1, keep_parens=false)
+    source = SourceFile(stream, filename=filename, first_line=first_line)
+    cursor = RedTreeCursor(stream)
+    if has_toplevel_siblings(cursor)
+        # There are multiple toplevel nodes, e.g. because we're using this
+        # to test a partial parse. Wrap everything in K"wrapper"
+
+        # First build the full green tree for all children (including trivia)
+        green_children = GreenNode{SyntaxHead}[]
+        for child in reverse_toplevel_siblings(cursor)
+            pushfirst!(green_children, GreenNode(child.green))
+        end
+
+        # Create a wrapper GreenNode with children
+        green = GreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG),
+                                  stream.next_byte-1, green_children)
+
+        # Now build SyntaxNodes, iterating through cursors and green nodes together
+        cs = SyntaxNode[]
+        for (i, child) in enumerate(reverse_toplevel_siblings(cursor))
+            if should_include_node(child)
+                pushfirst!(cs, SyntaxNode(source, child, green[end-i+1], keep_parens=keep_parens))
+            end
+        end
+
+        length(cs) == 1 && return only(cs)
+
+        node = SyntaxNode(nothing, cs, SyntaxData(source, green,
+                                                   stream.next_byte-1, nothing))
+        for c in cs
+            c.parent = node
+        end
+        return node
+    else
+        return SyntaxNode(source, cursor, keep_parens=keep_parens)
+    end
+end
+
+@deprecate haschildren(x) !is_leaf(x) false
diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl
new file mode 100644
index 0000000000000..9fb71c74d7132
--- /dev/null
+++ b/JuliaSyntax/src/precompile.jl
@@ -0,0 +1,12 @@
+# Just parse some file as a precompile workload
+let filename = joinpath(@__DIR__, "julia/literal_parsing.jl")
+    text = read(filename, String)
+    parseall(Expr, text)
+    parseall(SyntaxNode, text)
+    if _has_v1_6_hooks
+        enable_in_core!()
+        Meta.parse("1 + 2")
+        Meta.parse(SubString("1 + 2"))
+        enable_in_core!(false)
+    end
+end
diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl
new file mode 100644
index 0000000000000..c21c251eb688a
--- /dev/null
+++ b/JuliaSyntax/src/utils.jl
@@ -0,0 +1,144 @@
+# Compatibility hacks for older Julia versions
+if VERSION < v"1.1"
+    isnothing(x) = x === nothing
+end
+if VERSION < v"1.4"
+    function only(x::Union{AbstractVector,AbstractString})
+        if length(x) != 1
+            error("Collection must contain exactly 1 element")
+        end
+        return x[1]
+    end
+end
+if VERSION < v"1.5"
+    import Base.peek
+end
+
+@static if VERSION < v"1.8"
+    macro callsite_inline(call)
+        esc(call)
+    end
+else
+    const var"@callsite_inline" = var"@inline"
+end
+
+
+_unsafe_wrap_substring(s) = (s.offset, unsafe_wrap(Vector{UInt8}, s.string))
+
+#--------------------------------------------------
+#
+# Internal error, used as assertion failure for cases we expect can't happen.
+@noinline function internal_error(strs::Vararg{String, N}) where {N}
+    error("Internal error: ", strs...)
+end
+
+# Like @assert, but always enabled and calls internal_error()
+macro check(ex, msgs...)
+    msg = isempty(msgs) ? ex : msgs[1]
+    if isa(msg, AbstractString)
+        msg = String(msg)
+    elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol))
+        msg = :(string($(esc(msg))))
+    else
+        msg = string(msg)
+    end
+    return :($(esc(ex)) ? nothing : internal_error($msg))
+end
+
+# Really remove line numbers, even from Expr(:toplevel)
+remove_linenums!(ex) = ex
+function remove_linenums!(ex::Expr)
+    if ex.head === :block || ex.head === :quote || ex.head === :toplevel
+        filter!(ex.args) do x
+            !(isa(x, Expr) && x.head === :line || isa(x, LineNumberNode))
+        end
+    end
+    for subex in ex.args
+        subex isa Expr && remove_linenums!(subex)
+    end
+    return ex
+end
+
+# String macro to get the UInt8 code of an ascii character
+macro u8_str(str)
+    c = str == "\\" ? '\\' : only(unescape_string(str))
+    isascii(c) || error("Non-ascii character in u8_str")
+    codepoint(c) % UInt8
+end
+
+#-------------------------------------------------------------------------------
+# Text printing/display utils
+
+const _fg_color_codes = Dict(
+    :black         => 30,
+    :red           => 31,
+    :green         => 32,
+    :yellow        => 33,
+    :blue          => 34,
+    :magenta       => 35,
+    :cyan          => 36,
+    :white         => 37,
+    :light_black   => 90, # gray
+    :light_red     => 91,
+    :light_green   => 92,
+    :light_yellow  => 93,
+    :light_blue    => 94,
+    :light_magenta => 95,
+    :light_cyan    => 96,
+    :light_white   => 97,
+)
+
+"""
+    _printstyled(io::IO, text;
+                 fgcolor=nothing, bgcolor=nothing, href=nothing)
+
+Like Base.printstyled, but allows providing RGB colors for true color
+terminals, both foreground and background colors, and hyperlinks. Colors may be
+given as one of the standard color names as in `Base.printstyled`, an integer
+for 256 color terms, or an (r,g,b) triple with `0 <= r <= 255` etc for true
+color terminals.
+
+* `fgcolor` - set foreground color
+* `bgcolor` - set background color
+* `href`    - set hyperlink reference
+"""
+function _printstyled(io::IO, text; fgcolor=nothing, bgcolor=nothing, href=nothing)
+    if (isnothing(fgcolor) && isnothing(bgcolor) && isnothing(href)) || !get(io, :color, false)
+        print(io, text)
+        return
+    end
+    colcode = ""
+    if !isnothing(fgcolor)
+        if fgcolor isa Symbol && haskey(_fg_color_codes, fgcolor)
+            colcode *= "\e[$(_fg_color_codes[fgcolor])m"
+        elseif fgcolor isa Integer && 0 <= fgcolor <= 255
+            colcode *= "\e[38;5;$(fgcolor)m"
+        elseif fgcolor isa Tuple && length(fgcolor) == 3 && all(0 .<= fgcolor .<= 255)
+            colcode *= "\e[38;2;$(fgcolor[1]);$(fgcolor[2]);$(fgcolor[3])m"
+        else
+            error("Invalid ansi color $fgcolor")
+        end
+    end
+    if !isnothing(bgcolor)
+        if bgcolor isa Symbol && haskey(_fg_color_codes, bgcolor)
+            colcode *= "\e[$(10 + _fg_color_codes[bgcolor])m"
+        elseif bgcolor isa Integer && 0 <= bgcolor <= 255
+            colcode *= "\e[48;5;$(bgcolor)m"
+        elseif bgcolor isa Tuple && length(bgcolor) == 3 && all(0 .<= bgcolor .<= 255)
+            colcode *= "\e[48;2;$(bgcolor[1]);$(bgcolor[2]);$(bgcolor[3])m"
+        else
+            error("Invalid ansi color $bgcolor")
+        end
+    end
+    colreset = "\e[0;0m"
+    first = true
+    for linepart in split(text, '\n')
+        first || print(io, '\n')
+        line = string(colcode, linepart, colreset)
+        if !isnothing(href)
+            line = "\e]8;;$href\e\\$line\e]8;;\e\\"
+        end
+        print(io, line)
+        first = false
+    end
+end
diff --git a/JuliaSyntax/sysimage/.gitignore b/JuliaSyntax/sysimage/.gitignore
new file mode 100644
index 0000000000000..344cdc510a371
--- /dev/null
+++ b/JuliaSyntax/sysimage/.gitignore
@@ -0,0 +1,3 @@
+JuliaSyntax
+Project.toml
+Manifest.toml
diff --git a/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml b/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml
new file mode 100644
index 0000000000000..7e31a0dd5bc26
--- /dev/null
+++ b/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml
@@ -0,0 +1,10 @@
+name = "JuliaSyntaxCore"
+uuid = "05e5f68f-ccd0-4d84-a81a-f557a333a331"
+authors = ["Chris Foster <chris42f@gmail.com> and contributors"]
+version = "0.1.0"
+
+[compat]
+julia = "1.6"
+
+[deps]
+JuliaSyntax = "54354a4c-6cac-4c00-8566-e7c1beb8bfd8"
diff --git a/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl b/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl
new file mode 100644
index 0000000000000..d1804b89053b7
--- /dev/null
+++ b/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl
@@ -0,0 +1,12 @@
+module JuliaSyntaxCore
+
+# A tiny module to hold initialization code for JuliaSyntax.jl integration with
+# the runtime.
+
+using JuliaSyntax
+
+function __init__()
+    JuliaSyntax.enable_in_core!()
+end
+
+end
diff --git a/JuliaSyntax/sysimage/compile.jl b/JuliaSyntax/sysimage/compile.jl
new file mode 100755
index 0000000000000..390901eb56cd1
--- /dev/null
+++ b/JuliaSyntax/sysimage/compile.jl
@@ -0,0 +1,47 @@
+#!/bin/bash
+#=
+[[ $1 == +* ]] && juliaup_arg=$1 && shift # release channel for juliaup
+exec julia ${juliaup_arg} --startup-file=no -e 'include(popfirst!(ARGS))' "$0" "$@"
+=#
+
+imgs_base_path = joinpath(first(DEPOT_PATH), "sysimages", "v$VERSION")
+mkpath(imgs_base_path)
+
+using Libdl
+
+cd(@__DIR__)
+
+# Create a copy of JuliaSyntax so we can change the project UUID.
+# This allows us to use an older version of JuliaSyntax for developing
+# JuliaSyntax itself.
+rm("JuliaSyntax", force=true, recursive=true)
+mkdir("JuliaSyntax")
+cp("../src", "JuliaSyntax/src")
+cp("../test", "JuliaSyntax/test")
+projstr = replace(read("../Project.toml", String),
+    "70703baa-626e-46a2-a12c-08ffd08c73b4"=>"54354a4c-6cac-4c00-8566-e7c1beb8bfd8")
+write("JuliaSyntax/Project.toml", projstr)
+
+using Pkg
+rm("Project.toml", force=true)
+rm("Manifest.toml", force=true)
+Pkg.activate(".")
+Pkg.develop(path="./JuliaSyntax")
+Pkg.develop(path="./JuliaSyntaxCore")
+Pkg.add("PackageCompiler")
+
+image_path = joinpath(imgs_base_path, "juliasyntax_sysimage."*Libdl.dlext)
+
+using PackageCompiler
+PackageCompiler.create_sysimage(
+    ["JuliaSyntaxCore"],
+    project=".",
+    sysimage_path=image_path,
+    precompile_execution_file="precompile_exec.jl",
+    incremental=true,
+)
+
+@info """## System image compiled!
+
+      Use it with `julia -J "$image_path"`
+      """
diff --git a/JuliaSyntax/sysimage/precompile.jl b/JuliaSyntax/sysimage/precompile.jl
new file mode 100644
index 0000000000000..a1ae9555f33a2
--- /dev/null
+++ b/JuliaSyntax/sysimage/precompile.jl
@@ -0,0 +1,6 @@
+function precompile_JuliaSyntax(mod, juliasyntax_path)
+    Base.include(mod, joinpath(juliasyntax_path, "test", "test_utils.jl"))
+    Base.include(mod, joinpath(juliasyntax_path, "test", "parser.jl"))
+    JuliaSyntax.enable_in_core!()
+    Meta.parse("x+y+z-w .+ [a b c]")
+end
diff --git a/JuliaSyntax/sysimage/precompile_exec.jl b/JuliaSyntax/sysimage/precompile_exec.jl
new file mode 100644
index 0000000000000..99c8069341275
--- /dev/null
+++ b/JuliaSyntax/sysimage/precompile_exec.jl
@@ -0,0 +1,3 @@
+import JuliaSyntax
+include("precompile.jl")
+precompile_JuliaSyntax(@__MODULE__(), pkgdir(JuliaSyntax))
diff --git a/JuliaSyntax/test/benchmark.jl b/JuliaSyntax/test/benchmark.jl
new file mode 100644
index 0000000000000..0a4e260e485e9
--- /dev/null
+++ b/JuliaSyntax/test/benchmark.jl
@@ -0,0 +1,35 @@
+using BenchmarkTools
+using JuliaSyntax
+
+include("test_utils.jl")
+
+function concat_base()
+    basedir = joinpath(Sys.BINDIR, "..", "share", "julia", "base")
+    io = IOBuffer()
+    for f in find_source_in_path(basedir)
+        write(io, read(f, String))
+        println(io)
+    end
+    return String(take!(io))
+end
+
+all_base_code = concat_base()
+
+b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_code), rule=:all)
+b_GreenNode   = @benchmark JuliaSyntax.parseall(JuliaSyntax.GreenNode, all_base_code, ignore_warnings=true)
+b_SyntaxNode  = @benchmark JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, all_base_code, ignore_warnings=true)
+b_Expr        = @benchmark JuliaSyntax.parseall(Expr, all_base_code, ignore_warnings=true)
+b_flisp       = @benchmark JuliaSyntax.fl_parseall(all_base_code)
+
+@info "Benchmarks" ParseStream=b_ParseStream GreenNode=b_GreenNode SyntaxNode=b_SyntaxNode Expr=b_Expr flisp=b_flisp
+
+
+# Allocation profiling
+#
+# using Profile.Allocs
+# using PProf
+# Allocs.clear()
+# stream = JuliaSyntax.ParseStream(text);
+# JuliaSyntax.peek(stream);
+# Allocs.@profile sample_rate=1 JuliaSyntax.parsestmt(stream)
+# PProf.Allocs.pprof()
diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl
new file mode 100644
index 0000000000000..151aad919c0ed
--- /dev/null
+++ b/JuliaSyntax/test/diagnostics.jl
@@ -0,0 +1,251 @@
+function diagnostic(str; only_first=false, allow_multiple=false, rule=:all, version=v"1.6")
+    stream = ParseStream(str; version=version)
+    parse!(stream, rule=rule)
+    if allow_multiple
+        stream.diagnostics
+    else
+        if !only_first
+            @test length(stream.diagnostics) == 1
+        end
+        return isempty(stream.diagnostics) ? nothing : stream.diagnostics[1]
+    end
+end
+
+@testset "token errors" begin
+    @test diagnostic("a\xf5b") == Diagnostic(2, 2, :error, "invalid UTF-8 sequence \"\\xf5\"")
+    @test diagnostic("# a\xf5b") == Diagnostic(1, 5, :error, "invalid UTF-8 sequence \"# a\\xf5b\"")
+    for c in ['\u00ad', '\u200b', '\u200c', '\u200d',
+              '\u200e', '\u200f', '\u2060', '\u2061']
+        @test diagnostic("a$(c)b") ==
+            Diagnostic(2, 1+sizeof(string(c)), :error, "invisible character $(repr(c))")
+    end
+    @test diagnostic("₁") == Diagnostic(1, 3, :error, "identifier cannot begin with character '₁'")
+    @test diagnostic(":⥻") == Diagnostic(2, 4, :error, "unknown unicode character '⥻'")
+
+    @test diagnostic("\"X \u202a X\"") == Diagnostic(2, 8, :error, "unbalanced bidirectional unicode formatting \"X \\u202a X\"")
+    @test diagnostic("#= \u202a =#") == Diagnostic(1, 9, :error, "unbalanced bidirectional unicode formatting \"#= \\u202a =#\"")
+    @test diagnostic("\"X \u202a \$xx\u202c\"", allow_multiple=true) == [
+        Diagnostic(2, 7, :error, "unbalanced bidirectional unicode formatting \"X \\u202a \"")
+        Diagnostic(11, 13, :error, "unbalanced bidirectional unicode formatting \"\\u202c\"")
+    ]
+
+    @test diagnostic("0x") == Diagnostic(1, 2, :error, "invalid numeric constant")
+    @test diagnostic("0x0.1") == Diagnostic(1, 5, :error, "hex float literal must contain `p` or `P`")
+end
+
+@testset "parser errors" begin
+    @test diagnostic("+ #==# (a,b)") ==
+        Diagnostic(2, 7, :error, "whitespace not allowed between prefix function call and argument list")
+    @test diagnostic("1 -+ (a=1, b=2)") ==
+        Diagnostic(5, 5, :error, "whitespace not allowed between prefix function call and argument list")
+    @test diagnostic("\n+ (x, y)") ==
+        Diagnostic(3, 3, :error, "whitespace not allowed between prefix function call and argument list")
+
+    @test diagnostic("function (\$f) body end") ==
+        Diagnostic(10, 13, :error, "Ambiguous signature. Add a trailing comma if this is a 1-argument anonymous function; remove parentheses if this is a macro call acting as function signature.")
+
+    @test diagnostic("A.@B.x", only_first=true) ==
+        Diagnostic(3, 4, :error, "`@` must appear on first or last macro name component")
+    @test diagnostic("@M.(x)") ==
+        Diagnostic(1, 3, :error, "dot call syntax not supported for macros")
+
+    @test diagnostic("try x end") ==
+        Diagnostic(1, 9, :error, "try without catch or finally")
+    # TODO: better range
+    @test diagnostic("@A.\$x a") ==
+        Diagnostic(4, 5, :error, "invalid macro name")
+
+    @test diagnostic("a, , b") ==
+        Diagnostic(4, 4, :error, "unexpected `,`")
+    @test diagnostic(")", allow_multiple=true) == [
+        Diagnostic(1, 1, :error, "unexpected `)`")
+        Diagnostic(1, 1, :error, "extra tokens after end of expression")
+    ]
+
+    @test diagnostic("if\nfalse\nend") ==
+        Diagnostic(3, 3, :error, "missing condition in `if`")
+    @test diagnostic("if false\nelseif\nend") ==
+        Diagnostic(16, 16, :error, "missing condition in `elseif`")
+
+    @test diagnostic("f(x::V) where {V) = x", allow_multiple=true) == [
+        Diagnostic(17, 16, :error, "Expected `}` or `,`")
+        Diagnostic(17, 21, :error, "extra tokens after end of expression")
+    ]
+    @test diagnostic("[1)", allow_multiple=true) == [
+        Diagnostic(3, 2, :error, "Expected `]` or `,`")
+        Diagnostic(3, 3, :error, "extra tokens after end of expression")
+    ]
+    @test diagnostic("f(x, y #=hi=#\ng(z)") == Diagnostic(7, 6, :error, "Expected `)` or `,`")
+    @test diagnostic("(x, y \nz") == Diagnostic(6, 5, :error, "Expected `)` or `,`")
+    @test diagnostic("function f(x, y \nz end") == Diagnostic(16, 15, :error, "Expected `)` or `,`")
+
+    @test diagnostic("sin. (1)") ==
+        Diagnostic(5, 5, :error, "whitespace is not allowed here")
+    @test diagnostic("x [i]") ==
+        Diagnostic(2, 2, :error, "whitespace is not allowed here")
+    @test diagnostic("\nf() [i]") ==
+        Diagnostic(5, 5, :error, "whitespace is not allowed here")
+    @test diagnostic("\nf() (i)") ==
+        Diagnostic(5, 5, :error, "whitespace is not allowed here")
+    @test diagnostic("\nf() .i") ==
+        Diagnostic(5, 5, :error, "whitespace is not allowed here")
+    @test diagnostic("\nf() {i}") ==
+        Diagnostic(5, 5, :error, "whitespace is not allowed here")
+    @test diagnostic("\n@ m") ==
+        Diagnostic(3, 3, :error, "whitespace is not allowed here")
+    @test diagnostic("\nusing a .b") ==
+        Diagnostic(9, 9, :error, "whitespace is not allowed here")
+
+    @test diagnostic("const x") ==
+        Diagnostic(1, 7, :error, "expected assignment after `const`")
+    @test diagnostic("global const x") ==
+        Diagnostic(1, 14, :error, "expected assignment after `const`")
+
+    @test diagnostic("(for i=1; println())") ==
+        Diagnostic(20, 19, :error, "Expected `end`")
+    @test diagnostic("(try i=1; println())", allow_multiple=true) == [
+        Diagnostic(2, 19, :error, "try without catch or finally")
+        Diagnostic(20, 19, :error, "Expected `end`")
+    ]
+
+    @test diagnostic("\"\$(x,y)\"") ==
+        Diagnostic(3, 7, :error, "invalid interpolation syntax")
+
+    @test diagnostic("", rule=:statement) ==
+        Diagnostic(1, 0, :error, "premature end of input")
+    @test diagnostic("", rule=:atom) ==
+        Diagnostic(1, 0, :error, "premature end of input")
+end
+
+@testset "parser warnings" begin
+    @test diagnostic("@(A)", only_first=true) ==
+        Diagnostic(2, 4, :warning, "parenthesizing macro names is unnecessary")
+    @test diagnostic("try finally catch a ; b end") ==
+        Diagnostic(13, 23, :warning, "`catch` after `finally` will execute out of order")
+    @test diagnostic("import .  .A") ==
+        Diagnostic(9, 10, :warning, "space between dots in import path")
+    @test diagnostic("import A .==") ==
+        Diagnostic(9, 9, :warning, "space between dots in import path")
+    @test diagnostic("import A.:+") ==
+        Diagnostic(10, 10, :warning, "quoting with `:` is not required here")
+    # No warnings for imports of `:` and parenthesized `(..)`
+    @test diagnostic("import A.:, :", allow_multiple=true) == []
+    @test diagnostic("import A: (..)", allow_multiple=true) == []
+    @test diagnostic("import A.(:+)") ==
+        Diagnostic(10, 13, :warning, "parentheses are not required here")
+    @test diagnostic("export (x)") ==
+        Diagnostic(8, 10, :warning, "parentheses are not required here")
+    @test diagnostic("import :A") ==
+        Diagnostic(8, 9, :error, "expected identifier")
+    @test diagnostic("export :x") ==
+        Diagnostic(8, 9, :error, "expected identifier")
+    @test diagnostic("public = 4", version=v"1.11") ==
+        diagnostic("public[7] = 5", version=v"1.11") ==
+        diagnostic("public() = 6", version=v"1.11") ==
+        Diagnostic(1, 6, :warning, "using public as an identifier is deprecated")
+end
+
+@testset "diagnostics for literal parsing" begin
+    # Float overflow/underflow
+    @test diagnostic("x = 10.0e1000;") ==
+        Diagnostic(5, 13, :error, "overflow in floating point literal")
+    @test diagnostic("x = 10.0f1000;") ==
+        Diagnostic(5, 13, :error, "overflow in floating point literal")
+    @test diagnostic("x = 10.0e-1000;") ==
+        Diagnostic(5, 14, :warning, "underflow to zero in floating point literal")
+    @test diagnostic("x = 10.0f-1000;") ==
+        Diagnostic(5, 14, :warning, "underflow to zero in floating point literal")
+    # Underflow boundary
+    @test diagnostic("5e-324", allow_multiple=true) == []
+    @test diagnostic("2e-324") ==
+        Diagnostic(1, 6, :warning, "underflow to zero in floating point literal")
+
+    # Char
+    @test diagnostic("x = ''") ==
+        Diagnostic(6, 5, :error, "empty character literal")
+    @test diagnostic("x = 'abc'") ==
+        Diagnostic(6, 8, :error, "character literal contains multiple characters")
+    @test diagnostic("x = '\\xq'") ==
+        Diagnostic(6, 7, :error, "invalid hex escape sequence")
+    @test diagnostic("x = '\\uq'") ==
+        Diagnostic(6, 7, :error, "invalid unicode escape sequence")
+    @test diagnostic("x = '\\Uq'") ==
+        Diagnostic(6, 7, :error, "invalid unicode escape sequence")
+    @test diagnostic("x = '\\777'") ==
+        Diagnostic(6, 9, :error, "invalid octal escape sequence")
+    @test diagnostic("x = '\\k'") ==
+        Diagnostic(6, 7, :error, "invalid escape sequence")
+    @test diagnostic("'\\", allow_multiple=true) == [
+        Diagnostic(2, 2, :error, "invalid escape sequence"),
+        Diagnostic(3, 2, :error, "unterminated character literal")
+    ]
+    # Various cases from Base
+    @test diagnostic("'\\xff\\xff\\xff\\xff'") ==
+        Diagnostic(2, 17, :error, "character literal contains multiple characters")
+    @test diagnostic("'\\100\\42'") ==
+        Diagnostic(2, 8, :error, "character literal contains multiple characters")
+    @test diagnostic("'\\xff\\xff\\xff\\xff\\xff'") ==
+        Diagnostic(2, 21, :error, "character literal contains multiple characters")
+    @test diagnostic("'abcd'") ==
+        Diagnostic(2, 5, :error, "character literal contains multiple characters")
+    @test diagnostic("'\\uff\\xff'") ==
+        Diagnostic(2, 9, :error, "character literal contains multiple characters")
+    @test diagnostic("'\\xffa'") ==
+        Diagnostic(2, 6, :error, "character literal contains multiple characters")
+    @test diagnostic("'\\uffffa'") ==
+        Diagnostic(2, 8, :error, "character literal contains multiple characters")
+    @test diagnostic("'\\U00002014a'") ==
+        Diagnostic(2, 12, :error, "character literal contains multiple characters")
+    @test diagnostic("'\\1000'") ==
+        Diagnostic(2, 6, :error, "character literal contains multiple characters")
+
+    # String
+    @test diagnostic("x = \"abc\\xq\"") ==
+        Diagnostic(9, 10, :error, "invalid hex escape sequence")
+    @test diagnostic("x = \"abc\\uq\"") ==
+        Diagnostic(9, 10, :error, "invalid unicode escape sequence")
+    @test diagnostic("x = \"abc\\Uq\"") ==
+        Diagnostic(9, 10, :error, "invalid unicode escape sequence")
+    @test diagnostic("x = \"abc\\777\"") ==
+        Diagnostic(9, 12, :error, "invalid octal escape sequence")
+    @test diagnostic("x = \"abc\\k\"") ==
+        Diagnostic(9, 10, :error, "invalid escape sequence")
+    @test diagnostic("x = \"abc\\k \\k\"", allow_multiple=true) == [
+        Diagnostic(9, 10, :error, "invalid escape sequence"),
+        Diagnostic(12, 13, :error, "invalid escape sequence")
+    ]
+    @test diagnostic("\"\$x෴  \"") ==
+        Diagnostic(4, 6, :error, "interpolated variable ends with invalid character; use `\$(...)` instead")
+end
+
+@testset "diagnostic printing" begin
+    stream = JuliaSyntax.ParseStream("a -- b -- c")
+    JuliaSyntax.parse!(stream)
+    @test sprint(JuliaSyntax.show_diagnostics, stream) == """
+        # Error @ line 1:3
+        a -- b -- c
+        # └┘ ── invalid operator
+        # Error @ line 1:8
+        a -- b -- c
+        #      └┘ ── invalid operator"""
+
+    stream = JuliaSyntax.ParseStream("a -- b")
+    JuliaSyntax.parse!(stream)
+    fname = "test.jl"
+    sf = SourceFile(stream, filename=fname)
+    url = JuliaSyntax._file_url(fname)
+    @test sprint(JuliaSyntax.show_diagnostics, stream.diagnostics, sf,
+                 context=:color=>true) == """
+        \e[90m# Error @ \e[0;0m\e]8;;$url#1:3\e\\\e[90mtest.jl:1:3\e[0;0m\e]8;;\e\\
+        a \e[48;2;120;70;70m--\e[0;0m b
+        \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m"""
+
+    if Sys.isunix()
+        tempdirname = mktempdir()
+        cd(tempdirname) do
+            rm(tempdirname)
+            # Test _file_url doesn't fail with nonexistent directories
+            @test isnothing(JuliaSyntax._file_url(joinpath("__nonexistent__", "test.jl")))
+        end
+    end
+end
diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl
new file mode 100644
index 0000000000000..d7818b76bbfae
--- /dev/null
+++ b/JuliaSyntax/test/expr.jl
@@ -0,0 +1,851 @@
+@testset "Expr parsing with $method" for method in ["build_tree", "SyntaxNode conversion"]
+    parseatom, parsestmt, parseall =
+        if method == "build_tree"
+            ((s; kws...) -> JuliaSyntax.parseatom(Expr, s; kws...),
+             (s; kws...) -> JuliaSyntax.parsestmt(Expr, s; kws...),
+             (s; kws...) -> JuliaSyntax.parseall(Expr, s; kws...))
+        else
+            ((s; kws...) -> Expr(JuliaSyntax.parseatom(SyntaxNode, s; keep_parens=true, kws...)),
+             (s; kws...) -> Expr(JuliaSyntax.parsestmt(SyntaxNode, s; keep_parens=true, kws...)),
+             (s; kws...) -> Expr(JuliaSyntax.parseall(SyntaxNode, s; keep_parens=true, kws...)))
+        end
+
+    @testset "Quote nodes" begin
+        @test parseatom(":(a)") == QuoteNode(:a)
+        @test parseatom(":(:a)") == Expr(:quote, QuoteNode(:a))
+        @test parseatom(":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2))
+        # Compatibility hack for VERSION >= v"1.4"
+        # https://github.com/JuliaLang/julia/pull/34077
+        @test parseatom(":true") == Expr(:quote, true)
+    end
+
+    @testset "Line numbers" begin
+        @testset "Blocks" begin
+            @test parsestmt("begin a\nb\n\nc\nend") ==
+                Expr(:block,
+                     LineNumberNode(1),
+                     :a,
+                     LineNumberNode(2),
+                     :b,
+                     LineNumberNode(4),
+                     :c,
+                )
+            @test parsestmt("(a;b;c)") ==
+                Expr(:block,
+                     :a,
+                     LineNumberNode(1),
+                     :b,
+                     LineNumberNode(1),
+                     :c,
+                )
+            @test parsestmt("begin end") ==
+                Expr(:block,
+                     LineNumberNode(1)
+                )
+            @test parsestmt("(;;)") ==
+                Expr(:block)
+
+            @test parseall("a\n\nb") ==
+                Expr(:toplevel,
+                     LineNumberNode(1),
+                     :a,
+                     LineNumberNode(3),
+                     :b,
+                )
+            @test parsestmt("a;b") ==
+                Expr(:toplevel, :a, :b)
+
+            @test parsestmt("module A\n\nbody\nend"; version=v"1.13") ==
+                Expr(:module,
+                     true,
+                     :A,
+                     Expr(:block,
+                          LineNumberNode(1),
+                          LineNumberNode(3),
+                          :body,
+                     ),
+                )
+
+            @test parseall("a\n\nx") ==
+                Expr(:toplevel,
+                    LineNumberNode(1),
+                    :a,
+                    LineNumberNode(3),
+                    :x
+                )
+            @test parseall("a\n\nx;y") ==
+                Expr(:toplevel,
+                    LineNumberNode(1),
+                    :a,
+                    LineNumberNode(3),
+                    Expr(:toplevel, :x, :y)
+                )
+        end
+
+        @testset "Function definition lines" begin
+            @test parsestmt("function f()\na\n\nb\nend") ==
+                Expr(:function,
+                     Expr(:call, :f),
+                     Expr(:block,
+                         LineNumberNode(1),
+                         LineNumberNode(2),
+                         :a,
+                         LineNumberNode(4),
+                         :b,
+                     )
+                )
+            @test parsestmt("f() = 1") ==
+                Expr(:(=),
+                     Expr(:call, :f),
+                     Expr(:block,
+                          LineNumberNode(1),
+                          1
+                     )
+                )
+            @test parsestmt("macro f()\na\nend") ==
+                Expr(:macro,
+                     Expr(:call, :f),
+                     Expr(:block,
+                         LineNumberNode(1),
+                         LineNumberNode(2),
+                         :a,
+                     )
+                )
+
+            # function/macro without methods
+            @test parsestmt("function f end") ==
+                Expr(:function, :f)
+            @test parsestmt("macro f end") ==
+                Expr(:macro, :f)
+
+            # weird cases with extra parens
+            @test parsestmt("function (f() where T) end") ==
+                Expr(:function, Expr(:where, Expr(:call, :f), :T),
+                     Expr(:block, LineNumberNode(1), LineNumberNode(1)))
+            @test parsestmt("function (f()::S) end") ==
+                Expr(:function, Expr(:(::), Expr(:call, :f), :S),
+                     Expr(:block, LineNumberNode(1), LineNumberNode(1)))
+        end
+
+        @testset "->" begin
+            @test parsestmt("a -> b") ==
+                Expr(:->, :a, Expr(:block, LineNumberNode(1), :b))
+            @test parsestmt("(a,) -> b") ==
+                Expr(:->, Expr(:tuple, :a), Expr(:block, LineNumberNode(1), :b))
+            @test parsestmt("(a where T) -> b") ==
+                Expr(:->, Expr(:where, :a, :T), Expr(:block, LineNumberNode(1), :b))
+            # @test parsestmt("a -> (\nb;c)") ==
+            #     Expr(:->, :a, Expr(:block, LineNumberNode(1), :b))
+            @test parsestmt("a -> begin\nb\nc\nend") ==
+                Expr(:->, :a, Expr(:block,
+                                   LineNumberNode(1),
+                                   LineNumberNode(2), :b,
+                                   LineNumberNode(3), :c))
+            @test parsestmt("(a;b=1) -> c") ==
+                Expr(:->,
+                     Expr(:block, :a, LineNumberNode(1), Expr(:(=), :b, 1)),
+                     Expr(:block, LineNumberNode(1), :c))
+            @test parsestmt("(a...;b...) -> c") ==
+                Expr(:->,
+                     Expr(:tuple, Expr(:parameters, Expr(:(...), :b)), Expr(:(...), :a)),
+                     Expr(:block, LineNumberNode(1), :c))
+            @test parsestmt("(;) -> c") ==
+                Expr(:->,
+                     Expr(:tuple, Expr(:parameters)),
+                     Expr(:block, LineNumberNode(1), :c))
+            @test parsestmt("a::T -> b") ==
+                Expr(:->, Expr(:(::), :a, :T), Expr(:block, LineNumberNode(1), :b))
+        end
+
+        @testset "elseif" begin
+            @test parsestmt("if a\nb\nelseif c\n d\nend") ==
+                Expr(:if,
+                     :a,
+                     Expr(:block,
+                          LineNumberNode(2),
+                          :b),
+                     Expr(:elseif,
+                          Expr(:block,
+                               LineNumberNode(3),  # Line number for elseif condition
+                               :c),
+                          Expr(:block,
+                               LineNumberNode(4),
+                               :d),
+                     )
+                )
+        end
+
+        @testset "No line numbers in let bindings" begin
+            @test parsestmt("let i=is, j=js\nbody\nend") ==
+                Expr(:let,
+                     Expr(:block,
+                         Expr(:(=), :i, :is),
+                         Expr(:(=), :j, :js),
+                     ),
+                     Expr(:block,
+                         LineNumberNode(2),
+                         :body
+                     )
+                )
+        end
+
+        @testset "Loops" begin
+            @test parsestmt("for x=xs\n\nend") ==
+                Expr(:for,
+                     Expr(:(=), :x, :xs),
+                     Expr(:block,
+                          LineNumberNode(1),
+                          LineNumberNode(3)
+                     )
+                )
+            @test parsestmt("for x=xs\ny\nend") ==
+                Expr(:for,
+                     Expr(:(=), :x, :xs),
+                     Expr(:block,
+                          LineNumberNode(2),
+                          :y,
+                          LineNumberNode(3)
+                     )
+                )
+            @test parsestmt("while cond\n\nend") ==
+                Expr(:while,
+                     :cond,
+                     Expr(:block,
+                          LineNumberNode(1),
+                          LineNumberNode(3)
+                     )
+                )
+            @test parsestmt("while cond\ny\nend") ==
+                Expr(:while,
+                     :cond,
+                     Expr(:block,
+                          LineNumberNode(2),
+                          :y,
+                          LineNumberNode(3)
+                     )
+                )
+        end
+    end
+
+    @testset "Short form function line numbers" begin
+        # A block is added to hold the line number node
+        @test parsestmt("f() = xs") ==
+            Expr(:(=),
+                 Expr(:call, :f),
+                 Expr(:block,
+                      LineNumberNode(1),
+                      :xs))
+
+        @test parsestmt("f() =\n(a;b)") ==
+            Expr(:(=),
+                 Expr(:call, :f),
+                 Expr(:block,
+                      LineNumberNode(1),
+                      :a,
+                      LineNumberNode(2),
+                      :b))
+
+        @test parsestmt("f() =\nbegin\na\nb\nend") ==
+            Expr(:(=),
+                 Expr(:call, :f),
+                 Expr(:block,
+                      LineNumberNode(1),
+                      LineNumberNode(3),
+                      :a,
+                      LineNumberNode(4),
+                      :b))
+
+        @test parsestmt("let f(x) =\ng(x)=1\nend") ==
+            Expr(:let,
+                 Expr(:(=),
+                      Expr(:call, :f, :x),
+                      Expr(:block,
+                           LineNumberNode(1),
+                           Expr(:(=),
+                               Expr(:call, :g, :x),
+                               Expr(:block,
+                                    LineNumberNode(2),
+                                    1)))),
+                 Expr(:block,
+                      LineNumberNode(3)))
+
+        # short-form postfix function shouldn't introduce a block
+        @test parsestmt("x' = 1") ==
+            Expr(:(=),
+                 Expr(Symbol("'"), :x),
+                 1)
+
+        # `.=` doesn't introduce short form functions
+        @test parsestmt("f() .= xs") ==
+            Expr(:(.=), Expr(:call, :f), :xs)
+    end
+
+    @testset "for" begin
+        @test parsestmt("for i=is body end") ==
+            Expr(:for,
+                 Expr(:(=), :i, :is),
+                 Expr(:block,
+                     LineNumberNode(1),
+                     :body,
+                     LineNumberNode(1)
+                 )
+            )
+        @test parsestmt("for i=is, j=js\nbody\nend") ==
+            Expr(:for,
+                 Expr(:block,
+                     Expr(:(=), :i, :is),
+                     Expr(:(=), :j, :js),
+                 ),
+                 Expr(:block,
+                     LineNumberNode(2),
+                     :body,
+                     LineNumberNode(3),
+                 )
+            )
+    end
+
+    @testset "Long form anonymous functions" begin
+        @test parsestmt("function (xs...)\nbody end") ==
+            Expr(:function,
+                 Expr(:..., :xs),
+                 Expr(:block,
+                      LineNumberNode(1),
+                      LineNumberNode(2),
+                      :body))
+    end
+
+    @testset "String conversions" begin
+        # String unwrapping / wrapping
+        @test parsestmt("\"str\"") == "str"
+        @test parsestmt("\"\$(\"str\")\"") ==
+            Expr(:string, Expr(:string, "str"))
+        # Concatenation of string chunks in triple quoted cases
+        @test parsestmt("```\n  a\n  b```") ==
+            Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1),
+                 "a\nb")
+        @test parsestmt("\"\"\"\n  a\n  \$x\n  b\n  c\"\"\"") ==
+            Expr(:string, "a\n", :x, "\nb\nc")
+        # Incomplete cases
+        @test parsestmt("`x", ignore_errors=true) ==
+            Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1),
+                 Expr(:string, "x", Expr(:error)))
+        @test parsestmt("`", ignore_errors=true) ==
+            Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1),
+                 Expr(:string, Expr(:error)))
+    end
+
+    @testset "Char conversions" begin
+        @test parsestmt("'a'") == 'a'
+        @test parsestmt("'α'") == 'α'
+        @test parsestmt("'\\xce\\xb1'") == 'α'
+    end
+
+    @testset "do block conversion" begin
+        @test parsestmt("f(x) do y\n body end") ==
+            Expr(:do,
+                 Expr(:call, :f, :x),
+                 Expr(:->, Expr(:tuple, :y),
+                      Expr(:block,
+                           LineNumberNode(2),
+                           :body)))
+
+        @test parsestmt("@f(x) do y body end") ==
+            Expr(:do,
+                 Expr(:macrocall, Symbol("@f"), LineNumberNode(1), :x),
+                 Expr(:->, Expr(:tuple, :y),
+                      Expr(:block,
+                           LineNumberNode(1),
+                           :body)))
+
+        @test parsestmt("f(x; a=1) do y body end") ==
+            Expr(:do,
+                 Expr(:call, :f, Expr(:parameters, Expr(:kw, :a, 1)), :x),
+                 Expr(:->, Expr(:tuple, :y),
+                      Expr(:block,
+                           LineNumberNode(1),
+                           :body)))
+
+        # Test calls with do inside them
+        @test parsestmt("g(f(x) do y\n body end)") ==
+            Expr(:call,
+                 :g,
+                 Expr(:do,
+                      Expr(:call, :f, :x),
+                      Expr(:->, Expr(:tuple, :y),
+                           Expr(:block,
+                                LineNumberNode(2),
+                                :body))))
+    end
+
+    @testset "= to Expr(:kw) conversion" begin
+        # Call
+        @test parsestmt("f(a=1)") ==
+            Expr(:call, :f, Expr(:kw, :a, 1))
+        @test parsestmt("f(; b=2)") ==
+            Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)))
+        @test parsestmt("f(a=1; b=2)") ==
+            Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))
+        @test parsestmt("f(a; b; c)") ==
+            Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a)
+        @test parsestmt("+(a=1,)") ==
+            Expr(:call, :+, Expr(:kw, :a, 1))
+        @test parsestmt("(a=1)()") ==
+            Expr(:call, Expr(:(=), :a, 1))
+
+        # Operator calls:  = is not :kw
+        @test parsestmt("(x=1) != 2") ==
+            Expr(:call, :!=, Expr(:(=), :x, 1), 2)
+        @test parsestmt("+(a=1)") ==
+            Expr(:call, :+, Expr(:(=), :a, 1))
+        @test parsestmt("(a=1)'") ==
+            Expr(Symbol("'"), Expr(:(=), :a, 1))
+        @test parsestmt("(a=1)'ᵀ") ==
+            Expr(:call, Symbol("'ᵀ"), Expr(:(=), :a, 1))
+
+        # Dotcall
+        @test parsestmt("f.(a=1; b=2)") ==
+            Expr(:., :f, Expr(:tuple,
+                              Expr(:parameters, Expr(:kw, :b, 2)),
+                              Expr(:kw, :a, 1)))
+
+        # Named tuples
+        @test parsestmt("(a=1,)") ==
+            Expr(:tuple, Expr(:(=), :a, 1))
+        @test parsestmt("(a=1,; b=2)") ==
+            Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1))
+        @test parsestmt("(a=1,; b=2; c=3)") ==
+            Expr(:tuple,
+                 Expr(:parameters,
+                      Expr(:parameters, Expr(:kw, :c, 3)),
+                      Expr(:kw, :b, 2)),
+                 Expr(:(=), :a, 1))
+
+        # ref
+        @test parsestmt("x[i=j]") ==
+            Expr(:ref, :x, Expr(:kw, :i, :j))
+        @test parsestmt("(i=j)[x]") ==
+            Expr(:ref, Expr(:(=), :i, :j), :x)
+        @test parsestmt("x[a, b; i=j]") ==
+            Expr(:ref, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b)
+        # curly
+        @test parsestmt("(i=j){x}") ==
+            Expr(:curly, Expr(:(=), :i, :j), :x)
+        @test parsestmt("x{a, b; i=j}") ==
+            Expr(:curly, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b)
+
+        # vect
+        @test parsestmt("[a=1,; b=2]") ==
+            Expr(:vect,
+                 Expr(:parameters, Expr(:(=), :b, 2)),
+                 Expr(:(=), :a, 1))
+        # braces
+        @test parsestmt("{a=1,; b=2}") ==
+            Expr(:braces,
+                 Expr(:parameters, Expr(:(=), :b, 2)),
+                 Expr(:(=), :a, 1))
+
+        # dotted = is not :kw
+        @test parsestmt("f(a .= 1)") ==
+            Expr(:call, :f, Expr(:.=, :a, 1))
+
+        # = inside parens in calls and tuples
+        @test parsestmt("f(((a = 1)))") ==
+            Expr(:call, :f, Expr(:kw, :a, 1))
+        @test parsestmt("(((a = 1)),)") ==
+            Expr(:tuple, Expr(:(=), :a, 1))
+        @test parsestmt("(;((a = 1)),)") ==
+            Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1)))
+    end
+
+    @testset "Field access syntax" begin
+        @test parsestmt("a.b") == Expr(:., :a, QuoteNode(:b))
+        @test parsestmt("a.\$b") == Expr(:., :a, QuoteNode(Expr(:$, :b)))
+        @test parsestmt("a.:b") == Expr(:., :a, QuoteNode(:b))
+        @test parsestmt("a.@b x") == Expr(:macrocall,
+                                          Expr(:., :a, QuoteNode(Symbol("@b"))),
+                                          LineNumberNode(1),
+                                          :x)
+    end
+
+    @testset "dotcall / dotted operators" begin
+        @test parsestmt("f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y))
+        @test parsestmt("f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1)))
+        @test parsestmt("f.(a=1; b=2)") ==
+            Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)))
+        @test parsestmt("(a=1).()") == Expr(:., Expr(:(=), :a, 1), Expr(:tuple))
+        @test parsestmt("x .+ y")  == Expr(:call, Symbol(".+"), :x, :y)
+        @test parsestmt("(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y)
+        @test parsestmt("a .< b .< c") == Expr(:comparison, :a, Symbol(".<"),
+                                                     :b, Symbol(".<"), :c)
+        @test parsestmt("a .< (.<) .< c") == Expr(:comparison, :a, Symbol(".<"),
+                                                        Expr(:., :<), Symbol(".<"), :c)
+        @test parsestmt(".*(x)")    == Expr(:call, Symbol(".*"), :x)
+        @test parsestmt(".+(x)")    == Expr(:call, Symbol(".+"), :x)
+        @test parsestmt(".+x")      == Expr(:call, Symbol(".+"), :x)
+        @test parsestmt("(.+)(x)")  == Expr(:call, Expr(:., :+), :x)
+        @test parsestmt("(.+).(x)") == Expr(:., Expr(:., :+), Expr(:tuple, :x))
+
+        @test parsestmt(".+")    == Expr(:., :+)
+        @test parsestmt(":.+")   == QuoteNode(Symbol(".+"))
+        @test parsestmt(":(.+)") == Expr(:quote, (Expr(:., :+)))
+        @test parsestmt("quote .+ end")   == Expr(:quote,
+                                                        Expr(:block,
+                                                             LineNumberNode(1),
+                                                             Expr(:., :+)))
+        @test parsestmt(".+{x}") == Expr(:curly, Symbol(".+"), :x)
+
+        # Quoted syntactic ops act different when in parens
+        @test parsestmt(":.=")   == QuoteNode(Symbol(".="))
+        @test parsestmt(":(.=)") == QuoteNode(Symbol(".="))
+
+        # A few other cases of bare dotted ops
+        @test parsestmt("f(.+)")   == Expr(:call, :f, Expr(:., :+))
+        @test parsestmt("(a, .+)") == Expr(:tuple, :a, Expr(:., :+))
+        @test parsestmt("A.:.+")   == Expr(:., :A, QuoteNode(Symbol(".+")))
+
+        # Issue #341
+        @test parsestmt("./x", ignore_errors=true) == Expr(:call, Expr(:error, Expr(:., :/)), :x)
+    end
+
+    @testset "syntactic update-assignment operators" begin
+        @test parsestmt("x += y") == Expr(:(+=), :x, :y)
+        @test parsestmt("x .+= y") == Expr(:(.+=), :x, :y)
+        @test parsestmt(":+=") == QuoteNode(Symbol("+="))
+        @test parsestmt(":(+=)") == QuoteNode(Symbol("+="))
+        @test parsestmt(":.+=") == QuoteNode(Symbol(".+="))
+        @test parsestmt(":(.+=)") == QuoteNode(Symbol(".+="))
+        @test parsestmt("x \u2212= y") == Expr(:(-=), :x, :y)
+    end
+
+    @testset "let" begin
+        @test parsestmt("let x=1\n end") ==
+            Expr(:let, Expr(:(=), :x, 1),  Expr(:block, LineNumberNode(2)))
+        @test parsestmt("let x=1 ; end") ==
+            Expr(:let, Expr(:(=), :x, 1),  Expr(:block, LineNumberNode(1)))
+        @test parsestmt("let x ; end") ==
+            Expr(:let, :x, Expr(:block, LineNumberNode(1)))
+        @test parsestmt("let x::1 ; end") ==
+            Expr(:let, Expr(:(::), :x, 1), Expr(:block, LineNumberNode(1)))
+        @test parsestmt("let x=1,y=2 end") ==
+            Expr(:let, Expr(:block, Expr(:(=), :x, 1), Expr(:(=), :y, 2)), Expr(:block, LineNumberNode(1)))
+        @test parsestmt("let x+=1 ; end") ==
+            Expr(:let, Expr(:block, Expr(:+=, :x, 1)), Expr(:block, LineNumberNode(1)))
+        @test parsestmt("let ; end") ==
+            Expr(:let, Expr(:block), Expr(:block, LineNumberNode(1)))
+        @test parsestmt("let ; body end") ==
+            Expr(:let, Expr(:block), Expr(:block, LineNumberNode(1), :body))
+        @test parsestmt("let\na\nb\nend") ==
+            Expr(:let, Expr(:block), Expr(:block, LineNumberNode(2), :a, LineNumberNode(3), :b))
+    end
+
+    @testset "where" begin
+        @test parsestmt("A where T") == Expr(:where, :A, :T)
+        @test parsestmt("A where {T}") == Expr(:where, :A, :T)
+        @test parsestmt("A where {S, T}") == Expr(:where, :A, :S, :T)
+        @test parsestmt("A where {X, Y; Z}") == Expr(:where, :A, Expr(:parameters, :Z), :X, :Y)
+    end
+
+    @testset "macrocall" begin
+        # line numbers
+        @test parsestmt("@m\n") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1))
+        @test parsestmt("\n@m") == Expr(:macrocall, Symbol("@m"), LineNumberNode(2))
+        # parameters
+        @test parsestmt("@m(x; a)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1),
+                                              Expr(:parameters, :a), :x)
+        @test parsestmt("@m(a=1; b=2)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1),
+                                                  Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1))
+        # @__dot__
+        @test parsestmt("@.") == Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1))
+        @test parsestmt("using A: @.") == Expr(:using, Expr(Symbol(":"), Expr(:., :A), Expr(:., Symbol("@__dot__"))))
+
+        # var""
+        @test parsestmt("@var\"#\" a") == Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a)
+        @test parsestmt("@var\"\\\"\" a") == Expr(:macrocall, Symbol("@\""), LineNumberNode(1), :a)
+        @test parsestmt("A.@var\"#\" a") == Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a)
+
+        # Square brackets
+        @test parsestmt("@S[a,b]") ==
+            Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vect, :a, :b))
+        @test parsestmt("@S[a b]") ==
+            Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:hcat, :a, :b))
+        @test parsestmt("@S[a; b]") ==
+            Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vcat, :a, :b))
+        @test parsestmt("@S[a ;; b]", version=v"1.7") ==
+            Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:ncat, 2, :a, :b))
+    end
+
+    @testset "var" begin
+        @test parsestmt("var\"x\"") == :x
+        @test parsestmt("var\"\"")         == Symbol("")
+        @test parsestmt("var\"\\\"\"")     == Symbol("\"")
+        @test parsestmt("var\"\\\\\\\"\"") == Symbol("\\\"")
+        @test parsestmt("var\"\\\\x\"")    == Symbol("\\\\x")
+        @test parsestmt("var\"x\"+y")      == Expr(:call, :+, :x, :y)
+    end
+
+    @testset "vect" begin
+        @test parsestmt("[x,y ; z]") == Expr(:vect, Expr(:parameters, :z), :x, :y)
+    end
+
+    @testset "concatenation" begin
+        @test parsestmt("[a ;;; b ;;;; c]", version=v"1.7") ==
+            Expr(:ncat, 4, Expr(:nrow, 3, :a, :b), :c)
+        @test parsestmt("[a b ; c d]") ==
+            Expr(:vcat, Expr(:row, :a, :b), Expr(:row, :c, :d))
+        @test parsestmt("[a\nb]") == Expr(:vcat, :a, :b)
+        @test parsestmt("[a b]") == Expr(:hcat, :a, :b)
+        @test parsestmt("[a b ; c d]") ==
+            Expr(:vcat, Expr(:row, :a, :b), Expr(:row, :c, :d))
+
+        @test parsestmt("T[a ;;; b ;;;; c]", version=v"1.7") ==
+            Expr(:typed_ncat, :T, 4, Expr(:nrow, 3, :a, :b), :c)
+        @test parsestmt("T[a b ; c d]") ==
+            Expr(:typed_vcat, :T, Expr(:row, :a, :b), Expr(:row, :c, :d))
+        @test parsestmt("T[a\nb]") == Expr(:typed_vcat, :T, :a, :b)
+        @test parsestmt("T[a b]") == Expr(:typed_hcat, :T, :a, :b)
+        @test parsestmt("T[a b ; c d]") ==
+            Expr(:typed_vcat, :T, Expr(:row, :a, :b), Expr(:row, :c, :d))
+    end
+
+    @testset "generators" begin
+        @test parsestmt("(x for a in as for b in bs)") ==
+            Expr(:flatten, Expr(:generator,
+                                Expr(:generator, :x, Expr(:(=), :b, :bs)),
+                                Expr(:(=), :a, :as)))
+        @test parsestmt("(x for a in as, b in bs)") ==
+            Expr(:generator, :x, Expr(:(=), :a, :as), Expr(:(=), :b, :bs))
+        @test parsestmt("(x for a in as, b in bs if z)") ==
+            Expr(:generator, :x,
+                 Expr(:filter, :z, Expr(:(=), :a, :as), Expr(:(=), :b, :bs)))
+        @test parsestmt("(x for a in as, b in bs for c in cs, d in ds)") ==
+            Expr(:flatten,
+                Expr(:generator,
+                     Expr(:generator, :x, Expr(:(=), :c, :cs), Expr(:(=), :d, :ds)),
+                     Expr(:(=), :a, :as), Expr(:(=), :b, :bs)))
+        @test parsestmt("(x for a in as for b in bs if z)") ==
+            Expr(:flatten, Expr(:generator,
+                                Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :b, :bs))),
+                                Expr(:(=), :a, :as)))
+        @test parsestmt("(x for a in as if z for b in bs)") ==
+            Expr(:flatten, Expr(:generator,
+                                Expr(:generator, :x, Expr(:(=), :b, :bs)),
+                                Expr(:filter, :z, Expr(:(=), :a, :as))))
+        @test parsestmt("[x for a = as for b = bs if cond1 for c = cs if cond2]" ) ==
+            Expr(:comprehension,
+                 Expr(:flatten,
+                      Expr(:generator,
+                           Expr(:flatten,
+                                Expr(:generator,
+                                     Expr(:generator,
+                                          :x,
+                                          Expr(:filter,
+                                               :cond2,
+                                               Expr(:(=), :c, :cs))),
+                                     Expr(:filter,
+                                          :cond1,
+                                          Expr(:(=), :b, :bs)))),
+                           Expr(:(=), :a, :as))))
+        @test parsestmt("[x for a = as if begin cond2 end]" ) ==
+            Expr(:comprehension, Expr(:generator, :x,
+                                      Expr(:filter,
+                                           Expr(:block, LineNumberNode(1), :cond2),
+                                           Expr(:(=), :a, :as))))
+        @test parsestmt("(x for a in as if z)") ==
+            Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :a, :as)))
+    end
+
+    @testset "try" begin
+        @test parsestmt("try x catch e; y end") ==
+            Expr(:try,
+                 Expr(:block, LineNumberNode(1), :x),
+                 :e,
+                 Expr(:block, LineNumberNode(1), :y))
+        @test parsestmt("try x finally y end") ==
+            Expr(:try,
+                 Expr(:block, LineNumberNode(1), :x),
+                 false,
+                 false,
+                 Expr(:block, LineNumberNode(1), :y))
+        @test parsestmt("try x catch e; y finally z end") ==
+            Expr(:try,
+                 Expr(:block, LineNumberNode(1), :x),
+                 :e,
+                 Expr(:block, LineNumberNode(1), :y),
+                 Expr(:block, LineNumberNode(1), :z))
+        @test parsestmt("try x catch e; y else z end", version=v"1.8") ==
+            Expr(:try,
+                 Expr(:block, LineNumberNode(1), :x),
+                 :e,
+                 Expr(:block, LineNumberNode(1), :y),
+                 false,
+                 Expr(:block, LineNumberNode(1), :z))
+        @test parsestmt("try x catch e; y else z finally w end", version=v"1.8") ==
+            Expr(:try,
+                 Expr(:block, LineNumberNode(1), :x),
+                 :e,
+                 Expr(:block, LineNumberNode(1), :y),
+                 Expr(:block, LineNumberNode(1), :w),
+                 Expr(:block, LineNumberNode(1), :z))
+        # finally before catch
+        @test parsestmt("try x finally y catch e z end", ignore_warnings=true) ==
+            Expr(:try,
+                 Expr(:block, LineNumberNode(1), :x),
+                 :e,
+                 Expr(:block, LineNumberNode(1), :z),
+                 Expr(:block, LineNumberNode(1), :y))
+        # empty recovery
+        @test parsestmt("try x end", ignore_errors=true) ==
+            Expr(:try,
+                 Expr(:block, LineNumberNode(1), :x),
+                 false, false,
+                 Expr(:block, Expr(:error)))
+    end
+
+    @testset "juxtapose" begin
+        @test parsestmt("2x") == Expr(:call, :*, 2, :x)
+        @test parsestmt("(2)(3)x") == Expr(:call, :*, 2, 3, :x)
+    end
+
+    @testset "Core.@doc" begin
+        @test parsestmt("\"x\" f") ==
+            Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(1), "x", :f)
+        @test parsestmt("\n\"x\" f") ==
+            Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(2), "x", :f)
+    end
+
+    @testset "String and cmd macros" begin
+        # Custom string macros
+        @test parsestmt("foo\"str\"") ==
+            Expr(:macrocall, Symbol("@foo_str"), LineNumberNode(1), "str")
+        # Bare @cmd
+        @test parsestmt("\n`str`") ==
+            Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(2), "str")
+        # Custom cmd macros
+        @test parsestmt("foo`str`") ==
+            Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "str")
+        @test parsestmt("foo`str`flag") ==
+            Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "str", "flag")
+        @test parsestmt("foo```\n  a\n  b```") ==
+            Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "a\nb")
+        # Expr conversion distinguishes from explicit calls to a macro of the same name
+        @test parsestmt("@foo_cmd `str`") ==
+            Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1),
+                 Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), "str"))
+    end
+
+    @testset "return" begin
+        @test parsestmt("return x") == Expr(:return, :x)
+        @test parsestmt("return")  == Expr(:return, nothing)
+    end
+
+    @testset "Large integer macros" begin
+        @test parsestmt("0x00000000000000001") ==
+            Expr(:macrocall, GlobalRef(Core, Symbol("@uint128_str")),
+                 nothing, "0x00000000000000001")
+
+        @test parsestmt("(0x00000000000000001)") ==
+            Expr(:macrocall, GlobalRef(Core, Symbol("@uint128_str")),
+                 nothing, "0x00000000000000001")
+    end
+
+    @testset "struct" begin
+        @test parsestmt("struct A end") ==
+            Expr(:struct, false, :A, Expr(:block, LineNumberNode(1)))
+        @test parsestmt("mutable struct A end") ==
+            Expr(:struct, true, :A, Expr(:block, LineNumberNode(1)))
+
+        @test parsestmt("struct A <: B \n a::X \n end") ==
+            Expr(:struct, false, Expr(:<:, :A, :B),
+                 Expr(:block, LineNumberNode(2), Expr(:(::), :a, :X)))
+        @test parsestmt("struct A \n a \n b \n end") ==
+            Expr(:struct, false, :A,
+                 Expr(:block, LineNumberNode(2), :a, LineNumberNode(3), :b))
+        @test parsestmt("struct A const a end", version=v"1.8") ==
+            Expr(:struct, false, :A, Expr(:block, LineNumberNode(1), Expr(:const, :a)))
+
+        @test parsestmt("struct A \n \"doc\" \n a end") ==
+            Expr(:struct, false, :A, Expr(:block, LineNumberNode(2), "doc", :a))
+    end
+
+    @testset "export" begin
+        @test parsestmt("export a") == Expr(:export, :a)
+        @test parsestmt("export @a") == Expr(:export, Symbol("@a"))
+        @test parsestmt("export @var\"'\"") == Expr(:export, Symbol("@'"))
+        @test parsestmt("export a, \n @b") == Expr(:export, :a, Symbol("@b"))
+        @test parsestmt("export +, ==") == Expr(:export, :+, :(==))
+        @test parsestmt("export \n a") == Expr(:export, :a)
+    end
+
+    @testset "global/const/local" begin
+        @test parsestmt("global x") == Expr(:global, :x)
+        @test parsestmt("local x") == Expr(:local, :x)
+        @test parsestmt("global x,y") == Expr(:global, :x, :y)
+        @test parsestmt("global const x = 1") == Expr(:const, Expr(:global, Expr(:(=), :x, 1)))
+        @test parsestmt("local const x = 1") == Expr(:const, Expr(:local, Expr(:(=), :x, 1)))
+        @test parsestmt("const global x = 1") == Expr(:const, Expr(:global, Expr(:(=), :x, 1)))
+        @test parsestmt("const local x = 1") == Expr(:const, Expr(:local, Expr(:(=), :x, 1)))
+        @test parsestmt("const x,y = 1,2") == Expr(:const, Expr(:(=), Expr(:tuple, :x, :y), Expr(:tuple, 1, 2)))
+        @test parsestmt("const x = 1") == Expr(:const, Expr(:(=), :x, 1))
+        @test parsestmt("global x ~ 1") == Expr(:global, Expr(:call, :~, :x, 1))
+        @test parsestmt("global x += 1") == Expr(:global, Expr(:+=, :x, 1))
+
+        # Parsing of global/local with
+        @test parsestmt("global (x,y)") == Expr(:global, :x, :y)
+        @test parsestmt("local (x,y)") == Expr(:local, :x, :y)
+    end
+
+    @testset "tuples" begin
+        @test parsestmt("(;)")       == Expr(:tuple, Expr(:parameters))
+        @test parsestmt("(; a=1)")   == Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1)))
+        @test parsestmt("(; a=1; b=2)") ==
+            Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)))
+        @test parsestmt("(a; b; c,d)") ==
+            Expr(:tuple, Expr(:parameters, Expr(:parameters, :c, :d), :b), :a)
+    end
+
+    @testset "module" begin
+        @test parsestmt("module A end"; version=v"1.13") ==
+            Expr(:module, true,  :A, Expr(:block, LineNumberNode(1), LineNumberNode(1)))
+        @test parsestmt("module A end"; version=v"1.14") ==
+            Expr(:module, v"1.14", true,  :A, Expr(:block, LineNumberNode(1), LineNumberNode(1)))
+        @test parsestmt("baremodule A end"; version=v"1.13") ==
+            Expr(:module, false, :A, Expr(:block, LineNumberNode(1), LineNumberNode(1)))
+    end
+
+    @testset "errors" begin
+        @test parsestmt("--", ignore_errors=true) ==
+            Expr(:error, "invalid operator: `--`")
+        @test parseall("a b", ignore_errors=true) ==
+            Expr(:toplevel, LineNumberNode(1), :a,
+                 LineNumberNode(1), Expr(:error, :b))
+        @test parsestmt("(x", ignore_errors=true) ==
+            Expr(:block, :x, Expr(:error))
+        @test parsestmt("x do", ignore_errors=true) ==
+            Expr(:block, :x, Expr(:error, :do))
+        @test parsestmt("x var\"y\"", ignore_errors=true) ==
+            Expr(:block, :x, Expr(:error, :var, ErrorVal(), "y", ErrorVal()))
+        @test parsestmt("var\"y", ignore_errors=true) ==
+            Expr(:var, :y, Expr(:error))
+    end
+
+    @testset "import" begin
+        @test parsestmt("import A") == Expr(:import, Expr(:., :A))
+        @test parsestmt("import A.(:b).:c: x.:z", ignore_warnings=true) ==
+            Expr(:import, Expr(Symbol(":"), Expr(:., :A, :b, :c), Expr(:., :x, :z)))
+        # Stupid parens and quotes in import paths
+        @test parsestmt("import A.:+", ignore_warnings=true) ==
+            Expr(:import, Expr(:., :A, :+))
+        @test parsestmt("import A.(:+)", ignore_warnings=true) ==
+            Expr(:import, Expr(:., :A, :+))
+        @test parsestmt("import A.:(+)", ignore_warnings=true) ==
+            Expr(:import, Expr(:., :A, :+))
+        @test parsestmt("import A.:(+) as y", ignore_warnings=true, version=v"1.6") ==
+            Expr(:import, Expr(:as, Expr(:., :A, :+), :y))
+    end
+end
+
+@testset "SyntaxNode->Expr conversion" begin
+    src = repeat('a', 1000) * '\n' * "@hi"
+    @test Expr(parsestmt(SyntaxNode, SubString(src, 1001:lastindex(src)))) ==
+        Expr(:macrocall, Symbol("@hi"), LineNumberNode(2))
+end
diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl
new file mode 100644
index 0000000000000..71c9ff77b1ab6
--- /dev/null
+++ b/JuliaSyntax/test/fuzz_test.jl
@@ -0,0 +1,1023 @@
+using .JuliaSyntax: tokenize
+import Logging
+import Test
+
+# Parser fuzz testing tools.
+
+const all_tokens = [
+    "#x\n"
+    "#==#"
+    " "
+    "\t"
+    "\n"
+    "x"
+    "@"
+    ","
+    ";"
+
+    "baremodule"
+    "begin"
+    "break"
+    "const"
+    "continue"
+    "do"
+    "export"
+    "for"
+    "function"
+    "global"
+    "if"
+    "import"
+    "let"
+    "local"
+    "macro"
+    "module"
+    "quote"
+    "return"
+    "struct"
+    "try"
+    "using"
+    "while"
+    "catch"
+    "finally"
+    "else"
+    "elseif"
+    "end"
+    "abstract"
+    "as"
+    "doc"
+    "mutable"
+    "outer"
+    "primitive"
+    "type"
+    "var"
+
+    "1"
+    "0b1"
+    "0x1"
+    "0o1"
+    "1.0"
+    "1.0f0"
+    "\"s\""
+    "'c'"
+    "`s`"
+    "true"
+    "false"
+
+    "["
+    "]"
+    "{"
+    "}"
+    "("
+    ")"
+    "\""
+    "\"\"\""
+    "`"
+    "```"
+
+    "="
+    "+="
+    "-="   # Also used for "−="
+    "−="
+    "*="
+    "/="
+    "//="
+    "|="
+    "^="
+    "÷="
+    "%="
+    "<<="
+    ">>="
+    ">>>="
+    "\\="
+    "&="
+    ":="
+    "~"
+    "\$="
+    "⊻="
+    "≔"
+    "⩴"
+    "≕"
+
+    "=>"
+
+    "?"
+
+    "-->"
+    "<--"
+    "<-->"
+    "←"
+    "→"
+    "↔"
+    "↚"
+    "↛"
+    "↞"
+    "↠"
+    "↢"
+    "↣"
+    "↤"
+    "↦"
+    "↮"
+    "⇎"
+    "⇍"
+    "⇏"
+    "⇐"
+    "⇒"
+    "⇔"
+    "⇴"
+    "⇶"
+    "⇷"
+    "⇸"
+    "⇹"
+    "⇺"
+    "⇻"
+    "⇼"
+    "⇽"
+    "⇾"
+    "⇿"
+    "⟵"
+    "⟶"
+    "⟷"
+    "⟹"
+    "⟺"
+    "⟻"
+    "⟼"
+    "⟽"
+    "⟾"
+    "⟿"
+    "⤀"
+    "⤁"
+    "⤂"
+    "⤃"
+    "⤄"
+    "⤅"
+    "⤆"
+    "⤇"
+    "⤌"
+    "⤍"
+    "⤎"
+    "⤏"
+    "⤐"
+    "⤑"
+    "⤔"
+    "⤕"
+    "⤖"
+    "⤗"
+    "⤘"
+    "⤝"
+    "⤞"
+    "⤟"
+    "⤠"
+    "⥄"
+    "⥅"
+    "⥆"
+    "⥇"
+    "⥈"
+    "⥊"
+    "⥋"
+    "⥎"
+    "⥐"
+    "⥒"
+    "⥓"
+    "⥖"
+    "⥗"
+    "⥚"
+    "⥛"
+    "⥞"
+    "⥟"
+    "⥢"
+    "⥤"
+    "⥦"
+    "⥧"
+    "⥨"
+    "⥩"
+    "⥪"
+    "⥫"
+    "⥬"
+    "⥭"
+    "⥰"
+    "⧴"
+    "⬱"
+    "⬰"
+    "⬲"
+    "⬳"
+    "⬴"
+    "⬵"
+    "⬶"
+    "⬷"
+    "⬸"
+    "⬹"
+    "⬺"
+    "⬻"
+    "⬼"
+    "⬽"
+    "⬾"
+    "⬿"
+    "⭀"
+    "⭁"
+    "⭂"
+    "⭃"
+    "⭄"
+    "⭇"
+    "⭈"
+    "⭉"
+    "⭊"
+    "⭋"
+    "⭌"
+    "￩"
+    "￫"
+    "⇜"
+    "⇝"
+    "↜"
+    "↝"
+    "↩"
+    "↪"
+    "↫"
+    "↬"
+    "↼"
+    "↽"
+    "⇀"
+    "⇁"
+    "⇄"
+    "⇆"
+    "⇇"
+    "⇉"
+    "⇋"
+    "⇌"
+    "⇚"
+    "⇛"
+    "⇠"
+    "⇢"
+    "↷"
+    "↶"
+    "↺"
+    "↻"
+
+    "||"
+
+    "&&"
+
+    "<:"
+    ">:"
+    ">"
+    "<"
+    ">="
+    "≥"
+    "<="
+    "≤"
+    "=="
+    "==="
+    "≡"
+    "!="
+    "≠"
+    "!=="
+    "≢"
+    "∈"
+    "in"
+    "isa"
+    "∉"
+    "∋"
+    "∌"
+    "⊆"
+    "⊈"
+    "⊂"
+    "⊄"
+    "⊊"
+    "∝"
+    "∊"
+    "∍"
+    "∥"
+    "∦"
+    "∷"
+    "∺"
+    "∻"
+    "∽"
+    "∾"
+    "≁"
+    "≃"
+    "≂"
+    "≄"
+    "≅"
+    "≆"
+    "≇"
+    "≈"
+    "≉"
+    "≊"
+    "≋"
+    "≌"
+    "≍"
+    "≎"
+    "≐"
+    "≑"
+    "≒"
+    "≓"
+    "≖"
+    "≗"
+    "≘"
+    "≙"
+    "≚"
+    "≛"
+    "≜"
+    "≝"
+    "≞"
+    "≟"
+    "≣"
+    "≦"
+    "≧"
+    "≨"
+    "≩"
+    "≪"
+    "≫"
+    "≬"
+    "≭"
+    "≮"
+    "≯"
+    "≰"
+    "≱"
+    "≲"
+    "≳"
+    "≴"
+    "≵"
+    "≶"
+    "≷"
+    "≸"
+    "≹"
+    "≺"
+    "≻"
+    "≼"
+    "≽"
+    "≾"
+    "≿"
+    "⊀"
+    "⊁"
+    "⊃"
+    "⊅"
+    "⊇"
+    "⊉"
+    "⊋"
+    "⊏"
+    "⊐"
+    "⊑"
+    "⊒"
+    "⊜"
+    "⊩"
+    "⊬"
+    "⊮"
+    "⊰"
+    "⊱"
+    "⊲"
+    "⊳"
+    "⊴"
+    "⊵"
+    "⊶"
+    "⊷"
+    "⋍"
+    "⋐"
+    "⋑"
+    "⋕"
+    "⋖"
+    "⋗"
+    "⋘"
+    "⋙"
+    "⋚"
+    "⋛"
+    "⋜"
+    "⋝"
+    "⋞"
+    "⋟"
+    "⋠"
+    "⋡"
+    "⋢"
+    "⋣"
+    "⋤"
+    "⋥"
+    "⋦"
+    "⋧"
+    "⋨"
+    "⋩"
+    "⋪"
+    "⋫"
+    "⋬"
+    "⋭"
+    "⋲"
+    "⋳"
+    "⋴"
+    "⋵"
+    "⋶"
+    "⋷"
+    "⋸"
+    "⋹"
+    "⋺"
+    "⋻"
+    "⋼"
+    "⋽"
+    "⋾"
+    "⋿"
+    "⟈"
+    "⟉"
+    "⟒"
+    "⦷"
+    "⧀"
+    "⧁"
+    "⧡"
+    "⧣"
+    "⧤"
+    "⧥"
+    "⩦"
+    "⩧"
+    "⩪"
+    "⩫"
+    "⩬"
+    "⩭"
+    "⩮"
+    "⩯"
+    "⩰"
+    "⩱"
+    "⩲"
+    "⩳"
+    "⩵"
+    "⩶"
+    "⩷"
+    "⩸"
+    "⩹"
+    "⩺"
+    "⩻"
+    "⩼"
+    "⩽"
+    "⩾"
+    "⩿"
+    "⪀"
+    "⪁"
+    "⪂"
+    "⪃"
+    "⪄"
+    "⪅"
+    "⪆"
+    "⪇"
+    "⪈"
+    "⪉"
+    "⪊"
+    "⪋"
+    "⪌"
+    "⪍"
+    "⪎"
+    "⪏"
+    "⪐"
+    "⪑"
+    "⪒"
+    "⪓"
+    "⪔"
+    "⪕"
+    "⪖"
+    "⪗"
+    "⪘"
+    "⪙"
+    "⪚"
+    "⪛"
+    "⪜"
+    "⪝"
+    "⪞"
+    "⪟"
+    "⪠"
+    "⪡"
+    "⪢"
+    "⪣"
+    "⪤"
+    "⪥"
+    "⪦"
+    "⪧"
+    "⪨"
+    "⪩"
+    "⪪"
+    "⪫"
+    "⪬"
+    "⪭"
+    "⪮"
+    "⪯"
+    "⪰"
+    "⪱"
+    "⪲"
+    "⪳"
+    "⪴"
+    "⪵"
+    "⪶"
+    "⪷"
+    "⪸"
+    "⪹"
+    "⪺"
+    "⪻"
+    "⪼"
+    "⪽"
+    "⪾"
+    "⪿"
+    "⫀"
+    "⫁"
+    "⫂"
+    "⫃"
+    "⫄"
+    "⫅"
+    "⫆"
+    "⫇"
+    "⫈"
+    "⫉"
+    "⫊"
+    "⫋"
+    "⫌"
+    "⫍"
+    "⫎"
+    "⫏"
+    "⫐"
+    "⫑"
+    "⫒"
+    "⫓"
+    "⫔"
+    "⫕"
+    "⫖"
+    "⫗"
+    "⫘"
+    "⫙"
+    "⫷"
+    "⫸"
+    "⫹"
+    "⫺"
+    "⊢"
+    "⊣"
+    "⟂"
+    "⫪"
+    "⫫"
+
+    "<|"
+    "|>"
+
+    ":"
+    ".."
+    "…"
+    "⁝"
+    "⋮"
+    "⋱"
+    "⋰"
+    "⋯"
+
+    "\$"
+    "+"
+    "-" # also used for "−"
+    "−"
+    "++"
+    "⊕"
+    "⊖"
+    "⊞"
+    "⊟"
+    "|"
+    "∪"
+    "∨"
+    "⊔"
+    "±"
+    "∓"
+    "∔"
+    "∸"
+    "≏"
+    "⊎"
+    "⊻"
+    "⊽"
+    "⋎"
+    "⋓"
+    "⧺"
+    "⧻"
+    "⨈"
+    "⨢"
+    "⨣"
+    "⨤"
+    "⨥"
+    "⨦"
+    "⨧"
+    "⨨"
+    "⨩"
+    "⨪"
+    "⨫"
+    "⨬"
+    "⨭"
+    "⨮"
+    "⨹"
+    "⨺"
+    "⩁"
+    "⩂"
+    "⩅"
+    "⩊"
+    "⩌"
+    "⩏"
+    "⩐"
+    "⩒"
+    "⩔"
+    "⩖"
+    "⩗"
+    "⩛"
+    "⩝"
+    "⩡"
+    "⩢"
+    "⩣"
+    "¦"
+
+    "*"
+    "/"
+    "÷"
+    "%"
+    "⋅" # also used for lookalikes "·" and "·"
+    "·"
+    "·"
+    "∘"
+    "×"
+    "\\"
+    "&"
+    "∩"
+    "∧"
+    "⊗"
+    "⊘"
+    "⊙"
+    "⊚"
+    "⊛"
+    "⊠"
+    "⊡"
+    "⊓"
+    "∗"
+    "∙"
+    "∤"
+    "⅋"
+    "≀"
+    "⊼"
+    "⋄"
+    "⋆"
+    "⋇"
+    "⋉"
+    "⋊"
+    "⋋"
+    "⋌"
+    "⋏"
+    "⋒"
+    "⟑"
+    "⦸"
+    "⦼"
+    "⦾"
+    "⦿"
+    "⧶"
+    "⧷"
+    "⨇"
+    "⨰"
+    "⨱"
+    "⨲"
+    "⨳"
+    "⨴"
+    "⨵"
+    "⨶"
+    "⨷"
+    "⨸"
+    "⨻"
+    "⨼"
+    "⨽"
+    "⩀"
+    "⩃"
+    "⩄"
+    "⩋"
+    "⩍"
+    "⩎"
+    "⩑"
+    "⩓"
+    "⩕"
+    "⩘"
+    "⩚"
+    "⩜"
+    "⩞"
+    "⩟"
+    "⩠"
+    "⫛"
+    "⊍"
+    "▷"
+    "⨝"
+    "⟕"
+    "⟖"
+    "⟗"
+    "⌿"
+    "⨟"
+
+    "//"
+
+    "<<"
+    ">>"
+    ">>>"
+
+    "^"
+    "↑"
+    "↓"
+    "⇵"
+    "⟰"
+    "⟱"
+    "⤈"
+    "⤉"
+    "⤊"
+    "⤋"
+    "⤒"
+    "⤓"
+    "⥉"
+    "⥌"
+    "⥍"
+    "⥏"
+    "⥑"
+    "⥔"
+    "⥕"
+    "⥘"
+    "⥙"
+    "⥜"
+    "⥝"
+    "⥠"
+    "⥡"
+    "⥣"
+    "⥥"
+    "⥮"
+    "⥯"
+    "￪"
+    "￬"
+
+    "::"
+
+    "where"
+
+    "."
+
+    "!"
+    "'"
+    ".'"
+    "->"
+
+    "¬"
+    "√"
+    "∛"
+    "∜"
+]
+
+const cutdown_tokens = [
+    "#x\n"
+    "#==#"
+    " "
+    "\t"
+    "\n"
+    "x"
+    "β"
+    "@"
+    ","
+    ";"
+
+    "baremodule"
+    "begin"
+    "break"
+    "const"
+    "continue"
+    "do"
+    "export"
+    "for"
+    "function"
+    "global"
+    "if"
+    "import"
+    "let"
+    "local"
+    "macro"
+    "module"
+    "quote"
+    "return"
+    "struct"
+    "try"
+    "using"
+    "while"
+    "catch"
+    "finally"
+    "else"
+    "elseif"
+    "end"
+    "abstract"
+    "as"
+    "doc"
+    "mutable"
+    "outer"
+    "primitive"
+    "type"
+    "var"
+
+    "1"
+    "0b1"
+    "0x1"
+    "0o1"
+    "1.0"
+    "1.0f0"
+    "\"s\""
+    "'c'"
+    "`s`"
+    "true"
+    "false"
+
+    "["
+    "]"
+    "{"
+    "}"
+    "("
+    ")"
+    "\""
+    "\"\"\""
+    "`"
+    "```"
+
+    "="
+    "+="
+    "~"
+
+    "=>"
+
+    "?"
+
+    "-->"
+
+    "||"
+
+    "&&"
+
+    "<:"
+    ">:"
+    ">"
+    "<"
+    ">="
+    "<="
+    "=="
+    "==="
+    "!="
+
+    "<|"
+    "|>"
+
+    ":"
+    ".."
+    "…"
+
+    "\$"
+    "+"
+    "−"
+    "-"
+    "|"
+
+    "*"
+    "/"
+    "⋅" # also used for lookalikes "·" and "·"
+    "·"
+    "\\"
+
+    "//"
+
+    "<<"
+
+    "^"
+
+    "::"
+
+    "where"
+
+    "."
+
+    "!"
+    "'"
+    "->"
+
+    "√"
+]
+
+#-------------------------------------------------------------------------------
+# Parsing functions for use with fuzz_test
+
+function try_parseall_failure(str)
+    try
+        JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true);
+        return nothing
+    catch exc
+        !(exc isa InterruptException) || rethrow()
+        rstr = reduce_text(str, parser_throws_exception)
+        @error "Parser threw exception" rstr exception=current_exceptions()
+        return rstr
+    end
+end
+
+function try_hook_failure(str)
+    try
+        test_logger = Test.TestLogger()
+        Logging.with_logger(test_logger) do
+            try
+                Meta_parseall(str)
+            catch exc
+                exc isa Meta.ParseError || exc isa JuliaSyntax.ParseError || rethrow()
+            end
+        end
+        if !isempty(test_logger.logs)
+            return str
+        end
+    catch exc
+        return str
+    end
+    return nothing
+end
+
+#-------------------------------------------------------------------------------
+"""Delete `nlines` adjacent lines from code, at `niters` randomly chosen positions"""
+function delete_lines(lines, nlines, niters)
+    selection = trues(length(lines))
+    for j=1:niters
+        i = rand(1:length(lines)-nlines)
+        selection[i:i+nlines] .= false
+    end
+    join(lines[selection], '\n')
+end
+
+"""Delete `ntokens` adjacent tokens from code, at `niters` randomly chosen positions"""
+function delete_tokens(code, tokens, ntokens, niters)
+    # [ aa bbbb cc d eeeeee  ]
+    #   |  |    |  | |     |
+    selection = trues(length(tokens))
+    for j=1:niters
+        i = rand(1:length(tokens)-ntokens)
+        selection[i:i+ntokens] .= false
+    end
+    io = IOBuffer()
+    i = 1
+    while true
+        while i <= length(selection) && !selection[i]
+            i += 1
+        end
+        if i > length(selection)
+            break
+        end
+        first_ind = first(tokens[i].range)
+        while selection[i] && i < length(selection)
+            i += 1
+        end
+        last_ind = last(tokens[i].range)
+        write(io, @view code[first_ind:last_ind])
+        if i == length(selection)
+            break
+        end
+    end
+    return String(take!(io))
+end
+
+#-------------------------------------------------------------------------------
+# Generators for "potentially bad input"
+
+"""
+Fuzz test parser against all tuples of length `N` with elements taken from
+`tokens`.
+"""
+function product_token_fuzz(tokens, N)
+    (join(ts) for ts in Iterators.product([tokens for _ in 1:N]...))
+end
+
+function random_token_fuzz(tokens, ntokens, ntries)
+    (join(rand(tokens, ntokens)) for _ in 1:ntries)
+end
+
+"""
+Fuzz test parser against randomly generated binary strings
+"""
+function random_binary_fuzz(nbytes, N)
+    (String(rand(UInt8, nbytes)) for _ in 1:N)
+end
+
+"""
+Fuzz test by deleting random lines of some given source `code`
+"""
+function deleted_line_fuzz(code, N; nlines=10, niters=10)
+    lines = split(code, '\n')
+    (delete_lines(lines, nlines, niters) for _=1:N)
+end
+
+"""
+Fuzz test by deleting random tokens from given source `code`
+"""
+function deleted_token_fuzz(code, N; ntokens=10, niters=10)
+    ts = tokenize(code)
+    (delete_tokens(code, ts, ntokens, niters) for _=1:N)
+end
+
+"""
+Fuzz test a parsing function by trying it with many "bad" input strings.
+
+`try_parsefail` should return `nothing` when the parser succeeds, and return a
+string (or reduced string) when parsing succeeds.
+"""
+function fuzz_test(try_parsefail::Function, bad_input_iter)
+    error_strings = []
+    for str in bad_input_iter
+        res = try_parsefail(str)
+        if !isnothing(res)
+            push!(error_strings, res)
+        end
+    end
+    return error_strings
+end
+
+
+# Examples
+#
+# fuzz_test(try_hook_failure, product_token_fuzz(cutdown_tokens, 2))
+# fuzz_test(try_parseall_failure, product_token_fuzz(cutdown_tokens, 2))
diff --git a/JuliaSyntax/test/green_node.jl b/JuliaSyntax/test/green_node.jl
new file mode 100644
index 0000000000000..0c3be65873c2e
--- /dev/null
+++ b/JuliaSyntax/test/green_node.jl
@@ -0,0 +1,68 @@
+@testset "GreenNode" begin
+    t = parsestmt(GreenNode, "aa + b")
+
+    @test span(t) == 6
+    @test !is_leaf(t)
+    @test head(t) == SyntaxHead(K"call", 0x0088)
+    @test span.(children(t)) == [2,1,1,1,1]
+    @test head.(children(t)) == [
+         SyntaxHead(K"Identifier", 0x0000)
+         SyntaxHead(K"Whitespace", 0x0001)
+         SyntaxHead(K"Identifier", 0x0000)
+         SyntaxHead(K"Whitespace", 0x0001)
+         SyntaxHead(K"Identifier", 0x0000)
+    ]
+
+    @test numchildren(t) == 5
+    @test !is_leaf(t)
+    @test is_leaf(t[1])
+
+    @test t[1] === children(t)[1]
+    @test t[2:4] == [t[2],t[3],t[4]]
+    @test firstindex(t) == 1
+    @test lastindex(t) == 5
+
+    t2 = parsestmt(GreenNode, "aa + b")
+    @test t == t2
+    @test t !== t2
+
+    text = "f(@x(y), z)"
+    @test sprint(show, MIME("text/plain"), parsestmt(GreenNode, text)) ==
+    """
+         1:11     │[call]
+         1:1      │  Identifier             ✔
+         2:2      │  (
+         3:7      │  [macrocall]
+         3:4      │    [macro_name]
+         3:3      │      @
+         4:4      │      Identifier         ✔
+         5:5      │    (
+         6:6      │    Identifier           ✔
+         7:7      │    )
+         8:8      │  ,
+         9:9      │  Whitespace
+        10:10     │  Identifier             ✔
+        11:11     │  )
+    """
+
+    @test sprint(show, MIME("text/plain"), parsestmt(GreenNode, text), text) ==
+    """
+         1:11     │[call]
+         1:1      │  Identifier             ✔   "f"
+         2:2      │  (                          "("
+         3:7      │  [macrocall]
+         3:4      │    [macro_name]
+         3:3      │      @                      "@"
+         4:4      │      Identifier         ✔   "x"
+         5:5      │    (                        "("
+         6:6      │    Identifier           ✔   "y"
+         7:7      │    )                        ")"
+         8:8      │  ,                          ","
+         9:9      │  Whitespace                 " "
+        10:10     │  Identifier             ✔   "z"
+        11:11     │  )                          ")"
+    """
+
+    @test sprint(show, parsestmt(GreenNode, "a + bb - f(ccc)")) ==
+        "(call-i (call-i 1-1::Identifier 2-2::Whitespace-t 3-3::Identifier 4-4::Whitespace-t 5-6::Identifier) 7-7::Whitespace-t 8-8::Identifier 9-9::Whitespace-t (call 10-10::Identifier 11-11::(-t 12-14::Identifier 15-15::)-t))"
+end
diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl
new file mode 100644
index 0000000000000..333344d7c50a1
--- /dev/null
+++ b/JuliaSyntax/test/hooks.jl
@@ -0,0 +1,516 @@
+function _unwrap_parse_error(core_hook_result)
+    @test Meta.isexpr(core_hook_result[1], :error, 1)
+    err = core_hook_result[1].args[1]
+    if JuliaSyntax._has_v1_10_hooks
+        @test err isa Meta.ParseError
+        return err.detail
+    else
+        @test err isa JuliaSyntax.ParseError
+        return err
+    end
+end
+
+@testset "Hooks for Core integration" begin
+    @testset "whitespace and comment parsing" begin
+        @test JuliaSyntax.core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0)
+        @test JuliaSyntax.core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0)
+
+        @test JuliaSyntax.core_parser_hook("  ", "somefile", 1, 2, :statement) == Core.svec(nothing,2)
+        @test JuliaSyntax.core_parser_hook(" #==# ", "somefile", 1, 6, :statement) == Core.svec(nothing,6)
+
+        @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 1, 0, :statement) == Core.svec(:x,4)
+        @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 1, 0, :atom)      == Core.svec(:x,2)
+
+        # https://github.com/JuliaLang/JuliaSyntax.jl/issues/316#issuecomment-1870294857
+        stmtstr =
+            """
+            plus(a, b) = a + b
+
+            # Issue #81
+            f() = nothing
+            """
+        @test JuliaSyntax.core_parser_hook(stmtstr, "somefile", 1, 0, :statement)[2] == 19
+    end
+
+    @testset "filename and lineno" begin
+        ex = JuliaSyntax.core_parser_hook("@a", "somefile", 1, 0, :statement)[1]
+        @test Meta.isexpr(ex, :macrocall)
+        @test ex.args[2] == LineNumberNode(1, "somefile")
+
+        ex = JuliaSyntax.core_parser_hook("@a", "otherfile", 2, 0, :statement)[1]
+        @test ex.args[2] == LineNumberNode(2, "otherfile")
+
+        # Errors also propagate file & lineno
+        err = _unwrap_parse_error(
+            JuliaSyntax.core_parser_hook("[x)", "f1", 1, 0, :statement)
+        )
+        @test err isa JuliaSyntax.ParseError
+        @test filename(err) == "f1"
+        @test err.source.first_line == 1
+        err = _unwrap_parse_error(
+            JuliaSyntax.core_parser_hook("[x)", "f2", 2, 0, :statement)
+        )
+        @test err isa JuliaSyntax.ParseError
+        @test filename(err) == "f2"
+        @test err.source.first_line == 2
+
+        # Errors including nontrivial offset indices
+        err = _unwrap_parse_error(
+            JuliaSyntax.core_parser_hook("a\nh{x)\nb", "test.jl", 1, 2, :statement)
+        )
+        @test err isa JuliaSyntax.ParseError
+        @test err.source.first_line == 1
+        @test err.diagnostics[1].first_byte == 6
+        @test err.diagnostics[1].last_byte == 5
+        @test err.diagnostics[1].message == "Expected `}` or `,`"
+    end
+
+    @testset "toplevel errors" begin
+        ex = JuliaSyntax.core_parser_hook("a\nb\n[x,\ny)", "somefile", 1, 0, :all)[1]
+        @test ex.head == :toplevel
+        @test ex.args[1:5] == [
+            LineNumberNode(1, "somefile"),
+            :a,
+            LineNumberNode(2, "somefile"),
+            :b,
+            LineNumberNode(4, "somefile"),
+        ]
+        @test Meta.isexpr(ex.args[6], :error)
+
+        ex = JuliaSyntax.core_parser_hook("x.", "somefile", 0, 0, :all)[1]
+        @test ex.head == :toplevel
+        @test ex.args[2].head == :incomplete
+    end
+
+    @testset "enable_in_core!" begin
+        JuliaSyntax.enable_in_core!()
+
+        @test Meta.parse("x + 1") == :(x + 1)
+        @test Meta.parse("x + 1", 1) == (:(x + 1), 6)
+
+        # Test that parsing statements incrementally works and stops after
+        # whitespace / comment trivia
+        @test Meta.parse("x + 1\n(y)\n", 1) == (:(x + 1), 7)
+        @test Meta.parse("x + 1\n(y)\n", 7) == (:y, 11)
+        @test Meta.parse(" x#==#", 1) == (:x, 7)
+        @test Meta.parse(" #==# ", 1) == (nothing, 7)
+
+        # Check the exception type that Meta.parse throws
+        if JuliaSyntax._has_v1_10_hooks
+            @test_throws Meta.ParseError Meta.parse("[x)")
+            @test_throws Meta.ParseError eval(Meta.parse("[x)", raise=false))
+            @test_throws Meta.ParseError eval(Meta.parse("(x")) # Expr(:incomplete)
+        else
+            @test_throws JuliaSyntax.ParseError Meta.parse("[x)")
+        end
+
+        # Check custom string types defined in a world age later than
+        # enable_in_core!() can be passed to Meta.parse()
+        mystr = @eval begin
+            struct MyString <: AbstractString
+                x::String
+            end
+            Base.String(s::MyString) = s.x
+            Base.ncodeunits(s::MyString) = ncodeunits(s.x)
+
+            MyString("hi")
+        end
+        @test Meta.parse(mystr) == :hi
+
+        err = Meta.parse("\"")
+        @test Meta.isexpr(err, :incomplete)
+        if JuliaSyntax._has_v1_10_hooks
+            @test err.args[1] isa Meta.ParseError
+            exc = err.args[1]
+            @test exc.msg == "ParseError:\n# Error @ none:1:2\n\"\n#└ ── unterminated string literal"
+            @test exc.detail isa JuliaSyntax.ParseError
+            @test exc.detail.incomplete_tag === :string
+        else
+            @test err.args[1] isa String
+        end
+
+        JuliaSyntax.enable_in_core!(false)
+    end
+
+    @testset "Expr(:incomplete)" begin
+        for (str, tag) in [
+                "\""           => :string
+                "\"\$foo"      => :string
+                "#="           => :comment
+                "'"            => :char
+                "'a"           => :char
+                "`"            => :cmd
+                "("            => :other
+                "["            => :other
+                "begin"        => :block
+                "quote"        => :block
+                "let"          => :block
+                "let;"         => :block
+                "for"          => :other
+                "for x=xs"     => :block
+                "function"     => :other
+                "function f()" => :block
+                "macro"        => :other
+                "macro f()"    => :block
+                "f() do"       => :other
+                "f() do x"     => :block
+                "module"       => :other
+                "module X"     => :block
+                "baremodule"   => :other
+                "baremodule X" => :block
+                "mutable struct"    => :other
+                "mutable struct X"  => :block
+                "struct"       => :other
+                "struct X"     => :block
+                "if"           => :other
+                "if x"         => :block
+                "while"        => :other
+                "while x"      => :block
+                "try"          => :block
+                # could be `try x catch exc body end` or `try x catch ; body end`
+                "try x catch"  => :block
+                "using"        => :other
+                "import"       => :other
+                "local"        => :other
+                "global"       => :other
+
+                "1 == 2 ?"     => :other
+                "1 == 2 ? 3 :" => :other
+                "1,"           => :other
+                "1, "          => :other
+                "1,\n"         => :other
+                "1, \n"        => :other
+                "f(1, "        => :other
+                "[x "          => :other
+                "( "           => :other
+
+                # Reference parser fails to detect incomplete exprs in this case
+                "(x for y"     => :other
+
+                # Syntax which may be an error but is not incomplete
+                ""             => :none
+                ")"            => :none
+                "1))"          => :none
+                "a b"          => :none
+                "()x"          => :none
+                "."            => :none
+
+                # Some error tokens which cannot be made complete by appending more characters
+                "1.e1."        => :none
+                "\u200b"       => :none
+                "x #=\xf5b\n=#" => :none
+                "₁"            => :none
+                "0x1.0\n"      => :none
+                "\"\$x෴\""     => :none
+                "10e1000"      => :none
+
+                # Multiline input with comments (#519)
+                "function f()\nbody #comment" => :block
+                "a = [\n1,\n2, #comment"      => :other
+
+                # Extended set of cases extracted from the REPL stdlib tests.
+                # There is some redundancy here, but we've mostly left these
+                # here because incomplete-detection is partly heuristic and
+                # it's good to have a wide variety of incomplete expressions.
+                #
+                # The "desired" incomplete tag here was generated from the
+                # flisp parser.
+                "Main.CompletionFoo." => :other
+                "Base.return_types(getin" => :other
+                "test7()." => :other
+                "(3,2)." => :other
+                "Base.print(\"lol" => :string
+                "run(`lol" => :cmd
+                "copy(A')." => :other
+                "cd(\"path_to_an_empty_folder_should_not_complete_latex\\\\\\alpha" => :string
+                "\"C:\\\\ \\alpha" => :string
+                "cd(\"C:\\U" => :string
+                "max(" => :other
+                "!(" => :other
+                "!isnothing(" => :other
+                "!!isnothing(" => :other
+                "CompletionFoo.test(1, 1, " => :other
+                "CompletionFoo.test(CompletionFoo.array," => :other
+                "CompletionFoo.test(1,1,1," => :other
+                "CompletionFoo.test1(Int," => :other
+                "CompletionFoo.test1(Float64," => :other
+                "prevind(\"θ\",1," => :other
+                "(1, CompletionFoo.test2(\")\"," => :other
+                "(1, CompletionFoo.test2(')'," => :other
+                "(1, CompletionFoo.test2(`')'`," => :other
+                "CompletionFoo.test3([1, 2] .+ CompletionFoo.varfloat," => :other
+                "CompletionFoo.test3([1.,2.], 1.," => :other
+                "CompletionFoo.test4(\"e\",r\" \"," => :other
+                "CompletionFoo.test5(broadcast((x,y)->x==y, push!(Base.split(\"\",' '),\"\",\"\"), \"\")," => :other
+                "CompletionFoo.test5(Bool[x==1 for x=1:4]," => :other
+                "CompletionFoo.test4(CompletionFoo.test_y_array[1]()[1], CompletionFoo.test_y_array[1]()[2], " => :other
+                "CompletionFoo.test4(\"\\\"\"," => :other
+                "convert(" => :other
+                "convert(" => :other
+                "CompletionFoo.test5(AbstractArray[Bool[]][1]," => :other
+                "CompletionFoo.test3(@time([1, 2] .+ CompletionFoo.varfloat)," => :other
+                "CompletionFoo.kwtest( " => :other
+                "CompletionFoo.kwtest(;" => :other
+                "CompletionFoo.kwtest(; x=1, " => :other
+                "CompletionFoo.kwtest(; kw=1, " => :other
+                "CompletionFoo.kwtest(x=1, " => :other
+                "CompletionFoo.kwtest(x=1; " => :other
+                "CompletionFoo.kwtest(x=kw=1, " => :other
+                "CompletionFoo.kwtest(; x=kw=1, " => :other
+                "CompletionFoo.kwtest2(1, x=1," => :other
+                "CompletionFoo.kwtest2(1; x=1, " => :other
+                "CompletionFoo.kwtest2(1, x=1; " => :other
+                "CompletionFoo.kwtest2(1, kw=1, " => :other
+                "CompletionFoo.kwtest2(1; kw=1, " => :other
+                "CompletionFoo.kwtest2(1, kw=1; " => :other
+                "CompletionFoo.kwtest2(y=3, 1, " => :other
+                "CompletionFoo.kwtest2(y=3, 1; " => :other
+                "CompletionFoo.kwtest2(kw=3, 1, " => :other
+                "CompletionFoo.kwtest2(kw=3, 1; " => :other
+                "CompletionFoo.kwtest2(1; " => :other
+                "CompletionFoo.kwtest2(1, " => :other
+                "CompletionFoo.kwtest4(x23=18, x; " => :other
+                "CompletionFoo.kwtest4(x23=18, x, " => :other
+                "CompletionFoo.kwtest4(x23=18, " => :other
+                "CompletionFoo.kwtest5(3, somekwarg=6," => :other
+                "CompletionFoo.kwtest5(3, somekwarg=6, anything, " => :other
+                "CompletionFoo.?([1,2,3], 2.0" => :other
+                "CompletionFoo.?('c'" => :other
+                "CompletionFoo.?(false, \"a\", 3, " => :other
+                "CompletionFoo.?(false, \"a\", 3, " => :other
+                "CompletionFoo.?(\"a\", 3, " => :other
+                "CompletionFoo.?(; " => :other
+                "CompletionFoo.?(" => :other
+                "CompletionFoo.test10(z, Integer[]...," => :other
+                "CompletionFoo.test10(3, Integer[]...," => :other
+                "CompletionFoo.test10(3, 4," => :other
+                "CompletionFoo.test10(3, 4, 5," => :other
+                "CompletionFoo.test10(z, z, 0, " => :other
+                "CompletionFoo.test10(\"a\", Union{Signed,Bool,String}[3][1], " => :other
+                "CompletionFoo.test11(Integer[false][1], Integer[14][1], " => :other
+                "CompletionFoo.test11(Integer[-7][1], Integer[0x6][1], 6," => :other
+                "CompletionFoo.test11(3, 4," => :other
+                "CompletionFoo.test11(0x8, 5," => :other
+                "CompletionFoo.test11(0x8, 'c'," => :other
+                "CompletionFoo.test11('d', 3," => :other
+                "CompletionFoo.test!12(" => :other
+                "CompletionFoo.kwtest(; x=2, y=4; kw=3, " => :other
+                "CompletionFoo.kwtest(x=2; y=4; " => :other
+                "CompletionFoo.kwtest((x=y)=4, " => :other
+                "CompletionFoo.kwtest(; (x=y)=4, " => :other
+                "CompletionFoo.kwtest(; w...=16, " => :other
+                "CompletionFoo.kwtest(; 2, " => :other
+                "CompletionFoo.kwtest(; 2=3, " => :other
+                "CompletionFoo.kwtest3(im; (true ? length : length), " => :other
+                "CompletionFoo.kwtest.(x=2; y=4; " => :other
+                "CompletionFoo.kwtest.(; w...=16, " => :other
+                "(1+2im)." => :other
+                "((1+2im))." => :other
+                "CompletionFoo.test_y_array[1]." => :other
+                "CompletionFoo.named." => :other
+                "#=\n\\alpha" => :comment
+                "#=\nmax" => :comment
+                "using " => :other
+                "(max" => :other
+                "@show \"/dev/nul" => :string
+                "@show \"/tm" => :string
+                "@show \"/dev/nul" => :string
+                "(Iter" => :other
+                "\"/tmp/jl_4sjOtz/tmpfoob" => :string
+                "\"~" => :string
+                "\"~user" => :string
+                "\"/tmp/jl_Mn9Rbz/selfsym" => :string
+                "\"~/ka8w5rsz" => :string
+                "\"foo~bar" => :string
+                "\"~/Zx6Wa0GkC" => :string
+                "\"~/Zx6Wa0GkC0" => :string
+                "\"~/Zx6Wa0GkC0/my_" => :string
+                "\"~/Zx6Wa0GkC0/my_file" => :string
+                "cd(\"folder_do_not_exist_77/file" => :string
+                "CompletionFoo.tuple." => :other
+                "CompletionFoo.test_dict[\"ab" => :string
+                "CompletionFoo.test_dict[\"abcd" => :string
+                "CompletionFoo.test_dict[ \"abcd" => :string
+                "CompletionFoo.test_dict[\"abcd" => :string
+                "CompletionFoo.test_dict[:b" => :other
+                "CompletionFoo.test_dict[:bar2" => :other
+                "CompletionFoo.test_dict[Ba" => :other
+                "CompletionFoo.test_dict[occ" => :other
+                "CompletionFoo.test_dict[`l" => :cmd
+                "CompletionFoo.test_dict[6" => :other
+                "CompletionFoo.test_dict[66" => :other
+                "CompletionFoo.test_dict[(" => :other
+                "CompletionFoo.test_dict[\"\\alp" => :string
+                "CompletionFoo.test_dict[\"\\alpha" => :string
+                "CompletionFoo.test_dict[\"α" => :string
+                "CompletionFoo.test_dict[:α" => :other
+                "CompletionFoo.test_dict[" => :other
+                "CompletionFoo.test_customdict[\"ab" => :string
+                "CompletionFoo.test_customdict[\"abcd" => :string
+                "CompletionFoo.test_customdict[ \"abcd" => :string
+                "CompletionFoo.test_customdict[\"abcd" => :string
+                "CompletionFoo.test_customdict[:b" => :other
+                "CompletionFoo.test_customdict[:bar2" => :other
+                "CompletionFoo.test_customdict[Ba" => :other
+                "CompletionFoo.test_customdict[occ" => :other
+                "CompletionFoo.test_customdict[`l" => :cmd
+                "CompletionFoo.test_customdict[6" => :other
+                "CompletionFoo.test_customdict[66" => :other
+                "CompletionFoo.test_customdict[(" => :other
+                "CompletionFoo.test_customdict[\"\\alp" => :string
+                "CompletionFoo.test_customdict[\"\\alpha" => :string
+                "CompletionFoo.test_customdict[\"α" => :string
+                "CompletionFoo.test_customdict[:α" => :other
+                "CompletionFoo.test_customdict[" => :other
+                "test_repl_comp_dict[\"ab" => :string
+                "test_repl_comp_dict[\"abcd" => :string
+                "test_repl_comp_dict[ \"abcd" => :string
+                "test_repl_comp_dict[\"abcd" => :string
+                "test_repl_comp_dict[:b" => :other
+                "test_repl_comp_dict[:bar2" => :other
+                "test_repl_comp_dict[Ba" => :other
+                "test_repl_comp_dict[occ" => :other
+                "test_repl_comp_dict[`l" => :cmd
+                "test_repl_comp_dict[6" => :other
+                "test_repl_comp_dict[66" => :other
+                "test_repl_comp_dict[(" => :other
+                "test_repl_comp_dict[\"\\alp" => :string
+                "test_repl_comp_dict[\"\\alpha" => :string
+                "test_repl_comp_dict[\"α" => :string
+                "test_repl_comp_dict[:α" => :other
+                "test_repl_comp_dict[" => :other
+                "test_repl_comp_customdict[\"ab" => :string
+                "test_repl_comp_customdict[\"abcd" => :string
+                "test_repl_comp_customdict[ \"abcd" => :string
+                "test_repl_comp_customdict[\"abcd" => :string
+                "test_repl_comp_customdict[:b" => :other
+                "test_repl_comp_customdict[:bar2" => :other
+                "test_repl_comp_customdict[Ba" => :other
+                "test_repl_comp_customdict[occ" => :other
+                "test_repl_comp_customdict[`l" => :cmd
+                "test_repl_comp_customdict[6" => :other
+                "test_repl_comp_customdict[66" => :other
+                "test_repl_comp_customdict[(" => :other
+                "test_repl_comp_customdict[\"\\alp" => :string
+                "test_repl_comp_customdict[\"\\alpha" => :string
+                "test_repl_comp_customdict[\"α" => :string
+                "test_repl_comp_customdict[:α" => :other
+                "test_repl_comp_customdict[" => :other
+                "CompletionFoo.kwtest3(a;foob" => :other
+                "CompletionFoo.kwtest3(a; le" => :other
+                "CompletionFoo.kwtest3.(a;\nlength" => :other
+                "CompletionFoo.kwtest3(a, length=4, l" => :other
+                "CompletionFoo.kwtest3(a; kwargs..., fo" => :other
+                "CompletionFoo.kwtest3(a; another!kwarg=0, le" => :other
+                "CompletionFoo.kwtest3(a; another!" => :other
+                "CompletionFoo.kwtest3(a; another!kwarg=0, foob" => :other
+                "CompletionFoo.kwtest3(a; namedarg=0, foob" => :other
+                "kwtest3(blabla; unknown=4, namedar" => :other
+                "kwtest3(blabla; named" => :other
+                "kwtest3(blabla; named." => :other
+                "kwtest3(blabla; named..., another!" => :other
+                "kwtest3(blabla; named..., len" => :other
+                "kwtest3(1+3im; named" => :other
+                "kwtest3(1+3im; named." => :other
+                "CompletionFoo.kwtest4(a; x23=0, _" => :other
+                "CompletionFoo.kwtest4(a; xαβγ=1, _" => :other
+                "CompletionFoo.kwtest4.(a; xαβγ=1, _" => :other
+                "CompletionFoo.kwtest4(a; x23=0, x" => :other
+                "CompletionFoo.kwtest4.(a; x23=0, x" => :other
+                "CompletionFoo.kwtest4(a; _a1b=1, x" => :other
+                "CompletionFoo.kwtest5(3, 5; somek" => :other
+                "CompletionFoo.kwtest5(3, 5, somekwarg=4, somek" => :other
+                "CompletionFoo.kwtest5(3, 5, 7; somekw" => :other
+                "CompletionFoo.kwtest5(3, 5, 7, 9; somekw" => :other
+                "CompletionFoo.kwtest5(3, 5, 7, 9, Any[]...; somek" => :other
+                "CompletionFoo.kwtest5(unknownsplat...; somekw" => :other
+                "CompletionFoo.kwtest5(3, 5, 7, 9, somekwarg=4, somek" => :other
+                "CompletionFoo.kwtest5(String[]..., unknownsplat...; xy" => :other
+                "CompletionFoo.kwtest5('a', unknownsplat...; xy" => :other
+                "CompletionFoo.kwtest5('a', 3, String[]...; xy" => :other
+                "CompletionFoo.kwtest3(" => :other
+                "CompletionFoo.kwtest3(a;" => :other
+                "CompletionFoo.kwtest3(a; len2=" => :other
+                "CompletionFoo.kwtest3(a; len2=le" => :other
+                "CompletionFoo.kwtest3(a; len2=3 " => :other
+                "CompletionFoo.kwtest3(a; [le" => :other
+                "CompletionFoo.kwtest3([length; le" => :other
+                "CompletionFoo.kwtest3(a; (le" => :other
+                "CompletionFoo.kwtest3(a; foo(le" => :other
+                "CompletionFoo.kwtest3(a; (; le" => :other
+                "CompletionFoo.kwtest3(a; length, " => :other
+                "CompletionFoo.kwtest3(a; kwargs..., " => :other
+                ":(function foo(::Int) end).args[1].args[2]." => :other
+                "log(log.(varfloat)," => :other
+                "Base.return_types(getin" => :other
+                "test(1,1, " => :other
+                "test.(1,1, " => :other
+                "prevind(\"θ\",1," => :other
+                "typeof(+)." => :other
+                "test_dict[\"ab" => :string
+                "CompletionFoo.x." => :other
+                "@noexist." => :other
+                "Main.@noexist." => :none # <- Invalid syntax which adding a suffix can't fix
+                "@Main.noexist." => :other
+                "@show." => :other
+                "@macroexpand." => :other
+                "CompletionFoo.@foobar()." => :other
+                "CompletionFoo.@foobar(4)." => :other
+                "foo(#=#==#=##==#).rs[1]." => :other
+                "foo().r." => :other
+                "foo(#=#=# =#= =#).r." => :other
+                "test_47594." => :other
+                "Issue36437(42)." => :other
+                "Some(Issue36437(42)).value." => :other
+                "some_issue36437.value." => :other
+                "some_issue36437.value.a, some_issue36437.value." => :other
+                "@show some_issue36437.value.a; some_issue36437.value." => :other
+                "()." => :other
+                "Ref(Issue36437(42))[]." => :other
+                "global_dict[:r]." => :other
+                "global_dict_nested[:g][:r]." => :other
+                "global_dict_nested[" => :other
+                "global_dict_nested[:g][" => :other
+                "pop!(global_xs)." => :other
+                "tcd1." => :other
+                "tcd1.x." => :other
+                "tcd1.x.v." => :other
+                "getkeyelem(mutable_const_prop)." => :other
+                "getkeyelem(mutable_const_prop).value." => :other
+                "var\"complicated " => :string
+                "WeirdNames().var\"oh " => :string
+                "WeirdNames().var\"" => :string
+                "\"abc\"." => :other
+                "(rand(Bool) ? issue51499_2_1 : issue51499_2_2)." => :other
+                "union_somes(1, 1.0)." => :other
+                "union_some_ref(1, 1.0)." => :other
+                "Issue49892(fal" => :other
+                "-CompletionFoo.Test_y(3)." => :other
+                "99 ⨷⁻ᵨ⁷ CompletionFoo.type_test." => :other
+                "CompletionFoo.type_test + CompletionFoo.Test_y(2)." => :other
+                "(CompletionFoo.type_test + CompletionFoo.Test_y(2))." => :other
+                "CompletionFoo.type_test + CompletionFoo.unicode_αβγ." => :other
+                "(CompletionFoo.type_test + CompletionFoo.unicode_αβγ)." => :other
+                "using Base." => :other
+                "@time(using .Iss" => :other
+                "using .Issue52922.Inner1." => :other
+                "Issue53126()." => :other
+                "using " => :other
+                "global xxx::Number = Base." => :other
+                "let x = 1 # comment" => :other
+            ]
+            @testset "$(repr(str))" begin
+                # Test :statement parsing
+                ex = JuliaSyntax.core_parser_hook(str, "somefile", 1, 0, :statement)[1]
+                @test Base.incomplete_tag(ex) == tag
+                # Test :all parsing - this is what the REPL uses to parse user input.
+                ex = JuliaSyntax.core_parser_hook(str, "somefile", 1, 0, :all)[1]
+                @test ex.head == :toplevel
+                @test Base.incomplete_tag(ex.args[end]) == tag
+            end
+        end
+
+        # Should not throw
+        @test JuliaSyntax.core_parser_hook("+=", "somefile", 1, 0, :statement)[1] isa Expr
+    end
+end
diff --git a/JuliaSyntax/test/kinds.jl b/JuliaSyntax/test/kinds.jl
new file mode 100644
index 0000000000000..5179544ec15d3
--- /dev/null
+++ b/JuliaSyntax/test/kinds.jl
@@ -0,0 +1,59 @@
+# Only test this once per session, as kind modules must be unique (ugh)
+if !isdefined(@__MODULE__, :FooKinds)
+@eval module FooKinds
+
+using ..JuliaSyntax
+
+function _init_kinds()
+    JuliaSyntax.register_kinds!(@__MODULE__, 42, [
+        "BEGIN_FOO"
+        "foo_1"
+        "foo_2"
+        "BEGIN_FOOBAR"
+        "foobar_1"
+        "foobar_2"
+        "END_FOOBAR"
+        "END_FOO"
+    ])
+end
+
+_init_kinds()
+
+k_before_init = K"foo_1"
+
+function __init__()
+    _init_kinds()
+end
+
+end
+
+@eval module BarKinds
+    # Intentionally empty
+end
+
+end
+
+@testset "Kinds" begin
+    @test K"foo_1" != K"foo_2"
+
+    @test FooKinds.k_before_init == K"foo_1"
+
+    @test K"BEGIN_FOO" == K"foo_1"
+    @test K"foo_2" < K"BEGIN_FOOBAR"
+    @test K"BEGIN_FOOBAR" == K"foobar_1"
+    @test K"END_FOOBAR" == K"foobar_2"
+    @test K"END_FOO" == K"foobar_2"
+
+    @test parentmodule(K"foo_1") == FooKinds
+    @test sprint(show, K"foo_1") == "K\"foo_1\""
+
+    # Too many kind modules
+    @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 64, ["hoo?"])
+    # Too many kind names per module
+    @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 42, string.(1:1024))
+    # Re-registering or registering new kinds is not supported
+    @test_throws ErrorException JuliaSyntax.register_kinds!(FooKinds, 42, ["foo_2", "foo_1"])
+    @test_throws ErrorException JuliaSyntax.register_kinds!(FooKinds, 42, ["foo_3"])
+    # Module ID already taken by FooKinds
+    @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 42, ["hii?"])
+end
diff --git a/JuliaSyntax/test/literal_parsing.jl b/JuliaSyntax/test/literal_parsing.jl
new file mode 100644
index 0000000000000..bfb8e932458ad
--- /dev/null
+++ b/JuliaSyntax/test/literal_parsing.jl
@@ -0,0 +1,256 @@
+using .JuliaSyntax:
+    parse_int_literal,
+    parse_uint_literal,
+    parse_float_literal,
+    unescape_julia_string
+
+@testset "Float parsing" begin
+    # Float64
+    @test parse_float_literal(Float64, "123", 1, 4)   === (123.0, :ok)
+    @test parse_float_literal(Float64, "123", 2, 4)   === (23.0,  :ok)
+    @test parse_float_literal(Float64, "123", 2, 3)   === (2.0,   :ok)
+    @test parse_float_literal(Float64, "1.3", 1, 4)   === (1.3,   :ok)
+    @test parse_float_literal(Float64, "1.3e2", 1, 6) === (1.3e2, :ok)
+    @test parse_float_literal(Float64, "1.3E2", 1, 6) === (1.3e2, :ok)
+    @test parse_float_literal(Float64, "1.0e-1000", 1, 10) === (0.0, :underflow)
+    @test parse_float_literal(Float64, "1.0e+1000", 1, 10) === (Inf, :overflow)
+    # Slow path (exceeds static buffer size)
+    @test parse_float_literal(Float64, "0.000000000000000000000000000000000000000000000000000000000001", 1, 63) === (1e-60, :ok)
+    # hexfloat
+    @test parse_float_literal(Float64, "0x0ap-0", 1, 8) === (Float64(10), :ok)
+    @test parse_float_literal(Float64, "0xffp-0", 1, 8) === (Float64(255), :ok)
+
+    # Float32
+    @test parse_float_literal(Float32, "123", 1, 4) === (123.0f0, :ok)
+    @test parse_float_literal(Float32, "1.3f2", 1, 6) === (1.3f2, :ok)
+    if !Sys.iswindows()
+        @test parse_float_literal(Float32, "1.0f-50", 1, 8) === (0.0f0, :underflow)
+    end
+    @test parse_float_literal(Float32, "1.0f+50", 1, 8) === (Inf32, :overflow)
+
+    # Assertions
+    @test_throws ErrorException parse_float_literal(Float64, "x", 1, 2)
+    @test_throws ErrorException parse_float_literal(Float64, "1x", 1, 3)
+
+    # Underscore and \minus allowed
+    @test parse_float_literal(Float64, "10_000.0_0", 1, 9) === (Float64(10000), :ok)
+    @test parse_float_literal(Float64, "−10.0", 1, 8)      === (Float64(-10), :ok)
+    @test parse_float_literal(Float64, "10e\u22121", 1, 8) === (Float64(1), :ok)
+end
+
+hexint(s) = parse_uint_literal(s, K"HexInt")
+binint(s) = parse_uint_literal(s, K"BinInt")
+octint(s) = parse_uint_literal(s, K"OctInt")
+
+@testset "Integer parsing" begin
+    # Integers
+    @testset "Signed Integers" begin
+        @test parse_int_literal("-1") isa Int
+        @test parse_int_literal("1") isa Int
+        @test parse_int_literal("2147483647") isa Int
+        @test parse_int_literal("9223372036854775807") isa Int64
+        @test parse_int_literal("9223372036854775808") isa Int128
+        @test parse_int_literal("170141183460469231731687303715884105727") isa Int128
+        @test parse_int_literal("170141183460469231731687303715884105728") isa BigInt
+    end
+
+    # HexInt
+    @testset "HexInt numeric limits for different types" begin
+        @test hexint("0xff")  === UInt8(0xff)
+        @test hexint("0x100") === UInt16(0x100)
+        @test hexint("0xffff") === UInt16(0xffff)
+        @test hexint("0x10000") === UInt32(0x10000)
+        @test hexint("0xffffffff") === UInt32(0xffffffff)
+        @test hexint("0x100000000") === UInt64(0x100000000)
+        @test hexint("0xffffffffffffffff") === UInt64(0xffffffffffffffff)
+        @test hexint("0x10000000000000000") === UInt128(0x10000000000000000)
+        @test hexint("0xffffffffffffffffffffffffffffffff") === UInt128(0xffffffffffffffffffffffffffffffff)
+        @test (n = hexint("0x100000000000000000000000000000000");
+               n isa BigInt && n == big"0x100000000000000000000000000000000")
+    end
+    @testset "HexInt string length limits for different types" begin
+        @test hexint("0x00")  === UInt8(0)
+        @test hexint("0x000")  === UInt16(0)
+        @test hexint("0x0000")  === UInt16(0)
+        @test hexint("0x00000")  === UInt32(0)
+        @test hexint("0x00000000") === UInt32(0)
+        @test hexint("0x000000000") === UInt64(0)
+        @test hexint("0x0000000000000000") === UInt64(0)
+        @test hexint("0x00000000000000000") === UInt128(0)
+        @test hexint("0x00000000000000000000000000000000") === UInt128(0)
+        @test (n = hexint("0x000000000000000000000000000000000");
+               n isa BigInt && n == 0)
+    end
+
+    # BinInt
+    @testset "BinInt numeric limits for different types" begin
+        @test binint("0b11111111")  === UInt8(0xff)
+        @test binint("0b100000000") === UInt16(0x100)
+        @test binint("0b1111111111111111") === UInt16(0xffff)
+        @test binint("0b10000000000000000") === UInt32(0x10000)
+        @test binint("0b11111111111111111111111111111111") === UInt32(0xffffffff)
+        @test binint("0b100000000000000000000000000000000") === UInt64(0x100000000)
+        @test binint("0b1111111111111111111111111111111111111111111111111111111111111111") === UInt64(0xffffffffffffffff)
+        @test binint("0b10000000000000000000000000000000000000000000000000000000000000000") === UInt128(0x10000000000000000)
+        @test binint("0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111") === UInt128(0xffffffffffffffffffffffffffffffff)
+        @test (n = binint("0b100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000");
+               n isa BigInt && n == big"0x100000000000000000000000000000000")
+    end
+    @testset "BinInt string length limits for different types" begin
+        @test binint("0b00000000")  === UInt8(0)
+        @test binint("0b000000000")  === UInt16(0)
+        @test binint("0b0000000000000000")  === UInt16(0)
+        @test binint("0b00000000000000000")  === UInt32(0)
+        @test binint("0b00000000000000000000000000000000") === UInt32(0)
+        @test binint("0b000000000000000000000000000000000") === UInt64(0)
+        @test binint("0b0000000000000000000000000000000000000000000000000000000000000000") === UInt64(0)
+        @test binint("0b00000000000000000000000000000000000000000000000000000000000000000") === UInt128(0)
+        @test binint("0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") === UInt128(0)
+        @test (n = binint("0b000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000");
+               n isa BigInt && n == 0)
+    end
+
+    # OctInt
+    @testset "OctInt numeric limits for different types" begin
+        @test octint("0o377")  === UInt8(0xff)
+        @test octint("0o400") === UInt16(0x100)
+        @test octint("0o177777") === UInt16(0xffff)
+        @test octint("0o200000") === UInt32(0x10000)
+        @test octint("0o37777777777") === UInt32(0xffffffff)
+        @test octint("0o40000000000") === UInt64(0x100000000)
+        @test octint("0o1777777777777777777777") === UInt64(0xffffffffffffffff)
+        @test octint("0o2000000000000000000000") === UInt128(0x10000000000000000)
+        @test octint("0o3777777777777777777777777777777777777777777") === UInt128(0xffffffffffffffffffffffffffffffff)
+        @test (n = octint("0o4000000000000000000000000000000000000000000");
+               n isa BigInt && n == big"0x100000000000000000000000000000000")
+    end
+    @testset "OctInt string length limits for different types" begin
+        @test octint("0o000")  === UInt8(0)
+        @test octint("0o0000")  === UInt16(0)
+        @test octint("0o000000")  === UInt16(0)
+        @test octint("0o0000000")  === UInt32(0)
+        @test octint("0o00000000000") === UInt32(0)
+        @test octint("0o000000000000") === UInt64(0)
+        @test octint("0o0000000000000000000000") === UInt64(0)
+        @test octint("0o00000000000000000000000") === UInt128(0)
+        @test octint("0o0000000000000000000000000000000000000000000") === UInt128(0)
+        @test (n = octint("0o00000000000000000000000000000000000000000000");
+               n isa BigInt && n == 0)
+    end
+
+    @testset "Underscore separators" begin
+        @test parse_int_literal("10_000") === 10000
+        @test parse_uint_literal("0xff_ff",     K"HexInt")  === 0xffff
+        @test parse_uint_literal("0b1111_1111", K"BinInt")  === 0xff
+        @test parse_uint_literal("0o177_777",   K"OctInt")  === 0xffff
+    end
+
+    @testset "\\minus ('\\u2212' / '−') allowed in numbers" begin
+        @test parse_int_literal("−10")  === -10
+    end
+end
+
+function unesc(str, firstind=firstindex(str), endind=lastindex(str)+1; diagnostics=false)
+    io = IOBuffer()
+    ds = JuliaSyntax.Diagnostic[]
+    unescape_julia_string(io, Vector{UInt8}(str), firstind, endind, ds)
+    if diagnostics
+        ds
+    else
+        @test isempty(ds)
+        String(take!(io))
+    end
+end
+
+@testset "String unescaping" begin
+    # offsets
+    @test unesc("abcd", 1, 3) == "ab"
+    @test unesc("abcd", 2, 4) == "bc"
+    @test unesc("abcd", 3, 5) == "cd"
+
+    # Allowed escapes of delimiters and dollar sign
+    @test unesc("\\\\") == "\\"
+    @test unesc("\\\"") == "\""
+    @test unesc("\\\$") == "\$"
+    @test unesc("\\'")  == "\'"
+    @test unesc("\\`")  == "`"
+
+    # Newline normalization
+    @test unesc("a\nb\rc\r\nd") == "a\nb\nc\nd"
+
+    # Invalid escapes
+    @test !isempty(unesc("\\.", diagnostics=true))
+    @test !isempty(unesc("\\z", diagnostics=true))
+
+    # Standard C escape sequences
+    @test codeunits(unesc("\\n\\t\\r\\e\\b\\f\\v\\a")) ==
+        UInt8[0x0a, 0x09, 0x0d, 0x1b, 0x08, 0x0c, 0x0b, 0x07]
+
+    # Hex and unicode escapes; \x \u and \U
+    @test unesc("x\\x61x") == "xax"
+    @test unesc("x\\u03b1x") == "xαx"
+    @test unesc("x\\U001F604x") == "x😄x"
+    # Maximum unicode code point
+    @test unesc("x\\U10ffffx") == "x\U10ffffx"
+    @test !isempty(unesc("x\\U110000x", diagnostics=true))
+
+    # variable-length octal
+    @test unesc("x\\7x") == "x\ax"
+    @test unesc("x\\77x") == "x?x"
+    @test unesc("x\\141x") == "xax"
+    @test unesc("x\\377x") == "x\xffx"
+    @test !isempty(unesc("x\\400x", diagnostics=true))
+end
+
+function unesc_raw(str, is_cmd)
+    io = IOBuffer()
+    JuliaSyntax.unescape_raw_string(io, Vector{UInt8}(str),
+                                    firstindex(str), lastindex(str)+1, is_cmd)
+    return String(take!(io))
+end
+
+@testset "Raw string unescaping" begin
+    # " delimited
+    # x\"x ==> x"x
+    @test unesc_raw("x\\\"x",     false) == "x\"x"
+    # x\`x ==> x\`x
+    @test unesc_raw("x\\`x",      false) == "x\\`x"
+    # x\\\"x ==> x\"x
+    @test unesc_raw("x\\\\\\\"x", false) == "x\\\"x"
+    # x\\\`x ==> x\\\`x
+    @test unesc_raw("x\\\\\\`x",  false) == "x\\\\\\`x"
+    # '\\ ' ==> '\\ '
+    @test unesc_raw("\\\\ ",      false) == "\\\\ "
+    # '\\' ==> '\'
+    @test unesc_raw("\\\\",       false) == "\\"
+    # '\\\\' ==> '\\'
+    @test unesc_raw("\\\\\\\\",   false) == "\\\\"
+
+    # ` delimited
+    # x\"x ==> x\"x
+    @test unesc_raw("x\\\"x",     true) == "x\\\"x"
+    # x\`x ==> x`x
+    @test unesc_raw("x\\`x",      true)  == "x`x"
+    # x\\\"x ==> x\"x
+    @test unesc_raw("x\\\\\\\"x", true) == "x\\\\\\\"x"
+    # x\\\`x ==> x\`x
+    @test unesc_raw("x\\\\\\`x",  true) == "x\\`x"
+    # '\\ ' ==> '\\ '
+    @test unesc_raw("\\\\ ",      true) == "\\\\ "
+end
+
+@testset "Normalization of identifiers" begin
+    # NFC normalization
+    # https://github.com/JuliaLang/julia/issues/5434
+    # https://github.com/JuliaLang/julia/pull/19464
+    @test JuliaSyntax.normalize_identifier("\u0069\u0302") == "\u00ee"
+
+    # Special Julia normalization
+    # https://github.com/JuliaLang/julia/pull/42561
+    @test JuliaSyntax.normalize_identifier("julia\u025B\u00B5\u00B7\u0387\u2212") ==
+        "julia\u03B5\u03BC\u22C5\u22C5\u002D"
+
+    # https://github.com/JuliaLang/julia/issues/48870
+    # ℏ -> ħ
+    @test JuliaSyntax.normalize_identifier("\u210f") == "\u0127"
+end
diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl
new file mode 100644
index 0000000000000..b5b08f488a918
--- /dev/null
+++ b/JuliaSyntax/test/parse_packages.jl
@@ -0,0 +1,79 @@
+# Full-scale parsing tests of JuliaSyntax itself, Julia Base, etc.
+
+juliasyntax_dir = joinpath(@__DIR__, "..")
+@testset "Parse JuliaSyntax" begin
+    test_parse_all_in_path(joinpath(juliasyntax_dir, "src"))
+end
+@testset "Parse JuliaSyntax tests" begin
+    test_parse_all_in_path(joinpath(juliasyntax_dir, "test"))
+end
+
+base_path = let
+    p = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base")
+    if !isdir(p)
+        # For julia 1.9 images.
+        p = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "src", "base")
+        if !isdir(p)
+            error("source for Julia base not found")
+        end
+    end
+    p
+end
+@testset "Parse Base at $base_path" begin
+    test_parse_all_in_path(base_path) do f
+        if endswith(f, "gmp.jl")
+            # Loose comparison due to `f(::g(w) = z) = a` syntax
+            return exprs_roughly_equal
+        end
+        return exprs_equal_no_linenum
+    end
+end
+
+base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")
+@testset "Parse Base tests at $base_tests_path" begin
+    test_parse_all_in_path(base_tests_path) do f
+        # In julia-1.6, test/copy.jl had spurious syntax which became the
+        # multidimensional array syntax in 1.7.
+        if endswith(f, "copy.jl") && v"1.6" <= VERSION < v"1.7"
+            return nothing
+        end
+
+        # syntax.jl has some intentionally weird syntax which we parse
+        # differently than the flisp parser, and some cases which we've
+        # decided are syntax errors.
+        if endswith(f, "syntax.jl")
+            return nothing
+        end
+
+        if endswith(f, "core.jl")
+            # The test
+            # @test Union{Tuple{T}, Tuple{T,Int}} where {T} === widen_diagonal(Union{Tuple{T}, Tuple{T,Int}} where {T})
+            # depends on a JuliaSyntax bugfix and parses differently (wrong) using
+            # flisp. This was added in julia#52228 and backported in julia#52045
+            if v"1.10.0-rc1.39" <= VERSION
+                return nothing
+            else
+                # Loose comparison due to `for f() = 1:3` syntax
+                return exprs_roughly_equal
+            end
+        end
+
+        # subtype.jl also depends on the where precedence JuliaSyntax bugfix as of julia#53034
+        if endswith(f, "subtype.jl") && v"1.11.0-DEV.1382" <= VERSION
+            return nothing
+        end
+
+        return exprs_equal_no_linenum
+    end
+end
+
+@testset "Parse Julia stdlib at $(Sys.STDLIB)" begin
+    for stdlib in readdir(Sys.STDLIB)
+        fulldir = joinpath(Sys.STDLIB, stdlib)
+        if isdir(fulldir)
+            @testset "Parse $stdlib" begin
+                test_parse_all_in_path(joinpath(Sys.STDLIB, fulldir))
+            end
+        end
+    end
+end
diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl
new file mode 100644
index 0000000000000..cda8443be113a
--- /dev/null
+++ b/JuliaSyntax/test/parse_stream.jl
@@ -0,0 +1,168 @@
+# Prototype ParseStream interface
+#
+# Here we test the ParseStream interface, by taking input code and checking
+# that the correct sequence of emit() and bump() produces a valid parse tree.
+
+using .JuliaSyntax: ParseStream,
+    peek, peek_token,
+    bump, bump_trivia, bump_invisible,
+    emit, emit_diagnostic, TRIVIA_FLAG, INFIX_FLAG,
+    ParseStreamPosition, first_child_position, last_child_position,
+    parsestmt
+
+# Here we manually issue parse events in the order the Julia parser would issue
+# them
+@testset "ParseStream" begin
+    code = """
+    for i = 1:10
+        xx[i] + 2
+        # hi
+        yy
+    end
+    """
+    st = ParseStream(code)
+
+    p1 = position(st)
+        @test peek(st) == K"for"
+        bump(st, TRIVIA_FLAG)
+        p2 = position(st)
+            @test peek(st) == K"Identifier"    # 'i'
+            bump(st)
+            @test peek(st) == K"="
+            bump(st, TRIVIA_FLAG)
+            p3 = position(st)
+                @test peek(st) == K"Integer"   # 1
+                bump(st)
+                @test peek(st) == K":"
+                bump(st) # :
+                @test peek(st) == K"Integer"   # 10
+                bump(st) # 10
+            emit(st, p3, K"call", INFIX_FLAG)
+        emit(st, p2, K"=")
+        @test peek(st) == K"NewlineWs"
+        bump(st, TRIVIA_FLAG)
+        p4 = position(st)
+            p5 = position(st) # [call]
+                p6 = position(st) # [ref]
+                    @test peek(st) == K"Identifier" # 'xx'
+                    bump(st)
+                    @test peek(st) == K"["
+                    bump(st, TRIVIA_FLAG)
+                    @test peek(st) == K"Identifier" # 'i'
+                    bump(st)
+                    @test peek(st) == K"]"
+                    bump(st, TRIVIA_FLAG)
+                emit(st, p6, K"ref")
+                @test peek(st) == K"+"
+                bump(st)
+                @test peek(st) == K"Integer"        # 2
+                bump(st)
+            emit(st, p5, K"call", INFIX_FLAG)
+            @test peek(st) == K"NewlineWs"
+            bump(st, TRIVIA_FLAG)
+            @test peek(st) == K"NewlineWs"
+            bump(st, TRIVIA_FLAG)
+            @test peek(st) == K"Identifier" # 'yy'
+            bump(st)
+        emit(st, p4, K"block")
+        @test peek(st) == K"NewlineWs"
+        bump(st, TRIVIA_FLAG)
+        bump(st, TRIVIA_FLAG) # end
+    emit(st, p1, K"for")
+    @test peek(st) == K"NewlineWs"
+    bump(st, TRIVIA_FLAG)
+    emit(st, p1, K"toplevel")
+end
+
+@testset "ParseStream constructors" begin
+    @testset "Byte buffer inputs" begin
+        # Vector{UInt8}
+        let
+            st = ParseStream(Vector{UInt8}("x+y"))
+            bump(st)
+            @test build_tree(Expr, st) == :x
+            @test JuliaSyntax.last_byte(st) == 1
+        end
+        let
+            st = ParseStream(Vector{UInt8}("x+y"), 3)
+            bump(st)
+            @test build_tree(Expr, st) == :y
+            @test JuliaSyntax.last_byte(st) == 3
+        end
+        # Ptr{UInt8}, len
+        code = "x+y"
+        GC.@preserve code begin
+            let
+                st = ParseStream(pointer(code), 3)
+                bump(st)
+                @test build_tree(Expr, st) == :x
+                @test JuliaSyntax.last_byte(st) == 1
+            end
+        end
+    end
+end
+
+@testset "ParseStream tree traversal" begin
+    # NB: ParseStreamPosition.node_index includes an initial sentinel token so
+    # indices here are one more than "might be expected". Additionally, note that
+    # the byte index points to the first byte after the token.
+    st = parse_sexpr("((a b) c)")
+    child1_pos = first_child_position(st, position(st))
+    @test child1_pos == ParseStreamPosition(7, 8)
+    @test first_child_position(st, child1_pos) == ParseStreamPosition(4, 4)
+    @test last_child_position(st, position(st)) == ParseStreamPosition(9, 10)
+    @test last_child_position(st, child1_pos) == ParseStreamPosition(6, 6)
+
+    st = parse_sexpr("( (a b) c)")
+    child1_pos = first_child_position(st, position(st))
+    @test child1_pos == ParseStreamPosition(8, 9)
+    @test first_child_position(st, child1_pos) == ParseStreamPosition(5, 5)
+    @test last_child_position(st, position(st)) == ParseStreamPosition(10, 11)
+    @test last_child_position(st, child1_pos) == ParseStreamPosition(7, 7)
+
+    st = parse_sexpr("(a (b c))")
+    @test first_child_position(st, position(st)) == ParseStreamPosition(3, 3)
+    child2_pos = last_child_position(st, position(st))
+    @test child2_pos == ParseStreamPosition(9, 10)
+    @test first_child_position(st, child2_pos) == ParseStreamPosition(6, 6)
+    @test last_child_position(st, child2_pos) == ParseStreamPosition(8, 8)
+
+    st = parse_sexpr("( a (b c))")
+    @test first_child_position(st, position(st)) == ParseStreamPosition(4, 4)
+    child2_pos = last_child_position(st, position(st))
+    @test child2_pos == ParseStreamPosition(10, 11)
+    @test first_child_position(st, child2_pos) == ParseStreamPosition(7, 7)
+    @test last_child_position(st, child2_pos) == ParseStreamPosition(9, 9)
+
+    st = parse_sexpr("a (b c)")
+    @test first_child_position(st, position(st)) == ParseStreamPosition(5, 5)
+    @test last_child_position(st, position(st)) == ParseStreamPosition(7, 7)
+
+    st = parse_sexpr("(a) (b c)")
+    @test first_child_position(st, position(st)) == ParseStreamPosition(7, 8)
+    @test last_child_position(st, position(st)) == ParseStreamPosition(9, 10)
+
+    st = parse_sexpr("(() ())")
+    @test first_child_position(st, position(st)) == ParseStreamPosition(4, 5)
+    @test last_child_position(st, position(st)) == ParseStreamPosition(7, 9)
+end
+
+@testset "SubString{GenericString} (issue #505)" begin
+    x = Test.GenericString("1 2")
+    @test x == "1 2"
+    y = split(x)[1]
+    @test y == "1"
+    @test y isa SubString{GenericString}
+    @test ParseStream(y) isa ParseStream
+    @test parsestmt(Expr, y) == parsestmt(Expr, "1")
+end
+
+@testset "peek_behind_pos with negative byte index" begin
+    # Test that peek_behind_pos doesn't cause InexactError when byte_idx goes negative
+    # This can happen when parsing certain incomplete keywords like "do"
+    # where trivia skipping walks back past the beginning of the stream
+    @test_throws JuliaSyntax.ParseError parseall(GreenNode, "do")
+    @test_throws JuliaSyntax.ParseError parseall(GreenNode, "do ")
+    @test_throws JuliaSyntax.ParseError parseall(GreenNode, " do")
+    @test_throws JuliaSyntax.ParseError parseall(GreenNode, "do\n")
+end
diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl
new file mode 100644
index 0000000000000..2b3e106e1dae4
--- /dev/null
+++ b/JuliaSyntax/test/parser.jl
@@ -0,0 +1,1248 @@
+"""
+Parse string to SyntaxNode tree and show as an sexpression
+"""
+function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", show_kws...)
+    stream = ParseStream(code, version=v)
+    production(ParseState(stream))
+    JuliaSyntax.validate_tokens(stream)
+    s = build_tree(SyntaxNode, stream, keep_parens=true)
+    return sprint(io->show(io, MIME("text/x.sexpression"), s; show_kws...))
+end
+
+function test_parse(production, input, expected)
+    if !(input isa AbstractString)
+        opts, input = input
+    else
+        opts = NamedTuple()
+    end
+    parsed = parse_to_sexpr_str(production, input; opts...)
+    if expected isa Regex # Could be AbstractPattern, but that type was added in Julia 1.6.
+        @test match(expected, parsed) !== nothing
+    else
+        @test parsed == expected
+    end
+end
+
+function test_parse(inout::Pair)
+    test_parse(JuliaSyntax.parse_toplevel, inout...)
+end
+
+PARSE_ERROR = r"\(error-t "
+
+with_version(v::VersionNumber, (i,o)::Pair) = ((;v=v), i) => o
+
+# TODO:
+# * Extract the following test cases from the source itself.
+# * Use only the green tree to generate the S-expressions
+#   (add flag annotations to heads)
+tests = [
+    JuliaSyntax.parse_toplevel => [
+        "a \n b"     =>  "(toplevel a b)"
+        "a;b \n c;d" =>  "(toplevel (toplevel-; a b) (toplevel-; c d))"
+        "a \n \n"    =>  "(toplevel a)"
+        ""           =>  "(toplevel)"
+    ],
+    JuliaSyntax.parse_block => [
+        "a;b;c"   => "(block a b c)"
+        "a;;;b;;" => "(block a b)"
+        ";a"      => "(block a)"
+        "\n a"    => "(block a)"
+        "a\nb"    => "(block a b)"
+    ],
+    JuliaSyntax.parse_stmts => [
+        "a;b;c"   => "(toplevel-; a b c)"
+        "a;;;b;;" => "(toplevel-; a b)"
+        """ "x" a ; "y" b """ =>
+            """(toplevel-; (doc (string "x") a) (doc (string "y") b))"""
+        "x y"  =>  "(wrapper x (error-t y))"
+    ],
+    JuliaSyntax.parse_eq => [
+        # parse_assignment
+        "a = b"       =>  "(= a b)"
+        "a .= b"      =>  "(.= a b)"
+        "a += b"      =>  "(op= a + b)"
+        "a .+= b"     =>  "(.op= a + b)"
+        "a, b = c, d" =>  "(= (tuple a b) (tuple c d))"
+        "x, = xs"     =>  "(= (tuple x) xs)"
+        "[a ~b]"      =>  "(hcat a (call-pre ~ b))"
+        "a ~ b"       =>  "(call-i a ~ b)"
+        "a .~ b"      =>  "(dotcall-i a ~ b)"
+        "[a ~ b c]"   =>  "(hcat (call-i a ~ b) c)"
+        "[a~b]"       =>  "(vect (call-i a ~ b))"
+        "f(x) .= 1"   =>  "(.= (call f x) 1)"
+        "::g() = 1"   =>  "(= (::-pre (call g)) 1)"
+        "f(x) = 1"    =>  "(function-= (call f x) 1)"
+        "f(x)::T = 1" =>  "(function-= (::-i (call f x) T) 1)"
+        "f(x) where S where U = 1" =>  "(function-= (where (where (call f x) S) U) 1)"
+        "(f(x)::T) where S = 1" =>  "(function-= (where (parens (::-i (call f x) T)) S) 1)"
+        "f(x) = 1 = 2"    =>  "(function-= (call f x) (= 1 2))" # Should be a warning!
+    ],
+    JuliaSyntax.parse_pair => [
+        "a => b"  =>  "(call-i a => b)"
+        "a .=> b" => "(dotcall-i a => b)"
+    ],
+    JuliaSyntax.parse_cond => [
+        "a ? b : c"   => "(? a b c)"
+        "a ?\nb : c"  => "(? a b c)"
+        "a ? b :\nc"  => "(? a b c)"
+        "a ? b : c:d" =>   "(? a b (call-i c : d))"
+        # Following are errors but should recover
+        "a? b : c"    => "(? a (error-t) b c)"
+        "a ?b : c"    => "(? a (error-t) b c)"
+        "a ? b: c"    => "(? a b (error-t) c)"
+        "a ? b :c"    => "(? a b (error-t) c)"
+        "a ? b c"     => "(? a b (error-t) c)"
+        "A[x ? y : end]" => "(ref A (? x y end))"
+    ],
+    JuliaSyntax.parse_arrow => [
+        "x → y"     =>  "(call-i x → y)"
+        "x <--> y"  =>  "(call-i x <--> y)"
+        "x --> y"   =>  "(--> x y)"
+        "x .--> y"  =>  "(dotcall-i x --> y)"
+        "x -->₁ y"  =>  "(call-i x -->₁ y)"
+    ],
+    JuliaSyntax.parse_or => [
+        "x || y || z" => "(|| x (|| y z))"
+        ((v=v"1.6",), "x .|| y") => "(error (.|| x y))"
+        ((v=v"1.7",), "x .|| y") => "(.|| x y)"
+    ],
+    JuliaSyntax.parse_and => [
+        "x && y && z" => "(&& x (&& y z))"
+        ((v=v"1.6",), "x .&& y") => "(error (.&& x y))"
+        ((v=v"1.7",), "x .&& y") => "(.&& x y)"
+    ],
+    JuliaSyntax.parse_comparison => [
+        # Type comparisons are syntactic
+        "x <: y"      => "(<: x y)"
+        "x >: y"      => "(>: x y)"
+        # Normal binary comparisons
+        "x < y"       => "(call-i x < y)"
+        "x .< y"      => "(dotcall-i x < y)"
+        "x .<: y"     => "(dotcall-i x <: y)"
+        ":. == :."    => "(call-i (quote-: .) == (quote-: .))"
+        # Comparison chains
+        "x < y < z"   => "(comparison x < y < z)"
+        "x == y < z"  => "(comparison x == y < z)"
+        "x .< y .< z" => "(comparison x (. <) y (. <) z)"
+        "x .< y < z"  => "(comparison x (. <) y < z)"
+    ],
+    JuliaSyntax.parse_pipe_lt => [
+        "x <| y <| z" => "(call-i x <| (call-i y <| z))"
+    ],
+    JuliaSyntax.parse_pipe_gt => [
+        "x |> y |> z" => "(call-i (call-i x |> y) |> z)"
+        "x .|> y"     => "(dotcall-i x |> y)"
+    ],
+    JuliaSyntax.parse_range => [
+        "1:2"       => "(call-i 1 : 2)"
+        "1:2:3"     => "(call-i 1 : 2 3)"
+        "a:b:c:d:e" => "(call-i (call-i a : b c) : d e)"
+        "a :< b"    => "(call-i a (error : <) b)"
+        "1:\n2"     => "(call-i 1 : (error))"
+    ],
+    JuliaSyntax.parse_range => [
+        "a..b"       => "(call-i a .. b)"
+        "a … b"      => "(call-i a … b)"
+        "a .… b"     => "(dotcall-i a … b)"
+        "[1 :a]"     => "(hcat 1 (quote-: a))"
+        "[1 2:3 :a]" =>  "(hcat 1 (call-i 2 : 3) (quote-: a))"
+        "x..."     => "(... x)"
+        "x:y..."   => "(... (call-i x : y))"
+        "x..y..."  => "(... (call-i x .. y))"
+    ],
+    JuliaSyntax.parse_invalid_ops => [
+        "a--b"  =>  "(call-i a (ErrorInvalidOperator) b)"
+    ],
+    JuliaSyntax.parse_expr => [
+        "a - b - c"  => "(call-i (call-i a - b) - c)"
+        "a + b + c"  => "(call-i a + b c)"
+        "a + b .+ c" => "(dotcall-i (call-i a + b) + c)"
+        # parse_with_chains:
+        # The following is two elements of a hcat
+        "[x +y]"     =>  "(hcat x (call-pre + y))"
+        "[x+y +z]"   =>  "(hcat (call-i x + y) (call-pre + z))"
+        # Conversely the following are infix calls
+        "[x +₁y]"    =>  "(vect (call-i x +₁ y))"
+        "[x+y+z]"    =>  "(vect (call-i x + y z))"
+        "[x+y + z]"  =>  "(vect (call-i x + y z))"
+        # Dotted and normal operators
+        "a +₁ b +₁ c" =>  "(call-i (call-i a +₁ b) +₁ c)"
+        "a .+ b .+ c" =>  "(dotcall-i (dotcall-i a + b) + c)"
+    ],
+    JuliaSyntax.parse_term => [
+        "a * b * c"  => "(call-i a * b c)"
+        "a .* b"     => "(dotcall-i a * b)"
+        "-2*x"       => "(call-i -2 * x)"
+    ],
+    JuliaSyntax.parse_rational => [
+        "x // y // z" => "(call-i (call-i x // y) // z)"
+    ],
+    JuliaSyntax.parse_shift => [
+        "x >> y >> z" => "(call-i (call-i x >> y) >> z)"
+    ],
+    JuliaSyntax.parse_juxtapose => [
+        "2x"         => "(juxtapose 2 x)"
+        "2x"         => "(juxtapose 2 x)"
+        "2(x)"       => "(juxtapose 2 (parens x))"
+        "(2)(3)x"    => "(juxtapose (parens 2) (parens 3) x)"
+        "(x-1)y"     => "(juxtapose (parens (call-i x - 1)) y)"
+        "x'y"        => "(juxtapose (call-post x ') y)"
+        "1√x"        =>  "(juxtapose 1 (call-pre √ x))"
+        # errors
+        "\"a\"\"b\"" => "(juxtapose (string \"a\") (error-t) (string \"b\"))"
+        "\"a\"x"     => "(juxtapose (string \"a\") (error-t) x)"
+        "\"\$y\"x"   => "(juxtapose (string y) (error-t) x)"
+        "\"a\"begin end" => "(juxtapose (string \"a\") (error-t) (block))"
+        # Not juxtaposition - parse_juxtapose will consume only the first token.
+        "x.3"       =>  "x"
+        "f(2)2"     =>  "(call f 2)"
+        "x' y"      =>  "(call-post x ')"
+        "x 'y"      =>  "x"
+        "x@y"       =>  "x"
+        "(begin end)x" => "(parens (block))"
+    ],
+    JuliaSyntax.parse_unary => [
+        ":T"       => "(quote-: T)"
+        "in::T"    => "(::-i in T)"
+        "isa::T"   => "(::-i isa T)"
+        "-2^x"     => "(call-pre - (call-i 2 ^ x))"
+        "-2[1, 3]" => "(call-pre - (ref 2 1 3))"
+        # signed literals
+        "-2"       => "-2"
+        "+2.0"     => "2.0"
+        "-1.0f0"   => "-1.0f0"
+        "-0xf.0p0" => "-15.0"
+        "+0b10010" => "0x12"
+        "+0o22"    => "0x12"
+        "+0x12"    => "0x12"
+        "-0b10010" => "(call-pre - 0x12)"
+        "-0o22"    => "(call-pre - 0x12)"
+        "-0x12"    => "(call-pre - 0x12)"
+        "-1::T"    => "(::-i -1 T)"
+        # Standalone dotted operators are parsed as (|.| op)
+        ".+"   =>  "(. +)"
+        ".+\n" =>  "(. +)"
+        ".+ =" =>  "(. +)"
+        ".+)"  =>  "(. +)"
+        ".&"   =>  "(. &)"
+        # Standalone non-dotted operators
+        "+)"   =>  "+"
+        # Call with type parameters or non-unary prefix call
+        "+{T}(x::T)"  =>  "(call (curly + T) (::-i x T))"
+        "*(x)"        =>  "(call * x)"
+        ".*(x)"       =>  "(call (. *) x)"
+        # Prefix function calls for operators which are both binary and unary
+        "+(a,b)"   =>  "(call + a b)"
+        "+(a,)"    =>  "(call-, + a)"
+        ".+(a,)"   =>  "(call-, (. +) a)"
+        "(.+)(a)"  =>  "(call (parens (. +)) a)"
+        "(.~(a))"  =>  "(parens (dotcall-pre ~ (parens a)))"
+        "+(a=1,)"  =>  "(call-, + (= a 1))"
+        "+(a...)"  =>  "(call + (... a))"
+        "+(a;b,c)" =>  "(call + a (parameters b c))"
+        "+(;a)"    =>  "(call + (parameters a))"
+        "+(;;a)"   =>  "(call + (parameters) (parameters a))"
+        "+()"      =>  "(call +)"
+        "+(\n;a)"  =>  "(call + (parameters a))"
+        "+(;)"     =>  "(call + (parameters))"
+        "+(\n;\n)" =>  "(call + (parameters))"
+        "+(\n)"    =>  "(call +)"
+        # Whitespace not allowed before prefix function call bracket
+        "+ (a,b)"  =>  "(call + (error) a b)"
+        # Prefix calls have higher precedence than ^
+        "+(a,b)^2"  =>  "(call-i (call + a b) ^ 2)"
+        "+(a,b)(x)^2"  =>  "(call-i (call (call + a b) x) ^ 2)"
+        "<:(a,)"  =>  "(<:-, a)"
+        # Unary function calls with brackets as grouping, not an arglist
+        ".+(a)"   =>  "(dotcall-pre + (parens a))"
+        "+(a;b)"  =>  "(call-pre + (block-p a b))"
+        "+(;;)"   =>  "(call-pre + (block-p))"
+        "+(;;)"   =>  "(call-pre + (block-p))"
+        "+(a;)"   =>  "(call-pre + (block-p a))"
+        "+(a;;)"  =>  "(call-pre + (block-p a))"
+        "+(\n;\n;\n)" =>  "(call-pre + (block-p))"
+        "+(a=1)"  =>  "(call-pre + (parens (= a 1)))"
+        # Unary operators have lower precedence than ^
+        "+(a)^2"  =>  "(call-pre + (call-i (parens a) ^ 2))"
+        ".+(a)^2" =>  "(dotcall-pre + (call-i (parens a) ^ 2))"
+        "+(a)(x,y)^2"  =>  "(call-pre + (call-i (call (parens a) x y) ^ 2))"
+        "<:(a)"   =>  "(<:-pre (parens a))"
+        # Normal unary calls
+        "+x" => "(call-pre + x)"
+        "√x" => "(call-pre √ x)"
+        ".~x" => "(dotcall-pre ~ x)"
+        # Things which are not quite negative literals
+        "-0x1"=> "(call-pre - 0x01)"
+        "- 2" => "(call-pre - 2)"
+        ".-2" => "(dotcall-pre - 2)"
+        # Not a unary operator
+        "/x"     => "(call-pre (error /) x)"
+        "+₁ x"   => "(call-pre (error +₁) x)"
+        ".<: x"  => "(dotcall-pre (error (. <:)) x)"
+        "?\"str\"" => """(call-pre (error ?) (string "str"))"""
+    ],
+    JuliaSyntax.parse_factor => [
+        "x^y"      =>  "(call-i x ^ y)"
+        "x^y^z"    =>  "(call-i x ^ (call-i y ^ z))"
+        "x .^ y"   =>  "(dotcall-i x ^ y)"
+        "begin x end::T"  =>  "(::-i (block x) T)"
+        # parse_decl_with_initial_ex
+        "a::b"     =>  "(::-i a b)"
+        "a::b::c"  =>  "(::-i (::-i a b) c)"
+        "a->b"     =>  "(-> (tuple a) b)"
+        "(a,b)->c" =>  "(-> (tuple-p a b) c)"
+        "(a;b=1)->c" =>  "(-> (tuple-p a (parameters (= b 1))) c)"
+        "x::T->c"  =>  "(-> (tuple (::-i x T)) c)"
+        "\$a->b"   =>  "(-> (tuple (\$ a)) b)"
+        "\$(a)->b" =>  "(-> (tuple (\$ (parens a))) b)"
+        # FIXME "&(a)->b"  =>  "(-> (tuple-p (& (parens a))) b)"
+        # FIXME "::(a)->b" =>  "(-> (tuple-p (:: (parens a))) b)"
+        # `where` combined with `->` still parses strangely. However:
+        # * It's extra hard to add a tuple around the `x` in this syntax corner case.
+        # * The user already needs to add additional, ugly, parens to get this
+        #   to parse correctly because the precedence of `where` is
+        #   inconsistent with `::` and `->` in this case.
+        "(x where T)->c" => "(-> (parens (where x T)) c)"
+        "((x::T) where T)->c" => "(-> (parens (where (parens (::-i x T)) T)) c)"
+    ],
+    JuliaSyntax.parse_unary_subtype => [
+        "<: )"    =>  "<:"
+        "<: \n"   =>  "<:"
+        "<: ="    =>  "<:"
+        "<:{T}(x::T)"   =>  "(call (curly <: T) (::-i x T))"
+        "<:(x::T)"      =>  "(<:-pre (parens (::-i x T)))"
+        "<: x"          =>  "(<:-pre x)"
+        "<: <: x"       =>  "(<:-pre (<:-pre x))"
+        "<: A where B"  =>  "(<:-pre (where A B))"
+        # FIXME: The following bizarre precedence seems broken, but is
+        # compatible with the reference parser (see #248)
+        "+ <: A where B"  =>  "(where (call-pre + (<:-pre A)) B)"
+        # Really for parse_where
+        "x where \n {T}"  =>  "(where x (braces T))"
+        "x where {T,S}"  =>  "(where x (braces T S))"
+        "x where {T,S,}" =>  "(where x (braces-, T S))"
+        "x where {T S}"  =>  "(where x (bracescat (row T S)))"
+        "x where {y for y in ys}"  =>  "(where x (braces (generator y (iteration (in y ys)))))"
+        "x where T"  =>  "(where x T)"
+        "x where \n T"  =>  "(where x T)"
+        "x where T<:S"  =>  "(where x (<: T S))"
+        # nested unary and unary-syntactic ops
+        "<: + <: + A" => "(<:-pre (call-pre + (<:-pre (call-pre + A))))"
+        "* <: A"      => "(call-pre (error *) (<:-pre A))"
+    ],
+    JuliaSyntax.parse_unary_prefix => [
+        "&)"   => "&"
+        "\$\n" => "\$"
+        "&a"   => "(& a)"
+        "::a"  => "(::-pre a)"
+        "\$a"  => "(\$ a)"
+        "\$\$a"  => "(\$ (\$ a))"
+    ],
+    JuliaSyntax.parse_call => [
+        # parse_call
+        "f(x)"    =>  "(call f x)"
+        "\$f(x)"  =>  "(call (\$ f) x)"
+        ".&(x,y)" =>  "(call (. &) x y)"
+        # parse_call_chain
+        "f(a).g(b)" => "(call (. (call f a) g) b)"
+        "\$A.@x"    =>  "(macrocall (. (\$ A) (macro_name x)))"
+
+        # non-errors in space sensitive contexts
+        "[f (x)]"    =>  "(hcat f (parens x))"
+        "[f x]"      =>  "(hcat f x)"
+        # space separated macro calls
+        "@foo a b"     =>  "(macrocall (macro_name foo) a b)"
+        "@foo (x)"     =>  "(macrocall (macro_name foo) (parens x))"
+        "@foo (x,y)"   =>  "(macrocall (macro_name foo) (tuple-p x y))"
+        "A.@foo a b"   =>  "(macrocall (. A (macro_name foo)) a b)"
+        "@A.foo a b"   =>  "(macrocall (macro_name (. A foo)) a b)"
+        "[@foo x]"     =>  "(vect (macrocall (macro_name foo) x))"
+        "[@foo]"       =>  "(vect (macrocall (macro_name foo)))"
+        "@var\"#\" a"  =>  "(macrocall (macro_name (var #)) a)"
+        "@(A) x"       =>  "(macrocall (macro_name (parens A)) x)"
+        "A.@x y"       =>  "(macrocall (. A (macro_name x)) y)"
+        "A.@var\"#\" a"=>  "(macrocall (. A (macro_name (var #))) a)"
+        "@+x y"        =>  "(macrocall (macro_name +) x y)"
+        "A.@.x"        =>  "(macrocall (. A (macro_name .)) x)"
+        # Macro names
+        "@! x"  => "(macrocall (macro_name !) x)"
+        "@.. x" => "(macrocall (macro_name ..) x)"
+        "@\$ y"  => "(macrocall (macro_name \$) y)"
+        "@[x] y z" => "(macrocall (macro_name (error (vect x))) y z)"
+        # Special @doc parsing rules
+        "@doc x\ny"    =>  "(macrocall (macro_name doc) x y)"
+        "A.@doc x\ny"  =>  "(macrocall (. A (macro_name doc)) x y)"
+        "@A.doc x\ny"  =>  "(macrocall (macro_name (. A doc)) x y)"
+        "@doc x y\nz"  =>  "(macrocall (macro_name doc) x y)"
+        "@doc x\n\ny"  =>  "(macrocall (macro_name doc) x)"
+        "@doc x\nend"  =>  "(macrocall (macro_name doc) x)"
+
+        # Special 1.14 @VERSION parsing rules
+        ((v=v"1.13",), "@VERSION")        =>  "(macrocall (macro_name VERSION))"
+        ((v=v"1.13",), "@A.B.VERSION")    =>  "(macrocall (macro_name (. (. A B) VERSION)))"
+        ((v=v"1.13",), "A.B.@VERSION")     =>  "(macrocall (. (. A B) (macro_name VERSION)))"
+        ((v=v"1.14",), "@VERSION")        =>  "(macrocall (macro_name VERSION) v\"1.14.0\")"
+        ((v=v"1.14",), "@A.B.VERSION")    =>  "(macrocall (macro_name (. (. A B) VERSION)) v\"1.14.0\")"
+        ((v=v"1.14",), "A.B.@VERSION")     =>  "(macrocall (. (. A B) (macro_name VERSION)) v\"1.14.0\")"
+
+        # calls with brackets
+        "f(a,b)"  => "(call f a b)"
+        "f(a,)"   => "(call-, f a)"
+        "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))"
+        "f(a; b; c)" => "(call f a (parameters b) (parameters c))"
+        "(a=1)()" =>  "(call (parens (= a 1)))"
+        "f (a)" => "(call f (error-t) a)"
+        "@x(a, b)"   =>  "(macrocall-p (macro_name x) a b)"
+        "@x(a, b,)"  =>  "(macrocall-p-, (macro_name x) a b)"
+        "A.@x(y)"    =>  "(macrocall-p (. A (macro_name x)) y)"
+        "A.@x(y).z"  =>  "(. (macrocall-p (. A (macro_name x)) y) z)"
+        "f(y for x = xs; a)" => "(call f (generator y (iteration (in x xs))) (parameters a))"
+        # do
+        "f() do\nend"         =>  "(call f (do (tuple) (block)))"
+        "f() do ; body end"   =>  "(call f (do (tuple) (block body)))"
+        "f() do x, y\n body end"  =>  "(call f (do (tuple x y) (block body)))"
+        "f(x) do y body end"  =>  "(call f x (do (tuple y) (block body)))"
+        "@f(x) do y body end" =>  "(macrocall-p (macro_name f) x (do (tuple y) (block body)))"
+
+        # square brackets
+        "@S[a,b]"  => "(macrocall (macro_name S) (vect a b))"
+        "@S[a b]"  => "(macrocall (macro_name S) (hcat a b))"
+        "@S[a; b]" => "(macrocall (macro_name S) (vcat a b))"
+        "A.@S[a]"  =>  "(macrocall (. A (macro_name S)) (vect a))"
+        "@S[a].b"  =>  "(. (macrocall (macro_name S) (vect a)) b)"
+        ((v=v"1.7",), "@S[a ;; b]")  =>  "(macrocall (macro_name S) (ncat-2 a b))"
+        ((v=v"1.6",), "@S[a ;; b]")  =>  "(macrocall (macro_name S) (error (ncat-2 a b)))"
+        "a[i]"  =>  "(ref a i)"
+        "a [i]"  =>  "(ref a (error-t) i)"
+        "a[i,j]"  =>  "(ref a i j)"
+        "(a=1)[]" =>  "(ref (parens (= a 1)))"
+        "a[end]"  =>  "(ref a end)"
+        "a[begin]"  =>  "(ref a begin)"
+        "a[:(end)]" => "(typed_hcat a (quote-: (parens (error-t))) (error-t))"
+        "T[x   y]"  =>  "(typed_hcat T x y)"
+        "T[x ; y]"  =>  "(typed_vcat T x y)"
+        "T[a b; c d]"  =>  "(typed_vcat T (row a b) (row c d))"
+        "T[x for x in xs]"  =>  "(typed_comprehension T (generator x (iteration (in x xs))))"
+        ((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))"
+
+        # Dotted forms
+        # Allow `@` in macrocall only in first and last position
+        "A.B.@x"    =>  "(macrocall (. (. A B) (macro_name x)))"
+        "@A.B.x"    =>  "(macrocall (macro_name (. (. A B) x)))"
+        "A.@B.x"    =>  "(macrocall (. (. A (error-t) B) (macro_name (error-t) x)))"
+        "@M.(x)"    =>  "(macrocall (dotcall (macro_name M) (error-t) x))"
+        "f.(a,b)"   =>  "(dotcall f a b)"
+        "f.(a,b,)"  =>  "(dotcall-, f a b)"
+        "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))"
+        "(a=1).()" =>  "(dotcall (parens (= a 1)))"
+        "f. (x)"    =>  "(dotcall f (error-t) x)"
+        # Other dotted syntax
+        "A.:+"      =>  "(. A (quote-: +))"
+        "A.:.+"     =>  "(. A (quote-: (. +)))"
+        "A.: +"     =>  "(. A (quote-: (error-t) +))"
+        "f.\$x"     =>  "(. f (\$ x))"
+        "f.\$(x+y)" =>  "(. f (\$ (parens (call-i x + y))))"
+        "A.\$B.@x"  =>  "(macrocall (. (. A (\$ B)) (macro_name x)))"
+        "@A.\$x a"  =>  "(macrocall (macro_name (. A (error x))) a)"
+        "A.@x"      =>  "(macrocall (. A (macro_name x)))"
+        "A.@x a"    =>  "(macrocall (. A (macro_name x)) a)"
+        "@A.B.@x a" =>  "(macrocall (macro_name (. (. A B) (error-t) x)) a)"
+        # .' discontinued
+        "f.'"    =>  "(dotcall-post f (error '))"
+        # Field/property syntax
+        "f.x.y"  =>  "(. (. f x) y)"
+        "x .y"   =>  "(. x (error-t) y)"
+        "x.?"    =>  "(. x ?)"
+        "x.in"   =>  "(. x in)"
+        # Adjoint
+        "f'"  => "(call-post f ')"
+        "f'ᵀ" => "(call-post f 'ᵀ)"
+        # Curly calls
+        "S {a}"   => "(curly S (error-t) a)"
+        "A.@S{a}" => "(macrocall (. A (macro_name S)) (braces a))"
+        "@S{a,b}" => "(macrocall (macro_name S) (braces a b))"
+        "A.@S{a}" => "(macrocall (. A (macro_name S)) (braces a))"
+        "@S{a}.b" => "(. (macrocall (macro_name S) (braces a)) b)"
+        # Macro calls with chained operations
+        "@a[b][c]" => "(ref (macrocall (macro_name a) (vect b)) c)"
+        "@a{b}{c}" => "(curly (macrocall (macro_name a) (braces b)) c)"
+        "@a[b]{c}" => "(curly (macrocall (macro_name a) (vect b)) c)"
+        "@a{b}[c]" => "(ref (macrocall (macro_name a) (braces b)) c)"
+        "S{a,b}"  => "(curly S a b)"
+        "T{y for x = xs; a}" => "(curly T (generator y (iteration (in x xs))) (parameters a))"
+        # String macros
+        "x\"str\""   => """(macrocall @x_str (string-r "str"))"""
+        "x`str`"     => """(macrocall @x_cmd (cmdstring-r "str"))"""
+        "x\"\""      => """(macrocall @x_str (string-r ""))"""
+        "x``"        => """(macrocall @x_cmd (cmdstring-r ""))"""
+        "in\"str\""  => """(macrocall @in_str (string-r "str"))"""
+        "outer\"str\"" => """(macrocall @outer_str (string-r "str"))"""
+        "A.x\"str\"" => """(macrocall (. A @x_str) (string-r "str"))"""
+        "A.x`str`" => """(macrocall (. A @x_cmd) (cmdstring-r "str"))"""
+        # Triple quoted processing for custom strings
+        "r\"\"\"\nx\"\"\""        => raw"""(macrocall @r_str (string-s-r "x"))"""
+        "r\"\"\"\n x\n y\"\"\""   => raw"""(macrocall @r_str (string-s-r "x\n" "y"))"""
+        "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\\\n" "y"))"""
+        # Macro suffixes can include keywords and numbers
+        "x\"s\"y"    => """(macrocall @x_str (string-r "s") "y")"""
+        "x\"s\"end"  => """(macrocall @x_str (string-r "s") "end")"""
+        "x\"s\"in"   => """(macrocall @x_str (string-r "s") "in")"""
+        "x\"s\"2"    => """(macrocall @x_str (string-r "s") 2)"""
+        "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)"""
+        # Cmd macro suffixes
+        "x`s`y"    => """(macrocall @x_cmd (cmdstring-r "s") "y")"""
+        "x`s`end"  => """(macrocall @x_cmd (cmdstring-r "s") "end")"""
+        "x`s`in"   => """(macrocall @x_cmd (cmdstring-r "s") "in")"""
+        "x`s`2"    => """(macrocall @x_cmd (cmdstring-r "s") 2)"""
+        "x`s`10.0" => """(macrocall @x_cmd (cmdstring-r "s") 10.0)"""
+    ],
+    JuliaSyntax.parse_resword => [
+        # In normal_context
+        "begin f() where T = x end" => "(block (function-= (where (call f) T) x))"
+        # block
+        "begin end"         =>  "(block)"
+        "begin a ; b end"   =>  "(block a b)"
+        "begin\na\nb\nend"  =>  "(block a b)"
+        # quote
+        "quote end"         =>  "(quote (block))"
+        "quote body end"    =>  "(quote (block body))"
+        # while
+        "while cond body end"  =>  "(while cond (block body))"
+        "while x < y \n a \n b \n end"  =>  "(while (call-i x < y) (block a b))"
+        # for
+        "for x in xs end" => "(for (iteration (in x xs)) (block))"
+        "for x in xs, y in ys \n a \n end" => "(for (iteration (in x xs) (in y ys)) (block a))"
+        # let
+        "let x=1\n end"    =>  "(let (block (= x 1)) (block))"
+        "let x=1 ; end"    =>  "(let (block (= x 1)) (block))"
+        "let x ; end"      =>  "(let (block x) (block))"
+        "let x::1 ; end"   =>  "(let (block (::-i x 1)) (block))"
+        "let x=1,y=2 end"  =>  "(let (block (= x 1) (= y 2)) (block))"
+        "let x+=1 ; end"   =>  "(let (block (op= x + 1)) (block))"
+        "let ; end"        =>  "(let (block) (block))"
+        "let ; body end"   =>  "(let (block) (block body))"
+        "let\na\nb\nend"   =>  "(let (block) (block a b))"
+        # abstract type
+        "abstract type A end"            =>  "(abstract A)"
+        "abstract type A ; end"          =>  "(abstract A)"
+        "abstract type \n\n A \n\n end"  =>  "(abstract A)"
+        "abstract type A <: B end"       =>  "(abstract (<: A B))"
+        "abstract type A <: B{T,S} end"  =>  "(abstract (<: A (curly B T S)))"
+        "abstract type A < B end"        =>  "(abstract (call-i A < B))"
+        # primitive type
+        "primitive type A 32 end"   =>  "(primitive A 32)"
+        "primitive type A 32 ; end" =>  "(primitive A 32)"
+        "primitive type A \$N end"  =>  "(primitive A (\$ N))"
+        "primitive type A <: B \n 8 \n end"  =>  "(primitive (<: A B) 8)"
+        # struct
+        "struct A <: B \n a::X \n end" =>  "(struct (<: A B) (block (::-i a X)))"
+        "struct A \n a \n b \n end"    =>  "(struct A (block a b))"
+        "struct A \n \"doca\" \n a \n \"docb\" \n b \n end"    =>  "(struct A (block (doc (string \"doca\") a) (doc (string \"docb\") b)))"
+        "mutable struct A end"         =>  "(struct-mut A (block))"
+        ((v=v"1.8",), "struct A const a end") => "(struct A (block (const a)))"
+        ((v=v"1.7",), "struct A const a end") => "(struct A (block (error (const a))))"
+        "struct A end"    =>  "(struct A (block))"
+        "struct try end"  =>  "(struct (error try) (block))"
+        # return
+        "return\nx"   =>  "(return)"
+        "return)"     =>  "(return)"
+        "return x"    =>  "(return x)"
+        "return x,y"  =>  "(return (tuple x y))"
+        # break/continue
+        "break"    => "(break)"
+        "continue" => "(continue)"
+        # module/baremodule
+        "module A end"      =>  "(module A (block))"
+        "baremodule A end"  =>  "(module-bare A (block))"
+        "module do \n end"  =>  "(module (error do) (block))"
+        "module \$A end"    =>  "(module (\$ A) (block))"
+        "module A \n a \n b \n end"  =>  "(module A (block a b))"
+        """module A \n "x"\na\n end""" => """(module A (block (doc (string "x") a)))"""
+        # export
+        "export a"   =>  "(export a)"
+        "export @a"  =>  "(export (macro_name a))"
+        "export @var\"'\"" =>  "(export (macro_name (var ')))"
+        "export a, \n @b"  =>  "(export a (macro_name b))"
+        "export +, =="     =>  "(export + ==)"
+        "export \n a"      =>  "(export a)"
+        "export \$a, \$(a*b)"  =>  "(export (\$ a) (\$ (parens (call-i a * b))))"
+        "export (x::T)"  =>  "(export (error (parens (::-i x T))))"
+        "export outer"  =>  "(export outer)"
+        "export (\$f)"  =>  "(export (parens (\$ f)))"
+    ],
+    JuliaSyntax.parse_if_elseif => [
+        "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif b (block yy) (block zz)))"
+        "if end"        =>  "(if (error) (block))"
+        "if \n end"     =>  "(if (error) (block))"
+        "if a end"      =>  "(if a (block))"
+        "if a xx end"   =>  "(if a (block xx))"
+        "if a \n\n xx \n\n end"   =>  "(if a (block xx))"
+        "if a xx elseif b yy end"   =>  "(if a (block xx) (elseif b (block yy)))"
+        "if a xx else if b yy end"  =>  "(if a (block xx) (error-t) (elseif b (block yy)))"
+        "if a xx else yy end"   =>  "(if a (block xx) (block yy))"
+        "if true; x ? true elseif true end"  => "(if true (block (if x true (error-t) (error-t))) (elseif true (block)))"
+        "if true; x ? true end"  => "(if true (block (if x true (error-t) (error-t))))"
+        "if true; x ? true\nend"  => "(if true (block (if x true (error-t) (error-t))))"
+        "if true; x ? true : elseif true end"  => "(if true (block (if x true (error-t))) (elseif true (block)))"
+    ],
+    JuliaSyntax.parse_resword => [
+        "global x"    =>  "(global x)"
+        "local x"     =>  "(local x)"
+        "global x,y"  =>  "(global x y)"
+        "global const x = 1" => "(global (const (= x 1)))"
+        "local const x = 1"  => "(local (const (= x 1)))"
+        "const global x = 1" => "(const (global (= x 1)))"
+        "const local x = 1"  => "(const (local (= x 1)))"
+        "const x,y = 1,2"    => "(const (= (tuple x y) (tuple 1 2)))"
+        "const x = 1"    =>  "(const (= x 1))"
+        "const x .= 1"   => "(error (const (.= x 1)))"
+        "global x ~ 1"   =>  "(global (call-i x ~ 1))"
+        "global x += 1"  => "(global (op= x + 1))"
+        "const x"        => "(error (const x))"
+        "global const x" => "(global (error (const x)))"
+        "const global x" => "(error (const (global x)))"
+    ],
+    JuliaSyntax.parse_resword => [
+        # Macros and functions
+        "macro while(ex) end"  =>  "(macro (call (error while) ex) (block))"
+        "macro f()     end"    =>  "(macro (call f) (block))"
+        "macro (:)(ex) end"    =>  "(macro (call (parens :) ex) (block))"
+        "macro (type)(ex) end" =>  "(macro (call (parens type) ex) (block))"
+        "macro \$f()    end"   =>  "(macro (call (\$ f)) (block))"
+        "macro (\$f)()  end"   =>  "(macro (call (parens (\$ f))) (block))"
+        "function (x) body end"=>  "(function (tuple-p x) (block body))"
+        "function (x,y) end"   =>  "(function (tuple-p x y) (block))"
+        "function (x,y,) end"  =>  "(function (tuple-p-, x y) (block))"
+        "function (x=1) end"   =>  "(function (tuple-p (= x 1)) (block))"
+        "function (;x=1) end"  =>  "(function (tuple-p (parameters (= x 1))) (block))"
+        "function (f(x),) end" =>  "(function (tuple-p-, (call f x)) (block))"
+        "function (@f(x);) end" => "(function (tuple-p (macrocall-p (macro_name f) x) (parameters)) (block))"
+        "function (@f(x)...) end" =>  "(function (tuple-p (... (macrocall-p (macro_name f) x))) (block))"
+        "function (@f(x)) end" =>  "(function (error (tuple-p (macrocall-p (macro_name f) x))) (block))"
+        "function (\$f) end"   =>  "(function (error (tuple-p (\$ f))) (block))"
+        "function ()(x) end"   =>  "(function (call (tuple-p) x) (block))"
+        "function (A).f() end" =>  "(function (call (. (parens A) f)) (block))"
+        "function (:)() end"   =>  "(function (call (parens :)) (block))"
+        "function (x::T)() end"=>  "(function (call (parens (::-i x T))) (block))"
+        "function (::g(x))() end" => "(function (call (parens (::-pre (call g x)))) (block))"
+        "function (f::T{g(i)})() end" => "(function (call (parens (::-i f (curly T (call g i))))) (block))"
+        "function (::T)() end" =>  "(function (call (parens (::-pre T))) (block))"
+        "function (\n        ::T\n        )() end" =>  "(function (call (parens (::-pre T))) (block))"
+        "function (\n        x::T\n        )() end" =>  "(function (call (parens (::-i x T))) (block))"
+        "function (\n        f\n        )() end" =>  "(function (call (parens f)) (block))"
+        "function (\n        A\n        ).f() end" =>  "(function (call (. (parens A) f)) (block))"
+        "function (\n        ::T\n        )(x, y) end" =>  "(function (call (parens (::-pre T)) x y) (block))"
+        "function (\n        f::T{g(i)}\n        )() end" => "(function (call (parens (::-i f (curly T (call g i))))) (block))"
+        "function (\n        x, y\n        ) x + y end" => "(function (tuple-p x y) (block (call-i x + y)))"
+        "function (:*=(f))() end" => "(function (call (parens (call (quote-: *=) f))) (block))"
+        "function begin() end" =>  "(function (call (error begin)) (block))"
+        "function f() end"     =>  "(function (call f) (block))"
+        "function type() end"  =>  "(function (call type) (block))"
+        "function \n f() end"  =>  "(function (call f) (block))"
+        "function \$f() end"   =>  "(function (call (\$ f)) (block))"
+        "function (::Type{T})(x) end"  =>  "(function (call (parens (::-pre (curly Type T))) x) (block))"
+        # Function/macro definition with no methods
+        "function f end"      =>  "(function f)"
+        "function f \n\n end" =>  "(function f)"
+        "function \$f end"    =>  "(function (\$ f))"
+        "function var\".\" end" => "(function (var .))"
+        "macro f end"         =>  "(macro f)"
+        # Function argument list
+        "function f(x,y) end"    =>  "(function (call f x y) (block))"
+        "function f{T}() end"    =>  "(function (call (curly f T)) (block))"
+        "function A.f()   end"   =>  "(function (call (. A f)) (block))"
+        "function f body end"    =>  "(function (error f) (block body))"
+        "function f()::T    end" =>  "(function (::-i (call f) T) (block))"
+        "function f()::g(T) end" =>  "(function (::-i (call f) (call g T)) (block))"
+        "function f() where {T} end"  => "(function (where (call f) (braces T)) (block))"
+        "function f() where T   end"  => "(function (where (call f) T) (block))"
+        "function f()::S where T end" => "(function (where (::-i (call f) S) T) (block))"
+        # Ugly cases for compat where extra parentheses existed and we've
+        # already parsed at least the call part of the signature
+        "function (f() where T) end" => "(function (parens (where (call f) T)) (block))"
+        "function (f()) where T end" => "(function (where (parens (call f)) T) (block))"
+        "function (f() where T) where U end" => "(function (where (parens (where (call f) T)) U) (block))"
+        "function (f()::S) end"=>  "(function (parens (::-i (call f) S)) (block))"
+        "function ((f()::S) where T) end" => "(function (parens (where (parens (::-i (call f) S)) T)) (block))"
+        "function (x*y ) end" => "(function (parens (call-i x * y)) (block))"
+        # body
+        "function f() \n a \n b end"  =>  "(function (call f) (block a b))"
+        "function f() end"       =>  "(function (call f) (block))"
+        # Macrocall as sig
+        ((v=v"1.12",), "function @callmemacro(a::Int) \n 1 \n end") => "(function (macrocall-p (macro_name callmemacro) (::-i a Int)) (block 1))"
+        ((v=v"1.12",), "function @callmemacro(a::T, b::T) where T <: Int64\n3\nend") => "(function (where (macrocall-p (macro_name callmemacro) (::-i a T) (::-i b T)) (<: T Int64)) (block 3))"
+        ((v=v"1.12",), "function @callmemacro(a::Int, b::Int, c::Int)::Float64\n4\nend") => "(function (::-i (macrocall-p (macro_name callmemacro) (::-i a Int) (::-i b Int) (::-i c Int)) Float64) (block 4))"
+        ((v=v"1.12",), "function @f()() end") => "(function (call (macrocall-p (macro_name f))) (block))"
+        # Errors
+        "function"            => "(function (error (error)) (block (error)) (error-t))"
+    ],
+    JuliaSyntax.parse_try => [
+        "try \n x \n catch e \n y \n finally \n z end" =>
+            "(try (block x) (catch e (block y)) (finally (block z)))"
+        ((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") =>
+            "(try (block x) (catch e (block y)) (else (block z)) (finally (block w)))"
+        "try x catch end"       =>  "(try (block x) (catch □ (block)))"
+        "try x catch ; y end"   =>  "(try (block x) (catch □ (block y)))"
+        "try x catch \n y end"  =>  "(try (block x) (catch □ (block y)))"
+        "try x catch e y end"   =>  "(try (block x) (catch e (block y)))"
+        "try x catch \$e y end" =>  "(try (block x) (catch (\$ e) (block y)))"
+        "try x catch var\"#\" y end" => "(try (block x) (catch (var #) (block y)))"
+        "try x catch e+3 y end" =>  "(try (block x) (catch (error (call-i e + 3)) (block y)))"
+        "try x finally y end"   =>  "(try (block x) (finally (block y)))"
+        # v1.8 only
+        ((v=v"1.8",), "try catch ; else end") => "(try (block) (catch □ (block)) (else (block)))"
+        ((v=v"1.8",), "try else x finally y end") => "(try (block) (else (error (block x))) (finally (block y)))"
+        ((v=v"1.7",), "try catch ; else end")  =>  "(try (block) (catch □ (block)) (else (error (block))))"
+        # finally before catch :-(
+        "try x finally y catch e z end"  =>  "(try (block x) (finally (block y)) (catch e (block z)))"
+        "try x end" => "(try (block x) (error-t))"
+    ],
+    JuliaSyntax.parse_imports => [
+        "import A as B: x" => "(import (: (error (as (importpath A) B)) (importpath x)))"
+        "import A, y"      => "(import (importpath A) (importpath y))"
+        "import A: +, =="  => "(import (: (importpath A) (importpath +) (importpath ==)))"
+        "import A: x, y"   => "(import (: (importpath A) (importpath x) (importpath y)))"
+        "import A: x, B: y" => "(import (: (importpath A) (importpath x) (importpath B) (error-t (importpath y))))"
+        "import A: x"      => "(import (: (importpath A) (importpath x)))"
+        "using  A"         => "(using (importpath A))"
+        "import A"         => "(import (importpath A))"
+        # parse_import
+        "import A: x, y"   =>  "(import (: (importpath A) (importpath x) (importpath y)))"
+        "import A as B"    =>  "(import (as (importpath A) B))"
+        "import A: x as y" =>  "(import (: (importpath A) (as (importpath x) y)))"
+        "using  A: x as y" =>  "(using (: (importpath A) (as (importpath x) y)))"
+        ((v=v"1.5",), "import A as B") =>  "(import (error (as (importpath A) B)))"
+        "using A as B"     =>  "(using (error (as (importpath A) B)))"
+        "using A, B as C"  =>  "(using (importpath A) (error (as (importpath B) C)))"
+        # parse_import_path
+        # When parsing import we must split initial dots into nontrivial
+        # leading dots for relative paths
+        "import .A"     =>  "(import (importpath . A))"
+        "import ..A"    =>  "(import (importpath . . A))"
+        "import ...A"   =>  "(import (importpath . . . A))"
+        "import ....A"  =>  "(import (importpath . . . . A))"
+        # Dots with spaces are allowed (a misfeature?)
+        "import . .A"   =>  "(import (importpath . . A))"
+        # Modules with operator symbol names
+        "import .⋆"     =>  "(import (importpath . ⋆))"
+        # Expressions allowed in import paths
+        "import @x"     =>  "(import (importpath (macro_name x)))"
+        "import \$A"    =>  "(import (importpath (\$ A)))"
+        "import \$A.@x" =>  "(import (importpath (\$ A) (macro_name x)))"
+        "import A.B"    =>  "(import (importpath A B))"
+        "import A.B.C"  =>  "(import (importpath A B C))"
+        "import A.:+"   =>  "(import (importpath A (quote-: +)))"
+        "import A.(:+)" =>  "(import (importpath A (parens (quote-: +))))"
+        "import A.:(+)" =>  "(import (importpath A (quote-: (parens +))))"
+        "import A.=="   =>  "(import (importpath A ==))"
+        "import A.⋆.f"  =>  "(import (importpath A ⋆ f))"
+        "import A..."   =>  "(import (importpath A ..))"
+        "import A; B"   =>  "(import (importpath A))"
+        # Colons not allowed first in import paths
+        # but are allowed in trailing components (#473)
+        "using :A"         =>  "(using (importpath (error (quote-: A))))"
+        "using A: :b"      =>  "(using (: (importpath A) (importpath (error (quote-: b)))))"
+        "using A: b.:c"    =>  "(using (: (importpath A) (importpath b (quote-: c))))"
+        # Syntactic operators not allowed in import
+    ],
+    JuliaSyntax.parse_iteration_specs => [
+        "i = rhs"        =>  "(iteration (in i rhs))"
+        "i in rhs"       =>  "(iteration (in i rhs))"
+        "i ∈ rhs"        =>  "(iteration (in i rhs))"
+        "i = 1:10"       =>  "(iteration (in i (call-i 1 : 10)))"
+        "(i,j) in iter"  =>  "(iteration (in (tuple-p i j) iter))"
+        "outer = rhs"       =>  "(iteration (in outer rhs))"
+        "outer <| x = rhs"  =>  "(iteration (in (call-i outer <| x) rhs))"
+        "outer i = rhs"     =>  "(iteration (in (outer i) rhs))"
+        "outer (x,y) = rhs" =>  "(iteration (in (outer (tuple-p x y)) rhs))"
+    ],
+    JuliaSyntax.parse_paren => [
+        # Tuple syntax with commas
+        "()"          =>  "(tuple-p)"
+        "(x,)"        =>  "(tuple-p-, x)"
+        "(x,y)"       =>  "(tuple-p x y)"
+        "(x=1, y=2)"  =>  "(tuple-p (= x 1) (= y 2))"
+        # Named tuples with initial semicolon
+        "(;)"         =>  "(tuple-p (parameters))"
+        "(; a=1)"     =>  "(tuple-p (parameters (= a 1)))"
+        # Extra credit: nested parameters and frankentuples
+        "(x...; y)"       => "(tuple-p (... x) (parameters y))"
+        "(x...;)"         => "(tuple-p (... x) (parameters))"
+        "(; a=1; b=2)"    => "(tuple-p (parameters (= a 1)) (parameters (= b 2)))"
+        "(a; b; c,d)"     => "(tuple-p a (parameters b) (parameters c d))"
+        "(a=1, b=2; c=3)" => "(tuple-p (= a 1) (= b 2) (parameters (= c 3)))"
+        # Block syntax
+        "(;;)"        =>  "(block-p)"
+        "(a=1;)"      =>  "(block-p (= a 1))"
+        "(a;b;;c)"    =>  "(block-p a b c)"
+        "(a=1; b=2)"  =>  "(block-p (= a 1) (= b 2))"
+        # Following is an error for flisp compatibility. But it could be
+        # allowed as valid block syntax in the future?
+        "(y for x = xs; a)" => "(parens (generator y (iteration (in x xs))) (error-t ✘ a))"
+        # Parentheses used for grouping
+        "(a * b)"     =>  "(parens (call-i a * b))"
+        "(a=1)"       =>  "(parens (= a 1))"
+        "(x)"         =>  "(parens x)"
+        "(a...)"      =>  "(parens (... a))"
+        # Generators
+        "(x for a in as)"       =>  "(parens (generator x (iteration (in a as))))"
+        "(x \n\n for a in as)"  =>  "(parens (generator x (iteration (in a as))))"
+        # Range parsing in parens
+        "(1:\n2)" => "(parens (call-i 1 : 2))"
+        "(1:2)" => "(parens (call-i 1 : 2))"
+    ],
+    JuliaSyntax.parse_atom => [
+        # char literal
+        "'a'"           =>  "(char 'a')"
+        "'α'"           =>  "(char 'α')"
+        "'\\xce\\xb1'"  =>  "(char 'α')"
+        "'\\u03b1'"     =>  "(char 'α')"
+        "'\\U1D7DA'"    =>  "(char '𝟚')"
+        "'a"            =>  "(char 'a' (error-t))"
+        "''"            =>  "(char (error))"
+        "'"             =>  "(char (error))"
+        # symbol/expression quote
+        ":foo"   => "(quote-: foo)"
+        # Literal colons
+        ":)"     => ":"
+        ": end"  => ":"
+        # Whitespace after quoting colon
+        ": foo"  => "(quote-: (error-t) foo)"
+        ":\nfoo" => "(quote-: (error-t) foo)"
+        # plain equals
+        "="      => "(error =)"
+        # Identifiers
+        "xx"     => "xx"
+        "x₁"     => "x₁"
+        # var syntax
+        """var"x" """   =>  "(var x)"
+        # var syntax raw string unescaping
+        "var\"\""          =>  "(var )"
+        "var\"\\\"\""      =>  "(var \")"
+        "var\"\\\\\\\"\""  =>  "(var \\\")"
+        "var\"\\\\x\""     =>  "(var \\\\x)"
+        # trailing syntax after var
+        """var"x"+"""   =>  "(var x)"
+        """var"x")"""   =>  "(var x)"
+        """var"x"("""   =>  "(var x)"
+        """var"x"end""" =>  "(var x (error-t))"
+        """var"x"1"""   =>  "(var x (error-t))"
+        """var"x"y"""   =>  "(var x (error-t))"
+        # Standalone syntactic operators are errors
+        "?"   =>  "(error ?)"
+        "&&"  =>  "(error &&)"
+        "||"  =>  "(error ||)"
+        "."   =>  "(error .)"
+        "..." =>  "(error ...)"
+        "+="  =>  "(error +=)"
+        "-="  =>  "(error -=)"
+        "*="  =>  "(error *=)"
+        "/="  =>  "(error /=)"
+        "//=" =>  "(error //=)"
+        "|="  =>  "(error |=)"
+        "^="  =>  "(error ^=)"
+        "÷="  =>  "(error ÷=)"
+        "%="  =>  "(error %=)"
+        "<<=" =>  "(error <<=)"
+        ">>=" =>  "(error >>=)"
+        ">>>="=>  "(error >>>=)"
+        "\\=" =>  "(error \\=)"
+        "&="  =>  "(error &=)"
+        ":="  =>  "(error :=)"
+        "\$=" =>  "(error \$=)"
+        "⊻="  =>  "(error ⊻=)"
+        ".+=" =>  "(error (. +=))"
+        # Normal operators
+        "+"  =>  "+"
+        # Assignment-precedence operators which can be used as identifiers
+        "~"  =>  "~"
+        "≔"  =>  "≔"
+        "⩴"  =>  "⩴"
+        "≕"  =>  "≕"
+        # Quoted syntactic operators allowed
+        ":+="  =>  "(quote-: +=)"
+        ":.+=" =>  "(quote-: (. +=))"
+        ":.="  =>  "(quote-: (. =))"
+        ":.&&" =>  "(quote-: (. &&))"
+        # Special symbols quoted
+        ":end" => "(quote-: end)"
+        ":(end)" => "(quote-: (parens (error-t)))"
+        ":<:"  => "(quote-: <:)"
+        # unexpected =
+        "="    => "(error =)"
+        # parse_cat
+        "[]"        =>  "(vect)"
+        "[x,]"      =>  "(vect-, x)"
+        "[x,y,]"    =>  "(vect-, x y)"
+        "[x\n,,]"   =>  "(vect-, x (error-t ✘))"
+        "[x]"       =>  "(vect x)"
+        "[x \n ]"   =>  "(vect x)"
+        "[x \n, ]"  =>  "(vect-, x)"
+        "[x"        =>  "(vect x (error-t))"
+        "[x \n\n ]" =>  "(vect x)"
+        "[x for a in as]"  =>  "(comprehension (generator x (iteration (in a as))))"
+        "[x \n\n for a in as]"  =>  "(comprehension (generator x (iteration (in a as))))"
+        # parse_generator
+        "(x for a in as for b in bs)" => "(parens (generator x (iteration (in a as)) (iteration (in b bs))))"
+        "(x for a in as, b in bs)" => "(parens (generator x (iteration (in a as) (in b bs))))"
+        "(x for a in as, b in bs if z)" => "(parens (generator x (filter (iteration (in a as) (in b bs)) z)))"
+        "(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (iteration (in a as) (in b bs)) (iteration (in c cs) (in d ds))))"
+        "(x for a in as for b in bs if z)" => "(parens (generator x (iteration (in a as)) (filter (iteration (in b bs)) z)))"
+        "(x for a in as if z for b in bs)" => "(parens (generator x (filter (iteration (in a as)) z) (iteration (in b bs))))"
+        "[x for a = as for b = bs if cond1 for c = cs if cond2]"  =>  "(comprehension (generator x (iteration (in a as)) (filter (iteration (in b bs)) cond1) (filter (iteration (in c cs)) cond2)))"
+        "[x for a = as if begin cond2 end]"  =>  "(comprehension (generator x (filter (iteration (in a as)) (block cond2))))"
+        "[(x)for x in xs]"  =>  "(comprehension (generator (parens x) (error-t) (iteration (in x xs))))"
+        "(x for a in as if z)" => "(parens (generator x (filter (iteration (in a as)) z)))"
+        # parse_vect
+        "[x, y]"        =>  "(vect x y)"
+        "[x, y,]"       =>  "(vect-, x y)"
+        "[x,\n y]"      =>  "(vect x y)"
+        "[x\n, y]"      =>  "(vect x y)"
+        "[x\n,, y]"     =>  "(vect-, x (error-t ✘ y))"
+        "[x,y ; z]"     =>  "(vect x y (parameters z))"
+        "[x=1, y=2]"    =>  "(vect (= x 1) (= y 2))"
+        "[x=1, ; y=2]"  =>  "(vect (= x 1) (parameters (= y 2)))"
+        # parse_paren
+        ":(=)"  =>  "(quote-: (parens =))"
+        ":(::)"  =>  "(quote-: (parens ::))"
+        ":(::\n)" => "(quote-: (parens ::))"
+        "(function f \n end)" => "(parens (function f))"
+        # braces
+        "{x,y}"      =>  "(braces x y)"
+        "{x,y,}"     =>  "(braces-, x y)"
+        "{x y}"      =>  "(bracescat (row x y))"
+        ((v=v"1.7",), "{x ;;; y}") =>  "(bracescat (nrow-3 x y))"
+        ((v=v"1.7",), "{a ;; b}") =>  "(bracescat (nrow-2 a b))"
+        ((v=v"1.7",), "{a ;;;; b}") =>  "(bracescat (nrow-4 a b))"
+        # Macro names can be keywords
+        "@end x" => "(macrocall (macro_name end) x)"
+        # __dot__ macro
+        "@. x" => "(macrocall (macro_name .) x)"
+        # cmd strings
+        "``"         =>  "(cmdstring-r \"\")"
+        "`cmd`"      =>  "(cmdstring-r \"cmd\")"
+        "```cmd```"  =>  "(cmdstring-s-r \"cmd\")"
+        # literals
+        "true" => "true"
+        "42"   => "42"
+        "1.0e-1000"   => "0.0"
+        "0x123456789abcdefp+0" => "8.19855292164869e16"
+        # closing tokens
+        ")"    => "(error)"
+    ],
+    JuliaSyntax.parse_atom => [
+        # Actually parse_array
+        # Normal matrix construction syntax
+        "[x y ; z w]"  =>  "(vcat (row x y) (row z w))"
+        "[x y ; z w ; a b]"  =>  "(vcat (row x y) (row z w) (row a b))"
+        "[x ; y ; z]"  =>  "(vcat x y z)"
+        "[x;]"  =>  "(vcat x)"
+        "[x y]"  =>  "(hcat x y)"
+        # Early abort in array parsing
+        "[x@y"   =>  "(hcat x (error-t ✘ y))"
+        "[x@y]"  =>  "(hcat x (error-t ✘ y))"
+        # Mismatched rows
+        "[x y ; z]"  =>  "(vcat (row x y) z)"
+        # Single elements in rows
+        ((v=v"1.7",), "[x ; y ;; z ]")  =>  "(ncat-2 (nrow-1 x y) z)"
+        ((v=v"1.7",), "[x  y ;;; z ]")  =>  "(ncat-3 (row x y) z)"
+        # Higher dimensional ncat
+        # Row major
+        ((v=v"1.7",), "[x y ; z w ;;; a b ; c d]")  =>
+            "(ncat-3 (nrow-1 (row x y) (row z w)) (nrow-1 (row a b) (row c d)))"
+        # Column major
+        ((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b ;; c ; d]")  =>
+            "(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))"
+        # Dimension 4 ncat
+        ((v=v"1.7",), "[x ;;;; y]")  =>  "(ncat-4 x y)"
+        ((v=v"1.7",), "[a ; b ;;;; c ; d]")  =>  "(ncat-4 (nrow-1 a b) (nrow-1 c d))"
+        ((v=v"1.7",), "[a b ; c d ;;;; e f ; g h]")  =>
+            "(ncat-4 (nrow-1 (row a b) (row c d)) (nrow-1 (row e f) (row g h)))"
+        # Array separators
+        # Newlines before semicolons are not significant
+        "[a \n ;]"  =>  "(vcat a)"
+        # Newlines after semicolons are not significant
+        "[a ; \n]"  =>  "(vcat a)"
+        "[a ; \n\n b]"  =>  "(vcat a b)"
+        ((v=v"1.7",), "[a ;; \n b]")  =>  "(ncat-2 a b)"
+        # In hcat with spaces as separators, `;;` is a line
+        # continuation character
+        ((v=v"1.7",), "[a b ;; \n c]")  =>  "(hcat a b c)"
+        ((v=v"1.7",), "[a b \n ;; c]")  =>  "(ncat-2 (row a b (error-t)) c)"
+        # Can't mix spaces and multiple ;'s
+        ((v=v"1.7",), "[a b ;; c]")  =>  "(ncat-2 (row a b (error-t)) c)"
+        # Linebreaks not significant before closing `]`
+        "[a b\n\n]" =>  "(hcat a b)"
+        # Treat a linebreak prior to a value as a semicolon (ie, separator for
+        # the first dimension) if no previous semicolons observed
+        "[a \n b]"  =>  "(vcat a b)"
+        # Can't mix multiple ;'s and spaces
+        ((v=v"1.7",), "[a ;; b c]")  =>  "(ncat-2 a (row b (error-t) c))"
+        # Empty N-dimensional arrays
+        ((v=v"1.8",), "[;]")   =>  "(ncat-1)"
+        ((v=v"1.8",), "[;;]")  =>  "(ncat-2)"
+        ((v=v"1.8",), "[\n  ;; \n ]")  =>  "(ncat-2)"
+        ((v=v"1.7",), "[;;]")  =>  "(ncat-2 (error))"
+        # parse_string
+        "\"\"\"\n\$x\n a\"\"\"" => "(string-s x \"\\n\" \" a\")"
+        "\"a \$(x + y) b\""  =>  "(string \"a \" (parens (call-i x + y)) \" b\")"
+        "\"hi\$(\"ho\")\""   =>  "(string \"hi\" (parens (string \"ho\")))"
+        "\"\$(x,y)\""        =>  "(string (parens (error x y)))"
+        "\"\$(x;y)\""        =>  "(string (parens (error x y)))"
+        "\"\$(x for y in z)\"" => "(string (parens (error (generator x (iteration (in y z))))))"
+        "\"\$((x for y in z))\"" => "(string (parens (parens (generator x (iteration (in y z))))))"
+        "\"\$(xs...)\""  =>  "(string (parens (... xs)))"
+        "\"a \$foo b\""  =>  "(string \"a \" foo \" b\")"
+        "\"\$var\""      =>  "(string var)"
+        "\"\$outer\""    =>  "(string outer)"
+        "\"\$in\""       =>  "(string in)"
+        # Triple-quoted dedenting:
+        "\"\"\"\nx\"\"\""   =>  raw"""(string-s "x")"""
+        "\"\"\"\n\nx\"\"\"" =>  raw"""(string-s "\n" "x")"""
+        "```\n x\n y```"    =>  raw"""(cmdstring-s-r "x\n" "y")"""
+        # Various newlines (\n \r \r\n) and whitespace (' ' \t)
+        "\"\"\"\n x\n y\"\"\""  =>  raw"""(string-s "x\n" "y")"""
+        "\"\"\"\r x\r y\"\"\""  =>  raw"""(string-s "x\n" "y")"""
+        "\"\"\"\r\n x\r\n y\"\"\""  =>  raw"""(string-s "x\n" "y")"""
+        # Spaces or tabs or mixtures acceptable
+        "\"\"\"\n\tx\n\ty\"\"\""  =>  raw"""(string-s "x\n" "y")"""
+        "\"\"\"\n \tx\n \ty\"\"\""  =>  raw"""(string-s "x\n" "y")"""
+        # Mismatched tab vs space not deindented
+        # Find minimum common prefix in mismatched whitespace
+        "\"\"\"\n\tx\n y\"\"\""  =>  raw"""(string-s "\tx\n" " y")"""
+        "\"\"\"\n x\n  y\"\"\""  =>  raw"""(string-s "x\n" " y")"""
+        "\"\"\"\n  x\n y\"\"\""  =>  raw"""(string-s " x\n" "y")"""
+        "\"\"\"\n \tx\n  y\"\"\""  =>  raw"""(string-s "\tx\n" " y")"""
+        "\"\"\"\n  x\n \ty\"\"\""  =>  raw"""(string-s " x\n" "\ty")"""
+        # Empty lines don't affect dedenting
+        "\"\"\"\n x\n\n y\"\"\""  =>  raw"""(string-s "x\n" "\n" "y")"""
+        # Non-empty first line doesn't participate in deindentation
+        "\"\"\" x\n y\"\"\""  =>  raw"""(string-s " x\n" "y")"""
+        # Dedenting and interpolations
+        "\"\"\"\n  \$a\n  \$b\"\"\""  =>  raw"""(string-s a "\n" b)"""
+        "\"\"\"\n  \$a \n  \$b\"\"\""  =>  raw"""(string-s a " \n" b)"""
+        "\"\"\"\n  \$a\n  \$b\n\"\"\""  =>  raw"""(string-s "  " a "\n" "  " b "\n")"""
+        # Empty chunks after dedent are removed
+        "\"\"\"\n \n \"\"\""  =>  "(string-s \"\\n\")"
+        # Newline at end of string
+        "\"\"\"\n x\n y\n\"\"\""  =>  raw"""(string-s " x\n" " y\n")"""
+        # Empty strings, or empty after triple quoted processing
+        "\"\""              =>  "(string \"\")"
+        "\"\"\"\n  \"\"\""  =>  "(string-s \"\")"
+        # Missing delimiter
+        "\"str"  =>  "(string \"str\" (error-t))"
+        # String interpolations
+        "\"\$x\$y\$z\""  =>  "(string x y z)"
+        "\"\$(x)\""  =>  "(string (parens x))"
+        "\"\$x\""  =>  "(string x)"
+        # Strings with embedded whitespace trivia
+        "\"a\\\nb\""     =>  raw"""(string "a" "b")"""
+        "\"a\\\rb\""     =>  raw"""(string "a" "b")"""
+        "\"a\\\r\nb\""   =>  raw"""(string "a" "b")"""
+        "\"a\\\n \tb\""  =>  raw"""(string "a" "b")"""
+        # Strings with only a single valid string chunk
+        "\"str\""     => "(string \"str\")"
+        "\"a\\\n\""   => "(string \"a\")"
+        "\"a\\\r\""   => "(string \"a\")"
+        "\"a\\\r\n\"" => "(string \"a\")"
+    ],
+    JuliaSyntax.parse_atom => [
+        # errors in literals
+        "\"\\xqqq\""  =>  "(string (ErrorInvalidEscapeSequence))"
+        "'\\xq'"      =>  "(char (ErrorInvalidEscapeSequence))"
+        "'ab'"        =>  "(char (ErrorOverLongCharacter))"
+        "\"\xf5\""    =>  "(string (ErrorInvalidUTF8))"
+        "'\xf5'"      =>  "(char (ErrorInvalidUTF8))"
+        "`\xf5`"      =>  "(cmdstring-r (ErrorInvalidUTF8))"
+        "10.0e1000'"  =>  "(ErrorNumericOverflow)"
+        "10.0f100'"   =>  "(ErrorNumericOverflow)"
+    ],
+    JuliaSyntax.parse_stmts => with_version.(v"1.11", [
+        "function f(public)\n    public + 3\nend"       => "(function (call f public) (block (call-i public + 3)))"
+        "public A, B"                                   => "(public A B)"
+        "if true \n public *= 4 \n end"                 => "(if true (block (op= public * 4)))"
+        "module Mod\n public A, B \n end"               => "(module Mod (block (public A B)))"
+        "module Mod2\n a = 3; b = 6; public a, b\n end" => "(module Mod2 (block (= a 3) (= b 6) (public a b)))"
+        "a = 3; b = 6; public a, b"                     => "(toplevel-; (= a 3) (= b 6) (public a b))"
+        "begin \n public A, B \n end"                   => PARSE_ERROR
+        "if true \n public A, B \n end"                 => PARSE_ERROR
+        "public export=true foo, bar"                   => PARSE_ERROR # but these may be
+        "public experimental=true foo, bar"             => PARSE_ERROR # supported soon ;)
+        "public(x::String) = false"                     => "(function-= (call public (::-i x String)) false)"
+        "module M; export @a; end"                      => "(module M (block (export (macro_name a))))"
+        "module M; public @a; end"                      => "(module M (block (public (macro_name a))))"
+        "module M; export ⤈; end"                       => "(module M (block (export ⤈)))"
+        "module M; public ⤈; end"                       => "(module M (block (public ⤈)))"
+        "public = 4"                                    => "(= public 4)"
+        "public[7] = 5"                                 => "(= (ref public 7) 5)"
+        "public() = 6"                                  => "(function-= (call public) 6)"
+    ]),
+    JuliaSyntax.parse_stmts => [
+        ((v = v"1.12",), "@callmemacro(b::Float64) = 2") => "(= (macrocall-p (macro_name callmemacro) (::-i b Float64)) 2)"
+    ],
+    JuliaSyntax.parse_docstring => [
+        """ "notdoc" ]        """ => "(string \"notdoc\")"
+        """ "notdoc" \n]      """ => "(string \"notdoc\")"
+        """ "notdoc" \n\n foo """ => "(string \"notdoc\")"
+        """ "doc" \n foo      """ => """(doc (string "doc") foo)"""
+        """ "doc" foo         """ => """(doc (string "doc") foo)"""
+        """ "doc \$x" foo     """ => """(doc (string "doc " x) foo)"""
+        # Allow docstrings with embedded trailing whitespace trivia
+        "\"\"\"\n doc\n \"\"\" foo"  => """(doc (string-s "doc\\n") foo)"""
+    ],
+]
+
+@testset "Inline test cases" begin
+    @testset "$production" for (production, test_specs) in tests
+        @testset "$(repr(input))" for (input, output) in test_specs
+            test_parse(production, input, output)
+        end
+    end
+end
+
+parsestmt_test_specs = [
+    # whitespace before keywords in space-insensitive mode
+    "(y::\nif x z end)" => "(parens (::-i y (if x (block z))))"
+    # Contextual keyword pairs inside parentheses
+    "(abstract type X end)" => "(parens (abstract X))"
+    "(mutable struct X end)" => "(parens (struct-mut X (block)))"
+    # parsing of tricky primes
+    "x in'c'"   => "(call-i x in (char 'c'))"
+    "1where'c'" => "(where 1 (char 'c'))"
+    ":+'y'"     => "(juxtapose (call-post (quote-: +) ') (call-post y '))"
+    # unary subtype ops and newlines
+    "a +\n\n<:" => "(call-i a + <:)"
+    "for\n\n<:" => "(for (iteration (in <: (error (error-t)))) (block (error)) (error-t))"
+    # Empty character consumes trailing ' delimiter (ideally this could be
+    # tested above but we don't require the input stream to be consumed in the
+    # unit tests there.
+    "''" => "(char (error))"
+
+    # The following may not be ideal error recovery! But at least the parser
+    # shouldn't crash
+    "@(x y)" => "(macrocall (macro_name (parens x (error-t y))))"
+    "|(&\nfunction" => "(call | (& (function (error (error)) (block (error)) (error-t))) (error-t))"
+    "@(" => "(macrocall (macro_name (parens (error-t))))"
+    "x = @(" => "(= x (macrocall (macro_name (parens (error-t)))))"
+    "function(where" => "(function (tuple-p where (error-t)) (block (error)) (error-t))"
+    # Contextual keyword pairs must not be separated by newlines even within parens
+    "(abstract\ntype X end)" => "(wrapper (parens abstract (error-t type X)) (error-t end ✘))"
+    "(mutable\nstruct X end)" => "(wrapper (parens mutable (error-t struct X)) (error-t end ✘))"
+
+    # Lexer vs parser: issues detecting which tokens are string delimiters and
+    # detecting raw vs non-raw strings. The old parser was tightly coupled to
+    # the lexer and the parser state was used to disambiguate these cases.
+    "x in' '" => "(call-i x in (char (error)))"
+    "x in'``\$" => "(call-i x in (call-i (juxtapose (char '`' (error-t)) (cmdstring-r (error-t))) \$ (error)))"
+    "var\"#\"`str`" => "(juxtapose (var # (error-t)) (cmdstring-r \"str\"))"
+    "var\"#\"\"str\"" => "(juxtapose (var # (error-t)) (error-t) (string \"str\"))"
+
+    # trailing junk in generators (issue #407)
+    "(x for x = xs a)"      =>  "(parens (generator x (iteration (in x xs))) (error-t a))"
+    "(x for x = xs a, b)"   =>  "(parens (generator x (iteration (in x xs))) (error-t a ✘ b))"
+    "f(x for x = xs a)"     =>  "(call f (generator x (iteration (in x xs))) (error-t a))"
+]
+
+@testset "Parser does not crash on broken code" begin
+    @testset "$(repr(input))" for (input, output) in parsestmt_test_specs
+        test_parse(JuliaSyntax.parse_stmts, input, output)
+    end
+end
+
+parsestmt_with_kind_tests = [
+    # Most operators are semantically just normal identifiers after parsing so
+    # get the Kind K"Identifier"
+    "+"      => "+::Identifier"
+    "a + b"  => "(call-i a::Identifier +::Identifier b::Identifier)"
+    "a .+ b" => "(dotcall-i a::Identifier +::Identifier b::Identifier)"
+    "a |> b" => "(call-i a::Identifier |>::Identifier b::Identifier)"
+    "a => b" => "(call-i a::Identifier =>::Identifier b::Identifier)"
+    "a →  b" => "(call-i a::Identifier →::Identifier b::Identifier)"
+    "a < b < c" => "(comparison a::Identifier <::Identifier b::Identifier <::Identifier c::Identifier)"
+    "a .<: b"=> "(dotcall-i a::Identifier <:::Identifier b::Identifier)"
+    "a .. b" => "(call-i a::Identifier ..::Identifier b::Identifier)"
+    "a : b"  => "(call-i a::Identifier :::Identifier b::Identifier)"
+    "-2^x"   => "(call-pre -::Identifier (call-i 2::Integer ^::Identifier x::Identifier))"
+    "-(2)"   => "(call-pre -::Identifier (parens 2::Integer))"
+    "<:(a,)" => "(<:-, a::Identifier)"
+    "- 2"    => "(call-pre -::Identifier 2::Integer)"
+    "/x"     => "(call-pre (error /::Identifier) x::Identifier)"
+    "a^b"    => "(call-i a::Identifier ^::Identifier b::Identifier)"
+    "f.'"    => "(dotcall-post f::Identifier (error '::Identifier))"
+    "f'"     => "(call-post f::Identifier '::Identifier)"
+    # Standalone syntactic ops which keep their kind - they can't really be
+    # used in a sane way as identifiers or interpolated into expressions
+    # because they have their own syntactic forms.
+    ":(::)"  => "(quote-: (parens ::::::))"
+    ":(\$)"  => "(quote-: (parens \$::\$))"
+    ":(<:)"  => "(quote-: (parens <:::<:))"
+    ":(&&)"  => "(quote-: (parens &&::&&))"
+    ":(=)"   => "(quote-: (parens =::=))"
+    "a := b" => "(:= a::Identifier b::Identifier)"
+    "a += b" => "(op= a::Identifier +::Identifier b::Identifier)"
+    "a .+= b" => "(.op= a::Identifier +::Identifier b::Identifier)"
+    "a >>= b" => "(op= a::Identifier >>::Identifier b::Identifier)"
+    ":+="    => "(quote-: +=::op=)"
+    ":.+="   => "(quote-: (. +=::op=))"
+    # str/cmd macro name kinds
+    "x\"str\""   => """(macrocall x::StrMacroName (string-r "str"::String))"""
+    "x`str`"     => """(macrocall x::CmdMacroName (cmdstring-r "str"::CmdString))"""
+]
+
+@testset "parser `Kind` remapping" begin
+    @testset "$(repr(input))" for (input, output) in parsestmt_with_kind_tests
+        input = ((show_kind=true,), input)
+        test_parse(JuliaSyntax.parse_stmts, input, output)
+    end
+end
+
+@testset "Trivia attachment" begin
+    # TODO: Need to expand this greatly to cover as many forms as possible!
+    @test show_green_tree("f(a;b)") == """
+         1:6      │[toplevel]
+         1:6      │  [call]
+         1:1      │    Identifier           ✔   "f"
+         2:2      │    (                        "("
+         3:3      │    Identifier           ✔   "a"
+         4:5      │    [parameters]
+         4:4      │      ;                      ";"
+         5:5      │      Identifier         ✔   "b"
+         6:6      │    )                        ")"
+    """
+end
+
+@testset "Unicode normalization in tree conversion" begin
+    # ɛµ normalizes to εμ
+    @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)"
+    @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall (macro_name \u03B5\u03BC))"
+    @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str (string-r \"\"))"
+    @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd (cmdstring-r \"\"))"
+    # · and · normalize to ⋅
+    @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)"
+    @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)"
+    # − ('\u2212') normalizes to - ('\u002d')
+    @test parse_to_sexpr_str(JuliaSyntax.parse_expr, "a \u2212 b")  == "(call-i a - b)"
+    @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u2212= b") == "(op= a - b)"
+    @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a .\u2212= b") == "(.op= a - b)"
+end
+
+@testset "Unbalanced bidirectional unicode" begin
+    # https://trojansource.codes
+    @test_throws JuliaSyntax.ParseError parsestmt(GreenNode, """
+    function checkUserAccess(u::User)
+        if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066"
+            return true
+        end
+        return false
+    end
+    """)
+
+    @test_throws JuliaSyntax.ParseError parsestmt(GreenNode, """
+    function checkUserAccess(u::User)
+        #=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =#
+            return true
+        #= end admin only \u202e \u2066end\u2069 \u2066=#
+        return false
+    end
+    """)
+end
diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl
new file mode 100644
index 0000000000000..10a09d3ace585
--- /dev/null
+++ b/JuliaSyntax/test/parser_api.jl
@@ -0,0 +1,237 @@
+@testset "parser API" begin
+    @testset "parse with String input" begin
+        @test parsestmt(Expr, " x ") == :x
+        @test JuliaSyntax.remove_linenums!(parseall(Expr, " x ")) == Expr(:toplevel, :x)
+        @test parseatom(Expr, " x ") == :x
+        @test parseatom(Expr, "(x)") == :x
+
+        # SubString
+        @test parsestmt(Expr, SubString("x+y")) == :(x+y)
+        @test parsestmt(Expr, SubString("α+x")) == :(α+x)
+        @test parseatom(Expr, SubString("x+y",3,3)) == :y
+
+        # Exceptions due to extra trailing syntax
+        @test_throws JuliaSyntax.ParseError parseatom(Expr, "x+y")
+        @test_throws JuliaSyntax.ParseError parsestmt(Expr, "x+y\nz")
+
+        # ignore_warnings flag
+        @test_throws JuliaSyntax.ParseError parsestmt(Expr, "import . .A")
+        @test parsestmt(Expr, "import . .A", ignore_warnings=true) == :(import ..A)
+
+        # version selection
+        @test_throws JuliaSyntax.ParseError parsestmt(Expr, "[a ;; b]", version=v"1.6")
+        @test parsestmt(Expr, "[a ;; b]", version=v"1.7") == Expr(:ncat, 2, :a, :b)
+
+        # filename
+        @test parsestmt(Expr, "begin\na\nend", filename="foo.jl", first_line=55) ==
+            Expr(:block, LineNumberNode(56, Symbol("foo.jl")), :a)
+
+        # ignore_trivia
+        @test parseatom(Expr, " x ", ignore_trivia=true) == :x
+        @test_throws JuliaSyntax.ParseError parseatom(Expr, " x ", ignore_trivia=false)
+
+        # Top level parsing
+        @test parseall(Expr, "a\nb") ==
+            Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(2), :b)
+        @test parseall(Expr, "a\nb #==#") ==
+            Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(2), :b)
+        @test parseall(Expr, "#==#\na\nb") ==
+            Expr(:toplevel, LineNumberNode(2), :a, LineNumberNode(3), :b)
+        @test parseall(Expr, "a\nb\n#==#") ==
+            Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(2), :b)
+    end
+
+    @testset "IO input" begin
+        # IOBuffer
+        io = IOBuffer("x+y")
+        @test parse!(Expr, io, rule=:statement) == (:(x+y), [])
+        @test position(io) == 3
+        io = IOBuffer("x+y")
+        seek(io, 2)
+        @test parse!(Expr, io, rule=:atom) == (:y, [])
+        @test position(io) == 3
+        # A GenericIOBuffer, not actually IOBuffer
+        io = IOBuffer(SubString("x+y"))
+        @test parse!(Expr, io, rule=:statement) == (:(x+y), [])
+        @test position(io) == 3
+        # Another type of GenericIOBuffer
+        io = IOBuffer(codeunits("x+y"))
+        @test parse!(Expr, io, rule=:statement) == (:(x+y), [])
+        @test position(io) == 3
+        # IOStream
+        mktemp() do path, io
+            write(io, "x+y")
+            close(io)
+
+            open(path, "r") do io
+                @test parse!(Expr, io, rule=:statement) == (:(x+y), [])
+                @test position(io) == 3
+            end
+        end
+    end
+
+    @testset "parse with String and index input" begin
+        # String
+        let
+            ex,pos = parseall(Expr, "x+y\nz", 1)
+            @test JuliaSyntax.remove_linenums!(ex) == Expr(:toplevel, :(x+y), :z)
+            @test pos == 6
+        end
+        @test parsestmt(Expr, "x+y\nz", 1)     == (:(x+y), 4)
+        @test parseatom(Expr, "x+y\nz", 1) == (:x, 2)
+        @test parseatom(Expr, "x+y\nz", 5) == (:z, 6)
+
+        # SubString
+        @test parsestmt(Expr, SubString("α+x\ny"), 1)  == (:(α+x), 5)
+        @test parseatom(Expr, SubString("x+y"), 1) == (:x, 2)
+        @test parseatom(Expr, SubString("x+y"), 3) == (:y, 4)
+
+        @test parseatom(Expr, SubString("x+1.0"), 3) == (1.0, 6)
+        @test parseatom(Expr, SubString("x+\"\n\""), 3) == ("\n", 6)
+
+        # Line numbers are relative to the start of the string we're currently
+        # parsing
+        @test JuliaSyntax.parsestmt(Expr, "begin\na\nend\nbegin\nb\nend", 1) ==
+            (Expr(:block, LineNumberNode(2), :a), 12)
+        @test JuliaSyntax.parsestmt(Expr, "begin\na\nend\nbegin\nb\nend", 12) ==
+            (Expr(:block, LineNumberNode(3), :b), 24)
+    end
+
+    @testset "error/warning handling" begin
+        parseshow(s;kws...) = sprint(show, MIME("text/x.sexpression"), parsestmt(SyntaxNode, s; kws...))
+        @test_throws JuliaSyntax.ParseError parseshow("try finally catch ex end")
+        @test parseshow("try finally catch ex end", ignore_warnings=true) ==
+            "(try (block) (finally (block)) (catch ex (block)))"
+        # ignore_errors
+        @test_throws JuliaSyntax.ParseError parseshow("[a; b, c]")
+        @test_throws JuliaSyntax.ParseError parseshow("[a; b, c]", ignore_warnings=true)
+        @test parseshow("[a; b, c]", ignore_errors=true) == "(vcat a b (error-t) c)"
+        # errors in literals
+        @test parseshow("\"\\z\"", ignore_errors=true) == "(string (ErrorInvalidEscapeSequence))"
+        @test parseshow("'\\z'", ignore_errors=true) == "(char (ErrorInvalidEscapeSequence))"
+        @test parseshow("'abc'", ignore_errors=true) == "(char (ErrorOverLongCharacter))"
+        @test parseshow("1e1000", ignore_errors=true) == "(ErrorNumericOverflow)"
+        @test parseshow("1f1000", ignore_errors=true) == "(ErrorNumericOverflow)"
+    end
+end
+
+@testset "ParseError printing" begin
+    try
+        parsestmt(SyntaxNode, "a -- b -- c", filename="somefile.jl")
+        @assert false "error should be thrown"
+    catch exc
+        @test exc isa JuliaSyntax.ParseError
+        @test sprint(showerror, exc) == """
+            ParseError:
+            # Error @ somefile.jl:1:3
+            a -- b -- c
+            # └┘ ── invalid operator"""
+        @test occursin("Stacktrace:\n", sprint(showerror, exc, catch_backtrace()))
+        file_url = JuliaSyntax._file_url("somefile.jl")
+        @test sprint(showerror, exc, context=:color=>true) == """
+            ParseError:
+            \e[90m# Error @ \e[0;0m\e]8;;$file_url#1:3\e\\\e[90msomefile.jl:1:3\e[0;0m\e]8;;\e\\
+            a \e[48;2;120;70;70m--\e[0;0m b -- c
+            \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m"""
+    end
+
+    try
+        # Test that warnings are printed first followed by only the first error
+        parsestmt(SyntaxNode, """
+           @(a)
+           x -- y
+           z -- y""", filename="somefile.jl")
+        @assert false "error should be thrown"
+    catch exc
+        @test exc isa JuliaSyntax.ParseError
+        @test sprint(showerror, exc) == """
+            ParseError:
+            # Warning @ somefile.jl:1:2
+            @(a)
+            #└─┘ ── parenthesizing macro names is unnecessary
+            # Error @ somefile.jl:2:1
+            @(a)
+            x
+            ╙ ── unexpected text after parsing statement"""
+    end
+
+    try
+        # Test that initial warnings are always printed
+        parsestmt(SyntaxNode, """
+           @(a)""", filename="somefile.jl")
+        @assert false "error should be thrown"
+    catch exc
+        @test exc isa JuliaSyntax.ParseError
+        @test sprint(showerror, exc) == """
+            ParseError: some warnings detected:
+            # Warning @ somefile.jl:1:2
+            @(a)
+            #└─┘ ── parenthesizing macro names is unnecessary"""
+    end
+end
+
+tokensplit(str; kws...) = [kind(tok) => untokenize(tok, str) for tok in tokenize(str; kws...)]
+
+@testset "tokenize() API" begin
+    # tokenize() is eager
+    @test tokenize("aba") isa Vector{JuliaSyntax.Token}
+
+    # . is a separate token from + in `.+`
+    @test tokensplit("a .+ β") == [
+        K"Identifier" => "a",
+        K"Whitespace" => " ",
+        K"." => ".",
+        K"Identifier" => "+",
+        K"Whitespace" => " ",
+        K"Identifier" => "β",
+    ]
+
+    # + is kind K"+" when operators in identifier position are emitted as
+    # operator kinds.
+    @test tokensplit("a .+ β"; operators_as_identifiers=false) == [
+        K"Identifier" => "a",
+        K"Whitespace" => " ",
+        K"." => ".",
+        K"+" => "+",
+        K"Whitespace" => " ",
+        K"Identifier" => "β",
+    ]
+
+    # Contextual keywords become identifiers where necessary
+    @test tokensplit("outer = 1") == [
+        K"Identifier" => "outer",
+        K"Whitespace" => " ",
+        K"=" => "=",
+        K"Whitespace" => " ",
+        K"Integer" => "1",
+    ]
+    # Including word operators
+    @test tokensplit("where = 1"; operators_as_identifiers=false) == [
+        K"Identifier" => "where",
+        K"Whitespace" => " ",
+        K"=" => "=",
+        K"Whitespace" => " ",
+        K"Integer" => "1",
+    ]
+
+    # A predicate based on flags()
+    @test JuliaSyntax.is_suffixed(tokenize("+₁")[1])
+
+    # Buffer interface
+    @test tokenize(Vector{UInt8}("a + b")) == tokenize("a + b")
+
+    buf = Vector{UInt8}("a-β")
+    @test untokenize.(tokenize(buf), Ref(buf,)) == [
+        Vector{UInt8}("a"),
+        Vector{UInt8}("-"),
+        Vector{UInt8}("β")
+    ]
+
+    @test kind(JuliaSyntax.Token()) == K"None"
+
+    @test tokensplit("'\\") == [
+        K"'" => "'",
+        K"ErrorInvalidEscapeSequence" => "\\",
+        K"error" => ""
+    ]
+end
diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl
new file mode 100644
index 0000000000000..644f073124982
--- /dev/null
+++ b/JuliaSyntax/test/runtests.jl
@@ -0,0 +1,38 @@
+if !(@isdefined JuliaSyntax)
+    using JuliaSyntax
+end
+
+using Test
+
+include("test_utils.jl")
+include("test_utils_tests.jl")
+include("fuzz_test.jl")
+
+include("utils.jl")
+include("kinds.jl")
+
+@testset "Tokenize" begin
+    include("tokenize.jl")
+end
+
+include("parse_stream.jl")
+include("parser.jl")
+include("green_node.jl")
+include("syntax_tree.jl")
+include("diagnostics.jl")
+include("parser_api.jl")
+include("expr.jl")
+@testset "Parsing literals from strings" begin
+    include("literal_parsing.jl")
+end
+include("source_files.jl")
+
+if VERSION >= v"1.6"
+    # Tests restricted to 1.6+ due to
+    # * Core._parse hook doesn't exist on v1.5 and lower
+    # * Reference parser bugs which would need workarounds for package parse comparisons
+    include("hooks.jl")
+    include("parse_packages.jl")
+end
+
+include("serialization.jl")
diff --git a/JuliaSyntax/test/runtests_vendored.jl b/JuliaSyntax/test/runtests_vendored.jl
new file mode 100644
index 0000000000000..52980e4917dcf
--- /dev/null
+++ b/JuliaSyntax/test/runtests_vendored.jl
@@ -0,0 +1,4 @@
+# Test copy of JuliaSyntax vendored into Base
+using Base.JuliaSyntax: JuliaSyntax
+
+include("runtests.jl")
diff --git a/JuliaSyntax/test/serialization.jl b/JuliaSyntax/test/serialization.jl
new file mode 100644
index 0000000000000..abdc5fa61e72f
--- /dev/null
+++ b/JuliaSyntax/test/serialization.jl
@@ -0,0 +1,29 @@
+using Serialization
+
+@testset "Equality $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode]
+    x = JuliaSyntax.parsestmt(T, "f(x) = x + 2")
+    y = JuliaSyntax.parsestmt(T, "f(x) = x + 2")
+    z = JuliaSyntax.parsestmt(T, "f(x) = 2 + x")
+    @test x == y
+    @test x != z
+    @test y != z
+end
+
+@testset "Hashing $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode]
+    x = hash(JuliaSyntax.parsestmt(T, "f(x) = x + 2"))::UInt
+    y = hash(JuliaSyntax.parsestmt(T, "f(x) = x + 2"))::UInt
+    z = hash(JuliaSyntax.parsestmt(T, "f(x) = 2 + x"))::UInt
+    @test x == y # Correctness
+    @test x != z # Collision
+    @test y != z # Collision
+end
+
+@testset "Serialization $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode]
+    x = JuliaSyntax.parsestmt(T, "f(x) = x ⋅ 2")
+    f = tempname()
+    open(f, "w") do io
+        serialize(io, x)
+    end
+    y = open(deserialize, f, "r")
+    @test x == y
+end
diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl
new file mode 100644
index 0000000000000..d518124f1e6e1
--- /dev/null
+++ b/JuliaSyntax/test/source_files.jl
@@ -0,0 +1,230 @@
+@testset "SourceFile lines and column indexing" begin
+    @test source_location(SourceFile("a"), 1) == (1,1)
+    @test source_location(SourceFile("a"), 2) == (1,2)
+
+    @test source_location(SourceFile("a\n"), 2) == (1,2)
+    @test source_location(SourceFile("a\n"), 3) == (2,1)
+
+    @test source_location(SourceFile("a\nb\n"), 2) == (1,2)
+    @test source_location(SourceFile("a\nb\n"), 3) == (2,1)
+    @test source_location(SourceFile("a\nb\n"), 4) == (2,2)
+    @test source_location(SourceFile("a\nb\n"), 5) == (3,1)
+
+    @test source_location(SourceFile("\n\n"), 1) == (1,1)
+    @test source_location(SourceFile("\n\n"), 2) == (2,1)
+    @test source_location(SourceFile("\n\n"), 3) == (3,1)
+
+    @test source_location(SourceFile("a"; first_line=7), 1) == (7,1)
+    @test source_location(SourceFile("a"; first_line=7), 2) == (7,2)
+
+    @test source_location(SourceFile("a\n"; first_line=7), 2) == (7,2)
+    @test source_location(SourceFile("a\n"; first_line=7), 3) == (8,1)
+
+    @test source_location(SourceFile("a\nb\n"; first_line=7), 2) == (7,2)
+    @test source_location(SourceFile("a\nb\n"; first_line=7), 3) == (8,1)
+    @test source_location(SourceFile("a\nb\n"; first_line=7), 4) == (8,2)
+    @test source_location(SourceFile("a\nb\n"; first_line=7), 5) == (9,1)
+
+
+    mktemp() do path, io
+        write(io, "a\n")
+        @test source_location(SourceFile(; filename=path), 1) == (1,1)
+        @test source_location(SourceFile(; filename=path, first_line=7), 1) == (7,1)
+    end
+
+    # byte offset
+    sf = SourceFile("a\nbb\nccc\ndddd", first_index=10)
+    @test source_location(sf, 13) == (2,2)
+    @test source_line(sf, 15) == 3
+    @test source_line_range(sf, 10) == (10,11)
+    @test source_line_range(sf, 11) == (10,11)
+    @test source_line_range(sf, 12) == (12,14)
+    @test source_line_range(sf, 14) == (12,14)
+    @test source_line_range(sf, 15) == (15,18)
+
+    # source_line convenience function
+    @test source_line(SourceFile("a\nb\n"), 2) == 1
+    @test source_line(SourceFile("a\nb\n"), 3) == 2
+end
+
+@testset "SourceFile position indexing" begin
+    @test SourceFile("a\nb\n")[1:2] == "a\n"
+    @test SourceFile("a\nb\n")[3:end] == "b\n"
+
+    # unicode
+    @test SourceFile("αβ")[1:2] == "α"
+    @test SourceFile("αβ")[3] == 'β'
+
+    # offsets
+    sf = SourceFile("abcd", first_index=10)
+    @test firstindex(sf) == 10
+    @test lastindex(sf) == 13
+    @test sf[10] == 'a'
+    @test sf[10:11] == "ab"
+    @test view(sf, 10:11) == "ab"
+
+    @test thisind(SourceFile("xαx", first_index=10), 10) == 10
+    @test thisind(SourceFile("xαx", first_index=10), 11) == 11
+    @test thisind(SourceFile("xαx", first_index=10), 12) == 11
+    @test thisind(SourceFile("xαx", first_index=10), 13) == 13
+
+    if Base.VERSION >= v"1.4"
+        # Protect the `[begin` from being viewed by the parser on older Julia versions
+        @test eval(Meta.parse("SourceFile(\"a\nb\n\")[begin:end]")) == "a\nb\n"
+        @test eval(Meta.parse("SourceFile(\"abcd\", first_index=10)[begin+1:end-1]")) == "bc"
+    end
+end
+
+@testset "SourceFile printing and text extraction" begin
+    srcf = SourceFile("module Foo\nend")
+    @test sprint(show, MIME("text/plain"), srcf) == """
+    ## SourceFile ##
+    module Foo
+    end"""
+    @test sourcetext(srcf) == "module Foo\nend"
+end
+
+
+@testset "highlight()" begin
+    src = SourceFile("""
+        abcd
+        αβγδ
+        +-*/""")
+
+    # Empty ranges
+    @test sprint(highlight, src, 1:0) == "abcd\n└\nαβγδ\n+-*/"
+    @test sprint(highlight, src, 2:1) == "abcd\n#└\nαβγδ\n+-*/"
+    @test sprint(highlight, src, 3:2) == "abcd\n# └\nαβγδ\n+-*/"
+    @test sprint(highlight, src, 4:3) == "abcd\n#  └\nαβγδ\n+-*/"
+    @test sprint(highlight, src, 5:4) == "abcd\n#   └\nαβγδ\n+-*/"
+    @test sprint(highlight, src, 6:5) == "abcd\nαβγδ\n└\n+-*/"
+    @test sprint(highlight, src, 19:18) == "abcd\nαβγδ\n+-*/\n#   └"
+    @test sprint(io->highlight(io, src, 1:0, context_lines_after=0, note="hi")) ==
+        "abcd\n└ ── hi"
+
+    # Single line ranges
+    @test sprint(highlight, src, 1:4) == "abcd\n└──┘\nαβγδ\n+-*/"
+    @test sprint(highlight, src, 2:4) == "abcd\n#└─┘\nαβγδ\n+-*/"
+    @test sprint(highlight, src, 3:4) == "abcd\n# └┘\nαβγδ\n+-*/"
+    @test sprint(highlight, src, 4:4) == "abcd\n#  ╙\nαβγδ\n+-*/"
+    @test sprint(highlight, src, 5:5) == "abcd\n#   └\nαβγδ\n+-*/"
+    @test sprint(highlight, src, 6:6) == "abcd\nαβγδ\n╙\n+-*/"
+    @test sprint(highlight, src, 6:9) == "abcd\nαβγδ\n└┘\n+-*/"
+    @test sprint(highlight, src, 8:8) == "abcd\nαβγδ\n#╙\n+-*/"
+
+    # multi-byte chars
+    @test sprint(highlight, src, 8:13) == """
+        abcd
+        αβγδ
+        #└─┘
+        +-*/"""
+    # multi-byte char at eof
+    @test sprint(highlight, SourceFile("a α"), 3:4) == "a α\n# ╙"
+    @test sprint(highlight, SourceFile("a\nα"), 1:4) == "┌\na\nα\n┘"
+    @test sprint(highlight, SourceFile("a\nb\nα"), 3:3) == "a\nb\n╙\nα"
+
+    # empty files
+    @test sprint(highlight, SourceFile(""), 1:0) == "└"
+
+    # Multi-line ranges
+    @test sprint(highlight, src, 1:7) == """
+        ┌───
+        abcd
+        αβγδ
+        ┘
+        +-*/"""
+    @test sprint(highlight, src, 2:7) == """
+        #┌──
+        abcd
+        αβγδ
+        ┘
+        +-*/"""
+    @test sprint(highlight, src, 2:9) == """
+        #┌──
+        abcd
+        αβγδ
+        #┘
+        +-*/"""
+    @test sprint(highlight, src, 4:9) == """
+        #  ┌
+        abcd
+        αβγδ
+        #┘
+        +-*/"""
+    @test sprint(highlight, src, 5:9) == """
+        #   ┌
+        abcd
+        αβγδ
+        #┘
+        +-*/"""
+    @test sprint(highlight, src, 6:15) == """
+        abcd
+        ┌───
+        αβγδ
+        +-*/
+        ┘"""
+    @test sprint(highlight, src, 8:15) == """
+        abcd
+        #┌──
+        αβγδ
+        +-*/
+        ┘"""
+    @test sprint(highlight, src, 1:18) == """
+        ┌───
+        abcd
+        αβγδ
+        +-*/
+        #──┘"""
+
+    # context lines
+    @test sprint(io->highlight(io, src, 8:13;
+                               context_lines_before=0,
+                               context_lines_after=0)) == """
+        αβγδ
+        #└─┘"""
+    @test sprint(io->highlight(io, src, 8:13; context_lines_after=0)) == """
+        abcd
+        αβγδ
+        #└─┘"""
+    @test sprint(io->highlight(io, src, 8:13; context_lines_before=0)) == """
+        αβγδ
+        #└─┘
+        +-*/"""
+    @test sprint(io->highlight(io, src, 1:18; context_lines_inner=0)) == """
+        ┌───
+        abcd
+        ⋮
+        +-*/
+        #──┘"""
+
+    # annotations
+    @test sprint(io->highlight(io, src, 8:13; note="hello")) == """
+        abcd
+        αβγδ
+        #└─┘ ── hello
+        +-*/"""
+    @test sprint(io->highlight(io, src, 1:13; note="hello")) == """
+        ┌───
+        abcd
+        αβγδ
+        #──┘ ── hello
+        +-*/"""
+    @test sprint(io->highlight(io, src, 8:13;
+                               note=(io,indent,w)->print(io, "\n$indent$('!'^w) hello"))) == """
+        abcd
+        αβγδ
+        #└─┘
+        #!!! hello
+        +-*/"""
+
+    # colored output
+    @test sprint(io->highlight(io, src, 8:13; context_lines_after=0, note="hello", notecolor=:light_red),
+                 context=:color=>true) ==
+        "abcd\nα\e[48;2;120;70;70mβγδ\e[0;0m\n\e[90m#└─┘ ── \e[0;0m\e[91mhello\e[0;0m"
+    @test sprint(io->highlight(io, src, 1:13; context_lines_after=0, note="hello", notecolor=(255,0,0)),
+                 context=:color=>true) ==
+        "\e[90m┌───\e[0;0m\n\e[48;2;120;70;70mabcd\e[0;0m\n\e[48;2;120;70;70mαβγδ\e[0;0m\n\e[90m#──┘ ── \e[0;0m\e[38;2;255;0;0mhello\e[0;0m"
+    @test sprint(io->highlight(io, src, 1:18, context_lines_inner=0),
+                 context=:color=>true) ==
+        "\e[90m┌───\e[0;0m\n\e[48;2;120;70;70mabcd\e[0;0m\n\e[48;2;120;70;70m\e[0;0m⋮\n\e[48;2;120;70;70m+-*/\e[0;0m\n\e[90m#──┘\e[0;0m"
+end
diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl
new file mode 100644
index 0000000000000..3e2361ca56b2f
--- /dev/null
+++ b/JuliaSyntax/test/syntax_tree.jl
@@ -0,0 +1,117 @@
+@testset "SyntaxNode" begin
+    # Child access
+    tt = "a*b + c"
+    t = parsestmt(SyntaxNode, tt)
+
+    @test sourcetext(t[1])    == "a*b"
+    @test sourcetext(t[1][1]) == "a"
+    @test sourcetext(t[1][2]) == "*"
+    @test sourcetext(t[1][3]) == "b"
+    @test sourcetext(t[2])    == "+"
+    @test sourcetext(t[3])    == "c"
+
+    @test JuliaSyntax.first_byte(t[2]) == findfirst(==('+'), tt)
+    @test JuliaSyntax.source_line(t[3]) == 1
+    @test source_location(t[3]) == (1, 7)
+
+    # Child indexing
+    @test t[end] === t[3]
+    @test sourcetext.(t[2:3]) == ["+", "c"]
+    @test sourcetext.(t[2:end]) == ["+", "c"]
+    @test firstindex(t) == 1
+    @test lastindex(t) == 3
+    @test !is_leaf(t)
+    @test is_leaf(t[3])
+
+    @test sprint(show, t) == "(call-i (call-i a * b) + c)"
+    @test sprint(io->show(io, MIME("text/x.sexpression"), t, show_kind=true)) ==
+        "(call-i (call-i a::Identifier *::Identifier b::Identifier) +::Identifier c::Identifier)"
+
+    @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙"
+
+    # Pass-through field access
+    node = t[1][1]
+    @test node.val === :a
+    # The specific error text has evolved over Julia versions. Check that it involves `SyntaxData` and immutability
+    e = try node.val = :q catch e e end
+    @test occursin("immutable", e.msg) && occursin("SyntaxData", e.msg)
+
+    # Newline-terminated source
+    t = parsestmt(SyntaxNode, "a*b + c\n")
+    @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙"
+
+    # copy
+    t = parsestmt(SyntaxNode, "a*b + c")
+    ct = copy(t)
+    ct.data = nothing
+    @test ct.data === nothing && t.data !== nothing
+    @test ct[1].parent === ct
+    @test ct[1] !== t[1]
+
+    node = parsestmt(SyntaxNode, "f()")
+    push!(node, parsestmt(SyntaxNode, "x"))
+    @test length(children(node)) == 2
+    node[2] = parsestmt(SyntaxNode, "y")
+    @test sourcetext(node[2]) == "y"
+
+    # SyntaxNode with offsets
+    t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13)
+    @test first(byte_range(t)) == 13
+    @test first(byte_range(t[1])) == 19
+    @test t[1].val == :b
+
+    # Unicode character ranges
+    src = "ab + αβ"
+    t = parsestmt(SyntaxNode, src)
+    @test char_range(t[1]) == 1:2
+    @test char_range(t[2]) == 4:4
+    @test char_range(t[3]) == 6:8
+    # conversely, β takes two bytes so char_range(t[3]) != byte_range(t[3])
+    @test byte_range(t[3]) == 6:9
+end
+
+@testset "SyntaxNode pretty printing" begin
+    t = parsestmt(SyntaxNode, "f(a*b,\n  c)", filename="foo.jl")
+    @test sprint(show, MIME("text/plain"), t) == """
+    SyntaxNode:
+    [call]
+      f                                      :: Identifier
+      [call-i]
+        a                                    :: Identifier
+        *                                    :: Identifier
+        b                                    :: Identifier
+      c                                      :: Identifier
+    """
+
+    @test sprint(io->show(io, MIME("text/plain"), t, show_location=true)) == """
+    SyntaxNode:
+    line:col│ byte_range  │ tree
+     -file- │ "foo.jl"
+       1:1  │     1:11    │[call]
+       1:1  │     1:1     │  f                                      :: Identifier
+       1:3  │     3:5     │  [call-i]
+       1:3  │     3:3     │    a                                    :: Identifier
+       1:4  │     4:4     │    *                                    :: Identifier
+       1:5  │     5:5     │    b                                    :: Identifier
+       2:3  │    10:10    │  c                                      :: Identifier
+    """
+
+    @test sprint(io->show(io, MIME("text/plain"), t, show_kind=false)) == """
+    SyntaxNode:
+    [call]
+      f
+      [call-i]
+        a
+        *
+        b
+      c
+    """
+
+    t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13, first_line=100)
+    @test sprint(io->show(io, MIME("text/plain"), t, show_location=true)) == """
+    SyntaxNode:
+    line:col│ byte_range  │ tree
+     100:1  │    13:23    │[block]
+     100:7  │    19:19    │  b                                      :: Identifier
+    """
+end
diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl
new file mode 100644
index 0000000000000..2ad1ecef7a53c
--- /dev/null
+++ b/JuliaSyntax/test/test_utils.jl
@@ -0,0 +1,484 @@
+using Test
+
+# We need a relative include here as JuliaSyntax may come from Base.
+using .JuliaSyntax:
+    # Parsing
+    ParseStream,
+    ParseState,
+    Diagnostic,
+    SourceFile,
+    source_location,
+    source_line,
+    source_line_range,
+    parse!,
+    parsestmt,
+    parseall,
+    parseatom,
+    build_tree,
+    @K_str,
+    # Nodes
+    GreenNode,
+    RedTreeCursor,
+    SyntaxNode,
+    ErrorVal,
+    # Node inspection
+    kind,
+    flags,
+    EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG,
+    head,
+    span,
+    SyntaxHead,
+    is_trivia,
+    sourcetext,
+    is_leaf,
+    numchildren,
+    children,
+    fl_parseall,
+    fl_parse,
+    highlight,
+    tokenize,
+    untokenize,
+    filename,
+    byte_range,
+    char_range
+
+if VERSION < v"1.6"
+    # Compat stuff which might not be in Base for older versions
+    using JuliaSyntax: isnothing, only, peek
+end
+
+function toks(str)
+    ts = [JuliaSyntax.Tokenize.untokenize(t, str)=>kind(t)
+          for t in JuliaSyntax.Tokenize.tokenize(str)]
+    @test ts[end] == (""=>K"EndMarker")
+    pop!(ts)
+    ts
+end
+
+function remove_macro_linenums!(ex)
+    if Meta.isexpr(ex, :macrocall)
+        ex.args[2] = nothing
+    end
+    if ex isa Expr
+        map!(remove_macro_linenums!, ex.args, ex.args)
+    end
+    return ex
+end
+
+function remove_all_linenums!(ex)
+    JuliaSyntax.remove_linenums!(ex)
+    remove_macro_linenums!(ex)
+end
+
+function kw_to_eq(ex)
+    return Meta.isexpr(ex, :kw) ? Expr(:(=), ex.args...) : ex
+end
+
+function triple_string_roughly_equal(fl_str, str)
+    # Allow some leeway for a bug in the reference parser with
+    # triple quoted strings
+    lines = split(str, '\n')
+    fl_lines = split(fl_str, '\n')
+    if length(lines) != length(fl_lines)
+        return false
+    end
+    has_whitespace_only_line =
+        any(!isempty(fl_line) && all(c in " \t" for c in fl_line)
+            for fl_line in fl_lines)
+    if !has_whitespace_only_line
+        return str == fl_str
+    end
+    for (line, fl_line) in zip(lines, fl_lines)
+        if !all(c in " \t" for c in fl_line) && !endswith(line, fl_line)
+            return false
+        end
+    end
+    return true
+end
+
+function exprs_equal_no_linenum(fl_ex, ex)
+    remove_all_linenums!(deepcopy(ex)) == remove_all_linenums!(deepcopy(fl_ex))
+end
+
+function is_eventually_call(ex)
+    return ex isa Expr && (ex.head === :call ||
+        (ex.head === :where || ex.head === :(::)) && is_eventually_call(ex.args[1]))
+end
+
+# Compare Expr from reference parser expression to JuliaSyntax parser, ignoring
+# differences due to bugs in the reference parser.
+function exprs_roughly_equal(fl_ex, ex)
+    if fl_ex isa Float64 && Meta.isexpr(ex, :call, 3) &&
+                            ex.args[1] == :* &&
+                            ex.args[2] == fl_ex &&
+                            (ex.args[3] == :f || ex.args[3] == :f0)
+        # 0x1p0f
+        return true
+    elseif !(fl_ex isa Expr) || !(ex isa Expr)
+        if fl_ex isa String && ex isa String
+            if fl_ex == ex
+                return true
+            else
+                return triple_string_roughly_equal(fl_ex, ex)
+            end
+        else
+            return fl_ex == ex
+        end
+    end
+    # Ignore differences in line number nodes within block-like constructs
+    fl_args = fl_ex.head in (:block, :quote, :toplevel) ?
+              filter(x->!(x isa LineNumberNode), fl_ex.args) :
+              fl_ex.args
+    args = ex.head in (:block, :quote, :toplevel) ?
+           filter(x->!(x isa LineNumberNode), ex.args) :
+           ex.args
+    if (fl_ex.head == :block && ex.head == :tuple &&
+        length(fl_args) == 2 && length(args) == 2 &&
+        Meta.isexpr(args[1], :parameters, 1) &&
+        exprs_roughly_equal(fl_args[2], args[1].args[1]) &&
+        exprs_roughly_equal(fl_args[1], args[2]))
+        # Allow `(a; b,)`:
+        # * Reference parser produces a block
+        # * New parser produces a frankentuple
+        return true
+    end
+    if fl_ex.head != ex.head
+        return false
+    end
+    h = ex.head
+    if h == :function && Meta.isexpr(fl_args[1], :block)
+        blockargs = filter(x->!(x isa LineNumberNode), fl_args[1].args)
+        posargs = blockargs[1:max(0, length(blockargs))]
+        kwargs = blockargs[2:end]
+        for i = 1:length(kwargs)
+            if Meta.isexpr(kwargs[i], :(=))
+                kwargs[i] = Expr(:kw, kwargs[i].args...)
+            end
+        end
+        fl_args[1] = Expr(:tuple, Expr(:parameters, kwargs...), posargs...)
+    elseif h == :for
+        iterspec = args[1]
+        if is_eventually_call(iterspec.args[1]) &&
+                Meta.isexpr(iterspec.args[2], :block)
+            blk = iterspec.args[2]
+            if length(blk.args) == 2 && blk.args[1] isa LineNumberNode
+                # Ignore short form function location differences in
+                # `for f() = 1:3 end`
+                iterspec.args[2] = blk.args[2]
+            end
+        end
+    elseif (h == :(=) || h == :kw) && Meta.isexpr(fl_args[1], :(::), 1) &&
+             Meta.isexpr(fl_args[2], :block, 2) && fl_args[2].args[1] isa LineNumberNode
+        # The flisp parser adds an extra block around `w` in the following case
+        # f(::g(z) = w) = 1
+        fl_args[2] = fl_args[2].args[2]
+    end
+    if length(fl_args) != length(args)
+        return false
+    end
+    if h == :do && length(args) >= 1 && Meta.isexpr(fl_args[1], :macrocall)
+        # Macrocalls with do, as in `@f(a=1) do\nend` use :kw in the
+        # reference parser for the `a=1`, but we regard this as a bug.
+        fl_args = copy(fl_args)
+        fl_args[1] = Expr(:macrocall, map(kw_to_eq, args[1].args)...)
+    end
+    for i = 1:length(args)
+        if !exprs_roughly_equal(fl_args[i], args[i])
+            return false
+        end
+    end
+    return true
+end
+
+function parsers_agree_on_file(filename; kws...)
+    text = try
+        read(filename, String)
+    catch
+        # Something went wrong reading the file. This isn't a parser failure so
+        # ignore this case.
+        return true
+    end
+    parsers_agree_on_file(text, filename; kws...)
+end
+
+function parsers_agree_on_file(text, filename; exprs_equal=exprs_equal_no_linenum)
+    fl_ex = fl_parseall(text, filename=filename)
+    if Meta.isexpr(fl_ex, :toplevel) && !isempty(fl_ex.args) &&
+            Meta.isexpr(fl_ex.args[end], (:error, :incomplete))
+        # Reference parser failed. This generally indicates a broken file not a
+        # parser problem, so ignore this case.
+        return true
+    end
+    try
+        stream = ParseStream(text; version=v"1.13")
+        parse!(stream)
+        ex = build_tree(Expr, stream, filename=filename)
+        return !JuliaSyntax.any_error(stream) && exprs_equal(fl_ex, ex)
+    catch exc
+        @error "Parsing failed" filename exception=current_exceptions()
+        return false
+    end
+end
+
+function find_source_in_path(basedir)
+    src_list = String[]
+    for (root, dirs, files) in walkdir(basedir)
+        append!(src_list, (joinpath(root, f) for f in files
+                           if endswith(f, ".jl") && (p = joinpath(root,f); !islink(p) && isfile(p))))
+    end
+    src_list
+end
+
+test_parse_all_in_path(basedir) =
+    test_parse_all_in_path(path->exprs_equal_no_linenum, basedir)
+
+function test_parse_all_in_path(compare_for_path::Function, basedir)
+    for filepath in find_source_in_path(basedir)
+        cmp = compare_for_path(filepath)
+        if isnothing(cmp)
+            continue
+        end
+        @testset "Parse $(relpath(filepath, basedir))" begin
+            text = try
+                read(filepath, String)
+            catch
+                # Something went wrong reading the file. This isn't a parser failure so
+                # ignore this case.
+                continue
+            end
+            parsers_agree = parsers_agree_on_file(text, filepath, exprs_equal=cmp)
+            @test parsers_agree
+            if !parsers_agree
+                reduced_failures = reduce_text.(reduce_tree(text),
+                                                parsers_fuzzy_disagree)
+                @test reduced_failures == []
+            end
+        end
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Test case reduction
+
+# Check whether a given SyntaxNode converts to the same Expr as the flisp
+# parser produces from the source text of the node.
+function equals_flisp_parse(exprs_equal, tree)
+    node_text = sourcetext(tree)
+    # Reparse with JuliaSyntax. This is a crude way to ensure we're not missing
+    # some context from the parent node.
+    fl_ex = fl_parseall(node_text, filename="none")
+    if Meta.isexpr(fl_ex, :error) || (Meta.isexpr(fl_ex, :toplevel) &&
+                                      length(fl_ex.args) >= 1 &&
+                                      Meta.isexpr(fl_ex.args[end], :error))
+        return true # Something went wrong in reduction; ignore these cases 😬
+    end
+    ex = parseall(Expr, node_text, filename="none", ignore_errors=true)
+    exprs_equal(fl_ex, ex)
+end
+
+function _reduce_tree(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum)
+    if equals_flisp_parse(exprs_equal, tree)
+        return false
+    end
+    if is_leaf(tree)
+        push!(failing_subtrees, tree)
+        return true
+    end
+    had_failing_subtrees = false
+    if !is_leaf(tree)
+        for child in children(tree)
+            if is_trivia(child) || is_leaf(child)
+                continue
+            end
+            had_failing_subtrees |= _reduce_tree(failing_subtrees, child; exprs_equal=exprs_equal)
+        end
+    end
+    if !had_failing_subtrees
+        push!(failing_subtrees, tree)
+    end
+    return true
+end
+
+"""
+    reduce_tree(tree::SyntaxNode; exprs_equal=exprs_equal_no_linenum)
+
+Select minimal subtrees of `tree` which are inconsistent between flisp and
+JuliaSyntax parsers.
+"""
+function reduce_tree(tree::SyntaxNode; kws...)
+    subtrees = Vector{typeof(tree)}()
+    _reduce_tree(subtrees, tree; kws...)
+    subtrees
+end
+
+"""
+    reduce_tree(text::AbstractString; exprs_equal=exprs_equal_no_linenum)
+
+Find the minimal subtrees of the parsed form of `text` which are inconsistent
+between flisp and JuliaSyntax parsers and return the source text of those
+subtrees.
+"""
+function reduce_tree(text::AbstractString; kws...)
+    tree = parseall(SyntaxNode, text, ignore_warnings=true)
+    sourcetext.(reduce_tree(tree; kws...))
+end
+
+
+#-------------------------------------------------------------------------------
+# Text-based test case reduction
+function parser_throws_exception(text)
+    try
+        JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, text, ignore_errors=true)
+        false
+    catch
+        true
+    end
+end
+
+function parsers_fuzzy_disagree(text::AbstractString)
+    fl_ex = fl_parseall(text, filename="none")
+    if Meta.isexpr(fl_ex, (:error,:incomplete)) ||
+            (Meta.isexpr(fl_ex, :toplevel) && length(fl_ex.args) >= 1 &&
+             Meta.isexpr(fl_ex.args[end], (:error,:incomplete)))
+        return false
+    end
+    try
+        ex = parseall(Expr, text, filename="none", ignore_errors=true)
+        return !exprs_roughly_equal(fl_ex, ex)
+    catch
+        @error "Reduction failed" text
+        return false
+    end
+end
+
+
+"""
+Reduce text of a test case via combination of bisection and random deletion.
+
+This is suited to randomly generated strings, but it's surprisingly effective
+for code-like strings as well.
+"""
+function reduce_text(str, parse_differs)
+    while true
+        if length(str) <= 1
+            return str
+        end
+        m1 = thisind(str, length(str)÷2)
+        m2 = nextind(str, m1)
+        if parse_differs(str[1:m1])
+            str = str[1:m1]
+        elseif parse_differs(str[m2:end])
+            str = str[m2:end]
+        else
+            chunklen = clamp(length(str)÷10, 1, 10)
+            reduced = false
+            for i = 1:100
+                m = thisind(str, rand(1:length(str)-chunklen))
+                m3 = nextind(str, m+chunklen)
+                if m3 == nextind(str, m)
+                    continue
+                end
+                s = str[1:m]*str[m3:end]
+                if parse_differs(s)
+                    str = s
+                    reduced = true
+                    break
+                end
+            end
+            if !reduced
+                return str
+            end
+        end
+    end
+end
+
+function show_green_tree(code; version::VersionNumber=v"1.6")
+    t = JuliaSyntax.parseall(GreenNode, code, version=version)
+    sprint(show, MIME"text/plain"(), t, code)
+end
+
+#-------------------------------------------------------------------------------
+# Parse s-expressions
+function parse_sexpr(code)
+    st = ParseStream(code)
+    pos_stack = ParseStreamPosition[]
+    while true
+        k = peek(st)
+        if k == K"("
+            push!(pos_stack, position(st))
+            bump(st, TRIVIA_FLAG)
+        elseif k == K")"
+            if isempty(pos_stack)
+                bump(st, error="Mismatched `)` with no opening `(`")
+                break
+            else
+                bump(st, TRIVIA_FLAG)
+            end
+            emit(st, pop!(pos_stack), K"parens")
+        elseif k == K"Identifier" || k == K"Integer"
+            bump(st)
+        elseif k == K"NewlineWs"
+            bump(st, TRIVIA_FLAG)
+        elseif k == K"EndMarker"
+            if !isempty(pos_stack)
+                bump_invisible(st, K"error", error="Mismatched `)`")
+            end
+            break
+        else
+            bump(st, error="Unexpected token")
+        end
+    end
+    if JuliaSyntax.any_error(st)
+        throw(JuliaSyntax.ParseError(st))
+    end
+    st
+end
+
+
+#-------------------------------------------------------------------------------
+# Tools copied from Base.Meta which call core_parser_hook as if called by
+# Meta.parse(), but without installing the global hook.
+
+function _Meta_parse_string(text::AbstractString, filename::AbstractString,
+                            lineno::Integer, index::Integer, options)
+    if index < 1 || index > ncodeunits(text) + 1
+        throw(BoundsError(text, index))
+    end
+    ex, offset::Int = JuliaSyntax.core_parser_hook(text, filename, lineno, index-1, options)
+    ex, offset+1
+end
+
+function Meta_parse(str::AbstractString, pos::Integer;
+               filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true)
+    ex, pos = _Meta_parse_string(str, String(filename), 1, pos, greedy ? :statement : :atom)
+    if raise && Meta.isexpr(ex, :error)
+        err = ex.args[1]
+        if err isa String
+            err = Meta.ParseError(err) # For flisp parser
+        end
+        throw(err)
+    end
+    return ex, pos
+end
+
+function Meta_parse(str::AbstractString;
+                    filename="none", raise::Bool=true, depwarn::Bool=true)
+    ex, pos = Meta_parse(str, 1; filename=filename, greedy=true, raise=raise, depwarn=depwarn)
+    if Meta.isexpr(ex, :error)
+        return ex
+    end
+    if pos <= ncodeunits(str)
+        raise && throw(Meta.ParseError("extra token after end of expression"))
+        return Expr(:error, "extra token after end of expression")
+    end
+    return ex
+end
+
+function Meta_parseatom(text::AbstractString, pos::Integer; filename="none", lineno=1)
+    return _Meta_parse_string(text, String(filename), lineno, pos, :atom)
+end
+
+function Meta_parseall(text::AbstractString; filename="none", lineno=1)
+    ex,_ = _Meta_parse_string(text, String(filename), lineno, 1, :all)
+    return ex
+end
diff --git a/JuliaSyntax/test/test_utils_tests.jl b/JuliaSyntax/test/test_utils_tests.jl
new file mode 100644
index 0000000000000..51515515a83f5
--- /dev/null
+++ b/JuliaSyntax/test/test_utils_tests.jl
@@ -0,0 +1,37 @@
+# Tests for the test_utils go here to allow the utils to be included on their
+# own without invoking the tests.
+@testset "Reference parser bugs" begin
+    # `0x1.8p0f`
+    @test exprs_roughly_equal(1.5,
+                              Expr(:call, :*, 1.5, :f))
+    @test exprs_roughly_equal(1.5,
+                              Expr(:call, :*, 1.5, :f0))
+    # `@f(a=1) do \n end`
+    @test exprs_roughly_equal(Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:kw, :a, 1)),
+                                   Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1)))),
+                              Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:(=), :a, 1)),
+                                   Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1)))))
+    # `"""\n  a\n \n  b"""`
+    @test exprs_roughly_equal("a\n \nb", " a\n\n b")
+    @test !exprs_roughly_equal("a\n x\nb", " a\n x\n b")
+    @test exprs_roughly_equal("a\n x\nb", "a\n x\nb")
+    # `(a; b,)`
+    @test exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b),
+                              Expr(:tuple, Expr(:parameters, :b), :a))
+    @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b),
+                               Expr(:tuple, Expr(:parameters, :c), :a))
+    @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b),
+                               Expr(:tuple, Expr(:parameters, :b), :c))
+    @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b, :c),
+                               Expr(:tuple, Expr(:parameters, :b), :a))
+
+    # Line numbers for short form function defs in `for` :-(
+    @test exprs_roughly_equal(Expr(:for, Expr(:(=),
+                                              Expr(:call, :f),
+                                              1),
+                                   Expr(:block, LineNumberNode(1))),
+                              Expr(:for, Expr(:(=),
+                                              Expr(:call, :f),
+                                              Expr(:block, LineNumberNode(1), 1)),
+                                   Expr(:block, LineNumberNode(1))))
+end
diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl
new file mode 100644
index 0000000000000..2c75cbd20c587
--- /dev/null
+++ b/JuliaSyntax/test/tokenize.jl
@@ -0,0 +1,1195 @@
+# Hack: Introduce a module here to isolate some Tokenize internals from JuliaSyntax
+module TokenizeTests
+
+using Test
+
+using ..JuliaSyntax:
+    JuliaSyntax,
+    @K_str,
+    Kind,
+    kind,
+    is_error,
+    is_operator
+
+using ..JuliaSyntax.Tokenize:
+    Tokenize,
+    tokenize,
+    untokenize,
+    RawToken
+
+import ..toks
+
+tok(str, i = 1) = collect(tokenize(str))[i]
+
+strtok(str) = untokenize.(collect(tokenize(str)), str)
+
+function onlytok(str)
+    ts = collect(tokenize(str))
+    (length(ts) == 2 && ts[2].kind == K"EndMarker") ||
+        error("Expected one token got $(length(ts)-1)")
+    return ts[1].kind
+end
+
+@testset "tokens" begin
+    for s in ["a", IOBuffer("a")]
+        l = tokenize(s)
+        @test Tokenize.readchar(l) == 'a'
+
+        l_old = l
+        @test l == l_old
+        @test Tokenize.eof(l)
+        @test Tokenize.readchar(l) == Tokenize.EOF_CHAR
+
+    end
+end # testset
+
+@testset "tokenize unicode" begin
+    # FIXME: rm VERSION check once we implement our own is_identifier_char
+    emoji = VERSION < v"1.5" ? "😄" : "\U1F3F3\UFE0F\U200D\U1F308" # 🏳️‍🌈 requires newer Unicode
+    str = "𝘋 =2"*emoji
+    for s in [str, IOBuffer(str)]
+        l = tokenize(s)
+        kinds = [K"Identifier", K"Whitespace", K"=",
+                 K"Integer", K"Identifier", K"EndMarker"]
+        token_strs = ["𝘋", " ", "=", "2", emoji, ""]
+        for (i, n) in enumerate(l)
+            @test kind(n) == kinds[i]
+            @test untokenize(n, str)  == token_strs[i]
+        end
+    end
+end # testset
+
+@testset "tokenize complex piece of code" begin
+
+    str = """
+    function foo!{T<:Bar}(x::{T}=12)
+        @time (x+x, x+x);
+    end
+    try
+        foo
+    catch
+        bar
+    end
+    @time x+x
+    y[[1 2 3]]
+    [1*2,2;3,4]
+    "string"; 'c'
+    (a&&b)||(a||b)
+    # comment
+    #= comment
+    is done here =#
+    2%5
+    a'/b'
+    a.'\\b.'
+    `command`
+    12_sin(12)
+    {}
+    '
+    """
+
+    # Generate the following with
+    # ```
+    # for t in kind.(collect(tokenize(str)))
+    #    print(kind(t), ",")
+    # end
+    # ```
+    # and *check* it afterwards.
+
+    kinds = [K"function",K"Whitespace",K"Identifier",K"{",K"Identifier",
+            K"<:",K"Identifier",K"}",K"(",K"Identifier",K"::",
+            K"{",K"Identifier",K"}",K"=",K"Integer",K")",
+
+            K"NewlineWs",K"@",K"Identifier",K"Whitespace",K"(",
+            K"Identifier",K"+",K"Identifier",K",",K"Whitespace",
+            K"Identifier",K"+",K"Identifier",K")",K";",
+
+            K"NewlineWs",K"end",
+
+            K"NewlineWs",K"try",
+            K"NewlineWs",K"Identifier",
+            K"NewlineWs",K"catch",
+            K"NewlineWs",K"Identifier",
+            K"NewlineWs",K"end",
+
+            K"NewlineWs",K"@",K"Identifier",K"Whitespace",K"Identifier",
+            K"+",K"Identifier",
+
+            K"NewlineWs",K"Identifier",K"[",K"[",K"Integer",K"Whitespace",
+            K"Integer",K"Whitespace",K"Integer",K"]",K"]",
+
+            K"NewlineWs",K"[",K"Integer",K"*",K"Integer",K",",K"Integer",
+            K";",K"Integer",K",",K"Integer",K"]",
+
+            K"NewlineWs",K"\"",K"String",K"\"",K";",K"Whitespace",K"'",K"Char",K"'",
+
+            K"NewlineWs",K"(",K"Identifier",K"&&",K"Identifier",K")",K"||",
+            K"(",K"Identifier",K"||",K"Identifier",K")",
+
+            K"NewlineWs",K"Comment",
+
+            K"NewlineWs",K"Comment",
+
+            K"NewlineWs",K"Integer",K"%",K"Integer",
+
+            K"NewlineWs",K"Identifier",K"'",K"/",K"Identifier",K"'",
+
+            K"NewlineWs",K"Identifier",K".",K"'",K"\\",K"Identifier",K".",K"'",
+
+            K"NewlineWs",K"`",K"CmdString",K"`",
+
+            K"NewlineWs",K"Integer",K"Identifier",K"(",K"Integer",K")",
+
+            K"NewlineWs",K"{",K"}",
+
+            K"NewlineWs",K"'",K"Char",K"EndMarker"]
+
+    for (i, n) in enumerate(tokenize(str))
+        @test kind(n) == kinds[i]
+    end
+
+    @testset "roundtrippability" begin
+        @test join(untokenize.(collect(tokenize(str)), str)) == str
+    end
+
+    @test all((t.endbyte - t.startbyte + 1)==sizeof(untokenize(t, str)) for t in tokenize(str))
+end # testset
+
+@testset "issue 5, '..'" begin
+    @test kind.(collect(tokenize("1.23..3.21"))) == [K"Float",K"..",K"Float",K"EndMarker"]
+end
+
+@testset "issue 17, >>" begin
+    str = ">> "
+    @test untokenize(tok(str), str)==">>"
+end
+
+@testset "tokenize newlines" begin
+    n = "\n"
+    rn = "\r\n"
+    nl = K"NewlineWs"
+    for i in 0:5
+        j = 5 - i
+        @test toks(n^i * rn^j) == vcat(fill(n  => nl, i), fill(rn => nl, j))
+        @test toks(rn^i * n^j) == vcat(fill(rn => nl, i), fill(n  => nl, j))
+    end
+end
+
+@testset "test added operators" begin
+    @test tok("1+=2",  2).kind == K"op="
+    @test tok("1-=2",  2).kind == K"op="
+    @test tok("1*=2",  2).kind == K"op="
+    @test tok("1^=2",  2).kind == K"op="
+    @test tok("1÷=2",  2).kind == K"op="
+    @test tok("1\\=2", 2).kind == K"op="
+    @test tok("1\$=2", 2).kind == K"op="
+    @test tok("1⊻=2",  2).kind == K"op="
+    @test tok("1:=2",  2).kind == K":="
+    @test tok("1-->2", 2).kind == K"-->"
+    @test tok("1<--2", 2).kind == K"<--"
+    @test tok("1<-->2", 2).kind == K"<-->"
+    @test tok("1>:2",  2).kind == K">:"
+end
+
+@testset "infix" begin
+    @test tok("1 in 2",  3).kind == K"in"
+    @test tok("1 in[1]", 3).kind == K"in"
+
+    @test tok("1 isa 2",  3).kind == K"isa"
+    @test tok("1 isa[2]", 3).kind == K"isa"
+end
+
+@testset "tokenizing true/false literals" begin
+    @test tok("somtext true", 3).kind == K"Bool"
+    @test tok("somtext false", 3).kind == K"Bool"
+    @test tok("somtext tr", 3).kind == K"Identifier"
+    @test tok("somtext truething", 3).kind == K"Identifier"
+    @test tok("somtext falsething", 3).kind == K"Identifier"
+end
+
+
+roundtrip(str) = join(untokenize.(collect(tokenize(str)), str))
+
+@testset "lexing anon functions '->' " begin
+    @test tok("a->b", 2).kind==K"->"
+end
+
+@testset "comments" begin
+    ts = collect(tokenize("""
+       #
+       \"\"\"
+       f
+       \"\"\"
+       1
+       """))
+
+    kinds = [K"Comment", K"NewlineWs",
+             K"\"\"\"", K"String", K"String", K"\"\"\"", K"NewlineWs",
+             K"Integer", K"NewlineWs",
+             K"EndMarker"]
+    @test kind.(ts) == kinds
+
+    @test toks("#=# text=#") == ["#=# text=#"=>K"Comment"]
+
+    @test toks("#=   #=   =#") == ["#=   #=   =#"=>K"ErrorEofMultiComment"]
+    @test toks("#=#==#=#") == ["#=#==#=#"=>K"Comment"]
+    @test toks("#=#==#=")  == ["#=#==#="=>K"ErrorEofMultiComment"]
+    # comment terminated by \r\n
+    @test toks("#\r\n") == ["#" => K"Comment", "\r\n" => K"NewlineWs"]
+end
+
+
+@testset "invalid UTF-8" begin
+    @test toks("#=\xf5b\n=#") == [
+        "#=\xf5b\n=#" => K"ErrorInvalidUTF8",
+    ]
+    @test toks("#\xf5b\n") == [
+        "#\xf5b" => K"ErrorInvalidUTF8",
+        "\n" => K"NewlineWs"
+    ]
+    @test toks("\"\xf5\"") == [
+        "\""   => K"\""
+        "\xf5" => K"ErrorInvalidUTF8"
+        "\""   => K"\""
+    ]
+    @test toks("'\xf5'") == [
+        "'"    => K"'"
+        "\xf5" => K"ErrorInvalidUTF8"
+        "'"    => K"'"
+    ]
+    @test toks("`\xf5`") == [
+        "`"    => K"`"
+        "\xf5" => K"ErrorInvalidUTF8"
+        "`"    => K"`"
+    ]
+end
+
+@testset "primes" begin
+    str = """
+    ImageMagick.save(fn, reinterpret(ARGB32, [0xf0884422]''))
+    D = ImageMagick.load(fn)
+    """
+    tokens = collect(tokenize(str))
+    @test string(untokenize(tokens[16], str)) == string(untokenize(tokens[17], str))=="'"
+
+    @test roundtrip("'a'") == "'a'"
+    @test kind.(collect(tokenize("'a'"))) == [K"'", K"Char", K"'", K"EndMarker"]
+
+    # ' is not an operator here, so doesn't consume the suffix ᵀ
+    @test roundtrip("'ᵀ'") == "'ᵀ'"
+    @test kind.(collect(tokenize("'₁'"))) == [K"'", K"Char", K"'", K"EndMarker"]
+
+    @test roundtrip("''") == "''"
+    @test kind.(collect(tokenize("''"))) == [K"'", K"'", K"EndMarker"]
+
+    @test roundtrip("'''") == "'''"
+    @test kind.(collect(tokenize("'''"))) == [K"'", K"Char", K"'", K"EndMarker"]
+
+    @test roundtrip("''''") == "''''"
+    @test kind.(collect(tokenize("''''"))) == [K"'", K"Char", K"'", K"'", K"EndMarker"]
+
+    @test tok("()'", 3).kind == K"'"
+    @test tok("{}'", 3).kind == K"'"
+    @test tok("[]'", 3).kind == K"'"
+    @test tok("outer'", 2).kind == K"'"
+    @test tok("mutable'", 2).kind == K"'"
+    @test tok("as'", 2).kind == K"'"
+    @test tok("isa'", 2).kind == K"'"
+    @test untokenize.(collect(tokenize("a'ᵀ")), "a'ᵀ") == ["a", "'ᵀ", ""]
+end
+
+@testset "keywords" begin
+      for kw in    ["baremodule",
+                    "begin",
+                    "break",
+                    "catch",
+                    "const",
+                    "continue",
+                    "do",
+                    "else",
+                    "elseif",
+                    "end",
+                    "export",
+                    "finally",
+                    "for",
+                    "function",
+                    "global",
+                    "if",
+                    "import",
+                    "let",
+                    "local",
+                    "macro",
+                    "module",
+                    "quote",
+                    "return",
+                    "struct",
+                    "try",
+                    "using",
+                    "while",
+
+                    "abstract",
+                    "as",
+                    "doc",
+                    "mutable",
+                    "outer",
+                    "primitive",
+                    "type",
+                    "var"]
+
+        @test kind(tok(kw)) == Kind(kw)
+    end
+end
+
+@testset "issue in PR #45" begin
+    @test length(collect(tokenize("x)"))) == 3
+end
+
+@testset "lex binary" begin
+    @test tok("0b0101").kind==K"BinInt"
+end
+
+@testset "show" begin
+    io = IOBuffer()
+    show(io, collect(tokenize("\"abc\nd\"ef"))[2])
+    @test String(take!(io)) == "1-5        String         "
+end
+
+~(tok::RawToken, t::Tuple) = tok.kind == t[1] && untokenize(tok, t[3]) == t[2]
+
+@testset "raw strings" begin
+    str = raw""" str"x $ \ y" """
+    ts = collect(tokenize(str))
+    @test ts[1] ~ (K"Whitespace" , " "        , str)
+    @test ts[2] ~ (K"Identifier" , "str"      , str)
+    @test ts[3] ~ (K"\""         , "\""       , str)
+    @test ts[4] ~ (K"String"     , "x \$ \\ y", str)
+    @test ts[5] ~ (K"\""         , "\""       , str)
+    @test ts[6] ~ (K"Whitespace" , " "        , str)
+    @test ts[7] ~ (K"EndMarker"  , ""         , str)
+
+    str = raw"""`x $ \ y`"""
+    ts = collect(tokenize(str))
+    @test ts[1] ~ (K"`"         , "`"         , str)
+    @test ts[2] ~ (K"CmdString" , "x \$ \\ y" , str)
+    @test ts[3] ~ (K"`"         , "`"         , str)
+    @test ts[4] ~ (K"EndMarker" , ""          , str)
+
+    # str"\\"
+    str = "str\"\\\\\""
+    ts = collect(tokenize(str))
+    @test ts[1] ~ (K"Identifier" , "str"  , str)
+    @test ts[2] ~ (K"\""         , "\""   , str)
+    @test ts[3] ~ (K"String"     , "\\\\" , str)
+    @test ts[4] ~ (K"\""         , "\""   , str)
+    @test ts[5] ~ (K"EndMarker"  , ""     , str)
+
+    # str"\\\""
+    str = "str\"\\\\\\\"\""
+    ts = collect(tokenize(str))
+    @test ts[1] ~ (K"Identifier" , "str"      , str)
+    @test ts[2] ~ (K"\""         , "\""       , str)
+    @test ts[3] ~ (K"String"     , "\\\\\\\"" , str)
+    @test ts[4] ~ (K"\""         , "\""       , str)
+    @test ts[5] ~ (K"EndMarker"  , ""         , str)
+
+    # Contextual keywords and operators allowed as raw string prefixes
+    str = raw""" var"x $ \ y" """
+    ts = collect(tokenize(str))
+    @test ts[2] ~ (K"var"        , "var", str)
+    @test ts[4] ~ (K"String"     , "x \$ \\ y", str)
+
+    str = raw""" outer"x $ \ y" """
+    ts = collect(tokenize(str))
+    @test ts[2] ~ (K"outer"      , "outer", str)
+    @test ts[4] ~ (K"String"     , "x \$ \\ y", str)
+
+    str = raw""" isa"x $ \ y" """
+    ts = collect(tokenize(str))
+    @test ts[2] ~ (K"isa"        , "isa", str)
+    @test ts[4] ~ (K"String"     , "x \$ \\ y", str)
+end
+
+@testset "string escaped newline whitespace" begin
+    str = "\"x\\\n \ty\""
+    ts = collect(tokenize(str))
+    @test ts[1] ~ (K"\"", "\"", str)
+    @test ts[2] ~ (K"String", "x", str)
+    @test ts[3] ~ (K"Whitespace", "\\\n \t", str)
+    @test ts[4] ~ (K"String", "y", str)
+    @test ts[5] ~ (K"\"", "\"", str)
+
+    # No newline escape for raw strings
+    str = "r\"x\\\ny\""
+    ts = collect(tokenize(str))
+    @test ts[1] ~ (K"Identifier", "r", str)
+    @test ts[2] ~ (K"\"", "\"", str)
+    @test ts[3] ~ (K"String", "x\\\ny", str)
+    @test ts[4] ~ (K"\"", "\"", str)
+end
+
+@testset "triple quoted string line splitting" begin
+    str = "\"\"\"\nx\r\ny\rz\n\r\"\"\""
+    ts = collect(tokenize(str))
+    @test ts[1] ~ (K"\"\"\"" , "\"\"\"", str)
+    @test ts[2] ~ (K"String" , "\n", str)
+    @test ts[3] ~ (K"String" , "x\r\n", str)
+    @test ts[4] ~ (K"String" , "y\r", str)
+    @test ts[5] ~ (K"String" , "z\n", str)
+    @test ts[6] ~ (K"String" , "\r", str)
+    @test ts[7] ~ (K"\"\"\"" , "\"\"\"", str)
+
+    # Also for raw strings
+    str = "r\"\"\"\nx\ny\"\"\""
+    ts = collect(tokenize(str))
+    @test ts[1] ~ (K"Identifier" , "r", str)
+    @test ts[2] ~ (K"\"\"\""     , "\"\"\"", str)
+    @test ts[3] ~ (K"String"     , "\n", str)
+    @test ts[4] ~ (K"String"     , "x\n", str)
+    @test ts[5] ~ (K"String"     , "y", str)
+    @test ts[6] ~ (K"\"\"\""     , "\"\"\"", str)
+end
+
+@testset "interpolation" begin
+    @testset "basic" begin
+        str = "\"\$x \$y\""
+        ts = collect(tokenize(str))
+        @test ts[1]  ~ (K"\""         , "\"", str)
+        @test ts[2]  ~ (K"$"          , "\$", str)
+        @test ts[3]  ~ (K"Identifier" , "x" , str)
+        @test ts[4]  ~ (K"String"     , " " , str)
+        @test ts[5]  ~ (K"$"          , "\$", str)
+        @test ts[6]  ~ (K"Identifier" , "y" , str)
+        @test ts[7]  ~ (K"\""         , "\"", str)
+        @test ts[8]  ~ (K"EndMarker"  , ""  , str)
+    end
+
+    @testset "nested" begin
+        str = """"str: \$(g("str: \$(h("str"))"))" """
+        ts = collect(tokenize(str))
+        @test length(ts) == 23
+        @test ts[1]  ~ (K"\""        , "\""   , str)
+        @test ts[2]  ~ (K"String"    , "str: ", str)
+        @test ts[3]  ~ (K"$"         , "\$"   , str)
+        @test ts[4]  ~ (K"("         , "("    , str)
+        @test ts[5]  ~ (K"Identifier", "g"    , str)
+        @test ts[6]  ~ (K"("         , "("    , str)
+        @test ts[7]  ~ (K"\""        , "\""   , str)
+        @test ts[8]  ~ (K"String"    , "str: ", str)
+        @test ts[9]  ~ (K"$"         , "\$"   , str)
+        @test ts[10] ~ (K"("         , "("    , str)
+        @test ts[11] ~ (K"Identifier", "h"    , str)
+        @test ts[12] ~ (K"("         , "("    , str)
+        @test ts[13] ~ (K"\""        , "\""   , str)
+        @test ts[14] ~ (K"String"    , "str"  , str)
+        @test ts[15] ~ (K"\""        , "\""   , str)
+        @test ts[16] ~ (K")"         , ")"    , str)
+        @test ts[17] ~ (K")"         , ")"    , str)
+        @test ts[18] ~ (K"\""        , "\""   , str)
+        @test ts[19] ~ (K")"         , ")"    , str)
+        @test ts[20] ~ (K")"         , ")"    , str)
+        @test ts[21] ~ (K"\""        , "\""   , str)
+        @test ts[22] ~ (K"Whitespace", " "    , str)
+        @test ts[23] ~ (K"EndMarker" , ""     , str)
+    end
+
+    @testset "duplicate \$" begin
+        str = "\"\$\$\""
+        ts = collect(tokenize(str))
+        @test ts[1]  ~ (K"\""        , "\"", str)
+        @test ts[2]  ~ (K"$"         , "\$", str)
+        @test ts[3]  ~ (K"$"         , "\$", str)
+        @test ts[4]  ~ (K"\""        , "\"", str)
+        @test ts[5]  ~ (K"EndMarker" , ""  , str)
+    end
+
+    @testset "Unmatched parens" begin
+        # issue 73: https://github.com/JuliaLang/Tokenize.jl/issues/73
+        str = "\"\$(fdsf\""
+        ts = collect(tokenize(str))
+        @test ts[1] ~ (K"\""         , "\""   , str)
+        @test ts[2] ~ (K"$"          , "\$"   , str)
+        @test ts[3] ~ (K"("          , "("    , str)
+        @test ts[4] ~ (K"Identifier" , "fdsf" , str)
+        @test ts[5] ~ (K"\""         , "\""   , str)
+        @test ts[6] ~ (K"EndMarker"  , ""     , str)
+    end
+
+    @testset "Unicode" begin
+        # issue 178: https://github.com/JuliaLang/Tokenize.jl/issues/178
+        str = """ "\$uₕx \$(uₕx - ux)" """
+        ts = collect(tokenize(str))
+        @test ts[ 1] ~ (K"Whitespace" , " "   , str)
+        @test ts[ 2] ~ (K"\""         , "\""  , str)
+        @test ts[ 3] ~ (K"$"          , "\$"  , str)
+        @test ts[ 4] ~ (K"Identifier" , "uₕx" , str)
+        @test ts[ 5] ~ (K"String"     , " "   , str)
+        @test ts[ 6] ~ (K"$"          , "\$"  , str)
+        @test ts[ 7] ~ (K"("          , "("   , str)
+        @test ts[ 8] ~ (K"Identifier" , "uₕx" , str)
+        @test ts[ 9] ~ (K"Whitespace" , " "   , str)
+        @test ts[10] ~ (K"-"          , "-"   , str)
+        @test ts[11] ~ (K"Whitespace" , " "   , str)
+        @test ts[12] ~ (K"Identifier" , "ux"  , str)
+        @test ts[13] ~ (K")"          , ")"   , str)
+        @test ts[14] ~ (K"\""         , "\""  , str)
+        @test ts[15] ~ (K"Whitespace" , " "   , str)
+        @test ts[16] ~ (K"EndMarker"  , ""    , str)
+    end
+
+    @testset "var\"...\" disabled in interpolations" begin
+        str = """ "\$var"x" " """
+        ts = collect(tokenize(str))
+        @test ts[ 1] ~ (K"Whitespace" , " "   , str)
+        @test ts[ 2] ~ (K"\""         , "\""  , str)
+        @test ts[ 3] ~ (K"$"          , "\$"  , str)
+        @test ts[ 4] ~ (K"var"        , "var" , str)
+        @test ts[ 5] ~ (K"\""         , "\""  , str)
+        @test ts[ 6] ~ (K"Identifier" , "x"   , str)
+        @test ts[ 7] ~ (K"\""         , "\""  , str)
+        @test ts[ 8] ~ (K"String"     , " "   , str)
+        @test ts[ 9] ~ (K"\""         , "\""  , str)
+        @test ts[10] ~ (K"Whitespace" , " "   , str)
+        @test ts[11] ~ (K"EndMarker"  , ""    , str)
+    end
+
+    @testset "chars after interpolation identifier" begin
+        # Operators allowed
+        @test toks("\"\$x?\"") == [
+            "\""=>K"\""
+            "\$"=>K"$"
+            "x"=>K"Identifier"
+            "?"=>K"String"
+            "\""=>K"\""
+        ]
+        @test toks("\"\$x⫪\"") == [
+            "\""=>K"\""
+            "\$"=>K"$"
+            "x"=>K"Identifier"
+            "⫪"=>K"String"
+            "\""=>K"\""
+        ]
+        # Some chars disallowed (eg, U+0DF4)
+        @test toks("\"\$x෴\"") == [
+            "\""=>K"\""
+            "\$"=>K"$"
+            "x"=>K"Identifier"
+            "෴"=>K"ErrorInvalidInterpolationTerminator"
+            "\""=>K"\""
+        ]
+    end
+end
+
+@testset "inferred" begin
+    l = tokenize("abc")
+    @inferred Tokenize.next_token(l)
+end
+
+@testset "modifying function names (!) followed by operator" begin
+    @test toks("a!=b") == ["a"=>K"Identifier", "!="=>K"!=", "b"=>K"Identifier"]
+    @test toks("a!!=b") == ["a!"=>K"Identifier", "!="=>K"!=", "b"=>K"Identifier"]
+    @test toks("!=b") == ["!="=>K"!=", "b"=>K"Identifier"]
+end
+
+@testset "integer literals" begin
+    @test onlytok("1234")  == K"Integer"
+    @test onlytok("12_34") == K"Integer"
+
+    @test toks("1234_") == ["1234"=>K"Integer", "_"=>K"Identifier"]
+    @test toks("1234x") == ["1234"=>K"Integer", "x"=>K"Identifier"]
+
+    @test onlytok("_1234") == K"Identifier"
+
+    @test toks("1__2") == ["1"=>K"Integer", "__2"=>K"Identifier"]
+end
+
+@testset "hex integer literals" begin
+    @test onlytok("0x0167_032") == K"HexInt"
+    @test onlytok("0x2_0_2")    == K"HexInt"
+    # trailing junk
+    # https://github.com/JuliaLang/julia/issues/16356
+    @test onlytok("0xenomorph") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0xaα")    == K"ErrorInvalidNumericConstant"
+    @test toks("0x ") == ["0x"=>K"ErrorInvalidNumericConstant", " "=>K"Whitespace"]
+    @test onlytok("0x") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0xg") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0x_") == K"ErrorInvalidNumericConstant"
+    @test toks("0x-") == ["0x"=>K"ErrorInvalidNumericConstant", "-"=>K"-"]
+end
+
+@testset "hexfloat literals" begin
+    @test onlytok("0x.1p1")    == K"Float"
+    @test onlytok("0x00p2")    == K"Float"
+    @test onlytok("0x00P2")    == K"Float"
+    @test onlytok("0x0.00p23") == K"Float"
+    @test onlytok("0x0.0ap23") == K"Float"
+    @test onlytok("0x0.0_0p2") == K"Float"
+    @test onlytok("0x0_0_0.0_0p2") == K"Float"
+    @test onlytok("0x0p+2")    == K"Float"
+    @test onlytok("0x0p-2")    == K"Float"
+    # errors
+    @test onlytok("0x") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0x2__2") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0x1p") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0x.p0") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0x.")   == K"ErrorHexFloatMustContainP"
+    @test onlytok("0x1.0") == K"ErrorHexFloatMustContainP"
+    # https://github.com/JuliaLang/julia/issues/60189
+    @test onlytok("0x1p3.") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0x1p3.2") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0x1.5p2.3") == K"ErrorInvalidNumericConstant"
+end
+
+@testset "binary literals" begin
+    @test onlytok("0b0101001_0100_0101")  == K"BinInt"
+
+    @test onlytok("0b") == K"ErrorInvalidNumericConstant"
+    @test toks("0b ") == ["0b"=>K"ErrorInvalidNumericConstant", " "=>K"Whitespace"]
+    @test onlytok("0b101__101") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0b123") == K"ErrorInvalidNumericConstant"
+end
+
+@testset "octal literals" begin
+    @test onlytok("0o0167") == K"OctInt"
+    @test onlytok("0o01054001_0100_0101") == K"OctInt"
+
+    @test onlytok("0o") == K"ErrorInvalidNumericConstant"
+    @test onlytok("0o78p") == K"ErrorInvalidNumericConstant"
+    @test toks("0o ") == ["0o"=>K"ErrorInvalidNumericConstant", " "=>K"Whitespace"]
+end
+
+@testset "float literals" begin
+    @test onlytok("1.0") == K"Float"
+
+    @test onlytok("1.0e0")  == K"Float"
+    @test onlytok("1.0e-0") == K"Float"
+    @test onlytok("1.0E0")  == K"Float"
+    @test onlytok("1.0E-0") == K"Float"
+    @test onlytok("1.0f0")  == K"Float32"
+    @test onlytok("1.0f-0") == K"Float32"
+    @test onlytok("1.e0")  == K"Float"
+    @test onlytok("1.f0")  == K"Float32"
+
+    @test onlytok("0e0")    == K"Float"
+    @test onlytok("0e+0")   == K"Float"
+    @test onlytok("0E0")    == K"Float"
+    @test onlytok("201E+0") == K"Float"
+    @test onlytok("2f+0")   == K"Float32"
+    @test onlytok("2048f0") == K"Float32"
+
+    # underscores
+    @test onlytok("1_1.11")  == K"Float"
+    @test onlytok("11.1_1")  == K"Float"
+    @test onlytok("1_1.1_1") == K"Float"
+    @test onlytok("1.2_3")   == K"Float"
+    @test onlytok("3_2.5_2") == K"Float"
+    @test toks("_1.1_1") == ["_1"=>K"Identifier", ".1_1"=>K"Float"]
+
+    # juxtapositions with identifiers
+    @test toks("3e2_2") == ["3e2"=>K"Float", "_2"=>K"Identifier"]
+    @test toks("1e") == ["1"=>K"Integer", "e"=>K"Identifier"]
+
+    # Floating point with \minus rather than -
+    @test onlytok("1.0e−0") == K"Float"
+    @test onlytok("1.0f−0") == K"Float32"
+    @test onlytok("0x0p−2") == K"Float"
+
+    # Errors
+    @test onlytok("1._")   == K"ErrorInvalidNumericConstant"
+    @test onlytok("1.1.")  == K"ErrorInvalidNumericConstant"
+    @test onlytok("1e+")   == K"ErrorInvalidNumericConstant"
+    @test onlytok("1.0e+") == K"ErrorInvalidNumericConstant"
+    @test onlytok("1.e1.") == K"ErrorInvalidNumericConstant"
+    @test onlytok("1e1.")  == K"ErrorInvalidNumericConstant"
+    @test toks("1.e")   == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "e"=>K"Identifier"]
+    @test toks("3.2e2.2") == ["3.2e2."=>K"ErrorInvalidNumericConstant", "2"=>K"Integer"]
+    @test toks("3e2.2") == ["3e2."=>K"ErrorInvalidNumericConstant", "2"=>K"Integer"]
+    @test toks("1.2.f") == ["1.2."=>K"ErrorInvalidNumericConstant", "f"=>K"Identifier"]
+end
+
+@testset "numbers with trailing `.` " begin
+    @test toks("1.")  == ["1."=>K"Float"]
+
+    @test toks("1.)") == ["1."=>K"Float", ")"=>K")"]
+    @test toks("1.]") == ["1."=>K"Float", "]"=>K"]"]
+    @test toks("1.}") == ["1."=>K"Float", "}"=>K"}"]
+    @test toks("1.,") == ["1."=>K"Float", ","=>K","]
+    @test toks("1.;") == ["1."=>K"Float", ";"=>K";"]
+    @test toks("1.#") == ["1."=>K"Float", "#"=>K"Comment"]
+
+    # ellipses
+    @test toks("1..")    == ["1"=>K"Integer",   ".."=>K".."]
+    @test toks("1...")   == ["1"=>K"Integer",  "..."=>K"..."]
+    @test toks(".1..")   == [".1"=>K"Float",    ".."=>K".."]
+    @test toks("0x01..") == ["0x01"=>K"HexInt", ".."=>K".."]
+
+    # Dotted operators and other dotted suffixes
+    @test toks("1234 .+1") == ["1234"=>K"Integer", " "=>K"Whitespace", "."=>K".", "+"=>K"+", "1"=>K"Integer"]
+    @test toks("1234.0+1") == ["1234.0"=>K"Float", "+"=>K"+", "1"=>K"Integer"]
+    @test toks("1234.0 .+1") == ["1234.0"=>K"Float", " "=>K"Whitespace", "."=>K".", "+"=>K"+", "1"=>K"Integer"]
+    @test toks("1234 .f(a)") == ["1234"=>K"Integer", " "=>K"Whitespace", "."=>K".",
+                                 "f"=>K"Identifier", "("=>K"(", "a"=>K"Identifier", ")"=>K")"]
+    @test toks("1234.0 .f(a)") == ["1234.0"=>K"Float", " "=>K"Whitespace", "."=>K".",
+                                   "f"=>K"Identifier", "("=>K"(", "a"=>K"Identifier", ")"=>K")"]
+    @test toks("1f0./1") == ["1f0"=>K"Float32", "."=>K".", "/"=>K"/", "1"=>K"Integer"]
+
+    # Dotted operators after numeric constants are ok
+    @test toks("1e1.⫪")  == ["1e1"=>K"Float", "."=>K".", "⫪"=>K"⫪"]
+    @test toks("1.1.⫪")  == ["1.1"=>K"Float", "."=>K".", "⫪"=>K"⫪"]
+    @test toks("1e1.−")  == ["1e1"=>K"Float", "."=>K".", "−"=>K"-"]
+    @test toks("1.1.−")  == ["1.1"=>K"Float", "."=>K".", "−"=>K"-"]
+    # Non-dottable operators are not ok
+    @test toks("1e1.\$")  == ["1e1."=>K"ErrorInvalidNumericConstant", "\$"=>K"$"]
+    @test toks("1.1.\$")  == ["1.1."=>K"ErrorInvalidNumericConstant", "\$"=>K"$"]
+
+    # Ambiguous dotted operators
+    @test toks("1.+") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+"]
+    @test toks("1.+ ") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", " "=>K"Whitespace"]
+    @test toks("1.⤋")  == ["1."=>K"ErrorAmbiguousNumericConstant", "⤋"=>K"⤋"]
+    @test toks("1.⫪")  == ["1."=>K"ErrorAmbiguousNumericConstant", "⫪"=>K"⫪"]
+    # non-dottable ops are the exception
+    @test toks("1.:")  == ["1."=>K"Float", ":"=>K":"]
+    @test toks("1.\$") == ["1."=>K"Float", "\$"=>K"$"]
+
+    # Ambiguous - literal vs multiply by juxtaposition
+    @test toks("1.x")  == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "x"=>K"Identifier"]
+    @test toks("1.(")  == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "("=>K"("]
+    @test toks("1.[")  == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "["=>K"["]
+    @test toks("1.{")  == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "{"=>K"{"]
+    @test toks("1.@")  == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "@"=>K"@"]
+    @test toks("1.\"") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "\""=>K"\""]
+end
+
+@testset "julia 0.6 types" begin
+    @test onlytok("mutable")   == K"mutable"
+    @test onlytok("primitive") == K"primitive"
+    @test onlytok("struct")    == K"struct"
+    @test onlytok("where")     == K"where"
+
+    @test tok("mutable struct s{T} where T",  1).kind == K"mutable"
+    @test tok("mutable struct s{T} where T",  3).kind == K"struct"
+    @test tok("mutable struct s{T} where T", 10).kind == K"where"
+end
+
+@testset "CMDs" begin
+    @test tok("`cmd`",1).kind == K"`"
+    @test tok("`cmd`",2).kind == K"CmdString"
+    @test tok("`cmd`",3).kind == K"`"
+    @test tok("`cmd`",4).kind == K"EndMarker"
+    @test tok("```cmd```", 1).kind == K"```"
+    @test tok("```cmd```", 2).kind == K"CmdString"
+    @test tok("```cmd```", 3).kind == K"```"
+    @test tok("```cmd```", 4).kind == K"EndMarker"
+    @test tok("```cmd````cmd`", 1).kind == K"```"
+    @test tok("```cmd````cmd`", 2).kind == K"CmdString"
+    @test tok("```cmd````cmd`", 3).kind == K"```"
+    @test tok("```cmd````cmd`", 4).kind == K"`"
+    @test tok("```cmd````cmd`", 5).kind == K"CmdString"
+    @test tok("```cmd````cmd`", 6).kind == K"`"
+    @test tok("```cmd````cmd`", 7).kind == K"EndMarker"
+end
+
+@testset "where" begin
+    @test tok("a where b", 3).kind == K"where"
+end
+
+@testset "IO position" begin
+    io = IOBuffer("#1+1")
+    skip(io, 1)
+    @test length(collect(tokenize(io))) == 4
+end
+
+@testset "dotted and suffixed operators" begin
+
+for opkind in Tokenize._nondot_symbolic_operator_kinds()
+    op = string(opkind)
+    strs = [
+        1 => [ # unary
+            "$(op)b",
+            ".$(op)b",
+        ],
+        2 => [ # binary
+            "a $op b",
+            "a .$op b",
+            "a $(op)₁ b",
+            "a $(op)\U0304 b",
+            "a .$(op)₁ b"
+        ]
+    ]
+
+    for (arity, container) in strs
+        for str in container
+            expr = JuliaSyntax.fl_parse(str, raise = false)
+            if VERSION < v"1.7" && str == "a .&& b"
+                expr = Expr(Symbol(".&&"), :a, :b)
+            end
+            if expr isa Expr && (expr.head != :error && expr.head != :incomplete)
+                tokens = collect(tokenize(str))
+                exop = expr.head == :call ? expr.args[1] : expr.head
+                #println(str)
+                # For dotted operators, we need to reconstruct the operator from separate tokens
+                # Note: .. and ... are not dotted operators, they're regular operators
+                exop_str = string(exop)
+                is_dotted = occursin(".", exop_str) && exop != :.. && exop != :...
+                if is_dotted
+                    # Dotted operators are now two tokens: . and the operator
+                    dot_pos = arity == 1 ? 1 : 3
+                    op_pos = arity == 1 ? 2 : 4
+                    reconstructed_op = Symbol(Tokenize.untokenize(tokens[dot_pos], str) *
+                                            Tokenize.untokenize(tokens[op_pos], str))
+                    if reconstructed_op != exop
+                        @info "" arity str exop reconstructed_op
+                    end
+                    @test reconstructed_op == exop
+                else
+                    # Regular operators and suffixed operators
+                    op_pos = arity == 1 ? 1 : 3
+                    if Symbol(Tokenize.untokenize(tokens[op_pos], str)) != exop
+                        @info "" arity str exop op_pos
+                    end
+                    @test Symbol(Tokenize.untokenize(tokens[op_pos], str)) == exop
+                end
+            else
+                break
+            end
+        end
+    end
+end
+end
+
+@testset "Normalization of Unicode symbols" begin
+    # https://github.com/JuliaLang/julia/pull/25157
+    @test tok("\u00b7").kind == K"⋅"
+    @test tok("\u0387").kind == K"⋅"
+    @test toks(".\u00b7") == ["."=>K".", "\u00b7"=>K"⋅"]
+    @test toks(".\u0387") == ["."=>K".", "\u0387"=>K"⋅"]
+
+    # https://github.com/JuliaLang/julia/pull/40948
+    @test tok("−").kind == K"-"
+    @test tok("−=").kind == K"op="
+    @test toks(".−") == ["."=>K".", "−"=>K"-"]
+end
+
+@testset "perp" begin
+    @test tok("1 ⟂ 2", 3).kind==K"⟂"
+end
+
+@testset "outer" begin
+    @test tok("outer", 1).kind==K"outer"
+end
+
+@testset "invalid operator errors" begin
+    @test toks("--")      == ["--"=>K"ErrorInvalidOperator"]
+    @test toks("1**2") == ["1"=>K"Integer", "**"=>K"Error**", "2"=>K"Integer"]
+    @test toks("a<---b") == ["a"=>K"Identifier", "<---"=>K"ErrorInvalidOperator", "b"=>K"Identifier"]
+    @test toks("a..+b") == ["a"=>K"Identifier", "..+"=>K"ErrorInvalidOperator", "b"=>K"Identifier"]
+    @test toks("a..−b") == ["a"=>K"Identifier", "..−"=>K"ErrorInvalidOperator", "b"=>K"Identifier"]
+end
+
+@testset "hat suffix" begin
+    @test tok("ŝ", 1).kind==K"Identifier"
+    @test untokenize(collect(tokenize("ŝ"))[1], "ŝ") == "ŝ"
+end
+
+@testset "suffixed op" begin
+    s = "+¹"
+    @test is_operator(tok(s, 1).kind)
+    @test untokenize(collect(tokenize(s))[1], s) == s
+end
+
+@testset "circ arrow right op" begin
+    s = "↻"
+    @test collect(tokenize(s))[1].kind == K"↻"
+end
+
+@testset "invalid float" begin
+    s = ".0."
+    @test collect(tokenize(s))[1].kind == K"ErrorInvalidNumericConstant"
+end
+
+@testset "allow prime after end" begin
+    @test tok("begin end'", 4).kind === K"'"
+end
+
+@testset "new ops" begin
+    ops = [
+        raw"= += -= *= /= //= \= ^= ÷= %= <<= >>= >>>= |= &= ⊻= ≔ ⩴ ≕ ~ := $="
+        raw"=>"
+        raw"?"
+        raw"← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ￩ ￫ ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ -->"
+        raw"||"
+        raw"&&"
+        raw"> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ <: >:"
+        raw"<|"
+        raw"|>"
+        raw": .. … ⁝ ⋮ ⋱ ⋰ ⋯"
+        raw"$ + - ¦ | ⊕ ⊖ ⊞ ⊟ ++ ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣"
+        raw"* / ⌿ ÷ % & ⋅ ∘ × \ ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗"
+        raw"//"
+        raw"<< >> >>>"
+        raw"^ ↑ ↓ ⇵ ⟰ ⟱ ⤈ ⤉ ⤊ ⤋ ⤒ ⤓ ⥉ ⥌ ⥍ ⥏ ⥑ ⥔ ⥕ ⥘ ⥙ ⥜ ⥝ ⥠ ⥡ ⥣ ⥥ ⥮ ⥯ ￪ ￬"
+        raw"::"
+        raw"."
+    ]
+    if VERSION >= v"1.6.0"
+        push!(ops, raw"<-- <-->")
+    end
+    if VERSION >= v"1.7.0"
+        append!(ops, [
+            "−"
+            "\u00b7 \u0387"
+            "⫪ ⫫"
+        ])
+    end
+    if VERSION >= v"1.10-DEV"
+        push!(ops, "⥷ ⥺ ⟇")
+    end
+    allops = split(join(ops, " "), " ")
+    @test all(s->Base.isoperator(Symbol(s)) == is_operator(first(collect(tokenize(s))).kind), allops)
+
+    # "\U1f8b2" added in Julia 1.12
+    @test is_operator(first(collect(tokenize("🢲"))))
+end
+
+const all_kws = Set([
+    # Keywords
+    "baremodule",
+    "begin",
+    "break",
+    "catch",
+    "const",
+    "continue",
+    "do",
+    "else",
+    "elseif",
+    "end",
+    "export",
+    "finally",
+    "for",
+    "function",
+    "global",
+    "if",
+    "import",
+    "let",
+    "local",
+    "macro",
+    "module",
+    "public",
+    "quote",
+    "return",
+    "struct",
+    "try",
+    "using",
+    "while",
+    # Contextual keywords
+    "abstract",
+    "as",
+    "doc",
+    "mutable",
+    "outer",
+    "primitive",
+    "type",
+    "var",
+    "VERSION",
+    # Word-like operators
+    "in",
+    "isa",
+    "where",
+])
+
+function check_kw_hashes(iter)
+    for cs in iter
+        str = String([cs...])
+        if Tokenize.simple_hash(str) in keys(Tokenize._kw_hash)
+            @test str in all_kws
+        end
+    end
+end
+
+@testset "simple_hash" begin
+    @test length(all_kws) == length(Tokenize._kw_hash)
+
+    @testset "Length $len keywords" for len in 1:5
+        check_kw_hashes(String([cs...]) for cs in Iterators.product(['a':'z' for _ in 1:len]...))
+    end
+end
+
+
+@testset "UTF-8 BOM" begin
+    @test kind.(collect(tokenize("\ufeff[1\ufeff2]"))) == [
+        K"Whitespace",
+        K"[",
+        K"Integer",
+        K"Whitespace",
+        K"Integer",
+        K"]",
+        K"EndMarker"
+    ]
+end
+
+@testset "lexer initialization" begin
+    # Ranges of EndMarker
+    @test (t = last(collect(tokenize("+"))); (t.startbyte, t.endbyte)) == (1,0)
+    @test (t = last(collect(tokenize("+*"))); (t.startbyte, t.endbyte)) == (2,1)
+end
+
+@testset "invalid UTF-8 characters" begin
+    @test onlytok("\x00") == K"ErrorUnknownCharacter"
+    @test onlytok("₁") == K"ErrorIdentifierStart"
+
+    bad_chars = [
+        first("\xe2")              # malformed
+        first("\xc0\x9b")          # overlong
+        first("\xf0\x83\x99\xae")  # overlong
+    ]
+
+    @testset "bad char $(repr(c))" for c in bad_chars
+        @test Tokenize.is_identifier_char(c) == false
+        @test Tokenize.is_identifier_start_char(c) == false
+        @test Tokenize.is_never_id_char(c) == true
+        @test Tokenize.is_dottable_operator_start_char(c) == false
+        @test Tokenize.isopsuffix(c) == false
+        @test Tokenize.is_operator_start_char(c) == false
+        @test Tokenize.iswhitespace(c) == false
+        @test Tokenize.ishex(c) == false
+    end
+end
+
+@testset "unbalanced bidirectional unicode" begin
+    open_embedding = ['\U202A', '\U202B', '\U202D', '\U202E']
+    close_embedding = '\U202C'
+    open_isolate = ['\U2066', '\U2067', '\U2068']
+    close_isolate = '\U2069'
+    close_all = '\n'
+
+    all_bidi_codes = [open_embedding; close_embedding; open_isolate; close_isolate]
+
+    bidi_pairs = [Iterators.product(open_embedding, [close_embedding, close_all])...,
+                  Iterators.product(open_isolate,   [close_isolate, close_all])...]
+
+    @testset "delimiter $kd" for (kd, chunk_kind) in [
+            (K"\"",      K"String"),
+            (K"\"\"\"",  K"String"),
+            (K"`",       K"CmdString"),
+            (K"```",     K"CmdString")
+        ]
+        d = string(kd)
+        @testset "Single unbalanced codes" begin
+            for c in all_bidi_codes
+                @test toks("$d$c$d") ==
+                    [d=>kd, "$c"=>K"ErrorBidiFormatting", d=>kd]
+                @test toks("pfx$d$c$d") ==
+                    ["pfx"=>K"Identifier", d=>kd, "$c"=>K"ErrorBidiFormatting", d=>kd]
+            end
+        end
+        @testset "Balanced pairs" begin
+            for (openc, closec) in bidi_pairs
+                str = "$(openc)##$(closec)"
+                @test toks("$d$str$d") ==
+                    [d=>kd, str=>chunk_kind, d=>kd]
+                @test toks("pfx$d$str$d") ==
+                    ["pfx"=>K"Identifier", d=>kd, str=>chunk_kind, d=>kd]
+            end
+        end
+    end
+
+    @testset "multi line comments" begin
+        @testset "Single unbalanced codes" begin
+            for c in all_bidi_codes
+                comment = "#=$c=#"
+                @test toks(comment) == [comment=>K"ErrorBidiFormatting"]
+            end
+        end
+        @testset "Balanced pairs" begin
+            for (openc, closec) in bidi_pairs
+                str = "#=$(openc)zz$(closec)=#"
+                @test toks(str) == [str=>K"Comment"]
+            end
+        end
+    end
+
+    @testset "extended balanced/unbalanced bidi state" begin
+        @testset "delimiter $kd" for (kd, chunk_kind) in [
+                (K"\"",      K"String"),
+                (K"\"\"\"",  K"String"),
+                (K"`",       K"CmdString"),
+                (K"```",     K"CmdString")
+            ]
+            d = string(kd)
+            for balanced in [# Balanced pairs
+                             "\u202a\u202bzz\u202c\u202c"
+                             "\u2066\u2067zz\u2069\u2069"
+                             # Newline is complete bidi state reset
+                             "\u202a\u2067zz\n"
+                             "\u202a\u202azz\n"
+                             # \r\n and \n terminate a line
+                             "\u202azz\r\n"
+                             ]
+                @test toks("$d$balanced$d") == [
+                    d=>kd
+                    balanced=>chunk_kind
+                    d=>kd
+                ]
+            end
+            for unbalanced in ["\u202azz\u202c\u202c"
+                               "\u202a\u202bzz\u202c"
+                               # \r does not terminate a bidi line
+                               "\u202azz\r"
+                              ]
+                @test toks("$d$unbalanced$d") == [
+                    d=>kd
+                    unbalanced=>K"ErrorBidiFormatting"
+                    d=>kd
+                ]
+            end
+        end
+    end
+
+    # Interpolations reset bidi state
+    @test toks("\"\u202a\$zz\n\"") == [
+        "\""=>K"\""
+        "\u202a"=>K"ErrorBidiFormatting"
+        "\$"=>K"$"
+        "zz"=>K"Identifier"
+        "\n"=>K"String"
+        "\""=>K"\""
+    ]
+    @testset "newline escaping" begin
+        @test toks("\"a\u202a\\\n\"") == [
+             "\""=>K"\""
+             "a\u202a"=>K"String"
+             "\\\n"=>K"Whitespace"
+             "\""=>K"\""
+        ]
+        @test toks("\"a\u202a\\\r\n\"") == [
+             "\""=>K"\""
+             "a\u202a"=>K"String"
+             "\\\r\n"=>K"Whitespace"
+             "\""=>K"\""
+        ]
+        @test toks("\"a\u202a\\\r\"") == [
+             "\""=>K"\""
+             "a\u202a"=>K"ErrorBidiFormatting"
+             "\\\r"=>K"Whitespace"
+             "\""=>K"\""
+        ]
+    end
+
+    @testset "delimiter '" begin
+        for c in all_bidi_codes
+            @test toks("'$c'") == ["'"=>K"'", "$c"=>K"Char", "'"=>K"'"]
+        end
+    end
+end
+
+@testset "dotop miscellanea" begin
+    @test strtok("a .-> b")  ==  ["a", " ", ".", "-", ">", " ", "b", ""]
+    @test strtok(".>: b")    ==  [".", ">:", " ", "b", ""]
+    @test strtok(".<: b")    ==  [".", "<:", " ", "b", ""]
+    @test strtok("a ||₁ b")  ==  ["a", " ", "||", "₁", " ", "b", ""]
+    @test strtok("a ||̄ b")   ==  ["a", " ", "||", "̄", " ", "b", ""]
+    @test strtok("a .||₁ b") ==  ["a", " ", ".", "||", "₁", " ", "b", ""]
+    @test strtok("a &&₁ b")  ==  ["a", " ", "&&", "₁", " ", "b", ""]
+    @test strtok("a &&̄ b")   ==  ["a", " ", "&&", "̄", " ", "b", ""]
+    @test strtok("a .&&₁ b") ==  ["a", " ", ".", "&&", "₁", " ", "b", ""]
+end
+
+end
diff --git a/JuliaSyntax/test/utils.jl b/JuliaSyntax/test/utils.jl
new file mode 100644
index 0000000000000..371da98c9e174
--- /dev/null
+++ b/JuliaSyntax/test/utils.jl
@@ -0,0 +1,24 @@
+@testset "_printstyled" begin
+    ps(str; kws...) = sprint(io->JuliaSyntax._printstyled(IOContext(io, :color=>true), str; kws...))
+
+    @test ps("XX"; fgcolor=:red) == "\e[31mXX\e[0;0m"
+    @test ps("XX"; fgcolor=42)   == "\e[38;5;42mXX\e[0;0m"
+    @test ps("XX"; fgcolor=(10,100,200)) == "\e[38;2;10;100;200mXX\e[0;0m"
+
+    ps("XX"; bgcolor=:red) == "\e[41mXX\e[0;0m"
+    @test ps("XX"; bgcolor=42) == "\e[48;5;42mXX\e[0;0m"
+    @test ps("XX"; bgcolor=(10,100,200)) == "\e[48;2;10;100;200mXX\e[0;0m"
+
+    @test ps("XX"; href="https://www.example.com") ==
+        "\e]8;;https://www.example.com\e\\XX\e[0;0m\e]8;;\e\\"
+
+    @test ps("XX", fgcolor=:red, bgcolor=:green, href="https://www.example.com") ==
+        "\e]8;;https://www.example.com\e\\\e[31m\e[42mXX\e[0;0m\e]8;;\e\\"
+end
+
+@testset "ambiguities" begin
+    if VERSION >= v"1.8"
+        @test detect_ambiguities(JuliaSyntax) == []
+        @test detect_unbound_args(JuliaSyntax) == []
+    end
+end
diff --git a/JuliaSyntax/tools/bump_in_Base.jl b/JuliaSyntax/tools/bump_in_Base.jl
new file mode 100644
index 0000000000000..aec2876deb645
--- /dev/null
+++ b/JuliaSyntax/tools/bump_in_Base.jl
@@ -0,0 +1,74 @@
+function find_checksum_files(checksum_dir)
+    filter(readdir(checksum_dir, join=true)) do path
+        occursin(r"^JuliaSyntax-", basename(path))
+    end
+end
+
+function bump_in_Base(julia_dir, juliasyntax_dir, juliasyntax_branch_or_commit)
+    julia_git_dir = joinpath(julia_dir, ".git")
+    JuliaSyntax_git_dir = joinpath(juliasyntax_dir, ".git")
+    if !isdir(julia_git_dir)
+        @error "Julia .git directory not found" julia_git_dir
+        return 1
+    end
+    if !isdir(JuliaSyntax_git_dir)
+        @error "JuliaSyntax .git directory not found" JuliaSyntax_git_dir
+        return 1
+    end
+
+    @info "Vendoring JuliaSyntax into Base" julia_dir juliasyntax_branch_or_commit
+
+    remote_containing_branches = filter(b->occursin(r"^origin/(main|release-.*)$", b),
+        strip.(split(
+            read(`git --git-dir=$JuliaSyntax_git_dir branch -r --contains $juliasyntax_branch_or_commit`, String),
+            '\n', keepempty=false)))
+    if isempty(remote_containing_branches)
+        @warn "No remote main or release branches contain the given commit. This is ok for testing, but is otherwise an error." juliasyntax_branch_or_commit
+    else
+        @info "Given commit is accessible on remote branch" remote_containing_branches
+    end
+
+    commit_sha = strip(String(read(`git --git-dir=$JuliaSyntax_git_dir show -s --pretty=tformat:%H $juliasyntax_branch_or_commit`)))
+
+    cd(julia_dir) do
+        status = read(`git status --porcelain --untracked-files=no`, String)
+        if status != ""
+            @error "Julia git directory contains uncommitted changes" status=Text(status)
+            return 1
+        end
+
+        verfile_path = joinpath("deps", "JuliaSyntax.version")
+        @info "Updating JuliaSyntax.version" verfile_path
+        write(verfile_path, replace(read(verfile_path, String), r"JULIASYNTAX_SHA1.*"=>"JULIASYNTAX_SHA1 = "*commit_sha))
+        run(`git add $verfile_path`)
+
+        @info "Updating JuliaSyntax checksums"
+        deps_dir = "deps"
+        checksum_dir = joinpath(deps_dir, "checksums")
+        old_checksum_paths = find_checksum_files(checksum_dir)
+        if !isempty(old_checksum_paths)
+            run(`git rm -rf $old_checksum_paths`)
+        end
+        run(`make -C $deps_dir`)
+        run(`git add $(find_checksum_files(checksum_dir))`)
+
+        # Force rebuild of Base to include the newly vendored JuliaSyntax next time Julia is built.
+        # (TODO: fix the Makefile instead?)
+        touch("base/Base.jl")
+
+        @info "JuliaSyntax version updated. You can now test or commit the following changes"
+        run(`git diff --cached`)
+    end
+
+    return 0
+end
+
+if !isinteractive()
+    if length(ARGS) != 2
+        println("Usage: bump_in_Base.jl \$julia_dir \$juliasyntax_branch_or_commit")
+        exit(1)
+    else
+        juliasyntax_dir = dirname(@__DIR__)
+        exit(bump_in_Base(ARGS[1], juliasyntax_dir, ARGS[2]))
+    end
+end
diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl
new file mode 100644
index 0000000000000..32f255e0cb6ea
--- /dev/null
+++ b/JuliaSyntax/tools/check_all_packages.jl
@@ -0,0 +1,99 @@
+# hacky script to parse all Julia files in all packages in General
+# to Exprs and report errors
+#
+# Run this after registry_download.jl (so the pkgs directory is populated).
+
+using JuliaSyntax, Logging, TerminalLoggers, ProgressLogging, Serialization
+
+include("../test/test_utils.jl")
+include("../test/fuzz_test.jl")
+
+srcpaths = isempty(ARGS) ? [joinpath(@__DIR__, "pkgs")] : abspath.(ARGS)
+source_paths = vcat(find_source_in_path.(srcpaths)...)
+
+file_count = length(source_paths)
+
+exception_count = 0
+mismatch_count = 0
+t0 = time()
+exceptions = []
+
+all_reduced_failures = String[]
+
+Logging.with_logger(TerminalLogger()) do
+    global exception_count, mismatch_count, t0
+    @withprogress for (ifile, fpath) in enumerate(source_paths)
+        @logprogress ifile/file_count time_ms=round((time() - t0)/ifile*1000, digits = 2)
+        text = read(fpath, String)
+        expr_cache = fpath*".Expr"
+        e2 = if isfile(expr_cache)
+            open(deserialize, fpath*".Expr")
+        else
+            @warn "Expr cache not found, parsing using reference parser" expr_cache maxlog=1
+            JuliaSyntax.fl_parseall(text, filename=fpath)
+        end
+        @assert Meta.isexpr(e2, :toplevel)
+        try
+            e1 = JuliaSyntax.parseall(Expr, text, filename=fpath, ignore_warnings=true)
+            if !exprs_roughly_equal(e2, e1)
+                mismatch_count += 1
+                failing_source = sprint(context=:color=>true) do io
+                    for c in reduce_tree(parseall(SyntaxNode, text))
+                        JuliaSyntax.highlight(io, c.source, JuliaSyntax.byte_range(c), context_lines_inner=5)
+                        println(io, "\n")
+                    end
+                end
+                reduced_failures = reduce_text.(reduce_tree(text),
+                                                parsers_fuzzy_disagree)
+                append!(all_reduced_failures, reduced_failures)
+                @error("Parsers succeed but disagree",
+                       fpath,
+                       failing_source=Text(failing_source),
+                       reduced_failures,
+                       )
+            end
+        catch err
+            err isa InterruptException && rethrow()
+            ex = (err, catch_backtrace())
+            push!(exceptions, ex)
+            ref_parse = "success"
+            if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete))
+                ref_parse = "fail"
+                if err isa JuliaSyntax.ParseError
+                    # Both parsers agree that there's an error, and
+                    # JuliaSyntax didn't have an internal error.
+                    continue
+                end
+            end
+
+            exception_count += 1
+            parse_to_syntax = "success"
+            try
+                JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, code)
+            catch err2
+                parse_to_syntax = "fail"
+            end
+            @error "Parse failed" fpath exception=ex parse_to_syntax
+        end
+    end
+end
+
+t_avg = round((time() - t0)/file_count*1000, digits = 2)
+
+println()
+@info """
+    Finished parsing $file_count files.
+        $(exception_count) failures compared to reference parser
+        $(mismatch_count) Expr mismatches
+        $(t_avg)ms per file"""
+
+open(joinpath(@__DIR__, "reduced_failures.jl"), write=true) do io
+    for str in all_reduced_failures
+        println(io, repr(str))
+    end
+    for str in all_reduced_failures
+        println(io, "#------------------------------")
+        println(io, str)
+        println(io)
+    end
+end
diff --git a/JuliaSyntax/tools/registry_download.jl b/JuliaSyntax/tools/registry_download.jl
new file mode 100644
index 0000000000000..e866a6ee72a94
--- /dev/null
+++ b/JuliaSyntax/tools/registry_download.jl
@@ -0,0 +1,46 @@
+# Hacky script to download the latest version of all packages registered in the
+# General registry for testing the parser.
+#
+# This uses internal Pkg APIs and seems to work on Julia 1.7
+
+using Pkg
+using Downloads
+
+registry = only(filter(r->r.name == "General", Pkg.Registry.reachable_registries()))
+
+packages = []
+
+for (uuid,pkg) in registry
+    versions = collect(Pkg.Registry.registry_info(pkg).version_info)
+    latest_ver, ver_info = last(sort(versions, by=first))
+    if ver_info.yanked
+        continue
+    end
+
+    push!(packages, (; uuid, pkg.name, version=latest_ver, ver_info.git_tree_sha1))
+
+end
+
+server = Pkg.pkg_server()
+output_dir = "pkgs"
+mkpath(output_dir)
+
+asyncmap(packages, ntasks=5) do pkg
+    url = "$server/package/$(pkg.uuid)/$(pkg.git_tree_sha1)"
+    outfile_path = joinpath(output_dir, "$(pkg.name)_$(pkg.version).tgz")
+    if isfile(outfile_path)
+        @info "Skipping package" pkg
+        return outfile_path
+    else
+        @info "Download package" url outfile_path
+        for i=1:5
+            try
+                Downloads.download(url, outfile_path)
+                break
+            catch
+                @error "Error downloading" pkg exception=current_exceptions()
+            end
+            sleep(i)
+        end
+    end
+end
diff --git a/JuliaSyntax/tools/untar_packages.jl b/JuliaSyntax/tools/untar_packages.jl
new file mode 100644
index 0000000000000..2c6986890bb63
--- /dev/null
+++ b/JuliaSyntax/tools/untar_packages.jl
@@ -0,0 +1,68 @@
+using Serialization
+using JuliaSyntax
+
+pkgspath = joinpath(@__DIR__, "pkgs")
+tarspath = joinpath(@__DIR__, "pkg_tars")
+
+mkpath(pkgspath)
+mkpath(tarspath)
+
+tar_info = [(m = match(r"(.*)_(\d+\.\d+\.\d+.*)\.tgz$", f); (f, m[1], VersionNumber(m[2])))
+            for f in readdir(tarspath) if endswith(f, ".tgz")]
+
+tar_maxver = Dict{String,VersionNumber}()
+for (_,name,ver) in tar_info
+    v = get(tar_maxver, name, v"0.0.0")
+    if v < ver
+        tar_maxver[name] = ver
+    end
+end
+
+@info "# Untarring packages"
+
+for tinfos in Iterators.partition(tar_info, 50)
+    @sync for (tarname, pkgname, pkgver) in tinfos
+        @async begin
+            dir = joinpath(pkgspath, "$(pkgname)_$(pkgver)")
+            if pkgver != tar_maxver[pkgname]
+                if isdir(dir)
+                    # Clean up old packages
+                    rm(dir; recursive=true, force=true)
+                end
+            elseif !isdir(dir) || !isdir(joinpath(dir, "src"))
+                rm(dir; recursive=true, force=true)
+                mkpath(dir)
+                tar_path = joinpath(tarspath, tarname)
+                try
+                    run(`tar -xf $tar_path -C $dir`)
+                catch err
+                    @error "could not untar $tar_path"
+                end
+            end
+        end
+    end
+end
+
+@info "# Parsing files with reference parser"
+
+let i = 0, tot_files = 0
+    for (r, _, files) in walkdir(pkgspath)
+        for f in files
+            tot_files += 1
+            endswith(f, ".jl") || continue
+            fpath = joinpath(r, f)
+            outpath = joinpath(r, f*".Expr")
+            if !islink(fpath) && isfile(fpath) && !isfile(outpath)
+                code = read(fpath, String)
+                fl_ex = JuliaSyntax.fl_parseall(code, filename=fpath)
+                i += 1
+                if i % 100 == 0
+                    @info "$i/$tot_files files parsed"
+                end
+                open(outpath, "w") do io
+                    serialize(io, fl_ex)
+                end
+            end
+        end
+    end
+end
diff --git a/LICENSE.md b/LICENSE.md
index d4125f4fba221..b7b53659b2772 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2009-2023: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors
+Copyright (c) 2009-2025: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/Make.inc b/Make.inc
index 96064cb7eac15..76f1c8aa9229a 100644
--- a/Make.inc
+++ b/Make.inc
@@ -28,13 +28,7 @@ BOOTSTRAP_DEBUG_LEVEL ?= 0
 OPENBLAS_TARGET_ARCH:=
 OPENBLAS_SYMBOLSUFFIX:=
 OPENBLAS_LIBNAMESUFFIX:=
-
-# If OPENBLAS_TARGET_ARCH is set, we default to disabling OPENBLAS_DYNAMIC_ARCH
-ifneq ($(OPENBLAS_TARGET_ARCH),)
 OPENBLAS_DYNAMIC_ARCH:=0
-else
-OPENBLAS_DYNAMIC_ARCH:=1
-endif
 OPENBLAS_USE_THREAD:=1
 
 # Flags for using libraries available on the system instead of building them.
@@ -57,7 +51,7 @@ USE_SYSTEM_MPFR:=0
 USE_SYSTEM_LIBSUITESPARSE:=0
 USE_SYSTEM_LIBUV:=0
 USE_SYSTEM_UTF8PROC:=0
-USE_SYSTEM_MBEDTLS:=0
+USE_SYSTEM_OPENSSL:=0
 USE_SYSTEM_LIBSSH2:=0
 USE_SYSTEM_NGHTTP2:=0
 USE_SYSTEM_CURL:=0
@@ -65,9 +59,14 @@ USE_SYSTEM_LIBGIT2:=0
 USE_SYSTEM_PATCHELF:=0
 USE_SYSTEM_LIBWHICH:=0
 USE_SYSTEM_ZLIB:=0
+USE_SYSTEM_ZSTD:=0
 USE_SYSTEM_P7ZIP:=0
 USE_SYSTEM_LLD:=0
 
+# Link libjulia-internal with static libgcc and libstdc++
+USE_RT_STATIC_LIBGCC:=1
+USE_RT_STATIC_LIBSTDCXX:=1
+
 # Link to the LLVM shared library
 USE_LLVM_SHLIB := 1
 
@@ -92,6 +91,9 @@ WITH_DTRACE := 0
 # Enable ITTAPI integration
 WITH_ITTAPI := 0
 
+# Enable NVTX integration
+WITH_NVTX := 0
+
 # Enable Tracy support
 WITH_TRACY := 0
 WITH_TRACY_CALLSTACKS := 0
@@ -99,6 +101,9 @@ WITH_TRACY_CALLSTACKS := 0
 # Enable Timing Counts support
 WITH_TIMING_COUNTS := 0
 
+# Should --gc-sections/-dead_strip be used to remove unreferenced code?
+USE_LINKER_GC:=1
+
 # Prevent picking up $ARCH from the environment variables
 ARCH:=
 
@@ -111,10 +116,60 @@ endef
 COMMA:=,
 SPACE:=$(eval) $(eval)
 
+# define various helper macros for safe interpolation into various parsers
+shell_escape='$(subst ','\'',$1)'
+c_escape="$(subst ",\",$(subst \,\\,$1))"
+julia_escape=$(call c_escape,$1)
+
 # force a sane / stable configuration
 export LC_ALL=C
 export LANG=C
 
+# Respect `FORCE_COLOR` environment variable: <https://force-color.org/>.
+ifndef FORCE_COLOR
+FORCE_COLOR := ""
+endif
+
+# Respect `NO_COLOR` environment variable: <https://no-color.org/>.
+ifndef NO_COLOR
+NO_COLOR := ""
+endif
+
+# When both `FORCE_COLOR` and `NO_COLOR` are defined, the former has precedence.
+ifneq ($(FORCE_COLOR), "")
+NO_COLOR = ""
+endif
+
+WARNCOLOR:="\033[33;1m"
+ENDCOLOR:="\033[0m"
+
+CCCOLOR:="\033[34m"
+LINKCOLOR:="\033[34;1m"
+PERLCOLOR:="\033[35m"
+FLISPCOLOR:="\033[32m"
+JULIACOLOR:="\033[32;1m"
+DTRACECOLOR:="\033[32;1m"
+
+SRCCOLOR:="\033[33m"
+BINCOLOR:="\033[37;1m"
+JULCOLOR:="\033[34;1m"
+
+ifneq ($(NO_COLOR), "")
+WARNCOLOR:=""
+ENDCOLOR:=""
+
+CCCOLOR:=""
+LINKCOLOR:=""
+PERLCOLOR:=""
+FLISPCOLOR:=""
+JULIACOLOR:=""
+DTRACECOLOR:=""
+
+SRCCOLOR:=""
+BINCOLOR:=""
+JULCOLOR:=""
+endif
+
 # We need python for things like BB triplet recognition and relative path computation.
 # We don't really care about version, generally, so just find something that works:
 PYTHON := "$(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo "{python|python3|python2} not found")"
@@ -137,7 +192,7 @@ ifeq ($(BUILDROOT),)
 ifeq ("$(origin O)", "command line")
   BUILDROOT := $(abspath $O)
   BUILDDIR := $(abspath $(BUILDROOT)/$(call rel_path,$(JULIAHOME),$(SRCDIR)))
-  $(info $(shell printf '\033[32;1mBuilding into $(BUILDROOT)\033[0m')) # use printf to expand the escape sequences
+  $(info $(shell printf '$(JULIACOLOR)Building into $(BUILDROOT)$(ENDCOLOR)')) # use printf to expand the escape sequences
 else
   BUILDROOT:=$(JULIAHOME)
 endif
@@ -326,11 +381,15 @@ $(1)_rel_eval = $(call rel_path,$(2),$($(1)))
 $(1)_rel = $$(call hit_cache,$(1)_rel_eval)
 endef
 $(foreach D,libdir private_libdir datarootdir libexecdir private_libexecdir docdir sysconfdir includedir,$(eval $(call cache_rel_path,$(D),$(bindir))))
-$(foreach D,build_libdir build_private_libdir,$(eval $(call cache_rel_path,$(D),$(build_bindir))))
+$(foreach D,build_libdir build_private_libdir                                                           ,$(eval $(call cache_rel_path,$(D),$(build_bindir))))
 
 # Save a special one: reverse_private_libdir_rel: usually just `../`, but good to be general:
 reverse_private_libdir_rel_eval = $(call rel_path,$(private_libdir),$(libdir))
 reverse_private_libdir_rel = $(call hit_cache,reverse_private_libdir_rel_eval)
+reverse_private_libexecdir_rel_eval = $(call rel_path,$(private_libexecdir),$(private_libdir))
+reverse_private_libexecdir_rel = $(call hit_cache,reverse_private_libexecdir_rel_eval)
+reverse_build_private_libexecdir_rel_eval = $(call rel_path,$(build_private_libexecdir),$(build_libdir))
+reverse_build_private_libexecdir_rel = $(call hit_cache,reverse_build_private_libexecdir_rel_eval)
 
 INSTALL_F := $(JULIAHOME)/contrib/install.sh 644
 INSTALL_M := $(JULIAHOME)/contrib/install.sh 755
@@ -359,6 +418,10 @@ USE_MLIR := 0
 # Options to use RegionVectorizer
 USE_RV := 0
 
+# Use `ccache` for speeding up recompilation of the C/C++ part of Julia.
+# Requires the `ccache` executable to be in the `PATH` environment variable.
+USECCACHE := 0
+
 # Cross-compile
 #XC_HOST := i686-w64-mingw32
 #XC_HOST := x86_64-w64-mingw32
@@ -375,9 +438,10 @@ export PKG_CONFIG_PATH = $(JULIAHOME)/usr/lib/pkgconfig
 export PKG_CONFIG_LIBDIR = $(JULIAHOME)/usr/lib/pkgconfig
 
 # Figure out OS and architecture
-BUILD_OS := $(shell uname)
+RAW_BUILD_OS = $(shell uname)
+BUILD_OS := $(RAW_BUILD_OS)
 
-ifneq (,$(findstring CYGWIN,$(BUILD_OS)))
+ifneq (,$(findstring CYGWIN,$(RAW_BUILD_OS)))
 XC_HOST ?= $(shell uname -m)-w64-mingw32
 endif
 
@@ -440,11 +504,15 @@ endif
 # Set to 1 to enable profiling with perf
 ifeq ("$(OS)", "Linux")
 USE_PERF_JITEVENTS ?= 1
+ifeq ($(ARCH),x86_64)
 USE_INTEL_JITEVENTS ?= 1
-else
+else # ARCH x86_64
+USE_INTEL_JITEVENTS ?= 0
+endif # ARCH x86_64
+else # OS Linux
 USE_PERF_JITEVENTS ?= 0
 USE_INTEL_JITEVENTS ?= 0
-endif
+endif # OS Linux
 
 JULIACODEGEN := LLVM
 
@@ -461,6 +529,10 @@ endif
 
 # Compiler specific stuff
 
+ifneq ($(USECLANG)$(USEGCC),)
+$(error "These internal variables are not overridable. Set CC instead.")
+endif
+
 ifeq (default,$(origin CC))
 CC := $(CROSS_COMPILE)$(CC) # attempt to add cross-compiler prefix, if the user
                             # is not overriding the default, to form target-triple-cc (which
@@ -478,7 +550,7 @@ endif
 
 FC := $(CROSS_COMPILE)gfortran
 
-# Note: Supporting only macOS Yosemite and above
+# Note: Supporting only macOS Mojave and above
 ifeq ($(OS), Darwin)
 APPLE_ARCH := $(shell uname -m)
 ifneq ($(APPLE_ARCH),arm64)
@@ -488,26 +560,34 @@ MACOSX_VERSION_MIN := 11.0
 endif
 endif
 
-JCFLAGS_COMMON    := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64
-JCFLAGS_CLANG     := $(JCFLAGS_COMMON)
-JCFLAGS_GCC       := $(JCFLAGS_COMMON) -fno-gnu-unique
+# These are lazy expansion variables, so that arguments can be added to them later and they'll affect all of these uses too
+JCFLAGS_COMMON     = -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -Wformat -Wformat-security
+JCFLAGS_CLANG      = $(JCFLAGS_COMMON)
+JCFLAGS_GCC        = $(JCFLAGS_COMMON) -fno-gnu-unique
+
+
+# These flags are needed to generate decent debug info
+JCPPFLAGS_COMMON   = -fasynchronous-unwind-tables
+JCPPFLAGS_CLANG    = $(JCPPFLAGS_COMMON) -mllvm -enable-tail-merge=0
+JCPPFLAGS_GCC      = $(JCPPFLAGS_COMMON) -fno-tree-tail-merge
 
-# AArch64 needs this flag to generate the .eh_frame used by libunwind
-JCPPFLAGS_COMMON  := -fasynchronous-unwind-tables
-JCPPFLAGS_CLANG   := $(JCPPFLAGS_COMMON)
-JCPPFLAGS_GCC     := $(JCPPFLAGS_COMMON)
+JCXXFLAGS_COMMON   = -pipe $(fPIC) -fno-rtti -std=c++17 -Wformat -Wformat-security -fno-strict-aliasing
+JCXXFLAGS_CLANG    = $(JCXXFLAGS_COMMON) -pedantic
+JCXXFLAGS_GCC      = $(JCXXFLAGS_COMMON) -fno-gnu-unique
 
-JCXXFLAGS_COMMON  := -pipe $(fPIC) -fno-rtti -std=c++14
-JCXXFLAGS_CLANG   := $(JCXXFLAGS_COMMON) -pedantic
-JCXXFLAGS_GCC     := $(JCXXFLAGS_COMMON) -fno-gnu-unique
+DEBUGFLAGS_COMMON  = -O0 -DJL_DEBUG_BUILD -fstack-protector
+DEBUGFLAGS_CLANG   = $(DEBUGFLAGS_COMMON) -g
+DEBUGFLAGS_GCC     = $(DEBUGFLAGS_COMMON) -ggdb2
 
-DEBUGFLAGS_COMMON := -O0 -DJL_DEBUG_BUILD -fstack-protector
-DEBUGFLAGS_CLANG  := $(DEBUGFLAGS_COMMON) -g
-DEBUGFLAGS_GCC    := $(DEBUGFLAGS_COMMON) -ggdb2
+SHIPFLAGS_COMMON   = -O3
+SHIPFLAGS_CLANG    = $(SHIPFLAGS_COMMON) -g
+SHIPFLAGS_GCC      = $(SHIPFLAGS_COMMON) -ggdb2 -falign-functions
 
-SHIPFLAGS_COMMON  := -O3
-SHIPFLAGS_CLANG   := $(SHIPFLAGS_COMMON) -g
-SHIPFLAGS_GCC     := $(SHIPFLAGS_COMMON) -ggdb2 -falign-functions
+
+BOLT_LDFLAGS :=
+
+BOLT_CFLAGS_GCC    :=
+BOLT_CFLAGS_CLANG  :=
 
 ifeq ($(OS), Darwin)
 JCPPFLAGS_CLANG   += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1
@@ -521,21 +601,23 @@ endif
 ifeq ($(USEGCC),1)
 CC         := $(CROSS_COMPILE)gcc
 CXX        := $(CROSS_COMPILE)g++
-JCFLAGS    := $(JCFLAGS_GCC)
-JCPPFLAGS  := $(JCPPFLAGS_GCC)
-JCXXFLAGS  := $(JCXXFLAGS_GCC)
-DEBUGFLAGS := $(DEBUGFLAGS_GCC)
-SHIPFLAGS  := $(SHIPFLAGS_GCC)
+JCFLAGS     = $(JCFLAGS_GCC)
+JCPPFLAGS   = $(JCPPFLAGS_GCC)
+JCXXFLAGS   = $(JCXXFLAGS_GCC)
+DEBUGFLAGS  = $(DEBUGFLAGS_GCC)
+SHIPFLAGS   = $(SHIPFLAGS_GCC) $(BOLT_CFLAGS_GCC)
+BOLT_CFLAGS  := $(BOLT_CFLAGS_GCC)
 endif
 
 ifeq ($(USECLANG),1)
 CC         := $(CROSS_COMPILE)clang
 CXX        := $(CROSS_COMPILE)clang++
-JCFLAGS    := $(JCFLAGS_CLANG)
-JCPPFLAGS  := $(JCPPFLAGS_CLANG)
-JCXXFLAGS  := $(JCXXFLAGS_CLANG)
-DEBUGFLAGS := $(DEBUGFLAGS_CLANG)
-SHIPFLAGS  := $(SHIPFLAGS_CLANG)
+JCFLAGS     = $(JCFLAGS_CLANG)
+JCPPFLAGS   = $(JCPPFLAGS_CLANG)
+JCXXFLAGS   = $(JCXXFLAGS_CLANG)
+DEBUGFLAGS  = $(DEBUGFLAGS_CLANG)
+SHIPFLAGS   = $(SHIPFLAGS_CLANG) $(BOLT_CFLAGS_CLANG)
+BOLT_CFLAGS  := $(BOLT_CFLAGS_CLANG)
 
 ifeq ($(OS), Darwin)
 CC += -mmacosx-version-min=$(MACOSX_VERSION_MIN)
@@ -546,7 +628,17 @@ export MACOSX_DEPLOYMENT_TARGET=$(MACOSX_VERSION_MIN)
 endif
 endif
 
-JLDFLAGS :=
+# Conditional setting of RELRO flag for enhanced security on Linux builds.
+# RELRO (Read-Only Relocations) is a security feature that marks certain sections
+# of the binary as read-only to prevent exploitation techniques like
+# GOT (Global Offset Table) overwriting attacks.
+ifeq ($(OS),Linux)
+    RELRO_FLAG := -Wl,-z,relro
+else
+    RELRO_FLAG :=
+endif
+
+JLDFLAGS := $(RELRO_FLAG)
 
 ifeq ($(USECCACHE), 1)
 # Expand CC, CXX and FC here already because we want the original definition and not the ccache version.
@@ -567,19 +659,19 @@ CXX_BASE := ccache
 FC_BASE  := ccache
 ifeq ($(USECLANG),1)
 # ccache and Clang don't do well together
-# http://petereisentraut.blogspot.be/2011/05/ccache-and-clang.html
+# https://petereisentraut.blogspot.be/2011/05/ccache-and-clang.html
 CC += -Qunused-arguments
 CXX += -Qunused-arguments
-# http://petereisentraut.blogspot.be/2011/09/ccache-and-clang-part-2.html
+# https://petereisentraut.blogspot.be/2011/09/ccache-and-clang-part-2.html
 export CCACHE_CPP2 := yes
 endif
 else #USECCACHE
-CC_BASE := $(shell echo $(CC) | cut -d' ' -f1)
-CC_ARG := $(shell echo $(CC) | cut -s -d' ' -f2-)
-CXX_BASE := $(shell echo $(CXX) | cut -d' ' -f1)
-CXX_ARG := $(shell echo $(CXX) | cut -s -d' ' -f2-)
-FC_BASE := $(shell echo $(FC) 2>/dev/null | cut -d' ' -f1)
-FC_ARG := $(shell echo $(FC) 2>/dev/null | cut -s -d' ' -f2-)
+CC_BASE := $(shell printf "%s\n" $(call shell_escape,$(CC)) | cut -d' ' -f1)
+CC_ARG := $(shell printf "%s\n" $(call shell_escape,$(CC)) | cut -s -d' ' -f2-)
+CXX_BASE := $(shell printf "%s\n" $(call shell_escape,$(CXX)) | cut -d' ' -f1)
+CXX_ARG := $(shell printf "%s\n" $(call shell_escape,$(CXX)) | cut -s -d' ' -f2-)
+FC_BASE := $(shell printf "%s\n" $(call shell_escape,$(FC)) 2>/dev/null | cut -d' ' -f1)
+FC_ARG := $(shell printf "%s\n" $(call shell_escape,$(FC)) 2>/dev/null | cut -s -d' ' -f2-)
 endif
 
 JFFLAGS := -O2 $(fPIC)
@@ -595,23 +687,26 @@ CPP_STDOUT := $(CPP) -P
 # file extensions
 ifeq ($(OS), WINNT)
   SHLIB_EXT := dll
+  PATHSEP := ;
 else ifeq ($(OS), Darwin)
   SHLIB_EXT := dylib
+  PATHSEP := :
 else
   SHLIB_EXT := so
+  PATHSEP := :
 endif
 
 ifeq ($(OS),WINNT)
 define versioned_libname
-$$(if $(2),$(1)-$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT))
+$(if $(2),$(1)-$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT))
 endef
 else ifeq ($(OS),Darwin)
 define versioned_libname
-$$(if $(2),$(1).$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT))
+$(if $(2),$(1).$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT))
 endef
 else
 define versioned_libname
-$$(if $(2),$(1).$(SHLIB_EXT).$(2),$(1).$(SHLIB_EXT))
+$(if $(2),$(1).$(SHLIB_EXT).$(2),$(1).$(SHLIB_EXT))
 endef
 endif
 
@@ -663,7 +758,7 @@ JL_MAJOR_SHLIB_EXT := $(SHLIB_EXT).$(SOMAJOR)
 endif
 endif
 
-ifeq ($(OS), FreeBSD)
+ifneq ($(findstring $(OS),FreeBSD OpenBSD),)
 LOCALBASE ?= /usr/local
 else
 LOCALBASE ?= /usr
@@ -703,7 +798,7 @@ LDFLAGS += -L$(build_libdir) -Wl,-rpath,$(build_libdir)
 endif # gfortran
 endif # FreeBSD
 
-ifneq ($(CC_BASE)$(CXX_BASE),$(shell echo $(CC) | cut -d' ' -f1)$(shell echo $(CXX) | cut -d' ' -f1))
+ifneq ($(CC_BASE)$(CXX_BASE),$(shell printf "%s\n" $(call shell_escape,$(CC)) | cut -d' ' -f1)$(shell printf "%s\n" $(call shell_escape,$(CXX)) | cut -d' ' -f1))
     $(error Forgot override directive on CC or CXX in Make.user? Cowardly refusing to build)
 endif
 
@@ -713,13 +808,15 @@ ifneq ($(OS), Darwin)
 endif
 endif
 
+bootstrap_julia_flags :=
+
 ifeq ($(SANITIZE),1)
 SANITIZE_OPTS :=
 SANITIZE_LDFLAGS :=
 ifeq ($(SANITIZE_MEMORY),1)
 SANITIZE_OPTS += -fsanitize=memory -fsanitize-memory-track-origins -fno-omit-frame-pointer
 SANITIZE_LDFLAGS += $(SANITIZE_OPTS)
-ifneq ($(findstring $(OS),Linux FreeBSD),)
+ifneq ($(findstring $(OS),Linux FreeBSD OpenBSD),)
 SANITIZE_LDFLAGS += -Wl,--warn-unresolved-symbols
 endif # OS Linux or FreeBSD
 endif # SANITIZE_MEMORY=1
@@ -728,8 +825,11 @@ SANITIZE_OPTS += -fsanitize=address
 SANITIZE_LDFLAGS += -fsanitize=address -shared-libasan
 endif
 ifeq ($(SANITIZE_THREAD),1)
-SANITIZE_OPTS += -fsanitize=thread
+SANITIZE_OPTS += -fsanitize=thread -fsanitize-ignorelist=$(JULIAHOME)/contrib/tsan/ignorelist.txt
 SANITIZE_LDFLAGS += -fsanitize=thread
+ifneq ($(CROSS_BOOTSTRAP_JULIA),)
+bootstrap_julia_flags += --target-sanitize=thread
+endif
 endif
 ifeq ($(SANITIZE_OPTS),)
 $(error SANITIZE=1, but no sanitizer selected, set either SANITIZE_MEMORY, SANITIZE_THREAD, or SANITIZE_ADDRESS)
@@ -747,16 +847,65 @@ $(error "please install either GNU tar or bsdtar")
 endif
 endif
 
+# Do not try to extract owner uids, even if we're root (e.g. in sandboxes)
+TAR += --no-same-owner
+
 ifeq ($(WITH_GC_VERIFY), 1)
 JCXXFLAGS += -DGC_VERIFY
 JCFLAGS += -DGC_VERIFY
 endif
 
+ifneq ($(JL_STACK_SIZE),)
+JCXXFLAGS += -DJL_STACK_SIZE=$(JL_STACK_SIZE)
+JCFLAGS += -DJL_STACK_SIZE=$(JL_STACK_SIZE)
+endif
+
+
 ifeq ($(WITH_GC_DEBUG_ENV), 1)
 JCXXFLAGS += -DGC_DEBUG_ENV
 JCFLAGS += -DGC_DEBUG_ENV
 endif
 
+# By default we use the stock GC
+WITH_THIRD_PARTY_GC ?= 0
+
+# Convert to lowercase
+USE_THIRD_PARTY_GC := $(shell echo $(WITH_THIRD_PARTY_GC) | tr A-Z a-z)
+
+# NB: When adding a new GC, make sure to add -DWITH_THIRD_PARTY_HEAP=<#NEW_GC>
+# to the variables JCFLAGS and JCXXFLAGS according to the mapping (MMTK=1, #NEW_GC=2)
+
+ifeq (${USE_THIRD_PARTY_GC},mmtk)
+JCXXFLAGS += -DWITH_THIRD_PARTY_HEAP=1
+JCFLAGS += -DWITH_THIRD_PARTY_HEAP=1
+
+# Must specify a supported MMTk Plan: Immix or StickyImmix
+ifeq (${MMTK_PLAN},Immix)
+JCXXFLAGS += -DMMTK_PLAN_IMMIX
+JCFLAGS += -DMMTK_PLAN_IMMIX
+else ifeq (${MMTK_PLAN},StickyImmix)
+JCXXFLAGS += -DMMTK_PLAN_STICKYIMMIX
+JCFLAGS += -DMMTK_PLAN_STICKYIMMIX
+else
+$(error "Unsupported MMTk plan: $(MMTK_PLAN). Supported plan(s): Immix or StickyImmix.")
+endif
+
+# Do a release build on the binding by default
+MMTK_BUILD ?= release
+
+# Location of mmtk-julia binding
+# (needed for api/*.h and .so file)
+MMTK_JULIA_DIR ?= $(BUILDROOT)/usr/lib/mmtk_julia
+MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk
+MMTK_API_INC = ${MMTK_DIR}/api
+MMTK_LIB := -lmmtk_julia
+
+# Must specify a supported third-party GC or use the stock GC.
+# Supported third-party GCs: mmtk
+else ifneq (${USE_THIRD_PARTY_GC},0)
+$(error "Unsupported third-party GC: $(WITH_THIRD_PARTY_GC). Supported option(s): mmtk")
+endif
+
 ifeq ($(WITH_DTRACE), 1)
 JCXXFLAGS += -DUSE_DTRACE
 JCFLAGS += -DUSE_DTRACE
@@ -785,6 +934,28 @@ JCXXFLAGS += -DUSE_TIMING_COUNTS
 JCFLAGS += -DUSE_TIMING_COUNTS
 endif
 
+ifeq ($(WITH_NVTX), 1)
+JCXXFLAGS += -DUSE_NVTX
+JCFLAGS += -DUSE_NVTX
+endif
+
+ifneq ($(findstring $(OS),WINNT FreeBSD OpenBSD),)
+  USE_LINKER_GC := 0
+  USE_RT_STATIC_LIBGCC := 0
+  USE_RT_STATIC_LIBSTDCXX := 0
+endif
+
+# Linker garbage collection
+ifeq ($(USE_LINKER_GC), 1)
+ifeq ($(OS), Darwin)
+  JLDFLAGS += -Wl,-dead_strip
+else
+  JLDFLAGS += -Wl,--gc-sections
+  JCFLAGS += -ffunction-sections -fdata-sections
+  JCXXFLAGS += -ffunction-sections -fdata-sections
+endif
+endif
+
 # ===========================================================================
 
 # Select the cpu architecture to target, or automatically detects the user's compiler
@@ -852,6 +1023,19 @@ ARCH := $(BUILD_OS)
 endif
 endif
 
+
+ifeq (${USE_THIRD_PARTY_GC},mmtk)
+# MMTk is only available on x86_64 Linux for now
+ifeq ($(OS),Linux)
+MMTK_LIB_NAME := libmmtk_julia.so
+else
+$(error "Unsupported OS for MMTk")
+endif
+ifneq ($(ARCH),x86_64)
+$(error "Unsupported build architecture for MMTk")
+endif
+endif
+
 # Detect common pre-SSE2 JULIA_CPU_TARGET values known not to work (#7185)
 ifeq ($(MARCH),)
 ifneq ($(findstring $(ARCH),i386 i486 i586 i686),)
@@ -897,20 +1081,55 @@ else
 ISX86:=0
 endif
 
+
+#If nothing is set default to native unless we are cross-compiling
+ifeq ($(MARCH)$(MCPU)$(MTUNE)$(JULIA_CPU_TARGET)$(XC_HOST),)
+ifeq ($(ARCH),aarch64)
+# ARM recommends only setting MCPU for AArch64
+MCPU=native
+else ifneq (,$(findstring riscv64,$(ARCH)))
+# RISC-V doesn't have a native option
+$(error Building for RISC-V requires a specific MARCH to be set))
+else
+MARCH=native
+MTUNE=native
+endif
+endif
+
+# If we are running on x86 or x86_64, set certain options automatically
+ifeq (1,$(ISX86))
+OPENBLAS_DYNAMIC_ARCH:=1
+endif
+
 # If we are running on powerpc64le or ppc64le, set certain options automatically
 ifneq (,$(filter $(ARCH), powerpc64le ppc64le))
 JCFLAGS += -fsigned-char
+OPENBLAS_DYNAMIC_ARCH:=1
 OPENBLAS_TARGET_ARCH:=POWER8
 BINARY:=64
 # GCC doesn't do -march= on ppc64le
 MARCH=
 endif
 
+
 # If we are running on powerpc64 or ppc64, fail out dramatically
 ifneq (,$(filter $(ARCH), powerpc64 ppc64))
 $(error Big-endian PPC64 is not supported, to ignore this error, set ARCH=ppc64le)
 endif
 
+# Architecture and platform-specific compiler flags
+# Allow Clang to use CRC instructions (only applicable on AArch64) when no specific march is set
+ifneq (,$(findstring aarch64,$(ARCH)))
+ifeq (,$(MARCH))
+JCFLAGS_CLANG += -mcrc
+endif
+endif
+
+# Add platform-specific GCC flags
+ifeq ($(ISX86),1)
+SHIPFLAGS_GCC += -momit-leaf-frame-pointer
+endif
+
 # File name of make binary-dist result
 ifeq ($(JULIA_BINARYDIST_FILENAME),)
 DIST_OS:=$(shell echo $(OS) | tr '[:upper:]' '[:lower:]')
@@ -937,6 +1156,9 @@ endif
 ifneq (,$(findstring arm,$(ARCH)))
 DIST_ARCH:=arm
 endif
+ifneq (,$(findstring riscv64,$(ARCH)))
+DIST_ARCH:=riscv64
+endif
 
 JULIA_BINARYDIST_FILENAME := julia-$(JULIA_COMMIT)-$(DIST_OS)$(DIST_ARCH)
 endif
@@ -944,16 +1166,20 @@ endif
 # If we are running on ARM, set certain options automatically
 ifneq (,$(findstring arm,$(ARCH)))
 JCFLAGS += -fsigned-char
-USE_BLAS64:=0
-OPENBLAS_DYNAMIC_ARCH:=0
 OPENBLAS_TARGET_ARCH:=ARMV7
+BINARY:=32
 endif
 
 # If we are running on aarch64 (e.g. ARMv8 or ARM64), set certain options automatically
 ifneq (,$(findstring aarch64,$(ARCH)))
-OPENBLAS_DYNAMIC_ARCH:=0
+OPENBLAS_DYNAMIC_ARCH:=1
 OPENBLAS_TARGET_ARCH:=ARMV8
-USE_BLAS64:=1
+BINARY:=64
+endif
+
+# If we are running on riscv64, set certain options automatically
+ifneq (,$(findstring riscv64,$(ARCH)))
+OPENBLAS_DYNAMIC_ARCH:=1
 BINARY:=64
 endif
 
@@ -962,8 +1188,12 @@ ifneq ($(MARCH),)
 CC += -march=$(MARCH)
 CXX += -march=$(MARCH)
 FC += -march=$(MARCH)
+# On RISC-V, don't forward the MARCH ISA string to JULIA_CPU_TARGET,
+# as it's always incompatible with LLVM's CPU target name parser.
+ifeq (,$(findstring riscv64,$(ARCH)))
 JULIA_CPU_TARGET ?= $(MARCH)
 endif
+endif
 
 # Set MCPU-specific flags
 ifneq ($(MCPU),)
@@ -973,6 +1203,14 @@ FC += -mcpu=$(MCPU)
 JULIA_CPU_TARGET ?= $(MCPU)
 endif
 
+# Set MTUNE-specific flags
+ifneq ($(MTUNE),)
+CC += -mtune=$(MTUNE)
+CXX += -mtune=$(MTUNE)
+FC += -mtune=$(MTUNE)
+JULIA_CPU_TARGET ?= $(MTUNE)
+endif
+
 ifneq ($(MARCH)$(MCPU),)
 ifeq ($(OS),Darwin)
 # on Darwin, the standalone `as` program doesn't know
@@ -1018,32 +1256,20 @@ endif
 endif
 endif
 
-ifeq ($(USEGCC),1)
-ifeq ($(ISX86),1)
-  SHIPFLAGS += -momit-leaf-frame-pointer
-endif
-endif
 
 ifeq ($(OS),WINNT)
 LIBUNWIND:=
 else ifneq ($(DISABLE_LIBUNWIND), 0)
 LIBUNWIND:=
 else
-ifeq ($(USE_SYSTEM_LIBUNWIND), 1)
-ifneq ($(OS),Darwin)
 LIBUNWIND:=-lunwind
-# Only for linux since we want to use not yet released libunwind features
+ifneq ($(findstring $(OS),Darwin OpenBSD),)
+JCPPFLAGS+=-DLLVMLIBUNWIND
+else ifeq ($(USE_SYSTEM_LIBUNWIND), 1)
+# Only for linux and freebsd since we want to use not yet released gnu libunwind features
 JCFLAGS+=-DSYSTEM_LIBUNWIND
 JCPPFLAGS+=-DSYSTEM_LIBUNWIND
 endif
-else
-ifeq ($(OS),Darwin)
-LIBUNWIND:=-lunwind
-JCPPFLAGS+=-DLLVMLIBUNWIND
-else
-LIBUNWIND:=-lunwind
-endif
-endif
 endif
 
 ifeq ($(origin LLVM_CONFIG), undefined)
@@ -1171,7 +1397,7 @@ endif
 
 # We need python for things like BB triplet recognition.  We don't really care
 # about version, generally, so just find something that works:
-PYTHON := "$(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo not found)"
+PYTHON := $(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo not found)
 PYTHON_SYSTEM := $(shell $(PYTHON) -c 'from __future__ import print_function; import platform; print(platform.system())')
 
 # If we're running on Cygwin, but using a native-windows Python, we need to use cygpath -w
@@ -1229,14 +1455,19 @@ LIBGFORTRAN_VERSION := $(subst libgfortran,,$(filter libgfortran%,$(subst -,$(SP
 # shipped with CSL. Although we do not depend on any of the symbols, it is entirely
 # possible that a user might choose to install a library which depends on symbols provided
 # by a newer libstdc++. Without runtime detection, those libraries would break.
-CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.31|GLIBCXX_3\.5\.|GLIBCXX_4\.
+CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.34|GLIBCXX_3\.5\.|GLIBCXX_4\.
 
 
 # This is the set of projects that BinaryBuilder dependencies are hooked up for.
 # Note: we explicitly _do not_ define `CSL` here, since it requires some more
 # advanced techniques to decide whether it should be installed from a BB source
 # or not.  See `deps/csl.mk` for more detail.
-BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT
+BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP OPENSSL LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB ZSTD P7ZIP LLD LIBTRACYCLIENT BOLT
+
+ifeq (${USE_THIRD_PARTY_GC},mmtk)
+BB_PROJECTS += MMTK_JULIA
+endif
+
 define SET_BB_DEFAULT
 # First, check to see if BB is disabled on a global setting
 ifeq ($$(USE_BINARYBUILDER),0)
@@ -1301,11 +1532,9 @@ RPATH_LIB := $(RPATH_ORIGIN)
 
 # --whole-archive
 ifeq ($(OS), Darwin)
-  WHOLE_ARCHIVE := -Xlinker -all_load
-  NO_WHOLE_ARCHIVE :=
+  whole_archive = -Xlinker -force_load $(1)
 else
-  WHOLE_ARCHIVE := -Wl,--whole-archive
-  NO_WHOLE_ARCHIVE := -Wl,--no-whole-archive
+  whole_archive = -Wl,--whole-archive $(1) -Wl,--no-whole-archive
 endif
 
 # Initialize these once, then add to them in OS-specific blocks
@@ -1342,17 +1571,34 @@ OSLIBS += -lelf -lkvm -lrt -lpthread -latomic
 # make it loaded first to
 # prevent from linking to outdated system libs.
 # See #21788
+# TODO: Determine whether the condition here on AArch64 (added in #55089) should actually
+# be `ifneq ($(USE_BINARYBUILDER),0)`. We vendor a correctly versioned libgcc_s when using
+# BinaryBuilder which we want to link in early as noted above, but it could be the case
+# that without BinaryBuilder, regardless of architecture, we need to delay linking libgcc_s
+# to avoid getting the system one.
+ifeq (,$(findstring aarch64,$(ARCH)))
 OSLIBS += -lgcc_s
+endif
 
-OSLIBS += -Wl,--export-dynamic -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \
-	$(NO_WHOLE_ARCHIVE)
+OSLIBS += -Wl,--export-dynamic -Wl,--version-script=$(BUILDROOT)/src/julia.expmap -Wl,--no-whole-archive
+endif
+
+ifeq ($(OS), OpenBSD)
+JLDFLAGS += -Wl,--Bdynamic
+ifneq ($(SANITIZE),1)
+JLDFLAGS += -Wl,-no-undefined
+endif
+
+JLIBLDFLAGS += -Wl,-Bsymbolic-functions
+
+OSLIBS += -Wl,--no-as-needed -lpthread -lm -lc++abi -lc
+OSLIBS += -Wl,--whole-archive -lcompiler_rt -Wl,--no-whole-archive
+OSLIBS += -Wl,--export-dynamic,--as-needed,--version-script=$(BUILDROOT)/src/julia.expmap
 endif
 
 ifeq ($(OS), Darwin)
 SHLIB_EXT := dylib
 OSLIBS += -framework CoreFoundation
-WHOLE_ARCHIVE := -Xlinker -all_load
-NO_WHOLE_ARCHIVE :=
 HAVE_SSP := 1
 JLIBLDFLAGS += -Wl,-compatibility_version,$(SOMAJOR) -Wl,-current_version,$(JULIA_MAJOR_VERSION).$(JULIA_MINOR_VERSION).$(JULIA_PATCH_VERSION)
 endif
@@ -1360,12 +1606,14 @@ endif
 ifeq ($(OS), WINNT)
 HAVE_SSP := 1
 OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \
-	$(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32 -latomic
+	-Wl,--no-whole-archive -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32 -latomic -lole32
+# N.B.: Unlike in the sysimage, we cannot -Wl,--disable-auto-import -Wl,--disable-runtime-pseudo-reloc here, because libstdc++/LLVM are not fully correct under
+# enforced visibility at this point.
 JLDFLAGS += -Wl,--stack,8388608
 ifeq ($(ARCH),i686)
 JLDFLAGS += -Wl,--large-address-aware
 endif
-JCPPFLAGS += -D_WIN32_WINNT=0x0502
+JCPPFLAGS += -D_WIN32_WINNT=0x0602 # (0x0602 = _WIN32_WINNT_WIN8)
 UNTRUSTED_SYSTEM_LIBM := 1
 # Use hard links for files on windows, rather than soft links
 #   https://stackoverflow.com/questions/3648819/how-to-make-a-symbolic-link-with-cygwin-in-windows-7
@@ -1450,12 +1698,12 @@ endif
 # Note: we're passing *FLAGS here computed based on your system compiler to
 # clang. If that causes you problems, you might want to build and/or run
 # specific clang-sa-* files with clang explicitly selected:
-#   make CC=~+/../usr/tools/clang CXX=~+/../usr/tools/clang USECLANG=1 analyzegc
-#   make USECLANG=1 clang-sa-*
+#   make CC=~+/../usr/tools/clang CXX=~+/../usr/tools/clang analyzegc
+#   make clang-sa-*
 CLANGSA_FLAGS :=
 CLANGSA_CXXFLAGS :=
 ifeq ($(OS), Darwin) # on new XCode, the files are hidden
-CLANGSA_FLAGS += -isysroot $(shell xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
+ CLANGSA_FLAGS += -isysroot $(shell xcrun --show-sdk-path -sdk macosx)
 endif
 ifeq ($(USEGCC),1)
 # try to help clang find the c++ files for CC by guessing the value for --prefix
@@ -1477,6 +1725,12 @@ $(subst /,\\,$(subst $(shell $(2) pwd),$(shell $(2) cmd //C cd),$(abspath $(1)))
 endef
 endif
 
+ifeq ($(OS), WINNT)
+normalize_path = $(subst /,\,$1)
+else
+normalize_path = $1
+endif
+
 define symlink_target # (from, to-dir, to-name)
 CLEAN_TARGETS += clean-$$(abspath $(2)/$(3))
 clean-$$(abspath $(2)/$(3)):
@@ -1503,7 +1757,7 @@ endef
 WINE ?= wine
 
 ifeq ($(BINARY),32)
-HEAPLIM := --heap-size-hint=500M
+HEAPLIM := --heap-size-hint=1000M
 else
 HEAPLIM :=
 endif
@@ -1534,29 +1788,35 @@ JULIA_BUILD_MODE := debug
 endif
 endif
 
+ifneq ($(CROSS_BOOTSTRAP_JULIA),)
+JULIA_EXECUTABLE_debug := $(CROSS_BOOTSTRAP_JULIA)
+JULIA_EXECUTABLE_release := $(CROSS_BOOTSTRAP_JULIA)
+JULIA_EXECUTABLE := $(CROSS_BOOTSTRAP_JULIA)
+else
 JULIA_EXECUTABLE_debug := $(build_bindir)/julia-debug$(EXE)
 JULIA_EXECUTABLE_release := $(build_bindir)/julia$(EXE)
 JULIA_EXECUTABLE := $(JULIA_EXECUTABLE_$(JULIA_BUILD_MODE))
+endif
 
 JULIA_SYSIMG_debug := $(build_private_libdir)/sys-debug.$(SHLIB_EXT)
 JULIA_SYSIMG_release := $(build_private_libdir)/sys.$(SHLIB_EXT)
 JULIA_SYSIMG := $(JULIA_SYSIMG_$(JULIA_BUILD_MODE))
 
 define dep_lib_path
-$(shell $(PYTHON) $(call python_cygpath,$(JULIAHOME)/contrib/relative_path.py) $(1) $(2))
+$(call normalize_path,$(shell $(PYTHON) $(call python_cygpath,$(JULIAHOME)/contrib/relative_path.py) $(1) $(2)))
 endef
 
-LIBJULIAINTERNAL_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT))
-LIBJULIAINTERNAL_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIAINTERNAL_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIAINTERNAL_INSTALL_DEPLIB := $(call dep_lib_path,$(shlibdir),$(private_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT))
 
-LIBJULIAINTERNAL_DEBUG_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))
-LIBJULIAINTERNAL_DEBUG_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIAINTERNAL_DEBUG_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIAINTERNAL_DEBUG_INSTALL_DEPLIB := $(call dep_lib_path,$(shlibdir),$(private_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))
 
-LIBJULIACODEGEN_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))
-LIBJULIACODEGEN_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIACODEGEN_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIACODEGEN_INSTALL_DEPLIB := $(call dep_lib_path,$(shlibdir),$(private_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))
 
-LIBJULIACODEGEN_DEBUG_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))
-LIBJULIACODEGEN_DEBUG_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIACODEGEN_DEBUG_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIACODEGEN_DEBUG_INSTALL_DEPLIB := $(call dep_lib_path,$(shlibdir),$(private_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))
 
 ifeq ($(OS),WINNT)
 ifeq ($(BINARY),32)
@@ -1584,34 +1844,34 @@ endif
 
 # USE_SYSTEM_CSL causes it to get symlinked into build_private_shlibdir
 ifeq ($(USE_SYSTEM_CSL),1)
-LIBGCC_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_private_shlibdir)/$(LIBGCC_NAME))
+LIBGCC_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_private_shlibdir)/$(LIBGCC_NAME))
 else
-LIBGCC_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/$(LIBGCC_NAME))
+LIBGCC_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_shlibdir)/$(LIBGCC_NAME))
 endif
-LIBGCC_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBGCC_NAME))
+LIBGCC_INSTALL_DEPLIB := $(call dep_lib_path,$(shlibdir),$(private_shlibdir)/$(LIBGCC_NAME))
 
 # We only bother to define this on Linux, as that's the only platform that does libstdc++ probing
 # On all other platforms, the LIBSTDCXX_*_DEPLIB variables will be empty.
 ifeq ($(OS),Linux)
 LIBSTDCXX_NAME := libstdc++.so.6
 ifeq ($(USE_SYSTEM_CSL),1)
-LIBSTDCXX_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_private_shlibdir)/$(LIBSTDCXX_NAME))
+LIBSTDCXX_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_private_shlibdir)/$(LIBSTDCXX_NAME))
 else
-LIBSTDCXX_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/$(LIBSTDCXX_NAME))
+LIBSTDCXX_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_shlibdir)/$(LIBSTDCXX_NAME))
 endif
-LIBSTDCXX_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBSTDCXX_NAME))
+LIBSTDCXX_INSTALL_DEPLIB := $(call dep_lib_path,$(shlibdir),$(private_shlibdir)/$(LIBSTDCXX_NAME))
 endif
 
 
 # USE_SYSTEM_LIBM and USE_SYSTEM_OPENLIBM causes it to get symlinked into build_private_shlibdir
 ifeq ($(USE_SYSTEM_LIBM),1)
-LIBM_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_private_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
+LIBM_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_private_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
 else ifeq ($(USE_SYSTEM_OPENLIBM),1)
-LIBM_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_private_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
+LIBM_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_private_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
 else
-LIBM_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
+LIBM_BUILD_DEPLIB := $(call dep_lib_path,$(build_shlibdir),$(build_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
 endif
-LIBM_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
+LIBM_INSTALL_DEPLIB := $(call dep_lib_path,$(shlibdir),$(private_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
 
 # We list:
 #  * libgcc_s, because FreeBSD needs to load ours, not the system one.
@@ -1671,25 +1931,11 @@ ifndef VERBOSE
 VERBOSE := 0
 endif
 
-WARNCOLOR:="\033[33;1m"
-ENDCOLOR:="\033[0m"
-
 ifeq ($(VERBOSE), 0)
 
 QUIET_MAKE = -s
 
-CCCOLOR:="\033[34m"
-LINKCOLOR:="\033[34;1m"
-PERLCOLOR:="\033[35m"
-FLISPCOLOR:="\033[32m"
-JULIACOLOR:="\033[32;1m"
-DTRACECOLOR:="\033[32;1m"
-
-SRCCOLOR:="\033[33m"
-BINCOLOR:="\033[37;1m"
-JULCOLOR:="\033[34;1m"
-
-GOAL=$(subst ','\'',$(subst $(abspath $(JULIAHOME))/,,$(abspath $@)))
+GOAL=$(call shell_escape,$(subst $(abspath $(JULIAHOME))/,,$(abspath $@)))
 
 PRINT_CC = printf '    %b %b\n' $(CCCOLOR)CC$(ENDCOLOR) $(SRCCOLOR)$(GOAL)$(ENDCOLOR); $(1)
 PRINT_ANALYZE = printf '    %b %b\n' $(CCCOLOR)ANALYZE$(ENDCOLOR) $(SRCCOLOR)$(GOAL)$(ENDCOLOR); $(1)
@@ -1701,18 +1947,18 @@ PRINT_DTRACE = printf '    %b %b\n' $(DTRACECOLOR)DTRACE$(ENDCOLOR) $(BINCOLOR)$
 
 else
 QUIET_MAKE =
-PRINT_CC = echo '$(subst ','\'',$(1))'; $(1)
-PRINT_ANALYZE = echo '$(subst ','\'',$(1))'; $(1)
-PRINT_LINK = echo '$(subst ','\'',$(1))'; $(1)
-PRINT_PERL = echo '$(subst ','\'',$(1))'; $(1)
-PRINT_FLISP = echo '$(subst ','\'',$(1))'; $(1)
-PRINT_JULIA = echo '$(subst ','\'',$(1))'; $(1)
-PRINT_DTRACE = echo '$(subst ','\'',$(1))'; $(1)
+PRINT_CC = printf "%s\n" $(call shell_escape,$(1)); $(1)
+PRINT_ANALYZE = printf "%s\n" $(call shell_escape,$(1)); $(1)
+PRINT_LINK = printf "%s\n" $(call shell_escape,$(1)); $(1)
+PRINT_PERL = printf "%s\n" $(call shell_escape,$(1)); $(1)
+PRINT_FLISP = printf "%s\n" $(call shell_escape,$(1)); $(1)
+PRINT_JULIA = printf "%s\n" $(call shell_escape,$(1)); $(1)
+PRINT_DTRACE = printf "%s\n" $(call shell_escape,$(1)); $(1)
 
-endif
+endif # VERBOSE
 
 # Makefile debugging trick:
 # call print-VARIABLE to see the runtime value of any variable
 # (hardened against any special characters appearing in the output)
 print-%:
-	@echo '$*=$(subst ','\'',$(subst $(newline),\n,$($*)))'
+	@printf "%s\n" $(call shell_escape,$*)=$(call shell_escape,$(subst $(newline),\n,$($*)))
diff --git a/Makefile b/Makefile
index 5e9b4ccf5460a..7ce1734bab2ef 100644
--- a/Makefile
+++ b/Makefile
@@ -4,38 +4,41 @@ include $(JULIAHOME)/Make.inc
 include $(JULIAHOME)/deps/llvm-ver.make
 
 # Make sure the user didn't try to build in a path that will confuse the shell or make
-METACHARACTERS := [][?*{}() $$%:;&|!\#,\\`\":]\|/\./\|/\.\./
+METACHARACTERS := [][?*{}() $$%:;&|!\#,\\`\": ]\|/\./\|/\.\./
 ifneq (,$(findstring ',$(value BUILDROOT)))
 $(error cowardly refusing to build into directory with a single-quote in the path)
 endif
 ifneq (,$(findstring ',$(value JULIAHOME)))
 $(error cowardly refusing to build from source directory with a single-quote in the path)
 endif
-ifneq (,$(shell echo '$(value BUILDROOT)/' | grep '$(METACHARACTERS)'))
+ifneq (,$(shell printf "%s\n" $(call shell_escape,$(value BUILDROOT)/) | grep '$(METACHARACTERS)'))
 $(error cowardly refusing to build into directory with a shell-metacharacter in the path\
     (got: $(value BUILDROOT)))
 endif
-ifneq (,$(shell echo '$(value JULIAHOME)/' | grep '$(METACHARACTERS)'))
+ifneq (,$(shell printf "%s\n" $(call shell_escape,$(value JULIAHOME)/) | grep '$(METACHARACTERS)'))
 $(error cowardly refusing to build from source directory with a shell-metacharacter in the path\
     (got: $(value JULIAHOME)))
 endif
 
 VERSDIR := v`cut -d. -f1-2 < $(JULIAHOME)/VERSION`
 
+.PHONY: default
 default: $(JULIA_BUILD_MODE) # contains either "debug" or "release"
+
+.PHONY: all
 all: debug release
 
 # sort is used to remove potential duplicates
-DIRS := $(sort $(build_bindir) $(build_depsbindir) $(build_libdir) $(build_private_libdir) $(build_libexecdir) $(build_includedir) $(build_includedir)/julia $(build_sysconfdir)/julia $(build_datarootdir)/julia $(build_datarootdir)/julia/stdlib $(build_man1dir))
+DIRS := $(sort $(build_bindir) $(build_depsbindir) $(build_libdir) $(build_private_libdir) $(build_private_libexecdir) $(build_libexecdir) $(build_includedir) $(build_includedir)/julia $(build_sysconfdir)/julia $(build_datarootdir)/julia $(build_datarootdir)/julia/stdlib $(build_man1dir))
 ifneq ($(BUILDROOT),$(JULIAHOME))
 BUILDDIRS := $(BUILDROOT) $(addprefix $(BUILDROOT)/,base src src/flisp src/support src/clangsa cli doc deps stdlib test test/clangsa test/embedding test/gcext test/llvmpasses)
 BUILDDIRMAKE := $(addsuffix /Makefile,$(BUILDDIRS)) $(BUILDROOT)/sysimage.mk $(BUILDROOT)/pkgimage.mk
 DIRS += $(BUILDDIRS)
 $(BUILDDIRMAKE): | $(BUILDDIRS)
 	@# add Makefiles to the build directories for convenience (pointing back to the source location of each)
-	@echo '# -- This file is automatically generated in julia/Makefile -- #' > $@
-	@echo 'BUILDROOT=$(BUILDROOT)' >> $@
-	@echo 'include $(JULIAHOME)$(patsubst $(BUILDROOT)%,%,$@)' >> $@
+	@printf "%s\n" '# -- This file is automatically generated in julia/Makefile -- #' > $@
+	@printf "%s\n" 'BUILDROOT=$(BUILDROOT)' >> $@
+	@printf "%s\n" 'include $(JULIAHOME)$(patsubst $(BUILDROOT)%,%,$@)' >> $@
 julia-deps: | $(BUILDDIRMAKE)
 configure-y: | $(BUILDDIRMAKE)
 configure:
@@ -61,9 +64,14 @@ $(foreach link,base $(JULIAHOME)/test,$(eval $(call symlink_target,$(link),$$(bu
 julia_flisp.boot.inc.phony: julia-deps
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src julia_flisp.boot.inc.phony
 
+# Build the HTML docs (skipped if already exists, notably in tarballs)
+$(BUILDROOT)/doc/_build/html/en/index.html: $(shell find $(BUILDROOT)/base $(BUILDROOT)/doc \( -path $(BUILDROOT)/doc/_build -o -path $(BUILDROOT)/doc/deps -o -name *_constants.jl -o -name *_h.jl -o -name version_git.jl \) -prune -o -type f -print)
+	@$(MAKE) docs
+
+.PHONY: julia-symlink
 julia-symlink: julia-cli-$(JULIA_BUILD_MODE)
 ifeq ($(OS),WINNT)
-	echo '@"%~dp0/'"$$(echo '$(call rel_path,$(BUILDROOT),$(JULIA_EXECUTABLE))')"'" %*' | tr / '\\' > $(BUILDROOT)/julia.bat
+	printf '@"%%~dp0/%s" %%*\n' "$$(printf "%s\n" '$(call rel_path,$(BUILDROOT),$(JULIA_EXECUTABLE))')" | tr / '\\' > $(BUILDROOT)/julia.bat
 	chmod a+x $(BUILDROOT)/julia.bat
 else
 ifndef JULIA_VAGRANT_BUILD
@@ -71,50 +79,75 @@ ifndef JULIA_VAGRANT_BUILD
 endif
 endif
 
+TOP_LEVEL_PKGS := Compiler JuliaSyntax JuliaLowering
+
+TOP_LEVEL_PKG_LINK_TARGETS := $(addprefix $(build_datarootdir)/julia/,$(TOP_LEVEL_PKGS))
+
+# Generate symlinks for top level pkgs in usr/share/julia/
+$(foreach module, $(TOP_LEVEL_PKGS), $(eval $(call symlink_target,$$(JULIAHOME)/$(module),$$(build_datarootdir)/julia,$(module))))
+
+.PHONY: julia-deps
 julia-deps: | $(DIRS) $(build_datarootdir)/julia/base $(build_datarootdir)/julia/test
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/deps
 
 # `julia-stdlib` depends on `julia-deps` so that the fake JLL stdlibs can copy in their Artifacts.toml files.
+.PHONY: julia-stdlib
 julia-stdlib: | $(DIRS) julia-deps
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/stdlib
 
-julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl
+.PHONY: julia-base
+julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl $(build_datarootdir)/julia/juliac/juliac.jl $(build_datarootdir)/julia/juliac/abi_export.jl $(build_datarootdir)/julia/juliac/juliac-buildscript.jl $(build_datarootdir)/julia/juliac/juliac-trim-base.jl $(build_datarootdir)/julia/juliac/juliac-trim-stdlib.jl $(build_datarootdir)/julia/juliac/Artifacts.toml
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/base
 
 julia-libccalltest: julia-deps
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libccalltest
 
+julia-libccalllazyfoo: julia-deps
+	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libccalllazyfoo
+
+julia-libccalllazybar: julia-deps julia-libccalllazyfoo
+	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libccalllazybar
+
 julia-libllvmcalltest: julia-deps
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libllvmcalltest
 
+.PHONY: julia-src-release julia-src-debug
 julia-src-release julia-src-debug : julia-src-% : julia-deps julia_flisp.boot.inc.phony julia-cli-%
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src $*
 
+.PHONY: julia-cli-release julia-cli-debug
 julia-cli-release julia-cli-debug: julia-cli-% : julia-deps
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/cli $*
 
-julia-sysimg-ji : julia-stdlib julia-base julia-cli-$(JULIA_BUILD_MODE) julia-src-$(JULIA_BUILD_MODE) | $(build_private_libdir)
-	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-ji JULIA_EXECUTABLE='$(JULIA_EXECUTABLE)'
-
-julia-sysimg-bc : julia-stdlib julia-base julia-cli-$(JULIA_BUILD_MODE) julia-src-$(JULIA_BUILD_MODE) | $(build_private_libdir)
-	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-bc JULIA_EXECUTABLE='$(JULIA_EXECUTABLE)'
-
-julia-sysimg-release julia-sysimg-debug : julia-sysimg-% : julia-sysimg-ji julia-src-%
+.PHONY: julia-sysimg-release julia-sysimg-debug
+julia-sysimg-release julia-sysimg-debug : julia-sysimg-% : julia-src-% $(TOP_LEVEL_PKG_LINK_TARGETS) julia-stdlib julia-base julia-cli-% | $(build_private_libdir)
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-$*
 
-julia-debug julia-release : julia-% : julia-sysimg-% julia-src-% julia-symlink julia-libccalltest julia-libllvmcalltest julia-base-cache
+# Useful for cross-bootstrapping
+.PHONY: julia-sysbase-release julia-sysbase-debug
+julia-sysbase-release julia-sysbase-debug : julia-sysbase-% : julia-src-% $(TOP_LEVEL_PKG_LINK_TARGETS) julia-stdlib julia-base julia-cli-% | $(build_private_libdir)
+	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysbase-$*
+
+.PHONY: julia-debug julia-release
+julia-debug julia-release : julia-% : julia-sysimg-% julia-src-% julia-symlink julia-libccalltest \
+                                      julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest julia-base-cache
 
+.PHONY: stdlibs-cache-release stdlibs-cache-debug
 stdlibs-cache-release stdlibs-cache-debug : stdlibs-cache-% : julia-%
-	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f pkgimage.mk all-$*
+	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f pkgimage.mk $*
 
+.PHONY: debug release
 debug release : % : julia-% stdlibs-cache-%
 
-docs: julia-sysimg-$(JULIA_BUILD_MODE)
+.PHONY: docs
+docs: julia-sysimg-$(JULIA_BUILD_MODE) stdlibs-cache-$(JULIA_BUILD_MODE)
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/doc JULIA_EXECUTABLE='$(call spawn,$(JULIA_EXECUTABLE_$(JULIA_BUILD_MODE))) --startup-file=no'
 
+.PHONY: docs-revise
 docs-revise:
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/doc JULIA_EXECUTABLE='$(call spawn,$(JULIA_EXECUTABLE_$(JULIA_BUILD_MODE))) --startup-file=no' revise=true
 
+.PHONY: check-whitespace
 check-whitespace:
 ifneq ($(NO_GIT), 1)
 	@# Append the directory containing the julia we just built to the end of `PATH`,
@@ -124,6 +157,17 @@ else
 	$(warn "Skipping whitespace check because git is unavailable")
 endif
 
+.PHONY: fix-whitespace
+fix-whitespace:
+ifneq ($(NO_GIT), 1)
+	@# Append the directory containing the julia we just built to the end of `PATH`,
+	@# to give us the best chance of being able to run this check.
+	@PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" julia $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) --fix
+else
+	$(warn "Skipping whitespace fix because git is unavailable")
+endif
+
+.PHONY: release-candidate
 release-candidate: release testall
 	@$(JULIA_EXECUTABLE) $(JULIAHOME)/contrib/add_license_to_files.jl #add license headers
 	@#Check documentation
@@ -152,13 +196,14 @@ release-candidate: release testall
 	@echo 7. Clean out old .tar.gz files living in deps/, "\`git clean -fdx\`" seems to work	#"`
 	@echo 8. Replace github release tarball with tarballs created from make light-source-dist and make full-source-dist with USE_BINARYBUILDER=0
 	@echo 9. Check that 'make && make install && make test' succeed with unpacked tarballs even without Internet access.
-	@echo 10. Follow packaging instructions in doc/build/distributing.md to create binary packages for all platforms
-	@echo 11. Upload to AWS, update https://julialang.org/downloads and http://status.julialang.org/stable links
+	@echo 10. Follow packaging instructions in doc/src/devdocs/build/distributing.md to create binary packages for all platforms
+	@echo 11. Upload to AWS, update https://julialang.org/downloads and https://status.julialang.org/stable links
 	@echo 12. Update checksums on AWS for tarball and packaged binaries
 	@echo 13. Update versions.json. Wait at least 60 minutes before proceeding to step 14.
 	@echo 14. Push to Juliaup (https://github.com/JuliaLang/juliaup/wiki/Adding-a-Julia-version)
 	@echo 15. Announce on mailing lists
 	@echo 16. Change master to release-0.X in base/version.jl and base/version_git.sh as in 4cb1e20
+	@echo 17. Move NEWS.md contents to HISTORY.md
 	@echo
 
 $(build_man1dir)/julia.1: $(JULIAHOME)/doc/man/julia.1 | $(build_man1dir)
@@ -170,15 +215,17 @@ $(build_sysconfdir)/julia/startup.jl: $(JULIAHOME)/etc/startup.jl | $(build_sysc
 	@echo Creating usr/etc/julia/startup.jl
 	@cp $< $@
 
-$(build_datarootdir)/julia/julia-config.jl: $(JULIAHOME)/contrib/julia-config.jl | $(build_datarootdir)/julia
+$(build_datarootdir)/julia/%: $(JULIAHOME)/contrib/% | $(build_datarootdir)/julia
+	mkdir -p $(dir $@)
 	$(INSTALL_M) $< $(dir $@)
 
 $(build_depsbindir)/stringreplace: $(JULIAHOME)/contrib/stringreplace.c | $(build_depsbindir)
 	@$(call PRINT_CC, $(HOSTCC) -o $(build_depsbindir)/stringreplace $(JULIAHOME)/contrib/stringreplace.c)
 
+.PHONY: julia-base-cache
 julia-base-cache: julia-sysimg-$(JULIA_BUILD_MODE) | $(DIRS) $(build_datarootdir)/julia
-	@JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \
-		$(call spawn, $(JULIA_EXECUTABLE) --startup-file=no $(call cygpath_w,$(JULIAHOME)/etc/write_base_cache.jl) \
+	@JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) JULIA_FALLBACK_REPL=1 WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \
+		$(call spawn, $(JULIA_EXECUTABLE) --startup-file=no $(call cygpath_w,$(JULIAHOME)/contrib/write_base_cache.jl) \
 		$(call cygpath_w,$(build_datarootdir)/julia/base.cache))
 
 # public libraries, that are installed in $(prefix)/lib
@@ -189,14 +236,22 @@ JL_TARGETS := julia-debug
 endif
 
 # private libraries, that are installed in $(prefix)/lib/julia
-JL_PRIVATE_LIBS-0 := libccalltest libllvmcalltest
+JL_PRIVATE_LIBS-0 := libccalltest libccalllazyfoo libccalllazybar libllvmcalltest
+JL_PRIVATE_LIBS-1 := # libraries from USE_SYSTEM=1
+JL_PRIVATE_EXES := 7z
+JL_PRIVATE_TOOLS :=
 ifeq ($(JULIA_BUILD_MODE),release)
 JL_PRIVATE_LIBS-0 += libjulia-internal libjulia-codegen
 else ifeq ($(JULIA_BUILD_MODE),debug)
 JL_PRIVATE_LIBS-0 += libjulia-internal-debug libjulia-codegen-debug
 endif
+# BSD-3-Clause
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libcamd libccolamd libcolamd libsuitesparseconfig
+# LGPL-2.1+
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libbtf libklu libldl
 ifeq ($(USE_GPL_LIBS), 1)
-JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libbtf libcamd libccolamd libcholmod libcolamd libklu libldl librbio libspqr libsuitesparseconfig libumfpack
+# GPL-2.0+
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libcholmod librbio libspqr libumfpack
 endif
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBBLASTRAMPOLINE) += libblastrampoline
 JL_PRIVATE_LIBS-$(USE_SYSTEM_PCRE) += libpcre2-8
@@ -205,7 +260,7 @@ JL_PRIVATE_LIBS-$(USE_SYSTEM_GMP) += libgmp libgmpxx
 JL_PRIVATE_LIBS-$(USE_SYSTEM_MPFR) += libmpfr
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSSH2) += libssh2
 JL_PRIVATE_LIBS-$(USE_SYSTEM_NGHTTP2) += libnghttp2
-JL_PRIVATE_LIBS-$(USE_SYSTEM_MBEDTLS) += libmbedtls libmbedcrypto libmbedx509
+JL_PRIVATE_LIBS-$(USE_SYSTEM_OPENSSL) += libcrypto libssl
 JL_PRIVATE_LIBS-$(USE_SYSTEM_CURL) += libcurl
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBGIT2) += libgit2
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBUV) += libuv
@@ -214,9 +269,12 @@ JL_PRIVATE_LIBS-$(USE_SYSTEM_ZLIB) += zlib
 else
 JL_PRIVATE_LIBS-$(USE_SYSTEM_ZLIB) += libz
 endif
+JL_PRIVATE_LIBS-$(USE_SYSTEM_ZLIB) += libzstd
+JL_PRIVATE_EXES += zstd$(EXE) zstdmt$(EXE)
 ifeq ($(USE_LLVM_SHLIB),1)
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LLVM) += libLLVM $(LLVM_SHARED_LIB_NAME)
 endif
+JL_PRIVATE_TOOLS += lld$(EXE) dsymutil$(EXE)
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBUNWIND) += libunwind
 
 ifeq ($(USE_SYSTEM_LIBM),0)
@@ -239,9 +297,9 @@ JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libpthread
 endif
 ifeq ($(SANITIZE),1)
 ifeq ($(USECLANG),1)
-JL_PRIVATE_LIBS-1 += libclang_rt.asan
+JL_PRIVATE_LIBS-0 += libclang_rt.asan-*
 else
-JL_PRIVATE_LIBS-1 += libasan
+JL_PRIVATE_LIBS-0 += libasan
 endif
 endif
 
@@ -258,14 +316,37 @@ endif
 endif
 endif
 
+ifeq (${USE_THIRD_PARTY_GC},mmtk)
+# Make sure we use the right version of $MMTK_PLAN, $MMTK_MOVING and $MMTK_BUILD
+# if we use the BinaryBuilder version of mmtk-julia
+ifeq ($(USE_BINARYBUILDER_MMTK_JULIA),1)
+ifeq (${MMTK_PLAN},Immix)
+LIB_PATH_PLAN = immix
+else ifeq (${MMTK_PLAN},StickyImmix)
+LIB_PATH_PLAN = sticky
+endif
+
+ifeq ($(MMTK_MOVING), 1)
+LIB_PATH_MOVING := moving
+else
+LIB_PATH_MOVING := non_moving
+endif
+
+JL_PRIVATE_LIBS-0 += $(LIB_PATH_PLAN)/$(LIB_PATH_MOVING)/$(MMTK_BUILD)/libmmtk_julia
+else
+JL_PRIVATE_LIBS-0 += libmmtk_julia
+endif
+endif
+
 # Note that we disable MSYS2's path munging here, as otherwise
 # it replaces our `:`-separated list as a `;`-separated one.
 define stringreplace
-	MSYS2_ARG_CONV_EXCL='*' $(build_depsbindir)/stringreplace $$(strings -t x - '$1' | grep "$2" | awk '{print $$1;}') "$3" 255 "$(call cygpath_w,$1)"
+	MSYS2_ARG_CONV_EXCL='*' $(build_depsbindir)/stringreplace $$(strings -t x - '$1' | grep '$2' | awk '{print $$1;}') '$3' 255 '$(call cygpath_w,$1)'
 endef
 
 
-install: $(build_depsbindir)/stringreplace docs
+.PHONY: install
+install: $(build_depsbindir)/stringreplace $(BUILDROOT)/doc/_build/html/en/index.html
 	@$(MAKE) $(QUIET_MAKE) $(JULIA_BUILD_MODE)
 	@for subdir in $(bindir) $(datarootdir)/julia/stdlib/$(VERSDIR) $(docdir) $(man1dir) $(includedir)/julia $(libdir) $(private_libdir) $(sysconfdir) $(private_libexecdir); do \
 		mkdir -p $(DESTDIR)$$subdir; \
@@ -273,39 +354,33 @@ install: $(build_depsbindir)/stringreplace docs
 
 	$(INSTALL_M) $(JULIA_EXECUTABLE_$(JULIA_BUILD_MODE)) $(DESTDIR)$(bindir)/
 ifeq ($(OS),WINNT)
-	-$(INSTALL_M) $(wildcard $(build_bindir)/*.dll) $(DESTDIR)$(bindir)/
+	$(INSTALL_M) $(wildcard $(build_bindir)/*.dll) $(DESTDIR)$(bindir)/
 ifeq ($(JULIA_BUILD_MODE),release)
-	-$(INSTALL_M) $(build_libdir)/libjulia.dll.a $(DESTDIR)$(libdir)/
-	-$(INSTALL_M) $(build_libdir)/libjulia-internal.dll.a $(DESTDIR)$(libdir)/
+	$(INSTALL_M) $(build_libdir)/libjulia.dll.a $(DESTDIR)$(libdir)/
+	$(INSTALL_M) $(build_libdir)/libjulia-internal.dll.a $(DESTDIR)$(libdir)/
 else ifeq ($(JULIA_BUILD_MODE),debug)
-	-$(INSTALL_M) $(build_libdir)/libjulia-debug.dll.a $(DESTDIR)$(libdir)/
-	-$(INSTALL_M) $(build_libdir)/libjulia-internal-debug.dll.a $(DESTDIR)$(libdir)/
-endif
-
-	# We have a single exception; we want 7z.dll to live in private_libexecdir, not bindir, so that 7z.exe can find it.
-	-mv $(DESTDIR)$(bindir)/7z.dll $(DESTDIR)$(private_libexecdir)/
-	-$(INSTALL_M) $(build_bindir)/libopenlibm.dll.a $(DESTDIR)$(libdir)/
-	-$(INSTALL_M) $(build_libdir)/libssp.dll.a $(DESTDIR)$(libdir)/
-	# The rest are compiler dependencies, as an example memcpy is exported by msvcrt
-	# These are files from mingw32 and required for creating shared libraries like our caches.
-	-$(INSTALL_M) $(build_libdir)/libgcc_s.a $(DESTDIR)$(libdir)/
-	-$(INSTALL_M) $(build_libdir)/libgcc.a $(DESTDIR)$(libdir)/
-	-$(INSTALL_M) $(build_libdir)/libmsvcrt.a $(DESTDIR)$(libdir)/
+	$(INSTALL_M) $(build_libdir)/libjulia-debug.dll.a $(DESTDIR)$(libdir)/
+	$(INSTALL_M) $(build_libdir)/libjulia-internal-debug.dll.a $(DESTDIR)$(libdir)/
+endif
+	$(INSTALL_M) $(filter-out %-bc.a %-o.a,$(wildcard $(build_private_libdir)/lib*.a)) $(DESTDIR)$(private_libdir)/
+
+	$(INSTALL_M) $(build_bindir)/libopenlibm.dll.a $(DESTDIR)$(libdir)/
+	$(INSTALL_M) $(build_libdir)/libssp.dll.a $(DESTDIR)$(libdir)/
 else
 
 # Copy over .dSYM directories directly for Darwin
 ifneq ($(DARWIN_FRAMEWORK),1)
 ifeq ($(OS),Darwin)
 ifeq ($(JULIA_BUILD_MODE),release)
-	-cp -a $(build_libdir)/libjulia.*.dSYM $(DESTDIR)$(libdir)
-	-cp -a $(build_libdir)/libjulia-internal.*.dSYM $(DESTDIR)$(private_libdir)
-	-cp -a $(build_libdir)/libjulia-codegen.*.dSYM $(DESTDIR)$(private_libdir)
-	-cp -a $(build_private_libdir)/sys.dylib.dSYM $(DESTDIR)$(private_libdir)
+	cp -a $(build_libdir)/libjulia.*.dSYM $(DESTDIR)$(libdir)
+	cp -a $(build_libdir)/libjulia-internal.*.dSYM $(DESTDIR)$(private_libdir)
+	cp -a $(build_libdir)/libjulia-codegen.*.dSYM $(DESTDIR)$(private_libdir)
+	cp -a $(build_private_libdir)/sys.dylib.dSYM $(DESTDIR)$(private_libdir)
 else ifeq ($(JULIA_BUILD_MODE),debug)
-	-cp -a $(build_libdir)/libjulia-debug.*.dSYM $(DESTDIR)$(libdir)
-	-cp -a $(build_libdir)/libjulia-internal-debug.*.dSYM $(DESTDIR)$(private_libdir)
-	-cp -a $(build_libdir)/libjulia-codegen-debug.*.dSYM $(DESTDIR)$(private_libdir)
-	-cp -a $(build_private_libdir)/sys-debug.dylib.dSYM $(DESTDIR)$(private_libdir)
+	cp -a $(build_libdir)/libjulia-debug.*.dSYM $(DESTDIR)$(libdir)
+	cp -a $(build_libdir)/libjulia-internal-debug.*.dSYM $(DESTDIR)$(private_libdir)
+	cp -a $(build_libdir)/libjulia-codegen-debug.*.dSYM $(DESTDIR)$(private_libdir)
+	cp -a $(build_private_libdir)/sys-debug.dylib.dSYM $(DESTDIR)$(private_libdir)
 endif
 endif
 
@@ -313,7 +388,7 @@ endif
 	for suffix in $(JL_TARGETS) ; do \
 		for lib in $(build_libdir)/lib$${suffix}.*$(SHLIB_EXT)*; do \
 			if [ "$${lib##*.}" != "dSYM" ]; then \
-				$(INSTALL_M) $$lib $(DESTDIR)$(libdir) ; \
+				$(INSTALL_M) $$lib $(DESTDIR)$(libdir) || exit 1; \
 			fi \
 		done \
 	done
@@ -333,26 +408,24 @@ endif
 	for suffix in $(JL_PRIVATE_LIBS-0) ; do \
 		for lib in $(build_libdir)/$${suffix}.*$(SHLIB_EXT)*; do \
 			if [ "$${lib##*.}" != "dSYM" ]; then \
-				$(INSTALL_M) $$lib $(DESTDIR)$(private_libdir) ; \
+				$(INSTALL_M) $$lib $(DESTDIR)$(private_libdir) || exit 1; \
 			fi \
 		done \
 	done
 	for suffix in $(JL_PRIVATE_LIBS-1) ; do \
 		for lib in $(build_private_libdir)/$${suffix}.$(SHLIB_EXT)*; do \
 			if [ "$${lib##*.}" != "dSYM" ]; then \
-				$(INSTALL_M) $$lib $(DESTDIR)$(private_libdir) ; \
+				$(INSTALL_M) $$lib $(DESTDIR)$(private_libdir) || exit 1; \
 			fi \
 		done \
 	done
 endif
-	# Install `7z` into private_libexecdir
-	$(INSTALL_M) $(build_bindir)/7z$(EXE) $(DESTDIR)$(private_libexecdir)/
-
-	# Install `lld` into private_libexecdir
-	$(INSTALL_M) $(build_depsbindir)/lld$(EXE) $(DESTDIR)$(private_libexecdir)/
-
-	# Install `dsymutil` into private_libexecdir/
-	$(INSTALL_M) $(build_depsbindir)/dsymutil$(EXE) $(DESTDIR)$(private_libexecdir)/
+	for exe in $(JL_PRIVATE_EXES) ; do \
+		$(INSTALL_M) $(build_private_libexecdir)/$$exe $(DESTDIR)$(private_libexecdir) || exit 1; \
+	done
+	for exe in $(JL_PRIVATE_TOOLS) ; do \
+		$(INSTALL_M) $(build_depsbindir)/$$exe $(DESTDIR)$(private_libexecdir) || exit 1; \
+	done
 
 	# Copy public headers
 	cp -R -L $(build_includedir)/julia/* $(DESTDIR)$(includedir)/julia
@@ -368,6 +441,11 @@ endif
 	cp -R -L $(JULIAHOME)/base/* $(DESTDIR)$(datarootdir)/julia/base
 	cp -R -L $(JULIAHOME)/test/* $(DESTDIR)$(datarootdir)/julia/test
 	cp -R -L $(build_datarootdir)/julia/* $(DESTDIR)$(datarootdir)/julia
+
+	# Set .jl sources as read-only to match package directories
+	find $(DESTDIR)$(datarootdir)/julia/base -type f -name \*.jl -exec chmod 0444 '{}' \;
+	find $(DESTDIR)$(datarootdir)/julia/test -type f -name \*.jl -exec chmod 0444 '{}' \;
+
 	# Copy documentation
 	cp -R -L $(BUILDROOT)/doc/_build/html $(DESTDIR)$(docdir)/
 	# Remove various files which should not be installed
@@ -387,21 +465,25 @@ endif
 	mkdir -p $(DESTDIR)$(datarootdir)/applications/
 	$(INSTALL_F) $(JULIAHOME)/contrib/julia.desktop $(DESTDIR)$(datarootdir)/applications/
 	# Install appdata file
-	mkdir -p $(DESTDIR)$(datarootdir)/appdata/
-	$(INSTALL_F) $(JULIAHOME)/contrib/julia.appdata.xml $(DESTDIR)$(datarootdir)/appdata/
+	mkdir -p $(DESTDIR)$(datarootdir)/metainfo/
+	$(INSTALL_F) $(JULIAHOME)/contrib/julia.appdata.xml $(DESTDIR)$(datarootdir)/metainfo/
+	# Install terminal info database
+ifneq ($(WITH_TERMINFO),0)
+	cp -R -L $(build_datarootdir)/julia/terminfo $(DESTDIR)$(datarootdir)/julia/
+endif
 
 	# Update RPATH entries and JL_SYSTEM_IMAGE_PATH if $(private_libdir_rel) != $(build_private_libdir_rel)
 ifneq ($(private_libdir_rel),$(build_private_libdir_rel))
 ifeq ($(OS), Darwin)
 ifneq ($(DARWIN_FRAMEWORK),1)
 	for j in $(JL_TARGETS) ; do \
-		install_name_tool -rpath @executable_path/$(build_private_libdir_rel) @executable_path/$(private_libdir_rel) $(DESTDIR)$(bindir)/$$j; \
-		install_name_tool -add_rpath @executable_path/$(build_libdir_rel) @executable_path/$(libdir_rel) $(DESTDIR)$(bindir)/$$j; \
+		install_name_tool -rpath @executable_path/$(build_private_libdir_rel) @executable_path/$(private_libdir_rel) $(DESTDIR)$(bindir)/$$j || exit 1; \
+		install_name_tool -rpath @executable_path/$(build_libdir_rel) @executable_path/$(libdir_rel) $(DESTDIR)$(bindir)/$$j || exit 1; \
 	done
 endif
 else ifneq (,$(findstring $(OS),Linux FreeBSD))
 	for j in $(JL_TARGETS) ; do \
-		$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN/$(private_libdir_rel):$$ORIGIN/$(libdir_rel)' $(DESTDIR)$(bindir)/$$j; \
+		$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN/$(private_libdir_rel):$$ORIGIN/$(libdir_rel)' $(DESTDIR)$(bindir)/$$j || exit 1; \
 	done
 endif
 
@@ -424,11 +506,11 @@ endif
 ifeq ($(OS), Darwin)
 ifneq ($(DARWIN_FRAMEWORK),1)
 ifeq ($(JULIA_BUILD_MODE),release)
-	install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT)
-	install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-codegen.$(SHLIB_EXT)
+	install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel) $(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT)
+	install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel) $(DESTDIR)$(private_libdir)/libjulia-codegen.$(SHLIB_EXT)
 else ifeq ($(JULIA_BUILD_MODE),debug)
-	install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT)
-	install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-codegen-debug.$(SHLIB_EXT)
+	install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel) $(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT)
+	install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel) $(DESTDIR)$(private_libdir)/libjulia-codegen-debug.$(SHLIB_EXT)
 endif
 endif
 else ifneq (,$(findstring $(OS),Linux FreeBSD))
@@ -439,13 +521,43 @@ else ifeq ($(JULIA_BUILD_MODE),debug)
 	$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT)
 	$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-codegen-debug.$(SHLIB_EXT)
 endif
+endif
+
+ifeq ($(OS), Darwin)
+ifneq ($(DARWIN_FRAMEWORK),1)
+	for j in $(JL_PRIVATE_TOOLS) ; do \
+		[ -L $(DESTDIR)$(private_libexecdir)/$$j ] && continue; \
+		install_name_tool -rpath @loader_path/$(build_libdir_rel) @executable_path/$(reverse_private_libexecdir_rel) $(DESTDIR)$(private_libexecdir)/$$j || exit 1; \
+	done
+endif
+else ifneq (,$(findstring $(OS),Linux FreeBSD))
+	for j in $(JL_PRIVATE_TOOLS) ; do \
+		[ -L $(DESTDIR)$(private_libexecdir)/$$j ] && continue; \
+		$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN/$(reverse_private_libexecdir_rel)' $(DESTDIR)$(private_libexecdir)/$$j || exit 1; \
+	done
+endif
+
+ifneq ($(reverse_private_libexecdir_rel),$(reverse_build_private_libexecdir_rel))
+ifeq ($(OS), Darwin)
+ifneq ($(DARWIN_FRAMEWORK),1)
+	for j in $(JL_PRIVATE_EXES) ; do \
+		[ $$j = 7z ] && continue; \
+		[ -L $(DESTDIR)$(private_libexecdir)/$$j ] && continue; \
+		install_name_tool -rpath @executable_path/$(reverse_build_private_libexecdir_rel) @executable_path/$(reverse_private_libexecdir_rel) $(DESTDIR)$(private_libexecdir)/$$j || exit 1; \
+	done
+endif
+else ifneq (,$(findstring $(OS),Linux FreeBSD))
+	for j in $(JL_PRIVATE_EXES) ; do \
+		[ -L $(DESTDIR)$(private_libexecdir)/$$j ] && continue; \
+		$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN/$(reverse_private_libexecdir_rel)' $(DESTDIR)$(private_libexecdir)/$$j || exit 1; \
+	done
+endif
 endif
 
 	# Fix rpaths for dependencies. This should be fixed in BinaryBuilder later.
 ifeq ($(OS), Linux)
-	-$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN' $(DESTDIR)$(private_shlibdir)/libLLVM.$(SHLIB_EXT)
+	$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN' $(DESTDIR)$(private_shlibdir)/libLLVM.$(SHLIB_EXT)
 endif
-
 ifneq ($(LOADER_BUILD_DEP_LIBS),$(LOADER_INSTALL_DEP_LIBS))
 	# Next, overwrite relative path to libjulia-internal in our loader if $$(LOADER_BUILD_DEP_LIBS) != $$(LOADER_INSTALL_DEP_LIBS)
 ifeq ($(JULIA_BUILD_MODE),release)
@@ -465,7 +577,7 @@ ifeq ($(OS),FreeBSD)
 	# don't set libgfortran's RPATH, it won't be able to find its friends on systems
 	# that don't have the exact GCC port installed used for the build.
 	for lib in $(DESTDIR)$(private_libdir)/libgfortran*$(SHLIB_EXT)*; do \
-		$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN' $$lib; \
+		$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN' $$lib || exit 1; \
 	done
 endif
 
@@ -479,6 +591,7 @@ endif
 distclean:
 	-rm -fr $(BUILDROOT)/julia-*.tar.gz $(BUILDROOT)/julia*.exe $(BUILDROOT)/julia-$(JULIA_COMMIT)
 
+.PHONY: binary-dist
 binary-dist: distclean
 ifeq ($(USE_SYSTEM_BLAS),0)
 ifeq ($(ISX86),1)
@@ -509,7 +622,7 @@ endif
 ifeq ($(OS), WINNT)
 	cd $(BUILDROOT)/julia-$(JULIA_COMMIT)/bin && rm -f llvm* llc.exe lli.exe opt.exe LTO.dll bugpoint.exe macho-dump.exe
 endif
-	cd $(BUILDROOT) && $(TAR) zcvf $(JULIA_BINARYDIST_FILENAME).tar.gz julia-$(JULIA_COMMIT)
+	cd $(BUILDROOT) && $(TAR) -zcvf $(JULIA_BINARYDIST_FILENAME).tar.gz julia-$(JULIA_COMMIT)
 
 
 exe:
@@ -525,7 +638,8 @@ app:
 darwinframework:
 	$(MAKE) -C $(JULIAHOME)/contrib/mac/framework
 
-light-source-dist.tmp: docs
+.PHONY: light-source-dist.tmp
+light-source-dist.tmp: $(BUILDROOT)/doc/_build/html/en/index.html
 ifneq ($(BUILDROOT),$(JULIAHOME))
 	$(error make light-source-dist does not work in out-of-tree builds)
 endif
@@ -547,6 +661,7 @@ endif
 	find doc/_build/html >> light-source-dist.tmp
 
 # Make tarball with only Julia code + stdlib tarballs
+.PHONY: light-source-dist
 light-source-dist: light-source-dist.tmp
 	# Prefix everything with "julia-$(commit-sha)/" or "julia-$(version)/" and then create tarball
 	# To achieve prefixing, we temporarily create a symlink in the source directory that points back
@@ -556,10 +671,12 @@ light-source-dist: light-source-dist.tmp
 	tar -cz --no-recursion -T light-source-dist.tmp1 -f julia-$(JULIA_VERSION)_$(JULIA_COMMIT).tar.gz
 	rm julia-${JULIA_COMMIT}
 
+.PHONY: source-dist
 source-dist:
 	@echo \'source-dist\' target is deprecated: use \'full-source-dist\' instead.
 
 # Make tarball with Julia code plus all dependencies
+.PHONY: full-source-dist
 full-source-dist: light-source-dist.tmp
 	# Get all the dependencies downloaded
 	@$(MAKE) -C deps getall DEPS_GIT=0 USE_BINARYBUILDER=0
@@ -576,6 +693,7 @@ full-source-dist: light-source-dist.tmp
 	tar -cz --no-recursion -T full-source-dist.tmp1 -f julia-$(JULIA_VERSION)_$(JULIA_COMMIT)-full.tar.gz
 	rm julia-${JULIA_COMMIT}
 
+.PHONY: clean
 clean: | $(CLEAN_TARGETS)
 	@-$(MAKE) -C $(BUILDROOT)/base clean
 	@-$(MAKE) -C $(BUILDROOT)/doc clean
@@ -583,6 +701,7 @@ clean: | $(CLEAN_TARGETS)
 	@-$(MAKE) -C $(BUILDROOT)/cli clean
 	@-$(MAKE) -C $(BUILDROOT)/test clean
 	@-$(MAKE) -C $(BUILDROOT)/stdlib clean
+	@-$(MAKE) -C $(BUILDROOT) -f pkgimage.mk clean
 	-rm -f $(BUILDROOT)/julia
 	-rm -f $(BUILDROOT)/*.tar.gz
 	-rm -f $(build_depsbindir)/stringreplace \
@@ -592,49 +711,53 @@ clean: | $(CLEAN_TARGETS)
 # Teporarily add this line to the Makefile to remove extras
 	-rm -fr $(build_datarootdir)/julia/extras
 
+.PHONY: cleanall
 cleanall: clean
 	@-$(MAKE) -C $(BUILDROOT)/src clean-flisp clean-support
-	@-$(MAKE) -C $(BUILDROOT)/deps clean-libuv
+	@-$(MAKE) -C $(BUILDROOT)/deps clean-libuv clean-utf8proc
 	-rm -fr $(build_prefix) $(build_staging)
 
+.PHONY: distcleanall
 distcleanall: cleanall
 	@-$(MAKE) -C $(BUILDROOT)/stdlib distclean
 	@-$(MAKE) -C $(BUILDROOT)/deps distcleanall
 	@-$(MAKE) -C $(BUILDROOT)/doc cleanall
 
 .FORCE:
-.PHONY: .FORCE default debug release check-whitespace release-candidate \
-	julia-debug julia-release julia-stdlib julia-deps julia-deps-libs \
-	julia-cli-release julia-cli-debug julia-src-release julia-src-debug \
-	julia-symlink julia-base julia-sysimg julia-sysimg-ji julia-sysimg-release julia-sysimg-debug \
-	test testall testall1 test \
-	clean distcleanall cleanall $(CLEAN_TARGETS) \
-	run-julia run-julia-debug run-julia-release run \
-	install binary-dist light-source-dist.tmp light-source-dist \
-	dist full-source-dist source-dist
+.PHONY: .FORCE
+
+.PHONY: $(CLEAN_TARGETS)
+
+# Generate compilation database (leverages existing clang tooling setup)
+.PHONY: compile-database
+compile-database:
+	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src compile-database
 
+.PHONY: test
 test: check-whitespace $(JULIA_BUILD_MODE)
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test default JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)
 
+.PHONY: testall
 testall: check-whitespace $(JULIA_BUILD_MODE)
 	cp $(JULIA_SYSIMG) $(BUILDROOT)/local.$(SHLIB_EXT)
 	$(call spawn,$(JULIA_EXECUTABLE) -J $(call cygpath_w,$(BUILDROOT)/local.$(SHLIB_EXT)) -e 'true')
 	rm $(BUILDROOT)/local.$(SHLIB_EXT)
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)
 
+.PHONY: testall1
 testall1: check-whitespace $(JULIA_BUILD_MODE)
 	@env JULIA_CPU_THREADS=1 $(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)
 
 test-%: check-whitespace $(JULIA_BUILD_MODE) .FORCE
 	@([ $$(( $$(date +%s) - $$(date -r $(build_private_libdir)/sys.$(SHLIB_EXT) +%s) )) -le 100 ] && \
-		printf '\033[93m    HINT The system image was recently rebuilt. Are you aware of the test-revise-* targets? See CONTRIBUTING.md. \033[0m\n') || true
+		printf '$(WARNCOLOR)    HINT The system image was recently rebuilt. Are you aware of the test-revise-* targets? See CONTRIBUTING.md. $(ENDCOLOR)\n') || true
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test $* JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)
 
 test-revise-%: .FORCE
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test revise-$* JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)
 
 # download target for some hardcoded windows dependencies
-.PHONY: win-extras wine_path
+.PHONY: win-extras
 win-extras:
 	@$(MAKE) -C $(BUILDROOT)/deps install-p7zip
 	mkdir -p $(JULIAHOME)/dist-extras
@@ -647,7 +770,7 @@ win-extras:
 ifeq ($(USE_SYSTEM_LLVM), 1)
 LLVM_SIZE := llvm-size$(EXE)
 else
-LLVM_SIZE := $(build_depsbindir)/llvm-size$(EXE)
+LLVM_SIZE := PATH=$(build_bindir):$$PATH; $(build_depsbindir)/llvm-size$(EXE)
 endif
 build-stats:
 ifeq ($(USE_BINARYBUILDER_LLVM),1)
@@ -656,7 +779,14 @@ endif
 	@printf $(JULCOLOR)' ==> ./julia binary sizes\n'$(ENDCOLOR)
 	$(call spawn,$(LLVM_SIZE) -A $(call cygpath_w,$(build_private_libdir)/sys.$(SHLIB_EXT)) \
 		$(call cygpath_w,$(build_shlibdir)/libjulia.$(SHLIB_EXT)) \
+		$(call cygpath_w,$(build_shlibdir)/libjulia-internal.$(SHLIB_EXT)) \
+		$(call cygpath_w,$(build_shlibdir)/libjulia-codegen.$(SHLIB_EXT)) \
 		$(call cygpath_w,$(build_bindir)/julia$(EXE)))
+ifeq ($(OS),Darwin)
+	$(call spawn,$(LLVM_SIZE) -A $(call cygpath_w,$(build_shlibdir)/libLLVM.$(SHLIB_EXT)))
+else
+	$(call spawn,$(LLVM_SIZE) -A $(call cygpath_w,$(build_shlibdir)/$(LLVM_SHARED_LIB_NAME).$(SHLIB_EXT)))
+endif
 	@printf $(JULCOLOR)' ==> ./julia launch speedtest\n'$(ENDCOLOR)
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
diff --git a/NEWS.md b/NEWS.md
index d60008e2b3831..1818967e8ffb5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,11 +1,30 @@
-Julia v1.11 Release Notes
+Julia v1.13 Release Notes
 ========================
 
 New language features
 ---------------------
 
+  - New `Base.@acquire` macro for a non-closure version of `Base.acquire(f, s::Base.Semaphore)`, like `@lock`. ([#56845])
+  - New `nth` function to access the `n`-th element of a generic iterable. ([#56580])
+  - New `@__FUNCTION__` macro to refer to the innermost enclosing function. ([#58940])
+  - The character U+1F8B2 🢲 (RIGHTWARDS ARROW WITH LOWER HOOK), newly added by Unicode 16,
+    is now a valid operator with arrow precedence, accessible as `\hookunderrightarrow` at the REPL.
+    ([JuliaLang/JuliaSyntax.jl#525], [#57143])
+  - Support for Unicode 17 ([#59534]).
+  - It is now possible to control which version of the Julia syntax will be used to parse a package by setting the
+    `compat.julia` or `syntax.julia_version` key in Project.toml. This feature is similar to the notion of "editions"
+    in other language ecosystems and will allow non-breaking evolution of Julia syntax in future versions.
+    See the "Syntax Versioning" section in the code loading documentation ([#60018]).
+
 Language changes
 ----------------
+* `mod(x::AbstractFloat, -Inf)` now returns `x` (as long as `x` is finite), this aligns with C standard and is considered a bug fix ([#47102])
+
+* The `hash` algorithm and its values have changed for certain types, most notably AbstractString. Any `hash` specializations for equal types to those that changed, such as some third-party string packages, may need to be deleted. ([#57509], [#59691])
+
+* The `hash(::AbstractString)` function is now a zero-copy / zero-cost function, based upon providing a correct implementation of the `codeunit` and `iterate` functions. Third-party string packages should migrate to the new algorithm by deleting their existing overrides of the `hash` function. ([#59691])
+
+* Indexless `getindex` and `setindex!` (i.e. `A[]`) on `ReinterpretArray` now correctly throw a `BoundsError` when there is more than one element. ([#58814])
 
 Compiler/Runtime improvements
 -----------------------------
@@ -13,66 +32,115 @@ Compiler/Runtime improvements
 Command-line option changes
 ---------------------------
 
+* The option `--sysimage-native-code=no` has been deprecated.
+* The `JULIA_CPU_TARGET` environment variable now supports a `sysimage` keyword to match (or extend) the CPU target used to build the current system image ([#58970]).
+* The `--code-coverage=all` option now automatically throws away sysimage caches so that code coverage can be accurately measured on methods within the sysimage. It is thrown away after startup (and after startup.jl), before any user code is executed ([#59234])
+* New `--trace-eval` command-line option to show expressions being evaluated during top-level evaluation. Supports `--trace-eval=loc` or just `--trace-eval` (show location only), `--trace-eval=full` (show full expressions), and `--trace-eval=no` (disable tracing). Also adds `Base.TRACE_EVAL` global control that takes priority over the command-line option and can be set to `:no`, `:loc`, `:full`, or `nothing` (to use command-line setting). ([#57137])
+* Julia now automatically enables verbose debugging options (`--trace-eval` and `JULIA_TEST_VERBOSE`) when CI debugging has been triggered. i.e. via the "debug logging" UI toggle is enabled on github actions re-runs. Other platforms are supported too ([#59551])
+
 Multi-threading changes
 -----------------------
 
+* A new `AbstractSpinLock` is defined with `SpinLock <: AbstractSpinLock` ([#55944]).
+* A new `PaddedSpinLock <: AbstractSpinLock` is defined.  It has extra padding to avoid false sharing ([#55944]).
+* New types are defined to handle the pattern of code that must run once per process, called
+  a `OncePerProcess{T}` type, which allows defining a function that should be run exactly once
+  the first time it is called, and then always return the same result value of type `T`
+  every subsequent time afterwards. There are also `OncePerThread{T}` and `OncePerTask{T}` types for
+  similar usage with threads or tasks. ([#TBD])
+
 Build system changes
 --------------------
 
 New library functions
 ---------------------
-* `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]).
+
 * `unzip(itr)` is now provided, essentially as an inverse of `zip` ([#33515]).
+* `ispositive(::Real)` and `isnegative(::Real)` are provided for performance and convenience ([#53677]).
+* The `Test` module now supports the `JULIA_TEST_VERBOSE` environment variable. When set to `true`,
+  it enables verbose testset entry/exit messages with timing information and sets the default `verbose=true`
+  for `DefaultTestSet` to show detailed hierarchical test summaries ([#59295]).
+* Exporting function `fieldindex` to get the index of a struct's field ([#58119]).
+* `Base.donotdelete` is now public. It prevents deadcode elimination of its arguments ([#55774]).
+* `Sys.sysimage_target()` returns the CPU target string used to build the current system image ([#58970]).
+* `Iterators.findeach` is a lazy version of `findall` ([#54124]).
 
 New library features
 --------------------
-* `replace(string, pattern...)` now supports an optional `IO` argument to
-  write the output to a stream rather than returning a string ([#48625]).
+
+* `fieldoffset` now also accepts the field name as a symbol as `fieldtype` already did ([#58100]).
+* `sort(keys(::Dict))` and `sort(values(::Dict))` now automatically collect, they previously threw ([#56978]).
+* `Base.AbstractOneTo` is added as a supertype of one-based axes, with `Base.OneTo` as its subtype ([#56902]).
+* `takestring!(::IOBuffer)` removes the content from the buffer, returning the content as a `String`.
+* `chopprefix` and `chopsuffix` can now also accept an `AbstractChar` as the prefix/suffix to remove.
+* The `macroexpand` (with default true) and the new `macroexpand!` (with default false)
+  functions now support a `legacyscope` boolean keyword argument to control whether to run
+  the legacy scope resolution pass over the result. The legacy scope resolution code has
+  known design bugs and will be disabled by default in a future version. Users should
+  migrate now by calling `legacyscope=false` or using `macroexpand!`. This may often require
+  fixes to the code calling `macroexpand` with `Meta.unescape` and `Meta.reescape` or by
+  updating tests to expect `hygienic-scope` or `escape` markers might appear in the result.
+* `Base.ScopedValues.LazyScopedValue{T}` is introduced for scoped values that compute their default using a
+  `OncePerProcess{T}` callback, allowing for lazy initialization of the default value. `AbstractScopedValue` is
+  now the abstract base type for both `ScopedValue` and `LazyScopedValue`. ([#59372])
+* New `Base.active_manifest()` function to return the path of the active manifest, like `Base.active_project()`.
+  Also can return the manifest that would be used for a given project file ([#57937])
 
 Standard library changes
 ------------------------
 
-#### Package Manager
+* `codepoint(c)` now succeeds for overlong encodings.  `Base.ismalformed`, `Base.isoverlong`, and
+  `Base.show_invalid` are now `public` and documented (but not exported) ([#55152]).
 
-#### LinearAlgebra
+#### JuliaSyntaxHighlighting
 
-#### Printf
+#### LinearAlgebra
 
 #### Profile
 
 #### Random
 
-#### REPL
+* `randperm!` and `randcycle!` now support non-`Array` `AbstractArray` inputs, assuming they are mutable and their indices are one-based ([#58596]).
 
-#### SuiteSparse
+* `shuffle` now may take an argument of `NTuple` value ([#56906]).
 
+#### REPL
 
-#### SparseArrays
+* The Julia REPL now support bracketed paste on Windows which should significantly speed up pasting large code blocks into the REPL ([#59825])
+* The REPL now provides syntax highlighting for input as you type. See the REPL docs for more info about customization.
+* The REPL now supports automatic insertion of closing brackets, parentheses, and quotes. See the REPL docs for more info about customization.
+* History searching has been rewritten to use a new interactive modal dialogue, using a fzf-like style.
+* The display of `AbstractChar`s in the main REPL mode now includes LaTeX input information like what is shown in help mode ([#58181]).
+* Display of repeated frames and cycles in stack traces has been improved by bracketing them in the trace and treating them consistently ([#55841]).
+* The superscript character U+107A5 𐞥 (MODIFIER LETTER SMALL Q), which was already supported in the language, can now be accessed at the REPL with `\^q` ([#59544]).
 
 #### Test
 
-#### Dates
-
-
-#### Distributed
-
-#### Unicode
-
-
-#### DelimitedFiles
-
+* Test failures when using the `@test` macro now show evaluated arguments for all function calls ([#57825], [#57839]).
+* Transparent test sets (`@testset let`) now show context when tests error ([#58727]).
+* `@test_throws` now supports a three-argument form `@test_throws ExceptionType pattern expr` to test both exception type and message pattern in one call ([#59117]).
+* The testset stack was changed to use `ScopedValue` rather than task local storage ([#53462]).
 
 #### InteractiveUtils
 
-Deprecated or removed
----------------------
+* Introspection utilities such as `@code_typed`, `@which` and `@edit` now accept type annotations as substitutes for values, recognizing forms such as `f(1, ::Float64, 3)` or even `sum(::Vector{T}; init = ::T) where {T<:Real}`. Type-annotated variables as in `f(val::Int; kw::Float64)` are not evaluated if the type annotation provides the necessary information, making this syntax compatible with signatures found in stacktraces ([#57909], [#58222]).
+* Code introspection macros such as `@code_lowered` and `@code_typed` now have a much better support for broadcasting expressions, including broadcasting assignments of the form `x .+= f(y)` ([#58349]).
 
+#### Dates
+
+* `isoweekdate`, `isoyear`, `weeksinyear` are now implemented and exported for week based calendars, following [ISO week date](https://en.wikipedia.org/wiki/ISO_week_date) ([#48507]).
 
 External dependencies
 ---------------------
 
+  * 7-Zip updated from p7zip v17.06 to upstream 7-Zip v25.01. On Windows, the full 7z.exe/7z.dll bundle is replaced with standalone 7za.exe, which supports fewer formats but unifies cross-platform behavior. ([#60025]).
+
 Tooling Improvements
 --------------------
 
+Deprecated or removed
+---------------------
+
+* The method `merge(combine::Callable, d::AbstractDict...)` is now deprecated to favor `mergewith` instead ([#59775]).
 
 <!--- generated by NEWS-update.jl: -->
diff --git a/README.md b/README.md
index a4480ecf482cd..3971999691d77 100644
--- a/README.md
+++ b/README.md
@@ -35,19 +35,20 @@
 ## The Julia Language
 
 Julia is a high-level, high-performance dynamic language for technical
-computing.  The main homepage for Julia can be found at
-[julialang.org](https://julialang.org/).  This is the GitHub
+computing. The main homepage for Julia can be found at
+[julialang.org](https://julialang.org/). This is the GitHub
 repository of Julia source code, including instructions for compiling
 and installing Julia, below.
 
 ## Resources
 
 - **Homepage:** <https://julialang.org>
-- **Binaries:** <https://julialang.org/downloads/>
+- **Install:** <https://julialang.org/downloads/>
 - **Source code:** <https://github.com/JuliaLang/julia>
 - **Documentation:** <https://docs.julialang.org>
 - **Packages:** <https://julialang.org/packages/>
 - **Discussion forum:** <https://discourse.julialang.org>
+- **Zulip:** <https://julialang.zulipchat.com/>
 - **Slack:** <https://julialang.slack.com> (get an invite from <https://julialang.org/slack/>)
 - **YouTube:** <https://www.youtube.com/user/JuliaLanguage>
 - **Code coverage:** <https://coveralls.io/r/JuliaLang/julia>
@@ -56,27 +57,30 @@ New developers may find the notes in
 [CONTRIBUTING](https://github.com/JuliaLang/julia/blob/master/CONTRIBUTING.md)
 helpful to start contributing to the Julia codebase.
 
-### External Resources
+### Learning Julia
 
-- [**StackOverflow**](https://stackoverflow.com/questions/tagged/julia-lang)
-- [**Twitter**](https://twitter.com/JuliaLanguage)
 - [**Learning resources**](https://julialang.org/learning/)
 
 ## Binary Installation
 
-If you would rather not compile the latest Julia from source,
-platform-specific tarballs with pre-compiled binaries are also
-[available for download](https://julialang.org/downloads/). The
-downloads page also provides details on the
-[different tiers of support](https://julialang.org/downloads/#supported_platforms)
-for OS and platform combinations.
-
-If everything works correctly, you will see a Julia banner and an
-interactive prompt into which you can enter expressions for
-evaluation.  You can read about [getting
-started](https://docs.julialang.org/en/v1/manual/getting-started/) in the manual.
-
-**Note**: Although some system package managers provide Julia, such
+The recommended way of installing Julia is to use `juliaup` which will install
+the latest stable `julia` for you and help keep it up to date. It can also let
+you install and run different Julia versions simultaneously. Instructions for
+this can be found [here](https://julialang.org/downloads/). If you want to manually
+download specific Julia binaries, you can find those on the [Manual Downloads
+page](https://julialang.org/downloads/manual-downloads/). The downloads page also provides
+details on the [different tiers of
+support](https://julialang.org/downloads/support) for OS and
+platform combinations.
+
+If everything works correctly, you will get a `julia` program and when you run
+it in a terminal or command prompt, you will see a Julia banner and an
+interactive prompt into which you can enter expressions for evaluation. You can
+read about [getting
+started](https://docs.julialang.org/en/v1/manual/getting-started/) in the
+manual.
+
+**Note**: Although some OS package managers provide Julia, such
 installations are neither maintained nor endorsed by the Julia
 project. They may be outdated, broken and/or unmaintained. We
 recommend you use the official Julia binaries instead.
@@ -89,11 +93,11 @@ Then, acquire the source code by cloning the git repository:
 
     git clone https://github.com/JuliaLang/julia.git
 
-and then use the command prompt to change into the resulting julia directory. By default you will be building the latest unstable version of
+and then use the command prompt to change into the resulting julia directory. By default, you will be building the latest unstable version of
 Julia. However, most users should use the [most recent stable version](https://github.com/JuliaLang/julia/releases)
 of Julia. You can get this version by running:
 
-    git checkout v1.9.0
+    git checkout v1.12.2
 
 To build the `julia` executable, run `make` from within the julia directory.
 
@@ -116,7 +120,7 @@ started](https://docs.julialang.org/en/v1/manual/getting-started/)
 in the manual.
 
 Detailed build instructions, should they be necessary,
-are included in the [build documentation](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/).
+are included in the [build documentation](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/build.md).
 
 ### Uninstalling Julia
 
@@ -131,14 +135,14 @@ The Julia source code is organized as follows:
 | Directory         | Contents                                                           |
 | -                 | -                                                                  |
 | `base/`           | source code for the Base module (part of Julia's standard library) |
-| `stdlib/`         | source code for other standard library packages                    |
 | `cli/`            | source for the command line interface/REPL                         |
 | `contrib/`        | miscellaneous scripts                                              |
 | `deps/`           | external dependencies                                              |
 | `doc/src/`        | source for the user manual                                         |
+| `etc/`            | contains `startup.jl`                                              |
 | `src/`            | source for Julia language core                                     |
+| `stdlib/`         | source code for other standard library packages                    |
 | `test/`           | test suites                                                        |
-| `usr/`            | binaries and shared libraries loaded by Julia's standard libraries |
 
 ## Terminal, Editors and IDEs
 
@@ -146,7 +150,7 @@ The Julia REPL is quite powerful. See the section in the manual on
 [the Julia REPL](https://docs.julialang.org/en/v1/stdlib/REPL/)
 for more details.
 
-On Windows we highly recommend running Julia in a modern terminal,
+On Windows, we highly recommend running Julia in a modern terminal,
 such as [Windows Terminal from the Microsoft Store](https://aka.ms/terminal).
 
 Support for editing Julia is available for many
@@ -157,7 +161,7 @@ Support for editing Julia is available for many
 others.
 
 For users who prefer IDEs, we recommend using VS Code with the
-[julia-vscode](https://www.julia-vscode.org/) plugin.
+[julia-vscode](https://www.julia-vscode.org/) plugin.\
 For notebook users, [Jupyter](https://jupyter.org/) notebook support is available through the
 [IJulia](https://github.com/JuliaLang/IJulia.jl) package, and
 the [Pluto.jl](https://github.com/fonsp/Pluto.jl) package provides Pluto notebooks.
diff --git a/THIRDPARTY.md b/THIRDPARTY.md
index 51950d9e2c6a1..06973b0163e5e 100644
--- a/THIRDPARTY.md
+++ b/THIRDPARTY.md
@@ -1,12 +1,11 @@
 The Julia language is licensed under the MIT License (see [LICENSE.md](./LICENSE.md) ). The "language" consists
-of the compiler (the contents of src/), most of the standard library (base/),
+of the compiler (the contents of `src/`), most of the standard library (`base/` and `stdlib/`),
 and some utilities (most of the rest of the files in this repository). See below
 for exceptions.
 
 - [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
 - [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
 - [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp) [UIUC]
-- [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT]
 - [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
 - [Python](https://docs.python.org/3/license.html) (for strtod implementation on Windows) [PSF]
 - [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
@@ -26,7 +25,8 @@ own licenses:
 
 and optionally:
 
-- [ITTAPI](https://github.com/intel/ittapi/blob/master/LICENSES/BSD-3-Clause.txt) [BSD-3]
+- [LibTracyClient](https://github.com/wolfpld/tracy/blob/master/LICENSE) [BSD-3]
+- [ITTAPI](https://github.com/intel/ittapi/tree/master/LICENSES) [BSD-3 AND GPL2]
 
 Julia's `stdlib` uses the following external libraries, which have their own licenses:
 
@@ -36,25 +36,38 @@ Julia's `stdlib` uses the following external libraries, which have their own lic
 - [LIBGIT2](https://github.com/libgit2/libgit2/blob/development/COPYING) [GPL2+ with unlimited linking exception]
 - [CURL](https://curl.haxx.se/docs/copyright.html) [MIT/X derivative]
 - [LIBSSH2](https://github.com/libssh2/libssh2/blob/master/COPYING) [BSD-3]
-- [MBEDTLS](https://github.com/ARMmbed/mbedtls/blob/development/LICENSE) [Apache 2.0]
+- [OPENSSL](https://www.openssl.org/source/license.html) [Apache 2.0]
 - [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Copying) [LGPL3+]
 - [OPENBLAS](https://raw.github.com/xianyi/OpenBLAS/master/LICENSE) [BSD-3]
 - [LAPACK](https://netlib.org/lapack/LICENSE.txt) [BSD-3]
 - [PCRE](https://www.pcre.org/licence.txt) [BSD-3]
-- [SUITESPARSE](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/master/LICENSE.txt) [mix of LGPL2+ and GPL2+; see individual module licenses]
+- [SUITESPARSE](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/master/LICENSE.txt) [mix of BSD-3-Clause, LGPL2.1+ and GPL2+; see individual module licenses]
+  - [`libamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/AMD/Doc/License.txt) [BSD-3-Clause]
+  - [`libcamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/CAMD/Doc/License.txt) [BSD-3-Clause]
+  - [`libccolamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/CCOLAMD/Doc/License.txt) [BSD-3-Clause]
+  - [`libcolamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/COLAMD/Doc/License.txt) [BSD-3-Clause]
+  - [`libsuitesparseconfig`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/SuiteSparse_config/README.txt) [BSD-3-Clause]
+  - [`libbtf`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/BTF/Doc/License.txt) [LGPL-2.1+]
+  - [`libklu`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/KLU/Doc/License.txt) [LGPL-2.1+]
+  - [`libldl`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/LDL/Doc/License.txt) [LGPL-2.1+]
+  - [`libcholmod`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/CHOLMOD/Doc/License.txt) [LGPL-2.1+ and GPL-2.0+]
+  - [`librbio`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/RBio/Doc/License.txt) [GPL-2.0+]
+  - [`libspqr`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/SPQR/Doc/License.txt) [GPL-2.0+]
+  - [`libumfpack`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/UMFPACK/Doc/License.txt) [GPL-2.0+]
 - [LIBBLASTRAMPOLINE](https://github.com/staticfloat/libblastrampoline/blob/main/LICENSE) [MIT]
 - [NGHTTP2](https://github.com/nghttp2/nghttp2/blob/master/COPYING) [MIT]
 
 Julia's build process uses the following external tools:
 
-- [PATCHELF](https://nixos.org/patchelf.html)
-- [OBJCONV](https://www.agner.org/optimize/#objconv)
+- [PATCHELF](https://github.com/NixOS/patchelf/blob/master/COPYING) [GPL3]
+- [OBJCONV](https://www.agner.org/optimize/#objconv) [GPL3]
 - [LIBWHICH](https://github.com/vtjnash/libwhich/blob/master/LICENSE) [MIT]
 
 Julia bundles the following external programs and libraries:
 
 - [7-Zip](https://www.7-zip.org/license.txt)
 - [ZLIB](https://zlib.net/zlib_license.html)
+- [ZSTD](https://github.com/facebook/zstd/blob/v1.5.7/LICENSE)
 
 On some platforms, distributions of Julia contain SSL certificate authority certificates,
 released under the [Mozilla Public License](https://en.wikipedia.org/wiki/Mozilla_Public_License).
diff --git a/VERSION b/VERSION
index 0bc25cfcab2c1..cd293038ed3bf 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.11.0-DEV
+1.14.0-DEV
diff --git a/base/.gitignore b/base/.gitignore
index 0fab5b41fda08..4d02a98c01000 100644
--- a/base/.gitignore
+++ b/base/.gitignore
@@ -7,5 +7,6 @@
 /uv_constants.jl
 /version_git.jl
 /version_git.jl.phony
+/version_git_dirty
 /userimg.jl
 /JuliaSyntax
diff --git a/base/Base.jl b/base/Base.jl
index 4f7032a4d0868..c5513b0af0ce3 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -1,169 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-baremodule Base
-
-using Core.Intrinsics, Core.IR
-
-# to start, we're going to use a very simple definition of `include`
-# that doesn't require any function (except what we can get from the `Core` top-module)
-const _included_files = Array{Tuple{Module,String},1}(Core.undef, 1)
-function include(mod::Module, path::String)
-    ccall(:jl_array_grow_end, Cvoid, (Any, UInt), _included_files, UInt(1))
-    Core.arrayset(true, _included_files, (mod, ccall(:jl_prepend_cwd, Any, (Any,), path)), arraylen(_included_files))
-    Core.println(path)
-    ccall(:jl_uv_flush, Nothing, (Ptr{Nothing},), Core.io_pointer(Core.stdout))
-    Core.include(mod, path)
-end
-include(path::String) = include(Base, path)
-
-# from now on, this is now a top-module for resolving syntax
-const is_primary_base_module = ccall(:jl_module_parent, Ref{Module}, (Any,), Base) === Core.Main
-ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Base, is_primary_base_module)
-
-# The @inline/@noinline macros that can be applied to a function declaration are not available
-# until after array.jl, and so we will mark them within a function body instead.
-macro inline()   Expr(:meta, :inline)   end
-macro noinline() Expr(:meta, :noinline) end
-
-# Try to help prevent users from shooting them-selves in the foot
-# with ambiguities by defining a few common and critical operations
-# (and these don't need the extra convert code)
-getproperty(x::Module, f::Symbol) = (@inline; getglobal(x, f))
-getproperty(x::Type, f::Symbol) = (@inline; getfield(x, f))
-setproperty!(x::Type, f::Symbol, v) = error("setfield! fields of Types should not be changed")
-getproperty(x::Tuple, f::Int) = (@inline; getfield(x, f))
-setproperty!(x::Tuple, f::Int, v) = setfield!(x, f, v) # to get a decent error
-
-getproperty(x, f::Symbol) = (@inline; getfield(x, f))
-function setproperty!(x, f::Symbol, v)
-    ty = fieldtype(typeof(x), f)
-    val = v isa ty ? v : convert(ty, v)
-    return setfield!(x, f, val)
-end
-
-dotgetproperty(x, f) = getproperty(x, f)
-
-getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getglobal(x, f, order))
-function setproperty!(x::Module, f::Symbol, v, order::Symbol=:monotonic)
-    @inline
-    ty = Core.get_binding_type(x, f)
-    val = v isa ty ? v : convert(ty, v)
-    return setglobal!(x, f, val, order)
-end
-getproperty(x::Type, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
-setproperty!(x::Type, f::Symbol, v, order::Symbol) = error("setfield! fields of Types should not be changed")
-getproperty(x::Tuple, f::Int, order::Symbol) = (@inline; getfield(x, f, order))
-setproperty!(x::Tuple, f::Int, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error
-
-getproperty(x, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
-function setproperty!(x, f::Symbol, v, order::Symbol)
-    @inline
-    ty = fieldtype(typeof(x), f)
-    val = v isa ty ? v : convert(ty, v)
-    return setfield!(x, f, val, order)
-end
-
-function swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic)
-    @inline
-    ty = fieldtype(typeof(x), f)
-    val = v isa ty ? v : convert(ty, v)
-    return Core.swapfield!(x, f, val, order)
-end
-function modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic)
-    @inline
-    return Core.modifyfield!(x, f, op, v, order)
-end
-function replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order)
-    @inline
-    ty = fieldtype(typeof(x), f)
-    val = desired isa ty ? desired : convert(ty, desired)
-    return Core.replacefield!(x, f, expected, val, success_order, fail_order)
-end
-
-convert(::Type{Any}, Core.@nospecialize x) = x
-convert(::Type{T}, x::T) where {T} = x
-include("coreio.jl")
-
-eval(x) = Core.eval(Base, x)
-eval(m::Module, x) = Core.eval(m, x)
-
-# init core docsystem
-import Core: @doc, @__doc__, WrappedException, @int128_str, @uint128_str, @big_str, @cmd
-if isdefined(Core, :Compiler)
-    import Core.Compiler.CoreDocs
-    Core.atdoc!(CoreDocs.docm)
-end
-
-include("exports.jl")
-
-if false
-    # simple print definitions for debugging. enable these if something
-    # goes wrong during bootstrap before printing code is available.
-    # otherwise, they just just eventually get (noisily) overwritten later
-    global show, print, println
-    show(io::IO, x) = Core.show(io, x)
-    print(io::IO, a...) = Core.print(io, a...)
-    println(io::IO, x...) = Core.println(io, x...)
-end
-
-"""
-    time_ns() -> UInt64
+const start_base_include = time_ns()
 
-Get the time in nanoseconds. The time corresponding to 0 is undefined, and wraps every 5.8 years.
-"""
-time_ns() = ccall(:jl_hrtime, UInt64, ())
-
-start_base_include = time_ns()
-
-## Load essential files and libraries
-include("essentials.jl")
-include("ctypes.jl")
-include("gcutils.jl")
-include("generator.jl")
 include("reflection.jl")
-include("options.jl")
-
-# define invoke(f, T, args...; kwargs...), without kwargs wrapping
-# to forward to invoke
-function Core.kwcall(kwargs::NamedTuple, ::typeof(invoke), f, T, args...)
-    @inline
-    # prepend kwargs and f to the invoked from the user
-    T = rewrap_unionall(Tuple{Core.Typeof(kwargs), Core.Typeof(f), (unwrap_unionall(T)::DataType).parameters...}, T)
-    return invoke(Core.kwcall, T, kwargs, f, args...)
-end
-# invoke does not have its own call cache, but kwcall for invoke does
-setfield!(typeof(invoke).name.mt, :max_args, 3, :monotonic) # invoke, f, T, args...
-
-# define applicable(f, T, args...; kwargs...), without kwargs wrapping
-# to forward to applicable
-function Core.kwcall(kwargs::NamedTuple, ::typeof(applicable), @nospecialize(args...))
-    @inline
-    return applicable(Core.kwcall, kwargs, args...)
-end
-function Core._hasmethod(@nospecialize(f), @nospecialize(t)) # this function has a special tfunc (TODO: make this a Builtin instead like applicable)
-    tt = rewrap_unionall(Tuple{Core.Typeof(f), (unwrap_unionall(t)::DataType).parameters...}, t)
-    return Core._hasmethod(tt)
-end
-
-
-# core operations & types
-include("promotion.jl")
-include("tuple.jl")
-include("expr.jl")
-include("pair.jl")
-include("traits.jl")
-include("range.jl")
-include("error.jl")
-
-# core numeric operations & types
-==(x, y) = x === y
-include("bool.jl")
-include("number.jl")
-include("int.jl")
-include("operators.jl")
-include("pointer.jl")
-include("refvalue.jl")
-include("cmem.jl")
 include("refpointer.jl")
 
 # now replace the Pair constructor (relevant for NamedTuples) with one that calls our Base.convert
@@ -174,57 +13,59 @@ delete_method(which(Pair{Any,Any}, (Any, Any)))
 end
 
 # The REPL stdlib hooks into Base using this Ref
-const REPL_MODULE_REF = Ref{Module}()
-
-include("checked.jl")
-using .Checked
-function cld end
-function fld end
+const REPL_MODULE_REF = Ref{Module}(Base)
+process_sysimg_args!()
 
-# Lazy strings
-include("strings/lazy.jl")
+include(strcat(BUILDROOT, "build_h.jl"))     # include($BUILDROOT/base/build_h.jl)
+include(strcat(BUILDROOT, "version_git.jl")) # include($BUILDROOT/base/version_git.jl)
 
-# array structures
-include("indices.jl")
-include("array.jl")
-include("abstractarray.jl")
-include("subarray.jl")
-include("views.jl")
-include("baseext.jl")
-
-include("ntuple.jl")
-
-include("abstractdict.jl")
-include("iddict.jl")
-include("idset.jl")
+# Initialize DL_LOAD_PATH as early as possible.  We are defining things here in
+# a slightly more verbose fashion than usual, because we're running so early.
+let os = ccall(:jl_get_UNAME, Any, ())
+    if os === :Darwin || os === :Apple
+        if DARWIN_FRAMEWORK
+            push!(DL_LOAD_PATH, "@loader_path/Frameworks")
+        end
+        push!(DL_LOAD_PATH, "@loader_path")
+    end
+end
 
-include("iterators.jl")
-using .Iterators: zip, unzip, enumerate, only
-using .Iterators: Flatten, Filter, product  # for generators
-using .Iterators: Stateful    # compat (was formerly used in reinterpretarray.jl)
+# metaprogramming
+include("meta.jl")
 
-include("namedtuple.jl")
+# Strings
+include("multimedia.jl")
+using .Multimedia
 
-# For OS specific stuff
-# We need to strcat things here, before strings are really defined
-function strcat(x::String, y::String)
-    out = ccall(:jl_alloc_string, Ref{String}, (Csize_t,), Core.sizeof(x) + Core.sizeof(y))
-    GC.@preserve x y out begin
-        out_ptr = unsafe_convert(Ptr{UInt8}, out)
-        unsafe_copyto!(out_ptr, unsafe_convert(Ptr{UInt8}, x), Core.sizeof(x))
-        unsafe_copyto!(out_ptr + Core.sizeof(x), unsafe_convert(Ptr{UInt8}, y), Core.sizeof(y))
+include("char.jl")
+function array_new_memory(mem::Memory{UInt8}, newlen::Int)
+    # add an optimization to array_new_memory for StringVector
+    if (@assume_effects :total @ccall jl_genericmemory_owner(mem::Any,)::Any) === mem
+        # TODO: when implemented, this should use a memory growing call
+        return typeof(mem)(undef, newlen)
+    else
+        # If data is in a String, keep it that way.
+        # When implemented, this could use jl_gc_expand_string(oldstr, newlen) as an optimization
+        str = _string_n(newlen)
+        return (@assume_effects :total !:consistent @ccall jl_string_to_genericmemory(str::Any,)::Memory{UInt8})
     end
-    return out
 end
-include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "build_h.jl"))     # include($BUILDROOT/base/build_h.jl)
-include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "version_git.jl")) # include($BUILDROOT/base/version_git.jl)
+include("strings/basic.jl")
+include("strings/string.jl")
+include("strings/substring.jl")
+include("strings/cstring.jl")
 
-# numeric operations
+include("cartesian.jl")
+using .Cartesian
 include("hashing.jl")
-include("rounding.jl")
-using .Rounding
+include("osutils.jl")
+
+# subarrays
+include("subarray.jl")
+include("views.jl")
+
+# numeric operations
 include("div.jl")
-include("float.jl")
 include("twiceprecision.jl")
 include("complex.jl")
 include("rational.jl")
@@ -245,62 +86,37 @@ include("reduce.jl")
 ## core structures
 include("reshapedarray.jl")
 include("reinterpretarray.jl")
-include("bitarray.jl")
-include("bitset.jl")
-
-if !isdefined(Core, :Compiler)
-    include("docs/core.jl")
-    Core.atdoc!(CoreDocs.docm)
-end
-
-include("multimedia.jl")
-using .Multimedia
 
 # Some type
 include("some.jl")
 
 include("dict.jl")
-include("abstractset.jl")
 include("set.jl")
 
-# Strings
-include("char.jl")
-include("strings/basic.jl")
-include("strings/string.jl")
-include("strings/substring.jl")
-
-# Initialize DL_LOAD_PATH as early as possible.  We are defining things here in
-# a slightly more verbose fashion than usual, because we're running so early.
-const DL_LOAD_PATH = String[]
-let os = ccall(:jl_get_UNAME, Any, ())
-    if os === :Darwin || os === :Apple
-        if Base.DARWIN_FRAMEWORK
-            push!(DL_LOAD_PATH, "@loader_path/Frameworks")
-        end
-        push!(DL_LOAD_PATH, "@loader_path")
-    end
-end
-
-include("osutils.jl")
-include("c.jl")
-
 # Core I/O
 include("io.jl")
 include("iobuffer.jl")
 
+# Concurrency (part 1)
+include("linked_list.jl")
+include("condition.jl")
+include("threads.jl")
+include("lock.jl")
+
 # strings & printing
 include("intfuncs.jl")
 include("strings/strings.jl")
 include("regex.jl")
 include("parse.jl")
 include("shell.jl")
+const IRShow = Compiler.IRShow # an alias for compatibility
+include("stacktraces.jl")
+using .StackTraces
 include("show.jl")
 include("arrayshow.jl")
 include("methodshow.jl")
 
 # multidimensional arrays
-include("cartesian.jl")
-using .Cartesian
 include("multidimensional.jl")
 
 include("broadcast.jl")
@@ -314,31 +130,40 @@ include("missing.jl")
 # version
 include("version.jl")
 
+#=
+isdebugbuild is defined here as this is imported in libdl.jl (included in libc.jl)
+The method is added in util.jl
+=#
+function isdebugbuild end
+
 # system & environment
 include("sysinfo.jl")
 include("libc.jl")
 using .Libc: getpid, gethostname, time, memcpy, memset, memmove, memcmp
 
+const USING_STOCK_GC = occursin("stock", GC.gc_active_impl())
+
 # These used to be in build_h.jl and are retained for backwards compatibility.
 # NOTE: keep in sync with `libblastrampoline_jll.libblastrampoline`.
 const libblas_name = "libblastrampoline" * (Sys.iswindows() ? "-5" : "")
 const liblapack_name = libblas_name
 
-# Logging
-include("logging.jl")
-using .CoreLogging
-
-# Concurrency
-include("linked_list.jl")
-include("condition.jl")
-include("threads.jl")
-include("lock.jl")
+# Concurrency (part 2)
+# Note that `atomics.jl` here should be deprecated
+Core.eval(Threads, :(include("atomics.jl")))
 include("channels.jl")
 include("partr.jl")
 include("task.jl")
 include("threads_overloads.jl")
 include("weakkeydict.jl")
 
+# ScopedValues
+include("scopedvalues.jl")
+
+# Logging
+include("logging/logging.jl")
+using .CoreLogging
+
 include("env.jl")
 
 # functions defined in Random
@@ -354,7 +179,8 @@ include("filesystem.jl")
 using .Filesystem
 include("cmd.jl")
 include("process.jl")
-include("ttyhascolor.jl")
+include("terminfo.jl")
+include("Terminals.jl") # Moved from REPL to reduce invalidations
 include("secretbuffer.jl")
 
 # core math functions
@@ -388,17 +214,10 @@ include("accumulate.jl")
 include("permuteddimsarray.jl")
 using .PermutedDimsArrays
 
-# basic data structures
-include("ordering.jl")
-using .Order
-
 # Combinatorics
 include("sort.jl")
 using .Sort
 
-# BinaryPlatforms, used by Artifacts.  Needs `Sort`.
-include("binaryplatforms.jl")
-
 # Fast math
 include("fastmath.jl")
 using .FastMath
@@ -428,13 +247,6 @@ include("irrationals.jl")
 include("mathconstants.jl")
 using .MathConstants: ℯ, π, pi
 
-# metaprogramming
-include("meta.jl")
-
-# Stack frames and traces
-include("stacktraces.jl")
-using .StackTraces
-
 # experimental API's
 include("experimental.jl")
 
@@ -443,8 +255,10 @@ include("deepcopy.jl")
 include("download.jl")
 include("summarysize.jl")
 include("errorshow.jl")
+include("util.jl")
 
 include("initdefs.jl")
+Filesystem.__postinit__()
 
 # worker threads
 include("threadcall.jl")
@@ -456,9 +270,11 @@ include("toml_parser.jl")
 include("linking.jl")
 include("loading.jl")
 
+# BinaryPlatforms, used by Artifacts.  Needs `Sort`.
+include("binaryplatforms.jl")
+
 # misc useful functions & macros
 include("timing.jl")
-include("util.jl")
 include("client.jl")
 include("asyncmap.jl")
 
@@ -471,14 +287,18 @@ include("docs/basedocs.jl")
 # Documentation -- should always be included last in sysimg.
 include("docs/Docs.jl")
 using .Docs
-if isdefined(Core, :Compiler) && is_primary_base_module
-    Docs.loaddocs(Core.Compiler.CoreDocs.DOCS)
-end
+Docs.loaddocs(CoreDocs.DOCS)
+@eval CoreDocs DOCS = DocLinkedList()
+
+include("precompilation.jl")
 
 # finally, now make `include` point to the full version
 for m in methods(include)
     delete_method(m)
 end
+for m in methods(IncludeInto(Base))
+    delete_method(m)
+end
 
 # This method is here only to be overwritten during the test suite to test
 # various sysimg related invalidation scenarios.
@@ -486,88 +306,36 @@ a_method_to_overwrite_in_test() = inferencebarrier(1)
 
 # These functions are duplicated in client.jl/include(::String) for
 # nicer stacktraces. Modifications here have to be backported there
-include(mod::Module, _path::AbstractString) = _include(identity, mod, _path)
-include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path)
+@noinline include(mod::Module, _path::AbstractString) = _include(identity, mod, _path)
+@noinline include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path)
+(this::IncludeInto)(fname::AbstractString) = include(identity, this.m, fname)
+(this::IncludeInto)(mapexpr::Function, fname::AbstractString) = include(mapexpr, this.m, fname)
 
-# External libraries vendored into Base
+# Compatibility with when Compiler was in Core
+@eval Core const Compiler = $Base.Compiler
+@eval Compiler const fl_parse = $Base.fl_parse
+
+# Compiler frontend
 Core.println("JuliaSyntax/src/JuliaSyntax.jl")
-include(@__MODULE__, "JuliaSyntax/src/JuliaSyntax.jl")
+include(@__MODULE__, string(DATAROOT, "julia/JuliaSyntax/src/JuliaSyntax.jl"))
 
 end_base_include = time_ns()
 
 const _sysimage_modules = PkgId[]
 in_sysimage(pkgid::PkgId) = pkgid in _sysimage_modules
 
-# Precompiles for Revise and other packages
-# TODO: move these to contrib/generate_precompile.jl
-# The problem is they don't work there
-for match = _methods(+, (Int, Int), -1, get_world_counter())
-    m = match.method
-    delete!(push!(Set{Method}(), m), m)
-    copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match), typemax(UInt)))
-
-    empty!(Set())
-    push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two))
-    (setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"]
-    (setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two]
-    (setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)]
-    (setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two]
-    (setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int]
-    Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one]
-    Dict(Base => [:(1+1)])[Base]
-    Dict(:one => [1])[:one]
-    Dict("abc" => Set())["abc"]
-    pushfirst!([], sum)
-    get(Base.pkgorigins, Base.PkgId(Base), nothing)
-    sort!([1,2,3])
-    unique!([1,2,3])
-    cumsum([1,2,3])
-    append!(Int[], BitSet())
-    isempty(BitSet())
-    delete!(BitSet([1,2]), 3)
-    deleteat!(Int32[1,2,3], [1,3])
-    deleteat!(Any[1,2,3], [1,3])
-    Core.svec(1, 2) == Core.svec(3, 4)
-    any(t->t[1].line > 1, [(LineNumberNode(2,:none), :(1+1))])
-
-    # Code loading uses this
-    sortperm(mtime.(readdir(".")), rev=true)
-    # JLLWrappers uses these
-    Dict{UUID,Set{String}}()[UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")] = Set{String}()
-    get!(Set{String}, Dict{UUID,Set{String}}(), UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210"))
-    eachindex(IndexLinear(), Expr[])
-    push!(Expr[], Expr(:return, false))
-    vcat(String[], String[])
-    k, v = (:hello => nothing)
-    precompile(indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int))
-    precompile(indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int, Int))
-    # Preferences uses these
-    precompile(get_preferences, (UUID,))
-    precompile(record_compiletime_preference, (UUID, String))
-    get(Dict{String,Any}(), "missing", nothing)
-    delete!(Dict{String,Any}(), "missing")
-    for (k, v) in Dict{String,Any}()
-        println(k)
-    end
-
-    break   # only actually need to do this once
-end
-
 if is_primary_base_module
 
 # Profiling helper
 # triggers printing the report and (optionally) saving a heap snapshot after a SIGINFO/SIGUSR1 profile request
 # Needs to be in Base because Profile is no longer loaded on boot
-const PROFILE_PRINT_COND = Ref{Base.AsyncCondition}()
-function profile_printing_listener()
+function profile_printing_listener(cond::AsyncCondition)
     profile = nothing
     try
-        while true
-            wait(PROFILE_PRINT_COND[])
-            profile = @something(profile, require(PkgId(UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile")))
-
+        while _trywait(cond)
+            profile = @something(profile, require_stdlib(PkgId(UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile")))::Module
             invokelatest(profile.peek_report[])
-            if Base.get_bool_env("JULIA_PROFILE_PEEK_HEAP_SNAPSHOT", false) === true
+            if get_bool_env("JULIA_PROFILE_PEEK_HEAP_SNAPSHOT", false) === true
                 println(stderr, "Saving heap snapshot...")
                 fname = invokelatest(profile.take_heap_snapshot)
                 println(stderr, "Heap snapshot saved to `$(fname)`")
@@ -578,10 +346,32 @@ function profile_printing_listener()
             @error "Profile printing listener crashed" exception=ex,catch_backtrace()
         end
     end
+    nothing
+end
+
+function start_profile_listener()
+    cond = AsyncCondition()
+    uv_unref(cond.handle)
+    t = errormonitor(Threads.@spawn(profile_printing_listener(cond)))
+    atexit() do
+        # destroy this callback when exiting
+        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
+        # this will prompt any ongoing or pending event to flush also
+        close(cond)
+        # error-propagation is not needed, since the errormonitor will handle printing that better
+        t === current_task() || _wait(t)
+    end
+    finalizer(cond) do c
+        # if something goes south, still make sure we aren't keeping a reference in C to this
+        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
+    end
+    ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), cond.handle)
 end
 
 function __init__()
     # Base library init
+    global _atexit_hooks_finished = false
+    Filesystem.__postinit__()
     reinit_stdio()
     Multimedia.reinit_displays() # since Multimedia.displays uses stdout as fallback
     # initialize loading
@@ -589,34 +379,50 @@ function __init__()
     init_load_path()
     init_active_project()
     append!(empty!(_sysimage_modules), keys(loaded_modules))
+    empty!(loaded_precompiles) # If we load a packageimage when building the image this might not be empty
+    for mod in loaded_modules_order
+        push!(get!(Vector{Module}, loaded_precompiles, PkgId(mod)), mod)
+    end
     if haskey(ENV, "JULIA_MAX_NUM_PRECOMPILE_FILES")
         MAX_NUM_PRECOMPILE_FILES[] = parse(Int, ENV["JULIA_MAX_NUM_PRECOMPILE_FILES"])
     end
     # Profiling helper
     @static if !Sys.iswindows()
         # triggering a profile via signals is not implemented on windows
-        cond = Base.AsyncCondition()
-        Base.uv_unref(cond.handle)
-        PROFILE_PRINT_COND[] = cond
-        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), PROFILE_PRINT_COND[].handle)
-        errormonitor(Threads.@spawn(profile_printing_listener()))
+        start_profile_listener()
     end
     _require_world_age[] = get_world_counter()
     # Prevent spawned Julia process from getting stuck waiting on Tracy to connect.
     delete!(ENV, "JULIA_WAIT_FOR_TRACY")
-    if get_bool_env("JULIA_USE_NEW_PARSER", true) === true
+    if get_bool_env("JULIA_USE_FLISP_PARSER", false) === false
         JuliaSyntax.enable_in_core!()
     end
+
+    CoreLogging.global_logger(CoreLogging.ConsoleLogger())
     nothing
 end
 
 # enable threads support
 @eval PCRE PCRE_COMPILE_LOCK = Threads.SpinLock()
 
+# Record dependency information for files belonging to the Compiler, so that
+# we know whether the .ji can just give the Base copy or not.
+# TODO: We may want to do this earlier to avoid TOCTOU issues.
+const _compiler_require_dependencies = Any[]
+@Core.latestworld
+for i = 1:length(_included_files)
+    (mod, file) = _included_files[i]
+    if mod === Compiler || parentmodule(mod) === Compiler || endswith(file, "/Compiler.jl")
+        _include_dependency!(_compiler_require_dependencies, true, mod, file, true, false)
+    end
 end
+# Make relative to DATAROOTDIR to allow relocation
+let basedir = joinpath(Sys.BINDIR, DATAROOTDIR)
+for i = 1:length(_compiler_require_dependencies)
+    tup = _compiler_require_dependencies[i]
+    _compiler_require_dependencies[i] = (tup[1], relpath(tup[2], basedir), tup[3:end]...)
+end
+end
+@assert length(_compiler_require_dependencies) >= 15
 
-# Ensure this file is also tracked
-@assert !isassigned(_included_files, 1)
-_included_files[1] = (parentmodule(Base), abspath(@__FILE__))
-
-end # baremodule Base
+end
diff --git a/base/Base_compiler.jl b/base/Base_compiler.jl
new file mode 100644
index 0000000000000..992f95015586b
--- /dev/null
+++ b/base/Base_compiler.jl
@@ -0,0 +1,411 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module Base
+
+Core._import(Base, Core, :_eval_import, :_eval_import, true)
+Core._import(Base, Core, :_eval_using, :_eval_using, true)
+
+using .Core.Intrinsics, .Core.IR
+
+# to start, we're going to use a very simple definition of `include`
+# that doesn't require any function (except what we can get from the `Core` top-module)
+# start this big so that we don't have to resize before we have defined how to grow an array
+const _included_files = Array{Tuple{Module,String},1}(Core.undef, 400)
+setfield!(_included_files, :size, (1,))
+function include(mod::Module, path::String)
+    len = getfield(_included_files.size, 1)
+    memlen = _included_files.ref.mem.length
+    lenp1 = Core.add_int(len, 1)
+    if len === memlen # by the time this is true we hopefully will have defined _growend!
+        _growend!(_included_files, UInt(1))
+    else
+        setfield!(_included_files, :size, (lenp1,))
+    end
+    Core.memoryrefset!(Core.memoryref(_included_files.ref, lenp1), (mod, ccall(:jl_prepend_cwd, Any, (Any,), path)), :not_atomic, true)
+    Core.println(path)
+    ccall(:jl_uv_flush, Nothing, (Ptr{Nothing},), Core.io_pointer(Core.stdout))
+    Core.include(mod, path)
+end
+include(path::String) = include(Base, path)
+
+struct IncludeInto <: Function
+    m::Module
+end
+(this::IncludeInto)(fname::AbstractString) = include(this.m, fname)
+
+# from now on, this is now a top-module for resolving syntax
+const is_primary_base_module = ccall(:jl_module_parent, Ref{Module}, (Any,), Base) === Core.Main
+ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Base, is_primary_base_module)
+
+# The @inline/@noinline macros that can be applied to a function declaration are not available
+# until after array.jl, and so we will mark them within a function body instead.
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
+
+macro _boundscheck() Expr(:boundscheck) end
+
+# Try to help prevent users from shooting them-selves in the foot
+# with ambiguities by defining a few common and critical operations
+# (and these don't need the extra convert code)
+getproperty(x::Module, f::Symbol) = (@inline; getglobal(x, f))
+getproperty(x::Type, f::Symbol) = (@inline; getfield(x, f))
+setproperty!(x::Type, f::Symbol, v) = error("setfield! fields of Types should not be changed")
+setproperty!(x::Array, f::Symbol, v) = error("setfield! fields of Array should not be changed")
+getproperty(x::Tuple, f::Int) = (@inline; getfield(x, f))
+setproperty!(x::Tuple, f::Int, v) = setfield!(x, f, v) # to get a decent error
+
+getproperty(x, f::Symbol) = (@inline; getfield(x, f))
+function setproperty!(x, f::Symbol, v)
+    ty = fieldtype(typeof(x), f)
+    val = v isa ty ? v : convert(ty, v)
+    return setfield!(x, f, val)
+end
+
+typeof(function getproperty end).name.constprop_heuristic = Core.FORCE_CONST_PROP
+typeof(function setproperty! end).name.constprop_heuristic = Core.FORCE_CONST_PROP
+
+dotgetproperty(x, f) = getproperty(x, f)
+
+getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getglobal(x, f, order))
+function setproperty!(x::Module, f::Symbol, v, order::Symbol=:monotonic)
+    @inline
+    ty = Core.get_binding_type(x, f)
+    val = v isa ty ? v : convert(ty, v)
+    return setglobal!(x, f, val, order)
+end
+getproperty(x::Type, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
+setproperty!(x::Type, f::Symbol, v, order::Symbol) = error("setfield! fields of Types should not be changed")
+getproperty(x::Tuple, f::Int, order::Symbol) = (@inline; getfield(x, f, order))
+setproperty!(x::Tuple, f::Int, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error
+
+getproperty(x, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
+function setproperty!(x, f::Symbol, v, order::Symbol)
+    @inline
+    ty = fieldtype(typeof(x), f)
+    val = v isa ty ? v : convert(ty, v)
+    return setfield!(x, f, val, order)
+end
+
+function swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic)
+    @inline
+    ty = fieldtype(typeof(x), f)
+    val = v isa ty ? v : convert(ty, v)
+    return Core.swapfield!(x, f, val, order)
+end
+function modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic)
+    @inline
+    return Core.modifyfield!(x, f, op, v, order)
+end
+function replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order)
+    @inline
+    ty = fieldtype(typeof(x), f)
+    val = desired isa ty ? desired : convert(ty, desired)
+    return Core.replacefield!(x, f, expected, val, success_order, fail_order)
+end
+function setpropertyonce!(x, f::Symbol, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order)
+    @inline
+    ty = fieldtype(typeof(x), f)
+    val = desired isa ty ? desired : convert(ty, desired)
+    return Core.setfieldonce!(x, f, val, success_order, fail_order)
+end
+
+function swapproperty!(x::Module, f::Symbol, v, order::Symbol=:not_atomic)
+    @inline
+    ty = Core.get_binding_type(x, f)
+    val = v isa ty ? v : convert(ty, v)
+    return Core.swapglobal!(x, f, val, order)
+end
+function modifyproperty!(x::Module, f::Symbol, op, v, order::Symbol=:not_atomic)
+    @inline
+    return Core.modifyglobal!(x, f, op, v, order)
+end
+function replaceproperty!(x::Module, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order)
+    @inline
+    ty = Core.get_binding_type(x, f)
+    val = desired isa ty ? desired : convert(ty, desired)
+    return Core.replaceglobal!(x, f, expected, val, success_order, fail_order)
+end
+function setpropertyonce!(x::Module, f::Symbol, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order)
+    @inline
+    ty = Core.get_binding_type(x, f)
+    val = desired isa ty ? desired : convert(ty, desired)
+    return Core.setglobalonce!(x, f, val, success_order, fail_order)
+end
+
+convert(::Type{Any}, Core.@nospecialize x) = x
+convert(::Type{T}, x::T) where {T} = x
+include("coreio.jl")
+
+import Core: @doc, @__doc__, WrappedException, @int128_str, @uint128_str, @big_str, @cmd
+
+# Export list
+include("exports.jl")
+
+function set_syntax_version end
+_topmod(m::Module) = ccall(:jl_base_relative_to, Any, (Any,), m)::Module
+function _setup_module!(mod::Module, Core.@nospecialize syntax_ver)
+    # using Base
+    Core._using(mod, _topmod(mod), UInt8(0))
+    Core.declare_const(mod, :include, IncludeInto(mod))
+    Core.declare_const(mod, :eval, Core.EvalInto(mod))
+    if syntax_ver === nothing
+        return nothing
+    end
+    set_syntax_version(mod, syntax_ver)
+    return nothing
+end
+
+# core docsystem
+include("docs/core.jl")
+Core.atdoc!(CoreDocs.docm)
+
+eval(x) = Core.eval(Base, x)
+eval(m::Module, x) = Core.eval(m, x)
+
+include("public.jl")
+
+if false
+    # simple print definitions for debugging. enable these if something
+    # goes wrong during bootstrap before printing code is available.
+    # otherwise, they just just eventually get (noisily) overwritten later
+    global show, print, println
+    show(io::IO, x) = Core.show(io, x)
+    print(io::IO, a...) = Core.print(io, a...)
+    println(io::IO, x...) = Core.println(io, x...)
+end
+
+## Load essential files and libraries
+include("essentials.jl")
+
+# Because lowering inserts direct references, it is mandatory for this binding
+# to exist before we start inferring code.
+function string end
+import Core: String
+
+# For OS specific stuff
+# We need to strcat things here, before strings are really defined
+function strcat(x::String, y::String)
+    out = ccall(:jl_alloc_string, Ref{String}, (Int,), Core.sizeof(x) + Core.sizeof(y))
+    gc_x = @_gc_preserve_begin(x)
+    gc_y = @_gc_preserve_begin(y)
+    gc_out = @_gc_preserve_begin(out)
+    out_ptr = unsafe_convert(Ptr{UInt8}, out)
+    unsafe_copyto!(out_ptr, unsafe_convert(Ptr{UInt8}, x), Core.sizeof(x))
+    unsafe_copyto!(out_ptr + Core.sizeof(x), unsafe_convert(Ptr{UInt8}, y), Core.sizeof(y))
+    @_gc_preserve_end(gc_x)
+    @_gc_preserve_end(gc_y)
+    @_gc_preserve_end(gc_out)
+    return out
+end
+
+
+"""
+    time_ns()::UInt64
+
+Get the time in nanoseconds relative to some machine-specific arbitrary time in the past.
+The primary use is for measuring elapsed times during program execution. The return value is guaranteed to
+be monotonic (mod 2⁶⁴) while the system is running, and is unaffected by clock drift or changes to local calendar time,
+but it may change arbitrarily across system reboots or suspensions.
+
+(Although the returned time is always in nanoseconds, the timing resolution is platform-dependent.)
+"""
+time_ns() = ccall(:jl_hrtime, UInt64, ())
+
+# A warning to be interpolated in the docstring of every dangerous mutating function in Base, see PR #50824
+const _DOCS_ALIASING_WARNING = """
+!!! warning
+    Behavior can be unexpected when any mutated argument shares memory with any other argument.
+"""
+
+include("ctypes.jl")
+include("gcutils.jl")
+include("generator.jl")
+include("runtime_internals.jl")
+include("options.jl")
+
+# define invoke(f, T, args...; kwargs...), without kwargs wrapping
+# to forward to invoke
+function Core.kwcall(kwargs::NamedTuple, ::typeof(invoke), f, T, args...)
+    @inline
+    # prepend kwargs and f to the invoked from the user
+    T = rewrap_unionall(Tuple{Core.Typeof(kwargs), Core.Typeof(f), (unwrap_unionall(T)::DataType).parameters...}, T)
+    return invoke(Core.kwcall, T, kwargs, f, args...)
+end
+# invoke does not have its own call cache, but kwcall for invoke does
+setfield!(typeof(invoke).name, :max_args, Int32(3), :monotonic) # invoke, f, T, args...
+
+# define applicable(f, T, args...; kwargs...), without kwargs wrapping
+# to forward to applicable
+function Core.kwcall(kwargs::NamedTuple, ::typeof(applicable), @nospecialize(args...))
+    @inline
+    return applicable(Core.kwcall, kwargs, args...)
+end
+function Core._hasmethod(@nospecialize(f), @nospecialize(t)) # this function has a special tfunc (TODO: make this a Builtin instead like applicable)
+    tt = rewrap_unionall(Tuple{Core.Typeof(f), (unwrap_unionall(t)::DataType).parameters...}, t)
+    return Core._hasmethod(tt)
+end
+
+"""
+    invokelatest(f, args...; kwargs...)
+
+Calls `f(args...; kwargs...)`, but guarantees that the most recent method of `f`
+will be executed.   This is useful in specialized circumstances,
+e.g. long-running event loops or callback functions that may
+call obsolete versions of a function `f`.
+(The drawback is that `invokelatest` is somewhat slower than calling
+`f` directly, and the type of the result cannot be inferred by the compiler.)
+
+!!! compat "Julia 1.9"
+    Prior to Julia 1.9, this function was not exported, and was called as `Base.invokelatest`.
+"""
+const invokelatest = Core.invokelatest
+
+# define invokelatest(f, args...; kwargs...), without kwargs wrapping
+# to forward to invokelatest
+function Core.kwcall(kwargs::NamedTuple, ::typeof(invokelatest), f, args...)
+    @inline
+    return Core.invokelatest(Core.kwcall, kwargs, f, args...)
+end
+setfield!(typeof(invokelatest).name, :max_args, Int32(2), :monotonic) # invokelatest, f, args...
+
+"""
+    invoke_in_world(world, f, args...; kwargs...)
+
+Call `f(args...; kwargs...)` in a fixed world age, `world`.
+
+This is useful for infrastructure running in the user's Julia session which is
+not part of the user's program. For example, things related to the REPL, editor
+support libraries, etc. In these cases it can be useful to prevent unwanted
+method invalidation and recompilation latency, and to prevent the user from
+breaking supporting infrastructure by mistake.
+
+The global world age can be queried using [`Base.get_world_counter()`](@ref)
+and stored for later use within the lifetime of the current Julia session, or
+when serializing and reloading the system image.
+
+Technically, `invoke_in_world` will prevent any function called by `f` from
+being extended by the user during their Julia session. That is, generic
+function method tables seen by `f` (and any functions it calls) will be frozen
+as they existed at the given `world` age. In a sense, this is like the opposite
+of [`invokelatest`](@ref).
+
+!!! note
+    It is not valid to store world ages obtained in precompilation for later use.
+    This is because precompilation generates a "parallel universe" where the
+    world age refers to system state unrelated to the main Julia session.
+"""
+const invoke_in_world = Core.invoke_in_world
+
+function Core.kwcall(kwargs::NamedTuple, ::typeof(invoke_in_world), world::UInt, f, args...)
+    @inline
+    return Core.invoke_in_world(world, Core.kwcall, kwargs, f, args...)
+end
+setfield!(typeof(invoke_in_world).name, :max_args, Int32(3), :monotonic) # invoke_in_world, world, f, args...
+
+# core operations & types
+include("promotion.jl")
+include("tuple.jl")
+include("expr.jl")
+include("pair.jl")
+include("traits.jl")
+include("range.jl")
+include("error.jl")
+
+# core numeric operations & types
+==(x, y) = x === y
+include("bool.jl")
+include("number.jl")
+include("int.jl")
+include("operators.jl")
+include("pointer.jl")
+include("refvalue.jl")
+include("cmem.jl")
+
+function nextfloat end
+function prevfloat end
+include("rounding.jl")
+include("float.jl")
+
+# Lazy strings
+include("strings/lazy.jl")
+
+function cld end
+function fld end
+include("checked.jl")
+using .Checked
+
+# array structures
+include("indices.jl")
+include("genericmemory.jl")
+include("array.jl")
+include("abstractarray.jl")
+include("baseext.jl")
+
+include("c.jl")
+include("abstractset.jl")
+include("bitarray.jl")
+include("bitset.jl")
+include("abstractdict.jl")
+include("iddict.jl")
+include("idset.jl")
+include("ntuple.jl")
+include("iterators.jl")
+using .Iterators: zip, unzip, enumerate, only
+using .Iterators: Flatten, Filter, product  # for generators
+using .Iterators: Stateful    # compat (was formerly used in reinterpretarray.jl)
+include("namedtuple.jl")
+
+include("anyall.jl")
+
+include("ordering.jl")
+using .Order
+
+include("coreir.jl")
+include("module.jl")
+
+BUILDROOT::String = ""
+DATAROOT::String = ""
+const DL_LOAD_PATH = String[]
+
+baremodule BuildSettings end
+
+function process_sysimg_args!()
+    let i = 2 # skip file name
+        while i <= length(Core.ARGS)
+            if Core.ARGS[i] == "--buildsettings"
+                include(BuildSettings, ARGS[i+1])
+            elseif Core.ARGS[i] == "--buildroot"
+                global BUILDROOT = Core.ARGS[i+1]
+            elseif Core.ARGS[i] == "--dataroot"
+                global DATAROOT = Core.ARGS[i+1]
+            else
+                error(strcat("invalid sysimage argument: ", Core.ARGS[i]))
+            end
+            i += 2
+        end
+    end
+end
+process_sysimg_args!()
+
+function isready end
+
+include(strcat(DATAROOT, "julia/Compiler/src/Compiler.jl"))
+using .Compiler.ReinferUtils: ReinferUtils, invalidate_code_for_globalref!
+
+const _return_type = Compiler.return_type
+
+# Enable compiler
+Compiler.bootstrap!()
+
+include("flfrontend.jl")
+Core._setparser!(fl_parse)
+Core._setlowerer!(fl_lower)
+
+# Further definition of Base will happen in Base.jl if loaded.
+
+# Ensure this file is also tracked
+@assert !isassigned(_included_files, 1)
+_included_files[1] = (@__MODULE__, ccall(:jl_prepend_cwd, Any, (Any,), "Base_compiler.jl"))
+
+end # module Base
+using .Base
diff --git a/base/Enums.jl b/base/Enums.jl
index 45a1b66753484..d4094945853ec 100644
--- a/base/Enums.jl
+++ b/base/Enums.jl
@@ -44,7 +44,7 @@ Base.print(io::IO, x::Enum) = print(io, _symbol(x))
 function Base.show(io::IO, x::Enum)
     sym = _symbol(x)
     if !(get(io, :compact, false)::Bool)
-        from = get(io, :module, Base.active_module())
+        from = get(io, :module, Main)
         def = parentmodule(typeof(x))
         if from === nothing || !Base.isvisible(sym, def, from)
             show(io, def)
@@ -90,7 +90,7 @@ end
 # give Enum types scalar behavior in broadcasting
 Base.broadcastable(x::Enum) = Ref(x)
 
-@noinline enum_argument_error(typename, x) = throw(ArgumentError(string("invalid value for Enum $(typename): $x")))
+@noinline enum_argument_error(typename, x) = throw(ArgumentError(LazyString("invalid value for Enum ", typename, ": ", x)))
 
 """
     @enum EnumName[::BaseType] value1[=x] value2[=y]
@@ -143,7 +143,7 @@ julia> Symbol(apple)
 """
 macro enum(T::Union{Symbol,Expr}, syms...)
     if isempty(syms)
-        throw(ArgumentError("no arguments given for Enum $T"))
+        throw(ArgumentError(LazyString("no arguments given for Enum ", T)))
     end
     basetype = Int32
     typename = T
@@ -151,10 +151,11 @@ macro enum(T::Union{Symbol,Expr}, syms...)
         typename = T.args[1]
         basetype = Core.eval(__module__, T.args[2])
         if !isa(basetype, DataType) || !(basetype <: Integer) || !isbitstype(basetype)
-            throw(ArgumentError("invalid base type for Enum $typename, $T=::$basetype; base type must be an integer primitive type"))
+            throw(ArgumentError(
+                LazyString("invalid base type for Enum ", typename, ", ", T, "=::", basetype, "; base type must be an integer primitive type")))
         end
     elseif !isa(T, Symbol)
-        throw(ArgumentError("invalid type expression for enum $T"))
+        throw(ArgumentError(LazyString("invalid type expression for enum ", T)))
     end
     values = Vector{basetype}()
     seen = Set{Symbol}()
@@ -169,32 +170,32 @@ macro enum(T::Union{Symbol,Expr}, syms...)
         s isa LineNumberNode && continue
         if isa(s, Symbol)
             if i == typemin(basetype) && !isempty(values)
-                throw(ArgumentError("overflow in value \"$s\" of Enum $typename"))
+                throw(ArgumentError(LazyString("overflow in value \"", s, "\" of Enum ", typename)))
             end
         elseif isa(s, Expr) &&
                (s.head === :(=) || s.head === :kw) &&
                length(s.args) == 2 && isa(s.args[1], Symbol)
             i = Core.eval(__module__, s.args[2]) # allow exprs, e.g. uint128"1"
             if !isa(i, Integer)
-                throw(ArgumentError("invalid value for Enum $typename, $s; values must be integers"))
+                throw(ArgumentError(LazyString("invalid value for Enum ", typename, ", ", s, "; values must be integers")))
             end
             i = convert(basetype, i)
             s = s.args[1]
             hasexpr = true
         else
-            throw(ArgumentError(string("invalid argument for Enum ", typename, ": ", s)))
+            throw(ArgumentError(LazyString("invalid argument for Enum ", typename, ": ", s)))
         end
         s = s::Symbol
         if !Base.isidentifier(s)
-            throw(ArgumentError("invalid name for Enum $typename; \"$s\" is not a valid identifier"))
+            throw(ArgumentError(LazyString("invalid name for Enum ", typename, "; \"", s, "\" is not a valid identifier")))
         end
         if hasexpr && haskey(namemap, i)
-            throw(ArgumentError("both $s and $(namemap[i]) have value $i in Enum $typename; values must be unique"))
+            throw(ArgumentError(LazyString("both ", s, " and ", namemap[i], " have value ", i, " in Enum ", typename, "; values must be unique")))
         end
         namemap[i] = s
         push!(values, i)
         if s in seen
-            throw(ArgumentError("name \"$s\" in Enum $typename is not unique"))
+            throw(ArgumentError(LazyString("name \"", s, "\" in Enum ", typename, " is not unique")))
         end
         push!(seen, s)
         if length(values) == 1
diff --git a/base/Makefile b/base/Makefile
index 493302af78b02..eb36d6d0a4319 100644
--- a/base/Makefile
+++ b/base/Makefile
@@ -18,9 +18,9 @@ else
 endif
 
 define parse_features
-@echo "# $(2) features" >> $@
+@printf "%s\n" "# $(2) features" >> $@
 @$(call PRINT_PERL, cat $(SRCDIR)/../src/features_$(1).h | perl -lne 'print "const JL_$(2)_$$1 = UInt32($$2)" if /^\s*JL_FEATURE_DEF(?:_NAME)?\(\s*(\w+)\s*,\s*([^,]+)\s*,.*\)\s*(?:\/\/.*)?$$/' >> $@)
-@echo >> $@
+@printf "\n" >> $@
 endef
 
 $(BUILDDIR)/features_h.jl: $(SRCDIR)/../src/features_x86.h $(SRCDIR)/../src/features_aarch32.h $(SRCDIR)/../src/features_aarch64.h
@@ -33,7 +33,7 @@ $(BUILDDIR)/pcre_h.jl: $(PCRE_INCL_PATH)
 	@$(call PRINT_PERL, $(CPP) -D PCRE2_CODE_UNIT_WIDTH=8 -dM $< | perl -nle '/^\s*#define\s+PCRE2_(\w*)\s*\(?($(PCRE_CONST))\)?u?\s*$$/ and print index($$1, "ERROR_") == 0 ? "const $$1 = Cint($$2)" : "const $$1 = UInt32($$2)"' | LC_ALL=C sort > $@)
 
 $(BUILDDIR)/errno_h.jl:
-	@$(call PRINT_PERL, echo '#include <errno.h>' | $(CPP) -dM - | perl -nle 'print "const $$1 = Int32($$2)" if /^#define\s+(E\w+)\s+(\d+)\s*$$/' | LC_ALL=C sort > $@)
+	@$(call PRINT_PERL, printf "%s\n" '#include <errno.h>' | $(CPP) -dM - | perl -nle 'print "const $$1 = Int32($$2)" if /^#define\s+(E\w+)\s+(\d+)\s*$$/' | LC_ALL=C sort > $@)
 
 $(BUILDDIR)/file_constants.jl: $(SRCDIR)/../src/file_constants.h
 	@$(call PRINT_PERL, $(CPP_STDOUT) -DJULIA $< | perl -nle 'print "$$1 0o$$2" if /^(\s*const\s+[A-z_]+\s+=)\s+(0[0-9]*)\s*$$/; print "$$1" if /^\s*(const\s+[A-z_]+\s+=\s+([1-9]|0x)[0-9A-z]*)\s*$$/' > $@)
@@ -42,57 +42,47 @@ $(BUILDDIR)/uv_constants.jl: $(SRCDIR)/../src/uv_constants.h $(LIBUV_INC)/uv/err
 	@$(call PRINT_PERL, $(CPP_STDOUT) "-I$(LIBUV_INC)" -DJULIA $< | tail -n 16 > $@)
 
 $(BUILDDIR)/build_h.jl.phony:
-	@echo "# This file is automatically generated in base/Makefile" > $@
+	@printf "%s\n" "# This file is automatically generated in base/Makefile" > $@
 ifeq ($(XC_HOST),)
-	@echo "const MACHINE = \"$(BUILD_MACHINE)\"" >> $@
+	@printf "%s\n" "const MACHINE = \"$(BUILD_MACHINE)\"" >> $@
 else
-	@echo "const MACHINE = \"$(XC_HOST)\"" >> $@
+	@printf "%s\n" "const MACHINE = \"$(XC_HOST)\"" >> $@
 endif
-	@echo "const libm_name = \"$(LIBMNAME)\"" >> $@
+	@printf "%s\n" "const libm_name = \"$(LIBMNAME)\"" >> $@
 ifeq ($(USE_BLAS64), 1)
-	@echo "const USE_BLAS64 = true" >> $@
+	@printf "%s\n" "const USE_BLAS64 = true" >> $@
 else
-	@echo "const USE_BLAS64 = false" >> $@
+	@printf "%s\n" "const USE_BLAS64 = false" >> $@
 endif
 ifeq ($(USE_GPL_LIBS), 1)
-	@echo "const USE_GPL_LIBS = true" >> $@
+	@printf "%s\n" "const USE_GPL_LIBS = true" >> $@
 else
-	@echo "const USE_GPL_LIBS = false" >> $@
-endif
-	@echo "const libllvm_version_string = \"$$($(LLVM_CONFIG_HOST) --version)\"" >> $@
-	@echo "const libllvm_name = \"$(LLVM_SHARED_LIB_NAME)\"" >> $@
-	@echo "const VERSION_STRING = \"$(JULIA_VERSION)\"" >> $@
-	@echo "const TAGGED_RELEASE_BANNER = \"$(TAGGED_RELEASE_BANNER)\"" >> $@
-ifeq ($(OS),WINNT)
-	@printf 'const SYSCONFDIR = "%s"\n' '$(subst /,\\,$(sysconfdir_rel))' >> $@
-	@printf 'const DATAROOTDIR = "%s"\n' '$(subst /,\\,$(datarootdir_rel))' >> $@
-	@printf 'const DOCDIR = "%s"\n' '$(subst /,\\,$(docdir_rel))' >> $@
-	@printf 'const LIBDIR = "%s"\n' '$(subst /,\\,$(libdir_rel))' >> $@
-	@printf 'const LIBEXECDIR = "%s"\n' '$(subst /,\\,$(libexecdir_rel))' >> $@
-	@printf 'const PRIVATE_LIBDIR = "%s"\n' '$(subst /,\\,$(private_libdir_rel))' >> $@
-	@printf 'const PRIVATE_LIBEXECDIR = "%s"\n' '$(subst /,\\,$(private_libexecdir_rel))' >> $@
-	@printf 'const INCLUDEDIR = "%s"\n' '$(subst /,\\,$(includedir_rel))' >> $@
-else
-	@echo "const SYSCONFDIR = \"$(sysconfdir_rel)\"" >> $@
-	@echo "const DATAROOTDIR = \"$(datarootdir_rel)\"" >> $@
-	@echo "const DOCDIR = \"$(docdir_rel)\"" >> $@
-	@echo "const LIBDIR = \"$(libdir_rel)\"" >> $@
-	@echo "const LIBEXECDIR = \"$(libexecdir_rel)\"" >> $@
-	@echo "const PRIVATE_LIBDIR = \"$(private_libdir_rel)\"" >> $@
-	@echo "const PRIVATE_LIBEXECDIR = \"$(private_libexecdir_rel)\"" >> $@
-	@echo "const INCLUDEDIR = \"$(includedir_rel)\"" >> $@
+	@printf "%s\n" "const USE_GPL_LIBS = false" >> $@
 endif
+	@printf "%s\n" "const libllvm_version_string = \"$$($(LLVM_CONFIG_HOST) --version)\"" >> $@
+	@printf "%s\n" "const libllvm_name = \"$(LLVM_SHARED_LIB_NAME)\"" >> $@
+	@printf "%s\n" "const VERSION_STRING = \"$(JULIA_VERSION)\"" >> $@
+	@printf "%s\n" "const TAGGED_RELEASE_BANNER = \"$(TAGGED_RELEASE_BANNER)\"" >> $@
+	@printf "%s\n" "const SYSCONFDIR = "$(call shell_escape,$(call julia_escape,$(call normalize_path,$(sysconfdir_rel)))) >> $@
+	@printf "%s\n" "const DATAROOTDIR = "$(call shell_escape,$(call julia_escape,$(call normalize_path,$(datarootdir_rel)))) >> $@
+	@printf "%s\n" "const DOCDIR = "$(call shell_escape,$(call julia_escape,$(call normalize_path,$(docdir_rel)))) >> $@
+	@printf "%s\n" "const LIBDIR = "$(call shell_escape,$(call julia_escape,$(call normalize_path,$(libdir_rel)))) >> $@
+	@printf "%s\n" "const LIBEXECDIR = "$(call shell_escape,$(call julia_escape,$(call normalize_path,$(libexecdir_rel)))) >> $@
+	@printf "%s\n" "const PRIVATE_LIBDIR = "$(call shell_escape,$(call julia_escape,$(call normalize_path,$(private_libdir_rel)))) >> $@
+	@printf "%s\n" "const PRIVATE_LIBEXECDIR = "$(call shell_escape,$(call julia_escape,$(call normalize_path,$(private_libexecdir_rel)))) >> $@
+	@printf "%s\n" "const INCLUDEDIR = "$(call shell_escape,$(call julia_escape,$(call normalize_path,$(includedir_rel)))) >> $@
+	@printf "%s\n" "const SOURCEDIR = "$(call shell_escape,$(call julia_escape,$(call normalize_path,$(shell echo $(call cygpath_w,$(JULIAHOME)))))) >> $@
 ifeq ($(DARWIN_FRAMEWORK), 1)
-	@echo "const DARWIN_FRAMEWORK = true" >> $@
-	@echo "const DARWIN_FRAMEWORK_NAME = \"$(FRAMEWORK_NAME)\"" >> $@
+	@printf "%s\n" "const DARWIN_FRAMEWORK = true" >> $@
+	@printf "%s\n" "const DARWIN_FRAMEWORK_NAME = \"$(FRAMEWORK_NAME)\"" >> $@
 else
-	@echo "const DARWIN_FRAMEWORK = false" >> $@
+	@printf "%s\n" "const DARWIN_FRAMEWORK = false" >> $@
 endif
 ifeq ($(OS), Darwin)
-	@echo "const MACOS_PRODUCT_VERSION = \"$(shell sw_vers -productVersion)\"" >> $@
-	@echo "const MACOS_PLATFORM_VERSION = \"$(shell xcrun --show-sdk-version)\"" >> $@
+	@printf "%s\n" "const MACOS_PRODUCT_VERSION = \"$(shell sw_vers -productVersion)\"" >> $@
+	@printf "%s\n" "const MACOS_PLATFORM_VERSION = \"$(shell xcrun --show-sdk-version)\"" >> $@
 endif
-	@echo "const BUILD_TRIPLET = \"$(BB_TRIPLET_LIBGFORTRAN_CXXABI)\"" >> $@
+	@printf "%s\n" "const BUILD_TRIPLET = \"$(BB_TRIPLET_LIBGFORTRAN_CXXABI)\"" >> $@
 
 	@# This to ensure that we always rebuild this file, but only when it is modified do we touch build_h.jl,
 	@# ensuring we rebuild the system image as infrequently as possible
@@ -115,10 +105,10 @@ ifneq ($(NO_GIT), 1)
 	    rm -f $@; \
 	fi
 else
-ifeq ($(shell [ -f $(BUILDDIR)/version_git.jl ] && echo "true"), true)
+ifeq ($(shell [ -f $(BUILDDIR)/version_git.jl ] && printf "true\n"), true)
 	@# Give warning if boilerplate git is used
 	@if grep -q "Default output if git is not available" $(BUILDDIR)/version_git.jl; then \
-	    echo "WARNING: Using boilerplate git version info" >&2; \
+	    printf "WARNING: Using boilerplate git version info\n" >&2; \
 	fi
 else
 	$(warning "WARNING: Generating boilerplate git version info")
@@ -141,7 +131,7 @@ resolve_path = \
 	if [ -n "$${$1_}" ]; then $1_wd=`dirname "$${$1}"`; $1="$${$1_}"; fi
 ## if it's a relative path, make it an absolute path
 resolve_path += && \
-	if [ -z "`echo $${$1} | grep '^/'`" ]; then $1=$${$1_wd}/$${$1}; fi
+	if [ -z "`printf "%s\n" "$${$1}" | grep '^/'`" ]; then $1=$${$1_wd}/$${$1}; fi
 ifeq ($(OS), Darwin)
 # try to use the install_name id instead (unless it is an @rpath or such)
 # if it's a relative path, make it an absolute path using the working directory from $1,
@@ -150,7 +140,7 @@ resolve_path += && \
 	$1_=`otool -D $${$1} | tail -n +2 | sed -e 's/^@.*$$//'` && \
 	if [ -n "$${$1_}" ]; then \
 	$1_wd=`dirname "$${$1}"`; $1=$${$1_}; \
-	if [ -z "`echo $${$1} | grep '^/'`" ]; then $1=$${$1_wd}/$${$1}; fi; \
+	if [ -z "`printf "%s\n" $${$1} | grep '^/'`" ]; then $1=$${$1_wd}/$${$1}; fi; \
 	fi
 else
 # try to use the SO_NAME (if the named file exists)
@@ -164,10 +154,10 @@ endif
 
 ## debug code: `make resolve-path P=<path to test>`
 #resolve_path += && \
-#	echo "$${$1_wd} $${$1}"
+#	printf "%s\n" "$${$1_wd} $${$1}"
 #resolve-path:
 #	$(call resolve_path,P) && \
-#	echo "$$P"
+#	printf "%s\n" "$$P"
 
 define symlink_system_library
 libname_$2 := $$(notdir $(call versioned_libname,$2,$3))
@@ -179,11 +169,11 @@ $$(build_private_libdir)/$$(libname_$2):
 		$$(call resolve_path,REALPATH) && \
 		[ -e "$$$$REALPATH" ] && \
 		rm -f "$$@" && \
-		echo ln -sf "$$$$REALPATH" "$$@" && \
+		printf "ln -sf %s %s\n" "$$$$REALPATH" "$$@" && \
 		ln -sf "$$$$REALPATH" "$$@"; \
 	else \
 		if [ "$4" != "ALLOW_FAILURE" ]; then \
-			echo "System library symlink failure: Unable to locate $$(libname_$2) on your system!" >&2; \
+			printf "%s\n" "System library symlink failure: Unable to locate $$(libname_$2) on your system!" >&2; \
 			false; \
 		fi; \
 	fi
@@ -203,19 +193,25 @@ endif
 $(build_bindir)/7z$(EXE):
 	[ -e "$(7Z_PATH)" ] && \
 	rm -f "$@" && \
-	ln -svf "$(7Z_PATH)" "$@"
+	ln -sf "$(7Z_PATH)" "$@"
 
-symlink_lld: $(build_bindir)/lld$(EXE)
+symlink_llvm_utils: $(build_depsbindir)/lld$(EXE) $(build_depsbindir)/dsymutil$(EXE)
 
 ifneq ($(USE_SYSTEM_LLD),0)
-SYMLINK_SYSTEM_LIBRARIES += symlink_lld
+SYMLINK_SYSTEM_LIBRARIES += symlink_llvm_utils
 LLD_PATH := $(shell which lld$(EXE))
+DSYMUTIL_PATH := $(shell which dsymutil$(EXE))
 endif
 
-$(build_bindir)/lld$(EXE):
+$(build_depsbindir)/lld$(EXE):
 	[ -e "$(LLD_PATH)" ] && \
 	rm -f "$@" && \
-	ln -svf "$(LLD_PATH)" "$@"
+	ln -sf "$(LLD_PATH)" "$@"
+
+$(build_depsbindir)/dsymutil$(EXE):
+	[ -e "$(DSYMUTIL_PATH)" ] && \
+	rm -f "$@" && \
+	ln -sf "$(DSYMUTIL_PATH)" "$@"
 
 # the following excludes: libuv.a, libutf8proc.a
 
@@ -243,12 +239,12 @@ endif
 ifneq (,$(LIBGFORTRAN_VERSION))
 $(eval $(call symlink_system_library,CSL,libgfortran,$(LIBGFORTRAN_VERSION)))
 endif
-$(eval $(call symlink_system_library,CSL,libquadmath,0))
 $(eval $(call symlink_system_library,CSL,libstdc++,6))
-# We allow libssp, libatomic and libgomp to fail as they are not available on all systems
+# We allow libssp, libatomic, libgomp and libquadmath to fail as they are not available on all systems
 $(eval $(call symlink_system_library,CSL,libssp,0,ALLOW_FAILURE))
 $(eval $(call symlink_system_library,CSL,libatomic,1,ALLOW_FAILURE))
 $(eval $(call symlink_system_library,CSL,libgomp,1,ALLOW_FAILURE))
+$(eval $(call symlink_system_library,CSL,libquadmath,0,ALLOW_FAILURE))
 $(eval $(call symlink_system_library,PCRE,libpcre2-8))
 $(eval $(call symlink_system_library,DSFMT,libdSFMT))
 $(eval $(call symlink_system_library,LIBBLASTRAMPOLINE,libblastrampoline))
@@ -258,9 +254,8 @@ $(eval $(call symlink_system_library,LAPACK,$(LIBLAPACKNAME)))
 endif
 $(eval $(call symlink_system_library,GMP,libgmp))
 $(eval $(call symlink_system_library,MPFR,libmpfr))
-$(eval $(call symlink_system_library,MBEDTLS,libmbedtls))
-$(eval $(call symlink_system_library,MBEDTLS,libmbedcrypto))
-$(eval $(call symlink_system_library,MBEDTLS,libmbedx509))
+$(eval $(call symlink_system_library,OPENSSL,libcrypto))
+$(eval $(call symlink_system_library,OPENSSL,libssl))
 $(eval $(call symlink_system_library,LIBSSH2,libssh2))
 $(eval $(call symlink_system_library,NGHTTP2,libnghttp2))
 $(eval $(call symlink_system_library,CURL,libcurl))
@@ -290,7 +285,7 @@ $(build_private_libdir)/libLLVM.$(SHLIB_EXT):
 	$(call resolve_path,REALPATH) && \
 	[ -e "$$REALPATH" ] && \
 	rm -f "$@" && \
-	echo ln -sf "$$REALPATH" "$@" && \
+	printf "%s\n" ln -sf "$$REALPATH" "$@" && \
 	ln -sf "$$REALPATH" "$@"
 ifneq ($(USE_SYSTEM_LLVM),0)
 ifneq ($(USE_LLVM_SHLIB),0)
@@ -312,4 +307,5 @@ clean:
 	-rm -f $(BUILDDIR)/file_constants.jl
 	-rm -f $(BUILDDIR)/version_git.jl
 	-rm -f $(BUILDDIR)/version_git.jl.phony
+	-rm -f $(BUILDDIR)/version_git_dirty
 	-rm -f $(build_private_libdir)/lib*.$(SHLIB_EXT)*
diff --git a/stdlib/REPL/src/Terminals.jl b/base/Terminals.jl
similarity index 86%
rename from stdlib/REPL/src/Terminals.jl
rename to base/Terminals.jl
index dac19406b3fc1..71a287573ffbf 100644
--- a/stdlib/REPL/src/Terminals.jl
+++ b/base/Terminals.jl
@@ -30,9 +30,7 @@ import Base:
     displaysize,
     flush,
     pipe_reader,
-    pipe_writer,
-    read,
-    readuntil
+    pipe_writer
 
 ## AbstractTerminal: abstract supertype of all terminals ##
 
@@ -99,6 +97,7 @@ abstract type UnixTerminal <: TextTerminal end
 pipe_reader(t::UnixTerminal) = t.in_stream::IO
 pipe_writer(t::UnixTerminal) = t.out_stream::IO
 
+@nospecialize
 mutable struct TerminalBuffer <: UnixTerminal
     out_stream::IO
 end
@@ -109,6 +108,7 @@ mutable struct TTYTerminal <: UnixTerminal
     out_stream::IO
     err_stream::IO
 end
+@specialize
 
 const CSI = "\x1b["
 
@@ -120,23 +120,28 @@ cmove_line_up(t::UnixTerminal, n) = (cmove_up(t, n); cmove_col(t, 1))
 cmove_line_down(t::UnixTerminal, n) = (cmove_down(t, n); cmove_col(t, 1))
 cmove_col(t::UnixTerminal, n) = (write(t.out_stream, '\r'); n > 1 && cmove_right(t, n-1))
 
-const is_precompiling = Ref(false)
 if Sys.iswindows()
     function raw!(t::TTYTerminal,raw::Bool)
-        is_precompiling[] && return true
-        check_open(t.in_stream)
         if Base.ispty(t.in_stream)
-            run((raw ? `stty raw -echo onlcr -ocrnl opost` : `stty sane`),
-                t.in_stream, t.out_stream, t.err_stream)
-            true
+            try
+                run((raw ? `stty raw -echo onlcr -ocrnl opost` : `stty sane`),
+                    t.in_stream, t.out_stream, t.err_stream)
+                true
+            catch ex
+                # Fall back to ccall if stty fails (e.g., in some CI environments)
+                if ex isa ProcessFailedException
+                    ccall(:jl_tty_set_mode, Int32, (Ptr{Cvoid},Int32), t.in_stream.handle::Ptr{Cvoid}, raw) == 0
+                else
+                    rethrow()
+                end
+            end
         else
-            ccall(:jl_tty_set_mode, Int32, (Ptr{Cvoid},Int32), t.in_stream.handle::Ptr{Cvoid}, raw) != -1
+            ccall(:jl_tty_set_mode, Int32, (Ptr{Cvoid},Int32), t.in_stream.handle::Ptr{Cvoid}, raw) == 0
         end
     end
 else
     function raw!(t::TTYTerminal, raw::Bool)
-        check_open(t.in_stream)
-        ccall(:jl_tty_set_mode, Int32, (Ptr{Cvoid},Int32), t.in_stream.handle::Ptr{Cvoid}, raw) != -1
+        ccall(:jl_tty_set_mode, Int32, (Ptr{Cvoid},Int32), t.in_stream.handle::Ptr{Cvoid}, raw) == 0
     end
 end
 
@@ -150,7 +155,7 @@ end
 @eval clear_line(t::UnixTerminal) = write(t.out_stream, $"\r$(CSI)0K")
 beep(t::UnixTerminal) = write(t.err_stream,"\x7")
 
-Base.displaysize(t::UnixTerminal) = displaysize(t.out_stream)
+Base.displaysize(t::UnixTerminal) = displaysize(t.out_stream)::Tuple{Int,Int}
 
 hascolor(t::TTYTerminal) = get(t.out_stream, :color, false)::Bool
 
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index 1417987847ec4..a451bf56cce97 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -39,7 +39,11 @@ julia> size(A, 2)
 3
 ```
 """
-size(t::AbstractArray{T,N}, d) where {T,N} = d::Integer <= N ? size(t)[d] : 1
+function size(t::AbstractArray, dim)
+    d = Int(dim)::Int
+    s = size(t)
+    d <= length(s) ? s[d] : 1
+end
 
 """
     axes(A, d)
@@ -95,7 +99,7 @@ julia> axes(A)
 """
 function axes(A)
     @inline
-    map(oneto, size(A))
+    map(unchecked_oneto, size(A))
 end
 
 """
@@ -103,17 +107,20 @@ end
     has_offset_axes(A, B, ...)
 
 Return `true` if the indices of `A` start with something other than 1 along any axis.
-If multiple arguments are passed, equivalent to `has_offset_axes(A) | has_offset_axes(B) | ...`.
+If multiple arguments are passed, equivalent to `has_offset_axes(A) || has_offset_axes(B) || ...`.
 
 See also [`require_one_based_indexing`](@ref).
 """
+has_offset_axes() = false
 has_offset_axes(A) = _any_tuple(x->Int(first(x))::Int != 1, false, axes(A)...)
 has_offset_axes(A::AbstractVector) = Int(firstindex(A))::Int != 1 # improve performance of a common case (ranges)
-# Use `_any_tuple` to avoid unneeded invoke.
-# note: this could call `any` directly if the compiler can infer it
-has_offset_axes(As...) = _any_tuple(has_offset_axes, false, As...)
 has_offset_axes(::Colon) = false
 has_offset_axes(::Array) = false
+# note: this could call `any` directly if the compiler can infer it. We don't use _any_tuple
+# here because it stops full elision in some cases (#49332) and we don't need handling of
+# `missing` (has_offset_axes(A) always returns a Bool)
+has_offset_axes(A, As...) = has_offset_axes(A) || has_offset_axes(As...)
+
 
 """
     require_one_based_indexing(A::AbstractArray)
@@ -254,7 +261,7 @@ julia> Base.elsize(rand(Float32, 10))
 elsize(A::AbstractArray) = elsize(typeof(A))
 
 """
-    ndims(A::AbstractArray) -> Integer
+    ndims(A::AbstractArray)::Integer
 
 Return the number of dimensions of `A`.
 
@@ -268,12 +275,12 @@ julia> ndims(A)
 3
 ```
 """
-ndims(::AbstractArray{T,N}) where {T,N} = N
-ndims(::Type{<:AbstractArray{<:Any,N}}) where {N} = N
+ndims(::AbstractArray{T,N}) where {T,N} = N::Int
+ndims(::Type{<:AbstractArray{<:Any,N}}) where {N} = N::Int
 ndims(::Type{Union{}}, slurp...) = throw(ArgumentError("Union{} does not have elements"))
 
 """
-    length(collection) -> Integer
+    length(collection)::Integer
 
 Return the number of elements in the collection.
 
@@ -309,7 +316,7 @@ julia> length([1 2; 3 4])
 4
 ```
 """
-length(t::AbstractArray) = (@inline; prod(size(t)))
+length(t::AbstractArray)
 
 # `eachindex` is mostly an optimization of `keys`
 eachindex(itrs...) = keys(itrs...)
@@ -318,11 +325,13 @@ eachindex(itrs...) = keys(itrs...)
 eachindex(A::AbstractVector) = (@inline(); axes1(A))
 
 
-@noinline function throw_eachindex_mismatch_indices(::IndexLinear, inds...)
-    throw(DimensionMismatch("all inputs to eachindex must have the same indices, got $(join(inds, ", ", " and "))"))
-end
-@noinline function throw_eachindex_mismatch_indices(::IndexCartesian, inds...)
-    throw(DimensionMismatch("all inputs to eachindex must have the same axes, got $(join(inds, ", ", " and "))"))
+# we unroll the join for easier inference
+_join_comma_and(indsA, indsB) = LazyString(indsA, " and ", indsB)
+_join_comma_and(indsA, indsB, indsC...) = LazyString(indsA, ", ", _join_comma_and(indsB, indsC...))
+@noinline function throw_eachindex_mismatch_indices(indices_str, indsA, indsBs...)
+    throw(DimensionMismatch(
+            LazyString("all inputs to eachindex must have the same ", indices_str, ", got ",
+                _join_comma_and(indsA, indsBs...))))
 end
 
 """
@@ -382,27 +391,24 @@ function eachindex(A::AbstractArray, B::AbstractArray...)
     @inline
     eachindex(IndexStyle(A,B...), A, B...)
 end
+eachindex(::IndexLinear, A::Union{Array, Memory}) = unchecked_oneto(length(A))
 eachindex(::IndexLinear, A::AbstractArray) = (@inline; oneto(length(A)))
 eachindex(::IndexLinear, A::AbstractVector) = (@inline; axes1(A))
 function eachindex(::IndexLinear, A::AbstractArray, B::AbstractArray...)
     @inline
     indsA = eachindex(IndexLinear(), A)
-    _all_match_first(X->eachindex(IndexLinear(), X), indsA, B...) ||
-        throw_eachindex_mismatch_indices(IndexLinear(), eachindex(A), eachindex.(B)...)
+    indsBs = map(X -> eachindex(IndexLinear(), X), B)
+    all(==(indsA), indsBs) ||
+        throw_eachindex_mismatch_indices("indices", indsA, indsBs...)
     indsA
 end
-function _all_match_first(f::F, inds, A, B...) where F<:Function
-    @inline
-    (inds == f(A)) & _all_match_first(f, inds, B...)
-end
-_all_match_first(f::F, inds) where F<:Function = true
 
 # keys with an IndexStyle
 keys(s::IndexStyle, A::AbstractArray, B::AbstractArray...) = eachindex(s, A, B...)
 
 """
-    lastindex(collection) -> Integer
-    lastindex(collection, d) -> Integer
+    lastindex(collection)::Integer
+    lastindex(collection, d)::Integer
 
 Return the last index of `collection`. If `d` is given, return the last index of `collection` along dimension `d`.
 
@@ -424,8 +430,8 @@ lastindex(a::AbstractArray) = (@inline; last(eachindex(IndexLinear(), a)))
 lastindex(a, d) = (@inline; last(axes(a, d)))
 
 """
-    firstindex(collection) -> Integer
-    firstindex(collection, d) -> Integer
+    firstindex(collection)::Integer
+    firstindex(collection, d)::Integer
 
 Return the first index of `collection`. If `d` is given, return the first index of `collection` along dimension `d`.
 
@@ -446,7 +452,7 @@ julia> firstindex(rand(3,4,5), 2)
 firstindex(a::AbstractArray) = (@inline; first(eachindex(IndexLinear(), a)))
 firstindex(a, d) = (@inline; first(axes(a, d)))
 
-first(a::AbstractArray) = a[first(eachindex(a))]
+@propagate_inbounds first(a::AbstractArray) = a[first(eachindex(a))]
 
 """
     first(coll)
@@ -499,7 +505,7 @@ Bool[]
 first(itr, n::Integer) = collect(Iterators.take(itr, n))
 # Faster method for vectors
 function first(v::AbstractVector, n::Integer)
-    n < 0 && throw(ArgumentError("Number of elements must be nonnegative"))
+    n < 0 && throw(ArgumentError("Number of elements must be non-negative"))
     v[range(begin, length=min(n, checked_length(v)))]
 end
 
@@ -549,7 +555,7 @@ Float64[]
 last(itr, n::Integer) = reverse!(collect(Iterators.take(Iterators.reverse(itr), n)))
 # Faster method for arrays
 function last(v::AbstractVector, n::Integer)
-    n < 0 && throw(ArgumentError("Number of elements must be nonnegative"))
+    n < 0 && throw(ArgumentError("Number of elements must be non-negative"))
     v[range(stop=lastindex(v), length=min(n, checked_length(v)))]
 end
 
@@ -678,15 +684,12 @@ function checkbounds(::Type{Bool}, A::AbstractArray, I...)
     checkbounds_indices(Bool, axes(A), I)
 end
 
-# Linear indexing is explicitly allowed when there is only one (non-cartesian) index
+# Linear indexing is explicitly allowed when there is only one (non-cartesian) index;
+# indices that do not allow linear indexing (e.g., logical arrays, cartesian indices, etc)
+# must add specialized methods to implement their restrictions
 function checkbounds(::Type{Bool}, A::AbstractArray, i)
     @inline
-    checkindex(Bool, eachindex(IndexLinear(), A), i)
-end
-# As a special extension, allow using logical arrays that match the source array exactly
-function checkbounds(::Type{Bool}, A::AbstractArray{<:Any,N}, I::AbstractArray{Bool,N}) where N
-    @inline
-    axes(A) == axes(I)
+    return checkindex(Bool, eachindex(IndexLinear(), A), i)
 end
 
 """
@@ -720,18 +723,13 @@ of `IA`.
 
 See also [`checkbounds`](@ref).
 """
-function checkbounds_indices(::Type{Bool}, IA::Tuple, I::Tuple)
-    @inline
-    checkindex(Bool, IA[1], I[1])::Bool & checkbounds_indices(Bool, tail(IA), tail(I))
-end
-function checkbounds_indices(::Type{Bool}, ::Tuple{}, I::Tuple)
+function checkbounds_indices(::Type{Bool}, inds::Tuple, I::Tuple{Any, Vararg})
     @inline
-    checkindex(Bool, OneTo(1), I[1])::Bool & checkbounds_indices(Bool, (), tail(I))
+    return checkindex(Bool, get(inds, 1, OneTo(1)), I[1])::Bool &
+        checkbounds_indices(Bool, safe_tail(inds), tail(I))
 end
-checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@inline; all(x->length(x)==1, IA))
-checkbounds_indices(::Type{Bool}, ::Tuple{}, ::Tuple{}) = true
 
-throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I)))
+checkbounds_indices(::Type{Bool}, inds::Tuple, ::Tuple{}) = (@inline; all(x->length(x)==1, inds))
 
 # check along a single dimension
 """
@@ -753,20 +751,19 @@ julia> checkindex(Bool, 1:20, 21)
 false
 ```
 """
-checkindex(::Type{Bool}, inds::AbstractUnitRange, i) =
-    throw(ArgumentError("unable to check bounds for indices of type $(typeof(i))"))
+checkindex(::Type{Bool}, inds, i) = throw(ArgumentError(LazyString("unable to check bounds for indices of type ", typeof(i))))
 checkindex(::Type{Bool}, inds::AbstractUnitRange, i::Real) = (first(inds) <= i) & (i <= last(inds))
 checkindex(::Type{Bool}, inds::IdentityUnitRange, i::Real) = checkindex(Bool, inds.indices, i)
 checkindex(::Type{Bool}, inds::OneTo{T}, i::T) where {T<:BitInteger} = unsigned(i - one(i)) < unsigned(last(inds))
 checkindex(::Type{Bool}, inds::AbstractUnitRange, ::Colon) = true
 checkindex(::Type{Bool}, inds::AbstractUnitRange, ::Slice) = true
-function checkindex(::Type{Bool}, inds::AbstractUnitRange, r::AbstractRange)
-    @_propagate_inbounds_meta
-    isempty(r) | (checkindex(Bool, inds, first(r)) & checkindex(Bool, inds, last(r)))
-end
-checkindex(::Type{Bool}, indx::AbstractUnitRange, I::AbstractVector{Bool}) = indx == axes1(I)
-checkindex(::Type{Bool}, indx::AbstractUnitRange, I::AbstractArray{Bool}) = false
-function checkindex(::Type{Bool}, inds::AbstractUnitRange, I::AbstractArray)
+checkindex(::Type{Bool}, inds::AbstractUnitRange, i::AbstractRange) =
+    isempty(i) | (checkindex(Bool, inds, first(i)) & checkindex(Bool, inds, last(i)))
+# range like indices with cheap `extrema`
+checkindex(::Type{Bool}, inds::AbstractUnitRange, i::LinearIndices) =
+    isempty(i) | (checkindex(Bool, inds, first(i)) & checkindex(Bool, inds, last(i)))
+
+function checkindex(::Type{Bool}, inds, I::AbstractArray)
     @inline
     b = true
     for i in I
@@ -804,7 +801,7 @@ julia> similar(1:10, 1, 4)
 Conversely, `similar(trues(10,10), 2)` returns an uninitialized `BitVector` with two
 elements since `BitArray`s are both mutable and can support 1-dimensional arrays:
 
-```julia-repl
+```jldoctest; filter = r"[01]"
 julia> similar(trues(10,10), 2)
 2-element BitVector:
  0
@@ -824,17 +821,19 @@ julia> similar(falses(10), Float64, 2, 4)
 See also: [`undef`](@ref), [`isassigned`](@ref).
 """
 similar(a::AbstractArray{T}) where {T}                             = similar(a, T)
-similar(a::AbstractArray, ::Type{T}) where {T}                     = similar(a, T, to_shape(axes(a)))
-similar(a::AbstractArray{T}, dims::Tuple) where {T}                = similar(a, T, to_shape(dims))
-similar(a::AbstractArray{T}, dims::DimOrInd...) where {T}          = similar(a, T, to_shape(dims))
-similar(a::AbstractArray, ::Type{T}, dims::DimOrInd...) where {T}  = similar(a, T, to_shape(dims))
+similar(a::AbstractArray, ::Type{T}) where {T}                     = similar(a, T, axes(a))
+similar(a::AbstractArray{T}, dims::Tuple) where {T}                = similar(a, T, dims)
+similar(a::AbstractArray{T}, dims::DimOrInd...) where {T}          = similar(a, T, dims)
+similar(a::AbstractArray, ::Type{T}, dims::DimOrInd...) where {T}  = similar(a, T, dims)
 # Similar supports specifying dims as either Integers or AbstractUnitRanges or any mixed combination
 # thereof. Ideally, we'd just convert Integers to OneTos and then call a canonical method with the axes,
 # but we don't want to require all AbstractArray subtypes to dispatch on Base.OneTo. So instead we
 # define this method to convert supported axes to Ints, with the expectation that an offset array
 # package will define a method with dims::Tuple{Union{Integer, UnitRange}, Vararg{Union{Integer, UnitRange}}}
+similar(a::AbstractArray, ::Type{T}, dims::Tuple{Union{Integer, AbstractOneTo}, Vararg{Union{Integer, AbstractOneTo}}}) where {T} = similar(a, T, to_shape(dims))
+# legacy method for packages that specialize similar(A::AbstractArray, ::Type{T}, dims::Tuple{Union{Integer, OneTo, CustomAxis}, Vararg{Union{Integer, OneTo, CustomAxis}}}
+# leaving this method in ensures that Base owns the more specific method
 similar(a::AbstractArray, ::Type{T}, dims::Tuple{Union{Integer, OneTo}, Vararg{Union{Integer, OneTo}}}) where {T} = similar(a, T, to_shape(dims))
-similar(a::AbstractArray, ::Type{T}, dims::Tuple{Integer, Vararg{Integer}}) where {T} = similar(a, T, to_shape(dims))
 # similar creates an Array by default
 similar(a::AbstractArray, ::Type{T}, dims::Dims{N}) where {T,N}    = Array{T,N}(undef, dims)
 
@@ -844,7 +843,9 @@ to_shape(dims::DimsOrInds) = map(to_shape, dims)::DimsOrInds
 # each dimension
 to_shape(i::Int) = i
 to_shape(i::Integer) = Int(i)
-to_shape(r::OneTo) = Int(last(r))
+to_shape(r::AbstractOneTo) = _to_shape(last(r))
+_to_shape(x::Integer) = to_shape(x)
+_to_shape(x) = Int(x)
 to_shape(r::AbstractUnitRange) = r
 
 """
@@ -870,6 +871,8 @@ would create a 1-dimensional logical array whose indices match those
 of the columns of `A`.
 """
 similar(::Type{T}, dims::DimOrInd...) where {T<:AbstractArray} = similar(T, dims)
+similar(::Type{T}, shape::Tuple{Union{Integer, AbstractOneTo}, Vararg{Union{Integer, AbstractOneTo}}}) where {T<:AbstractArray} = similar(T, to_shape(shape))
+# legacy method for packages that specialize similar(::Type{T}, dims::Tuple{Union{Integer, OneTo, CustomAxis}, Vararg{Union{Integer, OneTo, CustomAxis}})
 similar(::Type{T}, shape::Tuple{Union{Integer, OneTo}, Vararg{Union{Integer, OneTo}}}) where {T<:AbstractArray} = similar(T, to_shape(shape))
 similar(::Type{T}, dims::Dims) where {T<:AbstractArray} = T(undef, dims)
 
@@ -890,7 +893,7 @@ julia> empty([1.0, 2.0, 3.0], String)
 String[]
 ```
 """
-empty(a::AbstractVector{T}, ::Type{U}=T) where {T,U} = Vector{U}()
+empty(a::AbstractVector{T}, ::Type{U}=T) where {T,U} = similar(a, U, 0)
 
 # like empty, but should return a mutable collection, a Vector by default
 emptymutable(a::AbstractVector{T}, ::Type{U}=T) where {T,U} = Vector{U}()
@@ -902,11 +905,18 @@ emptymutable(itr, ::Type{U}) where {U} = Vector{U}()
 In-place [`copy`](@ref) of `src` into `dst`, discarding any pre-existing
 elements in `dst`.
 If `dst` and `src` are of the same type, `dst == src` should hold after
-the call. If `dst` and `src` are multidimensional arrays, they must have
+the call. If `dst` and `src` are vector types, they must have equal
+offset. If `dst` and `src` are multidimensional arrays, they must have
 equal [`axes`](@ref).
 
+$(_DOCS_ALIASING_WARNING)
+
 See also [`copyto!`](@ref).
 
+!!! note
+    When operating on vector types, if `dst` and `src` are not of the
+    same length, `dst` is resized to `length(src)` prior to the `copy`.
+
 !!! compat "Julia 1.1"
     This method requires at least Julia 1.1. In Julia 1.0 this method
     is available from the `Future` standard library as `Future.copy!`.
@@ -961,7 +971,7 @@ function copyto!(dest::AbstractArray, dstart::Integer, src)
     return dest
 end
 
-# copy from an some iterable object into an AbstractArray
+# copy from an iterable object into an AbstractArray
 function copyto!(dest::AbstractArray, dstart::Integer, src, sstart::Integer)
     if (sstart < 1)
         throw(ArgumentError(LazyString("source start offset (",sstart,") is < 1")))
@@ -993,7 +1003,7 @@ end
 # this method must be separate from the above since src might not have a length
 function copyto!(dest::AbstractArray, dstart::Integer, src, sstart::Integer, n::Integer)
     n < 0 && throw(ArgumentError(LazyString("tried to copy n=",n,
-        ", elements, but n should be nonnegative")))
+        ", elements, but n should be non-negative")))
     n == 0 && return dest
     dmax = dstart + n - 1
     inds = LinearIndices(dest)
@@ -1011,14 +1021,19 @@ function copyto!(dest::AbstractArray, dstart::Integer, src, sstart::Integer, n::
         end
         y = iterate(src, y[2])
     end
+    if y === nothing
+        throw(ArgumentError(LazyString(
+            "source has fewer elements than required, ",
+            "expected at least ",sstart," got ", sstart-1)))
+    end
+    val, st = y
     i = Int(dstart)
-    while i <= dmax && y !== nothing
-        val, st = y
-        @inbounds dest[i] = val
-        y = iterate(src, st)
+    @inbounds dest[i] = val
+    for val in Iterators.take(Iterators.rest(src, st), n-1)
         i += 1
+        @inbounds dest[i] = val
     end
-    i <= dmax && throw(BoundsError(dest, i))
+    i < dmax && throw(BoundsError(dest, i))
     return dest
 end
 
@@ -1034,6 +1049,8 @@ the other elements are left untouched.
 
 See also [`copy!`](@ref Base.copy!), [`copy`](@ref).
 
+$(_DOCS_ALIASING_WARNING)
+
 # Examples
 ```jldoctest
 julia> x = [1., 0., 3., 0., 5.];
@@ -1098,11 +1115,8 @@ function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle:
             end
         else
             # Dual-iterator implementation
-            ret = iterate(iterdest)
-            @inbounds for a in src
-                idx, state = ret::NTuple{2,Any}
-                dest[idx] = a
-                ret = iterate(iterdest, state)
+            for (Idest, Isrc) in zip(iterdest, itersrc)
+                @inbounds dest[Idest] = src[Isrc]
             end
         end
     end
@@ -1120,11 +1134,11 @@ function copyto!(dest::AbstractArray, dstart::Integer, src::AbstractArray, sstar
 end
 
 function copyto!(dest::AbstractArray, dstart::Integer,
-               src::AbstractArray, sstart::Integer,
-               n::Integer)
+                 src::AbstractArray, sstart::Integer,
+                 n::Integer)
     n == 0 && return dest
     n < 0 && throw(ArgumentError(LazyString("tried to copy n=",
-        n," elements, but n should be nonnegative")))
+        n," elements, but n should be non-negative")))
     destinds, srcinds = LinearIndices(dest), LinearIndices(src)
     (checkbounds(Bool, destinds, dstart) && checkbounds(Bool, destinds, dstart+n-1)) || throw(BoundsError(dest, dstart:dstart+n-1))
     (checkbounds(Bool, srcinds, sstart)  && checkbounds(Bool, srcinds, sstart+n-1))  || throw(BoundsError(src,  sstart:sstart+n-1))
@@ -1198,7 +1212,26 @@ function copymutable(a::AbstractArray)
 end
 copymutable(itr) = collect(itr)
 
-zero(x::AbstractArray{T}) where {T} = fill!(similar(x, typeof(zero(T))), zero(T))
+zero(x::AbstractArray{T}) where {T<:Number} = fill!(similar(x, typeof(zero(T))), zero(T))
+zero(x::AbstractArray{S}) where {S<:Union{Missing, Number}} = fill!(similar(x, typeof(zero(S))), zero(S))
+zero(x::AbstractArray) = map(zero, x)
+
+function _one(unit::T, mat::AbstractMatrix) where {T}
+    (rows, cols) = axes(mat)
+    (length(rows) == length(cols)) ||
+      throw(DimensionMismatch("multiplicative identity defined only for square matrices"))
+    zer = zero(unit)::T
+    require_one_based_indexing(mat)
+    I = similar(mat, T)
+    fill!(I, zer)
+    for i ∈ rows
+        I[i, i] = unit
+    end
+    I
+end
+
+one(x::AbstractMatrix{T}) where {T} = _one(one(T), x)
+oneunit(x::AbstractMatrix{T}) where {T} = _one(oneunit(T), x)
 
 ## iteration support for arrays by iterating over `eachindex` in the array ##
 # Allows fast iteration by default for both IndexLinear and IndexCartesian arrays
@@ -1206,11 +1239,19 @@ zero(x::AbstractArray{T}) where {T} = fill!(similar(x, typeof(zero(T))), zero(T)
 # While the definitions for IndexLinear are all simple enough to inline on their
 # own, IndexCartesian's CartesianIndices is more complicated and requires explicit
 # inlining.
-function iterate(A::AbstractArray, state=(eachindex(A),))
+iterate_starting_state(A) = iterate_starting_state(A, IndexStyle(A))
+iterate_starting_state(A, ::IndexLinear) = firstindex(A)
+iterate_starting_state(A, ::IndexStyle) = (eachindex(A),)
+@inline iterate(A::AbstractArray, state = iterate_starting_state(A)) = _iterate_abstractarray(A, state)
+@inline function _iterate_abstractarray(A::AbstractArray, state::Tuple)
     y = iterate(state...)
     y === nothing && return nothing
     A[y[1]], (state[1], tail(y)...)
 end
+@inline function _iterate_abstractarray(A::AbstractArray, state::Integer)
+    checkbounds(Bool, A, state) || return nothing
+    A[state], state + one(state)
+end
 
 isempty(a::AbstractArray) = (length(a) == 0)
 
@@ -1229,10 +1270,10 @@ end
 # note: the following type definitions don't mean any AbstractArray is convertible to
 # a data Ref. they just map the array element type to the pointer type for
 # convenience in cases that work.
-pointer(x::AbstractArray{T}) where {T} = unsafe_convert(Ptr{T}, x)
+pointer(x::AbstractArray{T}) where {T} = unsafe_convert(Ptr{T}, cconvert(Ptr{T}, x))
 function pointer(x::AbstractArray{T}, i::Integer) where T
     @inline
-    unsafe_convert(Ptr{T}, x) + Int(_memory_offset(x, i))::Int
+    pointer(x) + Int(_memory_offset(x, i))::Int
 end
 
 # The distance from pointer(x) to the element at x[I...] in bytes
@@ -1242,6 +1283,10 @@ function _memory_offset(x::AbstractArray, I::Vararg{Any,N}) where {N}
     return sum(map((i, s, o)->s*(i-o), J, strides(x), Tuple(first(CartesianIndices(x)))))*elsize(x)
 end
 
+## Special constprop heuristics for getindex/setindex
+typename(typeof(function getindex end)).constprop_heuristic = Core.ARRAY_INDEX_HEURISTIC
+typename(typeof(function setindex! end)).constprop_heuristic = Core.ARRAY_INDEX_HEURISTIC
+
 ## Approach:
 # We only define one fallback method on getindex for all argument types.
 # That dispatches to an (inlined) internal _getindex function, where the goal is
@@ -1254,8 +1299,16 @@ end
 """
     getindex(A, inds...)
 
-Return a subset of array `A` as specified by `inds`, where each `ind` may be,
-for example, an `Int`, an [`AbstractRange`](@ref), or a [`Vector`](@ref).
+Return a subset of array `A` as selected by the indices `inds`.
+
+Each index may be any [supported index type](@ref man-supported-index-types), such
+as an [`Integer`](@ref), [`CartesianIndex`](@ref), [range](@ref Base.AbstractRange), or [array](@ref man-multi-dim-arrays) of supported indices.
+A [:](@ref Base.Colon) may be used to select all elements along a specific dimension, and a boolean array (e.g. an `Array{Bool}` or a [`BitArray`](@ref)) may be used to filter for elements where the corresponding index is `true`.
+
+When `inds` selects multiple elements, this function returns a newly
+allocated array. To index multiple elements without making a copy,
+use [`view`](@ref) instead.
+
 See the manual section on [array indexing](@ref man-array-indexing) for details.
 
 # Examples
@@ -1278,6 +1331,27 @@ julia> getindex(A, 2:4)
  3
  2
  4
+
+julia> getindex(A, 2, 1)
+3
+
+julia> getindex(A, CartesianIndex(2, 1))
+3
+
+julia> getindex(A, :, 2)
+2-element Vector{Int64}:
+ 2
+ 4
+
+julia> getindex(A, 2, :)
+2-element Vector{Int64}:
+ 3
+ 4
+
+julia> getindex(A, A .> 2)
+2-element Vector{Int64}:
+ 3
+ 4
 ```
 """
 function getindex(A::AbstractArray, I...)
@@ -1288,11 +1362,7 @@ end
 # To avoid invalidations from multidimensional.jl: getindex(A::Array, i1::Union{Integer, CartesianIndex}, I::Union{Integer, CartesianIndex}...)
 @propagate_inbounds getindex(A::Array, i1::Integer, I::Integer...) = A[to_indices(A, (i1, I...))...]
 
-function unsafe_getindex(A::AbstractArray, I...)
-    @inline
-    @inbounds r = getindex(A, I...)
-    r
-end
+@inline unsafe_getindex(A::AbstractArray, I...) = @inbounds getindex(A, I...)
 
 struct CanonicalIndexError <: Exception
     func::String
@@ -1369,6 +1439,8 @@ _unsafe_ind2sub(sz, i) = (@inline; _ind2sub(sz, i))
 Store values from array `X` within some subset of `A` as specified by `inds`.
 The syntax `A[inds...] = X` is equivalent to `(setindex!(A, X, inds...); X)`.
 
+$(_DOCS_ALIASING_WARNING)
+
 # Examples
 ```jldoctest
 julia> A = zeros(2,2);
@@ -1425,6 +1497,8 @@ function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) whe
     r
 end
 
+_unsetindex!(A::AbstractArray, i::Integer) = _unsetindex!(A, to_index(i))
+
 """
     parent(A)
 
@@ -1490,12 +1564,14 @@ much more common case where aliasing does not occur. By default,
 unaliascopy(A::Array) = copy(A)
 unaliascopy(A::AbstractArray)::typeof(A) = (@noinline; _unaliascopy(A, copy(A)))
 _unaliascopy(A::T, C::T) where {T} = C
-_unaliascopy(A, C) = throw(ArgumentError("""
-    an array of type `$(typename(typeof(A)).wrapper)` shares memory with another argument
-    and must make a preventative copy of itself in order to maintain consistent semantics,
-    but `copy(::$(typeof(A)))` returns a new array of type `$(typeof(C))`.
-    To fix, implement:
-        `Base.unaliascopy(A::$(typename(typeof(A)).wrapper))::typeof(A)`"""))
+function _unaliascopy(A, C)
+    Aw = typename(typeof(A)).wrapper
+    throw(ArgumentError(LazyString("an array of type `", Aw, "` shares memory with another argument ",
+    "and must make a preventative copy of itself in order to maintain consistent semantics, ",
+    "but `copy(::", typeof(A), ")` returns a new array of type `", typeof(C), "`.\n",
+    """To fix, implement:
+        `Base.unaliascopy(A::""", Aw, ")::typeof(A)`")))
+end
 unaliascopy(A) = A
 
 """
@@ -1506,7 +1582,7 @@ Perform a conservative test to check if arrays `A` and `B` might share the same
 By default, this simply checks if either of the arrays reference the same memory
 regions, as identified by their [`Base.dataids`](@ref).
 """
-mightalias(A::AbstractArray, B::AbstractArray) = !isbits(A) && !isbits(B) && !_isdisjoint(dataids(A), dataids(B))
+mightalias(A::AbstractArray, B::AbstractArray) = !isbits(A) && !isbits(B) && !isempty(A) && !isempty(B) && !_isdisjoint(dataids(A), dataids(B))
 mightalias(x, y) = false
 
 _isdisjoint(as::Tuple{}, bs::Tuple{}) = true
@@ -1529,8 +1605,9 @@ parts can specialize this method to return the concatenation of the `dataids` of
 their component parts.  A typical definition for an array that wraps a parent is
 `Base.dataids(C::CustomArray) = dataids(C.parent)`.
 """
-dataids(A::AbstractArray) = (UInt(objectid(A)),)
-dataids(A::Array) = (UInt(pointer(A)),)
+dataids(A::AbstractArray) = (objectid(A),)
+dataids(A::Memory) = (UInt(A.ptr),)
+dataids(A::Array) = dataids(A.ref.mem)
 dataids(::AbstractRange) = ()
 dataids(x) = ()
 
@@ -1584,11 +1661,19 @@ replace_in_print_matrix(A::AbstractVector,i::Integer,j::Integer,s::AbstractStrin
 eltypeof(x) = typeof(x)
 eltypeof(x::AbstractArray) = eltype(x)
 
-promote_eltypeof() = Bottom
-promote_eltypeof(v1, vs...) = promote_type(eltypeof(v1), promote_eltypeof(vs...))
+promote_eltypeof() = error()
+promote_eltypeof(v1) = eltypeof(v1)
+promote_eltypeof(v1, v2) = promote_type(eltypeof(v1), eltypeof(v2))
+promote_eltypeof(v1, v2, vs...) = (@inline; afoldl(((::Type{T}, y) where {T}) -> promote_type(T, eltypeof(y)), promote_eltypeof(v1, v2), vs...))
+promote_eltypeof(v1::T, vs::T...) where {T} = eltypeof(v1)
+promote_eltypeof(v1::AbstractArray{T}, vs::AbstractArray{T}...) where {T} = T
 
-promote_eltype() = Bottom
-promote_eltype(v1, vs...) = promote_type(eltype(v1), promote_eltype(vs...))
+promote_eltype() = error()
+promote_eltype(v1) = eltype(v1)
+promote_eltype(v1, v2) = promote_type(eltype(v1), eltype(v2))
+promote_eltype(v1, v2, vs...) = (@inline; afoldl(((::Type{T}, y) where {T}) -> promote_type(T, eltype(y)), promote_eltype(v1, v2), vs...))
+promote_eltype(v1::T, vs::T...) where {T} = eltype(T)
+promote_eltype(v1::AbstractArray{T}, vs::AbstractArray{T}...) where {T} = T
 
 #TODO: ERROR CHECK
 _cat(catdim::Int) = Vector{Any}()
@@ -1597,10 +1682,10 @@ typed_vcat(::Type{T}) where {T} = Vector{T}()
 typed_hcat(::Type{T}) where {T} = Vector{T}()
 
 ## cat: special cases
-vcat(X::T...) where {T}         = T[ X[i] for i=1:length(X) ]
-vcat(X::T...) where {T<:Number} = T[ X[i] for i=1:length(X) ]
-hcat(X::T...) where {T}         = T[ X[j] for i=1:1, j=1:length(X) ]
-hcat(X::T...) where {T<:Number} = T[ X[j] for i=1:1, j=1:length(X) ]
+vcat(X::T...) where {T}         = T[ X[i] for i=eachindex(X) ]
+vcat(X::T...) where {T<:Number} = T[ X[i] for i=eachindex(X) ]
+hcat(X::T...) where {T}         = T[ X[j] for _=1:1, j=eachindex(X) ]
+hcat(X::T...) where {T<:Number} = T[ X[j] for _=1:1, j=eachindex(X) ]
 
 vcat(X::Number...) = hvcat_fill!(Vector{promote_typeof(X...)}(undef, length(X)), X)
 hcat(X::Number...) = hvcat_fill!(Matrix{promote_typeof(X...)}(undef, 1,length(X)), X)
@@ -1651,7 +1736,7 @@ function _typed_hcat(::Type{T}, A::AbstractVecOrTuple{AbstractVecOrMat}) where T
     for j = 1:nargs
         Aj = A[j]
         if size(Aj, 1) != nrows
-            throw(ArgumentError("number of rows of each array must match (got $(map(x->size(x,1), A)))"))
+            throw(DimensionMismatch("number of rows of each array must match (got $(map(x->size(x,1), A)))"))
         end
         dense &= isa(Aj,Array)
         nd = ndims(Aj)
@@ -1686,7 +1771,7 @@ function _typed_vcat(::Type{T}, A::AbstractVecOrTuple{AbstractVecOrMat}) where T
     ncols = size(A[1], 2)
     for j = 2:nargs
         if size(A[j], 2) != ncols
-            throw(ArgumentError("number of columns of each array must match (got $(map(x->size(x,2), A)))"))
+            throw(DimensionMismatch("number of columns of each array must match (got $(map(x->size(x,2), A)))"))
         end
     end
     B = similar(A[1], T, nrows, ncols)
@@ -1742,7 +1827,7 @@ function cat_shape(dims, shapes::Tuple)
     return out_shape
 end
 # The new way to compute the shape (more inferable than combining cat_size & cat_shape, due to Varargs + issue#36454)
-cat_size_shape(dims) = ntuple(zero, Val(length(dims)))
+cat_size_shape(dims) = ntuple(Returns(0), Val(length(dims)))
 @inline cat_size_shape(dims, X, tail...) = _cat_size_shape(dims, _cshp(1, dims, (), cat_size(X)), tail...)
 _cat_size_shape(dims, shape) = shape
 @inline _cat_size_shape(dims, shape, X, tail...) = _cat_size_shape(dims, _cshp(1, dims, shape, cat_size(X)), tail...)
@@ -1796,7 +1881,7 @@ end
 @inline cat_t(::Type{T}, X...; dims) where {T} = _cat_t(dims, T, X...)
 
 # Why isn't this called `__cat!`?
-__cat(A, shape, catdims, X...) = __cat_offset!(A, shape, catdims, ntuple(zero, length(shape)), X...)
+__cat(A, shape, catdims, X...) = __cat_offset!(A, shape, catdims, ntuple(Returns(0), length(shape)), X...)
 
 function __cat_offset!(A, shape, catdims, offsets, x, X...)
     # splitting the "work" on x from X... may reduce latency (fewer costly specializations)
@@ -1809,17 +1894,16 @@ function __cat_offset1!(A, shape, catdims, offsets, x)
     inds = ntuple(length(offsets)) do i
         (i <= length(catdims) && catdims[i]) ? offsets[i] .+ cat_indices(x, i) : 1:shape[i]
     end
-    if x isa AbstractArray
-        A[inds...] = x
-    else
-        fill!(view(A, inds...), x)
-    end
+    _copy_or_fill!(A, inds, x)
     newoffsets = ntuple(length(offsets)) do i
         (i <= length(catdims) && catdims[i]) ? offsets[i] + cat_size(x, i) : offsets[i]
     end
     return newoffsets
 end
 
+_copy_or_fill!(A, inds, x) = fill!(view(A, inds...), x)
+_copy_or_fill!(A, inds, x::AbstractArray) = (A[inds...] = x)
+
 """
     vcat(A...)
 
@@ -1856,7 +1940,7 @@ julia> vcat(range(1, 2, length=3))  # collects lazy ranges
  2.0
 
 julia> two = ([10, 20, 30]', Float64[4 5 6; 7 8 9])  # row vector and a matrix
-([10 20 30], [4.0 5.0 6.0; 7.0 8.0 9.0])
+(adjoint([10, 20, 30]), [4.0 5.0 6.0; 7.0 8.0 9.0])
 
 julia> vcat(two...)
 3×3 Matrix{Float64}:
@@ -1959,24 +2043,91 @@ The keyword also accepts `Val(dims)`.
     For multiple dimensions `dims = Val(::Tuple)` was added in Julia 1.8.
 
 # Examples
+
+Concatenate two arrays in different dimensions:
+```jldoctest
+julia> a = [1 2 3]
+1×3 Matrix{Int64}:
+ 1  2  3
+
+julia> b = [4 5 6]
+1×3 Matrix{Int64}:
+ 4  5  6
+
+julia> cat(a, b; dims=1)
+2×3 Matrix{Int64}:
+ 1  2  3
+ 4  5  6
+
+julia> cat(a, b; dims=2)
+1×6 Matrix{Int64}:
+ 1  2  3  4  5  6
+
+julia> cat(a, b; dims=(1, 2))
+2×6 Matrix{Int64}:
+ 1  2  3  0  0  0
+ 0  0  0  4  5  6
+```
+
+# Extended Help
+
+Concatenate 3D arrays:
+```jldoctest
+julia> a = ones(2, 2, 3);
+
+julia> b = ones(2, 2, 4);
+
+julia> c = cat(a, b; dims=3);
+
+julia> size(c) == (2, 2, 7)
+true
+```
+
+Concatenate arrays of different sizes:
 ```jldoctest
 julia> cat([1 2; 3 4], [pi, pi], fill(10, 2,3,1); dims=2)  # same as hcat
 2×6×1 Array{Float64, 3}:
 [:, :, 1] =
  1.0  2.0  3.14159  10.0  10.0  10.0
  3.0  4.0  3.14159  10.0  10.0  10.0
+```
 
+Construct a block diagonal matrix:
+```
 julia> cat(true, trues(2,2), trues(4)', dims=(1,2))  # block-diagonal
 4×7 Matrix{Bool}:
  1  0  0  0  0  0  0
  0  1  1  0  0  0  0
  0  1  1  0  0  0  0
  0  0  0  1  1  1  1
+```
 
+```
 julia> cat(1, [2], [3;;]; dims=Val(2))
 1×3 Matrix{Int64}:
  1  2  3
 ```
+
+!!! note
+    `cat` does not join two strings, you may want to use `*`.
+
+```jldoctest
+julia> a = "aaa";
+
+julia> b = "bbb";
+
+julia> cat(a, b; dims=1)
+2-element Vector{String}:
+ "aaa"
+ "bbb"
+
+julia> cat(a, b; dims=2)
+1×2 Matrix{String}:
+ "aaa"  "bbb"
+
+julia> a * b
+"aaabbb"
+```
 """
 @inline cat(A...; dims) = _cat(dims, A...)
 # `@constprop :aggressive` allows `catdims` to be propagated as constant improving return type inference
@@ -1984,16 +2135,14 @@ julia> cat(1, [2], [3;;]; dims=Val(2))
 
 # The specializations for 1 and 2 inputs are important
 # especially when running with --inline=no, see #11158
-# The specializations for Union{AbstractVecOrMat,Number} are necessary
-# to have more specialized methods here than in LinearAlgebra/uniformscaling.jl
 vcat(A::AbstractArray) = cat(A; dims=Val(1))
 vcat(A::AbstractArray, B::AbstractArray) = cat(A, B; dims=Val(1))
 vcat(A::AbstractArray...) = cat(A...; dims=Val(1))
-vcat(A::Union{AbstractVecOrMat,Number}...) = cat(A...; dims=Val(1))
+vcat(A::Union{AbstractArray,Number}...) = cat(A...; dims=Val(1))
 hcat(A::AbstractArray) = cat(A; dims=Val(2))
 hcat(A::AbstractArray, B::AbstractArray) = cat(A, B; dims=Val(2))
 hcat(A::AbstractArray...) = cat(A...; dims=Val(2))
-hcat(A::Union{AbstractVecOrMat,Number}...) = cat(A...; dims=Val(2))
+hcat(A::Union{AbstractArray,Number}...) = cat(A...; dims=Val(2))
 
 typed_vcat(T::Type, A::AbstractArray) = _cat_t(Val(1), T, A)
 typed_vcat(T::Type, A::AbstractArray, B::AbstractArray) = _cat_t(Val(1), T, A, B)
@@ -2055,51 +2204,11 @@ julia> hvcat((2,2,2), a,b,c,d,e,f) == hvcat(2, a,b,c,d,e,f)
 true
 ```
 """
-hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractVecOrMat...) = typed_hvcat(promote_eltype(xs...), rows, xs...)
-hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractVecOrMat{T}...) where {T} = typed_hvcat(T, rows, xs...)
-
-function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as::AbstractVecOrMat...) where T
-    nbr = length(rows)  # number of block rows
-
-    nc = 0
-    for i=1:rows[1]
-        nc += size(as[i],2)
-    end
+hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractArray...) = typed_hvcat(promote_eltype(xs...), rows, xs...)
+hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractArray{T}...) where {T} = typed_hvcat(T, rows, xs...)
 
-    nr = 0
-    a = 1
-    for i = 1:nbr
-        nr += size(as[a],1)
-        a += rows[i]
-    end
-
-    out = similar(as[1], T, nr, nc)
-
-    a = 1
-    r = 1
-    for i = 1:nbr
-        c = 1
-        szi = size(as[a],1)
-        for j = 1:rows[i]
-            Aj = as[a+j-1]
-            szj = size(Aj,2)
-            if size(Aj,1) != szi
-                throw(ArgumentError("mismatched height in block row $(i) (expected $szi, got $(size(Aj,1)))"))
-            end
-            if c-1+szj > nc
-                throw(ArgumentError("block row $(i) has mismatched number of columns (expected $nc, got $(c-1+szj))"))
-            end
-            out[r:r-1+szi, c:c-1+szj] = Aj
-            c += szj
-        end
-        if c != nc+1
-            throw(ArgumentError("block row $(i) has mismatched number of columns (expected $nc, got $(c-1))"))
-        end
-        r += szi
-        a += rows[i]
-    end
-    out
-end
+rows_to_dimshape(rows::Tuple{Vararg{Int}}) = all(==(rows[1]), rows) ? (length(rows), rows[1]) : (rows, (sum(rows),))
+typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as::AbstractVecOrMat...) where T = typed_hvncat(T, rows_to_dimshape(rows), true, as...)
 
 hvcat(rows::Tuple{Vararg{Int}}) = []
 typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}) where {T} = Vector{T}()
@@ -2115,7 +2224,7 @@ function hvcat(rows::Tuple{Vararg{Int}}, xs::T...) where T<:Number
     k = 1
     @inbounds for i=1:nr
         if nc != rows[i]
-            throw(ArgumentError("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))"))
+            throw(DimensionMismatch("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))"))
         end
         for j=1:nc
             a[i,j] = xs[k]
@@ -2144,29 +2253,20 @@ end
 hvcat(rows::Tuple{Vararg{Int}}, xs::Number...) = typed_hvcat(promote_typeof(xs...), rows, xs...)
 hvcat(rows::Tuple{Vararg{Int}}, xs...) = typed_hvcat(promote_eltypeof(xs...), rows, xs...)
 # the following method is needed to provide a more specific one compared to LinearAlgebra/uniformscaling.jl
-hvcat(rows::Tuple{Vararg{Int}}, xs::Union{AbstractVecOrMat,Number}...) = typed_hvcat(promote_eltypeof(xs...), rows, xs...)
+hvcat(rows::Tuple{Vararg{Int}}, xs::Union{AbstractArray,Number}...) = typed_hvcat(promote_eltypeof(xs...), rows, xs...)
 
 function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, xs::Number...) where T
     nr = length(rows)
     nc = rows[1]
     for i = 2:nr
         if nc != rows[i]
-            throw(ArgumentError("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))"))
+            throw(DimensionMismatch("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))"))
         end
     end
     hvcat_fill!(Matrix{T}(undef, nr, nc), xs)
 end
 
-function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as...) where T
-    nbr = length(rows)  # number of block rows
-    rs = Vector{Any}(undef, nbr)
-    a = 1
-    for i = 1:nbr
-        rs[i] = typed_hcat(T, as[a:a-1+rows[i]]...)
-        a += rows[i]
-    end
-    T[rs...;]
-end
+typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as...) where T = typed_hvncat(T, rows_to_dimshape(rows), true, as...)
 
 ## N-dimensional concatenation ##
 
@@ -2287,14 +2387,14 @@ _typed_hvncat_0d_only_one() =
 
 function _typed_hvncat(::Type{T}, ::Val{N}) where {T, N}
     N < 0 &&
-        throw(ArgumentError("concatenation dimension must be nonnegative"))
-    return Array{T, N}(undef, ntuple(x -> 0, Val(N)))
+        throw(ArgumentError("concatenation dimension must be non-negative"))
+    return Array{T, N}(undef, ntuple(Returns(0), Val(N)))
 end
 
 function _typed_hvncat(T::Type, ::Val{N}, xs::Number...) where N
     N < 0 &&
-        throw(ArgumentError("concatenation dimension must be nonnegative"))
-    A = cat_similar(xs[1], T, (ntuple(x -> 1, Val(N - 1))..., length(xs)))
+        throw(ArgumentError("concatenation dimension must be non-negative"))
+    A = cat_similar(xs[1], T, (ntuple(Returns(1), Val(N - 1))..., length(xs)))
     hvncat_fill!(A, false, xs)
     return A
 end
@@ -2305,10 +2405,10 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N}
     length(as) > 0 ||
         throw(ArgumentError("must have at least one element"))
     N < 0 &&
-        throw(ArgumentError("concatenation dimension must be nonnegative"))
+        throw(ArgumentError("concatenation dimension must be non-negative"))
     for a ∈ as
         ndims(a) <= N || all(x -> size(a, x) == 1, (N + 1):ndims(a)) ||
-            return _typed_hvncat(T, (ntuple(x -> 1, Val(N - 1))..., length(as), 1), false, as...)
+            return _typed_hvncat(T, (ntuple(Returns(1), Val(N - 1))..., length(as), 1), false, as...)
             # the extra 1 is to avoid an infinite cycle
     end
 
@@ -2319,11 +2419,11 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N}
         Ndim += cat_size(as[i], N)
         nd = max(nd, cat_ndims(as[i]))
         for d ∈ 1:N - 1
-            cat_size(as[1], d) == cat_size(as[i], d) || throw(ArgumentError("mismatched size along axis $d in element $i"))
+            cat_size(as[1], d) == cat_size(as[i], d) || throw(DimensionMismatch("mismatched size along axis $d in element $i"))
         end
     end
 
-    A = cat_similar(as[1], T, (ntuple(d -> size(as[1], d), N - 1)..., Ndim, ntuple(x -> 1, nd - N)...))
+    A = cat_similar(as[1], T, (ntuple(d -> size(as[1], d), N - 1)..., Ndim, ntuple(Returns(1), nd - N)...))
     k = 1
     for a ∈ as
         for i ∈ eachindex(a)
@@ -2338,7 +2438,7 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as...) where {T, N}
     length(as) > 0 ||
         throw(ArgumentError("must have at least one element"))
     N < 0 &&
-        throw(ArgumentError("concatenation dimension must be nonnegative"))
+        throw(ArgumentError("concatenation dimension must be non-negative"))
     nd = N
     Ndim = 0
     for i ∈ eachindex(as)
@@ -2346,11 +2446,11 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as...) where {T, N}
         nd = max(nd, cat_ndims(as[i]))
         for d ∈ 1:N-1
             cat_size(as[i], d) == 1 ||
-                throw(ArgumentError("all dimensions of element $i other than $N must be of length 1"))
+                throw(DimensionMismatch("all dimensions of element $i other than $N must be of length 1"))
         end
     end
 
-    A = Array{T, nd}(undef, ntuple(x -> 1, Val(N - 1))..., Ndim, ntuple(x -> 1, nd - N)...)
+    A = Array{T, nd}(undef, ntuple(Returns(1), Val(N - 1))..., Ndim, ntuple(Returns(1), nd - N)...)
 
     k = 1
     for a ∈ as
@@ -2418,7 +2518,7 @@ function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple)
             dd = nrc * (d - 1)
             for i ∈ 1:nr
                 Ai = dd + i
-                for j ∈ 1:nc
+                for _ ∈ 1:nc
                     @inbounds A[Ai] = xs[k]
                     k += 1
                     Ai += nr
@@ -2435,7 +2535,7 @@ end
 function _typed_hvncat(T::Type, dims::NTuple{N, Int}, row_first::Bool, as...) where {N}
     # function barrier after calculating the max is necessary for high performance
     nd = max(maximum(cat_ndims(a) for a ∈ as), N)
-    return _typed_hvncat_dims(T, (dims..., ntuple(x -> 1, nd - N)...), row_first, as)
+    return _typed_hvncat_dims(T, (dims..., ntuple(Returns(1), nd - N)...), row_first, as)
 end
 
 function _typed_hvncat_dims(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, as::Tuple) where {T, N}
@@ -2463,7 +2563,7 @@ function _typed_hvncat_dims(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, as
                 for dd ∈ 1:N
                     dd == d && continue
                     if cat_size(as[startelementi], dd) != cat_size(as[i], dd)
-                        throw(ArgumentError("incompatible shape in element $i"))
+                        throw(DimensionMismatch("incompatible shape in element $i"))
                     end
                 end
             end
@@ -2471,16 +2571,23 @@ function _typed_hvncat_dims(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, as
     end
 
     # discover number of rows or columns
+    # d1 dimension is increased by 1 to appropriately handle 0-length arrays
     for i ∈ 1:dims[d1]
         outdims[d1] += cat_size(as[i], d1)
     end
 
+    # adjustment to handle 0-length arrays
+    first_dim_zero = outdims[d1] == 0
+    if first_dim_zero
+        outdims[d1] = dims[d1]
+    end
+
     currentdims = zeros(Int, N)
     blockcount = 0
     elementcount = 0
     for i ∈ eachindex(as)
         elementcount += cat_length(as[i])
-        currentdims[d1] += cat_size(as[i], d1)
+        currentdims[d1] += first_dim_zero ? 1 : cat_size(as[i], d1)
         if currentdims[d1] == outdims[d1]
             currentdims[d1] = 0
             for d ∈ (d2, 3:N...)
@@ -2500,21 +2607,25 @@ function _typed_hvncat_dims(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, as
                     elseif currentdims[d] < outdims[d] # dimension in progress
                         break
                     else # exceeded dimension
-                        throw(ArgumentError("argument $i has too many elements along axis $d"))
+                        throw(DimensionMismatch("argument $i has too many elements along axis $d"))
                     end
                 end
             end
         elseif currentdims[d1] > outdims[d1] # exceeded dimension
-            throw(ArgumentError("argument $i has too many elements along axis $d1"))
+            throw(DimensionMismatch("argument $i has too many elements along axis $d1"))
         end
     end
+    # restore 0-length adjustment
+    if first_dim_zero
+        outdims[d1] = 0
+    end
 
     outlen = prod(outdims)
     elementcount == outlen ||
-        throw(ArgumentError("mismatched number of elements; expected $(outlen), got $(elementcount)"))
+        throw(DimensionMismatch("mismatched number of elements; expected $(outlen), got $(elementcount)"))
 
     # copy into final array
-    A = cat_similar(as[1], T, outdims)
+    A = cat_similar(as[1], T, ntuple(i -> outdims[i], N))
     # @assert all(==(0), currentdims)
     outdims .= 0
     hvncat_fill!(A, currentdims, outdims, d1, d2, as)
@@ -2533,7 +2644,7 @@ end
 function _typed_hvncat(T::Type, shape::NTuple{N, Tuple}, row_first::Bool, as...) where {N}
     # function barrier after calculating the max is necessary for high performance
     nd = max(maximum(cat_ndims(a) for a ∈ as), N)
-    return _typed_hvncat_shape(T, (shape..., ntuple(x -> shape[end], nd - N)...), row_first, as)
+    return _typed_hvncat_shape(T, (shape..., ntuple(Returns(shape[end]), nd - N)...), row_first, as)
 end
 
 function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as::Tuple) where {T, N}
@@ -2572,8 +2683,8 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as::
             if d == 1 || i == 1 || wasstartblock
                 currentdims[d] += dsize
             elseif dsize != cat_size(as[i - 1], ad)
-                throw(ArgumentError("argument $i has a mismatched number of elements along axis $ad; \
-                                    expected $(cat_size(as[i - 1], ad)), got $dsize"))
+                throw(DimensionMismatch("argument $i has a mismatched number of elements along axis $ad; \
+                                         expected $(cat_size(as[i - 1], ad)), got $dsize"))
             end
 
             wasstartblock = blockcounts[d] == 1 # remember for next dimension
@@ -2583,15 +2694,15 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as::
                 if outdims[d] == -1
                     outdims[d] = currentdims[d]
                 elseif outdims[d] != currentdims[d]
-                    throw(ArgumentError("argument $i has a mismatched number of elements along axis $ad; \
-                                        expected $(abs(outdims[d] - (currentdims[d] - dsize))), got $dsize"))
+                    throw(DimensionMismatch("argument $i has a mismatched number of elements along axis $ad; \
+                                             expected $(abs(outdims[d] - (currentdims[d] - dsize))), got $dsize"))
                 end
                 currentdims[d] = 0
                 blockcounts[d] = 0
                 shapepos[d] += 1
                 d > 1 && (blockcounts[d - 1] == 0 ||
-                    throw(ArgumentError("shape in level $d is inconsistent; level counts must nest \
-                                        evenly into each other")))
+                    throw(DimensionMismatch("shape in level $d is inconsistent; level counts must nest \
+                                             evenly into each other")))
             end
         end
     end
@@ -2608,7 +2719,7 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as::
     # @assert all(==(0), blockcounts)
 
     # copy into final array
-    A = cat_similar(as[1], T, outdims)
+    A = cat_similar(as[1], T, ntuple(i -> outdims[i], nd))
     hvncat_fill!(A, currentdims, blockcounts, d1, d2, as)
     return A
 end
@@ -2754,7 +2865,7 @@ julia> hvcat(5, M...) |> size  # hvcat puts matrices next to each other
 (14, 15)
 ```
 """
-stack(iter; dims=:) = _stack(dims, iter)
+stack(iter; dims::D=:) where {D} = _stack(dims, iter)
 
 """
     stack(f, args...; [dims])
@@ -2783,14 +2894,14 @@ julia> stack(eachrow([1 2 3; 4 5 6]), (10, 100); dims=1) do row, n
  4.0  5.0  6.0  400.0  500.0  600.0  0.04  0.05  0.06
 ```
 """
-stack(f, iter; dims=:) = _stack(dims, f(x) for x in iter)
-stack(f, xs, yzs...; dims=:) = _stack(dims, f(xy...) for xy in zip(xs, yzs...))
+stack(f, iter; dims::D=:) where {D} = _stack(dims, f(x) for x in iter)
+stack(f, xs, yzs...; dims::D=:) where {D} = _stack(dims, f(xy...) for xy in zip(xs, yzs...))
 
-_stack(dims::Union{Integer, Colon}, iter) = _stack(dims, IteratorSize(iter), iter)
+_stack(dims::D, iter) where {D<:Union{Integer, Colon}} = _stack(dims, IteratorSize(iter), iter)
 
-_stack(dims, ::IteratorSize, iter) = _stack(dims, collect(iter))
+_stack(dims::D, ::IteratorSize, iter) where {D} = _stack(dims, collect(iter))
 
-function _stack(dims, ::Union{HasShape, HasLength}, iter)
+function _stack(dims::D, ::Union{HasShape, HasLength}, iter) where {D}
     S = @default_eltype iter
     T = S != Union{} ? eltype(S) : Any  # Union{} occurs for e.g. stack(1,2), postpone the error
     if isconcretetype(T)
@@ -2828,8 +2939,6 @@ _iterator_axes(x, ::IteratorSize) = axes(x)
 # For some dims values, stack(A; dims) == stack(vec(A)), and the : path will be faster
 _typed_stack(dims::Integer, ::Type{T}, ::Type{S}, A) where {T,S} =
     _typed_stack(dims, T, S, IteratorSize(S), A)
-_typed_stack(dims::Integer, ::Type{T}, ::Type{S}, ::HasLength, A) where {T,S} =
-    _typed_stack(dims, T, S, HasShape{1}(), A)
 function _typed_stack(dims::Integer, ::Type{T}, ::Type{S}, ::HasShape{N}, A) where {T,S,N}
     if dims == N+1
         _typed_stack(:, T, S, A, (_vec_axis(A),))
@@ -2865,8 +2974,8 @@ _vec_axis(A, ax=_iterator_axes(A)) = length(ax) == 1 ? only(ax) : OneTo(prod(len
 end
 
 function _dim_stack!(::Val{dims}, B::AbstractArray, x1, xrest) where {dims}
-    before = ntuple(d -> Colon(), dims - 1)
-    after = ntuple(d -> Colon(), ndims(B) - dims)
+    before = ntuple(Returns(Colon()), dims - 1)
+    after = ntuple(Returns(Colon()), ndims(B) - dims)
 
     i = firstindex(B, dims)
     copyto!(view(B, before..., i, after...), x1)
@@ -2881,7 +2990,7 @@ end
 @inline function _stack_size_check(x, ax1::Tuple)
     if _iterator_axes(x) != ax1
         uax1 = map(UnitRange, ax1)
-        uaxN = map(UnitRange, axes(x))
+        uaxN = map(UnitRange, _iterator_axes(x))
         throw(DimensionMismatch(
             LazyString("stack expects uniform slices, got axes(x) == ", uaxN, " while first had ", uax1)))
     end
@@ -2909,6 +3018,8 @@ function isequal(A::AbstractArray, B::AbstractArray)
 end
 
 function cmp(A::AbstractVector, B::AbstractVector)
+    ai1, bi1 = firstindex(A), firstindex(B)
+    isequal(ai1, bi1) || return cmp(ai1, bi1)
     for (a, b) in zip(A, B)
         if !isequal(a, b)
             return isless(a, b) ? -1 : 1
@@ -2917,10 +3028,20 @@ function cmp(A::AbstractVector, B::AbstractVector)
     return cmp(length(A), length(B))
 end
 
+"""
+    isless(A::AbstractArray{<:Any,0}, B::AbstractArray{<:Any,0})
+
+Return `true` when the only element of `A` is less than the only element of `B`.
+"""
+function isless(A::AbstractArray{<:Any,0}, B::AbstractArray{<:Any,0})
+    isless(only(A), only(B))
+end
+
 """
     isless(A::AbstractVector, B::AbstractVector)
 
-Return `true` when `A` is less than `B` in lexicographic order.
+Return `true` when `A` is less than `B`. Vectors are first compared by
+their starting indices, and then lexicographically by their elements.
 """
 isless(A::AbstractVector, B::AbstractVector) = cmp(A, B) < 0
 
@@ -3000,14 +3121,14 @@ end
 function _ind2sub_recurse(inds, ind)
     @inline
     r1 = inds[1]
-    indnext, f, l = _div(ind, r1)
-    (ind-l*indnext+f, _ind2sub_recurse(tail(inds), indnext)...)
+    indnext, indsub = divrem(ind, _indexlength(r1))
+    (_lookup(indsub, r1), _ind2sub_recurse(tail(inds), indnext)...)
 end
 
+_indexlength(d::Integer) = d
+_indexlength(r::AbstractUnitRange) = length(r)
 _lookup(ind, d::Integer) = ind+1
 _lookup(ind, r::AbstractUnitRange) = ind+first(r)
-_div(ind, d::Integer) = div(ind, d), 1, d
-_div(ind, r::AbstractUnitRange) = (d = length(r); (div(ind, d), first(r), d))
 
 # Vectorized forms
 function _sub2ind(inds::Indices{1}, I1::AbstractVector{T}, I::AbstractVector{T}...) where T<:Integer
@@ -3042,8 +3163,7 @@ _sub2ind_vec(i, I1, I...) = (@inline; (I1[i], _sub2ind_vec(i, I...)...))
 _sub2ind_vec(i) = ()
 
 function _ind2sub(inds::Union{DimsInteger{N},Indices{N}}, ind::AbstractVector{<:Integer}) where N
-    M = length(ind)
-    t = ntuple(n->similar(ind),Val(N))
+    t = ntuple(_->similar(ind),Val(N))
     for (i,idx) in pairs(IndexLinear(), ind)
         sub = _ind2sub(inds, idx)
         for j = 1:N
@@ -3056,7 +3176,7 @@ end
 ## iteration utilities ##
 
 """
-    foreach(f, c...) -> Nothing
+    foreach(f, c...) -> nothing
 
 Call function `f` on each element of iterable `c`.
 For multiple iterable arguments, `f` is called elementwise, and iteration stops when
@@ -3083,9 +3203,8 @@ julia> foreach((x, y) -> println(x, " with ", y), tri, 'a':'z')
 7 with c
 ```
 """
-foreach(f) = (f(); nothing)
 foreach(f, itr) = (for x in itr; f(x); end; nothing)
-foreach(f, itrs...) = (for z in zip(itrs...); f(z...); end; nothing)
+foreach(f, itr, itrs...) = (for z in zip(itr, itrs...); f(z...); end; nothing)
 
 ## map over arrays ##
 
@@ -3162,7 +3281,7 @@ one *without* a colon in the slice. This is `view(A,:,i,:)`, whereas
 `mapslices(f, A; dims=(1,3))` uses `A[:,i,:]`. The function `f` may mutate
 values in the slice without affecting `A`.
 """
-function mapslices(f, A::AbstractArray; dims)
+@constprop :aggressive function mapslices(f, A::AbstractArray; dims)
     isempty(dims) && return map(f, A)
 
     for d in dims
@@ -3257,10 +3376,6 @@ end
 concatenate_setindex!(R, v, I...) = (R[I...] .= (v,); R)
 concatenate_setindex!(R, X::AbstractArray, I...) = (R[I...] = X)
 
-## 0 arguments
-
-map(f) = f()
-
 ## 1 argument
 
 function map!(f::F, dest::AbstractArray, A::AbstractArray) where F
@@ -3283,6 +3398,8 @@ mapany(f, itr) = Any[f(x) for x in itr]
 Transform collection `c` by applying `f` to each element. For multiple collection arguments,
 apply `f` elementwise, and stop when any of them is exhausted.
 
+The element type of the result is determined in the same manner as in [`collect`](@ref).
+
 See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref).
 
 # Examples
@@ -3324,12 +3441,19 @@ function ith_all(i, as)
 end
 
 function map_n!(f::F, dest::AbstractArray, As) where F
-    idxs1 = LinearIndices(As[1])
-    @boundscheck LinearIndices(dest) == idxs1 && all(x -> LinearIndices(x) == idxs1, As)
-    for i = idxs1
-        @inbounds I = ith_all(i, As)
-        val = f(I...)
-        @inbounds dest[i] = val
+    idxs = LinearIndices(dest)
+    if all(x -> LinearIndices(x) == idxs, As)
+        for i in idxs
+            @inbounds as = ith_all(i, As)
+            val = f(as...)
+            @inbounds dest[i] = val
+        end
+    else
+        for (i, Is...) in zip(eachindex(dest), map(eachindex, As)...)
+            as = ntuple(j->getindex(As[j], Is[j]), length(As))
+            val = f(as...)
+            dest[i] = val
+        end
     end
     return dest
 end
@@ -3340,6 +3464,8 @@ end
 Like [`map`](@ref), but stores the result in `destination` rather than a new
 collection. `destination` must be at least as large as the smallest collection.
 
+$(_DOCS_ALIASING_WARNING)
+
 See also: [`map`](@ref), [`foreach`](@ref), [`zip`](@ref), [`copyto!`](@ref).
 
 # Examples
@@ -3364,11 +3490,32 @@ julia> map!(+, zeros(Int, 5), 100:999, 1:3)
 ```
 """
 function map!(f::F, dest::AbstractArray, As::AbstractArray...) where {F}
-    isempty(As) && throw(ArgumentError(
-        """map! requires at least one "source" argument"""))
+    @assert !isempty(As) # should dispatch to map!(f, A)
     map_n!(f, dest, As)
 end
 
+"""
+    map!(function, array)
+
+Like [`map`](@ref), but stores the result in the same array.
+!!! compat "Julia 1.12"
+    This method requires Julia 1.12 or later. To support previous versions too,
+    use the equivalent `map!(function, array, array)`.
+
+# Examples
+```jldoctest
+julia> a = [1 2 3; 4 5 6];
+
+julia> map!(x -> x^3, a);
+
+julia> a
+2×3 Matrix{$Int}:
+  1    8   27
+ 64  125  216
+```
+"""
+map!(f::F, inout::AbstractArray) where F = map!(f, inout, inout)
+
 """
     map(f, A::AbstractArray...) -> N-array
 
@@ -3394,7 +3541,37 @@ julia> map(+, [1 2; 3 4], [1,10,100,1000], zeros(3,1))  # iterates until 3rd is
  102.0
 ```
 """
-map(f, iters...) = collect(Generator(f, iters...))
+map(f, it, iters...) = collect(Generator(f, it, iters...))
+
+# Generic versions of push! for AbstractVector
+# These are specialized further for Vector for faster resizing and setindexing
+function push!(a::AbstractVector{T}, item) where T
+    # convert first so we don't grow the array if the assignment won't work
+    itemT = item isa T ? item : convert(T, item)::T
+    new_length = length(a) + 1
+    resize!(a, new_length)
+    a[end] = itemT
+    return a
+end
+
+# specialize and optimize the single argument case
+function push!(a::AbstractVector{Any}, @nospecialize x)
+    new_length = length(a) + 1
+    resize!(a, new_length)
+    a[end] = x
+    return a
+end
+function push!(a::AbstractVector{Any}, @nospecialize x...)
+    @_terminates_locally_meta
+    na = length(a)
+    nx = length(x)
+    resize!(a, na + nx)
+    e = lastindex(a) - nx
+    for i = 1:nx
+        a[e+i] = x[i]
+    end
+    return a
+end
 
 # multi-item push!, pushfirst! (built on top of type-specific 1-item version)
 # (note: must not cause a dispatch loop when 1-item case is not defined)
@@ -3403,80 +3580,8 @@ push!(A, a, b, c...) = push!(push!(A, a, b), c...)
 pushfirst!(A, a, b) = pushfirst!(pushfirst!(A, b), a)
 pushfirst!(A, a, b, c...) = pushfirst!(pushfirst!(A, c...), a, b)
 
-## hashing AbstractArray ##
-
-const hash_abstractarray_seed = UInt === UInt64 ? 0x7e2d6fb6448beb77 : 0xd4514ce5
-function hash(A::AbstractArray, h::UInt)
-    h += hash_abstractarray_seed
-    # Axes are themselves AbstractArrays, so hashing them directly would stack overflow
-    # Instead hash the tuple of firsts and lasts along each dimension
-    h = hash(map(first, axes(A)), h)
-    h = hash(map(last, axes(A)), h)
-
-    # For short arrays, it's not worth doing anything complicated
-    if length(A) < 8192
-        for x in A
-            h = hash(x, h)
-        end
-        return h
-    end
-
-    # Goal: Hash approximately log(N) entries with a higher density of hashed elements
-    # weighted towards the end and special consideration for repeated values. Colliding
-    # hashes will often subsequently be compared by equality -- and equality between arrays
-    # works elementwise forwards and is short-circuiting. This means that a collision
-    # between arrays that differ by elements at the beginning is cheaper than one where the
-    # difference is towards the end. Furthermore, choosing `log(N)` arbitrary entries from a
-    # sparse array will likely only choose the same element repeatedly (zero in this case).
-
-    # To achieve this, we work backwards, starting by hashing the last element of the
-    # array. After hashing each element, we skip `fibskip` elements, where `fibskip`
-    # is pulled from the Fibonacci sequence -- Fibonacci was chosen as a simple
-    # ~O(log(N)) algorithm that ensures we don't hit a common divisor of a dimension
-    # and only end up hashing one slice of the array (as might happen with powers of
-    # two). Finally, we find the next distinct value from the one we just hashed.
-
-    # This is a little tricky since skipping an integer number of values inherently works
-    # with linear indices, but `findprev` uses `keys`. Hoist out the conversion "maps":
-    ks = keys(A)
-    key_to_linear = LinearIndices(ks) # Index into this map to compute the linear index
-    linear_to_key = vec(ks)           # And vice-versa
-
-    # Start at the last index
-    keyidx = last(ks)
-    linidx = key_to_linear[keyidx]
-    fibskip = prevfibskip = oneunit(linidx)
-    first_linear = first(LinearIndices(linear_to_key))
-    n = 0
-    while true
-        n += 1
-        # Hash the element
-        elt = A[keyidx]
-        h = hash(keyidx=>elt, h)
-
-        # Skip backwards a Fibonacci number of indices -- this is a linear index operation
-        linidx = key_to_linear[keyidx]
-        linidx < fibskip + first_linear && break
-        linidx -= fibskip
-        keyidx = linear_to_key[linidx]
-
-        # Only increase the Fibonacci skip once every N iterations. This was chosen
-        # to be big enough that all elements of small arrays get hashed while
-        # obscenely large arrays are still tractable. With a choice of N=4096, an
-        # entirely-distinct 8000-element array will have ~75% of its elements hashed,
-        # with every other element hashed in the first half of the array. At the same
-        # time, hashing a `typemax(Int64)`-length Float64 range takes about a second.
-        if rem(n, 4096) == 0
-            fibskip, prevfibskip = fibskip + prevfibskip, fibskip
-        end
-
-        # Find a key index with a value distinct from `elt` -- might be `keyidx` itself
-        keyidx = findprev(!isequal(elt), A, keyidx)
-        keyidx === nothing && break
-    end
-
-    return h
-end
+# sizehint! does not nothing by default
+sizehint!(a::AbstractVector, _) = a
 
 # The semantics of `collect` are weird. Better to write our own
 function rest(a::AbstractArray{T}, state...) where {T}
@@ -3486,7 +3591,6 @@ function rest(a::AbstractArray{T}, state...) where {T}
     return foldl(push!, Iterators.rest(a, state...), init=v)
 end
 
-
 ## keepat! ##
 
 # NOTE: since these use `@inbounds`, they are actually only intended for Vector and BitVector
@@ -3523,12 +3627,36 @@ function _keepat!(a::AbstractVector, m::AbstractVector{Bool})
     deleteat!(a, j:lastindex(a))
 end
 
-## 1-d circshift ##
+"""
+    circshift!(a::AbstractVector, shift::Integer)
+
+Circularly shift, or rotate, the data in vector `a` by `shift` positions.
+
+# Examples
+
+```jldoctest
+julia> circshift!([1, 2, 3, 4, 5], 2)
+5-element Vector{Int64}:
+ 4
+ 5
+ 1
+ 2
+ 3
+
+julia> circshift!([1, 2, 3, 4, 5], -2)
+5-element Vector{Int64}:
+ 3
+ 4
+ 5
+ 1
+ 2
+```
+"""
 function circshift!(a::AbstractVector, shift::Integer)
     n = length(a)
-    n == 0 && return
+    n == 0 && return a
     shift = mod(shift, n)
-    shift == 0 && return
+    shift == 0 && return a
     l = lastindex(a)
     reverse!(a, firstindex(a), l-shift)
     reverse!(a, l-shift+1, lastindex(a))
diff --git a/base/abstractarraymath.jl b/base/abstractarraymath.jl
index 70c304d9060c1..7992daee621b9 100644
--- a/base/abstractarraymath.jl
+++ b/base/abstractarraymath.jl
@@ -9,7 +9,7 @@ isreal(x::AbstractArray{<:Real}) = true
 ## Constructors ##
 
 """
-    vec(a::AbstractArray) -> AbstractVector
+    vec(a::AbstractArray)::AbstractVector
 
 Reshape the array `a` as a one-dimensional column vector. Return `a` if it is
 already an `AbstractVector`. The resulting array
@@ -88,11 +88,81 @@ function _dropdims(A::AbstractArray, dims::Dims)
             dims[j] == dims[i] && throw(ArgumentError("dropped dims must be unique"))
         end
     end
+    ox = axes(A)
     ax = _foldoneto((ds, d) -> d in dims ? ds : (ds..., axes(A,d)), (), Val(ndims(A)))
-    reshape(A, ax::typeof(_sub(axes(A), dims)))
+    if isconcretetype(eltype(ox))
+        # if all the axes are the same type, we can use the tail as the
+        # axes of the result rather than extracting one at each index
+        return reshape(A, ax::typeof(_sub(ox, dims)))
+    else
+        return reshape(A, ax)
+    end
 end
 _dropdims(A::AbstractArray, dim::Integer) = _dropdims(A, (Int(dim),))
 
+"""
+    insertdims(A; dims)
+
+Inverse of [`dropdims`](@ref); return an array with new singleton dimensions
+at every dimension in `dims`.
+
+Repeated dimensions are forbidden and the largest entry in `dims` must be
+less than or equal than `ndims(A) + length(dims)`.
+
+The result shares the same underlying data as `A`, such that the
+result is mutable if and only if `A` is mutable, and setting elements of one
+alters the values of the other.
+
+See also: [`dropdims`](@ref), [`reshape`](@ref), [`vec`](@ref).
+# Examples
+```jldoctest
+julia> x = [1 2 3; 4 5 6]
+2×3 Matrix{Int64}:
+ 1  2  3
+ 4  5  6
+
+julia> insertdims(x, dims=3)
+2×3×1 Array{Int64, 3}:
+[:, :, 1] =
+ 1  2  3
+ 4  5  6
+
+julia> insertdims(x, dims=(1,2,5)) == reshape(x, 1, 1, 2, 3, 1)
+true
+
+julia> dropdims(insertdims(x, dims=(1,2,5)), dims=(1,2,5))
+2×3 Matrix{Int64}:
+ 1  2  3
+ 4  5  6
+```
+
+!!! compat "Julia 1.12"
+    Requires Julia 1.12 or later.
+"""
+insertdims(A; dims) = _insertdims(A, dims)
+function _insertdims(A::AbstractArray{T, N}, dims::NTuple{M, Int}) where {T, N, M}
+    for i in eachindex(dims)
+        1 ≤ dims[i] || throw(ArgumentError("the smallest entry in dims must be ≥ 1."))
+        dims[i] ≤ N+M || throw(ArgumentError("the largest entry in dims must be not larger than the dimension of the array and the length of dims added"))
+        for j = 1:i-1
+            dims[j] == dims[i] && throw(ArgumentError("inserted dims must be unique"))
+        end
+    end
+
+    # acc is a tuple, where the first entry is the final shape
+    # the second entry off acc is a counter for the axes of A
+    inds= Base._foldoneto((acc, i) ->
+                            i ∈ dims
+                                ? ((acc[1]..., Base.OneTo(1)), acc[2])
+                                : ((acc[1]..., axes(A, acc[2])), acc[2] + 1),
+                            ((), 1), Val(N+M))
+    new_shape = inds[1]
+    return reshape(A, new_shape)
+end
+_insertdims(A::AbstractArray, dim::Integer) = _insertdims(A, (Int(dim),))
+
+
+
 ## Unary operators ##
 
 """
@@ -119,6 +189,7 @@ julia> A
 """
 conj!(A::AbstractArray{<:Number}) = (@inbounds broadcast!(conj, A, A); A)
 conj!(x::AbstractArray{<:Real}) = x
+conj!(A::AbstractArray) = (foreach(conj!, A); A)
 
 """
     conj(A::AbstractArray)
@@ -264,10 +335,13 @@ circshift(a::AbstractArray, shiftamt::DimsInteger) = circshift!(similar(a), a, s
 """
     circshift(A, shifts)
 
-Circularly shift, i.e. rotate, the data in an array. The second argument is a tuple or
+Circularly shift, i.e. rotate, the data in `A`. The second argument is a tuple or
 vector giving the amount to shift in each dimension, or an integer to shift only in the
 first dimension.
 
+The generated code is most efficient when the shift amounts are known at compile-time, i.e.,
+compile-time constants.
+
 See also: [`circshift!`](@ref), [`circcopy!`](@ref), [`bitrotate`](@ref), [`<<`](@ref).
 
 # Examples
@@ -316,6 +390,18 @@ julia> circshift(a, -1)
  0
  1
  1
+
+julia> x = (1, 2, 3, 4, 5)
+(1, 2, 3, 4, 5)
+
+julia> circshift(x, 4)
+(2, 3, 4, 5, 1)
+
+julia> z = (1, 'a', -7.0, 3)
+(1, 'a', -7.0, 3)
+
+julia> circshift(z, -1)
+('a', -7.0, 3, 1)
 ```
 """
 function circshift(a::AbstractArray, shiftamt)
@@ -353,7 +439,7 @@ julia> repeat([1, 2, 3], 2, 3)
 ```
 """
 function repeat(A::AbstractArray, counts...)
-    return _RepeatInnerOuter.repeat(A, outer=counts)
+    return repeat(A, outer=counts)
 end
 
 """
@@ -438,6 +524,9 @@ function check(arr, inner, outer)
         # TODO: Currently one based indexing is demanded for inner !== nothing,
         # but not for outer !== nothing. Decide for something consistent.
         Base.require_one_based_indexing(arr)
+        if !all(n -> n isa Integer, inner)
+            throw(ArgumentError("repeat requires integer counts, got inner = $inner"))
+        end
         if any(<(0), inner)
             throw(ArgumentError("no inner repetition count may be negative; got $inner"))
         end
@@ -446,6 +535,9 @@ function check(arr, inner, outer)
         end
     end
     if outer !== nothing
+        if !all(n -> n isa Integer, outer)
+            throw(ArgumentError("repeat requires integer counts, got outer = $outer"))
+        end
         if any(<(0), outer)
             throw(ArgumentError("no outer repetition count may be negative; got $outer"))
         end
diff --git a/base/abstractdict.jl b/base/abstractdict.jl
index 9dba5369a2a66..cdea331d49c51 100644
--- a/base/abstractdict.jl
+++ b/base/abstractdict.jl
@@ -12,6 +12,8 @@ struct KeyError <: Exception
     key
 end
 
+KeyTypeError(K, key) = TypeError(:var"dict key", K, key)
+
 const secret_table_token = :__c782dbf1cf4d6a2e5e3865d7e95634f2e09b5902__
 
 haskey(d::AbstractDict, k) = in(k, keys(d))
@@ -86,11 +88,11 @@ Return an iterator over all keys in a dictionary.
 When the keys are stored internally in a hash table,
 as is the case for `Dict`,
 the order in which they are returned may vary.
-But `keys(a)` and `values(a)` both iterate `a` and
-return the elements in the same order.
+But `keys(a)`, `values(a)` and `pairs(a)` all iterate `a`
+and return the elements in the same order.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+'\\S'.*\$"m
 julia> D = Dict('a'=>2, 'b'=>3)
 Dict{Char, Int64} with 2 entries:
   'a' => 2
@@ -112,11 +114,11 @@ Return an iterator over all values in a collection.
 When the values are stored internally in a hash table,
 as is the case for `Dict`,
 the order in which they are returned may vary.
-But `keys(a)` and `values(a)` both iterate `a` and
-return the elements in the same order.
+But `keys(a)`, `values(a)` and `pairs(a)` all iterate `a`
+and return the elements in the same order.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+(\\s+=>\\s+\\d)?\$"m
 julia> D = Dict('a'=>2, 'b'=>3)
 Dict{Char, Int64} with 2 entries:
   'a' => 2
@@ -136,9 +138,13 @@ values(a::AbstractDict) = ValueIterator(a)
 Return an iterator over `key => value` pairs for any
 collection that maps a set of keys to a set of values.
 This includes arrays, where the keys are the array indices.
+When the entries are stored internally in a hash table,
+as is the case for `Dict`, the order in which they are returned may vary.
+But `keys(a)`, `values(a)` and `pairs(a)` all iterate `a`
+and return the elements in the same order.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> a = Dict(zip(["a", "b", "c"], [1, 2, 3]))
 Dict{String, Int64} with 3 entries:
   "c" => 3
@@ -201,7 +207,7 @@ Update collection with pairs from the other collections.
 See also [`merge`](@ref).
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> d1 = Dict(1 => 2, 3 => 4);
 
 julia> d2 = Dict(1 => 4, 4 => 5);
@@ -218,7 +224,7 @@ Dict{Int64, Int64} with 3 entries:
 function merge!(d::AbstractDict, others::AbstractDict...)
     for other in others
         if haslength(d) && haslength(other)
-            sizehint!(d, length(d) + length(other))
+            sizehint!(d, length(d) + length(other); shrink = false)
         end
         for (k,v) in other
             d[k] = v
@@ -245,7 +251,7 @@ compatibility.
     `mergewith!` requires Julia 1.5 or later.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> d1 = Dict(1 => 2, 3 => 4);
 
 julia> d2 = Dict(1 => 4, 4 => 5);
@@ -299,7 +305,7 @@ julia> keytype(Dict(Int32(1) => "foo"))
 Int32
 ```
 """
-keytype(::Type{<:AbstractDict{K,V}}) where {K,V} = K
+keytype(::Type{<:AbstractDict{K}}) where {K} = K
 keytype(a::AbstractDict) = keytype(typeof(a))
 
 """
@@ -313,7 +319,7 @@ julia> valtype(Dict(Int32(1) => "foo"))
 String
 ```
 """
-valtype(::Type{<:AbstractDict{K,V}}) where {K,V} = V
+valtype(::Type{<:AbstractDict{<:Any,V}}) where {V} = V
 valtype(a::AbstractDict) = valtype(typeof(a))
 
 """
@@ -326,7 +332,7 @@ value for that key will be the value it has in the last collection listed.
 See also [`mergewith`](@ref) for custom handling of values with the same key.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\S+\$"m
 julia> a = Dict("foo" => 0.0, "bar" => 42.0)
 Dict{String, Float64} with 2 entries:
   "bar" => 42.0
@@ -356,7 +362,6 @@ merge(d::AbstractDict, others::AbstractDict...) =
 """
     mergewith(combine, d::AbstractDict, others::AbstractDict...)
     mergewith(combine)
-    merge(combine, d::AbstractDict, others::AbstractDict...)
 
 Construct a merged collection from the given collections. If necessary, the
 types of the resulting collection will be promoted to accommodate the types of
@@ -364,14 +369,11 @@ the merged collections. Values with the same key will be combined using the
 combiner function.  The curried form `mergewith(combine)` returns the function
 `(args...) -> mergewith(combine, args...)`.
 
-Method `merge(combine::Union{Function,Type}, args...)` as an alias of
-`mergewith(combine, args...)` is still available for backward compatibility.
-
 !!! compat "Julia 1.5"
     `mergewith` requires Julia 1.5 or later.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\S+\$"m
 julia> a = Dict("foo" => 0.0, "bar" => 42.0)
 Dict{String, Float64} with 2 entries:
   "bar" => 42.0
@@ -390,13 +392,15 @@ Dict{String, Float64} with 3 entries:
 
 julia> ans == mergewith(+)(a, b)
 true
+
+julia> mergewith(-, Dict(), Dict(:a=>1))  # Combining function only used if key is present in both
+Dict{Any, Any} with 1 entry:
+  :a => 1
 ```
 """
 mergewith(combine, d::AbstractDict, others::AbstractDict...) =
     mergewith!(combine, _typeddict(d, others...), others...)
 mergewith(combine) = (args...) -> mergewith(combine, args...)
-merge(combine::Callable, d::AbstractDict, others::AbstractDict...) =
-    merge!(combine, _typeddict(d, others...), others...)
 
 promoteK(K) = K
 promoteV(V) = V
@@ -414,8 +418,8 @@ end
 Update `d`, removing elements for which `f` is `false`.
 The function `f` is passed `key=>value` pairs.
 
-# Example
-```jldoctest
+# Examples
+```jldoctest; filter = r"^\\s+\\d\\s+=>\\s+\\S+\$"m
 julia> d = Dict(1=>"a", 2=>"b", 3=>"c")
 Dict{Int64, String} with 3 entries:
   2 => "b"
@@ -457,7 +461,7 @@ Return a copy of `d`, removing elements for which `f` is `false`.
 The function `f` is passed `key=>value` pairs.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\d\\s+=>\\s+\\S+\$"m
 julia> d = Dict(1=>"a", 2=>"b")
 Dict{Int64, String} with 2 entries:
   2 => "b"
@@ -577,6 +581,55 @@ _tablesz(x::T) where T <: Integer = x < 16 ? T(16) : one(T)<<(top_set_bit(x-one(
 
 TP{K,V} = Union{Type{Tuple{K,V}},Type{Pair{K,V}}}
 
+# This error is thrown if `grow_to!` cannot validate the contents of the iterator argument to it, which it does by testing the iteration protocol (isiterable) on it each time it is about to start iteration on it
+_throw_dict_kv_error() = throw(ArgumentError("AbstractDict(kv): kv needs to be an iterator of 2-tuples or pairs"))
+
+function grow_to!(dest::AbstractDict, itr)
+    applicable(iterate, itr) || _throw_dict_kv_error()
+    y = iterate(itr)
+    y === nothing && return dest
+    kv, st = y
+    applicable(iterate, kv) || _throw_dict_kv_error()
+    k = iterate(kv)
+    k === nothing && _throw_dict_kv_error()
+    k, kvst = k
+    v = iterate(kv, kvst)
+    v === nothing && _throw_dict_kv_error()
+    v, kvst = v
+    iterate(kv, kvst) === nothing || _throw_dict_kv_error()
+    if !(dest isa AbstractDict{typeof(k), typeof(v)})
+        dest = empty(dest, typeof(k), typeof(v))
+    end
+    dest[k] = v
+    return grow_to!(dest, itr, st)
+end
+
+function grow_to!(dest::AbstractDict{K,V}, itr, st) where {K, V}
+    y = iterate(itr, st)
+    while y !== nothing
+        kv, st = y
+        applicable(iterate, kv) || _throw_dict_kv_error()
+        kst = iterate(kv)
+        kst === nothing && _throw_dict_kv_error()
+        k, kvst = kst
+        vst = iterate(kv, kvst)
+        vst === nothing && _throw_dict_kv_error()
+        v, kvst = vst
+        iterate(kv, kvst) === nothing || _throw_dict_kv_error()
+        if isa(k, K) && isa(v, V)
+            dest[k] = v
+        else
+            new = empty(dest, promote_typejoin(K, typeof(k)), promote_typejoin(V, typeof(v)))
+            merge!(new, dest)
+            new[k] = v
+            return grow_to!(new, itr, st)
+        end
+        y = iterate(itr, st)
+    end
+    return dest
+end
+
+
 dict_with_eltype(DT_apply, kv, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv)
 dict_with_eltype(DT_apply, kv::Generator, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv)
 dict_with_eltype(DT_apply, ::Type{Pair{K,V}}) where {K,V} = DT_apply(K, V)()
@@ -601,7 +654,7 @@ of `dict` then it will be converted to the value type if possible and otherwise
     `map!(f, values(dict::AbstractDict))` requires Julia 1.2 or later.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+(\\s+=>\\s+\\d)?\$"m
 julia> d = Dict(:a => 1, :b => 2)
 Dict{Symbol, Int64} with 2 entries:
   :a => 1
diff --git a/base/abstractset.jl b/base/abstractset.jl
index 5d0d65dad2de6..f9dc19811d495 100644
--- a/base/abstractset.jl
+++ b/base/abstractset.jl
@@ -25,7 +25,7 @@ This is an infix operator, allowing `s ∪ itr`.
 See also [`unique`](@ref), [`intersect`](@ref), [`isdisjoint`](@ref), [`vcat`](@ref), [`Iterators.flatten`](@ref).
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\d\$"m
 julia> union([1, 2], [3])
 3-element Vector{Int64}:
  1
@@ -65,8 +65,10 @@ const ∪ = union
 Construct the [`union`](@ref) of passed in sets and overwrite `s` with the result.
 Maintain order with arrays.
 
+$(_DOCS_ALIASING_WARNING)
+
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\d\$"m
 julia> a = Set([3, 4, 5]);
 
 julia> union!(a, 1:2:7);
@@ -99,7 +101,7 @@ max_values(::Type{Bool}) = 2
 max_values(::Type{Nothing}) = 1
 
 function union!(s::AbstractSet{T}, itr) where T
-    haslength(itr) && sizehint!(s, length(s) + Int(length(itr))::Int)
+    haslength(itr) && sizehint!(s, length(s) + Int(length(itr))::Int; shrink = false)
     for x in itr
         push!(s, x)
         length(s) == max_values(T) && break
@@ -182,6 +184,8 @@ const ∩ = intersect
 
 Intersect all passed in sets and overwrite `s` with the result.
 Maintain order with arrays.
+
+$(_DOCS_ALIASING_WARNING)
 """
 function intersect!(s::AbstractSet, itrs...)
     for x in itrs
@@ -197,7 +201,7 @@ intersect!(s::AbstractSet, itr) =
     setdiff(s, itrs...)
 
 Construct the set of elements in `s` but not in any of the iterables in `itrs`.
-Maintain order with arrays.
+Maintain order with arrays. The result will have the same element type as `s`.
 
 See also [`setdiff!`](@ref), [`union`](@ref) and [`intersect`](@ref).
 
@@ -207,6 +211,10 @@ julia> setdiff([1,2,3], [3,4,5])
 2-element Vector{Int64}:
  1
  2
+
+julia> setdiff([1,2,3], [1.0, 2.0])
+1-element Vector{Int64}:
+ 3
 ```
 """
 setdiff(s::AbstractSet, itrs...) = setdiff!(copymutable(s), itrs...)
@@ -218,6 +226,8 @@ setdiff(s) = union(s)
 Remove from set `s` (in-place) each element of each iterable from `itrs`.
 Maintain order with arrays.
 
+$(_DOCS_ALIASING_WARNING)
+
 # Examples
 ```jldoctest
 julia> a = Set([1, 3, 4, 5]);
@@ -272,6 +282,8 @@ symdiff(s) = symdiff!(copy(s))
 Construct the symmetric difference of the passed in sets, and overwrite `s` with the result.
 When `s` is an array, the order is maintained.
 Note that in this case the multiplicity of elements matters.
+
+$(_DOCS_ALIASING_WARNING)
 """
 function symdiff!(s::AbstractSet, itrs...)
     for x in itrs
@@ -294,9 +306,9 @@ end
 const ⊆ = issubset
 function ⊇ end
 """
-    issubset(a, b) -> Bool
-    ⊆(a, b) -> Bool
-    ⊇(b, a) -> Bool
+    issubset(a, b)::Bool
+    ⊆(a, b)::Bool
+    ⊇(b, a)::Bool
 
 Determine whether every element of `a` is also in `b`, using [`in`](@ref).
 
@@ -338,13 +350,17 @@ function issubset(a, b)
 end
 
 """
-    hasfastin(T)
+    Base.hasfastin(T)
 
 Determine whether the computation `x ∈ collection` where `collection::T` can be considered
 as a "fast" operation (typically constant or logarithmic complexity).
 The definition `hasfastin(x) = hasfastin(typeof(x))` is provided for convenience so that instances
 can be passed instead of types.
 However the form that accepts a type argument should be defined for new types.
+
+The default for `hasfastin(T)` is `true` for subtypes of
+[`AbstractSet`](@ref), [`AbstractDict`](@ref) and [`AbstractRange`](@ref)
+and `false` otherwise.
 """
 hasfastin(::Type) = false
 hasfastin(::Union{Type{<:AbstractSet},Type{<:AbstractDict},Type{<:AbstractRange}}) = true
@@ -352,15 +368,40 @@ hasfastin(x) = hasfastin(typeof(x))
 
 ⊇(a, b) = b ⊆ a
 
+"""
+    issubset(x)
+
+Create a function that compares its argument to `x` using [`issubset`](@ref), i.e.
+a function equivalent to `y -> issubset(y, x)`.
+The returned function is of type `Base.Fix2{typeof(issubset)}`, which can be
+used to implement specialized methods.
+
+!!! compat "Julia 1.11"
+    This functionality requires at least Julia 1.11.
+"""
+issubset(a) = Fix2(issubset, a)
+
+"""
+    ⊇(x)
+
+Create a function that compares its argument to `x` using [`⊇`](@ref), i.e.
+a function equivalent to `y -> y ⊇ x`.
+The returned function is of type `Base.Fix2{typeof(⊇)}`, which can be
+used to implement specialized methods.
+
+!!! compat "Julia 1.11"
+    This functionality requires at least Julia 1.11.
+"""
+⊇(a) = Fix2(⊇, a)
 ## strict subset comparison
 
 function ⊊ end
 function ⊋ end
 """
-    ⊊(a, b) -> Bool
-    ⊋(b, a) -> Bool
+    ⊊(a, b)::Bool
+    ⊋(b, a)::Bool
 
-Determines if `a` is a subset of, but not equal to, `b`.
+Determine if `a` is a subset of, but not equal to, `b`.
 
 See also [`issubset`](@ref) (`⊆`), [`⊈`](@ref).
 
@@ -381,11 +422,36 @@ false
 ⊊(a, b) = Set(a) ⊊ Set(b)
 ⊋(a, b) = b ⊊ a
 
+"""
+    ⊋(x)
+
+Create a function that compares its argument to `x` using [`⊋`](@ref), i.e.
+a function equivalent to `y -> y ⊋ x`.
+The returned function is of type `Base.Fix2{typeof(⊋)}`, which can be
+used to implement specialized methods.
+
+!!! compat "Julia 1.11"
+    This functionality requires at least Julia 1.11.
+"""
+⊋(a) = Fix2(⊋, a)
+"""
+    ⊊(x)
+
+Create a function that compares its argument to `x` using [`⊊`](@ref), i.e.
+a function equivalent to `y -> y ⊊ x`.
+The returned function is of type `Base.Fix2{typeof(⊊)}`, which can be
+used to implement specialized methods.
+
+!!! compat "Julia 1.11"
+    This functionality requires at least Julia 1.11.
+"""
+⊊(a) = Fix2(⊊, a)
+
 function ⊈ end
 function ⊉ end
 """
-    ⊈(a, b) -> Bool
-    ⊉(b, a) -> Bool
+    ⊈(a, b)::Bool
+    ⊉(b, a)::Bool
 
 Negation of `⊆` and `⊇`, i.e. checks that `a` is not a subset of `b`.
 
@@ -405,10 +471,36 @@ false
 ⊈(a, b) = !⊆(a, b)
 ⊉(a, b) = b ⊈ a
 
+"""
+    ⊉(x)
+
+Create a function that compares its argument to `x` using [`⊉`](@ref), i.e.
+a function equivalent to `y -> y ⊉ x`.
+The returned function is of type `Base.Fix2{typeof(⊉)}`, which can be
+used to implement specialized methods.
+
+!!! compat "Julia 1.11"
+    This functionality requires at least Julia 1.11.
+"""
+⊉(a) = Fix2(⊉, a)
+
+"""
+    ⊈(x)
+
+Create a function that compares its argument to `x` using [`⊈`](@ref), i.e.
+a function equivalent to `y -> y ⊈ x`.
+The returned function is of type `Base.Fix2{typeof(⊈)}`, which can be
+used to implement specialized methods.
+
+!!! compat "Julia 1.11"
+    This functionality requires at least Julia 1.11.
+"""
+⊈(a) = Fix2(⊈, a)
+
 ## set equality comparison
 
 """
-    issetequal(a, b) -> Bool
+    issetequal(a, b)::Bool
 
 Determine whether `a` and `b` have the same elements. Equivalent
 to `a ⊆ b && b ⊆ a` but more efficient when possible.
@@ -441,9 +533,22 @@ function issetequal(a, b)
     return issetequal(Set(a), Set(b))
 end
 
+"""
+    issetequal(x)
+
+Create a function that compares its argument to `x` using [`issetequal`](@ref), i.e.
+a function equivalent to `y -> issetequal(y, x)`.
+The returned function is of type `Base.Fix2{typeof(issetequal)}`, which can be
+used to implement specialized methods.
+
+!!! compat "Julia 1.11"
+    This functionality requires at least Julia 1.11.
+"""
+issetequal(a) = Fix2(issetequal, a)
+
 ## set disjoint comparison
 """
-    isdisjoint(a, b) -> Bool
+    isdisjoint(a, b)::Bool
 
 Determine whether the collections `a` and `b` are disjoint.
 Equivalent to `isempty(a ∩ b)` but more efficient when possible.
@@ -487,6 +592,19 @@ function isdisjoint(a::AbstractRange{T}, b::AbstractRange{T}) where T
     end
 end
 
+"""
+    isdisjoint(x)
+
+Create a function that compares its argument to `x` using [`isdisjoint`](@ref), i.e.
+a function equivalent to `y -> isdisjoint(y, x)`.
+The returned function is of type `Base.Fix2{typeof(isdisjoint)}`, which can be
+used to implement specialized methods.
+
+!!! compat "Julia 1.11"
+    This functionality requires at least Julia 1.11.
+"""
+isdisjoint(a) = Fix2(isdisjoint, a)
+
 _overlapping_range_isdisjoint(a::AbstractRange{T}, b::AbstractRange{T}) where T = invoke(isdisjoint, Tuple{Any,Any}, a, b)
 
 function _overlapping_range_isdisjoint(a::AbstractRange{T}, b::AbstractRange{T}) where T<:Integer
diff --git a/base/accumulate.jl b/base/accumulate.jl
index eeb9759e125c7..c155ecfb4f75f 100644
--- a/base/accumulate.jl
+++ b/base/accumulate.jl
@@ -5,12 +5,14 @@
 # it does double the number of operations compared to accumulate,
 # though for cheap operations like + this does not have much impact (20%)
 function _accumulate_pairwise!(op::Op, c::AbstractVector{T}, v::AbstractVector, s, i1, n)::T where {T,Op}
-    @inbounds if n < 128
-        s_ = v[i1]
-        c[i1] = op(s, s_)
+    if n < 128
+        @inbounds s_ = v[i1]
+        ci1 = op(s, s_)
+        @inbounds c[i1] = ci1
         for i = i1+1:i1+n-1
-            s_ = op(s_, v[i])
-            c[i] = op(s, s_)
+            s_ = op(s_, @inbounds(v[i]))
+            ci = op(s, s_)
+            @inbounds c[i] = ci
         end
     else
         n2 = n >> 1
@@ -26,14 +28,15 @@ function accumulate_pairwise!(op::Op, result::AbstractVector, v::AbstractVector)
     n = length(li)
     n == 0 && return result
     i1 = first(li)
-    @inbounds result[i1] = v1 = reduce_first(op,v[i1])
+    v1 = reduce_first(op, @inbounds(v[i1]))
+    @inbounds result[i1] = v1
     n == 1 && return result
     _accumulate_pairwise!(op, result, v, v1, i1+1, n-1)
     return result
 end
 
 function accumulate_pairwise(op, v::AbstractVector{T}) where T
-    out = similar(v, promote_op(op, T, T))
+    out = similar(v, _accumulate_promote_op(op, v))
     return accumulate_pairwise!(op, out, v)
 end
 
@@ -42,6 +45,8 @@ end
     cumsum!(B, A; dims::Integer)
 
 Cumulative sum of `A` along the dimension `dims`, storing the result in `B`. See also [`cumsum`](@ref).
+
+$(_DOCS_ALIASING_WARNING)
 """
 cumsum!(B::AbstractArray{T}, A; dims::Integer) where {T} =
     accumulate!(add_sum, B, A, dims=dims)
@@ -109,8 +114,8 @@ julia> cumsum(a, dims=2)
     widening happens and integer overflow results in `Int8[100, -128]`.
 """
 function cumsum(A::AbstractArray{T}; dims::Integer) where T
-    out = similar(A, promote_op(add_sum, T, T))
-    cumsum!(out, A, dims=dims)
+    out = similar(A, _accumulate_promote_op(add_sum, A))
+    return cumsum!(out, A, dims=dims)
 end
 
 """
@@ -150,6 +155,8 @@ cumsum(itr) = accumulate(add_sum, itr)
 
 Cumulative product of `A` along the dimension `dims`, storing the result in `B`.
 See also [`cumprod`](@ref).
+
+$(_DOCS_ALIASING_WARNING)
 """
 cumprod!(B::AbstractArray{T}, A; dims::Integer) where {T} =
     accumulate!(mul_prod, B, A, dims=dims)
@@ -159,6 +166,8 @@ cumprod!(B::AbstractArray{T}, A; dims::Integer) where {T} =
 
 Cumulative product of a vector `x`, storing the result in `y`.
 See also [`cumprod`](@ref).
+
+$(_DOCS_ALIASING_WARNING)
 """
 cumprod!(y::AbstractVector, x::AbstractVector) = cumprod!(y, x, dims=1)
 
@@ -274,14 +283,13 @@ function accumulate(op, A; dims::Union{Nothing,Integer}=nothing, kw...)
         # This branch takes care of the cases not handled by `_accumulate!`.
         return collect(Iterators.accumulate(op, A; kw...))
     end
+
     nt = values(kw)
-    if isempty(kw)
-        out = similar(A, promote_op(op, eltype(A), eltype(A)))
-    elseif keys(nt) === (:init,)
-        out = similar(A, promote_op(op, typeof(nt.init), eltype(A)))
-    else
+    if !(isempty(kw) || keys(nt) === (:init,))
         throw(ArgumentError("accumulate does not support the keyword arguments $(setdiff(keys(nt), (:init,)))"))
     end
+
+    out = similar(A, _accumulate_promote_op(op, A; kw...))
     accumulate!(op, out, A; dims=dims, kw...)
 end
 
@@ -301,6 +309,8 @@ Cumulative operation `op` on `A` along the dimension `dims`, storing the result
 Providing `dims` is optional for vectors.  If the keyword argument `init` is given, its
 value is used to instantiate the accumulation.
 
+$(_DOCS_ALIASING_WARNING)
+
 See also [`accumulate`](@ref), [`cumsum!`](@ref), [`cumprod!`](@ref).
 
 # Examples
@@ -371,16 +381,16 @@ function _accumulate!(op, B, A, dims::Integer, init::Union{Nothing, Some})
         # We can accumulate to a temporary variable, which allows
         # register usage and will be slightly faster
         ind1 = inds_t[1]
-        @inbounds for I in CartesianIndices(tail(inds_t))
+        for I in CartesianIndices(tail(inds_t))
             if init === nothing
-                tmp = reduce_first(op, A[first(ind1), I])
+                tmp = reduce_first(op, @inbounds(A[first(ind1), I]))
             else
-                tmp = op(something(init), A[first(ind1), I])
+                tmp = op(something(init), @inbounds(A[first(ind1), I]))
             end
-            B[first(ind1), I] = tmp
+            @inbounds B[first(ind1), I] = tmp
             for i_1 = first(ind1)+1:last(ind1)
-                tmp = op(tmp, A[i_1, I])
-                B[i_1, I] = tmp
+                tmp = op(tmp, @inbounds(A[i_1, I]))
+                @inbounds B[i_1, I] = tmp
             end
         end
     else
@@ -394,12 +404,15 @@ end
 @noinline function _accumulaten!(op, B, A, R1, ind, R2, init::Nothing)
     # Copy the initial element in each 1d vector along dimension `dim`
     ii = first(ind)
-    @inbounds for J in R2, I in R1
-        B[I, ii, J] = reduce_first(op, A[I, ii, J])
+    for J in R2, I in R1
+        tmp = reduce_first(op, @inbounds(A[I, ii, J]))
+        @inbounds B[I, ii, J] = tmp
     end
     # Accumulate
-    @inbounds for J in R2, i in first(ind)+1:last(ind), I in R1
-        B[I, i, J] = op(B[I, i-1, J], A[I, i, J])
+    for J in R2, i in first(ind)+1:last(ind), I in R1
+        @inbounds Bv, Av = B[I, i-1, J], A[I, i, J]
+        tmp = op(Bv, Av)
+        @inbounds B[I, i, J] = tmp
     end
     B
 end
@@ -407,12 +420,15 @@ end
 @noinline function _accumulaten!(op, B, A, R1, ind, R2, init::Some)
     # Copy the initial element in each 1d vector along dimension `dim`
     ii = first(ind)
-    @inbounds for J in R2, I in R1
-        B[I, ii, J] = op(something(init), A[I, ii, J])
+    for J in R2, I in R1
+        tmp = op(something(init), @inbounds(A[I, ii, J]))
+        @inbounds B[I, ii, J] = tmp
     end
     # Accumulate
-    @inbounds for J in R2, i in first(ind)+1:last(ind), I in R1
-        B[I, i, J] = op(B[I, i-1, J], A[I, i, J])
+    for J in R2, i in first(ind)+1:last(ind), I in R1
+        @inbounds Bv, Av = B[I, i-1, J], A[I, i, J]
+        tmp = op(Bv, Av)
+        @inbounds B[I, i, J] = tmp
     end
     B
 end
@@ -426,11 +442,50 @@ function _accumulate1!(op, B, v1, A::AbstractVector, dim::Integer)
     cur_val = v1
     B[i1] = cur_val
     next = iterate(inds, state)
-    @inbounds while next !== nothing
+    while next !== nothing
         (i, state) = next
-        cur_val = op(cur_val, A[i])
-        B[i] = cur_val
+        cur_val = op(cur_val, @inbounds(A[i]))
+        @inbounds B[i] = cur_val
         next = iterate(inds, state)
     end
     return B
 end
+
+# Internal function used to identify the widest possible eltype required for accumulate results
+function _accumulate_promote_op(op, v; init=nothing)
+    # Nested mock functions used to infer the widest necessary eltype
+    # NOTE: We are just passing this to promote_op for inference and should never be run.
+
+    # Initialization function used to identify initial type of `r`
+    # NOTE: reduce_first may have a different return type than calling `op`
+    function f(op, v, init)
+        val = first(something(iterate(v)))
+        return isnothing(init) ? Base.reduce_first(op, val) : op(init, val)
+    end
+
+    # Infer iteration type independent of the initialization type
+    # If `op` fails then this will return `Union{}` as `k` will be undefined.
+    # Returning `Union{}` is desirable as it won't break the `promote_type` call in the
+    # outer scope below
+    function g(op, v, r)
+        local k
+        for val in v
+            k = op(r, val)
+        end
+        return k
+    end
+
+    # Finally loop again with the two types promoted together
+    # If the `op` fails and reduce_first was used then then this will still just
+    # return the initial type, allowing the `op` to error during execution.
+    function h(op, v, r)
+        for val in v
+            r = op(r, val)
+        end
+        return r
+    end
+
+    R = Base.promote_op(f, typeof(op), typeof(v), typeof(init))
+    K = Base.promote_op(g, typeof(op), typeof(v), R)
+    return Base.promote_op(h, typeof(op), typeof(v), Base.promote_type(R, K))
+end
diff --git a/base/anyall.jl b/base/anyall.jl
new file mode 100644
index 0000000000000..395666dc6765a
--- /dev/null
+++ b/base/anyall.jl
@@ -0,0 +1,257 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## all & any
+
+"""
+    any(itr)::Bool
+
+Test whether any elements of a boolean collection are `true`, returning `true` as
+soon as the first `true` value in `itr` is encountered (short-circuiting). To
+short-circuit on `false`, use [`all`](@ref).
+
+If the input contains [`missing`](@ref) values, return `missing` if all non-missing
+values are `false` (or equivalently, if the input contains no `true` value), following
+[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
+
+See also: [`all`](@ref), [`count`](@ref), [`sum`](@ref), [`|`](@ref), [`||`](@ref).
+
+# Examples
+```jldoctest
+julia> a = [true,false,false,true]
+4-element Vector{Bool}:
+ 1
+ 0
+ 0
+ 1
+
+julia> any(a)
+true
+
+julia> any((println(i); v) for (i, v) in enumerate(a))
+1
+true
+
+julia> any([missing, true])
+true
+
+julia> any([false, missing])
+missing
+```
+"""
+any(itr) = any(identity, itr)
+
+"""
+    all(itr)::Bool
+
+Test whether all elements of a boolean collection are `true`, returning `false` as
+soon as the first `false` value in `itr` is encountered (short-circuiting). To
+short-circuit on `true`, use [`any`](@ref).
+
+If the input contains [`missing`](@ref) values, return `missing` if all non-missing
+values are `true` (or equivalently, if the input contains no `false` value), following
+[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
+
+See also: [`all!`](@ref), [`any`](@ref), [`count`](@ref), [`&`](@ref), [`&&`](@ref), [`allunique`](@ref).
+
+# Examples
+```jldoctest
+julia> a = [true,false,false,true]
+4-element Vector{Bool}:
+ 1
+ 0
+ 0
+ 1
+
+julia> all(a)
+false
+
+julia> all((println(i); v) for (i, v) in enumerate(a))
+1
+2
+false
+
+julia> all([missing, false])
+false
+
+julia> all([true, missing])
+missing
+```
+"""
+all(itr) = all(identity, itr)
+
+"""
+    any(p, itr)::Bool
+
+Determine whether predicate `p` returns `true` for any elements of `itr`, returning
+`true` as soon as the first item in `itr` for which `p` returns `true` is encountered
+(short-circuiting). To short-circuit on `false`, use [`all`](@ref).
+
+If the input contains [`missing`](@ref) values, return `missing` if all non-missing
+values are `false` (or equivalently, if the input contains no `true` value), following
+[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
+
+# Examples
+```jldoctest
+julia> any(i->(4<=i<=6), [3,5,7])
+true
+
+julia> any(i -> (println(i); i > 3), 1:10)
+1
+2
+3
+4
+true
+
+julia> any(i -> i > 0, [1, missing])
+true
+
+julia> any(i -> i > 0, [-1, missing])
+missing
+
+julia> any(i -> i > 0, [-1, 0])
+false
+```
+"""
+any(f, itr) = _any(f, itr, :)
+
+for ItrT = (Tuple,Any)
+    # define a generic method and a specialized version for `Tuple`,
+    # whose method bodies are identical, while giving better effects to the later
+    @eval function _any(f, itr::$ItrT, ::Colon)
+        $(ItrT === Tuple ? :(@_terminates_locally_meta) : :nothing)
+        anymissing = false
+        for x in itr
+            v = f(x)
+            if ismissing(v)
+                anymissing = true
+            else
+                v && return true
+            end
+        end
+        return anymissing ? missing : false
+    end
+end
+
+# When the function is side effect-free, we may avoid short-circuiting to help
+# vectorize the loop.
+function _any(::typeof(identity), itr::Tuple{Vararg{Bool}}, ::Colon)
+    @_terminates_locally_meta
+    r = false
+    for i in eachindex(itr)
+        # Avoid bounds checking to help vectorization. Use `getfield` directly,
+        # instead of `@inbounds itr[i]`, for better effects.
+        v = getfield(itr, i, false)
+        r |= v
+    end
+    r
+end
+
+# Specialized versions of any(f, ::Tuple)
+# We fall back to the for loop implementation all elements have the same type or
+# if the tuple is too large.
+function any(f, itr::Tuple)
+    if itr isa NTuple || length(itr) > 32
+        return _any(f, itr, :)
+    end
+    _any_tuple(f, false, itr...)
+end
+
+@inline function _any_tuple(f, anymissing, x, rest...)
+    v = f(x)
+    if ismissing(v)
+        anymissing = true
+    elseif v
+        return true
+    end
+    return _any_tuple(f, anymissing, rest...)
+end
+@inline _any_tuple(f, anymissing) = anymissing ? missing : false
+
+"""
+    all(p, itr)::Bool
+
+Determine whether predicate `p` returns `true` for all elements of `itr`, returning
+`false` as soon as the first item in `itr` for which `p` returns `false` is encountered
+(short-circuiting). To short-circuit on `true`, use [`any`](@ref).
+
+If the input contains [`missing`](@ref) values, return `missing` if all non-missing
+values are `true` (or equivalently, if the input contains no `false` value), following
+[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
+
+# Examples
+```jldoctest
+julia> all(i->(4<=i<=6), [4,5,6])
+true
+
+julia> all(i -> (println(i); i < 3), 1:10)
+1
+2
+3
+false
+
+julia> all(i -> i > 0, [1, missing])
+missing
+
+julia> all(i -> i > 0, [-1, missing])
+false
+
+julia> all(i -> i > 0, [1, 2])
+true
+```
+"""
+all(f, itr) = _all(f, itr, :)
+
+for ItrT = (Tuple,Any)
+    # define a generic method and a specialized version for `Tuple`,
+    # whose method bodies are identical, while giving better effects to the later
+    @eval function _all(f, itr::$ItrT, ::Colon)
+        $(ItrT === Tuple ? :(@_terminates_locally_meta) : :nothing)
+        anymissing = false
+        for x in itr
+            v = f(x)
+            if ismissing(v)
+                anymissing = true
+            else
+                v || return false
+            end
+        end
+        return anymissing ? missing : true
+    end
+end
+
+# When the function is side effect-free, we may avoid short-circuiting to help
+# vectorize the loop.
+function _all(::typeof(identity), itr::Tuple{Vararg{Bool}}, ::Colon)
+    @_terminates_locally_meta
+    r = true
+    for i in eachindex(itr)
+        # Avoid bounds checking to help vectorization. Use `getfield` directly,
+        # instead of `@inbounds itr[i]`, for better effects.
+        v = getfield(itr, i, false)
+        r &= v
+    end
+    r
+end
+
+# Specialized versions of all(f, ::Tuple),
+# This is similar to any(f, ::Tuple) defined above.
+function all(f, itr::Tuple)
+    if itr isa NTuple || length(itr) > 32
+        return _all(f, itr, :)
+    end
+    _all_tuple(f, false, itr...)
+end
+
+@inline function _all_tuple(f, anymissing, x, rest...)
+    v = f(x)
+    if ismissing(v)
+        anymissing = true
+    # this syntax allows throwing a TypeError for non-Bool, for consistency with any
+    elseif v
+        nothing
+    else
+        return false
+    end
+    return _all_tuple(f, anymissing, rest...)
+end
+@inline _all_tuple(f, anymissing) = anymissing ? missing : true
diff --git a/base/array.jl b/base/array.jl
index b99ec7ee2b015..b6a4e9e852aef 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -120,39 +120,37 @@ const DenseVecOrMat{T} = Union{DenseVector{T}, DenseMatrix{T}}
 
 ## Basic functions ##
 
-using Core: arraysize, arrayset, const_arrayref
-
 """
     @_safeindex
 
 This internal macro converts:
-- `getindex(xs::Tuple, )` -> `__inbounds_getindex(args...)`
-- `setindex!(xs::Vector, args...)` -> `__inbounds_setindex!(xs, args...)`
+- `getindex(xs::Tuple, i::Int)` -> `__safe_getindex(xs, i)`
+- `setindex!(xs::Vector{T}, x, i::Int)` -> `__safe_setindex!(xs, x, i)`
 to tell the compiler that indexing operations within the applied expression are always
 inbounds and do not need to taint `:consistent` and `:nothrow`.
 """
 macro _safeindex(ex)
-    return esc(_safeindex(__module__, ex))
+    return esc(_safeindex(ex))
 end
-function _safeindex(__module__, ex)
+function _safeindex(ex)
     isa(ex, Expr) || return ex
     if ex.head === :(=)
-        lhs = arrayref(true, ex.args, 1)
+        lhs = ex.args[1]
         if isa(lhs, Expr) && lhs.head === :ref # xs[i] = x
-            rhs = arrayref(true, ex.args, 2)
-            xs = arrayref(true, lhs.args, 1)
+            rhs = ex.args[2]
+            xs = lhs.args[1]
             args = Vector{Any}(undef, length(lhs.args)-1)
             for i = 2:length(lhs.args)
-                arrayset(true, args, _safeindex(__module__, arrayref(true, lhs.args, i)), i-1)
+                args[i-1] = _safeindex(lhs.args[i])
             end
-            return Expr(:call, GlobalRef(__module__, :__inbounds_setindex!), xs, _safeindex(__module__, rhs), args...)
+            return Expr(:call, GlobalRef(@__MODULE__, :__safe_setindex!), xs, _safeindex(rhs), args...)
         end
     elseif ex.head === :ref # xs[i]
-        return Expr(:call, GlobalRef(__module__, :__inbounds_getindex), ex.args...)
+        return Expr(:call, GlobalRef(@__MODULE__, :__safe_getindex), ex.args...)
     end
     args = Vector{Any}(undef, length(ex.args))
     for i = 1:length(ex.args)
-        arrayset(true, args, _safeindex(__module__, arrayref(true, ex.args, i)), i)
+        args[i] = _safeindex(ex.args[i])
     end
     return Expr(ex.head, args...)
 end
@@ -187,12 +185,7 @@ function vect(X...)
     return T[X...]
 end
 
-size(a::Array, d::Integer) = arraysize(a, d isa Int ? d : convert(Int, d))
-size(a::Vector) = (arraysize(a,1),)
-size(a::Matrix) = (arraysize(a,1), arraysize(a,2))
-size(a::Array{<:Any,N}) where {N} = (@inline; ntuple(M -> size(a, M), Val(N))::Dims)
-
-asize_from(a::Array, n) = n > ndims(a) ? () : (arraysize(a,n), asize_from(a, n+1)...)
+asize_from(a::Array, n) = n > ndims(a) ? () : (size(a,n), asize_from(a, n+1)...)
 
 allocatedinline(@nospecialize T::Type) = (@_total_meta; ccall(:jl_stored_inline, Cint, (Any,), T) != Cint(0))
 
@@ -210,49 +203,18 @@ julia> Base.isbitsunion(Union{Float64, String})
 false
 ```
 """
-isbitsunion(u::Union) = allocatedinline(u)
-isbitsunion(x) = false
+isbitsunion(u::Type) = u isa Union && allocatedinline(u)
 
-function _unsetindex!(A::Array{T}, i::Int) where {T}
+function _unsetindex!(A::Array, i::Int)
     @inline
     @boundscheck checkbounds(A, i)
-    t = @_gc_preserve_begin A
-    p = Ptr{Ptr{Cvoid}}(pointer(A, i))
-    if !allocatedinline(T)
-        Intrinsics.atomic_pointerset(p, C_NULL, :monotonic)
-    elseif T isa DataType
-        if !datatype_pointerfree(T)
-            for j = 1:Core.sizeof(Ptr{Cvoid}):Core.sizeof(T)
-                Intrinsics.atomic_pointerset(p + j - 1, C_NULL, :monotonic)
-            end
-        end
-    end
-    @_gc_preserve_end t
+    @inbounds _unsetindex!(memoryref(A.ref, i))
     return A
 end
 
 
-"""
-    Base.bitsunionsize(U::Union) -> Int
-
-For a `Union` of [`isbitstype`](@ref) types, return the size of the largest type; assumes `Base.isbitsunion(U) == true`.
-
-# Examples
-```jldoctest
-julia> Base.bitsunionsize(Union{Float64, UInt8})
-8
-
-julia> Base.bitsunionsize(Union{Float64, UInt8, Int128})
-16
-```
-"""
-function bitsunionsize(u::Union)
-    isinline, sz, _ = uniontype_layout(u)
-    @assert isinline
-    return sz
-end
-
-elsize(@nospecialize _::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T)
+# TODO: deprecate this (aligned_sizeof and/or elsize and/or sizeof(Some{T}) are more correct)
+elsize(::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T)
 function elsize(::Type{Ptr{T}}) where T
     # this only must return something valid for values which satisfy is_valid_intrinsic_elptr(T),
     # which includes Any and most concrete datatypes
@@ -261,15 +223,25 @@ function elsize(::Type{Ptr{T}}) where T
     return LLT_ALIGN(Core.sizeof(T), datatype_alignment(T))
 end
 elsize(::Type{Union{}}, slurp...) = 0
-sizeof(a::Array) = Core.sizeof(a)
+
+sizeof(a::Array) = length(a) * elsize(typeof(a)) # n.b. this ignores bitsunion bytes, as a historical fact
 
 function isassigned(a::Array, i::Int...)
     @inline
+    @_noub_if_noinbounds_meta
     @boundscheck checkbounds(Bool, a, i...) || return false
-    ii = (_sub2ind(size(a), i...) % UInt) - 1
-    ccall(:jl_array_isassigned, Cint, (Any, UInt), a, ii) == 1
+    ii = _sub2ind(size(a), i...)
+    return @inbounds isassigned(memoryrefnew(a.ref, ii, false))
+end
+
+function isassigned(a::Vector, i::Int) # slight compiler simplification for the most common case
+    @inline
+    @_noub_if_noinbounds_meta
+    @boundscheck checkbounds(Bool, a, i) || return false
+    return @inbounds isassigned(memoryrefnew(a.ref, i, false))
 end
 
+
 ## copy ##
 
 """
@@ -289,110 +261,108 @@ function unsafe_copyto!(dest::Ptr{T}, src::Ptr{T}, n) where T
     return dest
 end
 
-
-function _unsafe_copyto!(dest, doffs, src, soffs, n)
-    destp = pointer(dest, doffs)
-    srcp = pointer(src, soffs)
-    @inbounds if destp < srcp || destp > srcp + n
-        for i = 1:n
-            if isassigned(src, soffs + i - 1)
-                dest[doffs + i - 1] = src[soffs + i - 1]
-            else
-                _unsetindex!(dest, doffs + i - 1)
-            end
-        end
-    else
-        for i = n:-1:1
-            if isassigned(src, soffs + i - 1)
-                dest[doffs + i - 1] = src[soffs + i - 1]
-            else
-                _unsetindex!(dest, doffs + i - 1)
-            end
-        end
-    end
-    return dest
-end
-
 """
-    unsafe_copyto!(dest::Array, do, src::Array, so, N)
+    unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n)
 
-Copy `N` elements from a source array to a destination, starting at the linear index `so` in the
-source and `do` in the destination (1-indexed).
+Copy `n` elements from a source array to a destination, starting at the linear index `soffs` in the
+source and `doffs` in the destination (1-indexed).
 
 The `unsafe` prefix on this function indicates that no validation is performed to ensure
-that N is inbounds on either array. Incorrect usage may corrupt or segfault your program, in
+that n is inbounds on either array. Incorrect usage may corrupt or segfault your program, in
 the same manner as C.
 """
-function unsafe_copyto!(dest::Array{T}, doffs, src::Array{T}, soffs, n) where T
-    t1 = @_gc_preserve_begin dest
-    t2 = @_gc_preserve_begin src
-    destp = pointer(dest, doffs)
-    srcp = pointer(src, soffs)
-    if !allocatedinline(T)
-        ccall(:jl_array_ptr_copy, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int),
-              dest, destp, src, srcp, n)
-    elseif isbitstype(T)
-        memmove(destp, srcp, n * aligned_sizeof(T))
-    elseif isbitsunion(T)
-        memmove(destp, srcp, n * aligned_sizeof(T))
-        # copy selector bytes
-        memmove(
-              ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), dest) + doffs - 1,
-              ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), src) + soffs - 1,
-              n)
-    else
-        _unsafe_copyto!(dest, doffs, src, soffs, n)
-    end
-    @_gc_preserve_end t2
-    @_gc_preserve_end t1
+function unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n)
+    n == 0 && return dest
+    unsafe_copyto!(memoryref(dest.ref, doffs), memoryref(src.ref, soffs), n)
     return dest
 end
 
-unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n) =
-    _unsafe_copyto!(dest, doffs, src, soffs, n)
-
 """
-    copyto!(dest, do, src, so, N)
+    copyto!(dest, doffs, src, soffs, n)
 
-Copy `N` elements from collection `src` starting at the linear index `so`, to array `dest` starting at
-the index `do`. Return `dest`.
+Copy `n` elements from collection `src` starting at the linear index `soffs`, to array `dest` starting at
+the index `doffs`. Return `dest`.
 """
-function copyto!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer)
-    return _copyto_impl!(dest, doffs, src, soffs, n)
-end
+copyto!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n)
+copyto!(dest::Array, doffs::Integer, src::Memory, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n)
+copyto!(dest::Memory, doffs::Integer, src::Array, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n)
 
 # this is only needed to avoid possible ambiguities with methods added in some packages
-function copyto!(dest::Array{T}, doffs::Integer, src::Array{T}, soffs::Integer, n::Integer) where T
-    return _copyto_impl!(dest, doffs, src, soffs, n)
-end
+copyto!(dest::Array{T}, doffs::Integer, src::Array{T}, soffs::Integer, n::Integer) where {T} = _copyto_impl!(dest, doffs, src, soffs, n)
 
-function _copyto_impl!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer)
+function _copyto_impl!(dest::Union{Array,Memory}, doffs::Integer, src::Union{Array,Memory}, soffs::Integer, n::Integer)
     n == 0 && return dest
-    n > 0 || _throw_argerror("Number of elements to copy must be nonnegative.")
+    n > 0 || _throw_argerror("Number of elements to copy must be non-negative.")
     @boundscheck checkbounds(dest, doffs:doffs+n-1)
     @boundscheck checkbounds(src, soffs:soffs+n-1)
-    unsafe_copyto!(dest, doffs, src, soffs, n)
+    @inbounds let dest = memoryref(dest isa Array ? getfield(dest, :ref) : dest, doffs),
+                  src = memoryref(src isa Array ? getfield(src, :ref) : src, soffs)
+        unsafe_copyto!(dest, src, n)
+    end
     return dest
 end
 
+
 # Outlining this because otherwise a catastrophic inference slowdown
 # occurs, see discussion in #27874.
 # It is also mitigated by using a constant string.
 _throw_argerror(s) = (@noinline; throw(ArgumentError(s)))
 
-copyto!(dest::Array, src::Array) = copyto!(dest, 1, src, 1, length(src))
+_copyto2arg!(dest, src) = copyto!(dest, firstindex(dest), src, firstindex(src), length(src))
+
+copyto!(dest::Array, src::Array) = _copyto2arg!(dest, src)
+copyto!(dest::Array, src::Memory) = _copyto2arg!(dest, src)
+copyto!(dest::Memory, src::Array) = _copyto2arg!(dest, src)
 
 # also to avoid ambiguities in packages
-copyto!(dest::Array{T}, src::Array{T}) where {T} = copyto!(dest, 1, src, 1, length(src))
+copyto!(dest::Array{T}, src::Array{T}) where {T} = _copyto2arg!(dest, src)
+copyto!(dest::Array{T}, src::Memory{T}) where {T} = _copyto2arg!(dest, src)
+copyto!(dest::Memory{T}, src::Array{T}) where {T} = _copyto2arg!(dest, src)
+
+# N.B: This generic definition in for multidimensional arrays is here instead of
+# `multidimensional.jl` for bootstrapping purposes.
+"""
+    fill!(A, x)
+
+Fill array `A` with the value `x`. If `x` is an object reference, all elements will refer to
+the same object. `fill!(A, Foo())` will return `A` filled with the result of evaluating
+`Foo()` once.
 
-# N.B: The generic definition in multidimensional.jl covers, this, this is just here
-# for bootstrapping purposes.
-function fill!(dest::Array{T}, x) where T
+# Examples
+```jldoctest
+julia> A = zeros(2,3)
+2×3 Matrix{Float64}:
+ 0.0  0.0  0.0
+ 0.0  0.0  0.0
+
+julia> fill!(A, 2.)
+2×3 Matrix{Float64}:
+ 2.0  2.0  2.0
+ 2.0  2.0  2.0
+
+julia> a = [1, 1, 1]; A = fill!(Vector{Vector{Int}}(undef, 3), a); a[1] = 2; A
+3-element Vector{Vector{Int64}}:
+ [2, 1, 1]
+ [2, 1, 1]
+ [2, 1, 1]
+
+julia> x = 0; f() = (global x += 1; x); fill!(Vector{Int}(undef, 3), f())
+3-element Vector{Int64}:
+ 1
+ 1
+ 1
+```
+"""
+function fill!(A::AbstractArray{T}, x) where T
+    @inline
     xT = x isa T ? x : convert(T, x)::T
-    for i in eachindex(dest)
-        @inbounds dest[i] = xT
+    return _fill!(A, xT)
+end
+function _fill!(A::AbstractArray{T}, x::T) where T
+    for i in eachindex(A)
+        A[i] = x
     end
-    return dest
+    return A
 end
 
 """
@@ -406,17 +376,37 @@ See also [`copy!`](@ref Base.copy!), [`copyto!`](@ref), [`deepcopy`](@ref).
 """
 copy
 
-copy(a::T) where {T<:Array} = ccall(:jl_array_copy, Ref{T}, (Any,), a)
+@eval function copy(a::Array)
+    # `copy` only throws when the size exceeds the max allocation size,
+    # but since we're copying an existing array, we're guaranteed that this will not happen.
+    @_nothrow_meta
+    ref = a.ref
+    newmem = typeof(ref.mem)(undef, length(a))
+    @inbounds unsafe_copyto!(memoryref(newmem), ref, length(a))
+    return $(Expr(:new, :(typeof(a)), :(memoryref(newmem)), :(a.size)))
+end
+
+# a mutating version of copyto! that results in dst aliasing src afterwards
+function _take!(dst::Array{T,N}, src::Array{T,N}) where {T,N}
+    if getfield(dst, :ref) !== getfield(src, :ref)
+        setfield!(dst, :ref, getfield(src, :ref))
+    end
+    if getfield(dst, :size) !== getfield(src, :size)
+        setfield!(dst, :size, getfield(src, :size))
+    end
+    return dst
+end
 
 ## Constructors ##
 
-similar(a::Array{T,1}) where {T}                    = Vector{T}(undef, size(a,1))
-similar(a::Array{T,2}) where {T}                    = Matrix{T}(undef, size(a,1), size(a,2))
-similar(a::Array{T,1}, S::Type) where {T}           = Vector{S}(undef, size(a,1))
-similar(a::Array{T,2}, S::Type) where {T}           = Matrix{S}(undef, size(a,1), size(a,2))
+similar(a::Vector{T}) where {T}                    = Vector{T}(undef, size(a,1))
+similar(a::Matrix{T}) where {T}                    = Matrix{T}(undef, size(a,1), size(a,2))
+similar(a::Vector{T}, S::Type) where {T}           = Vector{S}(undef, size(a,1))
+similar(a::Matrix{T}, S::Type) where {T}           = Matrix{S}(undef, size(a,1), size(a,2))
 similar(a::Array{T}, m::Int) where {T}              = Vector{T}(undef, m)
 similar(a::Array, T::Type, dims::Dims{N}) where {N} = Array{T,N}(undef, dims)
 similar(a::Array{T}, dims::Dims{N}) where {T,N}     = Array{T,N}(undef, dims)
+similar(::Type{Array{T,N}}, dims::Dims) where {T,N} = similar(Array{T}, dims)
 
 # T[x...] constructs Array{T,1}
 """
@@ -469,9 +459,10 @@ end
 getindex(::Type{Any}) = Vector{Any}()
 
 function fill!(a::Union{Array{UInt8}, Array{Int8}}, x::Integer)
-    t = @_gc_preserve_begin a
-    p = unsafe_convert(Ptr{Cvoid}, a)
-    memset(p, x isa eltype(a) ? x : convert(eltype(a), x), length(a))
+    ref = a.ref
+    t = @_gc_preserve_begin ref
+    p = unsafe_convert(Ptr{Cvoid}, ref)
+    memset(p, x isa eltype(a) ? x : convert(eltype(a), x), length(a) % UInt)
     @_gc_preserve_end t
     return a
 end
@@ -580,6 +571,7 @@ function fill end
 fill(v, dims::DimOrInd...) = fill(v, dims)
 fill(v, dims::NTuple{N, Union{Integer, OneTo}}) where {N} = fill(v, map(to_dim, dims))
 fill(v, dims::NTuple{N, Integer}) where {N} = (a=Array{typeof(v),N}(undef, dims); fill!(a, v); a)
+fill(v, dims::NTuple{N, DimOrInd}) where {N} = (a=similar(Array{typeof(v),N}, dims); fill!(a, v); a)
 fill(v, dims::Tuple{}) = (a=Array{typeof(v),0}(undef, dims); fill!(a, v); a)
 
 """
@@ -640,24 +632,14 @@ for (fname, felt) in ((:zeros, :zero), (:ones, :one))
             fill!(a, $felt(T))
             return a
         end
+        function $fname(::Type{T}, dims::NTuple{N, DimOrInd}) where {T,N}
+            a = similar(Array{T,N}, dims)
+            fill!(a, $felt(T))
+            return a
+        end
     end
 end
 
-function _one(unit::T, x::AbstractMatrix) where T
-    require_one_based_indexing(x)
-    m,n = size(x)
-    m==n || throw(DimensionMismatch("multiplicative identity defined only for square matrices"))
-    # Matrix{T}(I, m, m)
-    I = zeros(T, m, m)
-    for i in 1:m
-        I[i,i] = unit
-    end
-    I
-end
-
-one(x::AbstractMatrix{T}) where {T} = _one(one(T), x)
-oneunit(x::AbstractMatrix{T}) where {T} = _one(oneunit(T), x)
-
 ## Conversions ##
 
 convert(::Type{T}, a::AbstractArray) where {T<:Array} = a isa T ? a : T(a)::T
@@ -666,11 +648,9 @@ promote_rule(a::Type{Array{T,n}}, b::Type{Array{S,n}}) where {T,n,S} = el_same(p
 
 ## Constructors ##
 
-if nameof(@__MODULE__) === :Base  # avoid method overwrite
 # constructors should make copies
 Array{T,N}(x::AbstractArray{S,N})         where {T,N,S} = copyto_axcheck!(Array{T,N}(undef, size(x)), x)
 AbstractArray{T,N}(A::AbstractArray{S,N}) where {T,N,S} = copyto_axcheck!(similar(A,T), A)
-end
 
 ## copying iterators to containers
 
@@ -725,33 +705,55 @@ _array_for(::Type{T}, itr, isz) where {T} = _array_for(T, isz, _similar_shape(it
 
 
 """
-    collect(collection)
+    collect(iterator)
 
 Return an `Array` of all items in a collection or iterator. For dictionaries, returns
-`Vector{Pair{KeyType, ValType}}`. If the argument is array-like or is an iterator with the
-[`HasShape`](@ref IteratorSize) trait, the result will have the same shape
+a `Vector` of `key=>value` [Pair](@ref Pair)s. If the argument is array-like or is an iterator
+with the [`HasShape`](@ref IteratorSize) trait, the result will have the same shape
 and number of dimensions as the argument.
 
-Used by comprehensions to turn a generator into an `Array`.
+Used by [comprehensions](@ref man-comprehensions) to turn a [generator expression](@ref man-generators)
+into an `Array`. Thus, *on generators*, the square-brackets notation may be used instead of calling `collect`,
+see second example.
+
+The element type of the returned array is based on the types of the values collected. However, if the
+iterator is empty then the element type of the returned (empty) array is determined by type inference.
 
 # Examples
+
+Collect items from a `UnitRange{Int64}` collection:
+
 ```jldoctest
-julia> collect(1:2:13)
-7-element Vector{Int64}:
-  1
-  3
-  5
-  7
-  9
- 11
- 13
+julia> collect(1:3)
+3-element Vector{Int64}:
+ 1
+ 2
+ 3
+```
 
-julia> [x^2 for x in 1:8 if isodd(x)]
-4-element Vector{Int64}:
-  1
-  9
- 25
- 49
+Collect items from a generator (same output as `[x^2 for x in 1:3]`):
+
+```jldoctest
+julia> collect(x^2 for x in 1:3)
+3-element Vector{Int64}:
+ 1
+ 4
+ 9
+```
+
+Collecting an empty iterator where the result type depends on type inference:
+
+```jldoctest
+julia> [rand(Bool) ? 1 : missing for _ in []]
+Union{Missing, Int64}[]
+```
+
+When the iterator is non-empty, the result type depends only on values:
+
+```julia-repl
+julia> [rand(Bool) ? 1 : missing for _ in [""]]
+1-element Vector{Int64}:
+ 1
 ```
 """
 collect(itr) = _collect(1:1 #= Array =#, itr, IteratorEltype(itr), IteratorSize(itr))
@@ -771,9 +773,16 @@ function _collect(cont, itr, ::HasEltype, isz::SizeUnknown)
     return a
 end
 
-_collect_indices(::Tuple{}, A) = copyto!(Array{eltype(A),0}(undef), A)
-_collect_indices(indsA::Tuple{Vararg{OneTo}}, A) =
-    copyto!(Array{eltype(A)}(undef, length.(indsA)), A)
+function _collect_indices(::Tuple{}, A)
+    dest = Array{eltype(A),0}(undef)
+    isempty(A) && return dest
+    return copyto_unaliased!(IndexStyle(dest), dest, IndexStyle(A), A)
+end
+function _collect_indices(indsA::Tuple{Vararg{OneTo}}, A)
+    dest = Array{eltype(A)}(undef, length.(indsA))
+    isempty(A) && return dest
+    return copyto_unaliased!(IndexStyle(dest), dest, IndexStyle(A), A)
+end
 function _collect_indices(indsA, A)
     B = Array{eltype(A)}(undef, length.(indsA))
     copyto!(B, CartesianIndices(axes(B)), A, CartesianIndices(indsA))
@@ -797,28 +806,15 @@ end
 # gets inlined into the caller before recursion detection
 # gets a chance to see it, so that recursive calls to the caller
 # don't trigger the inference limiter
-if isdefined(Core, :Compiler)
-    macro default_eltype(itr)
-        I = esc(itr)
-        return quote
-            if $I isa Generator && ($I).f isa Type
-                T = ($I).f
-            else
-                T = Core.Compiler.return_type(_iterator_upper_bound, Tuple{typeof($I)})
-            end
-            promote_typejoin_union(T)
-        end
-    end
-else
-    macro default_eltype(itr)
-        I = esc(itr)
-        return quote
-            if $I isa Generator && ($I).f isa Type
-                promote_typejoin_union($I.f)
-            else
-                Any
-            end
+macro default_eltype(itr)
+    I = esc(itr)
+    return quote
+        if $I isa Generator && ($I).f isa Type
+            T = ($I).f
+        else
+            T = Base._return_type(_iterator_upper_bound, Tuple{typeof($I)})
         end
+        promote_typejoin_union(T)
     end
 end
 
@@ -938,10 +934,6 @@ function grow_to!(dest, itr, st)
     return dest
 end
 
-## Iteration ##
-
-iterate(A::Array, i=1) = (@inline; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
-
 ## Indexing: getindex ##
 
 """
@@ -953,7 +945,7 @@ Retrieve the value(s) stored at the given key or index within a collection. The
 See also [`get`](@ref), [`keys`](@ref), [`eachindex`](@ref).
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> A = Dict("a" => 1, "b" => 2)
 Dict{String, Int64} with 2 entries:
   "b" => 2
@@ -965,6 +957,12 @@ julia> getindex(A, "a")
 """
 function getindex end
 
+function getindex(A::Array, i1::Int, i2::Int, I::Int...)
+    @inline
+    @boundscheck checkbounds(A, i1, i2, I...) # generally _to_linear_index requires bounds checking
+    return @inbounds A[_to_linear_index(A, i1, i2, I...)]
+end
+
 # Faster contiguous indexing using copyto! for AbstractUnitRange and Colon
 function getindex(A::Array, I::AbstractUnitRange{<:Integer})
     @inline
@@ -1003,7 +1001,7 @@ Store the given value at the given key or index within a collection. The syntax
 x` is converted by the compiler to `(setindex!(a, x, i, j, ...); x)`.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> a = Dict("a"=>1)
 Dict{String, Int64} with 1 entry:
   "a" => 1
@@ -1016,27 +1014,48 @@ Dict{String, Int64} with 2 entries:
 """
 function setindex! end
 
-@eval setindex!(A::Array{T}, x, i1::Int) where {T} =
-    arrayset($(Expr(:boundscheck)), A, x isa T ? x : convert(T,x)::T, i1)
-@eval setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} =
-    (@inline; arrayset($(Expr(:boundscheck)), A, x isa T ? x : convert(T,x)::T, i1, i2, I...))
+function setindex!(A::Array{T}, x, i::Int) where {T}
+    @_propagate_inbounds_meta
+    x = x isa T ? x : convert(T, x)::T
+    return _setindex!(A, x, i)
+end
+function _setindex!(A::Array{T}, x::T, i::Int) where {T}
+    @_noub_if_noinbounds_meta
+    @boundscheck checkbounds(Bool, A, i) || throw_boundserror(A, (i,))
+    memoryrefset!(memoryrefnew(A.ref, i, false), x, :not_atomic, false)
+    return A
+end
+function setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T}
+    @_propagate_inbounds_meta
+    x = x isa T ? x : convert(T, x)::T
+    return _setindex!(A, x, i1, i2, I...)
+end
+function _setindex!(A::Array{T}, x::T, i1::Int, i2::Int, I::Int...) where {T}
+    @inline
+    @_noub_if_noinbounds_meta
+    @boundscheck checkbounds(A, i1, i2, I...) # generally _to_linear_index requires bounds checking
+    memoryrefset!(memoryrefnew(A.ref, _to_linear_index(A, i1, i2, I...), false), x, :not_atomic, false)
+    return A
+end
 
-__inbounds_setindex!(A::Array{T}, x, i1::Int) where {T} =
-    arrayset(false, A, convert(T,x)::T, i1)
-__inbounds_setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} =
-    (@inline; arrayset(false, A, convert(T,x)::T, i1, i2, I...))
+__safe_setindex!(A::Vector{Any}, @nospecialize(x), i::Int) = (@inline; @_nothrow_noub_meta;
+    memoryrefset!(memoryrefnew(A.ref, i, false), x, :not_atomic, false); return A)
+__safe_setindex!(A::Vector{T}, x::T, i::Int) where {T} = (@inline; @_nothrow_noub_meta;
+    memoryrefset!(memoryrefnew(A.ref, i, false), x, :not_atomic, false); return A)
+__safe_setindex!(A::Vector{T}, x,    i::Int) where {T} = (@inline;
+    __safe_setindex!(A, convert(T, x)::T, i))
 
 # This is redundant with the abstract fallbacks but needed and helpful for bootstrap
 function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int})
     @_propagate_inbounds_meta
     @boundscheck setindex_shape_check(X, length(I))
+    @boundscheck checkbounds(A, I)
     require_one_based_indexing(X)
     X′ = unalias(A, X)
     I′ = unalias(A, I)
     count = 1
     for i in I′
-        @inbounds x = X′[count]
-        A[i] = x
+        @inbounds A[i] = X′[count]
         count += 1
     end
     return A
@@ -1063,24 +1082,230 @@ function setindex!(A::Array{T}, X::Array{T}, c::Colon) where T
     return A
 end
 
-# efficiently grow an array
+# Pick new memory size for efficiently growing an array
+# TODO: This should know about the size of our GC pools
+# Specifically we are wasting ~10% of memory for small arrays
+# by not picking memory sizes that max out a GC pool
+function overallocation(maxsize)
+    # compute maxsize = maxsize + 3*maxsize^(7/8) + maxsize/8
+    # for small n, we grow faster than O(n)
+    # for large n, we grow at O(n/8)
+    # and as we reach O(memory) for memory>>1MB,
+    # this means we end by adding about 10% of memory each time
+    # most commonly, this will take steps of 0-3-9-34 or 1-4-16-66 or 2-8-33
+    exp2 = sizeof(maxsize) * 8 - Core.Intrinsics.ctlz_int(maxsize)
+    maxsize += (1 << div(exp2 * 7, 8)) * 3 + div(maxsize, 8)
+    return maxsize
+end
 
-_growbeg!(a::Vector, delta::Integer) =
-    ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), a, delta)
-_growend!(a::Vector, delta::Integer) =
-    ccall(:jl_array_grow_end, Cvoid, (Any, UInt), a, delta)
-_growat!(a::Vector, i::Integer, delta::Integer) =
-    ccall(:jl_array_grow_at, Cvoid, (Any, Int, UInt), a, i - 1, delta)
+array_new_memory(mem::Memory, newlen::Int) = typeof(mem)(undef, newlen) # when implemented, this should attempt to first expand mem
 
-# efficiently delete part of an array
+function _growbeg_internal!(a::Vector, delta::Int, len::Int)
+    @_terminates_locally_meta
+    ref = a.ref
+    mem = ref.mem
+    offset = memoryrefoffset(ref)
+    newlen = len + delta
+    memlen = length(mem)
+    if offset + len - 1 > memlen || offset < 1
+        throw(ConcurrencyViolationError("Vector has invalid state. Don't modify internal fields incorrectly, or resize without correct locks"))
+    end
+    # since we will allocate the array in the middle of the memory we need at least 2*delta extra space
+    # the +1 is because I didn't want to have an off by 1 error.
+    newmemlen = max(overallocation(len), len + 2 * delta + 1)
+    newoffset = div(newmemlen - newlen, 2) + 1
+    # If there is extra data after the end of the array we can use that space so long as there is enough
+    # space at the end that there won't be quadratic behavior with a mix of growth from both ends.
+    # Specifically, we want to ensure that we will only do this operation once before
+    # increasing the size of the array, and that we leave enough space at both the beginning and the end.
+    if newoffset + newlen < memlen
+        newoffset = div(memlen - newlen, 2) + 1
+        newmem = mem
+        unsafe_copyto!(newmem, newoffset + delta, mem, offset, len)
+        for j in offset:newoffset+delta-1
+            @inbounds _unsetindex!(mem, j)
+        end
+    else
+        newmem = array_new_memory(mem, newmemlen)
+        unsafe_copyto!(newmem, newoffset + delta, mem, offset, len)
+    end
+    if ref !== a.ref
+        throw(ConcurrencyViolationError("Vector can not be resized concurrently"))
+    end
+    setfield!(a, :ref, @inbounds memoryref(newmem, newoffset))
+end
+
+function _growbeg!(a::Vector, delta::Integer)
+    @_noub_meta
+    delta = Int(delta)
+    delta == 0 && return # avoid attempting to index off the end
+    delta >= 0 || throw(ArgumentError("grow requires delta >= 0"))
+    ref = a.ref
+    len = length(a)
+    offset = memoryrefoffset(ref)
+    newlen = len + delta
+    # if offset is far enough advanced to fit data in existing memory without copying
+    if delta <= offset - 1
+        setfield!(a, :ref, @inbounds memoryref(ref, 1 - delta))
+        setfield!(a, :size, (newlen,))
+    else
+        @noinline _growbeg_internal!(a, delta, len)
+        setfield!(a, :size, (newlen,))
+    end
+    return
+end
+
+function _growend_internal!(a::Vector, delta::Int, len::Int)
+    ref = a.ref
+    mem = ref.mem
+    memlen = length(mem)
+    newlen = len + delta
+    offset = memoryrefoffset(ref)
+    newmemlen = offset + newlen - 1
+    if offset + len - 1 > memlen || offset < 1
+        throw(ConcurrencyViolationError("Vector has invalid state. Don't modify internal fields incorrectly, or resize without correct locks"))
+    end
+
+    if offset - 1 > div(5 * newlen, 4)
+        # If the offset is far enough that we can copy without resizing
+        # while maintaining proportional spacing on both ends of the array
+        # note that this branch prevents infinite growth when doing combinations
+        # of push! and popfirst! (i.e. when using a Vector as a queue)
+        newmem = mem
+        newoffset = div(newlen, 8) + 1
+    else
+        # grow either by our computed overallocation factor
+        # or exactly the requested size, whichever is larger
+        # TODO we should possibly increase the offset if the current offset is nonzero.
+        newmemlen2 = max(overallocation(memlen), newmemlen)
+        newmem = array_new_memory(mem, newmemlen2)
+        newoffset = offset
+    end
+    newref = @inbounds memoryref(newmem, newoffset)
+    unsafe_copyto!(newref, ref, len)
+    if ref !== a.ref
+        @noinline throw(ConcurrencyViolationError("Vector can not be resized concurrently"))
+    end
+    setfield!(a, :ref, newref)
+return
+end
+
+function _growend!(a::Vector, delta::Integer)
+    @_noub_meta
+    delta = Int(delta)
+    delta >= 0 || throw(ArgumentError("grow requires delta >= 0"))
+    ref = a.ref
+    mem = ref.mem
+    memlen = length(mem)
+    len = length(a)
+    newlen = len + delta
+    offset = memoryrefoffset(ref)
+    newmemlen = offset + newlen - 1
+    if memlen < newmemlen
+        @noinline _growend_internal!(a, delta, len)
+    end
+    setfield!(a, :size, (newlen,))
+    return
+end
 
-_deletebeg!(a::Vector, delta::Integer) =
-    ccall(:jl_array_del_beg, Cvoid, (Any, UInt), a, delta)
-_deleteend!(a::Vector, delta::Integer) =
-    ccall(:jl_array_del_end, Cvoid, (Any, UInt), a, delta)
-_deleteat!(a::Vector, i::Integer, delta::Integer) =
-    ccall(:jl_array_del_at, Cvoid, (Any, Int, UInt), a, i - 1, delta)
+function _growat!(a::Vector, i::Integer, delta::Integer)
+    @_terminates_globally_noub_meta
+    delta = Int(delta)
+    i = Int(i)
+    i == 1 && return _growbeg!(a, delta)
+    len = length(a)
+    i == len + 1 && return _growend!(a, delta)
+    delta >= 0 || throw(ArgumentError("grow requires delta >= 0"))
+    1 < i <= len || throw(BoundsError(a, i))
+    ref = a.ref
+    mem = ref.mem
+    memlen = length(mem)
+    newlen = len + delta
+    offset = memoryrefoffset(ref)
+    newmemlen = offset + newlen - 1
+
+    # which side would we rather grow into?
+    prefer_start = i <= div(len, 2)
+    # if offset is far enough advanced to fit data in beginning of the memory
+    if prefer_start && delta <= offset - 1
+        newref = @inbounds memoryref(mem, offset - delta)
+        unsafe_copyto!(newref, ref, i)
+        setfield!(a, :ref, newref)
+        setfield!(a, :size, (newlen,))
+        for j in i:i+delta-1
+            @inbounds _unsetindex!(a, j)
+        end
+    elseif !prefer_start && memlen >= newmemlen
+        unsafe_copyto!(mem, offset - 1 + delta + i, mem, offset - 1 + i, len - i + 1)
+        setfield!(a, :size, (newlen,))
+        for j in i:i+delta-1
+            @inbounds _unsetindex!(a, j)
+        end
+    else
+        # since we will allocate the array in the middle of the memory we need at least 2*delta extra space
+        # the +1 is because I didn't want to have an off by 1 error.
+        newmemlen = max(overallocation(memlen), len+2*delta+1)
+        newoffset = (newmemlen - newlen) ÷ 2 + 1
+        newmem = array_new_memory(mem, newmemlen)
+        newref = @inbounds memoryref(newmem, newoffset)
+        unsafe_copyto!(newref, ref, i-1)
+        unsafe_copyto!(newmem, newoffset + delta + i - 1, mem, offset + i - 1, len - i + 1)
+        setfield!(a, :ref, newref)
+        setfield!(a, :size, (newlen,))
+    end
+end
 
+# efficiently delete part of an array
+function _deletebeg!(a::Vector, delta::Integer)
+    delta = Int(delta)
+    len = length(a)
+    # See comment in _deleteend!
+    if unsigned(delta) > unsigned(len)
+        throw(ArgumentError("_deletebeg! requires delta in 0:length(a)"))
+    end
+    for i in 1:delta
+        @inbounds _unsetindex!(a, i)
+    end
+    newlen = len - delta
+    setfield!(a, :size, (newlen,))
+    if newlen != 0 # if newlen==0 we could accidentally index past the memory
+        newref = @inbounds memoryref(a.ref, delta + 1)
+        setfield!(a, :ref, newref)
+    end
+    return
+end
+function _deleteend!(a::Vector, delta::Integer)
+    delta = Int(delta)
+    len = length(a)
+    # Do the comparison unsigned, to so the compiler knows `len` cannot be negative.
+    # This works because if delta is negative, it will overflow and still trigger.
+    # This enables the compiler to skip the check sometimes.
+    if unsigned(delta) > unsigned(len)
+        throw(ArgumentError("_deleteend! requires delta in 0:length(a)"))
+    end
+    newlen = len - delta
+    for i in newlen+1:len
+        @inbounds _unsetindex!(a, i)
+    end
+    setfield!(a, :size, (newlen,))
+    return
+end
+function _deleteat!(a::Vector, i::Integer, delta::Integer)
+    i = Int(i)
+    len = length(a)
+    0 <= delta || throw(ArgumentError("_deleteat! requires delta >= 0"))
+    1 <= i <= len || throw(BoundsError(a, i))
+    i + delta <= len + 1 || throw(BoundsError(a, i + delta - 1))
+    newa = a
+    if 2*i + delta <= len
+        unsafe_copyto!(newa, 1 + delta, a, 1, i - 1)
+        _deletebeg!(a, delta)
+    else
+        unsafe_copyto!(newa, i, a, i + delta, len + 1 - delta - i)
+        _deleteend!(a, delta)
+    end
+    return
+end
 ## Dequeue functionality ##
 
 """
@@ -1112,10 +1337,16 @@ See also [`pushfirst!`](@ref).
 function push! end
 
 function push!(a::Vector{T}, item) where T
+    @inline
     # convert first so we don't grow the array if the assignment won't work
-    itemT = item isa T ? item : convert(T, item)::T
+    # and also to avoid a dynamic dynamic dispatch in the common case that
+    # `item` is poorly-typed and `a` is well-typed
+    item = item isa T ? item : convert(T, item)::T
+    return _push!(a, item)
+end
+function _push!(a::Vector{T}, item::T) where T
     _growend!(a, 1)
-    @_safeindex a[length(a)] = itemT
+    @_safeindex a[length(a)] = item
     return a
 end
 
@@ -1174,30 +1405,23 @@ and [`prepend!`](@ref) and [`pushfirst!`](@ref) for the opposite order.
 """
 function append! end
 
-function append!(a::Vector, items::AbstractVector)
-    itemindices = eachindex(items)
-    n = length(itemindices)
+function append!(a::Vector{T}, items::Union{AbstractVector{<:T},Tuple}) where T
+    items isa Tuple && (items = map(x -> convert(T, x), items))
+    n = Int(length(items))::Int
     _growend!(a, n)
-    copyto!(a, length(a)-n+1, items, first(itemindices), n)
+    copyto!(a, length(a)-n+1, items, firstindex(items), n)
     return a
 end
 
 append!(a::AbstractVector, iter) = _append!(a, IteratorSize(iter), iter)
 push!(a::AbstractVector, iter...) = append!(a, iter)
-
-append!(a::AbstractVector, iter...) = foldl(append!, iter, init=a)
+append!(a::AbstractVector, iter...) = (foreach(v -> append!(a, v), iter); a)
 
 function _append!(a::AbstractVector, ::Union{HasLength,HasShape}, iter)
-    @_terminates_locally_meta
-    n = length(a)
-    i = lastindex(a)
-    resize!(a, n+Int(length(iter))::Int)
-    for (i, item) in zip(i+1:lastindex(a), iter)
-        if isa(a, Vector) # give better effects for builtin vectors
-            @_safeindex a[i] = item
-        else
-            a[i] = item
-        end
+    n = Int(length(iter))::Int
+    sizehint!(a, length(a) + n; shrink=false)
+    for item in iter
+        push!(a, item)
     end
     a
 end
@@ -1239,32 +1463,31 @@ julia> prepend!([6], [1, 2], [3, 4, 5])
 """
 function prepend! end
 
-function prepend!(a::Vector, items::AbstractVector)
-    itemindices = eachindex(items)
-    n = length(itemindices)
+function prepend!(a::Vector{T}, items::Union{AbstractVector{<:T},Tuple}) where T
+    items isa Tuple && (items = map(x -> convert(T, x), items))
+    n = length(items)
     _growbeg!(a, n)
-    if a === items
-        copyto!(a, 1, items, n+1, n)
-    else
-        copyto!(a, 1, items, first(itemindices), n)
-    end
+    # in case of aliasing, the _growbeg might have shifted our data, so copy
+    # just the last n elements instead of all of them from the first
+    copyto!(a, 1, items, lastindex(items)-n+1, n)
     return a
 end
 
-prepend!(a::Vector, iter) = _prepend!(a, IteratorSize(iter), iter)
-pushfirst!(a::Vector, iter...) = prepend!(a, iter)
-
-prepend!(a::AbstractVector, iter...) = foldr((v, a) -> prepend!(a, v), iter, init=a)
+prepend!(a::AbstractVector, iter) = _prepend!(a, IteratorSize(iter), iter)
+pushfirst!(a::AbstractVector, iter...) = prepend!(a, iter)
+prepend!(a::AbstractVector, iter...) = (for v = reverse(iter); prepend!(a, v); end; return a)
 
 function _prepend!(a::Vector, ::Union{HasLength,HasShape}, iter)
     @_terminates_locally_meta
     require_one_based_indexing(a)
-    n = length(iter)
-    _growbeg!(a, n)
-    i = 0
+    n = Int(length(iter))::Int
+    sizehint!(a, length(a) + n; first=true, shrink=false)
+    n = 0
     for item in iter
-        @_safeindex a[i += 1] = item
+        n += 1
+        pushfirst!(a, item)
     end
+    reverse!(a, 1, n)
     a
 end
 function _prepend!(a::Vector, ::IteratorSize, iter)
@@ -1278,7 +1501,7 @@ function _prepend!(a::Vector, ::IteratorSize, iter)
 end
 
 """
-    resize!(a::Vector, n::Integer) -> Vector
+    resize!(a::Vector, n::Integer) -> a
 
 Resize `a` to contain `n` elements. If `n` is smaller than the current collection
 length, the first `n` elements will be retained. If `n` is larger, the new elements are not
@@ -1307,10 +1530,14 @@ julia> a[1:6]
  1
 ```
 """
-function resize!(a::Vector, nl::Integer)
+function resize!(a::Vector, nl_::Integer)
+    nl = Int(nl_)::Int
     l = length(a)
     if nl > l
-        _growend!(a, nl-l)
+        # Since l is positive, if nl > l, both are positive, and so nl-l is also
+        # positive. But the compiler does not know that, so we mask out top bit.
+        # This allows the compiler to skip the check
+        _growend!(a, (nl-l) & typemax(Int))
     elseif nl != l
         if nl < 0
             _throw_argerror("new length must be ≥ 0")
@@ -1321,13 +1548,21 @@ function resize!(a::Vector, nl::Integer)
 end
 
 """
-    sizehint!(s, n) -> s
+    sizehint!(s, n; first::Bool=false, shrink::Bool=true) -> s
 
 Suggest that collection `s` reserve capacity for at least `n` elements. That is, if
 you expect that you're going to have to push a lot of values onto `s`, you can avoid
 the cost of incremental reallocation by doing it once up front; this can improve
 performance.
 
+If `first` is `true`, then any additional space is reserved before the start of the collection.
+This way, subsequent calls to `pushfirst!` (instead of `push!`) may become faster.
+Supplying this keyword may result in an error if the collection is not ordered
+or if `pushfirst!` is not supported for this collection.
+
+If `shrink=true` (the default), the collection's capacity may be reduced if its current
+capacity is greater than `n`.
+
 See also [`resize!`](@ref).
 
 # Notes on the performance model
@@ -1342,14 +1577,54 @@ For types that support `sizehint!`,
    `Base`.
 
 3. `empty!` is nearly costless (and O(1)) for types that support this kind of preallocation.
+
+!!! compat "Julia 1.11"
+    The `shrink` and `first` arguments were added in Julia 1.11.
 """
 function sizehint! end
 
-function sizehint!(a::Vector, sz::Integer)
-    ccall(:jl_array_sizehint, Cvoid, (Any, UInt), a, sz)
+function sizehint!(a::Vector, sz::Integer; first::Bool=false, shrink::Bool=true)
+    len = length(a)
+    ref = a.ref
+    mem = ref.mem
+    memlen = length(mem)
+    sz = max(Int(sz), len)
+    inc = sz - len
+    if sz <= memlen
+        # if we don't save at least 1/8th memlen then its not worth it to shrink
+        if !shrink || memlen - sz <= div(memlen, 8)
+            return a
+        end
+        newmem = array_new_memory(mem, sz)
+        if first
+            newref = memoryref(newmem, inc + 1)
+        else
+            newref = memoryref(newmem)
+        end
+        unsafe_copyto!(newref, ref, len)
+        setfield!(a, :ref, newref)
+    elseif first
+        _growbeg!(a, inc)
+        newref = getfield(a, :ref)
+        newref = memoryref(newref, inc + 1)
+        setfield!(a, :size, (len,)) # undo the size change from _growbeg!
+        setfield!(a, :ref, newref) # undo the offset change from _growbeg!
+    else # last
+        _growend!(a, inc)
+        setfield!(a, :size, (len,)) # undo the size change from _growend!
+    end
     a
 end
 
+# Fall-back implementation for non-shrinkable collections
+# avoid defining this the normal way to avoid avoid infinite recursion
+function Core.kwcall(kwargs::NamedTuple{names}, ::typeof(sizehint!), a, sz) where names
+    get(kwargs, :first, false)::Bool
+    get(kwargs, :shrink, true)::Bool
+    isempty(diff_names(names, (:first, :shrink))) || kwerr(kwargs, sizehint!, a, sz)
+    sizehint!(a, sz)
+end
+
 """
     pop!(collection) -> item
 
@@ -1433,6 +1708,7 @@ ERROR: BoundsError: attempt to access 3-element Vector{Int64} at index [4]
 ```
 """
 function popat!(a::Vector, i::Integer)
+    @_propagate_inbounds_meta
     x = a[i]
     _deleteat!(a, i, 1)
     x
@@ -1468,7 +1744,11 @@ julia> pushfirst!([1, 2, 3, 4], 5, 6)
 ```
 """
 function pushfirst!(a::Vector{T}, item) where T
+    @inline
     item = item isa T ? item : convert(T, item)::T
+    return _pushfirst!(a, item)
+end
+function _pushfirst!(a::Vector{T}, item::T) where T
     _growbeg!(a, 1)
     @_safeindex a[1] = item
     return a
@@ -1482,7 +1762,6 @@ function pushfirst!(a::Vector{Any}, @nospecialize x)
 end
 function pushfirst!(a::Vector{Any}, @nospecialize x...)
     @_terminates_locally_meta
-    na = length(a)
     nx = length(x)
     _growbeg!(a, nx)
     @_safeindex for i = 1:nx
@@ -1553,12 +1832,17 @@ julia> insert!(Any[1:6;], 3, "here")
  6
 ```
 """
-function insert!(a::Array{T,1}, i::Integer, item) where T
+function insert!(a::Vector{T}, i::Integer, item) where T
+    @_propagate_inbounds_meta
+    item = item isa T ? item : convert(T, item)::T
+    return _insert!(a, i, item)
+end
+function _insert!(a::Vector{T}, i::Integer, item::T) where T
+    @_noub_meta
     # Throw convert error before changing the shape of the array
-    _item = item isa T ? item : convert(T, item)::T
     _growat!(a, i, 1)
-    # _growat! already did bound check
-    @inbounds a[i] = _item
+    # :noub, because _growat! already did bound check
+    @inbounds a[i] = item
     return a
 end
 
@@ -1591,7 +1875,6 @@ function deleteat!(a::Vector, r::AbstractUnitRange{<:Integer})
     if eltype(r) === Bool
         return invoke(deleteat!, Tuple{Vector, AbstractVector{Bool}}, a, r)
     else
-        n = length(a)
         f = first(r)
         f isa Bool && depwarn("passing Bool as an index is deprecated", :deleteat!)
         isempty(r) || _deleteat!(a, f, length(r))
@@ -1635,17 +1918,19 @@ struct Nowhere; end
 push!(::Nowhere, _) = nothing
 _growend!(::Nowhere, _) = nothing
 
-@inline function _push_deleted!(dltd, a::Vector, ind)
-    if @inbounds isassigned(a, ind)
-        push!(dltd, @inbounds a[ind])
+function _push_deleted!(dltd, a::Vector, ind)
+    @_propagate_inbounds_meta
+    if isassigned(a, ind)
+        push!(dltd, a[ind])
     else
         _growend!(dltd, 1)
     end
 end
 
-@inline function _copy_item!(a::Vector, p, q)
-    if @inbounds isassigned(a, q)
-        @inbounds a[p] = a[q]
+function _copy_item!(a::Vector, p, q)
+    @_propagate_inbounds_meta
+    if isassigned(a, q)
+        a[p] = a[q]
     else
         _unsetindex!(a, p)
     end
@@ -1657,7 +1942,7 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere())
     y === nothing && return a
     (p, s) = y
     checkbounds(a, p)
-    _push_deleted!(dltd, a, p)
+    @inbounds _push_deleted!(dltd, a, p)
     q = p+1
     while true
         y = iterate(inds, s)
@@ -1671,14 +1956,14 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere())
             end
         end
         while q < i
-            _copy_item!(a, p, q)
+            @inbounds _copy_item!(a, p, q)
             p += 1; q += 1
         end
-        _push_deleted!(dltd, a, i)
+        @inbounds _push_deleted!(dltd, a, i)
         q = i+1
     end
     while q <= n
-        _copy_item!(a, p, q)
+        @inbounds _copy_item!(a, p, q)
         p += 1; q += 1
     end
     _deleteend!(a, n-p+1)
@@ -1691,7 +1976,7 @@ function deleteat!(a::Vector, inds::AbstractVector{Bool})
     length(inds) == n || throw(BoundsError(a, inds))
     p = 1
     for (q, i) in enumerate(inds)
-        _copy_item!(a, p, q)
+        @inbounds _copy_item!(a, p, q)
         p += !i
     end
     _deleteend!(a, n-p+1)
@@ -1781,6 +2066,8 @@ place of the removed items; in this case, `indices` must be a `AbstractUnitRange
 To insert `replacement` before an index `n` without removing any items, use
 `splice!(collection, n:n-1, replacement)`.
 
+$(_DOCS_ALIASING_WARNING)
+
 !!! compat "Julia 1.5"
     Prior to Julia 1.5, `indices` must always be a `UnitRange`.
 
@@ -1841,10 +2128,12 @@ end
 
 # use memcmp for cmp on byte arrays
 function cmp(a::Array{UInt8,1}, b::Array{UInt8,1})
-    ta = @_gc_preserve_begin a
-    tb = @_gc_preserve_begin b
-    pa = unsafe_convert(Ptr{Cvoid}, a)
-    pb = unsafe_convert(Ptr{Cvoid}, b)
+    aref = a.ref
+    bref = b.ref
+    ta = @_gc_preserve_begin aref
+    tb = @_gc_preserve_begin bref
+    pa = unsafe_convert(Ptr{Cvoid}, aref)
+    pb = unsafe_convert(Ptr{Cvoid}, bref)
     c = memcmp(pa, pb, min(length(a),length(b)))
     @_gc_preserve_end ta
     @_gc_preserve_end tb
@@ -1855,10 +2144,12 @@ const BitIntegerArray{N} = Union{map(T->Array{T,N}, BitInteger_types)...} where
 # use memcmp for == on bit integer types
 function ==(a::Arr, b::Arr) where {Arr <: BitIntegerArray}
     if size(a) == size(b)
-        ta = @_gc_preserve_begin a
-        tb = @_gc_preserve_begin b
-        pa = unsafe_convert(Ptr{Cvoid}, a)
-        pb = unsafe_convert(Ptr{Cvoid}, b)
+        aref = a.ref
+        bref = b.ref
+        ta = @_gc_preserve_begin aref
+        tb = @_gc_preserve_begin bref
+        pa = unsafe_convert(Ptr{Cvoid}, aref)
+        pb = unsafe_convert(Ptr{Cvoid}, bref)
         c = memcmp(pa, pb, sizeof(eltype(Arr)) * length(a))
         @_gc_preserve_end ta
         @_gc_preserve_end tb
@@ -1871,11 +2162,13 @@ end
 function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray{1}
     len = length(a)
     if len == length(b)
-        ta = @_gc_preserve_begin a
-        tb = @_gc_preserve_begin b
+        aref = a.ref
+        bref = b.ref
+        ta = @_gc_preserve_begin aref
+        tb = @_gc_preserve_begin bref
         T = eltype(Arr)
-        pa = unsafe_convert(Ptr{T}, a)
-        pb = unsafe_convert(Ptr{T}, b)
+        pa = unsafe_convert(Ptr{T}, aref)
+        pb = unsafe_convert(Ptr{T}, bref)
         c = memcmp(pa, pb, sizeof(T) * len)
         @_gc_preserve_end ta
         @_gc_preserve_end tb
@@ -1944,7 +2237,7 @@ end
 # 1d special cases of reverse(A; dims) and reverse!(A; dims):
 for (f,_f) in ((:reverse,:_reverse), (:reverse!,:_reverse!))
     @eval begin
-        $f(A::AbstractVector; dims=:) = $_f(A, dims)
+        $f(A::AbstractVector; dims::D=:) where {D} = $_f(A, dims)
         $_f(A::AbstractVector, ::Colon) = $f(A, firstindex(A), lastindex(A))
         $_f(A::AbstractVector, dim::Tuple{Integer}) = $_f(A, first(dim))
         function $_f(A::AbstractVector, dim::Integer)
@@ -2041,18 +2334,6 @@ function vcat(arrays::Vector{T}...) where T
 end
 vcat(A::Vector...) = cat(A...; dims=Val(1)) # more special than SparseArrays's vcat
 
-# disambiguation with LinAlg/special.jl
-# Union{Number,Vector,Matrix} is for LinearAlgebra._DenseConcatGroup
-# VecOrMat{T} is for LinearAlgebra._TypedDenseConcatGroup
-hcat(A::Union{Number,Vector,Matrix}...) = cat(A...; dims=Val(2))
-hcat(A::VecOrMat{T}...) where {T} = typed_hcat(T, A...)
-vcat(A::Union{Number,Vector,Matrix}...) = cat(A...; dims=Val(1))
-vcat(A::VecOrMat{T}...) where {T} = typed_vcat(T, A...)
-hvcat(rows::Tuple{Vararg{Int}}, xs::Union{Number,Vector,Matrix}...) =
-    typed_hvcat(promote_eltypeof(xs...), rows, xs...)
-hvcat(rows::Tuple{Vararg{Int}}, xs::VecOrMat{T}...) where {T} =
-    typed_hvcat(T, rows, xs...)
-
 _cat(n::Integer, x::Integer...) = reshape([x...], (ntuple(Returns(1), n-1)..., length(x)))
 
 ## find ##
@@ -2135,7 +2416,9 @@ findfirst(A::AbstractArray) = findnext(A, first(keys(A)))
     findnext(predicate::Function, A, i)
 
 Find the next index after or including `i` of an element of `A`
-for which `predicate` returns `true`, or `nothing` if not found.
+for which `predicate` returns `true`, or `nothing` if not found. This works for
+Arrays, Strings, and most other collections that support [`getindex`](@ref),
+[`keys(A)`](@ref), and [`nextind`](@ref).
 
 Indices are of the same type as those returned by [`keys(A)`](@ref)
 and [`pairs(A)`](@ref).
@@ -2153,6 +2436,9 @@ julia> A = [1 4; 2 2];
 
 julia> findnext(isodd, A, CartesianIndex(1, 1))
 CartesianIndex(1, 1)
+
+julia> findnext(isspace, "a b c", 3)
+4
 ```
 """
 function findnext(testf::Function, A, start)
@@ -2214,20 +2500,29 @@ end
 findfirst(testf::Function, A::Union{AbstractArray, AbstractString}) =
     findnext(testf, A, first(keys(A)))
 
-findfirst(p::Union{Fix2{typeof(isequal),Int},Fix2{typeof(==),Int}}, r::OneTo{Int}) =
-    1 <= p.x <= r.stop ? p.x : nothing
+findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::OneTo) where {T<:Integer} =
+    1 <= p.x <= r.stop ? convert(keytype(r), p.x) : nothing
 
-findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::AbstractUnitRange) where {T<:Integer} =
-    first(r) <= p.x <= last(r) ? firstindex(r) + Int(p.x - first(r)) : nothing
+findfirst(::typeof(iszero), ::OneTo) = nothing
+findfirst(::typeof(isone), r::OneTo) = isempty(r) ? nothing : oneunit(keytype(r))
+
+function findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::AbstractUnitRange{<:Integer}) where {T<:Integer}
+    first(r) <= p.x <= last(r) || return nothing
+    i1 = first(keys(r))
+    return i1 + oftype(i1, p.x - first(r))
+end
 
 function findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::StepRange{T,S}) where {T,S}
     isempty(r) && return nothing
     minimum(r) <= p.x <= maximum(r) || return nothing
-    d = convert(S, p.x - first(r))::S
+    d = p.x - first(r)
     iszero(d % step(r)) || return nothing
-    return d ÷ step(r) + 1
+    return convert(keytype(r), d ÷ step(r) + 1)
 end
 
+findfirst(::typeof(iszero), r::AbstractRange) = findfirst(==(zero(first(r))), r)
+findfirst(::typeof(isone), r::AbstractRange) = findfirst(==(one(first(r))), r)
+
 """
     findprev(A, i)
 
@@ -2309,7 +2604,9 @@ findlast(A::AbstractArray) = findprev(A, last(keys(A)))
     findprev(predicate::Function, A, i)
 
 Find the previous index before or including `i` of an element of `A`
-for which `predicate` returns `true`, or `nothing` if not found.
+for which `predicate` returns `true`, or `nothing` if not found. This works for
+Arrays, Strings, and most other collections that support [`getindex`](@ref),
+[`keys(A)`](@ref), and [`nextind`](@ref).
 
 Indices are of the same type as those returned by [`keys(A)`](@ref)
 and [`pairs(A)`](@ref).
@@ -2335,6 +2632,9 @@ julia> A = [4 6; 1 2]
 
 julia> findprev(isodd, A, CartesianIndex(1, 2))
 CartesianIndex(2, 1)
+
+julia> findprev(isspace, "a b c", 3)
+2
 ```
 """
 function findprev(testf::Function, A, start)
@@ -2393,6 +2693,17 @@ end
 findlast(testf::Function, A::Union{AbstractArray, AbstractString}) =
     findprev(testf, A, last(keys(A)))
 
+# for monotonic ranges, there is a unique index corresponding to a value, so findfirst and findlast are identical
+function findlast(p::Union{Fix2{typeof(isequal),<:Integer},Fix2{typeof(==),<:Integer},typeof(iszero),typeof(isone)},
+        r::AbstractUnitRange{<:Integer})
+    findfirst(p, r)
+end
+
+function findlast(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T},typeof(iszero),typeof(isone)},
+        r::StepRange{T,S}) where {T,S}
+    findfirst(p, r)
+end
+
 """
     findall(f::Function, A)
 
@@ -2437,7 +2748,7 @@ Dict{Symbol, Int64} with 3 entries:
   :B => -1
   :C => 0
 
-julia> findall(x -> x >= 0, d)
+julia> findall(≥(0), d)
 2-element Vector{Symbol}:
  :A
  :C
@@ -2498,42 +2809,19 @@ function findall(A)
 end
 
 # Allocating result upfront is faster (possible only when collection can be iterated twice)
-function _findall(f::Function, A::AbstractArray{Bool})
-    n = count(f, A)
+function findall(A::AbstractArray{Bool})
+    n = count(A)
     I = Vector{eltype(keys(A))}(undef, n)
-    isempty(I) && return I
-    _findall(f, I, A)
-end
-
-function _findall(f::Function, I::Vector, A::AbstractArray{Bool})
-    cnt = 1
-    len = length(I)
-    for (k, v) in pairs(A)
-        @inbounds I[cnt] = k
-        cnt += f(v)
-        cnt > len && return I
-    end
-    # In case of impure f, this line could potentially be hit. In that case,
-    # we can't assume I is the correct length.
-    resize!(I, cnt - 1)
-end
-
-function _findall(f::Function, I::Vector, A::AbstractVector{Bool})
-    i = firstindex(A)
     cnt = 1
-    len = length(I)
-    while cnt ≤ len
-        @inbounds I[cnt] = i
-        cnt += f(@inbounds A[i])
-        i = nextind(A, i)
+    for (i,a) in pairs(A)
+        if a
+            I[cnt] = i
+            cnt += 1
+        end
     end
-    cnt - 1 == len ? I : resize!(I, cnt - 1)
+    I
 end
 
-findall(f::Function, A::AbstractArray{Bool}) = _findall(f, A)
-findall(f::Fix2{typeof(in)}, A::AbstractArray{Bool}) = _findall(f, A)
-findall(A::AbstractArray{Bool}) = _findall(identity, A)
-
 findall(x::Bool) = x ? [1] : Vector{Int}()
 findall(testf::Function, x::Number) = testf(x) ? [1] : Vector{Int}()
 findall(p::Fix2{typeof(in)}, x::Number) = x in p.x ? [1] : Vector{Int}()
@@ -2581,14 +2869,14 @@ function indexin(a, b::AbstractArray)
     ]
 end
 
-function _findin(a::Union{AbstractArray, Tuple}, b)
+function _findin(a::Union{AbstractArray, Tuple}, b::AbstractSet)
     ind  = Vector{eltype(keys(a))}()
-    bset = Set(b)
     @inbounds for (i,ai) in pairs(a)
-        ai in bset && push!(ind, i)
+        ai in b && push!(ind, i)
     end
     ind
 end
+_findin(a::Union{AbstractArray, Tuple}, b) = _findin(a, Set(b))
 
 # If two collections are already sorted, _findin can be computed with
 # a single traversal of the two collections. This is much faster than
@@ -2794,6 +3082,8 @@ Remove the items at all the indices which are not given by `inds`,
 and return the modified `a`.
 Items which are kept are shifted to fill the resulting gaps.
 
+$(_DOCS_ALIASING_WARNING)
+
 `inds` must be an iterator of sorted and unique integer indices.
 See also [`deleteat!`](@ref).
 
@@ -2878,14 +3168,17 @@ setdiff!(  v::AbstractVector, itrs...) = _shrink!(setdiff!, v, itrs)
 
 vectorfilter(T::Type, f, v) = T[x for x in v if f(x)]
 
-function _shrink(shrinker!::F, itr, itrs) where F
+function intersect(itr, itrs...)
     T = promote_eltype(itr, itrs...)
-    keep = shrinker!(Set{T}(itr), itrs...)
+    keep = intersect!(Set{T}(itr), itrs...)
     vectorfilter(T, _shrink_filter!(keep), itr)
 end
 
-intersect(itr, itrs...) = _shrink(intersect!, itr, itrs)
-setdiff(  itr, itrs...) = _shrink(setdiff!, itr, itrs)
+function setdiff(itr, itrs...)
+    T = eltype(itr)
+    keep = setdiff!(Set{T}(itr), itrs...)
+    vectorfilter(T, _shrink_filter!(keep), itr)
+end
 
 function intersect(v::AbstractVector, r::AbstractRange)
     T = promote_eltype(v, r)
@@ -2894,3 +3187,61 @@ function intersect(v::AbstractVector, r::AbstractRange)
     return vectorfilter(T, _shrink_filter!(seen), common)
 end
 intersect(r::AbstractRange, v::AbstractVector) = intersect(v, r)
+
+# Here instead of range.jl for bootstrapping because `@propagate_inbounds` depends on Vectors.
+@propagate_inbounds function getindex(v::AbstractRange, i::Integer)
+    if i isa Bool # Not via dispatch to avoid ambiguities
+        throw(ArgumentError("invalid index: $i of type Bool"))
+    else
+        _getindex(v, i)
+    end
+end
+
+"""
+    wrap(Array, m::Union{Memory{T}, MemoryRef{T}}, dims)
+
+Create an array of size `dims` using `m` as the underlying memory. This can be thought of as a safe version
+of [`unsafe_wrap`](@ref) utilizing `Memory` or `MemoryRef` instead of raw pointers.
+"""
+function wrap end
+
+# validity checking for _wrap calls, separate from allocation of Array so that it can be more likely to inline into the caller
+function _wrap(ref::MemoryRef{T}, dims::NTuple{N, Int}) where {T, N}
+    mem = ref.mem
+    mem_len = length(mem) + 1 - memoryrefoffset(ref)
+    len = Core.checked_dims(dims...)
+    @boundscheck mem_len >= len || invalid_wrap_err(mem_len, dims, len)
+    return ref
+end
+
+@noinline invalid_wrap_err(len, dims, proddims) = throw(DimensionMismatch(LazyString(
+    "Attempted to wrap a MemoryRef of length ", len, " with an Array of size dims=", dims,
+    " which is invalid because prod(dims) = ", proddims, " > ", len,
+    " so that the array would have more elements than the underlying memory can store.")))
+
+@eval @propagate_inbounds function wrap(::Type{Array}, m::MemoryRef{T}, dims::NTuple{N, Integer}) where {T, N}
+    dims = convert(Dims, dims)
+    ref = _wrap(m, dims)
+    $(Expr(:new, :(Array{T, N}), :ref, :dims))
+end
+
+@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}, dims::NTuple{N, Integer}) where {T, N}
+    dims = convert(Dims, dims)
+    ref = _wrap(memoryref(m), dims)
+    $(Expr(:new, :(Array{T, N}), :ref, :dims))
+end
+@eval @propagate_inbounds function wrap(::Type{Array}, m::MemoryRef{T}, l::Integer) where {T}
+    dims = (Int(l),)
+    ref = _wrap(m, dims)
+    $(Expr(:new, :(Array{T, 1}), :ref, :dims))
+end
+@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}, l::Integer) where {T}
+    dims = (Int(l),)
+    ref = _wrap(memoryref(m), (l,))
+    $(Expr(:new, :(Array{T, 1}), :ref, :dims))
+end
+@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}) where {T}
+    ref = memoryref(m)
+    dims = (length(m),)
+    $(Expr(:new, :(Array{T, 1}), :ref, :dims))
+end
diff --git a/base/arraymath.jl b/base/arraymath.jl
index 62dc3772e4938..31ecb48daff68 100644
--- a/base/arraymath.jl
+++ b/base/arraymath.jl
@@ -2,26 +2,57 @@
 
 ## Binary arithmetic operators ##
 
+function _broadcast_preserving_zero_d(f, A, B)
+    broadcast_preserving_zero_d(f, A, B)
+end
+
+# Using map over broadcast enables vectorization for wide matrices with few rows.
+# This is because we use linear indexing in `map` as opposed to Cartesian indexing in broadcasting.
+# https://github.com/JuliaLang/julia/issues/47873#issuecomment-1352472461
+function _broadcast_preserving_zero_d(f, A::Array{<:Any,N}, B::Array{<:Any,N}, Cs::Array{<:Any,N}...) where {N}
+    map(f, A, B, Cs...)
+end
+
+function _broadcast_preserving_zero_d(f, A::Array, B::Array, Cs::Array...)
+    # we already know that the shapes are compatible.
+    # We just need to select the size corresponding to the higest ndims
+    # and reshape all the arrays to that size
+    arrays = (A, B, Cs...)
+    sz = mapreduce(size, (x,y) -> length(x) > length(y) ? x : y, arrays)
+    # Skip reshaping where possible to avoid the overhead
+    arrays_sameshape = map(x -> length(sz) == ndims(x) ? x : reshape(x, sz), arrays)
+    map(f, arrays_sameshape...)
+end
+
+function _broadcast_preserving_zero_d(f, A::Array, B::Number)
+    map(Fix2(f, B), A)
+end
+
+function _broadcast_preserving_zero_d(f, A::Number, B::Array)
+    map(Fix1(f, A), B)
+end
+
 for f in (:+, :-)
     @eval function ($f)(A::AbstractArray, B::AbstractArray)
         promote_shape(A, B) # check size compatibility
-        broadcast_preserving_zero_d($f, A, B)
+        _broadcast_preserving_zero_d($f, A, B)
     end
 end
 
-function +(A::Array, Bs::Array...)
-    for B in Bs
-        promote_shape(A, B) # check size compatibility
+function +(A::Array, B::Array, Cs::Array...)
+    promote_shape(A, B)
+    for C in Cs
+        promote_shape(A, C) # check size compatibility
     end
-    broadcast_preserving_zero_d(+, A, Bs...)
+    _broadcast_preserving_zero_d(+, A, B, Cs...)
 end
 
 for f in (:/, :\, :*)
     if f !== :/
-        @eval ($f)(A::Number, B::AbstractArray) = broadcast_preserving_zero_d($f, A, B)
+        @eval ($f)(A::Number, B::AbstractArray) = _broadcast_preserving_zero_d($f, A, B)
     end
     if f !== :\
-        @eval ($f)(A::AbstractArray, B::Number) = broadcast_preserving_zero_d($f, A, B)
+        @eval ($f)(A::AbstractArray, B::Number) = _broadcast_preserving_zero_d($f, A, B)
     end
 end
 
@@ -56,8 +87,8 @@ julia> reverse(b)
 !!! compat "Julia 1.6"
     Prior to Julia 1.6, only single-integer `dims` are supported in `reverse`.
 """
-reverse(A::AbstractArray; dims=:) = _reverse(A, dims)
-_reverse(A, dims) = reverse!(copymutable(A); dims)
+reverse(A::AbstractArray; dims::D=:) where {D} = _reverse(A, dims)
+_reverse(A, dims::D) where {D} = reverse!(copymutable(A); dims)
 
 """
     reverse!(A; dims=:)
@@ -67,17 +98,17 @@ Like [`reverse`](@ref), but operates in-place in `A`.
 !!! compat "Julia 1.6"
     Multidimensional `reverse!` requires Julia 1.6.
 """
-reverse!(A::AbstractArray; dims=:) = _reverse!(A, dims)
+reverse!(A::AbstractArray; dims::D=:) where {D} = _reverse!(A, dims)
 _reverse!(A::AbstractArray{<:Any,N}, ::Colon) where {N} = _reverse!(A, ntuple(identity, Val{N}()))
 _reverse!(A, dim::Integer) = _reverse!(A, (Int(dim),))
 _reverse!(A, dims::NTuple{M,Integer}) where {M} = _reverse!(A, Int.(dims))
 function _reverse!(A::AbstractArray{<:Any,N}, dims::NTuple{M,Int}) where {N,M}
+    dims === () && return A # nothing to reverse
     dimrev = ntuple(k -> k in dims, Val{N}()) # boolean tuple indicating reversed dims
 
     if N < M || M != sum(dimrev)
         throw(ArgumentError("invalid dimensions $dims in reverse!"))
     end
-    M == 0 && return A # nothing to reverse
 
     # swapping loop only needs to traverse ≈half of the array
     halfsz = ntuple(k -> k == dims[1] ? size(A,k) ÷ 2 : size(A,k), Val{N}())
diff --git a/base/arrayshow.jl b/base/arrayshow.jl
index a05a8d4dac51c..cbdca7b2686c4 100644
--- a/base/arrayshow.jl
+++ b/base/arrayshow.jl
@@ -41,7 +41,7 @@ Accept keyword args `c` for alternate single character marker.
 """
 function replace_with_centered_mark(s::AbstractString;c::AbstractChar = '⋅')
     N = textwidth(ANSIIterator(s))
-    return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2)))
+    return N == 0 ? string(c) : join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2)))
 end
 
 const undef_ref_alignment = (3,3)
@@ -100,7 +100,7 @@ function print_matrix_row(io::IO,
         #= `axes(X)` may not infer, set this in caller =# idxlast::Integer=last(axes(X, 2)))
     for (k, j) = enumerate(cols)
         k > length(A) && break
-        if isassigned(X,Int(i),Int(j)) # isassigned accepts only `Int` indices
+        if isassigned(X,i,j)
             x = X[i,j]
             a = alignment(io, x)::Tuple{Int,Int}
 
@@ -361,16 +361,16 @@ print_array(io::IO, X::AbstractArray) = show_nd(io, X, print_matrix, true)
 # typeinfo aware
 # implements: show(io::IO, ::MIME"text/plain", X::AbstractArray)
 function show(io::IO, ::MIME"text/plain", X::AbstractArray)
-    if isempty(X) && (get(io, :compact, false)::Bool || X isa Vector)
+    if isempty(X) && (get(io, :compact, false)::Bool || X isa AbstractVector)
         return show(io, X)
     end
-    # 0) show summary before setting :compact
+    # 1) show summary before setting :compact
     summary(io, X)
     isempty(X) && return
     print(io, ":")
     show_circular(io, X) && return
 
-    # 1) compute new IOContext
+    # 2) compute new IOContext
     if !haskey(io, :compact) && length(axes(X, 2)) > 1
         io = IOContext(io, :compact => true)
     end
@@ -385,7 +385,7 @@ function show(io::IO, ::MIME"text/plain", X::AbstractArray)
         println(io)
     end
 
-    # 2) update typeinfo
+    # 3) update typeinfo
     #
     # it must come after printing the summary, which can exploit :typeinfo itself
     # (e.g. views)
@@ -394,7 +394,7 @@ function show(io::IO, ::MIME"text/plain", X::AbstractArray)
     # checking for current :typeinfo (this could be changed in the future)
     io = IOContext(io, :typeinfo => eltype(X))
 
-    # 2) show actual content
+    # 4) show actual content
     recur_io = IOContext(io, :SHOWN_SET => X)
     print_array(recur_io, X)
 end
@@ -488,9 +488,12 @@ function show(io::IO, X::AbstractArray)
     if !implicit
         io = IOContext(io, :typeinfo => eltype(X))
     end
-    isempty(X) ?
-        _show_empty(io, X) :
-        _show_nonempty(io, X, prefix)
+    if isempty(X)
+        return _show_empty(io, X)
+    end
+    show_circular(io, X) && return
+    recur_io = IOContext(io, :SHOWN_SET => X)
+    _show_nonempty(recur_io, X, prefix)
 end
 
 ### 0-dimensional arrays (#31481)
@@ -545,6 +548,12 @@ typeinfo_eltype(typeinfo::Type{<:AbstractArray{T}}) where {T} = eltype(typeinfo)
 typeinfo_eltype(typeinfo::Type{<:AbstractDict{K,V}}) where {K,V} = eltype(typeinfo)
 typeinfo_eltype(typeinfo::Type{<:AbstractSet{T}}) where {T} = eltype(typeinfo)
 
+# This is a fancy way to make de-specialize a call to `typeinfo_implicit(T)`
+# which is unfortunately invalidated by Dates
+#  (https://github.com/JuliaLang/julia/issues/56080)
+#
+# This makes the call less efficient, but avoids being invalidated by Dates.
+_typeinfo_implicit(@nospecialize(T)) = Base.invoke_in_world(Base.tls_world_age(), typeinfo_implicit, T)::Bool
 
 # types that can be parsed back accurately from their un-decorated representations
 function typeinfo_implicit(@nospecialize(T))
@@ -553,9 +562,9 @@ function typeinfo_implicit(@nospecialize(T))
         return true
     end
     return isconcretetype(T) &&
-        ((T <: Array && typeinfo_implicit(eltype(T))) ||
-         ((T <: Tuple || T <: Pair) && all(typeinfo_implicit, fieldtypes(T))) ||
-         (T <: AbstractDict && typeinfo_implicit(keytype(T)) && typeinfo_implicit(valtype(T))))
+        ((T <: Array && _typeinfo_implicit(eltype(T))) ||
+         ((T <: Tuple || T <: NamedTuple || T <: Pair) && all(_typeinfo_implicit, fieldtypes(T))) ||
+         (T <: AbstractDict && _typeinfo_implicit(keytype(T)) && _typeinfo_implicit(valtype(T))))
 end
 
 # X not constrained, can be any iterable (cf. show_vector)
@@ -573,7 +582,7 @@ function typeinfo_prefix(io::IO, X)
     if X isa AbstractDict
         if eltype_X == eltype_ctx
             sprint(show_type_name, typeof(X).name; context=io), false
-        elseif !isempty(X) && typeinfo_implicit(keytype(X)) && typeinfo_implicit(valtype(X))
+        elseif !isempty(X) && _typeinfo_implicit(keytype(X)) && _typeinfo_implicit(valtype(X))
             sprint(show_type_name, typeof(X).name; context=io), true
         else
             sprint(print, typeof(X); context=io), false
@@ -582,7 +591,7 @@ function typeinfo_prefix(io::IO, X)
         # Types hard-coded here are those which are created by default for a given syntax
         if eltype_X == eltype_ctx
             "", false
-        elseif !isempty(X) && typeinfo_implicit(eltype_X)
+        elseif !isempty(X) && _typeinfo_implicit(eltype_X)
             "", true
         elseif print_without_params(eltype_X)
             sprint(show_type_name, unwrap_unionall(eltype_X).name; context=io), false # Print "Array" rather than "Array{T,N}"
diff --git a/base/asyncevent.jl b/base/asyncevent.jl
index 498fb054ecd02..a4a82b4aba120 100644
--- a/base/asyncevent.jl
+++ b/base/asyncevent.jl
@@ -5,11 +5,12 @@
 """
     AsyncCondition()
 
-Create a async condition that wakes up tasks waiting for it
+Create an async condition that wakes up tasks waiting for it
 (by calling [`wait`](@ref) on the object)
 when notified from C by a call to `uv_async_send`.
 Waiting tasks are woken with an error when the object is closed (by [`close`](@ref)).
-Use [`isopen`](@ref) to check whether it is still active.
+Use [`isopen`](@ref) to check whether it is still active. A closed condition is inactive and will
+not wake up tasks.
 
 This provides an implicit acquire & release memory ordering between the sending and waiting threads.
 """
@@ -40,7 +41,7 @@ end
 """
     AsyncCondition(callback::Function)
 
-Create a async condition that calls the given `callback` function. The `callback` is passed one argument,
+Create an async condition that calls the given `callback` function. The `callback` is passed one argument,
 the async condition object itself.
 """
 function AsyncCondition(cb::Function)
@@ -74,7 +75,24 @@ Create a timer that wakes up tasks waiting for it (by calling [`wait`](@ref) on
 Waiting tasks are woken after an initial delay of at least `delay` seconds, and then repeating after
 at least `interval` seconds again elapse. If `interval` is equal to `0`, the timer is only triggered
 once. When the timer is closed (by [`close`](@ref)) waiting tasks are woken with an error. Use
-[`isopen`](@ref) to check whether a timer is still active.
+[`isopen`](@ref) to check whether a timer is still active. An inactive timer will not fire.
+Use `t.timeout` and `t.interval` to read the setup conditions of a `Timer` `t`.
+
+```julia-repl
+julia> t = Timer(1.0; interval=0.5)
+Timer (open, timeout: 1.0 s, interval: 0.5 s) @0x000000010f4e6e90
+
+julia> isopen(t)
+true
+
+julia> t.timeout
+1.0
+
+julia> close(t)
+
+julia> isopen(t)
+false
+```
 
 !!! note
     `interval` is subject to accumulating time skew. If you need precise events at a particular
@@ -84,12 +102,17 @@ once. When the timer is closed (by [`close`](@ref)) waiting tasks are woken with
     A `Timer` requires yield points to update its state. For instance, `isopen(t::Timer)` cannot be
     used to timeout a non-yielding while loop.
 
+!!! compat "Julia 1.12
+    The `timeout` and `interval` readable properties were added in Julia 1.12.
+
 """
 mutable struct Timer
     @atomic handle::Ptr{Cvoid}
     cond::ThreadSynchronizer
     @atomic isopen::Bool
     @atomic set::Bool
+    timeout_ms::UInt64
+    interval_ms::UInt64
 
     function Timer(timeout::Real; interval::Real = 0.0)
         timeout ≥ 0 || throw(ArgumentError("timer cannot have negative timeout of $timeout seconds"))
@@ -99,7 +122,7 @@ mutable struct Timer
         intervalms = ceil(UInt64, interval * 1000)
         loop = eventloop()
 
-        this = new(Libc.malloc(_sizeof_uv_timer), ThreadSynchronizer(), true, false)
+        this = new(Libc.malloc(_sizeof_uv_timer), ThreadSynchronizer(), true, false, timeoutms, intervalms)
         associate_julia_struct(this.handle, this)
         iolock_begin()
         err = ccall(:uv_timer_init, Cint, (Ptr{Cvoid}, Ptr{Cvoid}), loop, this)
@@ -114,18 +137,43 @@ mutable struct Timer
         return this
     end
 end
+function getproperty(t::Timer, f::Symbol)
+    if f == :timeout
+        t.timeout_ms == 0 && return 0.0
+        return (t.timeout_ms - 1) / 1000 # remove the +1ms compensation from the constructor
+    elseif f == :interval
+        return t.interval_ms / 1000
+    else
+        return getfield(t, f)
+    end
+end
+propertynames(::Timer) = (:handle, :cond, :isopen, :set, :timeout, :timeout_ms, :interval, :interval_ms)
+
+function show(io::IO, t::Timer)
+    state = isopen(t) ? "open" : "closed"
+    interval = t.interval
+    interval_str = interval > 0 ? ", interval: $(t.interval) s" : ""
+    print(io, "Timer ($state, timeout: $(t.timeout) s$interval_str) @0x$(string(convert(UInt, pointer_from_objref(t)), base = 16, pad = Sys.WORD_SIZE>>2))")
+end
 
 unsafe_convert(::Type{Ptr{Cvoid}}, t::Timer) = t.handle
 unsafe_convert(::Type{Ptr{Cvoid}}, async::AsyncCondition) = async.handle
 
+# if this returns true, the object has been signaled
+# if this returns false, the object is closed
 function _trywait(t::Union{Timer, AsyncCondition})
     set = t.set
     if set
         # full barrier now for AsyncCondition
         t isa Timer || Core.Intrinsics.atomic_fence(:acquire_release)
     else
-        t.isopen || return false
-        t.handle == C_NULL && return false
+        if !isopen(t)
+            set = t.set
+            if !set
+                close(t) # wait for the close to complete
+                return false
+            end
+        end
         iolock_begin()
         set = t.set
         if !set
@@ -133,7 +181,7 @@ function _trywait(t::Union{Timer, AsyncCondition})
             lock(t.cond)
             try
                 set = t.set
-                if !set && t.isopen && t.handle != C_NULL
+                if !set && t.handle != C_NULL # wait for set or handle, but not the isopen flag
                     iolock_end()
                     set = wait(t.cond)
                     unlock(t.cond)
@@ -147,7 +195,7 @@ function _trywait(t::Union{Timer, AsyncCondition})
         end
         iolock_end()
     end
-    @atomic :monotonic t.set = false
+    @atomic :monotonic t.set = false # if there are multiple waiters, an unspecified number may short-circuit past here
     return set
 end
 
@@ -157,13 +205,41 @@ function wait(t::Union{Timer, AsyncCondition})
 end
 
 
-isopen(t::Union{Timer, AsyncCondition}) = t.isopen && t.handle != C_NULL
+isopen(t::Union{Timer, AsyncCondition}) = @atomic :acquire t.isopen
+
+"""
+    close(t::Union{Timer, AsyncCondition})
+
+Close an object `t` and thus mark it as inactive. Once a timer or condition is inactive, it will not produce
+a new event.
 
+See also: [`isopen`](@ref)
+"""
 function close(t::Union{Timer, AsyncCondition})
+    t.handle == C_NULL && !t.isopen && return # short-circuit path, :monotonic
     iolock_begin()
-    if isopen(t)
-        @atomic :monotonic t.isopen = false
-        ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t)
+    if t.handle != C_NULL
+        if t.isopen
+            @atomic :release t.isopen = false
+            ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t)
+        end
+        # implement _trywait here without the auto-reset function, just waiting for the final close signal
+        preserve_handle(t)
+        lock(t.cond)
+        try
+            while t.handle != C_NULL
+                iolock_end()
+                wait(t.cond)
+                unlock(t.cond)
+                iolock_begin()
+                lock(t.cond)
+            end
+        finally
+            unlock(t.cond)
+            unpreserve_handle(t)
+        end
+    elseif t.isopen
+        @atomic :release t.isopen = false
     end
     iolock_end()
     nothing
@@ -176,8 +252,8 @@ function uvfinalize(t::Union{Timer, AsyncCondition})
         if t.handle != C_NULL
             disassociate_julia_struct(t.handle) # not going to call the usual close hooks anymore
             if t.isopen
-                @atomic :monotonic t.isopen = false
-                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle)
+                @atomic :release t.isopen = false
+                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle) # this will call Libc.free
             end
             @atomic :monotonic t.handle = C_NULL
             notify(t.cond, false)
@@ -192,8 +268,10 @@ end
 function _uv_hook_close(t::Union{Timer, AsyncCondition})
     lock(t.cond)
     try
-        @atomic :monotonic t.isopen = false
-        Libc.free(@atomicswap :monotonic t.handle = C_NULL)
+        handle = t.handle
+        @atomic :release t.isopen = false
+        @atomic :monotonic t.handle = C_NULL
+        Libc.free(handle)
         notify(t.cond, false)
     finally
         unlock(t.cond)
@@ -220,7 +298,10 @@ function uv_timercb(handle::Ptr{Cvoid})
         @atomic :monotonic t.set = true
         if ccall(:uv_timer_get_repeat, UInt64, (Ptr{Cvoid},), t) == 0
             # timer is stopped now
-            close(t)
+            if t.isopen
+                @atomic :release t.isopen = false
+                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t)
+            end
         end
         notify(t.cond, true)
     finally
@@ -243,7 +324,7 @@ end
 
 # timer with repeated callback
 """
-    Timer(callback::Function, delay; interval = 0)
+    Timer(callback::Function, delay; interval = 0, spawn::Union{Nothing,Bool}=nothing)
 
 Create a timer that runs the function `callback` at each timer expiration.
 
@@ -253,6 +334,13 @@ callback is only run once. The function `callback` is called with a single argum
 itself. Stop a timer by calling `close`. The `callback` may still be run one final time, if the timer
 has already expired.
 
+If `spawn` is `true`, the created task will be spawned, meaning that it will be allowed
+to move thread, which avoids the side-effect of forcing the parent task to get stuck to the thread
+it is on. If `spawn` is `nothing` (default), the task will be spawned if the parent task isn't sticky.
+
+!!! compat "Julia 1.12"
+    The `spawn` argument was introduced in Julia 1.12.
+
 # Examples
 
 Here the first number is printed after a delay of two seconds, then the following numbers are
@@ -272,7 +360,8 @@ julia> begin
 3
 ```
 """
-function Timer(cb::Function, timeout; kwargs...)
+function Timer(cb::Function, timeout; spawn::Union{Nothing,Bool}=nothing, kwargs...)
+    sticky = spawn === nothing ? current_task().sticky : !spawn
     timer = Timer(timeout; kwargs...)
     t = @task begin
         unpreserve_handle(timer)
@@ -287,6 +376,7 @@ function Timer(cb::Function, timeout; kwargs...)
             isopen(timer) || return
         end
     end
+    t.sticky = sticky
     # here we are mimicking parts of _trywait, in coordination with task `t`
     preserve_handle(timer)
     @lock timer.cond begin
@@ -302,11 +392,24 @@ end
 """
     timedwait(testcb, timeout::Real; pollint::Real=0.1)
 
-Waits until `testcb()` returns `true` or `timeout` seconds have passed, whichever is earlier.
+Wait until `testcb()` returns `true` or `timeout` seconds have passed, whichever is earlier.
 The test function is polled every `pollint` seconds. The minimum value for `pollint` is 0.001 seconds,
 that is, 1 millisecond.
 
 Return `:ok` or `:timed_out`.
+
+# Examples
+```jldoctest
+julia> cb() = (sleep(5); return);
+
+julia> t = @async cb();
+
+julia> timedwait(()->istaskdone(t), 1)
+:timed_out
+
+julia> timedwait(()->istaskdone(t), 6.5)
+:ok
+```
 """
 function timedwait(testcb, timeout::Real; pollint::Real=0.1)
     pollint >= 1e-3 || throw(ArgumentError("pollint must be ≥ 1 millisecond"))
diff --git a/base/asyncmap.jl b/base/asyncmap.jl
index be16ba1b27610..1914ddc645f31 100644
--- a/base/asyncmap.jl
+++ b/base/asyncmap.jl
@@ -9,6 +9,8 @@ Uses multiple concurrent tasks to map `f` over a collection (or multiple
 equal length collections). For multiple collection arguments, `f` is
 applied elementwise.
 
+The output is guaranteed to be the same order as the elements of the collection(s) `c`.
+
 `ntasks` specifies the number of tasks to run concurrently.
 Depending on the length of the collections, if `ntasks` is unspecified,
 up to 100 tasks will be used for concurrent mapping.
@@ -26,11 +28,11 @@ The following examples highlight execution in different tasks by returning
 the `objectid` of the tasks in which the mapping function is executed.
 
 First, with `ntasks` undefined, each element is processed in a different task.
-```
+```julia-repl
 julia> tskoid() = objectid(current_task());
 
 julia> asyncmap(x->tskoid(), 1:5)
-5-element Array{UInt64,1}:
+5-element Vector{UInt64}:
  0x6e15e66c75c75853
  0x440f8819a1baa682
  0x9fb3eeadd0c83985
@@ -42,9 +44,9 @@ julia> length(unique(asyncmap(x->tskoid(), 1:5)))
 ```
 
 With `ntasks=2` all elements are processed in 2 tasks.
-```
+```julia-repl
 julia> asyncmap(x->tskoid(), 1:5; ntasks=2)
-5-element Array{UInt64,1}:
+5-element Vector{UInt64}:
  0x027ab1680df7ae94
  0xa23d2f80cd7cf157
  0x027ab1680df7ae94
@@ -58,12 +60,12 @@ julia> length(unique(asyncmap(x->tskoid(), 1:5; ntasks=2)))
 With `batch_size` defined, the mapping function needs to be changed to accept an array
 of argument tuples and return an array of results. `map` is used in the modified mapping
 function to achieve this.
-```
+```julia-repl
 julia> batch_func(input) = map(x->string("args_tuple: ", x, ", element_val: ", x[1], ", task: ", tskoid()), input)
 batch_func (generic function with 1 method)
 
 julia> asyncmap(batch_func, 1:5; ntasks=2, batch_size=2)
-5-element Array{String,1}:
+5-element Vector{String}:
  "args_tuple: (1,), element_val: 1, task: 9118321258196414413"
  "args_tuple: (2,), element_val: 2, task: 4904288162898683522"
  "args_tuple: (3,), element_val: 3, task: 9118321258196414413"
@@ -394,6 +396,8 @@ length(itr::AsyncGenerator) = length(itr.collector.enumerator)
 
 Like [`asyncmap`](@ref), but stores output in `results` rather than
 returning a collection.
+
+$(_DOCS_ALIASING_WARNING)
 """
 function asyncmap!(f, r, c1, c...; ntasks=0, batch_size=nothing)
     foreach(identity, AsyncCollector(f, r, c1, c...; ntasks=ntasks, batch_size=batch_size))
diff --git a/base/atomics.jl b/base/atomics.jl
index 7312206c19896..432c9120939ac 100644
--- a/base/atomics.jl
+++ b/base/atomics.jl
@@ -1,7 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Core.Intrinsics: llvmcall
-
 import .Base: setindex!, getindex, unsafe_convert
 import .Base.Sys: ARCH, WORD_SIZE
 
@@ -13,34 +11,6 @@ export
     atomic_and!, atomic_nand!, atomic_or!, atomic_xor!,
     atomic_max!, atomic_min!,
     atomic_fence
-##
-# Filter out unsupported atomic types on platforms
-# - 128-bit atomics do not exist on AArch32.
-# - Omitting 128-bit types on 32bit x86 and ppc64
-# - LLVM doesn't currently support atomics on floats for ppc64
-#   C++20 is adding limited support for atomics on float, but as of
-#   now Clang does not support that yet.
-if Sys.ARCH === :i686 || startswith(string(Sys.ARCH), "arm") ||
-   Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le
-    const inttypes = (Int8, Int16, Int32, Int64,
-                      UInt8, UInt16, UInt32, UInt64)
-else
-    const inttypes = (Int8, Int16, Int32, Int64, Int128,
-                      UInt8, UInt16, UInt32, UInt64, UInt128)
-end
-const floattypes = (Float16, Float32, Float64)
-const arithmetictypes = (inttypes..., floattypes...)
-# TODO: Support Ptr
-if Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le
-    const atomictypes = (inttypes..., Bool)
-else
-    const atomictypes = (arithmetictypes..., Bool)
-end
-
-const IntTypes = Union{inttypes...}
-const FloatTypes = Union{floattypes...}
-const ArithmeticTypes = Union{arithmetictypes...}
-const AtomicTypes = Union{atomictypes...}
 
 """
     Threads.Atomic{T}
@@ -48,10 +18,6 @@ const AtomicTypes = Union{atomictypes...}
 Holds a reference to an object of type `T`, ensuring that it is only
 accessed atomically, i.e. in a thread-safe manner.
 
-Only certain "simple" types can be used atomically, namely the
-primitive boolean, integer, and float-point types. These are `Bool`,
-`Int8`...`Int128`, `UInt8`...`UInt128`, and `Float16`...`Float64`.
-
 New atomic objects can be created from a non-atomic values; if none is
 specified, the atomic object is initialized with zero.
 
@@ -72,14 +38,21 @@ julia> x[]
 Atomic operations use an `atomic_` prefix, such as [`atomic_add!`](@ref),
 [`atomic_xchg!`](@ref), etc.
 """
-mutable struct Atomic{T<:AtomicTypes}
-    value::T
-    Atomic{T}() where {T<:AtomicTypes} = new(zero(T))
-    Atomic{T}(value) where {T<:AtomicTypes} = new(value)
+mutable struct Atomic{T}
+    @atomic value::T
+    Atomic{T}() where {T} = new(zero(T))
+    Atomic{T}(value) where {T} = new(value)
 end
 
 Atomic() = Atomic{Int}()
 
+const LOCK_PROFILING = Atomic{Int}(0)
+lock_profiling(state::Bool) = state ? atomic_add!(LOCK_PROFILING, 1) : atomic_sub!(LOCK_PROFILING, 1)
+lock_profiling() = LOCK_PROFILING[] > 0
+
+const LOCK_CONFLICT_COUNT = Atomic{Int}(0);
+inc_lock_conflict_count() = atomic_add!(LOCK_CONFLICT_COUNT, 1)
+
 """
     Threads.atomic_cas!(x::Atomic{T}, cmp::T, newval::T) where T
 
@@ -325,120 +298,21 @@ julia> x[]
 """
 function atomic_min! end
 
-unsafe_convert(::Type{Ptr{T}}, x::Atomic{T}) where {T} = convert(Ptr{T}, pointer_from_objref(x))
-setindex!(x::Atomic{T}, v) where {T} = setindex!(x, convert(T, v))
-
-const llvmtypes = IdDict{Any,String}(
-    Bool => "i8",  # julia represents bools with 8-bits for now. # TODO: is this okay?
-    Int8 => "i8", UInt8 => "i8",
-    Int16 => "i16", UInt16 => "i16",
-    Int32 => "i32", UInt32 => "i32",
-    Int64 => "i64", UInt64 => "i64",
-    Int128 => "i128", UInt128 => "i128",
-    Float16 => "half",
-    Float32 => "float",
-    Float64 => "double",
-)
-inttype(::Type{T}) where {T<:Integer} = T
-inttype(::Type{Float16}) = Int16
-inttype(::Type{Float32}) = Int32
-inttype(::Type{Float64}) = Int64
-
-
-import ..Base.gc_alignment
-
-# All atomic operations have acquire and/or release semantics, depending on
-# whether the load or store values. Most of the time, this is what one wants
-# anyway, and it's only moderately expensive on most hardware.
-for typ in atomictypes
-    lt = llvmtypes[typ]
-    ilt = llvmtypes[inttype(typ)]
-    rt = "$lt, $lt*"
-    irt = "$ilt, $ilt*"
-    @eval getindex(x::Atomic{$typ}) =
-        GC.@preserve x llvmcall($"""
-                 %ptr = inttoptr i$WORD_SIZE %0 to $lt*
-                 %rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ))
-                 ret $lt %rv
-                 """, $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
-    @eval setindex!(x::Atomic{$typ}, v::$typ) =
-        GC.@preserve x llvmcall($"""
-                 %ptr = inttoptr i$WORD_SIZE %0 to $lt*
-                 store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ))
-                 ret void
-                 """, Cvoid, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)
-
-    # Note: atomic_cas! succeeded (i.e. it stored "new") if and only if the result is "cmp"
-    if typ <: Integer
-        @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            GC.@preserve x llvmcall($"""
-                     %ptr = inttoptr i$WORD_SIZE %0 to $lt*
-                     %rs = cmpxchg $lt* %ptr, $lt %1, $lt %2 acq_rel acquire
-                     %rv = extractvalue { $lt, i1 } %rs, 0
-                     ret $lt %rv
-                     """, $typ, Tuple{Ptr{$typ},$typ,$typ},
-                     unsafe_convert(Ptr{$typ}, x), cmp, new)
-    else
-        @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            GC.@preserve x llvmcall($"""
-                     %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
-                     %icmp = bitcast $lt %1 to $ilt
-                     %inew = bitcast $lt %2 to $ilt
-                     %irs = cmpxchg $ilt* %iptr, $ilt %icmp, $ilt %inew acq_rel acquire
-                     %irv = extractvalue { $ilt, i1 } %irs, 0
-                     %rv = bitcast $ilt %irv to $lt
-                     ret $lt %rv
-                     """, $typ, Tuple{Ptr{$typ},$typ,$typ},
-                     unsafe_convert(Ptr{$typ}, x), cmp, new)
-    end
-
-    arithmetic_ops = [:add, :sub]
-    for rmwop in [arithmetic_ops..., :xchg, :and, :nand, :or, :xor, :max, :min]
-        rmw = string(rmwop)
-        fn = Symbol("atomic_", rmw, "!")
-        if (rmw == "max" || rmw == "min") && typ <: Unsigned
-            # LLVM distinguishes signedness in the operation, not the integer type.
-            rmw = "u" * rmw
-        end
-        if rmwop in arithmetic_ops && !(typ <: ArithmeticTypes) continue end
-        if typ <: Integer
-            @eval $fn(x::Atomic{$typ}, v::$typ) =
-                GC.@preserve x llvmcall($"""
-                         %ptr = inttoptr i$WORD_SIZE %0 to $lt*
-                         %rv = atomicrmw $rmw $lt* %ptr, $lt %1 acq_rel
-                         ret $lt %rv
-                         """, $typ, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)
-        else
-            rmwop === :xchg || continue
-            @eval $fn(x::Atomic{$typ}, v::$typ) =
-                GC.@preserve x llvmcall($"""
-                         %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
-                         %ival = bitcast $lt %1 to $ilt
-                         %irv = atomicrmw $rmw $ilt* %iptr, $ilt %ival acq_rel
-                         %rv = bitcast $ilt %irv to $lt
-                         ret $lt %rv
-                         """, $typ, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)
-        end
-    end
-end
-
-# Provide atomic floating-point operations via atomic_cas!
-const opnames = Dict{Symbol, Symbol}(:+ => :add, :- => :sub)
-for op in [:+, :-, :max, :min]
-    opname = get(opnames, op, op)
-    @eval function $(Symbol("atomic_", opname, "!"))(var::Atomic{T}, val::T) where T<:FloatTypes
-        IT = inttype(T)
-        old = var[]
-        while true
-            new = $op(old, val)
-            cmp = old
-            old = atomic_cas!(var, cmp, new)
-            reinterpret(IT, old) == reinterpret(IT, cmp) && return old
-            # Temporary solution before we have gc transition support in codegen.
-            ccall(:jl_gc_safepoint, Cvoid, ())
-        end
-    end
-end
+#const nand = (~) ∘ (&) # ComposedFunction generated very poor code quality
+nand(x, y) = ~(x & y)
+
+getindex(x::Atomic) = @atomic :acquire x.value
+setindex!(x::Atomic, v) = (@atomic :release x.value = v; x)
+atomic_cas!(x::Atomic, cmp, new) = (@atomicreplace :acquire_release :acquire x.value cmp => new).old
+atomic_add!(x::Atomic, v) = (@atomic :acquire_release x.value + v).first
+atomic_sub!(x::Atomic, v) = (@atomic :acquire_release x.value - v).first
+atomic_and!(x::Atomic, v) = (@atomic :acquire_release x.value & v).first
+atomic_or!(x::Atomic, v) = (@atomic :acquire_release x.value | v).first
+atomic_xor!(x::Atomic, v) = (@atomic :acquire_release x.value ⊻ v).first
+atomic_nand!(x::Atomic, v) = (@atomic :acquire_release x.value nand v).first
+atomic_xchg!(x::Atomic, v) = (@atomicswap :acquire_release x.value = v)
+atomic_min!(x::Atomic, v) = (@atomic :acquire_release x.value min v).first
+atomic_max!(x::Atomic, v) = (@atomic :acquire_release x.value max v).first
 
 """
     Threads.atomic_fence()
@@ -455,7 +329,4 @@ fences should not be necessary in most cases.
 
 For further details, see LLVM's `fence` instruction.
 """
-atomic_fence() = llvmcall("""
-                          fence seq_cst
-                          ret void
-                          """, Cvoid, Tuple{})
+atomic_fence() = Core.Intrinsics.atomic_fence(:sequentially_consistent)
diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl
index f96887d554af0..76aaf927b60f2 100644
--- a/base/binaryplatforms.jl
+++ b/base/binaryplatforms.jl
@@ -157,7 +157,7 @@ end
 
 # Hash definition to ensure that it's stable
 function Base.hash(p::Platform, h::UInt)
-    h += 0x506c6174666f726d % UInt
+    h ⊻= 0x506c6174666f726d % UInt
     h = hash(p.tags, h)
     h = hash(p.compare_strategies, h)
     return h
@@ -170,20 +170,18 @@ end
 
 
 # Allow us to easily serialize Platform objects
-function Base.repr(p::Platform; context=nothing)
-    str = string(
-        "Platform(",
-        repr(arch(p)),
-        ", ",
-        repr(os(p)),
-        "; ",
-        join(("$(k) = $(repr(v))" for (k, v) in tags(p) if k ∉ ("arch", "os")), ", "),
-        ")",
-    )
+function Base.show(io::IO, p::Platform)
+    print(io, "Platform(")
+    show(io, arch(p))
+    print(io, ", ")
+    show(io, os(p))
+    print(io, "; ")
+    join(io, ("$(k) = $(repr(v))" for (k, v) in tags(p) if k ∉ ("arch", "os")), ", ")
+    print(io, ")")
 end
 
 # Make showing the platform a bit more palatable
-function Base.show(io::IO, p::Platform)
+function Base.show(io::IO, ::MIME"text/plain", p::Platform)
     str = string(platform_name(p), " ", arch(p))
     # Add on all the other tags not covered by os/arch:
     other_tags = sort!(filter!(kv -> kv[1] ∉ ("os", "arch"), collect(tags(p))))
@@ -196,11 +194,11 @@ end
 function validate_tags(tags::Dict)
     throw_invalid_key(k) = throw(ArgumentError("Key \"$(k)\" cannot have value \"$(tags[k])\""))
     # Validate `arch`
-    if tags["arch"] ∉ ("x86_64", "i686", "armv7l", "armv6l", "aarch64", "powerpc64le")
+    if tags["arch"] ∉ ("x86_64", "i686", "armv7l", "armv6l", "aarch64", "powerpc64le", "riscv64")
         throw_invalid_key("arch")
     end
     # Validate `os`
-    if tags["os"] ∉ ("linux", "macos", "freebsd", "windows")
+    if tags["os"] ∉ ("linux", "macos", "freebsd", "openbsd", "windows")
         throw_invalid_key("os")
     end
     # Validate `os`/`arch` combination
@@ -308,7 +306,7 @@ function compare_version_cap(a::String, b::String, a_requested::Bool, b_requeste
         return a == b
     end
 
-    # Otherwise, do the comparison between the the single version cap and the single version:
+    # Otherwise, do the comparison between the single version cap and the single version:
     if a_requested
         return b <= a
     else
@@ -377,8 +375,10 @@ function os()
         return "windows"
     elseif Sys.isapple()
         return "macos"
-    elseif Sys.isbsd()
+    elseif Sys.isfreebsd()
         return "freebsd"
+    elseif Sys.isopenbsd()
+        return "openbsd"
     else
         return "linux"
     end
@@ -424,6 +424,7 @@ const platform_names = Dict(
     "macos" => "macOS",
     "windows" => "Windows",
     "freebsd" => "FreeBSD",
+    "openbsd" => "OpenBSD",
     nothing => "Unknown",
 )
 
@@ -494,7 +495,7 @@ julia> wordsize(Platform("x86_64", "macos"))
 wordsize(p::AbstractPlatform) = (arch(p) ∈ ("i686", "armv6l", "armv7l")) ? 32 : 64
 
 """
-    triplet(p::AbstractPlatform; exclude_tags::Vector{String})
+    triplet(p::AbstractPlatform)
 
 Get the target triplet for the given `Platform` object as a `String`.
 
@@ -519,14 +520,17 @@ function triplet(p::AbstractPlatform)
     )
 
     # Tack on optional compiler ABI flags
-    if libgfortran_version(p) !== nothing
-        str = string(str, "-libgfortran", libgfortran_version(p).major)
+    libgfortran_version_ = libgfortran_version(p)
+    if libgfortran_version_ !== nothing
+        str = string(str, "-libgfortran", libgfortran_version_.major)
     end
-    if cxxstring_abi(p) !== nothing
-        str = string(str, "-", cxxstring_abi(p))
+    cxxstring_abi_ = cxxstring_abi(p)
+    if cxxstring_abi_ !== nothing
+        str = string(str, "-", cxxstring_abi_)
     end
-    if libstdcxx_version(p) !== nothing
-        str = string(str, "-libstdcxx", libstdcxx_version(p).patch)
+    libstdcxx_version_ = libstdcxx_version(p)
+    if libstdcxx_version_ !== nothing
+        str = string(str, "-libstdcxx", libstdcxx_version_.patch)
     end
 
     # Tack on all extra tags
@@ -558,6 +562,8 @@ function os_str(p::AbstractPlatform)
         else
             return "-unknown-freebsd"
         end
+    elseif os(p) == "openbsd"
+        return "-unknown-openbsd"
     else
         return "-unknown"
     end
@@ -583,7 +589,8 @@ Sys.isapple(p::AbstractPlatform) = os(p) == "macos"
 Sys.islinux(p::AbstractPlatform) = os(p) == "linux"
 Sys.iswindows(p::AbstractPlatform) = os(p) == "windows"
 Sys.isfreebsd(p::AbstractPlatform) = os(p) == "freebsd"
-Sys.isbsd(p::AbstractPlatform) = os(p) ∈ ("freebsd", "macos")
+Sys.isopenbsd(p::AbstractPlatform) = os(p) == "openbsd"
+Sys.isbsd(p::AbstractPlatform) = os(p) ∈ ("freebsd", "openbsd", "macos")
 Sys.isunix(p::AbstractPlatform) = Sys.isbsd(p) || Sys.islinux(p)
 
 const arch_mapping = Dict(
@@ -593,6 +600,7 @@ const arch_mapping = Dict(
     "armv7l" => "arm(v7l)?", # if we just see `arm-linux-gnueabihf`, we assume it's `armv7l`
     "armv6l" => "armv6l",
     "powerpc64le" => "p(ower)?pc64le",
+    "riscv64" => "(rv64|riscv64)",
 )
 # Keep this in sync with `CPUID.ISAs_by_family`
 # These are the CPUID side of the microarchitectures targeted by GCC flags in BinaryBuilder.jl
@@ -628,12 +636,16 @@ const arch_march_isa_mapping = let
         ],
         "powerpc64le" => [
             "power8" => get_set("powerpc64le", "power8"),
-        ]
+        ],
+        "riscv64" => [
+            "riscv64" => get_set("riscv64", "riscv64"),
+        ],
     )
 end
 const os_mapping = Dict(
     "macos" => "-apple-darwin[\\d\\.]*",
     "freebsd" => "-(.*-)?freebsd[\\d\\.]*",
+    "openbsd" => "-(.*-)?openbsd[\\d\\.]*",
     "windows" => "-w64-mingw32",
     "linux" => "-(.*-)?linux",
 )
@@ -663,18 +675,12 @@ const libstdcxx_version_mapping = Dict{String,String}(
     "libstdcxx" => "-libstdcxx\\d+",
 )
 
-"""
-    parse(::Type{Platform}, triplet::AbstractString)
-
-Parses a string platform triplet back into a `Platform` object.
-"""
-function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = false)
+const triplet_regex = let
     # Helper function to collapse dictionary of mappings down into a regex of
     # named capture groups joined by "|" operators
     c(mapping) = string("(",join(["(?<$k>$v)" for (k, v) in mapping], "|"), ")")
 
-    # We're going to build a mondo regex here to parse everything:
-    triplet_regex = Regex(string(
+    Regex(string(
         "^",
         # First, the core triplet; arch/os/libc/call_abi
         c(arch_mapping),
@@ -689,7 +695,14 @@ function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = f
         "(?<tags>(?:-[^-]+\\+[^-]+)*)?",
         "\$",
     ))
+end
+
+"""
+    parse(::Type{Platform}, triplet::AbstractString)
 
+Parses a string platform triplet back into a `Platform` object.
+"""
+function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = false)
     m = match(triplet_regex, triplet)
     if m !== nothing
         # Helper function to find the single named field within the giant regex
@@ -741,11 +754,14 @@ function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = f
         end
         os_version = nothing
         if os == "macos"
-            os_version = extract_os_version("macos", r".*darwin([\d\.]+)"sa)
+            os_version = extract_os_version("macos", r".*darwin([\d.]+)"sa)
         end
         if os == "freebsd"
             os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)"sa)
         end
+        if os == "openbsd"
+            os_version = extract_os_version("openbsd", r".*openbsd([\d.]+)"sa)
+        end
         tags["os_version"] = os_version
 
         return Platform(arch, os, tags; validate_strict)
@@ -783,6 +799,17 @@ function platform_dlext(p::AbstractPlatform = HostPlatform())
     end
 end
 
+# Not general purpose, just for parse_dl_name_version
+function _this_os_name()
+    if Sys.iswindows()
+        return "windows"
+    elseif Sys.isapple()
+        return "macos"
+    else
+        return "other"
+    end
+end
+
 """
     parse_dl_name_version(path::String, platform::AbstractPlatform)
 
@@ -793,17 +820,19 @@ valid dynamic library, this method throws an error.  If no soversion
 can be extracted from the filename, as in "libbar.so" this method
 returns `"libbar", nothing`.
 """
-function parse_dl_name_version(path::String, os::String)
+function parse_dl_name_version(path::String, os::String=_this_os_name())
     # Use an extraction regex that matches the given OS
     local dlregex
+    # Keep this up to date with _this_os_name
     if os == "windows"
-        # On Windows, libraries look like `libnettle-6.dll`
-        dlregex = r"^(.*?)(?:-((?:[\.\d]+)*))?\.dll$"sa
+        # On Windows, libraries look like `libnettle-6.dll`.
+        # Stay case-insensitive, the suffix might be `.DLL`.
+        dlregex = r"^(.*?)(?:-((?:[\.\d]+)*))?\.dll$"isa
     elseif os == "macos"
         # On OSX, libraries look like `libnettle.6.3.dylib`
         dlregex = r"^(.*?)((?:\.[\d]+)*)\.dylib$"sa
     else
-        # On Linux and FreeBSD, libraries look like `libnettle.so.6.3.0`
+        # On Linux and others BSD, libraries look like `libnettle.so.6.3.0`
         dlregex = r"^(.*?)\.so((?:\.[\d]+)*)$"sa
     end
 
@@ -824,24 +853,52 @@ function parse_dl_name_version(path::String, os::String)
 end
 
 # Adapter for `AbstractString`
-function parse_dl_name_version(path::AbstractString, os::AbstractString)
+function parse_dl_name_version(path::AbstractString, os::AbstractString=_this_os_name())
     return parse_dl_name_version(string(path)::String, string(os)::String)
 end
 
+function get_csl_member(member::Symbol)
+    # If CompilerSupportLibraries_jll is a stdlib, we can just grab things from it
+    csl_pkgids = filter(pkgid -> pkgid.name == "CompilerSupportLibraries_jll", keys(Base.loaded_modules))
+    if !isempty(csl_pkgids)
+        CSL_mod = Base.loaded_modules[first(csl_pkgids)]
+
+        # This can fail during bootstrap, so we skip in that case.
+        if isdefined(CSL_mod, member)
+            return getproperty(CSL_mod, member)
+        end
+    end
+
+    return nothing
+end
+
 """
     detect_libgfortran_version()
 
 Inspects the current Julia process to determine the libgfortran version this Julia is
-linked against (if any).
+linked against (if any).  Returns `nothing` if no libgfortran version dependence is
+detected.
 """
 function detect_libgfortran_version()
-    libgfortran_paths = filter!(x -> occursin("libgfortran", x), Libdl.dllist())
-    if isempty(libgfortran_paths)
+    function get_libgfortran_path()
+        # If CompilerSupportLibraries_jll is a stdlib, we can just directly ask for
+        # the path here, without checking `dllist()`:
+        libgfortran_path = get_csl_member(:libgfortran_path)
+        if libgfortran_path !== nothing
+            return libgfortran_path::String
+        end
+
+        # Otherwise, look for it having already been loaded by something
+        libgfortran_paths = filter!(x -> occursin("libgfortran", x), Libdl.dllist())
+        if !isempty(libgfortran_paths)
+            return first(libgfortran_paths)::String
+        end
+
         # One day, I hope to not be linking against libgfortran in base Julia
         return nothing
     end
-    libgfortran_path = first(libgfortran_paths)
 
+    libgfortran_path = get_libgfortran_path()
     name, version = parse_dl_name_version(libgfortran_path, os())
     if version === nothing
         # Even though we complain about this, we allow it to continue in the hopes that
@@ -865,24 +922,35 @@ it is linked against (if any).  `max_minor_version` is the latest version in the
 3.4 series of GLIBCXX where the search is performed.
 """
 function detect_libstdcxx_version(max_minor_version::Int=30)
-    libstdcxx_paths = filter!(x -> occursin("libstdc++", x), Libdl.dllist())
-    if isempty(libstdcxx_paths)
-        # This can happen if we were built by clang, so we don't link against
-        # libstdc++ at all.
+    function get_libstdcxx_handle()
+        # If CompilerSupportLibraries_jll is a stdlib, we can just directly open it
+        libstdcxx = get_csl_member(:libstdcxx)
+        if libstdcxx !== nothing
+            return nothing
+        end
+
+        # Otherwise, look for it having already been loaded by something
+        libstdcxx_paths = filter!(x -> occursin("libstdc++", x), Libdl.dllist())
+        if !isempty(libstdcxx_paths)
+            return Libdl.dlopen(first(libstdcxx_paths), Libdl.RTLD_NOLOAD)::Ptr{Cvoid}
+        end
+
+        # One day, I hope to not be linking against libgfortran in base Julia
         return nothing
     end
 
     # Brute-force our way through GLIBCXX_* symbols to discover which version we're linked against
-    hdl = Libdl.dlopen(first(libstdcxx_paths))::Ptr{Cvoid}
-    # Try all GLIBCXX versions down to GCC v4.8:
-    # https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html
-    for minor_version in max_minor_version:-1:18
-        if Libdl.dlsym(hdl, "GLIBCXX_3.4.$(minor_version)"; throw_error=false) !== nothing
-            Libdl.dlclose(hdl)
-            return VersionNumber("3.4.$(minor_version)")
+    libstdcxx = get_libstdcxx_handle()
+
+    if libstdcxx !== nothing
+        # Try all GLIBCXX versions down to GCC v4.8:
+        # https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html
+        for minor_version in max_minor_version:-1:18
+            if Libdl.dlsym(libstdcxx, "GLIBCXX_3.4.$(minor_version)"; throw_error=false) !== nothing
+                return VersionNumber("3.4.$(minor_version)")
+            end
         end
     end
-    Libdl.dlclose(hdl)
     return nothing
 end
 
@@ -1028,7 +1096,7 @@ function platforms_match(a::AbstractPlatform, b::AbstractPlatform)
 
         # Call the comparator, passing in which objects requested this comparison (one, the other, or both)
         # For some comparators this doesn't matter, but for non-symmetrical comparisons, it does.
-        if !(comparator(ak, bk, a_comp === comparator, b_comp === comparator)::Bool)
+        if !(@invokelatest(comparator(ak, bk, a_comp === comparator, b_comp === comparator))::Bool)
             return false
         end
     end
diff --git a/base/bitarray.jl b/base/bitarray.jl
index f29b30d0ac8c0..9770fe0a336c5 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -53,7 +53,7 @@ Construct an undef [`BitArray`](@ref) with the given dimensions.
 Behaves identically to the [`Array`](@ref) constructor. See [`undef`](@ref).
 
 # Examples
-```julia-repl
+```jldoctest; filter = r"[01]"
 julia> BitArray(undef, 2, 2)
 2×2 BitMatrix:
  0  0
@@ -81,7 +81,7 @@ BitVector() = BitVector(undef, 0)
 
 Construct a `BitVector` from a tuple of `Bool`.
 # Examples
-```julia-repl
+```jldoctest
 julia> nt = (true, false, true, false)
 (true, false, true, false)
 
@@ -104,11 +104,6 @@ length(B::BitArray) = B.len
 size(B::BitVector) = (B.len,)
 size(B::BitArray) = B.dims
 
-@inline function size(B::BitVector, d::Integer)
-    d < 1 && throw_boundserror(size(B), d)
-    ifelse(d == 1, B.len, 1)
-end
-
 isassigned(B::BitArray, i::Int) = 1 <= i <= length(B)
 
 IndexStyle(::Type{<:BitArray}) = IndexLinear()
@@ -404,6 +399,7 @@ falses(dims::DimOrInd...) = falses(dims)
 falses(dims::NTuple{N, Union{Integer, OneTo}}) where {N} = falses(map(to_dim, dims))
 falses(dims::NTuple{N, Integer}) where {N} = fill!(BitArray(undef, dims), false)
 falses(dims::Tuple{}) = fill!(BitArray(undef, dims), false)
+falses(dims::NTuple{N, DimOrInd}) where {N} = fill!(similar(BitArray, dims), false)
 
 """
     trues(dims)
@@ -422,6 +418,7 @@ trues(dims::DimOrInd...) = trues(dims)
 trues(dims::NTuple{N, Union{Integer, OneTo}}) where {N} = trues(map(to_dim, dims))
 trues(dims::NTuple{N, Integer}) where {N} = fill!(BitArray(undef, dims), true)
 trues(dims::Tuple{}) = fill!(BitArray(undef, dims), true)
+trues(dims::NTuple{N, DimOrInd}) where {N} = fill!(similar(BitArray, dims), true)
 
 function one(x::BitMatrix)
     m, n = size(x)
@@ -462,7 +459,7 @@ copyto!(dest::BitArray, doffs::Integer, src::Union{BitArray,Array}, soffs::Integ
     _copyto_int!(dest, Int(doffs), src, Int(soffs), Int(n))
 function _copyto_int!(dest::BitArray, doffs::Int, src::Union{BitArray,Array}, soffs::Int, n::Int)
     n == 0 && return dest
-    n < 0 && throw(ArgumentError("Number of elements to copy must be nonnegative."))
+    n < 0 && throw(ArgumentError("Number of elements to copy must be non-negative."))
     soffs < 1 && throw(BoundsError(src, soffs))
     doffs < 1 && throw(BoundsError(dest, doffs))
     soffs+n-1 > length(src) && throw(BoundsError(src, length(src)+1))
@@ -482,7 +479,7 @@ end
 reshape(B::BitArray, dims::Tuple{Vararg{Int}}) = _bitreshape(B, dims)
 function _bitreshape(B::BitArray, dims::NTuple{N,Int}) where N
     prod(dims) == length(B) ||
-        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $(length(B))"))
+        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $(length(B))"))
     Br = BitArray{N}(undef, ntuple(i->0,Val(N))...)
     Br.chunks = B.chunks
     Br.len = prod(dims)
@@ -541,10 +538,8 @@ end
 reinterpret(::Type{Bool}, B::BitArray, dims::NTuple{N,Int}) where {N} = reinterpret(B, dims)
 reinterpret(B::BitArray, dims::NTuple{N,Int}) where {N} = reshape(B, dims)
 
-if nameof(@__MODULE__) === :Base  # avoid method overwrite
 (::Type{T})(x::T) where {T<:BitArray} = copy(x)::T
 BitArray(x::BitArray) = copy(x)
-end
 
 """
     BitArray(itr)
@@ -807,7 +802,7 @@ prepend!(B::BitVector, items) = prepend!(B, BitArray(items))
 prepend!(A::Vector{Bool}, items::BitVector) = prepend!(A, Array(items))
 
 function sizehint!(B::BitVector, sz::Integer)
-    ccall(:jl_array_sizehint, Cvoid, (Any, UInt), B.chunks, num_bit_chunks(sz))
+    sizehint!(B.chunks, num_bit_chunks(sz))
     return B
 end
 
@@ -1338,7 +1333,7 @@ function (>>>)(B::BitVector, i::UInt)
 end
 
 """
-    >>(B::BitVector, n) -> BitVector
+    >>(B::BitVector, n)::BitVector
 
 Right bit shift operator, `B >> n`. For `n >= 0`, the result is `B`
 with elements shifted `n` positions forward, filling with `false`
@@ -1376,7 +1371,7 @@ julia> B >> -1
 
 # signed integer version of shift operators with handling of negative values
 """
-    <<(B::BitVector, n) -> BitVector
+    <<(B::BitVector, n)::BitVector
 
 Left bit shift operator, `B << n`. For `n >= 0`, the result is `B`
 with elements shifted `n` positions backwards, filling with `false`
@@ -1413,7 +1408,7 @@ julia> B << -1
 (<<)(B::BitVector, i::Int) = (i >=0 ? B << unsigned(i) : B >> unsigned(-i))
 
 """
-    >>>(B::BitVector, n) -> BitVector
+    >>>(B::BitVector, n)::BitVector
 
 Unsigned right bitshift operator, `B >>> n`. Equivalent to `B >> n`. See [`>>`](@ref) for
 details and examples.
@@ -1791,9 +1786,10 @@ function bit_map!(f::F, dest::BitArray, A::BitArray) where F
     dest_last = destc[len_Ac]
     _msk = _msk_end(A)
     # first zero out the bits mask is going to change
-    destc[len_Ac] = (dest_last & (~_msk))
     # then update bits by `or`ing with a masked RHS
-    destc[len_Ac] |= f(Ac[len_Ac]) & _msk
+    # DO NOT SEPARATE ONTO TO LINES.
+    # Otherwise there will be bugs when Ac aliases destc
+    destc[len_Ac] = (dest_last & (~_msk)) | f(Ac[len_Ac]) & _msk
     dest
 end
 function bit_map!(f::F, dest::BitArray, A::BitArray, B::BitArray) where F
@@ -1812,9 +1808,10 @@ function bit_map!(f::F, dest::BitArray, A::BitArray, B::BitArray) where F
     dest_last = destc[len_Ac]
     _msk = _msk_end(min_bitlen)
     # first zero out the bits mask is going to change
-    destc[len_Ac] = (dest_last & ~(_msk))
     # then update bits by `or`ing with a masked RHS
-    destc[len_Ac] |= f(Ac[end], Bc[end]) & _msk
+    # DO NOT SEPARATE ONTO TO LINES.
+    # Otherwise there will be bugs when Ac or Bc aliases destc
+    destc[len_Ac] = (dest_last & ~(_msk)) | f(Ac[end], Bc[end]) & _msk
     dest
 end
 
diff --git a/base/bitset.jl b/base/bitset.jl
index 240be822fa263..78d8fc8769de1 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -15,7 +15,11 @@ mutable struct BitSet <: AbstractSet{Int}
     # 1st stored Int equals 64*offset
     offset::Int
 
-    BitSet() = new(resize!(Vector{UInt64}(undef, 4), 0), NO_OFFSET)
+    function BitSet()
+        a = Vector{UInt64}(undef, 4) # start with some initial space for holding 0:255 without additional allocations later
+        setfield!(a, :size, (0,)) # aka `empty!(a)` inlined
+        return new(a, NO_OFFSET)
+   end
 end
 
 """
@@ -51,7 +55,10 @@ function copy!(dest::BitSet, src::BitSet)
     dest
 end
 
-sizehint!(s::BitSet, n::Integer) = (sizehint!(s.bits, (n+63) >> 6); s)
+function sizehint!(s::BitSet, n::Integer; first::Bool=false, shrink::Bool=true)
+    sizehint!(s.bits, (n+63) >> 6; first, shrink)
+    s
+end
 
 function _bits_getindex(b::Bits, n::Int, offset::Int)
     ci = _div64(n) - offset + 1
diff --git a/base/bool.jl b/base/bool.jl
index d7dcf76caa91b..12144756c76c8 100644
--- a/base/bool.jl
+++ b/base/bool.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+import Core: Bool
+
 # promote Bool to any other numeric type
 promote_rule(::Type{Bool}, ::Type{T}) where {T<:Number} = T
 
@@ -154,6 +156,7 @@ abs(x::Bool) = x
 abs2(x::Bool) = x
 iszero(x::Bool) = !x
 isone(x::Bool) = x
+ispositive(x::Bool) = x # could use fallback once #21712 is resolved
 
 <(x::Bool, y::Bool) = y&!x
 <=(x::Bool, y::Bool) = y|!x
@@ -184,3 +187,5 @@ end
 div(x::Bool, y::Bool) = y ? x : throw(DivideError())
 rem(x::Bool, y::Bool) = y ? false : throw(DivideError())
 mod(x::Bool, y::Bool) = rem(x,y)
+
+Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x))
diff --git a/base/boot.jl b/base/boot.jl
index 78b7daaf47d64..903725979bb91 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -52,8 +52,26 @@
 #abstract type AbstractArray{T,N} end
 #abstract type DenseArray{T,N} <: AbstractArray{T,N} end
 
+#primitive type AddrSpace{Backend::Module} 8 end
+#const CPU = bitcast(AddrSpace{Core}, 0x00)
+
+#struct GenericMemory{kind::Symbol, T, AS::AddrSpace}
+#   length::Int
+#   const data::Ptr{Cvoid} # make this GenericPtr{addrspace, Cvoid}
+#   Union{ # hidden data
+#       elements :: NTuple{length, T}
+#       owner :: Any
+#   }
+#end
+
+#struct GenericMemoryRef{kind::Symbol, T, AS::AddrSpace}
+#    mem::GenericMemory{kind, T, AS}
+#    data::Ptr{Cvoid} # make this GenericPtr{addrspace, Cvoid}
+#end
+
 #mutable struct Array{T,N} <: DenseArray{T,N}
-## opaque
+#  ref::MemoryRef{T}
+#  size::NTuple{N,Int}
 #end
 
 #mutable struct Module
@@ -107,12 +125,13 @@
 #    file::Union{Symbol,Nothing}
 #end
 
-#struct LineInfoNode
-#    module::Module
-#    method::Any (Union{Symbol, Method, MethodInstance})
-#    file::Symbol
-#    line::Int32
-#    inlined_at::Int32
+#struct LegacyLineInfoNode end # only used internally during lowering
+
+#struct DebugInfo
+#    def::Any # (Union{Symbol, Method, MethodInstance})
+#    linetable::Any # (Union{Nothing,DebugInfo})
+#    edges::SimpleVector # Vector{DebugInfo}
+#    codelocs::String # compressed Vector{UInt8}
 #end
 
 #struct GotoNode
@@ -156,15 +175,33 @@
 #end
 
 #mutable struct Task
-#    parent::Task
+#    next::Any
+#    queue::Any
 #    storage::Any
-#    state::Symbol
 #    donenotify::Any
 #    result::Any
-#    exception::Any
-#    backtrace::Any
-#    logstate::Any
+#    scope::Any
 #    code::Any
+#    @atomic _state::UInt8
+#    sticky::UInt8
+#    priority::UInt16
+#    @atomic _isexception::UInt8
+#    pad00::UInt8
+#    pad01::UInt8
+#    pad02::UInt8
+#    rngState0::UInt64
+#    rngState1::UInt64
+#    rngState2::UInt64
+#    rngState3::UInt64
+#    rngState4::UInt64
+#    const metrics_enabled::Bool
+#    pad10::UInt8
+#    pad11::UInt8
+#    pad12::UInt8
+#    @atomic first_enqueued_at::UInt64
+#    @atomic last_started_running_at::UInt64
+#    @atomic running_time_ns::UInt64
+#    @atomic finished_at::UInt64
 #end
 
 export
@@ -173,8 +210,8 @@ export
     Tuple, Type, UnionAll, TypeVar, Union, Nothing, Cvoid,
     AbstractArray, DenseArray, NamedTuple, Pair,
     # special objects
-    Function, Method,
-    Module, Symbol, Task, Array, UndefInitializer, undef, WeakRef, VecElement,
+    Function, Method, Module, Symbol, Task, UndefInitializer, undef, WeakRef, VecElement,
+    Array, Memory, MemoryRef, AtomicMemory, AtomicMemoryRef, GenericMemory, GenericMemoryRef,
     # numeric types
     Number, Real, Integer, Bool, Ref, Ptr,
     AbstractFloat, Float16, Float32, Float64,
@@ -187,21 +224,23 @@ export
     InterruptException, InexactError, OutOfMemoryError, ReadOnlyMemoryError,
     OverflowError, StackOverflowError, SegmentationFault, UndefRefError, UndefVarError,
     TypeError, ArgumentError, MethodError, AssertionError, LoadError, InitError,
-    UndefKeywordError, ConcurrencyViolationError,
+    UndefKeywordError, ConcurrencyViolationError, FieldError,
     # AST representation
     Expr, QuoteNode, LineNumberNode, GlobalRef,
     # object model functions
-    fieldtype, getfield, setfield!, swapfield!, modifyfield!, replacefield!,
-    nfields, throw, tuple, ===, isdefined, eval,
+    fieldtype, getfield, setfield!, swapfield!, modifyfield!, replacefield!, setfieldonce!,
+    nfields, throw, tuple, ===, isdefined,
     # access to globals
-    getglobal, setglobal!,
+    getglobal, setglobal!, swapglobal!, modifyglobal!, replaceglobal!, setglobalonce!, isdefinedglobal,
     # ifelse, sizeof    # not exported, to avoid conflicting with Base
     # type reflection
     <:, typeof, isa, typeassert,
     # method reflection
     applicable, invoke,
     # constants
-    nothing, Main
+    nothing, Main,
+    # backwards compatibility
+    arrayref, arrayset, arraysize, const_arrayref
 
 const getproperty = getfield # TODO: use `getglobal` for modules instead
 const setproperty! = setfield!
@@ -217,6 +256,8 @@ primitive type Float16 <: AbstractFloat 16 end
 primitive type Float32 <: AbstractFloat 32 end
 primitive type Float64 <: AbstractFloat 64 end
 
+primitive type BFloat16 <: AbstractFloat 16 end
+
 #primitive type Bool <: Integer 8 end
 abstract type AbstractChar end
 primitive type Char <: AbstractChar 32 end
@@ -238,22 +279,66 @@ else
     const UInt = UInt32
 end
 
-function iterate end
 function Typeof end
 ccall(:jl_toplevel_eval_in, Any, (Any, Any),
       Core, quote
       (f::typeof(Typeof))(x) = ($(_expr(:meta,:nospecialize,:x)); isa(x,Type) ? Type{x} : typeof(x))
       end)
 
+function iterate end
+
 macro nospecialize(x)
     _expr(:meta, :nospecialize, x)
 end
+Expr(@nospecialize args...) = _expr(args...)
 
-TypeVar(n::Symbol) = _typevar(n, Union{}, Any)
-TypeVar(n::Symbol, @nospecialize(ub)) = _typevar(n, Union{}, ub)
-TypeVar(n::Symbol, @nospecialize(lb), @nospecialize(ub)) = _typevar(n, lb, ub)
+macro latestworld() Expr(:latestworld) end
 
-UnionAll(v::TypeVar, @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v, t)
+_is_internal(__module__) = __module__ === Core
+# can be used in place of `@assume_effects :total` (supposed to be used for bootstrapping)
+macro _total_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#true,
+        #=:effect_free=#true,
+        #=:nothrow=#true,
+        #=:terminates_globally=#true,
+        #=:terminates_locally=#false,
+        #=:notaskstate=#true,
+        #=:inaccessiblememonly=#true,
+        #=:noub=#true,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#true))
+end
+# can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping)
+macro _foldable_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#true,
+        #=:effect_free=#true,
+        #=:nothrow=#false,
+        #=:terminates_globally=#true,
+        #=:terminates_locally=#false,
+        #=:notaskstate=#true,
+        #=:inaccessiblememonly=#true,
+        #=:noub=#true,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#true))
+end
+
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
+
+macro _boundscheck() Expr(:boundscheck) end
+
+# n.b. the effects and model of these is refined in inference abstractinterpretation.jl
+TypeVar(@nospecialize(n)) = _typevar(n::Symbol, Union{}, Any)
+TypeVar(@nospecialize(n), @nospecialize(ub)) = _typevar(n::Symbol, Union{}, ub)
+TypeVar(@nospecialize(n), @nospecialize(lb), @nospecialize(ub)) = _typevar(n::Symbol, lb, ub)
+UnionAll(@nospecialize(v), @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v::TypeVar, t)
+
+const Memory{T} = GenericMemory{:not_atomic, T, CPU}
+const MemoryRef{T} = GenericMemoryRef{:not_atomic, T, CPU}
 
 # simple convert for use by constructors of types in Core
 # note that there is no actual conversion defined here,
@@ -263,6 +348,11 @@ convert(::Type{T}, x::T) where {T} = x
 cconvert(::Type{T}, x) where {T} = convert(T, x)
 unsafe_convert(::Type{T}, x::T) where {T} = x
 
+# will be inserted by the frontend for closures
+_typeof_captured_variable(@nospecialize t) = (@_total_meta; t isa Type && has_free_typevars(t) ? typeof(t) : Typeof(t))
+
+has_free_typevars(@nospecialize t) = (@_total_meta; ccall(:jl_has_free_typevars, Int32, (Any,), t) === Int32(1))
+
 # dispatch token indicating a kwarg (keyword sorter) call
 function kwcall end
 # deprecated internal functions:
@@ -275,16 +365,11 @@ kwftype(@nospecialize(t)) = typeof(kwcall)
 Union{}(a...) = throw(ArgumentError("cannot construct a value of type Union{} for return result"))
 kwcall(kwargs, ::Type{Union{}}, a...) = Union{}(a...)
 
-Expr(@nospecialize args...) = _expr(args...)
-
 abstract type Exception end
 struct ErrorException <: Exception
     msg::AbstractString
 end
 
-macro inline()   Expr(:meta, :inline)   end
-macro noinline() Expr(:meta, :noinline) end
-
 struct BoundsError <: Exception
     a::Any
     i::Any
@@ -300,10 +385,17 @@ struct StackOverflowError  <: Exception end
 struct UndefRefError       <: Exception end
 struct UndefVarError <: Exception
     var::Symbol
+    world::UInt
+    scope # a Module or Symbol or other object describing the context where this variable was looked for (e.g. Main or :local or :static_parameter)
+    UndefVarError(var::Symbol) = new(var, ccall(:jl_get_tls_world_age, UInt, ()))
+    UndefVarError(var::Symbol, @nospecialize scope) = new(var, ccall(:jl_get_tls_world_age, UInt, ()), scope)
 end
 struct ConcurrencyViolationError <: Exception
     msg::AbstractString
 end
+struct MissingCodeError <: Exception
+    mi::MethodInstance
+end
 struct InterruptException <: Exception end
 struct DomainError <: Exception
     val
@@ -318,7 +410,7 @@ struct TypeError <: Exception
     # `context` optionally adds extra detail, e.g. the name of the type parameter
     # that got a bad value.
     func::Symbol
-    context::Union{AbstractString,Symbol}
+    context::Union{AbstractString,GlobalRef,Symbol}
     expected::Type
     got
     TypeError(func, context, @nospecialize(expected::Type), @nospecialize(got)) =
@@ -328,9 +420,8 @@ TypeError(where, @nospecialize(expected::Type), @nospecialize(got)) =
     TypeError(Symbol(where), "", expected, got)
 struct InexactError <: Exception
     func::Symbol
-    T  # Type
-    val
-    InexactError(f::Symbol, @nospecialize(T), @nospecialize(val)) = (@noinline; new(f, T, val))
+    args
+    InexactError(f::Symbol, @nospecialize(args...)) = (@noinline; new(f, args))
 end
 struct OverflowError <: Exception
     msg::AbstractString
@@ -343,13 +434,15 @@ struct UndefKeywordError <: Exception
     var::Symbol
 end
 
+const typemax_UInt = Intrinsics.sext_int(UInt, 0xFF)
+const typemax_Int = Core.Intrinsics.udiv_int(Core.Intrinsics.sext_int(Int, 0xFF), 2)
+
 struct MethodError <: Exception
     f
     args
     world::UInt
     MethodError(@nospecialize(f), @nospecialize(args), world::UInt) = new(f, args, world)
 end
-const typemax_UInt = ccall(:jl_typemax_uint, Any, (Any,), UInt)
 MethodError(@nospecialize(f), @nospecialize(args)) = MethodError(f, args, typemax_UInt)
 
 struct AssertionError <: Exception
@@ -357,6 +450,11 @@ struct AssertionError <: Exception
 end
 AssertionError() = AssertionError("")
 
+struct FieldError <: Exception
+    type::DataType
+    field::Symbol
+end
+
 abstract type WrappedException <: Exception end
 
 struct LoadError <: WrappedException
@@ -370,6 +468,15 @@ struct InitError <: WrappedException
     error
 end
 
+struct ABIOverride
+    abi::Type
+    def::MethodInstance
+    ABIOverride(@nospecialize(abi::Type), def::MethodInstance) = new(abi, def)
+end
+
+struct PrecompilableError <: Exception end
+struct TrimFailure <: Exception end
+
 String(s::String) = s  # no constructor yet
 
 const Cvoid = Nothing
@@ -378,9 +485,13 @@ Nothing() = nothing
 # This should always be inlined
 getptls() = ccall(:jl_get_ptls_states, Ptr{Cvoid}, ())
 
-include(m::Module, fname::String) = ccall(:jl_load_, Any, (Any, Any), m, fname)
+include(m::Module, fname::String) = (@noinline; ccall(:jl_load_, Any, (Any, Any), m, fname))
+eval(m::Module, @nospecialize(e)) = (@noinline; ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e))
 
-eval(m::Module, @nospecialize(e)) = ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e)
+struct EvalInto <: Function
+    m::Module
+end
+(this::EvalInto)(@nospecialize(e)) = eval(this.m, e)
 
 mutable struct Box
     contents::Any
@@ -405,6 +516,30 @@ struct VecElement{T}
 end
 VecElement(arg::T) where {T} = VecElement{T}(arg)
 
+# inference lattice element types (moved from jltypes.c)
+struct Const
+    val
+end
+
+struct PartialStruct
+    typ
+    undefs
+    fields::Array{Any, 1}
+end
+
+struct InterConditional
+    slot::Int
+    thentype
+    elsetype
+end
+
+struct PartialOpaque
+    typ::Type
+    env
+    parent::MethodInstance
+    source
+end
+
 eval(Core, quote
     GotoNode(label::Int) = $(Expr(:new, :GotoNode, :label))
     NewvarNode(slot::SlotNumber) = $(Expr(:new, :NewvarNode, :slot))
@@ -414,13 +549,17 @@ eval(Core, quote
     ReturnNode(@nospecialize val) = $(Expr(:new, :ReturnNode, :val))
     ReturnNode() = $(Expr(:new, :ReturnNode)) # unassigned val indicates unreachable
     GotoIfNot(@nospecialize(cond), dest::Int) = $(Expr(:new, :GotoIfNot, :cond, :dest))
+    EnterNode(dest::Int) = $(Expr(:new, :EnterNode, :dest))
+    EnterNode(dest::Int, @nospecialize(scope)) = $(Expr(:new, :EnterNode, :dest, :scope))
     LineNumberNode(l::Int) = $(Expr(:new, :LineNumberNode, :l, nothing))
     function LineNumberNode(l::Int, @nospecialize(f))
         isa(f, String) && (f = Symbol(f))
         return $(Expr(:new, :LineNumberNode, :l, :f))
     end
-    LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) =
-        $(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at))
+    DebugInfo(def::Union{Method,MethodInstance,Symbol}, linetable::Union{Nothing,DebugInfo}, edges::SimpleVector, codelocs::String) =
+        $(Expr(:new, :DebugInfo, :def, :linetable, :edges, :codelocs))
+    DebugInfo(def::Union{Method,MethodInstance,Symbol}) =
+        $(Expr(:new, :DebugInfo, :def, nothing, Core.svec(), ""))
     SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n))
     PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values))
     PiNode(@nospecialize(val), @nospecialize(typ)) = $(Expr(:new, :PiNode, :val, :typ))
@@ -428,23 +567,33 @@ eval(Core, quote
     UpsilonNode(@nospecialize(val)) = $(Expr(:new, :UpsilonNode, :val))
     UpsilonNode() = $(Expr(:new, :UpsilonNode))
     Const(@nospecialize(v)) = $(Expr(:new, :Const, :v))
-    # NOTE the main constructor is defined within `Core.Compiler`
-    _PartialStruct(@nospecialize(typ), fields::Array{Any, 1}) = $(Expr(:new, :PartialStruct, :typ, :fields))
+    _PartialStruct(@nospecialize(typ), undef, fields::Array{Any, 1}) = $(Expr(:new, :PartialStruct, :typ, :undef, :fields))
     PartialOpaque(@nospecialize(typ), @nospecialize(env), parent::MethodInstance, source) = $(Expr(:new, :PartialOpaque, :typ, :env, :parent, :source))
     InterConditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype)) = $(Expr(:new, :InterConditional, :slot, :thentype, :elsetype))
     MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) = $(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers))
 end)
 
+const NullDebugInfo = DebugInfo(:none)
+
+struct LineInfoNode # legacy support for aiding Serializer.deserialize of old IR
+    mod::Module
+    method
+    file::Symbol
+    line::Int32
+    inlined_at::Int32
+    LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) = new(mod, method, file, line, inlined_at)
+end
+
+
 function CodeInstance(
-    mi::MethodInstance, @nospecialize(rettype), @nospecialize(inferred_const),
+    mi::Union{MethodInstance, ABIOverride}, owner, @nospecialize(rettype), @nospecialize(exctype), @nospecialize(inferred_const),
     @nospecialize(inferred), const_flags::Int32, min_world::UInt, max_world::UInt,
-    ipo_effects::UInt32, effects::UInt32, @nospecialize(argescapes#=::Union{Nothing,Vector{ArgEscapeInfo}}=#),
-    relocatability::UInt8)
+    effects::UInt32, @nospecialize(analysis_results),
+    di::Union{DebugInfo,Nothing}, edges::SimpleVector)
     return ccall(:jl_new_codeinst, Ref{CodeInstance},
-        (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8),
-        mi, rettype, inferred_const, inferred, const_flags, min_world, max_world,
-        ipo_effects, effects, argescapes,
-        relocatability)
+        (Any, Any, Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
+        mi, owner, rettype, exctype, inferred_const, inferred, const_flags, min_world, max_world,
+        effects, analysis_results, di, edges)
 end
 GlobalRef(m::Module, s::Symbol) = ccall(:jl_module_globalref, Ref{GlobalRef}, (Any, Any), m, s)
 Module(name::Symbol=:anonymous, std_imports::Bool=true, default_names::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool, Bool), name, std_imports, default_names)
@@ -453,52 +602,104 @@ function _Task(@nospecialize(f), reserved_stack::Int, completion_future)
     return ccall(:jl_new_task, Ref{Task}, (Any, Any, Int), f, completion_future, reserved_stack)
 end
 
-_is_internal(__module__) = __module__ === Core
-# can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping)
-macro _foldable_meta()
-    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
-        #=:consistent=#true,
-        #=:effect_free=#true,
-        #=:nothrow=#false,
-        #=:terminates_globally=#true,
-        #=:terminates_locally=#false,
-        #=:notaskstate=#false,
-        #=:inaccessiblememonly=#false))
-end
-
 const NTuple{N,T} = Tuple{Vararg{T,N}}
 
 ## primitive Array constructors
 struct UndefInitializer end
 const undef = UndefInitializer()
+
+# type and dimensionality specified
+(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} = memorynew(self, m)
+(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, d::NTuple{1,Int}) where {T,kind,addrspace} = self(undef, getfield(d,1))
+# empty vector constructor
+(self::Type{GenericMemory{kind,T,addrspace}})() where {T,kind,addrspace} = self(undef, 0)
+
+# memoryref is simply convenience wrapper function around memoryrefnew
+memoryref(mem::GenericMemory) = memoryrefnew(mem)
+memoryref(mem::GenericMemory, i::Integer) = memoryrefnew(mem, Int(i), @_boundscheck)
+memoryref(ref::GenericMemoryRef, i::Integer) = memoryrefnew(ref, Int(i), @_boundscheck)
+GenericMemoryRef(mem::GenericMemory) = memoryref(mem)
+GenericMemoryRef(mem::GenericMemory, i::Integer) = memoryref(mem, i)
+GenericMemoryRef(mem::GenericMemoryRef, i::Integer) = memoryref(mem, i)
+
+const AtomicMemory{T} = GenericMemory{:atomic, T, CPU}
+const AtomicMemoryRef{T} = GenericMemoryRef{:atomic, T, CPU}
+
+# construction helpers for Array
+new_as_memoryref(self::Type{GenericMemoryRef{kind,T,addrspace}}, m::Int) where {T,kind,addrspace} = memoryref(fieldtype(self, :mem)(undef, m))
+
+# checked-multiply intrinsic function for dimensions
+_checked_mul_dims() = 1, false
+_checked_mul_dims(m::Int) = m, Intrinsics.ule_int(typemax_Int, m) # equivalently: (m + 1) < 1
+function _checked_mul_dims(m::Int, n::Int)
+    b = Intrinsics.checked_smul_int(m, n)
+    a = getfield(b, 1)
+    ovflw = getfield(b, 2)
+    ovflw = Intrinsics.or_int(ovflw, Intrinsics.ule_int(typemax_Int, m))
+    ovflw = Intrinsics.or_int(ovflw, Intrinsics.ule_int(typemax_Int, n))
+    return a, ovflw
+end
+function _checked_mul_dims(m::Int, d::Int...)
+    @_foldable_meta # the compiler needs to know this loop terminates
+    a = m
+    i = 1
+    ovflw = false
+    neg = Intrinsics.ule_int(typemax_Int, m)
+    zero = false # if m==0 we won't have overflow since we go left to right
+    while Intrinsics.sle_int(i, nfields(d))
+        di = getfield(d, i)
+        b = Intrinsics.checked_smul_int(a, di)
+        zero = Intrinsics.or_int(zero, di === 0)
+        ovflw = Intrinsics.or_int(ovflw, getfield(b, 2))
+        neg = Intrinsics.or_int(neg, Intrinsics.ule_int(typemax_Int, di))
+        a = getfield(b, 1)
+        i = Intrinsics.add_int(i, 1)
+   end
+   return a, Intrinsics.or_int(neg, Intrinsics.and_int(ovflw, Intrinsics.not_int(zero)))
+end
+
+# convert a set of dims to a length, with overflow checking
+checked_dims() = 1
+checked_dims(m::Int) = m # defer this check to Memory constructor instead
+function checked_dims(d::Int...)
+    b = _checked_mul_dims(d...)
+    getfield(b, 2) && throw(ArgumentError("invalid Array dimensions"))
+    return getfield(b, 1)
+end
+
 # type and dimensionality specified, accepting dims as series of Ints
-Array{T,1}(::UndefInitializer, m::Int) where {T} =
-    ccall(:jl_alloc_array_1d, Array{T,1}, (Any, Int), Array{T,1}, m)
-Array{T,2}(::UndefInitializer, m::Int, n::Int) where {T} =
-    ccall(:jl_alloc_array_2d, Array{T,2}, (Any, Int, Int), Array{T,2}, m, n)
-Array{T,3}(::UndefInitializer, m::Int, n::Int, o::Int) where {T} =
-    ccall(:jl_alloc_array_3d, Array{T,3}, (Any, Int, Int, Int), Array{T,3}, m, n, o)
-Array{T,N}(::UndefInitializer, d::Vararg{Int,N}) where {T,N} =
-    ccall(:jl_new_array, Array{T,N}, (Any, Any), Array{T,N}, d)
+eval(Core, :(function (self::Type{Array{T,1}})(::UndefInitializer, m::Int) where {T}
+    mem = fieldtype(fieldtype(self, :ref), :mem)(undef, m)
+    return $(Expr(:new, :self, :(memoryref(mem)), :((m,))))
+end))
+eval(Core, :(function (self::Type{Array{T,2}})(::UndefInitializer, m::Int, n::Int) where {T}
+    return $(Expr(:new, :self, :(new_as_memoryref(fieldtype(self, :ref), checked_dims(m, n))), :((m, n))))
+end))
+eval(Core, :(function (self::Type{Array{T,3}})(::UndefInitializer, m::Int, n::Int, o::Int) where {T}
+    return $(Expr(:new, :self, :(new_as_memoryref(fieldtype(self, :ref), checked_dims(m, n, o))), :((m, n, o))))
+end))
+eval(Core, :(function (self::Type{Array{T, N}})(::UndefInitializer, d::Vararg{Int, N}) where {T, N}
+    return $(Expr(:new, :self, :(new_as_memoryref(fieldtype(self, :ref), checked_dims(d...))), :d))
+end))
 # type and dimensionality specified, accepting dims as tuples of Ints
-Array{T,1}(::UndefInitializer, d::NTuple{1,Int}) where {T} = Array{T,1}(undef, getfield(d,1))
-Array{T,2}(::UndefInitializer, d::NTuple{2,Int}) where {T} = Array{T,2}(undef, getfield(d,1), getfield(d,2))
-Array{T,3}(::UndefInitializer, d::NTuple{3,Int}) where {T} = Array{T,3}(undef, getfield(d,1), getfield(d,2), getfield(d,3))
-Array{T,N}(::UndefInitializer, d::NTuple{N,Int}) where {T,N} = ccall(:jl_new_array, Array{T,N}, (Any, Any), Array{T,N}, d)
+(self::Type{Array{T,1}})(::UndefInitializer, d::NTuple{1, Int}) where {T} = self(undef, getfield(d, 1))
+(self::Type{Array{T,2}})(::UndefInitializer, d::NTuple{2, Int}) where {T} = self(undef, getfield(d, 1), getfield(d, 2))
+(self::Type{Array{T,3}})(::UndefInitializer, d::NTuple{3, Int}) where {T} = self(undef, getfield(d, 1), getfield(d, 2), getfield(d, 3))
+(self::Type{Array{T,N}})(::UndefInitializer, d::NTuple{N, Int}) where {T, N} = self(undef, d...)
 # type but not dimensionality specified
-Array{T}(::UndefInitializer, m::Int) where {T} = Array{T,1}(undef, m)
-Array{T}(::UndefInitializer, m::Int, n::Int) where {T} = Array{T,2}(undef, m, n)
-Array{T}(::UndefInitializer, m::Int, n::Int, o::Int) where {T} = Array{T,3}(undef, m, n, o)
-Array{T}(::UndefInitializer, d::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, d)
+Array{T}(::UndefInitializer, m::Int) where {T} = Array{T, 1}(undef, m)
+Array{T}(::UndefInitializer, m::Int, n::Int) where {T} = Array{T, 2}(undef, m, n)
+Array{T}(::UndefInitializer, m::Int, n::Int, o::Int) where {T} = Array{T, 3}(undef, m, n, o)
+Array{T}(::UndefInitializer, d::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, d)
 # empty vector constructor
-Array{T,1}() where {T} = Array{T,1}(undef, 0)
+(self::Type{Array{T, 1}})() where {T} = self(undef, 0)
 
-(Array{T,N} where T)(x::AbstractArray{S,N}) where {S,N} = Array{S,N}(x)
+(Array{T, N} where T)(x::AbstractArray{S, N}) where {S, N} = Array{S, N}(x)
 
-Array(A::AbstractArray{T,N})    where {T,N}   = Array{T,N}(A)
-Array{T}(A::AbstractArray{S,N}) where {T,N,S} = Array{T,N}(A)
+Array(A::AbstractArray{T, N})    where {T, N}   = Array{T, N}(A)
+Array{T}(A::AbstractArray{S, N}) where {T, N, S} = Array{T, N}(A)
 
-AbstractArray{T}(A::AbstractArray{S,N}) where {T,S,N} = AbstractArray{T,N}(A)
+AbstractArray{T}(A::AbstractArray{S, N}) where {T, S, N} = AbstractArray{T, N}(A)
 
 # primitive Symbol constructors
 
@@ -513,24 +714,74 @@ function Symbol(s::String)
     @noinline
     return _Symbol(ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s), sizeof(s), s)
 end
-function Symbol(a::Array{UInt8,1})
+function Symbol(a::Array{UInt8, 1})
     @noinline
-    return _Symbol(ccall(:jl_array_ptr, Ptr{UInt8}, (Any,), a), Intrinsics.arraylen(a), a)
+    return _Symbol(bitcast(Ptr{UInt8}, a.ref.ptr_or_offset), getfield(a.size, 1), a.ref.mem)
 end
 Symbol(s::Symbol) = s
 
+# Minimal implementations of using/import for bootstrapping (supports only
+# `import .M: a, b, c, ...`, little error checking)
+let
+    fail() = throw(ArgumentError("unsupported import/using while bootstrapping"))
+    length(a::Array{T, 1}) where {T} = getfield(getfield(a, :size), 1)
+    function getindex(A::Array, i::Int)
+        Intrinsics.ult_int(Intrinsics.bitcast(UInt, Intrinsics.sub_int(i, 1)), Intrinsics.bitcast(UInt, length(A))) || fail()
+        memoryrefget(memoryrefnew(getfield(A, :ref), i, false), :not_atomic, false)
+    end
+    x == y = Intrinsics.eq_int(x, y)
+    x + y = Intrinsics.add_int(x, y)
+    x <= y = Intrinsics.sle_int(x, y)
+
+    global function _eval_import(explicit::Bool, to::Module, from::Union{Expr, Nothing}, paths::Expr...)
+        from isa Expr || fail()
+        if length(from.args) == 2 && getindex(from.args, 1) === :.
+            from = getglobal(to, getindex(from.args, 2))
+        elseif length(from.args) == 1 && getindex(from.args, 1) === :Core
+            from = Core
+        elseif length(from.args) == 1 && getindex(from.args, 1) === :Base
+            from = Main.Base
+        else
+            fail()
+        end
+        from isa Module || fail()
+        i = 1
+        while i <= nfields(paths)
+            a = getfield(paths, i).args
+            length(a) == 1 || fail()
+            s = getindex(a, 1)
+            Core._import(to, from, s, s, explicit)
+            i += 1
+        end
+    end
+
+    global function _eval_using(to::Module, path::Expr)
+        getindex(path.args, 1) === :. || fail()
+        from = getglobal(to, getindex(path.args, 2))
+        i = 3
+        while i <= length(path.args)
+            from = getfield(from, getindex(path.args, i))
+            i += 1
+        end
+        from isa Module || fail()
+        Core._using(to, from)
+    end
+end
+
 # module providing the IR object model
+# excluding types already exported by Core (GlobalRef, QuoteNode, Expr, LineNumberNode)
+# any type beyond these is self-quoting (see also Base.isa_ast_node)
 module IR
 
 export CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
     NewvarNode, SSAValue, SlotNumber, Argument,
-    PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode,
-    Const, PartialStruct, InterConditional
+    PiNode, PhiNode, PhiCNode, UpsilonNode, DebugInfo,
+    Const, PartialStruct, InterConditional, EnterNode
 
-import Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
+using Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
     NewvarNode, SSAValue, SlotNumber, Argument,
-    PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode,
-    Const, PartialStruct, InterConditional
+    PiNode, PhiNode, PhiCNode, UpsilonNode, DebugInfo,
+    Const, PartialStruct, InterConditional, EnterNode
 
 end # module IR
 
@@ -543,8 +794,18 @@ end
 macro __doc__(x)
     return Expr(:escape, Expr(:block, Expr(:meta, :doc), x))
 end
-atdoc     = (source, mod, str, expr) -> Expr(:escape, expr)
-atdoc!(λ) = global atdoc = λ
+
+isbasicdoc(@nospecialize x) = (isa(x, Expr) && x.head === :.) || isa(x, Union{QuoteNode, Symbol})
+firstarg(arg1, args...) = arg1
+iscallexpr(ex::Expr) = (isa(ex, Expr) && ex.head === :where) ? iscallexpr(firstarg(ex.args...)) : (isa(ex, Expr) && ex.head === :call)
+iscallexpr(ex) = false
+function ignoredoc(source, mod, str, expr)
+    (isbasicdoc(expr) || iscallexpr(expr)) && return Expr(:escape, nothing)
+    Expr(:escape, expr)
+end
+
+global atdoc = ignoredoc
+atdoc!(λ)    = global atdoc = λ
 
 # macros for big integer syntax
 macro int128_str end
@@ -594,26 +855,12 @@ struct GeneratedFunctionStub
     spnames::SimpleVector
 end
 
-# invoke and wrap the results of @generated expression
-function (g::GeneratedFunctionStub)(world::UInt, source::LineNumberNode, @nospecialize args...)
-    # args is (spvals..., argtypes...)
-    body = g.gen(args...)
-    file = source.file
-    file isa Symbol || (file = :none)
-    lam = Expr(:lambda, Expr(:argnames, g.argnames...).args,
-               Expr(:var"scope-block",
-                    Expr(:block,
-                         source,
-                         Expr(:meta, :push_loc, file, :var"@generated body"),
-                         Expr(:return, body),
-                         Expr(:meta, :pop_loc))))
-    spnames = g.spnames
-    if spnames === svec()
-        return lam
-    else
-        return Expr(Symbol("with-static-parameters"), lam, spnames...)
-    end
-end
+# If the generator is a subtype of this trait, inference caches the generated unoptimized
+# code, sacrificing memory space to improve the performance of subsequent inferences.
+# This tradeoff is not appropriate in general cases (e.g., for `GeneratedFunctionStub`s
+# generated from the front end), but it can be justified for generators involving complex
+# code transformations, such as a Cassette-like system.
+abstract type CachedGenerator end
 
 NamedTuple() = NamedTuple{(),Tuple{}}(())
 
@@ -622,15 +869,13 @@ eval(Core, :(NamedTuple{names}(args::Tuple) where {names} =
 
 using .Intrinsics: sle_int, add_int
 
-eval(Core, :(NamedTuple{names,T}(args::T) where {names, T <: Tuple} =
-             $(Expr(:splatnew, :(NamedTuple{names,T}), :args))))
+eval(Core, :((NT::Type{NamedTuple{names,T}})(args::T) where {names, T <: Tuple} =
+             $(Expr(:splatnew, :NT, :args))))
 
 # constructors for built-in types
 
 import .Intrinsics: eq_int, trunc_int, lshr_int, sub_int, shl_int, bitcast, sext_int, zext_int, and_int
 
-throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = (@noinline; throw(InexactError(f, T, val)))
-
 function is_top_bit_set(x)
     @inline
     eq_int(trunc_int(UInt8, lshr_int(x, sub_int(shl_int(sizeof(x), 3), 1))), trunc_int(UInt8, 1))
@@ -641,9 +886,14 @@ function is_top_bit_set(x::Union{Int8,UInt8})
     eq_int(lshr_int(x, 7), trunc_int(typeof(x), 1))
 end
 
-function check_top_bit(::Type{To}, x) where {To}
+# n.b. This function exists for CUDA to overload to configure error behavior (see #48097)
+throw_inexacterror(func::Symbol, to, val) = throw(InexactError(func, to, val))
+
+function check_sign_bit(::Type{To}, x) where {To}
     @inline
-    is_top_bit_set(x) && throw_inexacterror(:check_top_bit, To, x)
+    # the top bit is the sign bit of x but "sign bit" sounds better in stacktraces
+    # n.b. if x is signed, then sizeof(x) === sizeof(To), otherwise sizeof(x) >= sizeof(To)
+    is_top_bit_set(x) && throw_inexacterror(sizeof(x) === sizeof(To) ? :convert : :trunc, To, x)
     x
 end
 
@@ -668,11 +918,11 @@ toInt8(x::Int16)      = checked_trunc_sint(Int8, x)
 toInt8(x::Int32)      = checked_trunc_sint(Int8, x)
 toInt8(x::Int64)      = checked_trunc_sint(Int8, x)
 toInt8(x::Int128)     = checked_trunc_sint(Int8, x)
-toInt8(x::UInt8)      = bitcast(Int8, check_top_bit(Int8, x))
-toInt8(x::UInt16)     = checked_trunc_sint(Int8, check_top_bit(Int8, x))
-toInt8(x::UInt32)     = checked_trunc_sint(Int8, check_top_bit(Int8, x))
-toInt8(x::UInt64)     = checked_trunc_sint(Int8, check_top_bit(Int8, x))
-toInt8(x::UInt128)    = checked_trunc_sint(Int8, check_top_bit(Int8, x))
+toInt8(x::UInt8)      = bitcast(Int8, check_sign_bit(Int8, x))
+toInt8(x::UInt16)     = checked_trunc_sint(Int8, check_sign_bit(Int8, x))
+toInt8(x::UInt32)     = checked_trunc_sint(Int8, check_sign_bit(Int8, x))
+toInt8(x::UInt64)     = checked_trunc_sint(Int8, check_sign_bit(Int8, x))
+toInt8(x::UInt128)    = checked_trunc_sint(Int8, check_sign_bit(Int8, x))
 toInt8(x::Bool)       = and_int(bitcast(Int8, x), Int8(1))
 toInt16(x::Int8)      = sext_int(Int16, x)
 toInt16(x::Int16)     = x
@@ -680,10 +930,10 @@ toInt16(x::Int32)     = checked_trunc_sint(Int16, x)
 toInt16(x::Int64)     = checked_trunc_sint(Int16, x)
 toInt16(x::Int128)    = checked_trunc_sint(Int16, x)
 toInt16(x::UInt8)     = zext_int(Int16, x)
-toInt16(x::UInt16)    = bitcast(Int16, check_top_bit(Int16, x))
-toInt16(x::UInt32)    = checked_trunc_sint(Int16, check_top_bit(Int16, x))
-toInt16(x::UInt64)    = checked_trunc_sint(Int16, check_top_bit(Int16, x))
-toInt16(x::UInt128)   = checked_trunc_sint(Int16, check_top_bit(Int16, x))
+toInt16(x::UInt16)    = bitcast(Int16, check_sign_bit(Int16, x))
+toInt16(x::UInt32)    = checked_trunc_sint(Int16, check_sign_bit(Int16, x))
+toInt16(x::UInt64)    = checked_trunc_sint(Int16, check_sign_bit(Int16, x))
+toInt16(x::UInt128)   = checked_trunc_sint(Int16, check_sign_bit(Int16, x))
 toInt16(x::Bool)      = and_int(zext_int(Int16, x), Int16(1))
 toInt32(x::Int8)      = sext_int(Int32, x)
 toInt32(x::Int16)     = sext_int(Int32, x)
@@ -692,9 +942,9 @@ toInt32(x::Int64)     = checked_trunc_sint(Int32, x)
 toInt32(x::Int128)    = checked_trunc_sint(Int32, x)
 toInt32(x::UInt8)     = zext_int(Int32, x)
 toInt32(x::UInt16)    = zext_int(Int32, x)
-toInt32(x::UInt32)    = bitcast(Int32, check_top_bit(Int32, x))
-toInt32(x::UInt64)    = checked_trunc_sint(Int32, check_top_bit(Int32, x))
-toInt32(x::UInt128)   = checked_trunc_sint(Int32, check_top_bit(Int32, x))
+toInt32(x::UInt32)    = bitcast(Int32, check_sign_bit(Int32, x))
+toInt32(x::UInt64)    = checked_trunc_sint(Int32, check_sign_bit(Int32, x))
+toInt32(x::UInt128)   = checked_trunc_sint(Int32, check_sign_bit(Int32, x))
 toInt32(x::Bool)      = and_int(zext_int(Int32, x), Int32(1))
 toInt64(x::Int8)      = sext_int(Int64, x)
 toInt64(x::Int16)     = sext_int(Int64, x)
@@ -704,8 +954,8 @@ toInt64(x::Int128)    = checked_trunc_sint(Int64, x)
 toInt64(x::UInt8)     = zext_int(Int64, x)
 toInt64(x::UInt16)    = zext_int(Int64, x)
 toInt64(x::UInt32)    = zext_int(Int64, x)
-toInt64(x::UInt64)    = bitcast(Int64, check_top_bit(Int64, x))
-toInt64(x::UInt128)   = checked_trunc_sint(Int64, check_top_bit(Int64, x))
+toInt64(x::UInt64)    = bitcast(Int64, check_sign_bit(Int64, x))
+toInt64(x::UInt128)   = checked_trunc_sint(Int64, check_sign_bit(Int64, x))
 toInt64(x::Bool)      = and_int(zext_int(Int64, x), Int64(1))
 toInt128(x::Int8)     = sext_int(Int128, x)
 toInt128(x::Int16)    = sext_int(Int128, x)
@@ -716,9 +966,9 @@ toInt128(x::UInt8)    = zext_int(Int128, x)
 toInt128(x::UInt16)   = zext_int(Int128, x)
 toInt128(x::UInt32)   = zext_int(Int128, x)
 toInt128(x::UInt64)   = zext_int(Int128, x)
-toInt128(x::UInt128)  = bitcast(Int128, check_top_bit(Int128, x))
+toInt128(x::UInt128)  = bitcast(Int128, check_sign_bit(Int128, x))
 toInt128(x::Bool)     = and_int(zext_int(Int128, x), Int128(1))
-toUInt8(x::Int8)      = bitcast(UInt8, check_top_bit(UInt8, x))
+toUInt8(x::Int8)      = bitcast(UInt8, check_sign_bit(UInt8, x))
 toUInt8(x::Int16)     = checked_trunc_uint(UInt8, x)
 toUInt8(x::Int32)     = checked_trunc_uint(UInt8, x)
 toUInt8(x::Int64)     = checked_trunc_uint(UInt8, x)
@@ -729,8 +979,8 @@ toUInt8(x::UInt32)    = checked_trunc_uint(UInt8, x)
 toUInt8(x::UInt64)    = checked_trunc_uint(UInt8, x)
 toUInt8(x::UInt128)   = checked_trunc_uint(UInt8, x)
 toUInt8(x::Bool)      = and_int(bitcast(UInt8, x), UInt8(1))
-toUInt16(x::Int8)     = sext_int(UInt16, check_top_bit(UInt16, x))
-toUInt16(x::Int16)    = bitcast(UInt16, check_top_bit(UInt16, x))
+toUInt16(x::Int8)     = sext_int(UInt16, check_sign_bit(UInt16, x))
+toUInt16(x::Int16)    = bitcast(UInt16, check_sign_bit(UInt16, x))
 toUInt16(x::Int32)    = checked_trunc_uint(UInt16, x)
 toUInt16(x::Int64)    = checked_trunc_uint(UInt16, x)
 toUInt16(x::Int128)   = checked_trunc_uint(UInt16, x)
@@ -740,9 +990,9 @@ toUInt16(x::UInt32)   = checked_trunc_uint(UInt16, x)
 toUInt16(x::UInt64)   = checked_trunc_uint(UInt16, x)
 toUInt16(x::UInt128)  = checked_trunc_uint(UInt16, x)
 toUInt16(x::Bool)     = and_int(zext_int(UInt16, x), UInt16(1))
-toUInt32(x::Int8)     = sext_int(UInt32, check_top_bit(UInt32, x))
-toUInt32(x::Int16)    = sext_int(UInt32, check_top_bit(UInt32, x))
-toUInt32(x::Int32)    = bitcast(UInt32, check_top_bit(UInt32, x))
+toUInt32(x::Int8)     = sext_int(UInt32, check_sign_bit(UInt32, x))
+toUInt32(x::Int16)    = sext_int(UInt32, check_sign_bit(UInt32, x))
+toUInt32(x::Int32)    = bitcast(UInt32, check_sign_bit(UInt32, x))
 toUInt32(x::Int64)    = checked_trunc_uint(UInt32, x)
 toUInt32(x::Int128)   = checked_trunc_uint(UInt32, x)
 toUInt32(x::UInt8)    = zext_int(UInt32, x)
@@ -751,10 +1001,10 @@ toUInt32(x::UInt32)   = x
 toUInt32(x::UInt64)   = checked_trunc_uint(UInt32, x)
 toUInt32(x::UInt128)  = checked_trunc_uint(UInt32, x)
 toUInt32(x::Bool)     = and_int(zext_int(UInt32, x), UInt32(1))
-toUInt64(x::Int8)     = sext_int(UInt64, check_top_bit(UInt64, x))
-toUInt64(x::Int16)    = sext_int(UInt64, check_top_bit(UInt64, x))
-toUInt64(x::Int32)    = sext_int(UInt64, check_top_bit(UInt64, x))
-toUInt64(x::Int64)    = bitcast(UInt64, check_top_bit(UInt64, x))
+toUInt64(x::Int8)     = sext_int(UInt64, check_sign_bit(UInt64, x))
+toUInt64(x::Int16)    = sext_int(UInt64, check_sign_bit(UInt64, x))
+toUInt64(x::Int32)    = sext_int(UInt64, check_sign_bit(UInt64, x))
+toUInt64(x::Int64)    = bitcast(UInt64, check_sign_bit(UInt64, x))
 toUInt64(x::Int128)   = checked_trunc_uint(UInt64, x)
 toUInt64(x::UInt8)    = zext_int(UInt64, x)
 toUInt64(x::UInt16)   = zext_int(UInt64, x)
@@ -762,11 +1012,11 @@ toUInt64(x::UInt32)   = zext_int(UInt64, x)
 toUInt64(x::UInt64)   = x
 toUInt64(x::UInt128)  = checked_trunc_uint(UInt64, x)
 toUInt64(x::Bool)     = and_int(zext_int(UInt64, x), UInt64(1))
-toUInt128(x::Int8)    = sext_int(UInt128, check_top_bit(UInt128, x))
-toUInt128(x::Int16)   = sext_int(UInt128, check_top_bit(UInt128, x))
-toUInt128(x::Int32)   = sext_int(UInt128, check_top_bit(UInt128, x))
-toUInt128(x::Int64)   = sext_int(UInt128, check_top_bit(UInt128, x))
-toUInt128(x::Int128)  = bitcast(UInt128, check_top_bit(UInt128, x))
+toUInt128(x::Int8)    = sext_int(UInt128, check_sign_bit(UInt128, x))
+toUInt128(x::Int16)   = sext_int(UInt128, check_sign_bit(UInt128, x))
+toUInt128(x::Int32)   = sext_int(UInt128, check_sign_bit(UInt128, x))
+toUInt128(x::Int64)   = sext_int(UInt128, check_sign_bit(UInt128, x))
+toUInt128(x::Int128)  = bitcast(UInt128, check_sign_bit(UInt128, x))
 toUInt128(x::UInt8)   = zext_int(UInt128, x)
 toUInt128(x::UInt16)  = zext_int(UInt128, x)
 toUInt128(x::UInt32)  = zext_int(UInt128, x)
@@ -795,8 +1045,8 @@ if Int === Int32
 Int64(x::Ptr) = Int64(UInt32(x))
 UInt64(x::Ptr) = UInt64(UInt32(x))
 end
-Ptr{T}(x::Union{Int,UInt,Ptr}) where {T} = bitcast(Ptr{T}, x)
-Ptr{T}() where {T} = Ptr{T}(0)
+(PT::Type{Ptr{T}} where T)(x::Union{Int,UInt,Ptr}=0) = bitcast(PT, x)
+(AS::Type{AddrSpace{Backend}} where Backend)(x::UInt8) = bitcast(AS, x)
 
 Signed(x::UInt8)    = Int8(x)
 Unsigned(x::Int8)   = UInt8(x)
@@ -815,8 +1065,17 @@ Unsigned(x::Union{Float16, Float32, Float64, Bool}) = UInt(x)
 Integer(x::Integer) = x
 Integer(x::Union{Float16, Float32, Float64}) = Int(x)
 
-# Binding for the julia parser, called as
-#
+# During definition of struct type `B`, if an `A.B` expression refers to
+# the eventual global name of the struct, then return the partially-initialized
+# type object.
+# TODO: remove. This is a shim for backwards compatibility.
+function struct_name_shim(@nospecialize(x), name::Symbol, mod::Module, @nospecialize(t))
+    return x === mod ? t : getfield(x, name)
+end
+
+# Bindings for the julia frontend.  The internal jl_parse and jl_lower will call
+# Core._parse and Core._lower respectively (if they are not `nothing`.)
+
 #    Core._parse(text, filename, lineno, offset, options)
 #
 # Parse Julia code from the buffer `text`, starting at `offset` and attributing
@@ -826,14 +1085,23 @@ Integer(x::Union{Float16, Float32, Float64}) = Int(x)
 #
 # `_parse` must return an `svec` containing an `Expr` and the new offset as an
 # `Int`.
-#
-# The internal jl_parse will call into Core._parse if not `nothing`.
 _parse = nothing
 
+#    Core._lower(code, module, filename="none", linenum=0, world=0xfff..., warn=false)
+#
+# Lower `code` (usually Expr), returning `svec(e::Any xs::Any...)` where `e` is
+# the lowered code, and `xs` is possible additional information from
+# JuliaLowering (TBD).
+_lower = nothing
+
 _setparser!(parser) = setglobal!(Core, :_parse, parser)
+_setlowerer!(lowerer) = setglobal!(Core, :_lower, lowerer)
 
-# support for deprecated uses of internal _apply function
-_apply(x...) = Core._apply_iterate(Main.Base.iterate, x...)
+# support for deprecated uses of builtin functions
+_apply(x...) = _apply_iterate(Main.Base.iterate, x...)
+const _apply_pure = _apply
+const _call_latest = invokelatest
+const _call_in_world = invoke_in_world
 
 struct Pair{A, B}
     first::A
@@ -850,8 +1118,57 @@ struct Pair{A, B}
 end
 
 function _hasmethod(@nospecialize(tt)) # this function has a special tfunc
-    world = ccall(:jl_get_tls_world_age, UInt, ())
+    world = ccall(:jl_get_tls_world_age, UInt, ()) # tls_world_age()
     return Intrinsics.not_int(ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), tt, nothing, world) === nothing)
 end
 
+# for backward compat
+arrayref(inbounds::Bool, A::Array, i::Int...) = Main.Base.getindex(A, i...)
+const_arrayref(inbounds::Bool, A::Array, i::Int...) = Main.Base.getindex(A, i...)
+arrayset(inbounds::Bool, A::Array{T}, x::Any, i::Int...) where {T} = Main.Base.setindex!(A, x::T, i...)
+arraysize(a::Array) = a.size
+arraysize(a::Array, i::Int) = sle_int(i, nfields(a.size)) ? getfield(a.size, i) : 1
+const check_top_bit = check_sign_bit
+
+# For convenience
+EnterNode(old::EnterNode, new_dest::Int) = isdefined(old, :scope) ?
+    EnterNode(new_dest, old.scope) : EnterNode(new_dest)
+
+# typename(_).constprop_heuristic
+const FORCE_CONST_PROP      = 0x1
+const ARRAY_INDEX_HEURISTIC = 0x2
+const ITERATE_HEURISTIC     = 0x3
+const SAMETYPE_HEURISTIC    = 0x4
+
+# `typename` has special tfunc support in inference to improve
+# the result for `Type{Union{...}}`. It is defined here, so that the Compiler
+# can look it up by value.
+struct TypeNameError <: Exception
+    a
+    TypeNameError(@nospecialize(a)) = new(a)
+end
+
+typename(a) = throw(TypeNameError(a))
+typename(a::DataType) = a.name
+function typename(a::Union)
+    ta = typename(a.a)
+    tb = typename(a.b)
+    ta === tb || throw(TypeNameError(a))
+    return tb
+end
+typename(union::UnionAll) = typename(union.body)
+
+# Special inference support to avoid execess specialization of these methods.
+# TODO: Replace this by a generic heuristic.
+(>:)(@nospecialize(a), @nospecialize(b)) = (b <: a)
+(!==)(@nospecialize(a), @nospecialize(b)) = Intrinsics.not_int(a === b)
+
+include(Core, "optimized_generics.jl")
+
+# Used only be the magic @VERSION macro
+struct MacroSource
+    lno::Any # ::LineNumberNode, but needs to be a pointer
+    syntax_ver::Any # ::VersionNumber =#
+end
+
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Core, true)
diff --git a/base/broadcast.jl b/base/broadcast.jl
index 1e057789509ed..bbdb296515b66 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -196,14 +196,18 @@ const andand = AndAnd()
 broadcasted(::AndAnd, a, b) = broadcasted((a, b) -> a && b, a, b)
 function broadcasted(::AndAnd, a, bc::Broadcasted)
     bcf = flatten(bc)
-    broadcasted((a, args...) -> a && bcf.f(args...), a, bcf.args...)
+    # Vararg type signature to specialize on args count. This is necessary for performance
+    # and innexpensive because this should only ever get called with 1+N = length(bc.args)
+    broadcasted(((a, args::Vararg{Any, N}) where {N}) -> a && bcf.f(args...), a, bcf.args...)
 end
 struct OrOr end
 const oror = OrOr()
 broadcasted(::OrOr, a, b) = broadcasted((a, b) -> a || b, a, b)
 function broadcasted(::OrOr, a, bc::Broadcasted)
     bcf = flatten(bc)
-    broadcasted((a, args...) -> a || bcf.f(args...), a, bcf.args...)
+    # Vararg type signature to specialize on args count. This is necessary for performance
+    # and innexpensive because this should only ever get called with 1+N = length(bc.args)
+    broadcasted(((a, args::Vararg{Any, N}) where {N}) -> a || bcf.f(args...), a, bcf.args...)
 end
 
 Base.convert(::Type{Broadcasted{NewStyle}}, bc::Broadcasted{<:Any,Axes,F,Args}) where {NewStyle,Axes,F,Args} =
@@ -222,12 +226,12 @@ end
 ## Allocating the output container
 Base.similar(bc::Broadcasted, ::Type{T}) where {T} = similar(bc, T, axes(bc))
 Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{ElType}, dims) where {N,ElType} =
-    similar(Array{ElType}, dims)
+    similar(Array{ElType, length(dims)}, dims)
 Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{Bool}, dims) where N =
     similar(BitArray, dims)
 # In cases of conflict we fall back on Array
 Base.similar(::Broadcasted{ArrayConflict}, ::Type{ElType}, dims) where ElType =
-    similar(Array{ElType}, dims)
+    similar(Array{ElType, length(dims)}, dims)
 Base.similar(::Broadcasted{ArrayConflict}, ::Type{Bool}, dims) =
     similar(BitArray, dims)
 
@@ -246,9 +250,11 @@ BroadcastStyle(::Type{<:Broadcasted{S}}) where {S<:Union{Nothing,Unknown}} =
 argtype(::Type{BC}) where {BC<:Broadcasted} = fieldtype(BC, :args)
 argtype(bc::Broadcasted) = argtype(typeof(bc))
 
-@inline Base.eachindex(bc::Broadcasted) = _eachindex(axes(bc))
-_eachindex(t::Tuple{Any}) = t[1]
-_eachindex(t::Tuple) = CartesianIndices(t)
+@inline Base.eachindex(bc::Broadcasted) = eachindex(IndexStyle(bc), bc)
+@inline Base.eachindex(s::IndexStyle, bc::Broadcasted) = _eachindex(s, axes(bc))
+_eachindex(::IndexCartesian, t::Tuple) = CartesianIndices(t)
+_eachindex(s::IndexLinear, t::Tuple) = eachindex(s, LinearIndices(t))
+_eachindex(::IndexLinear, t::Tuple{Any}) = t[1]
 
 Base.IndexStyle(bc::Broadcasted) = IndexStyle(typeof(bc))
 Base.IndexStyle(::Type{<:Broadcasted{<:Any,<:Tuple{Any}}}) = IndexLinear()
@@ -258,6 +264,17 @@ Base.LinearIndices(bc::Broadcasted{<:Any,<:Tuple{Any}}) = LinearIndices(axes(bc)
 
 Base.ndims(bc::Broadcasted) = ndims(typeof(bc))
 Base.ndims(::Type{<:Broadcasted{<:Any,<:NTuple{N,Any}}}) where {N} = N
+Base.ndims(BC::Type{<:Broadcasted{<:Any,Nothing}}) = _maxndims(argtype(BC))
+function Base.ndims(BC::Type{<:Broadcasted{<:AbstractArrayStyle{N},Nothing}}) where {N}
+    N isa Int ? N : _maxndims(argtype(BC))
+end
+_maxndims(::Type{Tuple{}}) = 0
+_maxndims(::Type{Tuple{T}}) where {T} = T <: Tuple ? 1 : Int(ndims(T))::Int
+function _maxndims(Args::Type{<:Tuple{T,Vararg}}) where {T}
+    m = T <: Tuple ? 1 : Int(ndims(T))::Int
+    n = _maxndims(Base.tuple_type_tail(Args))
+    max(m, n)
+end
 
 Base.size(bc::Broadcasted) = map(length, axes(bc))
 Base.length(bc::Broadcasted) = prod(size(bc))
@@ -274,19 +291,6 @@ Base.@propagate_inbounds function Base.iterate(bc::Broadcasted, s)
 end
 
 Base.IteratorSize(::Type{T}) where {T<:Broadcasted} = Base.HasShape{ndims(T)}()
-Base.ndims(BC::Type{<:Broadcasted{<:Any,Nothing}}) = _maxndims(fieldtype(BC, :args))
-Base.ndims(::Type{<:Broadcasted{<:AbstractArrayStyle{N},Nothing}}) where {N<:Integer} = N
-
-_maxndims(T::Type{<:Tuple}) = reduce(max, (ntuple(n -> _ndims(fieldtype(T, n)), Base._counttuple(T))))
-_maxndims(::Type{<:Tuple{T}}) where {T} = ndims(T)
-_maxndims(::Type{<:Tuple{T}}) where {T<:Tuple} = _ndims(T)
-function _maxndims(::Type{<:Tuple{T, S}}) where {T, S}
-    return T<:Tuple || S<:Tuple ? max(_ndims(T), _ndims(S)) : max(ndims(T), ndims(S))
-end
-
-_ndims(x) = ndims(x)
-_ndims(::Type{<:Tuple}) = 1
-
 Base.IteratorEltype(::Type{<:Broadcasted}) = Base.EltypeUnknown()
 
 ## Instantiation fills in the "missing" fields in Broadcasted.
@@ -341,20 +345,17 @@ function flatten(bc::Broadcasted)
     isflat(bc) && return bc
     # concatenate the nested arguments into {a, b, c, d}
     args = cat_nested(bc)
-    # build a function `makeargs` that takes a "flat" argument list and
-    # and creates the appropriate input arguments for `f`, e.g.,
-    #          makeargs = (w, x, y, z) -> (w, g(x, y), z)
-    #
-    # `makeargs` is built recursively and looks a bit like this:
-    #     makeargs(w, x, y, z) = (w, makeargs1(x, y, z)...)
-    #                          = (w, g(x, y), makeargs2(z)...)
-    #                          = (w, g(x, y), z)
-    let makeargs = make_makeargs(()->(), bc.args), f = bc.f
-        newf = @inline function(args::Vararg{Any,N}) where N
-            f(makeargs(args...)...)
-        end
-        return Broadcasted(bc.style, newf, args, bc.axes)
-    end
+    # build a tuple of functions `makeargs`. Its elements take
+    # the whole "flat" argument list and generate the appropriate
+    # input arguments for the broadcasted function `f`, e.g.,
+    #          makeargs[1] = ((w, x, y, z)) -> w
+    #          makeargs[2] = ((w, x, y, z)) -> g(x, y)
+    #          makeargs[3] = ((w, x, y, z)) -> z
+    makeargs = make_makeargs(bc.args)
+    f = Base.maybeconstructor(bc.f)
+    # TODO: consider specializing on args... if performance problems emerge:
+    newf = (args...) -> (@inline; f(prepare_args(makeargs, args)...))
+    return Broadcasted(bc.style, newf, args, bc.axes)
 end
 
 const NestedTuple = Tuple{<:Broadcasted,Vararg{Any}}
@@ -363,85 +364,54 @@ _isflat(args::NestedTuple) = false
 _isflat(args::Tuple) = _isflat(tail(args))
 _isflat(args::Tuple{}) = true
 
-cat_nested(t::Broadcasted, rest...) = (cat_nested(t.args...)..., cat_nested(rest...)...)
-cat_nested(t::Any, rest...) = (t, cat_nested(rest...)...)
-cat_nested() = ()
+cat_nested(bc::Broadcasted) = cat_nested_args(bc.args)
+cat_nested_args(::Tuple{}) = ()
+cat_nested_args(t::Tuple{Any}) = cat_nested(t[1])
+cat_nested_args(t::Tuple) = (cat_nested(t[1])..., cat_nested_args(tail(t))...)
+cat_nested(a) = (a,)
 
 """
-    make_makeargs(makeargs_tail::Function, t::Tuple) -> Function
+    make_makeargs(t::Tuple)::Tuple{Vararg{Function}}
 
 Each element of `t` is one (consecutive) node in a broadcast tree.
-Ignoring `makeargs_tail` for the moment, the job of `make_makeargs` is
-to return a function that takes in flattened argument list and returns a
-tuple (each entry corresponding to an entry in `t`, having evaluated
-the corresponding element in the broadcast tree). As an additional
-complication, the passed in tuple may be longer than the number of leaves
-in the subtree described by `t`. The `makeargs_tail` function should
-be called on such additional arguments (but not the arguments consumed
-by `t`).
+The returned `Tuple` are functions which take in the (whole) flattened
+list and generate the inputs for the corresponding broadcasted function.
 """
-@inline make_makeargs(makeargs_tail, t::Tuple{}) = makeargs_tail
-@inline function make_makeargs(makeargs_tail, t::Tuple)
-    makeargs = make_makeargs(makeargs_tail, tail(t))
-    (head, tail...)->(head, makeargs(tail...)...)
+make_makeargs(args::Tuple) = _make_makeargs(args, 1)[1]
+
+# We build `makeargs` by traversing the broadcast nodes recursively.
+# note: `n` indicates the flattened index of the next unused argument.
+@inline function _make_makeargs(args::Tuple, n::Int)
+    head, n = _make_makeargs1(args[1], n)
+    rest, n = _make_makeargs(tail(args), n)
+    (head, rest...), n
 end
-function make_makeargs(makeargs_tail, t::Tuple{<:Broadcasted, Vararg{Any}})
-    bc = t[1]
-    # c.f. the same expression in the function on leaf nodes above. Here
-    # we recurse into siblings in the broadcast tree.
-    let makeargs_tail = make_makeargs(makeargs_tail, tail(t)),
-            # Here we recurse into children. It would be valid to pass in makeargs_tail
-            # here, and not use it below. However, in that case, our recursion is no
-            # longer purely structural because we're building up one argument (the closure)
-            # while destructuing another.
-            makeargs_head = make_makeargs((args...)->args, bc.args),
-            f = bc.f
-        # Create two functions, one that splits of the first length(bc.args)
-        # elements from the tuple and one that yields the remaining arguments.
-        # N.B. We can't call headargs on `args...` directly because
-        # args is flattened (i.e. our children have not been evaluated
-        # yet).
-        headargs, tailargs = make_headargs(bc.args), make_tailargs(bc.args)
-        return @inline function(args::Vararg{Any,N}) where N
-            args1 = makeargs_head(args...)
-            a, b = headargs(args1...), makeargs_tail(tailargs(args1...)...)
-            (f(a...), b...)
-        end
-    end
-end
-
-@inline function make_headargs(t::Tuple)
-    let headargs = make_headargs(tail(t))
-        return @inline function(head, tail::Vararg{Any,N}) where N
-            (head, headargs(tail...)...)
-        end
-    end
-end
-@inline function make_headargs(::Tuple{})
-    return @inline function(tail::Vararg{Any,N}) where N
-        ()
-    end
+_make_makeargs(::Tuple{}, n::Int) = (), n
+
+# A help struct to store the flattened index statically
+struct Pick{N} <: Function end
+(::Pick{N})(@nospecialize(args::Tuple)) where {N} = args[N]
+
+# For flat nodes, we just consume one argument (n += 1), and return the "Pick" function
+@inline _make_makeargs1(_, n::Int) = Pick{n}(), n + 1
+# For nested nodes, we form the `makeargs1` based on the child `makeargs` (n += length(cat_nested(bc)))
+@inline function _make_makeargs1(bc::Broadcasted, n::Int)
+    makeargs, n = _make_makeargs(bc.args, n)
+    f = Base.maybeconstructor(bc.f)
+    makeargs1 = (args::Tuple) -> (@inline; f(prepare_args(makeargs, args)...))
+    makeargs1, n
 end
 
-@inline function make_tailargs(t::Tuple)
-    let tailargs = make_tailargs(tail(t))
-        return @inline function(head, tail::Vararg{Any,N}) where N
-            tailargs(tail...)
-        end
-    end
-end
-@inline function make_tailargs(::Tuple{})
-    return @inline function(tail::Vararg{Any,N}) where N
-        tail
-    end
-end
+@inline prepare_args(makeargs::Tuple, @nospecialize(x::Tuple)) = (makeargs[1](x), prepare_args(tail(makeargs), x)...)
+@inline prepare_args(makeargs::Tuple{Any}, @nospecialize(x::Tuple)) = (makeargs[1](x),)
+prepare_args(::Tuple{}, ::Tuple) = ()
 
 ## Broadcasting utilities ##
 
 ## logic for deciding the BroadcastStyle
 
 """
-    combine_styles(cs...) -> BroadcastStyle
+    combine_styles(cs...)::BroadcastStyle
 
 Decides which `BroadcastStyle` to use for any number of value arguments.
 Uses [`BroadcastStyle`](@ref) to get the style for each argument, and uses
@@ -458,11 +428,15 @@ function combine_styles end
 
 combine_styles() = DefaultArrayStyle{0}()
 combine_styles(c) = result_style(BroadcastStyle(typeof(c)))
+function combine_styles(bc::Broadcasted)
+    bc.style isa Union{Nothing,Unknown} || return bc.style
+    throw(ArgumentError("Broadcasted{Unknown} wrappers do not have a style assigned"))
+end
 combine_styles(c1, c2) = result_style(combine_styles(c1), combine_styles(c2))
 @inline combine_styles(c1, c2, cs...) = result_style(combine_styles(c1), combine_styles(c2, cs...))
 
 """
-    result_style(s1::BroadcastStyle[, s2::BroadcastStyle]) -> BroadcastStyle
+    result_style(s1::BroadcastStyle[, s2::BroadcastStyle])::BroadcastStyle
 
 Takes one or two `BroadcastStyle`s and combines them using [`BroadcastStyle`](@ref) to
 determine a common `BroadcastStyle`.
@@ -480,7 +454,9 @@ Base.Broadcast.DefaultArrayStyle{1}()
 function result_style end
 
 result_style(s::BroadcastStyle) = s
-result_style(s1::S, s2::S) where S<:BroadcastStyle = S()
+function result_style(s1::S, s2::S) where S<:BroadcastStyle
+    s1 ≡ s2 ? s1 : error("inconsistent broadcast styles, custom rule needed")
+end
 # Test both orders so users typically only have to declare one order
 result_style(s1, s2) = result_join(s1, s2, BroadcastStyle(s1, s2), BroadcastStyle(s2, s1))
 
@@ -496,7 +472,8 @@ result_join(::Any, ::Any, s::BroadcastStyle, ::Unknown) = s
 result_join(::AbstractArrayStyle, ::AbstractArrayStyle, ::Unknown, ::Unknown) =
     ArrayConflict()
 # Fallbacks in case users define `rule` for both argument-orders (not recommended)
-result_join(::Any, ::Any, ::S, ::S) where S<:BroadcastStyle = S()
+result_join(::Any, ::Any, s1::S, s2::S) where S<:BroadcastStyle = result_style(s1, s2)
+
 @noinline function result_join(::S, ::T, ::U, ::V) where {S,T,U,V}
     error("""
 conflicting broadcast rules defined
@@ -508,7 +485,7 @@ end
 # Indices utilities
 
 """
-    combine_axes(As...) -> Tuple
+    combine_axes(As...)::Tuple
 
 Determine the result axes for broadcasting across all values in `As`.
 
@@ -525,7 +502,7 @@ julia> Broadcast.combine_axes(1, 1, 1)
 combine_axes(A) = axes(A)
 
 """
-    broadcast_shape(As...) -> Tuple
+    broadcast_shape(As...)::Tuple
 
 Determine the result axes for broadcasting across all axes (size Tuples) in `As`.
 
@@ -549,10 +526,10 @@ function _bcs(shape::Tuple, newshape::Tuple)
     return (_bcs1(shape[1], newshape[1]), _bcs(tail(shape), tail(newshape))...)
 end
 # _bcs1 handles the logic for a single dimension
-_bcs1(a::Integer, b::Integer) = a == 1 ? b : (b == 1 ? a : (a == b ? a : throw(DimensionMismatch("arrays could not be broadcast to a common size; got a dimension with lengths $a and $b"))))
-_bcs1(a::Integer, b) = a == 1 ? b : (first(b) == 1 && last(b) == a ? b : throw(DimensionMismatch("arrays could not be broadcast to a common size; got a dimension with lengths $a and $(length(b))")))
+_bcs1(a::Integer, b::Integer) = a == 1 ? b : (b == 1 ? a : (a == b ? a : throw(DimensionMismatch(LazyString("arrays could not be broadcast to a common size; got a dimension with lengths ", a, " and ", b)))))
+_bcs1(a::Integer, b) = a == 1 ? b : (first(b) == 1 && last(b) == a ? b : throw(DimensionMismatch(LazyString("arrays could not be broadcast to a common size; got a dimension with lengths ", a, " and ", length(b)))))
 _bcs1(a, b::Integer) = _bcs1(b, a)
-_bcs1(a, b) = _bcsm(b, a) ? axistype(b, a) : (_bcsm(a, b) ? axistype(a, b) : throw(DimensionMismatch("arrays could not be broadcast to a common size; got a dimension with lengths $(length(a)) and $(length(b))")))
+_bcs1(a, b) = _bcsm(b, a) ? axistype(b, a) : _bcsm(a, b) ? axistype(a, b) : throw(DimensionMismatch(LazyString("arrays could not be broadcast to a common size: a has axes ", a, " and b has axes ", b)))
 # _bcsm tests whether the second index is consistent with the first
 _bcsm(a, b) = a == b || length(b) == 1
 _bcsm(a, b::Number) = b == 1
@@ -603,17 +580,19 @@ an `Int`.
     Any remaining indices in `I` beyond the length of the `keep` tuple are truncated. The `keep` and `default`
     tuples may be created by `newindexer(argument)`.
 """
-Base.@propagate_inbounds newindex(arg, I::CartesianIndex) = CartesianIndex(_newindex(axes(arg), I.I))
-Base.@propagate_inbounds newindex(arg, I::Integer) = CartesianIndex(_newindex(axes(arg), (I,)))
-Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple) = (ifelse(length(ax[1]) == 1, ax[1][1], I[1]), _newindex(tail(ax), tail(I))...)
+Base.@propagate_inbounds newindex(arg, I::CartesianIndex) = to_index(_newindex(axes(arg), I.I))
+Base.@propagate_inbounds newindex(arg, I::Integer) = to_index(_newindex(axes(arg), (I,)))
+Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple) = (ifelse(length(ax[1]) == 1, ax[1][begin], I[1]), _newindex(tail(ax), tail(I))...)
 Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple) = ()
-Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple{}) = (ax[1][1], _newindex(tail(ax), ())...)
+Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple{}) = (ax[1][begin], _newindex(tail(ax), ())...)
 Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple{}) = ()
 
 # If dot-broadcasting were already defined, this would be `ifelse.(keep, I, Idefault)`.
-@inline newindex(I::CartesianIndex, keep, Idefault) = CartesianIndex(_newindex(I.I, keep, Idefault))
-@inline newindex(i::Integer, keep::Tuple, idefault) = ifelse(keep[1], i, idefault[1])
-@inline newindex(i::Integer, keep::Tuple{}, idefault) = CartesianIndex(())
+@inline newindex(I::CartesianIndex, keep, Idefault) = to_index(_newindex(I.I, keep, Idefault))
+@inline newindex(I::CartesianIndex{1}, keep, Idefault) = newindex(I.I[1], keep, Idefault)
+@inline newindex(i::Integer, keep::Tuple, idefault) = CartesianIndex(ifelse(keep[1], Int(i), Int(idefault[1])), idefault[2])
+@inline newindex(i::Integer, keep::Tuple{Bool}, idefault) = ifelse(keep[1], i, idefault[1])
+@inline newindex(i::Integer, keep::Tuple{}, idefault) = CartesianIndex()
 @inline _newindex(I, keep, Idefault) =
     (ifelse(keep[1], I[1], Idefault[1]), _newindex(tail(I), tail(keep), tail(Idefault))...)
 @inline _newindex(I, keep::Tuple{}, Idefault) = ()  # truncate if keep is shorter than I
@@ -631,22 +610,32 @@ Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple{}) = ()
     (Base.length(ind1)::Integer != 1, keep...), (first(ind1), Idefault...)
 end
 
-@inline function Base.getindex(bc::Broadcasted, I::Union{Integer,CartesianIndex})
+Base.@propagate_inbounds function Base.getindex(bc::Broadcasted, Is::Vararg{Union{Integer,CartesianIndex},N}) where {N}
+    I = to_index(Base.IteratorsMD.flatten(Is))
+    _getindex(IndexStyle(bc), bc, I)
+end
+@inline function _getindex(::IndexStyle, bc, I)
     @boundscheck checkbounds(bc, I)
     @inbounds _broadcast_getindex(bc, I)
 end
-Base.@propagate_inbounds Base.getindex(
-    bc::Broadcasted,
-    i1::Union{Integer,CartesianIndex},
-    i2::Union{Integer,CartesianIndex},
-    I::Union{Integer,CartesianIndex}...,
-) =
-    bc[CartesianIndex((i1, i2, I...))]
-Base.@propagate_inbounds Base.getindex(bc::Broadcasted) = bc[CartesianIndex(())]
-
-@inline Base.checkbounds(bc::Broadcasted, I::Union{Integer,CartesianIndex}) =
+Base.@propagate_inbounds function _getindex(s::IndexCartesian, bc, I::Integer)
+    C = CartesianIndices(axes(bc))
+    _getindex(s, bc, C[I])
+end
+Base.@propagate_inbounds function _getindex(s::IndexLinear, bc, I::CartesianIndex)
+    L = LinearIndices(axes(bc))
+    _getindex(s, bc, L[I])
+end
+to_index(::Tuple{}) = CartesianIndex()
+to_index(Is::Tuple{Any}) = Is[1]
+to_index(Is::Tuple) = CartesianIndex(Is)
+
+@inline Base.checkbounds(bc::Broadcasted, I::CartesianIndex) =
     Base.checkbounds_indices(Bool, axes(bc), (I,)) || Base.throw_boundserror(bc, (I,))
 
+@inline Base.checkbounds(bc::Broadcasted, I::Integer) =
+    Base.checkindex(Bool, eachindex(IndexLinear(), bc), I) || Base.throw_boundserror(bc, (I,))
+
 
 """
     _broadcast_getindex(A, I)
@@ -750,8 +739,8 @@ _broadcast_getindex_eltype(A) = eltype(A)  # Tuple, Array, etc.
 eltypes(::Tuple{}) = Tuple{}
 eltypes(t::Tuple{Any}) = Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]))
 eltypes(t::Tuple{Any,Any}) = Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]), _broadcast_getindex_eltype(t[2]))
-# eltypes(t::Tuple) = (TT = eltypes(tail(t)); TT === Union{} ? Union{} : Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]), TT.parameters...))
-eltypes(t::Tuple) = Iterators.TupleOrBottom(ntuple(i -> _broadcast_getindex_eltype(t[i]), Val(length(t)))...)
+eltypes(t::Tuple) = (TT = eltypes(tail(t)); TT === Union{} ? Union{} : Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]), TT.parameters...))
+# eltypes(t::Tuple) = Iterators.TupleOrBottom(ntuple(i -> _broadcast_getindex_eltype(t[i]), Val(length(t)))...)
 
 # Inferred eltype of result of broadcast(f, args...)
 function combine_eltypes(f, args::Tuple)
@@ -782,6 +771,7 @@ The resulting container type is established by the following rules:
  - All other combinations of arguments default to returning an `Array`, but
    custom container types can define their own implementation and promotion-like
    rules to customize the result when they appear as arguments.
+ - The element type is determined in the same manner as in [`collect`](@ref).
 
 A special syntax exists for broadcasting: `f.(args...)` is equivalent to
 `broadcast(f, args...)`, and nested `f.(g.(args...))` calls are fused into a
@@ -1007,26 +997,41 @@ preprocess(dest, x) = extrude(broadcast_unalias(dest, x))
 end
 
 # Performance optimization: for BitArray outputs, we cache the result
-# in a "small" Vector{Bool}, and then copy in chunks into the output
+# in a 64-bit register before writing into memory (to bypass LSQ)
 @inline function copyto!(dest::BitArray, bc::Broadcasted{Nothing})
     axes(dest) == axes(bc) || throwdm(axes(dest), axes(bc))
     ischunkedbroadcast(dest, bc) && return chunkedcopyto!(dest, bc)
-    length(dest) < 256 && return invoke(copyto!, Tuple{AbstractArray, Broadcasted{Nothing}}, dest, bc)
-    tmp = Vector{Bool}(undef, bitcache_size)
-    destc = dest.chunks
-    cind = 1
+    ndims(dest) == 0 && (dest[] = bc[]; return dest)
     bc′ = preprocess(dest, bc)
-    @inbounds for P in Iterators.partition(eachindex(bc′), bitcache_size)
-        ind = 1
-        @simd for I in P
-            tmp[ind] = bc′[I]
-            ind += 1
+    ax = axes(bc′)
+    ax1, out = ax[1], CartesianIndices(tail(ax))
+    destc, indc = dest.chunks, 0
+    bitst, remain = 0, UInt64(0)
+    for I in out
+        i = first(ax1) - 1
+        if ndims(bc) == 1 || bitst >= 64 - length(ax1)
+            if ndims(bc) > 1 && bitst != 0
+                @inbounds @simd for j = bitst:63
+                    remain |= UInt64(convert(Bool, bc′[i+=1, I])) << (j & 63)
+                end
+                @inbounds destc[indc+=1] = remain
+                bitst, remain = 0, UInt64(0)
+            end
+            while i <= last(ax1) - 64
+                z = UInt64(0)
+                @inbounds @simd for j = 0:63
+                    z |= UInt64(convert(Bool, bc′[i+=1, I])) << (j & 63)
+                end
+                @inbounds destc[indc+=1] = z
+            end
         end
-        @simd for i in ind:bitcache_size
-            tmp[i] = false
+        @inbounds @simd for j = i+1:last(ax1)
+            remain |= UInt64(convert(Bool, bc′[j, I])) << (bitst & 63)
+            bitst += 1
         end
-        dumpbitcache(destc, cind, tmp)
-        cind += bitcache_chunks
+    end
+    @inbounds if bitst != 0
+        destc[indc+=1] = remain
     end
     return dest
 end
@@ -1078,7 +1083,7 @@ end
 
 
 @noinline throwdm(axdest, axsrc) =
-    throw(DimensionMismatch("destination axes $axdest are not compatible with source axes $axsrc"))
+    throw(DimensionMismatch(LazyString("destination axes ", axdest, " are not compatible with source axes ", axsrc)))
 
 function restart_copyto_nonleaf!(newdest, dest, bc, val, I, iter, state, count)
     # Function barrier that makes the copying to newdest type stable
diff --git a/base/c.jl b/base/c.jl
index 662986501d59d..6e9633ccb2301 100644
--- a/base/c.jl
+++ b/base/c.jl
@@ -2,7 +2,7 @@
 
 # definitions related to C interface
 
-import Core.Intrinsics: cglobal, bitcast
+import .Intrinsics: cglobal
 
 """
     cglobal((symbol, library) [, type=Cvoid])
@@ -91,7 +91,7 @@ Equivalent to the native `char` c-type.
 Cchar
 
 # The ccall here is equivalent to Sys.iswindows(), but that's not defined yet
-@static if ccall(:jl_get_UNAME, Any, ()) === :NT
+if ccall(:jl_get_UNAME, Any, ()) === :NT
     const Clong = Int32
     const Culong = UInt32
     const Cwchar_t = UInt16
@@ -122,32 +122,7 @@ Equivalent to the native `wchar_t` c-type ([`Int32`](@ref)).
 """
 Cwchar_t
 
-"""
-    Cwstring
-
-A C-style string composed of the native wide character type
-[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For
-C-style strings composed of the native character
-type, see [`Cstring`](@ref). For more information
-about string interoperability with C, see the
-[manual](@ref man-bits-types).
-
-"""
-Cwstring
-
-"""
-    Cstring
-
-A C-style string composed of the native character type
-[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. For
-C-style strings composed of the native wide character
-type, see [`Cwstring`](@ref). For more information
-about string interoperability with C, see the
-[manual](@ref man-bits-types).
-"""
-Cstring
-
-@static if ccall(:jl_get_UNAME, Any, ()) !== :NT
+if ccall(:jl_get_UNAME, Any, ()) !== :NT
     const sizeof_mode_t = ccall(:jl_sizeof_mode_t, Cint, ())
     if sizeof_mode_t == 2
         const Cmode_t = Int16
@@ -155,292 +130,11 @@ Cstring
         const Cmode_t = Int32
     elseif sizeof_mode_t == 8
         const Cmode_t = Int64
+    else
+        error("invalid sizeof mode_t")
     end
 end
 
-# construction from pointers
-Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = bitcast(Cstring, p)
-Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}})       = bitcast(Cwstring, p)
-Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = bitcast(Ptr{T}, p)
-Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}}  = bitcast(Ptr{Cwchar_t}, p)
-
-convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = Cstring(p)
-convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = Cwstring(p)
-convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = Ptr{T}(p)
-convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = Ptr{T}(p)
-
-"""
-    pointer(array [, index])
-
-Get the native address of an array or string, optionally at a given location `index`.
-
-This function is "unsafe". Be careful to ensure that a Julia reference to
-`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref)
-macro should be used to protect the `array` argument from garbage collection
-within a given block of code.
-
-Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity.
-"""
-function pointer end
-
-pointer(p::Cstring) = convert(Ptr{Cchar}, p)
-pointer(p::Cwstring) = convert(Ptr{Cwchar_t}, p)
-
-# comparisons against pointers (mainly to support `cstr==C_NULL`)
-==(x::Union{Cstring,Cwstring}, y::Ptr) = pointer(x) == y
-==(x::Ptr, y::Union{Cstring,Cwstring}) = x == pointer(y)
-
-unsafe_string(s::Cstring) = unsafe_string(convert(Ptr{UInt8}, s))
-
-# convert strings to String etc. to pass as pointers
-cconvert(::Type{Cstring}, s::String) = s
-cconvert(::Type{Cstring}, s::AbstractString) =
-    cconvert(Cstring, String(s)::String)
-
-function cconvert(::Type{Cwstring}, s::AbstractString)
-    v = transcode(Cwchar_t, String(s))
-    !isempty(v) && v[end] == 0 || push!(v, 0)
-    return v
-end
-
-eltype(::Type{Cstring}) = Cchar
-eltype(::Type{Cwstring}) = Cwchar_t
-
-containsnul(p::Ptr, len) =
-    C_NULL != ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len)
-containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
-containsnul(s::AbstractString) = '\0' in s
-
-function unsafe_convert(::Type{Cstring}, s::Union{String,AbstractVector{UInt8}})
-    p = unsafe_convert(Ptr{Cchar}, s)
-    containsnul(p, sizeof(s)) &&
-        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
-    return Cstring(p)
-end
-
-function unsafe_convert(::Type{Cwstring}, v::Vector{Cwchar_t})
-    for i = 1:length(v)-1
-        v[i] == 0 &&
-            throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))"))
-    end
-    v[end] == 0 ||
-        throw(ArgumentError("C string data must be NUL terminated: $(repr(v))"))
-    p = unsafe_convert(Ptr{Cwchar_t}, v)
-    return Cwstring(p)
-end
-
-# symbols are guaranteed not to contain embedded NUL
-cconvert(::Type{Cstring}, s::Symbol) = s
-unsafe_convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s))
-
-@static if ccall(:jl_get_UNAME, Any, ()) === :NT
-"""
-    Base.cwstring(s)
-
-Converts a string `s` to a NUL-terminated `Vector{Cwchar_t}`, suitable for passing to C
-functions expecting a `Ptr{Cwchar_t}`. The main advantage of using this over the implicit
-conversion provided by [`Cwstring`](@ref) is if the function is called multiple times with the
-same argument.
-
-This is only available on Windows.
-"""
-function cwstring(s::AbstractString)
-    bytes = codeunits(String(s))
-    0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
-    return push!(transcode(UInt16, bytes), 0)
-end
-end
-
-# transcoding between data in UTF-8 and UTF-16 for Windows APIs,
-# and also UTF-32 for APIs using Cwchar_t on other platforms.
-
-"""
-    transcode(T, src)
-
-Convert string data between Unicode encodings. `src` is either a
-`String` or a `Vector{UIntXX}` of UTF-XX code units, where
-`XX` is 8, 16, or 32. `T` indicates the encoding of the return value:
-`String` to return a (UTF-8 encoded) `String` or `UIntXX`
-to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref)
-can also be used as the integer type, for converting `wchar_t*` strings
-used by external C libraries.)
-
-The `transcode` function succeeds as long as the input data can be
-reasonably represented in the target encoding; it always succeeds for
-conversions between UTF-XX encodings, even for invalid Unicode data.
-
-Only conversion to/from UTF-8 is currently supported.
-
-# Examples
-```jldoctest
-julia> str = "αβγ"
-"αβγ"
-
-julia> transcode(UInt16, str)
-3-element Vector{UInt16}:
- 0x03b1
- 0x03b2
- 0x03b3
-
-julia> transcode(String, transcode(UInt16, str))
-"αβγ"
-```
-"""
-function transcode end
-
-transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src
-transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src]
-transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}} =
-    transcode(T, String(Vector(src)))
-transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}} =
-    transcode(T, String(src))
-
-function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}})
-    buf = IOBuffer()
-    for c in src
-        print(buf, Char(c))
-    end
-    take!(buf)
-end
-transcode(::Type{String}, src::String) = src
-transcode(T, src::String) = transcode(T, codeunits(src))
-transcode(::Type{String}, src) = String(transcode(UInt8, src))
-
-function transcode(::Type{UInt16}, src::AbstractVector{UInt8})
-    require_one_based_indexing(src)
-    dst = UInt16[]
-    i, n = 1, length(src)
-    n > 0 || return dst
-    sizehint!(dst, 2n)
-    a = src[1]
-    while true
-        if i < n && -64 <= a % Int8 <= -12 # multi-byte character
-            b = src[i += 1]
-            if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b
-                # invalid UTF-8 (non-continuation or too-high code point)
-                push!(dst, a)
-                a = b; continue
-            elseif a < 0xe0 # 2-byte UTF-8
-                push!(dst, xor(0x3080, UInt16(a) << 6, b))
-            elseif i < n # 3/4-byte character
-                c = src[i += 1]
-                if -64 <= (c % Int8) # invalid UTF-8 (non-continuation)
-                    push!(dst, a, b)
-                    a = c; continue
-                elseif a < 0xf0 # 3-byte UTF-8
-                    push!(dst, xor(0x2080, UInt16(a) << 12, UInt16(b) << 6, c))
-                elseif i < n
-                    d = src[i += 1]
-                    if -64 <= (d % Int8) # invalid UTF-8 (non-continuation)
-                        push!(dst, a, b, c)
-                        a = d; continue
-                    elseif a == 0xf0 && b < 0x90 # overlong encoding
-                        push!(dst, xor(0x2080, UInt16(b) << 12, UInt16(c) << 6, d))
-                    else # 4-byte UTF-8
-                        push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4),
-                                   xor(0xdc80, UInt16(c & 0xf) << 6, d))
-                    end
-                else # too short
-                    push!(dst, a, b, c)
-                    break
-                end
-            else # too short
-                push!(dst, a, b)
-                break
-            end
-        else # ASCII or invalid UTF-8 (continuation byte or too-high code point)
-            push!(dst, a)
-        end
-        i < n || break
-        a = src[i += 1]
-    end
-    return dst
-end
-
-function transcode(::Type{UInt8}, src::AbstractVector{UInt16})
-    require_one_based_indexing(src)
-    n = length(src)
-    n == 0 && return UInt8[]
-
-    # Precompute m = sizeof(dst).   This involves annoying duplication
-    # of the loop over the src array.   However, this is not just an
-    # optimization: it is problematic for security reasons to grow
-    # dst dynamically, because Base.winprompt uses this function to
-    # convert passwords to UTF-8 and we don't want to make unintentional
-    # copies of the password data.
-    a = src[1]
-    i, m = 1, 0
-    while true
-        if a < 0x80
-            m += 1
-        elseif a < 0x800 # 2-byte UTF-8
-            m += 2
-        elseif a & 0xfc00 == 0xd800 && i < length(src)
-            b = src[i += 1]
-            if (b & 0xfc00) == 0xdc00 # 2-unit UTF-16 sequence => 4-byte UTF-8
-                m += 4
-            else
-                m += 3
-                a = b; continue
-            end
-        else
-            # 1-unit high UTF-16 or unpaired high surrogate
-            # either way, encode as 3-byte UTF-8 code point
-            m += 3
-        end
-        i < n || break
-        a = src[i += 1]
-    end
-
-    dst = StringVector(m)
-    a = src[1]
-    i, j = 1, 0
-    while true
-        if a < 0x80 # ASCII
-            dst[j += 1] = a % UInt8
-        elseif a < 0x800 # 2-byte UTF-8
-            dst[j += 1] = 0xc0 | ((a >> 6) % UInt8)
-            dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
-        elseif a & 0xfc00 == 0xd800 && i < n
-            b = src[i += 1]
-            if (b & 0xfc00) == 0xdc00
-                # 2-unit UTF-16 sequence => 4-byte UTF-8
-                a += 0x2840
-                dst[j += 1] = 0xf0 | ((a >> 8) % UInt8)
-                dst[j += 1] = 0x80 | ((a % UInt8) >> 2)
-                dst[j += 1] = xor(0xf0, ((a % UInt8) << 4) & 0x3f, (b >> 6) % UInt8)
-                dst[j += 1] = 0x80 | ((b % UInt8) & 0x3f)
-            else
-                dst[j += 1] = 0xe0 | ((a >> 12) % UInt8)
-                dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f)
-                dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
-                a = b; continue
-            end
-        else
-            # 1-unit high UTF-16 or unpaired high surrogate
-            # either way, encode as 3-byte UTF-8 code point
-            dst[j += 1] = 0xe0 | ((a >> 12) % UInt8)
-            dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f)
-            dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
-        end
-        i < n || break
-        a = src[i += 1]
-    end
-    return dst
-end
-
-function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}}
-    transcode(String, unsafe_wrap(Array, p, length; own=false))
-end
-function unsafe_string(cw::Cwstring)
-    p = convert(Ptr{Cwchar_t}, cw)
-    n = 1
-    while unsafe_load(p, n) != 0
-        n += 1
-    end
-    return unsafe_string(p, n - 1)
-end
-
 # deferring (or un-deferring) ctrl-c handler for external C code that
 # is not interrupt safe (see also issue #2622).  The sigatomic_begin/end
 # functions should always be called in matched pairs, ideally via:
@@ -509,11 +203,11 @@ function exit_on_sigint(on::Bool)
     ccall(:jl_exit_on_sigint, Cvoid, (Cint,), on)
 end
 
-function _ccallable(rt::Type, sigt::Type)
-    ccall(:jl_extern_c, Cvoid, (Any, Any), rt, sigt)
+function _ccallable(name::Union{Nothing, String}, rt::Type, sigt::Type)
+    ccall(:jl_extern_c, Cvoid, (Any, Any, Any), name, rt, sigt)
 end
 
-function expand_ccallable(rt, def)
+function expand_ccallable(name, rt, def)
     if isa(def,Expr) && (def.head === :(=) || def.head === :function)
         sig = def.args[1]
         if sig.head === :(::)
@@ -532,16 +226,16 @@ function expand_ccallable(rt, def)
             else
                 f = :(typeof($f))
             end
-            at = map(sig.args[2:end]) do a
-                if isa(a,Expr) && a.head === :(::)
-                    a.args[end]
-                else
-                    :Any
-                end
-            end
+            at = Any[let a = sig.args[i]
+                    if isa(a,Expr) && a.head === :(::)
+                        a.args[end]
+                    else
+                        :Any
+                    end
+                end for i in 2:length(sig.args)]
             return quote
-                $(esc(def))
-                _ccallable($(esc(rt)), $(Expr(:curly, :Tuple, esc(f), map(esc, at)...)))
+                @__doc__ $(esc(def))
+                _ccallable($name, $(esc(rt)), $(Expr(:curly, :Tuple, esc(f), map!(esc, at, at)...)))
             end
         end
     end
@@ -549,16 +243,22 @@ function expand_ccallable(rt, def)
 end
 
 """
-    @ccallable(def)
+    @ccallable ["name"] function f(...)::RetType ... end
 
 Make the annotated function be callable from C using its name. This can, for example,
-be used to expose functionality as a C-API when creating a custom Julia sysimage.
+be used to expose functionality as a C API when creating a custom Julia sysimage.
+
+If the first argument is a string, it is used as the external name of the function.
 """
 macro ccallable(def)
-    expand_ccallable(nothing, def)
+    expand_ccallable(nothing, nothing, def)
 end
 macro ccallable(rt, def)
-    expand_ccallable(rt, def)
+    if rt isa String
+        expand_ccallable(rt, nothing, def)
+    else
+        expand_ccallable(nothing, rt, def)
+    end
 end
 
 # @ccall implementation
@@ -574,26 +274,54 @@ The above input outputs this:
 
     (:printf, :Cvoid, [:Cstring, :Cuint], ["%d", :value])
 """
-function ccall_macro_parse(expr::Expr)
+function ccall_macro_parse(exprs)
+    gc_safe = false
+    expr = nothing
+    if exprs isa Expr
+        expr = exprs
+    elseif length(exprs) == 1
+        expr = exprs[1]
+    elseif length(exprs) == 2
+        gc_expr = exprs[1]
+        expr = exprs[2]
+        if gc_expr.head == :(=) && gc_expr.args[1] == :gc_safe
+            if gc_expr.args[2] == true
+                gc_safe = true
+            elseif gc_expr.args[2] == false
+                gc_safe = false
+            else
+                throw(ArgumentError("gc_safe must be true or false"))
+            end
+        else
+            throw(ArgumentError("@ccall option must be `gc_safe=true` or `gc_safe=false`"))
+        end
+    else
+        throw(ArgumentError("@ccall needs a function signature with a return type"))
+    end
+
     # setup and check for errors
-    if !Meta.isexpr(expr, :(::))
+    if !isexpr(expr, :(::))
         throw(ArgumentError("@ccall needs a function signature with a return type"))
     end
     rettype = expr.args[2]
 
     call = expr.args[1]
-    if !Meta.isexpr(call, :call)
+    if !isexpr(call, :call)
         throw(ArgumentError("@ccall has to take a function call"))
     end
 
     # get the function symbols
     func = let f = call.args[1]
-        if Meta.isexpr(f, :.)
-            :(($(f.args[2]), $(f.args[1])))
-        elseif Meta.isexpr(f, :$)
-            f
+        if isexpr(f, :.)
+            Expr(:tuple, f.args[2], f.args[1])
+        elseif isexpr(f, :$)
+            func = f.args[1]
+            if isa(func, String) || (isa(func, QuoteNode) && !isa(func.value, Ptr)) || isa(func, Tuple) || isexpr(func, :tuple)
+                throw(ArgumentError("interpolated value should be a variable or expression, not a literal name or tuple"))
+            end
+            func
         elseif f isa Symbol
-            QuoteNode(f)
+            Expr(:tuple, QuoteNode(f))
         else
             throw(ArgumentError("@ccall function name must be a symbol, a `.` node (e.g. `libc.printf`) or an interpolated function pointer (with `\$`)"))
         end
@@ -603,7 +331,7 @@ function ccall_macro_parse(expr::Expr)
     varargs = nothing
     argstart = 2
     callargs = call.args
-    if length(callargs) >= 2 && Meta.isexpr(callargs[2], :parameters)
+    if length(callargs) >= 2 && isexpr(callargs[2], :parameters)
         argstart = 3
         varargs = callargs[2].args
     end
@@ -613,7 +341,7 @@ function ccall_macro_parse(expr::Expr)
     types = []
 
     function pusharg!(arg)
-        if !Meta.isexpr(arg, :(::))
+        if !isexpr(arg, :(::))
             throw(ArgumentError("args in @ccall need type annotations. '$arg' doesn't have one."))
         end
         push!(args, arg.args[1])
@@ -625,7 +353,7 @@ function ccall_macro_parse(expr::Expr)
     end
     # add any varargs if necessary
     nreq = 0
-    if !isnothing(varargs)
+    if varargs !== nothing
         if length(args) == 0
             throw(ArgumentError("C ABI prohibits vararg without one required argument"))
         end
@@ -634,33 +362,18 @@ function ccall_macro_parse(expr::Expr)
             pusharg!(a)
         end
     end
-
-    return func, rettype, types, args, nreq
+    return func, rettype, types, args, gc_safe, nreq
 end
 
 
-function ccall_macro_lower(convention, func, rettype, types, args, nreq)
-    statements = []
-
-    # if interpolation was used, ensure the value is a function pointer at runtime.
-    if Meta.isexpr(func, :$)
-        push!(statements, Expr(:(=), :func, esc(func.args[1])))
-        name = QuoteNode(func.args[1])
-        func = :func
-        check = quote
-            if !isa(func, Ptr{Cvoid})
-                name = $name
-                throw(ArgumentError("interpolated function `$name` was not a Ptr{Cvoid}, but $(typeof(func))"))
-            end
-        end
-        push!(statements, check)
+function ccall_macro_lower(convention, func, rettype, types, args, gc_safe, nreq)
+    if convention isa Tuple
+        cconv = Expr(:cconv, (convention..., gc_safe), nreq)
     else
-        func = esc(func)
+        cconv = Expr(:cconv, (convention, UInt16(0), gc_safe), nreq)
     end
-
-    return Expr(:block, statements...,
-                Expr(:call, :ccall, func, Expr(:cconv, convention, nreq), esc(rettype),
-                     Expr(:tuple, map(esc, types)...), map(esc, args)...))
+    return Expr(:call, :ccall, esc(func), cconv, esc(rettype),
+                 Expr(:tuple, map!(esc, types, types)...), map!(esc, args, args)...)
 end
 
 """
@@ -710,11 +423,25 @@ Example using an external library:
 
 The string literal could also be used directly before the function
 name, if desired `"libglib-2.0".g_uri_escape_string(...`
+
+It's possible to declare the ccall as `gc_safe` by using the `gc_safe = true` option:
+
+    @ccall gc_safe=true strlen(s::Cstring)::Csize_t
+
+This allows the garbage collector to run concurrently with the ccall, which can be useful whenever
+the `ccall` may block outside of julia.
+
+!!! warning
+    This option should be used with caution, as it can lead to undefined behavior if the ccall
+    calls back into the julia runtime. (`@cfunction`/`@ccallables` are safe however)
+
+!!! compat "Julia 1.12"
+    The `gc_safe` argument requires Julia 1.12 or higher.
 """
-macro ccall(expr)
-    return ccall_macro_lower(:ccall, ccall_macro_parse(expr)...)
+macro ccall(exprs...)
+    return ccall_macro_lower((:ccall), ccall_macro_parse(exprs)...)
 end
 
-macro ccall_effects(effects::UInt8, expr)
-    return ccall_macro_lower((:ccall, effects), ccall_macro_parse(expr)...)
+macro ccall_effects(effects::UInt16, exprs...)
+    return ccall_macro_lower((:ccall, effects), ccall_macro_parse(exprs)...)
 end
diff --git a/base/cartesian.jl b/base/cartesian.jl
index 5f96a2061880f..9935269c5b1c2 100644
--- a/base/cartesian.jl
+++ b/base/cartesian.jl
@@ -2,7 +2,7 @@
 
 module Cartesian
 
-export @nloops, @nref, @ncall, @nexprs, @nextract, @nall, @nany, @ntuple, @nif
+export @nloops, @nref, @ncall, @ncallkw, @nexprs, @nextract, @nall, @nany, @ntuple, @nif
 
 ### Cartesian-specific macros
 
@@ -36,15 +36,14 @@ If you want just a post-expression, supply [`nothing`](@ref) for the pre-express
 parentheses and semicolons, you can supply multi-statement expressions.
 """
 macro nloops(N, itersym, rangeexpr, args...)
-    _nloops(N, itersym, rangeexpr, args...)
+    _nloops(N, itersym, true, rangeexpr, args...)
 end
 
-function _nloops(N::Int, itersym::Symbol, arraysym::Symbol, args::Expr...)
-    @gensym d
-    _nloops(N, itersym, :($d->Base.axes($arraysym, $d)), args...)
+function _nloops(N::Int, itersym::Symbol, esc_rng::Bool, arraysym::Symbol, args::Expr...)
+    _nloops(N, itersym, false, :(d->axes($(esc(arraysym)), d)), args...)
 end
 
-function _nloops(N::Int, itersym::Symbol, rangeexpr::Expr, args::Expr...)
+function _nloops(N::Int, itersym::Symbol, esc_rng::Bool, rangeexpr::Expr, args::Expr...)
     if rangeexpr.head !== :->
         throw(ArgumentError("second argument must be an anonymous function expression to compute the range"))
     end
@@ -55,14 +54,16 @@ function _nloops(N::Int, itersym::Symbol, rangeexpr::Expr, args::Expr...)
     ex = Expr(:escape, body)
     for dim = 1:N
         itervar = inlineanonymous(itersym, dim)
+        itervar = esc(itervar)
         rng = inlineanonymous(rangeexpr, dim)
-        preexpr = length(args) > 1 ? inlineanonymous(args[1], dim) : (:(nothing))
-        postexpr = length(args) > 2 ? inlineanonymous(args[2], dim) : (:(nothing))
+        esc_rng && (rng = esc(rng))
+        preexpr = length(args) > 1 ? esc(inlineanonymous(args[1], dim)) : nothing
+        postexpr = length(args) > 2 ? esc(inlineanonymous(args[2], dim)) : nothing
         ex = quote
-            for $(esc(itervar)) = $(esc(rng))
-                $(esc(preexpr))
+            for $itervar = $rng
+                $preexpr
                 $ex
-                $(esc(postexpr))
+                $postexpr
             end
         end
     end
@@ -104,10 +105,38 @@ while `@ncall 2 func a b i->c[i]` yields
 macro ncall(N::Int, f, args...)
     pre = args[1:end-1]
     ex = args[end]
-    vars = Any[ inlineanonymous(ex,i) for i = 1:N ]
+    vars = (inlineanonymous(ex, i) for i = 1:N)
     Expr(:escape, Expr(:call, f, pre..., vars...))
 end
 
+"""
+    @ncallkw N f kw sym...
+
+Generate a function call expression with keyword arguments `kw...`. As
+in the case of [`@ncall`](@ref), `sym` represents any number of function arguments, the
+last of which may be an anonymous-function expression and is expanded into `N` arguments.
+
+# Examples
+```jldoctest
+julia> using Base.Cartesian
+
+julia> f(x...; a, b = 1, c = 2, d = 3) = +(x..., a, b, c, d);
+
+julia> x_1, x_2 = (-1, -2); b = 0; kw = (c = 0, d = 0);
+
+julia> @ncallkw 2 f (; a = 0, b, kw...) x
+-3
+
+```
+"""
+macro ncallkw(N::Int, f, kw, args...)
+    pre = args[1:end-1]
+    ex = args[end]
+    vars = (inlineanonymous(ex, i) for i = 1:N)
+    param = Expr(:parameters, Expr(:(...), kw))
+    Expr(:escape, Expr(:call, f, param, pre..., vars...))
+end
+
 """
     @nexprs N expr
 
@@ -262,14 +291,15 @@ struct LReplace{S<:AbstractString}
 end
 LReplace(sym::Symbol, val::Integer) = LReplace(sym, string(sym), val)
 
-lreplace(ex::Expr, sym::Symbol, val) = lreplace!(copy(ex), LReplace(sym, val))
+lreplace(ex::Expr, sym::Symbol, val) = lreplace!(copy(ex), LReplace(sym, val), false, 0)
 
-function lreplace!(sym::Symbol, r::LReplace)
+function lreplace!(sym::Symbol, r::LReplace, in_quote_context::Bool, escs::Int)
+    escs == 0 || return sym
     sym == r.pat_sym && return r.val
-    Symbol(lreplace!(string(sym), r))
+    Symbol(lreplace_string!(string(sym), r))
 end
 
-function lreplace!(str::AbstractString, r::LReplace)
+function lreplace_string!(str::String, r::LReplace)
     i = firstindex(str)
     pat = r.pat_str
     j = firstindex(pat)
@@ -301,7 +331,7 @@ function lreplace!(str::AbstractString, r::LReplace)
         if matching && j > lastindex(pat)
             if i > lastindex(str) || str[i] == '_'
                 # We have a match
-                return string(str[1:prevind(str, istart)], r.val, lreplace!(str[i:end], r))
+                return string(str[1:prevind(str, istart)], r.val, lreplace_string!(str[i:end], r))
             end
             matching = false
             j = firstindex(pat)
@@ -311,24 +341,42 @@ function lreplace!(str::AbstractString, r::LReplace)
     str
 end
 
-function lreplace!(ex::Expr, r::LReplace)
+function lreplace!(ex::Expr, r::LReplace, in_quote_context::Bool, escs::Int)
     # Curly-brace notation, which acts like parentheses
-    if ex.head === :curly && length(ex.args) == 2 && isa(ex.args[1], Symbol) && endswith(string(ex.args[1]::Symbol), "_")
-        excurly = exprresolve(lreplace!(ex.args[2], r))
+    if !in_quote_context && ex.head === :curly && length(ex.args) == 2 && isa(ex.args[1], Symbol) && endswith(string(ex.args[1]::Symbol), "_")
+        excurly = exprresolve(lreplace!(ex.args[2], r, in_quote_context, escs))
         if isa(excurly, Int)
             return Symbol(ex.args[1]::Symbol, excurly)
         else
             ex.args[2] = excurly
             return ex
         end
+    elseif ex.head === :meta || ex.head === :inert
+        return ex
+    elseif ex.head === :$
+        # no longer an executable expression (handle all equivalent forms of :inert, :quote, and QuoteNode the same way)
+        in_quote_context = false
+    elseif ex.head === :quote
+        # executable again
+        in_quote_context = true
+    elseif ex.head === :var"hygienic-scope"
+        # no longer our expression
+        escs += 1
+    elseif ex.head === :escape
+        # our expression again once zero
+        escs == 0 && return ex
+        escs -= 1
+    elseif ex.head === :macrocall
+        # n.b. blithely go about altering arguments to macros also, assuming that is at all what the user intended
+        # it is probably the user's fault if they put a macro inside here and didn't mean for it to get rewritten
     end
     for i in 1:length(ex.args)
-        ex.args[i] = lreplace!(ex.args[i], r)
+        ex.args[i] = lreplace!(ex.args[i], r, in_quote_context, escs)
     end
     ex
 end
 
-lreplace!(arg, r::LReplace) = arg
+lreplace!(@nospecialize(arg), r::LReplace, in_quote_context::Bool, escs::Int) = arg
 
 
 poplinenum(arg) = arg
@@ -347,9 +395,9 @@ end
 
 ## Resolve expressions at parsing time ##
 
-const exprresolve_arith_dict = Dict{Symbol,Function}(:+ => +,
+const exprresolve_arith_dict = IdDict{Symbol,Function}(:+ => +,
     :- => -, :* => *, :/ => /, :^ => ^, :div => div)
-const exprresolve_cond_dict = Dict{Symbol,Function}(:(==) => ==,
+const exprresolve_cond_dict = IdDict{Symbol,Function}(:(==) => ==,
     :(<) => <, :(>) => >, :(<=) => <=, :(>=) => >=)
 
 function exprresolve_arith(ex::Expr)
@@ -374,6 +422,8 @@ function exprresolve_conditional(ex::Expr)
                 return true, exprresolve_cond_dict[callee](ex.args[2], ex.args[3])
             end
         end
+    elseif Meta.isexpr(ex, :block, 2) && ex.args[1] isa LineNumberNode
+        return exprresolve_conditional(ex.args[2])
     end
     false, false
 end
@@ -402,10 +452,16 @@ function exprresolve(ex::Expr)
         return ex.args[1][ex.args[2:end]...]
     end
     # Resolve conditionals
-    if ex.head === :if
+    if ex.head === :if || ex.head === :elseif
         can_eval, tf = exprresolve_conditional(ex.args[1])
         if can_eval
-            ex = tf ? ex.args[2] : ex.args[3]
+            if tf
+                return ex.args[2]
+            elseif length(ex.args) == 3
+                return ex.args[3]
+            else
+                return nothing
+            end
         end
     end
     ex
diff --git a/base/channels.jl b/base/channels.jl
index 1b5b427f92671..bfb393e0265b4 100644
--- a/base/channels.jl
+++ b/base/channels.jl
@@ -13,7 +13,7 @@ popfirst!(c::AbstractChannel) = take!(c)
 """
     Channel{T=Any}(size::Int=0)
 
-Constructs a `Channel` with an internal buffer that can hold a maximum of `size` objects
+Construct a `Channel` with an internal buffer that can hold a maximum of `size` objects
 of type `T`.
 [`put!`](@ref) calls on a full channel block until an object is removed with [`take!`](@ref).
 
@@ -59,9 +59,9 @@ Channel(sz=0) = Channel{Any}(sz)
 
 # special constructors
 """
-    Channel{T=Any}(func::Function, size=0; taskref=nothing, spawn=false)
+    Channel{T=Any}(func::Function, size=0; taskref=nothing, spawn=false, threadpool=nothing)
 
-Create a new task from `func`, bind it to a new channel of type
+Create a new task from `func`, [`bind`](@ref) it to a new channel of type
 `T` and size `size`, and schedule the task, all in a single call.
 The channel is automatically closed when the task terminates.
 
@@ -70,9 +70,14 @@ The channel is automatically closed when the task terminates.
 If you need a reference to the created task, pass a `Ref{Task}` object via
 the keyword argument `taskref`.
 
-If `spawn = true`, the Task created for `func` may be scheduled on another thread
+If `spawn=true`, the `Task` created for `func` may be scheduled on another thread
 in parallel, equivalent to creating a task via [`Threads.@spawn`](@ref).
 
+If `spawn=true` and the `threadpool` argument is not set, it defaults to `:default`.
+
+If the `threadpool` argument is set (to `:default` or `:interactive`), this implies
+that `spawn=true` and the new Task is spawned to the specified threadpool.
+
 Return a `Channel`.
 
 # Examples
@@ -117,24 +122,32 @@ true
     In earlier versions of Julia, Channel used keyword arguments to set `size` and `T`, but
     those constructors are deprecated.
 
+!!! compat "Julia 1.9"
+    The `threadpool=` argument was added in Julia 1.9.
+
 ```jldoctest
 julia> chnl = Channel{Char}(1, spawn=true) do ch
            for c in "hello world"
                put!(ch, c)
            end
-       end
-Channel{Char}(1) (2 items available)
+       end;
 
 julia> String(collect(chnl))
 "hello world"
 ```
 """
-function Channel{T}(func::Function, size=0; taskref=nothing, spawn=false) where T
+function Channel{T}(func::Function, size=0; taskref=nothing, spawn=false, threadpool=nothing) where T
     chnl = Channel{T}(size)
     task = Task(() -> func(chnl))
+    if threadpool === nothing
+        threadpool = :default
+    else
+        spawn = true
+    end
     task.sticky = !spawn
     bind(chnl, task)
     if spawn
+        Threads._spawn_set_thrpool(task, threadpool)
         schedule(task) # start it on (potentially) another thread
     else
         yield(task) # immediately start it, yielding the current thread
@@ -149,17 +162,17 @@ Channel(func::Function, args...; kwargs...) = Channel{Any}(func, args...; kwargs
 # of course not deprecated.)
 # We use `nothing` default values to check which arguments were set in order to throw the
 # deprecation warning if users try to use `spawn=` with `ctype=` or `csize=`.
-function Channel(func::Function; ctype=nothing, csize=nothing, taskref=nothing, spawn=nothing)
+function Channel(func::Function; ctype=nothing, csize=nothing, taskref=nothing, spawn=nothing, threadpool=nothing)
     # The spawn= keyword argument was added in Julia v1.3, and cannot be used with the
     # deprecated keyword arguments `ctype=` or `csize=`.
-    if (ctype !== nothing || csize !== nothing) && spawn !== nothing
-        throw(ArgumentError("Cannot set `spawn=` in the deprecated constructor `Channel(f; ctype=Any, csize=0)`. Please use `Channel{T=Any}(f, size=0; taskref=nothing, spawn=false)` instead!"))
+    if (ctype !== nothing || csize !== nothing) && (spawn !== nothing || threadpool !== nothing)
+        throw(ArgumentError("Cannot set `spawn=` or `threadpool=` in the deprecated constructor `Channel(f; ctype=Any, csize=0)`. Please use `Channel{T=Any}(f, size=0; taskref=nothing, spawn=false, threadpool=nothing)` instead!"))
     end
     # Set the actual default values for the arguments.
     ctype === nothing && (ctype = Any)
     csize === nothing && (csize = 0)
     spawn === nothing && (spawn = false)
-    return Channel{ctype}(func, csize; taskref=taskref, spawn=spawn)
+    return Channel{ctype}(func, csize; taskref=taskref, spawn=spawn, threadpool=threadpool)
 end
 
 closed_exception() = InvalidStateException("Channel is closed.", :closed)
@@ -197,7 +210,69 @@ function close(c::Channel, @nospecialize(excp::Exception))
     end
     nothing
 end
-isopen(c::Channel) = ((@atomic :monotonic c.state) === :open)
+
+"""
+    isopen(c::Channel)
+
+Determine whether a [`Channel`](@ref) is open for new [`put!`](@ref) operations.
+Notice that a `Channel` can be closed and still have buffered elements which can be
+consumed with [`take!`](@ref).
+
+# Examples
+
+## Buffered channel with task
+```jldoctest
+julia> c = Channel(ch -> put!(ch, 1), 1);
+
+julia> isopen(c) # The channel is closed to new `put!`s
+false
+
+julia> isready(c) # The channel is closed but still contains elements
+true
+
+julia> take!(c)
+1
+
+julia> isready(c)
+false
+```
+
+## Unbuffered channel
+```jldoctest
+julia> c = Channel{Int}();
+
+julia> isopen(c)
+true
+
+julia> close(c)
+
+julia> isopen(c)
+false
+```
+"""
+function isopen(c::Channel)
+    # Use acquire here to pair with release store in `close`, so that subsequent `isready` calls
+    # are forced to see `isready == true` if they see `isopen == false`. This means users must
+    # call `isopen` before `isready` if you are using the race-y APIs (or call `iterate`, which
+    # does this right for you).
+    return ((@atomic :acquire c.state) === :open)
+end
+
+"""
+    empty!(c::Channel)
+
+Empty a Channel `c` by calling `empty!` on the internal buffer.
+Return the empty channel.
+"""
+function Base.empty!(c::Channel)
+    @lock c begin
+        ndrop = length(c.data)
+        empty!(c.data)
+        _increment_n_avail(c, -ndrop)
+        notify(c.cond_put)
+    end
+    return c
+end
 
 """
     bind(chnl::Channel, task::Task)
@@ -387,7 +462,7 @@ Note: `fetch` is unsupported on an unbuffered (0-size) `Channel`.
 
 # Examples
 
-Buffered channel:
+## Buffered channel
 ```jldoctest
 julia> c = Channel(3) do ch
            foreach(i -> put!(ch, i), 1:3)
@@ -418,7 +493,6 @@ function fetch_buffered(c::Channel)
 end
 fetch_unbuffered(c::Channel) = throw(ErrorException("`fetch` is not supported on an unbuffered Channel."))
 
-
 """
     take!(c::Channel)
 
@@ -427,7 +501,7 @@ For unbuffered channels, blocks until a [`put!`](@ref) is performed by a differe
 
 # Examples
 
-Buffered channel:
+## Buffered channel
 ```jldoctest
 julia> c = Channel(1);
 
@@ -437,7 +511,7 @@ julia> take!(c)
 1
 ```
 
-Unbuffered channel:
+## Unbuffered channel
 ```jldoctest
 julia> c = Channel(0);
 
@@ -481,14 +555,14 @@ end
 """
     isready(c::Channel)
 
-Determines whether a [`Channel`](@ref) has a value stored in it.
+Determine whether a [`Channel`](@ref) has a value stored in it.
 Returns immediately, does not block.
 
-For unbuffered channels returns `true` if there are tasks waiting on a [`put!`](@ref).
+For unbuffered channels, return `true` if there are tasks waiting on a [`put!`](@ref).
 
 # Examples
 
-Buffered channel:
+## Buffered channel
 ```jldoctest
 julia> c = Channel(1);
 
@@ -501,7 +575,7 @@ julia> isready(c)
 true
 ```
 
-Unbuffered channel:
+## Unbuffered channel
 ```jldoctest
 julia> c = Channel();
 
@@ -515,7 +589,6 @@ julia> schedule(task);  # schedule a put! task
 julia> isready(c)
 true
 ```
-
 """
 isready(c::Channel) = n_avail(c) > 0
 isempty(c::Channel) = n_avail(c) == 0
@@ -524,6 +597,47 @@ function n_avail(c::Channel)
     @atomic :monotonic c.n_avail_items
 end
 
+"""
+    isfull(c::Channel)
+
+Determine if a [`Channel`](@ref) is full, in the sense
+that calling `put!(c, some_value)` would have blocked.
+Returns immediately, does not block.
+
+Note that it may frequently be the case that `put!` will
+not block after this returns `true`. Users must take
+precautions not to accidentally create live-lock bugs
+in their code by calling this method, as these are
+generally harder to debug than deadlocks. It is also
+possible that `put!` will block after this call
+returns `false`, if there are multiple producer
+tasks calling `put!` in parallel.
+
+# Examples
+
+## Buffered channel
+```jldoctest
+julia> c = Channel(1); # capacity = 1
+
+julia> isfull(c)
+false
+
+julia> put!(c, 1);
+
+julia> isfull(c)
+true
+```
+
+## Unbuffered channel
+```jldoctest
+julia> c = Channel(); # capacity = 0
+
+julia> isfull(c) # unbuffered channel is always full
+true
+```
+"""
+isfull(c::Channel) = n_avail(c) ≥ c.sz_max
+
 lock(c::Channel) = lock(c.cond_take)
 lock(f, c::Channel) = lock(f, c.cond_take)
 unlock(c::Channel) = unlock(c.cond_take)
@@ -532,7 +646,7 @@ trylock(c::Channel) = trylock(c.cond_take)
 """
     wait(c::Channel)
 
-Blocks until the `Channel` [`isready`](@ref).
+Block until the `Channel` [`isready`](@ref).
 
 ```jldoctest
 julia> c = Channel(1);
@@ -600,6 +714,15 @@ function iterate(c::Channel, state=nothing)
             end
         end
     else
+        # If the channel was closed with an exception, it needs to be thrown
+        if (@atomic :acquire c.state) === :closed
+            e = c.excp
+            if isa(e, InvalidStateException) && e.state === :closed
+                nothing
+            else
+                throw(e)
+            end
+        end
         return nothing
     end
 end
diff --git a/base/char.jl b/base/char.jl
index 08d661c41de56..a75019bf429f5 100644
--- a/base/char.jl
+++ b/base/char.jl
@@ -1,19 +1,25 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+import Core: AbstractChar, Char
+
 """
 The `AbstractChar` type is the supertype of all character implementations
-in Julia. A character represents a Unicode code point, and can be converted
-to an integer via the [`codepoint`](@ref) function in order to obtain the
-numerical value of the code point, or constructed from the same integer.
-These numerical values determine how characters are compared with `<` and `==`,
-for example.  New `T <: AbstractChar` types should define a `codepoint(::T)`
+in Julia. A character normally represents a Unicode codepoint (and can
+also encapsulate other information from an encoded byte sequence as described below),
+and characters can be converted to integer codepoint values via the [`codepoint`](@ref)
+function, or can be constructed from the same integer.  At least for valid,
+properly encoded Unicode characters, these numerical codepoint values
+determine how characters are compared with `<` and `==`, for example.
+New `T <: AbstractChar` types should define a `codepoint(::T)`
 method and a `T(::UInt32)` constructor, at minimum.
 
 A given `AbstractChar` subtype may be capable of representing only a subset
 of Unicode, in which case conversion from an unsupported `UInt32` value
 may throw an error. Conversely, the built-in [`Char`](@ref) type represents
 a *superset* of Unicode (in order to losslessly encode invalid byte streams),
-in which case conversion of a non-Unicode value *to* `UInt32` throws an error.
+in which case conversion of a non-Unicode value *to* `UInt32` throws an error
+(see [`Base.ismalformed`](@ref)), and on the other hand a `Char` can also represent
+a nonstandard "overlong" encoding ([`Base.isoverlong`](@ref)) of a codepoint.
 The [`isvalid`](@ref) function can be used to check which codepoints are
 representable in a given `AbstractChar` type.
 
@@ -45,14 +51,14 @@ represents a valid Unicode character.
 """
 Char
 
-@constprop :aggressive (::Type{T})(x::Number) where {T<:AbstractChar} = T(UInt32(x))
+@constprop :aggressive (::Type{T})(x::Number) where {T<:AbstractChar} = T(UInt32(x)::UInt32)
 @constprop :aggressive AbstractChar(x::Number) = Char(x)
 @constprop :aggressive (::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(codepoint(x))
 @constprop :aggressive (::Type{T})(x::AbstractChar) where {T<:Union{Int32,Int64}} = codepoint(x) % T
 (::Type{T})(x::T) where {T<:AbstractChar} = x
 
 """
-    ncodeunits(c::Char) -> Int
+    ncodeunits(c::Char)::Int
 
 Return the number of code units required to encode a character as UTF-8.
 This is the number of bytes which will be printed if the character is written
@@ -62,16 +68,32 @@ to an output stream, or `ncodeunits(string(c))` but computed efficiently.
     This method requires at least Julia 1.1. In Julia 1.0 consider
     using `ncodeunits(string(c))`.
 """
-ncodeunits(c::Char) = write(devnull, c) # this is surprisingly efficient
+function ncodeunits(c::Char)
+    u = reinterpret(UInt32, c)
+    # We care about how many trailing bytes are all zero
+    # subtract that from the total number of bytes
+    n_nonzero_bytes = sizeof(UInt32) - div(trailing_zeros(u), 0x8)
+    # Take care of '\0', which has an all-zero bitpattern
+    n_nonzero_bytes + iszero(u)
+end
 
 """
-    codepoint(c::AbstractChar) -> Integer
+    codepoint(c::AbstractChar)::Integer
 
 Return the Unicode codepoint (an unsigned integer) corresponding
-to the character `c` (or throw an exception if `c` does not represent
-a valid character). For `Char`, this is a `UInt32` value, but
+to the character `c` (or throw an exception if `c` represents
+a malformed character). For `Char`, this is a `UInt32` value, but
 `AbstractChar` types that represent only a subset of Unicode may
 return a different-sized integer (e.g. `UInt8`).
+
+Should succeed for any non-malformed character, i.e. when
+[`Base.ismalformed(c)`](@ref) returns `false`.   This includes
+invalid Unicode characters (such as unpaired surrogates)
+and overlong encodings.
+
+!!! compat "Julia 1.12"
+    Prior to Julia 1.12, `codepoint(c)` fails for overlong encodings (when
+    [`Base.isoverlong(c)`](@ref) is `true`), and `Base.decode_overlong(c)` was needed.
 """
 function codepoint end
 
@@ -105,22 +127,31 @@ end
 #           not to support malformed or overlong encodings.
 
 """
-    ismalformed(c::AbstractChar) -> Bool
+    ismalformed(c::AbstractChar)::Bool
 
-Return `true` if `c` represents malformed (non-Unicode) data according to the
+Return `true` if `c` represents malformed (non-codepoint / mis-encoded) data according to the
 encoding used by `c`. Defaults to `false` for non-`Char` types.
 
-See also [`show_invalid`](@ref).
+Any *non*-malformed `c` can be mapped to an integer codepoint
+by [`codepoint(c)`](@ref); this includes codepoints that are
+not valid Unicode characters ([`isvalid(c)`](@ref) is `false`).
+For example, well-formed characters can include invalid Unicode
+codepoints like `'\\U110000'`, unpaired surrogates such as `'\\ud800'`,
+and can also include overlong encodings ([`Base.isoverlong`](@ref)).
+Malformed data, in contrast, cannot be decoded to a codepoint
+(`codepoint` will throw an exception).
+
+See also [`Base.show_invalid`](@ref).
 """
 ismalformed(c::AbstractChar) = false
 
 """
-    isoverlong(c::AbstractChar) -> Bool
+    isoverlong(c::AbstractChar)::Bool
 
 Return `true` if `c` represents an overlong UTF-8 sequence. Defaults
 to `false` for non-`Char` types.
 
-See also [`decode_overlong`](@ref) and [`show_invalid`](@ref).
+See also [`Base.show_invalid`](@ref).
 """
 isoverlong(c::AbstractChar) = false
 
@@ -131,7 +162,7 @@ isoverlong(c::AbstractChar) = false
     l1 = leading_ones(u)
     t0 = trailing_zeros(u) & 56
     (l1 == 1) | (8l1 + t0 > 32) |
-    ((((u & 0x00c0c0c0) ⊻ 0x00808080) >> t0 != 0) | is_overlong_enc(u)) &&
+    (((u & 0x00c0c0c0) ⊻ 0x00808080) >> t0 != 0) &&
         throw_invalid_char(c)
     u &= 0xffffffff >> l1
     u >>= t0
@@ -140,23 +171,21 @@ isoverlong(c::AbstractChar) = false
 end
 
 """
-    decode_overlong(c::AbstractChar) -> Integer
+    decode_overlong(c::AbstractChar)::Integer
 
 When [`isoverlong(c)`](@ref) is `true`, `decode_overlong(c)` returns
-the Unicode codepoint value of `c`. `AbstractChar` implementations
-that support overlong encodings should implement `Base.decode_overlong`.
+the Unicode codepoint value of `c`.   Deprecated in favor of
+`codepoint(c)`.
+
+!!! compat "Julia 1.12"
+    In Julia 1.12 or later, `decode_overlong(c)` simply calls
+    `codepoint(c)`, which should now work for overlong encodings.
+    `AbstractChar` implementations that support overlong encodings
+    should implement `Base.decode_overlong` on older releases.
 """
 function decode_overlong end
 
-@constprop :aggressive function decode_overlong(c::Char)
-    u = bitcast(UInt32, c)
-    l1 = leading_ones(u)
-    t0 = trailing_zeros(u) & 56
-    u &= 0xffffffff >> l1
-    u >>= t0
-    ((u & 0x0000007f) >> 0) | ((u & 0x00007f00) >> 2) |
-    ((u & 0x007f0000) >> 4) | ((u & 0x7f000000) >> 6)
-end
+@constprop :aggressive decode_overlong(c::AbstractChar) = codepoint(c)
 
 @constprop :aggressive function Char(u::UInt32)
     u < 0x80 && return bitcast(Char, u << 24)
@@ -196,7 +225,7 @@ size(c::AbstractChar, d::Integer) = d < 1 ? throw(BoundsError()) : 1
 ndims(c::AbstractChar) = 0
 ndims(::Type{<:AbstractChar}) = 0
 length(c::AbstractChar) = 1
-IteratorSize(::Type{Char}) = HasShape{0}()
+IteratorSize(::Type{<:AbstractChar}) = HasShape{0}()
 firstindex(c::AbstractChar) = 1
 lastindex(c::AbstractChar) = 1
 getindex(c::AbstractChar) = c
@@ -213,14 +242,12 @@ in(x::AbstractChar, y::AbstractChar) = x == y
 ==(x::Char, y::Char) = bitcast(UInt32, x) == bitcast(UInt32, y)
 isless(x::Char, y::Char) = bitcast(UInt32, x) < bitcast(UInt32, y)
 hash(x::Char, h::UInt) =
-    hash_uint64(((bitcast(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))
-
-first_utf8_byte(c::Char) = (bitcast(UInt32, c) >> 24) % UInt8
+    hash_finalizer(((bitcast(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h)) % UInt
 
 # fallbacks:
-isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y))
-==(x::AbstractChar, y::AbstractChar) = Char(x) == Char(y)
-hash(x::AbstractChar, h::UInt) = hash(Char(x), h)
+isless(x::AbstractChar, y::AbstractChar) = isless(Char(x)::Char, Char(y)::Char)
+==(x::AbstractChar, y::AbstractChar) = Char(x)::Char == Char(y)::Char
+hash(x::AbstractChar, h::UInt) = hash(Char(x)::Char, h)
 widen(::Type{T}) where {T<:AbstractChar} = T
 
 @inline -(x::AbstractChar, y::AbstractChar) = Int(x) - Int(y)
@@ -250,7 +277,7 @@ end
 # (Packages may implement other IO subtypes to specify different encodings.)
 # In contrast, `write(io, c)` outputs a `c` in an encoding determined by typeof(c).
 print(io::IO, c::Char) = (write(io, c); nothing)
-print(io::IO, c::AbstractChar) = print(io, Char(c)) # fallback: convert to output UTF-8
+print(io::IO, c::AbstractChar) = print(io, Char(c)::Char) # fallback: convert to output UTF-8
 
 const hex_chars = UInt8['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                         'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
@@ -270,7 +297,7 @@ function show_invalid(io::IO, c::Char)
 end
 
 """
-    show_invalid(io::IO, c::AbstractChar)
+    Base.show_invalid(io::IO, c::AbstractChar)
 
 Called by `show(io, c)` when [`isoverlong(c)`](@ref) or
 [`ismalformed(c)`](@ref) return `true`.   Subclasses
@@ -323,7 +350,7 @@ function show(io::IO, ::MIME"text/plain", c::T) where {T<:AbstractChar}
         print(io, ": ")
         if isoverlong(c)
             print(io, "[overlong] ")
-            u = decode_overlong(c)
+            u = decode_overlong(c) # backwards compat Julia < 1.12
             c = T(u)
         else
             u = codepoint(c)
diff --git a/base/checked.jl b/base/checked.jl
index d5b4112397e84..39d487cba6e37 100644
--- a/base/checked.jl
+++ b/base/checked.jl
@@ -13,15 +13,16 @@ return both the unchecked results and a boolean value denoting the presence of a
 module Checked
 
 export checked_neg, checked_abs, checked_add, checked_sub, checked_mul,
-       checked_div, checked_rem, checked_fld, checked_mod, checked_cld,
+       checked_div, checked_rem, checked_fld, checked_mod, checked_cld, checked_pow,
        checked_length, add_with_overflow, sub_with_overflow, mul_with_overflow
 
-import Core.Intrinsics:
+import Core: Intrinsics
+import .Intrinsics:
        checked_sadd_int, checked_ssub_int, checked_smul_int, checked_sdiv_int,
        checked_srem_int,
        checked_uadd_int, checked_usub_int, checked_umul_int, checked_udiv_int,
        checked_urem_int
-import ..no_op_err, ..@inline, ..@noinline, ..checked_length
+import Base: no_op_err, @inline, @noinline, checked_length
 
 # define promotion behavior for checked operations
 checked_add(x::Integer, y::Integer) = checked_add(promote(x,y)...)
@@ -358,6 +359,19 @@ The overflow protection may impose a perceptible performance penalty.
 """
 checked_cld(x::T, y::T) where {T<:Integer} = cld(x, y) # Base.cld already checks
 
+"""
+    Base.checked_pow(x, y)
+
+Calculates `^(x,y)`, checking for overflow errors where applicable.
+
+The overflow protection may impose a perceptible performance penalty.
+"""
+checked_pow(x::Integer, y::Integer) = checked_power_by_squaring(x, y)
+
+checked_power_by_squaring(x_, p::Integer) = Base.power_by_squaring(x_, p; mul = checked_mul)
+# For Booleans, the default implementation covers all cases.
+checked_power_by_squaring(x::Bool, p::Integer) = Base.power_by_squaring(x, p)
+
 """
     Base.checked_length(r)
 
diff --git a/base/client.jl b/base/client.jl
index 6e30c9991e45e..97c0232493c3c 100644
--- a/base/client.jl
+++ b/base/client.jl
@@ -4,6 +4,7 @@
 ##             and REPL
 
 have_color = nothing
+have_truecolor = nothing
 const default_color_warn = :yellow
 const default_color_error = :light_red
 const default_color_info = :cyan
@@ -31,16 +32,12 @@ stackframe_lineinfo_color() = repl_color("JULIA_STACKFRAME_LINEINFO_COLOR", :bol
 stackframe_function_color() = repl_color("JULIA_STACKFRAME_FUNCTION_COLOR", :bold)
 
 function repl_cmd(cmd, out)
-    shell = shell_split(get(ENV, "JULIA_SHELL", get(ENV, "SHELL", "/bin/sh")))
-    shell_name = Base.basename(shell[1])
-
     # Immediately expand all arguments, so that typing e.g. ~/bin/foo works.
     cmd.exec .= expanduser.(cmd.exec)
 
     if isempty(cmd.exec)
         throw(ArgumentError("no cmd to execute"))
     elseif cmd.exec[1] == "cd"
-        new_oldpwd = pwd()
         if length(cmd.exec) > 2
             throw(ArgumentError("cd method only takes one argument"))
         elseif length(cmd.exec) == 2
@@ -51,28 +48,30 @@ function repl_cmd(cmd, out)
                 end
                 dir = ENV["OLDPWD"]
             end
-            cd(dir)
         else
-            cd()
+            dir = homedir()
+        end
+        try
+            ENV["OLDPWD"] = pwd()
+        catch ex
+            ex isa IOError || rethrow()
+            # if current dir has been deleted, then pwd() will throw an IOError: pwd(): no such file or directory (ENOENT)
+            delete!(ENV, "OLDPWD")
         end
-        ENV["OLDPWD"] = new_oldpwd
+        cd(dir)
         println(out, pwd())
     else
-        @static if !Sys.iswindows()
-            if shell_name == "fish"
-                shell_escape_cmd = "begin; $(shell_escape_posixly(cmd)); and true; end"
-            else
-                shell_escape_cmd = "($(shell_escape_posixly(cmd))) && true"
-            end
+        if !Sys.iswindows()
+            shell = shell_split(get(ENV, "JULIA_SHELL", get(ENV, "SHELL", "/bin/sh")))
+            shell_escape_cmd = shell_escape_posixly(cmd)
             cmd = `$shell -c $shell_escape_cmd`
         end
         try
             run(ignorestatus(cmd))
         catch
-            # Windows doesn't shell out right now (complex issue), so Julia tries to run the program itself
-            # Julia throws an exception if it can't find the program, but the stack trace isn't useful
+            # Julia throws an exception if it can't find the cmd (which may be the shell itself), but the stack trace isn't useful
             lasterr = current_exceptions()
-            lasterr = ExceptionStack([(exception = e[1], backtrace = [] ) for e in lasterr])
+            lasterr = ExceptionStack(NamedTuple[(exception = e[1], backtrace = [] ) for e in lasterr])
             invokelatest(display_error, lasterr)
         end
     end
@@ -94,17 +93,17 @@ function scrub_repl_backtrace(bt)
     if bt !== nothing && !(bt isa Vector{Any}) # ignore our sentinel value types
         bt = bt isa Vector{StackFrame} ? copy(bt) : stacktrace(bt)
         # remove REPL-related frames from interactive printing
-        eval_ind = findlast(frame -> !frame.from_c && frame.func === :eval, bt)
+        eval_ind = findlast(frame -> !frame.from_c && startswith(String(frame.func), "__repl_entry"), bt)
         eval_ind === nothing || deleteat!(bt, eval_ind:length(bt))
     end
     return bt
 end
 scrub_repl_backtrace(stack::ExceptionStack) =
-    ExceptionStack(Any[(;x.exception, backtrace = scrub_repl_backtrace(x.backtrace)) for x in stack])
+    ExceptionStack(NamedTuple[(;x.exception, backtrace = scrub_repl_backtrace(x.backtrace)) for x in stack])
 
 istrivialerror(stack::ExceptionStack) =
-    length(stack) == 1 && length(stack[1].backtrace) ≤ 1
-    # frame 1 = top level; assumes already went through scrub_repl_backtrace
+    length(stack) == 1 && length(stack[1].backtrace) ≤ 1 && !isa(stack[1].exception, MethodError)
+    # frame 1 = top level; assumes already went through scrub_repl_backtrace; MethodError see #50803
 
 function display_error(io::IO, stack::ExceptionStack)
     printstyled(io, "ERROR: "; bold=true, color=Base.error_color())
@@ -121,6 +120,10 @@ function display_error(io::IO, er, bt)
 end
 display_error(er, bt=nothing) = display_error(stderr, er, bt)
 
+# N.B.: Any functions starting with __repl_entry cut off backtraces when printing in the REPL.
+__repl_entry_client_lower(mod::Module, @nospecialize(ast)) = Meta.lower(mod, ast)
+__repl_entry_client_eval(mod::Module, @nospecialize(ast)) = Core.eval(mod, ast)
+
 function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
     errcount = 0
     lasterr = nothing
@@ -137,8 +140,8 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
                 errcount = 0
                 lasterr = nothing
             else
-                ast = Meta.lower(Main, ast)
-                value = Core.eval(Main, ast)
+                ast = __repl_entry_client_lower(Main, ast)
+                value = __repl_entry_client_eval(Main, ast)
                 setglobal!(Base.MainInclude, :ans, value)
                 if !(value === nothing) && show_value
                     if have_color
@@ -170,8 +173,8 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
     nothing
 end
 
-function _parse_input_line_core(s::String, filename::String)
-    ex = Meta.parseall(s, filename=filename)
+function _parse_input_line_core(s::String, filename::String, mod::Union{Module, Nothing})
+    ex = Meta.parseall(s; filename, mod)
     if ex isa Expr && ex.head === :toplevel
         if isempty(ex.args)
             return nothing
@@ -186,18 +189,18 @@ function _parse_input_line_core(s::String, filename::String)
     return ex
 end
 
-function parse_input_line(s::String; filename::String="none", depwarn=true)
+function parse_input_line(s::String; filename::String="none", depwarn=true, mod::Union{Module, Nothing}=nothing)
     # For now, assume all parser warnings are depwarns
     ex = if depwarn
-        _parse_input_line_core(s, filename)
+        _parse_input_line_core(s, filename, mod)
     else
         with_logger(NullLogger()) do
-            _parse_input_line_core(s, filename)
+            _parse_input_line_core(s, filename, mod)
         end
     end
     return ex
 end
-parse_input_line(s::AbstractString) = parse_input_line(String(s))
+parse_input_line(s::AbstractString; kwargs...) = parse_input_line(String(s); kwargs...)
 
 # detect the reason which caused an :incomplete expression
 # from the error message
@@ -217,42 +220,53 @@ function incomplete_tag(ex::Expr)
         return :none
     elseif isempty(ex.args)
         return :other
-    elseif ex.args[1] isa String
-        return fl_incomplete_tag(ex.args[1])
     else
-        return incomplete_tag(ex.args[1])
+        a = ex.args[1]
+        if a isa String
+            return fl_incomplete_tag(a)::Symbol
+        else
+            return incomplete_tag(a)::Symbol
+        end
     end
 end
 incomplete_tag(exc::Meta.ParseError) = incomplete_tag(exc.detail)
 
 function exec_options(opts)
-    quiet                 = (opts.quiet != 0)
     startup               = (opts.startupfile != 2)
-    history_file          = (opts.historyfile != 0)
-    color_set             = (opts.color != 0) # --color!=auto
-    global have_color     = color_set ? (opts.color == 1) : nothing # --color=on
+    global have_color     = colored_text(opts)
     global is_interactive = (opts.isinteractive != 0)
 
+    # Enable verbose debugging options when requested by other frameworks
+    debug_env_vars = (
+        "RUNNER_DEBUG",   # github actions when UI "debug logging" is enabled
+        "CI_DEBUG_TRACE", # gitlab CI when UI "debug" toggle is enabled
+        "SYSTEM_DEBUG",   # azure pipelines when UI "System diagnostics" is enabled
+    )
+    for v in debug_env_vars
+        if get_bool_env(v, false)
+            Base.TRACE_EVAL = Base.TRACE_EVAL === :full ? :full : :loc # Enable --trace-eval (location only)
+            ENV["JULIA_TEST_VERBOSE"] = "true" # Set JULIA_TEST_VERBOSE for this session
+            break
+        end
+    end
+
     # pre-process command line argument list
     arg_is_program = !isempty(ARGS)
     repl = !arg_is_program
     cmds = unsafe_load_commands(opts.commands)
     for (cmd, arg) in cmds
-        if cmd == 'e'
+        if cmd_suppresses_program(cmd)
             arg_is_program = false
             repl = false
-        elseif cmd == 'E'
-            arg_is_program = false
-            repl = false
-        elseif cmd == 'L'
+        elseif cmd == 'L' || cmd == 'm'
             # nothing
         elseif cmd == 'B' # --bug-report
             # If we're doing a bug report, don't load anything else. We will
             # spawn a child in which to execute these options.
             let InteractiveUtils = load_InteractiveUtils()
-                InteractiveUtils.report_bug(arg)
+                invokelatest(InteractiveUtils.report_bug, arg)
             end
-            return nothing
+            return false
         else
             @warn "Unexpected command -$cmd'$arg'"
         end
@@ -265,11 +279,10 @@ function exec_options(opts)
     distributed_mode = (opts.worker == 1) || (opts.nprocs > 0) || (opts.machine_file != C_NULL)
     if distributed_mode
         let Distributed = require(PkgId(UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed"))
-            Core.eval(Main, :(const Distributed = $Distributed))
-            Core.eval(Main, :(using .Distributed))
+            MainInclude.Distributed = Distributed
+            Core.eval(Main, :(using Base.MainInclude.Distributed))
+            invokelatest(Distributed.process_opts, opts)
         end
-
-        invokelatest(Main.Distributed.process_opts, opts)
     end
 
     interactiveinput = (repl || is_interactive::Bool) && isa(stdin, TTY)
@@ -285,6 +298,12 @@ function exec_options(opts)
         end
     end
 
+    # drop all caches if code coverage is enabled. Do it here not earlier, so julia has a chance
+    # of starting up quickly
+    if Base.JLOptions().code_coverage == 2
+        Base.drop_all_caches()
+    end
+
     # process cmds list
     for (cmd, arg) in cmds
         if cmd == 'e'
@@ -292,6 +311,13 @@ function exec_options(opts)
         elseif cmd == 'E'
             invokelatest(show, Core.eval(Main, parse_input_line(arg)))
             println()
+        elseif cmd == 'm'
+            entrypoint = push!(split(arg, "."), "main")
+            Base.eval(Main, Expr(:import, Expr(:., Symbol.(entrypoint)...)))
+            if !invokelatest(should_use_main_entrypoint)
+                error("`main` in `$arg` not declared as entry point (use `@main` to do so)")
+            end
+            return false
         elseif cmd == 'L'
             # load file immediately on all processors
             if !distributed_mode
@@ -324,15 +350,8 @@ function exec_options(opts)
             end
         end
     end
-    if repl || is_interactive::Bool
-        if interactiveinput
-            banner = (opts.banner != 0) # --banner!=no
-        else
-            banner = (opts.banner == 1) # --banner=yes
-        end
-        run_main_repl(interactiveinput, quiet, banner, history_file, color_set)
-    end
-    nothing
+
+    return repl
 end
 
 function _global_julia_startup_file()
@@ -340,11 +359,13 @@ function _global_julia_startup_file()
     # If it is not found, then continue on to the relative path based on Sys.BINDIR
     BINDIR = Sys.BINDIR
     SYSCONFDIR = Base.SYSCONFDIR
+    p1 = nothing
     if !isempty(SYSCONFDIR)
         p1 = abspath(BINDIR, SYSCONFDIR, "julia", "startup.jl")
         isfile(p1) && return p1
     end
     p2 = abspath(BINDIR, "..", "etc", "julia", "startup.jl")
+    p1 == p2 && return nothing # don't check the same path twice
     isfile(p2) && return p2
     return nothing
 end
@@ -391,86 +412,68 @@ _atreplinit(repl) = invokelatest(__atreplinit, repl)
 
 function load_InteractiveUtils(mod::Module=Main)
     # load interactive-only libraries
-    if !isdefined(mod, :InteractiveUtils)
+    if !isdefined(MainInclude, :InteractiveUtils)
         try
-            let InteractiveUtils = require(PkgId(UUID(0xb77e0a4c_d291_57a0_90e8_8db25a27a240), "InteractiveUtils"))
-                Core.eval(mod, :(const InteractiveUtils = $InteractiveUtils))
-                Core.eval(mod, :(using .InteractiveUtils))
-                return InteractiveUtils
+            # TODO: we have to use require_stdlib here because it is a dependency of REPL, but we would sort of prefer not to
+            let InteractiveUtils = require_stdlib(PkgId(UUID(0xb77e0a4c_d291_57a0_90e8_8db25a27a240), "InteractiveUtils"))
+                MainInclude.InteractiveUtils = InteractiveUtils
             end
         catch ex
             @warn "Failed to import InteractiveUtils into module $mod" exception=(ex, catch_backtrace())
+            return nothing
         end
-        return nothing
     end
-    return getfield(mod, :InteractiveUtils)
+    return Core.eval(mod, :(using Base.MainInclude.InteractiveUtils; Base.MainInclude.InteractiveUtils))
 end
 
-global active_repl
+function load_REPL()
+    # load interactive-only libraries
+    try
+        return Base.require_stdlib(PkgId(UUID(0x3fa0cd96_eef1_5676_8a61_b3b8758bbffb), "REPL"))
+    catch ex
+        @warn "Failed to import REPL" exception=(ex, catch_backtrace())
+    end
+    return nothing
+end
 
-# run the requested sort of evaluation loop on stdio
-function run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_file::Bool, color_set::Bool)
-    load_InteractiveUtils()
-
-    if interactive && isassigned(REPL_MODULE_REF)
-        invokelatest(REPL_MODULE_REF[]) do REPL
-            term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb")
-            term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr)
-            banner && Base.banner(term)
-            if term.term_type == "dumb"
-                repl = REPL.BasicREPL(term)
-                quiet || @warn "Terminal not fully functional"
+global active_repl::Any
+global active_repl_backend = nothing
+
+function run_fallback_repl(interactive::Bool)
+    let input = stdin
+        if isa(input, File) || isa(input, IOStream)
+            # for files, we can slurp in the whole thing at once
+            ex = parse_input_line(read(input, String); mod=Main)
+            if Meta.isexpr(ex, :toplevel)
+                # if we get back a list of statements, eval them sequentially
+                # as if we had parsed them sequentially
+                for stmt in ex.args
+                    eval_user_input(stderr, stmt, true)
+                end
+                body = ex.args
             else
-                repl = REPL.LineEditREPL(term, get(stdout, :color, false), true)
-                repl.history_file = history_file
+                eval_user_input(stderr, ex, true)
             end
-            global active_repl = repl
-            # Make sure any displays pushed in .julia/config/startup.jl ends up above the
-            # REPLDisplay
-            pushdisplay(REPL.REPLDisplay(repl))
-            _atreplinit(repl)
-            REPL.run_repl(repl, backend->(global active_repl_backend = backend))
-        end
-    else
-        # otherwise provide a simple fallback
-        if interactive && !quiet
-            @warn "REPL provider not available: using basic fallback"
-        end
-        banner && Base.banner()
-        let input = stdin
-            if isa(input, File) || isa(input, IOStream)
-                # for files, we can slurp in the whole thing at once
-                ex = parse_input_line(read(input, String))
-                if Meta.isexpr(ex, :toplevel)
-                    # if we get back a list of statements, eval them sequentially
-                    # as if we had parsed them sequentially
-                    for stmt in ex.args
-                        eval_user_input(stderr, stmt, true)
-                    end
-                    body = ex.args
-                else
-                    eval_user_input(stderr, ex, true)
+        else
+            while true
+                if interactive
+                    print("julia> ")
+                    flush(stdout)
                 end
-            else
-                while isopen(input) || !eof(input)
-                    if interactive
-                        print("julia> ")
-                        flush(stdout)
-                    end
-                    try
-                        line = ""
-                        ex = nothing
-                        while !eof(input)
-                            line *= readline(input, keep=true)
-                            ex = parse_input_line(line)
-                            if !(isa(ex, Expr) && ex.head === :incomplete)
-                                break
-                            end
+                eof(input) && break
+                try
+                    line = ""
+                    ex = nothing
+                    while !eof(input)
+                        line *= readline(input, keep=true)
+                        ex = parse_input_line(line; mod=Main)
+                        if !(isa(ex, Expr) && ex.head === :incomplete)
+                            break
                         end
-                        eval_user_input(stderr, ex, true)
-                    catch err
-                        isa(err, InterruptException) ? print("\n\n") : rethrow()
                     end
+                    eval_user_input(stderr, ex, true)
+                catch err
+                    isa(err, InterruptException) ? print("\n\n") : rethrow()
                 end
             end
         end
@@ -478,17 +481,61 @@ function run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_fil
     nothing
 end
 
-# MainInclude exists to hide Main.include and eval from `names(Main)`.
+function run_std_repl(REPL::Module, quiet::Bool, banner::Symbol, history_file::Bool)
+    term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb")
+    term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr)
+    banner == :no || REPL.banner(term, short=banner==:short)
+    if term.term_type == "dumb"
+        repl = REPL.BasicREPL(term)
+        quiet || @warn "Terminal not fully functional"
+    else
+        repl = REPL.LineEditREPL(term, get(stdout, :color, false), true)
+        repl.history_file = history_file
+    end
+    # Make sure any displays pushed in .julia/config/startup.jl ends up above the
+    # REPLDisplay
+    d = REPL.REPLDisplay(repl)
+    last_active_repl = @isdefined(active_repl) ? active_repl : nothing
+    last_active_repl_backend = active_repl_backend
+    global active_repl = repl
+    pushdisplay(d)
+    try
+        global active_repl = repl
+        _atreplinit(repl)
+        REPL.run_repl(repl, backend->(global active_repl_backend = backend))
+    finally
+        popdisplay(d)
+        active_repl = last_active_repl
+        active_repl_backend = last_active_repl_backend
+    end
+    nothing
+end
+
+# run the requested sort of evaluation loop on stdio
+function run_main_repl(interactive::Bool, quiet::Bool, banner::Symbol, history_file::Bool)
+    fallback_repl = parse(Bool, get(ENV, "JULIA_FALLBACK_REPL", "false"))
+    if !fallback_repl && interactive
+        load_InteractiveUtils()
+        REPL = REPL_MODULE_REF[]
+        if REPL === Base
+            load_REPL()
+        end
+    end
+    REPL = REPL_MODULE_REF[]
+    if !fallback_repl && interactive && REPL !== Base
+        invokelatest(run_std_repl, REPL, quiet, banner, history_file)
+    else
+        if !fallback_repl && interactive && !quiet
+            @warn "REPL provider not available: using basic fallback" LOAD_PATH=join(Base.LOAD_PATH, Sys.iswindows() ? ';' : ':')
+        end
+        run_fallback_repl(interactive)
+    end
+    nothing
+end
+
+# MainInclude exists to weakly add certain identifiers to Main
 baremodule MainInclude
 using ..Base
-# These definitions calls Base._include rather than Base.include to get
-# one-frame stacktraces for the common case of using include(fname) in Main.
-include(mapexpr::Function, fname::AbstractString) = Base._include(mapexpr, Main, fname)
-function include(fname::AbstractString)
-    isa(fname, String) || (fname = Base.convert(String, fname)::String)
-    Base._include(identity, Main, fname)
-end
-eval(x) = Core.eval(Main, x)
 
 """
     ans
@@ -505,56 +552,137 @@ The thrown errors are collected in a stack of exceptions.
 """
 global err = nothing
 
+# Used for memoizing require_stdlib of these modules
+global InteractiveUtils::Module
+global Distributed::Module
+
 # weakly exposes ans and err variables to Main
 export ans, err
-
 end
 
-"""
-    eval(expr)
-
-Evaluate an expression in the global scope of the containing module.
-Every `Module` (except those defined with `baremodule`) has its own 1-argument
-definition of `eval`, which evaluates expressions in that module.
-"""
-MainInclude.eval
-
-"""
-    include([mapexpr::Function,] path::AbstractString)
-
-Evaluate the contents of the input source file in the global scope of the containing module.
-Every module (except those defined with `baremodule`) has its own
-definition of `include`, which evaluates the file in that module.
-Returns the result of the last evaluated expression of the input file. During including,
-a task-local include path is set to the directory containing the file. Nested calls to
-`include` will search relative to that path. This function is typically used to load source
-interactively, or to combine files in packages that are broken into multiple source files.
-The argument `path` is normalized using [`normpath`](@ref) which will resolve
-relative path tokens such as `..` and convert `/` to the appropriate path separator.
-
-The optional first argument `mapexpr` can be used to transform the included code before
-it is evaluated: for each parsed expression `expr` in `path`, the `include` function
-actually evaluates `mapexpr(expr)`.  If it is omitted, `mapexpr` defaults to [`identity`](@ref).
-
-Use [`Base.include`](@ref) to evaluate a file into another module.
-
-!!! compat "Julia 1.5"
-    Julia 1.5 is required for passing the `mapexpr` argument.
-"""
-MainInclude.include
+function should_use_main_entrypoint()
+    isdefined(Main, :main) || return false
+    M_binding_owner = Base.binding_module(Main, :main)
+    (isdefined(M_binding_owner, Symbol("#__main_is_entrypoint__#")) && M_binding_owner.var"#__main_is_entrypoint__#") || return false
+    return true
+end
 
 function _start()
     empty!(ARGS)
     append!(ARGS, Core.ARGS)
     # clear any postoutput hooks that were saved in the sysimage
     empty!(Base.postoutput_hooks)
+    local ret = 0
     try
-        exec_options(JLOptions())
+        repl_was_requested = exec_options(JLOptions())
+        if invokelatest(should_use_main_entrypoint) && !is_interactive
+            main = invokelatest(getglobal, Main, :main)
+            if Base.generating_output()
+                precompile(main, (typeof(ARGS),))
+            else
+                ret = invokelatest(main, ARGS)
+            end
+        elseif (repl_was_requested || is_interactive)
+            # Run the Base `main`, which will either load the REPL stdlib
+            # or run the fallback REPL
+            ret = repl_main(ARGS)
+        end
+        ret === nothing && (ret = 0)
+        ret = try
+            Cint(ret)
+        catch
+            @error "The return value of `main` should be `nothing` or convertible to `Cint`"
+            Cint(1)
+        end
     catch
+        ret = Cint(1)
         invokelatest(display_error, scrub_repl_backtrace(current_exceptions()))
-        exit(1)
     end
     if is_interactive && get(stdout, :color, false)
         print(color_normal)
     end
+    return ret
+end
+
+function repl_main(_)
+    opts = Base.JLOptions()
+    interactiveinput = isa(stdin, Base.TTY)
+    b = opts.banner
+    auto = b == -1
+    banner = b == 0 || (auto && !interactiveinput) ? :no  :
+             b == 1 || (auto && interactiveinput)  ? :yes :
+             :short # b == 2
+
+    quiet                 = (opts.quiet != 0)
+    history_file          = (opts.historyfile != 0)
+    return run_main_repl(interactiveinput, quiet, banner, history_file)
+end
+
+"""
+    @main
+
+This macro is used to mark that the binding `main` in the current module is considered an
+entrypoint. The precise semantics of the entrypoint depend on the CLI driver.
+
+In the `julia` driver, if `Main.main` is marked as an entrypoint, it will be automatically called upon
+the completion of script execution.
+
+The `@main` macro may be used standalone or as part of the function definition.
+The following are equivalent:
+
+```
+function @main(args)
+    println("Hello World")
+end
+```
+
+```
+function main(args)
+end
+@main
+```
+
+## Detailed semantics
+
+The entrypoint semantics attach to the owner of the binding owner. In particular, if a marked entrypoint is
+imported into `Main`, it will be treated as an entrypoint in `Main`:
+
+```
+module MyApp
+    export main
+    @main(args) = println("Hello World")
+end
+using .MyApp
+# `julia` Will execute MyApp.main at the conclusion of script execution
+```
+
+Note that in particular, the semantics do not attach to the method
+or the name:
+```
+module MyApp
+    @main(args) = println("Hello World")
+end
+const main = MyApp.main
+# `julia` Will *NOT* execute MyApp.main unless there is a separate `@main` annotation in `Main`
+```
+
+!!! compat "Julia 1.11"
+    This macro is new in Julia 1.11. At present, the precise semantics of `@main` are still subject to change.
+"""
+macro main(args...)
+    if isdefined(__module__, :main)
+        if Base.binding_module(__module__, :main) !== __module__
+            error("Symbol `main` is already a resolved import in module $(__module__). `@main` must be used in the defining module.")
+        end
+    end
+    Core.eval(__module__, quote
+        # Force the binding to resolve to this module
+        global main
+        global var"#__main_is_entrypoint__#"::Bool = true
+    end)
+    if !isempty(args)
+        Expr(:call, esc(:main), map(esc, args)...)
+    else
+        esc(:main)
+    end
 end
diff --git a/base/cmd.jl b/base/cmd.jl
index 475a62a82d4d7..7c66c01d678af 100644
--- a/base/cmd.jl
+++ b/base/cmd.jl
@@ -3,9 +3,12 @@
 abstract type AbstractCmd end
 
 # libuv process option flags
-const UV_PROCESS_WINDOWS_VERBATIM_ARGUMENTS = UInt8(1 << 2)
-const UV_PROCESS_DETACHED = UInt8(1 << 3)
-const UV_PROCESS_WINDOWS_HIDE = UInt8(1 << 4)
+const UV_PROCESS_WINDOWS_VERBATIM_ARGUMENTS = UInt32(1 << 2)
+const UV_PROCESS_DETACHED = UInt32(1 << 3)
+const UV_PROCESS_WINDOWS_HIDE = UInt32(1 << 4)
+const UV_PROCESS_SETUID = UInt32(1 << 5)
+const UV_PROCESS_SETGID = UInt32(1 << 6)
+const UV_PROCESS_WINDOWS_DISABLE_EXACT_NAME = UInt32(1 << 7)
 
 struct Cmd <: AbstractCmd
     exec::Vector{String}
@@ -14,13 +17,17 @@ struct Cmd <: AbstractCmd
     env::Union{Vector{String},Nothing}
     dir::String
     cpus::Union{Nothing,Vector{UInt16}}
-    Cmd(exec::Vector{String}) =
-        new(exec, false, 0x00, nothing, "", nothing)
-    Cmd(cmd::Cmd, ignorestatus, flags, env, dir, cpus = nothing) =
+    uid::Union{Nothing,UInt32}
+    gid::Union{Nothing,UInt32}
+    Cmd(exec::Vector{<:AbstractString}) =
+        new(exec, false, 0x00, nothing, "", nothing, nothing, nothing)
+    Cmd(cmd::Cmd, ignorestatus, flags, env, dir, cpus = nothing, uid = nothing, gid = nothing) =
         new(cmd.exec, ignorestatus, flags, env,
-            dir === cmd.dir ? dir : cstr(dir), cpus)
+            dir === cmd.dir ? dir : cstr(dir), cpus, uid, gid)
     function Cmd(cmd::Cmd; ignorestatus::Bool=cmd.ignorestatus, env=cmd.env, dir::AbstractString=cmd.dir,
                  cpus::Union{Nothing,Vector{UInt16}} = cmd.cpus,
+                 uid::Union{Nothing,UInt32} = cmd.uid,
+                 gid::Union{Nothing,UInt32} = cmd.gid,
                  detach::Bool = 0 != cmd.flags & UV_PROCESS_DETACHED,
                  windows_verbatim::Bool = 0 != cmd.flags & UV_PROCESS_WINDOWS_VERBATIM_ARGUMENTS,
                  windows_hide::Bool = 0 != cmd.flags & UV_PROCESS_WINDOWS_HIDE)
@@ -28,7 +35,7 @@ struct Cmd <: AbstractCmd
                 windows_verbatim * UV_PROCESS_WINDOWS_VERBATIM_ARGUMENTS |
                 windows_hide * UV_PROCESS_WINDOWS_HIDE
         new(cmd.exec, ignorestatus, flags, byteenv(env),
-            dir === cmd.dir ? dir : cstr(dir), cpus)
+            dir === cmd.dir ? dir : cstr(dir), cpus, uid, gid)
     end
 end
 
@@ -37,10 +44,12 @@ has_nondefault_cmd_flags(c::Cmd) =
     c.flags != 0x00 ||
     c.env !== nothing ||
     c.dir !== "" ||
-    c.cpus !== nothing
+    c.cpus !== nothing ||
+    c.uid !== nothing ||
+    c.gid !== nothing
 
 """
-    Cmd(cmd::Cmd; ignorestatus, detach, windows_verbatim, windows_hide, env, dir)
+    Cmd(cmd::Cmd; ignorestatus, detach, windows_verbatim, windows_hide, env, dir, uid, gid)
     Cmd(exec::Vector{String})
 
 Construct a new `Cmd` object, representing an external program and arguments, from `cmd`,
@@ -70,6 +79,8 @@ while changing the settings of the optional keyword arguments:
   elements, use [`addenv()`](@ref) which will return a `Cmd` object with the updated environment.
 * `dir::AbstractString`: Specify a working directory for the command (instead
   of the current directory).
+* `uid::Union{Nothing,UInt32}`: Set the user ID for the process (Unix only).
+* `gid::Union{Nothing,UInt32}`: Set the group ID for the process (Unix only).
 
 For any keywords that are not specified, the current settings from `cmd` are used.
 
@@ -85,9 +96,9 @@ This can then be passed to the `Cmd` constructor to modify its settings, e.g.
 """
 Cmd
 
-hash(x::Cmd, h::UInt) = hash(x.exec, hash(x.env, hash(x.ignorestatus, hash(x.dir, hash(x.flags, h)))))
+hash(x::Cmd, h::UInt) = hash(x.exec, hash(x.env, hash(x.ignorestatus, hash(x.dir, hash(x.flags, hash(x.uid, hash(x.gid, h)))))))
 ==(x::Cmd, y::Cmd) = x.exec == y.exec && x.env == y.env && x.ignorestatus == y.ignorestatus &&
-                     x.dir == y.dir && isequal(x.flags, y.flags)
+                     x.dir == y.dir && isequal(x.flags, y.flags) && x.uid == y.uid && x.gid == y.gid
 
 struct OrCmds <: AbstractCmd
     a::AbstractCmd
@@ -124,9 +135,15 @@ escape_microsoft_c_args(io::IO, cmd::Cmd) =
 function show(io::IO, cmd::Cmd)
     print_env = cmd.env !== nothing
     print_dir = !isempty(cmd.dir)
-    (print_env || print_dir) && print(io, "setenv(")
+    print_uid = cmd.uid !== nothing
+    print_gid = cmd.gid !== nothing
     print_cpus = cmd.cpus !== nothing
+
+    (print_env || print_dir) && print(io, "setenv(")
     print_cpus && print(io, "setcpuaffinity(")
+    print_gid && print(io, "setgid(")
+    print_uid && print(io, "setuid(")
+
     print(io, '`')
     join(io, map(cmd.exec) do arg
         replace(sprint(context=io) do io
@@ -136,14 +153,19 @@ function show(io::IO, cmd::Cmd)
         end, '`' => "\\`")
     end, ' ')
     print(io, '`')
+
+    print_uid && (print(io, ", "); show(io, Int32(cmd.uid)); print(io, ")"))
+    print_gid && (print(io, ", "); show(io, Int32(cmd.gid)); print(io, ")"))
     if print_cpus
         print(io, ", ")
         show(io, collect(Int, something(cmd.cpus)))
         print(io, ")")
     end
-    print_env && (print(io, ","); show(io, cmd.env))
-    print_dir && (print(io, "; dir="); show(io, cmd.dir))
-    (print_dir || print_env) && print(io, ")")
+    if print_env || print_dir
+        print_env && (print(io, ","); show(io, cmd.env))
+        print_dir && (print(io, "; dir="); show(io, cmd.dir))
+        print(io, ")")
+    end
     nothing
 end
 
@@ -187,7 +209,7 @@ if OS_HANDLE !== RawFD
 end
 setup_stdio(stdio::Union{DevNull,OS_HANDLE,RawFD}, ::Bool) = (stdio, false)
 
-const Redirectable = Union{IO, FileRedirect, RawFD, OS_HANDLE}
+const Redirectable = Union{IO, IOServer, FileRedirect, RawFD, OS_HANDLE}
 const StdIOSet = NTuple{3, Redirectable}
 
 struct CmdRedirect <: AbstractCmd
@@ -224,6 +246,8 @@ Mark a command object so that running it will not throw an error if the result c
 ignorestatus(cmd::Cmd) = Cmd(cmd, ignorestatus=true)
 ignorestatus(cmd::Union{OrCmds,AndCmds}) =
     typeof(cmd)(ignorestatus(cmd.a), ignorestatus(cmd.b))
+ignorestatus(cmd::CmdRedirect) =
+    CmdRedirect(ignorestatus(cmd.cmd), cmd.handle, cmd.stream_no, cmd.readable)
 
 """
     detach(command)
@@ -354,6 +378,54 @@ function setcpuaffinity end
 setcpuaffinity(cmd::Cmd, ::Nothing) = Cmd(cmd; cpus = nothing)
 setcpuaffinity(cmd::Cmd, cpus) = Cmd(cmd; cpus = collect(UInt16, cpus))
 
+"""
+    setuid(original_command::Cmd, uid) -> command::Cmd
+
+Set the user ID (UID) of the `command`. On Unix systems, this allows
+the command to run as a different user. Passing `uid = nothing` removes
+any previously set UID.
+
+This function is only supported on Unix-based systems (Linux, macOS, etc.).
+Requires appropriate permissions to set UID.
+
+!!! compat "Julia 1.13"
+    This function requires at least Julia 1.13.
+
+# Examples
+
+```julia
+julia> run(setuid(`id -u`, 1000));
+1000
+```
+"""
+function setuid end
+setuid(cmd::Cmd, ::Nothing) = Cmd(cmd; uid = nothing)
+setuid(cmd::Cmd, uid::Integer) = Cmd(cmd; uid = UInt32(uid))
+
+"""
+    setgid(original_command::Cmd, gid) -> command::Cmd
+
+Set the group ID (GID) of the `command`. On Unix systems, this allows
+the command to run as a different group. Passing `gid = nothing` removes
+any previously set GID.
+
+This function is only supported on Unix-based systems (Linux, macOS, etc.).
+Requires appropriate permissions to set GID.
+
+!!! compat "Julia 1.13"
+    This function requires at least Julia 1.13.
+
+# Examples
+
+```julia
+julia> run(setgid(`id -g`, 1000));
+1000
+```
+"""
+function setgid end
+setgid(cmd::Cmd, ::Nothing) = Cmd(cmd; gid = nothing)
+setgid(cmd::Cmd, gid::Integer) = Cmd(cmd; gid = UInt32(gid))
+
 (&)(left::AbstractCmd, right::AbstractCmd) = AndCmds(left, right)
 redir_out(src::AbstractCmd, dest::AbstractCmd) = OrCmds(src, dest)
 redir_err(src::AbstractCmd, dest::AbstractCmd) = ErrOrCmds(src, dest)
@@ -402,9 +474,21 @@ function pipeline(cmd::AbstractCmd; stdin=nothing, stdout=nothing, stderr=nothin
     return cmd
 end
 
-pipeline(cmd::AbstractCmd, dest) = pipeline(cmd, stdout=dest)
+pipeline(cmd::AbstractCmd, dest::Union{AbstractCmd, AbstractString, Redirectable}) = pipeline(cmd, stdout=dest)
 pipeline(src::Union{Redirectable,AbstractString}, cmd::AbstractCmd) = pipeline(cmd, stdin=src)
 
+"""
+    pipeline(command, redir::Pair{<:Integer, <:Redirectable})
+
+Redirect fd number `redir.first` of `command` to or from the given `redir.second`, which can be
+an I/O stream, a filename, or a file descriptor. This method is primarily used to pass additional
+fds beyond the standard ios that are not supported by the keyword argument interface.
+
+!!! compat "Julia 1.13"
+    This method requires Julia 1.13 or later.
+"""
+pipeline(cmd::AbstractCmd, redir::Pair{<:Integer, <:Redirectable}) = CmdRedirect(cmd, redir.second, Int(redir.first))
+
 """
     pipeline(from, to, ...)
 
@@ -467,12 +551,12 @@ function cmd_gen(parsed)
     args = String[]
     if length(parsed) >= 1 && isa(parsed[1], Tuple{Cmd})
         cmd = (parsed[1]::Tuple{Cmd})[1]
-        (ignorestatus, flags, env, dir) = (cmd.ignorestatus, cmd.flags, cmd.env, cmd.dir)
+        (ignorestatus, flags, env, dir, cpus, uid, gid) = (cmd.ignorestatus, cmd.flags, cmd.env, cmd.dir, cmd.cpus, cmd.uid, cmd.gid)
         append!(args, cmd.exec)
         for arg in tail(parsed)
             append!(args, Base.invokelatest(arg_gen, arg...)::Vector{String})
         end
-        return Cmd(Cmd(args), ignorestatus, flags, env, dir)
+        return Cmd(Cmd(args), ignorestatus, flags, env, dir, cpus, uid, gid)
     else
         for arg in parsed
             append!(args, arg_gen(arg...)::Vector{String})
@@ -481,10 +565,16 @@ function cmd_gen(parsed)
     end
 end
 
+@assume_effects :foldable !:consistent function cmd_gen(
+    parsed::Tuple{Vararg{Tuple{Vararg{Union{String, SubString{String}}}}}}
+)
+    return @invoke cmd_gen(parsed::Any)
+end
+
 """
     @cmd str
 
-Similar to `cmd`, generate a `Cmd` from the `str` string which represents the shell command(s) to be executed.
+Similar to ``` `str` ```, generate a `Cmd` from the `str` string which represents the shell command(s) to be executed.
 The [`Cmd`](@ref) object can be run as a process and can outlive the spawning julia process (see `Cmd` for more).
 
 # Examples
@@ -497,7 +587,7 @@ julia> run(cm)
 Process(`echo 1`, ProcessExited(0))
 ```
 """
-macro cmd(str)
+macro cmd(str::String)
     cmd_ex = shell_parse(str, special=shell_special, filename=String(__source__.file))[1]
     return :(cmd_gen($(esc(cmd_ex))))
 end
diff --git a/base/cmem.jl b/base/cmem.jl
index 8b0b99b3a6ebd..531fac434d097 100644
--- a/base/cmem.jl
+++ b/base/cmem.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 """
-    memcpy(dst::Ptr, src::Ptr, n::Integer) -> Ptr{Cvoid}
+    memcpy(dst::Ptr, src::Ptr, n::Integer)::Ptr{Cvoid}
 
 Call `memcpy` from the C standard library.
 
@@ -10,11 +10,12 @@ Call `memcpy` from the C standard library.
 
 """
 function memcpy(dst::Ptr, src::Ptr, n::Integer)
+    @_terminates_globally_meta
     ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), dst, src, n)
 end
 
 """
-    memmove(dst::Ptr, src::Ptr, n::Integer) -> Ptr{Cvoid}
+    memmove(dst::Ptr, src::Ptr, n::Integer)::Ptr{Cvoid}
 
 Call `memmove` from the C standard library.
 
@@ -23,11 +24,12 @@ Call `memmove` from the C standard library.
 
 """
 function memmove(dst::Ptr, src::Ptr, n::Integer)
+    @_terminates_globally_meta
     ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), dst, src, n)
 end
 
 """
-    memset(dst::Ptr, val, n::Integer) -> Ptr{Cvoid}
+    memset(dst::Ptr, val, n::Integer)::Ptr{Cvoid}
 
 Call `memset` from the C standard library.
 
@@ -36,11 +38,12 @@ Call `memset` from the C standard library.
 
 """
 function memset(p::Ptr, val, n::Integer)
+    @_terminates_globally_meta
     ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), p, val, n)
 end
 
 """
-    memcmp(a::Ptr, b::Ptr, n::Integer) -> Int
+    memcmp(a::Ptr, b::Ptr, n::Integer)::Int
 
 Call `memcmp` from the C standard library.
 
@@ -49,5 +52,6 @@ Call `memcmp` from the C standard library.
 
 """
 function memcmp(a::Ptr, b::Ptr, n::Integer)
+    @_terminates_globally_meta
     ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), a, b, n % Csize_t) % Int
 end
diff --git a/base/combinatorics.jl b/base/combinatorics.jl
index d09a5b6c0ce83..5180f830ce187 100644
--- a/base/combinatorics.jl
+++ b/base/combinatorics.jl
@@ -2,23 +2,30 @@
 
 # Factorials
 
-const _fact_table64 = Vector{Int64}(undef, 20)
-_fact_table64[1] = 1
-for n in 2:20
-    _fact_table64[n] = _fact_table64[n-1] * n
+const _fact_table64 = let _fact_table64 = Vector{Int64}(undef, 20)
+    _fact_table64[1] = 1
+    for n in 2:20
+        _fact_table64[n] = _fact_table64[n-1] * n
+    end
+    Tuple(_fact_table64)
 end
 
-const _fact_table128 = Vector{UInt128}(undef, 34)
-_fact_table128[1] = 1
-for n in 2:34
-    _fact_table128[n] = _fact_table128[n-1] * n
+const _fact_table128 = let _fact_table128 = Vector{UInt128}(undef, 34)
+    _fact_table128[1] = 1
+    for n in 2:34
+        _fact_table128[n] = _fact_table128[n-1] * n
+    end
+    Tuple(_fact_table128)
 end
 
-function factorial_lookup(n::Integer, table, lim)
-    n < 0 && throw(DomainError(n, "`n` must not be negative."))
-    n > lim && throw(OverflowError(string(n, " is too large to look up in the table; consider using `factorial(big(", n, "))` instead")))
-    n == 0 && return one(n)
-    @inbounds f = table[n]
+function factorial_lookup(
+    n::Union{Checked.SignedInt,Checked.UnsignedInt},
+    table::Union{NTuple{20,Int64},NTuple{34,UInt128}}, lim::Int)
+    idx = Int(n)
+    idx < 0 && throw(DomainError(n, "`n` must not be negative."))
+    idx > lim && throw(OverflowError(lazy"$n is too large to look up in the table; consider using `factorial(big($n))` instead"))
+    idx == 0 && return one(n)
+    f = getfield(table, idx)
     return oftype(n, f)
 end
 
@@ -53,7 +60,7 @@ end
 end
 
 """
-    isperm(v) -> Bool
+    isperm(v)::Bool
 
 Return `true` if `v` is a valid permutation.
 
@@ -136,38 +143,55 @@ function permutecols!!(a::AbstractMatrix, p::AbstractVector{<:Integer})
     a
 end
 
-function permute!!(a, p::AbstractVector{<:Integer})
+# Row and column permutations for AbstractMatrix
+permutecols!(a::AbstractMatrix, p::AbstractVector{<:Integer}) =
+    _permute!(a, p, Base.swapcols!)
+permuterows!(a::AbstractMatrix, p::AbstractVector{<:Integer}) =
+    _permute!(a, p, Base.swaprows!)
+@inline function _permute!(a::AbstractMatrix, p::AbstractVector{<:Integer}, swapfun!::F) where {F}
     require_one_based_indexing(a, p)
-    count = 0
-    start = 0
-    while count < length(a)
-        ptr = start = findnext(!iszero, p, start+1)::Int
-        temp = a[start]
-        next = p[start]
-        count += 1
-        while next != start
-            a[ptr] = a[next]
-            p[ptr] = 0
-            ptr = next
-            next = p[next]
-            count += 1
+    p .= .-p
+    for i in 1:length(p)
+        p[i] > 0 && continue
+        j = i
+        in = p[j] = -p[j]
+        while p[in] < 0
+            swapfun!(a, in, j)
+            j = in
+            in = p[in] = -p[in]
         end
-        a[ptr] = temp
-        p[ptr] = 0
     end
     a
 end
+invpermutecols!(a::AbstractMatrix, p::AbstractVector{<:Integer}) =
+    _invpermute!(a, p, Base.swapcols!)
+invpermuterows!(a::AbstractMatrix, p::AbstractVector{<:Integer}) =
+    _invpermute!(a, p, Base.swaprows!)
+@inline function _invpermute!(a::AbstractMatrix, p::AbstractVector{<:Integer}, swapfun!::F) where {F}
+    require_one_based_indexing(a, p)
+    p .= .-p
+    for i in 1:length(p)
+        p[i] > 0 && continue
+        j = p[i] = -p[i]
+        while j != i
+           swapfun!(a, j, i)
+           j = p[j] = -p[j]
+        end
+     end
+    a
+end
 
 """
     permute!(v, p)
 
-Permute vector `v` in-place, according to permutation `p`. No checking is done
-to verify that `p` is a permutation.
+Permute vector `v` according to permutation `p`, storing the result back into `v`.
+No checking is done to verify that `p` is a permutation.
 
 To return a new permutation, use `v[p]`. This is generally faster than `permute!(v, p)`;
 it is even faster to write into a pre-allocated output array with `u .= @view v[p]`.
-(Even though `permute!` overwrites `v` in-place, it internally requires some allocation
-to keep track of which elements have been moved.)
+(Even though `permute!` overwrites `v` in-place, it internally requires some allocation.)
+
+$(_DOCS_ALIASING_WARNING)
 
 See also [`invpermute!`](@ref).
 
@@ -189,30 +213,6 @@ julia> A
 """
 permute!(v, p::AbstractVector) = (v .= v[p])
 
-function invpermute!!(a, p::AbstractVector{<:Integer})
-    require_one_based_indexing(a, p)
-    count = 0
-    start = 0
-    while count < length(a)
-        start = findnext(!iszero, p, start+1)::Int
-        temp = a[start]
-        next = p[start]
-        count += 1
-        while next != start
-            temp_next = a[next]
-            a[next] = temp
-            temp = temp_next
-            ptr = p[next]
-            p[next] = 0
-            next = ptr
-            count += 1
-        end
-        a[next] = temp
-        p[next] = 0
-    end
-    a
-end
-
 """
     invpermute!(v, p)
 
@@ -222,6 +222,8 @@ Note that if you have a pre-allocated output array (e.g. `u = similar(v)`),
 it is quicker to instead employ `u[p] = v`.  (`invpermute!` internally
 allocates a copy of the data.)
 
+$(_DOCS_ALIASING_WARNING)
+
 # Examples
 ```jldoctest
 julia> A = [1, 1, 3, 4];
@@ -283,7 +285,7 @@ julia> B[invperm(v)]
 """
 function invperm(a::AbstractVector)
     require_one_based_indexing(a)
-    b = zero(a) # similar vector of zeros
+    b = fill!(similar(a), zero(eltype(a))) # mutable vector of zeros
     n = length(a)
     @inbounds for (i, j) in enumerate(a)
         ((1 <= j <= n) && b[j] == 0) ||
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
deleted file mode 100644
index 0cf3e6c00a1b7..0000000000000
--- a/base/compiler/abstractinterpretation.jl
+++ /dev/null
@@ -1,3162 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# See if the inference result of the current statement's result value might affect
-# the final answer for the method (aside from optimization potential and exceptions).
-# To do that, we need to check both for slot assignment and SSA usage.
-call_result_unused(sv::InferenceState, currpc::Int) =
-    isexpr(sv.src.code[currpc], :call) && isempty(sv.ssavalue_uses[currpc])
-call_result_unused(si::StmtInfo) = !si.used
-
-function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
-                                  arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype),
-                                  sv::AbsIntState, max_methods::Int)
-    ⊑ₚ = ⊑(ipo_lattice(interp))
-    if !should_infer_this_call(interp, sv)
-        add_remark!(interp, sv, "Skipped call in throw block")
-        # At this point we are guaranteed to end up throwing on this path,
-        # which is all that's required for :consistent-cy. Of course, we don't
-        # know anything else about this statement.
-        effects = Effects(; consistent=ALWAYS_TRUE, nonoverlayed=!isoverlayed(method_table(interp)))
-        return CallMeta(Any, effects, NoCallInfo())
-    end
-
-    argtypes = arginfo.argtypes
-    matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp),
-        InferenceParams(interp).max_union_splitting, max_methods)
-    if isa(matches, FailedMethodMatch)
-        add_remark!(interp, sv, matches.reason)
-        return CallMeta(Any, Effects(), NoCallInfo())
-    end
-
-    (; valid_worlds, applicable, info) = matches
-    update_valid_age!(sv, valid_worlds)
-    napplicable = length(applicable)
-    rettype = Bottom
-    edges = MethodInstance[]
-    conditionals = nothing # keeps refinement information of call argument types when the return type is boolean
-    seen = 0               # number of signatures actually inferred
-    any_const_result = false
-    const_results = Union{Nothing,ConstResult}[]
-    multiple_matches = napplicable > 1
-    fargs = arginfo.fargs
-    all_effects = EFFECTS_TOTAL
-    if !matches.nonoverlayed
-        # currently we don't have a good way to execute the overlayed method definition,
-        # so we should give up concrete eval when any of the matched methods is overlayed
-        f = nothing
-        all_effects = Effects(all_effects; nonoverlayed=false)
-    end
-
-    𝕃ₚ = ipo_lattice(interp)
-    for i in 1:napplicable
-        match = applicable[i]::MethodMatch
-        method = match.method
-        sig = match.spec_types
-        if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
-            # only infer concrete call sites in top-level expressions
-            add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression")
-            break
-        end
-        this_rt = Bottom
-        splitunions = false
-        # TODO: this used to trigger a bug in inference recursion detection, and is unmaintained now
-        # sigtuple = unwrap_unionall(sig)::DataType
-        # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting
-        if splitunions
-            splitsigs = switchtupleunion(sig)
-            for sig_n in splitsigs
-                result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv)
-                (; rt, edge, effects) = result
-                this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
-                this_arginfo = ArgInfo(fargs, this_argtypes)
-                const_call_result = abstract_call_method_with_const_args(interp,
-                    result, f, this_arginfo, si, match, sv)
-                const_result = nothing
-                if const_call_result !== nothing
-                    if const_call_result.rt ⊑ₚ rt
-                        rt = const_call_result.rt
-                        (; effects, const_result, edge) = const_call_result
-                    else
-                        add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference")
-                    end
-                end
-                all_effects = merge_effects(all_effects, effects)
-                push!(const_results, const_result)
-                any_const_result |= const_result !== nothing
-                edge === nothing || push!(edges, edge)
-                this_rt = tmerge(this_rt, rt)
-                if bail_out_call(interp, this_rt, sv)
-                    break
-                end
-            end
-            this_conditional = ignorelimited(this_rt)
-            this_rt = widenwrappedconditional(this_rt)
-        else
-            result = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv)
-            (; rt, edge, effects) = result
-            this_conditional = ignorelimited(rt)
-            this_rt = widenwrappedconditional(rt)
-            # try constant propagation with argtypes for this match
-            # this is in preparation for inlining, or improving the return result
-            this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
-            this_arginfo = ArgInfo(fargs, this_argtypes)
-            const_call_result = abstract_call_method_with_const_args(interp,
-                result, f, this_arginfo, si, match, sv)
-            const_result = nothing
-            if const_call_result !== nothing
-                this_const_conditional = ignorelimited(const_call_result.rt)
-                this_const_rt = widenwrappedconditional(const_call_result.rt)
-                # return type of const-prop' inference can be wider than that of non const-prop' inference
-                # e.g. in cases when there are cycles but cached result is still accurate
-                if this_const_rt ⊑ₚ this_rt
-                    this_conditional = this_const_conditional
-                    this_rt = this_const_rt
-                    (; effects, const_result, edge) = const_call_result
-                else
-                    add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference")
-                end
-            end
-            all_effects = merge_effects(all_effects, effects)
-            push!(const_results, const_result)
-            any_const_result |= const_result !== nothing
-            edge === nothing || push!(edges, edge)
-        end
-        @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context"
-        seen += 1
-        rettype = tmerge(𝕃ₚ, rettype, this_rt)
-        if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing
-            if conditionals === nothing
-                conditionals = Any[Bottom for _ in 1:length(argtypes)],
-                               Any[Bottom for _ in 1:length(argtypes)]
-            end
-            for i = 1:length(argtypes)
-                cnd = conditional_argtype(this_conditional, sig, argtypes, i)
-                conditionals[1][i] = tmerge(conditionals[1][i], cnd.thentype)
-                conditionals[2][i] = tmerge(conditionals[2][i], cnd.elsetype)
-            end
-        end
-        if bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
-            add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.")
-            break
-        end
-    end
-
-    if any_const_result && seen == napplicable
-        @assert napplicable == nmatches(info) == length(const_results)
-        info = ConstCallInfo(info, const_results)
-    end
-
-    if seen ≠ napplicable
-        # there is unanalyzed candidate, widen type and effects to the top
-        rettype = Any
-        all_effects = Effects()
-    elseif isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) :
-            (!all(matches.fullmatches) || any_ambig(matches))
-        # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
-        all_effects = Effects(all_effects; nothrow=false)
-    end
-
-    rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals)
-
-    # Also considering inferring the compilation signature for this method, so
-    # it is available to the compiler in case it ends up needing it.
-    if (isa(sv, InferenceState) && infer_compilation_signature(interp) &&
-        (1 == seen == napplicable) && rettype !== Any && rettype !== Bottom &&
-        !is_removable_if_unused(all_effects))
-        match = applicable[1]::MethodMatch
-        method = match.method
-        sig = match.spec_types
-        mi = specialize_method(match; preexisting=true)
-        if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv)
-            csig = get_compileable_sig(method, sig, match.sparams)
-            if csig !== nothing && csig !== sig
-                abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv)
-            end
-        end
-    end
-
-    if call_result_unused(si) && !(rettype === Bottom)
-        add_remark!(interp, sv, "Call result type was widened because the return value is unused")
-        # We're mainly only here because the optimizer might want this code,
-        # but we ourselves locally don't typically care about it locally
-        # (beyond checking if it always throws).
-        # So avoid adding an edge, since we don't want to bother attempting
-        # to improve our result even if it does change (to always throw),
-        # and avoid keeping track of a more complex result type.
-        rettype = Any
-    end
-    add_call_backedges!(interp, rettype, all_effects, edges, matches, atype, sv)
-    if isa(sv, InferenceState)
-        # TODO (#48913) implement a proper recursion handling for irinterp:
-        # This works just because currently the `:terminate` condition guarantees that
-        # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
-        # We should revisit this once we have a better story for handling cycles in irinterp.
-        if !isempty(sv.pclimitations) # remove self, if present
-            delete!(sv.pclimitations, sv)
-            for caller in callers_in_cycle(sv)
-                delete!(sv.pclimitations, caller)
-            end
-        end
-    end
-    return CallMeta(rettype, all_effects, info)
-end
-
-struct FailedMethodMatch
-    reason::String
-end
-
-struct MethodMatches
-    applicable::Vector{Any}
-    info::MethodMatchInfo
-    valid_worlds::WorldRange
-    mt::MethodTable
-    fullmatch::Bool
-    nonoverlayed::Bool
-end
-any_ambig(info::MethodMatchInfo) = info.results.ambig
-any_ambig(m::MethodMatches) = any_ambig(m.info)
-
-struct UnionSplitMethodMatches
-    applicable::Vector{Any}
-    applicable_argtypes::Vector{Vector{Any}}
-    info::UnionSplitInfo
-    valid_worlds::WorldRange
-    mts::Vector{MethodTable}
-    fullmatches::Vector{Bool}
-    nonoverlayed::Bool
-end
-any_ambig(m::UnionSplitMethodMatches) = any(any_ambig, m.info.matches)
-
-function find_matching_methods(𝕃::AbstractLattice,
-                               argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView,
-                               max_union_splitting::Int, max_methods::Int)
-    # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
-    if 1 < unionsplitcost(𝕃, argtypes) <= max_union_splitting
-        split_argtypes = switchtupleunion(𝕃, argtypes)
-        infos = MethodMatchInfo[]
-        applicable = Any[]
-        applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
-        valid_worlds = WorldRange()
-        mts = MethodTable[]
-        fullmatches = Bool[]
-        nonoverlayed = true
-        for i in 1:length(split_argtypes)
-            arg_n = split_argtypes[i]::Vector{Any}
-            sig_n = argtypes_to_type(arg_n)
-            mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
-            mt === nothing && return FailedMethodMatch("Could not identify method table for call")
-            mt = mt::MethodTable
-            result = findall(sig_n, method_table; limit = max_methods)
-            if result === nothing
-                return FailedMethodMatch("For one of the union split cases, too many methods matched")
-            end
-            (; matches, overlayed) = result
-            nonoverlayed &= !overlayed
-            push!(infos, MethodMatchInfo(matches))
-            for m in matches
-                push!(applicable, m)
-                push!(applicable_argtypes, arg_n)
-            end
-            valid_worlds = intersect(valid_worlds, matches.valid_worlds)
-            thisfullmatch = any(match::MethodMatch->match.fully_covers, matches)
-            found = false
-            for (i, mt′) in enumerate(mts)
-                if mt′ === mt
-                    fullmatches[i] &= thisfullmatch
-                    found = true
-                    break
-                end
-            end
-            if !found
-                push!(mts, mt)
-                push!(fullmatches, thisfullmatch)
-            end
-        end
-        return UnionSplitMethodMatches(applicable,
-                                       applicable_argtypes,
-                                       UnionSplitInfo(infos),
-                                       valid_worlds,
-                                       mts,
-                                       fullmatches,
-                                       nonoverlayed)
-    else
-        mt = ccall(:jl_method_table_for, Any, (Any,), atype)
-        if mt === nothing
-            return FailedMethodMatch("Could not identify method table for call")
-        end
-        mt = mt::MethodTable
-        result = findall(atype, method_table; limit = max_methods)
-        if result === nothing
-            # this means too many methods matched
-            # (assume this will always be true, so we don't compute / update valid age in this case)
-            return FailedMethodMatch("Too many methods matched")
-        end
-        (; matches, overlayed) = result
-        fullmatch = any(match::MethodMatch->match.fully_covers, matches)
-        return MethodMatches(matches.matches,
-                             MethodMatchInfo(matches),
-                             matches.valid_worlds,
-                             mt,
-                             fullmatch,
-                             !overlayed)
-    end
-end
-
-"""
-    from_interprocedural!(interp::AbstractInterpreter, rt, sv::AbsIntState,
-                          arginfo::ArgInfo, maybecondinfo) -> newrt
-
-Converts inter-procedural return type `rt` into a local lattice element `newrt`,
-that is appropriate in the context of current local analysis frame `sv`, especially:
-- unwraps `rt::LimitedAccuracy` and collects its limitations into the current frame `sv`
-- converts boolean `rt` to new boolean `newrt` in a way `newrt` can propagate extra conditional
-  refinement information, e.g. translating `rt::InterConditional` into `newrt::Conditional`
-  that holds a type constraint information about a variable in `sv`
-
-This function _should_ be used wherever we propagate results returned from
-`abstract_call_method` or `abstract_call_method_with_const_args`.
-
-When `maybecondinfo !== nothing`, this function also tries extra conditional argument type refinement.
-In such cases `maybecondinfo` should be either of:
-- `maybecondinfo::Tuple{Vector{Any},Vector{Any}}`: precomputed argument type refinement information
-- method call signature tuple type
-When we deal with multiple `MethodMatch`es, it's better to precompute `maybecondinfo` by
-`tmerge`ing argument signature type of each method call.
-"""
-function from_interprocedural!(interp::AbstractInterpreter, @nospecialize(rt), sv::AbsIntState,
-                               arginfo::ArgInfo, @nospecialize(maybecondinfo))
-    rt = collect_limitations!(rt, sv)
-    if isa(rt, InterMustAlias)
-        rt = from_intermustalias(rt, arginfo)
-    elseif is_lattice_bool(ipo_lattice(interp), rt)
-        if maybecondinfo === nothing
-            rt = widenconditional(rt)
-        else
-            rt = from_interconditional(typeinf_lattice(interp), rt, sv, arginfo, maybecondinfo)
-        end
-    end
-    @assert !(rt isa InterConditional || rt isa InterMustAlias) "invalid lattice element returned from inter-procedural context"
-    return rt
-end
-
-function collect_limitations!(@nospecialize(typ), sv::InferenceState)
-    if isa(typ, LimitedAccuracy)
-        union!(sv.pclimitations, typ.causes)
-        return typ.typ
-    end
-    return typ
-end
-
-function from_intermustalias(rt::InterMustAlias, arginfo::ArgInfo)
-    fargs = arginfo.fargs
-    if fargs !== nothing && 1 ≤ rt.slot ≤ length(fargs)
-        arg = fargs[rt.slot]
-        if isa(arg, SlotNumber)
-            argtyp = widenslotwrapper(arginfo.argtypes[rt.slot])
-            if rt.vartyp ⊑ argtyp
-                return MustAlias(arg, rt.vartyp, rt.fldidx, rt.fldtyp)
-            else
-                # TODO optimize this case?
-            end
-        end
-    end
-    return widenmustalias(rt)
-end
-
-function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::AbsIntState,
-                               arginfo::ArgInfo, @nospecialize(maybecondinfo))
-    has_conditional(𝕃ᵢ, sv) || return widenconditional(rt)
-    (; fargs, argtypes) = arginfo
-    fargs === nothing && return widenconditional(rt)
-    slot = 0
-    alias = nothing
-    thentype = elsetype = Any
-    condval = maybe_extract_const_bool(rt)
-    for i in 1:length(fargs)
-        # find the first argument which supports refinement,
-        # and intersect all equivalent arguments with it
-        argtyp = argtypes[i]
-        if alias === nothing
-            arg = ssa_def_slot(fargs[i], sv)
-            if isa(arg, SlotNumber) && widenslotwrapper(argtyp) isa Type
-                old = argtyp
-                id = slot_id(arg)
-            elseif argtyp isa MustAlias
-                old = argtyp.fldtyp
-                id = argtyp.slot
-            else
-                continue # unlikely to refine
-            end
-        elseif argtyp isa MustAlias && issubalias(argtyp, alias)
-            arg = nothing
-            old = alias.fldtyp
-            id = alias.slot
-        else
-            continue
-        end
-        if slot == 0 || id == slot
-            if isa(maybecondinfo, Tuple{Vector{Any},Vector{Any}})
-                # if we have already computed argument refinement information, apply that now to get the result
-                new_thentype = maybecondinfo[1][i]
-                new_elsetype = maybecondinfo[2][i]
-            else
-                # otherwise compute it on the fly
-                cnd = conditional_argtype(rt, maybecondinfo, argtypes, i)
-                new_thentype = cnd.thentype
-                new_elsetype = cnd.elsetype
-            end
-            if condval === false
-                thentype = Bottom
-            elseif ⊑(𝕃ᵢ, new_thentype, thentype)
-                thentype = new_thentype
-            else
-                thentype = tmeet(𝕃ᵢ, thentype, widenconst(new_thentype))
-            end
-            if condval === true
-                elsetype = Bottom
-            elseif ⊑(𝕃ᵢ, new_elsetype, elsetype)
-                elsetype = new_elsetype
-            else
-                elsetype = tmeet(𝕃ᵢ, elsetype, widenconst(new_elsetype))
-            end
-            if (slot > 0 || condval !== false) && ⋤(𝕃ᵢ, thentype, old)
-                slot = id
-                if !(arg isa SlotNumber) && argtyp isa MustAlias
-                    alias = argtyp
-                end
-            elseif (slot > 0 || condval !== true) && ⋤(𝕃ᵢ, elsetype, old)
-                slot = id
-                if !(arg isa SlotNumber) && argtyp isa MustAlias
-                    alias = argtyp
-                end
-            else # reset: no new useful information for this slot
-                slot = 0
-                alias = nothing
-                thentype = elsetype = Any
-            end
-        end
-    end
-    if thentype === Bottom && elsetype === Bottom
-        return Bottom # accidentally proved this call to be dead / throw !
-    elseif slot > 0
-        if alias !== nothing
-            return form_mustalias_conditional(alias, thentype, elsetype)
-        end
-        return Conditional(slot, thentype, elsetype) # record a Conditional improvement to this slot
-    end
-    return widenconditional(rt)
-end
-
-function conditional_argtype(@nospecialize(rt), @nospecialize(sig), argtypes::Vector{Any}, i::Int)
-    if isa(rt, InterConditional) && rt.slot == i
-        return rt
-    else
-        thentype = elsetype = tmeet(widenslotwrapper(argtypes[i]), fieldtype(sig, i))
-        condval = maybe_extract_const_bool(rt)
-        condval === true && (elsetype = Bottom)
-        condval === false && (thentype = Bottom)
-        return InterConditional(i, thentype, elsetype)
-    end
-end
-
-function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype), all_effects::Effects,
-    edges::Vector{MethodInstance}, matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype),
-    sv::AbsIntState)
-    # don't bother to add backedges when both type and effects information are already
-    # maximized to the top since a new method couldn't refine or widen them anyway
-    if rettype === Any
-        # ignore the `:nonoverlayed` property if `interp` doesn't use overlayed method table
-        # since it will never be tainted anyway
-        if !isoverlayed(method_table(interp))
-            all_effects = Effects(all_effects; nonoverlayed=false)
-        end
-        if (# ignore the `:noinbounds` property if `:consistent`-cy is tainted already
-            (sv isa InferenceState && sv.ipo_effects.consistent === ALWAYS_FALSE) ||
-            all_effects.consistent === ALWAYS_FALSE ||
-            # or this `:noinbounds` doesn't taint it
-            !stmt_taints_inbounds_consistency(sv))
-            all_effects = Effects(all_effects; noinbounds=false)
-        end
-        all_effects === Effects() && return nothing
-    end
-    for edge in edges
-        add_backedge!(sv, edge)
-    end
-    # also need an edge to the method table in case something gets
-    # added that did not intersect with any existing method
-    if isa(matches, MethodMatches)
-        matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype)
-    else
-        for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts)
-            thisfullmatch || add_mt_backedge!(sv, mt, atype)
-        end
-    end
-    return nothing
-end
-
-const RECURSION_UNUSED_MSG = "Bounded recursion detected with unused result. Annotated return type may be wider than true result."
-const RECURSION_MSG = "Bounded recursion detected. Call was widened to force convergence."
-const RECURSION_MSG_HARDLIMIT = "Bounded recursion detected under hardlimit. Call was widened to force convergence."
-
-function abstract_call_method(interp::AbstractInterpreter,
-                              method::Method, @nospecialize(sig), sparams::SimpleVector,
-                              hardlimit::Bool, si::StmtInfo, sv::AbsIntState)
-    if method.name === :depwarn && isdefined(Main, :Base) && method.module === Main.Base
-        add_remark!(interp, sv, "Refusing to infer into `depwarn`")
-        return MethodCallResult(Any, false, false, nothing, Effects())
-    end
-    sigtuple = unwrap_unionall(sig)
-    sigtuple isa DataType || return MethodCallResult(Any, false, false, nothing, Effects())
-
-    if is_nospecializeinfer(method)
-        sig = get_nospecializeinfer_sig(method, sig, sparams)
-    end
-
-    # Limit argument type tuple growth of functions:
-    # look through the parents list to see if there's a call to the same method
-    # and from the same method.
-    # Returns the topmost occurrence of that repeated edge.
-    edgecycle = edgelimited = false
-    topmost = nothing
-
-    for sv′ in AbsIntStackUnwind(sv)
-        infmi = frame_instance(sv′)
-        if method === infmi.def
-            if infmi.specTypes::Type == sig::Type
-                # avoid widening when detecting self-recursion
-                # TODO: merge call cycle and return right away
-                if call_result_unused(si)
-                    add_remark!(interp, sv, RECURSION_UNUSED_MSG)
-                    # since we don't use the result (typically),
-                    # we have a self-cycle in the call-graph, but not in the inference graph (typically):
-                    # break this edge now (before we record it) by returning early
-                    # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-                    return MethodCallResult(Any, true, true, nothing, Effects())
-                end
-                topmost = nothing
-                edgecycle = true
-                break
-            end
-            topmost === nothing || continue
-            if edge_matches_sv(interp, sv′, method, sig, sparams, hardlimit, sv)
-                topmost = sv′
-                edgecycle = true
-            end
-        end
-    end
-    washardlimit = hardlimit
-
-    if topmost !== nothing
-        msig = unwrap_unionall(method.sig)::DataType
-        spec_len = length(msig.parameters) + 1
-        ls = length(sigtuple.parameters)
-        mi = frame_instance(sv)
-
-        if method === mi.def
-            # Under direct self-recursion, permit much greater use of reducers.
-            # here we assume that complexity(specTypes) :>= complexity(sig)
-            comparison = mi.specTypes
-            l_comparison = length((unwrap_unionall(comparison)::DataType).parameters)
-            spec_len = max(spec_len, l_comparison)
-        else
-            comparison = method.sig
-        end
-
-        if isdefined(method, :recursion_relation)
-            # We don't require the recursion_relation to be transitive, so
-            # apply a hard limit
-            hardlimit = true
-        end
-
-        # see if the type is actually too big (relative to the caller), and limit it if required
-        newsig = limit_type_size(sig, comparison, hardlimit ? comparison : mi.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, spec_len)
-
-        if newsig !== sig
-            # continue inference, but note that we've limited parameter complexity
-            # on this call (to ensure convergence), so that we don't cache this result
-            if call_result_unused(si)
-                add_remark!(interp, sv, RECURSION_UNUSED_MSG)
-                # if we don't (typically) actually care about this result,
-                # don't bother trying to examine some complex abstract signature
-                # since it's very unlikely that we'll try to inline this,
-                # or want make an invoke edge to its calling convention return type.
-                # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-                return MethodCallResult(Any, true, true, nothing, Effects())
-            end
-            add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG)
-            # TODO (#48913) implement a proper recursion handling for irinterp:
-            # This works just because currently the `:terminate` condition guarantees that
-            # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
-            # We should revisit this once we have a better story for handling cycles in irinterp.
-            if isa(topmost, InferenceState)
-                parentframe = frame_parent(topmost)
-                if isa(sv, InferenceState) && isa(parentframe, InferenceState)
-                    poison_callstack!(sv, parentframe === nothing ? topmost : parentframe)
-                end
-            end
-            sig = newsig
-            sparams = svec()
-            edgelimited = true
-        end
-    end
-
-    # if sig changed, may need to recompute the sparams environment
-    if isa(method.sig, UnionAll) && isempty(sparams)
-        recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), sig, method.sig)::SimpleVector
-        #@assert recomputed[1] !== Bottom
-        # We must not use `sig` here, since that may re-introduce structural complexity that
-        # our limiting heuristic sought to eliminate. The alternative would be to not increment depth over covariant contexts,
-        # but we prefer to permit inference of tuple-destructuring, so we don't do that right now
-        # For example, with a signature such as `Tuple{T, Ref{T}} where {T <: S}`
-        # we might want to limit this to `Tuple{S, Ref}`, while type-intersection can instead give us back the original type
-        # (which moves `S` back up to a lower comparison depth)
-        # Optionally, we could try to drive this to a fixed point, but I think this is getting too complex,
-        # and this would only cause more questions and more problems
-        # (the following is only an example, most of the statements are probable in the wrong order):
-        #     newsig = sig
-        #     seen = IdSet()
-        #     while !(newsig in seen)
-        #         push!(seen, newsig)
-        #         lsig = length((unwrap_unionall(sig)::DataType).parameters)
-        #         newsig = limit_type_size(newsig, sig, sv.linfo.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, lsig)
-        #         recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), newsig, method.sig)::SimpleVector
-        #         newsig = recomputed[2]
-        #     end
-        #     sig = ?
-        sparams = recomputed[2]::SimpleVector
-    end
-
-    (; rt, edge, effects) = typeinf_edge(interp, method, sig, sparams, sv)
-
-    if edge === nothing
-        edgecycle = edgelimited = true
-    end
-
-    # we look for the termination effect override here as well, since the :terminates effect
-    # may have been tainted due to recursion at this point even if it's overridden
-    if is_effect_overridden(sv, :terminates_globally)
-        # this frame is known to terminate
-        effects = Effects(effects, terminates=true)
-    elseif is_effect_overridden(method, :terminates_globally)
-        # this edge is known to terminate
-        effects = Effects(effects; terminates=true)
-    elseif edgecycle
-        # Some sort of recursion was detected.
-        if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv)
-            # no `MethodInstance` cycles -- don't taint :terminate
-        else
-            # we cannot guarantee that the call will terminate
-            effects = Effects(effects; terminates=false)
-        end
-    end
-
-    return MethodCallResult(rt, edgecycle, edgelimited, edge, effects)
-end
-
-function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState,
-                         method::Method, @nospecialize(sig), sparams::SimpleVector,
-                         hardlimit::Bool, sv::AbsIntState)
-    # The `method_for_inference_heuristics` will expand the given method's generator if
-    # necessary in order to retrieve this field from the generated `CodeInfo`, if it exists.
-    # The other `CodeInfo`s we inspect will already have this field inflated, so we just
-    # access it directly instead (to avoid regeneration).
-    world = get_world_counter(interp)
-    callee_method2 = method_for_inference_heuristics(method, sig, sparams, world) # Union{Method, Nothing}
-
-    inf_method2 = method_for_inference_limit_heuristics(frame) # limit only if user token match
-    inf_method2 isa Method || (inf_method2 = nothing)
-    if callee_method2 !== inf_method2
-        return false
-    end
-    if !hardlimit || InferenceParams(interp).ignore_recursion_hardlimit
-        # if this is a soft limit,
-        # also inspect the parent of this edge,
-        # to see if they are the same Method as sv
-        # in which case we'll need to ensure it is convergent
-        # otherwise, we don't
-
-        # check in the cycle list first
-        # all items in here are mutual parents of all others
-        if !any(p::AbsIntState->matches_sv(p, sv), callers_in_cycle(frame))
-            let parent = frame_parent(frame)
-                parent !== nothing || return false
-                (is_cached(parent) || frame_parent(parent) !== nothing) || return false
-                matches_sv(parent, sv) || return false
-            end
-        end
-
-        # If the method defines a recursion relation, give it a chance
-        # to tell us that this recursion is actually ok.
-        if isdefined(method, :recursion_relation)
-            if Core._apply_pure(method.recursion_relation, Any[method, callee_method2, sig, frame_instance(frame).specTypes])
-                return false
-            end
-        end
-    end
-    return true
-end
-
-# This function is used for computing alternate limit heuristics
-function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector, world::UInt)
-    if isdefined(method, :generator) && !(method.generator isa Core.GeneratedFunctionStub) && may_invoke_generator(method, sig, sparams)
-        method_instance = specialize_method(method, sig, sparams)
-        if isa(method_instance, MethodInstance)
-            cinfo = get_staged(method_instance, world)
-            if isa(cinfo, CodeInfo)
-                method2 = cinfo.method_for_inference_limit_heuristics
-                if method2 isa Method
-                    return method2
-                end
-            end
-        end
-    end
-    return nothing
-end
-
-function matches_sv(parent::AbsIntState, sv::AbsIntState)
-    sv_method2 = method_for_inference_limit_heuristics(sv) # limit only if user token match
-    sv_method2 isa Method || (sv_method2 = nothing)
-    parent_method2 = method_for_inference_limit_heuristics(parent) # limit only if user token match
-    parent_method2 isa Method || (parent_method2 = nothing)
-    return frame_instance(parent).def === frame_instance(sv).def && sv_method2 === parent_method2
-end
-
-function is_edge_recursed(edge::MethodInstance, caller::AbsIntState)
-    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
-        return edge === frame_instance(sv)
-    end
-end
-
-function is_method_recursed(method::Method, caller::AbsIntState)
-    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
-        return method === frame_instance(sv).def
-    end
-end
-
-function is_constprop_edge_recursed(edge::MethodInstance, caller::AbsIntState)
-    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
-        return edge === frame_instance(sv) && is_constproped(sv)
-    end
-end
-
-function is_constprop_method_recursed(method::Method, caller::AbsIntState)
-    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
-        return method === frame_instance(sv).def && is_constproped(sv)
-    end
-end
-
-# keeps result and context information of abstract_method_call, which will later be used for
-# backedge computation, and concrete evaluation or constant-propagation
-struct MethodCallResult
-    rt
-    edgecycle::Bool
-    edgelimited::Bool
-    edge::Union{Nothing,MethodInstance}
-    effects::Effects
-    function MethodCallResult(@nospecialize(rt),
-                              edgecycle::Bool,
-                              edgelimited::Bool,
-                              edge::Union{Nothing,MethodInstance},
-                              effects::Effects)
-        return new(rt, edgecycle, edgelimited, edge, effects)
-    end
-end
-
-struct InvokeCall
-    types     # ::Type
-    lookupsig # ::Type
-    InvokeCall(@nospecialize(types), @nospecialize(lookupsig)) = new(types, lookupsig)
-end
-
-struct ConstCallResults
-    rt::Any
-    const_result::ConstResult
-    effects::Effects
-    edge::MethodInstance
-    ConstCallResults(@nospecialize(rt),
-                     const_result::ConstResult,
-                     effects::Effects,
-                     edge::MethodInstance) =
-        new(rt, const_result, effects, edge)
-end
-
-function abstract_call_method_with_const_args(interp::AbstractInterpreter,
-    result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo,
-    match::MethodMatch, sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing)
-    if !const_prop_enabled(interp, sv, match)
-        return nothing
-    end
-    if bail_out_const_call(interp, result, si)
-        add_remark!(interp, sv, "[constprop] No more information to be gained")
-        return nothing
-    end
-    eligibility = concrete_eval_eligible(interp, f, result, arginfo, sv)
-    if eligibility === :concrete_eval
-        return concrete_eval_call(interp, f, result, arginfo, sv, invokecall)
-    end
-    mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv)
-    mi === nothing && return nothing
-    if is_constprop_recursed(result, mi, sv)
-        add_remark!(interp, sv, "[constprop] Edge cycle encountered")
-        return nothing
-    end
-    # try semi-concrete evaluation
-    if eligibility === :semi_concrete_eval
-        res = semi_concrete_eval_call(interp, mi, result, arginfo, sv)
-        if res !== nothing
-            return res
-        end
-    end
-    # try constant prop'
-    return const_prop_call(interp, mi, result, arginfo, sv)
-end
-
-function const_prop_enabled(interp::AbstractInterpreter, sv::AbsIntState, match::MethodMatch)
-    if !InferenceParams(interp).ipo_constant_propagation
-        add_remark!(interp, sv, "[constprop] Disabled by parameter")
-        return false
-    end
-    if is_no_constprop(match.method)
-        add_remark!(interp, sv, "[constprop] Disabled by method parameter")
-        return false
-    end
-    return true
-end
-
-function bail_out_const_call(interp::AbstractInterpreter, result::MethodCallResult, si::StmtInfo)
-    if is_removable_if_unused(result.effects)
-        if isa(result.rt, Const) || call_result_unused(si)
-            return true
-        end
-    end
-    return false
-end
-
-function concrete_eval_eligible(interp::AbstractInterpreter,
-    @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState)
-    (;effects) = result
-    if inbounds_option() === :off
-        if !is_nothrow(effects)
-            # Disable concrete evaluation in `--check-bounds=no` mode,
-            # unless it is known to not throw.
-            return :none
-        end
-    end
-    if !effects.noinbounds && stmt_taints_inbounds_consistency(sv)
-        # If the current statement is @inbounds or we propagate inbounds, the call's consistency
-        # is tainted and not consteval eligible.
-        add_remark!(interp, sv, "[constprop] Concrete evel disabled for inbounds")
-        return :none
-    end
-    if isoverlayed(method_table(interp)) && !is_nonoverlayed(effects)
-        # disable concrete-evaluation if this function call is tainted by some overlayed
-        # method since currently there is no direct way to execute overlayed methods
-        add_remark!(interp, sv, "[constprop] Concrete evel disabled for overlayed methods")
-        return :none
-    end
-    if result.edge !== nothing && is_foldable(effects)
-        if f !== nothing && is_all_const_arg(arginfo, #=start=#2)
-            return :concrete_eval
-        elseif !any_conditional(arginfo)
-            return :semi_concrete_eval
-        end
-    end
-    return :none
-end
-
-is_all_const_arg(arginfo::ArgInfo, start::Int) = is_all_const_arg(arginfo.argtypes, start::Int)
-function is_all_const_arg(argtypes::Vector{Any}, start::Int)
-    for i = start:length(argtypes)
-        a = widenslotwrapper(argtypes[i])
-        isa(a, Const) || isconstType(a) || issingletontype(a) || return false
-    end
-    return true
-end
-
-any_conditional(argtypes::Vector{Any}) = any(@nospecialize(x)->isa(x, Conditional), argtypes)
-any_conditional(arginfo::ArgInfo) = any_conditional(arginfo.argtypes)
-
-collect_const_args(arginfo::ArgInfo, start::Int) = collect_const_args(arginfo.argtypes, start)
-function collect_const_args(argtypes::Vector{Any}, start::Int)
-    return Any[ let a = widenslotwrapper(argtypes[i])
-                    isa(a, Const) ? a.val :
-                    isconstType(a) ? (a::DataType).parameters[1] :
-                    (a::DataType).instance
-                end for i = start:length(argtypes) ]
-end
-
-function concrete_eval_call(interp::AbstractInterpreter,
-    @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo,
-    sv::AbsIntState, invokecall::Union{InvokeCall,Nothing})
-    args = collect_const_args(arginfo, #=start=#2)
-    if invokecall !== nothing
-        # this call should be `invoke`d, rewrite `args` back now
-        pushfirst!(args, f, invokecall.types)
-        f = invoke
-    end
-    world = get_world_counter(interp)
-    edge = result.edge::MethodInstance
-    value = try
-        Core._call_in_world_total(world, f, args...)
-    catch
-        # The evaluation threw. By :consistent-cy, we're guaranteed this would have happened at runtime
-        return ConstCallResults(Union{}, ConcreteResult(edge, result.effects), result.effects, edge)
-    end
-    return ConstCallResults(Const(value), ConcreteResult(edge, EFFECTS_TOTAL, value), EFFECTS_TOTAL, edge)
-end
-
-# check if there is a cycle and duplicated inference of `mi`
-function is_constprop_recursed(result::MethodCallResult, mi::MethodInstance, sv::AbsIntState)
-    result.edgecycle || return false
-    if result.edgelimited
-        return is_constprop_method_recursed(mi.def::Method, sv)
-    else
-        # if the type complexity limiting didn't decide to limit the call signature (as
-        # indicated by `result.edgelimited === false`), we can relax the cycle detection
-        # by comparing `MethodInstance`s and allow inference to propagate different
-        # constant elements if the recursion is finite over the lattice
-        return is_constprop_edge_recursed(mi, sv)
-    end
-end
-
-# if there's a possibility we could get a better result with these constant arguments
-# (hopefully without doing too much work), returns `MethodInstance`, or nothing otherwise
-function maybe_get_const_prop_profitable(interp::AbstractInterpreter,
-    result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo,
-    match::MethodMatch, sv::AbsIntState)
-    method = match.method
-    force = force_const_prop(interp, f, method)
-    force || const_prop_entry_heuristic(interp, result, si, sv) || return nothing
-    nargs::Int = method.nargs
-    method.isva && (nargs -= 1)
-    length(arginfo.argtypes) < nargs && return nothing
-    if !const_prop_argument_heuristic(interp, arginfo, sv)
-        add_remark!(interp, sv, "[constprop] Disabled by argument and rettype heuristics")
-        return nothing
-    end
-    all_overridden = is_all_overridden(interp, arginfo, sv)
-    if !force && !const_prop_function_heuristic(interp, f, arginfo, nargs, all_overridden, sv)
-        add_remark!(interp, sv, "[constprop] Disabled by function heuristic")
-        return nothing
-    end
-    force |= all_overridden
-    mi = specialize_method(match; preexisting=!force)
-    if mi === nothing
-        add_remark!(interp, sv, "[constprop] Failed to specialize")
-        return nothing
-    end
-    mi = mi::MethodInstance
-    if !force && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv)
-        add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic")
-        return nothing
-    end
-    return mi
-end
-
-function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult, si::StmtInfo, sv::AbsIntState)
-    if call_result_unused(si) && result.edgecycle
-        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (edgecycle with unused result)")
-        return false
-    end
-    # check if this return type is improvable (i.e. whether it's possible that with more
-    # information, we might get a more precise type)
-    rt = result.rt
-    if isa(rt, Type)
-        # could always be improved to `Const`, `PartialStruct` or just a more precise type,
-        # unless we're already at `Bottom`
-        if rt === Bottom
-            add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (erroneous result)")
-            return false
-        else
-            return true
-        end
-    elseif isa(rt, PartialStruct) || isa(rt, InterConditional) || isa(rt, InterMustAlias)
-        # could be improved to `Const` or a more precise wrapper
-        return true
-    elseif isa(rt, LimitedAccuracy)
-        # optimizations like inlining are disabled for limited frames,
-        # thus there won't be much benefit in constant-prop' here
-        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (limited accuracy)")
-        return false
-    else
-        if isa(rt, Const)
-            if !is_nothrow(result.effects)
-                # Could still be improved to Bottom (or at least could see the effects improved)
-                return true
-            end
-        end
-        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (unimprovable result)")
-        return false
-    end
-end
-
-# determines heuristically whether if constant propagation can be worthwhile
-# by checking if any of given `argtypes` is "interesting" enough to be propagated
-function const_prop_argument_heuristic(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState)
-    𝕃ᵢ = typeinf_lattice(interp)
-    argtypes = arginfo.argtypes
-    for i in 1:length(argtypes)
-        a = argtypes[i]
-        if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && arginfo.fargs !== nothing
-            is_const_prop_profitable_conditional(a, arginfo.fargs, sv) && return true
-        else
-            a = widenslotwrapper(a)
-            has_nontrivial_extended_info(𝕃ᵢ, a) && is_const_prop_profitable_arg(𝕃ᵢ, a) && return true
-        end
-    end
-    return false
-end
-
-function is_const_prop_profitable_conditional(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState)
-    slotid = find_constrained_arg(cnd, fargs, sv)
-    if slotid !== nothing
-        return true
-    end
-    # as a minor optimization, we just check the result is a constant or not,
-    # since both `has_nontrivial_extended_info`/`is_const_prop_profitable_arg` return `true`
-    # for `Const(::Bool)`
-    return isa(widenconditional(cnd), Const)
-end
-
-function find_constrained_arg(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState)
-    slot = cnd.slot
-    for i in 1:length(fargs)
-        arg = ssa_def_slot(fargs[i], sv)
-        if isa(arg, SlotNumber) && slot_id(arg) == slot
-            return i
-        end
-    end
-    return nothing
-end
-
-# checks if all argtypes has additional information other than what `Type` can provide
-function is_all_overridden(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::AbsIntState)
-    𝕃ᵢ = typeinf_lattice(interp)
-    for i in 1:length(argtypes)
-        a = argtypes[i]
-        if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && fargs !== nothing
-            is_const_prop_profitable_conditional(a, fargs, sv) || return false
-        else
-            is_forwardable_argtype(𝕃ᵢ, widenslotwrapper(a)) || return false
-        end
-    end
-    return true
-end
-
-function force_const_prop(interp::AbstractInterpreter, @nospecialize(f), method::Method)
-    return is_aggressive_constprop(method) ||
-           InferenceParams(interp).aggressive_constant_propagation ||
-           istopfunction(f, :getproperty) ||
-           istopfunction(f, :setproperty!)
-end
-
-function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecialize(f),
-    arginfo::ArgInfo, nargs::Int, all_overridden::Bool, sv::AbsIntState)
-    argtypes = arginfo.argtypes
-    if nargs > 1
-        𝕃ᵢ = typeinf_lattice(interp)
-        if istopfunction(f, :getindex) || istopfunction(f, :setindex!)
-            arrty = argtypes[2]
-            # don't propagate constant index into indexing of non-constant array
-            if arrty isa Type && arrty <: AbstractArray && !issingletontype(arrty)
-                # For static arrays, allow the constprop if we could possibly
-                # deduce nothrow as a result.
-                still_nothrow = isa(sv, InferenceState) ? is_nothrow(sv.ipo_effects) : false
-                if !still_nothrow || ismutabletype(arrty)
-                    return false
-                end
-            elseif ⊑(𝕃ᵢ, arrty, Array)
-                return false
-            end
-        elseif istopfunction(f, :iterate)
-            itrty = argtypes[2]
-            if ⊑(𝕃ᵢ, itrty, Array)
-                return false
-            end
-        end
-    end
-    if !all_overridden && (istopfunction(f, :+) || istopfunction(f, :-) || istopfunction(f, :*) ||
-                           istopfunction(f, :(==)) || istopfunction(f, :!=) ||
-                           istopfunction(f, :<=) || istopfunction(f, :>=) || istopfunction(f, :<) || istopfunction(f, :>) ||
-                           istopfunction(f, :<<) || istopfunction(f, :>>))
-        # it is almost useless to inline the op when all the same type,
-        # but highly worthwhile to inline promote of a constant
-        length(argtypes) > 2 || return false
-        t1 = widenconst(argtypes[2])
-        for i in 3:length(argtypes)
-            at = argtypes[i]
-            ty = isvarargtype(at) ? unwraptv(at) : widenconst(at)
-            if ty !== t1
-                return true
-            end
-        end
-        return false
-    end
-    return true
-end
-
-# This is a heuristic to avoid trying to const prop through complicated functions
-# where we would spend a lot of time, but are probably unlikely to get an improved
-# result anyway.
-function const_prop_methodinstance_heuristic(interp::AbstractInterpreter,
-    mi::MethodInstance, arginfo::ArgInfo, sv::AbsIntState)
-    method = mi.def::Method
-    if method.is_for_opaque_closure
-        # Not inlining an opaque closure can be very expensive, so be generous
-        # with the const-prop-ability. It is quite possible that we can't infer
-        # anything at all without const-propping, so the inlining check below
-        # isn't particularly helpful here.
-        return true
-    end
-    # now check if the source of this method instance is inlineable, since the extended type
-    # information we have here would be discarded if it is not inlined into a callee context
-    # (modulo the inferred return type that can be potentially refined)
-    if is_declared_inline(method)
-        # this method is declared as `@inline` and will be inlined
-        return true
-    end
-    flag = get_curr_ssaflag(sv)
-    if is_stmt_inline(flag)
-        # force constant propagation for a call that is going to be inlined
-        # since the inliner will try to find this constant result
-        # if these constant arguments arrive there
-        return true
-    elseif is_stmt_noinline(flag)
-        # this call won't be inlined, thus this constant-prop' will most likely be unfruitful
-        return false
-    else
-        # Peek at the inferred result for the method to determine if the optimizer
-        # was able to cut it down to something simple (inlineable in particular).
-        # If so, there will be a good chance we might be able to const prop
-        # all the way through and learn something new.
-        code = get(code_cache(interp), mi, nothing)
-        if isa(code, CodeInstance)
-            inferred = @atomic :monotonic code.inferred
-            # TODO propagate a specific `CallInfo` that conveys information about this call
-            if inlining_policy(interp, inferred, NoCallInfo(), IR_FLAG_NULL, mi, arginfo.argtypes) !== nothing
-                return true
-            end
-        end
-    end
-    return false # the cache isn't inlineable, so this constant-prop' will most likely be unfruitful
-end
-
-function semi_concrete_eval_call(interp::AbstractInterpreter,
-    mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState)
-    world = frame_world(sv)
-    mi_cache = WorldView(code_cache(interp), world)
-    code = get(mi_cache, mi, nothing)
-    if code !== nothing
-        irsv = IRInterpretationState(interp, code, mi, arginfo.argtypes, world)
-        if irsv !== nothing
-            irsv.parent = sv
-            rt, nothrow = ir_abstract_constant_propagation(interp, irsv)
-            @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from irinterp"
-            if !(isa(rt, Type) && hasintersect(rt, Bool))
-                ir = irsv.ir
-                # TODO (#48913) enable double inlining pass when there are any calls
-                # that are newly resovled by irinterp
-                # state = InliningState(interp)
-                # ir = ssa_inlining_pass!(irsv.ir, state, propagate_inbounds(irsv))
-                new_effects = Effects(result.effects; nothrow)
-                return ConstCallResults(rt, SemiConcreteResult(mi, ir, new_effects), new_effects, mi)
-            end
-        end
-    end
-    return nothing
-end
-
-function const_prop_call(interp::AbstractInterpreter,
-    mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState)
-    inf_cache = get_inference_cache(interp)
-    𝕃ᵢ = typeinf_lattice(interp)
-    inf_result = cache_lookup(𝕃ᵢ, mi, arginfo.argtypes, inf_cache)
-    if inf_result === nothing
-        # fresh constant prop'
-        argtypes = has_conditional(𝕃ᵢ, sv) ? ConditionalArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes)
-        inf_result = InferenceResult(mi, argtypes, typeinf_lattice(interp))
-        if !any(inf_result.overridden_by_const)
-            add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes")
-            return nothing
-        end
-        frame = InferenceState(inf_result, #=cache=#:local, interp)
-        if frame === nothing
-            add_remark!(interp, sv, "[constprop] Could not retrieve the source")
-            return nothing # this is probably a bad generated function (unsound), but just ignore it
-        end
-        frame.parent = sv
-        if !typeinf(interp, frame)
-            add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle")
-            return nothing
-        end
-        @assert inf_result.result !== nothing
-    else
-        # found the cache for this constant prop'
-        if inf_result.result === nothing
-            add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle")
-            return nothing
-        end
-    end
-    return ConstCallResults(inf_result.result, ConstPropResult(inf_result), inf_result.ipo_effects, mi)
-end
-
-# TODO implement MustAlias forwarding
-
-struct ConditionalArgtypes <: ForwardableArgtypes
-    arginfo::ArgInfo
-    sv::InferenceState
-end
-
-"""
-    matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance,
-                            conditional_argtypes::ConditionalArgtypes)
-
-The implementation is able to forward `Conditional` of `conditional_argtypes`,
-as well as the other general extended lattice inforamtion.
-"""
-function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance,
-                                 conditional_argtypes::ConditionalArgtypes)
-    (; arginfo, sv) = conditional_argtypes
-    (; fargs, argtypes) = arginfo
-    given_argtypes = Vector{Any}(undef, length(argtypes))
-    def = linfo.def::Method
-    nargs = Int(def.nargs)
-    cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo)
-    local condargs = nothing
-    for i in 1:length(argtypes)
-        argtype = argtypes[i]
-        # forward `Conditional` if it conveys a constraint on any other argument
-        if isa(argtype, Conditional) && fargs !== nothing
-            cnd = argtype
-            slotid = find_constrained_arg(cnd, fargs, sv)
-            if slotid !== nothing
-                # using union-split signature, we may be able to narrow down `Conditional`
-                sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid])
-                thentype = tmeet(cnd.thentype, sigt)
-                elsetype = tmeet(cnd.elsetype, sigt)
-                if thentype === Bottom && elsetype === Bottom
-                    # we accidentally proved this method match is impossible
-                    # TODO bail out here immediately rather than just propagating Bottom ?
-                    given_argtypes[i] = Bottom
-                else
-                    if condargs === nothing
-                        condargs = Tuple{Int,Int}[]
-                    end
-                    push!(condargs, (slotid, i))
-                    given_argtypes[i] = Conditional(slotid, thentype, elsetype)
-                end
-                continue
-            end
-        end
-        given_argtypes[i] = widenslotwrapper(argtype)
-    end
-    if condargs !== nothing
-        given_argtypes = let condargs=condargs
-            va_process_argtypes(𝕃, given_argtypes, linfo) do isva_given_argtypes::Vector{Any}, last::Int
-                # invalidate `Conditional` imposed on varargs
-                for (slotid, i) in condargs
-                    if slotid ≥ last && (1 ≤ i ≤ length(isva_given_argtypes)) # `Conditional` is already widened to vararg-tuple otherwise
-                        isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i])
-                    end
-                end
-            end
-        end
-    else
-        given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo)
-    end
-    return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes)
-end
-
-# This is only for use with `Conditional`.
-# In general, usage of this is wrong.
-function ssa_def_slot(@nospecialize(arg), sv::InferenceState)
-    code = sv.src.code
-    init = sv.currpc
-    while isa(arg, SSAValue)
-        init = arg.id
-        arg = code[init]
-    end
-    if arg isa SlotNumber
-        # found this kind of pattern:
-        # %init = SlotNumber(x)
-        # [...]
-        # goto if not isa(%init, T)
-        # now conservatively make sure there isn't potentially another conflicting assignment
-        # to the same slot between the def and usage
-        # we can assume the IR is sorted, since the front-end only creates SSA values in order
-        for i = init:(sv.currpc-1)
-            e = code[i]
-            if isexpr(e, :(=)) && e.args[1] === arg
-                return nothing
-            end
-        end
-    else
-        # there might still be the following kind of pattern (see #45499):
-        # %init = ...
-        # [...]
-        # SlotNumber(x) = %init
-        # [...]
-        # goto if not isa(%init, T)
-        # let's check if there is a slot assigned to the def SSA value but also there isn't
-        # any potentially conflicting assignment to the same slot
-        arg = nothing
-        def = SSAValue(init)
-        for i = (init+1):(sv.currpc-1)
-            e = code[i]
-            if isexpr(e, :(=))
-                lhs = e.args[1]
-                if isa(lhs, SlotNumber)
-                    lhs === arg && return nothing
-                    rhs = e.args[2]
-                    if rhs === def
-                        arg = lhs
-                    end
-                end
-            end
-        end
-    end
-    return arg
-end
-
-struct AbstractIterationResult
-    cti::Vector{Any}
-    info::MaybeAbstractIterationInfo
-    ai_effects::Effects
-end
-AbstractIterationResult(cti::Vector{Any}, info::MaybeAbstractIterationInfo) =
-    AbstractIterationResult(cti, info, EFFECTS_TOTAL)
-
-# `typ` is the inferred type for expression `arg`.
-# if the expression constructs a container (e.g. `svec(x,y,z)`),
-# refine its type to an array of element types.
-# Union of Tuples of the same length is converted to Tuple of Unions.
-# returns an array of types
-function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(typ),
-                                sv::AbsIntState)
-    if isa(typ, PartialStruct)
-        widet = typ.typ
-        if isa(widet, DataType)
-            if widet.name === Tuple.name
-                return AbstractIterationResult(typ.fields, nothing)
-            elseif widet.name === _NAMEDTUPLE_NAME
-                return AbstractIterationResult(typ.fields, nothing)
-            end
-        end
-    end
-
-    if isa(typ, Const)
-        val = typ.val
-        if isa(val, SimpleVector) || isa(val, Tuple) || isa(val, NamedTuple)
-            return AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing) # avoid making a tuple Generator here!
-        end
-    end
-
-    tti0 = widenconst(typ)
-    tti = unwrap_unionall(tti0)
-    if isa(tti, DataType) && tti.name === _NAMEDTUPLE_NAME
-        # A NamedTuple iteration is the same as the iteration of its Tuple parameter:
-        # compute a new `tti == unwrap_unionall(tti0)` based on that Tuple type
-        tti = unwraptv(tti.parameters[2])
-        tti0 = rewrap_unionall(tti, tti0)
-    end
-    if isa(tti, Union)
-        utis = uniontypes(tti)
-        if any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis)
-            return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
-        end
-        ltp = length((utis[1]::DataType).parameters)
-        for t in utis
-            if length((t::DataType).parameters) != ltp
-                return AbstractIterationResult(Any[Vararg{Any}], nothing)
-            end
-        end
-        result = Any[ Union{} for _ in 1:ltp ]
-        for t in utis
-            tps = (t::DataType).parameters
-            _all(valid_as_lattice, tps) || continue
-            for j in 1:ltp
-                result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0))
-            end
-        end
-        return AbstractIterationResult(result, nothing)
-    elseif tti0 <: Tuple
-        if isa(tti0, DataType)
-            return AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing)
-        elseif !isa(tti, DataType)
-            return AbstractIterationResult(Any[Vararg{Any}], nothing)
-        else
-            len = length(tti.parameters)
-            last = tti.parameters[len]
-            va = isvarargtype(last)
-            elts = Any[ fieldtype(tti0, i) for i = 1:len ]
-            if va
-                if elts[len] === Union{}
-                    pop!(elts)
-                else
-                    elts[len] = Vararg{elts[len]}
-                end
-            end
-            return AbstractIterationResult(elts, nothing)
-        end
-    elseif tti0 === SimpleVector
-        return AbstractIterationResult(Any[Vararg{Any}], nothing)
-    elseif tti0 === Any
-        return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
-    elseif tti0 <: Array
-        if eltype(tti0) === Union{}
-            return AbstractIterationResult(Any[], nothing)
-        end
-        return AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing)
-    else
-        return abstract_iteration(interp, itft, typ, sv)
-    end
-end
-
-# simulate iteration protocol on container type up to fixpoint
-function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(itertype), sv::AbsIntState)
-    if isa(itft, Const)
-        iteratef = itft.val
-    else
-        return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
-    end
-    @assert !isvarargtype(itertype)
-    call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv)
-    stateordonet = call.rt
-    info = call.info
-    # Return Bottom if this is not an iterator.
-    # WARNING: Changes to the iteration protocol must be reflected here,
-    # this is not just an optimization.
-    # TODO: this doesn't realize that Array, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol
-    stateordonet === Bottom && return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, call.effects, info)], true))
-    valtype = statetype = Bottom
-    ret = Any[]
-    calls = CallMeta[call]
-    stateordonet_widened = widenconst(stateordonet)
-    𝕃ᵢ = typeinf_lattice(interp)
-
-    # Try to unroll the iteration up to max_tuple_splat, which covers any finite
-    # length iterators, or interesting prefix
-    while true
-        if stateordonet_widened === Nothing
-            return AbstractIterationResult(ret, AbstractIterationInfo(calls, true))
-        end
-        if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat
-            break
-        end
-        if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
-            break
-        end
-        nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2))
-        # If there's no new information in this statetype, don't bother continuing,
-        # the iterator won't be finite.
-        if ⊑(𝕃ᵢ, nstatetype, statetype)
-            return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS)
-        end
-        valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1))
-        push!(ret, valtype)
-        statetype = nstatetype
-        call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)
-        stateordonet = call.rt
-        stateordonet_widened = widenconst(stateordonet)
-        push!(calls, call)
-    end
-    # From here on, we start asking for results on the widened types, rather than
-    # the precise (potentially const) state type
-    # statetype and valtype are reinitialized in the first iteration below from the
-    # (widened) stateordonet, which has not yet been fully analyzed in the loop above
-    valtype = statetype = Bottom
-    may_have_terminated = Nothing <: stateordonet_widened
-    while valtype !== Any
-        nounion = typeintersect(stateordonet_widened, Tuple{Any,Any})
-        if nounion !== Union{} && !isa(nounion, DataType)
-            # nounion is of a type we cannot handle
-            valtype = Any
-            break
-        end
-        if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
-            # reached a fixpoint or iterator failed/gave invalid answer
-            if !hasintersect(stateordonet_widened, Nothing)
-                # ... but cannot terminate
-                if !may_have_terminated
-                    #  ... and cannot have terminated prior to this loop
-                    return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects())
-                else
-                    # iterator may have terminated prior to this loop, but not during it
-                    valtype = Bottom
-                end
-            end
-            break
-        end
-        valtype = tmerge(valtype, nounion.parameters[1])
-        statetype = tmerge(statetype, nounion.parameters[2])
-        call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)
-        push!(calls, call)
-        stateordonet = call.rt
-        stateordonet_widened = widenconst(stateordonet)
-    end
-    if valtype !== Union{}
-        push!(ret, Vararg{valtype})
-    end
-    return AbstractIterationResult(ret, AbstractIterationInfo(calls, false))
-end
-
-# do apply(af, fargs...), where af is a function value
-function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo,
-                        sv::AbsIntState, max_methods::Int=get_max_methods(interp, sv))
-    itft = argtype_by_index(argtypes, 2)
-    aft = argtype_by_index(argtypes, 3)
-    (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
-    aargtypes = argtype_tail(argtypes, 4)
-    aftw = widenconst(aft)
-    if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw))
-        if !isconcretetype(aftw) || (aftw <: Builtin)
-            add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type")
-            # bail now, since it seems unlikely that abstract_call will be able to do any better after splitting
-            # this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin
-            return CallMeta(Any, Effects(), NoCallInfo())
-        end
-    end
-    res = Union{}
-    nargs = length(aargtypes)
-    splitunions = 1 < unionsplitcost(typeinf_lattice(interp), aargtypes) <= InferenceParams(interp).max_apply_union_enum
-    ctypes = [Any[aft]]
-    infos = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
-    effects = EFFECTS_TOTAL
-    for i = 1:nargs
-        ctypes´ = Vector{Any}[]
-        infos′ = Vector{MaybeAbstractIterationInfo}[]
-        for ti in (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]])
-            if !isvarargtype(ti)
-                (;cti, info, ai_effects) = precise_container_type(interp, itft, ti, sv)
-            else
-                (;cti, info, ai_effects) = precise_container_type(interp, itft, unwrapva(ti), sv)
-                # We can't represent a repeating sequence of the same types,
-                # so tmerge everything together to get one type that represents
-                # everything.
-                argt = cti[end]
-                if isvarargtype(argt)
-                    argt = unwrapva(argt)
-                end
-                for i in 1:(length(cti)-1)
-                    argt = tmerge(argt, cti[i])
-                end
-                cti = Any[Vararg{argt}]
-            end
-            effects = merge_effects(effects, ai_effects)
-            if info !== nothing
-                for call in info.each
-                    effects = merge_effects(effects, call.effects)
-                end
-            end
-            if any(@nospecialize(t) -> t === Bottom, cti)
-                continue
-            end
-            for j = 1:length(ctypes)
-                ct = ctypes[j]::Vector{Any}
-                if isvarargtype(ct[end])
-                    # This is vararg, we're not gonna be able to do any inlining,
-                    # drop the info
-                    info = nothing
-                    tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti)
-                    push!(ctypes´, push!(ct[1:(end - 1)], tail))
-                else
-                    push!(ctypes´, append!(ct[:], cti))
-                end
-                push!(infos′, push!(copy(infos[j]), info))
-            end
-        end
-        ctypes = ctypes´
-        infos = infos′
-    end
-    retinfos = ApplyCallInfo[]
-    retinfo = UnionSplitApplyCallInfo(retinfos)
-    napplicable = length(ctypes)
-    seen = 0
-    for i = 1:napplicable
-        ct = ctypes[i]
-        arginfo = infos[i]
-        lct = length(ct)
-        # truncate argument list at the first Vararg
-        for i = 1:lct-1
-            cti = ct[i]
-            if isvarargtype(cti)
-                ct[i] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(i+1):lct])
-                resize!(ct, i)
-                break
-            end
-        end
-        call = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)
-        seen += 1
-        push!(retinfos, ApplyCallInfo(call.info, arginfo))
-        res = tmerge(typeinf_lattice(interp), res, call.rt)
-        effects = merge_effects(effects, call.effects)
-        if bail_out_apply(interp, InferenceLoopState(ct, res, effects), sv)
-            add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.")
-            break
-        end
-    end
-    if seen ≠ napplicable
-        # there is unanalyzed candidate, widen type and effects to the top
-        res = Any
-        effects = Effects()
-        retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing
-    end
-    # TODO: Add a special info type to capture all the iteration info.
-    # For now, only propagate info if we don't also union-split the iteration
-    return CallMeta(res, effects, retinfo)
-end
-
-function argtype_by_index(argtypes::Vector{Any}, i::Int)
-    n = length(argtypes)
-    na = argtypes[n]
-    if isvarargtype(na)
-        return i >= n ? unwrapva(na) : argtypes[i]
-    else
-        return i > n ? Bottom : argtypes[i]
-    end
-end
-
-function argtype_tail(argtypes::Vector{Any}, i::Int)
-    n = length(argtypes)
-    if isvarargtype(argtypes[n]) && i > n
-        i = n
-    end
-    return argtypes[i:n]
-end
-
-struct ConditionalTypes
-    thentype
-    elsetype
-    ConditionalTypes(thentype, elsetype) = (@nospecialize; new(thentype, elsetype))
-end
-
-@inline function isa_condition(@nospecialize(xt), @nospecialize(ty), max_union_splitting::Int,
-    @nospecialize(rt))
-    if isa(rt, Const)
-        xt = widenslotwrapper(xt)
-        if rt.val === false
-            return ConditionalTypes(Bottom, xt)
-        elseif rt.val === true
-            return ConditionalTypes(xt, Bottom)
-        end
-    end
-    return isa_condition(xt, ty, max_union_splitting)
-end
-@inline function isa_condition(@nospecialize(xt), @nospecialize(ty), max_union_splitting::Int)
-    tty_ub, isexact_tty = instanceof_tfunc(ty)
-    tty = widenconst(xt)
-    if isexact_tty && !isa(tty_ub, TypeVar)
-        tty_lb = tty_ub # TODO: this would be wrong if !isexact_tty, but instanceof_tfunc doesn't preserve this info
-        if !has_free_typevars(tty_lb) && !has_free_typevars(tty_ub)
-            thentype = typeintersect(tty, tty_ub)
-            if iskindtype(tty_ub) && thentype !== Bottom
-                # `typeintersect` may be unable narrow down `Type`-type
-                thentype = tty_ub
-            end
-            valid_as_lattice(thentype) || (thentype = Bottom)
-            elsetype = typesubtract(tty, tty_lb, max_union_splitting)
-            return ConditionalTypes(thentype, elsetype)
-        end
-    end
-    return nothing
-end
-
-@inline function egal_condition(c::Const, @nospecialize(xt), max_union_splitting::Int,
-    @nospecialize(rt))
-    thentype = c
-    elsetype = widenslotwrapper(xt)
-    if rt === Const(false)
-        thentype = Bottom
-    elseif rt === Const(true)
-        elsetype = Bottom
-    elseif elsetype isa Type && isdefined(typeof(c.val), :instance) # can only widen a if it is a singleton
-        elsetype = typesubtract(elsetype, typeof(c.val), max_union_splitting)
-    end
-    return ConditionalTypes(thentype, elsetype)
-end
-@inline function egal_condition(c::Const, @nospecialize(xt), max_union_splitting::Int)
-    thentype = c
-    elsetype = widenslotwrapper(xt)
-    if elsetype isa Type && issingletontype(typeof(c.val)) # can only widen a if it is a singleton
-        elsetype = typesubtract(elsetype, typeof(c.val), max_union_splitting)
-    end
-    return ConditionalTypes(thentype, elsetype)
-end
-
-function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo,
-                               sv::AbsIntState)
-    @nospecialize f
-    la = length(argtypes)
-    𝕃ᵢ = typeinf_lattice(interp)
-    ⊑ᵢ = ⊑(𝕃ᵢ)
-    if has_conditional(𝕃ᵢ, sv) && f === Core.ifelse && fargs isa Vector{Any} && la == 4
-        cnd = argtypes[2]
-        if isa(cnd, Conditional)
-            newcnd = widenconditional(cnd)
-            tx = argtypes[3]
-            ty = argtypes[4]
-            if isa(newcnd, Const)
-                # if `cnd` is constant, we should just respect its constantness to keep inference accuracy
-                return newcnd.val::Bool ? tx : ty
-            else
-                # try to simulate this as a real conditional (`cnd ? x : y`), so that the penalty for using `ifelse` instead isn't too high
-                a = ssa_def_slot(fargs[3], sv)
-                b = ssa_def_slot(fargs[4], sv)
-                if isa(a, SlotNumber) && cnd.slot == slot_id(a)
-                    tx = (cnd.thentype ⊑ᵢ tx ? cnd.thentype : tmeet(𝕃ᵢ, tx, widenconst(cnd.thentype)))
-                end
-                if isa(b, SlotNumber) && cnd.slot == slot_id(b)
-                    ty = (cnd.elsetype ⊑ᵢ ty ? cnd.elsetype : tmeet(𝕃ᵢ, ty, widenconst(cnd.elsetype)))
-                end
-                return tmerge(𝕃ᵢ, tx, ty)
-            end
-        end
-    end
-    rt = builtin_tfunction(interp, f, argtypes[2:end], sv)
-    if has_mustalias(𝕃ᵢ) && f === getfield && isa(fargs, Vector{Any}) && la ≥ 3
-        a3 = argtypes[3]
-        if isa(a3, Const)
-            if rt !== Bottom && !isalreadyconst(rt)
-                var = fargs[2]
-                if isa(var, SlotNumber)
-                    vartyp = widenslotwrapper(argtypes[2])
-                    fldidx = maybe_const_fldidx(vartyp, a3.val)
-                    if fldidx !== nothing
-                        # wrap this aliasable field into `MustAlias` for possible constraint propagations
-                        return MustAlias(var, vartyp, fldidx, rt)
-                    end
-                end
-            end
-        end
-    elseif has_conditional(𝕃ᵢ, sv) && (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any})
-        # perform very limited back-propagation of type information for `is` and `isa`
-        if f === isa
-            # try splitting value argument, based on types
-            a = ssa_def_slot(fargs[2], sv)
-            a2 = argtypes[2]
-            a3 = argtypes[3]
-            if isa(a, SlotNumber)
-                cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting, rt)
-                if cndt !== nothing
-                    return Conditional(a, cndt.thentype, cndt.elsetype)
-                end
-            end
-            if isa(a2, MustAlias)
-                if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization)
-                    cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting)
-                    if cndt !== nothing
-                        return form_mustalias_conditional(a2, cndt.thentype, cndt.elsetype)
-                    end
-                end
-            end
-            # try splitting type argument, based on value
-            if isdispatchelem(widenconst(a2)) && a3 isa Union && !has_free_typevars(a3) && !isa(rt, Const)
-                b = ssa_def_slot(fargs[3], sv)
-                if isa(b, SlotNumber)
-                    # !(x isa T) implies !(Type{a2} <: T)
-                    # TODO: complete splitting, based on which portions of the Union a3 for which isa_tfunc returns Const(true) or Const(false) instead of Bool
-                    elsetype = typesubtract(a3, Type{widenconst(a2)}, InferenceParams(interp).max_union_splitting)
-                    return Conditional(b, a3, elsetype)
-                end
-            end
-        elseif f === (===)
-            a = ssa_def_slot(fargs[2], sv)
-            b = ssa_def_slot(fargs[3], sv)
-            aty = argtypes[2]
-            bty = argtypes[3]
-            # if doing a comparison to a singleton, consider returning a `Conditional` instead
-            if isa(aty, Const)
-                if isa(b, SlotNumber)
-                    cndt = egal_condition(aty, bty, InferenceParams(interp).max_union_splitting, rt)
-                    return Conditional(b, cndt.thentype, cndt.elsetype)
-                elseif isa(bty, MustAlias) && !isa(rt, Const) # skip refinement when the field is known precisely (just optimization)
-                    cndt = egal_condition(aty, bty.fldtyp, InferenceParams(interp).max_union_splitting)
-                    return form_mustalias_conditional(bty, cndt.thentype, cndt.elsetype)
-                end
-            elseif isa(bty, Const)
-                if isa(a, SlotNumber)
-                    cndt = egal_condition(bty, aty, InferenceParams(interp).max_union_splitting, rt)
-                    return Conditional(a, cndt.thentype, cndt.elsetype)
-                elseif isa(aty, MustAlias) && !isa(rt, Const) # skip refinement when the field is known precisely (just optimization)
-                    cndt = egal_condition(bty, aty.fldtyp, InferenceParams(interp).max_union_splitting)
-                    return form_mustalias_conditional(aty, cndt.thentype, cndt.elsetype)
-                end
-            end
-            # TODO enable multiple constraints propagation here, there are two possible improvements:
-            # 1. propagate constraints for both lhs and rhs
-            # 2. we can propagate both constraints on aliased fields and slots
-            # As for 2, for now, we prioritize constraints on aliased fields, since currently
-            # different slots that represent the same object can't share same field constraint,
-            # and thus binding `MustAlias` to the other slot is less likely useful
-            if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization)
-                if isa(bty, MustAlias)
-                    thentype = widenslotwrapper(aty)
-                    elsetype = bty.fldtyp
-                    if thentype ⊏ elsetype
-                        return form_mustalias_conditional(bty, thentype, elsetype)
-                    end
-                elseif isa(aty, MustAlias)
-                    thentype = widenslotwrapper(bty)
-                    elsetype = aty.fldtyp
-                    if thentype ⊏ elsetype
-                        return form_mustalias_conditional(aty, thentype, elsetype)
-                    end
-                end
-            end
-            # narrow the lattice slightly (noting the dependency on one of the slots), to promote more effective smerge
-            if isa(b, SlotNumber)
-                thentype = rt === Const(false) ? Bottom : widenslotwrapper(bty)
-                elsetype = rt === Const(true)  ? Bottom : widenslotwrapper(bty)
-                return Conditional(b, thentype, elsetype)
-            elseif isa(a, SlotNumber)
-                thentype = rt === Const(false) ? Bottom : widenslotwrapper(aty)
-                elsetype = rt === Const(true)  ? Bottom : widenslotwrapper(aty)
-                return Conditional(a, thentype, elsetype)
-            end
-        elseif f === Core.Compiler.not_int
-            aty = argtypes[2]
-            if isa(aty, Conditional)
-                thentype = rt === Const(false) ? Bottom : aty.elsetype
-                elsetype = rt === Const(true)  ? Bottom : aty.thentype
-                return Conditional(aty.slot, thentype, elsetype)
-            end
-        elseif f === isdefined
-            uty = argtypes[2]
-            a = ssa_def_slot(fargs[2], sv)
-            if isa(uty, Union) && isa(a, SlotNumber)
-                fld = argtypes[3]
-                thentype = Bottom
-                elsetype = Bottom
-                for ty in uniontypes(uty)
-                    cnd = isdefined_tfunc(𝕃ᵢ, ty, fld)
-                    if isa(cnd, Const)
-                        if cnd.val::Bool
-                            thentype = tmerge(thentype, ty)
-                        else
-                            elsetype = tmerge(elsetype, ty)
-                        end
-                    else
-                        thentype = tmerge(thentype, ty)
-                        elsetype = tmerge(elsetype, ty)
-                    end
-                end
-                return Conditional(a, thentype, elsetype)
-            end
-        end
-    end
-    @assert !isa(rt, TypeVar) "unhandled TypeVar"
-    return rt
-end
-
-function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{Any})
-    if length(argtypes) == 3
-        canconst = true
-        a2 = argtypes[2]
-        a3 = argtypes[3]
-        ⊑ᵢ = ⊑(typeinf_lattice(interp))
-        nothrow = a2 ⊑ᵢ TypeVar && (a3 ⊑ᵢ Type || a3 ⊑ᵢ TypeVar)
-        if isa(a3, Const)
-            body = a3.val
-        elseif isType(a3)
-            body = a3.parameters[1]
-            canconst = false
-        else
-            return CallMeta(Any, Effects(EFFECTS_TOTAL; nothrow), NoCallInfo())
-        end
-        if !(isa(body, Type) || isa(body, TypeVar))
-            return CallMeta(Any, EFFECTS_THROWS, NoCallInfo())
-        end
-        if has_free_typevars(body)
-            if isa(a2, Const)
-                tv = a2.val
-            elseif isa(a2, PartialTypeVar)
-                tv = a2.tv
-                canconst = false
-            else
-                return CallMeta(Any, EFFECTS_THROWS, NoCallInfo())
-            end
-            isa(tv, TypeVar) || return CallMeta(Any, EFFECTS_THROWS, NoCallInfo())
-            body = UnionAll(tv, body)
-        end
-        ret = canconst ? Const(body) : Type{body}
-        return CallMeta(ret, Effects(EFFECTS_TOTAL; nothrow), NoCallInfo())
-    end
-    return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
-end
-
-function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, si::StmtInfo, sv::AbsIntState)
-    ft′ = argtype_by_index(argtypes, 2)
-    ft = widenconst(ft′)
-    ft === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
-    (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3))
-    isexact || return CallMeta(Any, Effects(), NoCallInfo())
-    unwrapped = unwrap_unionall(types)
-    if types === Bottom || !(unwrapped isa DataType) || unwrapped.name !== Tuple.name
-        return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
-    end
-    argtype = argtypes_to_type(argtype_tail(argtypes, 4))
-    nargtype = typeintersect(types, argtype)
-    nargtype === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
-    nargtype isa DataType || return CallMeta(Any, Effects(), NoCallInfo()) # other cases are not implemented below
-    isdispatchelem(ft) || return CallMeta(Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
-    ft = ft::DataType
-    lookupsig = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type
-    nargtype = Tuple{ft, nargtype.parameters...}
-    argtype = Tuple{ft, argtype.parameters...}
-    match, valid_worlds, overlayed = findsup(lookupsig, method_table(interp))
-    match === nothing && return CallMeta(Any, Effects(), NoCallInfo())
-    update_valid_age!(sv, valid_worlds)
-    method = match.method
-    tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector
-    ti = tienv[1]; env = tienv[2]::SimpleVector
-    result = abstract_call_method(interp, method, ti, env, false, si, sv)
-    (; rt, edge, effects) = result
-    match = MethodMatch(ti, env, method, argtype <: method.sig)
-    res = nothing
-    sig = match.spec_types
-    argtypes′ = invoke_rewrite(argtypes)
-    fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
-    arginfo = ArgInfo(fargs′, argtypes′)
-    # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
-    # for i in 1:length(argtypes′)
-    #     t, a = ti.parameters[i], argtypes′[i]
-    #     argtypes′[i] = t ⊑ a ? t : a
-    # end
-    𝕃ₚ = ipo_lattice(interp)
-    f = overlayed ? nothing : singleton_type(ft′)
-    invokecall = InvokeCall(types, lookupsig)
-    const_call_result = abstract_call_method_with_const_args(interp,
-        result, f, arginfo, si, match, sv, invokecall)
-    const_result = nothing
-    if const_call_result !== nothing
-        if ⊑(𝕃ₚ, const_call_result.rt, rt)
-            (; rt, effects, const_result, edge) = const_call_result
-        end
-    end
-    rt = from_interprocedural!(interp, rt, sv, arginfo, sig)
-    effects = Effects(effects; nonoverlayed=!overlayed)
-    info = InvokeCallInfo(match, const_result)
-    edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge)
-    return CallMeta(rt, effects, info)
-end
-
-function invoke_rewrite(xs::Vector{Any})
-    x0 = xs[2]
-    newxs = xs[3:end]
-    newxs[1] = x0
-    return newxs
-end
-
-function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState)
-    if length(argtypes) == 3
-        finalizer_argvec = Any[argtypes[2], argtypes[3]]
-        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)
-        return CallMeta(Nothing, Effects(), FinalizerInfo(call.info, call.effects))
-    end
-    return CallMeta(Nothing, Effects(), NoCallInfo())
-end
-
-# call where the function is known exactly
-function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
-        arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState,
-        max_methods::Int = get_max_methods(interp, f, sv))
-    (; fargs, argtypes) = arginfo
-    la = length(argtypes)
-
-    𝕃ᵢ = typeinf_lattice(interp)
-    if isa(f, Builtin)
-        if f === _apply_iterate
-            return abstract_apply(interp, argtypes, si, sv, max_methods)
-        elseif f === invoke
-            return abstract_invoke(interp, arginfo, si, sv)
-        elseif f === modifyfield!
-            return abstract_modifyfield!(interp, argtypes, si, sv)
-        elseif f === Core.finalizer
-            return abstract_finalizer(interp, argtypes, sv)
-        elseif f === applicable
-            return abstract_applicable(interp, argtypes, sv, max_methods)
-        end
-        rt = abstract_call_builtin(interp, f, arginfo, sv)
-        effects = builtin_effects(𝕃ᵢ, f, arginfo, rt)
-        if f === getfield && (fargs !== nothing && isexpr(fargs[end], :boundscheck)) && !is_nothrow(effects) && isa(sv, InferenceState)
-            # As a special case, we delayed tainting `noinbounds` for getfield calls in case we can prove
-            # in-boundedness indepedently. Here we need to put that back in other cases.
-            # N.B.: This isn't about the effects of the call itself, but a delayed contribution of the :boundscheck
-            # statement, so we need to merge this directly into sv, rather than modifying thte effects.
-            merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; noinbounds=false,
-                consistent = (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0 ? ALWAYS_FALSE : ALWAYS_TRUE))
-        end
-        return CallMeta(rt, effects, NoCallInfo())
-    elseif isa(f, Core.OpaqueClosure)
-        # calling an OpaqueClosure about which we have no information returns no information
-        return CallMeta(typeof(f).parameters[2], Effects(), NoCallInfo())
-    elseif f === TypeVar
-        # Manually look through the definition of TypeVar to
-        # make sure to be able to get `PartialTypeVar`s out.
-        (la < 2 || la > 4) && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
-        n = argtypes[2]
-        ub_var = Const(Any)
-        lb_var = Const(Union{})
-        if la == 4
-            ub_var = argtypes[4]
-            lb_var = argtypes[3]
-        elseif la == 3
-            ub_var = argtypes[3]
-        end
-        pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var)
-        effects = builtin_effects(𝕃ᵢ, Core._typevar, ArgInfo(nothing,
-            Any[Const(Core._typevar), n, lb_var, ub_var]), pT)
-        return CallMeta(pT, effects, NoCallInfo())
-    elseif f === UnionAll
-        return abstract_call_unionall(interp, argtypes)
-    elseif f === Tuple && la == 2
-        aty = argtypes[2]
-        ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty)
-        if !isconcretetype(ty)
-            return CallMeta(Tuple, EFFECTS_UNKNOWN, NoCallInfo())
-        end
-    elseif is_return_type(f)
-        return return_type_tfunc(interp, argtypes, si, sv)
-    elseif la == 2 && istopfunction(f, :!)
-        # handle Conditional propagation through !Bool
-        aty = argtypes[2]
-        if isa(aty, Conditional)
-            call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Bool]), si, Tuple{typeof(f), Bool}, sv, max_methods) # make sure we've inferred `!(::Bool)`
-            return CallMeta(Conditional(aty.slot, aty.elsetype, aty.thentype), call.effects, call.info)
-        end
-    elseif la == 3 && istopfunction(f, :!==)
-        # mark !== as exactly a negated call to ===
-        rty = abstract_call_known(interp, (===), arginfo, si, sv, max_methods).rt
-        if isa(rty, Conditional)
-            return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), EFFECTS_TOTAL, NoCallInfo()) # swap if-else
-        elseif isa(rty, Const)
-            return CallMeta(Const(rty.val === false), EFFECTS_TOTAL, MethodResultPure())
-        end
-        return CallMeta(rty, EFFECTS_TOTAL, NoCallInfo())
-    elseif la == 3 && istopfunction(f, :(>:))
-        # mark issupertype as a exact alias for issubtype
-        # swap T1 and T2 arguments and call <:
-        if fargs !== nothing && length(fargs) == 3
-            fargs = Any[<:, fargs[3], fargs[2]]
-        else
-            fargs = nothing
-        end
-        argtypes = Any[typeof(<:), argtypes[3], argtypes[2]]
-        return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods)
-    elseif la == 2 && istopfunction(f, :typename)
-        return CallMeta(typename_static(argtypes[2]), EFFECTS_TOTAL, MethodResultPure())
-    elseif f === Core._hasmethod
-        return _hasmethod_tfunc(interp, argtypes, sv)
-    end
-    atype = argtypes_to_type(argtypes)
-    return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)
-end
-
-function abstract_call_opaque_closure(interp::AbstractInterpreter,
-    closure::PartialOpaque, arginfo::ArgInfo, si::StmtInfo, sv::InferenceState, check::Bool=true)
-    sig = argtypes_to_type(arginfo.argtypes)
-    result = abstract_call_method(interp, closure.source::Method, sig, Core.svec(), false, si, sv)
-    (; rt, edge, effects) = result
-    tt = closure.typ
-    sigT = (unwrap_unionall(tt)::DataType).parameters[1]
-    match = MethodMatch(sig, Core.svec(), closure.source, sig <: rewrap_unionall(sigT, tt))
-    𝕃ₚ = ipo_lattice(interp)
-    ⊑ₚ = ⊑(𝕃ₚ)
-    const_result = nothing
-    if !result.edgecycle
-        const_call_result = abstract_call_method_with_const_args(interp, result,
-            nothing, arginfo, si, match, sv)
-        if const_call_result !== nothing
-            if const_call_result.rt ⊑ₚ rt
-                (; rt, effects, const_result, edge) = const_call_result
-            end
-        end
-    end
-    if check # analyze implicit type asserts on argument and return type
-        ftt = closure.typ
-        (aty, rty) = (unwrap_unionall(ftt)::DataType).parameters
-        rty = rewrap_unionall(rty isa TypeVar ? rty.lb : rty, ftt)
-        if !(rt ⊑ₚ rty && tuple_tfunc(𝕃ₚ, arginfo.argtypes[2:end]) ⊑ₚ rewrap_unionall(aty, ftt))
-            effects = Effects(effects; nothrow=false)
-        end
-    end
-    rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types)
-    info = OpaqueClosureCallInfo(match, const_result)
-    edge !== nothing && add_backedge!(sv, edge)
-    return CallMeta(rt, effects, info)
-end
-
-function most_general_argtypes(closure::PartialOpaque)
-    ret = Any[]
-    cc = widenconst(closure)
-    argt = (unwrap_unionall(cc)::DataType).parameters[1]
-    if !isa(argt, DataType) || argt.name !== typename(Tuple)
-        argt = Tuple
-    end
-    return Any[argt.parameters...]
-end
-
-function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft),
-                               arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState,
-                               max_methods::Int)
-    if isa(ft, PartialOpaque)
-        newargtypes = copy(arginfo.argtypes)
-        newargtypes[1] = ft.env
-        return abstract_call_opaque_closure(interp,
-            ft, ArgInfo(arginfo.fargs, newargtypes), si, sv, #=check=#true)
-    end
-    wft = widenconst(ft)
-    if hasintersect(wft, Builtin)
-        add_remark!(interp, sv, "Could not identify method table for call")
-        return CallMeta(Any, Effects(), NoCallInfo())
-    elseif hasintersect(wft, Core.OpaqueClosure)
-        uft = unwrap_unionall(wft)
-        if isa(uft, DataType)
-            return CallMeta(rewrap_unionall(uft.parameters[2], wft), Effects(), NoCallInfo())
-        end
-        return CallMeta(Any, Effects(), NoCallInfo())
-    end
-    # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic
-    atype = argtypes_to_type(arginfo.argtypes)
-    return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods)
-end
-
-# call where the function is any lattice element
-function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo,
-                       sv::AbsIntState, max_methods::Int=typemin(Int))
-    ft = widenslotwrapper(arginfo.argtypes[1])
-    f = singleton_type(ft)
-    if f === nothing
-        max_methods = max_methods == typemin(Int) ? get_max_methods(interp, sv) : max_methods
-        return abstract_call_unknown(interp, ft, arginfo, si, sv, max_methods)
-    end
-    max_methods = max_methods == typemin(Int) ? get_max_methods(interp, f, sv) : max_methods
-    return abstract_call_known(interp, f, arginfo, si, sv, max_methods)
-end
-
-function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
-    isref = false
-    if unwrapva(T) === Bottom
-        return Bottom
-    elseif isa(T, Type)
-        if isa(T, DataType) && (T::DataType).name === Ref.body.name
-            isref = true
-            T = T.parameters[1]
-            if isreturn && T === Any
-                return Bottom # a return type of Ref{Any} is invalid
-            end
-        end
-    else
-        return Any
-    end
-    if isa(linfo.def, Method)
-        spsig = linfo.def.sig
-        if isa(spsig, UnionAll)
-            if !isempty(linfo.sparam_vals)
-                sparam_vals = Any[isvarargtype(v) ? TypeVar(:N, Union{}, Any) :
-                                  v for v in  linfo.sparam_vals]
-                T = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), T, spsig, sparam_vals)
-                isref && isreturn && T === Any && return Bottom # catch invalid return Ref{T} where T = Any
-                for v in sparam_vals
-                    if isa(v, TypeVar)
-                        T = UnionAll(v, T)
-                    end
-                end
-            else
-                T = rewrap_unionall(T, spsig)
-            end
-        end
-    end
-    return unwraptv(T)
-end
-
-function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
-    f = abstract_eval_value(interp, e.args[2], vtypes, sv)
-    # rt = sp_type_rewrap(e.args[3], sv.linfo, true)
-    atv = e.args[4]::SimpleVector
-    at = Vector{Any}(undef, length(atv) + 1)
-    at[1] = f
-    for i = 1:length(atv)
-        at[i + 1] = sp_type_rewrap(at[i], frame_instance(sv), false)
-        at[i + 1] === Bottom && return
-    end
-    # this may be the wrong world for the call,
-    # but some of the result is likely to be valid anyways
-    # and that may help generate better codegen
-    abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv)
-    nothing
-end
-
-function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
-    rt = Any
-    head = e.head
-    if head === :static_parameter
-        n = e.args[1]::Int
-        nothrow = false
-        if 1 <= n <= length(sv.sptypes)
-            sp = sv.sptypes[n]
-            rt = sp.typ
-            nothrow = !sp.undef
-        end
-        merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow))
-        return rt
-    elseif head === :boundscheck
-        if isa(sv, InferenceState)
-            stmt = sv.src.code[sv.currpc]
-            if isexpr(stmt, :call)
-                f = abstract_eval_value(interp, stmt.args[1], vtypes, sv)
-                if f isa Const && f.val === getfield
-                    # boundscheck of `getfield` call is analyzed by tfunc potentially without
-                    # tainting :inbounds or :consistent when it's known to be nothrow
-                    @goto delay_effects_analysis
-                end
-            end
-            # If there is no particular `@inbounds` for this function, then we only taint `:noinbounds`,
-            # which will subsequently taint `:consistent`-cy if this function is called from another
-            # function that uses `@inbounds`. However, if this `:boundscheck` is itself within an
-            # `@inbounds` region, its value depends on `--check-bounds`, so we need to taint
-            # `:consistent`-cy here also.
-            merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; noinbounds=false,
-                consistent = (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0 ? ALWAYS_FALSE : ALWAYS_TRUE))
-        end
-        @label delay_effects_analysis
-        rt = Bool
-    elseif head === :inbounds
-        @assert false && "Expected this to have been moved into flags"
-    elseif head === :the_exception
-        merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE))
-    end
-    return rt
-end
-
-function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
-    if isa(e, QuoteNode)
-        return Const(e.value)
-    elseif isa(e, SSAValue)
-        return abstract_eval_ssavalue(e, sv)
-    elseif isa(e, SlotNumber)
-        if vtypes !== nothing
-            vtyp = vtypes[slot_id(e)]
-            if vtyp.undef
-                merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow=false))
-            end
-            return vtyp.typ
-        end
-        merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow=false))
-        return Any
-    elseif isa(e, Argument)
-        if vtypes !== nothing
-            return vtypes[slot_id(e)].typ
-        else
-            @assert isa(sv, IRInterpretationState)
-            return sv.ir.argtypes[e.n] # TODO frame_argtypes(sv)[e.n] and remove the assertion
-        end
-    elseif isa(e, GlobalRef)
-        return abstract_eval_globalref(interp, e, sv)
-    end
-
-    return Const(e)
-end
-
-function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
-    if isa(e, Expr)
-        return abstract_eval_value_expr(interp, e, vtypes, sv)
-    else
-        typ = abstract_eval_special_value(interp, e, vtypes, sv)
-        return collect_limitations!(typ, sv)
-    end
-end
-
-function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
-    n = length(ea)
-    argtypes = Vector{Any}(undef, n)
-    @inbounds for i = 1:n
-        ai = abstract_eval_value(interp, ea[i], vtypes, sv)
-        if ai === Bottom
-            return nothing
-        end
-        argtypes[i] = ai
-    end
-    return argtypes
-end
-
-struct RTEffects
-    rt
-    effects::Effects
-    RTEffects(@nospecialize(rt), effects::Effects) = new(rt, effects)
-end
-
-function mark_curr_effect_flags!(sv::AbsIntState, effects::Effects)
-    if isa(sv, InferenceState)
-        if is_effect_free(effects)
-            add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE)
-        else
-            sub_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE)
-        end
-        if is_nothrow(effects)
-            add_curr_ssaflag!(sv, IR_FLAG_NOTHROW)
-        else
-            sub_curr_ssaflag!(sv, IR_FLAG_NOTHROW)
-        end
-        if is_consistent(effects)
-            add_curr_ssaflag!(sv, IR_FLAG_CONSISTENT)
-        else
-            sub_curr_ssaflag!(sv, IR_FLAG_CONSISTENT)
-        end
-    end
-end
-
-function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::InferenceState)
-    si = StmtInfo(!call_result_unused(sv, sv.currpc))
-    (; rt, effects, info) = abstract_call(interp, arginfo, si, sv)
-    sv.stmt_info[sv.currpc] = info
-    # mark this call statement as DCE-elgible
-    # TODO better to do this in a single pass based on the `info` object at the end of abstractinterpret?
-    mark_curr_effect_flags!(sv, effects)
-    return RTEffects(rt, effects)
-end
-
-function abstract_eval_call(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
-                            sv::AbsIntState)
-    ea = e.args
-    argtypes = collect_argtypes(interp, ea, vtypes, sv)
-    if argtypes === nothing
-        return RTEffects(Bottom, Effects())
-    end
-    arginfo = ArgInfo(ea, argtypes)
-    return abstract_call(interp, arginfo, sv)
-end
-
-function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
-                                      sv::AbsIntState)
-    effects = Effects()
-    ehead = e.head
-    𝕃ᵢ = typeinf_lattice(interp)
-    ⊑ᵢ = ⊑(𝕃ᵢ)
-    if ehead === :call
-        (; rt, effects) = abstract_eval_call(interp, e, vtypes, sv)
-        t = rt
-    elseif ehead === :new
-        t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))
-        ut = unwrap_unionall(t)
-        consistent = ALWAYS_FALSE
-        nothrow = false
-        if isa(ut, DataType) && !isabstracttype(ut)
-            ismutable = ismutabletype(ut)
-            fcount = datatype_fieldcount(ut)
-            nargs = length(e.args) - 1
-            if (fcount === nothing || (fcount > nargs && (let t = t
-                    any(i::Int -> !is_undefref_fieldtype(fieldtype(t, i)), (nargs+1):fcount)
-                end)))
-                # allocation with undefined field leads to undefined behavior and should taint `:consistent`-cy
-                consistent = ALWAYS_FALSE
-            elseif ismutable
-                # mutable object isn't `:consistent`, but we can still give the return
-                # type information a chance to refine this `:consistent`-cy later
-                consistent = CONSISTENT_IF_NOTRETURNED
-            else
-                consistent = ALWAYS_TRUE
-            end
-            if isconcretedispatch(t)
-                nothrow = true
-                @assert fcount !== nothing && fcount ≥ nargs "malformed :new expression" # syntactically enforced by the front-end
-                ats = Vector{Any}(undef, nargs)
-                local anyrefine = false
-                local allconst = true
-                for i = 1:nargs
-                    at = widenslotwrapper(abstract_eval_value(interp, e.args[i+1], vtypes, sv))
-                    ft = fieldtype(t, i)
-                    nothrow && (nothrow = at ⊑ᵢ ft)
-                    at = tmeet(𝕃ᵢ, at, ft)
-                    at === Bottom && @goto always_throw
-                    if ismutable && !isconst(t, i)
-                        ats[i] = ft # can't constrain this field (as it may be modified later)
-                        continue
-                    end
-                    allconst &= isa(at, Const)
-                    if !anyrefine
-                        anyrefine = has_nontrivial_extended_info(𝕃ᵢ, at) || # extended lattice information
-                                    ⋤(𝕃ᵢ, at, ft) # just a type-level information, but more precise than the declared type
-                    end
-                    ats[i] = at
-                end
-                # For now, don't allow:
-                # - Const/PartialStruct of mutables (but still allow PartialStruct of mutables
-                #   with `const` fields if anything refined)
-                # - partially initialized Const/PartialStruct
-                if fcount == nargs
-                    if consistent === ALWAYS_TRUE && allconst
-                        argvals = Vector{Any}(undef, nargs)
-                        for j in 1:nargs
-                            argvals[j] = (ats[j]::Const).val
-                        end
-                        t = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), t, argvals, nargs))
-                    elseif anyrefine
-                        t = PartialStruct(t, ats)
-                    end
-                end
-            else
-                t = refine_partial_type(t)
-            end
-        end
-        effects = Effects(EFFECTS_TOTAL; consistent, nothrow)
-    elseif ehead === :splatnew
-        t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))
-        nothrow = false # TODO: More precision
-        if length(e.args) == 2 && isconcretedispatch(t) && !ismutabletype(t)
-            at = abstract_eval_value(interp, e.args[2], vtypes, sv)
-            n = fieldcount(t)
-            if (isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
-                (let t = t, at = at
-                    all(i::Int->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n)
-                end))
-                nothrow = isexact
-                t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val))
-            elseif (isa(at, PartialStruct) && at ⊑ᵢ Tuple && n > 0 && n == length(at.fields::Vector{Any}) && !isvarargtype(at.fields[end]) &&
-                    (let t = t, at = at, ⊑ᵢ = ⊑ᵢ
-                        all(i::Int->(at.fields::Vector{Any})[i] ⊑ᵢ fieldtype(t, i), 1:n)
-                    end))
-                nothrow = isexact
-                t = PartialStruct(t, at.fields::Vector{Any})
-            end
-        else
-            t = refine_partial_type(t)
-        end
-        consistent = !ismutabletype(t) ? ALWAYS_TRUE : CONSISTENT_IF_NOTRETURNED
-        effects = Effects(EFFECTS_TOTAL; consistent, nothrow)
-    elseif ehead === :new_opaque_closure
-        t = Union{}
-        effects = Effects() # TODO
-        merge_effects!(interp, sv, effects)
-        if length(e.args) >= 4
-            ea = e.args
-            argtypes = collect_argtypes(interp, ea, vtypes, sv)
-            if argtypes === nothing
-                t = Bottom
-            else
-                mi = frame_instance(sv)
-                t = opaque_closure_tfunc(𝕃ᵢ, argtypes[1], argtypes[2], argtypes[3],
-                    argtypes[4], argtypes[5:end], mi)
-                if isa(t, PartialOpaque) && isa(sv, InferenceState) && !call_result_unused(sv, sv.currpc)
-                    # Infer this now so that the specialization is available to
-                    # optimization.
-                    argtypes = most_general_argtypes(t)
-                    pushfirst!(argtypes, t.env)
-                    callinfo = abstract_call_opaque_closure(interp, t,
-                        ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false)
-                    sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
-                end
-            end
-        end
-    elseif ehead === :foreigncall
-        (; rt, effects) = abstract_eval_foreigncall(interp, e, vtypes, sv)
-        t = rt
-        mark_curr_effect_flags!(sv, effects)
-    elseif ehead === :cfunction
-        effects = EFFECTS_UNKNOWN
-        t = e.args[1]
-        isa(t, Type) || (t = Any)
-        abstract_eval_cfunction(interp, e, vtypes, sv)
-    elseif ehead === :method
-        t = (length(e.args) == 1) ? Any : Nothing
-        effects = EFFECTS_UNKNOWN
-    elseif ehead === :copyast
-        effects = EFFECTS_UNKNOWN
-        t = abstract_eval_value(interp, e.args[1], vtypes, sv)
-        if t isa Const && t.val isa Expr
-            # `copyast` makes copies of Exprs
-            t = Expr
-        end
-    elseif ehead === :invoke || ehead === :invoke_modify
-        error("type inference data-flow error: tried to double infer a function")
-    elseif ehead === :isdefined
-        sym = e.args[1]
-        t = Bool
-        effects = EFFECTS_TOTAL
-        if isa(sym, SlotNumber) && vtypes !== nothing
-            vtyp = vtypes[slot_id(sym)]
-            if vtyp.typ === Bottom
-                t = Const(false) # never assigned previously
-            elseif !vtyp.undef
-                t = Const(true) # definitely assigned previously
-            end
-        elseif isa(sym, Symbol)
-            if isdefined(frame_module(sv), sym)
-                t = Const(true)
-            elseif InferenceParams(interp).assume_bindings_static
-                t = Const(false)
-            end
-        elseif isa(sym, GlobalRef)
-            if isdefined(sym.mod, sym.name)
-                t = Const(true)
-            elseif InferenceParams(interp).assume_bindings_static
-                t = Const(false)
-            end
-        elseif isexpr(sym, :static_parameter)
-            n = sym.args[1]::Int
-            if 1 <= n <= length(sv.sptypes)
-                sp = sv.sptypes[n]
-                if !sp.undef
-                    t = Const(true)
-                elseif sp.typ === Bottom
-                    t = Const(false)
-                end
-            end
-        end
-    elseif false
-        @label always_throw
-        t = Bottom
-        effects = EFFECTS_THROWS
-    else
-        t = abstract_eval_value_expr(interp, e, vtypes, sv)
-        effects = EFFECTS_TOTAL
-    end
-    return RTEffects(t, effects)
-end
-
-# refine the result of instantiation of partially-known type `t` if some invariant can be assumed
-function refine_partial_type(@nospecialize t)
-    t′ = unwrap_unionall(t)
-    if isa(t′, DataType) && t′.name === _NAMEDTUPLE_NAME && length(t′.parameters) == 2 &&
-        (t′.parameters[1] === () || t′.parameters[2] === Tuple{})
-        # if the first/second parameter of `NamedTuple` is known to be empty,
-        # the second/first argument should also be empty tuple type,
-        # so refine it here
-        return Const(NamedTuple())
-    end
-    return t
-end
-
-function abstract_eval_foreigncall(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
-    abstract_eval_value(interp, e.args[1], vtypes, sv)
-    mi = frame_instance(sv)
-    t = sp_type_rewrap(e.args[2], mi, true)
-    for i = 3:length(e.args)
-        if abstract_eval_value(interp, e.args[i], vtypes, sv) === Bottom
-            return RTEffects(Bottom, EFFECTS_THROWS)
-        end
-    end
-    effects = foreigncall_effects(e) do @nospecialize x
-        abstract_eval_value(interp, x, vtypes, sv)
-    end
-    cconv = e.args[5]
-    if isa(cconv, QuoteNode) && (v = cconv.value; isa(v, Tuple{Symbol, UInt8}))
-        override = decode_effects_override(v[2])
-        effects = Effects(
-            override.consistent          ? ALWAYS_TRUE : effects.consistent,
-            override.effect_free         ? ALWAYS_TRUE : effects.effect_free,
-            override.nothrow             ? true        : effects.nothrow,
-            override.terminates_globally ? true        : effects.terminates,
-            override.notaskstate         ? true        : effects.notaskstate,
-            override.inaccessiblememonly ? ALWAYS_TRUE : effects.inaccessiblememonly,
-            effects.nonoverlayed,
-            effects.noinbounds)
-    end
-    return RTEffects(t, effects)
-end
-
-function abstract_eval_phi(interp::AbstractInterpreter, phi::PhiNode, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
-    rt = Union{}
-    for i in 1:length(phi.values)
-        isassigned(phi.values, i) || continue
-        val = phi.values[i]
-        rt = tmerge(typeinf_lattice(interp), rt, abstract_eval_special_value(interp, val, vtypes, sv))
-    end
-    return rt
-end
-
-function stmt_taints_inbounds_consistency(sv::AbsIntState)
-    propagate_inbounds(sv) && return true
-    return (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0
-end
-
-function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
-    if !isa(e, Expr)
-        if isa(e, PhiNode)
-            add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)
-            return abstract_eval_phi(interp, e, vtypes, sv)
-        end
-        return abstract_eval_special_value(interp, e, vtypes, sv)
-    end
-    (; rt, effects) = abstract_eval_statement_expr(interp, e, vtypes, sv)
-    if !effects.noinbounds
-        if !propagate_inbounds(sv)
-            # The callee read our inbounds flag, but unless we propagate inbounds,
-            # we ourselves don't read our parent's inbounds.
-            effects = Effects(effects; noinbounds=true)
-        end
-        if (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0
-            effects = Effects(effects; consistent=ALWAYS_FALSE)
-        end
-    end
-    merge_effects!(interp, sv, effects)
-    e = e::Expr
-    @assert !isa(rt, TypeVar) "unhandled TypeVar"
-    rt = maybe_singleton_const(rt)
-    if !isempty(sv.pclimitations)
-        if rt isa Const || rt === Union{}
-            empty!(sv.pclimitations)
-        else
-            rt = LimitedAccuracy(rt, sv.pclimitations)
-            sv.pclimitations = IdSet{InferenceState}()
-        end
-    end
-    return rt
-end
-
-function isdefined_globalref(g::GlobalRef)
-    return ccall(:jl_globalref_boundp, Cint, (Any,), g) != 0
-end
-
-function abstract_eval_globalref(g::GlobalRef)
-    if isdefined_globalref(g) && isconst(g)
-        return Const(ccall(:jl_get_globalref_value, Any, (Any,), g))
-    end
-    ty = ccall(:jl_get_binding_type, Any, (Any, Any), g.mod, g.name)
-    ty === nothing && return Any
-    return ty
-end
-abstract_eval_global(M::Module, s::Symbol) = abstract_eval_globalref(GlobalRef(M, s))
-
-function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, sv::AbsIntState)
-    rt = abstract_eval_globalref(g)
-    consistent = inaccessiblememonly = ALWAYS_FALSE
-    nothrow = false
-    if isa(rt, Const)
-        consistent = ALWAYS_TRUE
-        nothrow = true
-        if is_mutation_free_argtype(rt)
-            inaccessiblememonly = ALWAYS_TRUE
-        end
-    elseif isdefined_globalref(g)
-        nothrow = true
-    elseif InferenceParams(interp).assume_bindings_static
-        consistent = inaccessiblememonly = ALWAYS_TRUE
-        rt = Union{}
-    end
-    merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly))
-    return rt
-end
-
-function handle_global_assignment!(interp::AbstractInterpreter, frame::InferenceState, lhs::GlobalRef, @nospecialize(newty))
-    effect_free = ALWAYS_FALSE
-    nothrow = global_assignment_nothrow(lhs.mod, lhs.name, newty)
-    inaccessiblememonly = ALWAYS_FALSE
-    merge_effects!(interp, frame, Effects(EFFECTS_TOTAL; effect_free, nothrow, inaccessiblememonly))
-    return nothing
-end
-
-abstract_eval_ssavalue(s::SSAValue, sv::InferenceState) = abstract_eval_ssavalue(s, sv.ssavaluetypes)
-
-function abstract_eval_ssavalue(s::SSAValue, ssavaluetypes::Vector{Any})
-    typ = ssavaluetypes[s.id]
-    if typ === NOT_FOUND
-        return Bottom
-    end
-    return typ
-end
-
-struct BestguessInfo{Interp<:AbstractInterpreter}
-    interp::Interp
-    bestguess
-    nargs::Int
-    slottypes::Vector{Any}
-    changes::VarTable
-    function BestguessInfo(interp::Interp, @nospecialize(bestguess), nargs::Int,
-        slottypes::Vector{Any}, changes::VarTable) where Interp<:AbstractInterpreter
-        new{Interp}(interp, bestguess, nargs, slottypes, changes)
-    end
-end
-
-@nospecializeinfer function widenreturn(@nospecialize(rt), info::BestguessInfo)
-    return widenreturn(typeinf_lattice(info.interp), rt, info)
-end
-
-@nospecializeinfer function widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo)
-    return widenreturn(widenlattice(𝕃ᵢ), rt, info)
-end
-@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo)
-    return widenreturn_noslotwrapper(widenlattice(𝕃ᵢ), rt, info)
-end
-
-@nospecializeinfer function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::BestguessInfo)
-    if isa(rt, MustAlias)
-        if 1 ≤ rt.slot ≤ info.nargs
-            rt = InterMustAlias(rt)
-        else
-            rt = widenmustalias(rt)
-        end
-    end
-    isa(rt, InterMustAlias) && return rt
-    return widenreturn(widenlattice(𝕃ᵢ), rt, info)
-end
-
-@nospecializeinfer function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), info::BestguessInfo)
-    ⊑ᵢ = ⊑(𝕃ᵢ)
-    if !(⊑(ipo_lattice(info.interp), info.bestguess, Bool)) || info.bestguess === Bool
-        # give up inter-procedural constraint back-propagation
-        # when tmerge would widen the result anyways (as an optimization)
-        rt = widenconditional(rt)
-    else
-        if isa(rt, Conditional)
-            id = rt.slot
-            if 1 ≤ id ≤ info.nargs
-                old_id_type = widenconditional(info.slottypes[id]) # same as `(states[1]::VarTable)[id].typ`
-                if (!(rt.thentype ⊑ᵢ old_id_type) || old_id_type ⊑ᵢ rt.thentype) &&
-                   (!(rt.elsetype ⊑ᵢ old_id_type) || old_id_type ⊑ᵢ rt.elsetype)
-                   # discard this `Conditional` since it imposes
-                   # no new constraint on the argument type
-                   # (the caller will recreate it if needed)
-                   rt = widenconditional(rt)
-               end
-            else
-                # discard this `Conditional` imposed on non-call arguments,
-                # since it's not interesting in inter-procedural context;
-                # we may give constraints on other call argument
-                rt = widenconditional(rt)
-            end
-        end
-        if isa(rt, Conditional)
-            rt = InterConditional(rt.slot, rt.thentype, rt.elsetype)
-        elseif is_lattice_bool(𝕃ᵢ, rt)
-            rt = bool_rt_to_conditional(rt, info)
-        end
-    end
-    if isa(rt, Conditional)
-        rt = InterConditional(rt)
-    end
-    isa(rt, InterConditional) && return rt
-    return widenreturn(widenlattice(𝕃ᵢ), rt, info)
-end
-@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo)
-    bestguess = info.bestguess
-    if isa(bestguess, InterConditional)
-        # if the bestguess so far is already `Conditional`, try to convert
-        # this `rt` into `Conditional` on the slot to avoid overapproximation
-        # due to conflict of different slots
-        rt = bool_rt_to_conditional(rt, bestguess.slot, info)
-    else
-        # pick up the first "interesting" slot, convert `rt` to its `Conditional`
-        # TODO: ideally we want `Conditional` and `InterConditional` to convey
-        # constraints on multiple slots
-        for slot_id = 1:info.nargs
-            rt = bool_rt_to_conditional(rt, slot_id, info)
-            rt isa InterConditional && break
-        end
-    end
-    return rt
-end
-@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, info::BestguessInfo)
-    ⊑ᵢ = ⊑(typeinf_lattice(info.interp))
-    old = info.slottypes[slot_id]
-    new = widenslotwrapper(info.changes[slot_id].typ) # avoid nested conditional
-    if new ⊑ᵢ old && !(old ⊑ᵢ new)
-        if isa(rt, Const)
-            val = rt.val
-            if val === true
-                return InterConditional(slot_id, new, Bottom)
-            elseif val === false
-                return InterConditional(slot_id, Bottom, new)
-            end
-        elseif rt === Bool
-            return InterConditional(slot_id, new, new)
-        end
-    end
-    return rt
-end
-
-@nospecializeinfer function widenreturn(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo)
-    return widenreturn_partials(𝕃ᵢ, rt, info)
-end
-@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo)
-    return widenreturn_partials(𝕃ᵢ, rt, info)
-end
-@nospecializeinfer function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo)
-    if isa(rt, PartialStruct)
-        fields = copy(rt.fields)
-        local anyrefine = false
-        𝕃 = typeinf_lattice(info.interp)
-        for i in 1:length(fields)
-            a = fields[i]
-            a = isvarargtype(a) ? a : widenreturn_noslotwrapper(𝕃, a, info)
-            if !anyrefine
-                # TODO: consider adding && const_prop_profitable(a) here?
-                anyrefine = has_extended_info(a) ||
-                            ⊏(𝕃, a, fieldtype(rt.typ, i))
-            end
-            fields[i] = a
-        end
-        anyrefine && return PartialStruct(rt.typ, fields)
-    end
-    if isa(rt, PartialOpaque)
-        return rt # XXX: this case was missed in #39512
-    end
-    return widenreturn(widenlattice(𝕃ᵢ), rt, info)
-end
-
-@nospecializeinfer function widenreturn(::ConstsLattice, @nospecialize(rt), ::BestguessInfo)
-    return widenreturn_consts(rt)
-end
-@nospecializeinfer function widenreturn_noslotwrapper(::ConstsLattice, @nospecialize(rt), ::BestguessInfo)
-    return widenreturn_consts(rt)
-end
-@nospecializeinfer function widenreturn_consts(@nospecialize(rt))
-    isa(rt, Const) && return rt
-    return widenconst(rt)
-end
-
-@nospecializeinfer function widenreturn(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo)
-    return widenconst(rt)
-end
-@nospecializeinfer function widenreturn_noslotwrapper(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo)
-    return widenconst(rt)
-end
-
-function handle_control_backedge!(interp::AbstractInterpreter, frame::InferenceState, from::Int, to::Int)
-    if from > to
-        if is_effect_overridden(frame, :terminates_locally)
-            # this backedge is known to terminate
-        else
-            merge_effects!(interp, frame, Effects(EFFECTS_TOTAL; terminates=false))
-        end
-    end
-    return nothing
-end
-
-struct BasicStmtChange
-    changes::Union{Nothing,StateUpdate}
-    type::Any # ::Union{Type, Nothing} - `nothing` if this statement may not be used as an SSA Value
-    # TODO effects::Effects
-    BasicStmtChange(changes::Union{Nothing,StateUpdate}, @nospecialize type) = new(changes, type)
-end
-
-@inline function abstract_eval_basic_statement(interp::AbstractInterpreter,
-    @nospecialize(stmt), pc_vartable::VarTable, frame::InferenceState)
-    if isa(stmt, NewvarNode)
-        changes = StateUpdate(stmt.slot, VarState(Bottom, true), pc_vartable, false)
-        return BasicStmtChange(changes, nothing)
-    elseif !isa(stmt, Expr)
-        t = abstract_eval_statement(interp, stmt, pc_vartable, frame)
-        return BasicStmtChange(nothing, t)
-    end
-    changes = nothing
-    stmt = stmt::Expr
-    hd = stmt.head
-    if hd === :(=)
-        t = abstract_eval_statement(interp, stmt.args[2], pc_vartable, frame)
-        if t === Bottom
-            return BasicStmtChange(nothing, Bottom)
-        end
-        lhs = stmt.args[1]
-        if isa(lhs, SlotNumber)
-            changes = StateUpdate(lhs, VarState(t, false), pc_vartable, false)
-        elseif isa(lhs, GlobalRef)
-            handle_global_assignment!(interp, frame, lhs, t)
-        elseif !isa(lhs, SSAValue)
-            merge_effects!(interp, frame, EFFECTS_UNKNOWN)
-        end
-        return BasicStmtChange(changes, t)
-    elseif hd === :method
-        fname = stmt.args[1]
-        if isa(fname, SlotNumber)
-            changes = StateUpdate(fname, VarState(Any, false), pc_vartable, false)
-        end
-        return BasicStmtChange(changes, nothing)
-    elseif (hd === :code_coverage_effect || (
-            hd !== :boundscheck && # :boundscheck can be narrowed to Bool
-            is_meta_expr(stmt)))
-        return BasicStmtChange(nothing, Nothing)
-    else
-        t = abstract_eval_statement(interp, stmt, pc_vartable, frame)
-        return BasicStmtChange(nothing, t)
-    end
-end
-
-function update_bbstate!(𝕃ᵢ::AbstractLattice, frame::InferenceState, bb::Int, vartable::VarTable)
-    bbtable = frame.bb_vartables[bb]
-    if bbtable === nothing
-        # if a basic block hasn't been analyzed yet,
-        # we can update its state a bit more aggressively
-        frame.bb_vartables[bb] = copy(vartable)
-        return true
-    else
-        return stupdate!(𝕃ᵢ, bbtable, vartable)
-    end
-end
-
-function init_vartable!(vartable::VarTable, frame::InferenceState)
-    nargtypes = length(frame.result.argtypes)
-    for i = 1:length(vartable)
-        vartable[i] = VarState(Bottom, i > nargtypes)
-    end
-    return vartable
-end
-
-# make as much progress on `frame` as possible (without handling cycles)
-function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
-    @assert !is_inferred(frame)
-    frame.dont_work_on_me = true # mark that this function is currently on the stack
-    W = frame.ip
-    nargs = narguments(frame, #=include_va=#false)
-    slottypes = frame.slottypes
-    ssavaluetypes = frame.ssavaluetypes
-    bbs = frame.cfg.blocks
-    nbbs = length(bbs)
-    𝕃ₚ, 𝕃ᵢ = ipo_lattice(interp), typeinf_lattice(interp)
-
-    currbb = frame.currbb
-    if currbb != 1
-        currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block
-    end
-
-    states = frame.bb_vartables
-    currstate = copy(states[currbb]::VarTable)
-    while currbb <= nbbs
-        delete!(W, currbb)
-        bbstart = first(bbs[currbb].stmts)
-        bbend = last(bbs[currbb].stmts)
-
-        for currpc in bbstart:bbend
-            frame.currpc = currpc
-            empty_backedges!(frame, currpc)
-            stmt = frame.src.code[currpc]
-            # If we're at the end of the basic block ...
-            if currpc == bbend
-                # Handle control flow
-                if isa(stmt, GotoNode)
-                    succs = bbs[currbb].succs
-                    @assert length(succs) == 1
-                    nextbb = succs[1]
-                    ssavaluetypes[currpc] = Any
-                    handle_control_backedge!(interp, frame, currpc, stmt.label)
-                    @goto branch
-                elseif isa(stmt, GotoIfNot)
-                    condx = stmt.cond
-                    condt = abstract_eval_value(interp, condx, currstate, frame)
-                    if condt === Bottom
-                        ssavaluetypes[currpc] = Bottom
-                        empty!(frame.pclimitations)
-                        @goto find_next_bb
-                    end
-                    orig_condt = condt
-                    if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condx, SlotNumber)
-                        # if this non-`Conditional` object is a slot, we form and propagate
-                        # the conditional constraint on it
-                        condt = Conditional(condx, Const(true), Const(false))
-                    end
-                    condval = maybe_extract_const_bool(condt)
-                    if !isempty(frame.pclimitations)
-                        # we can't model the possible effect of control
-                        # dependencies on the return
-                        # directly to all the return values (unless we error first)
-                        condval isa Bool || union!(frame.limitations, frame.pclimitations)
-                        empty!(frame.pclimitations)
-                    end
-                    ssavaluetypes[currpc] = Any
-                    if condval === true
-                        @goto fallthrough
-                    else
-                        succs = bbs[currbb].succs
-                        if length(succs) == 1
-                            @assert condval === false || (stmt.dest === currpc + 1)
-                            nextbb = succs[1]
-                            @goto branch
-                        end
-                        @assert length(succs) == 2
-                        truebb = currbb + 1
-                        falsebb = succs[1] == truebb ? succs[2] : succs[1]
-                        if condval === false
-                            nextbb = falsebb
-                            handle_control_backedge!(interp, frame, currpc, stmt.dest)
-                            @goto branch
-                        else
-                            if !⊑(𝕃ᵢ, orig_condt, Bool)
-                                merge_effects!(interp, frame, EFFECTS_THROWS)
-                                if !hasintersect(widenconst(orig_condt), Bool)
-                                    ssavaluetypes[currpc] = Bottom
-                                    @goto find_next_bb
-                                end
-                            end
-
-                            # We continue with the true branch, but process the false
-                            # branch here.
-                            if isa(condt, Conditional)
-                                else_change = conditional_change(𝕃ᵢ, currstate, condt.elsetype, condt.slot)
-                                if else_change !== nothing
-                                    false_vartable = stoverwrite1!(copy(currstate), else_change)
-                                else
-                                    false_vartable = currstate
-                                end
-                                changed = update_bbstate!(𝕃ᵢ, frame, falsebb, false_vartable)
-                                then_change = conditional_change(𝕃ᵢ, currstate, condt.thentype, condt.slot)
-                                if then_change !== nothing
-                                    stoverwrite1!(currstate, then_change)
-                                end
-                            else
-                                changed = update_bbstate!(𝕃ᵢ, frame, falsebb, currstate)
-                            end
-                            if changed
-                                handle_control_backedge!(interp, frame, currpc, stmt.dest)
-                                push!(W, falsebb)
-                            end
-                            @goto fallthrough
-                        end
-                    end
-                elseif isa(stmt, ReturnNode)
-                    bestguess = frame.bestguess
-                    rt = abstract_eval_value(interp, stmt.val, currstate, frame)
-                    rt = widenreturn(rt, BestguessInfo(interp, bestguess, nargs, slottypes, currstate))
-                    # narrow representation of bestguess slightly to prepare for tmerge with rt
-                    if rt isa InterConditional && bestguess isa Const
-                        let slot_id = rt.slot
-                            old_id_type = slottypes[slot_id]
-                            if bestguess.val === true && rt.elsetype !== Bottom
-                                bestguess = InterConditional(slot_id, old_id_type, Bottom)
-                            elseif bestguess.val === false && rt.thentype !== Bottom
-                                bestguess = InterConditional(slot_id, Bottom, old_id_type)
-                            end
-                        end
-                    end
-                    # copy limitations to return value
-                    if !isempty(frame.pclimitations)
-                        union!(frame.limitations, frame.pclimitations)
-                        empty!(frame.pclimitations)
-                    end
-                    if !isempty(frame.limitations)
-                        rt = LimitedAccuracy(rt, copy(frame.limitations))
-                    end
-                    if !⊑(𝕃ₚ, rt, bestguess)
-                        # new (wider) return type for frame
-                        bestguess = tmerge(𝕃ₚ, bestguess, rt)
-                        # TODO: if bestguess isa InterConditional && !interesting(bestguess); bestguess = widenconditional(bestguess); end
-                        frame.bestguess = bestguess
-                        for (caller, caller_pc) in frame.cycle_backedges
-                            if !(caller.ssavaluetypes[caller_pc] === Any)
-                                # no reason to revisit if that call-site doesn't affect the final result
-                                push!(caller.ip, block_for_inst(caller.cfg, caller_pc))
-                            end
-                        end
-                    end
-                    ssavaluetypes[frame.currpc] = Any
-                    @goto find_next_bb
-                elseif isexpr(stmt, :enter)
-                    # Propagate entry info to exception handler
-                    l = stmt.args[1]::Int
-                    catchbb = block_for_inst(frame.cfg, l)
-                    if update_bbstate!(𝕃ᵢ, frame, catchbb, currstate)
-                        push!(W, catchbb)
-                    end
-                    ssavaluetypes[currpc] = Any
-                    @goto fallthrough
-                end
-                # Fall through terminator - treat as regular stmt
-            end
-            # Process non control-flow statements
-            (; changes, type) = abstract_eval_basic_statement(interp,
-                stmt, currstate, frame)
-            if type === Bottom
-                ssavaluetypes[currpc] = Bottom
-                @goto find_next_bb
-            end
-            if changes !== nothing
-                stoverwrite1!(currstate, changes)
-                let cur_hand = frame.handler_at[currpc], l, enter
-                    while cur_hand != 0
-                        enter = frame.src.code[cur_hand]::Expr
-                        l = enter.args[1]::Int
-                        exceptbb = block_for_inst(frame.cfg, l)
-                        # propagate new type info to exception handler
-                        # the handling for Expr(:enter) propagates all changes from before the try/catch
-                        # so this only needs to propagate any changes
-                        if stupdate1!(𝕃ᵢ, states[exceptbb]::VarTable, changes)
-                            push!(W, exceptbb)
-                        end
-                        cur_hand = frame.handler_at[cur_hand]
-                    end
-                end
-            end
-            if type === nothing
-                ssavaluetypes[currpc] = Any
-                continue
-            end
-            if !isempty(frame.ssavalue_uses[currpc])
-                record_ssa_assign!(𝕃ᵢ, currpc, type, frame)
-            else
-                ssavaluetypes[currpc] = type
-            end
-        end # for currpc in bbstart:bbend
-
-        # Case 1: Fallthrough termination
-        begin @label fallthrough
-            nextbb = currbb + 1
-        end
-
-        # Case 2: Directly branch to a different BB
-        begin @label branch
-            if update_bbstate!(𝕃ᵢ, frame, nextbb, currstate)
-                push!(W, nextbb)
-            end
-        end
-
-        # Case 3: Control flow ended along the current path (converged, return or throw)
-        begin @label find_next_bb
-            currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block
-            currbb == -1 && break # the working set is empty
-            currbb > nbbs && break
-
-            nexttable = states[currbb]
-            if nexttable === nothing
-                init_vartable!(currstate, frame)
-            else
-                stoverwrite!(currstate, nexttable)
-            end
-        end
-    end # while currbb <= nbbs
-
-    frame.dont_work_on_me = false
-    nothing
-end
-
-function conditional_change(𝕃ᵢ::AbstractLattice, state::VarTable, @nospecialize(typ), slot::Int)
-    vtype = state[slot]
-    oldtyp = vtype.typ
-    if iskindtype(typ)
-        # this code path corresponds to the special handling for `isa(x, iskindtype)` check
-        # implemented within `abstract_call_builtin`
-    elseif ⊑(𝕃ᵢ, ignorelimited(typ), ignorelimited(oldtyp))
-        # approximate test for `typ ∩ oldtyp` being better than `oldtyp`
-        # since we probably formed these types with `typesubstract`,
-        # the comparison is likely simple
-    else
-        return nothing
-    end
-    if oldtyp isa LimitedAccuracy
-        # typ is better unlimited, but we may still need to compute the tmeet with the limit
-        # "causes" since we ignored those in the comparison
-        typ = tmerge(𝕃ᵢ, typ, LimitedAccuracy(Bottom, oldtyp.causes))
-    end
-    return StateUpdate(SlotNumber(slot), VarState(typ, vtype.undef), state, true)
-end
-
-# make as much progress on `frame` as possible (by handling cycles)
-function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState)
-    typeinf_local(interp, frame)
-
-    # If the current frame is part of a cycle, solve the cycle before finishing
-    no_active_ips_in_callers = false
-    while !no_active_ips_in_callers
-        no_active_ips_in_callers = true
-        for caller in frame.callers_in_cycle
-            caller.dont_work_on_me && return false # cycle is above us on the stack
-            if !isempty(caller.ip)
-                # Note that `typeinf_local(interp, caller)` can potentially modify the other frames
-                # `frame.callers_in_cycle`, which is why making incremental progress requires the
-                # outer while loop.
-                typeinf_local(interp, caller)
-                no_active_ips_in_callers = false
-            end
-            update_valid_age!(caller, frame.valid_worlds)
-        end
-    end
-    return true
-end
diff --git a/base/compiler/bootstrap.jl b/base/compiler/bootstrap.jl
deleted file mode 100644
index 1f62d21c9d2d9..0000000000000
--- a/base/compiler/bootstrap.jl
+++ /dev/null
@@ -1,51 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# make sure that typeinf is executed before turning on typeinf_ext
-# this ensures that typeinf_ext doesn't recurse before it can add the item to the workq
-# especially try to make sure any recursive and leaf functions have concrete signatures,
-# since we won't be able to specialize & infer them at runtime
-
-time() = ccall(:jl_clock_now, Float64, ())
-
-let interp = NativeInterpreter()
-
-    analyze_escapes_tt = Tuple{typeof(analyze_escapes), IRCode, Int, Bool, typeof(null_escape_cache)}
-    fs = Any[
-        # we first create caches for the optimizer, because they contain many loop constructions
-        # and they're better to not run in interpreter even during bootstrapping
-        #=analyze_escapes_tt,=# run_passes,
-        # then we create caches for inference entries
-        typeinf_ext, typeinf, typeinf_edge,
-    ]
-    # tfuncs can't be inferred from the inference entries above, so here we infer them manually
-    for x in T_FFUNC_VAL
-        push!(fs, x[3])
-    end
-    for i = 1:length(T_IFUNC)
-        if isassigned(T_IFUNC, i)
-            x = T_IFUNC[i]
-            push!(fs, x[3])
-        else
-            println(stderr, "WARNING: tfunc missing for ", reinterpret(IntrinsicFunction, Int32(i)))
-        end
-    end
-    starttime = time()
-    for f in fs
-        if isa(f, DataType) && f.name === typename(Tuple)
-            tt = f
-        else
-            tt = Tuple{typeof(f), Vararg{Any}}
-        end
-        for m in _methods_by_ftype(tt, 10, get_world_counter())::Vector
-            # remove any TypeVars from the intersection
-            m = m::MethodMatch
-            typ = Any[m.spec_types.parameters...]
-            for i = 1:length(typ)
-                typ[i] = unwraptv(typ[i])
-            end
-            typeinf_type(interp, m.method, Tuple{typ...}, m.sparams)
-        end
-    end
-    endtime = time()
-    println("Core.Compiler ──── ", sub_float(endtime,starttime), " seconds")
-end
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
deleted file mode 100644
index 04b0791d9a79e..0000000000000
--- a/base/compiler/compiler.jl
+++ /dev/null
@@ -1,177 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-getfield(Core, :eval)(Core, :(baremodule Compiler
-
-using Core.Intrinsics, Core.IR
-
-import Core: print, println, show, write, unsafe_write, stdout, stderr,
-             _apply_iterate, svec, apply_type, Builtin, IntrinsicFunction,
-             MethodInstance, CodeInstance, MethodTable, MethodMatch, PartialOpaque,
-             TypeofVararg
-
-const getproperty = Core.getfield
-const setproperty! = Core.setfield!
-const swapproperty! = Core.swapfield!
-const modifyproperty! = Core.modifyfield!
-const replaceproperty! = Core.replacefield!
-
-ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Compiler, false)
-
-eval(x) = Core.eval(Compiler, x)
-eval(m, x) = Core.eval(m, x)
-
-include(x) = Core.include(Compiler, x)
-include(mod, x) = Core.include(mod, x)
-
-# The @inline/@noinline macros that can be applied to a function declaration are not available
-# until after array.jl, and so we will mark them within a function body instead.
-macro inline()   Expr(:meta, :inline)   end
-macro noinline() Expr(:meta, :noinline) end
-
-convert(::Type{Any}, Core.@nospecialize x) = x
-convert(::Type{T}, x::T) where {T} = x
-
-# mostly used by compiler/methodtable.jl, but also by reflection.jl
-abstract type MethodTableView end
-abstract type AbstractInterpreter end
-
-# essential files and libraries
-include("essentials.jl")
-include("ctypes.jl")
-include("generator.jl")
-include("reflection.jl")
-include("options.jl")
-
-ntuple(f, ::Val{0}) = ()
-ntuple(f, ::Val{1}) = (@inline; (f(1),))
-ntuple(f, ::Val{2}) = (@inline; (f(1), f(2)))
-ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3)))
-ntuple(f, ::Val{n}) where {n} = ntuple(f, n::Int)
-ntuple(f, n) = (Any[f(i) for i = 1:n]...,)
-
-# core operations & types
-function return_type end # promotion.jl expects this to exist
-is_return_type(Core.@nospecialize(f)) = f === return_type
-include("promotion.jl")
-include("tuple.jl")
-include("pair.jl")
-include("traits.jl")
-include("range.jl")
-include("expr.jl")
-include("error.jl")
-
-# core numeric operations & types
-==(x::T, y::T) where {T} = x === y
-include("bool.jl")
-include("number.jl")
-include("int.jl")
-include("operators.jl")
-include("pointer.jl")
-include("refvalue.jl")
-
-# the same constructor as defined in float.jl, but with a different name to avoid redefinition
-_Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x))
-# fld(x,y) == div(x,y) - ((x>=0) != (y>=0) && rem(x,y) != 0 ? 1 : 0)
-fld(x::T, y::T) where {T<:Unsigned} = div(x, y)
-function fld(x::T, y::T) where T<:Integer
-    d = div(x, y)
-    return d - (signbit(x ⊻ y) & (d * y != x))
-end
-# cld(x,y) = div(x,y) + ((x>0) == (y>0) && rem(x,y) != 0 ? 1 : 0)
-function cld(x::T, y::T) where T<:Unsigned
-    d = div(x, y)
-    return d + (d * y != x)
-end
-function cld(x::T, y::T) where T<:Integer
-    d = div(x, y)
-    return d + (((x > 0) == (y > 0)) & (d * y != x))
-end
-
-
-# checked arithmetic
-const checked_add = +
-const checked_sub = -
-const SignedInt = Union{Int8,Int16,Int32,Int64,Int128}
-const UnsignedInt = Union{UInt8,UInt16,UInt32,UInt64,UInt128}
-sub_with_overflow(x::T, y::T) where {T<:SignedInt}   = checked_ssub_int(x, y)
-sub_with_overflow(x::T, y::T) where {T<:UnsignedInt} = checked_usub_int(x, y)
-sub_with_overflow(x::Bool, y::Bool) = (x-y, false)
-add_with_overflow(x::T, y::T) where {T<:SignedInt}   = checked_sadd_int(x, y)
-add_with_overflow(x::T, y::T) where {T<:UnsignedInt} = checked_uadd_int(x, y)
-add_with_overflow(x::Bool, y::Bool) = (x+y, false)
-
-include("cmem.jl")
-include("strings/lazy.jl")
-
-# core array operations
-include("indices.jl")
-include("array.jl")
-include("abstractarray.jl")
-
-# core structures
-include("bitarray.jl")
-include("bitset.jl")
-include("abstractdict.jl")
-include("iddict.jl")
-include("idset.jl")
-include("abstractset.jl")
-include("iterators.jl")
-using .Iterators: zip, enumerate
-using .Iterators: Flatten, Filter, product  # for generators
-include("namedtuple.jl")
-
-# core docsystem
-include("docs/core.jl")
-import Core.Compiler.CoreDocs
-Core.atdoc!(CoreDocs.docm)
-
-# sorting
-include("ordering.jl")
-using .Order
-include("compiler/sort.jl")
-
-# We don't include some.jl, but this definition is still useful.
-something(x::Nothing, y...) = something(y...)
-something(x::Any, y...) = x
-
-############
-# compiler #
-############
-
-include("compiler/cicache.jl")
-include("compiler/methodtable.jl")
-include("compiler/effects.jl")
-include("compiler/types.jl")
-include("compiler/utilities.jl")
-include("compiler/validation.jl")
-
-function argextype end # imported by EscapeAnalysis
-function stmt_effect_free end # imported by EscapeAnalysis
-function alloc_array_ndims end # imported by EscapeAnalysis
-function try_compute_field end # imported by EscapeAnalysis
-include("compiler/ssair/basicblock.jl")
-include("compiler/ssair/domtree.jl")
-include("compiler/ssair/ir.jl")
-
-include("compiler/abstractlattice.jl")
-include("compiler/inferenceresult.jl")
-include("compiler/inferencestate.jl")
-
-include("compiler/typeutils.jl")
-include("compiler/typelimits.jl")
-include("compiler/typelattice.jl")
-include("compiler/tfuncs.jl")
-include("compiler/stmtinfo.jl")
-
-include("compiler/abstractinterpretation.jl")
-include("compiler/typeinfer.jl")
-include("compiler/optimize.jl")
-
-include("compiler/bootstrap.jl")
-ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel)
-
-include("compiler/parsing.jl")
-Core._setparser!(fl_parse)
-
-end # baremodule Compiler
-))
diff --git a/base/compiler/effects.jl b/base/compiler/effects.jl
deleted file mode 100644
index 7d09769e5b31b..0000000000000
--- a/base/compiler/effects.jl
+++ /dev/null
@@ -1,265 +0,0 @@
-"""
-    effects::Effects
-
-Represents computational effects of a method call.
-
-The effects are a composition of different effect bits that represent some program property
-of the method being analyzed. They are represented as `Bool` or `UInt8` bits with the
-following meanings:
-- `effects.consistent::UInt8`:
-  * `ALWAYS_TRUE`: this method is guaranteed to return or terminate consistently.
-  * `ALWAYS_FALSE`: this method may be not return or terminate consistently, and there is
-    no need for further analysis with respect to this effect property as this conclusion
-    will not be refined anyway.
-  * `CONSISTENT_IF_NOTRETURNED`: the `:consistent`-cy of this method can later be refined to
-    `ALWAYS_TRUE` in a case when the return value of this method never involves newly
-    allocated mutable objects.
-  * `CONSISTENT_IF_INACCESSIBLEMEMONLY`: the `:consistent`-cy of this method can later be
-    refined to `ALWAYS_TRUE` in a case when `:inaccessiblememonly` is proven.
-- `effect_free::UInt8`:
-  * `ALWAYS_TRUE`: this method is free from externally semantically visible side effects.
-  * `ALWAYS_FALSE`: this method may not be free from externally semantically visible side effects, and there is
-    no need for further analysis with respect to this effect property as this conclusion
-    will not be refined anyway.
-  * `EFFECT_FREE_IF_INACCESSIBLEMEMONLY`: the `:effect-free`-ness of this method can later be
-    refined to `ALWAYS_TRUE` in a case when `:inaccessiblememonly` is proven.
-- `nothrow::Bool`: this method is guaranteed to not throw an exception.
-- `terminates::Bool`: this method is guaranteed to terminate.
-- `notaskstate::Bool`: this method does not access any state bound to the current
-  task and may thus be moved to a different task without changing observable
-  behavior. Note that this currently implies that `noyield` as well, since
-  yielding modifies the state of the current task, though this may be split
-  in the future.
-- `inaccessiblememonly::UInt8`:
-  * `ALWAYS_TRUE`: this method does not access or modify externally accessible mutable memory.
-    This state corresponds to LLVM's `inaccessiblememonly` function attribute.
-  * `ALWAYS_FALSE`: this method may access or modify externally accessible mutable memory.
-  * `INACCESSIBLEMEM_OR_ARGMEMONLY`: this method does not access or modify externally accessible mutable memory,
-    except that it may access or modify mutable memory pointed to by its call arguments.
-    This may later be refined to `ALWAYS_TRUE` in a case when call arguments are known to be immutable.
-    This state corresponds to LLVM's `inaccessiblemem_or_argmemonly` function attribute.
-- `nonoverlayed::Bool`: indicates that any methods that may be called within this method
-  are not defined in an [overlayed method table](@ref OverlayMethodTable).
-- `noinbounds::Bool`: If set, indicates that this method does not read the parent's `:inbounds`
-  state. In particular, it does not have any reached `:boundscheck` exprs, not propagates inbounds
-  to any children that do.
-
-Note that the representations above are just internal implementation details and thus likely
-to change in the future. See [`Base.@assume_effects`](@ref) for more detailed explanation
-on the definitions of these properties.
-
-Along the abstract interpretation, `Effects` at each statement are analyzed locally and they
-are merged into the single global `Effects` that represents the entire effects of the
-analyzed method (see the implementation of `merge_effects!`). Each effect property is
-initialized with `ALWAYS_TRUE`/`true` and then transitioned towards `ALWAYS_FALSE`/`false`.
-Note that within the current flow-insensitive analysis design, effects detected by local
-analysis on each statement usually taint the global conclusion conservatively.
-
-## Key for `show` output of Effects:
-
-The output represents the state of different effect properties in the following order:
-
-1. `consistent` (`c`):
-    - `+c` (green): `ALWAYS_TRUE`
-    - `-c` (red): `ALWAYS_FALSE`
-    - `?c` (yellow): `CONSISTENT_IF_NOTRETURNED` and/or `CONSISTENT_IF_INACCESSIBLEMEMONLY`
-2. `effect_free` (`e`):
-    - `+e` (green): `ALWAYS_TRUE`
-    - `-e` (red): `ALWAYS_FALSE`
-    - `?e` (yellow): `EFFECT_FREE_IF_INACCESSIBLEMEMONLY`
-3. `nothrow` (`n`):
-    - `+n` (green): `true`
-    - `-n` (red): `false`
-4. `terminates` (`t`):
-    - `+t` (green): `true`
-    - `-t` (red): `false`
-5. `notaskstate` (`s`):
-    - `+s` (green): `true`
-    - `-s` (red): `false`
-6. `inaccessiblememonly` (`m`):
-    - `+m` (green): `ALWAYS_TRUE`
-    - `-m` (red): `ALWAYS_FALSE`
-    - `?m` (yellow): `INACCESSIBLEMEM_OR_ARGMEMONLY`
-7. `noinbounds` (`i`):
-    - `+i` (green): `true`
-    - `-i` (red): `false`
-
-Additionally, if the `nonoverlayed` property is false, a red prime symbol (′) is displayed after the tuple.
-"""
-struct Effects
-    consistent::UInt8
-    effect_free::UInt8
-    nothrow::Bool
-    terminates::Bool
-    notaskstate::Bool
-    inaccessiblememonly::UInt8
-    nonoverlayed::Bool
-    noinbounds::Bool
-    function Effects(
-        consistent::UInt8,
-        effect_free::UInt8,
-        nothrow::Bool,
-        terminates::Bool,
-        notaskstate::Bool,
-        inaccessiblememonly::UInt8,
-        nonoverlayed::Bool,
-        noinbounds::Bool)
-        return new(
-            consistent,
-            effect_free,
-            nothrow,
-            terminates,
-            notaskstate,
-            inaccessiblememonly,
-            nonoverlayed,
-            noinbounds)
-    end
-end
-
-const ALWAYS_TRUE  = 0x00
-const ALWAYS_FALSE = 0x01
-
-# :consistent-cy bits
-const CONSISTENT_IF_NOTRETURNED         = 0x01 << 1
-const CONSISTENT_IF_INACCESSIBLEMEMONLY = 0x01 << 2
-
-# :effect_free-ness bits
-const EFFECT_FREE_IF_INACCESSIBLEMEMONLY = 0x01 << 1
-
-# :inaccessiblememonly bits
-const INACCESSIBLEMEM_OR_ARGMEMONLY = 0x01 << 1
-
-const EFFECTS_TOTAL    = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  true,  true,  true,  ALWAYS_TRUE,  true,  true)
-const EFFECTS_THROWS   = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  false, true,  true,  ALWAYS_TRUE,  true,  true)
-const EFFECTS_UNKNOWN  = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, true,  true)  # unknown mostly, but it's not overlayed and noinbounds at least (e.g. it's not a call)
-const _EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, false, false) # unknown really
-
-function Effects(e::Effects = _EFFECTS_UNKNOWN;
-    consistent::UInt8 = e.consistent,
-    effect_free::UInt8 = e.effect_free,
-    nothrow::Bool = e.nothrow,
-    terminates::Bool = e.terminates,
-    notaskstate::Bool = e.notaskstate,
-    inaccessiblememonly::UInt8 = e.inaccessiblememonly,
-    nonoverlayed::Bool = e.nonoverlayed,
-    noinbounds::Bool = e.noinbounds)
-    return Effects(
-        consistent,
-        effect_free,
-        nothrow,
-        terminates,
-        notaskstate,
-        inaccessiblememonly,
-        nonoverlayed,
-        noinbounds)
-end
-
-function merge_effects(old::Effects, new::Effects)
-    return Effects(
-        merge_effectbits(old.consistent, new.consistent),
-        merge_effectbits(old.effect_free, new.effect_free),
-        merge_effectbits(old.nothrow, new.nothrow),
-        merge_effectbits(old.terminates, new.terminates),
-        merge_effectbits(old.notaskstate, new.notaskstate),
-        merge_effectbits(old.inaccessiblememonly, new.inaccessiblememonly),
-        merge_effectbits(old.nonoverlayed, new.nonoverlayed),
-        merge_effectbits(old.noinbounds, new.noinbounds))
-end
-
-function merge_effectbits(old::UInt8, new::UInt8)
-    if old === ALWAYS_FALSE || new === ALWAYS_FALSE
-        return ALWAYS_FALSE
-    end
-    return old | new
-end
-merge_effectbits(old::Bool, new::Bool) = old & new
-
-is_consistent(effects::Effects)          = effects.consistent === ALWAYS_TRUE
-is_effect_free(effects::Effects)         = effects.effect_free === ALWAYS_TRUE
-is_nothrow(effects::Effects)             = effects.nothrow
-is_terminates(effects::Effects)          = effects.terminates
-is_notaskstate(effects::Effects)         = effects.notaskstate
-is_inaccessiblememonly(effects::Effects) = effects.inaccessiblememonly === ALWAYS_TRUE
-is_nonoverlayed(effects::Effects)        = effects.nonoverlayed
-
-# implies `is_notaskstate` & `is_inaccessiblememonly`, but not explicitly checked here
-is_foldable(effects::Effects) =
-    is_consistent(effects) &&
-    is_effect_free(effects) &&
-    is_terminates(effects)
-
-is_foldable_nothrow(effects::Effects) =
-    is_foldable(effects) &&
-    is_nothrow(effects)
-
-is_removable_if_unused(effects::Effects) =
-    is_effect_free(effects) &&
-    is_terminates(effects) &&
-    is_nothrow(effects)
-
-is_finalizer_inlineable(effects::Effects) =
-    is_nothrow(effects) &&
-    is_notaskstate(effects)
-
-is_consistent_if_notreturned(effects::Effects)         = !iszero(effects.consistent & CONSISTENT_IF_NOTRETURNED)
-is_consistent_if_inaccessiblememonly(effects::Effects) = !iszero(effects.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY)
-
-is_effect_free_if_inaccessiblememonly(effects::Effects) = !iszero(effects.effect_free & EFFECT_FREE_IF_INACCESSIBLEMEMONLY)
-
-is_inaccessiblemem_or_argmemonly(effects::Effects) = effects.inaccessiblememonly === INACCESSIBLEMEM_OR_ARGMEMONLY
-
-function encode_effects(e::Effects)
-    return ((e.consistent          % UInt32) << 0) |
-           ((e.effect_free         % UInt32) << 3) |
-           ((e.nothrow             % UInt32) << 5) |
-           ((e.terminates          % UInt32) << 6) |
-           ((e.notaskstate         % UInt32) << 7) |
-           ((e.inaccessiblememonly % UInt32) << 8) |
-           ((e.nonoverlayed        % UInt32) << 10)|
-           ((e.noinbounds          % UInt32) << 11)
-end
-
-function decode_effects(e::UInt32)
-    return Effects(
-        UInt8((e >> 0) & 0x07),
-        UInt8((e >> 3) & 0x03),
-        _Bool((e >> 5) & 0x01),
-        _Bool((e >> 6) & 0x01),
-        _Bool((e >> 7) & 0x01),
-        UInt8((e >> 8) & 0x03),
-        _Bool((e >> 10) & 0x01),
-        _Bool((e >> 11) & 0x01))
-end
-
-struct EffectsOverride
-    consistent::Bool
-    effect_free::Bool
-    nothrow::Bool
-    terminates_globally::Bool
-    terminates_locally::Bool
-    notaskstate::Bool
-    inaccessiblememonly::Bool
-end
-
-function encode_effects_override(eo::EffectsOverride)
-    e = 0x00
-    eo.consistent          && (e |= (0x01 << 0))
-    eo.effect_free         && (e |= (0x01 << 1))
-    eo.nothrow             && (e |= (0x01 << 2))
-    eo.terminates_globally && (e |= (0x01 << 3))
-    eo.terminates_locally  && (e |= (0x01 << 4))
-    eo.notaskstate         && (e |= (0x01 << 5))
-    eo.inaccessiblememonly && (e |= (0x01 << 6))
-    return e
-end
-
-function decode_effects_override(e::UInt8)
-    return EffectsOverride(
-        (e & (0x01 << 0)) != 0x00,
-        (e & (0x01 << 1)) != 0x00,
-        (e & (0x01 << 2)) != 0x00,
-        (e & (0x01 << 3)) != 0x00,
-        (e & (0x01 << 4)) != 0x00,
-        (e & (0x01 << 5)) != 0x00,
-        (e & (0x01 << 6)) != 0x00)
-end
diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl
deleted file mode 100644
index 3a96b21d7c40a..0000000000000
--- a/base/compiler/inferenceresult.jl
+++ /dev/null
@@ -1,233 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-    matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance) ->
-        (cache_argtypes::Vector{Any}, overridden_by_const::BitVector)
-
-Returns argument types `cache_argtypes::Vector{Any}` for `linfo` that are in the native
-Julia type domain. `overridden_by_const::BitVector` is all `false` meaning that
-there is no additional extended lattice information there.
-
-    matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::ForwardableArgtypes) ->
-        (cache_argtypes::Vector{Any}, overridden_by_const::BitVector)
-
-Returns cache-correct extended lattice argument types `cache_argtypes::Vector{Any}`
-for `linfo` given some `argtypes` accompanied by `overridden_by_const::BitVector`
-that marks which argument contains additional extended lattice information.
-
-In theory, there could be a `cache` containing a matching `InferenceResult`
-for the provided `linfo` and `given_argtypes`. The purpose of this function is
-to return a valid value for `cache_lookup(𝕃, linfo, argtypes, cache).argtypes`,
-so that we can construct cache-correct `InferenceResult`s in the first place.
-"""
-function matching_cache_argtypes end
-
-function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance)
-    mthd = isa(linfo.def, Method) ? linfo.def::Method : nothing
-    cache_argtypes = most_general_argtypes(mthd, linfo.specTypes)
-    return cache_argtypes, falses(length(cache_argtypes))
-end
-
-struct SimpleArgtypes <: ForwardableArgtypes
-    argtypes::Vector{Any}
-end
-
-"""
-    matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::SimpleArgtypes)
-
-The implementation for `argtypes` with general extended lattice information.
-This is supposed to be used for debugging and testing or external `AbstractInterpreter`
-usages and in general `matching_cache_argtypes(::MethodInstance, ::ConditionalArgtypes)`
-is more preferred it can forward `Conditional` information.
-"""
-function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, simple_argtypes::SimpleArgtypes)
-    (; argtypes) = simple_argtypes
-    given_argtypes = Vector{Any}(undef, length(argtypes))
-    for i = 1:length(argtypes)
-        given_argtypes[i] = widenslotwrapper(argtypes[i])
-    end
-    given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo)
-    return pick_const_args(𝕃, linfo, given_argtypes)
-end
-
-function pick_const_args(𝕃::AbstractLattice, linfo::MethodInstance, given_argtypes::Vector{Any})
-    cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo)
-    return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes)
-end
-
-function pick_const_args!(𝕃::AbstractLattice, cache_argtypes::Vector{Any}, overridden_by_const::BitVector, given_argtypes::Vector{Any})
-    for i = 1:length(given_argtypes)
-        given_argtype = given_argtypes[i]
-        cache_argtype = cache_argtypes[i]
-        if !is_argtype_match(𝕃, given_argtype, cache_argtype, false)
-            # prefer the argtype we were given over the one computed from `linfo`
-            cache_argtypes[i] = given_argtype
-            overridden_by_const[i] = true
-        end
-    end
-    return cache_argtypes, overridden_by_const
-end
-
-function is_argtype_match(𝕃::AbstractLattice,
-                          @nospecialize(given_argtype),
-                          @nospecialize(cache_argtype),
-                          overridden_by_const::Bool)
-    if is_forwardable_argtype(𝕃, given_argtype)
-        return is_lattice_equal(𝕃, given_argtype, cache_argtype)
-    end
-    return !overridden_by_const
-end
-
-va_process_argtypes(𝕃::AbstractLattice, given_argtypes::Vector{Any}, mi::MethodInstance) =
-    va_process_argtypes(Returns(nothing), 𝕃, given_argtypes, mi)
-function va_process_argtypes(@nospecialize(va_handler!), 𝕃::AbstractLattice, given_argtypes::Vector{Any}, mi::MethodInstance)
-    def = mi.def
-    isva = isa(def, Method) ? def.isva : false
-    nargs = isa(def, Method) ? Int(def.nargs) : length(mi.specTypes.parameters)
-    if isva || isvarargtype(given_argtypes[end])
-        isva_given_argtypes = Vector{Any}(undef, nargs)
-        for i = 1:(nargs-isva)
-            isva_given_argtypes[i] = argtype_by_index(given_argtypes, i)
-        end
-        if isva
-            if length(given_argtypes) < nargs && isvarargtype(given_argtypes[end])
-                last = length(given_argtypes)
-            else
-                last = nargs
-            end
-            isva_given_argtypes[nargs] = tuple_tfunc(𝕃, given_argtypes[last:end])
-            va_handler!(isva_given_argtypes, last)
-        end
-        return isva_given_argtypes
-    end
-    @assert length(given_argtypes) == nargs "invalid `given_argtypes` for `mi`"
-    return given_argtypes
-end
-
-function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(specTypes),
-    withfirst::Bool = true)
-    toplevel = method === nothing
-    isva = !toplevel && method.isva
-    linfo_argtypes = Any[(unwrap_unionall(specTypes)::DataType).parameters...]
-    nargs::Int = toplevel ? 0 : method.nargs
-    # For opaque closure, the closure environment is processed elsewhere
-    withfirst || (nargs -= 1)
-    cache_argtypes = Vector{Any}(undef, nargs)
-    # First, if we're dealing with a varargs method, then we set the last element of `args`
-    # to the appropriate `Tuple` type or `PartialStruct` instance.
-    if !toplevel && isva
-        if specTypes::Type == Tuple
-            linfo_argtypes = Any[Any for i = 1:nargs]
-            if nargs > 1
-                linfo_argtypes[end] = Tuple
-            end
-            vargtype = Tuple
-        else
-            linfo_argtypes_length = length(linfo_argtypes)
-            if nargs > linfo_argtypes_length
-                va = linfo_argtypes[linfo_argtypes_length]
-                if isvarargtype(va)
-                    new_va = rewrap_unionall(unconstrain_vararg_length(va), specTypes)
-                    vargtype = Tuple{new_va}
-                else
-                    vargtype = Tuple{}
-                end
-            else
-                vargtype_elements = Any[]
-                for i in nargs:linfo_argtypes_length
-                    p = linfo_argtypes[i]
-                    p = unwraptv(isvarargtype(p) ? unconstrain_vararg_length(p) : p)
-                    push!(vargtype_elements, elim_free_typevars(rewrap_unionall(p, specTypes)))
-                end
-                for i in 1:length(vargtype_elements)
-                    atyp = vargtype_elements[i]
-                    if issingletontype(atyp)
-                        # replace singleton types with their equivalent Const object
-                        vargtype_elements[i] = Const(atyp.instance)
-                    elseif isconstType(atyp)
-                        vargtype_elements[i] = Const(atyp.parameters[1])
-                    end
-                end
-                vargtype = tuple_tfunc(fallback_lattice, vargtype_elements)
-            end
-        end
-        cache_argtypes[nargs] = vargtype
-        nargs -= 1
-    end
-    # Now, we propagate type info from `linfo_argtypes` into `cache_argtypes`, improving some
-    # type info as we go (where possible). Note that if we're dealing with a varargs method,
-    # we already handled the last element of `cache_argtypes` (and decremented `nargs` so that
-    # we don't overwrite the result of that work here).
-    linfo_argtypes_length = length(linfo_argtypes)
-    if linfo_argtypes_length > 0
-        n = linfo_argtypes_length > nargs ? nargs : linfo_argtypes_length
-        tail_index = n
-        local lastatype
-        for i = 1:n
-            atyp = linfo_argtypes[i]
-            if i == n && isvarargtype(atyp)
-                atyp = unwrapva(atyp)
-                tail_index -= 1
-            end
-            atyp = unwraptv(atyp)
-            if issingletontype(atyp)
-                # replace singleton types with their equivalent Const object
-                atyp = Const(atyp.instance)
-            elseif isconstType(atyp)
-                atyp = Const(atyp.parameters[1])
-            else
-                atyp = elim_free_typevars(rewrap_unionall(atyp, specTypes))
-            end
-            i == n && (lastatype = atyp)
-            cache_argtypes[i] = atyp
-        end
-        for i = (tail_index + 1):nargs
-            cache_argtypes[i] = lastatype
-        end
-    else
-        @assert nargs == 0 "invalid specialization of method" # wrong number of arguments
-    end
-    cache_argtypes
-end
-
-# eliminate free `TypeVar`s in order to make the life much easier down the road:
-# at runtime only `Type{...}::DataType` can contain invalid type parameters, and other
-# malformed types here are user-constructed type arguments given at an inference entry
-# so this function will replace only the malformed `Type{...}::DataType` with `Type`
-# and simply replace other possibilities with `Any`
-function elim_free_typevars(@nospecialize t)
-    if has_free_typevars(t)
-        return isType(t) ? Type : Any
-    else
-        return t
-    end
-end
-
-function cache_lookup(lattice::AbstractLattice, linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{InferenceResult})
-    method = linfo.def::Method
-    nargs::Int = method.nargs
-    method.isva && (nargs -= 1)
-    length(given_argtypes) >= nargs || return nothing
-    for cached_result in cache
-        cached_result.linfo === linfo || continue
-        cache_match = true
-        cache_argtypes = cached_result.argtypes
-        cache_overridden_by_const = cached_result.overridden_by_const
-        for i in 1:nargs
-            if !is_argtype_match(lattice, widenmustalias(given_argtypes[i]),
-                                 cache_argtypes[i],
-                                 cache_overridden_by_const[i])
-                cache_match = false
-                break
-            end
-        end
-        if method.isva && cache_match
-            cache_match = is_argtype_match(lattice, tuple_tfunc(lattice, given_argtypes[(nargs + 1):end]),
-                                           cache_argtypes[end],
-                                           cache_overridden_by_const[end])
-        end
-        cache_match || continue
-        return cached_result
-    end
-    return nothing
-end
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
deleted file mode 100644
index c4608dd5781e1..0000000000000
--- a/base/compiler/inferencestate.jl
+++ /dev/null
@@ -1,894 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# data structures
-# ===============
-
-mutable struct BitSetBoundedMinPrioritySet <: AbstractSet{Int}
-    elems::BitSet
-    min::Int
-    # Stores whether min is exact or a lower bound
-    # If exact, it is not set in elems
-    min_exact::Bool
-    max::Int
-end
-
-function BitSetBoundedMinPrioritySet(max::Int)
-    bs = BitSet()
-    bs.offset = 0
-    BitSetBoundedMinPrioritySet(bs, max+1, true, max)
-end
-
-@noinline function _advance_bsbmp!(bsbmp::BitSetBoundedMinPrioritySet)
-    @assert !bsbmp.min_exact
-    bsbmp.min = _bits_findnext(bsbmp.elems.bits, bsbmp.min)::Int
-    bsbmp.min < 0 && (bsbmp.min = bsbmp.max + 1)
-    bsbmp.min_exact = true
-    delete!(bsbmp.elems, bsbmp.min)
-    return nothing
-end
-
-function isempty(bsbmp::BitSetBoundedMinPrioritySet)
-    if bsbmp.min > bsbmp.max
-        return true
-    end
-    bsbmp.min_exact && return false
-    _advance_bsbmp!(bsbmp)
-    return bsbmp.min > bsbmp.max
-end
-
-function popfirst!(bsbmp::BitSetBoundedMinPrioritySet)
-    bsbmp.min_exact || _advance_bsbmp!(bsbmp)
-    m = bsbmp.min
-    m > bsbmp.max && throw(ArgumentError("BitSetBoundedMinPrioritySet must be non-empty"))
-    bsbmp.min = m+1
-    bsbmp.min_exact = false
-    return m
-end
-
-function push!(bsbmp::BitSetBoundedMinPrioritySet, idx::Int)
-    if idx <= bsbmp.min
-        if bsbmp.min_exact && bsbmp.min < bsbmp.max && idx != bsbmp.min
-            push!(bsbmp.elems, bsbmp.min)
-        end
-        bsbmp.min = idx
-        bsbmp.min_exact = true
-        return nothing
-    end
-    push!(bsbmp.elems, idx)
-    return nothing
-end
-
-function in(idx::Int, bsbmp::BitSetBoundedMinPrioritySet)
-    if bsbmp.min_exact && idx == bsbmp.min
-        return true
-    end
-    return idx in bsbmp.elems
-end
-
-function append!(bsbmp::BitSetBoundedMinPrioritySet, itr)
-    for val in itr
-        push!(bsbmp, val)
-    end
-end
-
-mutable struct TwoPhaseVectorView <: AbstractVector{Int}
-    const data::Vector{Int}
-    count::Int
-    const range::UnitRange{Int}
-end
-size(tpvv::TwoPhaseVectorView) = (tpvv.count,)
-function getindex(tpvv::TwoPhaseVectorView, i::Int)
-    checkbounds(tpvv, i)
-    @inbounds tpvv.data[first(tpvv.range) + i - 1]
-end
-function push!(tpvv::TwoPhaseVectorView, v::Int)
-    tpvv.count += 1
-    tpvv.data[first(tpvv.range) + tpvv.count - 1] = v
-    return nothing
-end
-
-"""
-    mutable struct TwoPhaseDefUseMap
-
-This struct is intended as a memory- and GC-pressure-efficient mechanism
-for incrementally computing def-use maps. The idea is that the def-use map
-is constructed into two passes over the IR. In the first, we simply count the
-the number of uses, computing the number of uses for each def as well as the
-total number of uses. In the second pass, we actually fill in the def-use
-information.
-
-The idea is that either of these two phases can be combined with other useful
-work that needs to scan the instruction stream anyway, while avoiding the
-significant allocation pressure of e.g. allocating an array for every SSA value
-or attempting to dynamically move things around as new uses are discovered.
-
-The def-use map is presented as a vector of vectors. For every def, indexing
-into the map will return a vector of uses.
-"""
-mutable struct TwoPhaseDefUseMap <: AbstractVector{TwoPhaseVectorView}
-    ssa_uses::Vector{Int}
-    data::Vector{Int}
-    complete::Bool
-end
-
-function complete!(tpdum::TwoPhaseDefUseMap)
-    cumsum = 0
-    for i = 1:length(tpdum.ssa_uses)
-        this_val = cumsum + 1
-        cumsum += tpdum.ssa_uses[i]
-        tpdum.ssa_uses[i] = this_val
-    end
-    resize!(tpdum.data, cumsum)
-    fill!(tpdum.data, 0)
-    tpdum.complete = true
-end
-
-function TwoPhaseDefUseMap(nssas::Int)
-    ssa_uses = zeros(Int, nssas)
-    data = Int[]
-    complete = false
-    return TwoPhaseDefUseMap(ssa_uses, data, complete)
-end
-
-function count!(tpdum::TwoPhaseDefUseMap, arg::SSAValue)
-    @assert !tpdum.complete
-    tpdum.ssa_uses[arg.id] += 1
-end
-
-function kill_def_use!(tpdum::TwoPhaseDefUseMap, def::Int, use::Int)
-    if !tpdum.complete
-        tpdum.ssa_uses[def] -= 1
-    else
-        range = tpdum.ssa_uses[def]:(def == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[def + 1] - 1))
-        # TODO: Sorted
-        useidx = findfirst(idx->tpdum.data[idx] == use, range)
-        @assert useidx !== nothing
-        idx = range[useidx]
-        while idx < lastindex(range)
-            ndata = tpdum.data[idx+1]
-            ndata == 0 && break
-            tpdum.data[idx] = ndata
-            idx += 1
-        end
-        tpdum.data[idx] = 0
-    end
-end
-kill_def_use!(tpdum::TwoPhaseDefUseMap, def::SSAValue, use::Int) =
-    kill_def_use!(tpdum, def.id, use)
-
-function getindex(tpdum::TwoPhaseDefUseMap, idx::Int)
-    @assert tpdum.complete
-    range = tpdum.ssa_uses[idx]:(idx == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[idx + 1] - 1))
-    # TODO: Make logarithmic
-    nelems = 0
-    for i in range
-        tpdum.data[i] == 0 && break
-        nelems += 1
-    end
-    return TwoPhaseVectorView(tpdum.data, nelems, range)
-end
-
-mutable struct LazyGenericDomtree{IsPostDom}
-    ir::IRCode
-    domtree::GenericDomTree{IsPostDom}
-    LazyGenericDomtree{IsPostDom}(ir::IRCode) where {IsPostDom} = new{IsPostDom}(ir)
-end
-function get!(x::LazyGenericDomtree{IsPostDom}) where {IsPostDom}
-    isdefined(x, :domtree) && return x.domtree
-    return @timeit "domtree 2" x.domtree = IsPostDom ?
-        construct_postdomtree(x.ir.cfg.blocks) :
-        construct_domtree(x.ir.cfg.blocks)
-end
-
-const LazyDomtree = LazyGenericDomtree{false}
-const LazyPostDomtree = LazyGenericDomtree{true}
-
-# InferenceState
-# ==============
-
-"""
-    const VarTable = Vector{VarState}
-
-The extended lattice that maps local variables to inferred type represented as `AbstractLattice`.
-Each index corresponds to the `id` of `SlotNumber` which identifies each local variable.
-Note that `InferenceState` will maintain multiple `VarTable`s at each SSA statement
-to enable flow-sensitive analysis.
-"""
-const VarTable = Vector{VarState}
-
-mutable struct InferenceState
-    #= information about this method instance =#
-    linfo::MethodInstance
-    world::UInt
-    mod::Module
-    sptypes::Vector{VarState}
-    slottypes::Vector{Any}
-    src::CodeInfo
-    cfg::CFG
-    method_info::MethodInfo
-
-    #= intermediate states for local abstract interpretation =#
-    currbb::Int
-    currpc::Int
-    ip::BitSet#=TODO BoundedMinPrioritySet=# # current active instruction pointers
-    handler_at::Vector{Int} # current exception handler info
-    ssavalue_uses::Vector{BitSet} # ssavalue sparsity and restart info
-    # TODO: Could keep this sparsely by doing structural liveness analysis ahead of time.
-    bb_vartables::Vector{Union{Nothing,VarTable}} # nothing if not analyzed yet
-    ssavaluetypes::Vector{Any}
-    stmt_edges::Vector{Union{Nothing,Vector{Any}}}
-    stmt_info::Vector{CallInfo}
-
-    #= intermediate states for interprocedural abstract interpretation =#
-    pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue
-    limitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on return
-    cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller
-    callers_in_cycle::Vector{InferenceState}
-    dont_work_on_me::Bool
-    parent # ::Union{Nothing,AbsIntState}
-
-    #= results =#
-    result::InferenceResult # remember where to put the result
-    valid_worlds::WorldRange
-    bestguess #::Type
-    ipo_effects::Effects
-
-    #= flags =#
-    # Whether to restrict inference of abstract call sites to avoid excessive work
-    # Set by default for toplevel frame.
-    restrict_abstract_call_sites::Bool
-    cached::Bool # TODO move this to InferenceResult?
-    insert_coverage::Bool
-
-    # The interpreter that created this inference state. Not looked at by
-    # NativeInterpreter. But other interpreters may use this to detect cycles
-    interp::AbstractInterpreter
-
-    # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results
-    function InferenceState(result::InferenceResult, src::CodeInfo, cache::Symbol,
-                            interp::AbstractInterpreter)
-        linfo = result.linfo
-        world = get_world_counter(interp)
-        def = linfo.def
-        mod = isa(def, Method) ? def.module : def
-        sptypes = sptypes_from_meth_instance(linfo)
-        code = src.code::Vector{Any}
-        cfg = compute_basic_blocks(code)
-        method_info = MethodInfo(src)
-
-        currbb = currpc = 1
-        ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1)
-        handler_at = compute_trycatch(code, BitSet())
-        nssavalues = src.ssavaluetypes::Int
-        ssavalue_uses = find_ssavalue_uses(code, nssavalues)
-        nstmts = length(code)
-        stmt_edges = Union{Nothing, Vector{Any}}[ nothing for i = 1:nstmts ]
-        stmt_info = CallInfo[ NoCallInfo() for i = 1:nstmts ]
-
-        nslots = length(src.slotflags)
-        slottypes = Vector{Any}(undef, nslots)
-        bb_vartables = Union{Nothing,VarTable}[ nothing for i = 1:length(cfg.blocks) ]
-        bb_vartable1 = bb_vartables[1] = VarTable(undef, nslots)
-        argtypes = result.argtypes
-        nargtypes = length(argtypes)
-        for i = 1:nslots
-            argtyp = (i > nargtypes) ? Bottom : argtypes[i]
-            slottypes[i] = argtyp
-            bb_vartable1[i] = VarState(argtyp, i > nargtypes)
-        end
-        src.ssavaluetypes = ssavaluetypes = Any[ NOT_FOUND for i = 1:nssavalues ]
-
-        pclimitations = IdSet{InferenceState}()
-        limitations = IdSet{InferenceState}()
-        cycle_backedges = Vector{Tuple{InferenceState,Int}}()
-        callers_in_cycle = Vector{InferenceState}()
-        dont_work_on_me = false
-        parent = nothing
-
-        valid_worlds = WorldRange(src.min_world, src.max_world == typemax(UInt) ? get_world_counter() : src.max_world)
-        bestguess = Bottom
-        ipo_effects = EFFECTS_TOTAL
-
-        insert_coverage = should_insert_coverage(mod, src)
-        if insert_coverage
-            ipo_effects = Effects(ipo_effects; effect_free = ALWAYS_FALSE)
-        end
-
-        restrict_abstract_call_sites = isa(linfo.def, Module)
-        @assert cache === :no || cache === :local || cache === :global
-        cached = cache === :global
-
-        # some more setups
-        InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at)
-        cache !== :no && push!(get_inference_cache(interp), result)
-
-        return new(
-            linfo, world, mod, sptypes, slottypes, src, cfg, method_info,
-            currbb, currpc, ip, handler_at, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info,
-            pclimitations, limitations, cycle_backedges, callers_in_cycle, dont_work_on_me, parent,
-            result, valid_worlds, bestguess, ipo_effects,
-            restrict_abstract_call_sites, cached, insert_coverage,
-            interp)
-    end
-end
-
-is_inferred(sv::InferenceState) = is_inferred(sv.result)
-is_inferred(result::InferenceResult) = result.result !== nothing
-
-was_reached(sv::InferenceState, pc::Int) = sv.ssavaluetypes[pc] !== NOT_FOUND
-
-function compute_trycatch(code::Vector{Any}, ip::BitSet)
-    # The goal initially is to record the frame like this for the state at exit:
-    # 1: (enter 3) # == 0
-    # 3: (expr)    # == 1
-    # 3: (leave 1) # == 1
-    # 4: (expr)    # == 0
-    # then we can find all trys by walking backwards from :enter statements,
-    # and all catches by looking at the statement after the :enter
-    n = length(code)
-    empty!(ip)
-    ip.offset = 0 # for _bits_findnext
-    push!(ip, n + 1)
-    handler_at = fill(0, n)
-
-    # start from all :enter statements and record the location of the try
-    for pc = 1:n
-        stmt = code[pc]
-        if isexpr(stmt, :enter)
-            l = stmt.args[1]::Int
-            handler_at[pc + 1] = pc
-            push!(ip, pc + 1)
-            handler_at[l] = pc
-            push!(ip, l)
-        end
-    end
-
-    # now forward those marks to all :leave statements
-    pc´´ = 0
-    while true
-        # make progress on the active ip set
-        pc = _bits_findnext(ip.bits, pc´´)::Int
-        pc > n && break
-        while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
-            pc´ = pc + 1 # next program-counter (after executing instruction)
-            if pc == pc´´
-                pc´´ = pc´
-            end
-            delete!(ip, pc)
-            cur_hand = handler_at[pc]
-            @assert cur_hand != 0 "unbalanced try/catch"
-            stmt = code[pc]
-            if isa(stmt, GotoNode)
-                pc´ = stmt.label
-            elseif isa(stmt, GotoIfNot)
-                l = stmt.dest::Int
-                if handler_at[l] != cur_hand
-                    @assert handler_at[l] == 0 "unbalanced try/catch"
-                    handler_at[l] = cur_hand
-                    if l < pc´´
-                        pc´´ = l
-                    end
-                    push!(ip, l)
-                end
-            elseif isa(stmt, ReturnNode)
-                @assert !isdefined(stmt, :val) "unbalanced try/catch"
-                break
-            elseif isa(stmt, Expr)
-                head = stmt.head
-                if head === :enter
-                    cur_hand = pc
-                elseif head === :leave
-                    l = stmt.args[1]::Int
-                    for i = 1:l
-                        cur_hand = handler_at[cur_hand]
-                    end
-                    cur_hand == 0 && break
-                end
-            end
-
-            pc´ > n && break # can't proceed with the fast-path fall-through
-            if handler_at[pc´] != cur_hand
-                @assert handler_at[pc´] == 0 "unbalanced try/catch"
-                handler_at[pc´] = cur_hand
-            elseif !in(pc´, ip)
-                break  # already visited
-            end
-            pc = pc´
-        end
-    end
-
-    @assert first(ip) == n + 1
-    return handler_at
-end
-
-# check if coverage mode is enabled
-function should_insert_coverage(mod::Module, src::CodeInfo)
-    coverage_enabled(mod) && return true
-    JLOptions().code_coverage == 3 || return false
-    # path-specific coverage mode: if any line falls in a tracked file enable coverage for all
-    linetable = src.linetable
-    if isa(linetable, Vector{Any})
-        for line in linetable
-            line = line::LineInfoNode
-            if is_file_tracked(line.file)
-                return true
-            end
-        end
-    elseif isa(linetable, Vector{LineInfoNode})
-        for line in linetable
-            if is_file_tracked(line.file)
-                return true
-            end
-        end
-    end
-    return false
-end
-
-function InferenceState(result::InferenceResult, cache::Symbol, interp::AbstractInterpreter)
-    # prepare an InferenceState object for inferring lambda
-    world = get_world_counter(interp)
-    src = retrieve_code_info(result.linfo, world)
-    src === nothing && return nothing
-    validate_code_in_debug_mode(result.linfo, src, "lowered")
-    return InferenceState(result, src, cache, interp)
-end
-
-"""
-    constrains_param(var::TypeVar, sig, covariant::Bool, type_constrains::Bool)
-
-Check if `var` will be constrained to have a definite value
-in any concrete leaftype subtype of `sig`.
-
-It is used as a helper to determine whether type intersection is guaranteed to be able to
-find a value for a particular type parameter.
-A necessary condition for type intersection to not assign a parameter is that it only
-appears in a `Union[All]` and during subtyping some other union component (that does not
-constrain the type parameter) is selected.
-
-The `type_constrains` flag determines whether Type{T} is considered to be constraining
-`T`. This is not true in general, because of the existence of types with free type
-parameters, however, some callers would like to ignore this corner case.
-"""
-function constrains_param(var::TypeVar, @nospecialize(typ), covariant::Bool, type_constrains::Bool=false)
-    typ === var && return true
-    while typ isa UnionAll
-        covariant && constrains_param(var, typ.var.ub, covariant, type_constrains) && return true
-        # typ.var.lb doesn't constrain var
-        typ = typ.body
-    end
-    if typ isa Union
-        # for unions, verify that both options would constrain var
-        ba = constrains_param(var, typ.a, covariant, type_constrains)
-        bb = constrains_param(var, typ.b, covariant, type_constrains)
-        (ba && bb) && return true
-    elseif typ isa DataType
-        # return true if any param constrains var
-        fc = length(typ.parameters)
-        if fc > 0
-            if typ.name === Tuple.name
-                # vararg tuple needs special handling
-                for i in 1:(fc - 1)
-                    p = typ.parameters[i]
-                    constrains_param(var, p, covariant, type_constrains) && return true
-                end
-                lastp = typ.parameters[fc]
-                vararg = unwrap_unionall(lastp)
-                if vararg isa Core.TypeofVararg && isdefined(vararg, :N)
-                    constrains_param(var, vararg.N, covariant, type_constrains) && return true
-                    # T = vararg.parameters[1] doesn't constrain var
-                else
-                    constrains_param(var, lastp, covariant, type_constrains) && return true
-                end
-            else
-                if typ.name === typename(Type) && typ.parameters[1] === var && var.ub === Any
-                    # Types with free type parameters are <: Type cause the typevar
-                    # to be unconstrained because Type{T} with free typevars is illegal
-                    return type_constrains
-                end
-                for i in 1:fc
-                    p = typ.parameters[i]
-                    constrains_param(var, p, false, type_constrains) && return true
-                end
-            end
-        end
-    end
-    return false
-end
-
-const EMPTY_SPTYPES = VarState[]
-
-function sptypes_from_meth_instance(linfo::MethodInstance)
-    def = linfo.def
-    isa(def, Method) || return EMPTY_SPTYPES # toplevel
-    sig = def.sig
-    if isempty(linfo.sparam_vals)
-        isa(sig, UnionAll) || return EMPTY_SPTYPES
-        # linfo is unspecialized
-        spvals = Any[]
-        sig′ = sig
-        while isa(sig′, UnionAll)
-            push!(spvals, sig′.var)
-            sig′ = sig′.body
-        end
-    else
-        spvals = linfo.sparam_vals
-    end
-    nvals = length(spvals)
-    sptypes = Vector{VarState}(undef, nvals)
-    for i = 1:nvals
-        v = spvals[i]
-        if v isa TypeVar
-            temp = sig
-            for j = 1:i-1
-                temp = temp.body
-            end
-            vᵢ = (temp::UnionAll).var
-            sigtypes = (unwrap_unionall(temp)::DataType).parameters
-            for j = 1:length(sigtypes)
-                sⱼ = sigtypes[j]
-                if isType(sⱼ) && sⱼ.parameters[1] === vᵢ
-                    # if this parameter came from `arg::Type{T}`,
-                    # then `arg` is more precise than `Type{T} where lb<:T<:ub`
-                    ty = fieldtype(linfo.specTypes, j)
-                    @goto ty_computed
-                end
-            end
-            ub = unwraptv_ub(v)
-            if has_free_typevars(ub)
-                ub = Any
-            end
-            lb = unwraptv_lb(v)
-            if has_free_typevars(lb)
-                lb = Bottom
-            end
-            if Any === ub && lb === Bottom
-                ty = Any
-            else
-                tv = TypeVar(v.name, lb, ub)
-                ty = UnionAll(tv, Type{tv})
-            end
-            @label ty_computed
-            undef = !(let sig=sig
-                # if the specialized signature `linfo.specTypes` doesn't contain any free
-                # type variables, we can use it for a more accurate analysis of whether `v`
-                # is constrained or not, otherwise we should use `def.sig` which always
-                # doesn't contain any free type variables
-                if !has_free_typevars(linfo.specTypes)
-                    sig = linfo.specTypes
-                end
-                @assert !has_free_typevars(sig)
-                constrains_param(v, sig, #=covariant=#true)
-            end)
-        elseif isvarargtype(v)
-            ty = Int
-            undef = false
-        else
-            ty = Const(v)
-            undef = false
-        end
-        sptypes[i] = VarState(ty, undef)
-    end
-    return sptypes
-end
-
-_topmod(sv::InferenceState) = _topmod(frame_module(sv))
-
-function record_ssa_assign!(𝕃ᵢ::AbstractLattice, ssa_id::Int, @nospecialize(new), frame::InferenceState)
-    ssavaluetypes = frame.ssavaluetypes
-    old = ssavaluetypes[ssa_id]
-    if old === NOT_FOUND || !⊑(𝕃ᵢ, new, old)
-        # typically, we expect that old ⊑ new (that output information only
-        # gets less precise with worse input information), but to actually
-        # guarantee convergence we need to use tmerge here to ensure that is true
-        ssavaluetypes[ssa_id] = old === NOT_FOUND ? new : tmerge(𝕃ᵢ, old, new)
-        W = frame.ip
-        for r in frame.ssavalue_uses[ssa_id]
-            if was_reached(frame, r)
-                usebb = block_for_inst(frame.cfg, r)
-                # We're guaranteed to visit the statement if it's in the current
-                # basic block, since SSA values can only ever appear after their
-                # def.
-                if usebb != frame.currbb
-                    push!(W, usebb)
-                end
-            end
-        end
-    end
-    return nothing
-end
-
-function add_cycle_backedge!(caller::InferenceState, frame::InferenceState, currpc::Int)
-    update_valid_age!(caller, frame.valid_worlds)
-    backedge = (caller, currpc)
-    contains_is(frame.cycle_backedges, backedge) || push!(frame.cycle_backedges, backedge)
-    add_backedge!(caller, frame.linfo)
-    return frame
-end
-
-function get_stmt_edges!(caller::InferenceState, currpc::Int=caller.currpc)
-    stmt_edges = caller.stmt_edges
-    edges = stmt_edges[currpc]
-    if edges === nothing
-        edges = stmt_edges[currpc] = []
-    end
-    return edges
-end
-
-function empty_backedges!(frame::InferenceState, currpc::Int=frame.currpc)
-    edges = frame.stmt_edges[currpc]
-    edges === nothing || empty!(edges)
-    return nothing
-end
-
-function print_callstack(sv::InferenceState)
-    while sv !== nothing
-        print(sv.linfo)
-        !sv.cached && print("  [uncached]")
-        println()
-        for cycle in sv.callers_in_cycle
-            print(' ', cycle.linfo)
-            println()
-        end
-        sv = sv.parent
-    end
-end
-
-function narguments(sv::InferenceState, include_va::Bool=true)
-    def = sv.linfo.def
-    nargs = length(sv.result.argtypes)
-    if !include_va
-        nargs -= isa(def, Method) && def.isva
-    end
-    return nargs
-end
-
-# IRInterpretationState
-# =====================
-
-# TODO add `result::InferenceResult` and put the irinterp result into the inference cache?
-mutable struct IRInterpretationState
-    const method_info::MethodInfo
-    const ir::IRCode
-    const mi::MethodInstance
-    const world::UInt
-    curridx::Int
-    const argtypes_refined::Vector{Bool}
-    const sptypes::Vector{VarState}
-    const tpdum::TwoPhaseDefUseMap
-    const ssa_refined::BitSet
-    const lazydomtree::LazyDomtree
-    valid_worlds::WorldRange
-    const edges::Vector{Any}
-    parent # ::Union{Nothing,AbsIntState}
-
-    function IRInterpretationState(interp::AbstractInterpreter,
-        method_info::MethodInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any},
-        world::UInt, min_world::UInt, max_world::UInt)
-        curridx = 1
-        given_argtypes = Vector{Any}(undef, length(argtypes))
-        for i = 1:length(given_argtypes)
-            given_argtypes[i] = widenslotwrapper(argtypes[i])
-        end
-        given_argtypes = va_process_argtypes(optimizer_lattice(interp), given_argtypes, mi)
-        argtypes_refined = Bool[!⊑(optimizer_lattice(interp), ir.argtypes[i], given_argtypes[i])
-            for i = 1:length(given_argtypes)]
-        empty!(ir.argtypes)
-        append!(ir.argtypes, given_argtypes)
-        tpdum = TwoPhaseDefUseMap(length(ir.stmts))
-        ssa_refined = BitSet()
-        lazydomtree = LazyDomtree(ir)
-        valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? get_world_counter() : max_world)
-        edges = Any[]
-        parent = nothing
-        return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum,
-                   ssa_refined, lazydomtree, valid_worlds, edges, parent)
-    end
-end
-
-function IRInterpretationState(interp::AbstractInterpreter,
-    code::CodeInstance, mi::MethodInstance, argtypes::Vector{Any}, world::UInt)
-    @assert code.def === mi
-    src = @atomic :monotonic code.inferred
-    if isa(src, String)
-        src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src)::CodeInfo
-    else
-        isa(src, CodeInfo) || return nothing
-    end
-    method_info = MethodInfo(src)
-    ir = inflate_ir(src, mi)
-    return IRInterpretationState(interp, method_info, ir, mi, argtypes, world,
-                                 src.min_world, src.max_world)
-end
-
-# AbsIntState
-# ===========
-
-const AbsIntState = Union{InferenceState,IRInterpretationState}
-
-frame_instance(sv::InferenceState) = sv.linfo
-frame_instance(sv::IRInterpretationState) = sv.mi
-
-function frame_module(sv::AbsIntState)
-    mi = frame_instance(sv)
-    def = mi.def
-    isa(def, Module) && return def
-    return def.module
-end
-
-frame_parent(sv::InferenceState) = sv.parent::Union{Nothing,AbsIntState}
-frame_parent(sv::IRInterpretationState) = sv.parent::Union{Nothing,AbsIntState}
-
-is_constproped(sv::InferenceState) = any(sv.result.overridden_by_const)
-is_constproped(::IRInterpretationState) = true
-
-is_cached(sv::InferenceState) = sv.cached
-is_cached(::IRInterpretationState) = false
-
-method_info(sv::InferenceState) = sv.method_info
-method_info(sv::IRInterpretationState) = sv.method_info
-
-propagate_inbounds(sv::AbsIntState) = method_info(sv).propagate_inbounds
-method_for_inference_limit_heuristics(sv::AbsIntState) = method_info(sv).method_for_inference_limit_heuristics
-
-frame_world(sv::InferenceState) = sv.world
-frame_world(sv::IRInterpretationState) = sv.world
-
-callers_in_cycle(sv::InferenceState) = sv.callers_in_cycle
-callers_in_cycle(sv::IRInterpretationState) = ()
-
-is_effect_overridden(sv::AbsIntState, effect::Symbol) = is_effect_overridden(frame_instance(sv), effect)
-function is_effect_overridden(linfo::MethodInstance, effect::Symbol)
-    def = linfo.def
-    return isa(def, Method) && is_effect_overridden(def, effect)
-end
-is_effect_overridden(method::Method, effect::Symbol) = is_effect_overridden(decode_effects_override(method.purity), effect)
-is_effect_overridden(override::EffectsOverride, effect::Symbol) = getfield(override, effect)
-
-has_conditional(𝕃::AbstractLattice, ::InferenceState) = has_conditional(𝕃)
-has_conditional(::AbstractLattice, ::IRInterpretationState) = false
-
-# work towards converging the valid age range for sv
-function update_valid_age!(sv::AbsIntState, valid_worlds::WorldRange)
-    valid_worlds = sv.valid_worlds = intersect(valid_worlds, sv.valid_worlds)
-    @assert sv.world in valid_worlds "invalid age range update"
-    return valid_worlds
-end
-
-"""
-    AbsIntStackUnwind(sv::AbsIntState)
-
-Iterate through all callers of the given `AbsIntState` in the abstract interpretation stack
-(including the given `AbsIntState` itself), visiting children before their parents (i.e.
-ascending the tree from the given `AbsIntState`).
-Note that cycles may be visited in any order.
-"""
-struct AbsIntStackUnwind
-    sv::AbsIntState
-end
-iterate(unw::AbsIntStackUnwind) = (unw.sv, (unw.sv, 0))
-function iterate(unw::AbsIntStackUnwind, (sv, cyclei)::Tuple{AbsIntState, Int})
-    # iterate through the cycle before walking to the parent
-    if cyclei < length(callers_in_cycle(sv))
-        cyclei += 1
-        parent = callers_in_cycle(sv)[cyclei]
-    else
-        cyclei = 0
-        parent = frame_parent(sv)
-    end
-    parent === nothing && return nothing
-    return (parent, (parent, cyclei))
-end
-
-# temporarily accumulate our edges to later add as backedges in the callee
-function add_backedge!(caller::InferenceState, mi::MethodInstance)
-    isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance
-    return push!(get_stmt_edges!(caller), mi)
-end
-function add_backedge!(irsv::IRInterpretationState, mi::MethodInstance)
-    return push!(irsv.edges, mi)
-end
-
-function add_invoke_backedge!(caller::InferenceState, @nospecialize(invokesig::Type), mi::MethodInstance)
-    isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance
-    return push!(get_stmt_edges!(caller), invokesig, mi)
-end
-function add_invoke_backedge!(irsv::IRInterpretationState, @nospecialize(invokesig::Type), mi::MethodInstance)
-    return push!(irsv.edges, invokesig, mi)
-end
-
-# used to temporarily accumulate our no method errors to later add as backedges in the callee method table
-function add_mt_backedge!(caller::InferenceState, mt::MethodTable, @nospecialize(typ))
-    isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance
-    return push!(get_stmt_edges!(caller), mt, typ)
-end
-function add_mt_backedge!(irsv::IRInterpretationState, mt::MethodTable, @nospecialize(typ))
-    return push!(irsv.edges, mt, typ)
-end
-
-get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc]
-get_curr_ssaflag(sv::IRInterpretationState) = sv.ir.stmts[sv.curridx][:flag]
-
-add_curr_ssaflag!(sv::InferenceState, flag::UInt8) = sv.src.ssaflags[sv.currpc] |= flag
-add_curr_ssaflag!(sv::IRInterpretationState, flag::UInt8) = sv.ir.stmts[sv.curridx][:flag] |= flag
-
-sub_curr_ssaflag!(sv::InferenceState, flag::UInt8) = sv.src.ssaflags[sv.currpc] &= ~flag
-sub_curr_ssaflag!(sv::IRInterpretationState, flag::UInt8) = sv.ir.stmts[sv.curridx][:flag] &= ~flag
-
-merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::Effects) =
-    caller.ipo_effects = merge_effects(caller.ipo_effects, effects)
-merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return
-
-struct InferenceLoopState
-    sig
-    rt
-    effects::Effects
-    function InferenceLoopState(@nospecialize(sig), @nospecialize(rt), effects::Effects)
-        new(sig, rt, effects)
-    end
-end
-
-bail_out_toplevel_call(::AbstractInterpreter, state::InferenceLoopState, sv::InferenceState) =
-    sv.restrict_abstract_call_sites && !isdispatchtuple(state.sig)
-bail_out_toplevel_call(::AbstractInterpreter, ::InferenceLoopState, ::IRInterpretationState) = false
-
-bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) =
-    state.rt === Any && !is_foldable(state.effects)
-bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
-    state.rt === Any && !is_foldable(state.effects)
-
-bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) =
-    state.rt === Any
-bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
-    state.rt === Any
-
-function should_infer_this_call(interp::AbstractInterpreter, sv::InferenceState)
-    if InferenceParams(interp).unoptimize_throw_blocks
-        # Disable inference of calls in throw blocks, since we're unlikely to
-        # need their types. There is one exception however: If up until now, the
-        # function has not seen any side effects, we would like to make sure there
-        # aren't any in the throw block either to enable other optimizations.
-        if is_stmt_throw_block(get_curr_ssaflag(sv))
-            should_infer_for_effects(sv) || return false
-        end
-    end
-    return true
-end
-function should_infer_for_effects(sv::InferenceState)
-    effects = sv.ipo_effects
-    return is_terminates(effects) && is_effect_free(effects)
-end
-should_infer_this_call(::AbstractInterpreter, ::IRInterpretationState) = true
-
-add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return
-add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return
-
-function get_max_methods(interp::AbstractInterpreter, @nospecialize(f), sv::AbsIntState)
-    fmax = get_max_methods_for_func(f)
-    fmax !== nothing && return fmax
-    return get_max_methods(interp, sv)
-end
-function get_max_methods(interp::AbstractInterpreter, @nospecialize(f))
-    fmax = get_max_methods_for_func(f)
-    fmax !== nothing && return fmax
-    return get_max_methods(interp)
-end
-function get_max_methods(interp::AbstractInterpreter, sv::AbsIntState)
-    mmax = get_max_methods_for_module(sv)
-    mmax !== nothing && return mmax
-    return get_max_methods(interp)
-end
-get_max_methods(interp::AbstractInterpreter) = InferenceParams(interp).max_methods
-
-function get_max_methods_for_func(@nospecialize(f))
-    if f !== nothing
-        fmm = typeof(f).name.max_methods
-        fmm !== UInt8(0) && return Int(fmm)
-    end
-    return nothing
-end
-get_max_methods_for_module(sv::AbsIntState) = get_max_methods_for_module(frame_module(sv))
-function get_max_methods_for_module(mod::Module)
-    max_methods = ccall(:jl_get_module_max_methods, Cint, (Any,), mod) % Int
-    max_methods < 0 && return nothing
-    return max_methods
-end
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
deleted file mode 100644
index dd50f8c9d47e1..0000000000000
--- a/base/compiler/optimize.jl
+++ /dev/null
@@ -1,849 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#############
-# constants #
-#############
-
-# The slot has uses that are not statically dominated by any assignment
-# This is implied by `SLOT_USEDUNDEF`.
-# If this is not set, all the uses are (statically) dominated by the defs.
-# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
-const SLOT_STATICUNDEF  = 1 # slot might be used before it is defined (structurally)
-const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
-const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
-# const SLOT_CALLED      = 64
-
-# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
-
-const IR_FLAG_NULL        = 0x00
-# This statement is marked as @inbounds by user.
-# Ff replaced by inlining, any contained boundschecks may be removed.
-const IR_FLAG_INBOUNDS    = 0x01 << 0
-# This statement is marked as @inline by user
-const IR_FLAG_INLINE      = 0x01 << 1
-# This statement is marked as @noinline by user
-const IR_FLAG_NOINLINE    = 0x01 << 2
-const IR_FLAG_THROW_BLOCK = 0x01 << 3
-# This statement may be removed if its result is unused. In particular,
-# it must be both :effect_free and :nothrow.
-# TODO: Separate these out.
-const IR_FLAG_EFFECT_FREE = 0x01 << 4
-# This statement was proven not to throw
-const IR_FLAG_NOTHROW     = 0x01 << 5
-# This is :consistent
-const IR_FLAG_CONSISTENT  = 0x01 << 6
-# An optimization pass has updated this statement in a way that may
-# have exposed information that inference did not see. Re-running
-# inference on this statement may be profitable.
-const IR_FLAG_REFINED     = 0x01 << 7
-
-const TOP_TUPLE = GlobalRef(Core, :tuple)
-
-# This corresponds to the type of `CodeInfo`'s `inlining_cost` field
-const InlineCostType = UInt16
-const MAX_INLINE_COST = typemax(InlineCostType)
-const MIN_INLINE_COST = InlineCostType(10)
-const MaybeCompressed = Union{CodeInfo, String}
-
-is_inlineable(@nospecialize src::MaybeCompressed) =
-    ccall(:jl_ir_inlining_cost, InlineCostType, (Any,), src) != MAX_INLINE_COST
-set_inlineable!(src::CodeInfo, val::Bool) =
-    src.inlining_cost = (val ? MIN_INLINE_COST : MAX_INLINE_COST)
-
-function inline_cost_clamp(x::Int)::InlineCostType
-    x > MAX_INLINE_COST && return MAX_INLINE_COST
-    x < MIN_INLINE_COST && return MIN_INLINE_COST
-    return convert(InlineCostType, x)
-end
-
-is_declared_inline(@nospecialize src::MaybeCompressed) =
-    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 1
-
-is_declared_noinline(@nospecialize src::MaybeCompressed) =
-    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 2
-
-#####################
-# OptimizationState #
-#####################
-
-is_source_inferred(@nospecialize src::MaybeCompressed) =
-    ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
-
-function inlining_policy(interp::AbstractInterpreter,
-    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt8, mi::MethodInstance,
-    argtypes::Vector{Any})
-    if isa(src, MaybeCompressed)
-        is_source_inferred(src) || return nothing
-        src_inlineable = is_stmt_inline(stmt_flag) || is_inlineable(src)
-        return src_inlineable ? src : nothing
-    elseif src === nothing && is_stmt_inline(stmt_flag)
-        # if this statement is forced to be inlined, make an additional effort to find the
-        # inferred source in the local cache
-        # we still won't find a source for recursive call because the "single-level" inlining
-        # seems to be more trouble and complex than it's worth
-        inf_result = cache_lookup(optimizer_lattice(interp), mi, argtypes, get_inference_cache(interp))
-        inf_result === nothing && return nothing
-        src = inf_result.src
-        if isa(src, CodeInfo)
-            src_inferred = is_source_inferred(src)
-            return src_inferred ? src : nothing
-        else
-            return nothing
-        end
-    elseif isa(src, IRCode)
-        return src
-    elseif isa(src, SemiConcreteResult)
-        if is_declared_noinline(mi.def::Method)
-            # For `NativeInterpreter`, `SemiConcreteResult` may be produced for
-            # a `@noinline`-declared method when it's marked as `@constprop :aggressive`.
-            # Suppress the inlining here.
-            return nothing
-        end
-        return src
-    end
-    return nothing
-end
-
-struct InliningState{Interp<:AbstractInterpreter}
-    edges::Vector{Any}
-    world::UInt
-    interp::Interp
-end
-function InliningState(sv::InferenceState, interp::AbstractInterpreter)
-    edges = sv.stmt_edges[1]::Vector{Any}
-    return InliningState(edges, sv.world, interp)
-end
-function InliningState(interp::AbstractInterpreter)
-    return InliningState(Any[], get_world_counter(interp), interp)
-end
-
-# get `code_cache(::AbstractInterpreter)` from `state::InliningState`
-code_cache(state::InliningState) = WorldView(code_cache(state.interp), state.world)
-
-include("compiler/ssair/driver.jl")
-
-mutable struct OptimizationState{Interp<:AbstractInterpreter}
-    linfo::MethodInstance
-    src::CodeInfo
-    ir::Union{Nothing, IRCode}
-    stmt_info::Vector{CallInfo}
-    mod::Module
-    sptypes::Vector{VarState}
-    slottypes::Vector{Any}
-    inlining::InliningState{Interp}
-    cfg::Union{Nothing,CFG}
-    insert_coverage::Bool
-end
-function OptimizationState(sv::InferenceState, interp::AbstractInterpreter,
-                           recompute_cfg::Bool=true)
-    inlining = InliningState(sv, interp)
-    cfg = recompute_cfg ? nothing : sv.cfg
-    return OptimizationState(sv.linfo, sv.src, nothing, sv.stmt_info, sv.mod,
-                             sv.sptypes, sv.slottypes, inlining, cfg, sv.insert_coverage)
-end
-function OptimizationState(linfo::MethodInstance, src::CodeInfo, interp::AbstractInterpreter)
-    # prepare src for running optimization passes if it isn't already
-    nssavalues = src.ssavaluetypes
-    if nssavalues isa Int
-        src.ssavaluetypes = Any[ Any for i = 1:nssavalues ]
-    else
-        nssavalues = length(src.ssavaluetypes::Vector{Any})
-    end
-    sptypes = sptypes_from_meth_instance(linfo)
-    nslots = length(src.slotflags)
-    slottypes = src.slottypes
-    if slottypes === nothing
-        slottypes = Any[ Any for i = 1:nslots ]
-    end
-    stmt_info = CallInfo[ NoCallInfo() for i = 1:nssavalues ]
-    # cache some useful state computations
-    def = linfo.def
-    mod = isa(def, Method) ? def.module : def
-    # Allow using the global MI cache, but don't track edges.
-    # This method is mostly used for unit testing the optimizer
-    inlining = InliningState(interp)
-    return OptimizationState(linfo, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, nothing, false)
-end
-function OptimizationState(linfo::MethodInstance, interp::AbstractInterpreter)
-    world = get_world_counter(interp)
-    src = retrieve_code_info(linfo, world)
-    src === nothing && return nothing
-    return OptimizationState(linfo, src, interp)
-end
-
-function ir_to_codeinf!(opt::OptimizationState)
-    (; linfo, src) = opt
-    src = ir_to_codeinf!(src, opt.ir::IRCode)
-    opt.ir = nothing
-    validate_code_in_debug_mode(linfo, src, "optimized")
-    return src
-end
-
-function ir_to_codeinf!(src::CodeInfo, ir::IRCode)
-    replace_code_newstyle!(src, ir)
-    widen_all_consts!(src)
-    src.inferred = true
-    return src
-end
-
-# widen all Const elements in type annotations
-function widen_all_consts!(src::CodeInfo)
-    ssavaluetypes = src.ssavaluetypes::Vector{Any}
-    for i = 1:length(ssavaluetypes)
-        ssavaluetypes[i] = widenconst(ssavaluetypes[i])
-    end
-
-    for i = 1:length(src.code)
-        x = src.code[i]
-        if isa(x, PiNode)
-            src.code[i] = PiNode(x.val, widenconst(x.typ))
-        end
-    end
-
-    src.rettype = widenconst(src.rettype)
-
-    return src
-end
-
-#########
-# logic #
-#########
-
-_topmod(sv::OptimizationState) = _topmod(sv.mod)
-
-is_stmt_inline(stmt_flag::UInt8)      = stmt_flag & IR_FLAG_INLINE      ≠ 0
-is_stmt_noinline(stmt_flag::UInt8)    = stmt_flag & IR_FLAG_NOINLINE    ≠ 0
-is_stmt_throw_block(stmt_flag::UInt8) = stmt_flag & IR_FLAG_THROW_BLOCK ≠ 0
-
-function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
-    Targ = args[1]
-    atyp = argextype(Targ, src)
-    # `Expr(:new)` of unknown type could raise arbitrary TypeError.
-    typ, isexact = instanceof_tfunc(atyp)
-    if !isexact
-        atyp = unwrap_unionall(widenconst(atyp))
-        if isType(atyp) && isTypeDataType(atyp.parameters[1])
-            typ = atyp.parameters[1]
-        else
-            return (false, false, false)
-        end
-        isabstracttype(typ) && return (false, false, false)
-    else
-        isconcretedispatch(typ) || return (false, false, false)
-    end
-    typ = typ::DataType
-    fcount = datatype_fieldcount(typ)
-    fcount === nothing && return (false, false, false)
-    fcount >= length(args) - 1 || return (false, false, false)
-    for fidx in 1:(length(args) - 1)
-        farg = args[fidx + 1]
-        eT = argextype(farg, src)
-        fT = fieldtype(typ, fidx)
-        if !isexact && has_free_typevars(fT)
-            if pattern_match !== nothing && pattern_match(src, typ, fidx, Targ, farg)
-                continue
-            end
-            return (false, false, false)
-        end
-        ⊑(𝕃ₒ, eT, fT) || return (false, false, false)
-    end
-    return (false, true, true)
-end
-
-"""
-    stmt_effect_flags(stmt, rt, src::Union{IRCode,IncrementalCompact}) ->
-        (consistent::Bool, effect_free_and_nothrow::Bool, nothrow::Bool)
-
-Returns a tuple of `(:consistent, :effect_free_and_nothrow, :nothrow)` flags for a given statement.
-"""
-function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact})
-    # TODO: We're duplicating analysis from inference here.
-    isa(stmt, PiNode) && return (true, true, true)
-    isa(stmt, PhiNode) && return (true, true, true)
-    isa(stmt, ReturnNode) && return (true, false, true)
-    isa(stmt, GotoNode) && return (true, false, true)
-    isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool))
-    if isa(stmt, GlobalRef)
-        nothrow = isdefined(stmt.mod, stmt.name)
-        consistent = nothrow && isconst(stmt.mod, stmt.name)
-        return (consistent, nothrow, nothrow)
-    elseif isa(stmt, Expr)
-        (; head, args) = stmt
-        if head === :static_parameter
-            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
-            sptypes = isa(src, IRCode) ? src.sptypes : src.ir.sptypes
-            nothrow = !sptypes[args[1]::Int].undef
-            return (true, nothrow, nothrow)
-        end
-        if head === :call
-            f = argextype(args[1], src)
-            f = singleton_type(f)
-            f === nothing && return (false, false, false)
-            if f === UnionAll
-                # TODO: This is a weird special case - should be determined in inference
-                argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)]
-                nothrow = _builtin_nothrow(𝕃ₒ, f, argtypes, rt)
-                return (true, nothrow, nothrow)
-            end
-            if f === Intrinsics.cglobal
-                # TODO: these are not yet linearized
-                return (false, false, false)
-            end
-            isa(f, Builtin) || return (false, false, false)
-            # Needs to be handled in inlining to look at the callee effects
-            f === Core._apply_iterate && return (false, false, false)
-            argtypes = Any[argextype(args[arg], src) for arg in 1:length(args)]
-            effects = builtin_effects(𝕃ₒ, f, ArgInfo(args, argtypes), rt)
-            consistent = is_consistent(effects)
-            effect_free = is_effect_free(effects)
-            nothrow = is_nothrow(effects)
-            return (consistent, effect_free & nothrow, nothrow)
-        elseif head === :new
-            return new_expr_effect_flags(𝕃ₒ, args, src)
-        elseif head === :foreigncall
-            effects = foreigncall_effects(stmt) do @nospecialize x
-                argextype(x, src)
-            end
-            consistent = is_consistent(effects)
-            effect_free = is_effect_free(effects)
-            nothrow = is_nothrow(effects)
-            return (consistent, effect_free & nothrow, nothrow)
-        elseif head === :new_opaque_closure
-            length(args) < 4 && return (false, false, false)
-            typ = argextype(args[1], src)
-            typ, isexact = instanceof_tfunc(typ)
-            isexact || return (false, false, false)
-            ⊑(𝕃ₒ, typ, Tuple) || return (false, false, false)
-            rt_lb = argextype(args[2], src)
-            rt_ub = argextype(args[3], src)
-            source = argextype(args[4], src)
-            if !(⊑(𝕃ₒ, rt_lb, Type) && ⊑(𝕃ₒ, rt_ub, Type) && ⊑(𝕃ₒ, source, Method))
-                return (false, false, false)
-            end
-            return (false, true, true)
-        elseif head === :isdefined || head === :the_exception || head === :copyast || head === :inbounds || head === :boundscheck
-            return (true, true, true)
-        else
-            # e.g. :loopinfo
-            return (false, false, false)
-        end
-    end
-    isa(stmt, UnoptSlot) && error("unexpected IR elements")
-    return (true, true, true)
-end
-
-"""
-    argextype(x, src::Union{IRCode,IncrementalCompact}) -> t
-    argextype(x, src::CodeInfo, sptypes::Vector{VarState}) -> t
-
-Return the type of value `x` in the context of inferred source `src`.
-Note that `t` might be an extended lattice element.
-Use `widenconst(t)` to get the native Julia type of `x`.
-"""
-argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{VarState} = ir.sptypes) =
-    argextype(x, ir, sptypes, ir.argtypes)
-function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{VarState} = compact.ir.sptypes)
-    isa(x, AnySSAValue) && return types(compact)[x]
-    return argextype(x, compact, sptypes, compact.ir.argtypes)
-end
-argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{VarState}) = argextype(x, src, sptypes, src.slottypes::Vector{Any})
-function argextype(
-    @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo},
-    sptypes::Vector{VarState}, slottypes::Vector{Any})
-    if isa(x, Expr)
-        if x.head === :static_parameter
-            return sptypes[x.args[1]::Int].typ
-        elseif x.head === :boundscheck
-            return Bool
-        elseif x.head === :copyast
-            return argextype(x.args[1], src, sptypes, slottypes)
-        end
-        Core.println("argextype called on Expr with head ", x.head,
-                     " which is not valid for IR in argument-position.")
-        @assert false
-    elseif isa(x, SlotNumber)
-        return slottypes[x.id]
-    elseif isa(x, TypedSlot)
-        return x.typ
-    elseif isa(x, SSAValue)
-        return abstract_eval_ssavalue(x, src)
-    elseif isa(x, Argument)
-        return slottypes[x.n]
-    elseif isa(x, QuoteNode)
-        return Const(x.value)
-    elseif isa(x, GlobalRef)
-        return abstract_eval_globalref(x)
-    elseif isa(x, PhiNode)
-        return Any
-    elseif isa(x, PiNode)
-        return x.typ
-    else
-        return Const(x)
-    end
-end
-abstract_eval_ssavalue(s::SSAValue, src::CodeInfo) = abstract_eval_ssavalue(s, src.ssavaluetypes::Vector{Any})
-abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s]
-
-"""
-    finish(interp::AbstractInterpreter, opt::OptimizationState,
-           ir::IRCode, caller::InferenceResult)
-
-Post-process information derived by Julia-level optimizations for later use.
-In particular, this function determines the inlineability of the optimized code.
-"""
-function finish(interp::AbstractInterpreter, opt::OptimizationState,
-                ir::IRCode, caller::InferenceResult)
-    (; src, linfo) = opt
-    (; def, specTypes) = linfo
-
-    force_noinline = is_declared_noinline(src)
-
-    # compute inlining and other related optimizations
-    result = caller.result
-    @assert !(result isa LimitedAccuracy)
-    result = widenslotwrapper(result)
-
-    opt.ir = ir
-
-    # determine and cache inlineability
-    if !force_noinline
-        sig = unwrap_unionall(specTypes)
-        if !(isa(sig, DataType) && sig.name === Tuple.name)
-            force_noinline = true
-        end
-        if !is_declared_inline(src) && result === Bottom
-            force_noinline = true
-        end
-    end
-    if force_noinline
-        set_inlineable!(src, false)
-    elseif isa(def, Method)
-        if is_declared_inline(src) && isdispatchtuple(specTypes)
-            # obey @inline declaration if a dispatch barrier would not help
-            set_inlineable!(src, true)
-        else
-            # compute the cost (size) of inlining this code
-            params = OptimizationParams(interp)
-            cost_threshold = default = params.inline_cost_threshold
-            if ⊑(optimizer_lattice(interp), result, Tuple) && !isconcretetype(widenconst(result))
-                cost_threshold += params.inline_tupleret_bonus
-            end
-            # if the method is declared as `@inline`, increase the cost threshold 20x
-            if is_declared_inline(src)
-                cost_threshold += 19*default
-            end
-            # a few functions get special treatment
-            if def.module === _topmod(def.module)
-                name = def.name
-                if name === :iterate || name === :unsafe_convert || name === :cconvert
-                    cost_threshold += 4*default
-                end
-            end
-            src.inlining_cost = inline_cost(ir, params, cost_threshold)
-        end
-    end
-    return nothing
-end
-
-# run the optimization work
-function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
-    @timeit "optimizer" ir = run_passes(opt.src, opt, caller)
-    return finish(interp, opt, ir, caller)
-end
-
-using .EscapeAnalysis
-import .EscapeAnalysis: EscapeState, ArgEscapeCache, is_ipo_profitable
-
-"""
-    cache_escapes!(caller::InferenceResult, estate::EscapeState)
-
-Transforms escape information of call arguments of `caller`,
-and then caches it into a global cache for later interprocedural propagation.
-"""
-cache_escapes!(caller::InferenceResult, estate::EscapeState) =
-    caller.argescapes = ArgEscapeCache(estate)
-
-function ipo_escape_cache(mi_cache::MICache) where MICache
-    return function (linfo::Union{InferenceResult,MethodInstance})
-        if isa(linfo, InferenceResult)
-            argescapes = linfo.argescapes
-        else
-            codeinst = get(mi_cache, linfo, nothing)
-            isa(codeinst, CodeInstance) || return nothing
-            argescapes = codeinst.argescapes
-        end
-        return argescapes !== nothing ? argescapes::ArgEscapeCache : nothing
-    end
-end
-null_escape_cache(linfo::Union{InferenceResult,MethodInstance}) = nothing
-
-macro pass(name, expr)
-    optimize_until = esc(:optimize_until)
-    stage = esc(:__stage__)
-    macrocall = :(@timeit $(esc(name)) $(esc(expr)))
-    macrocall.args[2] = __source__  # `@timeit` may want to use it
-    quote
-        $macrocall
-        matchpass($optimize_until, ($stage += 1), $(esc(name))) && $(esc(:(@goto __done__)))
-    end
-end
-
-matchpass(optimize_until::Int, stage, _) = optimize_until == stage
-matchpass(optimize_until::String, _, name) = optimize_until == name
-matchpass(::Nothing, _, _) = false
-
-function run_passes(
-    ci::CodeInfo,
-    sv::OptimizationState,
-    caller::InferenceResult,
-    optimize_until = nothing,  # run all passes by default
-)
-    __stage__ = 0  # used by @pass
-    # NOTE: The pass name MUST be unique for `optimize_until::AbstractString` to work
-    @pass "convert"   ir = convert_to_ircode(ci, sv)
-    @pass "slot2reg"  ir = slot2reg(ir, ci, sv)
-    # TODO: Domsorting can produce an updated domtree - no need to recompute here
-    @pass "compact 1" ir = compact!(ir)
-    @pass "Inlining"  ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds)
-    # @timeit "verify 2" verify_ir(ir)
-    @pass "compact 2" ir = compact!(ir)
-    @pass "SROA"      ir = sroa_pass!(ir, sv.inlining)
-    @pass "ADCE"      ir = adce_pass!(ir, sv.inlining)
-    @pass "compact 3" ir = compact!(ir)
-    if JLOptions().debug_level == 2
-        @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable))
-    end
-    @label __done__  # used by @pass
-    return ir
-end
-
-function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
-    linetable = ci.linetable
-    if !isa(linetable, Vector{LineInfoNode})
-        linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode}
-    end
-
-    # Go through and add an unreachable node after every
-    # Union{} call. Then reindex labels.
-    code = copy_exprargs(ci.code)
-    stmtinfo = sv.stmt_info
-    codelocs = ci.codelocs
-    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
-    ssaflags = ci.ssaflags
-    meta = Expr[]
-    idx = 1
-    oldidx = 1
-    nstmts = length(code)
-    ssachangemap = labelchangemap = nothing
-    prevloc = zero(eltype(ci.codelocs))
-    while idx <= length(code)
-        codeloc = codelocs[idx]
-        if sv.insert_coverage && codeloc != prevloc && codeloc != 0
-            # insert a side-effect instruction before the current instruction in the same basic block
-            insert!(code, idx, Expr(:code_coverage_effect))
-            insert!(codelocs, idx, codeloc)
-            insert!(ssavaluetypes, idx, Nothing)
-            insert!(stmtinfo, idx, NoCallInfo())
-            insert!(ssaflags, idx, IR_FLAG_NULL)
-            if ssachangemap === nothing
-                ssachangemap = fill(0, nstmts)
-            end
-            if labelchangemap === nothing
-                labelchangemap = fill(0, nstmts)
-            end
-            ssachangemap[oldidx] += 1
-            if oldidx < length(labelchangemap)
-                labelchangemap[oldidx + 1] += 1
-            end
-            idx += 1
-            prevloc = codeloc
-        end
-        if code[idx] isa Expr && ssavaluetypes[idx] === Union{}
-            if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
-                # insert unreachable in the same basic block after the current instruction (splitting it)
-                insert!(code, idx + 1, ReturnNode())
-                insert!(codelocs, idx + 1, codelocs[idx])
-                insert!(ssavaluetypes, idx + 1, Union{})
-                insert!(stmtinfo, idx + 1, NoCallInfo())
-                insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW)
-                if ssachangemap === nothing
-                    ssachangemap = fill(0, nstmts)
-                end
-                if labelchangemap === nothing
-                    labelchangemap = sv.insert_coverage ? fill(0, nstmts) : ssachangemap
-                end
-                if oldidx < length(ssachangemap)
-                    ssachangemap[oldidx + 1] += 1
-                    sv.insert_coverage && (labelchangemap[oldidx + 1] += 1)
-                end
-                idx += 1
-            end
-        end
-        idx += 1
-        oldidx += 1
-    end
-
-    cfg = sv.cfg
-    if ssachangemap !== nothing && labelchangemap !== nothing
-        renumber_ir_elements!(code, ssachangemap, labelchangemap)
-        cfg = nothing # recompute CFG
-    end
-
-    for i = 1:length(code)
-        code[i] = process_meta!(meta, code[i])
-    end
-    strip_trailing_junk!(ci, code, stmtinfo)
-    types = Any[]
-    stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
-    if cfg === nothing
-        cfg = compute_basic_blocks(code)
-    end
-    # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the
-    # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form
-    # and eliminates slots (see below)
-    argtypes = sv.slottypes
-    return IRCode(stmts, cfg, linetable, argtypes, meta, sv.sptypes)
-end
-
-function process_meta!(meta::Vector{Expr}, @nospecialize stmt)
-    if isexpr(stmt, :meta) && length(stmt.args) ≥ 1
-        push!(meta, stmt)
-        return nothing
-    end
-    return stmt
-end
-
-function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
-    # need `ci` for the slot metadata, IR for the code
-    svdef = sv.linfo.def
-    nargs = isa(svdef, Method) ? Int(svdef.nargs) : 0
-    @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks)
-    defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst)
-    𝕃ₒ = optimizer_lattice(sv.inlining.interp)
-    @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, sv.slottypes, 𝕃ₒ) # consumes `ir`
-    # NOTE now we have converted `ir` to the SSA form and eliminated slots
-    # let's resize `argtypes` now and remove unnecessary types for the eliminated slots
-    resize!(ir.argtypes, nargs)
-    return ir
-end
-
-## Computing the cost of a function body
-
-# saturating sum (inputs are nonnegative), prevents overflow with typemax(Int) below
-plus_saturate(x::Int, y::Int) = max(x, y, x+y)
-
-# known return type
-isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
-
-function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
-                        params::OptimizationParams, error_path::Bool = false)
-    head = ex.head
-    if is_meta_expr_head(head)
-        return 0
-    elseif head === :call
-        farg = ex.args[1]
-        ftyp = argextype(farg, src, sptypes)
-        if ftyp === IntrinsicFunction && farg isa SSAValue
-            # if this comes from code that was already inlined into another function,
-            # Consts have been widened. try to recover in simple cases.
-            farg = isa(src, CodeInfo) ? src.code[farg.id] : src.stmts[farg.id][:inst]
-            if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter)
-                ftyp = argextype(farg, src, sptypes)
-            end
-        end
-        f = singleton_type(ftyp)
-        if isa(f, IntrinsicFunction)
-            iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
-            if !isassigned(T_IFUNC_COST, iidx)
-                # unknown/unhandled intrinsic
-                return params.inline_nonleaf_penalty
-            end
-            return T_IFUNC_COST[iidx]
-        end
-        if isa(f, Builtin) && f !== invoke
-            # The efficiency of operations like a[i] and s.b
-            # depend strongly on whether the result can be
-            # inferred, so check the type of ex
-            if f === Core.getfield || f === Core.tuple || f === Core.getglobal
-                # we might like to penalize non-inferrability, but
-                # tuple iteration/destructuring makes that impossible
-                # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
-                return 0
-            elseif (f === Core.arrayref || f === Core.const_arrayref || f === Core.arrayset) && length(ex.args) >= 3
-                atyp = argextype(ex.args[3], src, sptypes)
-                return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
-            elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
-                return 1
-            end
-            fidx = find_tfunc(f)
-            if fidx === nothing
-                # unknown/unhandled builtin
-                # Use the generic cost of a direct function call
-                return 20
-            end
-            return T_FFUNC_COST[fidx]
-        end
-        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
-        if extyp === Union{}
-            return 0
-        end
-        return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
-    elseif head === :foreigncall || head === :invoke || head === :invoke_modify
-        # Calls whose "return type" is Union{} do not actually return:
-        # they are errors. Since these are not part of the typical
-        # run-time of the function, we omit them from
-        # consideration. This way, non-inlined error branches do not
-        # prevent inlining.
-        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
-        return extyp === Union{} ? 0 : 20
-    elseif head === :(=)
-        if ex.args[1] isa GlobalRef
-            cost = 20
-        else
-            cost = 0
-        end
-        a = ex.args[2]
-        if a isa Expr
-            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params, error_path))
-        end
-        return cost
-    elseif head === :copyast
-        return 100
-    elseif head === :enter
-        # try/catch is a couple function calls,
-        # but don't inline functions with try/catch
-        # since these aren't usually performance-sensitive functions,
-        # and llvm is more likely to miscompile them when these functions get large
-        return typemax(Int)
-    end
-    return 0
-end
-
-function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
-                                  params::OptimizationParams)
-    thiscost = 0
-    dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
-    if stmt isa Expr
-        thiscost = statement_cost(stmt, line, src, sptypes, params,
-                                  is_stmt_throw_block(isa(src, IRCode) ? src.stmts.flag[line] : src.ssaflags[line]))::Int
-    elseif stmt isa GotoNode
-        # loops are generally always expensive
-        # but assume that forward jumps are already counted for from
-        # summing the cost of the not-taken branch
-        thiscost = dst(stmt.label) < line ? 40 : 0
-    elseif stmt isa GotoIfNot
-        thiscost = dst(stmt.dest) < line ? 40 : 0
-    end
-    return thiscost
-end
-
-function inline_cost(ir::IRCode, params::OptimizationParams,
-                       cost_threshold::Integer=params.inline_cost_threshold)::InlineCostType
-    bodycost::Int = 0
-    for line = 1:length(ir.stmts)
-        stmt = ir.stmts[line][:inst]
-        thiscost = statement_or_branch_cost(stmt, line, ir, ir.sptypes, params)
-        bodycost = plus_saturate(bodycost, thiscost)
-        bodycost > cost_threshold && return MAX_INLINE_COST
-    end
-    return inline_cost_clamp(bodycost)
-end
-
-function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, params::OptimizationParams)
-    maxcost = 0
-    for line = 1:length(body)
-        stmt = body[line]
-        thiscost = statement_or_branch_cost(stmt, line, src, sptypes,
-                                            params)
-        cost[line] = thiscost
-        if thiscost > maxcost
-            maxcost = thiscost
-        end
-    end
-    return maxcost
-end
-
-function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int})
-    return renumber_ir_elements!(body, ssachangemap, ssachangemap)
-end
-
-function cumsum_ssamap!(ssachangemap::Vector{Int})
-    any_change = false
-    rel_change = 0
-    for i = 1:length(ssachangemap)
-        val = ssachangemap[i]
-        any_change |= val ≠ 0
-        rel_change += val
-        if val == -1
-            # Keep a marker that this statement was deleted
-            ssachangemap[i] = typemin(Int)
-        else
-            ssachangemap[i] = rel_change
-        end
-    end
-    return any_change
-end
-
-function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int})
-    any_change = cumsum_ssamap!(labelchangemap)
-    if ssachangemap !== labelchangemap
-        any_change |= cumsum_ssamap!(ssachangemap)
-    end
-    any_change || return
-    for i = 1:length(body)
-        el = body[i]
-        if isa(el, GotoNode)
-            body[i] = GotoNode(el.label + labelchangemap[el.label])
-        elseif isa(el, GotoIfNot)
-            cond = el.cond
-            if isa(cond, SSAValue)
-                cond = SSAValue(cond.id + ssachangemap[cond.id])
-            end
-            was_deleted = labelchangemap[el.dest] == typemin(Int)
-            body[i] = was_deleted ? cond : GotoIfNot(cond, el.dest + labelchangemap[el.dest])
-        elseif isa(el, ReturnNode)
-            if isdefined(el, :val)
-                val = el.val
-                if isa(val, SSAValue)
-                    body[i] = ReturnNode(SSAValue(val.id + ssachangemap[val.id]))
-                end
-            end
-        elseif isa(el, SSAValue)
-            body[i] = SSAValue(el.id + ssachangemap[el.id])
-        elseif isa(el, PhiNode)
-            i = 1
-            edges = el.edges
-            values = el.values
-            while i <= length(edges)
-                was_deleted = ssachangemap[edges[i]] == typemin(Int)
-                if was_deleted
-                    deleteat!(edges, i)
-                    deleteat!(values, i)
-                else
-                    edges[i] += ssachangemap[edges[i]]
-                    val = values[i]
-                    if isa(val, SSAValue)
-                        values[i] = SSAValue(val.id + ssachangemap[val.id])
-                    end
-                    i += 1
-                end
-            end
-        elseif isa(el, Expr)
-            if el.head === :(=) && el.args[2] isa Expr
-                el = el.args[2]::Expr
-            end
-            if el.head === :enter
-                tgt = el.args[1]::Int
-                el.args[1] = tgt + labelchangemap[tgt]
-            elseif !is_meta_expr_head(el.head)
-                args = el.args
-                for i = 1:length(args)
-                    el = args[i]
-                    if isa(el, SSAValue)
-                        args[i] = SSAValue(el.id + ssachangemap[el.id])
-                    end
-                end
-            end
-        end
-    end
-end
diff --git a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl b/base/compiler/ssair/EscapeAnalysis/interprocedural.jl
deleted file mode 100644
index 26b0e5b404641..0000000000000
--- a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl
+++ /dev/null
@@ -1,159 +0,0 @@
-# TODO this file contains many duplications with the inlining analysis code, factor them out
-
-import Core.Compiler:
-    MethodInstance, InferenceResult, Signature, ConstPropResult, ConcreteResult,
-    SemiConcreteResult, CallInfo, NoCallInfo, MethodResultPure, MethodMatchInfo,
-    UnionSplitInfo, ConstCallInfo, InvokeCallInfo,
-    call_sig, argtypes_to_type, is_builtin, is_return_type, istopfunction,
-    validate_sparams, specialize_method, invoke_rewrite
-
-const Linfo = Union{MethodInstance,InferenceResult}
-struct EACallInfo
-    linfos::Vector{Linfo}
-    nothrow::Bool
-end
-
-function resolve_call(ir::IRCode, stmt::Expr, @nospecialize(info::CallInfo))
-    # TODO: if effect free, return true
-    sig = call_sig(ir, stmt)
-    if sig === nothing
-        return missing
-    end
-    # TODO handle _apply_iterate
-    if is_builtin(𝕃ₒ, sig) && sig.f !== invoke
-        return false
-    end
-    # handling corresponding to late_inline_special_case!
-    (; f, argtypes) = sig
-    if length(argtypes) == 3 && istopfunction(f, :!==)
-        return true
-    elseif length(argtypes) == 3 && istopfunction(f, :(>:))
-        return true
-    elseif f === TypeVar && 2 ≤ length(argtypes) ≤ 4 && (argtypes[2] ⊑ Symbol)
-        return true
-    elseif f === UnionAll && length(argtypes) == 3 && (argtypes[2] ⊑ TypeVar)
-        return true
-    elseif is_return_type(f)
-        return true
-    end
-    if info isa MethodResultPure
-        return true
-    elseif info === NoCallInfo
-        return missing
-    end
-    # TODO handle OpaqueClosureCallInfo
-    if sig.f === invoke
-        isa(info, InvokeCallInfo) || return missing
-        return analyze_invoke_call(sig, info)
-    elseif isa(info, ConstCallInfo)
-        return analyze_const_call(sig, info)
-    elseif isa(info, MethodMatchInfo)
-        infos = MethodMatchInfo[info]
-    elseif isa(info, UnionSplitInfo)
-        infos = info.matches
-    else # isa(info, ReturnTypeCallInfo), etc.
-        return missing
-    end
-    return analyze_call(sig, infos)
-end
-
-function analyze_invoke_call(sig::Signature, info::InvokeCallInfo)
-    match = info.match
-    if !match.fully_covers
-        # TODO: We could union split out the signature check and continue on
-        return missing
-    end
-    result = info.result
-    if isa(result, ConstPropResult)
-        return EACallInfo(Linfo[result.result], true)
-    elseif isa(result, ConcreteResult)
-        return EACallInfo(Linfo[result.mi], true)
-    elseif isa(result, SemiConcreteResult)
-        return EACallInfo(Linfo[result.mi], true)
-    else
-        argtypes = invoke_rewrite(sig.argtypes)
-        mi = analyze_match(match, length(argtypes))
-        mi === nothing && return missing
-        return EACallInfo(Linfo[mi], true)
-    end
-end
-
-function analyze_const_call(sig::Signature, cinfo::ConstCallInfo)
-    linfos = Linfo[]
-    (; call, results) = cinfo
-    infos = isa(call, MethodMatchInfo) ? MethodMatchInfo[call] : call.matches
-    local nothrow = true # required to account for potential escape via MethodError
-    local j = 0
-    for i in 1:length(infos)
-        meth = infos[i].results
-        nothrow &= !meth.ambig
-        nmatch = Core.Compiler.length(meth)
-        if nmatch == 0 # No applicable methods
-            # mark this call may potentially throw, and the try next union split
-            nothrow = false
-            continue
-        end
-        for i = 1:nmatch
-            j += 1
-            result = results[j]
-            match = Core.Compiler.getindex(meth, i)
-            if result === nothing
-                mi = analyze_match(match, length(sig.argtypes))
-                mi === nothing && return missing
-                push!(linfos, mi)
-            elseif isa(result, ConcreteResult)
-                # TODO we may want to feedback information that this call always throws if !isdefined(result, :result)
-                push!(linfos, result.mi)
-            elseif isa(result, SemiConcreteResult)
-                push!(linfos, result.mi)
-            elseif isa(result, ConstPropResult)
-                push!(linfos, result.result)
-            end
-            nothrow &= match.fully_covers
-        end
-    end
-    return EACallInfo(linfos, nothrow)
-end
-
-function analyze_call(sig::Signature, infos::Vector{MethodMatchInfo})
-    linfos = Linfo[]
-    local nothrow = true # required to account for potential escape via MethodError
-    for i in 1:length(infos)
-        meth = infos[i].results
-        nothrow &= !meth.ambig
-        nmatch = Core.Compiler.length(meth)
-        if nmatch == 0 # No applicable methods
-            # mark this call may potentially throw, and the try next union split
-            nothrow = false
-            continue
-        end
-        for i = 1:nmatch
-            match = Core.Compiler.getindex(meth, i)
-            mi = analyze_match(match, length(sig.argtypes))
-            mi === nothing && return missing
-            push!(linfos, mi)
-            nothrow &= match.fully_covers
-        end
-    end
-    return EACallInfo(linfos, nothrow)
-end
-
-function analyze_match(match::MethodMatch, npassedargs::Int)
-    method = match.method
-    na = Int(method.nargs)
-    if na != npassedargs && !(na > 0 && method.isva)
-        # we have a method match only because an earlier
-        # inference step shortened our call args list, even
-        # though we have too many arguments to actually
-        # call this function
-        return nothing
-    end
-
-    # Bail out if any static parameters are left as TypeVar
-    # COMBAK is this needed for escape analysis?
-    validate_sparams(match.sparams) || return nothing
-
-    # See if there exists a specialization for this method signature
-    mi = specialize_method(match; preexisting=true) # Union{Nothing, MethodInstance}
-    return mi
-end
diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
deleted file mode 100644
index 1946a76714e57..0000000000000
--- a/base/compiler/ssair/driver.jl
+++ /dev/null
@@ -1,24 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-if false
-    import Base: Base, @show
-else
-    macro show(ex...)
-        blk = Expr(:block)
-        for s in ex
-            push!(blk.args, :(println(stdout, $(QuoteNode(s)), " = ",
-                                              begin local value = $(esc(s)) end)))
-        end
-        isempty(ex) || push!(blk.args, :value)
-        blk
-    end
-end
-
-include("compiler/ssair/heap.jl")
-include("compiler/ssair/slot2ssa.jl")
-include("compiler/ssair/inlining.jl")
-include("compiler/ssair/verify.jl")
-include("compiler/ssair/legacy.jl")
-include("compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl")
-include("compiler/ssair/passes.jl")
-include("compiler/ssair/irinterp.jl")
diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl
deleted file mode 100644
index f4c04ea4e1380..0000000000000
--- a/base/compiler/ssair/irinterp.jl
+++ /dev/null
@@ -1,388 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-function collect_limitations!(@nospecialize(typ), ::IRInterpretationState)
-    @assert !isa(typ, LimitedAccuracy) "irinterp is unable to handle heavy recursion"
-    return typ
-end
-
-function concrete_eval_invoke(interp::AbstractInterpreter,
-    inst::Expr, mi::MethodInstance, irsv::IRInterpretationState)
-    world = frame_world(irsv)
-    mi_cache = WorldView(code_cache(interp), world)
-    code = get(mi_cache, mi, nothing)
-    code === nothing && return Pair{Any,Bool}(nothing, false)
-    argtypes = collect_argtypes(interp, inst.args[2:end], nothing, irsv)
-    argtypes === nothing && return Pair{Any,Bool}(Bottom, false)
-    effects = decode_effects(code.ipo_purity_bits)
-    if is_foldable(effects) && is_all_const_arg(argtypes, #=start=#1)
-        args = collect_const_args(argtypes, #=start=#1)
-        value = let world = get_world_counter(interp)
-            try
-                Core._call_in_world_total(world, args...)
-            catch
-                return Pair{Any,Bool}(Bottom, false)
-            end
-        end
-        return Pair{Any,Bool}(Const(value), true)
-    else
-        if is_constprop_edge_recursed(mi, irsv)
-            return Pair{Any,Bool}(nothing, is_nothrow(effects))
-        end
-        newirsv = IRInterpretationState(interp, code, mi, argtypes, world)
-        if newirsv !== nothing
-            newirsv.parent = irsv
-            return ir_abstract_constant_propagation(interp, newirsv)
-        end
-        return Pair{Any,Bool}(nothing, is_nothrow(effects))
-    end
-end
-
-abstract_eval_ssavalue(s::SSAValue, sv::IRInterpretationState) = abstract_eval_ssavalue(s, sv.ir)
-
-function abstract_eval_phi_stmt(interp::AbstractInterpreter, phi::PhiNode, ::Int, irsv::IRInterpretationState)
-    return abstract_eval_phi(interp, phi, nothing, irsv)
-end
-
-function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState)
-    si = StmtInfo(true) # TODO better job here?
-    (; rt, effects, info) = abstract_call(interp, arginfo, si, irsv)
-    irsv.ir.stmts[irsv.curridx][:info] = info
-    return RTEffects(rt, effects)
-end
-
-function update_phi!(irsv::IRInterpretationState, from::Int, to::Int)
-    ir = irsv.ir
-    if length(ir.cfg.blocks[to].preds) == 0
-        # Kill the entire block
-        for bidx = ir.cfg.blocks[to].stmts
-            ir.stmts[bidx][:inst] = nothing
-            ir.stmts[bidx][:type] = Bottom
-            ir.stmts[bidx][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
-        end
-        return
-    end
-    for sidx = ir.cfg.blocks[to].stmts
-        sinst = ir.stmts[sidx][:inst]
-        isa(sinst, Nothing) && continue # allowed between `PhiNode`s
-        isa(sinst, PhiNode) || break
-        for (eidx, edge) in enumerate(sinst.edges)
-            if edge == from
-                deleteat!(sinst.edges, eidx)
-                deleteat!(sinst.values, eidx)
-                push!(irsv.ssa_refined, sidx)
-                break
-            end
-        end
-    end
-end
-update_phi!(irsv::IRInterpretationState) = (from::Int, to::Int)->update_phi!(irsv, from, to)
-
-function kill_terminator_edges!(irsv::IRInterpretationState, term_idx::Int, bb::Int=block_for_inst(irsv.ir, term_idx))
-    ir = irsv.ir
-    inst = ir[SSAValue(term_idx)][:inst]
-    if isa(inst, GotoIfNot)
-        kill_edge!(ir, bb, inst.dest, update_phi!(irsv))
-        kill_edge!(ir, bb, bb+1, update_phi!(irsv))
-    elseif isa(inst, GotoNode)
-        kill_edge!(ir, bb, inst.label, update_phi!(irsv))
-    elseif isa(inst, ReturnNode)
-        # Nothing to do
-    else
-        @assert !isexpr(inst, :enter)
-        kill_edge!(ir, bb, bb+1, update_phi!(irsv))
-    end
-end
-
-function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union{Int,Nothing},
-    @nospecialize(inst), @nospecialize(typ), irsv::IRInterpretationState)
-    ir = irsv.ir
-    if isa(inst, GotoIfNot)
-        cond = inst.cond
-        condval = maybe_extract_const_bool(argextype(cond, ir))
-        if condval isa Bool
-            if isa(cond, SSAValue)
-                kill_def_use!(irsv.tpdum, cond, idx)
-            end
-            if bb === nothing
-                bb = block_for_inst(ir, idx)
-            end
-            ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW
-            if condval
-                ir.stmts[idx][:inst] = nothing
-                ir.stmts[idx][:type] = Any
-                kill_edge!(ir, bb, inst.dest, update_phi!(irsv))
-            else
-                ir.stmts[idx][:inst] = GotoNode(inst.dest)
-                kill_edge!(ir, bb, bb+1, update_phi!(irsv))
-            end
-            return true
-        end
-        return false
-    end
-    rt = nothing
-    if isa(inst, Expr)
-        head = inst.head
-        if head === :call || head === :foreigncall || head === :new || head === :splatnew || head === :static_parameter || head === :isdefined
-            (; rt, effects) = abstract_eval_statement_expr(interp, inst, nothing, irsv)
-            ir.stmts[idx][:flag] |= flags_for_effects(effects)
-        elseif head === :invoke
-            rt, nothrow = concrete_eval_invoke(interp, inst, inst.args[1]::MethodInstance, irsv)
-            if nothrow
-                ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW
-            end
-        elseif head === :throw_undef_if_not
-            condval = maybe_extract_const_bool(argextype(inst.args[2], ir))
-            condval isa Bool || return false
-            if condval
-                ir.stmts[idx][:inst] = nothing
-                # We simplified the IR, but we did not update the type
-                return false
-            end
-            rt = Union{}
-        elseif head === :gc_preserve_begin ||
-               head === :gc_preserve_end
-            return false
-        else
-            error("reprocess_instruction!: unhandled expression found")
-        end
-    elseif isa(inst, PhiNode)
-        rt = abstract_eval_phi_stmt(interp, inst, idx, irsv)
-    elseif isa(inst, ReturnNode)
-        # Handled at the very end
-        return false
-    elseif isa(inst, PiNode)
-        rt = tmeet(typeinf_lattice(interp), argextype(inst.val, ir), widenconst(inst.typ))
-    elseif inst === nothing
-        return false
-    elseif isa(inst, GlobalRef)
-        # GlobalRef is not refinable
-    else
-        rt = argextype(inst, irsv.ir)
-    end
-    if rt !== nothing
-        if isa(rt, Const)
-            ir.stmts[idx][:type] = rt
-            if is_inlineable_constant(rt.val) && (ir.stmts[idx][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) == IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
-                ir.stmts[idx][:inst] = quoted(rt.val)
-            end
-            return true
-        elseif !⊑(typeinf_lattice(interp), typ, rt)
-            ir.stmts[idx][:type] = rt
-            return true
-        end
-    end
-    return false
-end
-
-# Process the terminator and add the successor to `bb_ip`. Returns whether a backedge was seen.
-function process_terminator!(ir::IRCode, @nospecialize(inst), idx::Int, bb::Int,
-    all_rets::Vector{Int}, bb_ip::BitSetBoundedMinPrioritySet)
-    if isa(inst, ReturnNode)
-        if isdefined(inst, :val)
-            push!(all_rets, idx)
-        end
-        return false
-    elseif isa(inst, GotoNode)
-        backedge = inst.label <= bb
-        backedge || push!(bb_ip, inst.label)
-        return backedge
-    elseif isa(inst, GotoIfNot)
-        backedge = inst.dest <= bb
-        backedge || push!(bb_ip, inst.dest)
-        push!(bb_ip, bb+1)
-        return backedge
-    elseif isexpr(inst, :enter)
-        dest = inst.args[1]::Int
-        @assert dest > bb
-        push!(bb_ip, dest)
-        push!(bb_ip, bb+1)
-        return false
-    else
-        push!(bb_ip, bb+1)
-        return false
-    end
-end
-
-function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState;
-        externally_refined::Union{Nothing,BitSet} = nothing)
-    interp = switch_to_irinterp(interp)
-
-    (; ir, tpdum, ssa_refined) = irsv
-
-    bbs = ir.cfg.blocks
-    bb_ip = BitSetBoundedMinPrioritySet(length(bbs))
-    push!(bb_ip, 1)
-    all_rets = Int[]
-
-    # Fast path: Scan both use counts and refinement in one single pass of
-    #            of the instructions. In the absence of backedges, this will
-    #            converge.
-    while !isempty(bb_ip)
-        bb = popfirst!(bb_ip)
-        stmts = bbs[bb].stmts
-        lstmt = last(stmts)
-        for idx = stmts
-            irsv.curridx = idx
-            inst = ir.stmts[idx][:inst]
-            typ = ir.stmts[idx][:type]
-            flag = ir.stmts[idx][:flag]
-            any_refined = false
-            if (flag & IR_FLAG_REFINED) != 0
-                any_refined = true
-                ir.stmts[idx][:flag] &= ~IR_FLAG_REFINED
-            end
-            for ur in userefs(inst)
-                val = ur[]
-                if isa(val, Argument)
-                    any_refined |= irsv.argtypes_refined[val.n]
-                elseif isa(val, SSAValue)
-                    any_refined |= val.id in ssa_refined
-                    count!(tpdum, val)
-                end
-            end
-            if isa(inst, PhiNode) && idx in ssa_refined
-                any_refined = true
-                delete!(ssa_refined, idx)
-            end
-            is_terminator_or_phi = isa(inst, PhiNode) || isa(inst, GotoNode) || isa(inst, GotoIfNot) || isa(inst, ReturnNode) || isexpr(inst, :enter)
-            if typ === Bottom && (idx != lstmt || !is_terminator_or_phi)
-                continue
-            end
-            if (any_refined && reprocess_instruction!(interp,
-                    idx, bb, inst, typ, irsv)) ||
-               (externally_refined !== nothing && idx in externally_refined)
-                push!(ssa_refined, idx)
-                inst = ir.stmts[idx][:inst]
-                typ = ir.stmts[idx][:type]
-            end
-            if typ === Bottom && !is_terminator_or_phi
-                kill_terminator_edges!(irsv, lstmt, bb)
-                if idx != lstmt
-                    for idx2 in (idx+1:lstmt-1)
-                        ir[SSAValue(idx2)] = nothing
-                    end
-                    ir[SSAValue(lstmt)][:inst] = ReturnNode()
-                end
-                break
-            end
-            if idx == lstmt
-                process_terminator!(ir, inst, idx, bb, all_rets, bb_ip) && @goto residual_scan
-            end
-        end
-    end
-    @goto compute_rt
-
-    # Slow path
-    begin @label residual_scan
-        stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts))
-
-        # Slow Path Phase 1.A: Complete use scanning
-        while !isempty(bb_ip)
-            bb = popfirst!(bb_ip)
-            stmts = bbs[bb].stmts
-            lstmt = last(stmts)
-            for idx = stmts
-                irsv.curridx = idx
-                inst = ir.stmts[idx][:inst]
-                flag = ir.stmts[idx][:flag]
-                if (flag & IR_FLAG_REFINED) != 0
-                    ir.stmts[idx][:flag] &= ~IR_FLAG_REFINED
-                    push!(stmt_ip, idx)
-                end
-                for ur in userefs(inst)
-                    val = ur[]
-                    if isa(val, Argument)
-                        if irsv.argtypes_refined[val.n]
-                            push!(stmt_ip, idx)
-                        end
-                    elseif isa(val, SSAValue)
-                        count!(tpdum, val)
-                    end
-                end
-                idx == lstmt && process_terminator!(ir, inst, idx, bb, all_rets, bb_ip)
-            end
-        end
-
-        # Slow Path Phase 1.B: Assemble def-use map
-        complete!(tpdum)
-        push!(bb_ip, 1)
-        while !isempty(bb_ip)
-            bb = popfirst!(bb_ip)
-            stmts = bbs[bb].stmts
-            lstmt = last(stmts)
-            for idx = stmts
-                irsv.curridx = idx
-                inst = ir.stmts[idx][:inst]
-                for ur in userefs(inst)
-                    val = ur[]
-                    if isa(val, SSAValue)
-                        push!(tpdum[val.id], idx)
-                    end
-                end
-                idx == lstmt && process_terminator!(ir, inst, idx, bb, all_rets, bb_ip)
-            end
-        end
-
-        # Slow Path Phase 2: Use def-use map to converge cycles.
-        # TODO: It would be possible to return to the fast path after converging
-        #       each cycle, but that's somewhat complicated.
-        for val in ssa_refined
-            append!(stmt_ip, tpdum[val])
-        end
-        while !isempty(stmt_ip)
-            idx = popfirst!(stmt_ip)
-            irsv.curridx = idx
-            inst = ir.stmts[idx][:inst]
-            typ = ir.stmts[idx][:type]
-            if reprocess_instruction!(interp,
-                idx, nothing, inst, typ, irsv)
-                append!(stmt_ip, tpdum[idx])
-            end
-        end
-    end
-
-    begin @label compute_rt
-        ultimate_rt = Bottom
-        for idx in all_rets
-            bb = block_for_inst(ir.cfg, idx)
-            if bb != 1 && length(ir.cfg.blocks[bb].preds) == 0
-                # Could have discovered this block is dead after the initial scan
-                continue
-            end
-            inst = ir.stmts[idx][:inst]::ReturnNode
-            rt = argextype(inst.val, ir)
-            ultimate_rt = tmerge(typeinf_lattice(interp), ultimate_rt, rt)
-        end
-    end
-
-    nothrow = true
-    for idx = 1:length(ir.stmts)
-        if (ir.stmts[idx][:flag] & IR_FLAG_NOTHROW) == 0
-            nothrow = false
-            break
-        end
-    end
-
-    if last(irsv.valid_worlds) >= get_world_counter()
-        # if we aren't cached, we don't need this edge
-        # but our caller might, so let's just make it anyways
-        store_backedges(frame_instance(irsv), irsv.edges)
-    end
-
-    return Pair{Any,Bool}(maybe_singleton_const(ultimate_rt), nothrow)
-end
-
-function ir_abstract_constant_propagation(interp::NativeInterpreter, irsv::IRInterpretationState)
-    if __measure_typeinf__[]
-        inf_frame = Timings.InferenceFrameInfo(irsv.mi, irsv.world, VarState[], Any[], length(irsv.ir.argtypes))
-        Timings.enter_new_timer(inf_frame)
-        ret = _ir_abstract_constant_propagation(interp, irsv)
-        append!(inf_frame.slottypes, irsv.ir.argtypes)
-        Timings.exit_current_timer(inf_frame)
-        return ret
-    else
-        return _ir_abstract_constant_propagation(interp, irsv)
-    end
-end
-ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState) =
-    _ir_abstract_constant_propagation(interp, irsv)
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
deleted file mode 100644
index 9f55d56181838..0000000000000
--- a/base/compiler/stmtinfo.jl
+++ /dev/null
@@ -1,225 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-@nospecialize
-
-"""
-    call::CallMeta
-
-A simple struct that captures both the return type (`call.rt`)
-and any additional information (`call.info`) for a given generic call.
-"""
-struct CallMeta
-    rt::Any
-    effects::Effects
-    info::CallInfo
-end
-
-struct NoCallInfo <: CallInfo end
-
-"""
-    info::MethodMatchInfo <: CallInfo
-
-Captures the result of a `:jl_matching_methods` lookup for the given call (`info.results`).
-This info may then be used by the optimizer to inline the matches, without having
-to re-consult the method table. This info is illegal on any statement that is
-not a call to a generic function.
-"""
-struct MethodMatchInfo <: CallInfo
-    results::MethodLookupResult
-end
-nsplit_impl(info::MethodMatchInfo) = 1
-getsplit_impl(info::MethodMatchInfo, idx::Int) = (@assert idx == 1; info.results)
-getresult_impl(::MethodMatchInfo, ::Int) = nothing
-
-"""
-    info::UnionSplitInfo <: CallInfo
-
-If inference decides to partition the method search space by splitting unions,
-it will issue a method lookup query for each such partition. This info indicates
-that such partitioning happened and wraps the corresponding `MethodMatchInfo` for
-each partition (`info.matches::Vector{MethodMatchInfo}`).
-This info is illegal on any statement that is not a call to a generic function.
-"""
-struct UnionSplitInfo <: CallInfo
-    matches::Vector{MethodMatchInfo}
-end
-
-nmatches(info::MethodMatchInfo) = length(info.results)
-function nmatches(info::UnionSplitInfo)
-    n = 0
-    for mminfo in info.matches
-        n += nmatches(mminfo)
-    end
-    return n
-end
-nsplit_impl(info::UnionSplitInfo) = length(info.matches)
-getsplit_impl(info::UnionSplitInfo, idx::Int) = getsplit_impl(info.matches[idx], 1)
-getresult_impl(::UnionSplitInfo, ::Int) = nothing
-
-abstract type ConstResult end
-
-struct ConstPropResult <: ConstResult
-    result::InferenceResult
-end
-
-struct ConcreteResult <: ConstResult
-    mi::MethodInstance
-    effects::Effects
-    result
-    ConcreteResult(mi::MethodInstance, effects::Effects) = new(mi, effects)
-    ConcreteResult(mi::MethodInstance, effects::Effects, @nospecialize val) = new(mi, effects, val)
-end
-
-struct SemiConcreteResult <: ConstResult
-    mi::MethodInstance
-    ir::IRCode
-    effects::Effects
-end
-
-"""
-    info::ConstCallInfo <: CallInfo
-
-The precision of this call was improved using constant information.
-In addition to the original call information `info.call`, this info also keeps the results
-of constant inference `info.results::Vector{Union{Nothing,ConstResult}}`.
-"""
-struct ConstCallInfo <: CallInfo
-    call::Union{MethodMatchInfo,UnionSplitInfo}
-    results::Vector{Union{Nothing,ConstResult}}
-end
-nsplit_impl(info::ConstCallInfo) = nsplit(info.call)
-getsplit_impl(info::ConstCallInfo, idx::Int) = getsplit(info.call, idx)
-getresult_impl(info::ConstCallInfo, idx::Int) = info.results[idx]
-
-"""
-    info::MethodResultPure <: CallInfo
-
-This struct represents a method result constant was proven to be
-effect-free, including being no-throw (typically because the value was computed
-by calling an `@pure` function).
-"""
-struct MethodResultPure <: CallInfo
-    info::CallInfo
-end
-let instance = MethodResultPure(NoCallInfo())
-    global MethodResultPure
-    MethodResultPure() = instance
-end
-
-"""
-    ainfo::AbstractIterationInfo
-
-Captures all the information for abstract iteration analysis of a single value.
-Each (abstract) call to `iterate`, corresponds to one entry in `ainfo.each::Vector{CallMeta}`.
-"""
-struct AbstractIterationInfo
-    each::Vector{CallMeta}
-    complete::Bool
-end
-
-const MaybeAbstractIterationInfo = Union{Nothing, AbstractIterationInfo}
-
-"""
-    info::ApplyCallInfo <: CallInfo
-
-This info applies to any call of `_apply_iterate(...)` and captures both the
-info of the actual call being applied and the info for any implicit call
-to the `iterate` function. Note that it is possible for the call itself
-to be yet another `_apply_iterate`, in which case the `info.call` field will
-be another `ApplyCallInfo`. This info is illegal on any statement that is
-not an `_apply_iterate` call.
-"""
-struct ApplyCallInfo <: CallInfo
-    # The info for the call itself
-    call::Any
-    # AbstractIterationInfo for each argument, if applicable
-    arginfo::Vector{MaybeAbstractIterationInfo}
-end
-
-"""
-    info::UnionSplitApplyCallInfo <: CallInfo
-
-Like `UnionSplitInfo`, but for `ApplyCallInfo` rather than `MethodMatchInfo`.
-This info is illegal on any statement that is not an `_apply_iterate` call.
-"""
-struct UnionSplitApplyCallInfo <: CallInfo
-    infos::Vector{ApplyCallInfo}
-end
-
-"""
-    info::InvokeCallInfo
-
-Represents a resolved call to `Core.invoke`, carrying the `info.match::MethodMatch` of
-the method that has been processed.
-Optionally keeps `info.result::InferenceResult` that keeps constant information.
-"""
-struct InvokeCallInfo <: CallInfo
-    match::MethodMatch
-    result::Union{Nothing,ConstResult}
-end
-
-"""
-    info::OpaqueClosureCallInfo
-
-Represents a resolved call of opaque closure, carrying the `info.match::MethodMatch` of
-the method that has been processed.
-Optionally keeps `info.result::InferenceResult` that keeps constant information.
-"""
-struct OpaqueClosureCallInfo <: CallInfo
-    match::MethodMatch
-    result::Union{Nothing,ConstResult}
-end
-
-"""
-    info::OpaqueClosureCreateInfo <: CallInfo
-
-This info may be constructed upon opaque closure construction, with `info.unspec::CallMeta`
-carrying out inference result of an unreal, partially specialized call (i.e. specialized on
-the closure environment, but not on the argument types of the opaque closure) in order to
-allow the optimizer to rewrite the return type parameter of the `OpaqueClosure` based on it.
-"""
-struct OpaqueClosureCreateInfo <: CallInfo
-    unspec::CallMeta
-    function OpaqueClosureCreateInfo(unspec::CallMeta)
-        @assert isa(unspec.info, OpaqueClosureCallInfo)
-        return new(unspec)
-    end
-end
-
-# Stmt infos that are used by external consumers, but not by optimization.
-# These are not produced by default and must be explicitly opted into by
-# the AbstractInterpreter.
-
-"""
-    info::ReturnTypeCallInfo <: CallInfo
-
-Represents a resolved call of `Core.Compiler.return_type`.
-`info.call` wraps the info corresponding to the call that `Core.Compiler.return_type` call
-was supposed to analyze.
-"""
-struct ReturnTypeCallInfo <: CallInfo
-    info::CallInfo
-end
-
-"""
-    info::FinalizerInfo <: CallInfo
-
-Represents the information of a potential (later) call to the finalizer on the given
-object type.
-"""
-struct FinalizerInfo <: CallInfo
-    info::CallInfo   # the callinfo for the finalizer call
-    effects::Effects # the effects for the finalizer call
-end
-
-"""
-    info::ModifyFieldInfo <: CallInfo
-
-Represents a resolved all of `modifyfield!(obj, name, op, x, [order])`.
-`info.info` wraps the call information of `op(getfield(obj, name), x)`.
-"""
-struct ModifyFieldInfo <: CallInfo
-    info::CallInfo # the callinfo for the `op(getfield(obj, name), x)` call
-end
-
-@specialize
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
deleted file mode 100644
index 1198aa9fa6b35..0000000000000
--- a/base/compiler/typeinfer.jl
+++ /dev/null
@@ -1,1131 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Tracking of newly-inferred CodeInstances during precompilation
-const track_newly_inferred = RefValue{Bool}(false)
-const newly_inferred = CodeInstance[]
-
-# build (and start inferring) the inference frame for the top-level MethodInstance
-function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol)
-    frame = InferenceState(result, cache, interp)
-    frame === nothing && return false
-    cache === :global && lock_mi_inference(interp, result.linfo)
-    return typeinf(interp, frame)
-end
-
-"""
-The module `Core.Compiler.Timings` provides a simple implementation of nested timers that
-can be used to measure the exclusive time spent inferring each method instance that is
-recursively inferred during type inference.
-
-This is meant to be internal to the compiler, and makes some specific assumptions about
-being used for this purpose alone.
-"""
-module Timings
-
-using Core.Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline,
-    @inbounds, copy, backtrace
-
-# What we record for any given frame we infer during type inference.
-struct InferenceFrameInfo
-    mi::Core.MethodInstance
-    world::UInt64
-    sptypes::Vector{Core.Compiler.VarState}
-    slottypes::Vector{Any}
-    nargs::Int
-end
-
-function _typeinf_identifier(frame::Core.Compiler.InferenceState)
-    mi_info = InferenceFrameInfo(
-        frame.linfo,
-        frame.world,
-        copy(frame.sptypes),
-        copy(frame.slottypes),
-        length(frame.result.argtypes),
-    )
-    return mi_info
-end
-
-_typeinf_identifier(frame::InferenceFrameInfo) = frame
-
-"""
-    Core.Compiler.Timing(mi_info, start_time, ...)
-
-Internal type containing the timing result for running type inference on a single
-MethodInstance.
-"""
-struct Timing
-    mi_info::InferenceFrameInfo
-    start_time::UInt64
-    cur_start_time::UInt64
-    time::UInt64
-    children::Core.Array{Timing,1}
-    bt         # backtrace collected upon initial entry to typeinf
-end
-Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
-Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])
-
-_time_ns() = ccall(:jl_hrtime, UInt64, ())  # Re-implemented here because Base not yet available.
-
-# We keep a stack of the Timings for each of the MethodInstances currently being timed.
-# Since type inference currently operates via a depth-first search (during abstract
-# evaluation), this vector operates like a call stack. The last node in _timings is the
-# node currently being inferred, and its parent is directly before it, etc.
-# Each Timing also contains its own vector for all of its children, so that the tree
-# call structure through type inference is recorded. (It's recorded as a tree, not a graph,
-# because we create a new node for duplicates.)
-const _timings = Timing[]
-# ROOT() is an empty function used as the top-level Timing node to measure all time spent
-# *not* in type inference during a given recording trace. It is used as a "dummy" node.
-function ROOT() end
-const ROOTmi = Core.Compiler.specialize_method(
-    first(Core.Compiler.methods(ROOT)), Tuple{typeof(ROOT)}, Core.svec())
-"""
-    Core.Compiler.reset_timings()
-
-Empty out the previously recorded type inference timings (`Core.Compiler._timings`), and
-start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference.
-"""
-function reset_timings()
-    empty!(_timings)
-    push!(_timings, Timing(
-        # The MethodInstance for ROOT(), and default empty values for other fields.
-        InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1),
-        _time_ns()))
-    return nothing
-end
-reset_timings()
-
-# (This is split into a function so that it can be called both in this module, at the top
-# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started
-# the operation and called `reset_timings()`.)
-# NOTE: the @inline annotations here are not to make it faster, but to reduce the gap between
-# timer manipulations and the tasks we're timing.
-@inline function close_current_timer()
-    stop_time = _time_ns()
-    parent_timer = _timings[end]
-    accum_time = stop_time - parent_timer.cur_start_time
-
-    # Add in accum_time ("modify" the immutable struct)
-    @inbounds begin
-        _timings[end] = Timing(
-            parent_timer.mi_info,
-            parent_timer.start_time,
-            parent_timer.cur_start_time,
-            parent_timer.time + accum_time,
-            parent_timer.children,
-            parent_timer.bt,
-        )
-    end
-    return nothing
-end
-
-@inline function enter_new_timer(frame)
-    # Very first thing, stop the active timer: get the current time and add in the
-    # time since it was last started to its aggregate exclusive time.
-    close_current_timer()
-
-    mi_info = _typeinf_identifier(frame)
-
-    # Start the new timer right before returning
-    push!(_timings, Timing(mi_info, UInt64(0)))
-    len = length(_timings)
-    new_timer = @inbounds _timings[len]
-    # Set the current time _after_ appending the node, to try to exclude the
-    # overhead from measurement.
-    start = _time_ns()
-
-    @inbounds begin
-        _timings[len] = Timing(
-            new_timer.mi_info,
-            start,
-            start,
-            new_timer.time,
-            new_timer.children,
-        )
-    end
-
-    return nothing
-end
-
-# _expected_frame_ is not needed within this function; it is used in the `@assert`, to
-# assert that indeed we are always returning to a parent after finishing all of its
-# children (that is, asserting that inference proceeds via depth-first-search).
-@inline function exit_current_timer(_expected_frame_)
-    # Finish the new timer
-    stop_time = _time_ns()
-
-    expected_mi_info = _typeinf_identifier(_expected_frame_)
-
-    # Grab the new timer again because it might have been modified in _timings
-    # (since it's an immutable struct)
-    # And remove it from the current timings stack
-    new_timer = pop!(_timings)
-    Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi
-
-    # Prepare to unwind one level of the stack and record in the parent
-    parent_timer = _timings[end]
-
-    accum_time = stop_time - new_timer.cur_start_time
-    # Add in accum_time ("modify" the immutable struct)
-    new_timer = Timing(
-        new_timer.mi_info,
-        new_timer.start_time,
-        new_timer.cur_start_time,
-        new_timer.time + accum_time,
-        new_timer.children,
-        parent_timer.mi_info.mi === ROOTmi ? backtrace() : nothing,
-    )
-    # Record the final timing with the original parent timer
-    push!(parent_timer.children, new_timer)
-
-    # And finally restart the parent timer:
-    len = length(_timings)
-    @inbounds begin
-        _timings[len] = Timing(
-            parent_timer.mi_info,
-            parent_timer.start_time,
-            _time_ns(),
-            parent_timer.time,
-            parent_timer.children,
-            parent_timer.bt,
-        )
-    end
-
-    return nothing
-end
-
-end  # module Timings
-
-"""
-    Core.Compiler.__set_measure_typeinf(onoff::Bool)
-
-If set to `true`, record per-method-instance timings within type inference in the Compiler.
-"""
-__set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
-const __measure_typeinf__ = fill(false)
-
-# Wrapper around `_typeinf` that optionally records the exclusive time for
-# each inference performed by `NativeInterpreter`.
-function typeinf(interp::NativeInterpreter, frame::InferenceState)
-    if __measure_typeinf__[]
-        Timings.enter_new_timer(frame)
-        v = _typeinf(interp, frame)
-        Timings.exit_current_timer(frame)
-        return v
-    else
-        return _typeinf(interp, frame)
-    end
-end
-typeinf(interp::AbstractInterpreter, frame::InferenceState) = _typeinf(interp, frame)
-
-function finish!(interp::AbstractInterpreter, caller::InferenceResult)
-    # If we didn't transform the src for caching, we may have to transform
-    # it anyway for users like typeinf_ext. Do that here.
-    opt = caller.src
-    if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true`
-        if opt.ir !== nothing
-            if caller.must_be_codeinf
-                caller.src = ir_to_codeinf!(opt)
-            elseif is_inlineable(opt.src)
-                # TODO: If the CFG is too big, inlining becomes more expensive and if we're going to
-                # use this IR over and over, it's worth simplifying it. Round trips through
-                # CodeInstance do this implicitly, since they recompute the CFG, so try to
-                # match that behavior here.
-                # ir = cfg_simplify!(opt.ir)
-                caller.src = opt.ir
-            else
-                # Not cached and not inlineable - drop the ir
-                caller.src = nothing
-            end
-        end
-    end
-    return caller.src
-end
-
-function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
-    interp = switch_from_irinterp(interp)
-    typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle
-    # with no active ip's, frame is done
-    frames = frame.callers_in_cycle
-    isempty(frames) && push!(frames, frame)
-    valid_worlds = WorldRange()
-    for caller in frames
-        @assert !(caller.dont_work_on_me)
-        caller.dont_work_on_me = true
-        # might might not fully intersect these earlier, so do that now
-        valid_worlds = intersect(caller.valid_worlds, valid_worlds)
-    end
-    for caller in frames
-        caller.valid_worlds = valid_worlds
-        finish(caller, caller.interp)
-    end
-    for caller in frames
-        opt = caller.result.src
-        if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
-            optimize(caller.interp, opt, caller.result)
-        end
-    end
-    for caller in frames
-        (; result ) = caller
-        valid_worlds = result.valid_worlds
-        if last(valid_worlds) >= get_world_counter()
-            # if we aren't cached, we don't need this edge
-            # but our caller might, so let's just make it anyways
-            store_backedges(result, caller.stmt_edges[1])
-        end
-        if caller.cached
-            cache_result!(caller.interp, result)
-        end
-        finish!(caller.interp, result)
-    end
-    empty!(frames)
-    return true
-end
-
-function CodeInstance(interp::AbstractInterpreter, result::InferenceResult,
-                      @nospecialize(inferred_result), valid_worlds::WorldRange)
-    local const_flags::Int32
-    result_type = result.result
-    @assert !(result_type === nothing || result_type isa LimitedAccuracy)
-
-    if isa(result_type, Const) && is_foldable_nothrow(result.ipo_effects) && is_inlineable_constant(result_type.val)
-        # use constant calling convention
-        rettype_const = result_type.val
-        const_flags = 0x3
-        if may_discard_trees(interp)
-            inferred_result = nothing
-        end
-    else
-        if isa(result_type, Const)
-            rettype_const = result_type.val
-            const_flags = 0x2
-        elseif isa(result_type, PartialOpaque)
-            rettype_const = result_type
-            const_flags = 0x2
-        elseif isconstType(result_type)
-            rettype_const = result_type.parameters[1]
-            const_flags = 0x2
-        elseif isa(result_type, PartialStruct)
-            rettype_const = result_type.fields
-            const_flags = 0x2
-        elseif isa(result_type, InterConditional)
-            rettype_const = result_type
-            const_flags = 0x2
-        elseif isa(result_type, InterMustAlias)
-            rettype_const = result_type
-            const_flags = 0x2
-        else
-            rettype_const = nothing
-            const_flags = 0x00
-        end
-    end
-    relocatability = 0x0
-    if isa(inferred_result, String)
-        t = @_gc_preserve_begin inferred_result
-        relocatability = unsafe_load(unsafe_convert(Ptr{UInt8}, inferred_result), Core.sizeof(inferred_result))
-        @_gc_preserve_end t
-    elseif inferred_result === nothing
-        relocatability = 0x1
-    end
-    # relocatability = isa(inferred_result, String) ? inferred_result[end] : UInt8(0)
-    return CodeInstance(result.linfo,
-        widenconst(result_type), rettype_const, inferred_result,
-        const_flags, first(valid_worlds), last(valid_worlds),
-        # TODO: Actually do something with non-IPO effects
-        encode_effects(result.ipo_effects), encode_effects(result.ipo_effects), result.argescapes,
-        relocatability)
-end
-
-function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInstance, ci::CodeInfo)
-    def = linfo.def
-    toplevel = !isa(def, Method)
-    if toplevel
-        return ci
-    end
-    if may_discard_trees(interp)
-        cache_the_tree = ci.inferred && (is_inlineable(ci) || isa_compileable_sig(linfo.specTypes, linfo.sparam_vals, def))
-    else
-        cache_the_tree = true
-    end
-    if cache_the_tree
-        if may_compress(interp)
-            nslots = length(ci.slotflags)
-            resize!(ci.slottypes::Vector{Any}, nslots)
-            resize!(ci.slotnames, nslots)
-            return ccall(:jl_compress_ir, String, (Any, Any), def, ci)
-        else
-            return ci
-        end
-    else
-        return nothing
-    end
-end
-
-function transform_result_for_cache(interp::AbstractInterpreter,
-    linfo::MethodInstance, valid_worlds::WorldRange, result::InferenceResult)
-    inferred_result = result.src
-    if inferred_result isa OptimizationState{typeof(interp)}
-        # TODO respect must_be_codeinf setting here?
-        result.src = inferred_result = ir_to_codeinf!(inferred_result)
-    end
-    if inferred_result isa CodeInfo
-        inferred_result.min_world = first(valid_worlds)
-        inferred_result.max_world = last(valid_worlds)
-        inferred_result = maybe_compress_codeinfo(interp, linfo, inferred_result)
-    end
-    # The global cache can only handle objects that codegen understands
-    if !isa(inferred_result, MaybeCompressed)
-        inferred_result = nothing
-    end
-    return inferred_result
-end
-
-function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
-    valid_worlds = result.valid_worlds
-    if last(valid_worlds) == get_world_counter()
-        # if we've successfully recorded all of the backedges in the global reverse-cache,
-        # we can now widen our applicability in the global cache too
-        valid_worlds = WorldRange(first(valid_worlds), typemax(UInt))
-    end
-    # check if the existing linfo metadata is also sufficient to describe the current inference result
-    # to decide if it is worth caching this
-    linfo = result.linfo
-    already_inferred = already_inferred_quick_test(interp, linfo)
-    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), linfo)
-        already_inferred = true
-    end
-
-    # TODO: also don't store inferred code if we've previously decided to interpret this function
-    if !already_inferred
-        inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result)
-        code_cache(interp)[linfo] = ci = CodeInstance(interp, result, inferred_result, valid_worlds)
-        if track_newly_inferred[]
-            m = linfo.def
-            if isa(m, Method) && m.module != Core
-                ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci)
-            end
-        end
-    end
-    unlock_mi_inference(interp, linfo)
-    nothing
-end
-
-function cycle_fix_limited(@nospecialize(typ), sv::InferenceState)
-    if typ isa LimitedAccuracy
-        if sv.parent === nothing
-            # when part of a cycle, we might have unintentionally introduced a limit marker
-            @assert !isempty(sv.callers_in_cycle)
-            return typ.typ
-        end
-        causes = copy(typ.causes)
-        delete!(causes, sv)
-        for caller in sv.callers_in_cycle
-            delete!(causes, caller)
-        end
-        if isempty(causes)
-            return typ.typ
-        end
-        if length(causes) != length(typ.causes)
-            return LimitedAccuracy(typ.typ, causes)
-        end
-    end
-    return typ
-end
-
-function adjust_effects(sv::InferenceState)
-    ipo_effects = sv.ipo_effects
-
-    # refine :consistent-cy effect using the return type information
-    # TODO this adjustment tries to compromise imprecise :consistent-cy information,
-    # that is currently modeled in a flow-insensitive way: ideally we want to model it
-    # with a proper dataflow analysis instead
-    rt = sv.bestguess
-    if ipo_effects.noinbounds && rt === Bottom
-        # always throwing an error counts or never returning both count as consistent
-        ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE)
-    end
-    if is_inaccessiblemem_or_argmemonly(ipo_effects) && all(1:narguments(sv, #=include_va=#true)) do i::Int
-            return is_mutation_free_argtype(sv.slottypes[i])
-        end
-        ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE)
-    end
-    if is_consistent_if_notreturned(ipo_effects) && is_identity_free_argtype(rt)
-        # in a case when the :consistent-cy here is only tainted by mutable allocations
-        # (indicated by `CONSISTENT_IF_NOTRETURNED`), we may be able to refine it if the return
-        # type guarantees that the allocations are never returned
-        consistent = ipo_effects.consistent & ~CONSISTENT_IF_NOTRETURNED
-        ipo_effects = Effects(ipo_effects; consistent)
-    end
-    if is_consistent_if_inaccessiblememonly(ipo_effects)
-        if is_inaccessiblememonly(ipo_effects)
-            consistent = ipo_effects.consistent & ~CONSISTENT_IF_INACCESSIBLEMEMONLY
-            ipo_effects = Effects(ipo_effects; consistent)
-        elseif is_inaccessiblemem_or_argmemonly(ipo_effects)
-        else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this
-            ipo_effects = Effects(ipo_effects; consistent=ALWAYS_FALSE)
-        end
-    end
-    if is_effect_free_if_inaccessiblememonly(ipo_effects)
-        if is_inaccessiblememonly(ipo_effects)
-            effect_free = ipo_effects.effect_free & ~EFFECT_FREE_IF_INACCESSIBLEMEMONLY
-            ipo_effects = Effects(ipo_effects; effect_free)
-        elseif is_inaccessiblemem_or_argmemonly(ipo_effects)
-        else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this
-            ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_FALSE)
-        end
-    end
-
-    # override the analyzed effects using manually annotated effect settings
-    def = sv.linfo.def
-    if isa(def, Method)
-        override = decode_effects_override(def.purity)
-        if is_effect_overridden(override, :consistent)
-            ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE)
-        end
-        if is_effect_overridden(override, :effect_free)
-            ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_TRUE)
-        end
-        if is_effect_overridden(override, :nothrow)
-            ipo_effects = Effects(ipo_effects; nothrow=true)
-        end
-        if is_effect_overridden(override, :terminates_globally)
-            ipo_effects = Effects(ipo_effects; terminates=true)
-        end
-        if is_effect_overridden(override, :notaskstate)
-            ipo_effects = Effects(ipo_effects; notaskstate=true)
-        end
-        if is_effect_overridden(override, :inaccessiblememonly)
-            ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE)
-        end
-    end
-
-    return ipo_effects
-end
-
-# inference completed on `me`
-# update the MethodInstance
-function finish(me::InferenceState, interp::AbstractInterpreter)
-    # prepare to run optimization passes on fulltree
-    s_edges = me.stmt_edges[1]
-    if s_edges === nothing
-        s_edges = me.stmt_edges[1] = []
-    end
-    for edges in me.stmt_edges
-        edges === nothing && continue
-        edges === s_edges && continue
-        append!(s_edges, edges)
-        empty!(edges)
-    end
-    if me.src.edges !== nothing
-        append!(s_edges, me.src.edges::Vector)
-        me.src.edges = nothing
-    end
-    # inspect whether our inference had a limited result accuracy,
-    # else it may be suitable to cache
-    bestguess = me.bestguess = cycle_fix_limited(me.bestguess, me)
-    limited_ret = bestguess isa LimitedAccuracy
-    limited_src = false
-    if !limited_ret
-        gt = me.ssavaluetypes
-        for j = 1:length(gt)
-            gt[j] = gtj = cycle_fix_limited(gt[j], me)
-            if gtj isa LimitedAccuracy && me.parent !== nothing
-                limited_src = true
-                break
-            end
-        end
-    end
-    if limited_ret
-        # a parent may be cached still, but not this intermediate work:
-        # we can throw everything else away now
-        me.result.src = nothing
-        me.cached = false
-        set_inlineable!(me.src, false)
-        unlock_mi_inference(interp, me.linfo)
-    elseif limited_src
-        # a type result will be cached still, but not this intermediate work:
-        # we can throw everything else away now
-        me.result.src = nothing
-        set_inlineable!(me.src, false)
-    else
-        # annotate fulltree with type information,
-        # either because we are the outermost code, or we might use this later
-        doopt = (me.cached || me.parent !== nothing)
-        recompute_cfg = type_annotate!(interp, me, doopt)
-        if doopt && may_optimize(interp)
-            me.result.src = OptimizationState(me, interp, recompute_cfg)
-        else
-            me.result.src = me.src::CodeInfo # stash a convenience copy of the code (e.g. for reflection)
-        end
-    end
-    me.result.valid_worlds = me.valid_worlds
-    me.result.result = bestguess
-    me.ipo_effects = me.result.ipo_effects = adjust_effects(me)
-    validate_code_in_debug_mode(me.linfo, me.src, "inferred")
-    nothing
-end
-
-# record the backedges
-function store_backedges(caller::InferenceResult, edges::Vector{Any})
-    isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance
-    return store_backedges(caller.linfo, edges)
-end
-
-function store_backedges(caller::MethodInstance, edges::Vector{Any})
-    for itr in BackedgeIterator(edges)
-        callee = itr.caller
-        if isa(callee, MethodInstance)
-            ccall(:jl_method_instance_add_backedge, Cvoid, (Any, Any, Any), callee, itr.sig, caller)
-        else
-            typeassert(callee, MethodTable)
-            ccall(:jl_method_table_add_backedge, Cvoid, (Any, Any, Any), callee, itr.sig, caller)
-        end
-    end
-    return nothing
-end
-
-function record_slot_assign!(sv::InferenceState)
-    # look at all assignments to slots
-    # and union the set of types stored there
-    # to compute a lower bound on the storage required
-    body = sv.src.code::Vector{Any}
-    slottypes = sv.slottypes::Vector{Any}
-    ssavaluetypes = sv.ssavaluetypes
-    for i = 1:length(body)
-        expr = body[i]
-        # find all reachable assignments to locals
-        if was_reached(sv, i) && isexpr(expr, :(=))
-            lhs = expr.args[1]
-            if isa(lhs, SlotNumber)
-                typ = ssavaluetypes[i]
-                @assert typ !== NOT_FOUND "active slot in unreached region"
-                vt = widenconst(typ)
-                if vt !== Bottom
-                    id = slot_id(lhs)
-                    otherTy = slottypes[id]
-                    if otherTy === Bottom
-                        slottypes[id] = vt
-                    elseif otherTy === Any
-                        slottypes[id] = Any
-                    else
-                        slottypes[id] = tmerge(otherTy, vt)
-                    end
-                end
-            end
-        end
-    end
-    sv.src.slottypes = slottypes
-    return nothing
-end
-
-function record_bestguess!(sv::InferenceState)
-    bestguess = sv.bestguess
-    @assert !(bestguess isa LimitedAccuracy)
-    sv.src.rettype = bestguess
-    return nothing
-end
-
-function annotate_slot_load!(interp::AbstractInterpreter, undefs::Vector{Bool}, idx::Int, sv::InferenceState, @nospecialize x)
-    if isa(x, SlotNumber)
-        id = slot_id(x)
-        pc = find_dominating_assignment(id, idx, sv)
-        if pc === nothing
-            block = block_for_inst(sv.cfg, idx)
-            state = sv.bb_vartables[block]::VarTable
-            vt = state[id]
-            undefs[id] |= vt.undef
-            typ = widenslotwrapper(ignorelimited(vt.typ))
-        else
-            typ = sv.ssavaluetypes[pc]
-            @assert typ !== NOT_FOUND "active slot in unreached region"
-        end
-        # add type annotations where needed
-        if !⊑(typeinf_lattice(interp), sv.slottypes[id], typ)
-            return TypedSlot(id, typ)
-        end
-        return x
-    elseif isa(x, Expr)
-        head = x.head
-        i0 = 1
-        if is_meta_expr_head(head) || head === :const
-            return x
-        end
-        if head === :(=) || head === :method
-            i0 = 2
-        end
-        for i = i0:length(x.args)
-            x.args[i] = annotate_slot_load!(interp, undefs, idx, sv, x.args[i])
-        end
-        return x
-    elseif isa(x, ReturnNode) && isdefined(x, :val)
-        return ReturnNode(annotate_slot_load!(interp, undefs, idx, sv, x.val))
-    elseif isa(x, GotoIfNot)
-        return GotoIfNot(annotate_slot_load!(interp, undefs, idx, sv, x.cond), x.dest)
-    end
-    return x
-end
-
-# find the dominating assignment to the slot `id` in the block containing statement `idx`,
-# returns `nothing` otherwise
-function find_dominating_assignment(id::Int, idx::Int, sv::InferenceState)
-    block = block_for_inst(sv.cfg, idx)
-    for pc in reverse(sv.cfg.blocks[block].stmts) # N.B. reverse since the last assignment is dominating this block
-        pc < idx || continue # N.B. needs pc ≠ idx as `id` can be assigned at `idx`
-        stmt = sv.src.code[pc]
-        isexpr(stmt, :(=)) || continue
-        lhs = stmt.args[1]
-        isa(lhs, SlotNumber) || continue
-        slot_id(lhs) == id || continue
-        return pc
-    end
-    return nothing
-end
-
-# annotate types of all symbols in AST, preparing for optimization
-function type_annotate!(interp::AbstractInterpreter, sv::InferenceState, run_optimizer::Bool)
-    # widen `Conditional`s from `slottypes`
-    slottypes = sv.slottypes
-    for i = 1:length(slottypes)
-        slottypes[i] = widenconditional(slottypes[i])
-    end
-
-    # compute the required type for each slot
-    # to hold all of the items assigned into it
-    record_slot_assign!(sv)
-
-    record_bestguess!(sv)
-
-    # annotate variables load types
-    # remove dead code optimization
-    # and compute which variables may be used undef
-    src = sv.src
-    stmts = src.code
-    nstmt = length(stmts)
-    ssavaluetypes = sv.ssavaluetypes
-    slotflags = src.slotflags
-    nslots = length(slotflags)
-    undefs = fill(false, nslots)
-    any_unreachable = false
-
-    # this statement traversal does five things:
-    # 1. introduce temporary `TypedSlot`s that are supposed to be replaced with π-nodes later
-    # 2. mark used-undef slots (required by the `slot2reg` conversion)
-    # 3. mark unreached statements for a bulk code deletion (see issue #7836)
-    # 4. widen slot wrappers (`Conditional` and `MustAlias`) and remove `NOT_FOUND` from `ssavaluetypes`
-    #    NOTE because of this, `was_reached` will no longer be available after this point
-    # 5. eliminate GotoIfNot if either branch target is unreachable
-    changemap = nothing # initialized if there is any dead region
-    for i = 1:nstmt
-        expr = stmts[i]
-        if was_reached(sv, i)
-            if run_optimizer
-                if isa(expr, GotoIfNot) && widenconst(argextype(expr.cond, src, sv.sptypes)) === Bool
-                    # 5: replace this live GotoIfNot with:
-                    # - GotoNode if the fallthrough target is unreachable
-                    # - no-op if the branch target is unreachable
-                    if !was_reached(sv, i+1)
-                        expr = GotoNode(expr.dest)
-                    elseif !was_reached(sv, expr.dest)
-                        expr = nothing
-                    end
-                end
-            end
-            stmts[i] = annotate_slot_load!(interp, undefs, i, sv, expr) # 1&2
-            ssavaluetypes[i] = widenslotwrapper(ssavaluetypes[i]) # 4
-        else # i.e. any runtime execution will never reach this statement
-            any_unreachable = true
-            if is_meta_expr(expr) # keep any lexically scoped expressions
-                ssavaluetypes[i] = Any # 4
-            else
-                ssavaluetypes[i] = Bottom # 4
-                stmts[i] = Const(expr) # annotate that this statement actually is dead
-            end
-        end
-    end
-
-    # finish marking used-undef variables
-    for j = 1:nslots
-        if undefs[j]
-            slotflags[j] |= SLOT_USEDUNDEF | SLOT_STATICUNDEF
-        end
-    end
-
-    return any_unreachable
-end
-
-# at the end, all items in b's cycle
-# will now be added to a's cycle
-function union_caller_cycle!(a::InferenceState, b::InferenceState)
-    callers_in_cycle = b.callers_in_cycle
-    b.parent = a.parent
-    b.callers_in_cycle = a.callers_in_cycle
-    contains_is(a.callers_in_cycle, b) || push!(a.callers_in_cycle, b)
-    if callers_in_cycle !== a.callers_in_cycle
-        for caller in callers_in_cycle
-            if caller !== b
-                caller.parent = a.parent
-                caller.callers_in_cycle = a.callers_in_cycle
-                push!(a.callers_in_cycle, caller)
-            end
-        end
-    end
-    return
-end
-
-function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState, ancestor::InferenceState, child::InferenceState)
-    # add backedge of parent <- child
-    # then add all backedges of parent <- parent.parent
-    # and merge all of the callers into ancestor.callers_in_cycle
-    # and ensure that walking the parent list will get the same result (DAG) from everywhere
-    while true
-        add_cycle_backedge!(parent, child, parent.currpc)
-        union_caller_cycle!(ancestor, child)
-        child = parent
-        child === ancestor && break
-        parent = frame_parent(child)
-        while !isa(parent, InferenceState)
-            # XXX we may miss some edges here?
-            parent = frame_parent(parent::IRInterpretationState)
-        end
-        parent = parent::InferenceState
-    end
-end
-
-function is_same_frame(interp::AbstractInterpreter, mi::MethodInstance, frame::InferenceState)
-    return mi === frame_instance(frame)
-end
-
-function poison_callstack!(infstate::InferenceState, topmost::InferenceState)
-    push!(infstate.pclimitations, topmost)
-    nothing
-end
-
-# Walk through `mi`'s upstream call chain, starting at `parent`. If a parent
-# frame matching `mi` is encountered, then there is a cycle in the call graph
-# (i.e. `mi` is a descendant callee of itself). Upon encountering this cycle,
-# we "resolve" it by merging the call chain, which entails unioning each intermediary
-# frame's `callers_in_cycle` field and adding the appropriate backedges. Finally,
-# we return `mi`'s pre-existing frame. If no cycles are found, `nothing` is
-# returned instead.
-function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState)
-    # TODO (#48913) implement a proper recursion handling for irinterp:
-    # This works just because currently the `:terminate` condition guarantees that
-    # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
-    # We should revisit this once we have a better story for handling cycles in irinterp.
-    isa(parent, InferenceState) || return false
-    frame = parent
-    uncached = false
-    while isa(frame, InferenceState)
-        uncached |= !is_cached(frame) # ensure we never add an uncached frame to a cycle
-        if is_same_frame(interp, mi, frame)
-            if uncached
-                # our attempt to speculate into a constant call lead to an undesired self-cycle
-                # that cannot be converged: poison our call-stack (up to the discovered duplicate frame)
-                # with the limited flag and abort (set return type to Any) now
-                poison_callstack!(parent, frame)
-                return true
-            end
-            merge_call_chain!(interp, parent, frame, frame)
-            return frame
-        end
-        for caller in callers_in_cycle(frame)
-            if is_same_frame(interp, mi, caller)
-                if uncached
-                    poison_callstack!(parent, frame)
-                    return true
-                end
-                merge_call_chain!(interp, parent, frame, caller)
-                return caller
-            end
-        end
-        frame = frame_parent(frame)
-    end
-    return false
-end
-
-generating_sysimg() = ccall(:jl_generating_output, Cint, ()) != 0 && JLOptions().incremental == 0
-
-ipo_effects(code::CodeInstance) = decode_effects(code.ipo_purity_bits)
-
-struct EdgeCallResult
-    rt #::Type
-    edge::Union{Nothing,MethodInstance}
-    effects::Effects
-    function EdgeCallResult(@nospecialize(rt),
-                            edge::Union{Nothing,MethodInstance},
-                            effects::Effects)
-        return new(rt, edge, effects)
-    end
-end
-
-# compute (and cache) an inferred AST and return the current best estimate of the result type
-function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState)
-    mi = specialize_method(method, atype, sparams)::MethodInstance
-    code = get(code_cache(interp), mi, nothing)
-    if code isa CodeInstance # return existing rettype if the code is already inferred
-        inferred = @atomic :monotonic code.inferred
-        if inferred === nothing && is_stmt_inline(get_curr_ssaflag(caller))
-            # we already inferred this edge before and decided to discard the inferred code,
-            # nevertheless we re-infer it here again and keep it around in the local cache
-            # since the inliner will request to use it later
-            cache = :local
-        else
-            effects = ipo_effects(code)
-            update_valid_age!(caller, WorldRange(min_world(code), max_world(code)))
-            rettype = code.rettype
-            if isdefined(code, :rettype_const)
-                rettype_const = code.rettype_const
-                # the second subtyping/egal conditions are necessary to distinguish usual cases
-                # from rare cases when `Const` wrapped those extended lattice type objects
-                if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype)
-                    rettype = PartialStruct(rettype, rettype_const)
-                elseif isa(rettype_const, PartialOpaque) && rettype <: Core.OpaqueClosure
-                    rettype = rettype_const
-                elseif isa(rettype_const, InterConditional) && rettype !== InterConditional
-                    rettype = rettype_const
-                elseif isa(rettype_const, InterMustAlias) && rettype !== InterMustAlias
-                    rettype = rettype_const
-                else
-                    rettype = Const(rettype_const)
-                end
-            end
-            return EdgeCallResult(rettype, mi, effects)
-        end
-    else
-        cache = :global # cache edge targets by default
-    end
-    if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_sysimg()
-        add_remark!(interp, caller, "Inference is disabled for the target module")
-        return EdgeCallResult(Any, nothing, Effects())
-    end
-    if !is_cached(caller) && frame_parent(caller) === nothing
-        # this caller exists to return to the user
-        # (if we asked resolve_call_cycle!, it might instead detect that there is a cycle that it can't merge)
-        frame = false
-    else
-        frame = resolve_call_cycle!(interp, mi, caller)
-    end
-    if frame === false
-        # completely new
-        lock_mi_inference(interp, mi)
-        result = InferenceResult(mi, typeinf_lattice(interp))
-        frame = InferenceState(result, cache, interp) # always use the cache for edge targets
-        if frame === nothing
-            add_remark!(interp, caller, "Failed to retrieve source")
-            # can't get the source for this, so we know nothing
-            unlock_mi_inference(interp, mi)
-            return EdgeCallResult(Any, nothing, Effects())
-        end
-        if is_cached(caller) || frame_parent(caller) !== nothing # don't involve uncached functions in cycle resolution
-            frame.parent = caller
-        end
-        typeinf(interp, frame)
-        update_valid_age!(caller, frame.valid_worlds)
-        edge = is_inferred(frame) ? mi : nothing
-        return EdgeCallResult(frame.bestguess, edge, frame.ipo_effects) # effects are adjusted already within `finish`
-    elseif frame === true
-        # unresolvable cycle
-        return EdgeCallResult(Any, nothing, Effects())
-    end
-    # return the current knowledge about this cycle
-    frame = frame::InferenceState
-    update_valid_age!(caller, frame.valid_worlds)
-    return EdgeCallResult(frame.bestguess, nothing, adjust_effects(frame))
-end
-
-#### entry points for inferring a MethodInstance given a type signature ####
-
-# compute an inferred AST and return type
-function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool)
-    frame = typeinf_frame(interp, method, atype, sparams, run_optimizer)
-    frame === nothing && return nothing, Any
-    is_inferred(frame) || return nothing, Any
-    code = frame.src
-    rt = widenconst(ignorelimited(frame.result.result))
-    return code, rt
-end
-
-"""
-    typeinf_ircode(
-        interp::AbstractInterpreter,
-        method::Method,
-        atype,
-        sparams::SimpleVector,
-        optimize_until::Union{Integer,AbstractString,Nothing},
-    ) -> (ir::Union{IRCode,Nothing}, returntype::Type)
-
-Infer a `method` and return an `IRCode` with inferred `returntype` on success.
-"""
-function typeinf_ircode(
-    interp::AbstractInterpreter,
-    method::Method,
-    @nospecialize(atype),
-    sparams::SimpleVector,
-    optimize_until::Union{Integer,AbstractString,Nothing},
-)
-    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
-    frame = typeinf_frame(interp, method, atype, sparams, false)
-    if frame === nothing
-        ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-        return nothing, Any
-    end
-    (; result) = frame
-    opt = OptimizationState(frame, interp)
-    ir = run_passes(opt.src, opt, result, optimize_until)
-    rt = widenconst(ignorelimited(result.result))
-    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-    return ir, rt
-end
-
-# compute an inferred frame
-function typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool)
-    mi = specialize_method(method, atype, sparams)::MethodInstance
-    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
-    result = InferenceResult(mi, typeinf_lattice(interp))
-    frame = InferenceState(result, run_optimizer ? :global : :no, interp)
-    frame === nothing && return nothing
-    typeinf(interp, frame)
-    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-    return frame
-end
-
-# compute (and cache) an inferred AST and return type
-function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
-    method = mi.def::Method
-    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
-    code = get(code_cache(interp), mi, nothing)
-    if code isa CodeInstance
-        # see if this code already exists in the cache
-        inf = @atomic :monotonic code.inferred
-        if use_const_api(code)
-            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-            tree = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
-            rettype_const = code.rettype_const
-            tree.code = Any[ ReturnNode(quoted(rettype_const)) ]
-            nargs = Int(method.nargs)
-            tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms)
-            tree.slotflags = fill(IR_FLAG_NULL, nargs)
-            tree.ssavaluetypes = 1
-            tree.codelocs = Int32[1]
-            tree.linetable = LineInfoNode[LineInfoNode(method.module, mi, method.file, method.line, Int32(0))]
-            tree.ssaflags = UInt8[0]
-            set_inlineable!(tree, true)
-            tree.parent = mi
-            tree.rettype = Core.Typeof(rettype_const)
-            tree.min_world = code.min_world
-            tree.max_world = code.max_world
-            tree.inferred = true
-            return tree
-        elseif isa(inf, CodeInfo)
-            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-            if !(inf.min_world == code.min_world &&
-                    inf.max_world == code.max_world &&
-                    inf.rettype === code.rettype)
-                inf = copy(inf)
-                inf.min_world = code.min_world
-                inf.max_world = code.max_world
-                inf.rettype = code.rettype
-            end
-            return inf
-        elseif isa(inf, String)
-            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-            inf = _uncompressed_ir(code, inf)
-            return inf
-        end
-    end
-    if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_sysimg()
-        return retrieve_code_info(mi, get_world_counter(interp))
-    end
-    lock_mi_inference(interp, mi)
-    result = InferenceResult(mi, typeinf_lattice(interp))
-    frame = InferenceState(result, #=cache=#:global, interp)
-    frame === nothing && return nothing
-    typeinf(interp, frame)
-    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-    frame.src.inferred || return nothing
-    return frame.src
-end
-
-# compute (and cache) an inferred AST and return the inferred return type
-function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector)
-    if contains_is(unwrap_unionall(atype).parameters, Union{})
-        return Union{} # don't ask: it does weird and unnecessary things, if it occurs during bootstrap
-    end
-    mi = specialize_method(method, atype, sparams)::MethodInstance
-    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
-    code = get(code_cache(interp), mi, nothing)
-    if code isa CodeInstance
-        # see if this rettype already exists in the cache
-        ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-        return code.rettype
-    end
-    result = InferenceResult(mi, typeinf_lattice(interp))
-    typeinf(interp, result, :global)
-    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-    is_inferred(result) || return nothing
-    return widenconst(ignorelimited(result.result))
-end
-
-# This is a bridge for the C code calling `jl_typeinf_func()`
-typeinf_ext_toplevel(mi::MethodInstance, world::UInt) = typeinf_ext_toplevel(NativeInterpreter(world), mi)
-function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance)
-    if isa(linfo.def, Method)
-        # method lambda - infer this specialization via the method cache
-        src = typeinf_ext(interp, linfo)
-    else
-        src = linfo.uninferred::CodeInfo
-        if !src.inferred
-            # toplevel lambda - infer directly
-            start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
-            if !src.inferred
-                result = InferenceResult(linfo, typeinf_lattice(interp))
-                frame = InferenceState(result, src, #=cache=#:global, interp)
-                typeinf(interp, frame)
-                @assert is_inferred(frame) # TODO: deal with this better
-                src = frame.src
-            end
-            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
-        end
-    end
-    return src
-end
-
-function return_type(@nospecialize(f), t::DataType) # this method has a special tfunc
-    world = ccall(:jl_get_tls_world_age, UInt, ())
-    args = Any[_return_type, NativeInterpreter(world), Tuple{Core.Typeof(f), t.parameters...}]
-    return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Ptr{Cvoid}}, Cint), args, length(args))
-end
-
-function return_type(@nospecialize(f), t::DataType, world::UInt)
-    return return_type(Tuple{Core.Typeof(f), t.parameters...}, world)
-end
-
-function return_type(t::DataType)
-    world = ccall(:jl_get_tls_world_age, UInt, ())
-    return return_type(t, world)
-end
-
-function return_type(t::DataType, world::UInt)
-    args = Any[_return_type, NativeInterpreter(world), t]
-    return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Ptr{Cvoid}}, Cint), args, length(args))
-end
-
-function _return_type(interp::AbstractInterpreter, t::DataType)
-    rt = Union{}
-    f = singleton_type(t.parameters[1])
-    if isa(f, Builtin)
-        args = Any[t.parameters...]
-        popfirst!(args)
-        rt = builtin_tfunction(interp, f, args, nothing)
-        rt = widenconst(rt)
-    else
-        for match in _methods_by_ftype(t, -1, get_world_counter(interp))::Vector
-            match = match::MethodMatch
-            ty = typeinf_type(interp, match.method, match.spec_types, match.sparams)
-            ty === nothing && return Any
-            rt = tmerge(rt, ty)
-            rt === Any && break
-        end
-    end
-    return rt
-end
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
deleted file mode 100644
index f3c5694535ce6..0000000000000
--- a/base/compiler/utilities.jl
+++ /dev/null
@@ -1,521 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-###########
-# generic #
-###########
-
-if !@isdefined(var"@timeit")
-    # This is designed to allow inserting timers when loading a second copy
-    # of inference for performing performance experiments.
-    macro timeit(args...)
-        esc(args[end])
-    end
-end
-
-# avoid cycle due to over-specializing `any` when used by inference
-function _any(@nospecialize(f), a)
-    for x in a
-        f(x) && return true
-    end
-    return false
-end
-any(@nospecialize(f), itr) = _any(f, itr)
-any(itr) = _any(identity, itr)
-
-function _all(@nospecialize(f), a)
-    for x in a
-        f(x) || return false
-    end
-    return true
-end
-all(@nospecialize(f), itr) = _all(f, itr)
-all(itr) = _all(identity, itr)
-
-function contains_is(itr, @nospecialize(x))
-    for y in itr
-        if y === x
-            return true
-        end
-    end
-    return false
-end
-
-anymap(f::Function, a::Array{Any,1}) = Any[ f(a[i]) for i in 1:length(a) ]
-
-###########
-# scoping #
-###########
-
-_topmod(m::Module) = ccall(:jl_base_relative_to, Any, (Any,), m)::Module
-
-function istopfunction(@nospecialize(f), name::Symbol)
-    tn = typeof(f).name
-    if tn.mt.name === name
-        top = _topmod(tn.module)
-        return isdefined(top, name) && isconst(top, name) && f === getglobal(top, name)
-    end
-    return false
-end
-
-#######
-# AST #
-#######
-
-# Meta expression head, these generally can't be deleted even when they are
-# in a dead branch but can be ignored when analyzing uses/liveness.
-is_meta_expr_head(head::Symbol) = head === :boundscheck || head === :meta || head === :loopinfo
-is_meta_expr(@nospecialize x) = isa(x, Expr) && is_meta_expr_head(x.head)
-
-sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0
-
-function is_self_quoting(@nospecialize(x))
-    return isa(x,Number) || isa(x,AbstractString) || isa(x,Tuple) || isa(x,Type) ||
-        isa(x,Char) || x === nothing || isa(x,Function)
-end
-
-function quoted(@nospecialize(x))
-    return is_self_quoting(x) ? x : QuoteNode(x)
-end
-
-############
-# inlining #
-############
-
-const MAX_INLINE_CONST_SIZE = 256
-
-function count_const_size(@nospecialize(x), count_self::Bool = true)
-    (x isa Type || x isa Core.TypeName || x isa Symbol) && return 0
-    ismutable(x) && return MAX_INLINE_CONST_SIZE + 1
-    isbits(x) && return Core.sizeof(x)
-    dt = typeof(x)
-    sz = count_self ? sizeof(dt) : 0
-    sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1
-    dtfd = DataTypeFieldDesc(dt)
-    for i = 1:nfields(x)
-        isdefined(x, i) || continue
-        f = getfield(x, i)
-        if !dtfd[i].isptr && datatype_pointerfree(typeof(f))
-            continue
-        end
-        sz += count_const_size(f, dtfd[i].isptr)
-        sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1
-    end
-    return sz
-end
-
-function is_inlineable_constant(@nospecialize(x))
-    return count_const_size(x) <= MAX_INLINE_CONST_SIZE
-end
-
-is_nospecialized(method::Method) = method.nospecialize ≠ 0
-
-is_nospecializeinfer(method::Method) = method.nospecializeinfer && is_nospecialized(method)
-
-###########################
-# MethodInstance/CodeInfo #
-###########################
-
-invoke_api(li::CodeInstance) = ccall(:jl_invoke_api, Cint, (Any,), li)
-use_const_api(li::CodeInstance) = invoke_api(li) == 2
-
-function get_staged(mi::MethodInstance, world::UInt)
-    may_invoke_generator(mi) || return nothing
-    try
-        # user code might throw errors – ignore them
-        ci = ccall(:jl_code_for_staged, Any, (Any, UInt), mi, world)::CodeInfo
-        return ci
-    catch
-        return nothing
-    end
-end
-
-function retrieve_code_info(linfo::MethodInstance, world::UInt)
-    m = linfo.def::Method
-    c = nothing
-    if isdefined(m, :generator)
-        # user code might throw errors – ignore them
-        c = get_staged(linfo, world)
-    end
-    if c === nothing && isdefined(m, :source)
-        src = m.source
-        if src === nothing
-            # can happen in images built with --strip-ir
-            return nothing
-        elseif isa(src, String)
-            c = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, src)
-        else
-            c = copy(src::CodeInfo)
-        end
-    end
-    if c isa CodeInfo
-        c.parent = linfo
-        return c
-    end
-    return nothing
-end
-
-function get_compileable_sig(method::Method, @nospecialize(atype), sparams::SimpleVector)
-    isa(atype, DataType) || return nothing
-    mt = ccall(:jl_method_get_table, Any, (Any,), method)
-    mt === nothing && return nothing
-    return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint),
-        mt, atype, sparams, method, #=int return_if_compileable=#1)
-end
-
-function get_nospecializeinfer_sig(method::Method, @nospecialize(atype), sparams::SimpleVector)
-    isa(atype, DataType) || return method.sig
-    mt = ccall(:jl_method_table_for, Any, (Any,), atype)
-    mt === nothing && return method.sig
-    return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint),
-        mt, atype, sparams, method, #=int return_if_compileable=#0)
-end
-
-isa_compileable_sig(@nospecialize(atype), sparams::SimpleVector, method::Method) =
-    !iszero(ccall(:jl_isa_compileable_sig, Int32, (Any, Any, Any), atype, sparams, method))
-
-# eliminate UnionAll vars that might be degenerate due to having identical bounds,
-# or a concrete upper bound and appearing covariantly.
-function subst_trivial_bounds(@nospecialize(atype))
-    if !isa(atype, UnionAll)
-        return atype
-    end
-    v = atype.var
-    if isconcretetype(v.ub) || v.lb === v.ub
-        subst = try
-            atype{v.ub}
-        catch
-            # Note in rare cases a var bound might not be valid to substitute.
-            nothing
-        end
-        if subst !== nothing
-            return subst_trivial_bounds(subst)
-        end
-    end
-    return UnionAll(v, subst_trivial_bounds(atype.body))
-end
-
-has_typevar(@nospecialize(t), v::TypeVar) = ccall(:jl_has_typevar, Cint, (Any, Any), t, v) != 0
-
-# If removing trivial vars from atype results in an equivalent type, use that
-# instead. Otherwise we can get a case like issue #38888, where a signature like
-#   f(x::S) where S<:Int
-# gets cached and matches a concrete dispatch case.
-function normalize_typevars(method::Method, @nospecialize(atype), sparams::SimpleVector)
-    at2 = subst_trivial_bounds(atype)
-    if at2 !== atype && at2 == atype
-        atype = at2
-        sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), at2, method.sig)::SimpleVector
-        sparams = sp_[2]::SimpleVector
-    end
-    return Pair{Any,SimpleVector}(atype, sparams)
-end
-
-# get a handle to the unique specialization object representing a particular instantiation of a call
-@inline function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false)
-    if isa(atype, UnionAll)
-        atype, sparams = normalize_typevars(method, atype, sparams)
-    end
-    if is_nospecializeinfer(method)
-        atype = get_nospecializeinfer_sig(method, atype, sparams)
-    end
-    if preexisting
-        # check cached specializations
-        # for an existing result stored there
-        return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atype)::Union{Nothing,MethodInstance}
-    end
-    return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atype, sparams)
-end
-
-function specialize_method(match::MethodMatch; kwargs...)
-    return specialize_method(match.method, match.spec_types, match.sparams; kwargs...)
-end
-
-"""
-    is_declared_inline(method::Method) -> Bool
-
-Check if `method` is declared as `@inline`.
-"""
-is_declared_inline(method::Method) = _is_declared_inline(method, true)
-
-"""
-    is_declared_noinline(method::Method) -> Bool
-
-Check if `method` is declared as `@noinline`.
-"""
-is_declared_noinline(method::Method) = _is_declared_inline(method, false)
-
-function _is_declared_inline(method::Method, inline::Bool)
-    isdefined(method, :source) || return false
-    src = method.source
-    isa(src, MaybeCompressed) || return false
-    return (inline ? is_declared_inline : is_declared_noinline)(src)
-end
-
-"""
-    is_aggressive_constprop(method::Union{Method,CodeInfo}) -> Bool
-
-Check if `method` is declared as `Base.@constprop :aggressive`.
-"""
-is_aggressive_constprop(method::Union{Method,CodeInfo}) = method.constprop == 0x01
-
-"""
-    is_no_constprop(method::Union{Method,CodeInfo}) -> Bool
-
-Check if `method` is declared as `Base.@constprop :none`.
-"""
-is_no_constprop(method::Union{Method,CodeInfo}) = method.constprop == 0x02
-
-#############
-# backedges #
-#############
-
-"""
-    BackedgeIterator(backedges::Vector{Any})
-
-Return an iterator over a list of backedges. Iteration returns `(sig, caller)` elements,
-which will be one of the following:
-
-- `BackedgePair(nothing, caller::MethodInstance)`: a call made by ordinary inferable dispatch
-- `BackedgePair(invokesig::Type, caller::MethodInstance)`: a call made by `invoke(f, invokesig, args...)`
-- `BackedgePair(specsig::Type, mt::MethodTable)`: an abstract call
-
-# Examples
-
-```julia
-julia> callme(x) = x+1
-callme (generic function with 1 method)
-
-julia> callyou(x) = callme(x)
-callyou (generic function with 1 method)
-
-julia> callyou(2.0)
-3.0
-
-julia> mi = which(callme, (Any,)).specializations
-MethodInstance for callme(::Float64)
-
-julia> @eval Core.Compiler for (; sig, caller) in BackedgeIterator(Main.mi.backedges)
-           println(sig)
-           println(caller)
-       end
-nothing
-callyou(Float64) from callyou(Any)
-```
-"""
-struct BackedgeIterator
-    backedges::Vector{Any}
-end
-
-const empty_backedge_iter = BackedgeIterator(Any[])
-
-struct BackedgePair
-    sig # ::Union{Nothing,Type}
-    caller::Union{MethodInstance,MethodTable}
-    BackedgePair(@nospecialize(sig), caller::Union{MethodInstance,MethodTable}) = new(sig, caller)
-end
-
-function iterate(iter::BackedgeIterator, i::Int=1)
-    backedges = iter.backedges
-    i > length(backedges) && return nothing
-    item = backedges[i]
-    isa(item, MethodInstance) && return BackedgePair(nothing, item), i+1      # regular dispatch
-    isa(item, MethodTable) && return BackedgePair(backedges[i+1], item), i+2  # abstract dispatch
-    return BackedgePair(item, backedges[i+1]::MethodInstance), i+2            # `invoke` calls
-end
-
-#########
-# types #
-#########
-
-@nospecializeinfer function singleton_type(@nospecialize(ft))
-    ft = widenslotwrapper(ft)
-    if isa(ft, Const)
-        return ft.val
-    elseif isconstType(ft)
-        return ft.parameters[1]
-    elseif issingletontype(ft)
-        return ft.instance
-    end
-    return nothing
-end
-
-@nospecializeinfer function maybe_singleton_const(@nospecialize(t))
-    if isa(t, DataType)
-        if issingletontype(t)
-            return Const(t.instance)
-        elseif isconstType(t)
-            return Const(t.parameters[1])
-        end
-    end
-    return t
-end
-
-###################
-# SSAValues/Slots #
-###################
-
-function ssamap(f, @nospecialize(stmt))
-    urs = userefs(stmt)
-    for op in urs
-        val = op[]
-        if isa(val, SSAValue)
-            op[] = f(val)
-        end
-    end
-    return urs[]
-end
-
-function foreachssa(@specialize(f), @nospecialize(stmt))
-    urs = userefs(stmt)
-    for op in urs
-        val = op[]
-        if isa(val, SSAValue)
-            f(val)
-        end
-    end
-end
-
-function foreach_anyssa(@specialize(f), @nospecialize(stmt))
-    urs = userefs(stmt)
-    for op in urs
-        val = op[]
-        if isa(val, AnySSAValue)
-            f(val)
-        end
-    end
-end
-
-function find_ssavalue_uses(body::Vector{Any}, nvals::Int)
-    uses = BitSet[ BitSet() for i = 1:nvals ]
-    for line in 1:length(body)
-        e = body[line]
-        if isa(e, ReturnNode)
-            e = e.val
-        elseif isa(e, GotoIfNot)
-            e = e.cond
-        end
-        if isa(e, SSAValue)
-            push!(uses[e.id], line)
-        elseif isa(e, Expr)
-            find_ssavalue_uses(e, uses, line)
-        elseif isa(e, PhiNode)
-            find_ssavalue_uses(e, uses, line)
-        end
-    end
-    return uses
-end
-
-function find_ssavalue_uses(e::Expr, uses::Vector{BitSet}, line::Int)
-    head = e.head
-    is_meta_expr_head(head) && return
-    skiparg = (head === :(=))
-    for a in e.args
-        if skiparg
-            skiparg = false
-        elseif isa(a, SSAValue)
-            push!(uses[a.id], line)
-        elseif isa(a, Expr)
-            find_ssavalue_uses(a, uses, line)
-        end
-    end
-end
-
-function find_ssavalue_uses(e::PhiNode, uses::Vector{BitSet}, line::Int)
-    for val in e.values
-        if isa(val, SSAValue)
-            push!(uses[val.id], line)
-        end
-    end
-end
-
-function is_throw_call(e::Expr)
-    if e.head === :call
-        f = e.args[1]
-        if isa(f, GlobalRef)
-            ff = abstract_eval_globalref(f)
-            if isa(ff, Const) && ff.val === Core.throw
-                return true
-            end
-        end
-    end
-    return false
-end
-
-function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Int})
-    for stmt in find_throw_blocks(src.code, handler_at)
-        src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK
-    end
-    return nothing
-end
-
-function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int})
-    stmts = BitSet()
-    n = length(code)
-    for i in n:-1:1
-        s = code[i]
-        if isa(s, Expr)
-            if s.head === :gotoifnot
-                if i+1 in stmts && s.args[2]::Int in stmts
-                    push!(stmts, i)
-                end
-            elseif s.head === :return
-                # see `ReturnNode` handling
-            elseif is_throw_call(s)
-                if handler_at[i] == 0
-                    push!(stmts, i)
-                end
-            elseif i+1 in stmts
-                push!(stmts, i)
-            end
-        elseif isa(s, ReturnNode)
-            # NOTE: it potentially makes sense to treat unreachable nodes
-            # (where !isdefined(s, :val)) as `throw` points, but that can cause
-            # worse codegen around the call site (issue #37558)
-        elseif isa(s, GotoNode)
-            if s.label in stmts
-                push!(stmts, i)
-            end
-        elseif isa(s, GotoIfNot)
-            if i+1 in stmts && s.dest in stmts
-                push!(stmts, i)
-            end
-        elseif i+1 in stmts
-            push!(stmts, i)
-        end
-    end
-    return stmts
-end
-
-# using a function to ensure we can infer this
-@inline function slot_id(s)
-    isa(s, SlotNumber) && return s.id
-    isa(s, Argument) && return s.n
-    return (s::TypedSlot).id
-end
-
-###########
-# options #
-###########
-
-is_root_module(m::Module) = false
-
-inlining_enabled() = (JLOptions().can_inline == 1)
-function coverage_enabled(m::Module)
-    ccall(:jl_generating_output, Cint, ()) == 0 || return false # don't alter caches
-    cov = JLOptions().code_coverage
-    if cov == 1 # user
-        m = moduleroot(m)
-        m === Core && return false
-        isdefined(Main, :Base) && m === Main.Base && return false
-        return true
-    elseif cov == 2 # all
-        return true
-    end
-    return false
-end
-function inbounds_option()
-    opt_check_bounds = JLOptions().check_bounds
-    opt_check_bounds == 0 && return :default
-    opt_check_bounds == 1 && return :on
-    return :off
-end
diff --git a/base/complex.jl b/base/complex.jl
index 97b47eac91a5a..daba8847468f0 100644
--- a/base/complex.jl
+++ b/base/complex.jl
@@ -120,10 +120,10 @@ Float64
 real(T::Type) = typeof(real(zero(T)))
 real(::Type{T}) where {T<:Real} = T
 real(C::Type{<:Complex}) = fieldtype(C, 1)
-real(::Type{Union{}}, slurp...) = Union{}(im)
+real(::Type{Union{}}, slurp...) = Union{}
 
 """
-    isreal(x) -> Bool
+    isreal(x)::Bool
 
 Test whether `x` or all its elements are numerically equal to some real number
 including infinities and NaNs. `isreal(x)` is true if `isequal(x, real(x))`
@@ -162,12 +162,6 @@ Convert real numbers or arrays to complex. `i` defaults to zero.
 ```jldoctest
 julia> complex(7)
 7 + 0im
-
-julia> complex([1, 2, 3])
-3-element Vector{Complex{Int64}}:
- 1 + 0im
- 2 + 0im
- 3 + 0im
 ```
 """
 complex(z::Complex) = z
@@ -178,7 +172,7 @@ complex(x::Real, y::Real) = Complex(x, y)
     complex(T::Type)
 
 Return an appropriate type which can represent a value of type `T` as a complex number.
-Equivalent to `typeof(complex(zero(T)))`.
+Equivalent to `typeof(complex(zero(T)))` if `T` does not contain `Missing`.
 
 # Examples
 ```jldoctest
@@ -187,10 +181,14 @@ Complex{Int64}
 
 julia> complex(Int)
 Complex{Int64}
+
+julia> complex(Union{Int, Missing})
+Union{Missing, Complex{Int64}}
 ```
 """
 complex(::Type{T}) where {T<:Real} = Complex{T}
 complex(::Type{Complex{T}}) where {T<:Real} = Complex{T}
+complex(::Type{Union{}}, slurp...) = Union{}
 
 flipsign(x::Complex, y::Real) = ifelse(signbit(y), -x, x)
 
@@ -198,18 +196,18 @@ function show(io::IO, z::Complex)
     r, i = reim(z)
     compact = get(io, :compact, false)::Bool
     show(io, r)
-    if signbit(i) && !isnan(i)
+    bufio = IOBuffer()
+    show(IOContext(bufio, io), i)
+    seekstart(bufio)
+    if peek(bufio) === UInt8('-')
+        seek(bufio, 1)
         print(io, compact ? "-" : " - ")
-        if isa(i,Signed) && !isa(i,BigInt) && i == typemin(typeof(i))
-            show(io, -widen(i))
-        else
-            show(io, -i)
-        end
+        write(io, bufio)
     else
         print(io, compact ? "+" : " + ")
-        show(io, i)
+        write(io, bufio)
     end
-    if !(isa(i,Integer) && !isa(i,Bool) || isa(i,AbstractFloat) && isfinite(i))
+    if !(isa(i,Signed) || isa(i,AbstractFloat) && isfinite(i))
         print(io, "*")
     end
     print(io, "im")
@@ -251,11 +249,7 @@ isequal(z::Real, w::Complex) = isequal(z,real(w))::Bool & isequal(zero(z),imag(w
 
 in(x::Complex, r::AbstractRange{<:Real}) = isreal(x) && real(x) in r
 
-if UInt === UInt64
-    const h_imag = 0x32a7a07f3e7cd1f9
-else
-    const h_imag = 0x3e7cd1f9
-end
+const h_imag = 0x32a7a07f3e7cd1f9 % UInt
 const hash_0_imag = hash(0, h_imag)
 
 function hash(z::Complex, h::UInt)
@@ -296,9 +290,11 @@ inv(z::Complex{<:Integer}) = inv(float(z))
 *(z::Complex, w::Complex) = Complex(real(z) * real(w) - imag(z) * imag(w),
                                     real(z) * imag(w) + imag(z) * real(w))
 
+_mulsub(a, b, c) = _mulsub(promote(a, b, c)...)
+_mulsub(a::T, b::T, c::T) where {T<:Real} = muladd(a, b, -c)
 muladd(z::Complex, w::Complex, x::Complex) =
-    Complex(muladd(real(z), real(w), -muladd(imag(z), imag(w), -real(x))),
-            muladd(real(z), imag(w),  muladd(imag(z), real(w),  imag(x))))
+    Complex(muladd(real(z), real(w), -_mulsub(imag(z), imag(w), real(x))),
+            muladd(real(z), imag(w), muladd(imag(z), real(w), imag(x))))
 
 # handle Bool and Complex{Bool}
 # avoid type signature ambiguity warnings
@@ -339,13 +335,13 @@ end
 *(x::Real, z::Complex) = Complex(x * real(z), x * imag(z))
 *(z::Complex, x::Real) = Complex(x * real(z), x * imag(z))
 
-muladd(x::Real, z::Complex, y::Number) = muladd(z, x, y)
+muladd(x::Real, z::Complex, y::Union{Real,Complex}) = muladd(z, x, y)
 muladd(z::Complex, x::Real, y::Real) = Complex(muladd(real(z),x,y), imag(z)*x)
 muladd(z::Complex, x::Real, w::Complex) =
     Complex(muladd(real(z),x,real(w)), muladd(imag(z),x,imag(w)))
 muladd(x::Real, y::Real, z::Complex) = Complex(muladd(x,y,real(z)), imag(z))
 muladd(z::Complex, w::Complex, x::Real) =
-    Complex(muladd(real(z), real(w), -muladd(imag(z), imag(w), -x)),
+    Complex(muladd(real(z), real(w), -_mulsub(imag(z), imag(w), x)),
             muladd(real(z), imag(w), imag(z) * real(w)))
 
 /(a::R, z::S) where {R<:Real,S<:Complex} = (T = promote_type(R,S); a*inv(T(z)))
@@ -567,7 +563,7 @@ end
 """
     cis(x)
 
-More efficient method for `exp(im*x)` by using Euler's formula: ``cos(x) + i sin(x) = \\exp(i x)``.
+More efficient method for `exp(im*x)` by using Euler's formula: ``\\cos(x) + i \\sin(x) = \\exp(i x)``.
 
 See also [`cispi`](@ref), [`sincos`](@ref), [`exp`](@ref), [`angle`](@ref).
 
@@ -622,7 +618,10 @@ end
 
 Compute the phase angle in radians of a complex number `z`.
 
-See also: [`atan`](@ref), [`cis`](@ref).
+Returns a number `-pi ≤ angle(z) ≤ pi`, and is thus discontinuous
+along the negative real axis.
+
+See also: [`atan`](@ref), [`cis`](@ref), [`rad2deg`](@ref).
 
 # Examples
 ```jldoctest
@@ -632,8 +631,11 @@ julia> rad2deg(angle(1 + im))
 julia> rad2deg(angle(1 - im))
 -45.0
 
-julia> rad2deg(angle(-1 - im))
--135.0
+julia> rad2deg(angle(-1 + 1e-20im))
+180.0
+
+julia> rad2deg(angle(-1 - 1e-20im))
+-180.0
 ```
 """
 angle(z::Complex) = atan(imag(z), real(z))
@@ -750,7 +752,7 @@ function log1p(z::Complex{T}) where T
         # allegedly due to Kahan, only modified to handle real(u) <= 0
         # differently to avoid inaccuracy near z==-2 and for correct branch cut
         u = one(float(T)) + z
-        u == 1 ? convert(typeof(u), z) : real(u) <= 0 ? log(u) : log(u)*z/(u-1)
+        u == 1 ? convert(typeof(u), z) : real(u) <= 0 ? log(u) : log(u)*(z/(u-1))
     elseif isnan(zr)
         Complex(zr, zr)
     elseif isfinite(zi)
@@ -1028,24 +1030,22 @@ end
 function atanh(z::Complex{T}) where T
     z = float(z)
     Tf = float(T)
-    Ω = prevfloat(typemax(Tf))
-    θ = sqrt(Ω)/4
-    ρ = 1/θ
     x, y = reim(z)
     ax = abs(x)
     ay = abs(y)
+    θ = sqrt(floatmax(Tf))/4
     if ax > θ || ay > θ #Prevent overflow
         if isnan(y)
             if isinf(x)
                 return Complex(copysign(zero(x),x), y)
             else
-                return Complex(real(1/z), y)
+                return Complex(real(inv(z)), y)
             end
         end
         if isinf(y)
             return Complex(copysign(zero(x),x), copysign(oftype(y,pi)/2, y))
         end
-        return Complex(real(1/z), copysign(oftype(y,pi)/2, y))
+        return Complex(real(inv(z)), copysign(oftype(y,pi)/2, y))
     end
     β = copysign(one(Tf), x)
     z *= β
@@ -1055,16 +1055,15 @@ function atanh(z::Complex{T}) where T
             ξ = oftype(x, Inf)
             η = y
         else
-            ym = ay+ρ
-            ξ = log(sqrt(sqrt(4+y*y))/sqrt(ym))
-            η = copysign(oftype(y,pi)/2 + atan(ym/2), y)/2
+            ξ = log(sqrt(sqrt(muladd(y, y, 4)))/sqrt(ay))
+            η = copysign(oftype(y,pi)/2 + atan(ay/2), y)/2
         end
     else #Normal case
-        ysq = (ay+ρ)^2
+        ysq = ay^2
         if x == 0
             ξ = x
         else
-            ξ = log1p(4x/((1-x)^2 + ysq))/4
+            ξ = log1p(4x/(muladd(1-x, 1-x, ysq)))/4
         end
         η = angle(Complex((1-x)*(1+x)-ysq, 2y))/2
     end
@@ -1088,7 +1087,7 @@ second is used for rounding the imaginary components.
 which rounds to the nearest integer, with ties (fractional values of 0.5)
 being rounded to the nearest even integer.
 
-# Example
+# Examples
 ```jldoctest
 julia> round(3.14 + 4.5im)
 3.0 + 4.0im
@@ -1116,7 +1115,23 @@ big(::Type{Complex{T}}) where {T<:Real} = Complex{big(T)}
 big(z::Complex{T}) where {T<:Real} = Complex{big(T)}(z)
 
 ## Array operations on complex numbers ##
+"""
+    complex(A::AbstractArray)
 
+Return an array containing the complex analog of each entry in array `A`.
+
+Equivalent to `complex.(A)`, except that the return value may share memory with all or
+part of `A` in accordance with the behavior of `convert(T, A)` given output type `T`.
+
+# Examples
+```jldoctest
+julia> complex([1, 2, 3])
+3-element Vector{Complex{Int64}}:
+ 1 + 0im
+ 2 + 0im
+ 3 + 0im
+```
+"""
 complex(A::AbstractArray{<:Complex}) = A
 
 function complex(A::AbstractArray{T}) where T
@@ -1125,3 +1140,9 @@ function complex(A::AbstractArray{T}) where T
     end
     convert(AbstractArray{typeof(complex(zero(T)))}, A)
 end
+
+## Machine epsilon for complex ##
+
+eps(z::Complex{<:AbstractFloat}) = hypot(eps(real(z)), eps(imag(z)))
+
+eps(::Type{Complex{T}}) where {T<:AbstractFloat} = sqrt(2*one(T))*eps(T)
diff --git a/base/condition.jl b/base/condition.jl
index 20481c98ee805..fd771c9be346a 100644
--- a/base/condition.jl
+++ b/base/condition.jl
@@ -69,6 +69,8 @@ struct GenericCondition{L<:AbstractLock}
     GenericCondition(l::AbstractLock) = new{typeof(l)}(IntrusiveLinkedList{Task}(), l)
 end
 
+show(io::IO, c::GenericCondition) = print(io, GenericCondition, "(", c.lock, ")")
+
 assert_havelock(c::GenericCondition) = assert_havelock(c.lock)
 lock(c::GenericCondition) = lock(c.lock)
 unlock(c::GenericCondition) = unlock(c.lock)
@@ -103,17 +105,16 @@ end
 """
     wait([x])
 
-Block the current task until some event occurs, depending on the type of the argument:
+Block the current task until some event occurs.
 
 * [`Channel`](@ref): Wait for a value to be appended to the channel.
 * [`Condition`](@ref): Wait for [`notify`](@ref) on a condition and return the `val`
-  parameter passed to `notify`. Waiting on a condition additionally allows passing
-  `first=true` which results in the waiter being put _first_ in line to wake up on `notify`
-  instead of the usual first-in-first-out behavior.
+  parameter passed to `notify`. See the `Condition`-specific docstring of `wait` for
+  the exact behavior.
 * `Process`: Wait for a process or process chain to exit. The `exitcode` field of a process
   can be used to determine success or failure.
-* [`Task`](@ref): Wait for a `Task` to finish. If the task fails with an exception, a
-  `TaskFailedException` (which wraps the failed task) is thrown.
+* [`Task`](@ref): Wait for a `Task` to finish. See the `Task`-specific docstring of `wait` for
+  the exact behavior.
 * [`RawFD`](@ref): Wait for changes on a file descriptor (see the `FileWatching` package).
 
 If no argument is passed, the task blocks for an undefined period. A task can only be
@@ -122,6 +123,16 @@ restarted by an explicit call to [`schedule`](@ref) or [`yieldto`](@ref).
 Often `wait` is called within a `while` loop to ensure a waited-for condition is met before
 proceeding.
 """
+function wait end
+
+"""
+    wait(c::GenericCondition; first::Bool=false)
+
+Wait for [`notify`](@ref) on `c` and return the `val` parameter passed to `notify`.
+
+If the keyword `first` is set to `true`, the waiter will be put _first_
+in line to wake up on `notify`. Otherwise, `wait` has first-in-first-out (FIFO) behavior.
+"""
 function wait(c::GenericCondition; first::Bool=false)
     ct = current_task()
     _wait2(c, ct, first)
@@ -129,7 +140,7 @@ function wait(c::GenericCondition; first::Bool=false)
     try
         return wait()
     catch
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         rethrow()
     finally
         relockall(c.lock, token)
@@ -175,8 +186,9 @@ isempty(c::GenericCondition) = isempty(c.waitq)
 
 Create an edge-triggered event source that tasks can wait for. Tasks that call [`wait`](@ref) on a
 `Condition` are suspended and queued. Tasks are woken up when [`notify`](@ref) is later called on
-the `Condition`. Edge triggering means that only tasks waiting at the time [`notify`](@ref) is
-called can be woken up. For level-triggered notifications, you must keep extra state to keep
+the `Condition`. Waiting on a condition can return a value or raise an error if the optional arguments
+of [`notify`](@ref) are used. Edge triggering means that only tasks waiting at the time [`notify`](@ref)
+is called can be woken up. For level-triggered notifications, you must keep extra state to keep
 track of whether a notification has happened. The [`Channel`](@ref) and [`Threads.Event`](@ref) types do
 this, and can be used for level-triggered events.
 
@@ -184,6 +196,8 @@ This object is NOT thread-safe. See [`Threads.Condition`](@ref) for a thread-saf
 """
 const Condition = GenericCondition{AlwaysLockedST}
 
+show(io::IO, ::Condition) = print(io, Condition, "()")
+
 lock(c::GenericCondition{AlwaysLockedST}) =
     throw(ArgumentError("`Condition` is not thread-safe. Please use `Threads.Condition` instead for multi-threaded code."))
 unlock(c::GenericCondition{AlwaysLockedST}) =
diff --git a/base/coreio.jl b/base/coreio.jl
index 3e508c64a0a64..b5c543a25d5ad 100644
--- a/base/coreio.jl
+++ b/base/coreio.jl
@@ -1,8 +1,13 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+print(x) = print(stdout, x)
+print(x1, x2) = print(stdout, x1, x2)
+println(x) = print(stdout, x, "\n")
+println(x1, x2) = print(stdout, x1, x2, "\n")
+
 print(xs...)   = print(stdout, xs...)
-println(xs...) = println(stdout, xs...)
-println(io::IO) = print(io, '\n')
+println(xs...) = print(stdout, xs..., "\n")  # fewer allocations than `println(stdout, xs...)`
+println(io::IO) = print(io, "\n")
 
 function show end
 function repr end
@@ -11,6 +16,7 @@ struct DevNull <: IO end
 const devnull = DevNull()
 write(::DevNull, ::UInt8) = 1
 unsafe_write(::DevNull, ::Ptr{UInt8}, n::UInt)::Int = n
+closewrite(::DevNull) = nothing
 close(::DevNull) = nothing
 wait_close(::DevNull) = wait()
 bytesavailable(io::DevNull) = 0
diff --git a/base/coreir.jl b/base/coreir.jl
new file mode 100644
index 0000000000000..1cd226aae5f2d
--- /dev/null
+++ b/base/coreir.jl
@@ -0,0 +1,116 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+Core.PhiNode() = Core.PhiNode(Int32[], Any[])
+
+"""
+    struct Const
+        val
+    end
+
+The type representing a constant value.
+"""
+Core.Const
+
+"""
+    struct PartialStruct
+        typ
+        undefs::Vector{Union{Nothing,Bool}} # represents whether a given field may be undefined
+        fields::Vector{Any} # i-th element describes the lattice element for the i-th defined field
+    end
+
+This extended lattice element is introduced when we have information about an object's
+fields beyond what can be obtained from the object type. E.g. it represents a tuple where
+some elements are known to be constants or a struct whose `Any`-typed field is initialized
+with value whose type is concrete.
+
+- `typ` indicates the type of the object
+- `undefs` records defined-ness of each field
+- `fields` holds the lattice elements corresponding to each field of the object
+
+`fields` corresponds to the fields that `typ` can have.
+If `typ` is a struct that can have `n` fields, then `length(fields) == n`.
+A special case: if `typ` is a variable-length `Tuple`,
+then `length(fields) == datatype_min_ninitialized(typ) + 1`.
+The last element represents the `Vararg` element.
+
+`undefs` is a `Vector{Union{Nothing,Bool}}` with the same length as `fields`, encoding
+the following information about field defined-ness:
+- `undefs[i] === nothing` indicates the corresponding element in `fields` may be undefined
+- `undefs[i] === false` indicates the corresponding element in `fields` is guaranteed to be defined
+- `undefs[i] === true` indicates the corresponding element in `fields` is guaranteed to be undefined
+If `field[i]` is of type `Union{}`, it means the `i`-th field is never be initialized and
+thus never be defined. In this case, `undefs[i]` should always be `true`.
+
+The same applies if `typ` is a `Tuple`, and because of how `Tuple` elements are initialized,
+`undefs[i] === false` holds except that `undefs[end]` may be `nothing` when the last element
+is `Vararg`.
+"""
+Core.PartialStruct
+
+function Core.PartialStruct(typ::Type, undefs::Vector{Union{Nothing,Bool}}, fields::Vector{Any})
+    fldcnt = fieldcount_noerror(typ)
+    if fldcnt !== nothing
+        @assert fldcnt == length(fields)
+    else
+        @assert typ <: Tuple && isvarargtype(fields[end])
+        @assert datatype_min_ninitialized(typ) == length(fields) - 1
+        @assert undefs[end] === nothing
+    end
+    @assert length(fields) == length(undefs)
+    for i = 1:length(fields)
+        if fields[i] === Union{}
+            @assert undefs[i] === true "`Union{}` typed field should be strictly undefined"
+        end
+    end
+    return Core._PartialStruct(typ, undefs, fields)
+end
+
+# Legacy constructor
+function Core.PartialStruct(@nospecialize(typ), fields::Vector{Any})
+    undefs = partialstruct_init_undefs(typ, fields)
+    undefs === nothing && error("This object never exists at runtime")
+    return Core.PartialStruct(typ, undefs, fields)
+end
+
+function partialstruct_init_undefs(@nospecialize(typ), fields::Vector{Any})
+    nf = length(fields)
+    minf = datatype_min_ninitialized(typ)
+    for i = 1:minf
+        if fields[i] === Union{}
+            return nothing # disallow runtime-invalid `PartialStruct`
+        end
+    end
+    undefs = Union{Nothing,Bool}[nothing for _ in 1:nf]
+    for i in 1:minf
+        undefs[i] = false
+    end
+    for i = minf+1:nf
+        if fields[i] === Union{}
+            undefs[i] = true
+        end
+    end
+    return undefs
+end
+
+a::PartialStruct == b::PartialStruct = a.typ === b.typ && a.undefs == b.undefs && a.fields == b.fields
+
+function Base.getproperty(pstruct::Core.PartialStruct, name::Symbol)
+    name === :undefs && return getfield(pstruct, :undefs)::Vector{Union{Nothing,Bool}}
+    return getfield(pstruct, name)
+end
+
+"""
+    struct InterConditional
+        slot::Int
+        thentype
+        elsetype
+    end
+
+Similar to `Conditional`, but conveys inter-procedural constraints imposed on call arguments.
+This is separate from `Conditional` to catch logic errors: the lattice element name is `InterConditional`
+while processing a call, then `Conditional` everywhere else.
+"""
+Core.InterConditional
+
+Core.InterConditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) =
+    InterConditional(slot_id(var), thentype, elsetype)
diff --git a/base/cpuid.jl b/base/cpuid.jl
index 48930d8064ba9..0370bd33b83e5 100644
--- a/base/cpuid.jl
+++ b/base/cpuid.jl
@@ -21,7 +21,7 @@ Base.:<=(a::ISA, b::ISA) = a.features <= b.features
 Base.:<(a::ISA,  b::ISA) = a.features <  b.features
 Base.isless(a::ISA,  b::ISA) = a < b
 
-include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "features_h.jl"))  # include($BUILDROOT/base/features_h.jl)
+include(string(Base.BUILDROOT, "features_h.jl"))  # include($BUILDROOT/base/features_h.jl)
 
 # Keep in sync with `arch_march_isa_mapping`.
 const ISAs_by_family = Dict(
@@ -61,10 +61,17 @@ const ISAs_by_family = Dict(
         "a64fx" => ISA(Set((JL_AArch64_v8_2a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_sha2, JL_AArch64_ccpp, JL_AArch64_complxnum, JL_AArch64_fullfp16, JL_AArch64_sve))),
         "apple_m1" => ISA(Set((JL_AArch64_v8_5a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_aes, JL_AArch64_sha2, JL_AArch64_sha3, JL_AArch64_ccpp, JL_AArch64_complxnum, JL_AArch64_fp16fml, JL_AArch64_fullfp16, JL_AArch64_dotprod, JL_AArch64_rcpc, JL_AArch64_altnzcv))),
     ],
+    "riscv64" => [
+        "riscv64" => ISA(Set{UInt32}()),
+    ],
     "powerpc64le" => [
         # We have no way to test powerpc64le features yet, so we're only going to declare the lowest ISA:
         "power8" => ISA(Set{UInt32}()),
-    ]
+    ],
+    "riscv64" => [
+        # We have no way to test riscv64 features yet, so we're only going to declare the lowest ISA:
+        "riscv64" => ISA(Set{UInt32}()),
+    ],
 )
 
 # Test a CPU feature exists on the currently-running host
diff --git a/base/ctypes.jl b/base/ctypes.jl
index 26640ed82bef5..45f01b684902f 100644
--- a/base/ctypes.jl
+++ b/base/ctypes.jl
@@ -113,3 +113,7 @@ const Cfloat = Float32
 Equivalent to the native `double` c-type ([`Float64`](@ref)).
 """
 const Cdouble = Float64
+
+
+# we have no `Float16` alias, because C does not define a standard fp16 type. Julia follows
+# the _Float16 C ABI; if that becomes standard, we can add an appropriate alias here.
diff --git a/base/deepcopy.jl b/base/deepcopy.jl
index eae8974326d06..58c753705a61f 100644
--- a/base/deepcopy.jl
+++ b/base/deepcopy.jl
@@ -9,8 +9,11 @@
     deepcopy(x)
 
 Create a deep copy of `x`: everything is copied recursively, resulting in a fully
-independent object. For example, deep-copying an array produces a new array whose elements
-are deep copies of the original elements. Calling `deepcopy` on an object should generally
+independent object. For example, deep-copying an array creates deep copies of all
+the objects it contains and produces a new array with the consistent relationship
+structure (e.g., if the first two elements are the same object in the original array,
+the first two elements of the new array will also be the same `deepcopy`ed object).
+Calling `deepcopy` on an object should generally
 have the same effect as serializing and then deserializing it.
 
 While it isn't normally necessary, user-defined types can override the default `deepcopy`
@@ -20,6 +23,11 @@ where `T` is the type to be specialized for, and `dict` keeps track of objects c
 so far within the recursion. Within the definition, `deepcopy_internal` should be used
 in place of `deepcopy`, and the `dict` variable should be
 updated as appropriate before returning.
+
+!!! warning
+    It is better to avoid this function in favor of custom `copy` methods or use-case-specific
+    copying functions. `deepcopy` is slow and can easily copy too many objects, or generate an
+    object that violates invariants, since it does not respect abstraction boundaries.
 """
 function deepcopy(@nospecialize x)
     isbitstype(typeof(x)) && return x
@@ -34,7 +42,7 @@ deepcopy_internal(x::Module, stackdict::IdDict) = error("deepcopy of Modules not
 
 function deepcopy_internal(x::SimpleVector, stackdict::IdDict)
     if haskey(stackdict, x)
-        return stackdict[x]
+        return stackdict[x]::typeof(x)
     end
     y = Core.svec(Any[deepcopy_internal(x[i], stackdict) for i = 1:length(x)]...)
     stackdict[x] = y
@@ -43,7 +51,7 @@ end
 
 function deepcopy_internal(x::String, stackdict::IdDict)
     if haskey(stackdict, x)
-        return stackdict[x]
+        return stackdict[x]::typeof(x)
     end
     y = GC.@preserve x unsafe_string(pointer(x), sizeof(x))
     stackdict[x] = y
@@ -55,14 +63,16 @@ function deepcopy_internal(@nospecialize(x), stackdict::IdDict)
     nf = nfields(x)
     if ismutable(x)
         if haskey(stackdict, x)
-            return stackdict[x]
+            return stackdict[x]::typeof(x)
         end
         y = ccall(:jl_new_struct_uninit, Any, (Any,), T)
         stackdict[x] = y
         for i in 1:nf
             if isdefined(x, i)
                 xi = getfield(x, i)
-                xi = deepcopy_internal(xi, stackdict)::typeof(xi)
+                if !isbits(xi)
+                    xi = deepcopy_internal(xi, stackdict)::typeof(xi)
+                end
                 ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), y, i-1, xi)
             end
         end
@@ -73,7 +83,9 @@ function deepcopy_internal(@nospecialize(x), stackdict::IdDict)
         for i in 1:nf
             if isdefined(x, i)
                 xi = getfield(x, i)
-                xi = deepcopy_internal(xi, stackdict)::typeof(xi)
+                if !isbits(xi)
+                    xi = deepcopy_internal(xi, stackdict)::typeof(xi)
+                end
                 flds[i] = xi
             else
                 nf = i - 1 # rest of tail must be undefined values
@@ -85,30 +97,54 @@ function deepcopy_internal(@nospecialize(x), stackdict::IdDict)
     return y::T
 end
 
-function deepcopy_internal(x::Array, stackdict::IdDict)
+function deepcopy_internal(x::Memory, stackdict::IdDict)
     if haskey(stackdict, x)
         return stackdict[x]::typeof(x)
     end
-    _deepcopy_array_t(x, eltype(x), stackdict)
+    _deepcopy_memory_t(x, eltype(x), stackdict)
 end
 
-function _deepcopy_array_t(@nospecialize(x::Array), T, stackdict::IdDict)
+function _deepcopy_memory_t(@nospecialize(x::Memory), T, stackdict::IdDict)
     if isbitstype(T)
         return (stackdict[x]=copy(x))
     end
-    dest = similar(x)
+    dest = typeof(x)(undef, length(x))
     stackdict[x] = dest
+    xr = memoryref(x)
+    dr = memoryref(dest)
     for i = 1:length(x)
-        if ccall(:jl_array_isassigned, Cint, (Any, Csize_t), x, i-1) != 0
-            xi = ccall(:jl_arrayref, Any, (Any, Csize_t), x, i-1)
+        xi = Core.memoryrefnew(xr, i, false)
+        if Core.memoryref_isassigned(xi, :not_atomic, false)
+            xi = Core.memoryrefget(xi, :not_atomic, false)
             if !isbits(xi)
                 xi = deepcopy_internal(xi, stackdict)::typeof(xi)
             end
-            ccall(:jl_arrayset, Cvoid, (Any, Any, Csize_t), dest, xi, i-1)
+            di = Core.memoryrefnew(dr, i, false)
+            Core.memoryrefset!(di, xi, :not_atomic, false)
         end
     end
     return dest
 end
+function deepcopy_internal(x::Array{T, N}, stackdict::IdDict) where {T, N}
+    if haskey(stackdict, x)
+        return stackdict[x]::typeof(x)
+    end
+    y = stackdict[x] = Array{T, N}(undef, ntuple(Returns(0), Val{N}()))
+    setfield!(y, :ref, deepcopy_internal(x.ref, stackdict))
+    setfield!(y, :size, x.size)
+    y
+end
+function deepcopy_internal(x::GenericMemoryRef, stackdict::IdDict)
+    if haskey(stackdict, x)
+        return stackdict[x]::typeof(x)
+    end
+    mem = getfield(x, :mem)
+    dest = memoryref(deepcopy_internal(mem, stackdict)::typeof(mem))
+    i = memoryrefoffset(x)
+    i == 1 || (dest = Core.memoryrefnew(dest, i, true))
+    return dest
+end
+
 
 function deepcopy_internal(x::Union{Dict,IdDict}, stackdict::IdDict)
     if haskey(stackdict, x)
@@ -129,7 +165,7 @@ end
 
 function deepcopy_internal(x::AbstractLock, stackdict::IdDict)
     if haskey(stackdict, x)
-        return stackdict[x]
+        return stackdict[x]::typeof(x)
     end
     y = typeof(x)()
     stackdict[x] = y
@@ -138,7 +174,7 @@ end
 
 function deepcopy_internal(x::GenericCondition, stackdict::IdDict)
     if haskey(stackdict, x)
-        return stackdict[x]
+        return stackdict[x]::typeof(x)
     end
     y = typeof(x)(deepcopy_internal(x.lock, stackdict))
     stackdict[x] = y
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 1b661716cc2d9..7045a52aba98e 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -1,5 +1,116 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# Internal changes mechanism.
+# Instructions for Julia Core Developers:
+# 1. When making a breaking change that is known to be depnedet upon by an
+#    important and closely coupled package, decide on a unique `change_name`
+#    for your PR and add it to the list below. In general, it is better to
+#    err on the side of caution and assign a `change_name` even if it is not
+#    clear that it is required. `change_name`s may also be assigned after the
+#    fact in a separate PR. (Note that this may cause packages to misbehave
+#    on versions in between the change and the assignment of the `change_name`,
+#    but this is often still better than the alternative of misbehaving on unknown
+#    versions).
+
+# Instructions for Release Managers:
+# 1. Upon tagging any release, clear the list of internal changes.
+# 2. Upon tagging an -alpha version
+#    a. On master, set __next_removal_version to v"1.(x+1)-alpha"
+#    b. On the release branch, set __next_removal_version to v"1.x" (no -alpha)
+# 3. Upong tagging a release candidate, clear the list of internal changes and
+#    set __next_removal_version to `nothing`.
+const __next_removal_version = v"1.12-alpha"
+const __internal_changes_list = (
+    :invertedlinetables,
+    :codeinforefactor,
+    :miuninferredrm,
+    :codeinfonargs, #54341
+    :ocnopartial,
+    :printcodeinfocalls,
+    :syntacticccall, #59165
+    # Add new change names above this line
+)
+
+if !isempty(__internal_changes_list)
+    if VERSION == __next_removal_version
+        error("You have tagged a new release without clearing the internal changes list.")
+    end
+elseif __next_removal_version === nothing
+    error("You have tagged a new release candidate without clearing the internal changes list.")
+end
+
+"""
+    __has_internal_change(version_or::VersionNumber, change_name::Symbol)
+
+Some Julia packages have known dependencies on Julia internals (e.g. for introspection of
+internal julia datastructures). To ease the co-development of such packages with julia,
+a `change_name` is assigned on a best-effort basis or when explicitly requested.
+This `change_name` can be used to probe whether or not the particular pre-release build of julia has
+a particular change. In particular this function tests change scheduled for `version_or`
+is present in our current julia build, either because our current version
+is greater than `version_or` or because we're running a pre-release build that
+includes the change.
+
+Using this mechanism is a superior alternative to commit-number based `VERSION`
+comparisons, which can be brittle during pre-release stages when there are multiple
+actively developed branches.
+
+The list of changes is cleared twice during the release process:
+1. With the release of the first alpha
+2. For the first release candidate
+
+No new `change_name`s will be added during release candidates or bugfix releases
+(so in particular on any released version, the list of changes will be empty and
+`__has_internal_change` will always be equivalent to a version comparison.
+
+# Example
+
+Julia version `v"1.12.0-DEV.173"` changed the internal representation of line number debug info.
+Several debugging packages have custom code to display this information and need to be changed
+accordingly. In previous practice, this would often be accomplished with something like the following
+```
+@static if VERSION > v"1.12.0-DEV.173"
+    # Code to handle new format
+else
+    # Code to handle old format
+end
+```
+
+However, because such checks cannot be introduced until a VERSION number is assigned
+(which also automatically pushes out the change to all nightly users), there was a builtin period
+of breakage. With `__has_internal_change`, this can instead be written as:
+
+```
+@static if __has_internal_change(v"1.12-alpha", :invertedlinenames)
+    # Code to handle new format
+else
+    # Code to handle old format
+end
+```
+
+To find out the correct version to use as the first argument, you may use
+`Base.__next_removal_version`, which is set to the next version number in which
+the list of changes will be cleared.
+
+The primary advantage of this approach is that it allows a new version of the
+package to be tagged and released *in advance* of the break on the nightly
+build, thus ensuring continuity of package operation for nightly users.
+
+!!! warning
+
+    This functionality is intended to help package developers which make use of
+    internal julia functionality. Doing so is explicitly discouraged unless absolutely
+    required and comes with the explicit understanding that the package will break.
+    In particular, this is not a generic feature-testing mechanism, but only a
+    simple, courtesy coordination mechanism for changes that are known (or found) to
+    be breaking a package depending on julia internals.
+"""
+function __has_internal_change(version_or::VersionNumber, change_name::Symbol)
+    VERSION > version_or && return true
+    change_name in __internal_changes_list
+end
+export __has_internal_change
+
 # Deprecated functions and objects
 #
 # Please add new deprecations at the bottom of the file.
@@ -10,9 +121,7 @@
 # and of exporting the function.
 #
 # For more complex cases, move the body of the deprecated method in this file,
-# and call depwarn() directly from inside it. The symbol depwarn() expects is
-# the name of the function, which is used to ensure that the deprecation warning
-# is only printed the first time for each call place.
+# and call depwarn() directly from inside it.
 
 """
     @deprecate old new [export_old=true]
@@ -22,6 +131,8 @@ with the specified signature in the process.
 
 To prevent `old` from being exported, set `export_old` to `false`.
 
+See also [`Base.depwarn()`](@ref).
+
 !!! compat "Julia 1.5"
     As of Julia 1.5, functions defined by `@deprecate` do not print warning when `julia`
     is run without the `--depwarn=yes` flag set, as the default value of `--depwarn` option
@@ -29,11 +140,11 @@ To prevent `old` from being exported, set `export_old` to `false`.
 
 # Examples
 ```jldoctest
-julia> @deprecate old(x) new(x)
-old (generic function with 1 method)
+julia> @deprecate old_export(x) new(x)
+old_export (generic function with 1 method)
 
-julia> @deprecate old(x) new(x) false
-old (generic function with 1 method)
+julia> @deprecate old_public(x) new(x) false
+old_public (generic function with 1 method)
 ```
 
 Calls to `@deprecate` without explicit type-annotations will define
@@ -101,7 +212,7 @@ macro deprecate(old, new, export_old=true)
             maybe_export,
             :($(esc(old)) = begin
                   $meta
-                  depwarn($"`$oldcall` is deprecated, use `$newcall` instead.", Core.Typeof($(esc(fnexpr))).name.mt.name)
+                  depwarn($"`$oldcall` is deprecated, use `$newcall` instead.", Core.Typeof($(esc(fnexpr))).name.singletonname)
                   $(esc(new))
               end))
     else
@@ -112,13 +223,41 @@ macro deprecate(old, new, export_old=true)
             export_old ? Expr(:export, esc(old)) : nothing,
             :(function $(esc(old))(args...; kwargs...)
                   $meta
-                  depwarn($"`$old` is deprecated, use `$new` instead.", Core.Typeof($(esc(old))).name.mt.name)
+                  depwarn($"`$old` is deprecated, use `$new` instead.", Core.Typeof($(esc(old))).name.singletonname)
                   $(esc(new))(args...; kwargs...)
               end))
     end
 end
 
-function depwarn(msg, funcsym; force::Bool=false)
+"""
+    Base.depwarn(msg::String, funcsym::Symbol; force=false)
+
+Print `msg` as a deprecation warning. The symbol `funcsym` should be the name
+of the calling function, which is used to ensure that the deprecation warning is
+only printed the first time for each call place. Set `force=true` to force the
+warning to always be shown, even if Julia was started with `--depwarn=no` (the
+default).
+
+See also [`@deprecate`](@ref).
+
+# Examples
+```julia
+function deprecated_func()
+    Base.depwarn("Don't use `deprecated_func()`!", :deprecated_func)
+
+    1 + 1
+end
+```
+"""
+@nospecializeinfer function depwarn(msg, funcsym; force::Bool=false)
+    @nospecialize
+    # N.B. With this use of `@invokelatest`, we're preventing the addition of backedges from
+    # callees, such as `convert`, to this user-facing method. This approach is designed to
+    # enhance the resilience of packages that utilize `depwarn` against invalidation.
+    return @invokelatest _depwarn(msg, funcsym, force)
+end
+@nospecializeinfer function _depwarn(msg, funcsym, force::Bool)
+    @nospecialize
     opts = JLOptions()
     if opts.depwarn == 2
         throw(ErrorException(msg))
@@ -215,6 +354,7 @@ end
 @deprecate one(i::CartesianIndex)                    oneunit(i)
 @deprecate one(I::Type{CartesianIndex{N}}) where {N} oneunit(I)
 
+import .MPFR: BigFloat
 @deprecate BigFloat(x, prec::Int)                               BigFloat(x; precision=prec)
 @deprecate BigFloat(x, prec::Int, rounding::RoundingMode)       BigFloat(x, rounding; precision=prec)
 @deprecate BigFloat(x::Real, prec::Int)                         BigFloat(x; precision=prec)
@@ -271,14 +411,10 @@ getindex(match::Core.MethodMatch, field::Int) =
 # these were internal functions, but some packages seem to be relying on them
 tuple_type_head(T::Type) = fieldtype(T, 1)
 tuple_type_cons(::Type, ::Type{Union{}}) = Union{}
-function tuple_type_cons(::Type{S}, ::Type{T}) where T<:Tuple where S
-    @_foldable_meta
+@assume_effects :foldable tuple_type_cons(::Type{S}, ::Type{T}) where T<:Tuple where S =
     Tuple{S, T.parameters...}
-end
-function parameter_upper_bound(t::UnionAll, idx)
-    @_foldable_meta
-    return rewrap_unionall((unwrap_unionall(t)::DataType).parameters[idx], t)
-end
+@assume_effects :foldable parameter_upper_bound(t::UnionAll, idx) =
+    rewrap_unionall((unwrap_unionall(t)::DataType).parameters[idx], t)
 
 # these were internal functions, but some packages seem to be relying on them
 @deprecate cat_shape(dims, shape::Tuple{}, shapes::Tuple...) cat_shape(dims, shapes) false
@@ -299,7 +435,8 @@ const All16{T,N} = Tuple{T,T,T,T,T,T,T,T,
 
 # the plan is to eventually overload getproperty to access entries of the dict
 @noinline function getproperty(x::Pairs, s::Symbol)
-    depwarn("use values(kwargs) and keys(kwargs) instead of kwargs.data and kwargs.itr", :getproperty, force=true)
+    s == :data && depwarn("use values(kwargs) instead of kwargs.data", :getproperty, force=true)
+    s == :itr && depwarn("use keys(kwargs) instead of kwargs.itr", :getproperty, force=true)
     return getfield(x, s)
 end
 
@@ -384,3 +521,65 @@ macro pure(ex)
 end
 
 # END 1.10 deprecations
+
+# BEGIN 1.11 deprecations
+
+# these were never a part of the public API and so they can be removed without deprecation
+# in a minor release but we're being nice and trying to avoid transient breakage.
+@deprecate permute!!(a, p::AbstractVector{<:Integer}) permute!(a, p) false
+@deprecate invpermute!!(a, p::AbstractVector{<:Integer}) invpermute!(a, p) false
+
+# END 1.11 deprecations
+
+# BEGIN 1.12 deprecations
+
+@deprecate isbindingresolved(m::Module, var::Symbol) true false
+
+"""
+    isbindingresolved(m::Module, s::Symbol) -> Bool
+
+Return whether the binding of a symbol in a module is resolved.
+
+See also: [`isexported`](@ref), [`ispublic`](@ref), [`isdeprecated`](@ref)
+
+```jldoctest
+julia> module Mod
+           foo() = 17
+       end
+Mod
+
+julia> Base.isbindingresolved(Mod, :foo)
+true
+```
+
+!!! warning
+    This function is deprecated. The concept of binding "resolvedness" was removed in Julia 1.12.
+    The function now always returns `true`.
+"""
+isbindingresolved
+
+# Some packages call this function
+function to_power_type(x::Number)
+    T = promote_type(typeof(x), typeof(x*x))
+    convert(T, x)
+end
+to_power_type(x) = oftype(x*x, x)
+
+# END 1.12 deprecations
+
+# BEGIN 1.13 deprecations
+
+@deprecate merge(combine::Callable, d::AbstractDict, others::AbstractDict...) mergewith(combine, d, others...)
+
+# end 1.13 deprecations
+
+# BEGIN 1.14 deprecations
+
+# Revise calls this
+function explicit_manifest_entry_path(args...)
+    spec = explicit_manifest_entry_load_spec(args...)
+    spec === nothing && return nothing
+    return spec.path
+end
+
+# END 1.14 deprecations
diff --git a/base/dict.jl b/base/dict.jl
index 8a78c1fa8da45..32562ec62e622 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -38,7 +38,7 @@ Given a single iterable argument, constructs a [`Dict`](@ref) whose key-value pa
 are taken from 2-tuples `(key,value)` generated by the argument.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> Dict([("A", 1), ("B", 2)])
 Dict{String, Int64} with 2 entries:
   "B" => 2
@@ -47,18 +47,26 @@ Dict{String, Int64} with 2 entries:
 
 Alternatively, a sequence of pair arguments may be passed.
 
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> Dict("A"=>1, "B"=>2)
 Dict{String, Int64} with 2 entries:
   "B" => 2
   "A" => 1
 ```
+
+!!! warning
+
+    Keys are allowed to be mutable, but if you do mutate stored
+    keys, the hash table may become internally inconsistent, in which case
+    the `Dict` will not work properly. [`IdDict`](@ref) can be an
+    alternative if you need to mutate keys.
+
 """
 mutable struct Dict{K,V} <: AbstractDict{K,V}
     # Metadata: empty => 0x00, removed => 0x7f, full => 0b1[7 most significant hash bits]
-    slots::Vector{UInt8}
-    keys::Array{K,1}
-    vals::Array{V,1}
+    slots::Memory{UInt8}
+    keys::Memory{K}
+    vals::Memory{V}
     ndel::Int
     count::Int
     age::UInt
@@ -66,14 +74,16 @@ mutable struct Dict{K,V} <: AbstractDict{K,V}
     maxprobe::Int
 
     function Dict{K,V}() where V where K
-        n = 16
-        new(zeros(UInt8,n), Vector{K}(undef, n), Vector{V}(undef, n), 0, 0, 0, n, 0)
+        n = 0
+        slots = Memory{UInt8}(undef,n)
+        fill!(slots, 0x0)
+        new(slots, Memory{K}(undef, n), Memory{V}(undef, n), 0, 0, 0, max(1, n), 0)
     end
     function Dict{K,V}(d::Dict{K,V}) where V where K
         new(copy(d.slots), copy(d.keys), copy(d.vals), d.ndel, d.count, d.age,
             d.idxfloor, d.maxprobe)
     end
-    function Dict{K, V}(slots, keys, vals, ndel, count, age, idxfloor, maxprobe) where {K, V}
+    function Dict{K, V}(slots::Memory{UInt8}, keys::Memory{K}, vals::Memory{V}, ndel::Int, count::Int, age::UInt, idxfloor::Int, maxprobe::Int) where {K, V}
         new(slots, keys, vals, ndel, count, age, idxfloor, maxprobe)
     end
 end
@@ -104,45 +114,7 @@ const AnyDict = Dict{Any,Any}
 Dict(ps::Pair{K,V}...) where {K,V} = Dict{K,V}(ps)
 Dict(ps::Pair...)                  = Dict(ps)
 
-function Dict(kv)
-    try
-        dict_with_eltype((K, V) -> Dict{K, V}, kv, eltype(kv))
-    catch
-        if !isiterable(typeof(kv)) || !all(x->isa(x,Union{Tuple,Pair}),kv)
-            throw(ArgumentError("Dict(kv): kv needs to be an iterator of tuples or pairs"))
-        else
-            rethrow()
-        end
-    end
-end
-
-function grow_to!(dest::AbstractDict{K, V}, itr) where V where K
-    y = iterate(itr)
-    y === nothing && return dest
-    ((k,v), st) = y
-    dest2 = empty(dest, typeof(k), typeof(v))
-    dest2[k] = v
-    grow_to!(dest2, itr, st)
-end
-
-# this is a special case due to (1) allowing both Pairs and Tuples as elements,
-# and (2) Pair being invariant. a bit annoying.
-function grow_to!(dest::AbstractDict{K,V}, itr, st) where V where K
-    y = iterate(itr, st)
-    while y !== nothing
-        (k,v), st = y
-        if isa(k,K) && isa(v,V)
-            dest[k] = v
-        else
-            new = empty(dest, promote_typejoin(K,typeof(k)), promote_typejoin(V,typeof(v)))
-            merge!(new, dest)
-            new[k] = v
-            return grow_to!(new, itr, st)
-        end
-        y = iterate(itr, st)
-    end
-    return dest
-end
+Dict(kv) = dict_with_eltype((K, V) -> Dict{K, V}, kv, eltype(kv))
 
 empty(a::AbstractDict, ::Type{K}, ::Type{V}) where {K, V} = Dict{K, V}()
 
@@ -152,9 +124,10 @@ _shorthash7(hsh::UInt) = (hsh >> (8sizeof(UInt)-7))%UInt8 | 0x80
 # hashindex (key, sz) - computes optimal position and shorthash7
 #     idx - optimal position in the hash table
 #     sh::UInt8 - short hash (7 highest hash bits)
-function hashindex(key, sz)
+function hashindex(key, sz::Integer)
+    sz = Int(sz)::Int
     hsh = hash(key)::UInt
-    idx = (((hsh % Int) & (sz-1)) + 1)::Int
+    idx = ((hsh % Int) & (sz-1)) + 1
     return idx, _shorthash7(hsh)
 end
 
@@ -171,17 +144,20 @@ end
     h.age += 1
     h.idxfloor = 1
     if h.count == 0
-        resize!(h.slots, newsz)
+        # TODO: tryresize
+        h.slots = Memory{UInt8}(undef, newsz)
         fill!(h.slots, 0x0)
-        resize!(h.keys, newsz)
-        resize!(h.vals, newsz)
+        h.keys = Memory{K}(undef, newsz)
+        h.vals = Memory{V}(undef, newsz)
         h.ndel = 0
+        h.maxprobe = 0
         return h
     end
 
-    slots = zeros(UInt8,newsz)
-    keys = Vector{K}(undef, newsz)
-    vals = Vector{V}(undef, newsz)
+    slots = Memory{UInt8}(undef, newsz)
+    fill!(slots, 0x0)
+    keys = Memory{K}(undef, newsz)
+    vals = Memory{V}(undef, newsz)
     age0 = h.age
     count = 0
     maxprobe = 0
@@ -190,7 +166,7 @@ end
         @inbounds if (olds[i] & 0x80) != 0
             k = oldk[i]
             v = oldv[i]
-            index, sh = hashindex(k, newsz)
+            index, _ = hashindex(k, newsz)
             index0 = index
             while slots[index] != 0
                 index = (index & (newsz-1)) + 1
@@ -215,13 +191,14 @@ end
     return h
 end
 
-function sizehint!(d::Dict{T}, newsz) where T
+function sizehint!(d::Dict{T}, newsz::Integer; shrink::Bool=true) where T
+    newsz = Int(newsz)::Int
     oldsz = length(d.slots)
     # limit new element count to max_values of the key type
     newsz = min(max(newsz, length(d)), max_values(T)::Int)
     # need at least 1.5n space to hold n elements
     newsz = _tablesz(cld(3 * newsz, 2))
-    return newsz == oldsz ? d : rehash!(d, newsz)
+    return (shrink ? newsz == oldsz : newsz <= oldsz) ? d : rehash!(d, newsz)
 end
 
 """
@@ -230,7 +207,7 @@ end
 Remove all elements from a `collection`.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> A = Dict("a" => 1, "b" => 2)
 Dict{String, Int64} with 2 entries:
   "b" => 2
@@ -245,19 +222,20 @@ Dict{String, Int64}()
 function empty!(h::Dict{K,V}) where V where K
     fill!(h.slots, 0x0)
     sz = length(h.slots)
-    empty!(h.keys)
-    empty!(h.vals)
-    resize!(h.keys, sz)
-    resize!(h.vals, sz)
+    for i in 1:sz
+        _unsetindex!(h.keys, i)
+        _unsetindex!(h.vals, i)
+    end
     h.ndel = 0
     h.count = 0
+    h.maxprobe = 0
     h.age += 1
-    h.idxfloor = sz
+    h.idxfloor = max(1, sz)
     return h
 end
 
 # get the index where a key is stored, or -1 if not present
-@assume_effects :terminates_locally function ht_keyindex(h::Dict{K,V}, key) where V where K
+function ht_keyindex(h::Dict{K,V}, key) where V where K
     isempty(h) && return -1
     sz = length(h.keys)
     iter = 0
@@ -266,9 +244,9 @@ end
     index, sh = hashindex(key, sz)
     keys = h.keys
 
-    @inbounds while true
+    @assume_effects :terminates_locally :noub @inbounds while true
         isslotempty(h,index) && return -1
-        if h.slots[index] == sh
+        if sh == h.slots[index]
             k = keys[index]
             if (key ===  k || isequal(key, k))
                 return index
@@ -288,6 +266,11 @@ end
 # This version is for use by setindex! and get!
 function ht_keyindex2_shorthash!(h::Dict{K,V}, key) where V where K
     sz = length(h.keys)
+    if sz == 0 # if Dict was empty resize and then return location to insert
+        rehash!(h, 4)
+        index, sh = hashindex(key, length(h.keys))
+        return -index, sh
+    end
     iter = 0
     maxprobe = h.maxprobe
     index, sh = hashindex(key, sz)
@@ -353,7 +336,7 @@ ht_keyindex2!(h::Dict, key) = ht_keyindex2_shorthash!(h, key)[1]
     # Rehash now if necessary
     if (h.count + h.ndel)*3 > sz*2
         # > 2/3 full (including tombstones)
-        rehash!(h, h.count > 64000 ? h.count*2 : h.count*4)
+        rehash!(h, h.count > 64000 ? h.count*2 : max(h.count*4, 4))
     end
     nothing
 end
@@ -364,7 +347,7 @@ function setindex!(h::Dict{K,V}, v0, key0) where V where K
     else
         key = convert(K, key0)::K
         if !(isequal(key, key0)::Bool)
-            throw(ArgumentError("$(limitrepr(key0)) is not a valid key for type $K"))
+            throw(KeyTypeError(K, key0))
         end
     end
     setindex!(h, v0, key)
@@ -408,7 +391,7 @@ Return the value stored for the given key, or if no mapping for the key is prese
 `key => default`, and return `default`.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> d = Dict("a"=>1, "b"=>2, "c"=>3);
 
 julia> get!(d, "a", 5)
@@ -462,7 +445,7 @@ function get!(default::Callable, h::Dict{K,V}, key0) where V where K
     else
         key = convert(K, key0)::K
         if !isequal(key, key0)
-            throw(ArgumentError("$(limitrepr(key0)) is not a valid key for type $K"))
+            throw(KeyTypeError(K, key0))
         end
     end
     return get!(default, h, key)
@@ -493,7 +476,7 @@ end
 
 function getindex(h::Dict{K,V}, key) where V where K
     index = ht_keyindex(h, key)
-    @inbounds return (index < 0) ? throw(KeyError(key)) : h.vals[index]::V
+    return index < 0 ? throw(KeyError(key)) : @assume_effects :noub @inbounds h.vals[index]::V
 end
 
 """
@@ -546,12 +529,12 @@ function get(default::Callable, h::Dict{K,V}, key) where V where K
 end
 
 """
-    haskey(collection, key) -> Bool
+    haskey(collection, key)::Bool
 
 Determine whether a collection has a mapping for a given `key`.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> D = Dict('a'=>2, 'b'=>3)
 Dict{Char, Int64} with 2 entries:
   'a' => 2
@@ -573,7 +556,7 @@ in(key, v::KeySet{<:Any, <:Dict}) = (ht_keyindex(v.dict, key) >= 0)
 Return the key matching argument `key` if one exists in `collection`, otherwise return `default`.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> D = Dict('a'=>2, 'b'=>3)
 Dict{Char, Int64} with 2 entries:
   'a' => 2
@@ -673,7 +656,7 @@ end
 Delete the mapping for the given key in a collection, if any, and return the collection.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> d = Dict("a"=>1, "b"=>2)
 Dict{String, Int64} with 2 entries:
   "b" => 2
@@ -715,15 +698,17 @@ function skip_deleted_floor!(h::Dict)
     idx
 end
 
-@propagate_inbounds _iterate(t::Dict{K,V}, i) where {K,V} = i == 0 ? nothing : (Pair{K,V}(t.keys[i],t.vals[i]), i == typemax(Int) ? 0 : i+1)
+@propagate_inbounds _iterate_dict(t::Dict{K,V}, i) where {K,V} = i == 0 ? nothing : (Pair{K,V}(t.keys[i],t.vals[i]), i == typemax(Int) ? 0 : i+1)
 @propagate_inbounds function iterate(t::Dict)
-    _iterate(t, skip_deleted(t, t.idxfloor))
+    _iterate_dict(t, skip_deleted(t, t.idxfloor))
 end
-@propagate_inbounds iterate(t::Dict, i) = _iterate(t, skip_deleted(t, i))
+@propagate_inbounds iterate(t::Dict, i) = _iterate_dict(t, skip_deleted(t, i))
 
 isempty(t::Dict) = (t.count == 0)
 length(t::Dict) = t.count
 
+@propagate_inbounds Iterators.only(t::Dict) = Iterators._only(t, first)
+
 @propagate_inbounds function Base.iterate(v::T, i::Int = v.dict.idxfloor) where T <: Union{KeySet{<:Any, <:Dict}, ValueIterator{<:Dict}}
     i == 0 && return nothing
     i = skip_deleted(v.dict, i)
@@ -761,7 +746,7 @@ function map!(f, iter::ValueIterator{<:Dict})
 end
 
 function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V}
-    haslength(d2) && sizehint!(d1, length(d1) + length(d2))
+    haslength(d2) && sizehint!(d1, length(d1) + length(d2), shrink=false)
     for (k, v) in d2
         i, sh = ht_keyindex2_shorthash!(d1, k)
         if i > 0
@@ -770,7 +755,7 @@ function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V}
             if !(k isa K)
                 k1 = convert(K, k)::K
                 if !isequal(k, k1)
-                    throw(ArgumentError("$(limitrepr(k)) is not a valid key for type $K"))
+                    throw(KeyTypeError(K, k))
                 end
                 k = k1
             end
@@ -869,3 +854,180 @@ empty(::ImmutableDict, ::Type{K}, ::Type{V}) where {K, V} = ImmutableDict{K,V}()
 _similar_for(c::AbstractDict, ::Type{Pair{K,V}}, itr, isz, len) where {K, V} = empty(c, K, V)
 _similar_for(c::AbstractDict, ::Type{T}, itr, isz, len) where {T} =
     throw(ArgumentError("for AbstractDicts, similar requires an element type of Pair;\n  if calling map, consider a comprehension instead"))
+
+
+include("hamt.jl")
+using .HashArrayMappedTries
+using Core.OptimizedGenerics: KeyValue
+const HAMT = HashArrayMappedTries
+
+struct PersistentDict{K,V} <: AbstractDict{K,V}
+    trie::HAMT.HAMT{K,V}
+    # Serves as a marker for an empty initialization
+    @noinline function KeyValue.set(::Type{PersistentDict{K, V}}) where {K, V}
+        new{K, V}(HAMT.HAMT{K,V}())
+    end
+    @noinline function KeyValue.set(::Type{PersistentDict{K, V}}, ::Nothing, key, val) where {K, V}
+        new{K, V}(HAMT.HAMT{K, V}(key => val))
+    end
+    @noinline Base.@assume_effects :effect_free :terminates_globally KeyValue.set(
+        dict::PersistentDict{K, V}, key, val) where {K, V} = @inline _keyvalueset(dict, key, val)
+    @noinline Base.@assume_effects :nothrow :effect_free :terminates_globally KeyValue.set(
+        dict::PersistentDict{K, V}, key::K, val::V) where {K, V} = @inline _keyvalueset(dict, key, val)
+    global function _keyvalueset(dict::PersistentDict{K, V}, key, val) where {K, V}
+        trie = dict.trie
+        h = HAMT.HashState(key)
+        found, present, trie, i, bi, top, hs = HAMT.path(trie, key, h, #=persistent=#true)
+        HAMT.insert!(found, present, trie, i, bi, hs, val)
+        return new{K, V}(top)
+    end
+    @noinline Base.@assume_effects :effect_free :terminates_globally KeyValue.set(
+        dict::PersistentDict{K, V}, key) where {K, V} = @inline _keyvalueset(dict, key)
+    @noinline Base.@assume_effects :nothrow :effect_free :terminates_globally KeyValue.set(
+        dict::PersistentDict{K, V}, key::K) where {K, V} = @inline _keyvalueset(dict, key)
+    global function _keyvalueset(dict::PersistentDict{K, V}, key) where {K, V}
+        trie = dict.trie
+        h = HAMT.HashState(key)
+        found, present, trie, i, bi, top, _ = HAMT.path(trie, key, h, #=persistent=#true)
+        if found && present
+            deleteat!(trie.data, i)
+            HAMT.unset!(trie, bi)
+        end
+        return new{K, V}(top)
+    end
+end
+
+"""
+    PersistentDict
+
+`PersistentDict` is a dictionary implemented as a hash array mapped trie,
+which is optimal for situations where you need persistence, each operation
+returns a new dictionary separate from the previous one, but the underlying
+implementation is space-efficient and may share storage across multiple
+separate dictionaries.
+
+!!! note
+    It behaves like an IdDict.
+
+```julia
+PersistentDict(KV::Pair)
+```
+
+# Examples
+
+```jldoctest
+julia> dict = Base.PersistentDict(:a=>1)
+Base.PersistentDict{Symbol, Int64} with 1 entry:
+  :a => 1
+
+julia> dict2 = Base.delete(dict, :a)
+Base.PersistentDict{Symbol, Int64}()
+
+julia> dict3 = Base.PersistentDict(dict, :a=>2)
+Base.PersistentDict{Symbol, Int64} with 1 entry:
+  :a => 2
+```
+"""
+PersistentDict
+
+PersistentDict{K,V}() where {K, V} = KeyValue.set(PersistentDict{K,V})
+function PersistentDict{K,V}(KV::Pair) where {K,V}
+    KeyValue.set(
+        PersistentDict{K, V},
+        nothing,
+        KV...)
+end
+function PersistentDict(KV::Pair{K,V}) where {K,V}
+    KeyValue.set(
+        PersistentDict{K, V},
+        nothing,
+        KV...)
+end
+PersistentDict(dict::PersistentDict, pair::Pair) = PersistentDict(dict, pair...)
+PersistentDict{K,V}(dict::PersistentDict{K,V}, pair::Pair) where {K,V} = PersistentDict(dict, pair...)
+
+
+function PersistentDict(dict::PersistentDict{K,V}, key, val) where {K,V}
+    key = convert(K, key)
+    val = convert(V, val)
+    return KeyValue.set(dict, key, val)
+end
+
+function PersistentDict{K,V}(KV::Pair, rest::Pair...) where {K,V}
+    dict = PersistentDict{K,V}(KV)
+    for (key, value) in rest
+        dict = PersistentDict(dict, key, value)
+    end
+    return dict
+end
+
+function PersistentDict(kv::Pair, rest::Pair...)
+    dict = PersistentDict(kv)
+    for (key, value) in rest
+        dict = PersistentDict(dict, key, value)
+    end
+    return dict
+end
+
+eltype(::PersistentDict{K,V}) where {K,V} = Pair{K,V}
+
+function in(key_val::Pair{K,V}, dict::PersistentDict{K,V}, valcmp=(==)) where {K,V}
+    key, val = key_val
+    found = KeyValue.get(dict, key)
+    found === nothing && return false
+    return valcmp(val, only(found))
+end
+
+function haskey(dict::PersistentDict{K}, key::K) where K
+    return KeyValue.get(dict, key) !== nothing
+end
+
+function getindex(dict::PersistentDict{K,V}, key::K) where {K,V}
+    found = KeyValue.get(dict, key)
+    found === nothing && throw(KeyError(key))
+    return only(found)
+end
+
+function get(dict::PersistentDict{K,V}, key::K, default) where {K,V}
+    found = KeyValue.get(dict, key)
+    found === nothing && return default
+    return only(found)
+end
+
+@noinline function KeyValue.get(dict::PersistentDict{K, V}, key) where {K, V}
+    trie = dict.trie
+    if HAMT.islevel_empty(trie)
+        return nothing
+    end
+    h = HAMT.HashState(key)
+    found, present, trie, i, _, _, _ = HAMT.path(trie, key, h)
+    if found && present
+        leaf = @inbounds trie.data[i]::HAMT.Leaf{K,V}
+        return (leaf.val,)
+    end
+    return nothing
+end
+
+@noinline function KeyValue.get(default, dict::PersistentDict, key)
+    found = KeyValue.get(dict, key)
+    found === nothing && return default()
+    return only(found)
+end
+
+function get(default::Callable, dict::PersistentDict{K,V}, key::K) where {K,V}
+    found = KeyValue.get(dict, key)
+    found === nothing && return default()
+    return only(found)
+end
+
+function delete(dict::PersistentDict{K}, key::K) where K
+    return KeyValue.set(dict, key)
+end
+
+iterate(dict::PersistentDict, state=nothing) = HAMT.iterate(dict.trie, state)
+
+length(dict::PersistentDict) = HAMT.length(dict.trie)
+isempty(dict::PersistentDict) = HAMT.isempty(dict.trie)
+empty(::PersistentDict, ::Type{K}, ::Type{V}) where {K, V} = PersistentDict{K, V}()
+
+@propagate_inbounds Iterators.only(dict::PersistentDict) = Iterators._only(dict, first)
diff --git a/base/div.jl b/base/div.jl
index 9c2187e662ee9..71e467332818f 100644
--- a/base/div.jl
+++ b/base/div.jl
@@ -22,6 +22,8 @@ See also [`fld`](@ref) and [`cld`](@ref), which are special cases of this functi
 
 # Examples:
 ```jldoctest
+julia> div(4, 3, RoundToZero) # Matches div(4, 3)
+1
 julia> div(4, 3, RoundDown) # Matches fld(4, 3)
 1
 julia> div(4, 3, RoundUp) # Matches cld(4, 3)
@@ -41,6 +43,21 @@ julia> div(4, 3, RoundFromZero)
 julia> div(-4, 3, RoundFromZero)
 -2
 ```
+Because `div(x, y)` implements strictly correct truncated rounding based on the true
+value of floating-point numbers, unintuitive situations can arise. For example:
+```jldoctest
+julia> div(6.0, 0.1)
+59.0
+julia> 6.0 / 0.1
+60.0
+julia> 6.0 / big(0.1)
+59.99999999999999666933092612453056361837965690217069245739573412231113406246995
+```
+What is happening here is that the true value of the floating-point number written
+as `0.1` is slightly larger than the numerical value 1/10 while `6.0` represents
+the number 6 precisely. Therefore the true value of `6.0 / 0.1` is slightly less
+than 60. When doing division, this is rounded to precisely `60.0`, but
+`div(6.0, 0.1, RoundToZero)` always truncates the true value, so the result is `59.0`.
 """
 div(x, y, r::RoundingMode)
 
@@ -305,7 +322,7 @@ end
 
 # For bootstrapping purposes, we define div for integers directly. Provide the
 # generic signature also
-div(a::T, b::T, ::typeof(RoundToZero)) where {T<:Union{BitSigned, BitUnsigned64}} = div(a, b)
+div(a::T, b::T, ::typeof(RoundToZero)) where {T<:Union{BitSigned, BitUnsigned}} = div(a, b)
 div(a::Bool, b::Bool, r::RoundingMode) = div(a, b)
 # Prevent ambiguities
 for rm in (RoundUp, RoundDown, RoundToZero, RoundFromZero)
@@ -318,10 +335,6 @@ function div(x::Bool, y::Bool, rnd::Union{typeof(RoundNearest),
 end
 fld(a::T, b::T) where {T<:Union{Integer,AbstractFloat}} = div(a, b, RoundDown)
 cld(a::T, b::T) where {T<:Union{Integer,AbstractFloat}} = div(a, b, RoundUp)
-div(a::Int128, b::Int128, ::typeof(RoundToZero)) = div(a, b)
-div(a::UInt128, b::UInt128, ::typeof(RoundToZero)) = div(a, b)
-rem(a::Int128, b::Int128, ::typeof(RoundToZero)) = rem(a, b)
-rem(a::UInt128, b::UInt128, ::typeof(RoundToZero)) = rem(a, b)
 
 # These are kept for compatibility with external packages overriding fld / cld.
 # In 2.0, packages should extend div(a, b, r) instead, in which case, these can
@@ -368,3 +381,9 @@ end
 # NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division,
 # so it is used here as the basis of float div().
 div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = convert(T, round((x - rem(x, y, r)) / y))
+
+# Vincent Lefèvre: "The Euclidean Division Implemented with a Floating-Point Division and a Floor"
+# https://inria.hal.science/inria-00070403
+# Theorem 1 implies that the following are exact if eps(x/y) <= 1
+div(x::Float32, y::Float32, r::RoundingMode) = Float32(round(Float64(x) / Float64(y), r))
+div(x::Float16, y::Float16, r::RoundingMode) = Float16(round(Float32(x) / Float32(y), r))
diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl
index e0733280e7c7d..5d1662551ea62 100644
--- a/base/docs/Docs.jl
+++ b/base/docs/Docs.jl
@@ -3,7 +3,7 @@
 """
     Docs
 
-The `Docs` module provides the `@doc` macro which can be used to set and retrieve
+The `Docs` module provides the [`@doc`](@ref) macro which can be used to set and retrieve
 documentation metadata for Julia objects.
 
 Please see the manual section on [documentation](@ref man-documentation) for more
@@ -19,8 +19,9 @@ module Docs
 Functions, methods and types can be documented by placing a string before the definition:
 
     \"\"\"
-    # The Foo Function
-    `foo(x)`: Foo the living hell out of `x`.
+        foo(x)
+
+    Return a fooified version of `x`.
     \"\"\"
     foo(x) = ...
 
@@ -33,8 +34,8 @@ The macro has special parsing so that the documented object may occur on the nex
 By default, documentation is written as Markdown, but any object can be used as
 the first argument.
 
-## Documenting objects after they are defined
-You can document an object after its definition by
+## Documenting objects separately from their definitions
+You can document an object before or after its definition with
 
     @doc "foo" function_to_doc
     @doc "bar" TypeToDoc
@@ -50,6 +51,10 @@ You can retrieve docs for functions, macros and other objects as follows:
     @doc @time
     @doc md""
 
+!!! compat "Julia 1.11"
+    In Julia 1.11 and newer, retrieving documentation with the `@doc` macro requires that
+    the `REPL` stdlib is loaded.
+
 ## Functions & Methods
 Placing documentation before a method definition (e.g. `function foo() ...` or `foo() = ...`)
 will cause that specific method to be documented, as opposed to the whole function. Method
@@ -60,12 +65,12 @@ function.
 
 include("bindings.jl")
 
-import .Base.Meta: quot, isexpr
+import .Base.Meta: quot, isexpr, unblock, unescape, uncurly
 import .Base: Callable, with_output_color
 using .Base: RefValue, mapany
 import ..CoreDocs: lazy_iterpolate
 
-export doc
+export doc, hasdoc, undocumented_names
 
 # Basic API / Storage
 
@@ -74,18 +79,23 @@ const META    = gensym(:meta)
 const METAType = IdDict{Any,Any}
 
 function meta(m::Module; autoinit::Bool=true)
-    if !isdefined(m, META) || getfield(m, META) === nothing
-        autoinit ? initmeta(m) : return nothing
+    if !invokelatest(isdefinedglobal, m, META)
+        return autoinit ? initmeta(m) : nothing
     end
-    return getfield(m, META)::METAType
+    # TODO: This `invokelatest` is not technically required, but because
+    # of the automatic constant backdating is currently required to avoid
+    # a warning.
+    return invokelatest(getglobal, m, META)::METAType
 end
 
 function initmeta(m::Module)
-    if !isdefined(m, META) || getfield(m, META) === nothing
-        Core.eval(m, :($META = $(METAType())))
+    if !invokelatest(isdefinedglobal, m, META)
+        val = METAType()
+        Core.eval(m, :(const $META = $val))
         push!(modules, m)
+        return val
     end
-    nothing
+    return invokelatest(getglobal, m, META)
 end
 
 function signature!(tv::Vector{Any}, expr::Expr)
@@ -194,7 +204,7 @@ docexpr(__source__, __module__, args...) = Expr(:call, docstr, args...)
 Stores a collection of docstrings for related objects, ie. a `Function`/`DataType` and
 associated `Method` objects.
 
-Each documented object in a `MultiDoc` is referred to by it's signature which is represented
+Each documented object in a `MultiDoc` is referred to by its signature which is represented
 by a `Union` of `Tuple` types. For example, the following `Method` definition
 
     f(x, y) = ...
@@ -243,7 +253,7 @@ function doc!(__module__::Module, b::Binding, str::DocStr, @nospecialize sig = U
             @warn "Replacing docs for `$b :: $sig` in module `$(__module__)`"
     else
         # The ordering of docstrings for each Binding is defined by the order in which they
-        # are initially added. Replacing a specific docstring does not change it's ordering.
+        # are initially added. Replacing a specific docstring does not change its ordering.
         push!(m.order, sig)
     end
     m.docs[sig] = str
@@ -285,39 +295,31 @@ catdoc(xs...) = vcat(xs...)
 
 const keywords = Dict{Symbol, DocStr}()
 
-function unblock(@nospecialize ex)
-    while isexpr(ex, :var"hygienic-scope")
-        isexpr(ex.args[1], :escape) || break
-        ex = ex.args[1].args[1]
-    end
-    isexpr(ex, :block) || return ex
-    exs = filter(ex -> !(isa(ex, LineNumberNode) || isexpr(ex, :line)), ex.args)
-    length(exs) == 1 || return ex
-    return unblock(exs[1])
-end
-
-# peek through ex to figure out what kind of expression it may eventually act like
-# but ignoring scopes and line numbers
-function unescape(@nospecialize ex)
-    ex = unblock(ex)
-    while isexpr(ex, :escape) || isexpr(ex, :var"hygienic-scope")
-       ex = unblock(ex.args[1])
-    end
-    return ex
-end
-
-uncurly(@nospecialize ex) = isexpr(ex, :curly) ? ex.args[1] : ex
-
-namify(@nospecialize x) = astname(x, isexpr(x, :macro))::Union{Symbol,Expr,GlobalRef}
+namify(@nospecialize x) = astname(x, isexpr(x, :macro))
 
 function astname(x::Expr, ismacro::Bool)
     head = x.head
     if head === :.
         ismacro ? macroname(x) : x
-    elseif head === :call && isexpr(x.args[1], :(::))
-        return astname((x.args[1]::Expr).args[end], ismacro)
+elseif head === :call && length(x.args) >= 1 && isexpr(x.args[1], :(::))
+        # for documenting (x::y)(args...), extract the name from y
+        # otherwise, for documenting `x::y`, it will be extracted from x
+        astname((x.args[1]::Expr).args[end], ismacro)
     else
-        n = isexpr(x, (:module, :struct)) ? 2 : 1
+        n = if isexpr(x, :module)
+            isa(x.args[1], Bool) ? 2 : 3
+        elseif isexpr(x, :struct)
+            2
+        elseif isexpr(x, (:call, :macrocall, :function, :(=), :macro, :where, :curly,
+                          :(::), :(<:), :(>:), :local, :global, :const, :atomic,
+                          :copyast, :quote, :inert, :primitive, :abstract,
+                          :escape, :var"hygienic-scope"))
+            # similar to is_function_def, but without -> and with various assignments, quoted statements, and miscellaneous that might be encountered in struct definitions also
+            1
+        else
+            return x # nothing to see here--bindingexpr will convert this to an error if defining a doc
+        end
+        length(x.args) < n && return x
         astname(x.args[n], ismacro)
     end
 end
@@ -369,7 +371,7 @@ function metadata(__source__, __module__, expr, ismodule)
             if isa(eachex, Symbol) || isexpr(eachex, :(::))
                 # a field declaration
                 if last_docstr !== nothing
-                    push!(fields, P(namify(eachex::Union{Symbol,Expr}), last_docstr))
+                    push!(fields, P(namify(eachex), last_docstr))
                     last_docstr = nothing
                 end
             elseif isexpr(eachex, :function) || isexpr(eachex, :(=))
@@ -396,8 +398,26 @@ function objectdoc(__source__, __module__, str, def, expr, sig = :(Union{}))
     @nospecialize str def expr sig
     binding = esc(bindingexpr(namify(expr)))
     docstr  = esc(docexpr(__source__, __module__, lazy_iterpolate(str), metadata(__source__, __module__, expr, false)))
-    # Note: we want to avoid introducing line number nodes here (issue #24468)
-    return Expr(:block, esc(def), :($(doc!)($__module__, $binding, $docstr, $(esc(sig)))))
+    # Store the result of the definition and return it after documenting
+    docex = :($(doc!)($__module__, $binding, $docstr, $(esc(sig))))
+    if def === nothing
+        return Expr(:block, docex)
+    else
+        exdef = esc(def)
+        if isexpr(def, :global, 1) && def.args[1] isa Union{Symbol,GlobalRef}
+            # Special case: `global x` should return nothing to avoid syntax errors with assigning to a value
+            val = nothing
+        else
+            if isexpr(def, :(=), 2) && isexpr(def.args[1], :curly)
+                # workaround for lowering bug #60001
+                exdef = Expr(:block, exdef)
+            end
+            val = :val
+            exdef = Expr(:(=), val, exdef)
+        end
+        # Note: we want to avoid introducing line number nodes here (issue #24468) for def
+        return Expr(:block, exdef, docex, val)
+    end
 end
 
 function calldoc(__source__, __module__, str, def::Expr)
@@ -421,9 +441,10 @@ function moduledoc(__source__, __module__, meta, def, def′::Expr)
     if def === nothing
         esc(:(Core.eval($name, $(quot(docex)))))
     else
+        has_version = !isa(def.args[1], Bool)
         def = unblock(def)
-        block = def.args[3].args
-        if !def.args[1]
+        block = def.args[3 + has_version].args
+        if !def.args[1 + has_version]
             pushfirst!(block, :(import Base: @doc))
         end
         push!(block, docex)
@@ -431,7 +452,9 @@ function moduledoc(__source__, __module__, meta, def, def′::Expr)
     end
 end
 
-# Shares a single doc, `meta`, between several expressions from the tuple expression `ex`.
+# Shares a single doc, `meta`, between several expressions from the tuple expression `ex`
+# (but don't actually create the tuple for the result and just return the final one,
+# as if this was a C++ comma operator or a block separated by `;` instead of `,`).
 function multidoc(__source__, __module__, meta, ex::Expr, define::Bool)
     @nospecialize meta
     out = Expr(:block)
@@ -469,6 +492,52 @@ more than one expression is marked then the same docstring is applied to each ex
     end
 
 `@__doc__` has no effect when a macro that uses it is not documented.
+
+!!! compat "Julia 1.12"
+
+    This section documents a very subtle corner case that is only relevant to
+    macros which themselves both define other macros and then attempt to use them
+    within the same expansion. Such macros were impossible to write prior to
+    Julia 1.12 and are still quite rare. If you are not writing such a macro,
+    you may ignore this note.
+
+    In versions prior to Julia 1.12, macroexpansion would recursively expand through
+    `Expr(:toplevel)` blocks. This behavior was changed in Julia 1.12 to allow
+    macros to recursively define other macros and use them in the same returned
+    expression. However, to preserve backwards compatibility with existing uses of
+    `@__doc__`, the doc system will still expand through `Expr(:toplevel)` blocks
+    when looking for `@__doc__` markers. As a result, macro-defining-macros will
+    have an observable behavior difference when annotated with a docstring:
+
+    ```julia
+    julia> macro macroception()
+        Expr(:toplevel, :(macro foo() 1 end), :(@foo))
+    end
+
+    julia> @macroception
+    1
+
+    julia> "Docstring" @macroception
+    ERROR: LoadError: UndefVarError: `@foo` not defined in `Main`
+    ```
+
+    The supported workaround is to manually expand the `@__doc__` macro in the
+    defining macro, which the docsystem will recognize and suppress the recursive
+    expansion:
+
+    ```julia
+    julia> macro macroception()
+        Expr(:toplevel,
+            macroexpand(__module__, :(@__doc__ macro foo() 1 end); recursive=false),
+            :(@foo))
+    end
+
+    julia> @macroception
+    1
+
+    julia> "Docstring" @macroception
+    1
+    ```
 """
 :(Core.@__doc__)
 
@@ -476,17 +545,23 @@ function __doc__!(source, mod, meta, def, define::Bool)
     @nospecialize source mod meta def
     # Two cases must be handled here to avoid redefining all definitions contained in `def`:
     if define
-        # `def` has not been defined yet (this is the common case, i.e. when not generating
-        # the Base image). We just need to convert each `@__doc__` marker to an `@doc`.
-        finddoc(def) do each
+        function replace_meta_doc(each)
             each.head = :macrocall
             each.args = Any[Symbol("@doc"), source, mod, nothing, meta, each.args[end], define]
         end
+
+        # `def` has not been defined yet (this is the common case, i.e. when not generating
+        # the Base image). We just need to convert each `@__doc__` marker to an `@doc`.
+        found = finddoc(replace_meta_doc, mod, def; expand_toplevel = false)
+
+        if !found
+            found = finddoc(replace_meta_doc, mod, def; expand_toplevel = true)
+        end
     else
         # `def` has already been defined during Base image gen so we just need to find and
         # document any subexpressions marked with `@__doc__`.
         docs  = []
-        found = finddoc(def) do each
+        found = finddoc(mod, def; expand_toplevel = true) do each
             push!(docs, :(@doc($source, $mod, $meta, $(each.args[end]), $define)))
         end
         # If any subexpressions have been documented then replace the entire expression with
@@ -495,25 +570,30 @@ function __doc__!(source, mod, meta, def, define::Bool)
             def.head = :toplevel
             def.args = docs
         end
-        found
     end
+    return found
 end
 # Walk expression tree `def` and call `λ` when any `@__doc__` markers are found. Returns
 # `true` to signify that at least one `@__doc__` has been found, and `false` otherwise.
-function finddoc(λ, def::Expr)
+function finddoc(λ, mod::Module, def::Expr; expand_toplevel::Bool=false)
     if isexpr(def, :block, 2) && isexpr(def.args[1], :meta, 1) && (def.args[1]::Expr).args[1] === :doc
         # Found the macroexpansion of an `@__doc__` expression.
         λ(def)
         true
     else
+        if expand_toplevel && isexpr(def, :toplevel)
+            for i = 1:length(def.args)
+                def.args[i] = macroexpand(mod, def.args[i])
+            end
+        end
         found = false
         for each in def.args
-            found |= finddoc(λ, each)
+            found |= finddoc(λ, mod, each; expand_toplevel)
         end
         found
     end
 end
-finddoc(λ, @nospecialize def) = false
+finddoc(λ, mod::Module, @nospecialize def; expand_toplevel::Bool=false) = false
 
 # Predicates and helpers for `docm` expression selection:
 
@@ -528,14 +608,68 @@ isquotedmacrocall(@nospecialize x) =
 isbasicdoc(@nospecialize x) = isexpr(x, :.) || isa(x, Union{QuoteNode, Symbol})
 is_signature(@nospecialize x) = isexpr(x, :call) || (isexpr(x, :(::), 2) && isexpr(x.args[1], :call)) || isexpr(x, :where)
 
+function _doc(binding::Binding, sig::Type = Union{})
+    if defined(binding)
+        result = getdoc(resolve(binding), sig)
+        result === nothing || return result
+    end
+    # Lookup first match for `binding` and `sig` in all modules of the docsystem.
+    for mod in modules
+        dict = meta(mod; autoinit=false)
+        isnothing(dict) && continue
+        if haskey(dict, binding)
+            multidoc = dict[binding]
+            for msig in multidoc.order
+                sig <: msig && return multidoc.docs[msig]
+            end
+            # if no matching signatures, return first
+            if !isempty(multidoc.docs)
+                return first(values(multidoc.docs))
+            end
+        end
+    end
+    return nothing
+end
+
+# Some additional convenience `doc` methods that take objects rather than `Binding`s.
+_doc(obj::UnionAll) = _doc(Base.unwrap_unionall(obj))
+_doc(object, sig::Type = Union{}) = _doc(aliasof(object, typeof(object)), sig)
+_doc(object, sig...)              = _doc(object, Tuple{sig...})
+
+function simple_lookup_doc(ex)
+    if isa(ex, Expr) && ex.head !== :(.) && Base.isoperator(ex.head)
+        # handle syntactic operators, e.g. +=, ::, .=
+        ex = ex.head
+    end
+    if haskey(keywords, ex)
+        return keywords[ex]
+    elseif !isa(ex, Expr) && !isa(ex, Symbol)
+        return :($(_doc)($(typeof)($(esc(ex)))))
+    end
+    name = namify(ex)
+    # If namify couldn't extract a meaningful name and returned an Expr
+    # that can't be converted to a binding, treat it like a value
+    if isa(name, Expr) && !isexpr(name, :(.))
+        return :($(_doc)($(typeof)($(esc(ex)))))
+    end
+    binding = esc(bindingexpr(name))
+    if isexpr(ex, :call) || isexpr(ex, :macrocall) || isexpr(ex, :where)
+        sig = esc(signature(ex))
+        :($(_doc)($binding, $sig))
+    else
+        :($(_doc)($binding))
+    end
+end
+
 function docm(source::LineNumberNode, mod::Module, ex)
     @nospecialize ex
     if isexpr(ex, :->) && length(ex.args) > 1
         return docm(source, mod, ex.args...)
-    elseif isassigned(Base.REPL_MODULE_REF)
+    elseif (REPL = Base.REPL_MODULE_REF[]) !== Base
         # TODO: this is a shim to continue to allow `@doc` for looking up docstrings
-        REPL = Base.REPL_MODULE_REF[]
-        return REPL.lookup_doc(ex)
+        return invokelatest(REPL.lookup_doc, ex)
+    else
+        return simple_lookup_doc(ex)
     end
     return nothing
 end
@@ -546,13 +680,42 @@ docm(source::LineNumberNode, mod::Module, _, _, x...) = docm(source, mod, x...)
 # also part of a :where expression, so it unwraps the :where layers until it reaches the
 # "actual" expression
 iscallexpr(ex::Expr) = isexpr(ex, :where) ? iscallexpr(ex.args[1]) : isexpr(ex, :call)
-iscallexpr(ex) = false
+iscallexpr(@nospecialize ex) = false
 
 function docm(source::LineNumberNode, mod::Module, meta, ex, define::Bool = true)
     @nospecialize meta ex
     # Some documented expressions may be decorated with macro calls which obscure the actual
-    # expression. Expand the macro calls and remove extra blocks.
-    x = unblock(macroexpand(mod, ex))
+    # expression. Expand the macro calls.
+    x = macroexpand(mod, ex)
+    return _docm(source, mod, meta, x, define)
+end
+
+function _docm(source::LineNumberNode, mod::Module, meta, x, define::Bool = true)
+    if isexpr(x, :var"hygienic-scope")
+        x.args[1] = _docm(source, mod, meta, x.args[1])
+        return x
+    elseif isexpr(x, :escape)
+        x.args[1] = _docm(source, mod, meta, x.args[1])
+        return x
+    elseif isexpr(x, :block)
+        docarg = 0
+        for i = 1:length(x.args)
+            isa(x.args[i], LineNumberNode) && continue
+            if docarg == 0
+                docarg = i
+                continue
+            end
+            # More than one documentable expression in the block, treat it as a whole
+            # expression, which will fall through and look for (Expr(:meta, doc))
+            docarg = 0
+            break
+        end
+        if docarg != 0
+            x.args[docarg] = _docm(source, mod, meta, x.args[docarg], define)
+            return x
+        end
+    end
+
     # Don't try to redefine expressions. This is only needed for `Base` img gen since
     # otherwise calling `loaddocs` would redefine all documented functions and types.
     def = define ? x : nothing
@@ -582,7 +745,7 @@ function docm(source::LineNumberNode, mod::Module, meta, ex, define::Bool = true
     #   f(::T, ::U) where T where U
     #
     isexpr(x, FUNC_HEADS) && is_signature((x::Expr).args[1]) ? objectdoc(source, mod, meta, def, x::Expr, signature(x::Expr)) :
-    isexpr(x, [:function, :macro])  && !isexpr((x::Expr).args[1], :call) ? objectdoc(source, mod, meta, def, x::Expr) :
+    (isexpr(x, :function) || isexpr(x, :macro)) && !isexpr((x::Expr).args[1], :call) ? objectdoc(source, mod, meta, def, x::Expr) :
     iscallexpr(x) ? calldoc(source, mod, meta, x::Expr) :
 
     # Type definitions.
@@ -602,7 +765,7 @@ function docm(source::LineNumberNode, mod::Module, meta, ex, define::Bool = true
     isexpr(x, BINDING_HEADS) && !isexpr((x::Expr).args[1], :call) ? objectdoc(source, mod, meta, def, x::Expr) :
 
     # Quoted macrocall syntax. `:@time` / `:(Base.@time)`.
-    isquotedmacrocall(x) ? objectdoc(source, mod, meta, def, x) :
+    isquotedmacrocall(x) ? objectdoc(source, mod, meta, nothing, x) :
     # Modules and baremodules.
     isexpr(x, :module) ? moduledoc(source, mod, meta, def, x::Expr) :
     # Document several expressions with the same docstring. `a, b, c`.
@@ -617,7 +780,7 @@ function docm(source::LineNumberNode, mod::Module, meta, ex, define::Bool = true
     # All other expressions are undocumentable and should be handled on a case-by-case basis
     # with `@__doc__`. Unbound string literals are also undocumentable since they cannot be
     # retrieved from the module's metadata `IdDict` without a reference to the string.
-    docerror(ex)
+    docerror(x)
 
     return doc
 end
@@ -638,21 +801,91 @@ include("utils.jl")
 # Swap out the bootstrap macro with the real one.
 Core.atdoc!(docm)
 
-function loaddocs(docs::Vector{Core.SimpleVector})
-    for (mod, ex, str, file, line) in docs
+function loaddocs(docs::Base.CoreDocs.DocLinkedList)
+    while isdefined(docs, :doc)
+        (mod, ex, str, file, line) = docs.doc
         data = Dict{Symbol,Any}(:path => string(file), :linenumber => line)
         doc = docstr(str, data)
         lno = LineNumberNode(line, file)
         docstring = docm(lno, mod, doc, ex, false) # expand the real @doc macro now
         Core.eval(mod, Expr(:var"hygienic-scope", docstring, Docs, lno))
+        docs = docs.next
     end
-    empty!(docs)
     nothing
 end
 
+# FIXME: formatdoc, parsedoc, apropos, and doc are defined here (but only doc is exported)
+# for historical reasons (#25738), but are *implemented* in REPL/src/docview.jl, while
+# apropos is *exported* by InteractiveUtils and doc is exported by Docs.  Seems
+# like a more sensible refactoring should be possible.
+
 function formatdoc end
 function parsedoc end
+
+"""
+    apropos([io::IO=stdout], pattern::Union{AbstractString,Regex})
+
+Search available docstrings for entries containing `pattern`.
+
+When `pattern` is a string, case is ignored. Results are printed to `io`.
+
+`apropos` can be called from the help mode in the REPL by wrapping the query in double quotes:
+```
+help?> "pattern"
+```
+
+!!! compat "Julia 1.11"
+    In Julia 1.11 and newer, `apropos` requires that the `REPL` stdlib is loaded.
+"""
 function apropos end
+
+"""
+    Docs.doc(binding, sig)
+
+Return all documentation that matches both `binding` and `sig`.
+
+If `getdoc` returns a non-`nothing` result on the value of the binding, then a
+dynamic docstring is returned instead of one based on the binding itself.
+
+!!! compat "Julia 1.11"
+    In Julia 1.11 and newer, `Docs.doc` requires that the `REPL` stdlib is loaded.
+"""
 function doc end
 
+"""
+    Docs.hasdoc(mod::Module, sym::Symbol)::Bool
+
+Return `true` if `sym` in `mod` has a docstring and `false` otherwise.
+"""
+hasdoc(mod::Module, sym::Symbol) = hasdoc(Docs.Binding(mod, sym))
+function hasdoc(binding::Docs.Binding, sig::Type = Union{})
+    # this function is based on the Base.Docs.doc method implemented
+    # in REPL/src/docview.jl.  TODO: refactor and unify these methods.
+    defined(binding) && !isnothing(getdoc(resolve(binding), sig)) && return true
+    for mod in modules
+        dict = meta(mod; autoinit=false)
+        !isnothing(dict) && haskey(dict, binding) && return true
+    end
+    alias = aliasof(binding)
+    return alias == binding ? false : hasdoc(alias, sig)
+end
+
+
+"""
+    undocumented_names(mod::Module; private=false)
+
+Return a sorted vector of undocumented symbols in `module` (that is, lacking docstrings).
+`private=false` (the default) returns only identifiers declared with `public` and/or
+`export`, whereas `private=true` returns all symbols in the module (excluding
+compiler-generated hidden symbols starting with `#`).
+
+See also: [`names`](@ref), [`Docs.hasdoc`](@ref), [`Base.ispublic`](@ref).
+"""
+function undocumented_names(mod::Module; private::Bool=false)
+    filter!(names(mod; all=true)) do sym
+        !hasdoc(mod, sym) && !startswith(string(sym), '#') &&
+            (private || Base.ispublic(mod, sym))
+    end
+end
+
 end
diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl
index fd8c35a5fdf76..99316e51cf260 100644
--- a/base/docs/basedocs.jl
+++ b/base/docs/basedocs.jl
@@ -1,4 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+#
 
 module BaseDocs
 
@@ -36,6 +37,14 @@ kw"help", kw"Julia", kw"julia", kw""
 available for direct use. Names can also be used via dot syntax (e.g. `Foo.foo` to access
 the name `foo`), whether they are `export`ed or not.
 See the [manual section about modules](@ref modules) for details.
+
+!!! note
+    When two or more packages/modules export a name and that name does not refer to the
+    same thing in each of the packages, and the packages are loaded via `using` without
+    an explicit list of names, it is an error to reference that name without qualification.
+    It is thus recommended that code intended to be forward-compatible with future versions
+    of its dependencies and of Julia, e.g., code in released packages, list the names it
+    uses from each loaded package, e.g., `using Foo: Foo, f` rather than `using Foo`.
 """
 kw"using"
 
@@ -52,13 +61,30 @@ kw"import"
 """
     export
 
-`export` is used within modules to tell Julia which functions should be
+`export` is used within modules to tell Julia which names should be
 made available to the user. For example: `export foo` makes the name
 `foo` available when [`using`](@ref) the module.
 See the [manual section about modules](@ref modules) for details.
 """
 kw"export"
 
+"""
+    public
+
+`public` is used within modules to tell Julia which names are part of the
+public API of the module. For example: `public foo` indicates that the name
+`foo` is public, without making it available when [`using`](@ref) the module.
+
+As [`export`](@ref) already indicates that a name is public, it is
+unnecessary and an error to declare a name both as `public` and as `export`ed.
+See the [manual section about modules](@ref modules) for details.
+
+!!! compat "Julia 1.11"
+    The public keyword was added in Julia 1.11. Prior to this the notion
+    of publicness was less explicit.
+"""
+kw"public"
+
 """
     as
 
@@ -101,7 +127,7 @@ kw"abstract type", kw"abstract"
 
 `module` declares a [`Module`](@ref), which is a separate global variable workspace. Within a
 module, you can control which names from other modules are visible (via importing), and
-specify which of your names are intended to be public (via exporting).
+specify which of your names are intended to be public (via `export` and `public`).
 Modules allow you to create top-level definitions without worrying about name conflicts
 when your code is used together with somebody else’s.
 See the [manual section about modules](@ref modules) for more details.
@@ -135,6 +161,8 @@ runtime initialization functions of external C libraries and initializing global
 that involve pointers returned by external libraries.
 See the [manual section about modules](@ref modules) for more details.
 
+See also: [`OncePerProcess`](@ref).
+
 # Examples
 ```julia
 const foo_data_ptr = Ref{Ptr{Cvoid}}(0)
@@ -151,7 +179,7 @@ kw"__init__"
     baremodule
 
 `baremodule` declares a module that does not contain `using Base` or local definitions of
-[`eval`](@ref Base.MainInclude.eval) and [`include`](@ref Base.include). It does still import `Core`. In other words,
+[`eval`](@ref Main.eval) and [`include`](@ref Base.include). It does still import `Core`. In other words,
 
 ```julia
 module Mod
@@ -203,7 +231,7 @@ kw"primitive type"
 A macro maps a sequence of argument expressions to a returned expression, and the
 resulting expression is substituted directly into the program at the point where
 the macro is invoked.
-Macros are a way to run generated code without calling [`eval`](@ref Base.MainInclude.eval),
+Macros are a way to run generated code without calling [`eval`](@ref Main.eval),
 since the generated code instead simply becomes part of the surrounding program.
 Macro arguments may include expressions, literal values, and symbols. Macros can be defined for
 variable number of arguments (varargs), but do not accept keyword arguments.
@@ -378,11 +406,11 @@ Assigning `a` to `b` does not create a copy of `b`; instead use [`copy`](@ref) o
 
 ```jldoctest
 julia> b = [1]; a = b; b[1] = 2; a
-1-element Array{Int64, 1}:
+1-element Vector{Int64}:
  2
 
 julia> b = [1]; a = copy(b); b[1] = 2; a
-1-element Array{Int64, 1}:
+1-element Vector{Int64}:
  1
 
 ```
@@ -392,7 +420,7 @@ julia> function f!(x); x[:] .+= 1; end
 f! (generic function with 1 method)
 
 julia> a = [1]; f!(a); a
-1-element Array{Int64, 1}:
+1-element Vector{Int64}:
  2
 
 ```
@@ -411,7 +439,7 @@ julia> a, b
 Assignment can operate on multiple variables in series, and will return the value of the right-hand-most expression:
 ```jldoctest
 julia> a = [1]; b = [2]; c = [3]; a = b = c
-1-element Array{Int64, 1}:
+1-element Vector{Int64}:
  3
 
 julia> b[1] = 2; a, b, c
@@ -421,11 +449,11 @@ julia> b[1] = 2; a, b, c
 Assignment at out-of-bounds indices does not grow a collection. If the collection is a [`Vector`](@ref) it can instead be grown with [`push!`](@ref) or [`append!`](@ref).
 ```jldoctest
 julia> a = [1, 1]; a[3] = 2
-ERROR: BoundsError: attempt to access 2-element Array{Int64, 1} at index [3]
+ERROR: BoundsError: attempt to access 2-element Vector{Int64} at index [3]
 [...]
 
 julia> push!(a, 2, 3)
-4-element Array{Int64, 1}:
+4-element Vector{Int64}:
  1
  1
  2
@@ -439,7 +467,7 @@ ERROR: DimensionMismatch: tried to assign 0 elements to 1 destinations
 [...]
 
 julia> filter!(x -> x > 1, a) # in-place & thus more efficient than a = a[a .> 1]
-2-element Array{Int64, 1}:
+2-element Vector{Int64}:
  2
  3
 
@@ -462,14 +490,14 @@ assignment expression is converted into a single loop.
 julia> A = zeros(4, 4); B = [1, 2, 3, 4];
 
 julia> A .= B
-4×4 Array{Float64, 2}:
+4×4 Matrix{Float64}:
  1.0  1.0  1.0  1.0
  2.0  2.0  2.0  2.0
  3.0  3.0  3.0  3.0
  4.0  4.0  4.0  4.0
 
 julia> A
-4×4 Array{Float64, 2}:
+4×4 Matrix{Float64}:
  1.0  1.0  1.0  1.0
  2.0  2.0  2.0  2.0
  3.0  3.0  3.0  3.0
@@ -645,8 +673,11 @@ kw"{", kw"{}", kw"}"
 """
     []
 
-Square braces are used for [indexing](@ref man-array-indexing), [indexed assignment](@ref man-indexed-assignment),
-[array literals](@ref man-array-literals), and [array comprehensions](@ref man-comprehensions).
+Square brackets are used for [indexing](@ref man-array-indexing) ([`getindex`](@ref)),
+[indexed assignment](@ref man-indexed-assignment) ([`setindex!`](@ref)),
+[array literals](@ref man-array-literals) ([`Base.vect`](@ref)),
+[array concatenation](@ref man-array-concatenation) ([`vcat`](@ref), [`hcat`](@ref), [`hvcat`](@ref), [`hvncat`](@ref)),
+and [array comprehensions](@ref man-comprehensions) ([`collect`](@ref)).
 """
 kw"[", kw"[]", kw"]"
 
@@ -916,11 +947,14 @@ expression, rather than the side effects that evaluating `b` or `c` may have.
 See the manual section on [control flow](@ref man-conditional-evaluation) for more details.
 
 # Examples
-```
+```jldoctest
 julia> x = 1; y = 2;
 
-julia> x > y ? println("x is larger") : println("y is larger")
-y is larger
+julia> x > y ? println("x is larger") : println("x is not larger")
+x is not larger
+
+julia> x > y ? "x is larger" : x == y ? "x and y are equal" : "y is larger"
+"y is larger"
 ```
 """
 kw"?", kw"?:"
@@ -984,12 +1018,12 @@ collection or the last index of a dimension of an array.
 # Examples
 ```jldoctest
 julia> A = [1 2; 3 4]
-2×2 Array{Int64, 2}:
+2×2 Matrix{Int64}:
  1  2
  3  4
 
 julia> A[end, :]
-2-element Array{Int64, 1}:
+2-element Vector{Int64}:
  3
  4
 ```
@@ -1029,15 +1063,58 @@ end
 The syntax `catch e` (where `e` is any variable) assigns the thrown
 exception object to the given variable within the `catch` block.
 
+```julia
+try
+    a_dangerous_operation()
+catch e
+    if isa(e, EOFError)
+        @warn "The operation failed - EOF."
+    elseif isa(e, OutOfMemoryError)
+        @warn "The operation failed - OOM."
+    else
+        rethrow() # ensure other exceptions can bubble up the call stack
+    end
+end
+```
+
 The power of the `try`/`catch` construct lies in the ability to unwind a deeply
 nested computation immediately to a much higher level in the stack of calling functions.
+
+A `try/catch` block can also have an `else` clause that executes only if no exception occurred:
+```julia
+try
+    a_dangerous_operation()
+catch
+    @warn "The operation failed."
+else
+    @info "The operation succeeded."
+end
+```
+
+A `try` or `try`/`catch` block can also have a [`finally`](@ref) clause that executes
+at the end, regardless of whether an exception occurred.  For example, this can be
+used to guarantee that an opened file is closed:
+```julia
+f = open("file")
+try
+    operate_on_file(f)
+catch
+    @warn "An error occurred!"
+finally
+    close(f)
+end
+```
+(`finally` can also be used without a `catch` block.)
+
+!!! compat "Julia 1.8"
+    Else clauses require at least Julia 1.8.
 """
 kw"try", kw"catch"
 
 """
     finally
 
-Run some code when a given block of code exits, regardless
+Run some code when a given `try` block of code exits, regardless
 of how it exits. For example, here is how we can guarantee that an opened file is
 closed:
 
@@ -1055,6 +1132,41 @@ normally), [`close(f)`](@ref) will be executed. If the `try` block exits due to
 the exception will continue propagating. A `catch` block may be combined with `try` and
 `finally` as well. In this case the `finally` block will run after `catch` has handled
 the error.
+
+When evaluating a `try/catch/else/finally` expression, the value of the entire
+expression is the value of the last block executed, excluding the `finally`
+block. For example:
+
+```jldoctest
+julia> try
+           1
+       finally
+           2
+       end
+1
+
+julia> try
+           error("")
+       catch
+           1
+       else
+           2
+       finally
+           3
+       end
+1
+
+julia> try
+           0
+       catch
+           1
+       else
+           2
+       finally
+           3
+       end
+2
+```
 """
 kw"finally"
 
@@ -1242,6 +1354,12 @@ kw";"
 
 Short-circuiting boolean AND.
 
+This is equivalent to `x ? y : false`: it returns `false` if `x` is `false` and the result of evaluating `y` if `x` is `true`.
+Note that if `y` is an expression, it is only evaluated when `x` is `true`, which is called "short-circuiting" behavior.
+
+Also, `y` does not need to have a boolean value.  This means that `(condition) && (statement)` can be used as shorthand for
+`if condition; statement; end` for an arbitrary `statement`.
+
 See also [`&`](@ref), the ternary operator `? :`, and the manual section on [control flow](@ref man-conditional-evaluation).
 
 # Examples
@@ -1253,6 +1371,9 @@ true
 
 julia> x < 0 && error("expected positive x")
 false
+
+julia> x > 0 && "not a boolean"
+"not a boolean"
 ```
 """
 kw"&&"
@@ -1262,6 +1383,12 @@ kw"&&"
 
 Short-circuiting boolean OR.
 
+This is equivalent to `x ? true : y`: it returns `true` if `x` is `true` and the result of evaluating `y` if `x` is `false`.
+Note that if `y` is an expression, it is only evaluated when `x` is `false`, which is called "short-circuiting" behavior.
+
+Also, `y` does not need to have a boolean value.  This means that `(condition) || (statement)` can be used as shorthand for
+`if !(condition); statement; end` for an arbitrary `statement`.
+
 See also: [`|`](@ref), [`xor`](@ref), [`&&`](@ref).
 
 # Examples
@@ -1271,6 +1398,9 @@ true
 
 julia> false || true || println("neither is true!")
 true
+
+julia> pi < 3 || "not a boolean"
+"not a boolean"
 ```
 """
 kw"||"
@@ -1315,7 +1445,9 @@ a tuple of types. All types, as well as the LLVM code, should be specified as li
 not as variables or expressions (it may be necessary to use `@eval` to generate these
 literals).
 
-See `test/llvmcall.jl` for usage examples.
+See
+[`test/llvmcall.jl`](https://github.com/JuliaLang/julia/blob/v$VERSION/test/llvmcall.jl)
+for usage examples.
 """
 Core.Intrinsics.llvmcall
 
@@ -1335,17 +1467,21 @@ Usually `begin` will not be necessary, since keywords such as [`function`](@ref)
 implicitly begin blocks of code. See also [`;`](@ref).
 
 `begin` may also be used when indexing to represent the first index of a
-collection or the first index of a dimension of an array.
+collection or the first index of a dimension of an array. For example,
+`a[begin]` is the first element of an array `a`.
+
+!!! compat "Julia 1.4"
+    Use of `begin` as an index requires Julia 1.4 or later.
 
 # Examples
 ```jldoctest
 julia> A = [1 2; 3 4]
-2×2 Array{Int64,2}:
+2×2 Matrix{Int64}:
  1  2
  3  4
 
 julia> A[begin, :]
-2-element Array{Int64,1}:
+2-element Matrix{Int64}:
  1
  2
 ```
@@ -1396,8 +1532,20 @@ kw"struct"
     mutable struct
 
 `mutable struct` is similar to [`struct`](@ref), but additionally allows the
-fields of the type to be set after construction. See the manual section on
-[Composite Types](@ref) for more information.
+fields of the type to be set after construction.
+
+Individual fields of a mutable struct can be marked as `const` to make them immutable:
+
+```julia
+mutable struct Baz
+    a::Int
+    const b::Float64
+end
+```
+!!! compat "Julia 1.8"
+    The `const` keyword for fields of mutable structs requires at least Julia 1.8.
+
+See the manual section on [Composite Types](@ref) for more information.
 """
 kw"mutable struct"
 
@@ -1414,7 +1562,7 @@ kw"new"
 """
     where
 
-The `where` keyword creates a type that is an iterated union of other types, over all
+The `where` keyword creates a [`UnionAll`](@ref) type, which may be thought of as an iterated union of other types, over all
 values of some variable. For example `Vector{T} where T<:Real` includes all [`Vector`](@ref)s
 where the element type is some kind of `Real` number.
 
@@ -1499,6 +1647,8 @@ Nothing
 The singleton instance of type [`Nothing`](@ref), used by convention when there is no value to return
 (as in a C `void` function) or when a variable or field holds no value.
 
+A return value of `nothing` is not displayed by the REPL and similar interactive environments.
+
 See also: [`isnothing`](@ref), [`something`](@ref), [`missing`](@ref).
 """
 nothing
@@ -1601,6 +1751,34 @@ julia> ex.msg
 """
 ErrorException
 
+"""
+    FieldError(type::DataType, field::Symbol)
+
+An operation tried to access invalid `field` on an object of `type`.
+
+!!! compat "Julia 1.12"
+    Prior to Julia 1.12, invalid field access threw an [`ErrorException`](@ref)
+
+See [`getfield`](@ref)
+
+# Examples
+```jldoctest
+julia> struct AB
+          a::Float32
+          b::Float64
+       end
+
+julia> ab = AB(1, 3)
+AB(1.0f0, 3.0)
+
+julia> ab.c # field `c` doesn't exist
+ERROR: FieldError: type AB has no field `c`, available fields: `a`, `b`
+Stacktrace:
+[...]
+```
+"""
+FieldError
+
 """
     WrappedException(msg)
 
@@ -1738,12 +1916,22 @@ Stacktrace:
 DomainError
 
 """
-    Task(func)
+    Task(func[, reserved_stack::Int])
 
 Create a `Task` (i.e. coroutine) to execute the given function `func` (which
 must be callable with no arguments). The task exits when this function returns.
 The task will run in the "world age" from the parent at construction when [`schedule`](@ref)d.
 
+The optional `reserved_stack` argument specifies the size of the stack available
+for this task, in bytes. The default, `0`, uses the system-dependent stack size default.
+
+!!! warning
+    By default tasks will have the sticky bit set to true `t.sticky`. This models the
+    historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread
+    they are first scheduled on, and when scheduled will make the task that they were scheduled
+    from sticky. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky
+    bit manually to `false`.
+
 # Examples
 ```jldoctest
 julia> a() = sum(i for i in 1:1000);
@@ -1764,7 +1952,7 @@ recurses infinitely.
 StackOverflowError
 
 """
-    nfields(x) -> Int
+    nfields(x)::Int
 
 Get the number of fields in the given object.
 
@@ -1794,14 +1982,14 @@ In these examples, `a` is a [`Rational`](@ref), which has two fields.
 nfields
 
 """
-    UndefVarError(var::Symbol)
+    UndefVarError(var::Symbol, [scope])
 
 A symbol in the current scope is not defined.
 
 # Examples
 ```jldoctest
 julia> a
-ERROR: UndefVarError: `a` not defined
+ERROR: UndefVarError: `a` not defined in `Main`
 
 julia> a = 1;
 
@@ -1866,7 +2054,7 @@ to let `InterruptException` be thrown by CTRL+C during the execution.
 InterruptException
 
 """
-    applicable(f, args...) -> Bool
+    applicable(f, args...)::Bool
 
 Determine whether the given generic function has a method applicable to the given arguments.
 
@@ -1889,21 +2077,49 @@ applicable
 
 """
     invoke(f, argtypes::Type, args...; kwargs...)
+    invoke(f, argtypes::Method, args...; kwargs...)
+    invoke(f, argtypes::CodeInstance, args...; kwargs...)
 
 Invoke a method for the given generic function `f` matching the specified types `argtypes` on the
 specified arguments `args` and passing the keyword arguments `kwargs`. The arguments `args` must
 conform with the specified types in `argtypes`, i.e. conversion is not automatically performed.
 This method allows invoking a method other than the most specific matching method, which is useful
 when the behavior of a more general definition is explicitly needed (often as part of the
-implementation of a more specific method of the same function).
+implementation of a more specific method of the same function). However, because this means
+the runtime must do more work, `invoke` is generally also slower--sometimes significantly
+so--than doing normal dispatch with a regular call.
 
-Be careful when using `invoke` for functions that you don't write.  What definition is used
+Be careful when using `invoke` for functions that you don't write. What definition is used
 for given `argtypes` is an implementation detail unless the function is explicitly states
 that calling with certain `argtypes` is a part of public API.  For example, the change
 between `f1` and `f2` in the example below is usually considered compatible because the
 change is invisible by the caller with a normal (non-`invoke`) call.  However, the change is
 visible if you use `invoke`.
 
+# Passing a `Method` instead of a signature
+The `argtypes` argument may be a `Method`, in which case the ordinary method table lookup is
+bypassed entirely and the given method is invoked directly. Needing this feature is uncommon.
+Note in particular that the specified `Method` may be entirely unreachable from ordinary dispatch
+(or ordinary invoke), e.g. because it was replaced or fully covered by more specific methods.
+If the method is part of the ordinary method table, this call behaves similar
+to `invoke(f, method.sig, args...)`.
+
+!!! compat "Julia 1.12"
+    Passing a `Method` requires Julia 1.12.
+
+# Passing a `CodeInstance` instead of a signature
+The `argtypes` argument may be a `CodeInstance`, bypassing both method lookup and specialization.
+The semantics of this invocation are similar to a function pointer call of the `CodeInstance`'s
+`invoke` pointer. It is an error to invoke a `CodeInstance` with arguments that do not match its
+parent `MethodInstance` or from a world age not included in the `min_world`/`max_world` range.
+It is undefined behavior to invoke a `CodeInstance` whose behavior does not match the constraints
+specified in its fields. For some code instances with `owner !== nothing` (i.e. those generated
+by external compilers), it may be an error to invoke them after passing through precompilation.
+This is an advanced interface intended for use with external compiler plugins.
+
+!!! compat "Julia 1.12"
+    Passing a `CodeInstance` requires Julia 1.12.
+
 # Examples
 ```jldoctest
 julia> f(x::Real) = x^2;
@@ -1936,7 +2152,7 @@ Integer
 invoke
 
 """
-    isa(x, type) -> Bool
+    isa(x, type)::Bool
 
 Determine whether `x` is of the given `type`. Can also be used as an infix operator, e.g.
 `x isa type`.
@@ -2003,7 +2219,21 @@ AbstractFloat
 """
     Integer <: Real
 
-Abstract supertype for all integers.
+Abstract supertype for all integers (e.g. [`Signed`](@ref), [`Unsigned`](@ref), and [`Bool`](@ref)).
+
+See also [`isinteger`](@ref), [`trunc`](@ref), [`div`](@ref).
+
+# Examples
+```
+julia> 42 isa Integer
+true
+
+julia> 1.0 isa Integer
+false
+
+julia> isinteger(1.0)
+true
+```
 """
 Integer
 
@@ -2018,6 +2248,21 @@ Signed
     Unsigned <: Integer
 
 Abstract supertype for all unsigned integers.
+
+Built-in unsigned integers are printed in hexadecimal, with prefix `0x`,
+and can be entered in the same way.
+
+# Examples
+```
+julia> typemax(UInt8)
+0xff
+
+julia> Int(0x00d)
+13
+
+julia> unsigned(true)
+0x0000000000000001
+```
 """
 Unsigned
 
@@ -2028,57 +2273,147 @@ Boolean type, containing the values `true` and `false`.
 
 `Bool` is a kind of number: `false` is numerically
 equal to `0` and `true` is numerically equal to `1`.
-Moreover, `false` acts as a multiplicative "strong zero":
+Moreover, `false` acts as a multiplicative "strong zero"
+against [`NaN`](@ref) and [`Inf`](@ref):
 
 ```jldoctest
-julia> false == 0
+julia> [true, false] == [1, 0]
 true
 
-julia> true == 1
-true
+julia> 42.0 + true
+43.0
+
+julia> 0 .* (NaN, Inf, -Inf)
+(NaN, NaN, NaN)
+
+julia> false .* (NaN, Inf, -Inf)
+(0.0, 0.0, -0.0)
+```
+
+Branches via [`if`](@ref) and other conditionals only accept `Bool`.
+There are no "truthy" values in Julia.
 
-julia> 0 * NaN
-NaN
+Comparisons typically return `Bool`, and broadcasted comparisons may
+return [`BitArray`](@ref) instead of an `Array{Bool}`.
 
-julia> false * NaN
-0.0
+```jldoctest
+julia> [1 2 3 4 5] .< pi
+1×5 BitMatrix:
+ 1  1  1  0  0
+
+julia> map(>(pi), [1 2 3 4 5])
+1×5 Matrix{Bool}:
+ 0  0  0  1  1
 ```
 
-See also: [`digits`](@ref), [`iszero`](@ref), [`NaN`](@ref).
+See also [`trues`](@ref), [`falses`](@ref), [`ifelse`](@ref).
 """
 Bool
 
-for (bit, sign, exp, frac) in ((16, 1, 5, 10), (32, 1, 8, 23), (64, 1, 11, 52))
-    @eval begin
-        """
-            Float$($bit) <: AbstractFloat
+"""
+    Float64 <: AbstractFloat <: Real
 
-        $($bit)-bit floating point number type (IEEE 754 standard).
+64-bit floating point number type (IEEE 754 standard).
+Binary format is 1 sign, 11 exponent, 52 fraction bits.
+See [`bitstring`](@ref), [`signbit`](@ref), [`exponent`](@ref), [`frexp`](@ref),
+and [`significand`](@ref) to access various bits.
 
-        Binary format: $($sign) sign, $($exp) exponent, $($frac) fraction bits.
-        """
-        $(Symbol("Float", bit))
-    end
-end
+This is the default for floating point literals, `1.0 isa Float64`,
+and for many operations such as `1/2, 2pi, log(2), range(0,90,length=4)`.
+Unlike integers, this default does not change with `Sys.WORD_SIZE`.
+
+The exponent for scientific notation can be entered as `e` or `E`,
+thus `2e3 === 2.0E3 === 2.0 * 10^3`. Doing so is strongly preferred over
+`10^n` because integers overflow, thus `2.0 * 10^19 < 0` but `2e19 > 0`.
+
+See also [`Inf`](@ref), [`NaN`](@ref), [`floatmax`](@ref), [`Float32`](@ref), [`Complex`](@ref).
+"""
+Float64
+
+"""
+    Float32 <: AbstractFloat <: Real
+
+32-bit floating point number type (IEEE 754 standard).
+Binary format is 1 sign, 8 exponent, 23 fraction bits.
+
+The exponent for scientific notation should be entered as lower-case `f`,
+thus `2f3 === 2.0f0 * 10^3 === Float32(2_000)`.
+For array literals and comprehensions, the element type can be specified before
+the square brackets: `Float32[1,4,9] == Float32[i^2 for i in 1:3]`.
+
+See also [`Inf32`](@ref), [`NaN32`](@ref), [`Float16`](@ref), [`exponent`](@ref), [`frexp`](@ref).
+"""
+Float32
+
+"""
+    Float16 <: AbstractFloat <: Real
+
+16-bit floating point number type (IEEE 754 standard).
+Binary format is 1 sign, 5 exponent, 10 fraction bits.
+"""
+Float16
 
 for bit in (8, 16, 32, 64, 128)
+    type = Symbol(:Int, bit)
+    srange = bit > 31 ? "" : "Represents numbers `n ∈ " * repr(eval(:(typemin($type):typemax($type)))) * "`.\n"
+    unshow = repr(eval(Symbol(:UInt, bit))(bit-1))
+
     @eval begin
         """
-            Int$($bit) <: Signed
+            Int$($bit) <: Signed <: Integer
 
         $($bit)-bit signed integer type.
+
+        $($(srange))Note that such integers overflow without warning,
+        thus `typemax($($type)) + $($type)(1) < 0`.
+
+        See also [`Int`](@ref $Int), [`widen`](@ref), [`BigInt`](@ref).
         """
         $(Symbol("Int", bit))
 
         """
-            UInt$($bit) <: Unsigned
+            UInt$($bit) <: Unsigned <: Integer
 
         $($bit)-bit unsigned integer type.
+
+        Printed in hexadecimal, thus $($(unshow)) == $($(bit-1)).
         """
         $(Symbol("UInt", bit))
     end
 end
 
+"""
+    Int
+
+Sys.WORD_SIZE-bit signed integer type, `Int <: Signed <: Integer <: Real`.
+
+This is the default type of most integer literals and is an alias for either `Int32`
+or `Int64`, depending on `Sys.WORD_SIZE`. It is the type returned by functions such as
+[`length`](@ref), and the standard type for indexing arrays.
+
+Note that integers overflow without warning, thus `typemax(Int) + 1 < 0` and `10^19 < 0`.
+Overflow can be avoided by using [`BigInt`](@ref).
+Very large integer literals will use a wider type, for instance `10_000_000_000_000_000_000 isa Int128`.
+
+Integer division is [`div`](@ref) alias `÷`,
+whereas [`/`](@ref) acting on integers returns [`Float64`](@ref).
+
+See also [`$(Symbol("Int", Sys.WORD_SIZE))`](@ref), [`widen`](@ref), [`typemax`](@ref), [`bitstring`](@ref).
+"""
+Int
+
+"""
+    UInt
+
+Sys.WORD_SIZE-bit unsigned integer type, `UInt <: Unsigned <: Integer`.
+
+Like [`Int`](@ref Int), the alias `UInt` may point to either `UInt32` or `UInt64`,
+according to the value of `Sys.WORD_SIZE` on a given computer.
+
+Printed and parsed in hexadecimal: `UInt(15) === $(repr(UInt(15)))`.
+"""
+UInt
+
 """
     Symbol
 
@@ -2110,7 +2445,7 @@ iteration over characters.
 Symbol
 
 """
-    Symbol(x...) -> Symbol
+    Symbol(x...)::Symbol
 
 Create a [`Symbol`](@ref) by concatenating the string representations of the arguments together.
 
@@ -2147,14 +2482,19 @@ julia> Tuple(Real[1, 2, pi])  # takes a collection
 tuple
 
 """
-    getfield(value, name::Symbol, [order::Symbol])
-    getfield(value, i::Int, [order::Symbol])
+    getfield(value, name::Symbol, [order::Symbol], [boundscheck::Bool=true])
+    getfield(value, i::Int, [order::Symbol], [boundscheck::Bool=true])
+
+Extract a field from a composite `value` by name or position.
+
+Optionally, an ordering can be defined for the operation.
+If the field was declared `@atomic`, the specification is strongly recommended to be
+compatible with the stores to that location.
+Otherwise, if not declared as `@atomic`, this parameter must be `:not_atomic` if specified.
+
+The bounds check may be disabled, in which case the behavior of this function is
+undefined if `i` is out of bounds.
 
-Extract a field from a composite `value` by name or position. Optionally, an
-ordering can be defined for the operation. If the field was declared `@atomic`,
-the specification is strongly recommended to be compatible with the stores to
-that location. Otherwise, if not declared as `@atomic`, this parameter must be
-`:not_atomic` if specified.
 See also [`getproperty`](@ref Base.getproperty) and [`fieldnames`](@ref).
 
 # Examples
@@ -2211,19 +2551,22 @@ setfield!
     swapfield!(value, name::Symbol, x, [order::Symbol])
     swapfield!(value, i::Int, x, [order::Symbol])
 
-These atomically perform the operations to simultaneously get and set a field:
+Atomically perform the operations to simultaneously get and set a field:
 
     y = getfield(value, name)
     setfield!(value, name, x)
     return y
+
+!!! compat "Julia 1.7"
+    This function requires Julia 1.7 or later.
 """
 swapfield!
 
 """
-    modifyfield!(value, name::Symbol, op, x, [order::Symbol]) -> Pair
-    modifyfield!(value, i::Int, op, x, [order::Symbol]) -> Pair
+    modifyfield!(value, name::Symbol, op, x, [order::Symbol])::Pair
+    modifyfield!(value, i::Int, op, x, [order::Symbol])::Pair
 
-These atomically perform the operations to get and set a field after applying
+Atomically perform the operations to get and set a field after applying
 the function `op`.
 
     y = getfield(value, name)
@@ -2233,6 +2576,9 @@ the function `op`.
 
 If supported by the hardware (for example, atomic increment), this may be
 optimized to the appropriate hardware instruction, otherwise it'll use a loop.
+
+!!! compat "Julia 1.7"
+    This function requires Julia 1.7 or later.
 """
 modifyfield!
 
@@ -2242,7 +2588,7 @@ modifyfield!
     replacefield!(value, i::Int, expected, desired,
                   [success_order::Symbol, [fail_order::Symbol=success_order]) -> (; old, success::Bool)
 
-These atomically perform the operations to get and conditionally set a field to
+Atomically perform the operations to get and conditionally set a field to
 a given value.
 
     y = getfield(value, name, fail_order)
@@ -2254,9 +2600,30 @@ a given value.
 
 If supported by the hardware, this may be optimized to the appropriate hardware
 instruction, otherwise it'll use a loop.
+
+!!! compat "Julia 1.7"
+    This function requires Julia 1.7 or later.
 """
 replacefield!
 
+"""
+    setfieldonce!(value, name::Union{Int,Symbol}, desired,
+                  [success_order::Symbol, [fail_order::Symbol=success_order]) -> success::Bool
+
+Atomically perform the operations to set a field to
+a given value, only if it was previously not set.
+
+    ok = !isdefined(value, name, fail_order)
+    if ok
+        setfield!(value, name, desired, success_order)
+    end
+    return ok
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+"""
+setfieldonce!
+
 """
     getglobal(module::Module, name::Symbol, [order::Symbol=:monotonic])
 
@@ -2297,6 +2664,7 @@ julia> getglobal(M, :a)
 """
 getglobal
 
+
 """
     setglobal!(module::Module, name::Symbol, x, [order::Symbol=:monotonic])
 
@@ -2318,11 +2686,17 @@ cases.
 See also [`setproperty!`](@ref Base.setproperty!) and [`getglobal`](@ref)
 
 # Examples
-```jldoctest
-julia> module M end;
+```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*\\n.*)*"
+julia> module M; global a; end;
 
 julia> M.a  # same as `getglobal(M, :a)`
-ERROR: UndefVarError: `a` not defined
+ERROR: UndefVarError: `a` not defined in `M`
+Suggestion: add an appropriate import or assignment. This global was declared but not assigned.
+Stacktrace:
+ [1] getproperty(x::Module, f::Symbol)
+   @ Base ./Base_compiler.jl:40
+ [2] top-level scope
+   @ none:1
 
 julia> setglobal!(M, :a, 1)
 1
@@ -2333,6 +2707,134 @@ julia> M.a
 """
 setglobal!
 
+"""
+    Core.get_binding_type(module::Module, name::Symbol)
+
+Retrieve the declared type of the binding `name` from the module `module`.
+
+!!! compat "Julia 1.9"
+    This function requires Julia 1.9 or later.
+"""
+Core.get_binding_type
+
+"""
+    swapglobal!(module::Module, name::Symbol, x, [order::Symbol=:monotonic])
+
+Atomically perform the operations to simultaneously get and set a global.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+
+See also [`swapproperty!`](@ref Base.swapproperty!) and [`setglobal!`](@ref).
+"""
+swapglobal!
+
+"""
+    modifyglobal!(module::Module, name::Symbol, op, x, [order::Symbol=:monotonic])::Pair
+
+Atomically perform the operations to get and set a global after applying
+the function `op`.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+
+See also [`modifyproperty!`](@ref Base.modifyproperty!) and [`setglobal!`](@ref).
+"""
+modifyglobal!
+
+"""
+    replaceglobal!(module::Module, name::Symbol, expected, desired,
+                  [success_order::Symbol, [fail_order::Symbol=success_order]) -> (; old, success::Bool)
+
+Atomically perform the operations to get and conditionally set a global to
+a given value.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+
+See also [`replaceproperty!`](@ref Base.replaceproperty!) and [`setglobal!`](@ref).
+"""
+replaceglobal!
+
+"""
+    setglobalonce!(module::Module, name::Symbol, value,
+                  [success_order::Symbol, [fail_order::Symbol=success_order]) -> success::Bool
+
+Atomically perform the operations to set a global to
+a given value, only if it was previously not set.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+
+See also [`setpropertyonce!`](@ref Base.setpropertyonce!) and [`setglobal!`](@ref).
+"""
+setglobalonce!
+
+"""
+    declare_global(module::Module, name::Symbol, strong::Bool=false, [ty::Type])
+
+Declare the global `name` in module `module`.  If `ty` is given, declares a
+"strong" global, which cannot be replaced with a constant binding, otherwise
+declares a weak global.
+
+See also [`global`](@ref), [`setglobal!`](@ref), [`get_binding_type`](@ref Core.get_binding_type).
+"""
+Core.declare_global
+
+"""
+    declare_const(module::Module, name::Symbol, [x])
+
+Create or replace the constant `name` in `module` with the new value `x`.  When
+replacing, `x` does not need to have the same type as the original constant.
+
+When `x` is not given, `name` becomes an undefined constant; it cannot be read
+or written to, but can be redefined.
+
+Unlike the syntax `const`, calling this function does not insert `Core.@latestworld` to update the world age of the current frame:
+```
+julia> begin
+           const x = 1
+           println(x)
+           const x = 2
+           println(x)
+           Core.declare_const(Main, :x, 3)
+           println(x)
+           Core.@latestworld
+           println(x)
+       end
+1
+2
+2
+3
+```
+
+!!! compat "Julia 1.12"
+    This function requires Julia 1.12 or later.  Redefining constants on earlier
+    versions of Julia is unpredictable.
+
+See also [`const`](@ref).
+"""
+Core.declare_const
+
+"""
+   _import(to::Module, from::Module, asname::Symbol, [sym::Symbol, imported::Bool])
+
+With all five arguments, imports `sym` from module `from` into `to` with name
+`asname`.  `imported` is true for bindings created with `import` (set it to
+false for `using A: ...`).
+
+With only the first three arguments, creates a binding for the module `from`
+with name `asname` in `to`.
+"""
+Core._import
+
+"""
+   _using(to::Module, from::Module)
+
+Add `from` to the usings list of `to`.
+"""
+Core._using
+
 """
     typeof(x)
 
@@ -2369,6 +2871,9 @@ compatible with the stores to that location. Otherwise, if not declared as
 
 To test whether an array element is defined, use [`isassigned`](@ref) instead.
 
+The global variable variant is supported for compatibility with older julia
+releases. For new code, prefer [`isdefinedglobal`](@ref).
+
 See also [`@isdefined`](@ref).
 
 # Examples
@@ -2397,6 +2902,76 @@ false
 isdefined
 
 
+"""
+    isdefinedglobal(m::Module, s::Symbol, [allow_import::Bool=true, [order::Symbol=:unordered]])
+
+Tests whether a global variable `s` is defined in module `m` (in the current world age).
+A variable is considered defined if and only if a value may be read from this global variable
+and an access will not throw. This includes both constants and global variables that have
+a value set.
+
+If `allow_import` is `false`, the global variable must be defined inside `m`
+and may not be imported from another module.
+
+!!! compat "Julia 1.12"
+    This function requires Julia 1.12 or later.
+
+See also [`@isdefined`](@ref).
+
+# Examples
+```jldoctest
+julia> isdefinedglobal(Base, :sum)
+true
+
+julia> isdefinedglobal(Base, :NonExistentMethod)
+false
+
+julia> isdefinedglobal(Base, :sum, false)
+true
+
+julia> isdefinedglobal(Main, :sum, false)
+false
+```
+"""
+isdefinedglobal
+
+"""
+    Memory{T}(undef, n)
+
+Construct an uninitialized [`Memory{T}`](@ref) of length `n`. All Memory
+objects of length 0 might alias, since there is no reachable mutable content
+from them.
+
+# Examples
+```julia-repl
+julia> Memory{Float64}(undef, 3)
+3-element Memory{Float64}:
+ 6.90966e-310
+ 6.90966e-310
+ 6.90966e-310
+```
+"""
+Memory{T}(::UndefInitializer, n)
+
+"""
+    memoryref(::GenericMemory)
+
+Construct a `GenericMemoryRef` from a memory object. This does not fail, but the
+resulting memory will point out-of-bounds if and only if the memory is empty.
+"""
+memoryref(::GenericMemory)
+
+"""
+    memoryref(::GenericMemory, index::Integer)
+    memoryref(::GenericMemoryRef, index::Integer)
+
+Construct a `GenericMemoryRef` from a memory object and an offset index (1-based) which
+can also be negative. This always returns an inbounds object, and will throw an
+error if that is not possible (because the index would result in a shift
+out-of-bounds of the underlying memory).
+"""
+memoryref(::Union{GenericMemory,GenericMemoryRef}, ::Integer)
+
 """
     Vector{T}(undef, n)
 
@@ -2405,7 +2980,7 @@ Construct an uninitialized [`Vector{T}`](@ref) of length `n`.
 # Examples
 ```julia-repl
 julia> Vector{Float64}(undef, 3)
-3-element Array{Float64, 1}:
+3-element Vector{Float64}:
  6.90966e-310
  6.90966e-310
  6.90966e-310
@@ -2455,7 +3030,7 @@ Construct an uninitialized [`Matrix{T}`](@ref) of size `m`×`n`.
 # Examples
 ```julia-repl
 julia> Matrix{Float64}(undef, 2, 3)
-2×3 Array{Float64, 2}:
+2×3 Matrix{Float64}:
  2.36365e-314  2.28473e-314    5.0e-324
  2.26704e-314  2.26711e-314  NaN
 
@@ -2593,7 +3168,7 @@ an alias for `UndefInitializer()`.
 # Examples
 ```julia-repl
 julia> Array{Float64, 1}(UndefInitializer(), 3)
-3-element Array{Float64, 1}:
+3-element Vector{Float64}:
  2.2752528595e-314
  2.202942107e-314
  2.275252907e-314
@@ -2624,14 +3199,20 @@ undef
 """
     Ptr{T}()
 
-Creates a null pointer to type `T`.
+Create a null pointer to type `T`.
 """
 Ptr{T}()
 
 """
     +(x, y...)
 
-Addition operator. `x+y+z+...` calls this function with all arguments, i.e. `+(x, y, z, ...)`.
+Addition operator.
+
+Infix `x+y+z+...` calls this function with all arguments, i.e. `+(x, y, z, ...)`,
+which by default then calls `(x+y) + z + ...` starting from the left.
+
+Note that overflow is possible for most integer types, including the
+default `Int`, when adding large numbers.
 
 # Examples
 ```jldoctest
@@ -2640,6 +3221,14 @@ julia> 1 + 20 + 4
 
 julia> +(1, 20, 4)
 25
+
+julia> [1,2] + [3,4]
+2-element Vector{Int64}:
+ 4
+ 6
+
+julia> typemax(Int) + 1 < 0
+true
 ```
 """
 (+)(x, y...)
@@ -2663,6 +3252,12 @@ julia> -[1 2; 3 4]
 2×2 Matrix{Int64}:
  -1  -2
  -3  -4
+
+julia> -(true)  # promotes to Int
+-1
+
+julia> -(0x003)
+0xfffd
 ```
 """
 -(x)
@@ -2686,7 +3281,18 @@ julia> -(2, 4.5)
 """
     *(x, y...)
 
-Multiplication operator. `x*y*z*...` calls this function with all arguments, i.e. `*(x, y, z, ...)`.
+Multiplication operator.
+
+Infix `x*y*z*...` calls this function with all arguments, i.e. `*(x, y, z, ...)`,
+which by default then calls `(x*y) * z * ...` starting from the left.
+
+Juxtaposition such as `2pi` also calls `*(2, pi)`. Note that this operation
+has higher precedence than a literal `*`. Note also that juxtaposition "0x..."
+(integer zero times a variable whose name starts with `x`) is forbidden as
+it clashes with unsigned integer literals: `0x01 isa UInt8`.
+
+Note that overflow is possible for most integer types, including the default `Int`,
+when multiplying large numbers.
 
 # Examples
 ```jldoctest
@@ -2695,6 +3301,17 @@ julia> 2 * 7 * 8
 
 julia> *(2, 7, 8)
 112
+
+julia> [2 0; 0 3] * [1, 10]  # matrix * vector
+2-element Vector{Int64}:
+  2
+ 30
+
+julia> 1/2pi, 1/2*pi  # juxtaposition has higher precedence
+(0.15915494309189535, 1.5707963267948966)
+
+julia> x = [1, 2]; x'x  # adjoint vector * vector
+5
 ```
 """
 (*)(x, y...)
@@ -2702,8 +3319,10 @@ julia> *(2, 7, 8)
 """
     /(x, y)
 
-Right division operator: multiplication of `x` by the inverse of `y` on the right. Gives
-floating-point results for integer arguments.
+Right division operator: multiplication of `x` by the inverse of `y` on the right.
+
+Gives floating-point results for integer arguments.
+See [`÷`](@ref div) for integer division, or [`//`](@ref) for [`Rational`](@ref) results.
 
 # Examples
 ```jldoctest
@@ -2782,14 +3401,27 @@ Any
 """
     Union{}
 
-`Union{}`, the empty [`Union`](@ref) of types, is the type that has no values. That is, it has the defining
-property `isa(x, Union{}) == false` for any `x`. `Base.Bottom` is defined as its alias and the type of `Union{}`
-is `Core.TypeofBottom`.
+`Union{}`, the empty [`Union`](@ref) of types, is the *bottom* type of the type system. That is, for each
+`T::Type`, `Union{} <: T`. Also see the subtyping operator's documentation: [`<:`](@ref).
+
+As such, `Union{}` is also an *empty*/*uninhabited* type, meaning that it has no values. That is, for each `x`,
+`isa(x, Union{}) == false`.
+
+`Base.Bottom` is defined as its alias and the type of `Union{}` is `Core.TypeofBottom`.
 
 # Examples
 ```jldoctest
 julia> isa(nothing, Union{})
 false
+
+julia> Union{} <: Int
+true
+
+julia> typeof(Union{}) === Core.TypeofBottom
+true
+
+julia> isa(Union{}, Union)
+false
 ```
 """
 kw"Union{}", Base.Bottom
@@ -2797,23 +3429,33 @@ kw"Union{}", Base.Bottom
 """
     Union{Types...}
 
-A type union is an abstract type which includes all instances of any of its argument types. The empty
-union [`Union{}`](@ref) is the bottom type of Julia.
+A `Union` type is an abstract type which includes all instances of any of its argument types.
+This means that `T <: Union{T,S}` and `S <: Union{T,S}`.
+
+Like other abstract types, it cannot be instantiated, even if all of its arguments are non
+abstract.
 
 # Examples
 ```jldoctest
 julia> IntOrString = Union{Int,AbstractString}
 Union{Int64, AbstractString}
 
-julia> 1 isa IntOrString
+julia> 1 isa IntOrString # instance of Int is included in the union
 true
 
-julia> "Hello!" isa IntOrString
+julia> "Hello!" isa IntOrString # String is also included
 true
 
-julia> 1.0 isa IntOrString
+julia> 1.0 isa IntOrString # Float64 is not included because it is neither Int nor AbstractString
 false
 ```
+
+# Extended Help
+
+Unlike most other parametric types, unions are covariant in their parameters. For example,
+`Union{Real, String}` is a subtype of `Union{Number, AbstractString}`.
+
+The empty union [`Union{}`](@ref) is the bottom type of Julia.
 """
 Union
 
@@ -2822,7 +3464,7 @@ Union
     UnionAll
 
 A union of types over all values of a type parameter. `UnionAll` is used to describe parametric types
-where the values of some parameters are not known.
+where the values of some parameters are not known. See the manual section on [UnionAll Types](@ref).
 
 # Examples
 ```jldoctest
@@ -3090,14 +3732,30 @@ Base.modifyproperty!
     replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order)
 
 Perform a compare-and-swap operation on `x.f` from `expected` to `desired`, per
-egal. The syntax `@atomic_replace! x.f expected => desired` can be used instead
+egal. The syntax `@atomicreplace x.f expected => desired` can be used instead
 of the function call form.
 
 See also [`replacefield!`](@ref Core.replacefield!)
-and [`setproperty!`](@ref Base.setproperty!).
+[`setproperty!`](@ref Base.setproperty!),
+[`setpropertyonce!`](@ref Base.setpropertyonce!).
 """
 Base.replaceproperty!
 
+"""
+    setpropertyonce!(x, f::Symbol, value, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order)
+
+Perform a compare-and-swap operation on `x.f` to set it to `value` if previously unset.
+The syntax `@atomiconce x.f = value` can be used instead of the function call form.
+
+See also [`setfieldonce!`](@ref Core.replacefield!),
+[`setproperty!`](@ref Base.setproperty!),
+[`replaceproperty!`](@ref Base.replaceproperty!).
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+"""
+Base.setpropertyonce!
+
 
 """
     StridedArray{T, N}
@@ -3241,7 +3899,7 @@ kw"atomic"
 
 This function prevents dead-code elimination (DCE) of itself and any arguments
 passed to it, but is otherwise the lightest barrier possible. In particular,
-it is not a GC safepoint, does model an observable heap effect, does not expand
+it is not a GC safepoint, does not model an observable heap effect, does not expand
 to any code itself and may be re-ordered with respect to other side effects
 (though the total number of executions may not change).
 
@@ -3259,6 +3917,17 @@ unused and delete the entire benchmark code).
     `donotdelete(1+1)`, no add instruction needs to be executed at runtime and
     the code is semantically equivalent to `donotdelete(2).`
 
+!!! note
+    This intrinsic does not affect the semantics of code that is dead because it is
+    *unreachable*. For example, the body of the function `f(x) = false && donotdelete(x)`
+    may be deleted in its entirety. The semantics of this intrinsic only guarantee that
+    *if* the intrinsic is semantically executed, then there is some program state at
+    which the value of the arguments of this intrinsic were available (in a register,
+    in memory, etc.).
+
+!!! compat "Julia 1.8"
+    This method was added in Julia 1.8.
+
 # Examples
 
 ```julia
@@ -3277,11 +3946,9 @@ Base.donotdelete
 """
     Base.compilerbarrier(setting::Symbol, val)
 
-This function puts a barrier at a specified compilation phase.
-It is supposed to only influence the compilation behavior according to `setting`,
-and its runtime semantics is just to return the second argument `val` (except that
-this function will perform additional checks on `setting` in a case when `setting`
-isn't known precisely at compile-time.)
+This function acts a compiler barrier at a specified compilation phase.
+The dynamic semantics of this intrinsic are to return the `val` argument, unmodified.
+However, depending on the `setting`, the compiler is prevented from assuming this behavior.
 
 Currently either of the following `setting`s is allowed:
 - Barriers on abstract interpretation:
@@ -3294,9 +3961,9 @@ Currently either of the following `setting`s is allowed:
 - Any barriers on optimization aren't implemented yet
 
 !!! note
-    This function is supposed to be used _with `setting` known precisely at compile-time_.
-    Note that in a case when the `setting` isn't known precisely at compile-time, the compiler
-    currently will put the most strongest barrier(s) rather than emitting a compile-time warning.
+    This function is expected to be used with `setting` known precisely at compile-time.
+    If the `setting` is not known precisely at compile-time, the compiler will emit the
+    strongest barrier(s). No compile-time warning is issued.
 
 # Examples
 
@@ -3351,4 +4018,20 @@ The current differences are:
 """
 Core.finalizer
 
+"""
+    ConcurrencyViolationError(msg) <: Exception
+
+An error thrown when a detectable violation of concurrent semantics has occurred.
+
+A non-exhaustive list of examples of when this is used include:
+
+ * Throwing when a deadlock has been detected (e.g. `wait(current_task())`)
+ * Known-unsafe behavior is attempted (e.g. `yield(current_task)`)
+ * A known non-threadsafe datastructure is attempted to be modified from multiple concurrent tasks
+ * A lock is being unlocked that wasn't locked by this task
+"""
+ConcurrencyViolationError
+
+Base.include(BaseDocs, "intrinsicsdocs.jl")
+
 end
diff --git a/base/docs/bindings.jl b/base/docs/bindings.jl
index 6095d52a28e5a..fc72375e8cebe 100644
--- a/base/docs/bindings.jl
+++ b/base/docs/bindings.jl
@@ -1,7 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-export @var
-
 struct Binding
     mod::Module
     var::Symbol
@@ -16,21 +14,15 @@ end
 
 bindingexpr(x) = Expr(:call, Binding, splitexpr(x)...)
 
-defined(b::Binding) = isdefined(b.mod, b.var)
-resolve(b::Binding) = getfield(b.mod, b.var)
+defined(b::Binding) = invokelatest(isdefinedglobal, b.mod, b.var)
+resolve(b::Binding) = invokelatest(getglobal, b.mod, b.var)
 
 function splitexpr(x::Expr)
-    isexpr(x, :macrocall) ? splitexpr(x.args[1]) :
-    isexpr(x, :.)         ? (x.args[1], x.args[2]) :
-    error("Invalid @var syntax `$x`.")
+    isexpr(x, :.) ? (x.args[1], x.args[2]) : error("Could not find something to document in `$x`.")
 end
-splitexpr(s::Symbol) = Expr(:macrocall, getfield(Base, Symbol("@__MODULE__")), nothing), quot(s)
+splitexpr(s::Symbol) = :($Base.@__MODULE__), quot(s) # this somewhat complex form allows deferring resolving the Module for module docstring until after the module is created
 splitexpr(r::GlobalRef) = r.mod, quot(r.name)
-splitexpr(other)     = error("Invalid @var syntax `$other`.")
-
-macro var(x)
-    esc(bindingexpr(x))
-end
+splitexpr(other)     = error("Could not find something to document in `$other`.")
 
 function Base.show(io::IO, b::Binding)
     if b.mod === Base.active_module()
@@ -42,6 +34,6 @@ end
 
 aliasof(b::Binding)     = defined(b) ? (a = aliasof(resolve(b), b); defined(a) ? a : b) : b
 aliasof(d::DataType, b) = Binding(d.name.module, d.name.name)
-aliasof(λ::Function, b) = (m = typeof(λ).name.mt; Binding(m.module, m.name))
+aliasof(λ::Function, b) = (m = typeof(λ).name; Binding(m.module, m.singletonname))
 aliasof(m::Module,   b) = Binding(m, nameof(m))
 aliasof(other,       b) = b
diff --git a/base/docs/core.jl b/base/docs/core.jl
index 718e49917632f..93265416099f9 100644
--- a/base/docs/core.jl
+++ b/base/docs/core.jl
@@ -2,15 +2,21 @@
 
 module CoreDocs
 
-import ..esc, ..push!, ..getindex, ..unsafe_load, ..Csize_t, ..@nospecialize
+import Core: @nospecialize, SimpleVector
 
-@nospecialize # don't specialize on any arguments of the methods declared herein
+struct DocLinkedList
+    doc::SimpleVector
+    next::DocLinkedList
+    DocLinkedList() = new()
+    DocLinkedList(doc::SimpleVector, next::DocLinkedList) = new(doc, next)
+end
 
+global DOCS = DocLinkedList()
 function doc!(source::LineNumberNode, mod::Module, str, ex)
-    push!(DOCS, Core.svec(mod, ex, str, source.file, source.line))
+    global DOCS
+    DOCS = DocLinkedList(Core.svec(mod, ex, str, source.file, source.line), DOCS)
     nothing
 end
-const DOCS = Array{Core.SimpleVector,1}()
 
 isexpr(x, h::Symbol) = isa(x, Expr) && x.head === h
 
@@ -25,9 +31,9 @@ function docm(source::LineNumberNode, mod::Module, str, x)
     else
         out = Expr(:block, x, out)
     end
-    return esc(out)
+    return Expr(:escape, out)
 end
 docm(source::LineNumberNode, mod::Module, x) =
-    isexpr(x, :->) ? docm(source, mod, x.args[1], x.args[2].args[2]) : error("invalid '@doc'.")
+    (isa(x, Expr) && x.head === :->) ? docm(source, mod, x.args[1], x.args[2].args[2]) : error("invalid '@doc'.")
 
 end
diff --git a/base/docs/intrinsicsdocs.jl b/base/docs/intrinsicsdocs.jl
new file mode 100644
index 0000000000000..db54c1d0dc437
--- /dev/null
+++ b/base/docs/intrinsicsdocs.jl
@@ -0,0 +1,218 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+"""
+    Core.IR
+
+The `Core.IR` module exports the IR object model.
+"""
+Core.IR
+
+"""
+    Core.IntrinsicFunction <: Core.Builtin <: Function
+
+The `Core.IntrinsicFunction` function define some basic primitives for what defines the
+abilities and behaviors of a Julia program
+"""
+Core.IntrinsicFunction
+
+"""
+    Core.Intrinsics
+
+The `Core.Intrinsics` module holds the `Core.IntrinsicFunction` objects.
+"""
+Core.Intrinsics
+
+"""
+    Core.memorynew(::Type{T} where T <: GenericMemory, n::Int)
+
+Construct an uninitialized [`GenericMemory`](@ref) of length `n`.
+
+See also [`Memory`](@ref Core.Memory), [`Memory{T}(undef, n)`](@ref Core.Memory(::UndefInitializer, ::Int)).
+"""
+Core.memorynew
+
+"""
+    Core.memoryrefnew(::GenericMemory)
+    Core.memoryrefnew(::GenericMemoryRef, index::Int, [boundscheck::Bool])
+
+Return a `GenericMemoryRef` for a `GenericMemory`. See [`memoryref`](@ref).
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+"""
+Core.memoryrefnew
+
+"""
+    Core..memoryrefoffset(::GenericMemoryRef)
+
+Return the offset index that was used to construct the `MemoryRef`. See [`memoryref`](@ref).
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+"""
+Core.memoryrefoffset
+
+"""
+    Core.memoryrefget(::GenericMemoryRef, ordering::Symbol, boundscheck::Bool)
+
+Return the value stored at the `MemoryRef`, throwing a `BoundsError` if the `Memory` is empty. See `ref[]`.
+The memory ordering specified must be compatible with the `isatomic` parameter.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+"""
+Core.memoryrefget
+
+"""
+    Core.memoryrefset!(::GenericMemoryRef, value, ordering::Symbol, boundscheck::Bool)
+
+Store the value to the `MemoryRef`, throwing a `BoundsError` if the `Memory` is empty. See `ref[] = value`.
+The memory ordering specified must be compatible with the `isatomic` parameter.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+"""
+Core.memoryrefset!
+
+"""
+    Core.memoryref_isassigned(::GenericMemoryRef, ordering::Symbol, boundscheck::Bool)
+
+Return whether there is a value stored at the `MemoryRef`, returning false if the `Memory`
+is empty. See [`isassigned(::Base.RefValue)`](@ref), [`Core.memoryrefget`](@ref).
+The memory ordering specified must be compatible with the `isatomic` parameter.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+"""
+Core.memoryref_isassigned
+
+"""
+    Core.memoryrefswap!(::GenericMemoryRef, value, ordering::Symbol, boundscheck::Bool)
+
+Atomically perform the operations to simultaneously get and set a `MemoryRef` value.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+
+See also [`swapproperty!`](@ref Base.swapproperty!) and [`Core.memoryrefset!`](@ref).
+"""
+Core.memoryrefswap!
+
+"""
+    Core.memoryrefmodify!(::GenericMemoryRef, op, value, ordering::Symbol, boundscheck::Bool)::Pair
+
+Atomically perform the operations to get and set a `MemoryRef` value after applying
+the function `op`.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+
+See also [`modifyproperty!`](@ref Base.modifyproperty!) and [`Core.memoryrefset!`](@ref).
+"""
+Core.memoryrefmodify!
+
+"""
+    Core.memoryrefreplace!(::GenericMemoryRef, expected, desired,
+                           success_order::Symbol, fail_order::Symbol=success_order, boundscheck::Bool) -> (; old, success::Bool)
+
+Atomically perform the operations to get and conditionally set a `MemoryRef` value.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+
+See also [`replaceproperty!`](@ref Base.replaceproperty!) and [`Core.memoryrefset!`](@ref).
+"""
+Core.memoryrefreplace!
+
+"""
+    Core.memoryrefsetonce!(::GenericMemoryRef, value,
+                           success_order::Symbol, fail_order::Symbol=success_order, boundscheck::Bool) -> success::Bool
+
+Atomically perform the operations to set a `MemoryRef` to
+a given value, only if it was previously not set.
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+
+See also [`setpropertyonce!`](@ref Base.replaceproperty!) and [`Core.memoryrefset!`](@ref).
+"""
+Core.memoryrefsetonce!
+
+
+"""
+    Core.Intrinsics.pointerref(p::Ptr{T}, i::Int, align::Int)
+
+Load a value of type `T` from the address of the `i`th element (1-indexed)
+starting at `p`. This is equivalent to the C expression `p[i-1]`.
+
+The alignment must be a power of two, or 0, indicating the default alignment
+for `T`. If `p[i-1]` is out of bounds, invalid, or is not aligned, the behavior
+is undefined. An alignment of 1 is always safe.
+
+See also [`unsafe_load`](@ref).
+"""
+Core.Intrinsics.pointerref
+
+"""
+    Core.Intrinsics.pointerset(p::Ptr{T}, x::T, i::Int, align::Int)
+
+Store a value of type `T` to the address of the `i`th element (1-indexed)
+starting at `p`.  This is equivalent to the C expression `p[i-1] = x`.
+
+The alignment must be a power of two, or `0`, indicating the default alignment
+for `T`. If `p[i-1]` is out of bounds, invalid, or is not aligned, the behavior
+is undefined. An alignment of 1 is always safe.
+
+See also [`unsafe_store!`](@ref).
+"""
+Core.Intrinsics.pointerset
+
+"""
+    Core.Intrinsics.atomic_pointerref(pointer::Ptr{T}, order::Symbol) --> T
+
+!!! compat "Julia 1.7"
+    This function requires Julia 1.7 or later.
+
+See [`unsafe_load`](@ref Base.unsafe_load).
+"""
+Core.Intrinsics.atomic_pointerref
+
+"""
+    Core.Intrinsics.atomic_pointerset(pointer::Ptr{T}, new::T, order::Symbol) --> pointer
+
+!!! compat "Julia 1.7"
+    This function requires Julia 1.7 or later.
+
+See [`unsafe_store!`](@ref Base.unsafe_store!).
+"""
+Core.Intrinsics.atomic_pointerset
+
+"""
+    Core.Intrinsics.atomic_pointerswap(pointer::Ptr{T}, new::T, order::Symbol) --> old
+
+!!! compat "Julia 1.7"
+    This function requires Julia 1.7 or later.
+
+See [`unsafe_swap!`](@ref Base.unsafe_swap!).
+"""
+Core.Intrinsics.atomic_pointerswap
+
+"""
+    Core.Intrinsics.atomic_pointermodify(pointer::Ptr{T}, function::(old::T,arg::S)->T, arg::S, order::Symbol) --> old
+
+!!! compat "Julia 1.7"
+    This function requires Julia 1.7 or later.
+
+See [`unsafe_modify!`](@ref Base.unsafe_modify!).
+"""
+Core.Intrinsics.atomic_pointermodify
+
+"""
+    Core.Intrinsics.atomic_pointerreplace(pointer::Ptr{T}, expected::Any, new::T, success_order::Symbol, failure_order::Symbol) --> (old, cmp)
+
+!!! compat "Julia 1.7"
+    This function requires Julia 1.7 or later.
+
+See [`unsafe_replace!`](@ref Base.unsafe_replace!).
+"""
+Core.Intrinsics.atomic_pointerreplace
diff --git a/base/docs/utils.jl b/base/docs/utils.jl
index 928dfde01ccf0..1ed576c7362ff 100644
--- a/base/docs/utils.jl
+++ b/base/docs/utils.jl
@@ -23,7 +23,7 @@ You can also use a stream for large amounts of data:
     `HTML` is currently exported to maintain
     backwards compatibility, but this export
     is deprecated. It is recommended to use
-    this type as `Docs.HTML` or to explicitly
+    this type as [`Docs.HTML`](@ref) or to explicitly
     import it from `Docs`.
 """
 mutable struct HTML{T}
@@ -81,7 +81,7 @@ You can also use a stream for large amounts of data:
     `Text` is currently exported to maintain
     backwards compatibility, but this export
     is deprecated. It is recommended to use
-    this type as `Docs.Text` or to explicitly
+    this type as [`Docs.Text`](@ref) or to explicitly
     import it from `Docs`.
 """
 mutable struct Text{T}
diff --git a/base/env.jl b/base/env.jl
index a4a55d9dad013..5472456e22885 100644
--- a/base/env.jl
+++ b/base/env.jl
@@ -3,12 +3,29 @@
 if Sys.iswindows()
     const ERROR_ENVVAR_NOT_FOUND = UInt32(203)
 
+    const env_dict = Lockable(Dict{String, Vector{Cwchar_t}}())
+
+    function memoized_env_lookup(str::AbstractString)
+        # Windows environment variables have a different format from Linux / MacOS, and previously
+        # incurred allocations because we had to convert a String to a Vector{Cwchar_t} each time
+        # an environment variable was looked up. This function memoizes that lookup process, storing
+        # the String => Vector{Cwchar_t} pairs in env_dict
+        @lock env_dict begin
+            var = get(env_dict[], str, nothing)
+            if isnothing(var)
+                var = cwstring(str)
+                env_dict[][str] = var
+            end
+            return var
+        end
+    end
+
     _getenvlen(var::Vector{UInt16}) = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,C_NULL,0)
     _hasenv(s::Vector{UInt16}) = _getenvlen(s) != 0 || Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND
-    _hasenv(s::AbstractString) = _hasenv(cwstring(s))
+    _hasenv(s::AbstractString) = _hasenv(memoized_env_lookup(s))
 
     function access_env(onError::Function, str::AbstractString)
-        var = cwstring(str)
+        var = memoized_env_lookup(str)
         len = _getenvlen(var)
         if len == 0
             return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? "" : onError(str)
@@ -21,7 +38,7 @@ if Sys.iswindows()
     end
 
     function _setenv(svar::AbstractString, sval::AbstractString, overwrite::Bool=true)
-        var = cwstring(svar)
+        var = memoized_env_lookup(svar)
         val = cwstring(sval)
         if overwrite || !_hasenv(var)
             ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,val)
@@ -30,7 +47,7 @@ if Sys.iswindows()
     end
 
     function _unsetenv(svar::AbstractString)
-        var = cwstring(svar)
+        var = memoized_env_lookup(svar)
         ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,C_NULL)
         windowserror(:setenv, ret == 0 && Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND)
     end
@@ -57,7 +74,7 @@ end # os test
 ## ENV: hash interface ##
 
 """
-    EnvDict() -> EnvDict
+    EnvDict()::EnvDict
 
 A singleton of this type provides a hash table interface to environment variables.
 """
@@ -111,25 +128,34 @@ const get_bool_env_falsy = (
     "0")
 
 """
-    Base.get_bool_env(name::String, default::Bool)::Union{Bool,Nothing}
+    Base.get_bool_env(name::String, default::Bool; throw=false)::Union{Bool,Nothing}
+    Base.get_bool_env(f_default::Callable, name::String; throw=false)::Union{Bool,Nothing}
 
-Evaluate whether the value of environnment variable `name` is a truthy or falsy string,
-and return `nothing` if it is not recognized as either. If the variable is not set, or is set to "",
-return `default`.
+Evaluate whether the value of environment variable `name` is a truthy or falsy string,
+and return `nothing` (or throw if `throw=true`) if it is not recognized as either. If
+the variable is not set, or is set to "", return `default` or the result of executing `f_default()`.
 
 Recognized values are the following, and their Capitalized and UPPERCASE forms:
     truthy: "t", "true", "y", "yes", "1"
     falsy:  "f", "false", "n", "no", "0"
 """
-function get_bool_env(name::String, default::Bool)
-    haskey(ENV, name) || return default
-    val = ENV[name]
-    if isempty(val)
-        return default
-    elseif val in get_bool_env_truthy
+get_bool_env(name::String, default::Bool; kwargs...) = get_bool_env(Returns(default), name; kwargs...)
+function get_bool_env(f_default::Callable, name::String; kwargs...)
+    if haskey(ENV, name)
+        val = ENV[name]
+        if !isempty(val)
+            return parse_bool_env(name, val; kwargs...)
+        end
+    end
+    return f_default()
+end
+function parse_bool_env(name::String, val::String = ENV[name]; throw::Bool=false)
+    if val in get_bool_env_truthy
         return true
     elseif val in get_bool_env_falsy
         return false
+    elseif throw
+        Base.throw(ArgumentError("Value for environment variable `$name` could not be parsed as Boolean: $(repr(val))"))
     else
         return nothing
     end
@@ -138,6 +164,10 @@ end
 getindex(::EnvDict, k::AbstractString) = access_env(k->throw(KeyError(k)), k)
 get(::EnvDict, k::AbstractString, def) = access_env(Returns(def), k)
 get(f::Callable, ::EnvDict, k::AbstractString) = access_env(k->f(), k)
+function get!(default::Callable, ::EnvDict, k::AbstractString)
+    haskey(ENV, k) && return ENV[k]
+    ENV[k] = default()
+end
 in(k::AbstractString, ::KeySet{String, EnvDict}) = _hasenv(k)
 pop!(::EnvDict, k::AbstractString) = (v = ENV[k]; _unsetenv(k); v)
 pop!(::EnvDict, k::AbstractString, def) = haskey(ENV,k) ? pop!(ENV,k) : def
diff --git a/base/error.jl b/base/error.jl
index 4e9be0e172d61..149a929966361 100644
--- a/base/error.jl
+++ b/base/error.jl
@@ -27,17 +27,27 @@ throw
 
 ## native julia error handling ##
 
+# This is `Experimental.@max_methods 2 function error end`, which is not available at this point in bootstrap.
+# NOTE It is important to always be able to infer the return type of `error` as `Union{}`,
+# but there's a hitch when a package globally sets `@max_methods 1` and it causes inference
+# for `error(::Any)` to fail (JuliaLang/julia#54029).
+# This definition site `@max_methods 2` setting overrides any global `@max_methods 1` settings
+# on package side, guaranteeing that return type inference on `error` is successful always.
+function error end
+typeof(error).name.max_methods = UInt8(2)
+
 """
     error(message::AbstractString)
 
 Raise an `ErrorException` with the given message.
 """
 error(s::AbstractString) = throw(ErrorException(s))
+error() = throw(ErrorException(""))
 
 """
     error(msg...)
 
-Raise an `ErrorException` with the given message.
+Raise an `ErrorException` with a message constructed by `string(msg...)`.
 """
 function error(s::Vararg{Any,N}) where {N}
     @noinline
@@ -62,7 +72,7 @@ rethrow() = ccall(:jl_rethrow, Bottom, ())
 rethrow(@nospecialize(e)) = ccall(:jl_rethrow_other, Bottom, (Any,), e)
 
 struct InterpreterIP
-    code::Union{CodeInfo,Core.MethodInstance,Nothing}
+    code::Union{CodeInfo,Core.MethodInstance,Core.CodeInstance,Nothing}
     stmt::Csize_t
     mod::Union{Module,Nothing}
 end
@@ -87,7 +97,7 @@ function _reformat_bt(bt::Array{Ptr{Cvoid},1}, bt2::Array{Any,1})
         tag       = (entry_metadata >> 6) & 0xf
         header    =  entry_metadata >> 10
         if tag == 1 # JL_BT_INTERP_FRAME_TAG
-            code = bt2[j]::Union{CodeInfo,Core.MethodInstance,Nothing}
+            code = bt2[j]::Union{CodeInfo,Core.MethodInstance,Core.CodeInstance,Nothing}
             mod = njlvalues == 2 ? bt2[j+1]::Union{Module,Nothing} : nothing
             push!(ret, InterpreterIP(code, header, mod))
         else
@@ -125,8 +135,8 @@ function catch_backtrace()
     return _reformat_bt(bt::Vector{Ptr{Cvoid}}, bt2::Vector{Any})
 end
 
-struct ExceptionStack <: AbstractArray{Any,1}
-    stack::Array{Any,1}
+struct ExceptionStack <: AbstractArray{NamedTuple{(:exception, :backtrace)},1}
+    stack::Array{NamedTuple{(:exception, :backtrace)},1}
 end
 
 """
@@ -149,7 +159,7 @@ uncaught exceptions.
 """
 function current_exceptions(task::Task=current_task(); backtrace::Bool=true)
     raw = ccall(:jl_get_excstack, Any, (Any,Cint,Cint), task, backtrace, typemax(Cint))::Vector{Any}
-    formatted = Any[]
+    formatted = NamedTuple{(:exception, :backtrace)}[]
     stride = backtrace ? 3 : 1
     for i = reverse(1:stride:length(raw))
         exc = raw[i]
@@ -162,7 +172,7 @@ end
 ## keyword arg lowering generates calls to this ##
 function kwerr(kw, args::Vararg{Any,N}) where {N}
     @noinline
-    throw(MethodError(Core.kwcall, (kw, args...)))
+    throw(MethodError(Core.kwcall, (kw, args...), tls_world_age()))
 end
 
 ## system error handling ##
@@ -197,15 +207,17 @@ windowserror(p, code::UInt32=Libc.GetLastError(); extrainfo=nothing) = throw(Mai
 """
     @assert cond [text]
 
-Throw an [`AssertionError`](@ref) if `cond` is `false`. Preferred syntax for writing assertions.
-Message `text` is optionally displayed upon assertion failure.
+Throw an [`AssertionError`](@ref) if `cond` is `false`. This is the preferred syntax for
+writing assertions, which are conditions that are assumed to be true, but that the user
+might decide to check anyways, as an aid to debugging if they fail.
+The optional message `text` is displayed upon assertion failure.
 
 !!! warning
-    An assert might be disabled at various optimization levels.
+    An assert might be disabled at some optimization levels.
     Assert should therefore only be used as a debugging tool
-    and not used for authentication verification (e.g., verifying passwords),
-    nor should side effects needed for the function to work correctly
-    be used inside of asserts.
+    and not used for authentication verification (e.g., verifying passwords or checking array bounds).
+    The code must not rely on the side effects of running `cond` for the correct behavior
+    of a function.
 
 # Examples
 ```jldoctest
@@ -221,20 +233,21 @@ macro assert(ex, msgs...)
         msg = msg # pass-through
     elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol))
         # message is an expression needing evaluating
-        msg = :(Main.Base.string($(esc(msg))))
+        msg = :($_assert_tostring($(esc(msg))))
     elseif isdefined(Main, :Base) && isdefined(Main.Base, :string) && applicable(Main.Base.string, msg)
         msg = Main.Base.string(msg)
     else
         # string() might not be defined during bootstrap
-        msg = quote
-            msg = $(Expr(:quote,msg))
-            isdefined(Main, :Base) ? Main.Base.string(msg) :
-                (Core.println(msg); "Error during bootstrap. See stdout.")
-        end
+        msg = :($_assert_tostring($(Expr(:quote,msg))))
     end
     return :($(esc(ex)) ? $(nothing) : throw(AssertionError($msg)))
 end
 
+# this may be overridden in contexts where `string(::Expr)` doesn't work
+_assert_tostring(@nospecialize(msg)) = Core.compilerbarrier(:type, __assert_tostring)(msg)
+__assert_tostring(msg) = isdefined(Main, :Base) ? Main.Base.string(msg) :
+    (Core.println(msg); "Error during bootstrap. See stdout.")
+
 struct ExponentialBackOff
     n::Int
     first_delay::Float64
diff --git a/base/errorshow.jl b/base/errorshow.jl
index 81f4c9c2ee9e0..33edb4cee92a4 100644
--- a/base/errorshow.jl
+++ b/base/errorshow.jl
@@ -43,6 +43,15 @@ function showerror(io::IO, ex::Meta.ParseError)
     end
 end
 
+function showerror(io::IO, ex::Core.TypeNameError)
+    print(io, "TypeNameError: ")
+    if isa(ex.a, Union)
+        print(io, "typename does not apply to unions whose components have different typenames")
+    else
+        print(io, "typename does not apply to this type")
+    end
+end
+
 function showerror(io::IO, ex::BoundsError)
     print(io, "BoundsError")
     if isdefined(ex, :a)
@@ -70,6 +79,8 @@ function showerror(io::IO, ex::TypeError)
     print(io, "TypeError: ")
     if ex.expected === Bool
         print(io, "non-boolean (", typeof(ex.got), ") used in boolean context")
+    elseif ex.func === :var"dict key"
+        print(io, "$(limitrepr(ex.got)) is not a valid key for type $(ex.expected)")
     else
         if isvarargtype(ex.got)
             targs = (ex.got,)
@@ -80,7 +91,10 @@ function showerror(io::IO, ex::TypeError)
         end
         if ex.context == ""
             ctx = "in $(ex.func)"
-        elseif ex.func === Symbol("keyword argument")
+        elseif isa(ex.context, Core.GlobalRef)
+            gr = ex.context
+            ctx = "in $(ex.func) of global binding `$(gr.mod).$(gr.name)`"
+        elseif ex.func === :var"keyword argument"
             ctx = "in keyword argument $(ex.context)"
         else
             ctx = "in $(ex.func), in $(ex.context)"
@@ -147,13 +161,7 @@ showerror(io::IO, ::DivideError) = print(io, "DivideError: integer division erro
 showerror(io::IO, ::StackOverflowError) = print(io, "StackOverflowError:")
 showerror(io::IO, ::UndefRefError) = print(io, "UndefRefError: access to undefined reference")
 showerror(io::IO, ::EOFError) = print(io, "EOFError: read end of file")
-function showerror(io::IO, ex::ErrorException)
-    print(io, ex.msg)
-    if ex.msg == "type String has no field data"
-        println(io)
-        print(io, "Use `codeunits(str)` instead.")
-    end
-end
+showerror(io::IO, ex::ErrorException) = print(io, ex.msg)
 showerror(io::IO, ex::KeyError) = (print(io, "KeyError: key ");
                                    show(io, ex.key);
                                    print(io, " not found"))
@@ -168,13 +176,31 @@ showerror(io::IO, ex::UndefKeywordError) =
 
 function showerror(io::IO, ex::UndefVarError)
     print(io, "UndefVarError: `$(ex.var)` not defined")
+    if isdefined(ex, :scope)
+        scope = ex.scope
+        if scope isa Module
+            print(io, " in `$scope`")
+        elseif scope === :static_parameter
+            print(io, " in static parameter matching")
+        else
+            print(io, " in $scope scope")
+        end
+    end
     Experimental.show_error_hints(io, ex)
 end
 
 function showerror(io::IO, ex::InexactError)
     print(io, "InexactError: ", ex.func, '(')
-    nameof(ex.T) === ex.func || print(io, ex.T, ", ")
-    print(io, ex.val, ')')
+    T = first(ex.args)
+    nameof(T) === ex.func || print(io, T, ", ")
+    # `join` calls `string` on its arguments, which shadows the size of e.g. Inf16
+    # as `string(Inf16) == "Inf"` instead of "Inf16". Thus we cannot use `join` here.
+    for arg in ex.args[2:end-1]
+        show(io, arg)
+        print(io, ", ")
+    end
+    show(io, ex.args[end])
+    print(io, ")")
     Experimental.show_error_hints(io, ex)
 end
 
@@ -182,7 +208,7 @@ function showerror(io::IO, ex::CanonicalIndexError)
     print(io, "CanonicalIndexError: ", ex.func, " not defined for ", ex.type)
 end
 
-typesof(@nospecialize args...) = Tuple{Any[ Core.Typeof(args[i]) for i in 1:length(args) ]...}
+typesof(@nospecialize args...) = Tuple{Any[Core.Typeof(arg) for arg in args]...}
 
 function print_with_compare(io::IO, @nospecialize(a::DataType), @nospecialize(b::DataType), color::Symbol)
     if a.name === b.name
@@ -230,87 +256,128 @@ function show_convert_error(io::IO, ex::MethodError, arg_types_param)
 end
 
 function showerror(io::IO, ex::MethodError)
+    @nospecialize io
     # ex.args is a tuple type if it was thrown from `invoke` and is
     # a tuple of the arguments otherwise.
-    is_arg_types = isa(ex.args, DataType)
-    arg_types = (is_arg_types ? ex.args : typesof(ex.args...))::DataType
+    is_arg_types = !isa(ex.args, Tuple)
+    arg_types = is_arg_types ? ex.args : typesof(ex.args...)
+    arg_types_param::SimpleVector = (unwrap_unionall(arg_types)::DataType).parameters
+    san_arg_types_param = Any[rewrap_unionall(arg_types_param[i], arg_types) for i in 1:length(arg_types_param)]
     f = ex.f
     meth = methods_including_ambiguous(f, arg_types)
     if isa(meth, MethodList) && length(meth) > 1
         return showerror_ambiguous(io, meth, f, arg_types)
     end
-    arg_types_param::SimpleVector = arg_types.parameters
-    show_candidates = true
     print(io, "MethodError: ")
     ft = typeof(f)
     f_is_function = false
-    kwargs = ()
-    if f === Core.kwcall && !is_arg_types
-        f = (ex.args::Tuple)[2]
-        ft = typeof(f)
+    kwargs = []
+    if f === Core.kwcall && length(arg_types_param) >= 2 && arg_types_param[1] <: NamedTuple && !is_arg_types
+        # if this is a kwcall, reformat it as a call with kwargs
+        # TODO: handle !is_arg_types here (aka invoke with kwargs), which needs a value for `f`
+        local kwt
+        let args = ex.args::Tuple
+            f = args[2]
+            ft = typeof(f)
+            kwt = typeof(args[1])
+            ex = MethodError(f, args[3:end], ex.world)
+        end
         arg_types_param = arg_types_param[3:end]
-        kwargs = pairs(ex.args[1])
-        ex = MethodError(f, ex.args[3:end::Int], ex.world)
+        san_arg_types_param = san_arg_types_param[3:end]
+        keys = kwt.parameters[1]::Tuple
+        kwargs = Any[(keys[i], fieldtype(kwt, i)) for i in eachindex(keys)]
+        arg_types = rewrap_unionall(Tuple{arg_types_param...}, arg_types)
     end
-    name = ft.name.mt.name
     if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types
         f_is_function = true
         show_convert_error(io, ex, arg_types_param)
-    elseif f === mapreduce_empty || f === reduce_empty
-        print(io, "reducing over an empty collection is not allowed; consider supplying `init` to the reducer")
-        show_candidates = false
     elseif isempty(methods(f)) && isa(f, DataType) && isabstracttype(f)
         print(io, "no constructors have been defined for ", f)
     elseif isempty(methods(f)) && !isa(f, Function) && !isa(f, Type)
-        print(io, "objects of type ", ft, " are not callable")
+        println(io, "objects of type ", ft, " are not callable.")
+        print(io, "In case you did not try calling it explicitly, check if a ", ft,
+            " has been passed as an argument to a method that expects a callable instead.")
     else
         if ft <: Function && isempty(ft.parameters) && _isself(ft)
             f_is_function = true
         end
-        print(io, "no method matching ")
-        show_signature_function(io, isa(f, Type) ? Type{f} : typeof(f))
-        print(io, "(")
-        for (i, typ) in enumerate(arg_types_param)
-            print(io, "::", typ)
-            i == length(arg_types_param) || print(io, ", ")
-        end
-        if !isempty(kwargs)
-            print(io, "; ")
-            for (i, (k, v)) in enumerate(kwargs)
-                print(io, k, "::", typeof(v))
-                i == length(kwargs)::Int || print(io, ", ")
-            end
+        if is_arg_types
+            print(io, "no method matching invoke ")
+        else
+            print(io, "no method matching ")
         end
-        print(io, ")")
+        buf = IOBuffer()
+        iob = IOContext(buf, io)     # for type abbreviation as in #49795; some, like `convert(T, x)`, should not abbreviate
+        show_signature_function(iob, Core.Typeof(f))
+        show_tuple_as_call(iob, :function, arg_types; hasfirst=false, kwargs = isempty(kwargs) ? nothing : kwargs)
+        str = takestring!(buf)
+        str = type_limited_string_from_context(io, str)
+        print(io, str)
     end
     # catch the two common cases of element-wise addition and subtraction
-    if (f === Base.:+ || f === Base.:-) && length(arg_types_param) == 2
+    if (f === Base.:+ || f === Base.:-) && length(san_arg_types_param) == 2
         # we need one array of numbers and one number, in any order
-        if any(x -> x <: AbstractArray{<:Number}, arg_types_param) &&
-            any(x -> x <: Number, arg_types_param)
+        if any(x -> x <: AbstractArray{<:Number}, san_arg_types_param) &&
+            any(x -> x <: Number, san_arg_types_param)
 
             nounf = f === Base.:+ ? "addition" : "subtraction"
             varnames = ("scalar", "array")
-            first, second = arg_types_param[1] <: Number ? varnames : reverse(varnames)
+            first, second = san_arg_types_param[1] <: Number ? varnames : reverse(varnames)
             fstring = f === Base.:+ ? "+" : "-"  # avoid depending on show_default for functions (invalidation)
             print(io, "\nFor element-wise $nounf, use broadcasting with dot syntax: $first .$fstring $second")
         end
     end
     if ft <: AbstractArray
-        print(io, "\nUse square brackets [] for indexing an Array.")
+        print(io, "\nIn case you're trying to index into the array, use square brackets [] instead of parentheses ().")
     end
-    # Check for local functions that shadow methods in Base
-    if f_is_function && isdefined(Base, name)
-        basef = getfield(Base, name)
-        if basef !== ex.f && hasmethod(basef, arg_types)
-            print(io, "\nYou may have intended to import ")
-            show_unquoted(io, Expr(:., :Base, QuoteNode(name)))
+    # Check for functions with the same name in other modules
+    if f_is_function && ex.world != typemax(UInt)
+        let name = ft.name.singletonname
+            modules_to_check = Set{Module}()
+            push!(modules_to_check, Base)
+            for T in san_arg_types_param
+                modulesof!(modules_to_check, T)
+            end
+
+            # Check all modules (sorted for consistency)
+            sorted_modules = sort!(collect(modules_to_check), by=nameof)
+            for mod in sorted_modules
+                if isdefinedglobal(mod, name)
+                    candidate = getglobal(mod, name)
+                    if candidate !== f && hasmethod(candidate, arg_types; world=ex.world)
+                        if mod === Base
+                            print(io, "\nYou may have intended to import ")
+                            show_unquoted(io, Expr(:., :Base, QuoteNode(name)))
+                        else
+                            print(io, "\nThe definition in ")
+                            show_unquoted(io, mod)
+                            print(io, " may have intended to extend ")
+                            f_module = parentmodule(ft)
+                            show_unquoted(io, Expr(:., f_module, QuoteNode(name)))
+                        end
+                    end
+                end
+            end
         end
     end
-    if (ex.world != typemax(UInt) && hasmethod(ex.f, arg_types) &&
-        !hasmethod(ex.f, arg_types, world = ex.world))
+    if ex.world == typemax(UInt) || hasmethod(f, arg_types, world=ex.world)
+        if !isempty(kwargs)
+            print(io, "\nThis method does not support all of the given keyword arguments (and may not support any).")
+        end
+        if ex.world == typemax(UInt) || isempty(kwargs)
+            print(io, "\nThis error has been manually thrown, explicitly, so the method may exist but be intentionally marked as unimplemented.")
+        end
+    elseif hasmethod(f, arg_types) && !hasmethod(f, arg_types, world=ex.world)
         curworld = get_world_counter()
         print(io, "\nThe applicable method may be too new: running in world age $(ex.world), while current world is $(curworld).")
+    elseif f isa Function
+        print(io, "\nThe ")
+        isgensym(nameof(f)) && print(io, "anonymous ")
+        print(io, "function `$f` exists, but no method is defined for this combination of argument types.")
+    elseif f isa Type
+        print(io, "\nThe type `$f` exists, but no method is defined for this combination of argument types when trying to construct it.")
+    else
+        print(io, "\nThe object of type `$(typeof(f))` exists, but no method is defined for this combination of argument types when trying to treat it as a callable object.")
     end
     if !is_arg_types
         # Check for row vectors used where a column vector is intended.
@@ -327,27 +394,30 @@ function showerror(io::IO, ex::MethodError)
                       "\nYou can convert to a column vector with the vec() function.")
         end
     end
-    Experimental.show_error_hints(io, ex, arg_types_param, kwargs)
-    show_candidates && try
+    Experimental.show_error_hints(io, ex, san_arg_types_param, kwargs)
+    try
         show_method_candidates(io, ex, kwargs)
     catch ex
         @error "Error showing method candidates, aborted" exception=ex,catch_backtrace()
     end
+    nothing
+end
+
+function showerror(io::IO, exc::FieldError)
+    @nospecialize
+    print(io, "FieldError: type $(exc.type.name.wrapper) has no field `$(exc.field)`")
+    Base.Experimental.show_error_hints(io, exc)
 end
 
 striptype(::Type{T}) where {T} = T
 striptype(::Any) = nothing
 
-function showerror_ambiguous(io::IO, meths, f, args)
+function showerror_ambiguous(io::IO, meths, f, args::Type)
+    @nospecialize f args
     print(io, "MethodError: ")
     show_signature_function(io, isa(f, Type) ? Type{f} : typeof(f))
-    print(io, "(")
-    p = args.parameters
-    for (i,a) in enumerate(p)
-        print(io, "::", a)
-        i < length(p) && print(io, ", ")
-    end
-    println(io, ") is ambiguous.\n\nCandidates:")
+    show_tuple_as_call(io, :var"", args, hasfirst=false)
+    println(io, " is ambiguous.\n\nCandidates:")
     sigfix = Any
     for m in meths
         print(io, "  ")
@@ -356,10 +426,10 @@ function showerror_ambiguous(io::IO, meths, f, args)
         sigfix = typeintersect(m.sig, sigfix)
     end
     if isa(unwrap_unionall(sigfix), DataType) && sigfix <: Tuple
-        let sigfix=sigfix
-            if all(m->morespecific(sigfix, m.sig), meths)
+        let sigfix=Core.Box(sigfix)
+            if all(m->morespecific(sigfix.contents, m.sig), meths)
                 print(io, "\nPossible fix, define\n  ")
-                Base.show_tuple_as_call(io, :function,  sigfix)
+                show_tuple_as_call(io, :function,  sigfix.contents)
             else
                 print(io, "To resolve the ambiguity, try making one of the methods more specific, or ")
                 print(io, "adding a new method more specific than any of the existing applicable methods.")
@@ -372,7 +442,7 @@ end
 
 #Show an error by directly calling jl_printf.
 #Useful in Base submodule __init__ functions where stderr isn't defined yet.
-function showerror_nostdio(err, msg::AbstractString)
+function showerror_nostdio(@nospecialize(err), msg::AbstractString)
     stderr_stream = ccall(:jl_stderr_stream, Ptr{Cvoid}, ())
     ccall(:jl_printf, Cint, (Ptr{Cvoid},Cstring), stderr_stream, msg)
     ccall(:jl_printf, Cint, (Ptr{Cvoid},Cstring), stderr_stream, ":\n")
@@ -384,17 +454,21 @@ stacktrace_expand_basepaths()::Bool = Base.get_bool_env("JULIA_STACKTRACE_EXPAND
 stacktrace_contract_userdir()::Bool = Base.get_bool_env("JULIA_STACKTRACE_CONTRACT_HOMEDIR", true) === true
 stacktrace_linebreaks()::Bool = Base.get_bool_env("JULIA_STACKTRACE_LINEBREAKS", false) === true
 
-function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=())
-    is_arg_types = isa(ex.args, DataType)
+function show_method_candidates(io::IO, ex::MethodError, kwargs=[])
+    @nospecialize io
+    is_arg_types = !isa(ex.args, Tuple)
     arg_types = is_arg_types ? ex.args : typesof(ex.args...)
-    arg_types_param = Any[arg_types.parameters...]
+    arg_types_param = Any[(unwrap_unionall(arg_types)::DataType).parameters...]
+    arg_types_param = Any[rewrap_unionall(a, arg_types) for a in arg_types_param]
     # Displays the closest candidates of the given function by looping over the
     # functions methods and counting the number of matching arguments.
     f = ex.f
     ft = typeof(f)
-    lines = []
+    lines = String[]
+    line_score = Int[]
     # These functions are special cased to only show if first argument is matched.
     special = f === convert || f === getindex || f === setindex!
+    f isa Core.Builtin && return # `methods` isn't very useful for a builtin
     funcs = Tuple{Any,Vector{Any}}[(f, arg_types_param)]
 
     # An incorrect call method produces a MethodError for convert.
@@ -402,7 +476,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
     # pool MethodErrors for these two functions.
     if f === convert && !isempty(arg_types_param)
         at1 = arg_types_param[1]
-        if isType(at1) && !Core.Compiler.has_free_typevars(at1)
+        if isType(at1) && !has_free_typevars(at1) && at1.parameters[1] isa Type
             push!(funcs, (at1.parameters[1], arg_types_param[2:end]))
         end
     end
@@ -424,8 +498,8 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
             end
             sig0 = sig0::DataType
             s1 = sig0.parameters[1]
-            if sig0 === Tuple || !isa(func, rewrap_unionall(s1, method.sig))
-                # function itself doesn't match or is a builtin
+            if !isa(func, rewrap_unionall(s1, method.sig))
+                # function itself doesn't match
                 continue
             else
                 print(iob, "  ")
@@ -440,10 +514,10 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                 # If isvarargtype then it checks whether the rest of the input arguments matches
                 # the varargtype
                 if Base.isvarargtype(sig[i])
-                    sigstr = (unwrapva(unwrap_unionall(sig[i])), "...")
+                    sigstr = Core.svec(unwrapva(unwrap_unionall(sig[i])), "...")
                     j = length(t_i)
                 else
-                    sigstr = (sig[i],)
+                    sigstr = Core.svec(sig[i],)
                     j = i
                 end
                 # Checks if the type of arg 1:i of the input intersects with the current method
@@ -483,85 +557,80 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                 end
             end
 
-            if right_matches > 0 || length(arg_types_param) < 2
-                if length(t_i) < length(sig)
-                    # If the methods args is longer than input then the method
-                    # arguments is printed as not a match
-                    for (k, sigtype) in enumerate(sig[length(t_i)+1:end])
-                        sigtype = isvarargtype(sigtype) ? unwrap_unionall(sigtype) : sigtype
-                        if Base.isvarargtype(sigtype)
-                            sigstr = (unwrapva(sigtype::Core.TypeofVararg), "...")
-                        else
-                            sigstr = (sigtype,)
-                        end
-                        if !((min(length(t_i), length(sig)) == 0) && k==1)
-                            print(iob, ", ")
-                        end
-                        if k == 1 && Base.isvarargtype(sigtype)
-                            # There wasn't actually a mismatch - the method match failed for
-                            # some other reason, e.g. world age. Just print the sigstr.
-                            print(iob, sigstr...)
-                        elseif get(io, :color, false)::Bool
-                            let sigstr=sigstr
-                                Base.with_output_color(Base.error_color(), iob) do iob
-                                    print(iob, "::", sigstr...)
-                                end
-                            end
-                        else
-                            print(iob, "!Matched::", sigstr...)
-                        end
+            if length(t_i) < length(sig)
+                # If the methods args is longer than input then the method
+                # arguments is printed as not a match
+                for (k, sigtype) in enumerate(sig[length(t_i)+1:end])
+                    sigtype = isvarargtype(sigtype) ? unwrap_unionall(sigtype) : sigtype
+                    if Base.isvarargtype(sigtype)
+                        sigstr = Core.svec(unwrapva(sigtype::Core.TypeofVararg), "...")
+                    else
+                        sigstr = Core.svec(sigtype,)
                     end
-                end
-                kwords = kwarg_decl(method)
-                if !isempty(kwords)
-                    print(iob, "; ")
-                    join(iob, kwords, ", ")
-                end
-                print(iob, ")")
-                show_method_params(iob0, tv)
-                file, line = updated_methodloc(method)
-                if file === nothing
-                    file = string(method.file)
-                end
-                stacktrace_contract_userdir() && (file = contractuser(file))
-
-                if !isempty(kwargs)::Bool
-                    unexpected = Symbol[]
-                    if isempty(kwords) || !(any(endswith(string(kword), "...") for kword in kwords))
-                        for (k, v) in kwargs
-                            if !(k::Symbol in kwords)
-                                push!(unexpected, k::Symbol)
+                    if !((min(length(t_i), length(sig)) == 0) && k==1)
+                        print(iob, ", ")
+                    end
+                    if k == 1 && Base.isvarargtype(sigtype)
+                        # There wasn't actually a mismatch - the method match failed for
+                        # some other reason, e.g. world age. Just print the sigstr.
+                        print(iob, sigstr...)
+                    elseif get(io, :color, false)::Bool
+                        let sigstr=sigstr
+                            Base.with_output_color(Base.error_color(), iob) do iob
+                                print(iob, "::", sigstr...)
                             end
                         end
+                    else
+                        print(iob, "!Matched::", sigstr...)
                     end
-                    if !isempty(unexpected)
-                        Base.with_output_color(Base.error_color(), iob) do iob
-                            plur = length(unexpected) > 1 ? "s" : ""
-                            print(iob, " got unsupported keyword argument$plur \"", join(unexpected, "\", \""), "\"")
+                end
+            end
+            kwords = kwarg_decl(method)
+            if !isempty(kwords)
+                print(iob, "; ")
+                join(iob, kwords, ", ")
+            end
+            print(iob, ")")
+            show_method_params(iob0, tv)
+            file, line = updated_methodloc(method)
+            if file === nothing
+                file = string(method.file)
+            end
+            stacktrace_contract_userdir() && (file = contractuser(file))
+
+            if !isempty(kwargs)::Bool
+                unexpected = Symbol[]
+                if isempty(kwords) || !(any(endswith(string(kword), "...") for kword in kwords))
+                    for (k, v) in kwargs
+                        if !(k::Symbol in kwords)
+                            push!(unexpected, k::Symbol)
                         end
                     end
                 end
-                if ex.world < reinterpret(UInt, method.primary_world)
-                    print(iob, " (method too new to be called from this world context.)")
-                elseif ex.world > reinterpret(UInt, method.deleted_world)
-                    print(iob, " (method deleted before this world age.)")
+                if !isempty(unexpected)
+                    Base.with_output_color(Base.error_color(), iob) do iob
+                        plur = length(unexpected) > 1 ? "s" : ""
+                        print(iob, " got unsupported keyword argument$plur \"", join(unexpected, "\", \""), "\"")
+                    end
                 end
-                println(iob)
-
-                m = parentmodule_before_main(method)
-                modulecolor = get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m)
-                print_module_path_file(iob, m, string(file), line; modulecolor, digit_align_width = 3)
-
-                # TODO: indicate if it's in the wrong world
-                push!(lines, (buf, right_matches))
             end
+            if ex.world < reinterpret(UInt, method.primary_world)
+                print(iob, " (method too new to be called from this world context.)")
+            end
+            println(iob)
+
+            m = parentmodule_before_main(method)
+            modulecolor = get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m)
+            print_module_path_file(iob, m, string(file), line; modulecolor, digit_align_width = 3)
+            push!(lines, takestring!(buf))
+            push!(line_score, -(right_matches * 2 + (length(arg_types_param) < 2 ? 1 : 0)))
         end
     end
 
     if !isempty(lines) # Display up to three closest candidates
         Base.with_output_color(:normal, io) do io
             print(io, "\n\nClosest candidates are:")
-            sort!(lines, by = x -> -x[2])
+            permute!(lines, sortperm(line_score))
             i = 0
             for line in lines
                 println(io)
@@ -570,11 +639,12 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                     break
                 end
                 i += 1
-                print(io, String(take!(line[1])))
+                print(io, line)
             end
             println(io) # extra newline for spacing to stacktrace
         end
     end
+    nothing
 end
 
 # In case the line numbers in the source code have changed since the code was compiled,
@@ -591,107 +661,146 @@ const update_stackframes_callback = Ref{Function}(identity)
 const STACKTRACE_MODULECOLORS = Iterators.Stateful(Iterators.cycle([:magenta, :cyan, :green, :yellow]))
 const STACKTRACE_FIXEDCOLORS = IdDict(Base => :light_black, Core => :light_black)
 
-function show_full_backtrace(io::IO, trace::Vector; print_linebreaks::Bool)
-    num_frames = length(trace)
-    ndigits_max = ndigits(num_frames)
-
-    println(io, "\nStacktrace:")
-
-    for (i, (frame, n)) in enumerate(trace)
-        print_stackframe(io, i, frame, n, ndigits_max, STACKTRACE_FIXEDCOLORS, STACKTRACE_MODULECOLORS)
-        if i < num_frames
-            println(io)
-            print_linebreaks && println(io)
-        end
-    end
-end
-
 const BIG_STACKTRACE_SIZE = 50 # Arbitrary constant chosen here
 
-function show_reduced_backtrace(io::IO, t::Vector)
+function _backtrace_find_and_remove_cycles(t)
     recorded_positions = IdDict{UInt, Vector{Int}}()
     #= For each frame of hash h, recorded_positions[h] is the list of indices i
     such that hash(t[i-1]) == h, ie the list of positions in which the
     frame appears just before. =#
 
+    max_nested_cycles = 0
     displayed_stackframes = []
-    repeated_cycle = Tuple{Int,Int,Int}[]
-    # First:  line to introuce the "cycle repetition" message
-    # Second: length of the cycle
-    # Third:  number of repetitions
+    repeated_cycles = Tuple{Int,Int,Int}[]
+    # First:  index into `display_stackframes` to introuce the cycle bracket on
+    # Second: length of the cycle as a count in the trace
+    # Third:  number of cycle repetitions
+
+    t_curr = 1
     frame_counter = 1
-    while frame_counter < length(t)
-        (last_frame, n) = t[frame_counter]
-        frame_counter += 1 # Indicating the next frame
-
-        current_hash = hash(last_frame)
-        positions = get(recorded_positions, current_hash, Int[])
-        recorded_positions[current_hash] = push!(positions, frame_counter)
-
-        repetitions = 0
-        for index_p in length(positions)-1:-1:1 # More recent is more likely
-            p = positions[index_p]
-            cycle_length = frame_counter - p
-            i = frame_counter
-            j = p
-            while i < length(t) && t[i] == t[j]
-                i += 1
-                j += 1
+
+    while t_curr ≤ length(t)
+        (last_frame, n) = t[t_curr]
+        current_hash = hash(t[t_curr])
+        positions = get(recorded_positions, current_hash,  Int[])
+
+        t_curr += 1
+        recorded_positions[current_hash] = push!(positions, t_curr)
+
+        # Check previous positions for cycles
+        ncycles = 0
+        nnested_cycles = n > 0
+        for k ∈ reverse(eachindex(positions))[2:end] # More recent is more likely
+            t_prev = positions[k]
+            t_cycle_length = t_curr - t_prev
+
+            # walk trace at current and previous matching positions until matching stops
+            t_curr_end = t_curr
+            t_prev_end = t_prev
+            while t_curr_end < length(t) && t[t_curr_end] == t[t_prev_end]
+                t_curr_end += 1
+                t_prev_end += 1
             end
-            if j >= frame_counter-1
+
+            if t_prev_end ≥ t_curr - 1
                 #= At least one cycle repeated =#
-                repetitions = div(i - frame_counter + 1, cycle_length)
-                push!(repeated_cycle, (length(displayed_stackframes), cycle_length, repetitions))
-                frame_counter += cycle_length * repetitions - 1
-                break
+                ncycles = div(t_curr_end - t_prev + 1, t_cycle_length)
+                push!(repeated_cycles, (length(displayed_stackframes) - 1, t_cycle_length, ncycles))
+                t_curr += t_cycle_length * (ncycles - 1) - 1
+                nnested_cycles += 1
             end
         end
 
-        if repetitions==0
+        # ensure an outer cycle comes before a contained inner cycle
+        sort!(repeated_cycles, by = x -> (x[1], -x[2]))
+        max_nested_cycles = max(max_nested_cycles, nnested_cycles)
+
+        if ncycles == 0
             push!(displayed_stackframes, (last_frame, n))
         end
     end
+    return displayed_stackframes, repeated_cycles, max_nested_cycles
+end
+
+function _backtrace_print_repetition_closings!(io::IO, i, current_cycles, frame_counter, max_nested_cycles, nactive_cycles, ndigits_max; prefix = nothing)
+    while !isempty(current_cycles)
+        start_line = current_cycles[end][1]
+        cycle_length = current_cycles[end][2]
+        end_line = start_line + cycle_length - 1
+        repetitions = current_cycles[end][3]
+        frame_counter_advance = current_cycles[end][4]
+
+        i != end_line && break
+
+        println(io)
+        prefix === nothing || print(io, prefix)
+        line_length = (max_nested_cycles - nactive_cycles) + ndigits_max + 2
+        nactive_cycles -= 1
+        printstyled(io, " ", "│" ^ nactive_cycles, "╰", "─" ^ (line_length); color = :light_black)
+        printstyled(io, " repeated $repetitions times"; color = :light_black, italic = true)
+
+        pop!(current_cycles)
+
+        if cycle_length > 1
+            # adjust cycle_length in outer cycles to reflect displayed frames consumed by this inner cycle
+            for j ∈ eachindex(current_cycles)
+                current_cycles[j] = (current_cycles[j][1], current_cycles[j][2] - cycle_length * (repetitions - 1), current_cycles[j][3:4]...)
+            end
+        else
+            # adjust frame_counter_advance in outer cycles to reflect frames consumed by a single repeated frame
+            for j ∈ eachindex(current_cycles)
+                current_cycles[j] = (current_cycles[j][1:3]..., current_cycles[j][4] + (frame_counter_advance * (current_cycles[j][3] - 1)))
+            end
+        end
+
+        frame_counter += frame_counter_advance
+    end
+    return frame_counter, nactive_cycles
+end
 
-    try invokelatest(update_stackframes_callback[], displayed_stackframes) catch end
+function show_processed_backtrace(io::IO, trace::Vector, num_frames::Int, repeated_cycles::Vector{NTuple{3, Int}}, max_nested_cycles::Int; print_linebreaks::Bool, prefix = nothing)
+    println(io)
+    prefix === nothing || print(io, prefix)
+    println(io, "Stacktrace:")
 
-    println(io, "\nStacktrace:")
+    ndigits_max = ndigits(num_frames)
 
-    ndigits_max = ndigits(length(t))
+    push!(repeated_cycles, (0,0,0)) # repeated_cycles is never empty
 
-    push!(repeated_cycle, (0,0,0)) # repeated_cycle is never empty
     frame_counter = 1
-    for i in 1:length(displayed_stackframes)
-        (frame, n) = displayed_stackframes[i]
+    current_cycles = NTuple{4, Int}[] # adding a value to track amount to advance frame_counter when cycle is closed
 
-        print_stackframe(io, frame_counter, frame, n, ndigits_max, STACKTRACE_FIXEDCOLORS, STACKTRACE_MODULECOLORS)
+    for i in eachindex(trace)
+        (frame, n) = trace[i]
 
-        if i < length(displayed_stackframes)
-            println(io)
-            stacktrace_linebreaks() && println(io)
+        ncycle_starts = 0
+        while repeated_cycles[1][1] == i
+            cycle = popfirst!(repeated_cycles)
+            push!(current_cycles, (cycle..., cycle[2] * (cycle[3] - 1)))
+            ncycle_starts += 1
         end
-
-        while repeated_cycle[1][1] == i # never empty because of the initial (0,0,0)
-            cycle_length = repeated_cycle[1][2]
-            repetitions = repeated_cycle[1][3]
-            popfirst!(repeated_cycle)
-            printstyled(io,
-                "--- the last ", cycle_length, " lines are repeated ",
-                  repetitions, " more time", repetitions>1 ? "s" : "", " ---", color = :light_black)
-            if i < length(displayed_stackframes)
-                println(io)
-                stacktrace_linebreaks() && println(io)
-            end
-            frame_counter += cycle_length * repetitions
+        if n > 1
+            push!(current_cycles, (i, 1, n, n - 1))
+            ncycle_starts += 1
         end
+        nactive_cycles = length(current_cycles)
+
+        print_stackframe(io, frame_counter, frame, ndigits_max, max_nested_cycles, nactive_cycles, ncycle_starts, STACKTRACE_FIXEDCOLORS, STACKTRACE_MODULECOLORS; prefix)
+
+        frame_counter, nactive_cycles = _backtrace_print_repetition_closings!(io, i, current_cycles, frame_counter, max_nested_cycles, nactive_cycles, ndigits_max; prefix)
         frame_counter += 1
+
+        if i < length(trace)
+            println(io)
+            print_linebreaks && println(io)
+        end
     end
 end
 
-
 # Print a stack frame where the module color is determined by looking up the parent module in
 # `modulecolordict`. If the module does not have a color, yet, a new one can be drawn
 # from `modulecolorcycler`.
-function print_stackframe(io, i, frame::StackFrame, n::Int, ndigits_max, modulecolordict, modulecolorcycler)
+function print_stackframe(io, i, frame::StackFrame, ndigits_max::Int, max_nested_cycles::Int, nactive_cycles::Int, ncycle_starts::Int, modulecolordict, modulecolorcycler; prefix = nothing)
     m = Base.parentmodule(frame)
     modulecolor = if m !== nothing
         m = parentmodule_before_main(m)
@@ -699,7 +808,7 @@ function print_stackframe(io, i, frame::StackFrame, n::Int, ndigits_max, modulec
     else
         :default
     end
-    print_stackframe(io, i, frame, n, ndigits_max, modulecolor)
+    print_stackframe(io, i, frame, ndigits_max, max_nested_cycles, nactive_cycles, ncycle_starts, modulecolor; prefix)
 end
 
 # Gets the topmost parent module that isn't Main
@@ -714,11 +823,8 @@ end
 parentmodule_before_main(x) = parentmodule_before_main(parentmodule(x))
 
 # Print a stack frame where the module color is set manually with `modulecolor`.
-function print_stackframe(io, i, frame::StackFrame, n::Int, ndigits_max, modulecolor)
+function print_stackframe(io, i, frame::StackFrame, ndigits_max::Int, max_nested_cycles::Int, nactive_cycles::Int, ncycle_starts::Int, modulecolor; prefix = nothing)
     file, line = string(frame.file), frame.line
-    file = fixup_stdlib_path(file)
-    stacktrace_expand_basepaths() && (file = something(find_source_file(file), file))
-    stacktrace_contract_userdir() && (file = contractuser(file))
 
     # Used by the REPL to make it possible to open
     # the location of a stackframe/method in the editor.
@@ -729,20 +835,29 @@ function print_stackframe(io, i, frame::StackFrame, n::Int, ndigits_max, modulec
     inlined = getfield(frame, :inlined)
     modul = parentmodule(frame)
 
-    digit_align_width = ndigits_max + 2
+    digit_align_width = ndigits_max + 2 + max_nested_cycles - nactive_cycles
+
+    # repeated section bracket line 1
+    prefix === nothing || print(io, prefix)
+    print(io, " ")
+    printstyled(io, "├" ^ (nactive_cycles - ncycle_starts); color = :light_black)
+    printstyled(io, "┌" ^ ncycle_starts; color = :light_black)
 
     # frame number
-    print(io, " ", lpad("[" * string(i) * "]", digit_align_width))
+    print(io, lpad("[" * string(i) * "]", digit_align_width))
     print(io, " ")
 
+    # func name and arguments
     StackTraces.show_spec_linfo(IOContext(io, :backtrace=>true), frame)
-    if n > 1
-        printstyled(io, " (repeats $n times)"; color=:light_black)
-    end
     println(io)
 
+    # repeated section bracket line 2
+    prefix === nothing || print(io, prefix)
+    print(io, " ")
+    printstyled(io, "│" ^ nactive_cycles; color = :light_black)
+
     # @ Module path / file : line
-    print_module_path_file(io, modul, file, line; modulecolor, digit_align_width)
+    print_module_path_file(io, modul, file, line; modulecolor, digit_align_width = digit_align_width - 1)
 
     # inlined
     printstyled(io, inlined ? " [inlined]" : "", color = :light_black)
@@ -758,6 +873,7 @@ function print_module_path_file(io, modul, file, line; modulecolor = :light_blac
     end
 
     # filepath
+    file = fixup_stdlib_path(file)
     stacktrace_expand_basepaths() && (file = something(find_source_file(file), file))
     stacktrace_contract_userdir() && (file = contractuser(file))
     print(io, " ")
@@ -768,54 +884,121 @@ function print_module_path_file(io, modul, file, line; modulecolor = :light_blac
     printstyled(io, basename(file), ":", line; color = :light_black, underline = true)
 end
 
-function show_backtrace(io::IO, t::Vector)
+#=
+
+Stacktrace processing pipeline:
+1. Raw traces extracted with `backtrace` or `catch_backtrace` as vector of instruction pointers.
+2. IP traces converted to frames with `stacktrace`, which may or may not include C frames.
+3. Originator trims frames related to itself (e.g. REPL removes REPL-specific frames)
+   - CapturedException only keeps a limit of 100 frames by processing before display
+4. `process_backtrace` filters a trace for internal implementation or redundant frames and summarizes repeated single frames:
+    - `kwcall` frames removed
+    - `include`-related stack frames removed
+    - Some frames that have the same location info are merged
+    - Repeated frames are removed and summarized with a count
+    - Output is an Any[] containing (StackFrame, count) tuple elements and this form is exposed to e.g. Revise
+5. If a trace is too long, cycles are identified and summarized
+6. `update_stackframes_callback[]` provides e.g. Revise an opportunity to edit line info
+
+=#
+
+function show_backtrace(io::IO, t::Vector; prefix = nothing)
     if haskey(io, :last_shown_line_infos)
         empty!(io[:last_shown_line_infos])
     end
-    # this will be set to true if types in the stacktrace are truncated
-    limitflag = Ref(false)
-    io = IOContext(io, :stacktrace_types_limited => limitflag)
 
-    # t is a pre-processed backtrace (ref #12856)
-    if t isa Vector{Any}
+    # Process backtrace if it has not yet been. A processed backtrace is a Vector{Any}
+    # with elements of type Tuple{StackFrame, Int}. (ref #12856)
+    if t isa Vector{Any} && (length(t) == 0 || t[1] isa Tuple{StackFrame,Int})
         filtered = t
     else
-        filtered = process_backtrace(t)
+        # t is a raw trace requiring lookup
+        if t isa Vector{<:Union{Base.InterpreterIP,Ptr{Cvoid}}}
+            frametrace = stacktrace(t)
+        else
+            frametrace = t
+        end
+        filtered = process_backtrace(frametrace)
     end
     isempty(filtered) && return
 
-    if length(filtered) == 1 && StackTraces.is_top_level_frame(filtered[1][1])
+    nframes = sum(last(x) for x ∈ filtered)
+
+    # don't show a single top-level frame with no location info
+    if nframes == 1 && StackTraces.is_top_level_frame(filtered[1][1])
         f = filtered[1][1]::StackFrame
-        if f.line == 0 && f.file === Symbol("")
-            # don't show a single top-level frame with no location info
+        if f.line == 0 && f.file === :var""
             return
         end
     end
 
+    # Find repeated cycles if trace is too long
     if length(filtered) > BIG_STACKTRACE_SIZE
-        show_reduced_backtrace(IOContext(io, :backtrace => true), filtered)
-        return
+        filtered, repeated_cycles, max_nested_cycles = _backtrace_find_and_remove_cycles(filtered)
     else
-        try invokelatest(update_stackframes_callback[], filtered) catch end
-        # process_backtrace returns a Vector{Tuple{Frame, Int}}
-        show_full_backtrace(io, filtered; print_linebreaks = stacktrace_linebreaks())
-    end
-    if limitflag[]
-        print(io, "\nSome type information was truncated. Use `show(err)` to see complete types.")
+        repeated_cycles = NTuple{3, Int}[]
+        max_nested_cycles = any(x -> last(x) > 1, filtered) ? 1 : 0
     end
+
+    # Allow external code to edit information in the frames (e.g. line numbers with Revise)
+    try invokelatest(update_stackframes_callback[], filtered) catch end
+
+    show_processed_backtrace(IOContext(io, :backtrace => true), filtered, nframes, repeated_cycles, max_nested_cycles; print_linebreaks = stacktrace_linebreaks(), prefix)
     nothing
 end
 
+function _backtrace_collapse_and_count_repeated_frames(frames::Vector{StackFrame})
+    n = 0
+    last_frame = StackTraces.UNKNOWN
+    tracecount = Any[]
+    for frame in frames
+        if frame.file != last_frame.file || frame.line != last_frame.line || frame.func != last_frame.func || frame.linfo !== last_frame.linfo
+            if n > 0
+                push!(tracecount, (last_frame, n))
+            end
+            n = 1
+            last_frame = frame
+        else
+            n += 1
+        end
+    end
+    if n > 0
+        push!(tracecount, (last_frame, n))
+    end
+    return tracecount
+end
+
+function _backtrace_remove_kwcall_frames!(trace)
+    todelete = findall(trace) do (frame, _)
+        code = frame.linfo
+        if code isa MethodInstance
+            def = code.def
+            if def isa Method && def.name !== :kwcall && def.sig <: Tuple{typeof(Core.kwcall),NamedTuple,Any,Vararg}
+                # hide kwcall() methods, which are probably internal keyword sorter methods
+                # (we print the internal method instead, after demangling
+                # the argument list, since it has the right line number info)
+                return true
+            end
+        else
+            frame.func === :kwcall && return true
+        end
+        return false
+    end
+    deleteat!(trace, todelete)
+end
 
 # For improved user experience, filter out frames for include() implementation
 # - see #33065. See also #35371 for extended discussion of internal frames.
-function _simplify_include_frames(trace)
+function _backtrace_simplify_include_frames!(trace)
     kept_frames = trues(length(trace))
     first_ignored = nothing
     for i in length(trace):-1:1
         frame::StackFrame, _ = trace[i]
         mod = parentmodule(frame)
-        if first_ignored === nothing
+        if mod === Base && frame.func === :IncludeInto ||
+           mod === Core && frame.func === :EvalInto
+            kept_frames[i] = false
+        elseif first_ignored === nothing
             if mod === Base && frame.func === :_include
                 # Hide include() machinery by default
                 first_ignored = i
@@ -838,14 +1021,14 @@ function _simplify_include_frames(trace)
     if first_ignored !== nothing
         kept_frames[1:first_ignored] .= false
     end
-    return trace[kept_frames]
+    keepat!(trace, kept_frames)
 end
 
 # Collapse frames that have the same location (in some cases)
-function _collapse_repeated_frames(trace)
+function _backtrace_collapse_repeated_locations!(trace)
     kept_frames = trues(length(trace))
     last_frame = nothing
-    for i in 1:length(trace)
+    for i in eachindex(trace)
         frame::StackFrame, _ = trace[i]
         if last_frame !== nothing && frame.file == last_frame.file && frame.line == last_frame.line
             #=
@@ -877,10 +1060,10 @@ function _collapse_repeated_frames(trace)
             [3] g(x::Int64) <-- useless
             @ Main ./REPL[1]:1
             =#
-            if frame.linfo isa MethodInstance && last_frame.linfo isa MethodInstance &&
-                frame.linfo.def isa Method && last_frame.linfo.def isa Method
-                m, last_m = frame.linfo.def::Method, last_frame.linfo.def::Method
-                params, last_params = Base.unwrap_unionall(m.sig).parameters, Base.unwrap_unionall(last_m.sig).parameters
+            m, last_m = StackTraces.frame_method_or_module(frame),
+                        StackTraces.frame_method_or_module(last_frame)
+            if m isa Method && last_m isa Method
+                params, last_params = Base.unwrap_unionall(m.sig).parameters::SimpleVector, Base.unwrap_unionall(last_m.sig).parameters::SimpleVector
                 if last_m.nkw != 0
                     pos_sig_params = last_params[(last_m.nkw+2):end]
                     issame = true
@@ -890,7 +1073,7 @@ function _collapse_repeated_frames(trace)
                 end
                 if length(last_params) > length(params)
                     issame = true
-                    for i = 1:length(params)
+                    for i = eachindex(params)
                         issame &= params[i] == last_params[i]
                     end
                     if issame
@@ -903,65 +1086,19 @@ function _collapse_repeated_frames(trace)
         end
         last_frame = frame
     end
-    return trace[kept_frames]
+    keepat!(trace, kept_frames)
 end
 
+function process_backtrace(t::Vector{StackFrame})
+    tracecount = _backtrace_collapse_and_count_repeated_frames(t)
+    process_backtrace(tracecount)
+end
 
-function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true)
-    n = 0
-    last_frame = StackTraces.UNKNOWN
-    count = 0
-    ret = Any[]
-    for i in eachindex(t)
-        lkups = t[i]
-        if lkups isa StackFrame
-            lkups = [lkups]
-        else
-            lkups = StackTraces.lookup(lkups)
-        end
-        for lkup in lkups
-            if lkup === StackTraces.UNKNOWN
-                continue
-            end
-
-            if (lkup.from_c && skipC)
-                continue
-            end
-            code = lkup.linfo
-            if code isa MethodInstance
-                def = code.def
-                if def isa Method && def.name !== :kwcall && def.sig <: Tuple{typeof(Core.kwcall),NamedTuple,Any,Vararg}
-                    # hide kwcall() methods, which are probably internal keyword sorter methods
-                    # (we print the internal method instead, after demangling
-                    # the argument list, since it has the right line number info)
-                    continue
-                end
-            elseif !lkup.from_c
-                lkup.func === :kwcall && continue
-            end
-            count += 1
-            if count > limit
-                break
-            end
-
-            if lkup.file != last_frame.file || lkup.line != last_frame.line || lkup.func != last_frame.func || lkup.linfo !== last_frame.linfo
-                if n > 0
-                    push!(ret, (last_frame, n))
-                end
-                n = 1
-                last_frame = lkup
-            else
-                n += 1
-            end
-        end
-        count > limit && break
-    end
-    if n > 0
-        push!(ret, (last_frame, n))
-    end
-    trace = _simplify_include_frames(ret)
-    trace = _collapse_repeated_frames(trace)
-    return trace
+function process_backtrace(tracecount::Vector{Any})
+    _backtrace_remove_kwcall_frames!(tracecount)
+    _backtrace_simplify_include_frames!(tracecount)
+    _backtrace_collapse_repeated_locations!(tracecount)
+    return tracecount
 end
 
 function show_exception_stack(io::IO, stack)
@@ -1003,11 +1140,35 @@ end
 
 Experimental.register_error_hint(noncallable_number_hint_handler, MethodError)
 
+# handler for displaying a hint in case the user tries to call setindex! on
+# something that doesn't support it:
+#  - a number (probably attempting to use wrong indexing)
+#    eg: a = [1 2; 3 4]; a[1][2] = 5
+#  - a type (probably tried to initialize without parentheses)
+#    eg: d = Dict; d["key"] = 2
+function nonsetable_type_hint_handler(io, ex, arg_types, kwargs)
+    @nospecialize
+    if ex.f === setindex!
+        T = arg_types[1]
+        if T <: Number
+            print(io, "\nAre you trying to index into an array? For multi-dimensional arrays, separate the indices with commas: ")
+            printstyled(io, "a[1, 2]", color=:cyan)
+            print(io, " rather than a[1][2]")
+        elseif isType(T)
+            Tx = T.parameters[1]
+            print(io, "\nYou attempted to index the type $Tx, rather than an instance of the type. Make sure you create the type using its constructor: ")
+            printstyled(io, "d = $Tx([...])", color=:cyan)
+            print(io, " rather than d = $Tx")
+        end
+    end
+end
+
+Experimental.register_error_hint(nonsetable_type_hint_handler, MethodError)
+
 # Display a hint in case the user tries to use the + operator on strings
 # (probably attempting concatenation)
-function string_concatenation_hint_handler(io, ex, arg_types, kwargs)
-    @nospecialize
-    if (ex.f === +) && all(i -> i <: AbstractString, arg_types)
+function string_concatenation_hint_handler(@nospecialize(io::IO), ex::MethodError, arg_types::Vector{Any}, kwargs::Vector{Any})
+    if (ex.f === +) && !isempty(arg_types) && all(@nospecialize(a) -> unwrapva(a) <: AbstractString, arg_types)
         print(io, "\nString concatenation is performed with ")
         printstyled(io, "*", color=:cyan)
         print(io, " (See also: https://docs.julialang.org/en/v1/manual/strings/#man-concatenation).")
@@ -1016,6 +1177,182 @@ end
 
 Experimental.register_error_hint(string_concatenation_hint_handler, MethodError)
 
+# Display a hint in case the user tries to use the min or max function on an iterable
+# or tries to use something like `collect` on an iterator without defining either IteratorSize or length
+function methods_on_iterable(io, ex, arg_types, kwargs)
+    @nospecialize
+    f = ex.f
+    if (f === max || f === min) && length(arg_types) == 1 && Base.isiterable(only(arg_types))
+        f_correct = f === max ? "maximum" : "minimum"
+        print(io, "\nFinding the $f_correct of an iterable is performed with `$f_correct`.")
+    end
+    if (f === Base.length || f === Base.size) && length(arg_types) >= 1
+        arg_type_tuple = Tuple{arg_types...}
+        if hasmethod(iterate, arg_type_tuple)
+            iterkind = IteratorSize(arg_types[1])
+            if iterkind isa HasLength
+                print(io, "\nYou may need to implement the `length` method or define `IteratorSize` for this type to be `SizeUnknown`.")
+            elseif iterkind isa HasShape
+                print(io, "\nYou may need to implement the `length` and `size` methods for `IteratorSize` `HasShape`.")
+            end
+        end
+    end
+    nothing
+end
+
+Experimental.register_error_hint(methods_on_iterable, MethodError)
+
+# Display a hint in case the user tries to access non-member fields of container type datastructures
+function fielderror_dict_hint_handler(io, exc)
+    @nospecialize
+    field = exc.field
+    type = exc.type
+    if type <: AbstractDict
+        print(io, "\nDid you mean to access dict values using key: `:$field` ? Consider using indexing syntax ")
+        printstyled(io, "dict[:$(field)]", color=:cyan)
+        println(io)
+    end
+end
+
+Experimental.register_error_hint(fielderror_dict_hint_handler, FieldError)
+
+function fielderror_listfields_hint_handler(io, exc)
+    fields = fieldnames(exc.type)
+    if isempty(fields)
+        print(io, "; $(exc.type.name.wrapper) has no fields at all.")
+    else
+        print(io, ", available fields: $(join(map(k -> "`$k`", fields), ", "))")
+    end
+    props = _propertynames_bytype(exc.type)
+    isnothing(props) && return
+    props = setdiff(props, fields)
+    isempty(props) && return
+    print(io, "\nAvailable properties: $(join(map(k -> "`$k`", props), ", "))")
+end
+
+function _propertynames_bytype(T::Type)
+    which(propertynames, (T,)) === which(propertynames, (Any,)) && return nothing
+    inferred_names = promote_op(Val∘propertynames, T)
+    inferred_names isa DataType && inferred_names <: Val || return nothing
+    inferred_names = inferred_names.parameters[1]
+    inferred_names isa NTuple{<:Any, Symbol} || return nothing
+    return Symbol[inferred_names[i] for i in 1:length(inferred_names)]
+end
+
+Experimental.register_error_hint(fielderror_listfields_hint_handler, FieldError)
+
+function UndefVarError_hint(io::IO, ex::UndefVarError)
+    var = ex.var
+    if isdefined(ex, :scope)
+        scope = ex.scope
+        if scope isa Module
+            bpart = lookup_binding_partition(ex.world, GlobalRef(scope, var))
+            kind = binding_kind(bpart)
+
+            # Get the current world's binding partition for comparison
+            curworld = tls_world_age()
+            cur_bpart = lookup_binding_partition(curworld, GlobalRef(scope, var))
+            cur_kind = binding_kind(cur_bpart)
+
+            # Track if we printed the "too new" message
+            printed_too_new = false
+
+            # Check if the binding exists in the current world but was undefined in the error's world
+            if kind === PARTITION_KIND_GUARD
+                if isdefinedglobal(scope, var)
+                    print(io, "\nThe binding may be too new: running in world age $(ex.world), while current world is $(curworld).")
+                    printed_too_new = true
+                else
+                    print(io, "\nSuggestion: check for spelling errors or missing imports.")
+                end
+            elseif kind === PARTITION_KIND_GLOBAL || kind === PARTITION_KIND_UNDEF_CONST || kind == PARTITION_KIND_DECLARED
+                print(io, "\nSuggestion: add an appropriate import or assignment. This global was declared but not assigned.")
+            elseif kind === PARTITION_KIND_FAILED
+                print(io, "\nHint: It looks like two or more modules export different ",
+                "bindings with this name, resulting in ambiguity. Try explicitly ",
+                "importing it from a particular module, or qualifying the name ",
+                "with the module it should come from.")
+            elseif is_some_explicit_imported(kind)
+                print(io, "\nSuggestion: this global was defined as `$(partition_restriction(bpart).globalref)` but not assigned a value.")
+            elseif kind === PARTITION_KIND_BACKDATED_CONST
+                print(io, "\nSuggestion: define the const at top-level before running function that uses it (stricter Julia v1.12+ rule).")
+            end
+
+            # Check if binding kind changed between the error's world and current world
+            if !printed_too_new && kind !== cur_kind
+                print(io, "\nNote: the binding state changed since the error occurred (was: $(kind), now: $(cur_kind)).")
+            end
+        elseif scope === :static_parameter
+            print(io, "\nSuggestion: run Test.detect_unbound_args to detect method arguments that do not fully constrain a type parameter.")
+        elseif scope === :local
+            print(io, "\nSuggestion: check for an assignment to a local variable that shadows a global of the same name.")
+        end
+    else
+        scope = undef
+    end
+    if scope !== Base
+        warned = _UndefVarError_warnfor(io, [Base], var)
+
+        if !warned
+            modules_to_check = (m for m in Base.loaded_modules_order
+                                if m !== Core && m !== Base && m !== Main && m !== scope)
+            warned |= _UndefVarError_warnfor(io, modules_to_check, var)
+        end
+
+        warned || _UndefVarError_warnfor(io, [Core, Main], var)
+    end
+    return nothing
+end
+
+function _UndefVarError_warnfor(io::IO, modules, var::Symbol)
+    active_mod = Base.active_module()
+
+    warned = false
+    # collect modules which export or make public the variable by
+    # the module in which the variable is defined
+    to_warn_about = Dict{Module, Vector{Module}}()
+    for m in modules
+        # only include in info if binding has a value and is exported or public
+        if !Base.isdefined(m, var) || (!Base.isexported(m, var) && !Base.ispublic(m, var))
+            continue
+        end
+        warned = true
+
+        # handle case where the undefined variable is the name of a loaded module
+        if Symbol(m) == var && !isdefined(active_mod, var)
+            print(io, "\nHint: $m is loaded but not imported in the active module $active_mod.")
+            continue
+        end
+
+        binding_m = Base.binding_module(m, var)
+        if !haskey(to_warn_about, binding_m)
+            to_warn_about[binding_m] = [m]
+        else
+            push!(to_warn_about[binding_m], m)
+        end
+    end
+
+    for (binding_m, modules) in pairs(to_warn_about)
+        print(io, "\nHint: a global variable of this name also exists in ", binding_m, ".")
+        for m in modules
+            m == binding_m && continue
+            how_available = if Base.isexported(m, var)
+                "exported by"
+            elseif Base.ispublic(m, var)
+                "declared public in"
+            end
+            print(io, "\n    - Also $how_available $m")
+            if !isdefined(active_mod, nameof(m)) || (getproperty(active_mod, nameof(m)) !== m)
+                print(io, " (loaded but not imported in $active_mod)")
+            end
+            print(io, ".")
+        end
+    end
+    return warned
+end
+
+Base.Experimental.register_error_hint(UndefVarError_hint, UndefVarError)
+
 # ExceptionStack implementation
 size(s::ExceptionStack) = size(s.stack)
 getindex(s::ExceptionStack, i::Int) = s.stack[i]
diff --git a/base/essentials.jl b/base/essentials.jl
index 7b70c0dff074d..797c247949147 100644
--- a/base/essentials.jl
+++ b/base/essentials.jl
@@ -1,17 +1,18 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-import Core: CodeInfo, SimpleVector, donotdelete, compilerbarrier, arrayref
+using Core: CodeInfo, SimpleVector, donotdelete, compilerbarrier, memoryref, memoryrefnew, memoryrefget, memoryrefset!
 
 const Callable = Union{Function,Type}
 
 const Bottom = Union{}
 
 # Define minimal array interface here to help code used in macros:
-length(a::Array) = arraylen(a)
+size(a::Array) = getfield(a, :size)
+length(t::AbstractArray) = (@inline; prod(size(t)))
+size(a::GenericMemory) = (getfield(a, :length),)
+throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I)))
 
-# This is more complicated than it needs to be in order to get Win64 through bootstrap
-eval(:(getindex(A::Array, i1::Int) = arrayref($(Expr(:boundscheck)), A, i1)))
-eval(:(getindex(A::Array, i1::Int, i2::Int, I::Int...) = (@inline; arrayref($(Expr(:boundscheck)), A, i1, i2, I...))))
+# multidimensional getindex will be defined later on
 
 ==(a::GlobalRef, b::GlobalRef) = a.mod === b.mod && a.name === b.name
 
@@ -88,9 +89,9 @@ f(y) = [x for x in y]
     standard ones) on type-inference. Use [`Base.@nospecializeinfer`](@ref) together with
     `@nospecialize` to additionally suppress inference.
 
-# Example
+# Examples
 
-```julia
+```jldoctest; setup = :(using InteractiveUtils)
 julia> f(A::AbstractArray) = g(A)
 f (generic function with 1 method)
 
@@ -99,7 +100,7 @@ g (generic function with 1 method)
 
 julia> @code_typed f([1.0])
 CodeInfo(
-1 ─ %1 = invoke Main.g(_2::AbstractArray)::Float64
+1 ─ %1 =    invoke g(A::AbstractArray)::Float64
 └──      return %1
 ) => Float64
 ```
@@ -144,7 +145,7 @@ macro specialize(vars...)
 end
 
 """
-    @isdefined s -> Bool
+    @isdefined(s)::Bool
 
 Tests whether variable `s` is defined in the current scope.
 
@@ -178,25 +179,11 @@ macro isdefined(s::Symbol)
     return Expr(:escape, Expr(:isdefined, s))
 end
 
-"""
-    nameof(m::Module) -> Symbol
-
-Get the name of a `Module` as a [`Symbol`](@ref).
-
-# Examples
-```jldoctest
-julia> nameof(Base.Broadcast)
-:Broadcast
-```
-"""
-nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m)
+_nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m)
 
 function _is_internal(__module__)
-    if ccall(:jl_base_relative_to, Any, (Any,), __module__)::Module === Core.Compiler ||
-       nameof(__module__) === :Base
-        return true
-    end
-    return false
+    return _nameof(__module__) === :Base ||
+      _nameof(ccall(:jl_base_relative_to, Any, (Any,), __module__)::Module) === :Compiler
 end
 
 # can be used in place of `@assume_effects :total` (supposed to be used for bootstrapping)
@@ -208,7 +195,11 @@ macro _total_meta()
         #=:terminates_globally=#true,
         #=:terminates_locally=#false,
         #=:notaskstate=#true,
-        #=:inaccessiblememonly=#true))
+        #=:inaccessiblememonly=#true,
+        #=:noub=#true,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#true))
 end
 # can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping)
 macro _foldable_meta()
@@ -218,8 +209,102 @@ macro _foldable_meta()
         #=:nothrow=#false,
         #=:terminates_globally=#true,
         #=:terminates_locally=#false,
+        #=:notaskstate=#true,
+        #=:inaccessiblememonly=#true,
+        #=:noub=#true,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#true))
+end
+# can be used in place of `@assume_effects :terminates_locally` (supposed to be used for bootstrapping)
+macro _terminates_locally_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#false,
+        #=:effect_free=#false,
+        #=:nothrow=#false,
+        #=:terminates_globally=#false,
+        #=:terminates_locally=#true,
+        #=:notaskstate=#false,
+        #=:inaccessiblememonly=#false,
+        #=:noub=#false,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
+end
+# can be used in place of `@assume_effects :terminates_globally` (supposed to be used for bootstrapping)
+macro _terminates_globally_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#false,
+        #=:effect_free=#false,
+        #=:nothrow=#false,
+        #=:terminates_globally=#true,
+        #=:terminates_locally=#true,
         #=:notaskstate=#false,
-        #=:inaccessiblememonly=#true))
+        #=:inaccessiblememonly=#false,
+        #=:noub=#false,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
+end
+# can be used in place of `@assume_effects :terminates_globally :notaskstate` (supposed to be used for bootstrapping)
+macro _terminates_globally_notaskstate_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#false,
+        #=:effect_free=#false,
+        #=:nothrow=#false,
+        #=:terminates_globally=#true,
+        #=:terminates_locally=#true,
+        #=:notaskstate=#true,
+        #=:inaccessiblememonly=#false,
+        #=:noub=#false,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
+end
+# can be used in place of `@assume_effects :terminates_globally :noub` (supposed to be used for bootstrapping)
+macro _terminates_globally_noub_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#false,
+        #=:effect_free=#false,
+        #=:nothrow=#false,
+        #=:terminates_globally=#true,
+        #=:terminates_locally=#true,
+        #=:notaskstate=#false,
+        #=:inaccessiblememonly=#false,
+        #=:noub=#true,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
+end
+# can be used in place of `@assume_effects :effect_free :terminates_locally` (supposed to be used for bootstrapping)
+macro _effect_free_terminates_locally_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#false,
+        #=:effect_free=#true,
+        #=:nothrow=#false,
+        #=:terminates_globally=#false,
+        #=:terminates_locally=#true,
+        #=:notaskstate=#false,
+        #=:inaccessiblememonly=#false,
+        #=:noub=#false,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
+end
+# can be used in place of `@assume_effects :nothrow :noub` (supposed to be used for bootstrapping)
+macro _nothrow_noub_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#false,
+        #=:effect_free=#false,
+        #=:nothrow=#true,
+        #=:terminates_globally=#false,
+        #=:terminates_locally=#false,
+        #=:notaskstate=#false,
+        #=:inaccessiblememonly=#false,
+        #=:noub=#true,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :nothrow` (supposed to be used for bootstrapping)
 macro _nothrow_meta()
@@ -230,37 +315,103 @@ macro _nothrow_meta()
         #=:terminates_globally=#false,
         #=:terminates_locally=#false,
         #=:notaskstate=#false,
-        #=:inaccessiblememonly=#false))
+        #=:inaccessiblememonly=#false,
+        #=:noub=#false,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
-# can be used in place of `@assume_effects :terminates_locally` (supposed to be used for bootstrapping)
-macro _terminates_locally_meta()
+# can be used in place of `@assume_effects :noub` (supposed to be used for bootstrapping)
+macro _noub_meta()
     return _is_internal(__module__) && Expr(:meta, Expr(:purity,
         #=:consistent=#false,
         #=:effect_free=#false,
         #=:nothrow=#false,
         #=:terminates_globally=#false,
-        #=:terminates_locally=#true,
+        #=:terminates_locally=#false,
         #=:notaskstate=#false,
-        #=:inaccessiblememonly=#false))
+        #=:inaccessiblememonly=#false,
+        #=:noub=#true,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
-# can be used in place of `@assume_effects :effect_free :terminates_locally` (supposed to be used for bootstrapping)
-macro _effect_free_terminates_locally_meta()
+# can be used in place of `@assume_effects :notaskstate` (supposed to be used for bootstrapping)
+macro _notaskstate_meta()
     return _is_internal(__module__) && Expr(:meta, Expr(:purity,
         #=:consistent=#false,
-        #=:effect_free=#true,
+        #=:effect_free=#false,
         #=:nothrow=#false,
         #=:terminates_globally=#false,
-        #=:terminates_locally=#true,
+        #=:terminates_locally=#false,
+        #=:notaskstate=#true,
+        #=:inaccessiblememonly=#false,
+        #=:noub=#false,
+        #=:noub_if_noinbounds=#false,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
+end
+# can be used in place of `@assume_effects :noub_if_noinbounds` (supposed to be used for bootstrapping)
+macro _noub_if_noinbounds_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#false,
+        #=:effect_free=#false,
+        #=:nothrow=#false,
+        #=:terminates_globally=#false,
+        #=:terminates_locally=#false,
         #=:notaskstate=#false,
-        #=:inaccessiblememonly=#false))
+        #=:inaccessiblememonly=#false,
+        #=:noub=#false,
+        #=:noub_if_noinbounds=#true,
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 
 # another version of inlining that propagates an inbounds context
 macro _propagate_inbounds_meta()
     return Expr(:meta, :inline, :propagate_inbounds)
 end
+macro _nospecializeinfer_meta()
+    return Expr(:meta, :nospecializeinfer)
+end
 
-function iterate end
+# These special checkbounds methods are defined early for bootstrapping
+function checkbounds(::Type{Bool}, A::Union{Array, Memory}, i::Int)
+    @inline
+    ult_int(bitcast(UInt, sub_int(i, 1)), bitcast(UInt, length(A)))
+end
+function checkbounds(A::Union{Array, GenericMemory}, i::Int)
+    @inline
+    checkbounds(Bool, A, i) || throw_boundserror(A, (i,))
+end
+
+default_access_order(::GenericMemory{:not_atomic}) = :not_atomic
+default_access_order(::GenericMemory{:atomic}) = :monotonic
+default_access_order(::GenericMemoryRef{:not_atomic}) = :not_atomic
+default_access_order(::GenericMemoryRef{:atomic}) = :monotonic
+
+function getindex(A::GenericMemory, i::Int)
+    @_noub_if_noinbounds_meta
+    (@_boundscheck) && checkbounds(A, i)
+    memoryrefget(memoryrefnew(A, i, false), default_access_order(A), false)
+end
+
+getindex(A::GenericMemoryRef) = memoryrefget(A, default_access_order(A), @_boundscheck)
+
+"""
+    nameof(m::Module)::Symbol
+
+Get the name of a `Module` as a [`Symbol`](@ref).
+
+# Examples
+```jldoctest
+julia> nameof(Base.Broadcast)
+:Broadcast
+```
+"""
+nameof(m::Module) = (@_total_meta; ccall(:jl_module_name, Ref{Symbol}, (Any,), m))
+
+typeof(function iterate end).name.constprop_heuristic = Core.ITERATE_HEURISTIC
 
 """
     convert(T, x)
@@ -282,8 +433,9 @@ Stacktrace:
 [...]
 ```
 
-If `T` is a [`AbstractFloat`](@ref) type,
-then it will return the closest value to `x` representable by `T`.
+If `T` is an [`AbstractFloat`](@ref) type, then it will return the
+closest value to `x` representable by `T`. Inf is treated as one
+ulp greater than `floatmax(T)` for purposes of determining nearest.
 
 ```jldoctest
 julia> x = 1/3
@@ -297,7 +449,7 @@ julia> convert(BigFloat, x)
 ```
 
 If `T` is a collection type and `x` a collection, the result of
-`convert(T, x)` may alias all or part of `x`.
+`convert(T, x)` may share memory with all or part of `x`.
 ```jldoctest
 julia> x = Int[1, 2, 3];
 
@@ -312,7 +464,7 @@ See also: [`round`](@ref), [`trunc`](@ref), [`oftype`](@ref), [`reinterpret`](@r
 function convert end
 
 # ensure this is never ambiguous, and therefore fast for lookup
-convert(T::Type{Union{}}, x...) = throw(ArgumentError("cannot convert a value to Union{} for assignment"))
+convert(::Type{Union{}}, _...) = throw(ArgumentError("cannot convert a value to Union{} for assignment"))
 
 convert(::Type{Type}, x::Type) = x # the ssair optimizer is strongly dependent on this method existing to avoid over-specialization
                                    # in the absence of inlining-enabled
@@ -327,10 +479,18 @@ Evaluate an expression with values interpolated into it using `eval`.
 If two arguments are provided, the first is the module to evaluate in.
 """
 macro eval(ex)
-    return Expr(:escape, Expr(:call, GlobalRef(Core, :eval), __module__, Expr(:quote, ex)))
+    return Expr(:let, Expr(:(=), :eval_local_result,
+            Expr(:escape, Expr(:call, GlobalRef(Core, :eval), __module__, Expr(:quote, ex)))),
+        Expr(:block,
+            Expr(:var"latestworld-if-toplevel"),
+            :eval_local_result))
 end
 macro eval(mod, ex)
-    return Expr(:escape, Expr(:call, GlobalRef(Core, :eval), mod, Expr(:quote, ex)))
+    return Expr(:let, Expr(:(=), :eval_local_result,
+            Expr(:escape, Expr(:call, GlobalRef(Core, :eval), mod, Expr(:quote, ex)))),
+        Expr(:block,
+            Expr(:var"latestworld-if-toplevel"),
+            :eval_local_result))
 end
 
 # use `@eval` here to directly form `:new` expressions avoid implicit `convert`s
@@ -341,19 +501,19 @@ end
     Pairs{K, V, I, A}(data, itr) where {K, V, I, A} = $(Expr(:new, :(Pairs{K, V, I, A}), :(data isa A ? data : convert(A, data)), :(itr isa I ? itr : convert(I, itr))))
     Pairs{K, V}(data::A, itr::I) where {K, V, I, A} = $(Expr(:new, :(Pairs{K, V, I, A}), :data, :itr))
     Pairs{K}(data::A, itr::I) where {K, I, A} = $(Expr(:new, :(Pairs{K, eltype(A), I, A}), :data, :itr))
-    Pairs(data::A, itr::I) where  {I, A} = $(Expr(:new, :(Pairs{eltype(I), eltype(A), I, A}), :data, :itr))
+    Pairs(data::A, itr::I) where {I, A} = $(Expr(:new, :(Pairs{I !== Nothing ? eltype(I) : keytype(A), eltype(A), I, A}), :data, :itr))
 end
-pairs(::Type{NamedTuple}) = Pairs{Symbol, V, NTuple{N, Symbol}, NamedTuple{names, T}} where {V, N, names, T<:NTuple{N, Any}}
+pairs(::Type{NamedTuple}) = Pairs{Symbol, V, Nothing, NT} where {V, NT <: NamedTuple}
 
 """
-    Iterators.Pairs(values, keys) <: AbstractDict{eltype(keys), eltype(values)}
+    Base.Pairs(values, keys) <: AbstractDict{eltype(keys), eltype(values)}
 
-Transforms an indexable container into a Dictionary-view of the same data.
+Transform an indexable container into a Dictionary-view of the same data.
 Modifying the key-space of the underlying data may invalidate this object.
 """
 Pairs
 
-argtail(x, rest...) = rest
+argtail(_, rest...) = rest
 
 """
     tail(x::Tuple)::Tuple
@@ -375,6 +535,7 @@ tail(x::Tuple) = argtail(x...)
 tail(::Tuple{}) = throw(ArgumentError("Cannot call tail on an empty tuple."))
 
 function unwrap_unionall(@nospecialize(a))
+    @_foldable_meta
     while isa(a,UnionAll)
         a = a.body
     end
@@ -382,6 +543,7 @@ function unwrap_unionall(@nospecialize(a))
 end
 
 function rewrap_unionall(@nospecialize(t), @nospecialize(u))
+    @_foldable_meta
     if !isa(u, UnionAll)
         return t
     end
@@ -389,6 +551,7 @@ function rewrap_unionall(@nospecialize(t), @nospecialize(u))
 end
 
 function rewrap_unionall(t::Core.TypeofVararg, @nospecialize(u))
+    @_foldable_meta
     isdefined(t, :T) || return t
     if !isa(u, UnionAll)
         return t
@@ -411,11 +574,17 @@ function rename_unionall(@nospecialize(u))
     return UnionAll(nv, body{nv})
 end
 
+# remove concrete constraint on diagonal TypeVar if it comes from troot
+function widen_diagonal(@nospecialize(t), troot::UnionAll)
+    return ccall(:jl_widen_diagonal, Any, (Any, Any), t, troot)
+end
+
 function isvarargtype(@nospecialize(t))
     return isa(t, Core.TypeofVararg)
 end
 
 function isvatuple(@nospecialize(t))
+    @_foldable_meta
     t = unwrap_unionall(t)
     if isa(t, DataType)
         n = length(t.parameters)
@@ -436,15 +605,40 @@ function unconstrain_vararg_length(va::Core.TypeofVararg)
     return Vararg{unwrapva(va)}
 end
 
-typename(a) = error("typename does not apply to this type")
-typename(a::DataType) = a.name
-function typename(a::Union)
-    ta = typename(a.a)
-    tb = typename(a.b)
-    ta === tb || error("typename does not apply to unions whose components have different typenames")
-    return tb
+# Compute the minimum number of initialized fields for a particular datatype
+# (therefore also a lower bound on the number of fields)
+function datatype_min_ninitialized(@nospecialize t0)
+    t = unwrap_unionall(t0)
+    t isa DataType || return 0
+    isabstracttype(t) && return 0
+    if t.name === _NAMEDTUPLE_NAME
+        names, types = t.parameters[1], t.parameters[2]
+        if names isa Tuple
+            return length(names)
+        end
+        t = argument_datatype(types)
+        t isa DataType || return 0
+        t.name === Tuple.name || return 0
+    end
+    if t.name === Tuple.name
+        n = length(t.parameters)
+        n == 0 && return 0
+        va = t.parameters[n]
+        if isvarargtype(va)
+            n -= 1
+            if isdefined(va, :N)
+                va = va.N
+                if va isa Int
+                    n += va
+                end
+            end
+        end
+        return n
+    end
+    return length(t.name.names) - t.name.n_uninitialized
 end
-typename(union::UnionAll) = typename(union.body)
+
+import Core: typename
 
 _tuple_error(T::Type, x) = (@noinline; throw(MethodError(convert, (T, x))))
 
@@ -535,12 +729,14 @@ Neither `convert` nor `cconvert` should take a Julia object and turn it into a `
 """
 function cconvert end
 
-cconvert(T::Type, x) = x isa T ? x : convert(T, x) # do the conversion eagerly in most cases
+cconvert(::Type{T}, x) where {T} = x isa T ? x : convert(T, x) # do the conversion eagerly in most cases
 cconvert(::Type{Union{}}, x...) = convert(Union{}, x...)
 cconvert(::Type{<:Ptr}, x) = x # but defer the conversion to Ptr to unsafe_convert
 unsafe_convert(::Type{T}, x::T) where {T} = x # unsafe_convert (like convert) defaults to assuming the convert occurred
 unsafe_convert(::Type{T}, x::T) where {T<:Ptr} = x  # to resolve ambiguity with the next method
 unsafe_convert(::Type{P}, x::Ptr) where {P<:Ptr} = convert(P, x)
+unsafe_convert(::Type{Ptr{UInt8}}, s::String) = ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s)
+unsafe_convert(::Type{Ptr{Int8}}, s::String) = ccall(:jl_string_ptr, Ptr{Int8}, (Any,), s)
 
 """
     reinterpret(::Type{Out}, x::In)
@@ -549,7 +745,7 @@ Change the type-interpretation of the binary data in the isbits value `x`
 to that of the isbits type `Out`.
 The size (ignoring padding) of `Out` has to be the same as that of the type of `x`.
 For example, `reinterpret(Float32, UInt32(7))` interprets the 4 bytes corresponding to `UInt32(7)` as a
-[`Float32`](@ref).
+[`Float32`](@ref). Note that `reinterpret(In, reinterpret(Out, x)) === x`
 
 ```jldoctest
 julia> reinterpret(Float32, UInt32(7))
@@ -565,14 +761,20 @@ julia> reinterpret(Tuple{UInt16, UInt8}, (0x01, 0x0203))
 (0x0301, 0x02)
 ```
 
+!!! note
+
+    The treatment of padding differs from reinterpret(::DataType, ::AbstractArray).
+
 !!! warning
 
     Use caution if some combinations of bits in `Out` are not considered valid and would
     otherwise be prevented by the type's constructors and methods. Unexpected behavior
     may result without additional validation.
+
 """
-function reinterpret(Out::Type, x::In) where {In}
-    if isprimitivetype(Out) && isprimitivetype(In)
+function reinterpret(::Type{Out}, x) where {Out}
+    @inline
+    if isprimitivetype(Out) && isprimitivetype(typeof(x))
         return bitcast(Out, x)
     end
     # only available when Base is fully loaded.
@@ -641,9 +843,6 @@ julia> ifelse(1 > 2, 1, 2)
 """
 ifelse(condition::Bool, x, y) = Core.ifelse(condition, x, y)
 
-# simple Array{Any} operations needed for bootstrap
-@eval setindex!(A::Array{Any}, @nospecialize(x), i::Int) = arrayset($(Expr(:boundscheck)), A, x, i)
-
 """
     esc(e)
 
@@ -723,11 +922,11 @@ end
 
     Using `@inbounds` may return incorrect results/crashes/corruption
     for out-of-bounds indices. The user is responsible for checking it manually.
-    Only use `@inbounds` when it is certain from the information locally available
-    that all accesses are in bounds. In particular, using `1:length(A)` instead of
-    `eachindex(A)` in a function like the one above is _not_ safely inbounds because
-    the first index of `A` may not be `1` for all user defined types that subtype
-    `AbstractArray`.
+    Only use `@inbounds` when you are certain that all accesses are in bounds (as
+    undefined behavior, e.g. crashes, might occur if this assertion is violated). For
+    example, using `1:length(A)` instead of `eachindex(A)` in a function like
+    the one above is _not_ safely inbounds because the first index of `A` may not
+    be `1` for all user defined types that subtype `AbstractArray`.
 """
 macro inbounds(blk)
     return Expr(:block,
@@ -759,17 +958,30 @@ macro goto(name::Symbol)
     return esc(Expr(:symbolicgoto, name))
 end
 
+# linear indexing
+function getindex(A::Array, i::Int)
+    @_noub_if_noinbounds_meta
+    @boundscheck checkbounds(A, i)
+    memoryrefget(memoryrefnew(getfield(A, :ref), i, false), :not_atomic, false)
+end
+# simple Array{Any} operations needed for bootstrap
+function setindex!(A::Array{Any}, @nospecialize(x), i::Int)
+    @_noub_if_noinbounds_meta
+    @boundscheck checkbounds(A, i)
+    memoryrefset!(memoryrefnew(getfield(A, :ref), i, false), x, :not_atomic, false)
+    return A
+end
+setindex!(A::Memory{Any}, @nospecialize(x), i::Int) = (memoryrefset!(memoryrefnew(A, i, @_boundscheck), x, :not_atomic, @_boundscheck); A)
+setindex!(A::MemoryRef{T}, x) where {T} = (memoryrefset!(A, convert(T, x), :not_atomic, @_boundscheck); A)
+setindex!(A::MemoryRef{Any}, @nospecialize(x)) = (memoryrefset!(A, x, :not_atomic, @_boundscheck); A)
+
 # SimpleVector
 
-@eval getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref($(Expr(:boundscheck)), v, i))
+getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref(v, i))
 function length(v::SimpleVector)
-    @_total_meta
-    t = @_gc_preserve_begin v
-    len = unsafe_load(Ptr{Int}(pointer_from_objref(v)))
-    @_gc_preserve_end t
-    return len
+    Core._svec_len(v)
 end
-firstindex(v::SimpleVector) = 1
+firstindex(::SimpleVector) = 1
 lastindex(v::SimpleVector) = length(v)
 iterate(v::SimpleVector, i=1) = (length(v) < i ? nothing : (v[i], i + 1))
 eltype(::Type{SimpleVector}) = Any
@@ -793,7 +1005,7 @@ getindex(v::SimpleVector, I::AbstractArray) = Core.svec(Any[ v[i] for i in I ]..
 unsafe_convert(::Type{Ptr{Any}}, sv::SimpleVector) = convert(Ptr{Any},pointer_from_objref(sv)) + sizeof(Ptr)
 
 """
-    isassigned(array, i) -> Bool
+    isassigned(array, i)::Bool
 
 Test whether the given array has a value associated with index `i`. Return `false`
 if the index is out of bounds, or has an undefined reference.
@@ -842,6 +1054,10 @@ struct Colon <: Function
 end
 const (:) = Colon()
 
+function show(io::IO, ::Colon)
+    show_type_name(io, Colon.name)
+    print(io, "()")
+end
 
 """
     Val(c)
@@ -869,73 +1085,25 @@ end
 Val(x) = Val{x}()
 
 """
-    invokelatest(f, args...; kwargs...)
-
-Calls `f(args...; kwargs...)`, but guarantees that the most recent method of `f`
-will be executed.   This is useful in specialized circumstances,
-e.g. long-running event loops or callback functions that may
-call obsolete versions of a function `f`.
-(The drawback is that `invokelatest` is somewhat slower than calling
-`f` directly, and the type of the result cannot be inferred by the compiler.)
-
-!!! compat "Julia 1.9"
-    Prior to Julia 1.9, this function was not exported, and was called as `Base.invokelatest`.
-"""
-function invokelatest(@nospecialize(f), @nospecialize args...; kwargs...)
-    kwargs = merge(NamedTuple(), kwargs)
-    if isempty(kwargs)
-        return Core._call_latest(f, args...)
-    end
-    return Core._call_latest(Core.kwcall, kwargs, f, args...)
-end
+    inferencebarrier(x)
 
+A shorthand for `compilerbarrier(:type, x)` causes the type of this statement to be inferred as `Any`.
+See [`Base.compilerbarrier`](@ref) for more info.
 """
-    invoke_in_world(world, f, args...; kwargs...)
-
-Call `f(args...; kwargs...)` in a fixed world age, `world`.
-
-This is useful for infrastructure running in the user's Julia session which is
-not part of the user's program. For example, things related to the REPL, editor
-support libraries, etc. In these cases it can be useful to prevent unwanted
-method invalidation and recompilation latency, and to prevent the user from
-breaking supporting infrastructure by mistake.
-
-The current world age can be queried using [`Base.get_world_counter()`](@ref)
-and stored for later use within the lifetime of the current Julia session, or
-when serializing and reloading the system image.
-
-Technically, `invoke_in_world` will prevent any function called by `f` from
-being extended by the user during their Julia session. That is, generic
-function method tables seen by `f` (and any functions it calls) will be frozen
-as they existed at the given `world` age. In a sense, this is like the opposite
-of [`invokelatest`](@ref).
-
-!!! note
-    It is not valid to store world ages obtained in precompilation for later use.
-    This is because precompilation generates a "parallel universe" where the
-    world age refers to system state unrelated to the main Julia session.
-"""
-function invoke_in_world(world::UInt, @nospecialize(f), @nospecialize args...; kwargs...)
-    kwargs = Base.merge(NamedTuple(), kwargs)
-    if isempty(kwargs)
-        return Core._call_in_world(world, f, args...)
-    end
-    return Core._call_in_world(world, Core.kwcall, kwargs, f, args...)
-end
-
 inferencebarrier(@nospecialize(x)) = compilerbarrier(:type, x)
 
 """
-    isempty(collection) -> Bool
+    isempty(collection)::Bool
 
 Determine whether a collection is empty (has no elements).
 
 !!! warning
 
     `isempty(itr)` may consume the next element of a stateful iterator `itr`
-    unless an appropriate `Base.isdone(itr)` or `isempty` method is defined.
-    Use of `isempty` should therefore be avoided when writing generic
-    code which should support any iterator type.
+    unless an appropriate [`Base.isdone(itr)`](@ref) method is defined.
+    Stateful iterators *should* implement `isdone`, but you may want to avoid
+    using `isempty` when writing generic code which should support any iterator
+    type.
 
 # Examples
 ```jldoctest
@@ -961,7 +1129,7 @@ This function simply returns its argument by default, since the elements
 of a general iterator are normally considered its "values".
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\d\$"m
 julia> d = Dict("a"=>1, "b"=>2);
 
 julia> values(d)
@@ -1044,22 +1212,27 @@ end
 
 # Iteration
 """
-    isdone(itr, state...) -> Union{Bool, Missing}
+    isdone(itr, [state])::Union{Bool, Missing}
 
 This function provides a fast-path hint for iterator completion.
-This is useful for mutable iterators that want to avoid having elements
-consumed, if they are not going to be exposed to the user (e.g. to check
-for done-ness in `isempty` or `zip`). Mutable iterators that want to
-opt into this feature should define an isdone method that returns
-true/false depending on whether the iterator is done or not. Stateless
-iterators need not implement this function. If the result is `missing`,
-callers may go ahead and compute `iterate(x, state...) === nothing` to
-compute a definite answer.
+This is useful for stateful iterators that want to avoid having elements
+consumed if they are not going to be exposed to the user (e.g. when checking
+for done-ness in `isempty` or `zip`).
+
+Stateful iterators that want to opt into this feature should define an `isdone`
+method that returns true/false depending on whether the iterator is done or
+not. Stateless iterators need not implement this function.
+
+If the result is `missing`, then `isdone` cannot determine whether the iterator
+state is terminal, and callers must compute `iterate(itr, state) === nothing`
+to obtain a definitive answer.
+
+See also [`iterate`](@ref), [`isempty`](@ref)
 """
-isdone(itr, state...) = missing
+isdone(_, _...) = missing
 
 """
-    iterate(iter [, state]) -> Union{Nothing, Tuple{Any, Any}}
+    iterate(iter [, state])::Union{Nothing, Tuple{Any, Any}}
 
 Advance the iterator to obtain the next element. If no elements
 remain, `nothing` should be returned. Otherwise, a 2-tuple of the
@@ -1068,7 +1241,7 @@ next element and the new iteration state should be returned.
 function iterate end
 
 """
-    isiterable(T) -> Bool
+    isiterable(T)::Bool
 
 Test if type `T` is an iterable collection type or not,
 that is whether it has an `iterate` method or not.
@@ -1076,3 +1249,67 @@ that is whether it has an `iterate` method or not.
 function isiterable(T)::Bool
     return hasmethod(iterate, Tuple{T})
 end
+
+"""
+    @world(sym, world)
+
+Resolve the binding `sym` in world `world`. See [`invoke_in_world`](@ref) for running
+arbitrary code in fixed worlds. `world` may be `UnitRange`, in which case the macro
+will error unless the binding is valid and has the same value across the entire world
+range.
+
+As a special case, the world `∞` always refers to the latest world, even if that world
+is newer than the world currently running.
+
+The `@world` macro is primarily used in the printing of bindings that are no longer
+available in the current world.
+
+## Example
+```julia-repl
+julia> struct Foo; a::Int; end
+Foo
+
+julia> fold = Foo(1)
+
+julia> Int(Base.get_world_counter())
+26866
+
+julia> struct Foo; a::Int; b::Int end
+Foo
+
+julia> fold
+@world(Foo, 26866)(1)
+```
+
+!!! compat "Julia 1.12"
+    This functionality requires at least Julia 1.12.
+"""
+macro world(sym, world)
+    if world == :∞
+        world = Expr(:call, get_world_counter)
+    end
+    if isa(sym, Symbol)
+        return :($(_resolve_in_world)($(esc(world)), $(QuoteNode(GlobalRef(__module__, sym)))))
+    elseif isa(sym, GlobalRef)
+        return :($(_resolve_in_world)($(esc(world)), $(QuoteNode(sym))))
+    elseif isa(sym, Expr) && sym.head === :(.) &&
+            length(sym.args) == 2 && isa(sym.args[2], QuoteNode) && isa(sym.args[2].value, Symbol)
+        return :($(_resolve_in_world)($(esc(world)), $(GlobalRef)($(esc(sym.args[1])), $(sym.args[2]))))
+    else
+        error("`@world` requires a symbol or GlobalRef")
+    end
+end
+
+_resolve_in_world(world::Integer, gr::GlobalRef) =
+    invoke_in_world(UInt(world), Core.getglobal, gr.mod, gr.name)
+
+# Special constprop heuristics for various binary opes
+typename(typeof(function + end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function - end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function * end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function == end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function != end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function <= end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function >= end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function < end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function > end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
diff --git a/base/experimental.jl b/base/experimental.jl
index cc8d368023b49..2deb3bc76af6c 100644
--- a/base/experimental.jl
+++ b/base/experimental.jl
@@ -9,7 +9,8 @@
 """
 module Experimental
 
-using Base: Threads, sync_varname
+using Base: Threads, sync_varname, is_function_def, @propagate_inbounds
+using Base: GenericCondition
 using Base.Meta
 
 """
@@ -28,22 +29,19 @@ end
 Base.IndexStyle(::Type{<:Const}) = IndexLinear()
 Base.size(C::Const) = size(C.a)
 Base.axes(C::Const) = axes(C.a)
-@eval Base.getindex(A::Const, i1::Int) =
-    (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1))
-@eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) =
-  (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
+@propagate_inbounds Base.getindex(A::Const, i1::Int, I::Int...) = A.a[i1, I...]
 
 """
     @aliasscope expr
 
-Allows the compiler to assume that all `Const`s are not being modified through stores
+Allow the compiler to assume that all `Const`s are not being modified through stores
 within this scope, even if the compiler can't prove this to be the case.
 
 !!! warning
     Experimental API. Subject to change without deprecation.
 """
 macro aliasscope(body)
-    sym = gensym()
+    sym = :aliasscope_result
     quote
         $(Expr(:aliasscope))
         $sym = $(esc(body))
@@ -86,11 +84,16 @@ end
 """
     Experimental.@sync
 
-Wait until all lexically-enclosed uses of `@async`, `@spawn`, `@spawnat` and `@distributed`
+Wait until all lexically-enclosed uses of [`@async`](@ref), [`@spawn`](@ref Threads.@spawn),
+`Distributed.@spawnat` and `Distributed.@distributed`
 are complete, or at least one of them has errored. The first exception is immediately
 rethrown. It is the responsibility of the user to cancel any still-running operations
 during error handling.
 
+!!! Note
+    This is different to [`@sync`](@ref) in that errors from wrapped tasks are thrown immediately,
+    potentially before all tasks have returned.
+
 !!! Note
     This interface is experimental and subject to change or removal without notice.
 """
@@ -141,7 +144,7 @@ code to resort to runtime dispatch instead.
 Supported values are `1`, `2`, `3`, `4`, and `default` (currently equivalent to `3`).
 """
 macro max_methods(n::Int)
-    0 < n < 5 || error("We must have that `1 <= max_methods <= 4`, but `max_methods = $n`.")
+    1 <= n <= 4 || error("We must have that `1 <= max_methods <= 4`, but `max_methods = $n`.")
     return Expr(:meta, :max_methods, n)
 end
 
@@ -154,13 +157,13 @@ for max_methods. This setting is global for the entire generic function (or more
 the MethodTable).
 """
 macro max_methods(n::Int, fdef::Expr)
-    0 < n <= 255 || error("We must have that `1 <= max_methods <= 255`, but `max_methods = $n`.")
+    1 <= n <= 255 || error("We must have that `1 <= max_methods <= 255`, but `max_methods = $n`.")
     (fdef.head === :function && length(fdef.args) == 1) || error("Second argument must be a function forward declaration")
     return :(typeof($(esc(fdef))).name.max_methods = $(UInt8(n)))
 end
 
 """
-    Experimental.@compiler_options optimize={0,1,2,3} compile={yes,no,all,min} infer={yes,no} max_methods={default,1,2,3,...}
+    Experimental.@compiler_options optimize={0,1,2,3} compile={yes,no,all,min} infer={true,false} max_methods={default,1,2,3,4}
 
 Set compiler options for code in the enclosing module. Options correspond directly to
 command-line options with the same name, where applicable. The following options
@@ -193,7 +196,7 @@ macro compiler_options(args...)
             elseif ex.args[1] === :max_methods
                 a = ex.args[2]
                 a = a === :default ? 3 :
-                  a isa Int ? ((0 < a < 5) ? a : error("We must have that `1 <= max_methods <= 4`, but `max_methods = $a`.")) :
+                  a isa Int ? ((1 <= a <= 4) ? a : error("We must have that `1 <= max_methods <= 4`, but `max_methods = $a`.")) :
                   error("invalid argument to \"max_methods\" option")
                 push!(opts.args, Expr(:meta, :max_methods, a))
             else
@@ -252,7 +255,7 @@ When issuing a hint, the output should typically start with `\\n`.
 If you define custom exception types, your `showerror` method can
 support hints by calling [`Experimental.show_error_hints`](@ref).
 
-# Example
+# Examples
 
 ```
 julia> module Hinter
@@ -278,6 +281,7 @@ Then if you call `Hinter.only_int` on something that isn't an `Int` (thereby tri
 ```
 julia> Hinter.only_int(1.0)
 ERROR: MethodError: no method matching only_int(::Float64)
+The function `only_int` exists, but no method is defined for this combination of argument types.
 Did you mean to call `any_number`?
 Closest candidates are:
     ...
@@ -291,18 +295,18 @@ Closest candidates are:
     `if isdefined(Base.Experimental, :register_error_hint) ... end` block.
 """
 function register_error_hint(@nospecialize(handler), @nospecialize(exct::Type))
-    list = get!(Vector{Any}, _hint_handlers, exct)
-    push!(list, handler)
+    list = get!(Vector{Any}, _hint_handlers, Core.typename(exct))
+    push!(list, (exct, handler))
     return nothing
 end
 
-const _hint_handlers = IdDict{Type,Vector{Any}}()
+const _hint_handlers = IdDict{Core.TypeName,Vector{Any}}()
 
 """
     Experimental.show_error_hints(io, ex, args...)
 
 Invoke all handlers from [`Experimental.register_error_hint`](@ref) for the particular
-exception type `typeof(ex)`. `args` must contain any other arguments expected by
+exception type `typeof(ex)` and all of its supertypes. `args` must contain any other arguments expected by
 the handler for that type.
 
 !!! compat "Julia 1.5"
@@ -311,15 +315,21 @@ the handler for that type.
     This interface is experimental and subject to change or removal without notice.
 """
 function show_error_hints(io, ex, args...)
-    hinters = get(_hint_handlers, typeof(ex), nothing)
-    isnothing(hinters) && return
-    for handler in hinters
-        try
-            Base.invokelatest(handler, io, ex, args...)
-        catch err
-            tn = typeof(handler).name
-            @error "Hint-handler $handler for $(typeof(ex)) in $(tn.module) caused an error"
+    @nospecialize
+    ex_supertype = typeof(ex)
+    while ex_supertype != Any
+        hinters = get(_hint_handlers, Core.typename(ex_supertype), Any[])
+        for (exct, handler) in hinters
+            ex isa exct || continue
+            try
+                # TODO: deal with handlers accepting different signatures?
+                @invokelatest handler(io, ex, args...)
+            catch
+                tn = typeof(handler).name
+                @error "Hint-handler $handler for $(ex_supertype) in $(tn.module) caused an error" exception=current_exceptions()
+            end
         end
+        ex_supertype = supertype(ex_supertype)
     end
 end
 
@@ -327,27 +337,113 @@ end
 include("opaque_closure.jl")
 
 """
-    Experimental.@overlay mt [function def]
+    Base.Experimental.@overlay mt def
 
 Define a method and add it to the method table `mt` instead of to the global method table.
 This can be used to implement a method override mechanism. Regular compilation will not
 consider these methods, and you should customize the compilation flow to look in these
 method tables (e.g., using [`Core.Compiler.OverlayMethodTable`](@ref)).
 
+!!! note
+    Please be aware that when defining overlay methods using `@overlay`, it is not necessary
+    to have an original method that corresponds exactly in terms of how the method dispatches.
+    This means that the method overlay mechanism enabled by `@overlay` is not implemented by
+    replacing the methods themselves, but through an additional and prioritized method
+    lookup during the method dispatch.
+
+    Considering this, it is important to understand that in compilations using an overlay
+    method table like the following, the method dispatched by `callx(x)` is not the regular
+    method `callx(::Float64)`, but the overlay method `callx(x::Real)`:
+    ```julia
+    callx(::Real) = :real
+    @overlay SOME_OVERLAY_MT callx(::Real) = :overlay_real
+    callx(::Float64) = :float64
+
+    # some overlay callsite
+    let x::Float64
+        callx(x) #> :overlay_real
+    end
+    ```
 """
 macro overlay(mt, def)
-    def = macroexpand(__module__, def) # to expand @inline, @generated, etc
-    if !isexpr(def, [:function, :(=)])
-        error("@overlay requires a function Expr")
-    end
-    if isexpr(def.args[1], :call)
-        def.args[1].args[1] = Expr(:overlay, mt, def.args[1].args[1])
-    elseif isexpr(def.args[1], :where)
-        def.args[1].args[1].args[1] = Expr(:overlay, mt, def.args[1].args[1].args[1])
+    inner = Base.unwrap_macrocalls(def)
+    is_function_def(inner) || error("@overlay requires a function definition")
+    overlay_def!(mt, inner)
+    return esc(def)
+end
+
+"""
+    Base.Experimental.@consistent_overlay mt def
+
+This macro operates almost identically to [`Base.Experimental.@overlay`](@ref), defining a
+new overlay method. The key difference with this macro is that it informs the compiler that
+the invocation of the overlay method it defines is `:consistent` with a regular,
+non-overlayed method call.
+
+More formally, when evaluating a generic function call ``f(x)`` at a specific world age
+``i``, if a regular method call ``fᵢ(x)`` is redirected to an overlay method call ``fᵢ′(x)``
+defined by this macro, ``fᵢ(x)`` and ``fᵢ′(x)`` are considered `:consistent` if the following
+conditions are met:
+- If ``fᵢ(x)`` returns a value ``y``, then ``fᵢ′(x)`` also returns some value ``yᵢ``, and ``y ≡ yᵢ`` holds.
+- If ``fᵢ(x)`` throws an exception, then ``fᵢ′(x)`` also throws some exception.
+
+For a detailed definition of `:consistent`-cy, consult the corresponding section in
+[`Base.@assume_effects`](@ref).
+
+!!! note
+    Note that the requirements for `:consistent`-cy include not only that the return values
+    are egal, but also that the manner of termination is the same. However, it's important
+    to aware that when they throw exceptions, the exceptions themselves don't necessarily
+    have to be egal. In other words, if ``fᵢ(x)`` throws an exception, ``fᵢ′(x)`` is
+    required to also throw one, but the exact exceptions may differ.
+
+!!! note
+    Please note that the `:consistent`-cy requirement applies not to method itself but to
+    _method invocation_. This means that for the use of `@consistent_overlay`, it is
+    necessary for method invocations with the native regular compilation and those with
+    a compilation with overlay method table to be `:consistent`.
+
+    For example, it is important to understand that, `@consistent_overlay` can be used like
+    the following:
+    ```julia
+    callsin(x::Real) = x < 0 ? error(x) : sin(x)
+    @consistent_overlay SOME_OVERLAY_MT callsin(x::Float64) =
+        x < 0 ? error_somehow(x) : sin(x)
+    ```
+    However, be aware that this `@consistent_overlay` will immediately become invalid if a
+    new method for `callsin` is defined subsequently, such as:
+    ```julia
+    callsin(x::Float64) = cos(x)
+    ```
+
+    This specifically implies that the use of `@consistent_overlay` should be restricted as
+    much as possible to cases where a regular method with a concrete signature is replaced
+    by an overlay method with the same concrete signature.
+
+    This constraint is closely related to the note in [`Base.Experimental.@overlay`](@ref);
+    you are advised to consult that as well.
+"""
+macro consistent_overlay(mt, def)
+    inner = Base.unwrap_macrocalls(def)
+    is_function_def(inner) || error("@consistent_overlay requires a function definition")
+    overlay_def!(mt, inner)
+    override = Base.EffectsOverride(; consistent_overlay=true)
+    Base.pushmeta!(def::Expr, Base.form_purity_expr(override))
+    return esc(def)
+end
+
+function overlay_def!(mt, @nospecialize ex)
+    arg1 = ex.args[1]
+    if isexpr(arg1, :call)
+        arg1.args[1] = Expr(:overlay, mt, arg1.args[1])
+    elseif isexpr(arg1, :(::))
+        overlay_def!(mt, arg1)
+    elseif isexpr(arg1, :where)
+        overlay_def!(mt, arg1)
     else
-        error("@overlay requires a function Expr")
+        error("@overlay requires a function definition")
     end
-    esc(def)
+    return ex
 end
 
 let new_mt(name::Symbol, mod::Module) = begin
@@ -360,12 +456,389 @@ let new_mt(name::Symbol, mod::Module) = begin
 end
 
 """
-    Experimental.@MethodTable(name)
+    Base.Experimental.@MethodTable name
 
 Create a new MethodTable in the current module, bound to `name`. This method table can be
-used with the [`Experimental.@overlay`](@ref) macro to define methods for a function without
-adding them to the global method table.
+used with the [`Base.Experimental.@overlay`](@ref) macro to define methods for a function
+without adding them to the global method table.
 """
 :@MethodTable
 
+"""
+    Base.Experimental.make_io_thread()
+
+Create a new thread that will run the Julia IO loop. This can potentially reduce the latency of some
+IO operations as they no longer depend on the main thread to run it. This does mean that code that uses
+this as implicit synchronization needs to be checked for correctness.
+"""
+function make_io_thread()
+    tid = UInt[0]
+    threadwork = @cfunction function(arg::Ptr{Cvoid})
+            current_task().donenotify = Base.ThreadSynchronizer() #TODO: Should this happen by default in adopt thread?
+            Base.errormonitor(current_task()) # this may not go particularly well if the IO loop is dead, but try anyways
+            @ccall jl_set_io_loop_tid((Threads.threadid() - 1)::Int16)::Cvoid
+            wait() # spin uv_run as long as needed
+            nothing
+        end Cvoid (Ptr{Cvoid},)
+    err = @ccall uv_thread_create(tid::Ptr{UInt}, threadwork::Ptr{Cvoid}, C_NULL::Ptr{Cvoid})::Cint
+    err == 0 || Base.uv_error("uv_thread_create", err)
+    @ccall uv_thread_detach(tid::Ptr{UInt})::Cint
+    err == 0 || Base.uv_error("uv_thread_detach", err)
+    # n.b. this does not wait for the thread to start or to take ownership of the event loop
+end
+
+"""
+    Base.Experimental.entrypoint(f, argtypes::Tuple)
+
+Mark a method for inclusion when the `--trim` option is specified.
+"""
+function entrypoint(@nospecialize(f), @nospecialize(argtypes::Tuple))
+    entrypoint(Tuple{Core.Typeof(f), argtypes...})
+end
+
+function entrypoint(@nospecialize(argt::Type))
+    # Only add to entrypoint list if we're generating output and in trim mode
+    if ccall(:jl_generating_output, Cint, ()) != 0
+        Base.Compiler.add_entrypoint(argt)
+    end
+    nothing
+end
+
+"""
+    Base.Experimental.disable_new_worlds()
+
+Mark that no new worlds (methods additions, deletions, etc) are permitted to be created at
+any future time, allowing for lower latencies for some operations and slightly lower memory
+usage, by eliminating the tracking of those possible invalidation.
+"""
+disable_new_worlds() = ccall(:jl_disable_new_worlds, Cvoid, ())
+
+### Task metrics
+
+"""
+    Base.Experimental.task_metrics(::Bool)
+
+Enable or disable the collection of per-task metrics.
+A `Task` created when `Base.Experimental.task_metrics(true)` is in effect will have
+[`Base.Experimental.task_running_time_ns`](@ref) and [`Base.Experimental.task_wall_time_ns`](@ref)
+timing information available.
+
+!!! note
+    Task metrics can be enabled at start-up via the `--task-metrics=yes` command line option.
+"""
+function task_metrics(b::Bool)
+    if b
+        ccall(:jl_task_metrics_enable, Cvoid, ())
+    else
+        ccall(:jl_task_metrics_disable, Cvoid, ())
+    end
+    return nothing
 end
+
+"""
+    Base.Experimental.task_running_time_ns(t::Task)::Union{UInt64, Nothing}
+
+Return the total nanoseconds that the task `t` has spent running.
+This metric is only updated when `t` yields or completes unless `t` is the current task, in
+which it will be updated continuously.
+See also [`Base.Experimental.task_wall_time_ns`](@ref).
+
+Return `nothing` if task timings are not enabled.
+See [`Base.Experimental.task_metrics`](@ref).
+
+!!! note "This metric is from the Julia scheduler"
+    A task may be running on an OS thread that is descheduled by the OS
+    scheduler, this time still counts towards the metric.
+
+!!! compat "Julia 1.12"
+    This method was added in Julia 1.12.
+"""
+function task_running_time_ns(t::Task=current_task())
+    t.metrics_enabled || return nothing
+    if t == current_task()
+        # These metrics fields can't update while we're running.
+        # But since we're running we need to include the time since we last started running!
+        return t.running_time_ns + (time_ns() - t.last_started_running_at)
+    else
+        return t.running_time_ns
+    end
+end
+
+"""
+    Base.Experimental.task_wall_time_ns(t::Task)::Union{UInt64, Nothing}
+
+Return the total nanoseconds that the task `t` was runnable.
+This is the time since the task first entered the run queue until the time at which it
+completed, or until the current time if the task has not yet completed.
+See also [`Base.Experimental.task_running_time_ns`](@ref).
+
+Return `nothing` if task timings are not enabled.
+See [`Base.Experimental.task_metrics`](@ref).
+
+!!! compat "Julia 1.12"
+    This method was added in Julia 1.12.
+"""
+function task_wall_time_ns(t::Task=current_task())
+    t.metrics_enabled || return nothing
+    start_at = t.first_enqueued_at
+    start_at == 0 && return UInt64(0)
+    end_at = t.finished_at
+    end_at == 0 && return time_ns() - start_at
+    return end_at - start_at
+end
+
+# wait_with_timeout
+#
+# A version of `wait(c::Condition)` that additionally allows the
+# specification of a timeout. This is experimental as it will likely
+# be dropped when a cancellation framework is added.
+#
+# The parallel behavior of wait_with_timeout is specified here. There
+# are three concurrent entities that can interact:
+# 1. Task W: the task that calls wait_with_timeout.
+# 2. Task T: the task created to handle a timeout.
+# 3. Task N: the task that notifies the Condition being waited on.
+#
+# Typical flow:
+# - W enters the Condition's wait queue.
+# - W creates T and stops running (calls wait()).
+# - T, when scheduled, waits on a Timer.
+# - Two common outcomes:
+#   - N notifies the Condition.
+#     - W starts running, closes the Timer, sets waiter_left and returns
+#       the notify'ed value.
+#     - The closed Timer throws an EOFError to T which simply ends.
+#   - The Timer expires.
+#     - T starts running and locks the Condition.
+#     - T confirms that waiter_left is unset and that W is still in the
+#       Condition's wait queue; it then removes W from the wait queue,
+#       sets dosched to true and unlocks the Condition.
+#     - If dosched is true, T schedules W with the special :timed_out
+#       value.
+#     - T ends.
+#     - W runs and returns :timed_out.
+#
+# Some possible interleavings:
+# - N notifies the Condition but the Timer expires and T starts running
+#   before W:
+#   - W closing the expired Timer is benign.
+#   - T will find that W is no longer in the Condition's wait queue
+#     (which is protected by a lock) and will not schedule W.
+# - N notifies the Condition; W runs and calls wait on the Condition
+#   again before the Timer expires:
+#   - W sets waiter_left before leaving. When T runs, it will find that
+#     waiter_left is set and will not schedule W.
+#
+# The lock on the Condition's wait queue and waiter_left together
+# ensure proper synchronization and behavior of the tasks involved.
+
+"""
+    wait_with_timeout(c::GenericCondition; first::Bool=false, timeout::Real=0.0)
+
+Wait for [`notify`](@ref) on `c` and return the `val` parameter passed to `notify`.
+
+If the keyword `first` is set to `true`, the waiter will be put _first_
+in line to wake up on `notify`. Otherwise, `wait` has first-in-first-out (FIFO) behavior.
+
+If `timeout` is specified, cancel the `wait` when it expires and return
+`:timed_out`. The minimum value for `timeout` is 0.001 seconds, i.e. 1
+millisecond.
+"""
+function wait_with_timeout(c::GenericCondition; first::Bool=false, timeout::Real=0.0)
+    ct = current_task()
+    Base._wait2(c, ct, first)
+    token = Base.unlockall(c.lock)
+
+    timer::Union{Timer, Nothing} = nothing
+    waiter_left::Union{Threads.Atomic{Bool}, Nothing} = nothing
+    if timeout > 0.0
+        timer = Timer(timeout)
+        waiter_left = Threads.Atomic{Bool}(false)
+        # start a task to wait on the timer
+        t = Task() do
+            try
+                wait(timer)
+            catch e
+                # if the timer was closed, the waiting task has been scheduled; do nothing
+                e isa EOFError && return
+            end
+            dosched = false
+            lock(c.lock)
+            # Confirm that the waiting task is still in the wait queue and remove it. If
+            # the task is not in the wait queue, it must have been notified already so we
+            # don't do anything here.
+            if !waiter_left[] && ct.queue === c.waitq
+                dosched = true
+                Base.list_deletefirst!(c.waitq, ct)
+            end
+            unlock(c.lock)
+            # send the waiting task a timeout
+            dosched && schedule(ct, :timed_out)
+        end
+        t.sticky = false
+        Threads._spawn_set_thrpool(t, :interactive)
+        schedule(t)
+    end
+
+    try
+        res = wait()
+        if timer !== nothing
+            close(timer)
+            waiter_left[] = true
+        end
+        return res
+    catch
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
+        rethrow()
+    finally
+        Base.relockall(c.lock, token)
+    end
+end
+
+"""
+    Base.Experimental.@reexport using Module
+
+Automatically re-export all exported names from a module when using it.
+
+# Examples
+
+```jldoctest
+julia> module A
+           export foo
+           foo() = "foo from A"
+       end
+A
+
+julia> module B
+           using Base.Experimental: @reexport
+           @reexport using ..A
+           # Now B exports foo, even though it's defined in A
+       end
+B
+
+julia> using .B
+
+julia> foo()
+"foo from A"
+```
+
+!!! warning
+    This interface is experimental and subject to change or removal without notice.
+"""
+macro reexport(ex)
+    if !Meta.isexpr(ex, :using) || isempty(ex.args)
+        error("@reexport must be used with a `using` statement, e.g., `@reexport using MyModule`")
+    end
+
+    # Check for `using Foo: x, y` syntax (not supported)
+    if any(arg -> Meta.isexpr(arg, :(:)), ex.args)
+        error("@reexport does not support `using Module: names` syntax")
+    end
+
+    # Generate _eval_using calls for each module in the using statement
+    calls = Expr(:block)
+    for mod_path in ex.args
+        push!(calls.args, :($(Core._eval_using)($(__module__), $(QuoteNode(mod_path)), $(Base.JL_MODULE_USING_REEXPORT))))
+    end
+    push!(calls.args, Expr(:latestworld))
+    push!(calls.args, :nothing)
+
+    return esc(calls)
+end
+
+struct VersionedParse
+    ver::VersionNumber
+end
+
+function (vp::VersionedParse)(code, filename::String, lineno::Int, offset::Int, options::Symbol)
+    if !isdefined(Base, :JuliaSyntax)
+        if vp.ver === VERSION
+            return Core._parse
+        end
+        error("JuliaSyntax module is required for syntax version $(vp.ver), but it is not loaded.")
+    end
+    Base.JuliaSyntax.core_parser_hook(code, filename, lineno, offset, options; syntax_version=vp.ver)
+end
+
+struct VersionedLower
+    ver::VersionNumber
+end
+
+function (vp::VersionedLower)(@nospecialize(code), mod::Module,
+                              file="none", line=0, world=typemax(Csize_t), warn=false)
+    if !isdefined(Base, :JuliaLowering)
+        if vp.ver === VERSION
+            return Core._parse
+        end
+        error("JuliaLowering module is required for syntax version $(vp.ver), but it is not loaded.")
+    end
+    Base.JuliaLowering.core_lowering_hook(code, filename, lineno, offset, options; syntax_version=vp.ver)
+end
+
+function Base.set_syntax_version(m::Module, ver::VersionNumber)
+    parser = VersionedParse(ver)
+    Core.declare_const(m, :_internal_julia_parse, parser)
+    #lowerer = VersionedLower(ver)
+    #Core.declare_const(m, :_internal_julia_lower, lowerer)
+    nothing
+end
+
+"""
+    Base.Experimental.@set_syntax_version ver
+
+Sets the syntax version to the current module to `ver`. This overrides settings of `syntax.julia_version` or
+`compat.julia` from Project.toml.
+
+!!! compat "Julia 1.14"
+    This macro was added in Julia 1.14.
+
+!!! warning
+    The new syntax version will take effect only for code parsed after the *invocation* of the result of the macro
+    expansion. This may be unintuitive if the macro is used inside a module body, as the entire module will be parsed
+    before any statements therein are executed, e.g. consider.
+
+    ```
+    @set_syntax_version v"1.13"
+    module ChangeSyntax
+        @set_syntax_version v"1.14"
+        expr1 # Parsed with syntax version 1.13
+     # The call itself is parsed with syntax version 1.13, but the included code is parsed with syntax version 1.14
+        include_string(ChangeSyntax, "expr2")
+        expr3 # Parsed with syntax version 1.13
+    end
+    ```
+
+    For this reason, the Project.toml mechanism is strongly preferred for packages.
+    However, this macro may be useful for scripts or the REPL.
+
+!!! warning
+    This interface is experimental and subject to change or removal without notice.
+"""
+macro set_syntax_version(ver)
+    Expr(:call, Base.set_syntax_version, __module__, esc(ver))
+end
+
+"""
+    Base.Experimental.@VERSION ver
+
+This macro provides access to parser (and possibly in the future other frontend component) language version
+information. In particular, `(@VERSION).syntax` provides the syntax version used to parse the location where the macro is invoked.
+
+!!! compat "Julia 1.14"
+    This macro was added in Julia 1.14.
+
+!!! note
+    Calls to this macro have special handling in the parser and the name `@VERSION` is mandatory. At this time, other macros do not
+    have access to source syntax version information.
+"""
+function var"@VERSION"(__source__::Union{LineNumberNode, Core.MacroSource}, __module__::Module)
+    # This macro has special handling in the parser, which puts the current syntax
+    # version into __source__.
+    if isa(__source__, LineNumberNode)
+        return :((; syntax = v"1.13", runtime = VERSION))
+    else
+        return :((; syntax = $(__source__.syntax_ver), runtime = VERSION))
+    end
+end
+
+end # module
diff --git a/base/exports.jl b/base/exports.jl
index cda1937253ef4..11de257880ad8 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -1,5 +1,44 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# Re-exports from `Core`
+export Core,
+    # key types
+    Any, DataType, Vararg, NTuple,
+    Tuple, Type, UnionAll, TypeVar, Union, Nothing, Cvoid,
+    AbstractArray, DenseArray, NamedTuple, Pair,
+    # special objects
+    Function, Method, Module, Symbol, Task, UndefInitializer, undef, WeakRef, VecElement,
+    Array, Memory, MemoryRef, AtomicMemory, AtomicMemoryRef, GenericMemory, GenericMemoryRef,
+    # numeric types
+    Number, Real, Integer, Bool, Ref, Ptr,
+    AbstractFloat, Float16, Float32, Float64,
+    Signed, Int, Int8, Int16, Int32, Int64, Int128,
+    Unsigned, UInt, UInt8, UInt16, UInt32, UInt64, UInt128,
+    # string types
+    AbstractChar, Char, AbstractString, String, IO,
+    # errors
+    ErrorException, BoundsError, DivideError, DomainError, Exception,
+    InterruptException, InexactError, OutOfMemoryError, ReadOnlyMemoryError,
+    OverflowError, StackOverflowError, SegmentationFault, UndefRefError, UndefVarError,
+    TypeError, ArgumentError, MethodError, AssertionError, LoadError, InitError,
+    UndefKeywordError, ConcurrencyViolationError, FieldError,
+    # AST representation
+    Expr, QuoteNode, LineNumberNode, GlobalRef,
+    # object model functions
+    fieldtype, getfield, setfield!, swapfield!, modifyfield!, replacefield!, setfieldonce!,
+    nfields, throw, tuple, ===, isdefined,
+    # access to globals
+    getglobal, setglobal!, swapglobal!, modifyglobal!, replaceglobal!, setglobalonce!, isdefinedglobal,
+    # ifelse, sizeof    # not exported, to avoid conflicting with Base
+    # type reflection
+    <:, typeof, isa, typeassert,
+    # method reflection
+    applicable, invoke,
+    # constants
+    nothing, Main,
+    # backwards compatibility
+    arrayref, arrayset, arraysize, const_arrayref
+
 export
 # Modules
     Meta,
@@ -58,6 +97,7 @@ export
     IOBuffer,
     IOStream,
     LinRange,
+    Lockable,
     Irrational,
     LazyString,
     Matrix,
@@ -65,9 +105,13 @@ export
     Missing,
     NTuple,
     IdDict,
+    IdSet,
     OrdinalRange,
     Pair,
     PartialQuickSort,
+    OncePerProcess,
+    OncePerTask,
+    OncePerThread,
     PermutedDimsArray,
     QuickSort,
     Rational,
@@ -294,7 +338,9 @@ export
     isinf,
     isinteger,
     isnan,
+    isnegative,
     isodd,
+    ispositive,
     ispow2,
     isqrt,
     isreal,
@@ -405,12 +451,14 @@ export
     indexin,
     argmax,
     argmin,
+    insertdims,
     invperm,
     invpermute!,
     isassigned,
     isperm,
     issorted,
     last,
+    logrange,
     mapslices,
     max,
     maximum!,
@@ -532,6 +580,7 @@ export
     getkey,
     haskey,
     in,
+    in!,
     intersect!,
     intersect,
     isdisjoint,
@@ -546,6 +595,7 @@ export
     mapfoldl,
     mapfoldr,
     mapreduce,
+    memoryref,
     merge!,
     mergewith!,
     merge,
@@ -590,9 +640,11 @@ export
     codepoint,
     codeunit,
     codeunits,
+    ctruncate,
     digits,
     digits!,
     eachsplit,
+    eachrsplit,
     escape_string,
     hex2bytes,
     hex2bytes!,
@@ -613,6 +665,7 @@ export
     join,
     lpad,
     lstrip,
+    ltruncate,
     ncodeunits,
     ndigits,
     nextind,
@@ -625,9 +678,11 @@ export
     rpad,
     rsplit,
     rstrip,
+    rtruncate,
     split,
     string,
     strip,
+    takestring!,
     textwidth,
     thisind,
     titlecase,
@@ -702,6 +757,8 @@ export
     yield,
     yieldto,
     wait,
+    waitany,
+    waitall,
     timedwait,
     asyncmap,
     asyncmap!,
@@ -710,6 +767,7 @@ export
 # channels
     take!,
     put!,
+    isfull,
     isready,
     fetch,
     bind,
@@ -749,9 +807,11 @@ export
     swapproperty!,
     modifyproperty!,
     replaceproperty!,
+    setpropertyonce!,
     fieldoffset,
     fieldname,
     fieldnames,
+    fieldindex,
     fieldcount,
     fieldtypes,
     hasfield,
@@ -778,6 +838,7 @@ export
     gensym,
     @kwdef,
     macroexpand,
+    macroexpand!,
     @macroexpand1,
     @macroexpand,
     parse,
@@ -802,6 +863,7 @@ export
     @invoke,
     invokelatest,
     @invokelatest,
+    @world,
 
 # loading source files
     __precompile__,
@@ -925,6 +987,7 @@ export
     isblockdev,
     ischardev,
     isdir,
+    isexecutable,
     isfifo,
     isfile,
     islink,
@@ -967,6 +1030,8 @@ export
     setenv,
     addenv,
     setcpuaffinity,
+    setuid,
+    setgid,
     success,
     withenv,
 
@@ -998,6 +1063,7 @@ export
     @__DIR__,
     @__LINE__,
     @__MODULE__,
+    @__FUNCTION__,
     @int128_str,
     @uint128_str,
     @big_str,
@@ -1029,6 +1095,7 @@ export
     @elapsed,
     @allocated,
     @allocations,
+    @lock_conflicts,
 
     # tasks
     @sync,
@@ -1057,10 +1124,13 @@ export
     @atomic,
     @atomicswap,
     @atomicreplace,
+    @atomiconce,
     @__dot__,
     @enum,
     @label,
     @goto,
     @view,
     @views,
-    @static
+    @static,
+
+    @main
diff --git a/base/expr.jl b/base/expr.jl
index 234085d9c9774..eaa04aa0a0226 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -9,7 +9,11 @@ const is_expr = isexpr
 """
     gensym([tag])
 
-Generates a symbol which will not conflict with other variable names (in the same module).
+Generate a symbol unique among all calls to this function within the same process.
+If a string or symbol tag argument is specified, it is included in the generated name.
+
+Note that packages may be precompiled in separate processes, so names will not be unique
+between definition time and run time.
 """
 gensym() = ccall(:jl_gensym, Ref{Symbol}, ())
 
@@ -19,10 +23,10 @@ gensym(ss::String...) = map(gensym, ss)
 gensym(s::Symbol) = ccall(:jl_tagged_gensym, Ref{Symbol}, (Ptr{UInt8}, Csize_t), s, -1 % Csize_t)
 
 """
-    @gensym
+    @gensym var1 var2 ...
 
-Generates a gensym symbol for a variable. For example, `@gensym x y` is transformed into
-`x = gensym("x"); y = gensym("y")`.
+Generate symbols with [`gensym`](@ref) and assign them to the given variables.
+For example, `@gensym x y` is transformed into `x = gensym("x"); y = gensym("y")`.
 """
 macro gensym(names...)
     blk = Expr(:block)
@@ -39,33 +43,40 @@ isexpr(@nospecialize(ex), head::Symbol) = isa(ex, Expr) && ex.head === head
 isexpr(@nospecialize(ex), head::Symbol, n::Int) = isa(ex, Expr) && ex.head === head && length(ex.args) == n
 
 copy(e::Expr) = exprarray(e.head, copy_exprargs(e.args))
+function copy(x::PhiNode)
+    values = x.values
+    nvalues = length(values)
+    new_values = Vector{Any}(undef, nvalues)
+    @inbounds for i = 1:nvalues
+        isassigned(values, i) || continue
+        new_values[i] = copy_exprs(values[i])
+    end
+    return PhiNode(copy(x.edges), new_values)
+end
+function copy(x::PhiCNode)
+    values = x.values
+    nvalues = length(values)
+    new_values = Vector{Any}(undef, nvalues)
+    @inbounds for i = 1:nvalues
+        isassigned(values, i) || continue
+        new_values[i] = copy_exprs(values[i])
+    end
+    return PhiCNode(new_values)
+end
 
-# copy parts of an AST that the compiler mutates
+# copy parts of an IR that the compiler mutates
+# (this is not a general-purpose copy for an Expr AST)
 function copy_exprs(@nospecialize(x))
     if isa(x, Expr)
         return copy(x)
     elseif isa(x, PhiNode)
-        values = x.values
-        nvalues = length(values)
-        new_values = Vector{Any}(undef, nvalues)
-        @inbounds for i = 1:nvalues
-            isassigned(values, i) || continue
-            new_values[i] = copy_exprs(values[i])
-        end
-        return PhiNode(copy(x.edges), new_values)
+        return copy(x)
     elseif isa(x, PhiCNode)
-        values = x.values
-        nvalues = length(values)
-        new_values = Vector{Any}(undef, nvalues)
-        @inbounds for i = 1:nvalues
-            isassigned(values, i) || continue
-            new_values[i] = copy_exprs(values[i])
-        end
-        return PhiCNode(new_values)
+        return copy(x)
     end
     return x
 end
-copy_exprargs(x::Array{Any,1}) = Any[copy_exprs(@inbounds x[i]) for i in 1:length(x)]
+copy_exprargs(x::Array{Any,1}) = Any[copy_exprs(@inbounds x[i]) for i in eachindex(x)]
 
 @eval exprarray(head::Symbol, arg::Array{Any,1}) = $(Expr(:new, :Expr, :head, :arg))
 
@@ -76,30 +87,105 @@ function copy(c::CodeInfo)
     cnew.slotnames = copy(cnew.slotnames)
     cnew.slotflags = copy(cnew.slotflags)
     if cnew.slottypes !== nothing
-        cnew.slottypes = copy(cnew.slottypes)
+        cnew.slottypes = copy(cnew.slottypes::Vector{Any})
     end
-    cnew.codelocs  = copy(cnew.codelocs)
-    cnew.linetable = copy(cnew.linetable::Union{Vector{Any},Vector{Core.LineInfoNode}})
     cnew.ssaflags  = copy(cnew.ssaflags)
-    cnew.edges     = cnew.edges === nothing ? nothing : copy(cnew.edges::Vector)
+    cnew.edges     = cnew.edges === nothing || cnew.edges isa Core.SimpleVector ? cnew.edges : copy(cnew.edges::Vector)
     ssavaluetypes  = cnew.ssavaluetypes
     ssavaluetypes isa Vector{Any} && (cnew.ssavaluetypes = copy(ssavaluetypes))
     return cnew
 end
 
+function isequal_exprarg(@nospecialize(x), @nospecialize(y))
+    x isa typeof(y) || return false
+    x === y && return true
+    # c.f. list of types in copy_expr also
+    if x isa Expr
+        x == (y::Expr) && return true
+    elseif x isa QuoteNode
+        x == (y::QuoteNode) && return true
+    elseif x isa PhiNode
+        x == (y::PhiNode) && return true
+    elseif x isa PhiCNode
+        x == (y::PhiCNode) && return true
+    elseif x isa CodeInfo
+        x == (y::CodeInfo) && return true
+    end
+    return false
+end
+
 
-==(x::Expr, y::Expr) = x.head === y.head && isequal(x.args, y.args)
-==(x::QuoteNode, y::QuoteNode) = isequal(x.value, y.value)
-==(stmt1::Core.PhiNode, stmt2::Core.PhiNode) = stmt1.edges == stmt2.edges && stmt1.values == stmt2.values
+function isequal_exprargs(x::Array{Any,1}, y::Array{Any,1})
+    l = length(x)
+    l == length(y) || return false
+    for i = 1:l
+        if !isassigned(x, i)
+            # phi and phic values are permitted to be undef
+            isassigned(y, i) && return false
+        else
+            isassigned(y, i) || return false
+            isequal_exprarg(x[i], y[i]) || return false
+        end
+    end
+    return true
+end
+
+# define == such that == inputs to parsing (including line numbers) yield == outputs from lowering (including all metadata)
+# (aside from cases where parsing just returns a number, which are ambiguous here)
+==(x::Expr, y::Expr) = x.head === y.head && isequal_exprargs(x.args, y.args)
+
+==(x::QuoteNode, y::QuoteNode) = isequal_exprarg(x.value, y.value)
+
+==(stmt1::Core.PhiNode, stmt2::Core.PhiNode) = isequal(stmt1.edges, stmt2.edges) && isequal_exprargs(stmt1.values, stmt2.values)
+
+==(stmt1::Core.PhiCNode, stmt2::Core.PhiCNode) = isequal_exprargs(stmt1.values, stmt2.values)
+
+function ==(stmt1::CodeInfo, stmt2::CodeInfo)
+    for i in 1:nfields(stmt1)
+        if !isdefined(stmt1, i)
+            isdefined(stmt2, i) && return false
+        else
+            isdefined(stmt2, i) || return false
+            f1 = getfield(stmt1, i)
+            f2 = getfield(stmt2, i)
+            f1 isa typeof(f2) || return false
+            if f1 isa Vector{Any}
+                # code or types vectors
+                isequal_exprargs(f1, f2::Vector{Any}) || return false
+            elseif f1 isa DebugInfo
+                f1 == f2::DebugInfo || return false
+            elseif f1 isa Vector
+                # misc data
+                l = length(f1)
+                l == length(f2::Vector) || return false
+                for i = 1:l
+                    f1[i] === f2[i] || return false
+                end
+            else
+                # misc fields
+                f1 === f2 || return false
+            end
+        end
+    end
+    return true
+end
+
+function ==(x::DebugInfo, y::DebugInfo)
+    for i in 1:nfields(x)
+        getfield(x, i) == getfield(y, i) || return false
+    end
+    return true
+end
 
 """
-    macroexpand(m::Module, x; recursive=true)
+    macroexpand(m::Module, x; recursive=true, legacyscope=true)
 
 Take the expression `x` and return an equivalent expression with all macros removed (expanded)
 for executing in module `m`.
 The `recursive` keyword controls whether deeper levels of nested macros are also expanded.
+The `legacyscope` keyword controls whether legacy macroscope expansion is performed.
 This is demonstrated in the example below:
-```julia-repl
+```jldoctest; filter = r"#= .*:6 =#"
 julia> module M
            macro m1()
                42
@@ -114,21 +200,38 @@ julia> macroexpand(M, :(@m2()), recursive=true)
 42
 
 julia> macroexpand(M, :(@m2()), recursive=false)
-:(#= REPL[16]:6 =# M.@m1)
+:(#= REPL[1]:6 =# @m1)
 ```
 """
-function macroexpand(m::Module, @nospecialize(x); recursive=true)
-    if recursive
-        ccall(:jl_macroexpand, Any, (Any, Any), x, m)
-    else
-        ccall(:jl_macroexpand1, Any, (Any, Any), x, m)
-    end
+function macroexpand(m::Module, @nospecialize(x); recursive=true, legacyscope=true)
+    ccall(:jl_macroexpand, Any, (Any, Any, Cint, Cint, Cint), x, m, recursive, false, legacyscope)
 end
 
 """
-    @macroexpand
+    macroexpand!(m::Module, x; recursive=true, legacyscope=false)
+
+Take the expression `x` and return an equivalent expression with all macros removed (expanded)
+for executing in module `m`, modifying `x` in place without copying.
+The `recursive` keyword controls whether deeper levels of nested macros are also expanded.
+The `legacyscope` keyword controls whether legacy macroscope expansion is performed.
+
+This function performs macro expansion without the initial copy step, making it more efficient
+when the original expression is no longer needed. By default, macroscope expansion is disabled
+for in-place expansion as it can be called separately if needed.
+
+!!! warning
+    This function modifies the input expression `x` in place. Use `macroexpand` if you need
+    to preserve the original expression.
+"""
+function macroexpand!(m::Module, @nospecialize(x); recursive=true, legacyscope=false)
+    ccall(:jl_macroexpand, Any, (Any, Any, Cint, Cint, Cint), x, m, recursive, true, legacyscope)
+end
+
+"""
+    @macroexpand [mod,] ex
 
 Return equivalent expression with all macros removed (expanded).
+If two arguments are provided, the first is the module to evaluate in.
 
 There are differences between `@macroexpand` and [`macroexpand`](@ref).
 
@@ -139,7 +242,7 @@ There are differences between `@macroexpand` and [`macroexpand`](@ref).
   expands with respect to the module in which it is called.
 
 This is best seen in the following example:
-```julia-repl
+```jldoctest
 julia> module M
            macro m()
                1
@@ -147,7 +250,7 @@ julia> module M
            function f()
                (@macroexpand(@m),
                 macroexpand(M, :(@m)),
-                macroexpand(Main, :(@m))
+                macroexpand(parentmodule(M), :(@m))
                )
            end
        end
@@ -163,19 +266,27 @@ julia> M.f()
 ```
 With `@macroexpand` the expression expands where `@macroexpand` appears in the code (module `M` in the example).
 With `macroexpand` the expression expands in the module given as the first argument.
+
+!!! compat "Julia 1.11"
+    The two-argument form requires at least Julia 1.11.
 """
 macro macroexpand(code)
-    return :(macroexpand($__module__, $(QuoteNode(code)), recursive=true))
+    return :(macroexpand($__module__, $(QuoteNode(code)); recursive=true, legacyscope=true))
+end
+macro macroexpand(mod, code)
+    return :(macroexpand($(esc(mod)), $(QuoteNode(code)); recursive=true, legacyscope=true))
 end
-
 
 """
-    @macroexpand1
+    @macroexpand1 [mod,] ex
 
 Non recursive version of [`@macroexpand`](@ref).
 """
 macro macroexpand1(code)
-    return :(macroexpand($__module__, $(QuoteNode(code)), recursive=false))
+    return :(macroexpand($__module__, $(QuoteNode(code)); recursive=false, legacyscope=true))
+end
+macro macroexpand1(mod, code)
+    return :(macroexpand($(esc(mod)), $(QuoteNode(code)); recursive=false, legacyscope=true))
 end
 
 ## misc syntax ##
@@ -243,6 +354,14 @@ Give a hint to the compiler that calls within `block` are worth inlining.
     end
     ```
 
+!!! note
+    The callsite annotation applies to all calls in the block, including function arguments
+    that are themselves calls:
+    ```julia
+    # The compiler will not inline `getproperty`, `g` or `f`
+    @noinline f(x.inner, g(y))
+    ```
+
 !!! note
     When there are nested callsite annotations, the innermost annotation has the precedence:
     ```julia
@@ -343,39 +462,39 @@ macro noinline(x)
 end
 
 """
-    @constprop setting [ex]
+    Base.@constprop setting [ex]
 
 Control the mode of interprocedural constant propagation for the annotated function.
 
 Two `setting`s are supported:
 
-- `@constprop :aggressive [ex]`: apply constant propagation aggressively.
+- `Base.@constprop :aggressive [ex]`: apply constant propagation aggressively.
   For a method where the return type depends on the value of the arguments,
   this can yield improved inference results at the cost of additional compile time.
-- `@constprop :none [ex]`: disable constant propagation. This can reduce compile
+- `Base.@constprop :none [ex]`: disable constant propagation. This can reduce compile
   times for functions that Julia might otherwise deem worthy of constant-propagation.
   Common cases are for functions with `Bool`- or `Symbol`-valued arguments or keyword arguments.
 
-`@constprop` can be applied immediately before a function definition or within a function body.
+`Base.@constprop` can be applied immediately before a function definition or within a function body.
 
 ```julia
 # annotate long-form definition
-@constprop :aggressive function longdef(x)
-  ...
+Base.@constprop :aggressive function longdef(x)
+    ...
 end
 
 # annotate short-form definition
-@constprop :aggressive shortdef(x) = ...
+Base.@constprop :aggressive shortdef(x) = ...
 
 # annotate anonymous function that a `do` block creates
 f() do
-    @constprop :aggressive
+    Base.@constprop :aggressive
     ...
 end
 ```
 
 !!! compat "Julia 1.10"
-  The usage within a function body requires at least Julia 1.10.
+    The usage within a function body requires at least Julia 1.10.
 """
 macro constprop(setting, ex)
     sym = constprop_setting(setting)
@@ -388,71 +507,93 @@ macro constprop(setting)
 end
 
 function constprop_setting(@nospecialize setting)
+    s = setting
     isa(setting, QuoteNode) && (setting = setting.value)
     if setting === :aggressive
         return :aggressive_constprop
     elseif setting === :none
         return :no_constprop
     end
-    throw(ArgumentError(LazyString("@constprop "), setting, "not supported"))
+    throw(ArgumentError(LazyString("`Base.@constprop ", s, "` not supported")))
 end
 
 """
-    @assume_effects setting... [ex]
+    Base.@assume_effects setting... [ex]
 
-Override the compiler's effect modeling for the given method or foreign call.
-`@assume_effects` can be applied immediately before a function definition or within a function body.
-It can also be applied immediately before a `@ccall` expression.
-
-!!! compat "Julia 1.8"
-    Using `Base.@assume_effects` requires Julia version 1.8.
+Override the compiler's effect modeling.
+This macro can be used in several contexts:
+1. Immediately before a method definition, to override the entire effect modeling of the applied method.
+2. Within a function body without any arguments, to override the entire effect modeling of the enclosing method.
+3. Applied to a code block, to override the local effect modeling of the applied code block.
 
 # Examples
 ```jldoctest
-julia> Base.@assume_effects :terminates_locally function pow(x)
-           # this :terminates_locally allows `pow` to be constant-folded
+julia> Base.@assume_effects :terminates_locally function fact(x)
+           # usage 1:
+           # this :terminates_locally allows `fact` to be constant-folded
            res = 1
-           1 < x < 20 || error("bad pow")
+           0 ≤ x < 20 || error("bad fact")
            while x > 1
                res *= x
                x -= 1
            end
            return res
        end
-pow (generic function with 1 method)
+fact (generic function with 1 method)
 
 julia> code_typed() do
-           pow(12)
-       end
-1-element Vector{Any}:
- CodeInfo(
+           fact(12)
+       end |> only
+CodeInfo(
 1 ─     return 479001600
 ) => Int64
 
 julia> code_typed() do
            map((2,3,4)) do x
+               # usage 2:
                # this :terminates_locally allows this anonymous function to be constant-folded
                Base.@assume_effects :terminates_locally
                res = 1
-               1 < x < 20 || error("bad pow")
+               0 ≤ x < 20 || error("bad fact")
                while x > 1
                    res *= x
                    x -= 1
                end
                return res
            end
-       end
-1-element Vector{Any}:
- CodeInfo(
+       end |> only
+CodeInfo(
 1 ─     return (2, 6, 24)
 ) => Tuple{Int64, Int64, Int64}
 
-julia> Base.@assume_effects :total !:nothrow @ccall jl_type_intersection(Vector{Int}::Any, Vector{<:Integer}::Any)::Any
-Vector{Int64} (alias for Array{Int64, 1})
+julia> code_typed() do
+           map((2,3,4)) do x
+               res = 1
+               0 ≤ x < 20 || error("bad fact")
+               # usage 3:
+               # with this :terminates_locally annotation the compiler skips tainting
+               # `:terminates` effect within this `while` block, allowing the parent
+               # anonymous function to be constant-folded
+               Base.@assume_effects :terminates_locally while x > 1
+                   res *= x
+                   x -= 1
+               end
+               return res
+           end
+       end |> only
+CodeInfo(
+1 ─     return (2, 6, 24)
+) => Tuple{Int64, Int64, Int64}
 ```
 
+!!! compat "Julia 1.8"
+    Using `Base.@assume_effects` requires Julia version 1.8.
+
 !!! compat "Julia 1.10"
-  The usage within a function body requires at least Julia 1.10.
+    The usage within a function body requires at least Julia 1.10.
+
+!!! compat "Julia 1.11"
+    The code block annotation requires at least Julia 1.11.
 
 !!! warning
     Improper use of this macro causes undefined behavior (including crashes,
@@ -475,6 +616,9 @@ The following `setting`s are supported.
 - `:terminates_locally`
 - `:notaskstate`
 - `:inaccessiblememonly`
+- `:noub`
+- `:noub_if_noinbounds`
+- `:nortcall`
 - `:foldable`
 - `:removable`
 - `:total`
@@ -494,16 +638,20 @@ The `:consistent` setting asserts that for egal (`===`) inputs:
     contents) are not egal.
 
 !!! note
-    The `:consistent`-cy assertion is made world-age wise. More formally, write
-    ``fᵢ`` for the evaluation of ``f`` in world-age ``i``, then we require:
+    The `:consistent`-cy assertion is made with respect to a particular world range `R`.
+    More formally, write ``fᵢ`` for the evaluation of ``f`` in world-age ``i``, then this setting requires:
     ```math
-    ∀ i, x, y: x ≡ y → fᵢ(x) ≡ fᵢ(y)
+    ∀ i ∈ R, j ∈ R, x, y: x ≡ y → fᵢ(x) ≡ fⱼ(y)
     ```
-    However, for two world ages ``i``, ``j`` s.t. ``i ≠ j``, we may have ``fᵢ(x) ≢ fⱼ(y)``.
+
+    For `@assume_effects`, the range `R` is `m.primary_world:m.deleted_world` of
+    the annotated or containing method.
+
+    For ordinary code instances, `R` is `ci.min_world:ci.max_world`.
 
     A further implication is that `:consistent` functions may not make their
     return value dependent on the state of the heap or any other global state
-    that is not constant for a given world age.
+    that is not constant over the given world age range.
 
 !!! note
     The `:consistent`-cy includes all legal rewrites performed by the optimizer.
@@ -512,13 +660,6 @@ The `:consistent` setting asserts that for egal (`===`) inputs:
     even for the same world age (e.g. because one ran in the interpreter, while
     the other was optimized).
 
-!!! note
-    The `:consistent`-cy assertion currently includes the assertion that the function
-    will not execute any undefined behavior (for any input). Note that undefined behavior
-    may technically cause the function to violate other effect assertions (such as
-    `:nothrow` or `:effect_free`) as well, but we do not model this, and all effects
-    except `:consistent` assume the absence of undefined behavior.
-
 !!! note
     If `:consistent` functions terminate by throwing an exception, that exception
     itself is not required to meet the egality requirement specified above.
@@ -556,7 +697,7 @@ were not executed.
 ---
 ## `:nothrow`
 
-The `:nothrow` settings asserts that this method does not terminate abnormally
+The `:nothrow` settings asserts that this method does not throw an exception
 (i.e. will either always return a value or never return).
 
 !!! note
@@ -565,7 +706,11 @@ The `:nothrow` settings asserts that this method does not terminate abnormally
     method itself.
 
 !!! note
-    `MethodErrors` and similar exceptions count as abnormal termination.
+    If the execution of a method may raise `MethodError`s and similar exceptions, then
+    the method is not considered as `:nothrow`.
+    However, note that environment-dependent errors like `StackOverflowError` or `InterruptException`
+    are not modeled by this effect and thus a method that may result in `StackOverflowError`
+    does not necessarily need to be `!:nothrow` (although it should usually be `!:terminates` too).
 
 ---
 ## `:terminates_globally`
@@ -578,7 +723,7 @@ The `:terminates_globally` settings asserts that this method will eventually ter
 
 !!! note
     The compiler will consider this a strong indication that the method will
-    terminate relatively *quickly* and may (if otherwise legal), call this
+    terminate relatively *quickly* and may (if otherwise legal) call this
     method at compile time. I.e. it is a bad idea to annotate this setting
     on a method that *technically*, but not *practically*, terminates.
 
@@ -638,6 +783,28 @@ global state or mutable memory pointed to by its arguments.
 !!! note
     This `:inaccessiblememonly` assertion covers any other methods called by the annotated method.
 
+---
+## `:noub`
+
+The `:noub` setting asserts that the method will not execute any undefined behavior
+(for any input). Note that undefined behavior may technically cause the method to violate
+any other effect assertions (such as `:consistent` or `:effect_free`) as well, but we do
+not model this, and they assume the absence of undefined behavior.
+
+---
+## `:nortcall`
+
+The `:nortcall` setting asserts that the method does not call `Core.Compiler.return_type`,
+and that any other methods this method might call also do not call `Core.Compiler.return_type`.
+
+!!! note
+    To be precise, this assertion can be used when a call to `Core.Compiler.return_type` is
+    not made at runtime; that is, when the result of `Core.Compiler.return_type` is known
+    exactly at compile time and the call is eliminated by the optimizer. However, since
+    whether the result of `Core.Compiler.return_type` is folded at compile time depends
+    heavily on the compiler's implementation, it is generally risky to assert this if
+    the method in question uses `Core.Compiler.return_type` in any form.
+
 ---
 ## `:foldable`
 
@@ -647,6 +814,8 @@ currently equivalent to the following `setting`s:
 - `:consistent`
 - `:effect_free`
 - `:terminates_globally`
+- `:noub`
+- `:nortcall`
 
 !!! note
     This list in particular does not include `:nothrow`. The compiler will still
@@ -679,6 +848,8 @@ the following other `setting`s:
 - `:terminates_globally`
 - `:notaskstate`
 - `:inaccessiblememonly`
+- `:noub`
+- `:nortcall`
 
 !!! warning
     `:total` is a very strong assertion and will likely gain additional semantics
@@ -698,68 +869,159 @@ the call is generally total, it may however throw.
 """
 macro assume_effects(args...)
     lastex = args[end]
-    inner = unwrap_macrocalls(lastex)
-    if is_function_def(inner)
-        ex = lastex
-        idx = length(args)-1
+    override = compute_assumed_settings(args[begin:end-1])
+    if is_function_def(unwrap_macrocalls(lastex))
+        return esc(pushmeta!(lastex::Expr, form_purity_expr(override)))
     elseif isexpr(lastex, :macrocall) && lastex.args[1] === Symbol("@ccall")
-        ex = lastex
-        idx = length(args)-1
-    else # anonymous function case
-        ex = nothing
-        idx = length(args)
+        lastex.args[1] = GlobalRef(Base, Symbol("@ccall_effects"))
+        insert!(lastex.args, 3, encode_effects_override(override))
+        return esc(lastex)
     end
-    (consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly) =
-        (false, false, false, false, false, false, false, false)
-    for org_setting in args[1:idx]
-        (setting, val) = compute_assumed_setting(org_setting)
-        if setting === :consistent
-            consistent = val
-        elseif setting === :effect_free
-            effect_free = val
-        elseif setting === :nothrow
-            nothrow = val
-        elseif setting === :terminates_globally
-            terminates_globally = val
-        elseif setting === :terminates_locally
-            terminates_locally = val
-        elseif setting === :notaskstate
-            notaskstate = val
-        elseif setting === :inaccessiblememonly
-            inaccessiblememonly = val
-        elseif setting === :foldable
-            consistent = effect_free = terminates_globally = val
-        elseif setting === :removable
-            effect_free = nothrow = terminates_globally = val
-        elseif setting === :total
-            consistent = effect_free = nothrow = terminates_globally = notaskstate = inaccessiblememonly = val
-        else
-            throw(ArgumentError("@assume_effects $org_setting not supported"))
-        end
+    override′ = compute_assumed_setting(override, lastex)
+    if override′ !== nothing
+        # anonymous function case
+        return Expr(:meta, form_purity_expr(override′))
+    else
+        # call site annotation case
+        return Expr(:block,
+                    form_purity_expr(override),
+                    Expr(:local, Expr(:(=), :val, esc(lastex))),
+                    Expr(:purity), # region end token
+                    :val)
     end
-    if is_function_def(inner)
-        return esc(pushmeta!(ex, :purity,
-            consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly))
-    elseif isexpr(ex, :macrocall) && ex.args[1] === Symbol("@ccall")
-        ex.args[1] = GlobalRef(Base, Symbol("@ccall_effects"))
-        insert!(ex.args, 3, Core.Compiler.encode_effects_override(Core.Compiler.EffectsOverride(
-            consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly,
-        )))
-        return esc(ex)
-    else # anonymous function case
-        return Expr(:meta, Expr(:purity,
-            consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly))
+end
+
+function compute_assumed_settings(settings)
+    override = EffectsOverride()
+    for setting in settings
+        override = compute_assumed_setting(override, setting)
+        override === nothing &&
+            throw(ArgumentError("`@assume_effects $setting` not supported"))
     end
+    return override
+end
+
+struct EffectsOverride
+    consistent::Bool
+    effect_free::Bool
+    nothrow::Bool
+    terminates_globally::Bool
+    terminates_locally::Bool
+    notaskstate::Bool
+    inaccessiblememonly::Bool
+    noub::Bool
+    noub_if_noinbounds::Bool
+    consistent_overlay::Bool
+    nortcall::Bool
 end
 
-function compute_assumed_setting(@nospecialize(setting), val::Bool=true)
+function EffectsOverride(
+    override::EffectsOverride =
+        EffectsOverride(false, false, false, false, false, false, false, false, false, false, false);
+    consistent::Bool = override.consistent,
+    effect_free::Bool = override.effect_free,
+    nothrow::Bool = override.nothrow,
+    terminates_globally::Bool = override.terminates_globally,
+    terminates_locally::Bool = override.terminates_locally,
+    notaskstate::Bool = override.notaskstate,
+    inaccessiblememonly::Bool = override.inaccessiblememonly,
+    noub::Bool = override.noub,
+    noub_if_noinbounds::Bool = override.noub_if_noinbounds,
+    consistent_overlay::Bool = override.consistent_overlay,
+    nortcall::Bool = override.nortcall)
+    return EffectsOverride(
+        consistent,
+        effect_free,
+        nothrow,
+        terminates_globally,
+        terminates_locally,
+        notaskstate,
+        inaccessiblememonly,
+        noub,
+        noub_if_noinbounds,
+        consistent_overlay,
+        nortcall)
+end
+
+const NUM_EFFECTS_OVERRIDES = 11 # sync with julia.h
+
+function compute_assumed_setting(override::EffectsOverride, @nospecialize(setting), val::Bool=true)
     if isexpr(setting, :call) && setting.args[1] === :(!)
-        return compute_assumed_setting(setting.args[2], !val)
+        return compute_assumed_setting(override, setting.args[2], !val)
     elseif isa(setting, QuoteNode)
-        return compute_assumed_setting(setting.value, val)
-    else
-        return (setting, val)
+        return compute_assumed_setting(override, setting.value, val)
+    end
+    if setting === :consistent
+        return EffectsOverride(override; consistent = val)
+    elseif setting === :effect_free
+        return EffectsOverride(override; effect_free = val)
+    elseif setting === :nothrow
+        return EffectsOverride(override; nothrow = val)
+    elseif setting === :terminates_globally
+        return EffectsOverride(override; terminates_globally = val)
+    elseif setting === :terminates_locally
+        return EffectsOverride(override; terminates_locally = val)
+    elseif setting === :notaskstate
+        return EffectsOverride(override; notaskstate = val)
+    elseif setting === :inaccessiblememonly
+        return EffectsOverride(override; inaccessiblememonly = val)
+    elseif setting === :noub
+        return EffectsOverride(override; noub = val)
+    elseif setting === :noub_if_noinbounds
+        return EffectsOverride(override; noub_if_noinbounds = val)
+    elseif setting === :foldable
+        consistent = effect_free = terminates_globally = noub = nortcall = val
+        return EffectsOverride(override; consistent, effect_free, terminates_globally, noub, nortcall)
+    elseif setting === :removable
+        effect_free = nothrow = terminates_globally = val
+        return EffectsOverride(override; effect_free, nothrow, terminates_globally)
+    elseif setting === :total
+        consistent = effect_free = nothrow = terminates_globally = notaskstate =
+            inaccessiblememonly = noub = nortcall = val
+        return EffectsOverride(override;
+            consistent, effect_free, nothrow, terminates_globally, notaskstate,
+            inaccessiblememonly, noub, nortcall)
+    end
+    return nothing
+end
+
+function encode_effects_override(eo::EffectsOverride)
+    e = 0x0000
+    eo.consistent          && (e |= (0x0001 << 0))
+    eo.effect_free         && (e |= (0x0001 << 1))
+    eo.nothrow             && (e |= (0x0001 << 2))
+    eo.terminates_globally && (e |= (0x0001 << 3))
+    eo.terminates_locally  && (e |= (0x0001 << 4))
+    eo.notaskstate         && (e |= (0x0001 << 5))
+    eo.inaccessiblememonly && (e |= (0x0001 << 6))
+    eo.noub                && (e |= (0x0001 << 7))
+    eo.noub_if_noinbounds  && (e |= (0x0001 << 8))
+    eo.consistent_overlay  && (e |= (0x0001 << 9))
+    eo.nortcall            && (e |= (0x0001 << 10))
+    return e
+end
+
+function decode_effects_override(e::UInt16)
+    return EffectsOverride(
+        !iszero(e & (0x0001 << 0)),
+        !iszero(e & (0x0001 << 1)),
+        !iszero(e & (0x0001 << 2)),
+        !iszero(e & (0x0001 << 3)),
+        !iszero(e & (0x0001 << 4)),
+        !iszero(e & (0x0001 << 5)),
+        !iszero(e & (0x0001 << 6)),
+        !iszero(e & (0x0001 << 7)),
+        !iszero(e & (0x0001 << 8)),
+        !iszero(e & (0x0001 << 9)),
+        !iszero(e & (0x0001 << 10)))
+end
+
+function form_purity_expr(override::EffectsOverride)
+    ex = Expr(:purity)
+    for i = 1:NUM_EFFECTS_OVERRIDES
+        push!(ex.args, getfield(override, i))
     end
+    return ex
 end
 
 """
@@ -772,9 +1034,9 @@ end
 Tells the compiler to infer `f` using the declared types of `@nospecialize`d arguments.
 This can be used to limit the number of compiler-generated specializations during inference.
 
-# Example
+# Examples
 
-```julia
+```jldoctest; setup = :(using InteractiveUtils)
 julia> f(A::AbstractArray) = g(A)
 f (generic function with 1 method)
 
@@ -783,7 +1045,7 @@ g (generic function with 1 method)
 
 julia> @code_typed f([1.0])
 CodeInfo(
-1 ─ %1 = invoke Main.g(_2::AbstractArray)::Any
+1 ─ %1 =    invoke g(A::AbstractArray)::Any
 └──      return %1
 ) => Any
 ```
@@ -795,6 +1057,9 @@ while it can not infer the concrete return type of it.
 Without the `@nospecializeinfer`, `f([1.0])` would infer the return type of `g` as `Float64`,
 indicating that inference ran for `g(::Vector{Float64})` despite the prohibition on
 specialized code generation.
+
+!!! compat "Julia 1.10"
+    Using `Base.@nospecializeinfer` requires Julia version 1.10.
 """
 macro nospecializeinfer(ex)
     esc(isa(ex, Expr) ? pushmeta!(ex, :nospecializeinfer) : ex)
@@ -827,29 +1092,23 @@ end
 unwrap_macrocalls(@nospecialize(x)) = x
 function unwrap_macrocalls(ex::Expr)
     inner = ex
-    while inner.head === :macrocall
-        inner = inner.args[end]::Expr
+    while isexpr(inner, :macrocall)
+        inner = inner.args[end]
     end
     return inner
 end
 
-function pushmeta!(ex::Expr, sym::Symbol, args::Any...)
-    if isempty(args)
-        tag = sym
-    else
-        tag = Expr(sym, args...)::Expr
-    end
-
+function pushmeta!(ex::Expr, tag::Union{Symbol,Expr})
     inner = unwrap_macrocalls(ex)
-
     idx, exargs = findmeta(inner)
     if idx != 0
-        push!(exargs[idx].args, tag)
+        metastmt = exargs[idx]::Expr
+        push!(metastmt.args, tag)
     else
         body = inner.args[2]::Expr
         pushfirst!(body.args, Expr(:meta, tag))
     end
-    ex
+    return ex
 end
 
 popmeta!(body, sym) = _getmeta(body, sym, true)
@@ -945,26 +1204,29 @@ function findmeta_block(exargs, argsmatch=args->true)
     return 0, []
 end
 
-remove_linenums!(ex) = ex
-function remove_linenums!(ex::Expr)
-    if ex.head === :block || ex.head === :quote
-        # remove line number expressions from metadata (not argument literal or inert) position
-        filter!(ex.args) do x
-            isa(x, Expr) && x.head === :line && return false
-            isa(x, LineNumberNode) && return false
-            return true
+"""
+    Base.remove_linenums!(ex)
+
+Remove all line-number metadata from expression-like object `ex`.
+"""
+function remove_linenums!(@nospecialize ex)
+    if ex isa Expr
+        if ex.head === :block || ex.head === :quote
+            # remove line number expressions from metadata (not argument literal or inert) position
+            filter!(ex.args) do x
+                isa(x, Expr) && x.head === :line && return false
+                isa(x, LineNumberNode) && return false
+                return true
+            end
         end
-    end
-    for subex in ex.args
-        subex isa Expr && remove_linenums!(subex)
+        for subex in ex.args
+            subex isa Expr && remove_linenums!(subex)
+        end
+    elseif ex isa CodeInfo
+        ex.debuginfo = Core.DebugInfo(ex.debuginfo.def) # TODO: filter partially, but keep edges
     end
     return ex
 end
-function remove_linenums!(src::CodeInfo)
-    src.codelocs .= 0
-    length(src.linetable) > 1 && resize!(src.linetable, 1)
-    return src
-end
 
 replace_linenums!(ex, ln::LineNumberNode) = ex
 function replace_linenums!(ex::Expr, ln::LineNumberNode)
@@ -1023,7 +1285,6 @@ macro generated(f)
     if isa(f, Expr) && (f.head === :function || is_short_function_def(f))
         body = f.args[2]
         lno = body.args[1]
-        tmp = gensym("tmp")
         return Expr(:escape,
                     Expr(f.head, f.args[1],
                          Expr(:block,
@@ -1050,13 +1311,22 @@ If no `order` is specified it defaults to :sequentially_consistent.
     @atomic a.b.x += addend
     @atomic :release a.b.x = new
     @atomic :acquire_release a.b.x += addend
+    @atomic m[idx] = new
+    @atomic m[idx] += addend
+    @atomic :release m[idx] = new
+    @atomic :acquire_release m[idx] += addend
 
 Perform the store operation expressed on the right atomically and return the
 new value.
 
-With `=`, this operation translates to a `setproperty!(a.b, :x, new)` call.
-With any operator also, this operation translates to a `modifyproperty!(a.b,
-:x, +, addend)[2]` call.
+With assignment (`=`), this operation translates to a `setproperty!(a.b, :x, new)`
+or, in case of reference, to a `setindex_atomic!(m, order, new, idx)` call,
+with `order` defaulting to `:sequentially_consistent`.
+
+With any modifying operator this operation translates to a
+`modifyproperty!(a.b, :x, op, addend)[2]` or, in case of reference, to a
+`modifyindex_atomic!(m, order, op, addend, idx...)[2]` call,
+with `order` defaulting to `:sequentially_consistent`.
 
     @atomic a.b.x max arg2
     @atomic a.b.x + arg2
@@ -1064,12 +1334,20 @@ With any operator also, this operation translates to a `modifyproperty!(a.b,
     @atomic :acquire_release max(a.b.x, arg2)
     @atomic :acquire_release a.b.x + arg2
     @atomic :acquire_release a.b.x max arg2
+    @atomic m[idx] max arg2
+    @atomic m[idx] + arg2
+    @atomic max(m[idx], arg2)
+    @atomic :acquire_release max(m[idx], arg2)
+    @atomic :acquire_release m[idx] + arg2
+    @atomic :acquire_release m[idx] max arg2
 
 Perform the binary operation expressed on the right atomically. Store the
-result into the field in the first argument and return the values `(old, new)`.
-
-This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` call.
+result into the field or the reference in the first argument, and return the values
+`(old, new)`.
 
+This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` or,
+in case of reference to a `modifyindex_atomic!(m, order, func, arg2, idx)` call,
+with `order` defaulting to `:sequentially_consistent`.
 
 See [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
@@ -1102,8 +1380,36 @@ julia> @atomic a.x max 5 # again change field x of a to the max value, with sequ
 10 => 10
 ```
 
+```jldoctest
+julia> mem = AtomicMemory{Int}(undef, 2);
+
+julia> @atomic mem[1] = 2 # set mem[1] to value 2 with sequential consistency
+2
+
+julia> @atomic :monotonic mem[1] # fetch the first value of mem, with monotonic consistency
+2
+
+julia> @atomic mem[1] += 1 # increment the first value of mem, with sequential consistency
+3
+
+julia> @atomic mem[1] + 1 # increment the first value of mem, with sequential consistency
+3 => 4
+
+julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency
+4
+
+julia> @atomic max(mem[1], 10) # change the first value of mem to the max value, with sequential consistency
+4 => 10
+
+julia> @atomic mem[1] max 5 # again change the first value of mem to the max value, with sequential consistency
+10 => 10
+```
+
 !!! compat "Julia 1.7"
-    This functionality requires at least Julia 1.7.
+    Atomic fields functionality requires at least Julia 1.7.
+
+!!! compat "Julia 1.12"
+    Atomic reference functionality requires at least Julia 1.12.
 """
 macro atomic(ex)
     if !isa(ex, Symbol) && !is_expr(ex, :(::))
@@ -1130,11 +1436,17 @@ function make_atomic(order, ex)
             return :(getproperty($l, $r, $order))
         elseif isexpr(ex, :call, 3)
             return make_atomic(order, ex.args[2], ex.args[1], ex.args[3])
+        elseif isexpr(ex, :ref)
+            x, idcs = esc(ex.args[1]), map(esc, ex.args[2:end])
+            return :(getindex_atomic($x, $order, $(idcs...)))
         elseif ex.head === :(=)
             l, r = ex.args[1], esc(ex.args[2])
             if is_expr(l, :., 2)
                 ll, lr = esc(l.args[1]), esc(l.args[2])
                 return :(setproperty!($ll, $lr, $r, $order))
+            elseif is_expr(l, :ref)
+                x, idcs = esc(l.args[1]), map(esc, l.args[2:end])
+                return :(setindex_atomic!($x, $order, $r, $(idcs...)))
             end
         end
         if length(ex.args) == 2
@@ -1142,6 +1454,10 @@ function make_atomic(order, ex)
                 op = :+
             elseif ex.head === :(-=)
                 op = :-
+            elseif ex.head === :(|=)
+                op = :|
+            elseif ex.head === :(&=)
+                op = :&
             elseif @isdefined string
                 shead = string(ex.head)
                 if endswith(shead, '=')
@@ -1157,19 +1473,29 @@ function make_atomic(order, ex)
 end
 function make_atomic(order, a1, op, a2)
     @nospecialize
-    is_expr(a1, :., 2) || error("@atomic modify expression missing field access")
-    a1l, a1r, op, a2 = esc(a1.args[1]), esc(a1.args[2]), esc(op), esc(a2)
-    return :(modifyproperty!($a1l, $a1r, $op, $a2, $order))
+    if is_expr(a1, :., 2)
+        a1l, a1r, op, a2 = esc(a1.args[1]), esc(a1.args[2]), esc(op), esc(a2)
+        return :(modifyproperty!($a1l, $a1r, $op, $a2, $order))
+    elseif is_expr(a1, :ref)
+        x, idcs, op, a2 = esc(a1.args[1]), map(esc, a1.args[2:end]), esc(op), esc(a2)
+        return :(modifyindex_atomic!($x, $order, $op, $a2, $(idcs...)))
+    end
+    error("@atomic modify expression missing field access or indexing")
 end
 
 
 """
     @atomicswap a.b.x = new
     @atomicswap :sequentially_consistent a.b.x = new
+    @atomicswap m[idx] = new
+    @atomicswap :sequentially_consistent m[idx] = new
 
-Stores `new` into `a.b.x` and returns the old value of `a.b.x`.
+Stores `new` into `a.b.x` (`m[idx]` in case of reference) and returns the old
+value of `a.b.x` (the old value stored at `m[idx]`, respectively).
 
-This operation translates to a `swapproperty!(a.b, :x, new)` call.
+This operation translates to a `swapproperty!(a.b, :x, new)` or,
+in case of reference, `swapindex_atomic!(mem, order, new, idx)` call,
+with `order` defaulting to `:sequentially_consistent`.
 
 See [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
@@ -1187,8 +1513,23 @@ julia> @atomic a.x # fetch field x of a, with sequential consistency
 4
 ```
 
+```jldoctest
+julia> mem = AtomicMemory{Int}(undef, 2);
+
+julia> @atomic mem[1] = 1;
+
+julia> @atomicswap mem[1] = 4 # replace the first value of `mem` with 4, with sequential consistency
+1
+
+julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency
+4
+```
+
 !!! compat "Julia 1.7"
-    This functionality requires at least Julia 1.7.
+    Atomic fields functionality requires at least Julia 1.7.
+
+!!! compat "Julia 1.12"
+    Atomic reference functionality requires at least Julia 1.12.
 """
 macro atomicswap(order, ex)
     order isa QuoteNode || (order = esc(order))
@@ -1201,9 +1542,14 @@ function make_atomicswap(order, ex)
     @nospecialize
     is_expr(ex, :(=), 2) || error("@atomicswap expression missing assignment")
     l, val = ex.args[1], esc(ex.args[2])
-    is_expr(l, :., 2) || error("@atomicswap expression missing field access")
-    ll, lr = esc(l.args[1]), esc(l.args[2])
-    return :(swapproperty!($ll, $lr, $val, $order))
+    if is_expr(l, :., 2)
+        ll, lr = esc(l.args[1]), esc(l.args[2])
+        return :(swapproperty!($ll, $lr, $val, $order))
+    elseif is_expr(l, :ref)
+        x, idcs = esc(l.args[1]), map(esc, l.args[2:end])
+        return :(swapindex_atomic!($x, $order, $val, $(idcs...)))
+    end
+    error("@atomicswap expression missing field access or indexing")
 end
 
 
@@ -1211,12 +1557,18 @@ end
     @atomicreplace a.b.x expected => desired
     @atomicreplace :sequentially_consistent a.b.x expected => desired
     @atomicreplace :sequentially_consistent :monotonic a.b.x expected => desired
+    @atomicreplace m[idx] expected => desired
+    @atomicreplace :sequentially_consistent m[idx] expected => desired
+    @atomicreplace :sequentially_consistent :monotonic m[idx] expected => desired
 
 Perform the conditional replacement expressed by the pair atomically, returning
 the values `(old, success::Bool)`. Where `success` indicates whether the
 replacement was completed.
 
-This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` call.
+This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` or,
+in case of reference, to a
+`replaceindex_atomic!(mem, success_order, fail_order, expected, desired, idx)` call,
+with both orders defaulting to `:sequentially_consistent`.
 
 See [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
@@ -1233,7 +1585,7 @@ julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 2
 
-julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency
+julia> @atomicreplace a.x 1 => 3 # replace field x of a with 2 if it was 1, with sequential consistency
 (old = 2, success = false)
 
 julia> xchg = 2 => 0; # replace field x of a with 0 if it was 2, with sequential consistency
@@ -1245,8 +1597,34 @@ julia> @atomic a.x # fetch field x of a, with sequential consistency
 0
 ```
 
+```jldoctest
+julia> mem = AtomicMemory{Int}(undef, 2);
+
+julia> @atomic mem[1] = 1;
+
+julia> @atomicreplace mem[1] 1 => 2 # replace the first value of mem with 2 if it was 1, with sequential consistency
+(old = 1, success = true)
+
+julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency
+2
+
+julia> @atomicreplace mem[1] 1 => 3 # replace field x of a with 2 if it was 1, with sequential consistency
+(old = 2, success = false)
+
+julia> xchg = 2 => 0; # replace field x of a with 0 if it was 2, with sequential consistency
+
+julia> @atomicreplace mem[1] xchg
+(old = 2, success = true)
+
+julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency
+0
+```
+
 !!! compat "Julia 1.7"
-    This functionality requires at least Julia 1.7.
+    Atomic fields functionality requires at least Julia 1.7.
+
+!!! compat "Julia 1.12"
+    Atomic reference functionality requires at least Julia 1.12.
 """
 macro atomicreplace(success_order, fail_order, ex, old_new)
     fail_order isa QuoteNode || (fail_order = esc(fail_order))
@@ -1262,13 +1640,207 @@ macro atomicreplace(ex, old_new)
 end
 function make_atomicreplace(success_order, fail_order, ex, old_new)
     @nospecialize
-    is_expr(ex, :., 2) || error("@atomicreplace expression missing field access")
-    ll, lr = esc(ex.args[1]), esc(ex.args[2])
-    if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>)
-        exp, rep = esc(old_new.args[2]), esc(old_new.args[3])
-        return :(replaceproperty!($ll, $lr, $exp, $rep, $success_order, $fail_order))
-    else
-        old_new = esc(old_new)
-        return :(replaceproperty!($ll, $lr, $old_new::Pair..., $success_order, $fail_order))
+    if is_expr(ex, :., 2)
+        ll, lr = esc(ex.args[1]), esc(ex.args[2])
+        if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>)
+            exp, rep = esc(old_new.args[2]), esc(old_new.args[3])
+            return :(replaceproperty!($ll, $lr, $exp, $rep, $success_order, $fail_order))
+        else
+            old_new = esc(old_new)
+            return :(replaceproperty!($ll, $lr, $old_new::Pair..., $success_order, $fail_order))
+        end
+    elseif is_expr(ex, :ref)
+        x, idcs = esc(ex.args[1]), map(esc, ex.args[2:end])
+        if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>)
+            exp, rep = esc(old_new.args[2]), esc(old_new.args[3])
+            return :(replaceindex_atomic!($x, $success_order, $fail_order, $exp, $rep, $(idcs...)))
+        else
+            old_new = esc(old_new)
+            return :(replaceindex_atomic!($x, $success_order, $fail_order, $old_new::Pair..., $(idcs...)))
+        end
+    end
+    error("@atomicreplace expression missing field access or indexing")
+end
+
+"""
+    @atomiconce a.b.x = value
+    @atomiconce :sequentially_consistent a.b.x = value
+    @atomiconce :sequentially_consistent :monotonic a.b.x = value
+    @atomiconce m[idx] = value
+    @atomiconce :sequentially_consistent m[idx] = value
+    @atomiconce :sequentially_consistent :monotonic m[idx] = value
+
+Perform the conditional assignment of value atomically if it was previously
+unset. Returned value `success::Bool` indicates whether the assignment was completed.
+
+This operation translates to a `setpropertyonce!(a.b, :x, value)` or,
+in case of reference, to a `setindexonce_atomic!(m, success_order, fail_order, value, idx)` call,
+with both orders defaulting to `:sequentially_consistent`.
+
+See [Per-field atomics](@ref man-atomics) section in the manual for more details.
+
+# Examples
+```jldoctest
+julia> mutable struct AtomicOnce
+           @atomic x
+           AtomicOnce() = new()
+       end
+
+julia> a = AtomicOnce()
+AtomicOnce(#undef)
+
+julia> @atomiconce a.x = 1 # set field x of a to 1, if unset, with sequential consistency
+true
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+1
+
+julia> @atomiconce :monotonic a.x = 2 # set field x of a to 1, if unset, with monotonic consistence
+false
+```
+
+```jldoctest
+julia> mem = AtomicMemory{Vector{Int}}(undef, 1);
+
+julia> isassigned(mem, 1)
+false
+
+julia> @atomiconce mem[1] = [1] # set the first value of mem to [1], if unset, with sequential consistency
+true
+
+julia> isassigned(mem, 1)
+true
+
+julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency
+1-element Vector{Int64}:
+ 1
+
+julia> @atomiconce :monotonic mem[1] = [2] # set the first value of mem to [2], if unset, with monotonic
+false
+
+julia> @atomic mem[1]
+1-element Vector{Int64}:
+ 1
+```
+
+!!! compat "Julia 1.11"
+    Atomic fields functionality requires at least Julia 1.11.
+
+!!! compat "Julia 1.12"
+    Atomic reference functionality requires at least Julia 1.12.
+"""
+macro atomiconce(success_order, fail_order, ex)
+    fail_order isa QuoteNode || (fail_order = esc(fail_order))
+    success_order isa QuoteNode || (success_order = esc(success_order))
+    return make_atomiconce(success_order, fail_order, ex)
+end
+macro atomiconce(order, ex)
+    order isa QuoteNode || (order = esc(order))
+    return make_atomiconce(order, order, ex)
+end
+macro atomiconce(ex)
+    return make_atomiconce(QuoteNode(:sequentially_consistent), QuoteNode(:sequentially_consistent), ex)
+end
+function make_atomiconce(success_order, fail_order, ex)
+    @nospecialize
+    is_expr(ex, :(=), 2) || error("@atomiconce expression missing assignment")
+    l, val = ex.args[1], esc(ex.args[2])
+    if is_expr(l, :., 2)
+        ll, lr = esc(l.args[1]), esc(l.args[2])
+        return :(setpropertyonce!($ll, $lr, $val, $success_order, $fail_order))
+    elseif is_expr(l, :ref)
+        x, idcs = esc(l.args[1]), map(esc, l.args[2:end])
+        return :(setindexonce_atomic!($x, $success_order, $fail_order, $val, $(idcs...)))
+    end
+    error("@atomiconce expression missing field access or indexing")
+end
+
+# Meta expression head, these generally can't be deleted even when they are
+# in a dead branch but can be ignored when analyzing uses/liveness.
+is_meta_expr_head(head::Symbol) = head === :boundscheck || head === :meta || head === :loopinfo
+is_meta_expr(@nospecialize x) = isa(x, Expr) && is_meta_expr_head(x.head)
+
+"""
+    isa_ast_node(x)
+
+Return false if `x` is not interpreted specially by any of inference, lowering,
+or codegen as either an AST or IR special form.
+"""
+function isa_ast_node(@nospecialize x)
+    # c.f. Core.IR module, augmented with AST types
+    return x isa NewvarNode ||
+           x isa CodeInfo ||
+           x isa LineNumberNode ||
+           x isa GotoNode ||
+           x isa GotoIfNot ||
+           x isa EnterNode ||
+           x isa ReturnNode ||
+           x isa SSAValue ||
+           x isa SlotNumber ||
+           x isa Argument ||
+           x isa QuoteNode ||
+           x isa GlobalRef ||
+           x isa Symbol ||
+           x isa PiNode ||
+           x isa PhiNode ||
+           x isa PhiCNode ||
+           x isa UpsilonNode ||
+           x isa Expr
+end
+
+is_self_quoting(@nospecialize(x)) = !isa_ast_node(x)
+
+"""
+    quoted(x)
+
+Return `x` made safe for inserting as a constant into IR. Note that this does
+not make it safe for inserting into an AST, since eval will sometimes copy some
+types of AST object inside, and even may sometimes evaluate and interpolate any
+`\$` inside, depending on the context.
+"""
+quoted(@nospecialize(x)) = isa_ast_node(x) ? QuoteNode(x) : x
+
+# Implementation of generated functions
+function generated_body_to_codeinfo(ex::Expr, defmod::Module, isva::Bool)
+    ci = ccall(:jl_fl_lower, Any, (Any, Any, Ptr{UInt8}, Csize_t, Csize_t, Cint),
+               ex, defmod, "none", 0, typemax(Csize_t), 0)[1]
+    if !isa(ci, CodeInfo)
+        if isa(ci, Expr) && ci.head === :error
+            msg = ci.args[1]
+            error(msg isa String ? strcat("syntax: ", msg) : msg)
+        end
+        error("The function body AST defined by this @generated function is not pure. This likely means it contains a closure, a comprehension or a generator.")
+    end
+    ci.isva = isva
+    code = ci.code
+    bindings = IdSet{Core.Binding}()
+    for i = 1:length(code)
+        stmt = code[i]
+        if isa(stmt, GlobalRef)
+            push!(bindings, convert(Core.Binding, stmt))
+        end
     end
+    if !isempty(bindings)
+        ci.edges = Core.svec(bindings...)
+    end
+    return ci
+end
+
+# invoke and wrap the results of @generated expression
+function (g::Core.GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize args...)
+    # args is (spvals..., argtypes...)
+    body = g.gen(args...)
+    file = source.file
+    file isa Symbol || (file = :none)
+    lam = Expr(:lambda, Expr(:argnames, g.argnames...).args,
+               Expr(:var"scope-block",
+                    Expr(:block,
+                         LineNumberNode(Int(source.line), source.file),
+                         Expr(:meta, :push_loc, file, :var"@generated body"),
+                         Expr(:return, Expr(:toplevel_pure, body)),
+                         Expr(:meta, :pop_loc))))
+    spnames = g.spnames
+    return generated_body_to_codeinfo(spnames === Core.svec() ? lam : Expr(Symbol("with-static-parameters"), lam, spnames...),
+        source.module,
+        source.isva)
 end
diff --git a/base/fastmath.jl b/base/fastmath.jl
index 44440ebad2050..ed686fb92bf34 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -6,7 +6,7 @@
 # strict IEEE semantics.
 
 # This allows the following transformations. For more information see
-# http://llvm.org/docs/LangRef.html#fast-math-flags:
+# https://llvm.org/docs/LangRef.html#fast-math-flags:
 # nnan: No NaNs - Allow optimizations to assume the arguments and
 #       result are not NaN. Such optimizations are required to retain
 #       defined behavior over NaNs, but the value of the result is
@@ -28,8 +28,9 @@ module FastMath
 export @fastmath
 
 import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast,
-    add_float_fast, sub_float_fast, mul_float_fast, div_float_fast,
+    add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, min_float_fast, max_float_fast,
     eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast
+import Base: afoldl
 
 const fast_op =
     Dict(# basic arithmetic
@@ -101,9 +102,12 @@ const rewrite_op =
 function make_fastmath(expr::Expr)
     if expr.head === :quote
         return expr
-    elseif expr.head === :call && expr.args[1] === :^ && expr.args[3] isa Integer
-        # mimic Julia's literal_pow lowering of literal integer powers
-        return Expr(:call, :(Base.FastMath.pow_fast), make_fastmath(expr.args[2]), Val{expr.args[3]}())
+    elseif expr.head === :call && expr.args[1] === :^
+        ea = expr.args
+        if length(ea) >= 3 && isa(ea[3], Int)
+            # mimic Julia's literal_pow lowering of literal integer powers
+            return Expr(:call, :(Base.FastMath.pow_fast), make_fastmath(ea[2]), Val(ea[3]))
+        end
     end
     op = get(rewrite_op, expr.head, :nothing)
     if op !== :nothing
@@ -136,7 +140,7 @@ may violate strict IEEE semantics. This allows the fastest possible operation,
 but results are undefined -- be careful when doing this, as it may change numerical
 results.
 
-This sets the [LLVM Fast-Math flags](http://llvm.org/docs/LangRef.html#fast-math-flags),
+This sets the [LLVM Fast-Math flags](https://llvm.org/docs/LangRef.html#fast-math-flags),
 and corresponds to the `-ffast-math` option in clang. See [the notes on performance
 annotations](@ref man-performance-annotations) for more details.
 
@@ -164,11 +168,9 @@ add_fast(x::T, y::T) where {T<:FloatTypes} = add_float_fast(x, y)
 sub_fast(x::T, y::T) where {T<:FloatTypes} = sub_float_fast(x, y)
 mul_fast(x::T, y::T) where {T<:FloatTypes} = mul_float_fast(x, y)
 div_fast(x::T, y::T) where {T<:FloatTypes} = div_float_fast(x, y)
-
-add_fast(x::T, y::T, zs::T...) where {T<:FloatTypes} =
-    add_fast(add_fast(x, y), zs...)
-mul_fast(x::T, y::T, zs::T...) where {T<:FloatTypes} =
-    mul_fast(mul_fast(x, y), zs...)
+max_fast(x::T, y::T) where {T<:FloatTypes} = max_float_fast(x, y)
+min_fast(x::T, y::T) where {T<:FloatTypes} = min_float_fast(x, y)
+minmax_fast(x::T, y::T) where {T<:FloatTypes} = (min_fast(x, y), max_fast(x, y))
 
 @fastmath begin
     cmp_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(x==y, 0, ifelse(x<y, -1, +1))
@@ -237,14 +239,6 @@ ComplexTypes = Union{ComplexF32, ComplexF64}
 
     ne_fast(x::T, y::T) where {T<:ComplexTypes} = !(x==y)
 
-    # Note: we use the same comparison for min, max, and minmax, so
-    # that the compiler can convert between them
-    max_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, y, x)
-    min_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, x, y)
-    minmax_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, (x,y), (y,x))
-
-    max_fast(x::T, y::T, z::T...) where {T<:FloatTypes} = max_fast(max_fast(x, y), z...)
-    min_fast(x::T, y::T, z::T...) where {T<:FloatTypes} = min_fast(min_fast(x, y), z...)
 end
 
 # fall-back implementations and type promotion
@@ -257,7 +251,7 @@ for op in (:abs, :abs2, :conj, :inv, :sign)
     end
 end
 
-for op in (:+, :-, :*, :/, :(==), :!=, :<, :<=, :cmp, :rem, :min, :max, :minmax)
+for op in (:-, :/, :(==), :!=, :<, :<=, :cmp, :rem, :minmax)
     op_fast = fast_op[op]
     @eval begin
         # fall-back implementation for non-numeric types
@@ -270,6 +264,31 @@ for op in (:+, :-, :*, :/, :(==), :!=, :<, :<=, :cmp, :rem, :min, :max, :minmax)
     end
 end
 
+for op in (:+, :*, :min, :max)
+    op_fast = fast_op[op]
+    @eval begin
+        $op_fast(x) = $op(x)
+        # fall-back implementation for non-numeric types
+        $op_fast(x, y) = $op(x, y)
+        # type promotion
+        $op_fast(x::Number, y::Number) =
+            $op_fast(promote(x,y)...)
+        # fall-back implementation that applies after promotion
+        $op_fast(x::T,y::T) where {T<:Number} = $op(x,y)
+        # note: these definitions must not cause a dispatch loop when +(a,b) is
+        # not defined, and must only try to call 2-argument definitions, so
+        # that defining +(a,b) is sufficient for full functionality.
+        ($op_fast)(a, b, c, xs...) = (@inline; afoldl($op_fast, ($op_fast)(($op_fast)(a,b),c), xs...))
+        # a further concern is that it's easy for a type like (Int,Int...)
+        # to match many definitions, so we need to keep the number of
+        # definitions down to avoid losing type information.
+        # type promotion
+        $op_fast(a::Number, b::Number, c::Number, xs::Number...) =
+            $op_fast(promote(a,b,c,xs...)...)
+        # fall-back implementation that applies after promotion
+        $op_fast(a::T, b::T, c::T, xs::T...) where {T<:Number} = (@inline; afoldl($op_fast, ($op_fast)(($op_fast)(a,b),c), xs...))
+    end
+end
 
 # Math functions
 exp2_fast(x::Union{Float32,Float64})  = Base.Math.exp2_fast(x)
@@ -278,8 +297,12 @@ exp10_fast(x::Union{Float32,Float64}) = Base.Math.exp10_fast(x)
 
 # builtins
 
-pow_fast(x::Float32, y::Integer) = ccall("llvm.powi.f32.i32", llvmcall, Float32, (Float32, Int32), x, y)
-pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64.i32", llvmcall, Float64, (Float64, Int32), x, y)
+@inline function pow_fast(x::Float64, y::Integer)
+    z = y % Int32
+    z == y ? pow_fast(x, z) : x^y
+end
+pow_fast(x::Float32, y::Integer) = x^y
+pow_fast(x::Float64, y::Int32) = ccall("llvm.powi.f64.i32", llvmcall, Float64, (Float64, Int32), x, y)
 pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already via llvm.powi
 @inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v)
 
@@ -309,7 +332,7 @@ end
         Complex{T}(c, s)
     end
 
-    # See <http://en.cppreference.com/w/cpp/numeric/complex>
+    # See <https://en.cppreference.com/w/cpp/numeric/complex>
     pow_fast(x::T, y::T) where {T<:ComplexTypes} = exp(y*log(x))
     pow_fast(x::T, y::Complex{T}) where {T<:FloatTypes} = exp(y*log(x))
     pow_fast(x::Complex{T}, y::T) where {T<:FloatTypes} = exp(y*log(x))
@@ -364,6 +387,10 @@ for f in (:^, :atan, :hypot, :log)
         # fall-back implementation that applies after promotion
         $f_fast(x::T, y::T) where {T<:Number} = $f(x, y)
     end
+    # Issue 53886 - avoid promotion of Int128 etc to be consistent with non-fastmath
+    if f === :^
+        @eval $f_fast(x::Number, y::Integer) = $f(x, y)
+    end
 end
 
 # Reductions
diff --git a/base/file.jl b/base/file.jl
index 866e82b6e39c2..b57aa3611a9d3 100644
--- a/base/file.jl
+++ b/base/file.jl
@@ -32,7 +32,7 @@ export
 # get and set current directory
 
 """
-    pwd() -> String
+    pwd()::String
 
 Get the current working directory.
 
@@ -128,7 +128,7 @@ julia> pwd()
 "/home/JuliaUser"
 
 julia> cd(readdir, "/home/JuliaUser/Projects/julia")
-34-element Array{String,1}:
+34-element Vector{String}:
  ".circleci"
  ".freebsdci.sh"
  ".git"
@@ -164,7 +164,7 @@ required intermediate directories.
 Return `path`.
 
 # Examples
-```julia-repl
+```jldoctest; setup = :(curdir = pwd(); testdir = mktempdir(); cd(testdir)), teardown = :(cd(curdir); rm(testdir, recursive=true)), filter = r"^\\".*testingdir\\"\$"
 julia> mkdir("testingdir")
 "testingdir"
 
@@ -211,17 +211,17 @@ julia> mkpath("my/test/dir") # creates three directories
 "my/test/dir"
 
 julia> readdir()
-1-element Array{String,1}:
+1-element Vector{String}:
  "my"
 
 julia> cd("my")
 
 julia> readdir()
-1-element Array{String,1}:
+1-element Vector{String}:
  "test"
 
 julia> readdir("test")
-1-element Array{String,1}:
+1-element Vector{String}:
  "dir"
 
 julia> mkpath("intermediate_dir/actually_a_directory.txt") # creates two directories
@@ -230,15 +230,19 @@ julia> mkpath("intermediate_dir/actually_a_directory.txt") # creates two directo
 julia> isdir("intermediate_dir/actually_a_directory.txt")
 true
 
+julia> mkpath("my/test/dir/") # returns the original `path`
+"my/test/dir/"
 ```
 """
 function mkpath(path::AbstractString; mode::Integer = 0o777)
-    isdirpath(path) && (path = dirname(path))
-    dir = dirname(path)
-    (path == dir || isdir(path)) && return path
-    mkpath(dir, mode = checkmode(mode))
+    parent = dirname(path)
+    # stop recursion for `""`, `"/"`, or existing dir
+    (path == parent || isdir(path)) && return path
+    mkpath(parent, mode = checkmode(mode))
     try
-        mkdir(path, mode = mode)
+        # The `isdir` check could be omitted, then `mkdir` will throw an error in cases like `x/`.
+        # Although the error will not be rethrown, we avoid it in advance for performance reasons.
+        isdir(path) || mkdir(path, mode = mode)
     catch err
         # If there is a problem with making the directory, but the directory
         # does in fact exist, then ignore the error. Else re-throw it.
@@ -246,9 +250,13 @@ function mkpath(path::AbstractString; mode::Integer = 0o777)
             rethrow()
         end
     end
-    path
+    return path
 end
 
+# Files that were requested to be deleted but can't be by the current process,
+# i.e. loaded DLLs on Windows, are listed in the directory below
+delayed_delete_ref() = joinpath(tempdir(), "julia_delayed_deletes_ref")
+
 """
     rm(path::AbstractString; force::Bool=false, recursive::Bool=false)
 
@@ -270,20 +278,20 @@ Stacktrace:
 [...]
 ```
 """
-function rm(path::AbstractString; force::Bool=false, recursive::Bool=false)
+function rm(path::AbstractString; force::Bool=false, recursive::Bool=false, allow_delayed_delete::Bool=true)
+    # allow_delayed_delete is used by Pkg.gc() but is otherwise not part of the public API
     if islink(path) || !isdir(path)
         try
-            @static if Sys.iswindows()
-                # is writable on windows actually means "is deletable"
-                st = lstat(path)
-                if ispath(st) && (filemode(st) & 0o222) == 0
-                    chmod(path, 0o777)
-                end
-            end
             unlink(path)
         catch err
-            if force && isa(err, IOError) && err.code==Base.UV_ENOENT
-                return
+            if isa(err, IOError)
+                force && err.code==Base.UV_ENOENT && return
+                @static if Sys.iswindows()
+                    if allow_delayed_delete && err.code==Base.UV_EACCES && endswith(path, ".dll")
+                        delayed_delete_dll(path)
+                        return
+                    end
+                end
             end
             rethrow()
         end
@@ -291,19 +299,23 @@ function rm(path::AbstractString; force::Bool=false, recursive::Bool=false)
         if recursive
             try
                 for p in readdir(path)
-                    rm(joinpath(path, p), force=force, recursive=true)
+                    try
+                        rm(joinpath(path, p), force=force, recursive=true)
+                    catch err
+                        (isa(err, IOError) && err.code==Base.UV_EACCES) || rethrow()
+                    end
                 end
             catch err
-                if !(isa(err, IOError) && err.code==Base.UV_EACCES)
-                    rethrow(err)
-                end
+                (isa(err, IOError) && err.code==Base.UV_EACCES) || rethrow()
             end
         end
         req = Libc.malloc(_sizeof_uv_fs)
         try
             ret = ccall(:uv_fs_rmdir, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), C_NULL, req, path, C_NULL)
             uv_fs_req_cleanup(req)
-            ret < 0 && uv_error("rm($(repr(path)))", ret)
+            if ret < 0 && !(force && ret == Base.UV_ENOENT)
+                uv_error("rm($(repr(path)))", ret)
+            end
             nothing
         finally
             Libc.free(req)
@@ -312,6 +324,22 @@ function rm(path::AbstractString; force::Bool=false, recursive::Bool=false)
 end
 
 
+# Loaded DLLs cannot be deleted on Windows, even with posix delete mode but they can be renamed.
+# delayed_delete_dll(path) does so temporarily, until later cleanup by Pkg.gc().
+function delayed_delete_dll(path)
+    # in-use DLL must be kept on the same drive
+    temp_path = tempname(abspath(dirname(path)); cleanup=false, suffix=string("_", basename(path)))
+    @debug "Could not delete DLL most likely because it is loaded, moving to a temporary path" path temp_path
+    mkpath(delayed_delete_ref())
+    io = last(mktemp(delayed_delete_ref(); cleanup=false))
+    try
+        print(io, temp_path) # record the temporary path for Pkg.gc()
+    finally
+        close(io)
+    end
+    rename(path, temp_path) # do not call mv which could recursively call rm(path)
+end
+
 # The following use Unix command line facilities
 function checkfor_mv_cp_cptree(src::AbstractString, dst::AbstractString, txt::AbstractString;
                                                           force::Bool=false)
@@ -367,7 +395,7 @@ of the file or directory `src` refers to.
 Return `dst`.
 
 !!! note
-    The `cp` function is different from the `cp` command. The `cp` function always operates on
+    The `cp` function is different from the `cp` Unix command. The `cp` function always operates on
     the assumption that `dst` is a file, while the command does different things depending
     on whether `dst` is a directory or a file.
     Using `force=true` when `dst` is a directory will result in loss of all the contents present
@@ -420,13 +448,73 @@ julia> mv("hello.txt", "goodbye.txt", force=true)
 julia> rm("goodbye.txt");
 
 ```
+
+!!! note
+    The `mv` function is different from the `mv` Unix command. The `mv` function by
+    default will error if `dst` exists, while the command will delete
+    an existing `dst` file by default.
+    Also the `mv` function always operates on
+    the assumption that `dst` is a file, while the command does different things depending
+    on whether `dst` is a directory or a file.
+    Using `force=true` when `dst` is a directory will result in loss of all the contents present
+    in the `dst` directory, and `dst` will become a file that has the contents of `src` instead.
 """
 function mv(src::AbstractString, dst::AbstractString; force::Bool=false)
-    checkfor_mv_cp_cptree(src, dst, "moving"; force=force)
-    rename(src, dst)
+    if force
+        _mv_replace(src, dst)
+    else
+        _mv_noreplace(src, dst)
+    end
+end
+
+function _mv_replace(src::AbstractString, dst::AbstractString)
+    # This check is copied from checkfor_mv_cp_cptree
+    if ispath(dst) && Base.samefile(src, dst)
+        abs_src = islink(src) ? abspath(readlink(src)) : abspath(src)
+        abs_dst = islink(dst) ? abspath(readlink(dst)) : abspath(dst)
+        throw(ArgumentError(string("'src' and 'dst' refer to the same file/dir. ",
+                                   "This is not supported.\n  ",
+                                   "`src` refers to: $(abs_src)\n  ",
+                                   "`dst` refers to: $(abs_dst)\n")))
+    end
+    # First try to do a regular rename, because this might avoid a situation
+    # where dst is deleted or truncated.
+    try
+        rename(src, dst)
+    catch err
+        err isa IOError || rethrow()
+        err.code==Base.UV_ENOENT && rethrow()
+        # on rename error try to delete dst if it exists and isn't the same as src
+        checkfor_mv_cp_cptree(src, dst, "moving"; force=true)
+        try
+            rename(src, dst)
+        catch err
+            err isa IOError || rethrow()
+            # on second error, default to force cp && rm
+            cp(src, dst; force=true, follow_symlinks=false)
+            rm(src; recursive=true)
+        end
+    end
+    dst
+end
+
+function _mv_noreplace(src::AbstractString, dst::AbstractString)
+    # Error if dst exists.
+    # This check currently has TOCTTOU issues.
+    checkfor_mv_cp_cptree(src, dst, "moving"; force=false)
+    try
+        rename(src, dst)
+    catch err
+        err isa IOError || rethrow()
+        err.code==Base.UV_ENOENT && rethrow()
+        # on error, default to cp && rm
+        cp(src, dst; force=false, follow_symlinks=false)
+        rm(src; recursive=true)
+    end
     dst
 end
 
+
 """
     touch(path::AbstractString)
     touch(fd::File)
@@ -438,7 +526,7 @@ If the file does not exist a new file is created.
 Return `path`.
 
 # Examples
-```julia-repl
+```jldoctest; setup = :(curdir = pwd(); testdir = mktempdir(); cd(testdir)), teardown = :(cd(curdir); rm(testdir, recursive=true)), filter = r"[\\d\\.]+e[\\+\\-]?\\d+"
 julia> write("my_little_file", 2);
 
 julia> mtime("my_little_file")
@@ -466,7 +554,7 @@ end
 """
     tempdir()
 
-Gets the path of the temporary directory. On Windows, `tempdir()` uses the first environment
+Get the path of the temporary directory. On Windows, `tempdir()` uses the first environment
 variable found in the ordered list `TMP`, `TEMP`, `USERPROFILE`. On all other operating
 systems, `tempdir()` uses the first environment variable found in the ordered list `TMPDIR`,
 `TMP`, `TEMP`, and `TEMPDIR`. If none of these are found, the path `"/tmp"` is used.
@@ -478,13 +566,26 @@ function tempdir()
         rc = ccall(:uv_os_tmpdir, Cint, (Ptr{UInt8}, Ptr{Csize_t}), buf, sz)
         if rc == 0
             resize!(buf, sz[])
-            return String(buf)
+            break
         elseif rc == Base.UV_ENOBUFS
             resize!(buf, sz[] - 1)  # space for null-terminator implied by StringVector
         else
             uv_error("tempdir()", rc)
         end
     end
+    tempdir = String(buf)
+    try
+        s = stat(tempdir)
+        if !ispath(s)
+            @warn "tempdir path does not exist" tempdir
+        elseif !isdir(s)
+            @warn "tempdir path is not a directory" tempdir
+        end
+    catch ex
+        ex isa IOError || ex isa SystemError || rethrow()
+        @warn "accessing tempdir path failed" _exception=ex
+    end
+    return tempdir
 end
 
 """
@@ -502,13 +603,19 @@ function prepare_for_deletion(path::AbstractString)
         return
     end
 
-    try chmod(path, filemode(path) | 0o333)
-    catch; end
+    try
+        chmod(path, filemode(path) | 0o333)
+    catch ex
+        ex isa IOError || ex isa SystemError || rethrow()
+    end
     for (root, dirs, files) in walkdir(path; onerror=x->())
         for dir in dirs
             dpath = joinpath(root, dir)
-            try chmod(dpath, filemode(dpath) | 0o333)
-            catch; end
+            try
+                chmod(dpath, filemode(dpath) | 0o333)
+            catch ex
+                ex isa IOError || ex isa SystemError || rethrow()
+            end
         end
     end
 end
@@ -519,37 +626,97 @@ const TEMP_CLEANUP = Dict{String,Bool}()
 const TEMP_CLEANUP_LOCK = ReentrantLock()
 
 function temp_cleanup_later(path::AbstractString; asap::Bool=false)
-    lock(TEMP_CLEANUP_LOCK)
+    @lock TEMP_CLEANUP_LOCK begin
     # each path should only be inserted here once, but if there
     # is a collision, let !asap win over asap: if any user might
     # still be using the path, don't delete it until process exit
     TEMP_CLEANUP[path] = get(TEMP_CLEANUP, path, true) & asap
     if length(TEMP_CLEANUP) > TEMP_CLEANUP_MAX[]
-        temp_cleanup_purge()
+        temp_cleanup_purge_prelocked(false)
         TEMP_CLEANUP_MAX[] = max(TEMP_CLEANUP_MIN[], 2*length(TEMP_CLEANUP))
     end
-    unlock(TEMP_CLEANUP_LOCK)
-    return nothing
+    end
+    nothing
+end
+
+function temp_cleanup_forget(path::AbstractString)
+    @lock TEMP_CLEANUP_LOCK delete!(TEMP_CLEANUP, path)
+    nothing
 end
 
-function temp_cleanup_purge(; force::Bool=false)
-    need_gc = Sys.iswindows()
-    for (path, asap) in TEMP_CLEANUP
+function temp_cleanup_purge_prelocked(force::Bool)
+    filter!(TEMP_CLEANUP) do (path, asap)
         try
-            if (force || asap) && ispath(path)
-                need_gc && GC.gc(true)
-                need_gc = false
+            ispath(path) || return false
+            if force || asap
                 prepare_for_deletion(path)
                 rm(path, recursive=true, force=true)
             end
-            !ispath(path) && delete!(TEMP_CLEANUP, path)
+            return ispath(path)
         catch ex
             @warn """
                 Failed to clean up temporary path $(repr(path))
                 $ex
                 """ _group=:file
+            ex isa InterruptException && rethrow()
+            return true
         end
     end
+    nothing
+end
+
+function temp_cleanup_purge_all()
+    may_need_gc = false
+    @lock TEMP_CLEANUP_LOCK filter!(TEMP_CLEANUP) do (path, asap)
+        try
+            ispath(path) || return false
+            may_need_gc = true
+            return true
+        catch ex
+            ex isa InterruptException && rethrow()
+            return true
+        end
+    end
+    if may_need_gc
+        # this is only usually required on Sys.iswindows(), but may as well do it everywhere
+        GC.gc(true)
+    end
+    @lock TEMP_CLEANUP_LOCK temp_cleanup_purge_prelocked(true)
+    nothing
+end
+
+# deprecated internal function used by some packages
+temp_cleanup_purge(; force=false) = force ? temp_cleanup_purge_all() : @lock TEMP_CLEANUP_LOCK temp_cleanup_purge_prelocked(false)
+
+function temp_cleanup_postprocess(cleanup_dirs)
+    if !isempty(cleanup_dirs)
+        rmcmd = """
+        cleanuplist = readlines(stdin) # This loop won't start running until stdin is closed, which is supposed to be sequenced after the process exits
+        sleep(1) # Wait for the operating system to hopefully be ready, since the OS implementation is probably incorrect, given the history of buggy work-arounds like this that have existed for ages in dotNet and libuv
+        for path in cleanuplist
+            try
+                rm(path, force=true, recursive=true)
+            catch ex
+                @warn "Failed to clean up temporary path \$(repr(path))\n\$ex" _group=:file
+            end
+        end
+        """
+        cmd = Cmd(Base.cmd_gen(((Base.julia_cmd(),), ("--startup-file=no",), ("-e",), (rmcmd,))); ignorestatus = true, detach = true)
+        pw = Base.PipeEndpoint()
+        run(cmd, pw, devnull, stderr; wait=false)
+        join(pw, cleanup_dirs, "\n")
+        Base.dup(Base._fd(pw)) # intentionally leak a reference, until the process exits
+        close(pw)
+    end
+end
+
+function temp_cleanup_atexit()
+    temp_cleanup_purge_all()
+    @lock TEMP_CLEANUP_LOCK temp_cleanup_postprocess(keys(TEMP_CLEANUP))
+end
+
+function __postinit__()
+    Base.atexit(temp_cleanup_atexit)
 end
 
 const temp_prefix = "jl_"
@@ -566,13 +733,13 @@ end
 
 
 # Obtain a temporary filename.
-function tempname(parent::AbstractString=tempdir(); max_tries::Int = 100, cleanup::Bool=true)
+function tempname(parent::AbstractString=tempdir(); max_tries::Int = 100, cleanup::Bool=true, suffix::AbstractString="")
     isdir(parent) || throw(ArgumentError("$(repr(parent)) is not a directory"))
 
     prefix = joinpath(parent, temp_prefix)
     filename = nothing
     for i in 1:max_tries
-        filename = string(prefix, _rand_filename())
+        filename = string(prefix, _rand_filename(), suffix)
         if ispath(filename)
             filename = nothing
         else
@@ -628,7 +795,7 @@ end # os-test
 
 
 """
-    tempname(parent=tempdir(); cleanup=true) -> String
+    tempname(parent=tempdir(); cleanup=true, suffix="")::String
 
 Generate a temporary file path. This function only returns a path; no file is
 created. The path is likely to be unique, but this cannot be guaranteed due to
@@ -639,18 +806,22 @@ existing at the time of the call to `tempname`.
 When called with no arguments, the temporary name will be an absolute path to a
 temporary name in the system temporary directory as given by `tempdir()`. If a
 `parent` directory argument is given, the temporary path will be in that
-directory instead.
+directory instead. If a suffix is given the tempname will end with that suffix
+and be tested for uniqueness with that suffix.
 
 The `cleanup` option controls whether the process attempts to delete the
 returned path automatically when the process exits. Note that the `tempname`
 function does not create any file or directory at the returned location, so
 there is nothing to cleanup unless you create a file or directory there. If
-you do and `clean` is `true` it will be deleted upon process termination.
+you do and `cleanup` is `true` it will be deleted upon process termination.
 
 !!! compat "Julia 1.4"
     The `parent` and `cleanup` arguments were added in 1.4. Prior to Julia 1.4
     the path `tempname` would never be cleaned up at process termination.
 
+!!! compat "Julia 1.12"
+    The `suffix` keyword argument was added in Julia 1.12.
+
 !!! warning
 
     This can lead to security holes if another process obtains the same
@@ -731,10 +902,11 @@ temporary file upon completion.
 See also: [`mktempdir`](@ref).
 """
 function mktemp(fn::Function, parent::AbstractString=tempdir())
-    (tmp_path, tmp_io) = mktemp(parent, cleanup=false)
+    (tmp_path, tmp_io) = mktemp(parent)
     try
         fn(tmp_path, tmp_io)
     finally
+        temp_cleanup_forget(tmp_path)
         try
             close(tmp_io)
             ispath(tmp_path) && rm(tmp_path)
@@ -750,7 +922,7 @@ end
     mktempdir(f::Function, parent=tempdir(); prefix=$(repr(temp_prefix)))
 
 Apply the function `f` to the result of [`mktempdir(parent; prefix)`](@ref) and remove the
-temporary directory all of its contents upon completion.
+temporary directory and all of its contents upon completion.
 
 See also: [`mktemp`](@ref), [`mkdir`](@ref).
 
@@ -759,10 +931,11 @@ See also: [`mktemp`](@ref), [`mkdir`](@ref).
 """
 function mktempdir(fn::Function, parent::AbstractString=tempdir();
     prefix::AbstractString=temp_prefix)
-    tmpdir = mktempdir(parent; prefix=prefix, cleanup=false)
+    tmpdir = mktempdir(parent; prefix=prefix)
     try
         fn(tmpdir)
     finally
+        temp_cleanup_forget(tmpdir)
         try
             if ispath(tmpdir)
                 prepare_for_deletion(tmpdir)
@@ -785,7 +958,7 @@ end
     readdir(dir::AbstractString=pwd();
         join::Bool = false,
         sort::Bool = true,
-    ) -> Vector{String}
+    )::Vector{String}
 
 Return the names in the directory `dir` or the current working directory if not
 given. When `join` is false, `readdir` returns just the names in the directory
@@ -807,7 +980,7 @@ See also: [`walkdir`](@ref).
 julia> cd("/home/JuliaUser/dev/julia")
 
 julia> readdir()
-30-element Array{String,1}:
+30-element Vector{String}:
  ".appveyor.yml"
  ".git"
  ".gitattributes"
@@ -817,7 +990,7 @@ julia> readdir()
  "usr-staging"
 
 julia> readdir(join=true)
-30-element Array{String,1}:
+30-element Vector{String}:
  "/home/JuliaUser/dev/julia/.appveyor.yml"
  "/home/JuliaUser/dev/julia/.git"
  "/home/JuliaUser/dev/julia/.gitattributes"
@@ -827,7 +1000,7 @@ julia> readdir(join=true)
  "/home/JuliaUser/dev/julia/usr-staging"
 
 julia> readdir("base")
-145-element Array{String,1}:
+145-element Vector{String}:
  ".gitignore"
  "Base.jl"
  "Enums.jl"
@@ -837,7 +1010,7 @@ julia> readdir("base")
  "weakkeydict.jl"
 
 julia> readdir("base", join=true)
-145-element Array{String,1}:
+145-element Vector{String}:
  "base/.gitignore"
  "base/Base.jl"
  "base/Enums.jl"
@@ -847,7 +1020,7 @@ julia> readdir("base", join=true)
  "base/weakkeydict.jl"
 
 julia> readdir(abspath("base"), join=true)
-145-element Array{String,1}:
+145-element Vector{String}:
  "/home/JuliaUser/dev/julia/base/.gitignore"
  "/home/JuliaUser/dev/julia/base/Base.jl"
  "/home/JuliaUser/dev/julia/base/Enums.jl"
@@ -857,7 +1030,79 @@ julia> readdir(abspath("base"), join=true)
  "/home/JuliaUser/dev/julia/base/weakkeydict.jl"
 ```
 """
-function readdir(dir::AbstractString; join::Bool=false, sort::Bool=true)
+readdir(; join::Bool=false, kwargs...) = readdir(join ? pwd() : "."; join, kwargs...)::Vector{String}
+readdir(dir::AbstractString; kwargs...) = _readdir(dir; return_objects=false, kwargs...)::Vector{String}
+
+# this might be better as an Enum but they're not available here
+# UV_DIRENT_T
+const UV_DIRENT_UNKNOWN = Cint(0)
+const UV_DIRENT_FILE = Cint(1)
+const UV_DIRENT_DIR = Cint(2)
+const UV_DIRENT_LINK = Cint(3)
+const UV_DIRENT_FIFO = Cint(4)
+const UV_DIRENT_SOCKET = Cint(5)
+const UV_DIRENT_CHAR = Cint(6)
+const UV_DIRENT_BLOCK = Cint(7)
+
+"""
+    DirEntry
+
+A type representing a filesystem entry that contains the name of the entry, the directory, and
+the raw type of the entry. The full path of the entry can be obtained lazily by accessing the
+`path` field. The type of the entry can be checked for by calling [`isfile`](@ref), [`isdir`](@ref),
+[`islink`](@ref), [`isfifo`](@ref), [`issocket`](@ref), [`ischardev`](@ref), and [`isblockdev`](@ref)
+"""
+struct DirEntry
+    dir::String
+    name::String
+    rawtype::Cint
+end
+function Base.getproperty(obj::DirEntry, p::Symbol)
+    if p === :path
+        return joinpath(obj.dir, obj.name)
+    else
+        return getfield(obj, p)
+    end
+end
+Base.propertynames(::DirEntry) = (:dir, :name, :path, :rawtype)
+Base.isless(a::DirEntry, b::DirEntry) = a.dir == b.dir ? isless(a.name, b.name) : isless(a.dir, b.dir)
+Base.hash(o::DirEntry, h::UInt) = hash(o.dir, hash(o.name, hash(o.rawtype, h)))
+Base.:(==)(a::DirEntry, b::DirEntry) = a.name == b.name && a.dir == b.dir && a.rawtype == b.rawtype
+joinpath(obj::DirEntry, args...) = joinpath(obj.path, args...)
+isunknown(obj::DirEntry) =  obj.rawtype == UV_DIRENT_UNKNOWN
+islink(obj::DirEntry) =     isunknown(obj) ? islink(obj.path) : obj.rawtype == UV_DIRENT_LINK
+isfile(obj::DirEntry) =     (isunknown(obj) || islink(obj)) ? isfile(obj.path)      : obj.rawtype == UV_DIRENT_FILE
+isdir(obj::DirEntry) =      (isunknown(obj) || islink(obj)) ? isdir(obj.path)       : obj.rawtype == UV_DIRENT_DIR
+isfifo(obj::DirEntry) =     (isunknown(obj) || islink(obj)) ? isfifo(obj.path)      : obj.rawtype == UV_DIRENT_FIFO
+issocket(obj::DirEntry) =   (isunknown(obj) || islink(obj)) ? issocket(obj.path)    : obj.rawtype == UV_DIRENT_SOCKET
+ischardev(obj::DirEntry) =  (isunknown(obj) || islink(obj)) ? ischardev(obj.path)   : obj.rawtype == UV_DIRENT_CHAR
+isblockdev(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? isblockdev(obj.path)  : obj.rawtype == UV_DIRENT_BLOCK
+realpath(obj::DirEntry) = realpath(obj.path)
+
+"""
+    _readdirx(dir::AbstractString=pwd(); sort::Bool = true)::Vector{DirEntry}
+
+Return a vector of [`DirEntry`](@ref) objects representing the contents of the directory `dir`,
+or the current working directory if not given. If `sort` is true, the returned vector is
+sorted by name.
+
+Unlike [`readdir`](@ref), `_readdirx` returns [`DirEntry`](@ref) objects, which contain the name of the
+file, the directory it is in, and the type of the file which is determined during the
+directory scan. This means that calls to [`isfile`](@ref), [`isdir`](@ref), [`islink`](@ref), [`isfifo`](@ref),
+[`issocket`](@ref), [`ischardev`](@ref), and [`isblockdev`](@ref) can be made on the
+returned objects without further stat calls. However, for some filesystems, the type of the file
+cannot be determined without a stat call. In these cases the `rawtype` field of the [`DirEntry`](@ref))
+object will be 0 (`UV_DIRENT_UNKNOWN`) and [`isfile`](@ref) etc. will fall back to a `stat` call.
+
+```julia
+for obj in _readdirx()
+    isfile(obj) && println("\$(obj.name) is a file with path \$(obj.path)")
+end
+```
+"""
+_readdirx(dir::AbstractString=pwd(); sort::Bool=true) = _readdir(dir; return_objects=true, sort)::Vector{DirEntry}
+
+function _readdir(dir::AbstractString; return_objects::Bool=false, join::Bool=false, sort::Bool=true)
     # Allocate space for uv_fs_t struct
     req = Libc.malloc(_sizeof_uv_fs)
     try
@@ -867,11 +1112,16 @@ function readdir(dir::AbstractString; join::Bool=false, sort::Bool=true)
         err < 0 && uv_error("readdir($(repr(dir)))", err)
 
         # iterate the listing into entries
-        entries = String[]
+        entries = return_objects ? DirEntry[] : String[]
         ent = Ref{uv_dirent_t}()
         while Base.UV_EOF != ccall(:uv_fs_scandir_next, Cint, (Ptr{Cvoid}, Ptr{uv_dirent_t}), req, ent)
             name = unsafe_string(ent[].name)
-            push!(entries, join ? joinpath(dir, name) : name)
+            if return_objects
+                rawtype = ent[].typ
+                push!(entries, DirEntry(dir, name, rawtype))
+            else
+                push!(entries, join ? joinpath(dir, name) : name)
+            end
         end
 
         # Clean up the request string
@@ -885,52 +1135,59 @@ function readdir(dir::AbstractString; join::Bool=false, sort::Bool=true)
         Libc.free(req)
     end
 end
-readdir(; join::Bool=false, sort::Bool=true) =
-    readdir(join ? pwd() : ".", join=join, sort=sort)
 
 """
-    walkdir(dir; topdown=true, follow_symlinks=false, onerror=throw)
+    walkdir(dir = pwd(); topdown=true, follow_symlinks=false, onerror=throw)
 
 Return an iterator that walks the directory tree of a directory.
-The iterator returns a tuple containing `(rootpath, dirs, files)`.
+
+The iterator returns a tuple containing `(path, dirs, files)`.
+Each iteration `path` will change to the next directory in the tree;
+then `dirs` and `files` will be vectors containing the directories and files
+in the current `path` directory.
 The directory tree can be traversed top-down or bottom-up.
 If `walkdir` or `stat` encounters a `IOError` it will rethrow the error by default.
 A custom error handling function can be provided through `onerror` keyword argument.
 `onerror` is called with a `IOError` as argument.
+The returned iterator is stateful so when accessed repeatedly each access will
+resume where the last left off, like [`Iterators.Stateful`](@ref).
 
 See also: [`readdir`](@ref).
 
+!!! compat "Julia 1.12"
+    `pwd()` as the default directory was added in Julia 1.12.
+
 # Examples
 ```julia
-for (root, dirs, files) in walkdir(".")
-    println("Directories in \$root")
+for (path, dirs, files) in walkdir(".")
+    println("Directories in \$path")
     for dir in dirs
-        println(joinpath(root, dir)) # path to directories
+        println(joinpath(path, dir)) # path to directories
     end
-    println("Files in \$root")
+    println("Files in \$path")
     for file in files
-        println(joinpath(root, file)) # path to files
+        println(joinpath(path, file)) # path to files
     end
 end
 ```
 
-```julia-repl
+```jldoctest; setup = :(prevdir = pwd(); tmpdir = mktempdir(); cd(tmpdir)), teardown = :(cd(prevdir); rm(tmpdir, recursive=true))
 julia> mkpath("my/test/dir");
 
 julia> itr = walkdir("my");
 
-julia> (root, dirs, files) = first(itr)
+julia> (path, dirs, files) = first(itr)
 ("my", ["test"], String[])
 
-julia> (root, dirs, files) = first(itr)
+julia> (path, dirs, files) = first(itr)
 ("my/test", ["dir"], String[])
 
-julia> (root, dirs, files) = first(itr)
+julia> (path, dirs, files) = first(itr)
 ("my/test/dir", String[], String[])
 ```
 """
-function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw)
-    function _walkdir(chnl, root)
+function walkdir(path = pwd(); topdown=true, follow_symlinks=false, onerror=throw)
+    function _walkdir(chnl, path)
         tryf(f, p) = try
                 f(p)
             catch err
@@ -942,33 +1199,31 @@ function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw)
                 end
                 return
             end
-        content = tryf(readdir, root)
-        content === nothing && return
-        dirs = Vector{eltype(content)}()
-        files = Vector{eltype(content)}()
-        for name in content
-            path = joinpath(root, name)
-
+        entries = tryf(_readdirx, path)
+        entries === nothing && return
+        dirs = Vector{String}()
+        files = Vector{String}()
+        for entry in entries
             # If we're not following symlinks, then treat all symlinks as files
-            if (!follow_symlinks && something(tryf(islink, path), true)) || !something(tryf(isdir, path), false)
-                push!(files, name)
+            if (!follow_symlinks && something(tryf(islink, entry), true)) || !something(tryf(isdir, entry), false)
+                push!(files, entry.name)
             else
-                push!(dirs, name)
+                push!(dirs, entry.name)
             end
         end
 
         if topdown
-            push!(chnl, (root, dirs, files))
+            push!(chnl, (path, dirs, files))
         end
         for dir in dirs
-            _walkdir(chnl, joinpath(root, dir))
+            _walkdir(chnl, joinpath(path, dir))
         end
         if !topdown
-            push!(chnl, (root, dirs, files))
+            push!(chnl, (path, dirs, files))
         end
         nothing
     end
-    return Channel{Tuple{String,Vector{String},Vector{String}}}(chnl -> _walkdir(chnl, root))
+    return Channel{Tuple{String,Vector{String},Vector{String}}}(chnl -> _walkdir(chnl, path))
 end
 
 function unlink(p::AbstractString)
@@ -977,15 +1232,38 @@ function unlink(p::AbstractString)
     nothing
 end
 
-# For move command
-function rename(src::AbstractString, dst::AbstractString; force::Bool=false)
-    err = ccall(:jl_fs_rename, Int32, (Cstring, Cstring), src, dst)
-    # on error, default to cp && rm
+"""
+    Base.rename(oldpath::AbstractString, newpath::AbstractString)
+
+Change the name of a file or directory from `oldpath` to `newpath`.
+If `newpath` is an existing file or empty directory it may be replaced.
+Equivalent to [rename(2)](https://man7.org/linux/man-pages/man2/rename.2.html) on Unix.
+If a path contains a "\\0" throw an `ArgumentError`.
+On other failures throw an `IOError`.
+Return `newpath`.
+
+This is a lower level filesystem operation used to implement [`mv`](@ref).
+
+OS-specific restrictions may apply when `oldpath` and `newpath` are in different directories.
+
+Currently there are a few differences in behavior on Windows which may be resolved in a future release.
+Specifically, currently on Windows:
+1. `rename` will fail if `oldpath` or `newpath` are opened files.
+2. `rename` will fail if `newpath` is an existing directory.
+3. `rename` may work if `newpath` is a file and `oldpath` is a directory.
+4. `rename` may remove `oldpath` if it is a hardlink to `newpath`.
+
+See also: [`mv`](@ref).
+
+!!! compat "Julia 1.12"
+    This method was made public in Julia 1.12.
+"""
+function rename(oldpath::AbstractString, newpath::AbstractString)
+    err = ccall(:jl_fs_rename, Int32, (Cstring, Cstring), oldpath, newpath)
     if err < 0
-        cp(src, dst; force=force, follow_symlinks=false)
-        rm(src; recursive=true)
+        uv_error("rename($(repr(oldpath)), $(repr(newpath)))", err)
     end
-    nothing
+    newpath
 end
 
 function sendfile(src::AbstractString, dst::AbstractString)
@@ -1019,7 +1297,7 @@ end
 """
     hardlink(src::AbstractString, dst::AbstractString)
 
-Creates a hard link to an existing source file `src` with the name `dst`. The
+Create a hard link to an existing source file `src` with the name `dst`. The
 destination, `dst`, must not exist.
 
 See also: [`symlink`](@ref).
@@ -1039,7 +1317,7 @@ end
 """
     symlink(target::AbstractString, link::AbstractString; dir_target = false)
 
-Creates a symbolic link to `target` with the name `link`.
+Create a symbolic link to `target` with the name `link`.
 
 On Windows, symlinks must be explicitly declared as referring to a directory
 or not.  If `target` already exists, by default the type of `link` will be auto-
@@ -1113,7 +1391,7 @@ function symlink(target::AbstractString, link::AbstractString;
 end
 
 """
-    readlink(path::AbstractString) -> String
+    readlink(path::AbstractString)::String
 
 Return the target location a symbolic link `path` points to.
 """
@@ -1212,7 +1490,7 @@ Base.show(io::IO, x::DiskStat) =
 """
     diskstat(path=pwd())
 
-Returns statistics in bytes about the disk that contains the file or directory pointed at by
+Return statistics in bytes about the disk that contains the file or directory pointed at by
 `path`. If no argument is passed, statistics about the disk that contains the current
 working directory are returned.
 
diff --git a/base/filesystem.jl b/base/filesystem.jl
index 63fe4281f6e59..36d60f1d3318a 100644
--- a/base/filesystem.jl
+++ b/base/filesystem.jl
@@ -4,6 +4,45 @@
 
 module Filesystem
 
+"""
+    JL_O_APPEND
+    JL_O_ASYNC
+    JL_O_CLOEXEC
+    JL_O_CREAT
+    JL_O_DIRECT
+    JL_O_DIRECTORY
+    JL_O_DSYNC
+    JL_O_EXCL
+    JL_O_FSYNC
+    JL_O_LARGEFILE
+    JL_O_NDELAY
+    JL_O_NOATIME
+    JL_O_NOCTTY
+    JL_O_NOFOLLOW
+    JL_O_NONBLOCK
+    JL_O_PATH
+    JL_O_RANDOM
+    JL_O_RDONLY
+    JL_O_RDWR
+    JL_O_RSYNC
+    JL_O_SEQUENTIAL
+    JL_O_SHORT_LIVED
+    JL_O_SYNC
+    JL_O_TEMPORARY
+    JL_O_TMPFILE
+    JL_O_TRUNC
+    JL_O_WRONLY
+
+Enum constant for the `open` syscall, where `JL_O_*` corresponds to the `O_*` constant.
+See [the libuv docs](https://docs.libuv.org/en/v1.x/fs.html#file-open-constants) for more details.
+"""
+(:JL_O_APPEND, :JL_O_ASYNC, :JL_O_CLOEXEC, :JL_O_CREAT, :JL_O_DIRECT,
+ :JL_O_DIRECTORY, :JL_O_DSYNC, :JL_O_EXCL, :JL_O_FSYNC, :JL_O_LARGEFILE,
+ :JL_O_NOATIME, :JL_O_NOCTTY, :JL_O_NDELAY, :JL_O_NOFOLLOW, :JL_O_NONBLOCK,
+ :JL_O_PATH, :JL_O_RANDOM, :JL_O_RDONLY, :JL_O_RDWR, :JL_O_RSYNC,
+ :JL_O_SEQUENTIAL, :JL_O_SHORT_LIVED, :JL_O_SYNC, :JL_O_TEMPORARY,
+ :JL_O_TMPFILE, :JL_O_TRUNC, :JL_O_WRONLY)
+
 const S_IFDIR  = 0o040000  # directory
 const S_IFCHR  = 0o020000  # character device
 const S_IFBLK  = 0o060000  # block device
@@ -31,6 +70,36 @@ const S_IWOTH = 0o0002  # write by other
 const S_IXOTH = 0o0001  # execute by other
 const S_IRWXO = 0o0007  # mask for other permissions
 
+"""
+    S_IRUSR
+    S_IWUSR
+    S_IXUSR
+    S_IRGRP
+    S_IWGRP
+    S_IXGRP
+    S_IROTH
+    S_IWOTH
+    S_IXOTH
+
+Constants for file access permission bits.
+The general structure is `S_I[permission][class]`
+where `permission` is `R` for read, `W` for write, and `X` for execute,
+and `class` is `USR` for user/owner, `GRP` for group, and `OTH` for other.
+"""
+(:S_IRUSR, :S_IWUSR, :S_IXUSR, :S_IRGRP, :S_IWGRP, :S_IXGRP, :S_IROTH, :S_IWOTH, :S_IXOTH)
+
+"""
+    S_IRWXU
+    S_IRWXG
+    S_IRWXO
+
+Constants for file access permission masks, i.e. the combination of read, write,
+and execute permissions for a class.
+The general structure is `S_IRWX[class]`
+where `class` is `U` for user/owner, `G` for group, and `O` for other.
+"""
+(:S_IRWXU, :S_IRWXG, :S_IRWXO)
+
 export File,
        StatStruct,
        # open,
@@ -48,7 +117,6 @@ export File,
        JL_O_SEQUENTIAL,
        JL_O_RANDOM,
        JL_O_NOCTTY,
-       JL_O_NOCTTY,
        JL_O_NONBLOCK,
        JL_O_NDELAY,
        JL_O_SYNC,
@@ -69,10 +137,11 @@ export File,
        S_IROTH, S_IWOTH, S_IXOTH, S_IRWXO
 
 import .Base:
-    IOError, _UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
-    bytesavailable, position, read, read!, readavailable, seek, seekend, show,
-    skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error,
-    setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize
+    IOError, _UVError, _sizeof_uv_fs, check_open, close, closewrite, eof, eventloop, fd, isopen,
+    bytesavailable, position, read, read!, readbytes!, readavailable, seek, seekend, show,
+    skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error, _uv_error,
+    setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize,
+    isexecutable, isreadable, iswritable, MutableDenseArrayType, truncate, unsafe_takestring!
 
 import .Base.RefValue
 
@@ -90,7 +159,7 @@ uv_fs_req_cleanup(req) = ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
 include("path.jl")
 include("stat.jl")
 include("file.jl")
-include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "file_constants.jl"))  # include($BUILDROOT/base/file_constants.jl)
+include(string(Base.BUILDROOT, "file_constants.jl"))  # include($BUILDROOT/base/file_constants.jl)
 
 ## Operations with File (fd) objects ##
 
@@ -143,6 +212,8 @@ function close(f::File)
     nothing
 end
 
+closewrite(f::File) = nothing
+
 # sendfile is the most efficient way to copy from a file descriptor
 function sendfile(dst::File, src::File, src_offset::Int64, bytes::Int)
     check_open(dst)
@@ -193,18 +264,23 @@ end
 
 function read(f::File, ::Type{UInt8})
     check_open(f)
-    ret = ccall(:jl_fs_read_byte, Int32, (OS_HANDLE,), f.handle)
+    p = Ref{UInt8}()
+    ret = ccall(:jl_fs_read, Int32, (OS_HANDLE, Ptr{Cvoid}, Csize_t),
+                f.handle, p, 1)
     uv_error("read", ret)
-    return ret % UInt8
+    @assert ret <= sizeof(p) == 1
+    ret < 1 && throw(EOFError())
+    return p[] % UInt8
 end
 
 function read(f::File, ::Type{Char})
     b0 = read(f, UInt8)
-    l = 8 * (4 - leading_ones(b0))
+    l = 0x08 * (0x04 - UInt8(leading_ones(b0)))
     c = UInt32(b0) << 24
-    if l < 24
+    if l ≤ 0x10
         s = 16
         while s ≥ l && !eof(f)
+            # this works around lack of peek(::File)
             p = position(f)
             b = read(f, UInt8)
             if b & 0xc0 != 0x80
@@ -233,7 +309,7 @@ bytesavailable(f::File) = max(0, filesize(f) - position(f)) # position can be >
 
 eof(f::File) = bytesavailable(f) == 0
 
-function readbytes!(f::File, b::Array{UInt8}, nb=length(b))
+function readbytes!(f::File, b::MutableDenseArrayType{UInt8}, nb=length(b))
     nr = min(nb, bytesavailable(f))
     if length(b) < nr
         resize!(b, nr)
@@ -290,5 +366,85 @@ function touch(f::File)
     f
 end
 
+"""
+    isexecutable(path::String)
+
+Return `true` if the given `path` has executable permissions.
+
+!!! note
+    This permission may change before the user executes `path`,
+    so it is recommended to execute the file and handle the error if that fails,
+    rather than calling `isexecutable` first.
+
+!!! note
+    Prior to Julia 1.6, this did not correctly interrogate filesystem
+    ACLs on Windows, therefore it would return `true` for any
+    file.  From Julia 1.6 on, it correctly determines whether the
+    file is marked as executable or not.
+
+See also [`ispath`](@ref), [`isreadable`](@ref), [`iswritable`](@ref).
+"""
+function isexecutable(path::String)
+    # We use `access()` and `X_OK` to determine if a given path is
+    # executable by the current user.  `X_OK` comes from `unistd.h`.
+    X_OK = 0x01
+    return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, X_OK) == 0
+end
+isexecutable(path::AbstractString) = isexecutable(String(path)::String)
+
+"""
+    isreadable(path::String)
+
+Return `true` if the access permissions for the given `path` permitted reading by the current user.
+
+!!! note
+    This permission may change before the user calls `open`,
+    so it is recommended to just call `open` alone and handle the error if that fails,
+    rather than calling `isreadable` first.
+
+!!! note
+    Currently this function does not correctly interrogate filesystem
+    ACLs on Windows, therefore it can return wrong results.
+
+!!! compat "Julia 1.11"
+    This function requires at least Julia 1.11.
+
+See also [`ispath`](@ref), [`isexecutable`](@ref), [`iswritable`](@ref).
+"""
+function isreadable(path::String)
+    # We use `access()` and `R_OK` to determine if a given path is
+    # readable by the current user.  `R_OK` comes from `unistd.h`.
+    R_OK = 0x04
+    return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, R_OK) == 0
+end
+isreadable(path::AbstractString) = isreadable(String(path)::String)
+
+"""
+    iswritable(path::String)
+
+Return `true` if the access permissions for the given `path` permitted writing by the current user.
+
+!!! note
+    This permission may change before the user calls `open`,
+    so it is recommended to just call `open` alone and handle the error if that fails,
+    rather than calling `iswritable` first.
+
+!!! note
+    Currently this function does not correctly interrogate filesystem
+    ACLs on Windows, therefore it can return wrong results.
+
+!!! compat "Julia 1.11"
+    This function requires at least Julia 1.11.
+
+See also [`ispath`](@ref), [`isexecutable`](@ref), [`isreadable`](@ref).
+"""
+function iswritable(path::String)
+    # We use `access()` and `W_OK` to determine if a given path is
+    # writeable by the current user.  `W_OK` comes from `unistd.h`.
+    W_OK = 0x02
+    return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, W_OK) == 0
+end
+iswritable(path::AbstractString) = iswritable(String(path)::String)
+
 
 end
diff --git a/base/compiler/parsing.jl b/base/flfrontend.jl
similarity index 71%
rename from base/compiler/parsing.jl
rename to base/flfrontend.jl
index 8b474cf148fb2..86b291cf7328b 100644
--- a/base/compiler/parsing.jl
+++ b/base/flfrontend.jl
@@ -17,3 +17,10 @@ end
 function fl_parse(text::AbstractString, filename::AbstractString, lineno, offset, options)
     fl_parse(String(text), String(filename), lineno, offset, options)
 end
+
+function fl_lower(ex, mod::Module, filename::Union{String,Ptr{UInt8}}="none",
+                  lineno=0, world::Unsigned=typemax(Csize_t), warn::Bool=false)
+    warn = warn ? 1 : 0
+    ccall(:jl_fl_lower, Any, (Any, Any, Ptr{UInt8}, Csize_t, Csize_t, Cint),
+          ex, mod, filename, lineno, world, warn)
+end
diff --git a/base/float.jl b/base/float.jl
index d5280ef74fbce..0ed55b4902194 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -2,6 +2,9 @@
 
 const IEEEFloat = Union{Float16, Float32, Float64}
 
+import Core: Float16, Float32, Float64, AbstractFloat
+import Core: Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128
+
 ## floating point traits ##
 
 """
@@ -14,6 +17,8 @@ const Inf16 = bitcast(Float16, 0x7c00)
     NaN16
 
 A not-a-number value of type [`Float16`](@ref).
+
+See also: [`NaN`](@ref).
 """
 const NaN16 = bitcast(Float16, 0x7e00)
 """
@@ -26,6 +31,8 @@ const Inf32 = bitcast(Float32, 0x7f800000)
     NaN32
 
 A not-a-number value of type [`Float32`](@ref).
+
+See also: [`NaN`](@ref).
 """
 const NaN32 = bitcast(Float32, 0x7fc00000)
 const Inf64 = bitcast(Float64, 0x7ff0000000000000)
@@ -69,9 +76,23 @@ NaN
 julia> Inf - Inf
 NaN
 
-julia> NaN == NaN, isequal(NaN, NaN), NaN === NaN
+julia> NaN == NaN, isequal(NaN, NaN), isnan(NaN)
 (false, true, true)
 ```
+
+!!! note
+    Always use [`isnan`](@ref) or [`isequal`](@ref) for checking for `NaN`.
+    Using `x === NaN` may give unexpected results:
+    ```jldoctest
+    julia> reinterpret(UInt32, NaN32)
+    0x7fc00000
+
+    julia> NaN32p1 = reinterpret(Float32, 0x7fc00001)
+    NaN32
+
+    julia> NaN32p1 === NaN32, isequal(NaN32p1, NaN32), isnan(NaN32p1)
+    (false, true, true)
+    ```
 """
 NaN, NaN64
 
@@ -104,13 +125,16 @@ significand_mask(::Type{Float16}) = 0x03ff
 mantissa(x::T) where {T} = reinterpret(Unsigned, x) & significand_mask(T)
 
 for T in (Float16, Float32, Float64)
-    @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T)))
-    @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1)
-    @eval exponent_bias(::Type{$T}) = $(Int(exponent_one(T) >> significand_bits(T)))
+    sb = trailing_ones(significand_mask(T))
+    em = exponent_mask(T)
+    eb = Int(exponent_one(T) >> sb)
+    @eval significand_bits(::Type{$T}) = $(sb)
+    @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - sb - 1)
+    @eval exponent_bias(::Type{$T}) = $(eb)
     # maximum float exponent
-    @eval exponent_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T) - 1)
+    @eval exponent_max(::Type{$T}) = $(Int(em >> sb) - eb - 1)
     # maximum float exponent without bias
-    @eval exponent_raw_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)))
+    @eval exponent_raw_max(::Type{$T}) = $(Int(em >> sb))
 end
 
 """
@@ -138,7 +162,69 @@ i.e. the maximum integer value representable by [`exponent_bits(T)`](@ref) bits.
 function exponent_raw_max end
 
 """
-    uabs(x::Integer)
+IEEE 754 definition of the minimum exponent.
+"""
+ieee754_exponent_min(::Type{T}) where {T<:IEEEFloat} = Int(1 - exponent_max(T))::Int
+
+exponent_min(::Type{Float16}) = ieee754_exponent_min(Float16)
+exponent_min(::Type{Float32}) = ieee754_exponent_min(Float32)
+exponent_min(::Type{Float64}) = ieee754_exponent_min(Float64)
+
+function ieee754_representation(
+    ::Type{F}, sign_bit::Bool, exponent_field::Integer, significand_field::Integer
+) where {F<:IEEEFloat}
+    T = uinttype(F)
+    ret::T = sign_bit
+    ret <<= exponent_bits(F)
+    ret |= exponent_field
+    ret <<= significand_bits(F)
+    ret |= significand_field
+end
+
+# ±floatmax(T)
+function ieee754_representation(
+    ::Type{F}, sign_bit::Bool, ::Val{:omega}
+) where {F<:IEEEFloat}
+    ieee754_representation(F, sign_bit, exponent_raw_max(F) - 1, significand_mask(F))
+end
+
+# NaN or an infinity
+function ieee754_representation(
+    ::Type{F}, sign_bit::Bool, significand_field::Integer, ::Val{:nan}
+) where {F<:IEEEFloat}
+    ieee754_representation(F, sign_bit, exponent_raw_max(F), significand_field)
+end
+
+# NaN with default payload
+function ieee754_representation(
+    ::Type{F}, sign_bit::Bool, ::Val{:nan}
+) where {F<:IEEEFloat}
+    ieee754_representation(F, sign_bit, one(uinttype(F)) << (significand_bits(F) - 1), Val(:nan))
+end
+
+# Infinity
+function ieee754_representation(
+    ::Type{F}, sign_bit::Bool, ::Val{:inf}
+) where {F<:IEEEFloat}
+    ieee754_representation(F, sign_bit, false, Val(:nan))
+end
+
+# Subnormal or zero
+function ieee754_representation(
+    ::Type{F}, sign_bit::Bool, significand_field::Integer, ::Val{:subnormal}
+) where {F<:IEEEFloat}
+    ieee754_representation(F, sign_bit, false, significand_field)
+end
+
+# Zero
+function ieee754_representation(
+    ::Type{F}, sign_bit::Bool, ::Val{:zero}
+) where {F<:IEEEFloat}
+    ieee754_representation(F, sign_bit, false, Val(:subnormal))
+end
+
+"""
+    Base.uabs(x::Integer)
 
 Return the absolute value of `x`, possibly returning a different type should the
 operation be susceptible to overflow. This typically arises when `x` is a two's complement
@@ -168,8 +254,6 @@ for t1 in (Float16, Float32, Float64)
     end
 end
 
-Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x))
-
 promote_rule(::Type{Float64}, ::Type{UInt128}) = Float64
 promote_rule(::Type{Float64}, ::Type{Int128}) = Float64
 promote_rule(::Type{Float32}, ::Type{UInt128}) = Float32
@@ -284,9 +368,6 @@ See also: [`complex`](@ref), [`oftype`](@ref), [`convert`](@ref).
 
 # Examples
 ```jldoctest
-julia> float(1:1000)
-1.0:1.0:1000.0
-
 julia> float(typemax(Int32))
 2.147483647e9
 ```
@@ -310,7 +391,7 @@ Float64
 """
 float(::Type{T}) where {T<:Number} = typeof(float(zero(T)))
 float(::Type{T}) where {T<:AbstractFloat} = T
-float(::Type{Union{}}, slurp...) = Union{}(0.0)
+float(::Type{Union{}}, slurp...) = Union{}
 
 """
     unsafe_trunc(T, x)
@@ -325,8 +406,8 @@ See also [`trunc`](@ref).
 julia> unsafe_trunc(Int, -2.2)
 -2
 
-julia> unsafe_trunc(Int, NaN)
--9223372036854775808
+julia> unsafe_trunc(Int, NaN) isa Int
+true
 ```
 """
 function unsafe_trunc end
@@ -374,26 +455,28 @@ unsafe_trunc(::Type{UInt128}, x::Float16) = unsafe_trunc(UInt128, Float32(x))
 unsafe_trunc(::Type{Int128}, x::Float16) = unsafe_trunc(Int128, Float32(x))
 
 # matches convert methods
-# also determines floor, ceil, round
-trunc(::Type{Signed}, x::IEEEFloat) = trunc(Int,x)
-trunc(::Type{Unsigned}, x::IEEEFloat) = trunc(UInt,x)
-trunc(::Type{Integer}, x::IEEEFloat) = trunc(Int,x)
-
-# fallbacks
-floor(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundDown))
-ceil(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundUp))
-round(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundNearest))
-
-# Bool
-trunc(::Type{Bool}, x::AbstractFloat) = (-1 < x < 2) ? 1 <= x : throw(InexactError(:trunc, Bool, x))
-floor(::Type{Bool}, x::AbstractFloat) = (0 <= x < 2) ? 1 <= x : throw(InexactError(:floor, Bool, x))
-ceil(::Type{Bool}, x::AbstractFloat)  = (-1 < x <= 1) ? 0 < x : throw(InexactError(:ceil, Bool, x))
-round(::Type{Bool}, x::AbstractFloat) = (-0.5 <= x < 1.5) ? 0.5 < x : throw(InexactError(:round, Bool, x))
-
-round(x::IEEEFloat, r::RoundingMode{:ToZero})  = trunc_llvm(x)
-round(x::IEEEFloat, r::RoundingMode{:Down})    = floor_llvm(x)
-round(x::IEEEFloat, r::RoundingMode{:Up})      = ceil_llvm(x)
-round(x::IEEEFloat, r::RoundingMode{:Nearest}) = rint_llvm(x)
+# also determines trunc, floor, ceil
+round(::Type{Signed},   x::IEEEFloat, r::RoundingMode) = round(Int, x, r)
+round(::Type{Unsigned}, x::IEEEFloat, r::RoundingMode) = round(UInt, x, r)
+round(::Type{Integer},  x::IEEEFloat, r::RoundingMode) = round(Int, x, r)
+
+round(x::IEEEFloat, ::RoundingMode{:ToZero})  = trunc_llvm(x)
+round(x::IEEEFloat, ::RoundingMode{:Down})    = floor_llvm(x)
+round(x::IEEEFloat, ::RoundingMode{:Up})      = ceil_llvm(x)
+round(x::IEEEFloat, ::RoundingMode{:Nearest}) = rint_llvm(x)
+
+rounds_up(x, ::RoundingMode{:Down}) = false
+rounds_up(x, ::RoundingMode{:Up}) = true
+rounds_up(x, ::RoundingMode{:ToZero}) = signbit(x)
+rounds_up(x, ::RoundingMode{:FromZero}) = !signbit(x)
+function _round_convert(::Type{T}, x_integer, x, r::Union{RoundingMode{:ToZero}, RoundingMode{:FromZero}, RoundingMode{:Up}, RoundingMode{:Down}}) where {T<:AbstractFloat}
+    x_t = convert(T, x_integer)
+    if rounds_up(x, r)
+        x_t < x ? nextfloat(x_t) : x_t
+    else
+        x_t > x ? prevfloat(x_t) : x_t
+    end
+end
 
 ## floating point promotions ##
 promote_rule(::Type{Float32}, ::Type{Float16}) = Float32
@@ -444,7 +527,8 @@ function _to_float(number::U, ep) where {U<:Unsigned}
     return reinterpret(F, bits)
 end
 
-@assume_effects :terminates_locally :nothrow function rem_internal(x::T, y::T) where {T<:IEEEFloat}
+function rem_internal(x::T, y::T) where {T<:IEEEFloat}
+    @_terminates_locally_meta
     xuint = reinterpret(Unsigned, x)
     yuint = reinterpret(Unsigned, y)
     if xuint <= yuint
@@ -519,7 +603,10 @@ function rem(x::T, y::T) where {T<:IEEEFloat}
     end
 end
 
-function mod(x::T, y::T) where {T<:AbstractFloat}
+function mod(x::T, y::T) where T<:AbstractFloat
+    if isinf(y) && isfinite(x)
+        return x
+    end
     r = rem(x,y)
     if r == 0
         copysign(r,y)
@@ -532,20 +619,21 @@ end
 
 ## floating point comparisons ##
 ==(x::T, y::T) where {T<:IEEEFloat} = eq_float(x, y)
-!=(x::T, y::T) where {T<:IEEEFloat} = ne_float(x, y)
 <( x::T, y::T) where {T<:IEEEFloat} = lt_float(x, y)
 <=(x::T, y::T) where {T<:IEEEFloat} = le_float(x, y)
 
 isequal(x::T, y::T) where {T<:IEEEFloat} = fpiseq(x, y)
 
 # interpret as sign-magnitude integer
-@inline function _fpint(x)
+function _fpint(x)
+    @inline
     IntT = inttype(typeof(x))
     ix = reinterpret(IntT, x)
     return ifelse(ix < zero(IntT), ix ⊻ typemax(IntT), ix)
 end
 
-@inline function isless(a::T, b::T) where T<:IEEEFloat
+function isless(a::T, b::T) where T<:IEEEFloat
+    @inline
     (isnan(a) || isnan(b)) && return !isnan(a)
 
     return _fpint(a) < _fpint(b)
@@ -610,7 +698,7 @@ end
 abs(x::IEEEFloat) = abs_float(x)
 
 """
-    isnan(f) -> Bool
+    isnan(f)::Bool
 
 Test whether a number value is a NaN, an indeterminate value which is neither an infinity
 nor a finite number ("not a number").
@@ -620,12 +708,12 @@ See also: [`iszero`](@ref), [`isone`](@ref), [`isinf`](@ref), [`ismissing`](@ref
 isnan(x::AbstractFloat) = (x != x)::Bool
 isnan(x::Number) = false
 
-isfinite(x::AbstractFloat) = !isnan(x - x)
+isfinite(x::AbstractFloat) = !(isnan(x - x)::Bool)
 isfinite(x::Real) = decompose(x)[3] != 0
 isfinite(x::Integer) = true
 
 """
-    isinf(f) -> Bool
+    isinf(f)::Bool
 
 Test whether a number is infinite.
 
@@ -634,82 +722,6 @@ See also: [`Inf`](@ref), [`iszero`](@ref), [`isfinite`](@ref), [`isnan`](@ref).
 isinf(x::Real) = !isnan(x) & !isfinite(x)
 isinf(x::IEEEFloat) = abs(x) === oftype(x, Inf)
 
-const hx_NaN = hash_uint64(reinterpret(UInt64, NaN))
-function hash(x::Float64, h::UInt)
-    # see comments on trunc and hash(Real, UInt)
-    if typemin(Int64) <= x < typemax(Int64)
-        xi = fptosi(Int64, x)
-        if isequal(xi, x)
-            return hash(xi, h)
-        end
-    elseif typemin(UInt64) <= x < typemax(UInt64)
-        xu = fptoui(UInt64, x)
-        if isequal(xu, x)
-            return hash(xu, h)
-        end
-    elseif isnan(x)
-        return hx_NaN ⊻ h # NaN does not have a stable bit pattern
-    end
-    return hash_uint64(bitcast(UInt64, x)) - 3h
-end
-
-hash(x::Float32, h::UInt) = hash(Float64(x), h)
-
-function hash(x::Float16, h::UInt)
-    # see comments on trunc and hash(Real, UInt)
-    if isfinite(x) # all finite Float16 fit in Int64
-        xi = fptosi(Int64, x)
-        if isequal(xi, x)
-            return hash(xi, h)
-        end
-    elseif isnan(x)
-        return hx_NaN ⊻ h # NaN does not have a stable bit pattern
-    end
-    return hash_uint64(bitcast(UInt64, Float64(x))) - 3h
-end
-
-## generic hashing for rational values ##
-function hash(x::Real, h::UInt)
-    # decompose x as num*2^pow/den
-    num, pow, den = decompose(x)
-
-    # handle special values
-    num == 0 && den == 0 && return hash(NaN, h)
-    num == 0 && return hash(ifelse(den > 0, 0.0, -0.0), h)
-    den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h)
-
-    # normalize decomposition
-    if den < 0
-        num = -num
-        den = -den
-    end
-    num_z = trailing_zeros(num)
-    num >>= num_z
-    den_z = trailing_zeros(den)
-    den >>= den_z
-    pow += num_z - den_z
-
-    # handle values representable as Int64, UInt64, Float64
-    if den == 1
-        left = top_set_bit(abs(num)) + pow
-        right = pow + den_z
-        if -1074 <= right
-            if 0 <= right
-                left <= 63 && return hash(Int64(num) << Int(pow), h)
-                left <= 64 && !signbit(num) && return hash(UInt64(num) << Int(pow), h)
-            end # typemin(Int64) handled by Float64 case
-            left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num), pow), h)
-        end
-    else
-        h = hash_integer(den, h)
-    end
-
-    # handle generic rational values
-    h = hash_integer(pow, h)
-    h = hash_integer(num, h)
-    return h
-end
-
 #=
 `decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`.
 
@@ -783,25 +795,20 @@ number of significand digits in that base.
 """
 function precision end
 
-_precision(::Type{Float16}) = 11
-_precision(::Type{Float32}) = 24
-_precision(::Type{Float64}) = 53
-function _precision(x, base::Integer=2)
+_precision_with_base_2(::Type{Float16}) = 11
+_precision_with_base_2(::Type{Float32}) = 24
+_precision_with_base_2(::Type{Float64}) = 53
+function _precision(x, base::Integer)
     base > 1 || throw(DomainError(base, "`base` cannot be less than 2."))
-    p = _precision(x)
+    p = _precision_with_base_2(x)
     return base == 2 ? Int(p) : floor(Int, p / log2(base))
 end
 precision(::Type{T}; base::Integer=2) where {T<:AbstractFloat} = _precision(T, base)
 precision(::T; base::Integer=2) where {T<:AbstractFloat} = precision(T; base)
 
 
-"""
-    nextfloat(x::AbstractFloat, n::Integer)
-
-The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n`
-applications of [`prevfloat`](@ref) if `n < 0`.
-"""
-function nextfloat(f::IEEEFloat, d::Integer)
+function _nextfloat(f::IEEEFloat, dneg::Bool, da::Integer)
+    # da must be > 0
     F = typeof(f)
     fumax = reinterpret(Unsigned, F(Inf))
     U = typeof(fumax)
@@ -811,8 +818,6 @@ function nextfloat(f::IEEEFloat, d::Integer)
     fneg = fi < 0
     fu = unsigned(fi & typemax(fi))
 
-    dneg = d < 0
-    da = uabs(d)
     if da > typemax(U)
         fneg = dneg
         fu = fumax
@@ -839,15 +844,23 @@ function nextfloat(f::IEEEFloat, d::Integer)
     reinterpret(F, fu)
 end
 
+"""
+    nextfloat(x::AbstractFloat, n::Integer)
+
+The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n`
+applications of [`prevfloat`](@ref) if `n < 0`.
+"""
+nextfloat(f::AbstractFloat, d::Integer) = _nextfloat(f, isnegative(d), uabs(d))
+
 """
     nextfloat(x::AbstractFloat)
 
-Return the smallest floating point number `y` of the same type as `x` such `x < y`. If no
-such `y` exists (e.g. if `x` is `Inf` or `NaN`), then return `x`.
+Return the smallest floating point number `y` of the same type as `x` such that `x < y`.
+If no such `y` exists (e.g. if `x` is `Inf` or `NaN`), then return `x`.
 
 See also: [`prevfloat`](@ref), [`eps`](@ref), [`issubnormal`](@ref).
 """
-nextfloat(x::AbstractFloat) = nextfloat(x,1)
+nextfloat(x::AbstractFloat) = nextfloat(x, 1)
 
 """
     prevfloat(x::AbstractFloat, n::Integer)
@@ -855,15 +868,15 @@ nextfloat(x::AbstractFloat) = nextfloat(x,1)
 The result of `n` iterative applications of `prevfloat` to `x` if `n >= 0`, or `-n`
 applications of [`nextfloat`](@ref) if `n < 0`.
 """
-prevfloat(x::AbstractFloat, d::Integer) = nextfloat(x, -d)
+prevfloat(x::AbstractFloat, d::Integer) = _nextfloat(x, ispositive(d), uabs(d))
 
 """
     prevfloat(x::AbstractFloat)
 
-Return the largest floating point number `y` of the same type as `x` such `y < x`. If no
-such `y` exists (e.g. if `x` is `-Inf` or `NaN`), then return `x`.
+Return the largest floating point number `y` of the same type as `x` such that `y < x`.
+If no such `y` exists (e.g. if `x` is `-Inf` or `NaN`), then return `x`.
 """
-prevfloat(x::AbstractFloat) = nextfloat(x,-1)
+prevfloat(x::AbstractFloat) = nextfloat(x, -1)
 
 for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128)
     for Tf in (Float16, Float32, Float64)
@@ -872,15 +885,18 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn
             # directly. `Tf(typemax(Ti))+1` is either always exactly representable, or
             # rounded to `Inf` (e.g. when `Ti==UInt128 && Tf==Float32`).
             @eval begin
-                function trunc(::Type{$Ti},x::$Tf)
+                function round(::Type{$Ti},x::$Tf,::RoundingMode{:ToZero})
                     if $(Tf(typemin(Ti))-one(Tf)) < x < $(Tf(typemax(Ti))+one(Tf))
                         return unsafe_trunc($Ti,x)
                     else
-                        throw(InexactError(:trunc, $Ti, x))
+                        throw(InexactError(:round, $Ti, x, RoundToZero))
                     end
                 end
                 function (::Type{$Ti})(x::$Tf)
-                    if ($(Tf(typemin(Ti))) <= x <= $(Tf(typemax(Ti)))) && isinteger(x)
+                    # When typemax(Ti) is not representable by Tf but typemax(Ti) + 1 is,
+                    # then < Tf(typemax(Ti) + 1) is stricter than <= Tf(typemax(Ti)). Using
+                    # the former causes us to throw on UInt64(Float64(typemax(UInt64))+1)
+                    if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))+one(Tf))) && isinteger(x)
                         return unsafe_trunc($Ti,x)
                     else
                         throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x))
@@ -893,11 +909,11 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn
             # be rounded up. This assumes that `Tf(typemin(Ti)) > -Inf`, which is true for
             # these types, but not for `Float16` or larger integer types.
             @eval begin
-                function trunc(::Type{$Ti},x::$Tf)
+                function round(::Type{$Ti},x::$Tf,::RoundingMode{:ToZero})
                     if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))
                         return unsafe_trunc($Ti,x)
                     else
-                        throw(InexactError(:trunc, $Ti, x))
+                        throw(InexactError(:round, $Ti, x, RoundToZero))
                     end
                 end
                 function (::Type{$Ti})(x::$Tf)
@@ -913,7 +929,7 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn
 end
 
 """
-    issubnormal(f) -> Bool
+    issubnormal(f)::Bool
 
 Test whether a floating point number is subnormal.
 
@@ -958,19 +974,32 @@ isodd(x::AbstractFloat) = isinteger(x) && abs(x) ≤ maxintfloat(x) && isodd(Int
     floatmax(::Type{Float32}) = $(bitcast(Float32, 0x7f7fffff))
     floatmax(::Type{Float64}) = $(bitcast(Float64, 0x7fefffffffffffff))
 
-    eps(x::AbstractFloat) = isfinite(x) ? abs(x) >= floatmin(x) ? ldexp(eps(typeof(x)), exponent(x)) : nextfloat(zero(x)) : oftype(x, NaN)
     eps(::Type{Float16}) = $(bitcast(Float16, 0x1400))
     eps(::Type{Float32}) = $(bitcast(Float32, 0x34000000))
     eps(::Type{Float64}) = $(bitcast(Float64, 0x3cb0000000000000))
     eps() = eps(Float64)
 end
 
+eps(x::AbstractFloat) = isfinite(x) ? abs(x) >= floatmin(x) ? ldexp(eps(typeof(x)), exponent(x)) : nextfloat(zero(x)) : oftype(x, NaN)
+
+function eps(x::T) where T<:IEEEFloat
+    # For isfinite(x), toggling the LSB will produce either prevfloat(x) or
+    # nextfloat(x) but will never change the sign or exponent.
+    # For !isfinite(x), this will map Inf to NaN and NaN to NaN or Inf.
+    y = reinterpret(T, reinterpret(Unsigned, x) ⊻ true)
+    # The absolute difference between these values is eps(x). This is true even
+    # for Inf/NaN values.
+    return abs(x - y)
+end
+
 """
     floatmin(T = Float64)
 
 Return the smallest positive normal number representable by the floating-point
 type `T`.
 
+See also: [`typemin`](@ref), [`maxintfloat`](@ref), [`floatmax`](@ref), [`eps`](@ref).
+
 # Examples
 ```jldoctest
 julia> floatmin(Float16)
@@ -990,7 +1019,7 @@ floatmin(x::T) where {T<:AbstractFloat} = floatmin(T)
 
 Return the largest finite number representable by the floating-point type `T`.
 
-See also: [`typemax`](@ref), [`floatmin`](@ref), [`eps`](@ref).
+See also: [`typemax`](@ref), [`maxintfloat`](@ref), [`floatmin`](@ref), [`eps`](@ref).
 
 # Examples
 ```jldoctest
@@ -1035,6 +1064,13 @@ julia> 1.0 + eps()
 julia> 1.0 + eps()/2
 1.0
 ```
+
+More generally, for any floating-point numeric type, `eps` corresponds to an
+upper bound on the distance to the nearest floating-point complex value: if ``\text{fl}(x)`` is the closest
+floating-point value to a number ``x`` (e.g. an arbitrary real number), then ``\text{fl}(x)``
+satisfies ``|x - \text{fl}(x)| ≤ \text{eps}(x)/2``, not including overflow cases.
+This allows the definition of `eps` to be extended to complex numbers,
+for which ``\text{fl}(a + ib) = \text{fl}(a) + i \text{fl}(b)``.
 """
 eps(::Type{<:AbstractFloat})
 
@@ -1105,7 +1141,20 @@ floattype(::Type{Int16}) = Float16
 
 
 ## Array operations on floating point numbers ##
+"""
+    float(A::AbstractArray)
+
+Return an array containing the floating-point analog of each entry in array `A`.
+
+Equivalent to `float.(A)`, except that the return value may share memory with all or
+part of `A` in accordance with the behavior of `convert(T, A)` given output type `T`.
 
+# Examples
+```jldoctest
+julia> float(1:1000)
+1.0:1.0:1000.0
+```
+"""
 float(A::AbstractArray{<:AbstractFloat}) = A
 
 function float(A::AbstractArray{T}) where T
diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl
index 9b8ca4b04ee28..5bc4ae9827d4f 100644
--- a/base/floatfuncs.jl
+++ b/base/floatfuncs.jl
@@ -26,6 +26,8 @@ That is, `maxintfloat` returns the smallest positive integer-valued floating-poi
 `n` such that `n+1` is *not* exactly representable in the type `T`.
 
 When an `Integer`-type value is needed, use `Integer(maxintfloat(T))`.
+
+See also: [`typemax`](@ref), [`floatmax`](@ref).
 """
 maxintfloat(::Type{Float64}) = 9007199254740992.
 maxintfloat(::Type{Float32}) = Float32(16777216.)
@@ -42,87 +44,9 @@ it is the minimum of `maxintfloat(T)` and [`typemax(S)`](@ref).
 maxintfloat(::Type{S}, ::Type{T}) where {S<:AbstractFloat, T<:Integer} = min(maxintfloat(S), S(typemax(T)))
 maxintfloat() = maxintfloat(Float64)
 
-isinteger(x::AbstractFloat) = (x - trunc(x) == 0)
-
-"""
-    round([T,] x, [r::RoundingMode])
-    round(x, [r::RoundingMode]; digits::Integer=0, base = 10)
-    round(x, [r::RoundingMode]; sigdigits::Integer, base = 10)
-
-Rounds the number `x`.
-
-Without keyword arguments, `x` is rounded to an integer value, returning a value of type
-`T`, or of the same type of `x` if no `T` is provided. An [`InexactError`](@ref) will be
-thrown if the value is not representable by `T`, similar to [`convert`](@ref).
-
-If the `digits` keyword argument is provided, it rounds to the specified number of digits
-after the decimal place (or before if negative), in base `base`.
-
-If the `sigdigits` keyword argument is provided, it rounds to the specified number of
-significant digits, in base `base`.
-
-The [`RoundingMode`](@ref) `r` controls the direction of the rounding; the default is
-[`RoundNearest`](@ref), which rounds to the nearest integer, with ties (fractional values
-of 0.5) being rounded to the nearest even integer. Note that `round` may give incorrect
-results if the global rounding mode is changed (see [`rounding`](@ref)).
-
-# Examples
-```jldoctest
-julia> round(1.7)
-2.0
-
-julia> round(Int, 1.7)
-2
-
-julia> round(1.5)
-2.0
-
-julia> round(2.5)
-2.0
-
-julia> round(pi; digits=2)
-3.14
-
-julia> round(pi; digits=3, base=2)
-3.125
-
-julia> round(123.456; sigdigits=2)
-120.0
-
-julia> round(357.913; sigdigits=4, base=2)
-352.0
-```
-
-!!! note
-    Rounding to specified digits in bases other than 2 can be inexact when
-    operating on binary floating point numbers. For example, the [`Float64`](@ref)
-    value represented by `1.15` is actually *less* than 1.15, yet will be
-    rounded to 1.2. For example:
+isinteger(x::AbstractFloat) = iszero(x - trunc(x)) # note: x == trunc(x) would be incorrect for x=Inf
 
-    ```jldoctest
-    julia> x = 1.15
-    1.15
-
-    julia> big(1.15)
-    1.149999999999999911182158029987476766109466552734375
-
-    julia> x < 115//100
-    true
-
-    julia> round(x, digits=1)
-    1.2
-    ```
-
-# Extensions
-
-To extend `round` to new numeric types, it is typically sufficient to define `Base.round(x::NewType, r::RoundingMode)`.
-"""
-round(T::Type, x)
-
-function round(::Type{T}, x::AbstractFloat, r::RoundingMode) where {T<:Integer}
-    r != RoundToZero && (x = round(x,r))
-    trunc(T, x)
-end
+# See rounding.jl for docstring.
 
 # NOTE: this relies on the current keyword dispatch behaviour (#9498).
 function round(x::Real, r::RoundingMode=RoundNearest;
@@ -150,12 +74,6 @@ function round(x::Real, r::RoundingMode=RoundNearest;
     end
 end
 
-trunc(x::Real; kwargs...) = round(x, RoundToZero; kwargs...)
-floor(x::Real; kwargs...) = round(x, RoundDown; kwargs...)
-ceil(x::Real; kwargs...)  = round(x, RoundUp; kwargs...)
-
-round(x::Integer, r::RoundingMode) = x
-
 # round x to multiples of 1/invstep
 function _round_invstep(x, invstep, r::RoundingMode)
     y = round(x * invstep, r) / invstep
@@ -304,7 +222,22 @@ true
 function isapprox(x::Number, y::Number;
                   atol::Real=0, rtol::Real=rtoldefault(x,y,atol),
                   nans::Bool=false, norm::Function=abs)
-    x == y || (isfinite(x) && isfinite(y) && norm(x-y) <= max(atol, rtol*max(norm(x), norm(y)))) || (nans && isnan(x) && isnan(y))
+    x′, y′ = promote(x, y) # to avoid integer overflow
+    x == y ||
+        (isfinite(x) && isfinite(y) && norm(x-y) <= max(atol, rtol*max(norm(x′), norm(y′)))) ||
+         (nans && isnan(x) && isnan(y))
+end
+
+function isapprox(x::Integer, y::Integer;
+                  atol::Real=0, rtol::Real=rtoldefault(x,y,atol),
+                  nans::Bool=false, norm::Function=abs)
+    if norm === abs && atol < 1 && rtol == 0
+        return x == y
+    else
+        # We need to take the difference `max` - `min` when comparing unsigned integers.
+        _x, _y = x < y ? (x, y) : (y, x)
+        return norm(_y - _x) <= max(atol, rtol*max(norm(_x), norm(_y)))
+    end
 end
 
 """
@@ -340,11 +273,14 @@ end
 """
     fma(x, y, z)
 
-Computes `x*y+z` without rounding the intermediate result `x*y`. On some systems this is
+Compute `x*y+z` without rounding the intermediate result `x*y`. On some systems this is
 significantly more expensive than `x*y+z`. `fma` is used to improve accuracy in certain
 algorithms. See [`muladd`](@ref).
 """
 function fma end
+function fma_emulated(a::Float16, b::Float16, c::Float16)
+    Float16(muladd(Float32(a), Float32(b), Float32(c))) #don't use fma if the hardware doesn't have it.
+end
 function fma_emulated(a::Float32, b::Float32, c::Float32)::Float32
     ab = Float64(a) * b
     res = ab+c
@@ -417,19 +353,14 @@ function fma_emulated(a::Float64, b::Float64,c::Float64)
     s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo)
     return r+s
 end
-fma_llvm(x::Float32, y::Float32, z::Float32) = fma_float(x, y, z)
-fma_llvm(x::Float64, y::Float64, z::Float64) = fma_float(x, y, z)
 
 # Disable LLVM's fma if it is incorrect, e.g. because LLVM falls back
 # onto a broken system libm; if so, use a software emulated fma
-@assume_effects :consistent fma(x::Float32, y::Float32, z::Float32) = Core.Intrinsics.have_fma(Float32) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
-@assume_effects :consistent fma(x::Float64, y::Float64, z::Float64) = Core.Intrinsics.have_fma(Float64) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
-
-function fma(a::Float16, b::Float16, c::Float16)
-    Float16(muladd(Float32(a), Float32(b), Float32(c))) #don't use fma if the hardware doesn't have it.
+@assume_effects :consistent function fma(x::T, y::T, z::T) where {T<:IEEEFloat}
+    Core.Intrinsics.have_fma(T) ? fma_float(x,y,z) : fma_emulated(x,y,z)
 end
 
-# This is necessary at least on 32-bit Intel Linux, since fma_llvm may
+# This is necessary at least on 32-bit Intel Linux, since fma_float may
 # have called glibc, and some broken glibc fma implementations don't
 # properly restore the rounding mode
 Rounding.setrounding_raw(Float32, Rounding.JL_FE_TONEAREST)
diff --git a/base/gcutils.jl b/base/gcutils.jl
index fed30befd7d5c..67ae7cc7a837c 100644
--- a/base/gcutils.jl
+++ b/base/gcutils.jl
@@ -38,7 +38,7 @@ WeakRef
 # Used by `Base.finalizer` to validate mutability of an object being finalized.
 function _check_mutable(@nospecialize(o)) @noinline
     if !ismutable(o)
-        error("objects of type ", typeof(o), " cannot be finalized")
+        error("objects of type ", typeof(o), " cannot be finalized because they are not mutable")
     end
 end
 
@@ -70,7 +70,6 @@ end
 A finalizer may be registered at object construction. In the following example note that
 we implicitly rely on the finalizer returning the newly created mutable struct `x`.
 
-# Example
 ```julia
 mutable struct MyMutableStruct
     bar
@@ -110,6 +109,8 @@ Module with garbage collection utilities.
 """
 module GC
 
+public gc, enable, @preserve, safepoint, enable_logging, logging_enabled
+
 # mirrored from julia.h
 const GC_AUTO = 0
 const GC_FULL = 1
@@ -119,9 +120,12 @@ const GC_INCREMENTAL = 2
     GC.gc([full=true])
 
 Perform garbage collection. The argument `full` determines the kind of
-collection: A full collection (default) sweeps all objects, which makes the
-next GC scan much slower, while an incremental collection may only sweep
-so-called young objects.
+collection: a full collection (default) traverses all live objects (i.e. full mark)
+and should reclaim memory from all unreachable objects. An incremental collection only
+reclaims memory from young objects which are not reachable.
+
+The GC may decide to perform a full collection even if an incremental collection was
+requested.
 
 !!! warning
     Excessive use will likely lead to poor performance.
@@ -165,7 +169,7 @@ end
 """
     GC.in_finalizer()::Bool
 
-Returns `true` if the current task is running a finalizer, returns `false`
+Return `true` if the current task is running a finalizer, return `false`
 otherwise. Will also return `false` within a finalizer which was inlined by the
 compiler's eager finalization optimization, or if `finalize` is called on the
 finalizer directly.
@@ -239,12 +243,21 @@ end
     GC.safepoint()
 
 Inserts a point in the program where garbage collection may run.
-This can be useful in rare cases in multi-threaded programs where some threads
-are allocating memory (and hence may need to run GC) but other threads are doing
-only simple operations (no allocation, task switches, or I/O).
-Calling this function periodically in non-allocating threads allows garbage
+
+Safepoints are fast and do not themselves trigger garbage collection.
+However, if another thread has requested the GC to run, reaching a safepoint will
+cause the current thread to block and wait for the GC.
+
+This can be useful in rare cases in multi-threaded programs where some tasks
+are allocating memory (and hence may need to run GC) but other tasks are doing
+only simple operations (no allocation, task switches, or I/O), which do not
+yield control to Julia's runtime, and therefore blocks the GC from running.
+Calling this function periodically in the non-allocating tasks allows garbage
 collection to run.
 
+Note that even though safepoints are fast (typically around 2 clock cycles),
+they can still degrade performance if called in a tight loop.
+
 !!! compat "Julia 1.4"
     This function is available as of Julia 1.4.
 """
@@ -259,4 +272,24 @@ function enable_logging(on::Bool=true)
     ccall(:jl_enable_gc_logging, Cvoid, (Cint,), on)
 end
 
+"""
+    GC.logging_enabled()
+
+Return whether GC logging has been enabled via [`GC.enable_logging`](@ref).
+"""
+function logging_enabled()
+    ccall(:jl_is_gc_logging_enabled, Cint, ()) != 0
+end
+
+"""
+    GC.gc_active_impl()
+
+Return a string stating which GC implementation is being used and possibly
+its version according to the list of supported GCs
+"""
+function gc_active_impl()
+    unsafe_string(ccall(:jl_gc_active_impl, Ptr{UInt8}, ()))
+end
+
+
 end # module GC
diff --git a/base/generator.jl b/base/generator.jl
index aa4b7f67cba95..26bb7c7d91b5d 100644
--- a/base/generator.jl
+++ b/base/generator.jl
@@ -5,25 +5,26 @@
 
 Given a function `f` and an iterator `iter`, construct an iterator that yields
 the values of `f` applied to the elements of `iter`.
-The syntax for constructing an instance of this type is `f(x) for x in iter [if cond(x)::Bool] `.
-The `[if cond(x)::Bool]` expression is optional and acts as a "guard", effectively
-filtering out values where the condition is false.
+The syntax `f(x) for x in iter` is syntax for constructing an instance of this
+type.
 
 ```jldoctest
-julia> g = (abs2(x) for x in 1:5 if x != 3);
+julia> g = (abs2(x) for x in 1:5);
 
 julia> for x in g
            println(x)
        end
 1
 4
+9
 16
 25
 
 julia> collect(g)
-4-element Vector{Int64}:
+5-element Vector{Int64}:
   1
   4
+  9
  16
  25
 ```
@@ -65,7 +66,7 @@ struct HasShape{N} <: IteratorSize end
 struct IsInfinite <: IteratorSize end
 
 """
-    IteratorSize(itertype::Type) -> IteratorSize
+    IteratorSize(itertype::Type)::IteratorSize
 
 Given the type of an iterator, return one of the following values:
 
@@ -97,7 +98,7 @@ IteratorSize(::Type{Any}) = SizeUnknown()
 
 IteratorSize(::Type{<:Tuple}) = HasLength()
 IteratorSize(::Type{<:AbstractArray{<:Any,N}})  where {N} = HasShape{N}()
-IteratorSize(::Type{Generator{I,F}}) where {I,F} = IteratorSize(I)
+IteratorSize(::Type{<:Generator{I}}) where {I} = (@isdefined I) ? IteratorSize(I) : SizeUnknown()
 
 haslength(iter) = IteratorSize(iter) isa Union{HasShape, HasLength}
 
@@ -106,7 +107,7 @@ struct EltypeUnknown <: IteratorEltype end
 struct HasEltype <: IteratorEltype end
 
 """
-    IteratorEltype(itertype::Type) -> IteratorEltype
+    IteratorEltype(itertype::Type)::IteratorEltype
 
 Given the type of an iterator, return one of the following values:
 
diff --git a/base/genericmemory.jl b/base/genericmemory.jl
new file mode 100644
index 0000000000000..b34095ea37d48
--- /dev/null
+++ b/base/genericmemory.jl
@@ -0,0 +1,424 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## genericmemory.jl: Managed Memory
+
+"""
+    GenericMemory{kind::Symbol, T, addrspace=Core.CPU} <: DenseVector{T}
+
+Fixed-size [`DenseVector{T}`](@ref DenseVector).
+
+`kind` can currently be either `:not_atomic` or `:atomic`. For details on what `:atomic` implies, see [`AtomicMemory`](@ref)
+
+`addrspace` can currently only be set to `Core.CPU`. It is designed to permit extension by other systems such as GPUs, which might define values such as:
+```julia
+module CUDA
+const Generic = bitcast(Core.AddrSpace{CUDA}, 0)
+const Global = bitcast(Core.AddrSpace{CUDA}, 1)
+end
+```
+The exact semantics of these other addrspaces is defined by the specific backend, but will error if the user is attempting to access these on the CPU.
+
+!!! compat "Julia 1.11"
+    This type requires Julia 1.11 or later.
+"""
+GenericMemory
+
+"""
+    Memory{T} == GenericMemory{:not_atomic, T, Core.CPU}
+
+Fixed-size [`DenseVector{T}`](@ref DenseVector).
+
+!!! compat "Julia 1.11"
+    This type requires Julia 1.11 or later.
+"""
+Memory
+
+"""
+    AtomicMemory{T} == GenericMemory{:atomic, T, Core.CPU}
+
+Fixed-size [`DenseVector{T}`](@ref DenseVector).
+Fetching of any of its individual elements is performed atomically
+(with `:monotonic` ordering by default).
+
+!!! warning
+    The access to `AtomicMemory` must be done by either using the [`@atomic`](@ref)
+    macro or the lower level interface functions: `Base.getindex_atomic`,
+    `Base.setindex_atomic!`, `Base.setindexonce_atomic!`,
+    `Base.swapindex_atomic!`, `Base.modifyindex_atomic!`, and `Base.replaceindex_atomic!`.
+
+For details, see [Atomic Operations](@ref man-atomic-operations) as well as macros
+[`@atomic`](@ref), [`@atomiconce`](@ref), [`@atomicswap`](@ref), and [`@atomicreplace`](@ref).
+
+!!! compat "Julia 1.11"
+    This type requires Julia 1.11 or later.
+
+!!! compat "Julia 1.12"
+    Lower level interface functions or `@atomic` macro requires Julia 1.12 or later.
+"""
+AtomicMemory
+
+## Basic functions ##
+
+using Core: memoryrefoffset, memoryref_isassigned # import more functions which were not essential
+
+IndexStyle(::Type{<:GenericMemory}) = IndexLinear()
+
+parent(ref::GenericMemoryRef) = ref.mem
+
+"""
+    memoryindex(ref::GenericMemoryRef)::Int
+
+Get the 1-based index of `ref` in its `GenericMemory`.
+
+# Examples
+```jldoctest
+julia> mem = Memory{String}(undef, 10);
+
+julia> ref = Base.memoryindex(memoryref(mem, 3))
+3
+
+julia> Base.memoryindex(memoryref(Memory{Nothing}(undef, 10), 8))
+8
+```
+
+!!! compat "Julia 1.13"
+    This function requires at least Julia 1.13.
+"""
+memoryindex(ref::GenericMemoryRef) = memoryrefoffset(ref)
+
+pointer(mem::GenericMemoryRef) = unsafe_convert(Ptr{Cvoid}, mem) # no bounds check, even for empty array
+
+_unsetindex!(A::Memory, i::Int) =  (@_propagate_inbounds_meta; _unsetindex!(memoryref(A, i)); A)
+function _unsetindex!(A::MemoryRef{T}) where T
+    @_terminates_locally_meta
+    @_propagate_inbounds_meta
+    @inline
+    @boundscheck memoryref(A, 1)
+    mem = A.mem
+    MemT = typeof(mem)
+    arrayelem = datatype_arrayelem(MemT)
+    elsz = datatype_layoutsize(MemT)
+    isbits = 0; isboxed = 1; isunion = 2
+    arrayelem == isbits && datatype_pointerfree(T::DataType) && return A
+    t = @_gc_preserve_begin mem
+    p = Ptr{Ptr{Cvoid}}(@inbounds pointer(A))
+    if arrayelem == isboxed
+        Intrinsics.atomic_pointerset(p, C_NULL, :monotonic)
+    elseif arrayelem != isunion
+        for j = 1:Core.sizeof(Ptr{Cvoid}):elsz
+            # XXX: this violates memory ordering, since it writes more than one C_NULL to each
+            Intrinsics.atomic_pointerset(p + j - 1, C_NULL, :monotonic)
+        end
+    end
+    @_gc_preserve_end t
+    return A
+end
+
+elsize(@nospecialize _::Type{A}) where {T,A<:GenericMemory{<:Any,T}} = aligned_sizeof(T) # XXX: probably supposed to be the stride?
+sizeof(a::GenericMemory) = Core.sizeof(a)
+
+# multi arg case will be overwritten later. This is needed for bootstrapping
+function isassigned(a::GenericMemory, i::Int)
+    @inline
+    @boundscheck checkbounds(Bool, a, i) || return false
+    return @inbounds memoryref_isassigned(memoryref(a, i), default_access_order(a), false)
+end
+
+isassigned(a::GenericMemoryRef) = memoryref_isassigned(a, default_access_order(a), @_boundscheck)
+
+## copy ##
+function unsafe_copyto!(dest::MemoryRef{T}, src::MemoryRef{T}, n) where {T}
+    @_terminates_globally_notaskstate_meta
+    n == 0 && return dest
+    @boundscheck memoryref(dest, n), memoryref(src, n)
+    if isbitstype(T)
+        tdest = @_gc_preserve_begin dest
+        tsrc = @_gc_preserve_begin src
+        pdest = unsafe_convert(Ptr{Cvoid}, dest)
+        psrc = unsafe_convert(Ptr{Cvoid}, src)
+        memmove(pdest, psrc, aligned_sizeof(T) * n)
+        @_gc_preserve_end tdest
+        @_gc_preserve_end tsrc
+    else
+        ccall(:jl_genericmemory_copyto, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest.mem, dest.ptr_or_offset, src.mem, src.ptr_or_offset, Int(n))
+    end
+    return dest
+end
+
+function unsafe_copyto!(dest::GenericMemoryRef, src::GenericMemoryRef, n)
+    n == 0 && return dest
+    @boundscheck memoryref(dest, n), memoryref(src, n)
+    unsafe_copyto!(dest.mem, memoryrefoffset(dest), src.mem, memoryrefoffset(src), n)
+    return dest
+end
+
+function unsafe_copyto!(dest::Memory{T}, doffs, src::Memory{T}, soffs, n) where{T}
+    n == 0 && return dest
+    unsafe_copyto!(memoryref(dest, doffs), memoryref(src, soffs), n)
+    return dest
+end
+
+#fallback method when types don't match
+function unsafe_copyto!(dest::Memory, doffs, src::Memory, soffs, n)
+    @_terminates_locally_meta
+    n == 0 && return dest
+    # use pointer math to determine if they are deemed to alias
+    destp = pointer(dest, doffs)
+    srcp = pointer(src, soffs)
+    endp = pointer(src, soffs + n - 1)
+    @inbounds if destp < srcp || destp > endp
+        for i = 1:n
+            if isassigned(src, soffs + i - 1)
+                dest[doffs + i - 1] = src[soffs + i - 1]
+            else
+                _unsetindex!(dest, doffs + i - 1)
+            end
+        end
+    else
+        for i = n:-1:1
+            if isassigned(src, soffs + i - 1)
+                dest[doffs + i - 1] = src[soffs + i - 1]
+            else
+                _unsetindex!(dest, doffs + i - 1)
+            end
+        end
+    end
+    return dest
+end
+
+function copy(a::T) where {T<:Memory}
+    # `copy` only throws when the size exceeds the max allocation size,
+    # but since we're copying an existing array, we're guaranteed that this will not happen.
+    @_nothrow_meta
+    newmem = T(undef, length(a))
+    @inbounds unsafe_copyto!(newmem, 1, a, 1, length(a))
+end
+
+copyto!(dest::Memory, src::Memory) = copyto!(dest, 1, src, 1, length(src))
+function copyto!(dest::Memory, doffs::Integer, src::Memory, soffs::Integer, n::Integer)
+    n < 0 && _throw_argerror("Number of elements to copy must be non-negative.")
+    unsafe_copyto!(dest, doffs, src, soffs, n)
+    return dest
+end
+
+
+## Constructors ##
+
+similar(a::GenericMemory) =
+    typeof(a)(undef, length(a))
+similar(a::GenericMemory{kind,<:Any,AS}, T::Type) where {kind,AS} =
+    GenericMemory{kind,T,AS}(undef, length(a))
+similar(a::GenericMemory, m::Int) =
+    typeof(a)(undef, m)
+similar(a::GenericMemory{kind,<:Any,AS}, T::Type, dims::Dims{1}) where {kind,AS} =
+    GenericMemory{kind,T,AS}(undef, dims[1])
+similar(a::GenericMemory, dims::Dims{1}) =
+    typeof(a)(undef, dims[1])
+
+function fill!(a::Union{Memory{UInt8}, Memory{Int8}}, x::Integer)
+    t = @_gc_preserve_begin a
+    p = unsafe_convert(Ptr{Cvoid}, a)
+    T = eltype(a)
+    memset(p, x isa T ? x : convert(T, x), length(a) % UInt)
+    @_gc_preserve_end t
+    return a
+end
+
+## Conversions ##
+
+convert(::Type{T}, a::AbstractArray) where {T<:Memory} = a isa T ? a : T(a)::T
+
+promote_rule(a::Type{Memory{T}}, b::Type{Memory{S}}) where {T,S} = el_same(promote_type(T,S), a, b)
+
+## Constructors ##
+
+# constructors should make copies
+Memory{T}(x::AbstractArray{S,1}) where {T,S} = copyto_axcheck!(Memory{T}(undef, size(x)), x)
+
+## copying iterators to containers
+
+## Indexing: getindex ##
+
+# Faster contiguous indexing using copyto! for AbstractUnitRange and Colon
+function getindex(A::Memory, I::AbstractUnitRange{<:Integer})
+    @inline
+    @boundscheck checkbounds(A, I)
+    lI = length(I)
+    X = similar(A, axes(I))
+    if lI > 0
+        copyto!(X, firstindex(X), A, first(I), lI)
+    end
+    return X
+end
+
+# getindex for carrying out logical indexing for AbstractUnitRange{Bool} as Bool <: Integer
+getindex(a::Memory, r::AbstractUnitRange{Bool}) = getindex(a, to_index(r))
+
+getindex(A::Memory, c::Colon) = copy(A)
+
+## Indexing: setindex! ##
+
+function _setindex!(A::Memory{T}, x::T, i1::Int) where {T}
+    ref = memoryrefnew(memoryref(A), i1, @_boundscheck)
+    memoryrefset!(ref, x, :not_atomic, @_boundscheck)
+    return A
+end
+
+function setindex!(A::Memory{T}, x, i1::Int) where {T}
+    @_propagate_inbounds_meta
+    val = x isa T ? x : convert(T,x)::T
+    return _setindex!(A, val, i1)
+end
+
+function setindex!(A::Memory{T}, x, i1::Int, i2::Int, I::Int...) where {T}
+    @inline
+    @boundscheck (i2 == 1 && all(==(1), I)) || throw_boundserror(A, (i1, i2, I...))
+    setindex!(A, x, i1)
+end
+
+# Faster contiguous setindex! with copyto!
+function setindex!(A::Memory{T}, X::Memory{T}, I::AbstractUnitRange{Int}) where T
+    @inline
+    @boundscheck checkbounds(A, I)
+    lI = length(I)
+    @boundscheck setindex_shape_check(X, lI)
+    if lI > 0
+        unsafe_copyto!(A, first(I), X, 1, lI)
+    end
+    return A
+end
+function setindex!(A::Memory{T}, X::Memory{T}, c::Colon) where T
+    @inline
+    lI = length(A)
+    @boundscheck setindex_shape_check(X, lI)
+    if lI > 0
+        unsafe_copyto!(A, 1, X, 1, lI)
+    end
+    return A
+end
+
+# use memcmp for cmp on byte arrays
+function cmp(a::Memory{UInt8}, b::Memory{UInt8})
+    ta = @_gc_preserve_begin a
+    tb = @_gc_preserve_begin b
+    pa = unsafe_convert(Ptr{Cvoid}, a)
+    pb = unsafe_convert(Ptr{Cvoid}, b)
+    c = memcmp(pa, pb, min(length(a),length(b)))
+    @_gc_preserve_end ta
+    @_gc_preserve_end tb
+    return c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b))
+end
+
+const BitIntegerMemory{N} = Union{map(T->Memory{T}, BitInteger_types)...}
+# use memcmp for == on bit integer types
+function ==(a::M, b::M) where {M <: BitIntegerMemory}
+    if length(a) == length(b)
+        ta = @_gc_preserve_begin a
+        tb = @_gc_preserve_begin b
+        pa = unsafe_convert(Ptr{Cvoid}, a)
+        pb = unsafe_convert(Ptr{Cvoid}, b)
+        c = memcmp(pa, pb, sizeof(eltype(M)) * length(a))
+        @_gc_preserve_end ta
+        @_gc_preserve_end tb
+        return c == 0
+    else
+        return false
+    end
+end
+
+function findall(pred::Fix2{typeof(in),<:Union{Memory{<:Real},Real}}, x::Memory{<:Real})
+    if issorted(x, Sort.Forward) && issorted(pred.x, Sort.Forward)
+        return _sortedfindin(x, pred.x)
+    else
+        return _findin(x, pred.x)
+    end
+end
+
+# Copying subregions
+function indcopy(sz::Dims, I::GenericMemory)
+    n = length(I)
+    s = sz[n]
+    for i = n+1:length(sz)
+        s *= sz[i]
+    end
+    dst = eltype(I)[_findin(I[i], i < n ? (1:sz[i]) : (1:s)) for i = 1:n]
+    src = eltype(I)[I[i][_findin(I[i], i < n ? (1:sz[i]) : (1:s))] for i = 1:n]
+    dst, src
+end
+
+# get, set(once), modify, swap and replace at index, atomically
+function getindex_atomic(mem::GenericMemory, order::Symbol, i::Int)
+    @_propagate_inbounds_meta
+    memref = memoryref(mem, i)
+    return memoryrefget(memref, order, @_boundscheck)
+end
+
+function setindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)
+    @_propagate_inbounds_meta
+    T = eltype(mem)
+    memref = memoryref(mem, i)
+    return memoryrefset!(
+        memref,
+        val isa T ? val : convert(T, val)::T,
+        order,
+        @_boundscheck
+    )
+end
+
+function setindexonce_atomic!(
+    mem::GenericMemory,
+    success_order::Symbol,
+    fail_order::Symbol,
+    val,
+    i::Int,
+)
+    @_propagate_inbounds_meta
+    T = eltype(mem)
+    memref = memoryref(mem, i)
+    return Core.memoryrefsetonce!(
+        memref,
+        val isa T ? val : convert(T, val)::T,
+        success_order,
+        fail_order,
+        @_boundscheck
+    )
+end
+
+function modifyindex_atomic!(mem::GenericMemory, order::Symbol, op, val, i::Int)
+    @_propagate_inbounds_meta
+    memref = memoryref(mem, i)
+    return Core.memoryrefmodify!(memref, op, val, order, @_boundscheck)
+end
+
+function swapindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)
+    @_propagate_inbounds_meta
+    T = eltype(mem)
+    memref = memoryref(mem, i)
+    return Core.memoryrefswap!(
+        memref,
+        val isa T ? val : convert(T, val)::T,
+        order,
+        @_boundscheck
+    )
+end
+
+function replaceindex_atomic!(
+    mem::GenericMemory,
+    success_order::Symbol,
+    fail_order::Symbol,
+    expected,
+    desired,
+    i::Int,
+)
+    @_propagate_inbounds_meta
+    T = eltype(mem)
+    memref = memoryref(mem, i)
+    return Core.memoryrefreplace!(
+        memref,
+        expected,
+        desired isa T ? desired : convert(T, desired)::T,
+        success_order,
+        fail_order,
+        @_boundscheck,
+    )
+end
diff --git a/base/gmp.jl b/base/gmp.jl
index 8a1451be7a590..ee8e620603e9d 100644
--- a/base/gmp.jl
+++ b/base/gmp.jl
@@ -10,7 +10,10 @@ import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor,
              trailing_zeros, trailing_ones, count_ones, count_zeros, tryparse_internal,
              bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb,
              widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit,
-             sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit
+             sign, isodd, iseven, digits!, hash, hash_integer, top_set_bit,
+             ispositive, isnegative, clamp, unsafe_takestring
+
+import Core: Signed, Float16, Float32, Float64
 
 if Clong == Int32
     const ClongMax = Union{Int8, Int16, Int32}
@@ -29,10 +32,13 @@ else
     const libgmp = "libgmp.so.10"
 end
 
-version() = VersionNumber(unsafe_string(unsafe_load(cglobal((:__gmp_version, libgmp), Ptr{Cchar}))))
+_version() = unsafe_string(unsafe_load(cglobal((:__gmp_version, libgmp), Ptr{Cchar})))
+version() = VersionNumber(_version())
+major_version() = _version()[1]
 bits_per_limb() = Int(unsafe_load(cglobal((:__gmp_bits_per_limb, libgmp), Cint)))
 
 const VERSION = version()
+const MAJOR_VERSION = major_version()
 const BITS_PER_LIMB = bits_per_limb()
 
 # GMP's mp_limb_t is by default a typedef of `unsigned long`, but can also be configured to be either
@@ -101,7 +107,7 @@ const ALLOC_OVERFLOW_FUNCTION = Ref(false)
 
 function __init__()
     try
-        if version().major != VERSION.major || bits_per_limb() != BITS_PER_LIMB
+        if major_version() != MAJOR_VERSION || bits_per_limb() != BITS_PER_LIMB
             msg = """The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb()))
                      does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB).
                      Please rebuild Julia."""
@@ -145,7 +151,7 @@ using ..GMP: BigInt, Limb, BITS_PER_LIMB, libgmp
 const mpz_t = Ref{BigInt}
 const bitcnt_t = Culong
 
-gmpz(op::Symbol) = (Symbol(:__gmpz_, op), libgmp)
+gmpz(op::Symbol) = Expr(:tuple, QuoteNode(Symbol(:__gmpz_, op)), GlobalRef(MPZ, :libgmp))
 
 init!(x::BigInt) = (ccall((:__gmpz_init, libgmp), Cvoid, (mpz_t,), x); x)
 init2!(x::BigInt, a) = (ccall((:__gmpz_init2, libgmp), Cvoid, (mpz_t, bitcnt_t), x, a); x)
@@ -170,8 +176,8 @@ end
 
 invert!(x::BigInt, a::BigInt, b::BigInt) =
     ccall((:__gmpz_invert, libgmp), Cint, (mpz_t, mpz_t, mpz_t), x, a, b)
-invert(a::BigInt, b::BigInt) = invert!(BigInt(), a, b)
 invert!(x::BigInt, b::BigInt) = invert!(x, x, b)
+invert(a::BigInt, b::BigInt) = (ret=BigInt(); invert!(ret, a, b); ret)
 
 for op in (:add_ui, :sub_ui, :mul_ui, :mul_2exp, :fdiv_q_2exp, :pow_ui, :bin_ui)
     op! = Symbol(op, :!)
@@ -251,6 +257,7 @@ function export!(a::AbstractVector{T}, n::BigInt; order::Integer=-1, nails::Inte
     stride(a, 1) == 1 || throw(ArgumentError("a must have stride 1"))
     ndigits = cld(sizeinbase(n, 2), 8*sizeof(T) - nails)
     length(a) < ndigits && resize!(a, ndigits)
+    fill!(a, zero(T))
     count = Ref{Csize_t}()
     ccall((:__gmpz_export, libgmp), Ptr{T}, (Ptr{T}, Ref{Csize_t}, Cint, Csize_t, Cint, Csize_t, mpz_t),
         a, count, order, sizeof(T), endian, nails, n)
@@ -260,8 +267,6 @@ end
 
 limbs_write!(x::BigInt, a) = ccall((:__gmpz_limbs_write, libgmp), Ptr{Limb}, (mpz_t, Clong), x, a)
 limbs_finish!(x::BigInt, a) = ccall((:__gmpz_limbs_finish, libgmp), Cvoid, (mpz_t, Clong), x, a)
-import!(x::BigInt, a, b, c, d, e, f) = ccall((:__gmpz_import, libgmp), Cvoid,
-    (mpz_t, Csize_t, Cint, Csize_t, Cint, Csize_t, Ptr{Cvoid}), x, a, b, c, d, e, f)
 
 setbit!(x, a) = (ccall((:__gmpz_setbit, libgmp), Cvoid, (mpz_t, bitcnt_t), x, a); x)
 tstbit(a::BigInt, b) = ccall((:__gmpz_tstbit, libgmp), Cint, (mpz_t, bitcnt_t), a, b) % Bool
@@ -281,8 +286,6 @@ signed(x::BigInt) = x
 BigInt(x::BigInt) = x
 Signed(x::BigInt) = x
 
-hastypemax(::Type{BigInt}) = false
-
 function tryparse_internal(::Type{BigInt}, s::AbstractString, startpos::Int, endpos::Int, base_::Integer, raise::Bool)
     # don't make a copy in the common case where we are parsing a whole String
     bstr = startpos == firstindex(s) && endpos == lastindex(s) ? String(s) : String(SubString(s,startpos,endpos))
@@ -320,11 +323,6 @@ function BigInt(x::Float64)
     unsafe_trunc(BigInt,x)
 end
 
-function trunc(::Type{BigInt}, x::Union{Float16,Float32,Float64})
-    isfinite(x) || throw(InexactError(:trunc, BigInt, x))
-    unsafe_trunc(BigInt,x)
-end
-
 BigInt(x::Float16) = BigInt(Float64(x))
 BigInt(x::Float32) = BigInt(Float64(x))
 
@@ -363,6 +361,8 @@ end
 
 rem(x::Integer, ::Type{BigInt}) = BigInt(x)
 
+clamp(x, ::Type{BigInt}) = convert(BigInt, x)
+
 isodd(x::BigInt) = MPZ.tstbit(x, 0)
 iseven(x::BigInt) = !isodd(x)
 
@@ -383,7 +383,7 @@ function (::Type{T})(x::BigInt) where T<:Base.BitSigned
     else
         0 <= n <= cld(sizeof(T),sizeof(Limb)) || throw(InexactError(nameof(T), T, x))
         y = x % T
-        ispos(x) ⊻ (y > 0) && throw(InexactError(nameof(T), T, x)) # catch overflow
+        ispositive(x) ⊻ (y > 0) && throw(InexactError(nameof(T), T, x)) # catch overflow
         y
     end
 end
@@ -605,14 +605,23 @@ Number of ones in the binary representation of abs(x).
 """
 count_ones_abs(x::BigInt) = iszero(x) ? 0 : MPZ.mpn_popcount(x)
 
+# all uses of _bit_magnitude MUST ensure at callsite that `x` is strictly positive, otherwise it is UB
+_bit_magnitude(x::BigInt) = x.size * sizeof(Limb) << 3 - leading_zeros(GC.@preserve x unsafe_load(x.d, x.size))
+
+function exponent(x::BigInt)
+    iszero(x) && throw(DomainError(x, "cannot be zero"))
+    ux = abs(x)
+    return _bit_magnitude(ux) - 1
+end
+
 function top_set_bit(x::BigInt)
-    isneg(x) && throw(DomainError(x, "top_set_bit only supports negative arguments when they have type BitSigned."))
+    isnegative(x) && throw(DomainError(x, "top_set_bit only supports negative arguments when they have type BitSigned."))
     iszero(x) && return 0
-    x.size * sizeof(Limb) << 3 - leading_zeros(GC.@preserve x unsafe_load(x.d, x.size))
+    return _bit_magnitude(x)
 end
 
-divrem(x::BigInt, y::BigInt) = MPZ.tdiv_qr(x, y)
-divrem(x::BigInt, y::Integer) = MPZ.tdiv_qr(x, big(y))
+divrem(x::BigInt, y::BigInt,  ::typeof(RoundToZero) = RoundToZero) = MPZ.tdiv_qr(x, y)
+divrem(x::BigInt, y::Integer, ::typeof(RoundToZero) = RoundToZero) = MPZ.tdiv_qr(x, BigInt(y))
 
 cmp(x::BigInt, y::BigInt) = sign(MPZ.cmp(x, y))
 cmp(x::BigInt, y::ClongMax) = sign(MPZ.cmp_si(x, y))
@@ -628,11 +637,11 @@ isqrt(x::BigInt) = MPZ.sqrt(x)
 ^(x::BigInt, y::Culong) = MPZ.pow_ui(x, y)
 
 function bigint_pow(x::BigInt, y::Integer)
+    x == 1 && return x
+    x == -1 && return isodd(y) ? x : -x
     if y<0; throw(DomainError(y, "`y` cannot be negative.")); end
     @noinline throw1(y) =
         throw(OverflowError("exponent $y is too large and computation will overflow"))
-    if x== 1; return x; end
-    if x==-1; return isodd(y) ? x : -x; end
     if y>typemax(Culong)
        x==0 && return x
 
@@ -663,11 +672,6 @@ end
 powermod(x::Integer, p::Integer, m::BigInt) = powermod(big(x), big(p), m)
 
 function gcdx(a::BigInt, b::BigInt)
-    if iszero(b) # shortcut this to ensure consistent results with gcdx(a,b)
-        return a < 0 ? (-a,-ONE,b) : (a,one(BigInt),b)
-        # we don't return the globals ONE and ZERO in case the user wants to
-        # mutate the result
-    end
     g, s, t = MPZ.gcdext(a, b)
     if t == 0
         # work around a difference in some versions of GMP
@@ -685,27 +689,30 @@ sum(arr::Union{AbstractArray{BigInt}, Tuple{BigInt, Vararg{BigInt}}}) =
     foldl(MPZ.add!, arr; init=BigInt(0))
 
 function prod(arr::AbstractArray{BigInt})
-    # compute first the needed number of bits for the result,
-    # to avoid re-allocations;
-    # GMP will always request n+m limbs for the result in MPZ.mul!,
-    # if the arguments have n and m limbs; so we add all the bits
-    # taken by the array elements, and add BITS_PER_LIMB to that,
-    # to account for the rounding to limbs in MPZ.mul!
-    # (BITS_PER_LIMB-1 would typically be enough, to which we add
-    # 1 for the initial multiplication by init=1 in foldl)
-    nbits = BITS_PER_LIMB
-    for x in arr
-        iszero(x) && return zero(BigInt)
-        xsize = abs(x.size)
-        lz = GC.@preserve x leading_zeros(unsafe_load(x.d, xsize))
-        nbits += xsize * BITS_PER_LIMB - lz
+    any(iszero, arr) && return zero(BigInt)
+    _prod(arr, firstindex(arr), lastindex(arr))
+end
+function _prod(arr::AbstractArray{BigInt}, lo, hi)
+    if hi - lo + 1 <= 16
+        # compute first the needed number of bits for the result,
+        # to avoid re-allocations
+        nlimbs = 0
+        for i in lo:hi
+            nlimbs += abs(arr[i].size)
+        end
+        init = BigInt(; nbits=nlimbs*BITS_PER_LIMB)
+        MPZ.set_si!(init, 1)
+        for i in lo:hi
+            MPZ.mul!(init, arr[i])
+        end
+        init
+    else
+        mid = (lo + hi) ÷ 2
+        MPZ.mul!(_prod(arr, lo, mid), _prod(arr, mid+1, hi))
     end
-    init = BigInt(; nbits)
-    MPZ.set_si!(init, 1)
-    foldl(MPZ.mul!, arr; init)
 end
 
-factorial(x::BigInt) = isneg(x) ? BigInt(0) : MPZ.fac_ui(x)
+factorial(n::BigInt) = !isnegative(n) ? MPZ.fac_ui(n) : throw(DomainError(n, "`n` must not be negative."))
 
 function binomial(n::BigInt, k::Integer)
     k < 0 && return BigInt(0)
@@ -737,17 +744,17 @@ isone(x::BigInt) = x == Culong(1)
 <(i::Integer, x::BigInt) = cmp(x,i) > 0
 <(x::BigInt, f::CdoubleMax) = isnan(f) ? false : cmp(x,f) < 0
 <(f::CdoubleMax, x::BigInt) = isnan(f) ? false : cmp(x,f) > 0
-isneg(x::BigInt) = x.size < 0
-ispos(x::BigInt) = x.size > 0
+isnegative(x::BigInt) = x.size < 0
+ispositive(x::BigInt) = x.size > 0
 
-signbit(x::BigInt) = isneg(x)
+signbit(x::BigInt) = isnegative(x)
 flipsign!(x::BigInt, y::Integer) = (signbit(y) && (x.size = -x.size); x)
 flipsign( x::BigInt, y::Integer) = signbit(y) ? -x : x
 flipsign( x::BigInt, y::BigInt)  = signbit(y) ? -x : x
 # above method to resolving ambiguities with flipsign(::T, ::T) where T<:Signed
 function sign(x::BigInt)
-    isneg(x) && return -one(x)
-    ispos(x) && return one(x)
+    isnegative(x) && return -one(x)
+    ispositive(x) && return one(x)
     return x
 end
 
@@ -759,13 +766,13 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1)
     iszero(n) && pad < 1 && return ""
     nd1 = ndigits(n, base=base)
     nd  = max(nd1, pad)
-    sv  = Base.StringVector(nd + isneg(n))
+    sv  = Base.StringMemory(nd + isnegative(n))
     GC.@preserve sv MPZ.get_str!(pointer(sv) + nd - nd1, base, n)
-    @inbounds for i = (1:nd-nd1) .+ isneg(n)
+    @inbounds for i = (1:nd-nd1) .+ isnegative(n)
         sv[i] = '0' % UInt8
     end
-    isneg(n) && (sv[1] = '-' % UInt8)
-    String(sv)
+    isnegative(n) && (sv[1] = '-' % UInt8)
+    unsafe_takestring(sv)
 end
 
 function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer}
@@ -774,7 +781,7 @@ function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:
             # fast path using mpz_get_str via string(n; base)
             s = codeunits(string(n; base))
             i, j = firstindex(a)-1, length(s)+1
-            lasti = min(lastindex(a), firstindex(a) + length(s)-1 - isneg(n))
+            lasti = min(lastindex(a), firstindex(a) + length(s)-1 - isnegative(n))
             while i < lasti
                 # base ≤ 36: 0-9, plus a-z for 10-35
                 # base > 36: 0-9, plus A-Z for 10-35 and a-z for 36..61
@@ -783,14 +790,14 @@ function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:
             end
             lasti = lastindex(a)
             while i < lasti; a[i+=1] = zero(T); end
-            return isneg(n) ? map!(-,a,a) : a
+            return isnegative(n) ? map!(-,a,a) : a
         elseif a isa StridedVector{<:Base.BitInteger} && stride(a,1) == 1 && ispow2(base) && base-1 ≤ typemax(T)
             # fast path using mpz_export
             origlen = length(a)
             _, writelen = MPZ.export!(a, n; nails = 8sizeof(T) - trailing_zeros(base))
             length(a) != origlen && resize!(a, origlen) # truncate to least-significant digits
             a[begin+writelen:end] .= zero(T)
-            return isneg(n) ? map!(-,a,a) : a
+            return isnegative(n) ? map!(-,a,a) : a
         end
     end
     return invoke(digits!, Tuple{typeof(a), Integer}, a, n; base) # slow generic fallback
@@ -839,31 +846,68 @@ Base.add_with_overflow(a::BigInt, b::BigInt) = a + b, false
 Base.sub_with_overflow(a::BigInt, b::BigInt) = a - b, false
 Base.mul_with_overflow(a::BigInt, b::BigInt) = a * b, false
 
-Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), stackdict, x)
+# checked_pow doesn't follow the same promotion rules as the others, above.
+Base.checked_pow(x::BigInt, p::Integer) = x^p
+Base.checked_pow(x::Integer, p::BigInt) = x^p
+Base.checked_pow(x::BigInt, p::BigInt) = x^p
+
+Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), stackdict, x)::BigInt
 
 ## streamlined hashing for BigInt, by avoiding allocation from shifts ##
 
+Base._hash_shl!(x::BigInt, n) = MPZ.mul_2exp!(x, n)
+
 if Limb === UInt64 === UInt
     # On 64 bit systems we can define
     # an optimized version for BigInt of hash_integer (used e.g. for Rational{BigInt}),
     # and of hash
 
-    using .Base: hash_uint
+    using .Base: HASH_SECRET, hash_bytes, hash_finalizer
 
-    function hash_integer(n::BigInt, h::UInt)
-        GC.@preserve n begin
-            s = n.size
-            s == 0 && return hash_integer(0, h)
-            p = convert(Ptr{UInt64}, n.d)
-            b = unsafe_load(p)
-            h ⊻= hash_uint(ifelse(s < 0, -b, b) ⊻ h)
-            for k = 2:abs(s)
-                h ⊻= hash_uint(unsafe_load(p, k) ⊻ h)
-            end
-            return h
+    # UnsafeLimbView provides a safe iterator interface to BigInt limb data
+    struct UnsafeLimbView <: AbstractVector{UInt8}
+        bigint::BigInt
+        start_byte::Int
+        num_bytes::Int
+    end
+
+    function Base.size(view::UnsafeLimbView)
+        return (view.num_bytes,)
+    end
+
+    function Base.getindex(view::UnsafeLimbView, i::Int)
+        @boundscheck checkbounds(view, i)
+        GC.@preserve view begin
+            limb_index = div(view.start_byte + i - 2, 8) + 1
+            byte_in_limb = (view.start_byte + i - 2) % 8
+            limb = unsafe_load(view.bigint.d, limb_index)
+            return UInt8((limb >> (8 * byte_in_limb)) & 0xff)
         end
     end
 
+    function Base.iterate(view::UnsafeLimbView, state::Int = 1)
+        state > view.num_bytes && return nothing
+        return @inbounds(view[state]), state + 1
+    end
+
+    function Base.length(view::UnsafeLimbView)
+        return view.num_bytes
+    end
+
+    function hash_integer(n::BigInt, h::UInt)
+        iszero(n) && return hash_integer(0, h)
+        s = n.size
+        h ⊻= (s < 0)
+
+        us = abs(s)
+        leading_zero_bytes = div(leading_zeros(unsafe_load(n.d, us)), 8)
+        num_bytes = 8 * us - leading_zero_bytes
+
+        # Use UnsafeLimbView for safe iterator-based access
+        limb_view = UnsafeLimbView(n, 1, num_bytes)
+        return hash_bytes(limb_view, h, HASH_SECRET)
+    end
+
     function hash(x::BigInt, h::UInt)
         GC.@preserve x begin
             sz = x.size
@@ -892,23 +936,15 @@ if Limb === UInt64 === UInt
                 return hash(ldexp(flipsign(Float64(limb), sz), pow), h)
             end
             h = hash_integer(pow, h)
-            h ⊻= hash_uint(flipsign(limb, sz) ⊻ h)
-            for idx = idx+1:asz
-                if shift == 0
-                    limb = unsafe_load(ptr, idx)
-                else
-                    limb1 = limb2
-                    if idx == asz
-                        limb = limb1 >> shift
-                        limb == 0 && break # don't hash leading zeros
-                    else
-                        limb2 = unsafe_load(ptr, idx+1)
-                        limb = limb2 << upshift | limb1 >> shift
-                    end
-                end
-                h ⊻= hash_uint(limb ⊻ h)
-            end
-            return h
+
+            h ⊻= (sz < 0)
+            leading_zero_bytes = div(leading_zeros(unsafe_load(x.d, asz)), 8)
+            trailing_zero_bytes = div(pow, 8)
+            num_bytes = 8 * asz - (leading_zero_bytes + trailing_zero_bytes)
+
+            # Use UnsafeLimbView for safe iterator-based access
+            limb_view = UnsafeLimbView(x, trailing_zero_bytes + 1, num_bytes)
+            return hash_bytes(limb_view, h, HASH_SECRET)
         end
     end
 end
@@ -917,9 +953,9 @@ module MPQ
 
 # Rational{BigInt}
 import .Base: unsafe_rational, __throw_rational_argerror_zero
-import ..GMP: BigInt, MPZ, Limb, isneg, libgmp
+import ..GMP: BigInt, MPZ, Limb, libgmp
 
-gmpq(op::Symbol) = (Symbol(:__gmpq_, op), libgmp)
+gmpq(op::Symbol) = Expr(:tuple, QuoteNode(Symbol(:__gmpq_, op)), GlobalRef(MPZ, :libgmp))
 
 mutable struct _MPQ
     num_alloc::Cint
@@ -995,7 +1031,7 @@ end
 # define add, sub, mul, div, and their inplace versions
 function add!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt})
     if iszero(x.den) || iszero(y.den)
-        if iszero(x.den) && iszero(y.den) && isneg(x.num) != isneg(y.num)
+        if iszero(x.den) && iszero(y.den) && isnegative(x.num) != isnegative(y.num)
             throw(DivideError())
         end
         return set!(z, iszero(x.den) ? x : y)
@@ -1008,7 +1044,7 @@ end
 
 function sub!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt})
     if iszero(x.den) || iszero(y.den)
-        if iszero(x.den) && iszero(y.den) && isneg(x.num) == isneg(y.num)
+        if iszero(x.den) && iszero(y.den) && isnegative(x.num) == isnegative(y.num)
             throw(DivideError())
         end
         iszero(x.den) && return set!(z, x)
@@ -1025,7 +1061,7 @@ function mul!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt})
         if iszero(x.num) || iszero(y.num)
             throw(DivideError())
         end
-        return set_si!(z, ifelse(xor(isneg(x.num), isneg(y.num)), -1, 1), 0)
+        return set_si!(z, ifelse(xor(isnegative(x.num), isnegative(y.num)), -1, 1), 0)
     end
     zq = _MPQ(z)
     ccall((:__gmpq_mul, libgmp), Cvoid,
@@ -1038,7 +1074,7 @@ function div!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt})
         if iszero(y.den)
             throw(DivideError())
         end
-        isneg(y.num) || return set!(z, x)
+        isnegative(y.num) || return set!(z, x)
         return set_si!(z, flipsign(-1, x.num), 0)
     elseif iszero(y.den)
         return set_si!(z, 0, 1)
diff --git a/base/hamt.jl b/base/hamt.jl
new file mode 100644
index 0000000000000..c77c592b17e58
--- /dev/null
+++ b/base/hamt.jl
@@ -0,0 +1,277 @@
+module HashArrayMappedTries
+
+export HAMT
+
+##
+# Implements "Ideal Hash Trees" Phil Bagwell 2000
+#
+# Notable divergence is that we forgo a resizable root table.
+# Root tables improve lookup performance for large sizes, but
+# limit space efficiency if the HAMT is used for a persistent
+# dictionary, since each persistent operation would duplicate
+# the root table.
+#
+# We do not handle perfect hash-collision. We would need to
+# add an additional node type for Collisions. Perfect hash
+# collisions should not occur in practice since we perform
+# rehashing after using 55 bits (MAX_SHIFT) of the original hash.
+#
+# Use https://github.com/vchuravy/HashArrayMappedTries.jl if
+# you want to use this implementation in a package.
+#
+# A HAMT is formed by tree of levels, where at each level
+# we use a portion of the bits of the hash for indexing
+#
+# We use a branching width (ENTRY_COUNT) of 32, giving us
+# 5bits of indexing per level
+# 0000_00000_00000_00000_00000_00000_00000_00000_00000_00000_00000_00000
+# L11  L10   L9    L8    L7    L6    L5    L4    L3    L2    L1    L0
+#
+# At each level we use a 32bit bitmap to store which elements are occupied.
+# Since our storage is "sparse" we need to map from index in [0,31] to
+# the actual storage index. We mask the bitmap with (1 << i) - 1 and count
+# the ones in the result. The number of set ones (+1) gives us the index
+# into the storage array.
+#
+# HAMT can be both persistent and non-persistent.
+# The `path` function searches for a matching entries, and for persistency
+# optionally copies the path so that it can be safely mutated.
+
+# TODO:
+# When `trie.data` becomes empty we could remove it from it's parent,
+# but we only know so fairly late. Maybe have a compact function?
+
+const ENTRY_COUNT = UInt(32)
+const BITMAP = UInt32
+const NBITS = sizeof(UInt) * 8
+# @assert ispow2(ENTRY_COUNT)
+const BITS_PER_LEVEL = trailing_zeros(ENTRY_COUNT)
+const LEVEL_MASK = (UInt(1) << BITS_PER_LEVEL) - UInt(1)
+const MAX_SHIFT = (NBITS ÷ BITS_PER_LEVEL - 1) *  BITS_PER_LEVEL
+
+mutable struct Leaf{K, V}
+    const key::K
+    const val::V
+end
+
+"""
+    HAMT{K,V}
+
+A HashArrayMappedTrie that optionally supports persistence.
+"""
+mutable struct HAMT{K, V}
+    const data::Vector{Union{Leaf{K, V}, HAMT{K, V}}}
+    bitmap::BITMAP
+    HAMT{K,V}(data, bitmap) where {K,V} = new{K,V}(data, bitmap)
+    HAMT{K, V}() where {K, V} = new{K,V}(Vector{Union{Leaf{K, V}, HAMT{K, V}}}(undef, 0), zero(BITMAP))
+end
+
+Base.@assume_effects :nothrow :effect_free function init_hamt(K, V, k, v)
+    # For a single element we can't have a 'hash-collision
+    trie = HAMT{K,V}(Vector{Union{Leaf{K, V}, HAMT{K, V}}}(undef, 1), zero(BITMAP))
+    trie.data[1] = Leaf{K,V}(k,v)
+    return trie
+end
+
+Base.@assume_effects :effect_free function HAMT{K,V}((k,v)::Pair{K,V}) where {K, V}
+    trie = init_hamt(K, V, k, v)
+    bi = BitmapIndex(HashState(k))
+    set!(trie, bi)
+    return trie
+end
+HAMT{K,V}(kv::Pair) where {K, V} = HAMT{K,V}(convert(Pair{K,V}, kv))
+
+HAMT(pair::Pair{K,V}) where {K, V} = HAMT{K,V}(pair)
+
+# TODO: Parameterize by hash function
+struct HashState{K}
+    key::K
+    hash::UInt
+    depth::Int
+    shift::Int
+end
+HashState(key) = HashState(key, objectid(key), 0, 0)
+# Reconstruct
+Base.@assume_effects :terminates_locally function HashState(other::HashState, key)
+    h = HashState(key)
+    while h.depth !== other.depth
+        h = next(h)
+    end
+    return h
+end
+
+function next(h::HashState)
+    depth = h.depth + 1
+    shift = h.shift + BITS_PER_LEVEL
+    # Assert disabled for effect precision
+    # @assert h.shift <= MAX_SHIFT
+    if shift > MAX_SHIFT
+        # Note we use `UInt(depth ÷ BITS_PER_LEVEL)` to seed the hash function
+        # the hash docs, do we need to hash `UInt(depth ÷ BITS_PER_LEVEL)` first?
+        h_hash = hash(objectid(h.key), UInt(depth ÷ BITS_PER_LEVEL))
+        shift = 0
+    else
+        h_hash = h.hash
+    end
+    return HashState(h.key, h_hash, depth, shift)
+end
+
+struct BitmapIndex
+    x::UInt
+end
+BitmapIndex(h::HashState) = BitmapIndex((h.hash >> h.shift) & LEVEL_MASK)
+
+Base.:(<<)(v, bi::BitmapIndex) = v << bi.x
+Base.:(>>)(v, bi::BitmapIndex) = v >> bi.x
+
+isset(trie::HAMT, bi::BitmapIndex) = isodd(trie.bitmap >> bi)
+function set!(trie::HAMT, bi::BitmapIndex)
+    trie.bitmap |= (UInt32(1) << bi)
+    # Invariant: count_ones(trie.bitmap) == Base.length(trie.data)
+end
+
+function unset!(trie::HAMT, bi::BitmapIndex)
+    trie.bitmap &= ~(UInt32(1) << bi)
+    # Invariant: count_ones(trie.bitmap) == Base.length(trie.data)
+end
+
+function entry_index(trie::HAMT, bi::BitmapIndex)
+    mask = (UInt32(1) << bi.x) - UInt32(1)
+    count_ones(trie.bitmap & mask) + 1
+end
+
+islevel_empty(trie::HAMT) = trie.bitmap == 0
+islevel_empty(::Leaf) = false
+
+"""
+    path(trie, h, copyf)::(found, present, trie, i, top, level)
+
+Internal function that walks a HAMT and finds the slot for hash.
+Returns if a value is `present` and a value is `found`.
+
+It returns the `trie` and the index `i` into `trie.data`, as well
+as the current `level`.
+
+If a copy function is provided `copyf` use the return `top` for the
+new persistent tree.
+"""
+@inline @Base.assume_effects :noub :terminates_locally function path(trie::HAMT{K,V}, key, h::HashState, copy=false) where {K, V}
+    if copy
+        trie = top = HAMT{K,V}(Base.copy(trie.data), trie.bitmap)
+    else
+        trie = top = trie
+    end
+    while true
+        bi = BitmapIndex(h)
+        i = entry_index(trie, bi)
+        if isset(trie, bi)
+            next = @inbounds trie.data[i]
+            if next isa Leaf{K,V}
+                # Check if key match if not we will need to grow.
+                found = next.key === h.key
+                return found, true, trie, i, bi, top, h
+            end
+            if copy
+                next = HAMT{K,V}(Base.copy(next.data), next.bitmap)
+                # :noub because entry_index is guaranteed to be inbounds for trie.data
+                @inbounds trie.data[i] = next
+            end
+            trie = next::HAMT{K,V}
+        else
+            # found empty slot
+            return true, false, trie, i, bi, top, h
+        end
+        h = HashArrayMappedTries.next(h)
+    end
+end
+
+"""
+Internal function that given an obtained path, either set the value
+or grows the HAMT by inserting a new trie instead.
+"""
+@inline @Base.assume_effects :terminates_locally function insert!(found, present, trie::HAMT{K,V}, i, bi, h, val) where {K,V}
+    if found # we found a slot, just set it to the new leaf
+        # replace or insert
+        if present # replace
+            @inbounds trie.data[i] = Leaf{K, V}(h.key, val)
+        else
+            Base.insert!(trie.data, i, Leaf{K, V}(h.key, val))
+        end
+        set!(trie, bi)
+    else
+        @assert present
+        # collision -> grow
+        leaf = @inbounds trie.data[i]::Leaf{K,V}
+        leaf_h = HashState(h, leaf.key)
+        if leaf_h.hash == h.hash
+            error("Perfect hash collision")
+        end
+        while true
+            new_trie = HAMT{K, V}()
+            if present
+                @inbounds trie.data[i] = new_trie
+            else
+                i = entry_index(trie, bi)
+                Base.insert!(trie.data, i, new_trie)
+            end
+            set!(trie, bi)
+
+            h = next(h)
+            leaf_h = next(leaf_h)
+            bi_new = BitmapIndex(h)
+            bi_old = BitmapIndex(leaf_h)
+            if bi_new == bi_old # collision in new trie -> retry
+                trie = new_trie
+                bi = bi_new
+                present = false
+                continue
+            end
+            i_new = entry_index(new_trie, bi_new)
+            Base.insert!(new_trie.data, i_new, Leaf{K, V}(h.key, val))
+            set!(new_trie, bi_new)
+
+            i_old = entry_index(new_trie, bi_old)
+            Base.insert!(new_trie.data, i_old, leaf)
+            set!(new_trie, bi_old)
+
+            break
+        end
+    end
+end
+
+Base.length(::Leaf) = 1
+Base.length(trie::HAMT) = sum((length(trie.data[i]) for i in eachindex(trie.data)), init=0)
+
+Base.isempty(::Leaf) = false
+function Base.isempty(trie::HAMT)
+    if islevel_empty(trie)
+        return true
+    end
+    return all(isempty(trie.data[i]) for i in eachindex(trie.data))
+end
+
+# DFS
+function Base.iterate(trie::HAMT, state=nothing)
+    if state === nothing
+        state = (;parent=nothing, trie, i=1)
+    end
+    while state !== nothing
+        i = state.i
+        if i > Base.length(state.trie.data)
+            state = state.parent
+            continue
+        end
+        trie = state.trie.data[i]
+        state = (;parent=state.parent, trie=state.trie, i=i+1)
+        if trie isa Leaf
+            return (trie.key => trie.val, state)
+        else
+            # we found a new level
+            state = (;parent=state, trie, i=1)
+            continue
+        end
+    end
+    return nothing
+end
+
+end # module HashArrayMapTries
diff --git a/base/hashing.jl b/base/hashing.jl
index 5dbae09123bd6..260e9e5054980 100644
--- a/base/hashing.jl
+++ b/base/hashing.jl
@@ -1,119 +1,637 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-## hashing a single value ##
+const HASH_SEED = UInt == UInt64 ? 0xbdd89aa982704029 : 0xeabe9406
+const HASH_SECRET = (
+    0x2d358dccaa6c78a5,
+    0x8bb84b93962eacc9,
+    0x4b33a62ed433d4a3,
+    0xaaaaaaaaaaaaaaaa,
+)
 
 """
-    hash(x[, h::UInt]) -> UInt
+    hash(x[, h::UInt])::UInt
 
-Compute an integer hash code such that `isequal(x,y)` implies `hash(x)==hash(y)`. The
+Compute an integer hash code such that `isequal(x,y)` implies `isequal(hash(x), hash(y))`. The
 optional second argument `h` is another hash code to be mixed with the result.
 
 New types should implement the 2-argument form, typically by calling the 2-argument `hash`
 method recursively in order to mix hashes of the contents with each other (and with `h`).
 Typically, any type that implements `hash` should also implement its own [`==`](@ref) (hence
-[`isequal`](@ref)) to guarantee the property mentioned above. Types supporting subtraction
-(operator `-`) should also implement [`widen`](@ref), which is required to hash
-values inside heterogeneous arrays.
+[`isequal`](@ref)) to guarantee the property mentioned above.
 
 The hash value may change when a new Julia process is started.
 
-```jldoctest
+!!! warning
+    When implementing the 2-argument form, the second argument `h` should _not_ be given a
+    default value such `h = UInt(0)` as this will implicitly create a 1-argument method that
+    is more specific than the fallback (see [Note on Optional and keyword Arguments](@ref)),
+    but potentially with the wrong seed, causing hash inconsistencies.
+
+```jldoctest; filter = r"0x[0-9a-f]{16}"
 julia> a = hash(10)
-0x95ea2955abd45275
+0x759d18cc5346a65f
 
 julia> hash(10, a) # only use the output of another hash function as the second argument
-0xd42bad54a8575b16
+0x03158cd61b1b0bd1
 ```
 
 See also: [`objectid`](@ref), [`Dict`](@ref), [`Set`](@ref).
 """
-hash(x::Any) = hash(x, zero(UInt))
+hash(data::Any) = hash(data, HASH_SEED)
 hash(w::WeakRef, h::UInt) = hash(w.value, h)
-hash(T::Type, h::UInt) = hash_uint(3h - ccall(:jl_type_hash, UInt, (Any,), T))
-
-## hashing general objects ##
 
-hash(@nospecialize(x), h::UInt) = hash_uint(3h - objectid(x))
+# Types can't be deleted, so marking as total allows the compiler to look up the hash
+@noinline _jl_type_hash(T::Type) = @assume_effects :total ccall(:jl_type_hash, UInt, (Any,), T)
+hash(T::Type, h::UInt) = hash(_jl_type_hash(T), h)
+hash(@nospecialize(data), h::UInt) = hash(objectid(data), h)
 
-hash(x::Symbol) = objectid(x)
+function mul_parts(a::UInt64, b::UInt64)
+    p = widemul(a, b)
+    return (p >> 64) % UInt64, p % UInt64
+end
+hash_mix(a::UInt64, b::UInt64) = ⊻(mul_parts(a, b)...)
 
-## core data hashing functions ##
-
-function hash_64_64(n::UInt64)
-    a::UInt64 = n
-    a = ~a + a << 21
-    a =  a ⊻ a >> 24
-    a =  a + a << 3 + a << 8
-    a =  a ⊻ a >> 14
-    a =  a + a << 2 + a << 4
-    a =  a ⊻ a >> 28
-    a =  a + a << 31
-    return a
-end
-
-function hash_64_32(n::UInt64)
-    a::UInt64 = n
-    a = ~a + a << 18
-    a =  a ⊻ a >> 31
-    a =  a * 21
-    a =  a ⊻ a >> 11
-    a =  a + a << 6
-    a =  a ⊻ a >> 22
-    return a % UInt32
-end
-
-function hash_32_32(n::UInt32)
-    a::UInt32 = n
-    a = a + 0x7ed55d16 + a << 12
-    a = a ⊻ 0xc761c23c ⊻ a >> 19
-    a = a + 0x165667b1 + a << 5
-    a = a + 0xd3a2646c ⊻ a << 9
-    a = a + 0xfd7046c5 + a << 3
-    a = a ⊻ 0xb55a4f09 ⊻ a >> 16
-    return a
+# faster-but-weaker than hash_mix intended for small keys
+hash_mix_linear(x::Union{UInt64, UInt32}, h::UInt) = 3h - x
+function hash_finalizer(x::UInt64)
+    x ⊻= (x >> 32)
+    x *= 0x63652a4cd374b267
+    x ⊻= (x >> 33)
+    return x
 end
 
+hash_64_64(data::UInt64) = hash_finalizer(data)
+hash_64_32(data::UInt64) = hash_64_64(data) % UInt32
+hash_32_32(data::UInt32) = hash_64_32(UInt64(data))
+
 if UInt === UInt64
-    hash_uint64(x::UInt64) = hash_64_64(x)
-    hash_uint(x::UInt)     = hash_64_64(x)
+    const hash_uint64 = hash_64_64
+    const hash_uint = hash_64_64
 else
-    hash_uint64(x::UInt64) = hash_64_32(x)
-    hash_uint(x::UInt)     = hash_32_32(x)
+    const hash_uint64 = hash_64_32
+    const hash_uint = hash_32_32
+end
+
+hash(x::UInt64, h::UInt) = hash_uint64(hash_mix_linear(x, h))
+hash(x::Int64, h::UInt) = hash(bitcast(UInt64, x), h)
+hash(x::Union{Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32}, h::UInt) = hash(Int64(x), h)
+
+# IntegerCodeUnits provides a little-endian byte representation of integers
+struct IntegerCodeUnits{T<:Integer} <: AbstractVector{UInt8}
+    uvalue::T
+    num_bytes::Int
+
+    function IntegerCodeUnits(x::T) where {T<:Integer}
+        # Calculate number of bytes needed (always pad to full byte)
+        u = abs(x)
+        num_bytes = max(cld(top_set_bit(u), 8), 1)
+        return new{T}(u, num_bytes)
+    end
+end
+size(units::IntegerCodeUnits) = (units.num_bytes,)
+length(units::IntegerCodeUnits) = units.num_bytes
+@inline getindex(units::IntegerCodeUnits, i::Int) = (units.uvalue >>> (8 * (i - 1))) % UInt8
+@inline load_le_array(::Type{UInt64}, units::IntegerCodeUnits, idx) = (units.uvalue >>> (8 * (idx - 1))) % UInt64
+@inline load_le_array(::Type{UInt32}, units::IntegerCodeUnits, idx) = (units.uvalue >>> (8 * (idx - 1))) % UInt32
+
+
+# Main interface function to get little-endian byte representation of integers
+codeunits(x::Integer) = IntegerCodeUnits(x)
+
+# UTF8Units provides UTF-8 byte iteration for any AbstractString
+struct UTF8Units{T<:AbstractString}
+    string::T
+end
+
+utf8units(s::AbstractString) = codeunit(s) <: UInt8 ? codeunits(s) : UTF8Units(s)
+
+# Iterator state: (char_iter_state, remaining_utf8_bytes)
+function iterate(units::UTF8Units)
+    char_result = iterate(units.string)
+    char_result === nothing && return nothing
+    char, char_state = char_result
+
+    # Decode char to UTF-8 bytes (similar to the write function)
+    u = bswap(reinterpret(UInt32, char))
+
+    # Return first byte and set up state for remaining bytes
+    first_byte = u % UInt8
+    remaining_bytes = u >> 8
+    return first_byte, (char_state, remaining_bytes)
+end
+
+function iterate(units::UTF8Units, state)
+    char_state, remaining_bytes = state
+    # If we have more bytes from current char, return next byte
+    if remaining_bytes != 0
+        byte = remaining_bytes % UInt8
+        new_remaining = remaining_bytes >> 8
+        return byte, (char_state, new_remaining)
+    end
+
+    # Move to next char
+    char_result = iterate(units.string, char_state)
+    char_result === nothing && return nothing
+    char, new_char_state = char_result
+
+    # Decode new char to UTF-8 bytes
+    u = bswap(reinterpret(UInt32, char))
+
+    # Return first byte and set up state for remaining bytes
+    first_byte = u % UInt8
+    remaining_bytes = u >> 8
+
+    return first_byte, (new_char_state, remaining_bytes)
 end
 
-## efficient value-based hashing of integers ##
+hash_integer(x::Integer, h::UInt) = _hash_integer(x, UInt64(h)) % UInt
+function _hash_integer(
+        x::Integer,
+        seed::UInt64,
+        secret::NTuple{4, UInt64} = HASH_SECRET
+    )
+    # Handle sign by XOR-ing with seed
+    seed ⊻= (x < 0)
+    # Get little-endian byte representation of absolute value
+    # and hash using the new safe hash_bytes function
+    u = abs(x) # n.b.: this hashes typemin(IntN) correctly even if abs fails
+    return hash_bytes(codeunits(u), seed, secret)
+end
+
+
+## efficient value-based hashing of floats ##
 
-hash(x::Int64,  h::UInt) = hash_uint64(bitcast(UInt64, x)) - 3h
-hash(x::UInt64, h::UInt) = hash_uint64(x) - 3h
-hash(x::Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32}, h::UInt) = hash(Int64(x), h)
+const hx_NaN = hash(reinterpret(UInt64, NaN))
+function hash(x::Float64, h::UInt)
+    # see comments on trunc and hash(Real, UInt)
+    if typemin(Int64) <= x < typemax(Int64)
+        xi = fptosi(Int64, x)
+        if isequal(xi, x)
+            return hash(xi, h)
+        end
+    elseif typemin(UInt64) <= x < typemax(UInt64)
+        xu = fptoui(UInt64, x)
+        if isequal(xu, x)
+            return hash(xu, h)
+        end
+    elseif isnan(x)
+        return hx_NaN ⊻ h # NaN does not have a stable bit pattern
+    end
+    return hash(bitcast(UInt64, x), h)
+end
+
+hash(x::Float32, h::UInt) = hash(Float64(x), h)
 
-function hash_integer(n::Integer, h::UInt)
-    h ⊻= hash_uint((n % UInt) ⊻ h)
-    n = abs(n)
-    n >>>= sizeof(UInt) << 3
-    while n != 0
-        h ⊻= hash_uint((n % UInt) ⊻ h)
-        n >>>= sizeof(UInt) << 3
+function hash(x::Float16, h::UInt)
+    # see comments on trunc and hash(Real, UInt)
+    if isfinite(x) # all finite Float16 fit in Int64
+        xi = fptosi(Int64, x)
+        if isequal(xi, x)
+            return hash(xi, h)
+        end
+    elseif isnan(x)
+        return hx_NaN ⊻ h # NaN does not have a stable bit pattern
     end
+    return hash(bitcast(UInt64, Float64(x)), h)
+end
+
+## generic hashing for rational values ##
+_hash_shl!(x, n) = (x << n)
+function hash(x::Real, h::UInt)
+    # decompose x as num*2^pow/den
+    num, pow, den = decompose(x)
+
+    # handle special values
+    num == 0 && den == 0 && return hash(NaN, h)
+    num == 0 && return hash(ifelse(den > 0, 0.0, -0.0), h)
+    den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h)
+
+    # normalize decomposition
+    if den < 0
+        num = -num
+        den = -den
+    end
+    num_z = trailing_zeros(num)
+
+    num >>= num_z
+    den_z = trailing_zeros(den)
+    den >>= den_z
+    pow += num_z - den_z
+    # If the real can be represented as an Int64, UInt64, or Float64, hash as those types.
+    # To be an Integer the denominator must be 1 and the power must be non-negative.
+    if den == 1
+        # left = ceil(log2(num*2^pow))
+        left = top_set_bit(abs(num)) + pow
+        # 2^-1074 is the minimum Float64 so if the power is smaller, not a Float64
+        if -1074 <= pow
+            if 0 <= pow # if pow is non-negative, it is an integer
+                left <= 63 && return hash(Int64(num) << Int(pow), h)
+                left <= 64 && !signbit(num) && return hash(UInt64(num) << Int(pow), h)
+            end # typemin(Int64) handled by Float64 case
+            # 2^1024 is the maximum Float64 so if the power is greater, not a Float64
+            # Float64s only have 53 mantisa bits (including implicit bit)
+            left <= 1024 && left - pow <= 53 && return hash(ldexp(Float64(num), pow), h)
+        end
+    else
+        h = hash_integer(den, h)
+    end
+    # handle generic rational values
+    h = hash_integer(pow, h)
+
+    # trimming only whole bytes of trailing zeros simplifies greatly
+    # some specializations for memory-backed bitintegers
+    h = hash_integer((pow > 0) ? _hash_shl!(num, pow % 8) : num, h)
     return h
 end
 
-## symbol & expression hashing ##
 
+## symbol & expression hashing ##
 if UInt === UInt64
-    hash(x::Expr, h::UInt) = hash(x.args, hash(x.head, h + 0x83c7900696d26dc6))
-    hash(x::QuoteNode, h::UInt) = hash(x.value, h + 0x2c97bf8b3de87020)
+    # conservatively hash using == equality of all of the data, even though == often uses === internally
+    hash(x::QuoteNode, h::UInt) = hash(x.value, h ⊻ 0x2c97bf8b3de87020)
+    hash(x::PhiNode, h::UInt) = hash(x.edges, hash(x.values, h ⊻ 0x2c97bf8b3de87020))
+    hash(x::PhiCNode, h::UInt) = hash(x.values, h ⊻ 0x2c97bf8b3de87020)
 else
-    hash(x::Expr, h::UInt) = hash(x.args, hash(x.head, h + 0x96d26dc6))
-    hash(x::QuoteNode, h::UInt) = hash(x.value, h + 0x469d72af)
+    hash(x::QuoteNode, h::UInt) = hash(x.value, h ⊻ 0x469d72af)
+    hash(x::PhiNode, h::UInt) = hash(x.edges, hash(x.values, h ⊻ 0x469d72af))
+    hash(x::PhiCNode, h::UInt) = hash(x.values, h ⊻ 0x469d72af)
+end
+
+function hash(x::Expr, h::UInt)
+    h = hash(x.head, h ⊻ (UInt === UInt64 ? 0x83c7900696d26dc6 : 0x469d72af))
+    # Hint that `x.args::Vector{Any}` is mostly Expr, Symbol, and LineNumberNode.
+    hash_shaped(x.args, h ⊻ hash_abstractarray_seed, (Val{Expr}(), Val{Symbol}(), Val{LineNumberNode}()))
+end
+
+function hash(x::CodeInfo, h::UInt)
+    h ⊻= UInt === UInt64 ? 0x2c97bf8b3de87020 : 0x469d72af
+    for i in 1:nfields(x)
+        h = hash(isdefined(x, i) ? getfield(x, i) : missing, h)
+    end
+    return h
+end
+
+function hash(x::DebugInfo, h::UInt)
+    h ⊻= UInt === UInt64 ? 0x2c97bf8b3de87020 : 0x469d72af
+    for i in 1:nfields(x)
+        h = hash(getfield(x, i), h)
+    end
+    return h
+end
+
+hash(x::Symbol) = objectid(x)
+
+
+load_le(::Type{T}, ptr::Ptr{UInt8}, i) where {T <: Union{UInt32, UInt64}} =
+    unsafe_load(convert(Ptr{T}, ptr + i - 1))
+
+@assume_effects :terminates_globally function hash_bytes(
+        ptr::Ptr{UInt8},
+        n::Int,
+        seed::UInt64,
+        secret::NTuple{4, UInt64}
+    )
+    # Adapted with gratitude from [rapidhash](https://github.com/Nicoshev/rapidhash)
+    buflen = UInt64(n)
+    seed = seed ⊻ hash_mix(seed ⊻ secret[3], secret[2])
+
+    a = zero(UInt64)
+    b = zero(UInt64)
+    i = buflen
+
+    if buflen ≤ 16
+        if buflen ≥ 4
+            seed ⊻= buflen
+            if buflen ≥ 8
+                a = load_le(UInt64, ptr, 1)
+                b = load_le(UInt64, ptr, n - 7)
+            else
+                a = UInt64(load_le(UInt32, ptr, 1))
+                b = UInt64(load_le(UInt32, ptr, n - 3))
+            end
+        elseif buflen > 0
+            a = (UInt64(unsafe_load(ptr)) << 45) | UInt64(unsafe_load(ptr, n))
+            b = UInt64(unsafe_load(ptr, div(n, 2) + 1))
+        end
+    else
+        pos = 1
+        if i > 48
+            see1 = seed
+            see2 = seed
+            while i > 48
+                seed = hash_mix(
+                    load_le(UInt64, ptr, pos) ⊻ secret[1],
+                    load_le(UInt64, ptr, pos + 8) ⊻ seed
+                )
+                see1 = hash_mix(
+                    load_le(UInt64, ptr, pos + 16) ⊻ secret[2],
+                    load_le(UInt64, ptr, pos + 24) ⊻ see1
+                )
+                see2 = hash_mix(
+                    load_le(UInt64, ptr, pos + 32) ⊻ secret[3],
+                    load_le(UInt64, ptr, pos + 40) ⊻ see2
+                )
+                pos += 48
+                i -= 48
+            end
+            seed ⊻= see1
+            seed ⊻= see2
+        end
+        if i > 16
+            seed = hash_mix(
+                load_le(UInt64, ptr, pos) ⊻ secret[3],
+                load_le(UInt64, ptr, pos + 8) ⊻ seed
+            )
+            if i > 32
+                seed = hash_mix(
+                    load_le(UInt64, ptr, pos + 16) ⊻ secret[3],
+                    load_le(UInt64, ptr, pos + 24) ⊻ seed
+                )
+            end
+        end
+
+        a = load_le(UInt64, ptr, n - 15) ⊻ i
+        b = load_le(UInt64, ptr, n - 7)
+    end
+
+    a = a ⊻ secret[2]
+    b = b ⊻ seed
+    b, a = mul_parts(a, b)
+    return hash_mix(a ⊻ secret[4], b ⊻ secret[2] ⊻ i)
+end
+
+@inline function load_le_array(::Type{UInt64}, arr::AbstractArray{UInt8}, idx)
+    # n.b. for whatever reason, writing this as a loop ensures LLVM
+    # optimizations (particular SROA) don't make a disaster of this code
+    # early on so it can actually emit the optimal result
+    result = zero(UInt64)
+    for i in 0:7
+        byte = @inbounds arr[idx + i]
+        result |= UInt64(byte) << (8 * i)
+    end
+    return result
+end
+
+@inline function load_le_array(::Type{UInt32}, arr::AbstractArray{UInt8}, idx)
+    result = zero(UInt32)
+    for i in 0:3
+        byte = @inbounds arr[idx + i]
+        result |= UInt32(byte) << (8 * i)
+    end
+    return result
 end
 
-## hashing strings ##
+@assume_effects :terminates_globally function hash_bytes(
+        arr::AbstractArray{UInt8},
+        seed::UInt64,
+        secret::NTuple{4, UInt64}
+    )
+    # Adapted with gratitude from [rapidhash](https://github.com/Nicoshev/rapidhash)
+    n = length(arr)
+    buflen = UInt64(n)
+    seed = seed ⊻ hash_mix(seed ⊻ secret[3], secret[2])
+    firstidx = firstindex(arr)
 
-const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed
-const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81
+    a = zero(UInt64)
+    b = zero(UInt64)
+    i = buflen
 
-@assume_effects :total function hash(s::String, h::UInt)
-    h += memhash_seed
-    ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), s, sizeof(s), h % UInt32) + h
+    if buflen ≤ 16
+        if buflen ≥ 4
+            seed ⊻= buflen
+            if buflen ≥ 8
+                a = load_le_array(UInt64, arr, firstidx)
+                b = load_le_array(UInt64, arr, firstidx + n - 8)
+            else
+                a = UInt64(load_le_array(UInt32, arr, firstidx))
+                b = UInt64(load_le_array(UInt32, arr, firstidx + n - 4))
+            end
+        elseif buflen > 0
+            a = (UInt64(@inbounds arr[firstidx]) << 45) | UInt64(@inbounds arr[firstidx + n - 1])
+            b = UInt64(@inbounds arr[firstidx + div(n, 2)])
+        end
+    else
+        pos = 0
+        if i > 48
+            see1 = seed
+            see2 = seed
+            while i > 48
+                seed = hash_mix(
+                    load_le_array(UInt64, arr, firstidx + pos) ⊻ secret[1],
+                    load_le_array(UInt64, arr, firstidx + pos + 8) ⊻ seed
+                )
+                see1 = hash_mix(
+                    load_le_array(UInt64, arr, firstidx + pos + 16) ⊻ secret[2],
+                    load_le_array(UInt64, arr, firstidx + pos + 24) ⊻ see1
+                )
+                see2 = hash_mix(
+                    load_le_array(UInt64, arr, firstidx + pos + 32) ⊻ secret[3],
+                    load_le_array(UInt64, arr, firstidx + pos + 40) ⊻ see2
+                )
+                pos += 48
+                i -= 48
+            end
+            seed ⊻= see1
+            seed ⊻= see2
+        end
+        if i > 16
+            seed = hash_mix(
+                load_le_array(UInt64, arr, firstidx + pos) ⊻ secret[3],
+                load_le_array(UInt64, arr, firstidx + pos + 8) ⊻ seed
+            )
+            if i > 32
+                seed = hash_mix(
+                    load_le_array(UInt64, arr, firstidx + pos + 16) ⊻ secret[3],
+                    load_le_array(UInt64, arr, firstidx + pos + 24) ⊻ seed
+                )
+            end
+        end
+
+        a = load_le_array(UInt64, arr, firstidx + n - 16) ⊻ i
+        b = load_le_array(UInt64, arr, firstidx + n - 8)
+    end
+
+    a = a ⊻ secret[2]
+    b = b ⊻ seed
+    b, a = mul_parts(a, b)
+    return hash_mix(a ⊻ secret[4], b ⊻ secret[2] ⊻ i)
+end
+
+
+# Helper function to concatenate two UInt64 values with a byte shift
+# Returns the result of shifting 'low' right by 'shift_bytes' bytes and
+# filling the high bits with the low bits of 'high'
+@inline function concat_shift(low::UInt64, high::UInt64, shift_bytes::UInt8)
+    shift_bits = (shift_bytes * 0x8) & 0x3f
+    return (low >> shift_bits) | (high << (0x40 - shift_bits))
+end
+
+@inline function read_uint64_from_uint8_iter(iter, state)
+    value = zero(UInt64)
+    @nexprs 8 i -> begin
+        next_result = iterate(iter, state)
+        next_result === nothing && return value, state, UInt8(i - 1)
+        byte, state = next_result
+        value |= UInt64(byte) << ((i - 1) * 8)
+    end
+    return value, state, 0x8
+end
+
+@inline function read_uint64_from_uint8_iter(iter)
+    next_result = iterate(iter)
+    next_result === nothing && return nothing
+    byte, state = next_result
+    value = UInt64(byte)
+    @nexprs 7 i -> begin
+        next_result = iterate(iter, state)
+        next_result === nothing && return value, state, UInt8(i)
+        byte, state = next_result
+        value |= UInt64(byte::UInt8) << (i * 8)
+    end
+    return value, state, 0x8
 end
+
+@assume_effects :terminates_globally function hash_bytes(
+        iter,
+        seed::UInt64,
+        secret::NTuple{4, UInt64}
+    )
+    seed = seed ⊻ hash_mix(seed ⊻ secret[3], secret[2])
+
+    a = zero(UInt64)
+    b = zero(UInt64)
+    buflen = zero(UInt64)
+
+    see1 = seed
+    see2 = seed
+    l0 = zero(UInt64)
+    l1 = zero(UInt64)
+    l2 = zero(UInt64)
+    l3 = zero(UInt64)
+    l4 = zero(UInt64)
+    l5 = zero(UInt64)
+    b0 = 0x0
+    b1 = 0x0
+    b2 = 0x0
+    b3 = 0x0
+    b4 = 0x0
+    b5 = 0x0
+    t0 = zero(UInt64)
+    t1 = zero(UInt64)
+
+    # Handle first iteration separately
+    read = read_uint64_from_uint8_iter(iter)
+    if read !== nothing
+        l0, state, b0 = read
+        # Repeat hashing chunks until a short read
+        while true
+            l1, state, b1 = read_uint64_from_uint8_iter(iter, state)
+            if b1 == 0x8
+                l2, state, b2 = read_uint64_from_uint8_iter(iter, state)
+                if b2 == 0x8
+                    l3, state, b3 = read_uint64_from_uint8_iter(iter, state)
+                    if b3 == 0x8
+                        l4, state, b4 = read_uint64_from_uint8_iter(iter, state)
+                        if b4 == 0x8
+                            l5, state, b5 = read_uint64_from_uint8_iter(iter, state)
+                            if b5 == 0x8
+                                # Read start of next chunk
+                                read = read_uint64_from_uint8_iter(iter, state)
+                                if read[3] == 0x0
+                                    # Read exactly 48 bytes
+                                    t0 = l4
+                                    t1 = l5
+                                    break
+                                else
+                                    # Read more than 48 bytes - process and continue to next chunk
+                                    seed = hash_mix(l0 ⊻ secret[1], l1 ⊻ seed)
+                                    see1 = hash_mix(l2 ⊻ secret[2], l3 ⊻ see1)
+                                    see2 = hash_mix(l4 ⊻ secret[3], l5 ⊻ see2)
+                                    buflen += 48
+                                    l0, state, b0 = read
+                                    b1 = 0
+                                    b2 = 0
+                                    b3 = 0
+                                    b4 = 0
+                                    b5 = 0
+                                    if b0 < 8
+                                        t0 = concat_shift(l4, l5, b0)
+                                        t1 = concat_shift(l5, l0, b0)
+                                        break
+                                    end
+                                end
+                            else
+                                # Extract final 16 bytes at the first short read
+                                t0 = concat_shift(l3, l4, b5)
+                                t1 = concat_shift(l4, l5, b5)
+                                break
+                            end
+                        else
+                            t0 = concat_shift(l2, l3, b4)
+                            t1 = concat_shift(l3, l4, b4)
+                            break
+                        end
+                    else
+                        t0 = concat_shift(l1, l2, b3)
+                        t1 = concat_shift(l2, l3, b3)
+                        break
+                    end
+                else
+                    t0 = concat_shift(l0, l1, b2)
+                    t1 = concat_shift(l1, l2, b2)
+                    break
+                end
+            else
+                t0 = concat_shift(l5, l0, b1)
+                t1 = concat_shift(l0, l1, b1)
+                break
+            end
+        end
+    end
+
+    # Partial chunk, handle based on size
+    bytes_chunk = b0 + b1 + b2 + b3 + b4 + b5
+    if buflen > 0
+        # Finalize last full chunk
+        seed ⊻= see1
+        seed ⊻= see2
+    end
+    buflen += bytes_chunk
+    if buflen ≤ 16
+        if bytes_chunk ≥ 0x4
+            seed ⊻= bytes_chunk
+            if bytes_chunk ≥ 0x8
+                a = l0
+                b = t1
+            else
+                a = UInt64(l0 % UInt32)
+                b = UInt64((l0 >>> ((0x8 * (bytes_chunk - 0x4)) % 0x3f)) % UInt32)
+            end
+        elseif bytes_chunk > 0x0
+            b0 = l0 % UInt8
+            b1 = (l0 >>> ((0x8 * div(bytes_chunk, 0x2)) % 0x3f)) % UInt8
+            b2 = (l0 >>> ((0x8 * (bytes_chunk - 0x1)) % 0x3f)) % UInt8
+            a = (UInt64(b0) << 45) | UInt64(b2)
+            b = UInt64(b1)
+        end
+    else
+        if bytes_chunk > 0x10
+            seed = hash_mix(l0 ⊻ secret[3], l1 ⊻ seed)
+            if bytes_chunk > 0x20
+                seed = hash_mix(l2 ⊻ secret[3], l3 ⊻ seed)
+            end
+        end
+        a = t0 ⊻ bytes_chunk
+        b = t1
+    end
+
+    a = a ⊻ secret[2]
+    b = b ⊻ seed
+    b, a = mul_parts(a, b)
+    return hash_mix(a ⊻ secret[4], b ⊻ secret[2] ⊻ bytes_chunk)
+end
+
+hash(data::AbstractString, h::UInt) =
+    hash_bytes(utf8units(data), UInt64(h), HASH_SECRET) % UInt
+@assume_effects :total hash(data::String, h::UInt) =
+    GC.@preserve data hash_bytes(pointer(data), sizeof(data), UInt64(h), HASH_SECRET) % UInt
diff --git a/base/iddict.jl b/base/iddict.jl
index 01ff213305d7b..ec5392cf7b5b8 100644
--- a/base/iddict.jl
+++ b/base/iddict.jl
@@ -4,14 +4,15 @@
     IdDict([itr])
 
 `IdDict{K,V}()` constructs a hash table using [`objectid`](@ref) as hash and
-`===` as equality with keys of type `K` and values of type `V`.
+`===` as equality with keys of type `K` and values of type `V`. See [`Dict`](@ref)
+for further help and [`IdSet`](@ref) for the set version of this.
 
-See [`Dict`](@ref) for further help. In the example below, The `Dict`
-keys are all `isequal` and therefore get hashed the same, so they get overwritten.
-The `IdDict` hashes by object-id, and thus preserves the 3 different keys.
+In the example below, the `Dict` keys are all `isequal` and therefore get hashed
+the same, so they get overwritten. The `IdDict` hashes by object-id, and thus
+preserves the 3 different keys.
 
 # Examples
-```julia-repl
+```jldoctest; filter = r"  \\S+ +=> \\S+" => "  KEY => VALUE"
 julia> Dict(true => "yes", 1 => "no", 1.0 => "maybe")
 Dict{Real, String} with 1 entry:
   1.0 => "maybe"
@@ -24,10 +25,10 @@ IdDict{Any, String} with 3 entries:
 ```
 """
 mutable struct IdDict{K,V} <: AbstractDict{K,V}
-    ht::Vector{Any}
+    ht::Memory{Any}
     count::Int
     ndel::Int
-    IdDict{K,V}() where {K, V} = new{K,V}(Vector{Any}(undef, 32), 0, 0)
+    IdDict{K,V}() where {K, V} = new{K,V}(Memory{Any}(undef, 32), 0, 0)
 
     function IdDict{K,V}(itr) where {K, V}
         d = IdDict{K,V}()
@@ -53,27 +54,16 @@ IdDict(ps::Pair{K}...)             where {K}   = IdDict{K,Any}(ps)
 IdDict(ps::(Pair{K,V} where K)...) where {V}   = IdDict{Any,V}(ps)
 IdDict(ps::Pair...)                            = IdDict{Any,Any}(ps)
 
-function IdDict(kv)
-    try
-        dict_with_eltype((K, V) -> IdDict{K, V}, kv, eltype(kv))
-    catch
-        if !applicable(iterate, kv) || !all(x->isa(x,Union{Tuple,Pair}),kv)
-            throw(ArgumentError(
-                "IdDict(kv): kv needs to be an iterator of tuples or pairs"))
-        else
-            rethrow()
-        end
-    end
-end
+IdDict(kv) = dict_with_eltype((K, V) -> IdDict{K, V}, kv, eltype(kv))
 
 empty(d::IdDict, ::Type{K}, ::Type{V}) where {K, V} = IdDict{K,V}()
 
-function rehash!(d::IdDict, newsz = length(d.ht)%UInt)
-    d.ht = ccall(:jl_idtable_rehash, Vector{Any}, (Any, Csize_t), d.ht, newsz)
+function rehash!(d::IdDict, newsz::Integer = length(d.ht)%UInt)
+    d.ht = ccall(:jl_idtable_rehash, Memory{Any}, (Any, Csize_t), d.ht, newsz)
     d
 end
 
-function sizehint!(d::IdDict, newsz)
+function sizehint!(d::IdDict, newsz::Integer)
     newsz = _tablesz(newsz*2)  # *2 for keys and values in same array
     oldsz = length(d.ht)
     # grow at least 25%
@@ -84,7 +74,7 @@ function sizehint!(d::IdDict, newsz)
 end
 
 function setindex!(d::IdDict{K,V}, @nospecialize(val), @nospecialize(key)) where {K, V}
-    !isa(key, K) && throw(ArgumentError("$(limitrepr(key)) is not a valid key for type $K"))
+    !isa(key, K) && throw(KeyTypeError(K, key))
     if !(val isa V) # avoid a dynamic call
         val = convert(V, val)::V
     end
@@ -93,7 +83,7 @@ function setindex!(d::IdDict{K,V}, @nospecialize(val), @nospecialize(key)) where
         d.ndel = 0
     end
     inserted = RefValue{Cint}(0)
-    d.ht = ccall(:jl_eqtable_put, Array{Any,1}, (Any, Any, Any, Ptr{Cint}), d.ht, key, val, inserted)
+    d.ht = ccall(:jl_eqtable_put, Memory{Any}, (Any, Any, Any, Ptr{Cint}), d.ht, key, val, inserted)
     d.count += inserted[]
     return d
 end
@@ -133,10 +123,10 @@ function delete!(d::IdDict{K}, @nospecialize(key)) where K
 end
 
 function empty!(d::IdDict)
-    resize!(d.ht, 32)
+    d.ht = Memory{Any}(undef, 32)
     ht = d.ht
     t = @_gc_preserve_begin ht
-    memset(unsafe_convert(Ptr{Cvoid}, ht), 0, sizeof(ht))
+    memset(unsafe_convert(Ptr{Cvoid}, ht), 0, sizeof(ht) % UInt)
     @_gc_preserve_end t
     d.ndel = 0
     d.count = 0
diff --git a/base/idset.jl b/base/idset.jl
index 0a4d4275b4231..59b47dee64a04 100644
--- a/base/idset.jl
+++ b/base/idset.jl
@@ -1,13 +1,39 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# Like Set, but using IdDict
-mutable struct IdSet{T} <: AbstractSet{T}
-    dict::IdDict{T,Nothing}
+"""
+    IdSet{T}([itr])
+    IdSet()
 
-    IdSet{T}() where {T} = new(IdDict{T,Nothing}())
-    IdSet{T}(s::IdSet{T}) where {T} = new(copy(s.dict))
-end
+`IdSet{T}()` constructs a set (see [`Set`](@ref)) using
+`===` as equality with values of type `T`.
+
+In the example below, the values are all `isequal` so they get overwritten in the ordinary `Set`.
+The `IdSet` compares by `===` and so preserves the 3 different values.
+
+!!! compat "Julia 1.11"
+    Exported in Julia 1.11 and later.
 
+# Examples
+```jldoctest; filter = r"\\n\\s*(1|1\\.0|true)"
+julia> Set(Any[true, 1, 1.0])
+Set{Any} with 1 element:
+  1.0
+
+julia> IdSet{Any}(Any[true, 1, 1.0])
+IdSet{Any} with 3 elements:
+  1.0
+  1
+  true
+```
+"""
+mutable struct IdSet{K} <: AbstractSet{K}
+    list::Memory{Any}
+    idxs::Union{Memory{UInt8}, Memory{UInt16}, Memory{UInt32}}
+    count::Int
+    max::Int # n.b. always <= length(list)
+    IdSet{T}() where {T} = new(Memory{Any}(undef, 0), Memory{UInt8}(undef, 0), 0, 0)
+    IdSet{T}(s::IdSet{T}) where {T} = new(copy(s.list), copy(s.idxs), s.count, s.max)
+end
 IdSet{T}(itr) where {T} = union!(IdSet{T}(), itr)
 IdSet() = IdSet{Any}()
 
@@ -15,22 +41,86 @@ copymutable(s::IdSet) = typeof(s)(s)
 emptymutable(s::IdSet{T}, ::Type{U}=T) where {T,U} = IdSet{U}()
 copy(s::IdSet) = typeof(s)(s)
 
-isempty(s::IdSet) = isempty(s.dict)
-length(s::IdSet)  = length(s.dict)
-in(@nospecialize(x), s::IdSet) = haskey(s.dict, x)
-push!(s::IdSet, @nospecialize(x)) = (s.dict[x] = nothing; s)
-pop!(s::IdSet, @nospecialize(x)) = (pop!(s.dict, x); x)
-pop!(s::IdSet, @nospecialize(x), @nospecialize(default)) = (x in s ? pop!(s, x) : default)
-delete!(s::IdSet, @nospecialize(x)) = (delete!(s.dict, x); s)
+haskey(s::IdSet, @nospecialize(key)) = ccall(:jl_idset_peek_bp, Int, (Any, Any, Any), s.list, s.idxs, key) != -1
+isempty(s::IdSet) = s.count == 0
+length(s::IdSet)  = s.count
+in(@nospecialize(x), s::IdSet) = haskey(s, x)
+function push!(s::IdSet, @nospecialize(x))
+    idx = ccall(:jl_idset_peek_bp, Int, (Any, Any, Any), s.list, s.idxs, x)
+    if idx >= 0
+        s.list[idx + 1] = x
+    else
+        if s.max < length(s.list)
+            idx = s.max
+            @assert !isassigned(s.list, idx + 1)
+            s.list[idx + 1] = x
+            s.max = idx + 1
+        else
+            newidx = RefValue{Int}(0)
+            setfield!(s, :list, ccall(:jl_idset_put_key, Any, (Any, Any, Ptr{Int}), s.list, x, newidx))
+            idx = newidx[]
+            s.max = idx < 0 ? -idx : idx + 1
+        end
+        @assert s.list[s.max] === x
+        setfield!(s, :idxs, ccall(:jl_idset_put_idx, Any, (Any, Any, Int), s.list, s.idxs, idx))
+        s.count += 1
+    end
+    s
+end
+function _pop!(s::IdSet, @nospecialize(x))
+    removed = ccall(:jl_idset_pop, Int, (Any, Any, Any), s.list, s.idxs, x)
+    if removed != -1
+        s.count -= 1
+        while s.max > 0 && !isassigned(s.list, s.max)
+            s.max -= 1
+        end
+    end
+    removed
+end
+pop!(s::IdSet, @nospecialize(x)) = _pop!(s, x) == -1 ? throw(KeyError(x)) : x
+pop!(s::IdSet, @nospecialize(x), @nospecialize(default)) = _pop!(s, x) == -1 ? default : x
+delete!(s::IdSet, @nospecialize(x)) = (_pop!(s, x); s)
+
+function sizehint!(s::IdSet, newsz::Integer)
+    # TODO: grow/compact list and perform rehash, if profitable?
+    # TODO: shrink?
+    # s.list = resize(s.list, newsz)
+    # newsz = _tablesz(newsz)
+    # oldsz = length(s.idxs)
+    # #grow at least 25%
+    # if newsz < (oldsz*5)>>2
+    #     return s
+    # end
+    # rehash!(s, newsz)
+    nothing
+end
+
+function _zero!(a::Memory{<:BitInteger})
+    t = @_gc_preserve_begin a
+    p = unsafe_convert(Ptr{Cvoid}, a)
+    T = eltype(a)
+    memset(p, 0x0, (sizeof(T) * length(a)) % UInt)
+    @_gc_preserve_end t
+    return a
+end
 
-sizehint!(s::IdSet, newsz) = (sizehint!(s.dict, newsz); s)
-empty!(s::IdSet) = (empty!(s.dict); s)
+function empty!(s::IdSet)
+    _zero!(s.idxs)
+    list = s.list
+    for i = 1:s.max
+        _unsetindex!(list, i)
+    end
+    s.count = 0
+    s.max = 0
+    s
+end
 
 filter!(f, d::IdSet) = unsafe_filter!(f, d)
 
-function iterate(s::IdSet, state...)
-    y = iterate(s.dict, state...)
-    y === nothing && return nothing
-    ((k, _), i) = y
-    return (k, i)
+function iterate(s::IdSet{S}, state=0) where {S}
+    while true
+        state += 1
+        state > s.max && return nothing
+        isassigned(s.list, state) && return s.list[state]::S, state
+    end
 end
diff --git a/base/indices.jl b/base/indices.jl
index 15a2a2f3c0ac7..0d0e56b12be4b 100644
--- a/base/indices.jl
+++ b/base/indices.jl
@@ -106,26 +106,49 @@ IndexStyle(::IndexStyle, ::IndexStyle) = IndexCartesian()
 
 promote_shape(::Tuple{}, ::Tuple{}) = ()
 
-function promote_shape(a::Tuple{Int,}, b::Tuple{Int,})
-    if a[1] != b[1]
-        throw(DimensionMismatch("dimensions must match: a has dims $a, b has dims $b"))
+# Consistent error message for promote_shape mismatch, hiding type details like
+# OneTo. When b ≡ nothing, it is omitted; i can be supplied for an index.
+function throw_promote_shape_mismatch(a::Tuple, b::Union{Nothing,Tuple}, i = nothing)
+    if a isa Tuple{Vararg{Base.OneTo}} && (b === nothing || b isa Tuple{Vararg{Base.OneTo}})
+        a = map(lastindex, a)::Dims
+        b === nothing || (b = map(lastindex, b)::Dims)
+    end
+    _has_axes = !(a isa Dims && (b === nothing || b isa Dims))
+    if _has_axes
+        _normalize(d) = map(x -> firstindex(x):lastindex(x), d)
+        a = _normalize(a)
+        b === nothing || (b = _normalize(b))
+        _things = "axes "
+    else
+        _things = "size "
+    end
+    msg = IOBuffer()
+    print(msg, "a has ", _things)
+    print(msg, a)
+    if b ≢ nothing
+        print(msg, ", b has ", _things)
+        print(msg, b)
+    end
+    if i ≢ nothing
+        print(msg, ", mismatch at dim ", i)
     end
+    throw(DimensionMismatch(takestring!(msg)))
+end
+
+function promote_shape(a::Tuple{Int,}, b::Tuple{Int,})
+    a[1] != b[1] && throw_promote_shape_mismatch(a, b)
     return a
 end
 
 function promote_shape(a::Tuple{Int,Int}, b::Tuple{Int,})
-    if a[1] != b[1] || a[2] != 1
-        throw(DimensionMismatch("dimensions must match: a has dims $a, b has dims $b"))
-    end
+    (a[1] != b[1] || a[2] != 1) && throw_promote_shape_mismatch(a, b)
     return a
 end
 
 promote_shape(a::Tuple{Int,}, b::Tuple{Int,Int}) = promote_shape(b, a)
 
 function promote_shape(a::Tuple{Int, Int}, b::Tuple{Int, Int})
-    if a[1] != b[1] || a[2] != b[2]
-        throw(DimensionMismatch("dimensions must match: a has dims $a, b has dims $b"))
-    end
+    (a[1] != b[1] || a[2] != b[2]) && throw_promote_shape_mismatch(a, b)
     return a
 end
 
@@ -153,14 +176,10 @@ function promote_shape(a::Dims, b::Dims)
         return promote_shape(b, a)
     end
     for i=1:length(b)
-        if a[i] != b[i]
-            throw(DimensionMismatch("dimensions must match: a has dims $a, b has dims $b, mismatch at $i"))
-        end
+        a[i] != b[i] && throw_promote_shape_mismatch(a, b, i)
     end
     for i=length(b)+1:length(a)
-        if a[i] != 1
-            throw(DimensionMismatch("dimensions must match: a has dims $a, must have singleton at dim $i"))
-        end
+        a[i] != 1 && throw_promote_shape_mismatch(a, nothing, i)
     end
     return a
 end
@@ -174,14 +193,10 @@ function promote_shape(a::Indices, b::Indices)
         return promote_shape(b, a)
     end
     for i=1:length(b)
-        if a[i] != b[i]
-            throw(DimensionMismatch("dimensions must match: a has dims $a, b has dims $b, mismatch at $i"))
-        end
+        a[i] != b[i] && throw_promote_shape_mismatch(a, b, i)
     end
     for i=length(b)+1:length(a)
-        if a[i] != 1:1
-            throw(DimensionMismatch("dimensions must match: a has dims $a, must have singleton at dim $i"))
-        end
+        a[i] != 1:1 && throw_promote_shape_mismatch(a, nothing, i)
     end
     return a
 end
@@ -295,9 +310,9 @@ to_index(I::AbstractArray{Bool}) = LogicalIndex(I)
 to_index(I::AbstractArray) = I
 to_index(I::AbstractArray{Union{}}) = I
 to_index(I::AbstractArray{<:Union{AbstractArray, Colon}}) =
-    throw(ArgumentError("invalid index: $(limitrepr(I)) of type $(typeof(I))"))
+    throw(ArgumentError(LazyString("invalid index: ", limitrepr(I), " of type ", typeof(I))))
 to_index(::Colon) = throw(ArgumentError("colons must be converted by to_indices(...)"))
-to_index(i) = throw(ArgumentError("invalid index: $(limitrepr(i)) of type $(typeof(i))"))
+to_index(i) = throw(ArgumentError(LazyString("invalid index: ", limitrepr(i), " of type ", typeof(i))))
 
 # The general to_indices is mostly defined in multidimensional.jl, but this
 # definition is required for bootstrap:
@@ -349,15 +364,8 @@ to_indices(A, I::Tuple{}) = ()
 to_indices(A, I::Tuple{Vararg{Int}}) = I
 to_indices(A, I::Tuple{Vararg{Integer}}) = (@inline; to_indices(A, (), I))
 to_indices(A, inds, ::Tuple{}) = ()
-function to_indices(A, inds, I::Tuple{Any, Vararg{Any}})
-    @inline
-    head = _to_indices1(A, inds, I[1])
-    rest = to_indices(A, _cutdim(inds, I[1]), tail(I))
-    (head..., rest...)
-end
-
-_to_indices1(A, inds, I1) = (to_index(A, I1),)
-_cutdim(inds, I1) = safe_tail(inds)
+to_indices(A, inds, I::Tuple{Any, Vararg}) =
+    (@inline; (to_index(A, I[1]), to_indices(A, safe_tail(inds), tail(I))...))
 
 """
     Slice(indices)
@@ -377,15 +385,13 @@ end
 Slice(S::Slice) = S
 Slice{T}(S::Slice) where {T<:AbstractUnitRange} = Slice{T}(T(S.indices))
 
-axes(S::Slice) = (IdentityUnitRange(S.indices),)
+axes(S::Slice) = (axes1(S),)
 axes1(S::Slice) = IdentityUnitRange(S.indices)
-axes(S::Slice{<:OneTo}) = (S.indices,)
-axes1(S::Slice{<:OneTo}) = S.indices
+axes1(S::Slice{<:AbstractOneTo{<:Integer}}) = S.indices
 
 first(S::Slice) = first(S.indices)
 last(S::Slice) = last(S.indices)
 size(S::Slice) = (length(S.indices),)
-length(S::Slice) = length(S.indices)
 getindex(S::Slice, i::Int) = (@inline; @boundscheck checkbounds(S, i); i)
 getindex(S::Slice, i::AbstractUnitRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
 getindex(S::Slice, i::StepRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
@@ -406,24 +412,64 @@ IdentityUnitRange(S::IdentityUnitRange) = S
 IdentityUnitRange{T}(S::IdentityUnitRange) where {T<:AbstractUnitRange} = IdentityUnitRange{T}(T(S.indices))
 
 # IdentityUnitRanges are offset and thus have offset axes, so they are their own axes
-axes(S::IdentityUnitRange) = (S,)
+axes(S::IdentityUnitRange) = (axes1(S),)
 axes1(S::IdentityUnitRange) = S
-axes(S::IdentityUnitRange{<:OneTo}) = (S.indices,)
-axes1(S::IdentityUnitRange{<:OneTo}) = S.indices
+axes1(S::IdentityUnitRange{<:AbstractOneTo{<:Integer}}) = S.indices
 
 first(S::IdentityUnitRange) = first(S.indices)
 last(S::IdentityUnitRange) = last(S.indices)
 size(S::IdentityUnitRange) = (length(S.indices),)
-length(S::IdentityUnitRange) = length(S.indices)
-getindex(S::IdentityUnitRange, i::Int) = (@inline; @boundscheck checkbounds(S, i); i)
-getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
-getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
+unsafe_length(S::IdentityUnitRange) = unsafe_length(S.indices)
+getindex(S::IdentityUnitRange, i::Integer) = (@inline; @boundscheck checkbounds(S, i); convert(eltype(S), i))
+getindex(S::IdentityUnitRange, i::Bool) = throw(ArgumentError("invalid index: $i of type Bool"))
+function getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer})
+    @inline
+    @boundscheck checkbounds(S, i)
+    return convert(AbstractUnitRange{eltype(S)}, i)
+end
+function getindex(S::IdentityUnitRange, i::AbstractUnitRange{Bool})
+    @inline
+    @boundscheck checkbounds(S, i)
+    range(first(i) ? first(S) : last(S), length = last(i))
+end
+function getindex(S::IdentityUnitRange, i::StepRange{<:Integer})
+    @inline
+    @boundscheck checkbounds(S, i)
+    return convert(AbstractRange{eltype(S)}, i)
+end
+function getindex(S::IdentityUnitRange, i::StepRange{Bool})
+    @inline
+    @boundscheck checkbounds(S, i)
+    range(first(i) ? first(S) : last(S), length = last(i), step = Int(step(i)))
+end
+# Indexing with offset ranges should preserve the axes of the indices
+# however, this is only really possible in general with OffsetArrays.
+# In some cases, though, we may obtain correct results using Base ranges
+# the following methods are added to allow OffsetArrays to dispatch on the first argument without ambiguities
+function getindex(S::IdentityUnitRange{<:AbstractUnitRange{<:Integer}},
+                    i::IdentityUnitRange{<:AbstractUnitRange{<:Integer}})
+    @inline
+    @boundscheck checkbounds(S, i)
+    return i
+end
+function getindex(S::Slice{<:AbstractUnitRange{<:Integer}},
+                    i::IdentityUnitRange{<:AbstractUnitRange{<:Integer}})
+    @inline
+    @boundscheck checkbounds(S, i)
+    return i
+end
 show(io::IO, r::IdentityUnitRange) = print(io, "Base.IdentityUnitRange(", r.indices, ")")
 iterate(S::IdentityUnitRange, s...) = iterate(S.indices, s...)
 
-# For OneTo, the values and indices of the values are identical, so this may be defined in Base.
+# For AbstractOneTo, the values and indices of the values are identical, so this may be defined in Base.
 # In general such an indexing operation would produce offset ranges
-getindex(S::OneTo, I::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}) = (@inline; @boundscheck checkbounds(S, I); I)
+# This should also ideally return an AbstractUnitRange{eltype(S)}, but currently
+# we're restricted to eltype(::IdentityUnitRange) == Int by definition
+function getindex(S::AbstractOneTo{<:Integer}, I::IdentityUnitRange{<:AbstractUnitRange{<:Integer}})
+    @inline
+    @boundscheck checkbounds(S, I)
+    return I
+end
 
 """
     LinearIndices(A::AbstractArray)
@@ -456,7 +502,7 @@ julia> extrema(b)
 
 Return a `LinearIndices` array with the specified shape or [`axes`](@ref).
 
-# Example
+# Examples
 
 The main purpose of this constructor is intuitive conversion
 from cartesian to linear indexing:
@@ -485,7 +531,7 @@ LinearIndices(inds::NTuple{N,Union{<:Integer,AbstractUnitRange{<:Integer}}}) whe
     LinearIndices(map(_convert2ind, inds))
 LinearIndices(A::Union{AbstractArray,SimpleVector}) = LinearIndices(axes(A))
 
-_convert2ind(i::Integer) = Base.OneTo(i)
+_convert2ind(i::Integer) = oneto(i)
 _convert2ind(ind::AbstractUnitRange) = first(ind):last(ind)
 
 function indices_promote_type(::Type{Tuple{R1,Vararg{R1,N}}}, ::Type{Tuple{R2,Vararg{R2,N}}}) where {R1,R2,N}
@@ -515,6 +561,7 @@ function getindex(iter::LinearIndices, i::AbstractRange{<:Integer})
     @boundscheck checkbounds(iter, i)
     @inbounds isa(iter, LinearIndices{1}) ? iter.indices[1][i] : (first(iter):last(iter))[i]
 end
+copy(iter::LinearIndices) = iter
 # More efficient iteration — predominantly for non-vector LinearIndices
 # but one-dimensional LinearIndices must be special-cased to support OffsetArrays
 iterate(iter::LinearIndices{1}, s...) = iterate(axes1(iter.indices[1]), s...)
@@ -525,3 +572,7 @@ first(iter::LinearIndices) = 1
 first(iter::LinearIndices{1}) = (@inline; first(axes1(iter.indices[1])))
 last(iter::LinearIndices) = (@inline; length(iter))
 last(iter::LinearIndices{1}) = (@inline; last(axes1(iter.indices[1])))
+
+function show(io::IO, iter::LinearIndices)
+    print(io, "LinearIndices(", iter.indices, ")")
+end
diff --git a/base/initdefs.jl b/base/initdefs.jl
index ed0aa3856f339..d17c7e9e1e02b 100644
--- a/base/initdefs.jl
+++ b/base/initdefs.jl
@@ -9,7 +9,7 @@ A string containing the script name passed to Julia from the command line. Note
 script name remains unchanged from within included files. Alternatively see
 [`@__FILE__`](@ref).
 """
-global PROGRAM_FILE = ""
+global PROGRAM_FILE::String = ""
 
 """
     ARGS
@@ -25,19 +25,19 @@ Stop the program with an exit code. The default exit code is zero, indicating th
 program completed successfully. In an interactive session, `exit()` can be called with
 the keyboard shortcut `^D`.
 """
-exit(n) = ccall(:jl_exit, Cvoid, (Int32,), n)
+exit(n) = ccall(:jl_exit, Union{}, (Int32,), n)
 exit() = exit(0)
 
 const roottask = current_task()
 
-is_interactive = false
+is_interactive::Bool = false
 
 """
-    isinteractive() -> Bool
+    isinteractive()::Bool
 
 Determine whether Julia is running an interactive session.
 """
-isinteractive() = (is_interactive::Bool)
+isinteractive() = is_interactive
 
 ## package depots (registries, packages, environments) ##
 
@@ -73,25 +73,31 @@ environment variable if set.
 Each entry in `DEPOT_PATH` is a path to a directory which contains subdirectories used by Julia for various purposes.
 Here is an overview of some of the subdirectories that may exist in a depot:
 
+* `artifacts`: Contains content that packages use for which Pkg manages the installation of.
 * `clones`: Contains full clones of package repos. Maintained by `Pkg.jl` and used as a cache.
+* `config`: Contains julia-level configuration such as a `startup.jl`.
 * `compiled`: Contains precompiled `*.ji` files for packages. Maintained by Julia.
 * `dev`: Default directory for `Pkg.develop`. Maintained by `Pkg.jl` and the user.
 * `environments`: Default package environments. For instance the global environment for a specific julia version. Maintained by `Pkg.jl`.
-* `logs`: Contains logs of `Pkg` and `REPL` operations. Maintained by `Pkg.jl` and `Julia`.
+* `logs`: Contains logs of `Pkg` and `REPL` operations. Maintained by `Pkg.jl` and Julia.
 * `packages`: Contains packages, some of which were explicitly installed and some which are implicit dependencies. Maintained by `Pkg.jl`.
 * `registries`: Contains package registries. By default only `General`. Maintained by `Pkg.jl`.
+* `scratchspaces`: Contains content that a package itself installs via the [`Scratch.jl`](https://github.com/JuliaPackaging/Scratch.jl) package. `Pkg.gc()` will delete content that is known to be unused.
+
+!!! note
+    Packages that want to store content should use the `scratchspaces` subdirectory via
+    [`Scratch.jl`](https://github.com/JuliaPackaging/Scratch.jl) instead of creating new
+    subdirectories in the depot root.
 
 See also [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH), and
 [Code Loading](@ref code-loading).
 """
 const DEPOT_PATH = String[]
 
-function append_default_depot_path!(DEPOT_PATH)
-    path = joinpath(homedir(), ".julia")
-    path in DEPOT_PATH || push!(DEPOT_PATH, path)
+function append_bundled_depot_path!(DEPOT_PATH)
     path = abspath(Sys.BINDIR, "..", "local", "share", "julia")
     path in DEPOT_PATH || push!(DEPOT_PATH, path)
-    path = abspath(Sys.BINDIR, "..", "share", "julia")
+    path = abspath(Sys.BINDIR, Base.DATAROOTDIR, "julia")
     path in DEPOT_PATH || push!(DEPOT_PATH, path)
     return DEPOT_PATH
 end
@@ -100,17 +106,34 @@ function init_depot_path()
     empty!(DEPOT_PATH)
     if haskey(ENV, "JULIA_DEPOT_PATH")
         str = ENV["JULIA_DEPOT_PATH"]
+
+        # explicitly setting JULIA_DEPOT_PATH to the empty string means using no depot
         isempty(str) && return
-        for path in eachsplit(str, Sys.iswindows() ? ';' : ':')
+
+        # otherwise, populate the depot path with the entries in JULIA_DEPOT_PATH,
+        # expanding empty strings to the bundled depot
+        pushfirst_default = true
+        for (i, path) in enumerate(eachsplit(str, Sys.iswindows() ? ';' : ':'))
             if isempty(path)
-                append_default_depot_path!(DEPOT_PATH)
+                append_bundled_depot_path!(DEPOT_PATH)
             else
                 path = expanduser(path)
                 path in DEPOT_PATH || push!(DEPOT_PATH, path)
+                if i == 1
+                    # if a first entry is given, don't add the default depot at the start
+                    pushfirst_default = false
+                end
             end
         end
+
+        # backwards compatibility: if JULIA_DEPOT_PATH only contains empty entries
+        # (e.g., JULIA_DEPOT_PATH=':'), make sure to use the default depot
+        if pushfirst_default
+            pushfirst!(DEPOT_PATH, joinpath(homedir(), ".julia"))
+        end
     else
-        append_default_depot_path!(DEPOT_PATH)
+        push!(DEPOT_PATH, joinpath(homedir(), ".julia"))
+        append_bundled_depot_path!(DEPOT_PATH)
     end
     nothing
 end
@@ -225,8 +248,14 @@ function init_load_path()
     if haskey(ENV, "JULIA_LOAD_PATH")
         paths = parse_load_path(ENV["JULIA_LOAD_PATH"])
     else
-        paths = filter!(env -> env !== nothing,
-            String[env == "@." ? current_project() : env for env in DEFAULT_LOAD_PATH])
+        paths = String[]
+        for env in DEFAULT_LOAD_PATH
+            if env == "@."
+                env = current_project()
+                env === nothing && continue
+            end
+            push!(paths, env)
+        end
     end
     append!(empty!(LOAD_PATH), paths)
 end
@@ -243,15 +272,27 @@ function init_active_project()
 end
 
 ## load path expansion: turn LOAD_PATH entries into concrete paths ##
+cmd_suppresses_program(cmd) = cmd in ('e', 'E')
 
 function load_path_expand(env::AbstractString)::Union{String, Nothing}
     # named environment?
     if startswith(env, '@')
-        # `@` in JULIA_LOAD_PATH is expanded early (at startup time)
-        # if you put a `@` in LOAD_PATH manually, it's expanded late
+        # `@.` in JULIA_LOAD_PATH is expanded early (at startup time)
+        # if you put a `@.` in LOAD_PATH manually, it's expanded late
         env == "@" && return active_project(false)
         env == "@." && return current_project()
+        env == "@temp" && return mktempdir()
         env == "@stdlib" && return Sys.STDLIB
+        if startswith(env, "@script")
+            program_file = JLOptions().program_file
+            program_file = program_file != C_NULL ? unsafe_string(program_file) : nothing
+            isnothing(program_file) && return nothing # User did not pass a script
+
+            # Expand trailing relative path
+            dir = dirname(program_file)
+            dir = env != "@script" ? (dir * env[length("@script")+1:end]) : dir
+            return current_project(dir)
+        end
         env = replace(env, '#' => VERSION.major, count=1)
         env = replace(env, '#' => VERSION.minor, count=1)
         env = replace(env, '#' => VERSION.patch, count=1)
@@ -330,6 +371,27 @@ function set_active_project(projfile::Union{AbstractString,Nothing})
     end
 end
 
+"""
+    active_manifest()
+    active_manifest(project_file::AbstractString)
+
+Return the path of the active manifest file, or the manifest file that would be used for a given `project_file`.
+
+In a stacked environment (where multiple environments exist in the load path), this returns the manifest
+file for the primary (active) environment only, not the manifests from other environments in the stack.
+See the manual section on [Environment stacks](@ref) for more details on how stacked environments work.
+
+See [`Project environments`](@ref project-environments) for details on the difference between a project and a manifest, and the naming
+options and their priority in package loading.
+
+See also [`Base.active_project`](@ref), [`Base.set_active_project`](@ref).
+"""
+function active_manifest(project_file::Union{AbstractString,Nothing}=nothing; search_load_path::Bool=true)
+    # If `project_file` was specified, use that, otherwise get the active project:
+    project_file = !isnothing(project_file) ? project_file : active_project(search_load_path)
+    project_file === nothing && return nothing
+    return project_file_manifest_path(project_file)
+end
 
 """
     load_path()
@@ -354,9 +416,7 @@ end
 
 ## atexit: register exit hooks ##
 
-const atexit_hooks = Callable[
-    () -> Filesystem.temp_cleanup_purge(force=true)
-]
+const atexit_hooks = Callable[]
 const _atexit_hooks_lock = ReentrantLock()
 global _atexit_hooks_finished::Bool = false
 
@@ -394,13 +454,18 @@ function atexit(f::Function)
 end
 
 function _atexit(exitcode::Cint)
+    # this current task shouldn't be scheduled anywhere, but if it was (because
+    # this exit came from a signal for example), then try to clear that state
+    # to minimize scheduler issues later
+    ct = current_task()
+    q = ct.queue; q === nothing || list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
     # Don't hold the lock around the iteration, just in case any other thread executing in
     # parallel tries to register a new atexit hook while this is running. We don't want to
     # block that thread from proceeding, and we can allow it to register its hook which we
     # will immediately run here.
     while true
         local f
-        Base.@lock _atexit_hooks_lock begin
+        @lock _atexit_hooks_lock begin
             # If this is the last iteration, atomically disable atexit hooks to prevent
             # someone from registering a hook that will never be run.
             # (We do this inside the loop, so that it is atomic: no one can have registered
@@ -421,7 +486,7 @@ function _atexit(exitcode::Cint)
             end
         catch ex
             showerror(stderr, ex)
-            Base.show_backtrace(stderr, catch_backtrace())
+            show_backtrace(stderr, catch_backtrace())
             println(stderr)
         end
     end
@@ -441,7 +506,7 @@ function _postoutput()
             f()
         catch ex
             showerror(stderr, ex)
-            Base.show_backtrace(stderr, catch_backtrace())
+            show_backtrace(stderr, catch_backtrace())
             println(stderr)
         end
     end
@@ -449,11 +514,14 @@ end
 
 ## hook for disabling threaded libraries ##
 
-library_threading_enabled = true
-const disable_library_threading_hooks = []
+library_threading_enabled::Bool = true
+
+# Base.OncePerProcess ensures that any registered hooks do not outlive the session.
+# (even if they are registered during the sysimage build process by top-level code)
+const disable_library_threading_hooks = Base.OncePerProcess(Vector{Any})
 
 function at_disable_library_threading(f)
-    push!(disable_library_threading_hooks, f)
+    push!(disable_library_threading_hooks(), f)
     if !library_threading_enabled
         disable_library_threading()
     end
@@ -462,8 +530,8 @@ end
 
 function disable_library_threading()
     global library_threading_enabled = false
-    while !isempty(disable_library_threading_hooks)
-        f = pop!(disable_library_threading_hooks)
+    while !isempty(disable_library_threading_hooks())
+        f = pop!(disable_library_threading_hooks())
         try
             f()
         catch err
diff --git a/base/int.jl b/base/int.jl
index 4b2f542bba788..c7c542d261157 100644
--- a/base/int.jl
+++ b/base/int.jl
@@ -96,8 +96,46 @@ inv(x::Integer) = float(one(x)) / float(x)
 # skip promotion for system integer types
 (/)(x::BitInteger, y::BitInteger) = float(x) / float(y)
 
+
+"""
+    mul_hi(a::T, b::T) where {T<:Base.Integer}
+
+Return the higher half of the product of `a` and `b` where `T` is a fixed size integer.
+
+# Examples
+```jldoctest
+julia> Base.mul_hi(12345678987654321, 123456789)
+82624
+
+julia> (widen(12345678987654321) * 123456789) >> 64
+82624
+
+julia> Base.mul_hi(0xff, 0xff)
+0xfe
+```
+"""
+function mul_hi(a::T, b::T) where {T<:Integer}
+    ((widen(a)*b) >>> Base.top_set_bit(-1 % T)) % T
+end
+
+function mul_hi(a::UInt128, b::UInt128)
+    shift = sizeof(a)*4
+    mask = typemax(UInt128) >> shift
+    a1, a2 = a >>> shift, a & mask
+    b1, b2 = b >>> shift, b & mask
+    a1b1, a1b2, a2b1, a2b2 = a1*b1, a1*b2, a2*b1, a2*b2
+    carry = ((a1b2 & mask) + (a2b1 & mask) + (a2b2 >>> shift)) >>> shift
+    a1b1 + (a1b2 >>> shift) + (a2b1 >>> shift) + carry
+end
+
+function mul_hi(a::Int128, b::Int128)
+    shift = sizeof(a)*8 - 1
+    t1, t2 = (a >> shift) & b % UInt128, (b >> shift) & a % UInt128
+    (mul_hi(a % UInt128, b % UInt128) - t1 - t2) % Int128
+end
+
 """
-    isodd(x::Number) -> Bool
+    isodd(x::Number)::Bool
 
 Return `true` if `x` is an odd integer (that is, an integer not divisible by 2), and `false` otherwise.
 
@@ -117,7 +155,7 @@ isodd(n::Number) = isreal(n) && isodd(real(n))
 isodd(n::Real) = isinteger(n) && !iszero(rem(Integer(n), 2))
 
 """
-    iseven(x::Number) -> Bool
+    iseven(x::Number)::Bool
 
 Return `true` if `x` is an even integer (that is, an integer divisible by 2), and `false` otherwise.
 
@@ -139,6 +177,8 @@ iseven(n::Real) = isinteger(n) && iszero(rem(Integer(n), 2))
 signbit(x::Integer) = x < 0
 signbit(x::Unsigned) = false
 
+isnegative(x::Unsigned) = false
+
 flipsign(x::T, y::T) where {T<:BitSigned} = flipsign_int(x, y)
 flipsign(x::BitSigned, y::BitSigned) = flipsign_int(promote(x, y)...) % typeof(x)
 
@@ -250,7 +290,7 @@ end
 The reduction of `x` modulo `y`, or equivalently, the remainder of `x` after floored
 division by `y`, i.e. `x - y*fld(x,y)` if computed without intermediate rounding.
 
-The result will have the same sign as `y`, and magnitude less than `abs(y)` (with some
+The result will have the same sign as `y` if `isfinite(y)`, and magnitude less than `abs(y)` (with some
 exceptions, see note below).
 
 !!! note
@@ -286,8 +326,14 @@ function mod(x::T, y::T) where T<:Integer
     y == -1 && return T(0)   # avoid potential overflow in fld
     return x - fld(x, y) * y
 end
-mod(x::BitSigned, y::Unsigned) = rem(y + unsigned(rem(x, y)), y)
-mod(x::Unsigned, y::Signed) = rem(y + signed(rem(x, y)), y)
+function mod(x::BitSigned, y::Unsigned)
+    remval = rem(x, y) # correct iff  remval>=0
+    return unsigned(remval + (remval<zero(remval))*y)
+end
+function mod(x::Unsigned, y::Signed)
+    remval =  signed(rem(x, y)) #remval>0 so correct iff y>0 or remval==0
+    return remval + (!iszero(remval) && y<zero(y))*y
+end
 mod(x::T, y::T) where {T<:Unsigned} = rem(x, y)
 
 # Don't promote integers for div/rem/mod since there is no danger of overflow,
@@ -399,7 +445,7 @@ bswap(x::Union{Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128}) =
     bswap_int(x)
 
 """
-    count_ones(x::Integer) -> Integer
+    count_ones(x::Integer)::Integer
 
 Number of ones in the binary representation of `x`.
 
@@ -415,7 +461,7 @@ julia> count_ones(Int32(-1))
 count_ones(x::BitInteger) = (ctpop_int(x) % Int)::Int
 
 """
-    leading_zeros(x::Integer) -> Integer
+    leading_zeros(x::Integer)::Integer
 
 Number of zeros leading the binary representation of `x`.
 
@@ -428,7 +474,7 @@ julia> leading_zeros(Int32(1))
 leading_zeros(x::BitInteger) = (ctlz_int(x) % Int)::Int
 
 """
-    trailing_zeros(x::Integer) -> Integer
+    trailing_zeros(x::Integer)::Integer
 
 Number of zeros trailing the binary representation of `x`.
 
@@ -441,7 +487,7 @@ julia> trailing_zeros(2)
 trailing_zeros(x::BitInteger) = (cttz_int(x) % Int)::Int
 
 """
-    count_zeros(x::Integer) -> Integer
+    count_zeros(x::Integer)::Integer
 
 Number of zeros in the binary representation of `x`.
 
@@ -457,7 +503,7 @@ julia> count_zeros(-1)
 count_zeros(x::Integer) = count_ones(~x)
 
 """
-    leading_ones(x::Integer) -> Integer
+    leading_ones(x::Integer)::Integer
 
 Number of ones leading the binary representation of `x`.
 
@@ -470,7 +516,7 @@ julia> leading_ones(UInt32(2 ^ 32 - 2))
 leading_ones(x::Integer) = leading_zeros(~x)
 
 """
-    trailing_ones(x::Integer) -> Integer
+    trailing_ones(x::Integer)::Integer
 
 Number of ones trailing the binary representation of `x`.
 
@@ -483,7 +529,7 @@ julia> trailing_ones(3)
 trailing_ones(x::Integer) = trailing_zeros(~x)
 
 """
-    top_set_bit(x::Integer) -> Integer
+    top_set_bit(x::Integer)::Integer
 
 The number of bits in `x`'s binary representation, excluding leading zeros.
 
@@ -587,37 +633,32 @@ julia> bitstring(bitrotate(0b01110010, 8))
 bitrotate(x::T, k::Integer) where {T <: BitInteger} =
     (x << ((sizeof(T) << 3 - 1) & k)) | (x >>> ((sizeof(T) << 3 - 1) & -k))
 
-# @doc isn't available when running in Core at this point.
-# Tuple syntax for documentation two function signatures at the same time
-# doesn't work either at this point.
-if nameof(@__MODULE__) === :Base
-    for fname in (:mod, :rem)
-        @eval @doc """
-            rem(x::Integer, T::Type{<:Integer}) -> T
-            mod(x::Integer, T::Type{<:Integer}) -> T
-            %(x::Integer, T::Type{<:Integer}) -> T
-
-        Find `y::T` such that `x` ≡ `y` (mod n), where n is the number of integers representable
-        in `T`, and `y` is an integer in `[typemin(T),typemax(T)]`.
-        If `T` can represent any integer (e.g. `T == BigInt`), then this operation corresponds to
-        a conversion to `T`.
-
-        # Examples
-        ```jldoctest
-        julia> x = 129 % Int8
-        -127
-
-        julia> typeof(x)
-        Int8
-
-        julia> x = 129 % BigInt
-        129
-
-        julia> typeof(x)
-        BigInt
-        ```
-        """ $fname(x::Integer, T::Type{<:Integer})
-    end
+for fname in (:mod, :rem)
+    @eval @doc """
+        rem(x::Integer, T::Type{<:Integer})::T
+        mod(x::Integer, T::Type{<:Integer})::T
+        %(x::Integer, T::Type{<:Integer})::T
+
+    Find `y::T` such that `x` ≡ `y` (mod n), where n is the number of integers representable
+    in `T`, and `y` is an integer in `[typemin(T),typemax(T)]`.
+    If `T` can represent any integer (e.g. `T == BigInt`), then this operation corresponds to
+    a conversion to `T`.
+
+    # Examples
+    ```jldoctest
+    julia> x = 129 % Int8
+    -127
+
+    julia> typeof(x)
+    Int8
+
+    julia> x = 129 % BigInt
+    129
+
+    julia> typeof(x)
+    BigInt
+    ```
+    """ $fname(x::Integer, T::Type{<:Integer})
 end
 
 rem(x::T, ::Type{T}) where {T<:Integer} = x
@@ -629,70 +670,6 @@ mod(x::Integer, ::Type{T}) where {T<:Integer} = rem(x, T)
 
 unsafe_trunc(::Type{T}, x::Integer) where {T<:Integer} = rem(x, T)
 
-"""
-    trunc([T,] x)
-    trunc(x; digits::Integer= [, base = 10])
-    trunc(x; sigdigits::Integer= [, base = 10])
-
-`trunc(x)` returns the nearest integral value of the same type as `x` whose absolute value
-is less than or equal to the absolute value of `x`.
-
-`trunc(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is
-not representable.
-
-Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
-
-See also: [`%`](@ref rem), [`floor`](@ref), [`unsigned`](@ref), [`unsafe_trunc`](@ref).
-
-# Examples
-```jldoctest
-julia> trunc(2.22)
-2.0
-
-julia> trunc(-2.22, digits=1)
--2.2
-
-julia> trunc(Int, -2.22)
--2
-```
-"""
-function trunc end
-
-"""
-    floor([T,] x)
-    floor(x; digits::Integer= [, base = 10])
-    floor(x; sigdigits::Integer= [, base = 10])
-
-`floor(x)` returns the nearest integral value of the same type as `x` that is less than or
-equal to `x`.
-
-`floor(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is
-not representable.
-
-Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
-"""
-function floor end
-
-"""
-    ceil([T,] x)
-    ceil(x; digits::Integer= [, base = 10])
-    ceil(x; sigdigits::Integer= [, base = 10])
-
-`ceil(x)` returns the nearest integral value of the same type as `x` that is greater than or
-equal to `x`.
-
-`ceil(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is not
-representable.
-
-Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
-"""
-function ceil end
-
-round(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x)
-trunc(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x)
-floor(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x)
- ceil(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x)
-
 ## integer construction ##
 
 """
@@ -711,7 +688,7 @@ ERROR: LoadError: ArgumentError: invalid base 10 digit '.' in "123456789123.4"
 [...]
 ```
 """
-macro int128_str(s)
+macro int128_str(s::String)
     return parse(Int128, s)
 end
 
@@ -731,7 +708,7 @@ ERROR: LoadError: ArgumentError: invalid base 10 digit '-' in "-123456789123"
 [...]
 ```
 """
-macro uint128_str(s)
+macro uint128_str(s::String)
     return parse(UInt128, s)
 end
 
@@ -754,18 +731,28 @@ julia> big"_"
 ERROR: ArgumentError: invalid number format _ for BigInt or BigFloat
 [...]
 ```
+
+!!! warning
+    Using `@big_str` for constructing [`BigFloat`](@ref) values may not result
+    in the behavior that might be naively expected: as a macro, `@big_str`
+    obeys the global precision ([`setprecision`](@ref)) and rounding mode
+    ([`setrounding`](@ref)) settings as they are at *load time*. Thus, a
+    function like `() -> precision(big"0.3")` returns a constant whose value
+    depends on the value of the precision at the point when the function is
+    defined, **not** at the precision at the time when the function is called.
 """
-macro big_str(s)
+macro big_str(s::String)
     message = "invalid number format $s for BigInt or BigFloat"
     throw_error =  :(throw(ArgumentError($message)))
     if '_' in s
-        # remove _ in s[2:end-1]
-        bf = IOBuffer(maxsize=lastindex(s))
+        # remove _ in s[2:end-1].
+        # Do not allow '_' right before or after dot.
+        bf = IOBuffer(sizehint=ncodeunits(s))
         c = s[1]
         print(bf, c)
         is_prev_underscore = (c == '_')
         is_prev_dot = (c == '.')
-        for c in SubString(s, 2, lastindex(s)-1)
+        for c in SubString(s, nextind(s, 1), prevind(s, lastindex(s)))
             c != '_' && print(bf, c)
             c == '_' && is_prev_dot && return throw_error
             c == '.' && is_prev_underscore && return throw_error
@@ -773,7 +760,7 @@ macro big_str(s)
             is_prev_dot = (c == '.')
         end
         print(bf, s[end])
-        s = String(take!(bf))
+        s = unsafe_takestring!(bf)
     end
     n = tryparse(BigInt, s)
     n === nothing || return n
@@ -807,7 +794,7 @@ promote_rule(::Type{UInt128}, ::Type{Int128}) = UInt128
 
 The lowest value representable by the given (real) numeric DataType `T`.
 
-See also: [`floatmin`](@ref), [`typemax`](@ref), [`eps`](@ref).
+See also: [`floatmin`](@ref), [`maxintfloat`](@ref), [`typemax`](@ref), [`eps`](@ref).
 
 # Examples
 ```jldoctest
@@ -823,8 +810,11 @@ julia> typemin(Float16)
 julia> typemin(Float32)
 -Inf32
 
-julia> nextfloat(-Inf32)  # smallest finite Float32 floating point number
--3.4028235f38
+julia> floatmin(Float32)  # smallest positive finite Float32 floating point number
+1.1754944f-38
+
+julia> nextfloat(-Inf32) == -floatmax(Float32)  # equivalent ways of getting the lowest finite Float32 floating point number
+true
 ```
 """
 function typemin end
@@ -834,7 +824,7 @@ function typemin end
 
 The highest value representable by the given (real) numeric `DataType`.
 
-See also: [`floatmax`](@ref), [`typemin`](@ref), [`eps`](@ref).
+See also: [`floatmax`](@ref), [`maxintfloat`](@ref), [`typemin`](@ref), [`eps`](@ref).
 
 # Examples
 ```jldoctest
@@ -850,7 +840,7 @@ Inf
 julia> typemax(Float32)
 Inf32
 
-julia> floatmax(Float32)  # largest finite Float32 floating point number
+julia> floatmax(Float32)  # largest positive finite Float32 floating point number
 3.4028235f38
 ```
 """
@@ -892,7 +882,7 @@ widen(::Type{UInt64}) = UInt128
 # |x|<=2^(k-1), |y|<=2^k-1   =>   |x*y|<=2^(2k-1)-1
 widemul(x::Signed,y::Unsigned) = widen(x) * signed(widen(y))
 widemul(x::Unsigned,y::Signed) = signed(widen(x)) * widen(y)
-# multplication by Bool doesn't require widening
+# multiplication by Bool doesn't require widening
 widemul(x::Bool,y::Bool) = x * y
 widemul(x::Bool,y::Number) = x * y
 widemul(x::Number,y::Bool) = x * y
diff --git a/base/intfuncs.jl b/base/intfuncs.jl
index 1b007700f4331..e56cc63e4ff75 100644
--- a/base/intfuncs.jl
+++ b/base/intfuncs.jl
@@ -8,6 +8,9 @@
 Greatest common (positive) divisor (or zero if all arguments are zero).
 The arguments may be integer and rational numbers.
 
+``a`` is a divisor of ``b`` if there exists an integer ``m`` such
+that ``ma=b``.
+
 !!! compat "Julia 1.4"
     Rational arguments require Julia 1.4 or later.
 
@@ -97,6 +100,9 @@ end
 Least common (positive) multiple (or zero if any argument is zero).
 The arguments may be integer and rational numbers.
 
+``a`` is a multiple of ``b`` if there exists an integer ``m`` such
+that ``a=mb``.
+
 !!! compat "Julia 1.4"
     Rational arguments require Julia 1.4 or later.
 
@@ -137,11 +143,19 @@ function lcm(a::T, b::T) where T<:Integer
     end
 end
 
+function _promote_mixed_signs(a::Signed, b::Unsigned)
+    # handle the case a == typemin(typeof(a)) if R != typeof(a)
+    R = promote_typeof(a, b)
+    promote(abs(a % signed(R)), b)
+end
+
 gcd(a::Integer) = checked_abs(a)
 gcd(a::Rational) = checked_abs(a.num) // a.den
 lcm(a::Union{Integer,Rational}) = gcd(a)
-gcd(a::Unsigned, b::Signed) = gcd(promote(a, abs(b))...)
-gcd(a::Signed, b::Unsigned) = gcd(promote(abs(a), b)...)
+gcd(a::Unsigned, b::Signed) = gcd(b, a)
+gcd(a::Signed, b::Unsigned) = gcd(_promote_mixed_signs(a, b)...)
+lcm(a::Unsigned, b::Signed) = lcm(promote(a, abs(b))...)
+lcm(a::Signed, b::Unsigned) = lcm(_promote_mixed_signs(a, b)...)
 gcd(a::Real, b::Real) = gcd(promote(a,b)...)
 lcm(a::Real, b::Real) = lcm(promote(a,b)...)
 gcd(a::Real, b::Real, c::Real...) = gcd(a, gcd(b, c...))
@@ -150,7 +164,16 @@ gcd(a::T, b::T) where T<:Real = throw(MethodError(gcd, (a,b)))
 lcm(a::T, b::T) where T<:Real = throw(MethodError(lcm, (a,b)))
 
 gcd(abc::AbstractArray{<:Real}) = reduce(gcd, abc; init=zero(eltype(abc)))
-lcm(abc::AbstractArray{<:Real}) = reduce(lcm, abc; init=one(eltype(abc)))
+function lcm(abc::AbstractArray{<:Real})
+    # Using reduce with init=one(eltype(abc)) is buggy for Rationals.
+    l = length(abc)
+    if l == 0
+        eltype(abc) <: Integer && return one(eltype(abc))
+        throw(ArgumentError("lcm has no identity for $(eltype(abc))"))
+    end
+    l == 1 && return abs(only(abc))
+    return reduce(lcm, abc)
+end
 
 function gcd(abc::AbstractArray{<:Integer})
     a = zero(eltype(abc))
@@ -165,17 +188,24 @@ end
 
 # return (gcd(a, b), x, y) such that ax+by == gcd(a, b)
 """
-    gcdx(a, b)
+    gcdx(a, b...)
 
-Computes the greatest common (positive) divisor of `a` and `b` and their Bézout
+Compute the greatest common (positive) divisor of `a` and `b` and their Bézout
 coefficients, i.e. the integer coefficients `u` and `v` that satisfy
-``ua+vb = d = gcd(a, b)``. ``gcdx(a, b)`` returns ``(d, u, v)``.
+``u*a + v*b = d = gcd(a, b)``. ``gcdx(a, b)`` returns ``(d, u, v)``.
+
+For more arguments than two, i.e., `gcdx(a, b, c, ...)` the Bézout coefficients are computed
+recursively, returning a solution `(d, u, v, w, ...)` to
+``u*a + v*b + w*c + ... = d = gcd(a, b, c, ...)``.
 
 The arguments may be integer and rational numbers.
 
 !!! compat "Julia 1.4"
     Rational arguments require Julia 1.4 or later.
 
+!!! compat "Julia 1.12"
+    More or fewer arguments than two require Julia 1.12 or later.
+
 # Examples
 ```jldoctest
 julia> gcdx(12, 42)
@@ -183,6 +213,9 @@ julia> gcdx(12, 42)
 
 julia> gcdx(240, 46)
 (2, -9, 47)
+
+julia> gcdx(15, 12, 20)
+(1, 7, -7, -1)
 ```
 
 !!! note
@@ -196,29 +229,62 @@ julia> gcdx(240, 46)
     their `typemax`, and the identity then holds only via the unsigned
     integers' modulo arithmetic.
 """
-Base.@assume_effects :terminates_locally function gcdx(a::Integer, b::Integer)
-    T = promote_type(typeof(a), typeof(b))
+Base.@assume_effects :terminates_locally function gcdx(a::T, b::T) where {T<:Integer}
+    if iszero(a) && iszero(b)
+        return (zero(T), zero(T), zero(T))
+    elseif isone(abs(b))
+        # handles (typemin(::Signed), -1)
+        return (one(T), zero(T), b)
+    elseif isone(abs(a))
+        return (one(T), a, zero(T))
+    end
     # a0, b0 = a, b
     s0, s1 = oneunit(T), zero(T)
     t0, t1 = s1, s0
     # The loop invariant is: s0*a0 + t0*b0 == a && s1*a0 + t1*b0 == b
-    x = a % T
-    y = b % T
-    while y != 0
-        q, r = divrem(x, y)
-        x, y = y, r
+    while !iszero(b)
+        q, r = divrem(a, b)
+        a, b = b, r
         s0, s1 = s1, s0 - q*s1
         t0, t1 = t1, t0 - q*t1
     end
-    x < 0 ? (-x, -s0, -t0) : (x, s0, t0)
+    # for cases like abs(Int8(-128))
+    if isnegative(a) && isnegative(abs(a))
+        throw(DomainError((a, b), LazyString("gcd not representable in ", T)))
+    else
+        return isnegative(a) ? (abs(a), -s0, -t0) : (a, s0, t0)
+    end
 end
 gcdx(a::Real, b::Real) = gcdx(promote(a,b)...)
 gcdx(a::T, b::T) where T<:Real = throw(MethodError(gcdx, (a,b)))
+gcdx(a::Real) = (gcd(a), signbit(a) ? -one(a) : one(a))
+function gcdx(a::Real, b::Real, cs::Real...)
+    # a solution to the 3-arg `gcdx(a,b,c)` problem, `u*a + v*b + w*c = gcd(a,b,c)`, can be
+    # obtained from the 2-arg problem in three steps:
+    #   1. `gcdx(a,b)`: solve `i*a + j*b = d′ = gcd(a,b)` for `(i,j)`
+    #   2. `gcdx(d′,c)`: solve `x*gcd(a,b) + yc = gcd(gcd(a,b),c) = gcd(a,b,c)` for `(x,y)`
+    #   3. return `d = gcd(a,b,c)`, `u = i*x`, `v = j*x`, and `w = y`
+    # the N-arg solution proceeds similarly by recursion
+    d, i, j = gcdx(a, b)
+    d′, x, ys... = gcdx(d, cs...)
+    return d′, i*x, j*x, ys...
+end
+
+function gcdx(a::Signed, b::Unsigned)
+    R = promote_typeof(a, b)
+    d, u, v = gcdx(promote(abs(a % signed(R)), b)...)
+    flip_typemin = isnegative(a) & (R <: Signed)
+    d, flipsign(u, a - flip_typemin), v
+end
+function gcdx(a::Unsigned, b::Signed)
+    d, v, u = gcdx(b, a)
+    d, u, v
+end
 
 # multiplicative inverse of n mod m, error if none
 
 """
-    invmod(n, m)
+    invmod(n::Integer, m::Integer)
 
 Take the inverse of `n` modulo `m`: `y` such that ``n y = 1 \\pmod m``,
 and ``div(y,m) = 0``. This will throw an error if ``m = 0``, or if
@@ -237,49 +303,113 @@ julia> invmod(5, 6)
 ```
 """
 function invmod(n::Integer, m::Integer)
+    # The postcondition is: mod(widemul(result, n), m) == mod(one(T), m) && iszero(div(result, m))
     iszero(m) && throw(DomainError(m, "`m` must not be 0."))
-    if n isa Signed && hastypemax(typeof(n))
-        # work around inconsistencies in gcdx
-        # https://github.com/JuliaLang/julia/issues/33781
-        T = promote_type(typeof(n), typeof(m))
-        n == typemin(typeof(n)) && m == typeof(n)(-1) && return T(0)
-        n == typeof(n)(-1) && m == typemin(typeof(n)) && return T(-1)
+    R = promote_typeof(n, m)
+    if R <: Signed
+        x = _bezout_coef(n, m)
+        return mod(x, m)
+    else
+        S = signed(R)
+        if !hastypemax(S) || (n <= typemax(S)) && (m <= typemax(S))
+            x = _bezout_coef(n % S, m % S)
+
+            # this branch is only hit if R <: Unsigned, so we don't have
+            # to worry about abs(typemin(::Signed)) overflow. If `m` is
+            # signed then `x` must be unsigned, and thus never negative
+            isnegative(x) && (x += abs(m))
+            return mod(x % R, m)
+        else
+            # since gcdx only promises bezout w.r.t overflow for unsigned ints,
+            # we have to widen to a signed type
+            W = widen(S)
+            x = _bezout_coef(n % W, m % W)
+            t = mod(x, m % W)
+            isnegative(m) && (t -= m)
+            return mod(t % R, m)
+        end
     end
-    g, x, y = gcdx(n, m)
+end
+
+function _bezout_coef(n, m)
+    g, x, _ = gcdx(n, m)
     g != 1 && throw(DomainError((n, m), LazyString("Greatest common divisor is ", g, ".")))
-    # Note that m might be negative here.
-    if n isa Unsigned && hastypemax(typeof(n)) && x > typemax(n)>>1
-        # x might have wrapped if it would have been negative
-        # adding back m forces a correction
-        x += m
+    return x
+end
+
+"""
+    invmod(n::Integer, T) where {T <: Base.BitInteger}
+    invmod(n::T) where {T <: Base.BitInteger}
+
+Compute the modular inverse of `n` in the integer ring of type `T`, i.e. modulo
+`2^N` where `N = 8*sizeof(T)` (e.g. `N = 32` for `Int32`). In other words, these
+methods satisfy the following identities:
+```
+n * invmod(n) == 1
+(n * invmod(n, T)) % T == 1
+(n % T) * invmod(n, T) == 1
+```
+Note that `*` here is modular multiplication in the integer ring, `T`.  This will
+throw an error if `n` is even, because then it is not relatively prime with `2^N`
+and thus has no such inverse.
+
+Specifying the modulus implied by an integer type as an explicit value is often
+inconvenient since the modulus is by definition too big to be represented by the
+type.
+
+The modular inverse is computed much more efficiently than the general case
+using the algorithm described in https://arxiv.org/pdf/2204.04342.pdf.
+
+!!! compat "Julia 1.11"
+    The `invmod(n)` and `invmod(n, T)` methods require Julia 1.11 or later.
+"""
+invmod(n::Integer, ::Type{T}) where {T<:BitInteger} = invmod(n % T)
+
+function invmod(n::T) where {T<:BitInteger}
+    isodd(n) || throw(DomainError(n, "Argument must be odd."))
+    x = (3*n ⊻ 2) % T
+    y = (1 - n*x) % T
+    for _ = 1:trailing_zeros(2*sizeof(T))
+        x *= y + true
+        y *= y
     end
-    # The postcondition is: mod(result * n, m) == mod(T(1), m) && div(result, m) == 0
-    return mod(x, m)
+    return x
 end
 
 # ^ for any x supporting *
-to_power_type(x) = convert(Base._return_type(*, Tuple{typeof(x), typeof(x)}), x)
 @noinline throw_domerr_powbysq(::Any, p) = throw(DomainError(p, LazyString(
     "Cannot raise an integer x to a negative power ", p, ".",
     "\nConvert input to float.")))
 @noinline throw_domerr_powbysq(::Integer, p) = throw(DomainError(p, LazyString(
     "Cannot raise an integer x to a negative power ", p, ".",
     "\nMake x or ", p, " a float by adding a zero decimal ",
-    "(e.g., 2.0^", p, " or 2^", float(p), " instead of 2^", p, ")",
+    "(e.g., 2.0^", p, " or 2^", float(p), " instead of 2^", p, ") ",
     "or write 1/x^", -p, ", float(x)^", p, ", x^float(", p, ") or (x//1)^", p, ".")))
 @noinline throw_domerr_powbysq(::AbstractMatrix, p) = throw(DomainError(p, LazyString(
     "Cannot raise an integer matrix x to a negative power ", p, ".",
     "\nMake x a float matrix by adding a zero decimal ",
-    "(e.g., [2.0 1.0;1.0 0.0]^", p, " instead of [2 1;1 0]^", p, ")",
+    "(e.g., [2.0 1.0;1.0 0.0]^", p, " instead of [2 1;1 0]^", p, ") ",
     "or write float(x)^", p, " or Rational.(x)^", p, ".")))
-@assume_effects :terminates_locally function power_by_squaring(x_, p::Integer)
-    x = to_power_type(x_)
+# The * keyword supports `*=checked_mul` for `checked_pow`
+@assume_effects :terminates_locally function power_by_squaring(x_, p::Integer; mul=*)
+    x_squared_ = x_ * x_
+    x_squared_type = typeof(x_squared_)
+    T = if x_ isa Number
+        promote_type(typeof(x_), x_squared_type)
+    else
+        x_squared_type
+    end
+    x = convert(T, x_)
+    square_is_useful = mul === *
     if p == 1
         return copy(x)
     elseif p == 0
         return one(x)
     elseif p == 2
-        return x*x
+        if square_is_useful  # avoid performing the same multiplication a second time when possible
+            return convert(T, x_squared_)
+        end
+        return mul(x, x)
     elseif p < 0
         isone(x) && return copy(x)
         isone(-x) && return iseven(p) ? one(x) : copy(x)
@@ -287,17 +417,22 @@ to_power_type(x) = convert(Base._return_type(*, Tuple{typeof(x), typeof(x)}), x)
     end
     t = trailing_zeros(p) + 1
     p >>= t
+    if square_is_useful  # avoid performing the same multiplication a second time when possible
+        if (t -= 1) > 0
+            x = convert(T, x_squared_)
+        end
+    end
     while (t -= 1) > 0
-        x *= x
+        x = mul(x, x)
     end
     y = x
     while p > 0
         t = trailing_zeros(p) + 1
         p >>= t
         while (t -= 1) >= 0
-            x *= x
+            x = mul(x, x)
         end
-        y *= x
+        y = mul(y, x)
     end
     return y
 end
@@ -321,7 +456,7 @@ end
 
 # Restrict inlining to hardware-supported arithmetic types, which
 # are fast enough to benefit from inlining.
-const HWReal = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64,Float32,Float64}
+const HWReal = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64}
 const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}}
 
 # Inline x^2 and x^3 for Val
@@ -384,34 +519,63 @@ julia> powermod(5, 3, 19)
 function powermod(x::Integer, p::Integer, m::T) where T<:Integer
     p == 0 && return mod(one(m),m)
     # When the concrete type of p is signed and has the lowest value,
-    # `p != 0 && p == -p` is equivalent to `p == typemin(typeof(p))` for 2's complement representation.
+    # `p < 0 && p == -p` is equivalent to `p == typemin(typeof(p))` for 2's complement representation.
     # but will work for integer types like `BigInt` that don't have `typemin` defined
     # It needs special handling otherwise will cause overflow problem.
-    if p == -p
-        imod = invmod(x, m)
-        rhalf = powermod(imod, -(p÷2), m)
-        r::T = mod(widemul(rhalf, rhalf), m)
-        isodd(p) && (r = mod(widemul(r, imod), m))
-        #else odd
-        return r
-    elseif p < 0
-        return powermod(invmod(x, m), -p, m)
+    if p < 0
+        if p == -p
+            imod = invmod(x, m)
+            rhalf = powermod(imod, -(p÷2), m)
+            r::T = mod(widemul(rhalf, rhalf), m)
+            isodd(p) && (r = mod(widemul(r, imod), m))
+            #else odd
+            return r
+        else
+            return powermod(invmod(x, m), -p, m)
+        end
     end
     (m == 1 || m == -1) && return zero(m)
-    b = oftype(m,mod(x,m))  # this also checks for divide by zero
-
-    t = prevpow(2, p)
-    r = 1
-    while true
-        if p >= t
-            r = mod(widemul(r,b),m)
-            p -= t
+
+    mm = uabs(m)
+    rr = one(mm)
+    bb = oftype(mm, mod(x, mm))
+
+    # legal && profitable
+    if _powermod_mi_legal(mm) && (p > 2sizeof(mm))
+        if bb == 0
+            rr = zero(mm)
+        else
+            mis = MultiplicativeInverses.multiplicativeinverse(mm)
+            Base.@assume_effects :terminates_locally while true
+                if (p & 1) != 0
+                    rr = mod(rr * bb, mis)
+                end
+                p >>= 1
+                p == 0 && break
+                bb = mod(bb * bb, mis)
+            end
+        end
+    else
+        if bb == 0
+            rr = zero(mm)
+        else
+            Base.@assume_effects :terminates_locally while true
+                if (p & 1) != 0
+                    rr = oftype(mm, mod(widemul(rr, bb), mm))
+                end
+                p >>= 1
+                p == 0 && break
+                bb = oftype(mm, mod(widemul(bb, bb), mm))
+            end
         end
-        t >>>= 1
-        t <= 0 && break
-        r = mod(widemul(r,r),m)
     end
-    return r
+    r = oftype(m, rr)
+    return (iszero(r) || (m > 0)) ? r : r + m
+end
+
+_powermod_mi_legal(::Integer) = false
+function _powermod_mi_legal(mm::T) where {T<:Unsigned}
+    return Base.hastypemax(T) && (mm <= (typemax(T) >> (sizeof(T) << 2)))
 end
 
 # optimization: promote the modulus m to BigInt only once (cf. widemul in generic powermod above)
@@ -423,7 +587,7 @@ _prevpow2(x::Unsigned) = one(x) << unsigned(top_set_bit(x)-1)
 _prevpow2(x::Integer) = reinterpret(typeof(x),x < 0 ? -_prevpow2(unsigned(-x)) : _prevpow2(unsigned(x)))
 
 """
-    ispow2(n::Number) -> Bool
+    ispow2(n::Number)::Bool
 
 Test whether `n` is an integer power of two.
 
@@ -488,7 +652,8 @@ function nextpow(a::Real, x::Real)
     n = ceil(Integer,log(a, x))
     # round-off error of log can go either direction, so need some checks
     p = a^(n-1)
-    x > typemax(p) && throw(DomainError(x,"argument is beyond the range of type of the base"))
+    hastypemax(typeof(p)) && x > typemax(p) &&
+        throw(DomainError(x,"argument is beyond the range of type of the base"))
     p >= x && return p
     wp = a^n
     wp > p || throw(OverflowError("result is beyond the range of type of the base"))
@@ -529,9 +694,10 @@ function prevpow(a::T, x::Real) where T <: Real
     n = floor(Integer,log(a, x))
     # round-off error of log can go either direction, so need some checks
     p = a^n
-    x > typemax(p) && throw(DomainError(x,"argument is beyond the range of type of the base"))
+    hastypemax(typeof(p)) && x > typemax(p) &&
+        throw(DomainError(x,"argument is beyond the range of type of the base"))
     if a isa Integer
-        wp, overflow = mul_with_overflow(a, p)
+        wp, overflow = mul_with_overflow(promote(a, p)...)
         wp <= x && !overflow && return wp
     else
         wp = a^(n+1)
@@ -558,7 +724,7 @@ function bit_ndigits0z(x::Base.BitUnsigned64)
 end
 function bit_ndigits0z(x::UInt128)
     n = 0
-    while x > 0x8ac7230489e80000
+    while x > 0x8ac7230489e80000 # 10e18
         x = div(x,0x8ac7230489e80000)
         n += 19
     end
@@ -665,7 +831,8 @@ function ndigits0z(x::Integer, b::Integer)
 end
 
 # Extends the definition in base/int.jl
-top_set_bit(x::Integer) = ceil(Integer, log2(x + oneunit(x)))
+# assume x >= 0. result is implementation-defined for negative values
+top_set_bit(x::Integer) = iszero(x) ? 0 : exponent(x) + 1
 
 """
     ndigits(n::Integer; base::Integer=10, pad::Integer=1)
@@ -704,7 +871,7 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba
 function bin(x::Unsigned, pad::Int, neg::Bool)
     m = top_set_bit(x)
     n = neg + max(pad, m)
-    a = StringVector(n)
+    a = StringMemory(n)
     # for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes
     #     @inbounds a[n - i] = 0x30 + (((x >> i) % UInt8)::UInt8 & 0x1)
     # end
@@ -724,50 +891,98 @@ function bin(x::Unsigned, pad::Int, neg::Bool)
         x >>= 0x1
         i -= 1
     end
-    if neg; @inbounds a[1]=0x2d; end
-    String(a)
+    neg && (@inbounds a[1] = 0x2d) # UInt8('-')
+    unsafe_takestring(a)
 end
 
 function oct(x::Unsigned, pad::Int, neg::Bool)
     m = div(top_set_bit(x) + 2, 3)
     n = neg + max(pad, m)
-    a = StringVector(n)
+    a = StringMemory(n)
     i = n
     while i > neg
         @inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 0x7)
         x >>= 0x3
         i -= 1
     end
-    if neg; @inbounds a[1]=0x2d; end
-    String(a)
+    neg && (@inbounds a[1] = 0x2d) # UInt8('-')
+    unsafe_takestring(a)
 end
 
 # 2-digit decimal characters ("00":"99")
-const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
+const _dec_d100 = UInt16[
+# generating expression: UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
+#    0 0,    0 1,    0 2,    0 3, and so on in little-endian
+  0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930,
+  0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931,
+  0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932,
+  0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933,
+  0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934,
+  0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935,
+  0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936,
+  0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937,
+  0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938,
+  0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939
+]
 
-function dec(x::Unsigned, pad::Int, neg::Bool)
-    n = neg + ndigits(x, pad=pad)
-    a = StringVector(n)
-    i = n
-    @inbounds while i >= 2
-        d, r = divrem(x, 0x64)
-        d100 = _dec_d100[(r % Int)::Int + 1]
-        a[i-1] = d100 % UInt8
-        a[i] = (d100 >> 0x8) % UInt8
-        x = oftype(x, d)
+function append_c_digits(olength::Int, digits::Unsigned, buf, pos::Int)
+    i = olength
+    while i >= 2
+        d, c = divrem(digits, 0x64)
+        digits = oftype(digits, d)
+        @inbounds d100 = _dec_d100[(c % Int)::Int + 1]
+        @inbounds buf[pos + i - 2] = d100 % UInt8
+        @inbounds buf[pos + i - 1] = (d100 >> 0x8) % UInt8
         i -= 2
     end
-    if i > neg
-        @inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8
+    if i == 1
+        @inbounds buf[pos] = UInt8('0') + rem(digits, 0xa) % UInt8
+        i -= 1
     end
-    if neg; @inbounds a[1]=0x2d; end
-    String(a)
+    return pos + olength
+end
+
+function append_nine_digits(digits::Unsigned, buf, pos::Int)
+    if digits == 0
+        for _ = 1:9
+            @inbounds buf[pos] = UInt8('0')
+            pos += 1
+        end
+        return pos
+    end
+    return @inline append_c_digits(9, digits, buf, pos) # force loop-unrolling on the length
+end
+
+function append_c_digits_fast(olength::Int, digits::Unsigned, buf, pos::Int)
+    i = olength
+    # n.b. olength may be larger than required to print all of `digits` (and will be padded
+    # with zeros), but the printed number will be undefined if it is smaller, and may include
+    # bits of both the high and low bytes.
+    maxpow10 = 0x3b9aca00 # 10e9 as UInt32
+    while i > 9 && digits > typemax(UInt)
+        # do everything in cheap math chunks, using the processor's native math size
+        d, c = divrem(digits, maxpow10)
+        digits = oftype(digits, d)
+        append_nine_digits(c % UInt32, buf, pos + i - 9)
+        i -= 9
+    end
+    append_c_digits(i, digits % UInt, buf, pos)
+    return pos + olength
+end
+
+
+function dec(x::Unsigned, pad::Int, neg::Bool)
+    n = neg + ndigits(x, pad=pad)
+    a = StringMemory(n)
+    append_c_digits_fast(n, x, a, 1)
+    neg && (@inbounds a[1] = 0x2d) # UInt8('-')
+    unsafe_takestring(a)
 end
 
 function hex(x::Unsigned, pad::Int, neg::Bool)
     m = 2 * sizeof(x) - (leading_zeros(x) >> 2)
     n = neg + max(pad, m)
-    a = StringVector(n)
+    a = StringMemory(n)
     i = n
     while i >= 2
         b = (x % UInt8)::UInt8
@@ -781,8 +996,8 @@ function hex(x::Unsigned, pad::Int, neg::Bool)
         d = (x % UInt8)::UInt8 & 0xf
         @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30)
     end
-    if neg; @inbounds a[1]=0x2d; end
-    String(a)
+    neg && (@inbounds a[1] = 0x2d) # UInt8('-')
+    unsafe_takestring(a)
 end
 
 const base36digits = UInt8['0':'9';'a':'z']
@@ -794,7 +1009,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
     b = (base % Int)::Int
     digits = abs(b) <= 36 ? base36digits : base62digits
     n = neg + ndigits(x, base=b, pad=pad)
-    a = StringVector(n)
+    a = StringMemory(n)
     i = n
     @inbounds while i > neg
         if b > 0
@@ -806,8 +1021,8 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
         end
         i -= 1
     end
-    if neg; @inbounds a[1]=0x2d; end
-    String(a)
+    neg && (@inbounds a[1] = 0x2d) # UInt8('-')
+    unsafe_takestring(a)
 end
 
 split_sign(n::Integer) = unsigned(abs(n)), n < 0
@@ -819,7 +1034,8 @@ split_sign(n::Unsigned) = n, false
 Convert an integer `n` to a string in the given `base`,
 optionally specifying a number of digits to pad to.
 
-See also [`digits`](@ref), [`bitstring`](@ref), [`count_zeros`](@ref).
+See also [`digits`](@ref), [`bitstring`](@ref), [`count_zeros`](@ref),
+and the Printf standard library.
 
 # Examples
 ```jldoctest
@@ -828,6 +1044,14 @@ julia> string(5, base = 13, pad = 4)
 
 julia> string(-13, base = 5, pad = 4)
 "-0023"
+
+julia> using Printf
+
+julia> @sprintf("%04i", 5)
+"0005"
+
+julia> @sprintf("%4i", 5)
+"   5"
 ```
 """
 function string(n::Integer; base::Integer = 10, pad::Integer = 1)
@@ -854,7 +1078,8 @@ string(b::Bool) = b ? "true" : "false"
 """
     bitstring(n)
 
-A string giving the literal bit representation of a primitive type.
+A string giving the literal bit representation of a primitive type
+(in bigendian order, i.e. most-significant bit first).
 
 See also [`count_ones`](@ref), [`count_zeros`](@ref), [`digits`](@ref).
 
@@ -868,9 +1093,9 @@ julia> bitstring(2.2)
 ```
 """
 function bitstring(x::T) where {T}
-    isprimitivetype(T) || throw(ArgumentError("$T not a primitive type"))
+    isprimitivetype(T) || throw(ArgumentError(LazyString(T, " not a primitive type")))
     sz = sizeof(T) * 8
-    str = StringVector(sz)
+    str = StringMemory(sz)
     i = sz
     @inbounds while i >= 4
         b = UInt32(sizeof(T) == 1 ? bitcast(UInt8, x) : trunc_int(UInt8, x))
@@ -882,7 +1107,7 @@ function bitstring(x::T) where {T}
         x = lshr_int(x, 4)
         i -= 4
     end
-    return String(str)
+    return unsafe_takestring(str)
 end
 
 """
@@ -890,7 +1115,7 @@ end
 
 Return an array with element type `T` (default `Int`) of the digits of `n` in the given
 base, optionally padded with zeros to a specified size. More significant digits are at
-higher indices, such that `n == sum(digits[k]*base^(k-1) for k=1:length(digits))`.
+higher indices, such that `n == sum(digits[k]*base^(k-1) for k in 1:length(digits))`.
 
 See also [`ndigits`](@ref), [`digits!`](@ref),
 and for base 2 also [`bitstring`](@ref), [`count_ones`](@ref).
@@ -931,12 +1156,10 @@ function digits(T::Type{<:Integer}, n::Integer; base::Integer = 10, pad::Integer
 end
 
 """
-    hastypemax(T::Type) -> Bool
+    hastypemax(T::Type)::Bool
 
 Return `true` if and only if the extrema `typemax(T)` and `typemin(T)` are defined.
 """
-hastypemax(::Base.BitIntegerType) = true
-hastypemax(::Type{Bool}) = true
 hastypemax(::Type{T}) where {T} = applicable(typemax, T) && applicable(typemin, T)
 
 """
@@ -968,7 +1191,7 @@ julia> digits!([2, 2, 2, 2, 2, 2], 10, base = 2)
 function digits!(a::AbstractVector{T}, n::Integer; base::Integer = 10) where T<:Integer
     2 <= abs(base) || throw(DomainError(base, "base must be ≥ 2 or ≤ -2"))
     hastypemax(T) && abs(base) - 1 > typemax(T) &&
-        throw(ArgumentError("type $T too small for base $base"))
+        throw(ArgumentError(LazyString("type ", T, " too small for base ", base)))
     isempty(a) && return a
 
     if base > 0
@@ -1047,7 +1270,7 @@ julia> factorial(big(21))
 * [Factorial](https://en.wikipedia.org/wiki/Factorial) on Wikipedia.
 """
 function factorial(n::Integer)
-    n < 0 && throw(DomainError(n, "`n` must be nonnegative."))
+    n < 0 && throw(DomainError(n, "`n` must be non-negative."))
     f::typeof(n*n) = 1
     for i::typeof(n*n) = 2:n
         f *= i
@@ -1089,6 +1312,8 @@ julia> binomial(-5, 3)
 # External links
 * [Binomial coefficient](https://en.wikipedia.org/wiki/Binomial_coefficient) on Wikipedia.
 """
+binomial(n::Integer, k::Integer) = binomial(promote(n, k)...)
+
 Base.@assume_effects :terminates_locally function binomial(n::T, k::T) where T<:Integer
     n0, k0 = n, k
     k < 0 && return zero(T)
@@ -1148,3 +1373,102 @@ function binomial(x::Number, k::Integer)
     # and instead divide each term by i, to avoid spurious overflow.
     return prod(i -> (x-(i-1))/i, OneTo(k), init=oneunit(x)/one(k))
 end
+
+"""
+    clamp(x, lo, hi)
+
+Return `x` if `lo <= x <= hi`. If `x > hi`, return `hi`. If `x < lo`, return `lo`. Arguments
+are promoted to a common type.
+
+See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref).
+
+!!! compat "Julia 1.3"
+    `missing` as the first argument requires at least Julia 1.3.
+
+# Examples
+```jldoctest
+julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0)
+3-element Vector{BigFloat}:
+ 3.141592653589793238462643383279502884197169399375105820974944592307816406286198
+ 2.0
+ 9.0
+
+julia> clamp.([11, 8, 5], 10, 6)  # an example where lo > hi
+3-element Vector{Int64}:
+  6
+  6
+ 10
+```
+"""
+function clamp(x::X, lo::L, hi::H) where {X,L,H}
+    T = promote_type(X, L, H)
+    return (x > hi) ? convert(T, hi) : (x < lo) ? convert(T, lo) : convert(T, x)
+end
+
+"""
+    clamp(x, T)::T
+
+Clamp `x` between `typemin(T)` and `typemax(T)` and convert the result to type `T`.
+
+See also [`trunc`](@ref).
+
+# Examples
+```jldoctest
+julia> clamp(200, Int8)
+127
+
+julia> clamp(-200, Int8)
+-128
+
+julia> trunc(Int, 4pi^2)
+39
+```
+"""
+function clamp(x, ::Type{T}) where {T<:Integer}
+    # delegating to clamp(x, typemin(T), typemax(T)) would promote types
+    # this way, we avoid unnecessary conversions
+    # think of, e.g., clamp(big(2) ^ 200, Int16)
+    lo = typemin(T)
+    hi = typemax(T)
+    return (x > hi) ? hi : (x < lo) ? lo : convert(T, x)
+end
+
+
+"""
+    clamp!(array::AbstractArray, lo, hi)
+
+Restrict values in `array` to the specified range, in-place.
+See also [`clamp`](@ref).
+
+!!! compat "Julia 1.3"
+    `missing` entries in `array` require at least Julia 1.3.
+
+# Examples
+```jldoctest
+julia> row = collect(-4:4)';
+
+julia> clamp!(row, 0, Inf)
+1×9 adjoint(::Vector{Int64}) with eltype Int64:
+ 0  0  0  0  0  1  2  3  4
+
+julia> clamp.((-4:4)', 0, Inf)
+1×9 Matrix{Float64}:
+ 0.0  0.0  0.0  0.0  0.0  1.0  2.0  3.0  4.0
+```
+"""
+function clamp!(x::AbstractArray, lo, hi)
+    @inbounds for i in eachindex(x)
+        x[i] = clamp(x[i], lo, hi)
+    end
+    x
+end
+
+"""
+    clamp(x::Integer, r::AbstractUnitRange)
+
+Clamp `x` to lie within range `r`.
+
+!!! compat "Julia 1.6"
+     This method requires at least Julia 1.6.
+"""
+clamp(x::Integer, r::AbstractUnitRange{<:Integer}) = clamp(x, first(r), last(r))
diff --git a/base/io.jl b/base/io.jl
index c62d6393d12ec..0f13e59baea8d 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -2,6 +2,13 @@
 
 # Generic IO stubs -- all subtypes should implement these (if meaningful)
 
+"""
+    IO
+
+Abstract supertype for input/output types.
+"""
+IO
+
 """
     EOFError()
 
@@ -25,19 +32,25 @@ end
 
 lock(::IO) = nothing
 unlock(::IO) = nothing
+
+"""
+    reseteof(io)
+
+Clear the EOF flag from IO so that further reads (and possibly writes) are
+again allowed. Note that it may immediately get re-set, if the underlying
+stream object is at EOF and cannot be resumed.
+"""
 reseteof(x::IO) = nothing
 
 const SZ_UNBUFFERED_IO = 65536
 buffer_writes(x::IO, bufsize=SZ_UNBUFFERED_IO) = x
 
 """
-    isopen(object) -> Bool
+    isopen(object)::Bool
 
-Determine whether an object - such as a stream or timer
--- is not yet closed. Once an object is closed, it will never produce a new event.
-However, since a closed stream may still have data to read in its buffer,
-use [`eof`](@ref) to check for the ability to read data.
-Use the `FileWatching` package to be notified when a stream might be writable or readable.
+Determine whether an object, such as an IO or timer, is still open and hence active.
+
+See also: [`close`](@ref)
 
 # Examples
 ```jldoctest
@@ -55,9 +68,19 @@ false
 function isopen end
 
 """
-    close(stream)
+    close(io::IO)
+
+Close `io`. Performs a [`flush`](@ref) first.
 
-Close an I/O stream. Performs a [`flush`](@ref) first.
+Closing an IO signals that its underlying resources (OS handle, network
+connections, etc) should be destroyed.
+A closed IO is in an undefined state and should not be written to or read from.
+When attempting to do so, the IO may throw an exception, continue to behave
+normally, or read/write zero bytes, depending on the implementation.
+However, implementations should make sure that reading to or writing from a
+closed IO does not cause undefined behaviour.
+
+See also: [`isopen`](@ref)
 """
 function close end
 
@@ -68,6 +91,10 @@ Shutdown the write half of a full-duplex I/O stream. Performs a [`flush`](@ref)
 first. Notify the other end that no more data will be written to the underlying
 file. This is not supported by all IO types.
 
+If implemented, `closewrite` causes subsequent `read` or `eof` calls that would
+block to instead throw EOF or return true, respectively. If the stream is
+already closed, this is idempotent.
+
 # Examples
 ```jldoctest
 julia> io = Base.BufferStream(); # this never blocks, so we can read and write on the same Task
@@ -85,9 +112,11 @@ julia> read(io, String)
 function closewrite end
 
 """
-    flush(stream)
+    flush(io::IO)
 
-Commit all currently buffered writes to the given stream.
+Commit all currently buffered writes to the given io.
+This has a default implementation `flush(::IO) = nothing`, so may be called
+in generic IO code.
 """
 function flush end
 
@@ -119,8 +148,10 @@ data has already been buffered. The result is a `Vector{UInt8}`.
 """
 function readavailable end
 
+function isexecutable end
+
 """
-    isreadable(io) -> Bool
+    isreadable(io)::Bool
 
 Return `false` if the specified IO object is not readable.
 
@@ -143,7 +174,7 @@ julia> rm("myfile.txt")
 isreadable(io::IO) = isopen(io)
 
 """
-    iswritable(io) -> Bool
+    iswritable(io)::Bool
 
 Return `false` if the specified IO object is not writable.
 
@@ -166,7 +197,7 @@ julia> rm("myfile.txt")
 iswritable(io::IO) = isopen(io)
 
 """
-    eof(stream) -> Bool
+    eof(stream)::Bool
 
 Test whether an I/O stream is at end-of-file. If the stream is not yet exhausted, this
 function will block to wait for more data if necessary, and then return `false`. Therefore
@@ -233,7 +264,7 @@ The endianness of the written value depends on the endianness of the host system
 Convert to/from a fixed endianness when writing/reading (e.g. using  [`htol`](@ref) and
 [`ltoh`](@ref)) to get results that are consistent across platforms.
 
-You can write multiple values with the same `write` call. i.e. the following are equivalent:
+You can write multiple values with the same `write` call, i.e. the following are equivalent:
 
     write(io, x, y...)
     write(io, x) + write(io, y...)
@@ -263,13 +294,13 @@ julia> io = IOBuffer();
 julia> write(io, "JuliaLang is a GitHub organization.", " It has many members.")
 56
 
-julia> String(take!(io))
+julia> takestring!(io)
 "JuliaLang is a GitHub organization. It has many members."
 
 julia> write(io, "Sometimes those members") + write(io, " write documentation.")
 44
 
-julia> String(take!(io))
+julia> takestring!(io)
 "Sometimes those members write documentation."
 ```
 User-defined plain-data types without `write` methods can be written when wrapped in a `Ref`:
@@ -337,7 +368,7 @@ peek(s) = peek(s, UInt8)::UInt8
 # Generic `open` methods
 
 """
-    open_flags(; keywords...) -> NamedTuple
+    open_flags(; keywords...)::NamedTuple
 
 Compute the `read`, `write`, `create`, `truncate`, `append` flag value for
 a given set of keyword arguments to [`open`](@ref) a [`NamedTuple`](@ref).
@@ -402,7 +433,14 @@ end
 """
     AbstractPipe
 
-`AbstractPipe` is the abstract supertype for IO pipes that provide for communication between processes.
+`AbstractPipe` is an abstract supertype that exists for the convenience of creating
+pass-through wrappers for other IO objects, so that you only need to implement the
+additional methods relevant to your type. A subtype only needs to implement one or both of
+these methods:
+
+    struct P <: AbstractPipe; ...; end
+    pipe_reader(io::P) = io.out
+    pipe_writer(io::P) = io.in
 
 If `pipe isa AbstractPipe`, it must obey the following interface:
 
@@ -522,8 +560,9 @@ julia> rm("my_file.txt")
 ```
 """
 readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String)
-readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...))
-readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(_unsafe_take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...)))
+readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...))
+readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = takestring!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...))
+readuntil(stream::IO, delim::T; keep::Bool=false) where T = _copyuntil(Vector{T}(), stream, delim, keep)
 
 
 """
@@ -544,10 +583,10 @@ Similar to [`readuntil`](@ref), which returns a `String`; in contrast,
 ```jldoctest
 julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n");
 
-julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", 'L')))
+julia> takestring!(copyuntil(IOBuffer(), "my_file.txt", 'L'))
 "Julia"
 
-julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", '.', keep = true)))
+julia> takestring!(copyuntil(IOBuffer(), "my_file.txt", '.', keep = true))
 "JuliaLang is a GitHub organization."
 
 julia> rm("my_file.txt")
@@ -594,8 +633,7 @@ Logan
 """
 readline(filename::AbstractString; keep::Bool=false) =
     open(io -> readline(io; keep), filename)
-readline(s::IO=stdin; keep::Bool=false) =
-    String(_unsafe_take!(copyline(IOBuffer(sizehint=70), s; keep)))
+readline(s::IO=stdin; keep::Bool=false) = takestring!(copyline(IOBuffer(sizehint=16), s; keep))
 
 """
     copyline(out::IO, io::IO=stdin; keep::Bool=false)
@@ -620,10 +658,10 @@ See also [`copyuntil`](@ref) for reading until more general delimiters.
 ```jldoctest
 julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n");
 
-julia> String(take!(copyline(IOBuffer(), "my_file.txt")))
+julia> takestring!(copyline(IOBuffer(), "my_file.txt"))
 "JuliaLang is a GitHub organization."
 
-julia> String(take!(copyline(IOBuffer(), "my_file.txt", keep=true)))
+julia> takestring!(copyline(IOBuffer(), "my_file.txt", keep=true))
 "JuliaLang is a GitHub organization.\\n"
 
 julia> rm("my_file.txt")
@@ -746,7 +784,7 @@ htol(x)
 
 
 """
-    isreadonly(io) -> Bool
+    isreadonly(io)::Bool
 
 Determine whether a stream is read-only.
 
@@ -779,10 +817,17 @@ end
 @noinline unsafe_write(s::IO, p::Ref{T}, n::Integer) where {T} =
     unsafe_write(s, unsafe_convert(Ref{T}, p)::Ptr, n) # mark noinline to ensure ref is gc-rooted somewhere (by the caller)
 unsafe_write(s::IO, p::Ptr, n::Integer) = unsafe_write(s, convert(Ptr{UInt8}, p), convert(UInt, n))
-write(s::IO, x::Ref{T}) where {T} = unsafe_write(s, x, Core.sizeof(T))
+function write(s::IO, x::Ref{T}) where {T}
+    x isa Ptr && error("write cannot copy from a Ptr")
+    if isbitstype(T)
+        Int(unsafe_write(s, x, Core.sizeof(T)))
+    else
+        write(s, x[])
+    end
+end
 write(s::IO, x::Int8) = write(s, reinterpret(UInt8, x))
 function write(s::IO, x::Union{Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128,Float16,Float32,Float64})
-    return write(s, Ref(x))
+    return unsafe_write(s, Ref(x), Core.sizeof(x))
 end
 
 write(s::IO, x::Bool) = write(s, UInt8(x))
@@ -793,48 +838,52 @@ function write(s::IO, A::AbstractArray)
         error("`write` is not supported on non-isbits arrays")
     end
     nb = 0
+    r = Ref{eltype(A)}()
     for a in A
-        nb += write(s, a)
+        r[] = a
+        nb += @noinline unsafe_write(s, r, Core.sizeof(r)) # r must be heap-allocated
     end
     return nb
 end
 
-function write(s::IO, a::Array)
-    if isbitstype(eltype(a))
-        return GC.@preserve a unsafe_write(s, pointer(a), sizeof(a))
-    else
+function write(s::IO, A::StridedArray)
+    if !isbitstype(eltype(A))
         error("`write` is not supported on non-isbits arrays")
     end
-end
-
-function write(s::IO, a::SubArray{T,N,<:Array}) where {T,N}
-    if !isbitstype(T) || !isa(a, StridedArray)
-        return invoke(write, Tuple{IO, AbstractArray}, s, a)
+    _checkcontiguous(Bool, A) &&
+        return GC.@preserve A unsafe_write(s, pointer(A), elsize(A) * length(A))
+    sz::Dims = size(A)
+    st::Dims = strides(A)
+    msz, mst, n = merge_adjacent_dim(sz, st)
+    mst == 1 || return invoke(write, Tuple{IO, AbstractArray}, s, A)
+    n == ndims(A) &&
+        return GC.@preserve A unsafe_write(s, pointer(A), elsize(A) * length(A))
+    sz′, st′ = tail(sz), tail(st)
+    while n > 1
+        sz′ = (tail(sz′)..., 1)
+        st′ = (tail(st′)..., 0)
+        n -= 1
     end
-    elsz = elsize(a)
-    colsz = size(a,1) * elsz
-    GC.@preserve a if stride(a,1) != 1
-        for idxs in CartesianIndices(size(a))
-            unsafe_write(s, pointer(a, idxs), elsz)
-        end
-        return elsz * length(a)
-    elseif N <= 1
-        return unsafe_write(s, pointer(a, 1), colsz)
-    else
-        for colstart in CartesianIndices((1, size(a)[2:end]...))
-            unsafe_write(s, pointer(a, colstart), colsz)
+    GC.@preserve A begin
+        nb = 0
+        iter = CartesianIndices(sz′)
+        for I in iter
+            p = pointer(A)
+            for i in 1:length(sz′)
+                p += elsize(A) * st′[i] * (I[i] - 1)
+            end
+            nb += unsafe_write(s, p, elsize(A) * msz)
         end
-        return colsz * trailingsize(a,2)
+        return nb
     end
 end
 
 function write(io::IO, c::Char)
     u = bswap(reinterpret(UInt32, c))
-    n = 1
+    n = 0
     while true
-        write(io, u % UInt8)
+        n += write(io, u % UInt8)
         (u >>= 8) == 0 && return n
-        n += 1
     end
 end
 # write(io, ::AbstractChar) is not defined: implementations
@@ -855,37 +904,81 @@ end
 
 @noinline unsafe_read(s::IO, p::Ref{T}, n::Integer) where {T} = unsafe_read(s, unsafe_convert(Ref{T}, p)::Ptr, n) # mark noinline to ensure ref is gc-rooted somewhere (by the caller)
 unsafe_read(s::IO, p::Ptr, n::Integer) = unsafe_read(s, convert(Ptr{UInt8}, p), convert(UInt, n))
-read!(s::IO, x::Ref{T}) where {T} = (unsafe_read(s, x, Core.sizeof(T)); x)
+function read!(s::IO, x::Ref{T}) where {T}
+    x isa Ptr && error("read! cannot copy into a Ptr")
+    if isbitstype(T)
+        unsafe_read(s, x, Core.sizeof(T))
+    else
+        x[] = read(s, T)
+    end
+    return x
+end
 
 read(s::IO, ::Type{Int8}) = reinterpret(Int8, read(s, UInt8))
 function read(s::IO, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}})
-    return read!(s, Ref{T}(0))[]::T
+    r = Ref{T}(0)
+    unsafe_read(s, r, Core.sizeof(T))
+    return r[]
 end
 
 read(s::IO, ::Type{Bool}) = (read(s, UInt8) != 0)
 read(s::IO, ::Type{Ptr{T}}) where {T} = convert(Ptr{T}, read(s, UInt))
 
-function read!(s::IO, a::Array{UInt8})
-    GC.@preserve a unsafe_read(s, pointer(a), sizeof(a))
-    return a
+function read!(s::IO, A::AbstractArray{T}) where {T}
+    if isbitstype(T) && _checkcontiguous(Bool, A)
+        GC.@preserve A unsafe_read(s, pointer(A), elsize(A) * length(A))
+    else
+        if isbitstype(T)
+            r = Ref{T}()
+            for i in eachindex(A)
+                @noinline unsafe_read(s, r, Core.sizeof(r)) # r must be heap-allocated
+                A[i] = r[]
+            end
+        else
+            for i in eachindex(A)
+                A[i] = read(s, T)
+            end
+        end
+    end
+    return A
 end
 
-function read!(s::IO, a::AbstractArray{T}) where T
-    if isbitstype(T) && (a isa Array || a isa FastContiguousSubArray{T,<:Any,<:Array{T}})
-        GC.@preserve a unsafe_read(s, pointer(a), sizeof(a))
+function read!(s::IO, A::StridedArray{T}) where {T}
+    if !isbitstype(T) || _checkcontiguous(Bool, A)
+        return invoke(read!, Tuple{IO, AbstractArray}, s, A)
+    end
+    sz::Dims = size(A)
+    st::Dims = strides(A)
+    msz, mst, n = merge_adjacent_dim(sz, st)
+    mst == 1 || return invoke(read!, Tuple{IO, AbstractArray}, s, A)
+    if n == ndims(A)
+        GC.@preserve A unsafe_read(s, pointer(A), elsize(A) * length(A))
     else
-        for i in eachindex(a)
-            a[i] = read(s, T)
+        sz′, st′ = tail(sz), tail(st)
+        while n > 1
+            sz′ = (tail(sz′)..., 1)
+            st′ = (tail(st′)..., 0)
+            n -= 1
+        end
+        GC.@preserve A begin
+            iter = CartesianIndices(sz′)
+            for I in iter
+                p = pointer(A)
+                for i in 1:length(sz′)
+                    p += elsize(A) * st′[i] * (I[i] - 1)
+                end
+                unsafe_read(s, p, elsize(A) * msz)
+            end
         end
     end
-    return a
+    return A
 end
 
 function read(io::IO, ::Type{Char})
     b0 = read(io, UInt8)::UInt8
-    l = 8(4-leading_ones(b0))
+    l = 0x08 * (0x04 - UInt8(leading_ones(b0)))
     c = UInt32(b0) << 24
-    if l < 24
+    if l ≤ 0x10
         s = 16
         while s ≥ l && !eof(io)::Bool
             peek(io) & 0xc0 == 0x80 || break
@@ -914,6 +1007,10 @@ function copyuntil(out::IO, s::IO, delim::AbstractChar; keep::Bool=false)
 end
 
 # note: optimized methods of copyuntil for IOStreams and delim::UInt8 in iostream.jl
+#       and for IOBuffer with delim::UInt8 in iobuffer.jl
+copyuntil(out::IO, s::IO, delim; keep::Bool=false) = _copyuntil(out, s, delim, keep)
+
+# supports out::Union{IO, AbstractVector} for use with both copyuntil & readuntil
 function _copyuntil(out, s::IO, delim::T, keep::Bool) where T
     output! = isa(out, IO) ? write : push!
     for c in readeach(s, T)
@@ -925,12 +1022,6 @@ function _copyuntil(out, s::IO, delim::T, keep::Bool) where T
     end
     return out
 end
-readuntil(s::IO, delim::T; keep::Bool=false) where T =
-    _copyuntil(Vector{T}(), s, delim, keep)
-readuntil(s::IO, delim::UInt8; keep::Bool=false) =
-    _copyuntil(resize!(StringVector(70), 0), s, delim, keep)
-copyuntil(out::IO, s::IO, delim::T; keep::Bool=false) where T =
-    _copyuntil(out, s, delim, keep)
 
 # requires that indices for target are the integer unit range from firstindex to lastindex
 # returns whether the delimiter was matched
@@ -1035,7 +1126,7 @@ function copyuntil(out::IO, io::IO, target::AbstractString; keep::Bool=false)
 end
 
 function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T
-    out = (T === UInt8 ? resize!(StringVector(70), 0) : Vector{T}())
+    out = (T === UInt8 ? resize!(StringVector(16), 0) : Vector{T}())
     readuntil_vector!(io, target, keep, out)
     return out
 end
@@ -1215,7 +1306,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state)
         buf.size = _stripnewline(r.itr.keep, buf.size, buf.data)
         empty!(chunks) # will cause next iteration to terminate
         seekend(r.itr.stream) # reposition to end of stream for isdone
-        s = String(_unsafe_take!(buf))
+        s = unsafe_takestring!(buf)
     else
         # extract the string from chunks[ichunk][inewline+1] to chunks[jchunk][jnewline]
         if ichunk == jchunk # common case: current and previous newline in same chunk
@@ -1232,7 +1323,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state)
             end
             write(buf, view(chunks[jchunk], 1:jnewline))
             buf.size = _stripnewline(r.itr.keep, buf.size, buf.data)
-            s = String(_unsafe_take!(buf))
+            s = unsafe_takestring!(buf)
 
             # overwrite obsolete chunks (ichunk+1:jchunk)
             i = jchunk
@@ -1350,7 +1441,7 @@ previously marked position. Throw an error if the stream is not marked.
 See also [`mark`](@ref), [`unmark`](@ref), [`ismarked`](@ref).
 """
 function reset(io::T) where T<:IO
-    ismarked(io) || throw(ArgumentError("$T not marked"))
+    ismarked(io) || throw(ArgumentError(LazyString(T, " not marked")))
     m = io.mark
     seek(io, m)
     io.mark = -1 # must be after seek, or seek may fail
diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index deb86e774f4e4..6645ffeeb777a 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -1,40 +1,176 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-## work with AbstractVector{UInt8} via I/O primitives ##
+# IOBuffer is a Memory{UInt8} backed IO type for in-memory IO.
+
+# Here, u represents used bytes (already read), X represents bytes still to read,
+# - represents bytes uninitialized data but which can be written to later.
+# . represents bytes before offset, which the buffer will not touch, until
+# a write operation happens.
+
+#   .....uuuuuuuuuuuuuXXXXXXXXXXXXX------------
+#   |   |            |            |           |    |
+#   |   offset       ptr         size         |    maxsize
+#   1                                         lastindex(data)
+
+# N.B: `mark` does not correspond to any index in the buffer. Instead, it stores
+# the mark at virtual offset in the buffer.
+
+#            AFTER COMPACTION
+
+#   XXXXXXXXXXXXX--------------------------
+#  ||    |           |                    |    |
+#  |1    ptr         size                 |    maxsize
+#  |                                      lastindex(data)
+#  offset (set to zero)
+
+# * The underlying array is always 1-indexed
+# * The IOBuffer has full control (ownership) of the underlying array, only when
+#   buffer.write == true.
+# * Unreachable data can be deleted in the buffer's data, shifting the whole thing to the left
+#   to make room for more data, without replacing or resizing data.
+#   This can be done only if the buffer is not seekable
 
-# Stateful string
 mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
-    data::T # T should support: getindex, setindex!, length, copyto!, and resize!
-    reinit::Bool # if true, data needs to be re-allocated (after take!)
+    # T should support: getindex, setindex!, length, copyto!, similar, size and (optionally) resize!
+    data::T
+
+    # The user can take control of `data` out of this struct. When that happens, instead of eagerly allocating
+    # a new array, we set `.reinit` to true, and then allocate a new one when needed.
+    # If reinit is true, the buffer is writable, and offset_or_compacted and size is zero. See `take!`
+    reinit::Bool
     readable::Bool
     writable::Bool
-    seekable::Bool # if not seekable, implementation is free to destroy (compact) past read data
-    append::Bool # add data at end instead of at pointer
-    size::Int # end pointer (and write pointer if append == true)
-    maxsize::Int # fixed array size (typically pre-allocated)
-    ptr::Int # read (and maybe write) pointer
-    mark::Int # reset mark location for ptr (or <0 for no mark)
 
-    function GenericIOBuffer{T}(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool,
-                                maxsize::Integer) where T<:AbstractVector{UInt8}
-        require_one_based_indexing(data)
-        new(data,false,readable,writable,seekable,append,length(data),maxsize,1,-1)
+    # If not seekable, implementation is free to destroy (compact) data before ptr, unless
+    # it can be recovered using the mark by using `reset`.
+    # If it IS seekable, the user may always recover any data in 1:size by seeking,
+    # so no data can be destroyed.
+    # Non-seekable IOBuffers can only be constructed with `PipeBuffer`, which are writable,
+    # readable and append.
+    seekable::Bool
+
+    # If true, write new data to the index size+1 instead of the index ptr.
+    append::Bool
+
+    # Last index of `data` that has been written to. Data in size+1:end has not yet been used,
+    # and may contain arbitrary values.
+    # This value is always in 0 : lastindex(data)
+    size::Int
+
+    # When the buffer is resized, or a new buffer allocated, this is the maximum size of the buffer.
+    # A new GenericIOBuffer may be constructed with an existing data larger than `maxsize`.
+    # When that happensm we must make sure to not have more than `maxsize` bytes in the buffer,
+    # else reallocating will lose data. So, never write to indices > `maxsize + get_offset(io)`
+    # This value is always in 0:typemax(Int).
+    maxsize::Int
+
+    # Data is read/written from/to ptr, except in situations where append is true, in which case
+    # data is still read from ptr, but written to size+1.
+    # This value is always in offset + 1 : size+1
+    ptr::Int
+
+    # This field has two distinct meanings:
+    # If the value is positive, it encodes an offset of the start of the data in `data`.
+    # This is used if the buffer is instantiated from a Vector with non-zero memory offset.
+    # Then, the IOBuffer stores the underlying memory, and so the first data in the buffer
+    # is not at index 1.
+    # If the value is negative, then `-io.offset_or_compacted` gets the number of compacted
+    # bytes. That's the number of unused bytes deleted from a non-seekable stream to make space.
+    # We need to keep track of it in order to make `mark` and `position` etc work, that is,
+    # we need to know the virtual position of the mark even when an arbitrary number
+    # of unused bytes has been deleted due to compaction.
+    # Since compaction will move data in the buffer and thereby zero the offset, either the
+    # offset or the number of compacted bytes will be zero at any point, so both can be
+    # stored in one field.
+    # If offset: Value is always in 0:lastindex(data)
+    # If compacted: Value is in typemin(Int):0
+    offset_or_compacted::Int
+
+    # The mark is -1 if not set, else the zero-indexed virtual position of ptr in the buffer.
+    # Due to compaction and offset, this value is not an index into the buffer, but may be translated
+    # to an index.
+    # This value is in -1:typemax(Int)
+    mark::Int
+
+    # Unsafe constructor which does not do any checking
+    global function _new_generic_iobuffer(
+            ::Type{T},
+            data::T,
+            readable::Bool,
+            writable::Bool,
+            seekable::Bool,
+            append::Bool,
+            maxsize::Int,
+        ) where T<:AbstractVector{UInt8}
+        len = Int(length(data))::Int
+        return new{T}(data, false, readable, writable, seekable, append, len, maxsize, 1, 0, -1)
+    end
+end
+
+function GenericIOBuffer{T}(
+        data::T,
+        readable::Bool,
+        writable::Bool,
+        seekable::Bool,
+        append::Bool,
+        maxsize::Integer,
+        truncate::Bool,
+    ) where T<:AbstractVector{UInt8}
+    require_one_based_indexing(data)
+    mz = Int(maxsize)::Int
+    len = Int(length(data))::Int
+    if !truncate && mz < len
+        throw(ArgumentError("maxsize must not be smaller than data length"))
+    end
+    buf = _new_generic_iobuffer(T, data, readable, writable, seekable, append, mz)
+    if truncate
+        buf.size = buf.offset_or_compacted
     end
+    buf
 end
-const IOBuffer = GenericIOBuffer{Vector{UInt8}}
+
+const IOBuffer = GenericIOBuffer{Memory{UInt8}}
 
 function GenericIOBuffer(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool,
-                         maxsize::Integer) where T<:AbstractVector{UInt8}
-    GenericIOBuffer{T}(data, readable, writable, seekable, append, maxsize)
+                         maxsize::Integer, truncate::Bool) where T<:AbstractVector{UInt8}
+    GenericIOBuffer{T}(data, readable, writable, seekable, append, maxsize, truncate)
 end
 
+# For this method, we use the underlying Memory of the vector. Therefore, we need to set the,
+# ptr and size accordingly, so the buffer only uses the part of the memory that the vector does.
+function GenericIOBuffer(data::Vector{UInt8}, readable::Bool, writable::Bool, seekable::Bool, append::Bool,
+                         maxsize::Integer, truncate::Bool)
+    ref = data.ref
+    mem = ref.mem
+    offset = memoryrefoffset(ref) - 1
+    # The user may pass a vector of length <= maxsize, but where the underlying memory
+    # is larger than maxsize. Don't throw an error in that case.
+    mz = Int(maxsize)::Int
+    if !truncate && mz < length(data)
+        throw(ArgumentError("maxsize must not be smaller than data length"))
+    end
+    buf = _new_generic_iobuffer(Memory{UInt8}, mem, readable, writable, seekable, append, mz)
+    buf.offset_or_compacted = offset
+    buf.ptr = offset + 1
+    if truncate
+        buf.size = offset
+    else
+        buf.size = length(data) + offset
+    end
+    return buf
+end
+
+get_offset(io::GenericIOBuffer) = max(0, io.offset_or_compacted)
+get_compacted(io::GenericIOBuffer) = max(0, -io.offset_or_compacted)
+
 # allocate Vector{UInt8}s for IOBuffer storage that can efficiently become Strings
-StringVector(n::Integer) = unsafe_wrap(Vector{UInt8}, _string_n(n))
+StringMemory(n::Integer) = unsafe_wrap(Memory{UInt8}, _string_n(n))
+StringVector(n::Integer) = wrap(Array, StringMemory(n))
 
 # IOBuffers behave like Files. They are typically readable and writable. They are seekable. (They can be appendable).
 
 """
-    IOBuffer([data::AbstractVector{UInt8}]; keywords...) -> IOBuffer
+    IOBuffer([data::AbstractVector{UInt8}]; keywords...)::IOBuffer
 
 Create an in-memory I/O stream, which may optionally operate on a pre-existing array.
 
@@ -46,6 +182,15 @@ It may take optional keyword arguments:
 
 When `data` is not given, the buffer will be both readable and writable by default.
 
+!!! warning "Passing `data` as scratch space to `IOBuffer` with `write=true` may give unexpected behavior"
+    Once `write` is called on an `IOBuffer`, it is best to consider any
+    previous references to `data` invalidated; in effect `IOBuffer` "owns"
+    this data until a call to `take!`. Any indirect mutations to `data`
+    could lead to undefined behavior by breaking the abstractions expected
+    by `IOBuffer`. If `write=true` the IOBuffer may store data at any
+    offset leaving behind arbitrary values at other offsets. If `maxsize > length(data)`,
+    the IOBuffer might re-allocate the data entirely, which
+    may or may not be visible in any outstanding bindings to `array`.
 # Examples
 ```jldoctest
 julia> io = IOBuffer();
@@ -53,7 +198,7 @@ julia> io = IOBuffer();
 julia> write(io, "JuliaLang is a GitHub organization.", " It has many members.")
 56
 
-julia> String(take!(io))
+julia> takestring!(io)
 "JuliaLang is a GitHub organization. It has many members."
 
 julia> io = IOBuffer(b"JuliaLang is a GitHub organization.")
@@ -71,7 +216,7 @@ IOBuffer(data=UInt8[...], readable=true, writable=true, seekable=true, append=fa
 julia> write(io, "JuliaLang is a GitHub organization.")
 34
 
-julia> String(take!(io))
+julia> takestring!(io)
 "JuliaLang is a GitHub organization"
 
 julia> length(read(IOBuffer(b"data", read=true, truncate=false)))
@@ -89,17 +234,11 @@ function IOBuffer(
         truncate::Union{Bool,Nothing}=nothing,
         maxsize::Integer=typemax(Int),
         sizehint::Union{Integer,Nothing}=nothing)
-    if maxsize < 0
-        throw(ArgumentError("negative maxsize"))
-    end
     if sizehint !== nothing
         sizehint!(data, sizehint)
     end
     flags = open_flags(read=read, write=write, append=append, truncate=truncate)
-    buf = GenericIOBuffer(data, flags.read, flags.write, true, flags.append, Int(maxsize))
-    if flags.truncate
-        buf.size = 0
-    end
+    buf = GenericIOBuffer(data, flags.read, flags.write, true, flags.append, maxsize, flags.truncate)
     return buf
 end
 
@@ -109,24 +248,31 @@ function IOBuffer(;
         append::Union{Bool,Nothing}=nothing,
         truncate::Union{Bool,Nothing}=true,
         maxsize::Integer=typemax(Int),
-        sizehint::Union{Integer,Nothing}=nothing)
-    size = sizehint !== nothing ? Int(sizehint) : maxsize != typemax(Int) ? Int(maxsize) : 32
+        sizehint::Union{Integer,Nothing}=nothing,
+    )
+    mz = Int(maxsize)::Int
+    if mz < 0
+        throw(ArgumentError("negative maxsize"))
+    end
+    size = if sizehint !== nothing
+        # Allow negative sizehint, just like `sizehint!` does
+        min(mz, max(0, Int(sizehint)::Int))
+    else
+        min(mz, 32)
+    end
     flags = open_flags(read=read, write=write, append=append, truncate=truncate)
-    buf = IOBuffer(
-        StringVector(size),
-        read=flags.read,
-        write=flags.write,
-        append=flags.append,
-        truncate=flags.truncate,
-        maxsize=maxsize)
-    fill!(buf.data, 0)
+    # A common usecase of IOBuffer is to incrementally construct strings. By using StringMemory
+    # as the default storage, we can turn the result into a string without copying.
+    buf = _new_generic_iobuffer(Memory{UInt8}, StringMemory(size), flags.read, flags.write, true, flags.append, mz)
+    buf.size = 0
     return buf
 end
 
-# PipeBuffers behave like Unix Pipes. They are typically readable and writable, they act appendable, and are not seekable.
+# PipeBuffers behave somewhat more like Unix Pipes (than Files). They are typically readable and writable, they act appendable, and are not seekable.
+# However, they do not support stream notification, so for that there is the BufferStream wrapper around this.
 
 """
-    PipeBuffer(data::Vector{UInt8}=UInt8[]; maxsize::Integer = typemax(Int))
+    PipeBuffer(data::AbstractVector{UInt8}=UInt8[]; maxsize::Integer = typemax(Int))
 
 An [`IOBuffer`](@ref) that allows reading and performs writes by appending.
 Seeking and truncating are not supported.
@@ -134,20 +280,54 @@ See [`IOBuffer`](@ref) for the available constructors.
 If `data` is given, creates a `PipeBuffer` to operate on a data vector,
 optionally specifying a size beyond which the underlying `Array` may not be grown.
 """
-PipeBuffer(data::Vector{UInt8}=UInt8[]; maxsize::Int = typemax(Int)) =
-    GenericIOBuffer(data,true,true,false,true,maxsize)
-PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringVector(maxsize), maxsize = maxsize); x.size=0; x)
+PipeBuffer(data::AbstractVector{UInt8}=Memory{UInt8}(); maxsize::Int = typemax(Int)) =
+    GenericIOBuffer(data, true, true, false, true, maxsize, false)
+PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringMemory(maxsize), maxsize = maxsize); x.size = 0; x)
+
+# Internal method where truncation IS supported
+function _truncated_pipebuffer(data::AbstractVector{UInt8}=Memory{UInt8}(); maxsize::Int = typemax(Int))
+    buf = PipeBuffer(data)
+    buf.size = get_offset(buf)
+    buf.maxsize = maxsize
+    buf
+end
 
 _similar_data(b::GenericIOBuffer, len::Int) = similar(b.data, len)
-_similar_data(b::IOBuffer, len::Int) = StringVector(len)
-
-function copy(b::GenericIOBuffer)
-    ret = typeof(b)(b.reinit ? _similar_data(b, 0) : b.writable ?
-                    copyto!(_similar_data(b, length(b.data)), b.data) : b.data,
-                    b.readable, b.writable, b.seekable, b.append, b.maxsize)
-    ret.size = b.size
-    ret.ptr  = b.ptr
-    return ret
+_similar_data(b::IOBuffer, len::Int) = StringMemory(len)
+
+# Note: Copying may change the value of the position (and mark) for un-seekable streams.
+# However, these values are not stable anyway due to compaction.
+
+function copy(b::GenericIOBuffer{T}) where T
+    if b.reinit
+        # If buffer is used up, allocate a new size-zero buffer
+        # Reinit implies writable, and that ptr, size, offset and mark are already the default values
+        return typeof(b)(_similar_data(b, 0), b.readable, b.writable, b.seekable, b.append, b.maxsize, false)
+    elseif b.writable
+        # Else, we just copy the reachable bytes. If buffer is seekable, all bytes
+        # after offset are reachable, since they can be seeked to
+        used_span = get_used_span(b)
+        compacted = first(used_span) - get_offset(b) - 1
+        len = length(used_span)
+        data = copyto!(_similar_data(b, len), view(b.data, used_span))
+        ret = typeof(b)(data, b.readable, b.writable, b.seekable, b.append, b.maxsize, false)
+        ret.size = len
+        # Copying data over implicitly compacts, and may add compaction
+        ret.offset_or_compacted = -get_compacted(b) - compacted
+        ret.ptr = b.ptr - first(used_span) + 1
+        ret.mark = b.mark
+        return ret
+    else
+        # When the buffer is just readable, they can share the same data, so we just make
+        # a shallow copy of the IOBuffer struct.
+        # Use internal constructor because we want to allow b.maxsize to be larger than data,
+        # in case that is the case for `b`.
+        ret = _new_generic_iobuffer(T, b.data, b.readable, b.writable, b.seekable, b.append, b.maxsize)
+        ret.offset_or_compacted = b.offset_or_compacted
+        ret.ptr = b.ptr
+        ret.mark = b.mark
+        return ret
+    end
 end
 
 show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ",
@@ -155,9 +335,9 @@ show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ",
                                       "writable=", b.writable, ", ",
                                       "seekable=", b.seekable, ", ",
                                       "append=",   b.append, ", ",
-                                      "size=",     b.size, ", ",
+                                      "size=",     b.size - get_offset(b), ", ",
                                       "maxsize=",  b.maxsize == typemax(Int) ? "Inf" : b.maxsize, ", ",
-                                      "ptr=",      b.ptr, ", ",
+                                      "ptr=",      b.ptr - get_offset(b), ", ",
                                       "mark=",     b.mark, ")")
 
 @noinline function _throw_not_readable()
@@ -167,9 +347,9 @@ end
 
 function unsafe_read(from::GenericIOBuffer, p::Ptr{UInt8}, nb::UInt)
     from.readable || _throw_not_readable()
-    avail = bytesavailable(from)
+    avail = bytesavailable(from) % UInt
     adv = min(avail, nb)
-    GC.@preserve from unsafe_copyto!(p, pointer(from.data, from.ptr), adv)
+    unsafe_read!(p, from.data, from.ptr, adv)
     from.ptr += adv
     if nb > avail
         throw(EOFError())
@@ -177,7 +357,64 @@ function unsafe_read(from::GenericIOBuffer, p::Ptr{UInt8}, nb::UInt)
     nothing
 end
 
-function peek(from::GenericIOBuffer, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}})
+function unsafe_read!(dest::Ptr{UInt8}, src::AbstractVector{UInt8}, so::Integer, nbytes::UInt)
+    for i in 1:nbytes
+        unsafe_store!(dest, @inbounds(src[so+i-1]), i)
+    end
+end
+
+# Note: Currently, CodeUnits <: DenseVector, which makes this union redundant w.r.t
+# DenseArrayType{UInt8}, but this is a bug, and may be removed in future versions
+# of Julia. See #54002
+const DenseBytes = Union{
+    <:DenseArrayType{UInt8},
+    CodeUnits{UInt8, <:Union{String, SubString{String}}},
+}
+
+function unsafe_read!(dest::Ptr{UInt8}, src::DenseBytes, so::Integer, nbytes::UInt)
+    GC.@preserve src unsafe_copyto!(dest, pointer(src, so), nbytes)
+    nothing
+end
+
+const MultiByteBitNumberType = Union{
+    Type{UInt16},
+    Type{Int16},
+    Type{UInt32},
+    Type{Int32},
+    Type{UInt64},
+    Type{Int64},
+    Type{UInt128},
+    Type{Int128},
+    Type{Float16},
+    Type{Float32},
+    Type{Float64},
+}
+
+function load_from_array(T::MultiByteBitNumberType, data::AbstractArray{UInt8}, from::Int)
+    x = if T <: AbstractFloat
+        uinttype(T)(0)
+    else
+        unsigned(T)(0)
+    end
+    for i in 0:sizeof(x)-1
+        x |= typeof(x)(data[from + i]) << (8 * i)
+    end
+    reinterpret(T, ltoh(x))
+end
+
+function peek(from::GenericIOBuffer, T::MultiByteBitNumberType)
+    from.readable || _throw_not_readable()
+    avail = bytesavailable(from)
+    nb = sizeof(T)
+    if nb > avail
+        throw(EOFError())
+    end
+    return load_from_array(T, from.data, from.ptr)
+end
+
+# This method can use a pointer, since the underlying buffer is dense
+# and memory backed
+function peek(from::GenericIOBuffer{<:MutableDenseArrayType}, T::MultiByteBitNumberType)
     from.readable || _throw_not_readable()
     avail = bytesavailable(from)
     nb = sizeof(T)
@@ -191,29 +428,12 @@ function peek(from::GenericIOBuffer, T::Union{Type{Int16},Type{UInt16},Type{Int3
     return x
 end
 
-function read(from::GenericIOBuffer, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}})
+function read(from::GenericIOBuffer, T::MultiByteBitNumberType)
     x = peek(from, T)
     from.ptr += sizeof(T)
     return x
 end
 
-function read_sub(from::GenericIOBuffer, a::AbstractArray{T}, offs, nel) where T
-    require_one_based_indexing(a)
-    from.readable || _throw_not_readable()
-    if offs+nel-1 > length(a) || offs < 1 || nel < 0
-        throw(BoundsError())
-    end
-    if isbitstype(T) && isa(a,Array)
-        nb = UInt(nel * sizeof(T))
-        GC.@preserve a unsafe_read(from, pointer(a, offs), nb)
-    else
-        for i = offs:offs+nel-1
-            a[i] = read(from, T)
-        end
-    end
-    return a
-end
-
 @inline function read(from::GenericIOBuffer, ::Type{UInt8})
     from.readable || _throw_not_readable()
     ptr = from.ptr
@@ -239,29 +459,66 @@ read(from::GenericIOBuffer, ::Type{Ptr{T}}) where {T} = convert(Ptr{T}, read(fro
 isreadable(io::GenericIOBuffer) = io.readable
 iswritable(io::GenericIOBuffer) = io.writable
 
-# TODO: GenericIOBuffer is not iterable, so doesn't really have a length.
-# This should maybe be sizeof() instead.
-#length(io::GenericIOBuffer) = (io.seekable ? io.size : bytesavailable(io))
+# Number of bytes that can be read from the buffer, if you seek to the start first.
+filesize(io::GenericIOBuffer) = (io.seekable ? io.size - get_offset(io) : bytesavailable(io))
+
+# Number of bytes that can be read from the buffer.
 bytesavailable(io::GenericIOBuffer) = io.size - io.ptr + 1
-position(io::GenericIOBuffer) = io.ptr-1
+
+# TODO: Document that position for an unmarked and unseekable stream is invalid (and make it error?)
+function position(io::GenericIOBuffer)
+    # Position is zero-indexed, but ptr is one-indexed, hence the -1
+    io.ptr - io.offset_or_compacted - 1
+end
 
 function skip(io::GenericIOBuffer, n::Integer)
-    seekto = io.ptr + n
-    n < 0 && return seek(io, seekto-1) # Does error checking
-    io.ptr = min(seekto, io.size+1)
-    return io
+    skip(io, clamp(n, Int))
+end
+
+function skip(io::GenericIOBuffer, n::Int)
+    # In both cases, the result will never go to before the first position,
+    # nor beyond the last position, and will not throw an error unless the stream
+    # is not seekable and try to skip a negative number of bytes.
+    if signbit(n)
+        # Skipping a negative number of bytes is equivalent to seeking backwards.
+        seekto = clamp(widen(position(io)) + widen(n), Int)
+        seek(io, seekto) # Does error checking
+    else
+        # Don't use seek in order to allow a non-seekable IO to still skip bytes.
+        # Handle overflow.
+        n_max = io.size + 1 - io.ptr
+        io.ptr += min(n, n_max)
+        io
+    end
 end
 
 function seek(io::GenericIOBuffer, n::Integer)
+    seek(io, clamp(n, Int))
+end
+
+function translate_seek_position(io::GenericIOBuffer, n::Int)
+    # If there is an offset (the field F is positive), then there are F unused bytes at the beginning
+    # of the data, and we need to seek to n + F + 1. (Also compensate for `seek` being zero-
+    # indexed)
+
+    # If bytes has been compacted (field F is negative), then F bytes has been deleted from
+    # the buffer, and a virtual position n means a position n + F in the data.
+    # Remember that F is negative, so n + F is subtracting from n. So we also end up with
+    # n + F + 1.
+    clamp(widen(n) + widen(io.offset_or_compacted) + widen(1), Int)
+end
+
+function seek(io::GenericIOBuffer, n::Int)
     if !io.seekable
         ismarked(io) || throw(ArgumentError("seek failed, IOBuffer is not seekable and is not marked"))
         n == io.mark || throw(ArgumentError("seek failed, IOBuffer is not seekable and n != mark"))
     end
+
     # TODO: REPL.jl relies on the fact that this does not throw (by seeking past the beginning or end
-    #       of an GenericIOBuffer), so that would need to be fixed in order to throw an error here
-    #(n < 0 || n > io.size) && throw(ArgumentError("Attempted to seek outside IOBuffer boundaries."))
-    #io.ptr = n+1
-    io.ptr = max(min(n+1, io.size+1), 1)
+    #       of a GenericIOBuffer), so that would need to be fixed in order to throw an error here
+    max_ptr = io.size + 1
+    min_ptr = get_offset(io) + 1
+    io.ptr = clamp(translate_seek_position(io, n), min_ptr, max_ptr)
     return io
 end
 
@@ -270,90 +527,177 @@ function seekend(io::GenericIOBuffer)
     return io
 end
 
+# Resize the io's data to `new_size`, which must not be > io.maxsize.
+# Use `resize!` if the data supports it, else reallocate a new one and
+# copy the old data over.
+# If not `exact` and resizing is not supported, overallocate in order to
+# prevent excessive resizing.
+function _resize!(io::GenericIOBuffer, new_size::Int, exact::Bool)
+    old_data = io.data
+    if applicable(resize!, old_data, new_size)
+        resize!(old_data, new_size)
+    else
+        new_size = exact ? new_size : min(io.maxsize, overallocation(new_size))
+        used_span = get_used_span(io)
+        deleted = first(used_span) - 1
+        compacted = deleted - get_offset(io)
+        new_data = _similar_data(io, new_size)
+        io.data = new_data
+        iszero(new_size) && return io
+        len_used = length(used_span)
+        iszero(len_used) || copyto!(new_data, 1, old_data, first(used_span), len_used)
+        # Copying will implicitly compact, and so compaction must be updated
+        io.offset_or_compacted = -get_compacted(io) - compacted
+        io.ptr -= deleted
+        io.size = len_used
+    end
+    return io
+end
+
 function truncate(io::GenericIOBuffer, n::Integer)
     io.writable || throw(ArgumentError("truncate failed, IOBuffer is not writeable"))
+    # Non-seekable buffers can only be constructed with `PipeBuffer`, which is explicitly
+    # documented to not be truncatable.
     io.seekable || throw(ArgumentError("truncate failed, IOBuffer is not seekable"))
     n < 0 && throw(ArgumentError("truncate failed, n bytes must be ≥ 0, got $n"))
     n > io.maxsize && throw(ArgumentError("truncate failed, $(n) bytes is exceeds IOBuffer maxsize $(io.maxsize)"))
+    n = Int(n)::Int
+    offset = get_offset(io)
+    current_size = io.size - offset
     if io.reinit
-        io.data = _similar_data(io, n)
+        # If reinit, we don't need to truncate anything but just reinitializes
+        # the buffer with zeros. Mark, ptr and offset has already been reset.
+        io.data = fill!(_similar_data(io, n), 0x00)
         io.reinit = false
-    elseif n > length(io.data)
-        resize!(io.data, n)
+        io.size = n
+    elseif n < current_size
+        # Else, if we need to shrink the iobuffer, we simply change the pointers without
+        # actually shrinking the underlying storage, or copying data.
+
+        # Clear the mark if it points to data that has now been deleted.
+        if translate_seek_position(io, io.mark) > n+offset
+            io.mark = -1
+        end
+        io.size = n + offset
+        io.ptr = min(io.ptr, n + offset + 1)
+    elseif n > current_size
+        if n + offset > io.maxsize
+            compact!(io)
+        end
+        _resize!(io, n + get_offset(io), false)
+        fill!(view(io.data, io.size + 1:min(length(io.data), n + get_offset(io))), 0x00)
+        io.size = min(length(io.data), n + get_offset(io))
     end
-    io.data[io.size+1:n] .= 0
-    io.size = n
-    io.ptr = min(io.ptr, n+1)
-    ismarked(io) && io.mark > n && unmark(io)
     return io
 end
 
-function compact(io::GenericIOBuffer)
-    io.writable || throw(ArgumentError("compact failed, IOBuffer is not writeable"))
-    io.seekable && throw(ArgumentError("compact failed, IOBuffer is seekable"))
-    local ptr::Int, bytes_to_move::Int
-    if ismarked(io) && io.mark < io.ptr
-        if io.mark == 0 return end
-        ptr = io.mark
-        bytes_to_move = bytesavailable(io) + (io.ptr-io.mark)
-    else
-        ptr = io.ptr
-        bytes_to_move = bytesavailable(io)
+# Ensure that the buffer has room for at least `nshort` more bytes, except when
+# doing that would exceed maxsize.
+@inline ensureroom(io::GenericIOBuffer, nshort::Int) = ensureroom(io, UInt(nshort))
+
+@inline function ensureroom(io::GenericIOBuffer, nshort::UInt)
+    # If the IO is not writable, we call the slow path only to error.
+    # If reinit, the data has been handed out to the user, and the IOBuffer
+    # no longer controls it, so we need to allocate a new one.
+    if !io.writable || io.reinit
+        return ensureroom_reallocate(io, nshort)
+    end
+    # The fast path here usually checks there is already room, then does nothing.
+    # When append is true, new data is added after io.size, not io.ptr
+    start_offset = io.append ? io.size : io.ptr - 1
+    existing_space = min(lastindex(io.data) - start_offset, io.maxsize - (start_offset - get_offset(io)))
+    if existing_space < nshort % Int
+        # Outline this function to make it more likely that ensureroom inlines itself
+        return ensureroom_slowpath(io, nshort, existing_space)
     end
-    copyto!(io.data, 1, io.data, ptr, bytes_to_move)
-    io.size -= ptr - 1
-    io.ptr -= ptr - 1
-    io.mark -= ptr - 1
     return io
 end
 
-@noinline function ensureroom_slowpath(io::GenericIOBuffer, nshort::UInt)
+# Throw error (placed in this function to outline it) or reinit the buffer
+@noinline function ensureroom_reallocate(io::GenericIOBuffer, nshort::UInt)
     io.writable || throw(ArgumentError("ensureroom failed, IOBuffer is not writeable"))
-    if !io.seekable
-        if !ismarked(io) && io.ptr > 1 && io.size <= io.ptr - 1
-            io.ptr = 1
-            io.size = 0
-        else
-            datastart = ismarked(io) ? io.mark : io.ptr
-            if (io.size+nshort > io.maxsize) ||
-                (datastart > 4096 && datastart > io.size - io.ptr) ||
-                (datastart > 262144)
-                # apply somewhat arbitrary heuristics to decide when to destroy
-                # old, read data to make more room for new data
-                compact(io)
-            end
-        end
-    end
-    return
+    io.data = _similar_data(io, min(io.maxsize, nshort % Int))
+    io.reinit = false
+    io.offset_or_compacted = -get_compacted(io)
+    return io
 end
 
-@inline ensureroom(io::GenericIOBuffer, nshort::Int) = ensureroom(io, UInt(nshort))
-@inline function ensureroom(io::GenericIOBuffer, nshort::UInt)
-    if !io.writable || (!io.seekable && io.ptr > 1)
-        ensureroom_slowpath(io, nshort)
+# Here, we already know there is not enough room at the end of the io's data.
+@noinline function ensureroom_slowpath(io::GenericIOBuffer, nshort::UInt, available_bytes::Int)
+    reclaimable_bytes = first(get_used_span(io)) - 1
+    # Avoid resizing and instead compact the buffer, only if we gain enough bytes from
+    # doing so (at least 32 bytes and 1/8th of the data length). Also, if we would have
+    # to resize anyway, there would be no point in compacting, so also check that.
+    if (
+            reclaimable_bytes ≥ 32 &&
+            reclaimable_bytes ≥ length(io.data) >>> 3 &&
+            (reclaimable_bytes + available_bytes) % UInt ≥ nshort
+        )
+        compact!(io)
+        return io
     end
-    n = min((nshort % Int) + (io.append ? io.size : io.ptr-1), io.maxsize)
-    if io.reinit
-        io.data = _similar_data(io, n)
-        io.reinit = false
-    else
-        l = length(io.data)
-        if n > l
-            _growend!(io.data, (n - l) % UInt)
+
+    desired_size = length(io.data) + Int(nshort) - available_bytes
+    if desired_size > io.maxsize
+        # If we can't fit all the requested data in the new buffer, we need to
+        # fit as much as possible, so we must compact
+        if !iszero(reclaimable_bytes)
+            desired_size -= compact!(io)
         end
+        # Max out the buffer size if we want more than the buffer size
+        if length(io.data) < io.maxsize
+            _resize!(io, io.maxsize, true)
+        end
+    else
+        # Else, we request only the requested size, but set `exact` to `false`,
+        # in order to overallocate to avoid growing the buffer by too little
+        _resize!(io, desired_size, false)
     end
+
     return io
 end
 
-eof(io::GenericIOBuffer) = (io.ptr-1 == io.size)
+# Get the indices in data which cannot be deleted
+function get_used_span(io::IOBuffer)
+    # A seekable buffer can recover data before ptr
+    return if io.seekable
+        get_offset(io) + 1 : io.size
+    # If non-seekable, the mark can be used to recover data before ptr,
+    # so data at the mark and after must also be saved
+    elseif io.mark > -1
+        min(io.ptr, translate_seek_position(io, io.mark)) : io.size
+    else
+        io.ptr : io.size
+    end
+end
+
+# Delete any offset, and also compact data if buffer is not seekable.
+# Return the number of bytes deleted
+function compact!(io::GenericIOBuffer)::Int
+    offset = get_offset(io)
+    used_span = get_used_span(io)
+    deleted = first(used_span) - 1
+    compacted = deleted - offset
+    iszero(deleted) && return 0
+    data = io.data
+    copyto!(data, 1, data, deleted + 1, length(used_span))
+    io.offset_or_compacted = -get_compacted(io) - compacted
+    io.ptr -= deleted
+    io.size -= deleted
+    return deleted
+end
+
+eof(io::GenericIOBuffer) = (io.ptr - 1 >= io.size)
 
 function closewrite(io::GenericIOBuffer)
     io.writable = false
-    # OR throw(_UVError("closewrite", UV_ENOTSOCK))
     nothing
 end
 
 @noinline function close(io::GenericIOBuffer{T}) where T
+    if io.writable && !io.reinit
+        _resize!(io, 0, true)
+    end
     io.readable = false
     io.writable = false
     io.seekable = false
@@ -361,9 +705,7 @@ end
     io.maxsize = 0
     io.ptr = 1
     io.mark = -1
-    if io.writable
-        resize!(io.data, 0)
-    end
+    io.offset_or_compacted = -get_compacted(io)
     nothing
 end
 
@@ -386,51 +728,136 @@ julia> String(take!(io))
 ```
 """
 function take!(io::GenericIOBuffer)
-    ismarked(io) && unmark(io)
+    io.mark = -1
     if io.seekable
-        nbytes = io.size
-        data = copyto!(StringVector(nbytes), 1, io.data, 1, nbytes)
+        # If the buffer is seekable, then the previously consumed bytes from ptr+1:size
+        # must still be output, as they are not truly gone.
+        # Hence, we output all bytes from 1:io.size
+        offset = get_offset(io)
+        nbytes = io.size - offset
+        data = copyto!(StringVector(nbytes), 1, io.data, offset + 1, nbytes)
     else
+        # Else, if not seekable, bytes from 1:ptr-1 are truly gone and should not
+        # be output. Hence, we output `bytesavailable`, which is ptr:size
         nbytes = bytesavailable(io)
-        data = read!(io,StringVector(nbytes))
+        data = read!(io, StringVector(nbytes))
     end
     if io.writable
+        io.reinit = true
         io.ptr = 1
         io.size = 0
+        io.offset_or_compacted = 0
     end
     return data
 end
+
+# This method is specialized because we know the underlying data is a Memory, so we can
+# e.g. wrap directly in an array without copying. Otherwise the logic is the same as
+# the generic method
 function take!(io::IOBuffer)
-    ismarked(io) && unmark(io)
+    io.mark = -1
     if io.seekable
-        if io.writable
-            if io.reinit
-                data = StringVector(0)
-            else
-                data = resize!(io.data, io.size)
-                io.reinit = true
-            end
+        nbytes = filesize(io)
+        if nbytes == 0 || io.reinit
+            data = StringVector(0)
+        elseif io.writable
+            data = wrap(Array, memoryref(io.data, get_offset(io) + 1), nbytes)
         else
-            data = copyto!(StringVector(io.size), 1, io.data, 1, io.size)
+            data = copyto!(StringVector(nbytes), 1, io.data, get_offset(io) + 1, nbytes)
         end
     else
         nbytes = bytesavailable(io)
-        if io.writable
-            data = io.data
-            io.reinit = true
-            _deletebeg!(data, io.ptr-1)
-            resize!(data, nbytes)
+        if nbytes == 0
+            data = StringVector(0)
+        elseif io.writable
+            data = wrap(Array, memoryref(io.data, io.ptr), nbytes)
         else
-            data = read!(io, StringVector(nbytes))
+            error("Unreachable IOBuffer state")
         end
     end
     if io.writable
+        io.reinit = true
         io.ptr = 1
         io.size = 0
+        io.offset_or_compacted = 0
     end
     return data
 end
 
+"Internal method. This method can be faster than takestring!, because it does not
+reset the buffer to a usable state, and it does not check for io.reinit.
+Using the buffer after calling unsafe_takestring! may cause undefined behaviour.
+This function is meant to be used when the buffer is only used as a temporary
+string builder, which is discarded after the string is built."
+function unsafe_takestring!(io::IOBuffer)
+    used_span = get_used_span(io)
+    nbytes = length(used_span)
+    from = first(used_span)
+    isempty(used_span) && return ""
+    # The C function can only copy from the start of the memory.
+    # Fortunately, in most cases, the offset will be zero.
+    return if isone(from)
+        ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), io.data, nbytes)
+    else
+        mem = StringMemory(nbytes % UInt)
+        unsafe_copyto!(mem, 1, io.data, from, nbytes)
+        unsafe_takestring(mem)
+    end
+end
+
+"""
+    takestring!(io::IOBuffer) -> String
+
+Return the content of `io` as a `String`, resetting the buffer to its initial
+state.
+This is preferred over calling `String(take!(io))` to create a string from
+an `IOBuffer`.
+
+# Examples
+```jldoctest
+julia> io = IOBuffer();
+
+julia> write(io, [0x61, 0x62, 0x63]);
+
+julia> s = takestring!(io)
+"abc"
+
+julia> isempty(take!(io)) # io is now empty
+true
+```
+
+!!! compat "Julia 1.13"
+    This function requires at least Julia 1.13.
+"""
+function takestring!(io::IOBuffer)
+    # If the buffer has been used up and needs to be replaced, there are no bytes, and
+    # we can return an empty string without interacting with the buffer at all.
+    io.reinit && return ""
+
+    # If the iobuffer is writable, taking will remove the buffer from `io`.
+    # So, we reset the iobuffer, and directly unsafe takestring.
+    return if io.writable
+        s = unsafe_takestring!(io)
+        io.reinit = true
+        io.mark = -1
+        io.ptr = 1
+        io.size = 0
+        io.offset_or_compacted = 0
+        s
+    else
+        # If the buffer is not writable, taking will NOT remove the buffer,
+        # so if we just converted the buffer to a string, garbage collecting
+        # the string would free the memory underneath the iobuffer
+        used_span = get_used_span(io)
+        mem = StringMemory(length(used_span))
+        unsafe_copyto!(mem, 1, io.data, first(used_span), length(used_span))
+        unsafe_takestring(mem)
+    end
+end
+
+# Fallback methods
+takestring!(io::GenericIOBuffer) = String(take!(io))
+
 """
     _unsafe_take!(io::IOBuffer)
 
@@ -440,43 +867,83 @@ state.  This should only be used internally for performance-critical
 `String` routines that immediately discard `io` afterwards, and it
 *assumes* that `io` is writable and seekable.
 
-It saves no allocations compared to `take!`, it just omits some checks.
+It might save an allocation compared to `take!` (if the compiler elides the
+Array allocation), as well as omits some checks.
 """
-_unsafe_take!(io::IOBuffer) = resize!(io.data, io.size)
+function _unsafe_take!(io::IOBuffer)
+    offset = get_offset(io)
+    mem = if io.size == offset
+        memoryref(Memory{UInt8}())
+    else
+        memoryref(io.data, offset + 1)
+    end
+    wrap(Array, mem, io.size - offset)
+end
 
 function write(to::IO, from::GenericIOBuffer)
+    # This would cause an infinite loop, as it should read until the end, but more
+    # data is being written into it continuously.
     if to === from
+        throw(ArgumentError("Writing all content fron an IOBuffer into itself in invalid"))
+    else
+        from.readable || _throw_not_readable()
+        written = write(to, view(from.data, from.ptr:from.size))
         from.ptr = from.size + 1
-        return 0
     end
-    written::Int = GC.@preserve from unsafe_write(to, pointer(from.data, from.ptr), UInt(bytesavailable(from)))
-    from.ptr += written
     return written
 end
 
 function unsafe_write(to::GenericIOBuffer, p::Ptr{UInt8}, nb::UInt)
     ensureroom(to, nb)
-    ptr = (to.append ? to.size+1 : to.ptr)
-    written = Int(min(nb, Int(length(to.data))::Int - ptr + 1))
-    towrite = written
-    d = to.data
-    while towrite > 0
-        @inbounds d[ptr] = unsafe_load(p)
-        ptr += 1
+    size = to.size
+    append = to.append
+    ptr = append ? size+1 : to.ptr
+    data = to.data
+    start_offset = ptr - 1
+    to_write = max(0, min(nb, (min(Int(length(data))::Int - start_offset, to.maxsize - (start_offset - get_offset(to)))) % UInt) % Int)
+    # Dispatch based on the type of data, to possibly allow using memcpy
+    _unsafe_write(data, p, ptr, to_write % UInt)
+    # Update to.size only if the ptr has advanced to higher than
+    # the previous size. Otherwise, we just overwrote existing data
+    to.size = max(size, start_offset + to_write)
+    # If to.append, we only update size, not ptr.
+    if !append
+        to.ptr = ptr + to_write
+    end
+    return to_write
+end
+
+@inline function _unsafe_write(data::AbstractVector{UInt8}, p::Ptr{UInt8}, from::Int, nb::UInt)
+    for i in 0:nb-1
+        data[from + i] = unsafe_load(p)
         p += 1
-        towrite -= 1
     end
-    to.size = max(to.size, ptr - 1)
-    if !to.append
-        to.ptr += written
+end
+
+@inline function _unsafe_write(data::MutableDenseArrayType{UInt8}, p::Ptr{UInt8}, from::Int, nb::UInt)
+    # Calling `unsafe_copyto!` is very efficient for large arrays, but has some overhead
+    # for small (< 5 bytes) arrays.
+    # Since a common use case of IOBuffer is to construct strings incrementally, often
+    # one char at a time, it's crucial to be fast in the case of small arrays.
+    # This optimization only gives a minor 10% speed boost in the best case.
+    if nb < 5
+        @inbounds for i in UInt(1):nb
+            data[from + (i % Int) - 1] = unsafe_load(p, i)
+        end
+    else
+        GC.@preserve data begin
+            ptr = Ptr{UInt8}(pointer(data, from))::Ptr{UInt8}
+            @inline unsafe_copyto!(ptr, p, nb)
+        end
     end
-    return written
 end
 
 @inline function write(to::GenericIOBuffer, a::UInt8)
     ensureroom(to, UInt(1))
     ptr = (to.append ? to.size+1 : to.ptr)
-    if ptr > to.maxsize
+    # We have just ensured there is room for 1 byte, EXCEPT if we were to exceed
+    # maxsize. So, we just need to check that here.
+    if ptr - get_offset(to) > to.maxsize
         return 0
     else
         to.data[ptr] = a
@@ -488,32 +955,27 @@ end
     return sizeof(UInt8)
 end
 
-readbytes!(io::GenericIOBuffer, b::Array{UInt8}, nb=length(b)) = readbytes!(io, b, Int(nb))
-function readbytes!(io::GenericIOBuffer, b::Array{UInt8}, nb::Int)
-    nr = min(nb, bytesavailable(io))
-    if length(b) < nr
-        resize!(b, nr)
+readbytes!(io::GenericIOBuffer, b::MutableDenseArrayType{UInt8}, nb=length(b)) = readbytes!(io, b, Int(nb)::Int)
+
+function readbytes!(io::GenericIOBuffer, b::MutableDenseArrayType{UInt8}, nb::Int)
+    io.readable || _throw_not_readable()
+    to_read = min(nb, bytesavailable(io))
+    if length(b) < to_read
+        resize!(b, to_read)
     end
-    read_sub(io, b, 1, nr)
-    return nr
+    checkbounds(b, 1:to_read)
+    GC.@preserve b unsafe_read(io, pointer(b), to_read)
+    to_read
 end
-read(io::GenericIOBuffer) = read!(io,StringVector(bytesavailable(io)))
+read(io::GenericIOBuffer) = read!(io, StringVector(bytesavailable(io)))
+
+# For IO buffers, all the data is immediately available.
 readavailable(io::GenericIOBuffer) = read(io)
-read(io::GenericIOBuffer, nb::Integer) = read!(io,StringVector(min(nb, bytesavailable(io))))
 
-function occursin(delim::UInt8, buf::IOBuffer)
-    p = pointer(buf.data, buf.ptr)
-    q = GC.@preserve buf ccall(:memchr,Ptr{UInt8},(Ptr{UInt8},Int32,Csize_t),p,delim,bytesavailable(buf))
-    return q != C_NULL
-end
+read(io::GenericIOBuffer, nb::Integer) = read!(io, StringVector(min(nb, bytesavailable(io))))
 
 function occursin(delim::UInt8, buf::GenericIOBuffer)
-    data = buf.data
-    for i = buf.ptr:buf.size
-        @inbounds b = data[i]
-        b == delim && return true
-    end
-    return false
+    return in(delim, view(buf.data, buf.ptr:buf.size))
 end
 
 function copyuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false)
@@ -530,28 +992,53 @@ function copyuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false)
 end
 
 function copyline(out::GenericIOBuffer, s::IO; keep::Bool=false)
-    copyuntil(out, s, 0x0a, keep=true)
-    line = out.data
-    i = out.size
-    if keep || i == 0 || line[i] != 0x0a
+    # If the data is copied into the middle of the buffer of `out` instead of appended to the end,
+    # and !keep, and the line copied ends with \r\n, then the copyuntil (even if keep=false)
+    # will overwrite one too many bytes with the new \r byte.
+    # Work around this by making a new temporary buffer.
+    # Could perhaps be done better
+    if !out.append && out.ptr < out.size + 1
+        newbuf = IOBuffer()
+        copyuntil(newbuf, s, 0x0a, keep=true)
+        v = take!(newbuf)
+        # Remove \r\n or \n if present
+        if !keep
+            if length(v) > 1 && last(v) == UInt8('\n')
+                pop!(v)
+            end
+            if length(v) > 1 && last(v) == UInt8('\r')
+                pop!(v)
+            end
+        end
+        write(out, v)
         return out
-    elseif i < 2 || line[i-1] != 0x0d
-        i -= 1
     else
-        i -= 2
-    end
-    out.size = i
-    if !out.append
-        out.ptr = i+1
+        # Else, we can just copy the data directly into the buffer, and then
+        # subtract the last one or two bytes depending on `keep`.
+        copyuntil(out, s, 0x0a, keep=true)
+        line = out.data
+        i = out.size
+        if keep || i == out.offset_or_compacted || line[i] != 0x0a
+            return out
+        elseif i < 2 || line[i-1] != 0x0d
+            i -= 1
+        else
+            i -= 2
+        end
+        out.size = i
+        if !out.append
+            out.ptr = i+1
+        end
+        return out
     end
-    return out
 end
 
 function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false)
     data = view(io.data, io.ptr:io.size)
     # note: findfirst + copyto! is much faster than a single loop
     #       except for nout ≲ 20.  A single loop is 2x faster for nout=5.
-    nout = nread = something(findfirst(==(0x0a), data), length(data))
+    nout = nread = something(findfirst(==(0x0a), data), length(data))::Int
+    # Remove the 0x0a (newline) if not keep, and also remove the 0x0d (\r) if it is there
     if !keep && nout > 0 && data[nout] == 0x0a
         nout -= 1
         nout > 0 && data[nout] == 0x0d && (nout -= 1)
@@ -560,6 +1047,7 @@ function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false)
     io.ptr += nread
     return out
 end
+
 copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep)
 copyline(out::GenericIOBuffer, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep)
 
diff --git a/base/iostream.jl b/base/iostream.jl
index f5a8c0a8dffc8..87b29716ef036 100644
--- a/base/iostream.jl
+++ b/base/iostream.jl
@@ -12,13 +12,13 @@ Mostly used to represent files returned by [`open`](@ref).
 """
 mutable struct IOStream <: IO
     handle::Ptr{Cvoid}
-    ios::Array{UInt8,1}
+    ios::Vector{UInt8}
     name::String
     mark::Int64
     lock::ReentrantLock
     _dolock::Bool
 
-    IOStream(name::AbstractString, buf::Array{UInt8,1}) = new(pointer(buf), buf, name, -1, ReentrantLock(), true)
+    IOStream(name::AbstractString, buf::Vector{UInt8}) = new(pointer(buf), buf, name, -1, ReentrantLock(), true)
 end
 
 function IOStream(name::AbstractString, finalize::Bool)
@@ -47,15 +47,44 @@ macro _lock_ios(s, expr)
 end
 
 """
-    fd(stream)
+    fd(x)::RawFD
 
-Return the file descriptor backing the stream or file. Note that this function only applies
-to synchronous `File`'s and `IOStream`'s not to any of the asynchronous streams.
+Return the file descriptor backing the stream, file, or socket.
+
+`RawFD` objects can be passed directly to other languages via the `ccall` interface.
+
+!!! compat "Julia 1.12"
+    Prior to 1.12, this function returned an `Int` instead of a `RawFD`. You may use
+    `RawFD(fd(x))` to produce a `RawFD` in all Julia versions.
+
+!!! compat "Julia 1.12"
+    Getting the file descriptor of sockets are supported as of Julia 1.12.
+
+!!! warning
+    Duplicate the returned file descriptor with [`Libc.dup()`](@ref) before
+    passing it to another system that will take ownership of it (e.g. a C
+    library). Otherwise both the Julia object `x` and the other system may try
+    to close the file descriptor, which will cause errors.
+
+!!! warning
+    The file descriptors for sockets are asynchronous (i.e. `O_NONBLOCK` on
+    POSIX and `OVERLAPPED` on Windows), they may behave differently than regular
+    file descriptors.
 """
-fd(s::IOStream) = Int(ccall(:jl_ios_fd, Clong, (Ptr{Cvoid},), s.ios))
+fd(s::IOStream) = RawFD(ccall(:jl_ios_fd, Clong, (Ptr{Cvoid},), s.ios))
 
 stat(s::IOStream) = stat(fd(s))
 
+"""
+    isopen(s::IOStream)
+
+Check if the stream is not yet closed.
+
+A closed `IOStream` may still have data to read in its buffer,
+use [`eof`](@ref) to check for the ability to read data.
+
+Use the `FileWatching` package to be notified when a file might be writable or readable.
+"""
 isopen(s::IOStream) = ccall(:ios_isopen, Cint, (Ptr{Cvoid},), s.ios) != 0
 
 function close(s::IOStream)
@@ -63,6 +92,8 @@ function close(s::IOStream)
     systemerror("close", bad)
 end
 
+closewrite(s::IOStream) = nothing
+
 function flush(s::IOStream)
     sigatomic_begin()
     bad = @_lock_ios s ccall(:ios_flush, Cint, (Ptr{Cvoid},), s.ios) != 0
@@ -90,7 +121,7 @@ julia> write(io, "JuliaLang is a GitHub organization.")
 julia> truncate(io, 15)
 IOBuffer(data=UInt8[...], readable=true, writable=true, seekable=true, append=false, size=15, maxsize=Inf, ptr=16, mark=-1)
 
-julia> String(take!(io))
+julia> takestring!(io)
 "JuliaLang is a "
 
 julia> io = IOBuffer();
@@ -99,7 +130,7 @@ julia> write(io, "JuliaLang is a GitHub organization.");
 
 julia> truncate(io, 40);
 
-julia> String(take!(io))
+julia> takestring!(io)
 "JuliaLang is a GitHub organization.\\0\\0\\0\\0\\0"
 ```
 """
@@ -222,8 +253,8 @@ end
 function filesize(s::IOStream)
     sz = @_lock_ios s ccall(:ios_filesize, Int64, (Ptr{Cvoid},), s.ios)
     if sz == -1
-        err = Libc.errno()
-        throw(IOError(string("filesize: ", Libc.strerror(err), " for ", s.name), err))
+        # if `s` is not seekable `ios_filesize` can fail, so fall back to slower stat method
+        sz = filesize(stat(s))
     end
     return sz
 end
@@ -236,7 +267,7 @@ eof(s::IOStream) = @_lock_ios s _eof_nolock(s)
 # "own" means the descriptor will be closed with the IOStream
 
 """
-    fdio([name::AbstractString, ]fd::Integer[, own::Bool=false]) -> IOStream
+    fdio([name::AbstractString, ]fd::Integer[, own::Bool=false])::IOStream
 
 Create an [`IOStream`](@ref) object from an integer file descriptor. If `own` is `true`, closing
 this object will close the underlying descriptor. By default, an `IOStream` is closed when
@@ -251,7 +282,7 @@ end
 fdio(fd::Integer, own::Bool=false) = fdio(string("<fd ",fd,">"), fd, own)
 
 """
-    open(filename::AbstractString; lock = true, keywords...) -> IOStream
+    open(filename::AbstractString; lock = true, keywords...)::IOStream
 
 Open a file in a mode specified by five boolean keyword arguments:
 
@@ -290,19 +321,22 @@ function open(fname::String; lock = true,
     if !lock
         s._dolock = false
     end
-    systemerror("opening file $(repr(fname))",
-                ccall(:ios_file, Ptr{Cvoid},
-                      (Ptr{UInt8}, Cstring, Cint, Cint, Cint, Cint),
-                      s.ios, fname, flags.read, flags.write, flags.create, flags.truncate) == C_NULL)
+    if ccall(:ios_file, Ptr{Cvoid},
+             (Ptr{UInt8}, Cstring, Cint, Cint, Cint, Cint),
+             s.ios, fname, flags.read, flags.write, flags.create, flags.truncate) == C_NULL
+        systemerror("opening file $(repr(fname))")
+    end
     if flags.append
-        systemerror("seeking to end of file $fname", ccall(:ios_seek_end, Int64, (Ptr{Cvoid},), s.ios) != 0)
+        if ccall(:ios_seek_end, Int64, (Ptr{Cvoid},), s.ios) != 0
+            systemerror("seeking to end of file $fname")
+        end
     end
     return s
 end
 open(fname::AbstractString; kwargs...) = open(convert(String, fname)::String; kwargs...)
 
 """
-    open(filename::AbstractString, [mode::AbstractString]; lock = true) -> IOStream
+    open(filename::AbstractString, [mode::AbstractString]; lock = true)::IOStream
 
 Alternate syntax for open, where a string-based mode specifier is used instead of the five
 booleans. The values of `mode` correspond to those from `fopen(3)` or Perl `open`, and are
@@ -436,7 +470,7 @@ take!(s::IOStream) =
     @_lock_ios s ccall(:jl_take_buffer, Vector{UInt8}, (Ptr{Cvoid},), s.ios)
 
 function readuntil(s::IOStream, delim::UInt8; keep::Bool=false)
-    @_lock_ios s ccall(:jl_readuntil, Array{UInt8,1}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 0, !keep)
+    @_lock_ios s ccall(:jl_readuntil, Vector{UInt8}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 0, !keep)
 end
 
 # like readuntil, above, but returns a String without requiring a copy
@@ -444,35 +478,33 @@ function readuntil_string(s::IOStream, delim::UInt8, keep::Bool)
     @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 1, !keep)
 end
 readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) =
-    delim ≤ '\x7f' ? readuntil_string(s, delim % UInt8, keep) :
-    String(unsafe_take!(copyuntil(IOBuffer(sizehint=70), s, delim; keep)))
+    isascii(delim) ? readuntil_string(s, delim % UInt8, keep) :
+    takestring!(copyuntil(IOBuffer(sizehint=70), s, delim; keep))
 
 function readline(s::IOStream; keep::Bool=false)
     @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2)
 end
 
 function copyuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false)
-    ensureroom(out, 16)
-    ptr = (out.append ? out.size+1 : out.ptr)
-    d = out.data
-    len = length(d)
+    ensureroom(out, 1) # make sure we can read at least 1 byte, for iszero(n) check below
     while true
+        d = out.data
+        len = length(d)
+        ptr = (out.append ? out.size+1 : out.ptr)
         GC.@preserve d @_lock_ios s n=
             Int(ccall(:jl_readuntil_buf, Csize_t, (Ptr{Cvoid}, UInt8, Ptr{UInt8}, Csize_t),
                 s.ios, delim, pointer(d, ptr), (len - ptr + 1) % Csize_t))
         iszero(n) && break
         ptr += n
-        if d[ptr-1] == delim
-            keep || (ptr -= 1)
-            break
-        end
+        found = (d[ptr - 1] == delim)
+        found && !keep && (ptr -= 1)
+        out.size = max(out.size, ptr - 1)
+        out.append || (out.ptr = ptr)
+        found && break
         (eof(s) || len == out.maxsize) && break
         len = min(2len + 64, out.maxsize)
-        resize!(d, len)
-    end
-    out.size = max(out.size, ptr - 1)
-    if !out.append
-        out.ptr = ptr
+        ensureroom(out, len)
+        @assert length(out.data) >= len
     end
     return out
 end
@@ -483,9 +515,7 @@ function copyuntil(out::IOStream, s::IOStream, delim::UInt8; keep::Bool=false)
     return out
 end
 
-function readbytes_all!(s::IOStream,
-                        b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}},
-                        nb::Integer)
+function readbytes_all!(s::IOStream, b::MutableDenseArrayType{UInt8}, nb::Integer)
     olb = lb = length(b)
     nr = 0
     let l = s._dolock, slock = s.lock
@@ -513,9 +543,7 @@ function readbytes_all!(s::IOStream,
     return nr
 end
 
-function readbytes_some!(s::IOStream,
-                         b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}},
-                         nb::Integer)
+function readbytes_some!(s::IOStream, b::MutableDenseArrayType{UInt8}, nb::Integer)
     olb = length(b)
     if nb > olb
         resize!(b, nb)
@@ -544,10 +572,7 @@ requested bytes, until an error or end-of-file occurs. If `all` is `false`, at m
 `read` call is performed, and the amount of data returned is device-dependent. Note that not
 all stream types support the `all` option.
 """
-function readbytes!(s::IOStream,
-                    b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}},
-                    nb=length(b);
-                    all::Bool=true)
+function readbytes!(s::IOStream, b::MutableDenseArrayType{UInt8}, nb=length(b); all::Bool=true)
     return all ? readbytes_all!(s, b, nb) : readbytes_some!(s, b, nb)
 end
 
diff --git a/base/irrationals.jl b/base/irrationals.jl
index 6513e3269a4d7..8cf7abba8558e 100644
--- a/base/irrationals.jl
+++ b/base/irrationals.jl
@@ -28,6 +28,9 @@ See also [`AbstractIrrational`](@ref).
 """
 struct Irrational{sym} <: AbstractIrrational end
 
+typemin(::Type{T}) where {T<:Irrational} = T()
+typemax(::Type{T}) where {T<:Irrational} = T()
+
 show(io::IO, x::Irrational{sym}) where {sym} = print(io, sym)
 
 function show(io::IO, ::MIME"text/plain", x::Irrational{sym}) where {sym}
@@ -42,34 +45,65 @@ promote_rule(::Type{<:AbstractIrrational}, ::Type{Float16}) = Float16
 promote_rule(::Type{<:AbstractIrrational}, ::Type{Float32}) = Float32
 promote_rule(::Type{<:AbstractIrrational}, ::Type{<:AbstractIrrational}) = Float64
 promote_rule(::Type{<:AbstractIrrational}, ::Type{T}) where {T<:Real} = promote_type(Float64, T)
-promote_rule(::Type{S}, ::Type{T}) where {S<:AbstractIrrational,T<:Number} = promote_type(promote_type(S, real(T)), T)
+
+function promote_rule(::Type{S}, ::Type{T}) where {S<:AbstractIrrational,T<:Number}
+    U = promote_type(S, real(T))
+    if S <: U
+        # prevent infinite recursion
+        promote_type(Float64, T)
+    else
+        promote_type(U, T)
+    end
+end
 
 AbstractFloat(x::AbstractIrrational) = Float64(x)::Float64
 Float16(x::AbstractIrrational) = Float16(Float32(x)::Float32)
 Complex{T}(x::AbstractIrrational) where {T<:Real} = Complex{T}(T(x))
 
-# XXX this may change `DEFAULT_PRECISION`, thus not effect free
-@assume_effects :total function Rational{T}(x::AbstractIrrational) where T<:Integer
-    o = precision(BigFloat)
+function _irrational_to_rational_at_current_precision(::Type{T}, x::AbstractIrrational) where {T <: Integer}
+    bx = BigFloat(x)
+    r = rationalize(T, bx, tol = 0)
+    if abs(BigFloat(r) - bx) > eps(bx)
+        r
+    else
+        nothing  # Error is too small, repeat with greater precision.
+    end
+end
+function _irrational_to_rational_at_precision(::Type{T}, x::AbstractIrrational, p::Int) where {T <: Integer}
+    f = let x = x
+        () -> _irrational_to_rational_at_current_precision(T, x)
+    end
+    setprecision(f, BigFloat, p)
+end
+function _irrational_to_rational_at_current_rounding_mode(::Type{T}, x::AbstractIrrational) where {T <: Integer}
+    if T <: BigInt
+        _throw_argument_error_irrational_to_rational_bigint()  # avoid infinite loop
+    end
     p = 256
     while true
-        setprecision(BigFloat, p)
-        bx = BigFloat(x)
-        r = rationalize(T, bx, tol=0)
-        if abs(BigFloat(r) - bx) > eps(bx)
-            setprecision(BigFloat, o)
+        r = _irrational_to_rational_at_precision(T, x, p)
+        if r isa Number
             return r
         end
         p += 32
     end
 end
-Rational{BigInt}(x::AbstractIrrational) = throw(ArgumentError("Cannot convert an AbstractIrrational to a Rational{BigInt}: use rationalize(BigInt, x) instead"))
+function _irrational_to_rational(::Type{T}, x::AbstractIrrational) where {T <: Integer}
+    f = let x = x
+        () -> _irrational_to_rational_at_current_rounding_mode(T, x)
+    end
+    setrounding(f, BigFloat, RoundNearest)
+end
+Rational{T}(x::AbstractIrrational) where {T<:Integer} = _irrational_to_rational(T, x)
+_throw_argument_error_irrational_to_rational_bigint() = throw(ArgumentError("Cannot convert an AbstractIrrational to a Rational{BigInt}: use rationalize(BigInt, x) instead"))
+Rational{BigInt}(::AbstractIrrational) = _throw_argument_error_irrational_to_rational_bigint()
 
-@assume_effects :total function (t::Type{T})(x::AbstractIrrational, r::RoundingMode) where T<:Union{Float32,Float64}
+function _irrational_to_float(::Type{T}, x::AbstractIrrational, r::RoundingMode) where T<:Union{Float32,Float64}
     setprecision(BigFloat, 256) do
         T(BigFloat(x)::BigFloat, r)
     end
 end
+(::Type{T})(x::AbstractIrrational, r::RoundingMode) where {T<:Union{Float32,Float64}} = _irrational_to_float(T, x, r)
 
 float(::Type{<:AbstractIrrational}) = Float64
 
@@ -107,14 +141,18 @@ end
 <=(x::AbstractFloat, y::AbstractIrrational) = x < y
 
 # Irrational vs Rational
-@assume_effects :total function rationalize(::Type{T}, x::AbstractIrrational; tol::Real=0) where T
+function _rationalize_irrational(::Type{T}, x::AbstractIrrational, tol::Real) where {T<:Integer}
     return rationalize(T, big(x), tol=tol)
 end
-@assume_effects :total function lessrational(rx::Rational{<:Integer}, x::AbstractIrrational)
-    # an @assume_effects :total version of `<` for determining if the rationalization of
-    # an irrational number required rounding up or down
+function rationalize(::Type{T}, x::AbstractIrrational; tol::Real=0) where {T<:Integer}
+    return _rationalize_irrational(T, x, tol)
+end
+function _lessrational(rx::Rational, x::AbstractIrrational)
     return rx < big(x)
 end
+function lessrational(rx::Rational, x::AbstractIrrational)
+    return _lessrational(rx, x)
+end
 function <(x::AbstractIrrational, y::Rational{T}) where T
     T <: Unsigned && x < 0.0 && return true
     rx = rationalize(T, x)
@@ -144,7 +182,7 @@ isinteger(::AbstractIrrational) = false
 iszero(::AbstractIrrational) = false
 isone(::AbstractIrrational) = false
 
-hash(x::Irrational, h::UInt) = 3*objectid(x) - h
+hash(x::Irrational, h::UInt) = 3h - objectid(x)
 
 widen(::Type{T}) where {T<:Irrational} = T
 
@@ -213,9 +251,9 @@ function irrational(sym, val, def)
     esym = esc(sym)
     qsym = esc(Expr(:quote, sym))
     bigconvert = isa(def,Symbol) ? quote
-        function Base.BigFloat(::Irrational{$qsym}, r::MPFR.MPFRRoundingMode=MPFR.ROUNDING_MODE[]; precision=precision(BigFloat))
+        function Base.BigFloat(::Irrational{$qsym}, r::MPFR.MPFRRoundingMode=Rounding.rounding_raw(BigFloat); precision=precision(BigFloat))
             c = BigFloat(;precision=precision)
-            ccall(($(string("mpfr_const_", def)), :libmpfr),
+            ccall(($(string("mpfr_const_", def)), Base.MPFR.libmpfr),
                   Cint, (Ref{BigFloat}, MPFR.MPFRRoundingMode), c, r)
             return c
         end
diff --git a/base/iterators.jl b/base/iterators.jl
index 3b23313cbada0..13983bd9672fb 100644
--- a/base/iterators.jl
+++ b/base/iterators.jl
@@ -6,40 +6,34 @@ Methods for working with Iterators.
 baremodule Iterators
 
 # small dance to make this work from Base or Intrinsics
-import ..@__MODULE__, ..parentmodule
+import Base: @__MODULE__, parentmodule
 const Base = parentmodule(@__MODULE__)
 using .Base:
     @inline, Pair, Pairs, AbstractDict, IndexLinear, IndexStyle, AbstractVector, Vector,
     SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo,
-    @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator,
+    @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator, IdDict,
     AbstractRange, AbstractUnitRange, UnitRange, LinearIndices, TupleOrBottom,
-    (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing, copyto!,
+    (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, =>, missing, copyto!,
     any, _counttuple, eachindex, ntuple, zero, prod, reduce, in, firstindex, lastindex,
-    tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape
+    tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape, LazyString,
+    afoldl, mod1, @default_eltype
+using .Core
 using Core: @doc
 
-if Base !== Core.Compiler
-using .Base:
-    cld, fld, SubArray, view, resize!, IndexCartesian
-using .Base.Checked: checked_mul
-else
-    # Checked.checked_mul is not available during bootstrapping:
-    const checked_mul = *
-end
+using Base:
+    cld, fld, resize!, IndexCartesian, Checked
+using .Checked: checked_mul
 
-import .Base:
+import Base:
     first, last,
     isempty, length, size, axes, ndims,
-    eltype, IteratorSize, IteratorEltype,
+    eltype, IteratorSize, IteratorEltype, promote_typejoin,
     haskey, keys, values, pairs,
     getindex, setindex!, get, iterate,
     popfirst!, isdone, peek, intersect
 
-export enumerate, zip, unzip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap
-
-if Base !== Core.Compiler
-export partition
-end
+export enumerate, zip, unzip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap, partition, nth, findeach
+public accumulate, filter, map, peel, reverse, Stateful
 
 """
     Iterators.map(f, iterators...)
@@ -59,10 +53,7 @@ julia> collect(Iterators.map(x -> x^2, 1:3))
  9
 ```
 """
-map(f, args...) = Base.Generator(f, args...)
-
-tail_if_any(::Tuple{}) = ()
-tail_if_any(x::Tuple) = tail(x)
+map(f, arg, args...) = Base.Generator(f, arg, args...)
 
 _min_length(a, b, ::IsInfinite, ::IsInfinite) = min(length(a),length(b)) # inherit behaviour, error
 _min_length(a, b, A, ::IsInfinite) = length(a)
@@ -207,7 +198,7 @@ size(e::Enumerate) = size(e.itr)
     n === nothing && return n
     (i, n[1]), (i+1, n[2])
 end
-last(e::Enumerate) = (length(e.itr), e.itr[end])
+last(e::Enumerate) = (length(e.itr), last(e.itr))
 
 eltype(::Type{Enumerate{I}}) where {I} = TupleOrBottom(Int, eltype(I))
 
@@ -276,50 +267,48 @@ pairs(::IndexLinear,    A::AbstractArray) = Pairs(A, LinearIndices(A))
 # preserve indexing capabilities for known indexable types
 # faster than zip(keys(a), values(a)) for arrays
 pairs(tuple::Tuple) = Pairs{Int}(tuple, keys(tuple))
-pairs(nt::NamedTuple) = Pairs{Symbol}(nt, keys(nt))
+pairs(nt::NamedTuple) = Pairs{Symbol}(nt, nothing)
 pairs(v::Core.SimpleVector) = Pairs(v, LinearIndices(v))
 pairs(A::AbstractVector) = pairs(IndexLinear(), A)
 # pairs(v::Pairs) = v # listed for reference, but already defined from being an AbstractDict
 
-if Base !== Core.Compiler
 pairs(::IndexCartesian, A::AbstractArray) = Pairs(A, Base.CartesianIndices(axes(A)))
 pairs(A::AbstractArray)  = pairs(IndexCartesian(), A)
-end
 
-length(v::Pairs) = length(getfield(v, :itr))
-axes(v::Pairs) = axes(getfield(v, :itr))
-size(v::Pairs) = size(getfield(v, :itr))
+length(v::Pairs) = length(keys(v))
+axes(v::Pairs) = axes(keys(v))
+size(v::Pairs) = size(keys(v))
 
 Base.@eval @propagate_inbounds function _pairs_elt(p::Pairs{K, V}, idx) where {K, V}
     return $(Expr(:new, :(Pair{K, V}), :idx, :(getfield(p, :data)[idx])))
 end
 
 @propagate_inbounds function iterate(p::Pairs{K, V}, state...) where {K, V}
-    x = iterate(getfield(p, :itr), state...)
+    x = iterate(keys(p), state...)
     x === nothing && return x
     idx, next = x
     return (_pairs_elt(p, idx), next)
 end
 
-@propagate_inbounds function iterate(r::Reverse{<:Pairs}, state=(reverse(getfield(r.itr, :itr)),))
+@propagate_inbounds function iterate(r::Reverse{<:Pairs}, state=(reverse(keys(r.itr)),))
     x = iterate(state...)
     x === nothing && return x
     idx, next = x
     return (_pairs_elt(r.itr, idx), (state[1], next))
 end
 
-@inline isdone(v::Pairs, state...) = isdone(getfield(v, :itr), state...)
+@inline isdone(v::Pairs, state...) = isdone(keys(v), state...)
 
 IteratorSize(::Type{<:Pairs{<:Any, <:Any, I}}) where {I} = IteratorSize(I)
 IteratorSize(::Type{<:Pairs{<:Any, <:Any, <:AbstractUnitRange, <:Tuple}}) = HasLength()
 
 function last(v::Pairs{K, V}) where {K, V}
-    idx = last(getfield(v, :itr))
+    idx = last(keys(v))
     return Pair{K, V}(idx, v[idx])
 end
 
-haskey(v::Pairs, key) = (key in getfield(v, :itr))
-keys(v::Pairs) = getfield(v, :itr)
+haskey(v::Pairs, key) = key in keys(v)
+keys(v::Pairs) = getfield(v, :itr) === nothing ? keys(getfield(v, :data)) : getfield(v, :itr)
 values(v::Pairs) = getfield(v, :data) # TODO: this should be a view of data subset by itr
 getindex(v::Pairs, key) = getfield(v, :data)[key]
 setindex!(v::Pairs, value, key) = (getfield(v, :data)[key] = value; v)
@@ -387,6 +376,22 @@ function _zip_min_length(is)
     end
 end
 _zip_min_length(is::Tuple{}) = nothing
+
+# For a collection of iterators `is`, returns a tuple (b, n), where
+# `b` is true when every component of `is` has a statically-known finite
+# length and all such lengths are equal. Otherwise, `b` is false.
+# `n` is an implementation detail, and will be the `length` of the first
+# iterator if it is statically-known and finite. Otherwise, `n` is `nothing`.
+function _zip_lengths_finite_equal(is)
+    i = is[1]
+    if IteratorSize(i) isa Union{IsInfinite, SizeUnknown}
+        return (false, nothing)
+    else
+        b, n = _zip_lengths_finite_equal(tail(is))
+        return (b && (n === nothing || n == length(i)), length(i))
+    end
+end
+_zip_lengths_finite_equal(is::Tuple{}) = (true, nothing)
 size(z::Zip) = _promote_tuple_shape(Base.map(size, z.is)...)
 axes(z::Zip) = _promote_tuple_shape(Base.map(axes, z.is)...)
 _promote_tuple_shape((a,)::Tuple{OneTo}, (b,)::Tuple{OneTo}) = (intersect(a, b),)
@@ -468,8 +473,93 @@ zip_iteratoreltype() = HasEltype()
 zip_iteratoreltype(a) = a
 zip_iteratoreltype(a, tail...) = and_iteratoreltype(a, zip_iteratoreltype(tail...))
 
-reverse(z::Zip) = Zip(Base.map(reverse, z.is)) # n.b. we assume all iterators are the same length
-last(z::Zip) = getindex.(z.is, minimum(Base.map(lastindex, z.is)))
+last(z::Zip) = nth(z, length(z))
+
+function reverse(z::Zip)
+    if !first(_zip_lengths_finite_equal(z.is))
+        throw(ArgumentError("Cannot reverse zipped iterators of unknown, infinite, or unequal lengths"))
+    end
+    Zip(Base.map(reverse, z.is))
+end
+
+# unzip
+
+"""
+    unzip(itrs) -> NTuple{length(first(itrs)), Vector}
+
+The `unzip` function takes an iterator of iterators and returns a tuple of
+vectors such that the first vector contains the first element yielded by each
+iterator, the second vector the second element yielded by each iterator, etc.
+`unzip` is sort of an inverse to the `zip` operation, as the name suggests.
+In particular, if we define
+
+    ≐(a, b) = collect(collect.(a)) == collect(collect.(b))
+
+then the following identities relating `zip` and `unzip` hold for any `itrs`
+that is is an iterator of iterators:
+
+    unzip(zip(itrs...)) ≐ itrs
+    zip(unzip(itrs)...) ≐ itrs
+
+Note that `unzip` does not return an iterator: it always consumes all of
+its argument and all of each iterator yielded by its argument. It is only
+associated with iteration because it is the inverse of `zip`.
+
+# Examples
+
+```jldoctest
+julia> unzip(enumerate("Hello"))
+([1, 2, 3, 4, 5], ['H', 'e', 'l', 'l', 'o'])
+
+julia> unzip([[1, "apple"], [2.5, "orange"], [0, "mango"]])
+(Real[1, 2.5, 0], ["apple", "orange", "mango"])
+```
+
+!!! compat "Julia 1.11"
+    The `unzip` function requires Julia 1.11 or later.
+"""
+function unzip(itrs)
+    n = Base.haslength(itrs) ? length(itrs) : nothing
+    outer = iterate(itrs)
+    outer === nothing && return ()
+    vals, state = outer
+    vecs = ntuple(length(vals)) do i
+        x = vals[i]
+        v = Vector{typeof(x)}(undef, Base.something(n, 1))
+        @inbounds v[1] = x
+        return v
+    end
+    unzip_rest(vecs, typeof(vals), n isa Int ? 1 : nothing, itrs, state)
+end
+
+function unzip_rest(vecs, eltypes, i, itrs, state)
+    while true
+        i isa Int && (i += 1)
+        outer = iterate(itrs, state)
+        outer === nothing && return vecs
+        itr, state = outer
+        vals = Tuple(itr)
+        if vals isa eltypes
+            for (v, x) in zip(vecs, vals)
+                if i isa Int
+                    @inbounds v[i] = x
+                else
+                    push!(v, x)
+                end
+            end
+        else
+            vecs′ = map(vecs, vals) do v, x
+                T = Base.promote_typejoin(eltype(v), typeof(x))
+                v′ = Vector{T}(undef, length(v) + !(i isa Int))
+                copyto!(v′, v)
+                @inbounds v′[Base.something(i, end)] = x
+                return v′
+            end
+            eltypes′ = Tuple{map(eltype, vecs′)...}
+            return unzip_rest(Tuple(vecs′), eltypes′, i, itrs, state)
+        end
+    end
+end
 
 # unzip
 
@@ -570,6 +660,15 @@ invocation of `filter`. Calls to `flt` will be made when iterating over the
 returned iterable object. These calls are not cached and repeated calls will be
 made when reiterating.
 
+!!! warning
+    Subsequent *lazy* transformations on the iterator returned from `filter`, such
+    as those performed by `Iterators.reverse` or `cycle`, will also delay calls to `flt`
+    until collecting or iterating over the returned iterable object. If the filter
+    predicate is nondeterministic or its return values depend on the order of iteration
+    over the elements of `itr`, composition with lazy transformations may result in
+    surprising behavior. If this is undesirable, either ensure that `flt` is a pure
+    function or collect intermediate `filter` iterators before further transformations.
+
 See [`Base.filter`](@ref) for an eager implementation of filtering for arrays.
 
 # Examples
@@ -594,10 +693,15 @@ filter(flt, itr) = Filter(flt, itr)
 function iterate(f::Filter, state...)
     y = iterate(f.itr, state...)
     while y !== nothing
-        if f.flt(y[1])
-            return y
+        v, s = y
+        if f.flt(v)
+            if y isa Tuple{Any,Any}
+                return (v, s) # incorporate type information that may be improved by user-provided `f.flt`
+            else
+                return y
+            end
         end
-        y = iterate(f.itr, y[2])
+        y = iterate(f.itr, s)
     end
     nothing
 end
@@ -685,13 +789,19 @@ end
 """
     rest(iter, state)
 
-An iterator that yields the same elements as `iter`, but starting at the given `state`.
+An iterator that yields the same elements as `iter`, but starting at the given `state`, which
+must be a state obtainable via a sequence of one or more calls to `iterate(iter[, state])`
 
 See also: [`Iterators.drop`](@ref), [`Iterators.peel`](@ref), [`Base.rest`](@ref).
 
 # Examples
 ```jldoctest
-julia> collect(Iterators.rest([1,2,3,4], 2))
+julia> iter = [1,2,3,4];
+
+julia> val, state = iterate(iter)
+(1, 2)
+
+julia> collect(Iterators.rest(iter, state))
 3-element Vector{Int64}:
  2
  3
@@ -784,7 +894,7 @@ struct Take{I}
     xs::I
     n::Int
     function Take(xs::I, n::Integer) where {I}
-        n < 0 && throw(ArgumentError("Take length must be nonnegative"))
+        n < 0 && throw(ArgumentError("Take length must be non-negative"))
         return new{I}(xs, n)
     end
 end
@@ -843,7 +953,7 @@ struct Drop{I}
     xs::I
     n::Int
     function Drop(xs::I, n::Integer) where {I}
-        n < 0 && throw(ArgumentError("Drop length must be nonnegative"))
+        n < 0 && throw(ArgumentError("Drop length must be non-negative"))
         return new{I}(xs, n)
     end
 end
@@ -994,6 +1104,34 @@ IteratorSize(::Type{<:DropWhile}) = SizeUnknown()
 eltype(::Type{DropWhile{I,P}}) where {I,P} = eltype(I)
 IteratorEltype(::Type{DropWhile{I,P}}) where {I,P} = IteratorEltype(I)
 
+"""
+    findeach(f, it)
+    findeach(it)
+
+An iterator that generates every key from the key/value pairs of `pairs(it)`,
+where `f(value)` returns `true`.
+
+If `f` is not specified, default to `identity`.
+
+`Iterators.findeach` is the lazy equivalent of `findall`.
+
+!!! compat "Julia 1.13"
+    `findeach` requires at least Julia 1.13.
+
+# Examples
+```jldoctest
+julia> collect(Iterators.findeach(isodd, Dict(2 => 3, 3 => 2)))
+1-element Vector{Int64}:
+ 2
+
+julia> only(Iterators.findeach(==(1), [3,6,2,1]))
+4
+```
+"""
+findeach(f, it) = (k for (k, v) in pairs(it) if f(v))
+
+findeach(it) = findeach(identity, it)
+
 
 # Cycle an iterator forever
 
@@ -1002,12 +1140,17 @@ struct Cycle{I}
 end
 
 """
-    cycle(iter)
+    cycle(iter[, n::Int])
 
 An iterator that cycles through `iter` forever.
-If `iter` is empty, so is `cycle(iter)`.
+If `n` is specified, then it cycles through `iter` that many times.
+When `iter` is empty, so are `cycle(iter)` and `cycle(iter, n)`.
 
-See also: [`Iterators.repeated`](@ref), [`Base.repeat`](@ref).
+`Iterators.cycle(iter, n)` is the lazy equivalent of [`Base.repeat`](@ref)`(vector, n)`,
+while [`Iterators.repeated`](@ref)`(iter, n)` is the lazy [`Base.fill`](@ref)`(item, n)`.
+
+!!! compat "Julia 1.11"
+    The method `cycle(iter, n)` was added in Julia 1.11.
 
 # Examples
 ```jldoctest
@@ -1016,13 +1159,23 @@ julia> for (i, v) in enumerate(Iterators.cycle("hello"))
            i > 10 && break
        end
 hellohelloh
+
+julia> foreach(print, Iterators.cycle(['j', 'u', 'l', 'i', 'a'], 3))
+juliajuliajulia
+
+julia> repeat([1,2,3], 4) == collect(Iterators.cycle([1,2,3], 4))
+true
+
+julia> fill([1,2,3], 4) == collect(Iterators.repeated([1,2,3], 4))
+true
 ```
 """
 cycle(xs) = Cycle(xs)
+cycle(xs, n::Integer) = flatten(repeated(xs, n))
 
 eltype(::Type{Cycle{I}}) where {I} = eltype(I)
 IteratorEltype(::Type{Cycle{I}}) where {I} = IteratorEltype(I)
-IteratorSize(::Type{Cycle{I}}) where {I} = IsInfinite()
+IteratorSize(::Type{Cycle{I}}) where {I} = IsInfinite() # XXX: this is false if iterator ever becomes empty
 
 iterate(it::Cycle) = iterate(it.xs)
 isdone(it::Cycle) = isdone(it.xs)
@@ -1036,6 +1189,7 @@ end
 reverse(it::Cycle) = Cycle(reverse(it.xs))
 last(it::Cycle) = last(it.xs)
 
+
 # Repeated - repeat an object infinitely many times
 
 struct Repeated{O}
@@ -1049,7 +1203,7 @@ repeated(x) = Repeated(x)
 An iterator that generates the value `x` forever. If `n` is specified, generates `x` that
 many times (equivalent to `take(repeated(x), n)`).
 
-See also: [`Iterators.cycle`](@ref), [`Base.repeat`](@ref).
+See also [`fill`](@ref Base.fill), and compare [`Iterators.cycle`](@ref).
 
 # Examples
 ```jldoctest
@@ -1061,6 +1215,12 @@ julia> collect(a)
  [1 2]
  [1 2]
  [1 2]
+
+julia> ans == fill([1 2], 4)
+true
+
+julia> Iterators.cycle([1 2], 4) |> collect |> println
+[1, 2, 1, 2, 1, 2, 1, 2]
 ```
 """
 repeated(x, n::Integer) = take(repeated(x), Int(n))
@@ -1127,7 +1287,7 @@ _prod_size(t::Tuple) = (_prod_size1(t[1], IteratorSize(t[1]))..., _prod_size(tai
 _prod_size1(a, ::HasShape)  = size(a)
 _prod_size1(a, ::HasLength) = (length(a),)
 _prod_size1(a, A) =
-    throw(ArgumentError("Cannot compute size for object of type $(typeof(a))"))
+    throw(ArgumentError(LazyString("Cannot compute size for object of type ", typeof(a))))
 
 axes(P::ProductIterator) = _prod_indices(P.iterators)
 _prod_indices(::Tuple{}) = ()
@@ -1135,7 +1295,7 @@ _prod_indices(t::Tuple) = (_prod_axes1(t[1], IteratorSize(t[1]))..., _prod_indic
 _prod_axes1(a, ::HasShape)  = axes(a)
 _prod_axes1(a, ::HasLength) = (OneTo(length(a)),)
 _prod_axes1(a, A) =
-    throw(ArgumentError("Cannot compute indices for object of type $(typeof(a))"))
+    throw(ArgumentError(LazyString("Cannot compute indices for object of type ", typeof(a))))
 
 ndims(p::ProductIterator) = length(axes(p))
 length(P::ProductIterator) = reduce(checked_mul, size(P); init=1)
@@ -1174,6 +1334,8 @@ end
     next === nothing && return nothing
     restnext = _piterate(rest...)
     restnext === nothing && return nothing
+    VS = @default_eltype(iter1)
+    next = Pair{VS, typeof(next[2])}(next[1], next[2])
     return (next, restnext...)
 end
 @inline function iterate(P::ProductIterator)
@@ -1186,8 +1348,8 @@ end
 @inline _piterate1(::Tuple{}, ::Tuple{}) = nothing
 @inline function _piterate1(iters, states)
     iter1 = first(iters)
-    next = iterate(iter1, first(states)[2])
-    restnext = tail(states)
+    state1, restnext... = states
+    next = iterate(iter1, state1[2])
     if next === nothing
         isdone(iter1) === true && return nothing
         restnext = _piterate1(tail(iters), restnext)
@@ -1195,6 +1357,7 @@ end
         next = iterate(iter1)
         next === nothing && return nothing
     end
+    next = Pair{fieldtype(typeof(state1), 1), typeof(next[2])}(next[1], next[2])
     return (next, restnext...)
 end
 @inline function iterate(P::ProductIterator, states)
@@ -1243,7 +1406,13 @@ julia> [(x,y) for x in 0:1 for y in 'a':'c']  # collects generators involving It
 flatten(itr) = Flatten(itr)
 
 eltype(::Type{Flatten{I}}) where {I} = eltype(eltype(I))
-eltype(::Type{Flatten{Tuple{}}}) = eltype(Tuple{})
+
+# For tuples, we statically know the element type of each index, so we can compute
+# this at compile time.
+function eltype(::Type{Flatten{I}}) where {I<:Union{Tuple,NamedTuple}}
+    afoldl((T, i) -> promote_typejoin(T, eltype(i)), Union{}, fieldtypes(I)...)
+end
+
 IteratorEltype(::Type{Flatten{I}}) where {I} = _flatteneltype(I, IteratorEltype(I))
 IteratorEltype(::Type{Flatten{Tuple{}}}) = IteratorEltype(Tuple{})
 _flatteneltype(I, ::HasEltype) = IteratorEltype(eltype(I))
@@ -1271,20 +1440,48 @@ flatten_length(f, T) = throw(ArgumentError(
 length(f::Flatten{I}) where {I} = flatten_length(f, eltype(I))
 length(f::Flatten{Tuple{}}) = 0
 
-@propagate_inbounds function iterate(f::Flatten, state=())
-    if state !== ()
-        y = iterate(tail(state)...)
-        y !== nothing && return (y[1], (state[1], state[2], y[2]))
+@propagate_inbounds function iterate(fl::Flatten)
+    it_result = iterate(fl.it)
+    it_result === nothing && return nothing
+
+    inner_iterator, next_outer_state = it_result
+    inner_it_result = iterate(inner_iterator)
+
+    while inner_it_result === nothing
+        it_result = iterate(fl.it, next_outer_state)
+        it_result === nothing && return nothing
+
+        inner_iterator, next_outer_state = it_result
+        inner_it_result = iterate(inner_iterator)
     end
-    x = (state === () ? iterate(f.it) : iterate(f.it, state[1]))
-    x === nothing && return nothing
-    y = iterate(x[1])
-    while y === nothing
-         x = iterate(f.it, x[2])
-         x === nothing && return nothing
-         y = iterate(x[1])
+
+    item, next_inner_state = inner_it_result
+    return item, (next_outer_state, inner_iterator, next_inner_state)
+end
+
+@propagate_inbounds function iterate(fl::Flatten, state)
+    next_outer_state, inner_iterator, next_inner_state = state
+
+    # try to advance the inner iterator
+    inner_it_result = iterate(inner_iterator, next_inner_state)
+    if inner_it_result !== nothing
+        item, next_inner_state = inner_it_result
+        return item, (next_outer_state, inner_iterator, next_inner_state)
+    end
+
+    # advance the outer iterator
+    while true
+        outer_it_result = iterate(fl.it, next_outer_state)
+        outer_it_result === nothing && return nothing
+
+        inner_iterator, next_outer_state = outer_it_result
+        inner_it_result = iterate(inner_iterator)
+
+        if inner_it_result !== nothing
+            item, next_inner_state = inner_it_result
+            return item, (next_outer_state, inner_iterator, next_inner_state)
+        end
     end
-    return y[1], (x[2], x[1], y[2])
 end
 
 reverse(f::Flatten) = Flatten(reverse(itr) for itr in reverse(f.it))
@@ -1331,7 +1528,6 @@ true
 """
 flatmap(f, c...) = flatten(map(f, c...))
 
-if Base !== Core.Compiler # views are not defined
 @doc """
     partition(collection, n)
 
@@ -1362,7 +1558,7 @@ eltype(::Type{PartitionIterator{T}}) where {T} = Vector{eltype(T)}
 # Arrays use a generic `view`-of-a-`vec`, so we cannot exactly predict what we'll get back
 eltype(::Type{PartitionIterator{T}}) where {T<:AbstractArray} = AbstractVector{eltype(T)}
 # But for some common implementations in Base we know the answer exactly
-eltype(::Type{PartitionIterator{T}}) where {T<:Vector} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, true}
+eltype(::Type{PartitionIterator{T}}) where {T<:Vector} = Base.SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, true}
 
 IteratorEltype(::Type{PartitionIterator{T}}) where {T} = IteratorEltype(T)
 IteratorEltype(::Type{PartitionIterator{T}}) where {T<:AbstractArray} = EltypeUnknown()
@@ -1388,7 +1584,7 @@ end
 function iterate(itr::PartitionIterator{<:AbstractArray}, state = firstindex(itr.c))
     state > lastindex(itr.c) && return nothing
     r = min(state + itr.n - 1, lastindex(itr.c))
-    return @inbounds view(itr.c, state:r), r + 1
+    return @inbounds Base.view(itr.c, state:r), r + 1
 end
 
 struct IterationCutShort; end
@@ -1471,43 +1667,30 @@ julia> sum(a) # Sum the remaining elements
 7
 ```
 """
-mutable struct Stateful{T, VS, N<:Integer}
+mutable struct Stateful{T, VS}
     itr::T
     # A bit awkward right now, but adapted to the new iteration protocol
     nextvalstate::Union{VS, Nothing}
-
-    # Number of remaining elements, if itr is HasLength or HasShape.
-    # if not, store -1 - number_of_consumed_elements.
-    # This allows us to defer calculating length until asked for.
-    # See PR #45924
-    remaining::N
     @inline function Stateful{<:Any, Any}(itr::T) where {T}
-        itl = iterlength(itr)
-        new{T, Any, typeof(itl)}(itr, iterate(itr), itl)
+        return new{T, Any}(itr, iterate(itr))
     end
     @inline function Stateful(itr::T) where {T}
         VS = approx_iter_type(T)
-        itl = iterlength(itr)
-        return new{T, VS, typeof(itl)}(itr, iterate(itr)::VS, itl)
+        return new{T, VS}(itr, iterate(itr)::VS)
     end
 end
 
-function iterlength(it)::Signed
-    if IteratorSize(it) isa Union{HasShape, HasLength}
-       return length(it)
-    else
-        -1
-    end
+function reset!(s::Stateful)
+    setfield!(s, :nextvalstate, iterate(s.itr)) # bypass convert call of setproperty!
+    return s
 end
-
-function reset!(s::Stateful{T,VS}, itr::T=s.itr) where {T,VS}
+function reset!(s::Stateful{T}, itr::T) where {T}
     s.itr = itr
-    itl = iterlength(itr)
-    setfield!(s, :nextvalstate, iterate(itr))
-    s.remaining = itl
-    s
+    reset!(s)
+    return s
 end
 
+
 # Try to find an appropriate type for the (value, state tuple),
 # by doing a recursive unrolling of the iteration protocol up to
 # fixpoint.
@@ -1529,7 +1712,6 @@ end
 
 Stateful(x::Stateful) = x
 convert(::Type{Stateful}, itr) = Stateful(itr)
-
 @inline isdone(s::Stateful, st=nothing) = s.nextvalstate === nothing
 
 @inline function popfirst!(s::Stateful)
@@ -1539,8 +1721,6 @@ convert(::Type{Stateful}, itr) = Stateful(itr)
     else
         val, state = vs
         Core.setfield!(s, :nextvalstate, iterate(s.itr, state))
-        rem = s.remaining
-        s.remaining = rem - typeof(rem)(1)
         return val
     end
 end
@@ -1550,22 +1730,10 @@ end
     return ns !== nothing ? ns[1] : sentinel
 end
 @inline iterate(s::Stateful, state=nothing) = s.nextvalstate === nothing ? nothing : (popfirst!(s), nothing)
-IteratorSize(::Type{<:Stateful{T}}) where {T} = IteratorSize(T) isa HasShape ? HasLength() : IteratorSize(T)
+IteratorSize(::Type{<:Stateful{T}}) where {T} = IteratorSize(T) isa IsInfinite ? IsInfinite() : SizeUnknown()
 eltype(::Type{<:Stateful{T}}) where {T} = eltype(T)
 IteratorEltype(::Type{<:Stateful{T}}) where {T} = IteratorEltype(T)
 
-function length(s::Stateful)
-    rem = s.remaining
-    # If rem is actually remaining length, return it.
-    # else, rem is number of consumed elements.
-    if rem >= 0
-        rem
-    else
-        length(s.itr) - (typeof(rem)(1) - rem)
-    end
-end
-end # if statement several hundred lines above
-
 """
     only(x)
 
@@ -1596,7 +1764,9 @@ Stacktrace:
 [...]
 ```
 """
-@propagate_inbounds function only(x)
+@propagate_inbounds only(x) = _only(x, iterate)
+
+@propagate_inbounds function _only(x, ::typeof(iterate))
     i = iterate(x)
     @boundscheck if i === nothing
         throw(ArgumentError("Collection is empty, must contain exactly 1 element"))
@@ -1608,18 +1778,140 @@ Stacktrace:
     return ret
 end
 
-# Collections of known size
-only(x::Ref) = x[]
-only(x::Number) = x
-only(x::Char) = x
+@inline function _only(x, ::typeof(first))
+    @boundscheck if length(x) != 1
+        throw(ArgumentError("Collection must contain exactly 1 element"))
+    end
+    @inbounds first(x)
+end
+
+@propagate_inbounds only(x::IdDict) = _only(x, first)
+
+# Specific error messages for tuples and named tuples
 only(x::Tuple{Any}) = x[1]
 only(x::Tuple) = throw(
     ArgumentError("Tuple contains $(length(x)) elements, must contain exactly 1 element")
 )
-only(a::AbstractArray{<:Any, 0}) = @inbounds return a[]
 only(x::NamedTuple{<:Any, <:Tuple{Any}}) = first(x)
 only(x::NamedTuple) = throw(
     ArgumentError("NamedTuple contains $(length(x)) elements, must contain exactly 1 element")
 )
 
+"""
+    IterableStatePairs(x)
+
+This internal type is returned by [`pairs`](@ref), when the key is the same as
+the state of `iterate`. This allows the iterator to determine the key => value
+pairs by only calling iterate on the values.
+
+"""
+struct IterableStatePairs{T}
+    x::T
+end
+
+IteratorSize(::Type{<:IterableStatePairs{T}}) where T = IteratorSize(T)
+length(x::IterableStatePairs) = length(x.x)
+Base.eltype(::Type{IterableStatePairs{T}}) where T = Pair{<:Any, eltype(T)}
+
+function iterate(x::IterableStatePairs, state=first(keys(x.x)))
+    it = iterate(x.x, state)
+    it === nothing && return nothing
+    (state => first(it), last(it))
+end
+
+reverse(x::IterableStatePairs) = IterableStatePairs(Iterators.reverse(x.x))
+reverse(x::IterableStatePairs{<:Iterators.Reverse}) = IterableStatePairs(x.x.itr)
+
+function iterate(x::IterableStatePairs{<:Iterators.Reverse}, state=last(keys(x.x.itr)))
+    it = iterate(x.x, state)
+    it === nothing && return nothing
+    (state => first(it), last(it))
+end
+
+# According to the docs of iterate(::AbstractString), the iteration state must
+# be the same as the keys, so this is a valid optimization (see #51631)
+pairs(s::AbstractString) = IterableStatePairs(s)
+
+"""
+    nth(itr, n::Integer)
+
+Get the `n`th element of an iterable collection. Throw a `BoundsError`[@ref] if not existing.
+Will advance any `Stateful`[@ref] iterator.
+
+See also: [`first`](@ref), [`last`](@ref)
+
+# Examples
+```jldoctest
+julia> Iterators.nth(2:2:10, 4)
+8
+
+julia> Iterators.nth(reshape(1:30, (5,6)), 6)
+6
+
+julia> stateful = Iterators.Stateful(1:10); Iterators.nth(stateful, 7)
+7
+
+julia> first(stateful)
+8
+```
+"""
+nth(itr, n::Integer) = _nth(IteratorSize(itr), itr, n)
+nth(itr::Cycle{I}, n::Integer) where I = _nth(IteratorSize(I), itr, n)
+nth(itr::Flatten{Take{Repeated{O}}}, n::Integer) where O = _nth(IteratorSize(O), itr, n)
+@propagate_inbounds nth(itr::AbstractArray, n::Integer) = itr[begin + n - 1]
+
+function _nth(::Union{HasShape, HasLength}, itr::Cycle{I}, n::Integer) where {I}
+    N = length(itr.xs)
+    N == 0 && throw(BoundsError(itr, n))
+
+    # prevents wrap around behaviour and inherit the error handling
+    return nth(itr.xs, n > 0 ? mod1(n, N) : n)
+end
+
+# Flatten{Take{Repeated{O}}} is the actual type of an Iterators.cycle(iterable::O, m) iterator
+function _nth(::Union{HasShape, HasLength}, itr::Flatten{Take{Repeated{O}}}, n::Integer) where {O}
+    cycles = itr.it.n
+    torepeat = itr.it.xs.x
+    k = length(torepeat)
+    (n > k*cycles || k == 0) && throw(BoundsError(itr, n))
+
+    # prevent wrap around behaviour and inherit the error handling
+    return nth(torepeat, n > 0 ? mod1(n, k) : n)
+end
+
+function _nth(::IteratorSize, itr, n::Integer)
+    # unrolled version of `first(drop)`
+    n > 0 || throw(BoundsError(itr, n))
+    y = iterate(itr)
+    for _ in 1:n-1
+        y === nothing && break
+        y = iterate(itr, y[2])
+    end
+    y === nothing && throw(BoundsError(itr, n))
+    y[1]
+end
+
+_nth(::IteratorSize, z::Zip, n::Integer) = Base.map(nth(n), z.is)
+
+"""
+    nth(n::Integer)
+
+Return a function that gets the `n`-th element from any iterator passed to it.
+Equivalent to `Base.Fix2(nth, n)` or `itr -> nth(itr, n)`.
+
+See also: [`nth`](@ref), [`Base.Fix2`](@ref)
+# Examples
+```jldoctest
+julia> fifth_element = Iterators.nth(5)
+(::Base.Fix2{typeof(Base.Iterators.nth), Int64}) (generic function with 2 methods)
+
+julia> fifth_element(reshape(1:30, (5,6)))
+5
+
+julia> map(fifth_element, ("Willis", "Jovovich", "Oldman"))
+('i', 'v', 'a')
+```
+"""
+nth(n::Integer) = Base.Fix2(nth, n)
+
 end
diff --git a/base/libc.jl b/base/libc.jl
index 99e8dce6b87e5..a24253242192c 100644
--- a/base/libc.jl
+++ b/base/libc.jl
@@ -6,18 +6,18 @@ Interface to libc, the C standard library.
 """ Libc
 
 import Base: transcode, windowserror, show
-# these need to be defined seperately for bootstrapping but belong to Libc
+# these need to be defined separately for bootstrapping but belong to Libc
 import Base: memcpy, memmove, memset, memcmp
 import Core.Intrinsics: bitcast
 
 export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, memcpy,
     memmove, memset, calloc, realloc, errno, strerror, flush_cstdio, systemsleep, time,
-    transcode
+    transcode, mkfifo
 if Sys.iswindows()
     export GetLastError, FormatMessage
 end
 
-include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "errno_h.jl"))  # include($BUILDROOT/base/errno_h.jl)
+include(string(Base.BUILDROOT, "errno_h.jl"))  # include($BUILDROOT/base/errno_h.jl)
 
 ## RawFD ##
 
@@ -36,15 +36,27 @@ RawFD(fd::Integer) = bitcast(RawFD, Cint(fd))
 RawFD(fd::RawFD) = fd
 Base.cconvert(::Type{Cint}, fd::RawFD) = bitcast(Cint, fd)
 
+"""
+    dup(src::RawFD[, target::RawFD])::RawFD
+
+Duplicate the file descriptor `src` so that the duplicate refers to the same OS
+resource (e.g. a file or socket). A `target` file descriptor may optionally
+be passed to use for the new duplicate.
+"""
 dup(x::RawFD) = ccall((@static Sys.iswindows() ? :_dup : :dup), RawFD, (RawFD,), x)
 dup(src::RawFD, target::RawFD) = systemerror("dup", -1 ==
     ccall((@static Sys.iswindows() ? :_dup2 : :dup2), Int32,
                 (RawFD, RawFD), src, target))
 
-show(io::IO, fd::RawFD) = print(io, "RawFD(", bitcast(UInt32, fd), ')')  # avoids invalidation via show_default
+show(io::IO, fd::RawFD) = print(io, "RawFD(", bitcast(Int32, fd), ')')  # avoids invalidation via show_default
 
 # Wrapper for an OS file descriptor (for Windows)
 if Sys.iswindows()
+    @doc """
+        WindowsRawSocket
+
+    Primitive type which wraps the native Windows file `HANDLE`.
+    """
     primitive type WindowsRawSocket sizeof(Ptr) * 8 end # On Windows file descriptors are HANDLE's and 64-bit on 64-bit Windows
     WindowsRawSocket(handle::Ptr{Cvoid}) = bitcast(WindowsRawSocket, handle)
     WindowsRawSocket(handle::WindowsRawSocket) = handle
@@ -75,6 +87,34 @@ end
 
 ## FILE (not auto-finalized) ##
 
+"""
+    FILE(::Ptr)
+    FILE(::IO)
+
+A libc `FILE*`, representing an opened file.
+
+It can be passed as a `Ptr{FILE}` argument to [`ccall`](@ref) and also supports
+[`seek`](@ref), [`position`](@ref) and [`close`](@ref).
+
+A `FILE` can be constructed from an ordinary `IO` object, provided it is an open file. It
+must be closed afterward.
+
+# Examples
+```jldoctest
+julia> using Base.Libc
+
+julia> mktemp() do _, io
+           # write to the temporary file using `puts(char*, FILE*)` from libc
+           file = FILE(io)
+           ccall(:fputs, Cint, (Cstring, Ptr{FILE}), "hello world", file)
+           close(file)
+           # read the file again
+           seek(io, 0)
+           read(io, String)
+       end
+"hello world"
+```
+"""
 struct FILE
     ptr::Ptr{Cvoid}
 end
@@ -238,23 +278,25 @@ end
 # system date in seconds
 
 """
-    time(t::TmStruct) -> Float64
+    time(t::TmStruct)::Float64
 
-Converts a `TmStruct` struct to a number of seconds since the epoch.
+Convert a `TmStruct` struct to a number of seconds since the epoch.
 """
 time(tm::TmStruct) = Float64(ccall(:mktime, Int, (Ref{TmStruct},), tm))
 
 """
-    time() -> Float64
+    time()::Float64
 
 Get the system time in seconds since the epoch, with fairly high (typically, microsecond) resolution.
+
+See also [`time_ns`](@ref).
 """
 time() = ccall(:jl_clock_now, Float64, ())
 
 ## process-related functions ##
 
 """
-    getpid() -> Int32
+    getpid()::Int32
 
 Get Julia's process ID.
 """
@@ -263,7 +305,7 @@ getpid() = ccall(:uv_os_getpid, Int32, ())
 ## network functions ##
 
 """
-    gethostname() -> String
+    gethostname()::String
 
 Get the local machine's host name.
 """
@@ -352,14 +394,14 @@ free(p::Cstring) = free(convert(Ptr{UInt8}, p))
 free(p::Cwstring) = free(convert(Ptr{Cwchar_t}, p))
 
 """
-    malloc(size::Integer) -> Ptr{Cvoid}
+    malloc(size::Integer)::Ptr{Cvoid}
 
 Call `malloc` from the C standard library.
 """
 malloc(size::Integer) = ccall(:malloc, Ptr{Cvoid}, (Csize_t,), size)
 
 """
-    realloc(addr::Ptr, size::Integer) -> Ptr{Cvoid}
+    realloc(addr::Ptr, size::Integer)::Ptr{Cvoid}
 
 Call `realloc` from the C standard library.
 
@@ -369,7 +411,7 @@ obtained from [`malloc`](@ref).
 realloc(p::Ptr, size::Integer) = ccall(:realloc, Ptr{Cvoid}, (Ptr{Cvoid}, Csize_t), p, size)
 
 """
-    calloc(num::Integer, size::Integer) -> Ptr{Cvoid}
+    calloc(num::Integer, size::Integer)::Ptr{Cvoid}
 
 Call `calloc` from the C standard library.
 """
@@ -409,6 +451,33 @@ function srand(seed::Integer=_make_uint64_seed())
     ccall(:jl_srand, Cvoid, (UInt64,), seed % UInt64)
 end
 
+"""
+    mkfifo(path::AbstractString, [mode::Integer]) -> path
+
+Make a FIFO special file (a named pipe) at `path`.  Return `path` as-is on success.
+
+`mkfifo` is supported only in Unix platforms.
+
+!!! compat "Julia 1.11"
+    `mkfifo` requires at least Julia 1.11.
+"""
+function mkfifo(
+    path::AbstractString,
+    mode::Integer = Base.S_IRUSR | Base.S_IWUSR | Base.S_IRGRP | Base.S_IWGRP |
+                    Base.S_IROTH | Base.S_IWOTH,
+)
+    @static if Sys.isunix()
+        # Default `mode` is compatible with `mkfifo` CLI in coreutils.
+        ret = ccall(:mkfifo, Cint, (Cstring, Base.Cmode_t), path, mode)
+        systemerror("mkfifo", ret == -1)
+        return path
+    else
+        # Using normal `error` because `systemerror("mkfifo", ENOTSUP)` does not
+        # seem to work on Windows.
+        error("mkfifo: Operation not supported")
+    end
+end
+
 struct Cpasswd
    username::Cstring
    uid::Culong
@@ -438,6 +507,26 @@ struct Group
     mem::Vector{String}
 end
 
+# Gets password-file entry for default user, or a subset thereof
+# (e.g., uid and guid are set to -1 on Windows)
+function getpw()
+    ref_pd = Ref(Cpasswd())
+    ret = ccall(:uv_os_get_passwd, Cint, (Ref{Cpasswd},), ref_pd)
+    Base.uv_error("getpw", ret)
+
+    pd = ref_pd[]
+    pd = Passwd(
+        pd.username == C_NULL ? "" : unsafe_string(pd.username),
+        pd.uid,
+        pd.gid,
+        pd.shell == C_NULL ? "" : unsafe_string(pd.shell),
+        pd.homedir == C_NULL ? "" : unsafe_string(pd.homedir),
+        pd.gecos == C_NULL ? "" : unsafe_string(pd.gecos),
+    )
+    ccall(:uv_os_free_passwd, Cvoid, (Ref{Cpasswd},), ref_pd)
+    return pd
+end
+
 function getpwuid(uid::Unsigned, throw_error::Bool=true)
     ref_pd = Ref(Cpasswd())
     ret = ccall(:uv_os_get_passwd2, Cint, (Ref{Cpasswd}, Culong), ref_pd, uid)
@@ -457,6 +546,7 @@ function getpwuid(uid::Unsigned, throw_error::Bool=true)
     ccall(:uv_os_free_passwd, Cvoid, (Ref{Cpasswd},), ref_pd)
     return pd
 end
+
 function getgrgid(gid::Unsigned, throw_error::Bool=true)
     ref_gp = Ref(Cgroup())
     ret = ccall(:uv_os_get_group, Cint, (Ref{Cgroup}, Culong), ref_gp, gid)
@@ -484,9 +574,9 @@ end
 
 getuid() = ccall(:jl_getuid, Culong, ())
 geteuid() = ccall(:jl_geteuid, Culong, ())
+getegid() = Sys.iswindows() ? Culong(-1) : ccall(:getegid, Culong, ())
 
 # Include dlopen()/dlpath() code
 include("libdl.jl")
-using .Libdl
 
 end # module
diff --git a/base/libdl.jl b/base/libdl.jl
index fdf6103d1800b..024c88ecf2a16 100644
--- a/base/libdl.jl
+++ b/base/libdl.jl
@@ -5,11 +5,11 @@ module Libdl
 Interface to libdl. Provides dynamic linking support.
 """ Libdl
 
-import Base.DL_LOAD_PATH
+import Base: DL_LOAD_PATH, isdebugbuild
 
 export DL_LOAD_PATH, RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL,
     RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW, dlclose, dlopen, dlopen_e, dlsym, dlsym_e,
-    dlpath, find_library, dlext, dllist
+    dlpath, find_library, dlext, dllist, LazyLibrary, LazyLibraryPath, BundledLazyLibraryPath
 
 """
     DL_LOAD_PATH
@@ -45,6 +45,9 @@ applicable.
 """
 (RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW)
 
+# The default flags for `dlopen()`
+const default_rtld_flags = RTLD_LAZY | RTLD_DEEPBIND
+
 """
     dlsym(handle, sym; throw_error::Bool = true)
 
@@ -57,8 +60,8 @@ function dlsym(hnd::Ptr, s::Union{Symbol,AbstractString}; throw_error::Bool = tr
     hnd == C_NULL && throw(ArgumentError("NULL library handle"))
     val = Ref(Ptr{Cvoid}(0))
     symbol_found = ccall(:jl_dlsym, Cint,
-        (Ptr{Cvoid}, Cstring, Ref{Ptr{Cvoid}}, Cint),
-        hnd, s, val, Int64(throw_error)
+        (Ptr{Cvoid}, Cstring, Ref{Ptr{Cvoid}}, Cint, Cint),
+        hnd, s, val, Int64(throw_error), Int64(1)
     )
     if symbol_found == 0
         return nothing
@@ -72,8 +75,8 @@ end
 Look up a symbol from a shared library handle, silently return `C_NULL` on lookup failure.
 This method is now deprecated in favor of `dlsym(handle, sym; throw_error=false)`.
 """
-function dlsym_e(hnd::Ptr, s::Union{Symbol,AbstractString})
-    return something(dlsym(hnd, s; throw_error=false), C_NULL)
+function dlsym_e(args...)
+    return something(dlsym(args...; throw_error=false), C_NULL)
 end
 
 """
@@ -110,12 +113,12 @@ If the library cannot be found, this method throws an error, unless the keyword
 """
 function dlopen end
 
-dlopen(s::Symbol, flags::Integer = RTLD_LAZY | RTLD_DEEPBIND; kwargs...) =
+dlopen(s::Symbol, flags::Integer = default_rtld_flags; kwargs...) =
     dlopen(string(s), flags; kwargs...)
 
-function dlopen(s::AbstractString, flags::Integer = RTLD_LAZY | RTLD_DEEPBIND; throw_error::Bool = true)
+function dlopen(s::AbstractString, flags::Integer = default_rtld_flags; throw_error::Bool = true)
     ret = ccall(:jl_load_dynamic_library, Ptr{Cvoid}, (Cstring,UInt32,Cint), s, flags, Cint(throw_error))
-    if ret == C_NULL
+    if !throw_error && ret == C_NULL
         return nothing
     end
     return ret
@@ -127,7 +130,7 @@ end
 Wrapper for usage with `do` blocks to automatically close the dynamic library once
 control flow leaves the `do` block scope.
 
-# Example
+# Examples
 ```julia
 vendor = dlopen("libblas") do lib
     if Libdl.dlsym(lib, :openblas_set_num_threads; throw_error=false) !== nothing
@@ -138,10 +141,10 @@ vendor = dlopen("libblas") do lib
 end
 ```
 """
-function dlopen(f::Function, args...; kwargs...)
+function dlopen(f::Function, name, args...; kwargs...)
     hdl = nothing
     try
-        hdl = dlopen(args...; kwargs...)
+        hdl = dlopen(name, args...; kwargs...)
         f(hdl)
     finally
         dlclose(hdl)
@@ -231,7 +234,7 @@ end
 
 Get the full path of the library `libname`.
 
-# Example
+# Examples
 ```julia-repl
 julia> dlpath("libjulia")
 ```
@@ -276,7 +279,7 @@ if (Sys.islinux() || Sys.isbsd()) && !Sys.isapple()
 
     # This callback function called by dl_iterate_phdr() on Linux and BSD's
     # DL_ITERATE_PHDR(3) on freebsd
-    function dl_phdr_info_callback(di::dl_phdr_info, size::Csize_t, dynamic_libraries::Array{String,1})
+    function dl_phdr_info_callback(di::dl_phdr_info, size::Csize_t, dynamic_libraries::Vector{String})
         name = unsafe_string(di.name)
         push!(dynamic_libraries, name)
         return Cint(0)
@@ -314,4 +317,214 @@ function dllist()
     return dynamic_libraries
 end
 
+
+"""
+    LazyLibraryPath(path_pieces...)
+
+Helper type for lazily constructed library paths for use with [`LazyLibrary`](@ref).
+Path pieces are stored unevaluated and joined with `joinpath()` when the library is first
+accessed. Arguments must be able to have `string()` called on them.
+
+# Example
+
+```julia
+const mylib = LazyLibrary(LazyLibraryPath(artifact_dir, "lib", "libmylib.so.1.2.3"))
+```
+
+!!! compat "Julia 1.11"
+    `LazyLibraryPath` was added in Julia 1.11.
+
+See also [`LazyLibrary`](@ref), [`BundledLazyLibraryPath`](@ref).
+"""
+struct LazyLibraryPath
+    pieces::Tuple{Vararg{Any}}
+    LazyLibraryPath(pieces...) = new(pieces)
+end
+@inline Base.string(llp::LazyLibraryPath) = joinpath(String[string(p) for p in llp.pieces])
+Base.cconvert(::Type{Cstring}, llp::LazyLibraryPath) = Base.cconvert(Cstring, string(llp))
+# Define `print` so that we can wrap this in a `LazyString`
+Base.print(io::IO, llp::LazyLibraryPath) = print(io, string(llp))
+
+# Helper to get `$(private_shlibdir)` at runtime
+struct PrivateShlibdirGetter; end
+const private_shlibdir = Base.OncePerProcess{String}() do
+    libname = ifelse(isdebugbuild(), "libjulia-internal-debug", "libjulia-internal")
+    dirname(dlpath(libname))
+end
+Base.string(::PrivateShlibdirGetter) = private_shlibdir()
+
+"""
+    BundledLazyLibraryPath(subpath)
+
+Helper type for lazily constructed library paths within the Julia distribution.
+Constructs paths relative to Julia's private shared library directory.
+
+Primarily used by Julia's standard library. For example:
+```julia
+const libgmp = LazyLibrary(BundledLazyLibraryPath("libgmp.so.10"))
+```
+
+!!! compat "Julia 1.11"
+    `BundledLazyLibraryPath` was added in Julia 1.11.
+
+See also [`LazyLibrary`](@ref), [`LazyLibraryPath`](@ref).
+"""
+BundledLazyLibraryPath(subpath) = LazyLibraryPath(PrivateShlibdirGetter(), subpath)
+
+# Small helper struct to initialize a LazyLibrary with its initial set of dependencies
+struct InitialDependencies{T}
+    dependencies::Vector{T}
+end
+(init::InitialDependencies)() = copy(init.dependencies)
+
+"""
+    LazyLibrary(name; flags = <default dlopen flags>,
+                dependencies = LazyLibrary[], on_load_callback = nothing)
+
+Represents a lazily-loaded shared library that delays loading itself and its dependencies
+until first use in a `ccall()`, `@ccall`, `dlopen()`, `dlsym()`, `dlpath()`, or `cglobal()`.
+This is a thread-safe mechanism for on-demand library initialization.
+
+# Arguments
+
+- `name`: Library name (or lazy path computation) as a `String`,
+  [`LazyLibraryPath`](@ref), or [`BundledLazyLibraryPath`](@ref).
+- `flags`: Optional `dlopen` flags (default: `RTLD_LAZY | RTLD_DEEPBIND`). See [`dlopen`](@ref).
+- `dependencies`: Vector of `LazyLibrary` object references to load before this one.
+- `on_load_callback`: Optional function to run arbitrary code on first load (use sparingly,
+  as it is not expected that `ccall()` should result in large amounts of Julia code being run.
+  You may call `ccall()` from within the `on_load_callback` but only for the current library
+  and its dependencies, and user should not call `wait()` on any tasks within the on load
+  callback as they may deadlock).
+
+The dlopen operation is thread-safe: only one thread loads the library, acquired after the
+release store of the reference to each dependency from loading of each dependency. Other
+tasks block until loading completes. The handle is then cached and reused for all subsequent
+calls (there is no dlclose for lazy library and dlclose should not be called on the returned
+handled).
+
+# Examples
+
+```julia
+# Basic usage
+const mylib = LazyLibrary("libmylib")
+@ccall mylib.myfunc(42::Cint)::Cint
+
+# With dependencies
+const libfoo = LazyLibrary("libfoo")
+const libbar = LazyLibrary("libbar"; dependencies=[libfoo])
+```
+
+For more examples including platform-specific libraries, lazy path construction, and
+migration from `__init__()` patterns, see the manual section on
+[Using LazyLibrary for Lazy Loading](@ref man-lazylibrary).
+
+!!! compat "Julia 1.11"
+    `LazyLibrary` was added in Julia 1.11.
+
+See also [`LazyLibraryPath`](@ref), [`BundledLazyLibraryPath`](@ref), [`dlopen`](@ref),
+[`dlsym`](@ref), [`add_dependency!`](@ref).
+"""
+mutable struct LazyLibrary
+    # Name and flags to open with
+    const path
+    const flags::UInt32
+
+    # Dependencies that must be loaded before we can load
+    #
+    # The OncePerProcess is introduced here so that any registered dependencies are
+    # always ephemeral to a given process (instead of, e.g., persisting depending
+    # on whether they were added in the process where this LazyLibrary was created)
+    dependencies::Base.OncePerProcess{Vector{LazyLibrary}, InitialDependencies{LazyLibrary}}
+
+    # Function that get called once upon initial load
+    on_load_callback
+    const lock::Base.ReentrantLock
+
+    # Pointer that we eventually fill out upon first `dlopen()`
+    @atomic handle::Ptr{Cvoid}
+    function LazyLibrary(path; flags = default_rtld_flags, dependencies = LazyLibrary[],
+                         on_load_callback = nothing)
+        return new(
+            path,
+            UInt32(flags),
+            Base.OncePerProcess{Vector{LazyLibrary}}(
+                InitialDependencies{LazyLibrary}(dependencies)
+            ),
+            on_load_callback,
+            Base.ReentrantLock(),
+            C_NULL,
+        )
+    end
+end
+
+# We support adding dependencies only because of very special situations
+# such as LBT needing to have OpenBLAS_jll added as a dependency dynamically.
+"""
+    add_dependency!(library::LazyLibrary, dependency::LazyLibrary)
+
+Dynamically add a dependency that must be loaded before `library`. Only needed when
+dependencies cannot be determined at construction time.
+
+!!! warning
+    Dependencies added with this function are **ephemeral** and only persist within the
+    current process. They will not persist across precompilation boundaries.
+
+Prefer specifying dependencies in the `LazyLibrary` constructor when possible.
+
+!!! compat "Julia 1.11"
+    `add_dependency!` was added in Julia 1.11.
+
+See also [`LazyLibrary`](@ref).
+"""
+function add_dependency!(ll::LazyLibrary, dep::LazyLibrary)
+    @lock ll.lock begin
+        push!(ll.dependencies(), dep)
+    end
+end
+
+# Register `jl_libdl_dlopen_func` so that `ccall()` lowering knows
+# how to call `dlopen()`.
+Base.unsafe_store!(cglobal(:jl_libdl_dlopen_func, Any), dlopen)
+
+function dlopen(ll::LazyLibrary, flags::Integer = ll.flags; kwargs...)
+    handle = @atomic :acquire ll.handle
+    if handle == C_NULL
+        @lock ll.lock begin
+            # Check to see if another thread has already run this
+            if ll.handle == C_NULL
+                # Ensure that all dependencies are loaded
+                for dep in ll.dependencies()
+                    dlopen(dep; kwargs...)
+                end
+
+                # Load our library
+                handle = dlopen(string(ll.path), flags; kwargs...)
+                @atomic :release ll.handle = handle
+
+                # Only the thread that loaded the library calls the `on_load_callback()`.
+                if ll.on_load_callback !== nothing
+                    ll.on_load_callback()
+                end
+            end
+        end
+    else
+        # Invoke our on load callback, if it exists
+        if ll.on_load_callback !== nothing
+            # This empty lock protects against the case where we have updated
+            # `ll.handle` in the branch above, but not exited the lock.  We want
+            # a second thread that comes in at just the wrong time to have to wait
+            # for that lock to be released (and thus for the on_load_callback to
+            # have finished), hence the empty lock here. But we want the
+            # on_load_callback thread to bypass this, which will be happen thanks
+            # to the fact that we're using a reentrant lock here.
+            @lock ll.lock begin end
+        end
+    end
+
+    return handle
+end
+dlopen(x::Any) = throw(TypeError(:dlopen, "", Union{Symbol,String,LazyLibrary}, x))
+dlsym(ll::LazyLibrary, args...; kwargs...) = dlsym(dlopen(ll), args...; kwargs...)
+dlpath(ll::LazyLibrary) = dlpath(dlopen(ll))
 end # module Libdl
diff --git a/base/libuv.jl b/base/libuv.jl
index 24a04f5bcad78..5e9bdfaf1e75c 100644
--- a/base/libuv.jl
+++ b/base/libuv.jl
@@ -2,7 +2,7 @@
 
 # Core definitions for interacting with the libuv library from Julia
 
-include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "uv_constants.jl"))  # include($BUILDROOT/base/uv_constants.jl)
+include(string(Base.BUILDROOT, "uv_constants.jl"))  # include($BUILDROOT/base/uv_constants.jl)
 
 # convert UV handle data to julia object, checking for null
 function uv_sizeof_handle(handle)
@@ -26,10 +26,10 @@ for r in uv_req_types
 @eval const $(Symbol("_sizeof_", lowercase(string(r)))) = uv_sizeof_req($r)
 end
 
-uv_handle_data(handle) = ccall(:jl_uv_handle_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
-uv_req_data(handle) = ccall(:jl_uv_req_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
-uv_req_set_data(req, data) = ccall(:jl_uv_req_set_data, Cvoid, (Ptr{Cvoid}, Any), req, data)
-uv_req_set_data(req, data::Ptr{Cvoid}) = ccall(:jl_uv_req_set_data, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), req, data)
+uv_handle_data(handle) = ccall(:uv_handle_get_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
+uv_req_data(handle) = ccall(:uv_req_get_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
+uv_req_set_data(req, data) = ccall(:uv_req_set_data, Cvoid, (Ptr{Cvoid}, Any), req, data)
+uv_req_set_data(req, data::Ptr{Cvoid}) = ccall(:uv_handle_set_data, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), req, data)
 
 macro handle_as(hand, typ)
     return quote
@@ -39,8 +39,15 @@ macro handle_as(hand, typ)
     end
 end
 
-associate_julia_struct(handle::Ptr{Cvoid}, @nospecialize(jlobj)) =
+function _uv_hook_close end
+
+function associate_julia_struct(handle::Ptr{Cvoid}, jlobj::T) where T
+    # This `cfunction` is not used anywhere, but it triggers compilation of this
+    # MethodInstance for `--trim` so that it will be available when dispatched to
+    # by `jl_uv_call_close_callback()`
+    _ = @cfunction(Base._uv_hook_close, Cvoid, (Ref{T},))
     ccall(:jl_uv_associate_julia_struct, Cvoid, (Ptr{Cvoid}, Any), handle, jlobj)
+end
 disassociate_julia_struct(uv) = disassociate_julia_struct(uv.handle)
 disassociate_julia_struct(handle::Ptr{Cvoid}) =
     handle != C_NULL && ccall(:jl_uv_disassociate_julia_struct, Cvoid, (Ptr{Cvoid},), handle)
@@ -52,14 +59,14 @@ iolock_end() = ccall(:jl_iolock_end, Cvoid, ())
 # and should thus not be garbage collected
 const uvhandles = IdDict()
 const preserve_handle_lock = Threads.SpinLock()
-function preserve_handle(x)
+@nospecializeinfer function preserve_handle(@nospecialize(x))
     lock(preserve_handle_lock)
     v = get(uvhandles, x, 0)::Int
     uvhandles[x] = v + 1
     unlock(preserve_handle_lock)
     nothing
 end
-function unpreserve_handle(x)
+@nospecializeinfer function unpreserve_handle(@nospecialize(x))
     lock(preserve_handle_lock)
     v = get(uvhandles, x, 0)::Int
     if v == 0
@@ -82,7 +89,13 @@ struct IOError <: Exception
     IOError(msg::AbstractString, code::Integer) = new(msg, code)
 end
 
-showerror(io::IO, e::IOError) = print(io, "IOError: ", e.msg)
+function showerror(io::IO, e::IOError)
+    print(io, "IOError: ", e.msg)
+    if e.code == UV_ENOENT && '~' in e.msg
+        print(io, "\nMany shells expand '~' to the home directory in unquoted strings. To replicate this behavior, call",
+                  " `expanduser` to expand the '~' character to the user’s home directory.")
+    end
+end
 
 function _UVError(pfx::AbstractString, code::Integer)
     code = Int32(code)
@@ -97,14 +110,24 @@ struverror(err::Int32) = unsafe_string(ccall(:uv_strerror, Cstring, (Int32,), er
 uverrorname(err::Int32) = unsafe_string(ccall(:uv_err_name, Cstring, (Int32,), err))
 
 uv_error(prefix::Symbol, c::Integer) = uv_error(string(prefix), c)
-uv_error(prefix::AbstractString, c::Integer) = c < 0 ? throw(_UVError(prefix, c)) : nothing
+uv_error(prefix::AbstractString, c::Integer) = c < 0 ? _uv_error(prefix, c) : nothing
+_uv_error(prefix::AbstractString, c::Integer) = throw(_UVError(prefix, c))
 
 ## event loop ##
 
 eventloop() = ccall(:jl_global_event_loop, Ptr{Cvoid}, ())
 
-uv_unref(h::Ptr{Cvoid}) = ccall(:uv_unref, Cvoid, (Ptr{Cvoid},), h)
-uv_ref(h::Ptr{Cvoid}) = ccall(:uv_ref, Cvoid, (Ptr{Cvoid},), h)
+function uv_unref(h::Ptr{Cvoid})
+    iolock_begin()
+    ccall(:uv_unref, Cvoid, (Ptr{Cvoid},), h)
+    iolock_end()
+end
+
+function uv_ref(h::Ptr{Cvoid})
+    iolock_begin()
+    ccall(:uv_ref, Cvoid, (Ptr{Cvoid},), h)
+    iolock_end()
+end
 
 function process_events()
     return ccall(:jl_process_events, Int32, ())
@@ -118,18 +141,21 @@ function uv_return_spawn end
 function uv_asynccb end
 function uv_timercb end
 
-function reinit_stdio()
-    global stdin = init_stdio(ccall(:jl_stdin_stream, Ptr{Cvoid}, ()))
-    global stdout = init_stdio(ccall(:jl_stdout_stream, Ptr{Cvoid}, ()))
-    global stderr = init_stdio(ccall(:jl_stderr_stream, Ptr{Cvoid}, ()))
+reinit_stdio() = _reinit_stdio()
+# we need this so it can be called by codegen to print errors, even after
+# reinit_stdio has been redefined by the juliac build script.
+function _reinit_stdio()
+    global stdin = init_stdio(ccall(:jl_stdin_stream, Ptr{Cvoid}, ()))::IO
+    global stdout = init_stdio(ccall(:jl_stdout_stream, Ptr{Cvoid}, ()))::IO
+    global stderr = init_stdio(ccall(:jl_stderr_stream, Ptr{Cvoid}, ()))::IO
     opts = JLOptions()
-    if opts.color != 0
-        have_color = (opts.color == 1)
+    color = colored_text(opts)
+    if !isnothing(color)
         if !isa(stdout, TTY)
-            global stdout = IOContext(stdout, :color => have_color)
+            global stdout = IOContext(stdout, :color => color::Bool)
         end
         if !isa(stderr, TTY)
-            global stderr = IOContext(stderr, :color => have_color)
+            global stderr = IOContext(stderr, :color => color::Bool)
         end
     end
     nothing
diff --git a/base/linking.jl b/base/linking.jl
index fd21ce74c9268..84b53c2134ede 100644
--- a/base/linking.jl
+++ b/base/linking.jl
@@ -1,17 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 module Linking
 
+import Base: isdebugbuild
 import Base.Libc: Libdl
 
-# inlined LLD_jll
-# These get calculated in __init__()
-const PATH = Ref("")
-const LIBPATH = Ref("")
-const PATH_list = String[]
-const LIBPATH_list = String[]
-const lld_path = Ref{String}()
+# from LLD_jll
 const lld_exe = Sys.iswindows() ? "lld.exe" : "lld"
-const dsymutil_path = Ref{String}()
 const dsymutil_exe = Sys.iswindows() ? "dsymutil.exe" : "dsymutil"
 
 if Sys.iswindows()
@@ -47,61 +41,51 @@ function adjust_ENV!(env::Dict, PATH::String, LIBPATH::String, adjust_PATH::Bool
     return env
 end
 
-function __init_lld_path()
+const lld_path = OncePerProcess{String}() do
     # Prefer our own bundled lld, but if we don't have one, pick it up off of the PATH
     # If this is an in-tree build, `lld` will live in `tools`.  Otherwise, it'll be in `private_libexecdir`
     for bundled_lld_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, lld_exe),
                              joinpath(Sys.BINDIR, "..", "tools", lld_exe),
                              joinpath(Sys.BINDIR, lld_exe))
         if isfile(bundled_lld_path)
-            lld_path[] = abspath(bundled_lld_path)
-            return
+            return abspath(bundled_lld_path)
         end
     end
-    lld_path[] = something(Sys.which(lld_exe), lld_exe)
-    return
+    return something(Sys.which(lld_exe), lld_exe)
 end
 
-function __init_dsymutil_path()
-    #Same as with lld but for dsymutil
+const dsymutil_path = OncePerProcess{String}() do
+    # Same as with lld but for dsymutil
     for bundled_dsymutil_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, dsymutil_exe),
                              joinpath(Sys.BINDIR, "..", "tools", dsymutil_exe),
                              joinpath(Sys.BINDIR, dsymutil_exe))
         if isfile(bundled_dsymutil_path)
-            dsymutil_path[] = abspath(bundled_dsymutil_path)
-            return
+            return abspath(bundled_dsymutil_path)
         end
     end
-    dsymutil_path[] = something(Sys.which(dsymutil_exe), dsymutil_exe)
-    return
+    return something(Sys.which(dsymutil_exe), dsymutil_exe)
 end
 
-const VERBOSE = Ref{Bool}(false)
+PATH() = dirname(lld_path())
 
-function __init__()
-    VERBOSE[] = Base.get_bool_env("JULIA_VERBOSE_LINKING", false)
-
-    __init_lld_path()
-    __init_dsymutil_path()
-    PATH[] = dirname(lld_path[])
+const LIBPATH = OncePerProcess{String}() do
     if Sys.iswindows()
         # On windows, the dynamic libraries (.dll) are in Sys.BINDIR ("usr\\bin")
-        append!(LIBPATH_list, [abspath(Sys.BINDIR, Base.LIBDIR, "julia"), Sys.BINDIR])
+        LIBPATH_list = [abspath(Sys.BINDIR, Base.LIBDIR, "julia"), Sys.BINDIR]
     else
-        append!(LIBPATH_list, [abspath(Sys.BINDIR, Base.LIBDIR, "julia"), abspath(Sys.BINDIR, Base.LIBDIR)])
+        LIBPATH_list = [abspath(Sys.BINDIR, Base.LIBDIR, "julia"), abspath(Sys.BINDIR, Base.LIBDIR)]
     end
-    LIBPATH[] = join(LIBPATH_list, pathsep)
-    return
+    return join(LIBPATH_list, pathsep)
 end
 
 function lld(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true)
-    env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH)
-    return Cmd(Cmd([lld_path[]]); env)
+    env = adjust_ENV!(copy(ENV), PATH(), LIBPATH(), adjust_PATH, adjust_LIBPATH)
+    return Cmd(Cmd([lld_path()]); env)
 end
 
 function dsymutil(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true)
-    env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH)
-    return Cmd(Cmd([dsymutil_path[]]); env)
+    env = adjust_ENV!(copy(ENV), PATH(), LIBPATH(), adjust_PATH, adjust_LIBPATH)
+    return Cmd(Cmd([dsymutil_path()]); env)
 end
 
 function ld()
@@ -110,7 +94,7 @@ function ld()
         # LLD supports mingw style linking
         flavor = "gnu"
         m = Sys.ARCH == :x86_64 ? "i386pep" : "i386pe"
-        default_args = `-m $m -Bdynamic --enable-auto-image-base --allow-multiple-definition`
+        default_args = `-m $m -Bdynamic --enable-auto-image-base --allow-multiple-definition --disable-auto-import --disable-runtime-pseudo-reloc`
     elseif Sys.isapple()
         flavor = "darwin"
         arch = Sys.ARCH == :aarch64 ? :arm64 : Sys.ARCH
@@ -134,13 +118,23 @@ else
     "--no-whole-archive"
 end
 
+# Prefer whole_archive to WHOLE_ARCHIVE
+whole_archive(paths::String; is_cc=false) = whole_archive([paths]; is_cc)
+function whole_archive(paths::Vector{String}; is_cc=false)
+    cc_arg(a) = is_cc ? "-Wl,$a" : a
+    if Sys.isapple()
+        Cmd(collect(Iterators.flatmap(p -> (cc_arg("-force_load"), p), paths)))
+    else
+        `$(cc_arg("--whole-archive")) $paths $(cc_arg("--no-whole-archive"))`
+    end
+end
+
 const SHARED = if Sys.isapple()
     "-dylib"
 else
     "-shared"
 end
 
-is_debug() = ccall(:jl_is_debugbuild, Cint, ()) == 1
 libdir() = abspath(Sys.BINDIR, Base.LIBDIR)
 private_libdir() = abspath(Sys.BINDIR, Base.PRIVATE_LIBDIR)
 if Sys.iswindows()
@@ -149,17 +143,27 @@ else
     shlibdir() = libdir()
 end
 
+verbose_linking() = something(Base.get_bool_env("JULIA_VERBOSE_LINKING", false), false)
+
 function link_image_cmd(path, out)
-    LIBDIR = "-L$(libdir())"
     PRIVATE_LIBDIR = "-L$(private_libdir())"
     SHLIBDIR = "-L$(shlibdir())"
-    LIBS = is_debug() ? ("-ljulia-debug", "-ljulia-internal-debug") : ("-ljulia", "-ljulia-internal")
+    LIBS = isdebugbuild() ? ("-ljulia-debug", "-ljulia-internal-debug") :
+                        ("-ljulia", "-ljulia-internal")
     @static if Sys.iswindows()
-        LIBS = (LIBS..., "-lopenlibm", "-lssp", "-lgcc_s", "-lgcc", "-lmsvcrt")
+        LIBS = (LIBS..., "-lopenlibm", "-lgcc_s", "-lgcc", "-lmsvcrt")
+        if isdebugbuild()
+            LIBS = (LIBS..., "-lssp")
+            if isfile(joinpath(private_libdir(), "libmingwex.a"))
+                # In MinGW 11, the ssp implementation was moved from libssp to
+                # libmingwex with ssp only being a stub. See #59020.
+                LIBS = (LIBS..., "-lmingwex", "-lkernel32")
+            end
+        end
     end
 
-    V = VERBOSE[] ? "--verbose" : ""
-    `$(ld()) $V $SHARED -o $out $WHOLE_ARCHIVE $path $NO_WHOLE_ARCHIVE $LIBDIR $PRIVATE_LIBDIR $SHLIBDIR $LIBS`
+    V = verbose_linking() ? "--verbose" : ""
+    `$(ld()) $V $SHARED -o $out $(whole_archive(path)) $PRIVATE_LIBDIR $SHLIBDIR $LIBS`
 end
 
 function link_image(path, out, internal_stderr::IO=stderr, internal_stdout::IO=stdout)
diff --git a/base/loading.jl b/base/loading.jl
index 1ea4412ecc68f..32617b77ad72b 100644
--- a/base/loading.jl
+++ b/base/loading.jl
@@ -179,16 +179,15 @@ function dummy_uuid(project_file::String)
     end
 end
 
-## package path slugs: turning UUID + SHA1 into a pair of 4-byte "slugs" ##
+## package path slugs: turning UUID + SHA1 into a pair of 5-byte "slugs" ##
 
 const slug_chars = String(['A':'Z'; 'a':'z'; '0':'9'])
 
 function slug(x::UInt32, p::Int)
-    y::UInt32 = x
     sprint(sizehint=p) do io
-        n = length(slug_chars)
+        n = UInt32(length(slug_chars))
         for i = 1:p
-            y, d = divrem(y, n)
+            x, d = divrem(x, n)
             write(io, slug_chars[1+d])
         end
     end
@@ -250,25 +249,39 @@ function get_updated_dict(p::TOML.Parser, f::CachedTOMLDict)
     return f.d
 end
 
+"""
+    struct PkgLoadSpec
+
+A PkgLoadSpec is the result of a `locate_package` operation and specifies how
+and wherefrom to load a julia package.
+"""
+struct PkgLoadSpec
+    path::String
+    julia_syntax_version::VersionNumber
+end
+
 struct LoadingCache
     load_path::Vector{String}
     dummy_uuid::Dict{String, UUID}
     env_project_file::Dict{String, Union{Bool, String}}
     project_file_manifest_path::Dict{String, Union{Nothing, String}}
     require_parsed::Set{String}
-    identified_where::Dict{Tuple{PkgId, String}, Union{Nothing, Tuple{PkgId, Union{Nothing, String}}}}
-    identified::Dict{String, Union{Nothing, Tuple{PkgId, Union{Nothing, String}}}}
-    located::Dict{Tuple{PkgId, Union{String, Nothing}}, Union{Tuple{Union{String, Nothing}, Union{String, Nothing}}, Nothing}}
+    identified_where::Dict{Tuple{PkgId, String}, Union{Nothing, Tuple{PkgId, String}}}
+    identified::Dict{String, Union{Nothing, Tuple{PkgId, String}}}
+    located::Dict{Tuple{PkgId, Union{String, Nothing}}, Union{Tuple{PkgLoadSpec, String}, Nothing}}
 end
-const LOADING_CACHE = Ref{Union{LoadingCache, Nothing}}(nothing)
+const LOADING_CACHE = Ref{Union{LoadingCache, Nothing}}(nothing) # n.b.: all access to and through this are protected by require_lock
 LoadingCache() = LoadingCache(load_path(), Dict(), Dict(), Dict(), Set(), Dict(), Dict(), Dict())
 
 
-struct TOMLCache
-    p::TOML.Parser
+struct TOMLCache{Dates}
+    p::TOML.Parser{Dates}
     d::Dict{String, CachedTOMLDict}
 end
-const TOML_CACHE = TOMLCache(TOML.Parser(), Dict{String, Dict{String, Any}}())
+TOMLCache(p::TOML.Parser) = TOMLCache(p, Dict{String, CachedTOMLDict}())
+TOMLCache(p::TOML.Parser, d::Dict{String, Dict{String, Any}}) = TOMLCache(p, convert(Dict{String, CachedTOMLDict}, d))
+
+const TOML_CACHE = TOMLCache(TOML.Parser{nothing}())
 
 parsed_toml(project_file::AbstractString) = parsed_toml(project_file, TOML_CACHE, require_lock)
 function parsed_toml(project_file::AbstractString, toml_cache::TOMLCache, toml_lock::ReentrantLock)
@@ -298,66 +311,104 @@ end
 ## package identification: determine unique identity of package to be loaded ##
 
 # Used by Pkg but not used in loading itself
-function find_package(arg)
+function find_package(arg) # ::Union{Nothing,String}
+    @lock require_lock begin
     pkgenv = identify_package_env(arg)
     pkgenv === nothing && return nothing
     pkg, env = pkgenv
     return locate_package(pkg, env)
+    end
+end
+
+# is there a better/faster ground truth?
+function is_stdlib(pkgid::PkgId)
+    pkgid.name in readdir(Sys.STDLIB) || return false
+    stdlib_root = joinpath(Sys.STDLIB, pkgid.name)
+    project_file = locate_project_file(stdlib_root)
+    if project_file isa String
+        d = parsed_toml(project_file)
+        uuid = get(d, "uuid", nothing)
+        if uuid !== nothing
+            return UUID(uuid) == pkgid.uuid
+        end
+    end
+    return false
 end
 
 """
     Base.identify_package_env(name::String)::Union{Tuple{PkgId, String}, Nothing}
-    Base.identify_package_env(where::Union{Module,PkgId}, name::String)::Union{Tuple{PkgId, String} Nothing}
+    Base.identify_package_env(where::Union{Module,PkgId}, name::String)::Union{Tuple{PkgId, Union{String, Nothing}}, Nothing}
 
 Same as [`Base.identify_package`](@ref) except that the path to the environment where the package is identified
-is also returned.
+is also returned, except when the identity is not identified.
 """
 identify_package_env(where::Module, name::String) = identify_package_env(PkgId(where), name)
-function identify_package_env(where::PkgId, name::String)
-    cache = LOADING_CACHE[]
-    if cache !== nothing
-        pkg_env = get(cache.identified_where, (where, name), nothing)
-        pkg_env === nothing || return pkg_env
-    end
-    pkg_env = nothing
-    if where.name === name
-        pkg_env = where, nothing
-    elseif where.uuid === nothing
-        pkg_env = identify_package_env(name) # ignore `where`
-    else
-        for env in load_path()
-            pkgid = manifest_deps_get(env, where, name)
-            pkgid === nothing && continue # not found--keep looking
-            if pkgid.uuid !== nothing
-                pkg_env = pkgid, env # found in explicit environment--use it
-            end
-            break # found in implicit environment--return "not found"
+function identify_package_env(where::Union{PkgId, Nothing}, name::String)
+    # Special cases
+    if where !== nothing
+        if where.name === name
+            # Project tries to load itself
+            return (where, nothing)
+        elseif where.uuid === nothing
+            # Project without Project.toml - treat as toplevel load
+            where = nothing
         end
     end
-    if cache !== nothing
-        cache.identified_where[(where, name)] = pkg_env
-    end
-    return pkg_env
-end
-function identify_package_env(name::String)
+
+    # Check if we have a cached answer for this
+    assert_havelock(require_lock)
     cache = LOADING_CACHE[]
+    cache_key = where === nothing ? name : (where, name)
     if cache !== nothing
-        pkg_env = get(cache.identified, name, nothing)
-        pkg_env === nothing || return pkg_env
+        env_cache = where === nothing ? cache.identified : cache.identified_where
+        pkg_env = get(env_cache, cache_key, missing)
+        pkg_env === missing || return pkg_env
     end
+
+    # Main part: Search through all environments in the load path to see if we have
+    # a matching entry.
     pkg_env = nothing
     for env in load_path()
-        pkg = project_deps_get(env, name)
-        if pkg !== nothing
-            pkg_env = pkg, env # found--return it
-            break
+        pkgid = environment_deps_get(env, where, name)
+        # If we didn't find `where` at all, keep looking through the environment stack
+        pkgid === nothing && continue
+        if pkgid.uuid !== nothing || where === nothing
+            pkg_env = pkgid, env
         end
+        # If we don't have pkgid.uuid, still break here - this is a sentinel that indicates
+        # that we've found `where` but it did not have the required dependency. We terminate the search.
+        break
     end
+    if pkg_env === nothing && where !== nothing && is_stdlib(where)
+        # if not found it could be that manifests are from a different julia version/commit
+        # where stdlib dependencies have changed, so look up deps based on the stdlib Project.toml
+        # as a fallback
+        pkg_env = identify_stdlib_project_dep(where, name)
+    end
+
+    # Cache the result
     if cache !== nothing
-        cache.identified[name] = pkg_env
+        env_cache[cache_key] = pkg_env
     end
     return pkg_env
 end
+identify_package_env(name::String) = identify_package_env(nothing, name)
+
+function identify_stdlib_project_dep(stdlib::PkgId, depname::String)
+    @debug """
+    Stdlib $(repr("text/plain", stdlib)) is trying to load `$depname`
+    which is not listed as a dep in the load path manifests, so resorting to search
+    in the stdlib Project.tomls for true deps"""
+    stdlib_projfile = locate_project_file(joinpath(Sys.STDLIB, stdlib.name))
+    stdlib_projfile === nothing && return nothing
+    found = explicit_project_deps_get(stdlib_projfile, depname)
+    if found !== nothing
+        @debug "$(repr("text/plain", stdlib)) indeed depends on $depname in project $stdlib_projfile"
+        pkgid = PkgId(found, depname)
+        return pkgid, stdlib_projfile
+    end
+    return nothing
+end
 
 _nothing_or_first(x) = x === nothing ? nothing : first(x)
 
@@ -371,13 +422,13 @@ its `PkgId`, or `nothing` if it cannot be found.
 If only the `name` argument is provided, it searches each environment in the
 stack and its named direct dependencies.
 
-There `where` argument provides the context from where to search for the
+The `where` argument provides the context from where to search for the
 package: in this case it first checks if the name matches the context itself,
 otherwise it searches all recursive dependencies (from the resolved manifest of
 each environment) until it locates the context `where`, and from there
 identifies the dependency with the corresponding name.
 
-```julia-repl
+```jldoctest
 julia> Base.identify_package("Pkg") # Pkg is a dependency of the default environment
 Pkg [44cfe95a-1eb2-52ea-b672-e2afdf69b78f]
 
@@ -386,32 +437,34 @@ julia> using LinearAlgebra
 julia> Base.identify_package(LinearAlgebra, "Pkg") # Pkg is not a dependency of LinearAlgebra
 ```
 """
-identify_package(where::Module, name::String) = _nothing_or_first(identify_package_env(where, name))
-identify_package(where::PkgId, name::String)  = _nothing_or_first(identify_package_env(where, name))
-identify_package(name::String)                = _nothing_or_first(identify_package_env(name))
+identify_package(where::Module, name::String) = @lock require_lock _nothing_or_first(identify_package_env(where, name))
+identify_package(where::PkgId, name::String)  = @lock require_lock _nothing_or_first(identify_package_env(where, name))
+identify_package(name::String)                = @lock require_lock _nothing_or_first(identify_package_env(name))
 
-function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)
+function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)::Union{Nothing,Tuple{PkgLoadSpec, String}}
+    assert_havelock(require_lock)
     cache = LOADING_CACHE[]
     if cache !== nothing
-        pathenv = get(cache.located, (pkg, stopenv), nothing)
-        pathenv === nothing || return pathenv
+        specenv = get(cache.located, (pkg, stopenv), missing)
+        specenv === missing || return specenv
     end
-    path = nothing
+    spec = nothing
     env′ = nothing
+    syntax_version = VERSION
     if pkg.uuid === nothing
+        # The project we're looking for does not have a Project.toml (n.b. - present
+        # `Project.toml` without UUID gets a path-based dummy UUID). It must have
+        # come from an implicit manifest environment, so go through those only.
+        # N.B.: Implicitly loaded packages do not participate in syntax versioning.
         for env in load_path()
-            env′ = env
-            # look for the toplevel pkg `pkg.name` in this entry
-            found = project_deps_get(env, pkg.name)
-            if found !== nothing
+            project_file = env_project_file(env)
+            (project_file isa Bool && project_file) || continue
+            found = implicit_manifest_pkgid(env, pkg.name)
+            if found !== nothing && found.uuid === nothing
                 @assert found.name == pkg.name
-                if found.uuid === nothing
-                    # pkg.name is present in this directory or project file,
-                    # return the path the entry point for the code, if it could be found
-                    # otherwise, signal failure
-                    path = implicit_manifest_uuid_path(env, pkg)
-                    @goto done
-                end
+                spec = implicit_manifest_uuid_load_spec(env, pkg)
+                env′ = env
+                @goto done
             end
             if !(loading_extension || precompiling_extension)
                 stopenv == env && @goto done
@@ -419,34 +472,40 @@ function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)
         end
     else
         for env in load_path()
-            env′ = env
-            path = manifest_uuid_path(env, pkg)
+            spec = manifest_uuid_load_spec(env, pkg)
             # missing is used as a sentinel to stop looking further down in envs
-            if path === missing
-                path = nothing
+            if spec === missing
+                is_stdlib(pkg) && @goto stdlib_fallback
+                spec = nothing
                 @goto done
             end
-            if path !== nothing
-                path = entry_path(path, pkg.name)
+            if spec !== nothing
+                env′ = env
                 @goto done
             end
             if !(loading_extension || precompiling_extension)
                 stopenv == env && break
             end
         end
+        @label stdlib_fallback
         # Allow loading of stdlibs if the name/uuid are given
         # e.g. if they have been explicitly added to the project/manifest
-        mbypath = manifest_uuid_path(Sys.STDLIB, pkg)
-        if mbypath isa String
-            path = entry_path(mbypath, pkg.name)
+        mbyspec = manifest_uuid_load_spec(Sys.STDLIB, pkg)
+        if mbyspec isa PkgLoadSpec
+            spec = mbyspec
+            env′ = Sys.STDLIB
             @goto done
         end
     end
     @label done
+    if spec !== nothing && !isfile_casesensitive(spec.path)
+        spec = nothing
+    end
     if cache !== nothing
-        cache.located[(pkg, stopenv)] = path, env′
+        cache.located[(pkg, stopenv)] = spec === nothing ? nothing : (spec, something(env′))
     end
-    return path, env′
+    spec === nothing && return nothing
+    return spec, something(env′)
 end
 
 """
@@ -464,7 +523,19 @@ julia> Base.locate_package(pkg)
 ```
 """
 function locate_package(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)::Union{Nothing,String}
-    _nothing_or_first(locate_package_env(pkg, stopenv))
+    @lock require_lock begin
+        specenv = locate_package_env(pkg, stopenv)
+        specenv === nothing && return nothing
+        specenv[1].path
+    end
+end
+
+function locate_package_load_spec(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)::Union{Nothing,PkgLoadSpec}
+    @lock require_lock begin
+        specenv = locate_package_env(pkg, stopenv)
+        specenv === nothing && return nothing
+        specenv[1]
+    end
 end
 
 """
@@ -475,11 +546,12 @@ or `nothing` if `m` was not imported from a package.
 
 Use [`dirname`](@ref) to get the directory part and [`basename`](@ref)
 to get the file name part of the path.
+
+See also [`pkgdir`](@ref).
 """
 function pathof(m::Module)
     @lock require_lock begin
-    pkgid = get(module_keys, m, nothing)
-    pkgid === nothing && return nothing
+    pkgid = PkgId(m)
     origin = get(pkgorigins, pkgid, nothing)
     origin === nothing && return nothing
     path = origin.path
@@ -499,6 +571,8 @@ package root.
 To get the root directory of the package that implements the current module
 the form `pkgdir(@__MODULE__)` can be used.
 
+If an extension module is given, the root of the parent package is returned.
+
 ```julia-repl
 julia> pkgdir(Foo)
 "/path/to/Foo.jl"
@@ -507,6 +581,8 @@ julia> pkgdir(Foo, "src", "file.jl")
 "/path/to/Foo.jl/src/file.jl"
 ```
 
+See also [`pathof`](@ref).
+
 !!! compat "Julia 1.7"
     The optional argument `paths` requires at least Julia 1.7.
 """
@@ -514,7 +590,19 @@ function pkgdir(m::Module, paths::String...)
     rootmodule = moduleroot(m)
     path = pathof(rootmodule)
     path === nothing && return nothing
-    return joinpath(dirname(dirname(path)), paths...)
+    original = path
+    path, base = splitdir(dirname(path))
+    if base == "src"
+        # package source in `../src/Foo.jl`
+    elseif base == "ext"
+        # extension source in `../ext/FooExt.jl`
+    elseif basename(path) == "ext"
+        # extension source in `../ext/FooExt/FooExt.jl`
+        path = dirname(path)
+    else
+        error("Unexpected path structure for module source: $original")
+    end
+    return joinpath(path, paths...)
 end
 
 function get_pkgversion_from_path(path)
@@ -532,9 +620,8 @@ end
 """
     pkgversion(m::Module)
 
-Return the version of the package that imported module `m`,
-or `nothing` if `m` was not imported from a package, or imported
-from a package without a version field set.
+If the module `m` belongs to a versioned package, return the
+version number of that package. Otherwise return `nothing`.
 
 The version is read from the package's Project.toml during package
 load.
@@ -562,7 +649,12 @@ end
 ## generic project & manifest API ##
 
 const project_names = ("JuliaProject.toml", "Project.toml")
-const manifest_names = ("JuliaManifest.toml", "Manifest.toml")
+const manifest_names = (
+    "JuliaManifest-v$(VERSION.major).$(VERSION.minor).toml",
+    "Manifest-v$(VERSION.major).$(VERSION.minor).toml",
+    "JuliaManifest.toml",
+    "Manifest.toml",
+)
 const preferences_names = ("JuliaLocalPreferences.toml", "LocalPreferences.toml")
 
 function locate_project_file(env::String)
@@ -600,35 +692,75 @@ function env_project_file(env::String)::Union{Bool,String}
     end
 end
 
-function project_deps_get(env::String, name::String)::Union{Nothing,PkgId}
-    project_file = env_project_file(env)
-    if project_file isa String
-        pkg_uuid = explicit_project_deps_get(project_file, name)
-        pkg_uuid === nothing || return PkgId(pkg_uuid, name)
-    elseif project_file
-        return implicit_project_deps_get(env, name)
+function base_project(project_file)
+    home_dir = abspath(homedir())
+    project_dir = abspath(dirname(project_file))
+    current_dir = project_dir
+    # Only stop at home boundary if we started under home
+    started_in_home = startswith(project_dir, home_dir)
+
+    while true
+        parent_dir = dirname(current_dir)
+        # Stop if we've reached root
+        if parent_dir == current_dir
+            return nothing
+        end
+        # Stop if we started in home and have now left it
+        if started_in_home && !startswith(parent_dir, home_dir)
+            return nothing
+        end
+
+        base_project_file = env_project_file(parent_dir)
+        if base_project_file isa String
+            d = parsed_toml(base_project_file)
+            workspace = get(d, "workspace", nothing)::Union{Dict{String, Any}, Nothing}
+            if workspace !== nothing
+                projects = get(workspace, "projects", nothing)::Union{Vector{String}, Nothing, String}
+                if projects isa Vector
+                    # Check if any project in the workspace matches the original project
+                    workspace_root = dirname(base_project_file)
+                    for project in projects
+                        project_path = joinpath(workspace_root, project)
+                        if isdir(project_path)
+                            if samefile(project_path, project_dir)
+                                return base_project_file
+                            end
+                        end
+                    end
+                end
+            end
+        end
+        current_dir = parent_dir
     end
-    return nothing
 end
 
-function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothing,PkgId}
-    uuid = where.uuid
-    @assert uuid !== nothing
-    project_file = env_project_file(env)
-    if project_file isa String
-        # first check if `where` names the Project itself
+function package_get_here(project_file, name::String)
+    # if `where` matches the project, use [deps] section as manifest, and stop searching
+    pkg_uuid = explicit_project_deps_get(project_file, name)
+    pkg_uuid === nothing && return PkgId(name)
+    return PkgId(pkg_uuid, name)
+end
+
+function package_get(project_file, where::Union{Nothing, PkgId}, name::String)
+    if where !== nothing
         proj = project_file_name_uuid(project_file, where.name)
-        if proj == where
-            # if `where` matches the project, use [deps] section as manifest, and stop searching
-            pkg_uuid = explicit_project_deps_get(project_file, name)
-            return PkgId(pkg_uuid, name)
-        end
-        d = parsed_toml(project_file)
-        exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing}
-        if exts !== nothing
-            # Check if `where` is an extension of the project
-            if where.name in keys(exts) && where.uuid == uuid5(proj.uuid::UUID, where.name)
-                # Extensions can load weak deps...
+        proj != where && return nothing
+    end
+    return package_get_here(project_file, name)
+end
+
+ext_may_load_weakdep(exts::String, name::String) = exts == name
+ext_may_load_weakdep(exts::Vector{String}, name::String) = name in exts
+
+function package_extension_get(project_file, where::PkgId, name::String)
+    d = parsed_toml(project_file)
+    exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing}
+    if exts !== nothing
+        proj = project_file_name_uuid(project_file, where.name)
+        # Check if `where` is an extension of the project
+        if where.name in keys(exts) && where.uuid == uuid5(proj.uuid::UUID, where.name)
+            # Extensions can load weak deps if they are an extension trigger
+            if ext_may_load_weakdep(exts[where.name]::Union{String, Vector{String}}, name)
                 weakdeps = get(d, "weakdeps", nothing)::Union{Dict{String, Any}, Nothing}
                 if weakdeps !== nothing
                     wuuid = get(weakdeps, name, nothing)::Union{String, Nothing}
@@ -636,35 +768,110 @@ function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothi
                         return PkgId(UUID(wuuid), name)
                     end
                 end
-                # ... and they can load same deps as the project itself
-                mby_uuid = explicit_project_deps_get(project_file, name)
-                mby_uuid === nothing || return PkgId(mby_uuid, name)
             end
+            # ... and they can load same deps as the project itself
+            return package_get_here(project_file, name)
         end
-        # look for manifest file and `where` stanza
-        return explicit_manifest_deps_get(project_file, where, name)
-    elseif project_file
-        # if env names a directory, search it
-        return implicit_manifest_deps_get(env, where, name)
     end
     return nothing
 end
 
-function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String,Missing}
+function environment_deps_get(env::String, where::Union{Nothing,PkgId}, name::String)::Union{Nothing,PkgId}
+    @assert where === nothing || where.uuid !== nothing
+    project_file = env_project_file(env)
+    implicit_manifest = !(project_file isa String)
+    if implicit_manifest
+        project_file || return nothing
+        if where === nothing
+            # Toplevel load with a directory (implicit manifest) - all we look for is the
+            # existence of the package name in the directory.
+            pkg = implicit_manifest_pkgid(env, name)
+            return pkg
+        end
+        project_file = implicit_manifest_project(env, where)
+        project_file === nothing && return nothing
+    end
+
+    # Are we
+    #    a) loading into a top-level project itself
+    #    b) loading into a non-top-level project that was part of an implicit
+    #       manifest environment (and for which we found the project file above)
+    #    c) performing a top-level load (where === nothing) - i.e. we're looking
+    #       at an environment's project file.
+    #
+    # If so, we may load either:
+    #   I: the project itself (if name matches where)
+    #   II: a dependency from [deps] section of the project file
+    #
+    # N.B.: Here "top-level" includes package loaded from an implicit manifest, which
+    #       uses the same code path. Otherwise this is the active project.
+    pkg = package_get(project_file, where, name)
+    if pkg !== nothing
+        if where === nothing && pkg.uuid === nothing
+            # This is a top-level load - even though we didn't find the dependency
+            # here, we still want to keep looking through the top-level environment stack.
+            return nothing
+        end
+        return pkg
+    end
+
+    @assert where !== nothing
+
+    # Are we an extension of a project from cases a), b) above
+    # If so, in addition to I, II above, we get:
+    #   III: A dependency from [weakdeps] section of the project file as long
+    #        as it is an extension trigger for `where` in the `extensions` section.
+    pkg = package_extension_get(project_file, where, name)
+    pkg === nothing || return pkg
+
+    if implicit_manifest
+        # With an implicit manifest, getting here means that our (implicit) environment
+        # *has* the package `where`. If we don't find it, it just means that `where` doesn't
+        # have `name` as a dependency - c.f. the analogous case in `explicit_manifest_deps_get`.
+        return PkgId(name)
+    end
+
+    # All other cases, dependencies come from the (top-level) manifest
+    return explicit_manifest_deps_get(project_file, where, name)
+end
+
+function manifest_uuid_load_spec(env::String, pkg::PkgId)::Union{Nothing,PkgLoadSpec,Missing}
     project_file = env_project_file(env)
     if project_file isa String
         proj = project_file_name_uuid(project_file, pkg.name)
         if proj == pkg
             # if `pkg` matches the project, return the project itself
-            return project_file_path(project_file)
+            return project_file_load_spec(project_file, pkg.name)
         end
-        mby_ext = project_file_ext_path(project_file, pkg.name)
+        mby_ext = project_file_ext_load_spec(project_file, pkg)
         mby_ext === nothing || return mby_ext
         # look for manifest file and `where` stanza
-        return explicit_manifest_uuid_path(project_file, pkg)
+        return explicit_manifest_uuid_load_spec(project_file, pkg)
     elseif project_file
         # if env names a directory, search it
-        return implicit_manifest_uuid_path(env, pkg)
+        # Implicit environments do not participate in syntax versioning
+        proj = implicit_manifest_uuid_load_spec(env, pkg)
+        proj === nothing || return proj
+        # if not found, this might be an extension - first we fast path needing
+        # to scan the whole directory for a matching extension by peeking at
+        # EXT_PRIMED. However, this only works if the parent package was loaded.
+        # This is usually the case, but not always, e.g. in precompilation.
+        triggers = get(EXT_PRIMED, pkg, nothing)
+        if triggers !== nothing
+            parentid = triggers[1]
+            _, parent_project_file = entry_point_and_project_file(env, parentid.name)
+            if parent_project_file !== nothing
+                parentproj = project_file_name_uuid(parent_project_file, parentid.name)
+                if parentproj == parentid
+                    mby_ext = project_file_ext_load_spec(parent_project_file, pkg)
+                    mby_ext === nothing || return mby_ext
+                end
+            end
+        else
+            # We still need to scan the whole directory for extensions.
+            ext_ls, ext_proj = implicit_env_project_file_extension(env, pkg)
+            ext_ls === nothing || return ext_ls
+        end
     end
     return nothing
 end
@@ -676,13 +883,14 @@ function find_ext_path(project_path::String, extname::String)
     return joinpath(project_path, "ext", extname * ".jl")
 end
 
-function project_file_ext_path(project_file::String, name::String)
+function project_file_ext_load_spec(project_file::String, ext::PkgId)
     d = parsed_toml(project_file)
-    p = project_file_path(project_file)
+    p = dirname(project_file)
     exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing}
     if exts !== nothing
-        if name in keys(exts)
-            return find_ext_path(p, name)
+        if ext.name in keys(exts) && ext.uuid == uuid5(UUID(d["uuid"]::String), ext.name)
+            # Syntax version of the main package applies to its extensions
+            return PkgLoadSpec(find_ext_path(p, ext.name), project_get_syntax_version(d))
         end
     end
     return nothing
@@ -697,9 +905,55 @@ function project_file_name_uuid(project_file::String, name::String)::PkgId
     return PkgId(uuid, name)
 end
 
-function project_file_path(project_file::String)
+const NON_VERSIONED_SYNTAX = v"1.13"
+
+function project_get_syntax_version(d::Dict)
+    # Syntax Evolution. First check syntax.julia_version entry
+    sv = nothing
+    ds = get(d, "syntax", nothing)
+    if ds !== nothing
+        sv = VersionNumber(get(ds, "julia_version", nothing))
+    end
+    # If not found, default to minimum(compat["julia"])
+    if sv === nothing
+        cs = get(d, "compat", nothing)
+        if cs !== nothing
+            jv = get(cs, "julia", nothing)
+            if jv !== nothing
+                sv = VersionNumber(minimum(semver_spec(jv)).t...)
+            end
+        end
+    end
+    # Finally, if neither of those are set, default to the current Julia version.
+    # N.B.: This choice is less "compatible" than defaulting to a fixed older version.
+    # However, it avoids surprises from moving over scripts and REPL code to packages
+    if sv === nothing
+        sv = VERSION
+    elseif sv <= NON_VERSIONED_SYNTAX
+        # Syntax versioning was first introduced in Julia 1.14 - we do not support
+        # going back to versions before syntax version 1.13.
+        sv = NON_VERSIONED_SYNTAX
+    end
+    return sv
+end
+
+function project_file_load_spec(project_file::String, name::String)
     d = parsed_toml(project_file)
-    joinpath(dirname(project_file), get(d, "path", "")::String)
+    entryfile = get(d, "path", nothing)::Union{String, Nothing}
+    # "path" entry in project file is soft deprecated
+    if entryfile === nothing
+        entryfile = get(d, "entryfile", nothing)::Union{String, Nothing}
+    end
+    sv = project_get_syntax_version(d)
+    return PkgLoadSpec(entry_path(dirname(project_file), name, entryfile), sv)
+end
+
+function workspace_manifest(project_file)
+    base = base_project(project_file)
+    if base !== nothing
+        return project_file_manifest_path(base)
+    end
+    return nothing
 end
 
 # find project file's corresponding manifest file
@@ -711,7 +965,12 @@ function project_file_manifest_path(project_file::String)::Union{Nothing,String}
         manifest_path === missing || return manifest_path
     end
     dir = abspath(dirname(project_file))
+    isfile_casesensitive(project_file) || return nothing
     d = parsed_toml(project_file)
+    base_manifest = workspace_manifest(project_file)
+    if base_manifest !== nothing
+        return base_manifest
+    end
     explicit_manifest = get(d, "manifest", nothing)::Union{String, Nothing}
     manifest_path = nothing
     if explicit_manifest !== nothing
@@ -752,33 +1011,47 @@ end
 # given a project directory (implicit env from LOAD_PATH) and a name,
 # find an entry point for `name`, and see if it has an associated project file
 function entry_point_and_project_file(dir::String, name::String)::Union{Tuple{Nothing,Nothing},Tuple{String,Nothing},Tuple{String,String}}
-    path = normpath(joinpath(dir, "$name.jl"))
-    isfile_casesensitive(path) && return path, nothing
-    dir = joinpath(dir, name)
-    path, project_file = entry_point_and_project_file_inside(dir, name)
+    dir_name = joinpath(dir, name)
+    path, project_file = entry_point_and_project_file_inside(dir_name, name)
     path === nothing || return path, project_file
-    dir = dir * ".jl"
-    path, project_file = entry_point_and_project_file_inside(dir, name)
+    dir_jl = dir_name * ".jl"
+    path, project_file = entry_point_and_project_file_inside(dir_jl, name)
     path === nothing || return path, project_file
+    # check for less likely case with a bare file and no src directory last to minimize stat calls
+    path = normpath(joinpath(dir, "$name.jl"))
+    isfile_casesensitive(path) && return path, nothing
+    return nothing, nothing
+end
+
+# Find the project file for the extension `ext` in the implicit env `dir``
+function implicit_env_project_file_extension(dir::String, ext::PkgId)
+    for pkg in readdir(dir; join=true)
+        project_file = env_project_file(pkg)
+        project_file isa String || continue
+        ls = project_file_ext_load_spec(project_file, ext)
+        if ls !== nothing
+            return ls, project_file
+        end
+    end
     return nothing, nothing
 end
 
-# given a path and a name, return the entry point
-function entry_path(path::String, name::String)::Union{Nothing,String}
+# given a path, name, and possibly an entryfile, return the entry point
+function entry_path(path::String, name::String, entryfile::Union{Nothing,String})::String
     isfile_casesensitive(path) && return normpath(path)
-    path = normpath(joinpath(path, "src", "$name.jl"))
-    isfile_casesensitive(path) && return path
-    return nothing # source not found
+    entrypoint = entryfile === nothing ? joinpath("src", "$name.jl") : entryfile
+    return normpath(joinpath(path, entrypoint))
 end
 
 ## explicit project & manifest API ##
 
 # find project file root or deps `name => uuid` mapping
+# `ext` is the name of the extension if `name` is loaded from one
 # return `nothing` if `name` is not found
 function explicit_project_deps_get(project_file::String, name::String)::Union{Nothing,UUID}
     d = parsed_toml(project_file)
-    root_uuid = dummy_uuid(project_file)
     if get(d, "name", nothing)::Union{String, Nothing} === name
+        root_uuid = dummy_uuid(project_file)
         uuid = get(d, "uuid", nothing)::Union{String, Nothing}
         return uuid === nothing ? root_uuid : UUID(uuid)
     end
@@ -813,85 +1086,96 @@ function get_deps(raw_manifest::Dict)
     end
 end
 
-# find `where` stanza and return the PkgId for `name`
-# return `nothing` if it did not find `where` (indicating caller should continue searching)
+function dep_stanza_get(stanza::Dict{String, Any}, name::String)::Union{Nothing, PkgId}
+    for (dep, uuid) in stanza
+        uuid::String
+        if dep === name
+            return PkgId(UUID(uuid), name)
+        end
+    end
+    return nothing
+end
+
+function dep_stanza_get(stanza::Vector{String}, name::String)::Union{Nothing, PkgId}
+    name in stanza && return PkgId(name)
+    return nothing
+end
+
+dep_stanza_get(stanza::Nothing, name::String) = nothing
+
 function explicit_manifest_deps_get(project_file::String, where::PkgId, name::String)::Union{Nothing,PkgId}
     manifest_file = project_file_manifest_path(project_file)
     manifest_file === nothing && return nothing # manifest not found--keep searching LOAD_PATH
     d = get_deps(parsed_toml(manifest_file))
-    found_where = false
-    found_name = false
     for (dep_name, entries) in d
         entries::Vector{Any}
         for entry in entries
             entry = entry::Dict{String, Any}
             uuid = get(entry, "uuid", nothing)::Union{String, Nothing}
             uuid === nothing && continue
+            # deps is either a list of names (deps = ["DepA", "DepB"]) or
+            # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."}
+            deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing}
+            local dep::Union{Nothing, PkgId}
             if UUID(uuid) === where.uuid
-                found_where = true
-                # deps is either a list of names (deps = ["DepA", "DepB"]) or
-                # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."}
-                deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing}
-                if deps isa Vector{String}
-                    found_name = name in deps
-                    break
-                elseif deps isa Dict{String, Any}
-                    deps = deps::Dict{String, Any}
-                    for (dep, uuid) in deps
-                        uuid::String
-                        if dep === name
-                            return PkgId(UUID(uuid), name)
-                        end
-                    end
-                end
-            else # Check for extensions
+                dep = dep_stanza_get(deps, name)
+
+                # We found `where` in this environment, but it did not have a deps entry for
+                # `name`. This is likely because the dependency was modified without a corresponding
+                # change to dependency's Project or our Manifest. Return a sentinel here indicating
+                # that we know the package, but do not know its UUID. The caller will terminate the
+                # search and provide an appropriate error to the user.
+                dep === nothing && return PkgId(name)
+            else
+                # Check if we're trying to load into an extension of this package
                 extensions = get(entry, "extensions", nothing)
                 if extensions !== nothing
                     if haskey(extensions, where.name) && where.uuid == uuid5(UUID(uuid), where.name)
-                        found_where = true
                         if name == dep_name
+                            # Extension loads its base package
                             return PkgId(UUID(uuid), name)
                         end
                         exts = extensions[where.name]::Union{String, Vector{String}}
-                        if (exts isa String && name == exts) || (exts isa Vector{String} && name in exts)
-                            weakdeps = get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing}
-                            if weakdeps !== nothing
-                                if weakdeps isa Vector{String}
-                                    found_name = name in weakdeps
-                                    break
-                                elseif weakdeps isa Dict{String, Any}
-                                    weakdeps = weakdeps::Dict{String, Any}
-                                    for (dep, uuid) in weakdeps
-                                        uuid::String
-                                        if dep === name
-                                            return PkgId(UUID(uuid), name)
-                                        end
-                                    end
-                                end
-                            end
+                        # Extensions are allowed to load:
+                        # 1. Any ordinary dep of the parent package
+                        # 2. Any weakdep of the parent package declared as an extension trigger
+                        for deps′ in (ext_may_load_weakdep(exts, name) ?
+                                (get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing}, deps) :
+                                (deps,))
+                            dep = dep_stanza_get(deps′, name)
+                            dep === nothing && continue
+                            @goto have_dep
                         end
-                        # `name` is not an ext, do standard lookup as if this was the parent
-                        return identify_package(PkgId(UUID(uuid), dep_name), name)
+                        return PkgId(name)
                     end
                 end
+                continue
+            end
+
+            @label have_dep
+            dep.uuid !== nothing && return dep
+
+            # We have the dep, but it did not specify a UUID. In this case,
+            # it must be that the name is unique in the manifest - so lookup
+            # the UUID at the lop level by name
+            name_deps = get(d, name, nothing)::Union{Nothing, Vector{Any}}
+            if name_deps === nothing || length(name_deps) != 1
+                error("expected a single entry for $(repr(name)) in $(repr(project_file))")
             end
+            entry = first(name_deps::Vector{Any})::Dict{String, Any}
+            uuid = get(entry, "uuid", nothing)::Union{String, Nothing}
+            uuid === nothing && return PkgId(name)
+            return PkgId(UUID(uuid), name)
         end
     end
-    found_where || return nothing
-    found_name || return PkgId(name)
-    # Only reach here if deps was not a dict which mean we have a unique name for the dep
-    name_deps = get(d, name, nothing)::Union{Nothing, Vector{Any}}
-    if name_deps === nothing || length(name_deps) != 1
-        error("expected a single entry for $(repr(name)) in $(repr(project_file))")
-    end
-    entry = first(name_deps::Vector{Any})::Dict{String, Any}
-    uuid = get(entry, "uuid", nothing)::Union{String, Nothing}
-    uuid === nothing && return nothing
-    return PkgId(UUID(uuid), name)
+
+    # We did not find `where` in this environment, either as a package or as an extension.
+    # The caller should continue searching the environment stack.
+    return nothing
 end
 
 # find `uuid` stanza, return the corresponding path
-function explicit_manifest_uuid_path(project_file::String, pkg::PkgId)::Union{Nothing,String,Missing}
+function explicit_manifest_uuid_load_spec(project_file::String, pkg::PkgId)::Union{Nothing,PkgLoadSpec,Missing}
     manifest_file = project_file_manifest_path(project_file)
     manifest_file === nothing && return nothing # no manifest, skip env
 
@@ -903,7 +1187,7 @@ function explicit_manifest_uuid_path(project_file::String, pkg::PkgId)::Union{No
             uuid = get(entry, "uuid", nothing)::Union{Nothing, String}
             uuid === nothing && continue
             if UUID(uuid) === pkg.uuid
-                return explicit_manifest_entry_path(manifest_file, pkg, entry)
+                return explicit_manifest_entry_load_spec(manifest_file, pkg, entry)
             end
         end
     end
@@ -911,36 +1195,64 @@ function explicit_manifest_uuid_path(project_file::String, pkg::PkgId)::Union{No
     for (name, entries) in d
         entries = entries::Vector{Any}
         for entry in entries
+            entry = entry::Dict{String, Any}
             uuid = get(entry, "uuid", nothing)::Union{Nothing, String}
             extensions = get(entry, "extensions", nothing)::Union{Nothing, Dict{String, Any}}
             if extensions !== nothing && haskey(extensions, pkg.name) && uuid !== nothing && uuid5(UUID(uuid), pkg.name) == pkg.uuid
-                parent_path = locate_package(PkgId(UUID(uuid), name))
-                if parent_path === nothing
+                parent_load_spec = explicit_manifest_entry_load_spec(manifest_file, PkgId(UUID(uuid), name), entry)
+                if parent_load_spec === nothing || parent_load_spec === missing
                     error("failed to find source of parent package: \"$name\"")
                 end
+                parent_path = parent_load_spec.path
                 p = normpath(dirname(parent_path), "..")
-                return find_ext_path(p, pkg.name)
+                return PkgLoadSpec(find_ext_path(p, pkg.name), parent_load_spec.julia_syntax_version)
             end
         end
     end
     return nothing
 end
 
-function explicit_manifest_entry_path(manifest_file::String, pkg::PkgId, entry::Dict{String,Any})
+function explicit_manifest_entry_load_spec(manifest_file::String, pkg::PkgId, entry::Dict{String,Any})::Union{Nothing, Missing, PkgLoadSpec}
+    # Resolve syntax version. N.B.: Unlike in project files, an absent syntax.julia_version
+    # entry in manifest files means defaulting to 1.13. This is because we assume the
+    # manifest was created by an older version of julia that did not support syntax versioning.
+    # Newer versions of Pkg will provide syntax version information in the manifest,
+    # even if absent from the project file.
+    syntax_version = NON_VERSIONED_SYNTAX
+    syntax_table = get(entry, "syntax", nothing)
+    if syntax_table !== nothing
+        syntax_version = VersionNumber(get(syntax_table, "julia_version", nothing))
+        # Clamp to minimum supported syntax version
+        if syntax_version <= NON_VERSIONED_SYNTAX
+            syntax_version = NON_VERSIONED_SYNTAX
+        end
+    end
+
+    # Resolve path
     path = get(entry, "path", nothing)::Union{Nothing, String}
+    entryfile = get(entry, "entryfile", nothing)::Union{Nothing, String}
     if path !== nothing
-        path = normpath(abspath(dirname(manifest_file), path))
-        return path
+        path = entry_path(normpath(abspath(dirname(manifest_file), path)), pkg.name, entryfile)
+        return PkgLoadSpec(path, syntax_version)
     end
     hash = get(entry, "git-tree-sha1", nothing)::Union{Nothing, String}
-    hash === nothing && return nothing
+    if hash === nothing
+        # stdlibs do not have a git-hash so cannot be loaded from depots. As
+        # a special case, we allow loading these directly from the stdlib location
+        # (treated as an implicit environment).
+        mbyspec = manifest_uuid_load_spec(Sys.STDLIB, pkg)
+        if mbyspec isa PkgLoadSpec && isfile(mbyspec.path)
+            return mbyspec
+        end
+        return nothing
+    end
     hash = SHA1(hash)
     # Keep the 4 since it used to be the default
     uuid = pkg.uuid::UUID # checked within `explicit_manifest_uuid_path`
     for slug in (version_slug(uuid, hash), version_slug(uuid, hash, 4))
         for depot in DEPOT_PATH
             path = joinpath(depot, "packages", pkg.name, slug)
-            ispath(path) && return abspath(path)
+            ispath(path) && return PkgLoadSpec(entry_path(abspath(path), pkg.name, entryfile), syntax_version)
         end
     end
     # no depot contains the package, return missing to stop looking
@@ -948,10 +1260,7 @@ function explicit_manifest_entry_path(manifest_file::String, pkg::PkgId, entry::
 end
 
 ## implicit project & manifest API ##
-
-# look for an entry point for `name` from a top-level package (no environment)
-# otherwise return `nothing` to indicate the caller should keep searching
-function implicit_project_deps_get(dir::String, name::String)::Union{Nothing,PkgId}
+function implicit_manifest_pkgid(dir::String, name::String)::Union{Nothing,PkgId}
     path, project_file = entry_point_and_project_file(dir, name)
     if project_file === nothing
         path === nothing && return nothing
@@ -962,30 +1271,29 @@ function implicit_project_deps_get(dir::String, name::String)::Union{Nothing,Pkg
     return proj
 end
 
-# look for an entry-point for `name`, check that UUID matches
-# if there's a project file, look up `name` in its deps and return that
-# otherwise return `nothing` to indicate the caller should keep searching
-function implicit_manifest_deps_get(dir::String, where::PkgId, name::String)::Union{Nothing,PkgId}
-    @assert where.uuid !== nothing
-    project_file = entry_point_and_project_file(dir, where.name)[2]
-    project_file === nothing && return nothing # a project file is mandatory for a package with a uuid
-    proj = project_file_name_uuid(project_file, where.name)
-    proj == where || return nothing # verify that this is the correct project file
-    # this is the correct project, so stop searching here
-    pkg_uuid = explicit_project_deps_get(project_file, name)
-    return PkgId(pkg_uuid, name)
+function implicit_manifest_project(dir::String, pkg::PkgId)::Union{Nothing, String}
+    @assert pkg.uuid !== nothing
+    project_file = entry_point_and_project_file(dir, pkg.name)[2]
+    if project_file === nothing
+        # `where` could be an extension
+        return implicit_env_project_file_extension(dir, pkg)[2]
+    end
+    proj = project_file_name_uuid(project_file, pkg.name)
+    proj == pkg || return nothing
+    return project_file
 end
 
 # look for an entry-point for `pkg` and return its path if UUID matches
-function implicit_manifest_uuid_path(dir::String, pkg::PkgId)::Union{Nothing,String}
+function implicit_manifest_uuid_load_spec(dir::String, pkg::PkgId)::Union{Nothing, PkgLoadSpec}
     path, project_file = entry_point_and_project_file(dir, pkg.name)
     if project_file === nothing
         pkg.uuid === nothing || return nothing
-        return path
+        # Without a project file, treat as empty - which defaults to VERSION
+        return PkgLoadSpec(path, VERSION)
     end
     proj = project_file_name_uuid(project_file, pkg.name)
     proj == pkg || return nothing
-    return path
+    return PkgLoadSpec(path, project_get_syntax_version(parsed_toml(project_file)))
 end
 
 ## other code loading functionality ##
@@ -996,16 +1304,20 @@ function find_source_file(path::AbstractString)
     return isfile(base_path) ? normpath(base_path) : nothing
 end
 
-cache_file_entry(pkg::PkgId) = joinpath(
-    "compiled",
-    "v$(VERSION.major).$(VERSION.minor)",
-    pkg.uuid === nothing ? ""       : pkg.name),
-    pkg.uuid === nothing ? pkg.name : package_slug(pkg.uuid)
+function cache_file_entry(pkg::PkgId)
+    uuid = pkg.uuid
+    return joinpath(
+        "compiled",
+        "v$(VERSION.major).$(VERSION.minor)",
+        uuid === nothing ? ""       : pkg.name),
+        uuid === nothing ? pkg.name : package_slug(uuid)
+end
 
-function find_all_in_cache_path(pkg::PkgId)
+function find_all_in_cache_path(pkg::PkgId, DEPOT_PATH::typeof(DEPOT_PATH)=DEPOT_PATH)
     paths = String[]
     entrypath, entryfile = cache_file_entry(pkg)
-    for path in joinpath.(DEPOT_PATH, entrypath)
+    for path in DEPOT_PATH
+        path = joinpath(path, entrypath)
         isdir(path) || continue
         for file in readdir(path, sort = false) # no sort given we sort later
             if !((pkg.uuid === nothing && file == entryfile * ".ji") ||
@@ -1018,89 +1330,196 @@ function find_all_in_cache_path(pkg::PkgId)
         end
     end
     if length(paths) > 1
-        # allocating the sort vector is less expensive than using sort!(.. by=mtime), which would
-        # call the relatively slow mtime multiple times per path
-        p = sortperm(mtime.(paths), rev = true)
+        function sort_by(path)
+            # when using pkgimages, consider those cache files first
+            pkgimage = if JLOptions().use_pkgimages != 0
+                io = open(path, "r")
+                try
+                    if iszero(isvalid_cache_header(io))
+                        false
+                    else
+                        _, _, _, _, _, _, _, flags = parse_cache_header(io, path)
+                        CacheFlags(flags).use_pkgimages
+                    end
+                finally
+                    close(io)
+                end
+            else
+                false
+            end
+            (; pkgimage, mtime=mtime(path))
+        end
+        function sort_lt(a, b)
+            if a.pkgimage != b.pkgimage
+                return a.pkgimage < b.pkgimage
+            end
+            return a.mtime < b.mtime
+        end
+
+        # allocating the sort vector is less expensive than using sort!(.. by=sort_by),
+        # which would call the relatively slow mtime multiple times per path
+        p = sortperm(sort_by.(paths), lt=sort_lt, rev=true)
         return paths[p]
     else
         return paths
     end
 end
 
-ocachefile_from_cachefile(cachefile) = string(chopsuffix(cachefile, ".ji"), ".", Base.Libc.dlext)
-cachefile_from_ocachefile(cachefile) = string(chopsuffix(cachefile, ".$(Base.Libc.dlext)"), ".ji")
+ocachefile_from_cachefile(cachefile) = string(chopsuffix(cachefile, ".ji"), ".", Libc.Libdl.dlext)
+cachefile_from_ocachefile(cachefile) = string(chopsuffix(cachefile, ".$(Libc.Libdl.dlext)"), ".ji")
 
 
 # use an Int counter so that nested @time_imports calls all remain open
 const TIMING_IMPORTS = Threads.Atomic{Int}(0)
 
+# loads a precompile cache file, ignoring stale_cachefile tests
+# assuming all depmods are already loaded and everything is valid
 # these return either the array of modules loaded from the path / content given
 # or an Exception that describes why it couldn't be loaded
 # and it reconnects the Base.Docs.META
-function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any})
+function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}; register::Bool=true)
     assert_havelock(require_lock)
     timing_imports = TIMING_IMPORTS[] > 0
     try
-    if timing_imports
-        t_before = time_ns()
-        cumulative_compile_timing(true)
-        t_comp_before = cumulative_compile_time_ns()
-    end
+        if timing_imports
+            t_before = time_ns()
+            cumulative_compile_timing(true)
+            t_comp_before = cumulative_compile_time_ns()
+        end
 
-    if ocachepath !== nothing
-        @debug "Loading object cache file $ocachepath for $pkg"
-        sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring), ocachepath, depmods, false, pkg.name)
-    else
-        @debug "Loading cache file $path for $pkg"
-        sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring), path, depmods, false, pkg.name)
-    end
-    if isa(sv, Exception)
-        return sv
-    end
+        for i in eachindex(depmods)
+            dep = depmods[i]
+            dep isa Module && continue
+            _, depkey, depbuild_id = dep::Tuple{PkgLoadSpec, PkgId, UInt128}
+            dep = something(maybe_loaded_precompile(depkey, depbuild_id))
+            @assert PkgId(dep) == depkey && module_build_id(dep) === depbuild_id
+            depmods[i] = dep
+        end
 
-    restored = register_restored_modules(sv, pkg, path)
+        ignore_native = false
+        unlock(require_lock) # temporarily _unlock_ during these operations
+        sv = try
+            if ocachepath !== nothing
+                @debug "Loading object cache file $ocachepath for $(repr("text/plain", pkg))"
+                ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring, Cint),
+                    ocachepath, depmods, #=completeinfo=#false, pkg.name, ignore_native)
+            else
+                @debug "Loading cache file $path for $(repr("text/plain", pkg))"
+                ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring),
+                    path, depmods, #=completeinfo=#false, pkg.name)
+            end
+        finally
+            lock(require_lock)
+        end
+        if isa(sv, Exception)
+            return sv
+        end
 
-    for M in restored
-        M = M::Module
-        if parentmodule(M) === M && PkgId(M) == pkg
-            if timing_imports
-                elapsed = round((time_ns() - t_before) / 1e6, digits = 1)
-                comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before
-                print(lpad(elapsed, 9), " ms  ")
-                parentid = get(EXT_PRIMED, pkg, nothing)
-                if parentid !== nothing
-                    print(parentid.name, " → ")
-                end
-                print(pkg.name)
-                if comp_time > 0
-                    printstyled(" ", Ryu.writefixed(Float64(100 * comp_time / (elapsed * 1e6)), 2), "% compilation time", color = Base.info_color())
-                end
-                if recomp_time > 0
-                    perc = Float64(100 * recomp_time / comp_time)
-                    printstyled(" (", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color())
+        sv = sv::SimpleVector
+        internal_methods = sv[3]::Vector{Any}
+        Compiler.@zone "CC: INSERT_BACKEDGES" begin
+            ReinferUtils.insert_backedges_typeinf(internal_methods)
+        end
+        restored = register_restored_modules(sv, pkg, path)
+
+        for M in restored
+            M = M::Module
+            if is_root_module(M) && PkgId(M) == pkg
+                register && register_root_module(M)
+                if timing_imports
+                    elapsed_time = time_ns() - t_before
+                    comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before
+                    print_time_imports_report(M, elapsed_time, comp_time, recomp_time)
                 end
-                println()
+                return M
             end
-            return M
         end
-    end
-    return ErrorException("Required dependency $pkg failed to load from a cache file.")
+        return ErrorException("Required dependency $(repr("text/plain", pkg)) failed to load from a cache file.")
 
     finally
         timing_imports && cumulative_compile_timing(false)
     end
 end
 
+# printing functions for @time_imports
+# note that the time inputs are UInt64 on all platforms. Give default values here so that we don't have
+# confusing UInt64 types in generate_precompile.jl
+function print_time_imports_report(
+        mod::Module,
+        elapsed_time::UInt64=UInt64(1),
+        comp_time::UInt64=UInt64(1),
+        recomp_time::UInt64=UInt64(1)
+    )
+    print(lpad(round(elapsed_time / 1e6, digits=1), 9), " ms  ")
+    ext_parent = extension_parent_name(mod)
+    if ext_parent !== nothing
+        print(ext_parent::String, " → ")
+    end
+    print(string(mod))
+    if comp_time > 0
+        perc = Ryu.writefixed(Float64(100 * comp_time / (elapsed_time)), 2)
+        printstyled(" $perc% compilation time", color = Base.info_color())
+    end
+    if recomp_time > 0
+        perc = Float64(100 * recomp_time / comp_time)
+        perc_show = perc < 1 ? "<1" : Ryu.writefixed(perc, 0)
+        printstyled(" ($perc_show% recompilation)", color = Base.warn_color())
+    end
+    println()
+end
+function print_time_imports_report_init(
+        mod::Module, i::Int=1,
+        elapsed_time::UInt64=UInt64(1),
+        comp_time::UInt64=UInt64(1),
+        recomp_time::UInt64=UInt64(1)
+    )
+    connector = i > 1 ? "├" : "┌"
+    printstyled("               $connector ", color = :light_black)
+    print("$(round(elapsed_time / 1e6, digits=1)) ms $mod.__init__() ")
+    if comp_time > 0
+        perc = Ryu.writefixed(Float64(100 * (comp_time) / elapsed_time), 2)
+        printstyled("$perc% compilation time", color = Base.info_color())
+    end
+    if recomp_time > 0
+        perc = Float64(100 * recomp_time / comp_time)
+        printstyled(" ($(perc < 1 ? "<1" : Ryu.writefixed(perc, 0))% recompilation)", color = Base.warn_color())
+    end
+    println()
+end
+
+# if M is an extension, return the string name of the parent. Otherwise return nothing
+function extension_parent_name(M::Module)
+    rootmodule = moduleroot(M)
+    src_path = pathof(rootmodule)
+    src_path === nothing && return nothing
+    pkgdir_parts = splitpath(src_path)
+    ext_pos = findlast(==("ext"), pkgdir_parts)
+    if ext_pos !== nothing && ext_pos >= length(pkgdir_parts) - 2
+        parent_package_root = joinpath(pkgdir_parts[1:ext_pos-1]...)
+        parent_package_project_file = locate_project_file(parent_package_root)
+        if parent_package_project_file isa String
+            d = parsed_toml(parent_package_project_file)
+            name = get(d, "name", nothing)
+            if name !== nothing
+                return name
+            end
+        end
+    end
+    return nothing
+end
+
 function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String)
     # This function is also used by PkgCacheInspector.jl
+    assert_havelock(require_lock)
     restored = sv[1]::Vector{Any}
     for M in restored
         M = M::Module
-        if isdefined(M, Base.Docs.META) && getfield(M, Base.Docs.META) !== nothing
+        if isdefinedglobal(M, Base.Docs.META)
             push!(Base.Docs.modules, M)
         end
-        if parentmodule(M) === M
-            register_root_module(M)
+        if is_root_module(M)
+            push!(loaded_modules_order, M)
+            push!(get!(Vector{Module}, loaded_precompiles, pkg), M)
         end
     end
 
@@ -1126,31 +1545,18 @@ function run_module_init(mod::Module, i::Int=1)
     # `i` informs ordering for the `@time_imports` report formatting
     if TIMING_IMPORTS[] == 0
         ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
-    else
-        if isdefined(mod, :__init__)
-            connector = i > 1 ? "├" : "┌"
-            printstyled("               $connector ", color = :light_black)
-
-            elapsedtime = time_ns()
-            cumulative_compile_timing(true)
-            compile_elapsedtimes = cumulative_compile_time_ns()
+    elseif isdefined(mod, :__init__)
+        elapsed_time = time_ns()
+        cumulative_compile_timing(true)
+        compile_elapsedtimes = cumulative_compile_time_ns()
 
-            ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
+        ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
 
-            elapsedtime = (time_ns() - elapsedtime) / 1e6
-            cumulative_compile_timing(false);
-            comp_time, recomp_time = (cumulative_compile_time_ns() .- compile_elapsedtimes) ./ 1e6
+        elapsed_time = time_ns() - elapsed_time
+        cumulative_compile_timing(false);
+        comp_time, recomp_time = cumulative_compile_time_ns() .- compile_elapsedtimes
 
-            print(round(elapsedtime, digits=1), " ms $mod.__init__() ")
-            if comp_time > 0
-                printstyled(Ryu.writefixed(Float64(100 * comp_time / elapsedtime), 2), "% compilation time", color = Base.info_color())
-            end
-            if recomp_time > 0
-                perc = Float64(100 * recomp_time / comp_time)
-                printstyled(" (", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color())
-            end
-            println()
-        end
+        print_time_imports_report_init(mod, i, elapsed_time, comp_time, recomp_time)
     end
 end
 
@@ -1180,10 +1586,11 @@ end
 mutable struct ExtensionId
     const id::PkgId
     const parentid::PkgId # just need the name, for printing
+    const n_total_triggers::Int
     ntriggers::Int # how many more packages must be defined until this is loaded
 end
 
-const EXT_PRIMED = Dict{PkgId, PkgId}() # Extension -> Parent
+const EXT_PRIMED = Dict{PkgId,Vector{PkgId}}() # Extension -> Parent + Triggers (parent is always first)
 const EXT_DORMITORY = Dict{PkgId,Vector{ExtensionId}}() # Trigger -> Extensions that can be triggered by it
 const EXT_DORMITORY_FAILED = ExtensionId[]
 
@@ -1191,30 +1598,35 @@ function insert_extension_triggers(pkg::PkgId)
     pkg.uuid === nothing && return
     path_env_loc = locate_package_env(pkg)
     path_env_loc === nothing && return
-    path, env_loc = path_env_loc
-    if path === nothing || env_loc === nothing
-        return
-    end
+    _, env_loc = path_env_loc
     insert_extension_triggers(env_loc, pkg)
 end
 
 function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missing}
     project_file = env_project_file(env)
-    if project_file isa String
+    if project_file isa String || project_file
+        implicit_project_file = project_file
+        if !(implicit_project_file isa String)
+            # if env names a directory, search it for an implicit project file (for stdlibs)
+            path, implicit_project_file = entry_point_and_project_file(env, pkg.name)
+            if !(implicit_project_file isa String)
+                return nothing
+            end
+        end
         # Look in project for extensions to insert
-        proj_pkg = project_file_name_uuid(project_file, pkg.name)
+        proj_pkg = project_file_name_uuid(implicit_project_file, pkg.name)
         if pkg == proj_pkg
-            d_proj = parsed_toml(project_file)
-            weakdeps = get(d_proj, "weakdeps", nothing)::Union{Nothing, Vector{String}, Dict{String,Any}}
+            d_proj = parsed_toml(implicit_project_file)
             extensions = get(d_proj, "extensions", nothing)::Union{Nothing, Dict{String, Any}}
             extensions === nothing && return
-            weakdeps === nothing && return
-            if weakdeps isa Dict{String, Any}
-                return _insert_extension_triggers(pkg, extensions, weakdeps)
-            end
+            weakdeps = get(Dict{String, Any}, d_proj, "weakdeps")::Dict{String,Any}
+            deps = get(Dict{String, Any}, d_proj, "deps")::Dict{String,Any}
+            total_deps = merge(weakdeps, deps)
+            return _insert_extension_triggers(pkg, extensions, total_deps)
         end
 
         # Now look in manifest
+        project_file isa String || return nothing
         manifest_file = project_file_manifest_path(project_file)
         manifest_file === nothing && return
         d = get_deps(parsed_toml(manifest_file))
@@ -1225,27 +1637,35 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi
                 uuid = get(entry, "uuid", nothing)::Union{String, Nothing}
                 uuid === nothing && continue
                 if UUID(uuid) == pkg.uuid
-                    weakdeps = get(entry, "weakdeps", nothing)::Union{Nothing, Vector{String}, Dict{String,Any}}
                     extensions = get(entry, "extensions", nothing)::Union{Nothing, Dict{String, Any}}
                     extensions === nothing && return
-                    weakdeps === nothing && return
-                    if weakdeps isa Dict{String, Any}
-                        return _insert_extension_triggers(pkg, extensions, weakdeps)
+                    weakdeps = get(Dict{String, Any}, entry, "weakdeps")::Union{Vector{String}, Dict{String,Any}}
+                    deps = get(Dict{String, Any}, entry, "deps")::Union{Vector{String}, Dict{String,Any}}
+
+                    function expand_deps_list(deps′::Vector{String})
+                        deps′_expanded = Dict{String, Any}()
+                        for (dep_name, entries) in d
+                            dep_name in deps′ || continue
+                            entries::Vector{Any}
+                            if length(entries) != 1
+                                error("expected a single entry for $(repr(dep_name)) in $(repr(project_file))")
+                            end
+                            entry = first(entries)::Dict{String, Any}
+                            uuid = entry["uuid"]::String
+                            deps′_expanded[dep_name] = uuid
+                        end
+                        return deps′_expanded
                     end
 
-                    d_weakdeps = Dict{String, Any}()
-                    for (dep_name, entries) in d
-                        dep_name in weakdeps || continue
-                        entries::Vector{Any}
-                        if length(entries) != 1
-                            error("expected a single entry for $(repr(dep_name)) in $(repr(project_file))")
-                        end
-                        entry = first(entries)::Dict{String, Any}
-                        uuid = entry["uuid"]::String
-                        d_weakdeps[dep_name] = uuid
+                    if weakdeps isa Vector{String}
+                        weakdeps = expand_deps_list(weakdeps)
+                    end
+                    if deps isa Vector{String}
+                        deps = expand_deps_list(deps)
                     end
-                    @assert length(d_weakdeps) == length(weakdeps)
-                    return _insert_extension_triggers(pkg, extensions, d_weakdeps)
+
+                    total_deps = merge(weakdeps, deps)
+                    return _insert_extension_triggers(pkg, extensions, total_deps)
                 end
             end
         end
@@ -1253,22 +1673,23 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi
     return nothing
 end
 
-function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}, weakdeps::Dict{String, Any})
+function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}, totaldeps::Dict{String, Any})
     for (ext, triggers) in extensions
         triggers = triggers::Union{String, Vector{String}}
         triggers isa String && (triggers = [triggers])
-        id = PkgId(uuid5(parent.uuid, ext), ext)
-        if id in keys(EXT_PRIMED) || haskey(Base.loaded_modules, id)
+        id = PkgId(uuid5(parent.uuid::UUID, ext), ext)
+        if haskey(EXT_PRIMED, id) || haskey(Base.loaded_modules, id)
             continue  # extension is already primed or loaded, don't add it again
         end
-        EXT_PRIMED[id] = parent
-        gid = ExtensionId(id, parent, 1 + length(triggers))
+        EXT_PRIMED[id] = trigger_ids = PkgId[parent]
+        gid = ExtensionId(id, parent, 1 + length(triggers), 1 + length(triggers))
         trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, parent)
         push!(trigger1, gid)
         for trigger in triggers
             # TODO: Better error message if this lookup fails?
-            uuid_trigger = UUID(weakdeps[trigger]::String)
+            uuid_trigger = UUID(totaldeps[trigger]::String)
             trigger_id = PkgId(uuid_trigger, trigger)
+            push!(trigger_ids, trigger_id)
             if !haskey(Base.loaded_modules, trigger_id) || haskey(package_locks, trigger_id)
                 trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, trigger_id)
                 push!(trigger1, gid)
@@ -1280,6 +1701,7 @@ function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}
 end
 
 loading_extension::Bool = false
+loadable_extensions::Union{Nothing,Vector{PkgId}} = nothing
 precompiling_extension::Bool = false
 function run_extension_callbacks(extid::ExtensionId)
     assert_havelock(require_lock)
@@ -1291,9 +1713,14 @@ function run_extension_callbacks(extid::ExtensionId)
         true
     catch
         # Try to continue loading if loading an extension errors
-        errs = current_exceptions()
-        @error "Error during loading of extension $(extid.id.name) of $(extid.parentid.name), \
+        if JLOptions().incremental != 0
+            # during incremental precompilation, this should be fail-fast
+            rethrow()
+        else
+            errs = current_exceptions()
+            @error "Error during loading of extension $(extid.id.name) of $(extid.parentid.name), \
                 use `Base.retry_load_extensions()` to retry." exception=errs
+        end
         false
     finally
         global loading_extension = false
@@ -1306,25 +1733,22 @@ function run_extension_callbacks(pkgid::PkgId)
     # take ownership of extids that depend on this pkgid
     extids = pop!(EXT_DORMITORY, pkgid, nothing)
     extids === nothing && return
+    extids_to_load = Vector{ExtensionId}()
     for extid in extids
-        if extid.ntriggers > 0
-            # indicate pkgid is loaded
-            extid.ntriggers -= 1
-        end
-        if extid.ntriggers < 0
-            # indicate pkgid is loaded
-            extid.ntriggers += 1
-            succeeded = false
-        else
-            succeeded = true
-        end
-        if extid.ntriggers == 0
-            # actually load extid, now that all dependencies are met,
-            # and record the result
-            succeeded = succeeded && run_extension_callbacks(extid)
-            succeeded || push!(EXT_DORMITORY_FAILED, extid)
+        @assert extid.ntriggers > 0
+        extid.ntriggers -= 1
+        if extid.ntriggers == 0 && (loadable_extensions === nothing || extid.id in loadable_extensions)
+            push!(extids_to_load, extid)
         end
     end
+    # Load extensions with the fewest triggers first
+    sort!(extids_to_load, by=extid->extid.n_total_triggers)
+    for extid in extids_to_load
+        # actually load extid, now that all dependencies are met,
+        succeeded = run_extension_callbacks(extid)
+        succeeded || push!(EXT_DORMITORY_FAILED, extid)
+    end
+
     return
 end
 
@@ -1359,18 +1783,241 @@ get_extension(parent::Module, ext::Symbol) = get_extension(PkgId(parent), ext)
 function get_extension(parentid::PkgId, ext::Symbol)
     parentid.uuid === nothing && return nothing
     extid = PkgId(uuid5(parentid.uuid, string(ext)), string(ext))
-    return get(loaded_modules, extid, nothing)
+    return maybe_root_module(extid)
 end
 
 # End extensions
 
+
+struct CacheFlags
+    # OOICCDDP - see jl_cache_flags
+    use_pkgimages::Bool
+    debug_level::Int
+    check_bounds::Int
+    inline::Bool
+    opt_level::Int
+end
+function CacheFlags(f::UInt8)
+    use_pkgimages = Bool(f & 1)
+    debug_level = Int((f >> 1) & 3)
+    check_bounds = Int((f >> 3) & 3)
+    inline = Bool((f >> 5) & 1)
+    opt_level = Int((f >> 6) & 3) # define OPT_LEVEL in statiddata_utils
+    CacheFlags(use_pkgimages, debug_level, check_bounds, inline, opt_level)
+end
+CacheFlags(f::Int) = CacheFlags(UInt8(f))
+function CacheFlags(cf::CacheFlags=CacheFlags(ccall(:jl_cache_flags, UInt8, ()));
+            use_pkgimages::Union{Nothing,Bool}=nothing,
+            debug_level::Union{Nothing,Int}=nothing,
+            check_bounds::Union{Nothing,Int}=nothing,
+            inline::Union{Nothing,Bool}=nothing,
+            opt_level::Union{Nothing,Int}=nothing
+        )
+    return CacheFlags(
+        use_pkgimages === nothing ? cf.use_pkgimages : use_pkgimages,
+        debug_level === nothing ? cf.debug_level : debug_level,
+        check_bounds === nothing ? cf.check_bounds : check_bounds,
+        inline === nothing ? cf.inline : inline,
+        opt_level === nothing ? cf.opt_level : opt_level
+    )
+end
+# reflecting jloptions.c defaults
+const DefaultCacheFlags = CacheFlags(use_pkgimages=true, debug_level=isdebugbuild() ? 2 : 1, check_bounds=0, inline=true, opt_level=2)
+
+function _cacheflag_to_uint8(cf::CacheFlags)::UInt8
+    f = UInt8(0)
+    f |= cf.use_pkgimages << 0
+    f |= cf.debug_level << 1
+    f |= cf.check_bounds << 3
+    f |= cf.inline << 5
+    f |= cf.opt_level << 6
+    return f
+end
+
+function translate_cache_flags(cacheflags::CacheFlags, defaultflags::CacheFlags)
+    opts = String[]
+    cacheflags.use_pkgimages    != defaultflags.use_pkgimages   && push!(opts, cacheflags.use_pkgimages ? "--pkgimages=yes" : "--pkgimages=no")
+    cacheflags.debug_level      != defaultflags.debug_level     && push!(opts, "-g$(cacheflags.debug_level)")
+    cacheflags.check_bounds     != defaultflags.check_bounds    && push!(opts, ("--check-bounds=auto", "--check-bounds=yes", "--check-bounds=no")[cacheflags.check_bounds + 1])
+    cacheflags.inline           != defaultflags.inline          && push!(opts, cacheflags.inline ? "--inline=yes" : "--inline=no")
+    cacheflags.opt_level        != defaultflags.opt_level       && push!(opts, "-O$(cacheflags.opt_level)")
+    return opts
+end
+
+function show(io::IO, cf::CacheFlags)
+    print(io, "CacheFlags(")
+    print(io, "; use_pkgimages=")
+    print(io, cf.use_pkgimages)
+    print(io, ", debug_level=")
+    print(io, cf.debug_level)
+    print(io, ", check_bounds=")
+    print(io, cf.check_bounds)
+    print(io, ", inline=")
+    print(io, cf.inline)
+    print(io, ", opt_level=")
+    print(io, cf.opt_level)
+    print(io, ")")
+end
+
+function Base.parse(::Type{CacheFlags}, s::AbstractString)
+    e = Meta.parse(s)
+    if !(e isa Expr && e.head === :call && length(e.args) == 2 &&
+        e.args[1] === :CacheFlags &&
+        e.args[2] isa Expr && e.args[2].head == :parameters)
+        throw(ArgumentError("Malformed CacheFlags string"))
+    end
+    params = Dict{Symbol, Any}(p.args[1] => p.args[2] for p in e.args[2].args)
+    use_pkgimages = get(params, :use_pkgimages, nothing)
+    debug_level = get(params, :debug_level, nothing)
+    check_bounds = get(params, :check_bounds, nothing)
+    inline = get(params, :inline, nothing)
+    opt_level = get(params, :opt_level, nothing)
+    return CacheFlags(; use_pkgimages, debug_level, check_bounds, inline, opt_level)
+end
+
+struct ImageTarget
+    name::String
+    flags::Int32
+    ext_features::String
+    features_en::Vector{UInt8}
+    features_dis::Vector{UInt8}
+end
+
+function parse_image_target(io::IO)
+    flags = read(io, Int32)
+    nfeature = read(io, Int32)
+    feature_en = read(io, 4*nfeature)
+    feature_dis = read(io, 4*nfeature)
+    name_len = read(io, Int32)
+    name = String(read(io, name_len))
+    ext_features_len = read(io, Int32)
+    ext_features = String(read(io, ext_features_len))
+    ImageTarget(name, flags, ext_features, feature_en, feature_dis)
+end
+
+function parse_image_targets(targets::Vector{UInt8})
+    io = IOBuffer(targets)
+    ntargets = read(io, Int32)
+    targets = Vector{ImageTarget}(undef, ntargets)
+    for i in 1:ntargets
+        targets[i] = parse_image_target(io)
+    end
+    return targets
+end
+
+function current_image_targets()
+    targets = @ccall jl_reflect_clone_targets()::Vector{UInt8}
+    return parse_image_targets(targets)
+end
+
+struct FeatureName
+    name::Cstring
+    bit::UInt32 # bit index into a `uint32_t` array;
+    llvmver::UInt32 # 0 if it is available on the oldest LLVM version we support
+end
+
+function feature_names()
+    fnames = Ref{Ptr{FeatureName}}()
+    nf = Ref{Csize_t}()
+    @ccall jl_reflect_feature_names(fnames::Ptr{Ptr{FeatureName}}, nf::Ptr{Csize_t})::Cvoid
+    if fnames[] == C_NULL
+        @assert nf[] == 0
+        return Vector{FeatureName}(undef, 0)
+    end
+    Base.unsafe_wrap(Array, fnames[], nf[], own=false)
+end
+
+function test_feature(features::Vector{UInt8}, feat::FeatureName)
+    bitidx = feat.bit
+    u8idx = div(bitidx, 8) + 1
+    bit = bitidx % 8
+    return (features[u8idx] & (1 << bit)) != 0
+end
+
+function show(io::IO, it::ImageTarget)
+    print(io, it.name)
+    if !isempty(it.ext_features)
+        print(io, ",", it.ext_features)
+    end
+    print(io, "; flags=", it.flags)
+    print(io, "; features_en=(")
+    first = true
+    for feat in feature_names()
+        if test_feature(it.features_en, feat)
+            name = Base.unsafe_string(feat.name)
+            if first
+                first = false
+                print(io, name)
+            else
+                print(io, ", ", name)
+            end
+        end
+    end
+    print(io, ")")
+    # Is feature_dis useful?
+end
+
 # should sync with the types of arguments of `stale_cachefile`
-const StaleCacheKey = Tuple{Base.PkgId, UInt128, String, String}
+const StaleCacheKey = Tuple{PkgId, UInt128, PkgLoadSpec, String, Bool, CacheFlags}
+
+function compilecache_freshest_path(pkg::PkgId;
+        ignore_loaded::Bool=false,
+        stale_cache::Dict{StaleCacheKey,Bool}=Dict{StaleCacheKey, Bool}(),
+        cachepath_cache::Dict{PkgId, Vector{String}}=Dict{PkgId, Vector{String}}(),
+        cachepaths::Vector{String}=get(() -> find_all_in_cache_path(pkg), cachepath_cache, pkg),
+        sourcespec::Union{PkgLoadSpec,Nothing}=Base.locate_package_load_spec(pkg),
+        flags::CacheFlags=CacheFlags())
+    isnothing(sourcespec) && error("Cannot locate source for $(repr("text/plain", pkg))")
+    try_build_ids = UInt128[UInt128(0)]
+    if !ignore_loaded
+        let loaded = get(loaded_precompiles, pkg, nothing)
+            if loaded !== nothing
+                for mod in loaded # try these in reverse original load order to see if one is already valid
+                    pushfirst!(try_build_ids, module_build_id(mod))
+                end
+            end
+        end
+    end
+    for build_id in try_build_ids
+        for path_to_try in cachepaths
+            staledeps = stale_cachefile(pkg, build_id, sourcespec, path_to_try; ignore_loaded, requested_flags=flags)
+            if staledeps === true
+                continue
+            end
+            staledeps, _, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
+            # finish checking staledeps module graph
+            for dep in staledeps
+                dep isa Module && continue
+                modspec, modkey, modbuild_id = dep::Tuple{PkgLoadSpec, PkgId, UInt128}
+                modpaths = get(() -> find_all_in_cache_path(modkey), cachepath_cache, modkey)
+                for modpath_to_try in modpaths::Vector{String}
+                    stale_cache_key = (modkey, modbuild_id, modspec, modpath_to_try, ignore_loaded, flags)::StaleCacheKey
+                    if get!(() -> stale_cachefile(modkey, modbuild_id, modspec, modpath_to_try; ignore_loaded, requested_flags=flags) === true,
+                            stale_cache, stale_cache_key)
+                        continue
+                    end
+                    @goto check_next_dep
+                end
+                @goto check_next_path
+                @label check_next_dep
+            end
+            try
+                # update timestamp of precompilation file so that it is the first to be tried by code loading
+                touch(path_to_try)
+            catch ex
+                # file might be read-only and then we fail to update timestamp, which is fine
+                ex isa IOError || rethrow()
+            end
+            return path_to_try
+            @label check_next_path
+        end
+    end
+end
 
 """
     Base.isprecompiled(pkg::PkgId; ignore_loaded::Bool=false)
 
-Returns whether a given PkgId within the active project is precompiled.
+Return whether a given PkgId within the active project is precompiled.
 
 By default this check observes the same approach that code loading takes
 with respect to when different versions of dependencies are currently loaded
@@ -1380,123 +2027,94 @@ fresh julia session specify `ignore_loaded=true`.
 !!! compat "Julia 1.10"
     This function requires at least Julia 1.10.
 """
-function isprecompiled(pkg::PkgId;
-        ignore_loaded::Bool=false,
-        stale_cache::Dict{StaleCacheKey,Bool}=Dict{StaleCacheKey, Bool}(),
-        cachepaths::Vector{String}=Base.find_all_in_cache_path(pkg),
-        sourcepath::Union{String,Nothing}=Base.locate_package(pkg)
-    )
-    isnothing(sourcepath) && error("Cannot locate source for $(repr(pkg))")
-    for path_to_try in cachepaths
-        staledeps = stale_cachefile(sourcepath, path_to_try, ignore_loaded = true)
-        if staledeps === true
-            continue
-        end
-        staledeps, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}}
-        # finish checking staledeps module graph
-        for i in 1:length(staledeps)
-            dep = staledeps[i]
-            dep isa Module && continue
-            modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
-            modpaths = find_all_in_cache_path(modkey)
-            for modpath_to_try in modpaths::Vector{String}
-                stale_cache_key = (modkey, modbuild_id, modpath, modpath_to_try)::StaleCacheKey
-                if get!(() -> stale_cachefile(stale_cache_key...; ignore_loaded) === true,
-                        stale_cache, stale_cache_key)
-                    continue
-                end
-                @goto check_next_dep
-            end
-            @goto check_next_path
-            @label check_next_dep
-        end
-        try
-            # update timestamp of precompilation file so that it is the first to be tried by code loading
-            touch(path_to_try)
-        catch ex
-            # file might be read-only and then we fail to update timestamp, which is fine
-            ex isa IOError || rethrow()
-        end
-        return true
-        @label check_next_path
-    end
-    return false
+function isprecompiled(pkg::PkgId; ignore_loaded::Bool=false)
+    path = compilecache_freshest_path(pkg; ignore_loaded)
+    return !isnothing(path)
 end
 
-# loads a precompile cache file, after checking stale_cachefile tests
-function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128)
-    assert_havelock(require_lock)
-    loaded = nothing
-    if root_module_exists(modkey)
-        loaded = root_module(modkey)
-    else
-        loaded = start_loading(modkey)
-        if loaded === nothing
-            try
-                modpath = locate_package(modkey)
-                modpath === nothing && return nothing
-                set_pkgorigin_version_path(modkey, String(modpath))
-                loaded = _require_search_from_serialized(modkey, String(modpath), build_id)
-            finally
-                end_loading(modkey, loaded)
-            end
-            if loaded isa Module
-                insert_extension_triggers(modkey)
-                run_package_callbacks(modkey)
+"""
+    Base.isrelocatable(pkg::PkgId)
+
+Return whether a given PkgId within the active project is precompiled and the
+associated cache is relocatable.
+
+!!! compat "Julia 1.11"
+    This function requires at least Julia 1.11.
+"""
+function isrelocatable(pkg::PkgId)
+    path = compilecache_freshest_path(pkg)
+    isnothing(path) && return false
+    io = open(path, "r")
+    try
+        iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile."))
+        _, (includes, includes_srcfiles, _), _... = _parse_cache_header(io, path)
+        for inc in includes
+            !startswith(inc.filename, "@depot") && return false
+            if inc ∉ includes_srcfiles
+                # its an include_dependency
+                track_content = inc.mtime == -1.0
+                track_content || return false
             end
         end
+    finally
+        close(io)
     end
-    if !(loaded isa Module) || PkgId(loaded) != modkey
-        return ErrorException("Required dependency $modkey failed to load from a cache file.")
+    return true
+end
+
+function parse_cache_buildid(cachepath::String)
+    f = open(cachepath, "r")
+    try
+        checksum = isvalid_cache_header(f)
+        iszero(checksum) && throw(ArgumentError("Incompatible header in cache file $cachefile."))
+        flags = read(f, UInt8)
+        syntax_version = read(f, UInt8)
+        n = read(f, Int32)
+        n == 0 && error("no module defined in $cachefile")
+        skip(f, n) # module name
+        uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID
+        build_id = (UInt128(checksum) << 64) | read(f, UInt64)
+        return build_id, uuid
+    finally
+        close(f)
     end
-    return loaded
 end
 
-# loads a precompile cache file, ignoring stale_cachefile tests
-# assuming all depmods are already loaded and everything is valid
-function _tryrequire_from_serialized(modkey::PkgId, path::String, ocachepath::Union{Nothing, String}, sourcepath::String, depmods::Vector{Any})
+# search for a precompile cache file to load, after some various checks
+function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128)
     assert_havelock(require_lock)
-    loaded = nothing
-    if root_module_exists(modkey)
-        loaded = root_module(modkey)
-    else
-        loaded = start_loading(modkey)
-        if loaded === nothing
-            try
-                for i in 1:length(depmods)
-                    dep = depmods[i]
-                    dep isa Module && continue
-                    _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128}
-                    @assert root_module_exists(depkey)
-                    dep = root_module(depkey)
-                    depmods[i] = dep
-                end
-                set_pkgorigin_version_path(modkey, sourcepath)
-                loaded = _include_from_serialized(modkey, path, ocachepath, depmods)
-            finally
-                end_loading(modkey, loaded)
-            end
-            if loaded isa Module
-                insert_extension_triggers(modkey)
-                run_package_callbacks(modkey)
-            end
+    loaded = start_loading(modkey, build_id, false)
+    if loaded === nothing
+        try
+            modspec = locate_package_load_spec(modkey)
+            isnothing(modspec) && error("Cannot locate source for $(repr("text/plain", modkey))")
+            set_pkgorigin_version_path(modkey, modspec.path)
+            loaded = _require_search_from_serialized(modkey, modspec, build_id, true)
+        finally
+            end_loading(modkey, loaded)
+        end
+        if loaded isa Module
+            insert_extension_triggers(modkey)
+            run_package_callbacks(modkey)
         end
     end
-    if !(loaded isa Module) || PkgId(loaded) != modkey
-        return ErrorException("Required dependency $modkey failed to load from a cache file.")
+    if loaded isa Module && PkgId(loaded) == modkey && module_build_id(loaded) === build_id
+        return loaded
     end
-    return loaded
+    return ErrorException("Required dependency $modkey failed to load from a cache file.")
 end
 
 # loads a precompile cache file, ignoring stale_cachefile tests
-# load the best available (non-stale) version of all dependent modules first
+# load all dependent modules first
 function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String})
     assert_havelock(require_lock)
     local depmodnames
     io = open(path, "r")
     try
-        iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.")
-        _, _, depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io)
+        iszero(isvalid_cache_header(io)) && return ArgumentError("Incompatible header in cache file $path.")
+        _, (includes, _, _), depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io, path)
+
+
         pkgimage = !isempty(clone_targets)
         if pkgimage
             ocachepath !== nothing || return ArgumentError("Expected ocachepath to be provided")
@@ -1522,121 +2140,235 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union
         depmods[i] = dep
     end
     # then load the file
-    return _include_from_serialized(pkg, path, ocachepath, depmods)
+    loaded = _include_from_serialized(pkg, path, ocachepath, depmods; register = true)
+    return loaded
 end
 
-# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it
+# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it or it was stale
 # returns the set of modules restored if the cache load succeeded
-@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128)
+@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcespec::PkgLoadSpec, build_id::UInt128, stalecheck::Bool; reasons=nothing, DEPOT_PATH::typeof(DEPOT_PATH)=DEPOT_PATH)
     assert_havelock(require_lock)
-    paths = find_all_in_cache_path(pkg)
-    for path_to_try in paths::Vector{String}
-        staledeps = stale_cachefile(pkg, build_id, sourcepath, path_to_try)
-        if staledeps === true
-            continue
-        end
-        staledeps, ocachefile = staledeps::Tuple{Vector{Any}, Union{Nothing, String}}
-        # finish checking staledeps module graph
-        for i in 1:length(staledeps)
-            dep = staledeps[i]
-            dep isa Module && continue
-            modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
-            modpaths = find_all_in_cache_path(modkey)
-            for modpath_to_try in modpaths
-                modstaledeps = stale_cachefile(modkey, modbuild_id, modpath, modpath_to_try)
-                if modstaledeps === true
-                    continue
+    paths = find_all_in_cache_path(pkg, DEPOT_PATH)
+    newdeps = PkgId[]
+    try_build_ids = UInt128[build_id]
+    if build_id == UInt128(0)
+        let loaded = get(loaded_precompiles, pkg, nothing)
+            if loaded !== nothing
+                for mod in loaded # try these in reverse original load order to see if one is already valid
+                    pushfirst!(try_build_ids, module_build_id(mod))
                 end
-                modstaledeps, modocachepath = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}}
-                staledeps[i] = (modpath, modkey, modpath_to_try, modstaledeps, modocachepath)
-                @goto check_next_dep
             end
-            @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache."
-            @goto check_next_path
-            @label check_next_dep
-        end
-        try
-            touch(path_to_try) # update timestamp of precompilation file
-        catch ex # file might be read-only and then we fail to update timestamp, which is fine
-            ex isa IOError || rethrow()
         end
-        # finish loading module graph into staledeps
-        for i in 1:length(staledeps)
-            dep = staledeps[i]
-            dep isa Module && continue
-            modpath, modkey, modcachepath, modstaledeps, modocachepath = dep::Tuple{String, PkgId, String, Vector{Any}, Union{Nothing, String}}
-            dep = _tryrequire_from_serialized(modkey, modcachepath, modocachepath, modpath, modstaledeps)
-            if !isa(dep, Module)
-                @debug "Rejecting cache file $path_to_try because required dependency $modkey failed to load from cache file for $modcachepath." exception=dep
-                @goto check_next_path
+    end
+    for build_id in try_build_ids
+        for path_to_try in paths::Vector{String}
+            staledeps = stale_cachefile(pkg, build_id, sourcespec, path_to_try; reasons, stalecheck)
+            if staledeps === true
+                continue
+            end
+            staledeps, ocachefile, newbuild_id = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
+            startedloading = length(staledeps) + 1
+            try # any exit from here (goto, break, continue, return) will end_loading
+                # finish checking staledeps module graph, while acquiring all start_loading locks
+                # so that concurrent require calls won't make any different decisions that might conflict with the decisions here
+                # note that start_loading will drop the loading lock if necessary
+                let i = 0
+                    # start_loading here has a deadlock problem if we try to load `A,B,C` and `B,A,D` at the same time:
+                    # it will claim A,B have a cycle, but really they just have an ambiguous order and need to be batch-acquired rather than singly
+                    # solve that by making sure we can start_loading everything before allocating each of those and doing all the stale checks
+                    while i < length(staledeps)
+                        i += 1
+                        dep = staledeps[i]
+                        dep isa Module && continue
+                        _, modkey, modbuild_id = dep::Tuple{PkgLoadSpec, PkgId, UInt128}
+                        dep = canstart_loading(modkey, modbuild_id, stalecheck)
+                        if dep isa Module
+                            if PkgId(dep) == modkey && module_build_id(dep) === modbuild_id
+                                staledeps[i] = dep
+                                continue
+                            else
+                                @debug "Rejecting cache file $path_to_try because module $modkey got loaded at a different version than expected."
+                                @goto check_next_path
+                            end
+                            continue
+                        elseif dep === nothing
+                            continue
+                        end
+                        wait(dep) # releases require_lock, so requires restarting this loop
+                        i = 0
+                    end
+                end
+                for i in reverse(eachindex(staledeps))
+                    dep = staledeps[i]
+                    dep isa Module && continue
+                    modspec, modkey, modbuild_id = dep::Tuple{PkgLoadSpec, PkgId, UInt128}
+                    # inline a call to start_loading here
+                    @assert canstart_loading(modkey, modbuild_id, stalecheck) === nothing
+                    package_locks[modkey] = (current_task(), Threads.Condition(require_lock), modbuild_id)
+                    startedloading = i
+                    modpaths = find_all_in_cache_path(modkey, DEPOT_PATH)
+                    for modpath_to_try in modpaths
+                        modstaledeps = stale_cachefile(modkey, modbuild_id, modspec, modpath_to_try; stalecheck)
+                        if modstaledeps === true
+                            continue
+                        end
+                        modstaledeps, modocachepath, _ = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
+                        staledeps[i] = (modspec, modkey, modbuild_id, modpath_to_try, modstaledeps, modocachepath)
+                        @goto check_next_dep
+                    end
+                    @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache."
+                    @goto check_next_path
+                    @label check_next_dep
+                end
+                M = maybe_loaded_precompile(pkg, newbuild_id)
+                if isa(M, Module)
+                    stalecheck && register_root_module(M)
+                    return M
+                end
+                if stalecheck
+                    try
+                        touch(path_to_try) # update timestamp of precompilation file
+                    catch ex # file might be read-only and then we fail to update timestamp, which is fine
+                        ex isa IOError || rethrow()
+                    end
+                end
+                # finish loading module graph into staledeps
+                # n.b. this runs __init__ methods too early, so it is very unwise to have those, as they may see inconsistent loading state, causing them to fail unpredictably here
+                for i in eachindex(staledeps)
+                    dep = staledeps[i]
+                    dep isa Module && continue
+                    modspec, modkey, modbuild_id, modcachepath, modstaledeps, modocachepath = dep::Tuple{PkgLoadSpec, PkgId, UInt128, String, Vector{Any}, Union{Nothing, String}}
+                    set_pkgorigin_version_path(modkey, modspec.path)
+                    dep = _include_from_serialized(modkey, modcachepath, modocachepath, modstaledeps; register = stalecheck)
+                    if !isa(dep, Module)
+                        @debug "Rejecting cache file $path_to_try because required dependency $modkey failed to load from cache file for $modcachepath." exception=dep
+                        @goto check_next_path
+                    else
+                        startedloading = i + 1
+                        end_loading(modkey, dep)
+                        staledeps[i] = dep
+                        push!(newdeps, modkey)
+                    end
+                end
+                restored = maybe_loaded_precompile(pkg, newbuild_id)
+                if !isa(restored, Module)
+                    restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps; register = stalecheck)
+                end
+                isa(restored, Module) && return restored
+                @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored
+                @label check_next_path
+            finally
+                # cancel all start_loading locks that were taken but not fulfilled before failing
+                for i in startedloading:length(staledeps)
+                    dep = staledeps[i]
+                    dep isa Module && continue
+                    if dep isa Tuple{PkgLoadSpec, PkgId, UInt128}
+                        _, modkey, _ = dep
+                    else
+                        _, modkey, _ = dep::Tuple{PkgLoadSpec, PkgId, UInt128, String, Vector{Any}, Union{Nothing, String}}
+                    end
+                    end_loading(modkey, nothing)
+                end
+                for modkey in newdeps
+                    insert_extension_triggers(modkey)
+                    stalecheck && run_package_callbacks(modkey)
+                end
             end
-            staledeps[i] = dep
         end
-        restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps)
-        isa(restored, Module) && return restored
-        @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored
-        continue
-        @label check_next_path
     end
     return nothing
 end
 
 # to synchronize multiple tasks trying to import/using something
-const package_locks = Dict{PkgId,Pair{Task,Threads.Condition}}()
+const package_locks = Dict{PkgId,Tuple{Task,Threads.Condition,UInt128}}()
 
 debug_loading_deadlocks::Bool = true # Enable a slightly more expensive, but more complete algorithm that can handle simultaneous tasks.
                                # This only triggers if you have multiple tasks trying to load the same package at the same time,
-                               # so it is unlikely to make a difference normally.
-function start_loading(modkey::PkgId)
-    # handle recursive calls to require
+                               # so it is unlikely to make a performance difference normally.
+
+function canstart_loading(modkey::PkgId, build_id::UInt128, stalecheck::Bool)
     assert_havelock(require_lock)
+    require_lock.reentrancy_cnt == 1 || throw(ConcurrencyViolationError("recursive call to start_loading"))
     loading = get(package_locks, modkey, nothing)
-    if loading !== nothing
-        # load already in progress for this module on the task
-        task, cond = loading
-        deps = String[modkey.name]
-        pkgid = modkey
-        assert_havelock(cond.lock)
-        if debug_loading_deadlocks && current_task() !== task
-            waiters = Dict{Task,Pair{Task,PkgId}}() # invert to track waiting tasks => loading tasks
-            for each in package_locks
-                cond2 = each[2][2]
-                assert_havelock(cond2.lock)
-                for waiting in cond2.waitq
-                    push!(waiters, waiting => (each[2][1] => each[1]))
-                end
+    if loading === nothing
+        loaded = stalecheck ? maybe_root_module(modkey) : nothing
+        loaded isa Module && return loaded
+        if build_id != UInt128(0)
+            loaded = maybe_loaded_precompile(modkey, build_id)
+            loaded isa Module && return loaded
+        end
+        return nothing
+    end
+    if !stalecheck && build_id != UInt128(0) && loading[3] != build_id
+        # don't block using an existing specific loaded module on needing a different concurrently loaded one
+        loaded = maybe_loaded_precompile(modkey, build_id)
+        loaded isa Module && return loaded
+    end
+    # load already in progress for this module on the task
+    task, cond = loading
+    deps = String[modkey.name]
+    assert_havelock(cond.lock)
+    if debug_loading_deadlocks && current_task() !== task
+        waiters = Dict{Task,Pair{Task,PkgId}}() # invert to track waiting tasks => loading tasks
+        for each in package_locks
+            cond2 = each[2][2]
+            assert_havelock(cond2.lock)
+            for waiting in cond2.waitq
+                push!(waiters, waiting => (each[2][1] => each[1]))
             end
-            while true
-                running = get(waiters, task, nothing)
-                running === nothing && break
-                task, pkgid = running
-                push!(deps, pkgid.name)
-                task === current_task() && break
-            end
-        end
-        if current_task() === task
-            others = String[modkey.name] # repeat this to emphasize the cycle here
-            for each in package_locks # list the rest of the packages being loaded too
-                if each[2][1] === task
-                    other = each[1].name
-                    other == modkey.name || other == pkgid.name || push!(others, other)
-                end
+        end
+        while true
+            running = get(waiters, task, nothing)
+            running === nothing && break
+            task, pkgid = running
+            push!(deps, pkgid.name)
+            task === current_task() && break
+        end
+    end
+    if current_task() === task
+        push!(deps, modkey.name) # repeat this to emphasize the cycle here
+        others = Set{String}()
+        for each in package_locks # list the rest of the packages being loaded too
+            if each[2][1] === task
+                other = each[1].name
+                other == modkey.name || push!(others, other)
             end
-            msg = sprint(deps, others) do io, deps, others
-                print(io, "deadlock detected in loading ")
-                join(io, deps, " -> ")
-                print(io, " -> ")
-                join(io, others, " && ")
+        end
+        # remove duplicates from others already in deps
+        for dep in deps
+            delete!(others, dep)
+        end
+        msg = sprint(deps, others) do io, deps, others
+            print(io, "deadlock detected in loading ")
+            join(io, deps, " using ")
+            if !isempty(others)
+                print(io, " (while loading ")
+                join(io, others, " and ")
+                print(io, ")")
             end
-            throw(ConcurrencyViolationError(msg))
         end
-        return wait(cond)
+        throw(ConcurrencyViolationError(msg))
+    end
+    return cond
+end
+
+function start_loading(modkey::PkgId, build_id::UInt128, stalecheck::Bool)
+    # handle recursive and concurrent calls to require
+    while true
+        loaded = canstart_loading(modkey, build_id, stalecheck)
+        if loaded === nothing
+            package_locks[modkey] = (current_task(), Threads.Condition(require_lock), build_id)
+            return nothing
+        elseif loaded isa Module
+            return loaded
+        end
+        loaded = wait(loaded)
+        loaded isa Module && return loaded
     end
-    package_locks[modkey] = current_task() => Threads.Condition(require_lock)
-    return
 end
 
 function end_loading(modkey::PkgId, @nospecialize loaded)
+    assert_havelock(require_lock)
     loading = pop!(package_locks, modkey)
     notify(loading[2], loaded, all=true)
     nothing
@@ -1652,43 +2384,69 @@ const package_callbacks = Any[]
 const include_callbacks = Any[]
 
 # used to optionally track dependencies when requiring a module:
-const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them
-const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled
+const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", because they are explicitly loaded, and the process should try to avoid invalidating them
+const _require_dependencies = Any[] # a list of (mod::Module, abspath::String, fsize::UInt64, hash::UInt32, mtime::Float64) tuples that are the file dependencies of the module currently being precompiled
 const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies
-function _include_dependency(mod::Module, _path::AbstractString)
+
+function _include_dependency(mod::Module, _path::AbstractString; track_content::Bool=true,
+                             path_may_be_dir::Bool=false)
+    _include_dependency!(_require_dependencies, _track_dependencies[], mod, _path, track_content, path_may_be_dir)
+end
+
+function _include_dependency!(dep_list::Vector{Any}, track_dependencies::Bool,
+                              mod::Module, _path::AbstractString,
+                              track_content::Bool, path_may_be_dir::Bool)
     prev = source_path(nothing)
     if prev === nothing
         path = abspath(_path)
     else
         path = normpath(joinpath(dirname(prev), _path))
     end
-    if _track_dependencies[]
+    if !track_dependencies[]
+        if !path_may_be_dir && !isfile(path)
+            throw(SystemError("opening file $(repr(path))", Libc.ENOENT))
+        elseif path_may_be_dir && !Filesystem.isreadable(path)
+            throw(SystemError("opening file or folder $(repr(path))", Libc.ENOENT))
+        end
+    else
         @lock require_lock begin
-        push!(_require_dependencies, (mod, path, mtime(path)))
+            if track_content
+                hash = (isdir(path) ? _crc32c(join(readdir(path))) : open(_crc32c, path, "r"))::UInt32
+                # use mtime=-1.0 here so that fsize==0 && mtime==0.0 corresponds to a missing include_dependency
+                push!(dep_list, (mod, path, UInt64(filesize(path)), hash, -1.0))
+            else
+                push!(dep_list, (mod, path, UInt64(0), UInt32(0), mtime(path)))
+            end
         end
     end
     return path, prev
 end
 
 """
-    include_dependency(path::AbstractString)
+    include_dependency(path::AbstractString; track_content::Bool=true)
 
 In a module, declare that the file, directory, or symbolic link specified by `path`
-(relative or absolute) is a dependency for precompilation; that is, the module will need
-to be recompiled if the modification time of `path` changes.
+(relative or absolute) is a dependency for precompilation; that is, if `track_content=true`
+the module will need to be recompiled if the content of `path` changes
+(if `path` is a directory the content equals `join(readdir(path))`).
+If `track_content=false` recompilation is triggered when the modification time `mtime` of `path` changes.
 
 This is only needed if your module depends on a path that is not used via [`include`](@ref). It has
 no effect outside of compilation.
+
+!!! compat "Julia 1.11"
+    Keyword argument `track_content` requires at least Julia 1.11.
+    An error is now thrown if `path` is not readable.
 """
-function include_dependency(path::AbstractString)
-    _include_dependency(Main, path)
+function include_dependency(path::AbstractString; track_content::Bool=true)
+    _include_dependency(Main, path, track_content=track_content, path_may_be_dir=true)
     return nothing
 end
 
 # we throw PrecompilableError when a module doesn't want to be precompiled
-struct PrecompilableError <: Exception end
+import Core: PrecompilableError
 function show(io::IO, ex::PrecompilableError)
-    print(io, "Declaring __precompile__(false) is not allowed in files that are being precompiled.")
+    print(io, "Error when precompiling module, potentially caused by a __precompile__(false) declaration in the module.")
 end
 precompilableerror(ex::PrecompilableError) = true
 precompilableerror(ex::WrappedException) = precompilableerror(ex.error)
@@ -1703,17 +2461,49 @@ If a module or file is *not* safely precompilable, it should call `__precompile_
 order to throw an error if Julia attempts to precompile it.
 """
 @noinline function __precompile__(isprecompilable::Bool=true)
-    if !isprecompilable && ccall(:jl_generating_output, Cint, ()) != 0
+    if !isprecompilable && generating_output()
         throw(PrecompilableError())
     end
     nothing
 end
 
 # require always works in Main scope and loads files from node 1
+# XXX: (this is deprecated, but still used by Distributed)
 const toplevel_load = Ref(true)
 
 const _require_world_age = Ref{UInt}(typemax(UInt))
 
+"""
+    Base.TRACE_EVAL
+
+Global control for expression tracing during top-level evaluation. This setting takes priority
+over the `--trace-eval` command-line option.
+
+Set to:
+- `nothing` - use the command-line `--trace-eval` setting (default)
+- `:no` - disable expression tracing
+- `:loc` - show only location information during evaluation
+- `:full` - show full expressions being evaluated
+
+# Examples
+```julia
+# Enable full expression tracing
+Base.TRACE_EVAL = :full
+
+# Show only locations
+Base.TRACE_EVAL = :loc
+
+# Disable tracing (overrides command-line setting)
+Base.TRACE_EVAL = :no
+
+# Reset to use command-line setting
+Base.TRACE_EVAL = nothing
+```
+
+See also: [Command-line Interface](@ref cli) for the `--trace-eval` option.
+"""
+TRACE_EVAL::Union{Symbol,Nothing} = nothing
+
 """
     require(into::Module, module::Symbol)
 
@@ -1736,14 +2526,34 @@ For more details regarding code loading, see the manual sections on [modules](@r
 [parallel computing](@ref code-availability).
 """
 function require(into::Module, mod::Symbol)
-    if _require_world_age[] != typemax(UInt)
-        Base.invoke_in_world(_require_world_age[], __require, into, mod)
-    else
-        @invokelatest __require(into, mod)
+    world = _require_world_age[]
+    if world == typemax(UInt)
+        world = get_world_counter()
+    end
+    return Compiler.@zone "LOAD_Require" invoke_in_world(world, __require, into, mod)
+end
+
+function check_for_hint(into, mod)
+    return begin
+        if isdefined(into, mod) && getfield(into, mod) isa Module
+            true, "."
+        elseif isdefined(parentmodule(into), mod) && getfield(parentmodule(into), mod) isa Module
+            true, ".."
+        else
+            false, ""
+        end
     end
 end
 
 function __require(into::Module, mod::Symbol)
+    if into === __toplevel__ && generating_output(#=incremental=#true)
+        error("`using/import $mod` outside of a Module detected. Importing a package outside of a module \
+         is not allowed during package precompilation.")
+    end
+    topmod = moduleroot(into)
+    if nameof(topmod) === mod
+        return topmod
+    end
     @lock require_lock begin
     LOADING_CACHE[] = LoadingCache()
     try
@@ -1752,24 +2562,22 @@ function __require(into::Module, mod::Symbol)
         if uuidkey_env === nothing
             where = PkgId(into)
             if where.uuid === nothing
-                hint, dots = begin
-                    if isdefined(into, mod) && getfield(into, mod) isa Module
-                        true, "."
-                    elseif isdefined(parentmodule(into), mod) && getfield(parentmodule(into), mod) isa Module
-                        true, ".."
-                    else
-                        false, ""
-                    end
-                end
+                hint, dots = invokelatest(check_for_hint, into, mod)
                 hint_message = hint ? ", maybe you meant `import/using $(dots)$(mod)`" : ""
-                start_sentence = hint ? "Otherwise, run" : "Run"
-                throw(ArgumentError("""
-                    Package $mod not found in current path$hint_message.
-                    - $start_sentence `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package."""))
+                install_message = if mod != :Pkg
+                    start_sentence = hint ? "Otherwise, run" : "Run"
+                    "\n- $start_sentence `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package."
+                else  # for some reason Pkg itself isn't availability so do not tell them to use Pkg to install it.
+                    ""
+                end
+
+                throw(ArgumentError("Package $mod not found in current path$hint_message.$install_message"))
             else
+                manifest_warnings = collect_manifest_warnings()
                 throw(ArgumentError("""
-                Package $(where.name) does not have $mod in its dependencies:
-                - You may have a partially installed environment. Try `Pkg.instantiate()`
+                Cannot load (`using/import`) module $mod into module $into in package $(where.name)
+                because package $(where.name) does not have $mod in its dependencies:
+                $manifest_warnings- You may have a partially installed environment. Try `Pkg.instantiate()`
                   to ensure all packages in the environment are installed.
                 - Or, if you have $(where.name) checked out for development and have
                   added $mod as a dependency but haven't updated your primary
@@ -1779,7 +2587,8 @@ function __require(into::Module, mod::Symbol)
         end
         uuidkey, env = uuidkey_env
         if _track_dependencies[]
-            push!(_require_dependencies, (into, binpack(uuidkey), 0.0))
+            path = binpack(uuidkey)
+            push!(_require_dependencies, (into, path, UInt64(0), UInt32(0), 0.0))
         end
         return _require_prelocked(uuidkey, env)
     finally
@@ -1788,36 +2597,82 @@ function __require(into::Module, mod::Symbol)
     end
 end
 
-require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey)
-
-const REPL_PKGID = PkgId(UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL")
+function find_unsuitable_manifests_versions()
+    unsuitable_manifests = String[]
+    dev_manifests = String[]
+    for env in load_path()
+        project_file = env_project_file(env)
+        project_file isa String || continue # no project file
+        manifest_file = project_file_manifest_path(project_file)
+        manifest_file isa String || continue # no manifest file
+        m = parsed_toml(manifest_file)
+        man_julia_version = get(m, "julia_version", nothing)
+        man_julia_version isa String || @goto mark
+        man_julia_version = VersionNumber(man_julia_version)
+        thispatch(man_julia_version) != thispatch(VERSION) && @goto mark
+        isempty(man_julia_version.prerelease) != isempty(VERSION.prerelease) && @goto mark
+        isempty(man_julia_version.prerelease) && continue
+        man_julia_version.prerelease[1] != VERSION.prerelease[1] && @goto mark
+        if VERSION.prerelease[1] == "DEV"
+            # manifests don't store the 2nd part of prerelease, so cannot check further
+            # so treat them specially in the warning
+            push!(dev_manifests, manifest_file)
+        end
+        continue
+        @label mark
+        push!(unsuitable_manifests, string(manifest_file, " (v", man_julia_version, ")"))
+    end
+    return unsuitable_manifests, dev_manifests
+end
 
-function _require_prelocked(uuidkey::PkgId, env=nothing)
-    if _require_world_age[] != typemax(UInt)
-        Base.invoke_in_world(_require_world_age[], __require_prelocked, uuidkey, env)
-    else
-        @invokelatest __require_prelocked(uuidkey, env)
+function collect_manifest_warnings()
+    unsuitable_manifests, dev_manifests = find_unsuitable_manifests_versions()
+    msg = ""
+    if !isempty(unsuitable_manifests)
+        msg *= """
+        - Note that the following manifests in the load path were resolved with a different
+          julia version, which may be the cause of the error. Try to re-resolve them in the
+          current version, or consider deleting them if that fails:
+            $(join(unsuitable_manifests, "\n    "))
+        """
     end
+    if !isempty(dev_manifests)
+        msg *= """
+        - Note that the following manifests in the load path were resolved with a potentially
+          different DEV version of the current version, which may be the cause of the error.
+          Try to re-resolve them in the current version, or consider deleting them if that fails:
+            $(join(dev_manifests, "\n    "))
+        """
+    end
+    return msg
 end
 
-function __require_prelocked(uuidkey::PkgId, env=nothing)
+function require(uuidkey::PkgId)
+    world = _require_world_age[]
+    if world == typemax(UInt)
+        world = get_world_counter()
+    end
+    return invoke_in_world(world, __require, uuidkey)
+end
+__require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey)
+function _require_prelocked(uuidkey::PkgId, env=nothing)
     assert_havelock(require_lock)
-    if !root_module_exists(uuidkey)
-        newm = _require(uuidkey, env)
-        if newm === nothing
-            error("package `$(uuidkey.name)` did not define the expected \
-                  module `$(uuidkey.name)`, check for typos in package module name")
+    m = start_loading(uuidkey, UInt128(0), true)
+    if m === nothing
+        last = toplevel_load[]
+        try
+            toplevel_load[] = false
+            m = __require_prelocked(uuidkey, env)
+            m isa Module || check_package_module_loaded_error(uuidkey)
+        finally
+            toplevel_load[] = last
+            end_loading(uuidkey, m)
         end
         insert_extension_triggers(uuidkey)
         # After successfully loading, notify downstream consumers
         run_package_callbacks(uuidkey)
-        if uuidkey == REPL_PKGID
-            REPL_MODULE_REF[] = newm
-        end
-    else
-        newm = root_module(uuidkey)
     end
-    return newm
+    return m
 end
 
 mutable struct PkgOrigin
@@ -1828,12 +2683,26 @@ end
 PkgOrigin() = PkgOrigin(nothing, nothing, nothing)
 const pkgorigins = Dict{PkgId,PkgOrigin}()
 
-const loaded_modules = Dict{PkgId,Module}()
+const loaded_modules = Dict{PkgId,Module}() # available to be explicitly loaded
+const loaded_precompiles = Dict{PkgId,Vector{Module}}() # extended (complete) list of modules, available to be loaded
 const loaded_modules_order = Vector{Module}()
-const module_keys = IdDict{Module,PkgId}() # the reverse
 
-is_root_module(m::Module) = @lock require_lock haskey(module_keys, m)
-root_module_key(m::Module) = @lock require_lock module_keys[m]
+root_module_key(m::Module) = PkgId(m)
+
+function maybe_loaded_precompile(key::PkgId, buildid::UInt128)
+    @lock require_lock begin
+    mods = get(loaded_precompiles, key, nothing)
+    mods === nothing && return
+    for mod in mods
+        module_build_id(mod) == buildid && return mod
+    end
+    end
+end
+
+function module_build_id(m::Module)
+    hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m)
+    return (UInt128(hi) << 64) | lo
+end
 
 @constprop :none function register_root_module(m::Module)
     # n.b. This is called from C after creating a new module in `Base.__toplevel__`,
@@ -1843,16 +2712,15 @@ root_module_key(m::Module) = @lock require_lock module_keys[m]
     if haskey(loaded_modules, key)
         oldm = loaded_modules[key]
         if oldm !== m
-            if (0 != ccall(:jl_generating_output, Cint, ())) && (JLOptions().incremental != 0)
+            if generating_output(#=incremental=#true)
                 error("Replacing module `$(key.name)`")
             else
                 @warn "Replacing module `$(key.name)`"
             end
         end
     end
-    push!(loaded_modules_order, m)
+    maybe_loaded_precompile(key, module_build_id(m)) === nothing && push!(loaded_modules_order, m)
     loaded_modules[key] = m
-    module_keys[m] = key
     end
     nothing
 end
@@ -1866,164 +2734,200 @@ register_root_module(Main)
 # to the loaded_modules table instead of getting bindings.
 baremodule __toplevel__
 using Base
+global _internal_julia_parse = Core._parse
+global _internal_julia_lower = Core._lower
+
+# Used for version checking of precompiled cache files only
+global _internal_syntax_version::UInt8 = 0
 end
 
 # get a top-level Module from the given key
+# this is similar to `require`, but worse in almost every possible way
 root_module(key::PkgId) = @lock require_lock loaded_modules[key]
 function root_module(where::Module, name::Symbol)
     key = identify_package(where, String(name))
     key isa PkgId || throw(KeyError(name))
     return root_module(key)
 end
+root_module_exists(key::PkgId) = @lock require_lock haskey(loaded_modules, key)
 maybe_root_module(key::PkgId) = @lock require_lock get(loaded_modules, key, nothing)
 
-root_module_exists(key::PkgId) = @lock require_lock haskey(loaded_modules, key)
 loaded_modules_array() = @lock require_lock copy(loaded_modules_order)
 
+# after unreference_module, a subsequent require call will try to load a new copy of it, if stale
+# reload(m) = (unreference_module(m); require(m))
 function unreference_module(key::PkgId)
+    @lock require_lock begin
     if haskey(loaded_modules, key)
         m = pop!(loaded_modules, key)
         # need to ensure all modules are GC rooted; will still be referenced
-        # in module_keys
+        # in loaded_modules_order
+    end
     end
 end
 
 # whoever takes the package_locks[pkg] must call this function immediately
-function set_pkgorigin_version_path(pkg::PkgId, path::Union{String,Nothing})
+function set_pkgorigin_version_path(pkg::PkgId, path::String)
     assert_havelock(require_lock)
     pkgorigin = get!(PkgOrigin, pkgorigins, pkg)
-    if path !== nothing
-        # Pkg needs access to the version of packages in the sysimage.
-        if Core.Compiler.generating_sysimg()
-            pkgorigin.version = get_pkgversion_from_path(joinpath(dirname(path), ".."))
-        end
+    # Pkg needs access to the version of packages in the sysimage.
+    if generating_output(#=incremental=#false)
+        pkgorigin.version = get_pkgversion_from_path(joinpath(dirname(path), ".."))
     end
     pkgorigin.path = path
     nothing
 end
 
-# A hook to allow code load to use Pkg.precompile
+# Unused
 const PKG_PRECOMPILE_HOOK = Ref{Function}()
+disable_parallel_precompile::Bool = false
 
 # Returns `nothing` or the new(ish) module
-function _require(pkg::PkgId, env=nothing)
+function __require_prelocked(pkg::PkgId, env)
     assert_havelock(require_lock)
-    loaded = start_loading(pkg)
-    loaded === nothing || return loaded
 
-    last = toplevel_load[]
-    try
-        toplevel_load[] = false
-        # perform the search operation to select the module file require intends to load
-        path = locate_package(pkg, env)
-        if path === nothing
-            throw(ArgumentError("""
-                Package $pkg is required but does not seem to be installed:
-                 - Run `Pkg.instantiate()` to install all recorded dependencies.
-                """))
-        end
-        set_pkgorigin_version_path(pkg, path)
-
-        pkg_precompile_attempted = false # being safe to avoid getting stuck in a Pkg.precompile loop
-
-        # attempt to load the module file via the precompile cache locations
-        if JLOptions().use_compiled_modules != 0
-            @label load_from_cache
-            m = _require_search_from_serialized(pkg, path, UInt128(0))
-            if m isa Module
-                return m
-            end
-        end
-
-        # if the module being required was supposed to have a particular version
-        # but it was not handled by the precompile loader, complain
-        for (concrete_pkg, concrete_build_id) in _concrete_dependencies
-            if pkg == concrete_pkg
-                @warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache.
-                     This may mean $pkg does not support precompilation but is imported by a module that does."""
-                if JLOptions().incremental != 0
-                    # during incremental precompilation, this should be fail-fast
-                    throw(PrecompilableError())
-                end
+    # perform the search operation to select the module file require intends to load
+    specenv = locate_package_env(pkg, env)
+    if specenv === nothing
+        throw(ArgumentError("""
+            Package $(repr("text/plain", pkg)) is required but does not seem to be installed:
+             - Run `Pkg.instantiate()` to install all recorded dependencies.
+            """))
+    end
+    spec = specenv[1]
+    path = spec.path
+    set_pkgorigin_version_path(pkg, path)
+
+    parallel_precompile_attempted = false # being safe to avoid getting stuck in a precompilepkgs loop
+    reasons = Dict{String,Int}()
+    # attempt to load the module file via the precompile cache locations
+    if JLOptions().use_compiled_modules != 0
+        @label load_from_cache
+        loaded = _require_search_from_serialized(pkg, spec, UInt128(0), true; reasons)
+        if loaded isa Module
+            return loaded
+        end
+    end
+
+    if JLOptions().use_compiled_modules == 3
+        error("Precompiled image $pkg not available with flags $(CacheFlags())")
+    end
+
+    # if the module being required was supposed to have a particular version
+    # but it was not handled by the precompile loader, complain
+    for (concrete_pkg, concrete_build_id) in _concrete_dependencies
+        if pkg == concrete_pkg
+            @warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache.
+                 This may mean $(repr("text/plain", pkg)) does not support precompilation but is imported by a module that does."""
+            if JLOptions().incremental != 0
+                # during incremental precompilation, this should be fail-fast
+                throw(PrecompilableError())
             end
         end
+    end
+
+    if JLOptions().use_compiled_modules == 1
+        if !generating_output(#=incremental=#false)
+            project = active_project()
+            # spawn off a new incremental pre-compile task for recursive `require` calls
+            loaded = let spec = spec, reasons = reasons
+                maybe_cachefile_lock(pkg, spec.path) do
+                    # double-check the search now that we have lock
+                    m = _require_search_from_serialized(pkg, spec, UInt128(0), true)
+                    m isa Module && return m
+
+                    verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
+                    @logmsg verbosity "Precompiling $(repr("text/plain", pkg))$(list_reasons(reasons))"
 
-        if JLOptions().use_compiled_modules != 0
-            if (0 == ccall(:jl_generating_output, Cint, ())) || (JLOptions().incremental != 0)
-                if !pkg_precompile_attempted && isinteractive() && isassigned(PKG_PRECOMPILE_HOOK)
-                    pkg_precompile_attempted = true
                     unlock(require_lock)
                     try
-                        PKG_PRECOMPILE_HOOK[](pkg.name, _from_loading = true)
+                        if !generating_output() && !parallel_precompile_attempted && !disable_parallel_precompile && @isdefined(Precompilation)
+                            parallel_precompile_attempted = true
+                            precompiled = Precompilation.precompilepkgs([pkg]; _from_loading=true, ignore_loaded=false)
+                            # prcompiled returns either nothing, indicating it needs serial precompile,
+                            # or the entry(ies) that it found would be best to load (possibly because it just created it)
+                            # or an empty set of entries (indicating the precompile should be skipped)
+                            if precompiled !== nothing
+                                isempty(precompiled) && return PrecompilableError() # oops, Precompilation forgot to report what this might actually be
+                                local cachefile = precompiled[1]
+                                local ocachefile = nothing
+                                if JLOptions().use_pkgimages == 1
+                                    ocachefile = ocachefile_from_cachefile(cachefile)
+                                end
+                                return cachefile, ocachefile
+                            end
+                        end
+                        triggers = get(EXT_PRIMED, pkg, nothing)
+                        loadable_exts = nothing
+                        if triggers !== nothing # extension
+                            loadable_exts = PkgId[]
+                            for (ext′, triggers′) in EXT_PRIMED
+                                if triggers′ ⊊ triggers
+                                    push!(loadable_exts, ext′)
+                                end
+                            end
+                        end
+                        return compilecache(pkg, spec; loadable_exts)
                     finally
                         lock(require_lock)
                     end
-                    @goto load_from_cache
-                end
-                # spawn off a new incremental pre-compile task for recursive `require` calls
-                cachefile_or_module = maybe_cachefile_lock(pkg, path) do
-                    # double-check now that we have lock
-                    m = _require_search_from_serialized(pkg, path, UInt128(0))
-                    m isa Module && return m
-                    compilecache(pkg, path)
                 end
-                cachefile_or_module isa Module && return cachefile_or_module::Module
-                cachefile = cachefile_or_module
-                if isnothing(cachefile) # maybe_cachefile_lock returns nothing if it had to wait for another process
-                    @goto load_from_cache # the new cachefile will have the newest mtime so will come first in the search
-                elseif isa(cachefile, Exception)
-                    if precompilableerror(cachefile)
-                        verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
-                        @logmsg verbosity "Skipping precompilation since __precompile__(false). Importing $pkg."
-                    else
-                        @warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m
-                    end
-                    # fall-through to loading the file locally if not incremental
+            end
+            loaded isa Module && return loaded
+            if isnothing(loaded) # maybe_cachefile_lock returns nothing if it had to wait for another process
+                @goto load_from_cache # the new cachefile will have the newest mtime so will come first in the search
+            elseif isa(loaded, Exception)
+                if precompilableerror(loaded)
+                    verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
+                    @logmsg verbosity "Skipping precompilation due to precompilable error. Importing $(repr("text/plain", pkg))." exception=loaded
                 else
-                    cachefile, ocachefile = cachefile::Tuple{String, Union{Nothing, String}}
-                    m = _tryrequire_from_serialized(pkg, cachefile, ocachefile)
-                    if !isa(m, Module)
-                        @warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m
-                    else
-                        return m
-                    end
+                    @warn "The call to compilecache failed to create a usable precompiled cache file for $(repr("text/plain", pkg))" exception=loaded
                 end
-                if JLOptions().incremental != 0
-                    # during incremental precompilation, this should be fail-fast
-                    throw(PrecompilableError())
+                # fall-through to loading the file locally if not incremental
+            else
+                cachefile, ocachefile = loaded::Tuple{String, Union{Nothing, String}}
+                loaded = _tryrequire_from_serialized(pkg, cachefile, ocachefile)
+                if !isa(loaded, Module)
+                    @warn "The call to compilecache failed to create a usable precompiled cache file for $(repr("text/plain", pkg))" exception=loaded
+                else
+                    return loaded
                 end
             end
+            if JLOptions().incremental != 0
+                # during incremental precompilation, this should be fail-fast
+                throw(PrecompilableError())
+            end
         end
+    end
 
-        # just load the file normally via include
-        # for unknown dependencies
-        uuid = pkg.uuid
-        uuid = (uuid === nothing ? (UInt64(0), UInt64(0)) : convert(NTuple{2, UInt64}, uuid))
-        old_uuid = ccall(:jl_module_uuid, NTuple{2, UInt64}, (Any,), __toplevel__)
+    # just load the file normally via include
+    # for unknown dependencies
+    uuid = pkg.uuid
+    uuid = (uuid === nothing ? (UInt64(0), UInt64(0)) : convert(NTuple{2, UInt64}, uuid))
+    old_uuid = ccall(:jl_module_uuid, NTuple{2, UInt64}, (Any,), __toplevel__)
+    if uuid !== old_uuid
+        ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, uuid)
+    end
+    __toplevel__._internal_julia_parse = Experimental.VersionedParse(spec.julia_syntax_version)
+    unlock(require_lock)
+    try
+        include(__toplevel__, path)
+        loaded = maybe_root_module(pkg)
+    finally
+        __toplevel__._internal_julia_parse = Core._parse
+        lock(require_lock)
         if uuid !== old_uuid
-            ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, uuid)
-        end
-        unlock(require_lock)
-        try
-            include(__toplevel__, path)
-            loaded = get(loaded_modules, pkg, nothing)
-        finally
-            lock(require_lock)
-            if uuid !== old_uuid
-                ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, old_uuid)
-            end
+            ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, old_uuid)
         end
-    finally
-        toplevel_load[] = last
-        end_loading(pkg, loaded)
     end
     return loaded
 end
 
-# Only used from test/precompile.jl
-function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Union{String, Nothing})
+# load a serialized file directly, including dependencies (without checking staleness except for immediate conflicts)
+# this does not call start_loading / end_loading, so can lead to some odd behaviors
+function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Union{String, Nothing}, sourcepath::String)
     @lock require_lock begin
-    set_pkgorigin_version_path(uuidkey, nothing)
+    set_pkgorigin_version_path(uuidkey, sourcepath)
     newm = _tryrequire_from_serialized(uuidkey, path, ocachepath)
     newm isa Module || throw(newm)
     insert_extension_triggers(uuidkey)
@@ -2033,7 +2937,99 @@ function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Unio
     end
 end
 
-
+# load a serialized file directly from append_bundled_depot_path for uuidkey without stalechecks
+"""
+    require_stdlib(package_uuidkey::PkgId, [ext::String, from::Module])
+
+!!! warning "May load duplicate copies of stdlib packages."
+
+    This requires that all stdlib packages loaded are compatible with having concurrent
+    copies of themselves loaded into memory. It also places additional restrictions on
+    the kinds of type-piracy that are allowed in stdlibs, since type-piracy can cause the
+    dispatch table to become visibly "torn" across multiple different packages.
+
+    The specific requirements are:
+
+      The import side (caller of `require_stdlib`) must not leak any stdlib types, esp.
+      to any context that may have a conflicting copy of the stdlib(s) (or vice-versa).
+         - e.g., if an output is forwarded to user code, it must contain only Base types.
+         - e.g., if an output contains types from the stdlib, it must be consumed "internally"
+                 before reaching user code.
+
+      The imported code (loaded stdlibs) must be very careful about type piracy:
+         - It must not access any global state that may differ between stdlib copies in
+           type-pirated methods.
+         - It must not return any stdlib types from any type-pirated public methods (since
+           a loaded duplicate would overwrite the Base method again, returning different
+           types that don't correspond to the user-accessible copy of the stdlib).
+         - It must not pass / discriminate stdlib types in type-pirated methods, except
+           indirectly via methods defined in Base and implemented (w/o type-piracy) in
+           all copies of the stdlib over their respective types.
+
+      The idea behind the above restrictions is that any type-pirated methods in the stdlib
+      must return a result that is simultaneously correct for all of the stdlib's loaded
+      copies, including accounting for global state differences and split type identities.
+
+      Furthermore, any imported code must not leak any stdlib types to globals and containers
+      (e.g. Vectors and mutable structs) in upstream Modules, since this will also lead to
+      type-confusion when the type is later pulled out in user / stdlib code.
+
+    For examples of issues like the above, see:
+      [1] https://github.com/JuliaLang/Pkg.jl/issues/4017#issuecomment-2377589989
+      [2] https://github.com/JuliaLang/StyledStrings.jl/issues/91#issuecomment-2379602914
+"""
+require_stdlib(package_uuidkey::PkgId) = require_stdlib(package_uuidkey, nothing, Base)
+function require_stdlib(package_uuidkey::PkgId, ext::Union{Nothing, String}, from::Module)
+    if generating_output(#=incremental=#true)
+        # Otherwise this would lead to awkward dependency issues by loading a package that isn't in the Project/Manifest
+        error("This interactive function requires a stdlib to be loaded, and package code should instead use it directly from that stdlib.")
+    end
+    @lock require_lock begin
+    # the PkgId of the ext, or package if not an ext
+    this_uuidkey = ext isa String ? PkgId(uuid5(package_uuidkey.uuid, ext), ext) : package_uuidkey
+    env = Sys.STDLIB
+    newm = start_loading(this_uuidkey, UInt128(0), true)
+    newm === nothing || return newm
+    try
+        depot_path = append_bundled_depot_path!(empty(DEPOT_PATH))
+        from_stdlib = true # set to false if `from` is a normal package so we do not want the internal loader for the extension either
+        if ext isa String
+            from_uuid = PkgId(from)
+            from_m = get(loaded_modules, from_uuid, nothing)
+            if from_m === from
+                # if from_uuid is either nothing or points to something else, assume we should use require_stdlib
+                # otherwise check cachepath for from to see if it looks like it is from depot_path, since try_build_ids
+                cachepath = get(PkgOrigin, pkgorigins, from_uuid).cachepath
+                entrypath, entryfile = cache_file_entry(from_uuid)
+                from_stdlib = any(x -> startswith(entrypath, x), depot_path)
+            end
+        end
+        if from_stdlib
+            # first since this is a stdlib, try to look there directly first
+            if ext === nothing
+                sourcepath = normpath(env, this_uuidkey.name, "src", this_uuidkey.name * ".jl")
+            else
+                sourcepath = find_ext_path(normpath(joinpath(env, package_uuidkey.name)), ext)
+            end
+            set_pkgorigin_version_path(this_uuidkey, sourcepath)
+            newm = _require_search_from_serialized(this_uuidkey, PkgLoadSpec(sourcepath, VERSION), UInt128(0), false; DEPOT_PATH=depot_path)
+        end
+    finally
+        end_loading(this_uuidkey, newm)
+    end
+    if newm isa Module
+        # After successfully loading, notify downstream consumers
+        insert_extension_triggers(env, this_uuidkey)
+        run_package_callbacks(this_uuidkey)
+    else
+        # if the user deleted their bundled depot, next try to load it completely normally
+        # if it is an extension, we first need to indicate where to find its parant via EXT_PRIMED
+        ext isa String && (EXT_PRIMED[this_uuidkey] = PkgId[package_uuidkey])
+        newm = _require_prelocked(this_uuidkey)
+    end
+    return newm
+    end # release lock
+end
 
 # relative-path load
 
@@ -2053,8 +3049,12 @@ function include_string(mapexpr::Function, mod::Module, code::AbstractString,
                         filename::AbstractString="string")
     loc = LineNumberNode(1, Symbol(filename))
     try
-        ast = Meta.parseall(code, filename=filename)
-        @assert Meta.isexpr(ast, :toplevel)
+        ast = Meta.parseall(code; filename, mod)
+        if !Meta.isexpr(ast, :toplevel)
+            @assert Core._lower != fl_lower
+            # Only reached when JuliaLowering and alternate parse functions are activated
+            return Core.eval(mod, ast)
+        end
         result = nothing
         line_and_ex = Expr(:toplevel, loc, nothing)
         for ex in ast.args
@@ -2067,6 +3067,28 @@ function include_string(mapexpr::Function, mod::Module, code::AbstractString,
             # Wrap things to be eval'd in a :toplevel expr to carry line
             # information as part of the expr.
             line_and_ex.args[2] = ex
+            # Check global TRACE_EVAL first, fall back to command line option
+            trace_eval_setting = TRACE_EVAL
+            trace_eval = if trace_eval_setting !== nothing
+                # Convert symbol to integer value
+                setting = trace_eval_setting
+                if setting === :no
+                    0
+                elseif setting === :loc
+                    1
+                elseif setting === :full
+                    2
+                else
+                    error("Invalid TRACE_EVAL value: $(setting). Must be :no, :loc, or :full")
+                end
+            else
+                JLOptions().trace_eval
+            end
+            if trace_eval == 2 # show everything
+                println(stderr, "eval: ", line_and_ex)
+            elseif trace_eval == 1 # show top location only
+                println(stderr, "eval: ", line_and_ex.args[1])
+            end
             result = Core.eval(mod, line_and_ex)
         end
         return result
@@ -2145,7 +3167,7 @@ and return the value of the last expression.
 The optional `args` argument can be used to set the input arguments of the script (i.e. the global `ARGS` variable).
 Note that definitions (e.g. methods, globals) are evaluated in the anonymous module and do not affect the current module.
 
-# Example
+# Examples
 
 ```jldoctest
 julia> write("testfile.jl", \"\"\"
@@ -2163,12 +3185,12 @@ julia> rm("testfile.jl")
 ```
 """
 function evalfile(path::AbstractString, args::Vector{String}=String[])
-    return Core.eval(Module(:__anon__),
+    m = Module(:__anon__)
+    return Core.eval(m,
         Expr(:toplevel,
              :(const ARGS = $args),
-             :(eval(x) = $(Expr(:core, :eval))(__anon__, x)),
-             :(include(x) = $(Expr(:top, :include))(__anon__, x)),
-             :(include(mapexpr::Function, x) = $(Expr(:top, :include))(mapexpr, __anon__, x)),
+             :(const include = $(Base.IncludeInto(m))),
+             :(const eval = $(Core.EvalInto(m))),
              :(include($path))))
 end
 evalfile(path::AbstractString, args::Vector) = evalfile(path, String[args...])
@@ -2192,9 +3214,16 @@ function load_path_setup_code(load_path::Bool=true)
     return code
 end
 
+# Const global for GC root
+const newly_inferred = CodeInstance[]
+
 # this is called in the external process that generates precompiled package files
-function include_package_for_output(pkg::PkgId, input::String, depot_path::Vector{String}, dl_load_path::Vector{String}, load_path::Vector{String},
+function include_package_for_output(pkg::PkgId, input::String, syntax_version::VersionNumber, depot_path::Vector{String}, dl_load_path::Vector{String}, load_path::Vector{String},
                                     concrete_deps::typeof(_concrete_dependencies), source::Union{Nothing,String})
+
+    @lock require_lock begin
+    m = start_loading(pkg, UInt128(0), false)
+    @assert m === nothing
     append!(empty!(Base.DEPOT_PATH), depot_path)
     append!(empty!(Base.DL_LOAD_PATH), dl_load_path)
     append!(empty!(Base.LOAD_PATH), load_path)
@@ -2203,6 +3232,8 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto
     Base._track_dependencies[] = true
     get!(Base.PkgOrigin, Base.pkgorigins, pkg).path = input
     append!(empty!(Base._concrete_dependencies), concrete_deps)
+    end
+
     uuid_tuple = pkg.uuid === nothing ? (UInt64(0), UInt64(0)) : convert(NTuple{2, UInt64}, pkg.uuid)
 
     ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Base.__toplevel__, uuid_tuple)
@@ -2210,8 +3241,11 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto
         task_local_storage()[:SOURCE_PATH] = source
     end
 
-    ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred)
-    Core.Compiler.track_newly_inferred.x = true
+    ccall(:jl_set_newly_inferred, Cvoid, (Any,), newly_inferred)
+    # This one changes the parser behavior
+    __toplevel__._internal_julia_parse = Experimental.VersionedParse(syntax_version)
+    # This one is the compatibility marker for cache loading
+    __toplevel__._internal_syntax_version = cache_syntax_version(syntax_version)
     try
         Base.include(Base.__toplevel__, input)
     catch ex
@@ -2219,52 +3253,90 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto
         @debug "Aborting `create_expr_cache'" exception=(ErrorException("Declaration of __precompile__(false) not allowed"), catch_backtrace())
         exit(125) # we define status = 125 means PrecompileableError
     finally
-        Core.Compiler.track_newly_inferred.x = false
+        ccall(:jl_set_newly_inferred, Cvoid, (Any,), nothing)
     end
+    # check that the package defined the expected module so we can give a nice error message if not
+    m = maybe_root_module(pkg)
+    m isa Module || check_package_module_loaded_error(pkg)
+
+    # Re-populate the runtime's newly-inferred array, which will be included
+    # in the output. We removed it above to avoid including any code we may
+    # have compiled for error handling and validation.
+    ccall(:jl_set_newly_inferred, Cvoid, (Any,), newly_inferred)
+    @lock require_lock end_loading(pkg, m)
+    # insert_extension_triggers(pkg)
+    # run_package_callbacks(pkg)
+end
+
+function check_package_module_loaded_error(pkg)
+    # match compilecache error type for non-125 errors
+    error("package `$(pkg.name)` did not define the expected \
+          module `$(pkg.name)`, check for typos in package module name")
 end
 
+# protects against PkgId and UUID being imported and losing Base prefix
+_pkg_str(_pkg::PkgId) = (_pkg.uuid === nothing) ? "Base.PkgId($(repr(_pkg.name)))" : "Base.PkgId(Base.UUID(\"$(_pkg.uuid)\"), $(repr(_pkg.name)))"
+_pkg_str(_pkg::Vector) = sprint(show, eltype(_pkg); context = :module=>nothing) * "[" * join(map(_pkg_str, _pkg), ",") * "]"
+_pkg_str(_pkg::Pair{PkgId}) = _pkg_str(_pkg.first) * " => " * repr(_pkg.second)
+_pkg_str(_pkg::Nothing) = "nothing"
+
 const PRECOMPILE_TRACE_COMPILE = Ref{String}()
-function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::Union{Nothing, String},
-                           concrete_deps::typeof(_concrete_dependencies), internal_stderr::IO = stderr, internal_stdout::IO = stdout)
+function create_expr_cache(pkg::PkgId, input::PkgLoadSpec, output::String, output_o::Union{Nothing, String},
+                           concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(),
+                           internal_stderr::IO = stderr, internal_stdout::IO = stdout, loadable_exts::Union{Vector{PkgId},Nothing}=nothing)
     @nospecialize internal_stderr internal_stdout
     rm(output, force=true)   # Remove file if it exists
     output_o === nothing || rm(output_o, force=true)
-    depot_path = map(abspath, DEPOT_PATH)
-    dl_load_path = map(abspath, DL_LOAD_PATH)
-    load_path = map(abspath, Base.load_path())
+    depot_path = String[abspath(x) for x in DEPOT_PATH]
+    dl_load_path = String[abspath(x) for x in DL_LOAD_PATH]
+    load_path = String[abspath(x) for x in Base.load_path()]
+    # if pkg is a stdlib, append its parent Project.toml to the load path
+    triggers = get(EXT_PRIMED, pkg, nothing)
+    if triggers !== nothing
+        parentid = triggers[1]
+        for env in load_path
+            project_file = env_project_file(env)
+            if project_file === true
+                _, parent_project_file = entry_point_and_project_file(env, parentid.name)
+                if parent_project_file !== nothing
+                    parentproj = project_file_name_uuid(parent_project_file, parentid.name)
+                    if parentproj == parentid
+                        push!(load_path, parent_project_file)
+                    end
+                end
+            end
+        end
+    end
     path_sep = Sys.iswindows() ? ';' : ':'
     any(path -> path_sep in path, load_path) &&
         error("LOAD_PATH entries cannot contain $(repr(path_sep))")
 
-    deps_strs = String[]
-    function pkg_str(_pkg::PkgId)
-        if _pkg.uuid === nothing
-            "Base.PkgId($(repr(_pkg.name)))"
-        else
-            "Base.PkgId(Base.UUID(\"$(_pkg.uuid)\"), $(repr(_pkg.name)))"
-        end
-    end
-    for (pkg, build_id) in concrete_deps
-        push!(deps_strs, "$(pkg_str(pkg)) => $(repr(build_id))")
+    if output_o === nothing
+        # remove options that make no difference given the other cache options
+        cacheflags = CacheFlags(cacheflags, opt_level=0)
     end
-
+    opts = translate_cache_flags(cacheflags, CacheFlags()) # julia_cmd is generated for the running system, and must be fixed if running for precompile instead
     if output_o !== nothing
+        @debug "Generating object cache file for $(repr("text/plain", pkg))"
         cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing)
-        opt_level = Base.JLOptions().opt_level
-        opts = `-O$(opt_level) --output-o $(output_o) --output-ji $(output) --output-incremental=yes`
+        push!(opts, "--output-o", output_o)
     else
+        @debug "Generating cache file for $(repr("text/plain", pkg))"
         cpu_target = nothing
-        opts = `-O0 --output-ji $(output) --output-incremental=yes`
+    end
+    push!(opts, "--output-ji", output)
+    if isassigned(PRECOMPILE_TRACE_COMPILE)
+        push!(opts, "--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])")
+        push!(opts, "--trace-compile-timing")
     end
 
-    deps_eltype = sprint(show, eltype(concrete_deps); context = :module=>nothing)
-    deps = deps_eltype * "[" * join(deps_strs, ",") * "]"
-    trace = isassigned(PRECOMPILE_TRACE_COMPILE) ? `--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])` : ``
-    io = open(pipeline(addenv(`$(julia_cmd(;cpu_target)::Cmd) $(opts)
-                              --startup-file=no --history-file=no --warn-overwrite=yes
-                              --color=$(have_color === nothing ? "auto" : have_color ? "yes" : "no")
-                              $trace
-                              -`,
+    io = open(pipeline(addenv(`$(julia_cmd(;cpu_target)::Cmd)
+                               $(flags)
+                               $(opts)
+                               --output-incremental=yes
+                               --startup-file=no --history-file=no --warn-overwrite=yes
+                               $(have_color === nothing ? "--color=auto" : have_color ? "--color=yes" : "--color=no")
+                               -`,
                               "OPENBLAS_NUM_THREADS" => 1,
                               "JULIA_NUM_THREADS" => 1),
                        stderr = internal_stderr, stdout = internal_stdout),
@@ -2272,20 +3344,34 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::
     # write data over stdin to avoid the (unlikely) case of exceeding max command line size
     write(io.in, """
         empty!(Base.EXT_DORMITORY) # If we have a custom sysimage with `EXT_DORMITORY` prepopulated
+        Base.track_nested_precomp($(_pkg_str(vcat(Base.precompilation_stack, pkg))))
+        Base.loadable_extensions = $(_pkg_str(loadable_exts))
         Base.precompiling_extension = $(loading_extension)
-        Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)),
-            $(repr(load_path)), $deps, $(repr(source_path(nothing))))
+        Base.include_package_for_output($(_pkg_str(pkg)), $(repr(abspath(input.path))), $(repr(input.julia_syntax_version)), $(repr(depot_path)), $(repr(dl_load_path)),
+            $(repr(load_path)), $(_pkg_str(concrete_deps)), $(repr(source_path(nothing))))
         """)
     close(io.in)
     return io
 end
 
+const precompilation_stack = Vector{PkgId}()
+# Helpful for debugging when precompilation is unexpectedly nested.
+# Enable with `JULIA_DEBUG=nested_precomp`. Note that it expected to be nested in classical code-load precompilation
+# TODO: Add detection if extension precompilation is nested and error / return early?
+function track_nested_precomp(pkgs::Vector{PkgId})
+    append!(precompilation_stack, pkgs)
+    if length(precompilation_stack) > 1
+        list() = join(map(p->p.name, precompilation_stack), " > ")
+        @debug "Nested precompilation: $(list())" _group=:nested_precomp
+    end
+end
+
 function compilecache_dir(pkg::PkgId)
     entrypath, entryfile = cache_file_entry(pkg)
     return joinpath(DEPOT_PATH[1], entrypath)
 end
 
-function compilecache_path(pkg::PkgId, prefs_hash::UInt64; project::String=something(Base.active_project(), ""))::String
+function compilecache_path(pkg::PkgId, prefs_hash::UInt64; flags::CacheFlags=CacheFlags(), project::String=something(Base.active_project(), ""))::String
     entrypath, entryfile = cache_file_entry(pkg)
     cachepath = joinpath(DEPOT_PATH[1], entrypath)
     isdir(cachepath) || mkpath(cachepath)
@@ -2295,7 +3381,7 @@ function compilecache_path(pkg::PkgId, prefs_hash::UInt64; project::String=somet
         crc = _crc32c(project)
         crc = _crc32c(unsafe_string(JLOptions().image_file), crc)
         crc = _crc32c(unsafe_string(JLOptions().julia_bin), crc)
-        crc = _crc32c(ccall(:jl_cache_flags, UInt8, ()), crc)
+        crc = _crc32c(_cacheflag_to_uint8(flags), crc)
 
         cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing)
         if cpu_target === nothing
@@ -2312,44 +3398,45 @@ end
 """
     Base.compilecache(module::PkgId)
 
-Creates a precompiled cache file for a module and all of its dependencies.
+Create a precompiled cache file for a module and all of its dependencies.
 This can be used to reduce package load times. Cache files are stored in
 `DEPOT_PATH[1]/compiled`. See [Module initialization and precompilation](@ref)
 for important notes.
 """
-function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout)
+function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout; flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(), loadable_exts::Union{Vector{PkgId},Nothing}=nothing)
     @nospecialize internal_stderr internal_stdout
-    path = locate_package(pkg)
-    path === nothing && throw(ArgumentError("$pkg not found during precompilation"))
-    return compilecache(pkg, path, internal_stderr, internal_stdout)
+    spec = locate_package_load_spec(pkg)
+    spec === nothing && throw(ArgumentError("$(repr("text/plain", pkg)) not found during precompilation"))
+    return compilecache(pkg, spec, internal_stderr, internal_stdout; flags, cacheflags, loadable_exts)
 end
 
 const MAX_NUM_PRECOMPILE_FILES = Ref(10)
 
-function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, internal_stdout::IO = stdout,
-                      keep_loaded_modules::Bool = true)
+function compilecache(pkg::PkgId, spec::PkgLoadSpec, internal_stderr::IO = stderr, internal_stdout::IO = stdout,
+                      keep_loaded_modules::Bool = true; flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(),
+                      loadable_exts::Union{Vector{PkgId},Nothing}=nothing)
 
     @nospecialize internal_stderr internal_stdout
     # decide where to put the resulting cache file
     cachepath = compilecache_dir(pkg)
 
     # build up the list of modules that we want the precompile process to preserve
-    concrete_deps = copy(_concrete_dependencies)
     if keep_loaded_modules
-        for mod in loaded_modules_array()
-            if !(mod === Main || mod === Core || mod === Base)
-                push!(concrete_deps, PkgId(mod) => module_build_id(mod))
+        concrete_deps = copy(_concrete_dependencies)
+        for (pkgreq, modreq) in loaded_modules
+            if !(pkgreq === Main || pkgreq === Core || pkgreq === Base)
+                push!(concrete_deps, pkgreq => module_build_id(modreq))
             end
         end
+    else
+        concrete_deps = empty(_concrete_dependencies)
     end
     # run the expression and cache the result
-    verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
-    @logmsg verbosity "Precompiling $pkg"
 
     # create a temporary file in `cachepath` directory, write the cache in it,
     # write the checksum, _and then_ atomically move the file to `cachefile`.
     mkpath(cachepath)
-    cache_objects = JLOptions().use_pkgimages != 0
+    cache_objects = JLOptions().use_pkgimages == 1
     tmppath, tmpio = mktemp(cachepath)
 
     if cache_objects
@@ -2365,7 +3452,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
             close(tmpio_o)
             close(tmpio_so)
         end
-        p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, internal_stderr, internal_stdout)
+        p = create_expr_cache(pkg, spec, tmppath, tmppath_o, concrete_deps, flags, cacheflags, internal_stderr, internal_stdout, loadable_exts)
 
         if success(p)
             if cache_objects
@@ -2376,7 +3463,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
             # Read preferences hash back from .ji file (we can't precompute because
             # we don't actually know what the list of compile-time preferences are without compiling)
             prefs_hash = preferences_hash(tmppath)
-            cachefile = compilecache_path(pkg, prefs_hash)
+            cachefile = compilecache_path(pkg, prefs_hash; flags=cacheflags)
             ocachefile = cache_objects ? ocachefile_from_cachefile(cachefile) : nothing
 
             # append checksum for so to the end of the .ji file:
@@ -2388,7 +3475,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
             # append extra crc to the end of the .ji file:
             open(tmppath, "r+") do f
                 if iszero(isvalid_cache_header(f))
-                    error("Invalid header for $pkg in new cache file $(repr(tmppath)).")
+                    error("Incompatible header for $(repr("text/plain", pkg)) in new cache file $(repr(tmppath)).")
                 end
                 seekend(f)
                 write(f, crc_so)
@@ -2397,13 +3484,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
             end
 
             # inherit permission from the source file (and make them writable)
-            chmod(tmppath, filemode(path) & 0o777 | 0o200)
-            if cache_objects
-                # Ensure that the user can execute the `.so` we're generating
-                # Note that on windows, `filemode(path)` typically returns `0o666`, so this
-                # addition of the execute bit for the user is doubly needed.
-                chmod(tmppath_so, filemode(path) & 0o777 | 0o333)
-            end
+            chmod(tmppath, filemode(spec.path) & 0o777 | 0o200)
 
             # prune the directory with cache files
             if pkg.uuid !== nothing
@@ -2426,33 +3507,19 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
             end
 
             if cache_objects
-                try
-                    rename(tmppath_so, ocachefile::String; force=true)
-                catch e
-                    e isa IOError || rethrow()
-                    isfile(ocachefile::String) || rethrow()
-                    # Windows prevents renaming a file that is in use so if there is a Julia session started
-                    # with a package image loaded, we cannot rename that file.
-                    # The code belows append a `_i` to the name of the cache file where `i` is the smallest number such that
-                    # that cache file does not exist.
-                    ocachename, ocacheext = splitext(ocachefile::String)
-                    old_cachefiles = Set(readdir(cachepath))
-                    num = 1
-                    while true
-                        ocachefile = ocachename * "_$num" * ocacheext
-                        in(basename(ocachefile), old_cachefiles) || break
-                        num += 1
-                    end
-                    # TODO: Risk for a race here if some other process grabs this name before us
-                    cachefile = cachefile_from_ocachefile(ocachefile)
-                    rename(tmppath_so, ocachefile::String; force=true)
+                ocachefile_new = rename_unique_ocachefile(tmppath_so, ocachefile)
+                if ocachefile_new != ocachefile
+                    cachefile = cachefile_from_ocachefile(ocachefile_new)
+                    ocachefile = ocachefile_new
                 end
                 @static if Sys.isapple()
                     run(`$(Linking.dsymutil()) $ocachefile`, Base.DevNull(), Base.DevNull(), Base.DevNull())
                 end
             end
             # this is atomic according to POSIX (not Win32):
-            rename(tmppath, cachefile; force=true)
+            # but force=true means it will fall back to non atomic
+            # move if the initial rename fails.
+            mv(tmppath, cachefile; force=true)
             return cachefile, ocachefile
         end
     finally
@@ -2465,13 +3532,37 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
     if p.exitcode == 125
         return PrecompilableError()
     else
-        error("Failed to precompile $pkg to $(repr(tmppath)).")
+        error("Failed to precompile $(repr("text/plain", pkg)) to $(repr(tmppath)) ($(Base.process_status(p))).")
     end
 end
 
-function module_build_id(m::Module)
-    hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m)
-    return (UInt128(hi) << 64) | lo
+function rename_unique_ocachefile(tmppath_so::String, ocachefile_orig::String, ocachefile::String = ocachefile_orig, num = 0)
+    try
+        mv(tmppath_so, ocachefile; force=true)
+    catch e
+        e isa IOError || rethrow()
+        # If `rm` was called on a dir containing a loaded DLL, we moved it to temp for cleanup
+        # on restart. However the old path cannot be used (UV_EACCES) while the DLL is loaded
+        if !isfile(ocachefile) && e.code != Base.UV_EACCES
+            rethrow()
+        end
+        # Windows prevents renaming a file that is in use so if there is a Julia session started
+        # with a package image loaded, we cannot rename that file.
+        # The code belows append a `_i` to the name of the cache file where `i` is the smallest number such that
+        # that cache file does not exist.
+        ocachename, ocacheext = splitext(ocachefile_orig)
+        ocachefile_unique = ocachename * "_$num" * ocacheext
+        ocachefile = rename_unique_ocachefile(tmppath_so, ocachefile_orig, ocachefile_unique, num + 1)
+    end
+    return ocachefile
+end
+
+function object_build_id(obj)
+    mod = ccall(:jl_object_top_module, Any, (Any,), obj)
+    if mod === nothing
+        return nothing
+    end
+    return module_build_id(mod::Module)
 end
 
 function isvalid_cache_header(f::IOStream)
@@ -2494,25 +3585,73 @@ function isvalid_pkgimage_crc(f::IOStream, ocachefile::String)
     expected_crc_so == crc_so
 end
 
-struct CacheHeaderIncludes
-    id::PkgId
+mutable struct CacheHeaderIncludes
+    const id::PkgId
     filename::String
-    mtime::Float64
-    modpath::Vector{String}   # seemingly not needed in Base, but used by Revise
+    const fsize::UInt64
+    const hash::UInt32
+    const mtime::Float64
+    const modpath::Vector{String}   # seemingly not needed in Base, but used by Revise
 end
 
-function parse_cache_header(f::IO)
-    flags = read(f, UInt8)
-    modules = Vector{Pair{PkgId, UInt64}}()
+function CacheHeaderIncludes(dep_tuple::Tuple{Module, String, UInt64, UInt32, Float64})
+    return CacheHeaderIncludes(PkgId(dep_tuple[1]), dep_tuple[2:end]..., String[])
+end
+
+function replace_depot_path(path::AbstractString, depots::Vector{String}=normalize_depots_for_relocation())
+    for depot in depots
+        if startswith(path, string(depot, Filesystem.pathsep())) || path == depot
+            path = replace(path, depot => "@depot"; count=1)
+            break
+        end
+    end
+    return path
+end
+
+function normalize_depots_for_relocation()
+    depots = String[]
+    sizehint!(depots, length(DEPOT_PATH))
+    for d in DEPOT_PATH
+        isdir(d) || continue
+        if isdirpath(d)
+            d = dirname(d)
+        end
+        push!(depots, abspath(d))
+    end
+    return depots
+end
+
+function restore_depot_path(path::AbstractString, depot::AbstractString)
+    replace(path, r"^@depot" => depot; count=1)
+end
+
+function resolve_depot(inc::AbstractString)
+    startswith(inc, string("@depot", Filesystem.pathsep())) || return :not_relocatable
+    for depot in DEPOT_PATH
+        ispath(restore_depot_path(inc, depot)) && return depot
+    end
+    return :no_depot_found
+end
+
+function read_module_list(f::IO, has_buildid_hi::Bool)
+    modules = Vector{Pair{PkgId, UInt128}}()
     while true
         n = read(f, Int32)
         n == 0 && break
         sym = String(read(f, n)) # module name
         uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID
-        build_id = read(f, UInt64) # build UUID (mostly just a timestamp)
+        build_id_hi = UInt128(has_buildid_hi ? read(f, UInt64) : UInt64(0)) << 64
+        build_id = (build_id_hi | read(f, UInt64)) # build id (checksum + time - not a UUID)
         push!(modules, PkgId(uuid, sym) => build_id)
     end
-    totbytes = read(f, Int64) # total bytes for file dependencies + preferences
+    return modules
+end
+
+function _parse_cache_header(f::IO, cachefile::AbstractString)
+    flags = read(f, UInt8)
+    syntax_version = read(f, UInt8)
+    modules = read_module_list(f, false)
+    totbytes = Int64(read(f, UInt64)) # total bytes for file dependencies + preferences
     # read the list of requirements
     # and split the list into include and requires statements
     includes = CacheHeaderIncludes[]
@@ -2525,6 +3664,10 @@ function parse_cache_header(f::IO)
         end
         depname = String(read(f, n2))
         totbytes -= n2
+        fsize = read(f, UInt64)
+        totbytes -= 8
+        hash = read(f, UInt32)
+        totbytes -= 4
         mtime = read(f, Float64)
         totbytes -= 8
         n1 = read(f, Int32)
@@ -2547,7 +3690,7 @@ function parse_cache_header(f::IO)
         if depname[1] == '\0'
             push!(requires, modkey => binunpack(depname))
         else
-            push!(includes, CacheHeaderIncludes(modkey, depname, mtime, modpath))
+            push!(includes, CacheHeaderIncludes(modkey, depname, fsize, hash, mtime, modpath))
         end
     end
     prefs = String[]
@@ -2566,82 +3709,155 @@ function parse_cache_header(f::IO)
     totbytes -= 8
     @assert totbytes == 0 "header of cache file appears to be corrupt (totbytes == $(totbytes))"
     # read the list of modules that are required to be present during loading
-    required_modules = Vector{Pair{PkgId, UInt128}}()
-    while true
-        n = read(f, Int32)
-        n == 0 && break
-        sym = String(read(f, n)) # module name
-        uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID
-        build_id = UInt128(read(f, UInt64)) << 64
-        build_id |= read(f, UInt64)
-        push!(required_modules, PkgId(uuid, sym) => build_id)
-    end
+    required_modules = read_module_list(f, true)
     l = read(f, Int32)
     clone_targets = read(f, l)
 
-    return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags
+    srcfiles = srctext_files(f, srctextpos, includes)
+
+    return modules, (includes, srcfiles, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags, syntax_version
+end
+
+function parse_cache_header(f::IO, cachefile::AbstractString)
+    modules, (includes, srcfiles, requires), required_modules,
+        srctextpos, prefs, prefs_hash, clone_targets, flags, syntax_version = _parse_cache_header(f, cachefile)
+
+    includes_srcfiles = CacheHeaderIncludes[]
+    includes_depfiles = CacheHeaderIncludes[]
+    for inc in includes
+        if inc.filename ∈ srcfiles
+            push!(includes_srcfiles, inc)
+        else
+            push!(includes_depfiles, inc)
+        end
+    end
+
+
+    # The @depot resolution logic for include() files:
+    # 1. If the cache is not relocatable because of an absolute path,
+    #    we ignore that path for the depot search.
+    #    Recompilation will be triggered by stale_cachefile() if that absolute path does not exist.
+    # 2. If we can't find a depot for a relocatable path,
+    #    we still replace it with the depot we found from other files.
+    #    Recompilation will be triggered by stale_cachefile() because the resolved path does not exist.
+    # 3. We require that relocatable paths all resolve to the same depot.
+    # 4. We explicitly check that all relocatable paths resolve to the same depot. This has two reasons:
+    #    - We want to scan all source files in order to provide logs for 1. and 2. above.
+    #    - It is possible that a depot might be missing source files.
+    #      Assume that we have two depots on DEPOT_PATH, depot_complete and depot_incomplete.
+    #      If DEPOT_PATH=["depot_complete","depot_incomplete"] then no recompilation shall happen,
+    #      because depot_complete will be picked.
+    #      If DEPOT_PATH=["depot_incomplete","depot_complete"] we trigger recompilation and
+    #      hopefully a meaningful error about missing files is thrown.
+    #      If we were to just select the first depot we find, then whether recompilation happens would
+    #      depend on whether the first relocatable file resolves to depot_complete or depot_incomplete.
+    srcdepot = nothing
+    any_not_relocatable = false
+    any_no_depot_found = false
+    multiple_depots_found = false
+    for src in srcfiles
+        depot = resolve_depot(src)
+        if depot === :not_relocatable
+            any_not_relocatable = true
+        elseif depot === :no_depot_found
+            any_no_depot_found = true
+        elseif isnothing(srcdepot)
+            srcdepot = depot
+        elseif depot != srcdepot
+            multiple_depots_found = true
+        end
+    end
+    if any_no_depot_found
+        @debug("Unable to resolve @depot tag for at least one include() file from cache file $cachefile", srcfiles, _group=:relocatable)
+    end
+    if any_not_relocatable
+        @debug("At least one include() file from $cachefile is not relocatable", srcfiles, _group=:relocatable)
+    end
+    if multiple_depots_found
+        @debug("Some include() files from $cachefile are distributed over multiple depots", srcfiles, _group=:relocatable)
+    elseif !isnothing(srcdepot)
+        for inc in includes_srcfiles
+            inc.filename = restore_depot_path(inc.filename, srcdepot)
+        end
+    end
+
+    # unlike include() files, we allow each relocatable include_dependency() file to resolve
+    # to a separate depot, #52161
+    for inc in includes_depfiles
+        depot = resolve_depot(inc.filename)
+        if depot === :no_depot_found
+            @debug("Unable to resolve @depot tag for include_dependency() file $(inc.filename) from cache file $cachefile", _group=:relocatable)
+        elseif depot === :not_relocatable
+            @debug("include_dependency() file $(inc.filename) from $cachefile is not relocatable", _group=:relocatable)
+        else
+            inc.filename = restore_depot_path(inc.filename, depot)
+        end
+    end
+
+    return modules, (includes, includes_srcfiles, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags, syntax_version
 end
 
-function parse_cache_header(cachefile::String; srcfiles_only::Bool=false)
+function parse_cache_header(cachefile::String)
     io = open(cachefile, "r")
     try
-        iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
-        ret = parse_cache_header(io)
-        srcfiles_only || return ret
-        _, (includes, _), _, srctextpos, _... = ret
-        srcfiles = srctext_files(io, srctextpos)
-        delidx = Int[]
-        for (i, chi) in enumerate(includes)
-            chi.filename ∈ srcfiles || push!(delidx, i)
-        end
-        deleteat!(includes, delidx)
+        iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile."))
+        ret = parse_cache_header(io, cachefile)
         return ret
     finally
         close(io)
     end
 end
 
-preferences_hash(f::IO) = parse_cache_header(f)[6]
+preferences_hash(f::IO, cachefile::AbstractString) = parse_cache_header(f, cachefile)[6]
 function preferences_hash(cachefile::String)
     io = open(cachefile, "r")
     try
         if iszero(isvalid_cache_header(io))
-            throw(ArgumentError("Invalid header in cache file $cachefile."))
+            throw(ArgumentError("Incompatible header in cache file $cachefile."))
         end
-        return preferences_hash(io)
+        return preferences_hash(io, cachefile)
     finally
         close(io)
     end
 end
 
-function cache_dependencies(f::IO)
-    _, (includes, _), modules, _... = parse_cache_header(f)
-    return modules, map(chi -> (chi.filename, chi.mtime), includes)  # return just filename and mtime
+function cache_dependencies(f::IO, cachefile::AbstractString)
+    _, (includes, _, _), modules, _... = parse_cache_header(f, cachefile)
+    return modules, map(chi -> chi.filename, includes)  # return just filename
 end
 
 function cache_dependencies(cachefile::String)
     io = open(cachefile, "r")
     try
-        iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
-        return cache_dependencies(io)
+        iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile."))
+        return cache_dependencies(io, cachefile)
     finally
         close(io)
     end
 end
 
-function read_dependency_src(io::IO, filename::AbstractString)
-    srctextpos = parse_cache_header(io)[4]
+function read_dependency_src(io::IO, cachefile::AbstractString, filename::AbstractString)
+    _, (includes, _, _), _, srctextpos, _, _, _, _ = parse_cache_header(io, cachefile)
     srctextpos == 0 && error("no source-text stored in cache file")
     seek(io, srctextpos)
-    return _read_dependency_src(io, filename)
+    return _read_dependency_src(io, filename, includes)
 end
 
-function _read_dependency_src(io::IO, filename::AbstractString)
+function _read_dependency_src(io::IO, filename::AbstractString, includes::Vector{CacheHeaderIncludes}=CacheHeaderIncludes[])
     while !eof(io)
         filenamelen = read(io, Int32)
         filenamelen == 0 && break
-        fn = String(read(io, filenamelen))
+        depotfn = String(read(io, filenamelen))
         len = read(io, UInt64)
+        fn = if !startswith(depotfn, string("@depot", Filesystem.pathsep()))
+            depotfn
+        else
+            basefn = restore_depot_path(depotfn, "")
+            idx = findfirst(includes) do inc
+                endswith(inc.filename, basefn)
+            end
+            isnothing(idx) ? depotfn : includes[idx].filename
+        end
         if fn == filename
             return String(read(io, len))
         end
@@ -2653,23 +3869,23 @@ end
 function read_dependency_src(cachefile::String, filename::AbstractString)
     io = open(cachefile, "r")
     try
-        iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
-        return read_dependency_src(io, filename)
+        iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile."))
+        return read_dependency_src(io, cachefile, filename)
     finally
         close(io)
     end
 end
 
-function srctext_files(f::IO, srctextpos::Int64)
+function srctext_files(f::IO, srctextpos::Int64, includes::Vector{CacheHeaderIncludes})
     files = Set{String}()
     srctextpos == 0 && return files
     seek(f, srctextpos)
     while !eof(f)
         filenamelen = read(f, Int32)
         filenamelen == 0 && break
-        fn = String(read(f, filenamelen))
+        filename = String(read(f, filenamelen))
         len = read(f, UInt64)
-        push!(files, fn)
+        push!(files, filename)
         seek(f, position(f) + len)
     end
     return files
@@ -2788,9 +4004,27 @@ function recursive_prefs_merge(base::Dict{String, Any}, overrides::Dict{String,
     return new_base
 end
 
+function get_projects_workspace_to_root(project_file)
+    projects = String[project_file]
+    while true
+        project_file = base_project(project_file)
+        if project_file === nothing
+            return projects
+        end
+        push!(projects, project_file)
+    end
+end
+
 function get_preferences(uuid::Union{UUID,Nothing} = nothing)
     merged_prefs = Dict{String,Any}()
-    for env in reverse(load_path())
+    loadpath = load_path()
+    projects_to_merge_prefs = String[]
+    append!(projects_to_merge_prefs, Iterators.drop(loadpath, 1))
+    if length(loadpath) >= 1
+        prepend!(projects_to_merge_prefs, get_projects_workspace_to_root(first(loadpath)))
+    end
+
+    for env in reverse(projects_to_merge_prefs)
         project_toml = env_project_file(env)
         if !isa(project_toml, String)
             continue
@@ -2841,72 +4075,47 @@ get_compiletime_preferences(m::Module) = get_compiletime_preferences(PkgId(m).uu
 get_compiletime_preferences(::Nothing) = String[]
 
 function check_clone_targets(clone_targets)
-    try
-        ccall(:jl_check_pkgimage_clones, Cvoid, (Ptr{Cchar},), clone_targets)
-        return true
-    catch
-        return false
-    end
-end
-
-struct CacheFlags
-    # OOICCDDP - see jl_cache_flags
-    use_pkgimages::Bool
-    debug_level::Int
-    check_bounds::Int
-    inline::Bool
-    opt_level::Int
-
-    function CacheFlags(f::UInt8)
-        use_pkgimages = Bool(f & 1)
-        debug_level = Int((f >> 1) & 3)
-        check_bounds = Int((f >> 3) & 3)
-        inline = Bool((f >> 5) & 1)
-        opt_level = Int((f >> 6) & 3) # define OPT_LEVEL in statiddata_utils
-        new(use_pkgimages, debug_level, check_bounds, inline, opt_level)
+    rejection_reason = ccall(:jl_check_pkgimage_clones, Any, (Ptr{Cchar},), clone_targets)
+    if rejection_reason !== nothing
+        return rejection_reason
     end
 end
-CacheFlags(f::Int) = CacheFlags(UInt8(f))
-CacheFlags() = CacheFlags(ccall(:jl_cache_flags, UInt8, ()))
-
-function show(io::IO, cf::CacheFlags)
-    print(io, "use_pkgimages = ", cf.use_pkgimages)
-    print(io, ", debug_level = ", cf.debug_level)
-    print(io, ", check_bounds = ", cf.check_bounds)
-    print(io, ", inline = ", cf.inline)
-    print(io, ", opt_level = ", cf.opt_level)
-end
 
 # Set by FileWatching.__init__()
-global mkpidlock_hook
-global trymkpidlock_hook
-global parse_pidfile_hook
+global mkpidlock_hook::Any
+global trymkpidlock_hook::Any
+global parse_pidfile_hook::Any
 
 # The preferences hash is only known after precompilation so just assume no preferences.
 # Also ignore the active project, which means that if all other conditions are equal,
 # the same package cannot be precompiled from different projects and/or different preferences at the same time.
-compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0); project="") * ".pidfile"
+compilecache_pidfile_path(pkg::PkgId; flags::CacheFlags=CacheFlags()) = compilecache_path(pkg, UInt64(0); project="", flags) * ".pidfile"
+
+const compilecache_pidlock_stale_age = 10
 
 # Allows processes to wait if another process is precompiling a given source already.
-# The lock file is deleted and precompilation will proceed after `stale_age` seconds if
-#  - the locking process no longer exists
-#  - the lock is held by another host, since processes cannot be checked remotely
-# or after `stale_age * 25` seconds if the process does still exist.
-function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=300)
+# The lock file mtime will be updated when held at most every `stale_age/2` seconds, with expected
+# variance of 10 seconds or more being infrequent but not unusual.
+# After `stale_age` seconds beyond the mtime of the lock file, the lock file is deleted and
+# precompilation will proceed if the locking process no longer exists or after `stale_age * 5`
+# seconds if the process does still exist.
+# If the lock is held by another host, it will conservatively wait `stale_age * 5`
+# seconds since processes cannot be checked remotely
+function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=compilecache_pidlock_stale_age)
     if @isdefined(mkpidlock_hook) && @isdefined(trymkpidlock_hook) && @isdefined(parse_pidfile_hook)
         pidfile = compilecache_pidfile_path(pkg)
-        cachefile = invokelatest(trymkpidlock_hook, f, pidfile; stale_age)
+        cachefile = @invokelatest trymkpidlock_hook(f, pidfile; stale_age)
         if cachefile === false
-            pid, hostname, age = invokelatest(parse_pidfile_hook, pidfile)
+            pid, hostname, age = @invokelatest parse_pidfile_hook(pidfile)
             verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
             if isempty(hostname) || hostname == gethostname()
-                @logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg"
+                @logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $(repr("text/plain", pkg)). Pidfile: $pidfile"
             else
-                @logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg"
+                @logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $(repr("text/plain", pkg)). Pidfile: $pidfile"
             end
             # wait until the lock is available, but don't actually acquire it
             # returning nothing indicates a process waited for another
-            return invokelatest(mkpidlock_hook, Returns(nothing), pidfile; stale_age)
+            return @invokelatest mkpidlock_hook(Returns(nothing), pidfile; stale_age)
         end
         return cachefile
     else
@@ -2915,29 +4124,112 @@ function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=300)
     end
 end
 
+function record_reason(reasons::Dict{String,Int}, reason::String)
+    reasons[reason] = get(reasons, reason, 0) + 1
+end
+record_reason(::Nothing, ::String) = nothing
+function list_reasons(reasons::Dict{String,Int})
+    isempty(reasons) && return ""
+    return " (caches not reused: $(join(("$v for $k" for (k,v) in reasons), ", ")))"
+end
+list_reasons(::Nothing) = ""
+
+function any_includes_stale(includes::Vector{CacheHeaderIncludes}, cachefile::String, reasons::Union{Dict{String,Int},Nothing}=nothing)
+    for chi in includes
+        f, fsize_req, hash_req, ftime_req = chi.filename, chi.fsize, chi.hash, chi.mtime
+        if startswith(f, string("@depot", Filesystem.pathsep()))
+            @debug("Rejecting stale cache file $cachefile because its depot could not be resolved")
+            record_reason(reasons, "file location uses unresolved depot path")
+            return true
+        end
+        if !ispath(f)
+            _f = fixup_stdlib_path(f)
+            if _f != f && isfile(_f) && startswith(_f, Sys.STDLIB)
+                continue
+            end
+            @debug "Rejecting stale cache file $cachefile because file $f does not exist"
+            record_reason(reasons, "source file not found")
+            return true
+        end
+        if ftime_req >= 0.0
+            # this is an include_dependency for which we only recorded the mtime
+            ftime = mtime(f)
+            is_stale = ( ftime != ftime_req ) &&
+                       ( ftime != floor(ftime_req) ) &&           # Issue #13606, PR #13613: compensate for Docker images rounding mtimes
+                       ( ftime != ceil(ftime_req) ) &&            # PR: #47433 Compensate for CirceCI's truncating of timestamps in its caching
+                       ( ftime != trunc(ftime_req, digits=6) ) && # Issue #20837, PR #20840: compensate for GlusterFS truncating mtimes to microseconds
+                       ( ftime != 1.0 )  &&                       # PR #43090: provide compatibility with Nix mtime.
+                       !( 0 < (ftime_req - ftime) < 1e-6 )        # PR #45552: Compensate for Windows tar giving mtimes that may be incorrect by up to one microsecond
+            if is_stale
+                @debug "Rejecting stale cache file $cachefile because mtime of include_dependency $f has changed (mtime $ftime, before $ftime_req)"
+                record_reason(reasons, "file modification time changed")
+                return true
+            end
+        else
+            fstat = stat(f)
+            fsize = filesize(fstat)
+            if fsize != fsize_req
+                @debug "Rejecting stale cache file $cachefile because file size of $f has changed (file size $fsize, before $fsize_req)"
+                record_reason(reasons, "file size changed")
+                return true
+            end
+            hash = isdir(fstat) ? _crc32c(join(readdir(f))) : open(_crc32c, f, "r")
+            if hash != hash_req
+                @debug "Rejecting stale cache file $cachefile because hash of $f has changed (hash $hash, before $hash_req)"
+                record_reason(reasons, "file content changed")
+                return true
+            end
+        end
+    end
+    return false
+end
+
+function cache_syntax_version(ver::VersionNumber)
+    UInt8(clamp(ver.minor - 13, 0, 255))
+end
+
 # returns true if it "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey
 # otherwise returns the list of dependencies to also check
-@constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false)
-    return stale_cachefile(PkgId(""), UInt128(0), modpath, cachefile; ignore_loaded)
-end
-@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String; ignore_loaded::Bool = false)
-    io = open(cachefile, "r")
+@constprop :none function stale_cachefile(modpath::String, cachefile::String; kwargs...)
+    return stale_cachefile(PkgLoadSpec(modpath, VERSION), cachefile; kwargs...)
+end
+@constprop :none function stale_cachefile(modspec::PkgLoadSpec, cachefile::String; ignore_loaded::Bool = false, requested_flags::CacheFlags=CacheFlags(), reasons=nothing)
+    return stale_cachefile(PkgId(""), UInt128(0), modspec, cachefile; ignore_loaded, requested_flags, reasons)
+end
+@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modspec::PkgLoadSpec, cachefile::String;
+                                          ignore_loaded::Bool=false, requested_flags::CacheFlags=CacheFlags(),
+                                          reasons::Union{Dict{String,Int},Nothing}=nothing, stalecheck::Bool=true)
+    # n.b.: this function does nearly all of the file validation, not just those checks related to stale, so the name is potentially unclear
+    io = try
+        open(cachefile, "r")
+    catch ex
+        ex isa IOError || ex isa SystemError || rethrow()
+        @debug "Rejecting cache file $cachefile for $modkey because it could not be opened" isfile(cachefile)
+        return true
+    end
     try
         checksum = isvalid_cache_header(io)
         if iszero(checksum)
-            @debug "Rejecting cache file $cachefile due to it containing an invalid cache header"
-            return true # invalid cache file
+            @debug "Rejecting cache file $cachefile due to it containing an incompatible cache header"
+            record_reason(reasons, "different Julia build configuration")
+            return true # incompatible cache file
         end
-        modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io)
+        modules, (includes, _, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, actual_flags, syntax_version = parse_cache_header(io, cachefile)
         if isempty(modules)
             return true # ignore empty file
         end
-        if ccall(:jl_match_cache_flags, UInt8, (UInt8,), flags) == 0
+        if @ccall(jl_match_cache_flags(_cacheflag_to_uint8(requested_flags)::UInt8, actual_flags::UInt8)::UInt8) == 0
             @debug """
             Rejecting cache file $cachefile for $modkey since the flags are mismatched
-              current session: $(CacheFlags())
-              cache file:      $(CacheFlags(flags))
+              requested flags: $(requested_flags) [$(_cacheflag_to_uint8(requested_flags))]
+              cache file:      $(CacheFlags(actual_flags)) [$actual_flags]
             """
+            record_reason(reasons, "different compilation options")
+            return true
+        end
+        if syntax_version != cache_syntax_version(modspec.julia_syntax_version)
+            @debug "Rejecting cache file $cachefile for $modkey since it was parsed for a different Julia syntax version"
+            record_reason(reasons, "different Julia syntax version")
             return true
         end
         pkgimage = !isempty(clone_targets)
@@ -2946,14 +4238,21 @@ end
             if JLOptions().use_pkgimages == 0
                 # presence of clone_targets means native code cache
                 @debug "Rejecting cache file $cachefile for $modkey since it would require usage of pkgimage"
+                record_reason(reasons, "native code caching disabled")
                 return true
             end
-            if !check_clone_targets(clone_targets)
-                @debug "Rejecting cache file $cachefile for $modkey since pkgimage can't be loaded on this target"
+            rejection_reasons = check_clone_targets(clone_targets)
+            if !isnothing(rejection_reasons)
+                @debug("Rejecting cache file $cachefile for $modkey:",
+                    Reasons=rejection_reasons,
+                    var"Image Targets"=parse_image_targets(clone_targets),
+                    var"Current Targets"=current_image_targets())
+                record_reason(reasons, "different system or CPU target")
                 return true
             end
             if !isfile(ocachefile)
                 @debug "Rejecting cache file $cachefile for $modkey since pkgimage $ocachefile was not found"
+                record_reason(reasons, "native code cache file not found")
                 return true
             end
         else
@@ -2962,12 +4261,15 @@ end
         id = first(modules)
         if id.first != modkey && modkey != PkgId("")
             @debug "Rejecting cache file $cachefile for $modkey since it is for $id instead"
+            record_reason(reasons, "different package identifier")
             return true
         end
+        id_build = id.second
+        id_build = (UInt128(checksum) << 64) | (id_build % UInt64)
         if build_id != UInt128(0)
-            id_build = (UInt128(checksum) << 64) | id.second
             if id_build != build_id
-                @debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it is does not provide desired build_id ($((UUID(build_id))))"
+                @debug "Ignoring cache file $cachefile for $modkey ($(UUID(id_build))) since it does not provide desired build_id ($((UUID(build_id))))"
+                record_reason(reasons, "different build identifier")
                 return true
             end
         end
@@ -2979,93 +4281,95 @@ end
         depmods = Vector{Any}(undef, ndeps)
         for i in 1:ndeps
             req_key, req_build_id = required_modules[i]
-            # Module is already loaded
-            if root_module_exists(req_key)
-                M = root_module(req_key)
+            # Check if module is already loaded
+            M = stalecheck ? nothing : maybe_loaded_precompile(req_key, req_build_id)
+            if M !== nothing
+                @assert PkgId(M) == req_key && module_build_id(M) === req_build_id
+                depmods[i] = M
+                continue
+            end
+            M = maybe_root_module(req_key)
+            if M isa Module
                 if PkgId(M) == req_key && module_build_id(M) === req_build_id
                     depmods[i] = M
-                elseif ignore_loaded
+                    continue
+                elseif M == Core
+                    @debug "Rejecting cache file $cachefile because it was made with a different julia version"
+                    record_reason(reasons, "different Julia version")
+                    return true # Won't be able to fulfill dependency
+                elseif ignore_loaded || !stalecheck
                     # Used by Pkg.precompile given that there it's ok to precompile different versions of loaded packages
-                    @goto locate_branch
                 else
                     @debug "Rejecting cache file $cachefile because module $req_key is already loaded and incompatible."
+                    record_reason(reasons, "different dependency version already loaded")
                     return true # Won't be able to fulfill dependency
                 end
-            else
-                @label locate_branch
-                path = locate_package(req_key)
-                if path === nothing
-                    @debug "Rejecting cache file $cachefile because dependency $req_key not found."
-                    return true # Won't be able to fulfill dependency
-                end
-                depmods[i] = (path, req_key, req_build_id)
             end
+            spec = locate_package_load_spec(req_key) # TODO: add env and/or skip this when stalecheck is false
+            if spec === nothing
+                @debug "Rejecting cache file $cachefile because dependency $req_key not found."
+                record_reason(reasons, "dependency source file not found")
+                return true # Won't be able to fulfill dependency
+            end
+            depmods[i] = (spec, req_key, req_build_id)
         end
 
         # check if this file is going to provide one of our concrete dependencies
         # or if it provides a version that conflicts with our concrete dependencies
         # or neither
-        skip_timecheck = false
-        for (req_key, req_build_id) in _concrete_dependencies
-            build_id = get(modules, req_key, UInt64(0))
-            if build_id !== UInt64(0)
-                build_id |= UInt128(checksum) << 64
-                if build_id === req_build_id
-                    skip_timecheck = true
-                    break
+        if stalecheck
+            for (req_key, req_build_id) in _concrete_dependencies
+                build_id = get(modules, req_key, UInt64(0))
+                if build_id !== UInt64(0)
+                    build_id |= UInt128(checksum) << 64
+                    if build_id === req_build_id
+                        stalecheck = false
+                        break
+                    end
+                    @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))"
+                    record_reason(reasons, "different dependency build identifier")
+                    return true # cachefile doesn't provide the required version of the dependency
                 end
-                @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))"
-                return true # cachefile doesn't provide the required version of the dependency
             end
         end
 
-        # now check if this file is fresh relative to its source files
-        if !skip_timecheck
-            if !samefile(includes[1].filename, modpath) && !samefile(fixup_stdlib_path(includes[1].filename), modpath)
-                @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath"
-                return true # cache file was compiled from a different path
+        # now check if this file's content hash has changed relative to its source files
+        if stalecheck
+            if !samefile(includes[1].filename, modspec.path)
+                # In certain cases the path rewritten by `fixup_stdlib_path` may
+                # point to an unreadable directory, make sure we can `stat` the
+                # file before comparing it with `modpath`.
+                stdlib_path = fixup_stdlib_path(includes[1].filename)
+                if !(isreadable(stdlib_path) && samefile(stdlib_path, modspec.path))
+                    @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath"
+                    record_reason(reasons, "different source file path")
+                    return true # cache file was compiled from a different path
+                end
             end
             for (modkey, req_modkey) in requires
                 # verify that `require(modkey, name(req_modkey))` ==> `req_modkey`
-                if identify_package(modkey, req_modkey.name) != req_modkey
-                    @debug "Rejecting cache file $cachefile because uuid mapping for $modkey => $req_modkey has changed"
+                pkg = identify_package(modkey, req_modkey.name)
+                if pkg != req_modkey
+                    @debug "Rejecting cache file $cachefile because uuid mapping for $modkey => $req_modkey has changed, expected $modkey => $(repr("text/plain", pkg))"
+                    record_reason(reasons, "dependency identifier changed")
                     return true
                 end
             end
-            for chi in includes
-                f, ftime_req = chi.filename, chi.mtime
-                if !ispath(f)
-                    _f = fixup_stdlib_path(f)
-                    if isfile(_f) && startswith(_f, Sys.STDLIB)
-                        # mtime is changed by extraction
-                        @debug "Skipping mtime check for file $f used by $cachefile, since it is a stdlib"
-                        continue
-                    end
-                    @debug "Rejecting stale cache file $cachefile because file $f does not exist"
-                    return true
-                end
-                ftime = mtime(f)
-                is_stale = ( ftime != ftime_req ) &&
-                           ( ftime != floor(ftime_req) ) &&           # Issue #13606, PR #13613: compensate for Docker images rounding mtimes
-                           ( ftime != ceil(ftime_req) ) &&            # PR: #47433 Compensate for CirceCI's truncating of timestamps in its caching
-                           ( ftime != trunc(ftime_req, digits=6) ) && # Issue #20837, PR #20840: compensate for GlusterFS truncating mtimes to microseconds
-                           ( ftime != 1.0 )  &&                       # PR #43090: provide compatibility with Nix mtime.
-                           !( 0 < (ftime_req - ftime) < 1e-6 )        # PR #45552: Compensate for Windows tar giving mtimes that may be incorrect by up to one microsecond
-                if is_stale
-                    @debug "Rejecting stale cache file $cachefile (mtime $ftime_req) because file $f (mtime $ftime) has changed"
-                    return true
-                end
+            if any_includes_stale(includes, cachefile, reasons)
+                return true
             end
         end
 
         if !isvalid_file_crc(io)
             @debug "Rejecting cache file $cachefile because it has an invalid checksum"
+            record_reason(reasons, "cache file checksum is invalid")
             return true
         end
 
         if pkgimage
             if !isvalid_pkgimage_crc(io, ocachefile::String)
                 @debug "Rejecting cache file $cachefile because $ocachefile has an invalid checksum"
+                record_reason(reasons, "native code cache checksum is invalid")
                 return true
             end
         end
@@ -3073,10 +4377,11 @@ end
         curr_prefs_hash = get_preferences_hash(id.uuid, prefs)
         if prefs_hash != curr_prefs_hash
             @debug "Rejecting cache file $cachefile because preferences hash does not match 0x$(string(prefs_hash, base=16)) != 0x$(string(curr_prefs_hash, base=16))"
+            record_reason(reasons, "package preferences changed")
             return true
         end
 
-        return depmods, ocachefile # fresh cachefile
+        return depmods, ocachefile, id_build # fresh cachefile
     finally
         close(io)
     end
@@ -3098,9 +4403,32 @@ end
 """
     @__DIR__ -> String
 
-Expand to a string with the absolute path to the directory of the file
-containing the macrocall.
-Return the current working directory if run from a REPL or if evaluated by `julia -e <expr>`.
+Macro to obtain the absolute path of the current directory as a string.
+
+If in a script, returns the directory of the script containing the `@__DIR__` macrocall. If run from a
+REPL or if evaluated by `julia -e <expr>`, returns the current working directory.
+
+# Examples
+
+The example illustrates the difference in the behaviors of `@__DIR__` and `pwd()`, by creating
+a simple script in a different directory than the current working one and executing both commands:
+
+```julia-repl
+julia> cd("/home/JuliaUser") # working directory
+
+julia> # create script at /home/JuliaUser/Projects
+       open("/home/JuliaUser/Projects/test.jl","w") do io
+           print(io, \"\"\"
+               println("@__DIR__ = ", @__DIR__)
+               println("pwd() = ", pwd())
+           \"\"\")
+       end
+
+julia> # outputs script directory and current working directory
+       include("/home/JuliaUser/Projects/test.jl")
+@__DIR__ = /home/JuliaUser/Projects
+pwd() = /home/JuliaUser
+```
 """
 macro __DIR__()
     __source__.file === nothing && return nothing
@@ -3108,6 +4436,31 @@ macro __DIR__()
     return isempty(_dirname) ? pwd() : abspath(_dirname)
 end
 
+function prepare_compiler_stub_image!()
+    ccall(:jl_add_to_module_init_list, Cvoid, (Any,), Compiler)
+    register_root_module(Compiler)
+    filter!(mod->mod !== Compiler, loaded_modules_order)
+end
+
+function expand_compiler_path(tup)
+    (tup[1], joinpath(Sys.BINDIR, DATAROOTDIR, tup[2]), tup[3:end]...)
+end
+compiler_chi(tup::Tuple) = CacheHeaderIncludes(expand_compiler_path(tup))
+
+"""
+    isprecompilable(f, argtypes::Tuple{Vararg{Any}})
+
+Check, as far as is possible without actually compiling, if the given
+function `f` can be compiled for the argument tuple (of types) `argtypes`.
+"""
+function isprecompilable(@nospecialize(f), @nospecialize(argtypes::Tuple))
+    isprecompilable(Tuple{Core.Typeof(f), argtypes...})
+end
+
+function isprecompilable(@nospecialize(argt::Type))
+    ccall(:jl_is_compilable, Int32, (Any,), argt) != 0
+end
+
 """
     precompile(f, argtypes::Tuple{Vararg{Any}})
 
@@ -3127,8 +4480,8 @@ function precompile(@nospecialize(argt::Type))
 end
 
 # Variants that work for `invoke`d calls for which the signature may not be sufficient
-precompile(mi::Core.MethodInstance, world::UInt=get_world_counter()) =
-    (ccall(:jl_compile_method_instance, Cvoid, (Any, Any, UInt), mi, C_NULL, world); return true)
+precompile(mi::MethodInstance, world::UInt=get_world_counter()) =
+    (ccall(:jl_compile_method_instance, Cvoid, (Any, Ptr{Cvoid}, UInt), mi, C_NULL, world); return true)
 
 """
     precompile(f, argtypes::Tuple{Vararg{Any}}, m::Method)
@@ -3143,11 +4496,11 @@ end
 
 function precompile(@nospecialize(argt::Type), m::Method)
     atype, sparams = ccall(:jl_type_intersection_with_env, Any, (Any, Any), argt, m.sig)::SimpleVector
-    mi = Core.Compiler.specialize_method(m, atype, sparams)
+    mi = Base.Compiler.specialize_method(m, atype, sparams)
     return precompile(mi)
 end
 
-precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing))
-precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String))
-precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), IO, IO))
-precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), IO, IO))
+precompile(include_package_for_output, (PkgId, String, VersionNumber, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing)) || @assert false
+precompile(include_package_for_output, (PkgId, String, VersionNumber, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String)) || @assert false
+precompile(create_expr_cache, (PkgId, PkgLoadSpec, String, String, typeof(_concrete_dependencies), Cmd, CacheFlags, IO, IO)) || @assert false
+precompile(create_expr_cache, (PkgId, PkgLoadSpec, String, Nothing, typeof(_concrete_dependencies), Cmd, CacheFlags, IO, IO)) || @assert false
diff --git a/base/lock.jl b/base/lock.jl
index 1663a765111bb..627b9ddad3122 100644
--- a/base/lock.jl
+++ b/base/lock.jl
@@ -2,16 +2,31 @@
 
 const ThreadSynchronizer = GenericCondition{Threads.SpinLock}
 
+"""
+    current_task()
+
+Get the currently running [`Task`](@ref).
+"""
+current_task() = ccall(:jl_get_current_task, Ref{Task}, ())
+
+# This bit is set in the `havelock` of a `ReentrantLock` when that lock is locked by some task.
+const LOCKED_BIT = 0b01
+# This bit is set in the `havelock` of a `ReentrantLock` just before parking a task. A task is being
+# parked if it wants to lock the lock, but it is currently being held by some other task.
+const PARKED_BIT = 0b10
+
+const MAX_SPIN_ITERS = 40
+
 # Advisory reentrant lock
 """
     ReentrantLock()
 
-Creates a re-entrant lock for synchronizing [`Task`](@ref)s. The same task can
+Create a re-entrant lock for synchronizing [`Task`](@ref)s. The same task can
 acquire the lock as many times as required (this is what the "Reentrant" part
 of the name means). Each [`lock`](@ref) must be matched with an [`unlock`](@ref).
 
-Calling 'lock' will also inhibit running of finalizers on that thread until the
-corresponding 'unlock'. Use of the standard lock pattern illustrated below
+Calling `lock` will also inhibit running of finalizers on that thread until the
+corresponding `unlock`. Use of the standard lock pattern illustrated below
 should naturally be supported, but beware of inverting the try/lock order or
 missing the try block entirely (e.g. attempting to return with the lock still
 held):
@@ -36,7 +51,28 @@ mutable struct ReentrantLock <: AbstractLock
     # offset32 = 20, offset64 = 24
     reentrancy_cnt::UInt32
     # offset32 = 24, offset64 = 28
-    @atomic havelock::UInt8 # 0x0 = none, 0x1 = lock, 0x2 = conflict
+    #
+    # This atomic integer holds the current state of the lock instance. Only the two lowest bits
+    # are used. See `LOCKED_BIT` and `PARKED_BIT` for the bitmask for these bits.
+    #
+    # # State table:
+    #
+    # PARKED_BIT | LOCKED_BIT | Description
+    #     0      |     0      | The lock is not locked, nor is anyone waiting for it.
+    # -----------+------------+------------------------------------------------------------------
+    #     0      |     1      | The lock is locked by exactly one task. No other task is
+    #            |            | waiting for it.
+    # -----------+------------+------------------------------------------------------------------
+    #     1      |     0      | The lock is not locked. One or more tasks are parked.
+    # -----------+------------+------------------------------------------------------------------
+    #     1      |     1      | The lock is locked by exactly one task. One or more tasks are
+    #            |            | parked waiting for the lock to become available.
+    #            |            | In this state, PARKED_BIT is only ever cleared when the cond_wait lock
+    #            |            | is held (i.e. on unlock). This ensures that
+    #            |            | we never end up in a situation where there are parked tasks but
+    #            |            | PARKED_BIT is not set (which would result in those tasks
+    #            |            | potentially never getting woken up).
+    @atomic havelock::UInt8
     # offset32 = 28, offset64 = 32
     cond_wait::ThreadSynchronizer # 2 words
     # offset32 = 36, offset64 = 48
@@ -51,6 +87,20 @@ end
 
 assert_havelock(l::ReentrantLock) = assert_havelock(l, l.locked_by)
 
+show(io::IO, ::ReentrantLock) = print(io, ReentrantLock, "()")
+
+function show(io::IO, ::MIME"text/plain", l::ReentrantLock)
+    show(io, l)
+    if !(get(io, :compact, false)::Bool)
+        locked_by = l.locked_by
+        if locked_by isa Task
+            print(io, " (locked by ", locked_by === current_task() ? "current " : "", locked_by, ")")
+        else
+            print(io, " (unlocked)")
+        end
+    end
+end
+
 """
     islocked(lock) -> Status (Boolean)
 
@@ -91,7 +141,7 @@ function islocked end
 # `ReentrantLock`.
 
 function islocked(rl::ReentrantLock)
-    return (@atomic :monotonic rl.havelock) != 0
+    return (@atomic :monotonic rl.havelock) & LOCKED_BIT != 0
 end
 
 """
@@ -115,7 +165,6 @@ function trylock end
 @inline function trylock(rl::ReentrantLock)
     ct = current_task()
     if rl.locked_by === ct
-        #@assert rl.havelock !== 0x00
         rl.reentrancy_cnt += 0x0000_0001
         return true
     end
@@ -123,9 +172,8 @@ function trylock end
 end
 @noinline function _trylock(rl::ReentrantLock, ct::Task)
     GC.disable_finalizers()
-    if (@atomicreplace :acquire rl.havelock 0x00 => 0x01).success
-        #@assert rl.locked_by === nothing
-        #@assert rl.reentrancy_cnt === 0
+    state = (@atomic :monotonic rl.havelock) & PARKED_BIT
+    if (@atomicreplace :acquire rl.havelock state => (state | LOCKED_BIT)).success
         rl.reentrancy_cnt = 0x0000_0001
         @atomic :release rl.locked_by = ct
         return true
@@ -145,24 +193,71 @@ Each `lock` must be matched by an [`unlock`](@ref).
 """
 @inline function lock(rl::ReentrantLock)
     trylock(rl) || (@noinline function slowlock(rl::ReentrantLock)
+        Threads.lock_profiling() && Threads.inc_lock_conflict_count()
         c = rl.cond_wait
-        lock(c.lock)
-        try
-            while true
-                if (@atomicreplace rl.havelock 0x01 => 0x02).old == 0x00 # :sequentially_consistent ? # now either 0x00 or 0x02
-                    # it was unlocked, so try to lock it ourself
-                    _trylock(rl, current_task()) && break
-                else # it was locked, so now wait for the release to notify us
-                    wait(c)
+        ct = current_task()
+        iteration = 1
+        while true
+            state = @atomic :monotonic rl.havelock
+            # Grab the lock if it isn't locked, even if there is a queue on it
+            if state & LOCKED_BIT == 0
+                GC.disable_finalizers()
+                result = (@atomicreplace :acquire :monotonic rl.havelock state => (state | LOCKED_BIT))
+                if result.success
+                    rl.reentrancy_cnt = 0x0000_0001
+                    @atomic :release rl.locked_by = ct
+                    return
                 end
+                GC.enable_finalizers()
+                continue
             end
-        finally
-            unlock(c.lock)
+
+            if state & PARKED_BIT == 0
+                # If there is no queue, try spinning a few times
+                if iteration <= MAX_SPIN_ITERS
+                    Base.yield()
+                    iteration += 1
+                    continue
+                end
+
+                # If still not locked, try setting the parked bit
+                @atomicreplace :monotonic :monotonic rl.havelock state => (state | PARKED_BIT)
+            end
+
+            # lock the `cond_wait`
+            lock(c.lock)
+
+            # Last check before we wait to make sure `unlock` did not win the race
+            # to the `cond_wait` lock and cleared the parked bit
+            state = @atomic :acquire rl.havelock
+            if state != LOCKED_BIT | PARKED_BIT
+                unlock(c.lock)
+                continue
+            end
+
+            # It was locked, so now wait for the unlock to notify us
+            wait_no_relock(c)
+
+            # Loop back and try locking again
+            iteration = 1
         end
     end)(rl)
     return
 end
 
+function wait_no_relock(c::GenericCondition)
+    ct = current_task()
+    _wait2(c, ct)
+    token = unlockall(c.lock)
+    try
+        return wait()
+    catch
+        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        rethrow()
+    end
+end
+
+
 """
     unlock(lock)
 
@@ -179,18 +274,27 @@ internal counter and return immediately.
         rl.reentrancy_cnt = n
         if n == 0x0000_00000
             @atomic :monotonic rl.locked_by = nothing
-            if (@atomicswap :release rl.havelock = 0x00) == 0x02
+            result = (@atomicreplace :release :monotonic rl.havelock LOCKED_BIT => 0x00)
+            if result.success
+                return true
+            else
                 (@noinline function notifywaiters(rl)
                     cond_wait = rl.cond_wait
                     lock(cond_wait)
-                    try
-                        notify(cond_wait)
-                    finally
-                        unlock(cond_wait)
+
+                    notify(cond_wait, all=false)
+                    if !isempty(cond_wait.waitq)
+                        @atomic :release rl.havelock = PARKED_BIT
+                    else
+                        # We may have won the race to the `cond_wait` lock as a task was about to park
+                        # but we unlock anyway as any parking task will retry
+                        @atomic :release rl.havelock = 0x00
                     end
+
+                    unlock(cond_wait)
                 end)(rl)
+                return true
             end
-            return true
         end
         return false
     end)(rl) && GC.enable_finalizers()
@@ -220,6 +324,8 @@ available.
 When this function returns, the `lock` has been released, so the caller should
 not attempt to `unlock` it.
 
+See also: [`@lock`](@ref).
+
 !!! compat "Julia 1.7"
     Using a [`Channel`](@ref) as the second argument requires Julia 1.7 or later.
 """
@@ -258,6 +364,9 @@ end
 ```
 This is similar to using [`lock`](@ref) with a `do` block, but avoids creating a closure
 and thus can improve the performance.
+
+!!! compat
+    `@lock` was added in Julia 1.3, and exported in Julia 1.7.
 """
 macro lock(l, expr)
     quote
@@ -288,6 +397,63 @@ macro lock_nofail(l, expr)
     end
 end
 
+"""
+    Lockable(value, lock = ReentrantLock())
+
+Create a `Lockable` object that wraps `value` and
+associates it with the provided `lock`. This object
+supports [`@lock`](@ref), [`lock`](@ref), [`trylock`](@ref),
+[`unlock`](@ref). To access the value, index the lockable object while
+holding the lock.
+
+!!! compat "Julia 1.11"
+    Requires at least Julia 1.11.
+
+## Example
+
+```jldoctest
+julia> locked_list = Base.Lockable(Int[]);
+
+julia> @lock(locked_list, push!(locked_list[], 1)) # must hold the lock to access the value
+1-element Vector{Int64}:
+ 1
+
+julia> lock(summary, locked_list)
+"1-element Vector{Int64}"
+```
+"""
+struct Lockable{T, L <: AbstractLock}
+    value::T
+    lock::L
+end
+
+Lockable(value) = Lockable(value, ReentrantLock())
+getindex(l::Lockable) = (assert_havelock(l.lock); l.value)
+
+"""
+    lock(f::Function, l::Lockable)
+
+Acquire the lock associated with `l`, execute `f` with the lock held,
+and release the lock when `f` returns. `f` will receive one positional
+argument: the value wrapped by `l`. If the lock is already locked by a
+different task/thread, wait for it to become available.
+When this function returns, the `lock` has been released, so the caller should
+not attempt to `unlock` it.
+
+!!! compat "Julia 1.11"
+    Requires at least Julia 1.11.
+"""
+function lock(f, l::Lockable)
+    lock(l.lock) do
+        f(l.value)
+    end
+end
+
+# implement the rest of the Lock interface on Lockable
+lock(l::Lockable) = lock(l.lock)
+trylock(l::Lockable) = trylock(l.lock)
+unlock(l::Lockable) = unlock(l.lock)
+
 @eval Threads begin
     """
         Threads.Condition([lock])
@@ -395,6 +561,36 @@ function acquire(f, s::Semaphore)
     end
 end
 
+"""
+    Base.@acquire s::Semaphore expr
+
+Macro version of `Base.acquire(f, s::Semaphore)` but with `expr` instead of `f` function.
+Expands to:
+```julia
+Base.acquire(s)
+try
+    expr
+finally
+    Base.release(s)
+end
+```
+This is similar to using [`acquire`](@ref) with a `do` block, but avoids creating a closure.
+
+!!! compat "Julia 1.13"
+    `Base.@acquire` was added in Julia 1.13
+"""
+macro acquire(s, expr)
+    quote
+        local temp = $(esc(s))
+        Base.acquire(temp)
+        try
+            $(esc(expr))
+        finally
+            Base.release(temp)
+        end
+    end
+end
+
 """
     release(s::Semaphore)
 
@@ -435,8 +631,8 @@ This provides an acquire & release memory ordering on notify/wait.
     The `autoreset` functionality and memory ordering guarantee requires at least Julia 1.8.
 """
 mutable struct Event
-    notify::Threads.Condition
-    autoreset::Bool
+    const notify::Threads.Condition
+    const autoreset::Bool
     @atomic set::Bool
     Event(autoreset::Bool=false) = new(Threads.Condition(), autoreset, false)
 end
@@ -493,3 +689,303 @@ end
     import .Base: Event
     export Event
 end
+
+const PerStateInitial       = 0x00
+const PerStateHasrun        = 0x01
+const PerStateErrored       = 0x02
+const PerStateConcurrent    = 0x03
+
+"""
+    OncePerProcess{T}(init::Function)() -> T
+
+Calling a `OncePerProcess` object returns a value of type `T` by running the
+function `initializer` exactly once per process. All concurrent and future
+calls in the same process will return exactly the same value. This is useful in
+code that will be precompiled, as it allows setting up caches or other state
+which won't get serialized.
+
+!!! compat "Julia 1.12"
+    This type requires Julia 1.12 or later.
+
+## Example
+
+```jldoctest
+julia> const global_state = Base.OncePerProcess{Vector{UInt32}}() do
+           println("Making lazy global value...done.")
+           return [Libc.rand()]
+       end;
+
+julia> (procstate = global_state()) |> typeof
+Making lazy global value...done.
+Vector{UInt32} (alias for Array{UInt32, 1})
+
+julia> procstate === global_state()
+true
+
+julia> procstate === fetch(@async global_state())
+true
+```
+"""
+mutable struct OncePerProcess{T, F} <: Function
+    value::Union{Nothing,T}
+    @atomic state::UInt8 # 0=initial, 1=hasrun, 2=error
+    @atomic allow_compile_time::Bool
+    const initializer::F
+    const lock::ReentrantLock
+
+    function OncePerProcess{T,F}(initializer::F) where {T, F}
+        once = new{T,F}(nothing, PerStateInitial, true, initializer, ReentrantLock())
+        return once
+    end
+end
+OncePerProcess{T}(initializer::Type{U}) where {T, U} = OncePerProcess{T, Type{U}}(initializer)
+OncePerProcess{T}(initializer::F) where {T, F} = OncePerProcess{T, F}(initializer)
+OncePerProcess(initializer::Type{U}) where U = OncePerProcess{Base.promote_op(initializer), Type{U}}(initializer)
+OncePerProcess(initializer) = OncePerProcess{Base.promote_op(initializer), typeof(initializer)}(initializer)
+@inline function (once::OncePerProcess{T,F})() where {T,F}
+    state = (@atomic :acquire once.state)
+    if state != PerStateHasrun
+        (@noinline function init_perprocesss(once::OncePerProcess{T,F}, state::UInt8) where {T,F}
+            state == PerStateErrored && error("OncePerProcess initializer failed previously")
+            once.allow_compile_time || __precompile__(false)
+            lock(once.lock)
+            try
+                state = @atomic :monotonic once.state
+                if state == PerStateInitial
+                    ccall(:jl_set_precompile_field_replace, Cvoid, (Any, Any, Any),
+                        once, :value, nothing)
+                    ccall(:jl_set_precompile_field_replace, Cvoid, (Any, Any, Any),
+                        once, :state, PerStateInitial)
+                    once.value = once.initializer()
+                elseif state == PerStateErrored
+                    error("OncePerProcess initializer failed previously")
+                elseif state != PerStateHasrun
+                    error("invalid state for OncePerProcess")
+                end
+            catch
+                state == PerStateErrored || @atomic :release once.state = PerStateErrored
+                unlock(once.lock)
+                rethrow()
+            end
+            state == PerStateHasrun || @atomic :release once.state = PerStateHasrun
+            unlock(once.lock)
+            nothing
+        end)(once, state)
+    end
+    return once.value::T
+end
+
+function copyto_monotonic!(dest::AtomicMemory, src)
+    i = 1
+    for j in eachindex(src)
+        if isassigned(src, j)
+            @atomic :monotonic dest[i] = src[j]
+        #else
+        #    _unsetindex_atomic!(dest, i, src[j], :monotonic)
+        end
+        i += 1
+    end
+    dest
+end
+
+function fill_monotonic!(dest::AtomicMemory, x)
+    for i = 1:length(dest)
+        @atomic :monotonic dest[i] = x
+    end
+    dest
+end
+
+
+# share a lock/condition, since we just need it briefly, so some contention is okay
+const PerThreadLock = Threads.SpinLock()
+"""
+    OncePerThread{T}(init::Function)() -> T
+
+Calling a `OncePerThread` object returns a value of type `T` by running the function
+`initializer` exactly once per thread. All future calls in the same thread, and
+concurrent or future calls with the same thread id, will return exactly the
+same value. The object can also be indexed by the threadid for any existing
+thread, to get (or initialize *on this thread*) the value stored for that
+thread. Incorrect usage can lead to data-races or memory corruption so use only
+if that behavior is correct within your library's threading-safety design.
+
+!!! warning
+    It is not necessarily true that a Task only runs on one thread, therefore the value
+    returned here may alias other values or change in the middle of your program. This function
+    may get deprecated in the future. If initializer yields, the thread running the current
+    task after the call might not be the same as the one at the start of the call.
+
+See also: [`OncePerTask`](@ref).
+
+!!! compat "Julia 1.12"
+    This type requires Julia 1.12 or later.
+
+## Example
+
+```jldoctest
+julia> const thread_state = Base.OncePerThread{Vector{UInt32}}() do
+           println("Making lazy thread value...done.")
+           return [Libc.rand()]
+       end;
+
+julia> (threadvec = thread_state()) |> typeof
+Making lazy thread value...done.
+Vector{UInt32} (alias for Array{UInt32, 1})
+
+julia> threadvec === fetch(@async thread_state())
+true
+
+julia> threadvec === thread_state[Threads.threadid()]
+true
+```
+"""
+mutable struct OncePerThread{T, F} <: Function
+    @atomic xs::AtomicMemory{T} # values
+    @atomic ss::AtomicMemory{UInt8} # states: 0=initial, 1=hasrun, 2=error, 3==concurrent
+    const initializer::F
+
+    function OncePerThread{T,F}(initializer::F) where {T, F}
+        xs, ss = AtomicMemory{T}(), AtomicMemory{UInt8}()
+        once = new{T,F}(xs, ss, initializer)
+        return once
+    end
+end
+OncePerThread{T}(initializer::Type{U}) where {T, U} = OncePerThread{T,Type{U}}(initializer)
+OncePerThread{T}(initializer::F) where {T, F} = OncePerThread{T,F}(initializer)
+OncePerThread(initializer::Type{U}) where U = OncePerThread{Base.promote_op(initializer), Type{U}}(initializer)
+OncePerThread(initializer) = OncePerThread{Base.promote_op(initializer), typeof(initializer)}(initializer)
+@inline (once::OncePerThread{T,F})() where {T,F} = once[Threads.threadid()]
+@inline function getindex(once::OncePerThread{T,F}, tid::Integer) where {T,F}
+    tid = Int(tid)
+    ss = @atomic :acquire once.ss
+    xs = @atomic :monotonic once.xs
+    # n.b. length(xs) >= length(ss)
+    if tid <= 0 || tid > length(ss) || (@atomic :acquire ss[tid]) != PerStateHasrun
+        (@noinline function init_perthread(once::OncePerThread{T,F}, tid::Int) where {T,F}
+            local ss = @atomic :acquire once.ss
+            local xs = @atomic :monotonic once.xs
+            local len = length(ss)
+            # slow path to allocate it
+            nt = Threads.maxthreadid()
+            0 < tid <= nt || throw(ArgumentError("thread id outside of allocated range"))
+            if tid <= length(ss) && (@atomic :acquire ss[tid]) == PerStateErrored
+                error("OncePerThread initializer failed previously")
+            end
+            newxs = xs
+            newss = ss
+            if tid > len
+                # attempt to do all allocations outside of PerThreadLock for better scaling
+                @assert length(xs) >= length(ss) "logical constraint violation"
+                newxs = typeof(xs)(undef, len + nt)
+                newss = typeof(ss)(undef, len + nt)
+            end
+            # uses state and locks to ensure this runs exactly once per tid argument
+            lock(PerThreadLock)
+            try
+                ss = @atomic :monotonic once.ss
+                xs = @atomic :monotonic once.xs
+                if tid > length(ss)
+                    if length(ss) == 0 # We are the first to initialize
+                        ccall(:jl_set_precompile_field_replace, Cvoid, (Any, Any, Any),
+                            once, :xs, xs)
+                        ccall(:jl_set_precompile_field_replace, Cvoid, (Any, Any, Any),
+                            once, :ss, ss)
+                    end
+                    @assert len <= length(ss) <= length(newss) "logical constraint violation"
+                    fill_monotonic!(newss, PerStateInitial)
+                    xs = copyto_monotonic!(newxs, xs)
+                    ss = copyto_monotonic!(newss, ss)
+                    @atomic :release once.xs = xs
+                    @atomic :release once.ss = ss
+                end
+                state = @atomic :monotonic ss[tid]
+                while state == PerStateConcurrent
+                    # lost race, wait for notification this is done running elsewhere
+                    # without releasing this thread
+                    unlock(PerThreadLock)
+                    while state == PerStateConcurrent
+                        # spin loop until ready
+                        ss = @atomic :acquire once.ss
+                        state = @atomic :monotonic ss[tid]
+                        GC.safepoint()
+                    end
+                    lock(PerThreadLock)
+                    ss = @atomic :monotonic once.ss
+                    state = @atomic :monotonic ss[tid]
+                end
+                if state == PerStateInitial
+                    # won the race, drop lock in exchange for state, and run user initializer
+                    @atomic :monotonic ss[tid] = PerStateConcurrent
+                    result = try
+                        unlock(PerThreadLock)
+                        once.initializer()
+                    catch
+                        lock(PerThreadLock)
+                        ss = @atomic :monotonic once.ss
+                        @atomic :release ss[tid] = PerStateErrored
+                        rethrow()
+                    end
+                    # store result and notify waiters
+                    lock(PerThreadLock)
+                    xs = @atomic :monotonic once.xs
+                    @atomic :release xs[tid] = result
+                    ss = @atomic :monotonic once.ss
+                    @atomic :release ss[tid] = PerStateHasrun
+                elseif state == PerStateErrored
+                    error("OncePerThread initializer failed previously")
+                elseif state != PerStateHasrun
+                    error("invalid state for OncePerThread")
+                end
+            finally
+                unlock(PerThreadLock)
+            end
+            nothing
+        end)(once, tid)
+        xs = @atomic :monotonic once.xs
+    end
+    return xs[tid]
+end
+
+"""
+    OncePerTask{T}(init::Function)() -> T
+
+Calling a `OncePerTask` object returns a value of type `T` by running the function `initializer`
+exactly once per Task. All future calls in the same Task will return exactly the same value.
+
+See also: [`task_local_storage`](@ref).
+
+!!! compat "Julia 1.12"
+    This type requires Julia 1.12 or later.
+
+## Example
+
+```jldoctest
+julia> const task_state = Base.OncePerTask{Vector{UInt32}}() do
+           println("Making lazy task value...done.")
+           return [Libc.rand()]
+       end;
+
+julia> (taskvec = task_state()) |> typeof
+Making lazy task value...done.
+Vector{UInt32} (alias for Array{UInt32, 1})
+
+julia> taskvec === task_state()
+true
+
+julia> taskvec === fetch(@async task_state())
+Making lazy task value...done.
+false
+```
+"""
+mutable struct OncePerTask{T, F} <: Function
+    const initializer::F
+
+    OncePerTask{T}(initializer::Type{U}) where {T, U} = new{T,Type{U}}(initializer)
+    OncePerTask{T}(initializer::F) where {T, F} = new{T,F}(initializer)
+    OncePerTask{T,F}(initializer::F) where {T, F} = new{T,F}(initializer)
+    OncePerTask(initializer::Type{U}) where U = new{Base.promote_op(initializer), Type{U}}(initializer)
+    OncePerTask(initializer) = new{Base.promote_op(initializer), typeof(initializer)}(initializer)
+end
+@inline function (once::OncePerTask{T,F})() where {T,F}
+    get!(once.initializer, task_local_storage(), once)::T
+end
diff --git a/base/locks-mt.jl b/base/locks-mt.jl
index 5d355b9ed200c..237e0d9856996 100644
--- a/base/locks-mt.jl
+++ b/base/locks-mt.jl
@@ -3,7 +3,7 @@
 import .Base: unsafe_convert, lock, trylock, unlock, islocked, wait, notify, AbstractLock
 
 export SpinLock
-
+public PaddedSpinLock
 # Important Note: these low-level primitives defined here
 #   are typically not for general usage
 
@@ -12,33 +12,68 @@ export SpinLock
 ##########################################
 
 """
-    SpinLock()
+    abstract type AbstractSpinLock <: AbstractLock end
 
-Create a non-reentrant, test-and-test-and-set spin lock.
+A non-reentrant, test-and-test-and-set spin lock.
 Recursive use will result in a deadlock.
 This kind of lock should only be used around code that takes little time
 to execute and does not block (e.g. perform I/O).
 In general, [`ReentrantLock`](@ref) should be used instead.
 
 Each [`lock`](@ref) must be matched with an [`unlock`](@ref).
-If [`!islocked(lck::SpinLock)`](@ref islocked) holds, [`trylock(lck)`](@ref trylock)
+If [`!islocked(lck::AbstractSpinLock)`](@ref islocked) holds, [`trylock(lck)`](@ref trylock)
 succeeds unless there are other tasks attempting to hold the lock "at the same time."
 
 Test-and-test-and-set spin locks are quickest up to about 30ish
 contending threads. If you have more contention than that, different
 synchronization approaches should be considered.
 """
-mutable struct SpinLock <: AbstractLock
+abstract type AbstractSpinLock <: AbstractLock end
+
+"""
+    SpinLock() <: AbstractSpinLock
+
+Spinlocks are not padded, and so may suffer from false sharing.
+See also [`PaddedSpinLock`](@ref).
+
+See the documentation for [`AbstractSpinLock`](@ref) regarding correct usage.
+"""
+mutable struct SpinLock <: AbstractSpinLock
     # we make this much larger than necessary to minimize false-sharing
     @atomic owned::Int
     SpinLock() = new(0)
 end
 
+# TODO: Determine the cache line size using e.g., CPUID. Meanwhile, this is correct for most
+# processors.
+const CACHE_LINE_SIZE = 64
+
+"""
+    PaddedSpinLock() <: AbstractSpinLock
+
+PaddedSpinLocks are padded so that each is guaranteed to be on its own cache line, to avoid
+false sharing.
+See also [`SpinLock`](@ref).
+
+See the documentation for [`AbstractSpinLock`](@ref) regarding correct usage.
+"""
+mutable struct PaddedSpinLock <: AbstractSpinLock
+    # we make this much larger than necessary to minimize false-sharing
+    _padding_before::NTuple{max(0, CACHE_LINE_SIZE - sizeof(Int)), UInt8}
+    @atomic owned::Int
+    _padding_after::NTuple{max(0, CACHE_LINE_SIZE - sizeof(Int)), UInt8}
+    function PaddedSpinLock()
+        l = new()
+        @atomic l.owned = 0
+        return l
+    end
+end
+
 # Note: this cannot assert that the lock is held by the correct thread, because we do not
 # track which thread locked it. Users beware.
-Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_violation()
+Base.assert_havelock(l::AbstractSpinLock) = islocked(l) ? nothing : Base.concurrency_violation()
 
-function lock(l::SpinLock)
+function lock(l::AbstractSpinLock)
     while true
         if @inline trylock(l)
             return
@@ -49,7 +84,7 @@ function lock(l::SpinLock)
     end
 end
 
-function trylock(l::SpinLock)
+function trylock(l::AbstractSpinLock)
     if l.owned == 0
         GC.disable_finalizers()
         p = @atomicswap :acquire l.owned = 1
@@ -61,7 +96,7 @@ function trylock(l::SpinLock)
     return false
 end
 
-function unlock(l::SpinLock)
+function unlock(l::AbstractSpinLock)
     if (@atomicswap :release l.owned = 0) == 0
         error("unlock count must match lock count")
     end
@@ -70,6 +105,6 @@ function unlock(l::SpinLock)
     return
 end
 
-function islocked(l::SpinLock)
+function islocked(l::AbstractSpinLock)
     return (@atomic :monotonic l.owned) != 0
 end
diff --git a/stdlib/Logging/src/ConsoleLogger.jl b/base/logging/ConsoleLogger.jl
similarity index 80%
rename from stdlib/Logging/src/ConsoleLogger.jl
rename to base/logging/ConsoleLogger.jl
index 747f8a2b22966..8666c145da6d7 100644
--- a/stdlib/Logging/src/ConsoleLogger.jl
+++ b/base/logging/ConsoleLogger.jl
@@ -9,6 +9,9 @@ interactive work with the Julia REPL.
 
 Log levels less than `min_level` are filtered out.
 
+This Logger is thread-safe, with locks for both orchestration of message
+limits i.e. `maxlog`, and writes to the stream.
+
 Message formatting can be controlled by setting keyword arguments:
 
 * `meta_formatter` is a function which takes the log event metadata
@@ -24,6 +27,7 @@ Message formatting can be controlled by setting keyword arguments:
 """
 struct ConsoleLogger <: AbstractLogger
     stream::IO
+    lock::ReentrantLock # do not log within this lock
     min_level::LogLevel
     meta_formatter
     show_limited::Bool
@@ -33,19 +37,19 @@ end
 function ConsoleLogger(stream::IO, min_level=Info;
                        meta_formatter=default_metafmt, show_limited=true,
                        right_justify=0)
-    ConsoleLogger(stream, min_level, meta_formatter,
+    ConsoleLogger(stream, ReentrantLock(), min_level, meta_formatter,
                   show_limited, right_justify, Dict{Any,Int}())
 end
 function ConsoleLogger(min_level=Info;
                        meta_formatter=default_metafmt, show_limited=true,
                        right_justify=0)
-    ConsoleLogger(closed_stream, min_level, meta_formatter,
+    ConsoleLogger(closed_stream, ReentrantLock(), min_level, meta_formatter,
                   show_limited, right_justify, Dict{Any,Int}())
 end
 
 
 shouldlog(logger::ConsoleLogger, level, _module, group, id) =
-    get(logger.message_limits, id, 1) > 0
+    @lock logger.lock get(logger.message_limits, id, 1) > 0
 
 min_enabled_level(logger::ConsoleLogger) = logger.min_level
 
@@ -109,19 +113,30 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module
     hasmaxlog = haskey(kwargs, :maxlog) ? 1 : 0
     maxlog = get(kwargs, :maxlog, nothing)
     if maxlog isa Core.BuiltinInts
-        remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
-        logger.message_limits[id] = remaining - 1
-        remaining > 0 || return
+        @lock logger.lock begin
+            remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
+            remaining == 0 && return
+            logger.message_limits[id] = remaining - 1
+        end
     end
 
     # Generate a text representation of the message and all key value pairs,
-    # split into lines.
-    msglines = [(indent=0, msg=l) for l in split(chomp(convert(String, string(message))::String), '\n')]
+    # split into lines.  This is specialised to improve type inference,
+    # and reduce the risk of resulting method invalidations.
+    message = string(message)
+    msglines = if Base._isannotated(message) && !isempty(Base.annotations(message)::Vector{Base.RegionAnnotation})
+        message = Base.AnnotatedString(String(message)::String, Base.annotations(message)::Vector{Base.RegionAnnotation})
+        @NamedTuple{indent::Int, msg::Union{SubString{Base.AnnotatedString{String}}, SubString{String}}}[
+            (indent=0, msg=l) for l in split(chomp(message), '\n')]
+    else
+        [(indent=0, msg=l) for l in split(
+             chomp(convert(String, message)::String), '\n')]
+    end
     stream::IO = logger.stream
     if !(isopen(stream)::Bool)
         stream = stderr
     end
-    dsize = displaysize(stream)::Tuple{Int,Int}
+    dsize = Base.displaysize_(stream)::Tuple{Int,Int}
     nkwargs = length(kwargs)::Int
     if nkwargs > hasmaxlog
         valbuf = IOBuffer()
@@ -132,7 +147,7 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module
         for (key, val) in kwargs
             key === :maxlog && continue
             showvalue(valio, val)
-            vallines = split(String(take!(valbuf)), '\n')
+            vallines = split(takestring!(valbuf), '\n')
             if length(vallines) == 1
                 push!(msglines, (indent=2, msg=SubString("$key = $(vallines[1])")))
             else
@@ -175,6 +190,7 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module
         println(iob)
     end
 
-    write(stream, take!(buf))
+    b = take!(buf)
+    @lock logger.lock write(stream, b)
     nothing
 end
diff --git a/base/logging.jl b/base/logging/logging.jl
similarity index 87%
rename from base/logging.jl
rename to base/logging/logging.jl
index c42af08d8f4ae..25f4dbe4902be 100644
--- a/base/logging.jl
+++ b/base/logging/logging.jl
@@ -3,6 +3,7 @@
 module CoreLogging
 
 import Base: isless, +, -, convert, show
+import Base.ScopedValues: ScopedValue, with, @with
 
 export
     AbstractLogger,
@@ -59,7 +60,7 @@ function min_enabled_level end
     catch_exceptions(logger)
 
 Return `true` if the logger should catch exceptions which happen during log
-record construction.  By default, messages are caught
+record construction.  By default, messages are caught.
 
 By default all exceptions are caught to prevent log message generation from
 crashing the program.  This lets users confidently toggle little-used
@@ -116,7 +117,7 @@ filtered, before any other work is done to construct the log record data
 structure itself.
 
 # Examples
-```julia-repl
+```jldoctest; setup = :(import Logging)
 julia> Logging.LogLevel(0) == Logging.Info
 true
 ```
@@ -131,7 +132,13 @@ isless(a::LogLevel, b::LogLevel) = isless(a.level, b.level)
 +(level::LogLevel, inc::Integer) = LogLevel(level.level+inc)
 -(level::LogLevel, inc::Integer) = LogLevel(level.level-inc)
 convert(::Type{LogLevel}, level::Integer) = LogLevel(level)
+convert(::Type{Int32}, level::LogLevel) = level.level
 
+"""
+    BelowMinLevel
+
+Alias for [`LogLevel(-1_000_001)`](@ref LogLevel).
+"""
 const BelowMinLevel = LogLevel(-1000001)
 """
     Debug
@@ -157,8 +164,17 @@ const Warn          = LogLevel(    1000)
 Alias for [`LogLevel(2000)`](@ref LogLevel).
 """
 const Error         = LogLevel(    2000)
+"""
+    AboveMaxLevel
+
+Alias for [`LogLevel(1_000_001)`](@ref LogLevel).
+"""
 const AboveMaxLevel = LogLevel( 1000001)
 
+# Global log limiting mechanism for super fast but inflexible global log limiting.
+# Atomic ensures that the value is always consistent across threads.
+const _min_enabled_level = Threads.Atomic{Int32}(Debug)
+
 function show(io::IO, level::LogLevel)
     if     level == BelowMinLevel  print(io, "BelowMinLevel")
     elseif level == Debug          print(io, "Debug")
@@ -319,6 +335,15 @@ function issimplekw(@nospecialize val)
     return false
 end
 
+# helper function to get the current logger, if enabled for the specified message type
+@noinline Base.@constprop :none function current_logger_for_env(std_level::LogLevel, group, _module)
+    logstate = @inline current_logstate()
+    if std_level >= logstate.min_enabled_level || env_override_minlevel(group, _module)
+        return logstate.logger
+    end
+    return nothing
+end
+
 # Generate code for logging macros
 function logmsg_code(_module, file, line, level, message, exs...)
     @nospecialize
@@ -335,12 +360,12 @@ function logmsg_code(_module, file, line, level, message, exs...)
         checkerrors = nothing
         for kwarg in reverse(log_data.kwargs)
             if isa(kwarg.args[2].args[1], Symbol)
-                checkerrors = Expr(:if, Expr(:isdefined, kwarg.args[2]), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(kwarg.args[2].args[1])))
+                checkerrors = Expr(:if, Expr(:isdefined, kwarg.args[2]), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(kwarg.args[2].args[1]), QuoteNode(:local)))
             end
         end
         if isa(message, Symbol)
             message = esc(message)
-            checkerrors = Expr(:if, Expr(:isdefined, message), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(message.args[1])))
+            checkerrors = Expr(:if, Expr(:isdefined, message), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(message.args[1]), QuoteNode(:local)))
         end
         logrecord = quote
             let err = $checkerrors
@@ -349,7 +374,7 @@ function logmsg_code(_module, file, line, level, message, exs...)
                     kwargs = (;$(log_data.kwargs...))
                     true
                 else
-                    @invokelatest logging_error(logger, level, _module, group, id, file, line, err, false)
+                    @invokelatest $(logging_error)(logger, level, _module, group, id, file, line, err, false)
                     false
                 end
             end
@@ -361,7 +386,7 @@ function logmsg_code(_module, file, line, level, message, exs...)
                 kwargs = (;$(log_data.kwargs...))
                 true
             catch err
-                @invokelatest logging_error(logger, level, _module, group, id, file, line, err, true)
+                @invokelatest $(logging_error)(logger, level, _module, group, id, file, line, err, true)
                 false
             end
         end
@@ -370,25 +395,29 @@ function logmsg_code(_module, file, line, level, message, exs...)
         let
             level = $level
             # simplify std_level code emitted, if we know it is one of our global constants
-            std_level = $(level isa Symbol ? :level : :(level isa LogLevel ? level : convert(LogLevel, level)::LogLevel))
-            if std_level >= _min_enabled_level[]
+            std_level = $(level isa Symbol ? :level : :(level isa $LogLevel ? level : convert($LogLevel, level)::$LogLevel))
+            if std_level.level >= $(_min_enabled_level)[]
                 group = $(log_data._group)
                 _module = $(log_data._module)
-                logger = current_logger_for_env(std_level, group, _module)
+                logger = $(current_logger_for_env)(std_level, group, _module)
                 if !(logger === nothing)
                     id = $(log_data._id)
                     # Second chance at an early bail-out (before computing the message),
                     # based on arbitrary logger-specific logic.
-                    if invokelatest(shouldlog, logger, level, _module, group, id)
+                    if invokelatest($shouldlog, logger, level, _module, group, id)
                         file = $(log_data._file)
                         if file isa String
                             file = Base.fixup_stdlib_path(file)
                         end
                         line = $(log_data._line)
                         local msg, kwargs
-                        $(logrecord) && invokelatest(handle_message,
-                            logger, level, msg, _module, group, id, file, line;
-                            kwargs...)
+                        if $(logrecord)
+                            @assert @isdefined(msg) "Assertion to tell the compiler about the definedness of this variable"
+                            @assert @isdefined(kwargs) "Assertion to tell the compiler about the definedness of this variable"
+                            $handle_message_nothrow(
+                                logger, level, msg, _module, group, id, file, line;
+                                kwargs...)
+                        end
                     end
                 end
             end
@@ -397,6 +426,18 @@ function logmsg_code(_module, file, line, level, message, exs...)
     end
 end
 
+@noinline function handle_message_nothrow(logger, level, msg, _module, group, id, file, line; kwargs...)
+    @nospecialize
+    try
+        @invokelatest handle_message(
+            logger, level, msg, _module, group, id, file, line;
+            kwargs...)
+
+    catch err
+        @invokelatest logging_error(logger, level, _module, group, id, file, line, err, true)
+    end
+end
+
 function process_logmsg_exs(_orig_module, _file, _line, level, message, exs...)
     @nospecialize
     local _group, _id
@@ -481,9 +522,6 @@ function logmsg_shim(level, message, _module, group, id, file, line, kwargs)
     nothing
 end
 
-# Global log limiting mechanism for super fast but inflexible global log limiting.
-const _min_enabled_level = Ref{LogLevel}(Debug)
-
 # LogState - a cache of data extracted from the logger, plus the logger itself.
 struct LogState
     min_enabled_level::LogLevel
@@ -492,31 +530,14 @@ end
 
 LogState(logger) = LogState(LogLevel(_invoked_min_enabled_level(logger)), logger)
 
+const CURRENT_LOGSTATE = ScopedValue{LogState}()
+
 function current_logstate()
-    logstate = current_task().logstate
-    return (logstate !== nothing ? logstate : _global_logstate)::LogState
+    maybe = @inline Base.ScopedValues.get(CURRENT_LOGSTATE)
+    return something(maybe, _global_logstate)::LogState
 end
 
-# helper function to get the current logger, if enabled for the specified message type
-@noinline Base.@constprop :none function current_logger_for_env(std_level::LogLevel, group, _module)
-    logstate = current_logstate()
-    if std_level >= logstate.min_enabled_level || env_override_minlevel(group, _module)
-        return logstate.logger
-    end
-    return nothing
-end
-
-function with_logstate(f::Function, logstate)
-    @nospecialize
-    t = current_task()
-    old = t.logstate
-    try
-        t.logstate = logstate
-        f()
-    finally
-        t.logstate = old
-    end
-end
+with_logstate(f::Function, logstate) = @with(CURRENT_LOGSTATE => logstate, f())
 
 #-------------------------------------------------------------------------------
 # Control of the current logger and early log filtering
@@ -526,7 +547,8 @@ end
 
 Disable all log messages at log levels equal to or less than `level`.  This is
 a *global* setting, intended to make debug logging extremely cheap when
-disabled.
+disabled. Note that this cannot be used to enable logging that is currently disabled
+by other mechanisms.
 
 # Examples
 ```julia
@@ -587,6 +609,8 @@ end
 end
 
 
+global _global_logstate::LogState
+
 """
     global_logger()
 
@@ -610,7 +634,7 @@ end
 
 Execute `function`, directing all log messages to `logger`.
 
-# Example
+# Examples
 
 ```julia
 function test(x)
@@ -646,17 +670,21 @@ close(closed_stream)
 Simplistic logger for logging all messages with level greater than or equal to
 `min_level` to `stream`. If stream is closed then messages with log level
 greater or equal to `Warn` will be logged to `stderr` and below to `stdout`.
+
+This Logger is thread-safe, with a lock taken around orchestration of message
+limits i.e. `maxlog`, and writes to the stream.
 """
 struct SimpleLogger <: AbstractLogger
     stream::IO
+    lock::ReentrantLock
     min_level::LogLevel
     message_limits::Dict{Any,Int}
 end
-SimpleLogger(stream::IO, level=Info) = SimpleLogger(stream, level, Dict{Any,Int}())
+SimpleLogger(stream::IO, level=Info) = SimpleLogger(stream, ReentrantLock(), level, Dict{Any,Int}())
 SimpleLogger(level=Info) = SimpleLogger(closed_stream, level)
 
 shouldlog(logger::SimpleLogger, level, _module, group, id) =
-    get(logger.message_limits, id, 1) > 0
+    @lock logger.lock get(logger.message_limits, id, 1) > 0
 
 min_enabled_level(logger::SimpleLogger) = logger.min_level
 
@@ -667,15 +695,14 @@ function handle_message(logger::SimpleLogger, level::LogLevel, message, _module,
     @nospecialize
     maxlog = get(kwargs, :maxlog, nothing)
     if maxlog isa Core.BuiltinInts
-        remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
-        logger.message_limits[id] = remaining - 1
-        remaining > 0 || return
+        @lock logger.lock begin
+            remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
+            remaining == 0 && return
+            logger.message_limits[id] = remaining - 1
+        end
     end
     buf = IOBuffer()
     stream::IO = logger.stream
-    if !(isopen(stream)::Bool)
-        stream = stderr
-    end
     iob = IOContext(buf, stream)
     levelstr = level == Warn ? "Warning" : string(level)
     msglines = eachsplit(chomp(convert(String, string(message))::String), '\n')
@@ -689,10 +716,18 @@ function handle_message(logger::SimpleLogger, level::LogLevel, message, _module,
         println(iob, "│   ", key, " = ", val)
     end
     println(iob, "└ @ ", _module, " ", filepath, ":", line)
-    write(stream, take!(buf))
+    b = take!(buf)
+    @lock logger.lock begin
+        if !(isopen(stream)::Bool)
+            stream = stderr
+        end
+        write(stream, b)
+    end
     nothing
 end
 
 _global_logstate = LogState(SimpleLogger())
 
+include("logging/ConsoleLogger.jl")
+
 end # CoreLogging
diff --git a/base/math.jl b/base/math.jl
index 71bd4949498b5..65343bc9ca8e6 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -23,9 +23,9 @@ import .Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
 using .Base: sign_mask, exponent_mask, exponent_one,
             exponent_half, uinttype, significand_mask,
             significand_bits, exponent_bits, exponent_bias,
-            exponent_max, exponent_raw_max
+            exponent_max, exponent_raw_max, clamp, clamp!
 
-using Core.Intrinsics: sqrt_llvm
+using Core.Intrinsics: sqrt_llvm, min_float, max_float
 
 using .Base: IEEEFloat
 
@@ -69,98 +69,6 @@ end
     return Txy, T(xy-Txy)
 end
 
-"""
-    clamp(x, lo, hi)
-
-Return `x` if `lo <= x <= hi`. If `x > hi`, return `hi`. If `x < lo`, return `lo`. Arguments
-are promoted to a common type.
-
-See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref).
-
-!!! compat "Julia 1.3"
-    `missing` as the first argument requires at least Julia 1.3.
-
-# Examples
-```jldoctest
-julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0)
-3-element Vector{BigFloat}:
- 3.141592653589793238462643383279502884197169399375105820974944592307816406286198
- 2.0
- 9.0
-
-julia> clamp.([11, 8, 5], 10, 6)  # an example where lo > hi
-3-element Vector{Int64}:
-  6
-  6
- 10
-```
-"""
-clamp(x::X, lo::L, hi::H) where {X,L,H} =
-    ifelse(x > hi, convert(promote_type(X,L,H), hi),
-           ifelse(x < lo,
-                  convert(promote_type(X,L,H), lo),
-                  convert(promote_type(X,L,H), x)))
-
-"""
-    clamp(x, T)::T
-
-Clamp `x` between `typemin(T)` and `typemax(T)` and convert the result to type `T`.
-
-See also [`trunc`](@ref).
-
-# Examples
-```jldoctest
-julia> clamp(200, Int8)
-127
-
-julia> clamp(-200, Int8)
--128
-
-julia> trunc(Int, 4pi^2)
-39
-```
-"""
-clamp(x, ::Type{T}) where {T<:Integer} = clamp(x, typemin(T), typemax(T)) % T
-
-
-"""
-    clamp!(array::AbstractArray, lo, hi)
-
-Restrict values in `array` to the specified range, in-place.
-See also [`clamp`](@ref).
-
-!!! compat "Julia 1.3"
-    `missing` entries in `array` require at least Julia 1.3.
-
-# Examples
-```jldoctest
-julia> row = collect(-4:4)';
-
-julia> clamp!(row, 0, Inf)
-1×9 adjoint(::Vector{Int64}) with eltype Int64:
- 0  0  0  0  0  1  2  3  4
-
-julia> clamp.((-4:4)', 0, Inf)
-1×9 Matrix{Float64}:
- 0.0  0.0  0.0  0.0  0.0  1.0  2.0  3.0  4.0
-```
-"""
-function clamp!(x::AbstractArray, lo, hi)
-    @inbounds for i in eachindex(x)
-        x[i] = clamp(x[i], lo, hi)
-    end
-    x
-end
-
-"""
-    clamp(x::Integer, r::AbstractUnitRange)
-
-Clamp `x` to lie within range `r`.
-
-!!! compat "Julia 1.6"
-     This method requires at least Julia 1.6.
-"""
-clamp(x::Integer, r::AbstractUnitRange{<:Integer}) = clamp(x, first(r), last(r))
 
 """
     evalpoly(x, p)
@@ -177,7 +85,7 @@ a Goertzel-like [^DK62] algorithm if `x` is complex.
 !!! compat "Julia 1.4"
     This function requires Julia 1.4 or later.
 
-# Example
+# Examples
 ```jldoctest
 julia> evalpoly(2, (1, 2, 3))
 17
@@ -304,17 +212,43 @@ end
 
 # polynomial evaluation using compensated summation.
 # much more accurate, especially when lo can be combined with other rounding errors
-Base.@assume_effects :terminates_locally @inline function exthorner(x, p::Tuple)
-    hi, lo = p[end], zero(x)
-    for i in length(p)-1:-1:1
-        pi = getfield(p, i) # needed to prove consistency
-        prod, err = two_mul(hi,x)
-        hi = pi+prod
-        lo = fma(lo, x, prod - (hi - pi) + err)
-    end
+@inline function exthorner(x::T, p::Tuple{T,T,T}) where T<:Union{Float32,Float64}
+    hi, lo = p[lastindex(p)], zero(x)
+    hi, lo = _exthorner(2, x, p, hi, lo)
+    hi, lo = _exthorner(1, x, p, hi, lo)
+    return hi, lo
+end
+
+@inline function _exthorner(i::Int, x::T, p::Tuple{T,T,T}, hi::T, lo::T) where T<:Union{Float32,Float64}
+    i == 2 || i == 1 || error("unexpected index")
+    pi = p[i]
+    prod, err = two_mul(hi,x)
+    hi = pi+prod
+    lo = fma(lo, x, prod - (hi - pi) + err)
     return hi, lo
 end
 
+# generic, but involves double rounding
+function _180_over_pi(z::AbstractFloat)
+    180 / oftype(z, pi)
+end
+function _pi_over_180(z::AbstractFloat)
+    oftype(z, pi) / 180
+end
+
+# rounded to closest representable number where necessary
+function _180_over_pi(z::Union{Float16, Float32})
+    if z isa Float16
+        r = Float16(57.28)
+    elseif z isa Float32
+        r = 57.29578f0
+    end
+    r
+end
+function _pi_over_180(::Float16)
+    Float16(0.01746)
+end
+
 """
     rad2deg(x)
 
@@ -328,7 +262,7 @@ julia> rad2deg(pi)
 180.0
 ```
 """
-rad2deg(z::AbstractFloat) = z * (180 / oftype(z, pi))
+rad2deg(z::AbstractFloat) = z * _180_over_pi(z)
 
 """
     deg2rad(x)
@@ -343,7 +277,7 @@ julia> deg2rad(90)
 1.5707963267948966
 ```
 """
-deg2rad(z::AbstractFloat) = z * (oftype(z, pi) / 180)
+deg2rad(z::AbstractFloat) = z * _pi_over_180(z)
 rad2deg(z::Real) = rad2deg(float(z))
 deg2rad(z::Real) = deg2rad(float(z))
 rad2deg(z::Number) = (z/pi)*180
@@ -354,7 +288,7 @@ log(b::T, x::T) where {T<:Number} = log(x)/log(b)
 """
     log(b,x)
 
-Compute the base `b` logarithm of `x`. Throws [`DomainError`](@ref) for negative
+Compute the base `b` logarithm of `x`. Throw a [`DomainError`](@ref) for negative
 [`Real`](@ref) arguments.
 
 # Examples
@@ -394,15 +328,14 @@ Stacktrace:
 """
 log(b::Number, x::Number) = log(promote(b,x)...)
 
-# type specific math functions
-
 const libm = Base.libm_name
-
 # functions with no domain error
 """
     sinh(x)
 
 Compute hyperbolic sine of `x`.
+
+See also [`sin`](@ref).
 """
 sinh(x::Number)
 
@@ -410,6 +343,8 @@ sinh(x::Number)
     cosh(x)
 
 Compute hyperbolic cosine of `x`.
+
+See also [`cos`](@ref).
 """
 cosh(x::Number)
 
@@ -448,7 +383,7 @@ tanh(x::Number)
 
 Compute the inverse tangent of `y` or `y/x`, respectively.
 
-For one argument, this is the angle in radians between the positive *x*-axis and the point
+For one real argument, this is the angle in radians between the positive *x*-axis and the point
 (1, *y*), returning a value in the interval ``[-\\pi/2, \\pi/2]``.
 
 For two arguments, this is the angle in radians between the positive *x*-axis and the
@@ -488,10 +423,12 @@ asinh(x::Number)
 
 # functions that return NaN on non-NaN argument for domain error
 """
-    sin(x)
+    sin(x::T) where {T <: Number} -> float(T)
 
 Compute sine of `x`, where `x` is in radians.
 
+Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`.
+
 See also [`sind`](@ref), [`sinpi`](@ref), [`sincos`](@ref), [`cis`](@ref), [`asin`](@ref).
 
 # Examples
@@ -519,26 +456,34 @@ julia> round(exp(im*pi/6), digits=3)
 sin(x::Number)
 
 """
-    cos(x)
+    cos(x::T) where {T <: Number} -> float(T)
 
 Compute cosine of `x`, where `x` is in radians.
 
+Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`.
+
 See also [`cosd`](@ref), [`cospi`](@ref), [`sincos`](@ref), [`cis`](@ref).
 """
 cos(x::Number)
 
 """
-    tan(x)
+    tan(x::T) where {T <: Number} -> float(T)
 
 Compute tangent of `x`, where `x` is in radians.
+
+Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`.
+
+See also [`tanh`](@ref).
 """
 tan(x::Number)
 
 """
-    asin(x)
+    asin(x::T) where {T <: Number} -> float(T)
 
 Compute the inverse sine of `x`, where the output is in radians.
 
+Return a `T(NaN)` if `isnan(x)`.
+
 See also [`asind`](@ref) for output in degrees.
 
 # Examples
@@ -553,9 +498,11 @@ julia> asind.((0, 1/2, 1))
 asin(x::Number)
 
 """
-    acos(x)
+    acos(x::T) where {T <: Number} -> float(T)
+
+Compute the inverse cosine of `x`, where the output is in radians.
 
-Compute the inverse cosine of `x`, where the output is in radians
+Return a `T(NaN)` if `isnan(x)`.
 """
 acos(x::Number)
 
@@ -576,8 +523,14 @@ atanh(x::Number)
 """
     log(x)
 
-Compute the natural logarithm of `x`. Throws [`DomainError`](@ref) for negative
-[`Real`](@ref) arguments. Use complex negative arguments to obtain complex results.
+Compute the natural logarithm of `x`.
+
+Throw a [`DomainError`](@ref) for negative [`Real`](@ref) arguments.
+Use [`Complex`](@ref) arguments to obtain [`Complex`](@ref) results.
+
+!!! note "Branch cut"
+    `log` has a branch cut along the negative real axis; `-0.0im` is taken
+    to be below the axis.
 
 See also [`ℯ`](@ref), [`log1p`](@ref), [`log2`](@ref), [`log10`](@ref).
 
@@ -593,6 +546,12 @@ Stacktrace:
  [1] throw_complex_domainerror(::Symbol, ::Float64) at ./math.jl:31
 [...]
 
+julia> log(-3 + 0im)
+1.0986122886681098 + 3.141592653589793im
+
+julia> log(-3 - 0.0im)
+1.0986122886681098 - 3.141592653589793im
+
 julia> log.(exp.(-1:1))
 3-element Vector{Float64}:
  -1.0
@@ -605,7 +564,7 @@ log(x::Number)
 """
     log2(x)
 
-Compute the logarithm of `x` to base 2. Throws [`DomainError`](@ref) for negative
+Compute the logarithm of `x` to base 2. Throw a [`DomainError`](@ref) for negative
 [`Real`](@ref) arguments.
 
 See also: [`exp2`](@ref), [`ldexp`](@ref), [`ispow2`](@ref).
@@ -638,7 +597,7 @@ log2(x)
     log10(x)
 
 Compute the logarithm of `x` to base 10.
-Throws [`DomainError`](@ref) for negative [`Real`](@ref) arguments.
+Throw a [`DomainError`](@ref) for negative [`Real`](@ref) arguments.
 
 # Examples
 ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*"
@@ -661,7 +620,7 @@ log10(x)
 """
     log1p(x)
 
-Accurate natural logarithm of `1+x`. Throws [`DomainError`](@ref) for [`Real`](@ref)
+Accurate natural logarithm of `1+x`. Throw a [`DomainError`](@ref) for [`Real`](@ref)
 arguments less than -1.
 
 # Examples
@@ -690,8 +649,16 @@ end
 """
     sqrt(x)
 
-Return ``\\sqrt{x}``. Throws [`DomainError`](@ref) for negative [`Real`](@ref) arguments.
-Use complex negative arguments instead. The prefix operator `√` is equivalent to `sqrt`.
+Return ``\\sqrt{x}``.
+
+Throw a [`DomainError`](@ref) for negative [`Real`](@ref) arguments.
+Use [`Complex`](@ref) negative arguments instead to obtain a [`Complex`](@ref) result.
+
+The prefix operator `√` is equivalent to `sqrt`.
+
+!!! note "Branch cut"
+    `sqrt` has a branch cut along the negative real axis; `-0.0im` is taken
+    to be below the axis.
 
 See also: [`hypot`](@ref).
 
@@ -710,6 +677,9 @@ Stacktrace:
 julia> sqrt(big(complex(-81)))
 0.0 + 9.0im
 
+julia> sqrt(-81 - 0.0im)  # -0.0im is below the branch cut
+0.0 - 9.0im
+
 julia> .√(1:4)
 4-element Vector{Float64}:
  1.0
@@ -773,8 +743,8 @@ true
 ```
 """
 hypot(x::Number) = abs(float(x))
-hypot(x::Number, y::Number) = _hypot(promote(float(x), y)...)
-hypot(x::Number, y::Number, xs::Number...) = _hypot(promote(float(x), y, xs...))
+hypot(x::Number, y::Number) = _hypot(float.(promote(x, y))...)
+hypot(x::Number, y::Number, xs::Number...) = _hypot(float.(promote(x, y, xs...)))
 function _hypot(x, y)
     # preserves unit
     axu = abs(x)
@@ -879,47 +849,12 @@ min(x::T, y::T) where {T<:AbstractFloat} = isnan(x) || ~isnan(y) && _isless(x, y
 max(x::T, y::T) where {T<:AbstractFloat} = isnan(x) || ~isnan(y) && _isless(y, x) ? x : y
 minmax(x::T, y::T) where {T<:AbstractFloat} = min(x, y), max(x, y)
 
-_isless(x::Float16, y::Float16) = signbit(widen(x) - widen(y))
-
-const has_native_fminmax = Sys.ARCH === :aarch64
-@static if has_native_fminmax
-    @eval begin
-        Base.@assume_effects :total @inline llvm_min(x::Float64, y::Float64) = ccall("llvm.minimum.f64", llvmcall, Float64, (Float64, Float64), x, y)
-        Base.@assume_effects :total @inline llvm_min(x::Float32, y::Float32) = ccall("llvm.minimum.f32", llvmcall, Float32, (Float32, Float32), x, y)
-        Base.@assume_effects :total @inline llvm_max(x::Float64, y::Float64) = ccall("llvm.maximum.f64", llvmcall, Float64, (Float64, Float64), x, y)
-        Base.@assume_effects :total @inline llvm_max(x::Float32, y::Float32) = ccall("llvm.maximum.f32", llvmcall, Float32, (Float32, Float32), x, y)
-    end
-end
-
-function min(x::T, y::T) where {T<:Union{Float32,Float64}}
-    @static if has_native_fminmax
-        return llvm_min(x,y)
-    end
-    diff = x - y
-    argmin = ifelse(signbit(diff), x, y)
-    anynan = isnan(x)|isnan(y)
-    return ifelse(anynan, diff, argmin)
-end
-
-function max(x::T, y::T) where {T<:Union{Float32,Float64}}
-    @static if has_native_fminmax
-        return llvm_max(x,y)
-    end
-    diff = x - y
-    argmax = ifelse(signbit(diff), y, x)
-    anynan = isnan(x)|isnan(y)
-    return ifelse(anynan, diff, argmax)
+function min(x::T, y::T) where {T<:IEEEFloat}
+    return min_float(x, y)
 end
 
-function minmax(x::T, y::T) where {T<:Union{Float32,Float64}}
-    @static if has_native_fminmax
-        return llvm_min(x, y), llvm_max(x, y)
-    end
-    diff = x - y
-    sdiff = signbit(diff)
-    min, max = ifelse(sdiff, x, y), ifelse(sdiff, y, x)
-    anynan = isnan(x)|isnan(y)
-    return ifelse(anynan, diff, min), ifelse(anynan, diff, max)
+function max(x::T, y::T) where {T<:IEEEFloat}
+    return max_float(x, y)
 end
 
 """
@@ -927,9 +862,11 @@ end
 
 Compute ``x \\times 2^n``.
 
+See also [`frexp`](@ref), [`exponent`](@ref).
+
 # Examples
 ```jldoctest
-julia> ldexp(5., 2)
+julia> ldexp(5.0, 2)
 20.0
 ```
 """
@@ -979,27 +916,36 @@ end
 ldexp(x::Float16, q::Integer) = Float16(ldexp(Float32(x), q))
 
 """
-    exponent(x) -> Int
+    exponent(x::Real)::Int
 
-Returns the largest integer `y` such that `2^y ≤ abs(x)`.
+Return the largest integer `y` such that `2^y ≤ abs(x)`.
 For a normalized floating-point number `x`, this corresponds to the exponent of `x`.
 
+Throws a `DomainError` when `x` is zero, infinite, or [`NaN`](@ref).
+For any other non-subnormal floating-point number `x`, this corresponds to the exponent bits of `x`.
+
+See also [`signbit`](@ref), [`significand`](@ref), [`frexp`](@ref), [`issubnormal`](@ref), [`log2`](@ref), [`ldexp`](@ref).
 # Examples
 ```jldoctest
 julia> exponent(8)
 3
 
-julia> exponent(64//1)
-6
-
 julia> exponent(6.5)
 2
 
-julia> exponent(16.0)
-4
+julia> exponent(-1//4)
+-2
 
 julia> exponent(3.142e-4)
 -12
+
+julia> exponent(floatmin(Float32)), exponent(nextfloat(0.0f0))
+(-126, -149)
+
+julia> exponent(0.0)
+ERROR: DomainError with 0.0:
+Cannot be ±0.0.
+[...]
 ```
 """
 function exponent(x::T) where T<:IEEEFloat
@@ -1031,6 +977,32 @@ function _exponent_finite_nonzero(x::T) where T<:IEEEFloat
     return k - exponent_bias(T)
 end
 
+function _ilog2_step(y::T, d::T, s) where {T<:Integer}
+    if (y >> s) >= d
+        y, n = _ilog2_step(y, d*d, s+s)
+    else
+        n = 0
+    end
+    if y >= d
+        y >>= s
+        n = Base.checked_add(n, s)
+    end
+    return y, n
+end
+
+function exponent(x::Integer)
+    iszero(x) && throw(DomainError(x, "cannot be zero"))
+    ux = Base.uabs(x)
+    _, n = _ilog2_step(ux, one(ux) + one(ux), 1)
+    return n
+end
+
+function exponent(x::Base.BitInteger)
+    iszero(x) && throw(DomainError(x, "cannot be zero"))
+    ux = Base.uabs(x)
+    return 8sizeof(ux) - leading_zeros(ux) - 1
+end
+
 """
     significand(x)
 
@@ -1039,6 +1011,8 @@ a non-zero finite number, then the result will be a number of the same type and
 sign as `x`, and whose absolute value is on the interval ``[1,2)``. Otherwise
 `x` is returned.
 
+See also [`frexp`](@ref), [`exponent`](@ref).
+
 # Examples
 ```jldoctest
 julia> significand(15.2)
@@ -1073,10 +1047,19 @@ end
 
 Return `(x,exp)` such that `x` has a magnitude in the interval ``[1/2, 1)`` or 0,
 and `val` is equal to ``x \\times 2^{exp}``.
+
+See also [`significand`](@ref), [`exponent`](@ref), [`ldexp`](@ref).
+
 # Examples
 ```jldoctest
-julia> frexp(12.8)
-(0.8, 4)
+julia> frexp(6.0)
+(0.75, 3)
+
+julia> significand(6.0), exponent(6.0)  # interval [1, 2) instead
+(1.5, 2)
+
+julia> frexp(0.0), frexp(NaN), frexp(-Inf)  # exponent would give an error
+((0.0, 0), (NaN, 0), (-Inf, 0))
 ```
 """
 function frexp(x::T) where T<:IEEEFloat
@@ -1189,105 +1172,13 @@ function modf(x::T) where T<:IEEEFloat
     return (rx, ix)
 end
 
-# @constprop aggressive to help the compiler see the switch between the integer and float
-# variants for callers with constant `y`
-@constprop :aggressive function ^(x::Float64, y::Float64)
-    xu = reinterpret(UInt64, x)
-    xu == reinterpret(UInt64, 1.0) && return 1.0
-    # Exponents greater than this will always overflow or underflow.
-    # Note that NaN can pass through this, but that will end up fine.
-    if !(abs(y)<0x1.8p62)
-        isnan(y) && return y
-        y = sign(y)*0x1.8p62
-    end
-    yint = unsafe_trunc(Int64, y) # This is actually safe since julia freezes the result
-    y == yint && return @noinline x^yint
-    2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x==0
-    x<0 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
-    !isfinite(x) && return x*(y>0 || isnan(x))           # x is inf or NaN
-    if xu < (UInt64(1)<<52) # x is subnormal
-        xu = reinterpret(UInt64, x * 0x1p52) # normalize x
-        xu &= ~sign_mask(Float64)
-        xu -= UInt64(52) << 52 # mess with the exponent
-    end
-    return pow_body(xu, y)
-end
-
-@inline function pow_body(xu::UInt64, y::Float64)
-    logxhi,logxlo = Base.Math._log_ext(xu)
-    xyhi, xylo = two_mul(logxhi,y)
-    xylo = muladd(logxlo, y, xylo)
-    hi = xyhi+xylo
-    return Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ))
-end
-
-@constprop :aggressive function ^(x::T, y::T) where T <: Union{Float16, Float32}
-    x == 1 && return one(T)
-    # Exponents greater than this will always overflow or underflow.
-    # Note that NaN can pass through this, but that will end up fine.
-    max_exp = T == Float16 ? T(3<<14) : T(0x1.Ap30)
-    if !(abs(y)<max_exp)
-        isnan(y) && return y
-        y = sign(y)*max_exp
-    end
-    yint = unsafe_trunc(Int32, y) # This is actually safe since julia freezes the result
-    y == yint && return x^yint
-    x < 0 && throw_exp_domainerror(x)
-    !isfinite(x) && return x*(y>0 || isnan(x))
-    x==0 && return abs(y)*T(Inf)*(!(y>0))
-    return pow_body(x, y)
-end
-
-@inline function pow_body(x::T, y::T) where T <: Union{Float16, Float32}
-    return T(exp2(log2(abs(widen(x))) * y))
-end
 
-# compensated power by squaring
-@constprop :aggressive @inline function ^(x::Float64, n::Integer)
-    n == 0 && return one(x)
-    return pow_body(x, n)
-end
-
-@assume_effects :terminates_locally @noinline function pow_body(x::Float64, n::Integer)
-    y = 1.0
-    xnlo = ynlo = 0.0
-    n == 3 && return x*x*x # keep compatibility with literal_pow
-    if n < 0
-        rx = inv(x)
-        n==-2 && return rx*rx #keep compatibility with literal_pow
-        isfinite(x) && (xnlo = -fma(x, rx, -1.) * rx)
-        x = rx
-        n = -n
-    end
-    while n > 1
-        if n&1 > 0
-            err = muladd(y, xnlo, x*ynlo)
-            y, ynlo = two_mul(x,y)
-            ynlo += err
-        end
-        err = x*2*xnlo
-        x, xnlo = two_mul(x, x)
-        xnlo += err
-        n >>>= 1
-    end
-    err = muladd(y, xnlo, x*ynlo)
-    return ifelse(isfinite(x) & isfinite(err), muladd(x, y, err), x*y)
-end
-
-function ^(x::Float32, n::Integer)
-    n == -2 && return (i=inv(x); i*i)
-    n == 3 && return x*x*x #keep compatibility with literal_pow
-    n < 0 && return Float32(Base.power_by_squaring(inv(Float64(x)),-n))
-    Float32(Base.power_by_squaring(Float64(x),n))
-end
-@inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ y)
-@inline literal_pow(::typeof(^), x::Float16, ::Val{p}) where {p} = Float16(literal_pow(^,Float32(x),Val(p)))
 
 ## rem2pi-related calculations ##
 
 function add22condh(xh::Float64, xl::Float64, yh::Float64, yl::Float64)
     # This algorithm, due to Dekker, computes the sum of two
-    # double-double numbers and returns the high double. References:
+    # double-double numbers and return the high double. References:
     # [1] http://www.digizeitschriften.de/en/dms/img/?PID=GDZPPN001170007
     # [2] https://doi.org/10.1007/BF01397083
     r = xh+yh
@@ -1296,19 +1187,6 @@ function add22condh(xh::Float64, xl::Float64, yh::Float64, yl::Float64)
     return zh
 end
 
-# multiples of pi/2, as double-double (ie with "tail")
-const pi1o2_h  = 1.5707963267948966     # convert(Float64, pi * BigFloat(1/2))
-const pi1o2_l  = 6.123233995736766e-17  # convert(Float64, pi * BigFloat(1/2) - pi1o2_h)
-
-const pi2o2_h  = 3.141592653589793      # convert(Float64, pi * BigFloat(1))
-const pi2o2_l  = 1.2246467991473532e-16 # convert(Float64, pi * BigFloat(1) - pi2o2_h)
-
-const pi3o2_h  = 4.71238898038469       # convert(Float64, pi * BigFloat(3/2))
-const pi3o2_l  = 1.8369701987210297e-16 # convert(Float64, pi * BigFloat(3/2) - pi3o2_h)
-
-const pi4o2_h  = 6.283185307179586      # convert(Float64, pi * BigFloat(2))
-const pi4o2_l  = 2.4492935982947064e-16 # convert(Float64, pi * BigFloat(2) - pi4o2_h)
-
 """
     rem2pi(x, r::RoundingMode)
 
@@ -1341,133 +1219,6 @@ julia> rem2pi(7pi/4, RoundDown)
 ```
 """
 function rem2pi end
-function rem2pi(x::Float64, ::RoundingMode{:Nearest})
-    isnan(x) && return x
-    isinf(x) && return NaN
-
-    abs(x) < pi && return x
-
-    n,y = rem_pio2_kernel(x)
-
-    if iseven(n)
-        if n & 2 == 2 # n % 4 == 2: add/subtract pi
-            if y.hi <= 0
-                return add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l)
-            else
-                return add22condh(y.hi,y.lo,-pi2o2_h,-pi2o2_l)
-            end
-        else          # n % 4 == 0: add 0
-            return y.hi+y.lo
-        end
-    else
-        if n & 2 == 2 # n % 4 == 3: subtract pi/2
-            return add22condh(y.hi,y.lo,-pi1o2_h,-pi1o2_l)
-        else          # n % 4 == 1: add pi/2
-            return add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l)
-        end
-    end
-end
-function rem2pi(x::Float64, ::RoundingMode{:ToZero})
-    isnan(x) && return x
-    isinf(x) && return NaN
-
-    ax = abs(x)
-    ax <= 2*Float64(pi,RoundDown) && return x
-
-    n,y = rem_pio2_kernel(ax)
-
-    if iseven(n)
-        if n & 2 == 2 # n % 4 == 2: add pi
-            z = add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l)
-        else          # n % 4 == 0: add 0 or 2pi
-            if y.hi > 0
-                z = y.hi+y.lo
-            else      # negative: add 2pi
-                z = add22condh(y.hi,y.lo,pi4o2_h,pi4o2_l)
-            end
-        end
-    else
-        if n & 2 == 2 # n % 4 == 3: add 3pi/2
-            z = add22condh(y.hi,y.lo,pi3o2_h,pi3o2_l)
-        else          # n % 4 == 1: add pi/2
-            z = add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l)
-        end
-    end
-    copysign(z,x)
-end
-function rem2pi(x::Float64, ::RoundingMode{:Down})
-    isnan(x) && return x
-    isinf(x) && return NaN
-
-    if x < pi4o2_h
-        if x >= 0
-            return x
-        elseif x > -pi4o2_h
-            return add22condh(x,0.0,pi4o2_h,pi4o2_l)
-        end
-    end
-
-    n,y = rem_pio2_kernel(x)
-
-    if iseven(n)
-        if n & 2 == 2 # n % 4 == 2: add pi
-            return add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l)
-        else          # n % 4 == 0: add 0 or 2pi
-            if y.hi > 0
-                return y.hi+y.lo
-            else      # negative: add 2pi
-                return add22condh(y.hi,y.lo,pi4o2_h,pi4o2_l)
-            end
-        end
-    else
-        if n & 2 == 2 # n % 4 == 3: add 3pi/2
-            return add22condh(y.hi,y.lo,pi3o2_h,pi3o2_l)
-        else          # n % 4 == 1: add pi/2
-            return add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l)
-        end
-    end
-end
-function rem2pi(x::Float64, ::RoundingMode{:Up})
-    isnan(x) && return x
-    isinf(x) && return NaN
-
-    if x > -pi4o2_h
-        if x <= 0
-            return x
-        elseif x < pi4o2_h
-            return add22condh(x,0.0,-pi4o2_h,-pi4o2_l)
-        end
-    end
-
-    n,y = rem_pio2_kernel(x)
-
-    if iseven(n)
-        if n & 2 == 2 # n % 4 == 2: sub pi
-            return add22condh(y.hi,y.lo,-pi2o2_h,-pi2o2_l)
-        else          # n % 4 == 0: sub 0 or 2pi
-            if y.hi < 0
-                return y.hi+y.lo
-            else      # positive: sub 2pi
-                return add22condh(y.hi,y.lo,-pi4o2_h,-pi4o2_l)
-            end
-        end
-    else
-        if n & 2 == 2 # n % 4 == 3: sub pi/2
-            return add22condh(y.hi,y.lo,-pi1o2_h,-pi1o2_l)
-        else          # n % 4 == 1: sub 3pi/2
-            return add22condh(y.hi,y.lo,-pi3o2_h,-pi3o2_l)
-        end
-    end
-end
-
-rem2pi(x::Float32, r::RoundingMode) = Float32(rem2pi(Float64(x), r))
-rem2pi(x::Float16, r::RoundingMode) = Float16(rem2pi(Float64(x), r))
-rem2pi(x::Int32, r::RoundingMode) = rem2pi(Float64(x), r)
-function rem2pi(x::Int64, r::RoundingMode)
-    fx = Float64(x)
-    fx == x || throw(ArgumentError("Int64 argument to rem2pi is too large: $x"))
-    rem2pi(fx, r)
-end
 
 """
     mod2pi(x)
@@ -1547,7 +1298,10 @@ include("special/exp.jl")
 include("special/hyperbolic.jl")
 include("special/trig.jl")
 include("special/rem_pio2.jl")
+include("special/rem2pi.jl")
 include("special/log.jl")
+include("special/pow.jl")
+
 
 
 # Float16 definitions
@@ -1571,10 +1325,10 @@ sincos(a::Float16) = Float16.(sincos(Float32(a)))
 for f in (:sin, :cos, :tan, :asin, :atan, :acos,
           :sinh, :cosh, :tanh, :asinh, :acosh, :atanh,
           :exp, :exp2, :exp10, :expm1, :log, :log2, :log10, :log1p,
-          :exponent, :sqrt, :cbrt)
+          :exponent, :sqrt, :cbrt, :sinpi, :cospi, :sincospi, :tanpi)
     @eval function ($f)(x::Real)
         xf = float(x)
-        x === xf && throw(MethodError($f, (x,)))
+        xf isa typeof(x) && throw(MethodError($f, (x,)))
         return ($f)(xf)
     end
     @eval $(f)(::Missing) = missing
@@ -1588,7 +1342,6 @@ end
 
 exp2(x::AbstractFloat) = 2^x
 exp10(x::AbstractFloat) = 10^x
-clamp(::Missing, lo, hi) = missing
 fourthroot(::Missing) = missing
 
 end # module
diff --git a/base/mathconstants.jl b/base/mathconstants.jl
index 4bb8c409acf00..d26f5115b5ccb 100644
--- a/base/mathconstants.jl
+++ b/base/mathconstants.jl
@@ -16,6 +16,26 @@ Base.@irrational γ        euler
 Base.@irrational φ        (1+sqrt(big(5)))/2
 Base.@irrational catalan  catalan
 
+const _KnownIrrational = Union{
+    typeof(π), typeof(ℯ), typeof(γ), typeof(φ), typeof(catalan)
+}
+
+function Rational{BigInt}(::_KnownIrrational)
+    Base._throw_argument_error_irrational_to_rational_bigint()
+end
+Base.@assume_effects :foldable function Rational{T}(x::_KnownIrrational) where {T<:Integer}
+    Base._irrational_to_rational(T, x)
+end
+Base.@assume_effects :foldable function (::Type{T})(x::_KnownIrrational, r::RoundingMode) where {T<:Union{Float32,Float64}}
+    Base._irrational_to_float(T, x, r)
+end
+Base.@assume_effects :foldable function Base.rationalize(::Type{T}, x::_KnownIrrational; tol::Real=0) where {T<:Integer}
+    Base._rationalize_irrational(T, x, tol)
+end
+Base.@assume_effects :foldable function Base.lessrational(rx::Rational, x::_KnownIrrational)
+    Base._lessrational(rx, x)
+end
+
 # aliases
 """
     π
diff --git a/base/meta.jl b/base/meta.jl
index 31fef1b9697e3..1f5f49b72575f 100644
--- a/base/meta.jl
+++ b/base/meta.jl
@@ -5,8 +5,6 @@ Convenience functions for metaprogramming.
 """
 module Meta
 
-using ..CoreLogging
-
 export quot,
        isexpr,
        isidentifier,
@@ -18,9 +16,123 @@ export quot,
        show_sexpr,
        @dump
 
-using Base: isidentifier, isoperator, isunaryoperator, isbinaryoperator, ispostfixoperator
+public parse
+
 import Base: isexpr
 
+## AST decoding helpers ##
+
+is_id_start_char(c::AbstractChar) = ccall(:jl_id_start_char, Cint, (UInt32,), c) != 0
+is_id_char(c::AbstractChar) = ccall(:jl_id_char, Cint, (UInt32,), c) != 0
+
+"""
+     isidentifier(s) -> Bool
+
+Return whether the symbol or string `s` contains characters that are parsed as
+a valid ordinary identifier (not a binary/unary operator) in Julia code;
+see also [`Base.isoperator`](@ref).
+
+Internally Julia allows any sequence of characters in a `Symbol` (except `\\0`s),
+and macros automatically use variable names containing `#` in order to avoid
+naming collision with the surrounding code. In order for the parser to
+recognize a variable, it uses a limited set of characters (greatly extended by
+Unicode). `isidentifier()` makes it possible to query the parser directly
+whether a symbol contains valid characters.
+
+# Examples
+```jldoctest
+julia> Meta.isidentifier(:x), Meta.isidentifier("1x")
+(true, false)
+```
+"""
+function isidentifier(s::AbstractString)
+    x = Iterators.peel(s)
+    isnothing(x) && return false
+    (s == "true" || s == "false") && return false
+    c, rest = x
+    is_id_start_char(c) || return false
+    return all(is_id_char, rest)
+end
+isidentifier(s::Symbol) = isidentifier(string(s))
+
+is_op_suffix_char(c::AbstractChar) = ccall(:jl_op_suffix_char, Cint, (UInt32,), c) != 0
+
+_isoperator(s) = ccall(:jl_is_operator, Cint, (Cstring,), s) != 0
+
+"""
+    isoperator(s::Symbol)
+
+Return `true` if the symbol can be used as an operator, `false` otherwise.
+
+# Examples
+```jldoctest
+julia> Meta.isoperator(:+), Meta.isoperator(:f)
+(true, false)
+```
+"""
+isoperator(s::Union{Symbol,AbstractString}) = _isoperator(s) || ispostfixoperator(s)
+
+"""
+    isunaryoperator(s::Symbol)
+
+Return `true` if the symbol can be used as a unary (prefix) operator, `false` otherwise.
+
+# Examples
+```jldoctest
+julia> Meta.isunaryoperator(:-), Meta.isunaryoperator(:√), Meta.isunaryoperator(:f)
+(true, true, false)
+```
+"""
+isunaryoperator(s::Symbol) = ccall(:jl_is_unary_operator, Cint, (Cstring,), s) != 0
+is_unary_and_binary_operator(s::Symbol) = ccall(:jl_is_unary_and_binary_operator, Cint, (Cstring,), s) != 0
+is_syntactic_operator(s::Symbol) = ccall(:jl_is_syntactic_operator, Cint, (Cstring,), s) != 0
+
+"""
+    isbinaryoperator(s::Symbol)
+
+Return `true` if the symbol can be used as a binary (infix) operator, `false` otherwise.
+
+# Examples
+```jldoctest
+julia> Meta.isbinaryoperator(:-), Meta.isbinaryoperator(:√), Meta.isbinaryoperator(:f)
+(true, false, false)
+```
+"""
+function isbinaryoperator(s::Symbol)
+    return _isoperator(s) && (!isunaryoperator(s) || is_unary_and_binary_operator(s)) &&
+        s !== Symbol("'")
+end
+
+"""
+    ispostfixoperator(s::Union{Symbol,AbstractString})
+
+Return `true` if the symbol can be used as a postfix operator, `false` otherwise.
+
+# Examples
+```jldoctest
+julia> Meta.ispostfixoperator(Symbol("'")), Meta.ispostfixoperator(Symbol("'ᵀ")), Meta.ispostfixoperator(:-)
+(true, true, false)
+```
+"""
+function ispostfixoperator(s::Union{Symbol,AbstractString})
+    s = String(s)::String
+    return startswith(s, '\'') && all(is_op_suffix_char, SubString(s, 2))
+end
+
+const keyword_syms = IdSet{Symbol}([
+    :baremodule, :begin, :break, :catch, :const, :continue, :do, :else, :elseif,
+    :end, :export, :var"false", :finally, :for, :function, :global, :if, :import,
+    :let, :local, :macro, :module, :public, :quote, :return, :struct, :var"true",
+    :try, :using, :while ])
+
+function is_valid_identifier(sym)
+    return (isidentifier(sym) && !(sym in keyword_syms)) ||
+        (_isoperator(sym) &&
+        !(sym in (Symbol("'"), :(::), :?)) &&
+        !is_syntactic_operator(sym)
+    )
+end
+
 """
     Meta.quot(ex)::Expr
 
@@ -160,7 +272,7 @@ Takes the expression `x` and returns an equivalent expression in lowered form
 for executing in module `m`.
 See also [`code_lowered`](@ref).
 """
-lower(m::Module, @nospecialize(x)) = ccall(:jl_expand, Any, (Any, Any), x, m)
+lower(m::Module, @nospecialize(x)) = Core._lower(x, m, "none", 0, typemax(Csize_t), false)[1]
 
 """
     @lower [m] x
@@ -192,12 +304,21 @@ end
 
 ParseError(msg::AbstractString) = ParseError(msg, nothing)
 
+# N.B.: Should match definition in src/ast.c:jl_parse
+function parser_for_module(mod::Union{Module, Nothing})
+    mod === nothing && return Core._parse
+    isdefined(mod, :_internal_julia_parse) ?
+        getglobal(mod, :_internal_julia_parse) :
+        Core._parse
+end
+
 function _parse_string(text::AbstractString, filename::AbstractString,
-                       lineno::Integer, index::Integer, options)
+                       lineno::Integer, index::Integer, options,
+                       _parse=parser_for_module(nothing))
     if index < 1 || index > ncodeunits(text) + 1
         throw(BoundsError(text, index))
     end
-    ex, offset::Int = Core._parse(text, filename, lineno, index-1, options)
+    ex, offset::Int = _parse(text, filename, lineno, index-1, options)
     ex, offset+1
 end
 
@@ -234,8 +355,8 @@ julia> Meta.parse("(α, β) = 3, 5", 11, greedy=false)
 ```
 """
 function parse(str::AbstractString, pos::Integer;
-               filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true)
-    ex, pos = _parse_string(str, String(filename), 1, pos, greedy ? :statement : :atom)
+               filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true, mod = nothing)
+    ex, pos = _parse_string(str, String(filename), 1, pos, greedy ? :statement : :atom, parser_for_module(mod))
     if raise && isexpr(ex, :error)
         err = ex.args[1]
         if err isa String
@@ -274,8 +395,8 @@ julia> Meta.parse("x = ")
 ```
 """
 function parse(str::AbstractString;
-               filename="none", raise::Bool=true, depwarn::Bool=true)
-    ex, pos = parse(str, 1; filename, greedy=true, raise, depwarn)
+               filename="none", raise::Bool=true, depwarn::Bool=true, mod = nothing)
+    ex, pos = parse(str, 1; filename, greedy=true, raise, depwarn, mod = mod)
     if isexpr(ex, :error)
         return ex
     end
@@ -286,12 +407,12 @@ function parse(str::AbstractString;
     return ex
 end
 
-function parseatom(text::AbstractString, pos::Integer; filename="none", lineno=1)
-    return _parse_string(text, String(filename), lineno, pos, :atom)
+function parseatom(text::AbstractString, pos::Integer; filename="none", lineno=1, mod = nothing)
+    return _parse_string(text, String(filename), lineno, pos, :atom, parser_for_module(mod))
 end
 
-function parseall(text::AbstractString; filename="none", lineno=1)
-    ex,_ = _parse_string(text, String(filename), lineno, 1, :all)
+function parseall(text::AbstractString; filename="none", lineno=1, mod = nothing)
+    ex,_ = _parse_string(text, String(filename), lineno, 1, :all, parser_for_module(mod))
     return ex
 end
 
@@ -364,11 +485,29 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
         x.edges .+= slot_offset
         return x
     end
+    if isa(x, Core.UpsilonNode)
+        if !isdefined(x, :val)
+            return x
+        end
+        return Core.UpsilonNode(
+            _partially_inline!(x.val, slot_replacements, type_signature, static_param_values,
+                               slot_offset, statement_offset, boundscheck),
+        )
+    end
+    if isa(x, Core.PhiCNode)
+        _partially_inline!(x.values, slot_replacements, type_signature, static_param_values,
+                           slot_offset, statement_offset, boundscheck)
+    end
     if isa(x, Core.ReturnNode)
+       # Unreachable doesn't have val defined
+       if !isdefined(x, :val)
+          return x
+       else
         return Core.ReturnNode(
             _partially_inline!(x.val, slot_replacements, type_signature, static_param_values,
                                slot_offset, statement_offset, boundscheck),
         )
+       end
     end
     if isa(x, Core.GotoIfNot)
         return Core.GotoIfNot(
@@ -377,6 +516,12 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
             x.dest + statement_offset,
         )
     end
+    if isa(x, Core.EnterNode)
+        if x.catch_dest == 0
+            return x
+        end
+        return Core.EnterNode(x, x.catch_dest + statement_offset)
+    end
     if isa(x, Expr)
         head = x.head
         if head === :static_parameter
@@ -403,7 +548,7 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
                 elseif i == 4
                     @assert isa(x.args[4], Int)
                 elseif i == 5
-                    @assert isa((x.args[5]::QuoteNode).value, Union{Symbol, Tuple{Symbol, UInt8}})
+                    @assert isa((x.args[5]::QuoteNode).value, Union{Symbol, Tuple{Symbol, UInt16, Bool}})
                 else
                     x.args[i] = _partially_inline!(x.args[i], slot_replacements,
                                                    type_signature, static_param_values,
@@ -424,8 +569,6 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
                                            static_param_values, slot_offset,
                                            statement_offset, boundscheck)
             x.args[2] += statement_offset
-        elseif head === :enter
-            x.args[1] += statement_offset
         elseif head === :isdefined
             arg = x.args[1]
             # inlining a QuoteNode or literal into `Expr(:isdefined, x)` is invalid, replace with true
@@ -450,7 +593,7 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
                 @assert isa(arg, Union{GlobalRef, Symbol})
                 return x
             end
-        elseif !Core.Compiler.is_meta_expr_head(head)
+        elseif !Base.is_meta_expr_head(head)
             partially_inline!(x.args, slot_replacements, type_signature, static_param_values,
                               slot_offset, statement_offset, boundscheck)
         end
@@ -460,4 +603,63 @@ end
 
 _instantiate_type_in_env(x, spsig, spvals) = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), x, spsig, spvals)
 
+"""
+    Meta.unblock(expr)
+
+Peel away redundant block expressions.
+
+Specifically, the following expressions are stripped by this function:
+- `:block` expressions with a single non-line-number argument.
+- Pairs of `:var"hygienic-scope"` / `:escape` expressions.
+"""
+function unblock(@nospecialize ex)
+    while isexpr(ex, :var"hygienic-scope")
+        isexpr(ex.args[1], :escape) || break
+        ex = ex.args[1].args[1]
+    end
+    isexpr(ex, :block) || return ex
+    exs = filter(ex -> !(isa(ex, LineNumberNode) || isexpr(ex, :line)), ex.args)
+    length(exs) == 1 || return ex
+    return unblock(exs[1])
+end
+
+"""
+    Meta.unescape(expr)
+
+Peel away `:escape` expressions and redundant block expressions (see
+[`unblock`](@ref)).
+"""
+function unescape(@nospecialize ex)
+    ex = unblock(ex)
+    while isexpr(ex, :escape) || isexpr(ex, :var"hygienic-scope")
+       ex = unblock(ex.args[1])
+    end
+    return ex
+end
+
+"""
+    Meta.reescape(unescaped_expr, original_expr)
+
+Re-wrap `unescaped_expr` with the same level of escaping as `original_expr` had.
+This is the inverse operation of [`unescape`](@ref) - if the original expression
+was escaped, the unescaped expression is wrapped in `:escape` again.
+"""
+function reescape(@nospecialize(unescaped_expr), @nospecialize(original_expr))
+    if isexpr(original_expr, :escape)
+        return reescape(Expr(:escape, unescaped_expr), original_expr.args[1])
+    elseif isexpr(original_expr, :var"hygienic-scope")
+        next, ctx... = original_expr.args
+        return reescape(Expr(:var"hygienic-scope", unescaped_expr, ctx...), next)
+    else
+        return unescaped_expr
+    end
+end
+
+"""
+    Meta.uncurly(expr)
+
+Turn `T{P...}` into just `T`.
+"""
+uncurly(@nospecialize ex) = isexpr(ex, :curly) ? ex.args[1] : ex
+
 end # module
diff --git a/base/methodshow.jl b/base/methodshow.jl
index 0eb99dc88303f..1cf9195ddfebb 100644
--- a/base/methodshow.jl
+++ b/base/methodshow.jl
@@ -78,10 +78,10 @@ end
 
 # NOTE: second argument is deprecated and is no longer used
 function kwarg_decl(m::Method, kwtype = nothing)
-    if m.sig !== Tuple # OpaqueClosure or Builtin
+    if !(m.sig === Tuple || m.sig <: Tuple{Core.Builtin, Vararg}) # OpaqueClosure or Builtin
         kwtype = typeof(Core.kwcall)
         sig = rewrap_unionall(Tuple{kwtype, NamedTuple, (unwrap_unionall(m.sig)::DataType).parameters...}, m.sig)
-        kwli = ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), kwtype.name.mt, sig, get_world_counter())
+        kwli = ccall(:jl_methtable_lookup, Any, (Any, UInt), sig, get_world_counter())
         if kwli !== nothing
             kwli = kwli::Method
             slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), kwli.slot_syms)
@@ -131,13 +131,17 @@ function fixup_stdlib_path(path::String)
     # The file defining Base.Sys gets included after this file is included so make sure
     # this function is valid even in this intermediary state
     if isdefined(@__MODULE__, :Sys)
-        BUILD_STDLIB_PATH = Sys.BUILD_STDLIB_PATH::String
-        STDLIB = Sys.STDLIB::String
-        if BUILD_STDLIB_PATH != STDLIB
+        if Sys.BUILD_STDLIB_PATH != Sys.STDLIB
             # BUILD_STDLIB_PATH gets defined in sysinfo.jl
             npath = normpath(path)
-            npath′ = replace(npath, normpath(BUILD_STDLIB_PATH) => normpath(STDLIB))
-            return npath == npath′ ? path : npath′
+            npath′ = replace(npath, normpath(Sys.BUILD_STDLIB_PATH) => normpath(Sys.STDLIB))
+            path = npath == npath′ ? path : npath′
+        end
+        if isdefined(@__MODULE__, :Core) && isdefined(Core, :Compiler)
+            compiler_folder = dirname(String(Base.moduleloc(Core.Compiler).file))
+            if dirname(path) == compiler_folder
+                return abspath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "Compiler", "src", basename(path))
+            end
         end
     end
     return path
@@ -177,6 +181,7 @@ end
 Return a tuple `(filename,line)` giving the location of a generic `Function` definition.
 """
 functionloc(@nospecialize(f), @nospecialize(types)) = functionloc(which(f,types))
+functionloc(@nospecialize(argtypes::Union{Tuple, Type{<:Tuple}})) = functionloc(which(argtypes))
 
 function functionloc(@nospecialize(f))
     mt = methods(f)
@@ -210,10 +215,11 @@ show(io::IO, m::Method; kwargs...) = show_method(IOContext(io, :compact=>true),
 
 show(io::IO, ::MIME"text/plain", m::Method; kwargs...) = show_method(io, m; kwargs...)
 
-function show_method(io::IO, m::Method; modulecolor = :light_black, digit_align_width = 1)
+function show_method(io::IO, m::Method;
+                     modulecolor = :light_black, digit_align_width = 1,
+                     print_signature_only::Bool = get(io, :print_method_signature_only, false)::Bool)
     tv, decls, file, line = arg_decl_parts(m)
-    sig = unwrap_unionall(m.sig)
-    if sig === Tuple
+    if m.sig <: Tuple{Core.Builtin, Vararg}
         # Builtin
         print(io, m.name, "(...)")
         file = "none"
@@ -246,19 +252,21 @@ function show_method(io::IO, m::Method; modulecolor = :light_black, digit_align_
         show_method_params(io, tv)
     end
 
-    if !(get(io, :compact, false)::Bool) # single-line mode
-        println(io)
-        digit_align_width += 4
+    if !print_signature_only
+        if !(get(io, :compact, false)::Bool) # single-line mode
+            println(io)
+            digit_align_width += 4
+        end
+        # module & file, re-using function from errorshow.jl
+        print_module_path_file(io, parentmodule(m), string(file), line; modulecolor, digit_align_width)
     end
-    # module & file, re-using function from errorshow.jl
-    print_module_path_file(io, parentmodule(m), string(file), line; modulecolor, digit_align_width)
 end
 
 function show_method_list_header(io::IO, ms::MethodList, namefmt::Function)
-    mt = ms.mt
-    name = mt.name
-    hasname = isdefined(mt.module, name) &&
-              typeof(getfield(mt.module, name)) <: Function
+    tn = ms.tn
+    name = tn.singletonname
+    hasname = isdefined(tn.module, name) &&
+              typeof(getfield(tn.module, name)) <: Function
     n = length(ms)
     m = n==1 ? "method" : "methods"
     print(io, "# $n $m")
@@ -267,18 +275,18 @@ function show_method_list_header(io::IO, ms::MethodList, namefmt::Function)
     if hasname
         what = (startswith(sname, '@') ?
                     "macro"
-               : mt.module === Core && mt.defs isa Core.TypeMapEntry && (mt.defs.func::Method).sig === Tuple ?
+               : tn.module === Core && tn.wrapper <: Core.Builtin ?
                     "builtin function"
                : # else
                     "generic function")
         print(io, " for ", what, " ", namedisplay, " from ")
 
-        col = get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, parentmodule_before_main(ms.mt.module))
+        col = get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, parentmodule_before_main(tn.module))
 
-        printstyled(io, ms.mt.module, color=col)
+        printstyled(io, tn.module, color=col)
     elseif '#' in sname
         print(io, " for anonymous function ", namedisplay)
-    elseif mt === _TYPE_NAME.mt
+    elseif tn === _TYPE_NAME || iskindtype(tn.wrapper)
         print(io, " for type constructor")
     else
         print(io, " for callable object")
@@ -286,11 +294,36 @@ function show_method_list_header(io::IO, ms::MethodList, namefmt::Function)
     !iszero(n) && print(io, ":")
 end
 
+# Determine the `modulecolor` value to pass to `show_method`
+function _modulecolor(method::Method)
+    mmt = get_methodtable(method)
+    # TODO: this looks like a buggy bit of internal hacking, so disable for now
+    return nothing
+    if mmt === nothing || mmt.module === parentmodule(method)
+        return nothing
+    end
+    # `mmt` is only particularly relevant for external method tables. Since the primary
+    # method table is shared, we now need to distinguish "primary" methods by trying to
+    # check if there is a primary `DataType` to identify it with. c.f. how `jl_method_def`
+    # would derive this same information (for the name).
+    ft = argument_datatype((unwrap_unionall(method.sig)::DataType).parameters[1])
+    # `ft` should be the type associated with the first argument in the method signature.
+    # If it's `Type`, try to unwrap it again.
+    if isType(ft)
+        ft = argument_datatype(ft.parameters[1])
+    end
+    if ft === nothing || parentmodule(method) === parentmodule(ft) !== Core
+        return nothing
+    end
+    m = parentmodule_before_main(method)
+    return get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m)
+end
+
 function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=true)
-    mt = ms.mt
-    name = mt.name
-    hasname = isdefined(mt.module, name) &&
-              typeof(getfield(mt.module, name)) <: Function
+    tn = ms.tn
+    name = tn.singletonname
+    hasname = isdefined(tn.module, name) &&
+              typeof(getfield(tn.module, name)) <: Function
     if header
         show_method_list_header(io, ms, str -> "\""*str*"\"")
     end
@@ -300,12 +333,6 @@ function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=tru
     last_shown_line_infos = get(io, :last_shown_line_infos, nothing)
     last_shown_line_infos === nothing || empty!(last_shown_line_infos)
 
-    modul = if mt === _TYPE_NAME.mt && length(ms) > 0 # type constructor
-            which(ms.ms[1].module, ms.ms[1].name)
-        else
-            mt.module
-        end
-
     digit_align_width = length(string(max > 0 ? max : length(ms)))
 
     for meth in ms
@@ -315,13 +342,7 @@ function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=tru
 
             print(io, " ", lpad("[$n]", digit_align_width + 2), " ")
 
-            modulecolor = if parentmodule(meth) == modul
-                nothing
-            else
-                m = parentmodule_before_main(meth)
-                get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m)
-            end
-            show_method(io, meth; modulecolor)
+            show_method(io, meth; modulecolor=_modulecolor(meth))
 
             file, line = updated_methodloc(meth)
             if last_shown_line_infos !== nothing
@@ -348,7 +369,7 @@ end
 
 show(io::IO, ms::MethodList) = show_method_table(io, ms)
 show(io::IO, ::MIME"text/plain", ms::MethodList) = show_method_table(io, ms)
-show(io::IO, mt::Core.MethodTable) = show_method_table(io, MethodList(mt))
+show(io::IO, mt::Core.MethodTable) = print(io, mt.module, ".", mt.name, " is a Core.MethodTable with ", length(mt), " methods.")
 
 function inbase(m::Module)
     if m == Base
@@ -367,7 +388,6 @@ function url(m::Method)
     line = m.line
     line <= 0 || occursin(r"In\[[0-9]+\]"a, file) && return ""
     Sys.iswindows() && (file = replace(file, '\\' => '/'))
-    libgit2_id = PkgId(UUID((0x76f85450_5226_5b5a,0x8eaa_529ad045b433)), "LibGit2")
     if inbase(M)
         if isempty(Base.GIT_VERSION_INFO.commit)
             # this url will only work if we're on a tagged release
@@ -375,36 +395,38 @@ function url(m::Method)
         else
             return "https://github.com/JuliaLang/julia/tree/$(Base.GIT_VERSION_INFO.commit)/base/$file#L$line"
         end
-    elseif root_module_exists(libgit2_id)
-        LibGit2 = root_module(libgit2_id)
+    end
+    libgit2_id = PkgId(UUID((0x76f85450_5226_5b5a,0x8eaa_529ad045b433)), "LibGit2")
+    LibGit2 = maybe_root_module(libgit2_id)
+    if LibGit2 isa Module
         try
             d = dirname(file)
-            return LibGit2.with(LibGit2.GitRepoExt(d)) do repo
-                LibGit2.with(LibGit2.GitConfig(repo)) do cfg
-                    u = LibGit2.get(cfg, "remote.origin.url", "")
-                    u = (match(LibGit2.GITHUB_REGEX,u)::AbstractMatch).captures[1]
-                    commit = string(LibGit2.head_oid(repo))
-                    root = LibGit2.path(repo)
-                    if startswith(file, root) || startswith(realpath(file), root)
-                        "https://github.com/$u/tree/$commit/"*file[length(root)+1:end]*"#L$line"
-                    else
-                        fileurl(file)
+            return let file = file
+                LibGit2.with(LibGit2.GitRepoExt(d)) do repo
+                    LibGit2.with(LibGit2.GitConfig(repo)) do cfg
+                        u = LibGit2.get(cfg, "remote.origin.url", "")
+                        u = (match(LibGit2.GITHUB_REGEX,u)::AbstractMatch).captures[1]
+                        commit = string(LibGit2.head_oid(repo))
+                        root = LibGit2.path(repo)
+                        if startswith(file, root) || startswith(realpath(file), root)
+                            "https://github.com/$u/tree/$commit/"*file[length(root)+1:end]*"#L$line"
+                        else
+                            fileurl(file)
+                        end
                     end
                 end
-            end
+            end::String
         catch
-            return fileurl(file)
+            # oops, this was a bad idea
         end
-    else
-        return fileurl(file)
     end
+    return fileurl(file)
 end
 
 function show(io::IO, ::MIME"text/html", m::Method)
     tv, decls, file, line = arg_decl_parts(m, true)
     sig = unwrap_unionall(m.sig)
-    if sig === Tuple
-        # Builtin
+    if sig <: Tuple{Core.Builtin, Vararg}
         print(io, m.name, "(...) in ", parentmodule(m))
         return
     end
@@ -443,7 +465,6 @@ function show(io::IO, ::MIME"text/html", m::Method)
 end
 
 function show(io::IO, mime::MIME"text/html", ms::MethodList)
-    mt = ms.mt
     show_method_list_header(io, ms, str -> "<b>"*str*"</b>")
     print(io, "<ul>")
     for meth in ms
@@ -454,8 +475,6 @@ function show(io::IO, mime::MIME"text/html", ms::MethodList)
     print(io, "</ul>")
 end
 
-show(io::IO, mime::MIME"text/html", mt::Core.MethodTable) = show(io, mime, MethodList(mt))
-
 # pretty-printing of AbstractVector{Method}
 function show(io::IO, mime::MIME"text/plain", mt::AbstractVector{Method})
     last_shown_line_infos = get(io, :last_shown_line_infos, nothing)
diff --git a/base/missing.jl b/base/missing.jl
index f6f5fe507260b..f3f7276855e5d 100644
--- a/base/missing.jl
+++ b/base/missing.jl
@@ -36,7 +36,7 @@ Any
 !!! compat "Julia 1.3"
     This function is exported as of Julia 1.3.
 """
-nonmissingtype(::Type{T}) where {T} = typesplit(T, Missing)
+nonmissingtype(@nospecialize(T::Type)) = typesplit(T, Missing)
 
 function nonmissingtype_checked(T::Type)
     R = nonmissingtype(T)
@@ -86,6 +86,8 @@ isequal(::Any, ::Missing) = false
 isless(::Missing, ::Missing) = false
 isless(::Missing, ::Any) = false
 isless(::Any, ::Missing) = true
+ispositive(::Missing) = missing
+isnegative(::Missing) = missing
 isapprox(::Missing, ::Missing; kwargs...) = missing
 isapprox(::Missing, ::Any; kwargs...) = missing
 isapprox(::Any, ::Missing; kwargs...) = missing
@@ -100,19 +102,14 @@ for f in (:(!), :(~), :(+), :(-), :(*), :(&), :(|), :(xor),
           :(real), :(imag), :(sign), :(inv))
     @eval ($f)(::Missing) = missing
 end
-for f in (:(Base.zero), :(Base.one), :(Base.oneunit))
+for f in (:zero, :one, :oneunit)
+    @eval ($f)(::Type{Any}) = throw(MethodError($f, (Any,)))  # To prevent StackOverflowError
     @eval ($f)(::Type{Missing}) = missing
-    @eval function $(f)(::Type{Union{T, Missing}}) where T
-        T === Any && throw(MethodError($f, (Any,)))  # To prevent StackOverflowError
-        $f(T)
-    end
+    @eval ($f)(::Type{T}) where {T>:Missing} = $f(nonmissingtype_checked(T))
 end
-for f in (:(Base.float), :(Base.complex))
-    @eval $f(::Type{Missing}) = Missing
-    @eval function $f(::Type{Union{T, Missing}}) where T
-        T === Any && throw(MethodError($f, (Any,)))  # To prevent StackOverflowError
-        Union{$f(T), Missing}
-    end
+for f in (:float, :real, :complex)
+    @eval ($f)(::Type{Any}) = throw(MethodError($f, (Any,)))  # To prevent StackOverflowError
+    @eval ($f)(::Type{T}) where {T>:Missing} = Union{$f(nonmissingtype(T)), Missing}
 end
 
 # Binary operators/functions
@@ -135,6 +132,7 @@ min(::Any,     ::Missing) = missing
 max(::Missing, ::Missing) = missing
 max(::Missing, ::Any)     = missing
 max(::Any,     ::Missing) = missing
+clamp(::Missing, lo, hi) = missing
 
 missing_conversion_msg(@nospecialize T) =
     LazyString("cannot convert a missing value to type ", T, ": use Union{", T, ", Missing} instead")
@@ -146,21 +144,10 @@ round(::Type{T}, ::Missing, ::RoundingMode=RoundNearest) where {T} =
     throw(MissingException(missing_conversion_msg(T)))
 round(::Type{T}, x::Any, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r)
 # to fix ambiguities
+round(::Type{T}, x::Real, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r)
 round(::Type{T}, x::Rational{Tr}, r::RoundingMode=RoundNearest) where {T>:Missing,Tr} = round(nonmissingtype_checked(T), x, r)
 round(::Type{T}, x::Rational{Bool}, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r)
 
-# Handle ceil, floor, and trunc separately as they have no RoundingMode argument
-for f in (:(ceil), :(floor), :(trunc))
-    @eval begin
-        ($f)(::Missing; sigdigits::Integer=0, digits::Integer=0, base::Integer=0) = missing
-        ($f)(::Type{>:Missing}, ::Missing) = missing
-        ($f)(::Type{T}, ::Missing) where {T} = throw(MissingException(missing_conversion_msg(T)))
-        ($f)(::Type{T}, x::Any) where {T>:Missing} = $f(nonmissingtype_checked(T), x)
-        # to fix ambiguities
-        ($f)(::Type{T}, x::Rational) where {T>:Missing} = $f(nonmissingtype_checked(T), x)
-    end
-end
-
 # to avoid ambiguity warnings
 (^)(::Missing, ::Integer) = missing
 
@@ -252,7 +239,7 @@ function iterate(itr::SkipMissing, state...)
     y = iterate(itr.x, state...)
     y === nothing && return nothing
     item, state = y
-    while item === missing
+    while ismissing(item)
         y = iterate(itr.x, state)
         y === nothing && return nothing
         item, state = y
@@ -262,12 +249,12 @@ end
 
 IndexStyle(::Type{<:SkipMissing{T}}) where {T} = IndexStyle(T)
 eachindex(itr::SkipMissing) =
-    Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, eachindex(itr.x))
+    Iterators.filter(i -> !ismissing(@inbounds(itr.x[i])), eachindex(itr.x))
 keys(itr::SkipMissing) =
-    Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, keys(itr.x))
+    Iterators.filter(i -> !ismissing(@inbounds(itr.x[i])), keys(itr.x))
 @propagate_inbounds function getindex(itr::SkipMissing, I...)
     v = itr.x[I...]
-    v === missing && throw(MissingException(LazyString("the value at index ", I, " is missing")))
+    ismissing(v) && throw(MissingException(LazyString("the value at index ", I, " is missing")))
     v
 end
 
@@ -291,18 +278,18 @@ function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray})
     ilast = last(inds)
     for outer i in i:ilast
         @inbounds ai = A[i]
-        ai !== missing && break
+        !ismissing(ai) && break
     end
-    ai === missing && return mapreduce_empty(f, op, eltype(itr))
+    ismissing(ai) && return mapreduce_empty(f, op, eltype(itr))
     a1::eltype(itr) = ai
     i == typemax(typeof(i)) && return mapreduce_first(f, op, a1)
     i += 1
     ai = missing
     for outer i in i:ilast
         @inbounds ai = A[i]
-        ai !== missing && break
+        !ismissing(ai) && break
     end
-    ai === missing && return mapreduce_first(f, op, a1)
+    ismissing(ai) && return mapreduce_first(f, op, a1)
     # We know A contains at least two non-missing entries: the result cannot be nothing
     something(mapreduce_impl(f, op, itr, first(inds), last(inds)))
 end
@@ -320,7 +307,7 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
         return nothing
     elseif ifirst == ilast
         @inbounds a1 = A[ifirst]
-        if a1 === missing
+        if ismissing(a1)
             return nothing
         else
             return Some(mapreduce_first(f, op, a1))
@@ -331,25 +318,25 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
         i = ifirst
         for outer i in i:ilast
             @inbounds ai = A[i]
-            ai !== missing && break
+            !ismissing(ai) && break
         end
-        ai === missing && return nothing
+        ismissing(ai) && return nothing
         a1 = ai::eltype(itr)
         i == typemax(typeof(i)) && return Some(mapreduce_first(f, op, a1))
         i += 1
         ai = missing
         for outer i in i:ilast
             @inbounds ai = A[i]
-            ai !== missing && break
+            !ismissing(ai) && break
         end
-        ai === missing && return Some(mapreduce_first(f, op, a1))
+        ismissing(ai) && return Some(mapreduce_first(f, op, a1))
         a2 = ai::eltype(itr)
         i == typemax(typeof(i)) && return Some(op(f(a1), f(a2)))
         i += 1
         v = op(f(a1), f(a2))
         @simd for i = i:ilast
             @inbounds ai = A[i]
-            if ai !== missing
+            if !ismissing(ai)
                 v = op(v, f(ai))
             end
         end
@@ -395,7 +382,7 @@ julia> filter(isodd, skipmissing(x))
 function filter(f, itr::SkipMissing{<:AbstractArray})
     y = similar(itr.x, eltype(itr), 0)
     for xi in itr.x
-        if xi !== missing && f(xi)
+        if !ismissing(xi) && f(xi)
             push!(y, xi)
         end
     end
@@ -461,7 +448,7 @@ ERROR: `b` is still missing
 macro coalesce(args...)
     expr = :(missing)
     for arg in reverse(args)
-        expr = :((val = $arg) !== missing ? val : $expr)
+        expr = :(!ismissing((val = $(esc(arg));)) ? val : $expr)
     end
-    return esc(:(let val; $expr; end))
+    return :(let val; $expr; end)
 end
diff --git a/base/module.jl b/base/module.jl
new file mode 100644
index 0000000000000..9cf74f81fccae
--- /dev/null
+++ b/base/module.jl
@@ -0,0 +1,143 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Full-featured versions of _eval_import and _eval_using
+
+for m in methods(_eval_import)
+    delete_method(m)
+end
+for m in methods(_eval_using)
+    delete_method(m)
+end
+
+function eval_import_path(at::Module, from::Union{Module, Nothing}, path::Expr, keyword::String)
+    isempty(path.args) && error("malformed import statement")
+
+    i::Int = 1
+    function next!()
+        local v
+        i <= length(path.args) || error("invalid module path")
+        v = path.args[i]
+        i += 1
+        v isa Symbol || throw(TypeError(Symbol(keyword), "", Symbol, v))
+        v
+    end
+    v = next!()
+    m = nothing
+
+    if from !== nothing
+        m = from
+    elseif v !== :.
+        # `A.B`: call the loader to obtain the root A in the current environment.
+        if v === :Core
+            m = Core
+        elseif v === :Base
+            m = Base
+        else
+            m = require(at, v)
+            m isa Module || error("failed to load module $v")
+        end
+        i > lastindex(path.args) && return m, nothing
+        v = next!()
+    else
+        # `.A.B.C`: strip off leading dots by following parent links
+        m = at
+        while (v = next!()) === :.
+            m = parentmodule(m)
+        end
+    end
+
+    while true
+        v === :. && error("invalid $keyword path: \".\" in identifier path")
+        i > lastindex(path.args) && break
+        m = getglobal(m, v)
+        m isa Module || error("invalid $keyword path: \"$v\" does not name a module")
+        v = next!()
+    end
+    m, v
+end
+
+function eval_import_path_all(at::Module, path::Expr, keyword::String)
+    m, v = eval_import_path(at, nothing, path, keyword)
+    if v !== nothing
+        m = getglobal(m, v)
+        m isa Module || error("invalid $keyword path: \"$v\" does not name a module")
+    end
+    m
+end
+
+function check_macro_rename(from::Symbol, to::Symbol, keyword::String)
+    c1(sym) = bitcast(Char, UInt32(unsafe_load(unsafe_convert(Ptr{UInt8}, sym))) << 24)
+    from_c, to_c = c1(from), c1(to)
+    if from_c == '@' && to_c != '@'
+        error("cannot rename macro \"$from\" to non-macro \"$to\" in \"$keyword\"")
+    end
+    if from_c != '@' && to_c == '@'
+        error("cannot rename non-macro \"$from\" to macro \"$to\" in \"$keyword\"")
+    end
+end
+
+"""
+    _eval_import(imported::Bool, to::Module, from::Union{Expr, Nothing}, paths::Expr...)
+
+Evaluate the import paths, calling `Core._import` for each name to be imported.
+`imported` imports are created with `import`, `using A: x` sets this to false.
+The `from` is the part of the import path before the `:`.  This is the lowered
+form of `import`, `import ...:`, and `using ...:`.
+
+```
+import A             => _eval_import(true,  Main, nothing,          Expr(:., :A))
+import A.b           => _eval_import(true,  Main, nothing,          Expr(:., :A, :b))
+import A.b as c      => _eval_import(true,  Main, nothing,          Expr(:as, Expr(:., :A, :b), :c))
+import A.B: C.d, e   => _eval_import(true,  Main, Expr(:., :A, :B), Expr(:., :C, :d), Expr(:., :e))
+import A.B: C.d as e => _eval_import(true,  Main, Expr(:., :A, :B), Expr(:as, Expr(:., :C, :d), :e))
+using  A.B: C.d, e   => _eval_import(false, Main, Expr(:., :A, :B), Expr(:., :C, :d), Expr(:., :e))
+
+See also [`_import`](@ref Core._import).
+```
+"""
+function _eval_import(imported::Bool, to::Module, from::Union{Expr, Nothing}, paths::Expr...)
+    keyword = imported ? "import" : "using"
+    fail() = error("malformed \"$keyword\" statement")
+    from = from !== nothing ? eval_import_path_all(to, from, keyword) : nothing
+
+    for path in paths
+        path isa Expr || fail()
+        asname = nothing
+        if path.head === :as && length(path.args) == 2
+            path, asname = path.args
+        elseif path.head !== :.
+            fail()
+        end
+        m, name = eval_import_path(to, from, path, keyword)
+
+        if name !== nothing
+            asname = asname === nothing ? name : asname
+            check_macro_rename(name, asname, keyword)
+            Core._import(to, m, asname, name, imported)
+        else
+            Core._import(to, m, asname === nothing ? nameof(m) : asname)
+        end
+    end
+end
+
+"""
+    _eval_using(to::Module, path::Expr)
+
+Evaluate the import path to a module and call [`Core._using`](@ref) on it,
+making its exports available to the `to` module; this is the lowered form of
+`using A`.
+
+```
+using A.B            => _module_using(Main, Expr(:., :A, :B))
+```
+
+See also [`_using`](@ref Core._using).
+"""
+function _eval_using(to::Module, path::Expr, flags::UInt8=UInt8(0))
+    from = eval_import_path_all(to, path, "using")
+    Core._using(to, from, flags)
+    is_package = length(path.args) == 1 && path.args[1] !== :.
+    if to == Main && is_package
+        Core._import(to, from, nameof(from))
+    end
+end
diff --git a/base/mpfr.jl b/base/mpfr.jl
index 2e03018f7669f..f17ac72c7e198 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -16,14 +16,20 @@ import
         cosh, sinh, tanh, sech, csch, coth, acosh, asinh, atanh, lerpi,
         cbrt, typemax, typemin, unsafe_trunc, floatmin, floatmax, rounding,
         setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero,
-        isone, big, _string_n, decompose, minmax,
-        sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand
+        isone, big, _string_n, decompose, minmax, _precision_with_base_2,
+        sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
+        uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
+        ispositive, isnegative
 
+import .Core: AbstractFloat
+import .Base: Rational, Float16, Float32, Float64, Bool
 
 using .Base.Libc
-import ..Rounding: rounding_raw, setrounding_raw
+import ..Rounding: Rounding,
+    rounding_raw, setrounding_raw, rounds_to_nearest, rounds_away_from_zero,
+    tie_breaker_is_to_even, correct_rounding_requires_increment
 
-import ..GMP: ClongMax, CulongMax, CdoubleMax, Limb, libgmp
+import ..GMP: ClongMax, CulongMax, CdoubleMax, Limb, libgmp, BigInt
 
 import ..FastMath.sincos_fast
 
@@ -35,7 +41,6 @@ else
     const libmpfr = "libmpfr.so.6"
 end
 
-
 version() = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Ptr{Cchar}, ())))
 patches() = split(unsafe_string(ccall((:mpfr_get_patches,libmpfr), Ptr{Cchar}, ())),' ')
 
@@ -89,65 +94,168 @@ function convert(::Type{RoundingMode}, r::MPFRRoundingMode)
     end
 end
 
+rounds_to_nearest(m::MPFRRoundingMode) = m == MPFRRoundNearest
+function rounds_away_from_zero(m::MPFRRoundingMode, sign_bit::Bool)
+    if m == MPFRRoundToZero
+        false
+    elseif m == MPFRRoundUp
+        !sign_bit
+    elseif m == MPFRRoundDown
+        sign_bit
+    else
+        # Assuming `m == MPFRRoundFromZero`
+        true
+    end
+end
+tie_breaker_is_to_even(::MPFRRoundingMode) = true
+
 const ROUNDING_MODE = Ref{MPFRRoundingMode}(MPFRRoundNearest)
+const CURRENT_ROUNDING_MODE = Base.ScopedValues.ScopedValue{MPFRRoundingMode}()
 const DEFAULT_PRECISION = Ref{Clong}(256)
-
+const CURRENT_PRECISION = Base.ScopedValues.ScopedValue{Clong}()
 # Basic type and initialization definitions
 
-"""
-    BigFloat <: AbstractFloat
+# Warning: the constants are MPFR implementation details from
+# `src/mpfr-impl.h`, search for `MPFR_EXP_ZERO`.
+const mpfr_special_exponent_zero = typemin(Clong) + true
+const mpfr_special_exponent_nan = mpfr_special_exponent_zero + true
+const mpfr_special_exponent_inf = mpfr_special_exponent_nan + true
 
-Arbitrary precision floating point number type.
-"""
-mutable struct BigFloat <: AbstractFloat
+struct BigFloatLayout
     prec::Clong
     sign::Cint
     exp::Clong
     d::Ptr{Limb}
-    # _d::Buffer{Limb} # Julia gc handle for memory @ d
-    _d::String # Julia gc handle for memory @ d (optimized)
+    # possible padding
+    p::Limb # Tuple{Vararg{Limb}}
+end
+const offset_prec = fieldoffset(BigFloatLayout, 1) % Int
+const offset_sign = fieldoffset(BigFloatLayout, 2) % Int
+const offset_exp = fieldoffset(BigFloatLayout, 3) % Int
+const offset_d = fieldoffset(BigFloatLayout, 4) % Int
+const offset_p_limbs = ((fieldoffset(BigFloatLayout, 5) % Int + sizeof(Limb) - 1) ÷ sizeof(Limb))
+const offset_p = offset_p_limbs * sizeof(Limb)
+
+"""
+    BigFloat <: AbstractFloat
+
+Arbitrary precision floating point number type.
+"""
+struct BigFloat <: AbstractFloat
+    d::Memory{Limb}
 
     # Not recommended for general use:
     # used internally by, e.g. deepcopy
-    global function _BigFloat(prec::Clong, sign::Cint, exp::Clong, d::String)
-        # ccall-based version, inlined below
-        #z = new(zero(Clong), zero(Cint), zero(Clong), C_NULL, d)
-        #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), d, prec) # currently seems to be a no-op in mpfr
-        #NAN_KIND = Cint(0)
-        #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, d)
-        #return z
-        return new(prec, sign, exp, pointer(d), d)
+    global function _BigFloat(d::Memory{Limb})
+        Base.unsafe_convert(Ref{BigFloat}, BigFloatData(d)) # force early initialization of pointer field of z.d
+        return new(d)
     end
 
-    function BigFloat(; precision::Integer=DEFAULT_PRECISION[])
+    function BigFloat(; precision::Integer=_precision_with_base_2(BigFloat))
         precision < 1 && throw(DomainError(precision, "`precision` cannot be less than 1."))
         nb = ccall((:mpfr_custom_get_size,libmpfr), Csize_t, (Clong,), precision)
-        nb = (nb + Core.sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this
-        #d = Vector{Limb}(undef, nb)
-        d = _string_n(nb * Core.sizeof(Limb))
-        EXP_NAN = Clong(1) - Clong(typemax(Culong) >> 1)
-        return _BigFloat(Clong(precision), one(Cint), EXP_NAN, d) # +NAN
+        nl = (nb + offset_p + sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this
+        d = Memory{Limb}(undef, nl % Int)
+        # ccall-based version, inlined below
+        #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), BigFloatData(d), prec) # currently seems to be a no-op in mpfr
+        #NAN_KIND = Cint(0)
+        #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, BigFloatData(d))
+        p = Base.unsafe_convert(Ptr{Limb}, d)
+        GC.@preserve d begin # initialize to +NAN
+            unsafe_store!(Ptr{Clong}(p) + offset_prec, Clong(precision))
+            unsafe_store!(Ptr{Cint}(p) + offset_sign, one(Cint))
+            unsafe_store!(Ptr{Clong}(p) + offset_exp, mpfr_special_exponent_nan)
+            unsafe_store!(Ptr{Ptr{Limb}}(p) + offset_d, p + offset_p)
+        end
+        return new(d)
+    end
+end
+
+"""
+Segment of raw words of bits interpreted as a big integer. Less
+significant words come first. Each word is in machine-native bit-order.
+"""
+struct BigFloatData{Limb}
+    d::Memory{Limb}
+end
+
+# BigFloat interface
+@inline function Base.getproperty(x::BigFloat, s::Symbol)
+    d = getfield(x, :d)
+    p = Base.unsafe_convert(Ptr{Limb}, d)
+    if s === :prec
+        return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_prec)
+    elseif s === :sign
+        return GC.@preserve d unsafe_load(Ptr{Cint}(p) + offset_sign)
+    elseif s === :exp
+        return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_exp)
+    elseif s === :d
+        return BigFloatData(d)
+    else
+        return throw(FieldError(typeof(x), s))
+    end
+end
+
+# While BigFloat (like all Numbers) is considered immutable, for practical reasons
+# of writing the algorithms on it we allow mutating sign, exp, and the contents of d
+@inline function Base.setproperty!(x::BigFloat, s::Symbol, v)
+    d = getfield(x, :d)
+    p = Base.unsafe_convert(Ptr{Limb}, d)
+    if s === :sign
+        return GC.@preserve d unsafe_store!(Ptr{Cint}(p) + offset_sign, v)
+    elseif s === :exp
+        return GC.@preserve d unsafe_store!(Ptr{Clong}(p) + offset_exp, v)
+    #elseif s === :d || s === :prec # not mutable
+    else
+        return throw(FieldError(x, s))
+    end
+end
+
+# Ref interface: make sure the conversion to C is done properly
+Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = error("not compatible with mpfr")
+Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat}) = error("not compatible with mpfr")
+Base.cconvert(::Type{Ref{BigFloat}}, x::BigFloat) = x.d # BigFloatData is the Ref type for BigFloat
+Base.cconvert(::Type{Ref{BigFloat}}, x::Number) = convert(BigFloat, x).d # avoid default conversion to Ref(BigFloat(x))
+Base.cconvert(::Type{Ref{BigFloat}}, x::Ref{BigFloat}) = x[].d
+function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::BigFloatData)
+    d = getfield(x, :d)
+    p = Base.unsafe_convert(Ptr{Limb}, d)
+    dptrptr = Ptr{Ptr{Limb}}(p) + offset_d
+    dptr = p + offset_p
+    GC.@preserve d if unsafe_load(dptrptr, :monotonic) != dptr # make sure this pointer value was recomputed after any deserialization or copying
+        unsafe_store!(dptrptr, dptr, :monotonic) # :monotonic ensure that TSAN knows that this isn't a data race
     end
+    return Ptr{BigFloat}(p)
+end
+Base.unsafe_convert(::Type{Ptr{Limb}}, fd::BigFloatData) = Base.unsafe_convert(Ptr{Limb}, getfield(fd, :d)) + offset_p
+function Base.setindex!(fd::BigFloatData, v, i)
+    d = getfield(fd, :d)
+    @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i))
+    @inbounds d[i + offset_p_limbs] = v
+    return fd
+end
+function Base.getindex(fd::BigFloatData, i)
+    d = getfield(fd, :d)
+    @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i))
+    @inbounds d[i + offset_p_limbs]
 end
+Base.length(fd::BigFloatData) = length(getfield(fd, :d)) - offset_p_limbs
+Base.copyto!(fd::BigFloatData, limbs) = copyto!(getfield(fd, :d), offset_p_limbs + 1, limbs) # for Random
 
-rounding_raw(::Type{BigFloat}) = ROUNDING_MODE[]
+include("rawbigfloats.jl")
+
+rounding_raw(::Type{BigFloat}) = @something(Base.ScopedValues.get(CURRENT_ROUNDING_MODE), ROUNDING_MODE[])
 setrounding_raw(::Type{BigFloat}, r::MPFRRoundingMode) = ROUNDING_MODE[]=r
+function setrounding_raw(f::Function, ::Type{BigFloat}, r::MPFRRoundingMode)
+    Base.ScopedValues.@with(CURRENT_ROUNDING_MODE => r, f())
+end
 
 rounding(::Type{BigFloat}) = convert(RoundingMode, rounding_raw(BigFloat))
 setrounding(::Type{BigFloat}, r::RoundingMode) = setrounding_raw(BigFloat, convert(MPFRRoundingMode, r))
+setrounding(f::Function, ::Type{BigFloat}, r::RoundingMode) =
+    setrounding_raw(f, BigFloat, convert(MPFRRoundingMode, r))
 
 
-# overload the definition of unsafe_convert to ensure that `x.d` is assigned
-# it may have been dropped in the event that the BigFloat was serialized
-Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = x
-@inline function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat})
-    x = x[]
-    if x.d == C_NULL
-        x.d = pointer(x._d)
-    end
-    return convert(Ptr{BigFloat}, Base.pointer_from_objref(x))
-end
-
 """
     BigFloat(x::Union{Real, AbstractString} [, rounding::RoundingMode=rounding(BigFloat)]; [precision::Integer=precision(BigFloat)])
 
@@ -192,8 +300,8 @@ BigFloat(x, r::RoundingMode)
 widen(::Type{Float64}) = BigFloat
 widen(::Type{BigFloat}) = BigFloat
 
-function BigFloat(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[])
-    if precision == _precision(x)
+function BigFloat(x::BigFloat, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat))
+    if precision == _precision_with_base_2(x)
         return x
     else
         z = BigFloat(;precision=precision)
@@ -204,7 +312,7 @@ function BigFloat(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::I
 end
 
 function _duplicate(x::BigFloat)
-    z = BigFloat(;precision=_precision(x))
+    z = BigFloat(;precision=_precision_with_base_2(x))
     ccall((:mpfr_set, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Int32), z, x, 0)
     return z
 end
@@ -212,7 +320,7 @@ end
 # convert to BigFloat
 for (fJ, fC) in ((:si,:Clong), (:ui,:Culong))
     @eval begin
-        function BigFloat(x::($fC), r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[])
+        function BigFloat(x::($fC), r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat))
             z = BigFloat(;precision=precision)
             ccall(($(string(:mpfr_set_,fJ)), libmpfr), Int32, (Ref{BigFloat}, $fC, MPFRRoundingMode), z, x, r)
             return z
@@ -220,7 +328,7 @@ for (fJ, fC) in ((:si,:Clong), (:ui,:Culong))
     end
 end
 
-function BigFloat(x::Float64, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[])
+function BigFloat(x::Float64, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat))
     z = BigFloat(;precision)
     # punt on the hard case where we might have to deal with rounding
     # we could use this path in all cases, but mpfr_set_d has a lot of overhead.
@@ -234,11 +342,11 @@ function BigFloat(x::Float64, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::In
     z.sign = 1-2*signbit(x)
     if iszero(x) || !isfinite(x)
         if isinf(x)
-            z.exp = Clong(2) - typemax(Clong)
+            z.exp = mpfr_special_exponent_inf
         elseif isnan(x)
-            z.exp = Clong(1) - typemax(Clong)
+            z.exp = mpfr_special_exponent_nan
         else
-            z.exp = - typemax(Clong)
+            z.exp = mpfr_special_exponent_zero
         end
         return z
     end
@@ -248,56 +356,63 @@ function BigFloat(x::Float64, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::In
     nlimbs = (precision + 8*Core.sizeof(Limb) - 1) ÷ (8*Core.sizeof(Limb))
 
     # Limb is a CLong which is a UInt32 on windows (thank M$) which makes this more complicated and slower.
+    zd = z.d
     if Limb === UInt64
         for i in 1:nlimbs-1
-            unsafe_store!(z.d, 0x0, i)
+            @inbounds setindex!(zd, 0x0, i)
         end
-        unsafe_store!(z.d, val, nlimbs)
+        @inbounds setindex!(zd, val, nlimbs)
     else
         for i in 1:nlimbs-2
-            unsafe_store!(z.d, 0x0, i)
+            @inbounds setindex!(zd, 0x0, i)
         end
-        unsafe_store!(z.d, val % UInt32, nlimbs-1)
-        unsafe_store!(z.d, (val >> 32) % UInt32, nlimbs)
+        @inbounds setindex!(zd, val % UInt32, nlimbs-1)
+        @inbounds setindex!(zd, (val >> 32) % UInt32, nlimbs)
     end
     z
 end
 
-function BigFloat(x::BigInt, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[])
+function BigFloat(x::BigInt, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat))
     z = BigFloat(;precision=precision)
     ccall((:mpfr_set_z, libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, r)
     return z
 end
 
-BigFloat(x::Integer; precision::Integer=DEFAULT_PRECISION[]) =
-    BigFloat(BigInt(x)::BigInt, ROUNDING_MODE[]; precision=precision)
-BigFloat(x::Integer, r::MPFRRoundingMode; precision::Integer=DEFAULT_PRECISION[]) =
+BigFloat(x::Integer; precision::Integer=_precision_with_base_2(BigFloat)) =
+    BigFloat(BigInt(x)::BigInt, rounding_raw(BigFloat); precision=precision)
+BigFloat(x::Integer, r::MPFRRoundingMode; precision::Integer=_precision_with_base_2(BigFloat)) =
     BigFloat(BigInt(x)::BigInt, r; precision=precision)
 
-BigFloat(x::Union{Bool,Int8,Int16,Int32}, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) =
+BigFloat(x::Union{Bool,Int8,Int16,Int32}, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) =
     BigFloat(convert(Clong, x), r; precision=precision)
-BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) =
+BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) =
     BigFloat(convert(Culong, x), r; precision=precision)
 
-BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) =
+BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) =
     BigFloat(Float64(x), r; precision=precision)
 
-function BigFloat(x::Rational, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[])
+function BigFloat(x::Rational, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat))
+    r_den = _opposite_round(r)
     setprecision(BigFloat, precision) do
         setrounding_raw(BigFloat, r) do
-            BigFloat(numerator(x))::BigFloat / BigFloat(denominator(x))::BigFloat
+            BigFloat(numerator(x))::BigFloat / BigFloat(denominator(x), r_den)::BigFloat
         end
     end
 end
+function _opposite_round(r::MPFRRoundingMode)
+    r == MPFRRoundUp && return MPFRRoundDown
+    r == MPFRRoundDown && return MPFRRoundUp
+    return r
+end
 
-function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=DEFAULT_PRECISION[], rounding::MPFRRoundingMode=ROUNDING_MODE[])
+function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=_precision_with_base_2(BigFloat), rounding::MPFRRoundingMode=rounding_raw(BigFloat))
     !isempty(s) && isspace(s[end]) && return tryparse(BigFloat, rstrip(s), base = base)
     z = BigFloat(precision=precision)
     err = ccall((:mpfr_set_str, libmpfr), Int32, (Ref{BigFloat}, Cstring, Int32, MPFRRoundingMode), z, s, base, rounding)
     err == 0 ? z : nothing
 end
 
-BigFloat(x::AbstractString, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) =
+BigFloat(x::AbstractString, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) =
     parse(BigFloat, x; precision=precision, rounding=r)
 
 Rational(x::BigFloat) = convert(Rational{BigInt}, x)
@@ -305,9 +420,9 @@ AbstractFloat(x::BigInt) = BigFloat(x)
 
 float(::Type{BigInt}) = BigFloat
 
-BigFloat(x::Real, r::RoundingMode; precision::Integer=DEFAULT_PRECISION[]) =
+BigFloat(x::Real, r::RoundingMode; precision::Integer=_precision_with_base_2(BigFloat)) =
     BigFloat(x, convert(MPFRRoundingMode, r); precision=precision)::BigFloat
-BigFloat(x::AbstractString, r::RoundingMode; precision::Integer=DEFAULT_PRECISION[]) =
+BigFloat(x::AbstractString, r::RoundingMode; precision::Integer=_precision_with_base_2(BigFloat)) =
     BigFloat(x, convert(MPFRRoundingMode, r); precision=precision)
 
 ## BigFloat -> Integer
@@ -352,18 +467,15 @@ round(::Type{T}, x::BigFloat, r::RoundingMode) where T<:Union{Signed, Unsigned}
     invoke(round, Tuple{Type{<:Union{Signed, Unsigned}}, BigFloat, Union{RoundingMode, MPFRRoundingMode}}, T, x, r)
 round(::Type{BigInt}, x::BigFloat, r::RoundingMode) =
     invoke(round, Tuple{Type{BigInt}, BigFloat, Union{RoundingMode, MPFRRoundingMode}}, BigInt, x, r)
-round(::Type{<:Integer}, x::BigFloat, r::RoundingMode) = throw(MethodError(round, (Integer, x, r)))
 
 
 unsafe_trunc(::Type{T}, x::BigFloat) where {T<:Integer} = unsafe_trunc(T, _unchecked_cast(T, x, RoundToZero))
 unsafe_trunc(::Type{BigInt}, x::BigFloat) = _unchecked_cast(BigInt, x, RoundToZero)
 
-# TODO: Ideally the base fallbacks for these would already exist
-for (f, rnd) in zip((:trunc, :floor, :ceil, :round),
-                 (RoundToZero, RoundDown, RoundUp, :(ROUNDING_MODE[])))
-    @eval $f(::Type{T}, x::BigFloat) where T<:Union{Unsigned, Signed, BigInt} = round(T, x, $rnd)
-    @eval $f(::Type{Integer}, x::BigFloat) = $f(BigInt, x)
-end
+round(::Type{T}, x::BigFloat) where T<:Integer = round(T, x, rounding_raw(BigFloat))
+# these two methods are split to increase their precedence in disambiguation:
+round(::Type{Integer}, x::BigFloat, r::RoundingMode) = round(BigInt, x, r)
+round(::Type{Integer}, x::BigFloat, r::MPFRRoundingMode) = round(BigInt, x, r)
 
 function Bool(x::BigFloat)
     iszero(x) && return false
@@ -380,35 +492,69 @@ function (::Type{T})(x::BigFloat) where T<:Integer
     trunc(T,x)
 end
 
-## BigFloat -> AbstractFloat
-_cpynansgn(x::AbstractFloat, y::BigFloat) = isnan(x) && signbit(x) != signbit(y) ? -x : x
-
-Float64(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) =
-    _cpynansgn(ccall((:mpfr_get_d,libmpfr), Float64, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
-Float64(x::BigFloat, r::RoundingMode) = Float64(x, convert(MPFRRoundingMode, r))
-
-Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) =
-    _cpynansgn(ccall((:mpfr_get_flt,libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
-Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r))
-
-function Float16(x::BigFloat) :: Float16
-    res = Float32(x)
-    resi = reinterpret(UInt32, res)
-    if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal
-        #shift so that the mantissa lines up where it would for normal Float16
-        shift = 113-((resi & 0x7f800000)>>23)
-        if shift<23
-            resi |= 0x0080_0000 # set implicit bit
-            resi >>= shift
+function to_ieee754(::Type{T}, x::BigFloat, rm) where {T<:AbstractFloat}
+    sb = signbit(x)
+    is_zero = iszero(x)
+    is_inf = isinf(x)
+    is_nan = isnan(x)
+    is_regular = !is_zero & !is_inf & !is_nan
+    ieee_exp = Int(x.exp) - 1
+    ieee_precision = precision(T)
+    ieee_exp_max = exponent_max(T)
+    ieee_exp_min = exponent_min(T)
+    exp_diff = ieee_exp - ieee_exp_min
+    is_normal = 0 ≤ exp_diff
+    (rm_is_to_zero, rm_is_from_zero) = if rounds_to_nearest(rm)
+        (false, false)
+    else
+        let from = rounds_away_from_zero(rm, sb)
+            (!from, from)
         end
-    end
-    if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values
-        # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
-        res = nextfloat(res, cmp(x, res))
-    end
-    return res
+    end::NTuple{2,Bool}
+    exp_is_huge_p = ieee_exp_max < ieee_exp
+    exp_is_huge_n = signbit(exp_diff + ieee_precision)
+    rounds_to_inf = is_regular & exp_is_huge_p & !rm_is_to_zero
+    rounds_to_zero = is_regular & exp_is_huge_n & !rm_is_from_zero
+    U = uinttype(T)
+
+    ret_u = if is_regular & !rounds_to_inf & !rounds_to_zero
+        if !exp_is_huge_p
+            # significand
+            v = x.d::BigFloatData
+            len = max(ieee_precision + min(exp_diff, 0), 0)::Int
+            signif = truncated(U, v, len) & significand_mask(T)
+
+            # round up if necessary
+            rh = BigFloatDataRoundingIncrementHelper(v, len)
+            incr = correct_rounding_requires_increment(rh, rm, sb)
+
+            # exponent
+            exp_field = max(exp_diff, 0) + is_normal
+
+            ieee754_representation(T, sb, exp_field, signif) + incr
+        else
+            ieee754_representation(T, sb, Val(:omega))
+        end
+    else
+        if is_zero | rounds_to_zero
+            ieee754_representation(T, sb, Val(:zero))
+        elseif is_inf | rounds_to_inf
+            ieee754_representation(T, sb, Val(:inf))
+        else
+            ieee754_representation(T, sb, Val(:nan))
+        end
+    end::U
+
+    reinterpret(T, ret_u)
 end
 
+Float16(x::BigFloat, r::MPFRRoundingMode=rounding_raw(BigFloat)) = to_ieee754(Float16, x, r)
+Float32(x::BigFloat, r::MPFRRoundingMode=rounding_raw(BigFloat)) = to_ieee754(Float32, x, r)
+Float64(x::BigFloat, r::MPFRRoundingMode=rounding_raw(BigFloat)) = to_ieee754(Float64, x, r)
+Float16(x::BigFloat, r::RoundingMode) = to_ieee754(Float16, x, r)
+Float32(x::BigFloat, r::RoundingMode) = to_ieee754(Float32, x, r)
+Float64(x::BigFloat, r::RoundingMode) = to_ieee754(Float64, x, r)
+
 promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat
 promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat}) = BigFloat
 promote_rule(::Type{BigFloat}, ::Type{<:AbstractFloat}) = BigFloat
@@ -431,14 +577,14 @@ for (fJ, fC) in ((:+,:add), (:*,:mul))
         # BigFloat
         function ($fJ)(x::BigFloat, y::BigFloat)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC)),libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC)),libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat))
             return z
         end
 
         # Unsigned Integer
         function ($fJ)(x::BigFloat, c::CulongMax)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC,:_ui)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC,:_ui)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat))
             return z
         end
         ($fJ)(c::CulongMax, x::BigFloat) = ($fJ)(x,c)
@@ -446,7 +592,7 @@ for (fJ, fC) in ((:+,:add), (:*,:mul))
         # Signed Integer
         function ($fJ)(x::BigFloat, c::ClongMax)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC,:_si)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC,:_si)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat))
             return z
         end
         ($fJ)(c::ClongMax, x::BigFloat) = ($fJ)(x,c)
@@ -454,7 +600,7 @@ for (fJ, fC) in ((:+,:add), (:*,:mul))
         # Float32/Float64
         function ($fJ)(x::BigFloat, c::CdoubleMax)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC,:_d)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC,:_d)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat))
             return z
         end
         ($fJ)(c::CdoubleMax, x::BigFloat) = ($fJ)(x,c)
@@ -462,7 +608,7 @@ for (fJ, fC) in ((:+,:add), (:*,:mul))
         # BigInt
         function ($fJ)(x::BigFloat, c::BigInt)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC,:_z)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC,:_z)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat))
             return z
         end
         ($fJ)(c::BigInt, x::BigFloat) = ($fJ)(x,c)
@@ -474,50 +620,50 @@ for (fJ, fC) in ((:-,:sub), (:/,:div))
         # BigFloat
         function ($fJ)(x::BigFloat, y::BigFloat)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC)),libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC)),libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat))
             return z
         end
 
         # Unsigned Int
         function ($fJ)(x::BigFloat, c::CulongMax)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC,:_ui)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC,:_ui)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat))
             return z
         end
         function ($fJ)(c::CulongMax, x::BigFloat)
             z = BigFloat()
-            ccall(($(string(:mpfr_,:ui_,fC)), libmpfr), Int32, (Ref{BigFloat}, Culong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,:ui_,fC)), libmpfr), Int32, (Ref{BigFloat}, Culong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, rounding_raw(BigFloat))
             return z
         end
 
         # Signed Integer
         function ($fJ)(x::BigFloat, c::ClongMax)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC,:_si)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC,:_si)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat))
             return z
         end
         function ($fJ)(c::ClongMax, x::BigFloat)
             z = BigFloat()
-            ccall(($(string(:mpfr_,:si_,fC)), libmpfr), Int32, (Ref{BigFloat}, Clong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,:si_,fC)), libmpfr), Int32, (Ref{BigFloat}, Clong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, rounding_raw(BigFloat))
             return z
         end
 
         # Float32/Float64
         function ($fJ)(x::BigFloat, c::CdoubleMax)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC,:_d)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC,:_d)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat))
             return z
         end
         function ($fJ)(c::CdoubleMax, x::BigFloat)
             z = BigFloat()
-            ccall(($(string(:mpfr_,:d_,fC)), libmpfr), Int32, (Ref{BigFloat}, Cdouble, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,:d_,fC)), libmpfr), Int32, (Ref{BigFloat}, Cdouble, Ref{BigFloat}, MPFRRoundingMode), z, c, x, rounding_raw(BigFloat))
             return z
         end
 
         # BigInt
         function ($fJ)(x::BigFloat, c::BigInt)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC,:_z)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC,:_z)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat))
             return z
         end
         # no :mpfr_z_div function
@@ -526,7 +672,7 @@ end
 
 function -(c::BigInt, x::BigFloat)
     z = BigFloat()
-    ccall((:mpfr_z_sub, libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[])
+    ccall((:mpfr_z_sub, libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}, Ref{BigFloat}, MPFRRoundingMode), z, c, x, rounding_raw(BigFloat))
     return z
 end
 
@@ -534,7 +680,7 @@ inv(x::BigFloat) = one(Clong) / x # faster than fallback one(x)/x
 
 function fma(x::BigFloat, y::BigFloat, z::BigFloat)
     r = BigFloat()
-    ccall(("mpfr_fma",libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), r, x, y, z, ROUNDING_MODE[])
+    ccall(("mpfr_fma",libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), r, x, y, z, rounding_raw(BigFloat))
     return r
 end
 
@@ -605,23 +751,23 @@ for (fJ, fC, fI) in ((:+, :add, 0), (:*, :mul, 1))
     @eval begin
         function ($fJ)(a::BigFloat, b::BigFloat, c::BigFloat)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[])
-            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, rounding_raw(BigFloat))
+            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, rounding_raw(BigFloat))
             return z
         end
         function ($fJ)(a::BigFloat, b::BigFloat, c::BigFloat, d::BigFloat)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[])
-            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[])
-            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, rounding_raw(BigFloat))
+            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, rounding_raw(BigFloat))
+            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, rounding_raw(BigFloat))
             return z
         end
         function ($fJ)(a::BigFloat, b::BigFloat, c::BigFloat, d::BigFloat, e::BigFloat)
             z = BigFloat()
-            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[])
-            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[])
-            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, ROUNDING_MODE[])
-            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, e, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, rounding_raw(BigFloat))
+            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, rounding_raw(BigFloat))
+            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, rounding_raw(BigFloat))
+            ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, e, rounding_raw(BigFloat))
             return z
         end
     end
@@ -629,14 +775,14 @@ end
 
 function -(x::BigFloat)
     z = BigFloat()
-    ccall((:mpfr_neg, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[])
+    ccall((:mpfr_neg, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat))
     return z
 end
 
 function sqrt(x::BigFloat)
     isnan(x) && return x
     z = BigFloat()
-    ccall((:mpfr_sqrt, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[])
+    ccall((:mpfr_sqrt, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat))
     isnan(z) && throw(DomainError(x, "NaN result for non-NaN input."))
     return z
 end
@@ -645,25 +791,25 @@ sqrt(x::BigInt) = sqrt(BigFloat(x))
 
 function ^(x::BigFloat, y::BigFloat)
     z = BigFloat()
-    ccall((:mpfr_pow, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[])
+    ccall((:mpfr_pow, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat))
     return z
 end
 
 function ^(x::BigFloat, y::CulongMax)
     z = BigFloat()
-    ccall((:mpfr_pow_ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, y, ROUNDING_MODE[])
+    ccall((:mpfr_pow_ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat))
     return z
 end
 
 function ^(x::BigFloat, y::ClongMax)
     z = BigFloat()
-    ccall((:mpfr_pow_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, y, ROUNDING_MODE[])
+    ccall((:mpfr_pow_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat))
     return z
 end
 
 function ^(x::BigFloat, y::BigInt)
     z = BigFloat()
-    ccall((:mpfr_pow_z, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[])
+    ccall((:mpfr_pow_z, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat))
     return z
 end
 
@@ -673,7 +819,7 @@ end
 for f in (:exp, :exp2, :exp10, :expm1, :cosh, :sinh, :tanh, :sech, :csch, :coth, :cbrt)
     @eval function $f(x::BigFloat)
         z = BigFloat()
-        ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[])
+        ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat))
         return z
     end
 end
@@ -681,7 +827,7 @@ end
 function sincos_fast(v::BigFloat)
     s = BigFloat()
     c = BigFloat()
-    ccall((:mpfr_sin_cos, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), s, c, v, ROUNDING_MODE[])
+    ccall((:mpfr_sin_cos, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), s, c, v, rounding_raw(BigFloat))
     return (s, c)
 end
 sincos(v::BigFloat) = sincos_fast(v)
@@ -689,18 +835,18 @@ sincos(v::BigFloat) = sincos_fast(v)
 # return log(2)
 function big_ln2()
     c = BigFloat()
-    ccall((:mpfr_const_log2, libmpfr), Cint, (Ref{BigFloat}, MPFRRoundingMode), c, MPFR.ROUNDING_MODE[])
+    ccall((:mpfr_const_log2, libmpfr), Cint, (Ref{BigFloat}, MPFRRoundingMode), c, MPFR.rounding_raw(BigFloat))
     return c
 end
 
 function ldexp(x::BigFloat, n::Clong)
     z = BigFloat()
-    ccall((:mpfr_mul_2si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, n, ROUNDING_MODE[])
+    ccall((:mpfr_mul_2si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, n, rounding_raw(BigFloat))
     return z
 end
 function ldexp(x::BigFloat, n::Culong)
     z = BigFloat()
-    ccall((:mpfr_mul_2ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, n, ROUNDING_MODE[])
+    ccall((:mpfr_mul_2ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, n, rounding_raw(BigFloat))
     return z
 end
 ldexp(x::BigFloat, n::ClongMax) = ldexp(x, convert(Clong, n))
@@ -713,13 +859,13 @@ function factorial(x::BigFloat)
     end
     ui = convert(Culong, x)
     z = BigFloat()
-    ccall((:mpfr_fac_ui, libmpfr), Int32, (Ref{BigFloat}, Culong, MPFRRoundingMode), z, ui, ROUNDING_MODE[])
+    ccall((:mpfr_fac_ui, libmpfr), Int32, (Ref{BigFloat}, Culong, MPFRRoundingMode), z, ui, rounding_raw(BigFloat))
     return z
 end
 
 function hypot(x::BigFloat, y::BigFloat)
     z = BigFloat()
-    ccall((:mpfr_hypot, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[])
+    ccall((:mpfr_hypot, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat))
     return z
 end
 
@@ -731,7 +877,7 @@ for f in (:log, :log2, :log10)
                               "with a complex argument. Try ", $f, "(complex(x)).")))
         end
         z = BigFloat()
-        ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[])
+        ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat))
         return z
     end
 end
@@ -743,7 +889,7 @@ function log1p(x::BigFloat)
                           "with a complex argument. Try log1p(complex(x)).")))
     end
     z = BigFloat()
-    ccall((:mpfr_log1p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[])
+    ccall((:mpfr_log1p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat))
     return z
 end
 
@@ -767,19 +913,19 @@ end
 function modf(x::BigFloat)
     zint = BigFloat()
     zfloat = BigFloat()
-    ccall((:mpfr_modf, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), zint, zfloat, x, ROUNDING_MODE[])
+    ccall((:mpfr_modf, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), zint, zfloat, x, rounding_raw(BigFloat))
     return (zfloat, zint)
 end
 
 function rem(x::BigFloat, y::BigFloat)
     z = BigFloat()
-    ccall((:mpfr_fmod, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[])
+    ccall((:mpfr_fmod, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat))
     return z
 end
 
 function rem(x::BigFloat, y::BigFloat, ::RoundingMode{:Nearest})
     z = BigFloat()
-    ccall((:mpfr_remainder, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[])
+    ccall((:mpfr_remainder, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat))
     return z
 end
 
@@ -790,7 +936,7 @@ function sum(arr::AbstractArray{BigFloat})
     z = BigFloat(0)
     for i in arr
         ccall((:mpfr_add, libmpfr), Int32,
-            (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, i, ROUNDING_MODE[])
+            (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, i, rounding_raw(BigFloat))
     end
     return z
 end
@@ -801,7 +947,7 @@ for f in (:sin, :cos, :tan, :sec, :csc, :acos, :asin, :atan, :acosh, :asinh, :at
         function ($f)(x::BigFloat)
             isnan(x) && return x
             z = BigFloat()
-            ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat))
             isnan(z) && throw(DomainError(x, "NaN result for non-NaN input."))
             return z
         end
@@ -811,7 +957,7 @@ sincospi(x::BigFloat) = (sinpi(x), cospi(x))
 
 function atan(y::BigFloat, x::BigFloat)
     z = BigFloat()
-    ccall((:mpfr_atan2, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, y, x, ROUNDING_MODE[])
+    ccall((:mpfr_atan2, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, y, x, rounding_raw(BigFloat))
     return z
 end
 
@@ -821,14 +967,14 @@ for f in (:sin, :cos, :tan)
         function ($(Symbol(f,:d)))(x::BigFloat)
             isnan(x) && return x
             z = BigFloat()
-            ccall(($(string(:mpfr_,f,:u)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, 360, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_,f,:u)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, 360, rounding_raw(BigFloat))
             isnan(z) && throw(DomainError(x, "NaN result for non-NaN input."))
             return z
         end
         function ($(Symbol(:a,f,:d)))(x::BigFloat)
             isnan(x) && return x
             z = BigFloat()
-            ccall(($(string(:mpfr_a,f,:u)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, 360, ROUNDING_MODE[])
+            ccall(($(string(:mpfr_a,f,:u)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, 360, rounding_raw(BigFloat))
             isnan(z) && throw(DomainError(x, "NaN result for non-NaN input."))
             return z
         end
@@ -836,7 +982,7 @@ for f in (:sin, :cos, :tan)
 end
 function atand(y::BigFloat, x::BigFloat)
     z = BigFloat()
-    ccall((:mpfr_atan2u, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, y, x, 360, ROUNDING_MODE[])
+    ccall((:mpfr_atan2u, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, y, x, 360, rounding_raw(BigFloat))
     return z
 end
 
@@ -844,9 +990,7 @@ end
 # Utility functions
 ==(x::BigFloat, y::BigFloat) = ccall((:mpfr_equal_p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0
 <=(x::BigFloat, y::BigFloat) = ccall((:mpfr_lessequal_p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0
->=(x::BigFloat, y::BigFloat) = ccall((:mpfr_greaterequal_p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0
 <(x::BigFloat, y::BigFloat) = ccall((:mpfr_less_p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0
->(x::BigFloat, y::BigFloat) = ccall((:mpfr_greater_p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0
 
 function cmp(x::BigFloat, y::BigInt)
     isnan(x) && return 1
@@ -885,19 +1029,27 @@ cmp(x::CdoubleMax, y::BigFloat) = -cmp(y,x)
 <=(x::BigFloat, y::CdoubleMax) = !isnan(x) && !isnan(y) && cmp(x,y) <= 0
 <=(x::CdoubleMax, y::BigFloat) = !isnan(x) && !isnan(y) && cmp(y,x) >= 0
 
-signbit(x::BigFloat) = ccall((:mpfr_signbit, libmpfr), Int32, (Ref{BigFloat},), x) != 0
+# Note: this inlines the implementation of `mpfr_signbit` to avoid a
+# `ccall`.
+signbit(x::BigFloat) = signbit(x.sign)
+
 function sign(x::BigFloat)
     c = cmp(x, 0)
     (c == 0 || isnan(x)) && return x
     return c < 0 ? -one(x) : one(x)
 end
 
-function _precision(x::BigFloat)  # precision of an object of type BigFloat
+function _precision_with_base_2(x::BigFloat)  # precision of an object of type BigFloat
     return ccall((:mpfr_get_prec, libmpfr), Clong, (Ref{BigFloat},), x)
 end
 precision(x::BigFloat; base::Integer=2) = _precision(x, base)
 
-_precision(::Type{BigFloat}) = Int(DEFAULT_PRECISION[]) # default precision of the type BigFloat itself
+
+_convert_precision_from_base(precision::Integer, base::Integer) =
+    base == 2 ? precision : ceil(Int, precision * log2(base))
+
+_precision_with_base_2(::Type{BigFloat}) =
+    Int(@something(Base.ScopedValues.get(CURRENT_PRECISION), DEFAULT_PRECISION[])) # default precision of the type BigFloat itself
 
 """
     setprecision([T=BigFloat,] precision::Int; base=2)
@@ -918,7 +1070,7 @@ at least `precision` digits in the given `base`.
 function setprecision(::Type{BigFloat}, precision::Integer; base::Integer=2)
     base > 1 || throw(DomainError(base, "`base` cannot be less than 2."))
     precision > 0 || throw(DomainError(precision, "`precision` cannot be less than 1."))
-    DEFAULT_PRECISION[] = base == 2 ? precision : ceil(Int, precision * log2(base))
+    DEFAULT_PRECISION[] = _convert_precision_from_base(precision, base)
     return precision
 end
 
@@ -929,7 +1081,7 @@ maxintfloat(::Type{BigFloat}) = BigFloat(2)^precision(BigFloat)
 
 function copysign(x::BigFloat, y::BigFloat)
     z = BigFloat()
-    ccall((:mpfr_copysign, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[])
+    ccall((:mpfr_copysign, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat))
     return z
 end
 
@@ -944,16 +1096,16 @@ end
 function frexp(x::BigFloat)
     z = BigFloat()
     c = Ref{Clong}()
-    ccall((:mpfr_frexp, libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, ROUNDING_MODE[])
+    ccall((:mpfr_frexp, libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, rounding_raw(BigFloat))
     return (z, c[])
 end
 
 function significand(x::BigFloat)
     z = BigFloat()
     c = Ref{Clong}()
-    ccall((:mpfr_frexp, libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, ROUNDING_MODE[])
+    ccall((:mpfr_frexp, libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, rounding_raw(BigFloat))
     # Double the significand to make it work as Base.significand
-    ccall((:mpfr_mul_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, z, 2, ROUNDING_MODE[])
+    ccall((:mpfr_mul_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, z, 2, rounding_raw(BigFloat))
     return z
 end
 
@@ -976,18 +1128,22 @@ for (f,R) in ((:roundeven, :Nearest),
 end
 
 function isinf(x::BigFloat)
-    return ccall((:mpfr_inf_p, libmpfr), Int32, (Ref{BigFloat},), x) != 0
+    return x.exp == mpfr_special_exponent_inf
 end
 
 function isnan(x::BigFloat)
-    return ccall((:mpfr_nan_p, libmpfr), Int32, (Ref{BigFloat},), x) != 0
+    return x.exp == mpfr_special_exponent_nan
 end
 
 isfinite(x::BigFloat) = !isinf(x) && !isnan(x)
 
-iszero(x::BigFloat) = x == Clong(0)
+iszero(x::BigFloat) = x.exp == mpfr_special_exponent_zero
 isone(x::BigFloat) = x == Clong(1)
 
+# In theory, `!iszero(x) && !isnan(x)` should be the same as `x.exp > mpfr_special_exponent_nan`, but this is safer.
+ispositive(x::BigFloat) = !signbit(x) && !iszero(x) && !isnan(x)
+isnegative(x::BigFloat) = signbit(x) && !iszero(x) && !isnan(x)
+
 @eval typemax(::Type{BigFloat}) = $(BigFloat(Inf))
 @eval typemin(::Type{BigFloat}) = $(BigFloat(-Inf))
 
@@ -1031,10 +1187,20 @@ Often used as `setprecision(T, precision) do ... end`
 Note: `nextfloat()`, `prevfloat()` do not use the precision mentioned by
 `setprecision`.
 
+!!! warning
+    There is a fallback implementation of this method that calls `precision`
+    and `setprecision`, but it should no longer be relied on. Instead, you
+    should define the 3-argument form directly in a way that uses `ScopedValue`,
+    or recommend that callers use `ScopedValue` and `@with` themselves.
+
 !!! compat "Julia 1.8"
     The `base` keyword requires at least Julia 1.8.
 """
 function setprecision(f::Function, ::Type{T}, prec::Integer; kws...) where T
+    depwarn("""
+            The fallback `setprecision(::Function, ...)` method is deprecated. Packages overloading this method should
+            implement their own specialization using `ScopedValue` instead.
+            """, :setprecision)
     old_prec = precision(T)
     setprecision(T, prec; kws...)
     try
@@ -1044,6 +1210,10 @@ function setprecision(f::Function, ::Type{T}, prec::Integer; kws...) where T
     end
 end
 
+function setprecision(f::Function, ::Type{BigFloat}, prec::Integer; base::Integer=2)
+    Base.ScopedValues.@with(CURRENT_PRECISION => _convert_precision_from_base(prec, base), f())
+end
+
 setprecision(f::Function, prec::Integer; base::Integer=2) = setprecision(f, BigFloat, prec; base)
 
 function string_mpfr(x::BigFloat, fmt::String)
@@ -1088,7 +1258,7 @@ function _prettify_bigfloat(s::String)::String
             string(neg ? '-' : "", '0', '.', '0'^(-expo-1), int, frac == "0" ? "" : frac)
         end
     else
-        string(mantissa, 'e', exponent)
+        string(mantissa, 'e', expo)
     end
 end
 
@@ -1125,13 +1295,11 @@ set_emin!(x) = check_exponent_err(ccall((:mpfr_set_emin, libmpfr), Cint, (Clong,
 
 function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict)
     get!(stackdict, x) do
-        # d = copy(x._d)
-        d = x._d
-        d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String
-        y = _BigFloat(x.prec, x.sign, x.exp, d′)
+        d′ = copy(getfield(x, :d))
+        y = _BigFloat(d′)
         #ccall((:mpfr_custom_move,libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary
         return y
-    end
+    end::BigFloat
 end
 
 function decompose(x::BigFloat)::Tuple{BigInt, Int, Int}
@@ -1142,7 +1310,8 @@ function decompose(x::BigFloat)::Tuple{BigInt, Int, Int}
     s.size = cld(x.prec, 8*sizeof(Limb)) # limbs
     b = s.size * sizeof(Limb)            # bytes
     ccall((:__gmpz_realloc2, libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits
-    memcpy(s.d, x.d, b)
+    xd = x.d
+    GC.@preserve xd memcpy(s.d, Base.unsafe_convert(Ptr{Limb}, xd), b)
     s, x.exp - 8b, x.sign
 end
 
@@ -1154,7 +1323,8 @@ end
 # flags
 clear_flags() = ccall((:mpfr_clear_flags, libmpfr), Cvoid, ())
 had_underflow() = ccall((:mpfr_underflow_p, libmpfr), Cint, ()) != 0
-had_overflow() = ccall((:mpfr_underflow_p, libmpfr), Cint, ()) != 0
+had_overflow() = ccall((:mpfr_overflow_p, libmpfr), Cint, ()) != 0
+had_divbyzero() = ccall((:mpfr_divby0_p, libmpfr), Cint, ()) != 0
 had_nan() = ccall((:mpfr_nanflag_p, libmpfr), Cint, ()) != 0
 had_inexact_exception() = ccall((:mpfr_inexflag_p, libmpfr), Cint, ()) != 0
 had_range_exception() = ccall((:mpfr_erangeflag_p, libmpfr), Cint, ()) != 0
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index ba4e6eb12695a..28da821f5ee79 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -4,12 +4,14 @@
 module IteratorsMD
     import .Base: eltype, length, size, first, last, in, getindex, setindex!,
                   min, max, zero, oneunit, isless, eachindex,
-                  convert, show, iterate, promote_rule
+                  convert, show, iterate, promote_rule, to_indices, copy,
+                  isassigned, lastindex, firstindex
 
     import .Base: +, -, *, (:)
     import .Base: simd_outer_range, simd_inner_length, simd_index, setindex
-    import .Base: to_indices, to_index, _to_indices1, _cutdim
-    using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex, fill_to_length, tail,
+    import Core: Tuple
+    using .Base: to_index, fill_to_length, tail, safe_tail
+    using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex,
         ReshapedArray, ReshapedArrayLF, OneTo, Fix1
     using .Base.Iterators: Reverse, PartitionIterator
     using .Base: @propagate_inbounds
@@ -83,12 +85,17 @@ module IteratorsMD
     CartesianIndex{N}(index::Integer...) where {N} = CartesianIndex{N}(index)
     CartesianIndex{N}() where {N} = CartesianIndex{N}(())
     # Un-nest passed CartesianIndexes
+    CartesianIndex{N}(index::CartesianIndex{N}) where {N} = index
     CartesianIndex(index::Union{Integer, CartesianIndex}...) = CartesianIndex(flatten(index))
     flatten(::Tuple{}) = ()
     flatten(I::Tuple{Any}) = Tuple(I[1])
     @inline flatten(I::Tuple) = (Tuple(I[1])..., flatten(tail(I))...)
     CartesianIndex(index::Tuple{Vararg{Union{Integer, CartesianIndex}}}) = CartesianIndex(index...)
-    show(io::IO, i::CartesianIndex) = (print(io, "CartesianIndex"); show(io, i.I))
+    function show(io::IO, i::CartesianIndex)
+        print(io, "CartesianIndex(")
+        join(io, i.I, ", ")
+        print(io, ")")
+    end
 
     # length
     length(::CartesianIndex{N}) where {N} = N
@@ -96,6 +103,8 @@ module IteratorsMD
 
     # indexing
     getindex(index::CartesianIndex, i::Integer) = index.I[i]
+    firstindex(index::CartesianIndex) = firstindex(index.I)
+    lastindex(index::CartesianIndex) = lastindex(index.I)
     Base.get(A::AbstractArray, I::CartesianIndex, default) = get(A, I.I, default)
     eltype(::Type{T}) where {T<:CartesianIndex} = eltype(fieldtype(T, :I))
 
@@ -114,6 +123,7 @@ module IteratorsMD
     oneunit(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(Returns(1), Val(N)))
 
     # arithmetic, min/max
+    @inline (+)(index::CartesianIndex) = index
     @inline (-)(index::CartesianIndex{N}) where {N} =
         CartesianIndex{N}(map(-, index.I))
     @inline (+)(index1::CartesianIndex{N}, index2::CartesianIndex{N}) where {N} =
@@ -138,7 +148,7 @@ module IteratorsMD
     # hashing
     const cartindexhash_seed = UInt == UInt64 ? 0xd60ca92f8284b8b0 : 0xf2ea7c2e
     function Base.hash(ci::CartesianIndex, h::UInt)
-        h += cartindexhash_seed
+        h ⊻= cartindexhash_seed
         for i in ci.I
             h = hash(i, h)
         end
@@ -166,6 +176,37 @@ module IteratorsMD
     Base.iterate(::CartesianIndex) =
         error("iteration is deliberately unsupported for CartesianIndex. Use `I` rather than `I...`, or use `Tuple(I)...`")
 
+    # ranges are deliberately disabled to prevent ambiguities with the colon constructor
+    Base.range_start_step_length(start::CartesianIndex, step::CartesianIndex, len::Integer) =
+        error("range with a specified length is deliberately unsupported for CartesianIndex arguments."*
+            " Use StepRangeLen($start, $step, $len) to construct this range")
+
+    # show is special-cased to avoid the start:stop:step display,
+    # which constructs a CartesianIndices
+    # See #50784
+    function show(io::IO, r::StepRangeLen{<:CartesianIndex})
+        print(io, "StepRangeLen(", first(r), ", ",
+                    step(r), ", ", length(r), ")")
+    end
+
+    Base.in(x::CartesianIndex, r::AbstractRange{<:CartesianIndex}) = false
+    function Base.in(x::CartesianIndex{N}, r::AbstractRange{CartesianIndex{N}}) where {N}
+        isempty(r) && return false
+        f, st, l = first(r), step(r), last(r)
+        # The n-th element of the range is a CartesianIndex
+        # whose elements are the n-th along each dimension
+        # Find the first dimension along which the index is changing,
+        # so that n may be uniquely determined
+        for i in 1:N
+            iszero(st[i]) && continue
+            n = findfirst(==(x[i]), f[i]:st[i]:l[i])
+            isnothing(n) && return false
+            return r[n] == x
+        end
+        # if the step is zero, the elements are identical, so compare with the first
+        return x == f
+    end
+
     # Iteration
     const OrdinalRangeInt = OrdinalRange{Int, Int}
     """
@@ -267,7 +308,7 @@ module IteratorsMD
     CartesianIndices(A::AbstractArray) = CartesianIndices(axes(A))
 
     _convert2ind(sz::Bool) = Base.OneTo(Int8(sz))
-    _convert2ind(sz::Integer) = Base.OneTo(sz)
+    _convert2ind(sz::Integer) = Base.oneto(sz)
     _convert2ind(sz::AbstractUnitRange) = first(sz):last(sz)
     _convert2ind(sz::OrdinalRange) = first(sz):step(sz):last(sz)
 
@@ -304,6 +345,8 @@ module IteratorsMD
     CartesianIndices((2:1:3, 1:2:3))
     ```
     """
+    (:)(::CartesianIndex, ::CartesianIndex, ::CartesianIndex)
+
     (:)(I::CartesianIndex{N}, J::CartesianIndex{N}) where N =
         CartesianIndices(map((i,j) -> i:j, Tuple(I), Tuple(J)))
     (:)(I::CartesianIndex{N}, S::CartesianIndex{N}, J::CartesianIndex{N}) where N =
@@ -351,7 +394,7 @@ module IteratorsMD
     end
 
     # getindex for a 0D CartesianIndices is necessary for disambiguation
-    @propagate_inbounds function Base.getindex(iter::CartesianIndices{0,R}) where {R}
+    @inline function Base.getindex(iter::CartesianIndices{0,R}) where {R}
         CartesianIndex()
     end
     @inline function Base.getindex(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R}
@@ -393,7 +436,9 @@ module IteratorsMD
 
     @inline function eachindex(::IndexCartesian, A::AbstractArray, B::AbstractArray...)
         axsA = axes(A)
-        Base._all_match_first(axes, axsA, B...) || Base.throw_eachindex_mismatch_indices(IndexCartesian(), axes(A), axes.(B)...)
+        axsBs = map(axes, B)
+        all(==(axsA), axsBs) ||
+            Base.throw_eachindex_mismatch_indices("axes", axsA, axsBs...)
         CartesianIndices(axsA)
     end
 
@@ -404,7 +449,7 @@ module IteratorsMD
         end
         iterfirst, iterfirst
     end
-    @inline function iterate(iter::CartesianIndices, state)
+    @inline function iterate(iter::CartesianIndices, state::CartesianIndex)
         valid, I = __inc(state.I, iter.indices)
         valid || return nothing
         return CartesianIndex(I...), CartesianIndex(I...)
@@ -424,36 +469,24 @@ module IteratorsMD
     @inline function __inc(state::Tuple{Int}, indices::Tuple{OrdinalRangeInt})
         rng = indices[1]
         I = state[1] + step(rng)
-        valid = __is_valid_range(I, rng) && state[1] != last(rng)
-        return valid, (I, )
+        valid = state[1] != last(rng)
+        return valid, (I,)
     end
     @inline function __inc(state::Tuple{Int,Int,Vararg{Int}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}})
         rng = indices[1]
-        I = state[1] + step(rng)
-        if __is_valid_range(I, rng) && state[1] != last(rng)
+        if state[1] != last(rng)
+            I = state[1] + step(rng)
             return true, (I, tail(state)...)
         end
-        valid, I = __inc(tail(state), tail(indices))
-        return valid, (first(rng), I...)
-    end
-
-    @inline __is_valid_range(I, rng::AbstractUnitRange) = I in rng
-    @inline function __is_valid_range(I, rng::OrdinalRange)
-        if step(rng) > 0
-            lo, hi = first(rng), last(rng)
-        else
-            lo, hi = last(rng), first(rng)
-        end
-        lo <= I <= hi
+        valid, Itail = __inc(tail(state), tail(indices))
+        return valid, (first(rng), Itail...)
     end
 
     # 0-d cartesian ranges are special-cased to iterate once and only once
-    iterate(iter::CartesianIndices{0}, done=false) = done ? nothing : (CartesianIndex(), true)
+    iterate(iter::CartesianIndices{0}, done::Bool=false) = done ? nothing : (CartesianIndex(), true)
 
     size(iter::CartesianIndices) = map(length, iter.indices)
 
-    length(iter::CartesianIndices) = prod(size(iter))
-
     # make CartesianIndices a multidimensional range
     Base.step(iter::CartesianIndices) = CartesianIndex(map(step, iter.indices))
 
@@ -461,15 +494,19 @@ module IteratorsMD
     last(iter::CartesianIndices)  = CartesianIndex(map(last, iter.indices))
 
     # When used as indices themselves, CartesianIndices can simply become its tuple of ranges
-    _to_indices1(A, inds, I1::CartesianIndices) = map(Fix1(to_index, A), I1.indices)
-    _cutdim(inds::Tuple, I1::CartesianIndices) = split(inds, Val(ndims(I1)))[2]
-
+    @inline function to_indices(A, inds, I::Tuple{CartesianIndices{N}, Vararg}) where N
+        _, indstail = split(inds, Val(N))
+        (map(Fix1(to_index, A), I[1].indices)..., to_indices(A, indstail, tail(I))...)
+    end
     # but preserve CartesianIndices{0} as they consume a dimension.
-    _to_indices1(A, inds, I1::CartesianIndices{0}) = (I1,)
+    @inline to_indices(A, inds, I::Tuple{CartesianIndices{0}, Vararg}) =
+        (first(I), to_indices(A, inds, tail(I))...)
 
     @inline in(i::CartesianIndex, r::CartesianIndices) = false
     @inline in(i::CartesianIndex{N}, r::CartesianIndices{N}) where {N} = all(map(in, i.I, r.indices))
 
+    copy(iter::CartesianIndices) = iter
+
     simd_outer_range(iter::CartesianIndices{0}) = iter
     function simd_outer_range(iter::CartesianIndices)
         CartesianIndices(tail(iter.indices))
@@ -556,13 +593,13 @@ module IteratorsMD
     @inline function __dec(state::Tuple{Int}, indices::Tuple{OrdinalRangeInt})
         rng = indices[1]
         I = state[1] - step(rng)
-        valid = __is_valid_range(I, rng) && state[1] != first(rng)
+        valid = state[1] != first(rng)
         return valid, (I,)
     end
     @inline function __dec(state::Tuple{Int,Int,Vararg{Int}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}})
         rng = indices[1]
         I = state[1] - step(rng)
-        if __is_valid_range(I, rng) && state[1] != first(rng)
+        if state[1] != first(rng)
             return true, (I, tail(state)...)
         end
         valid, I = __dec(tail(state), tail(indices))
@@ -583,7 +620,7 @@ module IteratorsMD
         else
             # Given the fact that StepRange 1:2:4 === 1:2:3, we lost the original size information
             # and thus cannot calculate the correct linear indices when the steps are not 1.
-            throw(ArgumentError("LinearIndices for $(typeof(inds)) with non-1 step size is not yet supported."))
+            throw(ArgumentError(LazyString("LinearIndices for ", typeof(inds), " with non-1 step size is not yet supported.")))
         end
     end
 
@@ -606,6 +643,8 @@ module IteratorsMD
     # array operations
     Base.intersect(a::CartesianIndices{N}, b::CartesianIndices{N}) where N =
         CartesianIndices(intersect.(a.indices, b.indices))
+    Base.issubset(a::CartesianIndices{N}, b::CartesianIndices{N}) where N =
+        isempty(a) || all(map(issubset, a.indices, b.indices))
 
     # Views of reshaped CartesianIndices are used for partitions — ensure these are fast
     const CartesianPartition{T<:CartesianIndex, P<:CartesianIndices, R<:ReshapedArray{T,1,P}} = SubArray{T,1,R,<:Tuple{AbstractUnitRange{Int}},false}
@@ -634,21 +673,24 @@ module IteratorsMD
         # CartesianPartition.
         mi = iter.parent.mi
         ci = iter.parent.parent
-        ax, ax1 = axes(ci), Base.axes1(ci)
-        subs = Base.ind2sub_rs(ax, mi, first(iter.indices[1]))
-        vl, fl = Base._sub2ind(tail(ax), tail(subs)...), subs[1]
-        vr, fr = divrem(last(iter.indices[1]) - 1, mi[end]) .+ (1, first(ax1))
+        ax1 = Base.axes1(ci)
+        function splitdim1(i, mi)
+            d, r = divrem(i - 1, mi)
+            d + 1, r + first(ax1)
+        end
+        vl, fl = splitdim1(first(iter.indices[1]), mi[1])
+        vr, fr = splitdim1(last(iter.indices[1]), mi[1])
+        # form the iterator for outer dimensions, equivalent to vec(oci), but mi is reused
         oci = CartesianIndices(tail(ci.indices))
-        # A fake CartesianPartition to reuse the outer iterate fallback
-        outer = @inbounds view(ReshapedArray(oci, (length(oci),), mi), vl:vr)
-        init = @inbounds dec(oci[tail(subs)...].I, oci.indices) # real init state
+        roci = ReshapedArray(oci, (length(oci),), tail(mi))
+        outer = @inbounds view(roci, vl:vr)
         # Use Generator to make inner loop branchless
         @inline function skip_len_I(i::Int, I::CartesianIndex)
             l = i == 1 ? fl : first(ax1)
             r = i == length(outer) ? fr : last(ax1)
             l - first(ax1), r - l + 1, I
         end
-        (skip_len_I(i, I) for (i, I) in Iterators.enumerate(Iterators.rest(outer, (init, 0))))
+        (skip_len_I(i, I) for (i, I) in Iterators.enumerate(outer))
     end
     @inline function simd_outer_range(iter::CartesianPartition{CartesianIndex{2}})
         # But for two-dimensional Partitions the above is just a simple one-dimensional range
@@ -676,19 +718,53 @@ end  # IteratorsMD
 
 using .IteratorsMD
 
+# from genericmemory.jl:
+## generate vararg methods for atomic indexing
+for ex in (
+    :(getindex_atomic(mem::GenericMemory, order::Symbol, i::Int)),
+    :(setindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)),
+    :(setindexonce_atomic!(mem::GenericMemory, success_order::Symbol, fail_order::Symbol, val, i::Int)),
+    :(modifyindex_atomic!(mem::GenericMemory, order::Symbol, op, val, i::Int)),
+    :(swapindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)),
+    :(replaceindex_atomic!(mem::GenericMemory, success_order::Symbol, fail_order::Symbol, expected, desired, i::Int,)),
+)
+    fn = ex.args[1]
+    args = ex.args[2:end-1]
+
+    @eval begin
+        function $fn($(args...), i::Union{Integer,CartesianIndex}...)
+            return $fn($(args...), CartesianIndex(to_indices($(args[1]), i)))
+        end
+
+        function $fn($(args...), i::CartesianIndex)
+            return $fn($(args...), Tuple(i)...)
+        end
+
+        function $fn($(args...), i::Integer...)
+            idcs = to_indices($(args[1]), i)
+            S = IndexStyle($(args[1]))
+            if isa(S, IndexLinear)
+                return $fn($(args...), _to_linear_index($(args[1]), idcs...))
+            else
+                return $fn($(args...), _to_subscript_indices($(args[1]), idcs...))
+            end
+        end
+    end
+end
+
 ## Bounds-checking with CartesianIndex
 # Disallow linear indexing with CartesianIndex
-function checkbounds(::Type{Bool}, A::AbstractArray, i::Union{CartesianIndex, AbstractArray{<:CartesianIndex}})
-    @inline
+@inline checkbounds(::Type{Bool}, A::AbstractArray, i::CartesianIndex) =
     checkbounds_indices(Bool, axes(A), (i,))
+# Here we try to consume N of the indices (if there are that many available)
+@inline function checkbounds_indices(::Type{Bool}, inds::Tuple, I::Tuple{CartesianIndex,Vararg})
+    inds1, rest = IteratorsMD.split(inds, Val(length(I[1])))
+    checkindex(Bool, inds1, I[1]) & checkbounds_indices(Bool, rest, tail(I))
 end
-
-@inline checkbounds_indices(::Type{Bool}, ::Tuple{}, I::Tuple{CartesianIndex,Vararg{Any}}) =
-    checkbounds_indices(Bool, (), (I[1].I..., tail(I)...))
-@inline checkbounds_indices(::Type{Bool}, IA::Tuple{Any}, I::Tuple{CartesianIndex,Vararg{Any}}) =
-    checkbounds_indices(Bool, IA, (I[1].I..., tail(I)...))
-@inline checkbounds_indices(::Type{Bool}, IA::Tuple, I::Tuple{CartesianIndex,Vararg{Any}}) =
-    checkbounds_indices(Bool, IA, (I[1].I..., tail(I)...))
+@inline checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndex) =
+    checkbounds_indices(Bool, inds, I.I)
+@inline checkindex(::Type{Bool}, inds::Tuple, i::AbstractRange{<:CartesianIndex}) =
+    isempty(i) | (checkindex(Bool, inds, first(i)) & checkindex(Bool, inds, last(i)))
 
 # Indexing into Array with mixtures of Integers and CartesianIndices is
 # extremely performance-sensitive. While the abstract fallbacks support this,
@@ -698,45 +774,17 @@ end
 @propagate_inbounds setindex!(A::Array, v, i1::Union{Integer, CartesianIndex}, I::Union{Integer, CartesianIndex}...) =
     (A[to_indices(A, (i1, I...))...] = v; A)
 
-# Support indexing with an array of CartesianIndex{N}s
+## Bounds-checking with arrays of CartesianIndex{N}
+# Disallow linear indexing with an array of CartesianIndex{N}
+@inline checkbounds(::Type{Bool}, A::AbstractArray, i::AbstractArray{CartesianIndex{N}}) where {N} =
+    checkbounds_indices(Bool, axes(A), (i,))
 # Here we try to consume N of the indices (if there are that many available)
-# The first two simply handle ambiguities
-@inline function checkbounds_indices(::Type{Bool}, ::Tuple{},
-        I::Tuple{AbstractArray{CartesianIndex{N}},Vararg{Any}}) where N
-    checkindex(Bool, (), I[1]) & checkbounds_indices(Bool, (), tail(I))
-end
-@inline function checkbounds_indices(::Type{Bool}, IA::Tuple{Any},
-        I::Tuple{AbstractArray{CartesianIndex{0}},Vararg{Any}})
-    checkbounds_indices(Bool, IA, tail(I))
-end
-@inline function checkbounds_indices(::Type{Bool}, IA::Tuple{Any},
-        I::Tuple{AbstractArray{CartesianIndex{N}},Vararg{Any}}) where N
-    checkindex(Bool, IA, I[1]) & checkbounds_indices(Bool, (), tail(I))
-end
-@inline function checkbounds_indices(::Type{Bool}, IA::Tuple,
-        I::Tuple{AbstractArray{CartesianIndex{N}},Vararg{Any}}) where N
-    IA1, IArest = IteratorsMD.split(IA, Val(N))
-    checkindex(Bool, IA1, I[1]) & checkbounds_indices(Bool, IArest, tail(I))
-end
-
-
-@inline function checkbounds_indices(::Type{Bool}, IA::Tuple{},
-    I::Tuple{AbstractArray{Bool,N},Vararg{Any}}) where N
-    return checkbounds_indices(Bool, IA, (LogicalIndex(I[1]), tail(I)...))
+@inline function checkbounds_indices(::Type{Bool}, inds::Tuple, I::Tuple{AbstractArray{CartesianIndex{N}},Vararg}) where N
+    inds1, rest = IteratorsMD.split(inds, Val(N))
+    checkindex(Bool, inds1, I[1]) & checkbounds_indices(Bool, rest, tail(I))
 end
-@inline function checkbounds_indices(::Type{Bool}, IA::Tuple,
-    I::Tuple{AbstractArray{Bool,N},Vararg{Any}}) where N
-    return checkbounds_indices(Bool, IA, (LogicalIndex(I[1]), tail(I)...))
-end
-
-function checkindex(::Type{Bool}, inds::Tuple, I::AbstractArray{<:CartesianIndex})
-    b = true
-    for i in I
-        b &= checkbounds_indices(Bool, inds, (i,))
-    end
-    b
-end
-checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndices) = all(checkindex.(Bool, inds, I.indices))
+@inline checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndices) =
+    checkbounds_indices(Bool, inds, I.indices)
 
 # combined count of all indices, including CartesianIndex and
 # AbstractArray{CartesianIndex}
@@ -788,7 +836,6 @@ LogicalIndex(mask::AbstractVector{Bool}) = LogicalIndex{Int, typeof(mask)}(mask)
 LogicalIndex(mask::AbstractArray{Bool, N}) where {N} = LogicalIndex{CartesianIndex{N}, typeof(mask)}(mask)
 LogicalIndex{Int}(mask::AbstractArray) = LogicalIndex{Int, typeof(mask)}(mask)
 size(L::LogicalIndex) = (L.sum,)
-length(L::LogicalIndex) = L.sum
 collect(L::LogicalIndex) = [i for i in L]
 show(io::IO, r::LogicalIndex) = print(io,collect(r))
 print_array(io::IO, X::LogicalIndex) = print_array(io, collect(X))
@@ -810,11 +857,11 @@ end
     n = s[1]
     n > length(L) && return nothing
     #unroll once to help inference, cf issue #29418
-    idx, i = iterate(tail(s)...)
+    idx, i = iterate(tail(s)...)::Tuple{Any,Any}
     s = (n+1, s[2], i)
     L.mask[idx] && return (idx, s)
     while true
-        idx, i = iterate(tail(s)...)
+        idx, i = iterate(tail(s)...)::Tuple{Any,Any}
         s = (n+1, s[2], i)
         L.mask[idx] && return (idx, s)
     end
@@ -844,11 +891,29 @@ end
     return eltype(L)(i1, irest...), (i1 - tz, Bi, irest, c)
 end
 
-@inline checkbounds(::Type{Bool}, A::AbstractArray, I::LogicalIndex{<:Any,<:AbstractArray{Bool,1}}) =
-    eachindex(IndexLinear(), A) == eachindex(IndexLinear(), I.mask)
-@inline checkbounds(::Type{Bool}, A::AbstractArray, I::LogicalIndex) = axes(A) == axes(I.mask)
-@inline checkindex(::Type{Bool}, indx::AbstractUnitRange, I::LogicalIndex) = (indx,) == axes(I.mask)
-checkindex(::Type{Bool}, inds::Tuple, I::LogicalIndex) = checkbounds_indices(Bool, inds, axes(I.mask))
+## Boundscheck for Logicalindex
+# LogicalIndex: map all calls to mask
+checkbounds(::Type{Bool}, A::AbstractArray, i::LogicalIndex) = checkbounds(Bool, A, i.mask)
+# `checkbounds_indices` has been handled via `I::AbstractArray` fallback
+checkindex(::Type{Bool}, inds::AbstractUnitRange, i::LogicalIndex) = checkindex(Bool, inds, i.mask)
+checkindex(::Type{Bool}, inds::Tuple, i::LogicalIndex) = checkindex(Bool, inds, i.mask)
+
+## Boundscheck for AbstractArray{Bool}
+# Disallow linear indexing with AbstractArray{Bool}
+checkbounds(::Type{Bool}, A::AbstractArray, i::AbstractArray{Bool}) =
+    checkbounds_indices(Bool, axes(A), (i,))
+# But allow linear indexing with AbstractVector{Bool}
+checkbounds(::Type{Bool}, A::AbstractArray, i::AbstractVector{Bool}) =
+    checkindex(Bool, eachindex(IndexLinear(), A), i)
+@inline function checkbounds_indices(::Type{Bool}, inds::Tuple, I::Tuple{AbstractArray{Bool},Vararg})
+    inds1, rest = IteratorsMD.split(inds, Val(ndims(I[1])))
+    checkindex(Bool, inds1, I[1]) & checkbounds_indices(Bool, rest, tail(I))
+end
+checkindex(::Type{Bool}, inds::AbstractUnitRange, I::AbstractVector{Bool}) = axes1(I) == inds
+checkindex(::Type{Bool}, inds::AbstractUnitRange, I::AbstractRange{Bool}) = axes1(I) == inds
+checkindex(::Type{Bool}, inds::Tuple, I::AbstractArray{Bool}) = _check_boolean_axes(inds, axes(I))
+_check_boolean_axes(inds::Tuple, axes::Tuple) = (inds[1] == axes[1]) & _check_boolean_axes(tail(inds), tail(axes))
+_check_boolean_axes(::Tuple{}, axes::Tuple) = all(==(OneTo(1)), axes)
 
 ensure_indexable(I::Tuple{}) = ()
 @inline ensure_indexable(I::Tuple{Any, Vararg{Any}}) = (I[1], ensure_indexable(tail(I))...)
@@ -859,24 +924,54 @@ ensure_indexable(I::Tuple{}) = ()
 @inline to_indices(A, I::Tuple{Vararg{Union{Integer, CartesianIndex}}}) = to_indices(A, (), I)
 # But some index types require more context spanning multiple indices
 # CartesianIndex is unfolded outside the inner to_indices for better inference
-_to_indices1(A, inds, I1::CartesianIndex) = map(Fix1(to_index, A), I1.I)
-_cutdim(inds, I1::CartesianIndex) = IteratorsMD.split(inds, Val(length(I1)))[2]
+@inline function to_indices(A, inds, I::Tuple{CartesianIndex{N}, Vararg}) where N
+    _, indstail = IteratorsMD.split(inds, Val(N))
+    (map(Fix1(to_index, A), I[1].I)..., to_indices(A, indstail, tail(I))...)
+end
 # For arrays of CartesianIndex, we just skip the appropriate number of inds
-_cutdim(inds, I1::AbstractArray{CartesianIndex{N}}) where {N} = IteratorsMD.split(inds, Val(N))[2]
+@inline function to_indices(A, inds, I::Tuple{AbstractArray{CartesianIndex{N}}, Vararg}) where N
+    _, indstail = IteratorsMD.split(inds, Val(N))
+    (to_index(A, I[1]), to_indices(A, indstail, tail(I))...)
+end
 # And boolean arrays behave similarly; they also skip their number of dimensions
-_cutdim(inds::Tuple, I1::AbstractArray{Bool}) = IteratorsMD.split(inds, Val(ndims(I1)))[2]
-# As an optimization, we allow trailing Array{Bool} and BitArray to be linear over trailing dimensions
-@inline to_indices(A, inds, I::Tuple{Union{Array{Bool,N}, BitArray{N}}}) where {N} =
-    (_maybe_linear_logical_index(IndexStyle(A), A, I[1]),)
+@inline function to_indices(A, inds, I::Tuple{AbstractArray{Bool, N}, Vararg}) where N
+    _, indstail = IteratorsMD.split(inds, Val(N))
+    (to_index(A, I[1]), to_indices(A, indstail, tail(I))...)
+end
+# As an optimization, we allow the only `AbstractArray{Bool}` to be linear-iterated
+@inline to_indices(A, I::Tuple{AbstractArray{Bool}}) = (_maybe_linear_logical_index(IndexStyle(A), A, I[1]),)
 _maybe_linear_logical_index(::IndexStyle, A, i) = to_index(A, i)
 _maybe_linear_logical_index(::IndexLinear, A, i) = LogicalIndex{Int}(i)
 
 # Colons get converted to slices by `uncolon`
-_to_indices1(A, inds, I1::Colon) = (uncolon(inds),)
+@inline to_indices(A, inds, I::Tuple{Colon, Vararg}) =
+    (uncolon(inds), to_indices(A, Base.safe_tail(inds), tail(I))...)
 
 uncolon(::Tuple{}) = Slice(OneTo(1))
 uncolon(inds::Tuple) = Slice(inds[1])
 
+"""
+    _prechecked_iterate(iter[, state])
+
+Internal function used to eliminate the dead branch in `iterate`.
+Fallback to `iterate` by default, but optimized for indices type in `Base`.
+"""
+@propagate_inbounds _prechecked_iterate(iter) = iterate(iter)
+@propagate_inbounds _prechecked_iterate(iter, state) = iterate(iter, state)
+
+_prechecked_iterate(iter::AbstractUnitRange, i = first(iter)) = i, convert(eltype(iter), i + step(iter))
+_prechecked_iterate(iter::LinearIndices, i = first(iter)) = i, i + 1
+_prechecked_iterate(iter::CartesianIndices) = first(iter), first(iter)
+function _prechecked_iterate(iter::CartesianIndices, i::CartesianIndex)
+    i′ = IteratorsMD.inc(i.I, iter.indices)
+    return i′, i′
+end
+_prechecked_iterate(iter::SCartesianIndices2) = first(iter), first(iter)
+function _prechecked_iterate(iter::SCartesianIndices2{K}, (;i, j)) where {K}
+    I = i < K ? SCartesianIndex2{K}(i + 1, j) : SCartesianIndex2{K}(1, j + 1)
+    return I, I
+end
+
 ### From abstractarray.jl: Internal multidimensional indexing definitions ###
 getindex(x::Union{Number,AbstractChar}, ::CartesianIndex{0}) = x
 getindex(t::Tuple,  i::CartesianIndex{1}) = getindex(t, i.I[1])
@@ -908,14 +1003,11 @@ function _generate_unsafe_getindex!_body(N::Int)
     quote
         @inline
         D = eachindex(dest)
-        Dy = iterate(D)
+        Dy = _prechecked_iterate(D)
         @inbounds @nloops $N j d->I[d] begin
-            # This condition is never hit, but at the moment
-            # the optimizer is not clever enough to split the union without it
-            Dy === nothing && return dest
-            (idx, state) = Dy
+            (idx, state) = Dy::NTuple{2,Any}
             dest[idx] = @ncall $N getindex src j
-            Dy = iterate(D, state)
+            Dy = _prechecked_iterate(D, state)
         end
         return dest
     end
@@ -951,14 +1043,12 @@ function _generate_unsafe_setindex!_body(N::Int)
         @nexprs $N d->(I_d = unalias(A, I[d]))
         idxlens = @ncall $N index_lengths I
         @ncall $N setindex_shape_check x′ (d->idxlens[d])
-        Xy = iterate(x′)
+        X = eachindex(x′)
+        Xy = _prechecked_iterate(X)
         @inbounds @nloops $N i d->I_d begin
-            # This is never reached, but serves as an assumption for
-            # the optimizer that it does not need to emit error paths
-            Xy === nothing && break
-            (val, state) = Xy
-            @ncall $N setindex! A val i
-            Xy = iterate(x′, state)
+            (idx, state) = Xy::NTuple{2,Any}
+            @ncall $N setindex! A x′[idx] i
+            Xy = _prechecked_iterate(X, state)
         end
         A
     end
@@ -1025,25 +1115,34 @@ end
 
 ### from abstractarray.jl
 
-# In the common case where we have two views into the same parent, aliasing checks
-# are _much_ easier and more important to get right
-function mightalias(A::SubArray{T,<:Any,P}, B::SubArray{T,<:Any,P}) where {T,P}
-    if !_parentsmatch(A.parent, B.parent)
-        # We cannot do any better than the usual dataids check
-        return !_isdisjoint(dataids(A), dataids(B))
-    end
-    # Now we know that A.parent === B.parent. This means that the indices of A
-    # and B are the same length and indexing into the same dimensions. We can
-    # just walk through them and check for overlaps: O(ndims(A)). We must finally
-    # ensure that the indices don't alias with either parent
-    return _indicesmightoverlap(A.indices, B.indices) ||
-        !_isdisjoint(dataids(A.parent), _splatmap(dataids, B.indices)) ||
-        !_isdisjoint(dataids(B.parent), _splatmap(dataids, A.indices))
+function mightalias(A::SubArray, B::SubArray)
+    # There are three ways that SubArrays might _problematically_ alias one another:
+    #   1. The parents are the same we can conservatively check if the indices might overlap OR
+    #   2. The parents alias eachother in a more complicated manner (and we can't trace indices) OR
+    #   3. One's parent is used in the other's indices
+    # Note that it's ok for just the indices to alias each other as those should not be mutated,
+    # so we can always do better than the default !_isdisjoint(dataids(A), dataids(B))
+    if isbits(A.parent) || isbits(B.parent)
+        return false # Quick out for immutables
+    elseif _parentsmatch(A.parent, B.parent)
+        # Each SubArray unaliases its own parent from its own indices upon construction, so if
+        # the two parents are the same, then by construction one cannot alias the other's indices
+        # and therefore this is the only test we need to perform:
+        return _indicesmightoverlap(A.indices, B.indices)
+    else
+        A_parent_ids = dataids(A.parent)
+        B_parent_ids = dataids(B.parent)
+        return !_isdisjoint(A_parent_ids, B_parent_ids) ||
+            !_isdisjoint(A_parent_ids, _splatmap(dataids, B.indices)) ||
+            !_isdisjoint(B_parent_ids, _splatmap(dataids, A.indices))
+    end
 end
+# Test if two arrays are backed by exactly the same memory in exactly the same order
 _parentsmatch(A::AbstractArray, B::AbstractArray) = A === B
-# Two reshape(::Array)s of the same size aren't `===` because they have different headers
-_parentsmatch(A::Array, B::Array) = pointer(A) == pointer(B) && size(A) == size(B)
+_parentsmatch(A::DenseArray, B::DenseArray) = elsize(A) == elsize(B) && pointer(A) == pointer(B) && size(A) == size(B)
+_parentsmatch(A::StridedArray, B::StridedArray) = elsize(A) == elsize(B) && pointer(A) == pointer(B) && strides(A) == strides(B)
 
+# Given two SubArrays with the same parent, check if the indices might overlap (returning true if unsure)
 _indicesmightoverlap(A::Tuple{}, B::Tuple{}) = true
 _indicesmightoverlap(A::Tuple{}, B::Tuple) = error("malformed subarray")
 _indicesmightoverlap(A::Tuple, B::Tuple{}) = error("malformed subarray")
@@ -1076,46 +1175,6 @@ end
 # And in general, checking the intersection is too much work
 _indicesmightoverlap(A::Tuple{Any, Vararg{Any}}, B::Tuple{Any, Vararg{Any}}) = true
 
-"""
-    fill!(A, x)
-
-Fill array `A` with the value `x`. If `x` is an object reference, all elements will refer to
-the same object. `fill!(A, Foo())` will return `A` filled with the result of evaluating
-`Foo()` once.
-
-# Examples
-```jldoctest
-julia> A = zeros(2,3)
-2×3 Matrix{Float64}:
- 0.0  0.0  0.0
- 0.0  0.0  0.0
-
-julia> fill!(A, 2.)
-2×3 Matrix{Float64}:
- 2.0  2.0  2.0
- 2.0  2.0  2.0
-
-julia> a = [1, 1, 1]; A = fill!(Vector{Vector{Int}}(undef, 3), a); a[1] = 2; A
-3-element Vector{Vector{Int64}}:
- [2, 1, 1]
- [2, 1, 1]
- [2, 1, 1]
-
-julia> x = 0; f() = (global x += 1; x); fill!(Vector{Int}(undef, 3), f())
-3-element Vector{Int64}:
- 1
- 1
- 1
-```
-"""
-function fill!(A::AbstractArray{T}, x) where T
-    xT = convert(T, x)
-    for I in eachindex(A)
-        @inbounds A[I] = xT
-    end
-    A
-end
-
 function copyto!(dest::AbstractArray{T1,N}, Rdest::CartesianIndices{N},
                   src::AbstractArray{T2,N}, Rsrc::CartesianIndices{N}) where {T1,T2,N}
     isempty(Rdest) && return dest
@@ -1179,8 +1238,7 @@ circshift!(dest::AbstractArray, src, ::Tuple{}) = copyto!(dest, src)
 Circularly shift, i.e. rotate, the data in `src`, storing the result in
 `dest`. `shifts` specifies the amount to shift in each dimension.
 
-The `dest` array must be distinct from the `src` array (they cannot
-alias each other).
+$(_DOCS_ALIASING_WARNING)
 
 See also [`circshift`](@ref).
 """
@@ -1238,21 +1296,23 @@ their indices; any offset results in a (circular) wraparound. If the
 arrays have overlapping indices, then on the domain of the overlap
 `dest` agrees with `src`.
 
+$(_DOCS_ALIASING_WARNING)
+
 See also: [`circshift`](@ref).
 
 # Examples
 ```julia-repl
 julia> src = reshape(Vector(1:16), (4,4))
-4×4 Array{Int64,2}:
+4×4 Matrix{Int64}:
  1  5   9  13
  2  6  10  14
  3  7  11  15
  4  8  12  16
 
-julia> dest = OffsetArray{Int}(undef, (0:3,2:5))
+julia> dest = OffsetArray{Int}(undef, (0:3,2:5));
 
 julia> circcopy!(dest, src)
-OffsetArrays.OffsetArray{Int64,2,Array{Int64,2}} with indices 0:3×2:5:
+4×4 OffsetArray(::Matrix{Int64}, 0:3, 2:5) with eltype Int64 with indices 0:3×2:5:
  8  12  16  4
  5   9  13  1
  6  10  14  2
@@ -1441,7 +1501,7 @@ end
 # contiguous multidimensional indexing: if the first dimension is a range,
 # we can get some performance from using copy_chunks!
 
-@inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray}, J0::Union{Colon,AbstractUnitRange{Int}})
+@inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray}, J0::D) where {D<:Union{Colon,AbstractUnitRange{Int}}}
     I0 = to_indices(B, (J0,))[1]
     @boundscheck checkbounds(B, I0)
     l0 = length(I0)
@@ -1453,7 +1513,7 @@ end
 end
 
 @inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray},
-        I0::Union{Colon,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Colon}...)
+        I0::DI0, I::Union{Int,AbstractUnitRange{Int},Colon}...) where {DI0<:Union{Colon,AbstractUnitRange{Int}}, }
     J = to_indices(B, (I0, I...))
     @boundscheck checkbounds(B, J...)
     _unsafe_setindex!(B, X, J...)
@@ -1494,7 +1554,7 @@ end
 end
 
 @propagate_inbounds function setindex!(B::BitArray, X::AbstractArray,
-        I0::Union{Colon,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Colon}...)
+        I0::DI0, I::Union{Int,AbstractUnitRange{Int},Colon}...) where {DI0<:Union{Colon,AbstractUnitRange{Int}}}
     _setindex!(IndexStyle(B), B, X, to_indices(B, (I0, I...))...)
 end
 
@@ -1562,19 +1622,23 @@ end
     end
 end
 
-isassigned(a::AbstractArray, i::CartesianIndex) = isassigned(a, Tuple(i)...)
-function isassigned(A::AbstractArray, i::Union{Integer, CartesianIndex}...)
-    isa(i, Tuple{Vararg{Int}}) || return isassigned(A, CartesianIndex(i...))
-    @boundscheck checkbounds(Bool, A, i...) || return false
+@propagate_inbounds isassigned(A::AbstractArray, i::CartesianIndex) = isassigned(A, Tuple(i)...)
+@propagate_inbounds function isassigned(A::AbstractArray, i::Union{Integer, CartesianIndex}...)
+    return isassigned(A, CartesianIndex(to_indices(A, i)))
+end
+@inline function isassigned(A::AbstractArray, i::Integer...)
+    # convert to valid indices, checking for Bool
+    inds = to_indices(A, i)
+    @boundscheck checkbounds(Bool, A, inds...) || return false
     S = IndexStyle(A)
-    ninds = length(i)
+    ninds = length(inds)
     if (isa(S, IndexLinear) && ninds != 1)
-        return @inbounds isassigned(A, _to_linear_index(A, i...))
+        return @inbounds isassigned(A, _to_linear_index(A, inds...))
     elseif (!isa(S, IndexLinear) && ninds != ndims(A))
-        return @inbounds isassigned(A, _to_subscript_indices(A, i...)...)
+        return @inbounds isassigned(A, _to_subscript_indices(A, inds...)...)
     else
        try
-            A[i...]
+            A[inds...]
             true
         catch e
             if isa(e, BoundsError) || isa(e, UndefRefError)
@@ -1599,12 +1663,11 @@ function permutedims(B::StridedArray, perm)
     permutedims!(P, B, perm)
 end
 
-function checkdims_perm(P::AbstractArray{TP,N}, B::AbstractArray{TB,N}, perm) where {TP,TB,N}
-    indsB = axes(B)
-    length(perm) == N || throw(ArgumentError("expected permutation of size $N, but length(perm)=$(length(perm))"))
+checkdims_perm(P::AbstractArray{TP,N}, B::AbstractArray{TB,N}, perm) where {TP,TB,N} = checkdims_perm(axes(P), axes(B), perm)
+function checkdims_perm(indsP::NTuple{N, AbstractUnitRange}, indsB::NTuple{N, AbstractUnitRange}, perm) where {N}
+    length(perm) == N || throw(ArgumentError(LazyString("expected permutation of size ", N, ", but length(perm)=", length(perm))))
     isperm(perm) || throw(ArgumentError("input is not a permutation"))
-    indsP = axes(P)
-    for i = 1:length(perm)
+    for i in eachindex(perm)
         indsP[i] == indsB[perm[i]] || throw(DimensionMismatch("destination tensor of incorrect size"))
     end
     nothing
@@ -1613,21 +1676,21 @@ end
 for (V, PT, BT) in Any[((:N,), BitArray, BitArray), ((:T,:N), Array, StridedArray)]
     @eval @generated function permutedims!(P::$PT{$(V...)}, B::$BT{$(V...)}, perm) where $(V...)
         quote
-            checkdims_perm(P, B, perm)
+            checkdims_perm(axes(P), axes(B), perm)
 
             #calculates all the strides
             native_strides = size_to_strides(1, size(B)...)
-            strides_1 = 0
-            @nexprs $N d->(strides_{d+1} = native_strides[perm[d]])
+            strides = @ntuple $N d->native_strides[perm[d]]
+            strides::NTuple{$N,Integer}
 
             #Creates offset, because indexing starts at 1
-            offset = 1 - sum(@ntuple $N d->strides_{d+1})
+            offset = 1 - reduce(+, strides, init = 0)
 
             sumc = 0
             ind = 1
             @nloops($N, i, P,
-                    d->(sumc += i_d*strides_{d+1}), # PRE
-                    d->(sumc -= i_d*strides_{d+1}), # POST
+                    d->(sumc += i_d*strides[d]), # PRE
+                    d->(sumc -= i_d*strides[d]), # POST
                     begin # BODY
                         @inbounds P[ind] = B[sumc+offset]
                         ind += 1
@@ -1686,7 +1749,7 @@ julia> unique(A, dims=3)
  0  0
 ```
 """
-unique(A::AbstractArray; dims::Union{Colon,Integer} = :) = _unique_dims(A, dims)
+unique(A::AbstractArray; dims::D = :) where {D<:Union{Colon,Integer}} = _unique_dims(A, dims)
 
 _unique_dims(A::AbstractArray, dims::Colon) = invoke(unique, Tuple{Any}, A)
 
@@ -1843,7 +1906,7 @@ but the result order will be row-major instead.
 
 # Higher dimensional examples
 ```
-julia> A = permutedims(reshape([4 3; 2 1; 'A' 'B'; 'C' 'D'], (2, 2, 2)), (1, 3, 2))
+julia> A = [4 3; 2 1 ;;; 'A' 'B'; 'C' 'D']
 2×2×2 Array{Any, 3}:
 [:, :, 1] =
  4  3
@@ -1892,41 +1955,156 @@ julia> sortslices(reshape([5; 4; 3; 2; 1], (1,1,5)), dims=3, by=x->x[1,1])
 ```
 """
 function sortslices(A::AbstractArray; dims::Union{Integer, Tuple{Vararg{Integer}}}, kws...)
-    _sortslices(A, Val{dims}(); kws...)
+    if A isa Matrix && dims isa Integer && dims == 1
+        # TODO: remove once the generic version becomes as fast or faster
+        perm = sortperm(eachslice(A; dims); kws...)
+        return A[perm, :]
+    end
+
+    B = similar(A)
+    _sortslices!(B, A, Val{dims}(); kws...)
+    B
 end
 
-# Works around inference's lack of ability to recognize partial constness
-struct DimSelector{dims, T}
-    A::T
+function _sortslices!(B, A, ::Val{dims}; kws...) where dims
+    ves = vec(eachslice(A; dims))
+    perm = sortperm(ves; kws...)
+    bes = eachslice(B; dims)
+
+    # TODO for further optimization: traverse in memory order
+    for (slice, i) in zip(eachslice(B; dims), perm)
+        slice .= ves[i]
+    end
 end
-DimSelector{dims}(x::T) where {dims, T} = DimSelector{dims, T}(x)
-(ds::DimSelector{dims, T})(i) where {dims, T} = i in dims ? axes(ds.A, i) : (:,)
 
-_negdims(n, dims) = filter(i->!(i in dims), 1:n)
+getindex(b::Ref, ::CartesianIndex{0}) = getindex(b)
+setindex!(b::Ref, x, ::CartesianIndex{0}) = setindex!(b, x)
+
+## hashing AbstractArray ## can't be put in abstractarray.jl due to bootstrapping problems with the use of @nexpr
+
+function _hash_fib(A, h::UInt)
+    # Goal: Hash approximately log(N) entries with a higher density of hashed elements
+    # weighted towards the end and special consideration for repeated values. Colliding
+    # hashes will often subsequently be compared by equality -- and equality between arrays
+    # works elementwise forwards and is short-circuiting. This means that a collision
+    # between arrays that differ by elements at the beginning is cheaper than one where the
+    # difference is towards the end. Furthermore, choosing `log(N)` arbitrary entries from a
+    # sparse array will likely only choose the same element repeatedly (zero in this case).
+
+    # To achieve this, we work backwards, starting by hashing the last element of the
+    # array. After hashing each element, we skip `fibskip` elements, where `fibskip`
+    # is pulled from the Fibonacci sequence -- Fibonacci was chosen as a simple
+    # ~O(log(N)) algorithm that ensures we don't hit a common divisor of a dimension
+    # and only end up hashing one slice of the array (as might happen with powers of
+    # two). Finally, we find the next distinct value from the one we just hashed.
+
+    # This is a little tricky since skipping an integer number of values inherently works
+    # with linear indices, but `findprev` uses `keys`. Hoist out the conversion "maps":
+    ks = keys(A)
+    key_to_linear = LinearIndices(ks) # Index into this map to compute the linear index
+    linear_to_key = vec(ks)           # And vice-versa
+
+    # Start at the last index
+    keyidx = last(ks)
+    linidx = key_to_linear[keyidx]
+    fibskip = prevfibskip = oneunit(linidx)
+    first_linear = first(LinearIndices(linear_to_key))
+    @nexprs 4 i -> p_i = h
+
+    n = 0
+    while true
+        n += 1
+        # Hash the element
+        elt = A[keyidx]
+
+        stream_idx = mod1(n, 4)
+        @nexprs 4 i -> stream_idx == i && (p_i = hash_mix_linear(hash(keyidx, p_i), hash(elt, p_i)))
+
+        # Skip backwards a Fibonacci number of indices -- this is a linear index operation
+        linidx = key_to_linear[keyidx]
+        linidx < fibskip + first_linear && break
+        linidx -= fibskip
+        keyidx = linear_to_key[linidx]
+
+        # Only increase the Fibonacci skip once every N iterations. This was chosen
+        # to be big enough that all elements of small arrays get hashed while
+        # obscenely large arrays are still tractable. With a choice of N=4096, an
+        # entirely-distinct 8000-element array will have ~75% of its elements hashed,
+        # with every other element hashed in the first half of the array. At the same
+        # time, hashing a `typemax(Int64)`-length Float64 range takes about a second.
+        if rem(n, 4096) == 0
+            fibskip, prevfibskip = fibskip + prevfibskip, fibskip
+        end
+
+        # Find a key index with a value distinct from `elt` -- might be `keyidx` itself
+        keyidx = findprev(!isequal(elt), A, keyidx)
+        keyidx === nothing && break
+    end
 
-function compute_itspace(A, ::Val{dims}) where {dims}
-    negdims = _negdims(ndims(A), dims)
-    axs = Iterators.product(ntuple(DimSelector{dims}(A), ndims(A))...)
-    vec(permutedims(collect(axs), (dims..., negdims...)))
+    @nexprs 4 i -> h = hash_mix_linear(p_i, h)
+    return hash_uint(h)
 end
 
-function _sortslices(A::AbstractArray, d::Val{dims}; kws...) where dims
-    itspace = compute_itspace(A, d)
-    vecs = map(its->view(A, its...), itspace)
-    p = sortperm(vecs; kws...)
-    if ndims(A) == 2 && isa(dims, Integer) && isa(A, Array)
-        # At the moment, the performance of the generic version is subpar
-        # (about 5x slower). Hardcode a fast-path until we're able to
-        # optimize this.
-        return dims == 1 ? A[p, :] : A[:, p]
+"""
+    union_split(f, x, ts::Tuple{Vararg{Val}}, args...)
+
+call `f(x, args...)`, union-splitting on all the types specified by `ts`
+
+`union_split(f, x, (Val{T1}(), Val{T2}()), y, z)` is equivalent to
+
+```
+if x isa T1
+    f(x, y, z)
+elseif x isa T2
+    f(x, y, z)
+else
+    f(x, y, z)
+end
+```
+"""
+@inline function union_split(f, @nospecialize(x), ts::Tuple{Val{T}, Vararg{Val,N}}, args...) where {T, N}
+    if x isa T
+        f(x, args...)
     else
-        B = similar(A)
-        for (x, its) in zip(p, itspace)
-            B[its...] = vecs[x]
+        union_split(f, x, Base.tail(ts), args...)
+    end
+end
+@inline union_split(f, x, ::Tuple{}, args::Vararg{Any, N}) where {N} = f(x, args...)
+
+function hash_shaped(A, h0::UInt, eltype_hint=())
+    h::UInt = h0
+    # Axes are themselves AbstractArrays, so hashing them directly would stack overflow
+    # Instead hash the tuple of firsts and lasts along each dimension
+    h = hash(map(first, axes(A)), h)
+    h = hash(map(last, axes(A)), h)
+    len = length(A)
+
+    if len < 8
+        # for the shortest arrays we chain directly
+        for elt in A
+            h = union_split(hash, elt, eltype_hint, h)
         end
-        B
+        return h
+    elseif len < 32768
+        # separate accumulator streams, unrolled
+        @nexprs 8 i -> p_i::UInt = h
+        n  = 1
+        limit = len - 7
+        while n <= limit
+            @nexprs 8 i -> p_i = union_split(hash, A[n + i - 1], eltype_hint, p_i)
+            n += 8
+        end
+        while n <= len
+            p_1 = union_split(hash, A[n], eltype_hint, p_1)
+            n += 1
+        end
+        # fold all streams back together
+        @nexprs 8 i -> h = hash_mix_linear(p_i, h)
+        return hash_uint(h)
+    else
+        return _hash_fib(A, h)
     end
 end
 
-getindex(b::Ref, ::CartesianIndex{0}) = getindex(b)
-setindex!(b::Ref, x, ::CartesianIndex{0}) = setindex!(b, x)
+const hash_abstractarray_seed = UInt === UInt64 ? 0x7e2d6fb6448beb77 : 0xd4514ce5
+hash(A::AbstractArray, h::UInt) = hash_shaped(A, h ⊻ hash_abstractarray_seed)
diff --git a/base/multimedia.jl b/base/multimedia.jl
index e634a19b7d6aa..323fa044f7d0f 100644
--- a/base/multimedia.jl
+++ b/base/multimedia.jl
@@ -229,8 +229,8 @@ display(d::AbstractDisplay, mime::AbstractString, @nospecialize x) = display(d,
 display(mime::AbstractString, @nospecialize x) = display(MIME(mime), x)
 
 """
-    displayable(mime) -> Bool
-    displayable(d::AbstractDisplay, mime) -> Bool
+    displayable(mime)::Bool
+    displayable(d::AbstractDisplay, mime)::Bool
 
 Return a boolean value indicating whether the given `mime` type (string) is displayable by
 any of the displays in the current display stack, or specifically by the display `d` in the
@@ -372,7 +372,7 @@ function displayable(m::MIME)
 end
 
 ###########################################################################
-# The redisplay method can be overridden by a AbstractDisplay in order to
+# The redisplay method can be overridden by an AbstractDisplay in order to
 # update an existing display (instead of, for example, opening a new
 # window), and is used by the IJulia interface to defer display
 # until the next interactive prompt.  This is especially useful
diff --git a/base/multinverses.jl b/base/multinverses.jl
index 21d8e53d2ff83..7ce082268bfcf 100644
--- a/base/multinverses.jl
+++ b/base/multinverses.jl
@@ -2,7 +2,7 @@
 
 module MultiplicativeInverses
 
-import Base: div, divrem, rem, unsigned
+import Base: div, divrem, mul_hi, rem, unsigned, mod
 using  Base: IndexLinear, IndexCartesian, tail
 export multiplicativeinverse
 
@@ -28,7 +28,7 @@ abstract type  MultiplicativeInverse{T} <: Number end
 # Division of Int32 by 3:
 #   floor((2^32+2)/3 * n/2^32) = floor(n/3 + 2n/(3*2^32))
 # The correction term, 2n/(3*2^32), is strictly less than 1/3 for any
-# nonnegative n::Int32, so this divides any nonnegative Int32 by 3.
+# non-negative n::Int32, so this divides any non-negative Int32 by 3.
 # (When n < 0, we add 1, and one can show that this computes
 # ceil(n/d) = -floor(abs(n)/d).)
 #
@@ -134,33 +134,13 @@ struct UnsignedMultiplicativeInverse{T<:Unsigned} <: MultiplicativeInverse{T}
 end
 UnsignedMultiplicativeInverse(x::Unsigned) = UnsignedMultiplicativeInverse{typeof(x)}(x)
 
-# Returns the higher half of the product a*b
-function _mul_high(a::T, b::T) where {T<:Union{Signed, Unsigned}}
-    ((widen(a)*b) >>> (sizeof(a)*8)) % T
-end
-
-function _mul_high(a::UInt128, b::UInt128)
-    shift = sizeof(a)*4
-    mask = typemax(UInt128) >> shift
-    a1, a2 = a >>> shift, a & mask
-    b1, b2 = b >>> shift, b & mask
-    a1b1, a1b2, a2b1, a2b2 = a1*b1, a1*b2, a2*b1, a2*b2
-    carry = ((a1b2 & mask) + (a2b1 & mask) + (a2b2 >>> shift)) >>> shift
-    a1b1 + (a1b2 >>> shift) + (a2b1 >>> shift) + carry
-end
-function _mul_high(a::Int128, b::Int128)
-    shift = sizeof(a)*8 - 1
-    t1, t2 = (a >> shift) & b % UInt128, (b >> shift) & a % UInt128
-    (_mul_high(a % UInt128, b % UInt128) - t1 - t2) % Int128
-end
-
 function div(a::T, b::SignedMultiplicativeInverse{T}) where T
-    x = _mul_high(a, b.multiplier)
+    x = mul_hi(a, b.multiplier)
     x += (a*b.addmul) % T
     ifelse(abs(b.divisor) == 1, a*b.divisor, (signbit(x) + (x >> b.shift)) % T)
 end
 function div(a::T, b::UnsignedMultiplicativeInverse{T}) where T
-    x = _mul_high(a, b.multiplier)
+    x = mul_hi(a, b.multiplier)
     x = ifelse(b.add, convert(T, convert(T, (convert(T, a - x) >>> 1)) + x), x)
     ifelse(b.divisor == 1, a, x >>> b.shift)
 end
@@ -173,6 +153,13 @@ function divrem(a::T, b::MultiplicativeInverse{T}) where T
     (d, a - d*b.divisor)
 end
 
+mod(a::T, b::UnsignedMultiplicativeInverse{T}) where {T} = rem(a, b)
+
+function mod(a::T, b::SignedMultiplicativeInverse{T}) where {T}
+    r = rem(a, b)
+    return (iszero(r) || signbit(r) == signbit(b.divisor)) ? r : r + b.divisor
+end
+
 multiplicativeinverse(x::Signed) = SignedMultiplicativeInverse(x)
 multiplicativeinverse(x::Unsigned) = UnsignedMultiplicativeInverse(x)
 
diff --git a/base/namedtuple.jl b/base/namedtuple.jl
index e489508bc55ea..37f3a3ef8436b 100644
--- a/base/namedtuple.jl
+++ b/base/namedtuple.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+import Core: NamedTuple
+
 """
     NamedTuple
 
@@ -110,26 +112,24 @@ julia> (; t.x)
 """
 Core.NamedTuple
 
-if nameof(@__MODULE__) === :Base
-
-@eval function NamedTuple{names,T}(args::Tuple) where {names, T <: Tuple}
+@eval function (NT::Type{NamedTuple{names,T}})(args::Tuple) where {names, T <: Tuple}
     if length(args) != length(names::Tuple)
         throw(ArgumentError("Wrong number of arguments to named tuple constructor."))
     end
     # Note T(args) might not return something of type T; e.g.
     # Tuple{Type{Float64}}((Float64,)) returns a Tuple{DataType}
-    $(Expr(:splatnew, :(NamedTuple{names,T}), :(T(args))))
+    $(Expr(:splatnew, :NT, :(T(args))))
 end
 
-function NamedTuple{names, T}(nt::NamedTuple) where {names, T <: Tuple}
+function (NT::Type{NamedTuple{names, T}})(nt::NamedTuple) where {names, T <: Tuple}
     if @generated
-        Expr(:new, :(NamedTuple{names, T}),
-             Any[ :(let Tn = fieldtype(T, $n),
+        Expr(:new, :NT,
+             Any[ :(let Tn = fieldtype(NT, $n),
                       ntn = getfield(nt, $(QuoteNode(names[n])))
                       ntn isa Tn ? ntn : convert(Tn, ntn)
                   end) for n in 1:length(names) ]...)
     else
-        NamedTuple{names, T}(map(Fix1(getfield, nt), names))
+        NT(map(Fix1(getfield, nt), names))
     end
 end
 
@@ -145,16 +145,11 @@ function NamedTuple{names}(nt::NamedTuple) where {names}
     end
 end
 
-NamedTuple{names, T}(itr) where {names, T <: Tuple} = NamedTuple{names, T}(T(itr))
-NamedTuple{names}(itr) where {names} = NamedTuple{names}(Tuple(itr))
+(NT::Type{NamedTuple{names, T}})(itr) where {names, T <: Tuple} = NT(T(itr))
+(NT::Type{NamedTuple{names}})(itr) where {names} = NT(Tuple(itr))
 
 NamedTuple(itr) = (; itr...)
 
-# avoids invalidating Union{}(...)
-NamedTuple{names, Union{}}(itr::Tuple) where {names} = throw(MethodError(NamedTuple{names, Union{}}, (itr,)))
-
-end # if Base
-
 # Like NamedTuple{names, T} as a constructor, but omits the additional
 # `convert` call, when the types are known to match the fields
 @eval function _new_NamedTuple(T::Type{NamedTuple{NTN, NTT}} where {NTN, NTT}, args::Tuple)
@@ -182,25 +177,24 @@ nextind(@nospecialize(t::NamedTuple), i::Integer) = Int(i)+1
 
 convert(::Type{NT}, nt::NT) where {names, NT<:NamedTuple{names}} = nt
 convert(::Type{NT}, nt::NT) where {names, T<:Tuple, NT<:NamedTuple{names,T}} = nt
+convert(::Type{NT}, t::Tuple) where {NT<:NamedTuple} = (@inline NT(t))::NT
 
-function convert(::Type{NT}, nt::NamedTuple{names}) where {names, T<:Tuple, NT<:NamedTuple{names,T}}
-    if !@isdefined T
-        # converting abstract NT to an abstract Tuple type, to a concrete NT1, is not straightforward, so this could just be an error, but we define it anyways
-        # _tuple_error(NT, nt)
-        T1 = Tuple{ntuple(i -> fieldtype(NT, i), Val(length(names)))...}
-        NT1 = NamedTuple{names, T1}
-    else
-        T1 = T
-        NT1 = NT
-    end
-    return NT1(T1(nt))::NT1::NT
+function convert(::Type{NamedTuple{names,T}}, nt::NamedTuple{names}) where {names,T<:Tuple}
+    NT = NamedTuple{names,T}
+    (@inline NT(nt))::NT
 end
 
-if nameof(@__MODULE__) === :Base
-    Tuple(nt::NamedTuple) = (nt...,)
-    (::Type{T})(nt::NamedTuple) where {T <: Tuple} = (t = Tuple(nt); t isa T ? t : convert(T, t)::T)
+function convert(::Type{NT}, nt::NamedTuple{names}) where {names, NT<:NamedTuple{names}}
+    # converting abstract NT to an abstract Tuple type, to a concrete NT1, is not straightforward, so this could just be an error, but we define it anyways
+    # _tuple_error(NT, nt)
+    T1 = Tuple{ntuple(i -> fieldtype(NT, i), Val(length(names)))...}
+    NT1 = NamedTuple{names, T1}
+    return NT1(T1(nt))::NT1::NT
 end
 
+Tuple(nt::NamedTuple) = (nt...,)
+(::Type{T})(nt::NamedTuple) where {T <: Tuple} = (t = Tuple(nt); t isa T ? t : convert(T, t)::T)
+
 function show(io::IO, t::NamedTuple)
     n = nfields(t)
     for i = 1:n
@@ -269,8 +263,11 @@ function map(f, nt::NamedTuple{names}, nts::NamedTuple...) where names
     NamedTuple{names}(map(f, map(Tuple, (nt, nts...))...))
 end
 
-@assume_effects :total function merge_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}})
-    @nospecialize an bn
+filter(f, xs::NamedTuple) = xs[filter(k -> f(xs[k]), keys(xs))]
+
+function merge_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}})
+    @nospecialize
+    @_total_meta
     names = Symbol[an...]
     for n in bn
         if !sym_in(n, an)
@@ -280,18 +277,21 @@ end
     (names...,)
 end
 
-@assume_effects :total function merge_types(names::Tuple{Vararg{Symbol}}, a::Type{<:NamedTuple}, b::Type{<:NamedTuple})
-    @nospecialize names a b
+function merge_types(names::Tuple{Vararg{Symbol}}, a::Type{<:NamedTuple}, b::Type{<:NamedTuple})
+    @nospecialize
+    @_total_meta
     bn = _nt_names(b)
     return Tuple{Any[ fieldtype(sym_in(names[n], bn) ? b : a, names[n]) for n in 1:length(names) ]...}
 end
 
-@assume_effects :foldable function merge_fallback(@nospecialize(a::NamedTuple), @nospecialize(b::NamedTuple),
-        @nospecialize(an::Tuple{Vararg{Symbol}}), @nospecialize(bn::Tuple{Vararg{Symbol}}))
+function merge_fallback(a::NamedTuple, b::NamedTuple,
+                        an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}})
+    @nospecialize
+    @_foldable_meta
     names = merge_names(an, bn)
     types = merge_types(names, typeof(a), typeof(b))
     n = length(names)
-    A = Vector{Any}(undef, n)
+    A = Memory{Any}(undef, n)
     for i=1:n
         n = names[i]
         A[i] = getfield(sym_in(n, bn) ? b : a, n)
@@ -299,6 +299,10 @@ end
     _new_NamedTuple(NamedTuple{names, types}, (A...,))
 end
 
+# This is `Experimental.@max_methods 4 function merge end`, which is not
+# defined at this point in bootstrap.
+typeof(function merge end).name.max_methods = UInt8(4)
+
 """
     merge(a::NamedTuple, bs::NamedTuple...)
 
@@ -339,7 +343,7 @@ merge(a::NamedTuple,     b::NamedTuple{()}) = a
 merge(a::NamedTuple{()}, b::NamedTuple{()}) = a
 merge(a::NamedTuple{()}, b::NamedTuple)     = b
 
-merge(a::NamedTuple, b::Iterators.Pairs{<:Any,<:Any,<:Any,<:NamedTuple}) = merge(a, getfield(b, :data))
+merge(a::NamedTuple, b::Iterators.Pairs{<:Any,<:Any,Nothing,<:NamedTuple}) = merge(a, getfield(b, :data))
 
 merge(a::NamedTuple, b::Iterators.Zip{<:Tuple{Any,Any}}) = merge(a, NamedTuple{Tuple(b.is[1])}(b.is[2]))
 
@@ -384,8 +388,9 @@ tail(t::NamedTuple{names}) where names = NamedTuple{tail(names::Tuple)}(t)
 front(t::NamedTuple{names}) where names = NamedTuple{front(names::Tuple)}(t)
 reverse(nt::NamedTuple) = NamedTuple{reverse(keys(nt))}(reverse(values(nt)))
 
-@assume_effects :total function diff_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}})
-    @nospecialize an bn
+function diff_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}})
+    @nospecialize
+    @_total_meta
     names = Symbol[]
     for n in an
         if !sym_in(n, bn)
@@ -395,16 +400,20 @@ reverse(nt::NamedTuple) = NamedTuple{reverse(keys(nt))}(reverse(values(nt)))
     (names...,)
 end
 
-@assume_effects :foldable function diff_types(@nospecialize(a::NamedTuple), @nospecialize(names::Tuple{Vararg{Symbol}}))
+function diff_types(a::NamedTuple, names::Tuple{Vararg{Symbol}})
+    @nospecialize
+    @_foldable_meta
     return Tuple{Any[ fieldtype(typeof(a), names[n]) for n in 1:length(names) ]...}
 end
 
-@assume_effects :foldable function diff_fallback(@nospecialize(a::NamedTuple), @nospecialize(an::Tuple{Vararg{Symbol}}), @nospecialize(bn::Tuple{Vararg{Symbol}}))
+function diff_fallback(a::NamedTuple, an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}})
+    @nospecialize
+    @_foldable_meta
     names = diff_names(an, bn)
     isempty(names) && return (;)
     types = diff_types(a, names)
     n = length(names)
-    A = Vector{Any}(undef, n)
+    A = Memory{Any}(undef, n)
     for i=1:n
         n = names[i]
         A[i] = getfield(a, n)
@@ -412,6 +421,24 @@ end
     _new_NamedTuple(NamedTuple{names, types}, (A...,))
 end
 
+"""
+    delete(a::NamedTuple, field::Symbol)
+
+Construct a new named tuple from `a` by removing the named field.
+
+```jldoctest
+julia> Base.delete((a=1, b=2, c=3), :a)
+(b = 2, c = 3)
+
+julia> Base.delete((a=1, b=2, c=3), :b)
+(a = 1, c = 3)
+```
+"""
+@constprop :aggressive function delete(a::NamedTuple{an}, field::Symbol) where {an}
+    names = diff_names(an, (field,))
+    NamedTuple{names}(a)
+end
+
 """
     structdiff(a::NamedTuple, b::Union{NamedTuple,Type{NamedTuple}})
 
@@ -508,10 +535,11 @@ when it is printed in the stack trace view.
 
 ```julia
 julia> @Kwargs{init::Int} # the internal representation of keyword arguments
-Base.Pairs{Symbol, Int64, Tuple{Symbol}, @NamedTuple{init::Int64}}
+Base.Pairs{Symbol, Int64, Nothing, @NamedTuple{init::Int64}}
 
 julia> sum("julia"; init=1)
 ERROR: MethodError: no method matching +(::Char, ::Char)
+The function `+` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   +(::Any, ::Any, ::Any, ::Any...)
@@ -550,7 +578,7 @@ Stacktrace:
 macro Kwargs(ex)
     return :(let
         NT = @NamedTuple $ex
-        Base.Pairs{keytype(NT),eltype(NT),typeof(NT.parameters[1]),NT}
+        Base.Pairs{keytype(NT),eltype(NT),Nothing,NT}
     end)
 end
 
diff --git a/base/ntuple.jl b/base/ntuple.jl
index 7391b86154ac4..fa0a3821d7047 100644
--- a/base/ntuple.jl
+++ b/base/ntuple.jl
@@ -3,7 +3,7 @@
 # `ntuple`, for constructing tuples of a given length
 
 """
-    ntuple(f::Function, n::Integer)
+    ntuple(f, n::Integer)
 
 Create a tuple of length `n`, computing each element as `f(i)`,
 where `i` is the index of the element.
@@ -14,7 +14,7 @@ julia> ntuple(i -> 2*i, 4)
 (2, 4, 6, 8)
 ```
 """
-@inline function ntuple(f::F, n::Integer) where F
+@inline function ntuple(f::F, n::Int) where F
     # marked inline since this benefits from constant propagation of `n`
     t = n == 0  ? () :
         n == 1  ? (f(1),) :
@@ -30,8 +30,10 @@ julia> ntuple(i -> 2*i, 4)
         _ntuple(f, n)
     return t
 end
+ntuple(f::F, n::Integer) where F = ntuple(f, convert(Int, n)::Int)
 
-function _ntuple(f::F, n) where F
+# `n` should always be an Int (#55790)
+function _ntuple(f::F, n::Int) where F
     @noinline
     (n >= 0) || throw(ArgumentError(LazyString("tuple length should be ≥ 0, got ", n)))
     ([f(i) for i = 1:n]...,)
@@ -43,12 +45,6 @@ function ntupleany(f, n)
     (Any[f(i) for i = 1:n]...,)
 end
 
-# inferable ntuple (enough for bootstrapping)
-ntuple(f, ::Val{0}) = ()
-ntuple(f, ::Val{1}) = (@inline; (f(1),))
-ntuple(f, ::Val{2}) = (@inline; (f(1), f(2)))
-ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3)))
-
 """
     ntuple(f, ::Val{N})
 
@@ -72,7 +68,7 @@ julia> ntuple(i -> 2*i, Val(4))
     if @generated
         :(@ntuple $N i -> f(i))
     else
-        Tuple(f(i) for i = 1:N)
+        Tuple(f(i) for i = 1:(N::Int))
     end
 end
 
@@ -88,3 +84,11 @@ end
         (t..., fill(val, N-M)...)
     end
 end
+
+
+# Specialized extensions for NTuple
+function reverse(t::NTuple{N}) where N
+    ntuple(Val{N}()) do i
+        t[end+1-i]
+    end::typeof(t)
+end
diff --git a/base/number.jl b/base/number.jl
index 923fc907d4038..8314c546147c7 100644
--- a/base/number.jl
+++ b/base/number.jl
@@ -7,7 +7,7 @@ convert(::Type{T}, x::T)      where {T<:Number} = x
 convert(::Type{T}, x::Number) where {T<:Number} = T(x)::T
 
 """
-    isinteger(x) -> Bool
+    isinteger(x)::Bool
 
 Test whether `x` is numerically equal to some integer.
 
@@ -62,7 +62,7 @@ true
 isone(x) = x == one(x) # fallback method
 
 """
-    isfinite(f) -> Bool
+    isfinite(f)::Bool
 
 Test whether a number is finite.
 
@@ -136,6 +136,50 @@ true
 """
 signbit(x::Real) = x < 0
 
+"""
+    ispositive(x)
+
+Test whether `x > 0`. See also [`isnegative`](@ref).
+
+!!! compat "Julia 1.13"
+    This function requires at least Julia 1.13.
+
+# Examples
+```jldoctest
+julia> ispositive(-4.0)
+false
+
+julia> ispositive(99)
+true
+
+julia> ispositive(0.0)
+false
+```
+"""
+ispositive(x::Real) = x > 0
+
+"""
+    isnegative(x)
+
+Test whether `x < 0`. See also [`ispositive`](@ref).
+
+!!! compat "Julia 1.13"
+    This function requires at least Julia 1.13.
+
+# Examples
+```jldoctest
+julia> isnegative(-4.0)
+true
+
+julia> isnegative(99)
+false
+
+julia> isnegative(-0.0)
+false
+```
+"""
+isnegative(x::Real) = x < 0
+
 """
     sign(x)
 
@@ -287,7 +331,12 @@ map(f, x::Number, ys::Number...) = f(x, ys...)
     zero(x)
     zero(::Type)
 
-Get the additive identity element for the type of `x` (`x` can also specify the type itself).
+Get the additive identity element for `x`. If the additive identity can be deduced
+from the type alone, then a type may be given as an argument to `zero`.
+
+For example, `zero(Int)` will work because the additive identity is the same for all
+instances of `Int`, but `zero(Vector{Int})` is not defined because vectors of different
+lengths have different additive identities.
 
 See also [`iszero`](@ref), [`one`](@ref), [`oneunit`](@ref), [`oftype`](@ref).
 
@@ -311,12 +360,15 @@ zero(::Type{Union{}}, slurp...) = Union{}(0)
 
 """
     one(x)
-    one(T::type)
+    one(T::Type)
 
 Return a multiplicative identity for `x`: a value such that
-`one(x)*x == x*one(x) == x`.  Alternatively `one(T)` can
-take a type `T`, in which case `one` returns a multiplicative
-identity for any `x` of type `T`.
+`one(x)*x == x*one(x) == x`. If the multiplicative identity can
+be deduced from the type alone, then a type may be given as
+an argument to `one` (e.g. `one(Int)` will work because the
+multiplicative identity is the same for all instances of `Int`,
+but `one(Matrix{Int})` is not defined because matrices of
+different shapes have different multiplicative identities.)
 
 If possible, `one(x)` returns a value of the same type as `x`,
 and `one(T)` returns a value of type `T`.  However, this may
@@ -354,9 +406,10 @@ one(::Type{Union{}}, slurp...) = Union{}(1)
     oneunit(x::T)
     oneunit(T::Type)
 
-Return `T(one(x))`, where `T` is either the type of the argument or
-(if a type is passed) the argument.  This differs from [`one`](@ref) for
-dimensionful quantities: `one` is dimensionless (a multiplicative identity)
+Return `T(one(x))`, where `T` is either the type of the argument, or
+the argument itself in cases where the `oneunit` can be deduced from
+the type alone. This differs from [`one`](@ref) for dimensionful
+quantities: `one` is dimensionless (a multiplicative identity)
 while `oneunit` is dimensionful (of the same type as `x`, or of type `T`).
 
 # Examples
diff --git a/base/opaque_closure.jl b/base/opaque_closure.jl
index bb0ae8935b06c..5e38c8523f4a8 100644
--- a/base/opaque_closure.jl
+++ b/base/opaque_closure.jl
@@ -18,69 +18,30 @@ the argument type may be fixed length even if the function is variadic.
     This interface is experimental and subject to change or removal without notice.
 """
 macro opaque(ex)
-    esc(Expr(:opaque_closure, ex))
+    esc(Expr(:opaque_closure, nothing, nothing, nothing, #= allow_partial =# true, ex))
 end
 
 macro opaque(ty, ex)
-    esc(Expr(:opaque_closure, ty, ex))
-end
-
-# OpaqueClosure construction from pre-inferred CodeInfo/IRCode
-using Core.Compiler: IRCode
-using Core: CodeInfo
-
-function compute_ir_rettype(ir::IRCode)
-    rt = Union{}
-    for i = 1:length(ir.stmts)
-        stmt = ir.stmts[i][:inst]
-        if isa(stmt, Core.Compiler.ReturnNode) && isdefined(stmt, :val)
-            rt = Core.Compiler.tmerge(Core.Compiler.argextype(stmt.val, ir), rt)
+    if Base.isexpr(ty, :->)
+        (AT, body) = ty.args
+        filter!((n)->!isa(n, Core.LineNumberNode), body.args)
+        if !Base.isexpr(body, :block) || length(body.args) != 1
+            error("Opaque closure type must be specified in the form Tuple{T,U...}->RT")
         end
+        RT = only(body.args)
+    else
+        error("Opaque closure type must be specified in the form Tuple{T,U...}->RT")
     end
-    return Core.Compiler.widenconst(rt)
+    AT = (AT !== :_) ? AT : nothing
+    RT = (RT !== :_) ? RT : nothing
+    return esc(Expr(:opaque_closure, AT, RT, RT, #= allow_partial =# true, ex))
 end
 
-function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)
-    argtypes = Vector{Any}(undef, nargs)
-    for i = 1:nargs
-        argtypes[i] = Core.Compiler.widenconst(ir.argtypes[i+1])
-    end
-    if isva
-        lastarg = pop!(argtypes)
-        if lastarg <: Tuple
-            append!(argtypes, lastarg.parameters)
-        else
-            push!(argtypes, Vararg{Any})
-        end
-    end
-    return Tuple{argtypes...}
-end
-
-function Core.OpaqueClosure(ir::IRCode, @nospecialize env...;
-                            isva::Bool = false,
-                            do_compile::Bool = true)
-    # NOTE: we need ir.argtypes[1] == typeof(env)
-    ir = Core.Compiler.copy(ir)
-    nargs = length(ir.argtypes)-1
-    sig = compute_oc_signature(ir, nargs, isva)
-    rt = compute_ir_rettype(ir)
-    src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
-    src.slotnames = fill(:none, nargs+1)
-    src.slotflags = fill(zero(UInt8), length(ir.argtypes))
-    src.slottypes = copy(ir.argtypes)
-    src.rettype = rt
-    src = Core.Compiler.ir_to_codeinf!(src, ir)
-    return generate_opaque_closure(sig, Union{}, rt, src, nargs, isva, env...; do_compile)
-end
+# OpaqueClosure construction from pre-inferred CodeInfo/IRCode
+using Core: CodeInfo, SSAValue
 
-function Core.OpaqueClosure(src::CodeInfo, @nospecialize env...)
-    src.inferred || throw(ArgumentError("Expected inferred src::CodeInfo"))
-    mi = src.parent::Core.MethodInstance
-    sig = Base.tuple_type_tail(mi.specTypes)
-    method = mi.def::Method
-    nargs = method.nargs-1
-    isva = method.isva
-    return generate_opaque_closure(sig, Union{}, src.rettype, src, nargs, isva, env...)
+function Core.OpaqueClosure(src::CodeInfo, @nospecialize env...; rettype, sig, nargs, isva=false, kwargs...)
+    return generate_opaque_closure(sig, Union{}, rettype, src, nargs, isva, env...; kwargs...)
 end
 
 function generate_opaque_closure(@nospecialize(sig), @nospecialize(rt_lb), @nospecialize(rt_ub),
@@ -88,7 +49,8 @@ function generate_opaque_closure(@nospecialize(sig), @nospecialize(rt_lb), @nosp
                                  mod::Module=@__MODULE__,
                                  lineno::Int=0,
                                  file::Union{Nothing,Symbol}=nothing,
-                                 do_compile::Bool=true)
-    return ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any, Cint),
-        sig, rt_lb, rt_ub, mod, src, lineno, file, nargs, isva, env, do_compile)
+                                 do_compile::Bool=true,
+                                 isinferred::Bool=true)
+    return ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any, Cint, Cint),
+        sig, rt_lb, rt_ub, mod, src, lineno, file, nargs, isva, env, do_compile, isinferred)
 end
diff --git a/base/operators.jl b/base/operators.jl
index 3f0f8bc49b164..15bcf581c7598 100644
--- a/base/operators.jl
+++ b/base/operators.jl
@@ -3,10 +3,25 @@
 ## types ##
 
 """
-    <:(T1, T2)
+    <:(T1, T2)::Bool
 
-Subtype operator: returns `true` if and only if all values of type `T1` are
-also of type `T2`.
+Subtyping relation, defined between two types. In Julia, a type `S` is said to be a
+*subtype* of a type `T` if and only if we have `S <: T`.
+
+For any type `L` and any type `R`, `L <: R` implies that any value `v` of type `L`
+is also of type `R`. I.e., `(L <: R) && (v isa L)` implies `v isa R`.
+
+The subtyping relation is a *partial order*. I.e., `<:` is:
+
+* *reflexive*: for any type `T`, `T <: T` holds
+
+* *antisymmetric*: for any type `A` and any type `B`, `(A <: B) && (B <: A)`
+  implies `A == B`
+
+* *transitive*: for any type `A`, any type `B` and any type `C`;
+  `(A <: B) && (B <: C)` implies `A <: C`
+
+See also info on [Types](@ref man-types), [`Union{}`](@ref), [`Any`](@ref), [`isa`](@ref).
 
 # Examples
 ```jldoctest
@@ -16,28 +31,56 @@ true
 julia> Vector{Int} <: AbstractArray
 true
 
-julia> Matrix{Float64} <: Matrix{AbstractFloat}
+julia> Matrix{Float64} <: Matrix{AbstractFloat}  # `Matrix` is invariant
 false
+
+julia> Tuple{Float64} <: Tuple{AbstractFloat}    # `Tuple` is covariant
+true
+
+julia> Union{} <: Int  # The bottom type, `Union{}`, subtypes each type.
+true
+
+julia> Union{} <: Float32 <: AbstractFloat <: Real <: Number <: Any  # Operator chaining
+true
 ```
+
+The `<:` keyword also has several syntactic uses which represent the same subtyping relation,
+but which do not execute the operator or return a Bool:
+
+* To specify the lower bound and the upper bound on a parameter of a
+  [`UnionAll`](@ref) type in a [`where`](@ref) statement.
+
+* To specify the lower bound and the upper bound on a (static) parameter of a
+  method, see [Parametric Methods](@ref).
+
+* To define a subtyping relation while declaring a new type, see [`struct`](@ref)
+  and [`abstract type`](@ref).
 """
 (<:)
 
+import Core: >:
+
 """
     >:(T1, T2)
 
 Supertype operator, equivalent to `T2 <: T1`.
 """
-(>:)(@nospecialize(a), @nospecialize(b)) = (b <: a)
+>:
 
 """
-    supertype(T::DataType)
+    supertype(T::Union{DataType, UnionAll})
 
-Return the supertype of DataType `T`.
+Return the direct supertype of type `T`.
+`T` can be a [`DataType`](@ref) or a [`UnionAll`](@ref) type. Does not support
+type [`Union`](@ref)s. Also see info on [Types](@ref man-types).
 
 # Examples
 ```jldoctest
 julia> supertype(Int32)
 Signed
+
+julia> supertype(Vector)
+DenseVector (alias for DenseArray{T, 1} where T)
 ```
 """
 supertype(T::DataType) = (@_total_meta; T.super)
@@ -52,8 +95,9 @@ Generic equality operator. Falls back to [`===`](@ref).
 Should be implemented for all types with a notion of equality, based on the abstract value
 that an instance represents. For example, all numeric types are compared by numeric value,
 ignoring type. Strings are compared as sequences of characters, ignoring encoding.
-For collections, `==` is generally called recursively on all contents,
-though other properties (like the shape for arrays) may also be taken into account.
+Collections of the same type generally compare their key sets, and if those are `==`, then compare the values
+for each of those keys, returning true if all such pairs are `==`.
+Other properties are typically not taken into account (such as the exact type).
 
 This operator follows IEEE semantics for floating-point numbers: `0.0 == -0.0` and
 `NaN != NaN`.
@@ -61,17 +105,18 @@ This operator follows IEEE semantics for floating-point numbers: `0.0 == -0.0` a
 The result is of type `Bool`, except when one of the operands is [`missing`](@ref),
 in which case `missing` is returned
 ([three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic)).
-For collections, `missing` is returned if at least one of the operands contains
-a `missing` value and all non-missing values are equal.
+Collections generally implement three-valued logic akin to [`all`](@ref), returning
+missing if any operands contain missing values and all other pairs are equal.
 Use [`isequal`](@ref) or [`===`](@ref) to always get a `Bool` result.
 
 # Implementation
 New numeric types should implement this function for two arguments of the new type, and
 handle comparison to other types via promotion rules where possible.
 
-[`isequal`](@ref) falls back to `==`, so new methods of `==` will be used by the
-[`Dict`](@ref) type to compare keys. If your type will be used as a dictionary key, it
-should therefore also implement [`hash`](@ref).
+Equality and hashing are intimately related; two values that are considered [`isequal`](@ref) **must**
+have the same [`hash`](@ref) and by default `isequal` falls back to `==`. If a type customizes the behavior of `==` and/or [`isequal`](@ref),
+then [`hash`](@ref) must be similarly implemented to ensure `isequal` and `hash` agree. `Set`s, `Dict`s, and many other internal
+implementations assume that this invariant holds.
 
 If some type defines `==`, [`isequal`](@ref), and [`isless`](@ref) then it should
 also implement [`<`](@ref) to ensure consistency of comparisons.
@@ -79,7 +124,7 @@ also implement [`<`](@ref) to ensure consistency of comparisons.
 ==
 
 """
-    isequal(x, y) -> Bool
+    isequal(x, y)::Bool
 
 Similar to [`==`](@ref), except for the treatment of floating point numbers
 and of missing values. `isequal` treats all floating-point `NaN` values as equal
@@ -243,12 +288,11 @@ orderings such as [`isless`](@ref).
 !!! compat "Julia 1.7"
     This function requires Julia 1.7 or later.
 """
-isunordered(x) = false
+isunordered(_) = false
 isunordered(x::AbstractFloat) = isnan(x)
-isunordered(x::Missing) = true
+isunordered(::Missing) = true
 
 ==(T::Type, S::Type) = (@_total_meta; ccall(:jl_types_equal, Cint, (Any, Any), T, S) != 0)
-!=(T::Type, S::Type) = (@_total_meta; !(T == S))
 ==(T::TypeVar, S::Type) = false
 ==(T::Type, S::TypeVar) = false
 
@@ -277,8 +321,8 @@ false
 const ≠ = !=
 
 """
-    ===(x,y) -> Bool
-    ≡(x,y) -> Bool
+    ===(x,y)::Bool
+    ≡(x,y)::Bool
 
 Determine whether `x` and `y` are identical, in the sense that no program could distinguish
 them. First the types of `x` and `y` are compared. If those are identical, mutable objects
@@ -303,6 +347,7 @@ true
 ===
 const ≡ = ===
 
+import Core: !==
 """
     !==(x, y)
     ≢(x,y)
@@ -320,7 +365,8 @@ julia> a ≢ a
 false
 ```
 """
-!==(@nospecialize(x), @nospecialize(y)) = !(x === y)
+!==
+
 const ≢ = !==
 
 """
@@ -407,6 +453,14 @@ const ≤ = <=
 
 Greater-than-or-equals comparison operator. Falls back to `y <= x`.
 
+# Implementation
+
+New types should prefer to implement [`<=`](@ref) instead of this function,
+and rely on the fallback definition `>=(x, y) = y <= x`.
+
+Furthermore, in many cases it is enough to implement just [`<`](@ref) and
+[`==`](@ref), relying on the fallback definitions of both `<=` and `>=`.
+
 # Examples
 ```jldoctest
 julia> 'a' >= 'b'
@@ -464,13 +518,17 @@ cmp(x::Integer, y::Integer) = ifelse(isless(x, y), -1, ifelse(isless(y, x), 1, 0
 """
     max(x, y, ...)
 
-Return the maximum of the arguments (with respect to [`isless`](@ref)). See also the [`maximum`](@ref) function
-to take the maximum element from a collection.
+Return the maximum of the arguments, with respect to [`isless`](@ref).
+If any of the arguments is [`missing`](@ref), return `missing`.
+See also the [`maximum`](@ref) function to take the maximum element from a collection.
 
 # Examples
 ```jldoctest
 julia> max(2, 5, 1)
 5
+
+julia> max(5, missing, 6)
+missing
 ```
 """
 max(x, y) = ifelse(isless(y, x), x, y)
@@ -478,13 +536,17 @@ max(x, y) = ifelse(isless(y, x), x, y)
 """
     min(x, y, ...)
 
-Return the minimum of the arguments (with respect to [`isless`](@ref)). See also the [`minimum`](@ref) function
-to take the minimum element from a collection.
+Return the minimum of the arguments, with respect to [`isless`](@ref).
+If any of the arguments is [`missing`](@ref), return `missing`.
+See also the [`minimum`](@ref) function to take the minimum element from a collection.
 
 # Examples
 ```jldoctest
 julia> min(2, 5, 1)
 1
+
+julia> min(4, missing, 6)
+missing
 ```
 """
 min(x,y) = ifelse(isless(y, x), y, x)
@@ -538,6 +600,7 @@ const ⊽ = nor
 # method count limit in inference
 afoldl(op, a) = a
 function afoldl(op, a, bs...)
+    @_terminates_locally_meta
     l = length(bs)
     i =  0; y = a;            l == i && return y
     #@nexprs 31 i -> (y = op(y, bs[i]); l == i && return y)
@@ -577,7 +640,7 @@ function afoldl(op, a, bs...)
     end
     return y
 end
-setfield!(typeof(afoldl).name.mt, :max_args, 34, :monotonic)
+setfield!(typeof(afoldl).name, :max_args, Int32(34), :monotonic)
 
 for op in (:+, :*, :&, :|, :xor, :min, :max, :kron)
     @eval begin
@@ -1099,40 +1162,56 @@ julia> filter(!isletter, str)
 !(f::ComposedFunction{typeof(!)}) = f.inner #allows !!f === f
 
 """
-    Fix1(f, x)
+    Fix{N}(f, x)
+
+A type representing a partially-applied version of a function `f`, with the argument
+`x` fixed at position `N::Int`. In other words, `Fix{3}(f, x)` behaves similarly to
+`(y1, y2, y3...; kws...) -> f(y1, y2, x, y3...; kws...)`.
 
-A type representing a partially-applied version of the two-argument function
-`f`, with the first argument fixed to the value "x". In other words,
-`Fix1(f, x)` behaves similarly to `y->f(x, y)`.
+!!! compat "Julia 1.12"
+    This general functionality requires at least Julia 1.12, while `Fix1` and `Fix2`
+    are available earlier.
 
-See also [`Fix2`](@ref Base.Fix2).
+!!! note
+    When nesting multiple `Fix`, note that the `N` in `Fix{N}` is _relative_ to the current
+    available arguments, rather than an absolute ordering on the target function. For example,
+    `Fix{1}(Fix{2}(f, 4), 4)` fixes the first and second arg, while `Fix{2}(Fix{1}(f, 4), 4)`
+    fixes the first and third arg.
 """
-struct Fix1{F,T} <: Function
+struct Fix{N,F,T} <: Function
     f::F
     x::T
 
-    Fix1(f::F, x) where {F} = new{F,_stable_typeof(x)}(f, x)
-    Fix1(f::Type{F}, x) where {F} = new{Type{F},_stable_typeof(x)}(f, x)
+    function Fix{N}(f::F, x) where {N,F}
+        if !(N isa Int)
+            throw(ArgumentError(LazyString("expected type parameter in `Fix` to be `Int`, but got `", N, "::", typeof(N), "`")))
+        elseif N < 1
+            throw(ArgumentError(LazyString("expected `N` in `Fix{N}` to be integer greater than 0, but got ", N)))
+        end
+        new{N,_stable_typeof(f),_stable_typeof(x)}(f, x)
+    end
+end
+
+function (f::Fix{N})(args::Vararg{Any,M}; kws...) where {N,M}
+    M < N-1 && throw(ArgumentError(LazyString("expected at least ", N-1, " arguments to `Fix{", N, "}`, but got ", M)))
+    (left, right) = _split_tuple(args, N-1)
+    return f.f(left..., f.x, right...; kws...)
 end
 
-(f::Fix1)(y) = f.f(f.x, y)
+# Special cases for improved constant propagation
+(f::Fix{1})(arg; kws...) = f.f(f.x, arg; kws...)
+(f::Fix{2})(arg; kws...) = f.f(arg, f.x; kws...)
 
 """
-    Fix2(f, x)
-
-A type representing a partially-applied version of the two-argument function
-`f`, with the second argument fixed to the value "x". In other words,
-`Fix2(f, x)` behaves similarly to `y->f(y, x)`.
+Alias for `Fix{1}`. See [`Fix`](@ref Base.Fix).
 """
-struct Fix2{F,T} <: Function
-    f::F
-    x::T
+const Fix1{F,T} = Fix{1,F,T}
 
-    Fix2(f::F, x) where {F} = new{F,_stable_typeof(x)}(f, x)
-    Fix2(f::Type{F}, x) where {F} = new{Type{F},_stable_typeof(x)}(f, x)
-end
+"""
+Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix).
+"""
+const Fix2{F,T} = Fix{2,F,T}
 
-(f::Fix2)(y) = f.f(y, f.x)
 
 """
     isequal(x)
@@ -1233,7 +1312,7 @@ it into the original function. This is useful as an adaptor to pass a
 multi-argument function in a context that expects a single argument, but passes
 a tuple as that single argument.
 
-# Example usage:
+# Examples
 ```jldoctest
 julia> map(splat(+), zip(1:3,4:6))
 3-element Vector{Int64}:
@@ -1269,8 +1348,7 @@ struct Splat{F} <: Function
     Splat(f) = new{Core.Typeof(f)}(f)
 end
 (s::Splat)(args) = s.f(args...)
-print(io::IO, s::Splat) = print(io, "splat(", s.f, ')')
-show(io::IO, s::Splat) = print(io, s)
+show(io::IO, s::Splat) = (print(io, "splat("); show(io, s.f); print(io, ")"))
 
 ## in and related operators
 
@@ -1287,7 +1365,7 @@ used to implement specialized methods.
 """
 in(x) = Fix2(in, x)
 
-function in(x, itr)
+function in(x, itr::Any)
     anymissing = false
     for y in itr
         v = (y == x)
@@ -1300,12 +1378,36 @@ function in(x, itr)
     return anymissing ? missing : false
 end
 
+# Specialized variant of in for Tuple, which can generate typed comparisons for each element
+# of the tuple, skipping values that are statically known to be != at compile time.
+in(x, itr::Tuple) = _in_tuple(x, itr, false)
+# This recursive function will be unrolled at compiletime, and will not generate separate
+# llvm-compiled specializations for each step of the recursion.
+function _in_tuple(x, @nospecialize(itr::Tuple), anymissing::Bool)
+    @inline
+    # Base case
+    if isempty(itr)
+        return anymissing ? missing : false
+    end
+    # Recursive case
+    v = (itr[1] == x)
+    if ismissing(v)
+        anymissing = true
+    elseif v
+        return true
+    end
+    return _in_tuple(x, tail(itr), anymissing)
+end
+
+# fallback to the loop implementation after some number of arguments to avoid inference blowup
+in(x, itr::Any32) = invoke(in, Tuple{Any,Any}, x, itr)
+
 const ∈ = in
 ∉(x, itr) = !∈(x, itr)
 ∉(itr) = Fix2(∉, itr)
 
 """
-    ∋(collection, item) -> Bool
+    ∋(collection, item)::Bool
 
 Like [`in`](@ref), but with arguments in reverse order.
 Avoid adding methods to this function; define `in` instead.
@@ -1327,16 +1429,20 @@ a function equivalent to `y -> item in y`.
 ∌(x) = Fix2(∌, x)
 
 """
-    in(item, collection) -> Bool
-    ∈(item, collection) -> Bool
+    in(item, collection)::Bool
+    ∈(item, collection)::Bool
 
 Determine whether an item is in the given collection, in the sense that it is
 [`==`](@ref) to one of the values generated by iterating over the collection.
+Can equivalently be used with infix syntax:
+
+    item in collection
+    item ∈ collection
+
 Return a `Bool` value, except if `item` is [`missing`](@ref) or `collection`
 contains `missing` but not `item`, in which case `missing` is returned
 ([three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic),
 matching the behavior of [`any`](@ref) and [`==`](@ref)).
-
 Some collections follow a slightly different definition. For example,
 [`Set`](@ref)s check whether the item [`isequal`](@ref) to one of the elements;
 [`Dict`](@ref)s look for `key=>value` pairs, and the `key` is compared using
@@ -1347,7 +1453,7 @@ or `k in keys(dict)`. For the collections mentioned above,
 the result is always a `Bool`.
 
 When broadcasting with `in.(items, collection)` or `items .∈ collection`, both
-`item` and `collection` are broadcasted over, which is often not what is intended.
+`items` and `collection` are broadcasted over, which is often not what is intended.
 For example, if both arguments are vectors (and the dimensions match), the result is
 a vector indicating whether each value in collection `items` is `in` the value at the
 corresponding position in `collection`. To get a vector indicating whether each value
@@ -1396,12 +1502,12 @@ julia> [1, 2] .∈ ([2, 3],)
 in
 
 """
-    ∉(item, collection) -> Bool
-    ∌(collection, item) -> Bool
+    ∉(item, collection)::Bool
+    ∌(collection, item)::Bool
 
 Negation of `∈` and `∋`, i.e. checks that `item` is not in `collection`.
 
-When broadcasting with `items .∉ collection`, both `item` and `collection` are
+When broadcasting with `items .∉ collection`, both `items` and `collection` are
 broadcasted over, which is often not what is intended. For example, if both arguments
 are vectors (and the dimensions match), the result is a vector indicating whether
 each value in collection `items` is not in the value at the corresponding position
diff --git a/base/optimized_generics.jl b/base/optimized_generics.jl
new file mode 100644
index 0000000000000..6b1d146b6172b
--- /dev/null
+++ b/base/optimized_generics.jl
@@ -0,0 +1,84 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module OptimizedGenerics
+
+# This file defines interfaces that are recognized and optimized by the compiler
+# They are intended to be used by data structure implementations that wish to
+# opt into some level of compiler optimizations. These interfaces are
+# EXPERIMENTAL and currently intended for use by Base only. They are subject
+# to change or removal without notice. It is undefined behavior to add methods
+# to these generics that do not conform to the specified interface.
+#
+# The intended way to use these generics is that data structures will provide
+# appropriate implementations for a generic. In the absence of compiler
+# optimizations, these behave like regular methods. However, the compiler is
+# semantically allowed to perform certain structural optimizations on
+# appropriate combinations of these intrinsics without proving correctness.
+
+# Compiler-recognized generics for immutable key-value stores (dicts, etc.)
+"""
+    module KeyValue
+
+Implements a key-value like interface where the compiler has liberty to perform
+the following transformations. The core optimization semantically allowed for
+the compiler is:
+
+    get(set(x, key, val), key) -> (val,)
+
+where the compiler will recursively look through `x`. Keys are compared by
+egality.
+
+Implementations must observe the following constraints:
+
+1. It is undefined behavior for `get` not to return the exact (by egality) val
+   stored for a given `key`.
+"""
+module KeyValue
+    """
+        set(collection, [key [, val]])
+        set(T, collection, key, val)
+
+    Set the `key` in `collection` to `val`. If `val` is omitted, deletes the
+    value from the collection. If `key` is omitted as well, deletes all elements
+    of the collection.
+    """
+    function set end
+
+    """
+        get(collection, key)
+
+    Retrieve the value corresponding to `key` in `collection` as a single
+    element tuple or `nothing` if no value corresponding to the key was found.
+    `key`s are compared by egal.
+    """
+    function get end
+end
+
+# Compiler-recognized intrinsics for compiler plugins
+"""
+    module CompilerPlugins
+
+Implements a pair of functions `typeinf`/`typeinf_edge`. When the optimizer sees
+a call to `typeinf`, it has license to instead call `typeinf_edge`, supplying the
+current inference stack in `parent_frame` (but otherwise supplying the arguments
+to `typeinf`). `typeinf_edge` will return the `CodeInstance` that `typeinf` would
+have returned at runtime. The optimizer may perform a non-IPO replacement of
+the call to `typeinf` by the result of `typeinf_edge`. In addition, the IPO-safe
+fields of the `CodeInstance` may be propagated in IPO mode.
+"""
+module CompilerPlugins
+    """
+        typeinf(owner, mi, source_mode)::CodeInstance
+
+    Return a `CodeInstance` for the given `mi` whose valid results include at
+    the least current tls world and satisfies the requirements of `source_mode`.
+    """
+    function typeinf end
+
+    """
+        typeinf_edge(owner, mi, parent_frame, world, abi_mode)::CodeInstance
+    """
+    function typeinf_edge end
+end
+
+end
diff --git a/base/options.jl b/base/options.jl
index a94936391fa8d..0255a8f2dc642 100644
--- a/base/options.jl
+++ b/base/options.jl
@@ -9,7 +9,7 @@ struct JLOptions
     commands::Ptr{Ptr{UInt8}} # (e)eval, (E)print, (L)load
     image_file::Ptr{UInt8}
     cpu_target::Ptr{UInt8}
-    nthreadpools::Int16
+    nthreadpools::Int8
     nthreads::Int16
     nmarkthreads::Int16
     nsweepthreads::Int8
@@ -17,6 +17,7 @@ struct JLOptions
     nprocs::Int32
     machine_file::Ptr{UInt8}
     project::Ptr{UInt8}
+    program_file::Ptr{UInt8}
     isinteractive::Int8
     color::Int8
     historyfile::Int8
@@ -34,10 +35,12 @@ struct JLOptions
     can_inline::Int8
     polly::Int8
     trace_compile::Ptr{UInt8}
+    trace_dispatch::Ptr{UInt8}
     fast_math::Int8
     worker::Int8
     cookie::Ptr{UInt8}
     handle_signals::Int8
+    use_experimental_features::Int8
     use_sysimage_native_code::Int8
     use_compiled_modules::Int8
     use_pkgimages::Int8
@@ -57,6 +60,19 @@ struct JLOptions
     strip_ir::Int8
     permalloc_pkgimg::Int8
     heap_size_hint::UInt64
+    hard_heap_limit::UInt64
+    heap_target_increment::UInt64
+    trace_compile_timing::Int8
+    trim::Int8
+    trace_eval::Int8
+    task_metrics::Int8
+    timeout_for_safepoint_straggler_s::Int16
+    gc_sweep_always_full::Int8
+    compress_sysimage::Int8
+    alert_on_critical_error::Int8
+    target_sanitize_memory::Int8
+    target_sanitize_thread::Int8
+    target_sanitize_address::Int8
 end
 
 # This runs early in the sysimage != is not defined yet
@@ -67,6 +83,18 @@ end
 
 JLOptions() = unsafe_load(cglobal(:jl_options, JLOptions))
 
+function colored_text(opts::JLOptions)
+    return if opts.color != 0
+        opts.color == 1
+    elseif !isempty(get(ENV, "FORCE_COLOR", ""))
+        true
+    elseif !isempty(get(ENV, "NO_COLOR", ""))
+        false
+    else
+        nothing
+    end
+end
+
 function show(io::IO, opt::JLOptions)
     print(io, "JLOptions(")
     fields = fieldnames(JLOptions)
diff --git a/base/ordering.jl b/base/ordering.jl
index 5383745b1dd1f..f2ddd20ab09f0 100644
--- a/base/ordering.jl
+++ b/base/ordering.jl
@@ -3,7 +3,7 @@
 module Order
 
 
-import ..@__MODULE__, ..parentmodule
+import Base: @__MODULE__, parentmodule
 const Base = parentmodule(@__MODULE__)
 import .Base:
     AbstractVector, @propagate_inbounds, isless, identity, getindex, reverse,
@@ -21,7 +21,8 @@ export # not exported by Base
 """
     Base.Order.Ordering
 
-Abstract type which represents a total order on some set of elements.
+Abstract type which represents a strict weak order on some set of elements. See
+[`sort!`](@ref) for more.
 
 Use [`Base.Order.lt`](@ref) to compare two elements according to the ordering.
 """
@@ -110,7 +111,7 @@ ReverseOrdering(by::By) = By(by.by, ReverseOrdering(by.order))
 ReverseOrdering(perm::Perm) = Perm(ReverseOrdering(perm.order), perm.data)
 
 """
-    lt(o::Ordering, a, b)
+    lt(o::Ordering, a, b)::Bool
 
 Test whether `a` is less than `b` according to the ordering `o`.
 """
@@ -125,18 +126,15 @@ lt(o::Lt,                    a, b) = o.lt(a,b)
     (lt(p.order, da, db)::Bool) | (!(lt(p.order, db, da)::Bool) & (a < b))
 end
 
-_ord(lt::typeof(isless), by::typeof(identity), order::Ordering) = order
-_ord(lt::typeof(isless), by,                   order::Ordering) = By(by, order)
-
-function _ord(lt, by, order::Ordering)
-    if order === Forward
-        return Lt((x, y) -> lt(by(x), by(y)))
-    elseif order === Reverse
-        return Lt((x, y) -> lt(by(y), by(x)))
-    else
-        error("Passing both lt= and order= arguments is ambiguous; please pass order=Forward or order=Reverse (or leave default)")
-    end
-end
+
+_ord(lt::typeof(isless), by, order::Ordering)                         = _by(by, order)
+_ord(lt::typeof(isless), by, order::ForwardOrdering)                  = _by(by, order)  # disambiguation
+_ord(lt::typeof(isless), by, order::ReverseOrdering{ForwardOrdering}) = _by(by, order)  # disambiguation
+_ord(lt,                 by, order::ForwardOrdering)                  = _by(by, Lt(lt))
+_ord(lt,                 by, order::ReverseOrdering{ForwardOrdering}) = reverse(_by(by, Lt(lt)))
+_ord(lt,                 by, order::Ordering) = error("Passing both lt= and order= arguments is ambiguous; please pass order=Forward or order=Reverse (or leave default)")
+_by(by, order::Ordering) = By(by, order)
+_by(::typeof(identity), order::Ordering) = order
 
 """
     ord(lt, by, rev::Union{Bool, Nothing}, order::Ordering=Forward)
diff --git a/base/osutils.jl b/base/osutils.jl
index 95d0562540e5a..5daf58f5b8f4f 100644
--- a/base/osutils.jl
+++ b/base/osutils.jl
@@ -3,13 +3,23 @@
 """
     @static
 
-Partially evaluate an expression at parse time.
+Partially evaluate an expression at macro expansion time.
 
-For example, `@static Sys.iswindows() ? foo : bar` will evaluate `Sys.iswindows()` and insert
-either `foo` or `bar` into the expression.
-This is useful in cases where a construct would be invalid on other platforms,
-such as a `ccall` to a non-existent function.
-`@static if Sys.isapple() foo end` and `@static foo <&&,||> bar` are also valid syntax.
+This is useful in cases where a construct would be invalid in some cases, such as a `ccall`
+to an os-dependent function, or macros defined in packages that are not imported.
+
+`@static` requires a conditional. The conditional can be in an `if` statement, a ternary
+operator, or `&&`\`||`. The conditional is evaluated by recursively expanding macros,
+lowering and executing the resulting expressions. Then, the matching branch (if any) is
+returned. All the other branches of the conditional are deleted before they are
+macro-expanded (and lowered or executed).
+
+# Example
+
+Suppose we want to parse an expression `expr` that is valid only on macOS. We could solve
+this problem using `@static` with `@static if Sys.isapple() expr end`. In case we had
+`expr_apple` for macOS and `expr_others` for the other operating systems, the solution with
+`@static` would be `@static Sys.isapple() ? expr_apple : expr_others`.
 """
 macro static(ex)
     if isa(ex, Expr)
diff --git a/base/parse.jl b/base/parse.jl
index f6a93e56369b7..4309094e9fa1d 100644
--- a/base/parse.jl
+++ b/base/parse.jl
@@ -251,8 +251,9 @@ function tryparse(::Type{T}, s::AbstractString; base::Union{Nothing,Integer} = n
 end
 
 function parse(::Type{T}, s::AbstractString; base::Union{Nothing,Integer} = nothing) where {T<:Integer}
-    convert(T, tryparse_internal(T, s, firstindex(s), lastindex(s),
-                                 base===nothing ? 0 : check_valid_base(base), true))
+    v = tryparse_internal(T, s, firstindex(s), lastindex(s), base===nothing ? 0 : check_valid_base(base), true)
+    v === nothing && error("should not happoen")
+    convert(T, v)
 end
 tryparse(::Type{Union{}}, slurp...; kwargs...) = error("cannot parse a value as Union{}")
 
@@ -260,12 +261,12 @@ tryparse(::Type{Union{}}, slurp...; kwargs...) = error("cannot parse a value as
 
 function tryparse(::Type{Float64}, s::String)
     hasvalue, val = ccall(:jl_try_substrtod, Tuple{Bool, Float64},
-                          (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s))
+                          (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s) % UInt)
     hasvalue ? val : nothing
 end
 function tryparse(::Type{Float64}, s::SubString{String})
     hasvalue, val = ccall(:jl_try_substrtod, Tuple{Bool, Float64},
-                          (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.ncodeunits)
+                          (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.ncodeunits % UInt)
     hasvalue ? val : nothing
 end
 function tryparse_internal(::Type{Float64}, s::String, startpos::Int, endpos::Int)
@@ -280,12 +281,12 @@ function tryparse_internal(::Type{Float64}, s::SubString{String}, startpos::Int,
 end
 function tryparse(::Type{Float32}, s::String)
     hasvalue, val = ccall(:jl_try_substrtof, Tuple{Bool, Float32},
-                          (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s))
+                          (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s) % UInt)
     hasvalue ? val : nothing
 end
 function tryparse(::Type{Float32}, s::SubString{String})
     hasvalue, val = ccall(:jl_try_substrtof, Tuple{Bool, Float32},
-                          (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.ncodeunits)
+                          (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.ncodeunits % UInt)
     hasvalue ? val : nothing
 end
 function tryparse_internal(::Type{Float32}, s::String, startpos::Int, endpos::Int)
@@ -298,7 +299,7 @@ function tryparse_internal(::Type{Float32}, s::SubString{String}, startpos::Int,
                           (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset+startpos-1, endpos-startpos+1)
     hasvalue ? val : nothing
 end
-tryparse(::Type{T}, s::AbstractString) where {T<:Union{Float32,Float64}} = tryparse(T, String(s))
+tryparse(::Type{T}, s::AbstractString) where {T<:Union{Float32,Float64}} = tryparse(T, String(s)::String)
 tryparse(::Type{Float16}, s::AbstractString) =
     convert(Union{Float16, Nothing}, tryparse(Float32, s))
 tryparse_internal(::Type{Float16}, s::AbstractString, startpos::Int, endpos::Int) =
@@ -321,14 +322,14 @@ function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String}
     if i₊ == i # leading ± sign
         i₊ = something(findnext(in(('+','-')), s, i₊+1), 0)
     end
-    if i₊ != 0 && s[i₊-1] in ('e','E') # exponent sign
+    if i₊ != 0 && s[prevind(s, i₊)] in ('e','E') # exponent sign
         i₊ = something(findnext(in(('+','-')), s, i₊+1), 0)
     end
 
     # find trailing im/i/j
     iᵢ = something(findprev(in(('m','i','j')), s, e), 0)
     if iᵢ > 0 && s[iᵢ] == 'm' # im
-        iᵢ -= 1
+        iᵢ = prevind(s, iᵢ)
         if s[iᵢ] != 'i'
             raise && throw(ArgumentError("expected trailing \"im\", found only \"m\""))
             return nothing
@@ -337,7 +338,7 @@ function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String}
 
     if i₊ == 0 # purely real or imaginary value
         if iᵢ > i && !(iᵢ == i+1 && s[i] in ('+','-')) # purely imaginary (not "±inf")
-            x = tryparse_internal(T, s, i, iᵢ-1, raise)
+            x = tryparse_internal(T, s, i, prevind(s, iᵢ), raise)
             x === nothing && return nothing
             return Complex{T}(zero(x),x)
         else # purely real
@@ -353,11 +354,11 @@ function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String}
     end
 
     # parse real part
-    re = tryparse_internal(T, s, i, i₊-1, raise)
+    re = tryparse_internal(T, s, i, prevind(s, i₊), raise)
     re === nothing && return nothing
 
     # parse imaginary part
-    im = tryparse_internal(T, s, i₊+1, iᵢ-1, raise)
+    im = tryparse_internal(T, s, i₊+1, prevind(s, iᵢ), raise)
     im === nothing && return nothing
 
     return Complex{T}(re, s[i₊]=='-' ? -im : im)
@@ -385,7 +386,7 @@ function tryparse_internal(::Type{T}, s::AbstractString, raise::Bool; kwargs...)
     return result
 end
 @noinline _parse_failure(T, s::AbstractString, startpos = firstindex(s), endpos = lastindex(s)) =
-    throw(ArgumentError("cannot parse $(repr(s[startpos:endpos])) as $T"))
+    throw(ArgumentError(LazyString("cannot parse ", repr(s[startpos:endpos]), " as ", T)))
 
 tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, raise::Bool) where T<:Integer =
     tryparse_internal(T, s, startpos, endpos, 10, raise)
diff --git a/base/partr.jl b/base/partr.jl
index a02272ceab202..d488330f0c87e 100644
--- a/base/partr.jl
+++ b/base/partr.jl
@@ -18,17 +18,64 @@ end
 const heap_d = UInt32(8)
 const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)]
 const heaps_lock = [SpinLock(), SpinLock()]
-const cong_unbias = [typemax(UInt32), typemax(UInt32)]
 
 
-cong(max::UInt32, unbias::UInt32) =
-    ccall(:jl_rand_ptls, UInt32, (UInt32, UInt32), max, unbias) + UInt32(1)
+"""
+    cong(max::UInt32)
 
-function unbias_cong(max::UInt32)
-    return typemax(UInt32) - ((typemax(UInt32) % max) + UInt32(1))
+Return a random UInt32 in the range `1:max` except if max is 0, in that case return 0.
+"""
+cong(max::UInt32) = iszero(max) ? UInt32(0) : rand_ptls(max) + UInt32(1) #TODO: make sure users don't use 0 and remove this check
+
+get_ptls_rng() = ccall(:jl_get_ptls_rng, UInt64, ())
+
+set_ptls_rng(seed::UInt64) = ccall(:jl_set_ptls_rng, Cvoid, (UInt64,), seed)
+
+"""
+    rand_ptls(max::UInt32)
+
+Return a random UInt32 in the range `0:max-1` using the thread-local RNG
+state. Max must be greater than 0.
+"""
+Base.@assume_effects :removable :inaccessiblememonly :notaskstate function rand_ptls(max::UInt32)
+    rngseed = get_ptls_rng()
+    val, seed = rand_uniform_max_int32(max, rngseed)
+    set_ptls_rng(seed)
+    return val % UInt32
+end
+
+# This implementation is based on OpenSSLs implementation of rand_uniform
+# https://github.com/openssl/openssl/blob/1d2cbd9b5a126189d5e9bc78a3bdb9709427d02b/crypto/rand/rand_uniform.c#L13-L99
+# Comments are vendored from their implementation as well.
+# For the original developer check the PR to swift https://github.com/apple/swift/pull/39143.
+
+# Essentially it boils down to incrementally generating a fixed point
+# number on the interval [0, 1) and multiplying this number by the upper
+# range limit.  Once it is certain what the fractional part contributes to
+# the integral part of the product, the algorithm has produced a definitive
+# result.
+"""
+    rand_uniform_max_int32(max::UInt32, seed::UInt64)
+
+Return a random UInt32 in the range `0:max-1` using the given seed.
+Max must be greater than 0.
+"""
+Base.@assume_effects :total function rand_uniform_max_int32(max::UInt32, seed::UInt64)
+    if max == UInt32(1)
+        return UInt32(0), seed
+    end
+    # We are generating a fixed point number on the interval [0, 1).
+    # Multiplying this by the range gives us a number on [0, upper).
+    # The high word of the multiplication result represents the integral part
+    # This is not completely unbiased as it's missing the fractional part of the original implementation but it's good enough for our purposes
+    seed = UInt64(69069) * seed + UInt64(362437)
+    prod = (UInt64(max)) * (seed % UInt32) # 64 bit product
+    i = prod >> 32 % UInt32 # integral part
+    return i % UInt32, seed
 end
 
 
+
 function multiq_sift_up(heap::taskheap, idx::Int32)
     while idx > Int32(1)
         parent = (idx - Int32(2)) ÷ heap_d + Int32(1)
@@ -60,7 +107,6 @@ function multiq_sift_down(heap::taskheap, idx::Int32)
     end
 end
 
-
 function multiq_size(tpid::Int8)
     nt = UInt32(Threads._nthreads_in_pool(tpid))
     tp = tpid + 1
@@ -86,24 +132,23 @@ function multiq_size(tpid::Int8)
             newheaps[i] = taskheap()
         end
         heaps[tp] = newheaps
-        cong_unbias[tp] = unbias_cong(heap_p)
     end
 
     return heap_p
 end
 
-
 function multiq_insert(task::Task, priority::UInt16)
     tpid = ccall(:jl_get_task_threadpoolid, Int8, (Any,), task)
+    @assert tpid > -1
     heap_p = multiq_size(tpid)
     tp = tpid + 1
 
     task.priority = priority
 
-    rn = cong(heap_p, cong_unbias[tp])
+    rn = cong(heap_p)
     tpheaps = heaps[tp]
     while !trylock(tpheaps[rn].lock)
-        rn = cong(heap_p, cong_unbias[tp])
+        rn = cong(heap_p)
     end
 
     heap = tpheaps[rn]
@@ -124,13 +169,14 @@ function multiq_insert(task::Task, priority::UInt16)
     return true
 end
 
-
 function multiq_deletemin()
-    local rn1, rn2
-    local prio1, prio2
+    local rn1::UInt32
 
     tid = Threads.threadid()
     tp = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1) + 1
+    if tp == 0 # Foreign thread
+        return nothing
+    end
     tpheaps = heaps[tp]
 
     @label retry
@@ -140,8 +186,8 @@ function multiq_deletemin()
         if i == heap_p
             return nothing
         end
-        rn1 = cong(heap_p, cong_unbias[tp])
-        rn2 = cong(heap_p, cong_unbias[tp])
+        rn1 = cong(heap_p)
+        rn2 = cong(heap_p)
         prio1 = tpheaps[rn1].priority
         prio2 = tpheaps[rn2].priority
         if prio1 > prio2
@@ -158,6 +204,8 @@ function multiq_deletemin()
         end
     end
 
+    @assert @isdefined(rn1) "Assertion to tell the compiler about the definedness of this variable"
+
     heap = tpheaps[rn1]
     task = heap.tasks[1]
     if ccall(:jl_set_task_tid, Cint, (Any, Cint), task, tid-1) == 0
@@ -182,6 +230,9 @@ end
 function multiq_check_empty()
     tid = Threads.threadid()
     tp = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1) + 1
+    if tp == 0 # Foreign thread
+        return true
+    end
     for i = UInt32(1):length(heaps[tp])
         if heaps[tp][i].ntasks != 0
             return false
diff --git a/base/path.jl b/base/path.jl
index c439a2800acce..00dded2bf6841 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -34,8 +34,25 @@ elseif Sys.iswindows()
     const path_dir_splitter = r"^(.*?)([/\\]+)([^/\\]*)$"sa
     const path_ext_splitter = r"^((?:.*[/\\])?(?:\.|[^/\\\.])[^/\\]*?)(\.[^/\\\.]*|)$"sa
 
+    const splitdrive_re = let
+        # Slash in either direction.
+        S = raw"[\\/]"
+        # Not a slash in either direction.
+        N = raw"[^\\/]"
+        # Drive letter, e.g. `C:`
+        drive = "$(N):"
+        # UNC path, e.g. `\\server\share`
+        unc = "$(S)$(S)$(N)+$(S)$(N)+"
+        # Long drive letter, e.g. `\\?\C:`
+        long_drive = "$(S)$(S)\\?$(S)$(drive)"
+        # Long UNC path, e.g. `\\?\UNC\server\share`
+        long_unc = "$(S)$(S)\\?$(S)UNC$(S)$(N)+$(S)$(N)+"
+        # Need to match the long patterns first so they get priority.
+        Regex("^($long_unc|$long_drive|$unc|$drive|)(.*)\$", "sa")
+    end
+
     function splitdrive(path::String)
-        m = match(r"^([^\\]+:|\\\\[^\\]+\\[^\\]+|\\\\\?\\UNC\\[^\\]+\\[^\\]+|\\\\\?\\[^\\]+:|)(.*)$"sa, path)::AbstractMatch
+        m = match(splitdrive_re, path)::AbstractMatch
         String(something(m.captures[1])), String(something(m.captures[2]))
     end
 else
@@ -44,7 +61,7 @@ end
 
 
 """
-    splitdrive(path::AbstractString) -> (AbstractString, AbstractString)
+    splitdrive(path::AbstractString) -> (drive::AbstractString, path::AbstractString)
 
 On Windows, split a path into the drive letter part and the path part. On Unix systems, the
 first component is always the empty string.
@@ -52,7 +69,7 @@ first component is always the empty string.
 splitdrive(path::AbstractString)
 
 """
-    homedir() -> String
+    homedir()::String
 
 Return the current user's home directory.
 
@@ -60,6 +77,8 @@ Return the current user's home directory.
     `homedir` determines the home directory via `libuv`'s `uv_os_homedir`. For details
     (for example on how to specify the home directory via environment variables), see the
     [`uv_os_homedir` documentation](http://docs.libuv.org/en/v1.x/misc.html#c.uv_os_homedir).
+
+See also [`Sys.username`](@ref).
 """
 function homedir()
     buf = Base.StringVector(AVG_PATH - 1) # space for null-terminator implied by StringVector
@@ -85,7 +104,7 @@ else
 end
 
 """
-    isabspath(path::AbstractString) -> Bool
+    isabspath(path::AbstractString)::Bool
 
 Determine whether a path is absolute (begins at the root directory).
 
@@ -101,7 +120,7 @@ false
 isabspath(path::AbstractString)
 
 """
-    isdirpath(path::AbstractString) -> Bool
+    isdirpath(path::AbstractString)::Bool
 
 Determine whether a path refers to a directory (for example, ends with a path separator).
 
@@ -117,7 +136,7 @@ true
 isdirpath(path::String) = occursin(path_directory_re, splitdrive(path)[2])
 
 """
-    splitdir(path::AbstractString) -> (AbstractString, AbstractString)
+    splitdir(path::AbstractString) -> (dir::AbstractString, file::AbstractString)
 
 Split a path into a tuple of the directory name and file name.
 
@@ -145,7 +164,7 @@ function _splitdir_nodrive(a::String, b::String)
 end
 
 """
-    dirname(path::AbstractString) -> String
+    dirname(path::AbstractString)::String
 
 Get the directory part of a path. Trailing characters ('/' or '\\') in the path are
 counted as part of the path.
@@ -164,7 +183,7 @@ See also [`basename`](@ref).
 dirname(path::AbstractString) = splitdir(path)[1]
 
 """
-    basename(path::AbstractString) -> String
+    basename(path::AbstractString)::String
 
 Get the file name part of a path.
 
@@ -186,7 +205,7 @@ See also [`dirname`](@ref).
 basename(path::AbstractString) = splitdir(path)[2]
 
 """
-    splitext(path::AbstractString) -> (String, String)
+    splitext(path::AbstractString) -> (path_without_extension::String, extension::String)
 
 If the last component of a path contains one or more dots, split the path into everything before the
 last dot and everything including and after the dot. Otherwise, return a tuple of the argument
@@ -215,7 +234,7 @@ end
 pathsep() = path_separator
 
 """
-    splitpath(path::AbstractString) -> Vector{String}
+    splitpath(path::AbstractString)::Vector{String}
 
 Split a file path into all its path components. This is the opposite of
 `joinpath`. Returns an array of substrings, one for each directory or file in
@@ -234,7 +253,7 @@ julia> splitpath("/home/myuser/example.jl")
  "example.jl"
 ```
 """
-splitpath(p::AbstractString) = splitpath(String(p))
+splitpath(p::AbstractString) = splitpath(String(p)::String)
 
 function splitpath(p::String)
     drive, p = splitdrive(p)
@@ -327,9 +346,9 @@ end # os-test
 joinpath(paths::AbstractString...)::String = joinpath(paths)
 
 """
-    joinpath(parts::AbstractString...) -> String
-    joinpath(parts::Vector{AbstractString}) -> String
-    joinpath(parts::Tuple{AbstractString}) -> String
+    joinpath(parts::AbstractString...)::String
+    joinpath(parts::Vector{AbstractString})::String
+    joinpath(parts::Tuple{AbstractString})::String
 
 Join path components into a full path. If some argument is an absolute path or
 (on Windows) has a drive specification that doesn't match the drive computed for
@@ -338,7 +357,7 @@ the join of the preceding paths, then prior components are dropped.
 Note on Windows since there is a current directory for each drive, `joinpath("c:", "foo")`
 represents a path relative to the current directory on drive "c:" so this is equal to "c:foo",
 not "c:\\foo". Furthermore, `joinpath` treats this as a non-absolute path and ignores the drive
-letter casing, hence `joinpath("C:\\A","c:b") = "C:\\A\\b"`.
+letter casing, hence `joinpath("C:\\\\A","c:b") = "C:\\\\A\\\\b"`.
 
 # Examples
 ```jldoctest
@@ -354,7 +373,7 @@ julia> joinpath(["/home/myuser", "example.jl"])
 joinpath
 
 """
-    normpath(path::AbstractString) -> String
+    normpath(path::AbstractString)::String
 
 Normalize a path, removing "." and ".." entries and changing "/" to the canonical path separator
 for the system.
@@ -403,7 +422,7 @@ function normpath(path::String)
 end
 
 """
-    normpath(path::AbstractString, paths::AbstractString...) -> String
+    normpath(path::AbstractString, paths::AbstractString...)::String
 
 Convert a set of paths to a normalized path by joining them together and removing
 "." and ".." entries. Equivalent to `normpath(joinpath(path, paths...))`.
@@ -411,16 +430,16 @@ Convert a set of paths to a normalized path by joining them together and removin
 normpath(a::AbstractString, b::AbstractString...) = normpath(joinpath(a,b...))
 
 """
-    abspath(path::AbstractString) -> String
+    abspath(path::AbstractString)::String
 
 Convert a path to an absolute path by adding the current directory if necessary.
 Also normalizes the path as in [`normpath`](@ref).
 
-# Example
+# Examples
 
 If you are in a directory called `JuliaExample` and the data you are using is two levels up relative to the `JuliaExample` directory, you could write:
 
-abspath("../../data")
+    abspath("../../data")
 
 Which gives a path like `"/home/JuliaUser/data/"`.
 
@@ -441,7 +460,7 @@ function abspath(a::String)::String
 end
 
 """
-    abspath(path::AbstractString, paths::AbstractString...) -> String
+    abspath(path::AbstractString, paths::AbstractString...)::String
 
 Convert a set of paths to an absolute path by joining them together and adding the
 current directory if necessary. Equivalent to `abspath(joinpath(path, paths...))`.
@@ -468,7 +487,7 @@ end # os-test
 
 
 """
-    realpath(path::AbstractString) -> String
+    realpath(path::AbstractString)::String
 
 Canonicalize a path by expanding symbolic links and removing "." and ".." entries.
 On case-insensitive case-preserving filesystems (typically Mac and Windows), the
@@ -523,7 +542,7 @@ end
 
 
 """
-    expanduser(path::AbstractString) -> AbstractString
+    expanduser(path::AbstractString)::AbstractString
 
 On Unix systems, replace a tilde character at the start of a path with the current user's home directory.
 
@@ -532,7 +551,7 @@ See also: [`contractuser`](@ref).
 expanduser(path::AbstractString)
 
 """
-    contractuser(path::AbstractString) -> AbstractString
+    contractuser(path::AbstractString)::AbstractString
 
 On Unix systems, if the path starts with `homedir()`, replace it with a tilde character.
 
@@ -542,7 +561,7 @@ contractuser(path::AbstractString)
 
 
 """
-    relpath(path::AbstractString, startpath::AbstractString = ".") -> String
+    relpath(path::AbstractString, startpath::AbstractString = ".")::String
 
 Return a relative filepath to `path` either from the current directory or from an optional
 start directory. This is a path computation: the filesystem is not accessed to confirm the
@@ -589,8 +608,61 @@ function relpath(path::String, startpath::String = ".")
     return isempty(relpath_) ? curdir :  relpath_
 end
 relpath(path::AbstractString, startpath::AbstractString) =
-    relpath(String(path), String(startpath))
+    relpath(String(path)::String, String(startpath)::String)
 
 for f in (:isdirpath, :splitdir, :splitdrive, :splitext, :normpath, :abspath)
-    @eval $f(path::AbstractString) = $f(String(path))
+    @eval $f(path::AbstractString) = $f(String(path)::String)
 end
+
+# RFC3986 Section 2.1
+percent_escape(s) = '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%')
+# RFC3986 Section 2.3
+encode_uri_component(s) = replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
+
+"""
+    uripath(path::AbstractString)
+
+Encode `path` as a URI as per [RFC8089: The "file" URI
+Scheme](https://www.rfc-editor.org/rfc/rfc8089), [RFC3986: Uniform Resource
+Identifier (URI): Generic Syntax](https://www.rfc-editor.org/rfc/rfc3986), and
+the [Freedesktop File URI spec](https://www.freedesktop.org/wiki/Specifications/file-uri-spec/).
+
+## Examples
+
+```julia-repl
+julia> uripath("/home/user/example file.jl") # On a unix machine
+"file://<hostname>/home/user/example%20file.jl"
+
+julia> uripath("C:\\Users\\user\\example file.jl") # On a windows machine
+"file:///C:/Users/user/example%20file.jl"
+```
+"""
+function uripath end
+
+@static if Sys.iswindows()
+    function uripath(path::String)
+        path = abspath(path)
+        if startswith(path, "\\\\") # UNC path, RFC8089 Appendix E.3
+            unixpath = join(eachsplit(path, path_separator_re, keepempty=false), '/')
+            string("file://", encode_uri_component(unixpath)) # RFC8089 Section 2
+        else
+            drive, localpath = splitdrive(path) # Assuming that non-UNC absolute paths on Windows always have a drive component
+            unixpath = join(eachsplit(localpath, path_separator_re, keepempty=false), '/')
+            encdrive = replace(encode_uri_component(drive), "%3A" => ':', "%7C" => '|') # RFC8089 Appendices D.2, E.2.1, and E.2.2
+            string("file:///", encdrive, '/', encode_uri_component(unixpath)) # RFC8089 Section 2
+        end
+    end
+else
+    function uripath(path::String)
+        localpath = join(eachsplit(abspath(path), path_separator_re, keepempty=false), '/')
+        host = if ispath("/proc/sys/fs/binfmt_misc/WSLInterop") # WSL sigil
+            distro = get(ENV, "WSL_DISTRO_NAME", "") # See <https://patrickwu.space/wslconf/>
+            "wsl\$/$distro" # See <https://github.com/microsoft/terminal/pull/14993> and <https://learn.microsoft.com/en-us/windows/wsl/filesystems>
+        else
+            gethostname() # Freedesktop File URI Spec, Hostnames section
+        end
+        string("file://", encode_uri_component(host), '/', encode_uri_component(localpath)) # RFC8089 Section 2
+    end
+end
+
+uripath(path::AbstractString) = uripath(String(path)::String)
diff --git a/base/pcre.jl b/base/pcre.jl
index 7597c1217ca9e..213fc1890f51d 100644
--- a/base/pcre.jl
+++ b/base/pcre.jl
@@ -7,7 +7,7 @@ module PCRE
 import ..RefValue
 
 # include($BUILDROOT/base/pcre_h.jl)
-include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "pcre_h.jl"))
+include(string(Base.BUILDROOT, "pcre_h.jl"))
 
 const PCRE_LIB = "libpcre2-8"
 
@@ -24,19 +24,19 @@ function create_match_context()
     return ctx
 end
 
-THREAD_MATCH_CONTEXTS::Vector{Ptr{Cvoid}} = [C_NULL]
+global THREAD_MATCH_CONTEXTS::Vector{Ptr{Cvoid}} = [C_NULL]
 
-PCRE_COMPILE_LOCK = nothing
+global PCRE_COMPILE_LOCK::Threads.SpinLock
 
 _tid() = Int(ccall(:jl_threadid, Int16, ())) + 1
-_mth() = Int(Core.Intrinsics.atomic_pointerref(cglobal(:jl_n_threads, Cint), :acquire))
+_mth() = Threads.maxthreadid()
 
 function get_local_match_context()
     tid = _tid()
     ctxs = THREAD_MATCH_CONTEXTS
     if length(ctxs) < tid
         # slow path to allocate it
-        l = PCRE_COMPILE_LOCK::Threads.SpinLock
+        l = PCRE_COMPILE_LOCK
         lock(l)
         try
             ctxs = THREAD_MATCH_CONTEXTS
@@ -196,10 +196,12 @@ function err_message(errno::Integer)
     return GC.@preserve buffer unsafe_string(pointer(buffer))
 end
 
-function exec(re, subject, offset, options, match_data)
-    if !(subject isa Union{String,SubString{String}})
-        subject = String(subject)
-    end
+exec(re, subject::Union{String,SubString{String}}, offset, options, match_data) =
+    _exec(re, subject, offset, options, match_data)
+exec(re, subject, offset, options, match_data) =
+    _exec(re, String(subject)::String, offset, options, match_data)
+
+function _exec(re, subject, offset, options, match_data)
     rc = ccall((:pcre2_match_8, PCRE_LIB), Cint,
                (Ptr{Cvoid}, Ptr{UInt8}, Csize_t, Csize_t, UInt32, Ptr{Cvoid}, Ptr{Cvoid}),
                re, subject, ncodeunits(subject), offset, options, match_data, get_local_match_context())
diff --git a/base/permuteddimsarray.jl b/base/permuteddimsarray.jl
index 41c3636b40216..cf9748168aac2 100644
--- a/base/permuteddimsarray.jl
+++ b/base/permuteddimsarray.jl
@@ -12,7 +12,7 @@ struct PermutedDimsArray{T,N,perm,iperm,AA<:AbstractArray} <: AbstractArray{T,N}
     function PermutedDimsArray{T,N,perm,iperm,AA}(data::AA) where {T,N,perm,iperm,AA<:AbstractArray}
         (isa(perm, NTuple{N,Int}) && isa(iperm, NTuple{N,Int})) || error("perm and iperm must both be NTuple{$N,Int}")
         isperm(perm) || throw(ArgumentError(string(perm, " is not a valid permutation of dimensions 1:", N)))
-        all(map(d->iperm[perm[d]]==d, 1:N)) || throw(ArgumentError(string(perm, " and ", iperm, " must be inverses")))
+        all(d->iperm[perm[d]]==d, 1:N) || throw(ArgumentError(string(perm, " and ", iperm, " must be inverses")))
         new(data)
     end
 end
@@ -39,7 +39,7 @@ julia> B[3,1,2] == A[1,2,3]
 true
 ```
 """
-function PermutedDimsArray(data::AbstractArray{T,N}, perm) where {T,N}
+Base.@constprop :aggressive function PermutedDimsArray(data::AbstractArray{T,N}, perm) where {T,N}
     length(perm) == N || throw(ArgumentError(string(perm, " is not a valid permutation of dimensions 1:", N)))
     iperm = invperm(perm)
     PermutedDimsArray{T,N,(perm...,),(iperm...,),typeof(data)}(data)
@@ -49,10 +49,8 @@ Base.parent(A::PermutedDimsArray) = A.parent
 Base.size(A::PermutedDimsArray{T,N,perm}) where {T,N,perm} = genperm(size(parent(A)), perm)
 Base.axes(A::PermutedDimsArray{T,N,perm}) where {T,N,perm} = genperm(axes(parent(A)), perm)
 Base.has_offset_axes(A::PermutedDimsArray) = Base.has_offset_axes(A.parent)
-
 Base.similar(A::PermutedDimsArray, T::Type, dims::Base.Dims) = similar(parent(A), T, dims)
-
-Base.unsafe_convert(::Type{Ptr{T}}, A::PermutedDimsArray{T}) where {T} = Base.unsafe_convert(Ptr{T}, parent(A))
+Base.cconvert(::Type{Ptr{T}}, A::PermutedDimsArray{T}) where {T} = Base.cconvert(Ptr{T}, parent(A))
 
 # It's OK to return a pointer to the first element, and indeed quite
 # useful for wrapping C routines that require a different storage
@@ -89,13 +87,68 @@ end
 
 """
     permutedims(A::AbstractArray, perm)
+    permutedims(A::AbstractMatrix)
 
-Permute the dimensions of array `A`. `perm` is a vector or a tuple of length `ndims(A)`
+Permute the dimensions (axes) of array `A`. `perm` is a tuple or vector of `ndims(A)` integers
 specifying the permutation.
 
+If `A` is a 2d array ([`AbstractMatrix`](@ref)), then
+`perm` defaults to `(2,1)`, swapping the two axes of `A` (the rows and columns
+of the matrix).   This differs from [`transpose`](@ref) in that the
+operation is not recursive, which is especially useful for arrays of non-numeric values
+(where the recursive `transpose` would throw an error) and/or 2d arrays that do not represent
+linear operators.
+
+For 1d arrays, see [`permutedims(v::AbstractVector)`](@ref), which returns a 1-row “matrix”.
+
 See also [`permutedims!`](@ref), [`PermutedDimsArray`](@ref), [`transpose`](@ref), [`invperm`](@ref).
 
 # Examples
+
+## 2d arrays:
+Unlike `transpose`, `permutedims` can be used to swap rows and columns of 2d arrays of
+arbitrary non-numeric elements, such as strings:
+```jldoctest
+julia> A = ["a" "b" "c"
+            "d" "e" "f"]
+2×3 Matrix{String}:
+ "a"  "b"  "c"
+ "d"  "e"  "f"
+
+julia> permutedims(A)
+3×2 Matrix{String}:
+ "a"  "d"
+ "b"  "e"
+ "c"  "f"
+```
+And `permutedims` produces results that differ from `transpose`
+for matrices whose elements are themselves numeric matrices:
+```jldoctest; setup = :(using LinearAlgebra)
+julia> a = [1 2; 3 4];
+
+julia> b = [5 6; 7 8];
+
+julia> c = [9 10; 11 12];
+
+julia> d = [13 14; 15 16];
+
+julia> X = [[a] [b]; [c] [d]]
+2×2 Matrix{Matrix{Int64}}:
+ [1 2; 3 4]     [5 6; 7 8]
+ [9 10; 11 12]  [13 14; 15 16]
+
+julia> permutedims(X)
+2×2 Matrix{Matrix{Int64}}:
+ [1 2; 3 4]  [9 10; 11 12]
+ [5 6; 7 8]  [13 14; 15 16]
+
+julia> transpose(X)
+2×2 transpose(::Matrix{Matrix{Int64}}) with eltype Transpose{Int64, Matrix{Int64}}:
+ [1 3; 2 4]  [9 11; 10 12]
+ [5 7; 6 8]  [13 15; 14 16]
+```
+
+## Multi-dimensional arrays
 ```jldoctest
 julia> A = reshape(Vector(1:8), (2,2,2))
 2×2×2 Array{Int64, 3}:
@@ -145,54 +198,62 @@ function permutedims(A::AbstractArray, perm)
     permutedims!(dest, A, perm)
 end
 
-"""
-    permutedims(m::AbstractMatrix)
-
-Permute the dimensions of the matrix `m`, by flipping the elements across the diagonal of
-the matrix. Differs from `LinearAlgebra`'s [`transpose`](@ref) in that the
-operation is not recursive.
-
-# Examples
-```jldoctest; setup = :(using LinearAlgebra)
-julia> a = [1 2; 3 4];
-
-julia> b = [5 6; 7 8];
-
-julia> c = [9 10; 11 12];
-
-julia> d = [13 14; 15 16];
-
-julia> X = [[a] [b]; [c] [d]]
-2×2 Matrix{Matrix{Int64}}:
- [1 2; 3 4]     [5 6; 7 8]
- [9 10; 11 12]  [13 14; 15 16]
-
-julia> permutedims(X)
-2×2 Matrix{Matrix{Int64}}:
- [1 2; 3 4]  [9 10; 11 12]
- [5 6; 7 8]  [13 14; 15 16]
-
-julia> transpose(X)
-2×2 transpose(::Matrix{Matrix{Int64}}) with eltype Transpose{Int64, Matrix{Int64}}:
- [1 3; 2 4]  [9 11; 10 12]
- [5 7; 6 8]  [13 15; 14 16]
-```
-"""
 permutedims(A::AbstractMatrix) = permutedims(A, (2,1))
 
 """
     permutedims(v::AbstractVector)
 
 Reshape vector `v` into a `1 × length(v)` row matrix.
-Differs from `LinearAlgebra`'s [`transpose`](@ref) in that
-the operation is not recursive.
+Differs from [`transpose`](@ref) in that
+the operation is not recursive, which is especially useful for arrays of non-numeric values
+(where the recursive `transpose` might throw an error).
 
 # Examples
+Unlike `transpose`, `permutedims` can be used on vectors of
+arbitrary non-numeric elements, such as strings:
+```jldoctest
+julia> permutedims(["a", "b", "c"])
+1×3 Matrix{String}:
+ "a"  "b"  "c"
+```
+For vectors of numbers, `permutedims(v)` works much like `transpose(v)`
+except that the return type differs (it uses [`reshape`](@ref)
+rather than a `LinearAlgebra.Transpose` view, though both
+share memory with the original array `v`):
 ```jldoctest; setup = :(using LinearAlgebra)
-julia> permutedims([1, 2, 3, 4])
+julia> v = [1, 2, 3, 4]
+4-element Vector{Int64}:
+ 1
+ 2
+ 3
+ 4
+
+julia> p = permutedims(v)
 1×4 Matrix{Int64}:
  1  2  3  4
 
+julia> r = transpose(v)
+1×4 transpose(::Vector{Int64}) with eltype Int64:
+ 1  2  3  4
+
+julia> p == r
+true
+
+julia> typeof(r)
+Transpose{Int64, Vector{Int64}}
+
+julia> p[1] = 5; r[2] = 6; # mutating p or r also changes v
+
+julia> v # shares memory with both p and r
+4-element Vector{Int64}:
+ 5
+ 6
+ 3
+ 4
+```
+However, `permutedims` produces results that differ from `transpose`
+for vectors whose elements are themselves numeric matrices:
+```jldoctest; setup = :(using LinearAlgebra)
 julia> V = [[[1 2; 3 4]]; [[5 6; 7 8]]]
 2-element Vector{Matrix{Int64}}:
  [1 2; 3 4]
@@ -221,7 +282,7 @@ regions.
 See also [`permutedims`](@ref).
 """
 function permutedims!(dest, src::AbstractArray, perm)
-    Base.checkdims_perm(dest, src, perm)
+    Base.checkdims_perm(axes(dest), axes(src), perm)
     P = PermutedDimsArray(dest, invperm(perm))
     _copy!(P, src)
     return dest
diff --git a/base/pkgid.jl b/base/pkgid.jl
index 20d9de559b334..7ef7c58eee4cc 100644
--- a/base/pkgid.jl
+++ b/base/pkgid.jl
@@ -17,13 +17,13 @@ end
 ==(a::PkgId, b::PkgId) = a.uuid == b.uuid && a.name == b.name
 
 function hash(pkg::PkgId, h::UInt)
-    h += 0xc9f248583a0ca36c % UInt
+    h ⊻= 0xc9f248583a0ca36c % UInt
     h = hash(pkg.uuid, h)
     h = hash(pkg.name, h)
     return h
 end
 
-show(io::IO, pkg::PkgId) =
+show(io::IO,  ::MIME"text/plain", pkg::PkgId) =
     print(io, pkg.name, " [", pkg.uuid === nothing ? "top-level" : pkg.uuid, "]")
 
 function binpack(pkg::PkgId)
@@ -32,12 +32,13 @@ function binpack(pkg::PkgId)
     uuid = pkg.uuid
     write(io, uuid === nothing ? UInt128(0) : UInt128(uuid))
     write(io, pkg.name)
-    return String(take!(io))
+    return unsafe_takestring!(io)
 end
 
 function binunpack(s::String)
     io = IOBuffer(s)
-    @assert read(io, UInt8) === 0x00
+    z = read(io, UInt8)
+    @assert z === 0x00
     uuid = read(io, UInt128)
     name = read(io, String)
     return PkgId(UUID(uuid), name)
diff --git a/base/pointer.jl b/base/pointer.jl
index a47f1e38edb9b..72c567eaf2a85 100644
--- a/base/pointer.jl
+++ b/base/pointer.jl
@@ -54,17 +54,38 @@ See also [`cconvert`](@ref)
 """
 function unsafe_convert end
 
-unsafe_convert(::Type{Ptr{UInt8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{UInt8}, (Any,), x)
-unsafe_convert(::Type{Ptr{Int8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{Int8}, (Any,), x)
-unsafe_convert(::Type{Ptr{UInt8}}, s::String) = ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s)
-unsafe_convert(::Type{Ptr{Int8}}, s::String) = ccall(:jl_string_ptr, Ptr{Int8}, (Any,), s)
 # convert strings to String etc. to pass as pointers
 cconvert(::Type{Ptr{UInt8}}, s::AbstractString) = String(s)
 cconvert(::Type{Ptr{Int8}}, s::AbstractString) = String(s)
+unsafe_convert(::Type{Ptr{UInt8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{UInt8}, (Any,), x)
+unsafe_convert(::Type{Ptr{Int8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{Int8}, (Any,), x)
 
-unsafe_convert(::Type{Ptr{T}}, a::Array{T}) where {T} = ccall(:jl_array_ptr, Ptr{T}, (Any,), a)
+cconvert(::Type{<:Ptr}, a::Array) = getfield(a, :ref)
 unsafe_convert(::Type{Ptr{S}}, a::AbstractArray{T}) where {S,T} = convert(Ptr{S}, unsafe_convert(Ptr{T}, a))
+unsafe_convert(::Type{Ptr{T}}, a::Array{T}) where {T} = unsafe_convert(Ptr{T}, a.ref)
 unsafe_convert(::Type{Ptr{T}}, a::AbstractArray{T}) where {T} = error("conversion to pointer not defined for $(typeof(a))")
+# TODO: add this deprecation to give a better error:
+# cconvert(::Type{<:Ptr}, a::AbstractArray) = error("conversion to pointer not defined for $(typeof(a))")
+# unsafe_convert(::Type{Ptr{T}}, a::AbstractArray{T}) where {T} = error("missing call to cconvert for call to unsafe_convert for AbstractArray")
+
+cconvert(::Type{<:Ptr}, a::GenericMemory) = a
+unsafe_convert(::Type{Ptr{Cvoid}}, a::GenericMemory{T}) where {T} = getfield(a, :ptr)
+unsafe_convert(::Type{Ptr{T}}, a::GenericMemory) where {T} = convert(Ptr{T}, getfield(a, :ptr))
+
+function unsafe_convert(::Type{Ptr{Cvoid}}, a::GenericMemoryRef{<:Any,T,Core.CPU}) where {T}
+    mem = getfield(a, :mem)
+    offset = getfield(a, :ptr_or_offset)
+    MemT = typeof(mem)
+    arrayelem = datatype_arrayelem(MemT)
+    elsz = datatype_layoutsize(MemT)
+    isboxed = 1; isunion = 2
+    if arrayelem == isunion || elsz == 0
+        offset = UInt(offset) * elsz
+        offset += unsafe_convert(Ptr{Cvoid}, mem)
+    end
+    return offset
+end
+unsafe_convert(::Type{Ptr{T}}, a::GenericMemoryRef) where {T} = convert(Ptr{T}, unsafe_convert(Ptr{Cvoid}, a))
 
 # unsafe pointer to array conversions
 """
@@ -92,10 +113,21 @@ function unsafe_wrap(::Union{Type{Array},Type{Array{T}},Type{Array{T,1}}},
     ccall(:jl_ptr_to_array_1d, Array{T,1},
           (Any, Ptr{Cvoid}, Csize_t, Cint), Array{T,1}, p, d, own)
 end
-unsafe_wrap(Atype::Union{Type{Array},Type{Array{T}},Type{Array{T,N}}},
+function unsafe_wrap(::Union{Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}},
+                     p::Ptr{T}, dims::Tuple{Int}; own::Bool = false) where {kind,T}
+    ccall(:jl_ptr_to_genericmemory, Ref{GenericMemory{kind,T,Core.CPU}},
+          (Any, Ptr{Cvoid}, Csize_t, Cint), GenericMemory{kind,T,Core.CPU}, p, dims[1], own)
+end
+function unsafe_wrap(::Union{Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}},
+                     p::Ptr{T}, d::Integer; own::Bool = false) where {kind,T}
+    ccall(:jl_ptr_to_genericmemory, Ref{GenericMemory{kind,T,Core.CPU}},
+          (Any, Ptr{Cvoid}, Csize_t, Cint), GenericMemory{kind,T,Core.CPU}, p, d, own)
+end
+unsafe_wrap(Atype::Union{Type{Array},Type{Array{T}},Type{Array{T,N}},Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}} where {kind},
             p::Ptr{T}, dims::NTuple{N,<:Integer}; own::Bool = false) where {T,N} =
     unsafe_wrap(Atype, p, convert(Tuple{Vararg{Int}}, dims), own = own)
 
+
 """
     unsafe_load(p::Ptr{T}, i::Integer=1)
     unsafe_load(p::Ptr{T}, order::Symbol)
@@ -135,7 +167,7 @@ The `unsafe` prefix on this function indicates that no validation is performed o
 pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring
 that referenced memory is not freed or garbage collected while invoking this function.
 Incorrect usage may segfault your program. Unlike C, storing memory region allocated as
-different type may be valid provided that that the types are compatible.
+different type may be valid provided that the types are compatible.
 
 !!! compat "Julia 1.10"
      The `order` argument is available as of Julia 1.10.
@@ -150,7 +182,7 @@ function unsafe_store!(p::Ptr, x, i::Integer, order::Symbol)
 end
 
 """
-    unsafe_modify!(p::Ptr{T}, op, x, [order::Symbol]) -> Pair
+    unsafe_modify!(p::Ptr{T}, op, x, [order::Symbol])::Pair
 
 These atomically perform the operations to get and set a memory address after applying
 the function `op`. If supported by the hardware (for example, atomic increment), this may be
@@ -279,8 +311,8 @@ isless(x::Ptr{T}, y::Ptr{T}) where {T} = x < y
 <(x::Ptr,  y::Ptr) = UInt(x) < UInt(y)
 -(x::Ptr,  y::Ptr) = UInt(x) - UInt(y)
 
-+(x::Ptr, y::Integer) = oftype(x, add_ptr(UInt(x), (y % UInt) % UInt))
--(x::Ptr, y::Integer) = oftype(x, sub_ptr(UInt(x), (y % UInt) % UInt))
++(x::Ptr, y::Integer) = add_ptr(x, (y % UInt) % UInt)
+-(x::Ptr, y::Integer) = sub_ptr(x, (y % UInt) % UInt)
 +(x::Integer, y::Ptr) = y + x
 
 unsigned(x::Ptr) = UInt(x)
diff --git a/base/precompilation.jl b/base/precompilation.jl
new file mode 100644
index 0000000000000..52ffdb3fb69bd
--- /dev/null
+++ b/base/precompilation.jl
@@ -0,0 +1,1389 @@
+module Precompilation
+
+using Base: CoreLogging, PkgId, UUID, SHA1, parsed_toml, project_file_name_uuid, project_names,
+            project_file_manifest_path, get_deps, preferences_names, isaccessibledir, isfile_casesensitive,
+            base_project, isdefined
+
+# This is currently only used for pkgprecompile but the plan is to use this in code loading in the future
+# see the `kc/codeloading2.0` branch
+struct ExplicitEnv
+    path::String
+    project_deps::Dict{String, UUID} # [deps] in Project.toml
+    project_weakdeps::Dict{String, UUID} # [weakdeps] in Project.toml
+    project_extras::Dict{String, UUID} # [extras] in Project.toml
+    project_extensions::Dict{String, Vector{UUID}} # [exts] in Project.toml
+    deps::Dict{UUID, Vector{UUID}} # all dependencies in Manifest.toml
+    weakdeps::Dict{UUID, Vector{UUID}} # all weak dependencies in Manifest.toml
+    extensions::Dict{UUID, Dict{String, Vector{UUID}}}
+    # Lookup name for a UUID
+    names::Dict{UUID, String}
+    lookup_strategy::Dict{UUID, Union{
+                                      SHA1,     # `git-tree-sha1` entry
+                                      String,   # `path` entry
+                                      Nothing,  # stdlib (no `path` nor `git-tree-sha1`)
+                                      Missing}} # not present in the manifest
+    #prefs::Union{Nothing, Dict{String, Any}}
+    #local_prefs::Union{Nothing, Dict{String, Any}}
+end
+
+ExplicitEnv() = ExplicitEnv(Base.active_project())
+function ExplicitEnv(::Nothing, envpath::String="")
+    ExplicitEnv(envpath,
+        Dict{String, UUID}(),     # project_deps
+        Dict{String, UUID}(),     # project_weakdeps
+        Dict{String, UUID}(),     # project_extras
+        Dict{String, Vector{UUID}}(), # project_extensions
+        Dict{UUID, Vector{UUID}}(),   # deps
+        Dict{UUID, Vector{UUID}}(),   # weakdeps
+        Dict{UUID, Dict{String, Vector{UUID}}}(), # extensions
+        Dict{UUID, String}(),     # names
+        Dict{UUID, Union{SHA1, String, Nothing, Missing}}())
+end
+function ExplicitEnv(envpath::String)
+    # Handle missing project file by creating an empty environment
+    if !isfile(envpath) || project_file_manifest_path(envpath) === nothing
+        envpath = abspath(envpath)
+        return ExplicitEnv(nothing, envpath)
+    end
+    envpath = abspath(envpath)
+    project_d = parsed_toml(envpath)
+
+    # TODO: Perhaps verify that two packages with the same UUID do not have different names?
+    names = Dict{UUID, String}()
+    project_uuid_to_name = Dict{String, UUID}()
+
+    project_deps = Dict{String, UUID}()
+    project_weakdeps = Dict{String, UUID}()
+    project_extras = Dict{String, UUID}()
+
+    # Collect all direct dependencies of the project
+    for key in ["deps", "weakdeps", "extras"]
+        for (name, _uuid) in get(Dict{String, Any}, project_d, key)::Dict{String, Any}
+            v = key == "deps" ? project_deps :
+                key == "weakdeps" ? project_weakdeps :
+                key == "extras" ? project_extras :
+                error()
+            uuid = UUID(_uuid::String)
+            v[name] = uuid
+            names[UUID(uuid)] = name
+            project_uuid_to_name[name] = UUID(uuid)
+        end
+    end
+
+    # A package in both deps and weakdeps is in fact only a weakdep
+    for (name, _) in project_weakdeps
+        delete!(project_deps, name)
+    end
+
+    # This project might be a package, in that case, that is also a "dependency"
+    # of the project.
+    proj_name = get(project_d, "name", nothing)::Union{String, Nothing}
+    _proj_uuid = get(project_d, "uuid", nothing)::Union{String, Nothing}
+    proj_uuid = _proj_uuid === nothing ? nothing : UUID(_proj_uuid)
+
+    project_is_package = proj_name !== nothing && proj_uuid !== nothing
+    if project_is_package
+        # TODO: Error on missing uuid?
+        project_deps[proj_name] = UUID(proj_uuid)
+        names[UUID(proj_uuid)] = proj_name
+    end
+
+    project_extensions = Dict{String, Vector{UUID}}()
+    # Collect all extensions of the project
+    for (name, triggers) in get(Dict{String, Any}, project_d, "extensions")::Dict{String, Any}
+        if triggers isa String
+            triggers = [triggers]
+        else
+            triggers = triggers::Vector{String}
+        end
+        uuids = UUID[]
+        for trigger in triggers
+            uuid = get(project_uuid_to_name, trigger, nothing)
+            if uuid === nothing
+                error("Trigger $trigger for extension $name not found in project")
+            end
+            push!(uuids, uuid)
+        end
+        project_extensions[name] = uuids
+    end
+
+    manifest = project_file_manifest_path(envpath)
+    manifest_d = manifest === nothing ? Dict{String, Any}() : parsed_toml(manifest)
+
+    # Dependencies in a manifest can either be stored compressed (when name is unique among all packages)
+    # in which case it is a `Vector{String}` or expanded where it is a `name => uuid` mapping.
+    deps = Dict{UUID, Union{Vector{String}, Vector{UUID}}}()
+    weakdeps = Dict{UUID, Union{Vector{String}, Vector{UUID}}}()
+    extensions = Dict{UUID, Dict{String, Vector{String}}}()
+    name_to_uuid = Dict{String, UUID}()
+    lookup_strategy = Dict{UUID, Union{SHA1, String, Nothing, Missing}}()
+
+    sizehint!(deps, length(manifest_d))
+    sizehint!(weakdeps, length(manifest_d))
+    sizehint!(extensions, length(manifest_d))
+    sizehint!(name_to_uuid, length(manifest_d))
+    sizehint!(lookup_strategy, length(manifest_d))
+
+    for (name, pkg_infos) in get_deps(manifest_d)
+        for pkg_info in pkg_infos::Vector{Any}
+            pkg_info = pkg_info::Dict{String, Any}
+            m_uuid = UUID(pkg_info["uuid"]::String)
+
+            # If we have multiple packages with the same name we will overwrite things here
+            # but that is fine since we will only use the information in here for packages
+            # with unique names
+            names[m_uuid] = name
+            name_to_uuid[name] = m_uuid
+
+            for key in ["deps", "weakdeps"]
+                deps_pkg = get(Vector{String}, pkg_info, key)::Union{Vector{String}, Dict{String, Any}}
+                d = key == "deps" ? deps :
+                    key == "weakdeps" ? weakdeps :
+                    error()
+
+                # Compressed format with unique names:
+                if deps_pkg isa Vector{String}
+                    d[m_uuid] = deps_pkg
+                # Expanded format:
+                else
+                    uuids = UUID[]
+                    for (name_dep, _dep_uuid) in deps_pkg
+                        dep_uuid = UUID(_dep_uuid::String)
+                        push!(uuids, dep_uuid)
+                        names[dep_uuid] = name_dep
+                    end
+                    d[m_uuid] = uuids
+                end
+            end
+
+            # Extensions
+            deps_pkg = get(Dict{String, Any}, pkg_info, "extensions")::Dict{String, Any}
+            deps_pkg_concrete = Dict{String, Vector{String}}()
+            for (ext, triggers) in deps_pkg
+                if triggers isa String
+                    triggers = [triggers]
+                else
+                    triggers = triggers::Vector{String}
+                end
+                deps_pkg_concrete[ext] = triggers
+            end
+            extensions[m_uuid] = deps_pkg_concrete
+
+            # Determine strategy to find package
+            lookup_strat = begin
+                if (path = get(pkg_info, "path", nothing)::Union{String, Nothing}) !== nothing
+                    path
+                elseif (git_tree_sha_str = get(pkg_info, "git-tree-sha1", nothing)::Union{String, Nothing}) !== nothing
+                    SHA1(git_tree_sha_str)
+                else
+                    nothing
+                end
+            end
+            lookup_strategy[m_uuid] = lookup_strat
+        end
+    end
+
+    # No matter if the deps were stored compressed or not in the manifest,
+    # we internally store them expanded
+    deps_expanded = Dict{UUID, Vector{UUID}}()
+    weakdeps_expanded = Dict{UUID, Vector{UUID}}()
+    extensions_expanded = Dict{UUID, Dict{String, Vector{UUID}}}()
+    sizehint!(deps_expanded, length(deps))
+    sizehint!(weakdeps_expanded, length(deps))
+    sizehint!(extensions_expanded, length(deps))
+
+    if proj_name !== nothing && proj_uuid !== nothing
+        deps_expanded[proj_uuid] = filter!(!=(proj_uuid), collect(values(project_deps)))
+        extensions_expanded[proj_uuid] = project_extensions
+        path = get(project_d, "path", nothing)::Union{String, Nothing}
+        entry_point = path !== nothing ? path : dirname(envpath)
+        lookup_strategy[proj_uuid] = entry_point
+    end
+
+    for key in ["deps", "weakdeps"]
+        d = key == "deps" ? deps :
+            key == "weakdeps" ? weakdeps :
+            error()
+        d_expanded = key == "deps" ? deps_expanded :
+                     key == "weakdeps" ? weakdeps_expanded :
+                     error()
+        for (pkg, deps) in d
+            # dependencies was already expanded so use it directly:
+            if deps isa Vector{UUID}
+                d_expanded[pkg] = deps
+                for dep in deps
+                    name_to_uuid[names[dep]] = dep
+                end
+            # find the (unique) UUID associated with the name
+            else
+                deps_pkg = UUID[]
+                sizehint!(deps_pkg, length(deps))
+                for dep in deps
+                    push!(deps_pkg, name_to_uuid[dep])
+                end
+                d_expanded[pkg] = deps_pkg
+            end
+        end
+    end
+
+    for (pkg, exts) in extensions
+        exts_expanded = Dict{String, Vector{UUID}}()
+        for (ext, triggers) in exts
+            triggers_expanded = UUID[]
+            sizehint!(triggers_expanded, length(triggers))
+            for trigger in triggers
+                push!(triggers_expanded, name_to_uuid[trigger])
+            end
+            exts_expanded[ext] = triggers_expanded
+        end
+        extensions_expanded[pkg] = exts_expanded
+    end
+
+    # Everything that does not yet have a lookup_strategy is missing from the manifest
+    for (_, uuid) in project_deps
+        get!(lookup_strategy, uuid, missing)
+    end
+
+    #=
+    # Preferences:
+    prefs = get(project_d, "preferences", nothing)
+
+    # `(Julia)LocalPreferences.toml`
+    project_dir = dirname(envpath)
+    local_prefs = nothing
+    for name in preferences_names
+        toml_path = joinpath(project_dir, name)
+        if isfile(toml_path)
+            local_prefs = parsed_toml(toml_path)
+            break
+        end
+    end
+    =#
+
+    return ExplicitEnv(envpath, project_deps, project_weakdeps, project_extras,
+                       project_extensions, deps_expanded, weakdeps_expanded, extensions_expanded,
+                       names, lookup_strategy, #=prefs, local_prefs=#)
+end
+
+## PROGRESS BAR
+
+# using Printf
+Base.@kwdef mutable struct MiniProgressBar
+    max::Int = 1.0
+    header::String = ""
+    color::Symbol = :nothing
+    width::Int = 40
+    current::Int = 0.0
+    prev::Int = 0.0
+    has_shown::Bool = false
+    time_shown::Float64 = 0.0
+    percentage::Bool = true
+    always_reprint::Bool = false
+    indent::Int = 4
+end
+
+const PROGRESS_BAR_TIME_GRANULARITY = Ref(1 / 30.0) # 30 fps
+const PROGRESS_BAR_PERCENTAGE_GRANULARITY = Ref(0.1)
+
+function start_progress(io::IO, _::MiniProgressBar)
+    ansi_disablecursor = "\e[?25l"
+    print(io, ansi_disablecursor)
+end
+
+function show_progress(io::IO, p::MiniProgressBar; termwidth=nothing, carriagereturn=true)
+    if p.max == 0
+        perc = 0.0
+        prev_perc = 0.0
+    else
+        perc = p.current / p.max * 100
+        prev_perc = p.prev / p.max * 100
+    end
+    # Bail early if we are not updating the progress bar,
+    # Saves printing to the terminal
+    if !p.always_reprint && p.has_shown && !((perc - prev_perc) > PROGRESS_BAR_PERCENTAGE_GRANULARITY[])
+        return
+    end
+    t = time()
+    if !p.always_reprint && p.has_shown && (t - p.time_shown) < PROGRESS_BAR_TIME_GRANULARITY[]
+        return
+    end
+    p.time_shown = t
+    p.prev = p.current
+    p.has_shown = true
+
+    progress_text = if false # p.percentage
+        # @sprintf "%2.1f %%" perc
+    else
+        string(p.current, "/",  p.max)
+    end
+    termwidth = @something termwidth (displaysize(io)::Tuple{Int,Int})[2]
+    max_progress_width = max(0, min(termwidth - textwidth(p.header) - textwidth(progress_text) - 10 , p.width))
+    n_filled = floor(Int, max_progress_width * perc / 100)
+    partial_filled = (max_progress_width * perc / 100) - n_filled
+    n_left = max_progress_width - n_filled
+    headers = split(p.header, ' ')
+    to_print = sprint(; context=io) do io
+        print(io, " "^p.indent)
+        printstyled(io, headers[1], " "; color=:green, bold=true)
+        printstyled(io, join(headers[2:end], ' '))
+        print(io, " ")
+        printstyled(io, "━"^n_filled; color=p.color)
+        if n_left > 0
+            if partial_filled > 0.5
+                printstyled(io, "╸"; color=p.color) # More filled, use ╸
+            else
+                printstyled(io, "╺"; color=:light_black) # Less filled, use ╺
+            end
+            printstyled(io, "━"^(n_left-1); color=:light_black)
+        end
+        printstyled(io, " "; color=:light_black)
+        print(io, progress_text)
+        carriagereturn && print(io, "\r")
+    end
+    # Print everything in one call
+    print(io, to_print)
+end
+
+function end_progress(io, p::MiniProgressBar)
+    ansi_enablecursor = "\e[?25h"
+    ansi_clearline = "\e[2K"
+    print(io, ansi_enablecursor * ansi_clearline)
+end
+
+function print_progress_bottom(io::IO)
+    ansi_clearline = "\e[2K"
+    ansi_movecol1 = "\e[1G"
+    ansi_moveup(n::Int) = string("\e[", n, "A")
+    print(io, "\e[S" * ansi_moveup(1) * ansi_clearline * ansi_movecol1)
+end
+
+
+############
+struct PkgPrecompileError <: Exception
+    msg::String
+end
+Base.showerror(io::IO, err::PkgPrecompileError) = print(io, err.msg)
+Base.showerror(io::IO, err::PkgPrecompileError, bt; kw...) = Base.showerror(io, err) # hide stacktrace
+
+# This needs a show method to make `julia> err` show nicely
+Base.show(io::IO, err::PkgPrecompileError) = print(io, "PkgPrecompileError: ", err.msg)
+
+import Base: StaleCacheKey
+
+can_fancyprint(io::IO) = @something(get(io, :force_fancyprint, nothing), (io isa Base.TTY && (get(ENV, "CI", nothing) != "true")))
+
+function printpkgstyle(io, header, msg; color=:green)
+    printstyled(io, header; color, bold=true)
+    println(io, " ", msg)
+end
+
+const Config = Pair{Cmd, Base.CacheFlags}
+const PkgConfig = Tuple{PkgId,Config}
+
+# name or parent → ext
+function full_name(ext_to_parent::Dict{PkgId, PkgId}, pkg::PkgId)
+    if haskey(ext_to_parent, pkg)
+        return string(ext_to_parent[pkg].name, " → ", pkg.name)
+    else
+        return pkg.name
+    end
+end
+
+function excluded_circular_deps_explanation(io::IOContext{IO}, ext_to_parent::Dict{PkgId, PkgId}, circular_deps, cycles)
+    outer_deps = copy(circular_deps)
+    cycles_names = ""
+    for cycle in cycles
+        filter!(!in(cycle), outer_deps)
+        cycle_str = ""
+        for (i, pkg) in enumerate(cycle)
+            j = max(0, i - 1)
+            if length(cycle) == 1
+                line = " ─ "
+            elseif i == 1
+                line = " ┌ "
+            elseif i < length(cycle)
+                line = " │ " * " " ^j
+            else
+                line = " └" * "─" ^j * " "
+            end
+            hascolor = get(io, :color, false)::Bool # XXX: this output does not go to `io` so this is bad to call here
+            line = _color_string(line, :light_black, hascolor) * full_name(ext_to_parent, pkg) * "\n"
+            cycle_str *= line
+        end
+        cycles_names *= cycle_str
+    end
+    plural1 = length(cycles) > 1 ? "these cycles" : "this cycle"
+    plural2 = length(cycles) > 1 ? "cycles" : "cycle"
+    msg = """Circular dependency detected.
+    Precompilation will be skipped for dependencies in $plural1:
+    $cycles_names"""
+    if !isempty(outer_deps)
+        msg *= "Precompilation will also be skipped for the following, which depend on the above $plural2:\n"
+        msg *= join(("  " * full_name(ext_to_parent, pkg) for pkg in outer_deps), "\n")
+    end
+    return msg
+end
+
+
+function scan_pkg!(stack, could_be_cycle, cycles, pkg, dmap)
+    if haskey(could_be_cycle, pkg)
+        return could_be_cycle[pkg]
+    else
+        return scan_deps!(stack, could_be_cycle, cycles, pkg, dmap)
+    end
+end
+function scan_deps!(stack, could_be_cycle, cycles, pkg, dmap)
+    push!(stack, pkg)
+    cycle = nothing
+    for dep in dmap[pkg]
+        if dep in stack
+            # Created fresh cycle
+            cycle′ = stack[findlast(==(dep), stack):end]
+            if cycle === nothing || length(cycle′) < length(cycle)
+                cycle = cycle′ # try to report smallest cycle possible
+            end
+        elseif scan_pkg!(stack, could_be_cycle, cycles, dep, dmap)
+            # Reaches an existing cycle
+            could_be_cycle[pkg] = true
+            pop!(stack)
+            return true
+        end
+    end
+    pop!(stack)
+    if cycle !== nothing
+        push!(cycles, cycle)
+        could_be_cycle[pkg] = true
+        return true
+    end
+    could_be_cycle[pkg] = false
+    return false
+end
+
+# restrict to dependencies of given packages
+function collect_all_deps(direct_deps, dep, alldeps=Set{Base.PkgId}())
+    for _dep in direct_deps[dep]
+        if !(_dep in alldeps)
+            push!(alldeps, _dep)
+            collect_all_deps(direct_deps, _dep, alldeps)
+        end
+    end
+    return alldeps
+end
+
+
+"""
+    precompilepkgs(pkgs; kwargs...)
+
+Precompile packages and their dependencies, with support for parallel compilation,
+progress tracking, and various compilation configurations.
+
+`pkgs::Union{Vector{String}, Vector{PkgId}}`: Packages to precompile. When
+empty (default), precompiles all project dependencies. When specified,
+precompiles only the given packages and their dependencies (unless
+`manifest=true`).
+
+!!! note
+    Errors will only throw when precompiling the top-level dependencies, given that
+    not all manifest dependencies may be loaded by the top-level dependencies on the given system.
+    This can be overridden to make errors in all dependencies throw by setting the kwarg `strict` to `true`
+
+# Keyword Arguments
+- `internal_call::Bool`: Indicates this is an automatic precompilation call
+  from somewhere external (e.g. Pkg). Do not use this parameter.
+
+- `strict::Bool`: Controls error reporting scope. When `false` (default), only reports
+  errors for direct project dependencies. Only relevant when `manifest=true`.
+
+- `warn_loaded::Bool`: When `true` (default), checks for and warns about packages that are
+  precompiled but already loaded with a different version. Displays a warning that Julia
+  needs to be restarted to use the newly precompiled versions.
+
+- `timing::Bool`: When `true` (not default), displays timing information for
+  each package compilation, but only if compilation might have succeeded.
+  Disables fancy progress bar output (timing is shown in simple text mode).
+
+- `_from_loading::Bool`: Internal flag indicating the call originated from the
+  package loading system. When `true` (not default): returns early instead of
+  throwing when packages are not found; suppresses progress messages when not
+  in an interactive session; allows packages outside the current environment to
+  be added as serial precompilation jobs; skips LOADING_CACHE initialization;
+  and changes cachefile locking behavior.
+
+- `configs::Union{Config,Vector{Config}}`: Compilation configurations to use. Each Config
+  is a `Pair{Cmd, Base.CacheFlags}` specifying command flags and cache flags. When
+  multiple configs are provided, each package is precompiled for each configuration.
+
+- `io::IO`: Output stream for progress messages, warnings, and errors. Can be
+  redirected (e.g., to `devnull` when called from loading in non-interactive mode).
+
+- `fancyprint::Bool`: Controls output format. When `true`, displays an animated progress
+  bar with spinners. When `false`, instead enables `timing` mode. Automatically
+  disabled when `timing=true` or when called from loading in non-interactive mode.
+
+- `manifest::Bool`: Controls the scope of packages to precompile. When `false` (default),
+  precompiles only packages specified in `pkgs` and their dependencies. When `true`,
+  precompiles all packages in the manifest (workspace mode), typically used by Pkg for
+  workspace precompile requests.
+
+- `ignore_loaded::Bool`: Controls whether already-loaded packages affect cache
+  freshness checks. When `false` (not default), loaded package versions are considered when
+  determining if cache files are fresh.
+
+# Return
+- `Vector{String}`: Paths to cache files for the requested packages.
+- `Nothing`: precompilation should be skipped
+
+# Notes
+- Packages in circular dependency cycles are skipped with a warning.
+- Packages with `__precompile__(false)` are skipped if they are from loading to
+  avoid repeated work on every session.
+- Parallel compilation is controlled by `JULIA_NUM_PRECOMPILE_TASKS` environment variable
+  (defaults to CPU_THREADS + 1, capped at 16, halved on Windows).
+- Extensions are precompiled when all their triggers are available in the environment.
+"""
+function precompilepkgs(pkgs::Union{Vector{String}, Vector{PkgId}}=String[];
+                        internal_call::Bool=false,
+                        strict::Bool = false,
+                        warn_loaded::Bool = true,
+                        timing::Bool = false,
+                        _from_loading::Bool=false,
+                        configs::Union{Config,Vector{Config}}=(``=>Base.CacheFlags()),
+                        io::IO=stderr,
+                        # asking for timing disables fancy mode, as timing is shown in non-fancy mode
+                        fancyprint::Bool = can_fancyprint(io) && !timing,
+                        manifest::Bool=false,
+                        ignore_loaded::Bool=true)
+    @debug "precompilepkgs called with" pkgs internal_call strict warn_loaded timing _from_loading configs fancyprint manifest ignore_loaded
+    # monomorphize this to avoid latency problems
+    _precompilepkgs(pkgs, internal_call, strict, warn_loaded, timing, _from_loading,
+                   configs isa Vector{Config} ? configs : [configs],
+                   IOContext{IO}(io), fancyprint, manifest, ignore_loaded)
+end
+
+function _precompilepkgs(pkgs::Union{Vector{String}, Vector{PkgId}},
+                         internal_call::Bool,
+                         strict::Bool,
+                         warn_loaded::Bool,
+                         timing::Bool,
+                         _from_loading::Bool,
+                         configs::Vector{Config},
+                         io::IOContext{IO},
+                         fancyprint::Bool,
+                         manifest::Bool,
+                         ignore_loaded::Bool)
+    requested_pkgs = copy(pkgs) # for understanding user intent
+    pkg_names = pkgs isa Vector{String} ? copy(pkgs) : String[pkg.name for pkg in pkgs]
+    if pkgs isa Vector{PkgId}
+        requested_pkgids = copy(pkgs)
+    else
+        requested_pkgids = PkgId[]
+        for name in pkgs
+            pkgid = Base.identify_package(name)
+            if pkgid === nothing
+                if _from_loading
+                    return # leave it up to loading to handle this
+                else
+                    throw(PkgPrecompileError("Unknown package: $name"))
+                end
+            end
+            push!(requested_pkgids, pkgid)
+        end
+    end
+
+    time_start = time_ns()
+
+    env = ExplicitEnv()
+
+    # Windows sometimes hits a ReadOnlyMemoryError, so we halve the default number of tasks. Issue #2323
+    # TODO: Investigate why this happens in windows and restore the full task limit
+    default_num_tasks = Sys.iswindows() ? div(Sys.EFFECTIVE_CPU_THREADS::Int, 2) + 1 : Sys.EFFECTIVE_CPU_THREADS::Int + 1
+    default_num_tasks = min(default_num_tasks, 16) # limit for better stability on shared resource systems
+
+    num_tasks = max(1, something(tryparse(Int, get(ENV, "JULIA_NUM_PRECOMPILE_TASKS", string(default_num_tasks))), 1))
+    parallel_limiter = Base.Semaphore(num_tasks)
+
+    # suppress precompilation progress messages when precompiling for loading packages, except during interactive sessions
+    # or when specified by logging heuristics that explicitly require it
+    # since the complicated IO implemented here can have somewhat disastrous consequences when happening in the background (e.g. #59599)
+    logio = io
+    logcalls = nothing
+    if _from_loading
+        if isinteractive()
+            logcalls = CoreLogging.Info # sync with Base.compilecache
+        else
+            logio = IOContext{IO}(devnull)
+            fancyprint = false
+            logcalls = CoreLogging.Debug # sync with Base.compilecache
+        end
+    end
+
+    nconfigs = length(configs)
+    hascolor = get(logio, :color, false)::Bool
+    color_string(cstr::String, col::Union{Int64, Symbol}) = _color_string(cstr, col, hascolor)
+
+    stale_cache = Dict{StaleCacheKey, Bool}()
+    cachepath_cache = Dict{PkgId, Vector{String}}()
+
+    # a map from packages/extensions to their direct deps
+    direct_deps = Dict{Base.PkgId, Vector{Base.PkgId}}()
+    # a map from parent → extension, including all extensions that are loadable
+    # in the current environment (i.e. their triggers are present)
+    parent_to_exts = Dict{Base.PkgId, Vector{Base.PkgId}}()
+    # inverse map of `parent_to_ext` above (ext → parent)
+    ext_to_parent = Dict{Base.PkgId, Base.PkgId}()
+
+    function describe_pkg(pkg::PkgId, is_project_dep::Bool, is_serial_dep::Bool, flags::Cmd, cacheflags::Base.CacheFlags)
+        name = full_name(ext_to_parent, pkg)
+        name = is_project_dep ? name : color_string(name, :light_black)
+        if is_serial_dep
+            name *= color_string(" (serial)", :light_black)
+        end
+        if nconfigs > 1 && !isempty(flags)
+            config_str = join(flags, " ")
+            name *= color_string(" `$config_str`", :light_black)
+        end
+        if nconfigs > 1
+            config_str = join(Base.translate_cache_flags(cacheflags, Base.DefaultCacheFlags), " ")
+            name *= color_string(" $config_str", :light_black)
+        end
+        return name
+    end
+
+    triggers = Dict{Base.PkgId,Vector{Base.PkgId}}()
+    for (dep, deps) in env.deps
+        pkg = Base.PkgId(dep, env.names[dep])
+        Base.in_sysimage(pkg) && continue
+        deps = [Base.PkgId(x, env.names[x]) for x in deps]
+        direct_deps[pkg] = filter!(!Base.in_sysimage, deps)
+        for (ext_name, trigger_uuids) in env.extensions[dep]
+            ext_uuid = Base.uuid5(pkg.uuid, ext_name)
+            ext = Base.PkgId(ext_uuid, ext_name)
+            triggers[ext] = Base.PkgId[pkg] # depends on parent package
+            all_triggers_available = true
+            for trigger_uuid in trigger_uuids
+                trigger_name = Base.PkgId(trigger_uuid, env.names[trigger_uuid])
+                if trigger_uuid in keys(env.deps) || Base.in_sysimage(trigger_name)
+                    push!(triggers[ext], trigger_name)
+                else
+                    all_triggers_available = false
+                    break
+                end
+            end
+            all_triggers_available || continue
+            ext_to_parent[ext] = pkg
+            direct_deps[ext] = filter(!Base.in_sysimage, triggers[ext])
+
+            if !haskey(parent_to_exts, pkg)
+                parent_to_exts[pkg] = Base.PkgId[ext]
+            else
+                push!(parent_to_exts[pkg], ext)
+            end
+        end
+    end
+
+    project_deps = [
+        Base.PkgId(uuid, name)
+        for (name, uuid) in env.project_deps if !Base.in_sysimage(Base.PkgId(uuid, name))
+    ]
+
+    # consider exts of project deps to be project deps so that errors are reported
+    append!(project_deps, keys(filter(d->last(d).name in keys(env.project_deps), ext_to_parent)))
+
+    # An extension effectively depends on another extension if it has a strict superset of its triggers
+    for ext_a in keys(ext_to_parent)
+        for ext_b in keys(ext_to_parent)
+            if triggers[ext_a] ⊋ triggers[ext_b]
+                push!(triggers[ext_a], ext_b)
+                push!(direct_deps[ext_a], ext_b)
+            end
+        end
+    end
+
+    # A package depends on an extension if it (indirectly) depends on all extension triggers
+    function expand_indirect_dependencies(direct_deps)
+        function visit!(visited, node, all_deps)
+            if node in visited
+                return
+            end
+            push!(visited, node)
+            for dep in get(Set{Base.PkgId}, direct_deps, node)
+                if !(dep in all_deps)
+                    push!(all_deps, dep)
+                    visit!(visited, dep, all_deps)
+                end
+            end
+        end
+
+        local indirect_deps = Dict{Base.PkgId, Set{Base.PkgId}}()
+        for package in keys(direct_deps)
+            # Initialize a set to keep track of all dependencies for 'package'
+            all_deps = Set{Base.PkgId}()
+            visited = Set{Base.PkgId}()
+            visit!(visited, package, all_deps)
+            # Update direct_deps with the complete set of dependencies for 'package'
+            indirect_deps[package] = all_deps
+        end
+        return indirect_deps
+    end
+
+    # this loop must be run after the full direct_deps map has been populated
+    indirect_deps = expand_indirect_dependencies(direct_deps)
+    for ext in keys(ext_to_parent)
+        ext_loadable_in_pkg = Dict{Base.PkgId,Bool}()
+        for pkg in keys(direct_deps)
+            is_trigger = in(pkg, direct_deps[ext])
+            is_extension = in(pkg, keys(ext_to_parent))
+            has_triggers = issubset(direct_deps[ext], indirect_deps[pkg])
+            ext_loadable_in_pkg[pkg] = !is_extension && has_triggers && !is_trigger
+        end
+        for (pkg, ext_loadable) in ext_loadable_in_pkg
+            if ext_loadable && !any((dep)->ext_loadable_in_pkg[dep], direct_deps[pkg])
+                # add an edge if the extension is loadable by pkg, and was not loadable in any
+                # of the pkg's dependencies
+                push!(direct_deps[pkg], ext)
+            end
+        end
+    end
+
+    serial_deps = Base.PkgId[] # packages that are being precompiled in serial
+
+    if _from_loading
+        # if called from loading precompilation it may be a package from another environment stack
+        # where we don't have access to the dep graph, so just add as a single package and do serial
+        # precompilation of its deps within the job.
+        for pkgid in requested_pkgids # In case loading asks for multiple packages
+            pkgid === nothing && continue
+            if !haskey(direct_deps, pkgid)
+                @debug "precompile: package `$(pkgid)` is outside of the environment, so adding as single package serial job"
+                direct_deps[pkgid] = Base.PkgId[] # no deps, do them in serial in the job
+                push!(project_deps, pkgid) # add to project_deps so it doesn't show up in gray
+                push!(serial_deps, pkgid)
+            end
+        end
+    end
+
+    # return early if no deps
+    if isempty(direct_deps)
+        if isempty(pkgs)
+            return
+        else
+            error("No direct dependencies outside of the sysimage found matching $(pkgs)")
+        end
+    end
+
+    # initialize signalling
+    started = Dict{PkgConfig,Bool}()
+    was_processed = Dict{PkgConfig,Base.Event}()
+    was_recompiled = Dict{PkgConfig,Bool}()
+    for config in configs
+        for pkgid in keys(direct_deps)
+            pkg_config = (pkgid, config)
+            started[pkg_config] = false
+            was_processed[pkg_config] = Base.Event()
+            was_recompiled[pkg_config] = false
+        end
+    end
+
+    # find and guard against circular deps
+    cycles = Vector{Base.PkgId}[]
+    # For every scanned package, true if pkg found to be in a cycle
+    # or depends on packages in a cycle and false otherwise.
+    could_be_cycle = Dict{Base.PkgId, Bool}()
+    # temporary stack for the SCC-like algorithm below
+    stack = Base.PkgId[]
+
+    # set of packages that depend on a cycle (either because they are
+    # a part of a cycle themselves or because they transitively depend
+    # on a package in some cycle)
+    circular_deps = Base.PkgId[]
+    for pkg in keys(direct_deps)
+        @assert isempty(stack)
+        pkg in serial_deps && continue # skip serial deps as we don't have their dependency graph
+        if scan_pkg!(stack, could_be_cycle, cycles, pkg, direct_deps)
+            push!(circular_deps, pkg)
+            for (pkg_config, evt) in was_processed
+                # notify all to allow skipping
+                pkg_config[1] == pkg && notify(evt)
+            end
+        end
+    end
+    if !isempty(circular_deps)
+        @warn excluded_circular_deps_explanation(io, ext_to_parent, circular_deps, cycles)
+    end
+
+    # If you have a workspace and want to precompile all projects in it, look through all packages in the manifest
+    # instead of collecting from a project i.e. not filter out packages that are in the current project.
+    # i.e. Pkg sets manifest to true for workspace precompile requests
+    # TODO: rename `manifest`?
+    if !manifest
+        if isempty(pkg_names)
+            pkg_names = [pkg.name for pkg in project_deps]
+        end
+        keep = Set{Base.PkgId}()
+        for dep_pkgid in keys(direct_deps)
+            if dep_pkgid.name in pkg_names
+                push!(keep, dep_pkgid)
+                collect_all_deps(direct_deps, dep_pkgid, keep)
+            end
+        end
+        # Also keep packages that were explicitly requested as PkgIds (for extensions)
+        if pkgs isa Vector{PkgId}
+            for requested_pkgid in requested_pkgids
+                if haskey(direct_deps, requested_pkgid)
+                    push!(keep, requested_pkgid)
+                    collect_all_deps(direct_deps, requested_pkgid, keep)
+                end
+            end
+        end
+        for ext in keys(ext_to_parent)
+            if issubset(collect_all_deps(direct_deps, ext), keep) # if all extension deps are kept
+                push!(keep, ext)
+            end
+        end
+        filter!(d->in(first(d), keep), direct_deps)
+        if isempty(direct_deps)
+            if _from_loading
+                # if called from loading precompilation it may be a package from another environment stack so
+                # don't error and allow serial precompilation to try
+                # TODO: actually handle packages from other envs in the stack
+                return
+            else
+                return
+            end
+        end
+    end
+
+    target = Ref{Union{Nothing, String}}(nothing)
+    if nconfigs == 1
+        if !isempty(only(configs)[1])
+            target[] = "for configuration $(join(only(configs)[1], " "))"
+        end
+    else
+        target[] = "for $nconfigs compilation configurations..."
+    end
+
+    pkg_queue = PkgConfig[]
+    failed_deps = Dict{PkgConfig, String}()
+    precomperr_deps = PkgConfig[] # packages that may succeed after a restart (i.e. loaded packages with no cache file)
+
+    print_lock = io.io isa Base.LibuvStream ? io.io.lock::ReentrantLock : ReentrantLock()
+    first_started = Base.Event()
+    printloop_should_exit = Ref{Bool}(!fancyprint) # exit print loop immediately if not fancy printing
+    interrupted_or_done = Ref{Bool}(false)
+
+    ansi_moveup(n::Int) = string("\e[", n, "A")
+    ansi_movecol1 = "\e[1G"
+    ansi_cleartoend = "\e[0J"
+    ansi_cleartoendofline = "\e[0K"
+    ansi_enablecursor = "\e[?25h"
+    ansi_disablecursor = "\e[?25l"
+    n_done = Ref(0)
+    n_already_precomp = Ref(0)
+    n_loaded = Ref(0)
+    interrupted = Ref(false)
+
+    function handle_interrupt(err, in_printloop::Bool)
+        if err isa InterruptException
+            # record that this interrupted_or_done was from InterruptException
+            interrupted[] = true
+        end
+        interrupted_or_done[] = true
+        # notify all Event sources
+        for (pkg_config, evt) in was_processed
+            notify(evt)
+        end
+        notify(first_started)
+        in_printloop || wait(t_print) # Wait to let the print loop cease first. This makes the printing incorrect, so we shouldn't wait here, but we do anyways.
+        if err isa InterruptException
+            @lock print_lock begin
+                println(io, " Interrupted: Exiting precompilation...", ansi_cleartoendofline)
+            end
+            return true
+        else
+            return false
+        end
+    end
+    std_outputs = Dict{PkgConfig,IOBuffer}()
+    taskwaiting = Set{PkgConfig}()
+    pkgspidlocked = Dict{PkgConfig,String}()
+    pkg_liveprinted = Ref{Union{Nothing, PkgId}}(nothing)
+
+    function monitor_std(pkg_config, pipe; single_requested_pkg=false)
+        local pkg, config = pkg_config
+        try
+            local liveprinting = false
+            local thistaskwaiting = false
+            while !eof(pipe)
+                local str = readline(pipe, keep=true)
+                if single_requested_pkg && (liveprinting || !isempty(str))
+                    @lock print_lock begin
+                        if !liveprinting
+                            liveprinting = true
+                            pkg_liveprinted[] = pkg
+                        end
+                        print(io, ansi_cleartoendofline, str)
+                    end
+                end
+                write(get!(IOBuffer, std_outputs, pkg_config), str)
+                if thistaskwaiting
+                    if occursin("Waiting for background task / IO / timer", str)
+                        thistaskwaiting = true
+                        !liveprinting && !fancyprint && @lock print_lock begin
+                            println(io, pkg.name, color_string(str, Base.warn_color()))
+                        end
+                        push!(taskwaiting, pkg_config)
+                    end
+                else
+                    # XXX: don't just re-enable IO for random packages without printing the context for them first
+                    !liveprinting && !fancyprint && @lock print_lock begin
+                        print(io, ansi_cleartoendofline, str)
+                    end
+                end
+            end
+        catch err
+            err isa InterruptException || rethrow()
+        end
+    end
+
+    ## fancy print loop
+    t_print = @async begin
+        try
+            wait(first_started)
+            (isempty(pkg_queue) || interrupted_or_done[]) && return
+            @lock print_lock begin
+                if target[] !== nothing
+                    printpkgstyle(logio, :Precompiling, target[])
+                end
+                if fancyprint
+                    print(logio, ansi_disablecursor)
+                end
+            end
+            t = Timer(0; interval=1/10)
+            anim_chars = ["◐","◓","◑","◒"]
+            i = 1
+            last_length = 0
+            bar = MiniProgressBar(; indent=0, header = "Precompiling packages ", color = :green, percentage=false, always_reprint=true)
+            n_total = length(direct_deps) * length(configs)
+            bar.max = n_total - n_already_precomp[]
+            final_loop = false
+            n_print_rows = 0
+            while !printloop_should_exit[]
+                @lock print_lock begin
+                    term_size = displaysize(logio)::Tuple{Int, Int}
+                    num_deps_show = max(term_size[1] - 3, 2) # show at least 2 deps
+                    pkg_queue_show = if !interrupted_or_done[] && length(pkg_queue) > num_deps_show
+                        last(pkg_queue, num_deps_show)
+                    else
+                        pkg_queue
+                    end
+                    str_ = sprint() do iostr
+                        if i > 1
+                            print(iostr, ansi_cleartoend)
+                        end
+                        bar.current = n_done[] - n_already_precomp[]
+                        bar.max = n_total - n_already_precomp[]
+                        # when sizing to the terminal width subtract a little to give some tolerance to resizing the
+                        # window between print cycles
+                        termwidth = (displaysize(io)::Tuple{Int,Int})[2] - 4
+                        if !final_loop
+                            s = sprint(io -> show_progress(io, bar; termwidth, carriagereturn=false); context=logio)
+                            print(iostr, Base._truncate_at_width_or_chars(true, s, termwidth), "\n")
+                        end
+                        for pkg_config in pkg_queue_show
+                            dep, config = pkg_config
+                            loaded = warn_loaded && haskey(Base.loaded_modules, dep)
+                            flags, cacheflags = config
+                            name = describe_pkg(dep, dep in project_deps, dep in serial_deps, flags, cacheflags)
+                            line = if pkg_config in precomperr_deps
+                                string(color_string("  ? ", Base.warn_color()), name)
+                            elseif haskey(failed_deps, pkg_config)
+                                string(color_string("  ✗ ", Base.error_color()), name)
+                            elseif was_recompiled[pkg_config]
+                                !loaded && interrupted_or_done[] && continue
+                                loaded || @async begin # keep successful deps visible for short period
+                                    sleep(1);
+                                    filter!(!isequal(pkg_config), pkg_queue)
+                                end
+                                string(color_string("  ✓ ", loaded ? Base.warn_color() : :green), name)
+                            elseif started[pkg_config]
+                                # Offset each spinner animation using the first character in the package name as the seed.
+                                # If not offset, on larger terminal fonts it looks odd that they all sync-up
+                                anim_char = anim_chars[(i + Int(dep.name[1])) % length(anim_chars) + 1]
+                                anim_char_colored = dep in project_deps ? anim_char : color_string(anim_char, :light_black)
+                                waiting = if haskey(pkgspidlocked, pkg_config)
+                                    who_has_lock = pkgspidlocked[pkg_config]
+                                    color_string(" Being precompiled by $(who_has_lock)", Base.info_color())
+                                elseif pkg_config in taskwaiting
+                                    color_string(" Waiting for background task / IO / timer. Interrupt to inspect", Base.warn_color())
+                                else
+                                    ""
+                                end
+                                string("  ", anim_char_colored, " ", name, waiting)
+                            else
+                                string("    ", name)
+                            end
+                            println(iostr, Base._truncate_at_width_or_chars(true, line, termwidth))
+                        end
+                    end
+                    last_length = length(pkg_queue_show)
+                    n_print_rows = count("\n", str_)
+                    print(logio, str_)
+                    printloop_should_exit[] = interrupted_or_done[] && final_loop
+                    final_loop = interrupted_or_done[] # ensures one more loop to tidy last task after finish
+                    i += 1
+                    printloop_should_exit[] || print(logio, ansi_moveup(n_print_rows), ansi_movecol1)
+                end
+                wait(t)
+            end
+        catch err
+            # For debugging:
+            # println("Task failed $err")
+            # Base.display_error(ErrorException(""), Base.catch_backtrace())
+            handle_interrupt(err, true) || rethrow()
+        finally
+            fancyprint && print(logio, ansi_enablecursor)
+        end
+    end
+
+    tasks = Task[]
+    if !_from_loading
+        @lock Base.require_lock begin
+            Base.LOADING_CACHE[] = Base.LoadingCache()
+        end
+    end
+    @debug "precompile: starting precompilation loop" direct_deps project_deps
+    ## precompilation loop
+
+    for (pkg, deps) in direct_deps
+        cachepaths = Base.find_all_in_cache_path(pkg)
+        freshpaths = String[]
+        cachepath_cache[pkg] = freshpaths
+        sourcespec = Base.locate_package_load_spec(pkg)
+        single_requested_pkg = length(requested_pkgs) == 1 &&
+            (pkg in requested_pkgids || pkg.name in pkg_names)
+        for config in configs
+            pkg_config = (pkg, config)
+            if sourcespec === nothing
+                failed_deps[pkg_config] = "Error: Missing source file for $(pkg)"
+                notify(was_processed[pkg_config])
+                continue
+            end
+            # Heuristic for when precompilation is disabled, which must not over-estimate however for any dependent
+            # since it will also block precompilation of all dependents
+            if _from_loading && single_requested_pkg && occursin(r"\b__precompile__\(\s*false\s*\)", read(sourcespec.path, String))
+                @lock print_lock begin
+                    Base.@logmsg logcalls "Disabled precompiling $(repr("text/plain", pkg)) since the text `__precompile__(false)` was found in file."
+                end
+                notify(was_processed[pkg_config])
+                continue
+            end
+            flags, cacheflags = config
+            task = @async begin
+                try
+                    loaded = warn_loaded && haskey(Base.loaded_modules, pkg)
+                    for dep in deps # wait for deps to finish
+                        wait(was_processed[(dep,config)])
+                        if interrupted_or_done[]
+                            return
+                        end
+                    end
+                    circular = pkg in circular_deps
+                    freshpath = Base.compilecache_freshest_path(pkg; ignore_loaded, stale_cache, cachepath_cache, cachepaths, sourcespec, flags=cacheflags)
+                    is_stale = freshpath === nothing
+                    if !is_stale
+                        push!(freshpaths, freshpath)
+                    end
+                    if !circular && is_stale
+                        Base.acquire(parallel_limiter)
+                        is_serial_dep = pkg in serial_deps
+                        is_project_dep = pkg in project_deps
+
+                        # std monitoring
+                        std_pipe = Base.link_pipe!(Pipe(); reader_supports_async=true, writer_supports_async=true)
+                        t_monitor = @async monitor_std(pkg_config, std_pipe; single_requested_pkg)
+
+                        local name
+                        try
+                            name = describe_pkg(pkg, is_project_dep, is_serial_dep, flags, cacheflags)
+                            @lock print_lock begin
+                                if !fancyprint && isempty(pkg_queue)
+                                    printpkgstyle(logio, :Precompiling, something(target[], "packages..."))
+                                end
+                            end
+                            push!(pkg_queue, pkg_config)
+                            started[pkg_config] = true
+                            fancyprint && notify(first_started)
+                            if interrupted_or_done[]
+                                return
+                            end
+                            # for extensions, any extension that can trigger it needs to be accounted for here (even stdlibs, which are excluded from direct_deps)
+                            loadable_exts = haskey(ext_to_parent, pkg) ? filter((dep)->haskey(ext_to_parent, dep), triggers[pkg]) : nothing
+                            if !isempty(deps)
+                                # if deps is empty, either it doesn't have any (so compiled-modules is
+                                # irrelevant) or we couldn't compute them (so we actually should attempt
+                                # serial compile, as the dependencies are not in the parallel list)
+                                flags = `$flags --compiled-modules=strict`
+                            end
+                            if _from_loading && pkg in requested_pkgids
+                                # loading already took the cachefile_lock and printed logmsg for its explicit requests
+                                t = @elapsed ret = begin
+                                    Base.compilecache(pkg, sourcespec, std_pipe, std_pipe, !ignore_loaded;
+                                                      flags, cacheflags, loadable_exts)
+                                end
+                            else
+                                # allows processes to wait if another process is precompiling a given package to
+                                # a functionally identical package cache (except for preferences, which may differ)
+                                t = @elapsed ret = precompile_pkgs_maybe_cachefile_lock(io, print_lock, fancyprint, pkg_config, pkgspidlocked, hascolor, parallel_limiter) do
+                                    # refresh and double-check the search now that we have global lock
+                                    if interrupted_or_done[]
+                                        return ErrorException("canceled")
+                                    end
+                                    cachepaths = Base.find_all_in_cache_path(pkg)
+                                    local freshpath = Base.compilecache_freshest_path(pkg; ignore_loaded, stale_cache, cachepath_cache, cachepaths, sourcespec, flags=cacheflags)
+                                    local is_stale = freshpath === nothing
+                                    if !is_stale
+                                        push!(freshpaths, freshpath)
+                                        return nothing # returning nothing indicates another process did the recompile
+                                    end
+                                    logcalls === CoreLogging.Debug && @lock print_lock begin
+                                        @debug "Precompiling $(repr("text/plain", pkg))"
+                                    end
+                                    Base.compilecache(pkg, sourcespec, std_pipe, std_pipe, !ignore_loaded;
+                                                      flags, cacheflags, loadable_exts)
+                                end
+                            end
+                            if ret isa Exception
+                                push!(precomperr_deps, pkg_config)
+                                !fancyprint && @lock print_lock begin
+                                    println(logio, _timing_string(t), color_string("  ? ", Base.warn_color()), name)
+                                end
+                            else
+                                !fancyprint && @lock print_lock begin
+                                    println(logio, _timing_string(t), color_string("  ✓ ", loaded ? Base.warn_color() : :green), name)
+                                end
+                                if ret !== nothing
+                                    was_recompiled[pkg_config] = true
+                                    cachefile, _ = ret::Tuple{String, Union{Nothing, String}}
+                                    push!(freshpaths, cachefile)
+                                    build_id, _ = Base.parse_cache_buildid(cachefile)
+                                    stale_cache_key = (pkg, build_id, sourcespec, cachefile, ignore_loaded, cacheflags)::StaleCacheKey
+                                    stale_cache[stale_cache_key] = false
+                                end
+                            end
+                            loaded && (n_loaded[] += 1)
+                        catch err
+                            close(std_pipe.in) # close pipe to end the std output monitor
+                            wait(t_monitor)
+                            if err isa ErrorException || (err isa ArgumentError && startswith(err.msg, "Invalid header in cache file"))
+                                failed_deps[pkg_config] = sprint(showerror, err)
+                                !fancyprint && @lock print_lock begin
+                                    println(logio, " "^12, color_string("  ✗ ", Base.error_color()), name)
+                                end
+                            else
+                                rethrow()
+                            end
+                        finally
+                            isopen(std_pipe.in) && close(std_pipe.in) # close pipe to end the std output monitor
+                            wait(t_monitor)
+                            Base.release(parallel_limiter)
+                        end
+                    else
+                        is_stale || (n_already_precomp[] += 1)
+                    end
+                    n_done[] += 1
+                    notify(was_processed[pkg_config])
+                catch err_outer
+                    # For debugging:
+                    println("Task failed $err_outer")
+                    Base.display_error(ErrorException(""), Base.catch_backtrace())# logging doesn't show here
+                    handle_interrupt(err_outer, false)
+                    rethrow()
+                end
+            end
+            push!(tasks, task)
+        end
+    end
+    try
+        waitall(tasks; failfast=false, throw=false)
+        interrupted_or_done[] = true
+    catch err
+        # For debugging:
+        println("Task failed $err")
+        Base.display_error(ErrorException(""), Base.catch_backtrace())# logging doesn't show here
+        handle_interrupt(err, false) || rethrow()
+    finally
+        try
+            waitall(tasks; failfast=false, throw=false)
+        finally
+            @lock Base.require_lock begin
+                Base.LOADING_CACHE[] = nothing
+            end
+        end
+    end
+    notify(first_started) # in cases of no-op or !fancyprint
+    fancyprint && wait(t_print)
+    quick_exit = any(t -> !istaskdone(t) || istaskfailed(t), tasks) || interrupted[] # all should have finished (to avoid memory corruption)
+    seconds_elapsed = round(Int, (time_ns() - time_start) / 1e9)
+    ndeps = count(values(was_recompiled))
+    # Determine if any of failures were a requested package
+    requested_errs = false
+    for ((dep, config), err) in failed_deps
+        if dep in requested_pkgids
+            requested_errs = true
+            break
+        end
+    end
+    # if every requested package succeeded, filter away output from failed packages
+    # since it didn't contribute to the overall success and can be regenerated if that package is later required
+    if !strict && !requested_errs
+        for (pkg_config, err) in failed_deps
+            delete!(std_outputs, pkg_config)
+        end
+        empty!(failed_deps)
+    end
+    if ndeps > 0 || !isempty(failed_deps)
+        if !quick_exit
+            logstr = sprint(context=logio) do iostr
+                if fancyprint # replace the progress bar
+                    what = isempty(requested_pkgids) ? "packages finished." : "$(join((p.name for p in requested_pkgids), ", ", " and ")) finished."
+                    printpkgstyle(iostr, :Precompiling, what)
+                end
+                plural = length(configs) > 1 ? "dependency configurations" : ndeps == 1 ? "dependency" : "dependencies"
+                print(iostr, "  $(ndeps) $(plural) successfully precompiled in $(seconds_elapsed) seconds")
+                if n_already_precomp[] > 0 || !isempty(circular_deps)
+                    n_already_precomp[] > 0 && (print(iostr, ". $(n_already_precomp[]) already precompiled"))
+                    !isempty(circular_deps) && (print(iostr, ". $(length(circular_deps)) skipped due to circular dependency"))
+                    print(iostr, ".")
+                end
+                if n_loaded[] > 0
+                    local plural1 = length(configs) > 1 ? "dependency configurations" : n_loaded[] == 1 ? "dependency" : "dependencies"
+                    local plural2 = n_loaded[] == 1 ? "a different version is" : "different versions are"
+                    local plural3 = n_loaded[] == 1 ? "" : "s"
+                    local plural4 = n_loaded[] == 1 ? "this package" : "these packages"
+                    print(iostr, "\n  ",
+                        color_string(string(n_loaded[]), Base.warn_color()),
+                        " $(plural1) precompiled but ",
+                        color_string("$(plural2) currently loaded", Base.warn_color()),
+                        ". Restart julia to access the new version$(plural3). \
+                        Otherwise, loading dependents of $(plural4) may trigger further precompilation to work with the unexpected version$(plural3)."
+                    )
+                end
+                if !isempty(precomperr_deps)
+                    pluralpc = length(configs) > 1 ? "dependency configurations" : precomperr_deps == 1 ? "dependency" : "dependencies"
+                    print(iostr, "\n  ",
+                        color_string(string(length(precomperr_deps)), Base.warn_color()),
+                        " $(pluralpc) failed but may be precompilable after restarting julia"
+                    )
+                end
+            end
+            @lock print_lock begin
+                println(logio, logstr)
+            end
+        end
+    end
+    if !isempty(std_outputs)
+        str = sprint(context=io) do iostr
+            # show any stderr output, even if Pkg.precompile has been interrupted (quick_exit=true), given user may be
+            # interrupting a hanging precompile job with stderr output.
+            let std_outputs = Tuple{PkgConfig,SubString{String}}[(pkg_config, strip(String(take!(io)))) for (pkg_config,io) in std_outputs]
+                filter!(!isempty∘last, std_outputs)
+                if !isempty(std_outputs)
+                    local plural1 = length(std_outputs) == 1 ? "y" : "ies"
+                    local plural2 = length(std_outputs) == 1 ? "" : "s"
+                    print(iostr, "\n  ", color_string("$(length(std_outputs))", Base.warn_color()), " dependenc$(plural1) had output during precompilation:")
+                    for (pkg_config, err) in std_outputs
+                        pkg, config = pkg_config
+                        err = if pkg == pkg_liveprinted[]
+                            "[Output was shown above]"
+                        else
+                            join(split(err, "\n"), color_string("\n│  ", Base.warn_color()))
+                        end
+                        name = full_name(ext_to_parent, pkg)
+                        print(iostr, color_string("\n┌ ", Base.warn_color()), name, color_string("\n│  ", Base.warn_color()), err, color_string("\n└  ", Base.warn_color()))
+                    end
+                end
+            end
+        end
+        isempty(str) || @lock print_lock begin
+            println(io, str)
+        end
+    end
+    # Done cleanup and sub-process output, now ensure caller aborts too with the right error
+    if interrupted[]
+        throw(InterruptException())
+    end
+    # Fail noisily now with failed_deps if any.
+    # Include all messages from compilecache since any might be relevant in the failure.
+    if !isempty(failed_deps)
+        err_str = IOBuffer()
+        for ((dep, config), err) in failed_deps
+            write(err_str, "\n")
+            print(err_str, "\n", dep.name, " ")
+            join(err_str, config[1], " ")
+            print(err_str, "\n", err)
+        end
+        n_errs = length(failed_deps)
+        pluraled = n_errs == 1 ? "" : "s"
+        err_msg = "The following $n_errs package$(pluraled) failed to precompile:$(String(take!(err_str)))\n"
+        if internal_call
+            # Pkg does not implement correct error handling, so this sometimes handles them instead
+            print(io, err_msg)
+        else
+            throw(PkgPrecompileError(err_msg))
+        end
+    end
+    return collect(String, Iterators.flatten((v for (pkgid, v) in cachepath_cache if pkgid in requested_pkgids)))
+end
+
+_timing_string(t) = string(lpad(round(t * 1e3, digits = 1), 9), " ms")
+
+function _color_string(cstr::String, col::Union{Int64, Symbol}, hascolor)
+    if hascolor
+        enable_ansi  = get(Base.text_colors, col, Base.text_colors[:default])
+        disable_ansi = get(Base.disable_text_style, col, Base.text_colors[:default])
+        return string(enable_ansi, cstr, disable_ansi)
+    else
+        return cstr
+    end
+end
+
+# Can be merged with `maybe_cachefile_lock` in loading?
+function precompile_pkgs_maybe_cachefile_lock(f, io::IO, print_lock::ReentrantLock, fancyprint::Bool, pkg_config, pkgspidlocked, hascolor, parallel_limiter::Base.Semaphore)
+    if !(isdefined(Base, :mkpidlock_hook) && isdefined(Base, :trymkpidlock_hook) && Base.isdefined(Base, :parse_pidfile_hook))
+        return f()
+    end
+    pkg, config = pkg_config
+    flags, cacheflags = config
+    stale_age = Base.compilecache_pidlock_stale_age
+    pidfile = Base.compilecache_pidfile_path(pkg, flags=cacheflags)
+    cachefile = @invokelatest Base.trymkpidlock_hook(f, pidfile; stale_age)
+    if cachefile === false
+        pid, hostname, age = @invokelatest Base.parse_pidfile_hook(pidfile)
+        pkgspidlocked[pkg_config] = if isempty(hostname) || hostname == gethostname()
+            if pid == getpid()
+                "an async task in this process (pidfile: $pidfile)"
+            else
+                "another process (pid: $pid, pidfile: $pidfile)"
+            end
+        else
+            "another machine (hostname: $hostname, pid: $pid, pidfile: $pidfile)"
+        end
+        !fancyprint && @lock print_lock begin
+            println(io, "    ", pkg.name, _color_string(" Being precompiled by $(pkgspidlocked[pkg_config])", Base.info_color(), hascolor))
+        end
+        Base.release(parallel_limiter) # release so other work can be done while waiting
+        try
+            # wait until the lock is available
+            @invokelatest Base.mkpidlock_hook(() -> begin
+                    delete!(pkgspidlocked, pkg_config)
+                    Base.acquire(f, parallel_limiter)
+                end,
+                pidfile; stale_age)
+        finally
+            Base.acquire(parallel_limiter) # re-acquire so the outer release is balanced
+        end
+    end
+    return cachefile
+end
+
+end
diff --git a/base/process.jl b/base/process.jl
index ed51a30ae3ced..345afc737fb75 100644
--- a/base/process.jl
+++ b/base/process.jl
@@ -6,11 +6,12 @@ mutable struct Process <: AbstractPipe
     in::IO
     out::IO
     err::IO
+    syncd::Vector{Task}
     exitcode::Int64
     termsignal::Int32
     exitnotify::ThreadSynchronizer
-    function Process(cmd::Cmd, handle::Ptr{Cvoid})
-        this = new(cmd, handle, devnull, devnull, devnull,
+    function Process(cmd::Cmd, handle::Ptr{Cvoid}, syncd::Vector{Task})
+        this = new(cmd, handle, devnull, devnull, devnull, syncd,
                    typemin(fieldtype(Process, :exitcode)),
                    typemin(fieldtype(Process, :termsignal)),
                    ThreadSynchronizer())
@@ -35,6 +36,15 @@ end
 pipe_reader(p::ProcessChain) = p.out
 pipe_writer(p::ProcessChain) = p.in
 
+# a lightweight pair of a child OS_HANDLE and associated Task that will
+# complete only after all content has been read from it for synchronizing
+# state without the kernel to aide
+struct SyncCloseFD
+    fd
+    t::Task
+end
+rawhandle(io::SyncCloseFD) = rawhandle(io.fd)
+
 # release ownership of the libuv handle
 function uvfinalize(proc::Process)
     if proc.handle != C_NULL
@@ -74,8 +84,8 @@ function _uv_hook_close(proc::Process)
     nothing
 end
 
-const SpawnIO  = Union{IO, RawFD, OS_HANDLE}
-const SpawnIOs = Vector{SpawnIO} # convenience name for readability
+const SpawnIO  = Union{IO, IOServer, RawFD, OS_HANDLE, SyncCloseFD} # internal copy of Redirectable, removing FileRedirect and adding SyncCloseFD
+const SpawnIOs = Memory{SpawnIO} # convenience name for readability (used for dispatch also to clearly distinguish from Vector{Redirectable})
 
 function as_cpumask(cpus::Vector{UInt16})
     n = max(Int(maximum(cpus)), Int(ccall(:uv_cpumask_size, Cint, ())))
@@ -100,14 +110,23 @@ end
                 error("invalid spawn handle $h from $io")
             end
             for io in stdio]
+        syncd = Task[io.t for io in stdio if io isa SyncCloseFD]
         handle = Libc.malloc(_sizeof_uv_process)
         disassociate_julia_struct(handle)
-        (; exec, flags, env, dir) = cmd
+        (; exec, flags, env, dir, uid, gid) = cmd
+        flags ⊻= UV_PROCESS_WINDOWS_DISABLE_EXACT_NAME # libuv inverts the default for this, so flip this bit now
+        if uid !== nothing
+            flags |= UV_PROCESS_SETUID
+        end
+        if gid !== nothing
+            flags |= UV_PROCESS_SETGID
+        end
         iolock_begin()
         err = ccall(:jl_spawn, Int32,
                   (Cstring, Ptr{Cstring}, Ptr{Cvoid}, Ptr{Cvoid},
                    Ptr{Tuple{Cint, UInt}}, Int,
-                   UInt32, Ptr{Cstring}, Cstring, Ptr{Bool}, Csize_t, Ptr{Cvoid}),
+                   UInt32, Ptr{Cstring}, Cstring, Ptr{Bool}, Csize_t,
+                   UInt32, UInt32, Ptr{Cvoid}),
             file, exec, loop, handle,
             iohandles, length(iohandles),
             flags,
@@ -115,9 +134,11 @@ end
             isempty(dir) ? C_NULL : dir,
             cpumask === nothing ? C_NULL : cpumask,
             cpumask === nothing ? 0 : length(cpumask),
+            uid === nothing ? typemax(UInt32) : uid,
+            gid === nothing ? typemax(UInt32) : gid,
             @cfunction(uv_return_spawn, Cvoid, (Ptr{Cvoid}, Int64, Int32)))
         if err == 0
-            pp = Process(cmd, handle)
+            pp = Process(cmd, handle, syncd)
             associate_julia_struct(handle, pp)
         else
             ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), handle) # will call free on handle eventually
@@ -130,23 +151,24 @@ end
     return pp
 end
 
-_spawn(cmds::AbstractCmd) = _spawn(cmds, SpawnIO[])
+_spawn(cmds::AbstractCmd) = _spawn(cmds, SpawnIOs())
 
-# optimization: we can spawn `Cmd` directly without allocating the ProcessChain
-function _spawn(cmd::Cmd, stdios::SpawnIOs)
-    isempty(cmd.exec) && throw(ArgumentError("cannot spawn empty command"))
+function _spawn(cmd::AbstractCmd, stdios::Vector{Redirectable})
     pp = setup_stdios(stdios) do stdios
-        return _spawn_primitive(cmd.exec[1], cmd, stdios)
+        return _spawn(cmd, stdios)
     end
     return pp
 end
 
+# optimization: we can spawn `Cmd` directly without allocating the ProcessChain
+function _spawn(cmd::Cmd, stdios::SpawnIOs)
+    isempty(cmd.exec) && throw(ArgumentError("cannot spawn empty command"))
+    return _spawn_primitive(cmd.exec[1], cmd, stdios)
+end
+
 # assume that having a ProcessChain means that the stdio are setup
 function _spawn(cmds::AbstractCmd, stdios::SpawnIOs)
-    pp = setup_stdios(stdios) do stdios
-        return _spawn(cmds, stdios, ProcessChain())
-    end
-    return pp
+    return _spawn(cmds, stdios, ProcessChain())
 end
 
 # helper function for making a copy of a SpawnIOs, with replacement
@@ -212,7 +234,7 @@ end
 
 
 # open the child end of each element of `stdios`, and initialize the parent end
-function setup_stdios(f, stdios::SpawnIOs)
+function setup_stdios(f, stdios::Vector{Redirectable})
     nstdio = length(stdios)
     open_io = SpawnIOs(undef, nstdio)
     close_io = falses(nstdio)
@@ -295,25 +317,26 @@ function setup_stdio(stdio::IO, child_readable::Bool)
     child = child_readable ? rd : wr
     try
         let in = (child_readable ? parent : stdio),
-            out = (child_readable ? stdio : parent)
-            @async try
+            out = (child_readable ? stdio : parent),
+            t = @async try
                 write(in, out)
             catch ex
                 @warn "Process I/O error" exception=(ex, catch_backtrace())
+                rethrow()
             finally
                 close(parent)
-                child_readable || closewrite(stdio)
             end
+            return (SyncCloseFD(child, t), true)
         end
     catch
         close_pipe_sync(child)
         rethrow()
     end
-    return (child, true)
 end
 
-close_stdio(stdio::OS_HANDLE) = close_pipe_sync(stdio)
 close_stdio(stdio) = close(stdio)
+close_stdio(stdio::OS_HANDLE) = close_pipe_sync(stdio)
+close_stdio(stdio::SyncCloseFD) = close_stdio(stdio.fd)
 
 # INTERNAL
 # pad out stdio to have at least three elements,
@@ -325,19 +348,19 @@ close_stdio(stdio) = close(stdio)
 #   - An Filesystem.File or IOStream object to redirect the output to
 #   - A FileRedirect, containing a string specifying a filename to be opened for the child
 
-spawn_opts_swallow(stdios::StdIOSet) = SpawnIO[stdios...]
-spawn_opts_inherit(stdios::StdIOSet) = SpawnIO[stdios...]
-spawn_opts_swallow(in::Redirectable=devnull, out::Redirectable=devnull, err::Redirectable=devnull) =
-    SpawnIO[in, out, err]
+spawn_opts_swallow(stdios::StdIOSet) = Redirectable[stdios...]
+spawn_opts_inherit(stdios::StdIOSet) = Redirectable[stdios...]
+spawn_opts_swallow(in::Redirectable=devnull, out::Redirectable=devnull, err::Redirectable=devnull, extra::Redirectable...) =
+    Redirectable[in, out, err, extra...]
 # pass original descriptors to child processes by default, because we might
 # have already exhausted and closed the libuv object for our standard streams.
 # ref issue #8529
-spawn_opts_inherit(in::Redirectable=RawFD(0), out::Redirectable=RawFD(1), err::Redirectable=RawFD(2)) =
-    SpawnIO[in, out, err]
+spawn_opts_inherit(in::Redirectable=RawFD(0), out::Redirectable=RawFD(1), err::Redirectable=RawFD(2), extra::Redirectable...) =
+    Redirectable[in, out, err, extra...]
 
 function eachline(cmd::AbstractCmd; keep::Bool=false)
     out = PipeEndpoint()
-    processes = _spawn(cmd, SpawnIO[devnull, out, stderr])
+    processes = _spawn(cmd, Redirectable[devnull, out, stderr])
     # if the user consumes all the data, also check process exit status for success
     ondone = () -> (success(processes) || pipeline_error(processes); nothing)
     return EachLine(out, keep=keep, ondone=ondone)::EachLine
@@ -385,20 +408,20 @@ function open(cmds::AbstractCmd, stdio::Redirectable=devnull; write::Bool=false,
         stdio === devnull || throw(ArgumentError("no stream can be specified for `stdio` in read-write mode"))
         in = PipeEndpoint()
         out = PipeEndpoint()
-        processes = _spawn(cmds, SpawnIO[in, out, stderr])
+        processes = _spawn(cmds, Redirectable[in, out, stderr])
         processes.in = in
         processes.out = out
     elseif read
         out = PipeEndpoint()
-        processes = _spawn(cmds, SpawnIO[stdio, out, stderr])
+        processes = _spawn(cmds, Redirectable[stdio, out, stderr])
         processes.out = out
     elseif write
         in = PipeEndpoint()
-        processes = _spawn(cmds, SpawnIO[in, stdio, stderr])
+        processes = _spawn(cmds, Redirectable[in, stdio, stderr])
         processes.in = in
     else
         stdio === devnull || throw(ArgumentError("no stream can be specified for `stdio` in no-access mode"))
-        processes = _spawn(cmds, SpawnIO[devnull, devnull, stderr])
+        processes = _spawn(cmds, Redirectable[devnull, devnull, stderr])
     end
     return processes
 end
@@ -415,12 +438,18 @@ function open(f::Function, cmds::AbstractCmd, args...; kwargs...)
     P = open(cmds, args...; kwargs...)
     function waitkill(P::Union{Process,ProcessChain})
         close(P)
-        # 0.1 seconds after we hope it dies (from closing stdio),
-        # we kill the process with SIGTERM (15)
-        local t = Timer(0.1) do t
+        # shortly after we hope it starts cleanup and dies (from closing
+        # stdio), we kill the process with SIGTERM (15) so that we can proceed
+        # with throwing the error and hope it will exit soon from that
+        local t = Timer(2) do t
             process_running(P) && kill(P)
         end
-        wait(P)
+        # pass false to indicate that we do not care about data-races on the
+        # Julia stdio objects after this point, since we already know this is
+        # an error path and the state of them is fairly unpredictable anyways
+        # in that case. Since we closed P some of those should come crumbling
+        # down already, and we don't want to throw that error here either.
+        wait(P, false)
         close(t)
     end
     ret = try
@@ -430,10 +459,23 @@ function open(f::Function, cmds::AbstractCmd, args...; kwargs...)
         rethrow()
     end
     close(P.in)
+    closestdio = @async begin
+        # wait for P to complete (including sync'd), then mark the output streams for EOF (if applicable to that stream type)
+        wait(P)
+        err = P.err
+        applicable(closewrite, err) && closewrite(err)
+        out = P.out
+        applicable(closewrite, out) && closewrite(out)
+        nothing
+    end
+    # now verify that the output stream is at EOF, and the user didn't fail to consume it successfully
+    # (we do not currently verify the user dealt with the stderr stream)
     if !(eof(P.out)::Bool)
         waitkill(P)
         throw(_UVError("open(do)", UV_EPIPE))
     end
+    # make sure to closestdio is completely done to avoid data-races later
+    wait(closestdio)
     success(P) || pipeline_error(P)
     return ret
 end
@@ -467,12 +509,16 @@ exiting with a non-zero status (when `wait` is true).
 The `args...` allow you to pass through file descriptors to the command, and are ordered
 like regular unix file descriptors (eg `stdin, stdout, stderr, FD(3), FD(4)...`).
 
-If `wait` is false, the process runs asynchronously. You can later wait for it and check
-its exit status by calling `success` on the returned process object.
+If `wait` is false, the process runs asynchronously. You can later [`wait`](@ref) for it
+and check its exit status by calling `success` on the returned process object. If the
+`command` spawns only a single process, a `Process` object is returned and the
+exit code can be retrieved via the `exitcode` field; see [`wait`](@ref) for more details.
 
 When `wait` is false, the process' I/O streams are directed to `devnull`.
 When `wait` is true, I/O streams are shared with the parent process.
 Use [`pipeline`](@ref) to control I/O redirection.
+
+See also: [`Cmd`](@ref).
 """
 function run(cmds::AbstractCmd, args...; wait::Bool = true)
     if wait
@@ -602,7 +648,7 @@ kill(ps::Vector{Process}, signum::Integer=SIGTERM) = for p in ps; kill(p, signum
 kill(ps::ProcessChain, signum::Integer=SIGTERM) = kill(ps.processes, signum)
 
 """
-    getpid(process) -> Int32
+    getpid(process)::Int32
 
 Get the child process ID, if it still exists.
 
@@ -650,26 +696,31 @@ function process_status(s::Process)
            error("process status error")
 end
 
-function wait(x::Process)
-    process_exited(x) && return
-    iolock_begin()
+function wait(x::Process, syncd::Bool=true)
     if !process_exited(x)
-        preserve_handle(x)
-        lock(x.exitnotify)
-        iolock_end()
-        try
-            wait(x.exitnotify)
-        finally
-            unlock(x.exitnotify)
-            unpreserve_handle(x)
+        iolock_begin()
+        if !process_exited(x)
+            preserve_handle(x)
+            lock(x.exitnotify)
+            iolock_end()
+            try
+                wait(x.exitnotify)
+            finally
+                unlock(x.exitnotify)
+                unpreserve_handle(x)
+            end
+        else
+            iolock_end()
         end
-    else
-        iolock_end()
+    end
+    # and make sure all sync'd Tasks are complete too
+    syncd && for t in x.syncd
+        wait(t)
     end
     nothing
 end
 
-wait(x::ProcessChain) = foreach(wait, x.processes)
+wait(x::ProcessChain, syncd::Bool=true) = foreach(p -> wait(p, syncd), x.processes)
 
 show(io::IO, p::Process) = print(io, "Process(", p.cmd, ", ", process_status(p), ")")
 
diff --git a/base/promotion.jl b/base/promotion.jl
index 6e32bd7a42efa..f935c546915be 100644
--- a/base/promotion.jl
+++ b/base/promotion.jl
@@ -18,14 +18,21 @@ Number
 ```
 """
 typejoin() = Bottom
-typejoin(@nospecialize(t)) = t
-typejoin(@nospecialize(t), ts...) = (@_foldable_meta; typejoin(t, typejoin(ts...)))
+typejoin(@nospecialize(t)) = (@_nospecializeinfer_meta; t)
+typejoin(@nospecialize(t), @nospecialize(s), @nospecialize(u)) = (@_foldable_meta; @_nospecializeinfer_meta; typejoin(typejoin(t, s), u))
+typejoin(@nospecialize(t), @nospecialize(s), @nospecialize(u), ts...) = (@_foldable_meta; @_nospecializeinfer_meta; afoldl(typejoin, typejoin(t, s, u), ts...))
 function typejoin(@nospecialize(a), @nospecialize(b))
     @_foldable_meta
+    @_nothrow_meta
+    @_nospecializeinfer_meta
     if isa(a, TypeVar)
         return typejoin(a.ub, b)
     elseif isa(b, TypeVar)
         return typejoin(a, b.ub)
+    elseif a === b
+        return a
+    elseif !isa(a, Type) || !isa(b, Type)
+        return Any
     elseif a <: b
         return b
     elseif b <: a
@@ -90,9 +97,9 @@ function typejoin(@nospecialize(a), @nospecialize(b))
     elseif b <: Tuple
         return Any
     end
-    while b !== Any
+    while !(b === Any)
         if a <: b.name.wrapper
-            while a.name !== b.name
+            while !(a.name === b.name)
                 a = supertype(a)::DataType
             end
             if a.name === Type.body.name
@@ -119,7 +126,7 @@ function typejoin(@nospecialize(a), @nospecialize(b))
                     aprimary = aprimary::UnionAll
                     # pushfirst!(vars, aprimary.var)
                     _growbeg!(vars, 1)
-                    arrayset(false, vars, aprimary.var, 1)
+                    vars[1] = aprimary.var
                     aprimary = aprimary.body
                 end
             end
@@ -139,6 +146,7 @@ end
 #          (Core.Compiler.isnotbrokensubtype), use only simple types for `b`
 function typesplit(@nospecialize(a), @nospecialize(b))
     @_foldable_meta
+    @_nospecializeinfer_meta
     if a <: b
         return Bottom
     end
@@ -196,16 +204,15 @@ end
 
 function typejoin_union_tuple(T::DataType)
     @_foldable_meta
-    u = Base.unwrap_unionall(T)
-    p = (u::DataType).parameters
-    lr = length(p)::Int
+    p = T.parameters::Core.SimpleVector
+    lr = length(p)
     if lr == 0
         return Tuple{}
     end
     c = Vector{Any}(undef, lr)
     for i = 1:lr
         pi = p[i]
-        U = Core.Compiler.unwrapva(pi)
+        U = unwrapva(pi)
         if U === Union{}
             ci = Union{}
         elseif U isa Union
@@ -215,7 +222,7 @@ function typejoin_union_tuple(T::DataType)
         else
             ci = promote_typejoin_union(U)
         end
-        if i == lr && Core.Compiler.isvarargtype(pi)
+        if i == lr && isvarargtype(pi)
             c[i] = isdefined(pi, :N) ? Vararg{ci, pi.N} : Vararg{ci}
         else
             c[i] = ci
@@ -239,7 +246,8 @@ function full_va_len(p::Core.SimpleVector)
 end
 
 # reduce typejoin over A[i:end]
-function tailjoin(A, i)
+function tailjoin(A::SimpleVector, i::Int)
+    @_foldable_meta
     if i > length(A)
         return unwrapva(A[end])
     end
@@ -296,7 +304,8 @@ function promote_type end
 
 promote_type()  = Bottom
 promote_type(T) = T
-promote_type(T, S, U, V...) = (@inline; promote_type(T, promote_type(S, U, V...)))
+promote_type(T, S, U) = (@inline; promote_type(promote_type(T, S), U))
+promote_type(T, S, U, V...) = (@inline; afoldl(promote_type, promote_type(T, S, U), V...))
 
 promote_type(::Type{Bottom}, ::Type{Bottom}) = Bottom
 promote_type(::Type{T}, ::Type{T}) where {T} = T
@@ -370,7 +379,9 @@ function _promote(x::T, y::S) where {T,S}
     return (convert(R, x), convert(R, y))
 end
 promote_typeof(x) = typeof(x)
-promote_typeof(x, xs...) = (@inline; promote_type(typeof(x), promote_typeof(xs...)))
+promote_typeof(x, y) = (@inline; promote_type(typeof(x), typeof(y)))
+promote_typeof(x, y, z) = (@inline; promote_type(typeof(x), typeof(y), typeof(z)))
+promote_typeof(x, y, z, a...) = (@inline; afoldl(((::Type{T}, y) where {T}) -> promote_type(T, typeof(y)), promote_typeof(x, y, z), a...))
 function _promote(x, y, z)
     @inline
     R = promote_typeof(x, y, z)
@@ -427,7 +438,11 @@ end
 """
     ^(x, y)
 
-Exponentiation operator. If `x` is a matrix, computes matrix exponentiation.
+Exponentiation operator.
+
+If `x` and `y` are integers, the result may overflow.
+To enter numbers in scientific notation, use [`Float64`](@ref) literals
+such as `1.2e3` rather than `1.2 * 10^3`.
 
 If `y` is an `Int` literal (e.g. `2` in `x^2` or `-3` in `x^-3`), the Julia code
 `x^y` is transformed by the compiler to `Base.literal_pow(^, x, Val(y))`, to
@@ -437,20 +452,31 @@ where usually `^ == Base.^` unless `^` has been defined in the calling
 namespace.) If `y` is a negative integer literal, then `Base.literal_pow`
 transforms the operation to `inv(x)^-y` by default, where `-y` is positive.
 
+See also [`exp2`](@ref), [`<<`](@ref).
+
 # Examples
 ```jldoctest
 julia> 3^5
 243
 
-julia> A = [1 2; 3 4]
-2×2 Matrix{Int64}:
- 1  2
- 3  4
+julia> 3^-1  # uses Base.literal_pow
+0.3333333333333333
+
+julia> p = -1;
+
+julia> 3^p
+ERROR: DomainError with -1:
+Cannot raise an integer x to a negative power -1.
+[...]
+
+julia> 3.0^p
+0.3333333333333333
+
+julia> 10^19 > 0  # integer overflow
+false
 
-julia> A^3
-2×2 Matrix{Int64}:
- 37   54
- 81  118
+julia> big(10)^19 == 1e19
+true
 ```
 """
 ^(x::Number, y::Number) = ^(promote(x,y)...)
@@ -472,12 +498,6 @@ max(x::Real, y::Real) = max(promote(x,y)...)
 min(x::Real, y::Real) = min(promote(x,y)...)
 minmax(x::Real, y::Real) = minmax(promote(x, y)...)
 
-if isdefined(Core, :Compiler)
-    const _return_type = Core.Compiler.return_type
-else
-    _return_type(@nospecialize(f), @nospecialize(t)) = Any
-end
-
 function TupleOrBottom(tt...)
     any(p -> p === Union{}, tt) && return Union{}
     return Tuple{tt...}
@@ -489,10 +509,106 @@ end
 Guess what an appropriate container eltype would be for storing results of
 `f(::argtypes...)`. The guess is in part based on type inference, so can change any time.
 
+Accordingly, return a type `R` such that `f(args...) isa R` where `args isa T`.
+
 !!! warning
     Due to its fragility, use of `promote_op` should be avoided. It is preferable to base
     the container eltype on the type of the actual elements. Only in the absence of any
     elements (for an empty result container), it may be unavoidable to call `promote_op`.
+
+The type `R` obtained from `promote_op` is merely an upper bound. There may exist a stricter
+type `S` such that `f(args...) isa S` for every `args isa T` with `S <: R` and `S != R`.
+Furthermore, the exact type `R` obtained from `promote_op` depends on various factors
+including but not limited to the exact Julia version used, packages loaded, and command line
+options. As such, when used in publicly registered packages, **it is the package authors'
+responsibility to ensure that the API guarantees provided by the package do not depend on
+the exact type `R` obtained from `promote_op`.**
+
+Additionally, the result may return overly exact types, such as `DataType`, `Type`, or
+`Union{...}`, while the desired inputs or outputs may be different from those. The internal
+`promote_typejoin_union` function may be helpful to improve the result in some of these
+cases.
+
+# Extended help
+
+## Examples
+
+The following function is an invalid use-case of `promote_op`.
+
+```julia
+\"""
+    invalid_usecase1(f, xs::AbstractArray) -> ys::Array
+
+Return an array `ys` such that `vec(ys)` is `isequal`-equivalent to
+
+    [f(xs[1]), f(xs[2]), ..., f(xs[end])]
+\"""
+function invalid_usecase1(f, xs)
+    R = promote_op(f, eltype(xs))
+    ys = similar(xs, R)
+    for i in eachindex(xs, ys)
+        ys[i] = f(xs[i])
+    end
+    return ys
+end
+```
+
+This is because the value obtained through `eltype(invalid_usecase1(f, xs))` depends on
+exactly what `promote_op` returns. It may be improved by re-computing the element type
+before returning the result.
+
+```julia
+function valid_usecase1(f, xs)
+    R = promote_typejoin_union(promote_op(f, eltype(xs)))
+    ys = similar(xs, R)
+    S = Union{}
+    for i in eachindex(xs, ys)
+        ys[i] = f(xs[i])
+        S = promote_type(S, typeof(ys[i]))
+    end
+    if S != R
+        zs = similar(xs, S)
+        copyto!(zs, ys)
+        return zs
+    end
+    return ys
+end
+```
+
+Note that using [`isconcretetype`](@ref) on the result is not enough to safely use
+`promote_op`. The following function is another invalid use-case of `promote_op`.
+
+```julia
+function invalid_usecase2(f, xs)
+    R = promote_op(f, eltype(xs))
+    if isconcretetype(R)
+        ys = similar(xs, R)
+    else
+        ys = similar(xs, Any)
+    end
+    for i in eachindex(xs, ys)
+        ys[i] = f(xs[i])
+    end
+    return ys
+end
+```
+
+This is because whether or not the caller gets `Any` element type depends on if `promote_op`
+can infer a concrete return type of the given function. A fix similar to `valid_usecase1`
+can be used.
+
+*Technically*, another possible fix for `invalid_usecase1` and `invalid_usecase2` is to
+loosen the API guarantee:
+
+>     another_valid_usecase1(f, xs::AbstractArray) -> ys::Array
+>
+> Return an array `ys` such that every element in `xs` with the same index
+> is mapped with `f`.
+>
+> The element type of `ys` is _undefined_. It must not be used with generic
+> functions whose behavior depend on the element type of `ys`.
+
+However, it is discouraged to define such unconventional API guarantees.
 """
 function promote_op(f, S::Type...)
     argT = TupleOrBottom(S...)
diff --git a/base/public.jl b/base/public.jl
new file mode 100644
index 0000000000000..afbb77c382659
--- /dev/null
+++ b/base/public.jl
@@ -0,0 +1,141 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+public
+# Modules
+    Checked,
+    Filesystem,
+    Order,
+    ScopedValues,
+    Sort,
+
+# Types
+    AbstractLock,
+    AbstractOneTo,
+    AbstractPipe,
+    AsyncCondition,
+    CodeUnits,
+    Event,
+    Fix,
+    Fix1,
+    Fix2,
+    Generator,
+    ImmutableDict,
+    OneTo,
+    Pairs,
+    LogRange,
+    UUID,
+
+# Semaphores
+    Semaphore,
+    acquire,
+    @acquire,
+    release,
+
+# arrays
+    has_offset_axes,
+    require_one_based_indexing,
+    memoryindex,
+
+# collections
+    IteratorEltype,
+    IteratorSize,
+    to_index,
+    vect,
+    isdone,
+    front,
+    rest,
+    split_rest,
+    tail,
+    checked_length,
+    elsize,
+
+# Loading
+    DL_LOAD_PATH,
+    load_path,
+    active_project,
+    active_manifest,
+
+# Reflection and introspection
+    get_extension,
+    isambiguous,
+    isexpr,
+    isidentifier,
+    issingletontype,
+    identify_package,
+    locate_package,
+    moduleroot,
+    jit_total_bytes,
+    summarysize,
+    isexported,
+    ispublic,
+    remove_linenums!,
+
+# AST handling
+    IR,
+    isa_ast_node,
+    quoted,
+
+# Operators
+    operator_associativity,
+    operator_precedence,
+    isbinaryoperator,
+    isoperator,
+    isunaryoperator,
+
+# Integer math
+    uabs,
+    mul_hi,
+
+# C interface
+    cconvert,
+    unsafe_convert,
+
+# Error handling
+    exit_on_sigint,
+    windowserror,
+
+# Macros
+    @assume_effects,
+    @constprop,
+    @locals,
+    @propagate_inbounds,
+    @__doc__,
+
+# External processes
+    shell_escape,
+    shell_split,
+    shell_escape_posixly,
+    shell_escape_csh,
+    shell_escape_wincmd,
+    escape_microsoft_c_args,
+
+# Strings
+    escape_raw_string,
+
+# Chars
+    ismalformed,
+    isoverlong,
+    show_invalid,
+
+# IO
+    # types
+    BufferStream,
+    IOServer,
+    OS_HANDLE,
+    PipeEndpoint,
+    TTY,
+    # functions
+    reseteof,
+    link_pipe!,
+    dup,
+    showarg,
+
+# filesystem operations
+    rename,
+
+# misc
+    notnothing,
+    runtests,
+    text_colors,
+    depwarn,
+    donotdelete
diff --git a/base/range.jl b/base/range.jl
index e6341003d0c5a..f2f3e26edf2f2 100644
--- a/base/range.jl
+++ b/base/range.jl
@@ -32,7 +32,7 @@ _colon(::Any, ::Any, start::T, step, stop::T) where {T} =
     (:)(start, [step], stop)
 
 Range operator. `a:b` constructs a range from `a` to `b` with a step size
-equal to 1, which produces:
+equal to +1, which produces:
 
 * a [`UnitRange`](@ref) when `a` and `b` are integers, or
 * a [`StepRange`](@ref) when `a` and `b` are characters, or
@@ -41,11 +41,16 @@ equal to 1, which produces:
 `a:s:b` is similar but uses a step size of `s` (a [`StepRange`](@ref) or
 [`StepRangeLen`](@ref)). See also [`range`](@ref) for more control.
 
+To create a descending range, use `reverse(a:b)` or a negative step size, e.g. `b:-1:a`.
+Otherwise, when `b < a`, an empty range will be constructed and normalized to `a:a-1`.
+
 The operator `:` is also used in indexing to select whole dimensions, e.g. in `A[:, 1]`.
 
 `:` is also used to [`quote`](@ref) code, e.g. `:(x + y) isa Expr` and `:x isa Symbol`.
 Since `:2 isa Int`, it does *not* create a range in indexing: `v[:2] == v[2] != v[begin:2]`.
 """
+(:)(::Any, ::Any, ::Any)
+
 (:)(start::T, step, stop::T) where {T} = _colon(start, step, stop)
 (:)(start::T, step, stop::T) where {T<:Real} = _colon(start, step, stop)
 # without the second method above, the first method above is ambiguous with
@@ -66,10 +71,15 @@ Mathematically a range is uniquely determined by any three of `start`, `step`, `
 Valid invocations of range are:
 * Call `range` with any three of `start`, `step`, `stop`, `length`.
 * Call `range` with two of `start`, `stop`, `length`. In this case `step` will be assumed
-  to be one. If both arguments are Integers, a [`UnitRange`](@ref) will be returned.
-* Call `range` with one of `stop` or `length`. `start` and `step` will be assumed to be one.
+  to be positive one. If both arguments are Integers, a [`UnitRange`](@ref) will be returned.
+* Call `range` with one of `stop` or `length`. `start` and `step` will be assumed to be positive one.
+
+To construct a descending range, specify a negative step size, e.g. `range(5, 1; step = -1)` => [5,4,3,2,1]. Otherwise,
+a `stop` value less than the `start` value, with the default `step` of `+1`, constructs an empty range. Empty ranges
+are normalized such that the `stop` is one less than the `start`, e.g. `range(5, 1) == 5:4`.
 
 See Extended Help for additional details on the returned type.
+See also [`logrange`](@ref) for logarithmically spaced points.
 
 # Examples
 ```jldoctest
@@ -252,10 +262,13 @@ end
 ## 1-dimensional ranges ##
 
 """
-    AbstractRange{T}
+    AbstractRange{T} <: AbstractVector{T}
 
-Supertype for ranges with elements of type `T`.
-[`UnitRange`](@ref) and other types are subtypes of this.
+Supertype for linear ranges with elements of type `T`.
+[`UnitRange`](@ref), [`LinRange`](@ref) and other types are subtypes of this.
+
+All subtypes must define [`step`](@ref).
+Thus [`LogRange`](@ref Base.LogRange) is not a subtype of `AbstractRange`.
 """
 abstract type AbstractRange{T} <: AbstractArray{T,1} end
 
@@ -347,7 +360,7 @@ function steprange_last(start, step, stop)::typeof(stop)
             # (to simplify handling both signed and unsigned T and checking for signed overflow):
             absdiff, absstep = stop > start ? (stop - start, step) : (start - stop, -step)
 
-            # Compute remainder as a nonnegative number:
+            # Compute remainder as a non-negative number:
             if absdiff isa Signed && absdiff < zero(absdiff)
                 # unlikely, but handle the signed overflow case with unsigned rem
                 overflow_case(absdiff, absstep) = (@noinline; convert(typeof(absdiff), unsigned(absdiff) % absstep))
@@ -372,6 +385,7 @@ function steprange_last_empty(start::Integer, step, stop)::typeof(stop)
     end
     return last
 end
+steprange_last_empty(start::Bool, step, stop) = start ⊻ (step > zero(step)) # isnegative(step) ? start : !start
 # For types where x+oneunit(x) may not be well-defined use the user-given value for stop
 steprange_last_empty(start, step, stop) = stop
 
@@ -439,6 +453,13 @@ if isdefined(Main, :Base)
     end
 end
 
+"""
+    Base.AbstractOneTo
+
+Abstract type for ranges that start at 1 and have a step size of 1.
+"""
+abstract type AbstractOneTo{T} <: AbstractUnitRange{T} end
+
 """
     Base.OneTo(n)
 
@@ -446,8 +467,8 @@ Define an `AbstractUnitRange` that behaves like `1:n`, with the added
 distinction that the lower limit is guaranteed (by the type system) to
 be 1.
 """
-struct OneTo{T<:Integer} <: AbstractUnitRange{T}
-    stop::T
+struct OneTo{T<:Integer} <: AbstractOneTo{T}
+    stop::T # invariant: stop >= zero(stop)
     function OneTo{T}(stop) where {T<:Integer}
         throwbool(r)  = (@noinline; throw(ArgumentError("invalid index: $r of type Bool")))
         T === Bool && throwbool(stop)
@@ -463,6 +484,8 @@ struct OneTo{T<:Integer} <: AbstractUnitRange{T}
         T === Bool && throwbool(r)
         return new(max(zero(T), last(r)))
     end
+
+    global unchecked_oneto(stop::Integer) = new{typeof(stop)}(stop)
 end
 OneTo(stop::T) where {T<:Integer} = OneTo{T}(stop)
 OneTo(r::AbstractRange{T}) where {T<:Integer} = OneTo{T}(r)
@@ -547,6 +570,8 @@ julia> collect(LinRange(-0.1, 0.3, 5))
   0.19999999999999998
   0.3
 ```
+
+See also [`Base.LogRange`](@ref Base.LogRange) for logarithmically spaced points.
 """
 struct LinRange{T,L<:Integer} <: AbstractRange{T}
     start::T
@@ -595,7 +620,7 @@ function show(io::IO, r::LinRange{T}) where {T}
     print(io, "LinRange{")
     show(io, T)
     print(io, "}(")
-    ioc = IOContext(io, :typeinto=>T)
+    ioc = IOContext(io, :typeinfo=>T)
     show(ioc, first(r))
     print(io, ", ")
     show(ioc, last(r))
@@ -617,7 +642,7 @@ parameters `pre` and `post` characters for each printed row,
 `sep` separator string between printed elements,
 `hdots` string for the horizontal ellipsis.
 """
-function print_range(io::IO, r::AbstractRange,
+function print_range(io::IO, r::AbstractArray,
                      pre::AbstractString = " ",
                      sep::AbstractString = ", ",
                      post::AbstractString = "",
@@ -664,7 +689,7 @@ end
 ## interface implementations
 
 length(r::AbstractRange) = error("length implementation missing") # catch mistakes
-size(r::AbstractRange) = (length(r),)
+size(r::AbstractRange) = (@inline; (length(r),))
 
 isempty(r::StepRange) =
     # steprange_last(r.start, r.step, r.stop) == r.stop
@@ -695,6 +720,7 @@ julia> step(range(2.5, stop=10.9, length=85))
 """
 step(r::StepRange) = r.step
 step(r::AbstractUnitRange{T}) where {T} = oneunit(T) - zero(T)
+step(r::AbstractUnitRange{Bool}) = true
 step(r::StepRangeLen) = r.step
 step(r::StepRangeLen{T}) where {T<:AbstractFloat} = T(r.step)
 step(r::LinRange) = (last(r)-first(r))/r.lendiv
@@ -703,8 +729,6 @@ step(r::LinRange) = (last(r)-first(r))/r.lendiv
 step_hp(r::StepRangeLen) = r.step
 step_hp(r::AbstractRange) = step(r)
 
-axes(r::AbstractRange) = (oneto(length(r)),)
-
 # Needed to ensure `has_offset_axes` can constant-fold.
 has_offset_axes(::StepRange) = false
 
@@ -783,17 +807,12 @@ let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128},
         s = step(r)
         diff = last(r) - first(r)
         isempty(r) && return zero(diff)
-        # if |s| > 1, diff might have overflowed, but unsigned(diff)÷s should
-        # therefore still be valid (if the result is representable at all)
-        # n.b. !(s isa T)
-        if s isa Unsigned || -1 <= s <= 1 || s == -s
-            a = div(diff, s) % typeof(diff)
-        elseif s < 0
-            a = div(unsigned(-diff), -s) % typeof(diff)
-        else
-            a = div(unsigned(diff), s) % typeof(diff)
-        end
-        return a + oneunit(a)
+        # Compute `(diff ÷ s) + 1` in a manner robust to signed overflow
+        # by using the absolute values as unsigneds for non-empty ranges.
+        # Note that `s` may be a different type from T and diff; it may not
+        # even be a BitInteger that supports `unsigned`. Handle with care.
+        a = div(unsigned(flipsign(diff, s)), s) % typeof(diff)
+        return flipsign(a, s) + oneunit(a)
     end
     function checked_length(r::OrdinalRange{T}) where T<:bigints
         s = step(r)
@@ -836,6 +855,11 @@ first(r::OneTo{T}) where {T} = oneunit(T)
 first(r::StepRangeLen) = unsafe_getindex(r, 1)
 first(r::LinRange) = r.start
 
+function first(r::OneTo, n::Integer)
+    n < 0 && throw(ArgumentError("Number of elements must be non-negative"))
+    OneTo(oftype(r.stop, min(r.stop, n)))
+end
+
 last(r::OrdinalRange{T}) where {T} = convert(T, r.stop) # via steprange_last
 last(r::StepRangeLen) = unsafe_getindex(r, length(r))
 last(r::LinRange) = r.stop
@@ -905,13 +929,20 @@ end
 
 ## indexing
 
-isassigned(r::AbstractRange, i::Int) = firstindex(r) <= i <= lastindex(r)
+function isassigned(r::AbstractRange, i::Integer)
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
+    firstindex(r) <= i <= lastindex(r)
+end
+
+# `_getindex` is like `getindex` but does not check if `i isa Bool`
+function _getindex(v::AbstractRange, i::Integer)
+    @boundscheck checkbounds(v, i)
+    unsafe_getindex(v, i)
+end
 
 _in_unit_range(v::UnitRange, val, i::Integer) = i > 0 && val <= v.stop && val >= v.start
 
-function getindex(v::UnitRange{T}, i::Integer) where T
-    @inline
-    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
+function _getindex(v::UnitRange{T}, i::Integer) where T
     val = convert(T, v.start + (i - oneunit(i)))
     @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i)
     val
@@ -920,68 +951,38 @@ end
 const OverflowSafe = Union{Bool,Int8,Int16,Int32,Int64,Int128,
                            UInt8,UInt16,UInt32,UInt64,UInt128}
 
-function getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe}
-    @inline
-    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
+function _getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe}
     val = v.start + (i - oneunit(i))
     @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i)
     val % T
 end
 
-function getindex(v::OneTo{T}, i::Integer) where T
-    @inline
-    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
-    @boundscheck ((i > 0) & (i <= v.stop)) || throw_boundserror(v, i)
-    convert(T, i)
-end
-
-function getindex(v::AbstractRange{T}, i::Integer) where T
-    @inline
-    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
-    @boundscheck checkbounds(v, i)
-    convert(T, first(v) + (i - oneunit(i))*step_hp(v))
-end
-
 let BitInteger64 = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64} # for bootstrapping
-    function checkbounds(::Type{Bool}, v::StepRange{<:BitInteger64, <:BitInteger64}, i::BitInteger64)
-        @inline
+    global function checkbounds(::Type{Bool}, v::StepRange{<:BitInteger64, <:BitInteger64}, i::BitInteger64)
         res = widemul(step(v), i-oneunit(i)) + first(v)
         (0 < i) & ifelse(0 < step(v), res <= last(v), res >= last(v))
     end
 end
 
-function getindex(r::Union{StepRangeLen,LinRange}, i::Integer)
-    @inline
-    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
-    @boundscheck checkbounds(r, i)
-    unsafe_getindex(r, i)
-end
-
-# This is separate to make it useful even when running with --check-bounds=yes
+# unsafe_getindex is separate to make it useful even when running with --check-bounds=yes
+# it assumes the index is inbounds but does not segfault even if the index is out of bounds.
+# it does not check if the index isa bool.
+unsafe_getindex(v::OneTo{T}, i::Integer) where T = convert(T, i)
+unsafe_getindex(v::AbstractRange{T}, i::Integer) where T = convert(T, first(v) + (i - oneunit(i))*step_hp(v))
 function unsafe_getindex(r::StepRangeLen{T}, i::Integer) where T
-    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
-    u = i - r.offset
+    u = oftype(r.offset, i) - r.offset
     T(r.ref + u*r.step)
 end
-
-function _getindex_hiprec(r::StepRangeLen, i::Integer)  # without rounding by T
-    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
-    u = i - r.offset
-    r.ref + u*r.step
-end
-
-function unsafe_getindex(r::LinRange, i::Integer)
-    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
-    lerpi(i-oneunit(i), r.lendiv, r.start, r.stop)
-end
+unsafe_getindex(r::LinRange, i::Integer) = lerpi(i-oneunit(i), r.lendiv, r.start, r.stop)
 
 function lerpi(j::Integer, d::Integer, a::T, b::T) where T
-    @inline
     t = j/d # ∈ [0,1]
     # compute approximately fma(t, b, -fma(t, a, a))
     return T((1-t)*a + t*b)
 end
 
+# non-scalar indexing
+
 getindex(r::AbstractRange, ::Colon) = copy(r)
 
 function getindex(r::AbstractUnitRange, s::AbstractUnitRange{T}) where {T<:Integer}
@@ -1010,13 +1011,14 @@ function getindex(r::AbstractUnitRange, s::StepRange{T}) where {T<:Integer}
     @boundscheck checkbounds(r, s)
 
     if T === Bool
-        return range(first(s) ? first(r) : last(r), step=oneunit(eltype(r)), length=last(s))
+        len = Int(last(s))
+        return range(first(s) ? first(r) : last(r), step=oneunit(eltype(r)), length=len)
     else
         f = first(r)
         start = oftype(f, f + s.start - firstindex(r))
         st = step(s)
         len = length(s)
-        stop = oftype(f, start + (len - oneunit(len)) * st)
+        stop = oftype(f, start + (len - oneunit(len)) * (iszero(len) ? copysign(oneunit(st), st) : st))
         return range(start, stop; step=st)
     end
 end
@@ -1026,26 +1028,22 @@ function getindex(r::StepRange, s::AbstractRange{T}) where {T<:Integer}
     @boundscheck checkbounds(r, s)
 
     if T === Bool
-        if length(s) == 0
-            start, len = first(r), 0
-        elseif length(s) == 1
-            if first(s)
-                start, len = first(r), 1
-            else
-                start, len = first(r), 0
-            end
-        else # length(s) == 2
-            start, len = last(r), 1
-        end
-        return range(start, step=step(r); length=len)
+        # treat as a zero, one, or two-element vector, where at most one element is true
+        # staying inbounds on the original range (preserving either start or
+        # stop as either stop or start, depending on the length)
+        st = step(s)
+        nonempty = st > zero(st) ? last(s) : first(s)
+        # n.b. isempty(r) implies isempty(r) which means !nonempty and !first(s)
+        range((first(s) ⊻ nonempty) ⊻ isempty(r) ? last(r) : first(r), step=step(r), length=Int(nonempty))
     else
         f = r.start
         fs = first(s)
         st = r.step
-        start = oftype(f, f + (fs - oneunit(fs)) * st)
-        st = st * step(s)
+        start = oftype(f, f + (fs - firstindex(r)) * st)
+        st *= step(s)
         len = length(s)
-        stop = oftype(f, start + (len - oneunit(len)) * st)
+        # mimic steprange_last_empty here, to try to avoid overflow
+        stop = oftype(f, start + (len - oneunit(len)) * (iszero(len) ? copysign(oneunit(st), st) : st))
         return range(start, stop; step=st)
     end
 end
@@ -1080,6 +1078,11 @@ function getindex(r::StepRangeLen{T}, s::OrdinalRange{S}) where {T, S<:Integer}
     end
 end
 
+function _getindex_hiprec(r::StepRangeLen, i::Integer)  # without rounding by T
+    u = oftype(r.offset, i) - r.offset
+    r.ref + u*r.step
+end
+
 function getindex(r::LinRange{T}, s::OrdinalRange{S}) where {T, S<:Integer}
     @inline
     @boundscheck checkbounds(r, s)
@@ -1106,7 +1109,11 @@ function getindex(r::LinRange{T}, s::OrdinalRange{S}) where {T, S<:Integer}
 end
 
 show(io::IO, r::AbstractRange) = print(io, repr(first(r)), ':', repr(step(r)), ':', repr(last(r)))
-show(io::IO, r::UnitRange) = print(io, repr(first(r)), ':', repr(last(r)))
+function show(io::IO, r::UnitRange)
+    show(io, first(r))
+    print(io, ':')
+    show(io, last(r))
+end
 show(io::IO, r::OneTo) = print(io, "Base.OneTo(", r.stop, ")")
 function show(io::IO, r::StepRangeLen)
     if !iszero(step(r))
@@ -1301,6 +1308,9 @@ promote_rule(a::Type{OneTo{T1}}, b::Type{OneTo{T2}}) where {T1,T2} =
 OneTo{T}(r::OneTo{T}) where {T<:Integer} = r
 OneTo{T}(r::OneTo) where {T<:Integer} = OneTo{T}(r.stop)
 
+promote_rule(a::Type{OneTo{T1}}, ::Type{UR}) where {T1,UR<:AbstractUnitRange} =
+    promote_rule(UnitRange{T1}, UR)
+
 promote_rule(a::Type{UnitRange{T1}}, ::Type{UR}) where {T1,UR<:AbstractUnitRange} =
     promote_rule(a, UnitRange{eltype(UR)})
 UnitRange{T}(r::AbstractUnitRange) where {T<:Real} = UnitRange{T}(first(r), last(r))
@@ -1319,14 +1329,18 @@ function promote_rule(::Type{StepRange{T1a,T1b}}, ::Type{StepRange{T2a,T2b}}) wh
     el_same(promote_type(T1a, T2a), StepRange{T1a,Tb}, StepRange{T2a,Tb})
 end
 StepRange{T1,T2}(r::StepRange{T1,T2}) where {T1,T2} = r
+StepRange{T}(r::StepRange{T}) where {T} = r
+StepRange(r::StepRange) = r
 
 promote_rule(a::Type{StepRange{T1a,T1b}}, ::Type{UR}) where {T1a,T1b,UR<:AbstractUnitRange} =
     promote_rule(a, StepRange{eltype(UR), eltype(UR)})
 StepRange{T1,T2}(r::AbstractRange) where {T1,T2} =
     StepRange{T1,T2}(convert(T1, first(r)), convert(T2, step(r)), convert(T1, last(r)))
-StepRange(r::AbstractUnitRange{T}) where {T} =
-    StepRange{T,T}(first(r), step(r), last(r))
+StepRange(r::OrdinalRange{T,S}) where {T,S} = StepRange{T,S}(first(r), step(r), last(r))
+StepRange{T}(r::OrdinalRange{<:Any,S}) where {T,S} = StepRange{T,S}(first(r), step(r), last(r))
 (StepRange{T1,T2} where T1)(r::AbstractRange) where {T2} = StepRange{eltype(r),T2}(r)
+StepRange(r::StepRangeLen) = StepRange{eltype(r)}(r)
+StepRange{T}(r::StepRangeLen{<:Any,<:Any,S}) where {T,S} = StepRange{T,S}(r)
 
 function promote_rule(::Type{StepRangeLen{T1,R1,S1,L1}},::Type{StepRangeLen{T2,R2,S2,L2}}) where {T1,T2,R1,R2,S1,S2,L1,L2}
     R, S, L = promote_type(R1, R2), promote_type(S1, S2), promote_type(L1, L2)
@@ -1377,8 +1391,21 @@ function vcat(rs::AbstractRange{T}...) where T
     return a
 end
 
-Array{T,1}(r::AbstractRange{T}) where {T} = vcat(r)
-collect(r::AbstractRange) = vcat(r)
+# This method differs from that for AbstractArrays as it
+# use iteration instead of indexing. This works even if certain
+# non-standard ranges don't support indexing.
+# See https://github.com/JuliaLang/julia/pull/27302
+# Similarly, collect(r::AbstractRange) uses iteration
+function Array{T,1}(r::AbstractRange{T}) where {T}
+    a = Vector{T}(undef, length(r))
+    i = 1
+    for x in r
+        @inbounds a[i] = x
+        i += 1
+    end
+    return a
+end
+collect(r::AbstractRange) = Array(r)
 
 _reverse(r::OrdinalRange, ::Colon) = (:)(last(r), negate(step(r)), first(r))
 function _reverse(r::StepRangeLen, ::Colon)
@@ -1400,8 +1427,8 @@ sort!(r::AbstractUnitRange) = r
 
 sort(r::AbstractRange) = issorted(r) ? r : reverse(r)
 
-sortperm(r::AbstractUnitRange) = 1:length(r)
-sortperm(r::AbstractRange) = issorted(r) ? (1:1:length(r)) : (length(r):-1:1)
+sortperm(r::AbstractUnitRange) = eachindex(r)
+sortperm(r::AbstractRange) = issorted(r) ? (firstindex(r):1:lastindex(r)) : (lastindex(r):-1:firstindex(r))
 
 function sum(r::AbstractRange{<:Real})
     l = length(r)
@@ -1476,7 +1503,7 @@ end
 """
     mod(x::Integer, r::AbstractUnitRange)
 
-Find `y` in the range `r` such that ``x ≡ y (mod n)``, where `n = length(r)`,
+Find `y` in the range `r` such that `x` ≡ `y` (mod `n`), where `n = length(r)`,
 i.e. `y = mod(x - first(r), n) + first(r)`.
 
 See also [`mod1`](@ref).
@@ -1495,3 +1522,190 @@ julia> mod(3, 0:2)  # mod(3, 3)
 """
 mod(i::Integer, r::OneTo) = mod1(i, last(r))
 mod(i::Integer, r::AbstractUnitRange{<:Integer}) = mod(i-first(r), length(r)) + first(r)
+
+
+"""
+    logrange(start, stop, length)
+    logrange(start, stop; length)
+
+Construct a specialized array whose elements are spaced logarithmically
+between the given endpoints. That is, the ratio of successive elements is
+a constant, calculated from the length.
+
+This is similar to `geomspace` in Python. Unlike `PowerRange` in Mathematica,
+you specify the number of elements not the ratio.
+Unlike `logspace` in Python and Matlab, the `start` and `stop` arguments are
+always the first and last elements of the result, not powers applied to some base.
+
+# Examples
+```jldoctest
+julia> logrange(10, 4000, length=3)
+3-element Base.LogRange{Float64, Base.TwicePrecision{Float64}}:
+ 10.0, 200.0, 4000.0
+
+julia> ans[2] ≈ sqrt(10 * 4000)  # middle element is the geometric mean
+true
+
+julia> range(10, 40, length=3)[2] ≈ (10 + 40)/2  # arithmetic mean
+true
+
+julia> logrange(1f0, 32f0, 11)
+11-element Base.LogRange{Float32, Float64}:
+ 1.0, 1.41421, 2.0, 2.82843, 4.0, 5.65685, 8.0, 11.3137, 16.0, 22.6274, 32.0
+
+julia> logrange(1, 1000, length=4) ≈ 10 .^ (0:3)
+true
+```
+
+See the [`LogRange`](@ref Base.LogRange) type for further details.
+
+See also [`range`](@ref) for linearly spaced points.
+
+!!! compat "Julia 1.11"
+    This function requires at least Julia 1.11.
+"""
+logrange(start::Real, stop::Real, length::Integer) = LogRange(start, stop, Int(length))
+logrange(start::Real, stop::Real; length::Integer) = logrange(start, stop, length)
+
+
+"""
+    LogRange{T}(start, stop, len) <: AbstractVector{T}
+
+A range whose elements are spaced logarithmically between `start` and `stop`,
+with spacing controlled by `len`. Returned by [`logrange`](@ref).
+
+Like [`LinRange`](@ref), the first and last elements will be exactly those
+provided, but intermediate values may have small floating-point errors.
+These are calculated using the logs of the endpoints, which are
+stored on construction, often in higher precision than `T`.
+
+# Examples
+```jldoctest
+julia> logrange(1, 4, length=5)
+5-element Base.LogRange{Float64, Base.TwicePrecision{Float64}}:
+ 1.0, 1.41421, 2.0, 2.82843, 4.0
+
+julia> Base.LogRange{Float16}(1, 4, 5)
+5-element Base.LogRange{Float16, Float64}:
+ 1.0, 1.414, 2.0, 2.828, 4.0
+
+julia> logrange(1e-310, 1e-300, 11)[1:2:end]
+6-element Vector{Float64}:
+ 1.0e-310
+ 9.999999999999974e-309
+ 9.999999999999981e-307
+ 9.999999999999988e-305
+ 9.999999999999994e-303
+ 1.0e-300
+
+julia> prevfloat(1e-308, 5) == ans[2]
+true
+```
+
+Note that integer eltype `T` is not allowed.
+Use for instance `round.(Int, xs)`, or explicit powers of some integer base:
+
+```jldoctest
+julia> xs = logrange(1, 512, 4)
+4-element Base.LogRange{Float64, Base.TwicePrecision{Float64}}:
+ 1.0, 8.0, 64.0, 512.0
+
+julia> 2 .^ (0:3:9) |> println
+[1, 8, 64, 512]
+```
+
+!!! compat "Julia 1.11"
+    This type requires at least Julia 1.11.
+"""
+struct LogRange{T<:Real,X} <: AbstractArray{T,1}
+    start::T
+    stop::T
+    len::Int
+    extra::Tuple{X,X}
+    function LogRange{T}(start::T, stop::T, len::Int) where {T<:Real}
+        if T <: Integer
+            # LogRange{Int}(1, 512, 4) produces InexactError: Int64(7.999999999999998)
+            throw(ArgumentError("LogRange{T} does not support integer types"))
+        end
+        if iszero(start) || iszero(stop)
+            throw(DomainError((start, stop),
+                "LogRange cannot start or stop at zero"))
+        elseif start < 0 || stop < 0
+            # log would throw, but _log_twice64_unchecked does not
+            throw(DomainError((start, stop),
+                "LogRange does not accept negative numbers"))
+        elseif !isfinite(start) || !isfinite(stop)
+            throw(DomainError((start, stop),
+                "LogRange is only defined for finite start & stop"))
+        elseif len < 0
+            throw(ArgumentError(LazyString(
+                "LogRange(", start, ", ", stop, ", ", len, "): can't have negative length")))
+        elseif len == 1 && start != stop
+            throw(ArgumentError(LazyString(
+                "LogRange(", start, ", ", stop, ", ", len, "): endpoints differ, while length is 1")))
+        end
+        ex = _logrange_extra(start, stop, len)
+        new{T,typeof(ex[1])}(start, stop, len, ex)
+    end
+end
+
+function LogRange{T}(start::Real, stop::Real, len::Integer) where {T}
+    LogRange{T}(convert(T, start), convert(T, stop), convert(Int, len))
+end
+function LogRange(start::Real, stop::Real, len::Integer)
+    T = float(promote_type(typeof(start), typeof(stop)))
+    LogRange{T}(convert(T, start), convert(T, stop), convert(Int, len))
+end
+
+size(r::LogRange) = (r.len,)
+length(r::LogRange) = r.len
+
+first(r::LogRange) = r.start
+last(r::LogRange) = r.stop
+
+function _logrange_extra(a::Real, b::Real, len::Int)
+    loga = log(1.0 * a)  # widen to at least Float64
+    logb = log(1.0 * b)
+    (loga/(len-1), logb/(len-1))
+end
+function _logrange_extra(a::Float64, b::Float64, len::Int)
+    loga = _log_twice64_unchecked(a)
+    logb = _log_twice64_unchecked(b)
+    # The reason not to do linear interpolation on log(a)..log(b) in `getindex` is
+    # that division of TwicePrecision is quite slow, so do it once on construction:
+    (loga/(len-1), logb/(len-1))
+end
+
+function getindex(r::LogRange{T}, i::Int) where {T}
+    @inline
+    @boundscheck checkbounds(r, i)
+    i == 1 && return r.start
+    i == r.len && return r.stop
+    # Main path uses Math.exp_impl for TwicePrecision, but is not perfectly
+    # accurate, hence the special cases for endpoints above.
+    logx = (r.len-i) * r.extra[1] + (i-1) * r.extra[2]
+    x = _exp_allowing_twice64(logx)
+    return T(x)
+end
+
+function show(io::IO, r::LogRange{T}) where {T}
+    print(io, "LogRange{", T, "}(")
+    ioc = IOContext(io, :typeinfo => T)
+    show(ioc, first(r))
+    print(io, ", ")
+    show(ioc, last(r))
+    print(io, ", ")
+    show(io, length(r))
+    print(io, ')')
+end
+
+# Implementation detail of @world
+# The rest of this is defined in essentials.jl, but UnitRange is not available
+function _resolve_in_world(worlds::UnitRange, gr::GlobalRef)
+    # Validate that this binding's reference covers the entire world range
+    bpart = lookup_binding_partition(UInt(first(worlds)), gr)
+    if bpart.max_world < last(worlds)
+        error("Binding does not cover the full world range")
+    end
+    _resolve_in_world(UInt(last(worlds)), gr)
+end
diff --git a/base/rational.jl b/base/rational.jl
index baca2397c42ff..b48e8a359e346 100644
--- a/base/rational.jl
+++ b/base/rational.jl
@@ -17,31 +17,47 @@ end
 unsafe_rational(num::T, den::T) where {T<:Integer} = unsafe_rational(T, num, den)
 unsafe_rational(num::Integer, den::Integer) = unsafe_rational(promote(num, den)...)
 
-@noinline __throw_rational_argerror_typemin(T) = throw(ArgumentError("invalid rational: denominator can't be typemin($T)"))
 function checked_den(::Type{T}, num::T, den::T) where T<:Integer
     if signbit(den)
-        den = -den
-        signbit(den) && __throw_rational_argerror_typemin(typeof(den))
-        num = -num
+        den = checked_neg(den)
+        num = checked_neg(num)
     end
     return unsafe_rational(T, num, den)
 end
 checked_den(num::T, den::T) where T<:Integer = checked_den(T, num, den)
 checked_den(num::Integer, den::Integer) = checked_den(promote(num, den)...)
 
-@noinline __throw_rational_argerror_zero(T) = throw(ArgumentError("invalid rational: zero($T)//zero($T)"))
+@noinline __throw_rational_argerror_zero(T) = throw(ArgumentError(LazyString("invalid rational: zero(", T, ")//zero(", T, ")")))
 function Rational{T}(num::Integer, den::Integer) where T<:Integer
     iszero(den) && iszero(num) && __throw_rational_argerror_zero(T)
-    num, den = divgcd(num, den)
-    return checked_den(T, T(num), T(den))
+    if T <: Union{Unsigned, Bool}
+        # Throw InexactError if the result is negative.
+        if !iszero(num) && (signbit(den) ⊻ signbit(num))
+            throw(InexactError(:Rational, Rational{T}, num, den))
+        end
+        unum = uabs(num)
+        uden = uabs(den)
+        r_unum, r_uden = divgcd(unum, uden)
+        return unsafe_rational(T, promote(T(r_unum), T(r_uden))...)
+    else
+        r_num, r_den = divgcd(num, den)
+        return checked_den(T, promote(T(r_num), T(r_den))...)
+    end
 end
 
 Rational(n::T, d::T) where {T<:Integer} = Rational{T}(n, d)
 Rational(n::Integer, d::Integer) = Rational(promote(n, d)...)
 Rational(n::Integer) = unsafe_rational(n, one(n))
 
-function divgcd(x::Integer,y::Integer)
-    g = gcd(x,y)
+"""
+    divgcd(x::Integer, y::Integer)
+
+Return `(x÷gcd(x,y), y÷gcd(x,y))`.
+
+See also [`div`](@ref), [`gcd`](@ref).
+"""
+function divgcd(x::TX, y::TY)::Tuple{TX, TY} where {TX<:Integer, TY<:Integer}
+    g = gcd(uabs(x), uabs(y))
     div(x,g), div(y,g)
 end
 
@@ -49,6 +65,12 @@ end
     //(num, den)
 
 Divide two integers or rational numbers, giving a [`Rational`](@ref) result.
+More generally, `//` can be used for exact rational division of other numeric types
+with integer or rational components, such as complex numbers with integer components.
+
+Note that floating-point ([`AbstractFloat`](@ref)) arguments are not permitted by `//`
+(even if the values are rational).
+The arguments must be subtypes of [`Integer`](@ref), `Rational`, or composites thereof.
 
 # Examples
 ```jldoctest
@@ -57,6 +79,13 @@ julia> 3 // 5
 
 julia> (3 // 5) // (2 // 1)
 3//10
+
+julia> (1+2im) // (3+4im)
+11//25 + 2//25*im
+
+julia> 1.0 // 2
+ERROR: MethodError: no method matching //(::Float64, ::Int64)
+[...]
 ```
 """
 //(n::Integer,  d::Integer) = Rational(n,d)
@@ -76,15 +105,49 @@ function //(x::Rational, y::Rational)
 end
 
 //(x::Complex, y::Real) = complex(real(x)//y, imag(x)//y)
-//(x::Number, y::Complex) = x*conj(y)//abs2(y)
 
+# Return a complex numerator and real denominator
+# of the exact inverse of a Complex number.
+function _complex_exact_inv(y::Complex)
+    c, d = reim(y)
+    num = if (isinf(c) | isinf(d))
+        conj(zero(y))
+    else
+        conj(y)
+    end
+    num, abs2(y)
+end
+function _complex_exact_inv(y::Complex{<:Integer})
+    c, d = reim(y)
+    c_r, d_r = divgcd(c, d)
+    abs2y_r = checked_add(checked_mul(c, c_r), checked_mul(d, d_r))
+    num = complex(c_r, checked_neg(d_r))
+    num, abs2y_r
+end
+
+function //(x::Number, y::Complex)
+    num, den = _complex_exact_inv(y)
+    (x * num) // den
+end
+function //(x::Integer, y::Complex{<:Integer})
+    complex(x) // y
+end
+function //(x::Complex{<:Integer}, y::Complex{<:Integer})
+    a, b, c, d = promote(reim(x)..., reim(y)...)
+    c_r, d_r = divgcd(c, d)
+    abs2y_r = checked_add(checked_mul(c, c_r), checked_mul(d, d_r))
+    complex(
+        checked_add(checked_mul(a, c_r), checked_mul(b, d_r)),
+        checked_add(checked_mul(b, c_r), checked_neg(checked_mul(a, d_r)))
+    )//abs2y_r
+end
 
 //(X::AbstractArray, y::Number) = X .// y
 
 function show(io::IO, x::Rational)
     show(io, numerator(x))
 
-    if isone(denominator(x)) && get(io, :typeinfo, Any) <: Rational
+    if isone(denominator(x)) && nonnothing_nonmissing_typeinfo(io) <: Rational
         return
     end
 
@@ -119,7 +182,7 @@ function Rational{T}(x::Rational) where T<:Integer
     unsafe_rational(T, convert(T, x.num), convert(T, x.den))
 end
 function Rational{T}(x::Integer) where T<:Integer
-    unsafe_rational(T, convert(T, x), one(T))
+    unsafe_rational(T, T(x), T(one(x)))
 end
 
 Rational(x::Rational) = x
@@ -134,6 +197,14 @@ function (::Type{T})(x::Rational{S}) where T<:AbstractFloat where S
     P = promote_type(T,S)
     convert(T, convert(P,x.num)/convert(P,x.den))::T
 end
+ # avoid spurious overflow (#52394).  (Needed for UInt16 or larger;
+ # we also include Int16 for consistency of accuracy.)
+Float16(x::Rational{<:Union{Int16,Int32,Int64,UInt16,UInt32,UInt64}}) =
+    Float16(Float32(x))
+Float16(x::Rational{<:Union{Int128,UInt128}}) =
+    Float16(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency
+Float32(x::Rational{<:Union{Int128,UInt128}}) =
+    Float32(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency
 
 function Rational{T}(x::AbstractFloat) where T<:Integer
     r = rationalize(T, x, tol=0)
@@ -234,7 +305,7 @@ function rationalize(::Type{T}, x::Union{AbstractFloat, Rational}, tol::Real) wh
     end
 end
 rationalize(::Type{T}, x::AbstractFloat; tol::Real = eps(x)) where {T<:Integer} = rationalize(T, x, tol)
-rationalize(x::AbstractFloat; kvs...) = rationalize(Int, x; kvs...)
+rationalize(x::Real; kvs...) = rationalize(Int, x; kvs...)
 rationalize(::Type{T}, x::Complex; kvs...) where {T<:Integer} = Complex(rationalize(T, x.re; kvs...), rationalize(T, x.im; kvs...))
 rationalize(x::Complex; kvs...) = Complex(rationalize(Int, x.re; kvs...), rationalize(Int, x.im; kvs...))
 rationalize(::Type{T}, x::Rational; tol::Real = 0) where {T<:Integer} = rationalize(T, x, tol)
@@ -263,8 +334,14 @@ julia> numerator(4)
 4
 ```
 """
-numerator(x::Integer) = x
+numerator(x::Union{Integer,Complex{<:Integer}}) = x
 numerator(x::Rational) = x.num
+function numerator(z::Complex{<:Rational})
+    den = denominator(z)
+    reim = (real(z), imag(z))
+    result = checked_mul.(numerator.(reim), div.(den, denominator.(reim)))
+    complex(result...)
+end
 
 """
     denominator(x)
@@ -280,13 +357,12 @@ julia> denominator(4)
 1
 ```
 """
-denominator(x::Integer) = one(x)
+denominator(x::Union{Integer,Complex{<:Integer}}) = one(x)
 denominator(x::Rational) = x.den
+denominator(z::Complex{<:Rational}) = lcm(denominator(real(z)), denominator(imag(z)))
 
 sign(x::Rational) = oftype(x, sign(x.num))
 signbit(x::Rational) = signbit(x.num)
-copysign(x::Rational, y::Real) = unsafe_rational(copysign(x.num, y), x.den)
-copysign(x::Rational, y::Rational) = unsafe_rational(copysign(x.num, y.num), x.den)
 
 abs(x::Rational) = unsafe_rational(checked_abs(x.num), x.den)
 
@@ -304,7 +380,7 @@ function -(x::Rational{T}) where T<:BitSigned
     x.num == typemin(T) && __throw_rational_numerator_typemin(T)
     unsafe_rational(-x.num, x.den)
 end
-@noinline __throw_rational_numerator_typemin(T) = throw(OverflowError("rational numerator is typemin($T)"))
+@noinline __throw_rational_numerator_typemin(T) = throw(OverflowError(LazyString("rational numerator is typemin(", T, ")")))
 
 function -(x::Rational{T}) where T<:Unsigned
     x.num != zero(T) && __throw_negate_unsigned()
@@ -373,8 +449,14 @@ function *(y::Integer, x::Rational)
     yn, xd = divgcd(promote(y, x.den)...)
     unsafe_rational(checked_mul(yn, x.num), xd)
 end
-/(x::Rational, y::Union{Rational, Integer, Complex{<:Union{Integer,Rational}}}) = x//y
-/(x::Union{Integer, Complex{<:Union{Integer,Rational}}}, y::Rational) = x//y
+# make `false` a "strong zero": false*1//0 == 0//1 #57409
+# This is here instead of in bool.jl with the AbstractFloat method for bootstrapping
+function *(x::Bool, y::T)::promote_type(Bool,T) where T<:Rational
+    return ifelse(x, y, copysign(zero(y), y))
+end
+*(y::Rational, x::Bool) = x * y
+/(x::Rational, y::Union{Rational, Integer}) = x//y
+/(x::Integer, y::Rational) = x//y
 inv(x::Rational{T}) where {T} = checked_den(x.den, x.num)
 
 fma(x::Rational, y::Rational, z::Rational) = x*y+z
@@ -395,7 +477,7 @@ fma(x::Rational, y::Rational, z::Rational) = x*y+z
 
 function ==(x::AbstractFloat, q::Rational)
     if isfinite(x)
-        (count_ones(q.den) == 1) & (x*q.den == q.num)
+        (count_ones(q.den) == 1) && (ldexp(x, top_set_bit(q.den-1)) == q.num)
     else
         x == q.num/q.den
     end
@@ -484,10 +566,6 @@ for (S, T) in ((Rational, Integer), (Integer, Rational), (Rational, Rational))
     end
 end
 
-trunc(::Type{T}, x::Rational) where {T} = round(T, x, RoundToZero)
-floor(::Type{T}, x::Rational) where {T} = round(T, x, RoundDown)
-ceil(::Type{T}, x::Rational) where {T} = round(T, x, RoundUp)
-
 round(x::Rational, r::RoundingMode=RoundNearest) = round(typeof(x), x, r)
 
 function round(::Type{T}, x::Rational{Tr}, r::RoundingMode=RoundNearest) where {T,Tr}
@@ -526,12 +604,22 @@ end
 
 float(::Type{Rational{T}}) where {T<:Integer} = float(T)
 
-gcd(x::Rational, y::Rational) = unsafe_rational(gcd(x.num, y.num), lcm(x.den, y.den))
-lcm(x::Rational, y::Rational) = unsafe_rational(lcm(x.num, y.num), gcd(x.den, y.den))
+function gcd(x::Rational, y::Rational)
+    if isinf(x) != isinf(y)
+        throw(ArgumentError("gcd is not defined between infinite and finite numbers"))
+    end
+    unsafe_rational(gcd(x.num, y.num), lcm(x.den, y.den))
+end
+function lcm(x::Rational, y::Rational)
+    if isinf(x) != isinf(y)
+        throw(ArgumentError("lcm is not defined between infinite and finite numbers"))
+    end
+    return unsafe_rational(lcm(x.num, y.num), gcd(x.den, y.den))
+end
 function gcdx(x::Rational, y::Rational)
     c = gcd(x, y)
     if iszero(c.num)
-        a, b = one(c.num), c.num
+        a, b = zero(c.num), c.num
     elseif iszero(c.den)
         a = ifelse(iszero(x.den), one(c.den), c.den)
         b = ifelse(iszero(y.den), one(c.den), c.den)
@@ -549,9 +637,10 @@ function hash(x::Rational{<:BitInteger64}, h::UInt)
     num, den = Base.numerator(x), Base.denominator(x)
     den == 1 && return hash(num, h)
     den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h)
-    if isodd(den)
+    if isodd(den) # since den != 1, this rational can't be a Float64
         pow = trailing_zeros(num)
         num >>= pow
+        h = hash_integer(den, h)
     else
         pow = trailing_zeros(den)
         den >>= pow
@@ -565,7 +654,7 @@ function hash(x::Rational{<:BitInteger64}, h::UInt)
         end
     end
     h = hash_integer(pow, h)
-    h = hash_integer(num, h)
+    h = hash_integer((pow > 0) ? (num << (pow % 64)) : num, h)
     return h
 end
 
diff --git a/base/rawbigfloats.jl b/base/rawbigfloats.jl
new file mode 100644
index 0000000000000..a8b46b313bdb1
--- /dev/null
+++ b/base/rawbigfloats.jl
@@ -0,0 +1,143 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Some operations on BigFloat can be done more directly by treating the data portion ("BigFloatData") as a BigInt
+
+elem_count(x::BigFloatData, ::Val{:words}) = length(x)
+elem_count(x::Unsigned, ::Val{:bits}) = sizeof(x) * 8
+word_length(::BigFloatData{T}) where {T} = elem_count(zero(T), Val(:bits))
+elem_count(x::BigFloatData{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words))
+reversed_index(n::Int, i::Int) = n - i - 1
+reversed_index(x, i::Int, v::Val) = reversed_index(elem_count(x, v), i)::Int
+split_bit_index(x::BigFloatData, i::Int) = divrem(i, word_length(x), RoundToZero)
+
+"""
+`i` is the zero-based index of the wanted word in `x`, starting from
+the less significant words.
+"""
+function get_elem(x::BigFloatData{T}, i::Int, ::Val{:words}, ::Val{:ascending}) where {T}
+    @inbounds return x[i + 1]::T
+end
+
+function get_elem(x, i::Int, v::Val, ::Val{:descending})
+    j = reversed_index(x, i, v)
+    get_elem(x, j, v, Val(:ascending))
+end
+
+word_is_nonzero(x::BigFloatData, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v))
+
+word_is_nonzero(x::BigFloatData, v::Val) = let x = x
+    i -> word_is_nonzero(x, i, v)
+end
+
+"""
+Return a `Bool` indicating whether the `len` least significant words
+of `x` are nonzero.
+"""
+function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:words})
+    any(word_is_nonzero(x, Val(:ascending)), 0:(len - 1))
+end
+
+"""
+Return a `Bool` indicating whether the `len` least significant bits of
+the `i`-th (zero-based index) word of `x` are nonzero.
+"""
+function tail_is_nonzero(x::BigFloatData, len::Int, i::Int, ::Val{:word})
+    !iszero(len) &&
+    !iszero(get_elem(x, i, Val(:words), Val(:ascending)) << (word_length(x) - len))
+end
+
+"""
+Return a `Bool` indicating whether the `len` least significant bits of
+`x` are nonzero.
+"""
+function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:bits})
+    if 0 < len
+        word_count, bit_count_in_word = split_bit_index(x, len)
+        tail_is_nonzero(x, bit_count_in_word, word_count, Val(:word)) ||
+        tail_is_nonzero(x, word_count, Val(:words))
+    else
+        false
+    end::Bool
+end
+
+"""
+Return a `Bool` that is the `i`-th (zero-based index) bit of `x`.
+"""
+function get_elem(x::Unsigned, i::Int, ::Val{:bits}, ::Val{:ascending})
+    (x >>> i) % Bool
+end
+
+"""
+Return a `Bool` that is the `i`-th (zero-based index) bit of `x`.
+"""
+function get_elem(x::BigFloatData, i::Int, ::Val{:bits}, v::Val{:ascending})
+    vb = Val(:bits)
+    if 0 ≤ i < elem_count(x, vb)
+        word_index, bit_index_in_word = split_bit_index(x, i)
+        word = get_elem(x, word_index, Val(:words), v)
+        get_elem(word, bit_index_in_word, vb, v)
+    else
+        false
+    end::Bool
+end
+
+"""
+Return an integer of type `R`, consisting of the `len` most
+significant bits of `x`. If there are less than `len` bits in `x`,
+the least significant bits are zeroed.
+"""
+function truncated(::Type{R}, x::BigFloatData, len::Int) where {R<:Integer}
+    ret = zero(R)
+    if 0 < len
+        word_count, bit_count_in_word = split_bit_index(x, len)
+        k = word_length(x)
+        vals = (Val(:words), Val(:descending))
+        lenx = elem_count(x, first(vals))
+
+        for w ∈ 0:(word_count - 1)
+            ret <<= k
+            if w < lenx # if the output type is larger, truncate turns into zero-extend
+                word = get_elem(x, w, vals...)
+                ret |= R(word)
+            end
+        end
+
+        if !iszero(bit_count_in_word)
+            ret <<= bit_count_in_word
+            if word_count < lenx # if the output type is larger, truncate turns into zero-extend
+                wrd = get_elem(x, word_count, vals...)
+                ret |= R(wrd >>> (k - bit_count_in_word))
+            end
+        end
+    end
+    ret::R
+end
+
+struct BigFloatDataRoundingIncrementHelper{T<:Unsigned}
+    n::BigFloatData{T}
+    trunc_len::Int
+
+    final_bit::Bool
+    round_bit::Bool
+
+    function BigFloatDataRoundingIncrementHelper{T}(n::BigFloatData{T}, len::Int) where {T<:Unsigned}
+        vals = (Val(:bits), Val(:descending))
+        f = get_elem(n, len - 1, vals...)
+        r = get_elem(n, len    , vals...)
+        new{T}(n, len, f, r)
+    end
+end
+
+function BigFloatDataRoundingIncrementHelper(n::BigFloatData{T}, len::Int) where {T<:Unsigned}
+    BigFloatDataRoundingIncrementHelper{T}(n, len)
+end
+
+(h::BigFloatDataRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit
+
+(h::BigFloatDataRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit
+
+function (h::BigFloatDataRoundingIncrementHelper)(::Rounding.StickyBit)
+    v = Val(:bits)
+    n = h.n
+    tail_is_nonzero(n, elem_count(n, v) - h.trunc_len - 1, v)
+end
diff --git a/base/reduce.jl b/base/reduce.jl
index 61a0f466b2902..8888c7300580a 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -4,14 +4,6 @@
 
 ###### Generic (map)reduce functions ######
 
-if Int === Int32
-    const SmallSigned = Union{Int8,Int16}
-    const SmallUnsigned = Union{UInt8,UInt16}
-else
-    const SmallSigned = Union{Int8,Int16,Int32}
-    const SmallUnsigned = Union{UInt8,UInt16,UInt32}
-end
-
 abstract type AbstractBroadcasted end
 const AbstractArrayOrBroadcasted = Union{AbstractArray, AbstractBroadcasted}
 
@@ -22,8 +14,8 @@ The reduction operator used in `sum`. The main difference from [`+`](@ref) is th
 integers are promoted to `Int`/`UInt`.
 """
 add_sum(x, y) = x + y
-add_sum(x::SmallSigned, y::SmallSigned) = Int(x) + Int(y)
-add_sum(x::SmallUnsigned, y::SmallUnsigned) = UInt(x) + UInt(y)
+add_sum(x::Union{Bool,BitIntegerSmall}, y::Union{Bool,BitIntegerSmall}) = Int(x) + Int(y)
+add_sum(x::BitUnsignedSmall, y::BitUnsignedSmall) = UInt(x) + UInt(y)
 add_sum(x::Real, y::Real)::Real = x + y
 
 """
@@ -33,10 +25,16 @@ The reduction operator used in `prod`. The main difference from [`*`](@ref) is t
 integers are promoted to `Int`/`UInt`.
 """
 mul_prod(x, y) = x * y
-mul_prod(x::SmallSigned, y::SmallSigned) = Int(x) * Int(y)
-mul_prod(x::SmallUnsigned, y::SmallUnsigned) = UInt(x) * UInt(y)
+mul_prod(x::BitSignedSmall, y::BitSignedSmall) = Int(x) * Int(y)
+mul_prod(x::BitUnsignedSmall, y::BitUnsignedSmall) = UInt(x) * UInt(y)
 mul_prod(x::Real, y::Real)::Real = x * y
 
+and_all(x, y) = (x && y)::Bool
+or_any(x, y) = (x || y)::Bool
+# As a performance optimization, avoid runtime branches:
+and_all(x::Bool, y::Bool) = (x & y)::Bool
+or_any(x::Bool, y::Bool) = (x | y)::Bool
+
 ## foldl && mapfoldl
 
 function mapfoldl_impl(f::F, op::OP, nt, itr) where {F,OP}
@@ -51,15 +49,15 @@ function foldl_impl(op::OP, nt, itr) where {OP}
 end
 
 function _foldl_impl(op::OP, init, itr) where {OP}
-    # Unroll the while loop once; if init is known, the call to op may
-    # be evaluated at compile time
+    # Unroll the loop once to check if the iterator is empty.
+    # If init is known, the call to op may be evaluated at compile time
     y = iterate(itr)
     y === nothing && return init
     v = op(init, y[1])
-    while true
-        y = iterate(itr, y[2])
-        y === nothing && break
-        v = op(v, y[1])
+    # Using a for loop is more performant than a while loop (see #56492)
+    # This unrolls the loop a second time before entering the body
+    for x in Iterators.rest(itr, y[2])
+        v = op(v, x)
     end
     return v
 end
@@ -220,7 +218,7 @@ Like [`mapreduce`](@ref), but with guaranteed right associativity, as in [`foldr
 provided, the keyword argument `init` will be used exactly once. In general, it will be
 necessary to provide `init` to work with empty collections.
 """
-mapfoldr(f, op, itr; init=_InitialValue()) = mapfoldr_impl(f, op, init, itr)
+mapfoldr(f::F, op::F2, itr; init=_InitialValue()) where {F,F2} = mapfoldr_impl(f, op, init, itr)
 
 
 """
@@ -239,7 +237,7 @@ julia> foldr(=>, 1:4; init=0)
 1 => (2 => (3 => (4 => 0)))
 ```
 """
-foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...)
+foldr(op::F, itr; kw...) where {F} = mapfoldr(identity, op, itr; kw...)
 
 ## reduce & mapreduce
 
@@ -305,7 +303,7 @@ implementations may reuse the return value of `f` for elements that appear multi
 guaranteed left or right associativity and invocation of `f` for every value.
 """
 mapreduce(f, op, itr; kw...) = mapfoldl(f, op, itr; kw...)
-mapreduce(f, op, itrs...; kw...) = reduce(op, Generator(f, itrs...); kw...)
+mapreduce(f, op, itr, itrs...; kw...) = reduce(op, Generator(f, itr, itrs...); kw...)
 
 # Note: sum_seq usually uses four or more accumulators after partial
 # unrolling, so each accumulator gets at most 256 numbers
@@ -316,10 +314,11 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096
 
 
 # handling empty arrays
-_empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed"))
-_empty_reduce_error(@nospecialize(f), @nospecialize(T::Type)) = throw(ArgumentError("""
-    reducing with $f over an empty collection of element type $T is not allowed.
-    You may be able to prevent this error by supplying an `init` value to the reducer."""))
+_empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed; consider supplying `init` to the reducer"))
+reduce_empty(f, T) = _empty_reduce_error()
+mapreduce_empty(f, op, T) = _empty_reduce_error()
+reduce_empty(f, ::Type{Union{}}, splat...) = _empty_reduce_error()
+mapreduce_empty(f, op, ::Type{Union{}}, splat...) = _empty_reduce_error()
 
 """
     Base.reduce_empty(op, T)
@@ -339,23 +338,21 @@ is generally ambiguous, and especially so when the element type is unknown).
 
 As an alternative, consider supplying an `init` value to the reducer.
 """
-reduce_empty(::typeof(+), ::Type{Union{}}) = _empty_reduce_error(+, Union{})
 reduce_empty(::typeof(+), ::Type{T}) where {T} = zero(T)
 reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int)
-reduce_empty(::typeof(*), ::Type{Union{}}) = _empty_reduce_error(*, Union{})
 reduce_empty(::typeof(*), ::Type{T}) where {T} = one(T)
 reduce_empty(::typeof(*), ::Type{<:AbstractChar}) = ""
 reduce_empty(::typeof(&), ::Type{Bool}) = true
 reduce_empty(::typeof(|), ::Type{Bool}) = false
+reduce_empty(::typeof(and_all), ::Type{T}) where {T} = true
+reduce_empty(::typeof(or_any), ::Type{T}) where {T} = false
 
-reduce_empty(::typeof(add_sum), ::Type{Union{}}) = _empty_reduce_error(add_sum, Union{})
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T} = reduce_empty(+, T)
-reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallSigned}  = zero(Int)
-reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallUnsigned} = zero(UInt)
-reduce_empty(::typeof(mul_prod), ::Type{Union{}}) = _empty_reduce_error(mul_prod, Union{})
+reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:BitSignedSmall}  = zero(Int)
+reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:BitUnsignedSmall} = zero(UInt)
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T} = reduce_empty(*, T)
-reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned}  = one(Int)
-reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt)
+reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:BitSignedSmall}  = one(Int)
+reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:BitUnsignedSmall} = one(UInt)
 
 reduce_empty(op::BottomRF, ::Type{T}) where {T} = reduce_empty(op.rf, T)
 reduce_empty(op::MappingRF, ::Type{T}) where {T} = mapreduce_empty(op.f, op.rf, T)
@@ -365,7 +362,7 @@ reduce_empty(op::FlipArgs, ::Type{T}) where {T} = reduce_empty(op.f, T)
 """
     Base.mapreduce_empty(f, op, T)
 
-The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
+The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref) or
 [`mapfoldr`](@ref) with map `f` and reduction `op` over an empty array with element type
 of `T`. See [`Base.reduce_empty`](@ref) for more information.
 """
@@ -377,7 +374,7 @@ mapreduce_empty(f::typeof(abs),  ::typeof(max), T) = abs(zero(T))
 mapreduce_empty(f::typeof(abs2), ::typeof(max), T) = abs2(zero(T))
 
 # For backward compatibility:
-mapreduce_empty_iter(f, op, itr, ItrEltype) =
+mapreduce_empty_iter(f::F, op::F2, itr, ItrEltype) where {F,F2} =
     reduce_empty_iter(MappingRF(f, op), itr, ItrEltype)
 
 @inline reduce_empty_iter(op, itr) = reduce_empty_iter(op, itr, IteratorEltype(itr))
@@ -391,7 +388,7 @@ reduce_empty_iter(op, itr, ::EltypeUnknown) = throw(ArgumentError("""
 """
     Base.reduce_first(op, x)
 
-The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref`) or
+The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref) or
 [`foldr`](@ref) with reduction `op` over an iterator which contains a single element
 `x`. This value may also be used to initialise the recursion, so that `reduce(op, [x, y])`
 may call `op(reduce_first(op, x), y)`.
@@ -405,16 +402,18 @@ reduce_first(::typeof(+), x::Bool) = Int(x)
 reduce_first(::typeof(*), x::AbstractChar) = string(x)
 
 reduce_first(::typeof(add_sum), x) = reduce_first(+, x)
-reduce_first(::typeof(add_sum), x::SmallSigned)   = Int(x)
-reduce_first(::typeof(add_sum), x::SmallUnsigned) = UInt(x)
+reduce_first(::typeof(add_sum), x::BitSignedSmall)   = Int(x)
+reduce_first(::typeof(add_sum), x::BitUnsignedSmall) = UInt(x)
 reduce_first(::typeof(mul_prod), x) = reduce_first(*, x)
-reduce_first(::typeof(mul_prod), x::SmallSigned)   = Int(x)
-reduce_first(::typeof(mul_prod), x::SmallUnsigned) = UInt(x)
+reduce_first(::typeof(mul_prod), x::BitSignedSmall)   = Int(x)
+reduce_first(::typeof(mul_prod), x::BitUnsignedSmall) = UInt(x)
+reduce_first(::typeof(vcat), x) = vcat(x)
+reduce_first(::typeof(hcat), x) = hcat(x)
 
 """
     Base.mapreduce_first(f, op, x)
 
-The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
+The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref) or
 [`mapfoldr`](@ref) with map `f` and reduction `op` over an iterator which contains a
 single element `x`. This value may also be used to initialise the recursion, so that
 `mapreduce(f, op, [x, y])` may call `op(mapreduce_first(f, op, x), f(y))`.
@@ -483,8 +482,8 @@ elements are not reordered if you use an ordered collection.
 julia> reduce(*, [2; 3; 4])
 24
 
-julia> reduce(*, [2; 3; 4]; init=-1)
--24
+julia> reduce(*, Int[]; init=1)
+1
 ```
 """
 reduce(op, itr; kw...) = mapreduce(identity, op, itr; kw...)
@@ -620,63 +619,6 @@ julia> prod(1:5; init = 1.0)
 prod(a; kw...) = mapreduce(identity, mul_prod, a; kw...)
 
 ## maximum, minimum, & extrema
-_fast(::typeof(min),x,y) = min(x,y)
-_fast(::typeof(max),x,y) = max(x,y)
-function _fast(::typeof(max), x::AbstractFloat, y::AbstractFloat)
-    ifelse(isnan(x),
-        x,
-        ifelse(x > y, x, y))
-end
-
-function _fast(::typeof(min),x::AbstractFloat, y::AbstractFloat)
-    ifelse(isnan(x),
-        x,
-        ifelse(x < y, x, y))
-end
-
-isbadzero(::typeof(max), x::AbstractFloat) = (x == zero(x)) & signbit(x)
-isbadzero(::typeof(min), x::AbstractFloat) = (x == zero(x)) & !signbit(x)
-isbadzero(op, x) = false
-isgoodzero(::typeof(max), x) = isbadzero(min, x)
-isgoodzero(::typeof(min), x) = isbadzero(max, x)
-
-function mapreduce_impl(f, op::Union{typeof(max), typeof(min)},
-                        A::AbstractArrayOrBroadcasted, first::Int, last::Int)
-    a1 = @inbounds A[first]
-    v1 = mapreduce_first(f, op, a1)
-    v2 = v3 = v4 = v1
-    chunk_len = 256
-    start = first + 1
-    simdstop  = start + chunk_len - 4
-    while simdstop <= last - 3
-        @inbounds for i in start:4:simdstop
-            v1 = _fast(op, v1, f(A[i+0]))
-            v2 = _fast(op, v2, f(A[i+1]))
-            v3 = _fast(op, v3, f(A[i+2]))
-            v4 = _fast(op, v4, f(A[i+3]))
-        end
-        checkbounds(A, simdstop+3)
-        start += chunk_len
-        simdstop += chunk_len
-    end
-    v = op(op(v1,v2),op(v3,v4))
-    for i in start:last
-        @inbounds ai = A[i]
-        v = op(v, f(ai))
-    end
-
-    # enforce correct order of 0.0 and -0.0
-    # e.g. maximum([0.0, -0.0]) === 0.0
-    # should hold
-    if isbadzero(op, v)
-        for i in first:last
-            x = @inbounds A[i]
-            isgoodzero(op,x) && return x
-        end
-    end
-    return v
-end
-
 """
     maximum(f, itr; [init])
 
@@ -753,7 +695,7 @@ julia> maximum([1,2,3])
 3
 
 julia> maximum(())
-ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
+ERROR: ArgumentError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
 Stacktrace:
 [...]
 
@@ -785,7 +727,7 @@ julia> minimum([1,2,3])
 1
 
 julia> minimum([])
-ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
+ERROR: ArgumentError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
 Stacktrace:
 [...]
 
@@ -863,25 +805,18 @@ ExtremaMap(::Type{T}) where {T} = ExtremaMap{Type{T}}(T)
 @inline (f::ExtremaMap)(x) = (y = f.f(x); (y, y))
 
 @inline _extrema_rf((min1, max1), (min2, max2)) = (min(min1, min2), max(max1, max2))
-# optimization for IEEEFloat
-function _extrema_rf(x::NTuple{2,T}, y::NTuple{2,T}) where {T<:IEEEFloat}
-    (x1, x2), (y1, y2) = x, y
-    anynan = isnan(x1)|isnan(y1)
-    z1 = ifelse(anynan, x1-y1, ifelse(signbit(x1-y1), x1, y1))
-    z2 = ifelse(anynan, x1-y1, ifelse(signbit(x2-y2), y2, x2))
-    z1, z2
-end
 
 ## findmax, findmin, argmax & argmin
 
 """
     findmax(f, domain) -> (f(x), index)
 
-Return a pair of a value in the codomain (outputs of `f`) and the index of
+Return a pair of a value in the codomain (outputs of `f`) and the index or key of
 the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is maximised.
 If there are multiple maximal points, then the first one will be returned.
 
-`domain` must be a non-empty iterable.
+`domain` must be a non-empty iterable supporting [`keys`](@ref). Indices
+are of the same type as those returned by [`keys(domain)`](@ref).
 
 Values are compared with `isless`.
 
@@ -915,6 +850,9 @@ Return the maximal element of the collection `itr` and its index or key.
 If there are multiple maximal elements, then the first one will be returned.
 Values are compared with `isless`.
 
+Indices are of the same type as those returned by [`keys(itr)`](@ref)
+and [`pairs(itr)`](@ref).
+
 See also: [`findmin`](@ref), [`argmax`](@ref), [`maximum`](@ref).
 
 # Examples
@@ -936,12 +874,15 @@ _findmax(a, ::Colon) = findmax(identity, a)
 """
     findmin(f, domain) -> (f(x), index)
 
-Return a pair of a value in the codomain (outputs of `f`) and the index of
+Return a pair of a value in the codomain (outputs of `f`) and the index or key of
 the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is minimised.
 If there are multiple minimal points, then the first one will be returned.
 
 `domain` must be a non-empty iterable.
 
+Indices are of the same type as those returned by [`keys(domain)`](@ref)
+and [`pairs(domain)`](@ref).
+
 `NaN` is treated as less than all other values except `missing`.
 
 !!! compat "Julia 1.7"
@@ -975,6 +916,9 @@ Return the minimal element of the collection `itr` and its index or key.
 If there are multiple minimal elements, then the first one will be returned.
 `NaN` is treated as less than all other values except `missing`.
 
+Indices are of the same type as those returned by [`keys(itr)`](@ref)
+and [`pairs(itr)`](@ref).
+
 See also: [`findmax`](@ref), [`argmin`](@ref), [`minimum`](@ref).
 
 # Examples
@@ -1027,6 +971,9 @@ If there are multiple maximal elements, then the first one will be returned.
 
 The collection must not be empty.
 
+Indices are of the same type as those returned by [`keys(itr)`](@ref)
+and [`pairs(itr)`](@ref).
+
 Values are compared with `isless`.
 
 See also: [`argmin`](@ref), [`findmax`](@ref).
@@ -1082,6 +1029,9 @@ If there are multiple minimal elements, then the first one will be returned.
 
 The collection must not be empty.
 
+Indices are of the same type as those returned by [`keys(itr)`](@ref)
+and [`pairs(itr)`](@ref).
+
 `NaN` is treated as less than all other values except `missing`.
 
 See also: [`argmax`](@ref), [`findmin`](@ref).
@@ -1100,233 +1050,12 @@ julia> argmin([7, 1, 1, NaN])
 """
 argmin(itr) = findmin(itr)[2]
 
-## all & any
-
-"""
-    any(itr) -> Bool
-
-Test whether any elements of a boolean collection are `true`, returning `true` as
-soon as the first `true` value in `itr` is encountered (short-circuiting). To
-short-circuit on `false`, use [`all`](@ref).
-
-If the input contains [`missing`](@ref) values, return `missing` if all non-missing
-values are `false` (or equivalently, if the input contains no `true` value), following
-[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
-
-See also: [`all`](@ref), [`count`](@ref), [`sum`](@ref), [`|`](@ref), , [`||`](@ref).
-
-# Examples
-```jldoctest
-julia> a = [true,false,false,true]
-4-element Vector{Bool}:
- 1
- 0
- 0
- 1
-
-julia> any(a)
-true
-
-julia> any((println(i); v) for (i, v) in enumerate(a))
-1
-true
-
-julia> any([missing, true])
-true
-
-julia> any([false, missing])
-missing
-```
-"""
-any(itr) = any(identity, itr)
-
-"""
-    all(itr) -> Bool
-
-Test whether all elements of a boolean collection are `true`, returning `false` as
-soon as the first `false` value in `itr` is encountered (short-circuiting). To
-short-circuit on `true`, use [`any`](@ref).
-
-If the input contains [`missing`](@ref) values, return `missing` if all non-missing
-values are `true` (or equivalently, if the input contains no `false` value), following
-[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
-
-See also: [`all!`](@ref), [`any`](@ref), [`count`](@ref), [`&`](@ref), , [`&&`](@ref), [`allunique`](@ref).
-
-# Examples
-```jldoctest
-julia> a = [true,false,false,true]
-4-element Vector{Bool}:
- 1
- 0
- 0
- 1
-
-julia> all(a)
-false
-
-julia> all((println(i); v) for (i, v) in enumerate(a))
-1
-2
-false
-
-julia> all([missing, false])
-false
-
-julia> all([true, missing])
-missing
-```
-"""
-all(itr) = all(identity, itr)
-
-"""
-    any(p, itr) -> Bool
-
-Determine whether predicate `p` returns `true` for any elements of `itr`, returning
-`true` as soon as the first item in `itr` for which `p` returns `true` is encountered
-(short-circuiting). To short-circuit on `false`, use [`all`](@ref).
-
-If the input contains [`missing`](@ref) values, return `missing` if all non-missing
-values are `false` (or equivalently, if the input contains no `true` value), following
-[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
-
-# Examples
-```jldoctest
-julia> any(i->(4<=i<=6), [3,5,7])
-true
-
-julia> any(i -> (println(i); i > 3), 1:10)
-1
-2
-3
-4
-true
-
-julia> any(i -> i > 0, [1, missing])
-true
-
-julia> any(i -> i > 0, [-1, missing])
-missing
-
-julia> any(i -> i > 0, [-1, 0])
-false
-```
-"""
-any(f, itr) = _any(f, itr, :)
-
-function _any(f, itr, ::Colon)
-    anymissing = false
-    for x in itr
-        v = f(x)
-        if ismissing(v)
-            anymissing = true
-        elseif v
-            return true
-        end
-    end
-    return anymissing ? missing : false
-end
-
-# Specialized versions of any(f, ::Tuple)
-# We fall back to the for loop implementation all elements have the same type or
-# if the tuple is too large.
-function any(f, itr::Tuple)
-    if itr isa NTuple || length(itr) > 32
-        return _any(f, itr, :)
-    end
-    _any_tuple(f, false, itr...)
-end
-
-@inline function _any_tuple(f, anymissing, x, rest...)
-    v = f(x)
-    if ismissing(v)
-        anymissing = true
-    elseif v
-        return true
-    end
-    return _any_tuple(f, anymissing, rest...)
-end
-@inline _any_tuple(f, anymissing) = anymissing ? missing : false
-
-"""
-    all(p, itr) -> Bool
-
-Determine whether predicate `p` returns `true` for all elements of `itr`, returning
-`false` as soon as the first item in `itr` for which `p` returns `false` is encountered
-(short-circuiting). To short-circuit on `true`, use [`any`](@ref).
-
-If the input contains [`missing`](@ref) values, return `missing` if all non-missing
-values are `true` (or equivalently, if the input contains no `false` value), following
-[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
-
-# Examples
-```jldoctest
-julia> all(i->(4<=i<=6), [4,5,6])
-true
-
-julia> all(i -> (println(i); i < 3), 1:10)
-1
-2
-3
-false
-
-julia> all(i -> i > 0, [1, missing])
-missing
-
-julia> all(i -> i > 0, [-1, missing])
-false
-
-julia> all(i -> i > 0, [1, 2])
-true
-```
-"""
-all(f, itr) = _all(f, itr, :)
-
-function _all(f, itr, ::Colon)
-    anymissing = false
-    for x in itr
-        v = f(x)
-        if ismissing(v)
-            anymissing = true
-        # this syntax allows throwing a TypeError for non-Bool, for consistency with any
-        elseif v
-            continue
-        else
-            return false
-        end
-    end
-    return anymissing ? missing : true
-end
-
-# Specialized versions of all(f, ::Tuple),
-# This is similar to any(f, ::Tuple) defined above.
-function all(f, itr::Tuple)
-    if itr isa NTuple || length(itr) > 32
-        return _all(f, itr, :)
-    end
-    _all_tuple(f, false, itr...)
-end
-
-@inline function _all_tuple(f, anymissing, x, rest...)
-    v = f(x)
-    if ismissing(v)
-        anymissing = true
-    # this syntax allows throwing a TypeError for non-Bool, for consistency with any
-    elseif v
-        nothing
-    else
-        return false
-    end
-    return _all_tuple(f, anymissing, rest...)
-end
-@inline _all_tuple(f, anymissing) = anymissing ? missing : true
-
 ## count
 
 _bool(f) = x->f(x)::Bool
 
 """
-    count([f=identity,] itr; init=0) -> Integer
+    count([f=identity,] itr; init=0)::Integer
 
 Count the number of elements in `itr` for which the function `f` returns `true`.
 If `f` is omitted, count the number of `true` elements in `itr` (which
@@ -1346,8 +1075,8 @@ julia> count(i->(4<=i<=6), [2,3,4,5,6])
 julia> count([true, false, true, true])
 3
 
-julia> count(>(3), 1:7, init=0x03)
-0x07
+julia> count(>(3), 1:7, init=UInt(0))
+0x0000000000000004
 ```
 """
 count(itr; init=0) = count(identity, itr; init)
@@ -1356,8 +1085,10 @@ count(f, itr; init=0) = _simple_count(f, itr, init)
 
 _simple_count(pred, itr, init) = sum(_bool(pred), itr; init)
 
-function _simple_count(::typeof(identity), x::Array{Bool}, init::T=0) where {T}
-    n::T = init
+function _simple_count(::typeof(identity), x::Array{Bool}, init=0)
+    v0 = Base.add_sum(init, false)
+    T = typeof(v0)
+    n::T = v0
     chunks = length(x) ÷ sizeof(UInt)
     mask = 0x0101010101010101 % UInt
     GC.@preserve x begin
@@ -1371,3 +1102,10 @@ function _simple_count(::typeof(identity), x::Array{Bool}, init::T=0) where {T}
     end
     return n
 end
+
+# A few common reductions for ranges with init specified
+for (fred, f) in ((maximum, max), (minimum, min), (sum, add_sum))
+    @eval function _foldl_impl(op::typeof(BottomRF($f)), init, r::AbstractRange)
+        isempty(r) ? init : op(init, $fred(r))
+    end
+end
diff --git a/base/reducedim.jl b/base/reducedim.jl
index c1c58ccdfefed..703babc9bc56f 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -17,59 +17,21 @@ reduced_indices(a::AbstractArrayOrBroadcasted, region) = reduced_indices(axes(a)
 # for reductions that keep 0 dims as 0
 reduced_indices0(a::AbstractArray, region) = reduced_indices0(axes(a), region)
 
-function reduced_indices(inds::Indices{N}, d::Int) where N
-    d < 1 && throw(ArgumentError("dimension must be ≥ 1, got $d"))
-    if d == 1
-        return (reduced_index(inds[1]), tail(inds)...)::typeof(inds)
-    elseif 1 < d <= N
-        return tuple(inds[1:d-1]..., oftype(inds[d], reduced_index(inds[d])), inds[d+1:N]...)::typeof(inds)
-    else
-        return inds
-    end
+function reduced_indices(axs::Indices{N}, region) where N
+    _check_valid_region(region)
+    ntuple(d -> d in region ? reduced_index(axs[d]) : axs[d], Val(N))
 end
 
-function reduced_indices0(inds::Indices{N}, d::Int) where N
-    d < 1 && throw(ArgumentError("dimension must be ≥ 1, got $d"))
-    if d <= N
-        ind = inds[d]
-        rd = isempty(ind) ? ind : reduced_index(inds[d])
-        if d == 1
-            return (rd, tail(inds)...)::typeof(inds)
-        else
-            return tuple(inds[1:d-1]..., oftype(inds[d], rd), inds[d+1:N]...)::typeof(inds)
-        end
-    else
-        return inds
-    end
+function reduced_indices0(axs::Indices{N}, region) where N
+    _check_valid_region(region)
+    ntuple(d -> d in region && !isempty(axs[d]) ? reduced_index(axs[d]) : axs[d], Val(N))
 end
 
-function reduced_indices(inds::Indices{N}, region) where N
-    rinds = collect(inds)
-    for i in region
-        isa(i, Integer) || throw(ArgumentError("reduced dimension(s) must be integers"))
-        d = Int(i)
-        if d < 1
-            throw(ArgumentError("region dimension(s) must be ≥ 1, got $d"))
-        elseif d <= N
-            rinds[d] = reduced_index(rinds[d])
-        end
+function _check_valid_region(region)
+    for d in region
+        isa(d, Integer) || throw(ArgumentError("reduced dimension(s) must be integers"))
+        Int(d) < 1 && throw(ArgumentError("region dimension(s) must be ≥ 1, got $d"))
     end
-    tuple(rinds...)::typeof(inds)
-end
-
-function reduced_indices0(inds::Indices{N}, region) where N
-    rinds = collect(inds)
-    for i in region
-        isa(i, Integer) || throw(ArgumentError("reduced dimension(s) must be integers"))
-        d = Int(i)
-        if d < 1
-            throw(ArgumentError("region dimension(s) must be ≥ 1, got $d"))
-        elseif d <= N
-            rind = rinds[d]
-            rinds[d] = isempty(rind) ? rind : reduced_index(rind)
-        end
-    end
-    tuple(rinds...)::typeof(inds)
 end
 
 ###### Generic reduction functions #####
@@ -83,7 +45,7 @@ end
 initarray!(a::AbstractArray{T}, f, ::Union{typeof(min),typeof(max),typeof(_extrema_rf)},
     init::Bool, src::AbstractArray) where {T} = (init && mapfirst!(f, a, src); a)
 
-for (Op, initval) in ((:(typeof(&)), true), (:(typeof(|)), false))
+for (Op, initval) in ((:(typeof(and_all)), true), (:(typeof(or_any)), false))
     @eval initarray!(a::AbstractArray, ::Any, ::$(Op), init::Bool, src::AbstractArray) = (init && fill!(a, $initval); a)
 end
 
@@ -146,16 +108,18 @@ for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min
             T = _realtype(f, promote_union(eltype(A)))
             Tr = v0 isa T ? T : typeof(v0)
 
-            # but NaNs and missing need to be avoided as initial values
+            # but NaNs, missing and unordered values need to be avoided as initial values
             if v0 isa Number && isnan(v0)
                 # v0 is NaN
                 v0 = oftype(v0, $initval)
             elseif isunordered(v0)
                 # v0 is missing or a third-party unordered value
                 Tnm = nonmissingtype(Tr)
-                # TODO: Some types, like BigInt, don't support typemin/typemax.
-                # So a Matrix{Union{BigInt, Missing}} can still error here.
-                v0 = $typeextreme(Tnm)
+                if Tnm <: Union{BitInteger, IEEEFloat, BigFloat}
+                    v0 = $typeextreme(Tnm)
+                elseif !all(isunordered, A1)
+                    v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1))
+                end
             end
             # v0 may have changed type.
             Tr = v0 isa T ? T : typeof(v0)
@@ -186,12 +150,18 @@ function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray
 
     # but NaNs and missing need to be avoided as initial values
     if v0[1] isa Number && isnan(v0[1])
+        # v0 is NaN
         v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf)
     elseif isunordered(v0[1])
         # v0 is missing or a third-party unordered value
-        # TODO: Some types, like BigInt, don't support typemin/typemax.
-        # So a Matrix{Union{BigInt, Missing}} can still error here.
-        v0 = typemax(nonmissingtype(Tmin)), typemin(nonmissingtype(Tmax))
+        Tminnm = nonmissingtype(Tmin)
+        Tmaxnm = nonmissingtype(Tmax)
+        if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} &&
+            Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat}
+            v0 = (typemax(Tminnm), typemin(Tmaxnm))
+        elseif !all(isunordered, A1)
+            v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1)))
+        end
     end
     # v0 may have changed type.
     Tmin = v0[1] isa T ? T : typeof(v0[1])
@@ -203,6 +173,10 @@ end
 reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(max), A::AbstractArray{T}, region) where {T} =
     reducedim_initarray(A, region, zero(f(zero(T))), _realtype(f, T))
 
+reducedim_init(f, op::typeof(and_all), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, true)
+reducedim_init(f, op::typeof(or_any), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, false)
+
+# These definitions are wrong in general; Cf. JuliaLang/julia#45562
 reducedim_init(f, op::typeof(&), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, true)
 reducedim_init(f, op::typeof(|), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, false)
 
@@ -226,11 +200,8 @@ end
 
 ## generic (map)reduction
 
-has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = false
-has_fast_linear_indexing(a::Array) = true
-has_fast_linear_indexing(::Union{Number,Ref,AbstractChar}) = true  # 0d objects, for Broadcasted
-has_fast_linear_indexing(bc::Broadcast.Broadcasted) =
-    all(has_fast_linear_indexing, bc.args)
+has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = IndexStyle(a) === IndexLinear()
+has_fast_linear_indexing(a::AbstractVector) = true
 
 function check_reducedims(R, A)
     # Check whether R has compatible dimensions w.r.t. A for reduction
@@ -291,8 +262,9 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
         # use mapreduce_impl, which is probably better tuned to achieve higher performance
         nslices = div(length(A), lsiz)
         ibase = first(LinearIndices(A))-1
-        for i = 1:nslices
-            @inbounds R[i] = op(R[i], mapreduce_impl(f, op, A, ibase+1, ibase+lsiz))
+        for i in eachindex(R)
+            r = op(@inbounds(R[i]), mapreduce_impl(f, op, A, ibase+1, ibase+lsiz))
+            @inbounds R[i] = r
             ibase += lsiz
         end
         return R
@@ -302,19 +274,20 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
     if reducedim1(R, A)
         # keep the accumulator as a local variable when reducing along the first dimension
         i1 = first(axes1(R))
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
-            r = R[i1,IR]
+            @inbounds r = R[i1,IR]
             @simd for i in axes(A, 1)
-                r = op(r, f(A[i, IA]))
+                r = op(r, f(@inbounds(A[i, IA])))
             end
-            R[i1,IR] = r
+            @inbounds R[i1,IR] = r
         end
     else
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
             @simd for i in axes(A, 1)
-                R[i,IR] = op(R[i,IR], f(A[i,IA]))
+                v = op(@inbounds(R[i,IR]), f(@inbounds(A[i,IA])))
+                @inbounds R[i,IR] = v
             end
         end
     end
@@ -354,10 +327,10 @@ julia> mapreduce(isodd, |, a, dims=1)
  1  1  1  1
 ```
 """
-mapreduce(f, op, A::AbstractArrayOrBroadcasted; dims=:, init=_InitialValue()) =
+mapreduce(f, op, A::AbstractArrayOrBroadcasted; dims::D=:, init=_InitialValue()) where {D} =
     _mapreduce_dim(f, op, init, A, dims)
-mapreduce(f, op, A::AbstractArrayOrBroadcasted...; kw...) =
-    reduce(op, map(f, A...); kw...)
+mapreduce(f, op, A::AbstractArrayOrBroadcasted, B::AbstractArrayOrBroadcasted...; kw...) =
+    reduce(op, map(f, A, B...); kw...)
 
 _mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, ::Colon) =
     mapfoldl_impl(f, op, nt, A)
@@ -365,10 +338,10 @@ _mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, ::Colon) =
 _mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) =
     _mapreduce(f, op, IndexStyle(A), A)
 
-_mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, dims) =
+_mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, dims::D) where {D} =
     mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A)
 
-_mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) =
+_mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims::D) where {D} =
     mapreducedim!(f, op, reducedim_init(f, op, A, dims), A)
 
 """
@@ -436,8 +409,8 @@ julia> count(<=(2), A, dims=2)
  0
 ```
 """
-count(A::AbstractArrayOrBroadcasted; dims=:, init=0) = count(identity, A; dims, init)
-count(f, A::AbstractArrayOrBroadcasted; dims=:, init=0) = _count(f, A, dims, init)
+count(A::AbstractArrayOrBroadcasted; dims::D=:, init=0) where {D} = count(identity, A; dims, init)
+count(f, A::AbstractArrayOrBroadcasted; dims::D=:, init=0) where {D} = _count(f, A, dims, init)
 
 _count(f, A::AbstractArrayOrBroadcasted, dims::Colon, init) = _simple_count(f, A, init)
 _count(f, A::AbstractArrayOrBroadcasted, dims, init) = mapreduce(_bool(f), add_sum, A; dims, init)
@@ -448,6 +421,8 @@ _count(f, A::AbstractArrayOrBroadcasted, dims, init) = mapreduce(_bool(f), add_s
 Count the number of elements in `A` for which `f` returns `true` over the
 singleton dimensions of `r`, writing the result into `r` in-place.
 
+$(_DOCS_ALIASING_WARNING)
+
 !!! compat "Julia 1.5"
     inplace `count!` was added in Julia 1.5.
 
@@ -525,8 +500,8 @@ sum(f, A::AbstractArray; dims)
     sum!(r, A)
 
 Sum elements of `A` over the singleton dimensions of `r`, and write results to `r`.
-Note that since the sum! function is intended to operate without making any allocations,
-the target should not alias with the source.
+
+$(_DOCS_ALIASING_WARNING)
 
 # Examples
 ```jldoctest
@@ -601,6 +576,8 @@ prod(f, A::AbstractArray; dims)
 
 Multiply elements of `A` over the singleton dimensions of `r`, and write results to `r`.
 
+$(_DOCS_ALIASING_WARNING)
+
 # Examples
 ```jldoctest
 julia> A = [1 2; 3 4]
@@ -678,6 +655,8 @@ maximum(f, A::AbstractArray; dims)
 
 Compute the maximum value of `A` over the singleton dimensions of `r`, and write results to `r`.
 
+$(_DOCS_ALIASING_WARNING)
+
 # Examples
 ```jldoctest
 julia> A = [1 2; 3 4]
@@ -755,6 +734,8 @@ minimum(f, A::AbstractArray; dims)
 
 Compute the minimum value of `A` over the singleton dimensions of `r`, and write results to `r`.
 
+$(_DOCS_ALIASING_WARNING)
+
 # Examples
 ```jldoctest
 julia> A = [1 2; 3 4]
@@ -820,6 +801,8 @@ extrema(f, A::AbstractArray; dims)
 
 Compute the minimum and maximum value of `A` over the singleton dimensions of `r`, and write results to `r`.
 
+$(_DOCS_ALIASING_WARNING)
+
 !!! compat "Julia 1.8"
     This method requires Julia 1.8 or later.
 
@@ -895,6 +878,8 @@ all(::Function, ::AbstractArray; dims)
 
 Test whether all values in `A` along the singleton dimensions of `r` are `true`, and write results to `r`.
 
+$(_DOCS_ALIASING_WARNING)
+
 # Examples
 ```jldoctest
 julia> A = [true false; true false]
@@ -902,13 +887,13 @@ julia> A = [true false; true false]
  1  0
  1  0
 
-julia> all!([1; 1], A)
-2-element Vector{Int64}:
+julia> all!(Bool[1; 1], A)
+2-element Vector{Bool}:
  0
  0
 
-julia> all!([1 1], A)
-1×2 Matrix{Int64}:
+julia> all!(Bool[1 1], A)
+1×2 Matrix{Bool}:
  1  0
 ```
 """
@@ -968,6 +953,8 @@ any(::Function, ::AbstractArray; dims)
 Test whether any values in `A` along the singleton dimensions of `r` are `true`, and write
 results to `r`.
 
+$(_DOCS_ALIASING_WARNING)
+
 # Examples
 ```jldoctest
 julia> A = [true false; true false]
@@ -975,13 +962,13 @@ julia> A = [true false; true false]
  1  0
  1  0
 
-julia> any!([1; 1], A)
-2-element Vector{Int64}:
+julia> any!(Bool[1; 1], A)
+2-element Vector{Bool}:
  1
  1
 
-julia> any!([1 1], A)
-1×2 Matrix{Int64}:
+julia> any!(Bool[1 1], A)
+1×2 Matrix{Bool}:
  1  0
 ```
 """
@@ -993,8 +980,8 @@ for (fname, _fname, op) in [(:sum,     :_sum,     :add_sum), (:prod,    :_prod,
     mapf = fname === :extrema ? :(ExtremaMap(f)) : :f
     @eval begin
         # User-facing methods with keyword arguments
-        @inline ($fname)(a::AbstractArray; dims=:, kw...) = ($_fname)(a, dims; kw...)
-        @inline ($fname)(f, a::AbstractArray; dims=:, kw...) = ($_fname)(f, a, dims; kw...)
+        @inline ($fname)(a::AbstractArray; dims::D=:, kw...) where {D} = ($_fname)(a, dims; kw...)
+        @inline ($fname)(f, a::AbstractArray; dims::D=:, kw...) where {D} = ($_fname)(f, a, dims; kw...)
 
         # Underlying implementations using dispatch
         ($_fname)(a, ::Colon; kw...) = ($_fname)(identity, a, :; kw...)
@@ -1002,16 +989,16 @@ for (fname, _fname, op) in [(:sum,     :_sum,     :add_sum), (:prod,    :_prod,
     end
 end
 
-any(a::AbstractArray; dims=:)              = _any(a, dims)
-any(f::Function, a::AbstractArray; dims=:) = _any(f, a, dims)
+any(a::AbstractArray; dims::D=:) where {D} = _any(a, dims)
+any(f::Function, a::AbstractArray; dims::D=:) where {D} = _any(f, a, dims)
 _any(a, ::Colon)                           = _any(identity, a, :)
-all(a::AbstractArray; dims=:)              = _all(a, dims)
-all(f::Function, a::AbstractArray; dims=:) = _all(f, a, dims)
+all(a::AbstractArray; dims::D=:) where {D} = _all(a, dims)
+all(f::Function, a::AbstractArray; dims::D=:) where {D} = _all(f, a, dims)
 _all(a, ::Colon)                           = _all(identity, a, :)
 
 for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod),
                     (:maximum, :max), (:minimum, :min),
-                    (:all, :&),       (:any, :|),
+                    (:all, :and_all), (:any, :or_any),
                     (:extrema, :_extrema_rf)]
     fname! = Symbol(fname, '!')
     _fname = Symbol('_', fname)
@@ -1021,8 +1008,8 @@ for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod),
             mapreducedim!($mapf, $(op), initarray!(r, $mapf, $(op), init, A), A)
         $(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(identity, r, A; init=init)
 
-        $(_fname)(A, dims; kw...)    = $(_fname)(identity, A, dims; kw...)
-        $(_fname)(f, A, dims; kw...) = mapreduce($mapf, $(op), A; dims=dims, kw...)
+        $(_fname)(A, dims::D; kw...) where {D} = $(_fname)(identity, A, dims; kw...)
+        $(_fname)(f, A, dims::D; kw...) where {D} = mapreduce($mapf, $(op), A; dims=dims, kw...)
     end
 end
 
@@ -1044,33 +1031,33 @@ function findminmax!(f, op, Rval, Rind, A::AbstractArray{T,N}) where {T,N}
     zi = zero(eltype(ks))
     if reducedim1(Rval, A)
         i1 = first(axes1(Rval))
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
-            tmpRv = Rval[i1,IR]
-            tmpRi = Rind[i1,IR]
+            @inbounds tmpRv = Rval[i1,IR]
+            @inbounds tmpRi = Rind[i1,IR]
             for i in axes(A,1)
                 k, kss = y::Tuple
-                tmpAv = f(A[i,IA])
+                tmpAv = f(@inbounds(A[i,IA]))
                 if tmpRi == zi || op(tmpRv, tmpAv)
                     tmpRv = tmpAv
                     tmpRi = k
                 end
                 y = iterate(ks, kss)
             end
-            Rval[i1,IR] = tmpRv
-            Rind[i1,IR] = tmpRi
+            @inbounds Rval[i1,IR] = tmpRv
+            @inbounds Rind[i1,IR] = tmpRi
         end
     else
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
             for i in axes(A, 1)
                 k, kss = y::Tuple
-                tmpAv = f(A[i,IA])
-                tmpRv = Rval[i,IR]
-                tmpRi = Rind[i,IR]
+                tmpAv = f(@inbounds(A[i,IA]))
+                @inbounds tmpRv = Rval[i,IR]
+                @inbounds tmpRi = Rind[i,IR]
                 if tmpRi == zi || op(tmpRv, tmpAv)
-                    Rval[i,IR] = tmpAv
-                    Rind[i,IR] = k
+                    @inbounds Rval[i,IR] = tmpAv
+                    @inbounds Rind[i,IR] = k
                 end
                 y = iterate(ks, kss)
             end
@@ -1085,6 +1072,8 @@ end
 Find the minimum of `A` and the corresponding linear index along singleton
 dimensions of `rval` and `rind`, and store the results in `rval` and `rind`.
 `NaN` is treated as less than all other values except `missing`.
+
+$(_DOCS_ALIASING_WARNING)
 """
 function findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
                   init::Bool=true)
@@ -1111,8 +1100,8 @@ julia> findmin(A, dims=2)
 ([1.0; 3.0;;], CartesianIndex{2}[CartesianIndex(1, 1); CartesianIndex(2, 1);;])
 ```
 """
-findmin(A::AbstractArray; dims=:) = _findmin(A, dims)
-_findmin(A, dims) = _findmin(identity, A, dims)
+findmin(A::AbstractArray; dims::D=:) where {D} = _findmin(A, dims)
+_findmin(A, dims::D) where {D} = _findmin(identity, A, dims)
 
 """
     findmin(f, A; dims) -> (f(x), index)
@@ -1134,9 +1123,9 @@ julia> findmin(abs2, A, dims=2)
 ([1.0; 0.25;;], CartesianIndex{2}[CartesianIndex(1, 1); CartesianIndex(2, 1);;])
 ```
 """
-findmin(f, A::AbstractArray; dims=:) = _findmin(f, A, dims)
+findmin(f, A::AbstractArray; dims::D=:) where {D} = _findmin(f, A, dims)
 
-function _findmin(f, A, region)
+function _findmin(f, A, region::D) where {D}
     ri = reduced_indices0(A, region)
     if isempty(A)
         if prod(map(length, reduced_indices(A, region))) != 0
@@ -1156,6 +1145,8 @@ end
 Find the maximum of `A` and the corresponding linear index along singleton
 dimensions of `rval` and `rind`, and store the results in `rval` and `rind`.
 `NaN` is treated as greater than all other values except `missing`.
+
+$(_DOCS_ALIASING_WARNING)
 """
 function findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
                   init::Bool=true)
@@ -1182,8 +1173,8 @@ julia> findmax(A, dims=2)
 ([2.0; 4.0;;], CartesianIndex{2}[CartesianIndex(1, 2); CartesianIndex(2, 2);;])
 ```
 """
-findmax(A::AbstractArray; dims=:) = _findmax(A, dims)
-_findmax(A, dims) = _findmax(identity, A, dims)
+findmax(A::AbstractArray; dims::D=:) where {D} = _findmax(A, dims)
+_findmax(A, dims::D) where {D} = _findmax(identity, A, dims)
 
 """
     findmax(f, A; dims) -> (f(x), index)
@@ -1205,9 +1196,9 @@ julia> findmax(abs2, A, dims=2)
 ([1.0; 4.0;;], CartesianIndex{2}[CartesianIndex(1, 1); CartesianIndex(2, 2);;])
 ```
 """
-findmax(f, A::AbstractArray; dims=:) = _findmax(f, A, dims)
+findmax(f, A::AbstractArray; dims::D=:) where {D} = _findmax(f, A, dims)
 
-function _findmax(f, A, region)
+function _findmax(f, A, region::D) where {D}
     ri = reduced_indices0(A, region)
     if isempty(A)
         if prod(map(length, reduced_indices(A, region))) != 0
@@ -1256,7 +1247,7 @@ julia> argmin(A, dims=2)
  CartesianIndex(2, 1)
 ```
 """
-argmin(A::AbstractArray; dims=:) = findmin(A; dims=dims)[2]
+argmin(A::AbstractArray; dims::D=:) where {D} = findmin(A; dims=dims)[2]
 
 """
     argmax(A; dims) -> indices
@@ -1281,4 +1272,4 @@ julia> argmax(A, dims=2)
  CartesianIndex(2, 2)
 ```
 """
-argmax(A::AbstractArray; dims=:) = findmax(A; dims=dims)[2]
+argmax(A::AbstractArray; dims::D=:) where {D} = findmax(A; dims=dims)[2]
diff --git a/base/reflection.jl b/base/reflection.jl
index 05ffb3a6e9211..03e1d4c71e393 100644
--- a/base/reflection.jl
+++ b/base/reflection.jl
@@ -1,965 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# name and module reflection
-
-"""
-    parentmodule(m::Module) -> Module
-
-Get a module's enclosing `Module`. `Main` is its own parent.
-
-See also: [`names`](@ref), [`nameof`](@ref), [`fullname`](@ref), [`@__MODULE__`](@ref).
-
-# Examples
-```jldoctest
-julia> parentmodule(Main)
-Main
-
-julia> parentmodule(Base.Broadcast)
-Base
-```
-"""
-parentmodule(m::Module) = ccall(:jl_module_parent, Ref{Module}, (Any,), m)
-
-"""
-    moduleroot(m::Module) -> Module
-
-Find the root module of a given module. This is the first module in the chain of
-parent modules of `m` which is either a registered root module or which is its
-own parent module.
-"""
-function moduleroot(m::Module)
-    while true
-        is_root_module(m) && return m
-        p = parentmodule(m)
-        p === m && return m
-        m = p
-    end
-end
-
-"""
-    @__MODULE__ -> Module
-
-Get the `Module` of the toplevel eval,
-which is the `Module` code is currently being read from.
-"""
-macro __MODULE__()
-    return __module__
-end
-
-"""
-    fullname(m::Module)
-
-Get the fully-qualified name of a module as a tuple of symbols. For example,
-
-# Examples
-```jldoctest
-julia> fullname(Base.Iterators)
-(:Base, :Iterators)
-
-julia> fullname(Main)
-(:Main,)
-```
-"""
-function fullname(m::Module)
-    mn = nameof(m)
-    if m === Main || m === Base || m === Core
-        return (mn,)
-    end
-    mp = parentmodule(m)
-    if mp === m
-        return (mn,)
-    end
-    return (fullname(mp)..., mn)
-end
-
-"""
-    names(x::Module; all::Bool = false, imported::Bool = false)
-
-Get an array of the names exported by a `Module`, excluding deprecated names.
-If `all` is true, then the list also includes non-exported names defined in the module,
-deprecated names, and compiler-generated names.
-If `imported` is true, then names explicitly imported from other modules
-are also included.
-
-As a special case, all names defined in `Main` are considered \"exported\",
-since it is not idiomatic to explicitly export names from `Main`.
-
-See also: [`@locals`](@ref Base.@locals), [`@__MODULE__`](@ref).
-"""
-names(m::Module; all::Bool = false, imported::Bool = false) =
-    sort!(unsorted_names(m; all, imported))
-unsorted_names(m::Module; all::Bool = false, imported::Bool = false) =
-    ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported)
-
-isexported(m::Module, s::Symbol) = ccall(:jl_module_exports_p, Cint, (Any, Any), m, s) != 0
-isdeprecated(m::Module, s::Symbol) = ccall(:jl_is_binding_deprecated, Cint, (Any, Any), m, s) != 0
-isbindingresolved(m::Module, var::Symbol) = ccall(:jl_binding_resolved_p, Cint, (Any, Any), m, var) != 0
-
-function binding_module(m::Module, s::Symbol)
-    p = ccall(:jl_get_module_of_binding, Ptr{Cvoid}, (Any, Any), m, s)
-    p == C_NULL && return m
-    return unsafe_pointer_to_objref(p)::Module
-end
-
-const _NAMEDTUPLE_NAME = NamedTuple.body.body.name
-
-function _fieldnames(@nospecialize t)
-    if t.name === _NAMEDTUPLE_NAME
-        if t.parameters[1] isa Tuple
-            return t.parameters[1]
-        else
-            throw(ArgumentError("type does not have definite field names"))
-        end
-    end
-    return t.name.names
-end
-
-"""
-    fieldname(x::DataType, i::Integer)
-
-Get the name of field `i` of a `DataType`.
-
-# Examples
-```jldoctest
-julia> fieldname(Rational, 1)
-:num
-
-julia> fieldname(Rational, 2)
-:den
-```
-"""
-function fieldname(t::DataType, i::Integer)
-    throw_not_def_field() = throw(ArgumentError("type does not have definite field names"))
-    function throw_field_access(t, i, n_fields)
-        field_label = n_fields == 1 ? "field" : "fields"
-        throw(ArgumentError("Cannot access field $i since type $t only has $n_fields $field_label."))
-    end
-    throw_need_pos_int(i) = throw(ArgumentError("Field numbers must be positive integers. $i is invalid."))
-
-    isabstracttype(t) && throw_not_def_field()
-    names = _fieldnames(t)
-    n_fields = length(names)::Int
-    i > n_fields && throw_field_access(t, i, n_fields)
-    i < 1 && throw_need_pos_int(i)
-    return @inbounds names[i]::Symbol
-end
-
-fieldname(t::UnionAll, i::Integer) = fieldname(unwrap_unionall(t), i)
-fieldname(t::Type{<:Tuple}, i::Integer) =
-    i < 1 || i > fieldcount(t) ? throw(BoundsError(t, i)) : Int(i)
-
-"""
-    fieldnames(x::DataType)
-
-Get a tuple with the names of the fields of a `DataType`.
-
-See also [`propertynames`](@ref), [`hasfield`](@ref).
-
-# Examples
-```jldoctest
-julia> fieldnames(Rational)
-(:num, :den)
-
-julia> fieldnames(typeof(1+im))
-(:re, :im)
-```
-"""
-fieldnames(t::DataType) = (fieldcount(t); # error check to make sure type is specific enough
-                           (_fieldnames(t)...,))::Tuple{Vararg{Symbol}}
-fieldnames(t::UnionAll) = fieldnames(unwrap_unionall(t))
-fieldnames(::Core.TypeofBottom) =
-    throw(ArgumentError("The empty type does not have field names since it does not have instances."))
-fieldnames(t::Type{<:Tuple}) = ntuple(identity, fieldcount(t))
-
-"""
-    hasfield(T::Type, name::Symbol)
-
-Return a boolean indicating whether `T` has `name` as one of its own fields.
-
-See also [`fieldnames`](@ref), [`fieldcount`](@ref), [`hasproperty`](@ref).
-
-!!! compat "Julia 1.2"
-     This function requires at least Julia 1.2.
-
-# Examples
-```jldoctest
-julia> struct Foo
-            bar::Int
-       end
-
-julia> hasfield(Foo, :bar)
-true
-
-julia> hasfield(Foo, :x)
-false
-```
-"""
-hasfield(T::Type, name::Symbol) = fieldindex(T, name, false) > 0
-
-"""
-    nameof(t::DataType) -> Symbol
-
-Get the name of a (potentially `UnionAll`-wrapped) `DataType` (without its parent module)
-as a symbol.
-
-# Examples
-```jldoctest
-julia> module Foo
-           struct S{T}
-           end
-       end
-Foo
-
-julia> nameof(Foo.S{T} where T)
-:S
-```
-"""
-nameof(t::DataType) = t.name.name
-nameof(t::UnionAll) = nameof(unwrap_unionall(t))::Symbol
-
-"""
-    parentmodule(t::DataType) -> Module
-
-Determine the module containing the definition of a (potentially `UnionAll`-wrapped) `DataType`.
-
-# Examples
-```jldoctest
-julia> module Foo
-           struct Int end
-       end
-Foo
-
-julia> parentmodule(Int)
-Core
-
-julia> parentmodule(Foo.Int)
-Foo
-```
-"""
-parentmodule(t::DataType) = t.name.module
-parentmodule(t::UnionAll) = parentmodule(unwrap_unionall(t))
-
-"""
-    isconst(m::Module, s::Symbol) -> Bool
-
-Determine whether a global is declared `const` in a given module `m`.
-"""
-isconst(m::Module, s::Symbol) =
-    ccall(:jl_is_const, Cint, (Any, Any), m, s) != 0
-
-function isconst(g::GlobalRef)
-    return ccall(:jl_globalref_is_const, Cint, (Any,), g) != 0
-end
-
-"""
-    isconst(t::DataType, s::Union{Int,Symbol}) -> Bool
-
-Determine whether a field `s` is declared `const` in a given type `t`.
-"""
-function isconst(@nospecialize(t::Type), s::Symbol)
-    t = unwrap_unionall(t)
-    isa(t, DataType) || return false
-    return isconst(t, fieldindex(t, s, false))
-end
-function isconst(@nospecialize(t::Type), s::Int)
-    t = unwrap_unionall(t)
-    # TODO: what to do for `Union`?
-    isa(t, DataType) || return false # uncertain
-    ismutabletype(t) || return true # immutable structs are always const
-    1 <= s <= length(t.name.names) || return true # OOB reads are "const" since they always throw
-    constfields = t.name.constfields
-    constfields === C_NULL && return false
-    s -= 1
-    return unsafe_load(Ptr{UInt32}(constfields), 1 + s÷32) & (1 << (s%32)) != 0
-end
-
-"""
-    isfieldatomic(t::DataType, s::Union{Int,Symbol}) -> Bool
-
-Determine whether a field `s` is declared `@atomic` in a given type `t`.
-"""
-function isfieldatomic(@nospecialize(t::Type), s::Symbol)
-    t = unwrap_unionall(t)
-    isa(t, DataType) || return false
-    return isfieldatomic(t, fieldindex(t, s, false))
-end
-function isfieldatomic(@nospecialize(t::Type), s::Int)
-    t = unwrap_unionall(t)
-    # TODO: what to do for `Union`?
-    isa(t, DataType) || return false # uncertain
-    ismutabletype(t) || return false # immutable structs are never atomic
-    1 <= s <= length(t.name.names) || return false # OOB reads are not atomic (they always throw)
-    atomicfields = t.name.atomicfields
-    atomicfields === C_NULL && return false
-    s -= 1
-    return unsafe_load(Ptr{UInt32}(atomicfields), 1 + s÷32) & (1 << (s%32)) != 0
-end
-
-"""
-    @locals()
-
-Construct a dictionary of the names (as symbols) and values of all local
-variables defined as of the call site.
-
-!!! compat "Julia 1.1"
-    This macro requires at least Julia 1.1.
-
-# Examples
-```jldoctest
-julia> let x = 1, y = 2
-           Base.@locals
-       end
-Dict{Symbol, Any} with 2 entries:
-  :y => 2
-  :x => 1
-
-julia> function f(x)
-           local y
-           show(Base.@locals); println()
-           for i = 1:1
-               show(Base.@locals); println()
-           end
-           y = 2
-           show(Base.@locals); println()
-           nothing
-       end;
-
-julia> f(42)
-Dict{Symbol, Any}(:x => 42)
-Dict{Symbol, Any}(:i => 1, :x => 42)
-Dict{Symbol, Any}(:y => 2, :x => 42)
-```
-"""
-macro locals()
-    return Expr(:locals)
-end
-
-# concrete datatype predicates
-
-datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Core.SimpleVector, (Any,), x)
-
-struct DataTypeLayout
-    size::UInt32
-    nfields::UInt32
-    npointers::UInt32
-    firstptr::Int32
-    alignment::UInt16
-    flags::UInt16
-    # haspadding : 1;
-    # fielddesc_type : 2;
-end
-
-"""
-    Base.datatype_alignment(dt::DataType) -> Int
-
-Memory allocation minimum alignment for instances of this type.
-Can be called on any `isconcretetype`.
-"""
-function datatype_alignment(dt::DataType)
-    @_foldable_meta
-    dt.layout == C_NULL && throw(UndefRefError())
-    alignment = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).alignment
-    return Int(alignment)
-end
-
-function uniontype_layout(@nospecialize T::Type)
-    sz = RefValue{Csize_t}(0)
-    algn = RefValue{Csize_t}(0)
-    isinline = ccall(:jl_islayout_inline, Cint, (Any, Ptr{Csize_t}, Ptr{Csize_t}), T, sz, algn) != 0
-    (isinline, Int(sz[]), Int(algn[]))
-end
-
-LLT_ALIGN(x, sz) = (x + sz - 1) & -sz
-
-# amount of total space taken by T when stored in a container
-function aligned_sizeof(@nospecialize T::Type)
-    @_foldable_meta
-    if isa(T, Union)
-        if allocatedinline(T)
-            # NOTE this check is equivalent to `isbitsunion(T)`, we can improve type
-            # inference in the second branch with the outer `isa(T, Union)` check
-            _, sz, al = uniontype_layout(T)
-            return LLT_ALIGN(sz, al)
-        end
-    elseif allocatedinline(T)
-        al = datatype_alignment(T)
-        return LLT_ALIGN(Core.sizeof(T), al)
-    end
-    return Core.sizeof(Ptr{Cvoid})
-end
-
-gc_alignment(sz::Integer) = Int(ccall(:jl_alignment, Cint, (Csize_t,), sz))
-gc_alignment(T::Type) = gc_alignment(Core.sizeof(T))
-
-"""
-    Base.datatype_haspadding(dt::DataType) -> Bool
-
-Return whether the fields of instances of this type are packed in memory,
-with no intervening padding bytes.
-Can be called on any `isconcretetype`.
-"""
-function datatype_haspadding(dt::DataType)
-    @_foldable_meta
-    dt.layout == C_NULL && throw(UndefRefError())
-    flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
-    return flags & 1 == 1
-end
-
-"""
-    Base.datatype_nfields(dt::DataType) -> Bool
-
-Return the number of fields known to this datatype's layout.
-Can be called on any `isconcretetype`.
-"""
-function datatype_nfields(dt::DataType)
-    @_foldable_meta
-    dt.layout == C_NULL && throw(UndefRefError())
-    return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).nfields
-end
-
-"""
-    Base.datatype_pointerfree(dt::DataType) -> Bool
-
-Return whether instances of this type can contain references to gc-managed memory.
-Can be called on any `isconcretetype`.
-"""
-function datatype_pointerfree(dt::DataType)
-    @_foldable_meta
-    dt.layout == C_NULL && throw(UndefRefError())
-    npointers = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).npointers
-    return npointers == 0
-end
-
-"""
-    Base.datatype_fielddesc_type(dt::DataType) -> Int
-
-Return the size in bytes of each field-description entry in the layout array,
-located at `(dt.layout + sizeof(DataTypeLayout))`.
-Can be called on any `isconcretetype`.
-
-See also [`fieldoffset`](@ref).
-"""
-function datatype_fielddesc_type(dt::DataType)
-    @_foldable_meta
-    dt.layout == C_NULL && throw(UndefRefError())
-    flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
-    return (flags >> 1) & 3
-end
-
-# For type stability, we only expose a single struct that describes everything
-struct FieldDesc
-    isforeign::Bool
-    isptr::Bool
-    size::UInt32
-    offset::UInt32
-end
-
-struct FieldDescStorage{T}
-    ptrsize::T
-    offset::T
-end
-FieldDesc(fd::FieldDescStorage{T}) where {T} =
-    FieldDesc(false, fd.ptrsize & 1 != 0,
-              fd.ptrsize >> 1, fd.offset)
-
-struct DataTypeFieldDesc
-    dt::DataType
-    function DataTypeFieldDesc(dt::DataType)
-        dt.layout == C_NULL && throw(UndefRefError())
-        new(dt)
-    end
-end
-
-function getindex(dtfd::DataTypeFieldDesc, i::Int)
-    layout_ptr = convert(Ptr{DataTypeLayout}, dtfd.dt.layout)
-    fd_ptr = layout_ptr + sizeof(DataTypeLayout)
-    layout = unsafe_load(layout_ptr)
-    fielddesc_type = (layout.flags >> 1) & 3
-    nfields = layout.nfields
-    @boundscheck ((1 <= i <= nfields) || throw(BoundsError(dtfd, i)))
-    if fielddesc_type == 0
-        return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt8}}(fd_ptr), i))
-    elseif fielddesc_type == 1
-        return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt16}}(fd_ptr), i))
-    elseif fielddesc_type == 2
-        return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt32}}(fd_ptr), i))
-    else
-        # fielddesc_type == 3
-        return FieldDesc(true, true, 0, 0)
-    end
-end
-
-"""
-    ismutable(v) -> Bool
-
-Return `true` if and only if value `v` is mutable.  See [Mutable Composite Types](@ref)
-for a discussion of immutability. Note that this function works on values, so if you
-give it a `DataType`, it will tell you that a value of the type is mutable.
-
-!!! note
-    For technical reasons, `ismutable` returns `true` for values of certain special types
-    (for example `String` and `Symbol`) even though they cannot be mutated in a permissible way.
-
-See also [`isbits`](@ref), [`isstructtype`](@ref).
-
-# Examples
-```jldoctest
-julia> ismutable(1)
-false
-
-julia> ismutable([1,2])
-true
-```
-
-!!! compat "Julia 1.5"
-    This function requires at least Julia 1.5.
-"""
-ismutable(@nospecialize(x)) = (@_total_meta; typeof(x).name.flags & 0x2 == 0x2)
-
-"""
-    ismutabletype(T) -> Bool
-
-Determine whether type `T` was declared as a mutable type
-(i.e. using `mutable struct` keyword).
-
-!!! compat "Julia 1.7"
-    This function requires at least Julia 1.7.
-"""
-function ismutabletype(@nospecialize t)
-    @_total_meta
-    t = unwrap_unionall(t)
-    # TODO: what to do for `Union`?
-    return isa(t, DataType) && t.name.flags & 0x2 == 0x2
-end
-
-"""
-    isstructtype(T) -> Bool
-
-Determine whether type `T` was declared as a struct type
-(i.e. using the `struct` or `mutable struct` keyword).
-"""
-function isstructtype(@nospecialize t)
-    @_total_meta
-    t = unwrap_unionall(t)
-    # TODO: what to do for `Union`?
-    isa(t, DataType) || return false
-    return !isprimitivetype(t) && !isabstracttype(t)
-end
-
-"""
-    isprimitivetype(T) -> Bool
-
-Determine whether type `T` was declared as a primitive type
-(i.e. using the `primitive type` syntax).
-"""
-function isprimitivetype(@nospecialize t)
-    @_total_meta
-    t = unwrap_unionall(t)
-    # TODO: what to do for `Union`?
-    isa(t, DataType) || return false
-    return (t.flags & 0x0080) == 0x0080
-end
-
-"""
-    isbitstype(T)
-
-Return `true` if type `T` is a "plain data" type,
-meaning it is immutable and contains no references to other values,
-only `primitive` types and other `isbitstype` types.
-Typical examples are numeric types such as [`UInt8`](@ref),
-[`Float64`](@ref), and [`Complex{Float64}`](@ref).
-This category of types is significant since they are valid as type parameters,
-may not track [`isdefined`](@ref) / [`isassigned`](@ref) status,
-and have a defined layout that is compatible with C.
-
-See also [`isbits`](@ref), [`isprimitivetype`](@ref), [`ismutable`](@ref).
-
-# Examples
-```jldoctest
-julia> isbitstype(Complex{Float64})
-true
-
-julia> isbitstype(Complex)
-false
-```
-"""
-isbitstype(@nospecialize t) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0008) == 0x0008)
-
-"""
-    isbits(x)
-
-Return `true` if `x` is an instance of an [`isbitstype`](@ref) type.
-"""
-isbits(@nospecialize x) = isbitstype(typeof(x))
-
-"""
-    objectid(x) -> UInt
-
-Get a hash value for `x` based on object identity.
-
-If `x === y` then `objectid(x) == objectid(y)`, and usually when `x !== y`, `objectid(x) != objectid(y)`.
-
-See also [`hash`](@ref), [`IdDict`](@ref).
-"""
-function objectid(x)
-    # objectid is foldable iff it isn't a pointer.
-    if isidentityfree(typeof(x))
-        return _foldable_objectid(x)
-    end
-    return _objectid(x)
-end
-function _foldable_objectid(@nospecialize(x))
-    @_foldable_meta
-    _objectid(x)
-end
-_objectid(@nospecialize(x)) = ccall(:jl_object_id, UInt, (Any,), x)
-
-"""
-    isdispatchtuple(T)
-
-Determine whether type `T` is a tuple "leaf type",
-meaning it could appear as a type signature in dispatch
-and has no subtypes (or supertypes) which could appear in a call.
-"""
-isdispatchtuple(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0004) == 0x0004)
-
-datatype_ismutationfree(dt::DataType) = (@_total_meta; (dt.flags & 0x0100) == 0x0100)
-
-"""
-    ismutationfree(T)
-
-Determine whether type `T` is mutation free in the sense that no mutable memory
-is reachable from this type (either in the type itself) or through any fields.
-Note that the type itself need not be immutable. For example, an empty mutable
-type is `ismutabletype`, but also `ismutationfree`.
-"""
-function ismutationfree(@nospecialize(t))
-    t = unwrap_unionall(t)
-    if isa(t, DataType)
-        return datatype_ismutationfree(t)
-    elseif isa(t, Union)
-        return ismutationfree(t.a) && ismutationfree(t.b)
-    end
-    # TypeVar, etc.
-    return false
-end
-
-datatype_isidentityfree(dt::DataType) = (@_total_meta; (dt.flags & 0x0200) == 0x0200)
-
-"""
-    isidentityfree(T)
-
-Determine whether type `T` is identity free in the sense that this type or any
-reachable through its fields has non-content-based identity.
-"""
-function isidentityfree(@nospecialize(t))
-    t = unwrap_unionall(t)
-    if isa(t, DataType)
-        return datatype_isidentityfree(t)
-    elseif isa(t, Union)
-        return isidentityfree(t.a) && isidentityfree(t.b)
-    end
-    # TypeVar, etc.
-    return false
-end
-
-iskindtype(@nospecialize t) = (t === DataType || t === UnionAll || t === Union || t === typeof(Bottom))
-isconcretedispatch(@nospecialize t) = isconcretetype(t) && !iskindtype(t)
-has_free_typevars(@nospecialize(t)) = ccall(:jl_has_free_typevars, Cint, (Any,), t) != 0
-
-# equivalent to isa(v, Type) && isdispatchtuple(Tuple{v}) || v === Union{}
-# and is thus perhaps most similar to the old (pre-1.0) `isleaftype` query
-function isdispatchelem(@nospecialize v)
-    return (v === Bottom) || (v === typeof(Bottom)) || isconcretedispatch(v) ||
-        (isType(v) && !has_free_typevars(v))
-end
-
-const _TYPE_NAME = Type.body.name
-isType(@nospecialize t) = isa(t, DataType) && t.name === _TYPE_NAME
-
-"""
-    isconcretetype(T)
-
-Determine whether type `T` is a concrete type, meaning it could have direct instances
-(values `x` such that `typeof(x) === T`).
-
-See also: [`isbits`](@ref), [`isabstracttype`](@ref), [`issingletontype`](@ref).
-
-# Examples
-```jldoctest
-julia> isconcretetype(Complex)
-false
-
-julia> isconcretetype(Complex{Float32})
-true
-
-julia> isconcretetype(Vector{Complex})
-true
-
-julia> isconcretetype(Vector{Complex{Float32}})
-true
-
-julia> isconcretetype(Union{})
-false
-
-julia> isconcretetype(Union{Int,String})
-false
-```
-"""
-isconcretetype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0002) == 0x0002)
-
-"""
-    isabstracttype(T)
-
-Determine whether type `T` was declared as an abstract type
-(i.e. using the `abstract type` syntax).
-
-# Examples
-```jldoctest
-julia> isabstracttype(AbstractArray)
-true
-
-julia> isabstracttype(Vector)
-false
-```
-"""
-function isabstracttype(@nospecialize(t))
-    @_total_meta
-    t = unwrap_unionall(t)
-    # TODO: what to do for `Union`?
-    return isa(t, DataType) && (t.name.flags & 0x1) == 0x1
-end
-
-"""
-    Base.issingletontype(T)
-
-Determine whether type `T` has exactly one possible instance; for example, a
-struct type with no fields.
-"""
-issingletontype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && isdefined(t, :instance))
-
-"""
-    typeintersect(T::Type, S::Type)
-
-Compute a type that contains the intersection of `T` and `S`. Usually this will be the
-smallest such type or one close to it.
-"""
-typeintersect(@nospecialize(a), @nospecialize(b)) = (@_total_meta; ccall(:jl_type_intersection, Any, (Any, Any), a::Type, b::Type))
-
-morespecific(@nospecialize(a), @nospecialize(b)) = (@_total_meta; ccall(:jl_type_morespecific, Cint, (Any, Any), a::Type, b::Type) != 0)
-
-"""
-    fieldoffset(type, i)
-
-The byte offset of field `i` of a type relative to the data start. For example, we could
-use it in the following manner to summarize information about a struct:
-
-```jldoctest
-julia> structinfo(T) = [(fieldoffset(T,i), fieldname(T,i), fieldtype(T,i)) for i = 1:fieldcount(T)];
-
-julia> structinfo(Base.Filesystem.StatStruct)
-13-element Vector{Tuple{UInt64, Symbol, Type}}:
- (0x0000000000000000, :desc, Union{RawFD, String})
- (0x0000000000000008, :device, UInt64)
- (0x0000000000000010, :inode, UInt64)
- (0x0000000000000018, :mode, UInt64)
- (0x0000000000000020, :nlink, Int64)
- (0x0000000000000028, :uid, UInt64)
- (0x0000000000000030, :gid, UInt64)
- (0x0000000000000038, :rdev, UInt64)
- (0x0000000000000040, :size, Int64)
- (0x0000000000000048, :blksize, Int64)
- (0x0000000000000050, :blocks, Int64)
- (0x0000000000000058, :mtime, Float64)
- (0x0000000000000060, :ctime, Float64)
-```
-"""
-fieldoffset(x::DataType, idx::Integer) = (@_foldable_meta; ccall(:jl_get_field_offset, Csize_t, (Any, Cint), x, idx))
-
-"""
-    fieldtype(T, name::Symbol | index::Int)
-
-Determine the declared type of a field (specified by name or index) in a composite DataType `T`.
-
-# Examples
-```jldoctest
-julia> struct Foo
-           x::Int64
-           y::String
-       end
-
-julia> fieldtype(Foo, :x)
-Int64
-
-julia> fieldtype(Foo, 2)
-String
-```
-"""
-fieldtype
-
-"""
-    Base.fieldindex(T, name::Symbol, err:Bool=true)
-
-Get the index of a named field, throwing an error if the field does not exist (when err==true)
-or returning 0 (when err==false).
-
-# Examples
-```jldoctest
-julia> struct Foo
-           x::Int64
-           y::String
-       end
-
-julia> Base.fieldindex(Foo, :z)
-ERROR: type Foo has no field z
-Stacktrace:
-[...]
-
-julia> Base.fieldindex(Foo, :z, false)
-0
-```
-"""
-function fieldindex(T::DataType, name::Symbol, err::Bool=true)
-    return err ? _fieldindex_maythrow(T, name) : _fieldindex_nothrow(T, name)
-end
-
-function _fieldindex_maythrow(T::DataType, name::Symbol)
-    @_foldable_meta
-    @noinline
-    return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, true)+1)
-end
-
-function _fieldindex_nothrow(T::DataType, name::Symbol)
-    @_total_meta
-    @noinline
-    return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, false)+1)
-end
-
-function fieldindex(t::UnionAll, name::Symbol, err::Bool=true)
-    t = argument_datatype(t)
-    if t === nothing
-        err && throw(ArgumentError("type does not have definite fields"))
-        return 0
-    end
-    return fieldindex(t, name, err)
-end
-
-function argument_datatype(@nospecialize t)
-    @_total_meta
-    @noinline
-    return ccall(:jl_argument_datatype, Any, (Any,), t)::Union{Nothing,DataType}
-end
-
-function datatype_fieldcount(t::DataType)
-    if t.name === _NAMEDTUPLE_NAME
-        names, types = t.parameters[1], t.parameters[2]
-        if names isa Tuple
-            return length(names)
-        end
-        if types isa DataType && types <: Tuple
-            return fieldcount(types)
-        end
-        return nothing
-    elseif isabstracttype(t) || (t.name === Tuple.name && isvatuple(t))
-        return nothing
-    end
-    if isdefined(t, :types)
-        return length(t.types)
-    end
-    return length(t.name.names)
-end
-
-"""
-    fieldcount(t::Type)
-
-Get the number of fields that an instance of the given type would have.
-An error is thrown if the type is too abstract to determine this.
-"""
-function fieldcount(@nospecialize t)
-    @_foldable_meta
-    if t isa UnionAll || t isa Union
-        t = argument_datatype(t)
-        if t === nothing
-            throw(ArgumentError("type does not have a definite number of fields"))
-        end
-    elseif t === Union{}
-        throw(ArgumentError("The empty type does not have a well-defined number of fields since it does not have instances."))
-    end
-    if !(t isa DataType)
-        throw(TypeError(:fieldcount, DataType, t))
-    end
-    fcount = datatype_fieldcount(t)
-    if fcount === nothing
-        throw(ArgumentError("type does not have a definite number of fields"))
-    end
-    return fcount
-end
-
-"""
-    fieldtypes(T::Type)
-
-The declared types of all fields in a composite DataType `T` as a tuple.
-
-!!! compat "Julia 1.1"
-    This function requires at least Julia 1.1.
-
-# Examples
-```jldoctest
-julia> struct Foo
-           x::Int64
-           y::String
-       end
-
-julia> fieldtypes(Foo)
-(Int64, String)
-```
-"""
-fieldtypes(T::Type) = (@_foldable_meta; ntupleany(i -> fieldtype(T, i), fieldcount(T)))
-
-# return all instances, for types that can be enumerated
-
-"""
-    instances(T::Type)
-
-Return a collection of all instances of the given type, if applicable. Mostly used for
-enumerated types (see `@enum`).
-
-# Example
-```jldoctest
-julia> @enum Color red blue green
-
-julia> instances(Color)
-(red, blue, green)
-```
-"""
-function instances end
-
-function to_tuple_type(@nospecialize(t))
-    if isa(t, Tuple) || isa(t, AbstractArray) || isa(t, SimpleVector)
-        t = Tuple{t...}
-    end
-    if isa(t, Type) && t <: Tuple
-        for p in (unwrap_unionall(t)::DataType).parameters
-            if isa(p, Core.TypeofVararg)
-                p = unwrapva(p)
-            end
-            if !(isa(p, Type) || isa(p, TypeVar))
-                error("argument tuple type must contain only types")
-            end
-        end
-    else
-        error("expected tuple type")
-    end
-    t
-end
-
-function signature_type(@nospecialize(f), @nospecialize(argtypes))
-    argtypes = to_tuple_type(argtypes)
-    ft = Core.Typeof(f)
-    u = unwrap_unionall(argtypes)::DataType
-    return rewrap_unionall(Tuple{ft, u.parameters...}, argtypes)
-end
-
 """
     code_lowered(f, types; generated=true, debuginfo=:default)
 
@@ -973,10 +13,10 @@ yielded by expanding the generators.
 
 The keyword `debuginfo` controls the amount of code metadata present in the output.
 
-Note that an error will be thrown if `types` are not leaf types when `generated` is
+Note that an error will be thrown if `types` are not concrete types when `generated` is
 `true` and any of the corresponding methods are an `@generated` method.
 """
-function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool=true, debuginfo::Symbol=:default)
+function code_lowered(@nospecialize(argtypes::Union{Tuple,Type{<:Tuple}}); generated::Bool=true, debuginfo::Symbol=:default)
     if @isdefined(IRShow)
         debuginfo = IRShow.debuginfo(debuginfo)
     elseif debuginfo === :default
@@ -986,338 +26,192 @@ function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool=
         throw(ArgumentError("'debuginfo' must be either :source or :none"))
     end
     world = get_world_counter()
-    return map(method_instances(f, t, world)) do m
+    world == typemax(UInt) && error("code reflection cannot be used from generated functions")
+    ret = CodeInfo[]
+    for m in method_instances(argtypes, world)
         if generated && hasgenerator(m)
             if may_invoke_generator(m)
-                return ccall(:jl_code_for_staged, Any, (Any, UInt), m, world)::CodeInfo
+                code = ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{Cvoid}), m, world, C_NULL)
             else
                 error("Could not expand generator for `@generated` method ", m, ". ",
                       "This can happen if the provided argument types (", t, ") are ",
-                      "not leaf types, but the `generated` argument is `true`.")
+                      "not concrete types, but the `generated` argument is `true`.")
             end
+        else
+            code = uncompressed_ir(m.def::Method)
+            debuginfo === :none && remove_linenums!(code)
         end
-        code = uncompressed_ir(m.def::Method)
-        debuginfo === :none && remove_linenums!(code)
-        return code
-    end
-end
-
-hasgenerator(m::Method) = isdefined(m, :generator)
-hasgenerator(m::Core.MethodInstance) = hasgenerator(m.def::Method)
-
-# low-level method lookup functions used by the compiler
-
-unionlen(x::Union) = unionlen(x.a) + unionlen(x.b)
-unionlen(@nospecialize(x)) = 1
-
-_uniontypes(x::Union, ts) = (_uniontypes(x.a,ts); _uniontypes(x.b,ts); ts)
-_uniontypes(@nospecialize(x), ts) = (push!(ts, x); ts)
-uniontypes(@nospecialize(x)) = _uniontypes(x, Any[])
-
-function _methods(@nospecialize(f), @nospecialize(t), lim::Int, world::UInt)
-    tt = signature_type(f, t)
-    return _methods_by_ftype(tt, lim, world)
-end
-
-function _methods_by_ftype(@nospecialize(t), lim::Int, world::UInt)
-    return _methods_by_ftype(t, nothing, lim, world)
-end
-function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt)
-    return _methods_by_ftype(t, mt, lim, world, false, RefValue{UInt}(typemin(UInt)), RefValue{UInt}(typemax(UInt)), Ptr{Int32}(C_NULL))
-end
-function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt, ambig::Bool, min::Ref{UInt}, max::Ref{UInt}, has_ambig::Ref{Int32})
-    return ccall(:jl_matching_methods, Any, (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, mt, lim, ambig, world, min, max, has_ambig)::Union{Vector{Any},Nothing}
-end
-
-# high-level, more convenient method lookup functions
-
-# type for reflecting and pretty-printing a subset of methods
-mutable struct MethodList <: AbstractArray{Method,1}
-    ms::Array{Method,1}
-    mt::Core.MethodTable
-end
-
-size(m::MethodList) = size(m.ms)
-getindex(m::MethodList, i::Integer) = m.ms[i]
-
-function MethodList(mt::Core.MethodTable)
-    ms = Method[]
-    visit(mt) do m
-        push!(ms, m)
-    end
-    return MethodList(ms, mt)
-end
-
-"""
-    methods(f, [types], [module])
-
-Return the method table for `f`.
-
-If `types` is specified, return an array of methods whose types match.
-If `module` is specified, return an array of methods defined in that module.
-A list of modules can also be specified as an array.
-
-!!! compat "Julia 1.4"
-    At least Julia 1.4 is required for specifying a module.
-
-See also: [`which`](@ref) and `@which`.
-"""
-function methods(@nospecialize(f), @nospecialize(t),
-                 mod::Union{Tuple{Module},AbstractArray{Module},Nothing}=nothing)
-    world = get_world_counter()
-    # Lack of specialization => a comprehension triggers too many invalidations via _collect, so collect the methods manually
-    ms = Method[]
-    for m in _methods(f, t, -1, world)::Vector
-        m = m::Core.MethodMatch
-        (mod === nothing || parentmodule(m.method) ∈ mod) && push!(ms, m.method)
+        push!(ret, code)
     end
-    MethodList(ms, typeof(f).name.mt)
+    return ret
 end
-methods(@nospecialize(f), @nospecialize(t), mod::Module) = methods(f, t, (mod,))
 
-function methods_including_ambiguous(@nospecialize(f), @nospecialize(t))
+function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool=true, debuginfo::Symbol=:default)
     tt = signature_type(f, t)
-    world = get_world_counter()
-    (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
-        error("code reflection cannot be used from generated functions")
-    min = RefValue{UInt}(typemin(UInt))
-    max = RefValue{UInt}(typemax(UInt))
-    ms = _methods_by_ftype(tt, nothing, -1, world, true, min, max, Ptr{Int32}(C_NULL))::Vector
-    return MethodList(Method[(m::Core.MethodMatch).method for m in ms], typeof(f).name.mt)
-end
-
-function methods(@nospecialize(f),
-                 mod::Union{Module,AbstractArray{Module},Nothing}=nothing)
-    # return all matches
-    return methods(f, Tuple{Vararg{Any}}, mod)
+    return code_lowered(tt; generated, debuginfo)
 end
 
-function visit(f, mt::Core.MethodTable)
-    mt.defs !== nothing && visit(f, mt.defs)
-    nothing
-end
-function visit(f, mc::Core.TypeMapLevel)
-    function avisit(f, e::Array{Any,1})
-        for i in 2:2:length(e)
-            isassigned(e, i) || continue
-            ei = e[i]
-            if ei isa Vector{Any}
-                for j in 2:2:length(ei)
-                    isassigned(ei, j) || continue
-                    visit(f, ei[j])
-                end
-            else
-                visit(f, ei)
-            end
-        end
-    end
-    if mc.targ !== nothing
-        avisit(f, mc.targ::Vector{Any})
-    end
-    if mc.arg1 !== nothing
-        avisit(f, mc.arg1::Vector{Any})
-    end
-    if mc.tname !== nothing
-        avisit(f, mc.tname::Vector{Any})
-    end
-    if mc.name1 !== nothing
-        avisit(f, mc.name1::Vector{Any})
-    end
-    mc.list !== nothing && visit(f, mc.list)
-    mc.any !== nothing && visit(f, mc.any)
-    nothing
-end
-function visit(f, d::Core.TypeMapEntry)
-    while d !== nothing
-        f(d.func)
-        d = d.next
-    end
-    nothing
-end
-struct MethodSpecializations
-    specializations::Union{Nothing, Core.MethodInstance, Core.SimpleVector}
-end
-"""
-    specializations(m::Method) → itr
-
-Return an iterator `itr` of all compiler-generated specializations of `m`.
-"""
-specializations(m::Method) = MethodSpecializations(isdefined(m, :specializations) ? m.specializations : nothing)
-function iterate(specs::MethodSpecializations)
-    s = specs.specializations
-    s === nothing && return nothing
-    isa(s, Core.MethodInstance) && return (s, nothing)
-    return iterate(specs, 0)
-end
-iterate(specs::MethodSpecializations, ::Nothing) = nothing
-function iterate(specs::MethodSpecializations, i::Int)
-    s = specs.specializations::Core.SimpleVector
-    n = length(s)
-    i >= n && return nothing
-    item = nothing
-    while i < n && item === nothing
-        item = s[i+=1]
-    end
-    item === nothing && return nothing
-    return (item, i)
-end
-length(specs::MethodSpecializations) = count(Returns(true), specs)
-
-function length(mt::Core.MethodTable)
-    n = 0
-    visit(mt) do m
-        n += 1
-    end
-    return n::Int
-end
-isempty(mt::Core.MethodTable) = (mt.defs === nothing)
-
-uncompressed_ir(m::Method) = isdefined(m, :source) ? _uncompressed_ir(m, m.source) :
-                             isdefined(m, :generator) ? error("Method is @generated; try `code_lowered` instead.") :
-                             error("Code for this Method is not available.")
-_uncompressed_ir(m::Method, s::CodeInfo) = copy(s)
-_uncompressed_ir(m::Method, s::String) = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, s)::CodeInfo
-_uncompressed_ir(ci::Core.CodeInstance, s::String) = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), ci.def.def::Method, ci, s)::CodeInfo
 # for backwards compat
 const uncompressed_ast = uncompressed_ir
 const _uncompressed_ast = _uncompressed_ir
 
-function method_instances(@nospecialize(f), @nospecialize(t), world::UInt)
-    tt = signature_type(f, t)
+function method_instances(@nospecialize(argtypes::Union{Tuple,Type{<:Tuple}}), world::UInt)
+    tt = to_tuple_type(argtypes)
     results = Core.MethodInstance[]
     # this make a better error message than the typeassert that follows
     world == typemax(UInt) && error("code reflection cannot be used from generated functions")
     for match in _methods_by_ftype(tt, -1, world)::Vector
-        instance = Core.Compiler.specialize_method(match)
+        instance = specialize_method(match::Core.MethodMatch)
         push!(results, instance)
     end
     return results
 end
 
+function method_instances(@nospecialize(f), @nospecialize(t), world::UInt)
+    tt = signature_type(f, t)
+    return method_instances(tt, world)
+end
+
+function method_instance(@nospecialize(argtypes::Union{Tuple,Type{<:Tuple}});
+                         world=Base.get_world_counter(), method_table=nothing)
+    tt = to_tuple_type(argtypes)
+    mi = ccall(:jl_method_lookup_by_tt, Any,
+                (Any, Csize_t, Any),
+                tt, world, method_table)
+    return mi::Union{Nothing, MethodInstance}
+end
+
+function method_instance(@nospecialize(f), @nospecialize(t);
+                         world=Base.get_world_counter(), method_table=nothing)
+    tt = signature_type(f, t)
+    return method_instance(tt; world, method_table)
+end
+
 default_debug_info_kind() = unsafe_load(cglobal(:jl_default_debug_info_kind, Cint))
 
 # this type mirrors jl_cgparams_t (documented in julia.h)
 struct CodegenParams
+    """
+    If enabled, generate the necessary code to support the --track-allocations
+    command line flag to julia itself. Note that the option itself does not enable
+    allocation tracking. Rather, it merely generates the support code necessary
+    to perform allocation tracking if requested by the command line option.
+    """
     track_allocations::Cint
+
+    """
+    If enabled, generate the necessary code to support the --code-coverage
+    command line flag to julia itself. Note that the option itself does not enable
+    code coverage. Rather, it merely generates the support code necessary
+    to code coverage if requested by the command line option.
+    """
     code_coverage::Cint
+
+    """
+    If enabled, force the compiler to use the specialized signature
+    for all generated functions, whenever legal. If disabled, the choice is made
+    heuristically and specsig is only used when deemed profitable.
+    """
     prefer_specsig::Cint
+
+    """
+    If enabled, enable emission of `.debug_names` sections.
+    """
     gnu_pubnames::Cint
+
+    """
+    Controls what level of debug info to emit. Currently supported values are:
+    - 0: no debug info
+    - 1: full debug info
+    - 2: Line tables only
+    - 3: Debug directives only
+
+    The integer values currently match the llvm::DICompilerUnit::DebugEmissionKind enum,
+    although this is not guaranteed.
+    """
     debug_info_kind::Cint
+
+    """
+    Controls the debug_info_level parameter, equivalent to the -g command line option.
+    """
+    debug_info_level::Cint
+
+    """
+    If enabled, generate a GC safepoint at the entry to every function. Emitting
+    these extra safepoints can reduce the amount of time that other threads are
+    waiting for the currently running thread to reach a safepoint. The cost for
+    a safepoint is small, but non-zero. The option is enabled by default.
+    """
     safepoint_on_entry::Cint
-    gcstack_arg::Cint
 
-    lookup::Ptr{Cvoid}
+    """
+    If enabled, add an implicit argument to each function call that is used to
+    pass down the current task local state pointer. This argument is passed
+    using the `swiftself` convention, which in the ordinary case means that the
+    pointer is kept in a register and accesses are thus very fast. If this option
+    is disabled, the task local state pointer must be loaded from thread local
+    storage, which incurs a small amount of additional overhead. The option is enabled by
+    default.
+    """
+    gcstack_arg::Cint
 
-    generic_context::Any
+    """
+    If enabled, use the Julia PLT mechanism to support lazy-resolution of `ccall`
+    targets. The option may be disabled for use in environments where the julia
+    runtime is unavailable, but is otherwise recommended to be enabled, even if
+    lazy resolution is not required, as the Julia PLT mechanism may have superior
+    performance compared to the native platform mechanism. The options is enabled by default.
+    """
+    use_jlplt::Cint
+
+    """
+        If enabled emit LLVM IR for all functions even if wouldn't be compiled
+        for some reason (i.e functions that return a constant value).
+    """
+    force_emit_all::Cint
+
+    """
+    When enabled, run the MemorySanitizer pass.
+    """
+    sanitize_memory::Cint
+    """
+    When enabled, run the ThreadSanitizer pass.
+    """
+    sanitize_thread::Cint
+    """
+    When enabled, run the AddressSanitizer pass.
+    """
+    sanitize_address::Cint
 
     function CodegenParams(; track_allocations::Bool=true, code_coverage::Bool=true,
                    prefer_specsig::Bool=false,
-                   gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(),
-                   safepoint_on_entry::Bool=true,
-                   gcstack_arg::Bool=true,
-                   lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid})),
-                   generic_context = nothing)
+                   gnu_pubnames::Bool=true, debug_info_kind::Cint = default_debug_info_kind(),
+                   debug_info_level::Cint = Cint(JLOptions().debug_level), safepoint_on_entry::Bool=true,
+                   gcstack_arg::Bool=true, use_jlplt::Bool=true, force_emit_all::Bool=false,
+                   sanitize_memory::Bool=false, sanitize_thread::Bool=false, sanitize_address::Bool=false)
         return new(
             Cint(track_allocations), Cint(code_coverage),
             Cint(prefer_specsig),
             Cint(gnu_pubnames), debug_info_kind,
-            Cint(safepoint_on_entry),
-            Cint(gcstack_arg),
-            lookup, generic_context)
+            debug_info_level, Cint(safepoint_on_entry),
+            Cint(gcstack_arg), Cint(use_jlplt), Cint(force_emit_all),
+            Cint(sanitize_memory), Cint(sanitize_thread), Cint(sanitize_address))
     end
 end
 
-const SLOT_USED = 0x8
-ast_slotflag(@nospecialize(code), i) = ccall(:jl_ir_slotflag, UInt8, (Any, Csize_t), code, i - 1)
+# this type mirrors jl_emission_params_t (documented in julia.h)
+struct EmissionParams
+    emit_metadata::Cint
 
-"""
-    may_invoke_generator(method, atype, sparams) -> Bool
-
-Computes whether or not we may invoke the generator for the given `method` on
-the given `atype` and `sparams`. For correctness, all generated function are
-required to return monotonic answers. However, since we don't expect users to
-be able to successfully implement this criterion, we only call generated
-functions on concrete types. The one exception to this is that we allow calling
-generators with abstract types if the generator does not use said abstract type
-(and thus cannot incorrectly use it to break monotonicity). This function
-computes whether we are in either of these cases.
-
-Unlike normal functions, the compilation heuristics still can't generate good dispatch
-in some cases, but this may still allow inference not to fall over in some limited cases.
-"""
-function may_invoke_generator(mi::MethodInstance)
-    return may_invoke_generator(mi.def::Method, mi.specTypes, mi.sparam_vals)
-end
-function may_invoke_generator(method::Method, @nospecialize(atype), sparams::SimpleVector)
-    # If we have complete information, we may always call the generator
-    isdispatchtuple(atype) && return true
-
-    # We don't have complete information, but it is possible that the generator
-    # syntactically doesn't make use of the information we don't have. Check
-    # for that.
-
-    # For now, only handle the (common, generated by the frontend case) that the
-    # generator only has one method
-    generator = method.generator
-    isa(generator, Core.GeneratedFunctionStub) || return false
-    tt = Tuple{typeof(generator.gen), Vararg{Any}}
-    gen_mthds = _methods_by_ftype(tt, #=lim=#1, method.primary_world)
-    gen_mthds isa Vector || return false
-    length(gen_mthds) == 1 || return false
-
-    generator_method = first(gen_mthds).method
-    nsparams = length(sparams)
-    isdefined(generator_method, :source) || return false
-    code = generator_method.source
-    nslots = ccall(:jl_ir_nslots, Int, (Any,), code)
-    at = unwrap_unionall(atype)
-    at isa DataType || return false
-    (nslots >= 1 + length(sparams) + length(at.parameters)) || return false
-
-    firstarg = 1
-    for i = 1:nsparams
-        if isa(sparams[i], TypeVar)
-            if (ast_slotflag(code, firstarg + i) & SLOT_USED) != 0
-                return false
-            end
-        end
-    end
-    nargs = Int(method.nargs)
-    non_va_args = method.isva ? nargs - 1 : nargs
-    for i = 1:non_va_args
-        if !isdispatchelem(at.parameters[i])
-            if (ast_slotflag(code, firstarg + i + nsparams) & SLOT_USED) != 0
-                return false
-            end
-        end
-    end
-    if method.isva
-        # If the va argument is used, we need to ensure that all arguments that
-        # contribute to the va tuple are dispatchelemes
-        if (ast_slotflag(code, firstarg + nargs + nsparams) & SLOT_USED) != 0
-            for i = (non_va_args+1):length(at.parameters)
-                if !isdispatchelem(at.parameters[i])
-                    return false
-                end
-            end
-        end
-    end
-    return true
-end
-
-# give a decent error message if we try to instantiate a staged function on non-leaf types
-function func_for_method_checked(m::Method, @nospecialize(types), sparams::SimpleVector)
-    if isdefined(m, :generator) && !may_invoke_generator(m, types, sparams)
-        error("cannot call @generated function `", m, "` ",
-              "with abstract argument types: ", types)
+    function EmissionParams(; emit_metadata::Bool=true)
+        return new(Cint(emit_metadata))
     end
-    return m
 end
 
 """
     code_typed(f, types; kw...)
 
-Returns an array of type-inferred lowered form (IR) for the methods matching the given
+Return an array of type-inferred lowered form (IR) for the methods matching the given
 generic function and type signature.
 
 # Keyword Arguments
@@ -1337,27 +231,58 @@ internals.
 - `interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world)`:
   optional, controls the abstract interpreter to use, use the native interpreter if not specified.
 
-# Example
+# Examples
+
+One can put the argument types in a tuple to get the corresponding `code_typed`.
+
+```julia
+julia> code_typed(+, (Float64, Float64))
+1-element Vector{Any}:
+ CodeInfo(
+1 ─ %1 = Base.add_float(x, y)::Float64
+└──      return %1
+) => Float64
+
+julia> code_typed((typeof(-), Float64, Float64))
+1-element Vector{Any}:
+ CodeInfo(
+1 ─ %1 = Base.sub_float(x, y)::Float64
+└──      return %1
+) => Float64
 
-One can put the argument types in a tuple to get the corresponding `code_typed`.
+julia> code_typed((Type{Int}, UInt8))
+1-element Vector{Any}:
+ CodeInfo(
+1 ─ %1 = Core.zext_int(Core.Int64, x)::Int64
+└──      return %1
+) => Int64
 
-```julia
-julia> code_typed(+, (Float64, Float64))
+julia> code_typed((Returns{Int64},))
 1-element Vector{Any}:
  CodeInfo(
-1 ─ %1 = Base.add_float(x, y)::Float64
+1 ─ %1 =   builtin Base.getfield(obj, :value)::Int64
 └──      return %1
-) => Float64
+) => Int64
 ```
 """
+function code_typed end
+
 function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f)); kwargs...)
     if isa(f, Core.OpaqueClosure)
-        return code_typed_opaque_closure(f; kwargs...)
+        return code_typed_opaque_closure(f, types; kwargs...)
     end
     tt = signature_type(f, types)
     return code_typed_by_type(tt; kwargs...)
 end
 
+# support queries with signatures rather than objects to better support
+# non-singleton function objects such as `(::Foo)(::Int, ::Int)`
+# via `code_typed((Foo, Int, Int))` or `code_typed(Tuple{Foo, Int, Int})`.
+function code_typed(@nospecialize(argtypes::Union{Tuple,Type{<:Tuple}}); kwargs...)
+    tt = to_tuple_type(argtypes)
+    return code_typed_by_type(tt; kwargs...)
+end
+
 # returns argument tuple type which is supposed to be used for `code_typed` and its family;
 # if there is a single method this functions returns the method argument signature,
 # otherwise returns `Tuple` that doesn't match with any signature
@@ -1370,6 +295,57 @@ function default_tt(@nospecialize(f))
     end
 end
 
+function raise_match_failure(name::Symbol, @nospecialize(tt))
+    @noinline
+    sig_str = sprint(Base.show_tuple_as_call, Symbol(""), tt)
+    error("$name: unanalyzable call given $sig_str")
+end
+
+const REFLECTION_COMPILER = RefValue{Union{Nothing, Module}}(nothing)
+
+function invoke_in_typeinf_world(args...)
+    vargs = Any[args...]
+    return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Any}, Cint), vargs, length(vargs))
+end
+
+function invoke_default_compiler(fname::Symbol, args...)
+    if REFLECTION_COMPILER[] === nothing
+        return invoke_in_typeinf_world(getglobal(Compiler, fname), args...)
+    else
+        return getglobal(REFLECTION_COMPILER[], fname)(args...)
+    end
+end
+
+function invoke_interp_compiler(interp, fname::Symbol, args...)
+    if interp === nothing
+        return invoke_default_compiler(fname, args...)
+    else
+        T = typeof(interp)
+        while true
+            Tname = typename(T).name
+            Tname === :Any && error("Expected AbstractInterpreter")
+            Tname === :AbstractInterpreter && break
+            T = supertype(T)
+        end
+        return getglobal(typename(T).module, fname)(args...)
+    end
+end
+
+function invoke_mt_compiler(mt, fname::Symbol, args...)
+    if mt === nothing
+        return invoke_default_compiler(fname, args...)
+    else
+        T = typeof(mt)
+        while true
+            Tname = typename(T).name
+            Tname === :Any && error("Expected MethodTableView")
+            Tname === :MethodTableView && break
+            T = supertype(T)
+        end
+        return getglobal(typename(T).module, fname)(args...)
+    end
+end
+
 """
     code_typed_by_type(types::Type{<:Tuple}; ...)
 
@@ -1380,7 +356,9 @@ function code_typed_by_type(@nospecialize(tt::Type);
                             optimize::Bool=true,
                             debuginfo::Symbol=:default,
                             world::UInt=get_world_counter(),
-                            interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world))
+                            interp=nothing)
+    passed_interp = interp
+    interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp
     (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
         error("code reflection cannot be used from generated functions")
     if @isdefined(IRShow)
@@ -1392,37 +370,64 @@ function code_typed_by_type(@nospecialize(tt::Type);
         throw(ArgumentError("'debuginfo' must be either :source or :none"))
     end
     tt = to_tuple_type(tt)
-    matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector
+    matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt)
+    matches === nothing && raise_match_failure(:code_typed, tt)
     asts = []
-    for match in matches
+    for match in matches.matches
         match = match::Core.MethodMatch
-        meth = func_for_method_checked(match.method, tt, match.sparams)
-        (code, ty) = Core.Compiler.typeinf_code(interp, meth, match.spec_types, match.sparams, optimize)
+        code = invoke_interp_compiler(passed_interp, :typeinf_code, interp, match, optimize)
         if code === nothing
-            push!(asts, meth => Any)
+            push!(asts, match.method => Any)
         else
             debuginfo === :none && remove_linenums!(code)
-            push!(asts, code => ty)
+            push!(asts, code => code.rettype)
         end
     end
     return asts
 end
 
-function code_typed_opaque_closure(@nospecialize(oc::Core.OpaqueClosure);
-                                   debuginfo::Symbol=:default, _...)
-    ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
+function get_oc_code_rt(passed_interp, oc::Core.OpaqueClosure, types, optimize::Bool)
+    @nospecialize oc types
+    ccall(:jl_is_in_pure_context, Bool, ()) &&
+        error("code reflection cannot be used from generated functions")
     m = oc.source
     if isa(m, Method)
-        code = _uncompressed_ir(m, m.source)
-        debuginfo === :none && remove_linenums!(code)
-        # intersect the declared return type and the inferred return type (if available)
-        rt = typeintersect(code.rettype, typeof(oc).parameters[2])
-        return Any[code => rt]
+        if isdefined(m, :source)
+            if optimize
+                tt = Tuple{typeof(oc.captures), to_tuple_type(types).parameters...}
+                mi = specialize_method(m, tt, Core.svec())
+                interp = invoke_interp_compiler(passed_interp, :_default_interp, m.primary_world)
+                code = invoke_interp_compiler(passed_interp, :typeinf_code, interp, mi, optimize)
+                if code isa CodeInfo
+                    return Pair{CodeInfo, Any}(code, code.rettype)
+                end
+                error("inference not successful")
+            else
+                code = _uncompressed_ir(m)
+                return Pair{CodeInfo, Any}(code, typeof(oc).parameters[2])
+            end
+        else
+            # OC constructed from optimized IR
+            codeinst = m.specializations.cache
+            # XXX: the inferred field is not normally a CodeInfo, but this assumes it is guaranteed to be always
+            return Pair{CodeInfo, Any}(codeinst.inferred, codeinst.rettype)
+        end
     else
         error("encountered invalid Core.OpaqueClosure object")
     end
 end
 
+function code_typed_opaque_closure(oc::Core.OpaqueClosure, types;
+                                   debuginfo::Symbol=:default,
+                                   optimize::Bool=true,
+                                   interp=nothing,
+                                   _...)
+    @nospecialize oc types
+    (code, rt) = get_oc_code_rt(interp, oc, types, optimize)
+    debuginfo === :none && remove_linenums!(code)
+    return Any[Pair{CodeInfo,Any}(code, rt)]
+end
+
 """
     code_ircode(f, [types])
 
@@ -1440,13 +445,13 @@ internals.
   when looking up methods, use current world age if not specified.
 - `interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world)`:
   optional, controls the abstract interpreter to use, use the native interpreter if not specified.
-- `optimize_until::Union{Integer,AbstractString,Nothing} = nothing`: optional,
+- `optimize_until::Union{Int,String,Nothing} = nothing`: optional,
   controls the optimization passes to run.
   If it is a string, it specifies the name of the pass up to which the optimizer is run.
   If it is an integer, it specifies the number of passes to run.
   If it is `nothing` (default), all passes are run.
 
-# Example
+# Examples
 
 One can put the argument types in a tuple to get the corresponding `code_ircode`.
 
@@ -1474,6 +479,11 @@ function code_ircode(@nospecialize(f), @nospecialize(types = default_tt(f)); kwa
     return code_ircode_by_type(tt; kwargs...)
 end
 
+function code_ircode(@nospecialize(argtypes::Union{Tuple,Type{<:Tuple}}); kwargs...)
+    tt = to_tuple_type(argtypes)
+    return code_ircode_by_type(tt; kwargs...)
+end
+
 """
     code_ircode_by_type(types::Type{<:Tuple}; ...)
 
@@ -1483,26 +493,22 @@ a full signature to query.
 function code_ircode_by_type(
     @nospecialize(tt::Type);
     world::UInt=get_world_counter(),
-    interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world),
-    optimize_until::Union{Integer,AbstractString,Nothing}=nothing,
+    interp=nothing,
+    optimize_until::Union{Int,String,Nothing}=nothing,
 )
+    passed_interp = interp
+    interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp
     (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
         error("code reflection cannot be used from generated functions")
     tt = to_tuple_type(tt)
-    matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector
+    matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt)
+    matches === nothing && raise_match_failure(:code_ircode, tt)
     asts = []
-    for match in matches
+    for match in matches.matches
         match = match::Core.MethodMatch
-        meth = func_for_method_checked(match.method, tt, match.sparams)
-        (code, ty) = Core.Compiler.typeinf_ircode(
-            interp,
-            meth,
-            match.spec_types,
-            match.sparams,
-            optimize_until,
-        )
+        (code, ty) = invoke_interp_compiler(passed_interp, :typeinf_ircode, interp, match, optimize_until)
         if code === nothing
-            push!(asts, meth => Any)
+            push!(asts, match.method => Any)
         else
             push!(asts, code => ty)
         end
@@ -1510,16 +516,56 @@ function code_ircode_by_type(
     return asts
 end
 
+function _builtin_return_type(passed_interp, interp,
+                              @nospecialize(f::Core.Builtin), @nospecialize(types))
+    argtypes = Any[to_tuple_type(types).parameters...]
+    rt = invoke_interp_compiler(passed_interp, :builtin_tfunction, interp, f, argtypes, nothing)
+    return invoke_interp_compiler(passed_interp, :widenconst, rt)
+end
+
+function _builtin_effects(passed_interp, interp,
+                          @nospecialize(f::Core.Builtin), @nospecialize(types))
+    argtypes = Any[to_tuple_type(types).parameters...]
+    rt = invoke_interp_compiler(passed_interp, :builtin_tfunction, interp, f, argtypes, nothing)
+    return invoke_interp_compiler(passed_interp, :builtin_effects,
+        invoke_interp_compiler(passed_interp, :typeinf_lattice, interp),
+        f, argtypes, rt)
+end
+
+function _builtin_exception_type(passed_interp, interp,
+                                 @nospecialize(f::Core.Builtin), @nospecialize(types))
+    effects = _builtin_effects(passed_interp, interp, f, types)
+    return invoke_interp_compiler(passed_interp, :is_nothrow, effects) ? Union{} : Any
+end
+
+check_generated_context(world::UInt) =
+    (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
+        error("code reflection cannot be used from generated functions")
+
+# TODO rename `Base.return_types` to `Base.infer_return_types`
 
 """
-    Base.return_types(f::Function, types::DataType=default_tt(f);
-                      world::UInt=get_world_counter(), interp::NativeInterpreter=Core.Compiler.NativeInterpreter(world))
+    Base.return_types(
+        f, types=default_tt(f);
+        world::UInt=get_world_counter(),
+        interp::NativeInterpreter=Core.Compiler.NativeInterpreter(world)) -> rts::Vector{Any}
 
 Return a list of possible return types for a given function `f` and argument types `types`.
 The list corresponds to the results of type inference on all the possible method match
 candidates for `f` and `types` (see also [`methods(f, types)`](@ref methods).
 
-# Example
+# Arguments
+- `f`: The function to analyze.
+- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`.
+- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter.
+- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`.
+
+# Returns
+- `rts::Vector{Any}`: The list of return types that are figured out by inference on
+  methods matching with the given `f` and `types`. The list's order matches the order
+  returned by `methods(f, types)`.
+
+# Examples
 
 ```julia
 julia> Base.return_types(sum, Tuple{Vector{Int}})
@@ -1529,9 +575,9 @@ julia> Base.return_types(sum, Tuple{Vector{Int}})
 julia> methods(sum, (Union{Vector{Int},UnitRange{Int}},))
 # 2 methods for generic function "sum" from Base:
  [1] sum(r::AbstractRange{<:Real})
-     @ range.jl:1396
+     @ range.jl:1399
  [2] sum(a::AbstractArray; dims, kw...)
-     @ reducedim.jl:996
+     @ reducedim.jl:1010
 
 julia> Base.return_types(sum, (Union{Vector{Int},UnitRange{Int}},))
 2-element Vector{Any}:
@@ -1540,103 +586,320 @@ julia> Base.return_types(sum, (Union{Vector{Int},UnitRange{Int}},))
 ```
 
 !!! warning
-    The `return_types` function should not be used from generated functions;
+    The `Base.return_types` function should not be used from generated functions;
     doing so will result in an error.
 """
 function return_types(@nospecialize(f), @nospecialize(types=default_tt(f));
                       world::UInt=get_world_counter(),
-                      interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world))
-    (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
-        error("code reflection cannot be used from generated functions")
+                      interp=nothing)
+    passed_interp = interp
+    interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp
+    check_generated_context(world)
     if isa(f, Core.OpaqueClosure)
-        _, rt = only(code_typed_opaque_closure(f))
+        _, rt = only(code_typed_opaque_closure(f, types; Compiler))
         return Any[rt]
+    elseif isa(f, Core.Builtin)
+        return Any[_builtin_return_type(passed_interp, interp, f, types)]
     end
-
-    if isa(f, Core.Builtin)
-        argtypes = Any[to_tuple_type(types).parameters...]
-        rt = Core.Compiler.builtin_tfunction(interp, f, argtypes, nothing)
-        return Any[Core.Compiler.widenconst(rt)]
-    end
-    rts = []
     tt = signature_type(f, types)
-    matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector
-    for match in matches
-        match = match::Core.MethodMatch
-        meth = func_for_method_checked(match.method, types, match.sparams)
-        ty = Core.Compiler.typeinf_type(interp, meth, match.spec_types, match.sparams)
+    matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt)
+    matches === nothing && raise_match_failure(:return_types, tt)
+    rts = Any[]
+    for match in matches.matches
+        ty = invoke_interp_compiler(passed_interp, :typeinf_type, interp, match::Core.MethodMatch)
         push!(rts, something(ty, Any))
     end
     return rts
 end
 
 """
-    infer_effects(f, types=default_tt(f); world=get_world_counter(), interp=Core.Compiler.NativeInterpreter(world))
+    Base.infer_return_type(
+        f, types=default_tt(f);
+        world::UInt=get_world_counter(),
+        interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) -> rt::Type
+
+Return an inferred return type of the function call specified by `f` and `types`.
+
+# Arguments
+- `f`: The function to analyze.
+- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`.
+- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter.
+- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`.
+
+# Returns
+- `rt::Type`: An inferred return type of the function call specified by the given call signature.
+
+!!! note
+    Note that, different from [`Base.return_types`](@ref), this doesn't give you the list
+    return types of every possible method matching with the given `f` and `types`.
+    It returns a single return type, taking into account all potential outcomes of
+    any function call entailed by the given signature type.
+
+# Examples
+
+```julia
+julia> checksym(::Symbol) = :symbol;
+
+julia> checksym(x::Any) = x;
+
+julia> Base.infer_return_type(checksym, (Union{Symbol,String},))
+Union{String, Symbol}
+
+julia> Base.return_types(checksym, (Union{Symbol,String},))
+2-element Vector{Any}:
+ Symbol
+ Union{String, Symbol}
+```
+
+It's important to note the difference here: `Base.return_types` gives back inferred results
+for each method that matches the given signature `checksum(::Union{Symbol,String})`.
+On the other hand `Base.infer_return_type` returns one collective result that sums up all those possibilities.
+
+!!! warning
+    The `Base.infer_return_type` function should not be used from generated functions;
+    doing so will result in an error.
+"""
+function infer_return_type(@nospecialize(f), @nospecialize(types=default_tt(f));
+                           world::UInt=get_world_counter(),
+                           interp=nothing)
+    passed_interp = interp
+    interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp
+    check_generated_context(world)
+    if isa(f, Core.OpaqueClosure)
+        return last(only(code_typed_opaque_closure(f, types; interp=passed_interp)))
+    elseif isa(f, Core.Builtin)
+        return _builtin_return_type(passed_interp, interp, f, types)
+    end
+    tt = signature_type(f, types)
+    matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt)
+    matches === nothing && raise_match_failure(:infer_return_type, tt)
+    rt = Union{}
+    for match in matches.matches
+        ty = invoke_interp_compiler(passed_interp, :typeinf_type, interp, match::Core.MethodMatch)
+        rt = invoke_interp_compiler(passed_interp, :tmerge, rt, something(ty, Any))
+    end
+    return rt
+end
+
+"""
+    Base.infer_exception_types(
+        f, types=default_tt(f);
+        world::UInt=get_world_counter(),
+        interp::NativeInterpreter=Core.Compiler.NativeInterpreter(world)) -> excts::Vector{Any}
+
+Return a list of possible exception types for a given function `f` and argument types `types`.
+The list corresponds to the results of type inference on all the possible method match
+candidates for `f` and `types` (see also [`methods(f, types)`](@ref methods).
+It works like [`Base.return_types`](@ref), but it infers the exception types instead of the return types.
+
+# Arguments
+- `f`: The function to analyze.
+- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`.
+- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter.
+- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`.
+
+# Returns
+- `excts::Vector{Any}`: The list of exception types that are figured out by inference on
+  methods matching with the given `f` and `types`. The list's order matches the order
+  returned by `methods(f, types)`.
+
+# Examples
+
+```julia
+julia> throw_if_number(::Number) = error("number is given");
+
+julia> throw_if_number(::Any) = nothing;
+
+julia> Base.infer_exception_types(throw_if_number, (Int,))
+1-element Vector{Any}:
+ ErrorException
+
+julia> methods(throw_if_number, (Any,))
+# 2 methods for generic function "throw_if_number" from Main:
+ [1] throw_if_number(x::Number)
+     @ REPL[1]:1
+ [2] throw_if_number(::Any)
+     @ REPL[2]:1
+
+julia> Base.infer_exception_types(throw_if_number, (Any,))
+2-element Vector{Any}:
+ ErrorException # the result of inference on `throw_if_number(::Number)`
+ Union{}        # the result of inference on `throw_if_number(::Any)`
+```
+
+!!! warning
+    The `Base.infer_exception_types` function should not be used from generated functions;
+    doing so will result in an error.
+"""
+function infer_exception_types(@nospecialize(f), @nospecialize(types=default_tt(f));
+                               world::UInt=get_world_counter(),
+                               interp=nothing)
+    passed_interp = interp
+    interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp
+    check_generated_context(world)
+    if isa(f, Core.OpaqueClosure)
+        return Any[Any] # TODO
+    elseif isa(f, Core.Builtin)
+        return Any[_builtin_exception_type(passed_interp, interp, f, types)]
+    end
+    tt = signature_type(f, types)
+    matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt)
+    matches === nothing && raise_match_failure(:infer_exception_types, tt)
+    excts = Any[]
+    for match in matches.matches
+        frame = invoke_interp_compiler(passed_interp, :typeinf_frame, interp, match::Core.MethodMatch, #=run_optimizer=#false)
+        if frame === nothing
+            exct = Any
+        else
+            exct = invoke_interp_compiler(passed_interp, :widenconst, frame.result.exc_result)
+        end
+        push!(excts, exct)
+    end
+    return excts
+end
+
+"""
+    Base.infer_exception_type(
+        f, types=default_tt(f);
+        world::UInt=get_world_counter(),
+        interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) -> exct::Type
+
+Return the type of exception potentially thrown by the function call specified by `f` and `types`.
+
+# Arguments
+- `f`: The function to analyze.
+- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`.
+- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter.
+- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`.
+
+# Returns
+- `exct::Type`: The inferred type of exception that can be thrown by the function call
+  specified by the given call signature.
+
+!!! note
+    Note that, different from [`Base.infer_exception_types`](@ref), this doesn't give you the list
+    exception types for every possible matching method with the given `f` and `types`.
+    It returns a single exception type, taking into account all potential outcomes of
+    any function call entailed by the given signature type.
+
+# Examples
+
+```julia
+julia> f1(x) = x * 2;
+
+julia> Base.infer_exception_type(f1, (Int,))
+Union{}
+```
+
+The exception inferred as `Union{}` indicates that `f1(::Int)` will not throw any exception.
+
+```julia
+julia> f2(x::Int) = x * 2;
+
+julia> Base.infer_exception_type(f2, (Integer,))
+MethodError
+```
+
+This case is pretty much the same as with `f1`, but there's a key difference to note. For
+`f2`, the argument type is limited to `Int`, while the argument type is given as `Tuple{Integer}`.
+Because of this, taking into account the chance of the method error entailed by the call
+signature, the exception type is widened to `MethodError`.
+
+!!! warning
+    The `Base.infer_exception_type` function should not be used from generated functions;
+    doing so will result in an error.
+"""
+function infer_exception_type(@nospecialize(f), @nospecialize(types=default_tt(f));
+                              world::UInt=get_world_counter(),
+                              interp=nothing)
+    passed_interp = interp
+    interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp
+    check_generated_context(world)
+    if isa(f, Core.OpaqueClosure)
+        return Any # TODO
+    elseif isa(f, Core.Builtin)
+        return _builtin_exception_type(passed_interp, interp, f, types)
+    end
+    tt = signature_type(f, types)
+    exct = invoke_interp_compiler(passed_interp, :_infer_exception_type, interp, tt, false)
+    exct === nothing && raise_match_failure(:infer_exception_type, tt)
+    return exct
+end
+
+"""
+    Base.infer_effects(
+        f, types=default_tt(f);
+        optimize::Bool=true,
+        world::UInt=get_world_counter(),
+        interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) -> effects::Effects
 
-Compute the `Effects` of a function `f` with argument types `types`. The `Effects` represents the computational effects of the function call, such as whether it is free of side effects, guaranteed not to throw an exception, guaranteed to terminate, etc. The `world` and `interp` arguments specify the world counter and the native interpreter to use for the analysis.
+Return the possible computation effects of the function call specified by `f` and `types`.
 
 # Arguments
 - `f`: The function to analyze.
 - `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`.
+- `optimize` (optional): Whether to run additional effects refinements based on post-optimization analysis.
 - `world` (optional): The world counter to use for the analysis. Defaults to the current world counter.
-- `interp` (optional): The native interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`.
+- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`.
 
 # Returns
-- `effects::Effects`: The computed effects of the function call.
+- `effects::Effects`: The computed effects of the function call specified by the given call signature.
+  See the documentation of [`Effects`](@ref Core.Compiler.Effects) or [`Base.@assume_effects`](@ref)
+  for more information on the various effect properties.
+
+!!! note
+    Note that, different from [`Base.return_types`](@ref), this doesn't give you the list
+    effect analysis results for every possible matching method with the given `f` and `types`.
+    It returns a single effect, taking into account all potential outcomes of any function
+    call entailed by the given signature type.
 
-# Example
+# Examples
 
 ```julia
-julia> function foo(x)
-           y = x * 2
-           return y
-       end;
+julia> f1(x) = x * 2;
 
-julia> effects = Base.infer_effects(foo, (Int,))
+julia> Base.infer_effects(f1, (Int,))
 (+c,+e,+n,+t,+s,+m,+i)
 ```
 
-This function will return an `Effects` object with information about the computational effects of the function `foo` when called with an `Int` argument. See the documentation for `Effects` for more information on the various effect properties.
+This function will return an `Effects` object with information about the computational
+effects of the function `f1` when called with an `Int` argument.
+
+```julia
+julia> f2(x::Int) = x * 2;
+
+julia> Base.infer_effects(f2, (Integer,))
+(+c,+e,!n,+t,+s,+m,+i)
+```
+
+This case is pretty much the same as with `f1`, but there's a key difference to note. For
+`f2`, the argument type is limited to `Int`, while the argument type is given as `Tuple{Integer}`.
+Because of this, taking into account the chance of the method error entailed by the call
+signature, the `:nothrow` bit gets tainted.
 
 !!! warning
-    The `infer_effects` function should not be used from generated functions;
+    The `Base.infer_effects` function should not be used from generated functions;
     doing so will result in an error.
 
+$(Compiler.effects_key_string)
+
 # See Also
-- [`Core.Compiler.Effects`](@ref): A type representing the computational effects of a method call.
+- [`Compiler.Effects`](@ref): A type representing the computational effects of a method call.
 - [`Base.@assume_effects`](@ref): A macro for making assumptions about the effects of a method.
 """
 function infer_effects(@nospecialize(f), @nospecialize(types=default_tt(f));
-                       world = get_world_counter(),
-                       interp = Core.Compiler.NativeInterpreter(world))
-    (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
-        error("code reflection cannot be used from generated functions")
+                       optimize::Bool=true,
+                       world::UInt=get_world_counter(),
+                       interp=nothing)
+    passed_interp = interp
+    interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp
+    check_generated_context(world)
     if isa(f, Core.Builtin)
-        types = to_tuple_type(types)
-        argtypes = Any[Core.Compiler.Const(f), types.parameters...]
-        rt = Core.Compiler.builtin_tfunction(interp, f, argtypes[2:end], nothing)
-        return Core.Compiler.builtin_effects(Core.Compiler.typeinf_lattice(interp), f,
-            Core.Compiler.ArgInfo(nothing, argtypes), rt)
+        return _builtin_effects(passed_interp, interp, f, types)
     end
     tt = signature_type(f, types)
-    result = Core.Compiler.findall(tt, Core.Compiler.method_table(interp))
-    if result === missing
-        # unanalyzable call, return the unknown effects
-        return Core.Compiler.Effects()
-    end
-    (; matches) = result
-    effects = Core.Compiler.EFFECTS_TOTAL
-    if matches.ambig || !any(match::Core.MethodMatch->match.fully_covers, matches.matches)
-        # account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
-        effects = Core.Compiler.Effects(effects; nothrow=false)
-    end
-    for match in matches.matches
-        match = match::Core.MethodMatch
-        frame = Core.Compiler.typeinf_frame(interp,
-            match.method, match.spec_types, match.sparams, #=run_optimizer=#false)
-        frame === nothing && return Core.Compiler.Effects()
-        effects = Core.Compiler.merge_effects(effects, frame.ipo_effects)
-    end
+    effects = invoke_interp_compiler(passed_interp, :_infer_effects, interp, tt, optimize)
+    effects === nothing && raise_match_failure(:infer_effects, tt)
     return effects
 end
 
@@ -1653,23 +916,24 @@ end
 
 function print_statement_costs(io::IO, @nospecialize(tt::Type);
                                world::UInt=get_world_counter(),
-                               interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world))
+                               interp=nothing)
+    passed_interp = interp
+    interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp
     tt = to_tuple_type(tt)
-    matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector
-    params = Core.Compiler.OptimizationParams(interp)
+    world == typemax(UInt) && error("code reflection cannot be used from generated functions")
+    matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt)
+    matches === nothing && raise_match_failure(:print_statement_costs, tt)
     cst = Int[]
-    for match in matches
+    for match in matches.matches
         match = match::Core.MethodMatch
-        meth = func_for_method_checked(match.method, tt, match.sparams)
-        println(io, meth)
-        (code, ty) = Core.Compiler.typeinf_code(interp, meth, match.spec_types, match.sparams, true)
+        println(io, match.method)
+        code = invoke_interp_compiler(passed_interp, :typeinf_code, interp, match, true)
         if code === nothing
             println(io, "  inference not successful")
         else
             empty!(cst)
             resize!(cst, length(code.code))
-            sptypes = Core.Compiler.VarState[Core.Compiler.VarState(sp, false) for sp in match.sparams]
-            maxcost = Core.Compiler.statement_costs!(cst, code.code, code, sptypes, params)
+            maxcost = invoke_interp_compiler(passed_interp, :statement_costs!, interp, cst, code.code, code, match)
             nd = ndigits(maxcost)
             irshow_config = IRShow.IRShowConfig() do io, linestart, idx
                 print(io, idx > 0 ? lpad(cst[idx], nd+1) : " "^(nd+1), " ")
@@ -1684,18 +948,11 @@ end
 print_statement_costs(args...; kwargs...) = print_statement_costs(stdout, args...; kwargs...)
 
 function _which(@nospecialize(tt::Type);
-    method_table::Union{Nothing,Core.MethodTable,Core.Compiler.MethodTableView}=nothing,
+    method_table #=::Union{Nothing,Core.MethodTable,Compiler.MethodTableView}=# =nothing,
     world::UInt=get_world_counter(),
     raise::Bool=true)
     world == typemax(UInt) && error("code reflection cannot be used from generated functions")
-    if method_table === nothing
-        table = Core.Compiler.InternalMethodTable(world)
-    elseif method_table isa Core.MethodTable
-        table = Core.Compiler.OverlayMethodTable(world, method_table)
-    else
-        table = method_table
-    end
-    match, = Core.Compiler.findsup(tt, table)
+    match, = invoke_mt_compiler(method_table, :findsup_mt, tt, world, method_table)
     if match === nothing
         raise && error("no unique matching method found for the specified argument types")
         return nothing
@@ -1706,25 +963,36 @@ end
 """
     which(f, types)
 
-Returns the method of `f` (a `Method` object) that would be called for arguments of the given `types`.
+Return the method of `f` (a `Method` object) that would be called for arguments of the given `types`.
 
 If `types` is an abstract type, then the method that would be called by `invoke` is returned.
 
-See also: [`parentmodule`](@ref), and `@which` and `@edit` in [`InteractiveUtils`](@ref man-interactive-utils).
+See also: [`parentmodule`](@ref), [`@which`](@ref Main.InteractiveUtils.@which), and [`@edit`](@ref Main.InteractiveUtils.@edit).
 """
 function which(@nospecialize(f), @nospecialize(t))
     tt = signature_type(f, t)
-    return which(tt)
+    world = get_world_counter()
+    match, _ = invoke_default_compiler(:_findsup, tt, nothing, world)
+    if match === nothing
+        me = MethodError(f, t, world)
+        ee = ErrorException(sprint(io -> begin
+            println(io, "Calling invoke(f, t, args...) would throw:");
+            Base.showerror(io, me);
+        end))
+        throw(ee)
+    end
+    return match.method
 end
 
 """
     which(types::Type{<:Tuple})
 
-Returns the method that would be called by the given type signature (as a tuple type).
+Return the method that would be called by the given type signature (as a tuple type).
 """
 function which(@nospecialize(tt#=::Type=#))
     return _which(tt).method
 end
+which(@nospecialize(argtypes::Tuple)) = which(to_tuple_type(argtypes))
 
 """
     which(module, symbol)
@@ -1741,20 +1009,14 @@ end
 # function reflection
 
 """
-    nameof(f::Function) -> Symbol
+    nameof(f::Function)::Symbol
 
 Get the name of a generic `Function` as a symbol. For anonymous functions,
 this is a compiler-generated name. For explicitly-declared subtypes of
 `Function`, it is the name of the function's type.
 """
 function nameof(f::Function)
-    t = typeof(f)
-    mt = t.name.mt
-    if mt === Symbol.name.mt
-        # uses shared method table, so name is not unique to this function type
-        return nameof(t)
-    end
-    return mt.name
+    return typeof(f).name.singletonname
 end
 
 function nameof(f::Core.IntrinsicFunction)
@@ -1763,7 +1025,7 @@ function nameof(f::Core.IntrinsicFunction)
 end
 
 """
-    parentmodule(f::Function) -> Module
+    parentmodule(f::Function)::Module
 
 Determine the module containing the (first) definition of a generic
 function.
@@ -1771,7 +1033,7 @@ function.
 parentmodule(f::Function) = parentmodule(typeof(f))
 
 """
-    parentmodule(f::Function, types) -> Module
+    parentmodule(f::Function, types)::Module
 
 Determine the module containing the first method of a generic function `f` matching
 the specified `types`.
@@ -1785,7 +1047,7 @@ function parentmodule(@nospecialize(f), @nospecialize(types))
 end
 
 """
-    parentmodule(m::Method) -> Module
+    parentmodule(m::Method)::Module
 
 Return the module in which the given method `m` is defined.
 
@@ -1795,7 +1057,7 @@ Return the module in which the given method `m` is defined.
 parentmodule(m::Method) = m.module
 
 """
-    hasmethod(f, t::Type{<:Tuple}[, kwnames]; world=get_world_counter()) -> Bool
+    hasmethod(f, t::Type{<:Tuple}[, kwnames]; world=get_world_counter())::Bool
 
 Determine whether the given generic function has a method matching the given
 `Tuple` of argument types with the upper bound of world age given by `world`.
@@ -1831,7 +1093,7 @@ true
 ```
 """
 function hasmethod(@nospecialize(f), @nospecialize(t))
-    return Core._hasmethod(f, t isa Type ? t : to_tuple_type(t))
+    return Core._hasmethod(signature_type(f, t))
 end
 
 function Core.kwcall(kwargs::NamedTuple, ::typeof(hasmethod), @nospecialize(f), @nospecialize(t))
@@ -1841,6 +1103,7 @@ end
 
 function hasmethod(f, t, kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_counter())
     @nospecialize
+    world == typemax(UInt) && error("code reflection cannot be used from generated functions")
     isempty(kwnames) && return hasmethod(f, t; world)
     t = to_tuple_type(t)
     ft = Core.Typeof(f)
@@ -1849,11 +1112,11 @@ function hasmethod(f, t, kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_c
     match = ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), tt, nothing, world)
     match === nothing && return false
     kws = ccall(:jl_uncompress_argnames, Array{Symbol,1}, (Any,), (match::Method).slot_syms)
+    kws = kws[((match::Method).nargs + 1):end] # remove positional arguments
     isempty(kws) && return true # some kwfuncs simply forward everything directly
     for kw in kws
         endswith(String(kw), "...") && return true
     end
-    kwnames = Symbol[kwnames[i] for i in 1:length(kwnames)]
     return issubset(kwnames, kws)
 end
 
@@ -1893,6 +1156,8 @@ function bodyfunction(basemethod::Method)
                     else
                         return nothing
                     end
+                elseif isa(fsym, Core.SSAValue)
+                    fsym = ast.code[fsym.id]
                 else
                     return nothing
                 end
@@ -1903,7 +1168,7 @@ function bodyfunction(basemethod::Method)
 end
 
 """
-    Base.isambiguous(m1, m2; ambiguous_bottom=false) -> Bool
+    Base.isambiguous(m1, m2; ambiguous_bottom=false)::Bool
 
 Determine whether two methods `m1` and `m2` may be ambiguous for some call
 signature. This test is performed in the context of other methods of the same
@@ -1946,10 +1211,11 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false)
             has_bottom_parameter(ti) && return false
         end
         world = get_world_counter()
+        world == typemax(UInt) && return true # intersecting methods are always ambiguous in the generator world, which is true, albeit maybe confusing for some
         min = Ref{UInt}(typemin(UInt))
         max = Ref{UInt}(typemax(UInt))
         has_ambig = Ref{Int32}(0)
-        ms = _methods_by_ftype(ti, nothing, -1, world, true, min, max, has_ambig)::Vector
+        ms = collect(Core.MethodMatch, _methods_by_ftype(ti, nothing, -1, world, true, min, max, has_ambig)::Vector)
         has_ambig[] == 0 && return false
         if !ambiguous_bottom
             filter!(ms) do m::Core.MethodMatch
@@ -1962,7 +1228,6 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false)
         # report the other ambiguous pair)
         have_m1 = have_m2 = false
         for match in ms
-            match = match::Core.MethodMatch
             m = match.method
             m === m1 && (have_m1 = true)
             m === m2 && (have_m2 = true)
@@ -1980,7 +1245,7 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false)
             for match in ms
                 m = match.method
                 match.fully_covers || continue
-                if minmax === nothing || morespecific(m.sig, minmax.sig)
+                if minmax === nothing || morespecific(m, minmax)
                     minmax = m
                 end
             end
@@ -1990,8 +1255,8 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false)
             for match in ms
                 m = match.method
                 m === minmax && continue
-                if !morespecific(minmax.sig, m.sig)
-                    if match.fully_covers || !morespecific(m.sig, minmax.sig)
+                if !morespecific(minmax, m)
+                    if match.fully_covers || !morespecific(m, minmax)
                         return true
                     end
                 end
@@ -2021,73 +1286,6 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false)
     return true
 end
 
-"""
-    delete_method(m::Method)
-
-Make method `m` uncallable and force recompilation of any methods that use(d) it.
-"""
-function delete_method(m::Method)
-    ccall(:jl_method_table_disable, Cvoid, (Any, Any), get_methodtable(m), m)
-end
-
-function get_methodtable(m::Method)
-    return ccall(:jl_method_get_table, Any, (Any,), m)::Core.MethodTable
-end
-
-"""
-    has_bottom_parameter(t) -> Bool
-
-Determine whether `t` is a Type for which one or more of its parameters is `Union{}`.
-"""
-function has_bottom_parameter(t::DataType)
-    for p in t.parameters
-        has_bottom_parameter(p) && return true
-    end
-    return false
-end
-has_bottom_parameter(t::typeof(Bottom)) = true
-has_bottom_parameter(t::UnionAll) = has_bottom_parameter(unwrap_unionall(t))
-has_bottom_parameter(t::Union) = has_bottom_parameter(t.a) & has_bottom_parameter(t.b)
-has_bottom_parameter(t::TypeVar) = has_bottom_parameter(t.ub)
-has_bottom_parameter(::Any) = false
-
-min_world(m::Core.CodeInstance) = m.min_world
-max_world(m::Core.CodeInstance) = m.max_world
-min_world(m::Core.CodeInfo) = m.min_world
-max_world(m::Core.CodeInfo) = m.max_world
-get_world_counter() = ccall(:jl_get_world_counter, UInt, ())
-
-"""
-    propertynames(x, private=false)
-
-Get a tuple or a vector of the properties (`x.property`) of an object `x`.
-This is typically the same as [`fieldnames(typeof(x))`](@ref), but types
-that overload [`getproperty`](@ref) should generally overload `propertynames`
-as well to get the properties of an instance of the type.
-
-`propertynames(x)` may return only "public" property names that are part
-of the documented interface of `x`.   If you want it to also return "private"
-property names intended for internal use, pass `true` for the optional second argument.
-REPL tab completion on `x.` shows only the `private=false` properties.
-
-See also: [`hasproperty`](@ref), [`hasfield`](@ref).
-"""
-propertynames(x) = fieldnames(typeof(x))
-propertynames(m::Module) = names(m)
-propertynames(x, private::Bool) = propertynames(x) # ignore private flag by default
-
-"""
-    hasproperty(x, s::Symbol)
-
-Return a boolean indicating whether the object `x` has `s` as one of its own properties.
-
-!!! compat "Julia 1.2"
-     This function requires at least Julia 1.2.
-
-See also: [`propertynames`](@ref), [`hasfield`](@ref).
-"""
-hasproperty(x, s::Symbol) = s in propertynames(x)
-
 """
     @invoke f(arg::T, ...; kwargs...)
 
@@ -2107,22 +1305,22 @@ It also supports the following syntax:
 
 ```jldoctest
 julia> @macroexpand @invoke f(x::T, y)
-:(Core.invoke(f, Tuple{T, Core.Typeof(y)}, x, y))
+:(Core.invoke(f, Base.Tuple{T, Core.Typeof(y)}, x, y))
 
 julia> @invoke 420::Integer % Unsigned
 0x00000000000001a4
 
 julia> @macroexpand @invoke (x::X).f
-:(Core.invoke(Base.getproperty, Tuple{X, Core.Typeof(:f)}, x, :f))
+:(Core.invoke(Base.getproperty, Base.Tuple{X, Core.Typeof(:f)}, x, :f))
 
 julia> @macroexpand @invoke (x::X).f = v::V
-:(Core.invoke(Base.setproperty!, Tuple{X, Core.Typeof(:f), V}, x, :f, v))
+:(Core.invoke(Base.setproperty!, Base.Tuple{X, Core.Typeof(:f), V}, x, :f, v))
 
 julia> @macroexpand @invoke (xs::Xs)[i::I]
-:(Core.invoke(Base.getindex, Tuple{Xs, I}, xs, i))
+:(Core.invoke(Base.getindex, Base.Tuple{Xs, I}, xs, i))
 
 julia> @macroexpand @invoke (xs::Xs)[i::I] = v::V
-:(Core.invoke(Base.setindex!, Tuple{Xs, V, I}, xs, v, i))
+:(Core.invoke(Base.setindex!, Base.Tuple{Xs, V, I}, xs, v, i))
 ```
 
 !!! compat "Julia 1.7"
@@ -2135,23 +1333,36 @@ julia> @macroexpand @invoke (xs::Xs)[i::I] = v::V
     The additional syntax is supported as of Julia 1.10.
 """
 macro invoke(ex)
-    topmod = Core.Compiler._topmod(__module__) # well, except, do not get it via CC but define it locally
+    topmod = _topmod(__module__)
     f, args, kwargs = destructure_callex(topmod, ex)
     types = Expr(:curly, :Tuple)
     out = Expr(:call, GlobalRef(Core, :invoke))
-    isempty(kwargs) || push!(out.args, Expr(:parameters, kwargs...))
-    push!(out.args, f)
+    isempty(kwargs) || push!(out.args, Expr(:parameters, Any[esc(kw) for kw in kwargs]...))
+    push!(out.args, esc(f))
     push!(out.args, types)
     for arg in args
         if isexpr(arg, :(::))
-            push!(out.args, arg.args[1])
-            push!(types.args, arg.args[2])
+            push!(out.args, esc(arg.args[1]))
+            push!(types.args, esc(arg.args[2]))
         else
-            push!(out.args, arg)
-            push!(types.args, Expr(:call, GlobalRef(Core, :Typeof), arg))
+            push!(out.args, esc(arg))
+            push!(types.args, Expr(:call, GlobalRef(Core, :Typeof), esc(arg)))
         end
     end
-    return esc(out)
+    return out
+end
+
+getglobalref(gr::GlobalRef, world::UInt) = ccall(:jl_eval_globalref, Any, (Any, UInt), gr, world)
+
+function invokelatest_gr(gr::GlobalRef, args...; kwargs...)
+    @inline
+    kwargs = merge(NamedTuple(), kwargs)
+    world = get_world_counter()
+    f = getglobalref(gr, world)
+    if isempty(kwargs)
+        return invoke_in_world(world, f, args...)
+    end
+    return invoke_in_world(world, Core.kwcall, kwargs, f, args...)
 end
 
 """
@@ -2167,22 +1378,11 @@ It also supports the following syntax:
 - `@invokelatest xs[i]` expands to `Base.invokelatest(getindex, xs, i)`
 - `@invokelatest xs[i] = v` expands to `Base.invokelatest(setindex!, xs, v, i)`
 
-```jldoctest
-julia> @macroexpand @invokelatest f(x; kw=kwv)
-:(Base.invokelatest(f, x; kw = kwv))
-
-julia> @macroexpand @invokelatest x.f
-:(Base.invokelatest(Base.getproperty, x, :f))
-
-julia> @macroexpand @invokelatest x.f = v
-:(Base.invokelatest(Base.setproperty!, x, :f, v))
-
-julia> @macroexpand @invokelatest xs[i]
-:(Base.invokelatest(Base.getindex, xs, i))
-
-julia> @macroexpand @invokelatest xs[i] = v
-:(Base.invokelatest(Base.setindex!, xs, v, i))
-```
+!!! note
+    If `f` is a global, it will be resolved consistently
+    in the (latest) world as the call target. However, all other arguments
+    (as well as `f` itself if it is not a literal global) will be evaluated
+    in the current world age.
 
 !!! compat "Julia 1.7"
     This macro requires Julia 1.7 or later.
@@ -2194,13 +1394,47 @@ julia> @macroexpand @invokelatest xs[i] = v
     The additional `x.f` and `xs[i]` syntax requires Julia 1.10.
 """
 macro invokelatest(ex)
-    topmod = Core.Compiler._topmod(__module__) # well, except, do not get it via CC but define it locally
+    topmod = _topmod(__module__)
     f, args, kwargs = destructure_callex(topmod, ex)
-    out = Expr(:call, GlobalRef(Base, :invokelatest))
-    isempty(kwargs) || push!(out.args, Expr(:parameters, kwargs...))
-    push!(out.args, f)
-    append!(out.args, args)
-    return esc(out)
+
+    if !isa(f, GlobalRef)
+        out_f = Expr(:call, GlobalRef(Base, :invokelatest))
+        isempty(kwargs) || push!(out_f.args, Expr(:parameters, Any[esc(kw) for kw in kwargs]...))
+
+        if isexpr(f, :(.))
+            s = :s
+            check = quote
+                $s = $(esc(f.args[1]))
+                isa($s, Module)
+            end
+            push!(out_f.args, Expr(:(.), s, esc(f.args[2])))
+        else
+            push!(out_f.args, esc(f))
+        end
+        append!(out_f.args, Any[esc(arg) for arg in args])
+
+        if @isdefined(s)
+            f = :(GlobalRef($s, $(esc(f.args[2]))))
+        elseif isa(f, Symbol)
+            check = esc(:($(Expr(:isglobal, f))))
+        else
+            return out_f
+        end
+    end
+
+    out_gr = Expr(:call, GlobalRef(Base, :invokelatest_gr))
+    isempty(kwargs) || push!(out_gr.args, Expr(:parameters, Any[esc(kw) for kw in kwargs]...))
+    push!(out_gr.args, isa(f, GlobalRef) ? QuoteNode(f) :
+                       isa(f, Symbol) ? QuoteNode(GlobalRef(__module__, f)) :
+                       f)
+    append!(out_gr.args, Any[esc(arg) for arg in args])
+
+    if isa(f, GlobalRef)
+        return out_gr
+    end
+
+    # f::Symbol
+    return :($check ? $out_gr : $out_f)
 end
 
 function destructure_callex(topmod::Module, @nospecialize(ex))
@@ -2251,3 +1485,20 @@ function destructure_callex(topmod::Module, @nospecialize(ex))
     end
     return f, args, kwargs
 end
+
+"""
+    Base.drop_all_caches()
+
+Internal function to drop all native code caches and increment world age.
+This invalidates all compiled code as if a method was added that intersects
+with all existing methods.
+"""
+function drop_all_caches()
+    ccall(:jl_drop_all_caches, Cvoid, ())
+
+    # Reset loading.jl world age so that loading code is regenerated
+    _require_world_age[] = typemax(UInt)
+
+    # Call Base.Compiler.activate!() after dropping caching to activate coverage of the Compiler code itself
+    Base.Compiler.activate!()
+end
diff --git a/base/refpointer.jl b/base/refpointer.jl
index ad74763ff8286..c5968934aa748 100644
--- a/base/refpointer.jl
+++ b/base/refpointer.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+import Core: Ref
+
 """
     Ref{T}
 
@@ -42,9 +44,18 @@ A `C_NULL` instance of `Ptr` can be passed to a `ccall` `Ref` argument to initia
 # Examples
 
 ```jldoctest
-julia> Ref(5)
+julia> r = Ref(5) # Create a Ref with an initial value
 Base.RefValue{Int64}(5)
 
+julia> r[] # Getting a value from a Ref
+5
+
+julia> r[] = 7 # Storing a new value in a Ref
+7
+
+julia> r # The Ref now contains 7
+Base.RefValue{Int64}(7)
+
 julia> isa.(Ref([1,2,3]), [Array, Dict, Int]) # Treat reference values as scalar during broadcasting
 3-element BitVector:
  1
@@ -65,9 +76,6 @@ julia> Ref{Int64}()[]; # A reference to a bitstype refers to an undetermined val
 
 julia> isassigned(Ref{Int64}()) # A reference to a bitstype is always assigned
 true
-
-julia> Ref{Int64}(0)[] == 0 # Explicitly give a value for a bitstype reference
-true
 ```
 """
 Ref
@@ -142,13 +150,14 @@ if is_primary_base_module
     Ref(x::Ptr{T}, i::Integer) where {T} = x + (i - 1) * Core.sizeof(T)
 
     # convert Arrays to pointer arrays for ccall
-    function Ref{P}(a::Array{<:Union{Ptr,Cwstring,Cstring}}) where P<:Union{Ptr,Cwstring,Cstring}
-        return RefArray(a) # effectively a no-op
-    end
+    # For example `["a", "b"]` to Ptr{Cstring} for `char **argv`
     function Ref{P}(a::Array{T}) where P<:Union{Ptr,Cwstring,Cstring} where T
-        if (!isbitstype(T) && T <: eltype(P))
+        if P == T
+            return getfield(a, :ref)
+        elseif (isbitstype(T) ? T <: Ptr || T <: Union{Cwstring,Cstring} : T <: eltype(P))
             # this Array already has the right memory layout for the requested Ref
-            return RefArray(a,1,false) # root something, so that this function is type-stable
+            # but the wrong eltype for the constructor
+            return RefArray{P,typeof(a),Nothing}(a, 1, nothing) # effectively a no-op
         else
             ptrs = Vector{P}(undef, length(a)+1)
             roots = Vector{Any}(undef, length(a))
@@ -158,14 +167,14 @@ if is_primary_base_module
                 roots[i] = root
             end
             ptrs[length(a)+1] = C_NULL
-            return RefArray(ptrs,1,roots)
+            return RefArray{P,typeof(ptrs),typeof(roots)}(ptrs, 1, roots)
         end
     end
     Ref(x::AbstractArray, i::Integer) = RefArray(x, i)
 end
 
-cconvert(::Type{Ptr{P}}, a::Array{<:Ptr}) where {P<:Ptr} = a
-cconvert(::Type{Ref{P}}, a::Array{<:Ptr}) where {P<:Ptr} = a
+cconvert(::Type{Ptr{P}}, a::Array{<:Union{Ptr,Cwstring,Cstring}}) where {P<:Union{Ptr,Cwstring,Cstring}} = getfield(a, :ref)
+cconvert(::Type{Ref{P}}, a::Array{<:Union{Ptr,Cwstring,Cstring}}) where {P<:Union{Ptr,Cwstring,Cstring}} = getfield(a, :ref)
 cconvert(::Type{Ptr{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{P}(a)
 cconvert(::Type{Ref{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{P}(a)
 
diff --git a/base/refvalue.jl b/base/refvalue.jl
index 000088ff0ce76..188ab67f26a78 100644
--- a/base/refvalue.jl
+++ b/base/refvalue.jl
@@ -9,7 +9,7 @@ mutable struct RefValue{T} <: Ref{T}
 end
 RefValue(x::T) where {T} = RefValue{T}(x)
 """
-    isassigned(ref::RefValue) -> Bool
+    isassigned(ref::RefValue)::Bool
 
 Test whether the given [`Ref`](@ref) is associated with a value.
 This is always true for a [`Ref`](@ref) of a bitstype object.
@@ -46,9 +46,9 @@ function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefValue{T})
         # Instead, explicitly load the pointer from the `RefValue`,
         # which also ensures this returns same pointer as the one rooted in the `RefValue` object.
         p = atomic_pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), :monotonic)
-    end
-    if p == C_NULL
-        throw(UndefRefError())
+        if p == C_NULL
+            throw(UndefRefError())
+        end
     end
     return p
 end
diff --git a/base/regex.jl b/base/regex.jl
index 109724903823f..52db136cc6643 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -28,7 +28,7 @@ mutable struct Regex <: AbstractPattern
 
     function Regex(pattern::AbstractString, compile_options::Integer,
                    match_options::Integer)
-        pattern = String(pattern)
+        pattern = String(pattern)::String
         compile_options = UInt32(compile_options)
         match_options = UInt32(match_options)
         if (compile_options & ~PCRE.COMPILE_MASK) != 0
@@ -69,11 +69,11 @@ Regex(pattern::AbstractString) = Regex(pattern, DEFAULT_COMPILER_OPTS, DEFAULT_M
 
 function compile(regex::Regex)
     if regex.regex == C_NULL
-        if PCRE.PCRE_COMPILE_LOCK === nothing
+        if !isdefinedglobal(PCRE, :PCRE_COMPILE_LOCK)
             regex.regex = PCRE.compile(regex.pattern, regex.compile_options)
             PCRE.jit_compile(regex.regex)
         else
-            l = PCRE.PCRE_COMPILE_LOCK::Threads.SpinLock
+            l = PCRE.PCRE_COMPILE_LOCK
             lock(l)
             try
                 if regex.regex == C_NULL
@@ -185,11 +185,16 @@ If a group was not captured, `nothing` will be yielded instead of a substring.
 
 Methods that accept a `RegexMatch` object are defined for [`iterate`](@ref),
 [`length`](@ref), [`eltype`](@ref), [`keys`](@ref keys(::RegexMatch)), [`haskey`](@ref), and
-[`getindex`](@ref), where keys are the the names or numbers of a capture group.
+[`getindex`](@ref), where keys are the names or numbers of a capture group.
 See [`keys`](@ref keys(::RegexMatch)) for more information.
 
+`Tuple(m)`, `NamedTuple(m)`, and `Dict(m)` can be used to construct more flexible collection types from `RegexMatch` objects.
+
+!!! compat "Julia 1.11"
+    Constructing NamedTuples and Dicts from RegexMatches requires Julia 1.11
+
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\S+\$"m
 julia> m = match(r"(?<hour>\\d+):(?<minute>\\d+)(am|pm)?", "11:30 in the morning")
 RegexMatch("11:30", hour="11", minute="30", 3=nothing)
 
@@ -210,18 +215,28 @@ julia> hr, min, ampm = m; # destructure capture groups by iteration
 
 julia> hr
 "11"
+
+julia> Dict(m)
+Dict{Any, Union{Nothing, SubString{String}}} with 3 entries:
+  "hour"   => "11"
+  3        => nothing
+  "minute" => "30"
 ```
 """
-struct RegexMatch <: AbstractMatch
-    match::SubString{String}
-    captures::Vector{Union{Nothing,SubString{String}}}
+struct RegexMatch{S<:AbstractString} <: AbstractMatch
+    match::SubString{S}
+    captures::Vector{Union{Nothing,SubString{S}}}
     offset::Int
     offsets::Vector{Int}
     regex::Regex
 end
 
+RegexMatch(match::SubString{S}, captures::Vector{Union{Nothing,SubString{S}}},
+           offset::Union{Int, UInt}, offsets::Vector{Int}, regex::Regex) where {S<:AbstractString} =
+    RegexMatch{S}(match, captures, offset, offsets, regex)
+
 """
-    keys(m::RegexMatch) -> Vector
+    keys(m::RegexMatch)::Vector
 
 Return a vector of keys for all capture groups of the underlying regex.
 A key is included even if the capture group fails to match.
@@ -245,7 +260,7 @@ julia> keys(match(r"(?<hour>\\d+):(?<minute>\\d+)(am|pm)?", "11:30"))
 function keys(m::RegexMatch)
     idx_to_capture_name = PCRE.capture_names(m.regex.regex)
     return map(eachindex(m.captures)) do i
-        # If the capture group is named, return it's name, else return it's index
+        # If the capture group is named, return its name, else return its index
         get(idx_to_capture_name, i, i)
     end
 end
@@ -285,6 +300,9 @@ iterate(m::RegexMatch, args...) = iterate(m.captures, args...)
 length(m::RegexMatch) = length(m.captures)
 eltype(m::RegexMatch) = eltype(m.captures)
 
+NamedTuple(m::RegexMatch) = NamedTuple{Symbol.(Tuple(keys(m)))}(values(m))
+Dict(m::RegexMatch) = Dict(pairs(m))
+
 function occursin(r::Regex, s::AbstractString; offset::Integer=0)
     compile(r)
     return PCRE.exec_r(r.regex, String(s), offset, r.match_options)
@@ -306,7 +324,7 @@ Return `true` if `s` starts with the regex pattern, `prefix`.
     `match_option` to PCRE. If compile time is amortized,
     `occursin(r"^...", s)` is faster than `startswith(s, r"...")`.
 
-See also [`occursin`](@ref) and [`endswith`](@ref).
+See also [`occursin`](@ref), [`endswith`](@ref), [`match`](@ref)
 
 !!! compat "Julia 1.2"
     This method requires at least Julia 1.2.
@@ -338,7 +356,7 @@ Return `true` if `s` ends with the regex pattern, `suffix`.
     `match_option` to PCRE. If compile time is amortized,
     `occursin(r"...\$", s)` is faster than `endswith(s, r"...")`.
 
-See also [`occursin`](@ref) and [`startswith`](@ref).
+See also [`occursin`](@ref), [`startswith`](@ref), [`match`](@ref)
 
 !!! compat "Julia 1.2"
     This method requires at least Julia 1.2.
@@ -377,9 +395,13 @@ end
     match(r::Regex, s::AbstractString[, idx::Integer[, addopts]])
 
 Search for the first match of the regular expression `r` in `s` and return a [`RegexMatch`](@ref)
-object containing the match, or nothing if the match failed. The matching substring can be
-retrieved by accessing `m.match` and the captured sequences can be retrieved by accessing
-`m.captures` The optional `idx` argument specifies an index at which to start the search.
+object containing the match, or nothing if the match failed.
+The optional `idx` argument specifies an index at which to start the search.
+The matching substring can be retrieved by accessing `m.match`, the captured sequences can be retrieved by accessing `m.captures`.
+The resulting [`RegexMatch`](@ref) object can be used to construct other collections: e.g. `Tuple(m)`, `NamedTuple(m)`.
+
+!!! compat "Julia 1.11"
+    Constructing NamedTuples and Dicts requires Julia 1.11
 
 # Examples
 ```jldoctest
@@ -399,6 +421,9 @@ julia> m.match
 julia> match(rx, "cabac", 3) === nothing
 true
 ```
+# See also
+[`eachmatch`](@ref), [`occursin`](@ref), [`findfirst`](@ref)
+
 """
 function match end
 
@@ -423,15 +448,42 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer,
     return result
 end
 
+function _annotatedmatch(m::RegexMatch{S}, str::AnnotatedString{S}) where {S<:AbstractString}
+    RegexMatch{AnnotatedString{S}}(
+        (@inbounds SubString{AnnotatedString{S}}(
+            str, m.match.offset, m.match.ncodeunits, Val(:noshift))),
+        Union{Nothing,SubString{AnnotatedString{S}}}[
+            if !isnothing(cap)
+                (@inbounds SubString{AnnotatedString{S}}(
+                    str, cap.offset, cap.ncodeunits, Val(:noshift)))
+            end for cap in m.captures],
+        m.offset, m.offsets, m.regex)
+end
+
+function match(re::Regex, str::AnnotatedString)
+    m = match(re, str.string)
+    if !isnothing(m)
+        _annotatedmatch(m, str)
+    end
+end
+
+function match(re::Regex, str::AnnotatedString, idx::Integer, add_opts::UInt32=UInt32(0))
+    m = match(re, str.string, idx, add_opts)
+    if !isnothing(m)
+        _annotatedmatch(m, str)
+    end
+end
+
 match(r::Regex, s::AbstractString) = match(r, s, firstindex(s))
 match(r::Regex, s::AbstractString, i::Integer) = throw(ArgumentError(
-    "regex matching is only available for the String type; use String(s) to convert"
+    "regex matching is only available for the String and AnnotatedString types; use String(s) to convert"
 ))
 
 findnext(re::Regex, str::Union{String,SubString}, idx::Integer) = _findnext_re(re, str, idx, C_NULL)
 
 # TODO: return only start index and update deprecation
-function _findnext_re(re::Regex, str::Union{String,SubString}, idx::Integer, match_data::Ptr{Cvoid})
+# duck-type str so that external UTF-8 string packages like StringViews can hook in
+function _findnext_re(re::Regex, str, idx::Integer, match_data::Ptr{Cvoid})
     if idx > nextind(str,lastindex(str))
         throw(BoundsError())
     end
@@ -509,6 +561,9 @@ julia> count(r"a(.)a", "cabacabac", overlap=true)
 julia> count(r"a(.)a", "cabacabac")
 2
 ```
+# See also
+[`eachmatch`](@ref), [`occursin`](@ref), [`findall`](@ref)
+
 """
 function count(t::Union{AbstractChar,AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false)
     n = 0
@@ -594,17 +649,17 @@ replace_err(repl) = error("Bad replacement string: $repl")
 function _write_capture(io::IO, group::Int, str, r, re::RegexAndMatchData)
     len = PCRE.substring_length_bynumber(re.match_data, group)
     # in the case of an optional group that doesn't match, len == 0
-    len == 0 && return
+    len == 0 && return len
     ensureroom(io, len+1)
     PCRE.substring_copy_bynumber(re.match_data, group,
         pointer(io.data, io.ptr), len+1)
     io.ptr += len
     io.size = max(io.size, io.ptr - 1)
-    nothing
+    return len
 end
 function _write_capture(io::IO, group::Int, str, r, re)
     group == 0 || replace_err("pattern is not a Regex")
-    return print(io, SubString(str, r))
+    return write(io, SubString(str, r))
 end
 
 
@@ -618,12 +673,13 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
     repl = unescape_string(repl_s.string, KEEP_ESC)
     i = firstindex(repl)
     e = lastindex(repl)
+    nb = 0
     while i <= e
         if repl[i] == SUB_CHAR
             next_i = nextind(repl, i)
             next_i > e && replace_err(repl)
             if repl[next_i] == SUB_CHAR
-                write(io, SUB_CHAR)
+                nb += write(io, SUB_CHAR)
                 i = nextind(repl, next_i)
             elseif isdigit(repl[next_i])
                 group = parse(Int, repl[next_i])
@@ -636,7 +692,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
                         break
                     end
                 end
-                _write_capture(io, group, str, r, re)
+                nb += _write_capture(io, group, str, r, re)
             elseif repl[next_i] == GROUP_CHAR
                 i = nextind(repl, next_i)
                 if i > e || repl[i] != LBRACKET
@@ -658,30 +714,32 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
                 else
                     group = -1
                 end
-                _write_capture(io, group, str, r, re)
+                nb += _write_capture(io, group, str, r, re)
                 i = nextind(repl, i)
             else
                 replace_err(repl)
             end
         else
-            write(io, repl[i])
+            nb += write(io, repl[i])
             i = nextind(repl, i)
         end
     end
+    nb
 end
 
-struct RegexMatchIterator
+struct RegexMatchIterator{S <: AbstractString}
     regex::Regex
-    string::String
+    string::S
     overlap::Bool
 
-    function RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false)
-        new(regex, string, ovr)
-    end
+    RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false) =
+        new{String}(regex, String(string), ovr)
+    RegexMatchIterator(regex::Regex, string::AnnotatedString, ovr::Bool=false) =
+        new{AnnotatedString{String}}(regex, AnnotatedString(String(string.string), string.annotations), ovr)
 end
 compile(itr::RegexMatchIterator) = (compile(itr.regex); itr)
-eltype(::Type{RegexMatchIterator}) = RegexMatch
-IteratorSize(::Type{RegexMatchIterator}) = SizeUnknown()
+eltype(::Type{<:RegexMatchIterator}) = RegexMatch
+IteratorSize(::Type{<:RegexMatchIterator}) = SizeUnknown()
 
 function iterate(itr::RegexMatchIterator, (offset,prevempty)=(1,false))
     opts_nonempty = UInt32(PCRE.ANCHORED | PCRE.NOTEMPTY_ATSTART)
@@ -726,7 +784,7 @@ julia> rx = r"a.a"
 r"a.a"
 
 julia> m = eachmatch(rx, "a1a2a3a")
-Base.RegexMatchIterator(r"a.a", "a1a2a3a", false)
+Base.RegexMatchIterator{String}(r"a.a", "a1a2a3a", false)
 
 julia> collect(m)
 2-element Vector{RegexMatch}:
@@ -739,6 +797,9 @@ julia> collect(eachmatch(rx, "a1a2a3a", overlap = true))
  RegexMatch("a2a")
  RegexMatch("a3a")
 ```
+# See also
+[`match`](@ref), [`findall`](@ref), [`count`](@ref)
+
 """
 eachmatch(re::Regex, str::AbstractString; overlap = false) =
     RegexMatchIterator(re, str, overlap)
@@ -752,7 +813,7 @@ end
 ## hash ##
 const hashre_seed = UInt === UInt64 ? 0x67e195eb8555e72d : 0xe32373e4
 function hash(r::Regex, h::UInt)
-    h += hashre_seed
+    h ⊻= hashre_seed
     h = hash(r.pattern, h)
     h = hash(r.compile_options, h)
     h = hash(r.match_options, h)
@@ -761,8 +822,8 @@ end
 ## String operations ##
 
 """
-    *(s::Regex, t::Union{Regex,AbstractString,AbstractChar}) -> Regex
-    *(s::Union{Regex,AbstractString,AbstractChar}, t::Regex) -> Regex
+    *(s::Regex, t::Union{Regex,AbstractString,AbstractChar})::Regex
+    *(s::Union{Regex,AbstractString,AbstractChar}, t::Regex)::Regex
 
 Concatenate regexes, strings and/or characters, producing a [`Regex`](@ref).
 String and character arguments must be matched exactly in the resulting regex,
@@ -844,7 +905,7 @@ end
 
 
 """
-    ^(s::Regex, n::Integer) -> Regex
+    ^(s::Regex, n::Integer)::Regex
 
 Repeat a regex `n` times.
 
diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl
index 74b888a39fd76..39fee20a31227 100644
--- a/base/reinterpretarray.jl
+++ b/base/reinterpretarray.jl
@@ -13,15 +13,16 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
 
     function throwbits(S::Type, T::Type, U::Type)
         @noinline
-        throw(ArgumentError("cannot reinterpret `$(S)` as `$(T)`, type `$(U)` is not a bits type"))
+        throw(ArgumentError(LazyString("cannot reinterpret `", S, "` as `", T, "`, type `", U, "` is not a bits type")))
     end
     function throwsize0(S::Type, T::Type, msg)
         @noinline
-        throw(ArgumentError("cannot reinterpret a zero-dimensional `$(S)` array to `$(T)` which is of a $msg size"))
+        throw(ArgumentError(LazyString("cannot reinterpret a zero-dimensional `", S, "` array to `", T,
+            "` which is of a ", msg, " size")))
     end
     function throwsingleton(S::Type, T::Type)
         @noinline
-        throw(ArgumentError("cannot reinterpret a `$(S)` array to `$(T)` which is a singleton type"))
+        throw(ArgumentError(LazyString("cannot reinterpret a `", S, "` array to `", T, "` which is a singleton type")))
     end
 
     global reinterpret
@@ -46,18 +47,35 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
      3 + 4im
      5 + 6im
     ```
+
+    If the location of padding bits does not line up between `T` and `eltype(A)`, the resulting array will be
+    read-only or write-only, to prevent invalid bits from being written to or read from, respectively.
+
+    ```jldoctest
+    julia> a = reinterpret(Tuple{UInt8, UInt32}, UInt32[1, 2])
+    1-element reinterpret(Tuple{UInt8, UInt32}, ::Vector{UInt32}):
+     (0x01, 0x00000002)
+
+    julia> a[1] = 3
+    ERROR: Padding of type Tuple{UInt8, UInt32} is not compatible with type UInt32.
+
+    julia> b = reinterpret(UInt32, Tuple{UInt8, UInt32}[(0x01, 0x00000002)]); # showing will error
+
+    julia> b[1]
+    ERROR: Padding of type UInt32 is not compatible with type Tuple{UInt8, UInt32}.
+    ```
     """
     function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
         function thrownonint(S::Type, T::Type, dim)
             @noinline
-            throw(ArgumentError("""
-                cannot reinterpret an `$(S)` array to `$(T)` whose first dimension has size `$(dim)`.
-                The resulting array would have non-integral first dimension.
-                """))
+            throw(ArgumentError(LazyString(
+                "cannot reinterpret an `", S, "` array to `", T, "` whose first dimension has size `", dim,
+                "`. The resulting array would have a non-integral first dimension.")))
         end
         function throwaxes1(S::Type, T::Type, ax1)
             @noinline
-            throw(ArgumentError("cannot reinterpret a `$(S)` array to `$(T)` when the first axis is $ax1. Try reshaping first."))
+            throw(ArgumentError(LazyString("cannot reinterpret a `", S, "` array to `", T,
+                "` when the first axis is ", ax1, ". Try reshaping first.")))
         end
         isbitstype(T) || throwbits(S, T, T)
         isbitstype(S) || throwbits(S, T, S)
@@ -82,15 +100,19 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
     function reinterpret(::typeof(reshape), ::Type{T}, a::A) where {T,S,A<:AbstractArray{S}}
         function throwintmult(S::Type, T::Type)
             @noinline
-            throw(ArgumentError("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got $(sizeof(T))) and `sizeof(eltype(a))` (got $(sizeof(S))) be an integer multiple of the other"))
+            throw(ArgumentError(LazyString("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got ",
+                sizeof(T), ") and `sizeof(eltype(a))` (got ", sizeof(S), ") be an integer multiple of the other")))
         end
         function throwsize1(a::AbstractArray, T::Type)
             @noinline
-            throw(ArgumentError("`reinterpret(reshape, $T, a)` where `eltype(a)` is $(eltype(a)) requires that `axes(a, 1)` (got $(axes(a, 1))) be equal to 1:$(sizeof(T) ÷ sizeof(eltype(a))) (from the ratio of element sizes)"))
+            throw(ArgumentError(LazyString("`reinterpret(reshape, ", T, ", a)` where `eltype(a)` is ", eltype(a),
+                " requires that `axes(a, 1)` (got ", axes(a, 1), ") be equal to 1:",
+                sizeof(T) ÷ sizeof(eltype(a)), " (from the ratio of element sizes)")))
         end
         function throwfromsingleton(S, T)
             @noinline
-            throw(ArgumentError("`reinterpret(reshape, $T, a)` where `eltype(a)` is $S requires that $T be a singleton type, since $S is one"))
+            throw(ArgumentError(LazyString("`reinterpret(reshape, ", T, ", a)` where `eltype(a)` is ", S,
+                " requires that ", T, " be a singleton type, since ", S, " is one")))
         end
         isbitstype(T) || throwbits(S, T, T)
         isbitstype(S) || throwbits(S, T, S)
@@ -199,6 +221,7 @@ end
 _checkcontiguous(::Type{Bool}, A::ReinterpretArray) = _checkcontiguous(Bool, parent(A))
 
 similar(a::ReinterpretArray, T::Type, d::Dims) = similar(a.parent, T, d)
+similar(::Type{TA}, dims::Dims) where {T,N,O,P,TA<:ReinterpretArray{T,N,O,P}} = similar(P, dims)
 
 function check_readable(a::ReinterpretArray{T, N, S} where N) where {T,S}
     # See comment in check_writable
@@ -249,7 +272,8 @@ SCartesianIndices2{K}(indices2::AbstractUnitRange{Int}) where {K} = (@assert K::
 eachindex(::IndexSCartesian2{K}, A::ReshapedReinterpretArray) where {K} = SCartesianIndices2{K}(eachindex(IndexLinear(), parent(A)))
 @inline function eachindex(style::IndexSCartesian2{K}, A::AbstractArray, B::AbstractArray...) where {K}
     iter = eachindex(style, A)
-    _all_match_first(C->eachindex(style, C), iter, B...) || throw_eachindex_mismatch_indices(IndexSCartesian2{K}(), axes(A), axes.(B)...)
+    itersBs = map(C->eachindex(style, C), B)
+    all(==(iter), itersBs) || throw_eachindex_mismatch_indices("axes", axes(A), map(axes, B)...)
     return iter
 end
 
@@ -288,16 +312,17 @@ SimdLoop.simd_inner_length(::SCartesianIndices2{K}, ::Any) where K = K
     SCartesianIndex2{K}(I1+1, Ilast)
 end
 
+_maybe_reshape(::IndexSCartesian2, A::AbstractArray, I...) = _maybe_reshape(IndexCartesian(), A, I...)
 _maybe_reshape(::IndexSCartesian2, A::ReshapedReinterpretArray, I...) = A
 
 # fallbacks
-function _getindex(::IndexSCartesian2, A::AbstractArray{T,N}, I::Vararg{Int, N}) where {T,N}
+function _getindex(::IndexSCartesian2, A::AbstractArray, I::Vararg{Int, N}) where {N}
     @_propagate_inbounds_meta
-    getindex(A, I...)
+    _getindex(IndexCartesian(), A, I...)
 end
-function _setindex!(::IndexSCartesian2, A::AbstractArray{T,N}, v, I::Vararg{Int, N}) where {T,N}
+function _setindex!(::IndexSCartesian2, A::AbstractArray, v, I::Vararg{Int, N}) where {N}
     @_propagate_inbounds_meta
-    setindex!(A, v, I...)
+    _setindex!(IndexCartesian(), A, v, I...)
 end
 # fallbacks for array types that use "pass-through" indexing (e.g., `IndexStyle(A) = IndexStyle(parent(A))`)
 # but which don't handle SCartesianIndex2
@@ -306,11 +331,25 @@ function _getindex(::IndexSCartesian2, A::AbstractArray{T,N}, ind::SCartesianInd
     J = _ind2sub(tail(axes(A)), ind.j)
     getindex(A, ind.i, J...)
 end
+
+function _getindex(::IndexSCartesian2{2}, A::AbstractArray{T,2}, ind::SCartesianIndex2) where {T}
+    @_propagate_inbounds_meta
+    J = first(axes(A, 2)) + ind.j - 1
+    getindex(A, ind.i, J)
+end
+
 function _setindex!(::IndexSCartesian2, A::AbstractArray{T,N}, v, ind::SCartesianIndex2) where {T,N}
     @_propagate_inbounds_meta
     J = _ind2sub(tail(axes(A)), ind.j)
     setindex!(A, v, ind.i, J...)
 end
+
+function _setindex!(::IndexSCartesian2{2}, A::AbstractArray{T,2}, v, ind::SCartesianIndex2) where {T}
+    @_propagate_inbounds_meta
+    J = first(axes(A, 2)) + ind.j - 1
+    setindex!(A, v, ind.i, J)
+end
+
 eachindex(style::IndexSCartesian2, A::AbstractArray) = eachindex(style, parent(A))
 
 ## AbstractArray interface
@@ -350,9 +389,10 @@ axes(a::NonReshapedReinterpretArray{T,0}) where {T} = ()
 has_offset_axes(a::ReinterpretArray) = has_offset_axes(a.parent)
 
 elsize(::Type{<:ReinterpretArray{T}}) where {T} = sizeof(T)
+cconvert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = cconvert(Ptr{S}, a.parent)
 unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = Ptr{T}(unsafe_convert(Ptr{S},a.parent))
 
-@inline @propagate_inbounds function getindex(a::NonReshapedReinterpretArray{T,0,S}) where {T,S}
+@propagate_inbounds function getindex(a::NonReshapedReinterpretArray{T,0,S}) where {T,S}
     if isprimitivetype(T) && isprimitivetype(S)
         reinterpret(T, a.parent[])
     else
@@ -360,34 +400,53 @@ unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} =
     end
 end
 
-@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[firstindex(a)]
+check_ptr_indexable(a::ReinterpretArray, sz = elsize(a)) = check_ptr_indexable(parent(a), sz)
+check_ptr_indexable(a::ReshapedArray, sz) = check_ptr_indexable(parent(a), sz)
+check_ptr_indexable(a::FastContiguousSubArray, sz) = check_ptr_indexable(parent(a), sz)
+check_ptr_indexable(a::Array, sz) = sizeof(eltype(a)) !== sz
+check_ptr_indexable(a::Memory, sz) = true
+check_ptr_indexable(a::AbstractArray, sz) = false
 
-@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S}
+@propagate_inbounds getindex(a::ReshapedReinterpretArray{T,0}) where {T} = a[firstindex(a)]
+
+@propagate_inbounds isassigned(a::ReinterpretArray, inds::Integer...) = checkbounds(Bool, a, inds...) && (check_ptr_indexable(a) || _isassigned_ra(a, inds...))
+@propagate_inbounds isassigned(a::ReinterpretArray, inds::SCartesianIndex2) = isassigned(a.parent, inds.j)
+@propagate_inbounds _isassigned_ra(a::ReinterpretArray, inds...) = true # that is not entirely true, but computing exactly which indexes will be accessed in the parent requires a lot of duplication from the _getindex_ra code
+
+@propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S}
     check_readable(a)
+    check_ptr_indexable(a) && return _getindex_ptr(a, inds...)
     _getindex_ra(a, inds[1], tail(inds))
 end
 
-@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, i::Int) where {T,N,S}
+@propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, i::Int) where {T,N,S}
     check_readable(a)
+    check_ptr_indexable(a) && return _getindex_ptr(a, i)
     if isa(IndexStyle(a), IndexLinear)
         return _getindex_ra(a, i, ())
     end
     # Convert to full indices here, to avoid needing multiple conversions in
     # the loop in _getindex_ra
     inds = _to_subscript_indices(a, i)
-    isempty(inds) ? _getindex_ra(a, 1, ()) : _getindex_ra(a, inds[1], tail(inds))
+    isempty(inds) ? _getindex_ra(a, firstindex(a), ()) : _getindex_ra(a, inds[1], tail(inds))
 end
 
-@inline @propagate_inbounds function getindex(a::ReshapedReinterpretArray{T,N,S}, ind::SCartesianIndex2) where {T,N,S}
+@propagate_inbounds function getindex(a::ReshapedReinterpretArray{T,N,S}, ind::SCartesianIndex2) where {T,N,S}
     check_readable(a)
     s = Ref{S}(a.parent[ind.j])
-    GC.@preserve s begin
-        tptr = Ptr{T}(unsafe_convert(Ref{S}, s))
-        return unsafe_load(tptr, ind.i)
-    end
+    tptr = Ptr{T}(unsafe_convert(Ref{S}, s))
+    GC.@preserve s return unsafe_load(tptr, ind.i)
 end
 
-@inline @propagate_inbounds function _getindex_ra(a::NonReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT}
+@inline function _getindex_ptr(a::ReinterpretArray{T}, inds...) where {T}
+    @boundscheck checkbounds(a, inds...)
+    li = _to_linear_index(a, inds...)
+    ap = cconvert(Ptr{T}, a)
+    p = unsafe_convert(Ptr{T}, ap) + sizeof(T) * (li - 1)
+    GC.@preserve ap return unsafe_load(p)
+end
+
+@propagate_inbounds function _getindex_ra(a::NonReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT}
     # Make sure to match the scalar reinterpret if that is applicable
     if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
         if issingletontype(T) # singleton types
@@ -443,7 +502,7 @@ end
     end
 end
 
-@inline @propagate_inbounds function _getindex_ra(a::ReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT}
+@propagate_inbounds function _getindex_ra(a::ReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT}
     # Make sure to match the scalar reinterpret if that is applicable
     if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
         if issingletontype(T) # singleton types
@@ -490,31 +549,33 @@ end
     end
 end
 
-@inline @propagate_inbounds function setindex!(a::NonReshapedReinterpretArray{T,0,S}, v) where {T,S}
+@propagate_inbounds function setindex!(a::NonReshapedReinterpretArray{T,0,S}, v) where {T,S}
     if isprimitivetype(S) && isprimitivetype(T)
-        a.parent[] = reinterpret(S, v)
+        a.parent[] = reinterpret(S, convert(T, v)::T)
         return a
     end
     setindex!(a, v, firstindex(a))
 end
 
-@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = setindex!(a, v, firstindex(a))
+@propagate_inbounds setindex!(a::ReshapedReinterpretArray{T,0}, v) where {T} = setindex!(a, v, firstindex(a))
 
-@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S}
+@propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S}
     check_writable(a)
+    check_ptr_indexable(a) && return _setindex_ptr!(a, v, inds...)
     _setindex_ra!(a, v, inds[1], tail(inds))
 end
 
-@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, i::Int) where {T,N,S}
+@propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, i::Int) where {T,N,S}
     check_writable(a)
+    check_ptr_indexable(a) && return _setindex_ptr!(a, v, i)
     if isa(IndexStyle(a), IndexLinear)
         return _setindex_ra!(a, v, i, ())
     end
     inds = _to_subscript_indices(a, i)
-    _setindex_ra!(a, v, inds[1], tail(inds))
+    isempty(inds) ? _setindex_ra!(a, v, firstindex(a), ()) : _setindex_ra!(a, v, inds[1], tail(inds))
 end
 
-@inline @propagate_inbounds function setindex!(a::ReshapedReinterpretArray{T,N,S}, v, ind::SCartesianIndex2) where {T,N,S}
+@propagate_inbounds function setindex!(a::ReshapedReinterpretArray{T,N,S}, v, ind::SCartesianIndex2) where {T,N,S}
     check_writable(a)
     v = convert(T, v)::T
     s = Ref{S}(a.parent[ind.j])
@@ -526,7 +587,16 @@ end
     return a
 end
 
-@inline @propagate_inbounds function _setindex_ra!(a::NonReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT}
+@inline function _setindex_ptr!(a::ReinterpretArray{T}, v, inds...) where {T}
+    @boundscheck checkbounds(a, inds...)
+    li = _to_linear_index(a, inds...)
+    ap = cconvert(Ptr{T}, a)
+    p = unsafe_convert(Ptr{T}, ap) + sizeof(T) * (li - 1)
+    GC.@preserve ap unsafe_store!(p, v)
+    return a
+end
+
+@propagate_inbounds function _setindex_ra!(a::NonReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT}
     v = convert(T, v)::T
     # Make sure to match the scalar reinterpret if that is applicable
     if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
@@ -599,7 +669,7 @@ end
     return a
 end
 
-@inline @propagate_inbounds function _setindex_ra!(a::ReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT}
+@propagate_inbounds function _setindex_ra!(a::ReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT}
     v = convert(T, v)::T
     # Make sure to match the scalar reinterpret if that is applicable
     if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
@@ -672,7 +742,7 @@ end
 """
     CyclePadding(padding, total_size)
 
-Cylces an iterator of `Padding` structs, restarting the padding at `total_size`.
+Cycles an iterator of `Padding` structs, restarting the padding at `total_size`.
 E.g. if `padding` is all the padding in a struct and `total_size` is the total
 aligned size of that array, `CyclePadding` will correspond to the padding in an
 infinite vector of such structs.
@@ -720,7 +790,9 @@ function CyclePadding(T::DataType)
     a, s = datatype_alignment(T), sizeof(T)
     as = s + (a - (s % a)) % a
     pad = padding(T)
-    s != as && push!(pad, Padding(s, as - s))
+    if s != as
+        pad = Core.svec(pad..., Padding(s, as - s))
+    end
     CyclePadding(pad, as)
 end
 
@@ -802,8 +874,8 @@ end
     inpackedsize = packedsize(In)
     outpackedsize = packedsize(Out)
     inpackedsize == outpackedsize ||
-        throw(ArgumentError("Packed sizes of types $Out and $In do not match; got $outpackedsize \
-            and $inpackedsize, respectively."))
+        throw(ArgumentError(LazyString("Packed sizes of types ", Out, " and ", In,
+            " do not match; got ", outpackedsize, " and ", inpackedsize, ", respectively.")))
     in = Ref{In}(x)
     out = Ref{Out}()
     if struct_subpadding(Out, In)
@@ -816,25 +888,34 @@ end
         return out[]
     else
         # mismatched padding
-        GC.@preserve in out begin
-            ptr_in = unsafe_convert(Ptr{In}, in)
-            ptr_out = unsafe_convert(Ptr{Out}, out)
+        return _reinterpret_padding(Out, x)
+    end
+end
 
-            if fieldcount(In) > 0 && ispacked(Out)
-                _copytopacked!(ptr_out, ptr_in)
-            elseif fieldcount(Out) > 0 && ispacked(In)
-                _copyfrompacked!(ptr_out, ptr_in)
-            else
-                packed = Ref{NTuple{inpackedsize, UInt8}}()
-                GC.@preserve packed begin
-                    ptr_packed = unsafe_convert(Ptr{NTuple{inpackedsize, UInt8}}, packed)
-                    _copytopacked!(ptr_packed, ptr_in)
-                    _copyfrompacked!(ptr_out, ptr_packed)
-                end
+# If the code reaches this part, it needs to handle padding and is unlikely
+# to compile to a noop. Therefore, we don't forcibly inline it.
+function _reinterpret_padding(::Type{Out}, x::In) where {Out, In}
+    inpackedsize = packedsize(In)
+    in = Ref{In}(x)
+    out = Ref{Out}()
+    GC.@preserve in out begin
+        ptr_in = unsafe_convert(Ptr{In}, in)
+        ptr_out = unsafe_convert(Ptr{Out}, out)
+
+        if fieldcount(In) > 0 && ispacked(Out)
+            _copytopacked!(ptr_out, ptr_in)
+        elseif fieldcount(Out) > 0 && ispacked(In)
+            _copyfrompacked!(ptr_out, ptr_in)
+        else
+            packed = Ref{NTuple{inpackedsize, UInt8}}()
+            GC.@preserve packed begin
+                ptr_packed = unsafe_convert(Ptr{NTuple{inpackedsize, UInt8}}, packed)
+                _copytopacked!(ptr_packed, ptr_in)
+                _copyfrompacked!(ptr_out, ptr_packed)
             end
         end
-        return out[]
     end
+    return out[]
 end
 
 
diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl
index bcb47a9359392..4d2504cedf687 100644
--- a/base/reshapedarray.jl
+++ b/base/reshapedarray.jl
@@ -35,25 +35,37 @@ end
 length(R::ReshapedArrayIterator) = length(R.iter)
 eltype(::Type{<:ReshapedArrayIterator{I}}) where {I} = @isdefined(I) ? ReshapedIndex{eltype(I)} : Any
 
-## reshape(::Array, ::Dims) returns an Array, except for isbitsunion eltypes (issue #28611)
-# reshaping to same # of dimensions
-function reshape(a::Array{T,M}, dims::NTuple{N,Int}) where {T,N,M}
-    throw_dmrsa(dims, len) =
-        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $len"))
+@noinline throw_dmrsa(dims, len) =
+    throw(DimensionMismatch(LazyString("new dimensions ", dims, " must be consistent with array length ", len)))
 
-    if prod(dims) != length(a)
+## reshape(::Array, ::Dims) returns a new Array (to avoid conditionally aliasing the structure, only the data)
+# reshaping to same # of dimensions
+@eval function reshape(a::Array{T,M}, dims::NTuple{N,Int}) where {T,N,M}
+    len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a))
+    if len != length(a)
         throw_dmrsa(dims, length(a))
     end
-    isbitsunion(T) && return ReshapedArray(a, dims, ())
-    if N == M && dims == size(a)
-        return a
+    ref = a.ref
+    # or we could use `a = Array{T,N}(undef, ntuple(i->0, Val(N))); a.ref = ref; a.size = dims; return a` here to avoid the eval
+    return $(Expr(:new, :(Array{T,N}), :ref, :dims))
+end
+
+## reshape!(::Array, ::Dims) returns the original array, but must have the same dimensions and length as the original
+# see also resize! for a similar operation that can change the length
+function reshape!(a::Array{T,N}, dims::NTuple{N,Int}) where {T,N}
+    len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a))
+    if len != length(a)
+        throw_dmrsa(dims, length(a))
     end
-    ccall(:jl_reshape_array, Array{T,N}, (Any, Any, Any), Array{T,N}, a, dims)
+    setfield!(a, :dims, dims)
+    return a
 end
 
+
+
 """
-    reshape(A, dims...) -> AbstractArray
-    reshape(A, dims) -> AbstractArray
+    reshape(A, dims...)::AbstractArray
+    reshape(A, dims)::AbstractArray
 
 Return an array with the same data as `A`, but with different
 dimension sizes or number of dimensions. The two arrays share the same
@@ -108,26 +120,60 @@ julia> reshape(1:6, 2, 3)
 reshape
 
 reshape(parent::AbstractArray, dims::IntOrInd...) = reshape(parent, dims)
+reshape(parent::AbstractArray, shp::Tuple{Union{Integer,AbstractOneTo}, Vararg{Union{Integer,AbstractOneTo}}}) = reshape(parent, to_shape(shp))
+# legacy method for packages that specialize reshape(parent::AbstractArray, shp::Tuple{Union{Integer,OneTo,CustomAxis}, Vararg{Union{Integer,OneTo,CustomAxis}}})
+# leaving this method in ensures that Base owns the more specific method
 reshape(parent::AbstractArray, shp::Tuple{Union{Integer,OneTo}, Vararg{Union{Integer,OneTo}}}) = reshape(parent, to_shape(shp))
+reshape(parent::AbstractArray, dims::Tuple{Integer, Vararg{Integer}}) = reshape(parent, map(Int, dims))
 reshape(parent::AbstractArray, dims::Dims)        = _reshape(parent, dims)
 
 # Allow missing dimensions with Colon():
 reshape(parent::AbstractVector, ::Colon) = parent
 reshape(parent::AbstractVector, ::Tuple{Colon}) = parent
 reshape(parent::AbstractArray, dims::Int...) = reshape(parent, dims)
-reshape(parent::AbstractArray, dims::Union{Int,Colon}...) = reshape(parent, dims)
-reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Int,Colon}}}) = reshape(parent, _reshape_uncolon(parent, dims))
-@inline function _reshape_uncolon(A, dims)
-    @noinline throw1(dims) = throw(DimensionMismatch(string("new dimensions $(dims) ",
-        "may have at most one omitted dimension specified by `Colon()`")))
-    @noinline throw2(A, dims) = throw(DimensionMismatch(string("array size $(length(A)) ",
-        "must be divisible by the product of the new dimensions $dims")))
+reshape(parent::AbstractArray, dims::Integer...) = reshape(parent, dims)
+reshape(parent::AbstractArray, dims::Union{Integer,Colon}...) = reshape(parent, dims)
+reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Integer,Colon}}}) = reshape(parent, _reshape_uncolon(parent, dims))
+
+@noinline throw1(dims) = throw(DimensionMismatch(LazyString("new dimensions ", dims,
+        " may have at most one omitted dimension specified by `Colon()`")))
+@noinline throw2(lenA, dims) = throw(DimensionMismatch(string("array size ", lenA,
+    " must be divisible by the product of the new dimensions ", dims)))
+
+@inline function _reshape_uncolon(A, _dims::Tuple{Vararg{Union{Integer, Colon}}})
+    # promote the dims to `Int` at least
+    dims = map(x -> x isa Colon ? x : promote_type(typeof(x), Int)(x), _dims)
     pre = _before_colon(dims...)
     post = _after_colon(dims...)
     _any_colon(post...) && throw1(dims)
-    sz, remainder = divrem(length(A), prod(pre)*prod(post))
-    remainder == 0 || throw2(A, dims)
-    (pre..., Int(sz), post...)
+    len = length(A)
+    _reshape_uncolon_computesize(len, dims, pre, post)
+end
+@inline function _reshape_uncolon_computesize(len::Int, dims, pre::Tuple{Vararg{Int}}, post::Tuple{Vararg{Int}})
+    sz = if iszero(len)
+        0
+    else
+        let pr = Core.checked_dims(pre..., post...)  # safe product
+            quo = _reshape_uncolon_computesize_nonempty(len, dims, pr)
+            convert(Int, quo)
+        end
+    end
+    (pre..., sz, post...)
+end
+@inline function _reshape_uncolon_computesize(len, dims, pre, post)
+    pr = prod((pre..., post...))
+    sz = if iszero(len)
+        promote(len, pr)[1] # zero of the correct type
+    else
+        _reshape_uncolon_computesize_nonempty(len, dims, pr)
+    end
+    (pre..., sz, post...)
+end
+@inline function _reshape_uncolon_computesize_nonempty(len, dims, pr)
+    iszero(pr) && throw2(len, dims)
+    (quo, rem) = divrem(len, pr)
+    iszero(rem) || throw2(len, dims)
+    quo
 end
 @inline _any_colon() = false
 @inline _any_colon(dim::Colon, tail...) = true
@@ -179,7 +225,7 @@ function _reshape(parent::AbstractArray, dims::Dims)
 end
 
 @noinline function _throw_dmrs(n, str, dims)
-    throw(DimensionMismatch("parent has $n elements, which is incompatible with $str $dims"))
+    throw(DimensionMismatch("parent has $n elements, which is incompatible with $str $dims ($(prod(dims)) elements)"))
 end
 
 # Reshaping a ReshapedArray
@@ -188,10 +234,10 @@ _reshape(R::ReshapedArray, dims::Dims) = _reshape(R.parent, dims)
 
 function __reshape(p::Tuple{AbstractArray,IndexStyle}, dims::Dims)
     parent = p[1]
-    strds = front(size_to_strides(map(length, axes(parent))..., 1))
-    strds1 = map(s->max(1,Int(s)), strds)  # for resizing empty arrays
-    mi = map(SignedMultiplicativeInverse, strds1)
-    ReshapedArray(parent, dims, reverse(mi))
+    szs = front(size(parent))
+    szs1 = map(s -> max(1, Int(s)), szs) # for resizing empty arrays
+    mi = map(SignedMultiplicativeInverse, szs1)
+    ReshapedArray(parent, dims, mi)
 end
 
 function __reshape(p::Tuple{AbstractArray{<:Any,0},IndexCartesian}, dims::Dims)
@@ -207,6 +253,7 @@ end
 size(A::ReshapedArray) = A.dims
 length(A::ReshapedArray) = length(parent(A))
 similar(A::ReshapedArray, eltype::Type, dims::Dims) = similar(parent(A), eltype, dims)
+similar(::Type{TA}, dims::Dims) where {T,N,P,TA<:ReshapedArray{T,N,P}} = similar(P, dims)
 IndexStyle(::Type{<:ReshapedArrayLF}) = IndexLinear()
 parent(A::ReshapedArray) = A.parent
 parentindices(A::ReshapedArray) = map(oneto, size(parent(A)))
@@ -215,20 +262,26 @@ elsize(::Type{<:ReshapedArray{<:Any,<:Any,P}}) where {P} = elsize(P)
 
 unaliascopy(A::ReshapedArray) = typeof(A)(unaliascopy(A.parent), A.dims, A.mi)
 dataids(A::ReshapedArray) = dataids(A.parent)
+# forward the aliasing check the parent in case there are specializations
+mightalias(A::ReshapedArray, B::ReshapedArray) = mightalias(parent(A), parent(B))
+# special handling for reshaped SubArrays that dispatches to the subarray aliasing check
+mightalias(A::ReshapedArray, B::SubArray) = mightalias(parent(A), B)
+mightalias(A::SubArray, B::ReshapedArray) = mightalias(A, parent(B))
 
 @inline ind2sub_rs(ax, ::Tuple{}, i::Int) = (i,)
-@inline ind2sub_rs(ax, strds, i) = _ind2sub_rs(ax, strds, i - 1)
+@inline ind2sub_rs(ax, szs, i) = _ind2sub_rs(ax, szs, i - 1)
 @inline _ind2sub_rs(ax, ::Tuple{}, ind) = (ind + first(ax[end]),)
-@inline function _ind2sub_rs(ax, strds, ind)
-    d, r = divrem(ind, strds[1])
-    (_ind2sub_rs(front(ax), tail(strds), r)..., d + first(ax[end]))
+@inline function _ind2sub_rs(ax, szs, ind)
+    d, r = divrem(ind, szs[1])
+    (r + first(ax[1]), _ind2sub_rs(tail(ax), tail(szs), d)...)
 end
 offset_if_vec(i::Integer, axs::Tuple{<:AbstractUnitRange}) = i + first(axs[1]) - 1
 offset_if_vec(i::Integer, axs::Tuple) = i
 
 @inline function isassigned(A::ReshapedArrayLF, index::Int)
     @boundscheck checkbounds(Bool, A, index) || return false
-    @inbounds ret = isassigned(parent(A), index)
+    indexparent = index - firstindex(A) + firstindex(parent(A))
+    @inbounds ret = isassigned(parent(A), indexparent)
     ret
 end
 @inline function isassigned(A::ReshapedArray{T,N}, indices::Vararg{Int, N}) where {T,N}
@@ -241,7 +294,8 @@ end
 
 @inline function getindex(A::ReshapedArrayLF, index::Int)
     @boundscheck checkbounds(A, index)
-    @inbounds ret = parent(A)[index]
+    indexparent = index - firstindex(A) + firstindex(parent(A))
+    @inbounds ret = parent(A)[indexparent]
     ret
 end
 @inline function getindex(A::ReshapedArray{T,N}, indices::Vararg{Int,N}) where {T,N}
@@ -265,7 +319,8 @@ end
 
 @inline function setindex!(A::ReshapedArrayLF, val, index::Int)
     @boundscheck checkbounds(A, index)
-    @inbounds parent(A)[index] = val
+    indexparent = index - firstindex(A) + firstindex(parent(A))
+    @inbounds parent(A)[indexparent] = val
     val
 end
 @inline function setindex!(A::ReshapedArray{T,N}, val, indices::Vararg{Int,N}) where {T,N}
@@ -293,7 +348,8 @@ setindex!(A::ReshapedRange, val, index::ReshapedIndex) = _rs_setindex!_err()
 
 @noinline _rs_setindex!_err() = error("indexed assignment fails for a reshaped range; consider calling collect")
 
-unsafe_convert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = unsafe_convert(Ptr{T}, parent(a))
+cconvert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = cconvert(Ptr{T}, parent(a))
+unsafe_convert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = unsafe_convert(Ptr{T}, a.parent)
 
 # Add a few handy specializations to further speed up views of reshaped ranges
 const ReshapedUnitRange{T,N,A<:AbstractUnitRange} = ReshapedArray{T,N,A,Tuple{}}
@@ -304,9 +360,18 @@ compute_offset1(parent::AbstractVector, stride1::Integer, I::Tuple{ReshapedRange
     (@inline; first(I[1]) - first(axes1(I[1]))*stride1)
 substrides(strds::NTuple{N,Int}, I::Tuple{ReshapedUnitRange, Vararg{Any}}) where N =
     (size_to_strides(strds[1], size(I[1])...)..., substrides(tail(strds), tail(I))...)
-unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {T,N,P} =
-    unsafe_convert(Ptr{T}, V.parent) + (first_index(V)-1)*sizeof(T)
 
+# cconvert(::Type{<:Ptr}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {T,N,P} = V
+function unsafe_convert(::Type{Ptr{S}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {S,T,N,P}
+    parent = V.parent
+    p = cconvert(Ptr{T}, parent) # XXX: this should occur in cconvert, the result is not GC-rooted
+    Δmem = if _checkcontiguous(Bool, parent)
+        (first_index(V) - firstindex(parent)) * elsize(parent)
+    else
+        _memory_offset(parent, map(first, V.indices)...)
+    end
+    return Ptr{S}(unsafe_convert(Ptr{T}, p) + Δmem)
+end
 
 _checkcontiguous(::Type{Bool}, A::AbstractArray) = false
 # `strides(A::DenseArray)` calls `size_to_strides` by default.
diff --git a/base/rounding.jl b/base/rounding.jl
index 25cfe2dc09829..88966c82fb3a6 100644
--- a/base/rounding.jl
+++ b/base/rounding.jl
@@ -2,7 +2,7 @@
 
 module Rounding
 
-let fenv_consts = Vector{Cint}(undef, 9)
+let fenv_consts = Array{Cint,1}(undef, 9)
     ccall(:jl_get_fenv_consts, Cvoid, (Ptr{Cint},), fenv_consts)
     global const JL_FE_INEXACT = fenv_consts[1]
     global const JL_FE_UNDERFLOW = fenv_consts[2]
@@ -109,6 +109,64 @@ Rounds to nearest integer, with ties rounded toward positive infinity (Java/Java
 """
 const RoundNearestTiesUp = RoundingMode{:NearestTiesUp}()
 
+# Rounding mode predicates. TODO: better names
+
+# Overload these for other rounding modes
+rounds_to_nearest(::RoundingMode) = false
+rounds_to_nearest(::RoundingMode{:Nearest}) = true
+rounds_to_nearest(::RoundingMode{:NearestTiesUp}) = true
+rounds_to_nearest(::RoundingMode{:NearestTiesAway}) = true
+rounds_away_from_zero(::RoundingMode{:Up},   sign_bit::Bool) = !sign_bit
+rounds_away_from_zero(::RoundingMode{:Down}, sign_bit::Bool) = sign_bit
+rounds_away_from_zero(::RoundingMode{:FromZero}, ::Bool) = true
+rounds_away_from_zero(::RoundingMode{:ToZero},   ::Bool) = false
+tie_breaker_is_to_even(::RoundingMode{:Nearest}) = true
+tie_breaker_is_to_even(::RoundingMode{:NearestTiesUp}) = false
+tie_breaker_is_to_even(::RoundingMode{:NearestTiesAway}) = false
+tie_breaker_rounds_away_from_zero(::RoundingMode{:NearestTiesUp}, sign_bit::Bool) = !sign_bit
+tie_breaker_rounds_away_from_zero(::RoundingMode{:NearestTiesAway},       ::Bool) = true
+
+rounds_to_nearest(t::Tuple{Any,Bool}) = rounds_to_nearest(first(t))
+rounds_away_from_zero(t::Tuple{Any,Bool}) = rounds_away_from_zero(t...)
+tie_breaker_is_to_even(t::Tuple{Any,Bool}) = tie_breaker_is_to_even(first(t))
+tie_breaker_rounds_away_from_zero(t::Tuple{Any,Bool}) = tie_breaker_rounds_away_from_zero(t...)
+
+struct FinalBit end
+struct RoundBit end
+struct StickyBit end
+
+function correct_rounding_requires_increment(x, rounding_mode, sign_bit::Bool)
+    r = (rounding_mode, sign_bit)
+    f = let y = x
+        (z::Union{FinalBit,RoundBit,StickyBit}) -> y(z)::Bool
+    end
+    if rounds_to_nearest(r)
+        if f(RoundBit())
+            if f(StickyBit())
+                true
+            else
+                if tie_breaker_is_to_even(r)
+                    f(FinalBit())
+                else
+                    tie_breaker_rounds_away_from_zero(r)::Bool
+                end
+            end
+        else
+            false
+        end
+    else
+        if rounds_away_from_zero(r)
+            if f(RoundBit())
+                true
+            else
+                f(StickyBit())
+            end
+        else
+            false
+        end
+    end::Bool
+end
+
 to_fenv(::RoundingMode{:Nearest}) = JL_FE_TONEAREST
 to_fenv(::RoundingMode{:ToZero}) = JL_FE_TOWARDZERO
 to_fenv(::RoundingMode{:Up}) = JL_FE_UPWARD
@@ -223,9 +281,20 @@ function _convert_rounding(::Type{T}, x::Real, r::RoundingMode{:ToZero}) where T
         y < x ? nextfloat(y) : y
     end
 end
+function _convert_rounding(::Type{T}, x::Real, r::RoundingMode{:FromZero}) where T<:AbstractFloat
+    y = convert(T,x)::T
+    if x < 0.0
+        y > x ? prevfloat(y) : y
+    else
+        y < x ? nextfloat(y) : y
+    end
+end
+
+
+# Default definitions
 
 """
-    set_zero_subnormals(yes::Bool) -> Bool
+    set_zero_subnormals(yes::Bool)::Bool
 
 If `yes` is `false`, subsequent floating-point operations follow rules for IEEE arithmetic
 on subnormal values ("denormals"). Otherwise, floating-point operations are permitted (but
@@ -242,7 +311,7 @@ break identities such as `(x-y==0) == (x==y)`.
 set_zero_subnormals(yes::Bool) = ccall(:jl_set_zero_subnormals,Int32,(Int8,),yes)==0
 
 """
-    get_zero_subnormals() -> Bool
+    get_zero_subnormals()::Bool
 
 Return `false` if operations on subnormal floating-point values ("denormals") obey rules
 for IEEE arithmetic, and `true` if they might be converted to zeros.
@@ -254,3 +323,169 @@ for IEEE arithmetic, and `true` if they might be converted to zeros.
 get_zero_subnormals() = ccall(:jl_get_zero_subnormals,Int32,())!=0
 
 end #module
+using .Rounding
+
+"""
+    round([T,] x, [r::RoundingMode])
+    round(x, [r::RoundingMode]; digits::Integer=0, base = 10)
+    round(x, [r::RoundingMode]; sigdigits::Integer, base = 10)
+
+Rounds the number `x`.
+
+Without keyword arguments, `x` is rounded to an integer value, returning a value of type
+`T`, or of the same type of `x` if no `T` is provided. An [`InexactError`](@ref) will be
+thrown if the value is not representable by `T`, similar to [`convert`](@ref).
+
+If the `digits` keyword argument is provided, it rounds to the specified number of digits
+after the decimal place (or before if `digits` is negative), in base `base`.
+
+If the `sigdigits` keyword argument is provided, it rounds to the specified number of
+significant digits, in base `base`.
+
+The [`RoundingMode`](@ref) `r` controls the direction of the rounding; the default is
+[`RoundNearest`](@ref), which rounds to the nearest integer, with ties (fractional values
+of 0.5) being rounded to the nearest even integer. Note that `round` may give incorrect
+results if the global rounding mode is changed (see [`rounding`](@ref)).
+
+When rounding to a floating point type, will round to integers representable by that type
+(and Inf) rather than true integers. Inf is treated as one ulp greater than the
+`floatmax(T)` for purposes of determining "nearest", similar to [`convert`](@ref).
+
+# Examples
+```jldoctest
+julia> round(1.7)
+2.0
+
+julia> round(Int, 1.7)
+2
+
+julia> round(1.5)
+2.0
+
+julia> round(2.5)
+2.0
+
+julia> round(pi; digits=2)
+3.14
+
+julia> round(pi; digits=3, base=2)
+3.125
+
+julia> round(123.456; sigdigits=2)
+120.0
+
+julia> round(357.913; sigdigits=4, base=2)
+352.0
+
+julia> round(Float16, typemax(UInt128))
+Inf16
+
+julia> floor(Float16, typemax(UInt128))
+Float16(6.55e4)
+```
+
+!!! note
+    Rounding to specified digits in bases other than 2 can be inexact when
+    operating on binary floating point numbers. For example, the [`Float64`](@ref)
+    value represented by `1.15` is actually *less* than 1.15, yet will be
+    rounded to 1.2. For example:
+
+    ```jldoctest
+    julia> x = 1.15
+    1.15
+
+    julia> big(1.15)
+    1.149999999999999911182158029987476766109466552734375
+
+    julia> x < 115//100
+    true
+
+    julia> round(x, digits=1)
+    1.2
+    ```
+
+# Extensions
+
+To extend `round` to new numeric types, it is typically sufficient to define `Base.round(x::NewType, r::RoundingMode)`.
+"""
+function round end
+
+"""
+    trunc([T,] x)
+    trunc(x; digits::Integer= [, base = 10])
+    trunc(x; sigdigits::Integer= [, base = 10])
+
+`trunc(x)` returns the nearest integral value of the same type as `x` whose absolute value
+is less than or equal to the absolute value of `x`.
+
+`trunc(T, x)` converts the result to type `T`, throwing an `InexactError` if the truncated
+value is not representable a `T`.
+
+Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
+
+To support `trunc` for a new type, define `Base.round(x::NewType, ::RoundingMode{:ToZero})`.
+
+See also: [`%`](@ref rem), [`floor`](@ref), [`unsigned`](@ref), [`unsafe_trunc`](@ref).
+
+# Examples
+```jldoctest
+julia> trunc(2.22)
+2.0
+
+julia> trunc(-2.22, digits=1)
+-2.2
+
+julia> trunc(Int, -2.22)
+-2
+```
+"""
+function trunc end
+
+"""
+    floor([T,] x)
+    floor(x; digits::Integer= [, base = 10])
+    floor(x; sigdigits::Integer= [, base = 10])
+
+`floor(x)` returns the nearest integral value of the same type as `x` that is less than or
+equal to `x`.
+
+`floor(T, x)` converts the result to type `T`, throwing an `InexactError` if the floored
+value is not representable a `T`.
+
+Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
+
+To support `floor` for a new type, define `Base.round(x::NewType, ::RoundingMode{:Down})`.
+"""
+function floor end
+
+"""
+    ceil([T,] x)
+    ceil(x; digits::Integer= [, base = 10])
+    ceil(x; sigdigits::Integer= [, base = 10])
+
+`ceil(x)` returns the nearest integral value of the same type as `x` that is greater than or
+equal to `x`.
+
+`ceil(T, x)` converts the result to type `T`, throwing an `InexactError` if the ceiled
+value is not representable as a `T`.
+
+Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
+
+To support `ceil` for a new type, define `Base.round(x::NewType, ::RoundingMode{:Up})`.
+"""
+function ceil end
+
+trunc(x; kws...) = round(x, RoundToZero; kws...)
+floor(x; kws...) = round(x, RoundDown; kws...)
+ ceil(x; kws...) = round(x, RoundUp; kws...)
+round(x; kws...) = round(x, RoundNearest; kws...)
+
+trunc(::Type{T}, x) where T = round(T, x, RoundToZero)
+floor(::Type{T}, x) where T = round(T, x, RoundDown)
+ ceil(::Type{T}, x) where T = round(T, x, RoundUp)
+round(::Type{T}, x) where T = round(T, x, RoundNearest)
+
+round(::Type{T}, x, r::RoundingMode) where T = _round_convert(T, round(x, r), x, r)
+_round_convert(::Type{T}, x_integer, x, r) where T = convert(T, x_integer)
+
+round(x::Integer, r::RoundingMode) = x
diff --git a/base/runtime_internals.jl b/base/runtime_internals.jl
new file mode 100644
index 0000000000000..ab6d086cdbd72
--- /dev/null
+++ b/base/runtime_internals.jl
@@ -0,0 +1,1889 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# name and module reflection
+
+"""
+    parentmodule(m::Module)::Module
+
+Get a module's enclosing `Module`. `Main` is its own parent.
+
+See also: [`names`](@ref), [`nameof`](@ref), [`fullname`](@ref), [`@__MODULE__`](@ref).
+
+# Examples
+```jldoctest
+julia> parentmodule(Main)
+Main
+
+julia> parentmodule(Base.Broadcast)
+Base
+```
+"""
+parentmodule(m::Module) = (@_total_meta; ccall(:jl_module_parent, Ref{Module}, (Any,), m))
+
+is_root_module(m::Module) = parentmodule(m) === m || m === Compiler || (isdefined(Main, :Base) && m === Main.Base)
+
+"""
+    moduleroot(m::Module)::Module
+
+Find the root module of a given module. This is the first module in the chain of
+parent modules of `m` which is either a registered root module or which is its
+own parent module.
+"""
+function moduleroot(m::Module)
+    @_total_meta
+    while true
+        is_root_module(m) && return m
+        p = parentmodule(m)
+        p === m && return m
+        m = p
+    end
+end
+
+"""
+    @__MODULE__ -> Module
+
+Get the `Module` of the toplevel eval,
+which is the `Module` code is currently being read from.
+"""
+macro __MODULE__()
+    return __module__
+end
+
+"""
+    fullname(m::Module)
+
+Get the fully-qualified name of a module as a tuple of symbols. For example,
+
+# Examples
+```jldoctest
+julia> fullname(Base.Iterators)
+(:Base, :Iterators)
+
+julia> fullname(Main)
+(:Main,)
+```
+"""
+function fullname(m::Module)
+    @_total_meta
+    mn = nameof(m)
+    if m === Main || m === Base || m === Core
+        return (mn,)
+    end
+    mp = parentmodule(m)
+    if mp === m
+        return (mn,)
+    end
+    return (fullname(mp)..., mn)
+end
+
+"""
+    moduleloc(m::Module)::LineNumberNode
+
+Get the location of the `module` definition.
+"""
+function moduleloc(m::Module)
+    line = Ref{Int32}(0)
+    file = ccall(:jl_module_getloc, Ref{Symbol}, (Any, Ref{Int32}), m, line)
+    return LineNumberNode(Int(line[]), file)
+end
+
+"""
+    names(x::Module; all::Bool=false, imported::Bool=false, usings::Bool=false)::Vector{Symbol}
+
+Get a vector of the public names of a `Module`, excluding deprecated names.
+If `all` is true, then the list also includes non-public names defined in the module,
+deprecated names, and compiler-generated names.
+If `imported` is true, then names explicitly imported from other modules
+are also included.
+If `usings` is true, then names explicitly or implicitly imported via `using` are also included.
+Names are returned in sorted order.
+
+As a special case, all names defined in `Main` are considered \"public\",
+since it is not idiomatic to explicitly mark names from `Main` as public.
+
+!!! note
+    `sym ∈ names(SomeModule)` does *not* imply `isdefined(SomeModule, sym)`.
+    `names` may return symbols marked with `public` or `export`, even if
+    they are not defined in the module.
+
+!!! warning
+    `names` may return duplicate names. The duplication happens, e.g. if an `import`ed name
+    conflicts with an already existing identifier.
+
+!!! compat "Julia 1.12"
+    The `usings` argument requires Julia 1.12 or later.
+
+See also: [`Base.isexported`](@ref), [`Base.ispublic`](@ref), [`Base.@locals`](@ref), [`@__MODULE__`](@ref).
+"""
+names(m::Module; kwargs...) = sort!(unsorted_names(m; kwargs...))
+unsorted_names(m::Module; all::Bool=false, imported::Bool=false, usings::Bool=false) =
+    ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint, Cint), m, all, imported, usings)
+
+"""
+    isexported(m::Module, s::Symbol)::Bool
+
+Return whether a symbol is exported from a module.
+
+See also: [`ispublic`](@ref), [`names`](@ref)
+
+```jldoctest
+julia> module Mod
+           export foo
+           public bar
+       end
+Mod
+
+julia> Base.isexported(Mod, :foo)
+true
+
+julia> Base.isexported(Mod, :bar)
+false
+
+julia> Base.isexported(Mod, :baz)
+false
+```
+"""
+isexported(m::Module, s::Symbol) = ccall(:jl_module_exports_p, Cint, (Any, Any), m, s) != 0
+
+"""
+    ispublic(m::Module, s::Symbol)::Bool
+
+Return whether a symbol is marked as public in a module.
+
+Exported symbols are considered public.
+
+!!! compat "Julia 1.11"
+    This function and the notion of publicity were added in Julia 1.11.
+
+See also: [`isexported`](@ref), [`names`](@ref)
+
+```jldoctest
+julia> module Mod
+           export foo
+           public bar
+       end
+Mod
+
+julia> Base.ispublic(Mod, :foo)
+true
+
+julia> Base.ispublic(Mod, :bar)
+true
+
+julia> Base.ispublic(Mod, :baz)
+false
+```
+"""
+ispublic(m::Module, s::Symbol) = ccall(:jl_module_public_p, Cint, (Any, Any), m, s) != 0
+
+"""
+    @__FUNCTION__
+
+Get the innermost enclosing function object.
+
+!!! note
+    `@__FUNCTION__` has the same scoping behavior as `return`: when used
+    inside a closure, it refers to the closure and not the outer function.
+    Some macros, including [`@spawn`](@ref Threads.@spawn), [`@async`](@ref), etc.,
+    wrap their input in closures. When `@__FUNCTION__` is used within such code,
+    it will refer to the closure created by the macro rather than the enclosing function.
+
+# Examples
+
+`@__FUNCTION__` enables recursive anonymous functions:
+
+```jldoctest
+julia> factorial = (n -> n <= 1 ? 1 : n * (@__FUNCTION__)(n - 1));
+
+julia> factorial(5)
+120
+```
+
+`@__FUNCTION__` can be combined with `nameof` to identify a function's
+name from within its body:
+
+```jldoctest
+julia> bar() = nameof(@__FUNCTION__);
+
+julia> bar()
+:bar
+```
+"""
+macro __FUNCTION__()
+    Expr(:thisfunction)
+end
+
+# TODO: this is vaguely broken because it only works for explicit calls to
+# `Base.deprecate`, not the @deprecated macro:
+isdeprecated(m::Module, s::Symbol) = ccall(:jl_is_binding_deprecated, Cint, (Any, Any), m, s) != 0
+
+function binding_module(m::Module, s::Symbol)
+    p = ccall(:jl_get_module_of_binding, Ptr{Cvoid}, (Any, Any), m, s)
+    p == C_NULL && return m
+    return unsafe_pointer_to_objref(p)::Module
+end
+
+const _NAMEDTUPLE_NAME = NamedTuple.body.body.name
+
+function _fieldnames(@nospecialize t)
+    if t.name === _NAMEDTUPLE_NAME
+        if t.parameters[1] isa Tuple
+            return t.parameters[1]
+        else
+            throw(ArgumentError("type does not have definite field names"))
+        end
+    end
+    return t.name.names
+end
+
+# N.B.: Needs to be synced with julia.h
+const PARTITION_KIND_CONST              = 0x0
+const PARTITION_KIND_CONST_IMPORT       = 0x1
+const PARTITION_KIND_GLOBAL             = 0x2
+const PARTITION_KIND_IMPLICIT_GLOBAL    = 0x3
+const PARTITION_KIND_IMPLICIT_CONST     = 0x4
+const PARTITION_KIND_EXPLICIT           = 0x5
+const PARTITION_KIND_IMPORTED           = 0x6
+const PARTITION_KIND_FAILED             = 0x7
+const PARTITION_KIND_DECLARED           = 0x8
+const PARTITION_KIND_GUARD              = 0x9
+const PARTITION_KIND_UNDEF_CONST        = 0xa
+const PARTITION_KIND_BACKDATED_CONST    = 0xb
+
+const PARTITION_FLAG_EXPORTED     = 0x10
+const PARTITION_FLAG_DEPRECATED   = 0x20
+const PARTITION_FLAG_DEPWARN      = 0x40
+const PARTITION_FLAG_IMPLICITLY_EXPORTED = 0x80
+
+const PARTITION_MASK_KIND         = 0x0f
+const PARTITION_MASK_FLAG         = 0xf0
+
+const BINDING_FLAG_ANY_IMPLICIT_EDGES = 0x8
+
+const JL_MODULE_USING_REEXPORT = 0x1
+
+is_defined_const_binding(kind::UInt8) = (kind == PARTITION_KIND_CONST || kind == PARTITION_KIND_CONST_IMPORT || kind == PARTITION_KIND_IMPLICIT_CONST || kind == PARTITION_KIND_BACKDATED_CONST)
+is_some_const_binding(kind::UInt8) = (is_defined_const_binding(kind) || kind == PARTITION_KIND_UNDEF_CONST)
+is_some_imported(kind::UInt8) = (kind == PARTITION_KIND_IMPLICIT_GLOBAL || kind == PARTITION_KIND_IMPLICIT_CONST || kind == PARTITION_KIND_EXPLICIT || kind == PARTITION_KIND_IMPORTED)
+is_some_implicit(kind::UInt8) = (kind == PARTITION_KIND_IMPLICIT_GLOBAL || kind == PARTITION_KIND_IMPLICIT_CONST || kind == PARTITION_KIND_GUARD || kind == PARTITION_KIND_FAILED)
+is_some_explicit_imported(kind::UInt8) = (kind == PARTITION_KIND_EXPLICIT || kind == PARTITION_KIND_IMPORTED)
+is_some_binding_imported(kind::UInt8) = is_some_explicit_imported(kind) || kind == PARTITION_KIND_IMPLICIT_GLOBAL
+is_some_guard(kind::UInt8) = (kind == PARTITION_KIND_GUARD || kind == PARTITION_KIND_FAILED || kind == PARTITION_KIND_UNDEF_CONST)
+
+function lookup_binding_partition(world::UInt, b::Core.Binding)
+    ccall(:jl_get_binding_partition, Ref{Core.BindingPartition}, (Any, UInt), b, world)
+end
+
+function lookup_binding_partition(world::UInt, b::Core.Binding, previous_partition::Core.BindingPartition)
+    ccall(:jl_get_binding_partition_with_hint, Ref{Core.BindingPartition}, (Any, Any, UInt), b, previous_partition, world)
+end
+
+function convert(::Type{Core.Binding}, gr::Core.GlobalRef)
+    if isdefined(gr, :binding)
+        return gr.binding
+    else
+        return ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), gr.mod, gr.name, true)
+    end
+end
+
+function lookup_binding_partition(world::UInt, gr::Core.GlobalRef)
+    b = convert(Core.Binding, gr)
+    return lookup_binding_partition(world, b)
+end
+
+partition_restriction(bpart::Core.BindingPartition) = ccall(:jl_bpart_get_restriction_value, Any, (Any,), bpart)
+
+binding_kind(bpart::Core.BindingPartition) = ccall(:jl_bpart_get_kind, UInt8, (Any,), bpart)
+binding_kind(m::Module, s::Symbol) = binding_kind(lookup_binding_partition(tls_world_age(), GlobalRef(m, s)))
+
+"""
+    delete_binding(mod::Module, sym::Symbol)
+
+Force the binding `mod.sym` to be undefined again, allowing it be redefined.
+Note that this operation is very expensive, requiring a full scan of all code in the system,
+as well as potential recompilation of any methods that (may) have used binding
+information.
+
+!!! warning
+    The implementation of this functionality is currently incomplete. Do not use
+    this method on versions that contain this disclaimer except for testing.
+"""
+function delete_binding(mod::Module, sym::Symbol)
+    ccall(:jl_disable_binding, Cvoid, (Any,), GlobalRef(mod, sym))
+end
+
+"""
+    fieldname(x::DataType, i::Integer)
+
+Get the name of field `i` of a `DataType`.
+
+The return type is `Symbol`, except when `x <: Tuple`, in which case the index of the field is returned, of type `Int`.
+
+# Examples
+```jldoctest
+julia> fieldname(Rational, 1)
+:num
+
+julia> fieldname(Rational, 2)
+:den
+
+julia> fieldname(Tuple{String,Int}, 2)
+2
+```
+"""
+function fieldname(t::DataType, i::Integer)
+    throw_not_def_field() = throw(ArgumentError("type does not have definite field names"))
+    function throw_field_access(t, i, n_fields)
+        field_label = n_fields == 1 ? "field" : "fields"
+        throw(ArgumentError("Cannot access field $i since type $t only has $n_fields $field_label."))
+    end
+    throw_need_pos_int(i) = throw(ArgumentError("Field numbers must be positive integers. $i is invalid."))
+
+    isabstracttype(t) && throw_not_def_field()
+    names = _fieldnames(t)
+    n_fields = length(names)::Int
+    i > n_fields && throw_field_access(t, i, n_fields)
+    i < 1 && throw_need_pos_int(i)
+    return @inbounds names[i]::Symbol
+end
+
+fieldname(t::UnionAll, i::Integer) = fieldname(unwrap_unionall(t), i)
+fieldname(t::Type{<:Tuple}, i::Integer) =
+    i < 1 || i > fieldcount(t) ? throw(BoundsError(t, i)) : Int(i)
+
+"""
+    fieldnames(x::DataType)
+
+Get a tuple with the names of the fields of a `DataType`.
+
+Each name is a `Symbol`, except when `x <: Tuple`, in which case each name (actually the
+index of the field) is an `Int`.
+
+See also [`propertynames`](@ref), [`hasfield`](@ref).
+
+# Examples
+```jldoctest
+julia> fieldnames(Rational)
+(:num, :den)
+
+julia> fieldnames(typeof(1+im))
+(:re, :im)
+
+julia> fieldnames(Tuple{String,Int})
+(1, 2)
+```
+"""
+fieldnames(t::DataType) = (fieldcount(t); # error check to make sure type is specific enough
+                           (_fieldnames(t)...,))::Tuple{Vararg{Symbol}}
+fieldnames(t::UnionAll) = fieldnames(unwrap_unionall(t))
+fieldnames(::Core.TypeofBottom) =
+    throw(ArgumentError("The empty type does not have field names since it does not have instances."))
+fieldnames(t::Type{<:Tuple}) = ntuple(identity, fieldcount(t))
+
+"""
+    hasfield(T::Type, name::Symbol)
+
+Return a boolean indicating whether `T` has `name` as one of its own fields.
+
+See also [`fieldnames`](@ref), [`fieldcount`](@ref), [`hasproperty`](@ref).
+
+!!! compat "Julia 1.2"
+     This function requires at least Julia 1.2.
+
+# Examples
+```jldoctest
+julia> struct Foo
+            bar::Int
+       end
+
+julia> hasfield(Foo, :bar)
+true
+
+julia> hasfield(Foo, :x)
+false
+```
+"""
+hasfield(T::Type, name::Symbol) = fieldindex(T, name, false) > 0
+
+"""
+    nameof(t::DataType)::Symbol
+
+Get the name of a (potentially `UnionAll`-wrapped) `DataType` (without its parent module)
+as a symbol.
+
+# Examples
+```jldoctest
+julia> module Foo
+           struct S{T}
+           end
+       end
+Foo
+
+julia> nameof(Foo.S{T} where T)
+:S
+```
+"""
+nameof(t::DataType) = t.name.name
+nameof(t::UnionAll) = nameof(unwrap_unionall(t))::Symbol
+
+"""
+    parentmodule(t::DataType)::Module
+
+Determine the module containing the definition of a (potentially `UnionAll`-wrapped) `DataType`.
+
+# Examples
+```jldoctest
+julia> module Foo
+           struct Int end
+       end
+Foo
+
+julia> parentmodule(Int)
+Core
+
+julia> parentmodule(Foo.Int)
+Foo
+```
+"""
+parentmodule(t::DataType) = t.name.module
+parentmodule(t::UnionAll) = parentmodule(unwrap_unionall(t))
+
+"""
+    isconst(m::Module, s::Symbol)::Bool
+    isconst(g::GlobalRef)::Bool
+
+Determine whether a global is `const` in a given module `m`, either
+because it was declared constant or because it was imported from a
+constant binding. Note that constant-ness is specific to a particular
+world age, so the result of this function may not be assumed to hold
+after a world age update.
+"""
+isconst(m::Module, s::Symbol) =
+    ccall(:jl_is_const, Cint, (Any, Any), m, s) != 0
+
+function isconst(g::GlobalRef)
+    return ccall(:jl_globalref_is_const, Cint, (Any,), g) != 0
+end
+
+"""
+    isconst(t::DataType, s::Union{Int,Symbol})::Bool
+
+Determine whether a field `s` is const in a given type `t`
+in the sense that a read from said field is consistent
+for egal objects. Note in particular that out-of-bounds
+fields are considered const under this definition (because
+they always throw).
+"""
+function isconst(@nospecialize(t::Type), s::Symbol)
+    @_foldable_meta
+    t = unwrap_unionall(t)
+    isa(t, DataType) || return false
+    return isconst(t, fieldindex(t, s, false))
+end
+function isconst(@nospecialize(t::Type), s::Int)
+    @_foldable_meta
+    t = unwrap_unionall(t)
+    # TODO: what to do for `Union`?
+    isa(t, DataType) || return false # uncertain
+    ismutabletype(t) || return true # immutable structs are always const
+    1 <= s <= length(t.name.names) || return true # OOB reads are "const" since they always throw
+    constfields = t.name.constfields
+    constfields === C_NULL && return false
+    s -= 1
+    return unsafe_load(Ptr{UInt32}(constfields), 1 + s÷32) & (1 << (s%32)) != 0
+end
+
+"""
+    isfieldatomic(t::DataType, s::Union{Int,Symbol})::Bool
+
+Determine whether a field `s` is declared `@atomic` in a given type `t`.
+"""
+function isfieldatomic(@nospecialize(t::Type), s::Symbol)
+    @_foldable_meta
+    t = unwrap_unionall(t)
+    isa(t, DataType) || return false
+    return isfieldatomic(t, fieldindex(t, s, false))
+end
+function isfieldatomic(@nospecialize(t::Type), s::Int)
+    @_foldable_meta
+    t = unwrap_unionall(t)
+    # TODO: what to do for `Union`?
+    isa(t, DataType) || return false # uncertain
+    ismutabletype(t) || return false # immutable structs are never atomic
+    1 <= s <= length(t.name.names) || return false # OOB reads are not atomic (they always throw)
+    atomicfields = t.name.atomicfields
+    atomicfields === C_NULL && return false
+    s -= 1
+    return unsafe_load(Ptr{UInt32}(atomicfields), 1 + s÷32) & (1 << (s%32)) != 0
+end
+
+"""
+    @locals()
+
+Construct a dictionary of the names (as symbols) and values of all local
+variables defined as of the call site.
+
+!!! compat "Julia 1.1"
+    This macro requires at least Julia 1.1.
+
+# Examples
+```jldoctest
+julia> let x = 1, y = 2
+           Base.@locals
+       end
+Dict{Symbol, Any} with 2 entries:
+  :y => 2
+  :x => 1
+
+julia> function f(x)
+           local y
+           show(Base.@locals); println()
+           for i = 1:1
+               show(Base.@locals); println()
+           end
+           y = 2
+           show(Base.@locals); println()
+           nothing
+       end;
+
+julia> f(42)
+Dict{Symbol, Any}(:x => 42)
+Dict{Symbol, Any}(:i => 1, :x => 42)
+Dict{Symbol, Any}(:y => 2, :x => 42)
+```
+"""
+macro locals()
+    return Expr(:locals)
+end
+
+# concrete datatype predicates
+
+datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Core.SimpleVector, (Any,), x)
+
+struct DataTypeLayout
+    size::UInt32
+    nfields::UInt32
+    npointers::UInt32
+    firstptr::Int32
+    alignment::UInt16
+    flags::UInt16
+    # haspadding : 1;
+    # fielddesc_type : 2;
+    # arrayelem_isboxed : 1;
+    # arrayelem_isunion : 1;
+    # arrayelem_isatomic : 1;
+    # arrayelem_islocked : 1;
+    # isbitsegal : 1;
+    # padding : 8;
+end
+
+"""
+    Base.datatype_alignment(dt::DataType)::Int
+
+Memory allocation minimum alignment for instances of this type.
+Can be called on any `isconcretetype`, although for Memory it will give the
+alignment of the elements, not the whole object.
+"""
+function datatype_alignment(dt::DataType)
+    @_foldable_meta
+    layout = dt.layout::Ptr{Cvoid}
+    layout == C_NULL && throw(UndefRefError())
+    alignment = unsafe_load(convert(Ptr{DataTypeLayout}, layout)).alignment
+    return Int(alignment)
+end
+
+function uniontype_layout(@nospecialize T::Type)
+    sz = RefValue{Csize_t}(0)
+    algn = RefValue{Csize_t}(0)
+    isinline = ccall(:jl_islayout_inline, Cint, (Any, Ptr{Csize_t}, Ptr{Csize_t}), T, sz, algn) != 0
+    (isinline, Int(sz[]), Int(algn[]))
+end
+
+LLT_ALIGN(x, sz) = (x + sz - 1) & -sz
+
+# amount of total space taken by T when stored in a container
+function aligned_sizeof(@nospecialize T::Type)
+    @_foldable_meta
+    if isa(T, Union)
+        if allocatedinline(T)
+            # NOTE this check is equivalent to `isbitsunion(T)`, we can improve type
+            # inference in the second branch with the outer `isa(T, Union)` check
+            _, sz, al = uniontype_layout(T)
+            return LLT_ALIGN(sz, al)
+        end
+    elseif allocatedinline(T)
+        al = datatype_alignment(T)
+        return LLT_ALIGN(Core.sizeof(T), al)
+    end
+    return Core.sizeof(Ptr{Cvoid})
+end
+
+gc_alignment(sz::Integer) = Int(ccall(:jl_alignment, Cint, (Csize_t,), sz))
+gc_alignment(T::Type) = gc_alignment(Core.sizeof(T))
+
+"""
+    Base.datatype_haspadding(dt::DataType)::Bool
+
+Return whether the fields of instances of this type are packed in memory,
+with no intervening padding bits (defined as bits whose value does not impact
+the semantic value of the instance itself).
+Can be called on any `isconcretetype`.
+"""
+function datatype_haspadding(dt::DataType)
+    @_foldable_meta
+    dt.layout == C_NULL && throw(UndefRefError())
+    flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
+    return flags & 1 == 1
+end
+
+"""
+    Base.datatype_isbitsegal(dt::DataType)::Bool
+
+Return whether egality of the (non-padding bits of the) in-memory representation
+of an instance of this type implies semantic egality of the instance itself.
+This may not be the case if the type contains to other values whose egality is
+independent of their identity (e.g. immutable structs, some types, etc.).
+"""
+function datatype_isbitsegal(dt::DataType)
+    @_foldable_meta
+    dt.layout == C_NULL && throw(UndefRefError())
+    flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
+    return (flags & (1<<7)) != 0
+end
+
+"""
+    Base.datatype_nfields(dt::DataType)::UInt32
+
+Return the number of fields known to this datatype's layout. This may be
+different from the number of actual fields of the type for opaque types.
+Can be called on any `isconcretetype`.
+"""
+function datatype_nfields(dt::DataType)
+    @_foldable_meta
+    dt.layout == C_NULL && throw(UndefRefError())
+    return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).nfields
+end
+
+"""
+    Base.datatype_npointers(dt::DataType)::Int
+
+Return the number of pointers in the layout of a datatype.
+"""
+function datatype_npointers(dt::DataType)
+    @_foldable_meta
+    layout = dt.layout::Ptr{Cvoid}
+    layout == C_NULL && throw(UndefRefError())
+    return unsafe_load(convert(Ptr{DataTypeLayout}, layout)).npointers
+end
+
+"""
+    Base.datatype_pointerfree(dt::DataType)::Bool
+
+Return whether instances of this type can contain references to gc-managed memory.
+Can be called on any `isconcretetype`.
+"""
+function datatype_pointerfree(dt::DataType)
+    @_foldable_meta
+    return datatype_npointers(dt) == 0
+end
+
+"""
+    Base.datatype_fielddesc_type(dt::DataType)::Int
+
+Return the size in bytes of each field-description entry in the layout array,
+located at `(dt.layout + sizeof(DataTypeLayout))`.
+Can be called on any `isconcretetype`.
+
+See also [`fieldoffset`](@ref).
+"""
+function datatype_fielddesc_type(dt::DataType)
+    @_foldable_meta
+    dt.layout == C_NULL && throw(UndefRefError())
+    flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
+    return (flags >> 1) & 3
+end
+
+"""
+    Base.datatype_arrayelem(dt::DataType)::Int
+
+Return the behavior of the trailing array types allocations.
+Can be called on any `isconcretetype`, but only meaningful on `Memory`.
+
+0 = inlinealloc
+1 = isboxed
+2 = isbitsunion
+"""
+function datatype_arrayelem(dt::DataType)
+    @_foldable_meta
+    dt.layout == C_NULL && throw(UndefRefError())
+    flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
+    return (flags >> 3) & 3
+end
+
+function datatype_layoutsize(dt::DataType)
+    @_foldable_meta
+    dt.layout == C_NULL && throw(UndefRefError())
+    size = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).size
+    return size % Int
+end
+
+
+# For type stability, we only expose a single struct that describes everything
+struct FieldDesc
+    isforeign::Bool
+    isptr::Bool
+    size::UInt32
+    offset::UInt32
+end
+
+struct FieldDescStorage{T}
+    ptrsize::T
+    offset::T
+end
+FieldDesc(fd::FieldDescStorage{T}) where {T} =
+    FieldDesc(false, fd.ptrsize & 1 != 0,
+              fd.ptrsize >> 1, fd.offset)
+
+struct DataTypeFieldDesc
+    dt::DataType
+    function DataTypeFieldDesc(dt::DataType)
+        dt.layout == C_NULL && throw(UndefRefError())
+        new(dt)
+    end
+end
+
+function getindex(dtfd::DataTypeFieldDesc, i::Int)
+    layout_ptr = convert(Ptr{DataTypeLayout}, dtfd.dt.layout)
+    fd_ptr = layout_ptr + Core.sizeof(DataTypeLayout)
+    layout = unsafe_load(layout_ptr)
+    fielddesc_type = (layout.flags >> 1) & 3
+    nfields = layout.nfields
+    @boundscheck ((1 <= i <= nfields) || throw(BoundsError(dtfd, i)))
+    if fielddesc_type == 0
+        return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt8}}(fd_ptr), i))
+    elseif fielddesc_type == 1
+        return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt16}}(fd_ptr), i))
+    elseif fielddesc_type == 2
+        return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt32}}(fd_ptr), i))
+    else
+        # fielddesc_type == 3
+        return FieldDesc(true, true, 0, 0)
+    end
+end
+
+"""
+    ismutable(v)::Bool
+
+Return `true` if and only if value `v` is mutable.  See [Mutable Composite Types](@ref)
+for a discussion of immutability. Note that this function works on values, so if you
+give it a `DataType`, it will tell you that a value of the type is mutable.
+
+!!! note
+    For technical reasons, `ismutable` returns `true` for values of certain special types
+    (for example `String` and `Symbol`) even though they cannot be mutated in a permissible way.
+
+See also [`isbits`](@ref), [`isstructtype`](@ref).
+
+# Examples
+```jldoctest
+julia> ismutable(1)
+false
+
+julia> ismutable([1,2])
+true
+```
+
+!!! compat "Julia 1.5"
+    This function requires at least Julia 1.5.
+"""
+ismutable(@nospecialize(x)) = (@_total_meta; (typeof(x).name::Core.TypeName).flags & 0x2 == 0x2)
+# The type assertion above is required to fix some invalidations.
+# See also https://github.com/JuliaLang/julia/issues/52134
+
+"""
+    ismutabletype(T)::Bool
+
+Determine whether type `T` was declared as a mutable type
+(i.e. using `mutable struct` keyword).
+If `T` is not a type, then return `false`.
+
+!!! compat "Julia 1.7"
+    This function requires at least Julia 1.7.
+"""
+function ismutabletype(@nospecialize t)
+    @_total_meta
+    t = unwrap_unionall(t)
+    # TODO: what to do for `Union`?
+    return isa(t, DataType) && ismutabletypename(t.name)
+end
+
+ismutabletypename(tn::Core.TypeName) = tn.flags & 0x2 == 0x2
+
+"""
+    isstructtype(T)::Bool
+
+Determine whether type `T` was declared as a struct type
+(i.e. using the `struct` or `mutable struct` keyword).
+If `T` is not a type, then return `false`.
+"""
+function isstructtype(@nospecialize t)
+    @_total_meta
+    t = unwrap_unionall(t)
+    # TODO: what to do for `Union`?
+    isa(t, DataType) || return false
+    return !isprimitivetype(t) && !isabstracttype(t)
+end
+
+"""
+    isprimitivetype(T)::Bool
+
+Determine whether type `T` was declared as a primitive type
+(i.e. using the `primitive type` syntax).
+If `T` is not a type, then return `false`.
+"""
+function isprimitivetype(@nospecialize t)
+    @_total_meta
+    t = unwrap_unionall(t)
+    # TODO: what to do for `Union`?
+    isa(t, DataType) || return false
+    return (t.flags & 0x0080) == 0x0080
+end
+
+"""
+    isbitstype(T)
+
+Return `true` if type `T` is a "plain data" type,
+meaning it is immutable and contains no references to other values,
+only `primitive` types and other `isbitstype` types.
+Typical examples are numeric types such as [`UInt8`](@ref),
+[`Float64`](@ref), and [`Complex{Float64}`](@ref).
+This category of types is significant since they are valid as type parameters,
+may not track [`isdefined`](@ref) / [`isassigned`](@ref) status,
+and have a defined layout that is compatible with C.
+If `T` is not a type, then return `false`.
+
+See also [`isbits`](@ref), [`isprimitivetype`](@ref), [`ismutable`](@ref).
+
+# Examples
+```jldoctest
+julia> isbitstype(Complex{Float64})
+true
+
+julia> isbitstype(Complex)
+false
+```
+"""
+isbitstype(@nospecialize t) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0008) == 0x0008)
+
+"""
+    isbits(x)
+
+Return `true` if `x` is an instance of an [`isbitstype`](@ref) type.
+"""
+isbits(@nospecialize x) = isbitstype(typeof(x))
+
+"""
+    objectid(x)::UInt
+
+Get a hash value for `x` based on object identity. This value is not unique nor
+stable between Julia processes or versions.
+
+If `x === y` then `objectid(x) == objectid(y)`, and usually when `x !== y`, `objectid(x) != objectid(y)`.
+
+See also [`hash`](@ref), [`IdDict`](@ref).
+"""
+function objectid(@nospecialize(x))
+    @_total_meta
+    return ccall(:jl_object_id, UInt, (Any,), x)
+end
+
+"""
+    isdispatchtuple(T)
+
+Determine whether type `T` is a [`Tuple`](@ref) that could appear as a type
+signature in dispatch.  For this to be true, every element of the tuple type
+must be either:
+- [concrete](@ref isconcretetype) but not a [kind type](@ref Base.iskindtype)
+- a [`Type{U}`](@ref Type) with no free type variables in `U`
+
+!!! note
+    A dispatch tuple is relevant for method dispatch because it has no inhabited
+    subtypes.
+
+    For example, `Tuple{Int, DataType}` is concrete, but is not a dispatch tuple
+    because `Tuple{Int, Type{Bool}}` is an inhabited subtype.
+
+    `Tuple{Tuple{DataType}}` *is* a dispatch tuple because `Tuple{DataType}` is
+    concrete and not a kind; the subtype `Tuple{Tuple{Type{Int}}}` is not
+    inhabited.
+
+If `T` is not a type, then return `false`.
+
+# Examples
+```jldoctest
+julia> isdispatchtuple(Int)
+false
+
+julia> isdispatchtuple(Tuple{Int})
+true
+
+julia> isdispatchtuple(Tuple{Number})
+false
+
+julia> isdispatchtuple(Tuple{DataType})
+false
+
+julia> isdispatchtuple(Tuple{Type{Int}})
+true
+
+julia> isdispatchtuple(Tuple{Type})
+false
+```
+"""
+isdispatchtuple(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0004) == 0x0004)
+
+datatype_ismutationfree(dt::DataType) = (@_total_meta; (dt.flags & 0x0100) == 0x0100)
+
+"""
+    Base.ismutationfree(T)
+
+Determine whether type `T` is mutation free in the sense that no mutable memory
+is reachable from this type (either in the type itself) or through any fields.
+Note that the type itself need not be immutable. For example, an empty mutable
+type is `ismutabletype`, but also `ismutationfree`.
+If `T` is not a type, then return `false`.
+"""
+function ismutationfree(@nospecialize(t))
+    t = unwrap_unionall(t)
+    if isa(t, DataType)
+        return datatype_ismutationfree(t)
+    elseif isa(t, Union)
+        return ismutationfree(t.a) && ismutationfree(t.b)
+    end
+    # TypeVar, etc.
+    return false
+end
+
+datatype_isidentityfree(dt::DataType) = (@_total_meta; (dt.flags & 0x0200) == 0x0200)
+
+"""
+    Base.isidentityfree(T)
+
+Determine whether type `T` is identity free in the sense that this type or any
+reachable through its fields has non-content-based identity.
+If `T` is not a type, then return `false`.
+"""
+function isidentityfree(@nospecialize(t))
+    t = unwrap_unionall(t)
+    if isa(t, DataType)
+        return datatype_isidentityfree(t)
+    elseif isa(t, Union)
+        return isidentityfree(t.a) && isidentityfree(t.b)
+    end
+    # TypeVar, etc.
+    return false
+end
+
+"""
+    Base.iskindtype(T)
+
+Determine whether `T` is a kind, that is, the type of a Julia type:
+a [`DataType`](@ref), [`Union`](@ref), [`UnionAll`](@ref),
+or [`Core.TypeofBottom`](@ref).
+
+All kinds are [concrete](@ref isconcretetype) because types are Julia values.
+"""
+iskindtype(@nospecialize t) = (t === DataType || t === UnionAll || t === Union || t === typeof(Bottom))
+
+"""
+    Base.isconcretedispatch(T)
+
+Return true if `T` is a [concrete type](@ref isconcretetype) that could appear
+as an element of a [dispatch tuple](@ref isdispatchtuple).
+
+See also: [`isdispatchtuple`](@ref).
+
+# Examples
+```jldoctest
+julia> Base.isconcretedispatch(Int)
+true
+
+julia> Base.isconcretedispatch(Number)
+false
+
+julia> Base.isconcretedispatch(DataType)
+false
+
+julia> Base.isconcretedispatch(Type{Int})
+false
+```
+"""
+isconcretedispatch(@nospecialize t) = isconcretetype(t) && !iskindtype(t)
+
+using Core: has_free_typevars
+
+# equivalent to isa(v, Type) && isdispatchtuple(Tuple{v}) || v === Union{}
+# and is thus perhaps most similar to the old (pre-1.0) `isconcretetype` query
+function isdispatchelem(@nospecialize v)
+    return (v === Bottom) || (v === typeof(Bottom)) || isconcretedispatch(v) ||
+        (isType(v) && !has_free_typevars(v))
+end
+
+const _TYPE_NAME = Type.body.name
+isType(@nospecialize t) = isa(t, DataType) && t.name === _TYPE_NAME
+
+"""
+    isconcretetype(T)
+
+Determine whether type `T` is a concrete type, meaning it could have direct instances
+(values `x` such that `typeof(x) === T`).
+Note that this is not the negation of `isabstracttype(T)`.
+If `T` is not a type, then return `false`.
+
+!!! note
+    While concrete types are not [abstract](@ref isabstracttype) and
+    vice versa, types can be neither concrete nor abstract (for example,
+    `Vector` (a [`UnionAll`](@ref))).
+
+!!! note
+    `T` must be the exact type that would be returned from `typeof`.  It is
+    possible for a type `U` to exist such that `T == U`, `isconcretetype(T)`,
+    but `!isconcretetype(U)`.
+
+See also: [`isbits`](@ref), [`isabstracttype`](@ref), [`issingletontype`](@ref).
+
+# Examples
+```jldoctest
+julia> isconcretetype(Complex)
+false
+
+julia> isconcretetype(Complex{Float32})
+true
+
+julia> isconcretetype(Vector)
+false
+
+julia> isconcretetype(Vector{Complex})
+true
+
+julia> isconcretetype(Vector{Complex{Float32}})
+true
+
+julia> isconcretetype(Union{})
+false
+
+julia> isconcretetype(Union{Int,String})
+false
+
+julia> isconcretetype(Tuple{T} where T<:Int)
+false
+```
+"""
+isconcretetype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0002) == 0x0002)
+
+"""
+    isabstracttype(T)
+
+Determine whether type `T` was declared as an abstract type
+(i.e. using the `abstract type` syntax).
+If `T` is not a type, then return `false`.
+
+!!! note
+    While abstract types are not [concrete](@ref isconcretetype) and
+    vice versa, types can be neither concrete nor abstract (for example,
+    `Vector` (a [`UnionAll`](@ref))).
+
+See also: [`isconcretetype`](@ref).
+
+# Examples
+```jldoctest
+julia> isabstracttype(AbstractArray)
+true
+
+julia> isabstracttype(Vector)
+false
+```
+"""
+function isabstracttype(@nospecialize(t))
+    @_total_meta
+    t = unwrap_unionall(t)
+    # TODO: what to do for `Union`?
+    return isa(t, DataType) && (t.name.flags & 0x1) == 0x1
+end
+
+function is_datatype_layoutopaque(dt::DataType)
+    datatype_nfields(dt) == 0 && !datatype_pointerfree(dt)
+end
+
+function is_valid_intrinsic_elptr(@nospecialize(ety))
+    ety === Any && return true
+    isconcretetype(ety) || return false
+    ety <: Array && return false
+    return !is_datatype_layoutopaque(ety)
+end
+
+"""
+    Base.issingletontype(T)
+
+Determine whether type `T` has exactly one possible instance; for example, a
+struct type with no fields except other singleton values.
+If `T` is not a concrete type, then return `false`.
+"""
+issingletontype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && isdefined(t, :instance) && datatype_layoutsize(t) == 0 && datatype_pointerfree(t))
+
+"""
+    typeintersect(T::Type, S::Type)
+
+Compute a type that contains the intersection of `T` and `S`. Usually this will be the
+smallest such type or one close to it.
+
+A special case where exact behavior is guaranteed: when `T <: S`,
+`typeintersect(S, T) == T == typeintersect(T, S)`.
+"""
+typeintersect(@nospecialize(a), @nospecialize(b)) = (@_total_meta; ccall(:jl_type_intersection, Any, (Any, Any), a::Type, b::Type))
+
+morespecific(@nospecialize(a), @nospecialize(b)) = (@_total_meta; ccall(:jl_type_morespecific, Cint, (Any, Any), a::Type, b::Type) != 0)
+morespecific(a::Method, b::Method) = ccall(:jl_method_morespecific, Cint, (Any, Any), a, b) != 0
+
+"""
+    fieldoffset(type, name::Symbol | i::Integer)
+
+The byte offset of a field (specified by name or index) of a type relative to its start.
+
+# Examples
+```jldoctest
+julia> struct Foo
+           x::Int64
+           y::String
+       end
+
+julia> fieldoffset(Foo, 2)
+0x0000000000000008
+
+julia> fieldoffset(Foo, :x)
+0x0000000000000000
+```
+
+We can use it to summarize information about a struct:
+
+```jldoctest
+julia> structinfo(T) = [(fieldoffset(T,i), fieldname(T,i), fieldtype(T,i)) for i = 1:fieldcount(T)];
+
+julia> structinfo(Base.Filesystem.StatStruct)
+14-element Vector{Tuple{UInt64, Symbol, Type}}:
+ (0x0000000000000000, :desc, Union{RawFD, String})
+ (0x0000000000000008, :device, UInt64)
+ (0x0000000000000010, :inode, UInt64)
+ (0x0000000000000018, :mode, UInt64)
+ (0x0000000000000020, :nlink, Int64)
+ (0x0000000000000028, :uid, UInt64)
+ (0x0000000000000030, :gid, UInt64)
+ (0x0000000000000038, :rdev, UInt64)
+ (0x0000000000000040, :size, Int64)
+ (0x0000000000000048, :blksize, Int64)
+ (0x0000000000000050, :blocks, Int64)
+ (0x0000000000000058, :mtime, Float64)
+ (0x0000000000000060, :ctime, Float64)
+ (0x0000000000000068, :ioerrno, Int32)
+```
+
+!!! compat "Julia 1.13"
+    Specifying the field by name rather than index requires Julia 1.13 or later.
+"""
+fieldoffset(x::DataType, idx::Integer) = (@_foldable_meta; ccall(:jl_get_field_offset, Csize_t, (Any, Cint), x, idx))
+fieldoffset(x::DataType, name::Symbol) = fieldoffset(x, fieldindex(x, name))
+
+"""
+    fieldtype(T, name::Symbol | index::Int)
+
+Determine the declared type of a field (specified by name or index) in a composite DataType `T`.
+
+# Examples
+```jldoctest
+julia> struct Foo
+           x::Int64
+           y::String
+       end
+
+julia> fieldtype(Foo, :x)
+Int64
+
+julia> fieldtype(Foo, 2)
+String
+```
+"""
+fieldtype
+
+"""
+    fieldindex(T, name::Symbol, err:Bool=true)
+
+Get the index of a named field, throwing an error if the field does not exist (when err==true)
+or returning 0 (when err==false).
+
+# Examples
+```jldoctest
+julia> struct Foo
+           x::Int64
+           y::String
+       end
+
+julia> fieldindex(Foo, :y)
+2
+
+julia> fieldindex(Foo, :z)
+ERROR: FieldError: type Foo has no field `z`, available fields: `x`, `y`
+Stacktrace:
+[...]
+
+julia> fieldindex(Foo, :z, false)
+0
+```
+
+!!! compat "Julia 1.13"
+    This function is exported as of Julia 1.13.
+"""
+function fieldindex(T::DataType, name::Symbol, err::Bool=true)
+    return err ? _fieldindex_maythrow(T, name) : _fieldindex_nothrow(T, name)
+end
+
+function _fieldindex_maythrow(T::DataType, name::Symbol)
+    @_foldable_meta
+    @noinline
+    return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, true)+1)
+end
+
+function _fieldindex_nothrow(T::DataType, name::Symbol)
+    @_total_meta
+    @noinline
+    return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, false)+1)
+end
+
+function fieldindex(t::UnionAll, name::Symbol, err::Bool=true)
+    t = argument_datatype(t)
+    if t === nothing
+        err && throw(ArgumentError("type does not have definite fields"))
+        return 0
+    end
+    return fieldindex(t, name, err)
+end
+
+function argument_datatype(@nospecialize t)
+    @_total_meta
+    @noinline
+    return ccall(:jl_argument_datatype, Any, (Any,), t)::Union{Nothing,DataType}
+end
+
+function datatype_fieldcount(t::DataType)
+    if t.name === _NAMEDTUPLE_NAME
+        names, types = t.parameters[1], t.parameters[2]
+        if names isa Tuple
+            return length(names)
+        end
+        if types isa DataType && types <: Tuple
+            return datatype_fieldcount(types)
+        end
+        return nothing
+    elseif isabstracttype(t)
+        return nothing
+    end
+    if t.name === Tuple.name
+        isvatuple(t) && return nothing
+        return length(t.types)
+    end
+    # Equivalent to length(t.types), but `t.types` is lazy and we do not want
+    # to be forced to compute it.
+    return length(t.name.names)
+end
+
+"""
+    fieldcount(t::Type)
+
+Get the number of fields that an instance of the given type would have.
+An error is thrown if the type is too abstract to determine this.
+"""
+function fieldcount(@nospecialize t)
+    @_foldable_meta
+    if t isa UnionAll || t isa Union
+        t = argument_datatype(t)
+        if t === nothing
+            throw(ArgumentError("type does not have a definite number of fields"))
+        end
+    elseif t === Union{}
+        throw(ArgumentError("The empty type does not have a well-defined number of fields since it does not have instances."))
+    end
+    if !(t isa DataType)
+        throw(TypeError(:fieldcount, DataType, t))
+    end
+    fcount = datatype_fieldcount(t)
+    if fcount === nothing
+        throw(ArgumentError("type does not have a definite number of fields"))
+    end
+    return fcount
+end
+
+function fieldcount_noerror(@nospecialize t)
+    if t isa UnionAll || t isa Union
+        t = argument_datatype(t)
+        if t === nothing
+            return nothing
+        end
+    elseif t === Union{}
+        return 0
+    end
+    t isa DataType || return nothing
+    if t.name === _NAMEDTUPLE_NAME
+        names, types = t.parameters
+        if names isa Tuple
+            return length(names)
+        end
+        if types isa DataType && types <: Tuple
+            return fieldcount_noerror(types)
+        end
+        return nothing
+    elseif isabstracttype(t) || (t.name === Tuple.name && isvatuple(t))
+        return nothing
+    end
+    return isdefined(t, :types) ? length(t.types) : length(t.name.names)
+end
+
+
+"""
+    fieldtypes(T::Type)
+
+The declared types of all fields in a composite DataType `T` as a tuple.
+
+!!! compat "Julia 1.1"
+    This function requires at least Julia 1.1.
+
+# Examples
+```jldoctest
+julia> struct Foo
+           x::Int64
+           y::String
+       end
+
+julia> fieldtypes(Foo)
+(Int64, String)
+```
+"""
+fieldtypes(T::Type) = (@_foldable_meta; ntupleany(i -> fieldtype(T, i), fieldcount(T)))
+
+# return all instances, for types that can be enumerated
+
+"""
+    instances(T::Type)
+
+Return a collection of all instances of the given type, if applicable. Mostly used for
+enumerated types (see `@enum`).
+
+# Examples
+```jldoctest
+julia> @enum Color red blue green
+
+julia> instances(Color)
+(red, blue, green)
+```
+"""
+function instances end
+
+function to_tuple_type(@nospecialize(t))
+    if isa(t, Tuple) || isa(t, AbstractArray) || isa(t, SimpleVector)
+        t = Tuple{t...}
+    end
+    if isa(t, Type) && t <: Tuple
+        for p in (unwrap_unionall(t)::DataType).parameters
+            if isa(p, Core.TypeofVararg)
+                p = unwrapva(p)
+            end
+            if !(isa(p, Type) || isa(p, TypeVar))
+                error("argument tuple type must contain only types")
+            end
+        end
+    else
+        error("expected tuple type")
+    end
+    t
+end
+
+function signature_type(@nospecialize(f), @nospecialize(argtypes))
+    argtypes = to_tuple_type(argtypes)
+    ft = Core.Typeof(f)
+    u = unwrap_unionall(argtypes)::DataType
+    return rewrap_unionall(Tuple{ft, u.parameters...}, argtypes)
+end
+
+function get_methodtable(m::Method)
+    mt = ccall(:jl_method_get_table, Any, (Any,), m)
+    if mt === nothing
+        return nothing
+    end
+    return mt::Core.MethodTable
+end
+
+"""
+    has_bottom_parameter(t)::Bool
+
+Determine whether `t` is a Type for which one or more of its parameters is `Union{}`.
+"""
+function has_bottom_parameter(t::DataType)
+    for p in t.parameters
+        has_bottom_parameter(p) && return true
+    end
+    return false
+end
+has_bottom_parameter(t::typeof(Bottom)) = true
+has_bottom_parameter(t::UnionAll) = has_bottom_parameter(unwrap_unionall(t))
+has_bottom_parameter(t::Union) = has_bottom_parameter(t.a) & has_bottom_parameter(t.b)
+has_bottom_parameter(t::TypeVar) = has_bottom_parameter(t.ub)
+has_bottom_parameter(::Any) = false
+
+min_world(m::Core.CodeInstance) = m.min_world
+max_world(m::Core.CodeInstance) = m.max_world
+min_world(m::Core.CodeInfo) = m.min_world
+max_world(m::Core.CodeInfo) = m.max_world
+
+"""
+    get_world_counter()
+
+Return the current maximum world-age counter. This counter is monotonically
+increasing.
+
+!!! warning
+    This counter is global and may change at any time between invocations.
+    In general, most reflection functions operate on the current task's world
+    age, rather than the global maximum world age. See [`tls_world_age`](@ref)
+    as well as the [manual chapter of world age](@ref man-world-age).
+"""
+get_world_counter() = ccall(:jl_get_world_counter, UInt, ())
+
+"""
+    tls_world_age()
+
+Return the world the [current_task()](@ref) is executing within.
+"""
+tls_world_age() = ccall(:jl_get_tls_world_age, UInt, ())
+
+get_require_world() = unsafe_load(cglobal(:jl_require_world, UInt))
+
+"""
+    propertynames(x, private=false)
+
+Get a tuple or a vector of the properties (`x.property`) of an object `x`.
+This is typically the same as [`fieldnames(typeof(x))`](@ref), but types
+that overload [`getproperty`](@ref) should generally overload `propertynames`
+as well to get the properties of an instance of the type.
+
+`propertynames(x)` may return only "public" property names that are part
+of the documented interface of `x`.   If you want it to also return "private"
+property names intended for internal use, pass `true` for the optional second argument.
+REPL tab completion on `x.` shows only the `private=false` properties.
+
+See also: [`hasproperty`](@ref), [`hasfield`](@ref).
+"""
+propertynames(x) = fieldnames(typeof(x))
+propertynames(m::Module) = names(m)
+propertynames(x, private::Bool) = propertynames(x) # ignore private flag by default
+propertynames(x::Array) = () # hide the fields from tab completion to discourage calling `x.size` instead of `size(x)`, even though they are equivalent
+
+"""
+    hasproperty(x, s::Symbol)
+
+Return a boolean indicating whether the object `x` has `s` as one of its own properties.
+
+!!! compat "Julia 1.2"
+     This function requires at least Julia 1.2.
+
+See also: [`propertynames`](@ref), [`hasfield`](@ref).
+"""
+hasproperty(x, s::Symbol) = s in propertynames(x)
+
+"""
+    delete_method(m::Method)
+
+Make method `m` uncallable and force recompilation of any methods that use(d) it.
+"""
+function delete_method(m::Method)
+    ccall(:jl_method_table_disable, Cvoid, (Any,), m)
+end
+
+
+# type for reflecting and pretty-printing a subset of methods
+mutable struct MethodList <: AbstractArray{Method,1}
+    ms::Array{Method,1}
+    tn::Core.TypeName # contains module.singletonname globalref for altering some aspects of printing
+end
+
+size(m::MethodList) = size(m.ms)
+getindex(m::MethodList, i::Integer) = m.ms[i]
+
+function MethodList(mt::Core.MethodTable)
+    ms = Method[]
+    visit(mt) do m
+        push!(ms, m)
+    end
+    return MethodList(ms, Any.name)
+end
+
+function matches_to_methods(ms::Array{Any,1}, tn::Core.TypeName, mod)
+    # Lack of specialization => a comprehension triggers too many invalidations via _collect, so collect the methods manually
+    ms = Method[(ms[i]::Core.MethodMatch).method for i in 1:length(ms)]
+    # Remove methods not part of module
+    mod === nothing || filter!(ms) do m
+        return parentmodule(m) ∈ mod
+    end
+    return MethodList(ms, tn)
+end
+
+"""
+    methods(f, [types], [module])
+
+Return the method table for `f`.
+
+If `types` is specified, return an array of methods whose types match.
+If `module` is specified, return an array of methods defined in that module.
+A list of modules can also be specified as an array or set.
+
+!!! compat "Julia 1.4"
+    At least Julia 1.4 is required for specifying a module.
+
+See also: [`which`](@ref), [`@which`](@ref Main.InteractiveUtils.@which) and [`methodswith`](@ref Main.InteractiveUtils.methodswith).
+"""
+function methods(@nospecialize(f), @nospecialize(t),
+                 mod::Union{Tuple{Module},AbstractArray{Module},AbstractSet{Module},Nothing}=nothing)
+    world = get_world_counter()
+    world == typemax(UInt) && error("code reflection cannot be used from generated functions")
+    ms = _methods(f, t, -1, world)::Vector{Any}
+    return matches_to_methods(ms, typeof(f).name, mod)
+end
+methods(@nospecialize(f), @nospecialize(t), mod::Module) = methods(f, t, (mod,))
+
+function methods_including_ambiguous(@nospecialize(f), @nospecialize(t))
+    tt = signature_type(f, t)
+    world = get_world_counter()
+    world == typemax(UInt) && error("code reflection cannot be used from generated functions")
+    min = RefValue{UInt}(typemin(UInt))
+    max = RefValue{UInt}(typemax(UInt))
+    ms = _methods_by_ftype(tt, nothing, -1, world, true, min, max, Ptr{Int32}(C_NULL))::Vector{Any}
+    return matches_to_methods(ms, typeof(f).name, nothing)
+end
+
+function methods(@nospecialize(f),
+                 mod::Union{Module,AbstractArray{Module},AbstractSet{Module},Nothing}=nothing)
+    # return all matches
+    return methods(f, Tuple{Vararg{Any}}, mod)
+end
+
+# low-level method lookup functions used by the compiler
+
+unionlen(@nospecialize(x)) = x isa Union ? unionlen(x.a) + unionlen(x.b) : 1
+
+function _uniontypes(@nospecialize(x), ts::Array{Any,1})
+    if x isa Union
+        _uniontypes(x.a, ts)
+        _uniontypes(x.b, ts)
+    else
+        push!(ts, x)
+    end
+    return ts
+end
+uniontypes(@nospecialize(x)) = _uniontypes(x, Any[])
+
+function _methods(@nospecialize(f), @nospecialize(t), lim::Int, world::UInt)
+    tt = signature_type(f, t)
+    return _methods_by_ftype(tt, lim, world)
+end
+
+function _methods_by_ftype(@nospecialize(t), lim::Int, world::UInt)
+    return _methods_by_ftype(t, nothing, lim, world)
+end
+function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt)
+    return _methods_by_ftype(t, mt, lim, world, false, RefValue{UInt}(typemin(UInt)), RefValue{UInt}(typemax(UInt)), Ptr{Int32}(C_NULL))
+end
+function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt, ambig::Bool, min::Ref{UInt}, max::Ref{UInt}, has_ambig::Ref{Int32})
+    return ccall(:jl_matching_methods, Any, (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, mt, lim, ambig, world, min, max, has_ambig)::Union{Vector{Any},Nothing}
+end
+
+hasgenerator(m::Method) = isdefined(m, :generator)
+hasgenerator(m::Core.MethodInstance) = hasgenerator(m.def::Method)
+
+function _uncompressed_ir(m::Method)
+    s = m.source
+    if s isa String
+        s = ccall(:jl_uncompress_ir, Ref{CodeInfo}, (Any, Ptr{Cvoid}, Any), m, C_NULL, s)
+    end
+    return s::CodeInfo
+end
+
+_uncompressed_ir(codeinst::CodeInstance, s::String) =
+    ccall(:jl_uncompress_ir, Ref{CodeInfo}, (Any, Any, Any), codeinst.def.def::Method, codeinst, s)
+
+function get_ci_mi(codeinst::CodeInstance)
+    def = codeinst.def
+    if def isa Core.ABIOverride
+        return def.def
+    else
+        return def::MethodInstance
+    end
+end
+
+"""
+    Base.generating_output([incremental::Bool])::Bool
+
+Return `true` if the current process is being used to pre-generate a
+code cache via any of the `--output-*` command line arguments. The optional
+`incremental` argument further specifies the precompilation mode: when set
+to `true`, the function will return `true` only for package precompilation;
+when set to `false`, it will return `true` only for system image generation.
+
+!!! compat "Julia 1.11"
+    This function requires at least Julia 1.11.
+"""
+function generating_output(incremental::Union{Bool,Nothing}=nothing)
+    ccall(:jl_generating_output, Cint, ()) == 0 && return false
+    if incremental !== nothing
+        JLOptions().incremental == incremental || return false
+    end
+    return true
+end
+
+const SLOT_USED = 0x8
+ast_slotflag(@nospecialize(code), i) = ccall(:jl_ir_slotflag, UInt8, (Any, Csize_t), code, i - 1)
+
+"""
+    may_invoke_generator(method, atype, sparams)::Bool
+
+Compute whether or not we may invoke the generator for the given `method` on
+the given `atype` and `sparams`. For correctness, all generated function are
+required to return monotonic answers. However, since we don't expect users to
+be able to successfully implement this criterion, we only call generated
+functions on concrete types. The one exception to this is that we allow calling
+generators with abstract types if the generator does not use said abstract type
+(and thus cannot incorrectly use it to break monotonicity). This function
+computes whether we are in either of these cases.
+
+Unlike normal functions, the compilation heuristics still can't generate good dispatch
+in some cases, but this may still allow inference not to fall over in some limited cases.
+"""
+function may_invoke_generator(mi::MethodInstance)
+    return may_invoke_generator(mi.def::Method, mi.specTypes, mi.sparam_vals)
+end
+function may_invoke_generator(method::Method, @nospecialize(atype), sparams::SimpleVector)
+    # If we have complete information, we may always call the generator
+    isdispatchtuple(atype) && return true
+
+    # We don't have complete information, but it is possible that the generator
+    # syntactically doesn't make use of the information we don't have. Check
+    # for that.
+
+    # For now, only handle the (common, generated by the frontend case) that the
+    # generator only has one method
+    generator = method.generator
+    isa(generator, Core.GeneratedFunctionStub) || return false
+    tt = Tuple{typeof(generator.gen), Vararg{Any}}
+    gen_mthds = _methods_by_ftype(tt, #=lim=#1, method.primary_world)
+    gen_mthds isa Vector || return false
+    length(gen_mthds) == 1 || return false
+
+    generator_method = (first(gen_mthds)::Core.MethodMatch).method
+    nsparams = length(sparams)
+    isdefined(generator_method, :source) || return false
+    code = generator_method.source
+    nslots = ccall(:jl_ir_nslots, Int, (Any,), code)
+    at = unwrap_unionall(atype)
+    at isa DataType || return false
+    (nslots >= 1 + length(sparams) + length(at.parameters)) || return false
+
+    firstarg = 1
+    for i = 1:nsparams
+        if isa(sparams[i], TypeVar)
+            if (ast_slotflag(code, firstarg + i) & SLOT_USED) != 0
+                return false
+            end
+        end
+    end
+    nargs = Int(method.nargs)
+    non_va_args = method.isva ? nargs - 1 : nargs
+    for i = 1:non_va_args
+        if !isdispatchelem(at.parameters[i])
+            if (ast_slotflag(code, firstarg + i + nsparams) & SLOT_USED) != 0
+                return false
+            end
+        end
+    end
+    if method.isva
+        # If the va argument is used, we need to ensure that all arguments that
+        # contribute to the va tuple are dispatchelemes
+        if (ast_slotflag(code, firstarg + nargs + nsparams) & SLOT_USED) != 0
+            for i = (non_va_args+1):length(at.parameters)
+                if !isdispatchelem(at.parameters[i])
+                    return false
+                end
+            end
+        end
+    end
+    return true
+end
+
+# get a handle to the unique specialization object representing a particular instantiation of a call
+# eliminate UnionAll vars that might be degenerate due to having identical bounds,
+# or a concrete upper bound and appearing covariantly.
+function subst_trivial_bounds(@nospecialize(atype))
+    if !isa(atype, UnionAll)
+        return atype
+    end
+    v = atype.var
+    if isconcretetype(v.ub) || v.lb === v.ub
+        subst = try
+            atype{v.ub}
+        catch
+            # Note in rare cases a var bound might not be valid to substitute.
+            nothing
+        end
+        if subst !== nothing
+            return subst_trivial_bounds(subst)
+        end
+    end
+    return UnionAll(v, subst_trivial_bounds(atype.body))
+end
+
+# If removing trivial vars from atype results in an equivalent type, use that
+# instead. Otherwise we can get a case like issue #38888, where a signature like
+#   f(x::S) where S<:Int
+# gets cached and matches a concrete dispatch case.
+function normalize_typevars(method::Method, @nospecialize(atype), sparams::SimpleVector)
+    at2 = subst_trivial_bounds(atype)
+    if at2 !== atype && at2 == atype
+        atype = at2
+        sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), at2, method.sig)::SimpleVector
+        sparams = sp_[2]::SimpleVector
+    end
+    return Pair{Any,SimpleVector}(atype, sparams)
+end
+
+function get_nospecializeinfer_sig(method::Method, @nospecialize(atype), sparams::SimpleVector)
+    isa(atype, DataType) || return method.sig
+    return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Cint),
+        atype, sparams, method, #=int return_if_compileable=#0)
+end
+
+is_nospecialized(method::Method) = method.nospecialize ≠ 0
+is_nospecializeinfer(method::Method) = method.nospecializeinfer && is_nospecialized(method)
+
+"""
+Return MethodInstance corresponding to `atype` and `sparams`.
+
+No widening / narrowing / compileable-normalization of `atype` is performed.
+"""
+function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false)
+    @inline
+    if isa(atype, UnionAll)
+        atype, sparams = normalize_typevars(method, atype, sparams)
+    end
+    if is_nospecializeinfer(method) # TODO: this shouldn't be here
+        atype = get_nospecializeinfer_sig(method, atype, sparams)
+    end
+    if preexisting
+        # check cached specializations
+        # for an existing result stored there
+        return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atype)::Union{Nothing,MethodInstance}
+    end
+    return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atype, sparams)
+end
+
+function specialize_method(match::Core.MethodMatch; kwargs...)
+    return specialize_method(match.method, match.spec_types, match.sparams; kwargs...)
+end
+
+hasintersect(@nospecialize(a), @nospecialize(b)) = typeintersect(a, b) !== Bottom
+
+###########
+# scoping #
+###########
+
+# high-level, more convenient method lookup functions
+
+function visit(f, mt::Core.MethodTable)
+    mt.defs !== nothing && visit(f, mt.defs)
+    nothing
+end
+function visit(f, mc::Core.TypeMapLevel)
+    function avisit(f, e::Memory{Any})
+        for i in 2:2:length(e)
+            isassigned(e, i) || continue
+            ei = e[i]
+            if ei isa Memory{Any}
+                for j in 2:2:length(ei)
+                    isassigned(ei, j) || continue
+                    visit(f, ei[j])
+                end
+            else
+                visit(f, ei)
+            end
+        end
+    end
+    if mc.targ !== nothing
+        avisit(f, mc.targ::Memory{Any})
+    end
+    if mc.arg1 !== nothing
+        avisit(f, mc.arg1::Memory{Any})
+    end
+    if mc.tname !== nothing
+        avisit(f, mc.tname::Memory{Any})
+    end
+    if mc.name1 !== nothing
+        avisit(f, mc.name1::Memory{Any})
+    end
+    mc.list !== nothing && visit(f, mc.list)
+    mc.any !== nothing && visit(f, mc.any)
+    nothing
+end
+function visit(f, d::Core.TypeMapEntry)
+    while d !== nothing
+        f(d.func)
+        d = d.next
+    end
+    nothing
+end
+struct MethodSpecializations
+    specializations::Union{Nothing, Core.MethodInstance, Core.SimpleVector}
+end
+"""
+    specializations(m::Method) → itr
+
+Return an iterator `itr` of all compiler-generated specializations of `m`.
+"""
+specializations(m::Method) = MethodSpecializations(isdefined(m, :specializations) ? m.specializations : nothing)
+function iterate(specs::MethodSpecializations)
+    s = specs.specializations
+    s === nothing && return nothing
+    isa(s, Core.MethodInstance) && return (s, nothing)
+    return iterate(specs, 0)
+end
+iterate(specs::MethodSpecializations, ::Nothing) = nothing
+function iterate(specs::MethodSpecializations, i::Int)
+    s = specs.specializations::Core.SimpleVector
+    n = length(s)
+    i >= n && return nothing
+    item = nothing
+    while i < n && item === nothing
+        item = s[i+=1]
+    end
+    item === nothing && return nothing
+    return (item, i)
+end
+length(specs::MethodSpecializations) = count(Returns(true), specs)
+
+function length(mt::Core.MethodTable)
+    n = 0
+    visit(mt) do m
+        n += 1
+    end
+    return n::Int
+end
+isempty(mt::Core.MethodTable) = (mt.defs === nothing)
+
+uncompressed_ir(m::Method) = isdefined(m, :source) ? _uncompressed_ir(m) :
+                             isdefined(m, :generator) ? error("Method is @generated; try `code_lowered` instead.") :
+                             error("Code for this Method is not available.")
+
+has_image_globalref(m::Method) = ccall(:jl_ir_flag_has_image_globalref, Bool, (Any,), m.source)
diff --git a/base/ryu/Ryu.jl b/base/ryu/Ryu.jl
index 9b236caeb6ff1..86b9a64c002d5 100644
--- a/base/ryu/Ryu.jl
+++ b/base/ryu/Ryu.jl
@@ -19,7 +19,7 @@ neededdigits(::Type{Float16}) = 9 + 5 + 9
 
 """
     Ryu.writeshortest(x, plus=false, space=false, hash=true, precision=-1, expchar=UInt8('e'), padexp=false, decchar=UInt8('.'), typed=false, compact=false)
-    Ryu.writeshortest(buf::Vector{UInt8}, pos::Int, x, args...)
+    Ryu.writeshortest(buf::AbstractVector{UInt8}, pos::Int, x, args...)
 
 Convert a float value `x` into its "shortest" decimal string, which can be parsed back to the same value.
 This function allows achieving the `%g` printf format.
@@ -53,7 +53,7 @@ end
 
 """
     Ryu.writefixed(x, precision, plus=false, space=false, hash=false, decchar=UInt8('.'), trimtrailingzeros=false)
-    Ryu.writefixed(buf::Vector{UInt8}, pos::Int, x, args...)
+    Ryu.writefixed(buf::AbstractVector{UInt8}, pos::Int, x, args...)
 
 Convert a float value `x` into a "fixed" size decimal string of the provided precision.
 This function allows achieving the `%f` printf format.
@@ -81,7 +81,7 @@ end
 
 """
     Ryu.writeexp(x, precision, plus=false, space=false, hash=false, expchar=UInt8('e'), decchar=UInt8('.'), trimtrailingzeros=false)
-    Ryu.writeexp(buf::Vector{UInt8}, pos::Int, x, args...)
+    Ryu.writeexp(buf::AbstractVector{UInt8}, pos::Int, x, args...)
 
 Convert a float value `x` into a scientific notation decimal string.
 This function allows achieving the `%e` printf format.
@@ -112,7 +112,7 @@ end
 function Base.show(io::IO, x::T, forceuntyped::Bool=false, fromprint::Bool=false) where {T <: Base.IEEEFloat}
     compact = get(io, :compact, false)::Bool
     buf = Base.StringVector(neededdigits(T))
-    typed = !forceuntyped && !compact && get(io, :typeinfo, Any) != typeof(x)
+    typed = !forceuntyped && !compact && Base.nonnothing_nonmissing_typeinfo(io) !== typeof(x)
     pos = writeshortest(buf, 1, x, false, false, true, -1,
         (x isa Float32 && !fromprint) ? UInt8('f') : UInt8('e'), false, UInt8('.'), typed, compact)
     write(io, resize!(buf, pos - 1))
diff --git a/base/ryu/exp.jl b/base/ryu/exp.jl
index 30291212d014d..4f749668867e2 100644
--- a/base/ryu/exp.jl
+++ b/base/ryu/exp.jl
@@ -7,34 +7,34 @@ function writeexp(buf, pos, v::T,
     pos = append_sign(x, plus, space, buf, pos)
 
     # special cases
-    if x == 0
-        buf[pos] = UInt8('0')
+    if iszero(x)
+        @inbounds buf[pos] = UInt8('0')
         pos += 1
         if precision > 0 && !trimtrailingzeros
-            buf[pos] = decchar
+            @inbounds buf[pos] = decchar
             pos += 1
             for _ = 1:precision
-                buf[pos] = UInt8('0')
+                @inbounds buf[pos] = UInt8('0')
                 pos += 1
             end
         elseif hash
-            buf[pos] = decchar
+            @inbounds buf[pos] = decchar
             pos += 1
         end
-        buf[pos] = expchar
-        buf[pos + 1] = UInt8('+')
-        buf[pos + 2] = UInt8('0')
-        buf[pos + 3] = UInt8('0')
+        @inbounds buf[pos] = expchar
+        @inbounds buf[pos + 1] = UInt8('+')
+        @inbounds buf[pos + 2] = UInt8('0')
+        @inbounds buf[pos + 3] = UInt8('0')
         return pos + 4
     elseif isnan(x)
-        buf[pos] = UInt8('N')
-        buf[pos + 1] = UInt8('a')
-        buf[pos + 2] = UInt8('N')
+        @inbounds buf[pos] = UInt8('N')
+        @inbounds buf[pos + 1] = UInt8('a')
+        @inbounds buf[pos + 2] = UInt8('N')
         return pos + 3
     elseif !isfinite(x)
-        buf[pos] = UInt8('I')
-        buf[pos + 1] = UInt8('n')
-        buf[pos + 2] = UInt8('f')
+        @inbounds buf[pos] = UInt8('I')
+        @inbounds buf[pos + 1] = UInt8('n')
+        @inbounds buf[pos + 2] = UInt8('f')
         return pos + 3
     end
 
@@ -42,7 +42,7 @@ function writeexp(buf, pos, v::T,
     mant = bits & MANTISSA_MASK
     exp = Int((bits >> 52) & EXP_MASK)
 
-    if exp == 0
+    if iszero(exp)
         e2 = 1 - 1023 - 52
         m2 = mant
     else
@@ -51,7 +51,7 @@ function writeexp(buf, pos, v::T,
     end
     nonzero = false
     precision += 1
-    digits = 0
+    digits = zero(UInt32)
     printedDigits = 0
     availableDigits = 0
     e = 0
@@ -64,14 +64,14 @@ function writeexp(buf, pos, v::T,
             j = p10bits - e2
             #=@inbounds=# mula, mulb, mulc = POW10_SPLIT[POW10_OFFSET[idx + 1] + i + 1]
             digits = mulshiftmod1e9(m2 << 8, mula, mulb, mulc, j + 8)
-            if printedDigits != 0
+            if !iszero(printedDigits)
                 if printedDigits + 9 > precision
                     availableDigits = 9
                     break
                 end
                 pos = append_nine_digits(digits, buf, pos)
                 printedDigits += 9
-            elseif digits != 0
+            elseif !iszero(digits)
                 availableDigits = decimallength(digits)
                 e = i * 9 + availableDigits - 1
                 if availableDigits > precision
@@ -80,10 +80,10 @@ function writeexp(buf, pos, v::T,
                 if precision > 1
                     pos = append_d_digits(availableDigits, digits, buf, pos, decchar)
                 else
-                    buf[pos] = UInt8('0') + digits
+                    @inbounds buf[pos] = UInt8('0') + digits
                     pos += 1
                     if hash
-                        buf[pos] = decchar
+                        @inbounds buf[pos] = decchar
                         pos += 1
                     end
                 end
@@ -93,26 +93,26 @@ function writeexp(buf, pos, v::T,
             i -= 1
         end
     end
-    if e2 < 0 && availableDigits == 0
+    if e2 < 0 && iszero(availableDigits)
         idx = div(-e2, 16)
-        i = MIN_BLOCK_2[idx + 1]
+        i = Int(MIN_BLOCK_2[idx + 1])
         while i < 200
             j = 120 + (-e2 - 16 * idx)
             p = POW10_OFFSET_2[idx + 1] + i - MIN_BLOCK_2[idx + 1]
             if p >= POW10_OFFSET_2[idx + 2]
-                digits = 0
+                digits = zero(UInt32)
             else
                 #=@inbounds=# mula, mulb, mulc = POW10_SPLIT_2[p + 1]
                 digits = mulshiftmod1e9(m2 << 8, mula, mulb, mulc, j + 8)
             end
-            if printedDigits != 0
+            if !iszero(printedDigits)
                 if printedDigits + 9 > precision
                     availableDigits = 9
                     break
                 end
                 pos = append_nine_digits(digits, buf, pos)
                 printedDigits += 9
-            elseif digits != 0
+            elseif !iszero(digits)
                 availableDigits = decimallength(digits)
                 e = -(i + 1) * 9 + availableDigits - 1
                 if availableDigits > precision
@@ -121,10 +121,10 @@ function writeexp(buf, pos, v::T,
                 if precision > 1
                     pos = append_d_digits(availableDigits, digits, buf, pos, decchar)
                 else
-                    buf[pos] = UInt8('0') + digits
+                    @inbounds buf[pos] = UInt8('0') + digits
                     pos += 1
                     if hash
-                        buf[pos] = decchar
+                        @inbounds buf[pos] = decchar
                         pos += 1
                     end
                 end
@@ -135,19 +135,19 @@ function writeexp(buf, pos, v::T,
         end
     end
     maximum = precision - printedDigits
-    if availableDigits == 0
-        digits = 0
+    if iszero(availableDigits)
+        digits = zero(UInt32)
     end
-    lastDigit = 0
+    lastDigit = zero(UInt32)
     if availableDigits > maximum
         for k = 0:(availableDigits - maximum - 1)
-            lastDigit = digits % 10
-            digits = div(digits, 10)
+            lastDigit = digits % UInt32(10)
+            digits = div(digits, UInt32(10))
         end
     end
     roundUp = 0
     if lastDigit != 5
-        roundUp = lastDigit > 5
+        roundUp = lastDigit > 5 ? 1 : 0
     else
         rexp = precision - e
         requiredTwos = -e2 - rexp
@@ -159,10 +159,10 @@ function writeexp(buf, pos, v::T,
         end
         roundUp = trailingZeros ? 2 : 1
     end
-    if printedDigits != 0
-        if digits == 0
+    if !iszero(printedDigits)
+        if iszero(digits)
             for _ = 1:maximum
-                buf[pos] = UInt8('0')
+                @inbounds buf[pos] = UInt8('0')
                 pos += 1
             end
         else
@@ -172,64 +172,68 @@ function writeexp(buf, pos, v::T,
         if precision > 1
             pos = append_d_digits(maximum, digits, buf, pos, decchar)
         else
-            buf[pos] = UInt8('0') + digits
+            @inbounds buf[pos] = UInt8('0') + digits
             pos += 1
             if hash
-                buf[pos] = decchar
+                @inbounds buf[pos] = decchar
                 pos += 1
             end
         end
     end
-    if roundUp != 0
+    if !iszero(roundUp)
         roundPos = pos
         while true
             roundPos -= 1
-            if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-') || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' '))
-                buf[roundPos + 1] = UInt8('1')
+            if roundPos == (startpos - 1) || (@inbounds buf[roundPos]) == UInt8('-') || (plus && (@inbounds buf[roundPos]) == UInt8('+')) || (space && (@inbounds buf[roundPos]) == UInt8(' '))
+                @inbounds buf[roundPos + 1] = UInt8('1')
                 e += 1
                 break
             end
-            c = roundPos > 0 ? buf[roundPos] : 0x00
+            c = roundPos > 0 ? (@inbounds buf[roundPos]) : 0x00
             if c == decchar
                 continue
             elseif c == UInt8('9')
-                buf[roundPos] = UInt8('0')
+                @inbounds buf[roundPos] = UInt8('0')
                 roundUp = 1
                 continue
             else
-                if roundUp == 2 && UInt8(c) % 2 == 0
+                if roundUp == 2 && iseven(c)
                     break
                 end
-                buf[roundPos] = c + 1
+                @inbounds buf[roundPos] = c + 1
                 break
             end
         end
     end
     if trimtrailingzeros
-        while buf[pos - 1] == UInt8('0')
+        while @inbounds buf[pos - 1] == UInt8('0')
             pos -= 1
         end
-        if buf[pos - 1] == decchar && !hash
+        if @inbounds buf[pos - 1] == decchar && !hash
             pos -= 1
         end
     end
     buf[pos] = expchar
     pos += 1
     if e < 0
-        buf[pos] = UInt8('-')
+        @inbounds buf[pos] = UInt8('-')
         pos += 1
         e = -e
     else
-        buf[pos] = UInt8('+')
+        @inbounds buf[pos] = UInt8('+')
         pos += 1
     end
     if e >= 100
-        c = e % 10
-        unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * div(e, 10) + 1, 2)
-        buf[pos + 2] = UInt8('0') + c
+        c = (e % 10) % UInt8
+        @inbounds d100 = DIGIT_TABLE16[div(e, 10) + 1]
+        @inbounds buf[pos] = d100 % UInt8
+        @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8
+        @inbounds buf[pos + 2] = UInt8('0') + c
         pos += 3
     else
-        unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * e + 1, 2)
+        @inbounds d100 = DIGIT_TABLE16[e + 1]
+        @inbounds buf[pos] = d100 % UInt8
+        @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8
         pos += 2
     end
     return pos
diff --git a/base/ryu/fixed.jl b/base/ryu/fixed.jl
index e0085f5c66dab..96777059bc284 100644
--- a/base/ryu/fixed.jl
+++ b/base/ryu/fixed.jl
@@ -38,7 +38,7 @@ function writefixed(buf, pos, v::T,
     mant = bits & MANTISSA_MASK
     exp = Int((bits >> 52) & EXP_MASK)
 
-    if exp == 0
+    if exp == 0 # subnormal
         e2 = 1 - 1023 - 52
         m2 = mant
     else
@@ -53,13 +53,13 @@ function writefixed(buf, pos, v::T,
         i = len - 1
         while i >= 0
             j = p10bits - e2
-            #=@inbounds=# mula, mulb, mulc = POW10_SPLIT[POW10_OFFSET[idx + 1] + i + 1]
+            mula, mulb, mulc = POW10_SPLIT[POW10_OFFSET[idx + 1] + i + 1]
             digits = mulshiftmod1e9(m2 << 8, mula, mulb, mulc, j + 8)
             if nonzero
                 pos = append_nine_digits(digits, buf, pos)
             elseif digits != 0
                 olength = decimallength(digits)
-                pos = append_n_digits(olength, digits, buf, pos)
+                pos = append_c_digits(olength, digits, buf, pos)
                 nonzero = true
             end
             i -= 1
@@ -103,7 +103,7 @@ function writefixed(buf, pos, v::T,
                 end
                 break
             end
-            #=@inbounds=# mula, mulb, mulc = POW10_SPLIT_2[p + 1]
+            mula, mulb, mulc = POW10_SPLIT_2[p + 1]
             digits = mulshiftmod1e9(m2 << 8, mula, mulb, mulc, j + 8)
             if i < blocks - 1
                 pos = append_nine_digits(digits, buf, pos)
@@ -118,11 +118,11 @@ function writefixed(buf, pos, v::T,
                     k += 1
                 end
                 if lastDigit != 5
-                    roundUp = lastDigit > 5
+                    roundUp = lastDigit > 5 ? 1 : 0
                 else
                     requiredTwos = -e2 - precision - 1
                     trailingZeros = requiredTwos <= 0 || (requiredTwos < 60 && pow2(m2, requiredTwos))
-                    roundUp = trailingZeros ? 2 : 1
+                    roundUp = trailingZeros ? 2 : 1 # 2 means round only if odd
                 end
                 if maximum > 0
                     pos = append_c_digits(maximum, digits, buf, pos)
@@ -137,13 +137,13 @@ function writefixed(buf, pos, v::T,
             while true
                 roundPos -= 1
                 if roundPos == (startpos - 1) || (buf[roundPos] == UInt8('-')) || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' '))
+                    buf[pos] = UInt8('0')
                     buf[roundPos + 1] = UInt8('1')
                     if dotPos > 1
                         buf[dotPos] = UInt8('0')
                         buf[dotPos + 1] = decchar
                         hasfractional = true
                     end
-                    buf[pos] = UInt8('0')
                     pos += 1
                     break
                 end
diff --git a/base/ryu/shortest.jl b/base/ryu/shortest.jl
index aaa62ba33c703..13d72f225f867 100644
--- a/base/ryu/shortest.jl
+++ b/base/ryu/shortest.jl
@@ -196,6 +196,7 @@ integer. If a `maxsignif` argument is provided, then `b < maxsignif`.
         e10 = 0
 
         if maxsignif !== nothing && b > maxsignif
+            roundup = false
             b_allzero = true
             # reduce to max significant digits
             while true
@@ -224,7 +225,7 @@ integer. If a `maxsignif` argument is provided, then `b < maxsignif`.
     return b, e10
 end
 
-function writeshortest(buf::Vector{UInt8}, pos, x::T,
+function writeshortest(buf::AbstractVector{UInt8}, pos, x::T,
                        plus=false, space=false, hash=true,
                        precision=-1, expchar=UInt8('e'), padexp=false, decchar=UInt8('.'),
                        typed=false, compact=false) where {T}
@@ -232,79 +233,81 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T,
     # special cases
     if x == 0
         if typed && x isa Float16
-            buf[pos] = UInt8('F')
-            buf[pos + 1] = UInt8('l')
-            buf[pos + 2] = UInt8('o')
-            buf[pos + 3] = UInt8('a')
-            buf[pos + 4] = UInt8('t')
-            buf[pos + 5] = UInt8('1')
-            buf[pos + 6] = UInt8('6')
-            buf[pos + 7] = UInt8('(')
+            @inbounds buf[pos] = UInt8('F')
+            @inbounds buf[pos + 1] = UInt8('l')
+            @inbounds buf[pos + 2] = UInt8('o')
+            @inbounds buf[pos + 3] = UInt8('a')
+            @inbounds buf[pos + 4] = UInt8('t')
+            @inbounds buf[pos + 5] = UInt8('1')
+            @inbounds buf[pos + 6] = UInt8('6')
+            @inbounds buf[pos + 7] = UInt8('(')
             pos += 8
         end
         pos = append_sign(x, plus, space, buf, pos)
-        buf[pos] = UInt8('0')
+        @inbounds buf[pos] = UInt8('0')
         pos += 1
         if hash
-            buf[pos] = decchar
+            @inbounds buf[pos] = decchar
             pos += 1
         end
         if precision == -1
-            buf[pos] = UInt8('0')
-            pos += 1
+            if hash
+                @inbounds buf[pos] = UInt8('0')
+                pos += 1
+            end
             if typed && x isa Float32
-                buf[pos] = UInt8('f')
-                buf[pos + 1] = UInt8('0')
+                @inbounds buf[pos] = UInt8('f')
+                @inbounds buf[pos + 1] = UInt8('0')
                 pos += 2
             end
             if typed && x isa Float16
-                buf[pos] = UInt8(')')
+                @inbounds buf[pos] = UInt8(')')
                 pos += 1
             end
             return pos
         end
         while hash && precision > 1
-            buf[pos] = UInt8('0')
+            @inbounds buf[pos] = UInt8('0')
             pos += 1
             precision -= 1
         end
         if typed && x isa Float32
-            buf[pos] = UInt8('f')
-            buf[pos + 1] = UInt8('0')
+            @inbounds buf[pos] = UInt8('f')
+            @inbounds buf[pos + 1] = UInt8('0')
             pos += 2
         end
         if typed && x isa Float16
-            buf[pos] = UInt8(')')
+            @inbounds buf[pos] = UInt8(')')
             pos += 1
         end
         return pos
     elseif isnan(x)
         pos = append_sign(x, plus, space, buf, pos)
-        buf[pos] = UInt8('N')
-        buf[pos + 1] = UInt8('a')
-        buf[pos + 2] = UInt8('N')
+        @inbounds buf[pos] = UInt8('N')
+        @inbounds buf[pos + 1] = UInt8('a')
+        @inbounds buf[pos + 2] = UInt8('N')
         if typed
             if x isa Float32
-                buf[pos + 3] = UInt8('3')
-                buf[pos + 4] = UInt8('2')
+                @inbounds buf[pos + 3] = UInt8('3')
+                @inbounds buf[pos + 4] = UInt8('2')
             elseif x isa Float16
-                buf[pos + 3] = UInt8('1')
-                buf[pos + 4] = UInt8('6')
+                @inbounds buf[pos + 3] = UInt8('1')
+                @inbounds buf[pos + 4] = UInt8('6')
             end
         end
         return pos + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0)
     elseif !isfinite(x)
         pos = append_sign(x, plus, space, buf, pos)
-        buf[pos] = UInt8('I')
-        buf[pos + 1] = UInt8('n')
-        buf[pos + 2] = UInt8('f')
+        @inbounds buf[pos] = UInt8('I')
+        @inbounds buf[pos + 1] = UInt8('n')
+        @inbounds buf[pos + 2] = UInt8('f')
         if typed
             if x isa Float32
-                buf[pos + 3] = UInt8('3')
-                buf[pos + 4] = UInt8('2')
+                @inbounds buf[pos + 3] = UInt8('3')
+                @inbounds buf[pos + 4] = UInt8('2')
             elseif x isa Float16
-                buf[pos + 3] = UInt8('1')
-                buf[pos + 4] = UInt8('6')
+                @inbounds buf[pos + 3] = UInt8('1')
+                @inbounds buf[pos + 4] = UInt8('6')
             end
         end
         return pos + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0)
@@ -313,14 +316,14 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T,
     output, nexp = reduce_shortest(x, compact ? 999_999 : nothing)
 
     if typed && x isa Float16
-        buf[pos] = UInt8('F')
-        buf[pos + 1] = UInt8('l')
-        buf[pos + 2] = UInt8('o')
-        buf[pos + 3] = UInt8('a')
-        buf[pos + 4] = UInt8('t')
-        buf[pos + 5] = UInt8('1')
-        buf[pos + 6] = UInt8('6')
-        buf[pos + 7] = UInt8('(')
+        @inbounds buf[pos] = UInt8('F')
+        @inbounds buf[pos + 1] = UInt8('l')
+        @inbounds buf[pos + 2] = UInt8('o')
+        @inbounds buf[pos + 3] = UInt8('a')
+        @inbounds buf[pos + 4] = UInt8('t')
+        @inbounds buf[pos + 5] = UInt8('1')
+        @inbounds buf[pos + 6] = UInt8('6')
+        @inbounds buf[pos + 7] = UInt8('(')
         pos += 8
     end
     pos = append_sign(x, plus, space, buf, pos)
@@ -332,161 +335,125 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T,
         !(pt >= olength && abs(mod(x + 0.05, 10^(pt - olength)) - 0.05) > 0.05)
         exp_form = false
         if pt <= 0
-            buf[pos] = UInt8('0')
+            @inbounds buf[pos] = UInt8('0')
             pos += 1
-            buf[pos] = decchar
+            @inbounds buf[pos] = decchar
             pos += 1
             for _ = 1:abs(pt)
-                buf[pos] = UInt8('0')
+                @inbounds buf[pos] = UInt8('0')
                 pos += 1
             end
-            # elseif pt >= olength
+        # elseif pt >= olength
             # nothing to do at this point
-            # else
+        # else
             # nothing to do at this point
         end
     else
+        # make space for decchar
         pos += 1
     end
-    i = 0
-    ptr = pointer(buf)
-    ptr2 = pointer(DIGIT_TABLE)
-    if (output >> 32) != 0
-        q = output ÷ 100000000
-        output2 = (output % UInt32) - UInt32(100000000) * (q % UInt32)
-        output = q
 
-        c = output2 % UInt32(10000)
-        output2 = div(output2, UInt32(10000))
-        d = output2 % UInt32(10000)
-        c0 = (c % 100) << 1
-        c1 = (c ÷ 100) << 1
-        d0 = (d % 100) << 1
-        d1 = (d ÷ 100) << 1
-        memcpy(ptr + pos + olength - 3, ptr2 + c0, 2)
-        memcpy(ptr + pos + olength - 5, ptr2 + c1, 2)
-        memcpy(ptr + pos + olength - 7, ptr2 + d0, 2)
-        memcpy(ptr + pos + olength - 9, ptr2 + d1, 2)
-        i += 8
-    end
-    output2 = output % UInt32
-    while output2 >= 10000
-        c = output2 % UInt32(10000)
-        output2 = div(output2, UInt32(10000))
-        c0 = (c % 100) << 1
-        c1 = (c ÷ 100) << 1
-        memcpy(ptr + pos + olength - i - 3, ptr2 + c0, 2)
-        memcpy(ptr + pos + olength - i - 5, ptr2 + c1, 2)
-        i += 4
-    end
-    if output2 >= 100
-        c = (output2 % UInt32(100)) << 1
-        output2 = div(output2, UInt32(100))
-        memcpy(ptr + pos + olength - i - 3, ptr2 + c, 2)
-        i += 2
-    end
-    if output2 >= 10
-        c = output2 << 1
-        buf[pos + 1] = DIGIT_TABLE[c + 2]
-        buf[pos - exp_form] = DIGIT_TABLE[c + 1]
-    else
-        buf[pos - exp_form] = UInt8('0') + (output2 % UInt8)
-    end
+    append_c_digits(olength, output, buf, pos)
 
     if !exp_form
         if pt <= 0
             pos += olength
             precision -= olength
-            while hash && precision > 0
-                buf[pos] = UInt8('0')
-                pos += 1
-                precision -= 1
-            end
         elseif pt >= olength
             pos += olength
             precision -= olength
             for _ = 1:nexp
-                buf[pos] = UInt8('0')
+                @inbounds buf[pos] = UInt8('0')
                 pos += 1
                 precision -= 1
             end
             if hash
-                buf[pos] = decchar
+                @inbounds buf[pos] = decchar
                 pos += 1
                 if precision < 0
-                    buf[pos] = UInt8('0')
-                    pos += 1
-                end
-                while precision > 0
-                    buf[pos] = UInt8('0')
+                    @inbounds buf[pos] = UInt8('0')
                     pos += 1
-                    precision -= 1
                 end
             end
         else
             pointoff = olength - abs(nexp)
-            memmove(ptr + pos + pointoff, ptr + pos + pointoff - 1, olength - pointoff + 1)
-            buf[pos + pointoff] = decchar
+            # shift bytes after pointoff to make room for decchar
+            buf_cconv = Base.cconvert(Ptr{UInt8}, buf)
+            GC.@preserve buf_cconv begin
+                ptr = Base.unsafe_convert(Ptr{UInt8}, buf_cconv)
+                memmove(ptr + pos + pointoff, ptr + pos + pointoff - 1, (olength - pointoff + 1)%Csize_t)
+            end
+            @inbounds buf[pos + pointoff] = decchar
             pos += olength + 1
             precision -= olength
-            while hash && precision > 0
-                buf[pos] = UInt8('0')
+        end
+        if hash
+            while precision > 0
+                @inbounds buf[pos] = UInt8('0')
                 pos += 1
                 precision -= 1
             end
         end
         if typed && x isa Float32
-            buf[pos] = UInt8('f')
-            buf[pos + 1] = UInt8('0')
+            @inbounds buf[pos] = UInt8('f')
+            @inbounds buf[pos + 1] = UInt8('0')
             pos += 2
         end
     else
+        # move leading digit into place
+        @inbounds buf[pos - 1] = buf[pos]
         if olength > 1 || hash
-            buf[pos] = decchar
+            @inbounds buf[pos] = decchar
             pos += olength
             precision -= olength
         end
-        if hash && olength == 1
-            buf[pos] = UInt8('0')
-            pos += 1
-        end
-        while hash && precision > 0
-            buf[pos] = UInt8('0')
-            pos += 1
-            precision -= 1
+        if hash
+            if olength == 1
+                @inbounds buf[pos] = UInt8('0')
+                pos += 1
+            end
+            while precision > 0
+                @inbounds buf[pos] = UInt8('0')
+                pos += 1
+                precision -= 1
+            end
         end
 
-        buf[pos] = expchar
+        @inbounds buf[pos] = expchar
         pos += 1
         exp2 = nexp + olength - 1
         if exp2 < 0
-            buf[pos] = UInt8('-')
+            @inbounds buf[pos] = UInt8('-')
             pos += 1
             exp2 = -exp2
         elseif padexp
-            buf[pos] = UInt8('+')
+            @inbounds buf[pos] = UInt8('+')
             pos += 1
         end
 
         if exp2 >= 100
             c = exp2 % 10
-            memcpy(ptr + pos - 1, ptr2 + 2 * div(exp2, 10), 2)
-            buf[pos + 2] = UInt8('0') + (c % UInt8)
+            @inbounds d100 = DIGIT_TABLE16[(div(exp2, 10) % Int) + 1]
+            @inbounds buf[pos] = d100 % UInt8
+            @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8
+            @inbounds buf[pos + 2] = UInt8('0') + (c % UInt8)
             pos += 3
         elseif exp2 >= 10
-            memcpy(ptr + pos - 1, ptr2 + 2 * exp2, 2)
+            @inbounds d100 = DIGIT_TABLE16[(exp2 % Int) + 1]
+            @inbounds buf[pos] = d100 % UInt8
+            @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8
             pos += 2
         else
             if padexp
-                buf[pos] = UInt8('0')
+                @inbounds buf[pos] = UInt8('0')
                 pos += 1
             end
-            buf[pos] = UInt8('0') + (exp2 % UInt8)
+            @inbounds buf[pos] = UInt8('0') + (exp2 % UInt8)
             pos += 1
         end
     end
     if typed && x isa Float16
-        buf[pos] = UInt8(')')
+        @inbounds buf[pos] = UInt8(')')
         pos += 1
     end
 
diff --git a/base/ryu/utils.jl b/base/ryu/utils.jl
index f5a88c057e2b3..7902d1ca94206 100644
--- a/base/ryu/utils.jl
+++ b/base/ryu/utils.jl
@@ -22,7 +22,7 @@ qbound(::Type{Float64}) = 63
 """
     Ryu.log10pow2(e::Integer)
 
-Computes `floor(log10(2^e))`. This is valid for all `e < 1651`.
+Compute `floor(log10(2^e))`. This is valid for all `e < 1651`.
 """
 log10pow2(e) = (e * 78913) >> 18
 
@@ -30,14 +30,14 @@ log10pow2(e) = (e * 78913) >> 18
 """
     Ryu.log10pow5(e::Integer)
 
-Computes `floor(log10(5^e))`. This is valid for all `e < 2621`.
+Compute `floor(log10(5^e))`. This is valid for all `e < 2621`.
 """
 log10pow5(e) = (e * 732923) >> 20
 
 """
     Ryu.pow5bits(e)
 
-Computes `e == 0 ? 1 : ceil(log2(5^e))`. This is valid for `e < 3529` (if performend in `Int32` arithmetic).
+Compute `e == 0 ? 1 : ceil(log2(5^e))`. This is valid for `e < 3529` (if performend in `Int32` arithmetic).
 """
 pow5bits(e) = ((e * 1217359) >> 19) + 1
 
@@ -134,7 +134,7 @@ end
 
 Compute `p = a*b` where `b = bLo + bHi<<64`, returning the result as `pLo, pHi` where `p = pLo + pHi<<128`.
 """
-function umul256(a, bHi, bLo)
+function umul256(a::UInt128, bHi::UInt64, bLo::UInt64)
     aLo = a % UInt64
     aHi = (a >> 64) % UInt64
 
@@ -164,7 +164,7 @@ end
 
 Compute `pHi = (a*b)>>128` where `b = bLo + bHi<<64`.
 """
-umul256_hi(a, bHi, bLo) = umul256(a, bHi, bLo)[2]
+umul256_hi(a::UInt128, bHi::UInt64, bLo::UInt64) = umul256(a, bHi, bLo)[2]
 
 """
     Ryu.mulshiftmod1e9(m, mula, mulb, mulc, j)::UInt32
@@ -183,7 +183,7 @@ function mulshiftmod1e9(m, mula, mulb, mulc, j)
     return (v % UInt32) - UInt32(1000000000) * shifted
 end
 
-function append_sign(x, plus, space, buf, pos)
+function append_sign(x, plus::Bool, space::Bool, buf, pos::Int)
     if signbit(x) && !isnan(x)  # suppress minus sign for signaling NaNs
         buf[pos] = UInt8('-')
         pos += 1
@@ -197,101 +197,14 @@ function append_sign(x, plus, space, buf, pos)
     return pos
 end
 
-function append_n_digits(olength, digits, buf, pos)
-    i = 0
-    while digits >= 10000
-        c = digits % 10000
-        digits = div(digits, 10000)
-        c0 = (c % 100) << 1
-        c1 = div(c, 100) << 1
-        unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c0 + 1, 2)
-        unsafe_copyto!(buf, pos + olength - i - 4, DIGIT_TABLE, c1 + 1, 2)
-        i += 4
-    end
-    if digits >= 100
-        c = (digits % 100) << 1
-        digits = div(digits, 100)
-        unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c + 1, 2)
-        i += 2
-    end
-    if digits >= 10
-        c = digits << 1
-        unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c + 1, 2)
-        i += 2
-    else
-        buf[pos] = UInt8('0') + digits
-        i += 1
-    end
-    return pos + i
-end
-
-function append_d_digits(olength, digits, buf, pos, decchar)
-    i = 0
-    while digits >= 10000
-        c = digits % 10000
-        digits = div(digits, 10000)
-        c0 = (c % 100) << 1
-        c1 = div(c, 100) << 1
-        unsafe_copyto!(buf, pos + olength + 1 - i - 2, DIGIT_TABLE, c0 + 1, 2)
-        unsafe_copyto!(buf, pos + olength + 1 - i - 4, DIGIT_TABLE, c1 + 1, 2)
-        i += 4
-    end
-    if digits >= 100
-        c = (digits % 100) << 1
-        digits = div(digits, 100)
-        unsafe_copyto!(buf, pos + olength + 1 - i - 2, DIGIT_TABLE, c + 1, 2)
-        i += 2
-    end
-    if digits >= 10
-        c = digits << 1
-        buf[pos] = DIGIT_TABLE[c + 1]
-        buf[pos + 1] = decchar
-        buf[pos + 2] = DIGIT_TABLE[c + 2]
-        i += 3
-    else
-        buf[pos] = UInt8('0') + digits
-        buf[pos + 1] = decchar
-        i += 2
-    end
-    return pos + i
-end
 
-function append_c_digits(count, digits, buf, pos)
-    i = 0
-    while i < count - 1
-        c = (digits % 100) << 1
-        digits = div(digits, 100)
-        unsafe_copyto!(buf, pos + count - i - 2, DIGIT_TABLE, c + 1, 2)
-        i += 2
-    end
-    if i < count
-        buf[pos + count - i - 1] = UInt8('0') + (digits % 10)
-        i += 1
-    end
-    return pos + i
-end
+import Base: append_c_digits_fast as append_c_digits, append_nine_digits
 
-function append_nine_digits(digits, buf, pos)
-    if digits == 0
-        for _ = 1:9
-            buf[pos] = UInt8('0')
-            pos += 1
-        end
-        return pos
-    end
-    i = 0
-    while i < 5
-        c = digits % 10000
-        digits = div(digits, 10000)
-        c0 = (c % 100) << 1
-        c1 = div(c, 100) << 1
-        unsafe_copyto!(buf, pos + 7 - i, DIGIT_TABLE, c0 + 1, 2)
-        unsafe_copyto!(buf, pos + 5 - i, DIGIT_TABLE, c1 + 1, 2)
-        i += 4
-    end
-    buf[pos] = UInt8('0') + digits
-    i += 1
-    return pos + i
+function append_d_digits(olength::Int, digits::Unsigned, buf, pos::Int, decchar)
+    newpos = append_c_digits(olength, digits, buf, pos + 1)
+    @inbounds buf[pos] = buf[pos + 1]
+    @inbounds buf[pos + 1] = decchar
+    return newpos # == pos + olength + 1
 end
 
 const BIG_MASK = (big(1) << 64) - 1
@@ -390,18 +303,7 @@ for T in (Float64, Float32, Float16)
     @eval pow5split_lookup(::Type{$T}, i) = @inbounds($table_sym[i+1])
 end
 
-const DIGIT_TABLE = UInt8[
-  '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
-  '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
-  '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
-  '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
-  '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
-  '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
-  '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
-  '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
-  '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
-  '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
-]
+const DIGIT_TABLE16 = Base._dec_d100
 
 const POW10_OFFSET = UInt16[
   0, 2, 5, 8, 12, 16, 21, 26, 32, 39,
diff --git a/base/scopedvalues.jl b/base/scopedvalues.jl
new file mode 100644
index 0000000000000..989663d59ab76
--- /dev/null
+++ b/base/scopedvalues.jl
@@ -0,0 +1,348 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module ScopedValues
+
+export ScopedValue, LazyScopedValue, with, @with, ScopedThunk
+public get
+
+"""
+    AbstractScopedValue{T}
+
+Abstract base type for scoped values that propagate values across
+dynamic scopes. All scoped value types must extend this abstract type.
+
+See also: [`ScopedValue`](@ref), [`LazyScopedValue`](@ref)
+
+!!! compat "Julia 1.13"
+    AbstractScopedValue requires Julia 1.13+.
+"""
+abstract type AbstractScopedValue{T} end
+
+
+"""
+    LazyScopedValue{T}(f::OncePerProcess{T})
+
+A scoped value that uses an `OncePerProcess{T}` to lazily compute its default value
+when none has been set in the current scope. Unlike `ScopedValue`, the default is
+not evaluated at construction time but only when first accessed.
+
+# Examples
+
+```julia-repl
+julia> using Base.ScopedValues;
+
+julia> const editor = LazyScopedValue(OncePerProcess(() -> ENV["JULIA_EDITOR"]));
+
+julia> editor[]
+"vim"
+
+julia> with(editor => "emacs") do
+           sval[]
+       end
+"emacs"
+
+julia> editor[]
+"vim"
+```
+
+!!! compat "Julia 1.13"
+    LazyScopedValue requires Julia 1.13+.
+"""
+mutable struct LazyScopedValue{T} <: AbstractScopedValue{T}
+    const getdefault::OncePerProcess{T}
+end
+
+
+"""
+    ScopedValue(x)
+
+Create a container that propagates values across dynamic scopes.
+Use [`with`](@ref) to create and enter a new dynamic scope.
+
+Values can only be set when entering a new dynamic scope,
+and the value referred to will be constant during the
+execution of a dynamic scope.
+
+Dynamic scopes are propagated across tasks.
+
+# Examples
+
+```jldoctest
+julia> using Base.ScopedValues;
+
+julia> const sval = ScopedValue(1);
+
+julia> sval[]
+1
+
+julia> with(sval => 2) do
+           sval[]
+       end
+2
+
+julia> sval[]
+1
+```
+
+!!! compat "Julia 1.11"
+    Scoped values were introduced in Julia 1.11. In Julia 1.8+ a compatible
+    implementation is available from the package ScopedValues.jl.
+"""
+mutable struct ScopedValue{T} <: AbstractScopedValue{T}
+    # NOTE this struct must be defined as mutable one since it's used as a key of
+    #      `ScopeStorage` dictionary and thus needs object identity
+    const hasdefault::Bool # this field is necessary since isbitstype `default` field may be initialized with undefined value
+    const default::T
+    ScopedValue{T}() where T = new{T}(false)
+    ScopedValue{T}(val) where T = new{T}(true, val)
+    ScopedValue(val::T) where T = new{T}(true, val)
+end
+
+Base.eltype(::AbstractScopedValue{T}) where {T} = T
+
+hasdefault(val::ScopedValue) = val.hasdefault
+hasdefault(val::LazyScopedValue) = true
+
+getdefault(val::ScopedValue) = val.hasdefault ? val.default : throw(KeyError(val))
+getdefault(val::LazyScopedValue) = val.getdefault()
+
+"""
+    isassigned(val::ScopedValue)
+
+Test whether a `ScopedValue` has an assigned value.
+
+See also: [`ScopedValues.with`](@ref), [`ScopedValues.@with`](@ref), [`ScopedValues.get`](@ref).
+
+# Examples
+```jldoctest
+julia> using Base.ScopedValues
+
+julia> a = ScopedValue(1); b = ScopedValue{Int}();
+
+julia> isassigned(a)
+true
+
+julia> isassigned(b)
+false
+```
+"""
+function Base.isassigned(val::AbstractScopedValue)
+    hasdefault(val) && return true
+    scope = Core.current_scope()::Union{Scope, Nothing}
+    scope === nothing && return false
+    return haskey((scope::Scope).values, val)
+end
+
+const ScopeStorage = Base.PersistentDict{AbstractScopedValue, Any}
+
+struct Scope
+    values::ScopeStorage
+end
+
+Scope(scope::Scope) = scope
+
+function Scope(parent::Union{Nothing, Scope}, key::AbstractScopedValue{T}, value) where T
+    val = convert(T, value)
+    if parent === nothing
+        return Scope(ScopeStorage(key=>val))
+    end
+    return Scope(ScopeStorage(parent.values, key=>val))
+end
+
+function Scope(scope, pair::Pair{<:AbstractScopedValue})
+    return Scope(scope, pair...)
+end
+
+function Scope(scope, pair1::Pair{<:AbstractScopedValue}, pair2::Pair{<:AbstractScopedValue}, pairs::Pair{<:AbstractScopedValue}...)
+    # Unroll this loop through recursion to make sure that
+    # our compiler optimization support works
+    return Scope(Scope(scope, pair1...), pair2, pairs...)
+end
+Scope(::Nothing) = nothing
+
+function Base.show(io::IO, scope::Scope)
+    print(io, Scope, "(")
+    first = true
+    for (key, value) in scope.values
+        if first
+            first = false
+        else
+            print(io, ", ")
+        end
+        print(io, isa(key, ScopedValue) ? ScopedValue{eltype(key)} : typeof(key), "@")
+        show(io, Base.objectid(key))
+        print(io, " => ")
+        show(IOContext(io, :typeinfo => eltype(key)), value)
+    end
+    print(io, ")")
+end
+
+"""
+    get(val::ScopedValue{T})::Union{Nothing, Some{T}}
+    get(val::LazyScopedValue{T})::Union{Nothing, Some{T}}
+
+If the scoped value isn't set and doesn't have a default value,
+return `nothing`. Otherwise returns `Some{T}` with the current
+value.
+
+See also: [`ScopedValues.with`](@ref), [`ScopedValues.@with`](@ref), [`ScopedValues.ScopedValue`](@ref).
+
+# Examples
+```jldoctest
+julia> using Base.ScopedValues
+
+julia> a = ScopedValue(42); b = ScopedValue{Int}();
+
+julia> ScopedValues.get(a)
+Some(42)
+
+julia> isnothing(ScopedValues.get(b))
+true
+```
+"""
+function get(val::AbstractScopedValue{T}) where {T}
+    scope = Core.current_scope()::Union{Scope, Nothing}
+    if scope === nothing
+        !hasdefault(val) && return nothing
+        return Some{T}(getdefault(val))
+    end
+    scope = scope::Scope
+    if hasdefault(val)
+        return Some{T}(Base.get(Base.Fix1(getdefault, val), scope.values, val)::T)
+    else
+        v = Base.KeyValue.get(scope.values, val)
+        v === nothing && return nothing
+        return Some{T}(only(v)::T)
+    end
+    return nothing
+end
+
+function Base.getindex(val::AbstractScopedValue{T})::T where T
+    maybe = get(val)
+    maybe === nothing && throw(KeyError(val))
+    return something(maybe)::T
+end
+
+function Base.show(io::IO, val::AbstractScopedValue)
+    if isa(val, ScopedValue)
+        print(io, ScopedValue)
+        print(io, '{', eltype(val), '}')
+    else
+        print(io, typeof(val))
+    end
+    print(io, '(')
+    v = get(val)
+    if v !== nothing
+        show(IOContext(io, :typeinfo => eltype(val)), something(v))
+    end
+    print(io, ')')
+end
+
+"""
+    @with (var::ScopedValue{T} => val)... expr
+
+Macro version of `with`. The expression `@with var=>val expr` evaluates `expr` in a
+new dynamic scope with `var` set to `val`. `val` will be converted to type `T`.
+`@with var=>val expr` is equivalent to `with(var=>val) do expr end`, but `@with`
+avoids creating a closure.
+
+See also: [`ScopedValues.with`](@ref), [`ScopedValues.ScopedValue`](@ref), [`ScopedValues.get`](@ref).
+
+# Examples
+```jldoctest
+julia> using Base.ScopedValues
+
+julia> const a = ScopedValue(1);
+
+julia> f(x) = a[] + x;
+
+julia> @with a=>2 f(10)
+12
+
+julia> @with a=>3 begin
+           x = 100
+           f(x)
+       end
+103
+```
+"""
+macro with(exprs...)
+    if length(exprs) > 1
+        ex = last(exprs)
+        exprs = exprs[1:end-1]
+    elseif length(exprs) == 1
+        ex = only(exprs)
+        exprs = ()
+    else
+        error("@with expects at least one argument")
+    end
+    exprs = map(esc, exprs)
+    Expr(:tryfinally, esc(ex), nothing, :(Scope(Core.current_scope()::Union{Nothing, Scope}, $(exprs...))))
+end
+
+"""
+    with(f, (var::ScopedValue{T} => val)...)
+
+Execute `f` in a new dynamic scope with `var` set to `val`. `val` will be converted
+to type `T`.
+
+See also: [`ScopedValues.@with`](@ref), [`ScopedValues.ScopedValue`](@ref), [`ScopedValues.get`](@ref).
+
+# Examples
+```jldoctest
+julia> using Base.ScopedValues
+
+julia> a = ScopedValue(1);
+
+julia> f(x) = a[] + x;
+
+julia> f(10)
+11
+
+julia> with(a=>2) do
+           f(10)
+       end
+12
+
+julia> f(10)
+11
+
+julia> b = ScopedValue(2);
+
+julia> g(x) = a[] + b[] + x;
+
+julia> with(a=>10, b=>20) do
+           g(30)
+       end
+60
+
+julia> with(() -> a[] * b[], a=>3, b=>4)
+12
+```
+"""
+function with(f, pair::Pair{<:AbstractScopedValue}, rest::Pair{<:AbstractScopedValue}...)
+    @with(pair, rest..., f())
+end
+with(@nospecialize(f)) = f()
+
+macro enter_scope(scope, expr)
+    Expr(:tryfinally, esc(expr), nothing, :(Scope($(esc(scope))::Union{Nothing, Scope})))
+end
+
+"""
+    ScopedThunk(f)
+
+Create a callable that records the current dynamic scope, i.e. all current
+`ScopedValue`s, along with `f`. When the callable is invoked, it runs `f`
+in the recorded dynamic scope.
+"""
+struct ScopedThunk{F}
+    f::F
+    scope::Union{Nothing, Scope}
+
+    ScopedThunk{F}(f) where {F} = new{F}(f, Core.current_scope())
+end
+ScopedThunk(f) = ScopedThunk{typeof(f)}(f)
+(sf::ScopedThunk)() = @enter_scope sf.scope sf.f()
+
+end # module ScopedValues
diff --git a/base/secretbuffer.jl b/base/secretbuffer.jl
index eedfd8cbe84c1..bf37c3caa6c23 100644
--- a/base/secretbuffer.jl
+++ b/base/secretbuffer.jl
@@ -29,12 +29,12 @@ true
 ```
 """
 mutable struct SecretBuffer <: IO
-    data::Vector{UInt8}
+    data::Memory{UInt8}
     size::Int
     ptr::Int
 
     function SecretBuffer(; sizehint=128)
-        s = new(Vector{UInt8}(undef, sizehint), 0, 1)
+        s = new(Memory{UInt8}(undef, sizehint), 0, 1)
         finalizer(final_shred!, s)
         return s
     end
@@ -49,7 +49,7 @@ Strings are bad at keeping secrets because they are unable to be securely
 zeroed or destroyed. Therefore, avoid using this constructor with secret data.
 Instead of starting with a string, either construct the `SecretBuffer`
 incrementally with `SecretBuffer()` and [`write`](@ref), or use a `Vector{UInt8}` with
-the `Base.SecretBuffer!(::Vector{UInt8})` constructor.
+the `Base.SecretBuffer!(::AbstractVector{UInt8})` constructor.
 """
 SecretBuffer(str::AbstractString) = SecretBuffer(String(str))
 function SecretBuffer(str::String)
@@ -68,7 +68,7 @@ convert(::Type{SecretBuffer}, s::AbstractString) = SecretBuffer(String(s))
 
 Initialize a new `SecretBuffer` from `data`, securely zeroing `data` afterwards.
 """
-function SecretBuffer!(d::Vector{UInt8})
+function SecretBuffer!(d::AbstractVector{UInt8})
     len = length(d)
     s = SecretBuffer(sizehint=len)
     for i in 1:len
@@ -106,7 +106,7 @@ show(io::IO, s::SecretBuffer) = print(io, "SecretBuffer(\"*******\")")
 ==(s1::SecretBuffer, s2::SecretBuffer) = (s1.ptr == s2.ptr) && (s1.size == s2.size) && (UInt8(0) == _bufcmp(s1.data, s2.data, min(s1.size, s2.size)))
 # Also attempt a constant time buffer comparison algorithm — the length of the secret might be
 # inferred by a timing attack, but not its values.
-@noinline function _bufcmp(data1::Vector{UInt8}, data2::Vector{UInt8}, sz::Int)
+@noinline function _bufcmp(data1::Memory{UInt8}, data2::Memory{UInt8}, sz::Int)
     res = UInt8(0)
     for i = 1:sz
         res |= xor(data1[i], data2[i])
@@ -117,11 +117,23 @@ end
 const _sb_hash = UInt === UInt32 ? 0x111c0925 : 0xb06061e370557428
 hash(s::SecretBuffer, h::UInt) = hash(_sb_hash, h)
 
+copy(s::SecretBuffer) = copy!(SecretBuffer(sizehint=length(s.data)), s)
+function copy!(dest::SecretBuffer, src::SecretBuffer)
+    if length(dest.data) != length(src.data)
+        securezero!(dest.data)
+        dest.data = copy(src.data)
+    else
+        copyto!(dest.data, src.data)
+    end
+    dest.size = src.size
+    dest.ptr = src.ptr
+    return dest
+end
 
 function write(io::SecretBuffer, b::UInt8)
     if io.ptr > length(io.data)
         # We need to resize! the array: do this manually to ensure no copies are left behind
-        newdata = Vector{UInt8}(undef, (io.size+16)*2)
+        newdata = Memory{UInt8}(undef, (io.size+16)*2)
         copyto!(newdata, io.data)
         securezero!(io.data)
         io.data = newdata
@@ -140,8 +152,7 @@ function write(io::IO, s::SecretBuffer)
     return nb
 end
 
-cconvert(::Type{Cstring}, s::SecretBuffer) = unsafe_convert(Cstring, s)
-function unsafe_convert(::Type{Cstring}, s::SecretBuffer)
+function cconvert(::Type{Cstring}, s::SecretBuffer)
     # Ensure that no nuls appear in the valid region
     if any(==(0x00), s.data[i] for i in 1:s.size)
         throw(ArgumentError("`SecretBuffers` containing nul bytes cannot be converted to C strings"))
@@ -152,8 +163,10 @@ function unsafe_convert(::Type{Cstring}, s::SecretBuffer)
     write(s, '\0')
     s.ptr = p
     s.size -= 1
-    return Cstring(unsafe_convert(Ptr{Cchar}, s.data))
+    return s.data
 end
+# optional shim for manual calls to unsafe_convert:
+#   unsafe_convert(::Type{Cstring}, s::SecretBuffer) = unsafe_convert(Cstring, cconvert(Cstring, s))
 
 seek(io::SecretBuffer, n::Integer) = (io.ptr = max(min(n+1, io.size+1), 1); io)
 seekend(io::SecretBuffer) = seek(io, io.size+1)
@@ -187,7 +200,7 @@ resetting its pointer and size.
 This function is used to securely erase the sensitive data held in the buffer,
 reducing the potential for information leaks.
 
-# Example
+# Examples
 ```julia
 s = SecretBuffer()
 write(s, 's', 'e', 'c', 'r', 'e', 't')
diff --git a/base/set.jl b/base/set.jl
index a91bf328bd911..8b8f3d44603c8 100644
--- a/base/set.jl
+++ b/base/set.jl
@@ -91,18 +91,70 @@ isempty(s::Set) = isempty(s.dict)
 length(s::Set)  = length(s.dict)
 in(x, s::Set) = haskey(s.dict, x)
 
-# This avoids hashing and probing twice and it works the same as
-# in!(x, s::Set) = in(x, s) ? true : (push!(s, x); false)
+"""
+    in!(x, s::AbstractSet)::Bool
+
+If `x` is in `s`, return `true`. If not, push `x` into `s` and return `false`.
+This is equivalent to `in(x, s) ? true : (push!(s, x); false)`, but may have a
+more efficient implementation.
+
+See also: [`in`](@ref), [`push!`](@ref), [`Set`](@ref)
+
+!!! compat "Julia 1.11"
+    This function requires at least 1.11.
+
+# Examples
+```jldoctest; filter = r"^\\s+\\d\$"m
+julia> s = Set{Any}([1, 2, 3]); in!(4, s)
+false
+
+julia> length(s)
+4
+
+julia> in!(0x04, s)
+true
+
+julia> s
+Set{Any} with 4 elements:
+  4
+  2
+  3
+  1
+```
+"""
+function in!(x, s::AbstractSet)
+    x ∈ s ? true : (push!(s, x); false)
+end
+
 function in!(x, s::Set)
-    idx, sh = ht_keyindex2_shorthash!(s.dict, x)
+    xT = convert(eltype(s), x)
+    idx, sh = ht_keyindex2_shorthash!(s.dict, xT)
     idx > 0 && return true
-    _setindex!(s.dict, nothing, x, -idx, sh)
+    _setindex!(s.dict, nothing, xT, -idx, sh)
     return false
 end
 
 push!(s::Set, x) = (s.dict[x] = nothing; s)
-pop!(s::Set, x) = (pop!(s.dict, x); x)
-pop!(s::Set, x, default) = (x in s ? pop!(s, x) : default)
+
+function pop!(s::Set, x, default)
+    dict = s.dict
+    index = ht_keyindex(dict, x)
+    if index > 0
+        @inbounds key = dict.keys[index]
+        _delete!(dict, index)
+        return key
+    else
+        return default
+    end
+end
+
+function pop!(s::Set, x)
+    index = ht_keyindex(s.dict, x)
+    index < 1 && throw(KeyError(x))
+    result = @inbounds s.dict.keys[index]
+    _delete!(s.dict, index)
+    result
+end
 
 function pop!(s::Set)
     isempty(s) && throw(ArgumentError("set must be non-empty"))
@@ -117,12 +169,14 @@ copymutable(s::Set{T}) where {T} = Set{T}(s)
 # Set is the default mutable fall-back
 copymutable(s::AbstractSet{T}) where {T} = Set{T}(s)
 
-sizehint!(s::Set, newsz) = (sizehint!(s.dict, newsz); s)
+sizehint!(s::Set, newsz::Integer; shrink::Bool=true) = (sizehint!(s.dict, newsz; shrink); s)
 empty!(s::Set) = (empty!(s.dict); s)
 rehash!(s::Set) = (rehash!(s.dict); s)
 
 iterate(s::Set, i...)       = iterate(KeySet(s.dict), i...)
 
+@propagate_inbounds Iterators.only(s::Set) = Iterators._only(s, first)
+
 # In case the size(s) is smaller than size(t) its more efficient to iterate through
 # elements of s instead and only delete the ones also contained in t.
 # The threshold for this decision boils down to a tradeoff between
@@ -147,7 +201,7 @@ end
     unique(itr)
 
 Return an array containing only the unique elements of collection `itr`,
-as determined by [`isequal`](@ref), in the order that the first of each
+as determined by [`isequal`](@ref) and [`hash`](@ref), in the order that the first of each
 set of equivalent elements originally appears. The element type of the
 input is preserved.
 
@@ -205,7 +259,7 @@ _unique_from(itr, out, seen, i) = unique_from(itr, out, seen, i)
     return out
 end
 
-unique(r::AbstractRange) = allunique(r) ? r : oftype(r, r[begin:begin])
+unique(r::AbstractRange) = allunique(r) ? r : oftype(r, r[begin]:r[begin])
 
 """
     unique(f, itr)
@@ -382,7 +436,7 @@ end
 """
     unique!(A::AbstractVector)
 
-Remove duplicate items as determined by [`isequal`](@ref), then return the modified `A`.
+Remove duplicate items as determined by [`isequal`](@ref) and [`hash`](@ref), then return the modified `A`.
 `unique!` will return the elements of `A` in the order that they occur. If you do not care
 about the order of the returned data, then calling `(sort!(A); unique!(A))` will be much
 more efficient as long as the elements of `A` can be sorted.
@@ -424,12 +478,22 @@ function unique!(itr)
 end
 
 """
-    allunique(itr) -> Bool
+    allunique(itr)::Bool
+    allunique(f, itr)::Bool
 
 Return `true` if all values from `itr` are distinct when compared with [`isequal`](@ref).
+Or if all of `[f(x) for x in itr]` are distinct, for the second method.
+
+Note that `allunique(f, itr)` may call `f` fewer than `length(itr)` times.
+The precise number of calls is regarded as an implementation detail.
+
+`allunique` may use a specialized implementation when the input is sorted.
 
 See also: [`unique`](@ref), [`issorted`](@ref), [`allequal`](@ref).
 
+!!! compat "Julia 1.11"
+    The method `allunique(f, itr)` requires at least Julia 1.11.
+
 # Examples
 ```jldoctest
 julia> allunique([1, 2, 3])
@@ -443,6 +507,9 @@ false
 
 julia> allunique([NaN, 2.0, NaN, 4.0])
 false
+
+julia> allunique(abs, [1, -1, 2])
+false
 ```
 """
 function allunique(C)
@@ -453,8 +520,10 @@ function allunique(C)
     return _hashed_allunique(C)
 end
 
+allunique(f, xs) = allunique(Generator(f, xs))
+
 function _hashed_allunique(C)
-    seen = Set{eltype(C)}()
+    seen = Set{@default_eltype(C)}()
     x = iterate(C)
     if haslength(C) && length(C) > 1000
         for i in OneTo(1000)
@@ -476,7 +545,31 @@ allunique(::Union{AbstractSet,AbstractDict}) = true
 
 allunique(r::AbstractRange) = !iszero(step(r)) || length(r) <= 1
 
-allunique(A::StridedArray) = length(A) < 32 ? _indexed_allunique(A) : _hashed_allunique(A)
+function allunique(A::StridedArray)
+    if length(A) < 32
+        _indexed_allunique(A)
+    elseif OrderStyle(eltype(A)) === Ordered()
+        a1, rest1 = Iterators.peel(A)::Tuple{Any,Any}
+        a2, rest = Iterators.peel(rest1)::Tuple{Any,Any}
+        if !isequal(a1, a2)
+            compare = isless(a1, a2) ? isless : (a,b) -> isless(b,a)
+            for a in rest
+                if compare(a2, a)
+                    a2 = a
+                elseif isequal(a2, a)
+                    return false
+                else
+                    return _hashed_allunique(A)
+                end
+            end
+        else # isequal(a1, a2)
+            return false
+        end
+        return true
+    else
+        _hashed_allunique(A)
+    end
+end
 
 function _indexed_allunique(A)
     length(A) < 2 && return true
@@ -502,16 +595,30 @@ function allunique(t::Tuple)
 end
 allunique(t::Tuple{}) = true
 
+function allunique(f::F, t::Tuple) where {F}
+    length(t) < 2 && return true
+    length(t) < 32 || return _hashed_allunique(Generator(f, t))
+    return allunique(map(f, t))
+end
+
 """
-    allequal(itr) -> Bool
+    allequal(itr)::Bool
+    allequal(f, itr)::Bool
 
 Return `true` if all values from `itr` are equal when compared with [`isequal`](@ref).
+Or if all of `[f(x) for x in itr]` are equal, for the second method.
+
+Note that `allequal(f, itr)` may call `f` fewer than `length(itr)` times.
+The precise number of calls is regarded as an implementation detail.
 
 See also: [`unique`](@ref), [`allunique`](@ref).
 
 !!! compat "Julia 1.8"
     The `allequal` function requires at least Julia 1.8.
 
+!!! compat "Julia 1.11"
+    The method `allequal(f, itr)` requires at least Julia 1.11.
+
 # Examples
 ```jldoctest
 julia> allequal([])
@@ -528,14 +635,36 @@ false
 
 julia> allequal(Dict(:a => 1, :b => 1))
 false
+
+julia> allequal(abs2, [1, -1])
+true
 ```
 """
-allequal(itr) = isempty(itr) ? true : all(isequal(first(itr)), itr)
+function allequal(itr)
+    if haslength(itr)
+        length(itr) <= 1 && return true
+    end
+    pl = Iterators.peel(itr)
+    isnothing(pl) && return true
+    a, rest = pl
+    return all(isequal(a), rest)
+end
 
 allequal(c::Union{AbstractSet,AbstractDict}) = length(c) <= 1
 
 allequal(r::AbstractRange) = iszero(step(r)) || length(r) <= 1
 
+allequal(f, xs) = allequal(Generator(f, xs))
+
+function allequal(f, xs::Tuple)
+    length(xs) <= 1 && return true
+    f1 = f(xs[1])
+    for x in tail(xs)
+        isequal(f1, f(x)) || return false
+    end
+    return true
+end
+
 filter!(f, s::Set) = unsafe_filter!(f, s)
 
 const hashs_seed = UInt === UInt64 ? 0x852ada37cfe8e0ce : 0xcfe8e0ce
@@ -588,7 +717,7 @@ If `count` is specified, then replace at most `count` occurrences in total.
 See also [`replace`](@ref replace(A, old_new::Pair...)).
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\d\$"m
 julia> replace!([1, 2, 1, 3], 1=>0, 2=>4, count=2)
 4-element Vector{Int64}:
  0
@@ -624,7 +753,7 @@ If `count` is specified, then replace at most `count` values in total
 (replacements being defined as `new(x) !== x`).
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\d+(\\s+=>\\s+\\d)?\$"m
 julia> replace!(x -> isodd(x) ? 2x : x, [1, 2, 3, 4])
 4-element Vector{Int64}:
  2
@@ -720,7 +849,7 @@ If `count` is specified, then replace at most `count` values in total
     Version 1.7 is required to replace elements of a `Tuple`.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"^\\s+\\S+\\s+=>\\s+\\d\$"m
 julia> replace(x -> isodd(x) ? 2x : x, [1, 2, 3, 4])
 4-element Vector{Int64}:
  2
diff --git a/base/shell.jl b/base/shell.jl
index 5bfd11fb46d29..68925cbd5d5af 100644
--- a/base/shell.jl
+++ b/base/shell.jl
@@ -4,7 +4,19 @@
 
 const shell_special = "#{}()[]<>|&*?~;"
 
-# strips the end but respects the space when the string ends with "\\ "
+(@doc raw"""
+    rstrip_shell(s::AbstractString)
+
+Strip trailing whitespace from a shell command string, while respecting a trailing backslash followed by a space ("\\ ").
+
+```jldoctest
+julia> Base.rstrip_shell("echo 'Hello World' \\ ")
+"echo 'Hello World' \\ "
+
+julia> Base.rstrip_shell("echo 'Hello World'    ")
+"echo 'Hello World'"
+```
+"""
 function rstrip_shell(s::AbstractString)
     c_old = nothing
     for (i, c) in Iterators.reverse(pairs(s))
@@ -14,16 +26,15 @@ function rstrip_shell(s::AbstractString)
         c_old = c
     end
     SubString(s, 1, 0)
-end
+end)
 
 function shell_parse(str::AbstractString, interpolate::Bool=true;
                      special::AbstractString="", filename="none")
-    s = SubString(str, firstindex(str))
+    last_arg = firstindex(str) # N.B.: This is used by REPLCompletions
+    s = SubString(str, last_arg)
     s = rstrip_shell(lstrip(s))
 
-    # N.B.: This is used by REPLCompletions
-    last_parse = 0:-1
-    isempty(s) && return interpolate ? (Expr(:tuple,:()),last_parse) : ([],last_parse)
+    isempty(s) && return interpolate ? (Expr(:tuple,:()), last_arg) : ([], last_arg)
 
     in_single_quotes = false
     in_double_quotes = false
@@ -32,6 +43,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
     arg = []
     i = firstindex(s)
     st = Iterators.Stateful(pairs(s))
+    update_last_arg = false # true after spaces or interpolate
 
     function push_nonempty!(list, x)
         if !isa(x,AbstractString) || !isempty(x)
@@ -54,6 +66,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
     for (j, c) in st
         j, c = j::Int, c::C
         if !in_single_quotes && !in_double_quotes && isspace(c)
+            update_last_arg = true
             i = consume_upto!(arg, s, i, j)
             append_2to1!(args, arg)
             while !isempty(st)
@@ -77,12 +90,17 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
                 # use parseatom instead of parse to respect filename (#28188)
                 ex, j = Meta.parseatom(s, stpos, filename=filename)
             end
-            last_parse = (stpos:prevind(s, j)) .+ s.offset
-            push_nonempty!(arg, ex)
+            last_arg = stpos + s.offset
+            update_last_arg = true
+            push!(arg, ex)
             s = SubString(s, j)
             Iterators.reset!(st, pairs(s))
             i = firstindex(s)
         else
+            if update_last_arg
+                last_arg = i + s.offset
+                update_last_arg = false
+            end
             if !in_double_quotes && c == '\''
                 in_single_quotes = !in_single_quotes
                 i = consume_upto!(arg, s, i, j)
@@ -124,16 +142,31 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
     push_nonempty!(arg, s[i:end])
     append_2to1!(args, arg)
 
-    interpolate || return args, last_parse
+    interpolate || return args, last_arg
 
     # construct an expression
     ex = Expr(:tuple)
     for arg in args
         push!(ex.args, Expr(:tuple, arg...))
     end
-    return ex, last_parse
+    return ex, last_arg
 end
 
+"""
+    shell_split(command::AbstractString)
+
+Split a shell command string into its individual components.
+
+# Examples
+```jldoctest
+julia> Base.shell_split("git commit -m 'Initial commit'")
+4-element Vector{String}:
+ "git"
+ "commit"
+ "-m"
+ "Initial commit"
+```
+"""
 function shell_split(s::AbstractString)
     parsed = shell_parse(s, false)[1]
     args = String[]
@@ -186,9 +219,9 @@ print_shell_escaped(io::IO; special::String="") = nothing
 """
     shell_escape(args::Union{Cmd,AbstractString...}; special::AbstractString="")
 
-The unexported `shell_escape` function is the inverse of the unexported `shell_split` function:
+The unexported `shell_escape` function is the inverse of the unexported [`Base.shell_split()`](@ref) function:
 it takes a string or command object and escapes any special characters in such a way that calling
-`shell_split` on it would give back the array of words in the original command. The `special`
+[`Base.shell_split()`](@ref) on it would give back the array of words in the original command. The `special`
 keyword argument controls what characters in addition to whitespace, backslashes, quotes and
 dollar signs are considered to be special (default: none).
 
@@ -216,7 +249,7 @@ function print_shell_escaped_posixly(io::IO, args::AbstractString...)
         function isword(c::AbstractChar)
             if '0' <= c <= '9' || 'a' <= c <= 'z' || 'A' <= c <= 'Z'
                 # word characters
-            elseif c == '_' || c == '/' || c == '+' || c == '-'
+            elseif c == '_' || c == '/' || c == '+' || c == '-' || c == '.'
                 # other common characters
             elseif c == '\''
                 have_single = true
@@ -251,6 +284,8 @@ The unexported `shell_escape_posixly` function
 takes a string or command object and escapes any special characters in such a way that
 it is safe to pass it as an argument to a posix shell.
 
+See also: [`Base.shell_escape()`](@ref)
+
 # Examples
 ```jldoctest
 julia> Base.shell_escape_posixly("cat", "/foo/bar baz", "&&", "echo", "done")
@@ -283,7 +318,7 @@ a backslash.
 This function should also work for a POSIX shell, except if the input
 string contains a linefeed (`"\\n"`) character.
 
-See also: [`shell_escape_posixly`](@ref)
+See also: [`Base.shell_escape_posixly()`](@ref)
 """
 function shell_escape_csh(io::IO, args::AbstractString...)
     first = true
@@ -309,7 +344,7 @@ function shell_escape_csh(io::IO, args::AbstractString...)
 end
 shell_escape_csh(args::AbstractString...) =
     sprint(shell_escape_csh, args...;
-           sizehint = sum(sizeof.(args)) + length(args) * 3)
+           sizehint = sum(sizeof, args) + length(args) * 3)
 
 """
     shell_escape_wincmd(s::AbstractString)
@@ -381,9 +416,9 @@ run(setenv(`cmd /C echo %cmdargs%`, "cmdargs" => cmdargs))
 With an I/O stream parameter `io`, the result will be written there,
 rather than returned as a string.
 
-See also [`escape_microsoft_c_args`](@ref), [`shell_escape_posixly`](@ref).
+See also [`Base.escape_microsoft_c_args()`](@ref), [`Base.shell_escape_posixly()`](@ref).
 
-# Example
+# Examples
 ```jldoctest
 julia> Base.shell_escape_wincmd("a^\\"^o\\"^u\\"")
 "a^^\\"^o\\"^^u^\\""
@@ -435,7 +470,7 @@ It joins command-line arguments to be passed to a Windows
 C/C++/Julia application into a command line, escaping or quoting the
 meta characters space, TAB, double quote and backslash where needed.
 
-See also [`shell_escape_wincmd`](@ref), [`escape_raw_string`](@ref).
+See also [`Base.shell_escape_wincmd()`](@ref), [`Base.escape_raw_string()`](@ref).
 """
 function escape_microsoft_c_args(io::IO, args::AbstractString...)
     # http://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
@@ -459,4 +494,4 @@ function escape_microsoft_c_args(io::IO, args::AbstractString...)
 end
 escape_microsoft_c_args(args::AbstractString...) =
     sprint(escape_microsoft_c_args, args...;
-           sizehint = (sum(sizeof.(args)) + 3*length(args)))
+           sizehint = (sum(sizeof, args) + 3*length(args)))
diff --git a/base/show.jl b/base/show.jl
index 7d59a416674e8..cb71376fedc68 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -1,6 +1,9 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Core.Compiler: has_typevar
+using .Compiler: has_typevar
+using .Meta: isidentifier, isoperator, isunaryoperator, isbinaryoperator, ispostfixoperator,
+            is_id_start_char, is_id_char, _isoperator, is_syntactic_operator, is_valid_identifier,
+            is_unary_and_binary_operator
 
 function show(io::IO, ::MIME"text/plain", u::UndefInitializer)
     show(io, u)
@@ -12,6 +15,13 @@ end
 
 show(io::IO, ::MIME"text/plain", r::AbstractRange) = show(io, r) # always use the compact form for printing ranges
 
+function show(io::IO, ::MIME"text/plain", r::UnitRange)
+    show(io, r)
+    if !(get(io, :compact, false)::Bool) && isempty(r)
+        print(io, " (empty range)")
+    end
+end
+
 function show(io::IO, ::MIME"text/plain", r::LinRange)
     isempty(r) && return show(io, r)
     # show for LinRange, e.g.
@@ -23,18 +33,25 @@ function show(io::IO, ::MIME"text/plain", r::LinRange)
     print_range(io, r)
 end
 
+function show(io::IO, ::MIME"text/plain", r::LogRange)  # display LogRange like LinRange
+    isempty(r) && return show(io, r)
+    summary(io, r)
+    println(io, ":")
+    print_range(io, r, " ", ", ", "", " \u2026 ")
+end
+
 function _isself(ft::DataType)
     ftname = ft.name
-    isdefined(ftname, :mt) || return false
-    name = ftname.mt.name
-    mod = parentmodule(ft)  # NOTE: not necessarily the same as ft.name.mt.module
-    return isdefined(mod, name) && ft == typeof(getfield(mod, name))
+    name = ftname.singletonname
+    ftname.name === name && return false
+    mod = parentmodule(ft)
+    return isdefinedglobal(mod, name) && ft === typeof(getglobal(mod, name))
 end
 
 function show(io::IO, ::MIME"text/plain", f::Function)
     get(io, :compact, false)::Bool && return show(io, f)
     ft = typeof(f)
-    name = ft.name.mt.name
+    name = ft.name.singletonname
     if isa(f, Core.IntrinsicFunction)
         print(io, f)
         id = Core.Intrinsics.bitcast(Int32, f)
@@ -65,13 +82,13 @@ ncodeunits(c::ANSIDelimiter) = ncodeunits(c.del)
 textwidth(::ANSIDelimiter) = 0
 
 # An iterator similar to `pairs(::String)` but whose values are Char or ANSIDelimiter
-struct ANSIIterator
-    captures::RegexMatchIterator
+struct ANSIIterator{S}
+    captures::RegexMatchIterator{S}
 end
 ANSIIterator(s::AbstractString) = ANSIIterator(eachmatch(ansi_regex, s))
 
-IteratorSize(::Type{ANSIIterator}) = SizeUnknown()
-eltype(::Type{ANSIIterator}) = Pair{Int, Union{Char,ANSIDelimiter}}
+IteratorSize(::Type{<:ANSIIterator}) = SizeUnknown()
+eltype(::Type{<:ANSIIterator}) = Pair{Int, Union{Char,ANSIDelimiter}}
 function iterate(I::ANSIIterator, (i, m_st)=(1, iterate(I.captures)))
     m_st === nothing && return nothing
     m, (j, new_m_st) = m_st
@@ -80,7 +97,7 @@ function iterate(I::ANSIIterator, (i, m_st)=(1, iterate(I.captures)))
 end
 textwidth(I::ANSIIterator) = mapreduce(textwidth∘last, +, I; init=0)
 
-function _truncate_at_width_or_chars(ignore_ANSI::Bool, str, width, rpad=false, chars="\r\n", truncmark="…")
+function _truncate_at_width_or_chars(ignore_ANSI::Bool, str::AbstractString, width::Int, rpad::Bool=false, chars="\r\n", truncmark="…")
     truncwidth = textwidth(truncmark)
     (width <= 0 || width < truncwidth) && return ""
     wid = truncidx = lastidx = 0
@@ -145,7 +162,7 @@ function show(io::IO, ::MIME"text/plain", iter::Union{KeySet,ValueIterator})
 end
 
 function show(io::IO, ::MIME"text/plain", t::AbstractDict{K,V}) where {K,V}
-    isempty(t) && return show(io, t)
+    (isempty(t) || !haslength(t)) && return show(io, t)
     # show more descriptively, with one line per key/value pair
     recur_io = IOContext(io, :SHOWN_SET => t)
     limit = get(io, :limit, false)::Bool
@@ -294,35 +311,51 @@ struct IOContext{IO_t <: IO} <: AbstractPipe
     dict::ImmutableDict{Symbol, Any}
 
     function IOContext{IO_t}(io::IO_t, dict::ImmutableDict{Symbol, Any}) where IO_t<:IO
-        @assert !(IO_t <: IOContext) "Cannot create `IOContext` from another `IOContext`."
+        io isa IOContext && (io = io.io) # implicitly unwrap, since the io.dict field is not useful anymore, and could confuse pipe_reader consumers
         return new(io, dict)
     end
 end
 
-# (Note that TTY and TTYTerminal io types have a :color property.)
-unwrapcontext(io::IO) = io, get(io,:color,false) ? ImmutableDict{Symbol,Any}(:color, true) : ImmutableDict{Symbol,Any}()
-unwrapcontext(io::IOContext) = io.io, io.dict
+# (Note that TTY and TTYTerminal io types have an implied :color property.)
+ioproperties(io::IO) = get(io, :color, false) ? ImmutableDict{Symbol,Any}(:color, true) : ImmutableDict{Symbol,Any}()
+ioproperties(io::IOContext) = io.dict
+# these can probably be deprecated, but there is a use in the ecosystem for them
+unwrapcontext(io::IO) = (io,)
+unwrapcontext(io::IOContext) = (io.io,)
 
-function IOContext(io::IO, dict::ImmutableDict)
-    io0 = unwrapcontext(io)[1]
-    IOContext{typeof(io0)}(io0, dict)
+function IOContext(io::IO, dict::ImmutableDict{Symbol, Any})
+    return IOContext{typeof(io)}(io, dict)
 end
 
-convert(::Type{IOContext}, io::IO) = IOContext(unwrapcontext(io)...)::IOContext
+function IOContext(io::IOContext, dict::ImmutableDict{Symbol, Any})
+    return typeof(io)(io.io, dict)
+end
+
+
+convert(::Type{IOContext}, io::IOContext) = io
+convert(::Type{IOContext}, io::IO) = IOContext(io, ioproperties(io))::IOContext
+convert(::Type{IOContext{IO_t}}, io::IOContext{IO_t}) where {IO_t} = io
+convert(::Type{IOContext{IO_t}}, io::IO) where {IO_t} = IOContext{IO_t}(io, ioproperties(io))::IOContext{IO_t}
 
 IOContext(io::IO) = convert(IOContext, io)
+IOContext{IO_t}(io::IO) where {IO_t} = convert(IOContext{IO_t}, io)
 
 function IOContext(io::IO, KV::Pair)
-    io0, d = unwrapcontext(io)
-    IOContext(io0, ImmutableDict{Symbol,Any}(d, KV[1], KV[2]))
+    d = ioproperties(io)
+    return IOContext(io, ImmutableDict{Symbol,Any}(d, KV[1], KV[2]))
 end
 
 """
     IOContext(io::IO, context::IOContext)
 
 Create an `IOContext` that wraps an alternate `IO` but inherits the properties of `context`.
+
+!!! note
+    Unless explicitly set in the wrapped `io` the `displaysize` of `io` will not be inherited.
+    This is because by default `displaysize` is not a property of IO objects themselves, but lazily inferred,
+    as the size of the terminal window can change during the lifetime of the IO object.
 """
-IOContext(io::IO, context::IO) = IOContext(unwrapcontext(io)[1], unwrapcontext(context)[2])
+IOContext(io::IO, context::IO) = IOContext(io, ioproperties(context))
 
 """
     IOContext(io::IO, KV::Pair...)
@@ -360,12 +393,12 @@ julia> io = IOBuffer();
 
 julia> printstyled(IOContext(io, :color => true), "string", color=:red)
 
-julia> String(take!(io))
+julia> takestring!(io)
 "\\e[31mstring\\e[39m"
 
 julia> printstyled(io, "string", color=:red)
 
-julia> String(take!(io))
+julia> takestring!(io)
 "string"
 ```
 
@@ -412,7 +445,7 @@ get(io::IO, key, default) = default
 keys(io::IOContext) = keys(io.dict)
 keys(io::IO) = keys(ImmutableDict{Symbol,Any}())
 
-displaysize(io::IOContext) = haskey(io, :displaysize) ? io[:displaysize]::Tuple{Int,Int} : displaysize(io.io)
+displaysize(io::IOContext) = haskey(io, :displaysize) ? io[:displaysize]::Tuple{Int,Int} : displaysize(io.io)::Tuple{Int,Int}
 
 show_circular(io::IO, @nospecialize(x)) = false
 function show_circular(io::IOContext, @nospecialize(x))
@@ -420,7 +453,7 @@ function show_circular(io::IOContext, @nospecialize(x))
     for (k, v) in io.dict
         if k === :SHOWN_SET
             if v === x
-                print(io, "#= circular reference @-$d =#")
+                printstyled(io, "#= circular reference @-$d =#"; color = :yellow)
                 return true
             end
             d += 1
@@ -499,43 +532,33 @@ function _show_default(io::IO, @nospecialize(x))
 end
 
 function active_module()
-    isassigned(REPL_MODULE_REF) || return Main
-    REPL = REPL_MODULE_REF[]
-    return invokelatest(REPL.active_module)::Module
+    if ccall(:jl_is_in_pure_context, Bool, ())
+        error("active_module() should not be called from a pure context")
+    end
+    if !@isdefined(active_repl) || active_repl === nothing
+        return Main
+    end
+    return invokelatest(active_module, active_repl)::Module
 end
 
-# Check if a particular symbol is exported from a standard library module
-function is_exported_from_stdlib(name::Symbol, mod::Module)
-    !isdefined(mod, name) && return false
-    orig = getfield(mod, name)
-    while !(mod === Base || mod === Core)
-        activemod = active_module()
-        parent = parentmodule(mod)
-        if mod === activemod || mod === parent || parent === activemod
-            return false
-        end
-        mod = parent
-    end
-    return isexported(mod, name) && isdefined(mod, name) && !isdeprecated(mod, name) && getfield(mod, name) === orig
+module UsesBaseOnly
 end
 
 function show_function(io::IO, f::Function, compact::Bool, fallback::Function)
-    ft = typeof(f)
-    mt = ft.name.mt
-    if mt === Symbol.name.mt
-        # uses shared method table
+    fname = typeof(f).name
+    if fname.name === fname.singletonname
         fallback(io, f)
     elseif compact
-        print(io, mt.name)
-    elseif isdefined(mt, :module) && isdefined(mt.module, mt.name) &&
-        getfield(mt.module, mt.name) === f
-        mod = active_module()
-        if is_exported_from_stdlib(mt.name, mt.module) || mt.module === mod
-            show_sym(io, mt.name)
-        else
-            print(io, mt.module, ".")
-            show_sym(io, mt.name)
-        end
+        print(io, fname.singletonname)
+    elseif isdefined(fname, :module) && isdefinedglobal(fname.module, fname.singletonname) && isconst(fname.module, fname.singletonname) &&
+            getglobal(fname.module, fname.singletonname) === f
+        # this used to call the removed internal function `is_exported_from_stdlib`, which effectively
+        # just checked for exports from Base.
+        mod = get(io, :module, UsesBaseOnly)
+        if !(isvisible(fname.singletonname, fname.module, mod) || fname.module === mod)
+            print(io, fname.module, ".")
+        end
+        show_sym(io, fname.singletonname)
     else
         fallback(io, f)
     end
@@ -603,7 +626,7 @@ function make_typealias(@nospecialize(x::Type))
     Any === x && return nothing
     x <: Tuple && return nothing
     mods = modulesof!(Set{Module}(), x)
-    Core in mods && push!(mods, Base)
+    replace!(mods, Core=>Base)
     aliases = Tuple{GlobalRef,SimpleVector}[]
     xenv = UnionAll[]
     for p in uniontypes(unwrap_unionall(x))
@@ -612,8 +635,8 @@ function make_typealias(@nospecialize(x::Type))
     x isa UnionAll && push!(xenv, x)
     for mod in mods
         for name in unsorted_names(mod)
-            if isdefined(mod, name) && !isdeprecated(mod, name) && isconst(mod, name)
-                alias = getfield(mod, name)
+            if isdefinedglobal(mod, name) && !isdeprecated(mod, name) && isconst(mod, name)
+                alias = getglobal(mod, name)
                 if alias isa Type && !has_free_typevars(alias) && !print_without_params(alias) && x <: alias
                     if alias isa UnionAll
                         (ti, env) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), x, alias)::SimpleVector
@@ -666,7 +689,7 @@ function show_can_elide(p::TypeVar, wheres::Vector, elide::Int, env::SimpleVecto
         has_typevar(v.lb, p) && return false
         has_typevar(v.ub, p) && return false
     end
-    for i = 1:length(env)
+    for i = eachindex(env)
         i == skip && continue
         has_typevar(env[i], p) && return false
     end
@@ -722,9 +745,9 @@ end
 function show_typealias(io::IO, name::GlobalRef, x::Type, env::SimpleVector, wheres::Vector)
     if !(get(io, :compact, false)::Bool)
         # Print module prefix unless alias is visible from module passed to
-        # IOContext. If :module is not set, default to Main (or current active module).
+        # IOContext. If :module is not set, default to Main.
         # nothing can be used to force printing prefix.
-        from = get(io, :module, active_module())
+        from = get(io, :module, Main)
         if (from === nothing || !isvisible(name.name, name.mod, from))
             show(io, name.mod)
             print(io, ".")
@@ -805,7 +828,7 @@ function make_typealiases(@nospecialize(x::Type))
     Any === x && return aliases, Union{}
     x <: Tuple && return aliases, Union{}
     mods = modulesof!(Set{Module}(), x)
-    Core in mods && push!(mods, Base)
+    replace!(mods, Core=>Base)
     vars = Dict{Symbol,TypeVar}()
     xenv = UnionAll[]
     each = Any[]
@@ -816,14 +839,14 @@ function make_typealiases(@nospecialize(x::Type))
     x isa UnionAll && push!(xenv, x)
     for mod in mods
         for name in unsorted_names(mod)
-            if isdefined(mod, name) && !isdeprecated(mod, name) && isconst(mod, name)
-                alias = getfield(mod, name)
+            if isdefinedglobal(mod, name) && !isdeprecated(mod, name) && isconst(mod, name)
+                alias = getglobal(mod, name)
                 if alias isa Type && !has_free_typevars(alias) && !print_without_params(alias) && !(alias <: Tuple)
                     (ti, env) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), x, alias)::SimpleVector
                     ti === Union{} && continue
                     # make sure this alias wasn't from an unrelated part of the Union
                     mod2 = modulesof!(Set{Module}(), alias)
-                    mod in mod2 || (mod === Base && Core in mods) || continue
+                    mod in mod2 || (mod === Base && Core in mod2) || continue
                     env = env::SimpleVector
                     applied = alias
                     if !isempty(env)
@@ -943,7 +966,7 @@ function show(io::IO, ::MIME"text/plain", @nospecialize(x::Type))
     # give a helpful hint for function types
     if x isa DataType && x !== UnionAll && !(get(io, :compact, false)::Bool)
         tn = x.name::Core.TypeName
-        globname = isdefined(tn, :mt) ? tn.mt.name : nothing
+        globname = tn.singletonname
         if is_global_function(tn, globname)
             print(io, " (singleton type of function ")
             show_sym(io, globname)
@@ -1006,42 +1029,74 @@ end
 # If an object with this name exists in 'from', we need to check that it's the same binding
 # and that it's not deprecated.
 function isvisible(sym::Symbol, parent::Module, from::Module)
-    owner = ccall(:jl_binding_owner, Ptr{Cvoid}, (Any, Any), parent, sym)
-    from_owner = ccall(:jl_binding_owner, Ptr{Cvoid}, (Any, Any), from, sym)
-    return owner !== C_NULL && from_owner === owner &&
-        !isdeprecated(parent, sym) &&
-        isdefined(from, sym) # if we're going to return true, force binding resolution
+    isdeprecated(parent, sym) && return false
+    isdefinedglobal(from, sym) || return false
+    isdefinedglobal(parent, sym) || return false
+    parent_binding = convert(Core.Binding, GlobalRef(parent, sym))
+    from_binding = convert(Core.Binding, GlobalRef(from, sym))
+    while true
+        from_binding === parent_binding && return true
+        partition = lookup_binding_partition(tls_world_age(), from_binding)
+        is_some_explicit_imported(binding_kind(partition)) || break
+        from_binding = partition_restriction(partition)::Core.Binding
+    end
+    parent_partition = lookup_binding_partition(tls_world_age(), parent_binding)
+    from_partition = lookup_binding_partition(tls_world_age(), from_binding)
+    if is_defined_const_binding(binding_kind(parent_partition)) && is_defined_const_binding(binding_kind(from_partition))
+        return parent_partition.restriction === from_partition.restriction
+    end
+    return false
 end
 
 function is_global_function(tn::Core.TypeName, globname::Union{Symbol,Nothing})
-    if globname !== nothing
+    if globname !== nothing && isconcretetype(tn.wrapper) && tn !== DataType.name # ignore that typeof(DataType)===DataType, since it is valid but not useful
         globname_str = string(globname::Symbol)
-        if ('#' ∉ globname_str && '@' ∉ globname_str && isdefined(tn, :module) &&
-                isbindingresolved(tn.module, globname) && isdefined(tn.module, globname) &&
-                isconcretetype(tn.wrapper) && isa(getfield(tn.module, globname), tn.wrapper))
+        if '#' ∉ globname_str && '@' ∉ globname_str && isdefined(tn, :module) &&
+                isdefinedglobal(tn.module, globname) && isconst(tn.module, globname) &&
+                isa(getglobal(tn.module, globname), tn.wrapper)
             return true
         end
     end
     return false
 end
 
+function check_world_bounded(tn::Core.TypeName)
+    bnd = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), tn.module, tn.name, true)
+    isdefined(bnd, :partitions) || return nothing
+    partition = @atomic bnd.partitions
+    while true
+        if is_defined_const_binding(binding_kind(partition))
+            cval = partition_restriction(partition)
+            if isa(cval, Type) && cval <: tn.wrapper
+                max_world = @atomic partition.max_world
+                max_world == typemax(UInt) && return nothing
+                return Int(partition.min_world):Int(max_world)
+            end
+        end
+        isdefined(partition, :next) || return nothing
+        partition = @atomic partition.next
+    end
+end
+
 function show_type_name(io::IO, tn::Core.TypeName)
     if tn === UnionAll.name
         # by coincidence, `typeof(Type)` is a valid representation of the UnionAll type.
         # intercept this case and print `UnionAll` instead.
         return print(io, "UnionAll")
     end
-    globname = isdefined(tn, :mt) ? tn.mt.name : nothing
+    globname = tn.singletonname
     globfunc = is_global_function(tn, globname)
     sym = (globfunc ? globname : tn.name)::Symbol
     globfunc && print(io, "typeof(")
     quo = false
+    world = check_world_bounded(tn)
+    world !== nothing && print(io, "@world(")
     if !(get(io, :compact, false)::Bool)
         # Print module prefix unless type is visible from module passed to
-        # IOContext If :module is not set, default to Main (or current active module).
+        # IOContext If :module is not set, default to Main.
         # nothing can be used to force printing prefix
-        from = get(io, :module, active_module())
-        if isdefined(tn, :module) && (from === nothing || !isvisible(sym, tn.module, from))
+        from = get(io, :module, Main)
+        if isdefined(tn, :module) && (from === nothing || !isvisible(sym, tn.module, from::Module))
             show(io, tn.module)
             print(io, ".")
             if globfunc && !is_id_start_char(first(string(sym)))
@@ -1054,12 +1109,15 @@ function show_type_name(io::IO, tn::Core.TypeName)
         end
     end
     show_sym(io, sym)
+    world !== nothing && print(io, ", ", world, ")")
     quo      && print(io, ")")
     globfunc && print(io, ")")
     nothing
 end
 
 function maybe_kws_nt(x::DataType)
+    # manually-written version of
+    # x <: (Pairs{Symbol, eltype(NT), Nothing, NT} where NT <: NamedTuple)
     x.name === typename(Pairs) || return nothing
     length(x.parameters) == 4 || return nothing
     x.parameters[1] === Symbol || return nothing
@@ -1069,7 +1127,7 @@ function maybe_kws_nt(x::DataType)
         types isa DataType || return nothing
         x.parameters[2] === eltype(p4) || return nothing
         isa(syms, Tuple) || return nothing
-        x.parameters[3] === typeof(syms) || return nothing
+        x.parameters[3] === Nothing || return nothing
         return p4
     end
     return nothing
@@ -1084,29 +1142,68 @@ function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[])
 
     # Print tuple types with homogeneous tails longer than max_n compactly using `NTuple` or `Vararg`
     if istuple
+        if n == 0
+            print(io, "Tuple{}")
+            return
+        end
+
+        # find the length of the homogeneous tail
         max_n = 3
         taillen = 1
-        for i in (n-1):-1:1
-            if parameters[i] === parameters[n]
-                taillen += 1
+        pn = parameters[n]
+        fulln = n
+        vakind = :none
+        vaN = 0
+        if pn isa Core.TypeofVararg
+            if isdefined(pn, :N)
+                vaN = pn.N
+                if vaN isa Int
+                    taillen = vaN
+                    fulln += taillen - 1
+                    vakind = :fixed
+                else
+                    vakind = :bound
+                end
             else
-                break
+                vakind = :unbound
+            end
+            pn = unwrapva(pn)
+        end
+        if !(pn isa TypeVar || pn isa Type)
+            # prefer Tuple over NTuple if it contains something other than types
+            # (e.g. if the user has switched the N and T accidentally)
+            taillen = 0
+        elseif vakind === :none || vakind === :fixed
+            for i in (n-1):-1:1
+                if parameters[i] === pn
+                    taillen += 1
+                else
+                    break
+                end
             end
         end
-        if n == taillen > max_n
-            print(io, "NTuple{", n, ", ")
-            show(io, parameters[1])
+
+        # prefer NTuple over Tuple if it is a Vararg without a fixed length
+        # and prefer Tuple for short lists of elements
+        if (vakind == :bound && n == 1 == taillen) || (vakind === :fixed && taillen == fulln > max_n) ||
+           (vakind === :none && taillen == fulln > max_n)
+            print(io, "NTuple{")
+            vakind === :bound ? show(io, vaN) : print(io, fulln)
+            print(io, ", ")
+            show(io, pn)
             print(io, "}")
         else
             print(io, "Tuple{")
-            for i = 1:(taillen > max_n ? n-taillen : n)
+            headlen = (taillen > max_n ? fulln - taillen : fulln)
+            for i = 1:headlen
                 i > 1 && print(io, ", ")
-                show(io, parameters[i])
+                show(io, vakind === :fixed && i >= n ? pn : parameters[i])
             end
-            if taillen > max_n
-                print(io, ", Vararg{")
-                show(io, parameters[n])
-                print(io, ", ", taillen, "}")
+            if headlen < fulln
+                headlen > 0 && print(io, ", ")
+                print(io, "Vararg{")
+                show(io, pn)
+                print(io, ", ", fulln - headlen, "}")
             end
             print(io, "}")
         end
@@ -1134,11 +1231,11 @@ end
 
 function show_at_namedtuple(io::IO, syms::Tuple, types::DataType)
     first = true
-    for i in 1:length(syms)
+    for i in eachindex(syms)
         if !first
             print(io, ", ")
         end
-        print(io, syms[i])
+        show_sym(io, syms[i])
         typ = types.parameters[i]
         if typ !== Any
             print(io, "::")
@@ -1192,14 +1289,13 @@ function show(io::IO, tn::Core.TypeName)
     print(io, ")")
 end
 
+nonnothing_nonmissing_typeinfo(io::IO) = nonmissingtype(nonnothingtype(get(io, :typeinfo, Any)))
+show(io::IO, b::Bool) = print(io, nonnothing_nonmissing_typeinfo(io) === Bool ? (b ? "1" : "0") : (b ? "true" : "false"))
 show(io::IO, ::Nothing) = print(io, "nothing")
-show(io::IO, b::Bool) = print(io, get(io, :typeinfo, Any) === Bool ? (b ? "1" : "0") : (b ? "true" : "false"))
 show(io::IO, n::Signed) = (write(io, string(n)); nothing)
 show(io::IO, n::Unsigned) = print(io, "0x", string(n, pad = sizeof(n)<<1, base = 16))
 print(io::IO, n::Unsigned) = print(io, string(n))
 
-show(io::IO, p::Ptr) = print(io, typeof(p), " @0x$(string(UInt(p), base = 16, pad = Sys.WORD_SIZE>>2))")
-
 has_tight_type(p::Pair) =
     typeof(p.first)  == typeof(p).parameters[1] &&
     typeof(p.second) == typeof(p).parameters[2]
@@ -1279,17 +1375,27 @@ function sourceinfo_slotnames(slotnames::Vector{Symbol})
     return printnames
 end
 
-show(io::IO, l::Core.MethodInstance) = show_mi(io, l)
+show(io::IO, mi::Core.MethodInstance) = show_mi(io, mi)
+function show(io::IO, codeinst::Core.CodeInstance)
+    print(io, "CodeInstance for ")
+    def = codeinst.def
+    if isa(def, Core.ABIOverride)
+        show_mi(io, def.def)
+        print(io, " (ABI Overridden)")
+    else
+        show_mi(io, def::MethodInstance)
+    end
+end
 
-function show_mi(io::IO, l::Core.MethodInstance, from_stackframe::Bool=false)
-    def = l.def
+function show_mi(io::IO, mi::Core.MethodInstance, from_stackframe::Bool=false)
+    def = mi.def
     if isa(def, Method)
-        if isdefined(def, :generator) && l === def.generator
+        if isdefined(def, :generator) && mi === def.generator
             print(io, "MethodInstance generator for ")
             show(io, def)
         else
             print(io, "MethodInstance for ")
-            show_tuple_as_call(io, def.name, l.specTypes; qualified=true)
+            show_tuple_as_call(io, def.name, mi.specTypes; qualified=true)
         end
     else
         print(io, "Toplevel MethodInstance thunk")
@@ -1297,41 +1403,19 @@ function show_mi(io::IO, l::Core.MethodInstance, from_stackframe::Bool=false)
         # MethodInstance is part of a stacktrace, it gets location info
         # added by other means.  But if it isn't, then we should try
         # to print a little more identifying information.
-        if !from_stackframe
-            linetable = l.uninferred.linetable
-            line = isempty(linetable) ? "unknown" : (lt = linetable[1]::Union{LineNumberNode,Core.LineInfoNode}; string(lt.file, ':', lt.line))
-            print(io, " from ", def, " starting at ", line)
-        end
-    end
-end
-
-# These sometimes show up as Const-values in InferenceFrameInfo signatures
-show(io::IO, r::Core.Compiler.UnitRange) = show(io, r.start : r.stop)
-show(io::IO, mime::MIME{Symbol("text/plain")}, r::Core.Compiler.UnitRange) = show(io, mime, r.start : r.stop)
-
-function show(io::IO, mi_info::Core.Compiler.Timings.InferenceFrameInfo)
-    mi = mi_info.mi
-    def = mi.def
-    if isa(def, Method)
-        if isdefined(def, :generator) && mi === def.generator
-            print(io, "InferenceFrameInfo generator for ")
-            show(io, def)
-        else
-            print(io, "InferenceFrameInfo for ")
-            argnames = [isa(a, Core.Const) ? (isa(a.val, Type) ? "" : a.val) : "" for a in mi_info.slottypes[1:mi_info.nargs]]
-            show_tuple_as_call(io, def.name, mi.specTypes; argnames, qualified=true)
+        if !from_stackframe && isdefined(mi, :cache)
+            ci = mi.cache
+            if ci.owner === :uninferred
+                di = ci.inferred.debuginfo
+                file, line = IRShow.debuginfo_firstline(di)
+                file = string(file)
+                line = isempty(file) || line < 0 ? "<unknown>" : "$file:$line"
+                print(io, " from ", def, " starting at ", line)
+            end
         end
-    else
-        linetable = mi.uninferred.linetable
-        line = isempty(linetable) ? "" : (lt = linetable[1]; string(lt.file, ':', lt.line))
-        print(io, "Toplevel InferenceFrameInfo thunk from ", def, " starting at ", line)
     end
 end
 
-function show(io::IO, tinf::Core.Compiler.Timings.Timing)
-    print(io, "Core.Compiler.Timings.Timing(", tinf.mi_info, ") with ", length(tinf.children), " children")
-end
-
 function show_delim_array(io::IO, itr::Union{AbstractArray,SimpleVector}, op, delim, cl,
                           delim_one, i1=first(LinearIndices(itr)), l=last(LinearIndices(itr)))
     print(io, op)
@@ -1347,11 +1431,11 @@ function show_delim_array(io::IO, itr::Union{AbstractArray,SimpleVector}, op, de
                     x = itr[i]
                     show(recur_io, x)
                 end
-                i += 1
-                if i > l
+                if i == l
                     delim_one && first && print(io, delim)
                     break
                 end
+                i += 1
                 first = false
                 print(io, delim)
                 print(io, ' ')
@@ -1424,9 +1508,7 @@ show(io::IO, s::Symbol) = show_unquoted_quote_expr(io, s, 0, 0, 0)
 #   eval(Meta.parse("Set{Int64}([2,3,1])")) # ==> An actual set
 # While this isn’t true of ALL show methods, it is of all ASTs.
 
-using Core.Compiler: TypedSlot, UnoptSlot
-
-const ExprNode = Union{Expr, QuoteNode, UnoptSlot, LineNumberNode, SSAValue,
+const ExprNode = Union{Expr, QuoteNode, SlotNumber, LineNumberNode, SSAValue,
                        GotoNode, GotoIfNot, GlobalRef, PhiNode, PhiCNode, UpsilonNode,
                        ReturnNode}
 # Operators have precedence levels from 1-N, and show_unquoted defaults to a
@@ -1465,111 +1547,16 @@ const expr_parens = Dict(:tuple=>('(',')'), :vcat=>('[',']'),
                          :ncat =>('[',']'), :nrow =>('[',']'),
                          :braces=>('{','}'), :bracescat=>('{','}'))
 
-## AST decoding helpers ##
-
-is_id_start_char(c::AbstractChar) = ccall(:jl_id_start_char, Cint, (UInt32,), c) != 0
-is_id_char(c::AbstractChar) = ccall(:jl_id_char, Cint, (UInt32,), c) != 0
-
-"""
-     isidentifier(s) -> Bool
-
-Return whether the symbol or string `s` contains characters that are parsed as
-a valid ordinary identifier (not a binary/unary operator) in Julia code;
-see also [`Base.isoperator`](@ref).
-
-Internally Julia allows any sequence of characters in a `Symbol` (except `\\0`s),
-and macros automatically use variable names containing `#` in order to avoid
-naming collision with the surrounding code. In order for the parser to
-recognize a variable, it uses a limited set of characters (greatly extended by
-Unicode). `isidentifier()` makes it possible to query the parser directly
-whether a symbol contains valid characters.
-
-# Examples
-```jldoctest
-julia> Meta.isidentifier(:x), Meta.isidentifier("1x")
-(true, false)
-```
-"""
-function isidentifier(s::AbstractString)
-    x = Iterators.peel(s)
-    isnothing(x) && return false
-    (s == "true" || s == "false") && return false
-    c, rest = x
-    is_id_start_char(c) || return false
-    return all(is_id_char, rest)
-end
-isidentifier(s::Symbol) = isidentifier(string(s))
-
-is_op_suffix_char(c::AbstractChar) = ccall(:jl_op_suffix_char, Cint, (UInt32,), c) != 0
-
-_isoperator(s) = ccall(:jl_is_operator, Cint, (Cstring,), s) != 0
-
-"""
-    isoperator(s::Symbol)
-
-Return `true` if the symbol can be used as an operator, `false` otherwise.
-
-# Examples
-```jldoctest
-julia> Meta.isoperator(:+), Meta.isoperator(:f)
-(true, false)
-```
-"""
-isoperator(s::Union{Symbol,AbstractString}) = _isoperator(s) || ispostfixoperator(s)
-
-"""
-    isunaryoperator(s::Symbol)
-
-Return `true` if the symbol can be used as a unary (prefix) operator, `false` otherwise.
-
-# Examples
-```jldoctest
-julia> Meta.isunaryoperator(:-), Meta.isunaryoperator(:√), Meta.isunaryoperator(:f)
-(true, true, false)
-```
-"""
-isunaryoperator(s::Symbol) = ccall(:jl_is_unary_operator, Cint, (Cstring,), s) != 0
-is_unary_and_binary_operator(s::Symbol) = ccall(:jl_is_unary_and_binary_operator, Cint, (Cstring,), s) != 0
-is_syntactic_operator(s::Symbol) = ccall(:jl_is_syntactic_operator, Cint, (Cstring,), s) != 0
-
-"""
-    isbinaryoperator(s::Symbol)
-
-Return `true` if the symbol can be used as a binary (infix) operator, `false` otherwise.
-
-# Examples
-```jldoctest
-julia> Meta.isbinaryoperator(:-), Meta.isbinaryoperator(:√), Meta.isbinaryoperator(:f)
-(true, false, false)
-```
-"""
-function isbinaryoperator(s::Symbol)
-    return _isoperator(s) && (!isunaryoperator(s) || is_unary_and_binary_operator(s)) &&
-        s !== Symbol("'")
-end
-
-"""
-    ispostfixoperator(s::Union{Symbol,AbstractString})
-
-Return `true` if the symbol can be used as a postfix operator, `false` otherwise.
-
-# Examples
-```jldoctest
-julia> Meta.ispostfixoperator(Symbol("'")), Meta.ispostfixoperator(Symbol("'ᵀ")), Meta.ispostfixoperator(:-)
-(true, true, false)
-```
-"""
-function ispostfixoperator(s::Union{Symbol,AbstractString})
-    s = String(s)::String
-    return startswith(s, '\'') && all(is_op_suffix_char, SubString(s, 2))
-end
 
 """
     operator_precedence(s::Symbol)
 
-Return an integer representing the precedence of operator `s`, relative to
+Return an integer representing the precedence of a binary operator `s`, relative to
 other operators. Higher-numbered operators take precedence over lower-numbered
-operators. Return `0` if `s` is not a valid operator.
+operators. Return `0` if `s` is not a valid binary operator.
+
+(The precedence of *unary* operators is handled differently, including cases like `+`
+where an operator can be either unary or binary.)
 
 # Examples
 ```jldoctest
@@ -1608,7 +1595,8 @@ julia> Base.operator_associativity(:⊗), Base.operator_associativity(:sin), Bas
 """
 function operator_associativity(s::Symbol)
     if operator_precedence(s) in (prec_arrow, prec_assignment, prec_control_flow, prec_pair, prec_power) ||
-        (isunaryoperator(s) && !is_unary_and_binary_operator(s)) || s === :<| || s === :||
+        (isunaryoperator(s) && !is_unary_and_binary_operator(s)) ||
+        (s === :<| || s === :|| || s == :?)
         return :right
     elseif operator_precedence(s) in (0, prec_comparison) || s in (:+, :++, :*)
         return :none
@@ -1700,13 +1688,6 @@ function show_enclosed_list(io::IO, op, items, sep, cl, indent, prec=0, quote_le
     print(io, cl)
 end
 
-function is_valid_identifier(sym)
-    return isidentifier(sym) || (
-        _isoperator(sym) &&
-        !(sym in (Symbol("'"), :(::), :?)) &&
-        !is_syntactic_operator(sym)
-    )
-end
 
 # show a normal (non-operator) function call, e.g. f(x, y) or A[z]
 # kw: `=` expressions are parsed with head `kw` in this context
@@ -1746,7 +1727,7 @@ function show_sym(io::IO, sym::Symbol; allow_macroname=false)
         print(io, '@')
         show_sym(io, Symbol(sym_str[2:end]))
     else
-        print(io, "var", repr(string(sym)))
+        print(io, "var\"", escape_raw_string(string(sym)), '"')
     end
 end
 
@@ -1777,19 +1758,14 @@ function show_globalref(io::IO, ex::GlobalRef; allow_macroname=false)
     nothing
 end
 
-function show_unquoted(io::IO, ex::UnoptSlot, ::Int, ::Int)
-    typ = isa(ex, TypedSlot) ? ex.typ : Any
+function show_unquoted(io::IO, ex::SlotNumber, ::Int, ::Int)
     slotid = ex.id
     slotnames = get(io, :SOURCE_SLOTNAMES, false)
-    if (isa(slotnames, Vector{String}) &&
-        slotid <= length(slotnames::Vector{String}))
-        print(io, (slotnames::Vector{String})[slotid])
+    if isa(slotnames, Vector{String}) && slotid ≤ length(slotnames)
+        print(io, slotnames[slotid])
     else
         print(io, "_", slotid)
     end
-    if typ !== Any && isa(ex, TypedSlot)
-        print(io, "::", typ)
-    end
 end
 
 function show_unquoted(io::IO, ex::QuoteNode, indent::Int, prec::Int)
@@ -2151,8 +2127,12 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
     elseif head === :do && nargs == 2
         iob = IOContext(io, beginsym=>false)
         show_unquoted(iob, args[1], indent, -1, quote_level)
-        print(io, " do ")
-        show_list(iob, (((args[2]::Expr).args[1])::Expr).args, ", ", 0, 0, quote_level)
+        print(io, " do")
+        do_args = (((args[2]::Expr).args[1])::Expr).args
+        if !isempty(do_args)
+            print(io, ' ')
+            show_list(iob, do_args, ", ", 0, 0, quote_level)
+        end
         for stmt in (((args[2]::Expr).args[2])::Expr).args
             print(io, '\n', " "^(indent + indent_width))
             show_unquoted(iob, stmt, indent + indent_width, -1, quote_level)
@@ -2232,7 +2212,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
         print(io, head, ' ')
         show_list(io, args, ", ", indent, 0, quote_level)
 
-    elseif head === :export
+    elseif head in (:export, :public)
         print(io, head, ' ')
         show_list(io, mapany(allow_macroname, args), ", ", indent)
 
@@ -2329,7 +2309,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
         if get(io, beginsym, false)
             print(io, '(')
             ind = indent + indent_width
-            for i = 1:length(ex.args)
+            for i = eachindex(ex.args)
                 if i > 1
                     # if there was only a comment before the first semicolon, the expression would get parsed as a NamedTuple
                     if !(i == 2 && ex.args[1] isa LineNumberNode)
@@ -2452,6 +2432,11 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
     elseif head === :meta && nargs == 2 && args[1] === :pop_loc
         print(io, "# meta: pop locations ($(args[2]::Int))")
     # print anything else as "Expr(head, args...)"
+    elseif head === :toplevel
+        # Reset SOURCE_SLOTNAMES. Raw SlotNumbers are not valid in Expr(:toplevel), but
+        # we want to show bad ASTs reasonably to make errors understandable.
+        lambda_io = IOContext(io, :SOURCE_SLOTNAMES => false)
+        show_unquoted_expr_fallback(lambda_io, ex, indent, quote_level)
     else
         unhandled = true
     end
@@ -2477,10 +2462,10 @@ function show_signature_function(io::IO, @nospecialize(ft), demangle=false, farg
     uw = unwrap_unionall(ft)
     if ft <: Function && isa(uw, DataType) && isempty(uw.parameters) && _isself(uw)
         uwmod = parentmodule(uw)
-        if qualified && !is_exported_from_stdlib(uw.name.mt.name, uwmod) && uwmod !== Main
+        if qualified && !isexported(uwmod, uw.name.singletonname) && uwmod !== Main
             print_within_stacktrace(io, uwmod, '.', bold=true)
         end
-        s = sprint(show_sym, (demangle ? demangle_function_name : identity)(uw.name.mt.name), context=io)
+        s = sprint(show_sym, (demangle ? demangle_function_name : identity)(uw.name.singletonname), context=io)
         print_within_stacktrace(io, s, bold=true)
     elseif isType(ft) && (f = ft.parameters[1]; !isa(f, TypeVar))
         uwf = unwrap_unionall(f)
@@ -2515,7 +2500,8 @@ function show_tuple_as_call(out::IO, name::Symbol, sig::Type;
         return
     end
     tv = Any[]
-    io = IOContext(IOBuffer(), out)
+    buf = IOBuffer()
+    io = IOContext(buf, out)
     env_io = io
     while isa(sig, UnionAll)
         push!(tv, sig.var)
@@ -2558,20 +2544,23 @@ function show_tuple_as_call(out::IO, name::Symbol, sig::Type;
     end
     print_within_stacktrace(io, ")", bold=true)
     show_method_params(io, tv)
-    str = String(take!(unwrapcontext(io)[1]))
-    if get(out, :limit, false)::Bool
-        sz = get(out, :displaysize, (typemax(Int), typemax(Int)))::Tuple{Int, Int}
+    str = takestring!(buf)
+    str = type_limited_string_from_context(out, str)
+    print(out, str)
+    nothing
+end
+
+function type_limited_string_from_context(out::IO, str::String)
+    typelimitflag = get(out, :stacktrace_types_limited, nothing)
+    if typelimitflag isa RefValue{Bool}
+        sz = get(out, :displaysize, Base.displaysize_(out))::Tuple{Int, Int}
         str_lim = type_depth_limit(str, max(sz[2], 120))
         if sizeof(str_lim) < sizeof(str)
-            typelimitflag = get(out, :stacktrace_types_limited, nothing)
-            if typelimitflag !== nothing
-                typelimitflag[] = true
-            end
+            typelimitflag[] = true
         end
         str = str_lim
     end
-    print(out, str)
-    nothing
+    return str
 end
 
 # limit nesting depth of `{ }` until string textwidth is less than `n`
@@ -2664,7 +2653,7 @@ function type_depth_limit(str::String, n::Int; maxdepth = nothing)
         end
         prev = di
     end
-    return String(take!(output))
+    return unsafe_takestring!(output)
 end
 
 function print_type_bicolor(io, type; kwargs...)
@@ -2762,32 +2751,7 @@ function show(io::IO, vm::Core.TypeofVararg)
     end
 end
 
-module IRShow
-    const Compiler = Core.Compiler
-    using Core.IR
-    import ..Base
-    import .Compiler: IRCode, TypedSlot, CFG, scan_ssa_use!,
-        isexpr, compute_basic_blocks, block_for_inst, IncrementalCompact,
-        Effects, ALWAYS_TRUE, ALWAYS_FALSE
-    Base.getindex(r::Compiler.StmtRange, ind::Integer) = Compiler.getindex(r, ind)
-    Base.size(r::Compiler.StmtRange) = Compiler.size(r)
-    Base.first(r::Compiler.StmtRange) = Compiler.first(r)
-    Base.last(r::Compiler.StmtRange) = Compiler.last(r)
-    Base.length(is::Compiler.InstructionStream) = Compiler.length(is)
-    Base.iterate(is::Compiler.InstructionStream, st::Int=1) = (st <= Compiler.length(is)) ? (is[st], st + 1) : nothing
-    Base.getindex(is::Compiler.InstructionStream, idx::Int) = Compiler.getindex(is, idx)
-    Base.getindex(node::Compiler.Instruction, fld::Symbol) = Compiler.getindex(node, fld)
-    include("compiler/ssair/show.jl")
-
-    const __debuginfo = Dict{Symbol, Any}(
-        # :full => src -> Base.IRShow.statementidx_lineinfo_printer(src), # and add variable slot information
-        :source => src -> Base.IRShow.statementidx_lineinfo_printer(src),
-        # :oneliner => src -> Base.IRShow.statementidx_lineinfo_printer(Base.IRShow.PartialLineInfoPrinter, src),
-        :none => src -> Base.IRShow.lineinfo_disabled,
-        )
-    const default_debuginfo = Ref{Symbol}(:none)
-    debuginfo(sym) = sym === :default ? default_debuginfo[] : sym
-end
+Compiler.load_irshow!()
 
 function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source)
     # Fix slot names and types in function body
@@ -2796,41 +2760,73 @@ function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source)
     if src.slotnames !== nothing
         lambda_io = IOContext(lambda_io, :SOURCE_SLOTNAMES => sourceinfo_slotnames(src))
     end
-    if isempty(src.linetable) || src.linetable[1] isa LineInfoNode
-        println(io)
-        # TODO: static parameter values?
-        # only accepts :source or :none, we can't have a fallback for default since
-        # that would break code_typed(, debuginfo=:source) iff IRShow.default_debuginfo[] = :none
-        IRShow.show_ir(lambda_io, src, IRShow.IRShowConfig(IRShow.__debuginfo[debuginfo](src)))
-    else
-        # this is a CodeInfo that has not been used as a method yet, so its locations are still LineNumberNodes
-        body = Expr(:block)
-        body.args = src.code
-        show(lambda_io, body)
-    end
+    println(io)
+    # TODO: static parameter values?
+    # only accepts :source or :none, we can't have a fallback for default since
+    # that would break code_typed(, debuginfo=:source) iff IRShow.default_debuginfo[] = :none
+    IRShow.show_ir(lambda_io, src, IRShow.IRShowConfig(IRShow.__debuginfo[debuginfo](src)))
     print(io, ")")
 end
 
-function show(io::IO, inferred::Core.Compiler.InferenceResult)
-    mi = inferred.linfo
-    tt = mi.specTypes.parameters[2:end]
-    tts = join(["::$(t)" for t in tt], ", ")
-    rettype = inferred.result
-    if isa(rettype, Core.Compiler.InferenceState)
-        rettype = rettype.bestguess
+show_unquoted(io::IO, val::Argument, indent::Int, prec::Int) = show_unquoted(io, Core.SlotNumber(val.n), indent, prec)
+
+show_unquoted(io::IO, stmt::PhiNode, indent::Int, ::Int) = show_unquoted_phinode(io, stmt, indent, "%")
+function show_unquoted_phinode(io::IO, stmt::PhiNode, indent::Int, prefix::String)
+    args = String[let
+        e = stmt.edges[i]
+        v = !isassigned(stmt.values, i) ? "#undef" :
+            sprint(; context=io) do io′
+                show_unquoted(io′, stmt.values[i], indent)
+            end
+        "$prefix$e => $v"
+        end for i in 1:length(stmt.edges)
+    ]
+    print(io, "φ ", '(')
+    join(io, args, ", ")
+    print(io, ')')
+end
+
+function show_unquoted(io::IO, stmt::PhiCNode, indent::Int, ::Int)
+    print(io, "φᶜ (")
+    first = true
+    for v in stmt.values
+        first ? (first = false) : print(io, ", ")
+        show_unquoted(io, v, indent)
     end
-    if isa(mi.def, Method)
-        print(io, mi.def.name, "(", tts, " => ", rettype, ")")
+    print(io, ")")
+end
+
+function show_unquoted(io::IO, stmt::PiNode, indent::Int, ::Int)
+    print(io, "π (")
+    show_unquoted(io, stmt.val, indent)
+    print(io, ", ")
+    printstyled(io, stmt.typ, color=:cyan)
+    print(io, ")")
+end
+
+function show_unquoted(io::IO, stmt::UpsilonNode, indent::Int, ::Int)
+    print(io, "ϒ (")
+    isdefined(stmt, :val) ?
+        show_unquoted(io, stmt.val, indent) :
+        print(io, "#undef")
+    print(io, ")")
+end
+
+function show_unquoted(io::IO, stmt::ReturnNode, indent::Int, ::Int)
+    if !isdefined(stmt, :val)
+        print(io, "unreachable")
     else
-        print(io, "Toplevel MethodInstance thunk from ", mi.def, " => ", rettype)
+        print(io, "return ")
+        show_unquoted(io, stmt.val, indent)
     end
 end
 
-function show(io::IO, ::Core.Compiler.NativeInterpreter)
-    print(io, "Core.Compiler.NativeInterpreter(...)")
+show_unquoted(io::IO, stmt::GotoIfNot, indent::Int, ::Int) = show_unquoted_gotoifnot(io, stmt, indent, "%")
+function show_unquoted_gotoifnot(io::IO, stmt::GotoIfNot, indent::Int, prefix::String)
+    print(io, "goto ", prefix, stmt.dest, " if not ")
+    show_unquoted(io, stmt.cond, indent)
 end
 
-
 function dump(io::IOContext, x::SimpleVector, n::Int, indent)
     if isempty(x)
         print(io, "empty SimpleVector")
@@ -2838,7 +2834,7 @@ function dump(io::IOContext, x::SimpleVector, n::Int, indent)
     end
     print(io, "SimpleVector")
     if n > 0
-        for i = 1:length(x)
+        for i in eachindex(x)
             println(io)
             print(io, indent, "  ", i, ": ")
             if isassigned(x,i)
@@ -2929,6 +2925,13 @@ end
 
 # Types
 function dump(io::IOContext, x::DataType, n::Int, indent)
+    # For some reason, tuples are structs
+    is_struct = isstructtype(x) && !(x <: Tuple)
+    is_mut = is_struct && ismutabletype(x)
+    is_mut && print(io, "mutable ")
+    is_struct && print(io, "struct ")
+    isprimitivetype(x) && print(io, "primitive type ")
+    isabstracttype(x) && print(io, "abstract type ")
     print(io, x)
     if x !== Any
         print(io, " <: ", supertype(x))
@@ -2948,10 +2951,15 @@ function dump(io::IOContext, x::DataType, n::Int, indent)
         end
         fields = fieldnames(x)
         fieldtypes = datatype_fieldtypes(x)
-        for idx in 1:length(fields)
+        for idx in eachindex(fields)
             println(io)
-            print(io, indent, "  ", fields[idx], "::")
-            print(tvar_io, fieldtypes[idx])
+            print(io, indent, "  ")
+            is_mut && isconst(x, idx) && print(io, "const ")
+            print(io, fields[idx])
+            if isassigned(fieldtypes, idx)
+                print(io, "::")
+                print(tvar_io, fieldtypes[idx])
+            end
         end
     end
     nothing
@@ -3065,7 +3073,7 @@ Print to a stream `io`, or return a string `str`, giving a brief description of
 a value. By default returns `string(typeof(x))`, e.g. [`Int64`](@ref).
 
 For arrays, returns a string of size and type info,
-e.g. `10-element Array{Int64,1}`.
+e.g. `10-element Vector{Int64}` or `9×4×5 Array{Float64, 3}`.
 
 # Examples
 ```jldoctest
@@ -3080,7 +3088,7 @@ summary(io::IO, x) = print(io, typeof(x))
 function summary(x)
     io = IOBuffer()
     summary(io, x)
-    String(take!(io))
+    takestring!(io)
 end
 
 ## `summary` for AbstractArrays
@@ -3109,19 +3117,25 @@ end
 summary(io::IO, f::Function) = show(io, MIME"text/plain"(), f)
 
 """
-    showarg(io::IO, x, toplevel)
+    Base.showarg(io::IO, x, toplevel)
+
+Show the quasi-type of `x` where quasi-type is the type of `x` or an expression (possibly
+containing quasi-types) that would generate an object of the same type as `x`. The shorter
+of these two options is typically used.
+
+This function is used by `summary` to display type information in terms of sequences of
+function calls on objects.
 
-Show `x` as if it were an argument to a function. This function is
-used by [`summary`](@ref) to display type information in terms of sequences of
-function calls on objects. `toplevel` is `true` if this is
-the direct call from `summary` and `false` for nested (recursive) calls.
+Show a leading `::` if `toplevel` is `false` and showing a type. `toplevel` is `true` if
+this is the direct call from `summary` and `false` for nested (recursive) calls.
 
-The fallback definition is to print `x` as "::\\\$(typeof(x))",
-representing argument `x` in terms of its type. (The double-colon is
-omitted if `toplevel=true`.) However, you can
-specialize this function for specific types to customize printing.
+The fallback definition is to print `x` as "::\\\$(typeof(x))" or "\\\$(typeof(x))",
+representing argument `x` in terms of its type. However, you can specialize
+this function for specific types to customize printing. This customization is useful for
+types that have simple, public constructors and verbose and/or internal types and type
+parameters such as `reinterpret`ed arrays or `SubArray`s.
 
-# Example
+# Examples
 
 A SubArray created as `view(a, :, 3, 2:5)`, where `a` is a
 3-dimensional Float64 array, has type
@@ -3148,14 +3162,14 @@ type, indicating that any recursed calls are not at the top level.
 Printing the parent as `::Array{Float64,3}` is the fallback (non-toplevel)
 behavior, because no specialized method for `Array` has been defined.
 """
-function showarg(io::IO, T::Type, toplevel)
-    toplevel || print(io, "::")
-    print(io, "Type{", T, "}")
-end
 function showarg(io::IO, @nospecialize(x), toplevel)
     toplevel || print(io, "::")
     print(io, typeof(x))
 end
+function showarg(io::IO, T::Type, toplevel)
+    toplevel || print(io, "::")
+    print(io, "Type{", T, "}")
+end
 # This method resolves an ambiguity for packages that specialize on eltype
 function showarg(io::IO, a::Array{Union{}}, toplevel)
     toplevel || print(io, "::")
@@ -3180,7 +3194,9 @@ showindices(io) = nothing
 function showarg(io::IO, r::ReshapedArray, toplevel)
     print(io, "reshape(")
     showarg(io, parent(r), false)
-    print(io, ", ", join(r.dims, ", "))
+    if !isempty(r.dims)
+        print(io, ", ", join(r.dims, ", "))
+    end
     print(io, ')')
     toplevel && print(io, " with eltype ", eltype(r))
     return nothing
@@ -3214,7 +3230,7 @@ function Base.showarg(io::IO, r::Iterators.Pairs{<:Integer, <:Any, <:Any, T}, to
     print(io, "pairs(IndexLinear(), ::", T, ")")
 end
 
-function Base.showarg(io::IO, r::Iterators.Pairs{Symbol, <:Any, <:Any, T}, toplevel) where {T <: NamedTuple}
+function Base.showarg(io::IO, r::Iterators.Pairs{Symbol, <:Any, Nothing, T}, toplevel) where {T <: NamedTuple}
     print(io, "pairs(::NamedTuple)")
 end
 
@@ -3257,10 +3273,119 @@ bitstring(B::BitArray) = sprint(bitshow, B)
 function show(io::IO, oc::Core.OpaqueClosure)
     A, R = typeof(oc).parameters
     show_tuple_as_call(io, Symbol(""), A; hasfirst=false)
-    print(io, "::", R)
     print(io, "->◌")
+    print(io, "::", R)
 end
 
 function show(io::IO, ::MIME"text/plain", oc::Core.OpaqueClosure{A, R}) where {A, R}
     show(io, oc)
 end
+
+# printing bindings and partitions
+function print_partition(io::IO, partition::Core.BindingPartition)
+    print(io, partition.min_world)
+    print(io, ":")
+    max_world = @atomic partition.max_world
+    if max_world == typemax(UInt)
+        print(io, '∞')
+    else
+        print(io, max_world)
+    end
+    if (partition.kind & PARTITION_MASK_FLAG) != 0
+        first = false
+        print(io, " [")
+        if (partition.kind & PARTITION_FLAG_EXPORTED) != 0
+            print(io, "exported")
+        end
+        if (partition.kind & PARTITION_FLAG_IMPLICITLY_EXPORTED) != 0
+            first ? (first = false) : print(io, ",")
+            print(io, "re-exported")
+        end
+        if (partition.kind & PARTITION_FLAG_DEPRECATED) != 0
+            first ? (first = false) : print(io, ",")
+            print(io, "deprecated")
+        end
+        if (partition.kind & PARTITION_FLAG_DEPWARN) != 0
+            first ? (first = false) : print(io, ",")
+            print(io, "depwarn")
+        end
+        print(io, "]")
+    end
+    print(io, " - ")
+    kind = binding_kind(partition)
+    if kind == PARTITION_KIND_BACKDATED_CONST
+        print(io, "backdated constant binding to ")
+        print(io, partition_restriction(partition))
+    elseif kind == PARTITION_KIND_CONST
+        print(io, "constant binding to ")
+        print(io, partition_restriction(partition))
+    elseif kind == PARTITION_KIND_CONST_IMPORT
+        print(io, "constant binding (declared with `import`) to ")
+        print(io, partition_restriction(partition))
+    elseif kind == PARTITION_KIND_UNDEF_CONST
+        print(io, "undefined const binding")
+    elseif kind == PARTITION_KIND_GUARD
+        print(io, "undefined binding - guard entry")
+    elseif kind == PARTITION_KIND_FAILED
+        print(io, "ambiguous binding - guard entry")
+    elseif kind == PARTITION_KIND_DECLARED
+        print(io, "weak global binding declared using `global` (implicit type Any)")
+    elseif kind == PARTITION_KIND_IMPLICIT_GLOBAL
+        print(io, "implicit `using` resolved to global ")
+        print(io, partition_restriction(partition).globalref)
+    elseif kind == PARTITION_KIND_IMPLICIT_CONST
+        print(io, "implicit `using` resolved to constant ")
+        print(io, partition_restriction(partition))
+    elseif kind == PARTITION_KIND_EXPLICIT
+        print(io, "explicit `using` from ")
+        print(io, partition_restriction(partition).globalref)
+    elseif kind == PARTITION_KIND_IMPORTED
+        print(io, "explicit `import` from ")
+        print(io, partition_restriction(partition).globalref)
+    else
+        @assert kind == PARTITION_KIND_GLOBAL
+        print(io, "global variable with type ")
+        print(io, partition_restriction(partition))
+    end
+end
+
+function show(io::IO, ::MIME"text/plain", partition::Core.BindingPartition)
+    print(io, "BindingPartition ")
+    print_partition(io, partition)
+end
+
+function show(io::IO, ::MIME"text/plain", bnd::Core.Binding)
+    print(io, "Binding ")
+    print(io, bnd.globalref)
+    if !isdefined(bnd, :partitions)
+        print(io, " - No partitions")
+    else
+        partition = @atomic bnd.partitions
+        while true
+            println(io)
+            print(io, "   ")
+            print_partition(io, partition)
+            isdefined(partition, :next) || break
+            partition = @atomic partition.next
+        end
+    end
+end
+
+# Special pretty printing for EvalInto/IncludeInto
+function show(io::IO, ii::IncludeInto)
+    if getglobal(ii.m, :include) === ii
+        print(io, ii.m)
+        print(io, ".include")
+    else
+        show_default(io, ii)
+    end
+end
+
+function show(io::IO, ei::Core.EvalInto)
+    if getglobal(ei.m, :eval) === ei
+        print(io, ei.m)
+        print(io, ".eval")
+    else
+        show_default(io, ei)
+    end
+end
diff --git a/base/simdloop.jl b/base/simdloop.jl
index 797b77ed75a99..40bd52f0fce37 100644
--- a/base/simdloop.jl
+++ b/base/simdloop.jl
@@ -60,22 +60,22 @@ function compile(x, ivdep)
     check_body!(x)
 
     var,range = parse_iteration_space(x.args[1])
-    r = gensym("r") # Range value
-    j = gensym("i") # Iteration variable for outer loop
-    n = gensym("n") # Trip count for inner loop
-    i = gensym("i") # Trip index for inner loop
-    quote
+    # r: Range value
+    # j: Iteration variable for outer loop
+    # n: Trip count for inner loop
+    # i: Trip index for inner loop
+    return quote
         # Evaluate range value once, to enhance type and data flow analysis by optimizers.
-        let $r = $range
-            for $j in Base.simd_outer_range($r)
-                let $n = Base.simd_inner_length($r,$j)
-                    if zero($n) < $n
+        let r = $(esc(range))
+            for j in Base.simd_outer_range(r)
+                let n = Base.simd_inner_length(r,j)
+                    if zero(n) < n
                         # Lower loop in way that seems to work best for LLVM 3.3 vectorizer.
-                        let $i = zero($n)
-                            while $i < $n
-                                local $var = Base.simd_index($r,$j,$i)
-                                $(x.args[2])        # Body of loop
-                                $i += 1
+                        let i = zero(n)
+                            while i < n
+                                local $(esc(var)) = Base.simd_index(r,j,i)
+                                $(esc(x.args[2]))        # Body of loop
+                                i += 1
                                 $(Expr(:loopinfo, Symbol("julia.simdloop"), ivdep))  # Mark loop as SIMD loop
                             end
                         end
@@ -125,12 +125,12 @@ either case, your inner loop should have the following properties to allow vecto
 * No iteration ever waits on a previous iteration to make forward progress.
 """
 macro simd(forloop)
-    esc(compile(forloop, nothing))
+    compile(forloop, nothing)
 end
 
 macro simd(ivdep, forloop)
     if ivdep === :ivdep
-        esc(compile(forloop, Symbol("julia.ivdep")))
+        compile(forloop, Symbol("julia.ivdep"))
     else
         throw(SimdError("Only ivdep is valid as the first argument to @simd"))
     end
diff --git a/base/slicearray.jl b/base/slicearray.jl
index e5a433cdb8d2a..1928020a1155a 100644
--- a/base/slicearray.jl
+++ b/base/slicearray.jl
@@ -25,7 +25,7 @@ struct Slices{P,SM,AX,S,N} <: AbstractSlices{S,N}
     """
     parent::P
     """
-    A tuple of length `ndims(parent)`, denoting how each dimension should be handled:
+    A tuple of length at least `ndims(parent)`, denoting how each dimension should be handled:
       - an integer `i`: this is the `i`th dimension of the outer `Slices` object.
       - `:`: an "inner" dimension
     """
@@ -39,34 +39,39 @@ end
 unitaxis(::AbstractArray) = Base.OneTo(1)
 
 function Slices(A::P, slicemap::SM, ax::AX) where {P,SM,AX}
+    length(slicemap) >= ndims(A) ||
+        throw(ArgumentError("Slices cannot be constructed with a slicemap of fewer elements than the parent has dimensions"))
     N = length(ax)
-    argT = map((a,l) -> l === (:) ? Colon : eltype(a), axes(A), slicemap)
+    parent_axes = ntuple(d -> axes(A, d), length(slicemap))
+    argT = map((a,l) -> l === (:) ? Colon : eltype(a), parent_axes, slicemap)
     S = Base.promote_op(view, P, argT...)
     Slices{P,SM,AX,S,N}(A, slicemap, ax)
 end
 
 _slice_check_dims(N) = nothing
 function _slice_check_dims(N, dim, dims...)
-    1 <= dim <= N || throw(DimensionMismatch("Invalid dimension $dim"))
+    1 <= dim || throw(DimensionMismatch("Invalid dimension $dim"))
     dim in dims && throw(DimensionMismatch("Dimensions $dims are not unique"))
     _slice_check_dims(N,dims...)
 end
 
 @constprop :aggressive function _eachslice(A::AbstractArray{T,N}, dims::NTuple{M,Integer}, drop::Bool) where {T,N,M}
     _slice_check_dims(N,dims...)
+    N_ = foldl(max, dims; init=N)
+
     if drop
         # if N = 4, dims = (3,1) then
         # axes = (axes(A,3), axes(A,1))
         # slicemap = (2, :, 1, :)
         ax = map(dim -> axes(A,dim), dims)
-        slicemap = ntuple(dim -> something(findfirst(isequal(dim), dims), (:)), N)
+        slicemap = ntuple(dim -> something(findfirst(isequal(dim), dims), (:)),  N_)
         return Slices(A, slicemap, ax)
     else
         # if N = 4, dims = (3,1) then
         # axes = (axes(A,1), OneTo(1), axes(A,3), OneTo(1))
         # slicemap = (1, :, 3, :)
-        ax = ntuple(dim -> dim in dims ? axes(A,dim) : unitaxis(A), N)
-        slicemap = ntuple(dim -> dim in dims ? dim : (:), N)
+        ax = ntuple(dim -> dim in dims ? axes(A,dim) : unitaxis(A), N_)
+        slicemap = ntuple(dim -> dim in dims ? dim : (:), N_)
         return Slices(A, slicemap, ax)
     end
 end
@@ -77,13 +82,13 @@ end
 """
     eachslice(A::AbstractArray; dims, drop=true)
 
-Create a [`Slices`](@ref) object that is an array of slices over dimensions `dims` of `A`, returning
-views that select all the data from the other dimensions in `A`. `dims` can either by an
-integer or a tuple of integers.
+Create a sliced object, usually [`Slices`](@ref), that is an array of slices over dimensions
+`dims` of `A`, returning views that select all the data from the other dimensions in `A`.
+`dims` can either be an integer or a tuple of integers.
 
-If `drop = true` (the default), the outer `Slices` will drop the inner dimensions, and
+If `drop = true` (the default), the outer slices will drop the inner dimensions, and
 the ordering of the dimensions will match those in `dims`. If `drop = false`, then the
-`Slices` will have the same dimensionality as the underlying array, with inner
+slices object will have the same dimensionality as the underlying array, with inner
 dimensions having size 1.
 
 See [`stack`](@ref)`(slices; dims)` for the inverse of `eachslice(A; dims::Integer)`.
@@ -96,7 +101,7 @@ See also [`eachrow`](@ref), [`eachcol`](@ref), [`mapslices`](@ref) and [`selectd
 !!! compat "Julia 1.9"
      Prior to Julia 1.9, this returned an iterator, and only a single dimension `dims` was supported.
 
-# Example
+# Examples
 
 ```jldoctest
 julia> m = [1 2 3; 4 5 6; 7 8 9]
@@ -144,7 +149,7 @@ See also [`eachcol`](@ref), [`eachslice`](@ref) and [`mapslices`](@ref).
 !!! compat "Julia 1.9"
      Prior to Julia 1.9, this returned an iterator.
 
-# Example
+# Examples
 
 ```jldoctest
 julia> a = [1 2; 3 4]
@@ -182,7 +187,7 @@ See also [`eachrow`](@ref), [`eachslice`](@ref) and [`mapslices`](@ref).
 !!! compat "Julia 1.9"
      Prior to Julia 1.9, this returned an iterator.
 
-# Example
+# Examples
 
 ```jldoctest
 julia> a = [1 2; 3 4]
@@ -225,7 +230,6 @@ constructed by [`eachcol`](@ref).
 const ColumnSlices{P<:AbstractMatrix,AX,S<:AbstractVector} = Slices{P,Tuple{Colon,Int},AX,S,1}
 
 
-IteratorSize(::Type{Slices{P,SM,AX,S,N}}) where {P,SM,AX,S,N} = HasShape{N}()
 axes(s::Slices) = s.axes
 size(s::Slices) = map(length, s.axes)
 
diff --git a/base/some.jl b/base/some.jl
index 0d538cbed6c23..614f0d3395381 100644
--- a/base/some.jl
+++ b/base/some.jl
@@ -14,9 +14,7 @@ end
 
 Some(::Type{T}) where {T} = Some{Type{T}}(T)
 
-promote_rule(::Type{Some{T}}, ::Type{Some{S}}) where {T, S<:T} = Some{T}
-
-nonnothingtype(::Type{T}) where {T} = typesplit(T, Nothing)
+nonnothingtype(@nospecialize(T::Type)) = typesplit(T, Nothing)
 promote_rule(T::Type{Nothing}, S::Type) = Union{S, Nothing}
 function promote_rule(T::Type{>:Nothing}, S::Type)
     R = nonnothingtype(T)
@@ -138,10 +136,37 @@ true
     This macro is available as of Julia 1.7.
 """
 macro something(args...)
-    expr = :(nothing)
-    for arg in reverse(args)
-        expr = :(val = $(esc(arg)); val !== nothing ? val : ($expr))
-    end
+    noth = GlobalRef(Base, :nothing)
     something = GlobalRef(Base, :something)
-    return :($something($expr))
+
+    # This preserves existing semantics of throwing on `nothing`
+    expr = :($something($noth))
+
+    #=
+    We go through the arguments in reverse
+    because we're building a nested if/else
+    expression from the inside out.
+    The innermost thing to check is the last argument,
+    which is why we need the last argument first
+    when building the final expression.
+    =#
+    for i in reverse(eachindex(args))
+        arg  = args[i]
+        val = Cartesian.inlineanonymous(:val, i)
+        expr = quote
+            $val = $(esc(arg))
+            if !isnothing($val)
+                # unwrap eagerly to help type inference
+                $something($val)
+            else
+                $expr
+            end
+        end
+    end
+    return expr
 end
+
+==(a::Some, b::Some) = a.value == b.value
+isequal(a::Some, b::Some)::Bool = isequal(a.value, b.value)
+const hash_some_seed = UInt == UInt64 ? 0xde5c997007a4ca3a : 0x78c29c09
+hash(s::Some, h::UInt) = hash(s.value, hash_some_seed + h)
diff --git a/base/sort.jl b/base/sort.jl
index 786d8e110e6e2..685caa25a33f6 100644
--- a/base/sort.jl
+++ b/base/sort.jl
@@ -4,9 +4,8 @@ module Sort
 
 using Base.Order
 
-using Base: copymutable, midpoint, require_one_based_indexing, uinttype,
-    sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType, top_set_bit,
-    IteratorSize, HasShape, IsInfinite, tail
+using Base: copymutable, midpoint, require_one_based_indexing, uinttype, tail,
+    sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType, top_set_bit
 
 import Base:
     sort,
@@ -63,7 +62,7 @@ function issorted(itr, order::Ordering)
 end
 
 """
-    issorted(v, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    issorted(itr, lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
 
 Test whether a collection is in sorted order. The keywords modify what
 order is considered sorted, as described in the [`sort!`](@ref) documentation.
@@ -86,12 +85,27 @@ julia> issorted([1, 2, -2, 3], by=abs)
 true
 ```
 """
-issorted(itr;
-    lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) =
-    issorted(itr, ord(lt,by,rev,order))
+function issorted(itr;
+        lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward)
+    # Explicit branching because the compiler can't optimize away the
+    # type instability of the `ord` call with Bool `rev` parameter.
+    if rev === true
+        issorted(itr, ord(lt, by, true, order))
+    else
+        issorted(itr, ord(lt, by, nothing, order))
+    end
+end
 
-function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering)
-    _sort!(v, InitialOptimizations(ScratchQuickSort(k)), o, (;))
+function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering; scratch::Union{Nothing, Vector} = nothing)
+    # TODO move k from `alg` to `kw`
+    # Don't perform InitialOptimizations before Bracketing. The optimizations take O(n)
+    # time and so does the whole sort. But do perform them before recursive calls because
+    # that can cause significant speedups when the target range is large so the runtime is
+    # dominated by k log k and the optimizations runs in O(k) time.
+    _sort!(v, BoolOptimization(
+        Small{12}( # Very small inputs should go straight to insertion sort
+            BracketedSort(k))),
+        o, (; scratch))
     maybeview(v, k)
 end
 
@@ -99,9 +113,9 @@ maybeview(v, k) = view(v, k)
 maybeview(v, k::Integer) = v[k]
 
 """
-    partialsort!(v, k; by=identity, lt=isless, rev=false)
+    partialsort!(v, k; by=identity, lt=isless, rev=false, order::Base.Order.Ordering=Base.Order.Forward)
 
-Partially sort the vector `v` in place so that the value at index `k` (or
+Mutate the vector `v` so that the value at index `k` (or
 range of adjacent values if `k` is a range) occurs
 at the position where it would appear if the array were fully sorted. If `k` is a single
 index, that value is returned; if `k` is a range, an array of values at those indices is
@@ -152,11 +166,11 @@ julia> a
 ```
 """
 partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange};
-             lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) =
-    partialsort!(v, k, ord(lt,by,rev,order))
+             lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, kws...) =
+    partialsort!(v, k, ord(lt,by,rev,order); kws...)
 
 """
-    partialsort(v, k, by=identity, lt=isless, rev=false)
+    partialsort(v, k, by=identity, lt=isless, rev=false, order::Base.Order.Ordering=Base.Order.Forward)
 
 Variant of [`partialsort!`](@ref) that copies `v` before partially sorting it, thereby returning the
 same thing as `partialsort!` but leaving `v` unmodified.
@@ -165,17 +179,17 @@ partialsort(v::AbstractVector, k::Union{Integer,OrdinalRange}; kws...) =
     partialsort!(copymutable(v), k; kws...)
 
 # reference on sorted binary search:
-#   http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary
+#   https://www.tbray.org/ongoing/When/200x/2003/03/22/Binary
 
 # index of the first value of vector a that is greater than or equivalent to x;
 # returns lastindex(v)+1 if x is greater than all values in v.
 function searchsortedfirst(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer
     hi = hi + T(1)
     len = hi - lo
-    @inbounds while len != 0
+    while len != 0
         half_len = len >>> 0x01
         m = lo + half_len
-        if lt(o, v[m], x)
+        if lt(o, @inbounds(v[m]), x)
             lo = m + 1
             len -= half_len + 1
         else
@@ -192,9 +206,9 @@ function searchsortedlast(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keyt
     u = T(1)
     lo = lo - u
     hi = hi + u
-    @inbounds while lo < hi - u
+    while lo != hi - u
         m = midpoint(lo, hi)
-        if lt(o, x, v[m])
+        if lt(o, x, @inbounds(v[m]))
             hi = m
         else
             lo = m
@@ -210,22 +224,25 @@ function searchsorted(v::AbstractVector, x, ilo::T, ihi::T, o::Ordering)::UnitRa
     u = T(1)
     lo = ilo - u
     hi = ihi + u
-    @inbounds while lo < hi - u
+    while lo != hi - u
         m = midpoint(lo, hi)
-        if lt(o, v[m], x)
+        if lt(o, @inbounds(v[m]), x)
             lo = m
-        elseif lt(o, x, v[m])
+        elseif lt(o, x, @inbounds(v[m]))
             hi = m
         else
-            a = searchsortedfirst(v, x, max(lo,ilo), m, o)
-            b = searchsortedlast(v, x, m, min(hi,ihi), o)
+            a = searchsortedfirst(v, x, lo+u, m, o)
+            b = searchsortedlast(v, x, m, hi-u, o)
             return a : b
         end
     end
     return (lo + 1) : (hi - 1)
 end
 
-function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a)
+
+const FastRangeOrderings = Union{DirectOrdering,Lt{typeof(<)},ReverseOrdering{Lt{typeof(<)}}}
+
+function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings)::keytype(a)
     require_one_based_indexing(a)
     f, h, l = first(a), step(a), last(a)
     if lt(o, x, f)
@@ -238,7 +255,7 @@ function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering):
     end
 end
 
-function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a)
+function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings)::keytype(a)
     require_one_based_indexing(a)
     f, h, l = first(a), step(a), last(a)
     if !lt(o, f, x)
@@ -251,7 +268,7 @@ function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)
     end
 end
 
-function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a)
+function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::FastRangeOrderings)::keytype(a)
     require_one_based_indexing(a)
     f, h, l = first(a), step(a), last(a)
     if lt(o, x, f)
@@ -259,7 +276,7 @@ function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderin
     elseif h == 0 || !lt(o, x, l)
         length(a)
     else
-        if o isa ForwardOrdering
+        if !(o isa ReverseOrdering)
             fld(floor(Integer, x) - f, h) + 1
         else
             fld(ceil(Integer, x) - f, h) + 1
@@ -267,7 +284,7 @@ function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderin
     end
 end
 
-function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a)
+function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::FastRangeOrderings)::keytype(a)
     require_one_based_indexing(a)
     f, h, l = first(a), step(a), last(a)
     if !lt(o, f, x)
@@ -275,7 +292,7 @@ function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderi
     elseif h == 0 || lt(o, l, x)
         length(a) + 1
     else
-        if o isa ForwardOrdering
+        if !(o isa ReverseOrdering)
             cld(ceil(Integer, x) - f, h) + 1
         else
             cld(floor(Integer, x) - f, h) + 1
@@ -283,7 +300,7 @@ function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderi
     end
 end
 
-searchsorted(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering) =
+searchsorted(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings) =
     searchsortedfirst(a, x, o) : searchsortedlast(a, x, o)
 
 for s in [:searchsortedfirst, :searchsortedlast, :searchsorted]
@@ -319,13 +336,13 @@ julia> searchsorted([1, 2, 4, 5, 5, 7], 5) # multiple matches
 4:5
 
 julia> searchsorted([1, 2, 4, 5, 5, 7], 3) # no match, insert in the middle
-3:2
+3:2 (empty range)
 
 julia> searchsorted([1, 2, 4, 5, 5, 7], 9) # no match, insert at end
-7:6
+7:6 (empty range)
 
 julia> searchsorted([1, 2, 4, 5, 5, 7], 0) # no match, insert at start
-1:0
+1:0 (empty range)
 
 julia> searchsorted([1=>"one", 2=>"two", 2=>"two", 4=>"four"], 2=>"two", by=first) # compare the keys of the pairs
 2:3
@@ -335,14 +352,14 @@ julia> searchsorted([1=>"one", 2=>"two", 2=>"two", 4=>"four"], 2=>"two", by=firs
 """
     searchsortedfirst(v, x; by=identity, lt=isless, rev=false)
 
-Return the index of the first value in `v` greater than or equivalent to `x`.
-If `x` is greater than all values in `v`, return `lastindex(v) + 1`.
+Return the index of the first value in `v` that is not ordered before `x`.
+If all values in `v` are ordered before `x`, return `lastindex(v) + 1`.
 
 The vector `v` must be sorted according to the order defined by the keywords.
-`insert!`ing `x` at the returned index will maintain the sorted order. Refer to
-[`sort!`](@ref) for the meaning of the keywords and the definition of
-"greater than" and equivalence. Note that the `by` function is applied to the
-searched value `x` as well as the values in `v`.
+`insert!`ing `x` at the returned index will maintain the sorted order.
+Refer to [`sort!`](@ref) for the meaning and use of the keywords.
+Note that the `by` function is applied to the searched value `x` as well as the
+values in `v`.
 
 The index is generally found using binary search, but there are optimized
 implementations for some inputs.
@@ -374,13 +391,14 @@ julia> searchsortedfirst([1=>"one", 2=>"two", 4=>"four"], 3=>"three", by=first)
 """
     searchsortedlast(v, x; by=identity, lt=isless, rev=false)
 
-Return the index of the last value in `v` less than or equivalent to `x`.
-If `x` is less than all values in `v` the function returns `firstindex(v) - 1`.
+Return the index of the last value in `v` that is not ordered after `x`.
+If all values in `v` are ordered after `x`, return `firstindex(v) - 1`.
 
 The vector `v` must be sorted according to the order defined by the keywords.
-Refer to [`sort!`](@ref) for the meaning of the keywords and the definition of
-"less than" and equivalence. Note that the `by` function is applied to the
-searched value `x` as well as the values in `v`.
+`insert!`ing `x` immediately after the returned index will maintain the sorted order.
+Refer to [`sort!`](@ref) for the meaning and use of the keywords.
+Note that the `by` function is applied to the searched value `x` as well as the
+values in `v`.
 
 The index is generally found using binary search, but there are optimized
 implementations for some inputs
@@ -408,7 +426,7 @@ julia> searchsortedlast([1=>"one", 2=>"two", 4=>"four"], 3=>"three", by=first) #
 """ searchsortedlast
 
 """
-    insorted(x, v; by=identity, lt=isless, rev=false) -> Bool
+    insorted(x, v; by=identity, lt=isless, rev=false)::Bool
 
 Determine whether a vector `v` contains any value equivalent to `x`.
 The vector `v` must be sorted according to the order defined by the keywords.
@@ -478,7 +496,7 @@ end
 """
     make_scratch(scratch::Union{Nothing, Vector}, T::Type, len::Integer)
 
-Returns `(s, t)` where `t` is an `AbstractVector` of type `T` with length at least `len`
+Return `(s, t)` where `t` is an `AbstractVector` of type `T` with length at least `len`
 that is backed by the `Vector` `s`. If `scratch !== nothing`, then `s === scratch`.
 
 This function will allocate a new vector if `scratch === nothing`, `resize!` `scratch` if it
@@ -503,7 +521,7 @@ end
 ## sorting algorithm components ##
 
 """
-    _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw; t, offset)
+    _sort!(v::AbstractVector, a::Base.Sort.Algorithm, o::Base.Order.Ordering, kw; t, offset)
 
 An internal function that sorts `v` using the algorithm `a` under the ordering `o`,
 subject to specifications provided in `kw` (such as `lo` and `hi` in which case it only
@@ -517,7 +535,7 @@ no scratch space is present.
 
 A returned scratch space will be a `Vector{T}` where `T` is usually the eltype of `v`. There
 are some exceptions, for example if `eltype(v) == Union{Missing, T}` then the scratch space
-may be be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`.
+may be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`.
 
 `t` is an appropriate scratch space for the algorithm at hand, to be accessed as
 `t[i + offset]`. `t` is used for an algorithm to pass a scratch space back to itself in
@@ -525,9 +543,40 @@ internal or recursive calls.
 """
 function _sort! end
 
+# TODO: delete this optimization when views have no overhead.
+const UnwrappableSubArray = SubArray{T, 1, <:AbstractArray{T}, <:Tuple{AbstractUnitRange, Vararg{Number}}, true} where T
+"""
+    SubArrayOptimization(next) isa Base.Sort.Algorithm
+
+Unwrap certain known SubArrays because views have a performance overhead 😢
+
+Specifically, unwraps some instances of the type
 
+    $UnwrappableSubArray
 """
-    MissingOptimization(next) <: Algorithm
+struct SubArrayOptimization{T <: Algorithm} <: Algorithm
+    next::T
+end
+
+_sort!(v::AbstractVector, a::SubArrayOptimization, o::Ordering, kw) = _sort!(v, a.next, o, kw)
+function _sort!(v::UnwrappableSubArray, a::SubArrayOptimization, o::Ordering, kw)
+    @getkw lo hi
+    # @assert v.stride1 == 1
+    parent = v.parent
+    if parent isa Array && !(parent isa Vector) && hi - lo < 100 || !iszero(v.offset1)
+        # vec(::Array{T, ≠1}) allocates and is therefore somewhat expensive.
+        # We don't want that for small inputs.
+
+        # Additionally, if offset1 is non-zero, then this optimization is incompatible with
+        # algorithms that track absolute first and last indices (e.g. ScratchQuickSort)
+        _sort!(v, a.next, o, kw)
+    else
+        _sort!(vec(parent), a.next, o, kw)
+    end
+end
+
+"""
+    MissingOptimization(next) isa Base.Sort.Algorithm
 
 Filter out missing values.
 
@@ -586,7 +635,7 @@ function send_to_end!(f::F, v::AbstractVector; lo=firstindex(v), hi=lastindex(v)
     i - 1
 end
 """
-    send_to_end!(f::Function, v::AbstractVector, o::DirectOrdering[, end_stable]; lo, hi)
+    send_to_end!(f::Function, v::AbstractVector, o::Base.Order.DirectOrdering[, end_stable]; lo, hi)
 
 Return `(a, b)` where `v[a:b]` are the elements that are not sent to the end.
 
@@ -640,7 +689,7 @@ end
 
 
 """
-    IEEEFloatOptimization(next) <: Algorithm
+    IEEEFloatOptimization(next) isa Base.Sort.Algorithm
 
 Move NaN values to the end, partition by sign, and reinterpret the rest as unsigned integers.
 
@@ -685,7 +734,7 @@ end
 
 
 """
-    BoolOptimization(next) <: Algorithm
+    BoolOptimization(next) isa Base.Sort.Algorithm
 
 Sort `AbstractVector{Bool}`s using a specialized version of counting sort.
 
@@ -712,9 +761,9 @@ end
 
 
 """
-    IsUIntMappable(yes, no) <: Algorithm
+    IsUIntMappable(yes, no) isa Base.Sort.Algorithm
 
-Determines if the elements of a vector can be mapped to unsigned integers while preserving
+Determine if the elements of a vector can be mapped to unsigned integers while preserving
 their order under the specified ordering.
 
 If they can be, dispatch to the `yes` algorithm and record the unsigned integer type that
@@ -734,7 +783,7 @@ end
 
 
 """
-    Small{N}(small=SMALL_ALGORITHM, big) <: Algorithm
+    Small{N}(small=SMALL_ALGORITHM, big) isa Base.Sort.Algorithm
 
 Sort inputs with `length(lo:hi) <= N` using the `small` algorithm. Otherwise use the `big`
 algorithm.
@@ -773,6 +822,16 @@ Characteristics:
 it is well-suited to small collections but should not be used for large ones.
 """
 const InsertionSort = InsertionSortAlg()
+
+"""
+    SMALL_ALGORITHM
+
+Default sorting algorithm for small arrays.
+
+This is an alias for a simple low-overhead algorithm that does not scale well
+to large arrays, unlike high-overhead recursive algorithms used for larger arrays.
+`SMALL_ALGORITHM` is a good choice for the base case of a recursive algorithm.
+"""
 const SMALL_ALGORITHM = InsertionSortAlg()
 
 function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering, kw)
@@ -796,7 +855,7 @@ end
 
 
 """
-    CheckSorted(next) <: Algorithm
+    CheckSorted(next) isa Base.Sort.Algorithm
 
 Check if the input is already sorted and for large inputs, also check if it is
 reverse-sorted. The reverse-sorted check is unstable.
@@ -823,7 +882,7 @@ end
 
 
 """
-    ComputeExtrema(next) <: Algorithm
+    ComputeExtrema(next) isa Base.Sort.Algorithm
 
 Compute the extrema of the input under the provided order.
 
@@ -849,7 +908,7 @@ end
 
 
 """
-    ConsiderCountingSort(counting=CountingSort(), next) <: Algorithm
+    ConsiderCountingSort(counting=CountingSort(), next) isa Base.Sort.Algorithm
 
 If the input's range is small enough, use the `counting` algorithm. Otherwise, dispatch to
 the `next` algorithm.
@@ -877,7 +936,7 @@ _sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering, kw) = _sort!(v,
 
 
 """
-    CountingSort <: Algorithm
+    CountingSort() isa Base.Sort.Algorithm
 
 Use the counting sort algorithm.
 
@@ -913,7 +972,7 @@ end
 
 
 """
-    ConsiderRadixSort(radix=RadixSort(), next) <: Algorithm
+    ConsiderRadixSort(radix=RadixSort(), next) isa Base.Sort.Algorithm
 
 If the number of bits in the input's range is small enough and the input supports efficient
 bitshifts, use the `radix` algorithm. Otherwise, dispatch to the `next` algorithm.
@@ -936,7 +995,7 @@ end
 
 
 """
-    RadixSort <: Algorithm
+    RadixSort() isa Base.Sort.Algorithm
 
 Use the radix sort algorithm.
 
@@ -991,8 +1050,8 @@ end
 
 
 """
-    ScratchQuickSort(next::Algorithm=SMALL_ALGORITHM) <: Algorithm
-    ScratchQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Algorithm=SMALL_ALGORITHM) <: Algorithm
+    ScratchQuickSort(next::Base.Sort.Algorithm=Base.Sort.SMALL_ALGORITHM) isa Base.Sort.Algorithm
+    ScratchQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Base.Sort.Algorithm=Base.Sort.SMALL_ALGORITHM) isa Base.Sort.Algorithm
 
 Use the `ScratchQuickSort` algorithm with the `next` algorithm as a base case.
 
@@ -1032,7 +1091,7 @@ function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer
         v::AbstractVector, rev::Bool, pivot_dest::AbstractVector, pivot_index_offset::Integer)
     # Ideally we would use `pivot_index = rand(lo:hi)`, but that requires Random.jl
     # and would mutate the global RNG in sorting.
-    pivot_index = typeof(hi-lo)(hash(lo) % (hi-lo+1)) + lo
+    pivot_index = mod(hash(lo), lo:hi)
     @inbounds begin
         pivot = v[pivot_index]
         while lo < pivot_index
@@ -1108,7 +1167,196 @@ end
 
 
 """
-    StableCheckSorted(next) <: Algorithm
+    BracketedSort(target[, next::Algorithm]) isa Base.Sort.Algorithm
+
+Perform a partialsort for the elements that fall into the indices specified by the `target`
+using BracketedSort with the `next` algorithm for subproblems.
+
+BracketedSort takes a random* sample of the input, estimates the quantiles of the input
+using the quantiles of the sample to find signposts that almost certainly bracket the target
+values, filters the value in the input that fall between the signpost values to the front of
+the input, and then, if that "almost certainly" turned out to be true, finds the target
+within the small chunk that are, by value, between the signposts and now by position, at the
+front of the vector. On small inputs or when target is close to the size of the input,
+BracketedSort falls back to the `next` algorithm directly. Otherwise, BracketedSort uses the
+`next` algorithm only to compute quantiles of the sample and to find the target within the
+small chunk.
+
+## Performance
+
+If the `next` algorithm has `O(n * log(n))` runtime and the input is not pathological then
+the runtime of this algorithm is `O(n + k * log(k))` where `n` is the length of the input
+and `k` is `length(target)`. On pathological inputs the asymptotic runtime is the same as
+the runtime of the `next` algorithm.
+
+BracketedSort itself does not allocate. If `next` is in-place then BracketedSort is also
+in-place. If `next` is not in place, and it's space usage increases monotonically with input
+length then BracketedSort's maximum space usage will never be more than the space usage
+of `next` on the input BracketedSort receives. For large nonpathological inputs and targets
+substantially smaller than the size of the input, BracketedSort's maximum memory usage will
+be much less than `next`'s. If the maximum additional space usage of `next` scales linearly
+then for small k the average* maximum additional space usage of BracketedSort will be
+`O(n^(2.3/3))`.
+
+By default, BracketedSort uses the `O(n)` space and `O(n + k log k)` runtime
+`ScratchQuickSort` algorithm recursively.
+
+*Sorting is unable to depend on Random.jl because Random.jl depends on sorting.
+ Consequently, we use `hash` as a source of randomness. The average runtime guarantees
+ assume that `hash(x::Int)` produces a random result. However, as this randomization is
+ deterministic, if you try hard enough you can find inputs that consistently reach the
+ worst case bounds. Actually constructing such inputs is an exercise left to the reader.
+ Have fun :).
+
+Characteristics:
+  * *unstable*: does not preserve the ordering of elements that compare equal
+    (e.g. "a" and "A" in a sort of letters that ignores case).
+  * *in-place* in memory if the `next` algorithm is in-place.
+  * *estimate-and-filter*: strategy
+  * *linear runtime* if `length(target)` is constant and `next` is reasonable
+  * *n + k log k* worst case runtime if `next` has that runtime.
+  * *pathological inputs* can significantly increase constant factors.
+"""
+struct BracketedSort{T, F} <: Algorithm
+    target::T
+    get_next::F
+end
+
+# TODO: this composition between BracketedSort and ScratchQuickSort does not bring me joy
+BracketedSort(k) = BracketedSort(k, k -> InitialOptimizations(ScratchQuickSort(k)))
+
+function bracket_kernel!(v::AbstractVector, lo, hi, lo_signpost, hi_signpost, o)
+    i = 0
+    count_below = 0
+    checkbounds(v, lo:hi)
+    for j in lo:hi
+        x = @inbounds v[j]
+        a = lo_signpost !== nothing && lt(o, x, lo_signpost)
+        b = hi_signpost === nothing || !lt(o, hi_signpost, x)
+        count_below += a
+        # if a != b # This branch is almost never taken, so making it branchless is bad.
+        #     @inbounds v[i], v[j] = v[j], v[i]
+        #     i += 1
+        # end
+        c = a != b # JK, this is faster.
+        k = i * c + j
+        # Invariant: @assert firstindex(v) ≤ lo ≤ i + j ≤ k ≤ j ≤ hi ≤ lastindex(v)
+        @inbounds v[j], v[k] = v[k], v[j]
+        i += c - 1
+    end
+    count_below, i+hi
+end
+
+function move!(v, target, source)
+    # This function never dominates runtime—only add `@inbounds` if you can demonstrate a
+    # performance improvement. And if you do, also double check behavior when `target`
+    # is out of bounds.
+    @assert length(target) == length(source)
+    if length(target) == 1 || isdisjoint(target, source)
+        for (i, j) in zip(target, source)
+            v[i], v[j] = v[j], v[i]
+        end
+    else
+        @assert minimum(source) <= minimum(target)
+        reverse!(v, minimum(source), maximum(target))
+        reverse!(v, minimum(target), maximum(target))
+    end
+end
+
+function _sort!(v::AbstractVector, a::BracketedSort, o::Ordering, kw)
+    @getkw lo hi scratch
+    # TODO for further optimization: reuse scratch between trials better, from signpost
+    # selection to recursive calls, and from the fallback (but be aware of type stability,
+    # especially when sorting IEEE floats.
+
+    # We don't need to bounds check target because that is done higher up in the stack
+    # However, we cannot assume the target is inbounds.
+    lo < hi || return scratch
+    ln = hi - lo + 1
+
+    # This is simply a precomputed short-circuit to avoid doing scalar math for small inputs.
+    # It does not change dispatch at all.
+    ln < 260 && return _sort!(v, a.get_next(a.target), o, kw)
+
+    target = a.target
+    k = cbrt(ln)
+    k2 = round(Int, k^2)
+    k2ln = k2/ln
+    offset = .15k*top_set_bit(k2) # TODO for further optimization: tune this
+    lo_signpost_i, hi_signpost_i =
+        (floor(Int, (tar - lo) * k2ln + lo + off) for (tar, off) in
+            ((minimum(target), -offset), (maximum(target), offset)))
+    lastindex_sample = lo+k2-1
+    expected_middle_ln = (min(lastindex_sample, hi_signpost_i) - max(lo, lo_signpost_i) + 1) / k2ln
+    # This heuristic is complicated because it fairly accurately reflects the runtime of
+    # this algorithm which is necessary to get good dispatch when both the target is large
+    # and the input are large.
+    # expected_middle_ln is a float and k2 is significantly below typemax(Int), so this will
+    # not overflow:
+    # TODO move target from alg to kw to avoid this ickyness:
+    ln <= 130 + 2k2 + 2expected_middle_ln && return _sort!(v, a.get_next(a.target), o, kw)
+
+    # We store the random sample in
+    #     sample = view(v, lo:lo+k2)
+    # but views are not quite as fast as using the input array directly,
+    # so we don't actually construct this view at runtime.
+
+    # TODO for further optimization: handle lots of duplicates better.
+    # Right now lots of duplicates rounds up when it could use some super fast optimizations
+    # in some cases.
+    # e.g.
+    #
+    # Target:                      |----|
+    # Sorted input: 000000000000000000011111112222223333333333
+    #
+    # Will filter all zeros and ones to the front when it could just take the first few
+    # it encounters. This optimization would be especially potent when `allequal(ans)` and
+    # equal elements are egal.
+
+    # 3 random trials should typically give us 0.99999 reliability; we can assume
+    # the input is pathological and abort to fallback if we fail three trials.
+    seed = hash(ln, Int === Int64 ? 0x85eb830e0216012d : 0xae6c4e15)
+    for attempt in 1:3
+        seed = hash(attempt, seed)
+        for i in lo:lo+k2-1
+            j = mod(hash(i, seed), i:hi) # TODO for further optimization: be sneaky and remove this division
+            v[i], v[j] = v[j], v[i]
+        end
+        count_below, lastindex_middle = if lo_signpost_i <= lo && lastindex_sample <= hi_signpost_i
+            # The heuristics higher up in this function that dispatch to the `next`
+            # algorithm should prevent this from happening.
+            # Specifically, this means that expected_middle_ln == ln, so
+            # ln <= ... + 2.0expected_middle_ln && return ...
+            # will trigger.
+            @assert false
+            # But if it does happen, the kernel reduces to
+            0, hi
+        elseif lo_signpost_i <= lo
+            _sort!(v, a.get_next(hi_signpost_i), o, (;kw..., hi=lastindex_sample))
+            bracket_kernel!(v, lo, hi, nothing, v[hi_signpost_i], o)
+        elseif lastindex_sample <= hi_signpost_i
+            _sort!(v, a.get_next(lo_signpost_i), o, (;kw..., hi=lastindex_sample))
+            bracket_kernel!(v, lo, hi, v[lo_signpost_i], nothing, o)
+        else
+            # TODO for further optimization: don't sort the middle elements
+            _sort!(v, a.get_next(lo_signpost_i:hi_signpost_i), o, (;kw..., hi=lastindex_sample))
+            bracket_kernel!(v, lo, hi, v[lo_signpost_i], v[hi_signpost_i], o)
+        end
+        target_in_middle = target .- count_below
+        if lo <= minimum(target_in_middle) && maximum(target_in_middle) <= lastindex_middle
+            scratch = _sort!(v, a.get_next(target_in_middle), o, (;kw..., hi=lastindex_middle))
+            move!(v, target, target_in_middle)
+            return scratch
+        end
+        # This line almost never runs.
+    end
+    # This line only runs on pathological inputs. Make sure it's covered by tests :)
+    _sort!(v, a.get_next(target), o, kw)
+end
+
+
+"""
+    StableCheckSorted(next) isa Base.Sort.Algorithm
 
 Check if an input is sorted and/or reverse-sorted.
 
@@ -1208,7 +1456,7 @@ end
 ## default sorting policy ##
 
 """
-    InitialOptimizations(next) <: Algorithm
+    InitialOptimizations(next) isa Base.Sort.Algorithm
 
 Attempt to apply a suite of low-cost optimizations to the input vector before sorting. These
 optimizations may be automatically applied by the `sort!` family of functions when
@@ -1220,29 +1468,26 @@ future versions of Julia.
 If `next` is stable, then `InitialOptimizations(next)` is also stable.
 
 The specific optimizations attempted by `InitialOptimizations` are
-[`MissingOptimization`](@ref), [`BoolOptimization`](@ref), dispatch to
-[`InsertionSort`](@ref) for inputs with `length <= 10`, and [`IEEEFloatOptimization`](@ref).
-"""
-InitialOptimizations(next) = MissingOptimization(
-    BoolOptimization(
-        Small{10}(
-            IEEEFloatOptimization(
-                next))))
+[`SubArrayOptimization`](@ref), [`MissingOptimization`](@ref), [`BoolOptimization`](@ref),
+dispatch to [`InsertionSort`](@ref) for inputs with `length <= 10`, and
+[`IEEEFloatOptimization`](@ref).
 """
-    DEFAULT_STABLE
-
-The default sorting algorithm.
-
-This algorithm is guaranteed to be stable (i.e. it will not reorder elements that compare
-equal). It makes an effort to be fast for most inputs.
+InitialOptimizations(next) = SubArrayOptimization(
+    MissingOptimization(
+        BoolOptimization(
+            Small{10}(
+                IEEEFloatOptimization(
+                    next)))))
 
-The algorithms used by `DEFAULT_STABLE` are an implementation detail. See extended help
-for the current dispatch system.
+"""
+    struct DefaultStable <: Algorithm end
 
-# Extended Help
+`DefaultStable` is an algorithm which indicates that a fast, general purpose sorting
+algorithm should be used, but does not specify exactly which algorithm.
 
-`DEFAULT_STABLE` is composed of two parts: the [`InitialOptimizations`](@ref) and a hybrid
-of Radix, Insertion, Counting, Quick sorts.
+Currently, when sorting short NTuples, this is an unrolled mergesort, and otherwise it is
+composed of two parts: the [`InitialOptimizations`](@ref) and a hybrid of Radix, Insertion,
+Counting, Quick sorts.
 
 We begin with MissingOptimization because it has no runtime cost when it is not
 triggered and can enable other optimizations to be applied later. For example,
@@ -1295,14 +1540,46 @@ Next, we [`ConsiderCountingSort`](@ref). If the range the input is small compare
 length, we apply [`CountingSort`](@ref).
 
 Next, we [`ConsiderRadixSort`](@ref). This is similar to the dispatch to counting sort,
-but we conside rthe number of _bits_ in the range, rather than the range itself.
+but we consider the number of _bits_ in the range, rather than the range itself.
 Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that reach this
 stage.
 
 Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and
 otherwise we dispatch to [`ScratchQuickSort`](@ref).
 """
-const DEFAULT_STABLE = InitialOptimizations(
+struct DefaultStable <: Algorithm end
+
+"""
+    DEFAULT_STABLE
+
+The default sorting algorithm.
+
+This algorithm is guaranteed to be stable (i.e. it will not reorder elements that compare
+equal). It makes an effort to be fast for most inputs.
+
+The algorithms used by `DEFAULT_STABLE` are an implementation detail. See the docstring
+of `Base.Sort.DefaultStable` for the current dispatch system.
+"""
+const DEFAULT_STABLE = DefaultStable()
+
+"""
+    DefaultUnstable <: Algorithm
+
+Like [`DefaultStable`](@ref), but does not guarantee stability.
+"""
+struct DefaultUnstable <: Algorithm end
+
+"""
+    DEFAULT_UNSTABLE
+
+An efficient sorting algorithm which may or may not be stable.
+
+The algorithms used by `DEFAULT_UNSTABLE` are an implementation detail. They are currently
+the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to change in future.
+"""
+const DEFAULT_UNSTABLE = DefaultUnstable()
+
+const _DEFAULT_ALGORITHMS_FOR_VECTORS = InitialOptimizations(
     IsUIntMappable(
         Small{40}(
             CheckSorted(
@@ -1313,15 +1590,10 @@ const DEFAULT_STABLE = InitialOptimizations(
                                 ScratchQuickSort())))))),
         StableCheckSorted(
             ScratchQuickSort())))
-"""
-    DEFAULT_UNSTABLE
 
-An efficient sorting algorithm.
+_sort!(v::AbstractVector, ::Union{DefaultStable, DefaultUnstable}, o::Ordering, kw) =
+    _sort!(v, _DEFAULT_ALGORITHMS_FOR_VECTORS, o, kw)
 
-The algorithms used by `DEFAULT_UNSTABLE` are an implementation detail. They are currently
-the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to change in future.
-"""
-const DEFAULT_UNSTABLE = DEFAULT_STABLE
 const SMALL_THRESHOLD  = 20
 
 function Base.show(io::IO, alg::Algorithm)
@@ -1351,19 +1623,21 @@ defalg(v::AbstractArray) = DEFAULT_STABLE
 defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE
 defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation
 defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation
+defalg(v) = DEFAULT_STABLE
 
 """
-    sort!(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    sort!(v; alg::Base.Sort.Algorithm=Base.Sort.defalg(v), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
 
-Sort the vector `v` in place. A stable algorithm is used by default: the
-ordering of elements that compare equal is preserved. A specific algorithm can
-be selected via the `alg` keyword (see [Sorting Algorithms](@ref) for available
-algorithms).
+Mutate the vector `v` so that it is sorted.
+
+A stable algorithm is used by default: the ordering of elements that
+compare equal is preserved. A specific algorithm can be selected via the
+`alg` keyword (see [Sorting Algorithms](@ref) for available algorithms).
 
 Elements are first transformed with the function `by` and then compared
 according to either the function `lt` or the ordering `order`. Finally, the
 resulting order is reversed if `rev=true` (this preserves forward stability:
-elements that compare equal are not reversed). The current implemention applies
+elements that compare equal are not reversed). The current implementation applies
 the `by` transformation before each comparison rather than once per element.
 
 Passing an `lt` other than `isless` along with an `order` other than
@@ -1430,13 +1704,16 @@ julia> v = [(1, "c"), (3, "a"), (2, "b")]; sort!(v, by = x -> x[2]); v
  (2, "b")
  (1, "c")
 
-julia> sort(0:3, by=x->x-2, order=Base.Order.By(abs)) # same as sort(0:3, by=abs(x->x-2))
+julia> sort(0:3, by=x->x-2, order=Base.Order.By(abs))
 4-element Vector{Int64}:
  2
  1
  3
  0
 
+julia> sort(0:3, by=x->x-2, order=Base.Order.By(abs)) == sort(0:3, by=x->abs(x-2))
+true
+
 julia> sort([2, NaN, 1, NaN, 3]) # correct sort with default lt=isless
 5-element Vector{Float64}:
    1.0
@@ -1466,14 +1743,19 @@ function sort!(v::AbstractVector{T};
 end
 
 """
-    sort(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    sort(v; alg::Base.Sort.Algorithm=Base.Sort.defalg(v), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
+    sort(v::NTuple; kws...)::NTuple
 
 Variant of [`sort!`](@ref) that returns a sorted copy of `v` leaving `v` itself unmodified.
 
-Uses `Base.copymutable` to support immutable collections and iterables.
+When calling `sort` on the [`keys`](@ref) or [`values](@ref) of a dictionary, `v` is
+collected and then sorted.
+
+!!! compat "Julia 1.12"
+    Sorting `NTuple`s requires Julia 1.12 or later.
 
-!!! compat "Julia 1.10"
-    `sort` of arbitrary iterables requires at least Julia 1.10.
+!!! compat "Julia 1.13"
+    Sorting keys sets and values iterators requires Julia 1.13 or later.
 
 # Examples
 ```jldoctest
@@ -1490,34 +1772,46 @@ julia> v
  3
  1
  2
+
+julia> sort(values(Dict('a'=>2, 'b'=>1)))
+2-element Vector{Int64}:
+ 1
+ 2
 ```
 """
-function sort(v; kws...)
-    size = IteratorSize(v)
-    size == HasShape{0}() && throw(ArgumentError("$v cannot be sorted"))
-    size == IsInfinite() && throw(ArgumentError("infinite iterator $v cannot be sorted"))
-    sort!(copymutable(v); kws...)
-end
-sort(v::AbstractVector; kws...) = sort!(copymutable(v); kws...) # for method disambiguation
-sort(::AbstractString; kws...) =
-    throw(ArgumentError("sort(::AbstractString) is not supported"))
-sort(::Tuple; kws...) =
-    throw(ArgumentError("sort(::Tuple) is only supported for NTuples"))
-
-function sort(x::NTuple{N}; lt::Function=isless, by::Function=identity,
-              rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) where N
-    o = ord(lt,by,rev,order)
-    if N > 9
-        v = sort!(copymutable(x), DEFAULT_STABLE, o)
-        tuple((v[i] for i in 1:N)...)
+sort(v::AbstractVector; kws...) = sort!(copymutable(v); kws...)
+
+const COLLECT_ON_SORT_TYPES = Union{Base.KeySet, Base.ValueIterator}
+sort(v::COLLECT_ON_SORT_TYPES; kws...) = sort!(collect(v); kws...)
+
+function sort(x::NTuple;
+              alg::Algorithm=defalg(x),
+              lt=isless,
+              by=identity,
+              rev::Union{Bool,Nothing}=nothing,
+              order::Ordering=Forward,
+              scratch::Union{Vector, Nothing}=nothing)
+    # Can't do this check with type parameters because of https://github.com/JuliaLang/julia/issues/56698
+    scratch === nothing || eltype(x) == eltype(scratch) || throw(ArgumentError("scratch has the wrong eltype"))
+    _sort(x, alg, ord(lt,by,rev,order), (;scratch))::typeof(x)
+end
+# Folks who want to hack internals can define a new _sort(x::NTuple, ::TheirAlg, o::Ordering)
+# or _sort(x::NTuple{N, TheirType}, ::DefaultStable, o::Ordering) where N
+function _sort(x::NTuple, a::Union{DefaultStable, DefaultUnstable}, o::Ordering, kw)
+    # The unrolled tuple sort is prohibitively slow to compile for length > 9.
+    # See https://github.com/JuliaLang/julia/pull/46104#issuecomment-1435688502 for benchmarks
+    if length(x) > 9
+        v = copymutable(x)
+        _sort!(v, a, o, kw)
+        typeof(x)(v)
     else
-        _sort(x, o)
+        _mergesort(x, o)
     end
 end
-_sort(x::Union{NTuple{0}, NTuple{1}}, o::Ordering) = x
-function _sort(x::NTuple, o::Ordering)
+_mergesort(x::Union{NTuple{0}, NTuple{1}}, o::Ordering) = x
+function _mergesort(x::NTuple, o::Ordering)
     a, b = Base.IteratorsMD.split(x, Val(length(x)>>1))
-    merge(_sort(a, o), _sort(b, o), o)
+    merge(_mergesort(a, o), _mergesort(b, o), o)
 end
 merge(x::NTuple, y::NTuple{0}, o::Ordering) = x
 merge(x::NTuple{0}, y::NTuple, o::Ordering) = y
@@ -1525,11 +1819,10 @@ merge(x::NTuple{0}, y::NTuple{0}, o::Ordering) = x # Method ambiguity
 merge(x::NTuple, y::NTuple, o::Ordering) =
     (lt(o, y[1], x[1]) ? (y[1], merge(x, tail(y), o)...) : (x[1], merge(tail(x), y, o)...))
 
-
 ## partialsortperm: the permutation to sort the first k elements of an array ##
 
 """
-    partialsortperm(v, k; by=ientity, lt=isless, rev=false)
+    partialsortperm(v, k; by=identity, lt=isless, rev=false)
 
 Return a partial permutation `I` of the vector `v`, so that `v[I]` returns values of a fully
 sorted version of `v` at index `k`. If `k` is a range, a vector of indices is returned; if
@@ -1583,6 +1876,8 @@ v[ix[k]] == partialsort(v, k)
 The return value is the `k`th element of `ix` if `k` is an integer, or view into `ix` if `k` is
 a range.
 
+$(Base._DOCS_ALIASING_WARNING)
+
 # Examples
 ```jldoctest
 julia> v = [3, 1, 2, 1];
@@ -1624,7 +1919,7 @@ end
 ## sortperm: the permutation to sort an array ##
 
 """
-    sortperm(A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward, [dims::Integer])
+    sortperm(A; alg::Base.Sort.Algorithm=Base.Sort.DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward, [dims::Integer])
 
 Return a permutation vector or array `I` that puts `A[I]` in sorted order along the given dimension.
 If `A` has more than one dimension, then the `dims` keyword argument must be specified. The order is specified
@@ -1702,11 +1997,13 @@ end
 
 
 """
-    sortperm!(ix, A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward, [dims::Integer])
+    sortperm!(ix, A; alg::Base.Sort.Algorithm=Base.Sort.DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward, [dims::Integer])
 
 Like [`sortperm`](@ref), but accepts a preallocated index vector or array `ix` with the same `axes` as `A`.
 `ix` is initialized to contain the values `LinearIndices(A)`.
 
+$(Base._DOCS_ALIASING_WARNING)
+
 !!! compat "Julia 1.9"
     The method accepting `dims` requires at least Julia 1.9.
 
@@ -1788,7 +2085,7 @@ end
 ## sorting multi-dimensional arrays ##
 
 """
-    sort(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    sort(A; dims::Integer, alg::Base.Sort.Algorithm=Base.Sort.defalg(A), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
 
 Sort a multidimensional array `A` along the given dimension.
 See [`sort!`](@ref) for a description of possible
@@ -1860,7 +2157,7 @@ end
 
 
 """
-    sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
+    sort!(A; dims::Integer, alg::Base.Sort.Algorithm=Base.Sort.defalg(A), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward)
 
 Sort the multidimensional array `A` along dimension `dims`.
 See the one-dimensional version of [`sort!`](@ref) for a description of
@@ -1896,30 +2193,43 @@ function sort!(A::AbstractArray{T};
                by=identity,
                rev::Union{Bool,Nothing}=nothing,
                order::Ordering=Forward, # TODO stop eagerly over-allocating.
-               scratch::Union{Vector{T}, Nothing}=Vector{T}(undef, size(A, dims))) where T
-    __sort!(A, Val(dims), maybe_apply_initial_optimizations(alg), ord(lt, by, rev, order), scratch)
-end
-function __sort!(A::AbstractArray{T}, ::Val{K},
-                alg::Algorithm,
-                order::Ordering,
-                scratch::Union{Vector{T}, Nothing}) where {K,T}
+               scratch::Union{Vector{T}, Nothing}=size(A, dims) < 10 ? nothing : Vector{T}(undef, size(A, dims))) where T
     nd = ndims(A)
-
-    1 <= K <= nd || throw(ArgumentError("dimension out of range"))
-
-    remdims = ntuple(i -> i == K ? 1 : axes(A, i), nd)
-    for idx in CartesianIndices(remdims)
-        Av = view(A, ntuple(i -> i == K ? Colon() : idx[i], nd)...)
-        sort!(Av; alg, order, scratch)
+    1 <= dims <= nd || throw(ArgumentError("dimension out of range"))
+    alg2 = maybe_apply_initial_optimizations(alg)
+    order2 = ord(lt, by, rev, order)
+    foreach(ntuple(Val, nd)) do d
+        get_value(d) == dims || return
+        # We assume that an Integer between 1 and nd must be equal to one of the
+        # values 1:nd. If this assumption is false, then what's an integer? and
+        # also sort! will silently do nothing.
+
+        idxs = CartesianIndices(ntuple(i -> i == get_value(d) ? 1 : axes(A, i), ndims(A)))
+        get_view(idx) = view(A, ntuple(i -> i == get_value(d) ? Colon() : idx[i], ndims(A))...)
+        if d == Val(1) || size(A, get_value(d)) < 30
+            for idx in idxs
+                sort!(get_view(idx); alg=alg2, order=order2, scratch)
+            end
+        else
+            v = similar(get_view(first(idxs)))
+            for idx in idxs
+                vw = get_view(idx)
+                v .= vw
+                sort!(v; alg=alg2, order=order2, scratch)
+                vw .= v
+            end
+        end
+        A
     end
     A
 end
+get_value(::Val{x}) where x = x
 
 
 ## uint mapping to allow radix sorting primitives other than UInts ##
 
 """
-    UIntMappable(T::Type, order::Ordering)
+    UIntMappable(T::Type, order::Base.Order.Ordering)
 
 Return `typeof(uint_map(x::T, order))` if [`uint_map`](@ref) and
 [`uint_unmap`](@ref) are implemented.
@@ -1929,7 +2239,7 @@ If either is not implemented, return `nothing`.
 UIntMappable(T::Type, order::Ordering) = nothing
 
 """
-    uint_map(x, order::Ordering)::Unsigned
+    uint_map(x, order::Base.Order.Ordering)::Unsigned
 
 Map `x` to an un unsigned integer, maintaining sort order.
 
@@ -1943,7 +2253,7 @@ See also: [`UIntMappable`](@ref) [`uint_unmap`](@ref)
 function uint_map end
 
 """
-    uint_unmap(T::Type, u::Unsigned, order::Ordering)
+    uint_unmap(T::Type, u::Unsigned, order::Base.Order.Ordering)
 
 Reconstruct the unique value `x::T` that uint_maps to `u`. Satisfies
 `x === uint_unmap(T, uint_map(x::T, order), order)` for all `x <: T`.
@@ -1983,7 +2293,7 @@ UIntMappable(T::Type, order::ReverseOrdering) = UIntMappable(T, order.fwd)
 
 ### Vectors
 
-# Convert v to unsigned integers in place, maintaining sort order.
+# Convert v to unsigned integers in-place, maintaining sort order.
 function uint_map!(v::AbstractVector, lo::Integer, hi::Integer, order::Ordering)
     u = reinterpret(UIntMappable(eltype(v), order), v)
     @inbounds for i in lo:hi
@@ -2013,9 +2323,9 @@ struct MergeSortAlg     <: Algorithm end
 """
     PartialQuickSort{T <: Union{Integer,OrdinalRange}}
 
-Indicate that a sorting function should use the partial quick sort
-algorithm. Partial quick sort returns the smallest `k` elements sorted from smallest
-to largest, finding them and sorting them using [`QuickSort`](@ref).
+Indicate that a sorting function should use the partial quick sort algorithm.
+`PartialQuickSort(k)` is like `QuickSort`, but is only required to find and
+sort the elements that would end up in `v[k]` were `v` fully sorted.
 
 Characteristics:
   * *not stable*: does not preserve the ordering of elements that
@@ -2024,7 +2334,7 @@ Characteristics:
   * *in-place* in memory.
   * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
 
-  Note that `PartialQuickSort(k)` does not necessarily sort the whole array. For example,
+Note that `PartialQuickSort(k)` does not necessarily sort the whole array. For example,
 
 ```jldoctest
 julia> x = rand(100);
@@ -2251,7 +2561,7 @@ function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw)
     @getkw lo hi scratch legacy_dispatch_entry
     if legacy_dispatch_entry === a
         # This error prevents infinite recursion for unknown algorithms
-        throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o)), ::Any) is not defined"))
+        throw(ArgumentError(LazyString("Base.Sort._sort!(::", typeof(v), ", ::", typeof(a), ", ::", typeof(o), ", ::Any) is not defined")))
     else
         sort!(v, lo, hi, a, o)
         scratch
diff --git a/base/special/cbrt.jl b/base/special/cbrt.jl
index 9fda5c41fb09e..ce3a3d67e3ba4 100644
--- a/base/special/cbrt.jl
+++ b/base/special/cbrt.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# Float32/Float64 based on C implementations from FDLIBM (http://www.netlib.org/fdlibm/)
+# Float32/Float64 based on C implementations from FDLIBM (https://www.netlib.org/fdlibm/)
 # and FreeBSD:
 #
 ## ====================================================
diff --git a/base/special/exp.jl b/base/special/exp.jl
index 9cca6f568305f..797e5474fe255 100644
--- a/base/special/exp.jl
+++ b/base/special/exp.jl
@@ -216,6 +216,7 @@ end
     small_part =  muladd(jU, expm1b_kernel(base, r), jL) + jU
 
     if !(abs(x) <= SUBNORM_EXP(base, T))
+        isnan(x) && return x
         x >= MAX_EXP(base, T) && return Inf
         x <= MIN_EXP(base, T) && return 0.0
         if k <= -53
@@ -223,7 +224,7 @@ end
             twopk = (k + UInt64(53)) << 52
             return reinterpret(T, twopk + reinterpret(UInt64, small_part))*0x1p-53
         end
-        #k == 1024 && return (small_part * 2.0) * 2.0^1023
+        #k == 1024 && return (small_part * 2.0) * 0x1p1023
     end
     twopk = Int64(k) << 52
     return reinterpret(T, twopk + reinterpret(Int64, small_part))
@@ -243,6 +244,7 @@ end
     hi, lo = Base.canonicalize2(1.0, kern)
     small_part = fma(jU, hi, muladd(jU, (lo+xlo), very_small))
     if !(abs(x) <= SUBNORM_EXP(base, T))
+        isnan(x) && return x
         x >= MAX_EXP(base, T) && return Inf
         x <= MIN_EXP(base, T) && return 0.0
         if k <= -53
@@ -250,7 +252,7 @@ end
             twopk = (k + UInt64(53)) << 52
             return reinterpret(T, twopk + reinterpret(UInt64, small_part))*0x1p-53
         end
-        #k == 1024 && return (small_part * 2.0) * 2.0^1023
+        k == 1024 && return (small_part * 2.0) * 0x1p1023
     end
     twopk = Int64(k) << 52
     return reinterpret(T, twopk + reinterpret(Int64, small_part))
@@ -393,7 +395,7 @@ exp10(x)
         reinterpret(Float64, 0x0000_0000_0000_0001 << ((x + 1074) % UInt))
     else
         # We will cast everything to Int64 to avoid errors in case of Int128
-        # If x is a Int128, and is outside the range of Int64, then it is not -1023<x<=1023
+        # If x is an Int128, and is outside the range of Int64, then it is not -1023<x<=1023
         reinterpret(Float64, (exponent_bias(Float64) + (x % Int64)) << (significand_bits(Float64) % UInt))
     end
 end
@@ -460,7 +462,7 @@ function expm1(x::Float32)
     end
     x = Float64(x)
     N_float = round(x*Ln2INV(Float64))
-    N = unsafe_trunc(UInt64, N_float)
+    N = unsafe_trunc(Int64, N_float)
     r = muladd(N_float, Ln2(Float64), x)
     hi = evalpoly(r, (1.0, .5, 0.16666667546642386, 0.041666183019487026,
                       0.008332997481506921, 0.0013966479175977883, 0.0002004037059220124))
@@ -477,7 +479,7 @@ function expm1(x::Float16)
         return Float16(x*evalpoly(x, (1f0, .5f0, 0.16666628f0, 0.04166785f0, 0.008351848f0, 0.0013675707f0)))
     end
     N_float = round(x*Ln2INV(Float32))
-    N = unsafe_trunc(UInt32, N_float)
+    N = unsafe_trunc(Int32, N_float)
     r = muladd(N_float, Ln2(Float32), x)
     hi = evalpoly(r, (1f0, .5f0, 0.16666667f0, 0.041665863f0, 0.008333111f0, 0.0013981499f0, 0.00019983904f0))
     small_part = r*hi
@@ -489,7 +491,7 @@ end
     expm1(x)
 
 Accurately compute ``e^x-1``. It avoids the loss of precision involved in the direct
-evaluation of exp(x)-1 for small values of x.
+evaluation of exp(x) - 1 for small values of x.
 # Examples
 ```jldoctest
 julia> expm1(1e-16)
diff --git a/base/special/log.jl b/base/special/log.jl
index 5d7f1c8118724..029394b7a63f1 100644
--- a/base/special/log.jl
+++ b/base/special/log.jl
@@ -155,14 +155,11 @@ logbU(::Type{Float64},::Val{10}) = 0.4342944819032518
 logbL(::Type{Float64},::Val{10}) = 1.098319650216765e-17
 
 # Procedure 1
-# XXX we want to mark :consistent-cy here so that this function can be concrete-folded,
-# because the effect analysis currently can't prove it in the presence of `@inbounds` or
-# `:boundscheck`, but still the access to `t_log_Float64` is really safe here
-Base.@assume_effects :consistent @inline function log_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,base=Val(:ℯ))
+@inline function log_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,base=Val(:ℯ))
     jp = unsafe_trunc(Int,128.0*F)-127
 
     ## Steps 1 and 2
-    @inbounds hi,lo = t_log_Float64[jp]
+    Base.@assume_effects :nothrow :noub @inbounds hi,lo = t_log_Float64[jp]
     l_hi = mf* 0.6931471805601177 + hi
     l_lo = mf*-1.7239444525614835e-13 + lo
 
@@ -216,14 +213,11 @@ end
 end
 
 # Procedure 1
-# XXX we want to mark :consistent-cy here so that this function can be concrete-folded,
-# because the effect analysis currently can't prove it in the presence of `@inbounds` or
-# `:boundscheck`, but still the access to `t_log_Float32` is really safe here
-Base.@assume_effects :consistent @inline function log_proc1(y::Float32,mf::Float32,F::Float32,f::Float32,base=Val(:ℯ))
+@inline function log_proc1(y::Float32,mf::Float32,F::Float32,f::Float32,base=Val(:ℯ))
     jp = unsafe_trunc(Int,128.0f0*F)-127
 
     ## Steps 1 and 2
-    @inbounds hi = t_log_Float32[jp]
+    Base.@assume_effects :nothrow :noub @inbounds hi = t_log_Float32[jp]
     l = mf*0.6931471805599453 + hi
 
     ## Step 3
@@ -260,14 +254,14 @@ end
     Float32(logb(Float32, base)*(u64 + q))
 end
 
-log2(x::Float32)  = _log(x, Val(2),  :log2)
-log(x::Float32)   = _log(x, Val(:ℯ), :log)
-log10(x::Float32) = _log(x, Val(10), :log10)
-log2(x::Float64)  = _log(x, Val(2),  :log2)
-log(x::Float64)   = _log(x, Val(:ℯ), :log)
-log10(x::Float64) = _log(x, Val(10), :log10)
+@noinline log2(x::Float32)  = _log(x, Val(2),  :log2)
+@noinline log(x::Float32)   = _log(x, Val(:ℯ), :log)
+@noinline log10(x::Float32) = _log(x, Val(10), :log10)
+@noinline log2(x::Float64)  = _log(x, Val(2),  :log2)
+@noinline log(x::Float64)   = _log(x, Val(:ℯ), :log)
+@noinline log10(x::Float64) = _log(x, Val(10), :log10)
 
-function _log(x::Float64, base, func)
+@inline function _log(x::Float64, base, func::Symbol)
     if x > 0.0
         x == Inf && return x
 
@@ -294,15 +288,15 @@ function _log(x::Float64, base, func)
 
         return log_proc1(y,mf,F,f,base)
     elseif x == 0.0
-        -Inf
+        return -Inf
     elseif isnan(x)
-        NaN
+        return NaN
     else
         throw_complex_domainerror(func, x)
     end
 end
 
-function _log(x::Float32, base, func)
+@inline function _log(x::Float32, base, func::Symbol)
     if x > 0f0
         x == Inf32 && return x
 
@@ -327,11 +321,11 @@ function _log(x::Float32, base, func)
         F = (y + 65536.0f0) - 65536.0f0 # 0x1p-7*round(0x1p7*y)
         f = y-F
 
-        log_proc1(y,mf,F,f,base)
+        return log_proc1(y,mf,F,f,base)
     elseif x == 0f0
-        -Inf32
+        return -Inf32
     elseif isnan(x)
-        NaN32
+        return NaN32
     else
         throw_complex_domainerror(func, x)
     end
@@ -562,17 +556,17 @@ end
 # Adapted and modified from https://github.com/ARM-software/optimized-routines/blob/master/math/pow.c
 # Copyright (c) 2018-2020, Arm Limited. (which is also MIT licensed)
 # note that this isn't an exact translation as this version compacts the table to reduce cache pressure.
-function _log_ext(xu)
+function _log_ext(xu::UInt64)
     # x = 2^k z; where z is in range [0x1.69555p-1,0x1.69555p-0) and exact.
     # The range is split into N subintervals.
     # The ith subinterval contains z and c is near the center of the interval.
     tmp = reinterpret(Int64, xu - 0x3fe6955500000000) #0x1.69555p-1
-    i = (tmp >> 45) & 127
     z = reinterpret(Float64, xu - (tmp & 0xfff0000000000000))
     k = Float64(tmp >> 52)
     # log(x) = k*Ln2 + log(c) + log1p(z/c-1).
-    # getfield instead of getindex to satisfy effect analysis not knowing whether this is inbounds
-    t, logctail = getfield(t_log_table_compact, Int(i+1))
+    # N.B. :nothrow and :noub since `idx` is known to be `1 ≤ idx ≤ length(t_log_table_compact)`
+    idx = (tmp >> 45) & (length(t_log_table_compact)-1) + 1
+    t, logctail = Base.@assume_effects :nothrow :noub @inbounds t_log_table_compact[idx]
     invc, logc = log_tab_unpack(t)
     # Note: invc is j/N or j/N/2 where j is an integer in [N,2N) and
     # |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable.
diff --git a/base/special/pow.jl b/base/special/pow.jl
new file mode 100644
index 0000000000000..8009e72744ba8
--- /dev/null
+++ b/base/special/pow.jl
@@ -0,0 +1,142 @@
+@inline function use_power_by_squaring(n::Integer)
+    -2^12 <= n <= 3 * 2^13
+end
+
+# @constprop aggressive to help the compiler see the switch between the integer and float
+# variants for callers with constant `y`
+@constprop :aggressive function ^(x::Float64, y::Float64)
+    xu = reinterpret(UInt64, x)
+    xu == reinterpret(UInt64, 1.0) && return 1.0
+    # Exponents greater than this will always overflow or underflow.
+    # Note that NaN can pass through this, but that will end up fine.
+    if !(abs(y)<0x1.8p62)
+        isnan(y) && return y
+        y = sign(y)*0x1.8p62
+    end
+    yint = unsafe_trunc(Int64, y) # This is actually safe since julia freezes the result
+    yisint = y == yint
+    if yisint
+        yint == 0 && return 1.0
+        use_power_by_squaring(yint) && return @noinline pow_body(x, yint)
+    end
+    2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x === +0.0 or -0.0 (Inf * false === 0.0)
+    s = 1
+    if x < 0
+        !yisint && throw_exp_domainerror(x) # y isn't an integer
+        s = ifelse(isodd(yint), -1, 1)
+    end
+    !isfinite(x) && return copysign(x,s)*(y>0 || isnan(x))           # x is inf or NaN
+    return copysign(pow_body(abs(x), y), s)
+end
+
+# @constprop aggressive to help the compiler see the switch between the integer and float
+# variants for callers with constant `y`
+@constprop :aggressive function ^(x::T, y::T) where T <: Union{Float16, Float32}
+    x == 1 && return one(T)
+    # Exponents greater than this will always overflow or underflow.
+    # Note that NaN can pass through this, but that will end up fine.
+    max_exp = T == Float16 ? T(3<<14) : T(0x1.Ap30)
+    if !(abs(y)<max_exp)
+        isnan(y) && return y
+        y = sign(y)*max_exp
+    end
+    yint = unsafe_trunc(Int32, y) # This is actually safe since julia freezes the result
+    yisint = y == yint
+    if yisint
+        yint == 0 && return one(T)
+        use_power_by_squaring(yint) && return pow_body(x, yint)
+    end
+    s = 1
+    if x < 0
+        !yisint && throw_exp_domainerror(x) # y isn't an integer
+        s = ifelse(isodd(yint), -1, 1)
+    end
+    !isfinite(x) && return copysign(x,s)*(y>0 || isnan(x)) # x is inf or NaN
+    return copysign(pow_body(abs(x), y), s)
+end
+
+@constprop :aggressive @inline function ^(x::Float64, n::Integer)
+    n = clamp(n, Int64)
+    n == 0 && return one(x)
+    if use_power_by_squaring(n)
+        return pow_body(x, n)
+    else
+        s = ifelse(x < 0 && isodd(n), -1.0, 1.0)
+        x = abs(x)
+        y = float(n)
+        if y == n
+            return copysign(pow_body(x, y), s)
+        else
+            n2 = n % 1024
+            y = float(n - n2)
+            return pow_body(x, y) * copysign(pow_body(x, n2), s)
+        end
+    end
+end
+
+# @constprop aggressive to help the compiler see the switch between the integer and float
+# variants for callers with constant `y`
+@constprop :aggressive @inline function ^(x::T, n::Integer) where T <: Union{Float16, Float32}
+    n = clamp(n, Int32)
+    # Exponents greater than this will always overflow or underflow.
+    # Note that NaN can pass through this, but that will end up fine.
+    n == 0 && return one(x)
+    use_power_by_squaring(n) && return pow_body(x, n)
+    s = ifelse(x < 0 && isodd(n), -one(T), one(T))
+    x = abs(x)
+    return pow_body(x, widen(T)(n))
+end
+
+@assume_effects :foldable @noinline function pow_body(x::Float64, y::Float64)
+    xu = reinterpret(UInt64, x)
+    if xu < (UInt64(1)<<52) # x is subnormal
+        xu = reinterpret(UInt64, x * 0x1p52) # normalize x
+        xu &= ~sign_mask(Float64)
+        xu -= UInt64(52) << 52 # mess with the exponent
+    end
+    logxhi,logxlo = _log_ext(xu)
+    xyhi, xylo = two_mul(logxhi,y)
+    xylo = muladd(logxlo, y, xylo)
+    hi = xyhi+xylo
+    return @inline Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ))
+end
+
+@inline function pow_body(x::T, y) where T <: Union{Float16, Float32}
+    return T(exp2(log2(abs(widen(x))) * y))
+end
+
+@inline function pow_body(x::Union{Float16, Float32}, n::Int32)
+    n == -2 && return (i=inv(x); i*i)
+    n == 3 && return x*x*x #keep compatibility with literal_pow
+    n < 0 && return oftype(x, Base.power_by_squaring(inv(widen(x)), -n))
+    return oftype(x, Base.power_by_squaring(widen(x), n))
+end
+
+# compensated power by squaring
+# this method is only reliable for -2^20 < n < 2^20 (cf. #53881 #53886)
+@assume_effects :terminates_locally @noinline function pow_body(x::Float64, n::Integer)
+    y = 1.0
+    xnlo = -0.0
+    ynlo = 0.0
+    n == 3 && return x*x*x # keep compatibility with literal_pow
+    if n < 0
+        rx = inv(x)
+        n==-2 && return rx*rx #keep compatibility with literal_pow
+        isfinite(x) && (xnlo = -fma(x, rx, -1.) * rx)
+        x = rx
+        n = -n
+    end
+    while n > 1
+        if n&1 > 0
+            err = muladd(y, xnlo, x*ynlo)
+            y, ynlo = two_mul(x,y)
+            ynlo += err
+        end
+        err = x*2*xnlo
+        x, xnlo = two_mul(x, x)
+        xnlo += err
+        n >>>= 1
+    end
+    err = muladd(y, xnlo, x*ynlo)
+    return ifelse(isfinite(x) & isfinite(err), muladd(x, y, err), x*y)
+end
diff --git a/base/special/rem2pi.jl b/base/special/rem2pi.jl
new file mode 100644
index 0000000000000..2b8417416a5bd
--- /dev/null
+++ b/base/special/rem2pi.jl
@@ -0,0 +1,142 @@
+# multiples of pi/2, as double-double (ie with "tail")
+const pi1o2_h  = 1.5707963267948966     # convert(Float64, pi * BigFloat(1/2))
+const pi1o2_l  = 6.123233995736766e-17  # convert(Float64, pi * BigFloat(1/2) - pi1o2_h)
+
+const pi2o2_h  = 3.141592653589793      # convert(Float64, pi * BigFloat(1))
+const pi2o2_l  = 1.2246467991473532e-16 # convert(Float64, pi * BigFloat(1) - pi2o2_h)
+
+const pi3o2_h  = 4.71238898038469       # convert(Float64, pi * BigFloat(3/2))
+const pi3o2_l  = 1.8369701987210297e-16 # convert(Float64, pi * BigFloat(3/2) - pi3o2_h)
+
+const pi4o2_h  = 6.283185307179586      # convert(Float64, pi * BigFloat(2))
+const pi4o2_l  = 2.4492935982947064e-16 # convert(Float64, pi * BigFloat(2) - pi4o2_h)
+
+function rem2pi(x::Float64, ::RoundingMode{:Nearest})
+    isnan(x) && return x
+    isinf(x) && return NaN
+
+    abs(x) < pi && return x
+
+    n,y = rem_pio2_kernel(x)
+
+    if iseven(n)
+        if n & 2 == 2 # n % 4 == 2: add/subtract pi
+            if y.hi <= 0
+                return add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l)
+            else
+                return add22condh(y.hi,y.lo,-pi2o2_h,-pi2o2_l)
+            end
+        else          # n % 4 == 0: add 0
+            return y.hi+y.lo
+        end
+    else
+        if n & 2 == 2 # n % 4 == 3: subtract pi/2
+            return add22condh(y.hi,y.lo,-pi1o2_h,-pi1o2_l)
+        else          # n % 4 == 1: add pi/2
+            return add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l)
+        end
+    end
+end
+function rem2pi(x::Float64, ::RoundingMode{:ToZero})
+    isnan(x) && return x
+    isinf(x) && return NaN
+
+    ax = abs(x)
+    ax <= 2*Float64(pi,RoundDown) && return x
+
+    n,y = rem_pio2_kernel(ax)
+
+    if iseven(n)
+        if n & 2 == 2 # n % 4 == 2: add pi
+            z = add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l)
+        else          # n % 4 == 0: add 0 or 2pi
+            if y.hi > 0
+                z = y.hi+y.lo
+            else      # negative: add 2pi
+                z = add22condh(y.hi,y.lo,pi4o2_h,pi4o2_l)
+            end
+        end
+    else
+        if n & 2 == 2 # n % 4 == 3: add 3pi/2
+            z = add22condh(y.hi,y.lo,pi3o2_h,pi3o2_l)
+        else          # n % 4 == 1: add pi/2
+            z = add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l)
+        end
+    end
+    copysign(z,x)
+end
+function rem2pi(x::Float64, ::RoundingMode{:Down})
+    isnan(x) && return x
+    isinf(x) && return NaN
+
+    if x < pi4o2_h
+        if x >= 0
+            return x
+        elseif x > -pi4o2_h
+            return add22condh(x,0.0,pi4o2_h,pi4o2_l)
+        end
+    end
+
+    n,y = rem_pio2_kernel(x)
+
+    if iseven(n)
+        if n & 2 == 2 # n % 4 == 2: add pi
+            return add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l)
+        else          # n % 4 == 0: add 0 or 2pi
+            if y.hi > 0
+                return y.hi+y.lo
+            else      # negative: add 2pi
+                return add22condh(y.hi,y.lo,pi4o2_h,pi4o2_l)
+            end
+        end
+    else
+        if n & 2 == 2 # n % 4 == 3: add 3pi/2
+            return add22condh(y.hi,y.lo,pi3o2_h,pi3o2_l)
+        else          # n % 4 == 1: add pi/2
+            return add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l)
+        end
+    end
+end
+function rem2pi(x::Float64, ::RoundingMode{:Up})
+    isnan(x) && return x
+    isinf(x) && return NaN
+
+    if x > -pi4o2_h
+        if x <= 0
+            return x
+        elseif x < pi4o2_h
+            return add22condh(x,0.0,-pi4o2_h,-pi4o2_l)
+        end
+    end
+
+    n,y = rem_pio2_kernel(x)
+
+    if iseven(n)
+        if n & 2 == 2 # n % 4 == 2: sub pi
+            return add22condh(y.hi,y.lo,-pi2o2_h,-pi2o2_l)
+        else          # n % 4 == 0: sub 0 or 2pi
+            if y.hi < 0
+                return y.hi+y.lo
+            else      # positive: sub 2pi
+                return add22condh(y.hi,y.lo,-pi4o2_h,-pi4o2_l)
+            end
+        end
+    else
+        if n & 2 == 2 # n % 4 == 3: sub pi/2
+            return add22condh(y.hi,y.lo,-pi1o2_h,-pi1o2_l)
+        else          # n % 4 == 1: sub 3pi/2
+            return add22condh(y.hi,y.lo,-pi3o2_h,-pi3o2_l)
+        end
+    end
+end
+
+rem2pi(x::Float32, r::RoundingMode) = Float32(rem2pi(Float64(x), r))
+rem2pi(x::Float16, r::RoundingMode) = Float16(rem2pi(Float64(x), r))
+rem2pi(x::Int32, r::RoundingMode) = rem2pi(Float64(x), r)
+
+# general fallback
+function rem2pi(x::Integer, r::RoundingMode)
+    fx = float(x)
+    fx == x || throw(ArgumentError(LazyString(typeof(x), " argument to rem2pi is too large: ", x)))
+    rem2pi(fx, r)
+end
diff --git a/base/special/rem_pio2.jl b/base/special/rem_pio2.jl
index de5c4151df2d0..2bbe400aaec25 100644
--- a/base/special/rem_pio2.jl
+++ b/base/special/rem_pio2.jl
@@ -97,6 +97,7 @@ end
 
 """
     fromfraction(f::Int128)
+
 Compute a tuple of values `(z1,z2)` such that
     ``z1 + z2 == f / 2^128``
 and the significand of `z1` has 27 trailing zeros.
@@ -126,10 +127,7 @@ function fromfraction(f::Int128)
     return (z1,z2)
 end
 
-# XXX we want to mark :consistent-cy here so that this function can be concrete-folded,
-# because the effect analysis currently can't prove it in the presence of `@inbounds` or
-# `:boundscheck`, but still the accesses to `INV_2PI` are really safe here
-Base.@assume_effects :consistent function paynehanek(x::Float64)
+function paynehanek(x::Float64)
     # 1. Convert to form
     #
     #    x = X * 2^k,
@@ -168,15 +166,15 @@ Base.@assume_effects :consistent function paynehanek(x::Float64)
     idx = k >> 6
 
     shift = k - (idx << 6)
-    if shift == 0
-        @inbounds a1 = INV_2PI[idx+1]
-        @inbounds a2 = INV_2PI[idx+2]
-        @inbounds a3 = INV_2PI[idx+3]
+    Base.@assume_effects :nothrow :noub @inbounds if shift == 0
+        a1 = INV_2PI[idx+1]
+        a2 = INV_2PI[idx+2]
+        a3 = INV_2PI[idx+3]
     else
         # use shifts to extract the relevant 64 bit window
-        @inbounds a1 = (idx < 0 ? zero(UInt64) : INV_2PI[idx+1] << shift) | (INV_2PI[idx+2] >> (64 - shift))
-        @inbounds a2 = (INV_2PI[idx+2] << shift) | (INV_2PI[idx+3] >> (64 - shift))
-        @inbounds a3 = (INV_2PI[idx+3] << shift) | (INV_2PI[idx+4] >> (64 - shift))
+        a1 = (idx < 0 ? zero(UInt64) : INV_2PI[idx+1] << shift) | (INV_2PI[idx+2] >> (64 - shift))
+        a2 = (INV_2PI[idx+2] << shift) | (INV_2PI[idx+3] >> (64 - shift))
+        a3 = (INV_2PI[idx+3] << shift) | (INV_2PI[idx+4] >> (64 - shift))
     end
 
     # 3. Perform the multiplication:
@@ -214,6 +212,7 @@ end
 
 """
     rem_pio2_kernel(x::Union{Float32, Float64})
+
 Calculate `x` divided by `π/2` accurately for arbitrarily large `x`.
 Returns a pair `(k, r)`, where `k` is the quadrant of the result
 (multiple of π/2) and `r` is the remainder, such that ``k * π/2 = x - r``.
diff --git a/base/special/trig.jl b/base/special/trig.jl
index 5b2a23688ca6b..2119d5a356f3c 100644
--- a/base/special/trig.jl
+++ b/base/special/trig.jl
@@ -64,7 +64,7 @@ const DS6 = 1.58969099521155010221e-10
 """
     sin_kernel(yhi, ylo)
 
-Computes the sine on the interval [-π/4; π/4].
+Compute the sine on the interval [-π/4; π/4].
 """
 @inline function sin_kernel(y::DoubleFloat64)
     y² = y.hi*y.hi
@@ -165,11 +165,13 @@ end
 @noinline sincos_domain_error(x) = throw(DomainError(x, "sincos(x) is only defined for finite x."))
 
 """
-    sincos(x)
+    sincos(x::T) where T -> Tuple{float(T),float(T)}
 
 Simultaneously compute the sine and cosine of `x`, where `x` is in radians, returning
 a tuple `(sine, cosine)`.
 
+Throw a [`DomainError`](@ref) if `isinf(x)`, return a `(T(NaN), T(NaN))` if `isnan(x)`.
+
 See also [`cis`](@ref), [`sincospi`](@ref), [`sincosd`](@ref).
 """
 function sincos(x::T) where T<:Union{Float32, Float64}
@@ -783,22 +785,22 @@ end
 end
 
 """
-    sinpi(x)
+    sinpi(x::T) where T -> float(T)
 
 Compute ``\\sin(\\pi x)`` more accurately than `sin(pi*x)`, especially for large `x`.
 
+Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`.
+
 See also [`sind`](@ref), [`cospi`](@ref), [`sincospi`](@ref).
 """
-function sinpi(_x::T) where T<:Union{IEEEFloat, Rational}
+function sinpi(_x::T) where T<:IEEEFloat
     x = abs(_x)
     if !isfinite(x)
         isnan(x) && return x
-        throw(DomainError(x, "`x` cannot be infinite."))
+        throw(DomainError(x, "`sinpi(x)` is only defined for finite `x`."))
     end
     # For large x, answers are all 1 or zero.
-    if T <: AbstractFloat
-        x >= maxintfloat(T) && return copysign(zero(T), _x)
-    end
+    x >= maxintfloat(T) && return copysign(zero(T), _x)
 
     # reduce to interval [0, 0.5]
     n = round(2*x)
@@ -816,20 +818,22 @@ function sinpi(_x::T) where T<:Union{IEEEFloat, Rational}
     return ifelse(signbit(_x), -res, res)
 end
 """
-    cospi(x)
+    cospi(x::T) where T -> float(T)
 
 Compute ``\\cos(\\pi x)`` more accurately than `cos(pi*x)`, especially for large `x`.
+
+Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`.
+
+See also: [`cispi`](@ref), [`sincosd`](@ref), [`sinpi`](@ref).
 """
-function cospi(x::T) where T<:Union{IEEEFloat, Rational}
+function cospi(x::T) where T<:IEEEFloat
     x = abs(x)
     if !isfinite(x)
         isnan(x) && return x
-        throw(DomainError(x, "`x` cannot be infinite."))
+        throw(DomainError(x, "`cospi(x)` is only defined for finite `x`."))
     end
     # For large x, answers are all 1 or zero.
-    if T <: AbstractFloat
-        x >= maxintfloat(T) && return one(T)
-    end
+    x >= maxintfloat(T) && return one(T)
 
     # reduce to interval [0, 0.5]
     n = round(2*x)
@@ -846,26 +850,26 @@ function cospi(x::T) where T<:Union{IEEEFloat, Rational}
     end
 end
 """
-    sincospi(x)
+    sincospi(x::T) where T -> Tuple{float(T),float(T)}
 
 Simultaneously compute [`sinpi(x)`](@ref) and [`cospi(x)`](@ref) (the sine and cosine of `π*x`,
 where `x` is in radians), returning a tuple `(sine, cosine)`.
 
+Throw a [`DomainError`](@ref) if `isinf(x)`, return a `(T(NaN), T(NaN))` tuple if `isnan(x)`.
+
 !!! compat "Julia 1.6"
     This function requires Julia 1.6 or later.
 
 See also: [`cispi`](@ref), [`sincosd`](@ref), [`sinpi`](@ref).
 """
-function sincospi(_x::T) where T<:Union{IEEEFloat, Rational}
+function sincospi(_x::T) where T<:IEEEFloat
     x = abs(_x)
     if !isfinite(x)
         isnan(x) && return x, x
-        throw(DomainError(x, "`x` cannot be infinite."))
+        throw(DomainError(x, "`sincospi(x)` is only defined for finite `x`."))
     end
     # For large x, answers are all 1 or zero.
-    if T <: AbstractFloat
-        x >= maxintfloat(T) && return (copysign(zero(T), _x), one(T))
-    end
+    x >= maxintfloat(T) && return (copysign(zero(T), _x), one(T))
 
     # reduce to interval [0, 0.5]
     n = round(2*x)
@@ -886,29 +890,28 @@ function sincospi(_x::T) where T<:Union{IEEEFloat, Rational}
 end
 
 """
-    tanpi(x)
+    tanpi(x::T) where T -> float(T)
 
 Compute ``\\tan(\\pi x)`` more accurately than `tan(pi*x)`, especially for large `x`.
 
+Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`.
+
 !!! compat "Julia 1.10"
     This function requires at least Julia 1.10.
 
 See also [`tand`](@ref), [`sinpi`](@ref), [`cospi`](@ref), [`sincospi`](@ref).
 """
-
-function tanpi(_x::T) where T<:Union{IEEEFloat, Rational}
+function tanpi(_x::T) where T<:IEEEFloat
     # This is modified from sincospi.
     # Would it be faster or more accurate to make a tanpi_kernel?
     x = abs(_x)
     if !isfinite(x)
         isnan(x) && return x
-        throw(DomainError(x, "`x` cannot be infinite."))
+        throw(DomainError(x, "`tanpi(x)` is only defined for finite `x`."))
     end
     # For large x, answers are all zero.
     # All integer values for floats larger than maxintfloat are even.
-    if T <: AbstractFloat
-        x >= maxintfloat(T) && return copysign(zero(T), _x)
-    end
+    x >= maxintfloat(T) && return copysign(zero(T), _x)
 
     # reduce to interval [0, 0.5]
     n = round(2*x)
@@ -933,10 +936,10 @@ cospi(x::Integer) = isodd(x) ? -one(float(x)) : one(float(x))
 tanpi(x::Integer) = x >= 0 ? (isodd(x) ? -zero(float(x)) : zero(float(x))) :
                              (isodd(x) ? zero(float(x)) : -zero(float(x)))
 sincospi(x::Integer) = (sinpi(x), cospi(x))
-sinpi(x::Real) = sin(pi*x)
-cospi(x::Real) = cos(pi*x)
-sincospi(x::Real) = sincos(pi*x)
-tanpi(x::Real) = tan(pi*x)
+sinpi(x::AbstractFloat) = sin(pi*x)
+cospi(x::AbstractFloat) = cos(pi*x)
+sincospi(x::AbstractFloat) = sincos(pi*x)
+tanpi(x::AbstractFloat) = tan(pi*x)
 tanpi(x::Complex) = sinpi(x) / cospi(x) # Is there a better way to do this?
 
 function sinpi(z::Complex{T}) where T
@@ -1072,9 +1075,11 @@ isinf_real(x::Complex) = isinf(real(x)) && isfinite(imag(x))
 isinf_real(x::Number) = false
 
 """
-    sinc(x)
+    sinc(x::T) where {T <: Number} -> float(T)
 
-Compute ``\\sin(\\pi x) / (\\pi x)`` if ``x \\neq 0``, and ``1`` if ``x = 0``.
+Compute normalized sinc function ``\\operatorname{sinc}(x) = \\sin(\\pi x) / (\\pi x)`` if ``x \\neq 0``, and ``1`` if ``x = 0``.
+
+Return a `T(NaN)` if `isnan(x)`.
 
 See also [`cosc`](@ref), its derivative.
 """
@@ -1089,12 +1094,20 @@ _sinc(x::Float16) = Float16(_sinc(Float32(x)))
 _sinc(x::ComplexF16) = ComplexF16(_sinc(ComplexF32(x)))
 
 """
-    cosc(x)
+    cosc(x::T) where {T <: Number} -> float(T)
 
 Compute ``\\cos(\\pi x) / x - \\sin(\\pi x) / (\\pi x^2)`` if ``x \\neq 0``, and ``0`` if
 ``x = 0``. This is the derivative of `sinc(x)`.
+
+Return a `T(NaN)` if `isnan(x)`.
+
+See also [`sinc`](@ref).
 """
 cosc(x::Number) = _cosc(float(x))
+function _cosc_generic(x)
+    pi_x = pi * x
+    (pi_x*cospi(x)-sinpi(x))/(pi_x*x)
+end
 function _cosc(x::Number)
     # naive cosc formula is susceptible to catastrophic
     # cancellation error near x=0, so we use the Taylor series
@@ -1103,6 +1116,7 @@ function _cosc(x::Number)
         # generic Taylor series: π ∑ (-1)^n (πx)^{2n-1}/a(n) where
         # a(n) = (1+2n)*(2n-1)! (= OEIS A174549)
         s = (term = -(π*x))/3
+        iszero(s) && return s  # preserve floating-point signed zero
         π²x² = term^2
         ε = eps(fastabs(term)) # error threshold to stop sum
         n = 1
@@ -1114,17 +1128,128 @@ function _cosc(x::Number)
         end
         return π*s
     else
-        return isinf_real(x) ? zero(x) : ((pi*x)*cospi(x)-sinpi(x))/((pi*x)*x)
+        return isinf_real(x) ? zero(x) : _cosc_generic(x)
     end
 end
+
+#=
+
+## `cosc(x)` for `x` around the first zero, at `x = 0`
+
+`Float32`:
+
+```sollya
+prec = 500!;
+accurate = ((pi * x) * cos(pi * x) - sin(pi * x)) / (pi * x * x);
+b1 = 0.27001953125;
+b2 = 0.449951171875;
+domain_0 = [-b1/2, b1];
+domain_1 = [b1, b2];
+machinePrecision = 24;
+freeMonomials = [|1, 3, 5, 7|];
+freeMonomialPrecisions = [|machinePrecision, machinePrecision, machinePrecision, machinePrecision|];
+polynomial_0 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_0);
+polynomial_1 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_1);
+polynomial_0;
+polynomial_1;
+```
+
+`Float64`:
+
+```sollya
+prec = 500!;
+accurate = ((pi * x) * cos(pi * x) - sin(pi * x)) / (pi * x * x);
+b1 = 0.1700439453125;
+b2 = 0.27001953125;
+b3 = 0.340087890625;
+b4 = 0.39990234375;
+domain_0 = [-b1/2, b1];
+domain_1 = [b1, b2];
+domain_2 = [b2, b3];
+domain_3 = [b3, b4];
+machinePrecision = 53;
+freeMonomials = [|1, 3, 5, 7, 9, 11|];
+freeMonomialPrecisions = [|machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision|];
+polynomial_0 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_0);
+polynomial_1 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_1);
+polynomial_2 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_2);
+polynomial_3 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_3);
+polynomial_0;
+polynomial_1;
+polynomial_2;
+polynomial_3;
+```
+
+=#
+
+function _cos_cardinal_eval(x::AbstractFloat, polynomials_close_to_origin::NTuple)
+    function choose_poly(a::AbstractFloat, polynomials_close_to_origin::NTuple{2})
+        ((b1, p0), (_, p1)) = polynomials_close_to_origin
+        if a ≤ b1
+            p0
+        else
+            p1
+        end
+    end
+    function choose_poly(a::AbstractFloat, polynomials_close_to_origin::NTuple{4})
+        ((b1, p0), (b2, p1), (b3, p2), (_, p3)) = polynomials_close_to_origin
+        if a ≤ b2  # hardcoded binary search
+            if a ≤ b1
+                p0
+            else
+                p1
+            end
+        else
+            if a ≤ b3
+                p2
+            else
+                p3
+            end
+        end
+    end
+    a = abs(x)
+    if (polynomials_close_to_origin !== ()) && (a ≤ polynomials_close_to_origin[end][1])
+        x * evalpoly(x * x, choose_poly(a, polynomials_close_to_origin))
+    elseif isinf(x)
+        typeof(x)(0)
+    else
+        _cosc_generic(x)
+    end
+end
+
+const _cosc_f32 = let b = Float32 ∘ Float16
+    (
+        (b(0.27), (-3.289868f0, 3.246966f0, -1.1443111f0, 0.20542027f0)),
+        (b(0.45), (-3.2898617f0, 3.2467577f0, -1.1420113f0, 0.1965574f0)),
+    )
+end
+
+const _cosc_f64 = let b = Float64 ∘ Float16
+    (
+        (b(0.17), (-3.289868133696453, 3.2469697011333203, -1.1445109446992934, 0.20918277797812262, -0.023460519561502552, 0.001772485141534688)),
+        (b(0.27), (-3.289868133695205, 3.246969700970421, -1.1445109360543062, 0.20918254132488637, -0.023457115021035743, 0.0017515112964895303)),
+        (b(0.34), (-3.289868133634355, 3.246969697075094, -1.1445108347839286, 0.209181201609773, -0.023448079433318045, 0.001726628430505518)),
+        (b(0.4),  (-3.289868133074254, 3.2469696736659346, -1.1445104406286049, 0.20917785794416457, -0.02343378376047161, 0.0017019796223768677)),
+    )
+end
+
+function _cosc(x::Union{Float32, Float64})
+    if x isa Float32
+        pols = _cosc_f32
+    elseif x isa Float64
+        pols = _cosc_f64
+    end
+    _cos_cardinal_eval(x, pols)
+end
+
 # hard-code Float64/Float32 Taylor series, with coefficients
 #  Float64.([(-1)^n*big(pi)^(2n)/((2n+1)*factorial(2n-1)) for n = 1:6])
-_cosc(x::Union{Float64,ComplexF64}) =
+_cosc(x::ComplexF64) =
     fastabs(x) < 0.14 ? x*evalpoly(x^2, (-3.289868133696453, 3.2469697011334144, -1.1445109447325053, 0.2091827825412384, -0.023460810354558236, 0.001781145516372852)) :
-    isinf_real(x) ? zero(x) : ((pi*x)*cospi(x)-sinpi(x))/((pi*x)*x)
-_cosc(x::Union{Float32,ComplexF32}) =
+    isinf_real(x) ? zero(x) : _cosc_generic(x)
+_cosc(x::ComplexF32) =
     fastabs(x) < 0.26f0 ? x*evalpoly(x^2, (-3.289868f0, 3.2469697f0, -1.144511f0, 0.20918278f0)) :
-    isinf_real(x) ? zero(x) : ((pi*x)*cospi(x)-sinpi(x))/((pi*x)*x)
+    isinf_real(x) ? zero(x) : _cosc_generic(x)
 _cosc(x::Float16) = Float16(_cosc(Float32(x)))
 _cosc(x::ComplexF16) = ComplexF16(_cosc(ComplexF32(x)))
 
@@ -1136,19 +1261,25 @@ for (finv, f, finvh, fh, finvd, fd, fn) in ((:sec, :cos, :sech, :cosh, :secd, :c
     dname = string(finvd)
     @eval begin
         @doc """
-            $($name)(x)
+            $($name)(x::T) where {T <: Number} -> float(T)
 
         Compute the $($fn) of `x`, where `x` is in radians.
+
+        Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`.
         """ ($finv)(z::Number) = inv(($f)(z))
         @doc """
-            $($hname)(x)
+            $($hname)(x::T) where {T <: Number} -> float(T)
 
         Compute the hyperbolic $($fn) of `x`.
+
+        Return a `T(NaN)` if `isnan(x)`.
         """ ($finvh)(z::Number) = inv(($fh)(z))
         @doc """
-            $($dname)(x)
+            $($dname)(x::T) where {T <: Number} -> float(T)
 
         Compute the $($fn) of `x`, where `x` is in degrees.
+
+        Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`.
         """ ($finvd)(z::Number) = inv(($fd)(z))
     end
 end
@@ -1160,11 +1291,15 @@ for (tfa, tfainv, hfa, hfainv, fn) in ((:asec, :acos, :asech, :acosh, "secant"),
     hname = string(hfa)
     @eval begin
         @doc """
-            $($tname)(x)
-        Compute the inverse $($fn) of `x`, where the output is in radians. """ ($tfa)(y::Number) = ($tfainv)(inv(y))
+            $($tname)(x::T) where {T <: Number} -> float(T)
+
+        Compute the inverse $($fn) of `x`, where the output is in radians.
+        """ ($tfa)(y::Number) = ($tfainv)(inv(y))
         @doc """
-            $($hname)(x)
-        Compute the inverse hyperbolic $($fn) of `x`. """ ($hfa)(y::Number) = ($hfainv)(inv(y))
+            $($hname)(x::T) where {T <: Number} -> float(T)
+
+        Compute the inverse hyperbolic $($fn) of `x`.
+        """ ($hfa)(y::Number) = ($hfainv)(inv(y))
     end
 end
 
@@ -1189,7 +1324,7 @@ deg2rad_ext(x::Real) = deg2rad(x) # Fallback
 
 function sind(x::Real)
     if isinf(x)
-        return throw(DomainError(x, "`x` cannot be infinite."))
+        return throw(DomainError(x, "`sind(x)` is only defined for finite `x`."))
     elseif isnan(x)
         return x
     end
@@ -1220,7 +1355,7 @@ end
 
 function cosd(x::Real)
     if isinf(x)
-        return throw(DomainError(x, "`x` cannot be infinite."))
+        return throw(DomainError(x, "`cosd(x)` is only defined for finite `x`."))
     elseif isnan(x)
         return x
     end
@@ -1247,9 +1382,12 @@ end
 tand(x::Real) = sind(x) / cosd(x)
 
 """
-    sincosd(x)
+    sincosd(x::T) where T -> Tuple{float(T),float(T)}
 
-Simultaneously compute the sine and cosine of `x`, where `x` is in degrees.
+Simultaneously compute the sine and cosine of `x`, where `x` is in degrees, returning
+a tuple `(sine, cosine)`.
+
+Throw a [`DomainError`](@ref) if `isinf(x)`, return a `(T(NaN), T(NaN))` tuple if `isnan(x)`.
 
 !!! compat "Julia 1.3"
     This function requires at least Julia 1.3.
@@ -1265,11 +1403,13 @@ for (fd, f, fn) in ((:sind, :sin, "sine"), (:cosd, :cos, "cosine"), (:tand, :tan
         name = string(fd)
         @eval begin
             @doc """
-                $($name)(x)
+                $($name)(x::T) where T -> float(T)
 
             Compute $($fn) of `x`, where `x` is in $($un).
             If `x` is a matrix, `x` needs to be a square matrix.
 
+            Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`.
+
             !!! compat "Julia 1.7"
                 Matrix arguments require Julia 1.7 or later.
             """ ($fd)(x) = ($f)(($fu).(x))
@@ -1297,11 +1437,15 @@ for (fd, f, fn) in ((:asind, :asin, "sine"), (:acosd, :acos, "cosine"),
 end
 
 """
-    atand(y)
-    atand(y,x)
+    atand(y::T) where T -> float(T)
+    atand(y::T, x::S) where {T,S} -> promote_type(T,S)
+    atand(y::AbstractMatrix{T}) where T -> AbstractMatrix{Complex{float(T)}}
 
 Compute the inverse tangent of `y` or `y/x`, respectively, where the output is in degrees.
 
+Return a `NaN` if `isnan(y)` or `isnan(x)`. The returned `NaN` is either a `T` in the single
+argument version, or a `promote_type(T,S)` in the two argument version.
+
 !!! compat "Julia 1.7"
     The one-argument method supports square matrix arguments as of Julia 1.7.
 """
diff --git a/base/stacktraces.jl b/base/stacktraces.jl
index 9c942814eefad..b095f1f687807 100644
--- a/base/stacktraces.jl
+++ b/base/stacktraces.jl
@@ -7,7 +7,9 @@ module StackTraces
 
 
 import Base: hash, ==, show
-import Core: CodeInfo, MethodInstance
+
+using Core: CodeInfo, MethodInstance, CodeInstance
+using Base.IRShow
 
 export StackTrace, StackFrame, stacktrace
 
@@ -20,10 +22,10 @@ Stack information representing execution context, with the following fields:
 
   The name of the function containing the execution context.
 
-- `linfo::Union{Core.MethodInstance, Method, Module, Core.CodeInfo, Nothing}`
+- `linfo::Union{Method, Core.MethodInstance, Core.CodeInstance, Core.CodeInfo, Nothing}`
 
-  The MethodInstance or CodeInfo containing the execution context (if it could be found), \
-     or Module (for macro expansions)"
+  The Method, MethodInstance, CodeInstance, or CodeInfo containing the execution context (if it could be found),
+     or nothing (for example, if the inlining was a result of macro expansion).
 
 - `file::Symbol`
 
@@ -53,9 +55,9 @@ struct StackFrame # this type should be kept platform-agnostic so that profiles
     file::Symbol
     "the line number in the file containing the execution context"
     line::Int
-    "the MethodInstance or CodeInfo containing the execution context (if it could be found), \
-     or Module (for macro expansions)"
-    linfo::Union{MethodInstance, Method, Module, CodeInfo, Nothing}
+    "the CodeInstance or CodeInfo containing the execution context (if it could be found), \
+     or nothing (for example, if the inlining was a result of macro expansion)."
+    linfo::Union{Core.MethodInstance, Core.CodeInstance, Method, CodeInfo, Nothing}
     "true if the code is from C"
     from_c::Bool
     "true if the code is from an inlined frame"
@@ -88,7 +90,7 @@ function ==(a::StackFrame, b::StackFrame)
 end
 
 function hash(frame::StackFrame, h::UInt)
-    h += 0xf4fbda67fe20ce88 % UInt
+    h ⊻= 0xf4fbda67fe20ce88 % UInt
     h = hash(frame.line, h)
     h = hash(frame.file, h)
     h = hash(frame.func, h)
@@ -97,89 +99,8 @@ function hash(frame::StackFrame, h::UInt)
     return h
 end
 
-get_inlinetable(::Any) = nothing
-function get_inlinetable(mi::MethodInstance)
-    isdefined(mi, :def) && mi.def isa Method && isdefined(mi, :cache) && isdefined(mi.cache, :inferred) &&
-        mi.cache.inferred !== nothing || return nothing
-    linetable = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), mi.def, mi.cache, mi.cache.inferred).linetable
-    return filter!(x -> x.inlined_at > 0, linetable)
-end
-
-get_method_instance_roots(::Any) = nothing
-function get_method_instance_roots(mi::Union{Method, MethodInstance})
-    m = mi isa MethodInstance ? mi.def : mi
-    m isa Method && isdefined(m, :roots) || return nothing
-    return filter(x -> x isa MethodInstance, m.roots)
-end
-
-function lookup_inline_frame_info(func::Symbol, file::Symbol, linenum::Int, inlinetable::Vector{Core.LineInfoNode})
-    #REPL frames and some base files lack this prefix while others have it; should fix?
-    filestripped = Symbol(lstrip(string(file), ('.', '\\', '/')))
-    linfo = nothing
-    #=
-    Some matching entries contain the MethodInstance directly.
-    Other matching entries contain only a Method or Symbol (function name); such entries
-    are located after the entry with the MethodInstance, so backtracking is required.
-    If backtracking fails, the Method or Module is stored for return, but we continue
-    the search in case a MethodInstance is found later.
-    TODO: If a backtrack has failed, do we need to backtrack again later if another Method
-    or Symbol match is found? Or can a limit on the subsequent backtracks be placed?
-    =#
-    for (i, line) in enumerate(inlinetable)
-        Base.IRShow.method_name(line) === func && line.file ∈ (file, filestripped) && line.line == linenum || continue
-        if line.method isa MethodInstance
-            linfo = line.method
-            break
-        elseif line.method isa Method || line.method isa Symbol
-            linfo = line.method isa Method ? line.method : line.module
-            # backtrack to find the matching MethodInstance, if possible
-            for j in (i - 1):-1:1
-                nextline = inlinetable[j]
-                nextline.inlined_at == line.inlined_at && Base.IRShow.method_name(line) === Base.IRShow.method_name(nextline) && line.file === nextline.file || break
-                if nextline.method isa MethodInstance
-                    linfo = nextline.method
-                    break
-                end
-            end
-        end
-    end
-    return linfo
-end
-
-function lookup_inline_frame_info(func::Symbol, file::Symbol, miroots::Vector{Any})
-    # REPL frames and some base files lack this prefix while others have it; should fix?
-    filestripped = Symbol(lstrip(string(file), ('.', '\\', '/')))
-    matches = filter(miroots) do x
-        x.def isa Method || return false
-        m = x.def::Method
-        return m.name == func && m.file ∈ (file, filestripped)
-    end
-    if length(matches) > 1
-        # ambiguous, check if method is same and return that instead
-        all_matched = true
-        for m in matches
-            all_matched = m.def.line == matches[1].def.line &&
-                m.def.module == matches[1].def.module
-            all_matched || break
-        end
-        if all_matched
-            return matches[1].def
-        end
-        # all else fails, return module if they match, or give up
-        all_matched = true
-        for m in matches
-            all_matched = m.def.module == matches[1].def.module
-            all_matched || break
-        end
-        return all_matched ? matches[1].def.module : nothing
-    elseif length(matches) == 1
-        return matches[1]
-    end
-    return nothing
-end
-
 """
-    lookup(pointer::Ptr{Cvoid}) -> Vector{StackFrame}
+    lookup(pointer::Ptr{Cvoid})::Vector{StackFrame}
 
 Given a pointer to an execution context (usually generated by a call to `backtrace`), looks
 up stack frame context information. Returns an array of frame information for all functions
@@ -189,25 +110,14 @@ Base.@constprop :none function lookup(pointer::Ptr{Cvoid})
     infos = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint), pointer, false)::Core.SimpleVector
     pointer = convert(UInt64, pointer)
     isempty(infos) && return [StackFrame(empty_sym, empty_sym, -1, nothing, true, false, pointer)] # this is equal to UNKNOWN
-    parent_linfo = infos[end][4]
-    inlinetable = get_inlinetable(parent_linfo)
-    miroots = inlinetable === nothing ? get_method_instance_roots(parent_linfo) : nothing # fallback if linetable missing
     res = Vector{StackFrame}(undef, length(infos))
-    for i in reverse(1:length(infos))
+    for i in 1:length(infos)
         info = infos[i]::Core.SimpleVector
-        @assert(length(info) == 6)
+        @assert length(info) == 6 "corrupt return from jl_lookup_code_address"
         func = info[1]::Symbol
         file = info[2]::Symbol
         linenum = info[3]::Int
         linfo = info[4]
-        if i < length(infos)
-            if inlinetable !== nothing
-                linfo = lookup_inline_frame_info(func, file, linenum, inlinetable)
-            elseif miroots !== nothing
-                linfo = lookup_inline_frame_info(func, file, miroots)
-            end
-            linfo = linfo === nothing ? parentmodule(res[i + 1]) : linfo # e.g. `macro expansion`
-        end
         res[i] = StackFrame(func, file, linenum, linfo, info[5]::Bool, info[6]::Bool, pointer)
     end
     return res
@@ -215,46 +125,72 @@ end
 
 const top_level_scope_sym = Symbol("top-level scope")
 
-function lookup(ip::Union{Base.InterpreterIP,Core.Compiler.InterpreterIP})
+function lookup(ip::Base.InterpreterIP)
     code = ip.code
     if code === nothing
         # interpreted top-level expression with no CodeInfo
         return [StackFrame(top_level_scope_sym, empty_sym, 0, nothing, false, false, 0)]
     end
-    codeinfo = (code isa MethodInstance ? code.uninferred : code)::CodeInfo
     # prepare approximate code info
     if code isa MethodInstance && (meth = code.def; meth isa Method)
         func = meth.name
         file = meth.file
         line = meth.line
+        codeinfo = meth.source
     else
         func = top_level_scope_sym
         file = empty_sym
         line = Int32(0)
+        if code isa Core.CodeInstance
+            codeinfo = code.inferred::CodeInfo
+            def = code.def
+            if isa(def, Core.ABIOverride)
+                def = def.def
+            end
+            if isa(def, MethodInstance)
+                let meth = def.def
+                    if isa(meth, Method)
+                        func = meth.name
+                        file = meth.file
+                        line = meth.line
+                    end
+                end
+            end
+        else
+            codeinfo = code::CodeInfo
+        end
     end
-    i = max(ip.stmt+1, 1)  # ip.stmt is 0-indexed
-    if i > length(codeinfo.codelocs) || codeinfo.codelocs[i] == 0
+    def = (code isa CodeInfo ? StackTraces : code) # Module just used as a token for top-level code
+    pc::Int = max(ip.stmt + 1, 0) # n.b. ip.stmt is 0-indexed
+    scopes = IRShow.LineInfoNode[]
+    IRShow.append_scopes!(scopes, pc, codeinfo.debuginfo, def)
+    if isempty(scopes)
         return [StackFrame(func, file, line, code, false, false, 0)]
     end
-    lineinfo = codeinfo.linetable[codeinfo.codelocs[i]]::Core.LineInfoNode
-    scopes = StackFrame[]
-    while true
-        inlined = lineinfo.inlined_at != 0
-        push!(scopes, StackFrame(Base.IRShow.method_name(lineinfo)::Symbol, lineinfo.file, lineinfo.line, inlined ? nothing : code, false, inlined, 0))
-        inlined || break
-        lineinfo = codeinfo.linetable[lineinfo.inlined_at]::Core.LineInfoNode
+    closure = let inlined::Bool = false, def = def
+        function closure_inner(lno)
+            if inlined
+                def = lno.method
+                def isa Union{Method,Core.CodeInstance,MethodInstance} || (def = nothing)
+            else
+                def = codeinfo
+            end
+            sf = StackFrame(IRShow.normalize_method_name(lno.method), lno.file, lno.line, def, false, inlined, 0)
+            inlined = true
+            return sf
+        end
     end
-    return scopes
+    return map(closure, scopes)
 end
 
 """
-    stacktrace([trace::Vector{Ptr{Cvoid}},] [c_funcs::Bool=false]) -> StackTrace
+    stacktrace([trace::Vector{Ptr{Cvoid}},] [c_funcs::Bool=false])::StackTrace
 
 Return a stack trace in the form of a vector of `StackFrame`s. (By default stacktrace
 doesn't return C functions, but this can be enabled.) When called without specifying a
 trace, `stacktrace` first calls `backtrace`.
 """
-Base.@constprop :none function stacktrace(trace::Vector{<:Union{Base.InterpreterIP,Core.Compiler.InterpreterIP,Ptr{Cvoid}}}, c_funcs::Bool=false)
+Base.@constprop :none function stacktrace(trace::Vector{<:Union{Base.InterpreterIP,Ptr{Cvoid}}}, c_funcs::Bool=false)
     stack = StackTrace()
     for ip in trace
         for frame in lookup(ip)
@@ -306,6 +242,23 @@ end
 
 is_top_level_frame(f::StackFrame) = f.linfo isa CodeInfo || (f.linfo === nothing && f.func === top_level_scope_sym)
 
+function frame_method_or_module(lkup::StackFrame)
+    code = lkup.linfo
+    code isa Method && return code
+    code isa Module && return code
+    mi = frame_mi(lkup)
+    mi isa MethodInstance || return nothing
+    return mi.def
+end
+
+function frame_mi(lkup::StackFrame)
+    code = lkup.linfo
+    code isa Core.CodeInstance && (code = code.def)
+    code isa Core.ABIOverride && (code = code.def)
+    code isa MethodInstance || return nothing
+    return code
+end
+
 function show_spec_linfo(io::IO, frame::StackFrame)
     linfo = frame.linfo
     if linfo === nothing
@@ -320,19 +273,31 @@ function show_spec_linfo(io::IO, frame::StackFrame)
         print(io, "top-level scope")
     elseif linfo isa Module
         Base.print_within_stacktrace(io, Base.demangle_function_name(string(frame.func)), bold=true)
-    elseif linfo isa MethodInstance
-        def = linfo.def
-        if def isa Module
-            Base.show_mi(io, linfo, #=from_stackframe=#true)
+    else
+        if linfo isa Union{MethodInstance, CodeInstance}
+            def = frame_method_or_module(frame)
+            if def isa Module
+                Base.show_mi(io, linfo::MethodInstance, #=from_stackframe=#true)
+            elseif linfo isa CodeInstance && linfo.owner !== nothing
+                show_custom_spec_sig(io, linfo.owner, linfo, frame)
+            else
+                # Equivalent to the default implementation of `show_custom_spec_sig`
+                # for `linfo isa CodeInstance`, but saves an extra dynamic dispatch.
+                show_spec_sig(io, def, frame_mi(frame).specTypes)
+            end
         else
-            show_spec_sig(io, def, linfo.specTypes)
+            m = linfo::Method
+            show_spec_sig(io, m, m.sig)
         end
-    else
-        m = linfo::Method
-        show_spec_sig(io, m, m.sig)
     end
 end
 
+# Can be extended by compiler packages to customize backtrace display of custom code instance frames
+function show_custom_spec_sig(io::IO, @nospecialize(owner), linfo::CodeInstance, frame::StackFrame)
+    mi = Base.get_ci_mi(linfo)
+    return show_spec_sig(io, mi.def, mi.specTypes)
+end
+
 function show_spec_sig(io::IO, m::Method, @nospecialize(sig::Type))
     if get(io, :limit, :false)::Bool
         if !haskey(io, :displaysize)
@@ -381,6 +346,12 @@ end
 
 function Base.parentmodule(frame::StackFrame)
     linfo = frame.linfo
+    if linfo isa CodeInstance
+        linfo = linfo.def
+        if isa(linfo, Core.ABIOverride)
+            linfo = linfo.def
+        end
+    end
     if linfo isa MethodInstance
         def = linfo.def
         if def isa Module
@@ -400,7 +371,7 @@ function Base.parentmodule(frame::StackFrame)
 end
 
 """
-    from(frame::StackFrame, filter_mod::Module) -> Bool
+    from(frame::StackFrame, filter_mod::Module)::Bool
 
 Return whether the `frame` is from the provided `Module`
 """
@@ -408,4 +379,4 @@ function from(frame::StackFrame, m::Module)
     return parentmodule(frame) === m
 end
 
-end
+end  # module StackTraces
diff --git a/base/stat.jl b/base/stat.jl
index 81f9dcfd20191..5ab871da6ea16 100644
--- a/base/stat.jl
+++ b/base/stat.jl
@@ -25,6 +25,37 @@ export
     stat,
     uperm
 
+"""
+    StatStruct
+
+A struct which stores information about a file. Usually
+constructed by calling [`stat`](@ref) on a path.
+
+This struct is used internally as the foundation of a number of utility
+functions. Some return specific parts of the information stored in it
+directly, such as [`filesize`](@ref), [`mtime`](@ref) and [`ctime`](@ref). Others add
+some logic on top using bit-manipulation, such as [`isfifo`](@ref), [`ischardev`](@ref), and [`issetuid`](@ref).
+
+The following fields of this struct are considered public API:
+
+| Name    | Type                            | Description                                                        |
+|:--------|:--------------------------------|:-------------------------------------------------------------------|
+| desc    | `Union{String, Base.OS_HANDLE}` | The path or OS file descriptor                                     |
+| size    | `Int64`                         | The size (in bytes) of the file                                    |
+| device  | `UInt`                          | ID of the device that contains the file                            |
+| inode   | `UInt`                          | The inode number of the file                                       |
+| mode    | `UInt`                          | The protection mode of the file                                    |
+| nlink   | `Int`                           | The number of hard links to the file                               |
+| uid     | `UInt`                          | The user id of the owner of the file                               |
+| gid     | `UInt`                          | The group id of the file owner                                     |
+| rdev    | `UInt`                          | If this file refers to a device, the ID of the device it refers to |
+| blksize | `Int64`                         | The file-system preferred block size for the file                  |
+| blocks  | `Int64`                         | The number of 512-byte blocks allocated                            |
+| mtime   | `Float64`                       | Unix timestamp of when the file was last modified                  |
+| ctime   | `Float64`                       | Unix timestamp of when the file's metadata was changed             |
+
+See also: [`stat`](@ref)
+"""
 struct StatStruct
     desc    :: Union{String, OS_HANDLE} # for show method, not included in equality or hash
     device  :: UInt
@@ -39,10 +70,11 @@ struct StatStruct
     blocks  :: Int64
     mtime   :: Float64
     ctime   :: Float64
+    ioerrno :: Int32
 end
 
 @eval function Base.:(==)(x::StatStruct, y::StatStruct) # do not include `desc` in equality or hash
-  $(let ex = true
+    $(let ex = true
         for fld in fieldnames(StatStruct)[2:end]
             ex = :(getfield(x, $(QuoteNode(fld))) === getfield(y, $(QuoteNode(fld))) && $ex)
         end
@@ -50,28 +82,29 @@ end
     end)
 end
 @eval function Base.hash(obj::StatStruct, h::UInt)
-  $(quote
+    $(quote
         $(Any[:(h = hash(getfield(obj, $(QuoteNode(fld))), h)) for fld in fieldnames(StatStruct)[2:end]]...)
         return h
     end)
 end
 
-StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct("", buf)
-StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct(
+StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Base.UV_ENOENT)
+StatStruct(buf::Union{Memory{UInt8},Vector{UInt8},Ptr{UInt8}}, ioerrno::Int32) = StatStruct("", buf, ioerrno)
+StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Memory{UInt8},Vector{UInt8},Ptr{UInt8}}, ioerrno::Int32) = StatStruct(
     desc isa OS_HANDLE ? desc : String(desc),
-    ccall(:jl_stat_dev,     UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_ino,     UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_mode,    UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_nlink,   UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_uid,     UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_gid,     UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_rdev,    UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_size,    UInt64,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_blksize, UInt64,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_blocks,  UInt64,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_mtime,   Float64, (Ptr{UInt8},), buf),
-    ccall(:jl_stat_ctime,   Float64, (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_dev,     UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_ino,     UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_mode,    UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_nlink,   UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_uid,     UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_gid,     UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_rdev,    UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_size,    UInt64,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_blksize, UInt64,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_blocks,  UInt64,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(Float64) : ccall(:jl_stat_mtime,   Float64, (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(Float64) : ccall(:jl_stat_ctime,   Float64, (Ptr{UInt8},), buf),
+    ioerrno
 )
 
 function iso_datetime_with_relative(t, tnow)
@@ -106,35 +139,41 @@ end
 function show_statstruct(io::IO, st::StatStruct, oneline::Bool)
     print(io, oneline ? "StatStruct(" : "StatStruct for ")
     show(io, st.desc)
-    oneline || print(io, "\n  ")
-    print(io, " size: ", st.size, " bytes")
-    oneline || print(io, "\n")
-    print(io, " device: ", st.device)
-    oneline || print(io, "\n ")
-    print(io, " inode: ", st.inode)
-    oneline || print(io, "\n  ")
-    print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")")
-    oneline || print(io, "\n ")
-    print(io, " nlink: ", st.nlink)
-    oneline || print(io, "\n   ")
-    print(io, " uid: $(st.uid)")
-    username = getusername(st.uid)
-    username === nothing || print(io, " (", username, ")")
-    oneline || print(io, "\n   ")
-    print(io, " gid: ", st.gid)
-    groupname = getgroupname(st.gid)
-    groupname === nothing || print(io, " (", groupname, ")")
-    oneline || print(io, "\n  ")
-    print(io, " rdev: ", st.rdev)
-    oneline || print(io, "\n ")
-    print(io, " blksz: ", st.blksize)
-    oneline || print(io, "\n")
-    print(io, " blocks: ", st.blocks)
-    tnow = round(UInt, time())
-    oneline || print(io, "\n ")
-    print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow))
-    oneline || print(io, "\n ")
-    print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow))
+    code = st.ioerrno
+    if code != 0
+        print(io, oneline ? " " : "\n ")
+        print(io, Base.uverrorname(code), ": ", Base.struverror(code))
+    else
+        oneline || print(io, "\n  ")
+        print(io, " size: ", st.size, " bytes")
+        oneline || print(io, "\n")
+        print(io, " device: ", st.device)
+        oneline || print(io, "\n ")
+        print(io, " inode: ", st.inode)
+        oneline || print(io, "\n  ")
+        print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")")
+        oneline || print(io, "\n ")
+        print(io, " nlink: ", st.nlink)
+        oneline || print(io, "\n   ")
+        print(io, " uid: $(st.uid)")
+        username = getusername(st.uid)
+        username === nothing || print(io, " (", username, ")")
+        oneline || print(io, "\n   ")
+        print(io, " gid: ", st.gid)
+        groupname = getgroupname(st.gid)
+        groupname === nothing || print(io, " (", groupname, ")")
+        oneline || print(io, "\n  ")
+        print(io, " rdev: ", st.rdev)
+        oneline || print(io, "\n ")
+        print(io, " blksz: ", st.blksize)
+        oneline || print(io, "\n")
+        print(io, " blocks: ", st.blocks)
+        tnow = round(UInt, time())
+        oneline || print(io, "\n ")
+        print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow))
+        oneline || print(io, "\n ")
+        print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow))
+    end
     oneline && print(io, ")")
     return nothing
 end
@@ -144,62 +183,69 @@ show(io::IO, ::MIME"text/plain", st::StatStruct) = show_statstruct(io, st, false
 
 # stat & lstat functions
 
+checkstat(s::StatStruct) = Int(s.ioerrno) in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL) ? s :
+    _uv_error(string("stat(", repr(s.desc), ")"), s.ioerrno)
+
 macro stat_call(sym, arg1type, arg)
     return quote
-        stat_buf = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ())))
+        stat_buf = fill!(Memory{UInt8}(undef, Int(ccall(:jl_sizeof_stat, Int32, ()))), 0x00)
         r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), stat_buf)
-        if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL))
-            uv_error(string("stat(", repr($(esc(arg))), ")"), r)
-        end
-        st = StatStruct($(esc(arg)), stat_buf)
-        if ispath(st) != (r == 0)
-            error("stat returned zero type for a valid path")
-        end
-        return st
+        return checkstat(StatStruct($(esc(arg)), stat_buf, r))
     end
 end
 
 stat(fd::OS_HANDLE)         = @stat_call jl_fstat OS_HANDLE fd
-stat(path::AbstractString)  = @stat_call jl_stat  Cstring path
-lstat(path::AbstractString) = @stat_call jl_lstat Cstring path
+function stat(path::AbstractString)
+    # @info "stat($(repr(path)))" exception=(ErrorException("Fake error for backtrace printing"),stacktrace())
+    @stat_call jl_stat  Cstring path
+end
+function lstat(path::AbstractString)
+    # @info "lstat($(repr(path)))" exception=(ErrorException("Fake error for backtrace printing"),stacktrace())
+    @stat_call jl_lstat Cstring path
+end
 if RawFD !== OS_HANDLE
     global stat(fd::RawFD)  = stat(Libc._get_osfhandle(fd))
 end
-stat(fd::Integer)           = stat(RawFD(fd))
 
 """
-    stat(file)
+    stat(path)
+    stat(path_elements...)
 
 Return a structure whose fields contain information about the file.
-The fields of the structure are:
+If multiple arguments are given, they are joined by [`joinpath`](@ref).
 
-| Name    | Description                                                        |
-|:--------|:-------------------------------------------------------------------|
-| desc    | The path or OS file descriptor                                     |
-| size    | The size (in bytes) of the file                                    |
-| device  | ID of the device that contains the file                            |
-| inode   | The inode number of the file                                       |
-| mode    | The protection mode of the file                                    |
-| nlink   | The number of hard links to the file                               |
-| uid     | The user id of the owner of the file                               |
-| gid     | The group id of the file owner                                     |
-| rdev    | If this file refers to a device, the ID of the device it refers to |
-| blksize | The file-system preferred block size for the file                  |
-| blocks  | The number of such blocks allocated                                |
-| mtime   | Unix timestamp of when the file was last modified                  |
-| ctime   | Unix timestamp of when the file's metadata was changed             |
+The fields of the structure are:
 
-"""
+| Name    | Type                            | Description                                                        |
+|:--------|:--------------------------------|:-------------------------------------------------------------------|
+| desc    | `Union{String, Base.OS_HANDLE}` | The path or OS file descriptor                                     |
+| size    | `Int64`                         | The size (in bytes) of the file                                    |
+| device  | `UInt`                          | ID of the device that contains the file                            |
+| inode   | `UInt`                          | The inode number of the file                                       |
+| mode    | `UInt`                          | The protection mode of the file                                    |
+| nlink   | `Int`                           | The number of hard links to the file                               |
+| uid     | `UInt`                          | The user id of the owner of the file                               |
+| gid     | `UInt`                          | The group id of the file owner                                     |
+| rdev    | `UInt`                          | If this file refers to a device, the ID of the device it refers to |
+| blksize | `Int64`                         | The file-system preferred block size for the file                  |
+| blocks  | `Int64`                         | The number of 512-byte blocks allocated                            |
+| mtime   | `Float64`                       | Unix timestamp of when the file was last modified                  |
+| ctime   | `Float64`                       | Unix timestamp of when the file's metadata was changed             |
+"""
+stat(path) = (path2 = joinpath(path); path2 isa typeof(path) ? error("stat not implemented for $(typeof(path))") : stat(path2))
 stat(path...) = stat(joinpath(path...))
 
 """
-    lstat(file)
+    lstat(path)
+    lstat(path_elements...)
 
-Like [`stat`](@ref), but for symbolic links gets the info for the link
-itself rather than the file it refers to.
-This function must be called on a file path rather than a file object or a file
-descriptor.
+Like [`stat`](@ref), but for symbolic links gets the info
+for the link itself rather than the file it refers to.
+
+This function must be called on a file path rather
+than a file object or a file descriptor.
 """
+lstat(path) = (path2 = joinpath(path); path2 isa typeof(path) ? error("lstat not implemented for $(typeof(path))") : lstat(path2))
 lstat(path...) = lstat(joinpath(path...))
 
 # some convenience functions
@@ -250,9 +296,14 @@ const filemode_table = (
 )
 
 """
-    filemode(file)
+    filemode(path)
+    filemode(path_elements...)
+    filemode(stat_struct)
+
+Return the mode of the file located at `path`,
+or the mode indicated by the file descriptor `stat_struct`.
 
-Equivalent to `stat(file).mode`.
+Equivalent to `stat(path).mode` or `stat_struct.mode`.
 """
 filemode(st::StatStruct) = st.mode
 filemode_string(st::StatStruct) = filemode_string(st.mode)
@@ -269,59 +320,91 @@ function filemode_string(mode)
         end
         complete && write(str, "-")
     end
-    return String(take!(str))
+    return unsafe_takestring!(str)
 end
 
 """
-    filesize(path...)
+    filesize(path)
+    filesize(path_elements...)
+    filesize(stat_struct)
 
-Equivalent to `stat(file).size`.
+Return the size of the file located at `path`,
+or the size indicated by file descriptor `stat_struct`.
+
+Equivalent to `stat(path).size` or `stat_struct.size`.
 """
 filesize(st::StatStruct) = st.size
 
 """
-    mtime(file)
+    mtime(path)
+    mtime(path_elements...)
+    mtime(stat_struct)
+
+Return the unix timestamp of when the file at `path` was last modified,
+or the last modified timestamp indicated by the file descriptor `stat_struct`.
 
-Equivalent to `stat(file).mtime`.
+Equivalent to `stat(path).mtime` or `stat_struct.mtime`.
 """
 mtime(st::StatStruct) = st.mtime
 
 """
-    ctime(file)
+    ctime(path)
+    ctime(path_elements...)
+    ctime(stat_struct)
+
+Return the unix timestamp of when the metadata of the file at `path` was last modified,
+or the last modified metadata timestamp indicated by the file descriptor `stat_struct`.
 
-Equivalent to `stat(file).ctime`.
+Equivalent to `stat(path).ctime` or `stat_struct.ctime`.
 """
 ctime(st::StatStruct) = st.ctime
 
 # mode type predicates
 
 """
-    ispath(path) -> Bool
+    ispath(path)::Bool
+    ispath(path_elements...)::Bool
 
 Return `true` if a valid filesystem entity exists at `path`,
 otherwise returns `false`.
+
 This is the generalization of [`isfile`](@ref), [`isdir`](@ref) etc.
 """
-ispath(st::StatStruct) = filemode(st) & 0xf000 != 0x0000
+ispath(st::StatStruct) = st.ioerrno == 0
+function ispath(path::String)
+    # We use `access()` and `F_OK` to determine if a given path exists. `F_OK` comes from `unistd.h`.
+    F_OK = 0x00
+    r = ccall(:jl_fs_access, Cint, (Cstring, Cint), path, F_OK)
+    if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL))
+        uv_error(string("ispath(", repr(path), ")"), r)
+    end
+    return r == 0
+end
+ispath(path::AbstractString) = ispath(String(path)::String)
 
 """
-    isfifo(path) -> Bool
+    isfifo(path)::Bool
+    isfifo(path_elements...)::Bool
+    isfifo(stat_struct)::Bool
 
-Return `true` if `path` is a FIFO, `false` otherwise.
+Return `true` if the file at `path` or file descriptor `stat_struct` is FIFO, `false` otherwise.
 """
 isfifo(st::StatStruct) = filemode(st) & 0xf000 == 0x1000
 
 """
-    ischardev(path) -> Bool
+    ischardev(path)::Bool
+    ischardev(path_elements...)::Bool
+    ischardev(stat_struct)::Bool
 
-Return `true` if `path` is a character device, `false` otherwise.
+Return `true` if the path `path` or file descriptor `stat_struct` refer to a character device, `false` otherwise.
 """
 ischardev(st::StatStruct) = filemode(st) & 0xf000 == 0x2000
 
 """
-    isdir(path) -> Bool
+    isdir(path)::Bool
+    isdir(path_elements...)::Bool
 
-Return `true` if `path` is a directory, `false` otherwise.
+Return `true` if `path` points to a directory, `false` otherwise.
 
 # Examples
 ```jldoctest
@@ -337,16 +420,19 @@ See also [`isfile`](@ref) and [`ispath`](@ref).
 isdir(st::StatStruct) = filemode(st) & 0xf000 == 0x4000
 
 """
-    isblockdev(path) -> Bool
+    isblockdev(path)::Bool
+    isblockdev(path_elements...)::Bool
+    isblockdev(stat_struct)::Bool
 
-Return `true` if `path` is a block device, `false` otherwise.
+Return `true` if the path `path` or file descriptor `stat_struct` refer to a block device, `false` otherwise.
 """
 isblockdev(st::StatStruct) = filemode(st) & 0xf000 == 0x6000
 
 """
-    isfile(path) -> Bool
+    isfile(path)::Bool
+    isfile(path_elements...)::Bool
 
-Return `true` if `path` is a regular file, `false` otherwise.
+Return `true` if `path` points to a regular file, `false` otherwise.
 
 # Examples
 ```jldoctest
@@ -371,46 +457,56 @@ See also [`isdir`](@ref) and [`ispath`](@ref).
 isfile(st::StatStruct) = filemode(st) & 0xf000 == 0x8000
 
 """
-    islink(path) -> Bool
+    islink(path)::Bool
+    islink(path_elements...)::Bool
 
-Return `true` if `path` is a symbolic link, `false` otherwise.
+Return `true` if `path` points to a symbolic link, `false` otherwise.
 """
 islink(st::StatStruct) = filemode(st) & 0xf000 == 0xa000
 
 """
-    issocket(path) -> Bool
+    issocket(path)::Bool
+    issocket(path_elements...)::Bool
 
-Return `true` if `path` is a socket, `false` otherwise.
+Return `true` if `path` points to a socket, `false` otherwise.
 """
 issocket(st::StatStruct) = filemode(st) & 0xf000 == 0xc000
 
 # mode permission predicates
 
 """
-    issetuid(path) -> Bool
+    issetuid(path)::Bool
+    issetuid(path_elements...)::Bool
+    issetuid(stat_struct)::Bool
 
-Return `true` if `path` has the setuid flag set, `false` otherwise.
+Return `true` if the file at `path` or file descriptor `stat_struct` have the setuid flag set, `false` otherwise.
 """
 issetuid(st::StatStruct) = (filemode(st) & 0o4000) > 0
 
 """
-    issetgid(path) -> Bool
+    issetgid(path)::Bool
+    issetgid(path_elements...)::Bool
+    issetgid(stat_struct)::Bool
 
-Return `true` if `path` has the setgid flag set, `false` otherwise.
+Return `true` if the file at `path` or file descriptor `stat_struct` have the setgid flag set, `false` otherwise.
 """
 issetgid(st::StatStruct) = (filemode(st) & 0o2000) > 0
 
 """
-    issticky(path) -> Bool
+    issticky(path)::Bool
+    issticky(path_elements...)::Bool
+    issticky(stat_struct)::Bool
 
-Return `true` if `path` has the sticky bit set, `false` otherwise.
+Return `true` if the file at `path` or file descriptor `stat_struct` have the sticky bit set, `false` otherwise.
 """
 issticky(st::StatStruct) = (filemode(st) & 0o1000) > 0
 
 """
-    uperm(file)
+    uperm(path)
+    uperm(path_elements...)
+    uperm(stat_struct)
 
-Get the permissions of the owner of the file as a bitfield of
+Return a bitfield of the owner permissions for the file at `path` or file descriptor `stat_struct`.
 
 | Value | Description        |
 |:------|:-------------------|
@@ -418,22 +514,52 @@ Get the permissions of the owner of the file as a bitfield of
 | 02    | Write Permission   |
 | 04    | Read Permission    |
 
-For allowed arguments, see [`stat`](@ref).
+The fact that a bitfield is returned means that if the permission
+is read+write, the bitfield is "110", which maps to the decimal
+value of 0+2+4=6. This is reflected in the printing of the
+returned `UInt8` value.
+
+See also [`gperm`](@ref) and [`operm`](@ref).
+
+```jldoctest
+julia> touch("dummy_file");  # Create test-file without contents
+
+julia> uperm("dummy_file")
+0x06
+
+julia> bitstring(ans)
+"00000110"
+
+julia> has_read_permission(path) = uperm(path) & 0b00000100 != 0;  # Use bit mask to check specific bit
+
+julia> has_read_permission("dummy_file")
+true
+
+julia> rm("dummy_file")     # Clean up test-file
+```
 """
 uperm(st::StatStruct) = UInt8((filemode(st) >> 6) & 0x7)
 
 """
-    gperm(file)
+    gperm(path)
+    gperm(path_elements...)
+    gperm(stat_struct)
 
 Like [`uperm`](@ref) but gets the permissions of the group owning the file.
+
+See also [`operm`](@ref).
 """
 gperm(st::StatStruct) = UInt8((filemode(st) >> 3) & 0x7)
 
 """
-    operm(file)
+    operm(path)
+    operm(path_elements...)
+    operm(stat_struct)
+
+Like [`uperm`](@ref) but gets the permissions for people who neither own the
+file nor are a member of the group owning the file.
 
-Like [`uperm`](@ref) but gets the permissions for people who neither own the file nor are a member of
-the group owning the file
+See also [`gperm`](@ref).
 """
 operm(st::StatStruct) = UInt8((filemode(st)     ) & 0x7)
 
@@ -469,14 +595,15 @@ function samefile(a::StatStruct, b::StatStruct)
 end
 
 """
-    samefile(path_a::AbstractString, path_b::AbstractString)
+    samefile(path_a, path_b)
 
 Check if the paths `path_a` and `path_b` refer to the same existing file or directory.
 """
 samefile(a::AbstractString, b::AbstractString) = samefile(stat(a), stat(b))
 
 """
-    ismount(path) -> Bool
+    ismount(path)::Bool
+    ismount(path_elements...)::Bool
 
 Return `true` if `path` is a mount point, `false` otherwise.
 """
diff --git a/base/stream.jl b/base/stream.jl
index 0b6c9a93777f6..7b227458ec552 100644
--- a/base/stream.jl
+++ b/base/stream.jl
@@ -76,7 +76,7 @@ function getproperty(stream::LibuvStream, name::Symbol)
 end
 
 # IO
-# +- GenericIOBuffer{T<:AbstractArray{UInt8,1}} (not exported)
+# +- GenericIOBuffer{T<:AbstractVector{UInt8}} (not exported)
 # +- AbstractPipe (not exported)
 # .  +- Pipe
 # .  +- Process (not exported)
@@ -89,7 +89,7 @@ end
 # .  +- TTY (not exported)
 # .  +- UDPSocket
 # .  +- BufferStream (FIXME: 2.0)
-# +- IOBuffer = Base.GenericIOBuffer{Array{UInt8,1}}
+# +- IOBuffer = Base.GenericIOBuffer{Vector{UInt8}}
 # +- IOStream
 
 # IOServer
@@ -122,7 +122,7 @@ const DEFAULT_READ_BUFFER_SZ = 10485760 # 10 MB
 if Sys.iswindows()
     const MAX_OS_WRITE = UInt(0x1FF0_0000) # 511 MB (determined semi-empirically, limited to 31 MB on XP)
 else
-    const MAX_OS_WRITE = UInt(typemax(Csize_t))
+    const MAX_OS_WRITE = UInt(0x7FFF_0000) # almost 2 GB (both macOS and linux have this kernel restriction, although only macOS documents it)
 end
 
 
@@ -202,12 +202,7 @@ end
 
 function PipeEndpoint(fd::OS_HANDLE)
     pipe = PipeEndpoint()
-    iolock_begin()
-    err = ccall(:uv_pipe_open, Int32, (Ptr{Cvoid}, OS_HANDLE), pipe.handle, fd)
-    uv_error("pipe_open", err)
-    pipe.status = StatusOpen
-    iolock_end()
-    return pipe
+    return open_pipe!(pipe, fd)
 end
 if OS_HANDLE != RawFD
     PipeEndpoint(fd::RawFD) = PipeEndpoint(Libc._get_osfhandle(fd))
@@ -283,8 +278,8 @@ end
 lock(s::LibuvStream) = lock(s.lock)
 unlock(s::LibuvStream) = unlock(s.lock)
 
-setup_stdio(stream::LibuvStream, ::Bool) = (stream, false)
-rawhandle(stream::LibuvStream) = stream.handle
+setup_stdio(stream::Union{LibuvStream, LibuvServer}, ::Bool) = (stream, false)
+rawhandle(stream::Union{LibuvStream, LibuvServer}) = stream.handle
 unsafe_convert(::Type{Ptr{Cvoid}}, s::Union{LibuvStream, LibuvServer}) = s.handle
 
 function init_stdio(handle::Ptr{Cvoid})
@@ -304,7 +299,7 @@ function init_stdio(handle::Ptr{Cvoid})
     elseif t == UV_TTY
         io = TTY(handle, StatusOpen)
     elseif t == UV_TCP
-        Sockets = require(PkgId(UUID((0x6462fe0b_24de_5631, 0x8697_dd941f90decc)), "Sockets"))
+        Sockets = require_stdlib(PkgId(UUID((0x6462fe0b_24de_5631, 0x8697_dd941f90decc)), "Sockets"))
         io = Sockets.TCPSocket(handle, StatusOpen)
     elseif t == UV_NAMED_PIPE
         io = PipeEndpoint(handle, StatusOpen)
@@ -316,9 +311,9 @@ function init_stdio(handle::Ptr{Cvoid})
 end
 
 """
-    open(fd::OS_HANDLE) -> IO
+    open(fd::OS_HANDLE)::IO
 
-Take a raw file descriptor wrap it in a Julia-aware IO type,
+Take a raw file descriptor and wrap it in a Julia-aware IO type,
 and take ownership of the fd handle.
 Call `open(Libc.dup(fd))` to avoid the ownership capture
 of the original handle.
@@ -341,7 +336,7 @@ function open(h::OS_HANDLE)
     elseif t == UV_TTY
         io = TTY(h)
     elseif t == UV_TCP
-        Sockets = require(PkgId(UUID((0x6462fe0b_24de_5631, 0x8697_dd941f90decc)), "Sockets"))
+        Sockets = require_stdlib(PkgId(UUID((0x6462fe0b_24de_5631, 0x8697_dd941f90decc)), "Sockets"))
         io = Sockets.TCPSocket(h)
     elseif t == UV_NAMED_PIPE
         io = PipeEndpoint(h)
@@ -378,7 +373,7 @@ end
 
 function isopen(x::Union{LibuvStream, LibuvServer})
     if x.status == StatusUninit || x.status == StatusInit || x.handle === C_NULL
-        throw(ArgumentError("$x is not initialized"))
+        throw(ArgumentError("stream not initialized"))
     end
     return x.status != StatusClosed
 end
@@ -436,7 +431,10 @@ end
 
 function closewrite(s::LibuvStream)
     iolock_begin()
-    check_open(s)
+    if !iswritable(s)
+        iolock_end()
+        return
+    end
     req = Libc.malloc(_sizeof_uv_shutdown)
     uv_req_set_data(req, C_NULL) # in case we get interrupted before arriving at the wait call
     err = ccall(:uv_shutdown, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}),
@@ -450,14 +448,16 @@ function closewrite(s::LibuvStream)
     sigatomic_begin()
     uv_req_set_data(req, ct)
     iolock_end()
-    status = try
+    local status
+    try
         sigatomic_end()
-        wait()::Cint
+        status = wait()::Cint
+        sigatomic_begin()
     finally
         # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
         sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(req) != C_NULL
             # req is still alive,
             # so make sure we won't get spurious notifications later
@@ -564,8 +564,14 @@ displaysize(io::IO) = displaysize()
 displaysize() = (parse(Int, get(ENV, "LINES",   "24")),
                  parse(Int, get(ENV, "COLUMNS", "80")))::Tuple{Int, Int}
 
+# This is a fancy way to make de-specialize a call to `displaysize(io::IO)`
+# which is unfortunately invalidated by REPL
+#  (https://github.com/JuliaLang/julia/issues/56080)
+#
+# This makes the call less efficient, but avoids being invalidated by REPL.
+displaysize_(io::IO) = Base.invoke_in_world(Base.tls_world_age(), displaysize, io)::Tuple{Int,Int}
+
 function displaysize(io::TTY)
-    # A workaround for #34620 and #26687 (this still has the TOCTOU problem).
     check_open(io)
 
     local h::Int, w::Int
@@ -588,6 +594,7 @@ function displaysize(io::TTY)
     s1 = Ref{Int32}(0)
     s2 = Ref{Int32}(0)
     iolock_begin()
+    check_open(io)
     Base.uv_error("size (TTY)", ccall(:uv_tty_get_winsize,
                                       Int32, (Ptr{Cvoid}, Ptr{Int32}, Ptr{Int32}),
                                       io, s1, s2) != 0)
@@ -603,9 +610,10 @@ end
 ## BUFFER ##
 ## Allocate space in buffer (for immediate use)
 function alloc_request(buffer::IOBuffer, recommended_size::UInt)
-    ensureroom(buffer, Int(recommended_size))
+    ensureroom(buffer, recommended_size)
     ptr = buffer.append ? buffer.size + 1 : buffer.ptr
-    nb = min(length(buffer.data), buffer.maxsize) - ptr + 1
+    start_offset = ptr - 1
+    nb = max(0, min(length(buffer.data) - start_offset, buffer.maxsize - (start_offset - get_offset(buffer))))
     return (Ptr{Cvoid}(pointer(buffer.data, ptr)), nb)
 end
 
@@ -616,6 +624,7 @@ function notify_filled(buffer::IOBuffer, nread::Int)
         buffer.size += nread
     else
         buffer.ptr += nread
+        buffer.size = max(buffer.size, buffer.ptr - 1)
     end
     nothing
 end
@@ -740,24 +749,42 @@ mutable struct Pipe <: AbstractPipe
 end
 
 """
-Construct an uninitialized Pipe object.
+    Pipe()
 
-The appropriate end of the pipe will be automatically initialized if
-the object is used in process spawning. This can be useful to easily
-obtain references in process pipelines, e.g.:
+Construct an uninitialized Pipe object, especially for IO communication between multiple processes.
+
+The appropriate end of the pipe will be automatically initialized if the object is used in
+process spawning. This can be useful to easily obtain references in process pipelines, e.g.:
 
 ```
 julia> err = Pipe()
 
 # After this `err` will be initialized and you may read `foo`'s
-# stderr from the `err` pipe.
+# stderr from the `err` pipe, or pass `err` to other pipelines.
 julia> run(pipeline(pipeline(`foo`, stderr=err), `cat`), wait=false)
+
+# Now destroy the write half of the pipe, so that the read half will get EOF
+julia> closewrite(err)
+
+julia> read(err, String)
+"stderr messages"
 ```
+
+See also [`Base.link_pipe!`](@ref).
 """
 Pipe() = Pipe(PipeEndpoint(), PipeEndpoint())
 pipe_reader(p::Pipe) = p.out
 pipe_writer(p::Pipe) = p.in
 
+"""
+    link_pipe!(pipe; reader_supports_async=false, writer_supports_async=false)
+
+Initialize `pipe` and link the `in` endpoint to the `out` endpoint. The keyword
+arguments `reader_supports_async`/`writer_supports_async` correspond to
+`OVERLAPPED` on Windows and `O_NONBLOCK` on POSIX systems. They should be `true`
+unless they'll be used by an external program (e.g. the output of a command
+executed with [`run`](@ref)).
+"""
 function link_pipe!(pipe::Pipe;
                     reader_supports_async = false,
                     writer_supports_async = false)
@@ -773,6 +800,7 @@ show(io::IO, stream::Pipe) = print(io,
     uv_status_string(stream.out), ", ",
     bytesavailable(stream), " bytes waiting)")
 
+closewrite(pipe::Pipe) = close(pipe.in)
 
 ## Functions for PipeEndpoint and PipeServer ##
 
@@ -910,8 +938,8 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int)
     if bytesavailable(sbuf) >= nb
         nread = readbytes!(sbuf, a, nb)
     else
-        newbuf = PipeBuffer(a, maxsize=nb)
-        newbuf.size = 0 # reset the write pointer to the beginning
+        initsize = length(a)
+        newbuf = _truncated_pipebuffer(a; maxsize=nb)
         nread = try
             s.buffer = newbuf
             write(newbuf, sbuf)
@@ -920,7 +948,8 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int)
         finally
             s.buffer = sbuf
         end
-        compact(newbuf)
+        _take!(a, _unsafe_take!(newbuf))
+        length(a) >= initsize || resize!(a, initsize)
     end
     iolock_end()
     return nread
@@ -957,8 +986,7 @@ function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt)
     if bytesavailable(sbuf) >= nb
         unsafe_read(sbuf, p, nb)
     else
-        newbuf = PipeBuffer(unsafe_wrap(Array, p, nb), maxsize=Int(nb))
-        newbuf.size = 0 # reset the write pointer to the beginning
+        newbuf = _truncated_pipebuffer(unsafe_wrap(Array, p, nb); maxsize=Int(nb))
         try
             s.buffer = newbuf
             write(newbuf, sbuf)
@@ -995,7 +1023,7 @@ function readavailable(this::LibuvStream)
     return bytes
 end
 
-function readuntil(x::LibuvStream, c::UInt8; keep::Bool=false)
+function copyuntil(out::IO, x::LibuvStream, c::UInt8; keep::Bool=false)
     iolock_begin()
     buf = x.buffer
     @assert buf.seekable == false
@@ -1025,9 +1053,9 @@ function readuntil(x::LibuvStream, c::UInt8; keep::Bool=false)
             end
         end
     end
-    bytes = readuntil(buf, c, keep=keep)
+    copyuntil(out, buf, c; keep)
     iolock_end()
-    return bytes
+    return out
 end
 
 uv_write(s::LibuvStream, p::Vector{UInt8}) = GC.@preserve p uv_write(s, pointer(p), UInt(sizeof(p)))
@@ -1040,17 +1068,19 @@ function uv_write(s::LibuvStream, p::Ptr{UInt8}, n::UInt)
     sigatomic_begin()
     uv_req_set_data(uvw, ct)
     iolock_end()
-    status = try
+    local status
+    try
         sigatomic_end()
         # wait for the last chunk to complete (or error)
         # assume that any errors would be sticky,
         # (so we don't need to monitor the error status of the intermediate writes)
-        wait()::Cint
+        status = wait()::Cint
+        sigatomic_begin()
     finally
         # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
         sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(uvw) != C_NULL
             # uvw is still alive,
             # so make sure we won't get spurious notifications later
@@ -1215,7 +1245,15 @@ function _redirect_io_libc(stream, unix_fd::Int)
                 -10 - unix_fd, Libc._get_osfhandle(posix_fd))
         end
     end
-    dup(posix_fd, RawFD(unix_fd))
+    GC.@preserve stream dup(posix_fd, RawFD(unix_fd))
+    nothing
+end
+function _redirect_io_cglobal(handle::Union{LibuvStream, IOStream, Nothing}, unix_fd::Int)
+    c_sym = unix_fd == 0 ? cglobal(:jl_uv_stdin, Ptr{Cvoid}) :
+            unix_fd == 1 ? cglobal(:jl_uv_stdout, Ptr{Cvoid}) :
+            unix_fd == 2 ? cglobal(:jl_uv_stderr, Ptr{Cvoid}) :
+            C_NULL
+    c_sym == C_NULL || unsafe_store!(c_sym, handle === nothing ? Ptr{Cvoid}(unix_fd) : handle.handle)
     nothing
 end
 function _redirect_io_global(io, unix_fd::Int)
@@ -1226,11 +1264,7 @@ function _redirect_io_global(io, unix_fd::Int)
 end
 function (f::RedirectStdStream)(handle::Union{LibuvStream, IOStream})
     _redirect_io_libc(handle, f.unix_fd)
-    c_sym = f.unix_fd == 0 ? cglobal(:jl_uv_stdin, Ptr{Cvoid}) :
-            f.unix_fd == 1 ? cglobal(:jl_uv_stdout, Ptr{Cvoid}) :
-            f.unix_fd == 2 ? cglobal(:jl_uv_stderr, Ptr{Cvoid}) :
-            C_NULL
-    c_sym == C_NULL || unsafe_store!(c_sym, handle.handle)
+    _redirect_io_cglobal(handle, f.unix_fd)
     _redirect_io_global(handle, f.unix_fd)
     return handle
 end
@@ -1239,6 +1273,7 @@ function (f::RedirectStdStream)(::DevNull)
     handle = open(nulldev, write=f.writable)
     _redirect_io_libc(handle, f.unix_fd)
     close(handle) # handle has been dup'ed in _redirect_io_libc
+    _redirect_io_cglobal(nothing, f.unix_fd)
     _redirect_io_global(devnull, f.unix_fd)
     return devnull
 end
@@ -1273,7 +1308,7 @@ the pipe.
 
 !!! note
     `stream` must be a compatible objects, such as an `IOStream`, `TTY`,
-    `Pipe`, socket, or `devnull`.
+    [`Pipe`](@ref), socket, or `devnull`.
 
 See also [`redirect_stdio`](@ref).
 """
@@ -1286,7 +1321,7 @@ Like [`redirect_stdout`](@ref), but for [`stderr`](@ref).
 
 !!! note
     `stream` must be a compatible objects, such as an `IOStream`, `TTY`,
-    `Pipe`, socket, or `devnull`.
+    [`Pipe`](@ref), socket, or `devnull`.
 
 See also [`redirect_stdio`](@ref).
 """
@@ -1300,7 +1335,7 @@ Note that the direction of the stream is reversed.
 
 !!! note
     `stream` must be a compatible objects, such as an `IOStream`, `TTY`,
-    `Pipe`, socket, or `devnull`.
+    [`Pipe`](@ref), socket, or `devnull`.
 
 See also [`redirect_stdio`](@ref).
 """
@@ -1310,7 +1345,8 @@ redirect_stdin
     redirect_stdio(;stdin=stdin, stderr=stderr, stdout=stdout)
 
 Redirect a subset of the streams `stdin`, `stderr`, `stdout`.
-Each argument must be an `IOStream`, `TTY`, `Pipe`, socket, or `devnull`.
+Each argument must be an `IOStream`, `TTY`, [`Pipe`](@ref), socket, or
+`devnull`.
 
 !!! compat "Julia 1.7"
     `redirect_stdio` requires Julia 1.7 or later.
@@ -1330,7 +1366,7 @@ call `f()` and restore each stream.
 Possible values for each stream are:
 * `nothing` indicating the stream should not be redirected.
 * `path::AbstractString` redirecting the stream to the file at `path`.
-* `io` an `IOStream`, `TTY`, `Pipe`, socket, or `devnull`.
+* `io` an `IOStream`, `TTY`, [`Pipe`](@ref), socket, or `devnull`.
 
 # Examples
 ```julia-repl
@@ -1489,7 +1525,7 @@ closewrite(s::BufferStream) = close(s)
 function close(s::BufferStream)
     lock(s.cond) do
         s.status = StatusClosed
-        notify(s.cond)
+        notify(s.cond) # aka flush
         nothing
     end
 end
@@ -1523,6 +1559,63 @@ function wait_readnb(s::BufferStream, nb::Int)
     end
 end
 
+function readavailable(this::BufferStream)
+    bytes = lock(this.cond) do
+        wait_readnb(this, 1)
+        buf = this.buffer
+        @assert buf.seekable == false
+        take!(buf)
+    end
+    return bytes
+end
+
+function read(stream::BufferStream)
+    bytes = lock(stream.cond) do
+        wait_close(stream)
+        take!(stream.buffer)
+    end
+    return bytes
+end
+
+function readbytes!(s::BufferStream, a::Vector{UInt8}, nb::Int)
+    sbuf = s.buffer
+    @assert sbuf.seekable == false
+    @assert sbuf.maxsize >= nb
+
+    function wait_locked(s, buf, nb)
+        while bytesavailable(buf) < nb
+            s.readerror === nothing || throw(s.readerror)
+            isopen(s) || break
+            s.status != StatusEOF || break
+            wait_readnb(s, nb)
+        end
+    end
+
+    bytes = lock(s.cond) do
+        if nb <= SZ_UNBUFFERED_IO # Under this limit we are OK with copying the array from the stream's buffer
+            wait_locked(s, sbuf, nb)
+        end
+        if bytesavailable(sbuf) >= nb
+            nread = readbytes!(sbuf, a, nb)
+        else
+            initsize = length(a)
+            newbuf = _truncated_pipebuffer(a; maxsize=nb)
+            nread = try
+                s.buffer = newbuf
+                write(newbuf, sbuf)
+                wait_locked(s, newbuf, nb)
+                bytesavailable(newbuf)
+            finally
+                s.buffer = sbuf
+            end
+            _take!(a, _unsafe_take!(newbuf))
+            length(a) >= initsize || resize!(a, initsize)
+        end
+        return nread
+    end
+    return bytes
+end
+
 show(io::IO, s::BufferStream) = print(io, "BufferStream(bytes waiting=", bytesavailable(s.buffer), ", isopen=", isopen(s), ")")
 
 function readuntil(s::BufferStream, c::UInt8; keep::Bool=false)
@@ -1549,6 +1642,7 @@ stop_reading(s::BufferStream) = nothing
 write(s::BufferStream, b::UInt8) = write(s, Ref{UInt8}(b))
 function unsafe_write(s::BufferStream, p::Ptr{UInt8}, nb::UInt)
     nwrite = lock(s.cond) do
+        check_open(s)
         rv = unsafe_write(s.buffer, p, nb)
         s.buffer_writes || notify(s.cond)
         rv
@@ -1569,9 +1663,18 @@ end
 buffer_writes(s::BufferStream, bufsize=0) = (s.buffer_writes = true; s)
 function flush(s::BufferStream)
     lock(s.cond) do
+        check_open(s)
         notify(s.cond)
         nothing
     end
 end
 
 skip(s::BufferStream, n) = skip(s.buffer, n)
+
+function reseteof(s::BufferStream)
+    lock(s.cond) do
+        s.status = StatusOpen
+        nothing
+    end
+    nothing
+end
diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl
new file mode 100644
index 0000000000000..89cba6db42c8d
--- /dev/null
+++ b/base/strings/annotated.jl
@@ -0,0 +1,568 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+const Annotation = NamedTuple{(:label, :value), Tuple{Symbol, Any}}
+const RegionAnnotation = NamedTuple{(:region, :label, :value), Tuple{UnitRange{Int}, Symbol, Any}}
+
+"""
+    AnnotatedString{S <: AbstractString} <: AbstractString
+
+A string with metadata, in the form of annotated regions.
+
+More specifically, this is a simple wrapper around any other
+[`AbstractString`](@ref) that allows for regions of the wrapped string to be
+annotated with labeled values.
+
+```text
+                           C
+                    ┌──────┸─────────┐
+  "this is an example annotated string"
+  └──┰────────┼─────┘         │
+     A        └─────┰─────────┘
+                    B
+```
+
+The above diagram represents a `AnnotatedString` where three ranges have been
+annotated (labeled `A`, `B`, and `C`). Each annotation holds a label (`Symbol`)
+and a value (`Any`). These three pieces of information are held as a
+`$RegionAnnotation`.
+
+Labels do not need to be unique, the same region can hold multiple annotations
+with the same label.
+
+Code written for `AnnotatedString`s in general should conserve the following
+properties:
+- Which characters an annotation is applied to
+- The order in which annotations are applied to each character
+
+Additional semantics may be introduced by specific uses of `AnnotatedString`s.
+
+A corollary of these rules is that adjacent, consecutively placed, annotations
+with identical labels and values are equivalent to a single annotation spanning
+the combined range.
+
+See also [`AnnotatedChar`](@ref), [`annotatedstring`](@ref),
+[`annotations`](@ref), and [`annotate!`](@ref).
+
+# Constructors
+
+```julia
+AnnotatedString(s::S<:AbstractString) -> AnnotatedString{S}
+AnnotatedString(s::S<:AbstractString, annotations::Vector{$RegionAnnotation})
+```
+
+A AnnotatedString can also be created with [`annotatedstring`](@ref), which acts much
+like [`string`](@ref) but preserves any annotations present in the arguments.
+
+# Examples
+
+```jldoctest; setup=:(using Base: AnnotatedString)
+julia> AnnotatedString("this is an example annotated string",
+                    [(1:18, :A, 1), (12:28, :B, 2), (18:35, :C, 3)])
+"this is an example annotated string"
+```
+"""
+struct AnnotatedString{S <: AbstractString} <: AbstractString
+    string::S
+    annotations::Vector{RegionAnnotation}
+end
+
+"""
+    AnnotatedChar{S <: AbstractChar} <: AbstractChar
+
+A Char with annotations.
+
+More specifically, this is a simple wrapper around any other
+[`AbstractChar`](@ref), which holds a list of arbitrary labelled annotations
+(`$Annotation`) with the wrapped character.
+
+See also: [`AnnotatedString`](@ref), [`annotatedstring`](@ref), `annotations`,
+and `annotate!`.
+
+# Constructors
+
+```julia
+AnnotatedChar(s::S) -> AnnotatedChar{S}
+AnnotatedChar(s::S, annotations::Vector{$Annotation})
+```
+
+# Examples
+
+```jldoctest; setup=:(using Base: AnnotatedChar)
+julia> AnnotatedChar('j', [(:label, 1)])
+'j': ASCII/Unicode U+006A (category Ll: Letter, lowercase)
+```
+"""
+struct AnnotatedChar{C <: AbstractChar} <: AbstractChar
+    char::C
+    annotations::Vector{Annotation}
+end
+
+## Constructors ##
+
+# When called with overly-specialised arguments
+
+AnnotatedString(s::AbstractString, annots::Vector) =
+    AnnotatedString(s, Vector{RegionAnnotation}(annots))
+
+AnnotatedString(s::AbstractString, annots) =
+    AnnotatedString(s, collect(RegionAnnotation, annots))
+
+AnnotatedChar(c::AbstractChar, annots::Vector) =
+    AnnotatedChar(c, Vector{Annotation}(annots))
+
+AnnotatedChar(c::AbstractChar, annots) =
+    AnnotatedChar(c, collect(Annotation, annots))
+
+# Constructors to avoid recursive wrapping
+
+AnnotatedString(s::AnnotatedString, annots::Vector{RegionAnnotation}) =
+    AnnotatedString(s.string, vcat(s.annotations, annots))
+
+AnnotatedChar(c::AnnotatedChar, annots::Vector{Annotation}) =
+    AnnotatedChar(c.char, vcat(c.annotations, Vector{Annotation}(annots)))
+
+# To avoid pointless overhead
+String(s::AnnotatedString{String}) = s.string
+
+## Conversion/promotion ##
+
+convert(::Type{AnnotatedString}, s::AnnotatedString) = s
+convert(::Type{AnnotatedString{S}}, s::S) where {S <: AbstractString} =
+    AnnotatedString(s, Vector{RegionAnnotation}())
+convert(::Type{AnnotatedString}, s::S) where {S <: AbstractString} =
+    convert(AnnotatedString{S}, s)
+AnnotatedString(s::S) where {S <: AbstractString} = convert(AnnotatedString{S}, s)
+
+convert(::Type{AnnotatedChar}, c::AnnotatedChar) = c
+convert(::Type{AnnotatedChar{C}}, c::C) where { C <: AbstractChar } =
+    AnnotatedChar{C}(c, Vector{Annotation}())
+convert(::Type{AnnotatedChar}, c::C) where { C <: AbstractChar } =
+    convert(AnnotatedChar{C}, c)
+
+AnnotatedChar(c::AbstractChar) = convert(AnnotatedChar, c)
+AnnotatedChar(c::UInt32) = convert(AnnotatedChar, Char(c))
+AnnotatedChar{C}(c::UInt32) where {C <: AbstractChar} = convert(AnnotatedChar, C(c))
+
+promote_rule(::Type{<:AnnotatedString}, ::Type{<:AbstractString}) = AnnotatedString
+
+## AbstractString interface ##
+
+ncodeunits(s::AnnotatedString) = ncodeunits(s.string)::Int
+codeunits(s::AnnotatedString) = codeunits(s.string)
+codeunit(s::AnnotatedString) = codeunit(s.string)
+codeunit(s::AnnotatedString, i::Integer) = codeunit(s.string, i)
+isvalid(s::AnnotatedString, i::Integer) = isvalid(s.string, i)::Bool
+@propagate_inbounds iterate(s::AnnotatedString, i::Integer=firstindex(s)) =
+    if i <= lastindex(s.string); (s[i], nextind(s, i)) end
+eltype(::Type{<:AnnotatedString{S}}) where {S} = AnnotatedChar{eltype(S)}
+firstindex(s::AnnotatedString) = firstindex(s.string)
+lastindex(s::AnnotatedString) = lastindex(s.string)
+
+function getindex(s::AnnotatedString, i::Integer)
+    @boundscheck checkbounds(s, i)
+    @inbounds if isvalid(s, i)
+        AnnotatedChar(s.string[i], Annotation[(; label, value) for (; label, value) in annotations(s, i)])
+    else
+        string_index_err(s, i)
+    end
+end
+
+# To make `AnnotatedString`s repr-evaluable, we need to override
+# the generic `AbstractString` 2-arg show method.
+
+function show(io::IO, s::A) where {A <: AnnotatedString}
+    show(io, A)
+    print(io, '(')
+    show(io, s.string)
+    print(io, ", ")
+    tupanns = Vector{Tuple{UnitRange{Int}, Symbol, Any}}(map(values, s.annotations))
+    show(IOContext(io, :typeinfo => typeof(tupanns)), tupanns)
+    print(io, ')')
+end
+
+# But still use the generic `AbstractString` fallback for the 3-arg show.
+show(io::IO, ::MIME"text/plain", s::AnnotatedString) =
+    invoke(show, Tuple{IO, AbstractString}, io, s)
+
+## AbstractChar interface ##
+
+ncodeunits(c::AnnotatedChar) = ncodeunits(c.char)
+codepoint(c::AnnotatedChar) = codepoint(c.char)
+
+# Avoid the iteration fallback with comparison
+cmp(a::AnnotatedString, b::AbstractString) = cmp(a.string, b)
+cmp(a::AbstractString, b::AnnotatedString) = cmp(a, b.string)
+# To avoid method ambiguity
+cmp(a::AnnotatedString, b::AnnotatedString) = cmp(a.string, b.string)
+
+==(a::AnnotatedString, b::AnnotatedString) =
+    a.string == b.string && a.annotations == b.annotations
+
+==(a::AnnotatedString, b::AbstractString) = isempty(a.annotations) && a.string == b
+==(a::AbstractString, b::AnnotatedString) = isempty(b.annotations) && a == b.string
+
+# To prevent substring equality from hitting the generic fallback
+
+function ==(a::SubString{<:AnnotatedString}, b::SubString{<:AnnotatedString})
+    SubString(a.string.string, a.offset, a.ncodeunits, Val(:noshift)) ==
+        SubString(b.string.string, b.offset, b.ncodeunits, Val(:noshift)) &&
+        annotations(a) == annotations(b)
+end
+
+==(a::SubString{<:AnnotatedString}, b::AnnotatedString) =
+    annotations(a) == annotations(b) && SubString(a.string.string, a.offset, a.ncodeunits, Val(:noshift)) == b.string
+
+==(a::SubString{<:AnnotatedString}, b::AbstractString) =
+    isempty(annotations(a)) && SubString(a.string.string, a.offset, a.ncodeunits, Val(:noshift)) == b
+
+==(a::AbstractString, b::SubString{<:AnnotatedString}) = b == a
+
+==(a::AnnotatedString, b::SubString{<:AnnotatedString}) = b == a
+
+"""
+    annotatedstring(values...)
+
+Create a `AnnotatedString` from any number of `values` using their
+[`print`](@ref)ed representation.
+
+This acts like [`string`](@ref), but takes care to preserve any annotations
+present (in the form of [`AnnotatedString`](@ref) or [`AnnotatedChar`](@ref) values).
+
+See also [`AnnotatedString`](@ref) and [`AnnotatedChar`](@ref).
+
+## Examples
+
+```jldoctest; setup=:(using Base: AnnotatedString, annotatedstring)
+julia> annotatedstring("now an AnnotatedString")
+"now an AnnotatedString"
+
+julia> annotatedstring(AnnotatedString("annotated", [(1:9, :label, 1)]), ", and unannotated")
+"annotated, and unannotated"
+```
+"""
+function annotatedstring(xs...)
+    isempty(xs) && return AnnotatedString("")
+    size = mapreduce(_str_sizehint, +, xs)
+    buf = IOBuffer(sizehint=size)
+    s = IOContext(buf, :color => true)
+    annotations = Vector{RegionAnnotation}()
+    for x in xs
+        size = filesize(s.io)
+        if x isa AnnotatedString
+            for annot in x.annotations
+                push!(annotations, setindex(annot, annot.region .+ size, :region))
+            end
+            print(s, x.string)
+        elseif x isa SubString{<:AnnotatedString}
+            for annot in x.string.annotations
+                start, stop = first(annot.region), last(annot.region)
+                if start <= x.offset + x.ncodeunits && stop > x.offset
+                    rstart = size + max(0, start - x.offset - 1) + 1
+                    rstop = size + min(stop, x.offset + x.ncodeunits) - x.offset
+                    push!(annotations, setindex(annot, rstart:rstop, :region))
+                end
+            end
+            print(s, SubString(x.string.string, x.offset, x.ncodeunits, Val(:noshift)))
+        elseif x isa AnnotatedChar
+            for annot in x.annotations
+                push!(annotations, (region=1+size:1+size, annot...))
+            end
+            print(s, x.char)
+        else
+            print(s, x)
+        end
+    end
+    str = takestring!(buf)
+    AnnotatedString(str, annotations)
+end
+
+annotatedstring(s::AnnotatedString) = s
+annotatedstring(c::AnnotatedChar) =
+    AnnotatedString(string(c.char), [(region=1:ncodeunits(c), annot...) for annot in c.annotations])
+
+AnnotatedString(s::SubString{<:AnnotatedString}) = annotatedstring(s)
+
+function repeat(str::AnnotatedString, r::Integer)
+    r == 0 && return one(AnnotatedString)
+    r == 1 && return str
+    unannot = repeat(str.string, r)
+    annotations = Vector{RegionAnnotation}()
+    len = ncodeunits(str)
+    fullregion = firstindex(str):lastindex(str)
+    if isempty(str.annotations)
+    elseif allequal(a -> a.region, str.annotations) && first(str.annotations).region == fullregion
+        newfullregion = firstindex(unannot):lastindex(unannot)
+        for annot in str.annotations
+            push!(annotations, setindex(annot, newfullregion, :region))
+        end
+    else
+        for offset in 0:len:(r-1)*len
+            for annot in str.annotations
+                push!(annotations, setindex(annot, annot.region .+ offset, :region))
+            end
+        end
+    end
+    AnnotatedString(unannot, annotations)
+end
+
+repeat(str::SubString{<:AnnotatedString}, r::Integer) =
+    repeat(AnnotatedString(str), r)
+
+function repeat(c::AnnotatedChar, r::Integer)
+    str = repeat(c.char, r)
+    fullregion = firstindex(str):lastindex(str)
+    AnnotatedString(str, [(region=fullregion, annot...) for annot in c.annotations])
+end
+
+function reverse(s::AnnotatedString)
+    lastind = lastindex(s)
+    AnnotatedString(
+        reverse(s.string),
+        [setindex(annot,
+                  UnitRange(1 + lastind - last(annot.region),
+                            1 + lastind - first(annot.region)),
+                  :region)
+         for annot in s.annotations])
+end
+
+# TODO optimise?
+reverse(s::SubString{<:AnnotatedString}) = reverse(AnnotatedString(s))
+
+# TODO implement `replace(::AnnotatedString, ...)`
+
+## End AbstractString interface ##
+
+function _annotate!(annlist::Vector{RegionAnnotation}, region::UnitRange{Int}, label::Symbol, @nospecialize(value::Any))
+    if value === nothing
+        deleteat!(annlist, findall(ann -> ann.region == region && ann.label === label, annlist))
+    else
+        push!(annlist, RegionAnnotation((; region, label, value)))
+    end
+end
+
+"""
+    annotate!(str::AnnotatedString, [range::UnitRange{Int}], label::Symbol, value)
+    annotate!(str::SubString{AnnotatedString}, [range::UnitRange{Int}], label::Symbol, value)
+
+Annotate a `range` of `str` (or the entire string) with a labeled value `(label, value)`.
+To remove existing `label` annotations, use a value of `nothing`.
+
+The order in which annotations are applied to `str` is semantically meaningful,
+as described in [`AnnotatedString`](@ref).
+"""
+annotate!(s::AnnotatedString, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) =
+    (_annotate!(s.annotations, range, label, val); s)
+
+annotate!(ss::AnnotatedString, label::Symbol, @nospecialize(val::Any)) =
+    annotate!(ss, firstindex(ss):lastindex(ss), label, val)
+
+annotate!(s::SubString{<:AnnotatedString}, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) =
+    (annotate!(s.string, s.offset .+ (range), label, val); s)
+
+annotate!(s::SubString{<:AnnotatedString}, label::Symbol, @nospecialize(val::Any)) =
+    (annotate!(s.string, s.offset .+ (1:s.ncodeunits), label, val); s)
+
+"""
+    annotate!(char::AnnotatedChar, label::Symbol, value::Any)
+
+Annotate `char` with the labeled value `(label, value)`.
+"""
+annotate!(c::AnnotatedChar, label::Symbol, @nospecialize(val::Any)) =
+    (push!(c.annotations, Annotation((; label, val))); c)
+
+"""
+    annotations(str::Union{AnnotatedString, SubString{AnnotatedString}},
+                [position::Union{Integer, UnitRange}]) ->
+        Vector{$RegionAnnotation}
+
+Get all annotations that apply to `str`. Should `position` be provided, only
+annotations that overlap with `position` will be returned.
+
+Annotations are provided together with the regions they apply to, in the form of
+a vector of region–annotation tuples.
+
+In accordance with the semantics documented in [`AnnotatedString`](@ref), the
+order of annotations returned matches the order in which they were applied.
+
+See also: [`annotate!`](@ref).
+"""
+annotations(s::AnnotatedString) = s.annotations
+
+function annotations(s::SubString{<:AnnotatedString})
+    RegionAnnotation[
+        setindex(ann, first(ann.region)-s.offset:last(ann.region)-s.offset, :region)
+        for ann in annotations(s.string, s.offset+1:s.offset+s.ncodeunits)]
+end
+
+function annotations(s::AnnotatedString, pos::UnitRange{<:Integer})
+    # TODO optimise
+    RegionAnnotation[
+        setindex(ann, max(first(pos), first(ann.region)):min(last(pos), last(ann.region)), :region)
+        for ann in s.annotations if !isempty(intersect(pos, ann.region))]
+end
+
+annotations(s::AnnotatedString, pos::Integer) = annotations(s, pos:pos)
+
+annotations(s::SubString{<:AnnotatedString}, pos::Integer) =
+    annotations(s.string, s.offset + pos)
+
+annotations(s::SubString{<:AnnotatedString}, pos::UnitRange{<:Integer}) =
+    annotations(s.string, first(pos)+s.offset:last(pos)+s.offset)
+
+"""
+    annotations(chr::AnnotatedChar)::Vector{$Annotation}
+
+Get all annotations of `chr`, in the form of a vector of annotation pairs.
+"""
+annotations(c::AnnotatedChar) = c.annotations
+
+## Character transformation helper function, c.f. `unicode.jl`.
+
+"""
+    annotated_chartransform(f::Function, str::AnnotatedString, state=nothing)
+
+Transform every character in `str` with `f`, adjusting annotation regions as
+appropriate. `f` must take one of two forms, either:
+- `f(c::Char) -> Char`, or
+- `f(c::Char, state) -> (Char, state)`.
+
+This works by comparing the number of code units of each character before and
+after transforming with `f`, recording and aggregating any differences, then
+applying them to the annotation regions.
+
+Returns an `AnnotatedString{String}` (regardless of the original underling
+string type of `str`).
+"""
+function annotated_chartransform(f::Function, str::AnnotatedString, state=nothing)
+    outstr = IOBuffer()
+    annots = RegionAnnotation[]
+    bytepos = firstindex(str) - 1
+    offsets = [bytepos => 0]
+    for c in str.string
+        oldnb = ncodeunits(c)
+        bytepos += oldnb
+        if isnothing(state)
+            c = f(c)
+        else
+            c, state = f(c, state)
+        end
+        nb = write(outstr, c)
+        if nb != oldnb
+            push!(offsets, bytepos => last(last(offsets)) + nb - oldnb)
+        end
+    end
+    for annot in str.annotations
+        start, stop = first(annot.region), last(annot.region)
+        start_offset = last(offsets[findlast(<=(start) ∘ first, offsets)::Int])
+        stop_offset  = last(offsets[findlast(<=(stop) ∘ first, offsets)::Int])
+        push!(annots, setindex(annot, (start + start_offset):(stop + stop_offset), :region))
+    end
+    AnnotatedString(takestring!(outstr), annots)
+end
+
+struct RegionIterator{S <: AbstractString}
+    str::S
+    regions::Vector{UnitRange{Int}}
+    annotations::Vector{Vector{Annotation}}
+end
+
+Base.length(si::RegionIterator) = length(si.regions)
+
+Base.@propagate_inbounds function Base.iterate(si::RegionIterator, i::Integer=1)
+    if i <= length(si.regions)
+        @inbounds ((SubString(si.str, si.regions[i]), si.annotations[i]), i+1)
+    end
+end
+
+Base.eltype(::RegionIterator{S}) where { S <: AbstractString} =
+    Tuple{SubString{S}, Vector{Annotation}}
+
+"""
+    eachregion(s::AnnotatedString{S})
+    eachregion(s::SubString{AnnotatedString{S}})
+
+Identify the contiguous substrings of `s` with a constant annotations, and return
+an iterator which provides each substring and the applicable annotations as a
+`Tuple{SubString{S}, Vector{$Annotation}}`.
+
+# Examples
+
+```jldoctest; setup=:(using Base: AnnotatedString, eachregion)
+julia> collect(eachregion(AnnotatedString(
+           "hey there", [(1:3, :face, :bold),
+                         (5:9, :face, :italic)])))
+3-element Vector{Tuple{SubString{String}, Vector{$Annotation}}}:
+ ("hey", [$Annotation((:face, :bold))])
+ (" ", [])
+ ("there", [$Annotation((:face, :italic))])
+```
+"""
+function eachregion(s::AnnotatedString, subregion::UnitRange{Int}=firstindex(s):lastindex(s))
+    isempty(s) || isempty(subregion) &&
+        return RegionIterator(s.string, UnitRange{Int}[], Vector{Annotation}[])
+    events = annotation_events(s, subregion)
+    isempty(events) && return RegionIterator(s.string, [subregion], [Annotation[]])
+    annotvals = Annotation[
+        (; label, value) for (; label, value) in annotations(s)]
+    regions = Vector{UnitRange{Int}}()
+    annots = Vector{Vector{Annotation}}()
+    pos = first(events).pos
+    if pos > first(subregion)
+        push!(regions, thisind(s, first(subregion)):prevind(s, pos))
+        push!(annots, [])
+    end
+    activelist = Int[]
+    for event in events
+        if event.pos != pos
+            push!(regions, pos:prevind(s, event.pos))
+            push!(annots, annotvals[activelist])
+            pos = event.pos
+        end
+        if event.active
+            insert!(activelist, searchsortedfirst(activelist, event.index), event.index)
+        else
+            deleteat!(activelist, searchsortedfirst(activelist, event.index))
+        end
+    end
+    if last(events).pos < nextind(s, last(subregion))
+        push!(regions, last(events).pos:thisind(s, last(subregion)))
+        push!(annots, [])
+    end
+    RegionIterator(s.string, regions, annots)
+end
+
+function eachregion(s::SubString{<:AnnotatedString}, pos::UnitRange{Int}=firstindex(s):lastindex(s))
+    if isempty(s)
+        RegionIterator(s.string, Vector{UnitRange{Int}}(), Vector{Vector{Annotation}}())
+    else
+        eachregion(s.string, first(pos)+s.offset:last(pos)+s.offset)
+    end
+end
+
+"""
+    annotation_events(string::AbstractString, annots::Vector{$RegionAnnotation}, subregion::UnitRange{Int})
+    annotation_events(string::AnnotatedString, subregion::UnitRange{Int})
+
+Find all annotation "change events" that occur within a `subregion` of `annots`,
+with respect to `string`. When `string` is styled, `annots` is inferred.
+
+Each change event is given in the form of a `@NamedTuple{pos::Int, active::Bool,
+index::Int}` where `pos` is the position of the event, `active` is a boolean
+indicating whether the annotation is being activated or deactivated, and `index`
+is the index of the annotation in question.
+"""
+function annotation_events(s::AbstractString, annots::Vector{RegionAnnotation}, subregion::UnitRange{Int})
+    events = Vector{NamedTuple{(:pos, :active, :index), Tuple{Int, Bool, Int}}}() # Position, Active?, Annotation index
+    for (i, (; region)) in enumerate(annots)
+        if !isempty(intersect(subregion, region))
+            start, stop = max(first(subregion), first(region)), min(last(subregion), last(region))
+            start <= stop || continue # Currently can't handle empty regions
+            push!(events, (pos=thisind(s, start), active=true, index=i))
+            push!(events, (pos=nextind(s, stop), active=false, index=i))
+        end
+    end
+    sort(events, by=e -> e.pos)
+end
+
+annotation_events(s::AnnotatedString, subregion::UnitRange{Int}) =
+    annotation_events(s.string, annotations(s), subregion)
diff --git a/base/strings/annotated_io.jl b/base/strings/annotated_io.jl
new file mode 100644
index 0000000000000..60c91be24ebfb
--- /dev/null
+++ b/base/strings/annotated_io.jl
@@ -0,0 +1,425 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## AnnotatedIOBuffer
+
+struct AnnotatedIOBuffer <: AbstractPipe
+    io::IOBuffer
+    annotations::Vector{RegionAnnotation}
+end
+
+AnnotatedIOBuffer(io::IOBuffer) = AnnotatedIOBuffer(io, Vector{RegionAnnotation}())
+AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer())
+
+function show(io::IO, aio::AnnotatedIOBuffer)
+    show(io, AnnotatedIOBuffer)
+    size = filesize(aio.io)
+    print(io, '(', size, " byte", ifelse(size == 1, "", "s"), ", ",
+          length(aio.annotations), " annotation", ifelse(length(aio.annotations) == 1, "", "s"), ")")
+end
+
+pipe_reader(io::AnnotatedIOBuffer) = io.io
+pipe_writer(io::AnnotatedIOBuffer) = io.io
+
+# Useful `IOBuffer` methods that we don't get from `AbstractPipe`
+position(io::AnnotatedIOBuffer) = position(io.io)
+seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io)
+seekend(io::AnnotatedIOBuffer) = (seekend(io.io); io)
+skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io)
+copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations))
+
+annotations(io::AnnotatedIOBuffer) = io.annotations
+
+annotate!(io::AnnotatedIOBuffer, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) =
+    (_annotate!(io.annotations, range, label, val); io)
+
+function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}})
+    astr = AnnotatedString(astr)
+    offset = position(io.io)
+    eof(io) || _clear_annotations_in_region!(io.annotations, offset+1:offset+ncodeunits(astr))
+    _insert_annotations!(io, astr.annotations)
+    write(io.io, String(astr))
+end
+
+write(io::AnnotatedIOBuffer, c::AnnotatedChar) =
+    write(io, AnnotatedString(string(c), [(region=1:ncodeunits(c), a...) for a in c.annotations]))
+write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x)
+write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s)
+write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b)
+
+function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer)
+    destpos = position(dest)
+    isappending = eof(dest)
+    srcpos = position(src)
+    nb = write(dest.io, src.io)
+    isappending || _clear_annotations_in_region!(dest.annotations, destpos:destpos+nb)
+    srcannots = [setindex(annot, max(1 + srcpos, first(annot.region)):last(annot.region), :region)
+                 for annot in src.annotations if first(annot.region) >= srcpos]
+    _insert_annotations!(dest, srcannots, destpos - srcpos)
+    nb
+end
+
+# So that read/writes with `IOContext` (and any similar `AbstractPipe` wrappers)
+# work as expected.
+function write(io::AbstractPipe, s::Union{AnnotatedString, SubString{<:AnnotatedString}})
+    if pipe_writer(io) isa AnnotatedIOBuffer
+        write(pipe_writer(io), s)
+    else
+        invoke(write, Tuple{IO, typeof(s)}, io, s)
+    end::Int
+end
+
+# Can't be part of the `Union` above because it introduces method ambiguities
+function write(io::AbstractPipe, c::AnnotatedChar)
+    if pipe_writer(io) isa AnnotatedIOBuffer
+        write(pipe_writer(io), c)
+    else
+        invoke(write, Tuple{IO, typeof(c)}, io, c)
+    end::Int
+end
+
+function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString}
+    if (start = position(io)) == 0
+        AnnotatedString(read(io.io, T), copy(io.annotations))
+    else
+        annots = [setindex(annot, UnitRange{Int}(max(1, first(annot.region) - start), last(annot.region)-start), :region)
+                  for annot in io.annotations if last(annot.region) > start]
+        AnnotatedString(read(io.io, T), annots)
+    end
+end
+read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String})
+read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String})
+
+function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar}
+    pos = position(io)
+    char = read(io.io, T)
+    annots = [NamedTuple{(:label, :value)}(annot) for annot in io.annotations if pos+1 in annot.region]
+    AnnotatedChar(char, annots)
+end
+read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}}) = read(io, AnnotatedChar{Char})
+read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar}) = read(io, AnnotatedChar{Char})
+
+function truncate(io::AnnotatedIOBuffer, size::Integer)
+    truncate(io.io, size)
+    filter!(ann -> first(ann.region) <= size, io.annotations)
+    map!(ann -> setindex(ann, first(ann.region):min(size, last(ann.region)), :region),
+         io.annotations, io.annotations)
+    io
+end
+
+"""
+    _clear_annotations_in_region!(annotations::Vector{$RegionAnnotation}, span::UnitRange{Int})
+
+Erase the presence of `annotations` within a certain `span`.
+
+This operates by removing all elements of `annotations` that are entirely
+contained in `span`, truncating ranges that partially overlap, and splitting
+annotations that subsume `span` to just exist either side of `span`.
+"""
+function _clear_annotations_in_region!(annotations::Vector{RegionAnnotation}, span::UnitRange{Int})
+    # Clear out any overlapping pre-existing annotations.
+    filter!(ann -> first(ann.region) < first(span) || last(ann.region) > last(span), annotations)
+    extras = Tuple{Int, RegionAnnotation}[]
+    for i in eachindex(annotations)
+        annot = annotations[i]
+        region = annot.region
+        # Test for partial overlap
+        if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region)
+            annotations[i] =
+                setindex(annot,
+                         if first(region) < first(span)
+                             first(region):first(span)-1
+                         else
+                             last(span)+1:last(region)
+                         end,
+                         :region)
+            # If `span` fits exactly within `region`, then we've only copied over
+            # the beginning overhang, but also need to conserve the end overhang.
+            if first(region) < first(span) && last(span) < last(region)
+                push!(extras, (i, setindex(annot, last(span)+1:last(region), :region)))
+            end
+        end
+    end
+    # Insert any extra entries in the appropriate position
+    for (offset, (i, entry)) in enumerate(extras)
+        insert!(annotations, i + offset, entry)
+    end
+    annotations
+end
+
+"""
+    _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{$RegionAnnotation}, offset::Int = position(io))
+
+Register new `annotations` in `io`, applying an `offset` to their regions.
+
+The largely consists of simply shifting the regions of `annotations` by `offset`
+and pushing them onto `io`'s annotations. However, when it is possible to merge
+the new annotations with recent annotations in accordance with the semantics
+outlined in [`AnnotatedString`](@ref), we do so. More specifically, when there
+is a run of the most recent annotations that are also present as the first
+`annotations`, with the same value and adjacent regions, the new annotations are
+merged into the existing recent annotations by simply extending their range.
+
+This is implemented so that one can say write an `AnnotatedString` to an
+`AnnotatedIOBuffer` one character at a time without needlessly producing a
+new annotation for each character.
+"""
+function _insert_annotations!(annots::Vector{RegionAnnotation}, newannots::Vector{RegionAnnotation}, offset::Int = 0)
+    run = 0
+    if !isempty(annots) && last(last(annots).region) == offset
+        for i in reverse(axes(newannots, 1))
+            annot = newannots[i]
+            first(annot.region) == 1 || continue
+            i <= length(annots) || continue
+            if annot.label == last(annots).label && annot.value == last(annots).value
+                valid_run = true
+                for runlen in 1:i
+                    new = newannots[begin+runlen-1]
+                    old = annots[end-i+runlen]
+                    if last(old.region) != offset || first(new.region) != 1 || old.label != new.label || old.value != new.value
+                        valid_run = false
+                        break
+                    end
+                end
+                if valid_run
+                    run = i
+                    break
+                end
+            end
+        end
+    end
+    for runindex in 0:run-1
+        old_index = lastindex(annots) - run + 1 + runindex
+        old = annots[old_index]
+        new = newannots[begin+runindex]
+        extannot = (region = first(old.region):last(new.region)+offset,
+                    label = old.label,
+                    value = old.value)
+        annots[old_index] = extannot
+    end
+    for index in run+1:lastindex(newannots)
+        annot = newannots[index]
+        start, stop = first(annot.region), last(annot.region)
+        # REVIEW: For some reason, construction of `newannot`
+        # can be a significant contributor to the overall runtime
+        # of this function. For instance, executing:
+        #
+        #     replace(AnnotatedIOBuffer(), S"apple",
+        #             'e' => S"{red:x}", 'p' => S"{green:y}")
+        #
+        # results in 3 calls to `_insert_annotations!`. It takes
+        # ~570ns in total, compared to ~200ns if we push `annot`
+        # instead of `newannot`. Commenting out the `_insert_annotations!`
+        # line reduces the runtime to ~170ns, from which we can infer
+        # that constructing `newannot` is somehow responsible for
+        # a ~30ns -> ~400ns (~13x) increase in runtime!!
+        # This also comes with a marginal increase in allocations
+        # (compared to the commented out version) of 2 -> 14 (250b -> 720b).
+        #
+        # This seems quite strange, but I haven't dug into the generated
+        # LLVM or ASM code. If anybody reading this is interested in checking
+        # this out, that would be brilliant 🙏.
+        #
+        # What I have done is found that "direct tuple reconstruction"
+        # (as below) is several times faster than using `setindex`.
+        newannot = (region = start+offset:stop+offset,
+                    label = annot.label,
+                    value = annot.value)
+        push!(annots, newannot)
+    end
+end
+
+_insert_annotations!(io::AnnotatedIOBuffer, newannots::Vector{RegionAnnotation}, offset::Int = position(io)) =
+    _insert_annotations!(io.annotations, newannots, offset)
+
+# String replacement
+
+# REVIEW: For some reason the `Core.kwcall` indirection seems to cause a
+# substantial slowdown here. If we remove `; count` from the signature
+# and run the sample code above in `_insert_annotations!`, the runtime
+# drops from ~4400ns to ~580ns (~7x faster). I cannot guess why this is.
+function replace(out::AnnotatedIOBuffer, str::AnnotatedString, pat_f::Pair...; count = typemax(Int))
+    if count == 0 || isempty(pat_f)
+        write(out, str)
+        return out
+    end
+    e1, patterns, replacers, repspans, notfound = _replace_init(str.string, pat_f, count)
+    if notfound
+        foreach(_free_pat_replacer, patterns)
+        write(out, str)
+        return out
+    end
+    # Modelled after `Base.annotated_chartransform`, but needing
+    # to handle a bit more complexity.
+    isappending = eof(out)
+    newannots = empty(out.annotations)
+    bytepos = bytestart = firstindex(str.string)
+    replacements = [(region = (bytestart - 1):(bytestart - 1), offset = position(out))]
+    nrep = 1
+    while nrep <= count
+        repspans, ridx, xspan, newbytes, bytepos = @inline _replace_once(
+            out.io, str.string, bytestart, e1, patterns, replacers, repspans, count, nrep, bytepos)
+        first(xspan) >= e1 && break
+        nrep += 1
+        # NOTE: When the replaced pattern ends with a multi-codeunit character,
+        # `xspan` only covers up to the start of that character. However,
+        # for us to correctly account for the changes to the string we need
+        # the /entire/ span of codeunits that were replaced.
+        if !isempty(xspan) && codeunit(str.string, last(xspan)) > 0x80
+            xspan = first(xspan):nextind(str.string, last(xspan))-1
+        end
+        drift = last(replacements).offset
+        thisrep = (region = xspan, offset = drift + newbytes - length(xspan))
+        destoff = first(xspan) - 1 + drift
+        push!(replacements, thisrep)
+        replacement = replacers[ridx]
+        _isannotated(replacement) || continue
+        annots = annotations(replacement)
+        annots′ = if eltype(annots) == Annotation # When it's a char not a string
+            region = 1:newbytes
+            [@NamedTuple{region::UnitRange{Int}, label::Symbol, value}((region, label, value))
+             for (; label, value) in annots]
+        else
+            annots
+        end::Vector{RegionAnnotation}
+        _insert_annotations!(newannots, annots′, destoff)
+    end
+    push!(replacements, (region = e1:(e1-1), offset = last(replacements).offset))
+    foreach(_free_pat_replacer, patterns)
+    write(out.io, SubString(str.string, bytepos))
+    # NOTE: To enable more efficient annotation clearing,
+    # we make use of the fact that `_replace_once` picks
+    # replacements ordered by their match start position.
+    # This means that the start of `.region`s in
+    # `replacements` is monotonically increasing.
+    isappending || _clear_annotations_in_region!(out.annotations, first(replacements).offset:position(out))
+    for (; region, label, value) in str.annotations
+        start, stop = first(region), last(region)
+        prioridx = searchsortedlast(
+            replacements, (region = start:start, offset = 0),
+            by = r -> first(r.region))
+        postidx = searchsortedfirst(
+            replacements, (region = stop:stop, offset = 0),
+            by = r -> first(r.region))
+        priorrep, postrep = replacements[prioridx], replacements[postidx]
+        if prioridx == postidx && start >= first(priorrep.region) && stop <= last(priorrep.region)
+            # Region contained with a replacement
+            continue
+        elseif postidx - prioridx <= 1 && start > last(priorrep.region) && stop < first(postrep.region)
+            # Lies between replacements
+            shiftregion = (start + priorrep.offset):(stop + priorrep.offset)
+            shiftann = (region = shiftregion, label, value)
+            push!(out.annotations, shiftann)
+        else
+            # Split between replacements
+            prevrep = replacements[max(begin, prioridx - 1)]
+            for rep in @view replacements[max(begin, prioridx - 1):min(end, postidx + 1)]
+                gap = max(start, last(prevrep.region)+1):min(stop, first(rep.region)-1)
+                if !isempty(gap)
+                    shiftregion = (first(gap) + prevrep.offset):(last(gap) + prevrep.offset)
+                    shiftann = (; region = shiftregion, label, value)
+                    push!(out.annotations, shiftann)
+                end
+                prevrep = rep
+            end
+        end
+    end
+    append!(out.annotations, newannots)
+    out
+end
+
+replace(out::IO, str::AnnotatedString, pat_f::Pair...; count=typemax(Int)) =
+    replace(out, str.string, pat_f...; count)
+
+function replace(str::AnnotatedString, pat_f::Pair...; count=typemax(Int))
+    isempty(pat_f) || iszero(count) && return str
+    out = AnnotatedIOBuffer()
+    replace(out, str, pat_f...; count)
+    read(seekstart(out), AnnotatedString)
+end
+
+# Printing
+
+function printstyled end
+
+# NOTE: This is an interim solution to the invalidations caused
+# by the split styled display implementation. This should be
+# replaced by a more robust solution (such as a consolidation of
+# the type and method definitions) in the near future.
+module AnnotatedDisplay
+
+using ..Base: IO, SubString, AnnotatedString, AnnotatedChar, AnnotatedIOBuffer
+using ..Base: eachregion, invoke_in_world, tls_world_age
+
+# Write
+
+ansi_write(f::Function, io::IO, x::Any) = f(io, String(x))
+
+ansi_write_(f::Function, io::IO, @nospecialize(x::Any)) =
+    invoke_in_world(tls_world_age(), ansi_write, f, io, x)
+
+Base.write(io::IO, s::Union{<:AnnotatedString, SubString{<:AnnotatedString}}) =
+    ansi_write_(write, io, s)::Int
+
+Base.write(io::IO, c::AnnotatedChar) =
+    ansi_write_(write, io, c)::Int
+
+function Base.write(io::IO, aio::AnnotatedIOBuffer)
+    if get(io, :color, false) == true
+        # This does introduce an overhead that technically
+        # could be avoided, but I'm not sure that it's currently
+        # worth the effort to implement an efficient version of
+        # writing from an AnnotatedIOBuffer with style.
+        # In the meantime, by converting to an `AnnotatedString` we can just
+        # reuse all the work done to make that work.
+        ansi_write_(write, io, read(aio, AnnotatedString))::Int
+    else
+        write(io, aio.io)
+    end
+end
+
+# Print
+
+Base.print(io::IO, s::Union{<:AnnotatedString, SubString{<:AnnotatedString}}) =
+    (ansi_write_(write, io, s); nothing)
+
+Base.print(io::IO, s::AnnotatedChar) =
+    (ansi_write_(write, io, s); nothing)
+
+Base.print(io::AnnotatedIOBuffer, s::Union{<:AnnotatedString, SubString{<:AnnotatedString}}) =
+    (write(io, s); nothing)
+
+Base.print(io::AnnotatedIOBuffer, c::AnnotatedChar) =
+    (write(io, c); nothing)
+
+styled_print(io::AnnotatedIOBuffer, msg::Any, kwargs::Any) = print(io, msg...)
+
+styled_print_(io::AnnotatedIOBuffer, @nospecialize(msg), @nospecialize(kwargs)) =
+    invoke_in_world(tls_world_age(), styled_print, io, msg, kwargs)::Nothing
+
+Base.printstyled(io::AnnotatedIOBuffer, msg...; kwargs...) =
+    styled_print_(io, msg, kwargs)
+
+# Escape
+
+Base.escape_string(io::IO, s::Union{<:AnnotatedString, SubString{<:AnnotatedString}},
+              esc = ""; keep = (), ascii::Bool=false, fullhex::Bool=false) =
+    (ansi_write_((io, s) -> escape_string(io, s, esc; keep, ascii, fullhex), io, s); nothing)
+
+# Show
+
+show_annot(io::IO, ::Any) = nothing
+show_annot(io::IO, ::MIME, ::Any) = nothing
+
+show_annot_(io::IO, @nospecialize(x::Any)) =
+    invoke_in_world(tls_world_age(), show_annot, io, x)::Nothing
+
+show_annot_(io::IO, m::MIME, @nospecialize(x::Any)) =
+    invoke_in_world(tls_world_age(), show_annot, io, m, x)::Nothing
+
+Base.show(io::IO, m::MIME"text/html", s::Union{<:AnnotatedString, SubString{<:AnnotatedString}}) =
+    show_annot_(io, m, s)
+
+Base.show(io::IO, m::MIME"text/html", c::AnnotatedChar) =
+    show_annot_(io, m, c)
+
+end
diff --git a/base/strings/basic.jl b/base/strings/basic.jl
index 8be33c4fb6406..85a4dacbd323c 100644
--- a/base/strings/basic.jl
+++ b/base/strings/basic.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+import Core: Symbol
+
 """
 The `AbstractString` type is the supertype of all string implementations in
 Julia. Strings are encodings of sequences of [Unicode](https://unicode.org/)
@@ -29,7 +31,7 @@ types may choose different "imaginary" character sizes as makes sense for their
 implementations (e.g. substrings may pass index arithmetic through to the
 underlying string they provide a view into). Relaxed indexing functions include
 those intended for index arithmetic: `thisind`, `nextind` and `prevind`. This
-model allows index arithmetic to work with out-of- bounds indices as
+model allows index arithmetic to work with out-of-bounds indices as
 intermediate values so long as one never uses them to retrieve a character,
 which often helps avoid needing to code around edge cases.
 
@@ -41,7 +43,7 @@ AbstractString
 ## required string functions ##
 
 """
-    ncodeunits(s::AbstractString) -> Int
+    ncodeunits(s::AbstractString)::Int
 
 Return the number of code units in a string. Indices that are in bounds to
 access this string must satisfy `1 ≤ i ≤ ncodeunits(s)`. Not all such indices
@@ -66,7 +68,7 @@ See also [`codeunit`](@ref), [`checkbounds`](@ref), [`sizeof`](@ref),
 ncodeunits(s::AbstractString)
 
 """
-    codeunit(s::AbstractString) -> Type{<:Union{UInt8, UInt16, UInt32}}
+    codeunit(s::AbstractString)::Type{<:Union{UInt8, UInt16, UInt32}}
 
 Return the code unit type of the given string object. For ASCII, Latin-1, or
 UTF-8 encoded strings, this would be `UInt8`; for UCS-2 and UTF-16 it would be
@@ -82,7 +84,7 @@ codeunit(s::AbstractString)
 const CodeunitType = Union{Type{UInt8},Type{UInt16},Type{UInt32}}
 
 """
-    codeunit(s::AbstractString, i::Integer) -> Union{UInt8, UInt16, UInt32}
+    codeunit(s::AbstractString, i::Integer)::Union{UInt8, UInt16, UInt32}
 
 Return the code unit value in the string `s` at index `i`. Note that
 
@@ -106,7 +108,7 @@ See also [`ncodeunits`](@ref), [`checkbounds`](@ref).
     throw(MethodError(codeunit, (s, i))) : codeunit(s, Int(i))
 
 """
-    isvalid(s::AbstractString, i::Integer) -> Bool
+    isvalid(s::AbstractString, i::Integer)::Bool
 
 Predicate indicating whether the given index is the start of the encoding of a
 character in `s` or not. If `isvalid(s, i)` is true then `s[i]` will return the
@@ -142,13 +144,12 @@ Stacktrace:
     throw(MethodError(isvalid, (s, i))) : isvalid(s, Int(i))
 
 """
-    iterate(s::AbstractString, i::Integer) -> Union{Tuple{<:AbstractChar, Int}, Nothing}
+    iterate(s::AbstractString, i::Integer)::Union{Tuple{<:AbstractChar, Int}, Nothing}
 
 Return a tuple of the character in `s` at index `i` with the index of the start
 of the following character in `s`. This is the key method that allows strings to
-be iterated, yielding a sequences of characters. If `i` is out of bounds in `s`
-then a bounds error is raised. The `iterate` function, as part of the iteration
-protocol may assume that `i` is the start of a character in `s`.
+be iterated, yielding a sequences of characters. The `iterate` function, as part
+of the iteration protocol may assume that `i` is the start of a character in `s`.
 
 See also [`getindex`](@ref), [`checkbounds`](@ref).
 """
@@ -179,6 +180,8 @@ firstindex(s::AbstractString) = 1
 lastindex(s::AbstractString) = thisind(s, ncodeunits(s)::Int)
 isempty(s::AbstractString) = iszero(ncodeunits(s)::Int)
 
+@propagate_inbounds first(s::AbstractString) = s[firstindex(s)]
+
 function getindex(s::AbstractString, i::Integer)
     @boundscheck checkbounds(s, i)
     @inbounds return isvalid(s, i) ? (iterate(s, i)::NTuple{2,Any})[1] : string_index_err(s, i)
@@ -193,12 +196,7 @@ getindex(s::AbstractString, v::AbstractVector{Bool}) =
     throw(ArgumentError("logical indexing not supported for strings"))
 
 function get(s::AbstractString, i::Integer, default)
-# TODO: use ternary once @inbounds is expression-like
-    if checkbounds(Bool, s, i)
-        @inbounds return s[i]
-    else
-        return default
-    end
+    checkbounds(Bool, s, i) ? (@inbounds s[i]) : default
 end
 
 ## bounds checking ##
@@ -223,7 +221,7 @@ Vector{UInt8}(s::AbstractString) = unsafe_wrap(Vector{UInt8}, String(s))
 Array{UInt8}(s::AbstractString) = unsafe_wrap(Vector{UInt8}, String(s))
 Vector{T}(s::AbstractString) where {T<:AbstractChar} = collect(T, s)
 
-Symbol(s::AbstractString) = Symbol(String(s))
+Symbol(s::AbstractString) = Symbol(String(s)::String)
 Symbol(x...) = Symbol(string(x...))
 
 convert(::Type{T}, s::T) where {T<:AbstractString} = s
@@ -239,11 +237,12 @@ end
 ## string & character concatenation ##
 
 """
-    *(s::Union{AbstractString, AbstractChar}, t::Union{AbstractString, AbstractChar}...) -> AbstractString
+    *(s::Union{AbstractString, AbstractChar}, t::Union{AbstractString, AbstractChar}...)::AbstractString
 
-Concatenate strings and/or characters, producing a [`String`](@ref). This is equivalent
-to calling the [`string`](@ref) function on the arguments. Concatenation of built-in
-string types always produces a value of type `String` but other string types may choose
+Concatenate strings and/or characters, producing a [`String`](@ref) or
+[`AnnotatedString`](@ref) (as appropriate). This is equivalent to calling the
+[`string`](@ref) or [`annotatedstring`](@ref) function on the arguments. Concatenation of built-in string
+types always produces a value of type `String` but other string types may choose
 to return a string of a different type as appropriate.
 
 # Examples
@@ -255,14 +254,26 @@ julia> 'j' * "ulia"
 "julia"
 ```
 """
-(*)(s1::Union{AbstractChar, AbstractString}, ss::Union{AbstractChar, AbstractString}...) = string(s1, ss...)
+function (*)(s1::Union{AbstractChar, AbstractString}, ss::Union{AbstractChar, AbstractString}...)
+    if _isannotated(s1) || any(_isannotated, ss)
+        annotatedstring(s1, ss...)
+    else
+        string(s1, ss...)
+    end
+end
 
 one(::Union{T,Type{T}}) where {T<:AbstractString} = convert(T, "")
 
+# This could be written as a single statement with three ||-clauses, however then effect
+# analysis thinks it may throw and runtime checks are added.
+# Also see `substring.jl` for the `::SubString{T}` method.
+_isannotated(S::Type) = S != Union{} && (S <: AnnotatedString || S <: AnnotatedChar)
+_isannotated(s) = _isannotated(typeof(s))
+
 ## generic string comparison ##
 
 """
-    cmp(a::AbstractString, b::AbstractString) -> Int
+    cmp(a::AbstractString, b::AbstractString)::Int
 
 Compare two strings. Return `0` if both strings have the same length and the character
 at each index is the same in both strings. Return `-1` if `a` is a prefix of `b`, or if
@@ -306,10 +317,11 @@ function cmp(a::AbstractString, b::AbstractString)
 end
 
 """
-    ==(a::AbstractString, b::AbstractString) -> Bool
+    ==(a::AbstractString, b::AbstractString)::Bool
 
 Test whether two strings are equal character by character (technically, Unicode
-code point by code point).
+code point by code point). Should either string be a [`AnnotatedString`](@ref) the
+string properties must match too.
 
 # Examples
 ```jldoctest
@@ -323,7 +335,7 @@ false
 ==(a::AbstractString, b::AbstractString) = cmp(a, b) == 0
 
 """
-    isless(a::AbstractString, b::AbstractString) -> Bool
+    isless(a::AbstractString, b::AbstractString)::Bool
 
 Test whether string `a` comes before string `b` in alphabetical order
 (technically, in lexicographical order by Unicode code points).
@@ -350,15 +362,11 @@ end
 
 isless(a::Symbol, b::Symbol) = cmp(a, b) < 0
 
-# hashing
-
-hash(s::AbstractString, h::UInt) = hash(String(s), h)
-
 ## character index arithmetic ##
 
 """
-    length(s::AbstractString) -> Int
-    length(s::AbstractString, i::Integer, j::Integer) -> Int
+    length(s::AbstractString)::Int
+    length(s::AbstractString, i::Integer, j::Integer)::Int
 
 Return the number of characters in string `s` from indices `i` through `j`.
 
@@ -400,10 +408,10 @@ function length(s::AbstractString, i::Int, j::Int)
 end
 
 @propagate_inbounds length(s::AbstractString, i::Integer, j::Integer) =
-    length(s, Int(i), Int(j))
+    length(s, Int(i)::Int, Int(j)::Int)
 
 """
-    thisind(s::AbstractString, i::Integer) -> Int
+    thisind(s::AbstractString, i::Integer)::Int
 
 If `i` is in bounds in `s` return the index of the start of the character whose
 encoding code unit `i` is part of. In other words, if `i` is the start of a
@@ -434,7 +442,7 @@ ERROR: BoundsError: attempt to access 2-codeunit String at index [-1]
 [...]
 ```
 """
-thisind(s::AbstractString, i::Integer) = thisind(s, Int(i))
+thisind(s::AbstractString, i::Integer) = thisind(s, Int(i)::Int)
 
 function thisind(s::AbstractString, i::Int)
     z = ncodeunits(s)::Int + 1
@@ -447,11 +455,11 @@ function thisind(s::AbstractString, i::Int)
 end
 
 """
-    prevind(str::AbstractString, i::Integer, n::Integer=1) -> Int
+    prevind(str::AbstractString, i::Integer, n::Integer=1)::Int
 
 * Case `n == 1`
 
-  If `i` is in bounds in `s` return the index of the start of the character whose
+  If `i` is in bounds in `str` return the index of the start of the character whose
   encoding starts before index `i`. In other words, if `i` is the start of a
   character, return the start of the previous character; if `i` is not the start
   of a character, rewind until the start of a character and return that index.
@@ -490,27 +498,27 @@ julia> prevind("α", 2, 3)
 -1
 ```
 """
-prevind(s::AbstractString, i::Integer, n::Integer) = prevind(s, Int(i), Int(n))
-prevind(s::AbstractString, i::Integer)             = prevind(s, Int(i))
+prevind(s::AbstractString, i::Integer, n::Integer) = prevind(s, Int(i)::Int, Int(n)::Int)
+prevind(s::AbstractString, i::Integer)             = prevind(s, Int(i)::Int)
 prevind(s::AbstractString, i::Int)                 = prevind(s, i, 1)
 
 function prevind(s::AbstractString, i::Int, n::Int)
     n < 0 && throw(ArgumentError("n cannot be negative: $n"))
-    z = ncodeunits(s) + 1
+    z = ncodeunits(s)::Int + 1
     @boundscheck 0 < i ≤ z || throw(BoundsError(s, i))
-    n == 0 && return thisind(s, i) == i ? i : string_index_err(s, i)
+    n == 0 && return thisind(s, i)::Int == i ? i : string_index_err(s, i)
     while n > 0 && 1 < i
-        @inbounds n -= isvalid(s, i -= 1)
+        @inbounds n -= isvalid(s, i -= 1)::Bool
     end
     return i - n
 end
 
 """
-    nextind(str::AbstractString, i::Integer, n::Integer=1) -> Int
+    nextind(str::AbstractString, i::Integer, n::Integer=1)::Int
 
 * Case `n == 1`
 
-  If `i` is in bounds in `s` return the index of the start of the character whose
+  If `i` is in bounds in `str` return the index of the start of the character whose
   encoding starts after index `i`. In other words, if `i` is the start of a
   character, return the start of the next character; if `i` is not the start
   of a character, move forward until the start of a character and return that index.
@@ -527,7 +535,7 @@ end
 
 * Case `n == 0`
 
-  Return `i` only if `i` is a valid index in `s` or is equal to `0`.
+  Return `i` only if `i` is a valid index in `str` or is equal to `0`.
   Otherwise `StringIndexError` or `BoundsError` is thrown.
 
 # Examples
@@ -549,17 +557,17 @@ julia> nextind("α", 1, 2)
 4
 ```
 """
-nextind(s::AbstractString, i::Integer, n::Integer) = nextind(s, Int(i), Int(n))
-nextind(s::AbstractString, i::Integer)             = nextind(s, Int(i))
+nextind(s::AbstractString, i::Integer, n::Integer) = nextind(s, Int(i)::Int, Int(n)::Int)
+nextind(s::AbstractString, i::Integer)             = nextind(s, Int(i)::Int)
 nextind(s::AbstractString, i::Int)                 = nextind(s, i, 1)
 
 function nextind(s::AbstractString, i::Int, n::Int)
     n < 0 && throw(ArgumentError("n cannot be negative: $n"))
-    z = ncodeunits(s)
+    z = ncodeunits(s)::Int
     @boundscheck 0 ≤ i ≤ z || throw(BoundsError(s, i))
-    n == 0 && return thisind(s, i) == i ? i : string_index_err(s, i)
+    n == 0 && return thisind(s, i)::Int == i ? i : string_index_err(s, i)
     while n > 0 && i < z
-        @inbounds n -= isvalid(s, i += 1)
+        @inbounds n -= isvalid(s, i += 1)::Bool
     end
     return i + n
 end
@@ -578,7 +586,7 @@ iterate(e::EachStringIndex, state=firstindex(e.s)) = state > ncodeunits(e.s) ? n
 eltype(::Type{<:EachStringIndex}) = Int
 
 """
-    isascii(c::Union{AbstractChar,AbstractString}) -> Bool
+    isascii(c::Union{AbstractChar,AbstractString})::Bool
 
 Test whether a character belongs to the ASCII character set, or whether this is true for
 all elements of a string.
@@ -629,7 +637,7 @@ end
     return  _isascii(cu,last-chunk_size+1,last)
 end
 """
-    isascii(cu::AbstractVector{CU}) where {CU <: Integer} -> Bool
+    isascii(cu::AbstractVector{CU}) where {CU <: Integer}::Bool
 
 Test whether all values in the vector belong to the ASCII character set (0x00 to 0x7f).
 This function is intended to be used by other string implementations that need a fast ASCII check.
@@ -666,7 +674,7 @@ function filter(f, s::AbstractString)
     for c in s
         f(c) && write(out, c)
     end
-    String(_unsafe_take!(out))
+    takestring!(out)
 end
 
 ## string first and last ##
@@ -745,10 +753,10 @@ julia> repeat("ha", 3)
 "hahaha"
 ```
 """
-repeat(s::AbstractString, r::Integer) = repeat(String(s), r)
+repeat(s::AbstractString, r::Integer) = repeat(String(s)::String, r)
 
 """
-    ^(s::Union{AbstractString,AbstractChar}, n::Integer) -> AbstractString
+    ^(s::Union{AbstractString,AbstractChar}, n::Integer)::AbstractString
 
 Repeat a string or character `n` times. This can also be written as `repeat(s, n)`.
 
@@ -779,19 +787,20 @@ struct CodeUnits{T,S<:AbstractString} <: DenseVector{T}
     CodeUnits(s::S) where {S<:AbstractString} = new{codeunit(s),S}(s)
 end
 
-length(s::CodeUnits) = ncodeunits(s.s)
 sizeof(s::CodeUnits{T}) where {T} = ncodeunits(s.s) * sizeof(T)
-size(s::CodeUnits) = (length(s),)
+size(s::CodeUnits) = (ncodeunits(s.s),)
 elsize(s::Type{<:CodeUnits{T}}) where {T} = sizeof(T)
 @propagate_inbounds getindex(s::CodeUnits, i::Int) = codeunit(s.s, i)
 IndexStyle(::Type{<:CodeUnits}) = IndexLinear()
-@inline iterate(s::CodeUnits, i=1) = (i % UInt) - 1 < length(s) ? (@inbounds s[i], i + 1) : nothing
+checkbounds(::Type{Bool}, s::CodeUnits, i::Integer) = checkbounds(Bool, s.s, i)
 
 
 write(io::IO, s::CodeUnits) = write(io, s.s)
 
-unsafe_convert(::Type{Ptr{T}},    s::CodeUnits{T}) where {T} = unsafe_convert(Ptr{T}, s.s)
-unsafe_convert(::Type{Ptr{Int8}}, s::CodeUnits{UInt8}) = unsafe_convert(Ptr{Int8}, s.s)
+cconvert(::Type{Ptr{T}},    s::CodeUnits{T}) where {T} = cconvert(Ptr{T}, s.s)
+cconvert(::Type{Ptr{Int8}}, s::CodeUnits{UInt8}) = cconvert(Ptr{Int8}, s.s)
+
+similar(::Type{<:CodeUnits{T}}, dims::Dims) where {T} = similar(Array{T}, dims)
 
 """
     codeunits(s::AbstractString)
diff --git a/base/strings/cstring.jl b/base/strings/cstring.jl
new file mode 100644
index 0000000000000..8da67d7f7bc14
--- /dev/null
+++ b/base/strings/cstring.jl
@@ -0,0 +1,327 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+import Core.Intrinsics: bitcast
+
+"""
+    Cwstring
+
+A C-style string composed of the native wide character type
+[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For
+C-style strings composed of the native character
+type, see [`Cstring`](@ref). For more information
+about string interoperability with C, see the
+[manual](@ref man-bits-types).
+
+"""
+Cwstring
+
+"""
+    Cstring
+
+A C-style string composed of the native character type
+[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. For
+C-style strings composed of the native wide character
+type, see [`Cwstring`](@ref). For more information
+about string interoperability with C, see the
+[manual](@ref man-bits-types).
+"""
+Cstring
+
+# construction from pointers
+Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = bitcast(Cstring, p)
+Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}})       = bitcast(Cwstring, p)
+Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = bitcast(Ptr{T}, p)
+Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}}  = bitcast(Ptr{Cwchar_t}, p)
+
+convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = Cstring(p)
+convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = Cwstring(p)
+convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = Ptr{T}(p)
+convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = Ptr{T}(p)
+
+"""
+    pointer(array [, index])
+
+Get the native address of an array or string, optionally at a given location `index`.
+
+This function is "unsafe". Be careful to ensure that a Julia reference to
+`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref)
+macro should be used to protect the `array` argument from garbage collection
+within a given block of code.
+
+Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity.
+"""
+function pointer end
+
+pointer(p::Cstring) = convert(Ptr{Cchar}, p)
+pointer(p::Cwstring) = convert(Ptr{Cwchar_t}, p)
+
+# comparisons against pointers (mainly to support `cstr==C_NULL`)
+==(x::Union{Cstring,Cwstring}, y::Ptr) = pointer(x) == y
+==(x::Ptr, y::Union{Cstring,Cwstring}) = x == pointer(y)
+
+unsafe_string(s::Cstring) = unsafe_string(convert(Ptr{UInt8}, s))
+
+# convert strings to String etc. to pass as pointers
+cconvert(::Type{Cstring}, s::String) = s
+cconvert(::Type{Cstring}, s::AbstractString) =
+    cconvert(Cstring, String(s)::String)
+
+function cconvert(::Type{Cwstring}, s::AbstractString)
+    v = transcode(Cwchar_t, String(s))
+    push!(v, 0)
+    return cconvert(Cwstring, v)
+end
+
+eltype(::Type{Cstring}) = Cchar
+eltype(::Type{Cwstring}) = Cwchar_t
+
+containsnul(p::Ptr, len) =
+    C_NULL != ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len)
+containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
+containsnul(s::AbstractString) = '\0' in s
+
+function unsafe_convert(::Type{Cstring}, s::String)
+    p = unsafe_convert(Ptr{Cchar}, s)
+    containsnul(p, sizeof(s)) &&
+        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
+    return Cstring(p)
+end
+
+unsafe_convert(::Type{Cstring}, s::Union{Memory{UInt8},Memory{Int8}}) = Cstring(unsafe_convert(Ptr{Cvoid}, s))
+
+function cconvert(::Type{Cwstring}, v::Vector{Cwchar_t})
+    for i = 1:length(v)-1
+        v[i] == 0 &&
+            throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))"))
+    end
+    v[end] == 0 ||
+        throw(ArgumentError("C string data must be NUL terminated: $(repr(v))"))
+    return cconvert(Ptr{Cwchar_t}, v)
+end
+unsafe_convert(::Type{Cwstring}, s) = Cwstring(unsafe_convert(Ptr{Cwchar_t}, s))
+unsafe_convert(::Type{Cwstring}, s::Cwstring) = s
+
+# symbols are guaranteed not to contain embedded NUL
+cconvert(::Type{Cstring}, s::Symbol) = s
+unsafe_convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s))
+
+if ccall(:jl_get_UNAME, Any, ()) === :NT
+"""
+    Base.cwstring(s)
+
+Convert a string `s` to a NUL-terminated `Vector{Cwchar_t}`, suitable for passing to C
+functions expecting a `Ptr{Cwchar_t}`. The main advantage of using this over the implicit
+conversion provided by [`Cwstring`](@ref) is if the function is called multiple times with the
+same argument.
+
+This is only available on Windows.
+"""
+function cwstring(s::AbstractString)
+    bytes = codeunits(String(s))
+    0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
+    return push!(transcode(UInt16, bytes), 0)
+end
+end
+
+# transcoding between data in UTF-8 and UTF-16 for Windows APIs,
+# and also UTF-32 for APIs using Cwchar_t on other platforms.
+
+"""
+    transcode(T, src)
+
+Convert string data between Unicode encodings. `src` is either a
+`String` or a `Vector{UIntXX}` of UTF-XX code units, where
+`XX` is 8, 16, or 32. `T` indicates the encoding of the return value:
+`String` to return a (UTF-8 encoded) `String` or `UIntXX`
+to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref)
+can also be used as the integer type, for converting `wchar_t*` strings
+used by external C libraries.)
+
+The `transcode` function succeeds as long as the input data can be
+reasonably represented in the target encoding; it always succeeds for
+conversions between UTF-XX encodings, even for invalid Unicode data.
+
+Only conversion to/from UTF-8 is currently supported.
+
+# Examples
+```jldoctest
+julia> str = "αβγ"
+"αβγ"
+
+julia> transcode(UInt16, str)
+3-element Vector{UInt16}:
+ 0x03b1
+ 0x03b2
+ 0x03b3
+
+julia> transcode(String, transcode(UInt16, str))
+"αβγ"
+```
+"""
+function transcode end
+
+transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src
+transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src]
+transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}} =
+    transcode(T, String(Vector(src)))
+transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}} =
+    transcode(T, String(src))
+
+function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}})
+    buf = IOBuffer()
+    for c in src
+        print(buf, Char(c))
+    end
+    take!(buf)
+end
+transcode(::Type{String}, src::String) = src
+transcode(T, src::String) = transcode(T, codeunits(src))
+transcode(::Type{String}, src) = String(transcode(UInt8, src))
+
+function transcode(::Type{UInt16}, src::AbstractVector{UInt8})
+    require_one_based_indexing(src)
+    dst = UInt16[]
+    i, n = 1, length(src)
+    n > 0 || return dst
+    sizehint!(dst, 2n)
+    a = src[1]
+    while true
+        if i < n && -64 <= a % Int8 <= -12 # multi-byte character
+            b = src[i += 1]
+            if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b
+                # invalid UTF-8 (non-continuation or too-high code point)
+                push!(dst, a)
+                a = b; continue
+            elseif a < 0xe0 # 2-byte UTF-8
+                push!(dst, xor(0x3080, UInt16(a) << 6, b))
+            elseif i < n # 3/4-byte character
+                c = src[i += 1]
+                if -64 <= (c % Int8) # invalid UTF-8 (non-continuation)
+                    push!(dst, a, b)
+                    a = c; continue
+                elseif a < 0xf0 # 3-byte UTF-8
+                    push!(dst, xor(0x2080, UInt16(a) << 12, UInt16(b) << 6, c))
+                elseif i < n
+                    d = src[i += 1]
+                    if -64 <= (d % Int8) # invalid UTF-8 (non-continuation)
+                        push!(dst, a, b, c)
+                        a = d; continue
+                    elseif a == 0xf0 && b < 0x90 # overlong encoding
+                        push!(dst, xor(0x2080, UInt16(b) << 12, UInt16(c) << 6, d))
+                    else # 4-byte UTF-8
+                        push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4),
+                                   xor(0xdc80, UInt16(c & 0xf) << 6, d))
+                    end
+                else # too short
+                    push!(dst, a, b, c)
+                    break
+                end
+            else # too short
+                push!(dst, a, b)
+                break
+            end
+        else # ASCII or invalid UTF-8 (continuation byte or too-high code point)
+            push!(dst, a)
+        end
+        i < n || break
+        a = src[i += 1]
+    end
+    return dst
+end
+
+function transcode(::Type{UInt8}, src::AbstractVector{UInt16})
+    require_one_based_indexing(src)
+    n = length(src)
+    n == 0 && return UInt8[]
+
+    # Precompute m = sizeof(dst).   This involves annoying duplication
+    # of the loop over the src array.   However, this is not just an
+    # optimization: it is problematic for security reasons to grow
+    # dst dynamically, because Base.winprompt uses this function to
+    # convert passwords to UTF-8 and we don't want to make unintentional
+    # copies of the password data.
+    a = src[1]
+    i, m = 1, 0
+    while true
+        if a < 0x80
+            m += 1
+        elseif a < 0x800 # 2-byte UTF-8
+            m += 2
+        elseif a & 0xfc00 == 0xd800 && i < length(src)
+            b = src[i += 1]
+            if (b & 0xfc00) == 0xdc00 # 2-unit UTF-16 sequence => 4-byte UTF-8
+                m += 4
+            else
+                m += 3
+                a = b; continue
+            end
+        else
+            # 1-unit high UTF-16 or unpaired high surrogate
+            # either way, encode as 3-byte UTF-8 code point
+            m += 3
+        end
+        i < n || break
+        a = src[i += 1]
+    end
+
+    dst = StringVector(m)
+    a = src[1]
+    i, j = 1, 0
+    while true
+        if a < 0x80 # ASCII
+            dst[j += 1] = a % UInt8
+        elseif a < 0x800 # 2-byte UTF-8
+            dst[j += 1] = 0xc0 | ((a >> 6) % UInt8)
+            dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
+        elseif a & 0xfc00 == 0xd800 && i < n
+            b = src[i += 1]
+            if (b & 0xfc00) == 0xdc00
+                # 2-unit UTF-16 sequence => 4-byte UTF-8
+                a += 0x2840
+                dst[j += 1] = 0xf0 | ((a >> 8) % UInt8)
+                dst[j += 1] = 0x80 | ((a % UInt8) >> 2)
+                dst[j += 1] = xor(0xf0, ((a % UInt8) << 4) & 0x3f, (b >> 6) % UInt8)
+                dst[j += 1] = 0x80 | ((b % UInt8) & 0x3f)
+            else
+                dst[j += 1] = 0xe0 | ((a >> 12) % UInt8)
+                dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f)
+                dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
+                a = b; continue
+            end
+        else
+            # 1-unit high UTF-16 or unpaired high surrogate
+            # either way, encode as 3-byte UTF-8 code point
+            dst[j += 1] = 0xe0 | ((a >> 12) % UInt8)
+            dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f)
+            dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f)
+        end
+        i < n || break
+        a = src[i += 1]
+    end
+    return dst
+end
+
+"""
+    unsafe_string(p::Ptr{T}, [length::Integer]) where {T<:Union{UInt16,UInt32,Cwchar_t}}
+    unsafe_string(p::Cwstring)
+
+Transcode a string from the address of a C-style (NUL-terminated) string encoded as UTF-16
+(`T=UInt16`), UTF-32 (`T=UInt32`), or the system-dependent `wchar_t` (`T=Cwchar_t` or `Cwstring`),
+returning a `String` (UTF-8 encoding), similar to [`transcode`](@ref) but reading directly
+from a pointer.  (The pointer can be safely freed afterwards.) If `length` is specified
+(the length of the data in encoding units), the string does not have to be NUL-terminated.
+
+This function is labeled "unsafe" because it will crash if `p` is not
+a valid memory address to data of the requested length (or NUL-terminated data).
+"""
+function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}}
+    transcode(String, unsafe_wrap(Array, p, length; own=false))
+end
+function unsafe_string(p::Ptr{T}) where {T<:Union{UInt16,UInt32,Cwchar_t}}
+    n = 1
+    while unsafe_load(p, n) != 0
+        n += 1
+    end
+    return unsafe_string(p, n - 1)
+end
+unsafe_string(cw::Cwstring) = unsafe_string(convert(Ptr{Cwchar_t}, cw))
diff --git a/base/strings/io.jl b/base/strings/io.jl
index 987a64798d3da..1e2e41f1bca0c 100644
--- a/base/strings/io.jl
+++ b/base/strings/io.jl
@@ -10,10 +10,10 @@ if `io` is not given) a canonical (un-decorated) text representation.
 The representation used by `print` includes minimal formatting and tries to
 avoid Julia-specific details.
 
-`print` falls back to calling `show`, so most types should just define
-`show`. Define `print` if your type has a separate "plain" representation.
-For example, `show` displays strings with quotes, and `print` displays strings
-without quotes.
+`print` falls back to calling the 2-argument `show(io, x)` for each argument `x` in `xs`,
+so most types should just define `show`. Define `print` if your type has a separate
+"plain" representation.  For example, `show` displays strings with quotes, and `print`
+displays strings without quotes.
 
 See also [`println`](@ref), [`string`](@ref), [`printstyled`](@ref).
 
@@ -25,7 +25,7 @@ julia> io = IOBuffer();
 
 julia> print(io, "Hello", ' ', :World!)
 
-julia> String(take!(io))
+julia> takestring!(io)
 "Hello World!"
 ```
 """
@@ -68,12 +68,11 @@ julia> io = IOBuffer();
 
 julia> println(io, "Hello", ',', " world.")
 
-julia> String(take!(io))
+julia> takestring!(io)
 "Hello, world.\\n"
 ```
 """
 println(io::IO, xs...) = print(io, xs..., "\n")
-
 ## conversion of general objects to strings ##
 
 """
@@ -81,10 +80,6 @@ println(io::IO, xs...) = print(io, xs..., "\n")
 
 Call the given function with an I/O stream and the supplied extra arguments.
 Everything written to this I/O stream is returned as a string.
-`context` can be an [`IOContext`](@ref) whose properties will be used, a `Pair`
-specifying a property and its value, or a tuple of `Pair` specifying multiple
-properties and their values. `sizehint` suggests the capacity of the buffer (in
-bytes).
 
 The optional keyword argument `context` can be set to a `:key=>value` pair, a
 tuple of `:key=>value` pairs, or an `IO` or [`IOContext`](@ref) object whose
@@ -113,7 +108,7 @@ function sprint(f::Function, args...; context=nothing, sizehint::Integer=0)
     else
         f(s, args...)
     end
-    String(_unsafe_take!(s))
+    takestring!(s)
 end
 
 function _str_sizehint(x)
@@ -147,7 +142,7 @@ function print_to_string(xs...)
     for x in xs
         print(s, x)
     end
-    String(_unsafe_take!(s))
+    takestring!(s)
 end
 
 function string_with_env(env, xs...)
@@ -164,7 +159,7 @@ function string_with_env(env, xs...)
     for x in xs
         print(env_io, x)
     end
-    String(_unsafe_take!(s))
+    takestring!(s)
 end
 
 """
@@ -210,35 +205,29 @@ function show(
         # one line in collection, seven otherwise
         get(io, :typeinfo, nothing) === nothing && (limit *= 7)
     end
+    limit = max(0, limit-2) # quote chars
 
     # early out for short strings
-    len = ncodeunits(str)
-    len ≤ limit - 2 && # quote chars
-        return show(io, str)
+    check_textwidth(str, limit) && return show(io, str)
 
     # these don't depend on string data
     units = codeunit(str) == UInt8 ? "bytes" : "code units"
     skip_text(skip) = " ⋯ $skip $units ⋯ "
-    short = length(skip_text("")) + 4 # quote chars
-    chars = max(limit, short + 1) - short # at least 1 digit
 
-    # figure out how many characters to print in elided case
-    chars -= d = ndigits(len - chars) # first adjustment
-    chars += d - ndigits(len - chars) # second if needed
-    chars = max(0, chars)
+    # longest possible replacement string for omitted chars
+    max_replacement = skip_text(ncodeunits(str) * 100) # *100 for 2 inner quote chars
 
-    # find head & tail, avoiding O(length(str)) computation
-    head = nextind(str, 0, 1 + (chars + 1) ÷ 2)
-    tail = prevind(str, len + 1, chars ÷ 2)
+    head, tail = string_truncate_boundaries(str, limit, max_replacement, Val(:center))
 
     # threshold: min chars skipped to make elision worthwhile
-    t = short + ndigits(len - chars) - 1
-    n = tail - head # skipped code units
-    if 4t ≤ n || t ≤ n && t ≤ length(str, head, tail-1)
-        skip = skip_text(n)
-        show(io, SubString(str, 1:prevind(str, head)))
-        printstyled(io, skip; color=:light_yellow, bold=true)
-        show(io, SubString(str, tail))
+    afterhead = nextind(str, head)
+    n = tail - afterhead # skipped code units
+    replacement = skip_text(n)
+    t = ncodeunits(replacement) # length of replacement (textwidth == ncodeunits here)
+    @views if 4t ≤ n || t ≤ n && t ≤ textwidth(str[afterhead:prevind(str,tail)])
+        show(io, str[begin:head])
+        printstyled(io, replacement; color=:light_yellow, bold=true)
+        show(io, str[tail:end])
     else
         show(io, str)
     end
@@ -246,14 +235,16 @@ end
 
 # optimized methods to avoid iterating over chars
 write(io::IO, s::Union{String,SubString{String}}) =
-    GC.@preserve s Int(unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))))::Int
+    GC.@preserve s (unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))) % Int)::Int
 print(io::IO, s::Union{String,SubString{String}}) = (write(io, s); nothing)
 
 """
     repr(x; context=nothing)
 
-Create a string from any value using the [`show`](@ref) function.
-You should not add methods to `repr`; define a `show` method instead.
+Create a string representation of any value using the 2-argument `show(io, x)` function,
+which aims to produce a string that is parseable Julia code, where possible.
+i.e. `eval(Meta.parse(repr(x))) == x` should hold true.
+You should not add methods to `repr`; define a [`show`](@ref) method instead.
 
 The optional keyword argument `context` can be set to a `:key=>value` pair, a
 tuple of `:key=>value` pairs, or an `IO` or [`IOContext`](@ref) object whose
@@ -262,7 +253,7 @@ attributes are used for the I/O stream passed to `show`.
 Note that `repr(x)` is usually similar to how the value of `x` would
 be entered in Julia.  See also [`repr(MIME("text/plain"), x)`](@ref) to instead
 return a "pretty-printed" version of `x` designed more for human consumption,
-equivalent to the REPL display of `x`.
+equivalent to the REPL display of `x`, using the 3-argument `show(io, mime, x)`.
 
 !!! compat "Julia 1.7"
     Passing a tuple to keyword `context` requires Julia 1.7 or later.
@@ -298,10 +289,10 @@ Create a read-only `IOBuffer` on the data underlying the given string.
 ```jldoctest
 julia> io = IOBuffer("Haho");
 
-julia> String(take!(io))
+julia> takestring!(io)
 "Haho"
 
-julia> String(take!(io))
+julia> takestring!(io)
 "Haho"
 ```
 """
@@ -353,9 +344,29 @@ function join(io::IO, iterator, delim="")
     end
 end
 
-join(iterator) = sprint(join, iterator)
-join(iterator, delim) = sprint(join, iterator, delim)
-join(iterator, delim, last) = sprint(join, iterator, delim, last)
+function _join_preserve_annotations(iterator, args...)
+    et = @default_eltype(iterator)
+    if isconcretetype(et) && !_isannotated(et) && !any(_isannotated, args)
+        sprint(join, iterator, args...)
+    else
+        io = AnnotatedIOBuffer()
+        join(io, iterator, args...)
+        # If we know (from compile time information, or dynamically in the case
+        # of iterators with a non-concrete eltype), that the result is annotated
+        # in nature, we extract an `AnnotatedString`, otherwise we just extract
+        # a plain `String` from `io`.
+        if isconcretetype(et) || !isempty(io.annotations)
+            seekstart(io)
+            read(io, AnnotatedString{String})
+        else
+            String(take!(io.io))
+        end
+    end
+end
+
+join(iterator) = _join_preserve_annotations(iterator)
+join(iterator, delim) = _join_preserve_annotations(iterator, delim)
+join(iterator, delim, last) = _join_preserve_annotations(iterator, delim, last)
 
 ## string escaping & unescaping ##
 
@@ -364,8 +375,8 @@ escape_nul(c::Union{Nothing, AbstractChar}) =
     (c !== nothing && '0' <= c <= '7') ? "\\x00" : "\\0"
 
 """
-    escape_string(str::AbstractString[, esc]; keep = ())::AbstractString
-    escape_string(io, str::AbstractString[, esc]; keep = ())::Nothing
+    escape_string(str::AbstractString[, esc]; keep=(), ascii=false, fullhex=false)::AbstractString
+    escape_string(io, str::AbstractString[, esc]; keep=())::Nothing
 
 General escaping of traditional C and Unicode escape sequences. The first form returns the
 escaped string, the second prints the result to `io`.
@@ -380,11 +391,23 @@ escaped by a prepending backslash (`\"` is also escaped by default in the first
 The argument `keep` specifies a collection of characters which are to be kept as
 they are. Notice that `esc` has precedence here.
 
+The argument `ascii` can be set to `true` to escape all non-ASCII characters,
+whereas the default `ascii=false` outputs printable Unicode characters as-is.
+(`keep` takes precedence over `ascii`.)
+
+The argument `fullhex` can be set to `true` to require all `\\u` escapes to be
+printed with 4 hex digits, and `\\U` escapes to be printed with 8 hex digits,
+whereas by default (`fullhex=false`) they are printed with fewer digits if
+possible (omitting leading zeros).
+
 See also [`unescape_string`](@ref) for the reverse operation.
 
 !!! compat "Julia 1.7"
     The `keep` argument is available as of Julia 1.7.
 
+!!! compat "Julia 1.12"
+    The `ascii` and `fullhex` arguments require Julia 1.12.
+
 # Examples
 ```jldoctest
 julia> escape_string("aaa\\nbbb")
@@ -403,7 +426,7 @@ julia> escape_string(string('\\u2135','\\0','0')) # \\0 would be ambiguous
 "ℵ\\\\x000"
 ```
 """
-function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
+function escape_string(io::IO, s::AbstractString, esc=""; keep = (), ascii::Bool=false, fullhex::Bool=false)
     a = Iterators.Stateful(s)
     for c::AbstractChar in a
         if c in esc
@@ -418,10 +441,10 @@ function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
             isprint(c)         ? print(io, c) :
                                  print(io, "\\x", string(UInt32(c), base = 16, pad = 2))
         elseif !isoverlong(c) && !ismalformed(c)
-            isprint(c)         ? print(io, c) :
-            c <= '\x7f'        ? print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) :
-            c <= '\uffff'      ? print(io, "\\u", string(UInt32(c), base = 16, pad = need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 4 : 2)) :
-                                 print(io, "\\U", string(UInt32(c), base = 16, pad = need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 8 : 4))
+            !ascii && isprint(c) ? print(io, c) :
+            c <= '\x7f'          ? print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) :
+            c <= '\uffff'        ? print(io, "\\u", string(UInt32(c), base = 16, pad = fullhex || need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 4 : 2)) :
+                                   print(io, "\\U", string(UInt32(c), base = 16, pad = fullhex || need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 8 : 4))
         else # malformed or overlong
             u = bswap(reinterpret(UInt32, c)::UInt32)
             while true
@@ -432,8 +455,8 @@ function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
     end
 end
 
-escape_string(s::AbstractString, esc=('\"',); keep = ()) =
-    sprint((io)->escape_string(io, s, esc; keep = keep), sizehint=lastindex(s))
+escape_string(s::AbstractString, esc=('\"',); keep = (), ascii::Bool=false, fullhex::Bool=false) =
+    sprint((io)->escape_string(io, s, esc; keep, ascii, fullhex), sizehint=lastindex(s))
 
 function print_quoted(io, s::AbstractString)
     print(io, '"')
@@ -557,7 +580,7 @@ julia> v[2]
 0x32
 ```
 """
-macro b_str(s)
+macro b_str(s::String)
     v = codeunits(unescape_string(s))
     QuoteNode(v)
 end
@@ -590,14 +613,14 @@ julia> println(raw"\\\\x \\\\\\"")
 macro raw_str(s); s; end
 
 """
-    escape_raw_string(s::AbstractString)
-    escape_raw_string(io, s::AbstractString)
+    escape_raw_string(s::AbstractString, delim='"')::AbstractString
+    escape_raw_string(io, s::AbstractString, delim='"')
 
 Escape a string in the manner used for parsing raw string literals.
-For each double-quote (`"`) character in input string `s`, this
-function counts the number _n_ of preceding backslash (`\\`) characters,
-and then increases there the number of backslashes from _n_ to 2_n_+1
-(even for _n_ = 0). It also doubles a sequence of backslashes at the end
+For each double-quote (`"`) character in input string `s` (or `delim` if
+specified), this function counts the number _n_ of preceding backslash (`\\`)
+characters, and then increases there the number of backslashes from _n_ to
+2_n_+1 (even for _n_ = 0). It also doubles a sequence of backslashes at the end
 of the string.
 
 This escaping convention is used in raw strings and other non-standard
@@ -605,43 +628,48 @@ string literals. (It also happens to be the escaping convention
 expected by the Microsoft C/C++ compiler runtime when it parses a
 command-line string into the argv[] array.)
 
-See also [`escape_string`](@ref).
+See also [`Base.escape_string()`](@ref).
 """
-function escape_raw_string(io, str::AbstractString)
+function escape_raw_string(io::IO, str::AbstractString, delim::Char='"')
+    total = 0
     escapes = 0
     for c in str
         if c == '\\'
             escapes += 1
         else
-            if c == '"'
+            if c == delim
                 # if one or more backslashes are followed by
                 # a double quote then escape all backslashes
                 # and the double quote
-                escapes = escapes * 2 + 1
-            end
-            while escapes > 0
-                write(io, '\\')
-                escapes -= 1
+                escapes += 1
+                total += escapes
+                while escapes > 0
+                    write(io, '\\')
+                    escapes -= 1
+                end
             end
             escapes = 0
-            write(io, c)
         end
+        write(io, c)
     end
     # also escape any trailing backslashes,
     # so they do not affect the closing quote
+    total += escapes
     while escapes > 0
-        write(io, '\\')
         write(io, '\\')
         escapes -= 1
     end
+    total
+end
+function escape_raw_string(str::AbstractString, delim::Char='"')
+    total = escape_raw_string(devnull, str, delim) # check whether the string even needs to be copied and how much to allocate for it
+    return total == 0 ? str : sprint(escape_raw_string, str, delim; sizehint = sizeof(str) + total)
 end
-escape_raw_string(str::AbstractString) = sprint(escape_raw_string, str;
-                                                sizehint = lastindex(str) + 2)
 
 ## multiline strings ##
 
 """
-    indentation(str::AbstractString; tabwidth=8) -> (Int, Bool)
+    indentation(str::AbstractString; tabwidth=8) -> (width::Int, empty::Bool)
 
 Calculate the width of leading white space. Return the width and a flag to indicate
 if the string is empty.
@@ -741,7 +769,7 @@ function unindent(str::AbstractString, indent::Int; tabwidth=8)
             print(buf, ' ')
         end
     end
-    String(take!(buf))
+    takestring!(buf)
 end
 
 function String(a::AbstractVector{Char})
@@ -764,3 +792,26 @@ function String(chars::AbstractVector{<:AbstractChar})
         end
     end
 end
+
+function AnnotatedString(chars::AbstractVector{C}) where {C<:AbstractChar}
+    str = if C <: AnnotatedChar
+        String(getfield.(chars, :char))
+    else
+        sprint(sizehint=length(chars)) do io
+            for c in chars
+                print(io, c)
+            end
+        end
+    end
+    annots = RegionAnnotation[]
+    point = 1
+    for c in chars
+        if c isa AnnotatedChar
+            for annot in c.annotations
+                push!(annots, (point:point, annot...))
+            end
+        end
+        point += ncodeunits(c)
+    end
+    AnnotatedString(str, annots)
+end
diff --git a/base/strings/lazy.jl b/base/strings/lazy.jl
index eaaa6397d37f2..f7d946fbb9168 100644
--- a/base/strings/lazy.jl
+++ b/base/strings/lazy.jl
@@ -96,6 +96,7 @@ iterate(s::LazyString, i::Integer) = iterate(String(s), i)
 isequal(a::LazyString, b::LazyString) = isequal(String(a), String(b))
 ==(a::LazyString, b::LazyString) = (String(a) == String(b))
 ncodeunits(s::LazyString) = ncodeunits(String(s))
-codeunit(s::LazyString) = codeunit(String(s))
+codeunit(s::LazyString) = codeunit("") # returns UInt8
 codeunit(s::LazyString, i::Integer) = codeunit(String(s), i)
+codeunits(s::LazyString) = codeunits(String(s))
 isvalid(s::LazyString, i::Integer) = isvalid(String(s), i)
diff --git a/base/strings/search.jl b/base/strings/search.jl
index 1a3085e084ccd..22e26457b41a4 100644
--- a/base/strings/search.jl
+++ b/base/strings/search.jl
@@ -10,95 +10,257 @@ match strings with [`match`](@ref).
 """
 abstract type AbstractPattern end
 
-nothing_sentinel(i) = i == 0 ? nothing : i
+# TODO: These unions represent bytes in memory that can be accessed via a pointer.
+# this property is used throughout Julia, e.g. also in IO code.
+# This deserves a better solution - see #53178.
+# If such a better solution comes in place, these unions should be replaced.
+const DenseInt8 = Union{
+    DenseArray{Int8},
+    FastContiguousSubArray{Int8,N,<:DenseArray} where N
+}
+
+# Note: This union is different from that above in that it includes CodeUnits.
+# Currently, this is redundant as CodeUnits <: DenseVector, but this subtyping
+# is buggy and may be removed in the future, see #54002
+const DenseUInt8 = Union{
+    DenseArray{UInt8},
+    FastContiguousSubArray{UInt8,N,<:DenseArray} where N,
+    CodeUnits{UInt8, <:Union{String, SubString{String}}},
+    FastContiguousSubArray{UInt8,N,<:CodeUnits{UInt8, <:Union{String, SubString{String}}}} where N,
+}
+
+const DenseUInt8OrInt8 = Union{DenseUInt8, DenseInt8}
+
+function last_utf8_byte(c::Char)
+    u = reinterpret(UInt32, c)
+    shift = ((4 - ncodeunits(c)) * 8) & 31
+    (u >> shift) % UInt8
+end
 
-function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
-                  s::String, i::Integer)
-    if i < 1 || i > sizeof(s)
-        i == sizeof(s) + 1 && return nothing
-        throw(BoundsError(s, i))
-    end
-    @inbounds isvalid(s, i) || string_index_err(s, i)
-    c = pred.x
-    c ≤ '\x7f' && return nothing_sentinel(_search(s, c % UInt8, i))
-    while true
-        i = _search(s, first_utf8_byte(c), i)
-        i == 0 && return nothing
-        pred(s[i]) && return i
-        i = nextind(s, i)
+# Whether the given byte is guaranteed to be the only byte in a Char
+# This holds even in the presence of invalid UTF8
+is_standalone_byte(x::UInt8) = (x < 0x80) | (x > 0xf7)
+
+last_byteindex(x::Union{String, SubString{String}}) = ncodeunits(x)
+last_byteindex(x::DenseUInt8OrInt8) = lastindex(x)
+
+# Internal type - lazy iterator over positions of char in string
+struct FwCharPosIter{S}
+    string::S # S is assumed to be either String or SubString{String}
+    char::Char
+    # Char searchers search for the last UTF8 byte, because this byte tends to
+    # have the most variety in real texts, so any individual value is rarer.
+    # This allows more work to be done in the fast path using memchr.
+    last_char_byte::UInt8
+end
+
+function FwCharPosIter(s::Union{String, SubString{String}}, c::AbstractChar)
+    char = Char(c)::Char
+    byte = last_utf8_byte(char)
+    FwCharPosIter{typeof(s)}(s, char, byte)
+end
+
+# i is the index in the string to search from.
+# We assume it's never < firstindex(s.string)
+function Base.iterate(s::FwCharPosIter, i::Int=1)
+    scu = ncodeunits(s.string)
+
+    # By definition, if the last byte is a standalone byte, then the char
+    # is a single-byte char where the byte can never be a subset of another char.
+    # Hence, we can simply search for the occurrence of the byte itself.
+    if is_standalone_byte(s.last_char_byte)
+        i > scu && return nothing
+        i = _search(s.string, s.last_char_byte, i)
+        i === nothing ? nothing : (i, i + 1)
+    else
+        ncu = ncodeunits(s.char)
+        while true
+            i > scu && return nothing
+            i = _search(s.string, s.last_char_byte, i)
+            i === nothing && return nothing
+            # Increment i before the continue to avoid infinite loop.
+            # Since we search for the last byte in the char, the index has an offset.
+            i += 1
+            index = i - ncu
+            # The byte may be part of a different char, in which case index
+            # may be invalid.
+            isvalid(s.string, index) || continue
+            # Here, we use iterate instead of indexing, because indexing needlessly
+            # re-validates the index which we have already done here.
+            # This relies on the implementation detail that the iterator state for
+            # iterating strings is the same as the byte index.
+            char = first(something(iterate(s.string, index)))
+            char == s.char && return (index, i)
+        end
     end
 end
 
-findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray) =
-    nothing_sentinel(_search(a, pred.x))
+# Internal type - lazy iterator over positions of char in string, in reverse order
+struct RvCharPosIter{S}
+    string::S # S is assumed to be either String or SubString{String}
+    char::Char
+    last_char_byte::UInt8
+end
 
-findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) =
-    nothing_sentinel(_search(a, pred.x, i))
+IteratorSize(s::Type{<:Union{FwCharPosIter, RvCharPosIter}}) = SizeUnknown()
+eltype(::Type{<:Union{FwCharPosIter, RvCharPosIter}}) = Int
 
-findfirst(::typeof(iszero), a::ByteArray) = nothing_sentinel(_search(a, zero(UInt8)))
-findnext(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_search(a, zero(UInt8), i))
+function RvCharPosIter(s::Union{String, SubString{String}}, c::AbstractChar)
+    char = Char(c)::Char
+    byte = last_utf8_byte(char)
+    RvCharPosIter{typeof(s)}(s, char, byte)
+end
 
-function _search(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1)
-    if i < 1
-        throw(BoundsError(a, i))
-    end
-    n = sizeof(a)
-    if i > n
-        return i == n+1 ? 0 : throw(BoundsError(a, i))
+# i is the index in the string to search from
+# We assume it's never > ncodeunits(s.string)
+# This is the same implementation as FwCharPosIter, except for two differences:
+# 1. i must be decremented, not incremented because we are searching backwards
+# 2. Because we search for the last byte, the starting value of i need to be
+#    incremented in the beginning, as that byte may be found at i + ncodeunits(char) - 1.
+function Base.iterate(s::RvCharPosIter, i::Int=ncodeunits(s.string))
+    ncu = ncodeunits(s.char)
+    if is_standalone_byte(s.last_char_byte)
+        i < ncu && return nothing
+        i = _rsearch(s.string, s.last_char_byte, i)
+        i === nothing ? nothing : (i, i - 1)
+    else
+        i = min(ncodeunits(s.string), i + ncu - 1)
+        while true
+            i < ncu && return nothing
+            i = _rsearch(s.string, s.last_char_byte, i)
+            i === nothing && return nothing
+            index = i - ncu + 1
+            i -= 1
+            isvalid(s.string, index) || continue
+            char = first(something(iterate(s.string, index)))
+            char == s.char && return (index, i)
+        end
     end
-    p = pointer(a)
-    q = GC.@preserve a ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-1, b, n-i+1)
-    return q == C_NULL ? 0 : Int(q-p+1)
 end
 
-function _search(a::ByteArray, b::AbstractChar, i::Integer = 1)
-    if isascii(b)
-        _search(a,UInt8(b),i)
+function try_next(x, state)
+    y = iterate(x, state)
+    y === nothing ? nothing : first(y)
+end
+
+function findnext(
+    pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
+    s::Union{String, SubString{String}},
+    i::Integer,
+)
+    # TODO: Redesign these strange rules for errors, see #54584
+    scu = ncodeunits(s)
+    i == scu + 1 && return nothing
+    @boundscheck if i < 1 || i > scu + 1
+        throw(BoundsError(s, i))
+    end
+    # The most common case is probably searching for an ASCII char.
+    # We inline this critical path here to avoid instantiating a
+    # FwCharPosIter in the common case.
+    c = Char(pred.x)::Char
+    u = (reinterpret(UInt32, c) >> 24) % UInt8
+    i = Int(i)::Int
+    isvalid(s, i) || string_index_err(s, i)
+    return if is_standalone_byte(u)
+        _search(s, u, i)
     else
-        _search(a,codeunits(string(b)),i).start
+        try_next(FwCharPosIter(s, c, last_utf8_byte(c)), i)
     end
 end
 
-function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
-                  s::String, i::Integer)
-    c = pred.x
-    c ≤ '\x7f' && return nothing_sentinel(_rsearch(s, c % UInt8, i))
-    b = first_utf8_byte(c)
-    while true
-        i = _rsearch(s, b, i)
-        i == 0 && return nothing
-        pred(s[i]) && return i
-        i = prevind(s, i)
-    end
+function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer)
+    @boundscheck i < firstindex(a) && throw(BoundsError(a, i))
+    i > lastindex(a) && return nothing
+    _search(a, pred.x, i)
 end
 
-findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray) =
-    nothing_sentinel(_rsearch(a, pred.x))
+function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer)
+    @boundscheck i < firstindex(a) && throw(BoundsError(a, i))
+    i > lastindex(a) && return nothing
+    _search(a, pred.x, i)
+end
 
-findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) =
-    nothing_sentinel(_rsearch(a, pred.x, i))
+# iszero is special, in that the bitpattern for zero for Int8 and UInt8 is the same,
+# so we can use memchr even if we search for an Int8 in an UInt8 array or vice versa
+function findnext(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer)
+    @boundscheck i < firstindex(a) && throw(BoundsError(a, i))
+    i > lastindex(a) && return nothing
+    _search(a, zero(UInt8), i)
+end
 
-findlast(::typeof(iszero), a::ByteArray) = nothing_sentinel(_rsearch(a, zero(UInt8)))
-findprev(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_rsearch(a, zero(UInt8), i))
+# This is essentially just a wrapper around memchr. i must be inbounds.
+function _search(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = firstindex(a))
+    fst = firstindex(a)
+    GC.@preserve a begin
+        p = pointer(a)
+        q = ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-fst, b, last_byteindex(a) - i + 1)
+    end
+    return q == C_NULL ? nothing : (q-p+fst) % Int
+end
 
-function _rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = sizeof(a))
-    if i < 1
-        return i == 0 ? 0 : throw(BoundsError(a, i))
+function findprev(
+    pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
+    s::Union{String, SubString{String}},
+    i::Integer,
+)
+    # TODO: Redesign these strange rules for errors, see #54584
+    if i == ncodeunits(s) + 1 || i == 0
+        return nothing
     end
-    n = sizeof(a)
-    if i > n
-        return i == n+1 ? 0 : throw(BoundsError(a, i))
+    @boundscheck if i < 1 || i > ncodeunits(s) + 1
+        throw(BoundsError(s, i))
     end
-    p = pointer(a)
-    q = GC.@preserve a ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i)
-    return q == C_NULL ? 0 : Int(q-p+1)
+    # Manually inline the fast path if c is ASCII, as we expect it to often be
+    c = Char(pred.x)::Char
+    u = (reinterpret(UInt32, c) >> 24) % UInt8
+    i = Int(i)::Int
+    return if is_standalone_byte(u)
+        _rsearch(s, u, i)
+    else
+        try_next(RvCharPosIter(s, c, last_utf8_byte(c)), i)
+    end
+end
+
+function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer)
+    @boundscheck i > lastindex(a) && throw(BoundsError(a, i))
+    i < firstindex(a) && return nothing
+    _rsearch(a, pred.x, i)
+end
+
+function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer)
+    @boundscheck i > lastindex(a) && throw(BoundsError(a, i))
+    i < firstindex(a) && return nothing
+    _rsearch(a, pred.x, i)
 end
 
-function _rsearch(a::ByteArray, b::AbstractChar, i::Integer = length(a))
-    if isascii(b)
-        _rsearch(a,UInt8(b),i)
+# See comments above for findfirst(::typeof(iszero)) methods
+function findprev(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer)
+    @boundscheck i > lastindex(a) && throw(BoundsError(a, i))
+    i < firstindex(a) && return nothing
+    _rsearch(a, zero(UInt8), i)
+end
+
+# This is essentially just a wrapper around memrchr. i must be inbounds.
+function _rsearch(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = last_byteindex(a))
+    fst = firstindex(a)
+    GC.@preserve a begin
+        p = pointer(a)
+        q = ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i-fst+1)
+    end
+    return q == C_NULL ? nothing : (q-p+fst) % Int
+end
+
+function findall(
+    pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
+    s::Union{String, SubString{String}},
+)
+    iter = FwCharPosIter(s, pred.x)
+    return if is_standalone_byte(iter.last_char_byte)
+        findall(==(iter.last_char_byte), codeunits(s))
     else
-        _rsearch(a,codeunits(string(b)),i).start
+        # It is slightly wasteful that every iteration will check is_standalone_byte
+        # again, but this should only be minor overhead in the non-fast path.
+        collect(iter)
     end
 end
 
@@ -172,21 +334,21 @@ function findnext(testf::Function, s::AbstractString, i::Integer)
     return nothing
 end
 
-
 in(c::AbstractChar, s::AbstractString) = (findfirst(isequal(c),s)!==nothing)
 
-function _searchindex(s::Union{AbstractString,ByteArray},
+function _searchindex(s::Union{AbstractString,DenseUInt8OrInt8},
                       t::Union{AbstractString,AbstractChar,Int8,UInt8},
                       i::Integer)
+    sentinel = firstindex(s) - 1
     x = Iterators.peel(t)
     if isnothing(x)
-        return 1 <= i <= nextind(s,lastindex(s))::Int ? i :
+        return firstindex(s) <= i <= nextind(s,lastindex(s))::Int ? i :
                throw(BoundsError(s, i))
     end
     t1, trest = x
     while true
         i = findnext(isequal(t1),s,i)
-        if i === nothing return 0 end
+        if i === nothing return sentinel end
         ii = nextind(s, i)::Int
         a = Iterators.Stateful(trest)
         matched = all(splat(==), zip(SubString(s, ii), a))
@@ -201,10 +363,10 @@ function _search_bloom_mask(c)
     UInt64(1) << (c & 63)
 end
 
-_nthbyte(s::String, i) = codeunit(s, i)
+_nthbyte(s::Union{String, SubString{String}}, i) = codeunit(s, i)
 _nthbyte(t::AbstractVector, index) = t[index + (firstindex(t)-1)]
 
-function _searchindex(s::String, t::String, i::Integer)
+function _searchindex(s::Union{String, SubString{String}}, t::Union{String, SubString{String}}, i::Integer)
     # Check for fast case of a single byte
     lastindex(t) == 1 && return something(findnext(isequal(t[1]), s, i), 0)
     _searchindex(codeunits(s), codeunits(t), i)
@@ -373,7 +535,7 @@ Find the last occurrence of `pattern` in `string`. Equivalent to
 julia> findlast("o", "Hello to the world")
 15:15
 
-julia> findfirst("Julia", "JuliaLang")
+julia> findlast("Julia", "JuliaLang")
 1:5
 ```
 """
@@ -423,7 +585,7 @@ findlast(ch::AbstractChar, string::AbstractString) = findlast(==(ch), string)
         overlap::Bool = false,
     )
     findall(
-        pattern::Vector{UInt8}
+        pattern::Vector{UInt8},
         A::Vector{UInt8};
         overlap::Bool = false,
     )
@@ -460,9 +622,8 @@ julia> findall(UInt8[1,2], UInt8[1,2,3,1,2])
 !!! compat "Julia 1.3"
      This method requires at least Julia 1.3.
 """
-
-function findall(t::Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}},
-                 s::Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}},
+function findall(t::Union{AbstractString, AbstractPattern, AbstractVector{UInt8}},
+                 s::Union{AbstractString, AbstractPattern, AbstractVector{UInt8}},
                  ; overlap::Bool=false)
     found = UnitRange{Int}[]
     i, e = firstindex(s), lastindex(s)
@@ -515,7 +676,7 @@ function _rsearchindex(s::AbstractString,
     end
 end
 
-function _rsearchindex(s::String, t::String, i::Integer)
+function _rsearchindex(s::Union{String, SubString{String}}, t::Union{String, SubString{String}}, i::Integer)
     # Check for fast case of a single byte
     if lastindex(t) == 1
         return something(findprev(isequal(t[1]), s, i), 0)
@@ -724,3 +885,6 @@ false
 occursin(haystack) = Base.Fix2(occursin, haystack)
 
 in(::AbstractString, ::AbstractString) = error("use occursin(needle, haystack) for string containment")
+
+in(a::UInt8, b::DenseUInt8) = !isnothing(findfirst(==(a), b))
+in(a::Int8, b::DenseInt8) = !isnothing(findfirst(==(a), b))
diff --git a/base/strings/string.jl b/base/strings/string.jl
index a26791958cd50..ab139a89af9ff 100644
--- a/base/strings/string.jl
+++ b/base/strings/string.jl
@@ -7,11 +7,11 @@ An error occurred when trying to access `str` at index `i` that is not valid.
 """
 struct StringIndexError <: Exception
     string::AbstractString
-    index::Integer
+    index::Int
 end
-@noinline string_index_err(s::AbstractString, i::Integer) =
+@noinline string_index_err((@nospecialize s::AbstractString), i::Integer) =
     throw(StringIndexError(s, Int(i)))
-function Base.showerror(io::IO, exc::StringIndexError)
+function showerror(io::IO, exc::StringIndexError)
     s = exc.string
     print(io, "StringIndexError: ", "invalid index [$(exc.index)]")
     if firstindex(s) <= exc.index <= ncodeunits(s)
@@ -27,8 +27,6 @@ function Base.showerror(io::IO, exc::StringIndexError)
     end
 end
 
-const ByteArray = Union{CodeUnits{UInt8,String}, Vector{UInt8},Vector{Int8}, FastContiguousSubArray{UInt8,1,CodeUnits{UInt8,String}}, FastContiguousSubArray{UInt8,1,Vector{UInt8}}, FastContiguousSubArray{Int8,1,Vector{Int8}}}
-
 @inline between(b::T, lo::T, hi::T) where {T<:Integer} = (lo ≤ b) & (b ≤ hi)
 
 """
@@ -63,11 +61,55 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to
 In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway
 to guarantee consistent behavior.
 """
-String(v::AbstractVector{UInt8}) = String(copyto!(StringVector(length(v)), v))
-String(v::Vector{UInt8}) = ccall(:jl_array_to_string, Ref{String}, (Any,), v)
+String(v::AbstractVector{UInt8}) = unsafe_takestring(copyto!(StringMemory(length(v)), v))
+
+function String(v::Vector{UInt8})
+    len = length(v)
+    len == 0 && return ""
+    ref = v.ref
+    if ref.ptr_or_offset == ref.mem.ptr
+        str = ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), ref.mem, len)
+    else
+        str = ccall(:jl_pchar_to_string, Ref{String}, (Ptr{UInt8}, Int), ref, len)
+    end
+    # optimized empty!(v); sizehint!(v, 0) calls
+    setfield!(v, :size, (0,))
+    setfield!(v, :ref, memoryref(Memory{UInt8}()))
+    return str
+end
+
+"""
+    unsafe_takestring(m::Memory{UInt8})::String
+
+Create a `String` from `m`, changing the interpretation of the contents of `m`.
+This is done without copying, if possible. Thus, any access to `m` after
+calling this function, either to read or to write, is undefined behavior.
+"""
+function unsafe_takestring(m::Memory{UInt8})
+    isempty(m) ? "" : ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m))
+end
+
+"""
+    takestring!(x) -> String
+
+Create a string from the content of `x`, emptying `x`.
+
+# Examples
+```jldoctest
+julia> v = [0x61, 0x62, 0x63];
+
+julia> s = takestring!(v)
+"abc"
+
+julia> isempty(v)
+true
+```
+"""
+takestring!(v::Vector{UInt8}) = String(v)
 
 """
     unsafe_string(p::Ptr{UInt8}, [length::Integer])
+    unsafe_string(p::Cstring)
 
 Copy a string from the address of a C-style (NUL-terminated) string encoded as UTF-8.
 (The pointer can be safely freed afterwards.) If `length` is specified
@@ -85,9 +127,11 @@ function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}})
     ccall(:jl_cstr_to_string, Ref{String}, (Ptr{UInt8},), p)
 end
 
-# This is @assume_effects :effect_free :nothrow :terminates_globally @ccall jl_alloc_string(n::Csize_t)::Ref{String},
+# This is `@assume_effects :total !:consistent @ccall jl_alloc_string(n::Csize_t)::Ref{String}`,
 # but the macro is not available at this time in bootstrap, so we write it manually.
-@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String}, Expr(:call, Expr(:core, :svec), :Csize_t), 1, QuoteNode((:ccall,0xe)), :(convert(Csize_t, n))))
+const _string_n_override = 0x04ee
+@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String},
+    :(Core.svec(Csize_t)), 1, QuoteNode((:ccall, _string_n_override, false)), :(convert(Csize_t, n))))
 
 """
     String(s::AbstractString)
@@ -97,8 +141,8 @@ Create a new `String` from an existing `AbstractString`.
 String(s::AbstractString) = print_to_string(s)
 @assume_effects :total String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s))
 
-unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s)
-unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s))
+unsafe_wrap(::Type{Memory{UInt8}}, s::String) = ccall(:jl_string_to_genericmemory, Ref{Memory{UInt8}}, (Any,), s)
+unsafe_wrap(::Type{Vector{UInt8}}, s::String) = wrap(Array, unsafe_wrap(Memory{UInt8}, s))
 
 Vector{UInt8}(s::CodeUnits{UInt8,String}) = copyto!(Vector{UInt8}(undef, length(s)), s)
 Vector{UInt8}(s::String) = Vector{UInt8}(codeunits(s))
@@ -114,7 +158,7 @@ pointer(s::String, i::Integer) = pointer(s) + Int(i)::Int - 1
 ncodeunits(s::String) = Core.sizeof(s)
 codeunit(s::String) = UInt8
 
-codeunit(s::String, i::Integer) = codeunit(s, Int(i))
+codeunit(s::String, i::Integer) = codeunit(s, Int(i)::Int)
 @assume_effects :foldable @inline function codeunit(s::String, i::Int)
     @boundscheck checkbounds(s, i)
     b = GC.@preserve s unsafe_load(pointer(s, i))
@@ -157,15 +201,18 @@ typemin(::String) = typemin(String)
     @boundscheck between(i, 1, n) || throw(BoundsError(s, i))
     @inbounds b = codeunit(s, i)
     (b & 0xc0 == 0x80) & (i-1 > 0) || return i
-    @inbounds b = codeunit(s, i-1)
-    between(b, 0b11000000, 0b11110111) && return i-1
-    (b & 0xc0 == 0x80) & (i-2 > 0) || return i
-    @inbounds b = codeunit(s, i-2)
-    between(b, 0b11100000, 0b11110111) && return i-2
-    (b & 0xc0 == 0x80) & (i-3 > 0) || return i
-    @inbounds b = codeunit(s, i-3)
-    between(b, 0b11110000, 0b11110111) && return i-3
-    return i
+    (@noinline function _thisind_continued(s, i, n) # mark the rest of the function as a slow-path
+        local b
+        @inbounds b = codeunit(s, i-1)
+        between(b, 0b11000000, 0b11110111) && return i-1
+        (b & 0xc0 == 0x80) & (i-2 > 0) || return i
+        @inbounds b = codeunit(s, i-2)
+        between(b, 0b11100000, 0b11110111) && return i-2
+        (b & 0xc0 == 0x80) & (i-3 > 0) || return i
+        @inbounds b = codeunit(s, i-3)
+        between(b, 0b11110000, 0b11110111) && return i-3
+        return i
+    end)(s, i, n)
 end
 
 @propagate_inbounds nextind(s::String, i::Int) = _nextind_str(s, i)
@@ -176,26 +223,34 @@ end
     n = ncodeunits(s)
     @boundscheck between(i, 1, n) || throw(BoundsError(s, i))
     @inbounds l = codeunit(s, i)
-    (l < 0x80) | (0xf8 ≤ l) && return i+1
-    if l < 0xc0
-        i′ = @inbounds thisind(s, i)
-        return i′ < i ? @inbounds(nextind(s, i′)) : i+1
-    end
-    # first continuation byte
-    (i += 1) > n && return i
-    @inbounds b = codeunit(s, i)
-    b & 0xc0 ≠ 0x80 && return i
-    ((i += 1) > n) | (l < 0xe0) && return i
-    # second continuation byte
-    @inbounds b = codeunit(s, i)
-    b & 0xc0 ≠ 0x80 && return i
-    ((i += 1) > n) | (l < 0xf0) && return i
-    # third continuation byte
-    @inbounds b = codeunit(s, i)
-    ifelse(b & 0xc0 ≠ 0x80, i, i+1)
+    between(l, 0x80, 0xf7) || return i+1
+    (@noinline function _nextind_continued(s, i, n, l) # mark the rest of the function as a slow-path
+        if l < 0xc0
+            # handle invalid codeunit index by scanning back to the start of this index
+            # (which may be the same as this index)
+            i′ = @inbounds thisind(s, i)
+            i′ >= i && return i+1
+            i = i′
+            @inbounds l = codeunit(s, i)
+            (l < 0x80) | (0xf8 ≤ l) && return i+1
+            @assert l >= 0xc0 "invalid codeunit"
+        end
+        # first continuation byte
+        (i += 1) > n && return i
+        @inbounds b = codeunit(s, i)
+        b & 0xc0 ≠ 0x80 && return i
+        ((i += 1) > n) | (l < 0xe0) && return i
+        # second continuation byte
+        @inbounds b = codeunit(s, i)
+        b & 0xc0 ≠ 0x80 && return i
+        ((i += 1) > n) | (l < 0xf0) && return i
+        # third continuation byte
+        @inbounds b = codeunit(s, i)
+        return ifelse(b & 0xc0 ≠ 0x80, i, i+1)
+    end)(s, i, n, l)
 end
 
-## checking UTF-8 & ACSII validity ##
+## checking UTF-8 & ASCII validity ##
 #=
     The UTF-8 Validation is performed by a shift based DFA.
     ┌───────────────────────────────────────────────────────────────────┐
@@ -247,7 +302,7 @@ end
 
            Shifts | 0  4 10 14 18 24  8 20 12 26
 
-    The shifts that represent each state were derived using teh SMT solver Z3, to ensure when encoded into
+    The shifts that represent each state were derived using the SMT solver Z3, to ensure when encoded into
     the rows the correct shift was a result.
 
     Each character class row is encoding 10 states with shifts as defined above. By shifting the bitsof a row by
@@ -343,7 +398,7 @@ end
 
 ##
 
-# Classifcations of string
+# Classifications of string
     # 0: neither valid ASCII nor UTF-8
     # 1: valid ASCII
     # 2: valid UTF-8
@@ -401,10 +456,11 @@ is_valid_continuation(c) = c & 0xc0 == 0x80
     b = @inbounds codeunit(s, i)
     u = UInt32(b) << 24
     between(b, 0x80, 0xf7) || return reinterpret(Char, u), i+1
-    return iterate_continued(s, i, u)
+    return @noinline iterate_continued(s, i, u)
 end
 
-function iterate_continued(s::String, i::Int, u::UInt32)
+# duck-type s so that external UTF-8 string packages like StringViews can hook in
+function iterate_continued(s, i::Int, u::UInt32)
     u < 0xc0000000 && (i += 1; @goto ret)
     n = ncodeunits(s)
     # first continuation byte
@@ -433,7 +489,8 @@ end
     return getindex_continued(s, i, u)
 end
 
-function getindex_continued(s::String, i::Int, u::UInt32)
+# duck-type s so that external UTF-8 string packages like StringViews can hook in
+function getindex_continued(s, i::Int, u::UInt32)
     if u < 0xc0000000
         # called from `getindex` which checks bounds
         @inbounds isvalid(s, i) && @goto ret
@@ -526,7 +583,7 @@ isascii(s::String) = isascii(codeunits(s))
 @assume_effects :foldable repeat(c::Char, r::BitInteger) = @invoke repeat(c::Char, r::Integer)
 
 """
-    repeat(c::AbstractChar, r::Integer) -> String
+    repeat(c::AbstractChar, r::Integer)::String
 
 Repeat a character `r` times. This can equivalently be accomplished by calling
 [`c^r`](@ref :^(::Union{AbstractString, AbstractChar}, ::Integer)).
@@ -538,9 +595,10 @@ julia> repeat('A', 3)
 ```
 """
 function repeat(c::AbstractChar, r::Integer)
+    r < 0 && throw(ArgumentError("can't repeat a character $r times"))
+    r = UInt(r)::UInt
     c = Char(c)::Char
     r == 0 && return ""
-    r < 0 && throw(ArgumentError("can't repeat a character $r times"))
     u = bswap(reinterpret(UInt32, c))
     n = 4 - (leading_zeros(u | 0xff) >> 3)
     s = _string_n(n*r)
diff --git a/base/strings/strings.jl b/base/strings/strings.jl
index d995d8535e24b..32975b6ea3fc7 100644
--- a/base/strings/strings.jl
+++ b/base/strings/strings.jl
@@ -1,5 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+include("strings/annotated.jl")
 include("strings/search.jl")
 include("strings/unicode.jl")
 
@@ -10,3 +11,4 @@ import .Iterators: PartitionIterator
 
 include("strings/util.jl")
 include("strings/io.jl")
+include("strings/annotated_io.jl")
diff --git a/base/strings/substring.jl b/base/strings/substring.jl
index 792925f24b12b..973c1d68badce 100644
--- a/base/strings/substring.jl
+++ b/base/strings/substring.jl
@@ -36,10 +36,19 @@ struct SubString{T<:AbstractString} <: AbstractString
         end
         return new(s, i-1, nextind(s,j)-i)
     end
+    function SubString{T}(s::T, i::Int, j::Int, ::Val{:noshift}) where T<:AbstractString
+        @boundscheck if !(i == j == 0)
+            si, sj = i + 1, prevind(s, j + i + 1)
+            @inbounds isvalid(s, si) || string_index_err(s, si)
+            @inbounds isvalid(s, sj) || string_index_err(s, sj)
+        end
+        new(s, i, j)
+    end
 end
 
 @propagate_inbounds SubString(s::T, i::Int, j::Int) where {T<:AbstractString} = SubString{T}(s, i, j)
-@propagate_inbounds SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s)) = SubString(s, Int(i), Int(j))
+@propagate_inbounds SubString(s::T, i::Int, j::Int, v::Val{:noshift}) where {T<:AbstractString} = SubString{T}(s, i, j, v)
+@propagate_inbounds SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s)) = SubString(s, Int(i)::Int, Int(j)::Int)
 @propagate_inbounds SubString(s::AbstractString, r::AbstractUnitRange{<:Integer}) = SubString(s, first(r), last(r))
 
 @propagate_inbounds function SubString(s::SubString, i::Int, j::Int)
@@ -100,8 +109,8 @@ function isvalid(s::SubString, i::Integer)
     @inbounds return ib && isvalid(s.string, s.offset + i)::Bool
 end
 
-thisind(s::SubString{String}, i::Int) = _thisind_str(s, i)
-nextind(s::SubString{String}, i::Int) = _nextind_str(s, i)
+@propagate_inbounds thisind(s::SubString{String}, i::Int) = _thisind_str(s, i)
+@propagate_inbounds nextind(s::SubString{String}, i::Int) = _nextind_str(s, i)
 
 parent(s::SubString) = s.string
 parentindices(s::SubString) = (s.offset + 1 : thisind(s.string, s.offset + s.ncodeunits),)
@@ -126,13 +135,13 @@ end
 pointer(x::SubString{String}) = pointer(x.string) + x.offset
 pointer(x::SubString{String}, i::Integer) = pointer(x.string) + x.offset + (i-1)
 
-function hash(s::SubString{String}, h::UInt)
-    h += memhash_seed
-    ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), s, sizeof(s), h % UInt32) + h
-end
+hash(data::SubString{String}, h::UInt) =
+    GC.@preserve data hash_bytes(pointer(data), sizeof(data), UInt64(h), HASH_SECRET) % UInt
+
+_isannotated(::SubString{T}) where {T} = _isannotated(T)
 
 """
-    reverse(s::AbstractString) -> AbstractString
+    reverse(s::AbstractString)::AbstractString
 
 Reverses a string. Technically, this function reverses the codepoints in a string and its
 main utility is for reversed-order string processing, especially for reversed
@@ -261,6 +270,7 @@ end
 
 function repeat(s::Union{String, SubString{String}}, r::Integer)
     r < 0 && throw(ArgumentError("can't repeat a string $r times"))
+    r = UInt(r)::UInt
     r == 0 && return ""
     r == 1 && return String(s)
     n = sizeof(s)
diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl
index 17c5d66c160b6..eba0ed22aee4d 100644
--- a/base/strings/unicode.jl
+++ b/base/strings/unicode.jl
@@ -4,12 +4,14 @@
 module Unicode
 
 import Base: show, ==, hash, string, Symbol, isless, length, eltype,
-             convert, isvalid, ismalformed, isoverlong, iterate
+             convert, isvalid, ismalformed, isoverlong, iterate,
+             AnnotatedString, AnnotatedChar, annotated_chartransform,
+             @assume_effects, annotations, is_overlong_enc
 
 # whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff
 
 """
-    isvalid(value) -> Bool
+    isvalid(value)::Bool
 
 Return `true` if the given value is valid for its type, which currently can be either
 `AbstractChar` or `String` or `SubString{String}`.
@@ -29,7 +31,7 @@ true
 isvalid(value)
 
 """
-    isvalid(T, value) -> Bool
+    isvalid(T, value)::Bool
 
 Return `true` if the given value is valid for that type. Types currently can
 be either `AbstractChar` or `String`. Values for `AbstractChar` can be of type `AbstractChar` or [`UInt32`](@ref).
@@ -155,15 +157,15 @@ function utf8proc_decompose(str, options, buffer, nwords, chartransform::typeof(
     ret < 0 && utf8proc_error(ret)
     return ret
 end
-function utf8proc_decompose(str, options, buffer, nwords, chartransform::T) where T
-    ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{T}),
+function utf8proc_decompose(str, options, buffer, nwords, chartransform::F) where F
+    ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{F}),
                 str, sizeof(str), buffer, nwords, options,
-                @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{T})), chartransform)
+                @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{F})), chartransform)
     ret < 0 && utf8proc_error(ret)
     return ret
 end
 
-function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform=identity)
+function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform::F = identity) where F
     nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform)
     buffer = Base.StringVector(nwords*4)
     nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform)
@@ -172,16 +174,24 @@ function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, ch
     return String(resize!(buffer, nbytes))
 end
 
-# from julia_charmap.h, used by julia_chartransform in the Unicode stdlib
+"""
+`Dict` of `original codepoint => replacement codepoint` normalizations
+to perform on Julia identifiers, to canonicalize characters that
+are both easily confused and easily inputted by accident.
+
+!!! warning
+    When this table is updated, also update the corresponding table in `src/flisp/julia_charmap.h`.
+"""
 const _julia_charmap = Dict{UInt32,UInt32}(
-    0x025B => 0x03B5,
-    0x00B5 => 0x03BC,
-    0x00B7 => 0x22C5,
-    0x0387 => 0x22C5,
-    0x2212 => 0x002D,
+    0x025B => 0x03B5, # latin small letter open e -> greek small letter epsilon
+    0x00B5 => 0x03BC, # micro sign -> greek small letter mu
+    0x00B7 => 0x22C5, # middot char -> dot operator (#25098)
+    0x0387 => 0x22C5, # Greek interpunct -> dot operator (#25098)
+    0x2212 => 0x002D, # minus -> hyphen-minus (#26193)
+    0x210F => 0x0127, # hbar -> small letter h with stroke (#48870)
 )
 
-utf8proc_map(s::AbstractString, flags::Integer, chartransform=identity) = utf8proc_map(String(s), flags, chartransform)
+utf8proc_map(s::AbstractString, flags::Integer, chartransform::F = identity) where F = utf8proc_map(String(s)::String, flags, chartransform)
 
 # Documented in Unicode module
 function normalize(
@@ -252,8 +262,15 @@ julia> textwidth('⛵')
 2
 ```
 """
-function textwidth(c::AbstractChar)
-    ismalformed(c) && return 1
+textwidth(c::AbstractChar) = textwidth(Char(c)::Char)
+
+function textwidth(c::Char)
+    u = reinterpret(UInt32, c)
+    b = bswap(u) # from isascii(c)
+    b < 0x7f && return Int(b >= 0x20) # ASCII fast path
+    # We can't know a priori how terminals will render invalid UTF8 chars,
+    # so we conservatively decide a width of 1.
+    (ismalformed(c) || is_overlong_enc(u)) && return 1
     Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), c))
 end
 
@@ -270,6 +287,8 @@ julia> textwidth("March")
 """
 textwidth(s::AbstractString) = mapreduce(textwidth, +, s; init=0)
 
+textwidth(s::AnnotatedString) = textwidth(s.string)
+
 """
     lowercase(c::AbstractChar)
 
@@ -289,6 +308,8 @@ julia> lowercase('Ö')
 lowercase(c::T) where {T<:AbstractChar} = isascii(c) ? ('A' <= c <= 'Z' ? c + 0x20 : c) :
     T(ccall(:utf8proc_tolower, UInt32, (UInt32,), c))
 
+lowercase(c::AnnotatedChar) = AnnotatedChar(lowercase(c.char), annotations(c))
+
 """
     uppercase(c::AbstractChar)
 
@@ -308,6 +329,8 @@ julia> uppercase('ê')
 uppercase(c::T) where {T<:AbstractChar} = isascii(c) ? ('a' <= c <= 'z' ? c - 0x20 : c) :
     T(ccall(:utf8proc_toupper, UInt32, (UInt32,), c))
 
+uppercase(c::AnnotatedChar) = AnnotatedChar(uppercase(c.char), annotations(c))
+
 """
     titlecase(c::AbstractChar)
 
@@ -331,6 +354,8 @@ julia> uppercase('ǆ')
 titlecase(c::T) where {T<:AbstractChar} = isascii(c) ? ('a' <= c <= 'z' ? c - 0x20 : c) :
     T(ccall(:utf8proc_totitle, UInt32, (UInt32,), c))
 
+titlecase(c::AnnotatedChar) = AnnotatedChar(titlecase(c.char), annotations(c))
+
 ############################################################################
 
 # returns UTF8PROC_CATEGORY code in 0:30 giving Unicode category
@@ -339,7 +364,7 @@ function category_code(c::AbstractChar)
 end
 
 function category_code(x::Integer)
-    x ≤ 0x10ffff ? ccall(:utf8proc_category, Cint, (UInt32,), x) : Cint(30)
+    x ≤ 0x10ffff ? (@assume_effects :foldable @ccall utf8proc_category(UInt32(x)::UInt32)::Cint) : Cint(30)
 end
 
 # more human-readable representations of the category code
@@ -356,7 +381,7 @@ isassigned(c) = UTF8PROC_CATEGORY_CN < category_code(c) <= UTF8PROC_CATEGORY_CO
 ## libc character class predicates ##
 
 """
-    islowercase(c::AbstractChar) -> Bool
+    islowercase(c::AbstractChar)::Bool
 
 Tests whether a character is a lowercase letter (according to the Unicode
 standard's `Lowercase` derived property).
@@ -375,12 +400,13 @@ julia> islowercase('❤')
 false
 ```
 """
-islowercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_islower, Cint, (UInt32,), UInt32(c)))
+islowercase(c::AbstractChar) = ismalformed(c) ? false :
+    Bool(@assume_effects :foldable @ccall utf8proc_islower(UInt32(c)::UInt32)::Cint)
 
 # true for Unicode upper and mixed case
 
 """
-    isuppercase(c::AbstractChar) -> Bool
+    isuppercase(c::AbstractChar)::Bool
 
 Tests whether a character is an uppercase letter (according to the Unicode
 standard's `Uppercase` derived property).
@@ -399,10 +425,11 @@ julia> isuppercase('❤')
 false
 ```
 """
-isuppercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_isupper, Cint, (UInt32,), UInt32(c)))
+isuppercase(c::AbstractChar) = ismalformed(c) ? false :
+    Bool(@assume_effects :foldable @ccall utf8proc_isupper(UInt32(c)::UInt32)::Cint)
 
 """
-    iscased(c::AbstractChar) -> Bool
+    iscased(c::AbstractChar)::Bool
 
 Tests whether a character is cased, i.e. is lower-, upper- or title-cased.
 
@@ -417,9 +444,9 @@ end
 
 
 """
-    isdigit(c::AbstractChar) -> Bool
+    isdigit(c::AbstractChar)::Bool
 
-Tests whether a character is a decimal digit (0-9).
+Tests whether a character is an ASCII decimal digit (`0`-`9`).
 
 See also: [`isletter`](@ref).
 
@@ -438,7 +465,7 @@ false
 isdigit(c::AbstractChar) = (c >= '0') & (c <= '9')
 
 """
-    isletter(c::AbstractChar) -> Bool
+    isletter(c::AbstractChar)::Bool
 
 Test whether a character is a letter.
 A character is classified as a letter if it belongs to the Unicode general
@@ -461,7 +488,7 @@ false
 isletter(c::AbstractChar) = UTF8PROC_CATEGORY_LU <= category_code(c) <= UTF8PROC_CATEGORY_LO
 
 """
-    isnumeric(c::AbstractChar) -> Bool
+    isnumeric(c::AbstractChar)::Bool
 
 Tests whether a character is numeric.
 A character is classified as numeric if it belongs to the Unicode general category Number,
@@ -490,7 +517,7 @@ isnumeric(c::AbstractChar) = UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PRO
 # following C++ only control characters from the Latin-1 subset return true
 
 """
-    iscntrl(c::AbstractChar) -> Bool
+    iscntrl(c::AbstractChar)::Bool
 
 Tests whether a character is a control character.
 Control characters are the non-printing characters of the Latin-1 subset of Unicode.
@@ -507,16 +534,22 @@ false
 iscntrl(c::AbstractChar) = c <= '\x1f' || '\x7f' <= c <= '\u9f'
 
 """
-    ispunct(c::AbstractChar) -> Bool
+    ispunct(c::AbstractChar)::Bool
 
 Tests whether a character belongs to the Unicode general category Punctuation, i.e. a
 character whose category code begins with 'P'.
 
+!!! note
+    This behavior is different from the `ispunct` function in C.
+
 # Examples
 ```jldoctest
 julia> ispunct('α')
 false
 
+julia> ispunct('=')
+false
+
 julia> ispunct('/')
 true
 
@@ -529,7 +562,7 @@ ispunct(c::AbstractChar) = UTF8PROC_CATEGORY_PC <= category_code(c) <= UTF8PROC_
 # \u85 is the Unicode Next Line (NEL) character
 
 """
-    isspace(c::AbstractChar) -> Bool
+    isspace(c::AbstractChar)::Bool
 
 Tests whether a character is any whitespace character. Includes ASCII characters '\\t',
 '\\n', '\\v', '\\f', '\\r', and ' ', Latin-1 character U+0085, and characters in Unicode
@@ -555,7 +588,7 @@ true
     '\ua0' <= c && category_code(c) == UTF8PROC_CATEGORY_ZS
 
 """
-    isprint(c::AbstractChar) -> Bool
+    isprint(c::AbstractChar)::Bool
 
 Tests whether a character is printable, including spaces, but not a control character.
 
@@ -573,7 +606,7 @@ isprint(c::AbstractChar) = UTF8PROC_CATEGORY_LU <= category_code(c) <= UTF8PROC_
 # true in principal if a printer would use ink
 
 """
-    isxdigit(c::AbstractChar) -> Bool
+    isxdigit(c::AbstractChar)::Bool
 
 Test whether a character is a valid hexadecimal digit. Note that this does not
 include `x` (as in the standard `0x` prefix).
@@ -605,6 +638,8 @@ julia> uppercase("Julia")
 ```
 """
 uppercase(s::AbstractString) = map(uppercase, s)
+uppercase(s::AnnotatedString) = annotated_chartransform(uppercase, s)
+uppercase(s::SubString{<:AnnotatedString}) = uppercase(AnnotatedString(s))
 
 """
     lowercase(s::AbstractString)
@@ -620,9 +655,11 @@ julia> lowercase("STRINGS AND THINGS")
 ```
 """
 lowercase(s::AbstractString) = map(lowercase, s)
+lowercase(s::AnnotatedString) = annotated_chartransform(lowercase, s)
+lowercase(s::SubString{<:AnnotatedString}) = lowercase(AnnotatedString(s))
 
 """
-    titlecase(s::AbstractString; [wordsep::Function], strict::Bool=true) -> String
+    titlecase(s::AbstractString; [wordsep::Function], strict::Bool=true)::String
 
 Capitalize the first character of each word in `s`;
 if `strict` is true, every other character is
@@ -665,11 +702,31 @@ function titlecase(s::AbstractString; wordsep::Function = !isletter, strict::Boo
         end
         c0 = c
     end
-    return String(take!(b))
+    return takestring!(b)
 end
 
+# TODO: improve performance characteristics, room for a ~10x improvement.
+function titlecase(s::AnnotatedString; wordsep::Function = !isletter, strict::Bool=true)
+    initial_state = (; startword = true, state = Ref{Int32}(0),
+             c0 = eltype(s)(zero(UInt32)), wordsep, strict)
+    annotated_chartransform(s, initial_state) do c, state
+        if isgraphemebreak!(state.state, state.c0, c) && state.wordsep(c)
+            state = Base.setindex(state, true, :startword)
+            cnew = c
+        else
+            cnew = state.startword ? titlecase(c) : state.strict ? lowercase(c) : c
+            state = Base.setindex(state, false, :startword)
+        end
+        state = Base.setindex(state, c, :c0)
+        cnew, state
+    end
+end
+
+titlecase(s::SubString{<:AnnotatedString}; wordsep::Function = !isletter, strict::Bool=true) =
+    titlecase(AnnotatedString(s); wordsep=wordsep, strict=strict)
+
 """
-    uppercasefirst(s::AbstractString) -> String
+    uppercasefirst(s::AbstractString)::String
 
 Return `s` with the first character converted to uppercase (technically "title
 case" for Unicode). See also [`titlecase`](@ref) to capitalize the first
@@ -692,6 +749,18 @@ function uppercasefirst(s::AbstractString)
     string(c′, SubString(s, nextind(s, 1)))
 end
 
+# TODO: improve performance characteristics, room for a ~5x improvement.
+function uppercasefirst(s::AnnotatedString)
+    annotated_chartransform(s, true) do c, state
+        if state
+            (titlecase(c), false)
+        else
+            (c, state)
+        end
+    end
+end
+uppercasefirst(s::SubString{<:AnnotatedString}) = uppercasefirst(AnnotatedString(s))
+
 """
     lowercasefirst(s::AbstractString)
 
@@ -714,6 +783,18 @@ function lowercasefirst(s::AbstractString)
     string(c′, SubString(s, nextind(s, 1)))
 end
 
+# TODO: improve performance characteristics, room for a ~5x improvement.
+function lowercasefirst(s::AnnotatedString)
+    annotated_chartransform(s, true) do c, state
+        if state
+            (lowercase(c), false)
+        else
+            (c, state)
+        end
+    end
+end
+lowercasefirst(s::SubString{<:AnnotatedString}) = lowercasefirst(AnnotatedString(s))
+
 ############################################################################
 # iterators for grapheme segmentation
 
@@ -724,7 +805,7 @@ isgraphemebreak(c1::AbstractChar, c2::AbstractChar) =
 # Stateful grapheme break required by Unicode-9 rules: the string
 # must be processed in sequence, with state initialized to Ref{Int32}(0).
 # Requires utf8proc v2.0 or later.
-function isgraphemebreak!(state::Ref{Int32}, c1::AbstractChar, c2::AbstractChar)
+@inline function isgraphemebreak!(state::Ref{Int32}, c1::AbstractChar, c2::AbstractChar)
     if ismalformed(c1) || ismalformed(c2)
         state[] = 0
         return true
diff --git a/base/strings/util.jl b/base/strings/util.jl
index c77d45255a735..d8cc95d9ec801 100644
--- a/base/strings/util.jl
+++ b/base/strings/util.jl
@@ -1,13 +1,23 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-const Chars = Union{AbstractChar,Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}
+"""
+    Base.Chars = Union{AbstractChar,Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},AbstractSet{<:AbstractChar}}
+
+An alias type for either a single character or a tuple/vector/set of characters, used to describe arguments
+of several string-matching functions such as [`startswith`](@ref) and [`strip`](@ref).
+
+!!! compat "Julia 1.11"
+    Julia versions prior to 1.11 only included `Set`, not `AbstractSet`, in `Base.Chars` types.
+"""
+const Chars = Union{AbstractChar,Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},AbstractSet{<:AbstractChar}}
 
 # starts with and ends with predicates
 
 """
-    startswith(s::AbstractString, prefix::AbstractString)
+    startswith(s::AbstractString, prefix::Union{AbstractString,Base.Chars})
 
-Return `true` if `s` starts with `prefix`. If `prefix` is a vector or set
+Return `true` if `s` starts with `prefix`, which can be a string, a character,
+or a tuple/vector/set of characters. If `prefix` is a tuple/vector/set
 of characters, test whether the first character of `s` belongs to that set.
 
 See also [`endswith`](@ref), [`contains`](@ref).
@@ -30,10 +40,11 @@ end
 startswith(str::AbstractString, chars::Chars) = !isempty(str) && first(str)::AbstractChar in chars
 
 """
-    endswith(s::AbstractString, suffix::AbstractString)
+    endswith(s::AbstractString, suffix::Union{AbstractString,Base.Chars})
 
-Return `true` if `s` ends with `suffix`. If `suffix` is a vector or set of
-characters, test whether the last character of `s` belongs to that set.
+Return `true` if `s` ends with `suffix`, which can be a string, a character,
+or a tuple/vector/set of characters. If `suffix` is a tuple/vector/set
+of characters, test whether the last character of `s` belongs to that set.
 
 See also [`startswith`](@ref), [`contains`](@ref).
 
@@ -70,7 +81,8 @@ end
 """
     startswith(io::IO, prefix::Union{AbstractString,Base.Chars})
 
-Check if an `IO` object starts with a prefix.  See also [`peek`](@ref).
+Check if an `IO` object starts with a prefix, which can be either a string, a
+character, or a tuple/vector/set of characters.  See also [`peek`](@ref).
 """
 function Base.startswith(io::IO, prefix::Base.Chars)
     mark(io)
@@ -84,11 +96,10 @@ function Base.startswith(io::IO, prefix::Union{String,SubString{String}})
     reset(io)
     return s == codeunits(prefix)
 end
-Base.startswith(io::IO, prefix::AbstractString) = startswith(io, String(prefix))
+Base.startswith(io::IO, prefix::AbstractString) = startswith(io, String(prefix)::String)
 
 function endswith(a::Union{String, SubString{String}},
                   b::Union{String, SubString{String}})
-    cub = ncodeunits(b)
     astart = ncodeunits(a) - ncodeunits(b) + 1
     if astart < 1
         false
@@ -221,7 +232,7 @@ end
 # chop(s::AbstractString) = SubString(s, firstindex(s), prevind(s, lastindex(s)))
 
 """
-    chopprefix(s::AbstractString, prefix::Union{AbstractString,Regex}) -> SubString
+    chopprefix(s::AbstractString, prefix::Union{AbstractString,Regex,AbstractChar})::SubString
 
 Remove the prefix `prefix` from `s`. If `s` does not start with `prefix`, a string equal to `s` is returned.
 
@@ -230,6 +241,9 @@ See also [`chopsuffix`](@ref).
 !!! compat "Julia 1.8"
     This function is available as of Julia 1.8.
 
+!!! compat "Julia 1.13"
+    The method which accepts an `AbstractChar` prefix is available as of Julia 1.13.
+
 # Examples
 ```jldoctest
 julia> chopprefix("Hamburger", "Ham")
@@ -261,8 +275,16 @@ function chopprefix(s::Union{String, SubString{String}},
     end
 end
 
+function chopprefix(s::AbstractString, prefix::AbstractChar)
+    if !isempty(s) && first(s) == prefix
+        return SubString(s, nextind(s, firstindex(s)))
+    else
+        return SubString(s)
+    end
+end
+
 """
-    chopsuffix(s::AbstractString, suffix::Union{AbstractString,Regex}) -> SubString
+    chopsuffix(s::AbstractString, suffix::Union{AbstractString,Regex,AbstractChar})::SubString
 
 Remove the suffix `suffix` from `s`. If `s` does not end with `suffix`, a string equal to `s` is returned.
 
@@ -271,6 +293,9 @@ See also [`chopprefix`](@ref).
 !!! compat "Julia 1.8"
     This function is available as of Julia 1.8.
 
+!!! compat "Julia 1.13"
+    The method which accepts an `AbstractChar` suffix is available as of Julia 1.13.
+
 # Examples
 ```jldoctest
 julia> chopsuffix("Hamburger", "er")
@@ -304,11 +329,18 @@ function chopsuffix(s::Union{String, SubString{String}},
     end
 end
 
+function chopsuffix(s::AbstractString, suffix::AbstractChar)
+    if !isempty(s) && last(s) == suffix
+        return SubString(s, firstindex(s), prevind(s, lastindex(s)))
+    else
+        return SubString(s)
+    end
+end
 
 """
-    chomp(s::AbstractString) -> SubString
+    chomp(s::AbstractString)::SubString
 
-Remove a single trailing newline from a string.
+Remove a single trailing newline (i.e. "\\r\\n" or "\\n") from a string.
 
 See also [`chop`](@ref).
 
@@ -316,6 +348,12 @@ See also [`chop`](@ref).
 ```jldoctest
 julia> chomp("Hello\\n")
 "Hello"
+
+julia> chomp("World\\r\\n")
+"World"
+
+julia> chomp("Julia\\r\\n\\n")
+"Julia\\r\\n"
 ```
 """
 function chomp(s::AbstractString)
@@ -325,20 +363,25 @@ function chomp(s::AbstractString)
     (j < 1 || s[j] != '\r') && (return SubString(s, 1, j))
     return SubString(s, 1, prevind(s,j))
 end
-function chomp(s::String)
-    i = lastindex(s)
-    if i < 1 || codeunit(s,i) != 0x0a
-        return @inbounds SubString(s, 1, i)
-    elseif i < 2 || codeunit(s,i-1) != 0x0d
-        return @inbounds SubString(s, 1, prevind(s, i))
+
+@assume_effects :removable :foldable function chomp(s::Union{String, SubString{String}})
+    cu = codeunits(s)
+    ncu = length(cu)
+    len = if iszero(ncu)
+        0
     else
-        return @inbounds SubString(s, 1, prevind(s, i-1))
+        has_lf = @inbounds(cu[ncu]) == 0x0a
+        two_bytes = ncu > 1
+        has_cr = has_lf & two_bytes & (@inbounds(cu[ncu - two_bytes]) == 0x0d)
+        ncu - (has_lf + has_cr)
     end
+    off = s isa String ? 0 : s.offset
+    par = s isa String ? s : s.string
+    @inbounds @inline SubString{String}(par, off, len, Val{:noshift}())
 end
-
 """
-    lstrip([pred=isspace,] str::AbstractString) -> SubString
-    lstrip(str::AbstractString, chars) -> SubString
+    lstrip([pred=isspace,] str::AbstractString)::SubString
+    lstrip(str::AbstractString, chars)::SubString
 
 Remove leading characters from `str`, either those specified by `chars` or those for
 which the function `pred` returns `true`.
@@ -369,10 +412,11 @@ function lstrip(f, s::AbstractString)
 end
 lstrip(s::AbstractString) = lstrip(isspace, s)
 lstrip(s::AbstractString, chars::Chars) = lstrip(in(chars), s)
+lstrip(::AbstractString, ::AbstractString) = throw(ArgumentError("Both arguments are strings. The second argument should be a `Char` or collection of `Char`s"))
 
 """
-    rstrip([pred=isspace,] str::AbstractString) -> SubString
-    rstrip(str::AbstractString, chars) -> SubString
+    rstrip([pred=isspace,] str::AbstractString)::SubString
+    rstrip(str::AbstractString, chars)::SubString
 
 Remove trailing characters from `str`, either those specified by `chars` or those for
 which the function `pred` returns `true`.
@@ -402,10 +446,12 @@ function rstrip(f, s::AbstractString)
 end
 rstrip(s::AbstractString) = rstrip(isspace, s)
 rstrip(s::AbstractString, chars::Chars) = rstrip(in(chars), s)
+rstrip(::AbstractString, ::AbstractString) = throw(ArgumentError("Both arguments are strings. The second argument should be a `Char` or collection of `Char`s"))
+
 
 """
-    strip([pred=isspace,] str::AbstractString) -> SubString
-    strip(str::AbstractString, chars) -> SubString
+    strip([pred=isspace,] str::AbstractString)::SubString
+    strip(str::AbstractString, chars)::SubString
 
 Remove leading and trailing characters from `str`, either those specified by `chars` or
 those for which the function `pred` returns `true`.
@@ -429,12 +475,13 @@ julia> strip("{3, 5}\\n", ['{', '}', '\\n'])
 """
 strip(s::AbstractString) = lstrip(rstrip(s))
 strip(s::AbstractString, chars::Chars) = lstrip(rstrip(s, chars), chars)
+strip(::AbstractString, ::AbstractString) = throw(ArgumentError("Both arguments are strings. The second argument should be a `Char` or collection of `Char`s"))
 strip(f, s::AbstractString) = lstrip(f, rstrip(f, s))
 
 ## string padding functions ##
 
 """
-    lpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ') -> String
+    lpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ')::String
 
 Stringify `s` and pad the resulting string on the left with `p` to make it `n`
 characters (in [`textwidth`](@ref)) long. If `s` is already `n` characters long, an equal
@@ -454,17 +501,24 @@ function lpad(
     s::Union{AbstractChar,AbstractString},
     n::Integer,
     p::Union{AbstractChar,AbstractString}=' ',
-) :: String
+)
+    stringfn = if _isannotated(s) || _isannotated(p)
+        annotatedstring else string end
     n = Int(n)::Int
     m = signed(n) - Int(textwidth(s))::Int
-    m ≤ 0 && return string(s)
-    l = textwidth(p)
+    m ≤ 0 && return stringfn(s)
+    l = Int(textwidth(p))::Int
+    if l == 0
+        throw(ArgumentError("$(repr(p)) has zero textwidth" * (ncodeunits(p) != 1 ? "" :
+            "; maybe you want pad^max(0, npad - ncodeunits(str)) * str to pad by codeunits" *
+            (s isa AbstractString && codeunit(s) != UInt8 ? "?" : " (bytes)?"))))
+    end
     q, r = divrem(m, l)
-    r == 0 ? string(p^q, s) : string(p^q, first(p, r), s)
+    r == 0 ? stringfn(p^q, s) : stringfn(p^q, first(p, r), s)
 end
 
 """
-    rpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ') -> String
+    rpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ')::String
 
 Stringify `s` and pad the resulting string on the right with `p` to make it `n`
 characters (in [`textwidth`](@ref)) long. If `s` is already `n` characters long, an equal
@@ -484,13 +538,171 @@ function rpad(
     s::Union{AbstractChar,AbstractString},
     n::Integer,
     p::Union{AbstractChar,AbstractString}=' ',
-) :: String
+)
+    stringfn = if _isannotated(s) || _isannotated(p)
+        annotatedstring else string end
     n = Int(n)::Int
     m = signed(n) - Int(textwidth(s))::Int
-    m ≤ 0 && return string(s)
-    l = textwidth(p)
+    m ≤ 0 && return stringfn(s)
+    l = Int(textwidth(p))::Int
+    if l == 0
+        throw(ArgumentError("$(repr(p)) has zero textwidth" * (ncodeunits(p) != 1 ? "" :
+            "; maybe you want str * pad^max(0, npad - ncodeunits(str)) to pad by codeunits" *
+            (s isa AbstractString && codeunit(s) != UInt8 ? "?" : " (bytes)?"))))
+    end
     q, r = divrem(m, l)
-    r == 0 ? string(s, p^q) : string(s, p^q, first(p, r))
+    r == 0 ? stringfn(s, p^q) : stringfn(s, p^q, first(p, r))
+end
+
+"""
+    rtruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+
+Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the last characters
+with `replacement` if necessary. The default replacement string is "…".
+
+# Examples
+```jldoctest
+julia> s = rtruncate("🍕🍕 I love 🍕", 10)
+"🍕🍕 I lo…"
+
+julia> textwidth(s)
+10
+
+julia> rtruncate("foo", 3)
+"foo"
+```
+
+!!! compat "Julia 1.12"
+    This function was added in Julia 1.12.
+
+See also [`ltruncate`](@ref) and [`ctruncate`](@ref).
+"""
+function rtruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+    ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:right))
+    if isnothing(ret)
+        return string(str)
+    else
+        left, _ = ret::Tuple{Int,Int}
+        @views return str[begin:left] * replacement
+    end
+end
+
+"""
+    ltruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+
+Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the first characters
+with `replacement` if necessary. The default replacement string is "…".
+
+# Examples
+```jldoctest
+julia> s = ltruncate("🍕🍕 I love 🍕", 10)
+"…I love 🍕"
+
+julia> textwidth(s)
+10
+
+julia> ltruncate("foo", 3)
+"foo"
+```
+
+!!! compat "Julia 1.12"
+    This function was added in Julia 1.12.
+
+See also [`rtruncate`](@ref) and [`ctruncate`](@ref).
+"""
+function ltruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+    ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:left))
+    if isnothing(ret)
+        return string(str)
+    else
+        _, right = ret::Tuple{Int,Int}
+        @views return replacement * str[right:end]
+    end
+end
+
+"""
+    ctruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…'; prefer_left::Bool = true)
+
+Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the middle characters
+with `replacement` if necessary. The default replacement string is "…". By default, the truncation
+prefers keeping chars on the left, but this can be changed by setting `prefer_left` to `false`.
+
+# Examples
+```jldoctest
+julia> s = ctruncate("🍕🍕 I love 🍕", 10)
+"🍕🍕 …e 🍕"
+
+julia> textwidth(s)
+10
+
+julia> ctruncate("foo", 3)
+"foo"
+```
+
+!!! compat "Julia 1.12"
+    This function was added in Julia 1.12.
+
+See also [`ltruncate`](@ref) and [`rtruncate`](@ref).
+"""
+function ctruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…'; prefer_left::Bool = true)
+    ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:center), prefer_left)
+    if isnothing(ret)
+        return string(str)
+    else
+        left, right = ret::Tuple{Int,Int}
+        @views return str[begin:left] * replacement * str[right:end]
+    end
+end
+
+# return whether textwidth(str) <= maxwidth
+function check_textwidth(str::AbstractString, maxwidth::Integer)
+    # check efficiently for early return if str is wider than maxwidth
+    total_width = 0
+    for c in str
+        total_width += textwidth(c)
+        total_width > maxwidth && return false
+    end
+    return true
+end
+
+function string_truncate_boundaries(
+            str::AbstractString,
+            maxwidth::Integer,
+            replacement::Union{AbstractString,AbstractChar},
+            ::Val{mode},
+            prefer_left::Bool = true) where {mode}
+    maxwidth >= 0 || throw(ArgumentError("maxwidth $maxwidth should be non-negative"))
+    check_textwidth(str, maxwidth) && return nothing
+
+    l0, _ = left, right = firstindex(str), lastindex(str)
+    width = textwidth(replacement)
+    # used to balance the truncated width on either side
+    rm_width_left, rm_width_right, force_other = 0, 0, false
+    @inbounds while true
+        if mode === :left || (mode === :center && (!prefer_left || left > l0))
+            rm_width = textwidth(str[right])
+            if mode === :left || (rm_width_right <= rm_width_left || force_other)
+                force_other = false
+                (width += rm_width) <= maxwidth || break
+                rm_width_right += rm_width
+                right = prevind(str, right)
+            else
+                force_other = true
+            end
+        end
+        if mode ∈ (:right, :center)
+            rm_width = textwidth(str[left])
+            if mode === :left || (rm_width_left <= rm_width_right || force_other)
+                force_other = false
+                (width += textwidth(str[left])) <= maxwidth || break
+                rm_width_left += rm_width
+                left = nextind(str, left)
+            else
+                force_other = true
+            end
+        end
+    end
+    return prevind(str, left), nextind(str, right)
 end
 
 """
@@ -567,6 +779,8 @@ end
 
 # Specialization for partition(s,n) to return a SubString
 eltype(::Type{PartitionIterator{T}}) where {T<:AbstractString} = SubString{T}
+# SubStrings do not nest
+eltype(::Type{PartitionIterator{T}}) where {T<:SubString} = T
 
 function iterate(itr::PartitionIterator{<:AbstractString}, state = firstindex(itr.c))
     state > ncodeunits(itr.c) && return nothing
@@ -588,6 +802,101 @@ eachsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty=true) wher
 eachsplit(str::AbstractString; limit::Integer=0, keepempty=false) =
     eachsplit(str, isspace; limit, keepempty)
 
+"""
+    eachrsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
+    eachrsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
+
+Return an iterator over `SubString`s of `str`, produced when splitting on
+the delimiter(s) `dlm`, and yielded in reverse order (from right to left).
+`dlm` can be any of the formats allowed by [`findprev`](@ref)'s first argument
+(i.e. a string, a single character or a function), or a collection of characters.
+
+If `dlm` is omitted, it defaults to [`isspace`](@ref), and `keepempty` default to `false`.
+
+The optional keyword arguments are:
+ - If `limit > 0`, the iterator will split at most `limit - 1` times before returning
+   the rest of the string unsplit. `limit < 1` implies no cap to splits (default).
+ - `keepempty`: whether empty fields should be returned when iterating
+   Default is `false` without a `dlm` argument, `true` with a `dlm` argument.
+
+Note that unlike [`split`](@ref), [`rsplit`](@ref) and [`eachsplit`](@ref), this
+function iterates the substrings right to left as they occur in the input.
+
+See also [`eachsplit`](@ref), [`rsplit`](@ref).
+
+!!! compat "Julia 1.11"
+    This function requires Julia 1.11 or later.
+
+# Examples
+```jldoctest
+julia> a = "Ma.r.ch";
+
+julia> collect(eachrsplit(a, ".")) == ["ch", "r", "Ma"]
+true
+
+julia> collect(eachrsplit(a, "."; limit=2)) == ["ch", "Ma.r"]
+true
+```
+"""
+function eachrsplit end
+
+struct RSplitIterator{S <: AbstractString, F}
+    str::S
+    splitter::F
+    limit::Int
+    keepempty::Bool
+end
+
+eltype(::Type{<:RSplitIterator{T}}) where T = SubString{T}
+eltype(::Type{<:RSplitIterator{<:SubString{T}}}) where T = SubString{T}
+
+IteratorSize(::Type{<:RSplitIterator}) = SizeUnknown()
+
+eachrsplit(str::T, splitter; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
+    RSplitIterator(str, splitter, limit, keepempty)
+
+eachrsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
+          limit::Integer=0, keepempty=true) where {T<:AbstractString} =
+    eachrsplit(str, in(splitter); limit, keepempty)
+
+eachrsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty=true) where {T<:AbstractString} =
+    eachrsplit(str, isequal(splitter); limit, keepempty)
+
+# a bit oddball, but standard behavior in Perl, Ruby & Python:
+eachrsplit(str::AbstractString; limit::Integer=0, keepempty=false) =
+    eachrsplit(str, isspace; limit, keepempty)
+
+function Base.iterate(it::RSplitIterator, (to, remaining_splits)=(lastindex(it.str), it.limit-1))
+    to < 0 && return nothing
+    from = 1
+    next_to = -1
+    while !iszero(remaining_splits)
+        pos = findprev(it.splitter, it.str, to)
+        # If no matches: It returns the rest of the string, then the iterator stops.
+        if pos === nothing
+            from = 1
+            next_to = -1
+            break
+        else
+            from = nextind(it.str, last(pos))
+            # pos can be empty if we search for a zero-width delimiter, in which
+            # case pos is to:to-1.
+            # In this case, next_to must be to - 1, except if to is 0 or 1, in
+            # which case, we must stop iteration for some reason.
+            next_to = (isempty(pos) & (to < 2)) ? -1 : prevind(it.str, first(pos))
+
+            # If the element we emit is empty, discard it based on keepempty
+            if from > to && !(it.keepempty)
+                to = next_to
+                continue
+            end
+            break
+        end
+    end
+    from > to && !(it.keepempty) && return nothing
+    return (SubString(it.str, from, to), (next_to, remaining_splits-1))
+end
+
 """
     split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
     split(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
@@ -656,43 +965,32 @@ julia> rsplit(a, "."; limit=2)
  "h"
 ```
 """
-function rsplit end
-
 function rsplit(str::T, splitter;
-                limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
-    _rsplit(str, splitter, limit, keepempty, T <: SubString ? T[] : SubString{T}[])
-end
-function rsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
-                limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
-    _rsplit(str, in(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
-end
-function rsplit(str::T, splitter::AbstractChar;
-                limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
-    _rsplit(str, isequal(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
+               limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
+    reverse!(collect(eachrsplit(str, splitter; limit, keepempty)))
 end
 
-function _rsplit(str::AbstractString, splitter, limit::Integer, keepempty::Bool, strs::Array)
-    n = lastindex(str)::Int
-    r = something(findlast(splitter, str)::Union{Nothing,Int,UnitRange{Int}}, 0)
-    j, k = first(r), last(r)
-    while j > 0 && k > 0 && length(strs) != limit-1
-        (keepempty || k < n) && pushfirst!(strs, @inbounds SubString(str,nextind(str,k)::Int,n))
-        n = prevind(str, j)::Int
-        r = something(findprev(splitter,str,n)::Union{Nothing,Int,UnitRange{Int}}, 0)
-        j, k = first(r), last(r)
-    end
-    (keepempty || n > 0) && pushfirst!(strs, SubString(str,1,n))
-    return strs
-end
+# a bit oddball, but standard behavior in Perl, Ruby & Python:
 rsplit(str::AbstractString;
       limit::Integer=0, keepempty::Bool=false) =
-    rsplit(str, isspace; limit=limit, keepempty=keepempty)
-
-_replace(io, repl, str, r, pattern) = print(io, repl)
+    rsplit(str, isspace; limit, keepempty)
+
+_replace(io, repl::Union{<:AbstractString, <:AbstractChar}, str, r, pattern) =
+    write(io, repl)
+function _replace(io, repl, str, r, pattern)
+    if applicable(position, io)
+        p1 = position(io)
+        print(io, repl)
+        p2 = position(io)
+        p2 - p1
+    else
+        write(io, repr(repl))
+    end
+end
 _replace(io, repl::Function, str, r, pattern) =
-    print(io, repl(SubString(str, first(r), last(r))))
+    _replace(io, repl(SubString(str, first(r), last(r))), str, r, pattern)
 _replace(io, repl::Function, str, r, pattern::Function) =
-    print(io, repl(str[first(r)]))
+    _replace(io, repl(str[first(r)]), str, r, pattern)
 
 _pat_replacer(x) = x
 _free_pat_replacer(x) = nothing
@@ -722,38 +1020,11 @@ end
 function _replace_finish(io::IO, str, count::Int,
                          e1::Int, patterns::Tuple, replaces::Tuple, rs::Tuple)
     n = 1
-    i = a = firstindex(str)
-    while true
-        p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ?
-        r = rs[p]
-        j, k = first(r), last(r)
-        j > e1 && break
-        if i == a || i <= k
-            # copy out preserved portion
-            GC.@preserve str unsafe_write(io, pointer(str, i), UInt(j-i))
-            # copy out replacement string
-            _replace(io, replaces[p], str, r, patterns[p])
-        end
-        if k < j
-            i = j
-            j == e1 && break
-            k = nextind(str, j)
-        else
-            i = k = nextind(str, k)
-        end
-        n == count && break
-        let k = k
-            rs = map(patterns, rs) do p, r
-                if first(r) < k
-                    r = findnext(p, str, k)
-                    if r === nothing || first(r) == 0
-                        return e1+1:0
-                    end
-                    r isa Int && (r = r:r) # findnext / performance fix
-                end
-                return r
-            end
-        end
+    i = start = firstindex(str)
+    while n <= count
+        rs, _, r, _, i = @inline _replace_once(
+            io, str, start, e1, patterns, replaces, rs, count, n, i)
+        first(r) >= e1 && break
         n += 1
     end
     foreach(_free_pat_replacer, patterns)
@@ -761,6 +1032,44 @@ function _replace_finish(io::IO, str, count::Int,
     return io
 end
 
+function _replace_once(io::IO, str, start::Int, e1::Int,
+                       patterns::Tuple, replaces::Tuple, rs::Tuple,
+                       count::Int, n::Int, i::Int)
+    x = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ?
+    r = rs[x]
+    j, k = first(r), last(r)
+    j > e1 && return rs, x, r, 0, i
+    nb = if i == start || i <= k
+        # copy out preserved portion
+        GC.@preserve str unsafe_write(io, pointer(str, i), UInt(j-i))
+        # copy out replacement string
+        _replace(io, replaces[x], str, r, patterns[x])
+    else
+        0
+    end
+    if k < j
+        i = j
+        j == e1 && return rs, x, r, nb, i
+        k = nextind(str, j)
+    else
+        i = k = nextind(str, k)
+    end
+    n == count && return rs, x, r, nb, i
+    let k = k
+        rs = map(patterns, rs) do p, r
+            if first(r) < k
+                r = findnext(p, str, k)
+                if r === nothing || first(r) == 0
+                    return e1+1:0
+                end
+                r isa Int && (r = r:r) # findnext / performance fix
+            end
+            return r
+        end
+    end
+    return rs, x, r, nb, i
+end
+
 # note: leave str untyped here to make it easier for packages like StringViews to hook in
 function _replace_(io::IO, str, pat_repl::NTuple{N, Pair}, count::Int) where N
     if count == 0
@@ -785,7 +1094,7 @@ function _replace_(str, pat_repl::NTuple{N, Pair}, count::Int) where N
         return String(str)
     end
     out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str)))
-    return String(take!(_replace_finish(out, str, count, e1, patterns, replaces, rs)))
+    return takestring!(_replace_finish(out, str, count, e1, patterns, replaces, rs))
 end
 
 """
@@ -807,8 +1116,11 @@ is supplied, the transformed string is instead written to `io` (returning `io`).
 (For example, this can be used in conjunction with an [`IOBuffer`](@ref) to re-use
 a pre-allocated buffer array in-place.)
 
-Multiple patterns can be specified, and they will be applied left-to-right
-simultaneously, so only one pattern will be applied to any character, and the
+Multiple patterns can be specified: The input string will be scanned only once
+from start (left) to end (right), and the first matching replacement
+will be applied to each substring. Replacements are applied in the order of
+the arguments provided if they match substrings starting at the same
+input string position. Thus, only one pattern will be applied to any character, and the
 patterns will only be applied to the input text, not the replacements.
 
 !!! compat "Julia 1.7"
@@ -926,7 +1238,7 @@ function hex2bytes!(dest::AbstractArray{UInt8}, itr)
     return dest
 end
 
-@inline number_from_hex(c::AbstractChar) = number_from_hex(Char(c))
+@inline number_from_hex(c::AbstractChar) = number_from_hex(Char(c)::Char)
 @inline number_from_hex(c::Char) = number_from_hex(UInt8(c))
 @inline function number_from_hex(c::UInt8)
     UInt8('0') <= c <= UInt8('9') && return c - UInt8('0')
@@ -936,8 +1248,8 @@ end
 end
 
 """
-    bytes2hex(itr) -> String
-    bytes2hex(io::IO, itr)
+    bytes2hex(itr)::String
+    bytes2hex(io::IO, itr)::Nothing
 
 Convert an iterator `itr` of bytes to its hexadecimal string representation, either
 returning a `String` via `bytes2hex(itr)` or writing the string to an `io` stream
@@ -966,12 +1278,12 @@ function bytes2hex end
 
 function bytes2hex(itr)
     eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8"))
-    b = Base.StringVector(2*length(itr))
+    b = Base.StringMemory(2*length(itr))
     @inbounds for (i, x) in enumerate(itr)
         b[2i - 1] = hex_chars[1 + x >> 4]
         b[2i    ] = hex_chars[1 + x & 0xf]
     end
-    return String(b)
+    return unsafe_takestring(b)
 end
 
 function bytes2hex(io::IO, itr)
@@ -1009,7 +1321,7 @@ julia> ascii("abcdefgh")
 "abcdefgh"
 ```
 """
-ascii(x::AbstractString) = ascii(String(x))
+ascii(x::AbstractString) = ascii(String(x)::String)
 
 Base.rest(s::Union{String,SubString{String}}, i=1) = SubString(s, i)
 function Base.rest(s::AbstractString, st...)
@@ -1017,5 +1329,5 @@ function Base.rest(s::AbstractString, st...)
     for c in Iterators.rest(s, st...)
         print(io, c)
     end
-    return String(take!(io))
+    return takestring!(io)
 end
diff --git a/base/subarray.jl b/base/subarray.jl
index 901410e908d1e..65e0026bf90e2 100644
--- a/base/subarray.jl
+++ b/base/subarray.jl
@@ -52,8 +52,10 @@ viewindexing(I::Tuple{Slice, Slice, Vararg{Any}}) = (@inline; viewindexing(tail(
 # A UnitRange can follow Slices, but only if all other indices are scalar
 viewindexing(I::Tuple{Slice, AbstractUnitRange, Vararg{ScalarIndex}}) = IndexLinear()
 viewindexing(I::Tuple{Slice, Slice, Vararg{ScalarIndex}}) = IndexLinear() # disambiguate
-# In general, ranges are only fast if all other indices are scalar
-viewindexing(I::Tuple{AbstractRange, Vararg{ScalarIndex}}) = IndexLinear()
+# In general, scalar ranges are only fast if all other indices are scalar
+# Other ranges, such as those of `CartesianIndex`es, are not fast even if these
+# are followed by `ScalarIndex`es
+viewindexing(I::Tuple{AbstractRange{<:ScalarIndex}, Vararg{ScalarIndex}}) = IndexLinear()
 # All other index combinations are slow
 viewindexing(I::Tuple{Vararg{Any}}) = IndexCartesian()
 # Of course, all other array types are slow
@@ -63,6 +65,7 @@ viewindexing(I::Tuple{AbstractArray, Vararg{Any}}) = IndexCartesian()
 size(V::SubArray) = (@inline; map(length, axes(V)))
 
 similar(V::SubArray, T::Type, dims::Dims) = similar(V.parent, T, dims)
+similar(::Type{TA}, dims::Dims) where {T,N,P,TA<:SubArray{T,N,P}} = similar(P, dims)
 
 sizeof(V::SubArray) = length(V) * sizeof(eltype(V))
 sizeof(V::SubArray{<:Any,<:Any,<:Array}) = length(V) * elsize(V.parent)
@@ -108,16 +111,44 @@ unaliascopy(A::SubArray) = typeof(A)(unaliascopy(A.parent), map(unaliascopy, A.i
 
 # When the parent is an Array we can trim the size down a bit. In the future this
 # could possibly be extended to any mutable array.
-function unaliascopy(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Vararg{Union{Real,AbstractRange,Array}}},LD}
-    dest = Array{T}(undef, index_lengths(V.indices...))
-    copyto!(dest, V)
-    SubArray{T,N,A,I,LD}(dest, map(_trimmedindex, V.indices), 0, Int(LD))
-end
+function unaliascopy(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Vararg{Union{ScalarIndex,AbstractRange{<:ScalarIndex},Array{<:Union{ScalarIndex,AbstractCartesianIndex}}}}},LD}
+    dest = Array{T}(undef, _trimmedshape(V.indices...))
+    trimmedpind = _trimmedpind(V.indices...)
+    vdest = trimmedpind isa Tuple{Vararg{Union{Slice,Colon}}} ? dest : view(dest, trimmedpind...)
+    copyto!(vdest, view(V, _trimmedvind(V.indices...)...))
+    indices = map(_trimmedindex, V.indices)
+    stride1 = LD ? compute_stride1(dest, indices) : 0
+    offset1 = LD ? compute_offset1(dest, stride1, indices) : 0
+    SubArray{T,N,A,I,LD}(dest, indices, offset1, stride1)
+end
+# Get the proper trimmed shape
+_trimmedshape(::ScalarIndex, rest...) = (1, _trimmedshape(rest...)...)
+_trimmedshape(i::AbstractRange, rest...) = (isempty(i) ? zero(eltype(i)) : maximum(i), _trimmedshape(rest...)...)
+_trimmedshape(i::Union{UnitRange,StepRange,OneTo}, rest...) = (length(i), _trimmedshape(rest...)...)
+_trimmedshape(i::AbstractArray{<:ScalarIndex}, rest...) = (length(i), _trimmedshape(rest...)...)
+_trimmedshape(i::AbstractArray{<:AbstractCartesianIndex{0}}, rest...) = _trimmedshape(rest...)
+_trimmedshape(i::AbstractArray{<:AbstractCartesianIndex{N}}, rest...) where {N} = (length(i), ntuple(Returns(1), Val(N - 1))..., _trimmedshape(rest...)...)
+_trimmedshape() = ()
+# We can avoid the repetition from `AbstractArray{CartesianIndex{0}}`
+_trimmedpind(i, rest...) = (map(Returns(:), axes(i))..., _trimmedpind(rest...)...)
+_trimmedpind(i::AbstractRange, rest...) = (i, _trimmedpind(rest...)...)
+_trimmedpind(i::Union{UnitRange,StepRange,OneTo}, rest...) = ((:), _trimmedpind(rest...)...)
+_trimmedpind(i::AbstractArray{<:AbstractCartesianIndex{0}}, rest...) = _trimmedpind(rest...)
+_trimmedpind() = ()
+_trimmedvind(i, rest...) = (map(Returns(:), axes(i))..., _trimmedvind(rest...)...)
+_trimmedvind(i::AbstractArray{<:AbstractCartesianIndex{0}}, rest...) = (map(first, axes(i))..., _trimmedvind(rest...)...)
+_trimmedvind() = ()
 # Transform indices to be "dense"
-_trimmedindex(i::Real) = oftype(i, 1)
-_trimmedindex(i::AbstractUnitRange) = oftype(i, oneto(length(i)))
-_trimmedindex(i::AbstractArray) = oftype(i, reshape(eachindex(IndexLinear(), i), axes(i)))
-
+_trimmedindex(i::ScalarIndex) = oftype(i, 1)
+_trimmedindex(i::AbstractRange) = i
+_trimmedindex(i::Union{UnitRange,StepRange,OneTo}) = oftype(i, oneto(length(i)))
+_trimmedindex(i::AbstractArray{<:ScalarIndex}) = oftype(i, reshape(eachindex(IndexLinear(), i), axes(i)))
+_trimmedindex(i::AbstractArray{<:AbstractCartesianIndex{0}}) = oftype(i, copy(i))
+function _trimmedindex(i::AbstractArray{<:AbstractCartesianIndex{N}}) where {N}
+    padding = ntuple(Returns(1), Val(N - 1))
+    ax1 = eachindex(IndexLinear(), i)
+    return oftype(i, reshape(CartesianIndices((ax1, padding...)), axes(i)))
+end
 ## SubArray creation
 # We always assume that the dimensionality of the parent matches the number of
 # indices that end up getting passed to it, so we store the parent as a
@@ -127,6 +158,11 @@ _maybe_reshape_parent(A::AbstractArray, ::NTuple{1, Bool}) = reshape(A, Val(1))
 _maybe_reshape_parent(A::AbstractArray{<:Any,1}, ::NTuple{1, Bool}) = reshape(A, Val(1))
 _maybe_reshape_parent(A::AbstractArray{<:Any,N}, ::NTuple{N, Bool}) where {N} = A
 _maybe_reshape_parent(A::AbstractArray, ::NTuple{N, Bool}) where {N} = reshape(A, Val(N))
+# The trailing singleton indices could be eliminated after bounds checking.
+rm_singleton_indices(ndims::Tuple, J1, Js...) = (J1, rm_singleton_indices(IteratorsMD._splitrest(ndims, index_ndims(J1)), Js...)...)
+rm_singleton_indices(::Tuple{}, ::ScalarIndex, Js...) = rm_singleton_indices((), Js...)
+rm_singleton_indices(::Tuple) = ()
+
 """
     view(A, inds...)
 
@@ -137,7 +173,8 @@ Calling [`getindex`](@ref) or [`setindex!`](@ref) on the returned value
 (often a [`SubArray`](@ref)) computes the indices to access or modify the
 parent array on the fly.  The behavior is undefined if the shape of the parent array is
 changed after `view` is called because there is no bound check for the parent array; e.g.,
-it may cause a segmentation fault.
+it may cause a segmentation fault. It is likewise undefined behavior to modify the `inds`
+array(s) after construction of the view.
 
 Some immutable parent arrays (like ranges) may choose to simply
 recompute a new array in some circumstances instead of returning
@@ -173,22 +210,15 @@ julia> view(2:5, 2:3) # returns a range as type is immutable
 3:4
 ```
 """
-function view(A::AbstractArray{<:Any,N}, I::Vararg{Any,M}) where {N,M}
+function view(A::AbstractArray, I::Vararg{Any,M}) where {M}
     @inline
     J = map(i->unalias(A,i), to_indices(A, I))
     @boundscheck checkbounds(A, J...)
-    if length(J) > ndims(A) && J[N+1:end] isa Tuple{Vararg{Int}}
-        # view([1,2,3], :, 1) does not need to reshape
-        return unsafe_view(A, J[1:N]...)
-    end
-    unsafe_view(_maybe_reshape_parent(A, index_ndims(J...)), J...)
+    J′ = rm_singleton_indices(ntuple(Returns(true), Val(ndims(A))), J...)
+    unsafe_view(_maybe_reshape_parent(A, index_ndims(J′...)), J′...)
 end
 
 # Ranges implement getindex to return recomputed ranges; use that for views, too (when possible)
-function view(r1::OneTo, r2::OneTo)
-    @_propagate_inbounds_meta
-    getindex(r1, r2)
-end
 function view(r1::AbstractUnitRange, r2::AbstractUnitRange{<:Integer})
     @_propagate_inbounds_meta
     getindex(r1, r2)
@@ -270,15 +300,8 @@ reindex(idxs::Tuple{AbstractMatrix, Vararg{Any}}, subidxs::Tuple{Any, Any, Varar
     (@_propagate_inbounds_meta; (idxs[1][subidxs[1], subidxs[2]], reindex(tail(idxs), tail(tail(subidxs)))...))
 
 # In general, we index N-dimensional parent arrays with N indices
-@generated function reindex(idxs::Tuple{AbstractArray{T,N}, Vararg{Any}}, subidxs::Tuple{Vararg{Any}}) where {T,N}
-    if length(subidxs.parameters) >= N
-        subs = [:(subidxs[$d]) for d in 1:N]
-        tail = [:(subidxs[$d]) for d in N+1:length(subidxs.parameters)]
-        :(@_propagate_inbounds_meta; (idxs[1][$(subs...)], reindex(tail(idxs), ($(tail...),))...))
-    else
-        :(throw(ArgumentError("cannot re-index SubArray with fewer indices than dimensions\nThis should not occur; please submit a bug report.")))
-    end
-end
+reindex(idxs::Tuple{AbstractArray{<:Any,N}, Vararg{Any}}, subidxs::Tuple{Vararg{Any}}) where {N} =
+    (@_propagate_inbounds_meta; (idxs[1][subidxs[1:N]...], reindex(tail(idxs), subidxs[N+1:end])...))
 
 # In general, we simply re-index the parent indices by the provided ones
 SlowSubArray{T,N,P,I} = SubArray{T,N,P,I,false}
@@ -291,37 +314,66 @@ end
 
 # But SubArrays with fast linear indexing pre-compute a stride and offset
 FastSubArray{T,N,P,I} = SubArray{T,N,P,I,true}
+# We define a convenience functions to compute the shifted parent index
+# This differs from reindex as this accepts the view directly, instead of its indices
+@inline _reindexlinear(V::FastSubArray, i::Int) = V.offset1 + V.stride1*i
+@inline _reindexlinear(V::FastSubArray, i::AbstractUnitRange{Int}) = V.offset1 .+ V.stride1 .* i
+
 function getindex(V::FastSubArray, i::Int)
     @inline
     @boundscheck checkbounds(V, i)
-    @inbounds r = V.parent[V.offset1 + V.stride1*i]
-    r
-end
-# We can avoid a multiplication if the first parent index is a Colon or AbstractUnitRange,
-# or if all the indices are scalars, i.e. the view is for a single value only
-FastContiguousSubArray{T,N,P,I<:Union{Tuple{Union{Slice, AbstractUnitRange}, Vararg{Any}},
-                                      Tuple{Vararg{ScalarIndex}}}} = SubArray{T,N,P,I,true}
-function getindex(V::FastContiguousSubArray, i::Int)
-    @inline
-    @boundscheck checkbounds(V, i)
-    @inbounds r = V.parent[V.offset1 + i]
+    @inbounds r = V.parent[_reindexlinear(V, i)]
     r
 end
+
 # For vector views with linear indexing, we disambiguate to favor the stride/offset
 # computation as that'll generally be faster than (or just as fast as) re-indexing into a range.
 function getindex(V::FastSubArray{<:Any, 1}, i::Int)
     @inline
     @boundscheck checkbounds(V, i)
-    @inbounds r = V.parent[V.offset1 + V.stride1*i]
+    @inbounds r = V.parent[_reindexlinear(V, i)]
     r
 end
-function getindex(V::FastContiguousSubArray{<:Any, 1}, i::Int)
+
+# We can avoid a multiplication if the first parent index is a Colon or AbstractUnitRange,
+# or if all the indices are scalars, i.e. the view is for a single value only
+FastContiguousSubArray{T,N,P,I<:Union{Tuple{AbstractUnitRange, Vararg{Any}},
+                                      Tuple{Vararg{ScalarIndex}}}} = SubArray{T,N,P,I,true}
+
+@inline _reindexlinear(V::FastContiguousSubArray, i::Int) = V.offset1 + i
+@inline _reindexlinear(V::FastContiguousSubArray, i::AbstractUnitRange{Int}) = V.offset1 .+ i
+
+"""
+An internal type representing arrays stored contiguously in memory.
+"""
+const DenseArrayType{T,N} = Union{
+    DenseArray{T,N},
+    <:FastContiguousSubArray{T,N,<:DenseArray},
+}
+
+"""
+An internal type representing mutable arrays stored contiguously in memory.
+"""
+const MutableDenseArrayType{T,N} = Union{
+    Array{T, N},
+    Memory{T},
+    FastContiguousSubArray{T,N,<:Array},
+    FastContiguousSubArray{T,N,<:Memory}
+}
+
+# parents of FastContiguousSubArrays may support fast indexing with AbstractUnitRanges,
+# so we may just forward the indexing to the parent
+# This may only be done for non-offset ranges, as the result would otherwise have offset axes
+const _OneBasedRanges = Union{OneTo{Int}, UnitRange{Int}, Slice{OneTo{Int}}, IdentityUnitRange{OneTo{Int}}}
+function getindex(V::FastContiguousSubArray, i::_OneBasedRanges)
     @inline
     @boundscheck checkbounds(V, i)
-    @inbounds r = V.parent[V.offset1 + i]
+    @inbounds r = V.parent[_reindexlinear(V, i)]
     r
 end
 
+@inline getindex(V::FastContiguousSubArray, i::Colon) = getindex(V, to_indices(V, (:,))...)
+
 # Indexed assignment follows the same pattern as `getindex` above
 function setindex!(V::SubArray{T,N}, x, I::Vararg{Int,N}) where {T,N}
     @inline
@@ -332,28 +384,25 @@ end
 function setindex!(V::FastSubArray, x, i::Int)
     @inline
     @boundscheck checkbounds(V, i)
-    @inbounds V.parent[V.offset1 + V.stride1*i] = x
-    V
-end
-function setindex!(V::FastContiguousSubArray, x, i::Int)
-    @inline
-    @boundscheck checkbounds(V, i)
-    @inbounds V.parent[V.offset1 + i] = x
+    @inbounds V.parent[_reindexlinear(V, i)] = x
     V
 end
 function setindex!(V::FastSubArray{<:Any, 1}, x, i::Int)
     @inline
     @boundscheck checkbounds(V, i)
-    @inbounds V.parent[V.offset1 + V.stride1*i] = x
+    @inbounds V.parent[_reindexlinear(V, i)] = x
     V
 end
-function setindex!(V::FastContiguousSubArray{<:Any, 1}, x, i::Int)
+
+function setindex!(V::FastSubArray, x, i::AbstractUnitRange{Int})
     @inline
     @boundscheck checkbounds(V, i)
-    @inbounds V.parent[V.offset1 + i] = x
+    @inbounds V.parent[_reindexlinear(V, i)] = x
     V
 end
 
+@inline setindex!(V::FastSubArray, x, i::Colon) = setindex!(V, x, to_indices(V, (i,))...)
+
 function isassigned(V::SubArray{T,N}, I::Vararg{Int,N}) where {T,N}
     @inline
     @boundscheck checkbounds(Bool, V, I...) || return false
@@ -363,30 +412,17 @@ end
 function isassigned(V::FastSubArray, i::Int)
     @inline
     @boundscheck checkbounds(Bool, V, i) || return false
-    @inbounds r = isassigned(V.parent, V.offset1 + V.stride1*i)
-    r
-end
-function isassigned(V::FastContiguousSubArray, i::Int)
-    @inline
-    @boundscheck checkbounds(Bool, V, i) || return false
-    @inbounds r = isassigned(V.parent, V.offset1 + i)
+    @inbounds r = isassigned(V.parent, _reindexlinear(V, i))
     r
 end
 function isassigned(V::FastSubArray{<:Any, 1}, i::Int)
     @inline
     @boundscheck checkbounds(Bool, V, i) || return false
-    @inbounds r = isassigned(V.parent, V.offset1 + V.stride1*i)
-    r
-end
-function isassigned(V::FastContiguousSubArray{<:Any, 1}, i::Int)
-    @inline
-    @boundscheck checkbounds(Bool, V, i) || return false
-    @inbounds r = isassigned(V.parent, V.offset1 + i)
+    @inbounds r = isassigned(V.parent, _reindexlinear(V, i))
     r
 end
 
 IndexStyle(::Type{<:FastSubArray}) = IndexLinear()
-IndexStyle(::Type{<:SubArray}) = IndexCartesian()
 
 # Strides are the distance in memory between adjacent elements in a given dimension
 # which we determine from the strides of the parent
@@ -396,7 +432,8 @@ substrides(strds::Tuple{}, ::Tuple{}) = ()
 substrides(strds::NTuple{N,Int}, I::Tuple{ScalarIndex, Vararg{Any}}) where N = (substrides(tail(strds), tail(I))...,)
 substrides(strds::NTuple{N,Int}, I::Tuple{Slice, Vararg{Any}}) where N = (first(strds), substrides(tail(strds), tail(I))...)
 substrides(strds::NTuple{N,Int}, I::Tuple{AbstractRange, Vararg{Any}}) where N = (first(strds)*step(I[1]), substrides(tail(strds), tail(I))...)
-substrides(strds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("strides is invalid for SubArrays with indices of type $(typeof(I[1]))"))
+substrides(strds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError(
+    LazyString("strides is invalid for SubArrays with indices of type ", typeof(I[1]))))
 
 stride(V::SubArray, d::Integer) = d <= ndims(V) ? strides(V)[d] : strides(V)[end] * size(V)[end]
 
@@ -408,7 +445,7 @@ compute_stride1(s, inds, I::Tuple{ScalarIndex, Vararg{Any}}) =
     (@inline; compute_stride1(s*length(inds[1]), tail(inds), tail(I)))
 compute_stride1(s, inds, I::Tuple{AbstractRange, Vararg{Any}}) = s*step(I[1])
 compute_stride1(s, inds, I::Tuple{Slice, Vararg{Any}}) = s
-compute_stride1(s, inds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("invalid strided index type $(typeof(I[1]))"))
+compute_stride1(s, inds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError(LazyString("invalid strided index type ", typeof(I[1]))))
 
 elsize(::Type{<:SubArray{<:Any,<:Any,P}}) where {P} = elsize(P)
 
@@ -416,12 +453,8 @@ iscontiguous(A::SubArray) = iscontiguous(typeof(A))
 iscontiguous(::Type{<:SubArray}) = false
 iscontiguous(::Type{<:FastContiguousSubArray}) = true
 
-first_index(V::FastSubArray) = V.offset1 + V.stride1 # cached for fast linear SubArrays
-function first_index(V::SubArray)
-    P, I = parent(V), V.indices
-    s1 = compute_stride1(P, I)
-    s1 + compute_offset1(P, s1, I)
-end
+first_index(V::FastSubArray) = V.offset1 + V.stride1 * firstindex(V) # cached for fast linear SubArrays
+first_index(V::SubArray) = compute_linindex(parent(V), V.indices)
 
 # Computing the first index simply steps through the indices, accumulating the
 # sum of index each multiplied by the parent's stride.
@@ -447,11 +480,6 @@ function compute_linindex(parent, I::NTuple{N,Any}) where N
     IP = fill_to_length(axes(parent), OneTo(1), Val(N))
     compute_linindex(first(LinearIndices(parent)), 1, IP, I)
 end
-function compute_linindex(f, s, IP::Tuple, I::Tuple{ScalarIndex, Vararg{Any}})
-    @inline
-    Δi = I[1]-first(IP[1])
-    compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I))
-end
 function compute_linindex(f, s, IP::Tuple, I::Tuple{Any, Vararg{Any}})
     @inline
     Δi = first(I[1])-first(IP[1])
@@ -466,10 +494,6 @@ find_extended_inds(::ScalarIndex, I...) = (@inline; find_extended_inds(I...))
 find_extended_inds(i1, I...) = (@inline; (i1, find_extended_inds(I...)...))
 find_extended_inds() = ()
 
-function unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{RangeIndex}}}) where {T,N,P}
-    return unsafe_convert(Ptr{T}, V.parent) + _memory_offset(V.parent, map(first, V.indices)...)
-end
-
 pointer(V::FastSubArray, i::Int) = pointer(V.parent, V.offset1 + V.stride1*i)
 pointer(V::FastContiguousSubArray, i::Int) = pointer(V.parent, V.offset1 + i)
 
@@ -493,4 +517,24 @@ function _indices_sub(i1::AbstractArray, I...)
     (axes(i1)..., _indices_sub(I...)...)
 end
 
+axes1(::SubArray{<:Any,0}) = OneTo(1)
+axes1(S::SubArray) = (@inline; _axes1_sub(S.indices...))
+_axes1_sub() = ()
+_axes1_sub(::Real, I...) = (@inline; _axes1_sub(I...))
+_axes1_sub(::AbstractArray{<:Any,0}, I...) = _axes1_sub(I...)
+function _axes1_sub(i1::AbstractArray, I...)
+    @inline
+    axes1(i1)
+end
+
 has_offset_axes(S::SubArray) = has_offset_axes(S.indices...)
+
+function replace_in_print_matrix(S::SubArray{<:Any,2,<:AbstractMatrix}, i::Integer, j::Integer, s::AbstractString)
+    replace_in_print_matrix(S.parent, to_indices(S.parent, reindex(S.indices, (i,j)))..., s)
+end
+function replace_in_print_matrix(S::SubArray{<:Any,1,<:AbstractVector}, i::Integer, j::Integer, s::AbstractString)
+    replace_in_print_matrix(S.parent, to_indices(S.parent, reindex(S.indices, (i,)))..., j, s)
+end
+
+# XXX: this is considerably more unsafe than the other similarly named methods
+unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s))
diff --git a/base/summarysize.jl b/base/summarysize.jl
index 9bbae187cab12..bce9b1190e55f 100644
--- a/base/summarysize.jl
+++ b/base/summarysize.jl
@@ -6,14 +6,21 @@ struct SummarySize
     frontier_i::Vector{Int}
     exclude::Any
     chargeall::Any
+    count::Bool
 end
 
+nth_pointer_isdefined(obj, i::Int) = ccall(:jl_nth_pointer_isdefined, Cint, (Any, Csize_t), obj, i-1) != 0
+get_nth_pointer(obj, i::Int) = ccall(:jl_get_nth_pointer, Any, (Any, Csize_t), obj, i-1)
+
 """
-    Base.summarysize(obj; exclude=Union{...}, chargeall=Union{...}) -> Int
+    Base.summarysize(obj; count = false, exclude=Union{...}, chargeall=Union{...})::Int
 
-Compute the amount of memory, in bytes, used by all unique objects reachable from the argument.
+Compute all unique objects reachable from the argument and return either their size in
+memory (in bytes) or the number of allocations they span.
 
 # Keyword Arguments
+- `count`: if false, return the total size of the objects in memory. if true, return the
+  number of allocations spanned by the object.
 - `exclude`: specifies the types of objects to exclude from the traversal.
 - `chargeall`: specifies the types of objects to always charge the size of all of their
   fields, even if those fields would normally be excluded.
@@ -30,13 +37,17 @@ julia> Base.summarysize(Ref(rand(100)))
 
 julia> sizeof(Ref(rand(100)))
 8
+
+julia> Base.summarysize(Core.svec(1.0, "testing", true); count=true)
+4
 ```
 """
 function summarysize(obj;
+                     count::Bool = false,
                      exclude = Union{DataType, Core.TypeName, Core.MethodInstance},
                      chargeall = Union{Core.TypeMapEntry, Method})
     @nospecialize obj exclude chargeall
-    ss = SummarySize(IdDict(), Any[], Int[], exclude, chargeall)
+    ss = SummarySize(IdDict(), Any[], Int[], exclude, chargeall, count)
     size::Int = ss(obj)
     while !isempty(ss.frontier_x)
         # DFS heap traversal of everything without a specialization
@@ -44,21 +55,34 @@ function summarysize(obj;
         x = ss.frontier_x[end]
         i = ss.frontier_i[end]
         val = nothing
-        if isa(x, SimpleVector)
+        if isa(x, Core.SimpleVector)
             nf = length(x)
             if isassigned(x, i)
                 val = x[i]
             end
-        elseif isa(x, Array)
-            nf = length(x)
-            if ccall(:jl_array_isassigned, Cint, (Any, UInt), x, i - 1) != 0
-                val = x[i]
+        elseif isa(x, GenericMemory)
+            T = eltype(x)
+            if allocatedinline(T)
+                np = datatype_npointers(T)
+                nf = length(x) * np
+                idx = (i-1) ÷ np + 1
+                if @inbounds @inline isassigned(x, idx)
+                    elt = x[idx]
+                    p = (i-1) % np + 1
+                    if nth_pointer_isdefined(elt, p)
+                        val = get_nth_pointer(elt, p)
+                    end
+                end
+            else
+                nf = length(x)
+                if @inbounds @inline isassigned(x, i)
+                    val = x[i]
+                end
             end
         else
-            nf = nfields(x)
-            ft = typeof(x).types
-            if !isbitstype(ft[i]) && isdefined(x, i)
-                val = getfield(x, i)
+            nf = datatype_npointers(typeof(x))
+            if nth_pointer_isdefined(x, i)
+                val = get_nth_pointer(x, i)
             end
         end
         if nf > i
@@ -74,15 +98,15 @@ function summarysize(obj;
     return size
 end
 
-(ss::SummarySize)(@nospecialize obj) = _summarysize(ss, obj)
+(ss::SummarySize)(@nospecialize obj) = _summarysize(ss, obj, ss.count)
 # define the general case separately to make sure it is not specialized for every type
-@noinline function _summarysize(ss::SummarySize, @nospecialize obj)
+@noinline function _summarysize(ss::SummarySize, @nospecialize(obj), count::Bool)
     issingletontype(typeof(obj)) && return 0
     # NOTE: this attempts to discover multiple copies of the same immutable value,
     # and so is somewhat approximate.
     key = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), obj)
     haskey(ss.seen, key) ? (return 0) : (ss.seen[key] = true)
-    if nfields(obj) > 0
+    if datatype_npointers(typeof(obj)) > 0
         push!(ss.frontier_x, obj)
         push!(ss.frontier_i, 1)
     end
@@ -96,7 +120,7 @@ end
         # 0-field mutable structs are not unique
         return gc_alignment(0)
     end
-    return sz
+    return count ? 1 : sz
 end
 
 (::SummarySize)(obj::Symbol) = 0
@@ -105,14 +129,13 @@ end
 function (ss::SummarySize)(obj::String)
     key = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), obj)
     haskey(ss.seen, key) ? (return 0) : (ss.seen[key] = true)
-    return Core.sizeof(Int) + Core.sizeof(obj)
+    return (ss.count ? 1 : (Core.sizeof(Int) + Core.sizeof(obj)))
 end
 
 function (ss::SummarySize)(obj::DataType)
     key = pointer_from_objref(obj)
     haskey(ss.seen, key) ? (return 0) : (ss.seen[key] = true)
-    size::Int = 7 * Core.sizeof(Int) + 6 * Core.sizeof(Int32)
-    size += 4 * nfields(obj) + ifelse(Sys.WORD_SIZE == 64, 4, 0)
+    size::Int = ss.count ? 1 : sizeof(DataType)
     size += ss(obj.parameters)::Int
     if isdefined(obj, :types)
         size += ss(obj.types)::Int
@@ -123,24 +146,23 @@ end
 function (ss::SummarySize)(obj::Core.TypeName)
     key = pointer_from_objref(obj)
     haskey(ss.seen, key) ? (return 0) : (ss.seen[key] = true)
-    return Core.sizeof(obj) + (isdefined(obj, :mt) ? ss(obj.mt) : 0)
+    return (ss.count ? 1 : Core.sizeof(obj))
 end
 
-function (ss::SummarySize)(obj::Array)
+function (ss::SummarySize)(obj::GenericMemory)
     haskey(ss.seen, obj) ? (return 0) : (ss.seen[obj] = true)
-    headersize = 4*sizeof(Int) + 8 + max(0, ndims(obj)-2)*sizeof(Int)
-    size::Int = headersize
+    headersize = 2 * sizeof(Int)
+    size::Int = (ss.count ? 1 : headersize)
     datakey = unsafe_convert(Ptr{Cvoid}, obj)
     if !haskey(ss.seen, datakey)
         ss.seen[datakey] = true
-        dsize = Core.sizeof(obj)
-        T = eltype(obj)
-        if isbitsunion(T)
-            # add 1 union selector byte for each element
-            dsize += length(obj)
+        if !ss.count
+            size += sizeof(obj)
+        elseif pointer_from_objref(obj) + 16 != datakey
+            size += 1
         end
-        size += dsize
-        if !isempty(obj) && T !== Symbol && (!Base.allocatedinline(T) || (T isa DataType && !Base.datatype_pointerfree(T)))
+        T = eltype(obj)
+        if !isempty(obj) && T !== Symbol && (!allocatedinline(T) || (T isa DataType && !datatype_pointerfree(T)))
             push!(ss.frontier_x, obj)
             push!(ss.frontier_i, 1)
         end
@@ -148,10 +170,10 @@ function (ss::SummarySize)(obj::Array)
     return size
 end
 
-function (ss::SummarySize)(obj::SimpleVector)
+function (ss::SummarySize)(obj::Core.SimpleVector)
     key = pointer_from_objref(obj)
     haskey(ss.seen, key) ? (return 0) : (ss.seen[key] = true)
-    size::Int = Core.sizeof(obj)
+    size::Int = (ss.count ? 1 : Core.sizeof(obj))
     if !isempty(obj)
         push!(ss.frontier_x, obj)
         push!(ss.frontier_i, 1)
@@ -161,7 +183,7 @@ end
 
 function (ss::SummarySize)(obj::Module)
     haskey(ss.seen, obj) ? (return 0) : (ss.seen[obj] = true)
-    size::Int = Core.sizeof(obj)
+    size::Int = (ss.count ? 1 : Core.sizeof(obj))
     for binding in names(obj, all = true)
         if isdefined(obj, binding) && !isdeprecated(obj, binding)
             value = getfield(obj, binding)
@@ -182,7 +204,7 @@ end
 
 function (ss::SummarySize)(obj::Task)
     haskey(ss.seen, obj) ? (return 0) : (ss.seen[obj] = true)
-    size::Int = Core.sizeof(obj)
+    size::Int = (ss.count ? 1 : Core.sizeof(obj))
     if isdefined(obj, :code)
         size += ss(obj.code)::Int
     end
@@ -193,4 +215,4 @@ function (ss::SummarySize)(obj::Task)
     return size
 end
 
-(ss::SummarySize)(obj::BigInt) = _summarysize(ss, obj) + obj.alloc*sizeof(Base.GMP.Limb)
+(ss::SummarySize)(obj::BigInt) = _summarysize(ss, obj, ss.count) + (ss.count ? 1 : obj.alloc * sizeof(GMP.Limb))
diff --git a/base/sysimg.jl b/base/sysimg.jl
index 09ea015b0f903..fd71544c205cc 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -1,15 +1,61 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-Core.include(Main, "Base.jl")
+Base.include("Base.jl") # finish populating Base (currently just has the Compiler)
 
+# Set up Main module by importing from Base
 using .Base
+using .Base.MainInclude # ans, err, and sometimes Out
 
-# Set up Main module
-using Base.MainInclude # ans, err, and sometimes Out
-import Base.MainInclude: eval, include
+ccall(:jl_init_restored_module, Cvoid, (Any,), Base)
+
+# These definitions calls Base._include rather than Base.include to get
+# one-frame stacktraces for the common case of using include(fname) in Main.
+
+"""
+    include([mapexpr::Function,] path::AbstractString)
+
+Evaluate the contents of the input source file in the global scope of the containing module.
+Every `Module` (except those defined with `baremodule`) has a private 1-argument definition
+of `include`, which evaluates the file in that module, for use inside that module.
+Returns the result of the last evaluated expression of the input file. During including,
+a task-local include path is set to the directory containing the file. Nested calls to
+`include` will search relative to that path. This function is typically used to load source
+interactively, or to combine files in packages that are broken into multiple source files.
+The argument `path` is normalized using [`normpath`](@ref) which will resolve
+relative path tokens such as `..` and convert `/` to the appropriate path separator.
+
+The optional first argument `mapexpr` can be used to transform the included code before
+it is evaluated: for each parsed expression `expr` in `path`, the `include` function
+actually evaluates `mapexpr(expr)`.  If it is omitted, `mapexpr` defaults to [`identity`](@ref).
+
+Use [`Base.include`](@ref) to evaluate a file into another module.
+
+!!! note
+    Julia's syntax lowering recognizes an explicit call to a literal `include`
+    at top-level and inserts an implicit `@Core.latestworld` to make any include'd
+    definitions visible to subsequent code. Note however that this recognition
+    is *syntactic*. I.e. assigning `const myinclude = include` may require
+    an explicit `@Core.latestworld` call after `myinclude`.
+
+!!! compat "Julia 1.5"
+    Julia 1.5 is required for passing the `mapexpr` argument.
+"""
+Base.IncludeInto
+
+"""
+    eval(expr)
+
+Evaluate an expression in the global scope of the containing module.
+Every `Module` (except those defined with `baremodule`) has a private 1-argument definition
+of `eval`, which evaluates expressions in that module, for use inside that module.
+"""
+Core.EvalInto
+
+const include = Base.IncludeInto(Main)
+const eval = Core.EvalInto(Main)
 
 # Ensure this file is also tracked
-pushfirst!(Base._included_files, (@__MODULE__, abspath(@__FILE__)))
+pushfirst!(Base._included_files, (Main, abspath(@__FILE__)))
 
 # set up depot & load paths to be able to find stdlib packages
 Base.init_depot_path()
@@ -29,55 +75,36 @@ let
 
     # Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl
     # Run with the `--exclude-jlls` option to filter out all JLL packages
-    stdlibs = [
-        # No dependencies
-        :ArgTools,
-        :Artifacts,
-        :Base64,
-        :CRC32c,
-        :FileWatching,
-        :Libdl,
-        :Logging,
-        :Mmap,
-        :NetworkOptions,
-        :SHA,
-        :Serialization,
-        :Sockets,
-        :Unicode,
-
-        # 1-depth packages
-        :LinearAlgebra,
-        :Markdown,
-        :Printf,
-        :Random,
-        :Tar,
-
-        # 2-depth packages
-        :Dates,
-        :Future,
-        :InteractiveUtils,
-        :LibGit2,
-        :UUIDs,
-
-        # 3-depth packages
-        :REPL,
-        :TOML,
-
-        # 4-depth packages
-        :LibCURL,
-
-        # 5-depth packages
-        :Downloads,
-
-        # 6-depth packages
-        :Pkg,
-    ]
+    if isdefined(Base.BuildSettings, :INCLUDE_STDLIBS)
+        # e.g. INCLUDE_STDLIBS = "FileWatching,Libdl,Artifacts,SHA,Sockets,LinearAlgebra,Random"
+        stdlibs = Symbol.(split(Base.BuildSettings.INCLUDE_STDLIBS, ","))
+    else
+        # TODO: this is included for compatibility with PackageCompiler, which looks for it.
+        # This should eventually be removed so we only use `BuildSettings`.
+        stdlibs = [
+            # No dependencies
+            :FileWatching, # used by loading.jl -- implicit assumption that init runs
+            :Libdl, # Transitive through LinAlg
+            :Artifacts, # Transitive through LinAlg
+            :SHA, # transitive through Random
+            :Sockets, # used by stream.jl
+
+            # Transitive through LingAlg
+            # OpenBLAS_jll
+            # libblastrampoline_jll
+
+            # 1-depth packages
+            :LinearAlgebra, # Commits type-piracy and GEMM
+            :Random, # Can't be removed due to rand being exported by Base
+        ]
+    end
     # PackageCompiler can filter out stdlibs so it can be empty
     maxlen = maximum(textwidth.(string.(stdlibs)); init=0)
 
     tot_time_stdlib = 0.0
     # use a temp module to avoid leaving the type of this closure in Main
-    m = Module()
+    push!(empty!(LOAD_PATH), "@stdlib")
+    m = Core.Module()
     GC.@preserve m begin
         print_time = @eval m (mod, t) -> (print(rpad(string(mod) * "  ", $maxlen + 3, "─"));
                                           Base.time_print(stdout, t * 10^9); println())
@@ -89,8 +116,9 @@ let
             print_time(stdlib, tt)
         end
         for dep in Base._require_dependencies
-            dep[3] == 0.0 && continue
-            push!(Base._included_files, dep[1:2])
+            mod, path, fsize, mtime = dep[1], dep[2], dep[3], dep[5]
+            (fsize == 0 || mtime == 0.0) && continue
+            push!(Base._included_files, (mod, path))
         end
         empty!(Base._require_dependencies)
         Base._track_dependencies[] = false
@@ -105,6 +133,7 @@ let
     Base.init_load_path() # want to be able to find external packages in userimg.jl
 
     ccall(:jl_clear_implicit_imports, Cvoid, (Any,), Main)
+
     tot_time_userimg = @elapsed (isfile("userimg.jl") && Base.include(Main, "userimg.jl"))
 
     tot_time_base = (Base.end_base_include - Base.start_base_include) * 10.0^(-9)
@@ -124,6 +153,7 @@ end
 
 empty!(Base.TOML_CACHE.d)
 Base.TOML.reinit!(Base.TOML_CACHE.p, "")
+@eval Base BUILDROOT = ""
 @eval Sys begin
     BINDIR = ""
     STDLIB = ""
diff --git a/base/sysinfo.jl b/base/sysinfo.jl
index 2c962088484e7..ca67695e7e31c 100644
--- a/base/sysinfo.jl
+++ b/base/sysinfo.jl
@@ -8,14 +8,17 @@ Provide methods for retrieving information about hardware and the operating syst
 export BINDIR,
        STDLIB,
        CPU_THREADS,
+       EFFECTIVE_CPU_THREADS,
        CPU_NAME,
        WORD_SIZE,
        ARCH,
        MACHINE,
        KERNEL,
        JIT,
+       PAGESIZE,
        cpu_info,
        cpu_summary,
+       sysimage_target,
        uptime,
        loadavg,
        free_memory,
@@ -33,9 +36,13 @@ export BINDIR,
        iswindows,
        isjsvm,
        isexecutable,
-       which
+       isreadable,
+       iswritable,
+       username,
+       which,
+       detectwsl
 
-import ..Base: show
+import ..Base: DATAROOTDIR, show
 
 """
     Sys.BINDIR::String
@@ -49,7 +56,7 @@ global BINDIR::String = ccall(:jl_get_julia_bindir, Any, ())::String
 
 A string containing the full path to the directory containing the `stdlib` packages.
 """
-global STDLIB::String = "$BINDIR/../share/julia/stdlib/v$(VERSION.major).$(VERSION.minor)" # for bootstrap
+global STDLIB::String = "$BINDIR/$DATAROOTDIR/julia/stdlib/v$(VERSION.major).$(VERSION.minor)" # for bootstrap
 # In case STDLIB change after julia is built, the variable below can be used
 # to update cached method locations to updated ones.
 const BUILD_STDLIB_PATH = STDLIB
@@ -65,9 +72,28 @@ CPU cores, for example, in the presence of
 [hyper-threading](https://en.wikipedia.org/wiki/Hyper-threading).
 
 See Hwloc.jl or CpuId.jl for extended information, including number of physical cores.
+
+See also: [`Sys.EFFECTIVE_CPU_THREADS`](@ref) for a container-aware CPU count that respects
+cgroup limits.
 """
 global CPU_THREADS::Int = 1 # for bootstrap, changed on startup
 
+"""
+    Sys.EFFECTIVE_CPU_THREADS::Int
+
+The effective number of logical CPU cores available to the Julia process, taking into
+account container limits (e.g., Docker `--cpus`, Kubernetes CPU limits, cgroup quotas).
+This is the minimum of the hardware CPU thread count and any imposed CPU limits.
+
+In non-containerized environments, this typically equals `Sys.CPU_THREADS`. In containerized
+environments, it respects cgroup CPU limits and provides a more accurate measure of
+available parallelism.
+
+Use this constant when determining default thread pool sizes or parallelism levels to
+ensure proper behavior in containerized deployments.
+"""
+global EFFECTIVE_CPU_THREADS::Int = 1 # for bootstrap, changed on startup
+
 """
     Sys.ARCH::Symbol
 
@@ -97,7 +123,52 @@ Standard word size on the current machine, in bits.
 """
 const WORD_SIZE = Core.sizeof(Int) * 8
 
-global SC_CLK_TCK::Clong, CPU_NAME::String, JIT::String
+"""
+    Sys.SC_CLK_TCK::Clong
+
+The number of system "clock ticks" per second, corresponding to `sysconf(_SC_CLK_TCK)` on
+POSIX systems, or `0` if it is unknown.
+
+CPU times, e.g. as returned by `Sys.cpu_info()`, are in units of ticks, i.e. units of `1 / Sys.SC_CLK_TCK` seconds if `Sys.SC_CLK_TCK > 0`.
+"""
+global SC_CLK_TCK::Clong
+
+"""
+    Sys.CPU_NAME::String
+
+A string representing the name of CPU.
+
+# Examples
+For example, `Sys.CPU_NAME` might equal `"tigerlake"` on an
+[Intel Core "Tiger Lake" CPU](https://en.wikipedia.org/wiki/Tiger_Lake),
+or `"apple-m1"` on an [Apple M1 CPU](https://en.wikipedia.org/wiki/Apple_M1).
+
+Note: Included in the detailed system information via `versioninfo(verbose=true)`.
+"""
+global CPU_NAME::String
+
+"""
+    Sys.JIT::String
+
+A string representing the specific Just-In-Time (JIT) compiler being utilized in the current runtime.
+
+# Examples
+Currently, this equals `"ORCJIT"` for the LLVM "ORC" ("On-Request Compilation") JIT library:
+```jldoctest
+julia> Sys.JIT
+"ORCJIT"
+```
+
+Note: Included in the detailed system information via `versioninfo(verbose=true)`.
+"""
+global JIT::String
+
+"""
+    Sys.PAGESIZE::Clong
+
+A number providing the pagesize of the given OS.  Common values being 4kb or 64kb on Linux.
+"""
+global PAGESIZE::Clong
 
 function __init__()
     env_threads = nothing
@@ -106,7 +177,7 @@ function __init__()
     end
     global CPU_THREADS = if env_threads !== nothing
         env_threads = tryparse(Int, env_threads)
-        if !(env_threads isa Int && env_threads > 0)
+        if env_threads === nothing || env_threads <= 0
             env_threads = Int(ccall(:jl_cpu_threads, Int32, ()))
             Core.print(Core.stderr, "WARNING: couldn't parse `JULIA_CPU_THREADS` environment variable. Defaulting Sys.CPU_THREADS to $env_threads.\n")
         end
@@ -114,9 +185,11 @@ function __init__()
     else
         Int(ccall(:jl_cpu_threads, Int32, ()))
     end
+    global EFFECTIVE_CPU_THREADS = min(CPU_THREADS, Int(ccall(:jl_effective_threads, Int32, ())))
     global SC_CLK_TCK = ccall(:jl_SC_CLK_TCK, Clong, ())
     global CPU_NAME = ccall(:jl_get_cpu_name, Ref{String}, ())
     global JIT = ccall(:jl_get_JIT, Ref{String}, ())
+    global PAGESIZE = Int(Sys.isunix() ? ccall(:jl_getpagesize, Clong, ()) : ccall(:jl_getallocationgranularity, Clong, ()))
     __init_build()
     nothing
 end
@@ -124,8 +197,8 @@ end
 # without pulling in anything unnecessary like `CPU_NAME`
 function __init_build()
     global BINDIR = ccall(:jl_get_julia_bindir, Any, ())::String
-    vers = "v$(VERSION.major).$(VERSION.minor)"
-    global STDLIB = abspath(BINDIR, "..", "share", "julia", "stdlib", vers)
+    vers = "v$(string(VERSION.major)).$(string(VERSION.minor))"
+    global STDLIB = abspath(BINDIR, DATAROOTDIR, "julia", "stdlib", vers)
     nothing
 end
 
@@ -138,6 +211,24 @@ mutable struct UV_cpu_info_t
     cpu_times!idle::UInt64
     cpu_times!irq::UInt64
 end
+
+"""
+    Sys.CPUinfo
+
+The `CPUinfo` type is a mutable struct with the following fields:
+- `model::String`: CPU model information.
+- `speed::Int32`: CPU speed (in MHz).
+- `cpu_times!user::UInt64`: Time spent in user mode. CPU state shows CPU time used by user space processes.
+- `cpu_times!nice::UInt64`: Time spent in nice mode. CPU state is a subset of the "user" state and shows the CPU time used by processes that have a positive niceness, meaning a lower priority than other tasks.
+- `cpu_times!sys::UInt64`: Time spent in system mode. CPU state shows the amount of CPU time used by the kernel.
+- `cpu_times!idle::UInt64`: Time spent in idle mode. CPU state shows the CPU time that's not actively being used.
+- `cpu_times!irq::UInt64`: Time spent handling interrupts. CPU state shows the amount of time the CPU has been servicing hardware interrupts.
+
+The times are in units of `1/Sys.SC_CLK_TCK` seconds if `Sys.SC_CLK_TCK > 0`; otherwise they are in
+unknown units.
+
+Note: Included in the detailed system information via `versioninfo(verbose=true)`.
+"""
 mutable struct CPUinfo
     model::String
     speed::Int32
@@ -152,6 +243,8 @@ CPUinfo(info::UV_cpu_info_t) = CPUinfo(unsafe_string(info.model), info.speed,
     info.cpu_times!user, info.cpu_times!nice, info.cpu_times!sys,
     info.cpu_times!idle, info.cpu_times!irq)
 
+public CPUinfo
+
 function _show_cpuinfo(io::IO, info::Sys.CPUinfo, header::Bool=true, prefix::AbstractString="    ")
     tck = SC_CLK_TCK
     if header
@@ -173,7 +266,7 @@ function _show_cpuinfo(io::IO, info::Sys.CPUinfo, header::Bool=true, prefix::Abs
     end
 end
 
-show(io::IO, info::CPUinfo) = _show_cpuinfo(io, info, true, "    ")
+show(io::IO, ::MIME"text/plain", info::CPUinfo) = _show_cpuinfo(io, info, true, "    ")
 
 function _cpu_summary(io::IO, cpu::AbstractVector{CPUinfo}, i, j)
     if j-i < 9
@@ -200,6 +293,17 @@ function _cpu_summary(io::IO, cpu::AbstractVector{CPUinfo}, i, j)
     println(io)
 end
 
+"""
+    Sys.cpu_summary(io::IO=stdout, cpu::AbstractVector{CPUinfo}=cpu_info())
+
+Print a summary of CPU information to the `io` stream (defaulting to [`stdout`](@ref)), organizing and displaying aggregated data for CPUs with the same model, for a given array of `CPUinfo` data structures
+describing a set of CPUs (which defaults to the return value of the [`Sys.cpu_info`](@ref) function).
+
+The summary includes aggregated information for each distinct CPU model,
+providing details such as average CPU speed and total time spent in different modes (user, nice, sys, idle, irq) across all cores with the same model.
+
+Note: Included in the detailed system information via `versioninfo(verbose=true)`.
+"""
 function cpu_summary(io::IO=stdout, cpu::AbstractVector{CPUinfo} = cpu_info())
     model = cpu[1].model
     first = 1
@@ -212,6 +316,18 @@ function cpu_summary(io::IO=stdout, cpu::AbstractVector{CPUinfo} = cpu_info())
     _cpu_summary(io, cpu, first, length(cpu))
 end
 
+"""
+    Sys.cpu_info()
+
+Return a vector of `CPUinfo` objects, where each object represents information about a CPU core.
+
+This is pretty-printed in a tabular format by `Sys.cpu_summary`, which is included in the output
+of `versioninfo(verbose=true)`, so most users will not need to access the `CPUinfo`
+data structures directly.
+
+The function provides information about each CPU, including model, speed, and usage statistics such as user time, nice time, system time, idle time, and interrupt time.
+
+"""
 function cpu_info()
     UVcpus = Ref{Ptr{UV_cpu_info_t}}()
     count = Ref{Int32}()
@@ -225,10 +341,27 @@ function cpu_info()
     return cpus
 end
 
+"""
+    Sys.sysimage_target()
+
+Return the CPU target string that was used to build the current system image.
+
+This function returns the original CPU target specification that was passed to Julia
+when the system image was compiled. This can be useful for reproducing the same
+system image or for understanding what CPU features were enabled during compilation.
+
+If the system image was built with the default settings this will return `"native"`.
+
+See also [`JULIA_CPU_TARGET`](@ref).
+"""
+function sysimage_target()
+    return ccall(:jl_get_sysimage_cpu_target, Ref{String}, ())
+end
+
 """
     Sys.uptime()
 
-Gets the current system uptime in seconds.
+Get the current system uptime in seconds.
 """
 function uptime()
     uptime_ = Ref{Float64}()
@@ -276,7 +409,7 @@ free_memory() = ccall(:uv_get_available_memory, UInt64, ())
 
 Get the total memory in RAM (including that which is currently used) in bytes.
 This amount may be constrained, e.g., by Linux control groups. For the unconstrained
-amount, see `Sys.physical_memory()`.
+amount, see `Sys.total_physical_memory()`.
 """
 function total_memory()
     constrained = ccall(:uv_get_constrained_memory, UInt64, ())
@@ -315,7 +448,7 @@ end
 
 Get the maximum resident set size utilized in bytes.
 See also:
-    - man page of `getrusage`(2) on Linux and FreeBSD.
+    - man page of `getrusage`(2) on Linux and BSD.
     - Windows API `GetProcessMemoryInfo`.
 """
 maxrss() = ccall(:jl_maxrss, Csize_t, ())
@@ -446,6 +579,27 @@ including e.g. a WebAssembly JavaScript embedding in a web browser.
 """
 isjsvm(os::Symbol) = (os === :Emscripten)
 
+"""
+    Sys.detectwsl()
+
+Runtime predicate for testing if Julia is running inside
+Windows Subsystem for Linux (WSL).
+
+!!! note
+    Unlike `Sys.iswindows`, `Sys.islinux` etc., this is a runtime test, and thus
+    cannot meaningfully be used in `@static if` constructs.
+
+!!! compat "Julia 1.12"
+    This function requires at least Julia 1.12.
+"""
+function detectwsl()
+    # We use the same approach as canonical/snapd do to detect WSL
+    islinux() && (
+        isfile("/proc/sys/fs/binfmt_misc/WSLInterop")
+        || isdir("/run/WSL")
+    )
+end
+
 for f in (:isunix, :islinux, :isbsd, :isapple, :iswindows, :isfreebsd, :isopenbsd, :isnetbsd, :isdragonfly, :isjsvm)
     @eval $f() = $(getfield(@__MODULE__, f)(KERNEL))
 end
@@ -469,24 +623,9 @@ windows_version
 
 const WINDOWS_VISTA_VER = v"6.0"
 
-"""
-    Sys.isexecutable(path::String)
-
-Return `true` if the given `path` has executable permissions.
-
-!!! note
-    Prior to Julia 1.6, this did not correctly interrogate filesystem
-    ACLs on Windows, therefore it would return `true` for any
-    file.  From Julia 1.6 on, it correctly determines whether the
-    file is marked as executable or not.
-"""
-function isexecutable(path::String)
-    # We use `access()` and `X_OK` to determine if a given path is
-    # executable by the current user.  `X_OK` comes from `unistd.h`.
-    X_OK = 0x01
-    return ccall(:jl_fs_access, Cint, (Ptr{UInt8}, Cint), path, X_OK) == 0
-end
-isexecutable(path::AbstractString) = isexecutable(String(path))
+const isexecutable = Base.isexecutable
+const isreadable   = Base.isreadable
+const iswritable   = Base.iswritable
 
 """
     Sys.which(program_name::String)
@@ -565,6 +704,29 @@ function which(program_name::String)
     # If we couldn't find anything, don't return anything
     nothing
 end
-which(program_name::AbstractString) = which(String(program_name))
+which(program_name::AbstractString) = which(String(program_name)::String)
+
+"""
+    Sys.username()::String
+
+Return the username for the current user. If the username cannot be determined
+or is empty, this function throws an error.
+
+To retrieve a username that is overridable via an environment variable,
+e.g., `USER`, consider using
+```julia
+user = get(Sys.username, ENV, "USER")
+```
+
+!!! compat "Julia 1.11"
+    This function requires at least Julia 1.11.
+
+See also [`homedir`](@ref).
+"""
+function username()
+    pw = Libc.getpw()
+    isempty(pw.username) && Base.uv_error("username", Base.UV_ENOENT)
+    return pw.username
+end
 
 end # module Sys
diff --git a/base/task.jl b/base/task.jl
index 611135cb6981c..244a8f70a768a 100644
--- a/base/task.jl
+++ b/base/task.jl
@@ -14,7 +14,7 @@ struct CapturedException <: Exception
         # Typically the result of a catch_backtrace()
 
         # Process bt_raw so that it can be safely serialized
-        bt_lines = process_backtrace(bt_raw, 100) # Limiting this to 100 lines.
+        bt_lines = process_backtrace(stacktrace(bt_raw))[1:min(100, end)] # Limiting this to 100 lines.
         CapturedException(ex, bt_lines)
     end
 
@@ -26,9 +26,9 @@ function showerror(io::IO, ce::CapturedException)
 end
 
 """
-    capture_exception(ex, bt) -> Exception
+    capture_exception(ex, bt)::Exception
 
-Returns an exception, possibly incorporating information from a backtrace `bt`. Defaults to returning [`CapturedException(ex, bt)`](@ref).
+Return an exception, possibly incorporating information from a backtrace `bt`. Defaults to returning [`CapturedException(ex, bt)`](@ref).
 
 Used in [`asyncmap`](@ref) and [`asyncmap!`](@ref) to capture exceptions thrown during
 the user-supplied function call.
@@ -53,7 +53,6 @@ push!(c::CompositeException, ex) = push!(c.exceptions, ex)
 pushfirst!(c::CompositeException, ex) = pushfirst!(c.exceptions, ex)
 isempty(c::CompositeException) = isempty(c.exceptions)
 iterate(c::CompositeException, state...) = iterate(c.exceptions, state...)
-eltype(::Type{CompositeException}) = Any
 
 function showerror(io::IO, ex::CompositeException)
     if !isempty(ex)
@@ -96,7 +95,7 @@ function show_task_exception(io::IO, t::Task; indent = true)
     else
         show_exception_stack(IOContext(b, io), stack)
     end
-    str = String(take!(b))
+    str = takestring!(b)
     if indent
         str = replace(str, "\n" => "\n    ")
     end
@@ -115,6 +114,13 @@ end
 Wrap an expression in a [`Task`](@ref) without executing it, and return the [`Task`](@ref). This only
 creates a task, and does not run it.
 
+!!! warning
+    By default tasks will have the sticky bit set to true `t.sticky`. This models the
+    historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread
+    they are first scheduled on, and when scheduled will make the task that they were scheduled
+    from sticky. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky
+    bit manually to `false`.
+
 # Examples
 ```jldoctest
 julia> a1() = sum(i for i in 1:1000);
@@ -133,38 +139,20 @@ true
 ```
 """
 macro task(ex)
-    thunk = Base.replace_linenums!(:(()->$(esc(ex))), __source__)
+    thunk = replace_linenums!(:(()->$(esc(ex))), __source__)
     :(Task($thunk))
 end
 
-"""
-    current_task()
-
-Get the currently running [`Task`](@ref).
-"""
-current_task() = ccall(:jl_get_current_task, Ref{Task}, ())
-
 # task states
 
 const task_state_runnable = UInt8(0)
 const task_state_done     = UInt8(1)
 const task_state_failed   = UInt8(2)
 
-const _state_index = findfirst(==(:_state), fieldnames(Task))
-@eval function load_state_acquire(t)
-    # TODO: Replace this by proper atomic operations when available
-    @GC.preserve t llvmcall($("""
-        %ptr = inttoptr i$(Sys.WORD_SIZE) %0 to i8*
-        %rv = load atomic i8, i8* %ptr acquire, align 8
-        ret i8 %rv
-        """), UInt8, Tuple{Ptr{UInt8}},
-        Ptr{UInt8}(pointer_from_objref(t) + fieldoffset(Task, _state_index)))
-end
-
 @inline function getproperty(t::Task, field::Symbol)
     if field === :state
         # TODO: this field name should be deprecated in 2.0
-        st = load_state_acquire(t)
+        st = @atomic :acquire t._state
         if st === task_state_runnable
             return :runnable
         elseif st === task_state_done
@@ -180,13 +168,24 @@ end
     elseif field === :exception
         # TODO: this field name should be deprecated in 2.0
         return t._isexception ? t.result : nothing
+    elseif field === :scope
+        error("""
+            Querying a Task's `scope` field is disallowed.
+            The private `Core.current_scope()` function is better, though still an implementation detail.""")
     else
         return getfield(t, field)
     end
 end
 
+@inline function setproperty!(t::Task, field::Symbol, @nospecialize(v))
+    if field === :scope
+        istaskstarted(t) && error("Setting scope on a started task directly is disallowed.")
+    end
+    return @invoke setproperty!(t::Any, field::Symbol, v::Any)
+end
+
 """
-    istaskdone(t::Task) -> Bool
+    istaskdone(t::Task)::Bool
 
 Determine whether a task has exited.
 
@@ -207,10 +206,10 @@ julia> istaskdone(b)
 true
 ```
 """
-istaskdone(t::Task) = load_state_acquire(t) !== task_state_runnable
+istaskdone(t::Task) = (@atomic :acquire t._state) !== task_state_runnable
 
 """
-    istaskstarted(t::Task) -> Bool
+    istaskstarted(t::Task)::Bool
 
 Determine whether a task has started executing.
 
@@ -227,7 +226,7 @@ false
 istaskstarted(t::Task) = ccall(:jl_is_task_started, Cint, (Any,), t) != 0
 
 """
-    istaskfailed(t::Task) -> Bool
+    istaskfailed(t::Task)::Bool
 
 Determine whether a task has exited because an exception was thrown.
 
@@ -251,7 +250,7 @@ true
 !!! compat "Julia 1.3"
     This function requires at least Julia 1.3.
 """
-istaskfailed(t::Task) = (load_state_acquire(t) === task_state_failed)
+istaskfailed(t::Task) = ((@atomic :acquire t._state) === task_state_failed)
 
 Threads.threadid(t::Task) = Int(ccall(:jl_get_task_tid, Int16, (Any,), t)+1)
 function Threads.threadpool(t::Task)
@@ -304,6 +303,7 @@ end
 
 # just wait for a task to be done, no error propagation
 function _wait(t::Task)
+    t === current_task() && Core.throw(ConcurrencyViolationError("deadlock detected: cannot wait on current task"))
     if !istaskdone(t)
         donenotify = t.donenotify::ThreadSynchronizer
         lock(donenotify)
@@ -347,15 +347,195 @@ function _wait2(t::Task, waiter::Task)
     nothing
 end
 
-function wait(t::Task)
-    t === current_task() && error("deadlock detected: cannot wait on current task")
+"""
+    wait(t::Task; throw=true)
+
+Wait for a `Task` to finish.
+
+The keyword `throw` (defaults to `true`) controls whether a failed task results
+in an error, thrown as a [`TaskFailedException`](@ref) which wraps the failed task.
+
+Throws a `ConcurrencyViolationError` if `t` is the currently running task, to prevent deadlocks.
+"""
+function wait(t::Task; throw=true)
     _wait(t)
-    if istaskfailed(t)
-        throw(TaskFailedException(t))
+    if throw && istaskfailed(t)
+        Core.throw(TaskFailedException(t))
     end
     nothing
 end
 
+# Wait multiple tasks
+
+"""
+    waitany(tasks; throw=true) -> (done_tasks, remaining_tasks)
+
+Wait until at least one of the given tasks have been completed.
+
+If `throw` is `true`, throw `CompositeException` when one of the
+completed tasks completes with an exception.
+
+The return value consists of two task vectors. The first one consists of
+completed tasks, and the other consists of uncompleted tasks.
+
+!!! warning
+    This may scale poorly compared to writing code that uses multiple individual tasks that
+    each runs serially, since this needs to scan the list of `tasks` each time and
+    synchronize with each one every time this is called. Or consider using
+    [`waitall(tasks; failfast=true)`](@ref waitall) instead.
+
+!!! compat "Julia 1.12"
+    This function requires at least Julia 1.12.
+"""
+waitany(tasks; throw=true) = _wait_multiple(collect_tasks(tasks), throw)
+
+"""
+    waitall(tasks; failfast=true, throw=true) -> (done_tasks, remaining_tasks)
+
+Wait until all the given tasks have been completed.
+
+If `failfast` is `true`, the function will return when at least one of the
+given tasks is finished by exception. If `throw` is `true`, throw
+`CompositeException` when one of the completed tasks has failed.
+
+`failfast` and `throw` keyword arguments work independently; when only
+`throw=true` is specified, this function waits for all the tasks to complete.
+
+The return value consists of two task vectors. The first one consists of
+completed tasks, and the other consists of uncompleted tasks.
+
+!!! compat "Julia 1.12"
+    This function requires at least Julia 1.12.
+"""
+waitall(tasks; failfast=true, throw=true) = _wait_multiple(collect_tasks(tasks), throw, true, failfast)
+
+function collect_tasks(waiting_tasks)
+    tasks = Task[]
+    for t in waiting_tasks
+        t isa Task || error("Expected an iterator of `Task` object")
+        push!(tasks, t)
+    end
+    return tasks
+end
+
+function _wait_multiple(tasks::Vector{Task}, throwexc::Bool=false, all::Bool=false, failfast::Bool=false)
+    if (all && !failfast) || length(tasks) <= 1
+        exception = false
+        # Force everything to finish synchronously for the case of waitall
+        # with failfast=false
+        for t in tasks
+            _wait(t)
+            exception |= istaskfailed(t)
+        end
+        if exception && throwexc
+            exceptions = [TaskFailedException(t) for t in tasks if istaskfailed(t)]
+            throw(CompositeException(exceptions))
+        else
+            return tasks, Task[]
+        end
+    end
+
+    exception = false
+    nremaining::Int = length(tasks)
+    done_mask = falses(nremaining)
+    for (i, t) in enumerate(tasks)
+        if istaskdone(t)
+            done_mask[i] = true
+            exception |= istaskfailed(t)
+            nremaining -= 1
+        end
+    end
+
+    # We can return early all tasks are done, or if any is done and we only
+    # needed to wait for one, or if any task failed and we have failfast
+    if nremaining == 0 || (any(done_mask) && (!all || (failfast && exception)))
+        if throwexc && (!all || failfast) && exception
+            exceptions = [TaskFailedException(t) for t in tasks[done_mask] if istaskfailed(t)]
+            throw(CompositeException(exceptions))
+        else
+            if nremaining == 0
+                return tasks, Task[]
+            else
+                return tasks[done_mask], tasks[.~done_mask]
+            end
+        end
+    end
+
+    chan = Channel{Int}(Inf)
+    sentinel = current_task()
+    waiter_tasks = fill(sentinel, length(tasks))
+
+    for (i, done) in enumerate(done_mask)
+        done && continue
+        t = tasks[i]
+        if istaskdone(t)
+            done_mask[i] = true
+            exception |= istaskfailed(t)
+            nremaining -= 1
+            exception && failfast && break
+        else
+            waiter = @task put!(chan, i)
+            waiter.sticky = false
+            _wait2(t, waiter)
+            waiter_tasks[i] = waiter
+        end
+    end
+
+    while nremaining > 0
+        exception && failfast && break
+        i = take!(chan)
+        t = tasks[i]
+        waiter_tasks[i] = sentinel
+        done_mask[i] = true
+        exception |= istaskfailed(t)
+        nremaining -= 1
+        # stop early if requested
+        all || break
+    end
+
+    close(chan)
+
+    # now just read which tasks finished directly: the channel is not needed anymore for that
+    # repeat until we get (acquire) the list of all dependent-exited tasks
+    changed = true
+    while changed
+        changed = false
+        for (i, done) in enumerate(done_mask)
+            done && continue
+            t = tasks[i]
+            if istaskdone(t)
+                done_mask[i] = true
+                exception |= istaskfailed(t)
+                nremaining -= 1
+                changed = true
+            end
+        end
+    end
+
+    if nremaining == 0
+        if throwexc && exception
+            exceptions = [TaskFailedException(t) for t in tasks if istaskfailed(t)]
+            throw(CompositeException(exceptions))
+        end
+        return tasks, Task[]
+    else
+        remaining_mask = .~done_mask
+        for i in findall(remaining_mask)
+            waiter = waiter_tasks[i]
+            waiter === sentinel && continue
+            donenotify = tasks[i].donenotify::ThreadSynchronizer
+            @lock donenotify list_deletefirst!(donenotify.waitq, waiter)
+        end
+        done_tasks = tasks[done_mask]
+        if throwexc && exception
+            exceptions = [TaskFailedException(t) for t in done_tasks if istaskfailed(t)]
+            throw(CompositeException(exceptions))
+        else
+            return done_tasks, tasks[remaining_mask]
+        end
+    end
+end
+
 """
     fetch(x::Any)
 
@@ -457,7 +637,8 @@ const sync_varname = gensym(:sync)
 """
     @sync
 
-Wait until all lexically-enclosed uses of [`@async`](@ref), [`@spawn`](@ref Threads.@spawn), `@spawnat` and `@distributed`
+Wait until all lexically-enclosed uses of [`@async`](@ref), [`@spawn`](@ref Threads.@spawn),
+`Distributed.@spawnat` and `Distributed.@distributed`
 are complete. All exceptions thrown by enclosed async operations are collected and thrown as
 a [`CompositeException`](@ref).
 
@@ -508,15 +689,15 @@ isolating the asynchronous code from changes to the variable's value in the curr
     Interpolating values via `\$` is available as of Julia 1.4.
 """
 macro async(expr)
-    do_async_macro(expr, __source__)
+    do_async_macro(expr, __source__, identity)
 end
 
 # generate the code for @async, possibly wrapping the task in something before
 # pushing it to the wait queue.
-function do_async_macro(expr, linenums; wrap=identity)
-    letargs = Base._lift_one_interp!(expr)
+function do_async_macro(expr, linenums, wrap)
+    letargs = _lift_one_interp!(expr)
 
-    thunk = Base.replace_linenums!(:(()->($(esc(expr)))), linenums)
+    thunk = replace_linenums!(:(()->($(esc(expr)))), linenums)
     var = esc(sync_varname)
     quote
         let $(letargs...)
@@ -556,7 +737,7 @@ fetch(t::UnwrapTaskFailedException) = unwrap_task_failed(fetch, t)
 
 # macro for running async code that doesn't throw wrapped exceptions
 macro async_unwrap(expr)
-    do_async_macro(expr, __source__, wrap=task->:(Base.UnwrapTaskFailedException($task)))
+    do_async_macro(expr, __source__, taskvar->:(UnwrapTaskFailedException($taskvar)))
 end
 
 """
@@ -566,7 +747,7 @@ Print an error log to `stderr` if task `t` fails.
 
 # Examples
 ```julia-repl
-julia> Base._wait(errormonitor(Threads.@spawn error("task failed")))
+julia> wait(errormonitor(Threads.@spawn error("task failed")); throw = false)
 Unhandled Task ERROR: task failed
 Stacktrace:
 [...]
@@ -606,29 +787,37 @@ function errormonitor(t::Task)
 end
 
 # Capture interpolated variables in $() and move them to let-block
-function _lift_one_interp!(e)
+function _lift_one_interp!(@nospecialize e)
     letargs = Any[]  # store the new gensymed arguments
-    _lift_one_interp_helper(e, false, letargs) # Start out _not_ in a quote context (false)
-    letargs
+    _lift_one_interp_helper(e, false, 0, letargs) # Start out _not_ in a quote context (false) and not needing escapes
+    return letargs
 end
-_lift_one_interp_helper(v, _, _) = v
-function _lift_one_interp_helper(expr::Expr, in_quote_context, letargs)
+_lift_one_interp_helper(@nospecialize(v), _::Bool, _::Int, _::Vector{Any}) = v
+function _lift_one_interp_helper(expr::Expr, in_quote_context::Bool, escs::Int, letargs::Vector{Any})
     if expr.head === :$
         if in_quote_context  # This $ is simply interpolating out of the quote
             # Now, we're out of the quote, so any _further_ $ is ours.
             in_quote_context = false
-        else
+        elseif escs == 0
+            # if escs is non-zero, then we cannot hoist expr.args without violating hygiene rules
             newarg = gensym()
             push!(letargs, :($(esc(newarg)) = $(esc(expr.args[1]))))
             return newarg  # Don't recurse into the lifted $() exprs
         end
+    elseif expr.head === :meta || expr.head === :inert
+        return expr
     elseif expr.head === :quote
         in_quote_context = true   # Don't try to lift $ directly out of quotes
     elseif expr.head === :macrocall
         return expr  # Don't recur into macro calls, since some other macros use $
+    elseif expr.head === :var"hygienic-scope"
+        escs += 1
+    elseif expr.head === :escape
+        escs == 0 && return expr
+        escs -= 1
     end
     for (i,e) in enumerate(expr.args)
-        expr.args[i] = _lift_one_interp_helper(e, in_quote_context, letargs)
+        expr.args[i] = _lift_one_interp_helper(e, in_quote_context, escs, letargs)
     end
     expr
 end
@@ -644,6 +833,17 @@ macro sync_add(expr)
     end
 end
 
+function repl_backend_task()
+    @isdefined(active_repl_backend) || return
+    backend = active_repl_backend
+    isdefined(backend, :backend_task) || return
+    backend_task = getfield(active_repl_backend, :backend_task)::Task
+    if backend_task._state === task_state_runnable && getfield(backend, :in_eval)
+        return backend_task
+    end
+    return
+end
+
 # runtime system hook called when a task finishes
 function task_done_hook(t::Task)
     # `finish_task` sets `sigatomic` before entering this function
@@ -665,10 +865,9 @@ function task_done_hook(t::Task)
     end
 
     if err && !handled && Threads.threadid() == 1
-        if isa(result, InterruptException) && isdefined(Base, :active_repl_backend) &&
-            active_repl_backend.backend_task._state === task_state_runnable && isempty(Workqueue) &&
-            active_repl_backend.in_eval
-            throwto(active_repl_backend.backend_task, result) # this terminates the task
+        if isa(result, InterruptException) && isempty(Workqueue)
+            backend = repl_backend_task()
+            backend isa Task && throwto(backend, result)
         end
     end
     # Clear sigatomic before waiting
@@ -679,21 +878,23 @@ function task_done_hook(t::Task)
         # If an InterruptException happens while blocked in the event loop, try handing
         # the exception to the REPL task since the current task is done.
         # issue #19467
-        if Threads.threadid() == 1 &&
-            isa(e, InterruptException) && isdefined(Base, :active_repl_backend) &&
-            active_repl_backend.backend_task._state === task_state_runnable && isempty(Workqueue) &&
-            active_repl_backend.in_eval
-            throwto(active_repl_backend.backend_task, e)
-        else
-            rethrow()
+        if Threads.threadid() == 1 && isa(e, InterruptException) && isempty(Workqueue)
+            backend = repl_backend_task()
+            backend isa Task && throwto(backend, e)
         end
+        rethrow() # this will terminate the program
     end
 end
 
+function init_task_lock(t::Task) # Function only called from jl_adopt_thread so foreign tasks have a lock.
+    if t.donenotify === nothing
+        t.donenotify = ThreadSynchronizer()
+    end
+end
 
 ## scheduler and work queue
 
-struct IntrusiveLinkedListSynchronized{T}
+mutable struct IntrusiveLinkedListSynchronized{T}
     queue::IntrusiveLinkedList{T}
     lock::Threads.SpinLock
     IntrusiveLinkedListSynchronized{T}() where {T} = new(IntrusiveLinkedList{T}(), Threads.SpinLock())
@@ -745,30 +946,10 @@ function list_deletefirst!(W::IntrusiveLinkedListSynchronized{T}, t::T) where T
 end
 
 const StickyWorkqueue = IntrusiveLinkedListSynchronized{Task}
-global Workqueues::Vector{StickyWorkqueue} = [StickyWorkqueue()]
-const Workqueues_lock = Threads.SpinLock()
+const Workqueues = OncePerThread{StickyWorkqueue}(StickyWorkqueue)
 const Workqueue = Workqueues[1] # default work queue is thread 1 // TODO: deprecate this variable
 
-function workqueue_for(tid::Int)
-    qs = Workqueues
-    if length(qs) >= tid && isassigned(qs, tid)
-        return @inbounds qs[tid]
-    end
-    # slow path to allocate it
-    l = Workqueues_lock
-    @lock l begin
-        qs = Workqueues
-        if length(qs) < tid
-            nt = Threads.maxthreadid()
-            @assert tid <= nt
-            global Workqueues = qs = copyto!(typeof(qs)(undef, length(qs) + nt - 1), qs)
-        end
-        if !isassigned(qs, tid)
-            @inbounds qs[tid] = StickyWorkqueue()
-        end
-        return @inbounds qs[tid]
-    end
-end
+workqueue_for(tid::Int) = Workqueues[tid]
 
 function enq_work(t::Task)
     (t._state === task_state_runnable && t.queue === nothing) || error("schedule: Task not runnable")
@@ -776,19 +957,27 @@ function enq_work(t::Task)
     # Sticky tasks go into their thread's work queue.
     if t.sticky
         tid = Threads.threadid(t)
-        if tid == 0 && !GC.in_finalizer()
+        if tid == 0
             # The task is not yet stuck to a thread. Stick it to the current
             # thread and do the same to the parent task (the current task) so
             # that the tasks are correctly co-scheduled (issue #41324).
             # XXX: Ideally we would be able to unset this.
-            tid = Threads.threadid()
-            ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1)
-            current_task().sticky = true
+            if GC.in_finalizer()
+                # The task was launched in a finalizer. There is no thread to sticky it
+                # to, so just allow it to run anywhere as if it had been non-sticky.
+                t.sticky = false
+                @goto not_sticky
+            else
+                tid = Threads.threadid()
+                ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1)
+                current_task().sticky = true
+            end
         end
         push!(workqueue_for(tid), t)
     else
+        @label not_sticky
         tp = Threads.threadpool(t)
-        if Threads.threadpoolsize(tp) == 1
+        if tp === :foreign || Threads.threadpoolsize(tp) == 1
             # There's only one thread in the task's assigned thread pool;
             # use its work queue.
             tid = (tp === :interactive) ? 1 : Threads.threadpoolsize(:interactive)+1
@@ -804,7 +993,11 @@ function enq_work(t::Task)
     return t
 end
 
-schedule(t::Task) = enq_work(t)
+function schedule(t::Task)
+    # [task] created -scheduled-> wait_time
+    maybe_record_enqueued!(t)
+    enq_work(t)
+end
 
 """
     schedule(t::Task, [val]; error=false)
@@ -820,6 +1013,13 @@ the woken task.
     It is incorrect to use `schedule` on an arbitrary `Task` that has already been started.
     See [the API reference](@ref low-level-schedule-wait) for more information.
 
+!!! warning
+    By default tasks will have the sticky bit set to true `t.sticky`. This models the
+    historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread
+    they are first scheduled on, and when scheduled will make the task that they were scheduled
+    from sticky. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky
+    bit manually to `false`.
+
 # Examples
 ```jldoctest
 julia> a5() = sum(i for i in 1:1000);
@@ -844,13 +1044,15 @@ function schedule(t::Task, @nospecialize(arg); error=false)
     # schedule a task to be (re)started with the given value or exception
     t._state === task_state_runnable || Base.error("schedule: Task not runnable")
     if error
-        t.queue === nothing || Base.list_deletefirst!(t.queue::IntrusiveLinkedList{Task}, t)
+        q = t.queue; q === nothing || list_deletefirst!(q::IntrusiveLinkedList{Task}, t)
         setfield!(t, :result, arg)
         setfield!(t, :_isexception, true)
     else
         t.queue === nothing || Base.error("schedule: Task not runnable")
         setfield!(t, :result, arg)
     end
+    # [task] created -scheduled-> wait_time
+    maybe_record_enqueued!(t)
     enq_work(t)
     return t
 end
@@ -868,7 +1070,7 @@ function yield()
     try
         wait()
     catch
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         rethrow()
     end
 end
@@ -880,11 +1082,19 @@ end
 
 A fast, unfair-scheduling version of `schedule(t, arg); yield()` which
 immediately yields to `t` before calling the scheduler.
+
+Throws a `ConcurrencyViolationError` if `t` is the currently running task.
 """
 function yield(t::Task, @nospecialize(x=nothing))
-    (t._state === task_state_runnable && t.queue === nothing) || error("yield: Task not runnable")
+    ct = current_task()
+    t === ct && throw(ConcurrencyViolationError("Cannot yield to currently running task!"))
+    (t._state === task_state_runnable && t.queue === nothing) || throw(ConcurrencyViolationError("yield: Task not runnable"))
+    # [task] user_time -yield-> wait_time
+    record_running_time!(ct)
+    # [task] created -scheduled-> wait_time
+    maybe_record_enqueued!(t)
     t.result = x
-    enq_work(current_task())
+    enq_work(ct)
     set_next_task(t)
     return try_yieldto(ensure_rescheduled)
 end
@@ -898,6 +1108,7 @@ call to `yieldto`. This is a low-level call that only switches tasks, not consid
 or scheduling in any way. Its use is discouraged.
 """
 function yieldto(t::Task, @nospecialize(x=nothing))
+    ct = current_task()
     # TODO: these are legacy behaviors; these should perhaps be a scheduler
     # state error instead.
     if t._state === task_state_done
@@ -905,6 +1116,10 @@ function yieldto(t::Task, @nospecialize(x=nothing))
     elseif t._state === task_state_failed
         throw(t.result)
     end
+    # [task] user_time -yield-> wait_time
+    record_running_time!(ct)
+    # [task] created -scheduled-unfairly-> wait_time
+    maybe_record_enqueued!(t)
     t.result = x
     set_next_task(t)
     return try_yieldto(identity)
@@ -918,6 +1133,10 @@ function try_yieldto(undo)
         rethrow()
     end
     ct = current_task()
+    # [task] wait_time -(re)started-> user_time
+    if ct.metrics_enabled
+        @atomic :monotonic ct.last_started_running_at = time_ns()
+    end
     if ct._isexception
         exc = ct.result
         ct.result = nothing
@@ -931,12 +1150,43 @@ end
 
 # yield to a task, throwing an exception in it
 function throwto(t::Task, @nospecialize exc)
+    ct = current_task()
+    # [task] user_time -yield-> wait_time
+    record_running_time!(ct)
+    # [task] created -scheduled-unfairly-> wait_time
+    maybe_record_enqueued!(t)
     t.result = exc
     t._isexception = true
     set_next_task(t)
     return try_yieldto(identity)
 end
 
+function wait_forever()
+    while true
+        try
+            while true
+                wait()
+            end
+        catch e
+            local errs = stderr
+            # try to display the failure atomically
+            errio = IOContext(PipeBuffer(), errs::IO)
+            emphasize(errio, "Internal Task ")
+            display_error(errio, current_exceptions())
+            write(errs, errio)
+            # victimize another random Task also
+            if Threads.threadid() == 1 && isa(e, InterruptException) && isempty(Workqueue)
+                backend = repl_backend_task()
+                backend isa Task && throwto(backend, e)
+            end
+        end
+    end
+end
+
+const get_sched_task = OncePerThread{Task}() do
+    Task(wait_forever)
+end
+
 function ensure_rescheduled(othertask::Task)
     ct = current_task()
     W = workqueue_for(Threads.threadid())
@@ -973,23 +1223,30 @@ end
 
 checktaskempty = Partr.multiq_check_empty
 
-@noinline function poptask(W::StickyWorkqueue)
+function wait()
+    ct = current_task()
+    # [task] user_time -yield-or-done-> wait_time
+    record_running_time!(ct)
+    # let GC run
+    GC.safepoint()
+    # check for libuv events
+    process_events()
+
+    # get the next task to run
+    W = workqueue_for(Threads.threadid())
     task = trypoptask(W)
-    if !(task isa Task)
+    if task === nothing
+        # No tasks to run; switch to the scheduler task to run the
+        # thread sleep logic.
+        sched_task = get_sched_task()
+        if ct !== sched_task
+            istaskdone(sched_task) && (sched_task = @task wait())
+            return yieldto(sched_task)
+        end
         task = ccall(:jl_task_get_next, Ref{Task}, (Any, Any, Any), trypoptask, W, checktaskempty)
     end
     set_next_task(task)
-    nothing
-end
-
-function wait()
-    GC.safepoint()
-    W = workqueue_for(Threads.threadid())
-    poptask(W)
-    result = try_yieldto(ensure_rescheduled)
-    process_events()
-    # return when we come out of the queue
-    return result
+    return try_yieldto(ensure_rescheduled)
 end
 
 if Sys.iswindows()
@@ -997,3 +1254,21 @@ if Sys.iswindows()
 else
     pause() = ccall(:pause, Cvoid, ())
 end
+
+# update the `running_time_ns` field of `t` to include the time since it last started running.
+function record_running_time!(t::Task)
+    if t.metrics_enabled && !istaskdone(t)
+        @atomic :monotonic t.running_time_ns += time_ns() - t.last_started_running_at
+    end
+    return t
+end
+
+# if this is the first time `t` has been added to the run queue
+# (or the first time it has been unfairly yielded to without being added to the run queue)
+# then set the `first_enqueued_at` field to the current time.
+function maybe_record_enqueued!(t::Task)
+    if t.metrics_enabled && t.first_enqueued_at == 0
+        @atomic :monotonic t.first_enqueued_at = time_ns()
+    end
+    return t
+end
diff --git a/base/terminfo.jl b/base/terminfo.jl
new file mode 100644
index 0000000000000..be0dd53b1ac74
--- /dev/null
+++ b/base/terminfo.jl
@@ -0,0 +1,394 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Since this code is in the startup-path, we go to some effort to
+# be easier on the compiler, such as using `map` over broadcasting.
+
+include("terminfo_data.jl")
+
+"""
+    struct TermInfoRaw
+
+A structured representation of a terminfo file, without any knowledge of
+particular capabilities, solely based on `term(5)`.
+
+!!! warning
+  This is not part of the public API, and thus subject to change without notice.
+
+# Fields
+
+- `names::Vector{String}`: The names this terminal is known by.
+- `flags::BitVector`: A list of 0–$(length(TERM_FLAGS)) flag values.
+- `numbers::Union{Vector{Int16}, Vector{Int32}}`: A list of 0–$(length(TERM_NUMBERS))
+  number values. A value of `typemax(eltype(numbers))` is used to skip over
+  unspecified capabilities while ensuring value indices are correct.
+- `strings::Vector{Union{String, Nothing}}`: A list of 0–$(length(TERM_STRINGS))
+  string values. A value of `nothing` is used to skip over unspecified
+  capabilities while ensuring value indices are correct.
+- `extended::Union{Nothing, Dict{Symbol, Union{Bool, Int, String}}}`: Should an
+  extended info section exist, this gives the entire extended info as a
+  dictionary. Otherwise `nothing`.
+
+See also: `TermInfo` and `TermCapability`.
+"""
+struct TermInfoRaw
+    names::Vector{String}
+    flags::BitVector
+    numbers::Vector{Int}
+    strings::Vector{Union{String, Nothing}}
+    extended::Union{Nothing, Dict{Symbol, Union{Bool, Int, String, Nothing}}}
+end
+
+"""
+    struct TermInfo
+
+A parsed terminfo paired with capability information.
+
+!!! warning
+  This is not part of the public API, and thus subject to change without notice.
+
+# Fields
+
+- `names::Vector{String}`: The names this terminal is known by.
+- `flags::Int`: The number of flags specified.
+- `numbers::BitVector`: A mask indicating which of `TERM_NUMBERS` have been
+  specified.
+- `strings::BitVector`: A mask indicating which of `TERM_STRINGS` have been
+  specified.
+- `extensions::Vector{Symbol}`: A list of extended capability variable names.
+- `capabilities::Dict{Symbol, Union{Bool, Int, String}}`: The capability values
+  themselves.
+
+See also: `TermInfoRaw` and `TermCapability`.
+"""
+struct TermInfo
+    names::Vector{String}
+    flags::Dict{Symbol, Bool}
+    numbers::Dict{Symbol, Int}
+    strings::Dict{Symbol, String}
+    extensions::Union{Nothing, Set{Symbol}}
+    aliases::Dict{Symbol, Symbol}
+end
+
+TermInfo() = TermInfo([], Dict(), Dict(), Dict(), nothing, Dict())
+
+function read(data::IO, ::Type{TermInfoRaw})
+    # Parse according to `term(5)`
+    # Header
+    magic = read(data, UInt16) |> ltoh
+    NumInt = if magic == 0o0432
+        Int16
+    elseif magic == 0o01036
+        Int32
+    else
+        throw(ArgumentError("Terminfo data did not start with the magic number 0o0432 or 0o01036"))
+    end
+    name_bytes, flag_bytes, numbers_count, string_count, table_bytes =
+        @ntuple 5 _->read(data, Int16) |> ltoh
+    # Terminal Names
+    term_names = map(String, split(String(read(data, name_bytes - 1)), '|'))
+    0x00 == read(data, UInt8) ||
+        throw(ArgumentError("Terminfo data did not contain a null byte after the terminal names section"))
+    # Boolean Flags
+    flags = map(==(0x01), read(data, flag_bytes))
+    if position(data) % 2 != 0
+        0x00 == read(data, UInt8) ||
+            throw(ArgumentError("Terminfo did not contain a null byte after the flag section, expected to position the start of the numbers section on an even byte"))
+    end
+    # Numbers, Strings, Table
+    numbers = map(Int ∘ ltoh, reinterpret(NumInt, read(data, numbers_count * sizeof(NumInt))))
+    string_indices = map(ltoh, reinterpret(Int16, read(data, string_count * sizeof(Int16))))
+    strings_table = read(data, table_bytes)
+    strings = _terminfo_read_strings(strings_table, string_indices)
+    TermInfoRaw(term_names, flags, numbers, strings,
+                if !eof(data) extendedterminfo(data, NumInt) end)
+end
+
+"""
+    extendedterminfo(data::IO; NumInt::Union{Type{Int16}, Type{Int32}})
+
+Read an extended terminfo section from `data`, with `NumInt` as the numbers type.
+
+This will accept any terminfo content that conforms with `term(5)`.
+
+See also: `read(::IO, ::Type{TermInfoRaw})`
+"""
+function extendedterminfo(data::IO, NumInt::Union{Type{Int16}, Type{Int32}})
+    # Extended info
+    if position(data) % 2 != 0
+        0x00 == read(data, UInt8) ||
+            throw(ArgumentError("Terminfo did not contain a null byte before the extended section; expected to position the start on an even byte"))
+    end
+    # Extended header
+    flag_bytes, numbers_count, string_count, table_count, table_bytes =
+        @ntuple 5 _->read(data, Int16) |> ltoh
+    # Extended flags/numbers/strings
+    flags = map(==(0x01), read(data, flag_bytes))
+    if flag_bytes % 2 != 0
+        0x00 == read(data, UInt8) ||
+            throw(ArgumentError("Terminfo did not contain a null byte after the extended flag section; expected to position the start of the numbers section on an even byte"))
+    end
+    numbers = map(Int ∘ ltoh, reinterpret(NumInt, read(data, numbers_count * sizeof(NumInt))))
+    table_indices = map(ltoh, reinterpret(Int16, read(data, table_count * sizeof(Int16))))
+    table_data = read(data, table_bytes)
+    strings = _terminfo_read_strings(table_data, table_indices[1:string_count])
+    table_halfoffset = Int16(get(table_indices, string_count, 0) +
+        ncodeunits(something(get(strings, length(strings), ""), "")) + 1)
+    for index in string_count+1:lastindex(table_indices)
+        table_indices[index] += table_halfoffset
+    end
+    labels = map(Symbol, _terminfo_read_strings(table_data, table_indices[string_count+1:end]))
+    Dict{Symbol, Union{Bool, Int, String, Nothing}}(
+        zip(labels, Iterators.flatten((flags, numbers, strings))))
+end
+
+"""
+    _terminfo_read_strings(table::Vector{UInt8}, indices::Vector{Int16})
+
+From `table`, read a string starting at each position in `indices`. Each string
+must be null-terminated. Should an index be -1 or -2, `nothing` is given instead
+of a string.
+"""
+function _terminfo_read_strings(table::Vector{UInt8}, indices::Vector{Int16})
+    strings = Vector{Union{Nothing, String}}(undef, length(indices))
+    map!(strings, indices) do idx
+        if idx >= 0
+            len = findfirst(==(0x00), view(table, 1+idx:length(table)))
+            !isnothing(len) ||
+                throw(ArgumentError("Terminfo table entry @$idx does not terminate with a null byte"))
+            String(table[1+idx:idx+len-1])
+        elseif idx ∈ (-1, -2)
+        else
+            throw(ArgumentError("Terminfo table index is invalid: -2 ≰ $idx"))
+        end
+    end
+    strings
+end
+
+"""
+    TermInfo(raw::TermInfoRaw)
+
+Construct a `TermInfo` from `raw`, using known terminal capabilities (as of
+NCurses 6.3, see `TERM_FLAGS`, `TERM_NUMBERS`, and `TERM_STRINGS`).
+"""
+function TermInfo(raw::TermInfoRaw)
+    capabilities = Dict{Symbol, Union{Bool, Int, String}}()
+    sizehint!(capabilities, 2 * (length(raw.flags) + length(raw.numbers) + length(raw.strings)))
+    flags = Dict{Symbol, Bool}()
+    numbers = Dict{Symbol, Int}()
+    strings = Dict{Symbol, String}()
+    aliases = Dict{Symbol, Symbol}()
+    extensions = nothing
+    for (flag, value) in zip(TERM_FLAGS, raw.flags)
+        flags[flag.name] = value
+        aliases[flag.capname] = flag.name
+    end
+    for (num, value) in zip(TERM_NUMBERS, raw.numbers)
+        numbers[num.name] = Int(value)
+        aliases[num.capname] = num.name
+    end
+    for (str, value) in zip(TERM_STRINGS, raw.strings)
+        if !isnothing(value)
+            strings[str.name] = value
+            aliases[str.capname] = str.name
+        end
+    end
+    if !isnothing(raw.extended)
+        extensions = Set{Symbol}()
+        longalias(key, value) = first(get(TERM_USER, (typeof(value), key), (nothing, "")))
+        for (short, value) in raw.extended
+            long = longalias(short, value)
+            key = something(long, short)
+            push!(extensions, key)
+            if value isa Bool
+                flags[key] = value
+            elseif value isa Int
+                numbers[key] = value
+            elseif value isa String
+                strings[key] = value
+            end
+            if !isnothing(long)
+                aliases[short] = long
+            end
+        end
+    end
+    TermInfo(raw.names, flags, numbers, strings, extensions, aliases)
+end
+
+get(ti::TermInfo, key::Symbol, default::Bool)   = get(ti.flags,   get(ti.aliases, key, key), default)
+get(ti::TermInfo, key::Symbol, default::Int)    = get(ti.numbers, get(ti.aliases, key, key), default)
+get(ti::TermInfo, key::Symbol, default::String) = get(ti.strings, get(ti.aliases, key, key), default)
+
+haskey(ti::TermInfo, key::Symbol) =
+    haskey(ti.flags, key) || haskey(ti.numbers, key) || haskey(ti.strings, key) || haskey(ti.aliases, key)
+
+function getindex(ti::TermInfo, key::Symbol)
+    haskey(ti.flags, key) && return ti.flags[key]
+    haskey(ti.numbers, key) && return ti.numbers[key]
+    haskey(ti.strings, key) && return ti.strings[key]
+    haskey(ti.aliases, key) && return getindex(ti, ti.aliases[key])
+    throw(KeyError(key))
+end
+
+keys(ti::TermInfo) = keys(ti.flags) ∪ keys(ti.numbers) ∪ keys(ti.strings) ∪ keys(ti.aliases)
+
+function show(io::IO, ::MIME"text/plain", ti::TermInfo)
+    print(io, "TermInfo(", ti.names, "; ", length(ti.flags), " flags, ",
+          length(ti.numbers), " numbers, ", length(ti.strings), " strings")
+    !isnothing(ti.extensions) &&
+        print(io, ", ", length(ti.extensions), " extended capabilities")
+    print(io, ')')
+end
+
+"""
+    find_terminfo_file(term::String)
+
+Locate the terminfo file for `term`, return `nothing` if none could be found.
+
+The lookup policy is described in `terminfo(5)` "Fetching Compiled
+Descriptions". A terminfo database is included by default with Julia and is
+taken to be the first entry of `@TERMINFO_DIRS@`.
+"""
+function find_terminfo_file(term::String)
+    isempty(term) && return
+    chr, chrcode = string(first(term)), string(Int(first(term)), base=16)
+    terminfo_dirs = if haskey(ENV, "TERMINFO")
+        [ENV["TERMINFO"]]
+    elseif isdir(joinpath(homedir(), ".terminfo"))
+        [joinpath(homedir(), ".terminfo")]
+    else
+        String[]
+    end
+    haskey(ENV, "TERMINFO_DIRS") &&
+        append!(terminfo_dirs,
+                replace(split(ENV["TERMINFO_DIRS"], ':'),
+                        "" => "/usr/share/terminfo"))
+    push!(terminfo_dirs, normpath(Sys.BINDIR, DATAROOTDIR, "julia", "terminfo"))
+    Sys.isunix() &&
+        push!(terminfo_dirs, "/etc/terminfo", "/lib/terminfo", "/usr/share/terminfo")
+    for dir in terminfo_dirs
+        if isfile(joinpath(dir, chr, term))
+            return joinpath(dir, chr, term)
+        elseif isfile(joinpath(dir, chrcode, term))
+            return joinpath(dir, chrcode, term)
+        elseif isfile(joinpath(dir, lowercase(chr), lowercase(term)))
+            # The vendored terminfo database is fully lowercase to avoid issues on
+            # case-sensitive filesystems. On Unix-like systems, terminfo files with
+            # different cases are hard links to one another, so this is still
+            # correct for non-vendored terminfo, just redundant.
+            return joinpath(dir, lowercase(chr), lowercase(term))
+        end
+    end
+    return nothing
+end
+
+"""
+    load_terminfo(term::String)
+
+Load the `TermInfo` for `term`, falling back on a blank `TermInfo`.
+"""
+function load_terminfo(term::String)
+    file = find_terminfo_file(term)
+    isnothing(file) && return TermInfo()
+    try
+        TermInfo(read(file, TermInfoRaw))
+    catch err
+        if err isa ArgumentError || err isa IOError
+            TermInfo()
+        else
+            rethrow()
+        end
+    end
+end
+
+"""
+The terminfo of the current terminal.
+"""
+const current_terminfo = OncePerProcess{TermInfo}() do
+    term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb")
+    terminfo = load_terminfo(term_env)
+    # Ensure setaf is set for xterm terminals
+    if !haskey(terminfo, :setaf) && startswith(term_env, "xterm")
+        # For xterm-like terminals without setaf, add a reasonable default
+        terminfo.strings[:setaf] = "\e[3%p1%dm"
+    end
+    return terminfo
+end
+
+# Legacy/TTY methods and the `:color` parameter
+
+if Sys.iswindows()
+    ttyhascolor() = true
+else
+    function ttyhascolor()
+        haskey(current_terminfo(), :setaf)
+    end
+end
+
+"""
+    ttyhastruecolor()
+
+Return a boolean signifying whether the current terminal supports 24-bit colors.
+
+Multiple conditions are taken as signifying truecolor support, specifically any of the following:
+- The `COLORTERM` environment variable is set to `"truecolor"` or `"24bit"`
+- The current terminfo sets the [`RGB`
+  capability](https://invisible-island.net/ncurses/man/user_caps.5.html#h3-Recognized-Capabilities)[^1]
+  (or the legacy `Tc` capability[^2]) flag
+- The current terminfo provides `setrgbf` and `setrgbb` strings[^3]
+- The current terminfo has a `colors` number greater that `256`, on a unix system
+- The VTE version is at least 3600 (detected via the `VTE_VERSION` environment variable)
+- The current terminal has the `XTERM_VERSION` environment variable set
+- The current terminal appears to be iTerm according to the `TERMINAL_PROGRAM` environment variable
+- The `TERM` environment variable corresponds to: linuxvt, rxvt, or st
+
+[^1]: Added to Ncurses 6.1, and used in `TERM=*-direct` terminfos.
+[^2]: Convention [added to tmux in 2016](https://github.com/tmux/tmux/commit/427b8204268af5548d09b830e101c59daa095df9),
+      superseded by `RGB`.
+[^3]: Proposed by [Rüdiger Sonderfeld in 2013](https://lists.gnu.org/archive/html/bug-ncurses/2013-10/msg00007.html),
+      adopted by a few terminal emulators.
+
+!!! note
+    The set of conditions is messy, because the situation is a mess, and there's
+    no resolution in sight. `COLORTERM` is widely accepted, but an imperfect
+    solution because only `TERM` is passed across `ssh` sessions. Terminfo is
+    the obvious place for a terminal to declare capabilities, but it's taken
+    enough years for ncurses/terminfo to declare a standard capability (`RGB`)
+    that a number of other approaches have taken root. Furthermore, the official
+    `RGB` capability is *incompatible* with 256-color operation, and so is
+    unable to resolve the fragmentation in the terminal ecosystem.
+"""
+function ttyhastruecolor()
+    # Lasciate ogne speranza, voi ch'intrate
+    get(ENV, "COLORTERM", "") ∈ ("truecolor", "24bit") ||
+        get(current_terminfo(), :RGB, false) || get(current_terminfo(), :Tc, false) ||
+        (haskey(current_terminfo(), :setrgbf) && haskey(current_terminfo(), :setrgbb)) ||
+        @static if Sys.isunix() get(current_terminfo(), :colors, 0) > 256 else false end ||
+        (Sys.iswindows() && Sys.windows_version() ≥ v"10.0.14931") || # See <https://devblogs.microsoft.com/commandline/24-bit-color-in-the-windows-console/>
+        something(tryparse(Int, get(ENV, "VTE_VERSION", "")), 0) >= 3600 || # Per GNOME bug #685759 <https://bugzilla.gnome.org/show_bug.cgi?id=685759>
+        haskey(ENV, "XTERM_VERSION") ||
+        get(ENV, "TERMINAL_PROGRAM", "") == "iTerm.app" || # Why does Apple need to be special?
+        haskey(ENV, "KONSOLE_PROFILE_NAME") || # Per commentary in VT102Emulation.cpp
+        haskey(ENV, "KONSOLE_DBUS_SESSION") ||
+        let term = get(ENV, "TERM", "")
+            startswith(term, "linux") || # Linux 4.8+ supports true-colour SGR.
+                startswith(term, "rxvt") || # See <http://lists.schmorp.de/pipermail/rxvt-unicode/2016q2/002261.html>
+                startswith(term, "st") # From experimentation
+        end
+end
+
+function get_have_color()
+    global have_color
+    have_color === nothing && (have_color = ttyhascolor())
+    return have_color::Bool
+end
+
+function get_have_truecolor()
+    global have_truecolor
+    have_truecolor === nothing && (have_truecolor = ttyhastruecolor())
+    return have_truecolor::Bool
+end
+
+in(key_value::Pair{Symbol,Bool}, ::TTY) = key_value.first === :color && key_value.second === get_have_color()
+haskey(::TTY, key::Symbol) = key === :color
+getindex(::TTY, key::Symbol) = key === :color ? get_have_color() : throw(KeyError(key))
+get(::TTY, key::Symbol, default) = key === :color ? get_have_color() : default
diff --git a/base/terminfo_data.jl b/base/terminfo_data.jl
new file mode 100644
index 0000000000000..caf2ff528d3e1
--- /dev/null
+++ b/base/terminfo_data.jl
@@ -0,0 +1,796 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Updating this listing is fairly easy, assuming existence of a unix system,
+# posix shell, and `awk`. Just update the version string in the commented out
+# `NCURSES_VERSION` variable, and run this file. This works because this file is
+# a bit of a quine.
+
+#=
+awk '/^#=run/{flag=1;next}/=#/{flag=0}flag{gsub(/__FILE__/,"\"'"$0"'\"");print}' "$0" | \
+  julia --startup-file=no -E 'readchomp("/dev/fd/0") |> Meta.parse |> eval' && echo "Done"; exit
+=#
+
+"""
+    struct TermCapability
+
+Specification of a single terminal capability.
+
+!!! warning
+  This is not part of the public API, and thus subject to change without notice.
+
+# Fields
+
+- `name::Symbol`: The name of the terminfo capability variable
+- `capname::Symbol`: The *Cap-name* of the capability
+- `description::String`: A description of the purpose of the capability
+
+See also: `TermInfo`, `TERM_FLAGS`, `TERM_NUMBERS`, and `TERM_STRINGS`.
+"""
+struct TermCapability
+    name::Symbol
+    capname::Symbol
+    description::String
+end
+
+#=run
+begin
+
+using Downloads
+
+version_info = IOBuffer()
+standard_caps = IOBuffer()
+user_caps = IOBuffer()
+
+Downloads.download("https://raw.githubusercontent.com/mirror/ncurses/master/VERSION", version_info)
+Downloads.download("https://raw.githubusercontent.com/mirror/ncurses/master/include/Caps", standard_caps)
+Downloads.download("https://raw.githubusercontent.com/mirror/ncurses/master/include/Caps-ncurses", user_caps)
+
+const TERM_FLAGS   = NTuple{3, String}[]
+const TERM_NUMBERS = NTuple{3, String}[]
+const TERM_STRINGS = NTuple{3, String}[]
+const TERM_USER    = NTuple{3, String}[]
+
+_, ncurses_version, ncurses_date = split(read(seekstart(version_info), String))
+
+for line in eachline(seekstart(standard_caps))
+    startswith(line, '#') && continue
+    components = split(line, '\t', keepempty=false)
+    if length(components) ∉ 8:9
+        @warn "Malformed line: $(sprint(show, line))"
+        continue
+    end
+    name, shortcode, type, _, _, _, _, description, _... = components
+    caplist = if type == "bool" TERM_FLAGS
+    elseif type == "num" TERM_NUMBERS
+    elseif type == "str" TERM_STRINGS
+    else
+        @warn "Unrecognised capability type: $type"
+        continue
+    end
+    push!(caplist, (name, shortcode, description))
+end
+
+for line in eachline(seekstart(user_caps))
+    startswith(line, '#') && continue
+    !startswith(line, "userdef") && continue
+    line = line[1+ncodeunits("userdef "):end]
+    components = split(line, '\t', keepempty=false)
+    if length(components) ∉ 4:5
+        @warn "Malformed line: $(sprint(show, line))"
+        continue
+    end
+    code, type, _, description, _... = components
+    if code == "xm"
+        components[3] == "-" || continue
+        description = "mouse response"
+    end
+    dtype = get(Dict("bool" => "Bool", "num" => "Int", "str" => "String"), type, nothing)
+    if isnothing(dtype)
+        @warn "Unrecognised data type: $type"
+        continue
+    end
+    push!(TERM_USER, (dtype, code, description))
+end
+
+push!(TERM_USER, ("Bool", "Tc", "tmux extension to indicate 24-bit truecolor support"))
+push!(TERM_USER, ("Bool", "Su", "kitty extension to indicate styled underline support"))
+
+const SENTINEL = "\n## GENERATED CODE BEYOND THIS POINT ##"
+const PREAMBLE = readuntil(__FILE__, SENTINEL, keep=true)
+
+out = IOBuffer()
+write(out, PREAMBLE, "\n\n# Terminfo Capabilities as of NCurses $ncurses_version-$ncurses_date\n",
+      "const NCURSES_VERSION = v\"$ncurses_version.$ncurses_date\"\n")
+
+for (ftype, list) in [("flag", TERM_FLAGS), ("number", TERM_NUMBERS), ("string", TERM_STRINGS)]
+    print(out, "\n\"\"\"\n\
+          Ordered list of known terminal capability $ftype fields, as of NCurses $ncurses_version-$ncurses_date.\n\
+          \"\"\"\n\
+          const TERM_$(uppercase(ftype))S = [")
+    namepad = maximum(textwidth, getindex.(list, 1)) + 1
+    codepad = maximum(textwidth, getindex.(list, 2)) + 1
+    for (name, shortcode, description) in list
+        print(out, "\n    TermCapability(:", name, ',', ' '^(namepad - textwidth(name)),
+              ':', shortcode, ',', ' '^(codepad - textwidth(shortcode)),
+              '"', escape_string(description), "\"),")
+    end
+    println(out, "\n]")
+end
+
+function getcustomalias(allterms::Vector{NTuple{3, String}}, type, short, description)
+    specific_aliases = Dict{String, String}(
+        "smxx"  => ":enter_strikeout_mode",
+        "rmxx"  => ":exit_strikeout_mode",
+        "Smol"  => ":enter_overline_mode",
+        "Rmol"  => ":exit_overline_mode",
+        "Cs"    => ":set_cursor_color",
+        "Cr"    => ":reset_cursor_color",
+        "Ss"    => ":set_cursor_style",
+        "Se"    => ":reset_cursor_style",
+        "Smulx" => ":set_underline_style",
+        "Su"    => ":can_style_underline",
+        "csl"   => ":clear_status_line",
+        "Ms"    => ":set_host_clipboard",
+        "Tc"    => ":truecolor",
+        "XF"    => ":xterm_focus")
+    if startswith(short, 'k') && !occursin("keypad", description)
+        return ":key_" * replace(lowercase(description), r"[^a-z]" => '_')
+    end
+    return get(specific_aliases, short, "nothing")
+end
+
+print(out, "\n\"\"\"\nTerminfo extensions that NCurses $ncurses_version-$ncurses_date is aware of.\n\"\"\"",
+           "\nconst TERM_USER = Dict{Tuple{DataType, Symbol}, Union{Tuple{Nothing, String}, Tuple{Symbol, String}}}(")
+shortpad = maximum(textwidth, getindex.(TERM_USER, 2)) + 1
+for (type, short, description) in TERM_USER
+    print(out, "\n    ($(rpad(type * ',', 7)) :$short)", ' '^(shortpad - textwidth(short)),
+          "=> (", getcustomalias(TERM_USER, type, short, description), ", \"",
+          escape_string(description), "\"),")
+end
+println(out, "\n)")
+
+open(io -> write(io, seekstart(out)), __FILE__, "w")
+
+end
+=#
+
+## GENERATED CODE BEYOND THIS POINT ##
+
+# Terminfo Capabilities as of NCurses 6.4-20230311
+const NCURSES_VERSION = v"6.4.20230311"
+
+"""
+Ordered list of known terminal capability flag fields, as of NCurses 6.4-20230311.
+"""
+const TERM_FLAGS = [
+    TermCapability(:auto_left_margin,         :bw,    "cub1 wraps from column 0 to last column"),
+    TermCapability(:auto_right_margin,        :am,    "terminal has automatic margins"),
+    TermCapability(:no_esc_ctlc,              :xsb,   "beehive (f1=escape, f2=ctrl C)"),
+    TermCapability(:ceol_standout_glitch,     :xhp,   "standout not erased by overwriting (hp)"),
+    TermCapability(:eat_newline_glitch,       :xenl,  "newline ignored after 80 cols (concept)"),
+    TermCapability(:erase_overstrike,         :eo,    "can erase overstrikes with a blank"),
+    TermCapability(:generic_type,             :gn,    "generic line type"),
+    TermCapability(:hard_copy,                :hc,    "hardcopy terminal"),
+    TermCapability(:has_meta_key,             :km,    "Has a meta key (i.e., sets 8th-bit)"),
+    TermCapability(:has_status_line,          :hs,    "has extra status line"),
+    TermCapability(:insert_null_glitch,       :in,    "insert mode distinguishes nulls"),
+    TermCapability(:memory_above,             :da,    "display may be retained above the screen"),
+    TermCapability(:memory_below,             :db,    "display may be retained below the screen"),
+    TermCapability(:move_insert_mode,         :mir,   "safe to move while in insert mode"),
+    TermCapability(:move_standout_mode,       :msgr,  "safe to move while in standout mode"),
+    TermCapability(:over_strike,              :os,    "terminal can overstrike"),
+    TermCapability(:status_line_esc_ok,       :eslok, "escape can be used on the status line"),
+    TermCapability(:dest_tabs_magic_smso,     :xt,    "tabs destructive, magic so char (t1061)"),
+    TermCapability(:tilde_glitch,             :hz,    "cannot print ~'s (Hazeltine)"),
+    TermCapability(:transparent_underline,    :ul,    "underline character overstrikes"),
+    TermCapability(:xon_xoff,                 :xon,   "terminal uses xon/xoff handshaking"),
+    TermCapability(:needs_xon_xoff,           :nxon,  "padding will not work, xon/xoff required"),
+    TermCapability(:prtr_silent,              :mc5i,  "printer will not echo on screen"),
+    TermCapability(:hard_cursor,              :chts,  "cursor is hard to see"),
+    TermCapability(:non_rev_rmcup,            :nrrmc, "smcup does not reverse rmcup"),
+    TermCapability(:no_pad_char,              :npc,   "pad character does not exist"),
+    TermCapability(:non_dest_scroll_region,   :ndscr, "scrolling region is non-destructive"),
+    TermCapability(:can_change,               :ccc,   "terminal can re-define existing colors"),
+    TermCapability(:back_color_erase,         :bce,   "screen erased with background color"),
+    TermCapability(:hue_lightness_saturation, :hls,   "terminal uses only HLS color notation (Tektronix)"),
+    TermCapability(:col_addr_glitch,          :xhpa,  "only positive motion for hpa/mhpa caps"),
+    TermCapability(:cr_cancels_micro_mode,    :crxm,  "using cr turns off micro mode"),
+    TermCapability(:has_print_wheel,          :daisy, "printer needs operator to change character set"),
+    TermCapability(:row_addr_glitch,          :xvpa,  "only positive motion for vpa/mvpa caps"),
+    TermCapability(:semi_auto_right_margin,   :sam,   "printing in last column causes cr"),
+    TermCapability(:cpi_changes_res,          :cpix,  "changing character pitch changes resolution"),
+    TermCapability(:lpi_changes_res,          :lpix,  "changing line pitch changes resolution"),
+    TermCapability(:backspaces_with_bs,       :OTbs,  "uses ^H to move left"),
+    TermCapability(:crt_no_scrolling,         :OTns,  "crt cannot scroll"),
+    TermCapability(:no_correctly_working_cr,  :OTnc,  "no way to go to start of line"),
+    TermCapability(:gnu_has_meta_key,         :OTMT,  "has meta key"),
+    TermCapability(:linefeed_is_newline,      :OTNL,  "move down with \\n"),
+    TermCapability(:has_hardware_tabs,        :OTpt,  "has 8-char tabs invoked with ^I"),
+    TermCapability(:return_does_clr_eol,      :OTxr,  "return clears the line"),
+]
+
+"""
+Ordered list of known terminal capability number fields, as of NCurses 6.4-20230311.
+"""
+const TERM_NUMBERS = [
+    TermCapability(:columns,                 :cols,   "number of columns in a line"),
+    TermCapability(:init_tabs,               :it,     "tabs initially every # spaces"),
+    TermCapability(:lines,                   :lines,  "number of lines on screen or page"),
+    TermCapability(:lines_of_memory,         :lm,     "lines of memory if > line. 0 means varies"),
+    TermCapability(:magic_cookie_glitch,     :xmc,    "number of blank characters left by smso or rmso"),
+    TermCapability(:padding_baud_rate,       :pb,     "lowest baud rate where padding needed"),
+    TermCapability(:virtual_terminal,        :vt,     "virtual terminal number (CB/unix)"),
+    TermCapability(:width_status_line,       :wsl,    "number of columns in status line"),
+    TermCapability(:num_labels,              :nlab,   "number of labels on screen"),
+    TermCapability(:label_height,            :lh,     "rows in each label"),
+    TermCapability(:label_width,             :lw,     "columns in each label"),
+    TermCapability(:max_attributes,          :ma,     "maximum combined attributes terminal can handle"),
+    TermCapability(:maximum_windows,         :wnum,   "maximum number of definable windows"),
+    TermCapability(:max_colors,              :colors, "maximum number of colors on screen"),
+    TermCapability(:max_pairs,               :pairs,  "maximum number of color-pairs on the screen"),
+    TermCapability(:no_color_video,          :ncv,    "video attributes that cannot be used with colors"),
+    TermCapability(:buffer_capacity,         :bufsz,  "numbers of bytes buffered before printing"),
+    TermCapability(:dot_vert_spacing,        :spinv,  "spacing of pins vertically in pins per inch"),
+    TermCapability(:dot_horz_spacing,        :spinh,  "spacing of dots horizontally in dots per inch"),
+    TermCapability(:max_micro_address,       :maddr,  "maximum value in micro_..._address"),
+    TermCapability(:max_micro_jump,          :mjump,  "maximum value in parm_..._micro"),
+    TermCapability(:micro_col_size,          :mcs,    "character step size when in micro mode"),
+    TermCapability(:micro_line_size,         :mls,    "line step size when in micro mode"),
+    TermCapability(:number_of_pins,          :npins,  "numbers of pins in print-head"),
+    TermCapability(:output_res_char,         :orc,    "horizontal resolution in units per line"),
+    TermCapability(:output_res_line,         :orl,    "vertical resolution in units per line"),
+    TermCapability(:output_res_horz_inch,    :orhi,   "horizontal resolution in units per inch"),
+    TermCapability(:output_res_vert_inch,    :orvi,   "vertical resolution in units per inch"),
+    TermCapability(:print_rate,              :cps,    "print rate in characters per second"),
+    TermCapability(:wide_char_size,          :widcs,  "character step size when in double wide mode"),
+    TermCapability(:buttons,                 :btns,   "number of buttons on mouse"),
+    TermCapability(:bit_image_entwining,     :bitwin, "number of passes for each bit-image row"),
+    TermCapability(:bit_image_type,          :bitype, "type of bit-image device"),
+    TermCapability(:magic_cookie_glitch_ul,  :OTug,   "number of blanks left by ul"),
+    TermCapability(:carriage_return_delay,   :OTdC,   "pad needed for CR"),
+    TermCapability(:new_line_delay,          :OTdN,   "pad needed for LF"),
+    TermCapability(:backspace_delay,         :OTdB,   "padding required for ^H"),
+    TermCapability(:horizontal_tab_delay,    :OTdT,   "padding required for ^I"),
+    TermCapability(:number_of_function_keys, :OTkn,   "count of function keys"),
+]
+
+"""
+Ordered list of known terminal capability string fields, as of NCurses 6.4-20230311.
+"""
+const TERM_STRINGS = [
+    TermCapability(:back_tab,                  :cbt,      "back tab (P)"),
+    TermCapability(:bell,                      :bel,      "audible signal (bell) (P)"),
+    TermCapability(:carriage_return,           :cr,       "carriage return (P*) (P*)"),
+    TermCapability(:change_scroll_region,      :csr,      "change region to line #1 to line #2 (P)"),
+    TermCapability(:clear_all_tabs,            :tbc,      "clear all tab stops (P)"),
+    TermCapability(:clear_screen,              :clear,    "clear screen and home cursor (P*)"),
+    TermCapability(:clr_eol,                   :el,       "clear to end of line (P)"),
+    TermCapability(:clr_eos,                   :ed,       "clear to end of screen (P*)"),
+    TermCapability(:column_address,            :hpa,      "horizontal position #1, absolute (P)"),
+    TermCapability(:command_character,         :cmdch,    "terminal settable cmd character in prototype !?"),
+    TermCapability(:cursor_address,            :cup,      "move to row #1 columns #2"),
+    TermCapability(:cursor_down,               :cud1,     "down one line"),
+    TermCapability(:cursor_home,               :home,     "home cursor (if no cup)"),
+    TermCapability(:cursor_invisible,          :civis,    "make cursor invisible"),
+    TermCapability(:cursor_left,               :cub1,     "move left one space"),
+    TermCapability(:cursor_mem_address,        :mrcup,    "memory relative cursor addressing, move to row #1 columns #2"),
+    TermCapability(:cursor_normal,             :cnorm,    "make cursor appear normal (undo civis/cvvis)"),
+    TermCapability(:cursor_right,              :cuf1,     "non-destructive space (move right one space)"),
+    TermCapability(:cursor_to_ll,              :ll,       "last line, first column (if no cup)"),
+    TermCapability(:cursor_up,                 :cuu1,     "up one line"),
+    TermCapability(:cursor_visible,            :cvvis,    "make cursor very visible"),
+    TermCapability(:delete_character,          :dch1,     "delete character (P*)"),
+    TermCapability(:delete_line,               :dl1,      "delete line (P*)"),
+    TermCapability(:dis_status_line,           :dsl,      "disable status line"),
+    TermCapability(:down_half_line,            :hd,       "half a line down"),
+    TermCapability(:enter_alt_charset_mode,    :smacs,    "start alternate character set (P)"),
+    TermCapability(:enter_blink_mode,          :blink,    "turn on blinking"),
+    TermCapability(:enter_bold_mode,           :bold,     "turn on bold (extra bright) mode"),
+    TermCapability(:enter_ca_mode,             :smcup,    "string to start programs using cup"),
+    TermCapability(:enter_delete_mode,         :smdc,     "enter delete mode"),
+    TermCapability(:enter_dim_mode,            :dim,      "turn on half-bright mode"),
+    TermCapability(:enter_insert_mode,         :smir,     "enter insert mode"),
+    TermCapability(:enter_secure_mode,         :invis,    "turn on blank mode (characters invisible)"),
+    TermCapability(:enter_protected_mode,      :prot,     "turn on protected mode"),
+    TermCapability(:enter_reverse_mode,        :rev,      "turn on reverse video mode"),
+    TermCapability(:enter_standout_mode,       :smso,     "begin standout mode"),
+    TermCapability(:enter_underline_mode,      :smul,     "begin underline mode"),
+    TermCapability(:erase_chars,               :ech,      "erase #1 characters (P)"),
+    TermCapability(:exit_alt_charset_mode,     :rmacs,    "end alternate character set (P)"),
+    TermCapability(:exit_attribute_mode,       :sgr0,     "turn off all attributes"),
+    TermCapability(:exit_ca_mode,              :rmcup,    "strings to end programs using cup"),
+    TermCapability(:exit_delete_mode,          :rmdc,     "end delete mode"),
+    TermCapability(:exit_insert_mode,          :rmir,     "exit insert mode"),
+    TermCapability(:exit_standout_mode,        :rmso,     "exit standout mode"),
+    TermCapability(:exit_underline_mode,       :rmul,     "exit underline mode"),
+    TermCapability(:flash_screen,              :flash,    "visible bell (may not move cursor)"),
+    TermCapability(:form_feed,                 :ff,       "hardcopy terminal page eject (P*)"),
+    TermCapability(:from_status_line,          :fsl,      "return from status line"),
+    TermCapability(:init_1string,              :is1,      "initialization string"),
+    TermCapability(:init_2string,              :is2,      "initialization string"),
+    TermCapability(:init_3string,              :is3,      "initialization string"),
+    TermCapability(:init_file,                 :if,       "name of initialization file"),
+    TermCapability(:insert_character,          :ich1,     "insert character (P)"),
+    TermCapability(:insert_line,               :il1,      "insert line (P*)"),
+    TermCapability(:insert_padding,            :ip,       "insert padding after inserted character"),
+    TermCapability(:key_backspace,             :kbs,      "backspace key"),
+    TermCapability(:key_catab,                 :ktbc,     "clear-all-tabs key"),
+    TermCapability(:key_clear,                 :kclr,     "clear-screen or erase key"),
+    TermCapability(:key_ctab,                  :kctab,    "clear-tab key"),
+    TermCapability(:key_dc,                    :kdch1,    "delete-character key"),
+    TermCapability(:key_dl,                    :kdl1,     "delete-line key"),
+    TermCapability(:key_down,                  :kcud1,    "down-arrow key"),
+    TermCapability(:key_eic,                   :krmir,    "sent by rmir or smir in insert mode"),
+    TermCapability(:key_eol,                   :kel,      "clear-to-end-of-line key"),
+    TermCapability(:key_eos,                   :ked,      "clear-to-end-of-screen key"),
+    TermCapability(:key_f0,                    :kf0,      "F0 function key"),
+    TermCapability(:key_f1,                    :kf1,      "F1 function key"),
+    TermCapability(:key_f10,                   :kf10,     "F10 function key"),
+    TermCapability(:key_f2,                    :kf2,      "F2 function key"),
+    TermCapability(:key_f3,                    :kf3,      "F3 function key"),
+    TermCapability(:key_f4,                    :kf4,      "F4 function key"),
+    TermCapability(:key_f5,                    :kf5,      "F5 function key"),
+    TermCapability(:key_f6,                    :kf6,      "F6 function key"),
+    TermCapability(:key_f7,                    :kf7,      "F7 function key"),
+    TermCapability(:key_f8,                    :kf8,      "F8 function key"),
+    TermCapability(:key_f9,                    :kf9,      "F9 function key"),
+    TermCapability(:key_home,                  :khome,    "home key"),
+    TermCapability(:key_ic,                    :kich1,    "insert-character key"),
+    TermCapability(:key_il,                    :kil1,     "insert-line key"),
+    TermCapability(:key_left,                  :kcub1,    "left-arrow key"),
+    TermCapability(:key_ll,                    :kll,      "lower-left key (home down)"),
+    TermCapability(:key_npage,                 :knp,      "next-page key"),
+    TermCapability(:key_ppage,                 :kpp,      "previous-page key"),
+    TermCapability(:key_right,                 :kcuf1,    "right-arrow key"),
+    TermCapability(:key_sf,                    :kind,     "scroll-forward key"),
+    TermCapability(:key_sr,                    :kri,      "scroll-backward key"),
+    TermCapability(:key_stab,                  :khts,     "set-tab key"),
+    TermCapability(:key_up,                    :kcuu1,    "up-arrow key"),
+    TermCapability(:keypad_local,              :rmkx,     "leave 'keyboard_transmit' mode"),
+    TermCapability(:keypad_xmit,               :smkx,     "enter 'keyboard_transmit' mode"),
+    TermCapability(:lab_f0,                    :lf0,      "label on function key f0 if not f0"),
+    TermCapability(:lab_f1,                    :lf1,      "label on function key f1 if not f1"),
+    TermCapability(:lab_f10,                   :lf10,     "label on function key f10 if not f10"),
+    TermCapability(:lab_f2,                    :lf2,      "label on function key f2 if not f2"),
+    TermCapability(:lab_f3,                    :lf3,      "label on function key f3 if not f3"),
+    TermCapability(:lab_f4,                    :lf4,      "label on function key f4 if not f4"),
+    TermCapability(:lab_f5,                    :lf5,      "label on function key f5 if not f5"),
+    TermCapability(:lab_f6,                    :lf6,      "label on function key f6 if not f6"),
+    TermCapability(:lab_f7,                    :lf7,      "label on function key f7 if not f7"),
+    TermCapability(:lab_f8,                    :lf8,      "label on function key f8 if not f8"),
+    TermCapability(:lab_f9,                    :lf9,      "label on function key f9 if not f9"),
+    TermCapability(:meta_off,                  :rmm,      "turn off meta mode"),
+    TermCapability(:meta_on,                   :smm,      "turn on meta mode (8th-bit on)"),
+    TermCapability(:newline,                   :nel,      "newline (behave like cr followed by lf)"),
+    TermCapability(:pad_char,                  :pad,      "padding char (instead of null)"),
+    TermCapability(:parm_dch,                  :dch,      "delete #1 characters (P*)"),
+    TermCapability(:parm_delete_line,          :dl,       "delete #1 lines (P*)"),
+    TermCapability(:parm_down_cursor,          :cud,      "down #1 lines (P*)"),
+    TermCapability(:parm_ich,                  :ich,      "insert #1 characters (P*)"),
+    TermCapability(:parm_index,                :indn,     "scroll forward #1 lines (P)"),
+    TermCapability(:parm_insert_line,          :il,       "insert #1 lines (P*)"),
+    TermCapability(:parm_left_cursor,          :cub,      "move #1 characters to the left (P)"),
+    TermCapability(:parm_right_cursor,         :cuf,      "move #1 characters to the right (P*)"),
+    TermCapability(:parm_rindex,               :rin,      "scroll back #1 lines (P)"),
+    TermCapability(:parm_up_cursor,            :cuu,      "up #1 lines (P*)"),
+    TermCapability(:pkey_key,                  :pfkey,    "program function key #1 to type string #2"),
+    TermCapability(:pkey_local,                :pfloc,    "program function key #1 to execute string #2"),
+    TermCapability(:pkey_xmit,                 :pfx,      "program function key #1 to transmit string #2"),
+    TermCapability(:print_screen,              :mc0,      "print contents of screen"),
+    TermCapability(:prtr_off,                  :mc4,      "turn off printer"),
+    TermCapability(:prtr_on,                   :mc5,      "turn on printer"),
+    TermCapability(:repeat_char,               :rep,      "repeat char #1 #2 times (P*)"),
+    TermCapability(:reset_1string,             :rs1,      "reset string"),
+    TermCapability(:reset_2string,             :rs2,      "reset string"),
+    TermCapability(:reset_3string,             :rs3,      "reset string"),
+    TermCapability(:reset_file,                :rf,       "name of reset file"),
+    TermCapability(:restore_cursor,            :rc,       "restore cursor to position of last save_cursor"),
+    TermCapability(:row_address,               :vpa,      "vertical position #1 absolute (P)"),
+    TermCapability(:save_cursor,               :sc,       "save current cursor position (P)"),
+    TermCapability(:scroll_forward,            :ind,      "scroll text up (P)"),
+    TermCapability(:scroll_reverse,            :ri,       "scroll text down (P)"),
+    TermCapability(:set_attributes,            :sgr,      "define video attributes #1-#9 (PG9)"),
+    TermCapability(:set_tab,                   :hts,      "set a tab in every row, current columns"),
+    TermCapability(:set_window,                :wind,     "current window is lines #1-#2 cols #3-#4"),
+    TermCapability(:tab,                       :ht,       "tab to next 8-space hardware tab stop"),
+    TermCapability(:to_status_line,            :tsl,      "move to status line, column #1"),
+    TermCapability(:underline_char,            :uc,       "underline char and move past it"),
+    TermCapability(:up_half_line,              :hu,       "half a line up"),
+    TermCapability(:init_prog,                 :iprog,    "path name of program for initialization"),
+    TermCapability(:key_a1,                    :ka1,      "upper left of keypad"),
+    TermCapability(:key_a3,                    :ka3,      "upper right of keypad"),
+    TermCapability(:key_b2,                    :kb2,      "center of keypad"),
+    TermCapability(:key_c1,                    :kc1,      "lower left of keypad"),
+    TermCapability(:key_c3,                    :kc3,      "lower right of keypad"),
+    TermCapability(:prtr_non,                  :mc5p,     "turn on printer for #1 bytes"),
+    TermCapability(:char_padding,              :rmp,      "like ip but when in insert mode"),
+    TermCapability(:acs_chars,                 :acsc,     "graphics charset pairs, based on vt100"),
+    TermCapability(:plab_norm,                 :pln,      "program label #1 to show string #2"),
+    TermCapability(:key_btab,                  :kcbt,     "back-tab key"),
+    TermCapability(:enter_xon_mode,            :smxon,    "turn on xon/xoff handshaking"),
+    TermCapability(:exit_xon_mode,             :rmxon,    "turn off xon/xoff handshaking"),
+    TermCapability(:enter_am_mode,             :smam,     "turn on automatic margins"),
+    TermCapability(:exit_am_mode,              :rmam,     "turn off automatic margins"),
+    TermCapability(:xon_character,             :xonc,     "XON character"),
+    TermCapability(:xoff_character,            :xoffc,    "XOFF character"),
+    TermCapability(:ena_acs,                   :enacs,    "enable alternate char set"),
+    TermCapability(:label_on,                  :smln,     "turn on soft labels"),
+    TermCapability(:label_off,                 :rmln,     "turn off soft labels"),
+    TermCapability(:key_beg,                   :kbeg,     "begin key"),
+    TermCapability(:key_cancel,                :kcan,     "cancel key"),
+    TermCapability(:key_close,                 :kclo,     "close key"),
+    TermCapability(:key_command,               :kcmd,     "command key"),
+    TermCapability(:key_copy,                  :kcpy,     "copy key"),
+    TermCapability(:key_create,                :kcrt,     "create key"),
+    TermCapability(:key_end,                   :kend,     "end key"),
+    TermCapability(:key_enter,                 :kent,     "enter/send key"),
+    TermCapability(:key_exit,                  :kext,     "exit key"),
+    TermCapability(:key_find,                  :kfnd,     "find key"),
+    TermCapability(:key_help,                  :khlp,     "help key"),
+    TermCapability(:key_mark,                  :kmrk,     "mark key"),
+    TermCapability(:key_message,               :kmsg,     "message key"),
+    TermCapability(:key_move,                  :kmov,     "move key"),
+    TermCapability(:key_next,                  :knxt,     "next key"),
+    TermCapability(:key_open,                  :kopn,     "open key"),
+    TermCapability(:key_options,               :kopt,     "options key"),
+    TermCapability(:key_previous,              :kprv,     "previous key"),
+    TermCapability(:key_print,                 :kprt,     "print key"),
+    TermCapability(:key_redo,                  :krdo,     "redo key"),
+    TermCapability(:key_reference,             :kref,     "reference key"),
+    TermCapability(:key_refresh,               :krfr,     "refresh key"),
+    TermCapability(:key_replace,               :krpl,     "replace key"),
+    TermCapability(:key_restart,               :krst,     "restart key"),
+    TermCapability(:key_resume,                :kres,     "resume key"),
+    TermCapability(:key_save,                  :ksav,     "save key"),
+    TermCapability(:key_suspend,               :kspd,     "suspend key"),
+    TermCapability(:key_undo,                  :kund,     "undo key"),
+    TermCapability(:key_sbeg,                  :kBEG,     "shifted begin key"),
+    TermCapability(:key_scancel,               :kCAN,     "shifted cancel key"),
+    TermCapability(:key_scommand,              :kCMD,     "shifted command key"),
+    TermCapability(:key_scopy,                 :kCPY,     "shifted copy key"),
+    TermCapability(:key_screate,               :kCRT,     "shifted create key"),
+    TermCapability(:key_sdc,                   :kDC,      "shifted delete-character key"),
+    TermCapability(:key_sdl,                   :kDL,      "shifted delete-line key"),
+    TermCapability(:key_select,                :kslt,     "select key"),
+    TermCapability(:key_send,                  :kEND,     "shifted end key"),
+    TermCapability(:key_seol,                  :kEOL,     "shifted clear-to-end-of-line key"),
+    TermCapability(:key_sexit,                 :kEXT,     "shifted exit key"),
+    TermCapability(:key_sfind,                 :kFND,     "shifted find key"),
+    TermCapability(:key_shelp,                 :kHLP,     "shifted help key"),
+    TermCapability(:key_shome,                 :kHOM,     "shifted home key"),
+    TermCapability(:key_sic,                   :kIC,      "shifted insert-character key"),
+    TermCapability(:key_sleft,                 :kLFT,     "shifted left-arrow key"),
+    TermCapability(:key_smessage,              :kMSG,     "shifted message key"),
+    TermCapability(:key_smove,                 :kMOV,     "shifted move key"),
+    TermCapability(:key_snext,                 :kNXT,     "shifted next key"),
+    TermCapability(:key_soptions,              :kOPT,     "shifted options key"),
+    TermCapability(:key_sprevious,             :kPRV,     "shifted previous key"),
+    TermCapability(:key_sprint,                :kPRT,     "shifted print key"),
+    TermCapability(:key_sredo,                 :kRDO,     "shifted redo key"),
+    TermCapability(:key_sreplace,              :kRPL,     "shifted replace key"),
+    TermCapability(:key_sright,                :kRIT,     "shifted right-arrow key"),
+    TermCapability(:key_srsume,                :kRES,     "shifted resume key"),
+    TermCapability(:key_ssave,                 :kSAV,     "shifted save key"),
+    TermCapability(:key_ssuspend,              :kSPD,     "shifted suspend key"),
+    TermCapability(:key_sundo,                 :kUND,     "shifted undo key"),
+    TermCapability(:req_for_input,             :rfi,      "send next input char (for ptys)"),
+    TermCapability(:key_f11,                   :kf11,     "F11 function key"),
+    TermCapability(:key_f12,                   :kf12,     "F12 function key"),
+    TermCapability(:key_f13,                   :kf13,     "F13 function key"),
+    TermCapability(:key_f14,                   :kf14,     "F14 function key"),
+    TermCapability(:key_f15,                   :kf15,     "F15 function key"),
+    TermCapability(:key_f16,                   :kf16,     "F16 function key"),
+    TermCapability(:key_f17,                   :kf17,     "F17 function key"),
+    TermCapability(:key_f18,                   :kf18,     "F18 function key"),
+    TermCapability(:key_f19,                   :kf19,     "F19 function key"),
+    TermCapability(:key_f20,                   :kf20,     "F20 function key"),
+    TermCapability(:key_f21,                   :kf21,     "F21 function key"),
+    TermCapability(:key_f22,                   :kf22,     "F22 function key"),
+    TermCapability(:key_f23,                   :kf23,     "F23 function key"),
+    TermCapability(:key_f24,                   :kf24,     "F24 function key"),
+    TermCapability(:key_f25,                   :kf25,     "F25 function key"),
+    TermCapability(:key_f26,                   :kf26,     "F26 function key"),
+    TermCapability(:key_f27,                   :kf27,     "F27 function key"),
+    TermCapability(:key_f28,                   :kf28,     "F28 function key"),
+    TermCapability(:key_f29,                   :kf29,     "F29 function key"),
+    TermCapability(:key_f30,                   :kf30,     "F30 function key"),
+    TermCapability(:key_f31,                   :kf31,     "F31 function key"),
+    TermCapability(:key_f32,                   :kf32,     "F32 function key"),
+    TermCapability(:key_f33,                   :kf33,     "F33 function key"),
+    TermCapability(:key_f34,                   :kf34,     "F34 function key"),
+    TermCapability(:key_f35,                   :kf35,     "F35 function key"),
+    TermCapability(:key_f36,                   :kf36,     "F36 function key"),
+    TermCapability(:key_f37,                   :kf37,     "F37 function key"),
+    TermCapability(:key_f38,                   :kf38,     "F38 function key"),
+    TermCapability(:key_f39,                   :kf39,     "F39 function key"),
+    TermCapability(:key_f40,                   :kf40,     "F40 function key"),
+    TermCapability(:key_f41,                   :kf41,     "F41 function key"),
+    TermCapability(:key_f42,                   :kf42,     "F42 function key"),
+    TermCapability(:key_f43,                   :kf43,     "F43 function key"),
+    TermCapability(:key_f44,                   :kf44,     "F44 function key"),
+    TermCapability(:key_f45,                   :kf45,     "F45 function key"),
+    TermCapability(:key_f46,                   :kf46,     "F46 function key"),
+    TermCapability(:key_f47,                   :kf47,     "F47 function key"),
+    TermCapability(:key_f48,                   :kf48,     "F48 function key"),
+    TermCapability(:key_f49,                   :kf49,     "F49 function key"),
+    TermCapability(:key_f50,                   :kf50,     "F50 function key"),
+    TermCapability(:key_f51,                   :kf51,     "F51 function key"),
+    TermCapability(:key_f52,                   :kf52,     "F52 function key"),
+    TermCapability(:key_f53,                   :kf53,     "F53 function key"),
+    TermCapability(:key_f54,                   :kf54,     "F54 function key"),
+    TermCapability(:key_f55,                   :kf55,     "F55 function key"),
+    TermCapability(:key_f56,                   :kf56,     "F56 function key"),
+    TermCapability(:key_f57,                   :kf57,     "F57 function key"),
+    TermCapability(:key_f58,                   :kf58,     "F58 function key"),
+    TermCapability(:key_f59,                   :kf59,     "F59 function key"),
+    TermCapability(:key_f60,                   :kf60,     "F60 function key"),
+    TermCapability(:key_f61,                   :kf61,     "F61 function key"),
+    TermCapability(:key_f62,                   :kf62,     "F62 function key"),
+    TermCapability(:key_f63,                   :kf63,     "F63 function key"),
+    TermCapability(:clr_bol,                   :el1,      "Clear to beginning of line"),
+    TermCapability(:clear_margins,             :mgc,      "clear right and left soft margins"),
+    TermCapability(:set_left_margin,           :smgl,     "set left soft margin at current column."),
+    TermCapability(:set_right_margin,          :smgr,     "set right soft margin at current column"),
+    TermCapability(:label_format,              :fln,      "label format"),
+    TermCapability(:set_clock,                 :sclk,     "set clock, #1 hrs #2 mins #3 secs"),
+    TermCapability(:display_clock,             :dclk,     "display clock"),
+    TermCapability(:remove_clock,              :rmclk,    "remove clock"),
+    TermCapability(:create_window,             :cwin,     "define a window #1 from #2,#3 to #4,#5"),
+    TermCapability(:goto_window,               :wingo,    "go to window #1"),
+    TermCapability(:hangup,                    :hup,      "hang-up phone"),
+    TermCapability(:dial_phone,                :dial,     "dial number #1"),
+    TermCapability(:quick_dial,                :qdial,    "dial number #1 without checking"),
+    TermCapability(:tone,                      :tone,     "select touch tone dialing"),
+    TermCapability(:pulse,                     :pulse,    "select pulse dialing"),
+    TermCapability(:flash_hook,                :hook,     "flash switch hook"),
+    TermCapability(:fixed_pause,               :pause,    "pause for 2-3 seconds"),
+    TermCapability(:wait_tone,                 :wait,     "wait for dial-tone"),
+    TermCapability(:user0,                     :u0,       "User string #0"),
+    TermCapability(:user1,                     :u1,       "User string #1"),
+    TermCapability(:user2,                     :u2,       "User string #2"),
+    TermCapability(:user3,                     :u3,       "User string #3"),
+    TermCapability(:user4,                     :u4,       "User string #4"),
+    TermCapability(:user5,                     :u5,       "User string #5"),
+    TermCapability(:user6,                     :u6,       "User string #6"),
+    TermCapability(:user7,                     :u7,       "User string #7"),
+    TermCapability(:user8,                     :u8,       "User string #8"),
+    TermCapability(:user9,                     :u9,       "User string #9"),
+    TermCapability(:orig_pair,                 :op,       "Set default pair to its original value"),
+    TermCapability(:orig_colors,               :oc,       "Set all color pairs to the original ones"),
+    TermCapability(:initialize_color,          :initc,    "initialize color #1 to (#2,#3,#4)"),
+    TermCapability(:initialize_pair,           :initp,    "Initialize color pair #1 to fg=(#2,#3,#4), bg=(#5,#6,#7)"),
+    TermCapability(:set_color_pair,            :scp,      "Set current color pair to #1"),
+    TermCapability(:set_foreground,            :setf,     "Set foreground color #1"),
+    TermCapability(:set_background,            :setb,     "Set background color #1"),
+    TermCapability(:change_char_pitch,         :cpi,      "Change number of characters per inch to #1"),
+    TermCapability(:change_line_pitch,         :lpi,      "Change number of lines per inch to #1"),
+    TermCapability(:change_res_horz,           :chr,      "Change horizontal resolution to #1"),
+    TermCapability(:change_res_vert,           :cvr,      "Change vertical resolution to #1"),
+    TermCapability(:define_char,               :defc,     "Define a character #1, #2 dots wide, descender #3"),
+    TermCapability(:enter_doublewide_mode,     :swidm,    "Enter double-wide mode"),
+    TermCapability(:enter_draft_quality,       :sdrfq,    "Enter draft-quality mode"),
+    TermCapability(:enter_italics_mode,        :sitm,     "Enter italic mode"),
+    TermCapability(:enter_leftward_mode,       :slm,      "Start leftward carriage motion"),
+    TermCapability(:enter_micro_mode,          :smicm,    "Start micro-motion mode"),
+    TermCapability(:enter_near_letter_quality, :snlq,     "Enter NLQ mode"),
+    TermCapability(:enter_normal_quality,      :snrmq,    "Enter normal-quality mode"),
+    TermCapability(:enter_shadow_mode,         :sshm,     "Enter shadow-print mode"),
+    TermCapability(:enter_subscript_mode,      :ssubm,    "Enter subscript mode"),
+    TermCapability(:enter_superscript_mode,    :ssupm,    "Enter superscript mode"),
+    TermCapability(:enter_upward_mode,         :sum,      "Start upward carriage motion"),
+    TermCapability(:exit_doublewide_mode,      :rwidm,    "End double-wide mode"),
+    TermCapability(:exit_italics_mode,         :ritm,     "End italic mode"),
+    TermCapability(:exit_leftward_mode,        :rlm,      "End left-motion mode"),
+    TermCapability(:exit_micro_mode,           :rmicm,    "End micro-motion mode"),
+    TermCapability(:exit_shadow_mode,          :rshm,     "End shadow-print mode"),
+    TermCapability(:exit_subscript_mode,       :rsubm,    "End subscript mode"),
+    TermCapability(:exit_superscript_mode,     :rsupm,    "End superscript mode"),
+    TermCapability(:exit_upward_mode,          :rum,      "End reverse character motion"),
+    TermCapability(:micro_column_address,      :mhpa,     "Like column_address in micro mode"),
+    TermCapability(:micro_down,                :mcud1,    "Like cursor_down in micro mode"),
+    TermCapability(:micro_left,                :mcub1,    "Like cursor_left in micro mode"),
+    TermCapability(:micro_right,               :mcuf1,    "Like cursor_right in micro mode"),
+    TermCapability(:micro_row_address,         :mvpa,     "Like row_address #1 in micro mode"),
+    TermCapability(:micro_up,                  :mcuu1,    "Like cursor_up in micro mode"),
+    TermCapability(:order_of_pins,             :porder,   "Match software bits to print-head pins"),
+    TermCapability(:parm_down_micro,           :mcud,     "Like parm_down_cursor in micro mode"),
+    TermCapability(:parm_left_micro,           :mcub,     "Like parm_left_cursor in micro mode"),
+    TermCapability(:parm_right_micro,          :mcuf,     "Like parm_right_cursor in micro mode"),
+    TermCapability(:parm_up_micro,             :mcuu,     "Like parm_up_cursor in micro mode"),
+    TermCapability(:select_char_set,           :scs,      "Select character set, #1"),
+    TermCapability(:set_bottom_margin,         :smgb,     "Set bottom margin at current line"),
+    TermCapability(:set_bottom_margin_parm,    :smgbp,    "Set bottom margin at line #1 or (if smgtp is not given) #2 lines from bottom"),
+    TermCapability(:set_left_margin_parm,      :smglp,    "Set left (right) margin at column #1"),
+    TermCapability(:set_right_margin_parm,     :smgrp,    "Set right margin at column #1"),
+    TermCapability(:set_top_margin,            :smgt,     "Set top margin at current line"),
+    TermCapability(:set_top_margin_parm,       :smgtp,    "Set top (bottom) margin at row #1"),
+    TermCapability(:start_bit_image,           :sbim,     "Start printing bit image graphics"),
+    TermCapability(:start_char_set_def,        :scsd,     "Start character set definition #1, with #2 characters in the set"),
+    TermCapability(:stop_bit_image,            :rbim,     "Stop printing bit image graphics"),
+    TermCapability(:stop_char_set_def,         :rcsd,     "End definition of character set #1"),
+    TermCapability(:subscript_characters,      :subcs,    "List of subscriptable characters"),
+    TermCapability(:superscript_characters,    :supcs,    "List of superscriptable characters"),
+    TermCapability(:these_cause_cr,            :docr,     "Printing any of these characters causes CR"),
+    TermCapability(:zero_motion,               :zerom,    "No motion for subsequent character"),
+    TermCapability(:char_set_names,            :csnm,     "Produce #1'th item from list of character set names"),
+    TermCapability(:key_mouse,                 :kmous,    "Mouse event has occurred"),
+    TermCapability(:mouse_info,                :minfo,    "Mouse status information"),
+    TermCapability(:req_mouse_pos,             :reqmp,    "Request mouse position"),
+    TermCapability(:get_mouse,                 :getm,     "Curses should get button events, parameter #1 not documented."),
+    TermCapability(:set_a_foreground,          :setaf,    "Set foreground color to #1, using ANSI escape"),
+    TermCapability(:set_a_background,          :setab,    "Set background color to #1, using ANSI escape"),
+    TermCapability(:pkey_plab,                 :pfxl,     "Program function key #1 to type string #2 and show string #3"),
+    TermCapability(:device_type,               :devt,     "Indicate language/codeset support"),
+    TermCapability(:code_set_init,             :csin,     "Init sequence for multiple codesets"),
+    TermCapability(:set0_des_seq,              :s0ds,     "Shift to codeset 0 (EUC set 0, ASCII)"),
+    TermCapability(:set1_des_seq,              :s1ds,     "Shift to codeset 1"),
+    TermCapability(:set2_des_seq,              :s2ds,     "Shift to codeset 2"),
+    TermCapability(:set3_des_seq,              :s3ds,     "Shift to codeset 3"),
+    TermCapability(:set_lr_margin,             :smglr,    "Set both left and right margins to #1, #2.  (ML is not in BSD termcap)."),
+    TermCapability(:set_tb_margin,             :smgtb,    "Sets both top and bottom margins to #1, #2"),
+    TermCapability(:bit_image_repeat,          :birep,    "Repeat bit image cell #1 #2 times"),
+    TermCapability(:bit_image_newline,         :binel,    "Move to next row of the bit image"),
+    TermCapability(:bit_image_carriage_return, :bicr,     "Move to beginning of same row"),
+    TermCapability(:color_names,               :colornm,  "Give name for color #1"),
+    TermCapability(:define_bit_image_region,   :defbi,    "Define rectangular bit image region"),
+    TermCapability(:end_bit_image_region,      :endbi,    "End a bit-image region"),
+    TermCapability(:set_color_band,            :setcolor, "Change to ribbon color #1"),
+    TermCapability(:set_page_length,           :slines,   "Set page length to #1 lines"),
+    TermCapability(:display_pc_char,           :dispc,    "Display PC character #1"),
+    TermCapability(:enter_pc_charset_mode,     :smpch,    "Enter PC character display mode"),
+    TermCapability(:exit_pc_charset_mode,      :rmpch,    "Exit PC character display mode"),
+    TermCapability(:enter_scancode_mode,       :smsc,     "Enter PC scancode mode"),
+    TermCapability(:exit_scancode_mode,        :rmsc,     "Exit PC scancode mode"),
+    TermCapability(:pc_term_options,           :pctrm,    "PC terminal options"),
+    TermCapability(:scancode_escape,           :scesc,    "Escape for scancode emulation"),
+    TermCapability(:alt_scancode_esc,          :scesa,    "Alternate escape for scancode emulation"),
+    TermCapability(:enter_horizontal_hl_mode,  :ehhlm,    "Enter horizontal highlight mode"),
+    TermCapability(:enter_left_hl_mode,        :elhlm,    "Enter left highlight mode"),
+    TermCapability(:enter_low_hl_mode,         :elohlm,   "Enter low highlight mode"),
+    TermCapability(:enter_right_hl_mode,       :erhlm,    "Enter right highlight mode"),
+    TermCapability(:enter_top_hl_mode,         :ethlm,    "Enter top highlight mode"),
+    TermCapability(:enter_vertical_hl_mode,    :evhlm,    "Enter vertical highlight mode"),
+    TermCapability(:set_a_attributes,          :sgr1,     "Define second set of video attributes #1-#6"),
+    TermCapability(:set_pglen_inch,            :slength,  "Set page length to #1 hundredth of an inch (some implementations use sL for termcap)."),
+    TermCapability(:termcap_init2,             :OTi2,     "secondary initialization string"),
+    TermCapability(:termcap_reset,             :OTrs,     "terminal reset string"),
+    TermCapability(:linefeed_if_not_lf,        :OTnl,     "use to move down"),
+    TermCapability(:backspace_if_not_bs,       :OTbc,     "move left, if not ^H"),
+    TermCapability(:other_non_function_keys,   :OTko,     "list of self-mapped keycaps"),
+    TermCapability(:arrow_key_map,             :OTma,     "map motion-keys for vi version 2"),
+    TermCapability(:acs_ulcorner,              :OTG2,     "single upper left"),
+    TermCapability(:acs_llcorner,              :OTG3,     "single lower left"),
+    TermCapability(:acs_urcorner,              :OTG1,     "single upper right"),
+    TermCapability(:acs_lrcorner,              :OTG4,     "single lower right"),
+    TermCapability(:acs_ltee,                  :OTGR,     "tee pointing right"),
+    TermCapability(:acs_rtee,                  :OTGL,     "tee pointing left"),
+    TermCapability(:acs_btee,                  :OTGU,     "tee pointing up"),
+    TermCapability(:acs_ttee,                  :OTGD,     "tee pointing down"),
+    TermCapability(:acs_hline,                 :OTGH,     "single horizontal line"),
+    TermCapability(:acs_vline,                 :OTGV,     "single vertical line"),
+    TermCapability(:acs_plus,                  :OTGC,     "single intersection"),
+    TermCapability(:memory_lock,               :meml,     "lock memory above cursor"),
+    TermCapability(:memory_unlock,             :memu,     "unlock memory"),
+    TermCapability(:box_chars_1,               :box1,     "box characters primary set"),
+]
+
+"""
+Terminfo extensions that NCurses 6.4-20230311 is aware of.
+"""
+const TERM_USER = Dict{Tuple{DataType, Symbol}, Union{Tuple{Nothing, String}, Tuple{Symbol, String}}}(
+    (Int,    :CO )    => (nothing, "number of indexed colors overlaying RGB space"),
+    (String, :E3)     => (nothing, "clears the terminal's scrollback buffer."),
+    (Bool,   :NQ)     => (nothing, "terminal does not support query/response"),
+    (Bool,   :RGB)    => (nothing, "use direct colors with 1/3 of color-pair bits per color."),
+    (Int,    :RGB)    => (nothing, "use direct colors with given number of bits per color."),
+    (String, :RGB)    => (nothing, "use direct colors with given bit-layout."),
+    (String, :TS)     => (nothing, "like \"tsl\", but uses no parameter."),
+    (Int,    :U8)     => (nothing, "terminal does/does not support VT100 SI/SO when processing UTF-8 encoding."),
+    (String, :XM)     => (nothing, "initialize alternate xterm mouse mode"),
+    (String, :grbom)  => (nothing, "disable real bold (not intensity bright) mode."),
+    (String, :gsbom)  => (nothing, "enable real bold (not intensity bright) mode."),
+    (String, :xm)     => (nothing, "mouse response"),
+    (String, :Rmol)   => (:exit_overline_mode, "remove overline-mode"),
+    (String, :Smol)   => (:enter_overline_mode, "set overline-mode"),
+    (String, :blink2) => (nothing, "turn on rapid blinking"),
+    (String, :norm)   => (nothing, "turn off bold and half-bright mode"),
+    (String, :opaq)   => (nothing, "turn off blank mode"),
+    (String, :setal)  => (nothing, "set underline-color"),
+    (String, :smul2)  => (nothing, "begin double underline mode"),
+    (Bool,   :AN)     => (nothing, "turn on autonuke."),
+    (Bool,   :AX)     => (nothing, "understands ANSI set default fg/bg color (\\E[39m / \\E[49m)."),
+    (String, :C0)     => (nothing, "use the string as a conversion table for font '0', like acsc."),
+    (Bool,   :C8)     => (nothing, "terminal shows bold as high-intensity colors."),
+    (String, :CE)     => (nothing, "switch cursor-keys back to normal mode."),
+    (String, :CS)     => (nothing, "switch cursor-keys to application mode."),
+    (String, :E0)     => (nothing, "switch charset 'G0' back to standard charset. Default is '\\E(B'."),
+    (Bool,   :G0)     => (nothing, "terminal can deal with ISO 2022 font selection sequences."),
+    (String, :KJ)     => (nothing, "set the encoding of the terminal."),
+    (Int,    :OL)     => (nothing, "set the screen program's output buffer limit."),
+    (String, :S0)     => (nothing, "switch charset 'G0' to the specified charset. Default is '\\E(%.'."),
+    (Bool,   :TF)     => (nothing, "add missing capabilities to screen's termcap/info entry. (Set by default)."),
+    (String, :WS)     => (nothing, "resize display. This capability has the desired width and height as arguments. SunView(tm) example: '\\E[8;%d;%dt'."),
+    (String, :XC)     => (nothing, "describe a translation of characters to strings depending on the current font."),
+    (Bool,   :XT)     => (nothing, "terminal understands special xterm sequences (OSC, mouse tracking)."),
+    (String, :Z0)     => (nothing, "change width to 132 columns."),
+    (String, :Z1)     => (nothing, "change width to 80 columns."),
+    (String, :Cr)     => (:reset_cursor_color, "restore the default cursor color."),
+    (String, :Cs)     => (:set_cursor_color, "set the cursor color."),
+    (String, :Csr)    => (nothing, "change the cursor style, overriding Ss."),
+    (String, :Ms)     => (:set_host_clipboard, "store the current buffer in the host terminal's selection (clipboard)."),
+    (String, :Se)     => (:reset_cursor_style, "reset the cursor style to the terminal initial state."),
+    (String, :Smulx)  => (:set_underline_style, "modify the appearance of underlines in VTE."),
+    (String, :Ss)     => (:set_cursor_style, "change the cursor style."),
+    (String, :rmxx)   => (:exit_strikeout_mode, "reset ECMA-48 strikeout/crossed-out attributes."),
+    (String, :smxx)   => (:enter_strikeout_mode, "set ECMA-48 strikeout/crossed-out attributes."),
+    (String, :BD)     => (nothing, "disables bracketed paste"),
+    (String, :BE)     => (nothing, "enables bracketed paste"),
+    (String, :PE)     => (nothing, "is sent after pasted text"),
+    (String, :PS)     => (nothing, "is sent before pasted text"),
+    (String, :RV)     => (nothing, "report terminal secondary device attributes"),
+    (String, :XR)     => (nothing, "report terminal version as a free-format string."),
+    (Bool,   :XF)     => (:xterm_focus, "terminal supports xterm focus in/out"),
+    (String, :rv)     => (nothing, "response to RV, regular expression"),
+    (String, :xr)     => (nothing, "response to XR, regular expression"),
+    (String, :csl)    => (:clear_status_line, "clear status line"),
+    (String, :kDC3)   => (:key_alt_delete_character, "alt delete-character"),
+    (String, :kDC4)   => (:key_shift_alt_delete_character, "shift+alt delete-character"),
+    (String, :kDC5)   => (:key_control_delete_character, "control delete-character"),
+    (String, :kDC6)   => (:key_shift_control_delete_character, "shift+control delete-character"),
+    (String, :kDC7)   => (:key_alt_control_delete_character, "alt+control delete-character"),
+    (String, :kDN)    => (:key_shift_down_cursor, "shift down-cursor"),
+    (String, :kDN3)   => (:key_alt_down_cursor, "alt down-cursor"),
+    (String, :kDN4)   => (:key_shift_alt_down_cursor, "shift+alt down-cursor"),
+    (String, :kDN5)   => (:key_control_down_cursor, "control down-cursor"),
+    (String, :kDN6)   => (:key_shift_control_down_cursor, "shift+control down-cursor"),
+    (String, :kDN7)   => (:key_alt_control_down_cursor, "alt+control down-cursor"),
+    (String, :kEND3)  => (:key_alt_end, "alt end"),
+    (String, :kEND4)  => (:key_shift_alt_end, "shift+alt end"),
+    (String, :kEND5)  => (:key_control_end, "control end"),
+    (String, :kEND6)  => (:key_shift_control_end, "shift+control end"),
+    (String, :kEND7)  => (:key_alt_control_end, "alt+control end"),
+    (String, :kHOM3)  => (:key_alt_home, "alt home"),
+    (String, :kHOM4)  => (:key_shift_alt_home, "shift+alt home"),
+    (String, :kHOM5)  => (:key_control_home, "control home"),
+    (String, :kHOM6)  => (:key_shift_control_home, "shift+control home"),
+    (String, :kHOM7)  => (:key_alt_control_home, "alt+control home"),
+    (String, :kIC3)   => (:key_alt_insert_character, "alt insert-character"),
+    (String, :kIC4)   => (:key_shift_alt_insert_character, "shift+alt insert-character"),
+    (String, :kIC5)   => (:key_control_insert_character, "control insert-character"),
+    (String, :kIC6)   => (:key_shift_control_insert_character, "shift+control insert-character"),
+    (String, :kIC7)   => (:key_alt_control_insert_character, "alt+control insert-character"),
+    (String, :kLFT3)  => (:key_alt_left_cursor, "alt left-cursor"),
+    (String, :kLFT4)  => (:key_shift_alt_left_cursor, "shift+alt left-cursor"),
+    (String, :kLFT5)  => (:key_control_left_cursor, "control left-cursor"),
+    (String, :kLFT6)  => (:key_shift_control_left_cursor, "shift+control left-cursor"),
+    (String, :kLFT7)  => (:key_alt_control_left_cursor, "alt+control left-cursor"),
+    (String, :kNXT3)  => (:key_alt_next, "alt next"),
+    (String, :kNXT4)  => (:key_shift_alt_next, "shift+alt next"),
+    (String, :kNXT5)  => (:key_control_next, "control next"),
+    (String, :kNXT6)  => (:key_shift_control_next, "shift+control next"),
+    (String, :kNXT7)  => (:key_alt_control_next, "alt+control next"),
+    (String, :kPRV3)  => (:key_alt_previous, "alt previous"),
+    (String, :kPRV4)  => (:key_shift_alt_previous, "shift+alt previous"),
+    (String, :kPRV5)  => (:key_control_previous, "control previous"),
+    (String, :kPRV6)  => (:key_shift_control_previous, "shift+control previous"),
+    (String, :kPRV7)  => (:key_alt_control_previous, "alt+control previous"),
+    (String, :kRIT3)  => (:key_alt_right_cursor, "alt right-cursor"),
+    (String, :kRIT4)  => (:key_shift_alt_right_cursor, "shift+alt right-cursor"),
+    (String, :kRIT5)  => (:key_control_right_cursor, "control right-cursor"),
+    (String, :kRIT6)  => (:key_shift_control_right_cursor, "shift+control right-cursor"),
+    (String, :kRIT7)  => (:key_alt_control_right_cursor, "alt+control right-cursor"),
+    (String, :kUP)    => (:key_shift_up_cursor, "shift up-cursor"),
+    (String, :kUP3)   => (:key_alt_up_cursor, "alt up-cursor"),
+    (String, :kUP4)   => (:key_shift_alt_up_cursor, "shift+alt up-cursor"),
+    (String, :kUP5)   => (:key_control_up_cursor, "control up-cursor"),
+    (String, :kUP6)   => (:key_shift_control_up_cursor, "shift+control up-cursor"),
+    (String, :kUP7)   => (:key_alt_control_up_cursor, "alt+control up-cursor"),
+    (String, :ka2)    => (nothing, "vt220-keypad extensions"),
+    (String, :kb1)    => (nothing, "vt220-keypad extensions"),
+    (String, :kb3)    => (nothing, "vt220-keypad extensions"),
+    (String, :kc2)    => (nothing, "vt220-keypad extensions"),
+    (String, :kxIN)   => (:key_mouse_response_on_focus_in, "mouse response on focus-in"),
+    (String, :kxOUT)  => (:key_mouse_response_on_focus_out, "mouse response on focus-out"),
+    (Bool,   :Tc)     => (:truecolor, "tmux extension to indicate 24-bit truecolor support"),
+    (Bool,   :Su)     => (:can_style_underline, "kitty extension to indicate styled underline support"),
+)
diff --git a/base/threadcall.jl b/base/threadcall.jl
index 7548c5063671f..fbc1a87a20980 100644
--- a/base/threadcall.jl
+++ b/base/threadcall.jl
@@ -1,8 +1,9 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 const max_ccall_threads = parse(Int, get(ENV, "UV_THREADPOOL_SIZE", "4"))
-const thread_notifiers = Union{Base.Condition, Nothing}[nothing for i in 1:max_ccall_threads]
+const thread_notifiers = Union{Event, Nothing}[nothing for i in 1:max_ccall_threads]
 const threadcall_restrictor = Semaphore(max_ccall_threads)
+const threadcall_lock = Threads.SpinLock()
 
 """
     @threadcall((cfunc, clib), rettype, (argtypes...), argvals...)
@@ -81,8 +82,11 @@ function do_threadcall(fun_ptr::Ptr{Cvoid}, cfptr::Ptr{Cvoid}, rettype::Type, ar
 
     # wait for a worker thread to be available
     acquire(threadcall_restrictor)
-    idx = findfirst(isequal(nothing), thread_notifiers)::Int
-    thread_notifiers[idx] = Base.Condition()
+    idx = -1
+    @lock threadcall_lock begin
+        idx = findfirst(isequal(nothing), thread_notifiers)::Int
+        thread_notifiers[idx] = Event()
+    end
 
     GC.@preserve args_arr ret_arr roots begin
         # queue up the work to be done
@@ -92,7 +96,9 @@ function do_threadcall(fun_ptr::Ptr{Cvoid}, cfptr::Ptr{Cvoid}, rettype::Type, ar
 
         # wait for a result & return it
         wait(thread_notifiers[idx])
-        thread_notifiers[idx] = nothing
+        @lock threadcall_lock begin
+            thread_notifiers[idx] = nothing
+        end
         release(threadcall_restrictor)
 
         r = unsafe_load(convert(Ptr{rettype}, pointer(ret_arr)))
diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl
index e4f14e26ac5a9..ee289cc788a55 100644
--- a/base/threadingconstructs.jl
+++ b/base/threadingconstructs.jl
@@ -3,11 +3,13 @@
 export threadid, nthreads, @threads, @spawn,
        threadpool, nthreadpools
 
+public Condition, threadpoolsize, ngcthreads
+
 """
-    Threads.threadid() -> Int
+    Threads.threadid([t::Task])::Int
 
-Get the ID number of the current thread of execution. The master thread has
-ID `1`.
+Get the ID number of the current thread of execution, or the thread of task
+`t`. The master thread has ID `1`.
 
 # Examples
 ```julia-repl
@@ -21,18 +23,21 @@ julia> Threads.@threads for i in 1:4
 2
 5
 4
+
+julia> Threads.threadid(Threads.@spawn "foo")
+2
 ```
 
 !!! note
     The thread that a task runs on may change if the task yields, which is known as [`Task Migration`](@ref man-task-migration).
-    For this reason in most cases it is not safe to use `threadid()` to index into, say, a vector of buffer or stateful objects.
-
+    For this reason in most cases it is not safe to use `threadid([task])` to index into, say, a vector of buffers or stateful
+    objects.
 """
 threadid() = Int(ccall(:jl_threadid, Int16, ())+1)
 
 # lower bound on the largest threadid()
 """
-    Threads.maxthreadid() -> Int
+    Threads.maxthreadid()::Int
 
 Get a lower bound on the number of threads (across all thread pools) available
 to the Julia process, with atomic-acquire semantics. The result will always be
@@ -42,10 +47,11 @@ any task you were able to observe before calling `maxthreadid`.
 maxthreadid() = Int(Core.Intrinsics.atomic_pointerref(cglobal(:jl_n_threads, Cint), :acquire))
 
 """
-    Threads.nthreads(:default | :interactive) -> Int
+    Threads.nthreads(:default | :interactive)::Int
 
-Get the current number of threads within the specified thread pool. The threads in default
-have id numbers `1:nthreads(:default)`.
+Get the current number of threads within the specified thread pool. The threads in `:interactive`
+have id numbers `1:nthreads(:interactive)`, and the threads in `:default` have id numbers in
+`nthreads(:interactive) .+ (1:nthreads(:default))`.
 
 See also `BLAS.get_num_threads` and `BLAS.set_num_threads` in the [`LinearAlgebra`](@ref
 man-linalg) standard library, and `nprocs()` in the [`Distributed`](@ref man-distributed)
@@ -63,8 +69,10 @@ function _tpid_to_sym(tpid::Int8)
         return :interactive
     elseif tpid == 1
         return :default
+    elseif tpid == -1
+        return :foreign
     else
-        throw(ArgumentError("Unrecognized threadpool id $tpid"))
+        throw(ArgumentError(LazyString("Unrecognized threadpool id ", tpid)))
     end
 end
 
@@ -73,15 +81,17 @@ function _sym_to_tpid(tp::Symbol)
         return Int8(0)
     elseif tp === :default
         return Int8(1)
+    elseif tp == :foreign
+        return Int8(-1)
     else
-        throw(ArgumentError("Unrecognized threadpool name `$(repr(tp))`"))
+        throw(ArgumentError(LazyString("Unrecognized threadpool name `", tp, "`")))
     end
 end
 
 """
-    Threads.threadpool(tid = threadid()) -> Symbol
+    Threads.threadpool(tid = threadid())::Symbol
 
-Returns the specified thread's threadpool; either `:default` or `:interactive`.
+Return the specified thread's threadpool; either `:default`, `:interactive`, or `:foreign`.
 """
 function threadpool(tid = threadid())
     tpid = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1)
@@ -89,14 +99,32 @@ function threadpool(tid = threadid())
 end
 
 """
-    Threads.nthreadpools() -> Int
+    Threads.threadpooldescription(tid = threadid())::String
 
-Returns the number of threadpools currently configured.
+Return the specified thread's threadpool name with extended description where appropriate.
+"""
+function threadpooldescription(tid = threadid())
+    threadpool_name = threadpool(tid)
+    if threadpool_name == :foreign
+        # TODO: extend tls to include a field to add a description to a foreign thread and make this more general
+        n_others = nthreads(:interactive) + nthreads(:default)
+        # Assumes GC threads come first in the foreign thread pool
+        if tid > n_others && tid <= n_others + ngcthreads()
+            return "foreign: gc"
+        end
+    end
+    return string(threadpool_name)
+end
+
+"""
+    Threads.nthreadpools()::Int
+
+Return the number of threadpools currently configured.
 """
 nthreadpools() = Int(unsafe_load(cglobal(:jl_n_threadpools, Cint)))
 
 """
-    Threads.threadpoolsize(pool::Symbol = :default) -> Int
+    Threads.threadpoolsize(pool::Symbol = :default)::Int
 
 Get the number of threads available to the default thread pool (or to the
 specified thread pool).
@@ -108,6 +136,8 @@ See also: `BLAS.get_num_threads` and `BLAS.set_num_threads` in the
 function threadpoolsize(pool::Symbol = :default)
     if pool === :default || pool === :interactive
         tpid = _sym_to_tpid(pool)
+    elseif pool == :foreign
+        error("Threadpool size of `:foreign` is indeterminant")
     else
         error("invalid threadpool specified")
     end
@@ -117,7 +147,7 @@ end
 """
     threadpooltids(pool::Symbol)
 
-Returns a vector of IDs of threads in the given pool.
+Return a vector of IDs of threads in the given pool.
 """
 function threadpooltids(pool::Symbol)
     ni = _nthreads_in_pool(Int8(0))
@@ -131,9 +161,9 @@ function threadpooltids(pool::Symbol)
 end
 
 """
-    Threads.ngcthreads() -> Int
+    Threads.ngcthreads()::Int
 
-Returns the number of GC threads currently configured.
+Return the number of GC threads currently configured.
 This includes both mark threads and concurrent sweep threads.
 """
 ngcthreads() = Int(unsafe_load(cglobal(:jl_n_gcthreads, Cint))) + 1
@@ -146,7 +176,14 @@ function threading_run(fun, static)
     for i = 1:n
         t = Task(() -> fun(i)) # pass in tid
         t.sticky = static
-        static && ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid_offset + i-1)
+        if static
+            ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid_offset + i-1)
+        else
+            # TODO: this should be the current pool (except interactive) if there
+            # are ever more than two pools.
+            _result = ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, _sym_to_tpid(:default))
+            @assert _result == 1
+        end
         tasks[i] = t
         schedule(t)
     end
@@ -163,9 +200,46 @@ end
 function _threadsfor(iter, lbody, schedule)
     lidx = iter.args[1]         # index
     range = iter.args[2]
+    esc_range = esc(range)
+    func = if schedule === :greedy
+        greedy_func(esc_range, lidx, lbody)
+    else
+        default_func(esc_range, lidx, lbody)
+    end
     quote
         local threadsfor_fun
-        let range = $(esc(range))
+        $func
+        if $(schedule === :greedy || schedule === :dynamic || schedule === :default)
+            threading_run(threadsfor_fun, false)
+        elseif ccall(:jl_in_threaded_region, Cint, ()) != 0 # :static
+            error("`@threads :static` cannot be used concurrently or nested")
+        else # :static
+            threading_run(threadsfor_fun, true)
+        end
+        nothing
+    end
+end
+
+function greedy_func(itr, lidx, lbody)
+    quote
+        let c = Channel{eltype($itr)}(threadpoolsize(), spawn=true) do ch
+            for item in $itr
+                put!(ch, item)
+            end
+        end
+        function threadsfor_fun(tid)
+            for item in c
+                local $(esc(lidx)) = item
+                $(esc(lbody))
+            end
+        end
+        end
+    end
+end
+
+function default_func(itr, lidx, lbody)
+    quote
+        let range = $itr
         function threadsfor_fun(tid = 1; onethread = false)
             r = range # Load into local variable
             lenr = length(r)
@@ -203,14 +277,6 @@ function _threadsfor(iter, lbody, schedule)
             end
         end
         end
-        if $(schedule === :dynamic || schedule === :default)
-            threading_run(threadsfor_fun, false)
-        elseif ccall(:jl_in_threaded_region, Cint, ()) != 0 # :static
-            error("`@threads :static` cannot be used concurrently or nested")
-        else # :static
-            threading_run(threadsfor_fun, true)
-        end
-        nothing
     end
 end
 
@@ -221,8 +287,14 @@ A macro to execute a `for` loop in parallel. The iteration space is distributed
 coarse-grained tasks. This policy can be specified by the `schedule` argument. The
 execution of the loop waits for the evaluation of all iterations.
 
+Tasks spawned by `@threads` are scheduled on the `:default` threadpool. This means that
+`@threads` will not use threads from the `:interactive` threadpool, even if called from
+the main thread or from a task in the interactive pool. The `:default` threadpool is
+intended for compute-intensive parallel workloads.
+
 See also: [`@spawn`](@ref Threads.@spawn) and
 `pmap` in [`Distributed`](@ref man-distributed).
+For more information on threadpools, see the chapter on [threadpools](@ref man-threadpools).
 
 # Extended help
 
@@ -276,6 +348,20 @@ microseconds).
 !!! compat "Julia 1.8"
     The `:dynamic` option for the `schedule` argument is available and the default as of Julia 1.8.
 
+### `:greedy`
+
+`:greedy` scheduler spawns up to [`Threads.threadpoolsize()`](@ref) tasks, each greedily working on
+the given iterated values as they are produced. As soon as one task finishes its work, it takes
+the next value from the iterator. Work done by any individual task is not necessarily on
+contiguous values from the iterator. The given iterator may produce values forever, only the
+iterator interface is required (no indexing).
+
+This scheduling option is generally a good choice if the workload of individual iterations
+is not uniform/has a large spread.
+
+!!! compat "Julia 1.11"
+    The `:greedy` option for the `schedule` argument is available as of Julia 1.11.
+
 ### `:static`
 
 `:static` scheduler creates one task per thread and divides the iterations equally among
@@ -289,7 +375,7 @@ thread other than 1.
     In newly written library functions, `:static` scheduling is discouraged because the
     functions using this option cannot be called from arbitrary worker threads.
 
-## Example
+## Examples
 
 To illustrate of the different scheduling strategies, consider the following function
 `busywait` containing a non-yielding timed loop that runs for a given number of seconds.
@@ -331,7 +417,7 @@ macro threads(args...)
             # for now only allow quoted symbols
             sched = nothing
         end
-        if sched !== :static && sched !== :dynamic
+        if sched !== :static && sched !== :dynamic && sched !== :greedy
             throw(ArgumentError("unsupported schedule argument in @threads"))
         end
     elseif na == 1
@@ -351,18 +437,20 @@ end
 
 function _spawn_set_thrpool(t::Task, tp::Symbol)
     tpid = _sym_to_tpid(tp)
-    if _nthreads_in_pool(tpid) == 0
+    if tpid == -1 || _nthreads_in_pool(tpid) == 0
         tpid = _sym_to_tpid(:default)
     end
-    ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, tpid)
+    _result = ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, tpid)
+    @assert _result == 1
     nothing
 end
 
 """
-    Threads.@spawn [:default|:interactive] expr
+    Threads.@spawn [:default|:interactive|:samepool] expr
 
 Create a [`Task`](@ref) and [`schedule`](@ref) it to run on any available
-thread in the specified threadpool (`:default` if unspecified). The task is
+thread in the specified threadpool: `:default`, `:interactive`, or `:samepool`
+to use the same as the caller. `:default` is used if unspecified. The task is
 allocated to a thread once one becomes available. To wait for the task to
 finish, call [`wait`](@ref) on the result of this macro, or call
 [`fetch`](@ref) to wait and then obtain its return value.
@@ -387,6 +475,9 @@ the variable's value in the current task.
 !!! compat "Julia 1.9"
     A threadpool may be specified as of Julia 1.9.
 
+!!! compat "Julia 1.12"
+    The same threadpool may be specified as of Julia 1.12.
+
 # Examples
 ```julia-repl
 julia> t() = println("Hello from ", Threads.threadid());
@@ -405,8 +496,8 @@ macro spawn(args...)
         ttype, ex = args
         if ttype isa QuoteNode
             ttype = ttype.value
-            if ttype !== :interactive && ttype !== :default
-                throw(ArgumentError("unsupported threadpool in @spawn: $ttype"))
+            if !in(ttype, (:interactive, :default, :samepool))
+                throw(ArgumentError(LazyString("unsupported threadpool in @spawn: ", ttype)))
             end
             tp = QuoteNode(ttype)
         else
@@ -426,7 +517,11 @@ macro spawn(args...)
         let $(letargs...)
             local task = Task($thunk)
             task.sticky = false
-            _spawn_set_thrpool(task, $(esc(tp)))
+            local tp = $(esc(tp))
+            if tp == :samepool
+                tp = Threads.threadpool()
+            end
+            _spawn_set_thrpool(task, tp)
             if $(Expr(:islocal, var))
                 put!($var, task)
             end
diff --git a/base/threads.jl b/base/threads.jl
index 2d388cc4b9f77..bdd6677c5a955 100644
--- a/base/threads.jl
+++ b/base/threads.jl
@@ -8,7 +8,6 @@ module Threads
 global Condition # we'll define this later, make sure we don't import Base.Condition
 
 include("threadingconstructs.jl")
-include("atomics.jl")
 include("locks-mt.jl")
 
 end
diff --git a/base/timing.jl b/base/timing.jl
index 154951d031af5..d4c219049fd3d 100644
--- a/base/timing.jl
+++ b/base/timing.jl
@@ -1,35 +1,80 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# This type must be kept in sync with the C struct in src/gc.h
+# This type must be kept in sync with the C struct in src/gc-interface.h
 struct GC_Num
-    allocd          ::Int64 # GC internal
-    deferred_alloc  ::Int64 # GC internal
-    freed           ::Int64 # GC internal
-    malloc          ::Int64
-    realloc         ::Int64
-    poolalloc       ::Int64
-    bigalloc        ::Int64
-    freecall        ::Int64
-    total_time      ::Int64
-    total_allocd    ::Int64 # GC internal
-    collect         ::Csize_t # GC internal
-    pause           ::Cint
-    full_sweep      ::Cint
-    max_pause       ::Int64
-    max_memory      ::Int64
-    time_to_safepoint           ::Int64
-    max_time_to_safepoint       ::Int64
-    total_time_to_safepoint     ::Int64
-    sweep_time      ::Int64
-    mark_time       ::Int64
-    total_sweep_time  ::Int64
-    total_mark_time   ::Int64
-    last_full_sweep ::Int64
-    last_incremental_sweep ::Int64
+    # (GC Internal) Number of allocated bytes since the last collection. This field is reset
+    # after the end of every garbage collection cycle, so it will always be zero if observed
+    # during execution of Julia user code
+    allocd::Int64
+    # (GC Internal) Number of allocated bytes within a `gc_disable/gc_enable` block. This field is
+    # reset after every garbage collection cycle and will always be zero in case of no use
+    # of `gc_disable/gc_enable` blocks
+    deferred_alloc::Int64
+    # (GC Internal) Number of bytes freed bytes in the current collection cycle. This field is
+    # reset after every garbage collection cycle and will always be zero when observed
+    # during execution of Julia user code. It's incremented as memory is reclaimed during a collection,
+    # used to gather some statistics within the collection itself and reset at the end of a GC cycle.
+    freed::Int64
+    # Number of `malloc/calloc` calls (never reset by the runtime)
+    malloc::Int64
+    # Number of `realloc` calls (never reset by the runtime)
+    realloc::Int64
+    # Number of pool allocation calls (never reset by the runtime)
+    # NOTE: Julia's stock GC uses an internal (pool) allocator for objects up to 2032 bytes.
+    # Larger objects are allocated through `malloc/calloc`.
+    poolalloc::Int64
+    # Number of allocations for "big objects" (non-array objects larger than 2032 bytes)
+    # (never reset by the runtime)
+    bigalloc::Int64
+    # Number of `free` calls (never reset by the runtime)
+    freecall::Int64
+    # Total time spent in garbage collection (never reset by the runtime)
+    total_time::Int64
+    # (GC internal) Total number of bytes allocated since the program started
+    total_allocd::Int64
+    # (GC internal) Per-thread allocation quota before triggering a GC
+    # NOTE: This field is no longer used by the heuristics in the stock GC
+    interval::Csize_t
+    # Duration of the last GC pause in nanoseconds
+    pause::Cint
+    # Number of full GC sweeps completed so far (never reset by the runtime)
+    full_sweep::Cint
+    # Maximum pause duration observed so far in nanoseconds
+    max_pause::Int64
+    # Maximum number of bytes allocated any point in time.
+    # NOTE: This is aggregated over objects, not pages
+    max_memory::Int64
+    # Time taken to reach a safepoint in the last GC cycle in nanoseconds
+    time_to_safepoint::Int64
+    # Maximum time taken to reach a safepoint across all GCs in nanoseconds
+    max_time_to_safepoint::Int64
+    # Total time taken to reach safepoints across all GCs in nanoseconds
+    total_time_to_safepoint::Int64
+    # Time spent in the last GC sweeping phase in nanoseconds
+    sweep_time::Int64
+    # Time spent in the last GC marking phase in nanoseconds
+    mark_time::Int64
+    # Time spent sweeping stack pools in the last GC in nanoseconds
+    stack_pool_sweep_time::Int64
+    # Total time spent in sweeping phase across all GCs in nanoseconds
+    total_sweep_time::Int64
+    # Total time spent walking pool allocated pages during sweeping phase across all GCs in nanoseconds
+    total_sweep_page_walk_time::Int64
+    # Total time spent in madvise calls during sweeping phase across all GCs in nanoseconds
+    total_sweep_madvise_time::Int64
+    # Total time spent in freeing malloc'd memory during sweeping phase across all GCs in nanoseconds
+    total_sweep_free_mallocd_memory_time::Int64
+    # Total time spent in marking phase across all GCs in nanoseconds
+    total_mark_time::Int64
+    # Total time spent sweeping stack pools across all GCs in nanoseconds
+    total_stack_pool_sweep_time::Int64
+    # Timestamp of the last full GC sweep in nanoseconds
+    last_full_sweep::Int64
+    # Timestamp of the last incremental GC sweep in nanoseconds
+    last_incremental_sweep::Int64
 end
 
 gc_num() = ccall(:jl_gc_num, GC_Num, ())
-reset_gc_stats() = ccall(:jl_gc_reset_stats, Cvoid, ())
 
 # This type is to represent differences in the counters, so fields may be negative
 struct GC_Diff
@@ -48,7 +93,7 @@ gc_total_bytes(gc_num::GC_Num) =
     gc_num.allocd + gc_num.deferred_alloc + gc_num.total_allocd
 
 function GC_Diff(new::GC_Num, old::GC_Num)
-    # logic from `src/gc.c:jl_gc_total_bytes`
+    # logic from `jl_gc_total_bytes`
     old_allocd = gc_total_bytes(old)
     new_allocd = gc_total_bytes(new)
     return GC_Diff(new_allocd       - old_allocd,
@@ -98,6 +143,47 @@ function gc_live_bytes()
     Int(ccall(:jl_gc_live_bytes, Int64, ())) + num.allocd + num.deferred_alloc
 end
 
+# must be kept in sync with the value from `src/julia_threads.h``
+const JL_GC_N_MAX_POOLS = 51
+function gc_page_utilization_data()
+    page_utilization_raw = cglobal(:jl_gc_page_utilization_stats, Float64)
+    return Base.unsafe_wrap(Array, page_utilization_raw, JL_GC_N_MAX_POOLS, own=false)
+end
+
+# Full sweep reasons are currently only available for the stock GC
+@static if Base.USING_STOCK_GC
+# must be kept in sync with `src/gc-stock.h``
+const FULL_SWEEP_REASONS = [:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL, :FULL_SWEEP_REASON_FORCED_FULL_SWEEP,
+                            :FULL_SWEEP_REASON_USER_MAX_EXCEEDED, :FULL_SWEEP_REASON_LARGE_PROMOTION_RATE]
+end
+
+"""
+    Base.full_sweep_reasons()
+
+Return a dictionary of the number of times each full sweep reason has occurred.
+
+The reasons are:
+- `:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL`: Full sweep was caused due to `always_full` being set in the GC debug environment
+- `:FULL_SWEEP_REASON_FORCED_FULL_SWEEP`: Full sweep was forced by `GC.gc(true)`
+- `:FULL_SWEEP_REASON_USER_MAX_EXCEEDED`: Full sweep was forced due to the system reaching the heap soft size limit
+- `:FULL_SWEEP_REASON_LARGE_PROMOTION_RATE`: Full sweep was forced by a large promotion rate across GC generations
+
+Note that the set of reasons is not guaranteed to be stable across minor versions of Julia.
+"""
+function full_sweep_reasons()
+    d = Dict{Symbol, Int64}()
+    # populate the dictionary according to the reasons above for the stock GC
+    # otherwise return an empty dictionary for now
+    @static if Base.USING_STOCK_GC
+        reason = cglobal(:jl_full_sweep_reasons, UInt64)
+        reasons_as_array = Base.unsafe_wrap(Vector{UInt64}, reason, length(FULL_SWEEP_REASONS), own=false)
+        for (i, r) in enumerate(FULL_SWEEP_REASONS)
+            d[r] = reasons_as_array[i]
+        end
+    end
+    return d
+end
+
 """
     Base.jit_total_bytes()
 
@@ -128,21 +214,52 @@ function padded_nonzero_print(value, str, always_print = true)
     end
 end
 
-function format_bytes(bytes) # also used by InteractiveUtils
-    bytes, mb = prettyprint_getunits(bytes, length(_mem_units), Int64(1024))
+"""
+    format_bytes(bytes; binary=true)
+
+Format a given number of bytes into a human-readable string.
+
+# Arguments
+- `bytes`: The number of bytes to format.
+- `binary=true`: If `true`, formats the bytes in binary units (powers of 1024). If `false`, uses decimal units (powers of 1000).
+
+# Returns
+`String`: A human-readable string representation of the bytes, formatted in either binary or decimal units based on the `binary` argument.
+
+# Examples
+```jldoctest
+julia> Base.format_bytes(1024)
+"1024 bytes"
+
+julia> Base.format_bytes(10000)
+"9.766 KiB"
+
+julia> Base.format_bytes(10000, binary=false)
+"10.000 kB"
+```
+"""
+function format_bytes(bytes; binary=true) # also used by InteractiveUtils
+    units = binary ? _mem_units : _cnt_units
+    factor = binary ? 1024 : 1000
+    bytes, mb = prettyprint_getunits(bytes, length(units), Int64(factor))
     if mb == 1
         return string(Int(bytes), " ", _mem_units[mb], bytes==1 ? "" : "s")
     else
-        return string(Ryu.writefixed(Float64(bytes), 3), " ", _mem_units[mb])
+        return string(Ryu.writefixed(Float64(bytes), 3), binary ? " $(units[mb])" : "$(units[mb])B")
     end
 end
 
-function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true)
+function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_conflicts=0, compile_time=0, recompile_time=0, newline=false;
+                    msg::Union{String,Nothing}=nothing)
     timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
     str = sprint() do io
-        _lpad && print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
+        if msg isa String
+            print(io, msg, ": ")
+        else
+            print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
+        end
         print(io, timestr, " seconds")
-        parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
+        parens = bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 || compile_time > 0
         parens && print(io, " (")
         if bytes != 0 || allocs != 0
             allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
@@ -159,10 +276,17 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, compile_ti
             end
             print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
         end
-        if compile_time > 0
+        if lock_conflicts > 0
             if bytes != 0 || allocs != 0 || gctime > 0
                 print(io, ", ")
             end
+            plural = lock_conflicts == 1 ? "" : "s"
+            print(io, lock_conflicts, " lock conflict$plural")
+        end
+        if compile_time > 0
+            if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0
+                print(io, ", ")
+            end
             print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
         end
         if recompile_time > 0
@@ -177,11 +301,11 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, compile_ti
     nothing
 end
 
-function timev_print(elapsedtime, diff::GC_Diff, compile_times, _lpad)
+function timev_print(elapsedtime, diff::GC_Diff, lock_conflicts, compile_times; msg::Union{String,Nothing}=nothing)
     allocs = gc_alloc_count(diff)
     compile_time = first(compile_times)
     recompile_time = last(compile_times)
-    time_print(stdout, elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad)
+    time_print(stdout, elapsedtime, diff.allocd, diff.total_time, allocs, lock_conflicts, compile_time, recompile_time, true; msg)
     padded_nonzero_print(elapsedtime,       "elapsed time (ns)")
     padded_nonzero_print(diff.total_time,   "gc time (ns)")
     padded_nonzero_print(diff.allocd,       "bytes allocated")
@@ -213,7 +337,8 @@ end
 A macro to execute an expression, printing the time it took to execute, the number of
 allocations, and the total number of bytes its execution caused to be allocated, before
 returning the value of the expression. Any time spent garbage collecting (gc), compiling
-new code, or recompiling invalidated code is shown as a percentage.
+new code, or recompiling invalidated code is shown as a percentage. Any lock conflicts
+where a [`ReentrantLock`](@ref) had to wait are shown as a count.
 
 Optionally provide a description string to print before the time report.
 
@@ -234,6 +359,9 @@ See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@
 
     Recompilation time being shown separately from compilation time was introduced in Julia 1.8
 
+!!! compat "Julia 1.11"
+    The reporting of any lock conflicts was added in Julia 1.11.
+
 ```julia-repl
 julia> x = rand(10,10);
 
@@ -268,22 +396,11 @@ macro time(ex)
 end
 macro time(msg, ex)
     quote
-        Experimental.@force_compile
-        local stats = gc_num()
-        local elapsedtime = time_ns()
-        cumulative_compile_timing(true)
-        local compile_elapsedtimes = cumulative_compile_time_ns()
-        local val = @__tryfinally($(esc(ex)),
-            (elapsedtime = time_ns() - elapsedtime;
-            cumulative_compile_timing(false);
-            compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes)
-        )
-        local diff = GC_Diff(gc_num(), stats)
+        local ret = @timed $(esc(ex))
         local _msg = $(esc(msg))
-        local has_msg = !isnothing(_msg)
-        has_msg && print(_msg, ": ")
-        time_print(stdout, elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg)
-        val
+        local _msg_str = _msg === nothing ? _msg : string(_msg)
+        time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, true; msg=_msg_str)
+        ret.value
     end
 end
 
@@ -352,22 +469,11 @@ macro timev(ex)
 end
 macro timev(msg, ex)
     quote
-        Experimental.@force_compile
-        local stats = gc_num()
-        local elapsedtime = time_ns()
-        cumulative_compile_timing(true)
-        local compile_elapsedtimes = cumulative_compile_time_ns()
-        local val = @__tryfinally($(esc(ex)),
-            (elapsedtime = time_ns() - elapsedtime;
-            cumulative_compile_timing(false);
-            compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes)
-        )
-        local diff = GC_Diff(gc_num(), stats)
+        local ret = @timed $(esc(ex))
         local _msg = $(esc(msg))
-        local has_msg = !isnothing(_msg)
-        has_msg && print(_msg, ": ")
-        timev_print(elapsedtime, diff, compile_elapsedtimes, !has_msg)
-        val
+        local _msg_str = _msg === nothing ? _msg : string(_msg)
+        timev_print(ret.time*1e9, ret.gcstats, ret.lock_conflicts, (ret.compile_time*1e9, ret.recompile_time*1e9); msg=_msg_str)
+        ret.value
     end
 end
 
@@ -407,12 +513,93 @@ function gc_bytes()
     b[]
 end
 
+@constprop :none function allocated(f, args::Vararg{Any,N}) where {N}
+    b0 = Ref{Int64}(0)
+    b1 = Ref{Int64}(0)
+    Base.gc_bytes(b0)
+    @noinline f(args...)
+    Base.gc_bytes(b1)
+    return b1[] - b0[]
+end
+only(methods(allocated)).called = 0xff
+
+@constprop :none function allocations(f, args::Vararg{Any,N}) where {N}
+    stats = Base.gc_num()
+    @noinline f(args...)
+    diff = Base.GC_Diff(Base.gc_num(), stats)
+    return Base.gc_alloc_count(diff)
+end
+only(methods(allocations)).called = 0xff
+
+function is_simply_call(@nospecialize ex)
+    is_simple_atom(a) = a isa QuoteNode || a isa Symbol || !isa_ast_node(a)
+    Meta.isexpr(ex, :call) || return false
+    for a in ex.args
+        is_simple_atom(a) && continue
+        Meta.isexpr(a, :..., 1) && is_simple_atom(a.args[1]) && continue
+        return false
+    end
+    return true
+end
+
+function _gen_allocation_measurer(ex, fname::Symbol)
+    if isexpr(ex, :call)
+        if !is_simply_call(ex)
+            ex = :((() -> $ex)())
+        end
+        pushfirst!(ex.args, GlobalRef(Base, fname))
+        return quote
+            Experimental.@force_compile
+            $(esc(ex))
+        end
+    elseif fname === :allocated
+        # v1.11-compatible implementation
+        return quote
+            Experimental.@force_compile
+            local b0 = Ref{Int64}(0)
+            local b1 = Ref{Int64}(0)
+            gc_bytes(b0)
+            $(esc(ex))
+            gc_bytes(b1)
+            b1[] - b0[]
+        end
+    else
+        @assert fname === :allocations
+        return quote
+            Experimental.@force_compile
+            # Note this value is unused, but without it `allocated` and `allocations`
+            # are sufficiently different that the compiler can remove allocations here
+            # that it cannot remove there, giving inconsistent numbers.
+            local b1 = Ref{Int64}(0)
+            local stats = Base.gc_num()
+            $(esc(ex))
+            local diff = Base.GC_Diff(Base.gc_num(), stats)
+            gc_bytes(b1)
+            Base.gc_alloc_count(diff)
+        end
+    end
+end
+
 """
     @allocated
 
 A macro to evaluate an expression, discarding the resulting value, instead returning the
 total number of bytes allocated during evaluation of the expression.
 
+If the expression is a function call, an effort is made to measure only allocations from
+the argument expressions and during the function, excluding any overhead from calling it
+and not performing constant propagation with the provided argument values. If you want to
+include those effects, i.e. measuring the call site as well, use the syntax
+`@allocated (()->f(1))()`.
+
+It is recommended to measure function calls with only simple argument expressions, e.g.
+`x = []; @allocated f(x)` instead of `@allocated f([])` to clarify that only `f` is
+being measured.
+
+For more complex expressions, the code is simply run in place and therefore may see
+allocations due to the surrounding context. For example it is possible for
+`@allocated f(1)` and `@allocated x = f(1)` to give different results.
+
 See also [`@allocations`](@ref), [`@time`](@ref), [`@timev`](@ref), [`@timed`](@ref),
 and [`@elapsed`](@ref).
 
@@ -422,15 +609,7 @@ julia> @allocated rand(10^6)
 ```
 """
 macro allocated(ex)
-    quote
-        Experimental.@force_compile
-        local b0 = Ref{Int64}(0)
-        local b1 = Ref{Int64}(0)
-        gc_bytes(b0)
-        $(esc(ex))
-        gc_bytes(b1)
-        b1[] - b0[]
-    end
+    _gen_allocation_measurer(ex, :allocated)
 end
 
 """
@@ -451,28 +630,61 @@ julia> @allocations rand(10^6)
     This macro was added in Julia 1.9.
 """
 macro allocations(ex)
+    _gen_allocation_measurer(ex, :allocations)
+end
+
+
+"""
+    @lock_conflicts
+
+A macro to evaluate an expression, discard the resulting value, and instead return the
+total number of lock conflicts during evaluation, where a lock attempt on a [`ReentrantLock`](@ref)
+resulted in a wait because the lock was already held.
+
+See also [`@time`](@ref), [`@timev`](@ref) and [`@timed`](@ref).
+
+```julia-repl
+julia> @lock_conflicts begin
+    l = ReentrantLock()
+    Threads.@threads for i in 1:Threads.nthreads()
+        lock(l) do
+        sleep(1)
+        end
+    end
+end
+5
+```
+
+!!! compat "Julia 1.11"
+    This macro was added in Julia 1.11.
+"""
+macro lock_conflicts(ex)
     quote
-        Experimental.@force_compile
-        local stats = Base.gc_num()
-        $(esc(ex))
-        local diff = Base.GC_Diff(Base.gc_num(), stats)
-        Base.gc_alloc_count(diff)
+        Threads.lock_profiling(true)
+        local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[]
+        try
+            $(esc(ex))
+        finally
+            Threads.lock_profiling(false)
+        end
+        Threads.LOCK_CONFLICT_COUNT[] - lock_conflicts
     end
 end
 
 """
     @timed
 
-A macro to execute an expression, and return the value of the expression, elapsed time,
-total bytes allocated, garbage collection time, and an object with various memory allocation
-counters.
+A macro to execute an expression, and return the value of the expression, elapsed time in seconds,
+total bytes allocated, garbage collection time, an object with various memory allocation
+counters, compilation time in seconds, and recompilation time in seconds. Any lock conflicts
+where a [`ReentrantLock`](@ref) had to wait are shown as a count.
 
 In some cases the system will look inside the `@timed` expression and compile some of the
 called code before execution of the top-level expression begins. When that happens, some
 compilation time will not be counted. To include this time you can run `@timed @eval ...`.
 
 See also [`@time`](@ref), [`@timev`](@ref), [`@elapsed`](@ref),
-[`@allocated`](@ref), and [`@allocations`](@ref).
+[`@allocated`](@ref), [`@allocations`](@ref), and [`@lock_conflicts`](@ref).
 
 ```julia-repl
 julia> stats = @timed rand(10^6);
@@ -491,19 +703,85 @@ julia> propertynames(stats.gcstats)
 
 julia> stats.gcstats.total_time
 5576500
+
+julia> stats.compile_time
+0.0
+
+julia> stats.recompile_time
+0.0
+
 ```
 
 !!! compat "Julia 1.5"
     The return type of this macro was changed from `Tuple` to `NamedTuple` in Julia 1.5.
+
+!!! compat "Julia 1.11"
+    The `lock_conflicts`, `compile_time`, and `recompile_time` fields were added in Julia 1.11.
 """
 macro timed(ex)
     quote
         Experimental.@force_compile
+        Threads.lock_profiling(true)
+        local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[]
         local stats = gc_num()
         local elapsedtime = time_ns()
-        local val = $(esc(ex))
-        elapsedtime = time_ns() - elapsedtime
+        cumulative_compile_timing(true)
+        local compile_elapsedtimes = cumulative_compile_time_ns()
+        local val = @__tryfinally($(esc(ex)),
+            (elapsedtime = time_ns() - elapsedtime;
+            cumulative_compile_timing(false);
+            compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes;
+            lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] - lock_conflicts;
+            Threads.lock_profiling(false))
+        )
         local diff = GC_Diff(gc_num(), stats)
-        (value=val, time=elapsedtime/1e9, bytes=diff.allocd, gctime=diff.total_time/1e9, gcstats=diff)
+        (
+            value=val,
+            time=elapsedtime/1e9,
+            bytes=diff.allocd,
+            gctime=diff.total_time/1e9,
+            gcstats=diff,
+            lock_conflicts=lock_conflicts,
+            compile_time=compile_elapsedtimes[1]/1e9,
+            recompile_time=compile_elapsedtimes[2]/1e9
+        )
+    end
+end
+
+# Exported, documented, and tested in InteractiveUtils
+# here so it's possible to time/trace all imports, including InteractiveUtils and its deps
+macro time_imports(ex)
+    quote
+        Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1)
+        @__tryfinally(
+            # try
+            $(esc(ex)),
+            # finally
+            Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1)
+        )
+    end
+end
+
+macro trace_compile(ex)
+    quote
+        ccall(:jl_force_trace_compile_timing_enable, Cvoid, ())
+        @__tryfinally(
+            # try
+            $(esc(ex)),
+            # finally
+            ccall(:jl_force_trace_compile_timing_disable, Cvoid, ())
+        )
+    end
+end
+
+macro trace_dispatch(ex)
+    quote
+        ccall(:jl_force_trace_dispatch_enable, Cvoid, ())
+        @__tryfinally(
+            # try
+            $(esc(ex)),
+            # finally
+            ccall(:jl_force_trace_dispatch_disable, Cvoid, ())
+        )
     end
 end
diff --git a/base/toml_parser.jl b/base/toml_parser.jl
index 6c4ff6e2a52c0..f07f25eeddf25 100644
--- a/base/toml_parser.jl
+++ b/base/toml_parser.jl
@@ -1,11 +1,16 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+"""
+`Base.TOML` is an undocumented internal part of Julia's TOML parser
+implementation.  Users should call the documented interface in the
+TOML.jl standard library instead (by `import TOML` or `using TOML`).
+"""
 module TOML
 
 using Base: IdSet
 
-# In case we do not have the Dates stdlib available
 # we parse DateTime into these internal structs,
+# unless a different DateTime library is passed to the Parser constructor
 # note that these do not do any argument checking
 struct Date
     year::Int
@@ -33,7 +38,7 @@ const TOMLDict  = Dict{String, Any}
 # Parser #
 ##########
 
-mutable struct Parser
+mutable struct Parser{Dates}
     str::String
     # 1 character look ahead
     current_char::Char
@@ -79,16 +84,11 @@ mutable struct Parser
 
     # Filled in in case we are parsing a file to improve error messages
     filepath::Union{String, Nothing}
-
-    # Get's populated with the Dates stdlib if it exists
-    Dates::Union{Module, Nothing}
 end
 
-const DATES_PKGID = Base.PkgId(Base.UUID("ade2ca70-3891-5945-98fb-dc099432e06a"), "Dates")
-
-function Parser(str::String; filepath=nothing)
+function Parser{Dates}(str::String; filepath=nothing) where {Dates}
     root = TOMLDict()
-    l = Parser(
+    l = Parser{Dates}(
             str,                  # str
             EOF_CHAR,             # current_char
             firstindex(str),      # pos
@@ -103,12 +103,12 @@ function Parser(str::String; filepath=nothing)
             IdSet{Any}(),         # static_arrays
             IdSet{TOMLDict}(),    # defined_tables
             root,
-            filepath,
-            isdefined(Base, :maybe_root_module) ? Base.maybe_root_module(DATES_PKGID) : nothing,
+            filepath
         )
     startup(l)
     return l
 end
+
 function startup(l::Parser)
     # Populate our one character look-ahead
     c = eat_char(l)
@@ -119,8 +119,10 @@ function startup(l::Parser)
     end
 end
 
-Parser() = Parser("")
-Parser(io::IO) = Parser(read(io, String))
+Parser{Dates}() where {Dates} = Parser{Dates}("")
+Parser{Dates}(io::IO) where {Dates} = Parser{Dates}(read(io, String))
+
+# Parser(...) will be defined by TOML stdlib
 
 function reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing)
     p.str = str
@@ -146,8 +148,6 @@ end
 # Errors #
 ##########
 
-throw_internal_error(msg) = error("internal TOML parser error: $msg")
-
 # Many functions return a ParserError. We want this to bubble up
 # all the way and have this error be returned to the user
 # if the parse is called with `raise=false`. This macro
@@ -258,10 +258,10 @@ end
 mutable struct ParserError <: Exception
     type::ErrorType
 
-    # Arbitrary data to store at the
+    # Data to store at the
     # call site to be used when formatting
     # the error
-    data
+    data::Union{Char, Nothing}
 
     # These are filled in before returning from parse function
     str       ::Union{String,   Nothing}
@@ -276,7 +276,7 @@ ParserError(type) = ParserError(type, nothing)
 # Defining these below can be useful when debugging code that erroneously returns a
 # ParserError because you get a stacktrace to where the ParserError was created
 #ParserError(type) = error(type)
-#ParserError(type, data) = error(type,data)
+#ParserError(type, data) = error(type, data)
 
 # Many functions return either a T or a ParserError
 const Err{T} = Union{T, ParserError}
@@ -284,7 +284,7 @@ const Err{T} = Union{T, ParserError}
 function format_error_message_for_err_type(error::ParserError)
     msg = err_message[error.type]
     if error.type == ErrInvalidBareKeyCharacter
-        c_escaped = escape_string(string(error.data)::String)
+        c_escaped = escape_string(string(error.data::Char))
         msg *= ": '$c_escaped'"
     end
     return msg
@@ -315,7 +315,7 @@ function point_to_line(str::AbstractString, a::Int, b::Int, context)
         c == '\n' && break
         print(io1, c)
     end
-    return String(take!(io1.io)), String(take!(io2.io))
+    return takestring!(io1.io), takestring!(io2.io)
 end
 
 function Base.showerror(io::IO, err::ParserError)
@@ -367,7 +367,7 @@ end
 @inline peek(l::Parser) = l.current_char
 
 # Return true if the character was accepted. When a character
-# is accepted it get's eaten and we move to the next character
+# is accepted it gets eaten and we move to the next character
 @inline function accept(l::Parser, f::Union{Function, Char})::Bool
     c = peek(l)
     c == EOF_CHAR && return false
@@ -491,8 +491,10 @@ function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String},
         d = d::TOMLDict
         key = dotted_keys[i]
         d = get!(TOMLDict, d, key)
-        if d isa Vector
+        if d isa Vector{Any}
             d = d[end]
+        elseif d isa Vector
+            return ParserError(ErrKeyAlreadyHasValue)
         end
         check && @try check_allowed_add_key(l, d, i == length(dotted_keys))
     end
@@ -533,7 +535,7 @@ function parse_array_table(l)::Union{Nothing, ParserError}
     end
     d = @try recurse_dict!(l, l.root, @view(table_key[1:end-1]), false)
     k = table_key[end]
-    old = get!(() -> [], d, k)
+    old = get!(() -> Any[], d, k)
     if old isa Vector
         if old in l.static_arrays
             return ParserError(ErrAddArrayToStaticArray)
@@ -542,7 +544,7 @@ function parse_array_table(l)::Union{Nothing, ParserError}
         return ParserError(ErrArrayTreatedAsDictionary)
     end
     d_new = TOMLDict()
-    push!(old, d_new)
+    push!(old::Vector{Any}, d_new)
     push!(l.defined_tables, d_new)
     l.active_table = d_new
 
@@ -611,7 +613,7 @@ function _parse_key(l::Parser)
     else
         set_marker!(l)
         if accept_batch(l, isvalid_barekey_char)
-            if !(peek(l) == '.' || peek(l) == ' ' || peek(l) == ']' || peek(l) == '=')
+            if !(peek(l) == '.' || iswhitespace(peek(l)) || peek(l) == ']' || peek(l) == '=')
                 c = eat_char(l)
                 return ParserError(ErrInvalidBareKeyCharacter, c)
             end
@@ -664,41 +666,20 @@ end
 # Array #
 #########
 
-function push!!(v::Vector, el)
-    # Since these types are typically non-inferrable, they are a big invalidation risk,
-    # and since it's used by the package-loading infrastructure the cost of invalidation
-    # is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather
-    # than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there
-    # is no ambiguity about what types of objects will be created.
-    T = eltype(v)
-    t = typeof(el)
-    if el isa T || t === T
-        push!(v, el::T)
-        return v
-    elseif T === Union{}
-        out = Vector{t}(undef, 1)
-        out[1] = el
-        return out
-    else
-        if T isa Union
-            newT = Any
-        else
-            newT = Union{T, typeof(el)}
-        end
-        new = Array{newT}(undef, length(v))
-        copy!(new, v)
-        return push!(new, el)
+function copyto_typed!(a::Vector{T}, b::Vector) where T
+    for i in 1:length(b)
+        a[i] = b[i]::T
     end
+    return nothing
 end
 
-function parse_array(l::Parser)::Err{Vector}
+function parse_array(l::Parser{Dates})::Err{Vector} where Dates
     skip_ws_nl(l)
-    array = Vector{Union{}}()
+    array = Vector{Any}()
     empty_array = accept(l, ']')
     while !empty_array
         v = @try parse_value(l)
-        # TODO: Worth to function barrier this?
-        array = push!!(array, v)
+        array = push!(array, v)
         # There can be an arbitrary number of newlines and comments before a value and before the closing bracket.
         skip_ws_nl(l)
         comma = accept(l, ',')
@@ -708,8 +689,40 @@ function parse_array(l::Parser)::Err{Vector}
             return ParserError(ErrExpectedCommaBetweenItemsArray)
         end
     end
-    push!(l.static_arrays, array)
-    return array
+    # check for static type throughout array
+    T = !isempty(array) ? typeof(array[1]) : Union{}
+    for el in array
+        if typeof(el) != T
+            T = Any
+            break
+        end
+    end
+    if T === Any
+        new = array
+    elseif T === String
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Bool
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Int64
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === UInt64
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Float64
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Union{}
+        new = Any[]
+    elseif (T === TOMLDict) || (T == BigInt) || (T === UInt128) || (T === Int128) || (T <: Vector) ||
+        (T === Dates.Date) || (T === Dates.Time) || (T === Dates.DateTime)
+        # do nothing, leave as Vector{Any}
+        new = array
+    else @assert false end
+    push!(l.static_arrays, new)
+    return new
 end
 
 
@@ -757,7 +770,7 @@ isvalid_hex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F')
 isvalid_oct(c::Char) = '0' <= c <= '7'
 isvalid_binary(c::Char) = '0' <= c <= '1'
 
-const ValidSigs = Union{typeof.([isvalid_hex, isvalid_oct, isvalid_binary, isdigit])...}
+const ValidSigs = Union{typeof(isvalid_hex), typeof(isvalid_oct), typeof(isvalid_binary), typeof(isdigit)}
 # This function eats things accepted by `f` but also allows eating `_` in between
 # digits. Returns if it ate at lest one character and if it ate an underscore
 function accept_batch_underscore(l::Parser, f::ValidSigs, fail_if_underscore=true)::Err{Tuple{Bool, Bool}}
@@ -849,7 +862,7 @@ function parse_number_or_date_start(l::Parser)
     ate, contains_underscore = @try accept_batch_underscore(l, isdigit, readed_zero)
     read_underscore |= contains_underscore
     if (read_digit || ate) && ok_end_value(peek(l))
-        return parse_int(l, contains_underscore)
+        return parse_integer(l, contains_underscore)
     end
     # Done with integers here
 
@@ -887,7 +900,7 @@ end
 
 function take_string_or_substring(l, contains_underscore)::SubString
     subs = take_substring(l)
-    # Need to pass a AbstractString to `parse` so materialize it in case it
+    # Need to pass an AbstractString to `parse` so materialize it in case it
     # contains underscore.
     return contains_underscore ? SubString(filter(!=('_'), subs)) : subs
 end
@@ -895,11 +908,22 @@ end
 function parse_float(l::Parser, contains_underscore)::Err{Float64}
     s = take_string_or_substring(l, contains_underscore)
     v = Base.tryparse(Float64, s)
-    v === nothing && return(ParserError(ErrGenericValueError))
+    v === nothing && return ParserError(ErrGenericValueError)
     return v
 end
 
-for (name, T1, T2, n1, n2) in (("int", Int64,  Int128,  17,  33),
+function parse_int(l::Parser, contains_underscore, base=nothing)::Err{Int64}
+    s = take_string_or_substring(l, contains_underscore)
+    v = try
+        Base.parse(Int64, s; base=base)
+    catch e
+        e isa Base.OverflowError && return ParserError(ErrOverflowError)
+        rethrow()
+    end
+    return v
+end
+
+for (name, T1, T2, n1, n2) in (("integer", Int64,  Int128,  17,  33),
                                ("hex", UInt64, UInt128, 18,  34),
                                ("oct", UInt64, UInt128, 24,  45),
                                ("bin", UInt64, UInt128, 66, 130),
@@ -916,8 +940,8 @@ for (name, T1, T2, n1, n2) in (("int", Int64,  Int128,  17,  33),
                 Base.parse(BigInt, s; base)
             end
         catch e
-            e isa Base.OverflowError && return(ParserError(ErrOverflowError))
-            error("internal parser error: did not correctly discredit $(repr(s)) as an int")
+            e isa Base.OverflowError && return ParserError(ErrOverflowError)
+            rethrow()
         end
         return v
     end
@@ -1009,26 +1033,26 @@ function parse_datetime(l)
     return try_return_datetime(l, year, month, day, h, m, s, ms)
 end
 
-function try_return_datetime(p, year, month, day, h, m, s, ms)
-    Dates = p.Dates
+function try_return_datetime(p::Parser{Dates}, year, month, day, h, m, s, ms) where Dates
     if Dates !== nothing
         try
             return Dates.DateTime(year, month, day, h, m, s, ms)
-        catch
-            return ParserError(ErrParsingDateTime)
+        catch ex
+            ex isa ArgumentError && return ParserError(ErrParsingDateTime)
+            rethrow()
         end
     else
         return DateTime(year, month, day, h, m, s, ms)
     end
 end
 
-function try_return_date(p, year, month, day)
-    Dates = p.Dates
+function try_return_date(p::Parser{Dates}, year, month, day) where Dates
     if Dates !== nothing
         try
             return Dates.Date(year, month, day)
-        catch
-            return ParserError(ErrParsingDateTime)
+        catch ex
+            ex isa ArgumentError && return ParserError(ErrParsingDateTime)
+            rethrow()
         end
     else
         return Date(year, month, day)
@@ -1044,13 +1068,13 @@ function parse_local_time(l::Parser)
     return try_return_time(l, h, m, s, ms)
 end
 
-function try_return_time(p, h, m, s, ms)
-    Dates = p.Dates
+function try_return_time(p::Parser{Dates}, h, m, s, ms) where Dates
     if Dates !== nothing
         try
             return Dates.Time(h, m, s, ms)
-        catch
-            return ParserError(ErrParsingDateTime)
+        catch ex
+            ex isa ArgumentError && return ParserError(ErrParsingDateTime)
+            rethrow()
         end
     else
         return Time(h, m, s, ms)
@@ -1086,7 +1110,7 @@ function _parse_local_time(l::Parser, skip_hour=false)::Err{NTuple{4, Int64}}
     second in 0:59 || return ParserError(ErrParsingDateTime)
 
     # optional fractional second
-    fractional_second = Int64(0)
+    millisecond = Int64(0)
     if accept(l, '.')
         set_marker!(l)
         found_fractional_digit = false
@@ -1097,12 +1121,15 @@ function _parse_local_time(l::Parser, skip_hour=false)::Err{NTuple{4, Int64}}
             return ParserError(ErrParsingDateTime)
         end
         # DateTime in base only manages 3 significant digits in fractional
-        # second
-        fractional_second = parse_int(l, false)
+        # second. Interpret parsed digits as fractional seconds and scale to
+        # milliseconds precision (e.g., ".2" => 200ms, ".20" => 200ms).
+        ndigits = l.prevpos - l.marker
+        fractional_second = parse_int(l, false)::Int64
+        millisecond = fractional_second * 10^(3 - ndigits)
         # Truncate off the rest eventual digits
         accept_batch(l, isdigit)
     end
-    return hour, minute, second, fractional_second
+    return hour, minute, second, millisecond
 end
 
 
diff --git a/base/ttyhascolor.jl b/base/ttyhascolor.jl
deleted file mode 100644
index 5984dba6d592e..0000000000000
--- a/base/ttyhascolor.jl
+++ /dev/null
@@ -1,27 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-if Sys.iswindows()
-    ttyhascolor(term_type = nothing) = true
-else
-    function ttyhascolor(term_type = get(ENV, "TERM", ""))
-        startswith(term_type, "xterm") && return true
-        try
-            @static if Sys.KERNEL === :FreeBSD
-                return success(`tput AF 0`)
-            else
-                return success(`tput setaf 0`)
-            end
-        catch e
-            return false
-        end
-    end
-end
-function get_have_color()
-    global have_color
-    have_color === nothing && (have_color = ttyhascolor())
-    return have_color::Bool
-end
-in(key_value::Pair{Symbol,Bool}, ::TTY) = key_value.first === :color && key_value.second === get_have_color()
-haskey(::TTY, key::Symbol) = key === :color
-getindex(::TTY, key::Symbol) = key === :color ? get_have_color() : throw(KeyError(key))
-get(::TTY, key::Symbol, default) = key === :color ? get_have_color() : default
diff --git a/base/tuple.jl b/base/tuple.jl
index 59fe2c1e531e1..3cdf78fa4d135 100644
--- a/base/tuple.jl
+++ b/base/tuple.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+import Core: Tuple
+
 # Document NTuple here where we have everything needed for the doc system
 """
     NTuple{N, T}
@@ -28,10 +30,9 @@ firstindex(@nospecialize t::Tuple) = 1
 lastindex(@nospecialize t::Tuple) = length(t)
 size(@nospecialize(t::Tuple), d::Integer) = (d == 1) ? length(t) : throw(ArgumentError("invalid tuple dimension $d"))
 axes(@nospecialize t::Tuple) = (OneTo(length(t)),)
-@eval getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, $(Expr(:boundscheck)))
-@eval getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), $(Expr(:boundscheck)))
-__inbounds_getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, false)
-__inbounds_getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), false)
+getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, @_boundscheck)
+getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), @_boundscheck)
+__safe_getindex(@nospecialize(t::Tuple), i::Int) = (@_nothrow_noub_meta; getfield(t, i, false))
 getindex(t::Tuple, r::AbstractArray{<:Any,1}) = (eltype(t)[t[ri] for ri in r]...,)
 getindex(t::Tuple, b::AbstractArray{Bool,1}) = length(b) == length(t) ? getindex(t, findall(b)) : throw(BoundsError(t, b))
 getindex(t::Tuple, c::Colon) = t
@@ -42,9 +43,9 @@ get(f::Callable, t::Tuple, i::Integer) = i in 1:length(t) ? getindex(t, i) : f()
 # returns new tuple; N.B.: becomes no-op if `i` is out-of-bounds
 
 """
-    setindex(c::Tuple, v, i::Integer)
+    setindex(t::Tuple, v, i::Integer)
 
-Creates a new tuple similar to `x` with the value at index `i` set to `v`.
+Create a new tuple similar to `t` with the value at index `i` set to `v`.
 Throws a `BoundsError` when out of bounds.
 
 # Examples
@@ -61,7 +62,7 @@ end
 
 function _setindex(v, i::Integer, args::Vararg{Any,N}) where {N}
     @inline
-    return ntuple(j -> ifelse(j == i, v, args[j]), Val{N}())
+    return ntuple(j -> ifelse(j == i, v, args[j]), Val{N}())::NTuple{N, Any}
 end
 
 
@@ -69,22 +70,92 @@ end
 
 function iterate(@nospecialize(t::Tuple), i::Int=1)
     @inline
+    @_nothrow_meta
     return (1 <= i <= length(t)) ? (t[i], i + 1) : nothing
 end
 
 keys(@nospecialize t::Tuple) = OneTo(length(t))
 
+"""
+    prevind(A, i)
+
+Return the index before `i` in `A`. The returned index is often equivalent to
+`i - 1` for an integer `i`. This function can be useful for generic code.
+
+!!! warning
+    The returned index might be out of bounds. Consider using
+    [`checkbounds`](@ref).
+
+See also: [`nextind`](@ref).
+
+# Examples
+```jldoctest
+julia> x = [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1  2
+ 3  4
+
+julia> prevind(x, 4) # valid result
+3
+
+julia> prevind(x, 1) # invalid result
+0
+
+julia> prevind(x, CartesianIndex(2, 2)) # valid result
+CartesianIndex(1, 2)
+
+julia> prevind(x, CartesianIndex(1, 1)) # invalid result
+CartesianIndex(2, 0)
+```
+"""
+function prevind end
+
+"""
+    nextind(A, i)
+
+Return the index after `i` in `A`. The returned index is often equivalent to
+`i + 1` for an integer `i`. This function can be useful for generic code.
+
+!!! warning
+    The returned index might be out of bounds. Consider using
+    [`checkbounds`](@ref).
+
+See also: [`prevind`](@ref).
+
+# Examples
+```jldoctest
+julia> x = [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1  2
+ 3  4
+
+julia> nextind(x, 1) # valid result
+2
+
+julia> nextind(x, 4) # invalid result
+5
+
+julia> nextind(x, CartesianIndex(1, 1)) # valid result
+CartesianIndex(2, 1)
+
+julia> nextind(x, CartesianIndex(2, 2)) # invalid result
+CartesianIndex(1, 3)
+```
+"""
+function nextind end
+
 prevind(@nospecialize(t::Tuple), i::Integer) = Int(i)-1
 nextind(@nospecialize(t::Tuple), i::Integer) = Int(i)+1
 
 function keys(t::Tuple, t2::Tuple...)
     @inline
-    OneTo(_maxlength(t, t2...))
-end
-_maxlength(t::Tuple) = length(t)
-function _maxlength(t::Tuple, t2::Tuple, t3::Tuple...)
-    @inline
-    max(length(t), _maxlength(t2, t3...))
+    lent = length(t)
+    if !all(==(lent) ∘ length, t2)
+        let inds = map(only ∘ axes, (t, t2...))
+            throw_eachindex_mismatch_indices("indices", inds...)
+        end
+    end
+    Base.OneTo(lent)
 end
 
 # this allows partial evaluation of bounded sequences of next() calls on tuples,
@@ -190,7 +261,9 @@ function _split_rest(a::Union{AbstractArray, Core.SimpleVector}, n::Int)
     return a[begin:end-n], a[end-n+1:end]
 end
 
-@eval split_rest(t::Tuple, n::Int, i=1) = ($(Expr(:meta, :aggressive_constprop)); (t[i:end-n], t[end-n+1:end]))
+@eval _split_tuple(t::Tuple, n::Int, i::Int=1) = ($(Expr(:meta, :aggressive_constprop)); (t[i:n], t[n+1:end]))
+
+@eval split_rest(t::Tuple, n::Int, i=1) = ($(Expr(:meta, :aggressive_constprop)); _split_tuple(t, length(t)-n, Int(i)))
 
 # Use dispatch to avoid a branch in first
 first(::Tuple{}) = throw(ArgumentError("tuple must be non-empty"))
@@ -198,58 +271,48 @@ first(t::Tuple) = t[1]
 
 # eltype
 
-eltype(::Type{Tuple{}}) = Bottom
-function eltype(t::Type{<:Tuple{Vararg{E}}}) where {E}
-    if @isdefined(E)
-        return E
+# the <: here makes the runtime a bit more complicated (needing to check isdefined), but really helps inference
+_eltype_ntuple(t::Type{<:Tuple{Vararg{E}}}) where {E} = @isdefined(E) ? (E isa Type ? E : Union{}) : _compute_eltype(t)
+# We'd like to be able to infer eltype(::Tuple), so keep the number of eltype(::Type{<:Tuple}) methods at max_methods!
+function eltype(t::Type{<:Tuple})
+    if t <: Tuple{}
+        Bottom
+    elseif t <: NTuple
+        _eltype_ntuple(t)
     else
-        # TODO: need to guard against E being miscomputed by subtyping (ref #23017)
-        # and compute the result manually in this case
-        return _compute_eltype(t)
+        _compute_eltype(t)
     end
 end
-eltype(t::Type{<:Tuple}) = _compute_eltype(t)
-function _tuple_unique_fieldtypes(@nospecialize t)
+function _compute_eltype(@nospecialize t)
     @_total_meta
-    types = IdSet()
+    has_free_typevars(t) && return Any
     t´ = unwrap_unionall(t)
     # Given t = Tuple{Vararg{S}} where S<:Real, the various
     # unwrapping/wrapping/va-handling here will return Real
     if t´ isa Union
-        union!(types, _tuple_unique_fieldtypes(rewrap_unionall(t´.a, t)))
-        union!(types, _tuple_unique_fieldtypes(rewrap_unionall(t´.b, t)))
-    else
-        for ti in (t´::DataType).parameters
-            push!(types, rewrap_unionall(unwrapva(ti), t))
-        end
+        return promote_typejoin(_compute_eltype(rewrap_unionall(t´.a, t)),
+                                _compute_eltype(rewrap_unionall(t´.b, t)))
     end
-    return Core.svec(types...)
-end
-function _compute_eltype(@nospecialize t)
-    @_total_meta # TODO: the compiler shouldn't need this
-    types = _tuple_unique_fieldtypes(t)
-    return afoldl(types...) do a, b
-        # if we've already reached Any, it can't widen any more
-        a === Any && return Any
-        b === Any && return Any
-        return promote_typejoin(a, b)
+    p = (t´::DataType).parameters
+    length(p) == 0 && return Union{}
+    elt = rewrap_unionall(unwrapva(p[1]), t)
+    elt isa Type || return Union{} # Tuple{2} is legal as a Type, but the eltype is Union{} since it is uninhabited
+    r = elt
+    for i in 2:length(p)
+        r === Any && return r # if we've already reached Any, it can't widen any more
+        elt = rewrap_unionall(unwrapva(p[i]), t)
+        elt isa Type || return Union{} # Tuple{2} is legal as a Type, but the eltype is Union{} since it is uninhabited
+        r = promote_typejoin(elt, r)
     end
+    return r
 end
 
-# We'd like to be able to infer eltype(::Tuple), which needs to be able to
-# look at these four methods:
-#
-# julia> methods(Base.eltype, Tuple{Type{<:Tuple}})
-# 4 methods for generic function "eltype" from Base:
-# [1] eltype(::Type{Union{}})
-#  @ abstractarray.jl:234
-# [2] eltype(::Type{Tuple{}})
-#  @ tuple.jl:199
-# [3] eltype(t::Type{<:Tuple{Vararg{E}}}) where E
-#  @ tuple.jl:200
-# [4] eltype(t::Type{<:Tuple})
-#  @ tuple.jl:209
-typeof(function eltype end).name.max_methods = UInt8(4)
+# key/val types
+keytype(@nospecialize t::Tuple) = keytype(typeof(t))
+keytype(@nospecialize T::Type{<:Tuple}) = Int
+
+valtype(@nospecialize t::Tuple) = valtype(typeof(t))
+valtype(@nospecialize T::Type{<:Tuple}) = eltype(T)
 
 # version of tail that doesn't throw on empty tuples (used in array indexing)
 safe_tail(t::Tuple) = tail(t)
@@ -332,7 +395,7 @@ end
 # n argument function
 heads(ts::Tuple...) = map(t -> t[1], ts)
 tails(ts::Tuple...) = map(tail, ts)
-map(f, ::Tuple{}...) = ()
+map(f, ::Tuple{}, ::Tuple{}...) = ()
 anyempty(x::Tuple{}, xs...) = true
 anyempty(x::Tuple, xs...) = anyempty(xs...)
 anyempty() = false
@@ -362,10 +425,6 @@ fill_to_length(t::Tuple{}, val, ::Val{2}) = (val, val)
 
 # constructing from an iterator
 
-# only define these in Base, to avoid overwriting the constructors
-# NOTE: this means this constructor must be avoided in Core.Compiler!
-if nameof(@__MODULE__) === :Base
-
 function tuple_type_tail(T::Type)
     @_foldable_meta # TODO: this method is wrong (and not :foldable)
     if isa(T, UnionAll)
@@ -394,7 +453,7 @@ _totuple(::Type{Tuple{}}, itr, s...) = ()
 
 function _totuple_err(@nospecialize T)
     @noinline
-    throw(ArgumentError("too few elements for tuple type $T"))
+    throw(ArgumentError(LazyString("too few elements for tuple type ", T)))
 end
 
 function _totuple(::Type{T}, itr, s::Vararg{Any,N}) where {T,N}
@@ -428,16 +487,15 @@ _totuple(::Type{Tuple}, itr, s...) = (collect(Iterators.rest(itr,s...))...,)
 _totuple(::Type{Tuple}, itr::Array) = (itr...,)
 _totuple(::Type{Tuple}, itr::SimpleVector) = (itr...,)
 _totuple(::Type{Tuple}, itr::NamedTuple) = (itr...,)
+_totuple(::Type{Tuple}, p::Pair) = (p.first, p.second)
 _totuple(::Type{Tuple}, x::Number) = (x,) # to make Tuple(x) inferable
 
-end
-
 ## find ##
 
 _findfirst_rec(f, i::Int, ::Tuple{}) = nothing
 _findfirst_rec(f, i::Int, t::Tuple) = (@inline; f(first(t)) ? i : _findfirst_rec(f, i+1, tail(t)))
 function _findfirst_loop(f::Function, t)
-    for i in 1:length(t)
+    for i in eachindex(t)
         f(t[i]) && return i
     end
     return nothing
@@ -471,7 +529,7 @@ function _isequal(t1::Tuple{Any,Vararg{Any}}, t2::Tuple{Any,Vararg{Any}})
     return isequal(t1[1], t2[1]) && _isequal(tail(t1), tail(t2))
 end
 function _isequal(t1::Any32, t2::Any32)
-    for i = 1:length(t1)
+    for i in eachindex(t1, t2)
         if !isequal(t1[i], t2[i])
             return false
         end
@@ -502,7 +560,7 @@ function _eq_missing(t1::Tuple, t2::Tuple)
 end
 function _eq(t1::Any32, t2::Any32)
     anymissing = false
-    for i = 1:length(t1)
+    for i in eachindex(t1, t2)
         eq = (t1[i] == t2[i])
         if ismissing(eq)
             anymissing = true
@@ -514,10 +572,10 @@ function _eq(t1::Any32, t2::Any32)
 end
 
 const tuplehash_seed = UInt === UInt64 ? 0x77cfa1eef01bca90 : 0xf01bca90
-hash(::Tuple{}, h::UInt) = h + tuplehash_seed
+hash(::Tuple{}, h::UInt) = h ⊻ tuplehash_seed
 hash(t::Tuple, h::UInt) = hash(t[1], hash(tail(t), h))
 function hash(t::Any32, h::UInt)
-    out = h + tuplehash_seed
+    out = h ⊻ tuplehash_seed
     for i = length(t):-1:1
         out = hash(t[i], out)
     end
@@ -594,19 +652,8 @@ prod(x::Tuple{}) = 1
 # than the general prod definition is available.
 prod(x::Tuple{Int, Vararg{Int}}) = *(x...)
 
-all(x::Tuple{}) = true
-all(x::Tuple{Bool}) = x[1]
-all(x::Tuple{Bool, Bool}) = x[1]&x[2]
-all(x::Tuple{Bool, Bool, Bool}) = x[1]&x[2]&x[3]
-# use generic reductions for the rest
-
-any(x::Tuple{}) = false
-any(x::Tuple{Bool}) = x[1]
-any(x::Tuple{Bool, Bool}) = x[1]|x[2]
-any(x::Tuple{Bool, Bool, Bool}) = x[1]|x[2]|x[3]
-
 # a version of `in` esp. for NamedTuple, to make it pure, and not compiled for each tuple length
-function sym_in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}})
+function sym_in(x::Symbol, itr::Tuple{Vararg{Symbol}})
     @noinline
     @_total_meta
     for y in itr
@@ -614,7 +661,7 @@ function sym_in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}})
     end
     return false
 end
-in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}}) = sym_in(x, itr)
+in(x::Symbol, itr::Tuple{Vararg{Symbol}}) = sym_in(x, itr)
 
 
 """
@@ -625,4 +672,13 @@ Return an empty tuple, `()`.
 empty(@nospecialize x::Tuple) = ()
 
 foreach(f, itr::Tuple) = foldl((_, x) -> (f(x); nothing), itr, init=nothing)
-foreach(f, itrs::Tuple...) = foldl((_, xs) -> (f(xs...); nothing), zip(itrs...), init=nothing)
+foreach(f, itr::Tuple, itrs::Tuple...) = foldl((_, xs) -> (f(xs...); nothing), zip(itr, itrs...), init=nothing)
+
+circshift((@nospecialize t::Union{Tuple{},Tuple{Any}}), @nospecialize _::Integer) = t
+circshift(t::Tuple{Any,Any}, shift::Integer) = iseven(shift) ? t : reverse(t)
+function circshift(x::Tuple{Any,Any,Any,Vararg{Any,N}}, shift::Integer) where {N}
+    @inline
+    len = N + 3
+    j = mod1(shift, len)
+    ntuple(k -> getindex(x, k-j+ifelse(k>j,0,len)), Val(len))::Tuple
+end
diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl
index d91a04371230c..920ba71eba24f 100644
--- a/base/twiceprecision.jl
+++ b/base/twiceprecision.jl
@@ -63,7 +63,7 @@ representation, even though it is exact from the standpoint of binary
 representation.
 
 Example:
-```julia-repl
+```jldoctest
 julia> 1.0 + 1.0001e-15
 1.000000000000001
 
@@ -94,7 +94,7 @@ numbers. Mathematically, `zhi + zlo = x * y`, where `zhi` contains the
 most significant bits and `zlo` the least significant.
 
 Example:
-```julia-repl
+```jldoctest
 julia> x = Float32(π)
 3.1415927f0
 
@@ -126,7 +126,7 @@ numbers. Mathematically, `zhi + zlo ≈ x / y`, where `zhi` contains the
 most significant bits and `zlo` the least significant.
 
 Example:
-```julia-repl
+```jldoctest
 julia> x, y = Float32(π), 3.1f0
 (3.1415927f0, 3.1f0)
 
@@ -278,6 +278,7 @@ big(x::TwicePrecision) = big(x.hi) + big(x.lo)
 
 -(x::TwicePrecision) = TwicePrecision(-x.hi, -x.lo)
 
+zero(x::TwicePrecision) = zero(typeof(x))
 function zero(::Type{TwicePrecision{T}}) where {T}
     z = zero(T)
     TwicePrecision{T}(z, z)
@@ -476,9 +477,7 @@ end
 # This assumes that r.step has already been split so that (0:len-1)*r.step.hi is exact
 function unsafe_getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, i::Integer) where T
     # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add12
-    @inline
-    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
-    u = i - r.offset
+    u = oftype(r.offset, i) - r.offset
     shift_hi, shift_lo = u*r.step.hi, u*r.step.lo
     x_hi, x_lo = add12(r.ref.hi, shift_hi)
     T(x_hi + (x_lo + (shift_lo + r.ref.lo)))
@@ -486,7 +485,7 @@ end
 
 function _getindex_hiprec(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Integer)
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
-    u = i - r.offset
+    u = oftype(r.offset, i) - r.offset
     shift_hi, shift_lo = u*r.step.hi, u*r.step.lo
     x_hi, x_lo = add12(r.ref.hi, shift_hi)
     x_hi, x_lo = add12(x_hi, x_lo + (shift_lo + r.ref.lo))
@@ -787,3 +786,19 @@ _tp_prod(t::TwicePrecision) = t
     x.hi < y.hi || ((x.hi == y.hi) & (x.lo < y.lo))
 
 isbetween(a, x, b) = a <= x <= b || b <= x <= a
+
+# These functions exist for use in LogRange:
+
+_exp_allowing_twice64(x::Number) = exp(x)
+_exp_allowing_twice64(x::TwicePrecision{Float64}) = Math.exp_impl(x.hi, x.lo, Val(:ℯ))
+
+# No error on negative x, and for NaN/Inf this returns junk:
+function _log_twice64_unchecked(x::Float64)
+    xu = reinterpret(UInt64, x)
+    if xu < (UInt64(1)<<52) # x is subnormal
+        xu = reinterpret(UInt64, x * 0x1p52) # normalize x
+        xu &= ~sign_mask(Float64)
+        xu -= UInt64(52) << 52 # mess with the exponent
+    end
+    TwicePrecision(Math._log_ext(xu)...)
+end
diff --git a/base/util.jl b/base/util.jl
index 3ccdd0a37ae68..962a6873cf054 100644
--- a/base/util.jl
+++ b/base/util.jl
@@ -77,7 +77,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
     iscolor = get(io, :color, false)::Bool
     try f(IOContext(buf, io), args...)
     finally
-        str = String(take!(buf))
+        str = takestring!(buf)
         if !iscolor
             print(io, str)
         else
@@ -109,7 +109,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
                 isempty(line) && continue
                 print(buf, enable_ansi, line, disable_ansi)
             end
-            print(io, String(take!(buf)))
+            print(io, takestring!(buf))
         end
     end
 end
@@ -144,7 +144,7 @@ See also [`print`](@ref), [`println`](@ref), [`show`](@ref).
     printstyled(stdout, msg...; bold=bold, italic=italic, underline=underline, blink=blink, reverse=reverse, hidden=hidden, color=color)
 
 """
-    Base.julia_cmd(juliapath=joinpath(Sys.BINDIR, julia_exename()); cpu_target)
+    Base.julia_cmd(juliapath=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Union{Nothing,String}=nothing)
 
 Return a julia command similar to the one of the running process.
 Propagates any of the `--cpu-target`, `--sysimage`, `--compile`, `--sysimage-native-code`,
@@ -154,6 +154,8 @@ command line arguments that are not at their default values.
 
 Among others, `--math-mode`, `--warn-overwrite`, and `--trace-compile` are notably not propagated currently.
 
+Unless set to `nothing`, the `cpu_target` keyword argument can be used to override the CPU target set for the running process.
+
 To get the julia command without propagated command line arguments, `julia_cmd()[1]` can be used.
 
 !!! compat "Julia 1.1"
@@ -163,8 +165,7 @@ To get the julia command without propagated command line arguments, `julia_cmd()
     The flags `--color` and `--startup-file` were added in Julia 1.5.
 
 !!! compat "Julia 1.9"
-    The keyword argument `cpu_target` was added.
-
+    The keyword argument `cpu_target` was added in 1.9.
     The flag `--pkgimages` was added in Julia 1.9.
 """
 function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Union{Nothing,String} = nothing)
@@ -205,6 +206,10 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Unio
     end
     opts.can_inline == 0 && push!(addflags, "--inline=no")
     opts.use_compiled_modules == 0 && push!(addflags, "--compiled-modules=no")
+    opts.use_compiled_modules == 2 && push!(addflags, "--compiled-modules=existing")
+    opts.use_compiled_modules == 3 && push!(addflags, "--compiled-modules=strict")
+    opts.use_pkgimages == 0 && push!(addflags, "--pkgimages=no")
+    opts.use_pkgimages == 2 && push!(addflags, "--pkgimages=existing")
     opts.opt_level == 2 || push!(addflags, "-O$(opts.opt_level)")
     opts.opt_level_min == 0 || push!(addflags, "--min-optlevel=$(opts.opt_level_min)")
     push!(addflags, "-g$(opts.debug_level)")
@@ -240,17 +245,14 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Unio
     if opts.use_sysimage_native_code == 0
         push!(addflags, "--sysimage-native-code=no")
     end
-    if opts.use_pkgimages == 0
-        push!(addflags, "--pkgimages=no")
-    else
-        # If pkgimage is set, malloc_log and code_coverage should not
-        @assert opts.malloc_log == 0 && opts.code_coverage == 0
+    if opts.compress_sysimage == 1
+        push!(addflags, "--compress-sysimage=yes")
     end
-    return `$julia -C$cpu_target -J$image_file $addflags`
+    return `$julia -C $cpu_target -J$image_file $addflags`
 end
 
 function julia_exename()
-    if !Base.isdebugbuild()
+    if !isdebugbuild()
         return @static Sys.iswindows() ? "julia.exe" : "julia"
     else
         return @static Sys.iswindows() ? "julia-debug.exe" : "julia-debug"
@@ -272,15 +274,29 @@ function securezero! end
 unsafe_securezero!(p::Ptr{Cvoid}, len::Integer=1) = Ptr{Cvoid}(unsafe_securezero!(Ptr{UInt8}(p), len))
 
 """
-    Base.getpass(message::AbstractString) -> Base.SecretBuffer
+    Base.getpass(message::AbstractString; with_suffix::Bool=true)::Base.SecretBuffer
 
 Display a message and wait for the user to input a secret, returning an `IO`
-object containing the secret.
+object containing the secret. If `with_suffix` is `true` (the default), the
+suffix `": "` will be appended to `message`.
 
 !!! info "Windows"
     Note that on Windows, the secret might be displayed as it is typed; see
     `Base.winprompt` for securely retrieving username/password pairs from a
     graphical interface.
+
+!!! compat "Julia 1.12"
+    The `with_suffix` keyword argument requires at least Julia 1.12.
+
+# Examples
+
+```julia-repl
+julia> Base.getpass("Secret")
+Secret: SecretBuffer("*******")
+
+julia> Base.getpass("Secret> "; with_suffix=false)
+Secret> SecretBuffer("*******")
+```
 """
 function getpass end
 
@@ -340,11 +356,13 @@ function with_raw_tty(f::Function, input::TTY)
     end
 end
 
-function getpass(input::TTY, output::IO, prompt::AbstractString)
+function getpass(input::TTY, output::IO, prompt::AbstractString; with_suffix::Bool=true)
     input === stdin || throw(ArgumentError("getpass only works for stdin"))
     with_raw_tty(stdin) do
-        print(output, prompt, ": ")
+        print(output, prompt)
+        with_suffix && print(output, ": ")
         flush(output)
+
         s = SecretBuffer()
         plen = 0
         while true
@@ -365,10 +383,10 @@ end
 
 # allow new getpass methods to be defined if stdin has been
 # redirected to some custom stream, e.g. in IJulia.
-getpass(prompt::AbstractString) = getpass(stdin, stdout, prompt)
+getpass(prompt::AbstractString; with_suffix::Bool=true) = getpass(stdin, stdout, prompt; with_suffix)
 
 """
-    prompt(message; default="") -> Union{String, Nothing}
+    prompt(message; default="")::Union{String, Nothing}
 
 Displays the `message` then waits for user input. Input is terminated when a newline (\\n)
 is encountered or EOF (^D) character is entered on a blank line. If a `default` is provided
@@ -376,7 +394,7 @@ then the user can enter just a newline character to select the `default`.
 
 See also `Base.winprompt` (for Windows) and `Base.getpass` for secure entry of passwords.
 
-# Example
+# Examples
 
 ```julia-repl
 julia> your_name = Base.prompt("Enter your name");
@@ -492,8 +510,10 @@ unsafe_crc32c(a, n, crc) = ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_
 
 _crc32c(a::NTuple{<:Any, UInt8}, crc::UInt32=0x00000000) =
     unsafe_crc32c(Ref(a), length(a) % Csize_t, crc)
-_crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) =
+
+function _crc32c(a::DenseUInt8OrInt8, crc::UInt32=0x00000000)
     unsafe_crc32c(a, length(a) % Csize_t, crc)
+end
 
 function _crc32c(s::Union{String, SubString{String}}, crc::UInt32=0x00000000)
     unsafe_crc32c(s, sizeof(s) % Csize_t, crc)
@@ -513,7 +533,6 @@ function _crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000)
 end
 _crc32c(io::IO, crc::UInt32=0x00000000) = _crc32c(io, typemax(Int64), crc)
 _crc32c(io::IOStream, crc::UInt32=0x00000000) = _crc32c(io, filesize(io)-position(io), crc)
-_crc32c(uuid::UUID, crc::UInt32=0x00000000) = _crc32c(uuid.value, crc)
 _crc32c(x::UInt128, crc::UInt32=0x00000000) =
     ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt128}, Csize_t), crc, x, 16)
 _crc32c(x::UInt64, crc::UInt32=0x00000000) =
@@ -565,24 +584,31 @@ Stacktrace:
 macro kwdef(expr)
     expr = macroexpand(__module__, expr) # to expand @static
     isexpr(expr, :struct) || error("Invalid usage of @kwdef")
-    T = expr.args[2]
+    _, T, fieldsblock = expr.args
     if T isa Expr && T.head === :<:
         T = T.args[1]
     end
 
-    params_ex = Expr(:parameters)
-    call_args = Any[]
+    fieldnames = Any[]
+    defvals = Any[]
+    extract_names_and_defvals_from_kwdef_fieldblock!(fieldsblock, fieldnames, defvals)
+    parameters = map(fieldnames, defvals) do fieldname, defval
+        if isnothing(defval)
+            return fieldname
+        else
+            return Expr(:kw, fieldname, esc(defval))
+        end
+    end
 
-    _kwdef!(expr.args[3], params_ex.args, call_args)
     # Only define a constructor if the type has fields, otherwise we'll get a stack
     # overflow on construction
-    if !isempty(params_ex.args)
-        if T isa Symbol
-            sig = :(($(esc(T)))($params_ex))
-            call = :(($(esc(T)))($(call_args...)))
-            body = Expr(:block, __source__, call)
+    if !isempty(parameters)
+        T_no_esc = Meta.unescape(T)
+        if T_no_esc isa Symbol
+            sig = Expr(:call, esc(T), Expr(:parameters, parameters...))
+            body = Expr(:block, __source__, Expr(:call, esc(T), fieldnames...))
             kwdefs = Expr(:function, sig, body)
-        elseif isexpr(T, :curly)
+        elseif isexpr(T_no_esc, :curly)
             # if T == S{A<:AA,B<:BB}, define two methods
             #   S(...) = ...
             #   S{A,B}(...) where {A<:AA,B<:BB} = ...
@@ -590,11 +616,11 @@ macro kwdef(expr)
             P = T.args[2:end]
             Q = Any[isexpr(U, :<:) ? U.args[1] : U for U in P]
             SQ = :($S{$(Q...)})
-            body1 = Expr(:block, __source__, :(($(esc(S)))($(call_args...))))
-            sig1 = :(($(esc(S)))($params_ex))
+            body1 = Expr(:block, __source__, Expr(:call, esc(S), fieldnames...))
+            sig1 = Expr(:call, esc(S), Expr(:parameters, parameters...))
             def1 = Expr(:function, sig1, body1)
-            body2 = Expr(:block, __source__, :(($(esc(SQ)))($(call_args...))))
-            sig2 = :(($(esc(SQ)))($params_ex) where {$(esc.(P)...)})
+            body2 = Expr(:block, __source__, Expr(:call, esc(SQ), fieldnames...))
+            sig2 = :($(Expr(:call, esc(SQ), Expr(:parameters, parameters...))) where {$(esc.(P)...)})
             def2 = Expr(:function, sig2, body2)
             kwdefs = Expr(:block, def1, def2)
         else
@@ -611,61 +637,51 @@ end
 
 # @kwdef helper function
 # mutates arguments inplace
-function _kwdef!(blk, params_args, call_args)
-    for i in eachindex(blk.args)
-        ei = blk.args[i]
-        if ei isa Symbol
-            #  var
-            push!(params_args, ei)
-            push!(call_args, ei)
-        elseif ei isa Expr
-            is_atomic = ei.head === :atomic
-            ei = is_atomic ? first(ei.args) : ei # strip "@atomic" and add it back later
-            is_const = ei.head === :const
-            ei = is_const ? first(ei.args) : ei # strip "const" and add it back later
-            # Note: `@atomic const ..` isn't valid, but reconstruct it anyway to serve a nice error
-            if ei isa Symbol
-                # const var
-                push!(params_args, ei)
-                push!(call_args, ei)
-            elseif ei.head === :(=)
-                lhs = ei.args[1]
-                if lhs isa Symbol
-                    #  var = defexpr
-                    var = lhs
-                elseif lhs isa Expr && lhs.head === :(::) && lhs.args[1] isa Symbol
-                    #  var::T = defexpr
-                    var = lhs.args[1]
-                else
-                    # something else, e.g. inline inner constructor
-                    #   F(...) = ...
-                    continue
+function extract_names_and_defvals_from_kwdef_fieldblock!(block, names, defvals)
+    for (i, item) in pairs(block.args)
+        if isexpr(item, :block)
+            extract_names_and_defvals_from_kwdef_fieldblock!(item, names, defvals)
+        elseif item isa Expr && item.head in (:escape, :var"hygienic-scope")
+            n = length(names)
+            extract_names_and_defvals_from_kwdef_fieldblock!(item, names, defvals)
+            for j in n+1:length(defvals)
+                if !isnothing(defvals[j])
+                    defvals[j] = Expr(item.head, defvals[j])
                 end
-                defexpr = ei.args[2]  # defexpr
-                push!(params_args, Expr(:kw, var, esc(defexpr)))
-                push!(call_args, var)
-                lhs = is_const ? Expr(:const, lhs) : lhs
-                lhs = is_atomic ? Expr(:atomic, lhs) : lhs
-                blk.args[i] = lhs # overrides arg
-            elseif ei.head === :(::) && ei.args[1] isa Symbol
-                # var::Typ
-                var = ei.args[1]
-                push!(params_args, var)
-                push!(call_args, var)
-            elseif ei.head === :block
-                # can arise with use of @static inside type decl
-                _kwdef!(ei, params_args, call_args)
             end
+        else
+            def, name, defval = @something(def_name_defval_from_kwdef_fielddef(item), continue)
+            block.args[i] = def
+            push!(names, name)
+            push!(defvals, defval)
         end
     end
-    blk
+end
+
+function def_name_defval_from_kwdef_fielddef(kwdef)
+    if kwdef isa Symbol
+        return kwdef, kwdef, nothing
+    elseif isexpr(kwdef, :(::))
+        name, _ = kwdef.args
+        return kwdef, Meta.unescape(name), nothing
+    elseif isexpr(kwdef, :(=))
+        lhs, rhs = kwdef.args
+        def, name, _ = @something(def_name_defval_from_kwdef_fielddef(lhs), return nothing)
+        return def, name, rhs
+    elseif kwdef isa Expr && kwdef.head in (:const, :atomic)
+        def, name, defval = @something(def_name_defval_from_kwdef_fielddef(kwdef.args[1]), return nothing)
+        return Expr(kwdef.head, def), name, defval
+    elseif kwdef isa Expr && kwdef.head in (:escape, :var"hygienic-scope")
+        def, name, defval = @something(def_name_defval_from_kwdef_fielddef(kwdef.args[1]), return nothing)
+        return Expr(kwdef.head, def), name, isnothing(defval) ? defval : Expr(kwdef.head, defval)
+    end
 end
 
 # testing
 
 """
-    Base.runtests(tests=["all"]; ncores=ceil(Int, Sys.CPU_THREADS / 2),
-                  exit_on_error=false, revise=false, [seed])
+    Base.runtests(tests=["all"]; ncores=ceil(Int, Sys.EFFECTIVE_CPU_THREADS / 2),
+                  exit_on_error=false, revise=false, propagate_project=true, [seed], [julia_args::Cmd])
 
 Run the Julia unit tests listed in `tests`, which can be either a string or an array of
 strings, using `ncores` processors. If `exit_on_error` is `false`, when one test
@@ -673,13 +689,17 @@ fails, all remaining tests in other files will still be run; they are otherwise
 when `exit_on_error == true`.
 If `revise` is `true`, the `Revise` package is used to load any modifications to `Base` or
 to the standard libraries before running the tests.
+If `propagate_project` is true the current project is propagated to the test environment.
 If a seed is provided via the keyword argument, it is used to seed the
 global RNG in the context where the tests are run; otherwise the seed is chosen randomly.
+The argument `julia_args` can be used to pass custom `julia` command line flags to the test process.
 """
-function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS / 2),
+function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.EFFECTIVE_CPU_THREADS / 2),
                   exit_on_error::Bool=false,
                   revise::Bool=false,
-                  seed::Union{BitInteger,Nothing}=nothing)
+                  propagate_project::Bool=false,
+                  seed::Union{BitInteger,Nothing}=nothing,
+                  julia_args::Cmd=``)
     if isa(tests,AbstractString)
         tests = split(tests)
     end
@@ -690,17 +710,19 @@ function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS / 2),
     ENV2["JULIA_CPU_THREADS"] = "$ncores"
     pathsep = Sys.iswindows() ? ";" : ":"
     ENV2["JULIA_DEPOT_PATH"] = string(mktempdir(; cleanup = true), pathsep) # make sure the default depots can be loaded
-    delete!(ENV2, "JULIA_LOAD_PATH")
+    ENV2["JULIA_LOAD_PATH"] = string("@", pathsep, "@stdlib")
+    ENV2["JULIA_TESTS"] = "true"
     delete!(ENV2, "JULIA_PROJECT")
+    project_flag = propagate_project ? `--project` : ``
     try
-        run(setenv(`$(julia_cmd()) $(joinpath(Sys.BINDIR,
+        run(setenv(`$(julia_cmd()) $julia_args $project_flag $(joinpath(Sys.BINDIR,
             Base.DATAROOTDIR, "julia", "test", "runtests.jl")) $tests`, ENV2))
         nothing
     catch
         buf = PipeBuffer()
-        original_load_path = copy(Base.LOAD_PATH); empty!(Base.LOAD_PATH); pushfirst!(Base.LOAD_PATH, "@stdlib")
-        Base.require(Base, :InteractiveUtils).versioninfo(buf)
-        empty!(Base.LOAD_PATH); append!(Base.LOAD_PATH, original_load_path)
+        let InteractiveUtils = Base.require_stdlib(PkgId(UUID(0xb77e0a4c_d291_57a0_90e8_8db25a27a240), "InteractiveUtils"))
+            @invokelatest InteractiveUtils.versioninfo(buf)
+        end
         error("A test has failed. Please submit a bug report (https://github.com/JuliaLang/julia/issues)\n" *
               "including error messages above and the output of versioninfo():\n$(read(buf, String))")
     end
diff --git a/base/uuid.jl b/base/uuid.jl
index ff4df68ddb7c8..4b9bae863d926 100644
--- a/base/uuid.jl
+++ b/base/uuid.jl
@@ -36,6 +36,8 @@ let
     Base.hash(uuid::UUID, h::UInt) = hash(uuid_hash_seed, hash(convert(NTuple{2, UInt64}, uuid), h))
 end
 
+_crc32c(uuid::UUID, crc::UInt32=0x00000000) = _crc32c(uuid.value, crc)
+
 let
 @inline function uuid_kernel(s, i, u)
     _c = UInt32(@inbounds codeunit(s, i))
@@ -90,18 +92,18 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1]
     global string
     function string(u::UUID)
         u = u.value
-        a = Base.StringVector(36)
+        a = Base.StringMemory(36)
         for i in groupings
             @inbounds a[i] = hex_chars[1 + u & 0xf]
             u >>= 4
         end
         @inbounds a[24] = a[19] = a[14] = a[9] = '-'
-        return String(a)
+        return unsafe_takestring(a)
     end
 end
 
 print(io::IO, u::UUID) = print(io, string(u))
-show(io::IO, u::UUID) = print(io, "UUID(\"", u, "\")")
+show(io::IO, u::UUID) = print(io, UUID, "(\"", u, "\")")
 
 isless(a::UUID, b::UUID) = isless(a.value, b.value)
 
diff --git a/base/version.jl b/base/version.jl
index 67377c86a8493..5870b11c5ac18 100644
--- a/base/version.jl
+++ b/base/version.jl
@@ -9,12 +9,22 @@ const VInt = UInt32
     VersionNumber
 
 Version number type which follows the specifications of
-[semantic versioning (semver)](https://semver.org/), composed of major, minor
+[semantic versioning (semver)](https://semver.org/spec/v2.0.0-rc.2.html), composed of major, minor
 and patch numeric values, followed by pre-release and build
-alpha-numeric annotations.
+alphanumeric annotations.
+As an extension to this standard, Julia also allows a single, empty prerelease annotation
+if there is no build identifier present (e.g. `1.0-`), or a single, empty build annotation (e.g. `1.0+`).
 
 `VersionNumber` objects can be compared with all of the standard comparison
-operators (`==`, `<`, `<=`, etc.), with the result following semver rules.
+operators (`==`, `<`, `<=`, etc.), with the result following semver v2.0.0-rc.2 rules.
+Different from the semver standard, build annotations are not ignored when comparing version numbers.
+
+`VersionNumber` has the following public fields:
+- `v.major::Integer`
+- `v.minor::Integer`
+- `v.patch::Integer`
+- `v.prerelease::Tuple{Vararg{Union{Integer, AbstractString}}}`
+- `v.build::Tuple{Vararg{Union{Integer, AbstractString}}}`
 
 See also [`@v_str`](@ref) to efficiently construct `VersionNumber` objects
 from semver-format literal strings, [`VERSION`](@ref) for the `VersionNumber`
@@ -44,8 +54,7 @@ struct VersionNumber
     build::VerTuple
 
     function VersionNumber(major::VInt, minor::VInt, patch::VInt,
-            pre::VerTuple,
-            bld::VerTuple)
+                           @nospecialize(pre::VerTuple), @nospecialize(bld::VerTuple))
         major >= 0 || throw(ArgumentError("invalid negative major version: $major"))
         minor >= 0 || throw(ArgumentError("invalid negative minor version: $minor"))
         patch >= 0 || throw(ArgumentError("invalid negative patch version: $patch"))
@@ -172,7 +181,7 @@ ident_cmp(a::Integer, b::String ) = isempty(b) ? +1 : -1
 ident_cmp(a::String,  b::Integer) = isempty(a) ? -1 : +1
 ident_cmp(a::String,  b::String ) = cmp(a, b)
 
-function ident_cmp(A::VerTuple, B::VerTuple)
+function ident_cmp(@nospecialize(A::VerTuple), @nospecialize(B::VerTuple))
     for (a, b) in Iterators.Zip{Tuple{VerTuple,VerTuple}}((A, B))
         c = ident_cmp(a, b)
         (c != 0) && return c
@@ -212,7 +221,7 @@ function isless(a::VersionNumber, b::VersionNumber)
 end
 
 function hash(v::VersionNumber, h::UInt)
-    h += 0x8ff4ffdb75f9fede % UInt
+    h ⊻= 0x8ff4ffdb75f9fede % UInt
     h = hash(v.major, h)
     h = hash(v.minor, h)
     h = hash(v.patch, h)
@@ -267,58 +276,469 @@ end
 
 libllvm_path() = ccall(:jl_get_libllvm, Any, ())
 
-function banner(io::IO = stdout)
-    if GIT_VERSION_INFO.tagged_commit
-        commit_string = TAGGED_RELEASE_BANNER
-    elseif isempty(GIT_VERSION_INFO.commit)
-        commit_string = ""
+
+################
+# VersionBound #
+################
+struct VersionBound
+    t::NTuple{3, UInt32}
+    n::Int
+    function VersionBound(tin::NTuple{n, Integer}) where {n}
+        n <= 3 || throw(ArgumentError("VersionBound: you can only specify major, minor and patch versions"))
+        n == 0 && return new((0, 0, 0), n)
+        n == 1 && return new((tin[1], 0, 0), n)
+        n == 2 && return new((tin[1], tin[2], 0), n)
+        n == 3 && return new((tin[1], tin[2], tin[3]), n)
+        error("invalid $n")
+    end
+end
+VersionBound(t::Integer...) = VersionBound(t)
+VersionBound(v::VersionNumber) = VersionBound(v.major, v.minor, v.patch)
+
+Base.getindex(b::VersionBound, i::Int) = b.t[i]
+
+function ≲(v::VersionNumber, b::VersionBound)
+    b.n == 0 && return true
+    b.n == 1 && return v.major <= b[1]
+    b.n == 2 && return (v.major, v.minor) <= (b[1], b[2])
+    return (v.major, v.minor, v.patch) <= (b[1], b[2], b[3])
+end
+
+function ≲(b::VersionBound, v::VersionNumber)
+    b.n == 0 && return true
+    b.n == 1 && return v.major >= b[1]
+    b.n == 2 && return (v.major, v.minor) >= (b[1], b[2])
+    return (v.major, v.minor, v.patch) >= (b[1], b[2], b[3])
+end
+
+function isless_ll(a::VersionBound, b::VersionBound)
+    m, n = a.n, b.n
+    for i in 1:min(m, n)
+        a[i] < b[i] && return true
+        a[i] > b[i] && return false
+    end
+    return m < n
+end
+
+stricterlower(a::VersionBound, b::VersionBound) = isless_ll(a, b) ? b : a
+
+# Comparison between two upper bounds
+function isless_uu(a::VersionBound, b::VersionBound)
+    m, n = a.n, b.n
+    for i in 1:min(m, n)
+        a[i] < b[i] && return true
+        a[i] > b[i] && return false
+    end
+    return m > n
+end
+
+stricterupper(a::VersionBound, b::VersionBound) = isless_uu(a, b) ? a : b
+
+# `isjoinable` compares an upper bound of a range with the lower bound of the next range
+# to determine if they can be joined, as in [1.5-2.8, 2.5-3] -> [1.5-3]. Used by `union!`.
+# The equal-length-bounds case is special since e.g. `1.5` can be joined with `1.6`,
+# `2.3.4` can be joined with `2.3.5` etc.
+
+function isjoinable(up::VersionBound, lo::VersionBound)
+    up.n == 0 && lo.n == 0 && return true
+    if up.n == lo.n
+        n = up.n
+        for i in 1:(n - 1)
+            up[i] > lo[i] && return true
+            up[i] < lo[i] && return false
+        end
+        up[n] < lo[n] - 1 && return false
+        return true
     else
-        days = Int(floor((ccall(:jl_clock_now, Float64, ()) - GIT_VERSION_INFO.fork_master_timestamp) / (60 * 60 * 24)))
-        days = max(0, days)
-        unit = days == 1 ? "day" : "days"
-        distance = GIT_VERSION_INFO.fork_master_distance
-        commit = GIT_VERSION_INFO.commit_short
-
-        if distance == 0
-            commit_string = "Commit $(commit) ($(days) $(unit) old master)"
+        l = min(up.n, lo.n)
+        for i in 1:l
+            up[i] > lo[i] && return true
+            up[i] < lo[i] && return false
+        end
+    end
+    return true
+end
+
+Base.hash(r::VersionBound, h::UInt) = hash(r.t, hash(r.n, h))
+
+# Hot code
+function VersionBound(s::AbstractString)
+    s = strip(s)
+    s == "*" && return VersionBound()
+    first(s) == 'v' && (s = SubString(s, 2))
+    l = lastindex(s)
+
+    p = findnext('.', s, 1)
+    b = p === nothing ? l : (p - 1)
+    i = parse(Int64, SubString(s, 1, b))
+    p === nothing && return VersionBound(i)
+
+    a = p + 1
+    p = findnext('.', s, a)
+    b = p === nothing ? l : (p - 1)
+    j = parse(Int64, SubString(s, a, b))
+    p === nothing && return VersionBound(i, j)
+
+    a = p + 1
+    p = findnext('.', s, a)
+    b = p === nothing ? l : (p - 1)
+    k = parse(Int64, SubString(s, a, b))
+    p === nothing && return VersionBound(i, j, k)
+
+    error("invalid VersionBound string $(repr(s))")
+end
+
+################
+# VersionRange #
+################
+struct VersionRange
+    lower::VersionBound
+    upper::VersionBound
+    # NOTE: ranges are allowed to be empty; they are ignored by VersionSpec anyway
+    function VersionRange(lo::VersionBound, hi::VersionBound)
+        # lo.t == hi.t implies that digits past min(lo.n, hi.n) are zero
+        # lo.n < hi.n example: 1.2-1.2.0 => 1.2.0
+        # lo.n > hi.n example: 1.2.0-1.2 => 1.2
+        lo.t == hi.t && (lo = hi)
+        return new(lo, hi)
+    end
+end
+VersionRange(b::VersionBound = VersionBound()) = VersionRange(b, b)
+VersionRange(t::Integer...) = VersionRange(VersionBound(t...))
+VersionRange(v::VersionNumber) = VersionRange(VersionBound(v))
+VersionRange(lo::VersionNumber, hi::VersionNumber) = VersionRange(VersionBound(lo), VersionBound(hi))
+
+# The vast majority of VersionRanges are in practice equal to "1"
+const VersionRange_1 = VersionRange(VersionBound("1"), VersionBound("1"))
+function VersionRange(s::AbstractString)
+    s == "1" && return VersionRange_1
+    p = split(s, "-")
+    if length(p) != 1 && length(p) != 2
+        throw(ArgumentError("invalid version range: $(repr(s))"))
+    end
+    lower = VersionBound(p[1])
+    upper = length(p) == 1 ? lower : VersionBound(p[2])
+    return VersionRange(lower, upper)
+end
+
+function Base.isempty(r::VersionRange)
+    for i in 1:min(r.lower.n, r.upper.n)
+        r.lower[i] > r.upper[i] && return true
+        r.lower[i] < r.upper[i] && return false
+    end
+    return false
+end
+
+function Base.print(io::IO, r::VersionRange)
+    m, n = r.lower.n, r.upper.n
+    return if (m, n) == (0, 0)
+        print(io, '*')
+    elseif m == 0
+        print(io, "0 -")
+        join(io, r.upper.t, '.')
+    elseif n == 0
+        join(io, r.lower.t, '.')
+        print(io, " - *")
+    else
+        join(io, r.lower.t[1:m], '.')
+        if r.lower != r.upper
+            print(io, " - ")
+            join(io, r.upper.t[1:n], '.')
+        end
+    end
+end
+Base.show(io::IO, r::VersionRange) = print(io, "VersionRange(\"", r, "\")")
+
+Base.in(v::VersionNumber, r::VersionRange) = r.lower ≲ v ≲ r.upper
+
+Base.intersect(a::VersionRange, b::VersionRange) = VersionRange(stricterlower(a.lower, b.lower), stricterupper(a.upper, b.upper))
+
+function Base.union!(ranges::Vector{<:VersionRange})
+    l = length(ranges)
+    l == 0 && return ranges
+
+    sort!(ranges, lt = (a, b) -> (isless_ll(a.lower, b.lower) || (a.lower == b.lower && isless_uu(a.upper, b.upper))))
+
+    k0 = 1
+    ks = findfirst(!isempty, ranges)
+    ks === nothing && return empty!(ranges)
+
+    lo, up, k0 = ranges[ks].lower, ranges[ks].upper, 1
+    for k in (ks + 1):l
+        isempty(ranges[k]) && continue
+        lo1, up1 = ranges[k].lower, ranges[k].upper
+        if isjoinable(up, lo1)
+            isless_uu(up, up1) && (up = up1)
+            continue
+        end
+        vr = VersionRange(lo, up)
+        @assert !isempty(vr)
+        ranges[k0] = vr
+        k0 += 1
+        lo, up = lo1, up1
+    end
+    vr = VersionRange(lo, up)
+    if !isempty(vr)
+        ranges[k0] = vr
+        k0 += 1
+    end
+    resize!(ranges, k0 - 1)
+    return ranges
+end
+
+Base.minimum(r::VersionRange) = r.lower
+
+###############
+# VersionSpec #
+###############
+struct VersionSpec
+    ranges::Vector{VersionRange}
+    VersionSpec(r::Vector{<:VersionRange}) = new(length(r) == 1 ? r : union!(r))
+    VersionSpec(vs::VersionSpec) = vs
+end
+
+VersionSpec(r::VersionRange) = VersionSpec(VersionRange[r])
+VersionSpec(v::VersionNumber) = VersionSpec(VersionRange(v))
+const _all_versionsspec = VersionSpec(VersionRange())
+VersionSpec() = _all_versionsspec
+VersionSpec(s::AbstractString) = VersionSpec(VersionRange(s))
+VersionSpec(v::AbstractVector) = VersionSpec(map(VersionRange, v))
+
+# Hot code
+function Base.in(v::VersionNumber, s::VersionSpec)
+    for r in s.ranges
+        v in r && return true
+    end
+    return false
+end
+
+# Optimized batch version check for version lists
+# Fills dest[1:n] indicating which versions are in the VersionSpec
+# Optimized for sorted version lists (but works correctly even if unsorted)
+# Note: Only fills indices 1:n, leaves rest of dest unchanged
+function matches_spec_range!(dest::BitVector, versions::AbstractVector{VersionNumber}, spec::VersionSpec, n::Int)
+    @assert length(versions) == n
+    @assert length(dest) >= n
+
+    # Initialize to false
+    dest[1:n] .= false
+
+    isempty(spec.ranges) && return dest
+
+    # Assumes versions are sorted (as created in Operations.jl:1002)
+    # If sorted, this avoids O(n*m) comparisons by scanning linearly
+    @inbounds for range in spec.ranges
+        # Find first version that could be in range
+        i = 1
+        while i <= n && !(range.lower ≲ versions[i])
+            i += 1
+        end
+
+        # Mark all versions in range
+        while i <= n && versions[i] ≲ range.upper
+            dest[i] = true
+            i += 1
+        end
+    end
+
+    return dest
+end
+
+Base.copy(vs::VersionSpec) = VersionSpec(vs)
+
+const empty_versionspec = VersionSpec(VersionRange[])
+const _empty_symbol = "∅"
+
+Base.isempty(s::VersionSpec) = all(isempty, s.ranges)
+@assert isempty(empty_versionspec)
+# Hot code, measure performance before changing
+function Base.intersect(A::VersionSpec, B::VersionSpec)
+    (isempty(A) || isempty(B)) && return copy(empty_versionspec)
+    ranges = Vector{VersionRange}(undef, length(A.ranges) * length(B.ranges))
+    i = 1
+    @inbounds for a in A.ranges, b in B.ranges
+        ranges[i] = intersect(a, b)
+        i += 1
+    end
+    return VersionSpec(ranges)
+end
+Base.intersect(a::VersionNumber, B::VersionSpec) = a in B ? VersionSpec(a) : empty_versionspec
+Base.intersect(A::VersionSpec, b::VersionNumber) = intersect(b, A)
+
+function Base.union(A::VersionSpec, B::VersionSpec)
+    A == B && return A
+    Ar = copy(A.ranges)
+    append!(Ar, B.ranges)
+    union!(Ar)
+    return VersionSpec(Ar)
+end
+
+Base.:(==)(A::VersionSpec, B::VersionSpec) = A.ranges == B.ranges
+Base.hash(s::VersionSpec, h::UInt) = hash(s.ranges, h + (0x2fd2ca6efa023f44 % UInt))
+
+function Base.print(io::IO, s::VersionSpec)
+    isempty(s) && return print(io, _empty_symbol)
+    length(s.ranges) == 1 && return print(io, s.ranges[1])
+    print(io, '[')
+    for i in 1:length(s.ranges)
+        1 < i && print(io, ", ")
+        print(io, s.ranges[i])
+    end
+    return print(io, ']')
+end
+
+function Base.show(io::IO, s::VersionSpec)
+    print(io, "VersionSpec(")
+    if length(s.ranges) == 1
+        print(io, '"', s.ranges[1], '"')
+    else
+        print(io, "[")
+        for i in 1:length(s.ranges)
+            1 < i && print(io, ", ")
+            print(io, '"', s.ranges[i], '"')
+        end
+        print(io, ']')
+    end
+    return print(io, ")")
+end
+
+Base.minimum(v::VersionSpec) = minimum(v.ranges[1])
+
+###################
+# Semver notation #
+###################
+
+function semver_spec(s::String; throw = true)
+    ranges = VersionRange[]
+    for ver in strip.(split(strip(s), ','))
+        range = nothing
+        found_match = false
+        for (ver_reg, f) in ver_regs
+            if occursin(ver_reg, ver)
+                range = f(match(ver_reg, ver))
+                found_match = true
+                break
+            end
+        end
+        if !found_match
+            if throw
+                error("invalid version specifier: \"$s\"")
+            else
+                return nothing
+            end
+        end
+        push!(ranges, range)
+    end
+    return VersionSpec(ranges)
+end
+
+function semver_interval(m::RegexMatch)
+    @assert length(m.captures) == 4
+    n_significant = count(x -> x !== nothing, m.captures) - 1
+    typ, _major, _minor, _patch = m.captures
+    major = parse(Int, _major)
+    minor = (n_significant < 2) ? 0 : parse(Int, _minor)
+    patch = (n_significant < 3) ? 0 : parse(Int, _patch)
+    if n_significant == 3 && major == 0 && minor == 0 && patch == 0
+        error("invalid version: \"0.0.0\"")
+    end
+    # Default type is :caret
+    vertyp = (typ == "" || typ == "^") ? :caret : :tilde
+    v0 = VersionBound((major, minor, patch))
+    return if vertyp === :caret
+        if major != 0
+            return VersionRange(v0, VersionBound((v0[1],)))
+        elseif minor != 0
+            return VersionRange(v0, VersionBound((v0[1], v0[2])))
+        else
+            if n_significant == 1
+                return VersionRange(v0, VersionBound((0,)))
+            elseif n_significant == 2
+                return VersionRange(v0, VersionBound((0, 0)))
+            else
+                return VersionRange(v0, VersionBound((0, 0, v0[3])))
+            end
+        end
+    else
+        if n_significant == 3 || n_significant == 2
+            return VersionRange(v0, VersionBound((v0[1], v0[2])))
         else
-            branch = GIT_VERSION_INFO.branch
-            commit_string = "$(branch)/$(commit) (fork: $(distance) commits, $(days) $(unit))"
+            return VersionRange(v0, VersionBound((v0[1],)))
         end
     end
+end
 
-    commit_date = isempty(Base.GIT_VERSION_INFO.date_string) ? "" : " ($(split(Base.GIT_VERSION_INFO.date_string)[1]))"
-
-    if get(io, :color, false)::Bool
-        c = text_colors
-        tx = c[:normal] # text
-        jl = c[:normal] # julia
-        d1 = c[:bold] * c[:blue]    # first dot
-        d2 = c[:bold] * c[:red]     # second dot
-        d3 = c[:bold] * c[:green]   # third dot
-        d4 = c[:bold] * c[:magenta] # fourth dot
-
-        print(io,"""               $(d3)_$(tx)
-           $(d1)_$(tx)       $(jl)_$(tx) $(d2)_$(d3)(_)$(d4)_$(tx)     |  Documentation: https://docs.julialang.org
-          $(d1)(_)$(jl)     | $(d2)(_)$(tx) $(d4)(_)$(tx)    |
-           $(jl)_ _   _| |_  __ _$(tx)   |  Type \"?\" for help, \"]?\" for Pkg help.
-          $(jl)| | | | | | |/ _` |$(tx)  |
-          $(jl)| | |_| | | | (_| |$(tx)  |  Version $(VERSION)$(commit_date)
-         $(jl)_/ |\\__'_|_|_|\\__'_|$(tx)  |  $(commit_string)
-        $(jl)|__/$(tx)                   |
-
-        """)
+const _inf = VersionBound("*")
+function inequality_interval(m::RegexMatch)
+    @assert length(m.captures) == 4
+    typ, _major, _minor, _patch = m.captures
+    n_significant = count(x -> x !== nothing, m.captures) - 1
+    major = parse(Int, _major)
+    minor = (n_significant < 2) ? 0 : parse(Int, _minor)
+    patch = (n_significant < 3) ? 0 : parse(Int, _patch)
+    if n_significant == 3 && major == 0 && minor == 0 && patch == 0
+        error("invalid version: 0.0.0")
+    end
+    v = VersionBound(major, minor, patch)
+    if occursin(r"^<\s*$", typ)
+        nil = VersionBound(0, 0, 0)
+        if v[3] == 0
+            if v[2] == 0
+                v1 = VersionBound(v[1] - 1)
+            else
+                v1 = VersionBound(v[1], v[2] - 1)
+            end
+        else
+            v1 = VersionBound(v[1], v[2], v[3] - 1)
+        end
+        return VersionRange(nil, v1)
+    elseif occursin(r"^=\s*$", typ)
+        return VersionRange(v)
+    elseif occursin(r"^>=\s*$", typ) || occursin(r"^≥\s*$", typ)
+        return VersionRange(v, _inf)
     else
-        print(io,"""
-                       _
-           _       _ _(_)_     |  Documentation: https://docs.julialang.org
-          (_)     | (_) (_)    |
-           _ _   _| |_  __ _   |  Type \"?\" for help, \"]?\" for Pkg help.
-          | | | | | | |/ _` |  |
-          | | |_| | | | (_| |  |  Version $(VERSION)$(commit_date)
-         _/ |\\__'_|_|_|\\__'_|  |  $(commit_string)
-        |__/                   |
-
-        """)
+        error("invalid prefix $typ")
     end
 end
+
+function hyphen_interval(m::RegexMatch)
+    @assert length(m.captures) == 6
+    _lower_major, _lower_minor, _lower_patch, _upper_major, _upper_minor, _upper_patch = m.captures
+    if isnothing(_lower_minor)
+        lower_bound = VersionBound(parse(Int, _lower_major))
+    elseif isnothing(_lower_patch)
+        lower_bound = VersionBound(
+            parse(Int, _lower_major),
+            parse(Int, _lower_minor)
+        )
+    else
+        lower_bound = VersionBound(
+            parse(Int, _lower_major),
+            parse(Int, _lower_minor),
+            parse(Int, _lower_patch)
+        )
+    end
+    if isnothing(_upper_minor)
+        upper_bound = VersionBound(parse(Int, _upper_major))
+    elseif isnothing(_upper_patch)
+        upper_bound = VersionBound(
+            parse(Int, _upper_major),
+            parse(Int, _upper_minor)
+        )
+    else
+        upper_bound = VersionBound(
+            parse(Int, _upper_major),
+            parse(Int, _upper_minor),
+            parse(Int, _upper_patch)
+        )
+    end
+    return VersionRange(lower_bound, upper_bound)
+end
+
+const version = "v?([0-9]+?)(?:\\.([0-9]+?))?(?:\\.([0-9]+?))?"
+const ver_regs =
+    Pair{Regex, Any}[
+    Regex("^([~^]?)?$version\$") => semver_interval, # 0.5 ^0.4 ~0.3.2
+    Regex("^((?:≥\\s*)|(?:>=\\s*)|(?:=\\s*)|(?:<\\s*)|(?:=\\s*))v?$version\$") => inequality_interval, # < 0.2 >= 0.5,2
+    Regex("^[\\s]*$version[\\s]*?\\s-\\s[\\s]*?$version[\\s]*\$") => hyphen_interval, # 0.7 - 1.3
+]
diff --git a/base/version_git.sh b/base/version_git.sh
index 76092e9800594..b88fbcf04d0d1 100644
--- a/base/version_git.sh
+++ b/base/version_git.sh
@@ -6,7 +6,7 @@
 echo "# This file was autogenerated by base/version_git.sh"
 echo "struct GitVersionInfo"
 echo "    commit::String"
-echo "    commit_short::String"
+echo "    commit_short_raw::String"
 echo "    branch::String"
 echo "    build_number::Int"
 echo "    date_string::String"
@@ -17,6 +17,24 @@ echo "    build_system_commit::String"
 echo "    build_system_commit_short::String"
 echo "end"
 echo ""
+echo "function Base.getproperty(info::GitVersionInfo, s::Symbol)"
+echo "    if s === :commit_short"
+echo "        commit = getfield(info, :commit_short_raw)"
+echo "        dirty_file = joinpath(Sys.BINDIR, Base.DATAROOTDIR, \"julia\", \"base\", \"version_git_dirty\")"
+echo "        dirty_str = try"
+echo "            read(dirty_file, String)"
+echo "        catch"
+echo "            \"\""
+echo "        end"
+echo "        if strip(dirty_str) == \"true\""
+echo "            return commit * \"*\""
+echo "        end"
+echo "        return commit"
+echo "    else"
+echo "        return getfield(info, s)"
+echo "    end"
+echo "end"
+echo ""
 
 cd $1
 
@@ -38,8 +56,9 @@ git_time=$(git log -1 --pretty=format:%ct)
 commit=$(git rev-parse HEAD)
 commit_short=$(git rev-parse --short HEAD)
 if [ -n "$(git status --porcelain)" ]; then
-    # append dirty mark '*' if the repository has uncommitted changes
-    commit_short="$commit_short"*
+    dirty="true"
+else
+    dirty="false"
 fi
 
 # Our CI system checks commits out as a detached head, and so we must
@@ -117,3 +136,7 @@ echo "    $fork_master_timestamp.0,"
 echo "    \"$build_system_commit\","
 echo "    \"$build_system_commit_short\","
 echo ")"
+
+# Write dirty status to a separate file to avoid triggering rebuilds
+# when only the dirty status changes
+echo "$dirty" > version_git_dirty
diff --git a/base/views.jl b/base/views.jl
index 70d4c1d9110ee..d205b1b38dee4 100644
--- a/base/views.jl
+++ b/base/views.jl
@@ -14,64 +14,149 @@ should transform to
     A[B[lastindex(B)]]
 
 """
-replace_ref_begin_end!(ex) = replace_ref_begin_end_!(ex, nothing)[1]
-# replace_ref_begin_end_!(ex,withex) returns (new ex, whether withex was used)
-function replace_ref_begin_end_!(ex, withex)
+replace_ref_begin_end!(__module__::Module, @nospecialize ex) = replace_ref_begin_end_!(__module__, ex, nothing, false, 0)[1]
+# replace_ref_begin_end_!(...) returns (new ex, whether withex was used)
+function replace_ref_begin_end_!(__module__::Module, ex, withex, in_quote_context::Bool, escs::Int)
+    @nospecialize
     used_withex = false
-    if isa(ex,Symbol)
-        if ex === :begin
-            withex === nothing && error("Invalid use of begin")
-            return withex[1], true
-        elseif ex === :end
-            withex === nothing && error("Invalid use of end")
-            return withex[2], true
+    function escapes(@nospecialize(ex), escs::Int)
+        for i = 1:escs
+            ex = esc(ex)
         end
-    elseif isa(ex,Expr)
-        if ex.head === :ref
-            ex.args[1], used_withex = replace_ref_begin_end_!(ex.args[1], withex)
-            S = isa(ex.args[1],Symbol) ? ex.args[1]::Symbol : gensym(:S) # temp var to cache ex.args[1] if needed
-            used_S = false # whether we actually need S
-            # new :ref, so redefine withex
-            nargs = length(ex.args)-1
-            if nargs == 0
-                return ex, used_withex
-            elseif nargs == 1
-                # replace with lastindex(S)
-                ex.args[2], used_S = replace_ref_begin_end_!(ex.args[2], (:($firstindex($S)),:($lastindex($S))))
-            else
-                n = 1
-                J = lastindex(ex.args)
-                for j = 2:J
-                    exj, used = replace_ref_begin_end_!(ex.args[j], (:($firstindex($S,$n)),:($lastindex($S,$n))))
-                    used_S |= used
-                    ex.args[j] = exj
-                    if isa(exj,Expr) && exj.head === :...
-                        # splatted object
-                        exjs = exj.args[1]
-                        n = :($n + length($exjs))
-                    elseif isa(n, Expr)
-                        # previous expression splatted
-                        n = :($n + 1)
-                    else
-                        # an integer
-                        n += 1
+        return ex
+    end
+    function handle_refexpr!(__module__::Module, ref_ex::Expr, main_ex::Expr, withex, in_quote_context, escs::Int)
+        @assert !in_quote_context
+        local used_withex
+        ref_ex.args[1], used_withex = replace_ref_begin_end_!(__module__, ref_ex.args[1], withex, in_quote_context, escs)
+        S = gensym(:S) # temp var to cache ex.args[1] if needed. if S is a global or expression, then it has side effects to use
+        assignments = []
+        used_S = false # whether we actually need S
+        # new :ref, so redefine withex
+        nargs = length(ref_ex.args) - 1
+        if nargs == 0
+            return main_ex, used_withex
+        elseif nargs == 1
+            # replace with lastindex(S)
+            ref_ex.args[2], used_S = replace_ref_begin_end_!(__module__, ref_ex.args[2], (:($firstindex($S)),:($lastindex($S))), in_quote_context, escs)
+        else
+            ni = 1
+            nx = 0
+            J = nargs + 1
+            need_temps = false # whether any arg needs temporaries
+
+            # First pass: determine if any argument will needs temporaries
+            for j = 2:J
+                exj = ref_ex.args[j]
+                if isexpr(exj, :...)
+                    need_temps = true
+                    break
+                end
+            end
+
+            # Second pass: if any need temps, create temps for all args
+            temp_vars = Tuple{Int,Symbol}[]
+            for j = 2:J
+                n = nx === 0 ? ni : :($nx + $ni)
+                exj, used = replace_ref_begin_end_!(__module__, ref_ex.args[j], (:($firstindex($S,$n)),:($lastindex($S,$n))), in_quote_context, escs)
+                used_S |= used
+                ref_ex.args[j] = exj
+                ni += 1
+                if need_temps
+                    isva = isexpr(exj, :...) # implied need_temps
+                    if isva
+                        exj = exj.args[1]
+                    end
+                    if isa_ast_node(exj) # create temp to preserve evaluation order and count in case `used` gets set later
+                        exj = gensym(:arg)
+                        push!(temp_vars, (j, exj))
+                    end
+                    if isva
+                        ni -= 1
+                        nx = nx === 0 ? :(length($exj)) : :($nx + length($exj))
                     end
                 end
             end
-            if used_S && S !== ex.args[1]
-                S0 = ex.args[1]
-                ex.args[1] = S
-                ex = Expr(:let, :($S = $S0), ex)
+
+            # Third pass: if `used`, need to actually make those temp assignments now
+            if used_S
+                for (j, temp_var) in temp_vars
+                    exj = ref_ex.args[j]
+                    isva = isexpr(exj, :...) # implied need_temps
+                    if isva
+                        exj = exj.args[1]
+                    end
+                    push!(assignments, :(local $temp_var = $exj))
+                    ref_ex.args[j] = isva ? Expr(:..., temp_var) : temp_var
+                end
             end
-        else
-            # recursive search
+        end
+
+        if used_S
+            S0 = ref_ex.args[1]
+            S = escapes(S, escs)
+            ref_ex.args[1] = S
+            main_ex = :(local $S = $S0; $(assignments...); $main_ex)
+        end
+        return main_ex, used_withex
+    end
+    if ex isa Expr && ex.head === :macrocall
+        # Blithly modifying the arguments to another macro is unwise, so call
+        # macroexpand first on it.
+        # Unfortunately, macroexpand itself corrupts the scope of variables in
+        # the result by calling macroexpand.scm before returning which cannot be
+        # avoided since `jl_expand_macros` is private and somewhat difficult to
+        # reimplement correctly.
+        ex = macroexpand(__module__, ex)
+    end
+    if isa(ex,Symbol)
+        if !in_quote_context
+            if ex === :begin
+                withex === nothing && error("Invalid use of begin outside []")
+                return escapes((withex::NTuple{2,Expr})[1], escs), true
+            elseif ex === :end
+                withex === nothing && error("Invalid use of end outside []")
+                return escapes((withex::NTuple{2,Expr})[2], escs), true
+            end
+        end
+    elseif isa(ex,Expr)
+        if !in_quote_context && ex.head === :ref # n.b. macroexpand.scm design is incapable of tracking :begin and :end scope, so emulate that here too and ignore escs
+            return handle_refexpr!(__module__, ex, ex, withex, in_quote_context, escs)
+        elseif ex.head === :$
+            # no longer an executable expression (handle all equivalent forms of :inert, :quote, and QuoteNode the same way)
+            in_quote_context = false
+        elseif ex.head === :quote
+            # executable again
+            in_quote_context = true
+        elseif ex.head === :var"hygienic-scope"
+            # no longer our expression
+            escs += 1
+        elseif ex.head === :escape
+            # our expression again once zero
+            escs == 0 && return ex, used_withex
+            escs -= 1
+        elseif ex.head === :meta || ex.head === :inert
+            return ex, used_withex
+        elseif !in_quote_context && last(string(ex.head)) == '=' && Meta.isexpr(ex.args[1], :ref)
             for i = eachindex(ex.args)
-                ex.args[i], used = replace_ref_begin_end_!(ex.args[i], withex)
+                if i == 1
+                    # we'll deal with the ref expression later
+                    continue
+                end
+                ex.args[i], used = replace_ref_begin_end_!(__module__, ex.args[i], withex, in_quote_context, escs)
                 used_withex |= used
             end
+            ex, used = handle_refexpr!(__module__, ex.args[1]::Expr, ex, withex, in_quote_context, escs)
+            used_withex |= used
+            return ex, used_withex
+        end
+        # recursive search
+        for i = eachindex(ex.args)
+            ex.args[i], used = replace_ref_begin_end_!(__module__, ex.args[i], withex, in_quote_context, escs)
+            used_withex |= used
         end
     end
-    ex, used_withex
+    return ex, used_withex
 end
 
 """
@@ -123,20 +208,23 @@ julia> A
 ```
 """
 macro view(ex)
+    Meta.isexpr(ex, :ref) || throw(ArgumentError(
+        "Invalid use of @view macro: argument must be a reference expression A[...]."))
+    ex = replace_ref_begin_end!(__module__, ex)
+    # NOTE We embed `view` as a function object itself directly into the AST.
+    #      By doing this, we prevent the creation of function definitions like
+    #      `view(A, idx) = xxx` in cases such as `@view(A[idx]) = xxx.`
     if Meta.isexpr(ex, :ref)
-        ex = replace_ref_begin_end!(ex)
-        if Meta.isexpr(ex, :ref)
-            ex = Expr(:call, view, ex.args...)
-        else # ex replaced by let ...; foo[...]; end
-            if !(Meta.isexpr(ex, :let) && Meta.isexpr(ex.args[2], :ref))
-                error("invalid expression")
-            end
-            ex.args[2] = Expr(:call, view, ex.args[2].args...)
-        end
-        Expr(:&&, true, esc(ex))
+        ex = Expr(:call, view, ex.args...)
+    elseif Meta.isexpr(ex, :block)
+        arg2 = ex.args[end]
+        Meta.isexpr(arg2, :ref) || error("unsupported replace_ref_begin_end result")
+        # ex replaced by let ...; foo[...]; end
+        ex.args[end] = Expr(:call, view, arg2.args...)
     else
-        throw(ArgumentError("Invalid use of @view macro: argument must be a reference expression A[...]."))
+        error("unsupported replace_ref_begin_end result")
     end
+    return esc(ex)
 end
 
 ############################################################################
@@ -175,10 +263,7 @@ function _views(ex::Expr)
 
             # temp vars to avoid recomputing a and i,
             # which will be assigned in a let block:
-            a = gensym(:a)
-            i = let lhs=lhs     # #15276
-                [gensym(:i) for k = 1:length(lhs.args)-1]
-            end
+            i = Symbol[Symbol(:i, k) for k = 1:length(lhs.args)-1]
 
             # for splatted indices like a[i, j...], we need to
             # splat the corresponding temp var.
@@ -193,14 +278,15 @@ function _views(ex::Expr)
                 end
             end
 
-            Expr(:let,
-                 Expr(:block,
-                      :($a = $(_views(lhs.args[1]))),
-                      Any[:($(i[k]) = $(_views(lhs.args[k+1]))) for k=1:length(i)]...),
-                 Expr(first(h) == '.' ? :(.=) : :(=), :($a[$(I...)]),
-                      Expr(:call, Symbol(h[1:end-1]),
-                           :($maybeview($a, $(I...))),
-                           mapany(_views, ex.args[2:end])...)))
+            Expr(:var"hygienic-scope", # assign a and i to the macro's scope
+                 Expr(:let,
+                      Expr(:block,
+                           :(a = $(esc(_views(lhs.args[1])))),
+                           Any[:($(i[k]) = $(esc(_views(lhs.args[k+1])))) for k=1:length(i)]...),
+                      Expr(first(h) == '.' ? :(.=) : :(=), :(a[$(I...)]),
+                           Expr(:call, esc(Symbol(h[1:end-1])),
+                                :($maybeview(a, $(I...))),
+                                mapany(e -> esc(_views(e)), ex.args[2:end])...))), Base)
         else
             exprarray(ex.head, mapany(_views, ex.args))
         end
@@ -224,16 +310,16 @@ Similarly, `@views` converts string slices into [`SubString`](@ref) views.
     occurs in functions called by that code.
 
 !!! compat "Julia 1.5"
-    Using `begin` in an indexing expression to refer to the first index requires at least
-    Julia 1.5.
+    Using `begin` in an indexing expression to refer to the first index was implemented
+    in Julia 1.4, but was only supported by `@views` starting in Julia 1.5.
 
 # Examples
 ```jldoctest
 julia> A = zeros(3, 3);
 
 julia> @views for row in 1:3
-           b = A[row, :]
-           b[:] .= row
+           b = A[row, :] # b is a view, not a copy
+           b .= row      # assign every element to the row index
        end
 
 julia> A
@@ -244,5 +330,5 @@ julia> A
 ```
 """
 macro views(x)
-    esc(_views(replace_ref_begin_end!(x)))
+    esc(_views(replace_ref_begin_end!(__module__, x)))
 end
diff --git a/base/weakkeydict.jl b/base/weakkeydict.jl
index 328f368c80b71..1283dc9cbc8cb 100644
--- a/base/weakkeydict.jl
+++ b/base/weakkeydict.jl
@@ -2,6 +2,12 @@
 
 # weak key dictionaries
 
+mutable struct WeakKeyDictFinalizer{T}
+    const d::T
+end
+(d::WeakKeyDictFinalizer)(k) = d.d.dirty = true
+
+
 """
     WeakKeyDict([itr])
 
@@ -16,15 +22,15 @@ object was unreferenced anywhere before insertion.
 See also [`WeakRef`](@ref).
 """
 mutable struct WeakKeyDict{K,V} <: AbstractDict{K,V}
-    ht::Dict{WeakRef,V}
-    lock::ReentrantLock
-    finalizer::Function
+    const ht::Dict{WeakRef,V}
+    const lock::ReentrantLock
     dirty::Bool
+    finalizer::WeakKeyDictFinalizer
 
     # Constructors mirror Dict's
-    function WeakKeyDict{K,V}() where V where K
-        t = new(Dict{WeakRef,V}(), ReentrantLock(), identity, 0)
-        t.finalizer = k -> t.dirty = true
+    function WeakKeyDict{K,V}() where {K, V}
+        t = new{K,V}(Dict{WeakRef,V}(), ReentrantLock(), false)
+        t.finalizer = WeakKeyDictFinalizer(t)
         return t
     end
 end
@@ -54,17 +60,7 @@ WeakKeyDict(ps::Pair{K}...)             where {K}   = WeakKeyDict{K,Any}(ps)
 WeakKeyDict(ps::(Pair{K,V} where K)...) where {V}   = WeakKeyDict{Any,V}(ps)
 WeakKeyDict(ps::Pair...)                            = WeakKeyDict{Any,Any}(ps)
 
-function WeakKeyDict(kv)
-    try
-        Base.dict_with_eltype((K, V) -> WeakKeyDict{K, V}, kv, eltype(kv))
-    catch
-        if !isiterable(typeof(kv)) || !all(x->isa(x,Union{Tuple,Pair}),kv)
-            throw(ArgumentError("WeakKeyDict(kv): kv needs to be an iterator of tuples or pairs"))
-        else
-            rethrow()
-        end
-    end
-end
+WeakKeyDict(kv) = Base.dict_with_eltype((K, V) -> WeakKeyDict{K, V}, kv, eltype(kv))
 
 function _cleanup_locked(h::WeakKeyDict)
     if h.dirty
@@ -80,7 +76,7 @@ function _cleanup_locked(h::WeakKeyDict)
     return h
 end
 
-sizehint!(d::WeakKeyDict, newsz) = sizehint!(d.ht, newsz)
+sizehint!(d::WeakKeyDict, newsz::Integer; shrink::Bool = true) = @lock d sizehint!(d.ht, newsz; shrink = shrink)
 empty(d::WeakKeyDict, ::Type{K}, ::Type{V}) where {K, V} = WeakKeyDict{K, V}()
 
 IteratorSize(::Type{<:WeakKeyDict}) = SizeUnknown()
@@ -213,4 +209,6 @@ function iterate(t::WeakKeyDict{K,V}, state...) where {K, V}
     end
 end
 
+@propagate_inbounds Iterators.only(d::WeakKeyDict) = Iterators._only(d, first)
+
 filter!(f, d::WeakKeyDict) = filter_in_one_pass!(f, d)
diff --git a/cli/Makefile b/cli/Makefile
index b6a2b48ebf044..bc24c9a69f012 100644
--- a/cli/Makefile
+++ b/cli/Makefile
@@ -17,19 +17,19 @@ LOADER_CFLAGS += -DGLIBCXX_LEAST_VERSION_SYMBOL=\"$(shell echo "$(CSL_NEXT_GLIBC
 endif
 
 ifeq ($(OS),WINNT)
-LOADER_LDFLAGS += -municode -mconsole -nostdlib --disable-auto-import \
-                  --disable-runtime-pseudo-reloc -lntdll -lkernel32 -lpsapi
+LOADER_LDFLAGS += -municode -mconsole -nostdlib -lntdll -lkernel32 -lpsapi
 else ifeq ($(OS),Linux)
-LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed
+# textoff and notext are aliases to the same option which suppress the TEXTREL warning for i686
+LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed -Wl,-z,notext
 else ifeq ($(OS),FreeBSD)
 LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed
-else ifeq ($(OS),Darwin)
-LOADER_LDFLAGS += -lSystem
+else ifeq ($(OS),OpenBSD)
+LOADER_LDFLAGS += -Wl,--no-as-needed -lpthread -rdynamic -lc -Wl,--as-needed
 endif
 
 # Build list of dependent libraries that must be opened
-SHIPFLAGS  += -DDEP_LIBS="\"$(LOADER_BUILD_DEP_LIBS)\""
-DEBUGFLAGS += -DDEP_LIBS="\"$(LOADER_DEBUG_BUILD_DEP_LIBS)\""
+SHIPFLAGS  += -DDEP_LIBS=$(call shell_escape,$(call c_escape,$(LOADER_BUILD_DEP_LIBS)))
+DEBUGFLAGS += -DDEP_LIBS=$(call shell_escape,$(call c_escape,$(LOADER_DEBUG_BUILD_DEP_LIBS)))
 ifneq (,$(findstring MINGW,$(shell uname)))
 # In MSYS2, do not perform path conversion for `DEP_LIBS`.
 # https://www.msys2.org/wiki/Porting/#filesystem-namespaces
@@ -39,6 +39,11 @@ $(BUILDDIR)/loader_lib.o: export MSYS2_ARG_CONV_EXCL = -DDEP_LIBS=
 $(BUILDDIR)/loader_lib.dbg.obj: export MSYS2_ARG_CONV_EXCL = -DDEP_LIBS=
 endif # MSYS2
 
+ifeq ($(USE_RT_STATIC_LIBSTDCXX),1)
+SHIPFLAGS += -DRT_STATIC_LIBSTDCXX
+DEBUGFLAGS += -DRT_STATIC_LIBSTDCXX
+endif # USE_RT_STATIC_LIBSTDCXX
+
 EXE_OBJS := $(BUILDDIR)/loader_exe.o
 EXE_DOBJS := $(BUILDDIR)/loader_exe.dbg.obj
 LIB_OBJS := $(BUILDDIR)/loader_lib.o
@@ -47,7 +52,7 @@ LIB_DOBJS := $(BUILDDIR)/loader_lib.dbg.obj
 # If this is an architecture that supports dynamic linking, link in a trampoline definition
 ifneq (,$(wildcard $(SRCDIR)/trampolines/trampolines_$(ARCH).S))
 LIB_OBJS += $(BUILDDIR)/loader_trampolines.o
-LIB_DOBJS += $(BUILDDIR)/loader_trampolines.o
+LIB_DOBJS += $(BUILDDIR)/loader_trampolines.dbg.obj
 endif
 
 default: release
@@ -64,6 +69,8 @@ $(BUILDDIR)/loader_exe.dbg.obj : $(SRCDIR)/loader_exe.c $(HEADERS) $(JULIAHOME)/
 	@$(call PRINT_CC, $(CC) $(DEBUGFLAGS) $(LOADER_CFLAGS) -c $< -o $@)
 $(BUILDDIR)/loader_trampolines.o : $(SRCDIR)/trampolines/trampolines_$(ARCH).S $(HEADERS) $(SRCDIR)/trampolines/common.h
 	@$(call PRINT_CC, $(CC) $(SHIPFLAGS) $(LOADER_CFLAGS) $< -c -o $@)
+$(BUILDDIR)/loader_trampolines.dbg.obj : $(SRCDIR)/trampolines/trampolines_$(ARCH).S $(HEADERS) $(SRCDIR)/trampolines/common.h
+	@$(call PRINT_CC, $(CC) $(DEBUGFLAGS) $(LOADER_CFLAGS) $< -c -o $@)
 
 # Debugging target to help us see what kind of code is being generated for our trampolines
 dump-trampolines: $(SRCDIR)/trampolines/trampolines_$(ARCH).S
@@ -104,7 +111,7 @@ julia-debug: $(build_bindir)/julia-debug$(EXE)
 libjulia-release: $(build_shlibdir)/libjulia.$(SHLIB_EXT)
 libjulia-debug: $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT)
 
-ifneq (,$(filter $(OS), Linux FreeBSD))
+ifneq (,$(filter $(OS), Linux FreeBSD OpenBSD))
 VERSIONSCRIPT := -Wl,--version-script=$(BUILDDIR)/julia.expmap
 endif
 
@@ -114,7 +121,7 @@ STRIP_EXPORTED_FUNCS := $(shell $(CPP_STDOUT) -I$(JULIAHOME)/src $(SRCDIR)/list_
 endif
 
 $(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir)
-	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \
+	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(SHIPFLAGS) $(LIB_OBJS) $(RPATH_LIB) -o $@ \
 		$(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia.$(JL_MAJOR_SHLIB_EXT) $@
 	@$(DSYMUTIL) $@
@@ -125,7 +132,7 @@ ifeq ($(OS), WINNT)
 endif
 
 $(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir)
-	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \
+	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(DEBUGFLAGS) $(LIB_DOBJS) $(RPATH_LIB) -o $@ \
 		$(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-debug.$(JL_MAJOR_SHLIB_EXT) $@
 	@$(DSYMUTIL) $@
@@ -148,7 +155,7 @@ $(build_bindir)/julia$(EXE): $(EXE_OBJS) $(build_shlibdir)/libjulia.$(SHLIB_EXT)
 $(build_bindir)/julia-debug$(EXE): $(EXE_DOBJS) $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) | $(build_bindir)
 	@$(call PRINT_LINK, $(CC) $(LOADER_CFLAGS) $(DEBUGFLAGS) $(EXE_DOBJS) -o $@ $(LOADER_LDFLAGS) $(RPATH) -ljulia-debug)
 
-$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in
+$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in $(JULIAHOME)/VERSION
 	sed <'$<' >'$@' -e 's/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/'
 
 clean: | $(CLEAN_TARGETS)
diff --git a/cli/README.md b/cli/README.md
index 4021aceb7d839..5a4ecc0a6fc2b 100644
--- a/cli/README.md
+++ b/cli/README.md
@@ -4,9 +4,9 @@ This directory contains the code used by the Julia loader, implementing the piec
 This loader comprises the `julia` executable and the `libjulia` library, which are responsible for setting things up such that `libjulia-internal` and any other internal dependencies can be reliably loaded.
 The code is organized in three pieces:
 
-* `loader_exe.c` gets built into the main `julia` executable.  It immediately loads `libjulia`.
-* `loader_lib.c` gets built into the main `libjulia` shared library.  This is the main entrypoint for the Julia runtime loading process, which occurs within `jl_load_repl()`.
-* `trampolines/*.S`, which contains assembly definitions for symbol forwarding trampolines.  These are used to allow `libjulia` to re-export symbols such that a C linker can use `libjulia` directly for embedding usecases.
+* `loader_exe.c` gets built into the main `julia` executable. It immediately loads `libjulia`.
+* `loader_lib.c` gets built into the main `libjulia` shared library. This is the main entrypoint for the Julia runtime loading process, which occurs within `jl_load_repl()`.
+* `trampolines/*.S`, which contains assembly definitions for symbol forwarding trampolines. These are used to allow `libjulia` to re-export symbols such that a C linker can use `libjulia` directly for embedding usecases.
 
 The main requirements of the loader are as follows:
 
diff --git a/cli/jl_exports.h b/cli/jl_exports.h
index d28958c097edb..1334a5d9c63a2 100644
--- a/cli/jl_exports.h
+++ b/cli/jl_exports.h
@@ -7,8 +7,9 @@
 #include "../src/jl_exported_funcs.inc"
 
 // Define pointer data as `const void * $(name);`
-#define XX(name)    JL_DLLEXPORT const void * name;
+#define XX(name, type)    JL_DLLEXPORT const void * jl_##name;
 JL_EXPORTED_DATA_POINTERS(XX)
+JL_CONST_GLOBAL_VARS(XX)
 #undef XX
 
 // Define symbol data as `$(type) $(name);`
@@ -18,7 +19,7 @@ JL_EXPORTED_DATA_SYMBOLS(XX)
 
 // define a copy of exported data
 #define jl_max_tags 64
-JL_DLLEXPORT void *small_typeof[(jl_max_tags << 4) / sizeof(void*)]; // 16-bit aligned, like the GC
+JL_DLLEXPORT void *jl_small_typeof[(jl_max_tags << 4) / sizeof(void*)]; // 16-bit aligned, like the GC
 
 // Declare list of exported functions (sans type)
 #define XX(name)    JL_DLLEXPORT void name(void);
diff --git a/cli/loader.h b/cli/loader.h
index b778976cee495..310226c84f815 100644
--- a/cli/loader.h
+++ b/cli/loader.h
@@ -5,24 +5,6 @@
 #include "../src/support/dirpath.h"
 #include "../src/julia_fasttls.h"
 
-#ifdef _OS_WINDOWS_
-/* We need to reimplement a bunch of standard library stuff on windows,
- * but we want to make sure that it doesn't conflict with the actual implementations
- * once those get linked into this process. */
-#define fwrite loader_fwrite
-#define fputs loader_fputs
-#define exit loader_exit
-#define strlen loader_strlen
-#define wcslen loader_wcslen
-#define strncat loader_strncat
-#define memcpy loader_memcpy
-#define dirname loader_dirname
-#define strchr loader_strchr
-#define malloc loader_malloc
-#define realloc loader_realloc
-#define free loader_free
-#endif
-
 #ifdef _OS_WINDOWS_
 
 #define WIN32_LEAN_AND_MEAN
@@ -54,9 +36,9 @@
 // Borrow definition from `support/dtypes.h`
 #ifdef _OS_WINDOWS_
 # ifdef JL_LIBRARY_EXPORTS
-#  define JL_DLLEXPORT __declspec(dllexport)
+#  define JL_DLLEXPORT __declspec(dllexport) __attribute__ ((visibility("default")))
 # endif
-#  define JL_DLLIMPORT __declspec(dllimport)
+#  define JL_DLLIMPORT __declspec(dllimport) __attribute__ ((visibility("default")))
 #define JL_HIDDEN
 #else
 # define JL_DLLIMPORT __attribute__ ((visibility("default")))
diff --git a/cli/loader_lib.c b/cli/loader_lib.c
index 12feed0c508a0..4a8ffaf95d92e 100644
--- a/cli/loader_lib.c
+++ b/cli/loader_lib.c
@@ -125,6 +125,32 @@ static void * lookup_symbol(const void * lib_handle, const char * symbol_name) {
 #endif
 }
 
+#if defined(_OS_WINDOWS_)
+void win32_formatmessage(DWORD code, char *reason, int len) {
+    DWORD res;
+    LPWSTR errmsg;
+    res = FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                         FORMAT_MESSAGE_FROM_SYSTEM |
+                         FORMAT_MESSAGE_IGNORE_INSERTS |
+                         FORMAT_MESSAGE_MAX_WIDTH_MASK,
+                         NULL, code,
+                         MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US),
+                         (LPWSTR)&errmsg, 0, NULL);
+    if (!res && (GetLastError() == ERROR_MUI_FILE_NOT_FOUND ||
+                 GetLastError() == ERROR_RESOURCE_TYPE_NOT_FOUND)) {
+      res = FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                           FORMAT_MESSAGE_FROM_SYSTEM |
+                           FORMAT_MESSAGE_IGNORE_INSERTS |
+                           FORMAT_MESSAGE_MAX_WIDTH_MASK,
+                           NULL, code,
+                           0, (LPWSTR)&errmsg, 0, NULL);
+    }
+    res = WideCharToMultiByte(CP_UTF8, 0, errmsg, -1, reason, len, NULL, NULL);
+    reason[len - 1] = '\0';
+    LocalFree(errmsg);
+}
+#endif
+
 // Find the location of libjulia.
 char *lib_dir = NULL;
 JL_DLLEXPORT const char * jl_get_libdir()
@@ -135,21 +161,21 @@ JL_DLLEXPORT const char * jl_get_libdir()
     }
 #if defined(_OS_WINDOWS_)
     // On Windows, we use GetModuleFileNameW
-    wchar_t *libjulia_path = utf8_to_wchar(LIBJULIA_NAME);
     HMODULE libjulia = NULL;
 
-    // Get a handle to libjulia.
-    if (!libjulia_path) {
-        jl_loader_print_stderr3("ERROR: Unable to convert path ", LIBJULIA_NAME, " to wide string!\n");
-        exit(1);
-    }
-    libjulia = LoadLibraryW(libjulia_path);
-    if (libjulia == NULL) {
-        jl_loader_print_stderr3("ERROR: Unable to load ", LIBJULIA_NAME, "!\n");
+    // Get a handle to libjulia
+    if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+                            (LPCWSTR)jl_get_libdir, &libjulia)) {
+        DWORD err = GetLastError();
+        jl_loader_print_stderr3("ERROR: could not locate library \"", LIBJULIA_NAME, "\"\n");
+
+        char msg[2048];
+        win32_formatmessage(err, msg, sizeof(msg));
+        jl_loader_print_stderr(msg);
         exit(1);
     }
-    free(libjulia_path);
-    libjulia_path = (wchar_t*)malloc(32768 * sizeof(wchar_t)); // max long path length
+
+    wchar_t *libjulia_path = (wchar_t*)malloc(32768 * sizeof(wchar_t)); // max long path length
     if (!GetModuleFileNameW(libjulia, libjulia_path, 32768)) {
         jl_loader_print_stderr("ERROR: GetModuleFileName() failed\n");
         exit(1);
@@ -281,6 +307,7 @@ static char *libstdcxxprobe(void)
         // See if the version is compatible
         char *dlerr = dlerror(); // clear out dlerror
         void *sym = dlsym(handle, GLIBCXX_LEAST_VERSION_SYMBOL);
+        (void)sym;
         dlerr = dlerror();
         if (dlerr) {
             // We can't use the library that was found, so don't write anything.
@@ -322,10 +349,16 @@ static char *libstdcxxprobe(void)
             pid_t npid = waitpid(pid, &wstatus, 0);
             if (npid == -1) {
                 if (errno == EINTR) continue;
-                if (errno != EINTR) {
-                    perror("Error during libstdcxxprobe in parent process:\nwaitpid");
-                    exit(1);
+                if (errno == ECHILD) {
+                    // SIGCHLD is set to SIG_IGN or has flag SA_NOCLDWAIT, so the child
+                    // did not become a zombie and wait for `waitpid` - it just exited.
+                    //
+                    // Assume that it exited successfully and use whatever libpath we
+                    // got out of the pipe, if any.
+                    break;
                 }
+                perror("Error during libstdcxxprobe in parent process:\nwaitpid");
+                exit(1);
             }
             else if (!WIFEXITED(wstatus)) {
                 const char *err_str = "Error during libstdcxxprobe in parent process:\n"
@@ -375,7 +408,6 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) {
     const char *lib_dir = jl_get_libdir();
 
     // Pre-load libraries that libjulia-internal needs.
-    int deps_len = strlen(&dep_libs[1]);
     char *curr_dep = &dep_libs[1];
 
     // We keep track of "special" libraries names (ones whose name is prefixed with `@`)
@@ -451,6 +483,7 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) {
                     char *cxxpath = libstdcxxprobe();
                     if (cxxpath) {
                         void *cxx_handle = dlopen(cxxpath, RTLD_LAZY);
+                        (void)cxx_handle;
                         const char *dlr = dlerror();
                         if (dlr) {
                             jl_loader_print_stderr("ERROR: Unable to dlopen(cxxpath) in parent!\n");
@@ -464,7 +497,13 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) {
                 // If the probe rejected the system libstdc++ (or didn't find one!)
                 // just load our bundled libstdc++ as identified by curr_dep;
                 if (!probe_successful) {
+# ifdef RT_STATIC_LIBSTDCXX
+                    // If we have a statically-linked libstdc++, it is ok for
+                    // this to fail.
+                    load_library(curr_dep, lib_dir, 0);
+# else
                     load_library(curr_dep, lib_dir, 1);
+# endif
                 }
 #endif
             } else if (special_idx == 1) {
@@ -519,7 +558,7 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) {
         (*jl_codegen_exported_func_addrs[symbol_idx]) = addr;
     }
     // Next, if we're on Linux/FreeBSD, set up fast TLS.
-#if !defined(_OS_WINDOWS_) && !defined(_OS_DARWIN_)
+#if !defined(_OS_WINDOWS_) && !defined(_OS_OPENBSD_)
     void (*jl_pgcstack_setkey)(void*, void*(*)(void)) = lookup_symbol(libjulia_internal, "jl_pgcstack_setkey");
     if (jl_pgcstack_setkey == NULL) {
         jl_loader_print_stderr("ERROR: Cannot find jl_pgcstack_setkey() function within libjulia-internal!\n");
diff --git a/cli/loader_win_utils.c b/cli/loader_win_utils.c
index 2c3c826b08369..34fe277fb2879 100644
--- a/cli/loader_win_utils.c
+++ b/cli/loader_win_utils.c
@@ -12,7 +12,11 @@ static FILE _stderr = { INVALID_HANDLE_VALUE };
 FILE *stdout = &_stdout;
 FILE *stderr = &_stderr;
 
-int loader_fwrite(const char *str, size_t nchars, FILE *out) {
+void JL_HIDDEN free(void* mem) {
+    HeapFree(GetProcessHeap(), 0, mem);
+}
+
+int JL_HIDDEN fwrite(const char *str, size_t nchars, FILE *out) {
     DWORD written;
     if (out->isconsole) {
         // Windows consoles do not support UTF-8 (for reading input, though new Windows Terminal does for writing), only UTF-16.
@@ -20,10 +24,10 @@ int loader_fwrite(const char *str, size_t nchars, FILE *out) {
         if (!wstr)
             return -1;
         if (WriteConsoleW(out->fd, wstr, wcslen(wstr), &written, NULL)) {
-            loader_free(wstr);
+            free(wstr);
             return written;
         }
-        loader_free(wstr);
+        free(wstr);
     } else {
         // However, we want to print UTF-8 if the output is a file.
         if (WriteFile(out->fd, str, nchars, &written, NULL))
@@ -32,22 +36,18 @@ int loader_fwrite(const char *str, size_t nchars, FILE *out) {
     return -1;
 }
 
-int loader_fputs(const char *str, FILE *out) {
-    return loader_fwrite(str, loader_strlen(str), out);
+int JL_HIDDEN fputs(const char *str, FILE *out) {
+    return fwrite(str, strlen(str), out);
 }
 
-void * loader_malloc(const size_t size) {
+void JL_HIDDEN *malloc(const size_t size) {
     return HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, size);
 }
 
-void * loader_realloc(void * mem, const size_t size) {
+void JL_HIDDEN *realloc(void * mem, const size_t size) {
     return HeapReAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, mem, size);
 }
 
-void loader_free(void* mem) {
-    HeapFree(GetProcessHeap(), 0, mem);
-}
-
 LPWSTR *CommandLineToArgv(LPWSTR lpCmdLine, int *pNumArgs) {
     LPWSTR out = lpCmdLine;
     LPWSTR cmd = out;
@@ -110,7 +110,7 @@ void setup_stdio() {
     _stderr.isconsole = GetConsoleMode(_stderr.fd, &mode);
 }
 
-void loader_exit(int code) {
+void JL_HIDDEN exit(int code) {
     ExitProcess(code);
 }
 
@@ -148,21 +148,21 @@ wchar_t *utf8_to_wchar(const char * str) {
     return wstr;
 }
 
-size_t loader_strlen(const char * x) {
+size_t JL_HIDDEN strlen(const char * x) {
     int idx = 0;
     while (x[idx] != 0)
         idx++;
     return idx;
 }
 
-size_t loader_wcslen(const wchar_t * x) {
+size_t JL_HIDDEN wcslen(const wchar_t * x) {
     int idx = 0;
     while (x[idx] != 0)
         idx++;
     return idx;
 }
 
-char * loader_strncat(char * base, const char * tail, size_t maxlen) {
+char JL_HIDDEN *strncat(char * base, const char * tail, size_t maxlen) {
     int base_len = strlen(base);
     int tail_len = strlen(tail);
     for (int idx=base_len; idx<min(maxlen, base_len + tail_len); ++idx) {
@@ -171,14 +171,21 @@ char * loader_strncat(char * base, const char * tail, size_t maxlen) {
     return base;
 }
 
-void * loader_memcpy(void * dest, const void * src, size_t len) {
+void JL_HIDDEN *memcpy(void * dest, const void * src, size_t len) {
     for (int idx=0; idx<len; ++idx) {
         ((char *)dest)[idx] = ((const char *)src)[idx];
     }
     return dest;
 }
 
-char * loader_dirname(char * x) {
+void JL_HIDDEN *memset(void *s, int c, size_t n) {
+  unsigned char* p = s;
+  while(n--)
+    *p++ = (unsigned char)c;
+  return s;
+}
+
+char JL_HIDDEN *dirname(char * x) {
     int idx = strlen(x);
     while (idx > 0 && x[idx] != PATHSEPSTRING[0]) {
         idx -= 1;
@@ -198,7 +205,7 @@ char * loader_dirname(char * x) {
     return x;
 }
 
-char * loader_strchr(const char * haystack, int needle) {
+char JL_HIDDEN *strchr(const char * haystack, int needle) {
     int idx=0;
     while (haystack[idx] != needle) {
         if (haystack[idx] == 0) {
diff --git a/cli/trampolines/trampolines_aarch64.S b/cli/trampolines/trampolines_aarch64.S
index 2d87ae6dcdb1c..ccb9a647ac6c3 100644
--- a/cli/trampolines/trampolines_aarch64.S
+++ b/cli/trampolines/trampolines_aarch64.S
@@ -5,9 +5,9 @@
 
 #define XX(name) \
 .global CNAME(name) SEP \
+CNAME(name)##: SEP \
 .cfi_startproc SEP \
 .p2align    2 SEP \
-CNAME(name)##: SEP \
     adrp x16, PAGE(CNAME(name##_addr)) SEP \
     ldr x16, [x16, PAGEOFF(CNAME(name##_addr))] SEP \
     br x16 SEP \
diff --git a/cli/trampolines/trampolines_i686.S b/cli/trampolines/trampolines_i686.S
index 3d9cacf0ce652..f6c46fd6ee49b 100644
--- a/cli/trampolines/trampolines_i686.S
+++ b/cli/trampolines/trampolines_i686.S
@@ -3,13 +3,41 @@
 #include "common.h"
 #include "../../src/jl_exported_funcs.inc"
 
+// set this option to 1 to get very slightly slower trampolines which however do not trigger
+// this linker warning:
+//   ld: ./loader_trampolines.o: warning: relocation against `jl_***_addr' in read-only section `.text'
+//   ld: warning: creating DT_TEXTREL in a shared object
+// If you have a large libjulia.so file or other restrictions on using TEXTREL for some
+// reason, this may be worthwhile.
+// This is not relevant on Windows (though it is valid there), since it always uses
+// DT_TEXTREL anyways, and does not support this notion of PIC.
+#define USE_PC32 0
+
+#if USE_PC32
+.cfi_startproc
+julia__x86.get_pc_thunk.ax:
+    mov    (%esp),%eax
+    ret
+.cfi_endproc
+
+#define CALL(name) \
+    call julia__x86.get_pc_thunk.ax; \
+    jmpl *(CNAMEADDR(name) - .)(%eax); \
+
+#else
+
+#define CALL(name) \
+    jmpl *(CNAMEADDR(name)); \
+
+#endif
+
 #define XX(name) \
 DEBUGINFO(CNAME(name)); \
 .global CNAME(name); \
 .cfi_startproc; \
 CNAME(name)##:; \
     CET_START(); \
-    jmpl *(CNAMEADDR(name)); \
+    CALL(name); \
     ud2; \
 .cfi_endproc; \
 EXPORT(name); \
diff --git a/cli/trampolines/trampolines_riscv64.S b/cli/trampolines/trampolines_riscv64.S
new file mode 100644
index 0000000000000..26307b7c2bb36
--- /dev/null
+++ b/cli/trampolines/trampolines_riscv64.S
@@ -0,0 +1,20 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "common.h"
+#include "../../src/jl_exported_funcs.inc"
+
+#define SEP ;
+
+#define XX(name) \
+.global CNAME(name) SEP \
+.cfi_startproc SEP \
+.p2align    2 SEP \
+ CNAME(name)##: SEP \
+    auipc t3, %pcrel_hi(CNAMEADDR(name)) SEP \
+    ld t3, %pcrel_lo(CNAME(name))(t3) SEP \
+    jr t3 SEP \
+.cfi_endproc SEP \
+
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+JL_CODEGEN_EXPORTED_FUNCS(XX)
+#undef XX
diff --git a/cli/trampolines/trampolines_x86_64.S b/cli/trampolines/trampolines_x86_64.S
index 3b800da56eee1..fcc8e40e1ddc9 100644
--- a/cli/trampolines/trampolines_x86_64.S
+++ b/cli/trampolines/trampolines_x86_64.S
@@ -6,9 +6,9 @@
 #define XX(name) \
 DEBUGINFO(name); \
 .global CNAME(name); \
+CNAME(name)##:; \
 .cfi_startproc; \
 SEH_START1(name); \
-CNAME(name)##:; \
 SEH_START2(); \
     CET_START(); \
     mov CNAMEADDR(name)(%rip),%r11; \
diff --git a/contrib/asan/Make.user.asan b/contrib/asan/Make.user.asan
index 96ed13b54e0f9..1ad8d3c8fb1f7 100644
--- a/contrib/asan/Make.user.asan
+++ b/contrib/asan/Make.user.asan
@@ -3,9 +3,9 @@ BINDIR=$(TOOLCHAIN)/usr/bin
 TOOLDIR=$(TOOLCHAIN)/usr/tools
 
 # use our new toolchain
-USECLANG=1
 override CC=$(TOOLDIR)/clang
 override CXX=$(TOOLDIR)/clang++
+override PATCHELF=$(TOOLDIR)/patchelf
 export ASAN_SYMBOLIZER_PATH=$(TOOLDIR)/llvm-symbolizer
 
 USE_BINARYBUILDER_LLVM=1
@@ -16,9 +16,6 @@ override SANITIZE_ADDRESS=1
 # make the GC use regular malloc/frees, which are hooked by ASAN
 override WITH_GC_DEBUG_ENV=1
 
-# default to a debug build for better line number reporting
-override JULIA_BUILD_MODE=debug
-
 # Enable Julia assertions and LLVM assertions
 FORCE_ASSERTIONS=1
 LLVM_ASSERTIONS=1
diff --git a/contrib/asan/build.sh b/contrib/asan/build.sh
index 77f3078b35c42..2e7f243772c81 100755
--- a/contrib/asan/build.sh
+++ b/contrib/asan/build.sh
@@ -40,7 +40,7 @@ if [ ! -d "$TOOLCHAIN" ]; then
     cp "$HERE/Make.user.tools"  "$TOOLCHAIN/Make.user"
 fi
 
-make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools
+make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools install-patchelf
 
 echo
 echo "Building Julia..."
diff --git a/contrib/bolt/.gitignore b/contrib/bolt/.gitignore
new file mode 100644
index 0000000000000..921d429130268
--- /dev/null
+++ b/contrib/bolt/.gitignore
@@ -0,0 +1,10 @@
+profiles-bolt*
+optimized.build
+toolchain
+
+bolt
+bolt_instrument
+merge_data
+copy_originals
+stage0
+stage1
diff --git a/contrib/bolt/Makefile b/contrib/bolt/Makefile
new file mode 100644
index 0000000000000..76833b9865020
--- /dev/null
+++ b/contrib/bolt/Makefile
@@ -0,0 +1,136 @@
+.PHONY: clean clean_profiles restore_originals
+
+# Settings taken from https://github.com/rust-lang/rust/blob/master/src/tools/opt-dist/src/bolt.rs
+BOLT_ARGS :=
+# Reorder basic blocks within functions
+BOLT_ARGS += -reorder-blocks=ext-tsp
+# Reorder functions within the binary
+BOLT_ARGS += -reorder-functions=cdsort
+# Split function code into hot and code regions
+BOLT_ARGS += -split-functions
+# Split as many basic blocks as possible
+BOLT_ARGS += -split-all-cold
+# Move jump tables to a separate section
+BOLT_ARGS += -jump-tables=move
+# Use regular size pages for code alignment
+BOLT_ARGS += -no-huge-pages
+# Fold functions with identical code
+BOLT_ARGS += -icf=1
+# Split using best available strategy (three-way splitting, Cache-Directed Sort)
+# Disabled for libjulia-internal till https://github.com/llvm/llvm-project/issues/89508 is fixed
+# BOLT_ARGS += -split-strategy=cdsplit
+# Update DWARF debug info in the final binary
+BOLT_ARGS += -update-debug-sections
+# Print optimization statistics
+BOLT_ARGS += -dyno-stats
+# BOLT doesn't fully support computed gotos, https://github.com/llvm/llvm-project/issues/89117
+# Use escaped regex as the name BOLT recognises is often a bit different, e.g. apply_cl/1(*2)
+# This doesn't actually seem to do anything, the actual mitigation is not using --use-old-text
+# which we do in the bolt target
+BOLT_ARGS += -skip-funcs=.\*apply_cl.\*
+
+# -fno-reorder-blocks-and-partition is needed on gcc >= 8.
+BOLT_FLAGS := $\
+	"BOLT_CFLAGS_GCC+=-fno-reorder-blocks-and-partition" $\
+	"BOLT_LDFLAGS=-Wl,--emit-relocs"
+
+STAGE0_BUILD:=$(CURDIR)/toolchain
+STAGE1_BUILD:=$(CURDIR)/optimized.build
+
+STAGE0_BINARIES:=$(STAGE0_BUILD)/usr/bin/
+
+PROFILE_DIR:=$(CURDIR)/profiles-bolt
+JULIA_ROOT:=$(CURDIR)/../..
+
+LLVM_BOLT:=$(STAGE0_BINARIES)llvm-bolt
+LLVM_MERGEFDATA:=$(STAGE0_BINARIES)merge-fdata
+
+# If you add new files to optimize, you need to add BOLT_LDFLAGS and BOLT_CFLAGS to the build of your new file.
+SYMLINKS_TO_OPTIMIZE := libLLVM.so libjulia-internal.so libjulia-codegen.so
+FILES_TO_OPTIMIZE := $(shell for file in $(SYMLINKS_TO_OPTIMIZE); do readlink $(STAGE1_BUILD)/usr/lib/$$file; done)
+
+AFTER_INSTRUMENT_MESSAGE:='Run `make finish_stage1` to finish off the build. $\
+	You can now optionally collect more profiling data by running Julia with an appropriate workload, $\
+	if you wish, run `make clean_profiles` before doing so to remove any profiling data generated by `make finish_stage1`. $\
+	You should end up with some data in $(PROFILE_DIR). Afterwards run `make merge_data && make bolt`.'
+
+$(STAGE0_BUILD) $(STAGE1_BUILD):
+	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
+
+stage0: | $(STAGE0_BUILD)
+	$(MAKE) -C $(STAGE0_BUILD)/deps install-BOLT && \
+	touch $@
+
+# Build with our custom flags, binary builder doesn't use them so we need to build LLVM for now.
+# We manually skip package image creation so that we can profile it
+$(STAGE1_BUILD): stage0
+stage1: export USE_BINARYBUILDER_LLVM=0
+stage1: | $(STAGE1_BUILD)
+	$(MAKE) -C $(STAGE1_BUILD) $(BOLT_FLAGS) julia-src-release julia-symlink julia-libccalltest \
+								julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest && \
+	touch $@
+
+copy_originals: stage1
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		cp $$abs_file "$$abs_file.original"; \
+	done && \
+	touch $@
+
+# I don't think there's any particular reason to have -no-huge-pages here, perhaps slightly more accurate profile data
+# as the final build uses -no-huge-pages
+# We reset the mtime of the files to prevent make from rebuilding targets depending on them.
+bolt_instrument: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		old_time=$$(stat -c %Y $$abs_file); \
+		$(LLVM_BOLT) "$$abs_file.original" -o $$abs_file --instrument --instrumentation-file-append-pid --instrumentation-file="$(PROFILE_DIR)/$$file-prof" -no-huge-pages; \
+		mkdir -p $$(dirname "$(PROFILE_DIR)/$$file-prof"); \
+		touch -d "@$$old_time" $$abs_file; \
+		printf "\n"; \
+	done && \
+	touch $@
+	@echo $(AFTER_INSTRUMENT_MESSAGE)
+
+finish_stage1: stage1
+	$(MAKE) -C $(STAGE1_BUILD)
+
+merge_data: bolt_instrument
+	for file in $(FILES_TO_OPTIMIZE); do \
+		profiles=$(PROFILE_DIR)/$$file-prof.*.fdata; \
+		$(LLVM_MERGEFDATA) $$profiles > "$(PROFILE_DIR)/$$file-prof.merged.fdata"; \
+	done && \
+	touch $@
+
+# The --use-old-text saves about 16 MiB of libLLVM.so size.
+# However, the rust folk found it succeeds very non-deterministically for them.
+# It tries to reuse old text segments to reduce binary size
+# BOLT doesn't fully support computed gotos https://github.com/llvm/llvm-project/issues/89117, so we cannot use --use-old-text on libjulia-internal
+# That flag saves less than 1 MiB for libjulia-internal so oh well.
+# We reset the mtime of the files to prevent make from rebuilding targets depending on them.
+bolt: merge_data
+	for file in $(FILES_TO_OPTIMIZE); do \
+        abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		old_time=$$(stat -c %Y $$abs_file); \
+		$(LLVM_BOLT) "$$abs_file.original" -data "$(PROFILE_DIR)/$$file-prof.merged.fdata" -o $$abs_file $(BOLT_ARGS) $$(if [ "$$file" != $(shell readlink $(STAGE1_BUILD)/usr/lib/libjulia-internal.so) ]; then echo "--use-old-text -split-strategy=cdsplit"; fi); \
+		touch -d "@$$old_time" $$abs_file; \
+    done && \
+    touch $@
+
+clean_profiles:
+	rm -rf $(PROFILE_DIR)
+
+clean:
+	rm -f stage0 stage1 bolt copy_originals merge_data bolt_instrument
+
+restore_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		cp -P "$$abs_file.original" $$abs_file; \
+	done
+
+delete_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		rm "$$abs_file.original"; \
+	done
diff --git a/contrib/bolt/README.md b/contrib/bolt/README.md
new file mode 100644
index 0000000000000..8680939ef6276
--- /dev/null
+++ b/contrib/bolt/README.md
@@ -0,0 +1,17 @@
+BOLT only works on x86_64 and arch64 on Linux.
+
+DO NOT STRIP THE RESULTING .so FILES, https://github.com/llvm/llvm-project/issues/56738.
+If you really need to, try adding `-use-gnu-stack` to `BOLT_ARGS`.
+
+To build a BOLT-optimized version of Julia run the following commands (`cd` into this directory first)
+```bash
+make stage1
+make copy_originals
+make bolt_instrument
+make finish_stage1
+make merge_data
+make bolt
+```
+After these commands finish, the optimized version of Julia will be built in the `optimized.build` directory.
+
+This doesn't align the code to support huge pages as it doesn't seem that we do that currently, this decreases the size of the .so files by 2-4mb.
diff --git a/contrib/check-whitespace.jl b/contrib/check-whitespace.jl
index d5473ab4c7c62..d7e04512e153d 100755
--- a/contrib/check-whitespace.jl
+++ b/contrib/check-whitespace.jl
@@ -18,51 +18,94 @@ const patterns = split("""
     *Makefile
 """)
 
+const is_gha = something(tryparse(Bool, get(ENV, "GITHUB_ACTIONS", "false")), false)
+
+# Note: `git ls-files` gives `/` as a path separator on Windows,
+#   so we just use `/` for all platforms.
 allow_tabs(path) =
     path == "Make.inc" ||
     endswith(path, "Makefile") ||
     endswith(path, ".make") ||
     endswith(path, ".mk") ||
-    startswith(path, joinpath("src", "support")) ||
-    startswith(path, joinpath("src", "flisp")) ||
-    endswith(path, joinpath("test", "syntax.jl")) ||
-    endswith(path, joinpath("test", "triplequote.jl"))
+    startswith(path, "src/support") ||
+    startswith(path, "src/flisp") ||
+    endswith(path, "test/syntax.jl") ||
+    endswith(path, "test/triplequote.jl")
 
-const errors = Set{Tuple{String,Int,String}}()
+function check_whitespace()
+    # Get file list from ARGS if provided, otherwise use git ls-files
+    errors = Set{Tuple{String,Int,String}}()
+    files_to_check = filter(arg -> arg != "--fix", ARGS)
+    if isempty(files_to_check)
+        files_to_check = eachline(`git ls-files -- $patterns`)
+    end
 
-for path in eachline(`git ls-files -- $patterns`)
-    lineno = 0
-    non_blank = 0
+    files_fixed = 0
+    if "--fix" in ARGS
+        for path in files_to_check
+            content = newcontent = read(path, String)
+            isempty(content) && continue
+            if !allow_tabs(path)
+                tabpattern = r"^([ \t]+)"m => (x -> replace(x, r"((?: {4})*)( *\t)" => s"\1    ")) # Replace tab sequences at start of line after any number of 4-space groups
+                newcontent = replace(newcontent, tabpattern)
+            end
+            newcontent = replace(newcontent,
+                r"\s*$" => '\n',                # Remove trailing whitespace and normalize line ending at eof
+                r"\s*?[\r\n]" => '\n',          # Remove trailing whitespace and normalize line endings on each line
+                r"\xa0" => ' '                  # Replace non-breaking spaces
+            )
+            if content != newcontent
+                write(path, newcontent)
+                files_fixed += 1
+            end
+        end
+        if files_fixed > 0
+            println(stderr, "Fixed whitespace issues in $files_fixed files.")
+        end
+    end
 
-    file_err(msg) = push!(errors, (path, 0, msg))
-    line_err(msg) = push!(errors, (path, lineno, msg))
+    for path in files_to_check
+        lineno = 0
+        non_blank = 0
 
-    isfile(path) || continue
-    for line in eachline(path, keep=true)
-        lineno += 1
-        contains(line, '\r')   && file_err("non-UNIX line endings")
-        contains(line, '\ua0') && line_err("non-breaking space")
-        allow_tabs(path) ||
-        contains(line, '\t')   && line_err("tab")
-        endswith(line, '\n')   || line_err("no trailing newline")
-        line = chomp(line)
-        endswith(line, r"\s")  && line_err("trailing whitespace")
-        contains(line, r"\S")  && (non_blank = lineno)
+        file_err(msg) = push!(errors, (path, 0, msg))
+        line_err(msg) = push!(errors, (path, lineno, msg))
+
+        isfile(path) || continue
+        for line in eachline(path, keep=true)
+            lineno += 1
+            contains(line, '\r')   && file_err("non-UNIX line endings")
+            contains(line, '\ua0') && line_err("non-breaking space")
+            allow_tabs(path) ||
+                contains(line, '\t')   && line_err("tab")
+            endswith(line, '\n')   || line_err("no trailing newline")
+            line = chomp(line)
+            endswith(line, r"\s")  && line_err("trailing whitespace")
+            contains(line, r"\S")  && (non_blank = lineno)
+        end
+        non_blank < lineno         && line_err("trailing blank lines")
     end
-    non_blank < lineno         && line_err("trailing blank lines")
-end
 
-if isempty(errors)
-    println(stderr, "Whitespace check found no issues.")
-    exit(0)
-else
-    println(stderr, "Whitespace check found $(length(errors)) issues:")
-    for (path, lineno, msg) in sort!(collect(errors))
-        if lineno == 0
-            println(stderr, "$path -- $msg")
-        else
-            println(stderr, "$path:$lineno -- $msg")
+    if isempty(errors)
+        println(stderr, "Whitespace check found no issues.")
+        exit(0)
+    else
+        println(stderr, "Whitespace check found $(length(errors)) issues:")
+        for (path, lineno, msg) in sort!(collect(errors))
+            if lineno == 0
+                println(stderr, "$path -- $msg")
+                if is_gha
+                    println(stdout, "::warning title=Whitespace check,file=", path, "::", msg)
+                end
+            else
+                println(stderr, "$path:$lineno -- $msg")
+                if is_gha
+                    println(stdout, "::warning title=Whitespace check,file=", path, ",line=", lineno, "::", msg)
+                end
+            end
         end
+        exit(1)
     end
-    exit(1)
 end
+
+check_whitespace()
diff --git a/contrib/download_cmake.sh b/contrib/download_cmake.sh
index 1deeb08ddded2..5cf3c579ed052 100755
--- a/contrib/download_cmake.sh
+++ b/contrib/download_cmake.sh
@@ -8,17 +8,17 @@ mkdir -p "$(dirname "$0")"/../deps/scratch
 cd "$(dirname "$0")"/../deps/scratch
 
 CMAKE_VERSION_MAJOR=3
-CMAKE_VERSION_MINOR=19
-CMAKE_VERSION_PATCH=3
+CMAKE_VERSION_MINOR=30
+CMAKE_VERSION_PATCH=1
 CMAKE_VERSION_MAJMIN=$CMAKE_VERSION_MAJOR.$CMAKE_VERSION_MINOR
 CMAKE_VERSION=$CMAKE_VERSION_MAJMIN.$CMAKE_VERSION_PATCH
 
 # listed at https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/cmake-$CMAKE_VERSION-SHA-256.txt
 # for the files cmake-$CMAKE_VERSION-macos-universal.tar.gz
 # cmake-$CMAKE_VERSION-Linux-x86_64.tar.gz and cmake-$CMAKE_VERSION-Linux-aarch64.tar.gz
-CMAKE_SHA256_DARWIN=a6b79ad05f89241a05797510e650354d74ff72cc988981cdd1eb2b3b2bda66ac
-CMAKE_SHA256_LINUX_X86_64=c18b65697e9679e5c88dccede08c323cd3d3730648e59048047bba82097e0ffc
-CMAKE_SHA256_LINUX_AARCH64=66e507c97ffb586d7ca6567890808b792c8eb004b645706df6fbf27826a395a2
+CMAKE_SHA256_DARWIN=51e12618829b811bba6f033ee8f39f6192da1b6abb20d82a7899d5134e879a4c
+CMAKE_SHA256_LINUX_X86_64=ac31f077ef3378641fa25a3cb980d21b2f083982d3149a8f2eb9154f2b53696b
+CMAKE_SHA256_LINUX_AARCH64=ad234996f8750f11d7bd0d17b03f55c434816adf1f1671aab9e8bab21a43286a
 
 PLATFORM="$(uname)-$(uname -m)"
 case $PLATFORM in
@@ -28,12 +28,12 @@ case $PLATFORM in
     echo "$CMAKE_SHA256_DARWIN  $FULLNAME.tar.gz" | shasum -a 256 -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/CMake.app/Contents/bin/cmake;;
   Linux-x86_64)
-    FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM
+    FULLNAME=cmake-$CMAKE_VERSION-linux-x86_64
     ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz
     echo "$CMAKE_SHA256_LINUX_X86_64  $FULLNAME.tar.gz" | sha256sum -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/bin/cmake;;
   Linux-aarch64)
-    FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM
+    FULLNAME=cmake-$CMAKE_VERSION-linux-aarch64
     ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz
     echo "$CMAKE_SHA256_LINUX_AARCH64  $FULLNAME.tar.gz" | sha256sum -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/bin/cmake;;
diff --git a/contrib/escape_json.sh b/contrib/escape_json.sh
new file mode 100755
index 0000000000000..06e08d39ad57e
--- /dev/null
+++ b/contrib/escape_json.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# print arguments escaped as json list elements
+
+first=true
+for n in "$@"; do
+  $first && first=false || printf ', '
+  n="${n//\\/\\\\}"
+  n="${n//\"/\\\"}"
+  printf '"%s"' "$n"
+done
diff --git a/contrib/excise_stdlib.sh b/contrib/excise_stdlib.sh
new file mode 100755
index 0000000000000..3da9ff437bf83
--- /dev/null
+++ b/contrib/excise_stdlib.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+# Uses https://github.com/newren/git-filter-repo
+# Recommended use of `Github cli`
+
+set -e
+set -f
+set -x
+
+if [ -z "$*" ]; then echo "Expected name of stdlib"; fi
+
+STDLIB=$1
+WORKDIR=$(mktemp -d)
+
+echo "Excising stdlib $STDLIB; workdir $WORKDIR"
+pushd $WORKDIR
+git clone https://github.com/JuliaLang/julia $STDLIB
+pushd $STDLIB
+
+echo "Filtering repo"
+git filter-repo --subdirectory-filter stdlib/$STDLIB --path LICENSE.md \
+    --message-callback 'return re.sub(b"(\W)(#\d+)", lambda m: m.group(1) + b"JuliaLang/julia" + m.group(2), message)'
+
+
+echo "Deleting branches"
+git branch -l | grep -v release- | grep -v master | xargs git branch -v -D
+
+popd
+popd
+echo "Done! Inspect the result and push it!"
+echo """
+      cd $WORKDIR/$STDLIB
+      gh repo create JuliaLang/$STDLIB.jl --push --source=. --public
+      git push --all
+      git push --tags"""
+
+echo """
+     Remember to:
+     1. Add a README.md
+     2. Setup GHA or similar for CI
+     """
diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl
index fea4ca6bc1fe3..d925933314481 100644
--- a/contrib/generate_precompile.jl
+++ b/contrib/generate_precompile.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# Prevent this from putting anyting into the Main namespace
-@eval Module() begin
+# Prevent this from putting anything into the Main namespace
+@eval Base module __precompile_script
 
 if Threads.maxthreadid() != 1
     @warn "Running this file with multiple Julia threads may lead to a build error" Threads.maxthreadid()
@@ -12,8 +12,6 @@ Sys.__init_build()
 if !isdefined(Base, :uv_eventloop)
     Base.reinit_stdio()
 end
-Base.include(@__MODULE__, joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testhelpers", "FakePTYs.jl"))
-import .FakePTYs: open_fake_pty
 using Base.Meta
 
 ## Debugging options
@@ -33,6 +31,62 @@ UP_ARROW = "\e[A"
 DOWN_ARROW = "\e[B"
 
 hardcoded_precompile_statements = """
+precompile(Base.unsafe_string, (Ptr{UInt8},))
+precompile(Base.unsafe_string, (Ptr{Int8},))
+
+# used by REPL
+precompile(Tuple{typeof(Base.getproperty), Base.Terminals.TTYTerminal, Symbol})
+precompile(Tuple{typeof(Base.reseteof), Base.Terminals.TTYTerminal})
+precompile(Tuple{typeof(Base.Terminals.enable_bracketed_paste), Base.Terminals.TTYTerminal})
+precompile(Tuple{typeof(Base.Terminals.width), Base.Terminals.TTYTerminal})
+precompile(Tuple{typeof(Base.Terminals.height), Base.Terminals.TTYTerminal})
+precompile(Tuple{typeof(Base.write), Base.Terminals.TTYTerminal, Array{UInt8, 1}})
+precompile(Tuple{typeof(Base.isempty), Base.AnnotatedString{String}}
+
+# loading.jl - without these each precompile worker would precompile these because they're hit before pkgimages are loaded
+precompile(Base.__require, (Module, Symbol))
+precompile(Base.__require, (Base.PkgId,))
+precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int))
+precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int, Int))
+precompile(Tuple{typeof(Base.Threads.atomic_add!), Base.Threads.Atomic{Int}, Int})
+precompile(Tuple{typeof(Base.Threads.atomic_sub!), Base.Threads.Atomic{Int}, Int})
+precompile(Tuple{Type{Pair{A, B} where B where A}, Base.PkgId, UInt128})
+precompile(Tuple{typeof(Base.in!), Tuple{Module, String, UInt64, UInt32, Float64}, Base.Set{Any}})
+precompile(Tuple{typeof(Base.Compiler.ir_to_codeinf!), Base.Compiler.OptimizationState{Base.Compiler.NativeInterpreter}})
+precompile(Tuple{typeof(Base.getindex), Type{Pair{Base.PkgId, UInt128}}, Pair{Base.PkgId, UInt128}, Pair{Base.PkgId, UInt128}, Pair{Base.PkgId, UInt128}, Vararg{Pair{Base.PkgId, UInt128}}})
+precompile(Tuple{typeof(Base.Compiler.ir_to_codeinf!), Base.Compiler.OptimizationState{Base.Compiler.NativeInterpreter}, Core.SimpleVector})
+precompile(Tuple{typeof(Base.Compiler.ir_to_codeinf!), Base.Compiler.OptimizationState{Base.Compiler.NativeInterpreter}})
+
+# LazyArtifacts (but more generally helpful)
+precompile(Tuple{Type{Base.Val{x} where x}, Module})
+precompile(Tuple{Type{NamedTuple{(:honor_overrides,), T} where T<:Tuple}, Tuple{Bool}})
+precompile(Tuple{typeof(Base.unique!), Array{String, 1}})
+precompile(Tuple{typeof(Base.vcat), Array{String, 1}, Array{String, 1}})
+
+# Pkg loading
+precompile(Tuple{typeof(Base.Filesystem.normpath), String, String, Vararg{String}})
+precompile(Tuple{typeof(Base.append!), Array{String, 1}, Array{String, 1}})
+precompile(Tuple{typeof(Base.join), Array{String, 1}, Char})
+precompile(Tuple{typeof(Base.getindex), Base.Dict{Any, Any}, Char})
+precompile(Tuple{typeof(Base.delete!), Base.Set{Any}, Char})
+
+# REPL
+precompile(isequal, (String, String))
+precompile(Base.check_open, (Base.TTY,))
+precompile(Base.getproperty, (Base.TTY, Symbol))
+precompile(write, (Base.TTY, String))
+precompile(Tuple{typeof(Base.get), Base.TTY, Symbol, Bool})
+precompile(Tuple{typeof(Base.hashindex), String, Int})
+precompile(Tuple{typeof(Base.write), Base.GenericIOBuffer{Array{UInt8, 1}}, String})
+precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Nothing, Int}, Int})
+precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Nothing, Int}, Int, Int})
+precompile(Tuple{typeof(Base._typeddict), Base.Dict{String, Any}, Base.Dict{String, Any}, Vararg{Base.Dict{String, Any}}})
+precompile(Tuple{typeof(Base.promoteK), Type, Base.Dict{String, Any}, Base.Dict{String, Any}})
+precompile(Tuple{typeof(Base.promoteK), Type, Base.Dict{String, Any}})
+precompile(Tuple{typeof(Base.promoteV), Type, Base.Dict{String, Any}, Base.Dict{String, Any}})
+precompile(Tuple{typeof(Base.eval_user_input), Base.PipeEndpoint, Any, Bool})
+precompile(Tuple{typeof(Base.get), Base.PipeEndpoint, Symbol, Bool})
+
 # used by Revise.jl
 precompile(Tuple{typeof(Base.parse_cache_header), String})
 precompile(Base.read_dependency_src, (String, String))
@@ -43,12 +97,17 @@ precompile(Tuple{typeof(haskey), Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
 precompile(Tuple{typeof(delete!), Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
 precompile(Tuple{typeof(push!), Vector{Function}, Function})
 
+# preferences
+precompile(Base.get_preferences, (Base.UUID,))
+precompile(Base.record_compiletime_preference, (Base.UUID, String))
+
 # miscellaneous
+precompile(Tuple{typeof(Base.exit)})
 precompile(Tuple{typeof(Base.require), Base.PkgId})
 precompile(Tuple{typeof(Base.recursive_prefs_merge), Base.Dict{String, Any}})
 precompile(Tuple{typeof(Base.recursive_prefs_merge), Base.Dict{String, Any}, Base.Dict{String, Any}, Vararg{Base.Dict{String, Any}}})
-precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, Nothing}, Int64})
-precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, String}, Int64})
+precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, Nothing}, Int})
+precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, String}, Int})
 precompile(Tuple{typeof(isassigned), Core.SimpleVector, Int})
 precompile(Tuple{typeof(getindex), Core.SimpleVector, Int})
 precompile(Tuple{typeof(Base.Experimental.register_error_hint), Any, Type})
@@ -59,6 +118,27 @@ precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel,
 precompile(Base.CoreLogging.env_override_minlevel, (Symbol, Module))
 precompile(Base.StackTraces.lookup, (Ptr{Nothing},))
 precompile(Tuple{typeof(Base.run_module_init), Module, Int})
+precompile(Tuple{Type{Base.VersionNumber}, Int32, Int32, Int32})
+
+# Presence tested in the tests
+precompile(Tuple{typeof(Base.print), Base.IOStream, String})
+
+# precompilepkgs
+precompile(Tuple{typeof(Base.get), Type{Array{String, 1}}, Base.Dict{String, Any}, String})
+precompile(Tuple{typeof(Base.get), Type{Base.Dict{String, Any}}, Base.Dict{String, Any}, String})
+precompile(Tuple{typeof(Base.haskey), Base.Dict{String, Any}, String})
+precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Base.TTY, Bool}, Int, Int})
+precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Base.TTY, Bool}, Int})
+precompile(Tuple{typeof(Base.open), Base.CmdRedirect, String, Base.TTY})
+precompile(Tuple{typeof(Base.Precompilation.precompilepkgs)})
+precompile(Tuple{typeof(Base.Precompilation.printpkgstyle), Base.TTY, Symbol, String})
+precompile(Tuple{typeof(Base.rawhandle), Base.TTY})
+precompile(Tuple{typeof(Base.setindex!), Base.Dict{String, Array{String, 1}}, Array{String, 1}, String})
+precompile(Tuple{typeof(Base.setindex!), GenericMemory{:not_atomic, Union{Base.Libc.RawFD, Base.SyncCloseFD, IO}, Core.AddrSpace{Core}(0x00)}, Base.TTY, Int})
+precompile(Tuple{typeof(Base.setup_stdio), Base.TTY, Bool})
+precompile(Tuple{typeof(Base.spawn_opts_inherit), Base.DevNull, Base.TTY, Base.TTY})
+precompile(Tuple{typeof(Core.kwcall), NamedTuple{(:context,), Tuple{Base.TTY}}, typeof(Base.sprint), Function})
+precompile(Tuple{Type{Base.UUID}, Base.UUID})
 """
 
 for T in (Float16, Float32, Float64), IO in (IOBuffer, IOContext{IOBuffer}, Base.TTY, IOContext{Base.TTY})
@@ -66,75 +146,69 @@ for T in (Float16, Float32, Float64), IO in (IOBuffer, IOContext{IOBuffer}, Base
     hardcoded_precompile_statements *= "precompile(Tuple{typeof(show), $IO, $T})\n"
 end
 
-repl_script = """
-2+2
-print("")
-printstyled("a", "b")
-display([1])
-display([1 2; 3 4])
-foo(x) = 1
-@time @eval foo(1)
-; pwd
-$CTRL_C
-$CTRL_R$CTRL_C
-? reinterpret
-using Ra\t$CTRL_C
-\\alpha\t$CTRL_C
-\e[200~paste here ;)\e[201~"$CTRL_C
-$UP_ARROW$DOWN_ARROW$CTRL_C
-123\b\b\b$CTRL_C
-\b\b$CTRL_C
-f(x) = x03
-f(1,2)
-[][1]
-cd("complet_path\t\t$CTRL_C
-"""
-
+# Precompiles for Revise and other packages
 precompile_script = """
-# NOTE: these were moved to the end of Base.jl. TODO: move back here.
-# # Used by Revise & its dependencies
-# while true  # force inference
-# delete!(push!(Set{Module}(), Base), Main)
-# m = first(methods(+))
-# delete!(push!(Set{Method}(), m), m)
-# empty!(Set())
-# push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two))
-# (setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"]
-# (setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two]
-# (setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)]
-# (setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two]
-# (setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int]
-# Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one]
-# Dict(Base => [:(1+1)])[Base]
-# Dict(:one => [1])[:one]
-# Dict("abc" => Set())["abc"]
-# pushfirst!([], sum)
-# get(Base.pkgorigins, Base.PkgId(Base), nothing)
-# sort!([1,2,3])
-# unique!([1,2,3])
-# cumsum([1,2,3])
-# append!(Int[], BitSet())
-# isempty(BitSet())
-# delete!(BitSet([1,2]), 3)
-# deleteat!(Int32[1,2,3], [1,3])
-# deleteat!(Any[1,2,3], [1,3])
-# Core.svec(1, 2) == Core.svec(3, 4)
-# # copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(which(+, (Int, Int)), [Int, Int], Core.svec())))
-# any(t->t[1].line > 1, [(LineNumberNode(2,:none),:(1+1))])
-# break   # end force inference
-# end
+for match = Base._methods(+, (Int, Int), -1, Base.get_world_counter())
+    m = match.method
+    delete!(push!(Set{Method}(), m), m)
+    copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match), typemax(UInt)))
+    break   # only actually need to do this once
+end
+empty!(Set())
+push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two))
+get!(ENV, "___DUMMY", "")
+ENV["___DUMMY"]
+delete!(ENV, "___DUMMY")
+(setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"]
+(setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two]
+(setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)]
+(setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two]
+(setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int]
+Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one]
+Dict(Base => [:(1+1)])[Base]
+Dict(:one => [1])[:one]
+Dict("abc" => Set())["abc"]
+pushfirst!([], sum)
+get(Base.pkgorigins, Base.PkgId(Base), nothing)
+sort!([1,2,3])
+unique!([1,2,3])
+cumsum([1,2,3])
+append!(Int[], BitSet())
+isempty(BitSet())
+delete!(BitSet([1,2]), 3)
+deleteat!(Int32[1,2,3], [1,3])
+deleteat!(Any[1,2,3], [1,3])
+Core.svec(1, 2) == Core.svec(3, 4)
+any(t->t[1].line > 1, [(LineNumberNode(2,:none), :(1+1))])
+
+# Code loading uses this
+sortperm(mtime.(readdir(".")), rev=true)
+# JLLWrappers uses these
+Dict{Base.UUID,Set{String}}()[Base.UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")] = Set{String}()
+get!(Set{String}, Dict{Base.UUID,Set{String}}(), Base.UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210"))
+eachindex(IndexLinear(), Expr[])
+push!(Expr[], Expr(:return, false))
+vcat(String[], String[])
+k, v = (:hello => nothing)
+Base.print_time_imports_report(Base)
+Base.print_time_imports_report_init(Base)
+
+# Preferences uses these
+get(Dict{String,Any}(), "missing", nothing)
+delete!(Dict{String,Any}(), "missing")
+for (k, v) in Dict{String,Any}()
+    println(k)
+end
+
+# interactive startup uses this
+write(IOBuffer(), "")
+
+# precompile @time report generation and printing
+@time @eval Base.Experimental.@force_compile
 """
 
 julia_exepath() = joinpath(Sys.BINDIR, Base.julia_exename())
 
-have_repl =  haskey(Base.loaded_modules,
-                    Base.PkgId(Base.UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL"))
-if have_repl
-    hardcoded_precompile_statements *= """
-    precompile(Tuple{typeof(getproperty), REPL.REPLBackend, Symbol})
-    """
-end
-
 Artifacts = get(Base.loaded_modules,
           Base.PkgId(Base.UUID("56f22d72-fd6d-98f1-02f0-08ddc0907c33"), "Artifacts"),
           nothing)
@@ -143,23 +217,21 @@ if Artifacts !== nothing
     using Artifacts, Base.BinaryPlatforms, Libdl
     artifacts_toml = abspath(joinpath(Sys.STDLIB, "Artifacts", "test", "Artifacts.toml"))
     artifact_hash("HelloWorldC", artifacts_toml)
-    oldpwd = pwd(); cd(dirname(artifacts_toml))
-    macroexpand(Main, :(@artifact_str("HelloWorldC")))
-    cd(oldpwd)
     artifacts = Artifacts.load_artifacts_toml(artifacts_toml)
     platforms = [Artifacts.unpack_platform(e, "HelloWorldC", artifacts_toml) for e in artifacts["HelloWorldC"]]
     best_platform = select_platform(Dict(p => triplet(p) for p in platforms))
+    if best_platform !== nothing
+      # @artifact errors for unsupported platforms
+      oldpwd = pwd(); cd(dirname(artifacts_toml))
+      macroexpand(Main, :(@artifact_str("HelloWorldC")))
+      cd(oldpwd)
+    end
     dlopen("libjulia$(Base.isdebugbuild() ? "-debug" : "")", RTLD_LAZY | RTLD_DEEPBIND)
     """
-end
-
-Pkg = get(Base.loaded_modules,
-          Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg"),
-          nothing)
-
-if Pkg !== nothing
-    # TODO: Split Pkg precompile script into REPL and script part
-    repl_script = Pkg.precompile_script * repl_script # do larger workloads first for better parallelization
+    hardcoded_precompile_statements *= """
+    precompile(Tuple{typeof(Artifacts._artifact_str), Module, String, Base.SubString{String}, String, Base.Dict{String, Any}, Base.SHA1, Base.BinaryPlatforms.Platform, Base.Val{Artifacts}})
+    precompile(Tuple{typeof(Base.tryparse), Type{Base.BinaryPlatforms.Platform}, String})
+    """
 end
 
 FileWatching = get(Base.loaded_modules,
@@ -182,28 +254,12 @@ if Libdl !== nothing
     """
 end
 
-InteractiveUtils = get(Base.loaded_modules,
-          Base.PkgId(Base.UUID("b77e0a4c-d291-57a0-90e8-8db25a27a240"), "InteractiveUtils"),
-          nothing)
-if InteractiveUtils !== nothing
-    repl_script *= """
-    @time_imports using Random
-    """
-end
-
-const JULIA_PROMPT = "julia> "
-const PKG_PROMPT = "pkg> "
-const SHELL_PROMPT = "shell> "
-const HELP_PROMPT = "help?> "
-
 # Printing the current state
 let
     global print_state
     print_lk = ReentrantLock()
     status = Dict{String, String}(
         "step1" => "W",
-        "step2" => "W",
-        "repl" => "0/0",
         "step3" => "W",
         "clock" => "◐",
     )
@@ -224,8 +280,6 @@ let
             isempty(args) || push!(status, args...)
             print("\r└ Collect (Basic: ")
             print_status("step1")
-            print(", REPL ", status["repl"], ": ")
-            print_status("step2")
             print(") => Execute ")
             print_status("step3")
         end
@@ -237,10 +291,11 @@ ansi_disablecursor = "\e[?25l"
 blackhole = Sys.isunix() ? "/dev/null" : "nul"
 procenv = Dict{String,Any}(
         "JULIA_HISTORY" => blackhole,
-        "JULIA_PROJECT" => nothing, # remove from environment
-        "JULIA_LOAD_PATH" => "@stdlib",
+        "JULIA_LOAD_PATH" => "@$(Sys.iswindows() ? ";" : ":")@stdlib",
         "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":",
-        "TERM" => "")
+        "TERM" => "",
+        # "JULIA_DEBUG" => "precompilation",
+        "JULIA_FALLBACK_REPL" => "true")
 
 generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printed
     start_time = time_ns()
@@ -248,7 +303,6 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
 
     # Extract the precompile statements from the precompile file
     statements_step1 = Channel{String}(Inf)
-    statements_step2 = Channel{String}(Inf)
 
     # From hardcoded statements
     for statement in split(hardcoded_precompile_statements::String, '\n')
@@ -263,7 +317,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
         anim_chars = ["◐","◓","◑","◒"]
         current = 1
         if fancyprint
-            while isopen(statements_step2) || !isempty(statements_step2)
+            while isopen(statements_step1) || !isempty(statements_step1)
                 print_state("clock" => anim_chars[current])
                 wait(t)
                 current = current == 4 ? 1 : current + 1
@@ -277,24 +331,32 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
         print_state("step1" => "R")
         # Also precompile a package here
         pkgname = "__PackagePrecompilationStatementModule"
-        mkpath(joinpath(prec_path, pkgname, "src"))
-        path = joinpath(prec_path, pkgname, "src", "$pkgname.jl")
-        write(path,
-              """
-              module $pkgname
-              end
-              """)
-        tmp_prec = tempname(prec_path)
-        tmp_proc = tempname(prec_path)
+        pkguuid = "824efdaf-a0e9-431c-8ee7-3d356b2531c2"
+        pkgpath = joinpath(prec_path, pkgname)
+        mkpath(joinpath(pkgpath, "src"))
+        write(joinpath(pkgpath, "src", "$pkgname.jl"),
+            """
+            module $pkgname
+            println("Precompiling $pkgname")
+            end
+            """)
+        write(joinpath(pkgpath, "Project.toml"),
+            """
+            name = "$pkgname"
+            uuid = "$pkguuid"
+            """)
+        touch(joinpath(pkgpath, "Manifest.toml"))
+        tmp_prec = tempname(prec_path; cleanup=false)
+        tmp_proc = tempname(prec_path; cleanup=false)
         s = """
-            pushfirst!(DEPOT_PATH, $(repr(prec_path)));
+            pushfirst!(DEPOT_PATH, $(repr(joinpath(prec_path,"depot"))));
             Base.PRECOMPILE_TRACE_COMPILE[] = $(repr(tmp_prec));
-            Base.compilecache(Base.PkgId($(repr(pkgname))), $(repr(path)))
+            Base.Precompilation.precompilepkgs(;fancyprint=true);
             $precompile_script
             """
         p = run(pipeline(addenv(`$(julia_exepath()) -O0 --trace-compile=$tmp_proc --sysimage $sysimg
-                --cpu-target=native --startup-file=no --color=yes`, procenv),
-                 stdin=IOBuffer(s), stdout=debug_output))
+                --cpu-target=native --startup-file=no --color=yes --project=$(pkgpath)`, procenv),
+                 stdin=IOBuffer(s), stderr=debug_output, stdout=debug_output))
         n_step1 = 0
         for f in (tmp_prec, tmp_proc)
             isfile(f) || continue
@@ -307,119 +369,22 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
         print_state("step1" => "F$n_step1")
         return :ok
     end
-    !PARALLEL_PRECOMPILATION && wait(step1)
-
-    step2 = @async mktemp() do precompile_file, precompile_file_h
-        print_state("step2" => "R")
-        # Collect statements from running a REPL process and replaying our REPL script
-        touch(precompile_file)
-        pts, ptm = open_fake_pty()
-        if have_repl
-            cmdargs = `-e 'import REPL; REPL.Terminals.is_precompiling[] = true'`
-        else
-            cmdargs = `-e nothing`
-        end
-        p = run(addenv(addenv(```$(julia_exepath()) -O0 --trace-compile=$precompile_file --sysimage $sysimg
-                --cpu-target=native --startup-file=no --color=yes -i $cmdargs```, procenv),
-                "JULIA_PKG_PRECOMPILE_AUTO" => "0"),
-            pts, pts, pts; wait=false)
-        Base.close_stdio(pts)
-        # Prepare a background process to copy output from process until `pts` is closed
-        output_copy = Base.BufferStream()
-        tee = @async try
-            while !eof(ptm)
-                l = readavailable(ptm)
-                write(debug_output, l)
-                Sys.iswindows() && (sleep(0.1); yield(); yield()) # workaround hang - probably a libuv issue?
-                write(output_copy, l)
-            end
-        catch ex
-            if !(ex isa Base.IOError && ex.code == Base.UV_EIO)
-                rethrow() # ignore EIO on ptm after pts dies
-            end
-        finally
-            close(output_copy)
-            close(ptm)
-        end
-        repl_inputter = @async begin
-            # wait for the definitive prompt before start writing to the TTY
-            readuntil(output_copy, JULIA_PROMPT)
-            sleep(0.1)
-            readavailable(output_copy)
-            # Input our script
-            if have_repl
-                precompile_lines = split(repl_script::String, '\n'; keepempty=false)
-                curr = 0
-                for l in precompile_lines
-                    sleep(0.1)
-                    curr += 1
-                    print_state("repl" => "$curr/$(length(precompile_lines))")
-                    # consume any other output
-                    bytesavailable(output_copy) > 0 && readavailable(output_copy)
-                    # push our input
-                    write(debug_output, "\n#### inputting statement: ####\n$(repr(l))\n####\n")
-                    write(ptm, l, "\n")
-                    readuntil(output_copy, "\n")
-                    # wait for the next prompt-like to appear
-                    readuntil(output_copy, "\n")
-                    strbuf = ""
-                    while !eof(output_copy)
-                        strbuf *= String(readavailable(output_copy))
-                        occursin(JULIA_PROMPT, strbuf) && break
-                        occursin(PKG_PROMPT, strbuf) && break
-                        occursin(SHELL_PROMPT, strbuf) && break
-                        occursin(HELP_PROMPT, strbuf) && break
-                        sleep(0.1)
-                    end
-                end
-            end
-            write(ptm, "exit()\n")
-            wait(tee)
-            success(p) || Base.pipeline_error(p)
-            close(ptm)
-            write(debug_output, "\n#### FINISHED ####\n")
-        end
-
-        n_step2 = 0
-        precompile_copy = Base.BufferStream()
-        buffer_reader = @async for statement in eachline(precompile_copy)
-            print_state("step2" => "R$n_step2")
-            push!(statements_step2, statement)
-            n_step2 += 1
-        end
-
-        open(precompile_file, "r") do io
-            while true
-                # We need to allways call eof(io) for bytesavailable(io) to work
-                eof(io) && istaskdone(repl_inputter) && eof(io) && break
-                if bytesavailable(io) == 0
-                    sleep(0.1)
-                    continue
-                end
-                write(precompile_copy, readavailable(io))
-            end
-        end
-        close(precompile_copy)
-        wait(buffer_reader)
-        close(statements_step2)
-        print_state("step2" => "F$n_step2")
-        return :ok
-    end
-    !PARALLEL_PRECOMPILATION && wait(step2)
+    PARALLEL_PRECOMPILATION ? bind(statements_step1, step1) : wait(step1)
 
     # Create a staging area where all the loaded packages are available
     PrecompileStagingArea = Module()
     for (_pkgid, _mod) in Base.loaded_modules
         if !(_pkgid.name in ("Main", "Core", "Base"))
-            eval(PrecompileStagingArea, :(const $(Symbol(_mod)) = $_mod))
+            Core.eval(PrecompileStagingArea, :(const $(Symbol(_mod)) = $_mod))
         end
     end
+    Core.eval(PrecompileStagingArea, :(const Compiler = Base.Compiler))
 
     n_succeeded = 0
     # Make statements unique
     statements = Set{String}()
     # Execute the precompile statements
-    for sts in [statements_step1, statements_step2], statement in sts
+    for statement in statements_step1
         # Main should be completely clean
         occursin("Main.", statement) && continue
         Base.in!(statement, statements) && continue
@@ -439,6 +404,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
             if precompile(ps...)
                 n_succeeded += 1
             else
+                Base.get_bool_env("CI", false) && error("Precompilation failed for $statement")
                 @warn "Failed to precompile expression" form=statement _module=nothing _file=nothing _line=0
             end
             failed = length(statements) - n_succeeded
@@ -446,7 +412,8 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
             print_state("step3" => string("R$n_succeeded", failed > 0 ? " ($failed failed)" : ""))
         catch ex
             # See #28808
-            @warn "Failed to precompile expression" form=statement exception=ex _module=nothing _file=nothing _line=0
+            Base.get_bool_env("CI", false) && error("Precompilation failed for $statement")
+            @warn "Failed to precompile expression" form=statement exception=(ex,catch_backtrace()) _module=nothing _file=nothing _line=0
         end
     end
     wait(clock) # Stop asynchronous printing
@@ -455,10 +422,10 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
     println()
     # Seems like a reasonable number right now, adjust as needed
     # comment out if debugging script
-    n_succeeded > (have_repl ? 900 : 90) || @warn "Only $n_succeeded precompile statements"
+    have_repl = false
+    n_succeeded > (have_repl ? 650 : 90) || @warn "Only $n_succeeded precompile statements"
 
     fetch(step1) == :ok || throw("Step 1 of collecting precompiles failed.")
-    fetch(step2) == :ok || throw("Step 2 of collecting precompiles failed.")
 
     tot_time = time_ns() - start_time
     println("Precompilation complete. Summary:")
@@ -466,15 +433,10 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
 finally
     fancyprint && print(ansi_enablecursor)
     GC.gc(true); GC.gc(false); # reduce memory footprint
-    return
 end
 
 generate_precompile_statements()
 
-# As a last step in system image generation,
-# remove some references to build time environment for a more reproducible build.
-Base.Filesystem.temp_cleanup_purge(force=true)
-
 let stdout = Ref{IO}(stdout)
     Base.PROGRAM_FILE = ""
     Sys.BINDIR = ""
diff --git a/contrib/julia-config.jl b/contrib/julia-config.jl
index df17b967c1ed7..6794834834d25 100755
--- a/contrib/julia-config.jl
+++ b/contrib/julia-config.jl
@@ -45,8 +45,8 @@ function includeDir()
     return abspath(Sys.BINDIR, Base.INCLUDEDIR, "julia")
 end
 
-function ldflags(doframework)
-    doframework && return "-F$(shell_escape(frameworkDir()))"
+function ldflags(; framework::Bool=false)
+    framework && return "-F$(shell_escape(frameworkDir()))"
     fl = "-L$(shell_escape(libDir()))"
     if Sys.iswindows()
         fl = fl * " -Wl,--stack,8388608"
@@ -56,29 +56,40 @@ function ldflags(doframework)
     return fl
 end
 
-function ldlibs(doframework)
+function ldrpath()
+    libname = if Base.isdebugbuild()
+        "julia-debug"
+    else
+        "julia"
+    end
+    return "-Wl,-rpath,$(shell_escape(private_libDir())) -Wl,-rpath,$(shell_escape(libDir())) -l$libname"
+end
+
+function ldlibs(; framework::Bool=false, rpath::Bool=true)
     # Return "Julia" for the framework even if this is a debug build.
     # If the user wants the debug framework, DYLD_IMAGE_SUFFIX=_debug
     # should be used (refer to man 1 dyld).
-    doframework && return "-framework $(Base.DARWIN_FRAMEWORK_NAME)"
+    framework && return "-framework $(Base.DARWIN_FRAMEWORK_NAME)"
     libname = if Base.isdebugbuild()
         "julia-debug"
     else
         "julia"
     end
     if Sys.isunix()
-        return "-Wl,-rpath,$(shell_escape(libDir())) " *
-            (Sys.isapple() ? string() : "-Wl,-rpath,$(shell_escape(private_libDir())) ") *
-            "-l$libname"
+        if rpath
+            return "-L$(shell_escape(private_libDir())) $(ldrpath())"
+        else
+            return "-L$(shell_escape(private_libDir()))"
+        end
     else
         return "-l$libname -lopenlibm"
     end
 end
 
-function cflags(doframework)
+function cflags(; framework::Bool=false)
     flags = IOBuffer()
     print(flags, "-std=gnu11")
-    if doframework
+    if framework
         include = shell_escape(frameworkDir())
         print(flags, " -F", include)
     else
@@ -91,8 +102,8 @@ function cflags(doframework)
     return String(take!(flags))
 end
 
-function allflags(doframework)
-    return "$(cflags(doframework)) $(ldflags(doframework)) $(ldlibs(doframework))"
+function allflags(; framework::Bool=false, rpath::Bool=true)
+    return "$(cflags(; framework)) $(ldflags(; framework)) $(ldlibs(; framework, rpath))"
 end
 
 function check_args(args)
@@ -104,31 +115,29 @@ function check_args(args)
 end
 
 function check_framework_flag(args)
-    doframework = "--framework" in args
-    if doframework && !Base.DARWIN_FRAMEWORK
+    framework = "--framework" in args
+    if framework && !Base.DARWIN_FRAMEWORK
         println(stderr, "NOTICE: Ignoring --framework because Julia is not packaged as a framework.")
         return false
-    elseif !doframework && Base.DARWIN_FRAMEWORK
+    elseif !framework && Base.DARWIN_FRAMEWORK
         println(stderr, "NOTICE: Consider using --framework because Julia is packaged as a framework.")
         return false
     end
-    return doframework
+    return framework
 end
 
-function main()
-    check_args(ARGS)
-    doframework = check_framework_flag(ARGS)
-    for args in ARGS
+function (@main)(args)
+    check_args(args)
+    framework = check_framework_flag(args)
+    for args in args
         if args == "--ldflags"
-            println(ldflags(doframework))
+            println(ldflags(; framework))
         elseif args == "--cflags"
-            println(cflags(doframework))
+            println(cflags(; framework))
         elseif args == "--ldlibs"
-            println(ldlibs(doframework))
+            println(ldlibs(; framework))
         elseif args == "--allflags"
-            println(allflags(doframework))
+            println(allflags(; framework))
         end
     end
 end
-
-main()
diff --git a/contrib/juliac/Artifacts.toml b/contrib/juliac/Artifacts.toml
new file mode 100644
index 0000000000000..54771b41b21f7
--- /dev/null
+++ b/contrib/juliac/Artifacts.toml
@@ -0,0 +1,19 @@
+[[mingw-w64]]
+arch = "x86_64"
+git-tree-sha1 = "b17bda08a19173572926f43a48aad5ef3d845e7c"
+os = "windows"
+lazy = true
+
+    [[mingw-w64.download]]
+    sha256 = "53645e06775a55733580426341395c67dda20a664af83bcda76a1d052b618b59"
+    url = "https://github.com/JuliaLang/PackageCompiler.jl/releases/download/v2.1.24/x86_64-14.2.0-release-posix-seh-msvcrt-rt_v12-rev0.tar.gz"
+
+[[mingw-w64]]
+arch = "i686"
+git-tree-sha1 = "76b9f278e7de1d7dfdfe3a786afbe9c1e29003ea"
+os = "windows"
+lazy = true
+
+    [[mingw-w64.download]]
+    sha256 = "d049bd771e01b02f2ca9274435f0e6f9f4f295bf2af72a8059dd851c52144910"
+    url = "https://github.com/JuliaLang/PackageCompiler.jl/releases/download/v2.1.24/i686-14.2.0-release-posix-dwarf-msvcrt-rt_v12-rev0.tar.gz"
diff --git a/contrib/juliac/abi_export.jl b/contrib/juliac/abi_export.jl
new file mode 100644
index 0000000000000..c62e36f14a5b3
--- /dev/null
+++ b/contrib/juliac/abi_export.jl
@@ -0,0 +1,221 @@
+const C_friendly_types = Base.IdSet{Any}([    # a few of these are redundant to make it easier to maintain
+    Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64, Bool,
+    Cvoid, Cint, Cshort, Clong, Cuint, Cushort, Culong, Cssize_t, Csize_t,
+    Cchar, Cwchar_t, Cstring, Cwstring,
+    RawFD,
+])
+
+function recursively_add_types!(types::Base.IdSet{DataType}, @nospecialize(T::DataType))
+    T in types && return types
+    while T.name === Ptr.body.name
+        push!(types, T)
+        T = T.parameters[1] # unwrap Ptr{...}
+        T in types && return types
+    end
+    if T.name.module === Core && T ∉ C_friendly_types
+        error("invalid type for juliac: ", T) # exclude internals (they may change)
+    end
+    push!(types, T)
+    for list in (T.parameters, fieldtypes(T))
+        for S in list
+            recursively_add_types!(types, S)
+        end
+    end
+    return types
+end
+
+struct TypeEmitter
+    io::IO
+    type_ids::IdDict{Any,Int}
+end
+
+function escape_string_json(s::AbstractString)
+    iob = IOBuffer()
+    print(iob, '"')
+    for c in s
+        if c == '"'
+            print(iob, "\\\"")
+        elseif c == '\\'
+            print(iob, "\\\\")
+        elseif c == '\b'
+            print(iob, "\\b")
+        elseif c == '\f'
+            print(iob, "\\f")
+        elseif c == '\n'
+            print(iob, "\\n")
+        elseif c == '\r'
+            print(iob, "\\r")
+        elseif c == '\t'
+            print(iob, "\\t")
+        elseif '\x00' <= c <= '\x1f'
+            print(iob, "\\u", lpad(string(UInt16(c), base=16), 4, '0'))
+        else
+            @assert isvalid(c) "invalid unicode character"
+            print(iob, c)
+        end
+    end
+    print(iob, '"')
+    return String(take!(iob))
+end
+
+function type_name_json(@nospecialize(dt::DataType))
+    return escape_string_json(repr(dt; context=:compact=>true))
+end
+
+function field_name_json(@nospecialize(dt::DataType), field::Int)
+    name = String(fieldname(dt, field))
+    return escape_string_json(name)
+end
+
+function emit_pointer_info!(ctx::TypeEmitter, @nospecialize(dt::DataType); indent::Int = 0)
+    pointee_type_id = ctx.type_ids[dt.parameters[1]]
+    let indented_println(args...) = println(ctx.io, " " ^ indent, args...)
+        indented_println("{")
+        indented_println("  \"id\": ", ctx.type_ids[dt], ",")
+        indented_println("  \"kind\": \"pointer\",")
+        indented_println("  \"name\": ", type_name_json(dt), ",")
+        indented_println("  \"pointee_type_id\": ", pointee_type_id)
+        print(ctx.io, " " ^ indent, "}")
+    end
+end
+
+function emit_field_info!(ctx::TypeEmitter, @nospecialize(dt::DataType), field::Int; indent::Int = 0)
+    desc = Base.DataTypeFieldDesc(dt)[field]
+    type_id = ctx.type_ids[fieldtype(dt, field)]
+    print(ctx.io, " " ^ indent)
+    print(ctx.io, "{")
+    print(ctx.io, " \"name\": ", field_name_json(dt, field), ",")
+    print(ctx.io, " \"type_id\": ", type_id, ",")
+    print(ctx.io, " \"offset\": ", desc.offset, ",")
+    print(ctx.io, " \"isptr\": ", desc.isptr, ",")
+    print(ctx.io, " \"isfieldatomic\": ", Base.isfieldatomic(dt, field))
+    print(ctx.io, " }")
+end
+
+function emit_struct_info!(ctx::TypeEmitter, @nospecialize(dt::DataType); indent::Int = 0)
+    type_id = ctx.type_ids[dt]
+    let indented_println(args...) = println(ctx.io, " " ^ indent, args...)
+        indented_println("{")
+        indented_println("  \"id\": ", type_id, ",")
+        indented_println(ismutabletype(dt) ? "  \"kind\": \"mutable struct\"," : "  \"kind\": \"struct\",")
+        indented_println("  \"name\": ", type_name_json(dt), ",")
+        indented_println("  \"size\": ", Core.sizeof(dt), ",")
+        indented_println("  \"alignment\": ", Base.datatype_alignment(dt), ",")
+        indented_println("  \"fields\": [")
+        for i = 1:Base.datatype_nfields(dt)
+            emit_field_info!(ctx, dt, i; indent = indent + 4)
+            println(ctx.io, i == Base.datatype_nfields(dt) ? "" : ",")
+        end
+        indented_println("  ]")
+        print(ctx.io, " " ^ indent, "}")
+    end
+end
+
+function emit_primitive_type!(ctx::TypeEmitter, @nospecialize(dt::DataType); indent::Int = 0)
+    type_id = ctx.type_ids[dt]
+    let indented_println(args...) = println(ctx.io, " " ^ indent, args...)
+        indented_println("{")
+        indented_println("  \"id\": ", type_id, ",")
+        indented_println("  \"kind\": \"primitive\",")
+        indented_println("  \"name\": ", type_name_json(dt), ",")
+        indented_println("  \"signed\": ", (dt <: Signed), ",")
+        indented_println("  \"bits\": ", 8 * Base.packedsize(dt), ",") # size for reinterpret / in-register
+        indented_println("  \"size\": ", Base.aligned_sizeof(dt), ",") # size with padding / in-memory
+        indented_println("  \"alignment\": ", Base.datatype_alignment(dt))
+        print(ctx.io, " " ^ indent, "}")
+    end
+end
+
+function emit_type_info!(ctx::TypeEmitter, @nospecialize(dt::DataType); indent::Int = 0)
+    if dt.name === Ptr.body.name
+        emit_pointer_info!(ctx, dt; indent)
+    elseif Base.isprimitivetype(dt)
+        emit_primitive_type!(ctx, dt; indent)
+    else
+        emit_struct_info!(ctx, dt; indent)
+    end
+end
+
+function emit_method_info!(ctx::TypeEmitter, method::Core.Method; indent::Int = 0)
+    (rt, sig) = method.ccallable
+    (name, symbol) = let
+        symbol = length(method.ccallable) > 2 ? Symbol(method.ccallable[3]) : method.name
+        iob = IOBuffer()
+        print(IOContext(iob, :print_method_signature_only => true), method)
+        str = String(take!(iob))
+        if symbol !== method.name && startswith(str, String(method.name))
+            # Make a best-effort attempt to use the exported name
+            #
+            # Note: the `startswith` check is to make sure we support 'functor's in arg0,
+            # which Base.@ccallable supports as long as they are singletons.
+            str = replace(str, String(method.name) => String(symbol); count = 1)
+        end
+        (str, String(symbol))
+    end
+
+    argnames = String.(Base.method_argnames(method))
+    let indented_println(args...) = println(ctx.io, " " ^ indent, args...)
+        indented_println("{")
+        indented_println("  \"symbol\": ", escape_string_json(symbol), ",")
+        indented_println("  \"name\": ", escape_string_json(name), ",")
+        indented_println("  \"arguments\": [")
+        for i in 2:length(sig.parameters)
+            print(ctx.io, " " ^ (indent + 4))
+            print(ctx.io, "{")
+            print(ctx.io, " \"name\": ", escape_string_json(argnames[i]), ",")
+            print(ctx.io, " \"type_id\": ", ctx.type_ids[sig.parameters[i]])
+            println(ctx.io, i == length(sig.parameters) ? " }" : " },")
+        end
+        indented_println("  ],")
+        indented_println("  \"returns\": { \"type_id\": ", ctx.type_ids[rt], " }")
+        print(ctx.io, " " ^ indent, "}")
+    end
+end
+
+function emit_abi_info!(ctx::TypeEmitter, exported::Vector{Core.Method}, types::IdSet{DataType})
+    println(ctx.io, "{")
+
+    # assign an ID to each type, so that we can refer to them
+    for (i, T) in enumerate(types)
+        ctx.type_ids[T] = i
+    end
+
+    # print exported functions
+    println(ctx.io, "  \"functions\": [")
+    for (i, method) in enumerate(exported)
+        emit_method_info!(ctx, method; indent = 4)
+        println(ctx.io, i == length(exported) ? "" : ",")
+    end
+    println(ctx.io, "  ],")
+
+    # print type / structure information
+    println(ctx.io, "  \"types\": [")
+    for (i, T) in enumerate(types)
+        emit_type_info!(ctx, T; indent = 4)
+        println(ctx.io, i == length(types) ? "" : ",")
+    end
+    println(ctx.io, "  ]")
+
+    println(ctx.io, "}")
+end
+
+function write_abi_metadata(io::IO)
+    types = Base.IdSet{DataType}()
+
+    # discover all exported methods + any types they reference
+    exported = Core.Method[]
+    Base.visit(Core.methodtable) do method
+        if isdefined(method, :ccallable)
+            push!(exported, method)
+            (rt, sig) = method.ccallable
+            for T in sig.parameters[2:end]
+                recursively_add_types!(types, T)
+            end
+            recursively_add_types!(types, rt)
+        end
+    end
+
+    # print the discovered ABI info
+    ctx = TypeEmitter(io, IdDict{Any,Int}())
+    emit_abi_info!(ctx, exported, types)
+end
diff --git a/contrib/juliac/juliac-buildscript.jl b/contrib/juliac/juliac-buildscript.jl
new file mode 100644
index 0000000000000..7fd46e58870f8
--- /dev/null
+++ b/contrib/juliac/juliac-buildscript.jl
@@ -0,0 +1,115 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Script to run in the process that generates juliac's object file output
+
+# Initialize some things not usually initialized when output is requested
+Sys.__init__()
+Base.reinit_stdio()
+Base.init_depot_path()
+Base.init_load_path()
+Base.init_active_project()
+task = current_task()
+task.rngState0 = 0x5156087469e170ab
+task.rngState1 = 0x7431eaead385992c
+task.rngState2 = 0x503e1d32781c2608
+task.rngState3 = 0x3a77f7189200c20b
+task.rngState4 = 0x5502376d099035ae
+uuid_tuple = (UInt64(0), UInt64(0))
+ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Base.__toplevel__, uuid_tuple)
+if Base.get_bool_env("JULIA_USE_FLISP_PARSER", false) === false
+    Base.JuliaSyntax.enable_in_core!()
+end
+
+include(joinpath(@__DIR__, "abi_export.jl"))
+
+# Load user code
+
+import Base.Experimental.entrypoint
+
+# for use as C main if needed
+function _main(argc::Cint, argv::Ptr{Ptr{Cchar}})::Cint
+    args = ccall(:jl_set_ARGS, Any, (Cint, Ptr{Ptr{Cchar}}), argc, argv)::Vector{String}
+    setglobal!(Base, :PROGRAM_FILE, args[1])
+    popfirst!(args)
+    append!(Base.ARGS, args)
+    return Main.main(args)
+end
+
+let include_result = Base.include(Main, ARGS[1])
+    Core.@latestworld
+    if ARGS[2] == "--output-exe"
+        have_cmain = false
+        if isdefined(Main, :main)
+            for m in methods(Main.main)
+                if isdefined(m, :ccallable)
+                    # TODO: possibly check signature and return type
+                    have_cmain = true
+                    break
+                end
+            end
+        elseif include_result isa Module && isdefined(include_result, :main)
+            error("""
+                  The `main` function must be defined in `Main`. If you are defining it inside a
+                  module, try adding `import .$(nameof(include_result)).main` to $(ARGS[1]).
+                  """)
+        end
+        if !have_cmain
+            if Base.should_use_main_entrypoint()
+                if hasmethod(Main.main, Tuple{Vector{String}})
+                    entrypoint(_main, (Cint, Ptr{Ptr{Cchar}}))
+                    Base._ccallable("main", Cint, Tuple{typeof(_main), Cint, Ptr{Ptr{Cchar}}})
+                else
+                    error("`@main` must accept a `Vector{String}` argument.")
+                end
+            else
+                error("To generate an executable a `@main` function must be defined in the `Main` module.")
+            end
+        end
+    end
+end
+
+# Run the verifier in the current world (before build-script modifications),
+# so that error messages and types print in their usual way.
+Core.Compiler._verify_trim_world_age[] = Base.get_world_counter()
+
+# Apply hacks
+
+if Base.JLOptions().trim != 0
+    include(joinpath(@__DIR__, "juliac-trim-base.jl"))
+    include(joinpath(@__DIR__, "juliac-trim-stdlib.jl"))
+end
+
+#entrypoint(join, (Base.GenericIOBuffer{Memory{UInt8}}, Array{Base.SubString{String}, 1}, String))
+#entrypoint(join, (Base.GenericIOBuffer{Memory{UInt8}}, Array{String, 1}, Char))
+entrypoint(Base.task_done_hook, (Task,))
+entrypoint(Base.wait, ())
+entrypoint(Base.wait_forever, ())
+entrypoint(Base.trypoptask, (Base.StickyWorkqueue,))
+entrypoint(Base.checktaskempty, ())
+
+if ARGS[3] == "true"
+    Base.Compiler.add_ccallable_entrypoints!()
+end
+
+# Export info about entrypoints and structs needed to create header files
+if length(ARGS) >= 4
+    abi_export = ARGS[4]
+    open(abi_export, "w") do io
+        write_abi_metadata(io)
+    end
+end
+
+empty!(Core.ARGS)
+empty!(Base.ARGS)
+empty!(LOAD_PATH)
+empty!(DEPOT_PATH)
+empty!(Base.TOML_CACHE.d)
+Base.TOML.reinit!(Base.TOML_CACHE.p, "")
+Base.ACTIVE_PROJECT[] = nothing
+@eval Base begin
+    PROGRAM_FILE = ""
+end
+@eval Sys begin
+    BINDIR = ""
+    STDLIB = ""
+end
diff --git a/contrib/juliac/juliac-trim-base.jl b/contrib/juliac/juliac-trim-base.jl
new file mode 100644
index 0000000000000..fce275551751e
--- /dev/null
+++ b/contrib/juliac/juliac-trim-base.jl
@@ -0,0 +1,133 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Patches to Base needed for trimming
+
+@eval Core begin
+    DomainError(@nospecialize(val), @nospecialize(msg::AbstractString)) = (@noinline; $(Expr(:new, :DomainError, :val, :msg)))
+end
+
+(f::Base.RedirectStdStream)(io::Core.CoreSTDOUT) = Base._redirect_io_global(io, f.unix_fd)
+
+@eval Base begin
+    depwarn(msg, funcsym; force::Bool=false) = nothing
+    _assert_tostring(msg) = ""
+    reinit_stdio() = nothing
+    wait_forever() = while true; wait(); end
+    JuliaSyntax.enable_in_core!() = nothing
+    init_active_project() = ACTIVE_PROJECT[] = nothing
+    set_active_project(projfile::Union{AbstractString,Nothing}) = ACTIVE_PROJECT[] = projfile
+    disable_library_threading() = nothing
+    start_profile_listener() = nothing
+    invokelatest_trimmed(f, args...; kwargs...) = f(args...; kwargs...)
+    const invokelatest = invokelatest_trimmed
+    function sprint(f::F, args::Vararg{Any,N}; context=nothing, sizehint::Integer=0) where {F<:Function,N}
+        s = IOBuffer(sizehint=sizehint)
+        if context isa Tuple
+            f(IOContext(s, context...), args...)
+        elseif context !== nothing
+            f(IOContext(s, context), args...)
+        else
+            f(s, args...)
+        end
+        String(_unsafe_take!(s))
+    end
+    function show_typeish(io::IO, @nospecialize(T))
+        if T isa Type
+            show(io, T)
+        elseif T isa TypeVar
+            print(io, (T::TypeVar).name)
+        else
+            print(io, "?")
+        end
+    end
+    function show(io::IO, T::Type)
+        if T isa DataType
+            print(io, T.name.name)
+            if T !== T.name.wrapper && length(T.parameters) > 0
+                print(io, "{")
+                first = true
+                for p in T.parameters
+                    if !first
+                        print(io, ", ")
+                    end
+                    first = false
+                    if p isa Int
+                        show(io, p)
+                    elseif p isa Type
+                        show(io, p)
+                    elseif p isa Symbol
+                        print(io, ":")
+                        print(io, p)
+                    elseif p isa TypeVar
+                        print(io, p.name)
+                    else
+                        print(io, "?")
+                    end
+                end
+                print(io, "}")
+            end
+        elseif T isa Union
+            print(io, "Union{")
+            show_typeish(io, T.a)
+            print(io, ", ")
+            show_typeish(io, T.b)
+            print(io, "}")
+        elseif T isa UnionAll
+            print(io, T.body::Type)
+            print(io, " where ")
+            print(io, T.var.name)
+        end
+    end
+    # these functions are not `--trim`-compatible if it resolves to a Varargs{...} specialization
+    # and since it only has 1-argument methods this happens too often by default (just 2-3 args)
+    setfield!(typeof(throw_eachindex_mismatch_indices).name, :max_args, Int32(5), :monotonic)
+    setfield!(typeof(print).name, :max_args, Int32(10), :monotonic)
+    setfield!(typeof(println).name, :max_args, Int32(10), :monotonic)
+    setfield!(typeof(print_to_string).name, :max_args, Int32(10), :monotonic)
+end
+@eval Base.Sys begin
+    __init_build() = nothing # VersionNumber parsing is not supported yet
+end
+# Used for LinearAlgebre ldiv with SVD
+for s in [:searchsortedfirst, :searchsortedlast, :searchsorted]
+    @eval Base.Sort begin
+        # identical to existing Base def. but specializes on `lt` / `by`
+        $s(v::AbstractVector, x, o::Ordering) = $s(v,x,firstindex(v),lastindex(v),o)
+        $s(v::AbstractVector, x;
+            lt::T=isless, by::F=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) where {T,F} =
+            $s(v,x,ord(lt,by,rev,order))
+    end
+end
+@eval Base.GMP begin
+    function __init__() # VersionNumber parsing is not supported yet
+        try
+            ccall((:__gmp_set_memory_functions, libgmp), Cvoid,
+                (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}),
+                cglobal(:jl_gc_counted_malloc),
+                cglobal(:jl_gc_counted_realloc_with_old_size),
+                cglobal(:jl_gc_counted_free_with_size))
+            ZERO.alloc, ZERO.size, ZERO.d = 0, 0, C_NULL
+            ONE.alloc, ONE.size, ONE.d = 1, 1, pointer(_ONE)
+        catch ex
+            Base.showerror_nostdio(ex, "WARNING: Error during initialization of module GMP")
+        end
+        # This only works with a patched version of GMP, ignore otherwise
+        try
+            ccall((:__gmp_set_alloc_overflow_function, libgmp), Cvoid,
+                (Ptr{Cvoid},),
+                cglobal(:jl_throw_out_of_memory_error))
+            ALLOC_OVERFLOW_FUNCTION[] = true
+        catch ex
+            # ErrorException("ccall: could not find function...")
+            if typeof(ex) != ErrorException
+                rethrow()
+            end
+        end
+    end
+end
+
+@eval Base.CoreLogging begin
+    # Disable logging (TypedCallable is required to support the existing dynamic
+    # logger interface, but it's not implemented yet)
+    @inline current_logger_for_env(std_level::LogLevel, group, _module) = nothing
+end
diff --git a/contrib/juliac/juliac-trim-stdlib.jl b/contrib/juliac/juliac-trim-stdlib.jl
new file mode 100644
index 0000000000000..0cc3f01aa92f8
--- /dev/null
+++ b/contrib/juliac/juliac-trim-stdlib.jl
@@ -0,0 +1,83 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Patches to stdlib needed for trimming
+
+let
+    find_loaded_root_module(key::Base.PkgId) = Base.maybe_root_module(key)
+
+    SparseArrays = find_loaded_root_module(Base.PkgId(
+        Base.UUID("2f01184e-e22b-5df5-ae63-d93ebab69eaf"), "SparseArrays"))
+    if SparseArrays !== nothing
+        @eval SparseArrays.CHOLMOD begin
+            function __init__()
+                ccall((:SuiteSparse_config_malloc_func_set, :libsuitesparseconfig),
+                    Cvoid, (Ptr{Cvoid},), cglobal(:jl_malloc, Ptr{Cvoid}))
+                ccall((:SuiteSparse_config_calloc_func_set, :libsuitesparseconfig),
+                    Cvoid, (Ptr{Cvoid},), cglobal(:jl_calloc, Ptr{Cvoid}))
+                ccall((:SuiteSparse_config_realloc_func_set, :libsuitesparseconfig),
+                    Cvoid, (Ptr{Cvoid},), cglobal(:jl_realloc, Ptr{Cvoid}))
+                ccall((:SuiteSparse_config_free_func_set, :libsuitesparseconfig),
+                Cvoid, (Ptr{Cvoid},), cglobal(:jl_free, Ptr{Cvoid}))
+            end
+        end
+    end
+
+    Artifacts = find_loaded_root_module(Base.PkgId(
+        Base.UUID("56f22d72-fd6d-98f1-02f0-08ddc0907c33"), "Artifacts"))
+    if Artifacts !== nothing
+        @eval Artifacts begin
+            function _artifact_str(
+                __module__,
+                artifacts_toml,
+                name,
+                path_tail,
+                artifact_dict,
+                hash,
+                platform,
+                _::Val{LazyArtifacts}
+            ) where LazyArtifacts
+                # If the artifact exists, we're in the happy path and we can immediately
+                # return the path to the artifact:
+                dirs = artifacts_dirs(bytes2hex(hash.bytes))
+                for dir in dirs
+                    if isdir(dir)
+                        return jointail(dir, path_tail)
+                    end
+                end
+                error("Artifact not found")
+            end
+        end
+    end
+
+    Pkg = find_loaded_root_module(Base.PkgId(
+        Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg"))
+    if Pkg !== nothing
+        @eval Pkg begin
+            __init__() = nothing # Assume the Pkg is not actually used
+        end
+    end
+
+    StyledStrings = find_loaded_root_module(Base.PkgId(
+        Base.UUID("f489334b-da3d-4c2e-b8f0-e476e12c162b"), "StyledStrings"))
+    if StyledStrings !== nothing
+        @eval StyledStrings begin
+            __init__() = nothing # Assume that StyledStrings are not actually used
+        end
+    end
+
+    Markdown = find_loaded_root_module(Base.PkgId(
+        Base.UUID("d6f4376e-aef5-505a-96c1-9c027394607a"), "Markdown"))
+    if Markdown !== nothing
+        @eval Markdown begin
+            __init__() = nothing # Assume that Markdown is not actually used with StyledStrings
+        end
+    end
+
+    JuliaSyntaxHighlighting = find_loaded_root_module(Base.PkgId(
+        Base.UUID("ac6e5ff7-fb65-4e79-a425-ec3bc9c03011"), "JuliaSyntaxHighlighting"))
+    if JuliaSyntaxHighlighting !== nothing
+        @eval JuliaSyntaxHighlighting begin
+            __init__() = nothing # Assume the JuliaSyntaxHighlighting is not actually used with StyledStrings
+        end
+    end
+end
diff --git a/contrib/juliac/juliac.jl b/contrib/juliac/juliac.jl
new file mode 100644
index 0000000000000..23eb1d93ffa82
--- /dev/null
+++ b/contrib/juliac/juliac.jl
@@ -0,0 +1,221 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Julia compiler wrapper script
+# NOTE: The interface and location of this script are considered unstable/experimental
+
+using LazyArtifacts
+
+module JuliaConfig
+    include(joinpath(@__DIR__, "..", "julia-config.jl"))
+end
+
+julia_cmd = `$(Base.julia_cmd()) --startup-file=no --history-file=no`
+cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing)
+julia_cmd_target =  `$(Base.julia_cmd(;cpu_target)) --startup-file=no --history-file=no`
+output_type = nothing  # exe, sharedlib, sysimage
+abi_export_file = nothing
+outname = nothing
+file = nothing
+add_ccallables = false
+relative_rpath = false
+verbose = false
+
+help = findfirst(x->x == "--help", ARGS)
+if help !== nothing
+    println(
+        """
+        Usage: julia juliac.jl [--output-exe | --output-lib | --output-sysimage] <name> [options] <file.jl>
+        --experimental --trim=<no,safe,unsafe,unsafe-warn>  Only output code statically determined to be reachable
+        --export-abi <file>  Emit type / function information for the ABI (in JSON format)
+        --compile-ccallable  Include all methods marked `@ccallable` in output
+        --relative-rpath     Configure the library / executable to lookup all required libraries in an adjacent "julia/" folder
+        --verbose            Request verbose output
+        """)
+    exit(0)
+end
+
+# Copied from PackageCompiler
+# https://github.com/JuliaLang/PackageCompiler.jl/blob/1c35331d8ef81494f054bbc71214811253101993/src/PackageCompiler.jl#L147-L190
+function get_compiler_cmd(; cplusplus::Bool=false)
+    cc = get(ENV, "JULIA_CC", nothing)
+    path = nothing
+    @static if Sys.iswindows()
+        path = joinpath(LazyArtifacts.artifact"mingw-w64",
+                        "extracted_files",
+                        (Int==Int64 ? "mingw64" : "mingw32"),
+                        "bin",
+                        cplusplus ? "g++.exe" : "gcc.exe")
+        compiler_cmd = `$path`
+    end
+    if cc !== nothing
+        compiler_cmd = Cmd(Base.shell_split(cc))
+        path = nothing
+    elseif !Sys.iswindows()
+        compilers_cpp = ("g++", "clang++")
+        compilers_c = ("gcc", "clang")
+        found_compiler = false
+        if cplusplus
+            for compiler in compilers_cpp
+                if Sys.which(compiler) !== nothing
+                    compiler_cmd = `$compiler`
+                    found_compiler = true
+                    break
+                end
+            end
+        end
+        if !found_compiler
+            for compiler in compilers_c
+                if Sys.which(compiler) !== nothing
+                    compiler_cmd = `$compiler`
+                    found_compiler = true
+                    if cplusplus && !WARNED_CPP_COMPILER[]
+                        @warn "could not find a c++ compiler (g++ or clang++), falling back to $compiler, this might cause link errors"
+                        WARNED_CPP_COMPILER[] = true
+                    end
+                    break
+                end
+            end
+        end
+        found_compiler || error("could not find a compiler, looked for ",
+            join(((cplusplus ? compilers_cpp : ())..., compilers_c...), ", ", " and "))
+    end
+    if path !== nothing
+        compiler_cmd = addenv(compiler_cmd, "PATH" => string(ENV["PATH"], ";", dirname(path)))
+    end
+    return compiler_cmd
+end
+
+# arguments to forward to julia compilation process
+julia_args = []
+enable_trim::Bool = false
+project::String = "--project=$(Base.active_project())"
+
+let i = 1
+    while i <= length(ARGS)
+        arg = ARGS[i]
+        if arg == "--output-exe" || arg == "--output-lib" || arg == "--output-sysimage"
+            isnothing(output_type) || error("Multiple output types specified")
+            global output_type = arg
+            i == length(ARGS) && error("Output specifier requires an argument")
+            global outname = ARGS[i+1]
+            i += 1
+        elseif arg == "--export-abi"
+            i == length(ARGS) && error("Output specifier requires an argument")
+            global abi_export_file = ARGS[i+1]
+            i += 1
+        elseif arg == "--compile-ccallable"
+            global add_ccallables = true
+        elseif arg == "--verbose"
+            global verbose = true
+        elseif arg == "--relative-rpath"
+            global relative_rpath = true
+        elseif startswith(arg, "--trim")
+            global enable_trim = arg != "--trim=no"
+            push!(julia_args, arg) # forwarded arg
+        elseif arg == "--experimental"
+            push!(julia_args, arg) # forwarded arg
+        elseif startswith(arg, "--proj")
+            global project = arg
+        else
+            if arg[1] == '-' || !isnothing(file)
+                println("Unexpected argument `$arg`")
+                exit(1)
+            end
+            global file = arg
+        end
+        i += 1
+    end
+end
+
+isnothing(outname) && error("No output file specified")
+isnothing(file) && error("No input file specified")
+
+function get_rpath(; relative::Bool = false)
+    if relative
+        if Sys.isapple()
+            return "-Wl,-rpath,'@loader_path/julia/' -Wl,-rpath,'@loader_path/'"
+        elseif Sys.islinux()
+            return "-Wl,-rpath,'\$ORIGIN/julia/' -Wl,-rpath,'\$ORIGIN/'"
+        else
+            error("unimplemented")
+        end
+    else
+        return JuliaConfig.ldrpath()
+    end
+end
+
+cc = get_compiler_cmd()
+absfile = abspath(file)
+cflags = JuliaConfig.cflags(; framework=false)
+cflags = Base.shell_split(cflags)
+allflags = JuliaConfig.allflags(; framework=false, rpath=false)
+allflags = Base.shell_split(allflags)
+rpath = get_rpath(; relative = relative_rpath)
+rpath = Base.shell_split(rpath)
+tmpdir = mktempdir(cleanup=false)
+img_path = joinpath(tmpdir, "img.a")
+bc_path = joinpath(tmpdir, "img-bc.a")
+
+function precompile_env()
+    # Pre-compile the environment
+    # (otherwise obscure error messages will occur)
+    cmd = addenv(`$julia_cmd $project -e "using Pkg; Pkg.precompile()"`)
+    verbose && println("Running: $cmd")
+    if !success(pipeline(cmd; stdout, stderr))
+        println(stderr, "\nError encountered during pre-compilation of environment.")
+        exit(1)
+    end
+end
+
+function compile_products(enable_trim::Bool)
+
+    # Only strip IR / metadata if not `--trim=no`
+    strip_args = String[]
+    if enable_trim
+        push!(strip_args, "--strip-ir")
+        push!(strip_args, "--strip-metadata")
+    end
+
+    # Compile the Julia code
+    args = String[absfile, output_type, string(add_ccallables)]
+    if abi_export_file !== nothing
+        push!(args, abi_export_file)
+    end
+    cmd = addenv(`$julia_cmd_target $project --output-o $img_path --output-incremental=no $strip_args $julia_args $(joinpath(@__DIR__,"juliac-buildscript.jl")) $(args)`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1)
+    verbose && println("Running: $cmd")
+    if !success(pipeline(cmd; stdout, stderr))
+        println(stderr, "\nFailed to compile $file")
+        exit(1)
+    end
+end
+
+function link_products()
+    global outname
+    if output_type == "--output-lib" || output_type == "--output-sysimage"
+        of, ext = splitext(outname)
+        soext = "." * Base.BinaryPlatforms.platform_dlext()
+        if ext == ""
+            outname = of * soext
+        end
+    end
+
+    julia_libs = Base.shell_split(Base.isdebugbuild() ? "-ljulia-debug -ljulia-internal-debug" : "-ljulia -ljulia-internal")
+    try
+        if output_type == "--output-lib"
+            cmd2 = `$(cc) $(allflags) $(rpath) -o $outname -shared $(Base.Linking.whole_archive(img_path; is_cc=true)) $(julia_libs)`
+        elseif output_type == "--output-sysimage"
+            cmd2 = `$(cc) $(allflags) $(rpath) -o $outname -shared -Wl,$(Base.Linking.whole_archive(img_path; is_cc=true)) $(julia_libs)`
+        else
+            cmd2 = `$(cc) $(allflags) $(rpath) -o $outname -Wl,$(Base.Linking.whole_archive(img_path; is_cc=true)) $(julia_libs)`
+        end
+        verbose && println("Running: $cmd2")
+        run(cmd2)
+    catch e
+        println("\nCompilation failed: ", e)
+        exit(1)
+    end
+end
+
+precompile_env()
+compile_products(enable_trim)
+link_products()
diff --git a/contrib/mac/app/Makefile b/contrib/mac/app/Makefile
index 81b7e47cdf2cf..70436a857c265 100644
--- a/contrib/mac/app/Makefile
+++ b/contrib/mac/app/Makefile
@@ -47,8 +47,8 @@ dmg/$(APP_NAME): startup.applescript julia.icns
 	plutil -insert  CFBundleVersion            -string "$(JULIA_VERSION_OPT_COMMIT)" $@/Contents/Info.plist
 	plutil -insert  NSHumanReadableCopyright   -string "$(APP_COPYRIGHT)" $@/Contents/Info.plist
 	-mkdir -p $@/Contents/Resources/julia
-	make -C $(JULIAHOME) binary-dist
-	tar zxf $(JULIAHOME)/$(JULIA_BINARYDIST_FILENAME).tar.gz -C $@/Contents/Resources/julia --strip-components 1
+	$(MAKE) -C $(JULIAHOME) binary-dist
+	$(TAR) -xzf $(JULIAHOME)/$(JULIA_BINARYDIST_FILENAME).tar.gz -C $@/Contents/Resources/julia --strip-components 1
 	find $@/Contents/Resources/julia -type f -exec chmod -w {} \;
 	# Even though the tarball may already be signed, we re-sign here to make it easier to add
 	# unsigned executables (like the app launcher) and whatnot, without needing to maintain lists
diff --git a/contrib/mac/app/startup.applescript b/contrib/mac/app/startup.applescript
index 9964049f34ed6..d7b46cec1a89d 100644
--- a/contrib/mac/app/startup.applescript
+++ b/contrib/mac/app/startup.applescript
@@ -1,4 +1,3 @@
 set RootPath to (path to me)
 set JuliaPath to POSIX path of ((RootPath as text) & "Contents:Resources:julia:bin:julia")
-set JuliaFile to POSIX file JuliaPath
-tell application id "com.apple.finder" to open JuliaFile
+do shell script "open -a Terminal '" & JuliaPath & "'"
diff --git a/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m b/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m
index db2f13b485189..1d20d6ed3efa1 100644
--- a/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m
+++ b/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m
@@ -51,7 +51,7 @@ + (ExecSandboxController *)sharedController {
 
 @end
 
-/// Location of an installed variant of Julia (frameowrk or nix hier).
+/// Location of an installed variant of Julia (framework or nix hier).
 @interface JuliaVariant : NSObject
 @property(readonly, nullable) NSBundle *bundle;
 @property(readonly, nonnull) NSURL *juliaexe;
diff --git a/contrib/mac/frameworkapp/README.md b/contrib/mac/frameworkapp/README.md
index 94c344d16564f..953ad115c94e1 100644
--- a/contrib/mac/frameworkapp/README.md
+++ b/contrib/mac/frameworkapp/README.md
@@ -4,23 +4,23 @@ New Julia Launcher App
 This builds the Julia framework and a launcher app and packages them in a
 product archive for the macOS Installer.
 
-Run `make APPLE_DEVELOPMENT_TEAM=xxxxxxxxxx` to build the product archive.  The
+Run `make APPLE_DEVELOPMENT_TEAM=xxxxxxxxxx` to build the product archive. The
 resulting archive may be installed to the home directory with
 `installer -pkg~/Documents/pkgs/Julia-1.1.0.pkg -target CurrentUserHomeDirectory`.
-To just build the app, build the `appexport` make target.  Read the comments at
+To just build the app, build the `appexport` make target. Read the comments at
 the top of the `Makefile` to set appropriate code signing parameters.
 
 The framework is installed in `/Library/Frameworks` and the app in
-`/Applications`.  Installation may be system-wide (i.e., relative to `/`) or
+`/Applications`. Installation may be system-wide (i.e., relative to `/`) or
 local to the user's home directory (i.e., `$Home/Applications/Julia.app`).
 
 The `julia` binary is embedded in the framework at
 `Julia.framework/Helpers/julia`.
 
-Multiple versions of Julia may be installed at once.  Each version is placed in
-the `Julia.framework/Versions` directory.  By default, the version is
+Multiple versions of Julia may be installed at once. Each version is placed in
+the `Julia.framework/Versions` directory. By default, the version is
 identified by the Major.Minor version number but may be customized by setting
-the `FRAMEWORK_VERSION` make variable.  The resulting product archive will not
-overwrite other versions but will upgrade a version if it exists.  Thus, the
+the `FRAMEWORK_VERSION` make variable. The resulting product archive will not
+overwrite other versions but will upgrade a version if it exists. Thus, the
 `1.1` framework version that is actually the 3rd patch (1.1.3) will overwrite
 any existing `1.1` framework version.
diff --git a/contrib/normalize_triplet.py b/contrib/normalize_triplet.py
index 77c047b360b76..833b725480996 100755
--- a/contrib/normalize_triplet.py
+++ b/contrib/normalize_triplet.py
@@ -14,11 +14,13 @@
     'i686': "i\\d86",
     'aarch64': "(arm|aarch)64",
     'armv7l': "arm(v7l)?",
+    'riscv64': "(rv64|riscv64)",
     'powerpc64le': "p(ower)?pc64le",
 }
 platform_mapping = {
     'darwin': "-apple-darwin[\\d\\.]*",
     'freebsd': "-(.*-)?freebsd[\\d\\.]*",
+    'openbsd': "-(.*-)?openbsd[\\d\\.]*",
     'windows': "-w64-mingw32",
     'linux': "-(.*-)?linux",
 }
@@ -96,6 +98,7 @@ def p(x):
         'darwin': 'apple-darwin',
         'windows': 'w64-mingw32',
         'freebsd': 'unknown-freebsd',
+        'openbsd': 'unknown-openbsd',
     }
     x = r(x)
     if x:
diff --git a/contrib/pgo-lto-bolt/.gitignore b/contrib/pgo-lto-bolt/.gitignore
new file mode 100644
index 0000000000000..1b29279acc0da
--- /dev/null
+++ b/contrib/pgo-lto-bolt/.gitignore
@@ -0,0 +1,14 @@
+stage0*
+stage1*
+stage2*
+bolt
+bolt_instrument
+merge_data
+copy_originals
+
+profiles
+profiles-bolt
+
+toolchain
+pgo-instrumented.build
+optimized.build
diff --git a/contrib/pgo-lto-bolt/Makefile b/contrib/pgo-lto-bolt/Makefile
new file mode 100644
index 0000000000000..ce1b8b04f68c9
--- /dev/null
+++ b/contrib/pgo-lto-bolt/Makefile
@@ -0,0 +1,190 @@
+.PHONY: clean clean_profiles restore_originals
+
+# See the makefiles in contrib/bolt and contrib/pgo-lto for more information.
+
+# Settings taken from https://github.com/rust-lang/rust/blob/master/src/tools/opt-dist/src/bolt.rs
+BOLT_ARGS :=
+# Reorder basic blocks within functions
+BOLT_ARGS += -reorder-blocks=ext-tsp
+# Reorder functions within the binary
+BOLT_ARGS += -reorder-functions=cdsort
+# Split function code into hot and code regions
+BOLT_ARGS += -split-functions
+# Split as many basic blocks as possible
+BOLT_ARGS += -split-all-cold
+# Move jump tables to a separate section
+BOLT_ARGS += -jump-tables=move
+# Use regular size pages for code alignment
+BOLT_ARGS += -no-huge-pages
+# Fold functions with identical code
+BOLT_ARGS += -icf=1
+# Split using best available strategy (three-way splitting, Cache-Directed Sort)
+# Disabled for libjulia-internal till https://github.com/llvm/llvm-project/issues/89508 is fixed
+# BOLT_ARGS += -split-strategy=cdsplit
+# Update DWARF debug info in the final binary
+BOLT_ARGS += -update-debug-sections
+# Print optimization statistics
+BOLT_ARGS += -dyno-stats
+# BOLT doesn't fully support computed gotos, https://github.com/llvm/llvm-project/issues/89117
+# Use escaped regex as the name BOLT recognises is often a bit different, e.g. apply_cl/1(*2)
+# This doesn't actually seem to do anything, the actual mitigation is not using --use-old-text
+# which we do in the bolt target
+BOLT_ARGS += -skip-funcs=.\*apply_cl.\*
+
+# -fno-reorder-blocks-and-partition is needed on gcc >= 8.
+BOLT_FLAGS := $\
+	"BOLT_CFLAGS_GCC+=-fno-reorder-blocks-and-partition" $\
+	"BOLT_LDFLAGS=-Wl,--emit-relocs"
+
+STAGE0_BUILD:=$(CURDIR)/toolchain
+STAGE1_BUILD:=$(CURDIR)/pgo-instrumented.build
+STAGE2_BUILD:=$(CURDIR)/optimized.build
+
+STAGE0_BINARIES:=$(STAGE0_BUILD)/usr/bin/
+STAGE0_TOOLS:=$(STAGE0_BUILD)/usr/tools/
+
+BOLT_PROFILE_DIR:=$(CURDIR)/profiles-bolt
+PGO_PROFILE_DIR:=$(CURDIR)/profiles
+PGO_PROFILE_FILE:=$(PGO_PROFILE_DIR)/merged.prof
+PGO_PROFRAW_FILES:=$(wildcard $(PGO_PROFILE_DIR)/*.profraw)
+JULIA_ROOT:=$(CURDIR)/../..
+
+LLVM_BOLT:=$(STAGE0_BINARIES)llvm-bolt
+LLVM_MERGEFDATA:=$(STAGE0_BINARIES)merge-fdata
+LLVM_CXXFILT:=$(STAGE0_TOOLS)llvm-cxxfilt
+LLVM_PROFDATA:=$(STAGE0_TOOLS)llvm-profdata
+LLVM_OBJCOPY:=$(STAGE0_TOOLS)llvm-objcopy
+
+# If you add new files to optimize, you need to add BOLT_LDFLAGS and BOLT_CFLAGS to the build of your new file.
+SYMLINKS_TO_OPTIMIZE := libLLVM.so libjulia-internal.so libjulia-codegen.so
+FILES_TO_OPTIMIZE := $(shell for file in $(SYMLINKS_TO_OPTIMIZE); do readlink $(STAGE1_BUILD)/usr/lib/$$file; done)
+
+AFTER_INSTRUMENT_MESSAGE:='Run `make finish_stage2` to finish off the build. $\
+	You can now optionally collect more profiling data by running Julia with an appropriate workload, $\
+	if you wish, run `make clean_profiles` before doing so to remove any profiling data generated by `make finish_stage2`. $\
+	You should end up with some data in $(BOLT_PROFILE_DIR). Afterwards run `make merge_data && make bolt`.'
+
+# When building a single libLLVM.so we need to increase -vp-counters-per-site
+# significantly
+COUNTERS_PER_SITE:=6
+# Note: profile counters are not atomic by default, https://discourse.llvm.org/t/profile-guided-optimization-pgo-related-questions-and-suggestions/75232/5
+
+AFTER_STAGE1_MESSAGE:='You can now optionally collect more profiling data for use in PGO by running Julia $\
+	with an appropriate workload. If you wish, run `make clean_profiles` before doing so to remove any profiling data $\
+	generated by building Julia. You should end up with about 15MB of data in $(PGO_PROFILE_DIR). $\
+	Note that running extensive scripts may result in counter overflows, which can be detected by running $\
+	`make top`. Afterwards run `make stage2`.'
+
+TOOLCHAIN_FLAGS = $\
+	"CC=$(STAGE0_TOOLS)clang" $\
+	"CXX=$(STAGE0_TOOLS)clang++" $\
+	"LD=$(STAGE0_TOOLS)ld.lld" $\
+	"AR=$(STAGE0_TOOLS)llvm-ar" $\
+	"RANLIB=$(STAGE0_TOOLS)llvm-ranlib" $\
+	"CFLAGS+=$(PGO_CFLAGS)" $\
+	"CXXFLAGS+=$(PGO_CXXFLAGS)" $\
+	"LDFLAGS+=-fuse-ld=lld $(PGO_LDFLAGS)"
+
+$(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD):
+	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
+
+stage0: export USE_BINARYBUILDER_LLVM=1
+stage0: | $(STAGE0_BUILD)
+	# Turn [cd]tors into init/fini_array sections in libclang_rt, since lld
+	# doesn't do that, and otherwise the profile constructor is not executed
+	$(MAKE) -C $(STAGE0_BUILD)/deps install-clang install-llvm install-lld install-llvm-tools install-BOLT && \
+	find $< -name 'libclang_rt.profile-*.a' -exec $(LLVM_OBJCOPY) --rename-section .ctors=.init_array --rename-section .dtors=.fini_array {} + && \
+	touch $@
+
+$(STAGE1_BUILD): stage0
+stage1: PGO_CFLAGS:=-fprofile-generate=$(PGO_PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
+stage1: PGO_CXXFLAGS:=-fprofile-generate=$(PGO_PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
+stage1: PGO_LDFLAGS:=-flto=thin -fprofile-generate=$(PGO_PROFILE_DIR)
+stage1: export USE_BINARYBUILDER_LLVM=0
+stage1: | $(STAGE1_BUILD)
+	$(MAKE) -C $(STAGE1_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
+	@echo $(AFTER_STAGE1_MESSAGE)
+
+stage2: PGO_CFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+stage2: PGO_CXXFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+stage2: PGO_LDFLAGS:=-flto=thin -fprofile-use=$(PGO_PROFILE_FILE) -Wl,--icf=safe
+stage2: export USE_BINARYBUILDER_LLVM=0
+stage2: $(PGO_PROFILE_FILE) | $(STAGE2_BUILD)
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) $(BOLT_FLAGS) julia-src-release julia-symlink julia-libccalltest \
+								julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest && \
+	touch $@
+
+copy_originals: stage2
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		cp $$abs_file "$$abs_file.original"; \
+	done && \
+	touch $@
+
+# I don't think there's any particular reason to have -no-huge-pages here, perhaps slightly more accurate profile data
+# as the final build uses -no-huge-pages
+# We reset the mtime of the files to prevent make from rebuilding targets depending on them.
+bolt_instrument: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		old_time=$$(stat -c %Y $$abs_file); \
+		$(LLVM_BOLT) "$$abs_file.original" -o $$abs_file --instrument --instrumentation-file-append-pid --instrumentation-file="$(BOLT_PROFILE_DIR)/$$file-prof" -no-huge-pages; \
+		mkdir -p $$(dirname "$(BOLT_PROFILE_DIR)/$$file-prof"); \
+		touch -d "@$$old_time" $$abs_file; \
+		printf "\n"; \
+	done && \
+	touch $@
+	@echo $(AFTER_INSTRUMENT_MESSAGE)
+
+finish_stage2: PGO_CFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+finish_stage2: PGO_CXXFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+finish_stage2: PGO_LDFLAGS:=-flto=thin -fprofile-use=$(PGO_PROFILE_FILE) -Wl,--icf=safe
+finish_stage2: stage2
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS)
+
+merge_data: bolt_instrument
+	for file in $(FILES_TO_OPTIMIZE); do \
+		profiles=$(BOLT_PROFILE_DIR)/$$file-prof.*.fdata; \
+		$(LLVM_MERGEFDATA) $$profiles > "$(BOLT_PROFILE_DIR)/$$file-prof.merged.fdata"; \
+	done && \
+	touch $@
+
+# The --use-old-text saves about 16 MiB of libLLVM.so size.
+# However, the rust folk found it succeeds very non-deterministically for them.
+# It tries to reuse old text segments to reduce binary size
+# BOLT doesn't fully support computed gotos https://github.com/llvm/llvm-project/issues/89117, so we cannot use --use-old-text on libjulia-internal
+# That flag saves less than 1 MiB for libjulia-internal so oh well.
+# We reset the mtime of the files to prevent make from rebuilding targets depending on them.
+bolt: merge_data
+	for file in $(FILES_TO_OPTIMIZE); do \
+        abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		old_time=$$(stat -c %Y $$abs_file); \
+		$(LLVM_BOLT) "$$abs_file.original" -data "$(BOLT_PROFILE_DIR)/$$file-prof.merged.fdata" -o $$abs_file $(BOLT_ARGS) $$(if [ "$$file" != $(shell readlink $(STAGE2_BUILD)/usr/lib/libjulia-internal.so) ]; then echo "--use-old-text -split-strategy=cdsplit"; fi); \
+		touch -d "@$$old_time" $$abs_file; \
+    done && \
+    touch $@
+
+clean_profiles:
+	rm -rf $(PGO_PROFILE_DIR) $(BOLT_PROFILE_DIR)
+
+clean:
+	rm -f stage0 stage1 stage2 $(PGO_PROFILE_FILE) bolt copy_originals merge_data bolt_instrument
+
+restore_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		cp -P "$$abs_file.original" $$abs_file; \
+	done
+
+delete_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		rm "$$abs_file.original"; \
+	done
+
+$(PGO_PROFILE_FILE): stage1 $(PGO_PROFRAW_FILES)
+	$(LLVM_PROFDATA) merge -output=$@ $(PGO_PROFRAW_FILES)
+
+# show top 50 functions
+top: $(PGO_PROFILE_FILE)
+	$(LLVM_PROFDATA) show --topn=50 $< | $(LLVM_CXXFILT)
diff --git a/contrib/pgo-lto-bolt/README.md b/contrib/pgo-lto-bolt/README.md
new file mode 100644
index 0000000000000..ab574907c292f
--- /dev/null
+++ b/contrib/pgo-lto-bolt/README.md
@@ -0,0 +1,18 @@
+BOLT only works on x86_64 and arch64 on Linux.
+
+DO NOT STRIP THE RESULTING .so FILES, https://github.com/llvm/llvm-project/issues/56738.
+If you really need to, try adding `-use-gnu-stack` to `BOLT_ARGS`.
+
+To build a PGO+LTO+BOLT version of Julia run the following commands (`cd` into this directory first)
+```bash
+make stage1
+make stage2
+make copy_originals
+make bolt_instrument
+make finish_stage2
+make merge_data
+make bolt
+```
+After these commands finish, the optimized version of Julia will be built in the `optimized.build` directory.
+
+This doesn't align the code to support huge pages as it doesn't seem that we do that currently, this decreases the size of the .so files by 2-4mb.
diff --git a/contrib/pgo-lto/.gitignore b/contrib/pgo-lto/.gitignore
new file mode 100644
index 0000000000000..978d8f2ca86dd
--- /dev/null
+++ b/contrib/pgo-lto/.gitignore
@@ -0,0 +1,4 @@
+profiles
+stage0*
+stage1*
+stage2*
diff --git a/contrib/pgo-lto/Makefile b/contrib/pgo-lto/Makefile
new file mode 100644
index 0000000000000..5902d4ad08151
--- /dev/null
+++ b/contrib/pgo-lto/Makefile
@@ -0,0 +1,74 @@
+.PHONY: top clean clean-profiles
+
+STAGE0_BUILD:=$(CURDIR)/stage0.build
+STAGE1_BUILD:=$(CURDIR)/stage1.build
+STAGE2_BUILD:=$(CURDIR)/stage2.build
+
+STAGE0_TOOLS:=$(STAGE0_BUILD)/usr/tools/
+
+PROFILE_DIR:=$(CURDIR)/profiles
+PROFILE_FILE:=$(PROFILE_DIR)/merged.prof
+JULIA_ROOT:=$(CURDIR)/../..
+
+LLVM_CXXFILT:=$(STAGE0_TOOLS)llvm-cxxfilt
+LLVM_PROFDATA:=$(STAGE0_TOOLS)llvm-profdata
+LLVM_OBJCOPY:=$(STAGE0_TOOLS)llvm-objcopy
+
+# When building a single libLLVM.so we need to increase -vp-counters-per-site
+# significantly
+COUNTERS_PER_SITE:=6
+# Note: profile counters are not atomic by default, https://discourse.llvm.org/t/profile-guided-optimization-pgo-related-questions-and-suggestions/75232/5
+
+AFTER_STAGE1_MESSAGE:='You can now optionally collect more profiling data for use in PGO by running Julia $\
+	with an appropriate workload. If you wish, run `make clean_profiles` before doing so to remove any profiling data $\
+	generated by building Julia. You should end up with about 15MB of data in $(PGO_PROFILE_DIR). $\
+	Note that running extensive scripts may result in counter overflows, which can be detected by running $\
+	`make top`. Afterwards run `make stage2`.'
+
+STAGE1_FLAGS:=LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-generate=$(PROFILE_DIR)" $\
+			CFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)" $\
+			CXXFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)"
+STAGE2_FLAGS:=LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe" $\
+			CFLAGS="-fprofile-use=$(PROFILE_FILE)" $\
+			CXXFLAGS="-fprofile-use=$(PROFILE_FILE)"
+
+COMMON_FLAGS:=USE_BINARYBUILDER_LLVM=0
+
+all: stage2 # Default target as first in file
+
+$(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD):
+	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
+
+stage0: export USE_BINARYBUILDER_LLVM=1
+stage0: | $(STAGE0_BUILD)
+	# Turn [cd]tors into init/fini_array sections in libclang_rt, since lld
+	# doesn't do that, and otherwise the profile constructor is not executed
+	$(MAKE) -C $(STAGE0_BUILD)/deps install-clang install-llvm install-lld install-llvm-tools && \
+	find $< -name 'libclang_rt.profile-*.a' -exec $(LLVM_OBJCOPY) --rename-section .ctors=.init_array --rename-section .dtors=.fini_array {} + && \
+	touch $@
+
+$(STAGE1_BUILD): stage0
+stage1: | $(STAGE1_BUILD)
+	@echo "--- Build Julia Stage 1 - with instrumentation"
+	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE1_FLAGS) $(MAKE) -C $(STAGE1_BUILD) $(COMMON_FLAGS) && touch $@
+	@echo $(AFTER_STAGE1_MESSAGE)
+
+stage2: $(PROFILE_FILE) | $(STAGE2_BUILD)
+	@echo "--- Build Julia Stage 2 - PGO + LTO optimised"
+	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) && touch $@
+
+.DEFAULT: stage2
+	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) $@
+
+$(PROFILE_FILE): stage1 $(wildcard $(PROFILE_DIR)/*.profraw)
+	$(LLVM_PROFDATA) merge -output=$@ $(PROFILE_DIR)/*.profraw
+
+# show top 50 functions
+top: $(PROFILE_FILE)
+	$(LLVM_PROFDATA) show --topn=50 $< | $(LLVM_CXXFILT)
+
+clean-profiles:
+	rm -rf $(PROFILE_DIR)
+
+clean:
+	rm -f stage0 stage1 stage2 $(PROFILE_FILE)
diff --git a/contrib/print_sorted_stdlibs.jl b/contrib/print_sorted_stdlibs.jl
index 6bc2023c4f1cc..c4cf391efb623 100644
--- a/contrib/print_sorted_stdlibs.jl
+++ b/contrib/print_sorted_stdlibs.jl
@@ -12,12 +12,13 @@ function check_flag(flag)
 end
 
 if check_flag("--help") || check_flag("-h")
-    println("Usage: julia print_sorted_stdlibs.jl [stdlib_dir] [--exclude-jlls] [--exclude-sysimage]")
+    println("Usage: julia print_sorted_stdlibs.jl [stdlib_dir] [--exclude-jlls] [--exclude-sysimage] [--only-sysimg]")
 end
 
 # Allow users to ask for JLL or no JLLs
 exclude_jlls = check_flag("--exclude-jlls")
 exclude_sysimage = check_flag("--exclude-sysimage")
+only_sysimage = check_flag("--only-sysimg")
 
 # Default to the `stdlib/vX.Y` directory
 STDLIB_DIR = get(ARGS, 1, joinpath(@__DIR__, "..", "usr", "share", "julia", "stdlib"))
@@ -81,9 +82,19 @@ if exclude_jlls
     filter!(p -> !endswith(p, "_jll"), sorted_projects)
 end
 
-if exclude_sysimage
-    loaded_modules = Set(map(k->k.name, collect(keys(Base.loaded_modules))))
-    filter!(p->!in(p, loaded_modules), sorted_projects)
+if only_sysimage && exclude_sysimage
+    println(stderr, "Warning: --only-sysimg and --exclude-sysimage are mutually exclusive. Prioritizing --only-sysimg.")
+    exclude_sysimage = false
+end
+
+if only_sysimage || exclude_sysimage
+    loaded_modules_set = Set(map(k->k.name, collect(keys(Base.loaded_modules))))
+
+    if only_sysimage
+        filter!(p -> in(p, loaded_modules_set), sorted_projects)
+    else
+        filter!(p -> !in(p, loaded_modules_set), sorted_projects)
+    end
 end
 
 # Print out sorted projects, ready to be pasted into `sysimg.jl`
@@ -92,6 +103,9 @@ println("    # Stdlibs sorted in dependency, then alphabetical, order by contrib
 if exclude_jlls
     println("    # Run with the `--exclude-jlls` option to filter out all JLL packages")
 end
+if only_sysimage
+    println("    # Run with the `--only-sysimg` option to filter for only packages included in the system image")
+end
 if exclude_sysimage
     println("    # Run with the `--exclude-sysimage` option to filter out all packages included in the system image")
 end
diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk
index f67088141ccd4..77921858f2b6e 100644
--- a/contrib/refresh_checksums.mk
+++ b/contrib/refresh_checksums.mk
@@ -19,12 +19,12 @@ all: checksum pack-checksum
 # Get this list via:
 #    using BinaryBuilder
 #    print("TRIPLETS=\"$(join(sort(triplet.(BinaryBuilder.supported_platforms(;experimental=true))), " "))\"")
-TRIPLETS=aarch64-apple-darwin aarch64-linux-gnu aarch64-linux-musl armv6l-linux-gnueabihf armv6l-linux-musleabihf armv7l-linux-gnueabihf armv7l-linux-musleabihf i686-linux-gnu i686-linux-musl i686-w64-mingw32 powerpc64le-linux-gnu x86_64-apple-darwin x86_64-linux-gnu x86_64-linux-musl x86_64-unknown-freebsd x86_64-w64-mingw32
+TRIPLETS=aarch64-apple-darwin aarch64-linux-gnu aarch64-linux-musl aarch64-unknown-freebsd armv6l-linux-gnueabihf armv6l-linux-musleabihf armv7l-linux-gnueabihf armv7l-linux-musleabihf i686-linux-gnu i686-linux-musl i686-w64-mingw32 powerpc64le-linux-gnu riscv64-linux-gnu x86_64-apple-darwin x86_64-linux-gnu x86_64-linux-musl x86_64-unknown-freebsd x86_64-w64-mingw32
 CLANG_TRIPLETS=$(filter %-darwin %-freebsd,$(TRIPLETS))
 NON_CLANG_TRIPLETS=$(filter-out %-darwin %-freebsd,$(TRIPLETS))
 
 # These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded:
-BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libtracyclient
+BB_PROJECTS=openssl libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib zstd libsuitesparse openlibm blastrampoline libtracyclient mmtk_julia
 BB_GCC_EXPANDED_PROJECTS=openblas csl
 BB_CXX_EXPANDED_PROJECTS=gmp llvm clang llvm-tools lld
 # These are non-BB source-only deps
@@ -58,10 +58,6 @@ checksum-$(1)-$(2)-$(3): clean-$(1)
 # Add this guy to his project target
 checksum-$(1): checksum-$(1)-$(2)-$(3)
 
-# Add a dependency to the pack target
-# TODO: can we make this so it only adds an ordering but not a dependency?
-pack-checksum-$(1): | checksum-$(1)
-
 # Add this guy to the `checksum` and `pack-checksum` default targets (e.g. `make -f contrib/refresh_checksums.mk openblas`)
 checksum: checksum-$1
 $1 pack-checksum: pack-checksum-$1
@@ -100,7 +96,7 @@ checksum-doc-unicodedata:
 all: checksum-doc-unicodedata
 .PHONY: checksum-doc-unicodedata
 
-# merge substring project names to avoid races
+# merge substring project names (llvm and llvm-tools, libsuitesparse and suitesparse) to avoid races
 pack-checksum-llvm-tools: | pack-checksum-llvm
 	@# nothing to do but disable the prefix rule
 pack-checksum-llvm: | checksum-llvm-tools
@@ -110,18 +106,21 @@ pack-checksum-compilersupportlibraries: | checksum-csl
 pack-checksum-libsuitesparse: | pack-checksum-suitesparse
 	@# nothing to do but disable the prefix rule
 pack-checksum-suitesparse: | checksum-libsuitesparse
-# This is a bit tricky: we want llvmunwind to be separate from unwind and llvm,
+# This is a bit tricky: we want llvmunwind, clang, and lld to be separate from unwind and llvm,
 # so we add a rule to process those first
 pack-checksum-llvm pack-checksum-unwind: | pack-checksum-llvmunwind
-# and the name for LLVMLibUnwind is awkward, so handle that with a regex
-pack-checksum-llvmunwind: | pack-checksum-llvm.*unwind
+pack-checksum-llvm: | pack-checksum-clang pack-checksum-lld
+# and the name for LLVMLibUnwind is awkward, so handle that packing with a regex
+checksum-llvm.*unwind: checksum-llvmunwind
+	@# nothing to do but disable the prefix rule
+pack-checksum-llvmunwind: | pack-checksum-llvm.*unwind # override general rule below
 	cd "$(JULIAHOME)/deps/checksums" && mv 'llvm.*unwind' llvmunwind
 
 clean-%: FORCE
 	-rm "$(JULIAHOME)/deps/checksums"/'$*'
 
 # define how to pack parallel checksums into a single file format
-pack-checksum-%: FORCE
+pack-checksum-%: FORCE | checksum-%
 	@echo making "$(JULIAHOME)/deps/checksums/"'$*'
 	@cd "$(JULIAHOME)/deps/checksums" && \
 		for each in $$(ls | grep -i '$*'); do \
diff --git a/contrib/tsan/Make.user.tsan b/contrib/tsan/Make.user.tsan
index b192c36e4cfee..252a17ba86497 100644
--- a/contrib/tsan/Make.user.tsan
+++ b/contrib/tsan/Make.user.tsan
@@ -3,7 +3,6 @@ BINDIR=$(TOOLCHAIN)/usr/bin
 TOOLDIR=$(TOOLCHAIN)/usr/tools
 
 # use our new toolchain
-USECLANG=1
 override CC=$(TOOLDIR)/clang
 override CXX=$(TOOLDIR)/clang++
 
@@ -11,3 +10,5 @@ USE_BINARYBUILDER_LLVM=1
 
 override SANITIZE=1
 override SANITIZE_THREAD=1
+override CROSS_BOOTSTRAP_JULIA=$(BUILDROOT)/../bootstrap/usr/bin/julia
+override CROSS_BOOTSTRAP_SYSBASE=$(BUILDROOT)/../bootstrap/usr/lib/julia/sysbase.$(SHLIB_EXT)
diff --git a/contrib/tsan/build.sh b/contrib/tsan/build.sh
index 2c4ba3b1bde95..de3951fbe8bd5 100755
--- a/contrib/tsan/build.sh
+++ b/contrib/tsan/build.sh
@@ -3,7 +3,7 @@
 
 #
 # Usage:
-#     contrib/tsan/build.sh <path> [<make_targets>...]
+#     contrib/tsan/build.sh [-j<N>] <path> [<make_targets>...]
 #
 # Build TSAN-enabled julia.  Given a workspace directory <path>, build
 # TSAN-enabled julia in <path>/tsan.  Required toolss are install under
@@ -13,6 +13,16 @@
 # make target is `debug`.
 
 set -ue
+set -x
+
+JOBS=1
+while getopts j: opt
+do
+    case $opt in
+        j) JOBS="$OPTARG";;
+    esac
+done
+shift $((OPTIND-1))
 
 # `$WORKSPACE` is a directory in which we create `toolchain` and `tsan`
 # sub-directories.
@@ -44,6 +54,12 @@ fi
 
 make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools
 
+echo
+echo "Building bootstrap Julia..."
+BUILD="$WORKSPACE/bootstrap"
+
+make -j "$JOBS" O="$BUILD" julia-src-release julia-sysbase-release
+
 echo
 echo "Building Julia..."
 
@@ -54,4 +70,6 @@ if [ ! -d "$BUILD" ]; then
 fi
 
 cd "$BUILD"  # so that we can pass `-C src` to `make`
-make "$@"
+# Reporting tsan warnings will interfere with bootstrapping.
+export TSAN_OPTIONS="report_bugs=0 exitcode=0"
+make -j "$JOBS" "$@"
diff --git a/contrib/tsan/ignorelist.txt b/contrib/tsan/ignorelist.txt
new file mode 100644
index 0000000000000..34c9c2aced7d8
--- /dev/null
+++ b/contrib/tsan/ignorelist.txt
@@ -0,0 +1 @@
+mainfile:*/gc-*.c
diff --git a/contrib/tsan/suppressions.txt b/contrib/tsan/suppressions.txt
new file mode 100644
index 0000000000000..706f258e65d71
--- /dev/null
+++ b/contrib/tsan/suppressions.txt
@@ -0,0 +1 @@
+deadlock:invalidate_code_instance
diff --git a/etc/write_base_cache.jl b/contrib/write_base_cache.jl
similarity index 100%
rename from etc/write_base_cache.jl
rename to contrib/write_base_cache.jl
diff --git a/deps/BOLT.mk b/deps/BOLT.mk
new file mode 100644
index 0000000000000..34391ab10f716
--- /dev/null
+++ b/deps/BOLT.mk
@@ -0,0 +1,118 @@
+## BOLT ##
+include $(SRCDIR)/BOLT.version
+
+ifneq ($(USE_BINARYBUILDER_BOLT), 1)
+BOLT_GIT_URL:=https://github.com/llvm/llvm-project.git
+BOLT_TAR_URL=https://api.github.com/repos/llvm/llvm-project/tarball/$1
+$(eval $(call git-external,BOLT,BOLT,CMakeLists.txt,,$(SRCCACHE)))
+
+BOLT_BUILDDIR := $(BUILDDIR)/$(BOLT_SRC_DIR)/build
+
+LLVM_ENABLE_PROJECTS := bolt
+
+LLVM_CFLAGS :=
+LLVM_CXXFLAGS :=
+LLVM_CPPFLAGS :=
+LLVM_LDFLAGS :=
+LLVM_CMAKE :=
+
+LLVM_CMAKE += -DLLVM_ENABLE_PROJECTS="$(LLVM_ENABLE_PROJECTS)"
+
+# Otherwise LLVM will translate \\ to / on mingw
+LLVM_CMAKE += -DLLVM_WINDOWS_PREFER_FORWARD_SLASH=False
+
+# Allow adding LLVM specific flags
+LLVM_CFLAGS += $(CFLAGS)
+LLVM_CXXFLAGS += $(CXXFLAGS)
+LLVM_CXXFLAGS += $(LLVM_CXXFLAGS)
+LLVM_CPPFLAGS += $(CPPFLAGS)
+LLVM_LDFLAGS += $(LDFLAGS)
+LLVM_LDFLAGS += $(LLVM_LDFLAGS)
+LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING=host -DCMAKE_BUILD_TYPE=Release
+LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))"
+LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=FORCE_ON -DZLIB_ROOT="$(build_prefix)"
+
+LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off
+
+ifeq ($(OS), WINNT)
+LLVM_CPPFLAGS += -D__USING_SJLJ_EXCEPTIONS__ -D__CRT__NO_INLINE
+endif # OS == WINNT
+ifneq ($(HOSTCC),$(CC))
+LLVM_CMAKE += -DCROSS_TOOLCHAIN_FLAGS_NATIVE="-DCMAKE_C_COMPILER=$$(which $(HOSTCC));-DCMAKE_CXX_COMPILER=$$(which $(HOSTCXX))"
+
+# Defaults to off when crosscompiling, starting from LLVM 18
+LLVM_CMAKE += -DBOLT_ENABLE_RUNTIME=ON
+endif
+ifeq ($(OS), emscripten)
+LLVM_CMAKE += -DCMAKE_TOOLCHAIN_FILE=$(EMSCRIPTEN)/cmake/Modules/Platform/Emscripten.cmake -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_ENABLE_THREADS=OFF -DLLVM_BUILD_UTILS=OFF
+endif # OS == emscripten
+
+ifneq (,$(filter $(ARCH), powerpc64le ppc64le))
+ifeq (${USECLANG},0)
+LLVM_CXXFLAGS += -mminimal-toc
+endif
+endif
+
+ifeq ($(fPIC),)
+LLVM_CMAKE += -DLLVM_ENABLE_PIC=OFF
+endif
+
+LLVM_CMAKE += -DCMAKE_C_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CFLAGS)" \
+	-DCMAKE_CXX_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CXXFLAGS)"
+ifeq ($(OS),Darwin)
+# Explicitly use the default for -mmacosx-version-min=10.9 and later
+LLVM_CMAKE += -DLLVM_ENABLE_LIBCXX=ON
+endif
+
+LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \
+	-DCMAKE_SHARED_LINKER_FLAGS="$(LLVM_LDFLAGS)"
+
+ifeq ($(USE_SYSTEM_ZLIB), 0)
+$(BOLT_BUILDDIR)/build-configured: | $(build_prefix)/manifest/zlib
+endif
+
+$(BOLT_BUILDDIR)/build-configured: $(SRCCACHE)/$(BOLT_SRC_DIR)/source-extracted
+	mkdir -p $(dir $@)
+	cd $(dir $@) && \
+		$(CMAKE) $(SRCCACHE)/$(BOLT_SRC_DIR)/llvm $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) $(LLVM_CMAKE) \
+		|| { echo '*** To install a newer version of cmake, run contrib/download_cmake.sh ***' && false; }
+	echo 1 > $@
+
+$(BOLT_BUILDDIR)/build-compiled: $(BOLT_BUILDDIR)/build-configured
+	cd $(BOLT_BUILDDIR) && \
+		$(if $(filter $(CMAKE_GENERATOR),make), \
+		  $(MAKE), \
+		  $(CMAKE) --build . --target bolt)
+	echo 1 > $@
+
+$(BOLT_BUILDDIR)/build-checked: $(BOLT_BUILDDIR)/build-compiled
+ifeq ($(OS),$(BUILD_OS))
+	cd $(BOLT_BUILDDIR) && \
+		  $(CMAKE) --build . --target check-bolt
+endif
+	echo 1 > $@
+
+BOLT_INSTALL = \
+	cd $1 && mkdir -p $2$$(build_depsbindir) && \
+	$$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P tools/bolt/cmake_install.cmake
+
+$(eval $(call staged-install, \
+	bolt,$$(BOLT_SRC_DIR)/build, \
+	BOLT_INSTALL,,,))
+
+clean-bolt:
+	-rm -f $(BOLT_BUILDDIR)/build-configured $(BOLT_BUILDDIR)/build-compiled
+	-$(MAKE) -C $(BOLT_BUILDDIR) clean
+
+get-bolt: $(BOLT_SRC_FILE)
+extract-bolt: $(SRCCACHE)/$(BOLT_SRC_DIR)/source-extracted
+configure-bolt: $(BOLT_BUILDDIR)/build-configured
+compile-bolt: $(BOLT_BUILDDIR)/build-compiled
+fastcheck-bolt: #none
+check-bolt: $(BOLT_BUILDDIR)/build-checked
+
+else # USE_BINARYBUILDER_BOLT
+
+$(eval $(call bb-install,BOLT,BOLT,false,true))
+
+endif # USE_BINARYBUILDER_BOLT
diff --git a/deps/BOLT.version b/deps/BOLT.version
new file mode 100644
index 0000000000000..6a785041e163f
--- /dev/null
+++ b/deps/BOLT.version
@@ -0,0 +1,11 @@
+# -*- makefile -*-
+
+BOLT_VER := 18.1.4
+BOLT_JLL_VER := 18.1.4+0
+
+## jll artifact
+BOLT_JLL_NAME := BOLT
+
+## source build
+BOLT_BRANCH=llvmorg-$(BOLT_VER)
+BOLT_SHA1=e6c3289804a67ea0bb6a86fadbe454dd93b8d855
diff --git a/deps/JuliaSyntax.mk b/deps/JuliaSyntax.mk
deleted file mode 100644
index e9cc0c942dbe0..0000000000000
--- a/deps/JuliaSyntax.mk
+++ /dev/null
@@ -1,16 +0,0 @@
-$(eval $(call git-external,JuliaSyntax,JULIASYNTAX,,,$(BUILDDIR)))
-
-$(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted
-	@# no build steps
-	echo 1 > $@
-
-$(eval $(call symlink_install,JuliaSyntax,$$(JULIASYNTAX_SRC_DIR),$$(JULIAHOME)/base))
-
-clean-JuliaSyntax:
-	-rm -f $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled
-get-JuliaSyntax: $(JULIASYNTAX_SRC_FILE)
-extract-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted
-configure-JuliaSyntax: extract-JuliaSyntax
-compile-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled
-fastcheck-JuliSyntax: check-JuliSyntax
-check-JuliSyntax: compile-JuliSyntax
diff --git a/deps/JuliaSyntax.version b/deps/JuliaSyntax.version
deleted file mode 100644
index b604eedaa43dd..0000000000000
--- a/deps/JuliaSyntax.version
+++ /dev/null
@@ -1,4 +0,0 @@
-JULIASYNTAX_BRANCH = main
-JULIASYNTAX_SHA1 = 8731bab86f14762cca8cf24224d8c7a6a89c21c5
-JULIASYNTAX_GIT_URL := https://github.com/JuliaLang/JuliaSyntax.jl.git
-JULIASYNTAX_TAR_URL = https://api.github.com/repos/JuliaLang/JuliaSyntax.jl/tarball/$1
diff --git a/deps/Makefile b/deps/Makefile
index 27f5fdbb693d5..cea1e52c55156 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -22,11 +22,11 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST)
 # additionally all targets should be listed in the getall target for easier off-line compilation
 # if you are adding a new target, it can help to copy an similar, existing target
 #
-# autoconf configure-driven scripts: pcre unwind gmp mpfr patchelf libuv curl
+# autoconf configure-driven scripts: pcre unwind gmp mpfr patchelf libuv curl openssl
 # custom Makefile rules: openlibm dsfmt libsuitesparse lapack blastrampoline openblas utf8proc objconv libwhich
-# CMake libs: llvm llvmunwind libgit2 libssh2 mbedtls libtracyclient
+# CMake libs: llvm llvmunwind libgit2 libssh2 libtracyclient
 #
-# downloadable via git: llvm-svn, libuv, libopenlibm, utf8proc, libgit2, libssh2, libtracyclient
+# downloadable via git: llvm-svn, libuv, libopenlibm, utf8proc, libgit2, libssh2, libtracyclient, mmtk_julia
 #
 # to debug 'define' rules, replace eval at the usage site with info or error
 
@@ -36,15 +36,14 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST)
 # prevent installing libs into usr/lib64 on opensuse
 unexport CONFIG_SITE
 
-DEP_LIBS := JuliaSyntax
-
 ifeq ($(USE_SYSTEM_LIBBLASTRAMPOLINE), 0)
 DEP_LIBS += blastrampoline
 endif
 
-ifeq ($(USE_SYSTEM_CSL), 0)
+# We need to run this whether or not USE_SYSTEM_CSL is set.
+# If it is, this target copies the system CSLs into the location our
+# build system expects.
 DEP_LIBS += csl
-endif
 
 ifeq ($(SANITIZE), 1)
 DEP_LIBS += sanitizers
@@ -64,16 +63,20 @@ ifeq ($(OS), Linux)
 DEP_LIBS += unwind
 else ifeq ($(OS), FreeBSD)
 DEP_LIBS += unwind
+else ifeq ($(OS), OpenBSD)
+DEP_LIBS += llvmunwind
 else ifeq ($(OS), Darwin)
 DEP_LIBS += llvmunwind
 endif
 endif
 endif
 
-ifneq (,$(findstring $(OS),Linux FreeBSD))
+PATCHELF_MANIFEST :=
+ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD))
 ifeq ($(USE_SYSTEM_PATCHELF), 0)
 DEP_LIBS += patchelf
 PATCHELF:=$(build_depsbindir)/patchelf
+PATCHELF_MANIFEST:=$(build_prefix)/manifest/patchelf
 else
 PATCHELF:=patchelf
 endif
@@ -117,9 +120,8 @@ ifeq ($(USE_SYSTEM_GMP), 0)
 DEP_LIBS += gmp
 endif
 
-ifeq ($(USE_SYSTEM_LIBGIT2), 0)
-ifeq ($(USE_SYSTEM_MBEDTLS), 0)
-DEP_LIBS += mbedtls
+ifeq ($(USE_SYSTEM_OPENSSL), 0)
+DEP_LIBS += openssl
 endif
 
 ifeq ($(USE_SYSTEM_LIBSSH2), 0)
@@ -134,18 +136,19 @@ ifeq ($(USE_SYSTEM_CURL), 0)
 DEP_LIBS += curl
 endif
 
+ifeq ($(USE_SYSTEM_LIBGIT2), 0)
 DEP_LIBS += libgit2
-endif # USE_SYSTEM_LIBGIT2
+endif
 
 ifeq ($(USE_SYSTEM_MPFR), 0)
 DEP_LIBS += mpfr
 endif
 
-ifeq ($(USE_GPL_LIBS), 1)
+# Only some of the modules in SuiteSparse are GPL.
+# xref: `remove-libsuitesparse-gpl-lib` in libsuitesparse.mk
 ifeq ($(USE_SYSTEM_LIBSUITESPARSE), 0)
 DEP_LIBS += libsuitesparse
 endif
-endif
 
 ifeq ($(USE_SYSTEM_UTF8PROC), 0)
 DEP_LIBS += utf8proc
@@ -155,6 +158,10 @@ ifeq ($(USE_SYSTEM_ZLIB), 0)
 DEP_LIBS += zlib
 endif
 
+ifeq ($(USE_SYSTEM_ZSTD), 0)
+DEP_LIBS += zstd
+endif
+
 ifeq ($(USE_SYSTEM_P7ZIP), 0)
 DEP_LIBS += p7zip
 endif
@@ -169,6 +176,13 @@ ifeq ($(WITH_ITTAPI),1)
 DEP_LIBS += ittapi
 endif
 
+ifeq ($(WITH_NVTX),1)
+DEP_LIBS += nvtx
+endif
+
+ifneq ($(WITH_TERMINFO),0)
+DEP_LIBS += terminfo
+endif
 
 # Only compile standalone LAPACK if we are not using OpenBLAS.
 # OpenBLAS otherwise compiles LAPACK as part of its build.
@@ -186,13 +200,18 @@ DEP_LIBS += libwhich
 endif
 endif
 
+ifeq (${USE_THIRD_PARTY_GC},mmtk)
+DEP_LIBS += mmtk_julia
+endif
+
 DEP_LIBS_STAGED := $(DEP_LIBS)
 
 # list all targets
 DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \
 	openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \
-	objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \
-	sanitizers libsuitesparse lld libtracyclient ittapi JuliaSyntax
+	objconv openssl libssh2 nghttp2 curl libgit2 libwhich zlib zstd p7zip csl \
+	sanitizers libsuitesparse lld libtracyclient ittapi nvtx \
+	terminfo mmtk_julia
 DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL)
 
 ifneq ($(USE_BINARYBUILDER_OPENBLAS),0)
@@ -226,9 +245,27 @@ distcleanall: $(addprefix distclean-, $(DEP_LIBS_ALL))
 	rm -rf $(build_prefix)
 getall: $(addprefix get-, $(DEP_LIBS_ALL))
 
+.PHONY: default
+.PHONY: get
+.PHONY: extract
+.PHONY: configure
+.PHONY: compile
+.PHONY: check
+.PHONY: fastcheck
+.PHONY: stage
+.PHONY: install
+.PHONY: version-check
+
+.PHONY: uninstall
+.PHONY: cleanall
+.PHONY: distcleanall
+.PHONY: getall
+
+include $(SRCDIR)/BOLT.mk
 include $(SRCDIR)/csl.mk
 include $(SRCDIR)/sanitizers.mk
 include $(SRCDIR)/ittapi.mk
+include $(SRCDIR)/nvtx.mk
 include $(SRCDIR)/llvm.mk
 include $(SRCDIR)/libuv.mk
 include $(SRCDIR)/pcre.mk
@@ -240,11 +277,12 @@ include $(SRCDIR)/openblas.mk
 include $(SRCDIR)/utf8proc.mk
 include $(SRCDIR)/libsuitesparse.mk
 include $(SRCDIR)/zlib.mk
+include $(SRCDIR)/zstd.mk
 include $(SRCDIR)/unwind.mk
 include $(SRCDIR)/gmp.mk
 include $(SRCDIR)/mpfr.mk
 include $(SRCDIR)/patchelf.mk
-include $(SRCDIR)/mbedtls.mk
+include $(SRCDIR)/openssl.mk
 include $(SRCDIR)/libssh2.mk
 include $(SRCDIR)/nghttp2.mk
 include $(SRCDIR)/curl.mk
@@ -252,8 +290,9 @@ include $(SRCDIR)/libgit2.mk
 include $(SRCDIR)/libwhich.mk
 include $(SRCDIR)/p7zip.mk
 include $(SRCDIR)/libtracyclient.mk
+include $(SRCDIR)/terminfo.mk
 
-# vendored Julia libs
-include $(SRCDIR)/JuliaSyntax.mk
+# MMTk
+include $(SRCDIR)/mmtk_julia.mk
 
 include $(SRCDIR)/tools/uninstallers.mk
diff --git a/deps/blastrampoline.mk b/deps/blastrampoline.mk
index bd1cb65c6ae2d..cfa28a4d8b88f 100644
--- a/deps/blastrampoline.mk
+++ b/deps/blastrampoline.mk
@@ -16,16 +16,16 @@ $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured: $(BUILDDIR)/$(BLASTRAMPO
 BLASTRAMPOLINE_BUILD_ROOT := $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/src
 $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-compiled: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured
 	cd $(dir $@)/src && $(MAKE) $(BLASTRAMPOLINE_BUILD_OPTS)
-ifeq ($(OS), WINNT)
-	# Windows doesn't like soft link, use hard link
-	cd $(BLASTRAMPOLINE_BUILD_ROOT)/build/ && \
-		cp -f --dereference --link libblastrampoline.dll libblastrampoline.dll
-endif
 	echo 1 > $@
 
 define BLASTRAMPOLINE_INSTALL
 	$(MAKE) -C $(BLASTRAMPOLINE_BUILD_ROOT) install $(BLASTRAMPOLINE_BUILD_OPTS) DESTDIR="$2"
 endef
+ifeq ($(OS), WINNT)
+# Windows doesn't like soft link, use hard link to copy file without version suffix
+BLASTRAMPOLINE_INSTALL += && cd $2$$(build_prefix)/bin && \
+$$(WIN_MAKE_HARD_LINK) libblastrampoline-*.dll libblastrampoline.dll
+endif
 $(eval $(call staged-install, \
 	blastrampoline,$(BLASTRAMPOLINE_SRC_DIR), \
 	BLASTRAMPOLINE_INSTALL,, \
diff --git a/deps/blastrampoline.version b/deps/blastrampoline.version
index 616300377e3e6..4f9dc0aff0741 100644
--- a/deps/blastrampoline.version
+++ b/deps/blastrampoline.version
@@ -1,7 +1,9 @@
+# -*- makefile -*-
+
 ## jll artifact
 BLASTRAMPOLINE_JLL_NAME := libblastrampoline
 
 ## source build
-BLASTRAMPOLINE_VER := 5.8.0
-BLASTRAMPOLINE_BRANCH=v5.8.0
-BLASTRAMPOLINE_SHA1=81316155d4838392e8462a92bcac3eebe9acd0c7
+BLASTRAMPOLINE_VER := 5.15.0
+BLASTRAMPOLINE_BRANCH=v5.15.0
+BLASTRAMPOLINE_SHA1=072b5f67895bec0b92f8c83194567c1c48e9833d
diff --git a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5 b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5
deleted file mode 100644
index 915ee5c4bb6bf..0000000000000
--- a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-22c097ca7784442f1f10733db7961cc3
diff --git a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512 b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512
deleted file mode 100644
index b824dbcb73a08..0000000000000
--- a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-915791ab9837f09db428060bd128e182dda38c8dc10e13f32f059eb8e8b477548e8ae2cd691522f98c88c510b78b2693018264b62d9cc76d5005ea8104d1539a
diff --git a/deps/checksums/ArgTools-89d19599208c02bfa9609d4578ab72eabe6e8eee.tar.gz/md5 b/deps/checksums/ArgTools-89d19599208c02bfa9609d4578ab72eabe6e8eee.tar.gz/md5
new file mode 100644
index 0000000000000..ff91989b17ca6
--- /dev/null
+++ b/deps/checksums/ArgTools-89d19599208c02bfa9609d4578ab72eabe6e8eee.tar.gz/md5
@@ -0,0 +1 @@
+08dafd9e97d1e389573c37d4dcf4ebb9
diff --git a/deps/checksums/ArgTools-89d19599208c02bfa9609d4578ab72eabe6e8eee.tar.gz/sha512 b/deps/checksums/ArgTools-89d19599208c02bfa9609d4578ab72eabe6e8eee.tar.gz/sha512
new file mode 100644
index 0000000000000..510344dd84ed2
--- /dev/null
+++ b/deps/checksums/ArgTools-89d19599208c02bfa9609d4578ab72eabe6e8eee.tar.gz/sha512
@@ -0,0 +1 @@
+7de0acecc0856e2c59177346df30750760675687e2c186a051b41c9314a9f1e84d8f95ce656fc772681a84d6ba053640b0378d94270f701e0c646e7ac6925c2c
diff --git a/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5
new file mode 100644
index 0000000000000..62e63ff3174d6
--- /dev/null
+++ b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5
@@ -0,0 +1 @@
+c12540d5889cef05bc87183a4ce5a54c
diff --git a/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512
new file mode 100644
index 0000000000000..0635e180ac9a5
--- /dev/null
+++ b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512
@@ -0,0 +1 @@
+61cc7cc42b925f37502eed0d31eafadbfdc24a9ebc892c9b8d96a27b004cbccf2e5da7face5c8d9c9db57fac1b5cf662d890a67337436c5d4aa3373256638ab1
diff --git a/deps/checksums/DelimitedFiles-aac8c59e58cbf961fa15baf4d866901d9d1e6980.tar.gz/md5 b/deps/checksums/DelimitedFiles-aac8c59e58cbf961fa15baf4d866901d9d1e6980.tar.gz/md5
new file mode 100644
index 0000000000000..136aad8d85c4f
--- /dev/null
+++ b/deps/checksums/DelimitedFiles-aac8c59e58cbf961fa15baf4d866901d9d1e6980.tar.gz/md5
@@ -0,0 +1 @@
+3c46839478f91030fc7ca76563ceb65e
diff --git a/deps/checksums/DelimitedFiles-aac8c59e58cbf961fa15baf4d866901d9d1e6980.tar.gz/sha512 b/deps/checksums/DelimitedFiles-aac8c59e58cbf961fa15baf4d866901d9d1e6980.tar.gz/sha512
new file mode 100644
index 0000000000000..7bf292d90d456
--- /dev/null
+++ b/deps/checksums/DelimitedFiles-aac8c59e58cbf961fa15baf4d866901d9d1e6980.tar.gz/sha512
@@ -0,0 +1 @@
+840af831edd4207f7a88ab9faaea1dfd951da5edc8f8d7bc439f66d76b36afd6af20a6f1a53ae6e6fbb4f88949a8908b692719d7783946fd77a21388d307b63c
diff --git a/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/md5 b/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/md5
deleted file mode 100644
index 9c6e4e44927fe..0000000000000
--- a/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ee5afca99801e37fd3a42a9455ae986b
diff --git a/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/sha512 b/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/sha512
deleted file mode 100644
index 69a50a7282781..0000000000000
--- a/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2adec92de521df1668eb13f2903ffdb01efd6afa5f04ce6fbd1737caa4948f7b629cdda7f75a895853a0cd49dccf8b388860d5c19c29e4d4aad6c7f8fa6b7209
diff --git a/deps/checksums/Distributed-cd9219573d736b036077dff3cadddf369516d495.tar.gz/md5 b/deps/checksums/Distributed-cd9219573d736b036077dff3cadddf369516d495.tar.gz/md5
new file mode 100644
index 0000000000000..1d5b0b1934e1c
--- /dev/null
+++ b/deps/checksums/Distributed-cd9219573d736b036077dff3cadddf369516d495.tar.gz/md5
@@ -0,0 +1 @@
+630dac30c0a9b7384c9654e926dd7b5b
diff --git a/deps/checksums/Distributed-cd9219573d736b036077dff3cadddf369516d495.tar.gz/sha512 b/deps/checksums/Distributed-cd9219573d736b036077dff3cadddf369516d495.tar.gz/sha512
new file mode 100644
index 0000000000000..219f8e8a39f02
--- /dev/null
+++ b/deps/checksums/Distributed-cd9219573d736b036077dff3cadddf369516d495.tar.gz/sha512
@@ -0,0 +1 @@
+387634fcc2f6ea88d52d261df08040109c3e499f48ab46971cc4af2adcd868c39da2cb5bae7b5476248866a33038fa43586ea8f5054fadb18c5455383522954e
diff --git a/deps/checksums/Downloads-4e20d029c723199c0b8ea0e2418ff240d25ddaef.tar.gz/md5 b/deps/checksums/Downloads-4e20d029c723199c0b8ea0e2418ff240d25ddaef.tar.gz/md5
new file mode 100644
index 0000000000000..54152fc3afad5
--- /dev/null
+++ b/deps/checksums/Downloads-4e20d029c723199c0b8ea0e2418ff240d25ddaef.tar.gz/md5
@@ -0,0 +1 @@
+ffdec003d29b6a45229b1fc3358accd3
diff --git a/deps/checksums/Downloads-4e20d029c723199c0b8ea0e2418ff240d25ddaef.tar.gz/sha512 b/deps/checksums/Downloads-4e20d029c723199c0b8ea0e2418ff240d25ddaef.tar.gz/sha512
new file mode 100644
index 0000000000000..a507712c72bdd
--- /dev/null
+++ b/deps/checksums/Downloads-4e20d029c723199c0b8ea0e2418ff240d25ddaef.tar.gz/sha512
@@ -0,0 +1 @@
+f4d399f20b852a69503a939bbffee365efc9aa2202c8dbc3b3de63728e7183c8bcf922c8607bc29922737eb66f516c3f52f1a1898c4283c33bd856b38473dc5a
diff --git a/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/md5 b/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/md5
deleted file mode 100644
index 4e70641a4a08b..0000000000000
--- a/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-fa2c90db0e7aa73186c491aa2f03bb2b
diff --git a/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/sha512 b/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/sha512
deleted file mode 100644
index 3f54f39d35ac6..0000000000000
--- a/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d36737b946af5e720402ce4f25e4c69c740bdbdc174385d6448c3660b26fffe34c14af7c4dd4d26ad864ad12771cabdf922c8b3cf4423167a46cdf3001ede125
diff --git a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5
deleted file mode 100644
index 8bec9dde7fbae..0000000000000
--- a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6fdeb9332af478502be39af642027387
diff --git a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512
deleted file mode 100644
index 50c676f808c5c..0000000000000
--- a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-fbb4ab0b99de7e1f86b918b401c2d42883a2bf8e80f6af4d6b85b7ca263d97cca1c47b25aca48359f14dee91b658684c0c590b7f20240bd9e0ce6e960ccf6647
diff --git a/deps/checksums/JuliaSyntax-99e975a726a82994de3f8e961e6fa8d39aed0d37.tar.gz/md5 b/deps/checksums/JuliaSyntax-99e975a726a82994de3f8e961e6fa8d39aed0d37.tar.gz/md5
new file mode 100644
index 0000000000000..12fce1e97c1db
--- /dev/null
+++ b/deps/checksums/JuliaSyntax-99e975a726a82994de3f8e961e6fa8d39aed0d37.tar.gz/md5
@@ -0,0 +1 @@
+ecef4caa8b237a51f92d5622b811a0c3
diff --git a/deps/checksums/JuliaSyntax-99e975a726a82994de3f8e961e6fa8d39aed0d37.tar.gz/sha512 b/deps/checksums/JuliaSyntax-99e975a726a82994de3f8e961e6fa8d39aed0d37.tar.gz/sha512
new file mode 100644
index 0000000000000..f042854e27a47
--- /dev/null
+++ b/deps/checksums/JuliaSyntax-99e975a726a82994de3f8e961e6fa8d39aed0d37.tar.gz/sha512
@@ -0,0 +1 @@
+56dc5158ebfaf0d5e3e5002dfeb322a137f0866add071cfa9f7a0d9ef2d40859e4c6131358c5aeaf0e9e39fe77a94ba88022028092230b059099cd87e2b795ac
diff --git a/deps/checksums/JuliaSyntaxHighlighting-84fb1fd08824736de14aaa94265df756474e0bdf.tar.gz/md5 b/deps/checksums/JuliaSyntaxHighlighting-84fb1fd08824736de14aaa94265df756474e0bdf.tar.gz/md5
new file mode 100644
index 0000000000000..d57e2854bb83a
--- /dev/null
+++ b/deps/checksums/JuliaSyntaxHighlighting-84fb1fd08824736de14aaa94265df756474e0bdf.tar.gz/md5
@@ -0,0 +1 @@
+96c4535bb47d6bb6c779e42ae011abbc
diff --git a/deps/checksums/JuliaSyntaxHighlighting-84fb1fd08824736de14aaa94265df756474e0bdf.tar.gz/sha512 b/deps/checksums/JuliaSyntaxHighlighting-84fb1fd08824736de14aaa94265df756474e0bdf.tar.gz/sha512
new file mode 100644
index 0000000000000..511d70aeda6e2
--- /dev/null
+++ b/deps/checksums/JuliaSyntaxHighlighting-84fb1fd08824736de14aaa94265df756474e0bdf.tar.gz/sha512
@@ -0,0 +1 @@
+f2afdb1f0da6b2e5bd21c368e005921f77f7726bca1caac62699563e5d1e110aa08f709156fda2456885b4fe37f9864ccffd87d39a52602da1fb04a39dd7ca67
diff --git a/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/md5 b/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/md5
new file mode 100644
index 0000000000000..48bd7a8a7fa25
--- /dev/null
+++ b/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/md5
@@ -0,0 +1 @@
+405faa2237105ff823e80e759b2df17a
diff --git a/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/sha512 b/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/sha512
new file mode 100644
index 0000000000000..9fa6aec4d1939
--- /dev/null
+++ b/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/sha512
@@ -0,0 +1 @@
+9bd2bdd5a83df28a26ebfb0d4e59b50584962e07b1364e6fd76bc7a6a7b109f1facaa04366beaa9f340192ea9efa540decde1393ddd50dc3efa13937deeb5d7f
diff --git a/deps/checksums/LinearAlgebra-b599095ef3da7ba7e950ee4700a3ba0fea047949.tar.gz/md5 b/deps/checksums/LinearAlgebra-b599095ef3da7ba7e950ee4700a3ba0fea047949.tar.gz/md5
new file mode 100644
index 0000000000000..c4b5e742e7638
--- /dev/null
+++ b/deps/checksums/LinearAlgebra-b599095ef3da7ba7e950ee4700a3ba0fea047949.tar.gz/md5
@@ -0,0 +1 @@
+de32e20dce88650db906b149eda6854b
diff --git a/deps/checksums/LinearAlgebra-b599095ef3da7ba7e950ee4700a3ba0fea047949.tar.gz/sha512 b/deps/checksums/LinearAlgebra-b599095ef3da7ba7e950ee4700a3ba0fea047949.tar.gz/sha512
new file mode 100644
index 0000000000000..1ae2af6930915
--- /dev/null
+++ b/deps/checksums/LinearAlgebra-b599095ef3da7ba7e950ee4700a3ba0fea047949.tar.gz/sha512
@@ -0,0 +1 @@
+90a6af3cebea0cd01c2b76ad3d70c578cba478f1665e07c66a27d3e91c7c3ceb37d07ea2fc791252d28735aef4a6f1ac86f350058ebb391d79e7a19ee149b639
diff --git a/deps/checksums/NetworkOptions-7034c55dbf52ee959cabd63bcbe656df658f5bda.tar.gz/md5 b/deps/checksums/NetworkOptions-7034c55dbf52ee959cabd63bcbe656df658f5bda.tar.gz/md5
new file mode 100644
index 0000000000000..8f918b40b2ffb
--- /dev/null
+++ b/deps/checksums/NetworkOptions-7034c55dbf52ee959cabd63bcbe656df658f5bda.tar.gz/md5
@@ -0,0 +1 @@
+1fbf59e3052ec0d40a195d935b3d4a96
diff --git a/deps/checksums/NetworkOptions-7034c55dbf52ee959cabd63bcbe656df658f5bda.tar.gz/sha512 b/deps/checksums/NetworkOptions-7034c55dbf52ee959cabd63bcbe656df658f5bda.tar.gz/sha512
new file mode 100644
index 0000000000000..feeb688dcb7e2
--- /dev/null
+++ b/deps/checksums/NetworkOptions-7034c55dbf52ee959cabd63bcbe656df658f5bda.tar.gz/sha512
@@ -0,0 +1 @@
+4ec9724062d97a9d400bfb4a672ed5ce52999738ddbc01d2892d97df3fd256fe03bb5ea69f2ebbbbdbfef91edc24d82e54df48f997781eba2c6cd8e1c36d046c
diff --git a/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/md5 b/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/md5
deleted file mode 100644
index 9e91b76f9a3c8..0000000000000
--- a/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-16bc9f2eefa3021e19a09ffefc84159b
diff --git a/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/sha512 b/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/sha512
deleted file mode 100644
index 551f7c8da347c..0000000000000
--- a/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-5b53c09343e25b5bde7ea12c2119da656040ca5f62ce934f00f57945ce73dfaf26522da6a9a007ba06ac6fd75a285cbcbdf5edaf9113faa7bba0398294fbd684
diff --git a/deps/checksums/Pkg-1e90f07f9f28e9cec60c5aea0e55302a02164b10.tar.gz/md5 b/deps/checksums/Pkg-1e90f07f9f28e9cec60c5aea0e55302a02164b10.tar.gz/md5
new file mode 100644
index 0000000000000..511b5c1d55133
--- /dev/null
+++ b/deps/checksums/Pkg-1e90f07f9f28e9cec60c5aea0e55302a02164b10.tar.gz/md5
@@ -0,0 +1 @@
+e2064b38167e42b7e96fdb2b6a738b31
diff --git a/deps/checksums/Pkg-1e90f07f9f28e9cec60c5aea0e55302a02164b10.tar.gz/sha512 b/deps/checksums/Pkg-1e90f07f9f28e9cec60c5aea0e55302a02164b10.tar.gz/sha512
new file mode 100644
index 0000000000000..8dcd6313d01f7
--- /dev/null
+++ b/deps/checksums/Pkg-1e90f07f9f28e9cec60c5aea0e55302a02164b10.tar.gz/sha512
@@ -0,0 +1 @@
+b21d6c58f3132af8dd739c8b23073a6702e554d517c6d364e883a355ea1d692cdae01d8600d7eed7c9d4b3792025d1e4d7aa258f5850817aff2fae48f631b2c3
diff --git a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5
deleted file mode 100644
index 8710722b5409c..0000000000000
--- a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-f0e62f7b63dc9400caa2fec1b91b7889
diff --git a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512
deleted file mode 100644
index c92e62d861633..0000000000000
--- a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e48ee01791f58d41715fd44e16238d835315e930d3ef529dd3f3b5660935f7f0ca2c5163ec9c4e4d90e4ead5328f39e0bfffa88223c2094c8727460eac022cc1
diff --git a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5 b/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5
deleted file mode 100644
index f682cf3518658..0000000000000
--- a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-de53629eb0b1ce98ac6b245bdbf14e9d
diff --git a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512 b/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512
deleted file mode 100644
index 870098ef7aada..0000000000000
--- a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-71cdc58b03cc4f42f8c4b9c2353d6f94d77b4ac5c9d374387d435c57ba85e966f3be4e8c8447b34e184cb8e665c42b3cd2c9d9742c86f7fb5c71a85df5087966
diff --git a/deps/checksums/SHA-876bc0400f9a457eb2736388fc3d0fbe9460fc7d.tar.gz/md5 b/deps/checksums/SHA-876bc0400f9a457eb2736388fc3d0fbe9460fc7d.tar.gz/md5
new file mode 100644
index 0000000000000..9c330db6498c9
--- /dev/null
+++ b/deps/checksums/SHA-876bc0400f9a457eb2736388fc3d0fbe9460fc7d.tar.gz/md5
@@ -0,0 +1 @@
+1f944d6be3d247bc266e850c67a827ab
diff --git a/deps/checksums/SHA-876bc0400f9a457eb2736388fc3d0fbe9460fc7d.tar.gz/sha512 b/deps/checksums/SHA-876bc0400f9a457eb2736388fc3d0fbe9460fc7d.tar.gz/sha512
new file mode 100644
index 0000000000000..67b9127af483a
--- /dev/null
+++ b/deps/checksums/SHA-876bc0400f9a457eb2736388fc3d0fbe9460fc7d.tar.gz/sha512
@@ -0,0 +1 @@
+3d2d2f48a7d25cc32cd720843fb19edb143c0fbf63b14aba1e4c4394a48cd0ff4d7ff2c25a580b7cdbe0becc05cad5b7701daec625470e8555530ed24fb6e2f0
diff --git a/deps/checksums/SparseArrays-26c80c8b45dc2dca92788332a40a99b6c360d05a.tar.gz/md5 b/deps/checksums/SparseArrays-26c80c8b45dc2dca92788332a40a99b6c360d05a.tar.gz/md5
new file mode 100644
index 0000000000000..64ee597144a75
--- /dev/null
+++ b/deps/checksums/SparseArrays-26c80c8b45dc2dca92788332a40a99b6c360d05a.tar.gz/md5
@@ -0,0 +1 @@
+4b7f1fbb158328c4337ef7bcf2bb81b9
diff --git a/deps/checksums/SparseArrays-26c80c8b45dc2dca92788332a40a99b6c360d05a.tar.gz/sha512 b/deps/checksums/SparseArrays-26c80c8b45dc2dca92788332a40a99b6c360d05a.tar.gz/sha512
new file mode 100644
index 0000000000000..ecb23d19f89f6
--- /dev/null
+++ b/deps/checksums/SparseArrays-26c80c8b45dc2dca92788332a40a99b6c360d05a.tar.gz/sha512
@@ -0,0 +1 @@
+c9e32cfb5ba14bcbab391edf7a327c147d9e4169586c5d8e934e6bcf8d03e8741196f85dbdef05621a0af38e0c394c13f3336b03840bf3770ae6f999b4752e14
diff --git a/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/md5 b/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/md5
deleted file mode 100644
index 96861ba265b5f..0000000000000
--- a/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-e6dc511b49e07a167848adc4e12690d8
diff --git a/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/sha512 b/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/sha512
deleted file mode 100644
index f503304f810e4..0000000000000
--- a/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f40fd137ccd6651fc8b697f57cfcbd8e3feccb99f6a6b32fbaa69cc0160b78cefc662b914ff8f4e48478ca48f9583318a6030d922d43ed66f8db59fd5985f768
diff --git a/deps/checksums/Statistics-22dee82f9824d6045e87aa4b97e1d64fe6f01d8d.tar.gz/md5 b/deps/checksums/Statistics-22dee82f9824d6045e87aa4b97e1d64fe6f01d8d.tar.gz/md5
new file mode 100644
index 0000000000000..c5f56d9064e92
--- /dev/null
+++ b/deps/checksums/Statistics-22dee82f9824d6045e87aa4b97e1d64fe6f01d8d.tar.gz/md5
@@ -0,0 +1 @@
+0b60da1286ca8a978cf3c27b8fbc0601
diff --git a/deps/checksums/Statistics-22dee82f9824d6045e87aa4b97e1d64fe6f01d8d.tar.gz/sha512 b/deps/checksums/Statistics-22dee82f9824d6045e87aa4b97e1d64fe6f01d8d.tar.gz/sha512
new file mode 100644
index 0000000000000..8cd97202f18ba
--- /dev/null
+++ b/deps/checksums/Statistics-22dee82f9824d6045e87aa4b97e1d64fe6f01d8d.tar.gz/sha512
@@ -0,0 +1 @@
+2e03fe3b79dfb299caa0ac23e045bf26addeb3d38ef0b5e5430966dc227e771cfd84722d8bb80aaaedc0a988fbd2228bebf56c3e7d80b3e8993c623d8436660c
diff --git a/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/md5 b/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/md5
deleted file mode 100644
index 7e7a889eecd29..0000000000000
--- a/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6564297a5f5971231809bf9940f68b98
diff --git a/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/sha512 b/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/sha512
deleted file mode 100644
index bbe9b8bed6371..0000000000000
--- a/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-22d14c82a30f3ec7af09028423cc823808abf86918d5707fd1fcf6ca20dea7871589da9b22e462d194e86fcee380f549aeb65f585048f00bf23281786b17e040
diff --git a/deps/checksums/StyledStrings-9bb8ffdd8c2858cced7b6b6fcee85be41c9a1867.tar.gz/md5 b/deps/checksums/StyledStrings-9bb8ffdd8c2858cced7b6b6fcee85be41c9a1867.tar.gz/md5
new file mode 100644
index 0000000000000..7512b07eaa7cf
--- /dev/null
+++ b/deps/checksums/StyledStrings-9bb8ffdd8c2858cced7b6b6fcee85be41c9a1867.tar.gz/md5
@@ -0,0 +1 @@
+e788177e4a0f7dfdbd794b1203786aa5
diff --git a/deps/checksums/StyledStrings-9bb8ffdd8c2858cced7b6b6fcee85be41c9a1867.tar.gz/sha512 b/deps/checksums/StyledStrings-9bb8ffdd8c2858cced7b6b6fcee85be41c9a1867.tar.gz/sha512
new file mode 100644
index 0000000000000..52c5e04fe6212
--- /dev/null
+++ b/deps/checksums/StyledStrings-9bb8ffdd8c2858cced7b6b6fcee85be41c9a1867.tar.gz/sha512
@@ -0,0 +1 @@
+904399dee5a9d284ccf4d508965867c3533bbd97d8d7bac12ecca924cc4fd783f6e67c77d9c8723fdced5f6a28041b7488cb0b309ba5cb3885443e64fd6a1ee8
diff --git a/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5 b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5
new file mode 100644
index 0000000000000..2f81a0d9191b5
--- /dev/null
+++ b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5
@@ -0,0 +1 @@
+46541001073d1c3c85e18d910f8308f3
diff --git a/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512 b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512
new file mode 100644
index 0000000000000..e2eb44845e276
--- /dev/null
+++ b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512
@@ -0,0 +1 @@
+f7470a447b934ca9315e216a07b97e363f11bc93186f9aa057b20b2d05092c58ae4f1b733de362de4a0730861c00be4ca5588d0b3ba65f018c1798b9122b9672
diff --git a/deps/checksums/Tar-d236fa0affb2ab90c78798b01bb1d64615785354.tar.gz/md5 b/deps/checksums/Tar-d236fa0affb2ab90c78798b01bb1d64615785354.tar.gz/md5
new file mode 100644
index 0000000000000..1c87e200b6009
--- /dev/null
+++ b/deps/checksums/Tar-d236fa0affb2ab90c78798b01bb1d64615785354.tar.gz/md5
@@ -0,0 +1 @@
+dd5e9859422d5df99dfba2f2a9680ae6
diff --git a/deps/checksums/Tar-d236fa0affb2ab90c78798b01bb1d64615785354.tar.gz/sha512 b/deps/checksums/Tar-d236fa0affb2ab90c78798b01bb1d64615785354.tar.gz/sha512
new file mode 100644
index 0000000000000..86ff50687df10
--- /dev/null
+++ b/deps/checksums/Tar-d236fa0affb2ab90c78798b01bb1d64615785354.tar.gz/sha512
@@ -0,0 +1 @@
+821f2ce35f3c3659ec323f97486f79deea0e5fb5edec0d5725a6ad6875d6e27974419c53dc702afebeaad68f2547b22c3566074fdd40049fee0ef00d3fb5a242
diff --git a/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/md5 b/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/md5
deleted file mode 100644
index 40d52c2803746..0000000000000
--- a/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-438818cad063d6808354a9b4aecd3001
diff --git a/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/sha512 b/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/sha512
deleted file mode 100644
index 27c57c5051212..0000000000000
--- a/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f9a6e7757bbcca09a84d92ab3a2690a51612c318bdfd98bbb4ffcef56305b019029838e5f1483c9febafa7ecb5e735e68855bc82d04b593af04a446e32436145
diff --git a/deps/checksums/UnicodeData-13.0.0.txt/md5 b/deps/checksums/UnicodeData-13.0.0.txt/md5
deleted file mode 100644
index 2b3ffc179ce01..0000000000000
--- a/deps/checksums/UnicodeData-13.0.0.txt/md5
+++ /dev/null
@@ -1 +0,0 @@
-85879f1976cc8eb739ee5585a93938e2
diff --git a/deps/checksums/UnicodeData-13.0.0.txt/sha512 b/deps/checksums/UnicodeData-13.0.0.txt/sha512
deleted file mode 100644
index a93ba01e7ddda..0000000000000
--- a/deps/checksums/UnicodeData-13.0.0.txt/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1a4a662e2ab33469976bf5f91aa6933ed9b73f6d4179a2daffb349e1869d7d6cfa885b164e82d15dcdad7458cd451c81add58d875eb0c70de854589dc97b2055
diff --git a/deps/checksums/UnicodeData-17.0.0.txt/md5 b/deps/checksums/UnicodeData-17.0.0.txt/md5
new file mode 100644
index 0000000000000..389a8decab6e2
--- /dev/null
+++ b/deps/checksums/UnicodeData-17.0.0.txt/md5
@@ -0,0 +1 @@
+959cb19fcda0240caef8c02953e3d771
diff --git a/deps/checksums/UnicodeData-17.0.0.txt/sha512 b/deps/checksums/UnicodeData-17.0.0.txt/sha512
new file mode 100644
index 0000000000000..1a037aa3f6b81
--- /dev/null
+++ b/deps/checksums/UnicodeData-17.0.0.txt/sha512
@@ -0,0 +1 @@
+7aa597dead08f862295dac2cbcee4c994d847c374910ec23f90a34a413ab9420828d3038d72a75b3f2c583ffcdb6f7b75205ffcde3309fec6310e494c9252798
diff --git a/deps/checksums/blastrampoline b/deps/checksums/blastrampoline
index 011b0f6e4704d..d4c42a32024f9 100644
--- a/deps/checksums/blastrampoline
+++ b/deps/checksums/blastrampoline
@@ -1,34 +1,38 @@
-blastrampoline-81316155d4838392e8462a92bcac3eebe9acd0c7.tar.gz/md5/0478361eac783b99002b1ad985182f05
-blastrampoline-81316155d4838392e8462a92bcac3eebe9acd0c7.tar.gz/sha512/2489ce5770a9861889a2d07e61440ba4f233a92efd4a3544747f83320e0e7a229a8fe01553d99f5f1d98713316f2506daf0adb7d024a46e32b3de1bb2966d637
-libblastrampoline.v5.8.0+0.aarch64-apple-darwin.tar.gz/md5/a28837b9838fef2b3831de3278ec7949
-libblastrampoline.v5.8.0+0.aarch64-apple-darwin.tar.gz/sha512/111ac2fe5f8f8102f2f7c9e9e6aa1d1a12d2db941238c949ff8e64b30335e8b2f6ecce0d5f577879c231eb839c06e259302b709f3d34e94a97047bfa984222f6
-libblastrampoline.v5.8.0+0.aarch64-linux-gnu.tar.gz/md5/9e781a026e03118df81347fb90f10d45
-libblastrampoline.v5.8.0+0.aarch64-linux-gnu.tar.gz/sha512/89469f32a666efd46437351a8fb16758c35e5aecc563d202b480c10ddf9fa5350a5a321076b79b0a1a07ec2cea0b73aa5c28979cc382a198fa96cca0b5899d25
-libblastrampoline.v5.8.0+0.aarch64-linux-musl.tar.gz/md5/b7acda2fdd157bbb183d0dd33643beef
-libblastrampoline.v5.8.0+0.aarch64-linux-musl.tar.gz/sha512/cf4125a47334fe2ec0d5a4b11624b12e1366ec031500218f680ad5a53152b9d752c0c02a0b92d0e07f3eb21f2f8f58d0c587438a4869a72197bbd5e91531369d
-libblastrampoline.v5.8.0+0.armv6l-linux-gnueabihf.tar.gz/md5/eafabd99fb1287d495acb8efb8091fde
-libblastrampoline.v5.8.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/63ff4e6bc400fa8ee713a1c5ae4af0a8e152d49860c6f5e94a17e426ad9f780d41cc0f84d33c75ea5347af1a53f07fc012798d603b6a94ea39f37cfd651a0719
-libblastrampoline.v5.8.0+0.armv6l-linux-musleabihf.tar.gz/md5/9788f74b375ef6b84c16c080f2be5bdd
-libblastrampoline.v5.8.0+0.armv6l-linux-musleabihf.tar.gz/sha512/f00ebf794927404e2294a2fbb759b1e3e57836c7f683525fac0b2ac570da2c75904e43f154cf76fce310a624f9b35fbd40e6c7757882bb6f30db790f4221a543
-libblastrampoline.v5.8.0+0.armv7l-linux-gnueabihf.tar.gz/md5/4492bace63d8274d68ecdaa735e47e99
-libblastrampoline.v5.8.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/8868283e6c5224b80145fdfd17f13f713053ba94e49c170f38f0cbf9f794185d7dec9c107ce65dc76121d3ac5b21d2f3857f619d8279bede86a906230ff59a71
-libblastrampoline.v5.8.0+0.armv7l-linux-musleabihf.tar.gz/md5/d66b6ed1d4e5f6a130f36791063e651d
-libblastrampoline.v5.8.0+0.armv7l-linux-musleabihf.tar.gz/sha512/414ad07574a6e9aa670bbfea13eaea11da13129c9ccb4193cad708014c31493ff10ff427558b90cb16040fa64c8a325c2e375e3310c39fb37bb3e7fdb6a72a5f
-libblastrampoline.v5.8.0+0.i686-linux-gnu.tar.gz/md5/595199a3a01174cfa4d9ce3407bf30dc
-libblastrampoline.v5.8.0+0.i686-linux-gnu.tar.gz/sha512/02c3b0c3c0a411d5090a081f3bbbe38aaae40eaa5fe63d0690e0582e233cd9ce76483922557d4f65dc457e29a4e84d86ee5af20a60b082aec7bec4ca8607c1ca
-libblastrampoline.v5.8.0+0.i686-linux-musl.tar.gz/md5/5832d0044842cb84f4e1e1b0a04b8205
-libblastrampoline.v5.8.0+0.i686-linux-musl.tar.gz/sha512/d28954d0feef6a33fa0bfeb59acb68821222d36a4e353eaf41936ee2c9aace719c2d0f0b0f080eafe2baecc67a29de4cacc0446aac776bbb615c4426d35c9c8f
-libblastrampoline.v5.8.0+0.i686-w64-mingw32.tar.gz/md5/89c07640b6c7ed719199b0cd0a570961
-libblastrampoline.v5.8.0+0.i686-w64-mingw32.tar.gz/sha512/71241e83501ed473af0bf60a3223075e22a48788fdcf0ad5b2932861c89ec0741c61bf6a04c8a26e68b2f39d360b6009a79ea2502b5cccf28249738e7796be89
-libblastrampoline.v5.8.0+0.powerpc64le-linux-gnu.tar.gz/md5/5f76f5c6a88c0caaa6419ba212f8cb94
-libblastrampoline.v5.8.0+0.powerpc64le-linux-gnu.tar.gz/sha512/785071e682075b2cebd992394e66169f4ee2db3a8e23affb88dc05d9abf55f49d597b2a7400a13c83ad106ad825b5ee666b01f8625e51aec267132573273991e
-libblastrampoline.v5.8.0+0.x86_64-apple-darwin.tar.gz/md5/21beb51d448bd22e4608a16b3f4fde05
-libblastrampoline.v5.8.0+0.x86_64-apple-darwin.tar.gz/sha512/620ba64d93ef416e483f813617aa313957282d8361f920b5444702fa911ff0051d1f8a8814b5fa0b082fd4dc77d96cb8b763937c786959bbc97cbb6131617152
-libblastrampoline.v5.8.0+0.x86_64-linux-gnu.tar.gz/md5/14c1045ba4d400f490ddea5343a46f04
-libblastrampoline.v5.8.0+0.x86_64-linux-gnu.tar.gz/sha512/0fdae83f4df93b28951521cf426736367f568c1e76fb68eea42b045cc9a288b6836abb3206a6d61e4f88adcf198553e911c45231aecb0f552e06de28eb3bec54
-libblastrampoline.v5.8.0+0.x86_64-linux-musl.tar.gz/md5/59b110676fcb2fcfdcf670a5d435d555
-libblastrampoline.v5.8.0+0.x86_64-linux-musl.tar.gz/sha512/57a5022e9fabc0637a29f3c32f6180cb4f6a90282191232e299df6cea5265b535e4a0af4fde15c8fe80e5a59edea0fae96dd3a510f5720ecd78e85a2a9ffbfe0
-libblastrampoline.v5.8.0+0.x86_64-unknown-freebsd.tar.gz/md5/cb1c14b4f8754561c5eaf8502582f09a
-libblastrampoline.v5.8.0+0.x86_64-unknown-freebsd.tar.gz/sha512/d3b19a2a9b3dc674119590d920a2e99705de823e7d01a210485b31f8b1ce59253c4a70f2d8fb967f7fa05efb6ac376d94e79ffc6848607a366b2f0caa58b4208
-libblastrampoline.v5.8.0+0.x86_64-w64-mingw32.tar.gz/md5/34fdc53745245887f968f420b2f02ed9
-libblastrampoline.v5.8.0+0.x86_64-w64-mingw32.tar.gz/sha512/bbf478736b7bd57b340ccd5b6744d526a7a95fc524d30fdf9af6e9d79285641be26fae5f9e5302d71a5be76b05c379e969a829e259d8100ba9c6ce202b632b3d
+blastrampoline-072b5f67895bec0b92f8c83194567c1c48e9833d.tar.gz/md5/aa445967f00e19fef56c7072c72a6832
+blastrampoline-072b5f67895bec0b92f8c83194567c1c48e9833d.tar.gz/sha512/3185a39b553c9cb9f6171755462f52f1d164ec3d7ad83274cb4557e6cf5d6cef3bd7447f85c27ebcb3b1c76da0a547f978099bd554688bc769dd8ded654a2ad2
+libblastrampoline.v5.15.0+0.aarch64-apple-darwin.tar.gz/md5/d816e41ff36472e67d9d815f8db5b75d
+libblastrampoline.v5.15.0+0.aarch64-apple-darwin.tar.gz/sha512/b4c844f6f7f4d917629fb27dcaabb70f46f362b2fc0b4686df56c39e46251328ee323d01e09609988ebfa76d123eac033641fae14f4a48bfbe4b20b77ef745a9
+libblastrampoline.v5.15.0+0.aarch64-linux-gnu.tar.gz/md5/375e826ab5bdaf34b71c61eafc9deac8
+libblastrampoline.v5.15.0+0.aarch64-linux-gnu.tar.gz/sha512/2f8503aea3454136800f51fd3edf3b26efd1bab194d33fc9a302b00cff00b237462bd2a9409016656ac8e9904244004f4725d3388f29da4512b962f018af0609
+libblastrampoline.v5.15.0+0.aarch64-linux-musl.tar.gz/md5/f1c90626a26885bac905ad6ed9c26b46
+libblastrampoline.v5.15.0+0.aarch64-linux-musl.tar.gz/sha512/0cd900176f4b5e628b54fdfb4f9504fb6b352f92b97a1b3cb102343c94ffddb37b564a5d682d36631af3b03f6b4a49ad7e33f473f8450d95f74e3f986f39b780
+libblastrampoline.v5.15.0+0.aarch64-unknown-freebsd.tar.gz/md5/b9025566d8ba21fa10f55065bd9e3795
+libblastrampoline.v5.15.0+0.aarch64-unknown-freebsd.tar.gz/sha512/97ef1dd8116c4fc89cfa35d8f8674885e3b425d30cd45ab727ae98cf5f9b2b481352217bf2a01e6d3e0f5a95700c1771c15db4dcd73312c6d3286895c04c6f6b
+libblastrampoline.v5.15.0+0.armv6l-linux-gnueabihf.tar.gz/md5/24e043853904f395436db5045add7e2c
+libblastrampoline.v5.15.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/8948720cc3cf46ec0c7489ea90e8defd24eaebaaf0c9a98a0de13bdfffdd732311f1e1608078573a30e3170ad543eb0caf31a500255827482af526957bccacbb
+libblastrampoline.v5.15.0+0.armv6l-linux-musleabihf.tar.gz/md5/3cbbb1c09d1731e9dd29dd9350110aae
+libblastrampoline.v5.15.0+0.armv6l-linux-musleabihf.tar.gz/sha512/f1806cb4a1b59a78027291ca01481f2d6acf74340c3bc9534af93c3874aa6774dd4c6b5cf7b1bf98b028602d74b9301b0250fad9d2c2e0e02188c604f63e1559
+libblastrampoline.v5.15.0+0.armv7l-linux-gnueabihf.tar.gz/md5/da36360de121353b213c488158c264ed
+libblastrampoline.v5.15.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/25883ac30da9a14816e295545a7f8135c20adc3e2e3ab7b374a8796bcfef31b57ab1bc8f4788aef53e2f02d2b795ee89181f143b30132a8af531f019a0219a72
+libblastrampoline.v5.15.0+0.armv7l-linux-musleabihf.tar.gz/md5/7062e23f7549d790cb2c6ff5ef96aff6
+libblastrampoline.v5.15.0+0.armv7l-linux-musleabihf.tar.gz/sha512/06f3da585d720801c840ded52cb29b25364e892b1c1b895ee49a37bde721063757da0f7da4336c9dd518b6269f3c47b2b109d6d37d0a578d805fffe814c239f1
+libblastrampoline.v5.15.0+0.i686-linux-gnu.tar.gz/md5/fe7d22677ec883261429c0201210e35c
+libblastrampoline.v5.15.0+0.i686-linux-gnu.tar.gz/sha512/b99713ae9a00bffdca3001a1a8a84b99e1b3194a755d1a52eb7a73cd8d0571adfb8746b182cb62b3a3e8f7584ae49ece89d503af032b0485b3e6e832bd17de83
+libblastrampoline.v5.15.0+0.i686-linux-musl.tar.gz/md5/3a54aba695352019a507a44d49d4b73a
+libblastrampoline.v5.15.0+0.i686-linux-musl.tar.gz/sha512/9a3baeb75551ec14b58eb370730a50bd0023aa63610d47e492d08dd4d54ff8afca4b8c9b34501ba5eaf2dab02922b88cfeb6ea9383b58783337999eb966f2f0e
+libblastrampoline.v5.15.0+0.i686-w64-mingw32.tar.gz/md5/f8424745959e7bce3a804ccee41b30bf
+libblastrampoline.v5.15.0+0.i686-w64-mingw32.tar.gz/sha512/1b0e877098e666a0ffa7d057f60a1063d710d598ee8a53094815237d98119f5101c4e643fce7190ce22ce3787249920ab7dc6ff2dc1e3088cd95779b19e17bf7
+libblastrampoline.v5.15.0+0.powerpc64le-linux-gnu.tar.gz/md5/927bb0e1a4b5e3ceb225bd8c0f75ec77
+libblastrampoline.v5.15.0+0.powerpc64le-linux-gnu.tar.gz/sha512/a414a84fb45d2666dfd72cd9d6e013481e95c8d6d7cae40939bef779c14fe50d0a8473ddeb7d6060fcc8c4531cd34bd06ff6bbc7a44a6b106b8d0e2c8d62bdbd
+libblastrampoline.v5.15.0+0.riscv64-linux-gnu.tar.gz/md5/3369a45ed2b52482ca2a83ddb515e08b
+libblastrampoline.v5.15.0+0.riscv64-linux-gnu.tar.gz/sha512/68ceb3dea1ebb1689892f05b6c7ab5a088b5249cab5a70bba8164d6290b74634deaa9aa3cfa5c84de836313ac0569542280a48623f95b3744746b2f8984c1d37
+libblastrampoline.v5.15.0+0.x86_64-apple-darwin.tar.gz/md5/07e63e6ef01f2bde3f80b724170991fb
+libblastrampoline.v5.15.0+0.x86_64-apple-darwin.tar.gz/sha512/361689ede0771148e353a4b911fef76839754ddf8f735a60d597b8c523569e206b5daede3885e3ec1302e9e4ca5488b6ea203bd3ed384ccd607934d813b27ba7
+libblastrampoline.v5.15.0+0.x86_64-linux-gnu.tar.gz/md5/e7bcbd90bb3f7d4f197269b3f64cfc4c
+libblastrampoline.v5.15.0+0.x86_64-linux-gnu.tar.gz/sha512/5a55c9b10460bf9472c18fc381cf941d10443b9a089c7e774b4e9456c11a100dd61788c9c5139f77504f0b0bce7e4567b1e002386c402695c968632280b4d316
+libblastrampoline.v5.15.0+0.x86_64-linux-musl.tar.gz/md5/bb8bacb2748137adfb0aa83834bc72d0
+libblastrampoline.v5.15.0+0.x86_64-linux-musl.tar.gz/sha512/5d683868e377d2aee34fb2699f8c3733714d2c7cf0ac7ddb1d35a78460015d784b9e00f8d30e8bc6ee570a47de6baaa7f7efd90790588cc2953cd19897c56a92
+libblastrampoline.v5.15.0+0.x86_64-unknown-freebsd.tar.gz/md5/60e6833b6e7656db680032c0d1b5d5b7
+libblastrampoline.v5.15.0+0.x86_64-unknown-freebsd.tar.gz/sha512/ca155fcc0b66889fe608291738e5e4c8506d28c42e03a948e4e56d441949be1d1dcbd201ba0f2536c862da3d8fdf7c7ee77a9b0b715dc1b9893767510ff0ffb9
+libblastrampoline.v5.15.0+0.x86_64-w64-mingw32.tar.gz/md5/950ec7ca747e8f13cfb7bd1967af5db6
+libblastrampoline.v5.15.0+0.x86_64-w64-mingw32.tar.gz/sha512/6dbcde726cf3b1e2ecc91913b3ff954881706381fcd97907173fd8c21aff6708a9f6998f510356c7f284f826ee67264509831752399efe7163a4baaeb071c3c8
diff --git a/deps/checksums/cacert-2023-01-10.pem/md5 b/deps/checksums/cacert-2023-01-10.pem/md5
deleted file mode 100644
index 92063050b50f3..0000000000000
--- a/deps/checksums/cacert-2023-01-10.pem/md5
+++ /dev/null
@@ -1 +0,0 @@
-e7cf471ba7c88f4e313f492a76e624b3
diff --git a/deps/checksums/cacert-2023-01-10.pem/sha512 b/deps/checksums/cacert-2023-01-10.pem/sha512
deleted file mode 100644
index d3322e5890f81..0000000000000
--- a/deps/checksums/cacert-2023-01-10.pem/sha512
+++ /dev/null
@@ -1 +0,0 @@
-08cd35277bf2260cb3232d7a7ca3cce6b2bd58af9221922d2c6e9838a19c2f96d1ca6d77f3cc2a3ab611692f9fec939e9b21f67442282e867a487b0203ee0279
diff --git a/deps/checksums/cacert-2025-11-04.pem/md5 b/deps/checksums/cacert-2025-11-04.pem/md5
new file mode 100644
index 0000000000000..641a98aecef02
--- /dev/null
+++ b/deps/checksums/cacert-2025-11-04.pem/md5
@@ -0,0 +1 @@
+4ca8e1c3e8fc44c3ecd7a1fb9d3a6d03
diff --git a/deps/checksums/cacert-2025-11-04.pem/sha512 b/deps/checksums/cacert-2025-11-04.pem/sha512
new file mode 100644
index 0000000000000..bbd48b9475d7f
--- /dev/null
+++ b/deps/checksums/cacert-2025-11-04.pem/sha512
@@ -0,0 +1 @@
+9d9f7ecc829bafc222501d8a66852d96a51f522b04a82963e4166c87b85d6a5e5eedb50ced2ef3026cd7cb06fcb4b7dca59c4157813a067cb7b185e32f2957ec
diff --git a/deps/checksums/clang b/deps/checksums/clang
index c16dd849e6fc5..f8591038c34ad 100644
--- a/deps/checksums/clang
+++ b/deps/checksums/clang
@@ -1,108 +1,120 @@
-Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/5dce383804bd3d404b8a1936c12ba457
-Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/5661a1cb2044ded03566c9316978595d692667fbc4e951feca658f9986a8557196557b05ccddf1b00b818aac0893696c3bbbf63a35dc9ed7df146b4488529f6a
-Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/549cbc6fa28ebee446e99701aded16e8
-Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/55eea0b514aa2e43ad2f373ad25ea4fad5219ff1cd8d5b639914c218a0a454ae9b27b8d022ae73771d8ec89fa329f5bfde538817653cc59e569b600148d56842
-Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/ac3cd40e47702f306bc42d6be5826029
-Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/73b217caa53702bc6fbbb3286241b7a20c111358cb9436283e9f7f9fec90436d5b54cb4c332afb7e447867a40ba46c9e3b93464acefbca7c0bb6191001525cbf
-Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/b1a656501493c15b98442bde584a34d7
-Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/f424254cc887301d4d5b04fa71e2c7da6e4d561725d5b06278925e05be1c62a74769f19c37b431c2e2d73e7e5129acff07ac54a0b7fd381821aece27f260c116
-Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/969170b1a791e89a0094154f34023e86
-Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/d6ae356c9b1b80cbc5cea4eb8632b77ab3ce0d060b103cec4a5f1c73feaaf60688c2253034b2a6e132273fe04c803de93f415cbe2ef40cf1d6f6a30dcfa03af3
-Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/22d599b774af41dcaa54481cc6325b1c
-Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b0f257d45f1a920f46b18049b762b5a3cefdf8683c4dce46f48ce2993e6a622dbdfaaa6cc9a9cda8a7f047094a6f804091d1ba6c83e26cefc38fbd1ca5c0a536
-Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/f2f5064217c14700f0f933b704fff233
-Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/2284978d8cfe22aa49b1f3b161c75cb0c9d43f84674ba58a1335edf818b91c6ea1684a9c3580f2e1918fdc050a624c698a4e87dc163e9076b9d6c0023c989d7a
-Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/eafd72ec24ec81d42cb044e4e4d638dc
-Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/bbfc6c9179fc43a1db0ad82fc8c1fcc8ec8ce94d5c32b38cd1f88490dedc67953283995c0dd4db7262a9206431135cf2671c6ecc6580da65ba8ff4ec0323ab64
-Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/0432eb21283647995e35bd0d486148ab
-Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/561beaf45770c06b35bc1626e93a0cd89874026a8afa22017b40eb1e6ba306b05305619d42a4a2145c576b1dcc77ade80cd0bf0e0237761f3517f4db402f9b74
-Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/653b9b87f2573818d66992f969f7811e
-Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/517df570b40b51a4f4cbcecbdaacdf0b592fce66ec328139d95eaf8b63c89a1adb41a9cfe4982f5bc032fb29a6b967dc1b16b0eced98cd78756ced36ff2257d8
-Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/4b1a5cf46925575bbc6765f3336e1cc8
-Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/7afb23aa5ce823b1f2371e038faf311e8e21c3843cc50a0b1473038cd746fcdc77dede67130631bfaee778c3d42ac1eaa23ec664a82f43e2ad406962f3019479
-Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/5a6200aef0e6660bb156ecf3e53cc3c8
-Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/0dc564fe753fbccfa03ac94e19828ea5ba2b8b74e7adbe7f501ac8b11d1ed8fd85a65572dcdf957018bfa1be3a6babadb1ec3937966347fe49fb38596a4b1728
-Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/ad693e5cf8f2583c3311a39c095b0bf8
-Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/b4e1120c960bd69f2643f185607bb2139095fa7a2f943fffec65ccad9422f2bd801131185cbeea1b75298c64cbf109fe28bae54c1b9917fe1ce8b2248d623668
-Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/c04cd594e25324c42d97739d72e772e1
-Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/5aeeedbc3f0f8327f7760abe3eb6fda368353a7b429e31ff47a7bf42d612d070cc86f0e97031ca0c2fa9f9f448757d59b2652d89bb05b27fd380f2116a5beb6b
-Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/d706ad9062539a37df1e5cedc084086a
-Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/4862bbe0c71fe0e8cfddade0f881637ae5f58263208e1154f2284884ddf4ad43d76d98bde57904829f2218db21e4fb6ac038e231b682455fa22deeabe65f1336
-Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/6cc35754a4378902f9f126139bf299a5
-Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/4256e9c3f58dfc896d56eeccd7495601ec585e208857de14f91e2d95295a4d03009149f49254be40b27affd5a2250323c6d0744e1ddfbd5fb924fdedc8a993d6
-Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/128bb901686224fb6d32c9689c03cc21
-Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/b7048ff3d8a3b3e3cddc49b2cd9fbda8ad308fe10e932e8d90000e76c12059547342a1132149254628077d0efc36b34479688b3e9f32e7364301e85a18304cf8
-Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/d860412ac46bdeef203a578f0bfc5b05
-Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/497fa51af138b3c645d5017165aea6d33410262d2ce69e322b259b34fbdcf52a131541dbac66fae8b9a9027b70771199f9a76869721bf18760065ca7cb3b5364
-Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/6fb13f1cc2aec210298c3045f8a7fd94
-Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/085c94f43fb46ecc8cadfed5c5d91978c9ddb9d647eea6e82ff0a548eec53dbddc77721faaa8c43ab5b0674f83fef7aa3b34ba0dc273feabdbb8cb95bf5534ee
-Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/63d765b268e792df2aa92f3689de23de
-Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/32b2397bb4b627f0ad9b00838e30c965feca902e417117d0884244a2be6a50e0d4d40e55a27a87616e33819967455f90ae0a4319c2eefefd49b82e9041835444
-Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/c00e93211a1e470f1b00a53e776a9e3c
-Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/6621b3ab12302657ef2441482e8bc6335535964fda472ab8378221e4a9cc0813968589f457e1af66141821cdedbf8eff3080c20105eec810742e5539fc329fcf
-Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/254fdeddad203954ec0531875cecec8c
-Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/84a19469231a9204a553abc58073e423173ff828445634501a61837c0e249ed003f9051fcf1da4eb16201f80d755e7bb4b7513536c749eb1e7ea78c7ded59945
-Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/0475a3e401b59e1a34dcbd9d9b980823
-Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/e0c9e1e18cc4f7106effaeb04e0e6f41fe8ad872d67d3d0da928ce36d1bce6be3d5231d149b2d404b3a4b99900b50d280ac6f7dd8965d30c4dcd3913590144a6
-Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/08c9e802640582af0b79bc04702c9771
-Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/d4f413bbb5d5c3ae01cea2b87ef4e46816023fcf4373f00fca13f2edc6278eac651718feea3f8c7d04d3ef82360417dd93b6c7163d54ecd79a3811a0ed588054
-Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/e7c253db924ea5cb5098be57029e009f
-Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/373884c492e5862aaff27f5782ba44e202e581e4faeb2cffe14bd696a590c0bc72459fccf3342aadbf189282af0c43efe3db113caa47c27c3ea556f0b3313e7e
-Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/9c1867e316ac258d9199b389ea053d2d
-Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/9537f285d2a06b8c86ff21aab9daad1ba7e71bcfac55d780c693da8cc250707011ee22ed021e387422543b1e2abbc34de1a7fe49175a27a9c11e43b00549f1be
-Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/f9a13a80efacf45f49d6d7591d2cc3ea
-Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/c7edc55c4f76ae086080ba639d83793738884b9385618c52b30f5c3fadb0ed2a31bbe95ab80c5eee8504ec6301d73fc7318a8c0f877ba8b5f51170de51179d9a
-Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/c9911680ea55b36c4b9f59cfda2a8e33
-Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/9c3722bd402627a4f51b4c98c8712a85031aa79380fe38be0db9df13a5cfabe428fcc7d5d5cf804ac4387d738cad1796bb3f341ebdcf4726ea7f699c6de586e9
-Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/db82d62c163f69038364189a60b18d09
-Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/5dc415426bd99dc2d7b5fc4fe3f2bb1aabc8961fc2b03a2bc14562f330b273c4d1942d7ea5f05b38c76ee753b440cc4f92015a25f9de7980aa3b1d52f7d0f2bb
-Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/67b7194b31f68db8ffcf5ec250948740
-Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/a032c2ae911b6318ab23950ac74dc95f2c8bf815196be62e410b20cd2e271c4154f916388d119ca91c77e07853ba2c56bd5e75a4ce6742d2a7bbd9d3e61853ea
-Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/50b4fa021c1c9b6bdb29eae63ea22103
-Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/40b377df590521e5291c3f2f9daa8d60863c03253b07d0e537288324819a909ab3466b710b10b1a92ccd6f3566702c515d808f03e6d9fe9d01617b9a836bb63f
-Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/d2da27ebc23793c107cb03e176f02d6e
-Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/3ed297cfd3c1ec03cbff10d7b54f9f4a374a9cf8c699287f179ebd5fa000dd525fdbed3c31b59a8ae32ef1c56115c3a84640d776f01c8a92bfae979c332043f5
-Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/aefacc80a5f704aa7498b35dfc2441e6
-Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/76c7fd64fc4323ca442fb0aa30b236355b26328f897ea8cf3e3be029246574d150a9790ae1c45b289e4fc3050fdacc20b6d57b588a707f6d0750e6da91815edf
-Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/59048d333a8a261d079673828c174d96
-Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/bcd0c3c5e04cea24383fc2472f6190e48f8738fb7fa625ad700d1997f8aa81c9b6909af0fc38a2287b80756fbfd01300f3388c19c8df791d78ed913d8d59dee1
-Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/bb4007dc5b0c0d545f457bdf35e868ee
-Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/2f686bdd0bbcc62aaf9e20d3804c83291ad7c41a0a174516d7a83dee7f969f7d50f19f70c0f35901a3eaa8d54fe83204d832a901586feb9eb8e141631c411b3b
-Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/08f088ab3498a4f7645393f43098583d
-Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/faf62bba3a282f218ea569d3064d6c0cefde9232d055fc3a08c994fe424f2b60dd9bbf1655f6ca101da701e3d05bd813695d6a66628ec2b6b4d11b89f773f0e4
-Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/bb8f05da1e35ab358a96265f68b37f57
-Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/30e3789ccca1fdc5eecaeb25345c30bc4f752cd41b8725c5279654d9b3f500d6e8693c6d1dda8b3167fcce15443682994d66922a17986419eb48bb09970f02e0
-Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/ea9fdfb7c8d1a9c973ea953d4e057f0d
-Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/5e5d9298a12e65a7e4d401a0e404eb172c96e70fa906096f549e7eda5dbfb294189e4f3526246f28f71ba3bcf35d1bf790f05522150c5877bf8f186d8c503795
-Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/053334d0c5aabaccc81f22c1a371c9a6
-Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/aa8daa99a4b52985d80e57d175b6fc4489058ed84f06fb2fd67710a873d5333ee77b64ed0620df099ed5617792fb3eab23d9cedf3ab3c79f4eb6f04ad1fd9588
-Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/b80918f03dcdfc5b5f1e8afa90dd4e88
-Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/c0397541e06878535b41ba7479b603699d78f1ea3345d9a1146a0e7d17f42078e8365dc71a117981b2d2b25f35a40aeb707ff9ee8a2145303f3cb6567e82bd54
-Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/78b9e190d5cb7e6fb172814eda2996f7
-Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/2c9a764ba2427faa8e67285205dd1b8c211665046c9a4a19aea02de46d02a6d4287467bacd1260b7996b2b85d3e571e750d92f02c21b180abe37709ee9da78c1
-Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/ba6dcd205dbd7c0301855f2a892c6467
-Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/9a98c10943a8abfbe92b151f184370d21a10ce72afb22f131bd0522672c65875868357f60650122e1a2cc91254adceaf8044de4533aea08c4df400ded8c01669
-Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/ce62f8e67b89c612eea35f4ba0e09d45
-Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/9c3afaf0dd338abed0631b81d5f6c197b5dff6aae637996f5bc2f85f2f7dbf64a7a4bdc07dee9ab72abada5be576bb0466550280a9ee9093946a469a2b6af648
-Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/543ebeb138123ce190e74cf0ad17d43f
-Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/aff131b5d0ed372557e3195e15701543ec32db05d5fc18117c4aee789a5cb967706d28b2dc53588bc7566f3a4498fd9e2293518ff28387466464ee07c10e9fff
-Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/58617f16466bcb1b56b204dde697cd89
-Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/bdc0c52168beabc0552ee941246b1d4506fec50913030965b374f4cedd67d6fd2b5746f04505aa5bbd4e6d61c5f684dd22c3b207e364578fd8538aef8efe0b14
-Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/aa6f0d9a455f5f0109433b9cfaa8f009
-Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/b267bd6291fc5830ffee075af00fed9a37177141b0cdcaa8ffd602e6a8bfc58e191408c3a6a12c0fb3ea7a5d825adf1ef99122399e8246e0312b4cd056d49a2f
-Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/ee2d7c4dc5c95e46c6d46c4fff112e9a
-Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/cd11acb2dccd2ac45a53fc48ee6a58299b5e54e80a5b9747c680e9b068381bf87cd388ee75cb0a51ccb1162ee8af03acd4c3f730a5f5a3ed5f443dd24ee91cde
-Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/a5c16a8832f5c28346912f610932ecb4
-Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/91b244ccd569597fe42ec45e5a62f6de0ab2c4da048b8b3ed191bbdde0a8ba5a710054d9f40c31a405a6c494a25c7546748870d1170d76e2d3b22dbb0c618e87
-Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/2d789f91744aebb0deed9b91202c1abf
-Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/fb991942325fcbfa1ad4903db43e81fcfeda5d007ee664d96a0e0d2ee5f04b5767d6ad5d37e0273f5af626efbf1c6fde84d54536b74cb17433d29b6772bcf7bc
-Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/ab8fae829b5822e9123fc3d763d327e1
-Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/1b24b03f6a81fba7400bdaa57899e9cdffd6da7e476832870460a12ab6188662c15a3cadd80ccd7dc0790834aa76ba0df098b400c87fd067eaa9f9fec0b053be
-Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/d5638f87a6ac840d571a3973e89316cf
-Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/0f07e9e8dd75691ee73ab0e78a29047596a543c5886a137a7503c916ee6792cf7d6a7f279dbd864a2ad36d36aac422555d408381e3781ec004bcde5525abeb68
-Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/e777625c3c7efe2dcb029e74ac7d1ba7
-Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/25e0a48a4d8a2ad7f5f5adb7c30429655ff496e6b5a224fc5707f092233239d4c3f4cc17432de12815e546bb595caf2a70b18ff208a53b9f0236accbd83acda3
-Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/22e03dc887f6e425f98cd66e0859ab2f
-Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/ef20886b841ba8b749ffb0c5780a9dc25d5f563ef726b1026ee77607e0572c45b8eb3470e252f882e2c4c23a2159d88ee83d31aae5081c6e4f4c37a61a7875c1
-Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/5d8f1390ff66b6b357768b1994a43d1c
-Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/5fd2fc0cf888d95c38531d236564109b284f20faed222d1feeab2beae68662073c9c59baee310e2bd67908f267416cded7b75f73e28969e2a16d2fcea0b03854
+Clang.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.asserts.tar.gz/md5/9c47efb6a5db15113cd98058f3eff420
+Clang.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.asserts.tar.gz/sha512/7a821a9c61e1fa6db7e130f05a99bb7efec94b909e230a634f0a3236a69798575fc138513cac3922f891d2a3d1691b1992eb42d07e08ba088276067e416450f9
+Clang.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.tar.gz/md5/47b9eb9d7fbc85c74ea3428cdbc7464a
+Clang.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.tar.gz/sha512/2093120e812963ae6583bdef7688963f3615c65db7ba428c00da6171eaa628c328e5e31c1477dff1e698095b2fd0c9357920fb441ec408db69d644f61f3963c6
+Clang.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/9e1fb7beb3988ec3886f76a0d61ffdc3
+Clang.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/90097b6ded0d7bf9454a479dec4925c5fd8f45eb485786798c3b95c9efbd6c8591c2a842199cab11274a543352a3b7918533ae5774f1d207bd1328722e2d805c
+Clang.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/8241f88af4e07c01fbf19ac203954b50
+Clang.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/3dd162f948bd47977d9c037cd7e9ee6da8b3bf789cc5eb0a514527991b37486f8408a7edcaa40699450256de333d0e3d47bbbf3d2850b2a49eebfc702a6a5cca
+Clang.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/23cff175e1239df7497f7b4aeb0e513c
+Clang.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/b89e0f5bc82ea24a71213c4c5235db2f882f7c5f9132b0eceb5d96de2771d882af6363cbe9d6a410687639be8c998d3eb7b54e6bb99aa0ebe14e53803e25eb13
+Clang.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/e0fea3d6012c7e2ff315900d4b65bd8f
+Clang.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/7e59b2b9384033e666feac04d604392729546f466e7d1096e34ab8ce82a6e9eb6c027a28e4c248df036fdee2fed413d1172838b0bd9b601be94629ea3bdf6667
+Clang.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/md5/fe4b69d9ac85ff415ecf0a805b886da2
+Clang.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/sha512/143c250d9f63749690182d649829252e646877a6bd085e6df2496c3e4165976a971fa4f4d6fb3c50b1d38495b504c50ce931cb5a9358049258fbe907e827f66f
+Clang.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.tar.gz/md5/b86048f158ce265d14bc6a5bf1dd2101
+Clang.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.tar.gz/sha512/16d95d2fce2801857f8f34dcd479d03c3392d7b7f59f36de1ff9139beebed05ae40d7f23880e290772b7ac60ef8b4bfaeb8b3495d63f33a88a2de40331afd631
+Clang.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/md5/d58d4375c160e16bc07844e1ec18ac0b
+Clang.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/sha512/20a8e40baefe8a648f66775d0e9e5a8233bb89d9af4cbe18b4b820f37faaa7dd744b8b63329d0c554c2ab8053b5e5775af43a66f394226bcadf66ea1a26b1e15
+Clang.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.tar.gz/md5/b9ade9550cd6b4c8d8c3ce8e52b68350
+Clang.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.tar.gz/sha512/4ccab94517cbc5c25f35caa41f9ee485346d89b2f8519f2bdcd5fb203522dd6ba40fbe23363e8d22c92dd3a3ad5138a0bc82e0be1779187ff64ed93d822f1478
+Clang.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.asserts.tar.gz/md5/28457ae6df848b2c58955578d168307e
+Clang.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.asserts.tar.gz/sha512/c7304f8f67f7c5b8c48cc44ccd3127a78a06715ed521e1916fa7bf8f06daa7309a226028aa6b7b74f4df2531ea743015e8d2b1a16ca10da64aafb0999ebc90da
+Clang.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.tar.gz/md5/9af9e2aa2747643ce1b1a4012881e1f5
+Clang.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.tar.gz/sha512/b653c02fe21e5a41a43a0121eab2be0567209cdef7f8bbe9c34a1f43caec3f97b9b8894bb983b2cdacf97ffa2f36ab7ba5fd9c4d05952db112cdda0771e0fd07
+Clang.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/4f8b736e126af1d4e023d252130cb1e4
+Clang.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/f124cb4d4f35f781f23e2c1024567d8e380896a88ef22f9c414cb18d3b10ff73180005c8c758ee29771eaae9e6a188927f1b195350d4a8cdba93a2e39b7efa21
+Clang.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/md5/e4c1e34c3da9d7ec673dc69f6d390d4f
+Clang.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/sha512/cd53eedfebf6b7308942f31bea25a6f270fb22f9052a12667472b6fa5dd362e3b2cea0d1f7eac72e5b0d1043b9741f61276588319f616223030460244b798fa8
+Clang.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/82ac25b54d4ad1ec9136282be4122ed9
+Clang.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/4955418049392f8544df48121c16cce1939d80765b20ae877d48d61de75f1cc87c984956a83546836c9497397a71fc63b67f618d2a9cac4d9d05affebf795c30
+Clang.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/md5/91e61db7b98969ab42daa51c84f51cbc
+Clang.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/sha512/7cb7480c7b5275f8b0c51859e6c665722eef634fc1a7dd5d48b2be7655a22279493fd9a4177f0b64356b5862f703173f821ef9e854c6bb59ebcc3b1f940e0184
+Clang.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/ac35f42450df3f1f621acae17ebf7a03
+Clang.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/18ef952e4c7a73173678ab69273cd7d34ce2d6c606c9b81476f65b368f4db7991ea6cdeaea35055f5cb7c01191694e8900634264b014150581a85e1432c23512
+Clang.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/md5/7492c55beb5a9915621c88de463bd23d
+Clang.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/sha512/275168f0be23103ab9756f22dfda3a162d77af65a59d01b2e3dbbe262d57f0fa02d2f35f5d6f3865cb22d074d6d3b4be324cb139afcaa88f339b9e02b809228d
+Clang.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/3e9c3bfc7d16906e3a32286cf25eca8f
+Clang.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/ac11bc540225219db02ffd6e9418741dab6142553a3035e4bc64cf5dc8f7d11eed783fd6fd57f340123eef3a74686e087437f40fe7cfc572b27831ca99ca5ad1
+Clang.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/md5/4a7d6d6cafaa6c6468505ded7bc0f719
+Clang.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/sha512/10bc4e17a3b33f38b44ecb61761690487aea0342119c6a1d019fa9c15e35f4712f2a44a6920e7ff39ee832c98767f20ce028f9d773f16b06c277737843cdf5ad
+Clang.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/bb5631553e80397fa3706afaf7542e83
+Clang.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/2e46d49f5464f9e13d3edd3fc9b66120d9d3c12f23c7f661fde99fe21a0f20fa5d34123550052d6d8bd5edd0b1f0971cd73ece1d86ddf3f13732a1da799ab336
+Clang.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/md5/69f0d6e53cc981b155a468693fc9168b
+Clang.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/sha512/b167615437deef8d686c03c4e3ee06d437acc9fcfb6b11c9eed513e5883af78e22cd5796db0693c8b880839a0d5486a822d90a71579b262d953600254815f480
+Clang.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/02698e06c0b277eb5c8303e4921b1e81
+Clang.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/4b7d4b4a3e030e5bb4eb6f9f67c96029857a653ae5bffc8afa789590d9267414bef4e63a8936d94ecb19005eef16284086a0cb2fd1bb28cae059e11b6e691bc7
+Clang.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/md5/93fbad92b45d2d8a233c3da740f2d900
+Clang.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/sha512/b65d75b4041445c40b7d1f8f3ec254cfc060ce24df5d04c7ad13813224df3d4b8da62609837b81f3cbba6b2901e95aab0fe6ec1f156e0b5e8c19ea41eeb1f921
+Clang.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/639c1baee3bb00c6a7dc41e22c94c665
+Clang.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/93af47cb9273f8980dba0387ef44e459bae656df24bd4d92f16956b41f7665712ab0ff388171acbdbd8cc53a7e0e1c42e168580a0e0f47067a50fef47c10acd5
+Clang.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/md5/706dfcf4d3f8f8c92d94b9c5086ff13d
+Clang.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/sha512/98fb3a461e241cd75a1c54959f8a95282435553406caa4ef9648cb905cf980bfc6752bf1d6ae5e92af73fe9e1cb987d82539ab075503dacf9372359472c782f6
+Clang.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/2b4252119dbb7e9d240ab8cbcab57dac
+Clang.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/9cf19cb79c130e2c1129f35bbad3606fc74588433af36d39bad9127968585944b465a18d8d4aaead50a0ec1af31fd07ceebc89d605b1a112496fc4f9dab89e59
+Clang.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/md5/ab6084925af728617fa1aae5dc246350
+Clang.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/sha512/1ade44ee683e0bdb46ade27afb3b0d960822125bf0948a008080a0096471b88a765d57786e11bf1941a6687a2b1f45e753c701a3ff1a642b869da5d698fd300a
+Clang.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/e38c8e2b1d35ff82912aab79a54c422d
+Clang.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/61264b16f95d22503f2db4f6d28801ffa6c05b0482fc9605e820d8e9b2e2304f44ef0c4ab6bd924b7e7f75f64c66cdf6945aaa77dd3cb205e07bf10417ab5172
+Clang.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/fef1ff340861790b831e076e97997f76
+Clang.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/ca1eb7cd0124cc42e4f05f01c0e0ec5e217065836421420d94472833ebd74fe491c4e92728cd79cf5944a8142274b1e03cf348d88ed767d745f2710743accf4a
+Clang.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/f31cbbe374d25dd023a7885a1f13fa42
+Clang.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/c7ca5e7db5cb237a9ca766f47ff692e91643f06591d80cc4413e1801444527c11ac0169ed4dc4c98957d7130524372fc50055edafa091710713d1b5dcf49c718
+Clang.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/0e60c18eda59aa263c6c3773408a8a1b
+Clang.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/8e585d3f47b8c6eb955e8b26b2defba63285c77dbfd51927f38792ee1202317aca7d7cf21bfb09d2134b1f097a6aee2e795b03500da1f9876da7e337ac54cfed
+Clang.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/md5/ca591fcba25b79ba9843191e6aa2a203
+Clang.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/sha512/5f9176b3375ae47e6b36962fd78504d4e0f8596db691a65844ae97dc9edaba8753c9f4752e1949ec994c12c7cdc64987a6c8db8548f7345ca268175a06ceca9f
+Clang.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.tar.gz/md5/a79a463bf693aeba882303a28f5d4b71
+Clang.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.tar.gz/sha512/9fe2763055e5fd1b2d3bbd23ee4c09e766d24370caef5e4b2690f07e62a3ea0ac03fdee781848e44bea3c24be35bb3ad99bb7aca7ca111f662ed663878d33ab2
+Clang.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/md5/2869273d4d2d12a22b9038af54509bbc
+Clang.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/sha512/2ef87fabc6ccf6a74dad904e26908fd8d00213458228b4bc08484bf932e874cb400fa9544c87440aa83312d78ae4a9937dcadde39a084de3e2f720d869705c1c
+Clang.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.tar.gz/md5/dc10249c751fc817ef0b6876df2a1550
+Clang.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.tar.gz/sha512/c338f29548fd12e65ee4f476af669389a361349fef1a656b27bcefac8d2e8537b3f99df2cb196bc5d6e5ed419918bae4839116bf36bba8709f270a1089211700
+Clang.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/e7c427cbc7328058b17fd8bcfa5daea1
+Clang.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/43dc8ceb8c19aad8c5a7b2a0bcae798220fa99c9ac21d6a37da35307611877b9c066134c32f59f0074326fb25a6aff26d9712edd557fd379e5feb53bfb92e11e
+Clang.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/e74bb80ed9e6fac8e640379a7334d255
+Clang.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/c48c992c5ffc22b9eea997331fc4fe62818696433a9d965077ffd91e3975b761d566e1f6fe48d5fd113ee4db60769ceb16c1d375204dbfae341b29768944e88a
+Clang.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/bf4ed3e705e20c02573abbf59430ff48
+Clang.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/bea3f73e8e0b397aa1ca2929cdd71e287417b0fabbe6204310054579e1845f80c5793656eac461b2d50232551fc871d1656774c39c7971ae50fe8f61d1294286
+Clang.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/ba1bb7ad6acd30f312d5175189287d0e
+Clang.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/f1bca49cb6e2e6ae91f59662433db5b8981a1295184a5165f34b50703b0e798ffa06d363677623e1764631ae50685bd8d6f0e807cbcc95b9993647b4d7e5a7aa
+Clang.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/cf0847fdd05d913762e39420cb5c5d62
+Clang.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/0f7340c4f3394adb87cbf90dd4e288591df0035851c8118292773a79f9aad445ac4b81bf29fb3c64885ef6a02e2a8d6b20758cd8eb32139376fc0cd849c4c0fd
+Clang.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/f222e9ed346a539dce245d56f00cca20
+Clang.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/73175b714a694afb2a131154fcc0b2f4d910f93740d5b857bb3f708e033e2eb31117ba9e1d7e96959a37c47932fcb4924332f43432c00f94c43239291c568ef8
+Clang.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/a06644a2318bc4a6fd10fe97ec272673
+Clang.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/45c557133400a82f110919e18e644bb5252b9d046eab3b9dfa39eb8153d47f6615d17066e16a586a189b67e49eba23a4ba711213b79cc9a3dcf6f88e6f6645df
+Clang.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/f68fefafdb8cefa5da72bc769d32d968
+Clang.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/c46cc3d39a411f4b5234d1b5912e474e6cef2d83620005109719264fea523ac43f0202150326b5726dcc48458c641452583a1cdc961e68220dc5144104bc918c
+Clang.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.asserts.tar.gz/md5/e48f6ab3096bd51ee8a12c46efc0a988
+Clang.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.asserts.tar.gz/sha512/e9c1eaf5937f3953b0ed6c0514be88fe193a507114c51492693347f1b9b809da0fbef474b5d3237534aa3abf779f0450772e2a20a9c15f149b1f359c36e5b021
+Clang.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.tar.gz/md5/496c7c831cf85c7d80e6b52c6cde32ef
+Clang.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.tar.gz/sha512/0a57e0aad772cd7940beafe3185b0b68c774a1cf7c423d9b63b22d6e0f1e99f927d0dbf390727cf0d9838b512caa9a5ee0de7ccbb82ddfd415da5d4ed0aebd5f
+Clang.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/4cb32aa02580bb31e3167b9b8bd21157
+Clang.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/915577246f9165b20e095663823055290be237bbeb33fbc6f3d00f651c04103e1b2f9024ed31e1456d3aa6ceff8aa0afb85efd0936d44bda1f8d1e1daf895741
+Clang.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/8ab69a82ff0eb536a6d41220f815d10a
+Clang.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/1364105ef07129ba97011e15308e3d64d666e2d7282f4e7d8da039b460481f63ff225359e56880db06d7354d9167254589f441f91dde25480aa7cfb1f94d5abf
+Clang.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/f287f8a3a035dbbdca969dc2247c29bc
+Clang.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/601325115d552549bad3f7369baaa9d2d511f1a250c41ac93fe9660cf752d53225fce4c8bc4d460c3d195b5c4834eca4fafdb7d8f93b59ebfec242642ce15baa
+Clang.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/c308ac7bb891e7cfcc4dfbf87e525532
+Clang.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/d0b8fa39bb621d0be3e41c5bd6cc998835c7cf68b8508d70d771669510ed9240c7b8c0a5b017ae71a3435ec9652ec750287fc2d13a45ae9e0b5a7717372ff4f9
+Clang.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/md5/d209757929197f21828017b193ad8b6e
+Clang.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/sha512/5a77b260d3271524ba3f8f9a9e31d7858c51a68338759ca8d580eb5602d0721ebc401af1eae3296adc3ab1533a8e55269c5e5a05d0c465e237c7377ffed2eb44
+Clang.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.tar.gz/md5/7a785fb84cf9216400a224d3ca29813a
+Clang.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.tar.gz/sha512/490de9cb5b84cf45a0fb9adf7030a6bbb0853ae6bbfecf441db4b7e710b9bf019a523a7b7353cee9f307e9f5f5e83701f04dd0048fcd146ed5b12b5a447e2f5d
+Clang.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/md5/7d868df41e7392f6904e30b5eb0edc54
+Clang.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/sha512/4e045b911f835b5a6a0f7040d4a7c4d6b8b1d6a5a869aebbf2a70f7eca6bf2bd8e9ab305b6be76bb69c1512bdf711d68cda0faad22f8d8418983157e7dd93097
+Clang.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.tar.gz/md5/38768f9670c9e4310646a2e3ea05b9ed
+Clang.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.tar.gz/sha512/511e7ee866ded51f72546b300097cfc52aa04d8f7883d0af9683016efb0fe80aa2e3a13d47165b5ecb280389efb1375e9186d3711bc6a4db04ba337583d45b0d
+Clang.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.asserts.tar.gz/md5/b83b0887bd12bf4ff7d6870b2a6d7805
+Clang.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.asserts.tar.gz/sha512/9cb07a7b80b58fa6d081b3d75242f04fa4d4556c590da56a68d6782b362c435900f09cd1f1a3d0dc46afda19819933d6602e03ff95a649cb0e9a37bda0f8e0d9
+Clang.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.tar.gz/md5/f03f5e5fc1d4f6756106d3ac055f7aa3
+Clang.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.tar.gz/sha512/e6811e7707e7104aa407ebf08361585d0ca98fbddee37dfa261876a8daf21455579f8bd63bef4862ae0f3f46b0b4678f9a22026330f23874cbd3f2ba6345f25a
+Clang.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/md5/609d9023d01f3909f4516178f3be1073
+Clang.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/sha512/65f396d8fb9325212f4ba6ec1a95bfe0fcd8e3f56aa7f9bc28dbb0f071de72d57531e912f3b77cd37dfe3990ec22f99eaa9217883d5b56b2838d0e5e6d217962
+Clang.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.tar.gz/md5/a43a81f0157d9515d7c4cc7fd215321b
+Clang.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.tar.gz/sha512/68879e2a1ac7df69674e88de4a2bad5d323ba699587654d021967d75a3f25bc36565ceaf18951e58ef41d8b6d24f4e725101390bf9aa6acb7bd8aaac0ba7d285
+Clang.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/md5/5c711463196cd9ecc2372b5fd09a52cc
+Clang.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/sha512/39924b1f4d42a4be1082730c93d65a7c711c51b5c5685a0fb911fa490c25b7dde89044b4156688a4d26e1e4127b15aa0c5556f61b4fc8282743163936b275284
+Clang.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.tar.gz/md5/34a9df47d90c64e4755d3ad424de5b3e
+Clang.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.tar.gz/sha512/2ddf385d1440e29f251574676f0963b56a4818af8d7734809d4bf9944793c29c42139ac39efadae44e809236e5aa2caae514a737f722f5adffd218d18da576c7
diff --git a/deps/checksums/compilersupportlibraries b/deps/checksums/compilersupportlibraries
index 4830109bd7aea..08802ca1f4de3 100644
--- a/deps/checksums/compilersupportlibraries
+++ b/deps/checksums/compilersupportlibraries
@@ -1,92 +1,98 @@
-CompilerSupportLibraries.v1.0.5+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4
-CompilerSupportLibraries.v1.0.5+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/3908fa1a2f739b330e787468c9bfb5c8
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/1741e3403ac7aa99e7cfd9a01222c4153ed300f47cc1b347e1af1a6cd07a82caaa54b9cfbebae8751440420551621cc6524504413446d104f9493dff2c081853
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/2444dbb7637b32cf543675cc12330878
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8537f0b243df8544350c884021b21c585fd302e8dd462a30a6ee84c7a36a049133262e5d1bc362f972066b8e8d6a091c32c3b746bab1feb9fccf2e7cca65756c
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/d79c1434594c0c5e7d6be798bf52c99e
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/7e71accc401a45b51b298702fb4c79a2fc856c7b28f0935f6ad3a0db5381c55fe5432daff371842930d718024b7c6c1d80e2bd09d397145203673bebbe3496ae
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/f212059053d99558a9b0bf54b20180e1
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/5c104b1282cec8a944e5d008f44a4d60f4394fd5d797fec7d1f487d13e7328cd9c88ec4916dabf18596d87160756bda914e4f8c5a356b5577f9349d0d9e976d6
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/3e3b3795ee93ef317223050e803a9875
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/85d3c955e15f66bfe8bfec2f28c9160bc03d4d531ea4ffe6bc6b51e0d69ccea3ab67a16ca752dabc870861c407381c4519d75c6be3832e8dccd6122ec8c6ed75
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/cf2d1315f6a348af2e6c065e2a286e7a
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/58420377bc77aa7678034ee5f708eb6be7db359faef2c2638869765453633da9bf455512bd88e95b38ae0428ecc4053561517b176b2371129bdaef9d8d5dadfd
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336
-CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76
-CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran3.tar.gz/md5/6decf8fd5afb50451771c761e63a8917
-CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/4984724bcc847724b1bc005b6f760a18b68147f7d5402d0faf4e28fc0d14fa10975368a951f9caf2a8856500046dec8343043274557d58269e77492b929a9e4b
-CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran4.tar.gz/md5/39d1e8a3baa144c018d3eaf7f3806482
-CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/fc4d429279c5a93b6c28b6e911b1e7cfd1c1cfe46f11f2e901b3832ce90d45f49d3d29f0ef18518a94af6cc8651f67c4ed81672680f9281ada390440b172a2af
-CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran5.tar.gz/md5/37dabd9cd224c9fed9633dedccb6c565
-CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/b253149e72eef9486888fbaace66e9b6945f4477f6b818f64f3047331165b0e2bc17aa6e3fc8c88686a72e478eb62c8f53883415d5419db448d8016fa3a1da5e
-CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran3.tar.gz/md5/afdd32bfadd465848e6be458817a44ae
-CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran3.tar.gz/sha512/eebd679c499143014514c7c9d1875dedbbab9e3af51526c4dd445a9e3dbade95d24522da8bbad0a50ab400755e47b018828b324c4ad7705e212ccd990e34439a
-CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran4.tar.gz/md5/bc4a0f0b7cea328f7e8850583774496b
-CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran4.tar.gz/sha512/82285b67946212b49cddf6259f2c60ff5469f8c5263ccefe44f1d93ace98ab68e2c152e1b54434b2f075fd8d192c06d5451bc8cca26d951ad15f3453102f02b5
-CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran5.tar.gz/md5/177f0232abce8d523882530ed7a93092
-CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran5.tar.gz/sha512/db80acf0f2434f28ee7680e1beb34f564940071815d1ad89fb5913cbd9ac24da528e826d0d54be6265a7340ebd661b6d308ed79d96b67fa5d8c98dc3f1bee8d6
-CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/f5795dada5360eb8422f45150b13bae9
-CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/6acd1bf7c81631cef9b8b0576ccece08723c5ae2f49de2487d3aefd25f9a0ad49df09e3782735267997d40687b04b85c89e00f6889b026af599bf1bbe91803a1
-CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/5e590f83161913f0145ba8d496b2504b
-CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/4a3f36588afcdef26173764597054068e26f2376e6126a9a94c46b258b5d7a29951d47b5e1ba24df6c3d139bbc4decc5c501a266811692d7fadadc7bd7b6960d
-CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/27da4a7c890fe1427c33fe214cc5feaf
-CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/310ad00f053f9f3ec715ce2e8d20446f397728dff5acc787ea9c9332346607a3d42b678099c424e6d6e5294acddf2aa26051de657b48d34abfd04486951bf241
-CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/4e5e4b23dc87450738da33926a07511d
-CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fc09879d94b750e75775d8b64a41ab9924d675fb53c5700467604412928fe7f5cb21911da0f64898d2463fa77ffbaf4c96c397b9060f4746eec152747930cddc
-CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/9a92138ed69aa317a932a615c6e62d69
-CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/0b7785379936a2a209b074177b1424dd7e00b29b5165f564e799b0aa4e06a582e9d616525d97274ba2507cb88192028f1ac485d3f99bdc7ee53fc63c1a7e85de
-CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8ffee3d6de5197c7a1f354d72c8238fa
-CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/deadc4d7224c84f9b82dc956b69e815c44ae036802838365d870ab9f58c8bcf8ce0645f2f387c8ff344ac2108fc8e7e1ee907fa55e93c91aa5d9fd921bf3fdcb
-CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/87449e72e3f33dbb69b7053cdc2649d4
-CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5ce02ad10c6f4686a476eb2a5de2988cd8b482f5e693db2880c84ad1c82f468ef03fe01b9d0feefe5d4ee741d1d16643d36b144e6261ed32311b3b6f312fac2f
-CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/0407cde92cfa42fa89ac83217ca0ec16
-CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/032c831f1166a336551138939ac40eb2c68a048ce786c0c1403b879a20c1b706caac16d22560b2c7f2b3d6373986c347188675674116005ca251336ee048d09f
-CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/23418763b808371ee94772a90d501f4d
-CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/7867b843551457b11bda7821dd384c1c1cf23b80a308b2058a693de7b7da099f0b37eb0a6de2b84c04b625a68c60eea55138e200d5d6ec6f6af09bd7ce406a96
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/e3d33ae03c18affea74699bdc1fabb68
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42013f4921de5a69ad857195ce5c19ad1bca3c920d79699e5501f1f4534ab132fabd422362b2b5056f5d182215d6c069db5df460bafa700903faf962cc00f77b
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/d40c1e8c0393213c6057c53a12f44175
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/fe7baa4de7490065ab7b953cc12f41462a24bcb49d0a4a64b23249e98e7569b19bb1cb455af2f76090e34066a7d3cdd7a48cae6515ce6c7a5c8486b0cacc5106
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/48541b90f715c4c86ee4da0570275947
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/7f2683fb98e80f12629f4ed3bea9fd59d32b7e7a9ed1699e782d8e238ff0915ecc61bf00adaf4597cfe41caf82cdca0f9be250f595f5f0bea6d8f77dba99eaf4
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/4547059eb905995667be48bf85d49911
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/7400fdabc924434ab4a4949248c3603887ac06ffd2f205ae33e14495d86cd4f816bbd1999eeafa0257f518df1e7f7c522f596e847a71dbfbfccff4859f50acc7
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/46267543cad6584d7b7b9fcc8f18f21d
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0353d7d724be48d4185d3c181692970b7996f53f6a01723072aa5c94b53a8c5055faeed30df51659c252a46f4b941dec0cb24569323e3c85c166f14c5b7c8e9e
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/14dba2897a6e9d370fa9091c045375fc
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/10b79f9c059839f5b57fa8d2a381a034c4067262c4088bd354d14ea56bec097878069383aa9cfadaa09d73bd20fc348fb61662d863a8d62cb25d7af6b8e29858
-CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eed836d1addeb10d0901f836724aff1e
-CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/e33eca424d1529a1fb23ba9cf7fac345ed1cfc8073c975b6b31ca44d2e8c3f5083af65433df009b22483dceb2e43149f3c1e8433681fec5fb812e1d5b4243ce4
-CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/d5ae9f9519341fdaabf62267c89461d2
-CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/6421aa5d1bd6f08ad43f59ed4dc1bef8b9b598ebbbd3e48149730f3bec3471f8e2c02ffb338427326924290b8f52ef9e626e3313448bc931a61d866c5dc544ae
-CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/fc1df521395362a5aaa2e2aeef707207
-CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/f2e5a08e3cae171242ae6a20d2d4838c1529ce042745dc466148b7bbc06896d94476fd05c7787e6e8641bea752dfc0e6b09e95b160bede600d20d2ad68e7705f
-CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/0c2fc6fae4ebe293a7f0dc1e91f6531a
-CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/fdb0ad061cacad0557fde3ec216fd3666284f24ad6a86f4a4b6f946dccb112c9704f52edba86f3b17d84c824affbcfef740720348ef227380cf6017811bda80b
-CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/005e608dbef2b5cdb7624702ccc426be
-CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/8bb2bcd0a6b1901e8a9be20f505bead5c78ecafbe5a8271cd13385553e5744e0c7bff62976ac9e7d74b8f3bd467603d4c0f5658e6b120bb23066c15e0a644ed4
-CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/d6c2c7ad72bff7f7e5c43678d716a57a
-CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/36f5eba1b0be440797467cb7104652b74709913d2bad1b08ee2dc70f450fb8eab81b28f2b0bc8dfc238b3c46982c69aac831b4fad5bcee4e9dd114852fcb4a0b
+CompilerSupportLibraries.v1.3.0+1.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4
+CompilerSupportLibraries.v1.3.0+1.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran3.tar.gz/md5/c679907ddce62f21bc30667cc40d8d52
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/284b17b6634087f0b969d3e99b2e4152667ab5eb9e6b5813f9739bd14ae1c25dba01f15488e901ca5fcfd780b02bc02b6bff670fefed7d965dcb585e81b03782
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran4.tar.gz/md5/1b4f6efeb83f5f3e27c42eddeafe993a
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/b1d5aa122b2bd25bcd1ce47e000f71785d617d77f44acda56f9f5ad77101a0c54f6c6a4c5560a7c12ffb8c89ae325d4f056bd92f893d219385c3d5c85aa457e9
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran5.tar.gz/md5/834adb105f78ac1bb223ef309dbf7cdc
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/dd0440805145f1a8e8521633a955317567606bf2e3725a5a7eb90515128b077f2163832ab608022fab152526f2a55991f50256ab92104d5d62bbb8a740e25009
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran3.tar.gz/md5/d613881e48181bb8ac0bf34a456c9736
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran3.tar.gz/sha512/34214bca9f7c66e3c508b2f9d88cb296695721cfba0c001660e2edb0387a2efbb2fecb0360f8eb2b1d0ec502480fe63e802f350367498a342c455c0f58aadd82
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran4.tar.gz/md5/97e4ea4394df1d784ce4de3f75aed580
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran4.tar.gz/sha512/a072ceece6600b704dae5a7491f1ead9b4e11da3d4438b7056f2c71e59b0a37d3023fb812cbae205a4f1fcaf18a4b223a5ba2cea32131c5eda0d55b1f0649c23
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran5.tar.gz/md5/df09c5b33b2e307e0d9c2b39b450c0eb
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran5.tar.gz/sha512/d0a8dc03ea1667d90bd58c2376b575a1090a54a4412bc53b311a3ea910c76dc698be5ca1078e6ca8341244f1fd6b84201ba10c10baba194c1d6c3ffb7e69563c
+CompilerSupportLibraries.v1.3.0+1.aarch64-unknown-freebsd-libgfortran4.tar.gz/md5/4f9b257eabaf0a817755495cfbf75088
+CompilerSupportLibraries.v1.3.0+1.aarch64-unknown-freebsd-libgfortran4.tar.gz/sha512/876036a8b599008512ab7010f4bc5f11fbf963bb9b9f77499adcca21fcad89f94180f653dce3121e5c1206f4fd4ace717ef8f3b40d8009a71039a84ae7272588
+CompilerSupportLibraries.v1.3.0+1.aarch64-unknown-freebsd-libgfortran5.tar.gz/md5/0d296a080921b54d959a2a60884b7938
+CompilerSupportLibraries.v1.3.0+1.aarch64-unknown-freebsd-libgfortran5.tar.gz/sha512/d8232dd9131c2890ea2f5c90f62c646ea1dc93a0a6de4af0a98c7e69928c5ca5698e79ff9d23bdcf47de1f5670467c9e8fed5f01e82e009696641896f0658030
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/3e0727a3813c699b6daa041e336d6e13
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/00cc2c34af7c4a5df06deaff27dff5b94b231ede4afe7a47b7b783a8d2e62158c0ba1b2062d40df949fdc0a21ac703f8c9011f998ab032bac265aef153cea012
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/b7cb328b5e5fae5b5e456d058f5c18b7
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ff191595bdf4dfb2cdd77d42e591adc0b27ca0e1055efa7fb25fc06784f26add83e6c5c7594405bdfd715f9c8e6ae3f2171a50ae218b4b691099da754fe9bedd
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/46a3fc18a65e223ba59d984f99d42979
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/cb470147f6695b101d497bf2d84caeb1f97d967bf23d1844ad70be47505588d981df096378136a98c35cda5aec090255d60cf7c1c8def9801233c72ca002b563
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/28e1bc0fb0ac1512a8598f26ee3f376a
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/7ec17859790cd08942929281906918e4c69d7f306a8302dcd591a4a67b3d95f7f72f7afbeea3a86a0d94ca5b608b3bda00ce43b594e9f173edb0228c0f79ba49
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/f9bff1a49d95fc0f3ad3d4a90b259c87
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/8ad503a213b949f569c5b9eac28e33ed51cc55298bb66b147375dc12cb9ed90e60165aa2dca8e3d28f1a2c153894a9e4672bdb2ae3cfb3a67b1e06b345cb454f
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/d550a4dac1b20606681a56acc00c01ad
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/14ca10ad8809dfd2434e300ad5280915f21cc1ba159a9f4aed7aa2164ae624687a2a7a9e6dd99abcfe95f40cb037c72292c992f4483fa1affcf8a9b5cf29c9bf
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/3e0727a3813c699b6daa041e336d6e13
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/00cc2c34af7c4a5df06deaff27dff5b94b231ede4afe7a47b7b783a8d2e62158c0ba1b2062d40df949fdc0a21ac703f8c9011f998ab032bac265aef153cea012
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/b7cb328b5e5fae5b5e456d058f5c18b7
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ff191595bdf4dfb2cdd77d42e591adc0b27ca0e1055efa7fb25fc06784f26add83e6c5c7594405bdfd715f9c8e6ae3f2171a50ae218b4b691099da754fe9bedd
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/46a3fc18a65e223ba59d984f99d42979
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/cb470147f6695b101d497bf2d84caeb1f97d967bf23d1844ad70be47505588d981df096378136a98c35cda5aec090255d60cf7c1c8def9801233c72ca002b563
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/28e1bc0fb0ac1512a8598f26ee3f376a
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/7ec17859790cd08942929281906918e4c69d7f306a8302dcd591a4a67b3d95f7f72f7afbeea3a86a0d94ca5b608b3bda00ce43b594e9f173edb0228c0f79ba49
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/f9bff1a49d95fc0f3ad3d4a90b259c87
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/8ad503a213b949f569c5b9eac28e33ed51cc55298bb66b147375dc12cb9ed90e60165aa2dca8e3d28f1a2c153894a9e4672bdb2ae3cfb3a67b1e06b345cb454f
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/d550a4dac1b20606681a56acc00c01ad
+CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/14ca10ad8809dfd2434e300ad5280915f21cc1ba159a9f4aed7aa2164ae624687a2a7a9e6dd99abcfe95f40cb037c72292c992f4483fa1affcf8a9b5cf29c9bf
+CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran3.tar.gz/md5/73e14b94dc74d17aca38a51ad402f836
+CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran3.tar.gz/sha512/d37263a216fb3e9b94dd032642ed6bf5be154a5c66de3e4bd74e5e2059d9740958a673796eb652ca9ebea8ec09a7eec837d8906a50775913325899aa190808db
+CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran4.tar.gz/md5/23996e5c6690b35e7c36bff245f6f4d1
+CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran4.tar.gz/sha512/660dc4866a13f9a1ae98424b605723b250218a034e02151d4160d58ca07bba4fa1390e99e7fe2f31eccdd518d1ac4c5f5454968ce52525e3a2d21918b6b5bba8
+CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran5.tar.gz/md5/af836562cfaf76f0728be0d875d29ae1
+CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran5.tar.gz/sha512/a2b10c2f72d1e84c7b496b7ad6d38629342c93cd6a7f691e5bbe96ce28ef40fd38509d382d22208e40cc4953e7b93d1c211bf59529db0ad1a77b684ba75bc68a
+CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran3.tar.gz/md5/502f089e5ee03b3a290ee6e18577a22f
+CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran3.tar.gz/sha512/13a97c2386f37aba2416ec35fe67b99a1eccb880b0254ff0a70f2ba01a01a15c80251606ec7eb0503d59a7723542b6b9778d6c9d9e4ba66ae5cce51e46a9cb40
+CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran4.tar.gz/md5/221aa40c278faee74ab6af46686d68d6
+CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran4.tar.gz/sha512/9e4e598c8acdecebc812555de9631f022f6158d679c329537e37f83c76c818f31476a5827924b5ac12978515d64a7e913f220ca75314f41d3227573e9a2ac9af
+CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran5.tar.gz/md5/c364ec196e66dd5eadc3932b208a0385
+CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran5.tar.gz/sha512/3f7b80fb35a967d9354c2f4c40bb6d62751a0d791aeec09817cdc278393cacef089214f61d8338c0981f7a4ed3144d37bc9267cf0e7ce6c4cf651bc67c431b70
+CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran3.tar.gz/md5/4177f1ede00d81472bb69888f5b3e26f
+CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran3.tar.gz/sha512/09ab710599d237ee35fca6a39b4d67b36bbadb7d127797724743026eae72319faa161755b03f4cb67c83f801aa4132968b561245487b2c2c0836d0ff867c0e83
+CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran4.tar.gz/md5/9402d280886784bc245096bdc838fbc6
+CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran4.tar.gz/sha512/60e72336efdd307b88b1e6db5234388ac1892504ac858b412d18f072a33ca1aeaf1b8621ccf43027508b7a4653150f0849a89c57164beb1e7f24ef32f7fb7f11
+CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran5.tar.gz/md5/310b163628e7defdfa6a293360b203db
+CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran5.tar.gz/sha512/02e9a797246feb9c4b09b0c67c773dac5c3bb61568bdd48be147adeb2dc08fd2bd7151f2293e2756685d011e463e39dc5ca0f79593dda7501cacbc15adfc74e0
+CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/c139a9d54f39701e805d2af185a6f17c
+CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/0d1f29cb04b42b276edd7998a02f6166295f6b7a2a8ffdf6b2986145476385b19c2f93b012974835363ef57f2018bdb80814adef3b72b9378f0d2c6a8805c43e
+CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/3ab360133835e1d0a6a24bb2de1dde02
+CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/9c2f765b58a73b3705f787f68c995d8f2cbd211978c0ec8ac2adbfec6685f4b3a02aa63bf75b9dbf0a2a5c048e35536929d04b89c120671174d76132cbd2c7ed
+CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/6ce9e27ab33b35900d8f81c2ad05eec2
+CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/1d8af2664e68d18ef4f68b9fed28979af0acf3dd09c8064c4b25b3268808bc6901ce727b5b3ec3c27e37914a7c1f8c92e5ce35de093d66cb6a2e98ad59c2252b
+CompilerSupportLibraries.v1.3.0+1.riscv64-linux-gnu-libgfortran5.tar.gz/md5/6c292cf98c6b4cbf10aeb4f0af383222
+CompilerSupportLibraries.v1.3.0+1.riscv64-linux-gnu-libgfortran5.tar.gz/sha512/1497789d918d633f319f89a04241678602d3b0f441ca6f8f6d756f6d1fba59d5eca54fd24183e39e9b956cd3c053afd747dc03a9a1e2d4819d26de3539c5eb07
+CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran3.tar.gz/md5/0aae7ac19dade024e0228bb1a3565edf
+CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/b779badad7e6021875b5df04793445b4056d84cc217f389f9496d8ca61af71d98a667ec29b912131c83319be4d6e82c59e7c3f409f302cc3691899f0e77edd46
+CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran4.tar.gz/md5/6fcb9749463a96504f1e23cd97695f60
+CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/66d4cb8237859234f8fd49461b5976a7f155e02fb93c765208701c43c041dc8693f3f8b868ba74bd28614586c0f5109a5b5aa0d0d69ac38732ad6d84d2635e04
+CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran5.tar.gz/md5/af01aefc789a0388df504abae68fc01f
+CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/444d1d54fb6ef95f2093894c685a4065e9708504b820bd9325bdf32619eac8b2972b1601e788ff8f1ee2759117b726c04c8bb395820359bdc737bdfdc3c4026b
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran3.tar.gz/md5/df1c55a47f9faebf09ea093d5d1ee344
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/03477fdf14d8dfce204999e6825d9ad94c2c78686d59b251f39d1bb357b3c9d9a74339c4d5f5e97420870d44f7bc2fceca637fbf7b862d0d1cf04a19a2a0b036
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran4.tar.gz/md5/8812418d84c2ac289d64a597d4968704
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/4da50ea541c13a98ae05c6ff67b8021496b871a205f994605f0230a67eb6c58ede55aa3a471df8bbdd5618177d34914186cfae106664b80a7fef795e5fe97e8f
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran5.tar.gz/md5/55bd8dacbc4afff6196494542ea13eec
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/2bb63e68f56818c0a3bb988f395ebcbe99af2740f806e324c385c1dcd7a5dbb058afd286fb6d85a1621ca668aba962a8701bef96a4547b0d22f92d9e4f4b51cc
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran3.tar.gz/md5/1e06592e53de4448b0712a79e61b9a51
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran3.tar.gz/sha512/cf92bbc217a51b9a18e07c5b5248ac5f59f92a7924c5fc566a1bd5b87a1acd36ec9c1d64871b273f80670596c05c1795cec91294f32f8dc1490633ea6d543037
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran4.tar.gz/md5/fa81135fc7e697eb8409baf3fcafdcb6
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran4.tar.gz/sha512/32ab98be0521f2451ce9b71c5ce7dfc70094583df80ed8db3990a2041594839f065abcf6c847fe6b8293eac3b3395da16ab3d24cf5e15c962aa320b28a6cd4be
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran5.tar.gz/md5/d6f0a90da74eaf2f9bf4f7b884231a2a
+CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran5.tar.gz/sha512/cbbbd0284799f78cf20a41f1b2d110651ee0460f0191d519d522a5034a31edaaf62ef130e7ef42c28882e224a4f997f0bead5b569254cdda7100b1f41e286b78
+CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/c525e70e726f0fc1c49deedd08ab6026
+CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/eb50d1443e1d13b892c141ac579b2e807f346d98a75e2ce9a0a23494c754b7149d1900046f5c39e324b48bfeedc6bee590a7e2c182e6f0e3c07b9f816fcb9d6d
+CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/9777c3216792efd8e8625f5f72442be6
+CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/215398a9c893a5298101d98a3cf3df1e59e6dd4b0d66b3cdcd9decd8725541ae33c30d1e391fb51d7aaaa33dc5911511257f7ee7e3ea6350a8942ae70fcb3ada
+CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/c7571567040d646935234b51c121745b
+CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/159900879d46eb2a2e45f0bfbf6eb7b03c1e28705d576ad712f67a3ae242e7e4642c08f3be181b9fbac659e1c76de6ca278ad3662fd15e8371adc7bf19e9e9b3
+CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/63187354746bbcfd43c11b8047595d21
+CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/7c004e5ee255a9cc410b2f8f8836d0dffae8f4e35552c57a74a9c2eb8dadd6f0966ffceb296fd61c5c0ad7a0ea25c80ee2d7bd80ed3ccf1305f236b64e2dad5a
+CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/64f5d316b2d694dbdb2c96e557482de8
+CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/e1b3be2e6e9d4ccae55ec131f6cd51a7c4391639365057f7c8ecde539c9f5fa4d73942cbc2d06c62f43c2e1bca0469862a9ac6dc064536400ec09f37a20e2b1d
+CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/d10bb5d9facb9428c22f920798876f9b
+CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/7671d0a7e1d965d0dfd56e3f037dbb47a2748cbff2656be26741e1b15687b3ba48bb44e7d43e005cd610257c94ffa8e71eb3e3ade772ee5c6f6aeee4535f04ce
diff --git a/deps/checksums/curl b/deps/checksums/curl
index 85974ba0bc8a0..24dcee75afde6 100644
--- a/deps/checksums/curl
+++ b/deps/checksums/curl
@@ -1,36 +1,40 @@
-LibCURL-a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0.tar.gz/md5/e8c53aa3fb963c80921787d5d565eb2c
-LibCURL-a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0.tar.gz/sha512/8e442ea834299df9c02acb87226c121395ad8e550025ac5ee1103df09c6ff43817e9e48dd1bcbc92c80331ef3ddff531962430269115179acbec2bab2de5b011
-LibCURL.v8.0.1+0.aarch64-apple-darwin.tar.gz/md5/f697b4391608c2916ef159187e0d0b29
-LibCURL.v8.0.1+0.aarch64-apple-darwin.tar.gz/sha512/41da87eed77ffac391a60a4af7fdc707f117affebe54960eaf43e3077440ce17d95fbe0f47de41bb1456e222e7a126d687fa0beb26cf98713b3472e9b3ba9e57
-LibCURL.v8.0.1+0.aarch64-linux-gnu.tar.gz/md5/9d3e7e7601ac21a587bbb4289e149225
-LibCURL.v8.0.1+0.aarch64-linux-gnu.tar.gz/sha512/67ac7bc108cc274ee5e088411dd9d652a969952892236d6c37a6dcd710a1887f9ff83df2c01ca0f5b16b2086852077d6c62ae7a13f7b9ac4b9e257cd1aacb0ea
-LibCURL.v8.0.1+0.aarch64-linux-musl.tar.gz/md5/bd2b62cd40b9e87fe149d842d4ff55ca
-LibCURL.v8.0.1+0.aarch64-linux-musl.tar.gz/sha512/7c6bff3dbe341e2a271b61e02767a25768b74631894c789fffdef580605d821518274a04d9441c9b5d3255b9a9297d0d35f22310dccaab367aa92d928f25c062
-LibCURL.v8.0.1+0.armv6l-linux-gnueabihf.tar.gz/md5/9effcc21c5074ef88ad54c8b6b7a3f8f
-LibCURL.v8.0.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/9327fc8e0db9edcf941548b0291e0bafe9b956e92f6edf47795ca961303a24ed305b30b09f29478a70149056411c4ca4652facbeca89c2bb3db41a6c97df14a9
-LibCURL.v8.0.1+0.armv6l-linux-musleabihf.tar.gz/md5/9cb716973ec75e2a2fa7379201aad59f
-LibCURL.v8.0.1+0.armv6l-linux-musleabihf.tar.gz/sha512/3e4d22be628af7b478862593653a5d34c2d69623b70f128d9f15641ab3366282aadee96bc46ffacafa0dcbc539fbbda4e92f6ff5c7a4e65f59040948233eabce
-LibCURL.v8.0.1+0.armv7l-linux-gnueabihf.tar.gz/md5/95bd98a64034f8dfc5e1dda8fb7ac94e
-LibCURL.v8.0.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/6a7898670e71efd7f06e614cdf535cf390eb6def9e93409d4ce2d9811a8e1f892959c0f6ca8e370f49e215df495ee8f95e1b7d9f92e2708ca548344b6ef9cc22
-LibCURL.v8.0.1+0.armv7l-linux-musleabihf.tar.gz/md5/42aeb569e80865377c65bba6cc84b262
-LibCURL.v8.0.1+0.armv7l-linux-musleabihf.tar.gz/sha512/fa46e52d8abd49e22636e48fb43f11be95bfdabbc13142e0cdaf4bb892ff982eb09abd9f3bf1c33ad374efc18ce21ab9968ed22c084411a55afddec0c459ab3d
-LibCURL.v8.0.1+0.i686-linux-gnu.tar.gz/md5/ded5d6d6580b979c372992c0fcf0aad6
-LibCURL.v8.0.1+0.i686-linux-gnu.tar.gz/sha512/f8a40285a25d61878e87d525bebcfe6e8c30cc5a40f38297de774c8e3191490c38716b3938cf81582afb23714a38405c20ed0241bcd3d41c68a5594822498b70
-LibCURL.v8.0.1+0.i686-linux-musl.tar.gz/md5/cd2bcf96545c783f5012611824169a93
-LibCURL.v8.0.1+0.i686-linux-musl.tar.gz/sha512/318dd3adcbf36c7979df9f394e78b7fb876dc60c9ec87d6b0edf47676c69df4dc3e73c07b2434b15c6e7497b385dc0fbf3fe7e3235b291a369f6f1d883c99645
-LibCURL.v8.0.1+0.i686-w64-mingw32.tar.gz/md5/276cc56eaf744ac0a5cec6c8c396ede7
-LibCURL.v8.0.1+0.i686-w64-mingw32.tar.gz/sha512/55cd7882ad976aeed1acaab7b1d59279ff3a0d2456d0bffa6240957ac6f152e903485f0ca05baafa5e97e0d1474cb204987eb9c94b1b2ddd657b52864a44c646
-LibCURL.v8.0.1+0.powerpc64le-linux-gnu.tar.gz/md5/cfdc41294b2f4aa85bb8b27beced17ca
-LibCURL.v8.0.1+0.powerpc64le-linux-gnu.tar.gz/sha512/24f92091ab44a3be40228a9d9a57febc026f49b12c538c98e46a06dbcd679086332b773662126c68dbe4a60dd90a77c970c8a398237afbcf06c660fdbea16a76
-LibCURL.v8.0.1+0.x86_64-apple-darwin.tar.gz/md5/10a19a4f428951adbca7cfee91406498
-LibCURL.v8.0.1+0.x86_64-apple-darwin.tar.gz/sha512/28ddbad4310ed886c65edf28ccf01a5aba77fe11784740600aaec2aaa5c10c5e5915e297a4d72dd85bbc5304bb2027f5d18b95f13868b4bb1353fafed7bce4e0
-LibCURL.v8.0.1+0.x86_64-linux-gnu.tar.gz/md5/a68df850605cc9ec24268887e4b4ea77
-LibCURL.v8.0.1+0.x86_64-linux-gnu.tar.gz/sha512/f532dfcc84dbb4b92229a79b5629b16198061158e1f12d2dd37948cd0ceccc095221b5fc9a8e2de30de19727c727ee500c8ea4508722c677c7938ddef1c40350
-LibCURL.v8.0.1+0.x86_64-linux-musl.tar.gz/md5/023a2d8271173de0a02bdca8d1d55bbe
-LibCURL.v8.0.1+0.x86_64-linux-musl.tar.gz/sha512/e3195f917c250f31ce9669c304918b33664c5b03583f328929e73377f4feff525cedac42dc74adc9ba98a704630294a5697f07eb95ca520c6db4a67f0f83383f
-LibCURL.v8.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/ecd39a1cc45ee76751e1e3c5edf469d7
-LibCURL.v8.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/fa06afb1173bc23474f8f7992268ae9a0df52bc3c1af86d2b60da2cfff43371bb029b51debe638d81d8a1dd334a95dcd3c53dc12923220ad9b1336fcdad1ff8a
-LibCURL.v8.0.1+0.x86_64-w64-mingw32.tar.gz/md5/d9a735335e3603635a56eb3b86e6ea87
-LibCURL.v8.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/8fc6677b1be27a900d2a984cf9f9f4b3aa1555bfd732da2bd6553c28da98048c4c86216b57744d7156de94c522b013768e57f42e662845002e5bd9f730c818a8
-curl-8.0.1.tar.bz2/md5/b2e694208b4891d7396d118712148ff3
-curl-8.0.1.tar.bz2/sha512/24e84e922612ebf19341525c5f12f36e730cd21a5279cbea6421742d1ba61e5fa404f2add2e71d64e5692a1feabfa92c5a5d56501f161d1e157718fee467e0a5
+LibCURL-9ea5c5d6f5b88615d9fe23379b7f951787b99fd3.tar.gz/md5/a1e927b166ffd98c83436d21588a5d19
+LibCURL-9ea5c5d6f5b88615d9fe23379b7f951787b99fd3.tar.gz/sha512/be87705fd4232f59a1a39aef6594ca2b34823ec411ae2e2ee1463836263f88599c6aadc9c0d9c635bae399761daba38046169d8cc5e02e7531db104037e7177c
+LibCURL.v8.17.0+0.aarch64-apple-darwin.tar.gz/md5/e64eb49d23779ea99b9d0eab216cf35a
+LibCURL.v8.17.0+0.aarch64-apple-darwin.tar.gz/sha512/e84613740ec0d7f8fe1243f50cd999b9c8a3563f1d3d20b3e5774b92dcf46b95240734d4ee7f58aafd8256a3d159880dfec740fc311c18a4b510daaad4ff563e
+LibCURL.v8.17.0+0.aarch64-linux-gnu.tar.gz/md5/da4a75e8978b88cdf0cbe0672b8b8b9a
+LibCURL.v8.17.0+0.aarch64-linux-gnu.tar.gz/sha512/71a41bd1a24f296db646f0427e0058df4e7c1e1341b40a1189a42cf9787dac3f5be1c25245bcc56bc85c9c9fda0f6c7889757fb1cd58d9ea5b15ad21c93f315c
+LibCURL.v8.17.0+0.aarch64-linux-musl.tar.gz/md5/68d5efad085362562a6e2460ffba5e80
+LibCURL.v8.17.0+0.aarch64-linux-musl.tar.gz/sha512/a9df0e2ec54ae347c318ef6104dadc4e86f258790b97dad357b8d40091bdd131eb42aa81da38cd595f875f67f4caa01cc4db400c7fd36f0108aee75ccfc95942
+LibCURL.v8.17.0+0.aarch64-unknown-freebsd.tar.gz/md5/318de5d65d0d7f1991f1b1425582d300
+LibCURL.v8.17.0+0.aarch64-unknown-freebsd.tar.gz/sha512/e2491599a7673ca0a1eac716ca88dff470104418cf1e2612d8371e9d4c7c7a76da1da2ccf7d692c2dae412818de729f3e55f5cc20ca8e36fa190f3587e7c515d
+LibCURL.v8.17.0+0.armv6l-linux-gnueabihf.tar.gz/md5/9465bb7ed5acace0e2cfb847e8d1d6a5
+LibCURL.v8.17.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/ef116010ad762f1ca5e0e4beda766790f72001598af348589b0abb6f7326c20713b71e0a3afd036ed1e4de618964f40a394cb78a8661596cdffb7603c6d93b0c
+LibCURL.v8.17.0+0.armv6l-linux-musleabihf.tar.gz/md5/6ff9feac5bf4a15f91951ce22dc35fd4
+LibCURL.v8.17.0+0.armv6l-linux-musleabihf.tar.gz/sha512/7edb6594691a61c18a5884c47cb67a061557fd2d3e6832cd069fb3304e5001431eb42e5c71984035935d47501ec0253093a4fc4714fef43a9bc8c63f07cffea7
+LibCURL.v8.17.0+0.armv7l-linux-gnueabihf.tar.gz/md5/8dfe428d4f2cc8e704038a33a2c57fbd
+LibCURL.v8.17.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/5d6e89a8dce99eaab49d06b8c8f0141cb9130f1fe074bbe5f3305bf12d791be590ff1bbbb7a7e336e10a596729873e084cc3ac9cdf485c794fa5101d4b685b80
+LibCURL.v8.17.0+0.armv7l-linux-musleabihf.tar.gz/md5/6ec83f65ae02ea7aac49efbd24d321f5
+LibCURL.v8.17.0+0.armv7l-linux-musleabihf.tar.gz/sha512/0680728f165163d023cad2d753c94674c55b66219daa38396c0b36b8425f1302a229cc40cfddb900acdc78e07a43e15aab3264b4c411e9d3a7fde06a9e83ea0a
+LibCURL.v8.17.0+0.i686-linux-gnu.tar.gz/md5/ad1072c60c92da4439b472feb615fe95
+LibCURL.v8.17.0+0.i686-linux-gnu.tar.gz/sha512/702cc4936f6729b488b91f64f060b10c3ac8c2a542fc98c5fd237b3dfd28b88248d415fae0598845275c77f91fe4e3662141b835615541b66ecf357475610cc0
+LibCURL.v8.17.0+0.i686-linux-musl.tar.gz/md5/9d6baf6bd390d27bffad65030cad3e85
+LibCURL.v8.17.0+0.i686-linux-musl.tar.gz/sha512/567525f450e25157138776dfb685837f25d44176d1d84a7444bd928deec8ea5caec708e725832f4e58009a9c19b79670b2c8147ea88a0a0ee76203d8450adbb3
+LibCURL.v8.17.0+0.i686-w64-mingw32.tar.gz/md5/1a01f12a2913768001627ad8e6b71934
+LibCURL.v8.17.0+0.i686-w64-mingw32.tar.gz/sha512/9ab1cedad5709294f533c4b5a80be44f0340ed8c2769a6938e14d5ae7112cc2d901678e5974573ac48de6ef231f865e882a0dcd883c1ae86833b741e94376188
+LibCURL.v8.17.0+0.powerpc64le-linux-gnu.tar.gz/md5/6e362d68e569077497dddc4e85c2ff47
+LibCURL.v8.17.0+0.powerpc64le-linux-gnu.tar.gz/sha512/defd4043703660cef7255bbfe2060dcf7765a492210870733c5dfb342d1441aa79174e4d07ea9079792f7c47b6548d50894d57b2d7c65d92583a223e825c5a80
+LibCURL.v8.17.0+0.riscv64-linux-gnu.tar.gz/md5/356209913221fd423b29b589222aadfa
+LibCURL.v8.17.0+0.riscv64-linux-gnu.tar.gz/sha512/6194e30e23596972948f41784a6b4bf8600ce7389b399b70b3e8063c1ea8853db4753731adf71e4c957c5c2b781b7f15339259a213eedd78422b7282ae4782a4
+LibCURL.v8.17.0+0.x86_64-apple-darwin.tar.gz/md5/6f68a0461baa863512a5eb1c7ad2ba7a
+LibCURL.v8.17.0+0.x86_64-apple-darwin.tar.gz/sha512/6c0ee59c80c1d134d09413d5abf823735d60ff02b5dc7e45ff68353cc67d52e74779105f7965d2b7cf2637b56a0cefb9448f503fc0d732fdb6bcaa91bb765bac
+LibCURL.v8.17.0+0.x86_64-linux-gnu.tar.gz/md5/9d0eb74daf6e8f5853c7e74803d47670
+LibCURL.v8.17.0+0.x86_64-linux-gnu.tar.gz/sha512/e876c4bc5facace14e070392685760eb1d517d5ed3576a130fc983843651c4bcddd686fa3d7945e85b2ac22831ff358000e8278fa2c213faaae6565e854721ea
+LibCURL.v8.17.0+0.x86_64-linux-musl.tar.gz/md5/f6fb57bfb43d0f745e55ca1cd5cb7390
+LibCURL.v8.17.0+0.x86_64-linux-musl.tar.gz/sha512/8922d502b711e1de7513077e9ac106933a579c70359a52617be93a471a6fb881efb1c781b9c2bd630b46eed85257611a1459608185e7f404fef6afd9e6063725
+LibCURL.v8.17.0+0.x86_64-unknown-freebsd.tar.gz/md5/b7f5b785066be2469c4f662bdd2aec7d
+LibCURL.v8.17.0+0.x86_64-unknown-freebsd.tar.gz/sha512/ced5ac1c2703d1eafc30cee93f7e597f009e6d2afb0fb1b60b90f2c3a21be5628d5c6c5e9e5dd701aa7a14abb2ad74c7e96e8a3e0478d831c4c7cf922166378a
+LibCURL.v8.17.0+0.x86_64-w64-mingw32.tar.gz/md5/d583662c810f863072c3c4b17fb2cbdb
+LibCURL.v8.17.0+0.x86_64-w64-mingw32.tar.gz/sha512/233a7a3f05197819a989d9e4a251ec7e34f44ece7a1a545dc833e6f22c4f2a2f6b687496871108d4f387a6b8f63f25f97322146df5f2f324ea2e712405e56236
+curl-8.17.0.tar.bz2/md5/04b44270eeb98ffb91a9e6fe070d0622
+curl-8.17.0.tar.bz2/sha512/1fabd262de3f6829cf9e67591429ebfafc2528d4c35a13fda87bf7944fa5892fdea3c0c01a8705c92425bc6fedfc93ee75a7751aa6586cdfc2e815c8408c08ee
diff --git a/deps/checksums/dsfmt b/deps/checksums/dsfmt
index edadf5c01b1d7..9d5fa782663ec 100644
--- a/deps/checksums/dsfmt
+++ b/deps/checksums/dsfmt
@@ -1,34 +1,38 @@
-dSFMT.v2.2.4+1.aarch64-apple-darwin.tar.gz/md5/0299af20dae6bed519635900687f4aeb
-dSFMT.v2.2.4+1.aarch64-apple-darwin.tar.gz/sha512/5f20bd7602f09dcb23299d979372453db9a0e76a66129d69cc93c4b45a65ad377486f3cecb7093ff65307f515358420dc318b19eaf5945ff2fbfbe6886e95efa
-dSFMT.v2.2.4+1.aarch64-linux-gnu.tar.gz/md5/78a0fa53ad3db17f2849c744246a6bc6
-dSFMT.v2.2.4+1.aarch64-linux-gnu.tar.gz/sha512/b855bf3349f1ee33978d2c35999fe24a91ee17c5af345881e26351099cea05477528da9df43773d82e20917b4b1dd8c5590eb5ebb99cb5c9c425d03e38192e32
-dSFMT.v2.2.4+1.aarch64-linux-musl.tar.gz/md5/45829fa624e98f806e184cfdbb918a7a
-dSFMT.v2.2.4+1.aarch64-linux-musl.tar.gz/sha512/28823838bba7c1bb40f636835b1f7b15c49c5395a72a1261f3d5eb22c54b487b98bbcd7cd79f12297ffb37c62bfc3b69f0ce9b7036b5c2662d6305497d2e09dc
-dSFMT.v2.2.4+1.armv6l-linux-gnueabihf.tar.gz/md5/fdf55ed8c59b6cc0409fc8154777d57e
-dSFMT.v2.2.4+1.armv6l-linux-gnueabihf.tar.gz/sha512/8249e8a74ea958ebdd1e4e967d1020158c49643bc33855a5f0043b77026c250371d60b95e8370e0f67dbbaa380a7c02fc1c4eff7d49933a5f471393abc2a266f
-dSFMT.v2.2.4+1.armv6l-linux-musleabihf.tar.gz/md5/4814dcf836033065745f3532ceabeb33
-dSFMT.v2.2.4+1.armv6l-linux-musleabihf.tar.gz/sha512/75b8df698762e421cbef208cf7fda556f812f7e9c0481f83ddf38e468459ffa6fbdde86b5942f28f47225c73901c9863246f77eed93abd73052b6d0918717444
-dSFMT.v2.2.4+1.armv7l-linux-gnueabihf.tar.gz/md5/bff0a088b3bdf557dcebc48c2b260bb5
-dSFMT.v2.2.4+1.armv7l-linux-gnueabihf.tar.gz/sha512/609b45b7330bbf6e093fe6277dd14c9e23fd8c8c5e4db6a7275d29c6436f7602cd5069a0912a6ae3fd02d492a25c56400166b25c02b7379eb152eba3aa019dcb
-dSFMT.v2.2.4+1.armv7l-linux-musleabihf.tar.gz/md5/5193c1f1c2d056b3ffd265f4ef18965b
-dSFMT.v2.2.4+1.armv7l-linux-musleabihf.tar.gz/sha512/3e1b0af492a83c076087923d317291fd473670626599d3d03ed86a4515362a24610f3a9b2bd4b71c15bf86b03e44a11fd973f9f16d8b01bfdabbf7ee1ea7f4bb
-dSFMT.v2.2.4+1.i686-linux-gnu.tar.gz/md5/69b959d409030f86eefbe1a0d4196787
-dSFMT.v2.2.4+1.i686-linux-gnu.tar.gz/sha512/0ff871b96031c5f11e5c5fbb4fd35c8bf5e3b1fa5c43dcece275bc847a82b89f0f60db5b273bef2dd31572e89c98694fd1cbc2b442ee3a5fdf3b44e8707ef338
-dSFMT.v2.2.4+1.i686-linux-musl.tar.gz/md5/1fd3b4d5169be306b86cca9dfa6f014c
-dSFMT.v2.2.4+1.i686-linux-musl.tar.gz/sha512/d5e129abf6ff8a1077bb9de27fdc17c131f26d9c3707c189c02649290b50699f26e39230ef875fd172b54b1e28b1b595cbf835c6a8c36e1101951444e174f02a
-dSFMT.v2.2.4+1.i686-w64-mingw32.tar.gz/md5/2a6ea60fe134309ecafb0efd5364b186
-dSFMT.v2.2.4+1.i686-w64-mingw32.tar.gz/sha512/50ffad9c0071746acff16532b71d84d53c6f11039aa10167f49ac9293f4819a905f63f521c93a45daed5068df0fea1699a15a1a1d6c100dce0932cce4165442d
-dSFMT.v2.2.4+1.powerpc64le-linux-gnu.tar.gz/md5/060a4ed22e6e36a661b08c804a7272bd
-dSFMT.v2.2.4+1.powerpc64le-linux-gnu.tar.gz/sha512/f1367b910512b996c4e6bfcb4a99afc3640a4ad5ec8e6a2fc092d677c7eb68527800c4d248188a2cd7a2d427cab472a2fdb48978aeab39805a62f774dc58bb50
-dSFMT.v2.2.4+1.x86_64-apple-darwin.tar.gz/md5/d768332dd9902c4e3869a160fb002af3
-dSFMT.v2.2.4+1.x86_64-apple-darwin.tar.gz/sha512/db3e43ea9b884fb2ddc9585a224d85835ead169f5996ffb20930a8970893f9cbbd8b54832a4fc78745c7dcd7991f973e929965ffded32ae8289c0be68316e60d
-dSFMT.v2.2.4+1.x86_64-linux-gnu.tar.gz/md5/671e5a06c68d23854051c78268bfb9ed
-dSFMT.v2.2.4+1.x86_64-linux-gnu.tar.gz/sha512/0b8eb9e527cea444fdc33a3089684f9b85a8889370fe0b240718d32332523f1175e38a9b51fdabf4a38bad4a820e956baceac253001213b1fc3e7a5eabf8664a
-dSFMT.v2.2.4+1.x86_64-linux-musl.tar.gz/md5/65929d7a40fea8e8783cdeb77205ee06
-dSFMT.v2.2.4+1.x86_64-linux-musl.tar.gz/sha512/ce1b49365b764cf67ef4757f91078ea11afc6e07c4a776258a09f58c9ff84ece440d80714a491c1a21da06ea6a67bd27d2933b862dbfecf3c357f3c32ebb4fc1
-dSFMT.v2.2.4+1.x86_64-unknown-freebsd.tar.gz/md5/e27869ac4f1ea6774ade7d3b53cd301b
-dSFMT.v2.2.4+1.x86_64-unknown-freebsd.tar.gz/sha512/762571a5d5773c2d9780586603859272f48ed67d6c8b09cd95c92fd62dc9bb03c274b12c2c04e05f426c9a42edbbc8e33beba3c79865f2c49459eca2d588b14c
-dSFMT.v2.2.4+1.x86_64-w64-mingw32.tar.gz/md5/74e5c27ba9eb654b4e998ce73719e724
-dSFMT.v2.2.4+1.x86_64-w64-mingw32.tar.gz/sha512/59badcef14b06f14f8f5bce1c72de6750c8310ae18581e24b5d663edefe1bed3d120b4cebb87b53dc664411b62d9802f75aefde4e5236ada1dec740e6ef2445d
-dsfmt-2.2.4.tar.gz/md5/ed30e63552d62df48d709dde4f755660
-dsfmt-2.2.4.tar.gz/sha512/fe84e986cbf198172340adfac0436b08f087643eca3f1ceccacde146cbfd8c41e3eb0dfbb062f7ca5f462db13c386abd7c269bc0cbefc9a0ecf97a8a8870a2e4
+dSFMT.v2.2.5+2.aarch64-apple-darwin.tar.gz/md5/4d9e6a1ed07d1fe1557845b763224eeb
+dSFMT.v2.2.5+2.aarch64-apple-darwin.tar.gz/sha512/930e12a9b6ac82888f4122515a8a7cc3aa5d5363e500455b33c57efb7656041fe3f0fa68b02dd048b2a9f00abb56449415f1edf600ef09703aaed991e1d6f23d
+dSFMT.v2.2.5+2.aarch64-linux-gnu.tar.gz/md5/260e14855dbc7773a2ca906d58cc57f2
+dSFMT.v2.2.5+2.aarch64-linux-gnu.tar.gz/sha512/820ca4c6afde931e855b74015150f4ffbb513276c3fa7dbcc1ec8d34c02d4989fb7424a6e4f81f93d054811b5f54f8633d955b05acdb088387ee90f1c3b00915
+dSFMT.v2.2.5+2.aarch64-linux-musl.tar.gz/md5/7ddccbad6b5c9de4be187fe76637a0d8
+dSFMT.v2.2.5+2.aarch64-linux-musl.tar.gz/sha512/e3c225da00927096e3a6cd4abc681fba8f469cb74828e7054d4f5684d71dcb8e75c9a81f14fa10bfbb78f62f9567a31a92edcca8d797e5810a2a44a3fc17bc84
+dSFMT.v2.2.5+2.aarch64-unknown-freebsd.tar.gz/md5/d592c490259f45acef2308fd61046404
+dSFMT.v2.2.5+2.aarch64-unknown-freebsd.tar.gz/sha512/4f4e100b4cd5301e815f29f911b3ddba845a90247f1d641ea11153f5845c700e6f94ccd4a1d46fbb9e64a0c5698c5419c52560f0629629ffd665cf9ddec24e17
+dSFMT.v2.2.5+2.armv6l-linux-gnueabihf.tar.gz/md5/a70329e0a6c57009c6b6950fd34089f6
+dSFMT.v2.2.5+2.armv6l-linux-gnueabihf.tar.gz/sha512/4418c42165660adc050e872ef834f920c89ed6a0d2b816821672b1e862e947aad7efd023289da9bf05bb2eb9ec4b9d2561c403e2d5384d5314a4ba016b1f9cfc
+dSFMT.v2.2.5+2.armv6l-linux-musleabihf.tar.gz/md5/6ffc798b8a0c847fa5cb93640bd66ab3
+dSFMT.v2.2.5+2.armv6l-linux-musleabihf.tar.gz/sha512/94e5ae07d0b1420abd7290519bce6f77deae634bbb4df31e3f02416bf509e555a9b1c9d19dd77ca76a308c2b86d5c9d4718b9ef83c13167b88a8181d8ca7e73a
+dSFMT.v2.2.5+2.armv7l-linux-gnueabihf.tar.gz/md5/660d95aa08580ca1716a89c4d8b1eb24
+dSFMT.v2.2.5+2.armv7l-linux-gnueabihf.tar.gz/sha512/bc757a9f805047be5375f92c10a3f3eab69345a4ec5cc997f763e66be36144a74d414ff926df8e17b9d5a2394189269c3188c55e0b7c75a72495394d65510cef
+dSFMT.v2.2.5+2.armv7l-linux-musleabihf.tar.gz/md5/78c487049092fe61949d506637c713bb
+dSFMT.v2.2.5+2.armv7l-linux-musleabihf.tar.gz/sha512/03ddada4478f05eab7d2971b2deaf2cba91f084d7ce66fc8219bcb3cf5c308ea13959fed95568ca80f4ce11794e197092984919265716de8f2558e2cb30d94ce
+dSFMT.v2.2.5+2.i686-linux-gnu.tar.gz/md5/11463fd3981a8c143d7aed691d18d4e0
+dSFMT.v2.2.5+2.i686-linux-gnu.tar.gz/sha512/db946a4fbd8a3163b8b1c25e02bfc4a841da7d2532892a99037bd48ac98e1840691e8cc0127d9457a82667a0131e4826cb4e9d0a13f127afc62da4eb68af5a3e
+dSFMT.v2.2.5+2.i686-linux-musl.tar.gz/md5/a61405f72c9a3bba5718f078c68e61a5
+dSFMT.v2.2.5+2.i686-linux-musl.tar.gz/sha512/726f130bbbfd0dece4185b89a25a73f3b5b950ebfb7f86aea6e9cbcf9ae932e591d20b854de0b4985103dbf8b4b7cb3560661c5070af971cd2c1f3ec3e1ea7d2
+dSFMT.v2.2.5+2.i686-w64-mingw32.tar.gz/md5/3bc27ef8f26c7a26f096cf1d558d408d
+dSFMT.v2.2.5+2.i686-w64-mingw32.tar.gz/sha512/ea3608d3ae3874ea57a1a08f69abe2a1638bc340db71c6fe3c4fd5637d8c54943bf16b099a46817387c1ed4cb5f3cd1c0ff19ae8a4ed85dd555555821af06374
+dSFMT.v2.2.5+2.powerpc64le-linux-gnu.tar.gz/md5/fd8c73961ef7c82201e6d86e8bf4324c
+dSFMT.v2.2.5+2.powerpc64le-linux-gnu.tar.gz/sha512/1bd0ebd019cfc6f25f7ba007547c5ee297854655b93c55e90d8ead420875de5a087e38956693d5e901ff2abf667c72aa66fb34f587b82adf4b91b3d5d666b5c7
+dSFMT.v2.2.5+2.riscv64-linux-gnu.tar.gz/md5/5c4981c2c016436faf6f33fa8df4204b
+dSFMT.v2.2.5+2.riscv64-linux-gnu.tar.gz/sha512/9b56f0abbfb2731d23b99b5286b69c31bfc21eb14f49d88953680d5596c20c6b4d59520828f0a398915d56c82e169a36316f8e319dfe4e25a8e3f44f2aca4938
+dSFMT.v2.2.5+2.x86_64-apple-darwin.tar.gz/md5/e21e30097f1f02c5cc14cca3f73ce92f
+dSFMT.v2.2.5+2.x86_64-apple-darwin.tar.gz/sha512/48b19706189eabcab2c823e6143ae22f4a330abb239c7a952913fe9973c5f750d72b113af32a82a1f6124c534495b26d1f81ccab407d8d15ee459dc83fb8d3cd
+dSFMT.v2.2.5+2.x86_64-linux-gnu.tar.gz/md5/fa671f4ca14b171d53c8866d03f9162a
+dSFMT.v2.2.5+2.x86_64-linux-gnu.tar.gz/sha512/2e242a1448da0508ea88cc1a106f1e74f8d7e7562cd82b80d86abf9a8b454653ad7612e25c30ce00c23757e8a5b7b5736253b00a52f9473af6c5d4df768138f2
+dSFMT.v2.2.5+2.x86_64-linux-musl.tar.gz/md5/c648294163882ec539ab646542c74880
+dSFMT.v2.2.5+2.x86_64-linux-musl.tar.gz/sha512/9e96a47d660854b6517364f0db40a2f4e0e3b814499a0349f7cf550b1c8d04589fca5eb4a75bf34f36d1b5d1b2277b3e9a961c887092abedd08f438e025329e7
+dSFMT.v2.2.5+2.x86_64-unknown-freebsd.tar.gz/md5/5a9b811be74f02202c57588f35582cb6
+dSFMT.v2.2.5+2.x86_64-unknown-freebsd.tar.gz/sha512/8dc6cae5cdf038fd5647cf86b85a15ac082d35b4532340e145b7e091839079ff47371aef6c3012a67692e492622b4f84db8f0ccf46049cc94926aed5c9cd9fb4
+dSFMT.v2.2.5+2.x86_64-w64-mingw32.tar.gz/md5/386adb3b7593c222dc7a1060a1356b21
+dSFMT.v2.2.5+2.x86_64-w64-mingw32.tar.gz/sha512/fe2ab5021126807b37042e89a22ef9a869c6a0a028680df445773b2affd11c2b02148be07d53504ea3842bb38bb62fe039529688266c1cba3545a892bd4dc185
+dsfmt-2.2.5.tar.gz/md5/d22e476b52cdee7d5b90d2f289570073
+dsfmt-2.2.5.tar.gz/sha512/951e8669350f750b8915a819e704eae0a9b9c9518b3e3b9a1905f9ca0d25cc4c2486cb479e258a4a114e9c26ceb73a6c4e9f1cc02ed19173aeb8f20189754f6b
diff --git a/deps/checksums/gmp b/deps/checksums/gmp
index 0c45aa6a00ca9..949e4d738a472 100644
--- a/deps/checksums/gmp
+++ b/deps/checksums/gmp
@@ -1,60 +1,66 @@
-GMP.v6.2.1+2.aarch64-apple-darwin.tar.gz/md5/37a4c537149a1d6d7424833294e61dac
-GMP.v6.2.1+2.aarch64-apple-darwin.tar.gz/sha512/33dd86279b5b3b08496180c92971c2e7ef84715e9ed3a80071a178ee94de6231ea3cf7b4dd4fa7e0dbd0b386a1a04c4f6b28446e86cb92c100ebb295b2f5ee3a
-GMP.v6.2.1+2.aarch64-linux-gnu-cxx03.tar.gz/md5/44ef76b228cdc4cf54e5d4b40a29034d
-GMP.v6.2.1+2.aarch64-linux-gnu-cxx03.tar.gz/sha512/255a680c75d3e8ca542dffc47050adfce038e25a12a4131c18dc719d36b364c1a6488ee5743d1c5de445b4bc5ccbb932399f7071083d86fe5bd2befc521cfbfd
-GMP.v6.2.1+2.aarch64-linux-gnu-cxx11.tar.gz/md5/0289ffc3621b5d62dc2f9e1b36c41f9f
-GMP.v6.2.1+2.aarch64-linux-gnu-cxx11.tar.gz/sha512/f27b82efb5aa1d7eaaed7574d3312969664eac38f45cf40c6de13ca20b256d45481546fc1a402e6c04bee416c842a092a4e57b8df702bbcdc52f742555d07aa7
-GMP.v6.2.1+2.aarch64-linux-musl-cxx03.tar.gz/md5/9ff4c76804f59056b49a9bf5b6a02099
-GMP.v6.2.1+2.aarch64-linux-musl-cxx03.tar.gz/sha512/d86afa10bdc4e20fa259a17ce7d0a5dca2524b42752bc7d5c33e4323973587d234d4c420900deef34670bfce8ab8c6725e7edb45bfd3896b2644a42ec187dfd7
-GMP.v6.2.1+2.aarch64-linux-musl-cxx11.tar.gz/md5/cc9857a965afcdcbc2b378a368360690
-GMP.v6.2.1+2.aarch64-linux-musl-cxx11.tar.gz/sha512/c46bff9fdcbecc71c12914dadb31ee9fd5b4293cb45bda782200daa18d7f7e8b588e0c0f68a39c2fec7cc3d026bcef3620dae35ae2dd3acf2505dcfc084d11bd
-GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/5b3343367896e31b29571fe0d2b90390
-GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/65a501db63c386727aa336d6dbecdff0417628bc9ff7ac1b2161922246d94f8caa71b63fc3789ec6bb10aff03b96d5d0c22c37c82bd95d74e557df8de7e8a09c
-GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/cc04dda18412fa11f228e66eb5a03aad
-GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/49fdd452fe8f0129ee06795e04a0cc0238132f9d6f60a124dd2c7395fabbb71f005c16d95fdc00d87f8bf82b048cc54e07f162fbc38223c644854cc72c4d26b0
-GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx03.tar.gz/md5/675599595f3dedb8ca11151168da7110
-GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/eedcdc2230fd81d613d54be356679a97b59491f5f9a17c518239b5504c3dd5da15721d553f57ae21f1c55d253e808e7afd1d1651b8c666379c55c7b48f71217e
-GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx11.tar.gz/md5/9a74abbc46439ae8268ca926f0045691
-GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/6329506f7a886d0dd907b051d6cbab1bd0cd21b2d5715f55402bf9ad6cb1ae33e058931bdf6cba17658b0e455f9e4fb7f9aad274755a159106cfe1c4d1ea328a
-GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/8c20e0def927a202f2d23aed78aadb4a
-GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/b7f42efae6fce864c9e07714056444ba74befb9cc9a766ffe14e676240f23f83d3241b1bf3a8f4a282acbdc197287fffb27dadedf3055505ad63bb0b9df573c6
-GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/423a625816b3c52efa6021e76f6009b7
-GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/21cbbfd647d4a7c884344dc66e0fd83d654d22c3338669539e8eab515bdc6bbd772b47f949d28280789e4343e9a8d6319a73dc9e11c23da381b8a452ef7fb098
-GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx03.tar.gz/md5/7d67f981538d7a69ab1e458a54bf56f4
-GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/8aefbcddc326d4ef289dcdba8d3bd56a5f9656a7be30c83b4dbd9a0b8ee26a963c6a2f4294c94b8a8f2f712f1e1c9e17b8b9dcc9967d64294ca466e51656f7c7
-GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx11.tar.gz/md5/ed8713b71636ea75fcc0c9fbc4a8618d
-GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/d7f50d06a256fd9176d5fbf682ff599a5ffba62bb35fb37321ab41e88970921a9d9fa4531bd74e73e471c7e15fcae568d0536d3e32a2b2d7f81dc9cd1f0c039f
-GMP.v6.2.1+2.i686-linux-gnu-cxx03.tar.gz/md5/875f0bc57172788cb80ca2b80ff3065f
-GMP.v6.2.1+2.i686-linux-gnu-cxx03.tar.gz/sha512/808a3c2422b5168260dbf7a3875d5c8151e10b20a8ec87a66bf08f71ad7cf5de20fb7a4f3457c3ab2b4ffc9627764c743baa96f409629c70f2233ea7a5b628b9
-GMP.v6.2.1+2.i686-linux-gnu-cxx11.tar.gz/md5/09ae13f2a6a0dc317d2bca5700d2bf59
-GMP.v6.2.1+2.i686-linux-gnu-cxx11.tar.gz/sha512/9c986e2904247de937e30c05b29e0179986d7747b217468c59bc56af6d4c48d4575f24dace521dc8d66d84230eebd695fe0538972bfd744182ca940a23a9239c
-GMP.v6.2.1+2.i686-linux-musl-cxx03.tar.gz/md5/45f53fd95dd69a6ee6b43463976b5aa6
-GMP.v6.2.1+2.i686-linux-musl-cxx03.tar.gz/sha512/4df57d6c88f0ff86e0ee78da8f6ad02decf7a38884ae8c785c114e0e38e791b733e0d046c90712327c08645dd40b7f0391fcb3258cb3bfb8b6a62c59c27d6e83
-GMP.v6.2.1+2.i686-linux-musl-cxx11.tar.gz/md5/8b15988bfb1ba0543eefab73b3ac3439
-GMP.v6.2.1+2.i686-linux-musl-cxx11.tar.gz/sha512/e32dec7ded9bf6fc26033df83521481dde851c68d7cc45efaabeded7603417cdc5016de45f78a956b69aaed00a55a91aa8b1cd5bbe5431b01074dafce2c47751
-GMP.v6.2.1+2.i686-w64-mingw32-cxx03.tar.gz/md5/4138d0b5185f722aef4e1f215f381275
-GMP.v6.2.1+2.i686-w64-mingw32-cxx03.tar.gz/sha512/255d4ecf178b9440b667c56e542baa4422d731f83a67accd41b76268274c2344fbbf94979fddbbd1f6b5751bac2d228a8ef49a93365de78c1772146edd1b4845
-GMP.v6.2.1+2.i686-w64-mingw32-cxx11.tar.gz/md5/606b4b453af25ded1323aee9e085c132
-GMP.v6.2.1+2.i686-w64-mingw32-cxx11.tar.gz/sha512/8605b764ff6e5d81767432fd8e70c25c5ad76f2cac7c2b3d6ed0596df692300973803487c970a896a0a316d46de3e3cae31b21d4e11fe2961e228cd389da13da
-GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx03.tar.gz/md5/3fbd157df4ae738da6820b26fb75e75e
-GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/6e64c5c4e393c0001bd7085e627126134b5999c2d8df2fa9b72c9f9835d6b0f0ad440a2f58fe6537ec446a517f8df2667881871fce9b4d61c356d2b52080d641
-GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx11.tar.gz/md5/35608e3166278d52a482d7e19313eca6
-GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/a9550fe2b94e0e111a487159c0cd8fb6f1a21b8941ada7bb281572079dbbece921f80b0275bcc8f88117ecc72e7f8e93219350f5444b67295620db1aa9ae947d
-GMP.v6.2.1+2.x86_64-apple-darwin.tar.gz/md5/b5004a436660a2533b94b41c592b686c
-GMP.v6.2.1+2.x86_64-apple-darwin.tar.gz/sha512/b7b4dc8025ce304c5b899084f42c8f5aad5bbe03509bada17dbe6be952f98306729180a22b5d0a095692f349406db0b98f99f5e3f2be5f2165825e6f7f7d1813
-GMP.v6.2.1+2.x86_64-linux-gnu-cxx03.tar.gz/md5/47ba899c9ac714a4594f999d845f45cf
-GMP.v6.2.1+2.x86_64-linux-gnu-cxx03.tar.gz/sha512/99624ec71865d6285ab409ef54f4cf12ba246de6233de56a2fb9f70806574891539efed32e711202003570c157918fde8d53534c695fd5b8476e0d4e0ecd1bd4
-GMP.v6.2.1+2.x86_64-linux-gnu-cxx11.tar.gz/md5/3b0c1258ecafcaf96e549f9b979420ee
-GMP.v6.2.1+2.x86_64-linux-gnu-cxx11.tar.gz/sha512/b94d8f25d23597f96cc0cf0aebd1708755a8714ec4a481108add852b77addc737d3d8feba566ec410db019698ca2de826583b1a6105f0d2188679e7f72331df0
-GMP.v6.2.1+2.x86_64-linux-musl-cxx03.tar.gz/md5/061cfe5f416c1365e98d6b1ed89abd63
-GMP.v6.2.1+2.x86_64-linux-musl-cxx03.tar.gz/sha512/b6847f7ff599fa811851788a6ec6ce69ba02dbb3672d0a64b03b7056b35215536b059287709b3d207bc977094e994a7d744061b7ecf95886510285489bb89578
-GMP.v6.2.1+2.x86_64-linux-musl-cxx11.tar.gz/md5/81911acbc0c3607338c6455b1798cab8
-GMP.v6.2.1+2.x86_64-linux-musl-cxx11.tar.gz/sha512/e007441194abc5c80d9521a17e2ab9e6fb54f319571f4045fec2f7464ffaa99652d3252416c15d110dbf9deaad2c1dc94f81c638e28ce620cf543f554eb7d1e0
-GMP.v6.2.1+2.x86_64-unknown-freebsd.tar.gz/md5/ef7173194848e8d00d73ef05fc520f0e
-GMP.v6.2.1+2.x86_64-unknown-freebsd.tar.gz/sha512/512c3cf8fb951fe0ef7b1715b78202d0bdf5844fe33e16c4674a19e6335440fb5352d7bde71fce83e8e373efe43281d05b160b11657a582a9d3a0201ce97a189
-GMP.v6.2.1+2.x86_64-w64-mingw32-cxx03.tar.gz/md5/882c6749f217f5a691b744ef728ad089
-GMP.v6.2.1+2.x86_64-w64-mingw32-cxx03.tar.gz/sha512/53424ad8a9dcfb8e0e738d4521b2ab1c75aaf54668a54a76b8bcab2404308e69b531dc25b3dc18bc8eaa7ebd9e2914d6624c5d371e6c0ecb9e8d24aa575e99ab
-GMP.v6.2.1+2.x86_64-w64-mingw32-cxx11.tar.gz/md5/bcdd7bcbc69161744397d249a9c82e45
-GMP.v6.2.1+2.x86_64-w64-mingw32-cxx11.tar.gz/sha512/b7f8fb4f5aaf5034d4d2f60e29cc7b5e06c13d4b677af30f30831e1fc95925a575275ebffda36efcc09e29ccd78ba56475c1be3ad0627e28862057764f1ef74e
-gmp-6.2.1.tar.bz2/md5/28971fc21cf028042d4897f02fd355ea
-gmp-6.2.1.tar.bz2/sha512/8904334a3bcc5c896ececabc75cda9dec642e401fb5397c4992c4fabea5e962c9ce8bd44e8e4233c34e55c8010cc28db0545f5f750cbdbb5f00af538dc763be9
+GMP.v6.3.0+2.aarch64-apple-darwin.tar.gz/md5/3fb601fcf70024fcc40889cf1b958441
+GMP.v6.3.0+2.aarch64-apple-darwin.tar.gz/sha512/7ecc97c1f22287e9d7f3e8073e1cc3c6b3c75aa4a350a55a0b6f92c5bf60339b52f8866994f5973077e1026b9d3b10a7bcd71ec2abf25c3cc1bf6ca1041c3e73
+GMP.v6.3.0+2.aarch64-linux-gnu-cxx03.tar.gz/md5/10581945c01bac319c9c2d76f1f7052c
+GMP.v6.3.0+2.aarch64-linux-gnu-cxx03.tar.gz/sha512/3aa2799ef7783a4edb767a695bd2797776def8ce1b2dc471b2cc733371db9981d6c3f395fee2fb50b13c7ef74c1521d2787c29dc60a75e1b92652b94819b5364
+GMP.v6.3.0+2.aarch64-linux-gnu-cxx11.tar.gz/md5/c1f9765fccec8ec131faa5e31b7ac28f
+GMP.v6.3.0+2.aarch64-linux-gnu-cxx11.tar.gz/sha512/aebde82400544dc7a2aef0a4531cee78f9abcac9352dfd5d86472a70d704b281de03325cc609583169ecbe4cb64623ab04a3d7fff9cf24c70991530fe530aa05
+GMP.v6.3.0+2.aarch64-linux-musl-cxx03.tar.gz/md5/b1f771c79f3b380555c1c96232074523
+GMP.v6.3.0+2.aarch64-linux-musl-cxx03.tar.gz/sha512/daca9d3b4179e99da8e61f4010f5965718c79d02627e0b3272e4d20c34dac0d933408dc7d760a6d6fa09546e436c800ad5da4a1d34283eac9558f3d2f97bebce
+GMP.v6.3.0+2.aarch64-linux-musl-cxx11.tar.gz/md5/523c386457e9d48430b83f2db85ac10f
+GMP.v6.3.0+2.aarch64-linux-musl-cxx11.tar.gz/sha512/18155dd92641bf6240606d23b0d3cab16bb9b63b6034a7c7c61f3728fb48a6b710fdc21c6477145c015c648557e97003b0cc6087b4b36a691daecb87272cd51a
+GMP.v6.3.0+2.aarch64-unknown-freebsd.tar.gz/md5/7dd3f2813fd7e9e620a8123ae2340ab2
+GMP.v6.3.0+2.aarch64-unknown-freebsd.tar.gz/sha512/375b12dee41285b65b5cdd55f6b000a90fd431c3eeb788a928396a102594fb6fad257f2c4e707f11ce7d0e4d45bc82a77ac85d8a48fa0a42f969b48b8b2c1c23
+GMP.v6.3.0+2.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/7d23f84102362ec3974ca2d84da33c4a
+GMP.v6.3.0+2.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/51e419159fad75ca0ab12c31db29259be6fa280e66e2b980df4c99a0558615297741f633322978a409fbc071ec71834214b12d27d04ced0c043c569438dabd12
+GMP.v6.3.0+2.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/5f809ffa56ec07cc04e3c4cb155faad0
+GMP.v6.3.0+2.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/e394afb93a2c0aebe0ac7887bb2610720cb926256f0f5e7b05f3b1a805d3f7967fb97f4227ccec049df554c6cd1c4d4e9414fc4fea33f201204dd87e207e33ff
+GMP.v6.3.0+2.armv6l-linux-musleabihf-cxx03.tar.gz/md5/494564a56197edc5b8772c15eca7b117
+GMP.v6.3.0+2.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/a7bd8bc19a030c56edd4d91e3cff16d78d4a9c1c1bec99897e55cfaca7e14cb99cee32e220473e207b78f0b5e0c0bf188c679d1748c010380485fad4d89758c5
+GMP.v6.3.0+2.armv6l-linux-musleabihf-cxx11.tar.gz/md5/751c36d4975d6ff88eb968123afc1845
+GMP.v6.3.0+2.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/af471834ba32a970b4f358a263434b03e169dc48445aa5af412ec51e70668a41699f9c408d90f64b06dc9233360f70a03df859428fdc0d759e5696a3ae32f3f4
+GMP.v6.3.0+2.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/ea9c867ae191a29647e8ccfb67947bc6
+GMP.v6.3.0+2.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/d6c44c945d1ef869155be087320d7750be549399b186aad8c92bba32ff5312bf09cbb2fb57be91be237be7d50f8f6ef0aea67070f50c024e6f5302485f405d5e
+GMP.v6.3.0+2.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/ee5becfac9fe3c448a5de322ddee66d7
+GMP.v6.3.0+2.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/bc9bb2ad83644cf0b9f2bb0bfce28938ee6e82dbc0de74d1f411a8eb5ab96c5ec00c648019384ec07f34a469bd984d6c62eac1bcb803eaa013b6c85547ec3277
+GMP.v6.3.0+2.armv7l-linux-musleabihf-cxx03.tar.gz/md5/23962e487398f02c8d660724d88bf7f6
+GMP.v6.3.0+2.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/4c561053f79ed976a698c7382c5c94ebcbcd25ed27c939016bbb4af59948fd6bfb82e494e18fc7b4969941a7756c33afd2f177b3158f1b3d659215c25c958d2c
+GMP.v6.3.0+2.armv7l-linux-musleabihf-cxx11.tar.gz/md5/4734feb61dd3f2a4e6e395f9ac7ccf57
+GMP.v6.3.0+2.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/088a52c372681b4853fe7c4c70eb8625b58df6d79eea2a8982bd781458188930aa31dd9a121ff7a6d00cd8165f5d126155d7f931100aeff256b55a2281d44a90
+GMP.v6.3.0+2.i686-linux-gnu-cxx03.tar.gz/md5/e229a7a09d6c843f03028b036a54b786
+GMP.v6.3.0+2.i686-linux-gnu-cxx03.tar.gz/sha512/d92cccfdd7abe3ca5c6ee1eecfe3f7aebe875ca6b9f6257bf1181dc5ee9c873a930ebb2accc825596ee26dc45bd290a482f0405cfd7a3a1b0eb606f5ca897b70
+GMP.v6.3.0+2.i686-linux-gnu-cxx11.tar.gz/md5/01dbe43b15197cd39351dce91b3a62c9
+GMP.v6.3.0+2.i686-linux-gnu-cxx11.tar.gz/sha512/d6e7ea99f76e10b4f7733d8c7f4af3fb2fc09618510c222da1fb95e8b4c83b0aa7c5d2f896bb620546bf39041d6dc1b32ca74ddf5024ef1beb5526b374ba885c
+GMP.v6.3.0+2.i686-linux-musl-cxx03.tar.gz/md5/ce2f8d8b59228888cb7f03da0c1aca70
+GMP.v6.3.0+2.i686-linux-musl-cxx03.tar.gz/sha512/cc024a2ca4b4f042c19f667c4c3c08e3041d9b9ea0279cc668a3c0212103e86444abbdb323304e05c506b44b3c1b32a55f90c04cc32e9d26ac013336821c9ac1
+GMP.v6.3.0+2.i686-linux-musl-cxx11.tar.gz/md5/c37741b3a03ef2e705d45124eae25afa
+GMP.v6.3.0+2.i686-linux-musl-cxx11.tar.gz/sha512/c343ad2ea47d5775e6e4c50fd8d46745d39f3632f4ad479199f7583fd02b08a0126048625d3999b23a0534e4f5c2bf19d021436229689da7c794427102c7780b
+GMP.v6.3.0+2.i686-w64-mingw32-cxx03.tar.gz/md5/52a773a2111f7b1f938e78263c4608b0
+GMP.v6.3.0+2.i686-w64-mingw32-cxx03.tar.gz/sha512/6ef89b7eda8f0709315c1080e4d57810f976939c755f160e34b04e4c199409c8c707036fae5a73fca3a16813cb4ceff8daca38d1ead73e36d7ff23506e5bb4b1
+GMP.v6.3.0+2.i686-w64-mingw32-cxx11.tar.gz/md5/88b1ff47d913fa301c95e9e2aecf42ce
+GMP.v6.3.0+2.i686-w64-mingw32-cxx11.tar.gz/sha512/3d631ee81906627a8bd9194fa8f18b634467565c10e5e08db7d1a4b0943bae9391ae15a1c39533c9796edf24e1f0210d082e44dc7c1fbd9f93855f37e207da07
+GMP.v6.3.0+2.powerpc64le-linux-gnu-cxx03.tar.gz/md5/0b2c73cf7936500ce0f07577c4c76ba5
+GMP.v6.3.0+2.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/30e099bd6384e801fb28b4741810122f82ab0760a4e09d6ab28559b72feff278a48150579907cb2920a624fc85287a197743331bc1808353d0855c198341bfa1
+GMP.v6.3.0+2.powerpc64le-linux-gnu-cxx11.tar.gz/md5/f496279b474948435f836ba39291c708
+GMP.v6.3.0+2.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/c37d4fbba284af87fc16a24bf1fdfe80b42c84bd44f1859d1c9ee97fdbb489817b58db80a078729e19c8a5b8448f9234408a8e477fd15acf15521f3129e86acd
+GMP.v6.3.0+2.riscv64-linux-gnu-cxx03.tar.gz/md5/f07fc6751104a407ea2515fda3f26880
+GMP.v6.3.0+2.riscv64-linux-gnu-cxx03.tar.gz/sha512/435b375da747d2dfba06a303b55118471c6ef705cc65afeabb5a59477cc98aa9a956b31c5e8b571126f63d922498b9a66510f8f6810a60f6a4fabba5ec368cdf
+GMP.v6.3.0+2.riscv64-linux-gnu-cxx11.tar.gz/md5/493c24a7a7370f308f0da2955f40b5d5
+GMP.v6.3.0+2.riscv64-linux-gnu-cxx11.tar.gz/sha512/2e1a7562b759219d1a4283372e66fa1e907279c5b5feb8a858f6bd8de8b9c2ef3ddd09d5e812d93813fa781090574fd26d0cec85b211274db628681301a206f9
+GMP.v6.3.0+2.x86_64-apple-darwin.tar.gz/md5/c3bb785e10fe19cf1c47db6bc5e98fdd
+GMP.v6.3.0+2.x86_64-apple-darwin.tar.gz/sha512/5280896654e1c7864d770ecbfc853a1c7837c2b1dd369047432d10f831762a26fdaeac4201ca419d8bf7c545c107800b892660f4484b5eb87bfaf42c919fb640
+GMP.v6.3.0+2.x86_64-linux-gnu-cxx03.tar.gz/md5/0fd62bb914554c3cb6b5dc0f5ec0d330
+GMP.v6.3.0+2.x86_64-linux-gnu-cxx03.tar.gz/sha512/78cdf0cdcdca4a0ddc87755f4afdb8f290fa946b3c5541a3e31145f8bd905884d59f38e9f5ee4fe96ceaedaf90881af795f4e3ecf1be922103b838964da101cf
+GMP.v6.3.0+2.x86_64-linux-gnu-cxx11.tar.gz/md5/02f54f8895bae0d7a824374888300744
+GMP.v6.3.0+2.x86_64-linux-gnu-cxx11.tar.gz/sha512/83c865f6164400e56c28949c680cf92457daa270b745d89034e1bcc46af1eb93c96bce708561dee03b58162191f6448e4325e921daec11083bbc42dcf3a1ffda
+GMP.v6.3.0+2.x86_64-linux-musl-cxx03.tar.gz/md5/8f3f26422f8bd0889b5c2ecd22d97101
+GMP.v6.3.0+2.x86_64-linux-musl-cxx03.tar.gz/sha512/680beb99936433bc1c3367e85f3a4129c5a99d4c4031a1da919293819f6d3f1b85be801a2f48af352c47d7cb6f394534333f1a0d0404ff41899952d55c4b1f75
+GMP.v6.3.0+2.x86_64-linux-musl-cxx11.tar.gz/md5/7ec0e3e9125c14a20d6d0044036f0996
+GMP.v6.3.0+2.x86_64-linux-musl-cxx11.tar.gz/sha512/c22e6a25ec854f9c199d5e76bc1dbcbe57c4cc219eb2b5f24418729252eee1a5c1d3e8bbf5b62d148cb408595e96f448f68a29a9425a902952bee666b6f051f6
+GMP.v6.3.0+2.x86_64-unknown-freebsd.tar.gz/md5/6782d7fd0bd15c189c4a1753ee0fb0eb
+GMP.v6.3.0+2.x86_64-unknown-freebsd.tar.gz/sha512/04d7a95337e832f7ec228f160a09b74ed7908ef9cef1bd392555392a24ff63ce4a88b616b5426cd710dcb581e164bb94c04fe17f0b599adf3c3bc33106bcd886
+GMP.v6.3.0+2.x86_64-w64-mingw32-cxx03.tar.gz/md5/b4cb31e93c85cd453b7d8d392a365088
+GMP.v6.3.0+2.x86_64-w64-mingw32-cxx03.tar.gz/sha512/3bd84fa8f580b272eecb06077ef710ae8df661126e86afa2c901b298a2598975a07f840b922da0066dbf555f03376cba1b7e4915cd37617341fd420b6707276d
+GMP.v6.3.0+2.x86_64-w64-mingw32-cxx11.tar.gz/md5/2342842254e7b47b26836366d29d6802
+GMP.v6.3.0+2.x86_64-w64-mingw32-cxx11.tar.gz/sha512/fb12be14433763d9de689a5df222802cd79d5c990da9a53855fd2f6f8e663a9838b444a310318c059cdb4962eb87d0d4cc2b54d163cf82b09377339c8e45510f
+gmp-6.3.0.tar.bz2/md5/c1cd6ef33085e9cb818b9b08371f9000
+gmp-6.3.0.tar.bz2/sha512/3b684c9bcb9ede2b7e54d0ba4c9764bfa17c20d4f3000017c553b6f1e135b536949580ff37341680c25dc236cfe0ba1db8cfdfe619ce013656189ef0871b89f8
diff --git a/deps/checksums/libgit2 b/deps/checksums/libgit2
index a70a404ae6843..2339d381a7079 100644
--- a/deps/checksums/libgit2
+++ b/deps/checksums/libgit2
@@ -1,34 +1,38 @@
-LibGit2.v1.6.1+0.aarch64-apple-darwin.tar.gz/md5/62bb842de0ede8a7c2b119cfa7402a61
-LibGit2.v1.6.1+0.aarch64-apple-darwin.tar.gz/sha512/e5117912419fd73138779322d5cb84454c641aad87d0df7d44b5074c96576fe1ee3822dba18c8207dacc9bae2b74cef87353d5c519fb7fba8ea89c858415f993
-LibGit2.v1.6.1+0.aarch64-linux-gnu.tar.gz/md5/3f42f283a9f550841b285216d681f3d0
-LibGit2.v1.6.1+0.aarch64-linux-gnu.tar.gz/sha512/0a793bb239976946941af5794cb45cfd7d1d99b9aa125800aee9337bf9d9c5152bcad258f75d987a7af9b547ea906ee2beebe7b8d2c8cea111e6878df0eb3ea9
-LibGit2.v1.6.1+0.aarch64-linux-musl.tar.gz/md5/0f20cee604380bfa789334b5544b1cab
-LibGit2.v1.6.1+0.aarch64-linux-musl.tar.gz/sha512/86d7e6a64bf24f3e69dfa4383ed896c5d8a915e19f6f0351e8cf38361352347c827f79032fd8576ca9bfb94dc8db4704d35540ae67b46d671f44ab549c6ceb49
-LibGit2.v1.6.1+0.armv6l-linux-gnueabihf.tar.gz/md5/5c025b4c9065c0b481c7b0f6dd7666a0
-LibGit2.v1.6.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/5b1d3472df47462b3e38c5a5b3400d90038b1637a7f479e9fe04ef046849c14d12301328498429a9f290ff82b6343ccd9ae7616c5ff1d5fd83f35559bedf8747
-LibGit2.v1.6.1+0.armv6l-linux-musleabihf.tar.gz/md5/8015b63706e6d5826779f870681ff865
-LibGit2.v1.6.1+0.armv6l-linux-musleabihf.tar.gz/sha512/e3c8c46d8da8df409b2dc7c476da638da2c79974270390b84473ebefb66f26cf60647445c2b141f7b6cf45655de12404deea30731b812952fd9156acbd7344a1
-LibGit2.v1.6.1+0.armv7l-linux-gnueabihf.tar.gz/md5/74672b31da80507609e59b19448ec415
-LibGit2.v1.6.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/6c6365501abeffc7e796f3b67a139e93262dab1550ba5fe6ead179c0a9d32c62bab7b422b81524d7a367ca1032c7bfd2b3385155e364fc267f660dffa8eee39a
-LibGit2.v1.6.1+0.armv7l-linux-musleabihf.tar.gz/md5/057c22b3fc988a98551fc319eb080c39
-LibGit2.v1.6.1+0.armv7l-linux-musleabihf.tar.gz/sha512/edfb8c57aad5499fae88f09a17e905b4c009e2a8781727566321a858f3ed8a4bcb75b990ae5ad4ac57bcb2b01bd2dfbe0375b01a41405c161106881c8859aa78
-LibGit2.v1.6.1+0.i686-linux-gnu.tar.gz/md5/ecde35f4ca6b4a03f8491d90480f33b3
-LibGit2.v1.6.1+0.i686-linux-gnu.tar.gz/sha512/ca77a1b3c381be2286be9134d7adfde51fb38c4bc9dcb3f56cf1840809c40c484c843cf4ed8d77c538889e06cbef2e5d1b4468739bf761cc91c676a0dc5a34ee
-LibGit2.v1.6.1+0.i686-linux-musl.tar.gz/md5/1a56e7832761479fe911b8efd66b5b73
-LibGit2.v1.6.1+0.i686-linux-musl.tar.gz/sha512/e929261ba9564762d2b3c3191dde216caede5c436b84a00d08706a708436023430a9a762cbd94bf96e903a230c690ea28787ee08208d5b50e51d98e56587b30f
-LibGit2.v1.6.1+0.i686-w64-mingw32.tar.gz/md5/671a1c045725877e1a4f55b42fbb15b9
-LibGit2.v1.6.1+0.i686-w64-mingw32.tar.gz/sha512/5b0e78b5f5f24b7ee8c88d704bf58043626174d9e8e28226b72873f62d0ff6a6f87d6200adfd613e35c27f6d127d967f49a1f7ef26ded8d1b08c89589b59ce85
-LibGit2.v1.6.1+0.powerpc64le-linux-gnu.tar.gz/md5/4ffc17733025ac94e525f8d9416713a4
-LibGit2.v1.6.1+0.powerpc64le-linux-gnu.tar.gz/sha512/a382f7f15484426d6e913c9cd54facd63573650449f1a2d7b180f1905b79dc75280fdb48ff9e47ffc1ef70c9941d43a6ca35e21bc9746172689886fbbc9d65a4
-LibGit2.v1.6.1+0.x86_64-apple-darwin.tar.gz/md5/af4192c866787ce226fb7a6d5229bfa2
-LibGit2.v1.6.1+0.x86_64-apple-darwin.tar.gz/sha512/18bac55bd7bcd9ea66002c98717ef358710aa689c9bff63be77de1cce4db2082f023ee577060f6ed11e3830c2e751bf2adae1a9b232570a090031c5246f29edf
-LibGit2.v1.6.1+0.x86_64-linux-gnu.tar.gz/md5/d26008f39b244ab0caa804ae0365d69b
-LibGit2.v1.6.1+0.x86_64-linux-gnu.tar.gz/sha512/3d6068d2165c012ce66317cc0993c374df43cdb2dcd584ec7966f602062428d4f5e18d157c7aa19572affa1e9dcb0346105a01c64f8e5ac01546aaf7b5d99439
-LibGit2.v1.6.1+0.x86_64-linux-musl.tar.gz/md5/fcbfc9f15ffe3c4b2ea055e198795e96
-LibGit2.v1.6.1+0.x86_64-linux-musl.tar.gz/sha512/16bb30defa9d23e6025e3729e313766940105e02f00168e61bff81ae38beae9ae050a5fbf2307083b3cd89d364aa70a7042b94062160fda2174aaf5018f3e2f3
-LibGit2.v1.6.1+0.x86_64-unknown-freebsd.tar.gz/md5/a4fe2ed51c1ac1aaaa4f46a00714d85a
-LibGit2.v1.6.1+0.x86_64-unknown-freebsd.tar.gz/sha512/bba31901fcd8b2e69f43e9645c028be4c840b3d9afb4e92e64c9ea46c7fb44dfecf14f99cde586380ae0508fdb8402d3bbe93ec7b38219fe7806299b70576949
-LibGit2.v1.6.1+0.x86_64-w64-mingw32.tar.gz/md5/11ed8da2cb4c7ef924b50768cbb54678
-LibGit2.v1.6.1+0.x86_64-w64-mingw32.tar.gz/sha512/b39f12931d638809af27e446d7ac25b17bfd5c003cac89bcf83dc4c5331d14ec12b07ae410cfdc636546a3b1edf0f7d360bd194aa58c835261642b51edb4afd1
-libgit2-8a871d13b7f4e186b8ad943ae5a7fcf30be52e67.tar.gz/md5/831f4d09a6a22662dc0043063d0305cb
-libgit2-8a871d13b7f4e186b8ad943ae5a7fcf30be52e67.tar.gz/sha512/17ad43e6f80e87e8115cef89919475a9d9ea11d679e107221e6d82623577fc8e4002876a33c7eb2a52a47e3d8142976777bc79f81e4c4cf2da6adb1553d17b00
+LibGit2.v1.9.1+0.aarch64-apple-darwin.tar.gz/md5/1281d4cfc44ab26054b83355a053c42b
+LibGit2.v1.9.1+0.aarch64-apple-darwin.tar.gz/sha512/d4040d56c588333c222d66488944c2988a3ed1bb096ffe2bac67061b0ce20c44c5a803741fdc53a14a2523d0bd3fe5b3a6f247b0c366d0d3676c48bac44d3c05
+LibGit2.v1.9.1+0.aarch64-linux-gnu.tar.gz/md5/b2571cd9b20a307fc12c9d7979f42e81
+LibGit2.v1.9.1+0.aarch64-linux-gnu.tar.gz/sha512/52a2301f1311a8fb3c091b158e99d29267175156979779896d346cff5a5dac8b784f113793ca86bbd41f92c937c4eb64b8b8238df55ae95a6b1320cc3455b379
+LibGit2.v1.9.1+0.aarch64-linux-musl.tar.gz/md5/b53fa9546706075f41cf12c39c4969ed
+LibGit2.v1.9.1+0.aarch64-linux-musl.tar.gz/sha512/9d818007520f1780eff4d0b6b2d8c7c1fc09763f36280b55d3795895dfa908d87db1699e9dc265162f831b9b1f03879cd187c020f8082c77eda57320eca14ff0
+LibGit2.v1.9.1+0.aarch64-unknown-freebsd.tar.gz/md5/5975e2abf13b3cefbc8cfd3d1d24a956
+LibGit2.v1.9.1+0.aarch64-unknown-freebsd.tar.gz/sha512/05337be9595c5d94816d168cf245289f2c7b2717c49ea000ba872b582c919b327be3a8cfa877eaf7c541a38897db3f57e11a8f49df9b9bfbf8f3110b39389e0e
+LibGit2.v1.9.1+0.armv6l-linux-gnueabihf.tar.gz/md5/a4de1b1a529f130d85a75b56f82551e8
+LibGit2.v1.9.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/cfdb3aef71f88405e7ec3c31df1f84617ad01dd227d0d6dde9a7dcd5e4a59590437f4ae90b91b63ab14a5716b366e554ed8d2e403eca2bcc9f4a3f7b813d3df3
+LibGit2.v1.9.1+0.armv6l-linux-musleabihf.tar.gz/md5/6a5deeaf7dc9f4352dc018df12c1ec61
+LibGit2.v1.9.1+0.armv6l-linux-musleabihf.tar.gz/sha512/cec900743c68c1b2580d21e51abe1852ae44ecadd682e48300d0b656ce955f71c4706c1462c35d301e331d79c37330bb4abface88709dee8b4154d1b65ea1217
+LibGit2.v1.9.1+0.armv7l-linux-gnueabihf.tar.gz/md5/90d0cab30f58b6e1d75b006c12f3ce0f
+LibGit2.v1.9.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/735174f87677795e313b8338a50f97ce438058386ff594ae91f7b8bed3085e8579c4da5e305e3e70821e30cbf6d878deb1c161b695b1f19f58bc924663bf2949
+LibGit2.v1.9.1+0.armv7l-linux-musleabihf.tar.gz/md5/56c495c7cd018e3942ee5879d0e397fe
+LibGit2.v1.9.1+0.armv7l-linux-musleabihf.tar.gz/sha512/2ea7cafb587c2f2f693789f37c1962fdcd7d4815b064e676c876178f42f8dab400a91d48767bbfd1758646174bf060cad2978cd0cc74fa810955acb1e96b6ba0
+LibGit2.v1.9.1+0.i686-linux-gnu.tar.gz/md5/d8fd641059379d0a153aefc806adc12a
+LibGit2.v1.9.1+0.i686-linux-gnu.tar.gz/sha512/bbe3b2903c981037d71d841a1376d744068a5f1ac9d476f833e3f31d7c22284d35262f04ce0fd885d63102c0bb68558e29ec2743adafa6dc618b13b061be402a
+LibGit2.v1.9.1+0.i686-linux-musl.tar.gz/md5/c7e31adcdb6735e4a658e8c75d5cf213
+LibGit2.v1.9.1+0.i686-linux-musl.tar.gz/sha512/1f1ec4a3f1cbe14ff1b94ddb1d4c693b0fefec4bff51ab18920ed84eb12406d778af624baebf75a82c6411540cb62bbd3e00d975b38281a7ef51d7b7cfe255ff
+LibGit2.v1.9.1+0.i686-w64-mingw32.tar.gz/md5/d28b2507115bcbceab8fe2c1a3141e72
+LibGit2.v1.9.1+0.i686-w64-mingw32.tar.gz/sha512/42d3ba8a06207dbe4a9862981f18a08c77fbf7eb0da047b2631d4cc46352e6403c8016fa6490b6732165c8286e341bd167f1d4953a77607102fceec276f37eed
+LibGit2.v1.9.1+0.powerpc64le-linux-gnu.tar.gz/md5/ef21df7cd15d2eb71e9b1ed00d30a008
+LibGit2.v1.9.1+0.powerpc64le-linux-gnu.tar.gz/sha512/1b6b76d098d1b3bb48ece5c95b529e067b815aa95115f8df604a9169ed243d8b1b1c86598425f34193dce594dba19d7094ae95860ad7fed723d891df3cbf53c1
+LibGit2.v1.9.1+0.riscv64-linux-gnu.tar.gz/md5/52eb7e4c1057c704a5abf4a3526ff6f9
+LibGit2.v1.9.1+0.riscv64-linux-gnu.tar.gz/sha512/1d1378143ca1f1cb61335785b6d7db655d00da721ba5a5837c735b3dbd2a5be3207ee1e8bd2cfbcbd08e41f8aa61c028335affad7854cfe499b0210b5642c124
+LibGit2.v1.9.1+0.x86_64-apple-darwin.tar.gz/md5/a571908803d937204a84d0418c49284e
+LibGit2.v1.9.1+0.x86_64-apple-darwin.tar.gz/sha512/bc3f19cc22889f7d082363274e6fd70487e0e5c1c0e156098d6f4f1350b78b327f8d7891f3c5c476628e85649a44ef0b2ec51899afa8721fbb26363fd52c1586
+LibGit2.v1.9.1+0.x86_64-linux-gnu.tar.gz/md5/9823d346d9cfebb7d14f537e7d4f66da
+LibGit2.v1.9.1+0.x86_64-linux-gnu.tar.gz/sha512/5ac7a780861907554e3586ec2bd2109603af93ef3a6a3d8a9746af65013d3d745f00289dd24312b6b6bbe6154f0e66fd7925e4907873e712f3e765b960a36771
+LibGit2.v1.9.1+0.x86_64-linux-musl.tar.gz/md5/d12787cdb2407c9668dd1c9fe5cef246
+LibGit2.v1.9.1+0.x86_64-linux-musl.tar.gz/sha512/d7b0a08bc154639ca0c3f12ebf3b5d1725edc45a177ac27e91abb83add19c3e3b2c414c955477d9ace6ceeddb5e1f7257a56baccee8134812b5d47932162089c
+LibGit2.v1.9.1+0.x86_64-unknown-freebsd.tar.gz/md5/0aecfa9999032119fe0ee6aee13a55c1
+LibGit2.v1.9.1+0.x86_64-unknown-freebsd.tar.gz/sha512/b2b42ff038abf32fb4e3c15d4b4760578410218685fcd2761c91efb366d5430e2dcad267f51c28205040b2714dbe91aaa060742ac66b77690744367de14484d7
+LibGit2.v1.9.1+0.x86_64-w64-mingw32.tar.gz/md5/d610537c9abd49eec7d861c814226c2d
+LibGit2.v1.9.1+0.x86_64-w64-mingw32.tar.gz/sha512/5bea03406ef4d2bf89bd82fa6cdd8fa52fb3cc6efdc7e5016a1e3a33e60603be23f867630d7a5b14d2fe435fcade5229e10067a86945a5e46abaa20ce1f7d9dd
+libgit2-0060d9cf5666f015b1067129bd874c6cc4c9c7ac.tar.gz/md5/904b3287acfbd1098a447e50b957f76d
+libgit2-0060d9cf5666f015b1067129bd874c6cc4c9c7ac.tar.gz/sha512/e3f49faa709456d506993328177226932e3673fce3cf9ac0d0b0063206aa9ce0a6d780348c35f5ad28f59ae50f777c9f4fa54ec35af04582742cf7e36af09b56
diff --git a/deps/checksums/libssh2 b/deps/checksums/libssh2
index f8c1fc5da8d37..056d373656d98 100644
--- a/deps/checksums/libssh2
+++ b/deps/checksums/libssh2
@@ -1,34 +1,38 @@
-LibSSH2.v1.10.2+0.aarch64-apple-darwin.tar.gz/md5/b692a93b8f7e45edd5c5d397cd1d3725
-LibSSH2.v1.10.2+0.aarch64-apple-darwin.tar.gz/sha512/8863fb372e3bccb9d6ff7f33494754b1391f0081426d1a42a3f3da69ced9d1b6246b7aa84269b7ec2844c27991d5998a6c58561b277f86daa96b577dec57b514
-LibSSH2.v1.10.2+0.aarch64-linux-gnu.tar.gz/md5/ea2e202282947c4900d656c83ba30953
-LibSSH2.v1.10.2+0.aarch64-linux-gnu.tar.gz/sha512/1c3a035a2e711ad013acb460293e929d18b58d345f84f4a7cda93510dca5e46d466f08b2b96e5742c16c509dc6ed6b6e2b13399bbd1c48340326e3e6d73f9322
-LibSSH2.v1.10.2+0.aarch64-linux-musl.tar.gz/md5/9dd3b1813cd1cc6246b31c5bd2df538b
-LibSSH2.v1.10.2+0.aarch64-linux-musl.tar.gz/sha512/075d681235961838e2bb14f2034daa65909a40972cf638b44646464f66973d139de9af9d653073c24510cd40e5068d3a41c09f6ff12835a8278259d8530a6720
-LibSSH2.v1.10.2+0.armv6l-linux-gnueabihf.tar.gz/md5/17a33524ad9e6dfcf239b076803e3c84
-LibSSH2.v1.10.2+0.armv6l-linux-gnueabihf.tar.gz/sha512/09ef31e3a6ee1e6055c6cf5c0f45fc2704701bdeb565b82a1896e8c83cc34c9dcf1b2e761d1c78b5d513291802a02ef3567a8a6d06d9d7ad946f4233e973c5c9
-LibSSH2.v1.10.2+0.armv6l-linux-musleabihf.tar.gz/md5/18db6a0698da98ec8247ab0f86e9a2e9
-LibSSH2.v1.10.2+0.armv6l-linux-musleabihf.tar.gz/sha512/11657f0b411ee81d84d3c02befd34d53d25da0485214f82e6ac9601fd065127e01ac74b592d328481a0ed7d04c231b37f4fec773e06d1c1f5186f6eb7eae57ce
-LibSSH2.v1.10.2+0.armv7l-linux-gnueabihf.tar.gz/md5/0a49a14c15176f32867f49a6e487de77
-LibSSH2.v1.10.2+0.armv7l-linux-gnueabihf.tar.gz/sha512/aa81a8504529b90e6e212f1495e8732118f5d0eececd4a809cecdeb88b97f0ca4a1081e669660ea8513b15f71299854da9eb5f8352f099796df4fde33f89072d
-LibSSH2.v1.10.2+0.armv7l-linux-musleabihf.tar.gz/md5/0bb9d7c5c40d88e19a2d9f3178c4de64
-LibSSH2.v1.10.2+0.armv7l-linux-musleabihf.tar.gz/sha512/cf361ac498daa4c3a0b044171165756e54402f70c75fd5d877e6a24db9b6930c678c3f45c16850b9138e8f97cab9f1cb4ba82262e48fad269a36fc556215899d
-LibSSH2.v1.10.2+0.i686-linux-gnu.tar.gz/md5/651955e1c129d88d9dd0e9b048505db7
-LibSSH2.v1.10.2+0.i686-linux-gnu.tar.gz/sha512/b48d3e5eb36d4e0ef36bc512f1fe65a85e0ddf16dab8da8190f642d2460b6ab94677838f263ad280f37a8bd838c4c8283a3cc706247d4241d8760fde797fc163
-LibSSH2.v1.10.2+0.i686-linux-musl.tar.gz/md5/74b4741009fea8bb10ab3f6a44937fb1
-LibSSH2.v1.10.2+0.i686-linux-musl.tar.gz/sha512/13b64fd49d6a6b80dede0c049db871c5b22fec2f093adbe6699f2e467f7458684cd38093230c5d2fc837e500c8d1e86cc2966b9805a2ed7a705d50108a95803f
-LibSSH2.v1.10.2+0.i686-w64-mingw32.tar.gz/md5/20c1c7d0825cba67a0bbfa822348a245
-LibSSH2.v1.10.2+0.i686-w64-mingw32.tar.gz/sha512/2ac02ff310911998c07861493b699837168e43e40172372c33fc7769ff6eae2f2f2c65b10319c2f10316d34c519ec2fc5308b327b62caeb415ac7c5c692fa81d
-LibSSH2.v1.10.2+0.powerpc64le-linux-gnu.tar.gz/md5/6d180f087e415adbafa516c471315ce2
-LibSSH2.v1.10.2+0.powerpc64le-linux-gnu.tar.gz/sha512/23ea211b5d1219454c14316e59cb94195195abebd9a4e7a6812c4d824abcac7c5b896c460c2dae3511abaae7e0afb5ead40a5836e5d94ec0c3a2b8076dd29e3e
-LibSSH2.v1.10.2+0.x86_64-apple-darwin.tar.gz/md5/dff956043faefa3396fc31bddbf83b1e
-LibSSH2.v1.10.2+0.x86_64-apple-darwin.tar.gz/sha512/00aa279251a04684d968e413bd9f652c6740bf4a6e860ba9b999c8584561499f1f589ca2eb3f06a01c539a952fffb41787a37a6e514d689b97693a5a7bf4c18f
-LibSSH2.v1.10.2+0.x86_64-linux-gnu.tar.gz/md5/a50d390c1a8ea77d7d78c07a759fa79e
-LibSSH2.v1.10.2+0.x86_64-linux-gnu.tar.gz/sha512/c985018dbc79c9d41ca3df940fcda15d7f57f0a4e869268ab7c255b4fbc7aa9bd5088281258de22523c777bc9210ce3c9e1f0f76693c0575506ea840619c0306
-LibSSH2.v1.10.2+0.x86_64-linux-musl.tar.gz/md5/a8b647a12439a7ec9b2d5111a4fd605c
-LibSSH2.v1.10.2+0.x86_64-linux-musl.tar.gz/sha512/7790bf3b88513a026f3b58090c5b39b0544d873f7bee4f4c06fb23f513954f580ff2d5d552f15f8b725fd3264585390c33283906f1844cf20ce6d2eee54495a7
-LibSSH2.v1.10.2+0.x86_64-unknown-freebsd.tar.gz/md5/c4f76951ed337bc87d21993d1997dac8
-LibSSH2.v1.10.2+0.x86_64-unknown-freebsd.tar.gz/sha512/3db3c44e2100d00a537c87590dcd5493dc9ec74913ce903ce7bca697ab590417bd55ec6475a0a790ab49e9444d79ece539733ac25b0b82eaab735c8c96c0e992
-LibSSH2.v1.10.2+0.x86_64-w64-mingw32.tar.gz/md5/b58d05eb572149dbfec7b53a75dc4d6f
-LibSSH2.v1.10.2+0.x86_64-w64-mingw32.tar.gz/sha512/422fb36c6d7d3f7153b52547fb98d70268da1506a4957e2772184ba52bf06455f869f1c491d82852494459189830c68569219fbb2c795ddb49d7e8a8e95d6988
-libssh2-635caa90787220ac3773c1d5ba11f1236c22eae8.tar.gz/md5/d0b060310da22a245fc488a300288198
-libssh2-635caa90787220ac3773c1d5ba11f1236c22eae8.tar.gz/sha512/17770f8de4f081840e765d6f7842d562e20f46972fb53a15e3c9e10421f3654a559c5dd1dfbafd7b4a0e5205d800e848b9c9c26ec1d8fc0d229d5070b6d19463
+LibSSH2.v1.11.3+1.aarch64-apple-darwin.tar.gz/md5/87ba86e78421d6195aa6a46129ff61d4
+LibSSH2.v1.11.3+1.aarch64-apple-darwin.tar.gz/sha512/2b7129be9e9518337f59857474882a6a3448f358c931c66ab9f9ec67506c68d2356df591bd45925154844ca0d6f6e1f071d4c54d62039c5078b468fcb356187b
+LibSSH2.v1.11.3+1.aarch64-linux-gnu.tar.gz/md5/84c6eb68e7797038d0863513fa4e292f
+LibSSH2.v1.11.3+1.aarch64-linux-gnu.tar.gz/sha512/3012beb35fdf94136907037e8f5261a5cc94d102f461172321d4ed8f328da3789d521513dd03cb344c6fcb73675cd1d3ede606bf9a904fb811d40c43fd09d8aa
+LibSSH2.v1.11.3+1.aarch64-linux-musl.tar.gz/md5/5a49057201e779f3427b794b72bf07a2
+LibSSH2.v1.11.3+1.aarch64-linux-musl.tar.gz/sha512/62a812efb4ad7b24bfeeb3bb89756004215c09a1cc01e0530f14ce4b8546f1dcbbac18155ac2ce08311c1790d659b14674e3bb3549ff68d1209d52b5e5986fff
+LibSSH2.v1.11.3+1.aarch64-unknown-freebsd.tar.gz/md5/a5129167b7be7ac8ba2c873e164afb1b
+LibSSH2.v1.11.3+1.aarch64-unknown-freebsd.tar.gz/sha512/f8d9cc5098a3b401fbbe98a24efaca0ea46f533ecaf11dbfe8f7e7e3853363af19914de62bd1cb5a573e55e90d5c6074532ddc6d64723c9e235b277f438ce6ef
+LibSSH2.v1.11.3+1.armv6l-linux-gnueabihf.tar.gz/md5/5c59c95612bf9aa172e5d487002db509
+LibSSH2.v1.11.3+1.armv6l-linux-gnueabihf.tar.gz/sha512/5ba41e49365c2018d55c92e4a23d806ca9ab960a448593b08380527da21eec03f76cab89c34befbc56f4104002aa189d5cae6f655797f1447f395b51a14d40e2
+LibSSH2.v1.11.3+1.armv6l-linux-musleabihf.tar.gz/md5/4bc27411f0eddf82a787d1ede17ce2c3
+LibSSH2.v1.11.3+1.armv6l-linux-musleabihf.tar.gz/sha512/d6024b6949ac6867c56c66defbb99300a5661e0c73da6c330165bceba78d64063986c8851601ca74554b27944d5b02e3f602b1e71781097bbb8b12effc0cbbdb
+LibSSH2.v1.11.3+1.armv7l-linux-gnueabihf.tar.gz/md5/40e1a0d323969b96ab121eb5a3ecc874
+LibSSH2.v1.11.3+1.armv7l-linux-gnueabihf.tar.gz/sha512/67ce15a5b1c1fe0fd1096ed5d2d9f44d83983de11c1bc651f5914d70d387a99ee6bde31716031b758f48981e2a9383599f077f02d61a5c783ee6d09a7bf445db
+LibSSH2.v1.11.3+1.armv7l-linux-musleabihf.tar.gz/md5/9453c52394b1b06bd36c43e461a3b48f
+LibSSH2.v1.11.3+1.armv7l-linux-musleabihf.tar.gz/sha512/c62068ecb1b88dbd08a2474e0b93cd313bdc4e1407a22cd9164a73b2d897564f12a3c34f6fc492b264af579b00e9335a0fe1fa853fbe0fbb18d8335b77d409b2
+LibSSH2.v1.11.3+1.i686-linux-gnu.tar.gz/md5/992453b1c59033aefa8d98b89f491ff6
+LibSSH2.v1.11.3+1.i686-linux-gnu.tar.gz/sha512/ebf14565d614086c4401e1a997a3aacc83f8e499ed836c429f87c4f95f1c8409713fad47f1c34a2b1cd23f90de3daf14caafba3c82b15642018592213607c874
+LibSSH2.v1.11.3+1.i686-linux-musl.tar.gz/md5/e0cb0566c724c107f4f04619080d4c0c
+LibSSH2.v1.11.3+1.i686-linux-musl.tar.gz/sha512/af7d08dba5bb06eaf7ce8aeb12b69701d3c2829996a1c8e68510c106402a1166ad060687987df49365c26d30e8d6511c66f2a50ec810a493d2c090931ccf05a5
+LibSSH2.v1.11.3+1.i686-w64-mingw32.tar.gz/md5/c5e8d3145deb56d6df008522a5d3ea6f
+LibSSH2.v1.11.3+1.i686-w64-mingw32.tar.gz/sha512/47f3c36747d2e42a4c0669ef468d395078328235d30056b7d67d76bd737b5118c1bbc720aef455c4d9017e7b9350e8cc043ed28264ea8a9ecb6833ca517f82aa
+LibSSH2.v1.11.3+1.powerpc64le-linux-gnu.tar.gz/md5/12eba4aec5e320a4d0cf09225bca3f7c
+LibSSH2.v1.11.3+1.powerpc64le-linux-gnu.tar.gz/sha512/d6b8413d77d8af3d29b867692f6c02b63e793f5e8f17c4777756d247c8e602b3ab87380031aefa60f2c3ddae5a3c7a1f1c739439f149db34a32c79f32e08048b
+LibSSH2.v1.11.3+1.riscv64-linux-gnu.tar.gz/md5/cc11dd403ecaa373241b3c30cd16bd24
+LibSSH2.v1.11.3+1.riscv64-linux-gnu.tar.gz/sha512/d195ad62cde58dfa1e3546efd70a5f6b8a0762a2a933c637120aa71eda45dc6dc4213e87f9f401e2e148bbd5fb10638e429ae514bcda5bada0940c70cb7ff15e
+LibSSH2.v1.11.3+1.x86_64-apple-darwin.tar.gz/md5/f6e7cd35e16290b198c80c61a0fca5e5
+LibSSH2.v1.11.3+1.x86_64-apple-darwin.tar.gz/sha512/2c83814ef6ae78ec94a43f2997151dd7195c0a0f9cf456fcd3f780268bd1cbdd7ea55182fc5a1f8e1413c26889e54fccb01964b0b91dd4b925ecaa16b7df8d07
+LibSSH2.v1.11.3+1.x86_64-linux-gnu.tar.gz/md5/95aa96befc9f9007e6a000a95c1b7572
+LibSSH2.v1.11.3+1.x86_64-linux-gnu.tar.gz/sha512/6058dca6d933afb7fe5fc3374937b4432f202a5dfe3ebcc2f91f65777230c18d76801c38071f84f8362527ee08656a97f79da234ab5481265a7ccf29e94c20c5
+LibSSH2.v1.11.3+1.x86_64-linux-musl.tar.gz/md5/88b69d889d602bc3df420535dba30f9e
+LibSSH2.v1.11.3+1.x86_64-linux-musl.tar.gz/sha512/7335954124074e7df786989db86e86e3bcf41f503b8e3b27d6ac18032c8025bec26180bd2c537b23349bcf5673eb67245531479b939670e620faf5aa13c8c4ab
+LibSSH2.v1.11.3+1.x86_64-unknown-freebsd.tar.gz/md5/6d5f6e9455c35c5f6655cb4d46797db0
+LibSSH2.v1.11.3+1.x86_64-unknown-freebsd.tar.gz/sha512/9515d11bb5686e29eb5a37bbcb7ab07574da0869c82e5b3f0cf282bbc56792af31e6174521d58133968b997caa6db75ac9b195024144fd2c95fd1bbf689ebbf6
+LibSSH2.v1.11.3+1.x86_64-w64-mingw32.tar.gz/md5/e66cdac0c2d5ce2d160e482d780ad0c3
+LibSSH2.v1.11.3+1.x86_64-w64-mingw32.tar.gz/sha512/2dabb1e8da5ea496898751d5517ca37178e1a44c78c26fe33f87487a0b4acf7185f686ce8d6ea0e65e38a8fd56e5ff09fd70becda402a942b5e459707eb2a44e
+libssh2-a312b43325e3383c865a87bb1d26cb52e3292641.tar.gz/md5/06d5e2881ac023583c7fd6665d628a87
+libssh2-a312b43325e3383c865a87bb1d26cb52e3292641.tar.gz/sha512/5dee8cce91853eb8c9968d7453b1ad0c3cd1411901d288f1731b7c7e4adf380313f61c2a66eee0d3b89eba79e420e13269bb3738bcf2c59f0b88276aa785fa8c
diff --git a/deps/checksums/libtracyclient b/deps/checksums/libtracyclient
index 19b7b26c5461e..a212c415f09e5 100644
--- a/deps/checksums/libtracyclient
+++ b/deps/checksums/libtracyclient
@@ -1,34 +1,34 @@
-LibTracyClient.v0.9.1+2.aarch64-apple-darwin.tar.gz/md5/08881ffc565e099903e2e972a7f7c002
-LibTracyClient.v0.9.1+2.aarch64-apple-darwin.tar.gz/sha512/a9dcc7f9ed7565a769dd1080513eec7439cd7b03d68d48f570ac3f396769ef0a7f9b07446045ce6536b7e67860096eb150670256c311c0a77ac1a271dc4b4422
-LibTracyClient.v0.9.1+2.aarch64-linux-gnu.tar.gz/md5/d6a8dbc7cf871f772f848a5e515e6502
-LibTracyClient.v0.9.1+2.aarch64-linux-gnu.tar.gz/sha512/cb9b3065f581a956d318d71a94216ca0e57599262a12a25bc2e6fa0234505fed5a9cad9c2eb7ad30d7ffe9c4ee3d26d9f645887d3f7180d69d3bf1d0745b4f22
-LibTracyClient.v0.9.1+2.aarch64-linux-musl.tar.gz/md5/0d74193e3571fbd80eb7d9e884b47e53
-LibTracyClient.v0.9.1+2.aarch64-linux-musl.tar.gz/sha512/18821911a96129486cb12726018b33fde1da345228623b7f326b92ccfcbbbb2349d79a35e6fa7cb4b6cf9283a860e8ac44c40d6b54a4dc1ea4373b869491b6d6
-LibTracyClient.v0.9.1+2.armv6l-linux-gnueabihf.tar.gz/md5/6111f3b3c696d9d07139e137c2ec1d08
-LibTracyClient.v0.9.1+2.armv6l-linux-gnueabihf.tar.gz/sha512/135139c221cb2d4d6000bd1a3771bd095e93487c7c649ebdf760ff5cb03f6ae003c33c2a36a52bbdf70e4c349195f78a97bc963336a36f33fcdeee33e4fc1eb7
-LibTracyClient.v0.9.1+2.armv6l-linux-musleabihf.tar.gz/md5/5b3154cc849b04bb3523f04fa4481b83
-LibTracyClient.v0.9.1+2.armv6l-linux-musleabihf.tar.gz/sha512/7f62a546c7cdbe3bb6a0a446980371ff340d5f530907a2434eba2a14bbfede8c740a763b0c68a252d7a3e357d9d933bcc6313919cd9bfa385715bc833be56cce
-LibTracyClient.v0.9.1+2.armv7l-linux-gnueabihf.tar.gz/md5/f6952d495c5b699226260e065cf2703c
-LibTracyClient.v0.9.1+2.armv7l-linux-gnueabihf.tar.gz/sha512/5fdad7f8ce3a03ce05adb3deb6bc8347aefcc8a7fe0a30e0f7684fe233eb8520aca138e0b8a6cc5555a1f2316a6e36bca32cb5de37f2aac5c5deddfaeb0f8570
-LibTracyClient.v0.9.1+2.armv7l-linux-musleabihf.tar.gz/md5/84924c2e32b39ed580b553a968e97360
-LibTracyClient.v0.9.1+2.armv7l-linux-musleabihf.tar.gz/sha512/2b81834b91472eb9897abefbe77e931782e8c14eaf7193f22fce82024610906b6e96122610edfab29a9c844581cc4ee9124e330af9eacd97fb8759c1de421472
-LibTracyClient.v0.9.1+2.i686-linux-gnu.tar.gz/md5/9f243a9d10cd928d45436f634d020c27
-LibTracyClient.v0.9.1+2.i686-linux-gnu.tar.gz/sha512/c9512030d83f32942c7fefd598bfa597ce758f39d11bc9551fbf565a418a3000d23f899f1e9411cddebb3642efef8cccfa3cf3f629bcc11fcf50585e1a80549e
-LibTracyClient.v0.9.1+2.i686-linux-musl.tar.gz/md5/4aebc58f4c8101640d9e450338a4e12a
-LibTracyClient.v0.9.1+2.i686-linux-musl.tar.gz/sha512/2085b7c0658bb39dce9a9b511c209a348916ed8e50ed0d51eb22f7eac167b890a87d357e433e12eaf7034c15842c8d2893a0c128443c4f25fa90fd5ca83e256d
-LibTracyClient.v0.9.1+2.i686-w64-mingw32.tar.gz/md5/dc6f911f5cdd2789ef9f13a1a9882243
-LibTracyClient.v0.9.1+2.i686-w64-mingw32.tar.gz/sha512/57894c759db949dc669e23b7d5e015942630328a3dc754185a0f6bae95a66f0c3e65e365317bae95f3a216f4dcab681203e64dc8c9a0b5478cc9e27c9dab2e56
-LibTracyClient.v0.9.1+2.powerpc64le-linux-gnu.tar.gz/md5/a7429f900f7f0a14fa355186d99a24e1
-LibTracyClient.v0.9.1+2.powerpc64le-linux-gnu.tar.gz/sha512/e37ff8e8de9b74367b9f0d6fe49d983900529caf9c2c55d5ace305d5896c2de6589380247dc85017d959901864d4a163fe110e6d860340d949c6ea4dec50f47c
-LibTracyClient.v0.9.1+2.x86_64-apple-darwin.tar.gz/md5/b037ea1027e6466d5dd9c0fb41f65ded
-LibTracyClient.v0.9.1+2.x86_64-apple-darwin.tar.gz/sha512/81e2d00bd8eaa1cbcbd5c0ee4552028ccedffcc072beea3dc08ac3181677da93406e8dfc581a78434175fa5bb861df06848dd3012f8adbbb6dc72efcbb5094a0
-LibTracyClient.v0.9.1+2.x86_64-linux-gnu.tar.gz/md5/cfbe122083aeeea6bd7ddc4591b1cb53
-LibTracyClient.v0.9.1+2.x86_64-linux-gnu.tar.gz/sha512/e0418a0b50d64990d6f1b80dfe65e2360817211e1225c4d8d9fc9c871a95bbb62c2601c617adf1d55305518f5ba1dd05baee82f6934d0011269fab21b89336b9
-LibTracyClient.v0.9.1+2.x86_64-linux-musl.tar.gz/md5/f152ba78f2461fec711144ae66380c34
-LibTracyClient.v0.9.1+2.x86_64-linux-musl.tar.gz/sha512/f59f837d2beb4df4d3d65352a8c46261bb5a92ae88a62e2d1bfb7293184e02be982fbefe20736456719055e718a26003984224d0d74a0a6244dcc59e0d350556
-LibTracyClient.v0.9.1+2.x86_64-unknown-freebsd.tar.gz/md5/83c7b3d9438dd04d25573a386bc5c3df
-LibTracyClient.v0.9.1+2.x86_64-unknown-freebsd.tar.gz/sha512/f22d0d4f4171067bd1f56bb63dba801e262d0ed4809538dae907296d1a12817954ad759cdc9e61f710fff5802fb7371d8283d6df52c9e8faf6b43c713c23e371
-LibTracyClient.v0.9.1+2.x86_64-w64-mingw32.tar.gz/md5/83f3db14b65b8e9942c754bcdb430060
-LibTracyClient.v0.9.1+2.x86_64-w64-mingw32.tar.gz/sha512/8acdd1d407ae927925f33eb75891684d6687e3577d5f8ac77e738daedc8145462b1f044e31edd9e2db4507673a0abebcea19e171833042cbbe5a135b0c0435cb
+LibTracyClient.v0.9.1+5.aarch64-apple-darwin.tar.gz/md5/c6768380fef203f5310d2cb3ab6fe509
+LibTracyClient.v0.9.1+5.aarch64-apple-darwin.tar.gz/sha512/309216fbc5be52319241ccdd2360c8960ffecf50c963bf248ee6aab6a43a6cb4c3a703391c7d3e1b07cb077badce930673f85f28c0924975b75909a4654ca3a6
+LibTracyClient.v0.9.1+5.aarch64-linux-gnu.tar.gz/md5/20d6c3ef5032d458817f18aa7f92b44b
+LibTracyClient.v0.9.1+5.aarch64-linux-gnu.tar.gz/sha512/d2e341ff18bd06b57094f2356fdb36a3f9dcf56f3340b83006dc02d41d6e5040f145a23a06f86ccd1c9800e93fc4461ddd7902b6eccb93b78b365c824e5d764c
+LibTracyClient.v0.9.1+5.aarch64-linux-musl.tar.gz/md5/58cd88ab771a31326fb4e3448325a17f
+LibTracyClient.v0.9.1+5.aarch64-linux-musl.tar.gz/sha512/dfa11ac4fa5261bad557a244458b2f67c20c761c5d49d31488a9b944345e32e55f1a40515097b4f5f4896fe5046e3bbc70faa40ff4dd504d4d97cfa42e46bc33
+LibTracyClient.v0.9.1+5.armv6l-linux-gnueabihf.tar.gz/md5/cce5ef56b53255494b334df157e285bd
+LibTracyClient.v0.9.1+5.armv6l-linux-gnueabihf.tar.gz/sha512/1287f734669b7a7a6f43b6cf3b725de650e64edcd4284d51120e7371f91ca18c63d4981d58c28ce2da9355eefa5a02e0bc0b35303b096733a7842bc952da2df6
+LibTracyClient.v0.9.1+5.armv6l-linux-musleabihf.tar.gz/md5/60508f4f9c757678b8e52ed0342079eb
+LibTracyClient.v0.9.1+5.armv6l-linux-musleabihf.tar.gz/sha512/92d2d8521a8b6c6901f36ad71f59fa1b2eb58d6dd06da02140cc26b6c44d2741ddd0cd3107bbd3e70ca1713a33fda48d5288bec59304264d0afc20d4e0167a50
+LibTracyClient.v0.9.1+5.armv7l-linux-gnueabihf.tar.gz/md5/64563cbf8fe18fe84a2ec9e9fda19e6b
+LibTracyClient.v0.9.1+5.armv7l-linux-gnueabihf.tar.gz/sha512/b0158367834f32fd7449d45b983f37bcfed71727bcea3febf382a779265ce4b99845ae32f5c3969d83089bbf5e072680b11138a45b7b7030364ce341f285e220
+LibTracyClient.v0.9.1+5.armv7l-linux-musleabihf.tar.gz/md5/1d272a82701889fb695edde6bdde21bc
+LibTracyClient.v0.9.1+5.armv7l-linux-musleabihf.tar.gz/sha512/1539f06593eb769ba35ef198f90b9fa6c11d7146124f21e35c8fee577d8fcff3d71f4e38e6d26d84dc8f66b06a26a130f4bc740a201cb27573ec8e6816d489e2
+LibTracyClient.v0.9.1+5.i686-linux-gnu.tar.gz/md5/4e14c36ea3b1e54a26897767d4a010d6
+LibTracyClient.v0.9.1+5.i686-linux-gnu.tar.gz/sha512/4c1d9cda642a4ea3084b73c0b536edd0f33a216aa02c59f914ab428e0e97120ba3f81e7eb2262a2242884f553fd319b80ea7b013344e87e173dc1ee9b3421ef0
+LibTracyClient.v0.9.1+5.i686-linux-musl.tar.gz/md5/75ced50efcc6ce1c17064a3447933fb1
+LibTracyClient.v0.9.1+5.i686-linux-musl.tar.gz/sha512/969c41de91d288e4e37a36f990341c2f71d6788d93bb34eb6708532ea60bfa1bae6760871de33b42cca50b61dbf8028f639538f34ab9bebef2312d449c259f4c
+LibTracyClient.v0.9.1+5.i686-w64-mingw32.tar.gz/md5/0f8c6cd2e1aa738b340e10e1ce81732b
+LibTracyClient.v0.9.1+5.i686-w64-mingw32.tar.gz/sha512/76824a28f16650e14b06051f49a5c42cd8692dbcf808c2d47ab21ac986cf49e17508062ce5e0e71f2a689112098897dd7fcd47b819cab6967080b7e23224bf1e
+LibTracyClient.v0.9.1+5.powerpc64le-linux-gnu.tar.gz/md5/573e5c6aca49845b7e9e881c7bc8f755
+LibTracyClient.v0.9.1+5.powerpc64le-linux-gnu.tar.gz/sha512/7451731c6f0bf0ac08c966f925b0dd628f6f4c0ff7e2d65e8bd3d27231e3fbb2512169c917431baeca3fe8e66af4bbbea7ca4ba79dd7d3b9e1d43b08a580dc76
+LibTracyClient.v0.9.1+5.x86_64-apple-darwin.tar.gz/md5/b037ea1027e6466d5dd9c0fb41f65ded
+LibTracyClient.v0.9.1+5.x86_64-apple-darwin.tar.gz/sha512/81e2d00bd8eaa1cbcbd5c0ee4552028ccedffcc072beea3dc08ac3181677da93406e8dfc581a78434175fa5bb861df06848dd3012f8adbbb6dc72efcbb5094a0
+LibTracyClient.v0.9.1+5.x86_64-linux-gnu.tar.gz/md5/886d5b76711252176eaf7e41dd2db0a5
+LibTracyClient.v0.9.1+5.x86_64-linux-gnu.tar.gz/sha512/783c6469c586520c7f1206f5c2eae6a909a2dac7c3f726f439da91b0f3dde970fc4f17c6e3b54aa8924ae537b1b6a14729cd1305e6488c458db68ffe973c2ced
+LibTracyClient.v0.9.1+5.x86_64-linux-musl.tar.gz/md5/170a53b0c63e7e130bf3d71590e27193
+LibTracyClient.v0.9.1+5.x86_64-linux-musl.tar.gz/sha512/ccda22a897358d9eb55b500dbeb35c8d136f484a649c29066d5f2d0665d5b71526502237dbc374c2d018fa212896fa6a6c6903fc7c4833f814d1e60c3f12fa83
+LibTracyClient.v0.9.1+5.x86_64-unknown-freebsd.tar.gz/md5/58fdabdbbdaa7b4f53bc4249a7c57059
+LibTracyClient.v0.9.1+5.x86_64-unknown-freebsd.tar.gz/sha512/ad0d4b74c707da86fbcf7210cbcc9a3ebdef770ad0bf12f33e3476c2e455d48e389f021fc47e50b85d1c4741072396b71dc034cb95bfd38f775f4879d06b5998
+LibTracyClient.v0.9.1+5.x86_64-w64-mingw32.tar.gz/md5/cf5b40edd556a7b8ab23d28bf0eecb12
+LibTracyClient.v0.9.1+5.x86_64-w64-mingw32.tar.gz/sha512/8ece28dae598418c3435cfd323609e6e615ce8299370040fdd3a000007faecf87c5ffdfebcca80ea9b4180ad9bf13d3f3d65fed2fafe6f54d4a6068f9cae61ca
 libtracyclient-897aec5b062664d2485f4f9a213715d2e527e0ca.tar.gz/md5/51986311723ba88ac305ad2c1e3e86c6
 libtracyclient-897aec5b062664d2485f4f9a213715d2e527e0ca.tar.gz/sha512/f92c5bd71fd3e933f03e3535c0668a9afddc7ea19531aaee11b22bde09c57cc8a555f7f17f489d4221645fb6d73ecf9299d5bb11949d7529987beec3e7d91763
diff --git a/deps/checksums/libuv b/deps/checksums/libuv
index 709fba71f159b..e2ebad914fa52 100644
--- a/deps/checksums/libuv
+++ b/deps/checksums/libuv
@@ -1,34 +1,38 @@
-LibUV.v2.0.1+13.aarch64-apple-darwin.tar.gz/md5/1a58ce9dc88984c3b5f7df97af6cbf83
-LibUV.v2.0.1+13.aarch64-apple-darwin.tar.gz/sha512/2bfd482ac759ac88d885371854affa8e358a10fea6c7756e0d1b366bc82ecbea56bdf24ca634525fb2a6fc2b3a5c77b07a4c6dec2923d8bffe2bc962bd3e7f84
-LibUV.v2.0.1+13.aarch64-linux-gnu.tar.gz/md5/7f270dd1e3046c8db432e350dd5cf114
-LibUV.v2.0.1+13.aarch64-linux-gnu.tar.gz/sha512/c0debcf17b54ba9f1588d4b267d610751f739d8ff96936c9d5fb6d8742039f8736c63fa70037322705569e221d73fb83c03b6ba9fb4454442fffd3a9f1a1a2da
-LibUV.v2.0.1+13.aarch64-linux-musl.tar.gz/md5/07f56c32d5a2c12e6c351cf9f705631c
-LibUV.v2.0.1+13.aarch64-linux-musl.tar.gz/sha512/8037d7aa0cb06850f055fd19cebdcfcf3146dde0d12768a9669bf05dcab91fdf3708798203258cb3f452158bdec7faae41e6afbb0e60b21403e683db3e23a1c9
-LibUV.v2.0.1+13.armv6l-linux-gnueabihf.tar.gz/md5/5558a7f68c7c375f40bc64da59fef0ad
-LibUV.v2.0.1+13.armv6l-linux-gnueabihf.tar.gz/sha512/92ed6601cb5aa9a3ea2478a1485849543c9e847c8e85542e72f372a2d37c4c8b90f5ecb1bee1e462db31e1e8dba460f584b3cca9c833989c2b9ee404e355654e
-LibUV.v2.0.1+13.armv6l-linux-musleabihf.tar.gz/md5/de6bfb7f0c0468b79e8895f166fb6340
-LibUV.v2.0.1+13.armv6l-linux-musleabihf.tar.gz/sha512/7948d007171bf57b827b489f3627ac74df447f4d696e8226e54e95ef0c8eed5a5ddbf758fbad841bc367f78cd61e6a5899eb478003dca3a79cb494b38cab830b
-LibUV.v2.0.1+13.armv7l-linux-gnueabihf.tar.gz/md5/5be35de1d881f80981647c369b9b4ec8
-LibUV.v2.0.1+13.armv7l-linux-gnueabihf.tar.gz/sha512/458e5058ea4e794e0dc790da4c98569676056bac336df69762e8ccfec8f2955dcc55e8d090daa1b191c0ffa41392a04530c9bc28aa27cf411c1df2f1ba14bb97
-LibUV.v2.0.1+13.armv7l-linux-musleabihf.tar.gz/md5/8d034490da1ec2ef3dd3c69336177654
-LibUV.v2.0.1+13.armv7l-linux-musleabihf.tar.gz/sha512/7f595a8ab8b664d229cf6144e9ed1b5936ba8aaa70b92611ddb85bbe9046bb1b94d8417355a5abf058fb00023d4d56be0b2ddfd5dba896cd7b64e84e32dbfc5a
-LibUV.v2.0.1+13.i686-linux-gnu.tar.gz/md5/ccb9aba78456c99b8473e8ddd328f90e
-LibUV.v2.0.1+13.i686-linux-gnu.tar.gz/sha512/d382d90137db308933257a75e51d90988d6d07663b3b2915478547127d32f73ae6cdb4575d5ee20758f8850c7e85908fe4710c053cb361826621f22bc5b6502d
-LibUV.v2.0.1+13.i686-linux-musl.tar.gz/md5/5ade48f16aa26bb68dc046d285c73043
-LibUV.v2.0.1+13.i686-linux-musl.tar.gz/sha512/f5728a5dc567268e59aa2697deb793ae427e11dcb6796c577e3da3ac24225ece5d4a6c4f903d4a7b184d3c3a3c8c1586c34b97e4a75de0a4e23ace720020fa8c
-LibUV.v2.0.1+13.i686-w64-mingw32.tar.gz/md5/399d6fbe54dcfb2f997f276cd38fd185
-LibUV.v2.0.1+13.i686-w64-mingw32.tar.gz/sha512/55707e02a4b5bdf9c94683dbaaea1cac58f7735d5ae22009c219ea61ddfab1fe19b9bc6e830fc32207efc588c27f92770d2441b972f351a1bb3fdbbf5671a58b
-LibUV.v2.0.1+13.powerpc64le-linux-gnu.tar.gz/md5/26656d4eaae8739099c55054bad54f57
-LibUV.v2.0.1+13.powerpc64le-linux-gnu.tar.gz/sha512/f85f8cfd91e7b1b02b073931ef9a3bb05620641d18ada039744a92b8c40e5a3de8d7c5efa7189b88baf1eb11fbcf9e6d16031b86e40f99f1b7cfebb0f5c5adf1
-LibUV.v2.0.1+13.x86_64-apple-darwin.tar.gz/md5/c7da6b91394a20c43acdf6f680cb62e2
-LibUV.v2.0.1+13.x86_64-apple-darwin.tar.gz/sha512/238d22bd299ae3b0dfd24a5b38d6d0d07b751fb301487a2d1d2f5313ae3596f33492388ea9fbff549293787505fc527e174ebcd4068f1bda43b40bc19e016d89
-LibUV.v2.0.1+13.x86_64-linux-gnu.tar.gz/md5/8c8913068263257cce5042b725918e0e
-LibUV.v2.0.1+13.x86_64-linux-gnu.tar.gz/sha512/a848381012d5a20a0c881f5835e479cfff811928ce508cc57041d69668782f2135c14c7e5388e7dbf693ae57aa1825d911f6f450b9e909cce45487b03a581a23
-LibUV.v2.0.1+13.x86_64-linux-musl.tar.gz/md5/16747c066b6d7fe56850c77f66ea7478
-LibUV.v2.0.1+13.x86_64-linux-musl.tar.gz/sha512/833a02f9191edf3b56f1e02f5671f22de6cb27ec3c9f770530ec95d8da7ba0b9c05bcdf6b094224ea8e43ba70918e1599f3237bd98900763daef80c327d3d2de
-LibUV.v2.0.1+13.x86_64-unknown-freebsd.tar.gz/md5/71f7d9d9234a0623c4b2ee3a44089b62
-LibUV.v2.0.1+13.x86_64-unknown-freebsd.tar.gz/sha512/e73911c3ec35a2201d42c035ecc86e8bd860604b950cb1b7784ff49e27ef5ac9b1da09b59d359ff25b093b87593a8305105bc43711c12eb9654972e280c26d3c
-LibUV.v2.0.1+13.x86_64-w64-mingw32.tar.gz/md5/471d20fa2eac6bfd5d7cdb1b7f58c602
-LibUV.v2.0.1+13.x86_64-w64-mingw32.tar.gz/sha512/3f5ad55268184227378ddcfed0146bf0386c8cf468bc53a348d21195d818db4db768be61fd23e1ee2ecbb52f073815884a04a923d815b9b5992825d144c0633a
-libuv-2723e256e952be0b015b3c0086f717c3d365d97e.tar.gz/md5/d2284d7f6fa75d6a35673d22e1be058b
-libuv-2723e256e952be0b015b3c0086f717c3d365d97e.tar.gz/sha512/68d6ab740945b9ce3475118ce3d186fb67d7e8125784cc0c827df23d63f50c40c0261ef37365d8c11ab9462a8dd4e2e6b19e91e3c84b64d8fb84fd3894afc4ac
+LibUV.v2.0.1+21.aarch64-apple-darwin.tar.gz/md5/0db3963ae84aa07543f50fb24968c3f8
+LibUV.v2.0.1+21.aarch64-apple-darwin.tar.gz/sha512/85fff3a18800ba949c2abc21f32245fe3560fef7162fd538b7d5d262b058279ce018c4d27fcf47efb9f71850629fc2ed14c89ae22b3ae647461b77858b6e1eba
+LibUV.v2.0.1+21.aarch64-linux-gnu.tar.gz/md5/cf8054da92a93e8a2d22f8722719bdd6
+LibUV.v2.0.1+21.aarch64-linux-gnu.tar.gz/sha512/710d3c6ba80ea5d7696b4e2c20ea5637aba5ede506ffbe8a2fb4118dcb4311339d1b625906b000beaf69810f0139002132d82bb38783d9e798a5754f514642ad
+LibUV.v2.0.1+21.aarch64-linux-musl.tar.gz/md5/a34a77f9c07081e4c9a0113b2d69635b
+LibUV.v2.0.1+21.aarch64-linux-musl.tar.gz/sha512/330791294246b3a726575dafcc6f506746ef0928563ccca60b497bffda2ab3f521694e29fbcf5bdd859ff2e0b10926e8ea783a2f90f8b2c8f2708401f413cbbe
+LibUV.v2.0.1+21.aarch64-unknown-freebsd.tar.gz/md5/f196359888b9e23cb499de4fc03a58f8
+LibUV.v2.0.1+21.aarch64-unknown-freebsd.tar.gz/sha512/7d534c6c1bfb590babb428ec01f5b65fdbba630f06cc746020ee0e6a302d0872175d53f0e97daee0c84895d1a4e2dd4f84530e203ba04fd1dd5ca5c6498f66ba
+LibUV.v2.0.1+21.armv6l-linux-gnueabihf.tar.gz/md5/48d9abf58af13a3941fb5b520e909014
+LibUV.v2.0.1+21.armv6l-linux-gnueabihf.tar.gz/sha512/d39fa5dc8ab4271cc3d0b6e241acb3f8d5f3a63ab800c69c216b7016e3afe59d46408ceda16e8e707e023b59e477ed3376745eff436a5c39fcd87a14b5d43940
+LibUV.v2.0.1+21.armv6l-linux-musleabihf.tar.gz/md5/dbf3450984699d816caddf994ccdce89
+LibUV.v2.0.1+21.armv6l-linux-musleabihf.tar.gz/sha512/2918d80f0fb78dd721b802b338ec460a802bb792efc91b13d63a539955730d88e1efc0d1dc5ec67a7ffa7c90977eab11f28baedb09b603b19f32c91a57defe90
+LibUV.v2.0.1+21.armv7l-linux-gnueabihf.tar.gz/md5/7c90f161d81d65da8769d94672783913
+LibUV.v2.0.1+21.armv7l-linux-gnueabihf.tar.gz/sha512/accf0a35db6296d67ebd471819d2c2152cf9f79e8e485d61fe894535ad58f55819b0caec0c72b89acba9e9377e1ce024f5513f66cb8b2f60ad1a78bd537229df
+LibUV.v2.0.1+21.armv7l-linux-musleabihf.tar.gz/md5/afd4da4b9586785fbb0f17cf43bfc72d
+LibUV.v2.0.1+21.armv7l-linux-musleabihf.tar.gz/sha512/be90ffc0e7874ac59a6bf2e62c52addaa4165d6acff3799a595cd4bf279ce85d3cfcdeae572ea5f76287cc1ad422e6814a2c170f7bbd494267e0922354e2f025
+LibUV.v2.0.1+21.i686-linux-gnu.tar.gz/md5/abb811a975e90866b88b909b1c5210b7
+LibUV.v2.0.1+21.i686-linux-gnu.tar.gz/sha512/2eaf7913bbbd720a65d1b39669c4215f91c80f8aa10221214cf9c8eceb9bbb735720293470c7b6ff38ad7db5f5942c6abc4850c2a96413cde63ac06d324b49e6
+LibUV.v2.0.1+21.i686-linux-musl.tar.gz/md5/07f61613f5faf3a123715ec5bc20c9e2
+LibUV.v2.0.1+21.i686-linux-musl.tar.gz/sha512/89ffec766bdc1f789da0228c7fe875bfeffd3465a9467ee7fe318539bc327a8c81841d17fc5201c63186ec959bf422d270b6d871d4f4bdc84257d1d846b060a8
+LibUV.v2.0.1+21.i686-w64-mingw32.tar.gz/md5/002f3c6da91ab5559abb79fd0538b663
+LibUV.v2.0.1+21.i686-w64-mingw32.tar.gz/sha512/b610755eb75b0fed77df70dd316b481e0bdad77d6469b72b71ca7195b306426c1f3bc7687e43ff18282f127249584d87f5dd3b0839db3ac98a0daa4bb8ff5e85
+LibUV.v2.0.1+21.powerpc64le-linux-gnu.tar.gz/md5/2e44778998af4e4a1d4b6f35aefc3553
+LibUV.v2.0.1+21.powerpc64le-linux-gnu.tar.gz/sha512/3babe8f522ba15172eb80044c0f6586bb15a8061c6bad5afd0d9218827d96ad5624f67f3c9dd7cba3a24dcba82cf90091c4e2b29f6aecb415ec719215b51d2d9
+LibUV.v2.0.1+21.riscv64-linux-gnu.tar.gz/md5/6f3f085ac810fde5c5b8e33d7ccdc1c5
+LibUV.v2.0.1+21.riscv64-linux-gnu.tar.gz/sha512/61c2ac7627efb7219fccfe52c55a60fc8da8d40d564e0a041a7f2bbd6c20a6e4b20288f8f284340cd04b6b79063362b1a1d1bcaf64f744fd5121e402b81c74a5
+LibUV.v2.0.1+21.x86_64-apple-darwin.tar.gz/md5/78fb85991934037c9873865bbe7a67d8
+LibUV.v2.0.1+21.x86_64-apple-darwin.tar.gz/sha512/3dbd58b7c3fc627285d9034fcb34603a2ce2f980dc8c6da685fbd434009bb50b938f954d8a7707f96bba2edeb278216142f50d60393b8813f97233731346ae84
+LibUV.v2.0.1+21.x86_64-linux-gnu.tar.gz/md5/959082a092668c8ee1f4a9ac6996ca2a
+LibUV.v2.0.1+21.x86_64-linux-gnu.tar.gz/sha512/84b9502202fa17c8f2c94f3f58e7335091666dceab3a54de168673bfff7da68549b6b90a34349178894768e3b8ebb67be46cb5e194e722031b877f468d5c88a8
+LibUV.v2.0.1+21.x86_64-linux-musl.tar.gz/md5/c16a4f691859369b53b517934bfc0c31
+LibUV.v2.0.1+21.x86_64-linux-musl.tar.gz/sha512/47331a443bfb01972cd857d3b7164f60f9faaeee07ef14e8fdefdff433909be9e0af23ab2e1b57ac53bad43e115d665a686b1681291d980e152082d2420c9acc
+LibUV.v2.0.1+21.x86_64-unknown-freebsd.tar.gz/md5/1a7e29761a10949e66d98d5796ee19e6
+LibUV.v2.0.1+21.x86_64-unknown-freebsd.tar.gz/sha512/34fd8166d873d1cd77c424710b4bc95ed8afef60f49369ece3a46361cfe0d9e74eb28f0ee75071f72c9ea697e02cb2305c2ea4400e734f70b7d8d4d0c4f04edf
+LibUV.v2.0.1+21.x86_64-w64-mingw32.tar.gz/md5/775e956acdae49cb57a4cf2cf17baa74
+LibUV.v2.0.1+21.x86_64-w64-mingw32.tar.gz/sha512/404ccf4ef1eebcb373233bb5ded1b0d504041473b52a32fed4d21d17e50f1be0bd2f67e9720bdd4ab3e296a3aa7d5a61b308a6d7533ce4b2140dd0fc17c8dc8b
+libuv-b21d6d84e46f6c97ecbc8e4e8a8ea6ad98049ea8.tar.gz/md5/d9570f4887254018466ca64feb9afc95
+libuv-b21d6d84e46f6c97ecbc8e4e8a8ea6ad98049ea8.tar.gz/sha512/b41e1e97c3f05490a16926717ae22eed841c901fb3470336090248471204b0b434291149fd313c93e5de3445eefe5f909317044eb5420c199de556a27660d14d
diff --git a/deps/checksums/libwhich b/deps/checksums/libwhich
index d4a0119625663..cd34ac7cc0b8b 100644
--- a/deps/checksums/libwhich
+++ b/deps/checksums/libwhich
@@ -1,2 +1,2 @@
-libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/md5/22fd8368c7b40209dada50e3205c1294
-libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/sha512/6fb77b715d70d9bc95a8546c3bf97bd3677c7ea344b88bb5bc3bbfac9dceabe8a8cde7a0f64dec884cde802e4a3000e30837d3f824b5a9242348c4fe061526a3
+libwhich-99a0ea12689e41164456dba03e93bc40924de880.tar.gz/md5/213f0ad813de677d25787cae05901a9a
+libwhich-99a0ea12689e41164456dba03e93bc40924de880.tar.gz/sha512/7c42c3b6c480763b85f8c5eb927e776b48cb8a2be1e1c143e799628ee9265adea6a56b33c17583c8e6fc040a3889a4010ac674918bc6947899983a4942353526
diff --git a/deps/checksums/lld b/deps/checksums/lld
index 1b238fdbd1a96..aab077e9aca1a 100644
--- a/deps/checksums/lld
+++ b/deps/checksums/lld
@@ -1,108 +1,120 @@
-LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/0edc0983135da9e37b18fa3fe6d56237
-LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/2adbb4eb76e72be28951c96140070b6d16c5144f689631d51b56365549a5d38535c1dbb5e351a6bdac4648ba52da02297591874193b1c16e7078060c99d23f04
-LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/59b06fca083f1a5e9bf9517ae4f6a4d6
-LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/7f1dc641df9288dfcd887239b86e7fe2871220b9d7f877b24b3197ab73d2176c4533decbea427b09e8f70ddc6c7570d31f5682eaed7215193e95f323769276a8
-LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/c97e607a661b9ff571eba4238ec649dd
-LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/7c7add8a0fac379b580a19a02966adca4932bd4573ba0111262544c0d935fc121c5aadaeadc97f9564331202b08c7366ceb170bb2b318db3425c157772d283ea
-LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/d55ebbd25b97a4e4628fad1e04782056
-LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/681729b4d10d8f66b0cdb89ca4500ee8a417561cc886608d06af0809d946bdf7cf5c6bda2b6d5d577bae3a15dc347568a3d7d7428568f86ca61327041026fbd2
-LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/78b06e5a351e6eab372ae29d393ffdcf
-LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/37a8b5fa3491ec8ae74da88e81a0c229d38166acbb46ff3f5a819034c40fa59ca2ebf4c0ed58e615baf7bf7da789ba86114738252501cfbd842be95cc2104dd4
-LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/7ba5b76c83d746a3c62354bf753db697
-LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/1fa403c8923487e2d6a8e8c1d86c2ea955ed32bcde2328cb1167a315cdcf704af896505e9c44b750ffca9e3ae66e805f60831136eb79fe1c6d58eaf81a78b1a4
-LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/f052208026a0fd5120ea838843b244ac
-LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/fd9ff2d5836300bcf76e4aeefb1e57860b3203fab0c32e668dce3e636dc362876d0fba1f2c23bf55a342ac17294c73e839a8eaf065d64d4397582dc212b8b9f4
-LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/4d1077835df0f592a168c140ffe6299e
-LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/8dfd44113b817f607bc38ac1b4ffb192be340c826b9bc8f9d41e92e0f0333d8fc4227f93aaed16a4b9e94a5ec8b79628f2d3a73fb644684a595921f36ccfbeb8
-LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/0f31939f4ff00c572eb392b6e70aab38
-LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/581441087ad4869cfdba13808b2d6adaf929ea1b38ce96c357f276d77c3e63439f8edbb822c8f41770cb61fc08837d7eed2466d187683bc44f2cb3c553e2e60e
-LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/ca767173044b5a19a86c6a890dda3b05
-LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/0577785079039b534fd736ea7a51d9b5176693d81e0bcda4fccd760d7c1218042999b6a38b973a903c0ef68e57dfb3b86e9e2f9e307dbaf603997a853f34eed3
-LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/89bb950f17a5b792a6e60ef98450a6b4
-LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/54bb68159743cd14ac0fce7f218a66ff6bf29e626df8dbdbd6e8581699d9b1d357a3c10d86c6822bde7299c14728bc55480f91cefd041d1de61cc179ed347b9a
-LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/735e4dda5f8cc06934f6bda59eab21d6
-LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/a9b91beed959804b9e121fee786f28808a7670fc5d2728688cca1c7e0fe56e82e47d95712e38fdfc42e02030896843c4b3df9928eb34c2aca9ac02262427c76c
-LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/30a95179bef252aaca41984daa54c680
-LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/0302db3c04396a30d1f6ab8d8d585bbe3a9e70342f068747ddb875b024c173bb9bb34518da7e76a10d3a325dfd741118f36f67fb83251bdb8a9901c4799ad79f
-LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/4386c746c5d9b1408dbe7df04bc6a08d
-LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/d71c6ebf5d3eb42368ab336cf8520afcd05470308ea117fe95797171e5c573948412ce777f62cbd45ee99ffa59cc769c276a60393a22fecffbeaf8b77b50ea35
-LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/49287977de61b100979355e458c8970c
-LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/85ed3b2c7d2478a307a393a2003e694fc3097cc6812143abb3cbdd73a7d36bcb6f06a7d341ea639b9849f714c2d8f418a8b96035ed1c19a3957b42d005c0427a
-LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/80a97341c9537b8a58c7df23f86d5cf4
-LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/5774b246ae820de4230a1f4f65bd683145dad5cbc4d326fd75649e06e773c74c2cffd48108a79ee0cc93175786450b6d50f7ac532e6f68961c18fe6119ef94f5
-LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/6f84d6858aecdfd95726a37c9b6a0e0f
-LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/2cdac9a810c777ec6d85093926292c75e4287f83b7224246f6fa248e3874a2078c46377cd5ccb0f36a5e25b139691f1111d705079e89ea4215c9bc8659414094
-LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/d40f0956cc36aa7846630755a672a91c
-LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/01368311a0ecfbe3f23514115f0bce7ce816c878815d937f3fa067b9daab07da0c02f520a96ad793212e5056bfb6294dd0129dae75f274dfeb48191e504c5322
-LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/689120b8091b9da8cc9528c96f5c5df2
-LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/ab78810af7d77116a4973b5825d5090133218cf08d5d77be14f83e028821e83493a112adf71094cc208f74cf4deabda63d7fff98866cc0304793aec9b27b7222
-LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/5627ccf1677c48b7ef8ac9e5faac1d20
-LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/454d2636cd72974c79c2d907e56e3c69c30c3fff78b199591c9ebe4f14d04c40c4bd7331f8dc2c957c37e214da8d28ef3a47ed8d3dd4ca9d480d52bab3429b39
-LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/8f50e5f684c41845308c123f8e45a0d5
-LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/21baf8a00fa65473ff6cf7ef2974ef88cd5b0eadd06ff85598de10d09425074297bcff3472ef001047a5440065a2de2fc6b1eefe3a32c7c1b3e3261165dc063c
-LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/c2e0a5f58e38a9acf2c3914177ceb827
-LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/2a1653d171a2ff08bde55c53973e62955fe9d9629388ae014a645d3199d8f4bcf0fb923d06812ccd62e224032b261c8ebed56ebebed750acbc87671203d7aee5
-LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/fa3959aa413a2b707d8831edd2bd7867
-LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/8b74fef916a72c2f4933c21d3344410c7e03e64265a44dd62cf2ef2ac0feeafeb2b443eafa5dad3d3d0028be96b9424ff67b16391f1b3a2185826de68921adab
-LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/b0751bf7eba4f7f7a28dc22993eac9cc
-LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/7510f7349b06365e9cd260229e7b8c84da26bac072c5fe9a4e59484d82a0753d4ecf1066ffe41343f881a682590dc9ee4ef4a49cd83dba45c21b8d76dfb80f67
-LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/5abfe9e960bab4c8a44f41aaccaf936b
-LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/efda0e0a35e2774af2f2df53f89d61f146a5730086d40865d448b009c833934b23ea4b296c3dc3f2039527b72ef40493fdee6f7c630484f64cec2d1aebf4a4c1
-LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/bfe87378e965050b1b20e993c8b13a53
-LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/ef2fd5e81f349673417bffd68c4122a87c09caed3f6f8f0235bc70b75deca7363cad68276aa708fb9ad8f7edd249d49f78d9f5fe7b226b62e8604c7bd3d4b9cc
-LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/4ee16f57d7dc060007250e17ffd55817
-LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/27fd3a21bac676feb2c2c2363c027cf12988c70d889174e52c6bc1fcb4a93241f4bae85d5750ceba5fa971611700a9d15e3e02803cc14382cf6a1ab2918b719c
-LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/06699da5617371442b0539203152405d
-LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/83ba6300d5669b52c1913440598a2577106ea73e0b83549a5b3b0f081a94b6b8ca9fc05687d2be4b60c2d6a524bafd43b839082f0eee58b4685758061b229fde
-LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/a051688aa3a6383b4be4faa4f4aee985
-LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/2059c6ac6579c4720e7167cd547b679a9c1a27a2c68174ed543be935ee23122234b3f2a4555de0abab3a982aba73d1751db336f3e28005ce8e4659d61f9269aa
-LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/600baa66310cf348ef3b4351ada014f4
-LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/52b4718993d8abdca8ab701e86022367655d7927dabb8f3a8e41e43dbc90a9af78caf8abd37907a79b0f05017b6f0ef72314a187dab5bdac8ef7996e74c96e2d
-LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/4bc599fc07e9c7c717355802c1538a6b
-LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/4521e40cf6cca31cc9ec8ad974c6eb922632d8ad0d5008c951e23b7ec193a71dba5f3bc2dadcfe47e2ca29395646293c6559bd88ac286c5d31d5c4521756177d
-LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/462b9c453405768c2d93535fc83308b8
-LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/39dee4d4a0073a8dc4ea63d43bc9a357bcf8e26e3c5c17f1441fa72145f5a4ff6a53e0aae6de687b8fcbace40207ba06e61cb8452c9bfff7882ab48e9f9f5ff0
-LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/8b12a4f5db80b925785f42a97e6489f0
-LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/797d12888668712658fce85ff842d812a255fa4633bf4e78b21488867518a1fc2de746885e2fca1055595ae476670790239a714797f2322ca04027afbf27330f
-LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/acb8716cf94f654078c7dce4a140f71c
-LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/cf64ae04ae3e55575d5781ad30212b1c0ec734f81b42e3c26da8766bde7c47b6a9512515997afd15f9eeef2ee326c7aa589ee1b557c45b4ef955a8afc72fd759
-LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/331d844c447f564171345009764321a1
-LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/978349a74fc5498408a5318c87ec6d25c01268b9d21fb85e6bb601243ad0d33be8501b181d1f9ab7663433a740912f5bcb7160caf1011b1a2c84fdd51e0fce78
-LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/8595a49c49e851973fffae7c4062911d
-LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/f707e514843a206b53f380c7bd8d4d8203cc62219344c1234416462dc1cb3d3f8a7452ddfd0f07178d43dfb193b4402a018cc465dc76b43b687fd20fa1ea5222
-LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/5b4463e81c156dabe3d182c42eb647e1
-LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/995db577d4a78d62cfcfca3f1fafb333ff26548b41d8aa8d763e4705dcdfe8005e2f68873faba4040599a6d15821a523261d0451d75fdf6e1c5224e8e777a71e
-LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/d2f9f08cc952c0639f7ef1073c8630d6
-LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b1cab7b813fe0f7c26c55261e8561295cbdf1e812db3844b87605fb527d09855f2bef4a40ddb0a7cd354c7cbb626293d4d4012f33acc242f9af4abe1dbbbeeb7
-LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/e82e3b67a073cfa6b019bf5604eabf2a
-LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/9bb18adf78afa9dfa0054e6511f5750a9e2fa9138aeb1bd83f7a51d37d031e2f3c151463ea8f682dc7130cb98fafae0b84c60d3befe27f9d0d3dc3334ef82420
-LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/56da3cbe81ddff089ccf6b6392a9396c
-LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/2af483a1761022dcad414fa7cec7fb5c6fd54be28185e49539f4824cb0b6acdc1cfa5c78de31268dbdc444201936c5a6d2e04f39ef6f0b9fb184985ba4e3daa2
-LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/15cbf5eaf89c7b834ee19629387515a5
-LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/75ce7c398bdfd57af2c09dfc946b024d5a72e90575ed92f28e015e620ca89e421dfc9a391f4a78277c3e06c38dd696d572c5601a2b1866e521dbc2fc5a60da56
-LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/b895da29b6082cdff6f0324179352fdf
-LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/e89a97dfd6c345158e3e12cdf97d33c22f849e5438401cf5a3670c0d1cf0252ca03e4c52475a42c3e6c2b2d689c2f53fc5cb7c925a23167ac51fa1a5e01e3d7f
-LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/7edda2d8c2eaadec2d262ded2456934a
-LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/0b1d60840d638c0b0269b901a3f5198e18e244da338aef2fb49b474b3601d44a2b4dec13e258909985e363ef8a8749838b01dd195e05a266ca36e6d9f274ef17
-LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/e26138e3491a053ea9a998dd00ad728b
-LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/1215861fa52b1ee21196bbce0e99912b25f887f5734e0c2628ac78c1af5fdf57c4d7cf099cddcd7031a26c60cf141aeea66a0147428008cb485c207e90801835
-LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/a1e786ac775517b8b483bbe3f6571d37
-LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/3937f156fc2fb8eecb13444c71f380753c16b08f29124228808c91ea4258ee2195219c4a9b601d4468cc24bd584403c16175518a620bd94a7dadff868b3771d7
-LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/976d840de14ef6ee2c0a538197fe8f10
-LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/7f58f975dc3d69f502537aca79509bbc3c4f5da2ff8ddb1c7e27180a6bb2123713eb42da61cfabd7a48a31fc464fd74554b34935dfdb3ec095d14ff443f514f3
-LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/ab0295ba327cfa6b9a252b0e7a4b50a5
-LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/7c750916d4157ba0a37cd1277a0f8faf32123dfc626ea76f848a7c567fd889a7801f8402a307c190ab34fc21b156f2a23967abc9972fc103e5847a200ffc7305
-LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/6827f38ed653f33953ff7ae510a517d5
-LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/f01c655f6433ec6808b62872b8fb4c5a2d8e187643c11f0b4f5c06e2302e462353b516f431c1e26ee60b579c0f8c8c6385f018db3011c619745a39f9ef263436
-LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/385cd2715d29de3e85a3ac10bcbc88d8
-LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/5c90e8e583176ed9dd563f794073bb344283284a10e303834b6c5a9b71369f50dfbcbac61400ff70f34f3065279c848dc29086309ad38774e50eca3fdd5f9799
-LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/241978345735e3b57a88918693c0c0db
-LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/916c6a4540ce9a2b2574d92c3aed42171f9e49f776ab97d3e5be84df832d463b7e542529c3ae81e4d6a31d5789d55b96f9559f48c0e4c8be36d70e3ff6f4292f
-LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/a4f16e809240c1837b90d28930e3f711
-LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/983201793e0f9e6416bcea23b4a70a5a1a36fbdd72bed2cc60ec267eee441aa3d9c850b4aa3da6a232f3de451089754138ecd5411e5431f632e48c1993513ef9
-LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/70f47c2be55741f754ffe89e4749dafa
-LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/f2dcf4f6ce888801e8a14875909f78b46d8ed853a7063a185356c7f21e42e15323d847d9a9d4b020481a7fcec9539d979e4c7f2b083ac1c1bf75a275a200562b
-LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/becf7c6cc39a98cb722899c94b32ca34
-LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/84818621307779e27cc149afbf958653049e47a62ca44ff78552878114c2fb0f7c40cc83722394ee8d880a6ddfdec79012235a6ed20bbfd1e5d9e83ed0a0199b
-LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/0117c05f8dabf41c4628532d59cccd3b
-LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/b276dff2c54fdb6403a461ecf5435978e2cf9c9273934edcf3a31e7f640ecccf37de672f6b0b3f296ddb6a7059b0d95ca6c5bf62d62ca545cc62a69ebb84b8ce
+LLD.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.asserts.tar.gz/md5/1c6ab527fc25597c54b8ff7d3270b7e0
+LLD.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.asserts.tar.gz/sha512/a609643d56b15e46683222fbb913e19f244125e93e15d2004b98d34e0a9835b446a60d99c777366097408ca6cc61be953b31c2a6b618acc2b5ee5da4a0b898e2
+LLD.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.tar.gz/md5/2f36595af30daa121ed7018c0d9b56ca
+LLD.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.tar.gz/sha512/4161ac10364e66b0535ae2ab5271d31b0e9723049d5ca8cc45f81fda83b23e37cca23f26cb12dc4ddebc89eb674719f48aba4f025f0325e449bf1dc01c99c935
+LLD.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/42c329497c7da83949b538e23d66b30c
+LLD.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/e42dbad0f130e27a16b08fd42adb8d7e6f47a00c71dfdbae0c4fa5a9d54b12978c47ee9e263cbd6f14152923b18d8c8d11bb75e345c4893e8dfaa7b9e134b7a4
+LLD.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/9275d29af4f7f5309571826aba04375e
+LLD.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/bfefaaefc12ff30acb44dbc64e9d6363f4749ccd9d26468294f0ab9d125a2ad7d62d70388d4fab4f4f94d8a53880556c393c516f85e9ef25948bee392d465186
+LLD.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/1f33eee19e522d8a39f29fe7befa9986
+LLD.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/7c75ea6392d15e47adbfc5393a576faaec9925aacf1096302cb0fc35077a9a87517db0d2daa8a79a4d707907af87ff4651ba9ef2e30e0c274270f02d6f2178e3
+LLD.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/cb3b3d0c7d4255e9287d53c67b661a94
+LLD.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/238112164ecf8a69ea49fce91535ce4fe1664500285fffec83ee915b4049dbf2c9d00599ea3f99e703acfeecf0fb31457c7d6833c20191e674cd40abb73a38ed
+LLD.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/md5/8cfb25b5f8dbd265b9874f1d89e7335a
+LLD.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/sha512/066703e0dfa360fe2c5558f0e09aa35d93ca18f115293652d9e189f7cb2f9b78b379416a3fe188e3396981e6ef9db1c9732f22d6bbda99136e1f644989773979
+LLD.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.tar.gz/md5/8542a4b46e1a0ab022b7088319c2e7d1
+LLD.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.tar.gz/sha512/434a0852ce3fc19fe05f0a4260de106d400febedb8e0c0cbe15f5d08c62d057edf196dc4ea41867cf5a41c9b74f3467b4651d677eb97177d31234ae0f3a7e5d0
+LLD.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/md5/a98e9510a590c108268f62773a396ddb
+LLD.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/sha512/3a51882d60cf45cc7947736ea1bd78bd75c26d4ba2fdc5aefa41fc9353d4b341395f972ef8c4c244189917cb8516f6f33deee52117c28b49d192e8c8bbdd6496
+LLD.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.tar.gz/md5/80a119d58c979b8a40f943f4f246b186
+LLD.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.tar.gz/sha512/92cfda2060194b69d5c69943037d1800895baf07433c47694fe541f00fe1985d06afee86a37a16ca0f05e7fb3abb586683b5cdd44a10d605542e18f7691b2bc7
+LLD.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.asserts.tar.gz/md5/b42a67d438ecc7f0700b1143e8e7f60c
+LLD.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.asserts.tar.gz/sha512/89d6a1ca098960ed202eefc962fb63441f8a236fa4d08390a08c2ac6a6275021fabade1282e1048d00cb2d9230682cd7fc454f6600053cff08a0b42f4789e4cb
+LLD.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.tar.gz/md5/bb2d6eb0fbb08f09aeec72fcb275ff8f
+LLD.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.tar.gz/sha512/021a8b5cf74b74da0ac3737a128dbbbd22c6a390b3b3bcd4480cb6f70cb4f7e16384e0dc25482a96f75c71fcb4b6976d3d4cd78c8c6b8b82d445e2d3b7e52774
+LLD.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/5ff3ce4fdd9f698463af6a719515aea0
+LLD.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/2dd0166da293c57c0fc7cad436f74f09e56c25296ecbf9f57b8dfb38e24f1775dcd821b55d89bf21049ee83846d24d6a142c42884c0f714cceea055b093e9d34
+LLD.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/md5/b358cf092aa813b923d12f355c26070f
+LLD.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/sha512/d680683df76a58e9eb0effad7cba1707b409a5c715239ef079f0ca0e92a0cbf595a4f4339c438e6962c3fd4908694cfbc3b8dfa46d7147f12a4e76ad6b98d66c
+LLD.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/93f1eb3dc1bfe99e7e1db62abbd24b31
+LLD.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/e0f57e08ad0b4925e2b512fba59f5e7164dde4b26f813e1d07b15d870321d6905781c383d23dd390bb40ee039fedf60cf0b0e4657400e0988bb6114b9441cdf4
+LLD.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/md5/bd03ee22f464546ae69358214ff65723
+LLD.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/sha512/47f087f115024bb61e981eea80a833f2ee7db2d6ab5d1be2679ab4030498836c9bc81418518c4c93a02be4c6f884fcef6ced6ad6c49694920c0645e01d3046c7
+LLD.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/78c1b13a5cda18bc928ed19d90960ba8
+LLD.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/7cfa60ac151112700e3076f258c69996f4d7c238dd7b373f6984002ce56cbec5d5d288358a7609cd780dc52f4f4b1443f4d5fafedd440b843983583de740ceed
+LLD.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/md5/e4c53ab63e012aaf92e47416e3be9520
+LLD.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/sha512/34e55a9b67395b90682589e495bbfb83b931010ea7fde43f7ea8d94e9bec515de6ff8f604bd084a453b004a27b9346f0777ff145e7940ab1a5874bfdc58acd9b
+LLD.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/e14e1dec1e4c6361703b81775af3d4fb
+LLD.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/78f317d0e883342bee59cb0864d0faee1494f64fec4ca629ae554610b863020727da9808161efa71e07772aaabb5a8426dc2fb58f190d2e915e903ba33c76391
+LLD.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/md5/0339b66ac5f6c4520ffd1094aac0f24b
+LLD.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/sha512/0f43216995b6c9c93fa64f3422f2a844236542f48c53694563df9bc4bac46bfa0ece82c61e4cfe7171ee5d567548d6dab48d2c5a8ef1a8f22fcc923865470de9
+LLD.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/48d56f5bcf8f8247c0f2fed6d247508f
+LLD.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/03fe4c6e45eac3a47d91a8cb53d0d1c1785bb692fcfc9194628d4d299d15a9ab1f7386741623fdb21e145040d7d727a7fdd51707cca87ad31f8053ea653f24b3
+LLD.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/md5/603d56daeba5e64e390dbe354f6c07b8
+LLD.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/sha512/a3c9dfe279e4fd26dd7c56eaa3b102545beebbfc801814647e76b1791fbedbff3c6224ef5dc296ee1e6d5ec4201ee1336af181f03ed6697db6954c15f9fdaf01
+LLD.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/c6360fe502456fab2e8945a00b137bdf
+LLD.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/9a49bf7959f4ee4fb900ade06f2f0a585cb614b4c3d9ef11a184cdcdf17bb3134e92c106525927dd4c0bb6a0a38709572a6c29892101ed6e123812621bcd0a2e
+LLD.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/md5/e31787cd50a0c5fb63aeed7a58751a34
+LLD.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/sha512/46c309720f19fc7015c86df171595f274cacb56d1e081223c2de9cfbc7d22d3b6c2d88cd61be03a5c93fd852c207a300e0e5bde07dfa260d27e80eb0604612f4
+LLD.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/790f0be8e1a2e4036047172113c00002
+LLD.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/2439c88f78478cde52e4cda902783a6246171e2ca657437f43ce40cbfd681374993fb4f178c8dd7abce65fe5e4a71df97642bec5ff255cfa0db750e59f5d28ab
+LLD.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/md5/e72b4f166ebddc13f69343ff33437a77
+LLD.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/sha512/43e72261c23553e2d6d91dc3ab46be77d1f034f1742d5574630b4e46ed432bf12352f35511ddc5339eb9e6ba5ba89e26e79a2df2d60ba25455305848e623cc44
+LLD.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/1bec1f2110d4715f066915837d27ef6f
+LLD.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/3ccd53b38f2398369eb17bbf7948b7bcfda328f2e87a3b5154a837ea904df9047a2e8d06f03215e24756a2d5154b8008a1e1b01032b7f7191bece3c7c2e6b907
+LLD.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/md5/b92e3ed2bdf3af2ad06d57ca73e64cdc
+LLD.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/sha512/f939598a62afa887ca7e3b1caea01531f21cf7d793258eea3de2de22d4052ec4f2c657a66c7a6d137010d029049045efddce3381f0d1200832716fdfa5ae6bc6
+LLD.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/03577c0d410a6fb2afd0f36f6698fcb5
+LLD.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/9ebb176d184729a617d3c563ded225fc26f6e2eb29b89799e6b14936f6628933fc214b2c9626b9ed623682305491cf0cc8cc4faf88eb8ac54dec4293b5f92614
+LLD.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/bd683a6d5cf41ca9fb2809f34af4fb9e
+LLD.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/f23cf0c94245c76471b0adb60254ba355863914f88632262f2c9ddbb77c5fb94cac5225fdcab4eddcbfec255daa954b56bb305b6fadd6ba390ce992e2fe657eb
+LLD.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/c0d4c565ef3f591ee57e565d977e6483
+LLD.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/c2d6dbcca8e5c1c0b4e26ab85c53c138bd3ef2e375a9895f85ec4c1289db5dd9782ff087257809b148bbfedcf0578b1a6dfdb5bb8a9e2787ef07c4e5d22e9736
+LLD.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/fe16d302842b57068608bd34586eff05
+LLD.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/2d0b515b20c2e449f75f6e1ba8b61e9ff2fd9c999eda9a5e416a25d1650be69faf1de9faa7f2ad65b901276d5c9ca091bcefbaea46ec418cdee5dc70435254f1
+LLD.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/md5/0a56691ff74be9c04ebb8c7b58636bb4
+LLD.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/sha512/d31cd2b601781a76cc87101295722605692840476d29dd9559b6b4c1231cc3128e3fa6b84fa08c0d014eae11492a7b42f5c0f7aa6e4a24b5cf10adb5e4411401
+LLD.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.tar.gz/md5/b1de598b35d8129025a97892f3e67ca3
+LLD.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.tar.gz/sha512/d84073ab953cf29095a9dea5e717c94f40ea0d7badd9f201bfe6771b9eefe526aedb775cd0e132bec24a0a2dd4751bab15aab7404ca0fbc8665be67404192b23
+LLD.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/md5/9043e0945247371cca8eed4273e5feae
+LLD.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/sha512/9c1cb75ae12aeb75c7b4debed52a4bb1ffa0a2a25044fac7532b0d519f70a479b183097e32e7fda439f2e57a80321b5a2744ac33fe0420f5aec95ab321ef1e7c
+LLD.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.tar.gz/md5/a68cf299d38f5cec1e099a26b4c42cc0
+LLD.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.tar.gz/sha512/a88a759fac39b42933a3eb810ba37670d90405950ba954679c87013077973a25ee5418659c48db656542b873a1bb12f8154a777af15fecad91a7e5f613f41913
+LLD.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/ec8a0df1e35656ef20e736ae016ba7d2
+LLD.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/cbeec5115ab7ac2b3f0b3ee946480ebc3c2c7e66693d713c52df621ffe3b821d2fefc5af841d20a04a69414dc72013a4789d2cfc44b9714dcb8bc51f922b7ee8
+LLD.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/fe0fd33ba7e2ed07e870e44fbf508de9
+LLD.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/a2c09b9d25bc384daa7281900952d44cde497767bb41782e60cfd2e0f2f2f210d7c115d51e5204643fea26b1b33b13f57f70379a6d830f2a36037983e3c03398
+LLD.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/6b20ef6ded013df08b5432d35682ec3f
+LLD.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/221f7bdf5744e967e6135c3008d6bc9419a0a1b00ea1715e174e4481d912104eb2b703f5c547dbedfb4fd4cf319a3c69ab13b86a6e11b3f5a2b935bd6d1db184
+LLD.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/de686a5318f01a5676ac8ac94188b7c8
+LLD.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/d986fb53c416572b88d7bb6cd39dd06d6a8ad3fa57586b24fa9b23f0a0aabeac3fe38c5cb519cc08071669b36255af2fca6e7234197855a2b9b2c7989b816692
+LLD.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/e15aa0e680f218091564acdb4f073d22
+LLD.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/2fab79fcad944dcf681bec8825f50faba4a97fb1bd8612baefed4d7abe8e0e71fce833f08110f2cfc2ad61b3573a72d57e10931918cb3b192ba3e7c9333489f9
+LLD.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/5f715e5ef8760742969cea7ad591f4ab
+LLD.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/6bd6fc50fbb1fd05dbaa068f6a8e7b99ce4b124d0e270b8add6e0a32c1f5d34a33a1927c29687267b4263927a9c19227e2a919b0f3dae1973573bfeee81ac9d9
+LLD.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/68608257c085b5a6d32addd873c41309
+LLD.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/144a4a2cd7cc8c2f504be5ed05e848ab35e6eba74dbe2c05279a57948966ba9e738164ca32d89207772c3aa8f6ed5f802f9b91dd14c3c6c26a27b5b70fb6f04e
+LLD.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/9cbe74aaa310b6bc6780e990d70966a1
+LLD.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/048fef2ae0be938dc80e8fca026dbd1eccb0668c9c9900cbbb46ec2668b3fb2a205b5f54c951c31fdfbdea4df44ddffc76767f2e1732c2b5728e4dae510ae8fa
+LLD.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.asserts.tar.gz/md5/c603e5384a1b6ac8cf7280262086f749
+LLD.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.asserts.tar.gz/sha512/9cd5c1e2fffaf8bdabb7cf3261b39d9d4a32ba402d2d61445c94488da92ae2a7ef8b9d963251018b0b6195e954628352e634f4adfc4e3c26be8646f4c2ed8640
+LLD.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.tar.gz/md5/3921afaa99c9546d761314ccd839da69
+LLD.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.tar.gz/sha512/724478787715c828aa88f7e383ce5c548f1ac67a74743de684f4fdf89c23d3130cfe6035910a34a8b5b48bed40f462e2c803d503d61aaaf8a3f75f58e69f547f
+LLD.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/9b1aca42ef19daea7a10446b843ca684
+LLD.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/157a8812a49a87f170a41714f153b5ed9ab118f6208238dcca7376f6a38164552d12bb09069ee489f3fd7cd09a929e9d74e16a1f266099d51294c9d2297ae412
+LLD.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/c22847d5db340122d94d32623286686b
+LLD.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/29385fbb484fd634410072a8ee26d071ab41efe298c72f01a05b6c8a2d3795a1f0784ccefbffc0f252b1fb288b32452538cebbf25e60a66bf2a447ace3bd3239
+LLD.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/5ab65696e45ca45725442ea953497181
+LLD.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/6bd45dad3b8ac8bbb321b575719c1f1b7f8b8896fcb5cfb8f07165062e76d6c6005b878c3d2ffde0186ff6a2ab3bb71f729cc9ba41c67457e828238b04c06fa0
+LLD.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/aaabfa843e9f767255e3f2719b0b0a4c
+LLD.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/99a124dc63873dc746faa21717a509235bd5031b436bd275ea40e796edbcc649e19478e9eceac595a6dabbe121b187d94677cf019addf2464094cfcc2037c73c
+LLD.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/md5/26b68d2d3126830b02c422229bf6c6b2
+LLD.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/sha512/ae7ed53df96f26746e2b5e6168fafce92fedaa236146131d97945ec7217b85b5c46cdec32060d49576e23190b6ed5020fdb322c3fdd64810b2d0f613081f3b72
+LLD.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.tar.gz/md5/b41ce7f9c94351b997c10f3a85b65740
+LLD.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.tar.gz/sha512/f5a07e5f410b1d84547979feacd0fc49099131d240590b4bb43cd0fd4548f4c028b4141b31c69c7d58284dc27ab6eb758f69ed3d1d2764b59903237e293d627f
+LLD.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/md5/5d7ea45334781fd9acf03c072b70e056
+LLD.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/sha512/3ecd4d3dc9f399e2808b5ff23db106eb881e4f6611daa6126b2909916d66959b1ff2dc309c641ca3ca8d01f7025a61df7815a5d838878c7b001155b623fd078e
+LLD.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.tar.gz/md5/b3d194c0819fe1860a8380b9b543c6fc
+LLD.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.tar.gz/sha512/8ea7961e6c7b697bf4ac22faa1e1401d38d0d9400e7904dbb8f21e12dd347209f274fccba8f348fa3203d47216b1d7948a0d62bfc8d00bfeb4a22e99b851c9df
+LLD.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.asserts.tar.gz/md5/36776ac557a6e597c649e5e8eb0d145c
+LLD.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.asserts.tar.gz/sha512/b8ad9746854bb394767b5e498a896238bc954fab7e9bbcc19f47cbe121d8e39ab31c4c42715125dc5a7db953837b074dca9b8d75d72a687afe638e9f5a6a33da
+LLD.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.tar.gz/md5/135953051b2de713007e598dcc92ed28
+LLD.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.tar.gz/sha512/698f4db0dc09315b4b84cdd4da41cb4f87bb1778a60f984e7c286673ee1e7d20f4435616263687adb1005531e6a658779f254f0f3ea4ed06da4c83fba3e38ac5
+LLD.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/md5/4601061ca25e6242cb10fc3429ad456e
+LLD.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/sha512/c3cdbd25d3fb03bef563fb6d8d26a4d58dd3c2ee63f16a0148145f868d822425c4d96ab69f67643961f1170f48ac2f9b79bf02a1ec56764feb39d83bc3dbc284
+LLD.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.tar.gz/md5/5548f67fa43170e4349577fbb9afee3f
+LLD.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.tar.gz/sha512/2829e2667e475b1fd1c4940d0d8267539926b27ce8ba26dd00199e9d8da886f5a19718e0cab2022e19621bb6e6cd2245604edfd11101394dfea3987d436fbb23
+LLD.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/md5/a292184ddb3ce6091057fb64881b4b0f
+LLD.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/sha512/c1a17ef04fb0336a3d07a5071fd092820ad4f090ec6aed6bb39b15ab7e519e2425a507f1e6fe2f30763ad9fa2dfac92a58c49aefc95b6cdd264d544924cd42aa
+LLD.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.tar.gz/md5/6e70eb99345f25e4925e6106870c0d5b
+LLD.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.tar.gz/sha512/8f0043e0b9433b58e3a50ae096b6baed15e156efa1af3d2e4a734acb69336ef3d00d96160f619e6ae3f73b600684148e8988d4a0a07fcd8b286bedf2a0102c2d
diff --git a/deps/checksums/llvm b/deps/checksums/llvm
index 6380397ffb84f..a6386248e0ab2 100644
--- a/deps/checksums/llvm
+++ b/deps/checksums/llvm
@@ -1,252 +1,244 @@
-LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/f18fa63ec97c79f3773af2bba51f69c6
-LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/4ee1c3e746177296fbe976976c58b6ca09dec22943ac1e63008aeed94f46619e4e60d8278566e74f4912fa9d3aa21c8b03ae2bee360db54c7dcdfa2381469148
-LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/f482e543971546cd59d946cc33d79d5a
-LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/d026b746f419e9bcc04daea60b1e66e26d4132e7a551b0f14c95ea95dc9a0f4e645110d8cd5b91b92bce7775ababb715747a2e4a09c0920787e2f25ef1bfbf19
-LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/5d12f50225285b180274cc89c21e7c44
-LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/3947f0d909444716a29c26a0645288e0f02ab19e6fa6ac0104c5ffc9659f01337198a5914beca2ccea7c98c9aeb12fc537891d440766054c0b9d3bbc40e24165
-LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/e555476d3324996897cb0845ca22312b
-LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/a809d8c455d6f72c2bfc2517ab375d6ce329880ae33c5c1bf575dfd599d6132e38df35fac4300a0e72726ca33ae1db69ae67f5fb03d5c617eb34f7ad20f09b8d
-LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/6432ac27166a0ebb550c7b000c27e2da
-LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/be6440412d46252292e6d907f04193ed3f438b06419d0fb8b067a7cd89e5cd2dd9143af4605de9a2a697ec2745efbdaf6021d065346041fec3b86051de42a26b
-LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/0bfd05e6bd23c92b73751a86826b288e
-LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/68c08b2624bd0d38c7cfaa8b61b7e1ed70c7a106dda814f146a3f5796cbd42f476ef19f726d3ce368d89e624c7a3fa7f07829c171d79581f3cf565dba28c27de
-LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/53a9db6445352b44717f7e0f81d896b2
-LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/ae34208c128f1d4468d8a25b060bd1904f36a73dd0029606394061843f90aa26f9c3071e8281e76dbc10fcfd103f04602fde370a0cb04d435fe2f7a230989cb2
-LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/f7320272ec2f3cc86a742a8ce3b4cec2
-LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/612f03f49b04fce2a21e3e0242c3ae591ccdf6398e31aaa63956c40fb805d4a060da8acd6e5ca1d1c0a7b1f994105ad74b1acf78490e31a149368c8a9c96c026
-LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/db7b7a03c047a6aa7b599cafbf6023c0
-LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/05474495e73c56a8bf8a2459e705198a6c6e32df5b83ab153f1080a763d2f7d79dbe014592e12f0f3063b30bb0641dcfbf4f161ed988c777c8955ce9bdb89cbe
-LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/88255189a80045bb410da1eee3c277e2
-LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/b944ed004867d6bcf48dbc089d6ee5904318d6a2ab3a7dac3c802cb7646d4df21950a6e4bcd5bc57bbea872f99f39ef9e174dde8dfa4f5518f23a1fa0e8cf959
-LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/a25160098b55d2ec00cde15d088343f9
-LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/2e84a0b52a4852a69155aa4cdf33366b863caba7ced42db573e401a64c0fd2acd1d27446a3ad0ff94740a5fc4c579e745802bc32f925bb505177afdc64fb85eb
-LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/10b225be9d25681a36fbffdb5f3e315f
-LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/6c38d87c8aa321fa08ff9880bb27cedda1806bf6aece891f08f757e6276dd37e450a899c4fca587bb693f683f9ad0d85f388e7c4ec4a76c96e73f0f26ff6766a
-LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/320b77cc43b91549ae0e6b538ff53f7b
-LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/6b297c643530c06be5ef1d8dc2fd47abbfaa3a7862ba42ee9e4cff1361e54aa7ce77d4d9d7f5d2db38a3c780cd38a472eba1308e1f50aba74d3de3bf188fe91a
-LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/c3e0fe843bfcbe0c03a563bd40a16f0d
-LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/b62c3d8867594e34b1eb0c16f1db609c4b43146deceeabc23d4ee9af2046d8b2ae1a8566e2613a69691646d1991017f0a7d37ba8636a395d471f8f385a478479
-LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/be03ae93d0825f335411a4039905052a
-LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/9e0159681e8ecfe477d3099314ccf2986eb2a8325cee274b6ab35e04ee9e89ea61356e5082d9adab6c41b8be98d0171e41642afca283ec59ed91267e66223c6e
-LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/9e244718d094dd6b2cdc50be77a284af
-LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/705668d6b44bc754fff8f28246d8359773f29888c1f6ead6a5f1e10386c88572de27d4d47b8a1bb160211c07fcde2667833615c31ae445d1929229d981e36e3c
-LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/12162558c4c89913f0486f3a4c969c8f
-LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/dc6a48cdc9a04b3f0938784d5d40d0b453bf438881895c78a0cad9ebd83090cd9f1d12fc00df6538d053b2943a590a3217a8309aa0912fb3615d728280979276
-LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/e5012844af1fd76d6cf92ff0921a9f24
-LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/436ace73907097593bd060ff5674db2e36f7a6e4081033b078554b76244ba0d2caea30dd94a49fb62c96f2a1c3e1f190de440bd2bb9242c1206f4794b65b30a8
-LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/9ee929acc7c52d18a7c42808761ae233
-LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/12f07258d295245f2b53414d0df0144c547c60b090354b5548f50bb704a82e1623e55ad353eec233407f1840a50d423d1404fc3e7b87f2386863189e7f886813
-LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/c94a2e1f4bc031a7c663111babb0f8fd
-LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/4c82406d8df72312798d95ac0d038b38eb332b4f4f8a586bca7103bdbf7759365daccb6f3bdef9a9c74a06d04a12e96c01ac9fd03aa38f3c586a7ef3c7ec7e8c
-LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/e038b8feabb2e60b866756a8dc7a5947
-LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/c3e03bff11db87c7f131dbf7163b414cac91556795e4c5c340bec52409c39f7e91c26cb34a6339c10610d0906f57a209d36f6cfd458b26d24ffca9a43d259f5a
-LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/b3bf4ff216946ad38ac6be230e0865e6
-LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/0daba831dda378b2add9607fdc0d32046c0390a0a63758a6cdd9c0b90f660559cad0e71c5ee0b1c4264f3427e523a8c615bb87ebdfb63a65b983acfcb8df43e1
-LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/33a3c56ab597e6f2c2863842f0103e53
-LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/fb15d07a66b1f56b73625ead591f90b57a843cd9cb140e5158159a5f7c9249437678c61d0e19a11a65a536776dad37abd6be34ee0ec5dec7c0736079a0fcc7e6
-LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/7488ef289e45e6c44753a42dc51aad7c
-LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/6ecd62f0756a1941c8df92605a7edf9fc2e70957f39ae407e5b1b49977301ac6e82d55bcb856668135c27f1a75d156f3dfe7a27c47c6a3594c2c9f032af8ef19
-LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/5a286dd05b936c0a3ab61722531ef5ee
-LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/80016717959246708eec8588fd6bb5cb4894bf05c2d78cd1641e31cb43f38c0fda866283dabf1d999c77d030b70b89363e2346bd9b9310a2999623e47b2e4e7f
-LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/b62420d31c65fd8720427900b72f9aa4
-LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/f63f62a667f6f2c6ea76db2b142d58cad3165a426fd420348f0831d447a9eacfda5ec9c006e05f60c1f2804e8b25e87369e754a0bace28257035a63a1ea23a76
-LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/ea922c8edae65c855e40f6ff924c35d7
-LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/d83a3737058da3c2427c061cac83ad910c43368e47bd1f9ff86c21ef0b40669946b128bd1063a8fcb081563ecf606d70a783a0747ec951c3443077b3ec8e93f8
-LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/7a20fc23311317b85127fa033cb69059
-LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/36d51f84dcb3c76556b6ee677a4f0fde1610df30a7030d1799fe9681c27e04faf1ecb4b5731db9a58060879076c037e3e5bab65faecc527296b439743bdd7d86
-LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/bf6859a7e73fb51bf91e2c7ce5b879e9
-LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/39aa6b1e2923aa572458cba58a328bf6ac0efd5f11974e04343d65cbb56fc5804066f7cedb1e9c58252313f94ee0487d6855a1714adebb3b71fd6c783a01018b
-LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/10c32deaee824ed7a19dca9055a138ae
-LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b9b14c9ddc2b0b07c07a53bbd3b711737d1a7d71626d3c34812bc3862145865205e5da07b052e119aeaf54fb97968b27e86450d768312623a7a87c6b8179d872
-LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/caa574701f180bf4dc323ecb441fa53d
-LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/1c97d2311111f4411c3eedc6f1338a8c899932e7fc3490a03c0c9b2bc4c9a52d5797c50339ec7105d60edca951fc57c6f11bc7198c8e1c96334147d2b2dc670c
-LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/f46c39e2f848fb5fbc9f1eed7fa695af
-LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/ed5bfd8057b2d6d543c4a11f0c1c6502dc7aafd07d0c5a96ca2b1d0c5194093f20f995ee38a4a25cc0291b31c682c6dcee460f9fb657b90be5afd43258ce4c43
-LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/06533f3ac22a8a9be2501b6708821806
-LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/5284308b46ab1d8ed896e0425fae4288f87a640707c8cd5f298520cb19cea8d6311b0e6d21d5ed016f6d87f47b93d92d371abfe9bf1810b357972b7c9b437811
-LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/f75c2acc329a9ee041ff2c81aa93b4ed
-LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/6ec83776bac9e2cf2cbf3f890412a940c9507ba06eb50b6a05148c9c336775168cd5b6ec4aa1aa148703e6724c414830e54c3ae075e4d3649280ada705ce9816
-LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/7e2ea1a3e9c61976b446cbceadf33193
-LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/b21830528362115476cec7f32b11f3c1541a5779027c824882fdc00b248ea0f0ca8d08ebd86b938c10fc80a7f7930d86e2cd4482fdce33822613128eb250884c
-LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/d77b1c5ec7cb8bd02ccd24347e2e620a
-LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/84ddacf1c222bb5d895939ba4aab80dc6b5c5c596a36fcc2869a87d639d006a156750f04d268b6c10b47d286cf3bb5e8c20804174fc93881383f2512833ad7cc
-LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/26f634aff16b5c8cff48b0183f3f8ddd
-LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/cc3619c9c8adf322bb334a6b2c9de1ad088a17f117bcb9aae5b51a4f7613a50391c3478b7f892e9dcdb802067de69b098ba7d61edc9979b8f960028af0fa172b
-LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/891a1f113e7f3f8dfa56f5f28e1c8176
-LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/9b6a4a26c8f83764f892f7caf5f09a5453ab6e89c742ae4cb1e831a0711104d131d8fe0d9a8cbdd384b2d881edb3d9026af804f47f5f79d62da1d51dad4ec0e0
-LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/7dbc009fb3ef6ba400baaafa733afb54
-LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/c279c4be6a5e131b567625173b33e1f51a56c53eb0740895c1afc8b6824a00d4331df76bae9960c2143f7bfc2a9758dcbc7898fb49ef4aab56df6bba7030d636
-LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/007fdc357a995d68a01fb45d52a92da9
-LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/2bf2752f654db140822f4ed74494bcdddb85f4040ae24a753ed9c77efa29d2f50397719fa20de031325823004a66ddc1c00c9624887289c8020d6627ffd21f5a
-LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/fb17aeedc48fb6a24f0aa2d078ceb2f3
-LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/bd622d2472f85ac5b0cb255a929413ae3b30ee06ec7204148072dc1f9da7bf451b07960f4905a66d2673db9926797e4bc33b262cff656e7bf4cbcfd132b49868
-LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/eceea244f8fdaf61c6addac8b8f57319
-LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/44ab4a30ff65685a121dc54c2de55de441fad95b02f54cb359ad44fb298adbf48fd7651ce871fecb40b08d95e1ca701ad4c857f975a37a5e5a42280dab6fc670
-LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/b09f19c4940f6fa12ea8b5076501e297
-LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/a52da2ace1f0f2ce0090a582a267fcba526c86a88be3d8e55020ea07e00a1cbb0323f8b8b0205c9417982774fcc05d667b8330f7676dd40c869f374130dcc50c
-LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/a365e7fd610b6f6ad2dda2d94a141b4b
-LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/5242fa37a93dfd99720f9c4966b4f9ac164987cb8de136c01b3474860c6229538e73db7727a6c7c832f651ce7ccb97dba0082cd66da2fe812dbc8ecd44fe2cf8
-LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/6645a6254d82bf854e50e47d671b192e
-LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/d330eb15c34e13cad0eeb046e2e27f10eaefcf1d6cb68bc4d55668b55e3c00cfa07bccfb4292647a737ffc69bdf4070cf5a8bb1cb7f6319a1caf0faddde7aafe
-LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/4073ae0cc33b7f803644a272cd0730d2
-LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/2ea897e3ed3688e2ae45918db51c5a1273afabf46d01a6a27739ac951803645861c549fd438b48dcda05294a4d87b6c39778d42e916301277a0bfc1d9ce53979
-LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/e223954ddf9e11830cbab24e4ed435c9
-LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/fb88bfc543ccae5cc9ef737e81757a8f7f61d1a2816501d569456fa62bd8ce30ae57b837ed32dd6d2a7c55fdc26c2c1b1a9965968f784eb3c01680f25ee5bd8e
-LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/356d2f3008be6e04843a278d182834ff
-LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/ae5b30925cce41593a34cf2e76b606e978c352f2bc915d8869b01600c8a81547ad392fc900766db2ade06355c2d95aa473bd51dd3d45f6bf20289d9cdfbb126a
-LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/c31804464c51d1967e73f491948e2763
-LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/84ab795067bbe71390f15b2d700ff9e0c4fc124c3d111bdd141643242cf6dd7d3317a92d9c97ef5129ef089cfa3d703abc2b12c6a9d2287c90a9ad58a4de8478
-LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/9f205efa80dbc9d43560830c668659b9
-LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/54548970bc7b3988142c1a5c2be36f877c4d2cbdb3a58dba71acd7bb32b20cab2ab12c82619abeb6b3bde9a95fb66942e08104df0fb0f59d2ead7eda957b783d
-LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/ab175b04b9c8dc73f2c06c06bd9d6915
-LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/c28bb2033ce2fe182f6a5a29e34a6ce4cdd22e994245f7122c4efb39cedd491c9d4343d8ba2aa8062eac156ad36d9f54605e6832feadce3c6e9f66e9ed7c760f
-LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/7e4dedc77bdcd6853d613d8b0e3e9af0
-LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/e09c451cf018548bb388f9a0b419496a6c6540cdf1e204be391391b3a5645c2198562c2f995c3ae30f775c786e9e59e8b93c0fbb5d00fc9ebf1529dbca9c568a
-LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/0835b50b6cd53b4d1fd894f27b3e072a
-LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/8d228561f66feaaa96cf0af71421032f6c241e8a8ce3b8352771072d7bdd972e1b6270e15b0a4f5f4b76764cbd65ec371626cabe8607294041679fe9b6bac5f4
-LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/bb61fbd156bb0a70184f6f425ba770a5
-LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/ec310cab20f39facaa6c0b3a8badded0e4ffbd7bbc1fea6b3e67717046bfe6932a94cf562d3e35dba5052d5cfe62c540c6a38477452e535da52e650fe5dd4d6c
-LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f
-LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/15e0996aebe6db91fe58121001aa7ea4b23685ead3c26b5d89afae34b535e34b4e801a971f4854d8e1a1fbc805cece06272470622eef863e225358113a127913
-LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/md5/6d8783dc9b86c9884e0877f0d8ac4167
-LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/sha512/d3b0c81498220d77e4f3cc684fb2cc0653792c381207390e695ac30bc74249f96a333a406b2cebdaca14e0b0a27b188cba6209bb5c1cbbb5c184d5626dbdc7a0
-LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/md5/052a35e879d52244e4b0804be875a38f
-LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/sha512/d1b34fb97f9928e046d3131a050454710a93d38e60287b7e3c92f179f436586d3230cf90b0ca0eb8a3f9ef89fef7b1ffd7d52871645dfa233a8b07ca87ea2ee4
-LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/md5/1ad96a03a5dde506b5c05773b1849ec4
-LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/82306fb7b920fa7c71bd53b23d6915e7f256e8da9679cc926a53bb0d879f1f4469f43efe556ca32c9ef59e27b435572c7b39859090652635db4eeefdec0d1685
-LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/md5/6a24fcd3a4dc3b1a98bb7963b1bb4930
-LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/sha512/9ba6b83ccec061a1e5260c807dc8afd6e18799431b25a7e65b97662cc4db02509d02ea07fe12025d80914cec7383624b1c8fc9add46511c668e184ede263ac52
-LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/md5/09f1bfcf58a4124561553ab5005f9538
-LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/b0907cb857131183ffc338780c6c6dd1d48bf0ba61c3da1b8f20cf9a943373173b621cf9b2e8f1fbc657059a896b84aa025e6d4f0f1d1e8b623fac3e96541765
-LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/md5/19158bcfae716b26f924d67c4e719342
-LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/sha512/a90be57990b6699cb737ba96904e94e1f082601ca9d01e670f025b5500f526980741921c9cf672accab78cb5327714ab6ecdbb875174088f0773ebb627a98819
-LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/md5/ba75556eb96b2bcdaf73ff68386d3bc3
-LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/sha512/612fb765695b7aae11ef29608eedf8b959f60c021287a67b03a2a0f57a5814001ffa9b261c9d60d5f3d0582c06c2b41f75fd3afb66a045a248bd43d29e304c97
-LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/md5/2fcbceeb1bfde29be0cbca8bb6718bfe
-LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/sha512/58f281cfc70b3f8a59cf4faa7732824637c811ddc5ea6a058f294f4c3ed4fa6c8ddab5c007567b439f2854635cf4fd146284059bfbc73e7006000ced9383f705
-LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/md5/153c028d97dceb6924414a7a9a137e1e
-LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/sha512/7ae1f197600eabde9036ae58623de34a6d25636d7861777e324eb97902f65e26c6f3775e757178f8914b0cb6c2e925413f5ffc6abc9b6138470dc9e67a17f212
-LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/md5/c08a6cf3e1baf156eb05003ed4e9ebe9
-LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/sha512/f74e44986622329990842cb3ff549ff9254c81863d8bee468b0e58b7621067e7e7f7f18e4cbeafad6a05e0c107323de6828a78dc7afbcd7cd1892383ff417968
-LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/caf151150e56827be09acca6964d2b18
-LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/cb3e7aa71367ec4a115bccc2e8ac6bd5d9f22b3935b3889eee1fbf7303c5f553d7d3108977bc1f6c9b6917a6ed9e10bff211fd56b8169233ceae287b112894c2
-LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/md5/d95874cbf6f8b55bc314c3968a6a4563
-LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/sha512/4986a8d9cc9d8761a99a4f02d017b424484233d4cbe2d4f49ccd371591384b1b8d1c4d31cb908505b86b00f2b164568e57751dd949d91af203ee4a582971798a
-LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/md5/89077d871e15425b1f4c2451fb19a1b2
-LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/sha512/b65a218b05ade2e2d1582188897b036a4596d09cf65558f178c49c1a1a62b7d992b1d99fbe86a027dc83b614f178e6061f3dfb695b18a8e2b6bf76779b741d96
-LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/54ac594b4c8e7f261034a8829dad5e34
-LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/a43756afd92081e6dd7244d162862fc318b41ca110a5e8be6e4ee2d8fdfd8fb0f79961ae55e48913e055779791bd1c0ecd34fd59281fb66b3c4f24a1f44128f0
-LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/md5/83cf8fc2a085a73b8af4245a82b7d32f
-LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/297a5c7b33bd3f57878871eccb3b9879ea5549639523a1b9db356b710cafb232906a74d668315340d60ba0c5087d3400f14ab92c3704e32e062e6b546abf7df6
-libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/c1bfb47e9a53cc612fe98505788e1838
-libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/f16c9f1faa3e959d486fbb109add976c2a2018597a0b053ac3168abad074ff9c2b23874f8969f0a71c6551c8092082938bcc35ad846913a0a9965dd27d6dc876
-libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/cbe0859ffa50e2de82b8fe86c2540f6f
-libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/e864e7d62eb3b62066fe14210c43b79dfab704f04381ba29fcfc2a2e2b839e8db2ad3f61bb257b64cb6a546cc45e95195089e8b734425d9d4afa3168211f6762
-libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/352f8869f53096a566b387b885a74918
-libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/67dc69f8b327791ab77d4082208653ca74ce2cc750d9cba833cadf4d0f311dba73dbc951d0ce088a66b06321f7addda34bd5705a6c38d4d901b5813b2d1bd37b
-libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/220916b081fea2190e372df195daf13f
-libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/28bc05009335d61bfec33f24c89e67412f13760de72ea9acf7a12b2abf6d89cc3f3067fddb4ce598031b054b33efcf6773b4057d5adad830ab15c88fdbe56955
-libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/2774e9f2922e087d06e0976076d3ecf3
-libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/2aacbce77120fa9d24fd4026220e610b70c08b36175dee70f718f4d023b0ced9f8ae9dd2d58e35b61db7ca77ae337ed6f2da6a0de70296b4160a3f8e99ecdf67
-libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/63801b5fa51c2e75abd4b46f4ab1046c
-libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/eec9642a9c000d1aa3d298382a5b7c66caa81714665c7a405b416818f2e7a0cf1bedb81bc2a650452424391fe57061c33c2559abfc55bbac9b58e19d82131d5d
-libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/b3b3975a9a00b0292b9ba4b7fdf5e757
-libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/c886fff05f76053682a906dd94c6674f072206f37781b1025ec8a977eb952e0aeefcf20d76a3411e54782a6425667ee3a373f0a48d5a486fd4f37c02b0ecef78
-libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/88cf748f1a8086f949cb6217fcdd40b7
-libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/4e3d3cef71062b002406afb923f3d16508206662c3835242bf522cc7c881ea236695cee6add1b1f85a0b2708510dab2b59eafe004e67ee1d87a5970602a9d942
-libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/dae6e06bf26505fff786d0187cc5f90c
-libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/ed76e52223f84dd8c1ad7190341b167928493c2c617968aa17266c274527d18348865d9289cb82dd1c0d12240220750ac31e6c1354ddd9bc5ec2e226f360ba87
-libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/1bdae6507ca26b09a81c3b5b89f17908
-libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/b2704c0ef478467eb0fa21c7b436d6efc9602e8723bcf194dfcf6b3ac33d316b79de66c0c1c291e92f45f5bb09b6ab579a45782fa1ba3c03192177aaad6c29e1
-libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/8906c5b197baec7fc795256b92ca0a75
-libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/b79ec5ef4e59b0016784d31e51a94c9b292d19c36d66eadcfb3be6579244048b2650206836b4e017a63d84d8a0c72cd487f22ea08fd92f5b5ab4cb46d218e1a0
-libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/bd81f6f43b54364bef1e6486c17e3dea
-libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/892e4478e672fed55d63bfbf20a959b488e1cfafa332e2f1743cb519594526b5e0f2167b67636714dec6f43c76dcc0eb0bb2775eb43e4d898e63a0d1e78e9c94
-libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/6437ac1aa63c9b83c72238f4b0eaca00
-libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/f5e2bdb0af587e5cd55a5a2f16bf551c0e0fbadd2d9232fd5d3b2b38cdfaa80920d25903af5d79cb52a45a703a5bc07e550ca07163628cd1a79d3b3dda0d05d1
-libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/5616fc6e683ab133ed751d60164ca894
-libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/40944ea809c3f4000038b7b26e6297a5ce9d2710995c57b4e0751e74dcbeed9c00b1d89d0c75bf0f0d9094fd4811f5c5ca0cc5b83f54cbe20c1b2db85de44d72
-libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/dcdb815f425a6ec2aca7f29f601a73b5
-libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/3619419dbc81807db63e5c7bd7b237a6355ec60d2aada9bf26c1d38f10b4cb87a3cb3fc9a81e7f695ed7a195d2c3c214cd9bf96d3ccca68422898be323797fb1
-libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/ab2250406d3a69d68755b77b79b61f53
-libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/f5eaf02c7d19689a9cff2410269daccc00a075abde9287b025de3aff1d5b539b43001d1f2120f88c4c149af27eaf0caedb2942ae029550cc822e6af103b32960
-libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/77576af5b13b2916dae4e7e24760afec
-libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/1b3708202ccebd47aecca5a7c6396799ef14c4235b0904d23d6b6b4fdd91fb6b13a1627f65211ee0283a15d96b8a68cfc962d7aa2ddf75c08f2670a767c6cfa8
-libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/81277b7fde4cf08293f8ca956417fe05
-libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/72caccf9933e1790bdb0b6f6dc1ec5da6a84a5fc06336e29f2928142f3182261afd39477be913427d65655c40ddbda5ec5042c360bc49383e88c871db19b018b
-libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/d326fe9ccfbbf179571fdcd684bb7b80
-libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/a34550dcbb416f79648a5c4306775f1aca041c4e8e3b269e94f960ec0925d5b7cca0ed1934b2b63b9f4437d304d658adc6c0d3e0169c629d50d7c0b5051dbb04
-libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/5ced197907e87c470e5cc1ab08a7eedf
-libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/b57810b718bbfb1ec48415ec3e727388bb647fa3768ee191d81fbb16248edbde9332086d445ff57ad53e9d62fb9c8fb1f8be176649350f5eb57c744404c63cb9
-libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/4d5133f794e0b53d563ccfc10ca42e98
-libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/9fc7bbb8dee022304c4aedb930318db04345987bb7ec9b78c3d488a5616680738ca2b9a9087f60b7d6cc68650234295d18c6cee4a45d1956d2240791993fe45a
-libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/e5c8eae08bd2defe76e0985687d6f057
-libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/d632971cd93131b90d5a26fdcd8a262f2042a2dd59a09c82a8523558f2b292f9a3f285b0a6276f0e6b255f34d855736c0bfb9f426488c5929f2abf6c0b921b73
-libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/f0fb4b9b0257e0ead2e5aeafebb64214
-libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/1993c7d6ceb7efd93f2eb21379c046073b7d9d2460d6eab5aca26cae94bcbe07658780a2f6382a052e4d64813614078b5e582a933a0bc9a5d64d8388df98ce69
-libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/e236983a6c801d33ead6f60140cf1ddd
-libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/c6b44cd0d9139e0b1d47c8b17e9035099a6b360f873a2fc5c6e84c1c97dd455510f4f4262c746b47910703158fe0ceba0d19b8e6a61117d9723346f4c3e89004
-libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/c3ad2f3774b9b7651078fa3b2dfbe7ff
-libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/009561d4fecd65e35960843670048b79e70495c2cfc80a7c80614f253bea7ca46d8278d338bdf7719229fa7eb9f02299bf8bc39ace683b862ad005cfebcca0e7
-libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/6f8d226436a2822eb7e0f25d1073925c
-libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/b63a32b1eb4a8af210f6a9511bcc4c90ad39091a6b2c50431253f4fe5e1ab304b68f79e71fe55e173449ebc96a3395dd1ee55a9a8cdd289950b609a5bec8e722
-libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/a618c88b200fa25434e969a168b93a15
-libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/940d6b61162bdd2d9ab5c264c9ba71789638fec646e62b9204e9304c8244d10c8a5ea3603c84bff76c5471e7f3184a21e4d1760bfe05deec80c8126a7207db04
-libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/8a4e4c69ff51c941244d0765947dfaba
-libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/287e59ff6e8e81e1650796da7a01be568b9ef024eef0505eaa34cdaf4cfd8d798596e9396e48fafa39acab5e70c3a41129917e8ec7d625f9acb896bc4e9e7b5e
-libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/3f131f5c0e11db8de1e0268708ff17c4
-libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/04d1371a970694c234880ccd826f6a75615793867a3ba1fdce683a844cac3c9d33a58d34163bf2141624dde71f3af0e3582effbfce679ad2134894f86ac3ce98
-libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/8636def624785ea4b99d12c0d65bc0c3
-libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b8ae5cc249664d32a8dbc26a2bf180a782f51ba69126d099bb239ee94afdca7b8492a7458971cc91aef0ca55a1ca38d3bf3c8716234ded81319a2ad5ac082732
-libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/bedb9f6540966fc382de1a4544ce8c9c
-libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/527ad792c220e491fcfb46de81b9b15cf4f6a1d50cfe4435296e0f94ae4d8e53165b6f634f85e95a8c7369a1e7b3788d1683fa77b843f56dfb1264313f80dae1
-libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/62051888207139e47c9a0694cf4de5c6
-libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/034e4e272d09ae8f573d3a7e591f93dc551651c7a32e2b8923fcd7fcf36be5bb491530f4673cb9bf39a54c1527cc3e3ecab64c79e3fd7075209fd81f32f7f4f9
-libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/8543a076a97e6c72e7c514021ca5f121
-libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/fc11ac25945adee135ebc523fe3908bcd5c5a7aa4c2a405e3dba61e0fb59502e5aef3cf4982502da7f7ee1974bcee8354ac675e0e0182f9319ea20c299813a1f
-libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/6247a9f59f87a2b923aacdc0a7c128ca
-libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/f13adadc653d2f8442c8ee4ecca9563d6cad5f958abf2893d8a3eda331d9ed8c33cd4a52bb721be811dec66b3b5566f038bfebbcfea620bf0094c305cd3aef0f
-libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/873155e60b133d597cf8c40169c5745e
-libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/a000e1fe4698d5c19bf85b048bcf76cdffea191ee281b44ffbd83230de5dd93c9efb564a51da082df070f2358d6dce423bf0d6023836217c5b34d563844d977e
-libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/17467f361317ad56762b7e455d869724
-libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/62a8d601c8db178cbdaa57a23a26cd65a8f3855be40ba2966b445afc9ee223db2ed6c2fc344ea98ff129d8917c14f34ed93158633521780d52763fc4a4f2a799
-libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/2c094ecef656dc6c9317038b0c5a47cc
-libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/af5972750be3de00df275a0f03c9c8a3b487a040f9bd29356457bc18661ffba9b3aa909849b24ae1c518fd2975a9b687c33353ba927f8713796a6c8eefa6e509
-libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/c10497e3f473e80e309d4c6102fc194d
-libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/2349230301cbebe8c7a7d7054bb4e60d991e1798dbb8bc6b8cf73350738e7058a9eb3c1067ce7d3ece1780e360080d00dd4777359742aff924d2db5c463f2a8b
-libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/15c99e56a9e8ed664deb2d6aedeb7ea0
-libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/c7d3d6d33f0fc0cad0394c02662bed2dd7d5389a6aa21027d7ebee124c3c9f5910316c44bd4485f1d45c6bb9fe12775c697a176602809bb52c8d3cfadf4f2737
-libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/b8d748a34a381d085c962549612a212f
-libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/02afa1db42ff68a3ea0443ab539a7c613e5acb6170f7849cce1d36969ddad36e7546427bc55cd289df46a5fd8e83477b70941b8fd9aba0717dd861c84473da49
-libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/12f825c1c1586a8f7c9ce56e243b6bbf
-libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/f6c9cb33f129f1ff95800c0c88152d27e6de3fd78e01b29d75a80df9fdd8d95de70003dee0df3868215009cf434006223b488c64d6eb240f1e18799f529e980d
-libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/19d05d46cd97714abd23b668693afe4e
-libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/deb786016835fb34967e474235b1ca9c2e9f0258c88394979c41654fc4487ef83ac622f1e049aed5d83da8738b8f1079b3dbc67ca788f6c68b432d7007b850e7
-libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/0fee1aea27ac30304228af1f398dbf14
-libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/e14eb6fad8ef734efd5dae610cc1906901b389c7557853e7fad27c4cbf6c06614996bdd5840ee3640b9fcd8a870ea058c212bc978b6b869f4594cd8b06b42ca7
-libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/dc14c7faeadb0c42f4e9cffcae8c7684
-libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/10ef07d1e1fe3bcf8bc52da169156ad10de7b3bd54f16bf1d694bd243bc4c86b4244643f1a71fec94b024ffa2e605141eec9b10e6e65dce5d96aee2b454fdb6a
-libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/ee90487acb75a33b77f24fdb075402f9
-libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/6bc8605021dbb23aa71636318468a1f81f8dbf7308d637f551132700634fea208d24608c4afb28a9609a7a866302597f684d204f718fd8cae10a616abc1b7b0a
-libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/2c96c511ef55496a1044f63d4fdb096e
-libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/564202d6cd321b8b058124c4623bfa7d7310a5020140f194bfecd44a25490ff9590e661bbb838b1af4f7e40fc15f88363a1510d8f7a2138f8ccc52ad76700506
-libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/555ea3150d5eeeec54b1d463380447cf
-libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/9da05a39e8d4d9cffffe85bc2717e105a47137682ede9cbbd2f216065ebdbb6624b68a2e120a1b87247838276cd8a501c83aec63c91673229bde8d207f651f4c
-libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/a1f6daa0703ddcbc87b8f9d17c9ad54a
-libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/e803ba34861b600b350bc99484adb619bd75a82162633e8d80f1456a908d42d95842f194a6752fa43e683c26356592fb94b64f7823b64edc922aca154d970288
-libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/364b73f29c1df14d8b942183cb113dd2
-libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/c4966e3607314acbace4b31dc095b81770ac3414ac1bddb43084443191b92b2b96f6702177dec76b70be12f7a3af4797c9692cf872ea7eaf60569dc7fdd92ee4
-libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/d4aea085c08951e0facaa553b3c22a91
-libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/cc5cc36d50a342b5692144905ae52676fe9ff19054245152e3fff02276250604009881325cb5ef063f274b51cb2b45dcc88db0a929f6244d81cad1f241bd0c64
-libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/5cdf36e1300bbc9b032bebe5cba7bd6a
-libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/c732ba652aaf7a5f6aa8cd2f39088d83b78d2fe3121c4e2415bdc935b0a3ccdff7f028d3ef50f0b5f7bccff54f1fb5acbf970fc28301510d09b3f3847556c613
-libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/c5b335f634ec9e663a7c5d54dfeb1967
-libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/51c7b1ceb0e235d9d7db9727eb7744cbd8b2e51e189c58bfa6d3b65bc4b6e7a8224e8b7b57eeeefce01c7f65a4df48da97a975dec61fb000d83d23d15737728d
-libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/822be345af871cd1d5e595b2a83bedf3
-libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/fda0ff71c7a26e783436da214acc22842fe73df1f9d1d526955f4acd0794c3afa8722df8e4c9671b11948fd96e4c079fe525c9bf3e38b5119a79793a22baf16c
-libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/1201b56c0dea9d1fd2a5ceb4d62f78a9
-libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/550c041f495a2d2439e6c4abcd4db6da06702d32046f6574f6a595fceea467ebf896635bc70d9c3e41c99b42404c87d98e3cd76a34b0f959d21284e3e4f15941
-llvm-julia-15.0.7-5.tar.gz/md5/1ffb5b00586262196d24dcc7baa4a4c0
-llvm-julia-15.0.7-5.tar.gz/sha512/5b5c93b4359cee649974bbdb5c3c191cff5ce5c3862e7cce00e2e35dd0627bf50e0aee454e67ea0fadd21c36065b7c1cae6e77abdd512fab70b71899d369cfac
-llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b
-llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4
+LLVM.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.asserts.tar.gz/md5/faa32564b64df641db723ff8489ab285
+LLVM.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.asserts.tar.gz/sha512/7d2826e54854d6ea02d1749f0ce7c6359111293921c1b064a6461ec5e49f37894449d61c120b02fd15c7993c60a2ffcbb9e558a1def504bc549e9e1bc9d887aa
+LLVM.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.tar.gz/md5/3b7db2cd82036c0a85cdddeb1e2acad4
+LLVM.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.tar.gz/sha512/cc108ddb4a3dbb99251b3fce6f03301915ad666d2850e1929552a1ea669b70671d9b7c4763237a5980b9d190711fcadfb9143dbd3e15e68e8fddee1968c8daf1
+LLVM.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/dca685ede5028a33ac96e4961dd6184d
+LLVM.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/f310fbe772f6283dc40cc4cf61f6719db0ae44b24b9bf5b886787bb4892cac35924abdc4a11643903c8d2c64bc3ead6d9d769b53c66c775f40f82c6823b38dab
+LLVM.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/52933a569085714a0ad3a1f587b69ea4
+LLVM.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/c5f26f724404ecb0e4b7e45c7a35faf5b48ab63849826bf71c60ef1cdf1e13d96456530bfbee1621089c23c77af91db3c70664bedec97e136d23b6d0fd732d8b
+LLVM.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/9782ed6a19fe4518f1d94a146413e99c
+LLVM.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/60384533510bc01b2ebcda9376dfca74a2df7b2ec22e57432aed98ea06105a75bdc028f108efa462eee8b98f076a72feeca2632c29dd03b31a3c7de7eb2beff0
+LLVM.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/59e312f492f2345cf20ade3562a8d5a8
+LLVM.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/3de774510a14365c6c1ad958559bfcdf30400fea17493922e910ff5c9c5ad80f0f336d40ebc4a61d9a42d0c897f31be06c255a4baea91bab862b0753533ef702
+LLVM.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/md5/b2f47e7f6e275f774aaca8b58ff991da
+LLVM.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/sha512/7326e88cb9c1d6fab64f5a2277af4447fe83df9961b34eb82ed5d5aaf9d58bc8cad3d69d92dea6d93b4ec4d59b1628f67b12ff3601af8a12c5948b9837682e1d
+LLVM.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.tar.gz/md5/f1fa3d2711de4f307f3809c2683c35ad
+LLVM.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.tar.gz/sha512/375488c9e6a2ba40255afc76c8415e77cd93762b5f4da4048a79e25159c5e982e8d6eb25fe74a5ad1f4dcb3fb001c37cd3e3748eb622225e191ea57399bd6a29
+LLVM.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/md5/1eb2955942a99ad02cd0edcdade59875
+LLVM.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/sha512/da519f3130e1b4d2023de434da2c5b07db5ddc3dc710b2ce5bc8c51b0c40d9d5deb2341494e55117794689e51baf02abf546c5295a606a9c5b24a24e6c04ead4
+LLVM.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.tar.gz/md5/22260a11b610497faa5ceabc56c8cda8
+LLVM.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.tar.gz/sha512/e73d457f420a6d56b52fc5c16b7ff747fd510ca0045a79a024697c21d6ce7be1bcca6385e2b22ecd24198e86db3a86a52aad0487f90444db94feeda205f65ac8
+LLVM.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.asserts.tar.gz/md5/180710799e7bd502cd79749bced6bc37
+LLVM.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.asserts.tar.gz/sha512/e2ca97930e6c76a754c79f36dfa877dd722b519de4b23de9dc0bec304433d71b9eb7c2ae0394fc4e8d67c2e1170012b9cfc6d446c1a635349ff8c757db03c296
+LLVM.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.tar.gz/md5/98cc1449bc86cd1310fe3d88d107507f
+LLVM.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.tar.gz/sha512/8ee541ec0a243760336a8825ee8ab9cd1799f244eafe9fb1f027979acca554164684ca3ee62d0567bb201cec426caacc53af8cfe1fadbcb8c71ca8470689395e
+LLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/d49ccef8994bb28cc0319416e4e65c50
+LLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/3142f5fec422194d2f4062b2cf14c65e64512e721416f76d709e0921b5e8e7047ab353d4064c8d4ce3c0b3eb0b295e031fa63a4c8a5f71ba6b00cc3939574b5b
+LLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/md5/2aa1d8fbe3e3848a2687c2d06b3fd339
+LLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/sha512/df1d4c0e2ec739d9ff6f1e012d73816b2d29dd443b6dc59db137d7b17ef37b215f9fcf6f033e2c09559d611184785ac0554169c30ef5a35720b50172e4e97156
+LLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/f8dc0050a6aba5eb26fbf9c6e769764e
+LLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/e480824911dd35a2c840a2345c456b68a981f7d229767cf86207b094a1353f7ba890d3af72bfb6464d53b56f70d4822d1499420233d5b7f7695910c45915cf10
+LLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/md5/d2bdaa66d1ababc755324e42fc352e5c
+LLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/sha512/e2152229e162d2a125db0d23cb2b8b97539ee564b23b2cc4e650599ec1310922481be349cb0bb56e4353dac7d0b968551289b47fb72e81c61863329e7536d2ee
+LLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/f622b9b7c677524aa04430b2926e48ac
+LLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/637dbb02f370020b2eb1399654906b68ec22a0e4453851b41954261cd7897bde42830d1d687ce021437993eebd9722e82f335a6bcc9c08f3f8dd4241cf3c4365
+LLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/md5/b6db7df215fecc9522bd4d87d54b0476
+LLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/sha512/e984c402fde554d762263593b64a9872356221f5b1eeb386f173c5710d9b12d8f28c418585fe52856d2ea22e1f1aab325944cd3d1cf6577ebb5c1bd981a1ee92
+LLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/a5707f551b55a32467f1b4f439250b82
+LLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/0798b890d22c8bf7b597ddbb1bd3f5a40e0056261ff701a097f48793a4f7af56dfb6d9f3ac1d24a5644ea80ac9ce3c5eab46945361014f0770ad07c83afe0465
+LLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/md5/b100d1e296c9ed6d7568d8e54fadda6e
+LLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/sha512/b99bf61e6fcdec3492d3b952e553f01159e06ea1b0087fdc87984655b703a4e3853bf4b84016c2d1fffc9571d3419d89d86196a7d9845ef0addd4331b79c2f89
+LLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/085850199abae5bf880790a589889653
+LLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/9a8d901eb0cb8f145f73ff37d371503223cb1ee49bfe2bb0f6a1c9dd132d83c4e35336cca5711fc79517300da05d17a466775b54eef0f50e06ae76febc6677ec
+LLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/md5/678e199fcd38684f2afe0853a139ab20
+LLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/sha512/3f33123437d4f5b558b0ce6a25de82c51ba148b2d8cfcab0bbdf1b67d2451becf6712c8ea23436083b129900147d214b004688e4c698f0450422193a2d36fa50
+LLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/c753e667a3a970d64f8c571e75be6a21
+LLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/6e586dbc8305bc98a5d11b2b306b39a0e9362169667842dec56dbd58bc05ecaad74d56bd412874d7182f8ba1c091a101cc9876ccfaebccb0ef4988602fd24b78
+LLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/md5/cea9e681fd1a66643619cdb6284e07a1
+LLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/sha512/0fdaf5531299055e7b0a34c2ea952eff0f2d4d8bc51eae725fff1cd9986cea0361bd305a2c1cd1aa228587fd01fe2ec4b50fcffaa69367da9fca0db41bd40d91
+LLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/84fe5a13dd73706a292249ac134b3a72
+LLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/f1eaeb2862f445525d15ba7026d1df69abffc25a2b4d31f3229d005d425e10520a2755ef845a148a2cf7c9cd790487fdc57d78b7f613e5e32d3d1a42214e7cfc
+LLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/md5/e69a38a3c901850fd134e1f2dc4bdf9e
+LLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/sha512/56250c18ce837a2e9b13a270bdb592b55cda704fa39b4c7d6f278f436a15cd1d320d3ed9695d461e5d801b726aeb9cb6e9c8a1ca069bfe4675d48084ba32a00f
+LLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/b40a76f48d11d4a35d286970d865a535
+LLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/4b4181fb3c2c810a037f463590980f44eba710caaa235a5231e05212ff9f0bd5e361790242540c960bca42edf2e3ca8c175a997574459dc9394e09d3f0c073c0
+LLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/md5/71a639d45730912cd92c260ccbb80307
+LLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/sha512/01c3558609bffc3678b2f79b19384214fb6ab151602371e62c70a27a37236739e83701f1494806e2edd31a8ff21bdaf0e34996d5041f55fe4c0eb427027e7126
+LLVM.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/d809573413dd666d65aff02e6eface08
+LLVM.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/4a248dbfe7211c5e9d38030aa7ffcecc9ff1461ed94a8442dfa7ac74ae6bfb2e0d3d5abe7ef7e728a421460feee1c09fac67f3cca30268160ad5f6e0265df327
+LLVM.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/0cc7eac2313f8abd1a445264d808bc8e
+LLVM.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/e9ee7de057ab26ae7f39ce2129d6c36829c313dd01327b6998588f1dcc754bef79b6df6922e8f6e4cd8c50497dfb9fd8093681b78b4a3e38f167faffac4d45a6
+LLVM.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/37b2c5de5f06d1086e76df4cd811ac43
+LLVM.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/6a7345aa1c86ae823eacab68c859329a98a91bb8c90ed132fb3bec4662c9e8e878d84c1c58db59b070d820243f5ea2d8fc5e986c0fcf469be8ce496174da63d9
+LLVM.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/0162cd2fd47318198e2fa277f4dc3195
+LLVM.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/32045ec153044c7a8ecff22b374d04f5283ba7b7e497519185c861acfb5e3649fce07559afb5c2e6485fd5d0baf3f30f3d4d4ac0aba198c1a057416ae8a8609e
+LLVM.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/md5/c3b15eae2d940e96e0fb33e083b37a2e
+LLVM.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/sha512/aef0157b6d1237b475bd037c1c05d1064f09cc9fdbc1318dde5dd9b67c76af8cf1212df2384dda7cd65356a68cfe39a9f8c21a76db5c2044f58c54d68e8f82cb
+LLVM.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.tar.gz/md5/75e276d1865b403909c7d8f19a45cbca
+LLVM.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.tar.gz/sha512/ba3bad5cf309e148b24d082d205e26db61b6bc163ad8d2afa5218adf13a078e1eef08c10154bb31fd860b7496116145b69c5e47a495c221ccf06683d22c05be5
+LLVM.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/md5/308fa6f521316d8d537afc4b8b64d4a0
+LLVM.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/sha512/5a912f6c09efc68952b9d7cfa73f1c8e5888d86f167ec0792c2adfb7bc6e79d3590ed92ed661a70fe92c4177d16e05ebb027bc9d619c4d2af4a37423157197f2
+LLVM.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.tar.gz/md5/bd9139b4a13eb3210a9d6f3e12e630d3
+LLVM.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.tar.gz/sha512/5bcafb9948544812fe9275d07d6f43ac09f6c54c59aabf3c996accd08c45663bf965b937ca6a7d38582b9d6ea9a3526cc56d58f2d396a0ab82e767d2fab75bf6
+LLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/9289c14cd481b19446fc616ad0751df5
+LLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/101e93ffc69697959e7630ffccabc23d9a30657d48f1befbe1f323cc726151a5eb5409ccb177c5d18394d891d372dc6e62febc7fa84a262a44113e0118c9d9a1
+LLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/8e6bc1fcf3d7ebd7db77179ecb9846bd
+LLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/de49f82aeca76bcf97d6d4bb5d1c84451120ce6a69d4695729c4a66bc8a13df49e25d6d499c09ff956a3f46141d8e5f5df8ef47d94fb0e5158ccf3aaf8d4dc80
+LLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/e1cd3b3f3a0862e3ce46f76ed15a0278
+LLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/995682137e1ab48ccb64b1758e97e72e3ea685c9aed23dbf0ed9da847754e1d3492902238575f8263bd86a08e621f056f4e2d00844d70efed00caebeacfff3d9
+LLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/1ce6a8ea7b7c1237c48ac231cb67fbcc
+LLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/64c90fc5c98117df7321e75b9792ab235752673f61f9d8949b8796ed8ad4d838a58c5782a13515ff0cf8d3870f072e412fe7710ef5e8f87a4717683c46f3513f
+LLVM.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/0959dc7edf0ad3f9ed77b14465c05507
+LLVM.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/8c6b7113f130ceb08936799c647762962cf08cff92058dd7efd32eedcd8434e3399943dc3a7f56bcf43e8480e1d9e7dd4d3c22a860606334cf4747f1b9a07a57
+LLVM.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/69aaaa34ab491c6542fd0df7457ca1af
+LLVM.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/92908bf56d3afefcb0895c5daa89698d7e00dad5286eb757dc067a27392278fe9e4643da57cb14e7b5f9cd382d2e9ed5d52b03ce9413b357cb45df9fafa90a71
+LLVM.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/125397aa59f7f80f5f717d76dc691d9e
+LLVM.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/bc0cf9a894da7489d23852b059036307c43ef8d877606356eaac808dae340c1a9fa2f087657c024bf39d0f3b103642a38f143587984b61ddb5ab684f854fa188
+LLVM.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/e667f4d2127cf90598372a2318dc968d
+LLVM.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/0ec0adaa528eb7528f5adbfa3449fd50767b71e3900945002b1b1a2770dc0e56e590a7d59b20de7a68fb005d58aacb3bf20c1205a42be87dbf378e63bc284b57
+LLVM.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.asserts.tar.gz/md5/64a70932bb352bd4cb6fde33422e0bf1
+LLVM.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.asserts.tar.gz/sha512/a61fae9e8922f7bc8ec718c8e11a05c13f0048e9ee24d1a1d8650fe4281586568c331e027511a93bbe43582e23525f0984201f313abe3e842730aaedf64c5f98
+LLVM.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.tar.gz/md5/09a2c6e96dabe9477e430e3500f4806b
+LLVM.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.tar.gz/sha512/a5daef0e1cc64593af69256bbfeb2bd2dab6ffcfc1e1b393ab0bbd2cb6fd9a5bebfbab50a8c778ff605e4489757e86c880aa4d44af775b69610d2d2e56366dfc
+LLVM.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/fcf99cb4e50c2411cd3b1e6af9c3f6ec
+LLVM.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/c9e7bf803201f0f80debae20f1c29bd56329b58ff6aff51f0b3dd22e482377dfc605aeb1ca7dbdefac648172185f85e430635240f18ca9a8fa025268cf741302
+LLVM.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/d710dff16e7a6c7ba3edfa96452f8559
+LLVM.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/d79bdb5772ceb355744856da0d163c44d32c399034c86a44b6f80a3799e3671d7821118bfa2b6678f81839a53d4e409ae5cebe935add93014c077d5f85043fdb
+LLVM.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/a76457c3e92c371c34303e87f038957d
+LLVM.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/786282287400b90632f822a851c64d20b50e4d50097410a87c93b95d37ff023d55dcf77f303851c947dcb9ed9f9df3618717d23494a89837d58f73900f49d6a3
+LLVM.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/cbba9333c95761fc7df770bdedb1a1ad
+LLVM.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/14b712bbc81683ce8783d5be4895da4a6239910e98a23471ffe148495fa9cb30314963d1943bd00a3a7294c68dc9539c88c0b449029c2f1402ae17076056e9b6
+LLVM.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/md5/731accb405be9bff3edf7dc541e1e5c8
+LLVM.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/sha512/4ddbde78d57e105a4dd7ffbdc9a71f1633ac75ad0c1d61f03c147ec5d49a9d957d9c410a2951ee05a5c0fd6afb96c08478313dd97f6ab8d4158ac25caf4a77e2
+LLVM.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.tar.gz/md5/4867aa40e4e6eab554df2e33b8c4ad86
+LLVM.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.tar.gz/sha512/a66cd17114a1e172dcbcbd86c6fe799b55b45a0bf9225ae92add9101f0172a6bbd798af9e3882bcbd038b4062dfc8eccfe398436301a7b2144cd4ba7511e332d
+LLVM.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/md5/b2bf3660149b1494a9956181943a1b71
+LLVM.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/sha512/d4d1d0850527e966bdfa55c4c9ae66d04942b3feb404bb74ba9e043aa777e1d0ac4a87a0e6946af67fad46273af69f57782e5a89d81045ddfa725caf89af35b7
+LLVM.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.tar.gz/md5/ec8b494606291af7e71b811a295dad72
+LLVM.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.tar.gz/sha512/8510add18ad2266fc3fb741e1d75531111e920c3a5bfcc15037080bcdb72e78cf701b49e1790d4024cdd71f739932da47c7c93f80d500d0b2a176f6e1d2ad4f7
+LLVM.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.asserts.tar.gz/md5/af227b9fb123b3ae4416f61457ece83c
+LLVM.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.asserts.tar.gz/sha512/b3486b9ee2968b819e4130e7743e1342979e2fe9697c800a7cac882424efa45676d1f16d56261f670d453b98ed961d6b50d61ae814f0d1ed81b836bc71801e2c
+LLVM.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.tar.gz/md5/70bff6ada8885f2e96a503712dcf8f47
+LLVM.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.tar.gz/sha512/79dda1460c5a30b919d1d3c696bada45f7c4e6f818eaa350f052f816f643455f9966f650301e84c3b10b38421328dd26b23dbf48aa9a4df660e242aa4e77616d
+LLVM.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/md5/d1d6f0d8877821285581d4abd6f9e58d
+LLVM.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/sha512/d5133e1387ba633ada61d592e2c9db0b546979ff4dae779f1d6a207786395a1a869c0f340d3a5a1ee7081bb356215f8f308b6b620c46a03ca6dcd87846af8398
+LLVM.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.tar.gz/md5/6d9331a894bed1edc900c09c89ea5ac8
+LLVM.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.tar.gz/sha512/789cabfa529e1a5c4a73e94f034f53bbcbbe6d7f5fa4be885f70f34d5430b7d85e6b7d9b88d6aadd56b0705d9a7b93a9897eba947872bf6a371660e4f7323c50
+LLVM.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/md5/6a2434c5a9cb4e6f0780c0f623e721c2
+LLVM.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/sha512/bf0e3974a4cca56a7a7da5139ab573ee6d211f19ea3244b09d629615fa952f871b54617423aa0e44724d698083b5db6a0cfaea02c85f5ab1345a55c72280c760
+LLVM.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.tar.gz/md5/f795b731c16504e240ff900a38521d26
+LLVM.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.tar.gz/sha512/0f515cf6e5aeb626def49ec11e545d28bae96dc8650ed555c752e9631f2beb6403aebb0e5a3e18799f265728865bce24c378f0004610a8390984db9972e54ebf
+libLLVM.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.asserts.tar.gz/md5/896f886aa5eb1fc087894c9f9f9883bf
+libLLVM.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.asserts.tar.gz/sha512/a61c9b6153cf5eb2d7fcac50fde9a51912470ab3f6cee9bd324c1d5a9e2dc7890460669e9a7e94b128f209355be66bdee92c0eb274e5c0feca1ce78d0bde45f3
+libLLVM.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.tar.gz/md5/04c9add92f77ddf8ec296f1b8a73673a
+libLLVM.v20.1.8+0.aarch64-apple-darwin-llvm_version+20.tar.gz/sha512/143b709ede5229462be1a7620cf7ffe134823368db82594c206a4855b5a3bcf115e608d8368fa3204cef734c1768fc9362390d255999a618d1218c1f6e9b6f4f
+libLLVM.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/20aa889fa4f22b080de037cc59619b3d
+libLLVM.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/b04846e966da18890628d39923b65a4f0989c6585452ab5a098f6d39e9f9ed9016c9d0bf2a65a455f5939a746fbab330a3e9dc390103de77d86d54eebbb489a2
+libLLVM.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/b3dbb1f775b708b45c5d97e20e51c78d
+libLLVM.v20.1.8+0.aarch64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/1c2144d4854811968d1ad28c8bca7acf812128ca067954ff67e4cc4fb2218f2cdc139391a6ed36e093db2a7a058948b47b4488f9045f9683fb590ecc65ea59fa
+libLLVM.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/8dfc0b6f5183b61de7f5433374f18d56
+libLLVM.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/17c8fce0b86e7f23006838e5aa3ed0ff4dfd684a2dadbdb190fdcb0174a022f1d648557a021ab1c338941c488efbd5480d0b8821122e39a13d3de16c0b5d389e
+libLLVM.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/9f46eae254d0d5e4c4107a36406f85c6
+libLLVM.v20.1.8+0.aarch64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/2d128bc3683af633d8b467a8d53f9ca857ca167979bc81e0f73d5eb44af1da49b64536e75c991b5460eab6cb35a2947d3a7b0b984af19d3b208b609cf25c1a77
+libLLVM.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/md5/03e5e17828add3b628e4359aa8bffd55
+libLLVM.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/sha512/2d0e4b71821110d740308caa2cee6904636dd7fbc82e1888b186aee2531ee54c4778e700fa41ec64a1b9e725301f7618b4e9ae4fefb71857d51aee0d9fdeb04a
+libLLVM.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.tar.gz/md5/3f6457e82d4f0d85a7f6c99348d23f98
+libLLVM.v20.1.8+0.aarch64-linux-musl-cxx03-llvm_version+20.tar.gz/sha512/299a8ae05950d6f3d53514c9685fab876fce59c3734f4358e7b3f94e7b83d6e50bdab93c4cc3ff3658bc4305b987dbf0334c18addf33c29538f8204082315aae
+libLLVM.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/md5/dca742c368e2c35d51e1c0fe7fe14783
+libLLVM.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/sha512/98898d63e544b32fec17982c58564df335af40570e1a16cdc79fe7314ddfa576499ce691aad66a362c8d15de197163fa254f6f894e1e0d6cd9929f3bf551d025
+libLLVM.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.tar.gz/md5/4b1e1aa21688b16c1bead5b010663d5e
+libLLVM.v20.1.8+0.aarch64-linux-musl-cxx11-llvm_version+20.tar.gz/sha512/48f1c396b6e35e07b7d518b82c59ab7297a0b823000237267c855a8cd03230c9f107a72bb9938e7b7028b8089e2199dd56f01bec52b102dace9edb4475fe4d49
+libLLVM.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.asserts.tar.gz/md5/d040e6d78e978b2c5bf4c1197915e4f3
+libLLVM.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.asserts.tar.gz/sha512/8f06fab8b7fd63be8553a3a7c7d4343ef3849fb98561a8327f2c1444a76378d7a5309a0f080a1e7c921e4f94922a53abd5867cc67db525b5cfe2c2d565c61e7c
+libLLVM.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.tar.gz/md5/cb0dee5837c4aab8c9b93289b1752e7a
+libLLVM.v20.1.8+0.aarch64-unknown-freebsd-llvm_version+20.tar.gz/sha512/be1cd4ca802cd8813df60329f73412d5f9a71099a0721b1e937810bd77e8b8e873058675cbd9d712f217e633697ec9abf74e8fd6e24d7bf4235bc42ac53ab8e8
+libLLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/c378721206529c747630ffa9f9607c6f
+libLLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/57c3d5a09a30d8b04c3441bb534f53e94cb0ac4aecedc3eb3ea346323c54f5fc45c7fa502ec098b116bbde638824102fd706e7e8f34be6474946c43fedc471b0
+libLLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/md5/69b490ecb8b353911759bd3dded24129
+libLLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/sha512/186470ce63f15a376eabd666db86e1fce2d38f50013cab6179c5fffee1544e6f6bfd704b420e1d4bf224082bd3b740cbc51e454f00a8d4fd563a5fc7ff751546
+libLLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/a6d2af8021a3f228ad521b00d700e0f3
+libLLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/5cf67cdf24e0e8bbbc31b8e8e147f68c3c244f8621d6ab016c2f5ece0f69453acce4c34b7d731575813c04930ad08898f2a53fb575e865fd8d3e35246d49f0c9
+libLLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/md5/29842948af5a3b7ddc61fcb0aecb46c6
+libLLVM.v20.1.8+0.armv6l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/sha512/0ac39a005eaaf9af2f94ca9340553855ab6a4b4a19cf6aeb27d48197d3d42c421f2a908796c1c534b76970fc45781db799d223d8da96ee2d56b8c18b64ff8237
+libLLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/f72733ad05add6b53e448a993979c12e
+libLLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/9a0da3e715b990d8de8f2049235c4fe851b52b2472e916851b38f9e7897196ba7c30dd71bfebff76d8b2aca48706224203c27cf37561a775f5dd8752d6cf8ec7
+libLLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/md5/74e21fc2401f33232ec08c979dcb971a
+libLLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/sha512/944dc63c961947fefb9714c591b9f1da5a7860d9625f8885a991bd085adfa3be51d4b021bf31131c89dd65f9a3311b082324c461744fbb8eccfbec6b563bc792
+libLLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/081811138cf70e880c7bbb459a21f393
+libLLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/06134ed819b7f1dd640dd8685a5f2225b245589e02dfa76bb69ce97424caa14f730d5965e6eae8bc4cee9a2b3b33267813a04aebfd835a7ad1f3296973cadb74
+libLLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/md5/1cbe30b01128a23950d076fad8c25ade
+libLLVM.v20.1.8+0.armv6l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/sha512/8944aa24162148f60b02222961c6ac8f6f97cbdc0bf1fee4495efcb85ab261ff22f9444f4bb9a33df22afff03b5356c3b64f26adfd6eab5850169c476509fd22
+libLLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/aa83346acfc15f1667819b1c3d0d4877
+libLLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/74ebec660277ad3f274ad3736beaa8bb8eae3f63322c8bf33ec5b1e1d1583b78d1df9bd30cafd9ee0be33bc1590b2c630b849e4330484529508294020881b409
+libLLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/md5/dbd924917eed252fa0281e4cf10e16f4
+libLLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx03-llvm_version+20.tar.gz/sha512/9861998f7a3b8eb63c291d7135fe11e3797e932156b2eb83001b7298681389ec7c542a50b66eddc3391a052d602dd95f991fb15a96d57429ed444f85fd4f3cfb
+libLLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/ae5022bcb9937521d6c92ab971527686
+libLLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/1d6b124a65cd0768d3293d0584e227df49f6f188388cfb073b5dfdfe1295e6950486d02b363cffc0ff55933298838396ea8615f9a4bbca534c58c4d1d075500b
+libLLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/md5/1dbf2a7d8c13672f9d2bdf089b708387
+libLLVM.v20.1.8+0.armv7l-linux-gnueabihf-cxx11-llvm_version+20.tar.gz/sha512/982c28030f57e32d11b7d57b22baa0968371177670b5fd64e94ee1abf506626d0d8627b39f09c5a4b1781698bb50f1b1d3d533f34ef505383ff9db21c3ebc0bf
+libLLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/md5/95b62ed08209f375edcc55930f41d1cf
+libLLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.asserts.tar.gz/sha512/bec47498a657176109ab1737c6e9ef4967c6d7efd22045fd888e8ef0f7902c91c51ae1a6599a6914c6cdc3c9e047bd21605291838b040d6591efa2cea9da2dc0
+libLLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/md5/dd93ecf3e9345e05726f25360979773d
+libLLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx03-llvm_version+20.tar.gz/sha512/3093bb3514b5e72a889b6dd1e5e548bf3d42c258a2df1ac541c8384d9082ff56f10477d58996d56eb43f84ed51da49d653e826560e9eed00907ad8b336494d9b
+libLLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/md5/e7dad9a37e504da82da0d30d68ed12e9
+libLLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.asserts.tar.gz/sha512/ad613c0f53d84eb28e6d3ad659c07821d9c1db1593bc1ec20b6d9b7b6609eeafb4fad0c54a9cc1712edcf1ce7b3d478c3fe8b5a560975d7ee101c9d8b5895300
+libLLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/md5/54976e9a92ab0cac50bc4ba019f567b7
+libLLVM.v20.1.8+0.armv7l-linux-musleabihf-cxx11-llvm_version+20.tar.gz/sha512/039df729c3ad3e1da31fe9777f31c38462895fb983e87a0df0f8750a73393f768cc48a956914ed6bc95703abe1984d56655f1301041a005484c32e45b22f1a5c
+libLLVM.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/55c6460d8d9aa298e9b439eb4499aeb6
+libLLVM.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/524f4eb3216742c13313166a2a8a68bb220932616f29f6675fe8bb17b8fc68a27a88abaa4747b5a81c9359c4667cc59d0c950164f9de1a661ee13a9e07d298d4
+libLLVM.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/6c5bf64340ba9f4a764510220223a2eb
+libLLVM.v20.1.8+0.i686-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/e7f8f0894fc87ef8886c95b6016522cff41b991d1a826d9d429c319dcedd74eead16dcbff778a1c28be7dbeb4f1d5055ac6ec3f8e7b4404284e002613f5dc6e4
+libLLVM.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/ea80fd6cc3a2c1194f6907f990112c76
+libLLVM.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/05564b2d030a24d0dd003c14213cffb16160d79077798a1844d51d1277d533ea6b6af0df037d977afff0eaa1975bc6287ea1d6365f6cbaa8a2371ef1c20aab11
+libLLVM.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/069fffd57f24bf668f9a7bedf6dc01c5
+libLLVM.v20.1.8+0.i686-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/e4f0e744f9dc53499e820a800a6982e7371840c29edf722282b569879a3b84b1e991e41b845bade0921d4ef963a62ff2f2ee99faa84649e94e5da34c2f966fbc
+libLLVM.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/md5/750e1118d4a58fe9a340e3436265afe6
+libLLVM.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/sha512/c19fdece337c20e0545d29f00d5b39b1a744b984ac934bd43cfe11a78c07e72c0aed2b119d72bc7c1d40d08bca9830680fa59e465b022754585e360c1ef24b3b
+libLLVM.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.tar.gz/md5/479130d81c3385a80900e30c9660d251
+libLLVM.v20.1.8+0.i686-w64-mingw32-cxx03-llvm_version+20.tar.gz/sha512/f492e159241145b0cffba4b476f6418f01297667d83c4dc86f7f97b433b226f7d6e0f3d51f489915d8d470b083ea7c64b53f78d559845bd2d7763ce06c407b73
+libLLVM.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/md5/12e9b99c59ed743e856ef3926bcb7b5e
+libLLVM.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/sha512/513a622269cbd6fa8e88aded1fc6c7a34edf162dfa358c4134abb4f3453905426b1b59d01c340a21fb7916737a51043b4efb6e2f59e8e6cf5a1737792cb23b44
+libLLVM.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.tar.gz/md5/206264cc8c74273ea30b8bee8b94c1ec
+libLLVM.v20.1.8+0.i686-w64-mingw32-cxx11-llvm_version+20.tar.gz/sha512/9aa39d1741987af6529e8e848343579d5d8144b21617861d30977c577047bc596ec3dce41fd27448c77da955a6af7da8556a0df4a17e4b1b66a30f37cd73c1b8
+libLLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/684b8a6f2b6bf515518e06aa95b0c866
+libLLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/259cf75537562283484a679b31292b8f75bc3a40d279d7d06e9f15c364614e17f096d8100cd018b2e9f27a26d4de1d83428e57fd774d92f669fd0d678dc0f11d
+libLLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/f90f1cfa2808a5f40aac32ccbdf32712
+libLLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/25889776532f7eb0e61bb85082a5b6232ee8faba37568ba8b004621c56cf64ca5cff0c706fcc206da0897617679957c9d45ad0d2f69107800426021ee1da3656
+libLLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/fed035ec13df5625ad0e6ad3bca9e1d6
+libLLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/ceb48ca92cbcee726a223c53fc779dfa8a07f35c253a662fe74e22555ab1b8a5a7bb23e17f6dcf855e9a7c04cf37f1ab20c075fc2edd472d2c245ccaf39197b1
+libLLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/a9afbddf63f9c2719c0ab99c79e41839
+libLLVM.v20.1.8+0.powerpc64le-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/ba3def772aad446ef9a27a72108b58702de29e04faf281e76e87d274aff2df9cd8479e3c8a57383eb0cd8e140bd8ed57135a90cd300fcf689545b116a0010131
+libLLVM.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/6576fa8e1a9aaead69f5a341996621e8
+libLLVM.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/9cda11f31ae261a2ea3e0375e1212dae8aea16ca7b9333816e5d4426dbe79a11058a692931f5c16011265f3023cc520f29366209dbdcca4531e616be5524d8dc
+libLLVM.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/116322c2c2f4fa2d2c728472230ae9b4
+libLLVM.v20.1.8+0.riscv64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/7a6a19cad953115db2afdddeda499cb20e2f12f74e80287ef9a03736f6140f7dc63d7cac8baf071f31874794628848c21ecccd93189070a5f4cbaff39ae809ab
+libLLVM.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/d6b006ebebd0d360fb67c355a96180f9
+libLLVM.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/c84e93ae1580544e49c090491ccfbcdbd816d4aeecf7a02edc8d02f733029459d64828dd39a3ff1bd66136c9ad26b9a848c62583d2443b012b69553964e6d2d6
+libLLVM.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/bc91bc457cbba5c11f499b033ae35ad3
+libLLVM.v20.1.8+0.riscv64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/fba17762b8165bb5806521a9f23784b12f3fc6dcd27ab386d685c8d94405ec2763358553642399741842fa8c1331db0213ab805c2f1b0808bc0a3826873af816
+libLLVM.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.asserts.tar.gz/md5/babb27648bc75c3d5f1b69b73cfbd4cf
+libLLVM.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.asserts.tar.gz/sha512/c5321084874826f251b4e8eab1d252de99f8e3cf4364b7f101af2d47d106607ab17070322892789311195c5a62a7729ca6331d78ed1902ff1819e2dce7df93c0
+libLLVM.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.tar.gz/md5/6ed91e2413934ee88f31e8cf8c913dc8
+libLLVM.v20.1.8+0.x86_64-apple-darwin-llvm_version+20.tar.gz/sha512/bf0b59e6a1fc760f4d66fd4393454607110716ae51f3766409694aa17af3d1257436b3703e4d1cdb7e95e4ed619f2b236ee0bd47acf426db18fd1c690ba1d9c0
+libLLVM.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/md5/c9e1c0c52293eec4874ad2ab843c15de
+libLLVM.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.asserts.tar.gz/sha512/b79d37b48eb7d1ef5a6c48b438f5dc485d0a94f0f1ed20accc9c172f1b8c3da4be72ffbcb3b51b3a72a1bf31a7bde646736c561b23e4bd01e903f84bfc870fc8
+libLLVM.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.tar.gz/md5/024c2345cbc7a576e5b8d816de679ab8
+libLLVM.v20.1.8+0.x86_64-linux-gnu-cxx03-llvm_version+20.tar.gz/sha512/41e35d1c8898280b9bc5618d82b3dee754d850168a97d14c852466355833f35a7d260bc6c37fe12c77c43e254db11013ea2512180fc1322f258910d3ea878baf
+libLLVM.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/md5/011fdf6ecf2b2313b1f9b1d59b4c3178
+libLLVM.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.asserts.tar.gz/sha512/1a7bb1d362d0d6dd60ce91616d1b7f2964be37a5bd2123bdb0534dde8e4a5dc3237887cd017c715002d11194d159b9d50de4bddbd8f278fb1ae2a283d11c1071
+libLLVM.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.tar.gz/md5/ed3421fa183b90081894a817536ab166
+libLLVM.v20.1.8+0.x86_64-linux-gnu-cxx11-llvm_version+20.tar.gz/sha512/0509f560c2951230c386989e516efe4e3d6dbfee97bc2c35b7c48fc651956cfea926800669c64c0a4469c6c32e7360816d63df280eac8f8c6c33ca4db8cedae7
+libLLVM.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/md5/9ce37b5aaccc4818e3658f9afbae0654
+libLLVM.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.asserts.tar.gz/sha512/3569a8f8c4297cd48450502a27d73c2d6a7655b358814c02f41d2e56422a9647bc98e1534b5014063eafe78b27099a43eaf64b2691c5490654a8efc1d58597da
+libLLVM.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.tar.gz/md5/ef2c9a8c4a15f6e734ea9d52e0ce1281
+libLLVM.v20.1.8+0.x86_64-linux-musl-cxx03-llvm_version+20.tar.gz/sha512/84aec13364c8ab83f17bfae66b6960be2fa11c9082a294b6c33abc16b071e61eb217503d97e870eb7809faf187475c5bdf4a61dbb317eb00e84925fa800af983
+libLLVM.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/md5/a990c9c47b03d02640b54677076911bc
+libLLVM.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.asserts.tar.gz/sha512/52e277ef91356cb62055d2ed6ad615a603dfa9af8b2f5ab70b51a48f5b741e992ffa34b17d6b4fd4b9168bdb8638b488fe58963e4351bfc5779a5c3d01ffc735
+libLLVM.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.tar.gz/md5/b63cae73c28ce51fe509f00ca2080bf6
+libLLVM.v20.1.8+0.x86_64-linux-musl-cxx11-llvm_version+20.tar.gz/sha512/a46a667dff7f4494f6aaecf76cb6b0f8cb66c21220aef71caf9dc6112519d93fdb3c0a99b951d97a6f52d3980b3593c2631ae2a84636362ca6b627a0c3210243
+libLLVM.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.asserts.tar.gz/md5/57ef8d4e1f17f80304d6ef88c27fe1fa
+libLLVM.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.asserts.tar.gz/sha512/6bba2f5b69d1d17f354285d0f8d9ffb8a463eaa516d59205049cb9c7b777f9bddadb5ab0891824b37666e9e6ccadbf49b6c47124a3177d0e4db21280adee78e8
+libLLVM.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.tar.gz/md5/4f6a088e5a62d0f22d83670e3a0a0286
+libLLVM.v20.1.8+0.x86_64-unknown-freebsd-llvm_version+20.tar.gz/sha512/583a2314586517d818c94639077ed56502202ee74c41c774e8c3912170a952069cbcd6521c30aae29e8cdc37993e9a8a85e901eb17de2d4da1b9c5e901351bd5
+libLLVM.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/md5/f7158f62021a6146a0214b5f58f712e6
+libLLVM.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.asserts.tar.gz/sha512/72df048b992fb5ff51e0c32089fa07698e00b38a924ecde8b30544c503a97228d773e537f9ce477c849a5632b87e677b18bf12b37eab030ad2e3cf50a4933a23
+libLLVM.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.tar.gz/md5/49abb52caf360e0dc642826227e65efa
+libLLVM.v20.1.8+0.x86_64-w64-mingw32-cxx03-llvm_version+20.tar.gz/sha512/fcb5a400f23b9b34f7850801c59740f0a42a17ae2c1dd23ae3c7f8e4c85ec2027987276e83b6b08c93b6e0a6a131199bfe1f6b73a80fe2608092ab04c910322a
+libLLVM.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/md5/381020dce6e2342604c57b76be89553e
+libLLVM.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.asserts.tar.gz/sha512/38600f0eb4f4135b0a2f2e19f2c29c17bd84efb7831dadc6ac0afc5544de0f20fef6b08dd1cf1f4bf08f9a36429fb5581d39824c1306e8a08ec20c01c7cd3224
+libLLVM.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.tar.gz/md5/c690cf9e09e420207bf56cafe5008e84
+libLLVM.v20.1.8+0.x86_64-w64-mingw32-cxx11-llvm_version+20.tar.gz/sha512/f8969fd68da9af4cf99f98ee8a50abfdc58624f16765d1aa68332dc9da9db30023023e99fb0107a89c0763ec4a8bf92e9c5c28aaddc74fc8119c8fe45e748d8f
+llvm-julia-20.1.8-0.tar.gz/md5/468acfa510e532af3580b30f115cc6d3
+llvm-julia-20.1.8-0.tar.gz/sha512/fa4e257a96f3c755593c989a88ab43dc8e3e7ceb5b7197522922ba8a7a68da716cec60a64f5638c431b3c88e18262f2b887adf3f68a79d8cd767e843a25453f9
+llvm-project-19.1.4.tar.xz/md5/1e13043b18558e4346ea3769094c9737
+llvm-project-19.1.4.tar.xz/sha512/a586f8a41dde5e0d9ca6d8c58e9ef2a2e59b70a86d2e2c46106dc31b5c096bb80af0cdbdb486179e9cc676a540099f49a1c2db9e5e84c50362db1f72e9af6906
diff --git a/deps/checksums/llvmunwind b/deps/checksums/llvmunwind
new file mode 100644
index 0000000000000..f2f4f67c64a85
--- /dev/null
+++ b/deps/checksums/llvmunwind
@@ -0,0 +1,34 @@
+LLVMLibUnwind.v19.1.4+0.aarch64-apple-darwin.tar.gz/md5/aace388fc1ece82ea524c582506ae931
+LLVMLibUnwind.v19.1.4+0.aarch64-apple-darwin.tar.gz/sha512/c0211340a05630bcfcf9e3bab97da3e9f07e596e8d391427fa919c99502ab0a09878eda379254f379511884347f7e742872e8589f9b6ccbc2d126a5dfe0a350f
+LLVMLibUnwind.v19.1.4+0.aarch64-linux-gnu.tar.gz/md5/942d0b4ffb8bfd743cdafebf5bdfdbb3
+LLVMLibUnwind.v19.1.4+0.aarch64-linux-gnu.tar.gz/sha512/ec68df054c6694d17cb7f5c389adc4b8b855023f9ca03713d21f1f0c58de2b90166a9f3981b81da5f817f6b09f85fb11e85732d6c78f1d115d6aecf326dc20a1
+LLVMLibUnwind.v19.1.4+0.aarch64-linux-musl.tar.gz/md5/2c27d3c130f54e38e6639ebf7095f743
+LLVMLibUnwind.v19.1.4+0.aarch64-linux-musl.tar.gz/sha512/d348cc1f87927a3d36cd3f2587cf4161dbdc9f3555900ee338857d806384c0cff8fbe67bef97cad0d3098cc8c7f149aac699f3defe87db70fffcc94d681810b6
+LLVMLibUnwind.v19.1.4+0.aarch64-unknown-freebsd.tar.gz/md5/6bb1466d45159193407f27201a443ddc
+LLVMLibUnwind.v19.1.4+0.aarch64-unknown-freebsd.tar.gz/sha512/da6da450e6fba5d501be13d83bc9133796b92e1b3a6cc7cb97470cc7476a369fcd8ddbc9267f03fa4cbe1f2484359eeb70fb629b26c9a1d7ea0065c5a671e1b9
+LLVMLibUnwind.v19.1.4+0.armv6l-linux-gnueabihf.tar.gz/md5/2cdf57d34b1db677498dfc5d89501599
+LLVMLibUnwind.v19.1.4+0.armv6l-linux-gnueabihf.tar.gz/sha512/217c15e1bfdc72014dd26321eb46ae9cfadb7839c693caf3c974989ee2036781cf7e62bb7175766f5171bf32de53a95598ef463c70a0ac64ec012ca9bc19e6df
+LLVMLibUnwind.v19.1.4+0.armv6l-linux-musleabihf.tar.gz/md5/110c80b549d1f80faa36a3e0b39a11b4
+LLVMLibUnwind.v19.1.4+0.armv6l-linux-musleabihf.tar.gz/sha512/b9151aaaaae4adf5da5701ee5962d712def509f85101dae485b905f73391d8658b5a0a58ea1a4c68cc3bc68d7e17d557c05c98d33d907cdb512513ffff75765b
+LLVMLibUnwind.v19.1.4+0.armv7l-linux-gnueabihf.tar.gz/md5/bf50011ce9e4c82d49e61e868b27ea23
+LLVMLibUnwind.v19.1.4+0.armv7l-linux-gnueabihf.tar.gz/sha512/d08faae71010e4a7d25a16374249ff1740ed7883e260e544e4fb0f0d3758d2eb76fea93433cb1987850f54f1ae6528b6336fc2e1db9b46f49defd870e97f8a94
+LLVMLibUnwind.v19.1.4+0.armv7l-linux-musleabihf.tar.gz/md5/142118a84c1b959b0b202d51072168f9
+LLVMLibUnwind.v19.1.4+0.armv7l-linux-musleabihf.tar.gz/sha512/71ac937417f5f2226b8952c925fff94b553de8a29fc45fee6c0fef53a9cf8c07979c60408c8efcf827b260bc3a287059aefa24e050393f2e09b65af45b60d07f
+LLVMLibUnwind.v19.1.4+0.i686-linux-gnu.tar.gz/md5/1bcd011ba209cc840647c684dcad9631
+LLVMLibUnwind.v19.1.4+0.i686-linux-gnu.tar.gz/sha512/8309c3d82d0a94c4c7a8b72720702f5cb0c97f316492217f1eebfc0dc33b4e9c7c8af5c6ee3700ea0c1cc0fd66c90a52389c2aaaaeb67f6278e53e33a476abc1
+LLVMLibUnwind.v19.1.4+0.i686-linux-musl.tar.gz/md5/8db27a7ab4a23febfd6a8eb2f65cd611
+LLVMLibUnwind.v19.1.4+0.i686-linux-musl.tar.gz/sha512/dc7839d2c9a258b122985eb35096e0000561598c54fbd1c5f269921146e6e85589c6f60a0fb964ebfc78af703045373999163253ad2c8f09475bf6bdb923a59f
+LLVMLibUnwind.v19.1.4+0.i686-w64-mingw32.tar.gz/md5/7de74ebac40c9425f619c7f8b309de00
+LLVMLibUnwind.v19.1.4+0.i686-w64-mingw32.tar.gz/sha512/f28f4e8c25cdc06c8d363735e1914c748c150a962c37dfa8a45a3ba514d3fa1b6c551809b8d7f668b258c3165674f012ee6a18f36421e624f38ece27db755a3f
+LLVMLibUnwind.v19.1.4+0.powerpc64le-linux-gnu.tar.gz/md5/c5277c6c127ccc5fa66867ddeb6f93a2
+LLVMLibUnwind.v19.1.4+0.powerpc64le-linux-gnu.tar.gz/sha512/b3d61aee2187c185be1b1b26edaccea66da750931c1216db1f3e89393c1d2c101335d791f0124282320084e697386f395951035e5071da23ecd55133fad472fc
+LLVMLibUnwind.v19.1.4+0.x86_64-apple-darwin.tar.gz/md5/64d459ec7cb7d70b89f5ed62a1261425
+LLVMLibUnwind.v19.1.4+0.x86_64-apple-darwin.tar.gz/sha512/861130348376c8a54b2aa8c86d9d338a4b5fb88d3d2745578dcf15e0f477f518c07a505ce86c898c87142a7c5bf2e1ce43daedecc386a7f3bde67af8e6a56e64
+LLVMLibUnwind.v19.1.4+0.x86_64-linux-gnu.tar.gz/md5/2702948c4171ad35f521e15ee4ebcc8e
+LLVMLibUnwind.v19.1.4+0.x86_64-linux-gnu.tar.gz/sha512/306759ae9064a9746474c53b674eb0b9da7cef6271094009c3244542295ef7a86cb77096b4a18dc2e50628c6ab02e2f1c6e39a1401e86fe4743410ae8d782126
+LLVMLibUnwind.v19.1.4+0.x86_64-linux-musl.tar.gz/md5/a7f9ea5dfbd4760b5a33c97581ad4b95
+LLVMLibUnwind.v19.1.4+0.x86_64-linux-musl.tar.gz/sha512/08add6b1a4e90f50fbceea6d72a476fba3a2b271f44bf64f06b53f35dfecc756f71843d54d0895a2f62d56df24f3675619cf3220215acb2e0a574696c6fa630c
+LLVMLibUnwind.v19.1.4+0.x86_64-unknown-freebsd.tar.gz/md5/05f5b916fa639a68096cc73fb82007f8
+LLVMLibUnwind.v19.1.4+0.x86_64-unknown-freebsd.tar.gz/sha512/0a137168c466861fdbdbef86dec96ece0d4c10f87fdc2dd729b445deb0fd59b214241b62644da77581a0100826e07dacf81fa060e67e35ff38df0d6807cb618b
+LLVMLibUnwind.v19.1.4+0.x86_64-w64-mingw32.tar.gz/md5/bb073cb86c821a70b845bd5de0edc2d9
+LLVMLibUnwind.v19.1.4+0.x86_64-w64-mingw32.tar.gz/sha512/24d206c65c7be34485a1492250a9ca958e70be7057b981940bc24c4822e50e3963c9f88f42892ba2ea6df17fedb2783ace1693aeac74f200a5ca6033a14d6cb9
diff --git a/deps/checksums/mbedtls b/deps/checksums/mbedtls
deleted file mode 100644
index d0b43ad80ea70..0000000000000
--- a/deps/checksums/mbedtls
+++ /dev/null
@@ -1,34 +0,0 @@
-MbedTLS.v2.28.2+0.aarch64-apple-darwin.tar.gz/md5/ef83fb4706100ee678cd8af3f7a5c762
-MbedTLS.v2.28.2+0.aarch64-apple-darwin.tar.gz/sha512/03dda8cc9afa3d79c3c733e45c77891e75d939dc2bcca5ba8eb7aa3bd01fb52011ea9323df9cf7294fe6dcf87eb86c1b1c4b2f3b8af6116929b3371698559fe4
-MbedTLS.v2.28.2+0.aarch64-linux-gnu.tar.gz/md5/ac46c3840d2d0cc7c573f31c2f3d0d61
-MbedTLS.v2.28.2+0.aarch64-linux-gnu.tar.gz/sha512/bb458f1dc9b8684a38f603136ee4ba1c51b47f5047c5a5cfe2c552be266e79dfcd8243b216b0831abf24390eeb6f4524bc7e43b2642eb2ad0227399222cd0d8a
-MbedTLS.v2.28.2+0.aarch64-linux-musl.tar.gz/md5/d74732e0bbcd03666243605e60bb345a
-MbedTLS.v2.28.2+0.aarch64-linux-musl.tar.gz/sha512/90b0699477b697b94c0ab1ba0607fb3e1cd40d66a80a51cb1e0f3b927de03ba201e7e280d453db672e6265db5b07d0145846e53ddbcb4b550afcabef1716470b
-MbedTLS.v2.28.2+0.armv6l-linux-gnueabihf.tar.gz/md5/65ce7c51884b50dcb8343a945644b862
-MbedTLS.v2.28.2+0.armv6l-linux-gnueabihf.tar.gz/sha512/e9df753e9f3a08fd645b15422be7cc0ec3aeac3f8d5f76e0c4c5ec24c54e1b653db320ed0c6799411802a05801241a5363bb449a8765fda7856413c7e3297721
-MbedTLS.v2.28.2+0.armv6l-linux-musleabihf.tar.gz/md5/7b7fc8eafc95416d75e3f1bfb2640e09
-MbedTLS.v2.28.2+0.armv6l-linux-musleabihf.tar.gz/sha512/68362114808fb4f986dea673ef1c7f104caad8233bed1c7f6a365d5d69bb7f7c92b234d6b1bfa5b014e7096411841c115a5cfe9932ae9ce642293cab962f8d38
-MbedTLS.v2.28.2+0.armv7l-linux-gnueabihf.tar.gz/md5/4a477379b15fafbf0c05435f5ab370ac
-MbedTLS.v2.28.2+0.armv7l-linux-gnueabihf.tar.gz/sha512/fd34b475bf94b411e3155f5a5166d1ad081fef3622d7b99f4915b592d4235f63a0b910e0559ba2a0c3d596df9ccc2d7ecb61984091debb20bd4b995942857132
-MbedTLS.v2.28.2+0.armv7l-linux-musleabihf.tar.gz/md5/fc6551ef5f189010a84230dd48f6bdfe
-MbedTLS.v2.28.2+0.armv7l-linux-musleabihf.tar.gz/sha512/d3a7199f3e1ffb1c289c5f0a4384f3b5d1af6e868eb1081d66d6cbfc60e6415e68a7e22afb497f2e7c7900678a19bf1ba2a4c888efa1019c03bce376af62154c
-MbedTLS.v2.28.2+0.i686-linux-gnu.tar.gz/md5/335c3ac146bbe8cd862e4737bc362037
-MbedTLS.v2.28.2+0.i686-linux-gnu.tar.gz/sha512/f12ef67a92af27f4021f73171cdf2ef5558f734fcb185e4417fd7e16752dafe3f75be4291854b5ce346abda674252d58064d9186122eb4f9b15ff89156d221ce
-MbedTLS.v2.28.2+0.i686-linux-musl.tar.gz/md5/435b864b02d1d2c96e5d8dc32b433ae1
-MbedTLS.v2.28.2+0.i686-linux-musl.tar.gz/sha512/52e3a79a70b3ff4617c93cafdeb702105c13b34687fc0fa31eebc91aa5cacea356d5b6a6bdbbfd81417d77debe256ea8f0f2a43c8d140154099bde097740dce7
-MbedTLS.v2.28.2+0.i686-w64-mingw32.tar.gz/md5/a238801f7e0d14f4b693aa4b74645263
-MbedTLS.v2.28.2+0.i686-w64-mingw32.tar.gz/sha512/431db4c388d3c52b08795d6fee6e6696cf383506a603816d6a63dc3571dbdc2b673837a1df1d9003c5009f8f8dc6eaaef3f80aaea396dc2fdf54b7e6a3c6aad6
-MbedTLS.v2.28.2+0.powerpc64le-linux-gnu.tar.gz/md5/26c8f09aa65e5b70be528311519d4376
-MbedTLS.v2.28.2+0.powerpc64le-linux-gnu.tar.gz/sha512/2d47567388b8554ce7714f4ded013fcbffbf94726dbc6a1b7287dc17b27d1fa35baba55cf7dac17c555892a5f4c74119afdf552b42b0e8f80f26621adaa4dbca
-MbedTLS.v2.28.2+0.x86_64-apple-darwin.tar.gz/md5/dfc263208b1a8d4c29b4ec3b6f10e5ce
-MbedTLS.v2.28.2+0.x86_64-apple-darwin.tar.gz/sha512/3b2941c4b151206a56a9a795f0f30519676ea4bc0c93f66b419b15568edc91bb976954f584116accb7f9bd067580712e61b3c580a249332640e27e6346ca51ff
-MbedTLS.v2.28.2+0.x86_64-linux-gnu.tar.gz/md5/94b908036eecbe59372722b41f0b1985
-MbedTLS.v2.28.2+0.x86_64-linux-gnu.tar.gz/sha512/c37a4c34eb450bd716c076c4105bd6022892731c470d64a854ac0fca6653dcf5a70b23982050e7d82cdfd67d02902d9efe4c94d2cf5e0d29d497c3c5ac03f8e8
-MbedTLS.v2.28.2+0.x86_64-linux-musl.tar.gz/md5/217866be499144eeb2e0944b0b60cc09
-MbedTLS.v2.28.2+0.x86_64-linux-musl.tar.gz/sha512/144180e1968da627c92173277a130283aea711157a04a2655786658234232e397985f63d5407166377fc5f38a7447c19797c51b66a9c4b1773601d9e7e01d0e0
-MbedTLS.v2.28.2+0.x86_64-unknown-freebsd.tar.gz/md5/74316c624c8106faf7c04e05149b5c38
-MbedTLS.v2.28.2+0.x86_64-unknown-freebsd.tar.gz/sha512/9eca254c9b663b2f5799705c2e0aebb5529a7ff7759b0f3b67516e622dd4561169fface1d08340666453e779133498eacb8ef2dae1ef6332ceb4d8052d3614d3
-MbedTLS.v2.28.2+0.x86_64-w64-mingw32.tar.gz/md5/cdd28912607781f5e6ea6cad73c7dba2
-MbedTLS.v2.28.2+0.x86_64-w64-mingw32.tar.gz/sha512/e5793778d57b725a0cab48dd7e8f45022699b654bb8e890620efa73628140e453c80601e43647a700d6090a4b66d3c30b11634c4224c016c11c7bfde6b8a1b2a
-mbedtls-2.28.2.tar.gz/md5/421c47c18ef46095e3ad38ffc0543e11
-mbedtls-2.28.2.tar.gz/sha512/93cdb44f764b200131b8dbefb9363e5fa38760eaf01473a512f93673cc55db3515830e16b813e03b39cb819323ad78cee4cb7f3fa85861ec5e72e0f89541c7fc
diff --git a/deps/checksums/mmtk_julia b/deps/checksums/mmtk_julia
new file mode 100644
index 0000000000000..cd7b71f028044
--- /dev/null
+++ b/deps/checksums/mmtk_julia
@@ -0,0 +1,2 @@
+mmtk_julia.v0.31.1+0.x86_64-linux-gnu.tar.gz/md5/80894e5c6582e8a6a30f2712a2b91c83
+mmtk_julia.v0.31.1+0.x86_64-linux-gnu.tar.gz/sha512/8997351c716943c757c27c8f5fdb24320c681f6f393c11bc4134d6535ed09560cec9622500825bd1652c9867ac92eb0626556483d859e9271927ab89f33fbe74
diff --git a/deps/checksums/mpfr b/deps/checksums/mpfr
index 2b4281659b13a..7f0de6099713c 100644
--- a/deps/checksums/mpfr
+++ b/deps/checksums/mpfr
@@ -1,34 +1,38 @@
-MPFR.v4.2.0+0.aarch64-apple-darwin.tar.gz/md5/f9393a636497b19c846343b456b2dd7e
-MPFR.v4.2.0+0.aarch64-apple-darwin.tar.gz/sha512/a77a0387e84f572ef5558977096e70da8eb7b3674a8198cc6ae35462971f76d684145ffae7c2ddca32e2bd1c8b2ccb33e4447eb8606d5d5cd5958298472b3ea9
-MPFR.v4.2.0+0.aarch64-linux-gnu.tar.gz/md5/ade253017d195de694780c32f9161dcf
-MPFR.v4.2.0+0.aarch64-linux-gnu.tar.gz/sha512/1b68de5f8e557b7434c8c1bc016227b58683b56c0977b763422ea85a673bec446fcfee3a4f69e1d4689abb9bb6bf47f2a50fbb56ecac6a9d40096e66bd0f2080
-MPFR.v4.2.0+0.aarch64-linux-musl.tar.gz/md5/7dbd121c7192ccaf7191de5ab8d91afb
-MPFR.v4.2.0+0.aarch64-linux-musl.tar.gz/sha512/8614e3cb28491b24a0ec5060b44abaf264b61c91ddd29d70105ff583bd3112cff1b9bd5ed45e39f186265333982d5eeb8bf35fedc3b51b2a009cc7a51046b50b
-MPFR.v4.2.0+0.armv6l-linux-gnueabihf.tar.gz/md5/adb2b7fdf111c8b19df1516cfb278bb1
-MPFR.v4.2.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/0c47aeffd05a194802f6c4e0e2779d56fb46007e6c3e145ee6992854a21a317a9d51512c59a0ce4ddcd314c387945225c6557d6c2ab6961ae4848875e8983de8
-MPFR.v4.2.0+0.armv6l-linux-musleabihf.tar.gz/md5/c30358bdeffcff65ba9be906cd35889b
-MPFR.v4.2.0+0.armv6l-linux-musleabihf.tar.gz/sha512/2857ec27ae2d53a451d62dd241ce9b43f7ee182bee180ecd9ad92c907c66d0b0ab2d1ea3b20fe61cc176ae44ecbe6041305cc8a9343b396c9cb54dd77a1e2868
-MPFR.v4.2.0+0.armv7l-linux-gnueabihf.tar.gz/md5/a1e30436bade2150c9dc924177f0c321
-MPFR.v4.2.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/d2f4662c494fefda66847e7a085edda3ce396383aafb4e17fc2e176191b0f530541726c261cac3467f13136e8ec728c8a7cf0e352f3e9ebf960d153cbfe766b8
-MPFR.v4.2.0+0.armv7l-linux-musleabihf.tar.gz/md5/857e3c82804e7c853d21603f18caa715
-MPFR.v4.2.0+0.armv7l-linux-musleabihf.tar.gz/sha512/86cf3e940fd66820b5269e9aa2a49c3fc3077857bec037a08e0d301b0bf3cc5c79ac331cc6370d852e20f4acf8f601c49d5dbe24e96652e4411b3f33a11e3f45
-MPFR.v4.2.0+0.i686-linux-gnu.tar.gz/md5/5a432be79a112e67e970980f4bde13a0
-MPFR.v4.2.0+0.i686-linux-gnu.tar.gz/sha512/94198b23ac94dcb9dca95938a46b9899c3ef329bafbb13b32076cd3415b89f11908632c7c07e90549c01bd9ed7fc9a002dae07a645f85b8509234c49be729621
-MPFR.v4.2.0+0.i686-linux-musl.tar.gz/md5/4ce71dc250c2469f844a02c6ee6571a1
-MPFR.v4.2.0+0.i686-linux-musl.tar.gz/sha512/134b67b23de75ab172594cd0fac55b5c265730bfea195978698e3e6fbc47d65617652bd72d90ba092ed1bac4c29d5b2c109df5d8dc60b5d8f91159fd58575b67
-MPFR.v4.2.0+0.i686-w64-mingw32.tar.gz/md5/df41bde61d33b56fd48bdb0f9ec0c624
-MPFR.v4.2.0+0.i686-w64-mingw32.tar.gz/sha512/145bc14f22eb077992cd993a20d3205eeeee1d2bb99ff4f48277173b0b39c848e2cd3044d2141003607aa4ea3665546a87b9ffea87bf570ab1b152117ef4045c
-MPFR.v4.2.0+0.powerpc64le-linux-gnu.tar.gz/md5/d818894054b38232ba02ee0e129f6fe0
-MPFR.v4.2.0+0.powerpc64le-linux-gnu.tar.gz/sha512/0e73ca926f3e06466d1899f0b3e9ae4abe15102804dce6716ce23154344a571773c40d276f0038a0ae4e626799867ee715428e1d961334a01ad3091745367e8e
-MPFR.v4.2.0+0.x86_64-apple-darwin.tar.gz/md5/9652148df4e771be39713c4f43d3ff61
-MPFR.v4.2.0+0.x86_64-apple-darwin.tar.gz/sha512/91a0219fd1880dfa90d196fa403f4e1df0347ced58a4772492196b94476f346d80696885a4f3520424494bc09679cca0c0ccf2f6e9247d60b52ebdf564485e72
-MPFR.v4.2.0+0.x86_64-linux-gnu.tar.gz/md5/4de39327a792be708119ac7b43957628
-MPFR.v4.2.0+0.x86_64-linux-gnu.tar.gz/sha512/447b59d5589a8517061627668e8baed4366408cacc9d8e063528b9b795de6d27e4005844578310185f03f568f4948bc4a794624235875fb61b6187264b6f483b
-MPFR.v4.2.0+0.x86_64-linux-musl.tar.gz/md5/f9b8c3c094b339341b19828cc5e1d47c
-MPFR.v4.2.0+0.x86_64-linux-musl.tar.gz/sha512/c661e7c5bded3bdf11b2bd5e5ef4ad8e446934d9b82dfe26f0be1b83cea98d7e56e0903bfc1075f91c8d23401cc6b3b722f2d60f46d73cab884e81fe518aba27
-MPFR.v4.2.0+0.x86_64-unknown-freebsd.tar.gz/md5/83700aaebc7344d84d70f0bd0f9c7890
-MPFR.v4.2.0+0.x86_64-unknown-freebsd.tar.gz/sha512/039cb18a142a90fadc7951f05324fe9c033da9502a61da77fdcd5d9557075ad1ca8500b9b9b39ce57a44b9cb28d41dfc6cbde10cfdbdb40077ebada24a2bab9a
-MPFR.v4.2.0+0.x86_64-w64-mingw32.tar.gz/md5/9cdaa3fc0d13a8835d165c745937c385
-MPFR.v4.2.0+0.x86_64-w64-mingw32.tar.gz/sha512/21464bf836362ecc50da82859a4ba2de3d32d76ff57de9719ac850e73918814e1002130e0d6797fbb914b822f13bea383be3a29b2a1c9c8415cb2e3c5d321669
-mpfr-4.2.0.tar.bz2/md5/f8c66d737283fd35f9fe433fb419b05f
-mpfr-4.2.0.tar.bz2/sha512/cb2a9314b94e34a4ea49ce2619802e9420c982e55258a4bc423f802740632646a3d420e7fcf373b19618385b8b2b412abfa127e8f473053863424cac233893c0
+MPFR.v4.2.2+0.aarch64-apple-darwin.tar.gz/md5/01a13215fd646c761e469f36f693fdc8
+MPFR.v4.2.2+0.aarch64-apple-darwin.tar.gz/sha512/da473776ac8c687ab34792235ee5e1e08dc6a2e29b73620bd6dac93db32397037ae502b8ac3a35e020f722dae7da007a060e5e11e3287c4cdb846bf7e5168297
+MPFR.v4.2.2+0.aarch64-linux-gnu.tar.gz/md5/58ca9f3e08a388c3e40692e623f3884e
+MPFR.v4.2.2+0.aarch64-linux-gnu.tar.gz/sha512/c6846d982ce1211791b466ed6fed2aad9e5f9a4866c48db99eb288dcbb1480660772010869fdea66d6453c8c140c92e367cfe55f6087fe41ea040fbd77eafe34
+MPFR.v4.2.2+0.aarch64-linux-musl.tar.gz/md5/2ff7e1400f27d049e3274a6277322860
+MPFR.v4.2.2+0.aarch64-linux-musl.tar.gz/sha512/388f7050288be9d30c4a2e772c0859e414b0cf6dbc845eec0eb6aeda53595df94a4e3001d02fa04c173fcf74e00c2552a8880b62ebf5adf443da2a95497be891
+MPFR.v4.2.2+0.aarch64-unknown-freebsd.tar.gz/md5/d1e6c477ab9678d1cd1dfa7e00366e69
+MPFR.v4.2.2+0.aarch64-unknown-freebsd.tar.gz/sha512/897174756651d01272d86bb147f5dda9f84f8f1bf1fe02b8505e141df3cc38523019f85cbe538fcc6ea8073d7743fc6428a06271107b059de80cd8f959c52daa
+MPFR.v4.2.2+0.armv6l-linux-gnueabihf.tar.gz/md5/5213b0ef1b191c529e3335e05b918003
+MPFR.v4.2.2+0.armv6l-linux-gnueabihf.tar.gz/sha512/bbcdb90f80d8cb826cd055eb41f051890c7847fc0887389b61bd24c051d35873af36672e5f1956cc3fb23b8e3ee50ee069c185fc2faabe302787d70210bd5b07
+MPFR.v4.2.2+0.armv6l-linux-musleabihf.tar.gz/md5/9a9d9207a6b52b6e84b1b2b1c631e0f2
+MPFR.v4.2.2+0.armv6l-linux-musleabihf.tar.gz/sha512/fd40d16a40b1db2b441339e5c8cb3f8a1810d2889713b0504f9bfd5451f4f4c2dd0ca35a4b2922feca9cf50e4a9b3bf8cf2c088655dd85a23c33ee67c12e0a72
+MPFR.v4.2.2+0.armv7l-linux-gnueabihf.tar.gz/md5/44532dd5607ced01a8ba0856c3bfdbc3
+MPFR.v4.2.2+0.armv7l-linux-gnueabihf.tar.gz/sha512/469fc030f458bd52f6bdffc442ceaaf8659f0f1e40d581eb1303fd4753d2c665fcb75bc6c54d04eb53d77b1945d67f48a5ea5614f2ee82cc7fd27e89859b45f4
+MPFR.v4.2.2+0.armv7l-linux-musleabihf.tar.gz/md5/fbd13b054b8d27be6bc836283f7846bf
+MPFR.v4.2.2+0.armv7l-linux-musleabihf.tar.gz/sha512/926dc03f99a6827c833614d17c5ef4f80fb862bdf4397db9aaf8ae9b3a66e8b9121cfa044b18db46f5774abbd7e9c129363183ccb2ae3192084711e7ff9d6382
+MPFR.v4.2.2+0.i686-linux-gnu.tar.gz/md5/da6fbb90dc20830af9325cfaf3544e4c
+MPFR.v4.2.2+0.i686-linux-gnu.tar.gz/sha512/d235884e1d1bef406b1e5ceb9c34aab68c1a8040b2022964105238ef8cdfd4af7aebe474fef80849689ca88d9168697fd55e8d6ab92b6641a1f37c431d5e3ff3
+MPFR.v4.2.2+0.i686-linux-musl.tar.gz/md5/fc885092e1469a06aaaaf24168e8fafe
+MPFR.v4.2.2+0.i686-linux-musl.tar.gz/sha512/5307926e1222b302e48e2f5c08479b920279d15b95937a245e16ac1dfd5c6206cb64fe4b6ca4cb7d6be847d8cc01a04d2661a630b978dc2dbd60605d222b8b21
+MPFR.v4.2.2+0.i686-w64-mingw32.tar.gz/md5/55f129d5b5b849b3bc018e68ccf14914
+MPFR.v4.2.2+0.i686-w64-mingw32.tar.gz/sha512/9a24e4616e05f5c1fb53e7a12167f7a55d05ec1895124d6ee23b2efd548f49e4c7995c16d240ec803f352d586ae4667027ee0bdeefa520e0c1f581fcc338dc44
+MPFR.v4.2.2+0.powerpc64le-linux-gnu.tar.gz/md5/6f47e4cde45ddf0cb2ea4f31ef9c9e04
+MPFR.v4.2.2+0.powerpc64le-linux-gnu.tar.gz/sha512/4fd8fbe166e719c636e430d4d5c938231fa9126b29eacbc678d2eb50d3d4b95cf6ccef155ce401c6d33b9730c2f89c0c77ec8fb39254483c2e4004639c503c1c
+MPFR.v4.2.2+0.riscv64-linux-gnu.tar.gz/md5/c4736705ff2a55cf8206c3af84bfc417
+MPFR.v4.2.2+0.riscv64-linux-gnu.tar.gz/sha512/e1e77d64ee88de2990fbc791d7307afe859cfbdc1ac67e7bdfa633627b5542ce2e3ee0cd9fe4036abfaf60509277a43f263e2155665ac2c5e38b8627e470f399
+MPFR.v4.2.2+0.x86_64-apple-darwin.tar.gz/md5/c3e983178a1e9600f42714d4cc1ecdf6
+MPFR.v4.2.2+0.x86_64-apple-darwin.tar.gz/sha512/c5c6cebcdfc5b7b84e9e217a81d99e5af78d163949745d570af5689210b3eedeb9de3c11991b1b36d8fdbee17b550a4072af951d19c3f863cf24cda7d9c12950
+MPFR.v4.2.2+0.x86_64-linux-gnu.tar.gz/md5/61fc7c7aa676d0a07e1709b433a8e423
+MPFR.v4.2.2+0.x86_64-linux-gnu.tar.gz/sha512/74bdefa72c51c82ca709e3494cd664a6593173bbfbe0198f18f4c0add06ce4c1217e4dd49e99cb151d71c85cd696ae2147aed29ed2cf3f1ca0e5b40582abb571
+MPFR.v4.2.2+0.x86_64-linux-musl.tar.gz/md5/207ee8ad2293ba36d3d7bb845ab346e0
+MPFR.v4.2.2+0.x86_64-linux-musl.tar.gz/sha512/63325e6595861a324f3c299d8c51b1d665197217c8fc9a5ae627b624037394f050bb08a9acd14e9809f982942c066f1185dded0fa493f360bcd3baae17a05f92
+MPFR.v4.2.2+0.x86_64-unknown-freebsd.tar.gz/md5/74e5a5ce0ea84959ccec7b7f7ab22c66
+MPFR.v4.2.2+0.x86_64-unknown-freebsd.tar.gz/sha512/411dbb339218669af6181fdf1e17f926abb9830ae54a8f9ef1b7df53021e8da01a41fda13067731afaf9b803324d5f82c060ef5b5b91045625188458b99dcc75
+MPFR.v4.2.2+0.x86_64-w64-mingw32.tar.gz/md5/2de84b494ea832147be4f9bfa786cd19
+MPFR.v4.2.2+0.x86_64-w64-mingw32.tar.gz/sha512/5f86aef6ab4fd7517cb23ad9a32ae21954a3ce1f27f5cbd28abe038271e20197b7c241055092a4aa6d5391f012bdee10465c58b53acd64bb5b99fd754c75ad29
+mpfr-4.2.2.tar.bz2/md5/afe8268360bc8702fbc8297d351c8b5e
+mpfr-4.2.2.tar.bz2/sha512/0176e50808dcc07afbf5bc3e38bf9b7b21918e5f194aa0bfd860d99b00c470630aef149776c4be814a61c44269c3a5b9a4b0b1c0fcd4c9feb1459d8466452da8
diff --git a/deps/checksums/nghttp2 b/deps/checksums/nghttp2
index 6113b23d68c14..58fedfe3fa5e0 100644
--- a/deps/checksums/nghttp2
+++ b/deps/checksums/nghttp2
@@ -1,34 +1,38 @@
-nghttp2-1.52.0.tar.bz2/md5/bde5874bd8e7e8be3512a621de27b9d5
-nghttp2-1.52.0.tar.bz2/sha512/019ec7a904d1baf8755ffcea0b38acf45ea9c6829d989a530ab35807338ba78d3328b86eebb3106b8372b7a8c51b466974d423e0cd786b6d6d020f0840c160bf
-nghttp2.v1.52.0+0.aarch64-apple-darwin.tar.gz/md5/e3d9e07029e184cc55b7e0c4d2e27c7f
-nghttp2.v1.52.0+0.aarch64-apple-darwin.tar.gz/sha512/cd098db984f751b00d2cc99d7f7eba0fa830ba178dd85a9dfa679a591e62d57364dcfd74e6a55ef513a0436a8e520b1a5474d4bfa9a8bdcd70e398482b7c9985
-nghttp2.v1.52.0+0.aarch64-linux-gnu.tar.gz/md5/73fe75f3cfa2bd3e804ea39a4eb884a9
-nghttp2.v1.52.0+0.aarch64-linux-gnu.tar.gz/sha512/71f4b2a23ba148b66432797b0db954dbd98fc900045d4572f488b43779aae125f71929e5bba6bbadd30c7998a133c5e5beb70888968bf3b01bb5fe9c9ea0e451
-nghttp2.v1.52.0+0.aarch64-linux-musl.tar.gz/md5/736a24a7eee567851a965558e31489fb
-nghttp2.v1.52.0+0.aarch64-linux-musl.tar.gz/sha512/ab36182b04a590b092fae9e3a912a87467e8b01ad40a628a1d2e52910ee513ab327d5d2836df598d5aa8203f60a605d19d0b9636eb35d12a84a1c9d87124604b
-nghttp2.v1.52.0+0.armv6l-linux-gnueabihf.tar.gz/md5/56fd32e8d77d4c9d9e2355565f4db19b
-nghttp2.v1.52.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/85718e0e5cee35d91a8684ea33d8f965bb30d62dbd6b74a574a2fbc4c1027b1ef23ef68f1dec3f037fa6c5739287329567df9591a69f8f23b23fab2516a0b644
-nghttp2.v1.52.0+0.armv6l-linux-musleabihf.tar.gz/md5/283273d3bf4d53b56d12ef6af2e72f20
-nghttp2.v1.52.0+0.armv6l-linux-musleabihf.tar.gz/sha512/5c1d92cbf5f2f4e1ceb4ee13634c0bceb6ca28abaf9d87cc673f264d274bb96aa095648295e9aa76f86eb0890a426f47c0b942e72610daf722ed8e86b5f0df69
-nghttp2.v1.52.0+0.armv7l-linux-gnueabihf.tar.gz/md5/d7ae84e5365759a42d0fe0360f679b61
-nghttp2.v1.52.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/63212e3ad94d2bc54ca9ebd452d8de8e67aa53c03a3b3033d36da765303e714d8d5c24156ea4fb985acc72fe52e2977e8e8a658cdd9409bd41ecf401c08c1aee
-nghttp2.v1.52.0+0.armv7l-linux-musleabihf.tar.gz/md5/a6ad0f25f43b7f1832faeaaadf683ed4
-nghttp2.v1.52.0+0.armv7l-linux-musleabihf.tar.gz/sha512/64b9075c0d819288345d53c5ce88b360d2ca4d24c3d2e81fb53c55f86054b1a3e95d7831b363a4100965cdbf479268a5993d66ef59089a219a97b4151d8fef60
-nghttp2.v1.52.0+0.i686-linux-gnu.tar.gz/md5/9781f6eeb4d24a291d6737e59e74edc1
-nghttp2.v1.52.0+0.i686-linux-gnu.tar.gz/sha512/2b542cb67e78993ef881694dc50c980b57db3761c5f4e11c381afb1b31d1fb8ab0a8b20e1279303a602c07912f21e8ef9d732366b76ab3f356a74b444a5dc78c
-nghttp2.v1.52.0+0.i686-linux-musl.tar.gz/md5/08603b9364179ab4cbe0637b9b1b63b5
-nghttp2.v1.52.0+0.i686-linux-musl.tar.gz/sha512/0a5b79709482548c6a713843b670695b4b13d2b219b592d029719da0b4187fe884798fb44e2c511c300f02bab03f2b0b289d49d6256e3ce0b9602a66ea2382bd
-nghttp2.v1.52.0+0.i686-w64-mingw32.tar.gz/md5/1abdf0cad466ed0ca0da137809999d8e
-nghttp2.v1.52.0+0.i686-w64-mingw32.tar.gz/sha512/04680895ead989fda56b284d8963e7ca31680492c8f77f4c6bd7ca03b9a66ee7529b78cf35e07b2e106f43c9aa543dffd4081b034339803ba95021293d3df997
-nghttp2.v1.52.0+0.powerpc64le-linux-gnu.tar.gz/md5/ae411e40e24cb3f3b07fe8de211b58c6
-nghttp2.v1.52.0+0.powerpc64le-linux-gnu.tar.gz/sha512/7433502d76646e5761ea2707fa65ea5a412c513c70908a4d9ceb504f08121b1f39bcff984543370c221814785b7064f85dedc777a22df5e30a64a64e510e0978
-nghttp2.v1.52.0+0.x86_64-apple-darwin.tar.gz/md5/59f0de0affaa17898e837b5074de68fc
-nghttp2.v1.52.0+0.x86_64-apple-darwin.tar.gz/sha512/e639c813373b17d95220640ec2a568e9731cfc32df826610357ec9ff8e9d7e7abe10291140eaeb9342ae69215798bf3f999db7647c23efb4f815b54f4da9cfe4
-nghttp2.v1.52.0+0.x86_64-linux-gnu.tar.gz/md5/6bc8501392d47b349c7463e984dc5909
-nghttp2.v1.52.0+0.x86_64-linux-gnu.tar.gz/sha512/522cc2a8464ee5770c01b83a6b4ecbbcce322efffbd738f7c907643fe85342e785bbc805028d41c2b7404d6241168d1ab37a9db15018623c265b53905bcf060f
-nghttp2.v1.52.0+0.x86_64-linux-musl.tar.gz/md5/725a6adc23880b28303017597b974535
-nghttp2.v1.52.0+0.x86_64-linux-musl.tar.gz/sha512/ede5a34b7f71310e4c3cd99b9b61b2453db5dc8117675de12adb1e68c9283cdf821614f49f4d04bdd3b0f17d51a52972ec1e226d0dbdc5462b1a4a1fcc9f39e7
-nghttp2.v1.52.0+0.x86_64-unknown-freebsd.tar.gz/md5/a2b89913c1057ff67e7be6086619a65f
-nghttp2.v1.52.0+0.x86_64-unknown-freebsd.tar.gz/sha512/6b4efd2a0807f19cecf1f1e97b23ade11ed39f651e29586bb21185e17d0c50dcb63e26233ff994bfa934b383468e29f680b1ebe0cc2a2dd09768b14dead399a4
-nghttp2.v1.52.0+0.x86_64-w64-mingw32.tar.gz/md5/e1c8ec6ec2d69b2ac64b114ebf09f8b4
-nghttp2.v1.52.0+0.x86_64-w64-mingw32.tar.gz/sha512/cb43cb138f14717501e852ed388a44d41012e2bb70b6887584b37b4e0f42827d74f17ea85ba4aa0bc09d623dedeef73eee80815c1db2b6858b31251feb0b5580
+nghttp2-1.68.0.tar.bz2/md5/278014628b7c04c38a9112399e6e80ec
+nghttp2-1.68.0.tar.bz2/sha512/0a11c62085e582a045fd35a0ec3a752ec08e38bc0c5460d3cd84fb8aae518776db662b524a01c75694306d98ae054e3150b141515f8babc9dff467d139e747ca
+nghttp2.v1.68.0+1.aarch64-apple-darwin.tar.gz/md5/ba37f654c668bf36d40886d8c8918d7f
+nghttp2.v1.68.0+1.aarch64-apple-darwin.tar.gz/sha512/59d5c3834cdfb1c5fddcd5c6c3c41c9d6a1fc4d772ce0a38b261482ab7c4f856cbda336e9731e7b1731389747514f3871aa114c77472b286b9e477b4e710bd8b
+nghttp2.v1.68.0+1.aarch64-linux-gnu.tar.gz/md5/cc8e4d1a8b356f6341a7621b39a95769
+nghttp2.v1.68.0+1.aarch64-linux-gnu.tar.gz/sha512/b9ba113f8653edbd9e1d8bd21d3eaa7ca8a163bc0702cc647cd82705f768f6eb489eb8b7739e6255af87a418602dd2d7939d1a57e7cafe020eb4e497a3a4861a
+nghttp2.v1.68.0+1.aarch64-linux-musl.tar.gz/md5/463c6dbf4e277f3aefc0f55d65d35f10
+nghttp2.v1.68.0+1.aarch64-linux-musl.tar.gz/sha512/72ce78adac27103d12bbc272c27596fe493cc804ac0c403e0af689c65b323d2d9044a9c4fa955ab24efc7c4c68a7228071ecb41e5e2f2c0aa73a810e400a1256
+nghttp2.v1.68.0+1.aarch64-unknown-freebsd.tar.gz/md5/9cf8b4b5842befc289c2eb6be308d46a
+nghttp2.v1.68.0+1.aarch64-unknown-freebsd.tar.gz/sha512/60417edfa64ce850d003aa0975ddc3fb907707b464333de67bd3956310432deff71f88f8a9e233db06d0ac50e62977302854c2e91a5b77c0da451c831ce8b429
+nghttp2.v1.68.0+1.armv6l-linux-gnueabihf.tar.gz/md5/52ae15239397e8fdcaeda348f330da84
+nghttp2.v1.68.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/2320e6c3b9f6c05000184aa4c75531fef395dee04663a2370b4ff07418e137e9dc70949f9a324ee05c87d1b4d6eab466ce6ae9501b595a98b0f2f2150ebfcc33
+nghttp2.v1.68.0+1.armv6l-linux-musleabihf.tar.gz/md5/a5437e7a9bf8c21836f90eb36d623190
+nghttp2.v1.68.0+1.armv6l-linux-musleabihf.tar.gz/sha512/7c9a830b1468a8964e0b949906d2e79f70da7f3ba10ea0ab454b310ee18b7915c2800e06c66dc42368458cccc5a102f02260d48e1a7ca722008966093fcf114b
+nghttp2.v1.68.0+1.armv7l-linux-gnueabihf.tar.gz/md5/4243800e6d4dd915ab76babcc4310a4a
+nghttp2.v1.68.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/fcc4813b840ea8be19b18be2b286bbc37cacb0773ebff0ba7d838127354359a07639bb4cac45e16e1603f54c3053c0e9e9f5ec3364fb3e65b617723d2f3679e6
+nghttp2.v1.68.0+1.armv7l-linux-musleabihf.tar.gz/md5/46569561e3889b0e058f97eda7750357
+nghttp2.v1.68.0+1.armv7l-linux-musleabihf.tar.gz/sha512/856bfae4739fab136183b2c78aaa95bb3964de42196886ed673aeb44896f6dfaa08ff704ec9fff1a19690cef8c470c725d34d28e4892b2b9a875327e379f728f
+nghttp2.v1.68.0+1.i686-linux-gnu.tar.gz/md5/b4f54faba0a46e2ce695526f496f6d90
+nghttp2.v1.68.0+1.i686-linux-gnu.tar.gz/sha512/4cbfcb72814712dfa0c40c790d1aa2536adc4be5d3624090f4a25dbdd4893ef12d5f5ebe2f7dd3d14ac592d18ee34174c105ca12aabdbb31902ca071f859c970
+nghttp2.v1.68.0+1.i686-linux-musl.tar.gz/md5/e3e94afd23739a86dc04e6c23e4baea6
+nghttp2.v1.68.0+1.i686-linux-musl.tar.gz/sha512/421d0425b022c34a0468b0001cfe40bee70f5964575582f6f329c529ddfe54960fc8ab366820c80cb909d99c3245c5fd63286c2ee032cec0c0d208a7163a283f
+nghttp2.v1.68.0+1.i686-w64-mingw32.tar.gz/md5/527d1553f3c7ea3cfb8afc6854610c3f
+nghttp2.v1.68.0+1.i686-w64-mingw32.tar.gz/sha512/5b5ee6bc5c2dddb4d65b6fdcc27d592acc22bd4d1a5d665c0c887b213a0a22807f96bc750d341050a0b8d7d7f0f8c0eab54177873d3b49b9f83b8cbc6d0a4dcf
+nghttp2.v1.68.0+1.powerpc64le-linux-gnu.tar.gz/md5/cc3317466151688b04b02ed837dfb6da
+nghttp2.v1.68.0+1.powerpc64le-linux-gnu.tar.gz/sha512/3fddb8e2e870619819ab49c9e7742f816e45f96ead103765915c79d0924e37f6fa0a2df014068957502a8f94e66b21efc65f2b5fae697b2b30b312c4532c68d9
+nghttp2.v1.68.0+1.riscv64-linux-gnu.tar.gz/md5/0f3685a5512ae30acf19c60694354eed
+nghttp2.v1.68.0+1.riscv64-linux-gnu.tar.gz/sha512/5c902dc4636f09cdbf9e7cadca7e38f1f929d5b05b805da25521a51480bb58f97983b706eb0429026564316623a9cb19d327ba1280d081e392276aec06ea4b44
+nghttp2.v1.68.0+1.x86_64-apple-darwin.tar.gz/md5/e3f4562e811cd00b871ee6e24b5c6866
+nghttp2.v1.68.0+1.x86_64-apple-darwin.tar.gz/sha512/de8c3026236fc21aaa886d3c83f5ceaca66037442f597d497a92c79dd0e22d87a6e5348d73089a90c617ef8d683a7b714ff08f3a2434519a84402b79b8c2d886
+nghttp2.v1.68.0+1.x86_64-linux-gnu.tar.gz/md5/fa5ab51548c885aa78bee3a7a7c14de4
+nghttp2.v1.68.0+1.x86_64-linux-gnu.tar.gz/sha512/138dbaa160f63611f377badef9154bc4f62ca7ab38eda22830f84d70a36cdea42041f553c190be32ed8ef431861ac0224208105557e7f5972b0f892993df666b
+nghttp2.v1.68.0+1.x86_64-linux-musl.tar.gz/md5/35be6e42eb9ada6e97456e0e2bcab234
+nghttp2.v1.68.0+1.x86_64-linux-musl.tar.gz/sha512/1773e7ce16cfe875530fe7e2dc0b6babfe7e925d071b35ab88b88b2d9bfb1c956da79a537230ee32b845c02a9e8acd620f5baa53695b3a345c30da758040faf5
+nghttp2.v1.68.0+1.x86_64-unknown-freebsd.tar.gz/md5/d6ebda89287e10c48eb8db5a745c799b
+nghttp2.v1.68.0+1.x86_64-unknown-freebsd.tar.gz/sha512/6e492a26de097167a7801b8143ab8635cb212059e996bad6584260e78914c4e7c537413c99e623254831bbd6e3530a6235bb25366e9281bf335e3cd77f31acc9
+nghttp2.v1.68.0+1.x86_64-w64-mingw32.tar.gz/md5/4fa8eebdb2be1aec5c8c87429226e5d3
+nghttp2.v1.68.0+1.x86_64-w64-mingw32.tar.gz/sha512/50063b88328fdbeaefb3864e47c142478eaa96166acb876af8cd37a0ab9bf8abf6165a5aa3103563f22a9a5748a6ffd12aa307ea2683595cfd27c85d8936191a
diff --git a/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5 b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5
new file mode 100644
index 0000000000000..a834d041324c4
--- /dev/null
+++ b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5
@@ -0,0 +1 @@
+c866a3ff71f0640c47cda5d31f76c8e0
diff --git a/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512 b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512
new file mode 100644
index 0000000000000..31eafabe3a66b
--- /dev/null
+++ b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512
@@ -0,0 +1 @@
+a2db523b0068cb727db7e3a2210578f6d1de46493c5e3a9114ad961ed1553d10b646d11486fe4f987e43a9e2ea289d1923a63861f5fc56cada94bcf0b96b7dc8
diff --git a/deps/checksums/objconv b/deps/checksums/objconv
index f3dfb0de2ffab..09db43a8b7bf7 100644
--- a/deps/checksums/objconv
+++ b/deps/checksums/objconv
@@ -1,32 +1,36 @@
-Objconv.v2.49.1+0.aarch64-apple-darwin.tar.gz/md5/bdf95a776cfe782d30f48a41575e1414
-Objconv.v2.49.1+0.aarch64-apple-darwin.tar.gz/sha512/188b5e25d238a5e2f704c3ba8d2d57d6fe452f9d5c0e26b710ff225086581f906b8de6388c6240bbaa9d395cb58c0a73a67e65cbb8df6be7b98473101db467e0
-Objconv.v2.49.1+0.aarch64-linux-gnu.tar.gz/md5/2c2b88856921c38294a30671d4794dac
-Objconv.v2.49.1+0.aarch64-linux-gnu.tar.gz/sha512/2c0e6cf6da02e25386e89c51f5f2d39833b49653b20537c73f7938a4045805d07b0f520661d07332aa1372231d34a3a979ad490bf5eb91fc00fcc20da3e7a9bf
-Objconv.v2.49.1+0.aarch64-linux-musl.tar.gz/md5/11f6c06ee0d98b553781367d5404c76e
-Objconv.v2.49.1+0.aarch64-linux-musl.tar.gz/sha512/d93a742a08f873f9336f57a28af8a1eeff624d5d9dbcbceba0d58c17a2ee3791b363661af293d08997d701fc22177192e5b5154b827974163c189ad6511ea13a
-Objconv.v2.49.1+0.armv6l-linux-gnueabihf.tar.gz/md5/0151be530a0d54376590065cef28666a
-Objconv.v2.49.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/cb7cf5b00f211be4194b5b6acf11cc491b5f140d990fd8babc6590649e9864cf07a421e8a87ccdbe0b8720bc6473166837e384202bcbac6cedb2a9bd9c46711b
-Objconv.v2.49.1+0.armv6l-linux-musleabihf.tar.gz/md5/390251e8245a3d8d110a1786336663cc
-Objconv.v2.49.1+0.armv6l-linux-musleabihf.tar.gz/sha512/b7eb9e4a983e69ca970ce86bf306b7df11bfa8aefdd26cc02841c563ad0b5dddcb47f106fe7a0a420b20ae1d4890e6a8011c0db5a26e3493c80e63eeaadf86b0
-Objconv.v2.49.1+0.armv7l-linux-gnueabihf.tar.gz/md5/5f924d5bc16bac6447e9f2deb943e60f
-Objconv.v2.49.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/edaf3881754bc949ef3c60b058cc0cfff7e828d6486ca968940d216852baed5b06544dd48619cf045f3ef87df5ea00389ac3b298a1d4e9489995351e1e1ca952
-Objconv.v2.49.1+0.armv7l-linux-musleabihf.tar.gz/md5/c967a9ed1764d9692e905d879a03f45f
-Objconv.v2.49.1+0.armv7l-linux-musleabihf.tar.gz/sha512/08e9397bbd34734c3e9137f64a94a86ec11bc70eaf54811301e7bf782b581ffdcfa041936e29aa0a8ee46203591f8213d6170a7c6ea141a0ab625ac1156dcfbc
-Objconv.v2.49.1+0.i686-linux-gnu.tar.gz/md5/a59fd92a1ed62048edb1a1297d615aa7
-Objconv.v2.49.1+0.i686-linux-gnu.tar.gz/sha512/581fa0f5ea37e1802c9845bbc9df0b826fdad5900e712eed8767922e155026011570b4a4b8714430c038fb3c0d47965168a4c33b21bd28cd9080cb036fc9f033
-Objconv.v2.49.1+0.i686-linux-musl.tar.gz/md5/05524b26d550ad8fd045976f395cdf6a
-Objconv.v2.49.1+0.i686-linux-musl.tar.gz/sha512/5e6d3b27b80f96a4f1c278f2f8fe0ff1f9bdc2f1df223a7c4d1c235c18dd8eac0b8b74d37defda656142fb2882c1b590bb3e730cfed77e316012eb69b9580b53
-Objconv.v2.49.1+0.i686-w64-mingw32.tar.gz/md5/10e82481a5396b00f568eac690c47e0a
-Objconv.v2.49.1+0.i686-w64-mingw32.tar.gz/sha512/27d606acad2cf6789c9888c72887bb6a277c07f7b528fd8fe333f9738caae73e293df76ba9a0af5dceb00b8289bbc523ce235cb0eff0f0031bcf20300b4168cb
-Objconv.v2.49.1+0.powerpc64le-linux-gnu.tar.gz/md5/8755aecaacc983e2a9a948eff5c485d9
-Objconv.v2.49.1+0.powerpc64le-linux-gnu.tar.gz/sha512/8b2bf010ff7da164b59df7147cb4904ae6f2913a3095c649e20f4263f77fb92cf8513d9130a345576da2cca4caa30828cc43b9c8ae1870268e3140e0016ad557
-Objconv.v2.49.1+0.x86_64-apple-darwin.tar.gz/md5/0657a0ef9f278718c741da4d72c0952c
-Objconv.v2.49.1+0.x86_64-apple-darwin.tar.gz/sha512/ffd9247b02f72830d3b12e075124239ca472503701eef005b7457e21cd10103aaa13520206787818f11e9dcf35a156979e01cf5839dd554bab935ce757f032e0
-Objconv.v2.49.1+0.x86_64-linux-gnu.tar.gz/md5/0e029960584d00dbf8673ec4fcd9eb83
-Objconv.v2.49.1+0.x86_64-linux-gnu.tar.gz/sha512/ae747a84edccbc804239499c10d863c62bd5846b4ab87abab42c832c9fd446065024034d76ddc167d358821f90f8d2241c341232d9dd86cf31601e951e050a6e
-Objconv.v2.49.1+0.x86_64-linux-musl.tar.gz/md5/39fc1ec3602dcb6eb2f80448269588fa
-Objconv.v2.49.1+0.x86_64-linux-musl.tar.gz/sha512/e86114bf0b0da7297721c56b1cf246f52b9331083e4e73b53a30a1ff031f260a5d6bd97b455669c38af915689a363b99a30ea7ed743ebf49271b95e34bcfd85e
-Objconv.v2.49.1+0.x86_64-unknown-freebsd.tar.gz/md5/9d331f32792c394c0d11dc4e6c24ffb0
-Objconv.v2.49.1+0.x86_64-unknown-freebsd.tar.gz/sha512/0c9b7c2f58110b3c8df52d83cbadd5349fb81732acae6786617e72a3150aa9ae8da7afa1e9eb08639f4dd4e7e69f29b882f98e99a8a4404b569c545c904f5523
-Objconv.v2.49.1+0.x86_64-w64-mingw32.tar.gz/md5/c8ef7dd7742e2c9bf2d05d2b0310bb50
-Objconv.v2.49.1+0.x86_64-w64-mingw32.tar.gz/sha512/b47ac1f3a10ee4f958dcda72ac45f35b94fd436498d008642ce53b93ff517c0d4158a72cbb849336dc9d4a16e26021af13e7b6976f83610380cd78cce6a7deb1
+Objconv.v2.55.0+0.aarch64-apple-darwin.tar.gz/md5/8ba9de3951c3406881dc9b9a08f6d9f9
+Objconv.v2.55.0+0.aarch64-apple-darwin.tar.gz/sha512/ef58b22ae077c9ec01d2ce0c7ef3a15ead5457399fc058f2a8333b44602bf0df033b421113934c75ac4d9edbb48ebcdaf5e08608829199fc5590d2807dcc2e2e
+Objconv.v2.55.0+0.aarch64-linux-gnu.tar.gz/md5/11dbe5a12f73f5d8fe04b563cd3fc457
+Objconv.v2.55.0+0.aarch64-linux-gnu.tar.gz/sha512/371d8542892070aacd06dc44915edb053f8df71a992d11cedaa6fb496a10d4c2f50e36ce45892af2409b422f5218c16dd95cdaf25a3f6642d5c817bd660e8017
+Objconv.v2.55.0+0.aarch64-linux-musl.tar.gz/md5/2a8c213058a8288099630fc201fb31f9
+Objconv.v2.55.0+0.aarch64-linux-musl.tar.gz/sha512/90c83ad869ff4cb08248adf469019896fc99b17bf1d010db9aca965b942a853fbf2e7fb6d788bdf7096a6c5be462fc48a206e003097abf3374a6e5cf40217401
+Objconv.v2.55.0+0.aarch64-unknown-freebsd.tar.gz/md5/ae34a491f753ab3065c55010c37eef54
+Objconv.v2.55.0+0.aarch64-unknown-freebsd.tar.gz/sha512/dd6da052ffa879beb92459caa471af2d67a86c16940f3134e40ef51720fccd1a84a3db5f52d6f84c306c5ea83b223db380c898c298d917237920415350d598dc
+Objconv.v2.55.0+0.armv6l-linux-gnueabihf.tar.gz/md5/cad9ce64eee92b0656663760c0a79969
+Objconv.v2.55.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/4e5788efc851519af3230553a4765193a0696c16deaa4ce531ed9ca9e5e71aba86aae0a993c82b2fa220bebcddcb1f691a82e0f2e2db3608358afa122edddf8c
+Objconv.v2.55.0+0.armv6l-linux-musleabihf.tar.gz/md5/a963aafa8c1cc3e9b29c147ed07694ba
+Objconv.v2.55.0+0.armv6l-linux-musleabihf.tar.gz/sha512/96ff5c1abdc6d939cdd94eb47c33c23656b655796c34de35881f531a7b18a454ece39c90ddb21e2a8e5611a922d6804b18043e6421e7e07d16a9bee10ed2b7e2
+Objconv.v2.55.0+0.armv7l-linux-gnueabihf.tar.gz/md5/25ce7fbd3926eff25dff2971b74eba67
+Objconv.v2.55.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/7656afd0f4e5b96b26f596e0d9d23701a017c22cfd0aae9153f4aae3355c2f3bda3f6530c2daf60eedd877d80c941790ee4aad0f2fa1283f62b463efb1c2d222
+Objconv.v2.55.0+0.armv7l-linux-musleabihf.tar.gz/md5/0983c6acb87ced5f4a23e3aad0adb5ae
+Objconv.v2.55.0+0.armv7l-linux-musleabihf.tar.gz/sha512/98ddf8eac980f42260d5fa5f5739e3bc3aee040ad3b6af3fa6b148b657509940c2bdbc7760118f1e7e0a86ae6ca45372eae8600808c29a52a367ad9a8bbd8062
+Objconv.v2.55.0+0.i686-linux-gnu.tar.gz/md5/3252f86019ee64092fb061c1e8f42cc8
+Objconv.v2.55.0+0.i686-linux-gnu.tar.gz/sha512/07e36ad4c54c16dd0d3eacacdc79beba69ab602e0b5a4654fe218ef3eab340af5beb21cf7cc6f3bcf9af38a77eab5893ab3c38d52388154fe66d6dd149e4e2ce
+Objconv.v2.55.0+0.i686-linux-musl.tar.gz/md5/75f9a0ab44e9c5202b2a6e3433b5f345
+Objconv.v2.55.0+0.i686-linux-musl.tar.gz/sha512/626a5daa02203306fabd2348833fba4476dab9166091f86393d32eb01bdaf33f73fa6e377ff8e12ddd67c3aed1597def01e2c974c8aa5c498f36054eb07594ec
+Objconv.v2.55.0+0.i686-w64-mingw32.tar.gz/md5/c0ed10fe824e5d49436bde2843e02594
+Objconv.v2.55.0+0.i686-w64-mingw32.tar.gz/sha512/89bbc593bb977d10757acb4fd273e0d92d0c8d6cd4df88aa24b41434b80ef71de9df2edf6f21968777ef275a58e6182e674b273cdd5d94a24e04926c35a30633
+Objconv.v2.55.0+0.powerpc64le-linux-gnu.tar.gz/md5/9ea42b486d436bd9bf9f1244c4e4d9a8
+Objconv.v2.55.0+0.powerpc64le-linux-gnu.tar.gz/sha512/c14490521259ff3164521e861c6c6c9766785f201b1fd9e71c851d99b859405e5397ecb56a0dc7a8acfef779e56ca933be7319ca7fb2713f213669149f259847
+Objconv.v2.55.0+0.riscv64-linux-gnu.tar.gz/md5/a43272b0a5470143148c948260ad541f
+Objconv.v2.55.0+0.riscv64-linux-gnu.tar.gz/sha512/9b0c24aa917f7e6750c64bbe01a0c0857fb72e389b05d4b8d38aeddc41bd9546d3731f79b698ac0ee3cac2e6362ded4efc3fe0f99a842e303d91146e3e45b65e
+Objconv.v2.55.0+0.x86_64-apple-darwin.tar.gz/md5/4c656ec2e49be66271a5dd05181b2caf
+Objconv.v2.55.0+0.x86_64-apple-darwin.tar.gz/sha512/f20d4cbb3e6ddd4ad3d4e8c8f6be79ac6c28e1981552a2bfcb4683a78bfa242e28db2d6e0e9d1913ec71b3ed1cbb1ca14ac4321328d7a7fc878734624b78d6db
+Objconv.v2.55.0+0.x86_64-linux-gnu.tar.gz/md5/e1e427f0dcefe285eaeaabe8a947ea95
+Objconv.v2.55.0+0.x86_64-linux-gnu.tar.gz/sha512/96405d3a90918a978c772b0ab0452679dd91bd51ba9f2a06e8b5fdb2ab6608ba18cd74acd6167709af45efc5147f42a958fb1dac3d5b75cd270b13adf40e2f4c
+Objconv.v2.55.0+0.x86_64-linux-musl.tar.gz/md5/69c46df368e12f0ea046eb3fd5da743c
+Objconv.v2.55.0+0.x86_64-linux-musl.tar.gz/sha512/74bd1db59fcd016d1bb06e8445d54c3589fbb979ddb1eba2c99d8b366e27aed8f1d830c896acc89eb4458afd7870da5475a95e8679059186fe9481210d77c182
+Objconv.v2.55.0+0.x86_64-unknown-freebsd.tar.gz/md5/1db670d15bd5aae1810414a9b010fcd3
+Objconv.v2.55.0+0.x86_64-unknown-freebsd.tar.gz/sha512/dc1edaadb2423cf48403036c19fa61fc4f2cc9a3a6b2efe4dc37b0a1073870b10e6e78e5eca862167005b6101cc1e23806addbbe9be036a6efcef376abcae889
+Objconv.v2.55.0+0.x86_64-w64-mingw32.tar.gz/md5/d5a050f359a21e921a8fbf1285c31793
+Objconv.v2.55.0+0.x86_64-w64-mingw32.tar.gz/sha512/ec78583b8522bf56a5048e7ac95f5460f0b67d1d9cec5c3cea297fe9e036e5248ee27f1ca916a8dc26252fe2f1dc9e3200af397112e351ca8909a8583e1fee17
diff --git a/deps/checksums/openblas b/deps/checksums/openblas
index 5cd8d27baf25e..f1bcf3f322d8c 100644
--- a/deps/checksums/openblas
+++ b/deps/checksums/openblas
@@ -1,94 +1,96 @@
-OpenBLAS.v0.3.23+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/f4ab1aa718db6ab731179199b48506ad
-OpenBLAS.v0.3.23+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/5cd6326eab751d087b6638acc256a7c5dfc3a8a4be8949f4e2b5b8079aedc05cd8569774da19912fcbcd2dc1eac6a09d72d19bdbeded1198317992a85ccd605b
-OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/57b8903e05998d293d28e70ee6cbc4d8
-OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/03325728191f88dcfc2bea16d818c0325b4f42019ed9c2e0533233e8e2a4da09a2c70503632fef2ab55ed12b7da39fdab470b801d34a9b6f576bda509f8a8a8d
-OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/fe529647382de5693557363f658c71b6
-OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/77ac56f683a481477fa898d208e67c0c04c1ab8ca9dacb1e4e4ea3795fadb2604faffd1f3fd35d53eecb223c7f92de40cc8b2bdeb9c8a6a1b6a9949965cb9380
-OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/5aea8a00a946273a154110ca7b468214
-OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/a606933bed17e563d15ac206a4a37d38d75e9bb0bef46ef62485dcd32aa5a0e8501dab01f6887a1e60736c59177c6fbf0ec541fa521a9a8de854f44703f337c3
-OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/d81dc2a42a8c0d87f4ee9bad98579f2a
-OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/f2bda57546f1b9aa1f8dfe9a07b2243cadc002a9ffefbcfdde344ccc96efb07608a55bf8dbb6de34925af03f01ac5487f9fe293befa84edd9a84c01a9b7409e1
-OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/400ba512f73a60420aa0d316bc24db48
-OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/927c711c3950f24e6b4c22c6dd92cd2b212e3df9241c637ff42f5b9135e7bee8f3864868aea594c6e8ba5b40f0563d63a5f8634ea3c3276bec35d480601e76e5
-OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/6a91ea53f3aff17b602b324d025309c5
-OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/8ee85883fcc605c16031bafdd0f1a4f4d4a5957a4f85c2022466232f902a4cf64c284537dd2f237221f7d0c154e2b46200501891d3990e94dcf49a74a66c36de
-OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/c653ff340dc25b19ca36309060dd6b1a
-OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/cc77c84538bb0301eaa98ca1a32f024da6242e40e847e71f4a36ab69233590422aea41a32ee67031d8055c929f741617053416e5b9d446affa36e7233e5af48b
-OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/18a914a1df2be07ff6b419617cb6347f
-OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/eafab27655b0c179ad8b9b1dc818e8394d365f19cf75a0d77402951a38e204aa2fbe580037116a28e8e1254b66d15a543ccd0f438f3ae388e8bcad39f5953c64
-OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/4b8d18500b4bdc6f1081da6f0837340f
-OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/6512bd03d58b7669dba7f9830d3f8654b2747ee66c7bfc05acdbca6c3d2c3750c9d1163768a3f91d56c5a87cb30705ad6f10395652fee4c9cd06cd2920db3027
-OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/27fd022a3b84c3a92da9d6062d8dafaf
-OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/c0e73f2012df2453cc6231a9e7a644609ba1280c9aea63d2cbbf9594539fb26c8f9ab6976de8ec9870cab483b1fe7e3a1fc81246fa99bbd7526051e74a4733e1
-OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/e2b0503bf1144f4b6a65ae9f09b25828
-OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/204678995b9f337e4ddae793762c3a00968faa3da3433ea17578944fd56f33c381150521b6a561d6ff2022693f8d46b9d0f32f330e500036b4bfc08a7dbd8a62
-OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/3e733c1c668a3efaccfde643092595e5
-OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/4a37e5de66920f20a648118f62555755b51e6e089e7ee43d2b7b8ec0dc47e68c7705b878158ad83d152cfebf77118f789d1bf7b2ee0702334d4317f0c6a926a1
-OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/c653ff340dc25b19ca36309060dd6b1a
-OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/cc77c84538bb0301eaa98ca1a32f024da6242e40e847e71f4a36ab69233590422aea41a32ee67031d8055c929f741617053416e5b9d446affa36e7233e5af48b
-OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/18a914a1df2be07ff6b419617cb6347f
-OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/eafab27655b0c179ad8b9b1dc818e8394d365f19cf75a0d77402951a38e204aa2fbe580037116a28e8e1254b66d15a543ccd0f438f3ae388e8bcad39f5953c64
-OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/4b8d18500b4bdc6f1081da6f0837340f
-OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/6512bd03d58b7669dba7f9830d3f8654b2747ee66c7bfc05acdbca6c3d2c3750c9d1163768a3f91d56c5a87cb30705ad6f10395652fee4c9cd06cd2920db3027
-OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/27fd022a3b84c3a92da9d6062d8dafaf
-OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/c0e73f2012df2453cc6231a9e7a644609ba1280c9aea63d2cbbf9594539fb26c8f9ab6976de8ec9870cab483b1fe7e3a1fc81246fa99bbd7526051e74a4733e1
-OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/e2b0503bf1144f4b6a65ae9f09b25828
-OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/204678995b9f337e4ddae793762c3a00968faa3da3433ea17578944fd56f33c381150521b6a561d6ff2022693f8d46b9d0f32f330e500036b4bfc08a7dbd8a62
-OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/3e733c1c668a3efaccfde643092595e5
-OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/4a37e5de66920f20a648118f62555755b51e6e089e7ee43d2b7b8ec0dc47e68c7705b878158ad83d152cfebf77118f789d1bf7b2ee0702334d4317f0c6a926a1
-OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran3.tar.gz/md5/639643a12f8018e4be7bb1f9f29e57f6
-OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/0993e1967964874a3f90610745d82369ee70fa4313445391fdcb26c4218c6badb18577c67648d2f77f359b163dafde31a3723998e0b006622effeace506b669f
-OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran4.tar.gz/md5/13ec86d62840258c425b0a5a6824a609
-OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/0bc74dac87b8ab5ea244fa5bcd05baf2968b7041c4eb392ff808d0aae897cec4b3082ef7fecda28aea2662b6cd956a5254212740b1802a947dd3f1e5a3dfe2d2
-OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran5.tar.gz/md5/413d4eae7b9c409204ab5fb7867dc30f
-OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/4a484d2aa239d8c1e2733cd9d16bd17549f5048d9958899a4e20039a7efcfd280bba901f3fe63b3b079fd7fae88911f7201a7649a472d47d0148ba8520f350cb
-OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran3.tar.gz/md5/7f342d27a9b193b5d37e2ae4de6e4640
-OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran3.tar.gz/sha512/2927b18e176e07fe8a05d2eba24f6160680131832094bde9634f0890c1bc3b877c3293163fc65067cea402f3e75871c41b47e4a9999f273e667ac400878aa2b2
-OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran4.tar.gz/md5/523c007c319adbdde6e8cd7d3d89a9a1
-OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran4.tar.gz/sha512/ddb7a8d67c9430976ad967e21a6b8717c8a5501e8808fabf6e7b2e7298a0ca56049dcfc12214a5a19dbf7bd52d625b0b2b1bcc6b4c1d921c3ee62fd2766da891
-OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran5.tar.gz/md5/7dd91db180e59da5f866f73eaccc4d1d
-OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran5.tar.gz/sha512/ff0ee65e536eae5ece7fbc00a0735349d560a142e025084d64f28891bdd3da5914e976640be354d8ad34fd3d89bfb90461eb95f2426d5e292906ed4ead1cfafc
-OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/fef43c3fed5ed7e9fdd9c7757be6b95e
-OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/b580c1da073ed94d1a259183c5b2a6896a746c5e88c83e2df57fea801f259cb49f99b3468bbc5c1d7dc6bb84f597843bc3c383c9cab7608dbfbbb15352fb1012
-OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/88db137baca7ce99e58ff3b13ee73644
-OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/1608f3ee3964df833db9a1277fb9f69e3bb1d328a27482ac419e08520a51b2cb25501cf8986b2ff617bc04881984ce73ecd2b55b0c99afb5cb28f32d24d89052
-OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/32c1ca252dcae7d02bcd54d2b00a4409
-OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/401126557d3072d965327aa1440eeaf22fdfb1e5265c28dca779d81b94ababd1d487603d55e384f2bac305125c9ed3826f0bb7be99af20b0d18a674a8069ce5b
-OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/3059083c8293106486a0f28a3564e499
-OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/019bb4bc71d7be14f040b36d1b44f653ee89aac680749a6a3b8b72446dffae185dd3d8172ca7ac9aac45cfe564c0fc6cf3221a6f8496b9ba10d04ab44d897b65
-OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/648167f83536f32921f1208d09cc8f47
-OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/084346b93a99671967433f4ac6548d7b828aa65c402bac7e68aee78bbf75e5cb06b22f42a7d4876fdea3e838162278ee3fcf011fa18530c8d8b0e853a4c6440c
-OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/9796916fb0acbea2e93747dafa96d496
-OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/7c3643c3669fea262907bb5c0f27b492adfec910716498a0bd992d705a544b21023d77801f27c967c07be9d5b30bbd936137c8f59f61632fb16cc0e1f2efebd1
-OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/cbf9ad429547ebd1a473f735b6c65442
-OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5e98ec17ee35624bf0a286a2dbe01f5ae4fa879274af70b218080c537a325a92fe76331b746e98b3ce3a0d127df2c03f522f554cb43c169a2b7b1890a9a8a81f
-OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/28792164b6c34bc627966e338221ff34
-OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/433dcec661ff2459740c4d1e72d766549135f6f41a7ffb488502d76751fcb00c3d75aaa0e3db182441ef6b5e3b487a9df3e1b8b979da3681496f4ac6c6ce819b
-OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/7013b806bfcd2c65582df5f224bd7d86
-OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/1078cf5583d158af5d38690acf913db378195b79b4743d977e7654c246fecb0ded4ebee96d89f54c5ec5f04af1b9858bcc0700251ccce1bf7c87926ede069b91
-OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/f959117d5c3fd001412c790bd478f7f6
-OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/e6fbe9fe1b7a92e10760d2b945bcc2c1c5e8399d729fbbb771764e7b72856707629123bc2d2fed2549f551776f8f0a737b0f414ffddc820a655172d933c10af9
-OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/af04d6bd91df5c9bcc63fe06c88a4b79
-OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/0cd4972d0a44505f9d8d3958bd20e491c986f55f5f84000ab534020dc8d39d788402355fa51bbd521c8c1bf6884d9d35c1db156bd106a98fbde80c104e8dd5a1
-OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/c5e6138630c5b616df1d045e1c388710
-OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/a54db7cb7e28dd792bd2c4f33945e7d99db1ee9a620bbe77a21cd7fa7f4cddc5c7744d27116951582f00223df09e7dc2258754032cebd57f61a723762743d3fb
-OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/7d407633f4f59c305896f9132c098cd2
-OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/8a04d46b6dc2eef87d6c4ac43bcdacf5da2b1669bb829c42f07f7f73bc0dba35a6e48f303d1e9cb951062fa2c3a4cce894406c5551c2bac7f57f02d2f92122a3
-OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/1d6c7e0b6f3eeedb41ecfea9881d0bac
-OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/9152b7f584ecc3f06caf0eaf0a496d9e9c16afe41a4750a9bcce0477cd3cabcdcec5c97c24fa3fba03d603148c8a3dcf7199c171abe10121aaee2f8a68b93c91
-OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/fdd5c9e5f746403f7ba4789d8d8c47e1
-OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/2bd980e1e2021b32f3455fb3fdbae407fb672074ca798664c77e063ea6a7503b625eac7655c8cf25307afbfd9abaa64af52fbb3ed811ff8eb6515e3edcf26b1d
-OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/8c69d9b7b6fbd0896f839c8979c35a81
-OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/d8859f485fa35b33be167dd45f1fe87696be0b12f27dd041087cfbb9df0da94bb726fb9c5f89162405de473969013e3a6a11b0520236db7f5603b25466ebf0d9
-OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/40724e1d694288f930a15860650f37bd
-OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/b7bd75b57803da93d19016f5fe63bd88357aa4e728fdde026a55ab2382957f5a82254b12e701ffb19085a6d1ecc0c0b0c685efb6fa9654e7537f146087cce00a
-OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/d78352f4e9baf1225aa135b03da9315b
-OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/aa4d3b0972429af7376e80eab93375ea0368f2f3a31cdbacdb782ff32f7b1c708c5e2d7f1c30ba5b8a7c604a3a7c27a7601fc7f09c8dad2b6dbc54ff099fc0e2
-OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/dbf8b0592102b01de80df0767f681227
-OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/9bdf9ab9c3ff36281fa501771c4ed932e8a481ffc4cef08725b4877999bd320c99f9c756beba7143050705323bdc0bea150ab3a11e47f3f7c60f206595c37b73
-OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/feba9f9647e82992ba310650e3b8ff71
-OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/b6c98a5a57764eef4940d81461f9706f905d376d165abdbd0fafbdd5802e34523ad15e6ee75a4550555b7c969630c43438d6cce3d6e37ac95e57b58bcc9d542c
-OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/732544eb61201b6dd7c27d5be376d50d
-OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/7b68cceb0bdb892ae74e2744f2a9139602a03e01d937188ca9c875d606d79f555594a5ff022b64d955613b6eb0026a26003011dc17382f019882d9c4c612e8e2
-openblas-394a9fbafe9010b76a2615c562204277a956eb52.tar.gz/md5/7ccaaaafc8176b87dc59d4e527ca4d9f
-openblas-394a9fbafe9010b76a2615c562204277a956eb52.tar.gz/sha512/12235f0459469b483a393844c228be5ad4bc60575bbe4b3238198f2480b7b457e4b0609730ce6d99530bb82e1d16fdd2338ceed6d28c952e6fff0da7f571f863
+OpenBLAS.v0.3.29+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/227fc95ef10e30698aade797ebd8b685
+OpenBLAS.v0.3.29+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/49a932f0c1c2d1087d20a3de2940733ed6a944284e1cf2a384a7401c5ca6bd90a35e9679b4f19bac176923aa170427e7514a47fc16261413ee03a59bbb301bd0
+OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/77acdfde5dc6f05629f3fb68a95b78f8
+OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/f28187213eac2d481bc12263fe13fcb35f4771084bacaa42b0b149ac15cf89d033910519ecc5cada77915a48c95a2de3ea4a476c0c6bc3f154e7f2ceb4bf3ffd
+OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/4fb2bd80d3e4ad8ce04fa33c9a2aaa19
+OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8cc2aee3b351cc5c78e494efededdf98f65ce8942453bb3a55c90e0822ddcc07bc7716d0746bbc16701eca458b7a7aa933e9363f71bd56788c9fab36bd9bcf6d
+OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/3a3ef97dc80dec3d0debade503ca2232
+OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/53d707f9bf57c1a19279f0146e767d779280f922ef621b5f372cedc018efb2798adabbd762324819f342d0fd98ec17c68badc50da7b6e9aa3e57c3a3c045dab2
+OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/463cb6b46091f4b4b4f2535b9f38f11d
+OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/5a5a7d5a7ca5e619d5af9bcbab7cfffcb4b7954005cb4a2d03f4cd0ef29c95707e830ad0b0303d694cace557cb1e9973c0244ae1f635249a313fb9f9cdfaacd9
+OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/699ca0247ec7cccec0d9d2801b5a35a7
+OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/3bb2926d2d2a43c280bb947063dd74b65194118edbd99df820bef56a546648ed903245e0947ebc31765ff43784b11349bf86cd592c78d143c0627d692162b344
+OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/2ab069e5abd5014495b849bfbaabbd3a
+OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/fd10e4ce326c524d97e69e50342ab63b8298c796faab8f4512772fbb9c4ae1ddc85d54643c868f3b2dc8084af974430e1f8751576bedfdc88af2ba0d2affba1a
+OpenBLAS.v0.3.29+0.aarch64-unknown-freebsd-libgfortran4.tar.gz/md5/ce5d04e041e9447529ad8e043e45895c
+OpenBLAS.v0.3.29+0.aarch64-unknown-freebsd-libgfortran4.tar.gz/sha512/eaf521d3957713e9d22b2c0b991f5eb846096891dc15bc42ad0817c32e6a1343617d28afe739dce0e39c185d022d3cdd44db2610635691990003b1b0a29f4657
+OpenBLAS.v0.3.29+0.aarch64-unknown-freebsd-libgfortran5.tar.gz/md5/00b3a4433f93a56fa8b0f17acc254865
+OpenBLAS.v0.3.29+0.aarch64-unknown-freebsd-libgfortran5.tar.gz/sha512/a9845380778ec15642d74a46dfa65f8a325929f8ec8d61915941f6e228bb1ed29310f86f20ec559fdc2d5dac98a780f71a1b3116676a34e18ee7c0cb86cb7124
+OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/1be6fa7ef684733faab744fdec6c8dbd
+OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/7c7803a0484b8c8e343ff5049e52fe81b76e43f0aaca7a5ad0134079147d2311cb5b159738486dcdd7ec69eb42cb0eea738741401179499a53fead2fbd8dba3b
+OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/451dad687dd26a299e4a44db37a8db2a
+OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ea73ee91896203566dd7510303c73d77189afec913ac1de3b7c7935dc2c460f87c83a8ddd272d9542b619e419b9392479f02540ef1c8d3daa528bf05aaf5c3f1
+OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/07ca32f715981570f2e1a5ac6721e569
+OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/c6ece0dac375fd66a303ca6f503e46f78472a59dc13381e8462e3e9c29e133cbe87ee77f6144a80924ae286162620c4395f5217e4f9ba379a471409085950427
+OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/49ac07fcdf0d7ce221051d089b408e05
+OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/6c9c379473f1bb5f202ca183c6ef4d43b442c867e67712e6ec2936790c282143c1edae0a1385e366f729c952e02fca13604f6b51d778dabb28ca7be0f359281e
+OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/87e3dea9e115fbc9a0c7f64020c41f74
+OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/63a37a9cc882562978460e1e0f603177921a64ece7d4050b0b7a584e05d80f58314e7f8e988ea5446945d7009620c4f746ce547fe7dcb77a0707d54fd830983e
+OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/8c85e7ce9bd702438c548bdae54f5c32
+OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/3dbaa326944d79688fa167c968a7e2660bf3b94c2e052755cc8b1ede853c02364edb7fa974880c37c60ee6e6f84c75848eb4d999c5c1e8881441191dbab056e2
+OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/1be6fa7ef684733faab744fdec6c8dbd
+OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/7c7803a0484b8c8e343ff5049e52fe81b76e43f0aaca7a5ad0134079147d2311cb5b159738486dcdd7ec69eb42cb0eea738741401179499a53fead2fbd8dba3b
+OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/451dad687dd26a299e4a44db37a8db2a
+OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ea73ee91896203566dd7510303c73d77189afec913ac1de3b7c7935dc2c460f87c83a8ddd272d9542b619e419b9392479f02540ef1c8d3daa528bf05aaf5c3f1
+OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/07ca32f715981570f2e1a5ac6721e569
+OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/c6ece0dac375fd66a303ca6f503e46f78472a59dc13381e8462e3e9c29e133cbe87ee77f6144a80924ae286162620c4395f5217e4f9ba379a471409085950427
+OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/49ac07fcdf0d7ce221051d089b408e05
+OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/6c9c379473f1bb5f202ca183c6ef4d43b442c867e67712e6ec2936790c282143c1edae0a1385e366f729c952e02fca13604f6b51d778dabb28ca7be0f359281e
+OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/87e3dea9e115fbc9a0c7f64020c41f74
+OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/63a37a9cc882562978460e1e0f603177921a64ece7d4050b0b7a584e05d80f58314e7f8e988ea5446945d7009620c4f746ce547fe7dcb77a0707d54fd830983e
+OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/8c85e7ce9bd702438c548bdae54f5c32
+OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/3dbaa326944d79688fa167c968a7e2660bf3b94c2e052755cc8b1ede853c02364edb7fa974880c37c60ee6e6f84c75848eb4d999c5c1e8881441191dbab056e2
+OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran3.tar.gz/md5/86834236dee3db3affb38b8cdcf59681
+OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/6731b4ea22a0f3d25f9d041e2baa6d66f1027dce49931a334a33711fc4c6de5da368274c9328618ed78158855c5d38524b917447d1aafb5c551934cf982505d2
+OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran4.tar.gz/md5/c63c2fb1bda01456d99590e9aec3b45f
+OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/437c260499f4a28db9efb4bbdff31c0f675f3ccef1bd48fd2dfbb8c8897fc75608bd7247293bd3eae129b133cb05c3c8150dd19c243faa09b6506688f57c633a
+OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran5.tar.gz/md5/376567d56bf4314f8a4adcfc4d1baa66
+OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/c4952874b19af4fd0d5541999d07094f7e7e983124964405a4756b9adf619172b7128e11557e64a80bc4eadaf76c783609a75f25ccfc44fc4f181886a0c8ca18
+OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran3.tar.gz/md5/8f7abbc6d5cefdbefb2b9499ec8874c9
+OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran3.tar.gz/sha512/b8c39674df9400efecbe4ac740f0c3ef11a04dd852f31774d63db3ca6583a21c8e0a0b80aa4e7b82be7a8fa3de38892d4fbca34244acef7fb49e8ffc0e1eed09
+OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran4.tar.gz/md5/6b0f0544fe45de9d2dea946c7f55cc40
+OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran4.tar.gz/sha512/8c21df39a8ee99614ef0880706c1497d032f68dfc332cc5ee111f69bfc818db4896115a964f16115ac49b01b31713037c905792d9586dd05471efdb21dd0be88
+OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran5.tar.gz/md5/aa343048c35c5227a4bcc37f25ddfacb
+OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran5.tar.gz/sha512/af6c9d15d9d5a4901d228522d2e20da5276f1bf35d7f34648697ba7a39153a9152dc17f5f0d360593e733ef3e3317df29581cb86fdd9fe8d6e6093592a6240bb
+OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/8595dda5ee1f15b2070d8ac20077f389
+OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/df7d7ad68b47f8865d01f6edd6ba44587c6563ebc4a1900f92210b5117fc7c581e6145f95e10fe7a3db48eda9805330073c8cbeec7eb8a19978ec33f2528cef8
+OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/4e67905ab599f24327e9726f70d261cf
+OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/13ba78e98d7c2cda62a6ca9226365e90fa8a5404e4006ae5e49030b314b762a37d78977f14c72448c844e68a6b83ecd679c60362fde023c9052b9b8597d7775c
+OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/e78c5005d9ee57ab464fca86c6d6fff1
+OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/8ceb9527461136cd4f4d02f10c241f5e7070991f73c974389acedb1d9d7be4bade592bc021ba1001c5ac148ea580cf8355fb89c88438820bfa665bf3e72392fa
+OpenBLAS.v0.3.29+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/421d93da4cfab0df79569e09dff1015b
+OpenBLAS.v0.3.29+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/351174d948626ad36daf40c45672cd1ac40bbe4af25c28332fbea62a7ba89188a7d33836d327d31ce99b9a9334c6053366d33b58f588355c2818e332e46b34d0
+OpenBLAS.v0.3.29+0.riscv64-linux-gnu-libgfortran5.tar.gz/md5/34cc0b3260d9471bc8fb32005e3c5043
+OpenBLAS.v0.3.29+0.riscv64-linux-gnu-libgfortran5.tar.gz/sha512/5eec279c5eead55d099d8db4a75dd4a3f2bcbc8bb22b33884a89d678e4eebf87c6dece1aa4c24374d0162b35f376648a473c2d6d7866583e61016e37f4262820
+OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/f921a0ad6ebf91f444cb8d927886e573
+OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5cc98edf9fa8ba8981ce78b2595fd27645c783561ff19d0fd25ecc927f63492437a4b9b80d5caf51ad619b7ca5d24cb43e153156921f9f03c64741014b686196
+OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/0126b52c134954e63ab8f9197afebd7a
+OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/6d1e37009e6831a26f25bfd3e95dbcc841ee50a3f84dc4355d7fd528cd74a400138955558306345e986a732d0d1ef9294c4f5be457d05119a8e1e5851cc8ca20
+OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/e8c7bd786672a9caf989dbe4fcef896a
+OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/2e708fddfa8e5821d2e44bbc00a86df83b09cdfc0054d7c2bbb2a8de52ed80c95973e6602048335a60b54be1baeb617121b605644daf50579b2044d0c5766063
+OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/b1efd957a2a63f814168bd318381812e
+OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/097a750b7f381089588498e52a2b07a67202bfd4bc2e38f5abbbeb372129e392fcd53beade2fa7cb60ef0038f2baf61d57fab40b5585806d3ddb1fcdad73bbe3
+OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/c3560828f503962c6ae94135c4f00ac5
+OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/1ad514172e51a5d3eb6fea03182e3eb9c6db99d9d11c430e3d8542a9ce0f5d6967e623b9c0951535b683210ce0b02460358c67520b06363594f6063f8f012396
+OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/07a9c3050824bbc6a96efdb333fff0ea
+OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/b737ab1fc8c5ffd1494804c59f8fd3e5d3d8a063a89fbbc29cbd75d43af233ddf77f63d0e514059164517f408ea340ffe95c020a7c696af8c52be3a7259922ab
+OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/9a4a828a1b58737c79eb170c94021c52
+OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/785443a38cda87a63ee4268cdaa51bbc2c4662de27e0695cd7e21ffe55c3bddb1fa1a399edec39c3466f2ea0bd5ce727daca2eb381213059419c2e8371b5a733
+OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/cd4afdd6f6ba06c7541e7124316802b3
+OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/55796fdb52e1ac92750dfc2233d3feb37b53920b12024be605bf6c7322153c4dbeb650f16d6def4f0fac685733a04a1c4cacb1fc4e562a27a00b4f44484a4715
+OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/8cd55ac7a7f0a7bda80b44171793718e
+OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/728991a4c39d691abebac3ebbb2dbe093f3a4acd2d3aefb5c7c08bccf0dc1fd5aaa24de6367961d278d448b76a4ddacab36b7be15128f7ccec5049eab83828da
+OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/c2dda93a61e02812831b6a6e33f7d2ca
+OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/bd62e44f266b834c6dfab068841506a83eaf510eefbcf8896dfca36671321430293dc251885af108d94affc5b193919e0e29c965fef3ce6d994df37324aef013
+OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/8cbd64d2ce4e3944e702696839a4ad3a
+OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/3621dfa5cf8bca62bb8f2a049acdc0ed4e02cb2b5585758e6e1173e61b3a5f0e1655a10f2feb2f0e70a098b00181d0b24dcd61e1205324d436b712f58e58df5d
+OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/476f1ebfb93baad6fac778fa00c4f99e
+OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/06aa18da572b0904e5d8ec0823626d0af02a29224aba98efd43d8fbf4636d2625ece9f88f9a86d2e493f016c106f2ae71422191afc16dda2b26bbc81eb09d901
+OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/8c55d04d9def74f6bc2cc0d03b764975
+OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/d6196a339a263d80c05b94596ec5acfeff6e3ce93fafee348a864f760aa1239aa59ee294cab29fd730dcf7974ac6dcb230433184be093612bad3bc3edc067649
+OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/8427f098a44457ba65b21a16439ee6c0
+OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/4855321b2a16d55e1c6e830e33d0a199286002798c0f33c7f594a55626b5a502df94c172de4fd0a38ab6ba92f384abbbc3ef06123c3115a3f290f50a9d43ae9d
+OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/9d1636bb7500d9ba15ed703231f8def2
+OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/0b3530fd95e01d58b85157d7bb75e44ee7b2f0c5a912920ff0763f404e1ab28d16a624463f3f20241c7baea57e00fca3f896d6e0befb6a1c9e5ece4264b87e35
+openblas-8795fc7985635de1ecf674b87e2008a15097ffab.tar.gz/md5/095d293409140dd8eee500eb92372eb7
+openblas-8795fc7985635de1ecf674b87e2008a15097ffab.tar.gz/sha512/7b10d4c2bef68159e0a88fb6d4fd0ecca17b4c6394479e8f838f5078d9d5acef24c6bd44777d43c03859c952d4612d76b57aa0bff367b197920ea16eb3839144
diff --git a/deps/checksums/openlibm b/deps/checksums/openlibm
index 4c8ad913fc58d..fc20184ac85da 100644
--- a/deps/checksums/openlibm
+++ b/deps/checksums/openlibm
@@ -1,34 +1,38 @@
-OpenLibm.v0.8.1+0.aarch64-apple-darwin.tar.gz/md5/cbb86fb881fce48d837e40017272f7f5
-OpenLibm.v0.8.1+0.aarch64-apple-darwin.tar.gz/sha512/3fc7753ce12e0012b42e469084351ec97e20c1761b50add48af4b4d92ba61b9db4a015243f31f7ec9bf322a9a4e098cffad31cd4501424568bb49fe973b63a35
-OpenLibm.v0.8.1+0.aarch64-linux-gnu.tar.gz/md5/416b30b38b91a76d1613753ec85ac4a9
-OpenLibm.v0.8.1+0.aarch64-linux-gnu.tar.gz/sha512/9ee5821ee83cd5e29c4f338e0076fe3e3705925e8556abe95f0356151ae93f23be0bbcde42cbf6b382e8c29a201959cb31be349ef3d1447e2d19a414a476fc55
-OpenLibm.v0.8.1+0.aarch64-linux-musl.tar.gz/md5/b170fc0058803377d7c4d7d0c9e4b954
-OpenLibm.v0.8.1+0.aarch64-linux-musl.tar.gz/sha512/be311888953887745a2d64eb3d1d5755e2f37e9f46558c6f6722520c503ee23d3470e97d7bf28f0a03915a398c31080e789d6e1287a2b743b6fd3f37b3a2911a
-OpenLibm.v0.8.1+0.armv6l-linux-gnueabihf.tar.gz/md5/63f60d2f13acc6fd2ba3854a8ecf2b0b
-OpenLibm.v0.8.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/1853a8346f460cf7e26efefb27c36440976e40f000aefc22a81bb69bb25d07675a19f4b64c4dea8fedaaae76925467739cee8cd5743689ae55512e57dab54286
-OpenLibm.v0.8.1+0.armv6l-linux-musleabihf.tar.gz/md5/5b410e9611b188f34fcc5314c45d2297
-OpenLibm.v0.8.1+0.armv6l-linux-musleabihf.tar.gz/sha512/ecb2fd14728c40c7e3d2cf7c4f1dc06568f1dacc60677218ec59011cd913cab173c33db1c402a8b27b8f0556ca66667ebc033130222617cb4f5d9d8cfe7431ed
-OpenLibm.v0.8.1+0.armv7l-linux-gnueabihf.tar.gz/md5/0fc3732640b6bfd52759e74be75e2290
-OpenLibm.v0.8.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/81214ec825d33e37c9e889bea054f3aa72466e5fa585356247ef0ec4653b548f7836219092a8c7f0bc3c694e97582012cd026325e0b1c1a6fc113c461dfe49f7
-OpenLibm.v0.8.1+0.armv7l-linux-musleabihf.tar.gz/md5/7ba7a7f9461b43b8ac622b9fa9c0489a
-OpenLibm.v0.8.1+0.armv7l-linux-musleabihf.tar.gz/sha512/e088f806f9fad70b2e6ea28a39ffeb083b4c1c215b1cac73e848a06cb19efcf3ff100e4d401ec2a0ed225ecba6dad115f5d245a612a39c90334a753bc79947e6
-OpenLibm.v0.8.1+0.i686-linux-gnu.tar.gz/md5/4a2eb776682551a25bf1d27e9d8b645e
-OpenLibm.v0.8.1+0.i686-linux-gnu.tar.gz/sha512/716808c4a2a8c06439072a39db1f4b93a171a2f42e9677cb7f3eba04f204bc181f96c915ad8c74141952eb783cd82ecf3804ea60d830e3f5d9b88bfb4924223d
-OpenLibm.v0.8.1+0.i686-linux-musl.tar.gz/md5/1acd5b790b7b8d1c40c6b094b99fcdb6
-OpenLibm.v0.8.1+0.i686-linux-musl.tar.gz/sha512/22c28a5c5e9542ddfb23297523b56e0a964bc322d252d5681e763c28d4c876dd683d3456297e385f560ab4cf378e5848047aec6cc934850fd0a4df0ea6967b44
-OpenLibm.v0.8.1+0.i686-w64-mingw32.tar.gz/md5/8e974b3bafa9dfe1cdba1d31049d7e85
-OpenLibm.v0.8.1+0.i686-w64-mingw32.tar.gz/sha512/df9e5250dea575341ec1a40d94e3485701690542bc7dfede0504c04fdb7f3fd272d88debdd6546d8644563fb244373b5f4e214da1d0b0133db5b66cdafbf441f
-OpenLibm.v0.8.1+0.powerpc64le-linux-gnu.tar.gz/md5/83f68736e911b7c700bf7a8c79cc48a8
-OpenLibm.v0.8.1+0.powerpc64le-linux-gnu.tar.gz/sha512/b879f81d0c02f457310efc08269a7012fe6ed479d33bf79830e48dafce03976425566c5a210ed4833e106848eda038ae531f0c956971b3139c60002a5b1c1325
-OpenLibm.v0.8.1+0.x86_64-apple-darwin.tar.gz/md5/2cd7845dc3d5558cf77e8b6faac4a659
-OpenLibm.v0.8.1+0.x86_64-apple-darwin.tar.gz/sha512/f894c5b7541ebd4f652cb0028b2d367db6af13258d5a42722f19e0ac3a6bc438453e5c2bd17d8b491288c93796ba638c4a3a247f9d33abbac392a6db9169cbcb
-OpenLibm.v0.8.1+0.x86_64-linux-gnu.tar.gz/md5/eda96ecab33dfb9a015375813c41d14a
-OpenLibm.v0.8.1+0.x86_64-linux-gnu.tar.gz/sha512/641d35dfde15b0868c4ede7d2210ac136e4ca7bf7a969623cbecd792eb09e60824601a6178dbc2c8e4d49523aa105956b5f63566b789003ec736164a8fe5df4b
-OpenLibm.v0.8.1+0.x86_64-linux-musl.tar.gz/md5/8d8e6ffa1ad9574bd8ead8b99754e122
-OpenLibm.v0.8.1+0.x86_64-linux-musl.tar.gz/sha512/a66102e69688fdda1c1a51ab07697db0e183b6def21dfed411cd6a92e6c23b22eacd6cccab16e43b86e318d967f81d43dc26bc778746186965bc68c65c7e87a0
-OpenLibm.v0.8.1+0.x86_64-unknown-freebsd.tar.gz/md5/7634eb65eea08ab48a9224295071f058
-OpenLibm.v0.8.1+0.x86_64-unknown-freebsd.tar.gz/sha512/f2c0a6d5113de911aeb516308b1d0ea71a1d61f2ce54c367670a6567444e0030babd66546489e288891678aa6613f40fd0182261de3244f1aed1085c2a32e81c
-OpenLibm.v0.8.1+0.x86_64-w64-mingw32.tar.gz/md5/9de4a420caab2cb53ddf86eb0241885a
-OpenLibm.v0.8.1+0.x86_64-w64-mingw32.tar.gz/sha512/928675df75d6c6f1e1acc46a2bb554ef120d74671d23e8682adbe05732f668401eaebd98d534e54f7f34e1657d2e1b1d19e18b3822faa891cdf06afd0c3ee56b
-openlibm-ae2d91698508701c83cab83714d42a1146dccf85.tar.gz/md5/19408d70bf042a109e1c267a53740089
-openlibm-ae2d91698508701c83cab83714d42a1146dccf85.tar.gz/sha512/9597fdcbc4af8369e6eecc3f8e86f251661cc64d236578f3ee8a6b39e77a47951446e1a0fe1151513da153e7ed17bf39aa5a36c32153d0d0400232bed2839e22
+OpenLibm.v0.8.7+0.aarch64-apple-darwin.tar.gz/md5/68dd4a8d49ed68ec464352220b006e66
+OpenLibm.v0.8.7+0.aarch64-apple-darwin.tar.gz/sha512/fc6f69c3477c99058a074784af2f19769de5fdfbecbcbec76ce1253de69788ff41304fe8677a50582f6628d3a50b889b0c56e19c0d7bb369c493d72810b2aef9
+OpenLibm.v0.8.7+0.aarch64-linux-gnu.tar.gz/md5/421644dba2a629013db582011b2e5f14
+OpenLibm.v0.8.7+0.aarch64-linux-gnu.tar.gz/sha512/429416fd411a06ab8576653fb5e07e6fc5e210a3d6939451a0d56593ae74c04c330ee7936fa10b6ca6c21748f697817e67506b98c49a11d39ae9a98a958bd6e8
+OpenLibm.v0.8.7+0.aarch64-linux-musl.tar.gz/md5/ee870854946972b6a2ff989cc1ca6c12
+OpenLibm.v0.8.7+0.aarch64-linux-musl.tar.gz/sha512/3f9c0514fdc296fa28562abfbc513bba442d48c3fdbe38622a0a9e16bfbfee39135247fcc4fd486abb629a86cc7afd342215baccb4db59d07a5a1fa2c8356549
+OpenLibm.v0.8.7+0.aarch64-unknown-freebsd.tar.gz/md5/02ca3e2e478e8d2eb3c7d24e1584ab6f
+OpenLibm.v0.8.7+0.aarch64-unknown-freebsd.tar.gz/sha512/d1f44059270e5884bea9b7f14c15b251dff528002d3f1b61144b0d197891487b690917711ce648c298a7f8db6d8a2035b82acee3dfb2e9b18dc556a09c3b7128
+OpenLibm.v0.8.7+0.armv6l-linux-gnueabihf.tar.gz/md5/473f534041e403f04edb56846fcd8426
+OpenLibm.v0.8.7+0.armv6l-linux-gnueabihf.tar.gz/sha512/17e1c5cbd871091ac9e0057d334891bd795d690986b40ad903bfaaf89100604e5c0abb97e9675581d50893f85859e64f44139c9359149c629a9b06c20a8f46ae
+OpenLibm.v0.8.7+0.armv6l-linux-musleabihf.tar.gz/md5/147b932a2d3b7b0636be22d849f33a5a
+OpenLibm.v0.8.7+0.armv6l-linux-musleabihf.tar.gz/sha512/abd3ad5096ef280814dcff203aaa1cc39d53bc706913cc6aae92d33f7d0842760ccc44f6270f8023558347be8944b169d1d04e508f74f9158f6686484218ce2d
+OpenLibm.v0.8.7+0.armv7l-linux-gnueabihf.tar.gz/md5/473f534041e403f04edb56846fcd8426
+OpenLibm.v0.8.7+0.armv7l-linux-gnueabihf.tar.gz/sha512/17e1c5cbd871091ac9e0057d334891bd795d690986b40ad903bfaaf89100604e5c0abb97e9675581d50893f85859e64f44139c9359149c629a9b06c20a8f46ae
+OpenLibm.v0.8.7+0.armv7l-linux-musleabihf.tar.gz/md5/147b932a2d3b7b0636be22d849f33a5a
+OpenLibm.v0.8.7+0.armv7l-linux-musleabihf.tar.gz/sha512/abd3ad5096ef280814dcff203aaa1cc39d53bc706913cc6aae92d33f7d0842760ccc44f6270f8023558347be8944b169d1d04e508f74f9158f6686484218ce2d
+OpenLibm.v0.8.7+0.i686-linux-gnu.tar.gz/md5/7a7ceefec85b5054a1bc31b1fa93a7f8
+OpenLibm.v0.8.7+0.i686-linux-gnu.tar.gz/sha512/0117e9f8a2236757443fff50c530e2ffb7d713e1bf46997713a6987f8602ac8398748c3fc8c8ba0980d127408de0d2f0de5e53af2c1a911488723bfba531c0ba
+OpenLibm.v0.8.7+0.i686-linux-musl.tar.gz/md5/39163327893bfa499c97d5dbfe892d10
+OpenLibm.v0.8.7+0.i686-linux-musl.tar.gz/sha512/68ac6dc53da509ef906ecab66070381de27b8d551d39c147982c7b5008ec21e6a3a32bbc2aa6cb4e5c3f883cae76fc1c96b5cdfca58b2dabbf05c734431bf594
+OpenLibm.v0.8.7+0.i686-w64-mingw32.tar.gz/md5/8bef2aa7010577a37b1d789980ecb97a
+OpenLibm.v0.8.7+0.i686-w64-mingw32.tar.gz/sha512/6770de28fbf856fc21619f1aa0ac752d059a9bbb9668d56211cc4b33050835a41d2b6d5777c535d63c4083fb01d70b5874f63c65b93ea3ee15e3ac19372091e8
+OpenLibm.v0.8.7+0.powerpc64le-linux-gnu.tar.gz/md5/4fb864d30bcb12946228a37ef51577ec
+OpenLibm.v0.8.7+0.powerpc64le-linux-gnu.tar.gz/sha512/c22c2ff04baa93547b006185138b162cc293f2c022433830cff14895ee49eb68d448d9731f0f92c8bfb4ab036648a2480be59cdf33b8ac52ab4d63131fde078c
+OpenLibm.v0.8.7+0.riscv64-linux-gnu.tar.gz/md5/f09c74ff81b14c1d3c14263410ffbdd3
+OpenLibm.v0.8.7+0.riscv64-linux-gnu.tar.gz/sha512/7136dccc94d3324eb9f74ae05dc2e44cc5ddb3defdccb2e5f0b483205892111da7bf5bedb494190537d95fce45879096430d0298d6b520845aa804e94a047679
+OpenLibm.v0.8.7+0.x86_64-apple-darwin.tar.gz/md5/76c855df0fb8ceda368e6da3463429fc
+OpenLibm.v0.8.7+0.x86_64-apple-darwin.tar.gz/sha512/0a3ff48d7c04cb9dc932b2d0a683063a366941b8f8f533b809d993621abd5f52606653afac98a045b7ead82f85394dfac728cb4a358f118779a3f72b53e0282a
+OpenLibm.v0.8.7+0.x86_64-linux-gnu.tar.gz/md5/41c4d1525544c452d66c9fc11248a4fd
+OpenLibm.v0.8.7+0.x86_64-linux-gnu.tar.gz/sha512/51abc0f0aa3b8a674c42228c41a87258d91e838f2bce7382d7da134f972ddd8b65a23330683d04702be21031db6a2b448081f5044b0640091e4aa47e9ffefe2e
+OpenLibm.v0.8.7+0.x86_64-linux-musl.tar.gz/md5/781eeeffc650b41b25742005cab2dd3a
+OpenLibm.v0.8.7+0.x86_64-linux-musl.tar.gz/sha512/945d3a1267d7435ccfb7342fe8d581dd28878ecac429d339924b3a49be6c25e7130759be68cf02e2e5c9dddf0ee6e5321bc9da8fbdf58f1e0dde3950ab866fe3
+OpenLibm.v0.8.7+0.x86_64-unknown-freebsd.tar.gz/md5/a947d491d424d891af54ef6db3990c52
+OpenLibm.v0.8.7+0.x86_64-unknown-freebsd.tar.gz/sha512/da477916536fcd7bb447dd6b7df1142c851a4ff027d5e1eca667736bd9eee8deda6aeacd7acf451b3246f16ddc8ec8f8fc35e8c4d4739858b8255b3d6be37b95
+OpenLibm.v0.8.7+0.x86_64-w64-mingw32.tar.gz/md5/26fcdd81d0e98542c6f7597993fabeee
+OpenLibm.v0.8.7+0.x86_64-w64-mingw32.tar.gz/sha512/60705757778abb932009edf19250704ddb5f705a242e407195df9ced984f3855501f9034f170817dbd7770d6e8703353a4e0efb772c86c83d6d7a178c2aedb53
+openlibm-9fbeafcd4f1b6ef6aa3946c1c8faead50f38a94d.tar.gz/md5/7a2773e071b7aa1a39182dcb1e665a78
+openlibm-9fbeafcd4f1b6ef6aa3946c1c8faead50f38a94d.tar.gz/sha512/65f511aa223c9a1a7cc84b83415cc55a42786e08559620414e94aaa83414d32e868e6e9d3d460961dcb79998211fa80a04acd1c1f2bf9aa3fd49cd8324e2f9a0
diff --git a/deps/checksums/openssl b/deps/checksums/openssl
new file mode 100644
index 0000000000000..1dee209bb0c5f
--- /dev/null
+++ b/deps/checksums/openssl
@@ -0,0 +1,38 @@
+OpenSSL.v3.5.4+0.aarch64-apple-darwin.tar.gz/md5/d3a74c8ea6a26b89b250ea72d4df09f4
+OpenSSL.v3.5.4+0.aarch64-apple-darwin.tar.gz/sha512/6586b7c070409be5a4a8c2b436f78d43d5e007698721478919b10247b8ba72fe591f22432b366e21f63d1d9e0f59e0bf708726b13e8e76cf46b83dfd9c6290e2
+OpenSSL.v3.5.4+0.aarch64-linux-gnu.tar.gz/md5/a63078615f652e9c8802ee4f157ce670
+OpenSSL.v3.5.4+0.aarch64-linux-gnu.tar.gz/sha512/bfb1810bf33e8428d32f9f454cb48c1ec1aff1b8cadb92e7042f053791e27b49b28676d7843b13c1837e44916553b34fcc2f6dc891b7a9ef691a249de384a17e
+OpenSSL.v3.5.4+0.aarch64-linux-musl.tar.gz/md5/f5cdb3a716692ca6f3ab704eb54d2af3
+OpenSSL.v3.5.4+0.aarch64-linux-musl.tar.gz/sha512/1734abca858d554d4e8b46d494e3a41e99726b53ff9534dd85ea47e3c576c5da354940245b73d1d31fb6cc14ca50dd331385da8b7328a97d19d0592414c36835
+OpenSSL.v3.5.4+0.aarch64-unknown-freebsd.tar.gz/md5/812279eb91f01e35a39d77f77ab54a1d
+OpenSSL.v3.5.4+0.aarch64-unknown-freebsd.tar.gz/sha512/17a1e1fa314c8c766e957b2adbb6bbe3bcd20e81df5a5d6280f8013821774cd447076757bb9eb436a39e31ba4921ccea25f02c4adf3784945a740fbe4192a1f0
+OpenSSL.v3.5.4+0.armv6l-linux-gnueabihf.tar.gz/md5/07bfc3d49e4085e3bf286bf0f89c9771
+OpenSSL.v3.5.4+0.armv6l-linux-gnueabihf.tar.gz/sha512/1d1e93735f208f7704259ddb1bc791d8e79f57d962b8e3ea4888aee1cb42c5726800310137a2d0a3b26e78cc7f419e3410894716932cceae549acd4aaa632ac5
+OpenSSL.v3.5.4+0.armv6l-linux-musleabihf.tar.gz/md5/0e7f6a4fc89cc7917a63f514eee960fc
+OpenSSL.v3.5.4+0.armv6l-linux-musleabihf.tar.gz/sha512/2124ae606fbc3d9b60fa092241f2ca228e666758b24a043104791a7b58f8a9d1bb657918b2be67f70175ace25e336996e047ce1eb34f1ce02c95412ea657e515
+OpenSSL.v3.5.4+0.armv7l-linux-gnueabihf.tar.gz/md5/e8d8e328f953f0ed4cbc010bb760b165
+OpenSSL.v3.5.4+0.armv7l-linux-gnueabihf.tar.gz/sha512/07a80f5c370d1e61d16031bc32fd0ff2605de09afd1bcb60e4da2237e595c09336103f37d0d9d972849e6e0a4190e83f29360cbd646967f3d8aaac1bd9330e62
+OpenSSL.v3.5.4+0.armv7l-linux-musleabihf.tar.gz/md5/fb14accd074b1527dab887f6481b5e2a
+OpenSSL.v3.5.4+0.armv7l-linux-musleabihf.tar.gz/sha512/366d70e849d8d7e809b8d204b8446c8b190dd87d34ac09dc5c5631dbaa6b470dc554f908e90b4346947a93d570a127d3acb4e48a6372ce0c7d8fdb52d9591e2a
+OpenSSL.v3.5.4+0.i686-linux-gnu.tar.gz/md5/f8debcbb53d95da043b1105b8299202e
+OpenSSL.v3.5.4+0.i686-linux-gnu.tar.gz/sha512/4c619a87cb37a2d67d7940a50b1b7b2aec2d7bdfddbadd7310a52058494026bcbd1459c09dc1e9db7f2969093070a1110e0c592c671cdc1a084737e8edb106a1
+OpenSSL.v3.5.4+0.i686-linux-musl.tar.gz/md5/ee85ed549ebd79c4fe0a02cc6f408cfc
+OpenSSL.v3.5.4+0.i686-linux-musl.tar.gz/sha512/e89e76e2c375fbabbf7d9195e3212f77fd929ff4089eab2662988033a2232eaf842783079d254b01e80e82aa8bea7507ea6ffacb74c029a8c107255ff9d3502e
+OpenSSL.v3.5.4+0.i686-w64-mingw32.tar.gz/md5/20f9847da4ea3a42de7a69441c68cb77
+OpenSSL.v3.5.4+0.i686-w64-mingw32.tar.gz/sha512/d88556fa5ce1164bc5890077a387d30dec4d454d1e35b743cdc562e594f872c01270fa447ced0fe8b0a6a6d1589e117576510b8d8d8abab9f0b85cc2b12232c0
+OpenSSL.v3.5.4+0.powerpc64le-linux-gnu.tar.gz/md5/cd3a9bfcc9c2ac35c363cd629303e025
+OpenSSL.v3.5.4+0.powerpc64le-linux-gnu.tar.gz/sha512/107f7a3c640c4105ed66a002020b4aa397cbd31af4305f485676b90d0a8c53ba9aae4721dfeb3406b109413e8313cc2a1fadba5786780c7af0ee5bc14e01dec0
+OpenSSL.v3.5.4+0.riscv64-linux-gnu.tar.gz/md5/4d1eb41a393b36a1ca39d52113991861
+OpenSSL.v3.5.4+0.riscv64-linux-gnu.tar.gz/sha512/ad896b5623d746b09866d378b9f05f515aa34d5f874f57fc76329df8630bced8d6d7576dd5ae79248347bbca98b035b941773692ba27a1c899fd5500e01b6432
+OpenSSL.v3.5.4+0.x86_64-apple-darwin.tar.gz/md5/37b748664516d632886aab07cabaeb96
+OpenSSL.v3.5.4+0.x86_64-apple-darwin.tar.gz/sha512/49f24b13906cf4538d56b562f26c9641ed5d64e05cc176dc4c461ff9e238828c216e0e396527c7403fd8672b313bc59839a767ab530222878191b1dc01f9c39d
+OpenSSL.v3.5.4+0.x86_64-linux-gnu.tar.gz/md5/c3cf2b5c53332edd6755beabde9ec7ca
+OpenSSL.v3.5.4+0.x86_64-linux-gnu.tar.gz/sha512/dab8ff0ca15785d8023a8e3b770d72761191fdf1f1f3525e4ddf10104c6a8905e089f6e888a24bf6717bc944f1ba498e4af68724d8af32393c147c59de309fbc
+OpenSSL.v3.5.4+0.x86_64-linux-musl.tar.gz/md5/fa61e1bd29a34b58971224715929fe21
+OpenSSL.v3.5.4+0.x86_64-linux-musl.tar.gz/sha512/22d00cae28d3e63fac59518423cfbb76eb82bc18b72f42c89b60b37e346b19affabe01feb8c0b785b3cb88110356efb576ffe74ff5af00fc9c9f1669711e3faf
+OpenSSL.v3.5.4+0.x86_64-unknown-freebsd.tar.gz/md5/ac3b835042008fe1229d5822b89a0ff2
+OpenSSL.v3.5.4+0.x86_64-unknown-freebsd.tar.gz/sha512/bf70b67488571b53bca8bb695da5206f401c759cb290b8007b0175bb262cff22d2145fc8e4690b7a1b819593a5de3a94beed6484fbbbf5a5ae03161ed2a866e9
+OpenSSL.v3.5.4+0.x86_64-w64-mingw32.tar.gz/md5/0ca1bc12a96a1b80217b780e4eda1d10
+OpenSSL.v3.5.4+0.x86_64-w64-mingw32.tar.gz/sha512/b9b98e9b1b1942b1141cb21166548a81cbd336c62ec335905fcf2aad536b16d90d68d160714a376dc70159e17063588893276b345d67833ccf5f28d2ccd32551
+openssl-3.5.4.tar.gz/md5/570a7ab371147b6ba72c6d0fed93131f
+openssl-3.5.4.tar.gz/sha512/365aca6f2e59b5c8261fba683425d177874cf6024b0d216ca309112b879c1f4e8da78617e23c3c95d0b4a26b83ecd0d8348038b999d30e597d19f466c4761227
diff --git a/deps/checksums/p7zip b/deps/checksums/p7zip
index b3c24a811a043..a75aec9ac0e9d 100644
--- a/deps/checksums/p7zip
+++ b/deps/checksums/p7zip
@@ -1,34 +1,38 @@
-p7zip.v17.4.0+0.aarch64-apple-darwin.tar.gz/md5/af8134ed9c24b99d69e4edb4d5226ca5
-p7zip.v17.4.0+0.aarch64-apple-darwin.tar.gz/sha512/b8bb6aee60a54cca37568af8b2d9baedd892ba0d4918b93bcb29d74189524af7115901f4fabafb1ca58ed17e97c59846fcdfbd460abc81059806802b0a7be840
-p7zip.v17.4.0+0.aarch64-linux-gnu.tar.gz/md5/20abac5ebb99f31742878013c02f96a3
-p7zip.v17.4.0+0.aarch64-linux-gnu.tar.gz/sha512/6d8ebf895b969b1f707d0c23a19db4cd0dee47957d076e6e389395e09404d55bfcb78bb14bb67bb35b93b6a0072f2b4f097d839503d1ccab62b4ce28939dc71d
-p7zip.v17.4.0+0.aarch64-linux-musl.tar.gz/md5/185c979c7419b7ded3832c0f5cfd3b77
-p7zip.v17.4.0+0.aarch64-linux-musl.tar.gz/sha512/722e880c9f111738cb4cde84bf62c36892dbefdba625ae2b9e0fae76a7b1eabfa481a9838fbf9667223f19f62b6f09fcfd42b50c2bff7a65af0fae3616250fc7
-p7zip.v17.4.0+0.armv6l-linux-gnueabihf.tar.gz/md5/dceb37181763f86bf12f8ca473cf3403
-p7zip.v17.4.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/51e409bbcd3c54838cb3219b2476c8b45c8340e0a2fd26cced0d8484ae7f51711723e06e9023fce9ae9a1b51b5fb94aba536428ce2a5c5902b38498a0b3c2b50
-p7zip.v17.4.0+0.armv6l-linux-musleabihf.tar.gz/md5/193ecd888787ea03a500d102a7e33afa
-p7zip.v17.4.0+0.armv6l-linux-musleabihf.tar.gz/sha512/d525aad33f5ed27dc993f31c6db2996b830716bfac9bc7c49cb462ea3f0b412d0d3267765b9952c85e9c9be31d36d095d55ba89c0fa2c92823d9490372389c95
-p7zip.v17.4.0+0.armv7l-linux-gnueabihf.tar.gz/md5/096f11a7f1af5ff730bb8cfef22e335e
-p7zip.v17.4.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/1866ffd0169e0795594aaa70f1af8102ebbd79b3cafaadfb9c6a537dac0cdbb6eb7c31ad5165a975508c1b850744f94b60d9c530d658cdcc5536a474203cff21
-p7zip.v17.4.0+0.armv7l-linux-musleabihf.tar.gz/md5/fef1576982f45d1922582f6f7a7d6665
-p7zip.v17.4.0+0.armv7l-linux-musleabihf.tar.gz/sha512/71061585b32fa1a8e0a403a60c07e9f90586291a9799d7e2d6f7e6ec9f7b0ebf4b45ed080efd87cad82c45f71ec9a14cbcf9134a73bad4f5e3329f23bc6df01a
-p7zip.v17.4.0+0.i686-linux-gnu.tar.gz/md5/8818389b3bf00f10c6a39fe0c4a331b4
-p7zip.v17.4.0+0.i686-linux-gnu.tar.gz/sha512/bec2051a258f7e8a762b7cd4324e7b8f00fe5d99d48f05fb3557c41604e8b08af9ab66ab830f4a48086656be41aaf011b2aae0fb530e0ffefec38689f85a3bb5
-p7zip.v17.4.0+0.i686-linux-musl.tar.gz/md5/4ed9c16a65ed1d656aa214013e46eb28
-p7zip.v17.4.0+0.i686-linux-musl.tar.gz/sha512/7a5b3e15d0038bea0de7fc28ce058d7f93b8e04f271e30953a6b52d2b5d71f59d10177033e888a50cf8dfeb4f44bcf3271c9b9d1b28d0122ab2b239decdad446
-p7zip.v17.4.0+0.i686-w64-mingw32.tar.gz/md5/d06cff2ec0b7c8415700587f931ce1ac
-p7zip.v17.4.0+0.i686-w64-mingw32.tar.gz/sha512/ed72440f5306a57465a70b00bff33185a83c3e223844a79aa0b0d1fbe30dbd35da75e6188725aa621f5c4574a09527daf1e4893c7c6979ab91b2c09b4979dbcb
-p7zip.v17.4.0+0.powerpc64le-linux-gnu.tar.gz/md5/949ca7d111e497b82c9c762e5ac63a6b
-p7zip.v17.4.0+0.powerpc64le-linux-gnu.tar.gz/sha512/4842e0d44bf6380100723209596f526181fefe8a81d59c28658d03ea16600e71d010d5c7898b4c943efdd9caaa2301c3fdb0dccb343d631d1734acda1c559f65
-p7zip.v17.4.0+0.x86_64-apple-darwin.tar.gz/md5/2322c7a08f62592ca394a716949008bc
-p7zip.v17.4.0+0.x86_64-apple-darwin.tar.gz/sha512/9549f3e1052730ce13414636b32f0d1a9a1ac944a2b622380eac0da144b11fd65d437afe877ba6797d651da9c4ec77f0ebd3e515146caceaa2524829419eda48
-p7zip.v17.4.0+0.x86_64-linux-gnu.tar.gz/md5/a21b12946a62ef3688d5fc965974e8f7
-p7zip.v17.4.0+0.x86_64-linux-gnu.tar.gz/sha512/d32faeac23acf8a023f65350ba1d62bb3d9f904e32570ae03b8fb0a5375758784dd95be8caeecd007cbde40e103854a077e2c817f62afa72491f3b8966deb738
-p7zip.v17.4.0+0.x86_64-linux-musl.tar.gz/md5/c448e872d4ad66beb2d46d9134952f2f
-p7zip.v17.4.0+0.x86_64-linux-musl.tar.gz/sha512/92588f4817e145ef655c718dec049e7f43dd93644f43f19cd320643fac5f5b2312837c7a6c3e782e97fd08747311c58ed4657484f8bc778942fc5206ff8ea4e5
-p7zip.v17.4.0+0.x86_64-unknown-freebsd.tar.gz/md5/2cca6259a2eb1b0fea777d566267bf05
-p7zip.v17.4.0+0.x86_64-unknown-freebsd.tar.gz/sha512/92f90e2be4a8b8fcd80a4ceacac8bbab750913526b85f9279f8ee9ed91b77248b5de2d35d0c6241d0ad51fda185f4cb1ead1dcc9d23e2bef35e0b61efe3c3170
-p7zip.v17.4.0+0.x86_64-w64-mingw32.tar.gz/md5/5d272c78d7ffb40da0f333463f3cc098
-p7zip.v17.4.0+0.x86_64-w64-mingw32.tar.gz/sha512/2d999c6df4786cec1bba396b3a651a63740f4b799e9fc11754afd24438076e898daae74b4d3c7072450428e89881991e8884711cd4c349879a00c7aeeb4e1d3e
-p7zip-17.04.tar.gz/md5/00acfd6be87848231722d2d53f89e4a5
-p7zip-17.04.tar.gz/sha512/ad176db5b657b1c39584f6792c47978d94f2f1ccb1cf5bdb0f52ab31a7356b3822f4a922152c4253f4aa7e79166ba052b6592530b7a38f548cd555fe9c008be3
+7z2501-src.tar.xz/md5/00904e3039346ee32b7a500a34e2d699
+7z2501-src.tar.xz/sha512/5ee146ce993c6d12ad19333dc3545e6c3429212260c22d456390e49ca150e6fcbfc6eae45b5ec61138ae1598d7b4a79d6f2e3ff02929af38039c0ca59823e729
+p7zip.v17.7.0+0.aarch64-apple-darwin.tar.gz/md5/a52be1050f7903c9664379eede9ec6f0
+p7zip.v17.7.0+0.aarch64-apple-darwin.tar.gz/sha512/b8816a06bf964e55aaca09871ba59434e6c936d8449783bee7be8ba534d5eff9908a889e7b18f5a5cc1c51b291e3d6d107060cc1e83aced3cbc452e089f4cc0e
+p7zip.v17.7.0+0.aarch64-linux-gnu.tar.gz/md5/4e14f09c1a57ad742c69e95b3df3a76e
+p7zip.v17.7.0+0.aarch64-linux-gnu.tar.gz/sha512/79ca72ef17e50016b15d8bba9aaa49e1e174b6496470344e42658d861d4fd4a6337660a3b4b08c351f21f4d23ea96a0b1ba01d2f7b1724765c62be282d3c577b
+p7zip.v17.7.0+0.aarch64-linux-musl.tar.gz/md5/07936d59f9cf3161ac8daf6427929a83
+p7zip.v17.7.0+0.aarch64-linux-musl.tar.gz/sha512/807dc652ea62823188774e16c6b93fceb904042e3d22cb4eaf82dc393966b458d0d9bca578da7087466e63007a918c5caf24f19b03b8720fc5b4c9cfedbdbcce
+p7zip.v17.7.0+0.aarch64-unknown-freebsd.tar.gz/md5/6cd8a5e16ed17b56d8eadf1e7e167512
+p7zip.v17.7.0+0.aarch64-unknown-freebsd.tar.gz/sha512/1904bae2b48e27a02c3fda3324c5ebe2fb736984659bbfac4cb5071fb31130062e574eb2ac5fb8002d8c6f56ce89d47f74ac37343eee07822ad634db477582bb
+p7zip.v17.7.0+0.armv6l-linux-gnueabihf.tar.gz/md5/32dff6435c64a22fb5a504d5c1429c49
+p7zip.v17.7.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/7bfee156ce16106f8ea50d4de63de7536030f560110aad9e8390a549443d311b4373c1940df680876b5b44a2f728981268c5d21bc083aa97820e79f362eecc21
+p7zip.v17.7.0+0.armv6l-linux-musleabihf.tar.gz/md5/cc7671317b0f450cb6584927ac31129e
+p7zip.v17.7.0+0.armv6l-linux-musleabihf.tar.gz/sha512/76680da33289cf227686dbefae41401abd46826089d5880bad47e661c35f5d0ae2bbed4dadff60682651b3cc0ae7bcd0fd95d1c838197cf72a24c815be4c9895
+p7zip.v17.7.0+0.armv7l-linux-gnueabihf.tar.gz/md5/c7ef8bc1143cd7b7e8cabd150cb03015
+p7zip.v17.7.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/08d5934d1fa7795e7c956b3ac519bebe52cfebd7f546d60f03800265cec560dd004c12523e43d3fa9086f73276450dfd76c67bf6522c55c4fba81c8172944fb2
+p7zip.v17.7.0+0.armv7l-linux-musleabihf.tar.gz/md5/87067e9348d302e58740dd71b6dff9e7
+p7zip.v17.7.0+0.armv7l-linux-musleabihf.tar.gz/sha512/f04be74daf9ffc29ef0c33ef367e1136828f3e4dc027995eabb9e757e754bc9c34fd19089e5d8e5b6a4c4ec8e43237aedaee4b99f04c1d7631961e0d0aa3345f
+p7zip.v17.7.0+0.i686-linux-gnu.tar.gz/md5/ff1aad93d8dae893f1dc23e856ed05ac
+p7zip.v17.7.0+0.i686-linux-gnu.tar.gz/sha512/06475f5d401839e438db068b48a6a34ddf4b72ee3010589d4c3a9ae6f2afe18a7b2af34437c60c6b9e0d8f657a6df2d740f1e39885bd78bb0eddf02ce020a2b6
+p7zip.v17.7.0+0.i686-linux-musl.tar.gz/md5/a843068034a5410022afc6aa36bab84c
+p7zip.v17.7.0+0.i686-linux-musl.tar.gz/sha512/15a32626ac48fb043c33f564dd36550913be19f0193fa2e52e95b81e2e4d087a84e1c05d42381ed57b3ac0d38695d50cf6d146f462460661b2a6a3abc9a6b578
+p7zip.v17.7.0+0.i686-w64-mingw32.tar.gz/md5/c7c4af99790b838f611c252b2268e83a
+p7zip.v17.7.0+0.i686-w64-mingw32.tar.gz/sha512/a5d44bf97fbd9381d71bbc0472348221bcf77bc0813af5fb0c56d40a1774ee430865402481dba005166689e7c9670ff82deb4616d9d32ece226decc89be7ed19
+p7zip.v17.7.0+0.powerpc64le-linux-gnu.tar.gz/md5/dab2a1fcff915cce8e81740614e400c4
+p7zip.v17.7.0+0.powerpc64le-linux-gnu.tar.gz/sha512/ca71a8a2def311a21db5018f21613c1354704b0a03c92fea5f8ed38b9e6be776509bd138fdbc222e6c560928f4ad381e98c86e952a1707dd459705c5676f5754
+p7zip.v17.7.0+0.riscv64-linux-gnu.tar.gz/md5/cb86be2084d96777b3cb2b097a1cbd47
+p7zip.v17.7.0+0.riscv64-linux-gnu.tar.gz/sha512/537ac28020f88bc6fbf9cdd2b2cffe9879c9f0d74aa02567181b47a2a63c622bd5d2c9de67b7dc74cfc8e372d672b8269f965c6ee098a03a0b89ae0ed53bbf9c
+p7zip.v17.7.0+0.x86_64-apple-darwin.tar.gz/md5/c9f06846dec4e868b669ada25409d031
+p7zip.v17.7.0+0.x86_64-apple-darwin.tar.gz/sha512/50ae225555b31a9ae31ce70c8489fd39718f274a8a0d4432f59b8e689f1b049614a148c389036458a53953485683cd9846b3a9413cd83d8a32b312f7d61b0650
+p7zip.v17.7.0+0.x86_64-linux-gnu.tar.gz/md5/c3a9a8e0c1870b7a841c3b3a37220a07
+p7zip.v17.7.0+0.x86_64-linux-gnu.tar.gz/sha512/75d8150566ac29ba06416767b742cf9c222da9e2c8c48dc8da7f5c0241c41b8e767db0aff9e14cc9240e8662c591505c872e850ad472b234951ab32799790d0c
+p7zip.v17.7.0+0.x86_64-linux-musl.tar.gz/md5/34925d3032cdcd953c36ded1625f5e33
+p7zip.v17.7.0+0.x86_64-linux-musl.tar.gz/sha512/03d4e24015498e6e6b025e9ba2d25e43dcb8af3d78c49c9fb34fe2e0d75010c0cb09476dc317ef54f03e4e16449b25dab81121a4b3c54a565700d1a16ad958dc
+p7zip.v17.7.0+0.x86_64-unknown-freebsd.tar.gz/md5/a1fea324148e8082db4657c195b91984
+p7zip.v17.7.0+0.x86_64-unknown-freebsd.tar.gz/sha512/ecf5b278077edc4e0e73e9d091c9f9d4b2edfbe742f2de8f05411a32ee0fd58d8adea2915f9295345ea1082a01f41719d1b3ed1744225ea4e129abd3785f4df3
+p7zip.v17.7.0+0.x86_64-w64-mingw32.tar.gz/md5/f0279fe7836fc0949410c8889c6ae09f
+p7zip.v17.7.0+0.x86_64-w64-mingw32.tar.gz/sha512/79dfa0324f050202f091197102fa4749373ee737a86dcb37980d73a4795677431144f8308890eeae1ecc30d8f1694b19a3a08f963c1a91536a3f2f3d438b5dd7
diff --git a/deps/checksums/patchelf b/deps/checksums/patchelf
index a7122c400749a..6392e44d8f2e8 100644
--- a/deps/checksums/patchelf
+++ b/deps/checksums/patchelf
@@ -1,2 +1,2 @@
-patchelf-0.13.tar.bz2/md5/d387eee9325414be0b1a80c8fbd2745f
-patchelf-0.13.tar.bz2/sha512/43c3f99fe922e2f34d860389165bcc2b0f3f3317e124eb8443017f71b1f223d96a7c815dc81f51b14958b7dc316f75c4ab367ccc287cd99c82abe890b09a478d
+patchelf-0.17.2.tar.bz2/md5/d76db4f1a27b0934d0b0d0585b081c0f
+patchelf-0.17.2.tar.bz2/sha512/8277adf95513f88fb190536a38bdfdf438a4cc7685d8a130bdffbe064441f0f25095b6c83bbb190133e1a138963776d15b46c247dd2f1a073a1bfe1d1dbdd503
diff --git a/deps/checksums/pcre b/deps/checksums/pcre
index cab79abe745bf..9ead844d895f8 100644
--- a/deps/checksums/pcre
+++ b/deps/checksums/pcre
@@ -1,34 +1,38 @@
-PCRE2.v10.42.0+0.aarch64-apple-darwin.tar.gz/md5/667a570d341396c3213749ee1e5b5fda
-PCRE2.v10.42.0+0.aarch64-apple-darwin.tar.gz/sha512/c1bb99e8928efded9b0ea3f294ceb41daea7254204ca30c0ff88686110ccd58138d8ea8b20b9a9d6d16a6d8d3f34e27e74e7b57d3c8fe6b051c9d8fa6f86431a
-PCRE2.v10.42.0+0.aarch64-linux-gnu.tar.gz/md5/1a758f275ff3306fbad7698df7b9b7be
-PCRE2.v10.42.0+0.aarch64-linux-gnu.tar.gz/sha512/d09508c0b255366d01f1b4d1ae6748a8e47f18c451498d30715f5f968784990949dab7540cd086396abd912f61b5f7c44c8c72a27efaba0a7fc08b71a167c057
-PCRE2.v10.42.0+0.aarch64-linux-musl.tar.gz/md5/e61147579fdc9b57a61b814bdf9c84bb
-PCRE2.v10.42.0+0.aarch64-linux-musl.tar.gz/sha512/eecaf4c1937fc04210b910ac65318524c02d690e8c4894c38e74eaba36d26c87a1fd9e1cc36f4307a11ff3552a79f081fa8f05085435eb34872dc2fdecce2d18
-PCRE2.v10.42.0+0.armv6l-linux-gnueabihf.tar.gz/md5/b4c484a3b87923c0e2e4d9cc5f140eb7
-PCRE2.v10.42.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/5931cf13d340971356a9b87f62c9efdb3656ba649e7b25f1722127a3fd70973d94c815a37b43cecab8eb0ed8d1ae02ef1a0c0a12051852c1b9242c3eaa01c496
-PCRE2.v10.42.0+0.armv6l-linux-musleabihf.tar.gz/md5/bc7b5bb1c5b0b99c121bad5a89299ca7
-PCRE2.v10.42.0+0.armv6l-linux-musleabihf.tar.gz/sha512/86b5ad4fa6f4b5bd1a76ad68ddff4b39916d0ed0acc03a3fee8eab5256aaed53abc0ff4ce9d9d9f8b9203c087211684da92fe6aa06ff5bc331ba1b3da2cba57e
-PCRE2.v10.42.0+0.armv7l-linux-gnueabihf.tar.gz/md5/3541eb26fa5a4d13e2c7d063dbd900d8
-PCRE2.v10.42.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/872181f931662edaf653351486c5e2a700e94cfa0966ca90eca893fdc75dd46eb40d9d45737c198aa4b9ad8ebab33fd78697ef35906985e4e1c9748ddf58d363
-PCRE2.v10.42.0+0.armv7l-linux-musleabihf.tar.gz/md5/fe059feb18fcc9312f1033362070fe34
-PCRE2.v10.42.0+0.armv7l-linux-musleabihf.tar.gz/sha512/5a96acf3908c964ccb4f296c449499388ed447d9a094c2760c979e02ef656fa710ede3926b9626e89fb5b0545c111e6eedff21e48416e923c17fc9ff129d0519
-PCRE2.v10.42.0+0.i686-linux-gnu.tar.gz/md5/67f49cb139017109c422c51c0120823a
-PCRE2.v10.42.0+0.i686-linux-gnu.tar.gz/sha512/8873d9995bdf5701fc5a24163f93eada12af76d09781a679a4ed61b66f117cf322505d291931d1c58b3b3eb560f6487a1100b0735c14abe6cb38677750b481c7
-PCRE2.v10.42.0+0.i686-linux-musl.tar.gz/md5/092af10d8182cb4240cdd975efce4d7c
-PCRE2.v10.42.0+0.i686-linux-musl.tar.gz/sha512/79a48f4fd50ffdf49c8d57581e01ace38c1b3d7edd86d44db44b8efd93074d16faf035131a0d60c6631b8bf22f0fd8296acedba45908da56e8096c296122f047
-PCRE2.v10.42.0+0.i686-w64-mingw32.tar.gz/md5/2bb13db8b5d6d1a5632de3db874c2614
-PCRE2.v10.42.0+0.i686-w64-mingw32.tar.gz/sha512/7d1324696087c32d1bbbb64f5e4b8c8a220ef216d025886b3c3e6d685c3f701428c6696d7ae0bcc771d3295381ba2bdd5db040f788f8a9a58f80ad4d790dd141
-PCRE2.v10.42.0+0.powerpc64le-linux-gnu.tar.gz/md5/0de1215b2a1e9c0efd131355e9fbf2c1
-PCRE2.v10.42.0+0.powerpc64le-linux-gnu.tar.gz/sha512/69dae12627685ae665db8c91264a79aba7c60ae97eccdc79ef889f2a5f69b465fa333aba298fc90bbb95710cfc324e3630bc427a97577855e8fb6c8fe227cfec
-PCRE2.v10.42.0+0.x86_64-apple-darwin.tar.gz/md5/c5c52b399921c5ab81a5f598b350d2ca
-PCRE2.v10.42.0+0.x86_64-apple-darwin.tar.gz/sha512/e6c8ba3aa3fbf54b37079301ab317104c6852812b23835f52ca40f31f0831678172d32e077fbaa712a8a2cb16d62bb97d475827004353e7807922a2d6e049b28
-PCRE2.v10.42.0+0.x86_64-linux-gnu.tar.gz/md5/b074dd1f85e24e723349e566350e2c78
-PCRE2.v10.42.0+0.x86_64-linux-gnu.tar.gz/sha512/236017e02c9f32b913b772dbf22897c8460e5791f196c86f8a073e329ad8925f6859afe48f3bf18ca057c265f08fedbde255360d8f859e2303c6569ab1b0e1bb
-PCRE2.v10.42.0+0.x86_64-linux-musl.tar.gz/md5/9f32ca77e79843fc9c4b5fc8ed336d11
-PCRE2.v10.42.0+0.x86_64-linux-musl.tar.gz/sha512/334a31724e9d69c6517568d922717ce76d85cf87dbc863b7262b25ab43c79734b457833cd42674eb6a004864e5c74da3ae1d0a45794b4cd459eea24d9669fac5
-PCRE2.v10.42.0+0.x86_64-unknown-freebsd.tar.gz/md5/037bf13e9a53eb90846b6643610a17df
-PCRE2.v10.42.0+0.x86_64-unknown-freebsd.tar.gz/sha512/64bc9acda3d158621f442aa2e766730cc425df3795965f461b530d8152934ffaf93d75b86ebc483345b78b203b0502857683c183ec65a01da1834b55405c7f77
-PCRE2.v10.42.0+0.x86_64-w64-mingw32.tar.gz/md5/6b04c3778bf02947cb1b7e70a41f3292
-PCRE2.v10.42.0+0.x86_64-w64-mingw32.tar.gz/sha512/9b808832cc48703ed525eca06d1dd0162dae3f94a9ad72d044876edcb86a90e8443c8b169e60ccf3507d5960156c447d8f3f30e586ac2a22b6d43dbe807009d0
-pcre2-10.42.tar.bz2/md5/a8e9ab2935d428a4807461f183034abe
-pcre2-10.42.tar.bz2/sha512/72fbde87fecec3aa4b47225dd919ea1d55e97f2cbcf02aba26e5a0d3b1ffb58c25a80a9ef069eb99f9cf4e41ba9604ad06a7ec159870e1e875d86820e12256d3
+PCRE2.v10.47.0+0.aarch64-apple-darwin.tar.gz/md5/2ce4ed46260926476a067740c6d07866
+PCRE2.v10.47.0+0.aarch64-apple-darwin.tar.gz/sha512/03330b63ec5ee30f89fd54ccdb06616dc744c3b249a8984100617cc9b6db83170bd07a1e89abe11822714d345989f5db845a14b1705ecf5a31982814f32d1e3e
+PCRE2.v10.47.0+0.aarch64-linux-gnu.tar.gz/md5/f9cfd4fde71790ccd3c8d66196a827c2
+PCRE2.v10.47.0+0.aarch64-linux-gnu.tar.gz/sha512/da9aba37af68d1c1d41a374ee18b9d208945816f15e3b676847e3d3c0be426c8649bca969a0b2eb38c3320e26b85b19b19cd0de03b390cc7bcbb774784f1824e
+PCRE2.v10.47.0+0.aarch64-linux-musl.tar.gz/md5/5d38471aa8cac767b3940e8fc1ca7251
+PCRE2.v10.47.0+0.aarch64-linux-musl.tar.gz/sha512/2ca63e271f8398e579a239220b9b82d0616172692bd5572b8162006674f409e03c24bec804d50b07980986e43ee3e8732f472584336862cc71f7653e0193768b
+PCRE2.v10.47.0+0.aarch64-unknown-freebsd.tar.gz/md5/da1e2f586744c331ee1f99a29426bdc8
+PCRE2.v10.47.0+0.aarch64-unknown-freebsd.tar.gz/sha512/898d47b2ef149dab8cc57334aad7b1ca4029c4692fde73e49e1a56892c2d7c8057b0a50e686e327e32f339cd9971ad79cc09b4668ac5b1cafd511a7aa1964134
+PCRE2.v10.47.0+0.armv6l-linux-gnueabihf.tar.gz/md5/59cc20453fbb52c7113b41bc76bbccb7
+PCRE2.v10.47.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/338781612672e5f3280047e441ba5b966b837135c7e756f7dbffdb39f7381bd85646374b05718495f8549f90648ac59c63404102087c517a008441cd610bbdf1
+PCRE2.v10.47.0+0.armv6l-linux-musleabihf.tar.gz/md5/2ba9bd162e5a8c2b329875fe20e584ec
+PCRE2.v10.47.0+0.armv6l-linux-musleabihf.tar.gz/sha512/b8a31e2a5a61a6c2b8cdce3e0ecf8df9e14e3fc207a1058bf2b420e268e2316270fcfa4586cf5f3fcf7408b5757483160698e1233c6b74af771e009dd760d3bd
+PCRE2.v10.47.0+0.armv7l-linux-gnueabihf.tar.gz/md5/ec0274e6a0120f294468626fc1bc5d5f
+PCRE2.v10.47.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/ae77ef836a7db9ff72db76941a61143263a45711d28f3f118ed470c574b800f99013d3bfb9f824dba21de07d524f0f3ff5757164f7ffe0f19ea191560dfc3ae3
+PCRE2.v10.47.0+0.armv7l-linux-musleabihf.tar.gz/md5/65198d605a58d5433bdbd943af2e7839
+PCRE2.v10.47.0+0.armv7l-linux-musleabihf.tar.gz/sha512/5a3363413cc759f00c6abc1cceb2347b02a80ed26325c75118b2d19a164d9dc27031a02059d2057f1ffaeb3a05657965a50bc44613a7909934951cd8ca524677
+PCRE2.v10.47.0+0.i686-linux-gnu.tar.gz/md5/958976044aeef66eb8558b914cb979a3
+PCRE2.v10.47.0+0.i686-linux-gnu.tar.gz/sha512/6136e8787f76470dbd246b161a74c023759d2b22749b025afe8476dc63cfeb5cad00c1d503f1e7eb0b5a427d71a3487f363d7b8dddf3ee32c8c656daa6fbe51e
+PCRE2.v10.47.0+0.i686-linux-musl.tar.gz/md5/cab8d94586624ded998956d872df5e81
+PCRE2.v10.47.0+0.i686-linux-musl.tar.gz/sha512/8d2963ea961867d027d603ff77fbce6a4ada73078ff14dd1bc19b50b214dcac622381f86a4516d36ff8556f2e02931465a43c5710c511300d96fe0f25b6166a1
+PCRE2.v10.47.0+0.i686-w64-mingw32.tar.gz/md5/e5db741987d4b916b780667f441ae43a
+PCRE2.v10.47.0+0.i686-w64-mingw32.tar.gz/sha512/f68dc8f8648c64a185fca20878d077b81ebef4054df326082bc96d713219b333061063f47596a2abc27ab79159fc23feb6ca59a7931b79dc4568c3b61d89b7be
+PCRE2.v10.47.0+0.powerpc64le-linux-gnu.tar.gz/md5/239bc4022a84a1549d7ee268876531a4
+PCRE2.v10.47.0+0.powerpc64le-linux-gnu.tar.gz/sha512/3247ea00cc9e6fba8c96bc311c1b924c3886fc540ab7817861d1b7c8f4471307772c6ada56c97619e392244c60673ed5d34cf27d52aa79990e598d8d448b6039
+PCRE2.v10.47.0+0.riscv64-linux-gnu.tar.gz/md5/e7a189f3603cbc9c2c09fe96d9fc5d3d
+PCRE2.v10.47.0+0.riscv64-linux-gnu.tar.gz/sha512/1b42187a9e1ffd1f314c907675828c2511a4c17b9456f87a992c80f15bd543710c2b22cc86de0ef8ac1b5551b2b506a15d13cd86ab02e36d5bb0f993a65f2d7b
+PCRE2.v10.47.0+0.x86_64-apple-darwin.tar.gz/md5/e3f9e1bb09b3b8bfb5240ac1b45782dc
+PCRE2.v10.47.0+0.x86_64-apple-darwin.tar.gz/sha512/d5d677392b7c662482112b96cebea2e41f964b661ca2529ba5f4a236e0fa89d2e5f6f8d26fb84f623735b520a12ea506c90737ac4dca4258dfe5c85b789939be
+PCRE2.v10.47.0+0.x86_64-linux-gnu.tar.gz/md5/d059c54613cef5f7dee278d285a5fb50
+PCRE2.v10.47.0+0.x86_64-linux-gnu.tar.gz/sha512/66362e80e6e7185fb5e1ede8059296a50219df2a4f471b7c6707c79c1f272ec6bb1cbb7a04aaf37afa65b43a8c193f36c094e77a426a2b4ab6a5c099d803d77f
+PCRE2.v10.47.0+0.x86_64-linux-musl.tar.gz/md5/94fcd51517b5d9482544fecf5c1d5798
+PCRE2.v10.47.0+0.x86_64-linux-musl.tar.gz/sha512/98bd495e7ae78c83276bdb2e6f7ce219d6b0a99e1f65255aa715913bc8b5094e63e9ca0a8158f04c120206641727b2b91ce0c74614cee82815b89602c2dd36e1
+PCRE2.v10.47.0+0.x86_64-unknown-freebsd.tar.gz/md5/47e737b50dde40990a3d5c6c6d186bf5
+PCRE2.v10.47.0+0.x86_64-unknown-freebsd.tar.gz/sha512/6f7dab95da96486266182de692ce8e1cdf44f0e5d8331471bf24749feb23251b21f89d87778aa9e8507a3d2c2f416e6d7a320cc69fe709bc7c0a1dd06e94d379
+PCRE2.v10.47.0+0.x86_64-w64-mingw32.tar.gz/md5/58a0327be4edb64cd56ad57dd3b7d608
+PCRE2.v10.47.0+0.x86_64-w64-mingw32.tar.gz/sha512/c39d08797560a7f4d92c5af2123e905fb6e6cd414a329ee38c86850b3e080843ffe82bce5c7754780473f293408200a10f9d50364b0be761b0114fd25b4b7ac5
+pcre2-10.47.tar.bz2/md5/aded5840ab5a7d772dd4e16fc294b665
+pcre2-10.47.tar.bz2/sha512/889a6fdc80f8a7285e4a75d189c183b4588df81bdf048302cf0830e11bbf9b9eeb387ba43dfd3aff8ffb3aa693291e8c535845c06e8ce92d1028cefa6b474804
diff --git a/deps/checksums/suitesparse b/deps/checksums/suitesparse
index 65db184c5cbca..30ac49aa2994b 100644
--- a/deps/checksums/suitesparse
+++ b/deps/checksums/suitesparse
@@ -1,36 +1,40 @@
-SuiteSparse-5.10.1.tar.gz/md5/68bb912f3cf3d2b01f30ebafef690302
-SuiteSparse-5.10.1.tar.gz/sha512/8f85c6d63b76cba95707dfa732c51200df7794cb4c2599dbd92100475747b8d02b05089a47096e85c60b89bc852a8e768e0670f24902a82d29494a80ccf2bb5f
+SuiteSparse-7.10.1.tar.gz/md5/3b8c4f18091589da221a1acc5a331edb
+SuiteSparse-7.10.1.tar.gz/sha512/893f6a51df9a30793db67baa068a7c10237bc96bab1ed534293b09fb1c87ed4b2ec5fa3f6c285977582f9cba9b7a5947aa31e65d52ed2af67c93ac2ce64b55b3
 SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5/46541001073d1c3c85e18d910f8308f3
 SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512/f7470a447b934ca9315e216a07b97e363f11bc93186f9aa057b20b2d05092c58ae4f1b733de362de4a0730861c00be4ca5588d0b3ba65f018c1798b9122b9672
-SuiteSparse.v5.10.1+6.aarch64-apple-darwin.tar.gz/md5/14cc0d3c7b5271246eb45c495c7a4e79
-SuiteSparse.v5.10.1+6.aarch64-apple-darwin.tar.gz/sha512/a56da81a5165bcdf49d1913799bffcaea84efd6f8740dd002f700eb4070313cac64be5359ba88d1f39fe976944e34ee6ed6575ceade2ae2d97b850e6a1aee0ae
-SuiteSparse.v5.10.1+6.aarch64-linux-gnu.tar.gz/md5/b93b047040e2db5e0277e52b9bd3feb7
-SuiteSparse.v5.10.1+6.aarch64-linux-gnu.tar.gz/sha512/e03a9ecafce9dcc6791dd202efac2f864bdf3a0a4524567801c092304c17ab15dae949abfb1fe2bc71b367a0e398260ccfdd91dad611860090df471b44e75ee3
-SuiteSparse.v5.10.1+6.aarch64-linux-musl.tar.gz/md5/22c44d9d82608724e1aa62d126fdf030
-SuiteSparse.v5.10.1+6.aarch64-linux-musl.tar.gz/sha512/39a3c11429cd3e6afa2f615dc4b0c8d16d7b94a423d76e598b3b48db2c47fe64d644233e2a672bd6654f8bd57da91dd7a787a3e4978f0f803237ab4ec6f97905
-SuiteSparse.v5.10.1+6.armv6l-linux-gnueabihf.tar.gz/md5/505ee3c0750a720ed1e4de670f81e220
-SuiteSparse.v5.10.1+6.armv6l-linux-gnueabihf.tar.gz/sha512/20fafbdd2df96427b95b730901663c255dafc415f3a8154e3364ec46ca2b205fa45a081f92272b81d7aed22b9f8373d2d4eee70ff8ab5ed8d1d80b6a340c8aad
-SuiteSparse.v5.10.1+6.armv6l-linux-musleabihf.tar.gz/md5/8e1821668cbca9c2d3c5cee5ad1746c8
-SuiteSparse.v5.10.1+6.armv6l-linux-musleabihf.tar.gz/sha512/58fb4ec10a537d101e0be8417648a4d0127444b3fe8a32498320aaaefc747f5cac3c7503b70775c1d708b077034060fe5ed8609e73bf9be22f9a8729abc4c73d
-SuiteSparse.v5.10.1+6.armv7l-linux-gnueabihf.tar.gz/md5/43d133a916e548ecae50671b92f64c6f
-SuiteSparse.v5.10.1+6.armv7l-linux-gnueabihf.tar.gz/sha512/f7f767c0e7eb45afe10941513695bfcc9e0628195cb9245a9c24700967f9cfa7cd0030cdcfaf47a76400d5dd3eb908c1f9ea5e44efd3054ed7bba47e664279a2
-SuiteSparse.v5.10.1+6.armv7l-linux-musleabihf.tar.gz/md5/7c3b2e19d3296002b1aa72b951421eec
-SuiteSparse.v5.10.1+6.armv7l-linux-musleabihf.tar.gz/sha512/7546ce844b03d0414168ab6d0925f848b14b35ed27cb545b41f2512bad44b7da4f39004e75657c7c572557ccb015177d3e0d346e2c3182b27a6ee602876ee0df
-SuiteSparse.v5.10.1+6.i686-linux-gnu.tar.gz/md5/e00a73f0fad92a266dd8d3774707f9b1
-SuiteSparse.v5.10.1+6.i686-linux-gnu.tar.gz/sha512/9cc2332a78d0490170d722d2f062d6f660fb3bd9042dd177c3683675d0f44306b93bf882cb79c0707ab79318280d08582431eb1c92334f2bb50946e942be0b16
-SuiteSparse.v5.10.1+6.i686-linux-musl.tar.gz/md5/71fb647a76ecc9e547df903535011b5b
-SuiteSparse.v5.10.1+6.i686-linux-musl.tar.gz/sha512/7806cd9179e46fa61b63a3f711b37289da72a48430912e564c88e3dcb4caaad8a9bd232d6f572f8270806d286e4a4eb9edfdcda29fe8d91dadb1b03d57eda76d
-SuiteSparse.v5.10.1+6.i686-w64-mingw32.tar.gz/md5/d4e6c9aba53b2107469cda6de9ca2724
-SuiteSparse.v5.10.1+6.i686-w64-mingw32.tar.gz/sha512/c0c49641c6e7f3f0333e3fa44ce62dcd4ad5942c74b2429aaeb49fd0d7b8c13c872150ae4d54cc5cfaae07a65a24a7d4ea731adc78db3d9341a54e5edb5c80f0
-SuiteSparse.v5.10.1+6.powerpc64le-linux-gnu.tar.gz/md5/5432dca00f7e0f42b7dbd16083537318
-SuiteSparse.v5.10.1+6.powerpc64le-linux-gnu.tar.gz/sha512/61946a7faa2a49613ea2c08a01f064b619c9ec134f0d9509eb42a96bebf2a63f5fb57b14702f25618def410658da8334bb6aa5200280956e573aa944476efef2
-SuiteSparse.v5.10.1+6.x86_64-apple-darwin.tar.gz/md5/ca175d433a02f91407e2921872c2b67c
-SuiteSparse.v5.10.1+6.x86_64-apple-darwin.tar.gz/sha512/14d9b01e2db8c04f9a1076bcbac022c6573728f708f31344825805fed53971e922aecebeb4b2f567a6b5f44ad27c0d66e142887ff4684c8679ab65b902538abf
-SuiteSparse.v5.10.1+6.x86_64-linux-gnu.tar.gz/md5/6c271ced91dbb1bf748efbaace1dac10
-SuiteSparse.v5.10.1+6.x86_64-linux-gnu.tar.gz/sha512/5984db9c101ef80d63024bc3b51821268349450deedd5aaea5fade0fc5932992379a0133c4f91711af134014835afea1bde518ae1e7efd482d556a97e54b0238
-SuiteSparse.v5.10.1+6.x86_64-linux-musl.tar.gz/md5/c7d55069969dbb98997687c847ab643d
-SuiteSparse.v5.10.1+6.x86_64-linux-musl.tar.gz/sha512/b54012765f7c7329125b41c3fb678e23888a858d3fd5a139c52bd980e383a308282238020754e795de6457fb312b61c39e6ab2d665ca5af95c65f52f0c354067
-SuiteSparse.v5.10.1+6.x86_64-unknown-freebsd.tar.gz/md5/e641be38c8205e362a7299c736aedad5
-SuiteSparse.v5.10.1+6.x86_64-unknown-freebsd.tar.gz/sha512/d55e85335bccb59210014c35233ad9e42f5d086f01a43fe0ee13f21cbb8555ea05f1d91c95a6d3f883477086851e123c4b0cde7cd2dcd8e08835fe9f685d5b25
-SuiteSparse.v5.10.1+6.x86_64-w64-mingw32.tar.gz/md5/45cad947fa962e1f192cb7b52a1f7b3c
-SuiteSparse.v5.10.1+6.x86_64-w64-mingw32.tar.gz/sha512/e6545c681ba7d2346baf8fafabdf25f2faf6ea54763d999b14499f30d235e90f34fd4f83430ea7f17c01adea0699dff6c4d7ae3cb938c749d6a15f8bf4f1519f
+SuiteSparse.v7.10.1+0.aarch64-apple-darwin.tar.gz/md5/cd62af0918c511728741613e81df1c00
+SuiteSparse.v7.10.1+0.aarch64-apple-darwin.tar.gz/sha512/a8f5a357c42de61fa71c8f275412b0c5743a275ffb46f95d73ba4135be0e221176a1b7f7d98d39b4ff5a8a2890601e888b0a2e87e18da958ad5e78290f2a816b
+SuiteSparse.v7.10.1+0.aarch64-linux-gnu.tar.gz/md5/2019565921ffb5ebb77dc997a41985f9
+SuiteSparse.v7.10.1+0.aarch64-linux-gnu.tar.gz/sha512/2e5243dc943ec4e00d5210d4e201a8163bbcfa8ced216b1737c515b0343e87cacf7eca69dc45b0a3893c6daa157f70c6179388efd90a2412887132c7f1725526
+SuiteSparse.v7.10.1+0.aarch64-linux-musl.tar.gz/md5/f9e904e29fdc5f65af179d2a1b315ceb
+SuiteSparse.v7.10.1+0.aarch64-linux-musl.tar.gz/sha512/c3335b9902ececf78e6eec3930cbe52cedc0428f2dc20295ebc940b32177d40ca255cc252469a4b18e3c21fe2a1f4ec3d2a75a4d1ffc9dc60febcffd6625afa7
+SuiteSparse.v7.10.1+0.aarch64-unknown-freebsd.tar.gz/md5/91cda0a046879b44da31352e42b6a678
+SuiteSparse.v7.10.1+0.aarch64-unknown-freebsd.tar.gz/sha512/9e292c7484b348950817f8b298ce76de6c6f3ba2fedb91518f9d050eea076849db64b1ee3b0f20ffa37cb35ddf3fbb25b8e72f8647d24d87f9757b2771a14979
+SuiteSparse.v7.10.1+0.armv6l-linux-gnueabihf.tar.gz/md5/00a30f24ebf6c78f989ce123b8160757
+SuiteSparse.v7.10.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/eb23943cef8c540d15f47d49442d612a43b4f1247b0190294bac64ef0a95ffabb355eedfd15fb0e6bf8d01eb490a3f3f0ef80ab04ad2c1e3460a1a7729e6a059
+SuiteSparse.v7.10.1+0.armv6l-linux-musleabihf.tar.gz/md5/975429f8462de4f9bd7929e070fbd3e2
+SuiteSparse.v7.10.1+0.armv6l-linux-musleabihf.tar.gz/sha512/a616124a4085be7100ef1a3ec030e2cd4c0eae961a4aa5986c5abd63c9d85397f7cc84ee83e6f8a28754834139f5794a6393c2d20d9600c5cdc107e9c24049d3
+SuiteSparse.v7.10.1+0.armv7l-linux-gnueabihf.tar.gz/md5/8261f8c08e5243d30980ea9c989bd793
+SuiteSparse.v7.10.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/3d9fa542789d2ab5e8eb7b541b184d572d14d6980741cd0156f3fb74de710e7f21ebe524dfcb8c777b93a005ea66d9ce0768883d63fcb3a90583db7feffe596b
+SuiteSparse.v7.10.1+0.armv7l-linux-musleabihf.tar.gz/md5/2c0ab3267dceb40d38473f24da54da69
+SuiteSparse.v7.10.1+0.armv7l-linux-musleabihf.tar.gz/sha512/c8092ac3b1303bbdb8f551609bc2cb6e0871bc201e90e8eda7c1dccc3de916ea932237cfbb1f738d12f4515d72d1cc03466a3d391e1dda5c09b1a1923ced42c4
+SuiteSparse.v7.10.1+0.i686-linux-gnu.tar.gz/md5/5c9f9e8ef3ff88eb35cb4e9c20810fdc
+SuiteSparse.v7.10.1+0.i686-linux-gnu.tar.gz/sha512/4ea534ca7764253ea92c20a002927687d5d49279065a76853f36557ef030a839d9f6a99f3aa35e1144dd405a9acf9f098c45812f6e61e9a73caab4a40d9b6062
+SuiteSparse.v7.10.1+0.i686-linux-musl.tar.gz/md5/f70262bc5f0dfade2b71dc1ab1394b17
+SuiteSparse.v7.10.1+0.i686-linux-musl.tar.gz/sha512/8ac49fec2c6103b5aa87990d2245bacf1a6805ff89fedb390b332803419bdb59a808b1252db55f9ea802589ed5b87d33c3d42af2d613b04fb0c4b88c2bed4e56
+SuiteSparse.v7.10.1+0.i686-w64-mingw32.tar.gz/md5/3d34fee6e16d8df340a5af96794cb415
+SuiteSparse.v7.10.1+0.i686-w64-mingw32.tar.gz/sha512/b9f092973a0b5705f084a81e4814ce943dcb3e212bd9a5d23a7f6987ab67303ae3533b83e4bb07259c631c9a05c72da657e9a473549d50478c6bc91d3303cf68
+SuiteSparse.v7.10.1+0.powerpc64le-linux-gnu.tar.gz/md5/351afb17c410d18a0147ac6127e78692
+SuiteSparse.v7.10.1+0.powerpc64le-linux-gnu.tar.gz/sha512/d9eef86c1528c5c1be9a88e20cce1a79a88cbdc9b5848398ed1ad44986d43f836299ecaf94c36d390a5fe6dd9fb891c74e963560f673f08f2b9115f12f13a2c6
+SuiteSparse.v7.10.1+0.riscv64-linux-gnu.tar.gz/md5/1d6716f75d958423f94f9e9bd9e73006
+SuiteSparse.v7.10.1+0.riscv64-linux-gnu.tar.gz/sha512/12603984ab46cc0d1eab43e6ef9156448d0ec46306a98a0d970eb26db8d1ddb240e40c108d7aa0198f3c14242804efa50500d2aa8034c71578a43117e97f9dfa
+SuiteSparse.v7.10.1+0.x86_64-apple-darwin.tar.gz/md5/97384c46a7273e90f1a42a98a28ce787
+SuiteSparse.v7.10.1+0.x86_64-apple-darwin.tar.gz/sha512/e1eab80f99062f343461e536c83dbf39fb305ac4a109e3fc92fe8999afd90b0f469e94d8400c451c20aeafc1f0b4f3efbbaf8c6cbc97096c94cd2dd6cb00141b
+SuiteSparse.v7.10.1+0.x86_64-linux-gnu.tar.gz/md5/19b877aa692b5aaf832f8a72b90407c3
+SuiteSparse.v7.10.1+0.x86_64-linux-gnu.tar.gz/sha512/62f4ebbcdf3a432588deb84d58201e939761b6e1dcde229e499a4cf0a5198130a83552709909c9b6a7483c0d9b165db1a80a6c85ec998d2569849d05236abfbe
+SuiteSparse.v7.10.1+0.x86_64-linux-musl.tar.gz/md5/c0cc0294e4fcdfa2bafc3945ff745bd2
+SuiteSparse.v7.10.1+0.x86_64-linux-musl.tar.gz/sha512/b66c6b67b4e2dcb02e4b9d3319f70ca13f16c381ce51559032316c69840dd9f545819579dc0fef987f7e11ca075bbfa8f7fa081eb65c7c1c3acd3bd742466e81
+SuiteSparse.v7.10.1+0.x86_64-unknown-freebsd.tar.gz/md5/d6480c9e4ee22fc52d0c78151abd1720
+SuiteSparse.v7.10.1+0.x86_64-unknown-freebsd.tar.gz/sha512/4828e8a68eccdbd020635c7a491afcbf96d7d83e231eb98d1bd325fae4daa396daf47adf4ba7e620ae30567a2d148892b47744668d4195e4b6c90ca2fbc3fc6a
+SuiteSparse.v7.10.1+0.x86_64-w64-mingw32.tar.gz/md5/ce32fda49383eca94f4a3ad56b4424a4
+SuiteSparse.v7.10.1+0.x86_64-w64-mingw32.tar.gz/sha512/7aa8f123e5486c336043b554c7e5ea2c8143a8f9ed47ae2146f3cb908d8834c3bca3c0cff73e14460c29015a031aeccd324ea7ff66a83fdce48d0283cb345b5d
diff --git a/deps/checksums/terminfo b/deps/checksums/terminfo
new file mode 100644
index 0000000000000..bd971e72b1be8
--- /dev/null
+++ b/deps/checksums/terminfo
@@ -0,0 +1,2 @@
+TermInfoDB-v2023.12.9.any.tar.gz/md5/573d9b5adaf6af500e3dfae6e3d15ebf
+TermInfoDB-v2023.12.9.any.tar.gz/sha512/e0a5bfe54346f9d5690a840628b329f6fac7375b0d29337bc70813ae3553a72bb397f8034d221c544289e40c4cfc685d5805777b7528f05bbe0123b5905c24a4
diff --git a/deps/checksums/unwind b/deps/checksums/unwind
index 7a3141d79368c..a288f21ada393 100644
--- a/deps/checksums/unwind
+++ b/deps/checksums/unwind
@@ -1,26 +1,30 @@
-LibUnwind.v1.5.0+4.aarch64-linux-gnu.tar.gz/md5/b40fee1e2995d3fa2c823c45b231d9f0
-LibUnwind.v1.5.0+4.aarch64-linux-gnu.tar.gz/sha512/d5865dabb541c3e1a5b6bc20547adc0788dde0f74731006e44e2cd128742c1ce61638a31340f8f4bfcd8b052706c3d57c24a202d048cb8d0496a909ff51fe9f7
-LibUnwind.v1.5.0+4.aarch64-linux-musl.tar.gz/md5/580b46908f43309c3f88c9ec4177d296
-LibUnwind.v1.5.0+4.aarch64-linux-musl.tar.gz/sha512/c12caa005586bea53932054d2742d6b55c40fd1a284daeb73924f3b761115929e022f3cf377b590d818e2c69726d42f12d4c87be2daf6d43caeaef54e226afdb
-LibUnwind.v1.5.0+4.armv6l-linux-gnueabihf.tar.gz/md5/5af8f16e7eb32718cde68ee840c373c2
-LibUnwind.v1.5.0+4.armv6l-linux-gnueabihf.tar.gz/sha512/71e6f64477bc356c42bf1604e61a2596dfdb90f5fc3005e6656f2aa5ba0576867e6b482501d3d3c68da623cf4d6c572e4fb9708a71988671b1bbe76d6c2e4754
-LibUnwind.v1.5.0+4.armv6l-linux-musleabihf.tar.gz/md5/446f9021d1903410ed9b2e400e2533af
-LibUnwind.v1.5.0+4.armv6l-linux-musleabihf.tar.gz/sha512/bf39ac9faea323c394e627647aaafacccdcd9545ac970b771dc4736376c56f0e1cfe58fead45625b7c491d91ae4f1dd41c3303d04536ef514c3a3657c06fd261
-LibUnwind.v1.5.0+4.armv7l-linux-gnueabihf.tar.gz/md5/ab594ba2df5cdc08dcf74ee2d0af9742
-LibUnwind.v1.5.0+4.armv7l-linux-gnueabihf.tar.gz/sha512/80f3b0c922b27d98fec1ba58f227af3c9d3e9691f34ed088152619289fa09b03a5b891162cd8ba497432867d60c2cd97a3466178c0891d848ded167e64f720ef
-LibUnwind.v1.5.0+4.armv7l-linux-musleabihf.tar.gz/md5/84cdf938ab0880447f242d86ad9e6d1d
-LibUnwind.v1.5.0+4.armv7l-linux-musleabihf.tar.gz/sha512/a985e9fc4e75cb292e7cb80ae0446110221a7f785818f53ac26c03dc2e142c959a6f380ffbceb43039dc95659e0da608b436d5faa5133f7d49308dd6198652f3
-LibUnwind.v1.5.0+4.i686-linux-gnu.tar.gz/md5/29a8d300b5edc3b25fc0c38d415ec4a7
-LibUnwind.v1.5.0+4.i686-linux-gnu.tar.gz/sha512/c96b954ee5736ad69a47e1214aac483ed2697a013749a696de823e2064bd5869590ae17c19268bf06227c9065b10bb36b197fb73987a74706fd37e0eefc17254
-LibUnwind.v1.5.0+4.i686-linux-musl.tar.gz/md5/fe8822d87cbad1abc4173a0c5c3f082f
-LibUnwind.v1.5.0+4.i686-linux-musl.tar.gz/sha512/ff09cdbb4046413c260df0058a2fb3c2daa56e656a038c1ff4c47b251254e08066ae3b8b144a02483e1ca7d92192d8e3c1b005adcf2dad26343219eab4c26d95
-LibUnwind.v1.5.0+4.powerpc64le-linux-gnu.tar.gz/md5/15eea5ef1f4ad04cc8fb8f701571233f
-LibUnwind.v1.5.0+4.powerpc64le-linux-gnu.tar.gz/sha512/875d50cea141397783c4d3062a08a1951fb14c96e9c99489ddeb91f94f403c48e8d358c181b6649198318586463efedd1b5f991acc792d8412a6ad2c810c568e
-LibUnwind.v1.5.0+4.x86_64-linux-gnu.tar.gz/md5/2b7b2264763d10f39c548b3f23ea1a95
-LibUnwind.v1.5.0+4.x86_64-linux-gnu.tar.gz/sha512/7e76ae26ce7f6f60020af0908c7197e28204a8b290022af7dd92b17d64b01d68338d347e3f78a5946fef2faec3cd3f1c274bc55de1472a6245867b8e5219dd0a
-LibUnwind.v1.5.0+4.x86_64-linux-musl.tar.gz/md5/84789e4ee681fbe4697e02431ab1004b
-LibUnwind.v1.5.0+4.x86_64-linux-musl.tar.gz/sha512/e8166e2efbb70a3b492551556c72181c505b8cdb2e5d528caa69b32727c59f3e065e4455fdd9749878bb6d1ab5962ca7dfe2ebc9efa6dbdb0bebd210bd16c6a7
-LibUnwind.v1.5.0+4.x86_64-unknown-freebsd.tar.gz/md5/f35f256dd24183f72a932946c07073b0
-LibUnwind.v1.5.0+4.x86_64-unknown-freebsd.tar.gz/sha512/de80153025ba3e4192c8faf3f7c5f5a0044d4580f8cb56f4c0206f7030cbeeb406cdd064f87b4568392c06e96b9e32fc07c55b68b92e8cc5d596fb79040ecb78
-libunwind-1.5.0.tar.gz/md5/c6923dda0675f6a4ef21426164dc8b6a
-libunwind-1.5.0.tar.gz/sha512/1df20ca7a8cee2f2e61294fa9b677e88fec52e9d5a329f88d05c2671c69fa462f6c18808c97ca9ff664ef57292537a844f00b18d142b1938c9da701ca95a4bab
+LibUnwind.v1.8.3+0.aarch64-linux-gnu.tar.gz/md5/18ad2df05a31c2d2cce958d408993c9c
+LibUnwind.v1.8.3+0.aarch64-linux-gnu.tar.gz/sha512/81cd49551c4417bef7d9ce4aeb05f3e4e97d9bbac9b2d8db6bc5da7d83946b3f9af867833ff9bb474309b844533f28a4ec44845c1c943b5d43f08eb2d743461c
+LibUnwind.v1.8.3+0.aarch64-linux-musl.tar.gz/md5/5ea5ef50c9b820da833af1ec3d4501fd
+LibUnwind.v1.8.3+0.aarch64-linux-musl.tar.gz/sha512/1f3f851e16c96b9bd6ef8191193a5e1c2cf60b8abb0a9a48597b9435c27c6a45b691af57f2abd956b037398b086e9c4c039fbab388da9ef9ca683501737b0a98
+LibUnwind.v1.8.3+0.aarch64-unknown-freebsd.tar.gz/md5/8e87a7450f759c739ea916ca362b7e2b
+LibUnwind.v1.8.3+0.aarch64-unknown-freebsd.tar.gz/sha512/2daf3fc1a6155816ac0bfc7129336ec30e92ced2e2be68330596b9f3e9bffaf79c1ca734027898039261b57da49789ff0323f08f3cb45d5746279200509b9fbb
+LibUnwind.v1.8.3+0.armv6l-linux-gnueabihf.tar.gz/md5/ebb4a0aa515d3b28eb484d6d2f0baf1f
+LibUnwind.v1.8.3+0.armv6l-linux-gnueabihf.tar.gz/sha512/9ad442c0cd127bba9fe313a580e475ad6377ba200c13bea683c5afd443a13b72c76ea5fc89072441ab9f467d932a460a268d6c839c2de4700d294d79ce349458
+LibUnwind.v1.8.3+0.armv6l-linux-musleabihf.tar.gz/md5/38131a7e358fe3b1fd9e60f04ef8c6e5
+LibUnwind.v1.8.3+0.armv6l-linux-musleabihf.tar.gz/sha512/b4afead381719513f2e0a1718bcce4a37118caa10a94fa66410bf36b32274795ed378eb6ee2f1b5c2159ca8d41eac230f4e73c644d6e79acd73d74c02a6334bc
+LibUnwind.v1.8.3+0.armv7l-linux-gnueabihf.tar.gz/md5/cdc30f2982ec96d6dedf87275f15a99a
+LibUnwind.v1.8.3+0.armv7l-linux-gnueabihf.tar.gz/sha512/9d0591cd0b5b2cd4834e05ade42756a41e5b4d14d8b78d1f3a40e60b0771d2f15687639ad3f11be1b83baf32b72f496f838c268420b19f7452a4a7ccf1a312a5
+LibUnwind.v1.8.3+0.armv7l-linux-musleabihf.tar.gz/md5/58eeae893dd353998c0ff16fae07b7e1
+LibUnwind.v1.8.3+0.armv7l-linux-musleabihf.tar.gz/sha512/8cd46facfc2008e602411e94ad2857370c54b32435f110e93f735cc3f47b72319f7221c1b31a7126d13ca6a9c25fa7bf5682ce02ae061e7cd84a4bd3a8469a8f
+LibUnwind.v1.8.3+0.i686-linux-gnu.tar.gz/md5/98f6cee47066768f581083fe0a497cbc
+LibUnwind.v1.8.3+0.i686-linux-gnu.tar.gz/sha512/478a081e56649e94478453f8f2301f02de3f191963cb88e5f21440efed788f35d64931c30355ba7f3c18d612112cf53c6feb942c79b6f1b5e69abf9bcf65fa56
+LibUnwind.v1.8.3+0.i686-linux-musl.tar.gz/md5/de5c5c4876052960c91b00fbc7196988
+LibUnwind.v1.8.3+0.i686-linux-musl.tar.gz/sha512/c8f525b231ca7b41625e98740b9aa03b0424e08ec962fe1b5728762560108bfb9b33d9e93e73254872c979edea6930f1cb466d235b9e2d9845d9dd986de5f120
+LibUnwind.v1.8.3+0.powerpc64le-linux-gnu.tar.gz/md5/4e9da901c9f7c0a11b27aa09caed8a8d
+LibUnwind.v1.8.3+0.powerpc64le-linux-gnu.tar.gz/sha512/7d41eb48bddcd57c2a417a67a857c54eedc952767383e5e9bb958ce75e61dc3893eb472f6b93542c51af0341328d0be1d71b9b85a9c6aa693839e0f90afa0fdb
+LibUnwind.v1.8.3+0.riscv64-linux-gnu.tar.gz/md5/3438341cc02f25e175236cfdd715df48
+LibUnwind.v1.8.3+0.riscv64-linux-gnu.tar.gz/sha512/7196cca42045af7490a4a0b4c42fd9fca544175e5ad5bf67f1f9978f3413ff198a0cc8c0c6888b95f749acb303dfd81d1325188d54a0405ffa765f5d3a64d37f
+LibUnwind.v1.8.3+0.x86_64-linux-gnu.tar.gz/md5/8b5cc28cbacceab1278ecea8e1cb407c
+LibUnwind.v1.8.3+0.x86_64-linux-gnu.tar.gz/sha512/2762d318656f1624837d1a0e34f78065d3b24f0dae5d99b517e9d5e9100dce47e09450bf185025cfaac4415c502e9f0012ab76b08bd40fff6f7b94f42002fe82
+LibUnwind.v1.8.3+0.x86_64-linux-musl.tar.gz/md5/a0c17833154c9bbfd5857230b98993dc
+LibUnwind.v1.8.3+0.x86_64-linux-musl.tar.gz/sha512/916e0f4b305421ec0dc30d6e3eba4c3e7bc0d4f84132462fbeb1743365f88c2afb3930f739936dc8167ba885d7f5dd9a512a25a38107a1a7f8519508903f1ca3
+LibUnwind.v1.8.3+0.x86_64-unknown-freebsd.tar.gz/md5/1d281dcbfd15663cd75e5dca1a7e9732
+LibUnwind.v1.8.3+0.x86_64-unknown-freebsd.tar.gz/sha512/4be2637eea0d1faccc64376ee798875c1ed95b769a0dbd8e36e7410fa890ef9a2728a9b2e52c8d4fbae23dad4c1b52b9e096d79bb49cff23db27870230f1a5c7
+libunwind-1.8.3.tar.gz/md5/13bc7b41462ac6ea157d350eaf6c1503
+libunwind-1.8.3.tar.gz/sha512/732c9b74635cf4e65bc90fd5b9a4b90fe5968c7fcce6cc704295038f0d41b848f78a59d725e813072e78df72ff8fc77001096abdfb9ef1c73e3bd82433987679
diff --git a/deps/checksums/utf8proc b/deps/checksums/utf8proc
index c1b2a6779e555..5a2aeb0475ed7 100644
--- a/deps/checksums/utf8proc
+++ b/deps/checksums/utf8proc
@@ -1,2 +1,2 @@
-utf8proc-1cb28a66ca79a0845e99433fd1056257456cef8b.tar.gz/md5/aff37aadd1b02cad3259683e8a5f4543
-utf8proc-1cb28a66ca79a0845e99433fd1056257456cef8b.tar.gz/sha512/3ee433e5577e01f334aa4224275dfb7ee6ae7c785013df3eee6fc0488218d3bc895649811589edf57461c6520ad70437fbf6a376959a6a6f70bd920eb01c5001
+utf8proc-53f0bac76f5d5bd3ed69d1a06ee5b200c8940610.tar.gz/md5/4efb2eee18de062fd0a59607e862ed51
+utf8proc-53f0bac76f5d5bd3ed69d1a06ee5b200c8940610.tar.gz/sha512/5742df3e2e0e1dc47fa1641197162c3b6a0896322ff3b2e9eaafc88f7b073da0d607c6f3255c87ea7ee0ecb34a0eaa2063e170571b38cde6145f38f500bf1c96
diff --git a/deps/checksums/zlib b/deps/checksums/zlib
index 15e2cffa5b485..bd651003399b9 100644
--- a/deps/checksums/zlib
+++ b/deps/checksums/zlib
@@ -1,34 +1,38 @@
-Zlib.v1.2.13+0.aarch64-apple-darwin.tar.gz/md5/64403a5962d70d7e4b6bf7c225526144
-Zlib.v1.2.13+0.aarch64-apple-darwin.tar.gz/sha512/a7e6bb32c324943e5df3fa8501ee9d744d132db6f27033fe8ce789c1f19f26c15dc456ee8d6fc8095b427054e750ffe268500f5f69edecaa1af230b4b23535c4
-Zlib.v1.2.13+0.aarch64-linux-gnu.tar.gz/md5/a2d3265543017db03bc47b9d9778d99d
-Zlib.v1.2.13+0.aarch64-linux-gnu.tar.gz/sha512/c8143445222e151d7f522a98ee8f2742571542f4e71d515e88086c9d7f27b952662ced93f40c795e0de42e3a07c0cb5e1d9d8e792347f3c068cb07ccc144a640
-Zlib.v1.2.13+0.aarch64-linux-musl.tar.gz/md5/c1f2a1c562f72c7aa4b228f57c2346d4
-Zlib.v1.2.13+0.aarch64-linux-musl.tar.gz/sha512/7ed89bc7696690c03617c7413f5456ff5a1caa0dd600880ae67132f6c9190672ae451a06d23956a1969be00bf5c8f29bfa4f5bc4ab646b3b375c350f67c993e5
-Zlib.v1.2.13+0.armv6l-linux-gnueabihf.tar.gz/md5/7dff966f7bc5dd2902fa9ce20444235b
-Zlib.v1.2.13+0.armv6l-linux-gnueabihf.tar.gz/sha512/49e7b4a7c84996b697cf944b11ce06ce6064983a6a911c4539587385afa1e0119e3b1dbf816703a2c132acc90f7f114ec10631647638b59b14954382c1a82014
-Zlib.v1.2.13+0.armv6l-linux-musleabihf.tar.gz/md5/6982f19d2446559c0fd369afe84ebe4a
-Zlib.v1.2.13+0.armv6l-linux-musleabihf.tar.gz/sha512/8f69dfb7fb91cd6f7c934e1acddd83f77c2ebcc1732553f41ae1adcb7805a3304d16062133ce5094a8aea18ff5eca5f7a2df5724ae5a5cb9137caee732c1bf36
-Zlib.v1.2.13+0.armv7l-linux-gnueabihf.tar.gz/md5/30579a91f8f1c96752fe9a82bc053523
-Zlib.v1.2.13+0.armv7l-linux-gnueabihf.tar.gz/sha512/64f6a0e66ee13b086609e0d070c8742de20052e1ef43da201be0007e478c65b2f0a28a3c19ca5be6537b7c8bbeb6a4b2886c15a1e47bb2bd1cfe9d5e1590a620
-Zlib.v1.2.13+0.armv7l-linux-musleabihf.tar.gz/md5/b052ad151dbc3bad78762bc06164d667
-Zlib.v1.2.13+0.armv7l-linux-musleabihf.tar.gz/sha512/b5d2de09a4d65d898cf9ba0db34327c712f42a78cd1fd0f1d77fd8798910502049be63ccfed23de5fe3b499d9e0fe3d4cbb07c72765fd54db275e92f8f1e4dc4
-Zlib.v1.2.13+0.i686-linux-gnu.tar.gz/md5/3074702010889f586b43aa3dbbda4ceb
-Zlib.v1.2.13+0.i686-linux-gnu.tar.gz/sha512/92aa87c5aa3831155305276c2f0da091b5be4e8a396772e1a28650c2837ceb116dd2207329732b653a97c011abd7dd6ac1fc9574ac64cb3049ccd36fa6700748
-Zlib.v1.2.13+0.i686-linux-musl.tar.gz/md5/eff02476825ea7a53ab26b346d58f96e
-Zlib.v1.2.13+0.i686-linux-musl.tar.gz/sha512/14b72607d524948198e999e3919ee01046c049b3ec441bc581c77642cf37c3d28cc3c5500a3c073d62e9b8dc1efc9661b23bb925ed9c80b5e69abaddbcb59115
-Zlib.v1.2.13+0.i686-w64-mingw32.tar.gz/md5/279d2699458b1dfec80da17dd6f32f02
-Zlib.v1.2.13+0.i686-w64-mingw32.tar.gz/sha512/fb14d27b4f4ed5eb75bf4d4377074a206610558301be89ed692cf61d1266e425edb0489511fbbec100dafc71cff2cac863a4ea4ec70cfaa94e8175b9b7add25c
-Zlib.v1.2.13+0.powerpc64le-linux-gnu.tar.gz/md5/bc69de101d9159b22b7a334e2700faa6
-Zlib.v1.2.13+0.powerpc64le-linux-gnu.tar.gz/sha512/174eb4f154594d268d970d23eb6144dd2f6be41ddcfb9bc756b2ff48f0781ad0ed6571e2ead64dab0967da91517a02cd8db2b0e33a0bde9400103b5204f78e85
-Zlib.v1.2.13+0.x86_64-apple-darwin.tar.gz/md5/9a53075fc5595e638bacd25341f7ff42
-Zlib.v1.2.13+0.x86_64-apple-darwin.tar.gz/sha512/8124f677c036a288575712e201a809f44532b300fa56f8c12be9a1d7094fd644cb198c47b63d9f9f16d5509e27e7b3c59f080d4748ae489a4977fdfeae79e762
-Zlib.v1.2.13+0.x86_64-linux-gnu.tar.gz/md5/b192d547d56124262e2ae744f385efd6
-Zlib.v1.2.13+0.x86_64-linux-gnu.tar.gz/sha512/c6dca3c0a713ef2e2296bc9e9afa75e103a4cc4f00b5c905ebc5cff688904d6a454f83ab5ef3b6c66bdf425daa2fcd25825e50a3534c0ff109b13affbb686179
-Zlib.v1.2.13+0.x86_64-linux-musl.tar.gz/md5/f2a466b38b2ff1c895f630982147a950
-Zlib.v1.2.13+0.x86_64-linux-musl.tar.gz/sha512/191261d37fc501591005bf680d76bf518da261252456c4fef1c12bc572f9200a855fbd1b125bb8ad10d803eedbc53d4c9d7a2861e9a35d629fb40f87e5306f5f
-Zlib.v1.2.13+0.x86_64-unknown-freebsd.tar.gz/md5/00cb91c5edede46f72fae113b3115799
-Zlib.v1.2.13+0.x86_64-unknown-freebsd.tar.gz/sha512/8894e4a89dbf10e60ed020993484dcad91a52a8d310f3dfcc53808643c8401b1e445db46a815c19d55c0e5fd1a386945d1253c16af94b00ff27ccda44941f69b
-Zlib.v1.2.13+0.x86_64-w64-mingw32.tar.gz/md5/f98c68e19d9cfd24c7cec0b79d374e05
-Zlib.v1.2.13+0.x86_64-w64-mingw32.tar.gz/sha512/8e68edbdfe4e2ec6de70a724e30bc2df439901291639eca9e5aace75e31c7c6d3f47021213b8b7473b1f6ad4986f6b8695da4e24e2ea3025681e5d07dcfc067d
-zlib-04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz/md5/60a49c89b9409dd91c1b039266f7bd0c
-zlib-04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz/sha512/83122539da9399ce5f51c2ecbc38a627405334a9a6d53a024341353c1263a1e3aef7498f30ee281a49b3022be70e992eae475691e33da7a9c6a59b83207bd688
+Zlib.v1.3.1+2.aarch64-apple-darwin.tar.gz/md5/938c376c7513fa48d4b8b78cea741260
+Zlib.v1.3.1+2.aarch64-apple-darwin.tar.gz/sha512/ccece3f5618efe3e3699eb521167e2ee768932ea6f4e411d36619a941af3e9e32394beb260171d557930382f412f9be70f4c69215d3f7e448d4446b1690111ee
+Zlib.v1.3.1+2.aarch64-linux-gnu.tar.gz/md5/44a14273caeea9c5cb34ce3e0ba9d1fc
+Zlib.v1.3.1+2.aarch64-linux-gnu.tar.gz/sha512/8977bdc225404a01746fc14885e4823b4e2781c73a75e0ee0c8d9ca58b706c6cf9f98647b4e22bb09e7e09640caf4643e5210054a4624e06c76fc3eb2c2a2728
+Zlib.v1.3.1+2.aarch64-linux-musl.tar.gz/md5/dcef6c714555de9b2181b8c5b0a2c668
+Zlib.v1.3.1+2.aarch64-linux-musl.tar.gz/sha512/499701cc0fd1e52f3952da1b3c4377662c54390db9ebd6f5be82ecc0ba8754d2ca42b2f572b3a78ccdef30e527b7bed22c15511944f1299398587c529f8f4619
+Zlib.v1.3.1+2.aarch64-unknown-freebsd.tar.gz/md5/166f8a076a01a6f0979c712d7cec44e8
+Zlib.v1.3.1+2.aarch64-unknown-freebsd.tar.gz/sha512/7a1546b614cb5e2c0566774247269740d881c0a6d22ef6dca8010d77829b4e64594f4e609bb83299fa239d66909a4eb046d6d078268006723747f86e6c733e6b
+Zlib.v1.3.1+2.armv6l-linux-gnueabihf.tar.gz/md5/1f0bcb50b545badbc9de1569f51c4668
+Zlib.v1.3.1+2.armv6l-linux-gnueabihf.tar.gz/sha512/1e4bea6fa41300ec770822dcd9335d1393c087db45d128e2c60d9315db01a69c984c98304b83af0725a99ae3a5cac4a273f4eea8a4213454608edbe0e55c74ce
+Zlib.v1.3.1+2.armv6l-linux-musleabihf.tar.gz/md5/3a78103181bf8a74dfc0c6f7681bd3de
+Zlib.v1.3.1+2.armv6l-linux-musleabihf.tar.gz/sha512/2a7c70266fd5928e46c8d71d95884054eaff2432d9fbce37eef67eb62af2b087f5f9fa3752a5d14f50cd058519d39a1b81450b30786a4f66eafbd16d18ef7b6b
+Zlib.v1.3.1+2.armv7l-linux-gnueabihf.tar.gz/md5/4e202f829e7f478451e93da7be2b6f98
+Zlib.v1.3.1+2.armv7l-linux-gnueabihf.tar.gz/sha512/0734bc8a84b039b971a15620adb9b5da77d1b1992fb4c6adf9031fa8c592512645d424d2ce752efdda1f300f871c3d4f3b159794c3725fd113e1acd5512aed59
+Zlib.v1.3.1+2.armv7l-linux-musleabihf.tar.gz/md5/5000d1941b7e32dec4a2d125bbd22fff
+Zlib.v1.3.1+2.armv7l-linux-musleabihf.tar.gz/sha512/6abd69ef6878fa6cdcf7fe94e4d7aedaae58d961122e131a414f3aea43b401a3812d9d847ab4b1690e9faf89d577935d7f547484edb6cb2814cbc1156159e8ed
+Zlib.v1.3.1+2.i686-linux-gnu.tar.gz/md5/7a5de529294b9d8dba4ac1eeb4cbcbdc
+Zlib.v1.3.1+2.i686-linux-gnu.tar.gz/sha512/72d52c4e2f01fe1436b22c854efff83068f6a65a9280556018c77bb843f81902c0c96f30132123d4dd6a66041e9391a418ceec227b2b2411f99a26df76d21c74
+Zlib.v1.3.1+2.i686-linux-musl.tar.gz/md5/d18b442e4d108397482fd852deb4241e
+Zlib.v1.3.1+2.i686-linux-musl.tar.gz/sha512/6c367f7c522167db510cf42a84dfcce75fc129bb49800c05b90dfdfa5fb31fa20ed020e165f1b283b81f4568c9bf79d4c41f7ad4e42a3513cb13149a50707114
+Zlib.v1.3.1+2.i686-w64-mingw32.tar.gz/md5/b2c8af112298ae9e635054e4ba22e5ae
+Zlib.v1.3.1+2.i686-w64-mingw32.tar.gz/sha512/0a28076fc8cf8daa925f4be76dd0025d01d8ad6bc269f87164749da0c3bea6f4b404ef097a2907ce2c009211d9f8387f844fe5e5b1bd2f6d77c9b35b2b1c7548
+Zlib.v1.3.1+2.powerpc64le-linux-gnu.tar.gz/md5/9ae4feb621ae088c323ff12235bdf5db
+Zlib.v1.3.1+2.powerpc64le-linux-gnu.tar.gz/sha512/912134f741fe96217d1b8465510ac82d238d3d8a21519534fb88c568092dcc9eb8e51ef66b7ca56a2a7a881474f04edb7a6f7bf1ebf06bfff7708b3edd3487c0
+Zlib.v1.3.1+2.riscv64-linux-gnu.tar.gz/md5/43b61473a02e492f932ce60f726966a8
+Zlib.v1.3.1+2.riscv64-linux-gnu.tar.gz/sha512/4742503831da6a6b7945c6e5efd96bd6b03b8a63b73e68181e961b6f326ac5d805350219c43f4537165d1af0ac2ed496d5a72edd0c1d11e012ef12385a8f2e5f
+Zlib.v1.3.1+2.x86_64-apple-darwin.tar.gz/md5/347a92686d81ed7c022d2e7310babb77
+Zlib.v1.3.1+2.x86_64-apple-darwin.tar.gz/sha512/a59b9c4f63124c893a2a11b886bbe03bfc75846599eb21997652bd62a2f28afe754d16127e964683552423bf1c0da160e46c32d0b584ed07e28f4e91904b9c67
+Zlib.v1.3.1+2.x86_64-linux-gnu.tar.gz/md5/0630f603e35ab56efdef331e29db436b
+Zlib.v1.3.1+2.x86_64-linux-gnu.tar.gz/sha512/b936c328fad685c22473ff4cbfcc8bc48772ed9927c5b29c5d0503b95888efd0ca8d891f53cca45da7b5768ed4a1f6994f9e211167f4704c15c688cced90cac5
+Zlib.v1.3.1+2.x86_64-linux-musl.tar.gz/md5/252d8542bb5a53c479f4ffc067847e27
+Zlib.v1.3.1+2.x86_64-linux-musl.tar.gz/sha512/449809acbbff1fcbd89b9689e803f69d1f9cb49860f0b508b69c560cfcb51232640fcff17ede6ea75d9906edb5a8f38139afd890f18a34260ef5dbb5d167af36
+Zlib.v1.3.1+2.x86_64-unknown-freebsd.tar.gz/md5/79fa906629dff81c38b93001a7798040
+Zlib.v1.3.1+2.x86_64-unknown-freebsd.tar.gz/sha512/53dbcce99b2d6ec82ef86d76f3f574db304ab90f1b131c49b2c06f89bd2426afa4a31bfa8455e2ecdad64d4da71fef1b2d79f471efd55a8bbdc29e95c952a289
+Zlib.v1.3.1+2.x86_64-w64-mingw32.tar.gz/md5/92b083205ca44db131b7cf6b9c09eb21
+Zlib.v1.3.1+2.x86_64-w64-mingw32.tar.gz/sha512/1c3b7b414f09b1896c5a08f156c5e55f07ee012cf3f6fe50d5ba116405dcd9a80e5004ee7c774f7cc981e4d1b099efee85e16b8417cef2734cb7c12ec440d09a
+zlib-51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf.tar.gz/md5/7ce1b2766499af7d948130113b649028
+zlib-51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf.tar.gz/sha512/79d032b8c93260ce6b9806f2289cdccce67e9d80865b5bb39ac46dadffc8ee009da51c551eead59c56249c7adfa164c1d5ebcf2b10a8645e0b11b5650176cb24
diff --git a/deps/checksums/zstd b/deps/checksums/zstd
new file mode 100644
index 0000000000000..aea151b266966
--- /dev/null
+++ b/deps/checksums/zstd
@@ -0,0 +1,38 @@
+Zstd.v1.5.7+1.aarch64-apple-darwin.tar.gz/md5/d6b2fb32d705078dbc369986ac8b056b
+Zstd.v1.5.7+1.aarch64-apple-darwin.tar.gz/sha512/5dfcf36087ce8540b1f6a04181adee962e2164a763e758ac5cc256c332756774b381ca58e26641a15ce555d59641690a6da72a67bf935d8611734f2006bde504
+Zstd.v1.5.7+1.aarch64-linux-gnu.tar.gz/md5/0c627ec83e426383c25eb4bc297f3548
+Zstd.v1.5.7+1.aarch64-linux-gnu.tar.gz/sha512/1fdcf77e877f0676fc26a05e0cc20a1d6e1df731d81e0bba9a5657131116bbea75da4d38953969d8d07dce0bf2d7654075dbb285ebe5f4588c446e88774336c8
+Zstd.v1.5.7+1.aarch64-linux-musl.tar.gz/md5/cc9ada74a19db50d7dd6edd05866c902
+Zstd.v1.5.7+1.aarch64-linux-musl.tar.gz/sha512/0b33c0df144bb1e95290685f01695b26da834a70a365c0362314cb001ba611962a0876bc5baac31f19c80bcb110e030fb9840a56761b4d29a7893ca65f95b111
+Zstd.v1.5.7+1.aarch64-unknown-freebsd.tar.gz/md5/5daa5b2bf2b856c448feaa8329d0de1b
+Zstd.v1.5.7+1.aarch64-unknown-freebsd.tar.gz/sha512/b39d025463b4bf21295fd5bbff91ba501506b3480363cdcfe6dd2f11d2e0afaf130f6c74d962e503fccb7a55bfcad0504ebb19f18b6b5c8b8103e7b9919df536
+Zstd.v1.5.7+1.armv6l-linux-gnueabihf.tar.gz/md5/f4218e8b4f8d415df49aeba9d43f0ba0
+Zstd.v1.5.7+1.armv6l-linux-gnueabihf.tar.gz/sha512/878d4f90160c6b0c341c61ecafbf5f5cb89c73db3175f272adc666bc25c88b127145d78946bc0fcb992489b54fbb48089bfcacf768397fc5d54d7cae4aeae9f9
+Zstd.v1.5.7+1.armv6l-linux-musleabihf.tar.gz/md5/3c2e132ca47e6d1d23c149fdde9d8bd5
+Zstd.v1.5.7+1.armv6l-linux-musleabihf.tar.gz/sha512/3745d99c9ca0ce9f98ff9393e405e8b382d05573a972067d57e800e282a9544fff7bc3d49b91eccc98d7736acdc3faa4c637911d79fab10f5a691d33ae775574
+Zstd.v1.5.7+1.armv7l-linux-gnueabihf.tar.gz/md5/926d765281bef388ecc25d04cbb66102
+Zstd.v1.5.7+1.armv7l-linux-gnueabihf.tar.gz/sha512/2d2c14587e2e7b2b147cb6423720cc30ed6aa57ed07372a1aa54e7f2e6badb5aa640b116e83371561d6f8f3a1b3f7fff7f6df137f8c7be788ee889bb30273eae
+Zstd.v1.5.7+1.armv7l-linux-musleabihf.tar.gz/md5/c25420561ce254e57d74e30c88fc53dd
+Zstd.v1.5.7+1.armv7l-linux-musleabihf.tar.gz/sha512/2f924e2089589057e8713d04db9a1cb2f2d571ad9e7eeda3b7f898c9a75f8fecf0647f2185d3c01fc3b399d3662ff3b1acb13429c8a953f0394a3ed9ca30b877
+Zstd.v1.5.7+1.i686-linux-gnu.tar.gz/md5/3314bf1b52f2295555fb4ae44b1d9331
+Zstd.v1.5.7+1.i686-linux-gnu.tar.gz/sha512/91502910a0c9b786d91499477fee2445b8f6de6bcb71af7d79c738ea2430c67cb1957866383ee3921ed1a23c53a80be19aea6abcf0e76056ffee69583728c3ed
+Zstd.v1.5.7+1.i686-linux-musl.tar.gz/md5/845eddc06527a4c4b196666f7ac64ba3
+Zstd.v1.5.7+1.i686-linux-musl.tar.gz/sha512/bb15b4327cef32be38c2fd68afedb3245c7db881ad66d3ece2198ff3034be9c12efa3d62bcba2b8e6056e7d8cb5f1b3e33726f7d1e1bead235c38f8fa985b557
+Zstd.v1.5.7+1.i686-w64-mingw32.tar.gz/md5/9bc0b3c951f5e66393fd5433bf60a2c8
+Zstd.v1.5.7+1.i686-w64-mingw32.tar.gz/sha512/550b0189097e569f98404aa836b76a5cbdc36428292214c4af8916dea2713440cf3ba94125b3e5fa0c65b2bcb916733094fdef906ad19f923d90dabfc961c75a
+Zstd.v1.5.7+1.powerpc64le-linux-gnu.tar.gz/md5/468d930de7a27af961996e7c6ed35298
+Zstd.v1.5.7+1.powerpc64le-linux-gnu.tar.gz/sha512/d680715b1ac9ff07d5662c499fbab67757509599335f861158b9dba32fe9b22da6e52d0db6b402dd4542799621ad3dccf254dfd9d3c8748bbd22f7446681539a
+Zstd.v1.5.7+1.riscv64-linux-gnu.tar.gz/md5/b93fef8db2b0b4417f7836d73c5fbe86
+Zstd.v1.5.7+1.riscv64-linux-gnu.tar.gz/sha512/9f3ee42c7952aba2d2c26252f058bb7ab96828fafc978c9273b500ef15ccd271c51399d4b93eebd4c832b087ab5ed8a4847104ce9c83c9483aaa13c22df681bb
+Zstd.v1.5.7+1.x86_64-apple-darwin.tar.gz/md5/29a260789fae6f6b6df0e5cebdafd615
+Zstd.v1.5.7+1.x86_64-apple-darwin.tar.gz/sha512/015045a1b7a477504057cb4c87428d42386218e48af38f83739dbe6b93961ca2c8dd4d794377a2d54b8cc284f5a467e3358d4f534cf8bcbcad886ef8cea038e9
+Zstd.v1.5.7+1.x86_64-linux-gnu.tar.gz/md5/06656befb6ef9a8cc7f56e7152c2acc5
+Zstd.v1.5.7+1.x86_64-linux-gnu.tar.gz/sha512/16aea0d95432a87d21d9a6f55d84e45df85caf1fda77c75b7e9a8bba519605168585f21a812773ddf1075d9bad68412e63b8cad1a143420e25ae4405bb41842e
+Zstd.v1.5.7+1.x86_64-linux-musl.tar.gz/md5/da13dd1cc0d20ba9a06e9e79a588cda4
+Zstd.v1.5.7+1.x86_64-linux-musl.tar.gz/sha512/cd4218fa92dcf8772390788d5654ca12132af7829fb0ada016f3c663e2045e29e7d7587f2f5a4f057020cacca17c188c8537f284b1456100d57e84bb47c40e77
+Zstd.v1.5.7+1.x86_64-unknown-freebsd.tar.gz/md5/bce5f37e53e330bfe4df4a28cf5c223b
+Zstd.v1.5.7+1.x86_64-unknown-freebsd.tar.gz/sha512/8f6bd7664efea537ac7815db0604ca1a07bcfb71b5152c22dc7f0a11b57643f059c341fa71d315407e2333e4c97e43e214471c73eed8b977680785302c7c2b3e
+Zstd.v1.5.7+1.x86_64-w64-mingw32.tar.gz/md5/7cf3a740fa174004b94125e8754f4a19
+Zstd.v1.5.7+1.x86_64-w64-mingw32.tar.gz/sha512/faac37ad4dacb0f083364c593cd3bd1c0b592947341a631bd2fbc4081361d97ef89482f4459c46ad37ae030aa900c62305a8525e64a2ad8e91204d76dda89db1
+zstd-f8745da6ff1ad1e7bab384bd1f9d742439278e99.tar.gz/md5/a679d9aa86549b5851100ac5d4044c68
+zstd-f8745da6ff1ad1e7bab384bd1f9d742439278e99.tar.gz/sha512/27c6fff165abea694d91311a6657a939433ba1d707147ed9072b5e4ecce259b929970306788e0c3e95db38ce85e894e5025936b1faa81cf67741b8464e24fc4e
diff --git a/deps/clang.version b/deps/clang.version
index d291dc8e8f8d8..a2412a0e2f3cb 100644
--- a/deps/clang.version
+++ b/deps/clang.version
@@ -1,4 +1,6 @@
+# -*- makefile -*-
+
 ## jll artifact
 # Clang (paired with LLVM, only here as a JLL download)
 CLANG_JLL_NAME := Clang
-CLANG_JLL_VER  := 15.0.7+5
+CLANG_JLL_VER  := 20.1.8+0
diff --git a/deps/csl.mk b/deps/csl.mk
index 457e276c66709..51368187c55fc 100644
--- a/deps/csl.mk
+++ b/deps/csl.mk
@@ -1,13 +1,13 @@
 # Interrogate the fortran compiler (which is always GCC based) on where it is keeping its libraries
 STD_LIB_PATH := $(shell LANG=C $(FC) -print-search-dirs 2>/dev/null | grep '^programs: =' | sed -e "s/^programs: =//")
-STD_LIB_PATH += :$(shell LANG=C $(FC) -print-search-dirs 2>/dev/null | grep '^libraries: =' | sed -e "s/^libraries: =//")
-ifneq (,$(findstring CYGWIN,$(BUILD_OS))) # the cygwin-mingw32 compiler lies about it search directory paths
-STD_LIB_PATH := $(shell echo '$(STD_LIB_PATH)' | sed -e "s!/lib/!/bin/!g")
+STD_LIB_PATH += $(PATHSEP)$(shell LANG=C $(FC) -print-search-dirs 2>/dev/null | grep '^libraries: =' | sed -e "s/^libraries: =//")
+ifeq ($(BUILD_OS),WINNT)  # the mingw compiler lies about it search directory paths
+STD_LIB_PATH += $(shell echo '$(STD_LIB_PATH)' | sed -e "s!/lib/!/bin/!g")
 endif
 
-# Given a colon-separated list of paths in $(2), find the location of the library given in $(1)
+# Given a $(PATHSEP)-separated list of paths in $(2), find the location of the library given in $(1)
 define pathsearch
-$(firstword $(wildcard $(addsuffix /$(1),$(subst :, ,$(2)))))
+$(firstword $(wildcard $(addsuffix /$(1),$(subst $(PATHSEP), ,$(2)))))
 endef
 
 # CSL bundles lots of system compiler libraries, and while it is quite bleeding-edge
@@ -32,8 +32,8 @@ ifeq ($(USE_SYSTEM_CSL),1)
 USE_BINARYBUILDER_CSL ?= 0
 else
 # If it's not, see if we should disable it due to `libstdc++` being newer:
-LIBSTDCXX_PATH := $(eval $(call pathsearch,libstdc++,$(STD_LIB_PATH)))
-ifneq (,$(and $(LIBSTDCXX_PATH),$(shell objdump -p $(LIBSTDCXX_PATH) | grep $(CSL_NEXT_GLIBCXX_VERSION))))
+LIBSTDCXX_PATH := $(call pathsearch,$(call versioned_libname,libstdc++,6),$(STD_LIB_PATH))
+ifneq (,$(and $(LIBSTDCXX_PATH),$(shell objdump -p '$(LIBSTDCXX_PATH)' | grep '$(CSL_NEXT_GLIBCXX_VERSION)')))
 # Found `libstdc++`, grepped it for strings and found a `GLIBCXX` symbol
 # that is newer that whatever we have in CSL.  Default to not using BB.
 USE_BINARYBUILDER_CSL ?= 0
@@ -50,8 +50,15 @@ ifeq ($(USE_BINARYBUILDER_CSL),0)
 define copy_csl
 install-csl: | $$(build_shlibdir) $$(build_shlibdir)/$(1)
 $$(build_shlibdir)/$(1): | $$(build_shlibdir)
-	-@SRC_LIB=$$(call pathsearch,$(1),$$(STD_LIB_PATH)); \
-	[ -n "$$$${SRC_LIB}" ] && cp $$$${SRC_LIB} $$(build_shlibdir)
+	-@SRC_LIB='$$(call pathsearch,$(1),$$(STD_LIB_PATH))'; \
+	[ -n "$$$${SRC_LIB}" ] && cp "$$$${SRC_LIB}" '$$(build_shlibdir)'
+endef
+
+define copy_csl_static_lib
+install-csl: | $$(build_private_libdir) $$(build_private_libdir)/$(1)
+$$(build_private_libdir)/$(1): | $$(build_private_libdir)
+	-@SRC_LIB='$$(call pathsearch,$(1),$$(STD_LIB_PATH))'; \
+	[ -n "$$$${SRC_LIB}" ] && cp "$$$${SRC_LIB}" '$$(build_private_libdir)'
 endef
 
 # libgfortran has multiple names; we're just going to copy any version we can find
@@ -63,11 +70,20 @@ $(eval $(call copy_csl,$(call versioned_libname,libgfortran,5)))
 # These are all libraries that we should always have
 $(eval $(call copy_csl,$(call versioned_libname,libquadmath,0)))
 $(eval $(call copy_csl,$(call versioned_libname,libstdc++,6)))
-$(eval $(call copy_csl,$(call versioned_libname,libssp,0)))
 $(eval $(call copy_csl,$(call versioned_libname,libatomic,1)))
 $(eval $(call copy_csl,$(call versioned_libname,libgomp,1)))
 
+# Configurable either a static or dynamic library depending on the system
+$(eval $(call copy_csl,$(call versioned_libname,libssp,0)))
+$(eval $(call copy_csl_static_lib,libssp.a))
+
 ifeq ($(OS),WINNT)
+# On windows we need the static gcc runtime libraries for linking pkgimages
+$(eval $(call copy_csl_static_lib,libgcc.a))
+$(eval $(call copy_csl_static_lib,libgcc_s.a))
+$(eval $(call copy_csl_static_lib,libmsvcrt.a))
+$(eval $(call copy_csl_static_lib,libmingwex.a))
+$(eval $(call copy_csl_static_lib,libkernel32.a))
 # Windows has special gcc_s names
 ifeq ($(ARCH),i686)
 $(eval $(call copy_csl,$(call versioned_libname,libgcc_s_sjlj,1)))
@@ -75,12 +91,24 @@ else
 $(eval $(call copy_csl,$(call versioned_libname,libgcc_s_seh,1)))
 endif
 else
-ifeq ($(APPLE_ARCH),arm64)
+ifeq ($(OS),Darwin)
+# On macOS, libgcc_s has soversion 1.1 always on aarch64 and only for GCC 12+
+# (-> libgfortran 5) on x86_64
+ifeq ($(ARCH),aarch64)
+$(eval $(call copy_csl,$(call versioned_libname,libgcc_s,1.1)))
+else
+ifeq ($(LIBGFORTRAN_VERSION),5)
 $(eval $(call copy_csl,$(call versioned_libname,libgcc_s,1.1)))
 else
 $(eval $(call copy_csl,$(call versioned_libname,libgcc_s,1)))
 endif
 endif
+else
+# Other targets just use libgcc_s.1
+$(eval $(call copy_csl,$(call versioned_libname,libgcc_s,1)))
+endif
+endif
+
 # winpthread is only Windows, pthread is only others
 ifeq ($(OS),WINNT)
 $(eval $(call copy_csl,$(call versioned_libname,libwinpthread,1)))
@@ -104,4 +132,27 @@ distclean-csl: clean-csl
 
 else
 $(eval $(call bb-install,csl,CSL,true))
+ifeq ($(OS),WINNT)
+GCC_VERSION = 14
+install-csl:
+	mkdir -p $(build_private_libdir)/
+	cp -a $(build_shlibdir)/$(call versioned_libname,libstdc++,6) $(build_shlibdir)/libstdc++.$(SHLIB_EXT)
+	cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/$(GCC_VERSION)/libgcc_s.a $(build_private_libdir)/
+	cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/$(GCC_VERSION)/libgcc.a $(build_private_libdir)/
+	cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/$(GCC_VERSION)/libmsvcrt.a $(build_private_libdir)/
+	cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/$(GCC_VERSION)/libssp.dll.a $(build_private_libdir)/
+	cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/$(GCC_VERSION)/libssp.dll.a $(build_libdir)/
+endif
+endif
+
+ifeq ($(OS),WINNT)
+uninstall-csl: uninstall-gcc-libraries
+uninstall-gcc-libraries:
+	-rm -f $(build_shlibdir)/libstdc++.$(SHLIB_EXT)
+	-rm -f $(build_private_libdir)/libgcc_s.a
+	-rm -f $(build_private_libdir)/libgcc.a
+	-rm -f $(build_private_libdir)/libmsvcrt.a
+	-rm -f $(build_private_libdir)/libssp.dll.a
+	-rm -f $(build_libdir)/libssp.dll.a
+.PHONY: uninstall-gcc-libraries
 endif
diff --git a/deps/curl.mk b/deps/curl.mk
index a063dfe07fba0..1f650dce9370e 100644
--- a/deps/curl.mk
+++ b/deps/curl.mk
@@ -1,6 +1,10 @@
 ## CURL ##
 include $(SRCDIR)/curl.version
 
+ifeq ($(USE_SYSTEM_OPENSSL), 0)
+$(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/openssl
+endif
+
 ifeq ($(USE_SYSTEM_LIBSSH2), 0)
 $(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/libssh2
 endif
@@ -14,7 +18,7 @@ $(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/nghttp
 endif
 
 ifneq ($(USE_BINARYBUILDER_CURL),1)
-CURL_LDFLAGS := $(RPATH_ESCAPED_ORIGIN)
+CURL_LDFLAGS := $(RPATH_ESCAPED_ORIGIN) -Wl,-rpath,$(build_shlibdir)
 
 # On older Linuces (those that use OpenSSL < 1.1) we include `libpthread` explicitly.
 # It doesn't hurt to include it explicitly elsewhere, so we do so.
@@ -27,7 +31,7 @@ $(SRCCACHE)/curl-$(CURL_VER).tar.bz2: | $(SRCCACHE)
 
 $(SRCCACHE)/curl-$(CURL_VER)/source-extracted: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2
 	$(JLCHECKSUM) $<
-	cd $(dir $<) && $(TAR) jxf $(notdir $<)
+	cd $(dir $<) && $(TAR) -jxf $(notdir $<)
 	echo 1 > $@
 
 checksum-curl: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2
@@ -35,25 +39,26 @@ checksum-curl: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2
 
 ## xref: https://github.com/JuliaPackaging/Yggdrasil/blob/master/L/LibCURL/common.jl
 # Disable....almost everything
-CURL_CONFIGURE_FLAGS := $(CONFIGURE_COMMON) \
-	--without-gnutls --without-libidn2 --without-librtmp \
-	--without-nss --without-libpsl --without-libgsasl --without-fish-functions-dir \
-	--disable-ares --disable-manual --disable-ldap --disable-ldaps --disable-static \
-	--without-gssapi --without-brotli
+CURL_CONFIGURE_FLAGS := $(CONFIGURE_COMMON)				\
+        --without-gnutls						\
+        --without-libidn2 --without-librtmp				\
+        --without-nss --without-libpsl					\
+        --disable-ares --disable-manual					\
+        --disable-ldap --disable-ldaps --without-zsh-functions-dir	\
+        --disable-static --without-libgsasl				\
+        --without-brotli
 # A few things we actually enable
-CURL_CONFIGURE_FLAGS += --enable-versioned-symbols \
-	--with-libssh2=${build_prefix} --with-zlib=${build_prefix} --with-nghttp2=${build_prefix}
+CURL_CONFIGURE_FLAGS +=											\
+        --with-libssh2=${build_prefix} --with-zlib=${build_prefix} --with-zstd=${build_prefix} --with-nghttp2=${build_prefix}	\
+        --enable-versioned-symbols
 
 # We use different TLS libraries on different platforms.
 #   On Windows, we use schannel
-#   On MacOS, we use SecureTransport
-#   On Linux, we use mbedTLS
+#   On other platforms, we use OpenSSL
 ifeq ($(OS), WINNT)
 CURL_TLS_CONFIGURE_FLAGS := --with-schannel
-else ifeq ($(OS), Darwin)
-CURL_TLS_CONFIGURE_FLAGS := --with-secure-transport
 else
-CURL_TLS_CONFIGURE_FLAGS := --with-mbedtls=$(build_prefix)
+CURL_TLS_CONFIGURE_FLAGS := --with-openssl
 endif
 CURL_CONFIGURE_FLAGS += $(CURL_TLS_CONFIGURE_FLAGS)
 
diff --git a/deps/curl.version b/deps/curl.version
index f704bc2bebc61..614e94bfc539a 100644
--- a/deps/curl.version
+++ b/deps/curl.version
@@ -3,4 +3,4 @@
 CURL_JLL_NAME := LibCURL
 
 ## source build
-CURL_VER := 8.0.1
+CURL_VER := 8.17.0
diff --git a/deps/dsfmt.version b/deps/dsfmt.version
index bbb63417f46cd..d81db2d10ff09 100644
--- a/deps/dsfmt.version
+++ b/deps/dsfmt.version
@@ -1,5 +1,7 @@
+# -*- makefile -*-
+
 ## jll artifact
 DSFMT_JLL_NAME := dSFMT
 
 ## source build
-DSFMT_VER := 2.2.4
+DSFMT_VER := 2.2.5
diff --git a/deps/gmp.mk b/deps/gmp.mk
index 12ba15f8aa0f6..23075c861cd35 100644
--- a/deps/gmp.mk
+++ b/deps/gmp.mk
@@ -35,29 +35,17 @@ $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted: $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2
 checksum-gmp: $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2
 	$(JLCHECKSUM) $<
 
-# Apply fix to avoid using Apple ARM reserved register X18
-# Necessary for version 6.2.1, remove after next gmp release
-$(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted
+$(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted
 	cd $(dir $@) && \
-		patch -p1 < $(SRCDIR)/patches/gmp-HG-changeset.patch
+		patch -p1 -f < $(SRCDIR)/patches/gmp-exception.patch
 	echo 1 > $@
 
-$(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied
+$(SRCCACHE)/gmp-$(GMP_VER)/gmp-alloc_overflow.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied
 	cd $(dir $@) && \
-		patch -p1 < $(SRCDIR)/patches/gmp-exception.patch
+		patch -p1 -f < $(SRCDIR)/patches/gmp-alloc_overflow.patch
 	echo 1 > $@
 
-$(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied
-	cd $(dir $@) && \
-		patch -p1 < $(SRCDIR)/patches/gmp_alloc_overflow_func.patch
-	echo 1 > $@
-
-$(SRCCACHE)/gmp-$(GMP_VER)/gmp-CVE-2021-43618.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied
-	cd $(dir $@) && \
-		patch -p1 < $(SRCDIR)/patches/gmp-CVE-2021-43618.patch
-	echo 1 > $@
-
-$(SRCCACHE)/gmp-$(GMP_VER)/source-patched: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-CVE-2021-43618.patch-applied
+$(SRCCACHE)/gmp-$(GMP_VER)/source-patched: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-alloc_overflow.patch-applied
 	echo 1 > $@
 
 $(BUILDDIR)/gmp-$(GMP_VER)/build-configured: $(SRCCACHE)/gmp-$(GMP_VER)/source-patched
diff --git a/deps/gmp.version b/deps/gmp.version
index f77cac5906cea..3b6659faea7b7 100644
--- a/deps/gmp.version
+++ b/deps/gmp.version
@@ -1,5 +1,6 @@
+# -*- makefile -*-
 ## jll artifact
 GMP_JLL_NAME := GMP
 
 ## source build
-GMP_VER := 6.2.1
+GMP_VER := 6.3.0
diff --git a/deps/ittapi.mk b/deps/ittapi.mk
index 1a47c3ae89390..f27ae2d6f77d5 100644
--- a/deps/ittapi.mk
+++ b/deps/ittapi.mk
@@ -10,7 +10,7 @@ ITTAPI_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DITT_API_IPT_SUPPORT=
 $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-configured: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(CMAKE) $(dir $<) $(ITTAPI_OPTS)
+	$(CMAKE) -G"Unix Makefiles" $(dir $<) $(ITTAPI_OPTS)
 	echo 1 > $@
 
 $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled: $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-configured
@@ -40,4 +40,5 @@ fastcheck-ittapi: #none
 check-ittapi: #none
 
 clean-ittapi:
-	-rm -f $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled $(build_libdir)/libopenlibm.a
+	-rm -f $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled
+	-rm -f $(build_libdir)/libittnotify.a $(build_libdir)/libjitprofiling.a
diff --git a/deps/jlutilities/documenter/Manifest.toml b/deps/jlutilities/documenter/Manifest.toml
new file mode 100644
index 0000000000000..751c1033e4923
--- /dev/null
+++ b/deps/jlutilities/documenter/Manifest.toml
@@ -0,0 +1,353 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.14.0-DEV"
+manifest_format = "2.1"
+project_hash = "1e9ffa7d4739f7d125a5e2c66af8747a8effd889"
+
+[[deps.ANSIColoredPrinters]]
+git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
+registries = "General"
+uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9"
+version = "0.0.1"
+
+[[deps.AbstractTrees]]
+git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177"
+registries = "General"
+uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
+version = "0.4.5"
+
+[[deps.ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+version = "1.1.2"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+version = "1.11.0"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+version = "1.11.0"
+
+[[deps.CodecZlib]]
+deps = ["TranscodingStreams", "Zlib_jll"]
+git-tree-sha1 = "962834c22b66e32aa10f7611c08c8ca4e20749a9"
+registries = "General"
+uuid = "944b1d66-785c-5afd-91f1-9de20f533193"
+version = "0.7.8"
+
+[[deps.CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+version = "1.3.0+1"
+
+[[deps.Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+version = "1.11.0"
+
+[[deps.DocStringExtensions]]
+git-tree-sha1 = "7442a5dfe1ebb773c29cc2962a8980f47221d76c"
+registries = "General"
+uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+version = "0.9.5"
+
+[[deps.Documenter]]
+deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "CodecZlib", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "TOML", "Test", "Unicode"]
+git-tree-sha1 = "b37458ae37d8bdb643d763451585cd8d0e5b4a9e"
+registries = "General"
+uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+version = "1.16.1"
+
+[[deps.Downloads]]
+deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+version = "1.7.0"
+
+[[deps.Expat_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "27af30de8b5445644e8ffe3bcb0d72049c089cf1"
+registries = "General"
+uuid = "2e619515-83b5-522b-bb60-26c02a35a201"
+version = "2.7.3+0"
+
+[[deps.FileWatching]]
+uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+version = "1.11.0"
+
+[[deps.Git]]
+deps = ["Git_LFS_jll", "Git_jll", "JLLWrappers", "OpenSSH_jll"]
+git-tree-sha1 = "824a1890086880696fc908fe12a17bcf61738bd8"
+registries = "General"
+uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2"
+version = "1.5.0"
+
+[[deps.Git_LFS_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "bb8471f313ed941f299aa53d32a94ab3bee08844"
+registries = "General"
+uuid = "020c3dae-16b3-5ae5-87b3-4cb189e250b2"
+version = "3.7.0+0"
+
+[[deps.Git_jll]]
+deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"]
+git-tree-sha1 = "b6a684587ebe896d9f68ae777f648205940f0f70"
+registries = "General"
+uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb"
+version = "2.51.3+0"
+
+[[deps.IOCapture]]
+deps = ["Logging", "Random"]
+git-tree-sha1 = "0ee181ec08df7d7c911901ea38baf16f755114dc"
+registries = "General"
+uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
+version = "1.0.0"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+version = "1.11.0"
+
+[[deps.JLLWrappers]]
+deps = ["Artifacts", "Preferences"]
+git-tree-sha1 = "0533e564aae234aff59ab625543145446d8b6ec2"
+registries = "General"
+uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
+version = "1.7.1"
+
+[[deps.JSON]]
+deps = ["Dates", "Logging", "Parsers", "PrecompileTools", "StructUtils", "UUIDs", "Unicode"]
+git-tree-sha1 = "5b6bb73f555bc753a6153deec3717b8904f5551c"
+registries = "General"
+uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+version = "1.3.0"
+
+    [deps.JSON.extensions]
+    JSONArrowExt = ["ArrowTypes"]
+
+    [deps.JSON.weakdeps]
+    ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
+
+[[deps.JuliaSyntaxHighlighting]]
+deps = ["StyledStrings"]
+uuid = "ac6e5ff7-fb65-4e79-a425-ec3bc9c03011"
+version = "1.12.0"
+
+[[deps.LazilyInitializedFields]]
+git-tree-sha1 = "0f2da712350b020bc3957f269c9caad516383ee0"
+registries = "General"
+uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf"
+version = "1.3.0"
+
+[[deps.LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+version = "1.0.0"
+
+[[deps.LibCURL_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "LibSSH2_jll", "Libdl", "OpenSSL_jll", "Zlib_jll", "Zstd_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+version = "8.17.0+0"
+
+[[deps.LibGit2]]
+deps = ["LibGit2_jll", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+version = "1.11.0"
+
+[[deps.LibGit2_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "LibSSH2_jll", "Libdl", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"]
+uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+version = "1.9.1+0"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl", "OpenSSL_jll", "Zlib_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.11.3+1"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+version = "1.11.0"
+
+[[deps.Libiconv_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl"]
+git-tree-sha1 = "be484f5c92fad0bd8acfef35fe017900b0b73809"
+registries = "General"
+uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
+version = "1.18.0+0"
+
+[[deps.Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+version = "1.11.0"
+
+[[deps.Markdown]]
+deps = ["Base64", "JuliaSyntaxHighlighting", "StyledStrings"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+version = "1.11.0"
+
+[[deps.MarkdownAST]]
+deps = ["AbstractTrees", "Markdown"]
+git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899"
+registries = "General"
+uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391"
+version = "0.1.2"
+
+[[deps.MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+version = "2025.11.4"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.3.0"
+
+[[deps.OpenSSH_jll]]
+deps = ["Artifacts", "JLLWrappers", "Libdl", "OpenSSL_jll", "Zlib_jll"]
+git-tree-sha1 = "301412a644646fdc0ad67d0a87487466b491e53d"
+registries = "General"
+uuid = "9bd350c2-7e96-507f-8002-3f2e150b4e1b"
+version = "10.2.1+0"
+
+[[deps.OpenSSL_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
+version = "3.5.4+0"
+
+[[deps.PCRE2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
+version = "10.47.0+0"
+
+[[deps.Parsers]]
+deps = ["Dates", "PrecompileTools", "UUIDs"]
+git-tree-sha1 = "7d2f8f21da5db6a806faf7b9b292296da42b2810"
+registries = "General"
+uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+version = "2.8.3"
+
+[[deps.Pkg]]
+deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "Zstd_jll", "p7zip_jll"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+version = "1.13.0"
+weakdeps = ["REPL"]
+
+    [deps.Pkg.extensions]
+    REPLExt = "REPL"
+
+[[deps.PrecompileTools]]
+deps = ["Preferences"]
+git-tree-sha1 = "07a921781cab75691315adc645096ed5e370cb77"
+registries = "General"
+uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
+version = "1.3.3"
+
+[[deps.Preferences]]
+deps = ["TOML"]
+git-tree-sha1 = "0f27480397253da18fe2c12a4ba4eb9eb208bf3d"
+registries = "General"
+uuid = "21216c6a-2e73-6563-6e65-726566657250"
+version = "1.5.0"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+version = "1.11.0"
+
+[[deps.REPL]]
+deps = ["Dates", "FileWatching", "InteractiveUtils", "JuliaSyntaxHighlighting", "Markdown", "Sockets", "StyledStrings", "Unicode"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+version = "1.11.0"
+
+[[deps.Random]]
+deps = ["SHA"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+version = "1.11.0"
+
+[[deps.RegistryInstances]]
+deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"]
+git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51"
+registries = "General"
+uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3"
+version = "0.1.0"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "1.0.0"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+version = "1.11.0"
+
+[[deps.Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+version = "1.11.0"
+
+[[deps.StructUtils]]
+deps = ["Dates", "UUIDs"]
+git-tree-sha1 = "79529b493a44927dd5b13dde1c7ce957c2d049e4"
+registries = "General"
+uuid = "ec057cc2-7a8d-4b58-b3b3-92acb9f63b42"
+version = "2.6.0"
+
+    [deps.StructUtils.extensions]
+    StructUtilsMeasurementsExt = ["Measurements"]
+    StructUtilsTablesExt = ["Tables"]
+
+    [deps.StructUtils.weakdeps]
+    Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7"
+    Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+
+[[deps.StyledStrings]]
+uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+version = "1.11.0"
+
+[[deps.TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+version = "1.0.3"
+
+[[deps.Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+version = "1.10.0"
+
+[[deps.Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+version = "1.11.0"
+
+[[deps.TranscodingStreams]]
+git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742"
+registries = "General"
+uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
+version = "0.11.3"
+
+[[deps.UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+version = "1.11.0"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+version = "1.11.0"
+
+[[deps.Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+version = "1.3.1+2"
+
+[[deps.Zstd_jll]]
+deps = ["CompilerSupportLibraries_jll", "Libdl"]
+uuid = "3161d3a3-bdf6-5164-811a-617609db77b4"
+version = "1.5.7+1"
+
+[[deps.nghttp2_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+version = "1.68.0+1"
+
+[[deps.p7zip_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+version = "17.7.0+0"
+
+[registries.General]
+url = "https://github.com/JuliaRegistries/General.git"
+uuid = "23338594-aafe-5451-b93e-139f81909106"
diff --git a/doc/Project.toml b/deps/jlutilities/documenter/Project.toml
similarity index 100%
rename from doc/Project.toml
rename to deps/jlutilities/documenter/Project.toml
diff --git a/deps/jlutilities/objectfile/Manifest.toml b/deps/jlutilities/objectfile/Manifest.toml
new file mode 100644
index 0000000000000..5b1d6d29db365
--- /dev/null
+++ b/deps/jlutilities/objectfile/Manifest.toml
@@ -0,0 +1,28 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.14.0-DEV"
+manifest_format = "2.1"
+project_hash = "23b8253b8eadb7ba5d7489bb56f38819b7150654"
+
+[[deps.ObjectFile]]
+deps = ["Reexport", "StructIO"]
+git-tree-sha1 = "22faba70c22d2f03e60fbc61da99c4ebfc3eb9ba"
+registries = "General"
+uuid = "d8793406-e978-5875-9003-1fc021f44a92"
+version = "0.5.0"
+
+[[deps.Reexport]]
+git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
+registries = "General"
+uuid = "189a3867-3050-52da-a836-e630ba90ab69"
+version = "1.2.2"
+
+[[deps.StructIO]]
+git-tree-sha1 = "c581be48ae1cbf83e899b14c07a807e1787512cc"
+registries = "General"
+uuid = "53d494c1-5632-5724-8f4c-31dff12d585f"
+version = "0.3.1"
+
+[registries.General]
+url = "https://github.com/JuliaRegistries/General.git"
+uuid = "23338594-aafe-5451-b93e-139f81909106"
diff --git a/deps/jlutilities/objectfile/Project.toml b/deps/jlutilities/objectfile/Project.toml
new file mode 100644
index 0000000000000..acdde4be0965e
--- /dev/null
+++ b/deps/jlutilities/objectfile/Project.toml
@@ -0,0 +1,2 @@
+[deps]
+ObjectFile = "d8793406-e978-5875-9003-1fc021f44a92"
diff --git a/deps/jlutilities/revise/Manifest.toml b/deps/jlutilities/revise/Manifest.toml
new file mode 100644
index 0000000000000..143f20b08e9c1
--- /dev/null
+++ b/deps/jlutilities/revise/Manifest.toml
@@ -0,0 +1,159 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.13.0-DEV"
+manifest_format = "2.0"
+project_hash = "6de5e8b1c4d9b467a5c126490dbc755dc0575a9c"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+version = "1.11.0"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+version = "1.11.0"
+
+[[deps.CodeTracking]]
+deps = ["InteractiveUtils", "UUIDs"]
+git-tree-sha1 = "062c5e1a5bf6ada13db96a4ae4749a4c2234f521"
+uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
+version = "1.3.9"
+
+[[deps.Compiler]]
+git-tree-sha1 = "382d79bfe72a406294faca39ef0c3cef6e6ce1f1"
+uuid = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1"
+version = "0.1.1"
+
+[[deps.CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+version = "1.3.0+1"
+
+[[deps.FileWatching]]
+uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+version = "1.11.0"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+version = "1.11.0"
+
+[[deps.JuliaInterpreter]]
+deps = ["CodeTracking", "InteractiveUtils", "Random", "UUIDs"]
+git-tree-sha1 = "6ac9e4acc417a5b534ace12690bc6973c25b862f"
+uuid = "aa1ae85d-cabe-5617-a682-6adf51b2e16a"
+version = "0.10.3"
+
+[[deps.JuliaSyntaxHighlighting]]
+deps = ["StyledStrings"]
+uuid = "ac6e5ff7-fb65-4e79-a425-ec3bc9c03011"
+version = "1.12.0"
+
+[[deps.LibGit2]]
+deps = ["LibGit2_jll", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+version = "1.11.0"
+
+[[deps.LibGit2_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "LibSSH2_jll", "Libdl", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"]
+uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+version = "1.9.1+0"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl", "OpenSSL_jll", "Zlib_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.11.3+1"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+version = "1.11.0"
+
+[[deps.LoweredCodeUtils]]
+deps = ["Compiler", "JuliaInterpreter"]
+git-tree-sha1 = "b882a7dd7ef37643066ae8f9380beea8fdd89cae"
+uuid = "6f1432cf-f94c-5a45-995e-cdbf5db27b0b"
+version = "3.4.2"
+
+[[deps.Markdown]]
+deps = ["Base64", "JuliaSyntaxHighlighting", "StyledStrings"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+version = "1.11.0"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.3.0"
+
+[[deps.OpenSSL_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
+version = "3.5.1+0"
+
+[[deps.OrderedCollections]]
+git-tree-sha1 = "05868e21324cede2207c6f0f466b4bfef6d5e7ee"
+uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+version = "1.8.1"
+
+[[deps.PCRE2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
+version = "10.45.0+0"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+version = "1.11.0"
+
+[[deps.REPL]]
+deps = ["FileWatching", "InteractiveUtils", "JuliaSyntaxHighlighting", "Markdown", "Sockets", "StyledStrings", "Unicode"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+version = "1.11.0"
+
+[[deps.Random]]
+deps = ["SHA"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+version = "1.11.0"
+
+[[deps.Requires]]
+deps = ["UUIDs"]
+git-tree-sha1 = "62389eeff14780bfe55195b7204c0d8738436d64"
+uuid = "ae029012-a4dd-5104-9daa-d747884805df"
+version = "1.3.1"
+
+[[deps.Revise]]
+deps = ["CodeTracking", "FileWatching", "JuliaInterpreter", "LibGit2", "LoweredCodeUtils", "OrderedCollections", "REPL", "Requires", "UUIDs", "Unicode"]
+git-tree-sha1 = "82dc140c7f52e4daeeec3675a411d48167a85a87"
+repo-rev = "master"
+repo-url = "https://github.com/timholy/Revise.jl.git"
+uuid = "295af30f-e4ad-537b-8983-00126c2a3abe"
+version = "3.8.0"
+
+    [deps.Revise.extensions]
+    DistributedExt = "Distributed"
+
+    [deps.Revise.weakdeps]
+    Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+version = "1.11.0"
+
+[[deps.StyledStrings]]
+uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+version = "1.11.0"
+
+[[deps.UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+version = "1.11.0"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+version = "1.11.0"
+
+[[deps.Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+version = "1.3.1+2"
diff --git a/deps/jlutilities/revise/Project.toml b/deps/jlutilities/revise/Project.toml
new file mode 100644
index 0000000000000..7f7ec8b06162c
--- /dev/null
+++ b/deps/jlutilities/revise/Project.toml
@@ -0,0 +1,5 @@
+[deps]
+Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
+
+[sources]
+Revise = {rev = "master", url = "https://github.com/timholy/Revise.jl.git"}
diff --git a/deps/libgit2.mk b/deps/libgit2.mk
index 014fdc0108f7c..85bc0629f6f28 100644
--- a/deps/libgit2.mk
+++ b/deps/libgit2.mk
@@ -4,16 +4,25 @@ ifneq ($(USE_BINARYBUILDER_LIBGIT2),1)
 LIBGIT2_GIT_URL := https://github.com/libgit2/libgit2.git
 LIBGIT2_TAR_URL = https://api.github.com/repos/libgit2/libgit2/tarball/$1
 $(eval $(call git-external,libgit2,LIBGIT2,CMakeLists.txt,,$(SRCCACHE)))
+$(SRCCACHE)/$(LIBGIT2_SRC_DIR)/source-extracted: export MSYS=$(MSYS_NONEXISTENT_SYMLINK_TARGET_FIX)
 
 ifeq ($(USE_SYSTEM_LIBSSH2), 0)
 $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/libssh2
 endif
 
-ifeq ($(USE_SYSTEM_MBEDTLS), 0)
-$(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/mbedtls
+ifeq ($(USE_SYSTEM_OPENSSL), 0)
+$(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/openssl
 endif
 
-LIBGIT2_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DUSE_THREADS=ON -DUSE_BUNDLED_ZLIB=ON -DUSE_SSH=ON -DBUILD_CLI=OFF
+ifeq ($(USE_SYSTEM_PCRE), 0)
+$(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/pcre
+endif
+
+ifeq ($(USE_SYSTEM_ZLIB), 0)
+$(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/zlib
+endif
+
+LIBGIT2_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DUSE_THREADS=ON -DUSE_BUNDLED_ZLIB=OFF -DUSE_SSH=ON -DREGEX_BACKEND=pcre2 -DBUILD_CLI=OFF -DBUILD_TESTS=OFF
 ifeq ($(OS),WINNT)
 LIBGIT2_OPTS += -DWIN32=ON -DMINGW=ON
 ifeq ($(USE_SYSTEM_LIBSSH2), 0)
@@ -29,13 +38,17 @@ endif
 ifeq ($(BUILD_OS),WINNT)
 LIBGIT2_OPTS += -G"MSYS Makefiles"
 else
-LIBGIT2_OPTS += -DBUILD_CLAR=OFF -DDLLTOOL=`which $(CROSS_COMPILE)dlltool`
+LIBGIT2_OPTS += -DBUILD_TESTS=OFF -DDLLTOOL=`which $(CROSS_COMPILE)dlltool`
 LIBGIT2_OPTS += -DCMAKE_FIND_ROOT_PATH=/usr/$(XC_HOST) -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY
 endif
 endif
+ifeq ($(OS),OpenBSD)
+# iconv.h is third-party
+LIBGIT2_OPTS += -DCMAKE_C_FLAGS="-I/usr/local/include"
+endif
 
-ifneq (,$(findstring $(OS),Linux FreeBSD))
-LIBGIT2_OPTS += -DUSE_HTTPS="mbedTLS" -DUSE_SHA1="CollisionDetection" -DCMAKE_INSTALL_RPATH="\$$ORIGIN"
+ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD))
+LIBGIT2_OPTS += -DUSE_HTTPS="OpenSSL" -DUSE_SHA1="CollisionDetection" -DCMAKE_INSTALL_RPATH="\$$ORIGIN"
 endif
 
 LIBGIT2_SRC_PATH := $(SRCCACHE)/$(LIBGIT2_SRC_DIR)
@@ -43,7 +56,7 @@ LIBGIT2_SRC_PATH := $(SRCCACHE)/$(LIBGIT2_SRC_DIR)
 $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: $(LIBGIT2_SRC_PATH)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(CMAKE) $(dir $<) $(LIBGIT2_OPTS)
+	$(CMAKE) -G"Unix Makefiles" $(dir $<) $(LIBGIT2_OPTS)
 	echo 1 > $@
 
 $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-compiled: $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured
diff --git a/deps/libgit2.version b/deps/libgit2.version
index b8cefc3c5c6f3..6c80bdc41e928 100644
--- a/deps/libgit2.version
+++ b/deps/libgit2.version
@@ -3,11 +3,12 @@
 LIBGIT2_JLL_NAME := LibGit2
 
 ## source build
-LIBGIT2_BRANCH=v1.6.1
-LIBGIT2_SHA1=8a871d13b7f4e186b8ad943ae5a7fcf30be52e67
+LIBGIT2_BRANCH=v1.9.1
+LIBGIT2_SHA1=0060d9cf5666f015b1067129bd874c6cc4c9c7ac
 
 ## Other deps
 # Specify the version of the Mozilla CA Certificate Store to obtain.
 # The versions of cacert.pem are identified by the date (YYYY-MM-DD) of their changes.
 # See https://curl.haxx.se/docs/caextract.html for more details.
-MOZILLA_CACERT_VERSION := 2023-01-10
+# Keep in sync with `stdlib/MozillaCACerts_jll/Project.toml`.
+MOZILLA_CACERT_VERSION := 2025-11-04
diff --git a/deps/libssh2.mk b/deps/libssh2.mk
index d0174c0c090e2..05cc12b6e159b 100644
--- a/deps/libssh2.mk
+++ b/deps/libssh2.mk
@@ -4,23 +4,25 @@ LIBSSH2_GIT_URL := https://github.com/libssh2/libssh2.git
 LIBSSH2_TAR_URL = https://api.github.com/repos/libssh2/libssh2/tarball/$1
 $(eval $(call git-external,libssh2,LIBSSH2,CMakeLists.txt,,$(SRCCACHE)))
 
-ifeq ($(USE_SYSTEM_MBEDTLS), 0)
-$(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/mbedtls
+ifeq ($(USE_SYSTEM_OPENSSL), 0)
+$(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/openssl
 endif
 
 LIBSSH2_OPTS := $(CMAKE_COMMON) -DBUILD_SHARED_LIBS=ON -DBUILD_EXAMPLES=OFF \
 		-DCMAKE_BUILD_TYPE=Release
 
+ifneq ($(fPIC),)
+LIBSSH2_OPTS += -DCMAKE_C_FLAGS="-fPIC"
+endif
+
 ifeq ($(OS),WINNT)
 LIBSSH2_OPTS += -DCRYPTO_BACKEND=WinCNG -DENABLE_ZLIB_COMPRESSION=OFF
-ifeq ($(BUILD_OS),WINNT)
-LIBSSH2_OPTS += -G"MSYS Makefiles"
-endif
 else
-LIBSSH2_OPTS += -DCRYPTO_BACKEND=mbedTLS -DENABLE_ZLIB_COMPRESSION=OFF
+LIBSSH2_OPTS += -DCRYPTO_BACKEND=OpenSSL -DENABLE_ZLIB_COMPRESSION=OFF
+LIBSSH2_OPTS += -DOPENSSL_ROOT_DIR=$(build_prefix)
 endif
 
-ifneq (,$(findstring $(OS),Linux FreeBSD))
+ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD))
 LIBSSH2_OPTS += -DCMAKE_INSTALL_RPATH="\$$ORIGIN"
 endif
 
@@ -30,30 +32,14 @@ endif
 
 LIBSSH2_SRC_PATH := $(SRCCACHE)/$(LIBSSH2_SRC_DIR)
 
- # Apply patch to fix v1.10.0 CVE (https://github.com/libssh2/libssh2/issues/649), drop with v1.11
-$(LIBSSH2_SRC_PATH)/libssh2-userauth-check.patch-applied: $(LIBSSH2_SRC_PATH)/source-extracted
-	cd $(LIBSSH2_SRC_PATH) && \
-		patch -p1 -f < $(SRCDIR)/patches/libssh2-userauth-check.patch
-	echo 1 > $@
-
-# issue:   https://github.com/JuliaLang/julia/issues/45645#issuecomment-1153214379
-# fix pr:  https://github.com/libssh2/libssh2/pull/711
-$(LIBSSH2_SRC_PATH)/libssh2-fix-import-lib-name.patch-applied: $(LIBSSH2_SRC_PATH)/libssh2-userauth-check.patch-applied
-	cd $(LIBSSH2_SRC_PATH) && \
-		patch -p1 -f < $(SRCDIR)/patches/libssh2-fix-import-lib-name.patch
-	echo 1 > $@
-
-$(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: \
-	$(LIBSSH2_SRC_PATH)/libssh2-fix-import-lib-name.patch-applied
-
 $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: $(LIBSSH2_SRC_PATH)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(CMAKE) $(dir $<) $(LIBSSH2_OPTS)
+	$(CMAKE) $(CMAKE_GENERATOR_COMMAND) $(dir $<) $(LIBSSH2_OPTS)
 	echo 1 > $@
 
 $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-compiled: $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured
-	$(MAKE) -C $(dir $<) libssh2
+	$(MAKE) -C $(dir $<)
 	echo 1 > $@
 
 $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-checked: $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-compiled
diff --git a/deps/libssh2.version b/deps/libssh2.version
index 3d5b2bb98d7eb..d6cc8a629c3bf 100644
--- a/deps/libssh2.version
+++ b/deps/libssh2.version
@@ -1,7 +1,8 @@
+# -*- makefile -*-
 ## jll artifact
 LIBSSH2_JLL_NAME := LibSSH2
 
 ## source build
-LIBSSH2_VER := 1.10.2
-LIBSSH2_BRANCH=libssh2-1.10.0
-LIBSSH2_SHA1=635caa90787220ac3773c1d5ba11f1236c22eae8
+LIBSSH2_VER := 1.11.1
+LIBSSH2_BRANCH=libssh2-1.11.1
+LIBSSH2_SHA1=a312b43325e3383c865a87bb1d26cb52e3292641
diff --git a/deps/libsuitesparse.mk b/deps/libsuitesparse.mk
index 7d79e03ee8d0e..4bfeb0742fb7e 100644
--- a/deps/libsuitesparse.mk
+++ b/deps/libsuitesparse.mk
@@ -1,39 +1,43 @@
 ## LIBSUITESPARSE ##
 include $(SRCDIR)/libsuitesparse.version
 
-ifeq ($(USE_BLAS64), 1)
-UMFPACK_CONFIG := -DLONGBLAS='long long'
-CHOLMOD_CONFIG := -DLONGBLAS='long long'
-SPQR_CONFIG := -DLONGBLAS='long long'
-UMFPACK_CONFIG += -DSUN64
-CHOLMOD_CONFIG += -DSUN64
-SPQR_CONFIG += -DSUN64
-endif
-
-# Disable linking to libmetis
-CHOLMOD_CONFIG += -DNPARTITION
-
 ifneq ($(USE_BINARYBUILDER_LIBSUITESPARSE), 1)
 
-LIBSUITESPARSE_PROJECTS := AMD BTF CAMD CCOLAMD COLAMD CHOLMOD LDL KLU UMFPACK RBio SPQR
-LIBSUITESPARSE_LIBS := $(addsuffix .*$(SHLIB_EXT)*,suitesparseconfig amd btf camd ccolamd colamd cholmod klu ldl umfpack rbio spqr)
+LIBSUITESPARSE_PROJECTS := "suitesparse_config;amd;btf;camd;ccolamd;colamd;cholmod;klu;ldl;umfpack;rbio;spqr"
+LIBSUITESPARSE_LIBS := $(addsuffix .*$(SHLIB_EXT)*,suitesparseconfig $(subst ;, ,$(LIBSUITESPARSE_PROJECTS)))
 
-SUITESPARSE_LIB := $(LDFLAGS) -L"$(abspath $(BUILDDIR))/SuiteSparse-$(LIBSUITESPARSE_VER)/lib"
-ifeq ($(OS), Darwin)
-SUITESPARSE_LIB += $(RPATH_ESCAPED_ORIGIN)
-endif
-LIBSUITESPARSE_MFLAGS := CC="$(CC) $(SANITIZE_OPTS)" CXX="$(CXX) $(SANITIZE_OPTS)" F77="$(FC)" \
-	  AR="$(AR)" RANLIB="$(RANLIB)" \
-	  BLAS="-L$(build_shlibdir) -lblastrampoline" \
-	  LAPACK="-L$(build_shlibdir) -lblastrampoline" \
-	  LDFLAGS="$(SUITESPARSE_LIB) $(SANITIZE_LDFLAGS)" CFOPENMP="" CUDA=no CUDA_PATH="" \
-	  UMFPACK_CONFIG="$(UMFPACK_CONFIG)" \
-	  CHOLMOD_CONFIG="$(CHOLMOD_CONFIG)" \
-	  SPQR_CONFIG="$(SPQR_CONFIG)"
 ifeq ($(OS),WINNT)
-LIBSUITESPARSE_MFLAGS += UNAME=Windows
+BLAS_LIB_NAME_NO_EXT:=blastrampoline-5
+else
+BLAS_LIB_NAME_NO_EXT:=blastrampoline
+endif
+
+LIBSUITESPARSE_CMAKE_FLAGS := $(CMAKE_COMMON) \
+	  -DCMAKE_BUILD_TYPE=Release \
+	  -DBUILD_STATIC_LIBS=OFF \
+	  -DBUILD_TESTING=OFF \
+	  -DSUITESPARSE_ENABLE_PROJECTS=$(LIBSUITESPARSE_PROJECTS) \
+	  -DSUITESPARSE_DEMOS=OFF \
+	  -DSUITESPARSE_USE_STRICT=ON \
+	  -DSUITESPARSE_USE_CUDA=OFF \
+	  -DSUITESPARSE_USE_FORTRAN=OFF \
+	  -DSUITESPARSE_USE_OPENMP=OFF \
+	  -DCHOLMOD_PARTITION=ON \
+	  -DBLAS_FOUND=1 \
+	  -DBLAS_LIBRARIES="$(build_shlibdir)/lib$(BLAS_LIB_NAME_NO_EXT).$(SHLIB_EXT)" \
+	  -DBLAS_LINKER_FLAGS="$(BLAS_LIB_NAME_NO_EXT)" \
+	  -DBLA_VENDOR="$(BLAS_LIB_NAME_NO_EXT)" \
+	  -DLAPACK_LIBRARIES="$(build_shlibdir)/lib$(BLAS_LIB_NAME_NO_EXT).$(SHLIB_EXT)" \
+	  -DLAPACK_LINKER_FLAGS="${BLAS_LIB_NAME_NO_EXT}"
+
+ifeq ($(BINARY),64)
+LIBSUITESPARSE_CMAKE_FLAGS += -DBLAS64_SUFFIX="_64" -DSUITESPARSE_USE_64BIT_BLAS=YES
 else
-LIBSUITESPARSE_MFLAGS += UNAME=$(OS)
+LIBSUITESPARSE_CMAKE_FLAGS += -DSUITESPARSE_USE_64BIT_BLAS=NO
+endif
+
+ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD))
+LIBSUITESPARSE_CMAKE_FLAGS += -DCMAKE_INSTALL_RPATH="\$$ORIGIN"
 endif
 
 $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz: | $(SRCCACHE)
@@ -48,20 +52,15 @@ $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted: $(SRCCACHE)/Suit
 checksum-libsuitesparse: $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz
 	$(JLCHECKSUM) $<
 
-$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/SuiteSparse-shlib.patch-applied: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted
-	cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/SuiteSparse-shlib.patch
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-patched: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted
 	echo 1 > $@
-$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/SuiteSparse-shlib.patch-applied
 
 $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: | $(build_prefix)/manifest/blastrampoline
 
-$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted
-	$(MAKE) -C $(dir $<)SuiteSparse_config library config $(LIBSUITESPARSE_MFLAGS)
-	$(INSTALL_NAME_CMD)libsuitesparseconfig.$(SHLIB_EXT) $(dir $<)lib/libsuitesparseconfig.$(SHLIB_EXT)
-	for PROJ in $(LIBSUITESPARSE_PROJECTS); do \
-		$(MAKE) -C $(dir $<)$${PROJ} library $(LIBSUITESPARSE_MFLAGS) || exit 1; \
-		$(INSTALL_NAME_CMD)lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) $(dir $<)lib/lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) || exit 1; \
-	done
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-patched
+	cd $(dir $<) && $(CMAKE) -G"Unix Makefiles" . $(LIBSUITESPARSE_CMAKE_FLAGS)
+	$(MAKE) -C $(dir $<)
+	$(MAKE) -C $(dir $<) install
 	echo 1 > $@
 
 ifeq ($(OS),WINNT)
@@ -70,19 +69,14 @@ else
 LIBSUITESPARSE_SHLIB_ENV:=LD_LIBRARY_PATH="$(build_shlibdir)"
 endif
 $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled
-	for PROJ in $(LIBSUITESPARSE_PROJECTS); do \
+	for PROJ in $(shell echo $(subst ;, ,$(LIBSUITESPARSE_PROJECTS))); do \
 		$(LIBSUITESPARSE_SHLIB_ENV) $(MAKE) -C $(dir $<)$${PROJ} default $(LIBSUITESPARSE_MFLAGS) || exit 1; \
 	done
 	echo 1 > $@
 
-UNINSTALL_suitesparse := $(LIBSUITESPARSE_VER) manual_suitesparse $(LIBSUITESPARSE_LIBS)
+UNINSTALL_libsuitesparse := $(LIBSUITESPARSE_VER) manual_libsuitesparse $(LIBSUITESPARSE_LIBS)
 
 $(build_prefix)/manifest/libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled | $(build_prefix)/manifest $(build_shlibdir)
-	for lib in $(LIBSUITESPARSE_LIBS); do \
-		cp -a $(dir $<)lib/lib$${lib} $(build_shlibdir) || exit 1; \
-	done
-	#cp -a $(dir $<)lib/* $(build_shlibdir)
-	#cp -a $(dir $<)include/* $(build_includedir)
 	echo $(UNINSTALL_libsuitesparse) > $@
 
 clean-libsuitesparse: uninstall-libsuitesparse
@@ -101,7 +95,7 @@ configure-libsuitesparse: extract-libsuitesparse
 compile-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled
 fastcheck-libsuitesparse: #none
 check-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked
-install-libsuitesparse: $(build_prefix)/manifest/libsuitesparse
+install-libsuitesparse: $(build_prefix)/manifest/libsuitesparse remove-libsuitesparse-gpl-lib
 
 else # USE_BINARYBUILDER_LIBSUITESPARSE
 
@@ -109,6 +103,7 @@ $(eval $(call bb-install,libsuitesparse,LIBSUITESPARSE,false))
 
 # libsuitesparse depends on blastrampoline
 compile-libsuitesparse: | $(build_prefix)/manifest/blastrampoline
+install-libsuitesparse: | remove-libsuitesparse-gpl-lib
 endif
 
 define manual_libsuitesparse
@@ -116,3 +111,13 @@ uninstall-libsuitesparse:
 	-rm -f $(build_prefix)/manifest/libsuitesparse
 	-rm -f $(addprefix $(build_shlibdir)/lib,$3)
 endef
+
+remove-libsuitesparse-gpl-lib:
+ifeq ($(USE_GPL_LIBS),0)
+	@echo Removing GPL libs...
+	-rm -f $(build_bindir)/libcholmod*
+	-rm -f $(build_bindir)/libklu_cholmod*
+	-rm -f $(build_bindir)/librbio*
+	-rm -f $(build_bindir)/libspqr*
+	-rm -f $(build_bindir)/libumfpack*
+endif
diff --git a/deps/libsuitesparse.version b/deps/libsuitesparse.version
index 2237db6f2d116..dbb4d1eb713a7 100644
--- a/deps/libsuitesparse.version
+++ b/deps/libsuitesparse.version
@@ -1,5 +1,8 @@
+# -*- makefile -*-
+
 ## jll artifact
 LIBSUITESPARSE_JLL_NAME := SuiteSparse
 
 ## source build
-LIBSUITESPARSE_VER := 5.10.1
+LIBSUITESPARSE_VER := 7.10.1
+LIBSUITESPARSE_SHA1=d3c4926d2c47fd6ae558e898bfc072ade210a2a1
diff --git a/deps/libtracyclient.version b/deps/libtracyclient.version
index 0baf8504261f1..60b5a3e8ce630 100644
--- a/deps/libtracyclient.version
+++ b/deps/libtracyclient.version
@@ -1,6 +1,6 @@
 ## jll artifact
 LIBTRACYCLIENT_JLL_NAME := LibTracyClient
-LIBTRACYCLIENT_JLL_VER := 0.9.1+2
+LIBTRACYCLIENT_JLL_VER := 0.9.1+5
 
 ## source build
 LIBTRACYCLIENT_VER := 0.9.1
diff --git a/deps/libuv.mk b/deps/libuv.mk
index eacabac55e34f..993aa4fc144da 100644
--- a/deps/libuv.mk
+++ b/deps/libuv.mk
@@ -4,7 +4,7 @@ LIBUV_GIT_URL:=https://github.com/JuliaLang/libuv.git
 LIBUV_TAR_URL=https://api.github.com/repos/JuliaLang/libuv/tarball/$1
 $(eval $(call git-external,libuv,LIBUV,configure,,$(SRCCACHE)))
 
-UV_CFLAGS := -O2
+UV_CFLAGS := -O2 -DBUILDING_UV_SHARED=1
 
 UV_FLAGS := LDFLAGS="$(LDFLAGS) $(CLDFLAGS) -v"
 UV_FLAGS += CFLAGS="$(CFLAGS) $(UV_CFLAGS) $(SANITIZE_OPTS)"
diff --git a/deps/libuv.version b/deps/libuv.version
index 01bf4fecc6dc6..f80cde8964237 100644
--- a/deps/libuv.version
+++ b/deps/libuv.version
@@ -1,7 +1,9 @@
+# -*- makefile -*-
+
 ## jll artifact
 LIBUV_JLL_NAME := LibUV
 
 ## source build
 LIBUV_VER := 2
-LIBUV_BRANCH=julia-uv2-1.44.2
-LIBUV_SHA1=2723e256e952be0b015b3c0086f717c3d365d97e
+LIBUV_BRANCH=julia-uv2-1.48.0
+LIBUV_SHA1=b21d6d84e46f6c97ecbc8e4e8a8ea6ad98049ea8
diff --git a/deps/libwhich.version b/deps/libwhich.version
index 0fa713024ef99..09ea0197d10c1 100644
--- a/deps/libwhich.version
+++ b/deps/libwhich.version
@@ -1,2 +1,2 @@
 LIBWHICH_BRANCH=master
-LIBWHICH_SHA1=81e9723c0273d78493dc8c8ed570f68d9ce7e89e
+LIBWHICH_SHA1=99a0ea12689e41164456dba03e93bc40924de880
diff --git a/deps/lld.version b/deps/lld.version
index d4b2a664d980c..bb0f47027c857 100644
--- a/deps/lld.version
+++ b/deps/lld.version
@@ -1,3 +1,5 @@
+# -*- makefile -*-
+
 ## jll artifact
 LLD_JLL_NAME := LLD
-LLD_JLL_VER := 15.0.7+5
+LLD_JLL_VER := 20.1.8+0
diff --git a/deps/llvm-tools.version b/deps/llvm-tools.version
index f2ecd0b33e989..e62cfb18f0af5 100644
--- a/deps/llvm-tools.version
+++ b/deps/llvm-tools.version
@@ -1,5 +1,7 @@
+# -*- makefile -*-
+
 ## jll artifact
 # LLVM_tools (downloads LLVM_jll to get things like `lit` and `opt`)
 LLVM_TOOLS_JLL_NAME := LLVM
-LLVM_TOOLS_JLL_VER := 15.0.7+5
-LLVM_TOOLS_ASSERT_JLL_VER := 15.0.7+5
+LLVM_TOOLS_JLL_VER := 20.1.8+0
+LLVM_TOOLS_ASSERT_JLL_VER := 20.1.8+0
diff --git a/deps/llvm.mk b/deps/llvm.mk
index 2a8365dd73e75..e3303aba55afd 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -7,6 +7,14 @@ ifneq ($(USE_BINARYBUILDER_LLVM), 1)
 LLVM_GIT_URL:=https://github.com/JuliaLang/llvm-project.git
 LLVM_TAR_URL=https://api.github.com/repos/JuliaLang/llvm-project/tarball/$1
 $(eval $(call git-external,llvm,LLVM,CMakeLists.txt,,$(SRCCACHE)))
+# LLVM's tarball contains symlinks to non-existent targets. This breaks the
+# the default msys strategy `deepcopy` symlink strategy. To workaround this,
+# switch to `native` which tries native windows symlinks (possible if the
+# machine is in developer mode) - or if not, falls back to cygwin-style
+# symlinks. We don't particularly care either way - we just need to symlinks
+# to succeed. We could guard this by a uname check, but it's harmless elsewhere,
+# so let's not incur the additional overhead.
+$(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted: export MSYS=$(MSYS_NONEXISTENT_SYMLINK_TARGET_FIX)
 
 LLVM_BUILDDIR := $(BUILDDIR)/$(LLVM_SRC_DIR)
 LLVM_BUILDDIR_withtype := $(LLVM_BUILDDIR)/build_$(LLVM_BUILDTYPE)
@@ -60,6 +68,10 @@ ifeq ($(BUILD_LLD), 1)
 LLVM_ENABLE_PROJECTS := $(LLVM_ENABLE_PROJECTS);lld
 endif
 
+# Remove ; if it's the first character
+ifneq ($(LLVM_ENABLE_RUNTIMES),)
+	LLVM_ENABLE_RUNTIMES := $(patsubst ;%,%,$(LLVM_ENABLE_RUNTIMES))
+endif
 
 LLVM_LIB_FILE := libLLVMCodeGen.a
 
@@ -70,7 +82,7 @@ LLVM_EXPERIMENTAL_TARGETS :=
 LLVM_CFLAGS :=
 LLVM_CXXFLAGS :=
 LLVM_CPPFLAGS :=
-LLVM_LDFLAGS :=
+LLVM_LDFLAGS := "-L$(build_shlibdir)" # hacky way to force zlib to be found when linking against libLLVM and sysroot is set
 LLVM_CMAKE :=
 
 LLVM_CMAKE += -DLLVM_ENABLE_PROJECTS="$(LLVM_ENABLE_PROJECTS)"
@@ -86,22 +98,23 @@ endif
 LLVM_CMAKE += -DLLVM_WINDOWS_PREFER_FORWARD_SLASH=False
 
 # Allow adding LLVM specific flags
-LLVM_CFLAGS += $(CFLAGS)
-LLVM_CXXFLAGS += $(CXXFLAGS)
+LLVM_CFLAGS += $(CFLAGS) $(BOLT_CFLAGS)
+LLVM_CXXFLAGS += $(CXXFLAGS) $(BOLT_CFLAGS)
 LLVM_CPPFLAGS += $(CPPFLAGS)
 LLVM_LDFLAGS += $(LDFLAGS)
+LLVM_LDFLAGS += $(BOLT_LDFLAGS)
 LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING="$(LLVM_TARGETS)" -DCMAKE_BUILD_TYPE="$(LLVM_CMAKE_BUILDTYPE)"
 LLVM_CMAKE += -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="$(LLVM_EXPERIMENTAL_TARGETS)"
 LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))"
-LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=ON -DZLIB_LIBRARY="$(build_prefix)/lib"
-LLVM_CMAKE += -DCOMPILER_RT_ENABLE_IOS=OFF -DCOMPILER_RT_ENABLE_WATCHOS=OFF -DCOMPILER_RT_ENABLE_TVOS=OFF
+LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=FORCE_ON -DZLIB_ROOT="$(build_prefix)"
+LLVM_CMAKE += -DLLVM_ENABLE_ZSTD=FORCE_ON -DZSTD_ROOT="$(build_prefix)"
 ifeq ($(USE_POLLY_ACC),1)
 LLVM_CMAKE += -DPOLLY_ENABLE_GPGPU_CODEGEN=ON
 endif
 LLVM_CMAKE += -DLLVM_TOOLS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir))
 LLVM_CMAKE += -DLLVM_UTILS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir))
 LLVM_CMAKE += -DLLVM_INCLUDE_UTILS=ON -DLLVM_INSTALL_UTILS=ON
-LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_HISTEDIT_H=Off -DHAVE_LIBEDIT=Off
+LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off -DLLVM_ENABLE_LIBEDIT=OFF
 ifeq ($(LLVM_ASSERTIONS), 1)
 LLVM_CMAKE += -DLLVM_ENABLE_ASSERTIONS:BOOL=ON
 endif # LLVM_ASSERTIONS
@@ -133,7 +146,7 @@ endif # USE_PERF_JITEVENTS
 
 ifeq ($(BUILD_LLDB),1)
 ifeq ($(USECLANG),0)
-LLVM_CXXFLAGS += -std=c++0x
+LLVM_CXXFLAGS += -std=c++17
 endif # USECLANG
 ifeq ($(LLDB_DISABLE_PYTHON),1)
 LLVM_CXXFLAGS += -DLLDB_DISABLE_PYTHON
@@ -182,6 +195,11 @@ endif
 
 ifeq ($(fPIC),)
 LLVM_CMAKE += -DLLVM_ENABLE_PIC=OFF
+else
+ifeq ($(OS),FreeBSD)
+    # On FreeBSD, we must force even statically-linked code to have -fPIC
+    LLVM_CMAKE += -DCMAKE_POSITION_INDEPENDENT_CODE=TRUE
+endif
 endif
 
 LLVM_CMAKE += -DCMAKE_C_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CFLAGS)" \
@@ -210,11 +228,14 @@ LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \
 LLVM_CMAKE += -DLLVM_VERSION_SUFFIX:STRING="jl"
 LLVM_CMAKE += -DLLVM_SHLIB_SYMBOL_VERSION:STRING="JL_LLVM_$(LLVM_VER_SHORT)"
 
+# Change the default bug report URL to Julia's issue tracker
+LLVM_CMAKE += -DBUG_REPORT_URL="https://github.com/julialang/julia"
+
 # Apply version-specific LLVM patches sequentially
 LLVM_PATCH_PREV :=
 define LLVM_PATCH
 $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV)
-	cd $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm && patch -p1 < $$(SRCDIR)/patches/$1.patch
+	cd $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm && patch -p1 -f < $$(SRCDIR)/patches/$1.patch
 	echo 1 > $$@
 # declare that applying any patch must re-run the compile step
 $$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
@@ -223,19 +244,26 @@ endef
 
 define LLVM_PROJ_PATCH
 $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV)
-	cd $$(SRCCACHE)/$$(LLVM_SRC_DIR) && patch -p1 < $$(SRCDIR)/patches/$1.patch
+	cd $$(SRCCACHE)/$$(LLVM_SRC_DIR) && patch -p1 -f < $$(SRCDIR)/patches/$1.patch
 	echo 1 > $$@
 # declare that applying any patch must re-run the compile step
 $$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
 LLVM_PATCH_PREV := $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
 endef
 
+ifeq ($(shell test $(LLVM_VER_MAJ) -lt 19 && echo true),true)
 $(eval $(call LLVM_PATCH,llvm-ittapi-cmake))
+endif
 
 ifeq ($(USE_SYSTEM_ZLIB), 0)
 $(LLVM_BUILDDIR_withtype)/build-configured: | $(build_prefix)/manifest/zlib
 endif
 
+ifeq ($(USE_SYSTEM_ZSTD), 0)
+$(LLVM_BUILDDIR_withtype)/build-configured: | $(build_prefix)/manifest/zstd
+endif
+
+
 # NOTE: LLVM 12 and 13 have their patches applied to JuliaLang/llvm-project
 
 # declare that all patches must be applied before running ./configure
@@ -249,7 +277,7 @@ $(BUILDDIR)/julia-patches.patch:
 
 # Apply the patch.
 $(SRCCACHE)/$(LLVM_SRC_DIR)/julia-patches.patch-applied: $(BUILDDIR)/julia-patches.patch $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted
-	cd $(SRCCACHE)/$(LLVM_SRC_DIR) && patch -p1 < $(realpath $<)
+	cd $(SRCCACHE)/$(LLVM_SRC_DIR) && patch -p1 -f < $(realpath $<)
 	echo 1 > $@
 
 # Require application of Julia's patchset before configuring LLVM.
@@ -264,6 +292,12 @@ $(LLVM_BUILDDIR_withtype)/build-configured: $(SRCCACHE)/$(LLVM_SRC_DIR)/source-e
 	echo 1 > $@
 
 $(LLVM_BUILDDIR_withtype)/build-compiled: $(LLVM_BUILDDIR_withtype)/build-configured
+ifeq ($(OS),WINNT)
+ifeq ($(USEGCC),1)
+	echo "LLVM source build is currently known to fail using GCC due to exceeded export table limits. Try clang."
+	exit 1
+endif
+endif
 	cd $(LLVM_BUILDDIR_withtype) && \
 		$(if $(filter $(CMAKE_GENERATOR),make), \
 		  $(MAKE), \
@@ -278,8 +312,10 @@ endif
 	echo 1 > $@
 
 LLVM_INSTALL = \
-	cd $1 && mkdir -p $2$$(build_depsbindir) && \
-	cp -r $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm/utils/lit $2$$(build_depsbindir)/ && \
+	cd $1 && mkdir -p $2$$(build_depsbindir)/lit && \
+	cp $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm/utils/lit/*.py $2$$(build_depsbindir)/lit/ && \
+	cp $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm/utils/lit/*.toml $2$$(build_depsbindir)/lit/ && \
+	cp -r $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm/utils/lit/lit $2$$(build_depsbindir)/lit/ && \
 	$$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P cmake_install.cmake
 ifeq ($(OS), WINNT)
 LLVM_INSTALL += && cp $2$$(build_shlibdir)/$(LLVM_SHARED_LIB_NAME).dll $2$$(build_depsbindir)
@@ -288,6 +324,9 @@ ifeq ($(OS),Darwin)
 # https://github.com/JuliaLang/julia/issues/29981
 LLVM_INSTALL += && ln -s libLLVM.dylib $2$$(build_shlibdir)/libLLVM-$$(LLVM_VER_SHORT).dylib
 endif
+ifeq ($(BUILD_LLD), 1)
+LLVM_INSTALL += && cp $2$$(build_bindir)/lld$$(EXE) $2$$(build_depsbindir)
+endif
 
 $(eval $(call staged-install, \
 	llvm,$$(LLVM_SRC_DIR)/build_$$(LLVM_BUILDTYPE), \
@@ -334,6 +373,10 @@ $(eval $(call bb-install,lld,LLD,false,true))
 $(eval $(call bb-install,clang,CLANG,false,true))
 $(eval $(call bb-install,llvm-tools,LLVM_TOOLS,false,true))
 
+# work-around for Yggdrasil packaging bug (https://github.com/JuliaPackaging/Yggdrasil/pull/11231)
+$(build_prefix)/manifest/llvm-tools uninstall-llvm-tools: \
+	TAR:=$(TAR) --exclude=llvm-config.exe
+
 endif # USE_BINARYBUILDER_LLVM
 
 get-lld: get-llvm
diff --git a/deps/llvm.version b/deps/llvm.version
index 4e7969994141e..1025f5962501a 100644
--- a/deps/llvm.version
+++ b/deps/llvm.version
@@ -2,14 +2,14 @@
 
 ## jll artifact
 LLVM_JLL_NAME := libLLVM
-LLVM_ASSERT_JLL_VER := 15.0.7+5
+LLVM_ASSERT_JLL_VER := 20.1.8+0
 ## source build
 # Version number of LLVM
-LLVM_VER := 15.0.7
+LLVM_VER := 20.1.8
 # Git branch name in `LLVM_GIT_URL` repository
-LLVM_BRANCH=julia-15.0.7-5
+LLVM_BRANCH=julia-20.1.8-0
 # Git ref in `LLVM_GIT_URL` repository
-LLVM_SHA1=julia-15.0.7-5
+LLVM_SHA1=julia-20.1.8-0
 
 ## Following options are used to automatically fetch patchset from Julia's fork.  This is
 ## useful if you want to build an external LLVM while still applying Julia's patches.
@@ -18,6 +18,6 @@ LLVM_APPLY_JULIA_PATCHES := 0
 # GitHub repository to use for fetching the Julia patches to apply to LLVM source code.
 LLVM_JULIA_DIFF_GITHUB_REPO := https://github.com/llvm/llvm-project
 # Base GitHub ref for generating the diff.
-LLVM_BASE_REF := llvm:llvmorg-15.0.7
+LLVM_BASE_REF := llvm:llvmorg-20.1.8
 # Julia fork's GitHub ref for generating the diff.
-LLVM_JULIA_REF := JuliaLang:julia-15.0.7-5
+LLVM_JULIA_REF := JuliaLang:julia-20.1.8-0
diff --git a/deps/llvmunwind.version b/deps/llvmunwind.version
index 7d13af9a158f7..666cae54025b4 100644
--- a/deps/llvmunwind.version
+++ b/deps/llvmunwind.version
@@ -2,4 +2,4 @@
 LLVMUNWIND_JLL_NAME := LLVMLibUnwind
 
 ## source build
-LLVMUNWIND_VER := 12.0.1
+LLVMUNWIND_VER := 19.1.4
diff --git a/deps/mbedtls.mk b/deps/mbedtls.mk
deleted file mode 100644
index b4147c2c2684e..0000000000000
--- a/deps/mbedtls.mk
+++ /dev/null
@@ -1,97 +0,0 @@
-## mbedtls
-include $(SRCDIR)/mbedtls.version
-
-ifneq ($(USE_BINARYBUILDER_MBEDTLS), 1)
-MBEDTLS_SRC = mbedtls-$(MBEDTLS_VER)
-MBEDTLS_URL = https://github.com/Mbed-TLS/mbedtls/archive/v$(MBEDTLS_VER).tar.gz
-
-MBEDTLS_OPTS := $(CMAKE_COMMON) -DUSE_SHARED_MBEDTLS_LIBRARY=ON \
-    -DUSE_STATIC_MBEDTLS_LIBRARY=OFF -DENABLE_PROGRAMS=OFF -DCMAKE_BUILD_TYPE=Release
-
-MBEDTLS_OPTS += -DENABLE_ZLIB_SUPPORT=OFF -DMBEDTLS_FATAL_WARNINGS=OFF
-ifeq ($(BUILD_OS),WINNT)
-MBEDTLS_OPTS += -G"MSYS Makefiles"
-endif
-
-ifneq (,$(findstring $(OS),Linux FreeBSD))
-MBEDTLS_OPTS += -DCMAKE_INSTALL_RPATH="\$$ORIGIN"
-endif
-
-$(SRCCACHE)/$(MBEDTLS_SRC).tar.gz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(MBEDTLS_URL)
-
-$(SRCCACHE)/$(MBEDTLS_SRC)/source-extracted: $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz
-	$(JLCHECKSUM) $<
-	mkdir -p $(dir $@) && \
-	$(TAR) -C $(dir $@) --strip-components 1 -xf $<
-	# Force-enable MD4
-	sed -i.org "s|//#define MBEDTLS_MD4_C|#define MBEDTLS_MD4_C|" $(SRCCACHE)/$(MBEDTLS_SRC)/include/mbedtls/config.h
-	touch -c $(SRCCACHE)/$(MBEDTLS_SRC)/CMakeLists.txt # old target
-	echo 1 > $@
-
-checksum-mbedtls: $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz
-	$(JLCHECKSUM) $<
-
-$(BUILDDIR)/$(MBEDTLS_SRC)/build-configured: $(SRCCACHE)/$(MBEDTLS_SRC)/source-extracted
-	mkdir -p $(dir $@)
-	cd $(dir $@) && \
-	$(CMAKE) $(dir $<) $(MBEDTLS_OPTS)
-	echo 1 > $@
-
-$(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled: $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured
-	$(MAKE) -C $(dir $<)
-	echo 1 > $@
-
-$(BUILDDIR)/$(MBEDTLS_SRC)/build-checked: $(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled
-ifeq ($(OS),$(BUILD_OS))
-	$(MAKE) -C $(dir $@) test
-endif
-	echo 1 > $@
-
-ifeq ($(OS),WINNT)
-define MBEDTLS_INSTALL
-	mkdir -p $2/$$(build_shlibdir)
-	cp $1/library/libmbedcrypto.$$(SHLIB_EXT) $2/$$(build_shlibdir)
-	cp $1/library/libmbedx509.$$(SHLIB_EXT) $2/$$(build_shlibdir)
-	cp $1/library/libmbedtls.$$(SHLIB_EXT) $2/$$(build_shlibdir)
-endef
-else
-define MBEDTLS_INSTALL
-	$(call MAKE_INSTALL,$1,$2,)
-endef
-endif
-$(eval $(call staged-install, \
-	mbedtls,$(MBEDTLS_SRC), \
-	MBEDTLS_INSTALL,,, \
-	$$(INSTALL_NAME_CMD)libmbedx509.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedx509.$$(SHLIB_EXT) && \
-	$$(INSTALL_NAME_CMD)libmbedtls.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \
-	$$(INSTALL_NAME_CHANGE_CMD) libmbedx509.1.dylib @rpath/libmbedx509.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \
-	$$(INSTALL_NAME_CHANGE_CMD) libmbedcrypto.7.dylib @rpath/libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \
-	$$(INSTALL_NAME_CHANGE_CMD) libmbedcrypto.7.dylib @rpath/libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedx509.$$(SHLIB_EXT) && \
-	$$(INSTALL_NAME_CMD)libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedcrypto.$$(SHLIB_EXT)))
-
-
-clean-mbedtls:
-	-rm -f $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured \
-		$(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled
-	-$(MAKE) -C $(BUILDDIR)/$(MBEDTLS_SRC) clean
-
-distclean-mbedtls:
-	rm -rf $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz \
-		$(SRCCACHE)/$(MBEDTLS_SRC) \
-		$(BUILDDIR)/$(MBEDTLS_SRC)
-
-
-get-mbedtls: $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz
-extract-mbedtls: $(SRCCACHE)/$(MBEDTLS_SRC)/source-extracted
-configure-mbedtls: $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured
-compile-mbedtls: $(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled
-# tests disabled since they are known to fail
-fastcheck-mbedtls: #check-mbedtls
-check-mbedtls: $(BUILDDIR)/$(MBEDTLS_SRC)/build-checked
-
-else # USE_BINARYBUILDER_MBEDTLS
-
-$(eval $(call bb-install,mbedtls,MBEDTLS,false))
-
-endif
diff --git a/deps/mbedtls.version b/deps/mbedtls.version
deleted file mode 100644
index f262476af1684..0000000000000
--- a/deps/mbedtls.version
+++ /dev/null
@@ -1,5 +0,0 @@
-## jll artifact
-MBEDTLS_JLL_NAME := MbedTLS
-
-## source build
-MBEDTLS_VER := 2.28.2
diff --git a/deps/mmtk_julia.mk b/deps/mmtk_julia.mk
new file mode 100644
index 0000000000000..7a7160a595d90
--- /dev/null
+++ b/deps/mmtk_julia.mk
@@ -0,0 +1,111 @@
+## MMTK ##
+
+# Both MMTK_MOVING and MMTK_PLAN should be specified in the Make.user file.
+# FIXME: By default we do a non-moving build. We should change the default to 1
+# once we support moving plans.
+MMTK_MOVING ?= 0
+MMTK_VARS := MMTK_PLAN=$(MMTK_PLAN) MMTK_MOVING=$(MMTK_MOVING)
+
+ifneq ($(USE_BINARYBUILDER_MMTK_JULIA),1)
+$(eval $(call git-external,mmtk_julia,MMTK_JULIA,,,$(BUILDDIR)))
+get-mmtk_julia: $(MMTK_JULIA_SRC_FILE)
+.PHONY: get-mmtk_julia
+
+# Download the binding, build it from source
+ifeq (${MMTK_JULIA_DIR},$(BUILDROOT)/usr/lib/mmtk_julia)
+
+MMTK_JULIA_DIR=$(BUILDROOT)/deps/$(BUILDDIR)/$(MMTK_JULIA_SRC_DIR)
+MMTK_JULIA_LIB_PATH=$(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD)
+PROJECT_DIRS := JULIA_PATH=$(JULIAHOME) JULIA_BUILDROOT=$(BUILDROOT) MMTK_JULIA_DIR=$(MMTK_JULIA_DIR)
+
+$(BUILDDIR)/$(MMTK_JULIA_SRC_DIR)/build-compiled: $(BUILDROOT)/usr/lib/libmmtk_julia.so
+	@echo 1 > $@
+
+# NB: use the absolute dir when creating the symlink
+$(BUILDROOT)/usr/lib/libmmtk_julia.so: $(MMTK_JULIA_LIB_PATH)/libmmtk_julia.so
+	@ln -sf $(MMTK_JULIA_LIB_PATH)/libmmtk_julia.so $@
+
+$(MMTK_JULIA_LIB_PATH)/libmmtk_julia.so: $(BUILDDIR)/$(MMTK_JULIA_SRC_DIR)/source-extracted
+	@$(PROJECT_DIRS) $(MMTK_VARS) $(MAKE) -C $(MMTK_JULIA_DIR) $(MMTK_BUILD)
+
+extract-mmtk_julia: $(BUILDDIR)/$(MMTK_JULIA_SRC_DIR)/source-extracted
+configure-mmtk_julia: extract-mmtk_julia
+compile-mmtk_julia: $(BUILDROOT)/usr/lib/libmmtk_julia.so
+fastcheck-mmtk_julia: #none
+check-mmtk_julia: compile-mmtk_julia
+
+.PHONY: extract-mmtk_julia
+.PHONY: configure-mmtk_julia
+.PHONY: compile-mmtk_julia
+.PHONY: fastcheck-mmtk_julia
+.PHONY: check-mmtk_julia
+
+$(eval $(call symlink_install,mmtk_julia,$$(MMTK_JULIA_SRC_DIR),$$(BUILDROOT)/usr/lib))
+
+# In this case, there is a custom version of the binding in MMTK_JULIA_DIR
+# Build it and symlink libmmtk_julia.so file into $(BUILDROOT)/usr/lib
+else
+
+PROJECT_DIRS := JULIA_PATH=$(JULIAHOME) JULIA_BUILDROOT=$(BUILDROOT) MMTK_JULIA_DIR=$(MMTK_JULIA_DIR)
+MMTK_JULIA_LIB_PATH=$(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD)
+
+.PHONY: install-mmtk_julia
+install-mmtk_julia: compile-mmtk_julia $(build_prefix)/manifest/mmtk_julia
+
+.PHONY: compile-mmtk_julia
+compile-mmtk_julia: $(BUILDROOT)/usr/lib/libmmtk_julia.so
+
+.PHONY: version-check-mmtk_julia
+version-check-mmtk_julia: $(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD)/libmmtk_julia.so
+
+# NB: This will NOT run `cargo build` if there are changes in the Rust source files
+# inside the binding repo. However the target below should remake the symlink if there
+# are changes in the libmmtk_julia.so from the custom MMTK_JULIA_DIR folder
+$(BUILDROOT)/usr/lib/libmmtk_julia.so: $(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD)/libmmtk_julia.so
+	@ln -sf $(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD)/libmmtk_julia.so $@
+
+$(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD)/libmmtk_julia.so:
+	@$(PROJECT_DIRS) $(MMTK_VARS) $(MAKE) -C $(MMTK_JULIA_DIR) $(MMTK_BUILD)
+
+MMTK_JULIA_VER := mmtk_julia_custom
+
+UNINSTALL_mmtk_julia := $(MMTK_JULIA_VER) manual_mmtk_julia
+
+define manual_mmtk_julia
+.PHONY: uninstall-mmtk_julia
+uninstall-mmtk_julia:
+	-rm -f $(build_prefix)/manifest/mmtk_julia
+	-rm -f $(BUILDROOT)/usr/lib/libmmtk_julia.so
+endef
+
+$(build_prefix)/manifest/mmtk_julia: $(BUILDROOT)/usr/lib/libmmtk_julia.so
+	@echo $(UNINSTALL_mmtk_julia) > $@
+
+endif # MMTK_JULIA_DIR
+
+else
+# We are building using the BinaryBuilder version of the binding
+
+# This will download all the versions of the binding that are available in the BinaryBuilder
+$(eval $(call bb-install,mmtk_julia,MMTK_JULIA,false))
+
+# Make sure we use the right version of $MMTK_PLAN, $MMTK_MOVING and $MMTK_BUILD
+ifeq (${MMTK_PLAN},Immix)
+LIB_PATH_PLAN = immix
+else ifeq (${MMTK_PLAN},StickyImmix)
+LIB_PATH_PLAN = sticky
+endif
+
+ifeq ($(MMTK_MOVING), 0)
+LIB_PATH_MOVING := non_moving
+else
+LIB_PATH_MOVING := moving
+endif
+
+.PHONY: version-check-mmtk_julia
+version-check-mmtk_julia: $(BUILDROOT)/usr/lib/libmmtk_julia.so
+
+$(BUILDROOT)/usr/lib/libmmtk_julia.so: get-mmtk_julia
+	@ln -sf $(BUILDROOT)/usr/lib/$(LIB_PATH_PLAN)/$(LIB_PATH_MOVING)/$(MMTK_BUILD)/libmmtk_julia.so $@
+
+endif # USE_BINARYBUILDER_MMTK_JULIA
diff --git a/deps/mmtk_julia.version b/deps/mmtk_julia.version
new file mode 100644
index 0000000000000..87241d0920505
--- /dev/null
+++ b/deps/mmtk_julia.version
@@ -0,0 +1,6 @@
+MMTK_JULIA_BRANCH = master
+MMTK_JULIA_SHA1 = f299191f32388673a3f447f35f91a9ec47955cc7
+MMTK_JULIA_GIT_URL := https://github.com/mmtk/mmtk-julia.git
+MMTK_JULIA_TAR_URL = https://github.com/mmtk/mmtk-julia/archive/refs/tags/v0.31.1.tar.gz
+MMTK_JULIA_JLL_VER := 0.31.1+0
+MMTK_JULIA_JLL_NAME := mmtk_julia
diff --git a/deps/mpfr.version b/deps/mpfr.version
index e4f1c8a45aeb0..70585f95a6385 100644
--- a/deps/mpfr.version
+++ b/deps/mpfr.version
@@ -1,5 +1,6 @@
+# -*- makefile -*-
 ## jll artifact
 MPFR_JLL_NAME := MPFR
 
 ## source build
-MPFR_VER := 4.2.0
+MPFR_VER := 4.2.2
diff --git a/deps/nghttp2.version b/deps/nghttp2.version
index 200e08bf4bfd9..68cc43b1e3e6f 100644
--- a/deps/nghttp2.version
+++ b/deps/nghttp2.version
@@ -3,4 +3,4 @@
 NGHTTP2_JLL_NAME := nghttp2
 
 ## source build
-NGHTTP2_VER := 1.52.0
+NGHTTP2_VER := 1.68.0
diff --git a/deps/nvtx.mk b/deps/nvtx.mk
new file mode 100644
index 0000000000000..c4d4db2deba65
--- /dev/null
+++ b/deps/nvtx.mk
@@ -0,0 +1,31 @@
+## nvtx ##
+include $(SRCDIR)/nvtx.version
+
+NVTX_GIT_URL := https://github.com/NVIDIA/NVTX.git
+NVTX_TAR_URL = https://api.github.com/repos/NVIDIA/NVTX/tarball/$1
+$(eval $(call git-external,nvtx,NVTX,,,$(SRCCACHE)))
+
+$(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured: $(SRCCACHE)/$(NVTX_SRC_DIR)/source-extracted
+	mkdir -p $(dir $@)
+	echo 1 > $@
+
+$(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured
+	echo 1 > $@
+
+define NVTX_INSTALL
+	cp -a $(SRCCACHE)/$(NVTX_SRC_DIR)/c/include $2/$$(build_includedir)/
+endef
+
+$(eval $(call staged-install, \
+	nvtx,$(NVTX_SRC_DIR), \
+	NVTX_INSTALL,,,))
+
+get-nvtx: $(NVTX_SRC_FILE)
+extract-nvtx: $(SRCCACHE)/$(NVTX_SRC_DIR)/source-extracted
+configure-nvtx: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured
+compile-nvtx: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled
+fastcheck-nvtx: #none
+check-nvtx: #none
+
+clean-nvtx:
+	-rm -f $(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled
diff --git a/deps/nvtx.version b/deps/nvtx.version
new file mode 100644
index 0000000000000..e26c55cae095e
--- /dev/null
+++ b/deps/nvtx.version
@@ -0,0 +1,4 @@
+# -*- makefile -*-
+## source build
+NVTX_BRANCH=dev
+NVTX_SHA1=733fb419540bc1d152bc682d2ca066c7bb79da29
diff --git a/deps/objconv.mk b/deps/objconv.mk
index 70c7289b07bfa..bfbc8ac3629af 100644
--- a/deps/objconv.mk
+++ b/deps/objconv.mk
@@ -3,18 +3,15 @@ include $(SRCDIR)/objconv.version
 
 ifneq ($(USE_BINARYBUILDER_OBJCONV),1)
 
-$(SRCCACHE)/objconv.zip: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://www.agner.org/optimize/objconv.zip
-
-$(BUILDDIR)/objconv/source-extracted: $(SRCCACHE)/objconv.zip
-	rm -rf $(dir $@)
-	mkdir -p $(BUILDDIR)
-	unzip -d $(dir $@) $<
-	cd $(dir $@) && unzip source.zip
+$(SRCCACHE)/objconv-$(OBJCONV_VER).tar.gz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/staticfloat/objconv/archive/refs/tags/v$(OBJCONV_VER).tar.gz
+
+$(BUILDDIR)/objconv/source-extracted: $(SRCCACHE)/objconv-$(OBJCONV_VER).tar.gz
+	cd $(dir $<) && $(TAR) -zxf $(notdir $<)
 	echo 1 > $@
 
 $(BUILDDIR)/objconv/build-compiled: $(BUILDDIR)/objconv/source-extracted
-	cd $(dir $<) && $(CXX) -o objconv -O2 *.cpp
+	cd $(dir $<) && $(CXX) -o objconv -O2 src/*.cpp
 	echo 1 > $@
 
 $(eval $(call staged-install, \
@@ -25,10 +22,9 @@ clean-objconv:
 	-rm -f $(BUILDDIR)/objconv/build-compiled $(build_depsbindir)/objconv
 
 distclean-objconv:
-	rm -rf $(SRCCACHE)/objconv.zip $(BUILDDIR)/objconv
-
+	rm -rf $(SRCCACHE)/objconv-$(OBJCONV_VER).tar.gz $(BUILDDIR)/objconv
 
-get-objconv: $(SRCCACHE)/objconv.zip
+get-objconv: $(SRCCACHE)/objconv-$(OBJCONV_VER).tar.gz
 extract-objconv: $(BUILDDIR)/objconv/source-extracted
 configure-objconv: extract-objconv
 compile-objconv: $(BUILDDIR)/objconv/build-compiled
diff --git a/deps/objconv.version b/deps/objconv.version
index 322c8fa828a17..377d2d9cd692b 100644
--- a/deps/objconv.version
+++ b/deps/objconv.version
@@ -1,7 +1,7 @@
 ## jll artifact
 # Objconv (we don't ship this, so no need for a fake JLL; therefore we specify the JLL_VER here instead of in a `stdlib/Objconv_jll/Project.toml` file)
 OBJCONV_JLL_NAME := Objconv
-OBJCONV_JLL_VER  := 2.49.1+0
+OBJCONV_JLL_VER  := 2.55.0+0
 
 ## source build
-OBJCONV_VER := 2.49.1
+OBJCONV_VER := 2.55
diff --git a/deps/openblas.mk b/deps/openblas.mk
index e2837bc47232a..e5a988ba84df2 100644
--- a/deps/openblas.mk
+++ b/deps/openblas.mk
@@ -10,7 +10,7 @@ OPENBLAS_BUILD_OPTS := CC="$(CC) $(SANITIZE_OPTS)" FC="$(FC) $(SANITIZE_OPTS)" L
 # Thread support
 ifeq ($(OPENBLAS_USE_THREAD), 1)
 OPENBLAS_BUILD_OPTS += USE_THREAD=1
-OPENBLAS_BUILD_OPTS += GEMM_MULTITHREADING_THRESHOLD=50
+OPENBLAS_BUILD_OPTS += GEMM_MULTITHREADING_THRESHOLD=400
 # Maximum number of threads for parallelism
 OPENBLAS_BUILD_OPTS += NUM_THREADS=512
 else
@@ -43,7 +43,7 @@ OPENBLAS_FFLAGS := $(JFFLAGS) $(USE_BLAS_FFLAGS)
 OPENBLAS_CFLAGS := -O2
 
 # Decide whether to build for 32-bit or 64-bit arch
-ifneq ($(BUILD_OS),$(OS))
+ifneq ($(XC_HOST),)
 OPENBLAS_BUILD_OPTS += OSNAME=$(OS) CROSS=1 HOSTCC=$(HOSTCC) CROSS_SUFFIX=$(CROSS_COMPILE)
 endif
 ifeq ($(OS),WINNT)
@@ -95,12 +95,7 @@ $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/
 		patch -p1 -f < $(SRCDIR)/patches/openblas-ofast-power.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
-	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
-		patch -p1 -f < $(SRCDIR)/patches/neoverse-generic-kernels.patch
-	echo 1 > $@
-
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
 	echo 1 > $@
 
 $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured
diff --git a/deps/openblas.version b/deps/openblas.version
index be0506fcd5137..f9729639c67ab 100644
--- a/deps/openblas.version
+++ b/deps/openblas.version
@@ -3,9 +3,9 @@
 OPENBLAS_JLL_NAME := OpenBLAS
 
 ## source build
-OPENBLAS_VER := 0.3.23
-OPENBLAS_BRANCH=v0.3.23
-OPENBLAS_SHA1=394a9fbafe9010b76a2615c562204277a956eb52
+OPENBLAS_VER := 0.3.29
+OPENBLAS_BRANCH=v0.3.29
+OPENBLAS_SHA1=8795fc7985635de1ecf674b87e2008a15097ffab
 
 # LAPACK, source-only
 LAPACK_VER := 3.9.0
diff --git a/deps/openlibm.version b/deps/openlibm.version
index f35b291260380..2d59624cbeb91 100644
--- a/deps/openlibm.version
+++ b/deps/openlibm.version
@@ -1,7 +1,9 @@
+# -*- makefile -*-
+
 ## jll artifact
 OPENLIBM_JLL_NAME := OpenLibm
 
 ## source build
-OPENLIBM_VER := 0.8.1
-OPENLIBM_BRANCH=v0.8.1
-OPENLIBM_SHA1=ae2d91698508701c83cab83714d42a1146dccf85
+OPENLIBM_VER := 0.8.7
+OPENLIBM_BRANCH=v0.8.7
+OPENLIBM_SHA1=9fbeafcd4f1b6ef6aa3946c1c8faead50f38a94d
diff --git a/deps/openssl.mk b/deps/openssl.mk
new file mode 100644
index 0000000000000..ab6bd94921562
--- /dev/null
+++ b/deps/openssl.mk
@@ -0,0 +1,111 @@
+## OpenSSL ##
+include $(SRCDIR)/openssl.version
+
+ifneq ($(USE_BINARYBUILDER_OPENSSL),1)
+
+ifeq ($(OS),Darwin)
+ifeq ($(APPLE_ARCH),arm64)
+OPENSSL_TARGET := darwin64-arm64-cc
+else
+OPENSSL_TARGET := darwin64-x86_64-cc
+endif
+else ifeq ($(OS),WINNT)
+ifeq ($(ARCH),x86_64)
+OPENSSL_TARGET := mingw64
+else
+OPENSSL_TARGET := mingw
+endif
+else ifeq ($(OS),FreeBSD)
+ifeq ($(ARCH),aarch64)
+OPENSSL_TARGET := BSD-aarch64
+else
+OPENSSL_TARGET := BSD-x86_64
+endif
+else ifeq ($(OS),Linux)
+ifeq ($(ARCH),x86_64)
+OPENSSL_TARGET := linux-x86_64
+else ifeq ($(ARCH),i686)
+OPENSSL_TARGET := linux-x86
+else ifeq ($(ARCH),arm)
+OPENSSL_TARGET := linux-armv4
+else ifeq ($(ARCH),aarch64)
+OPENSSL_TARGET := linux-aarch64
+else ifeq ($(ARCH),ppc64le)
+OPENSSL_TARGET := linux-ppc64le
+else ifeq ($(ARCH),powerpc64le)
+OPENSSL_TARGET := linux-ppc64le
+else ifeq ($(ARCH),riscv64)
+OPENSSL_TARGET := linux64-riscv64
+endif
+else
+OPENSSL_TARGET := unknown
+endif
+
+$(SRCCACHE)/openssl-$(OPENSSL_VER).tar.gz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://www.openssl.org/source/$(notdir $@)
+
+$(SRCCACHE)/openssl-$(OPENSSL_VER)/source-extracted: $(SRCCACHE)/openssl-$(OPENSSL_VER).tar.gz
+	$(JLCHECKSUM) $<
+	cd $(dir $<) && $(TAR) -zxf $<
+	touch -c $(SRCCACHE)/openssl-$(OPENSSL_VER)/configure # old target
+	echo 1 > $@
+
+checksum-openssl: $(SRCCACHE)/openssl-$(OPENSSL_VER).tar.gz
+	$(JLCHECKSUM) $<
+
+# We cannot use $(CONFIGURE_COMMON) in this step, because openssl's Configure scripts is picky
+# and does not like that we pass make variables as arguments, it wants them in the environment
+$(BUILDDIR)/openssl-$(OPENSSL_VER)/build-configured: $(SRCCACHE)/openssl-$(OPENSSL_VER)/source-extracted
+	mkdir -p $(dir $@)
+	cd $(dir $@) && \
+        CC="$(CC) $(SANITIZE_OPTS)" CXX="$(CXX) $(SANITIZE_OPTS)" LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN) $(SANITIZE_LDFLAGS)" \
+	$(dir $<)/Configure shared --prefix=$(abspath $(build_prefix)) --libdir=$(abspath $(build_libdir)) $(OPENSSL_TARGET)
+	echo 1 > $@
+
+$(BUILDDIR)/openssl-$(OPENSSL_VER)/build-compiled: $(BUILDDIR)/openssl-$(OPENSSL_VER)/build-configured
+	$(MAKE) -C $(dir $<)
+	echo 1 > $@
+
+$(BUILDDIR)/openssl-$(OPENSSL_VER)/build-checked: $(BUILDDIR)/openssl-$(OPENSSL_VER)/build-compiled
+ifeq ($(OS),$(BUILD_OS))
+	$(MAKE) -C $(dir $@) test
+endif
+	echo 1 > $@
+
+# Override bindir and only install runtime libraries, otherwise they'll go into build_depsbindir.
+OPENSSL_INSTALL = \
+	mkdir -p $2$$(build_shlibdir) && \
+	$$(MAKE) -C $1 install_dev $$(MAKE_COMMON) bindir=$$(build_shlibdir) $3 DESTDIR="$2"
+
+OPENSSL_POST_INSTALL := \
+	$(WIN_MAKE_HARD_LINK) $(build_bindir)/libcrypto-*.dll $(build_bindir)/libcrypto.dll && \
+	$(WIN_MAKE_HARD_LINK) $(build_bindir)/libssl-*.dll $(build_bindir)/libssl.dll && \
+	$(INSTALL_NAME_CMD)libcrypto.$(SHLIB_EXT) $(build_shlibdir)/libcrypto.$(SHLIB_EXT) && \
+	$(INSTALL_NAME_CMD)libssl.$(SHLIB_EXT) $(build_shlibdir)/libssl.$(SHLIB_EXT) && \
+	$(INSTALL_NAME_CHANGE_CMD) $(build_shlibdir)/libcrypto.3.dylib @rpath/libcrypto.$(SHLIB_EXT) $(build_shlibdir)/libssl.$(SHLIB_EXT)
+
+$(eval $(call staged-install, \
+	openssl,openssl-$(OPENSSL_VER), \
+	OPENSSL_INSTALL,,,$(OPENSSL_POST_INSTALL)))
+
+clean-openssl:
+	-rm -f $(BUILDDIR)/-openssl-$(OPENSSL_VER)/build-configured $(BUILDDIR)/-openssl-$(OPENSSL_VER)/build-compiled
+	-$(MAKE) -C $(BUILDDIR)/-openssl-$(OPENSSL_VER) clean
+
+distclean-openssl:
+	rm -rf $(SRCCACHE)/-openssl-$(OPENSSL_VER).tar.gz \
+		$(SRCCACHE)/-openssl-$(OPENSSL_VER) \
+		$(BUILDDIR)/-openssl-$(OPENSSL_VER)
+
+get-openssl: $(SRCCACHE)/openssl-$(OPENSSL_VER).tar.gz
+extract-openssl: $(SRCCACHE)/openssl-$(OPENSSL_VER)/source-extracted
+configure-openssl: $(BUILDDIR)/openssl-$(OPENSSL_VER)/build-configured
+compile-openssl: $(BUILDDIR)/openssl-$(OPENSSL_VER)/build-compiled
+fastcheck-openssl: check-openssl
+check-openssl: $(BUILDDIR)/openssl-$(OPENSSL_VER)/build-checked
+
+else # USE_BINARYBUILDER_OPENSSL
+
+$(eval $(call bb-install,openssl,OPENSSL,false))
+
+endif
diff --git a/deps/openssl.version b/deps/openssl.version
new file mode 100644
index 0000000000000..a5239226ef862
--- /dev/null
+++ b/deps/openssl.version
@@ -0,0 +1,6 @@
+# -*- makefile -*-
+## jll artifact
+OPENSSL_JLL_NAME := OpenSSL
+
+## source build
+OPENSSL_VER := 3.5.4
diff --git a/deps/p7zip.mk b/deps/p7zip.mk
index c7c2874d49a5e..5015edbfc2ba3 100644
--- a/deps/p7zip.mk
+++ b/deps/p7zip.mk
@@ -3,26 +3,26 @@ include $(SRCDIR)/p7zip.version
 
 ifneq ($(USE_BINARYBUILDER_P7ZIP),1)
 
-$(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.gz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://github.com/p7zip-project/p7zip/archive/refs/tags/v$(P7ZIP_VER).tar.gz
+$(SRCCACHE)/7z$(subst .,,$(P7ZIP_VER))-src.tar.xz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://downloads.sourceforge.net/project/sevenzip/7-Zip/$(P7ZIP_VER)/7z$(subst .,,$(P7ZIP_VER))-src.tar.xz
 
-$(BUILDDIR)/p7zip-$(P7ZIP_VER)/source-extracted: $(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.gz
+$(BUILDDIR)/p7zip-$(P7ZIP_VER)/source-extracted: $(SRCCACHE)/7z$(subst .,,$(P7ZIP_VER))-src.tar.xz
 	$(JLCHECKSUM) $<
 	mkdir -p $(dir $@)
-	cd $(dir $@) && $(TAR) --strip-components 1 -zxf $<
+	cd $(dir $@) && $(TAR) -Jxf $<
 	echo 1 > $@
 
-checksum-p7zip: $(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.gz
+checksum-p7zip: $(SRCCACHE)/7z$(subst .,,$(P7ZIP_VER))-src.tar.xz
 	$(JLCHECKSUM) $<
 
 $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-configured: $(BUILDDIR)/p7zip-$(P7ZIP_VER)/source-extracted
 $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-compiled: $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-configured
-	$(MAKE) -C $(dir $<) $(MAKE_COMMON) CC="$(CC)" CXX="$(CXX)" 7za
+	$(MAKE) -C $(dir $<)CPP/7zip/Bundles/Alone -f makefile.gcc
 	echo 1 > $@
 
 define P7ZIP_INSTALL
-	mkdir -p $2/$$(build_bindir)
-	cp -a $1/bin/7za $2/$$(build_bindir)/7z
+	mkdir -p $2/$$(build_private_libexecdir)/
+	cp -a $1/CPP/7zip/Bundles/Alone/_o/7za$(EXE) $2/$$(build_private_libexecdir)/7z$(EXE)
 endef
 $(eval $(call staged-install, \
 	p7zip,p7zip-$(P7ZIP_VER), \
@@ -30,14 +30,14 @@ $(eval $(call staged-install, \
 
 clean-p7zip:
 	-rm -f $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-configured $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-compiled
-	-rm -f $(build_bindir)/7za
-	-$(MAKE) -C $(BUILDDIR)/p7zip-$(P7ZIP_VER) clean
+	-rm -f $(build_bindir)/7z$(EXE) $(build_bindir)/7z$(EXE) $(build_private_libexecdir)/7z$(EXE)
+	-$(MAKE) -C $(BUILDDIR)/p7zip-$(P7ZIP_VER) $(MAKE_COMMON) $(P7ZIP_BUILD_OPTS) clean
 
 distclean-p7zip:
-	rm -rf $(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.gz $(SRCCACHE)/p7zip-$(P7ZIP_VER) $(BUILDDIR)/p7zip-$(P7ZIP_VER)
+	rm -rf $(SRCCACHE)/7z$(subst .,,$(P7ZIP_VER))-src.tar.xz $(SRCCACHE)/p7zip-$(P7ZIP_VER) $(BUILDDIR)/p7zip-$(P7ZIP_VER)
 
 
-get-p7zip: $(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.gz
+get-p7zip: $(SRCCACHE)/7z$(subst .,,$(P7ZIP_VER))-src.tar.xz
 extract-p7zip: $(SRCCACHE)/p7zip-$(P7ZIP_VER)/source-extracted
 configure-p7zip: $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-configured
 compile-p7zip: $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-compiled
@@ -48,5 +48,20 @@ check-p7zip: compile-p7zip
 else # USE_BINARYBUILDER_P7ZIP
 
 $(eval $(call bb-install,p7zip,P7ZIP,false))
+# move from bindir to shlibdir, where we expect to install it
+install-p7zip: post-install-p7zip
+uninstall-p7zip: pre-uninstall-p7zip
+post-install-p7zip: $(build_prefix)/manifest/p7zip $(PATCHELF_MANIFEST)
+	mkdir -p $(build_private_libexecdir)/
+	[ ! -e $(build_bindir)/7z$(EXE) ] || mv $(build_bindir)/7z$(EXE) $(build_private_libexecdir)/7z$(EXE)
+	[ -e $(build_private_libexecdir)/7z$(EXE) ]
+ifneq (,$(findstring $(OS),Linux FreeBSD))
+	[ -L $(build_private_libexecdir)/7z ] || \
+	$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN/$(reverse_build_private_libexecdir_rel)' $(build_private_libexecdir)/7z$(EXE)
+endif
+pre-uninstall-p7zip:
+	-rm -f $(build_private_libexecdir)/7z$(EXE)
+
+.PHONY: post-install-p7zip pre-uninstall-p7zip
 
 endif
diff --git a/deps/p7zip.version b/deps/p7zip.version
index d4a13155d9162..740d269d8a757 100644
--- a/deps/p7zip.version
+++ b/deps/p7zip.version
@@ -2,4 +2,4 @@
 P7ZIP_JLL_NAME := p7zip
 
 ## source build
-P7ZIP_VER := 17.04
+P7ZIP_VER := 25.01
diff --git a/deps/patchelf.mk b/deps/patchelf.mk
index 9b4947f183117..aaf0ecb313b80 100644
--- a/deps/patchelf.mk
+++ b/deps/patchelf.mk
@@ -7,7 +7,7 @@ $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2: | $(SRCCACHE)
 $(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2
 	$(JLCHECKSUM) $<
 	mkdir $(dir $@)
-	cd $(dir $@) && $(TAR) jxf $< --strip-components=1
+	cd $(dir $@) && $(TAR) -jxf $< --strip-components=1
 	touch -c $(SRCCACHE)/patchelf-$(PATCHELF_VER)/configure # old target
 	echo 1 > $@
 
@@ -20,7 +20,7 @@ $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: XC_HOST:=$(BUILD_MACHINE)
 $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: $(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(dir $<)/configure $(CONFIGURE_COMMON) LDFLAGS="$(CXXLDFLAGS)" CPPFLAGS="$(CPPFLAGS)"
+	$(dir $<)/configure $(CONFIGURE_COMMON) LDFLAGS="$(CXXLDFLAGS)" CPPFLAGS="$(CPPFLAGS)" MAKE=$(MAKE)
 	echo 1 > $@
 
 $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-compiled: $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured
diff --git a/deps/patchelf.version b/deps/patchelf.version
index bbeaa87d25136..6e4f32a0c2fe4 100644
--- a/deps/patchelf.version
+++ b/deps/patchelf.version
@@ -1,3 +1,4 @@
 ## source build
 # Patchelf (we don't ship this or even use a JLL, we just always build it)
-PATCHELF_VER := 0.13
+# NOTE: Do not upgrade this to 0.18+ until https://github.com/NixOS/patchelf/issues/492 is fixed
+PATCHELF_VER := 0.17.2
diff --git a/deps/patches/gmp-CVE-2021-43618.patch b/deps/patches/gmp-CVE-2021-43618.patch
deleted file mode 100644
index a4e420e9219da..0000000000000
--- a/deps/patches/gmp-CVE-2021-43618.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-# Origin: https://gmplib.org/repo/gmp-6.2/rev/561a9c25298e
-# HG changeset patch
-# User Marco Bodrato <bodrato@mail.dm.unipi.it>
-# Date 1634836009 -7200
-# Node ID 561a9c25298e17bb01896801ff353546c6923dbd
-# Parent  e1fd9db13b475209a864577237ea4b9105b3e96e
-mpz/inp_raw.c: Avoid bit size overflows
-
-diff -r e1fd9db13b47 -r 561a9c25298e mpz/inp_raw.c
---- a/mpz/inp_raw.c	Tue Dec 22 23:49:51 2020 +0100
-+++ b/mpz/inp_raw.c	Thu Oct 21 19:06:49 2021 +0200
-@@ -88,8 +88,11 @@
- 
-   abs_csize = ABS (csize);
- 
-+  if (UNLIKELY (abs_csize > ~(mp_bitcnt_t) 0 / 8))
-+    return 0; /* Bit size overflows */
-+
-   /* round up to a multiple of limbs */
--  abs_xsize = BITS_TO_LIMBS (abs_csize*8);
-+  abs_xsize = BITS_TO_LIMBS ((mp_bitcnt_t) abs_csize * 8);
- 
-   if (abs_xsize != 0)
-     {
diff --git a/deps/patches/gmp-HG-changeset.patch b/deps/patches/gmp-HG-changeset.patch
deleted file mode 100644
index 7437fb6f2f748..0000000000000
--- a/deps/patches/gmp-HG-changeset.patch
+++ /dev/null
@@ -1,520 +0,0 @@
-
-# HG changeset patch
-# User Torbjorn Granlund <tg@gmplib.org>
-# Date 1606685500 -3600
-# Node ID 5f32dbc41afc1f8cd77af1614f0caeb24deb7d7b
-# Parent  94c84d919f83ba963ed1809f8e80c7bef32db55c
-Avoid the x18 register since it is reserved on Darwin.
-
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aors_n.asm
---- a/mpn/arm64/aors_n.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/aors_n.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -68,7 +68,7 @@
- EPILOGUE()
- PROLOGUE(func_n)
- 	CLRCY
--L(ent):	lsr	x18, n, #2
-+L(ent):	lsr	x17, n, #2
- 	tbz	n, #0, L(bx0)
- 
- L(bx1):	ldr	x7, [up]
-@@ -77,7 +77,7 @@
- 	str	x13, [rp],#8
- 	tbnz	n, #1, L(b11)
- 
--L(b01):	cbz	x18, L(ret)
-+L(b01):	cbz	x17, L(ret)
- 	ldp	x4, x5, [up,#8]
- 	ldp	x8, x9, [vp,#8]
- 	sub	up, up, #8
-@@ -88,7 +88,7 @@
- 	ldp	x10, x11, [vp,#8]
- 	add	up, up, #8
- 	add	vp, vp, #8
--	cbz	x18, L(end)
-+	cbz	x17, L(end)
- 	b	L(top)
- 
- L(bx0):	tbnz	n, #1, L(b10)
-@@ -101,7 +101,7 @@
- 
- L(b10):	ldp	x6, x7, [up]
- 	ldp	x10, x11, [vp]
--	cbz	x18, L(end)
-+	cbz	x17, L(end)
- 
- 	ALIGN(16)
- L(top):	ldp	x4, x5, [up,#16]
-@@ -114,8 +114,8 @@
- 	ADDSUBC	x12, x4, x8
- 	ADDSUBC	x13, x5, x9
- 	stp	x12, x13, [rp],#16
--	sub	x18, x18, #1
--	cbnz	x18, L(top)
-+	sub	x17, x17, #1
-+	cbnz	x17, L(top)
- 
- L(end):	ADDSUBC	x12, x6, x10
- 	ADDSUBC	x13, x7, x11
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aorsmul_1.asm
---- a/mpn/arm64/aorsmul_1.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/aorsmul_1.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -32,10 +32,15 @@
- 
- include(`../config.m4')
- 
--C	     cycles/limb
--C Cortex-A53	9.3-9.8
--C Cortex-A57	 7.0
--C X-Gene	 5.0
-+C	       addmul_1        submul_1
-+C	     cycles/limb     cycles/limb
-+C Cortex-A53	9.3-9.8		9.3-9.8
-+C Cortex-A55    9.0-9.5		9.3-9.8
-+C Cortex-A57	 7		 7
-+C Cortex-A72
-+C Cortex-A73	 6		 6
-+C X-Gene	 5		 5
-+C Apple M1	 1.75		 1.75
- 
- C NOTES
- C  * It is possible to keep the carry chain alive between the addition blocks
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aorsorrlshC_n.asm
---- a/mpn/arm64/aorsorrlshC_n.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/aorsorrlshC_n.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -65,14 +65,14 @@
- 
- ASM_START()
- PROLOGUE(func_n)
--	lsr	x18, n, #2
-+	lsr	x6, n, #2
- 	tbz	n, #0, L(bx0)
- 
- L(bx1):	ldr	x5, [up]
- 	tbnz	n, #1, L(b11)
- 
- L(b01):	ldr	x11, [vp]
--	cbz	x18, L(1)
-+	cbz	x6, L(1)
- 	ldp	x8, x9, [vp,#8]
- 	lsl	x13, x11, #LSH
- 	ADDSUB(	x15, x13, x5)
-@@ -94,7 +94,7 @@
- 	ADDSUB(	x17, x13, x5)
- 	str	x17, [rp],#8
- 	sub	up, up, #8
--	cbz	x18, L(end)
-+	cbz	x6, L(end)
- 	b	L(top)
- 
- L(bx0):	tbnz	n, #1, L(b10)
-@@ -107,7 +107,7 @@
- L(b10):	CLRRCY(	x9)
- 	ldp	x10, x11, [vp]
- 	sub	up, up, #16
--	cbz	x18, L(end)
-+	cbz	x6, L(end)
- 
- 	ALIGN(16)
- L(top):	ldp	x4, x5, [up,#16]
-@@ -124,8 +124,8 @@
- 	ADDSUBC(x16, x12, x4)
- 	ADDSUBC(x17, x13, x5)
- 	stp	x16, x17, [rp],#16
--	sub	x18, x18, #1
--	cbnz	x18, L(top)
-+	sub	x6, x6, #1
-+	cbnz	x6, L(top)
- 
- L(end):	ldp	x4, x5, [up,#16]
- 	extr	x12, x10, x9, #RSH
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/cnd_aors_n.asm
---- a/mpn/arm64/cnd_aors_n.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/cnd_aors_n.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -65,7 +65,7 @@
- 
- 	CLRCY
- 
--	lsr	x18, n, #2
-+	lsr	x17, n, #2
- 	tbz	n, #0, L(bx0)
- 
- L(bx1):	ldr	x13, [vp]
-@@ -75,7 +75,7 @@
- 	str	x9, [rp]
- 	tbnz	n, #1, L(b11)
- 
--L(b01):	cbz	x18, L(rt)
-+L(b01):	cbz	x17, L(rt)
- 	ldp	x12, x13, [vp,#8]
- 	ldp	x10, x11, [up,#8]
- 	sub	up, up, #8
-@@ -86,7 +86,7 @@
- L(b11):	ldp	x12, x13, [vp,#8]!
- 	ldp	x10, x11, [up,#8]!
- 	sub	rp, rp, #8
--	cbz	x18, L(end)
-+	cbz	x17, L(end)
- 	b	L(top)
- 
- L(bx0):	ldp	x12, x13, [vp]
-@@ -99,7 +99,7 @@
- 	b	L(mid)
- 
- L(b10):	sub	rp, rp, #16
--	cbz	x18, L(end)
-+	cbz	x17, L(end)
- 
- 	ALIGN(16)
- L(top):	bic	x6, x12, cnd
-@@ -116,8 +116,8 @@
- 	ADDSUBC	x9, x11, x7
- 	ldp	x10, x11, [up,#32]!
- 	stp	x8, x9, [rp,#32]!
--	sub	x18, x18, #1
--	cbnz	x18, L(top)
-+	sub	x17, x17, #1
-+	cbnz	x17, L(top)
- 
- L(end):	bic	x6, x12, cnd
- 	bic	x7, x13, cnd
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/logops_n.asm
---- a/mpn/arm64/logops_n.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/logops_n.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -78,7 +78,7 @@
- 
- ASM_START()
- PROLOGUE(func)
--	lsr	x18, n, #2
-+	lsr	x17, n, #2
- 	tbz	n, #0, L(bx0)
- 
- L(bx1):	ldr	x7, [up]
-@@ -88,7 +88,7 @@
- 	str	x15, [rp],#8
- 	tbnz	n, #1, L(b11)
- 
--L(b01):	cbz	x18, L(ret)
-+L(b01):	cbz	x17, L(ret)
- 	ldp	x4, x5, [up,#8]
- 	ldp	x8, x9, [vp,#8]
- 	sub	up, up, #8
-@@ -99,7 +99,7 @@
- 	ldp	x10, x11, [vp,#8]
- 	add	up, up, #8
- 	add	vp, vp, #8
--	cbz	x18, L(end)
-+	cbz	x17, L(end)
- 	b	L(top)
- 
- L(bx0):	tbnz	n, #1, L(b10)
-@@ -110,7 +110,7 @@
- 
- L(b10):	ldp	x6, x7, [up]
- 	ldp	x10, x11, [vp]
--	cbz	x18, L(end)
-+	cbz	x17, L(end)
- 
- 	ALIGN(16)
- L(top):	ldp	x4, x5, [up,#16]
-@@ -127,8 +127,8 @@
- 	POSTOP(	x12)
- 	POSTOP(	x13)
- 	stp	x12, x13, [rp],#16
--	sub	x18, x18, #1
--	cbnz	x18, L(top)
-+	sub	x17, x17, #1
-+	cbnz	x17, L(top)
- 
- L(end):	LOGOP(	x12, x6, x10)
- 	LOGOP(	x13, x7, x11)
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/lshift.asm
---- a/mpn/arm64/lshift.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/lshift.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -61,7 +61,7 @@
- 	add	rp, rp_arg, n, lsl #3
- 	add	up, up, n, lsl #3
- 	sub	tnc, xzr, cnt
--	lsr	x18, n, #2
-+	lsr	x17, n, #2
- 	tbz	n, #0, L(bx0)
- 
- L(bx1):	ldr	x4, [up,#-8]
-@@ -69,7 +69,7 @@
- 
- L(b01):	NSHIFT	x0, x4, tnc
- 	PSHIFT	x2, x4, cnt
--	cbnz	x18, L(gt1)
-+	cbnz	x17, L(gt1)
- 	str	x2, [rp,#-8]
- 	ret
- L(gt1):	ldp	x4, x5, [up,#-24]
-@@ -89,7 +89,7 @@
- 	PSHIFT	x13, x5, cnt
- 	NSHIFT	x10, x4, tnc
- 	PSHIFT	x2, x4, cnt
--	cbnz	x18, L(gt2)
-+	cbnz	x17, L(gt2)
- 	orr	x10, x10, x13
- 	stp	x2, x10, [rp,#-16]
- 	ret
-@@ -123,11 +123,11 @@
- 	orr	x11, x12, x2
- 	stp	x10, x11, [rp,#-32]!
- 	PSHIFT	x2, x4, cnt
--L(lo0):	sub	x18, x18, #1
-+L(lo0):	sub	x17, x17, #1
- L(lo3):	NSHIFT	x10, x6, tnc
- 	PSHIFT	x13, x7, cnt
- 	NSHIFT	x12, x7, tnc
--	cbnz	x18, L(top)
-+	cbnz	x17, L(top)
- 
- L(end):	orr	x10, x10, x13
- 	orr	x11, x12, x2
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/lshiftc.asm
---- a/mpn/arm64/lshiftc.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/lshiftc.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -61,7 +61,7 @@
- 	add	rp, rp_arg, n, lsl #3
- 	add	up, up, n, lsl #3
- 	sub	tnc, xzr, cnt
--	lsr	x18, n, #2
-+	lsr	x17, n, #2
- 	tbz	n, #0, L(bx0)
- 
- L(bx1):	ldr	x4, [up,#-8]
-@@ -69,7 +69,7 @@
- 
- L(b01):	NSHIFT	x0, x4, tnc
- 	PSHIFT	x2, x4, cnt
--	cbnz	x18, L(gt1)
-+	cbnz	x17, L(gt1)
- 	mvn	x2, x2
- 	str	x2, [rp,#-8]
- 	ret
-@@ -90,7 +90,7 @@
- 	PSHIFT	x13, x5, cnt
- 	NSHIFT	x10, x4, tnc
- 	PSHIFT	x2, x4, cnt
--	cbnz	x18, L(gt2)
-+	cbnz	x17, L(gt2)
- 	eon	x10, x10, x13
- 	mvn	x2, x2
- 	stp	x2, x10, [rp,#-16]
-@@ -125,11 +125,11 @@
- 	eon	x11, x12, x2
- 	stp	x10, x11, [rp,#-32]!
- 	PSHIFT	x2, x4, cnt
--L(lo0):	sub	x18, x18, #1
-+L(lo0):	sub	x17, x17, #1
- L(lo3):	NSHIFT	x10, x6, tnc
- 	PSHIFT	x13, x7, cnt
- 	NSHIFT	x12, x7, tnc
--	cbnz	x18, L(top)
-+	cbnz	x17, L(top)
- 
- L(end):	eon	x10, x10, x13
- 	eon	x11, x12, x2
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/mul_1.asm
---- a/mpn/arm64/mul_1.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/mul_1.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -56,7 +56,7 @@
- 
- PROLOGUE(mpn_mul_1)
- 	adds	x4, xzr, xzr		C clear register and cy flag
--L(com):	lsr	x18, n, #2
-+L(com):	lsr	x17, n, #2
- 	tbnz	n, #0, L(bx1)
- 
- L(bx0):	mov	x11, x4
-@@ -65,7 +65,7 @@
- L(b10):	ldp	x4, x5, [up]
- 	mul	x8, x4, v0
- 	umulh	x10, x4, v0
--	cbz	x18, L(2)
-+	cbz	x17, L(2)
- 	ldp	x6, x7, [up,#16]!
- 	mul	x9, x5, v0
- 	b	L(mid)-8
-@@ -80,7 +80,7 @@
- 	str	x9, [rp],#8
- 	tbnz	n, #1, L(b10)
- 
--L(b01):	cbz	x18, L(1)
-+L(b01):	cbz	x17, L(1)
- 
- L(b00):	ldp	x6, x7, [up]
- 	mul	x8, x6, v0
-@@ -90,8 +90,8 @@
- 	adcs	x12, x8, x11
- 	umulh	x11, x7, v0
- 	add	rp, rp, #16
--	sub	x18, x18, #1
--	cbz	x18, L(end)
-+	sub	x17, x17, #1
-+	cbz	x17, L(end)
- 
- 	ALIGN(16)
- L(top):	mul	x8, x4, v0
-@@ -110,8 +110,8 @@
- 	stp	x12, x13, [rp],#32
- 	adcs	x12, x8, x11
- 	umulh	x11, x7, v0
--	sub	x18, x18, #1
--	cbnz	x18, L(top)
-+	sub	x17, x17, #1
-+	cbnz	x17, L(top)
- 
- L(end):	mul	x8, x4, v0
- 	adcs	x13, x9, x10
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/rsh1aors_n.asm
---- a/mpn/arm64/rsh1aors_n.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/rsh1aors_n.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -59,7 +59,7 @@
- 
- ASM_START()
- PROLOGUE(func_n)
--	lsr	x18, n, #2
-+	lsr	x6, n, #2
- 
- 	tbz	n, #0, L(bx0)
- 
-@@ -69,7 +69,7 @@
- 
- L(b01):	ADDSUB	x13, x5, x9
- 	and	x10, x13, #1
--	cbz	x18, L(1)
-+	cbz	x6, L(1)
- 	ldp	x4, x5, [up],#48
- 	ldp	x8, x9, [vp],#48
- 	ADDSUBC	x14, x4, x8
-@@ -80,8 +80,8 @@
- 	ADDSUBC	x12, x4, x8
- 	ADDSUBC	x13, x5, x9
- 	str	x17, [rp], #24
--	sub	x18, x18, #1
--	cbz	x18, L(end)
-+	sub	x6, x6, #1
-+	cbz	x6, L(end)
- 	b	L(top)
- 
- L(1):	cset	x14, COND
-@@ -97,7 +97,7 @@
- 	ldp	x8, x9, [vp],#32
- 	ADDSUBC	x12, x4, x8
- 	ADDSUBC	x13, x5, x9
--	cbz	x18, L(3)
-+	cbz	x6, L(3)
- 	ldp	x4, x5, [up,#-16]
- 	ldp	x8, x9, [vp,#-16]
- 	extr	x17, x12, x15, #1
-@@ -117,7 +117,7 @@
- 	ADDSUB	x12, x4, x8
- 	ADDSUBC	x13, x5, x9
- 	and	x10, x12, #1
--	cbz	x18, L(2)
-+	cbz	x6, L(2)
- 	ldp	x4, x5, [up,#-16]
- 	ldp	x8, x9, [vp,#-16]
- 	ADDSUBC	x14, x4, x8
-@@ -134,8 +134,8 @@
- 	ADDSUBC	x12, x4, x8
- 	ADDSUBC	x13, x5, x9
- 	add	rp, rp, #16
--	sub	x18, x18, #1
--	cbz	x18, L(end)
-+	sub	x6, x6, #1
-+	cbz	x6, L(end)
- 
- 	ALIGN(16)
- L(top):	ldp	x4, x5, [up,#-16]
-@@ -152,8 +152,8 @@
- 	ADDSUBC	x12, x4, x8
- 	ADDSUBC	x13, x5, x9
- 	stp	x16, x17, [rp],#32
--	sub	x18, x18, #1
--	cbnz	x18, L(top)
-+	sub	x6, x6, #1
-+	cbnz	x6, L(top)
- 
- L(end):	extr	x16, x15, x14, #1
- 	extr	x17, x12, x15, #1
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/rshift.asm
---- a/mpn/arm64/rshift.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/rshift.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -60,7 +60,7 @@
- PROLOGUE(mpn_rshift)
- 	mov	rp, rp_arg
- 	sub	tnc, xzr, cnt
--	lsr	x18, n, #2
-+	lsr	x17, n, #2
- 	tbz	n, #0, L(bx0)
- 
- L(bx1):	ldr	x5, [up]
-@@ -68,7 +68,7 @@
- 
- L(b01):	NSHIFT	x0, x5, tnc
- 	PSHIFT	x2, x5, cnt
--	cbnz	x18, L(gt1)
-+	cbnz	x17, L(gt1)
- 	str	x2, [rp]
- 	ret
- L(gt1):	ldp	x4, x5, [up,#8]
-@@ -89,7 +89,7 @@
- 	PSHIFT	x13, x4, cnt
- 	NSHIFT	x10, x5, tnc
- 	PSHIFT	x2, x5, cnt
--	cbnz	x18, L(gt2)
-+	cbnz	x17, L(gt2)
- 	orr	x10, x10, x13
- 	stp	x10, x2, [rp]
- 	ret
-@@ -121,11 +121,11 @@
- 	orr	x11, x12, x2
- 	stp	x11, x10, [rp,#32]!
- 	PSHIFT	x2, x5, cnt
--L(lo0):	sub	x18, x18, #1
-+L(lo0):	sub	x17, x17, #1
- L(lo3):	NSHIFT	x10, x7, tnc
- 	NSHIFT	x12, x6, tnc
- 	PSHIFT	x13, x6, cnt
--	cbnz	x18, L(top)
-+	cbnz	x17, L(top)
- 
- L(end):	orr	x10, x10, x13
- 	orr	x11, x12, x2
-diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/sqr_diag_addlsh1.asm
---- a/mpn/arm64/sqr_diag_addlsh1.asm	Sat Nov 28 23:38:32 2020 +0100
-+++ b/mpn/arm64/sqr_diag_addlsh1.asm	Sun Nov 29 22:31:40 2020 +0100
-@@ -47,7 +47,7 @@
- ASM_START()
- PROLOGUE(mpn_sqr_diag_addlsh1)
- 	ldr	x15, [up],#8
--	lsr	x18, n, #1
-+	lsr	x14, n, #1
- 	tbz	n, #0, L(bx0)
- 
- L(bx1):	adds	x7, xzr, xzr
-@@ -62,8 +62,8 @@
- 	ldr	x17, [up],#16
- 	ldp	x6, x7, [tp],#32
- 	umulh	x11, x15, x15
--	sub	x18, x18, #1
--	cbz	x18, L(end)
-+	sub	x14, x14, #1
-+	cbz	x14, L(end)
- 
- 	ALIGN(16)
- L(top):	extr	x9, x6, x5, #63
-@@ -84,8 +84,8 @@
- 	extr	x8, x5, x4, #63
- 	stp	x12, x13, [rp],#16
- 	adcs	x12, x8, x10
--	sub	x18, x18, #1
--	cbnz	x18, L(top)
-+	sub	x14, x14, #1
-+	cbnz	x14, L(top)
- 
- L(end):	extr	x9, x6, x5, #63
- 	mul	x10, x17, x17
diff --git a/deps/patches/gmp_alloc_overflow_func.patch b/deps/patches/gmp-alloc_overflow.patch
similarity index 54%
rename from deps/patches/gmp_alloc_overflow_func.patch
rename to deps/patches/gmp-alloc_overflow.patch
index 51506d70d46fb..6a0f02c66e3f4 100644
--- a/deps/patches/gmp_alloc_overflow_func.patch
+++ b/deps/patches/gmp-alloc_overflow.patch
@@ -1,7 +1,7 @@
-diff --git a/gmp-h.in b/gmp-h.in
---- a/gmp-h.in
-+++ b/gmp-h.in
-@@ -479,6 +479,13 @@ using std::FILE;
+diff -ru gmp-6.3.0/gmp-h.in gmp-6.3.0.new/gmp-h.in
+--- gmp-6.3.0/gmp-h.in	2023-07-29 09:42:16
++++ gmp-6.3.0.new/gmp-h.in	2023-12-29 15:33:34
+@@ -487,6 +487,12 @@
  				      void *(**) (void *, size_t, size_t),
  				      void (**) (void *, size_t)) __GMP_NOTHROW;
  
@@ -10,15 +10,23 @@ diff --git a/gmp-h.in b/gmp-h.in
 +
 +#define mp_get_alloc_overflow_function __gmp_get_alloc_overflow_function
 +__GMP_DECLSPEC void mp_get_alloc_overflow_function (void (**) (void)) __GMP_NOTHROW;
-+
 +
  #define mp_bits_per_limb __gmp_bits_per_limb
  __GMP_DECLSPEC extern const int mp_bits_per_limb;
  
-diff --git a/gmp-impl.h b/gmp-impl.h
---- a/gmp-impl.h
-+++ b/gmp-impl.h
-@@ -696,10 +696,12 @@ struct tmp_debug_entry_t {
+diff -ru gmp-6.3.0/gmp-impl.h gmp-6.3.0.new/gmp-impl.h
+--- gmp-6.3.0/gmp-impl.h	2023-07-29 09:42:16
++++ gmp-6.3.0.new/gmp-impl.h	2023-12-30 16:02:36
+@@ -58,6 +58,8 @@
+    short can be 24, 32, 46 or 64 bits, and different for ushort.  */
+ 
+ #include <limits.h>
++#include <stdio.h>
++#include <stdlib.h>
+ 
+ /* For fat.h and other fat binary stuff.
+    No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
+@@ -699,14 +701,22 @@
  __GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
  __GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
  __GMP_DECLSPEC extern void   (*__gmp_free_func) (void *, size_t);
@@ -31,10 +39,7 @@ diff --git a/gmp-impl.h b/gmp-impl.h
  
  #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
    ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
-@@ -727,6 +729,12 @@ struct tmp_debug_entry_t {
- 	(ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));	\
-   } while (0)
- 
+ #define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
 +#define __GMP_ALLOC_OVERFLOW_FUNC()                              \
 +  do {                                                           \
 +    (*__gmp_alloc_overflow_func) ();                             \
@@ -42,12 +47,12 @@ diff --git a/gmp-impl.h b/gmp-impl.h
 +    abort ();                                                    \
 +  } while (0)
  
- /* Dummy for non-gcc, code involving it will go dead. */
- #if ! defined (__GNUC__) || __GNUC__ < 2
-diff --git a/memory.c b/memory.c
---- a/memory.c
-+++ b/memory.c
-@@ -38,6 +38,7 @@ see https://www.gnu.org/licenses/.  */
+ #define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type)		\
+   ((type *) (*__gmp_reallocate_func)					\
+diff -ru gmp-6.3.0/memory.c gmp-6.3.0.new/memory.c
+--- gmp-6.3.0/memory.c	2023-07-29 09:42:16
++++ gmp-6.3.0.new/memory.c	2023-12-29 15:43:27
+@@ -37,6 +37,7 @@
  void * (*__gmp_allocate_func) (size_t) = __gmp_default_allocate;
  void * (*__gmp_reallocate_func) (void *, size_t, size_t) = __gmp_default_reallocate;
  void   (*__gmp_free_func) (void *, size_t) = __gmp_default_free;
@@ -55,21 +60,22 @@ diff --git a/memory.c b/memory.c
  
  
  /* Default allocation functions.  In case of failure to allocate/reallocate
-@@ -144,3 +145,10 @@ void
+@@ -142,4 +143,11 @@
+   }
  #endif
    free (blk_ptr);
- }
++}
 +
 +void
 +__gmp_default_alloc_overflow(void)
 +{
 +    fprintf (stderr, "gmp: overflow in mpz type\n");
 +    abort();
-+}
-diff --git a/mp_get_fns.c b/mp_get_fns.c
---- a/mp_get_fns.c
-+++ b/mp_get_fns.c
-@@ -46,3 +46,11 @@ mp_get_memory_functions (void *(**alloc_
+ }
+diff -ru gmp-6.3.0/mp_get_fns.c gmp-6.3.0.new/mp_get_fns.c
+--- gmp-6.3.0/mp_get_fns.c	2023-07-29 09:42:16
++++ gmp-6.3.0.new/mp_get_fns.c	2023-12-29 15:43:27
+@@ -45,3 +45,11 @@
    if (free_func != NULL)
      *free_func = __gmp_free_func;
  }
@@ -81,10 +87,10 @@ diff --git a/mp_get_fns.c b/mp_get_fns.c
 +  if (alloc_overflow_func != NULL)
 +    *alloc_overflow_func = __gmp_alloc_overflow_func;
 +}
-diff --git a/mp_set_fns.c b/mp_set_fns.c
---- a/mp_set_fns.c
-+++ b/mp_set_fns.c
-@@ -48,3 +48,12 @@ mp_set_memory_functions (void *(*alloc_f
+diff -ru gmp-6.3.0/mp_set_fns.c gmp-6.3.0.new/mp_set_fns.c
+--- gmp-6.3.0/mp_set_fns.c	2023-07-29 09:42:16
++++ gmp-6.3.0.new/mp_set_fns.c	2023-12-29 15:43:27
+@@ -47,3 +47,12 @@
    __gmp_reallocate_func = realloc_func;
    __gmp_free_func = free_func;
  }
@@ -97,58 +103,66 @@ diff --git a/mp_set_fns.c b/mp_set_fns.c
 +    alloc_overflow_func = __gmp_default_alloc_overflow;
 +  __gmp_alloc_overflow_func = alloc_overflow_func;
 +}
-diff --git a/mpz/init2.c b/mpz/init2.c
---- a/mpz/init2.c
-+++ b/mpz/init2.c
-@@ -45,8 +45,7 @@ mpz_init2 (mpz_ptr x, mp_bitcnt_t bits)
+diff -ru gmp-6.3.0/mpz/init2.c gmp-6.3.0.new/mpz/init2.c
+--- gmp-6.3.0/mpz/init2.c	2023-07-29 09:42:17
++++ gmp-6.3.0.new/mpz/init2.c	2023-12-30 12:22:34
+@@ -41,7 +41,7 @@
+   if (sizeof (unsigned long) > sizeof (int)) /* param vs _mp_size field */
      {
        if (UNLIKELY (new_alloc > INT_MAX))
- 	{
--	  fprintf (stderr, "gmp: overflow in mpz type\n");
--	  abort ();
-+	  __GMP_ALLOC_OVERFLOW_FUNC ();
- 	}
+-	MPZ_OVERFLOW;
++	__GMP_ALLOC_OVERFLOW_FUNC ();
      }
  
-diff --git a/mpz/realloc.c b/mpz/realloc.c
---- a/mpz/realloc.c
-+++ b/mpz/realloc.c
-@@ -45,16 +45,14 @@ void *
+   PTR(x) = __GMP_ALLOCATE_FUNC_LIMBS (new_alloc);
+diff -ru gmp-6.3.0/mpz/realloc.c gmp-6.3.0.new/mpz/realloc.c
+--- gmp-6.3.0/mpz/realloc.c	2023-07-29 09:42:17
++++ gmp-6.3.0.new/mpz/realloc.c	2023-12-30 12:22:47
+@@ -42,12 +42,12 @@
+   if (sizeof (mp_size_t) == sizeof (int))
      {
        if (UNLIKELY (new_alloc > ULONG_MAX / GMP_NUMB_BITS))
- 	{
--	  fprintf (stderr, "gmp: overflow in mpz type\n");
--	  abort ();
-+	  __GMP_ALLOC_OVERFLOW_FUNC ();
- 	}
+-	MPZ_OVERFLOW;
++	__GMP_ALLOC_OVERFLOW_FUNC ();
      }
    else
      {
        if (UNLIKELY (new_alloc > INT_MAX))
- 	{
--	  fprintf (stderr, "gmp: overflow in mpz type\n");
--	  abort ();
-+	  __GMP_ALLOC_OVERFLOW_FUNC ();
- 	}
+-	MPZ_OVERFLOW;
++	__GMP_ALLOC_OVERFLOW_FUNC ();
      }
  
-diff --git a/mpz/realloc2.c b/mpz/realloc2.c
---- a/mpz/realloc2.c
-+++ b/mpz/realloc2.c
-@@ -45,8 +45,7 @@ mpz_realloc2 (mpz_ptr m, mp_bitcnt_t bit
+   if (ALLOC (m) == 0)
+diff -ru gmp-6.3.0/mpz/realloc2.c gmp-6.3.0.new/mpz/realloc2.c
+--- gmp-6.3.0/mpz/realloc2.c	2023-07-29 09:42:17
++++ gmp-6.3.0.new/mpz/realloc2.c	2023-12-30 12:22:59
+@@ -42,7 +42,7 @@
+   if (sizeof (unsigned long) > sizeof (int)) /* param vs _mp_size field */
      {
        if (UNLIKELY (new_alloc > INT_MAX))
- 	{
--	  fprintf (stderr, "gmp: overflow in mpz type\n");
--	  abort ();
-+	  __GMP_ALLOC_OVERFLOW_FUNC ();
- 	}
+-	MPZ_OVERFLOW;
++	__GMP_ALLOC_OVERFLOW_FUNC ();
      }
  
-diff --git a/tests/mpz/t-pow.c b/tests/mpz/t-pow.c
---- a/tests/mpz/t-pow.c
-+++ b/tests/mpz/t-pow.c
-@@ -195,6 +195,34 @@ check_random (int reps)
+   if (ALLOC (m) == 0)
+diff -ru gmp-6.3.0/tal-reent.c gmp-6.3.0.new/tal-reent.c
+--- gmp-6.3.0/tal-reent.c	2023-07-29 09:42:17
++++ gmp-6.3.0.new/tal-reent.c	2023-12-30 12:19:40
+@@ -61,6 +61,10 @@
+ 
+   total_size = size + HSIZ;
+   p = __GMP_ALLOCATE_FUNC_TYPE (total_size, char);
++  if (!p)
++    {
++      __GMP_ALLOC_OVERFLOW_FUNC ();
++    }
+   P->size = total_size;
+   P->next = *markp;
+   *markp = P;
+diff -ru gmp-6.3.0/tests/mpz/t-pow.c gmp-6.3.0.new/tests/mpz/t-pow.c
+--- gmp-6.3.0/tests/mpz/t-pow.c	2023-07-29 09:42:17
++++ gmp-6.3.0.new/tests/mpz/t-pow.c	2023-12-30 15:57:58
+@@ -194,6 +194,33 @@
    mpz_clear (want);
  }
  
@@ -178,12 +192,11 @@ diff --git a/tests/mpz/t-pow.c b/tests/mpz/t-pow.c
 +  }
 +  mpz_clear (x);
 +}
-+
 +
  int
  main (int argc, char **argv)
  {
-@@ -212,6 +240,7 @@ main (int argc, char **argv)
+@@ -211,6 +238,7 @@
  
    check_various ();
    check_random (reps);
diff --git a/deps/patches/libssh2-fix-import-lib-name.patch b/deps/patches/libssh2-fix-import-lib-name.patch
deleted file mode 100644
index 15aafb58d2736..0000000000000
--- a/deps/patches/libssh2-fix-import-lib-name.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 3732420725efbf410df5863b91a09ca214ee18ba Mon Sep 17 00:00:00 2001
-From: "Y. Yang" <metab0t@users.noreply.github.com>
-Date: Thu, 16 Jun 2022 19:16:37 +0800
-Subject: [PATCH] Fix DLL import library name
-
-https://aur.archlinux.org/packages/mingw-w64-libssh2
-https://cmake.org/cmake/help/latest/prop_tgt/IMPORT_PREFIX.html
----
- src/CMakeLists.txt | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
-index cb8fee1..17ecefd 100644
---- a/src/CMakeLists.txt
-+++ b/src/CMakeLists.txt
-@@ -220,6 +220,7 @@ endif()
- add_library(libssh2 ${SOURCES})
- # we want it to be called libssh2 on all platforms
- set_target_properties(libssh2 PROPERTIES PREFIX "")
-+set_target_properties(libssh2 PROPERTIES IMPORT_PREFIX "")
- 
- target_compile_definitions(libssh2 PRIVATE ${PRIVATE_COMPILE_DEFINITIONS})
- target_include_directories(libssh2
--- 
-2.36.1
-
diff --git a/deps/patches/libssh2-userauth-check.patch b/deps/patches/libssh2-userauth-check.patch
deleted file mode 100644
index 1dc6108ebece7..0000000000000
--- a/deps/patches/libssh2-userauth-check.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-From 37ee0aa214655b63e7869d1d74ff1ec9f9818a5e Mon Sep 17 00:00:00 2001
-From: Daniel Stenberg <daniel@haxx.se>
-Date: Fri, 17 Dec 2021 17:46:29 +0100
-Subject: [PATCH] userauth: check for too large userauth_kybd_auth_name_len
- (#650)
-
-... before using it.
-
-Reported-by: MarcoPoloPie
-Fixes #649
----
- src/userauth.c | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/src/userauth.c b/src/userauth.c
-index 40ef915..caa5635 100644
---- a/src/userauth.c
-+++ b/src/userauth.c
-@@ -1769,6 +1769,11 @@ userauth_keyboard_interactive(LIBSSH2_SESSION * session,
-             if(session->userauth_kybd_data_len >= 5) {
-                 /* string    name (ISO-10646 UTF-8) */
-                 session->userauth_kybd_auth_name_len = _libssh2_ntohu32(s);
-+                if(session->userauth_kybd_auth_name_len >
-+                   session->userauth_kybd_data_len - 5)
-+                    return _libssh2_error(session,
-+                                          LIBSSH2_ERROR_OUT_OF_BOUNDARY,
-+                                          "Bad keyboard auth name");
-                 s += 4;
-             }
-             else {
diff --git a/deps/patches/libunwind-cfa-rsp.patch b/deps/patches/libunwind-cfa-rsp.patch
deleted file mode 100644
index 6b2080c10c2cf..0000000000000
--- a/deps/patches/libunwind-cfa-rsp.patch
+++ /dev/null
@@ -1,368 +0,0 @@
-From 8c8c78e2db09c5dc66ad0188a088b1664483a13f Mon Sep 17 00:00:00 2001
-From: Keno Fischer <keno@juliacomputing.com>
-Date: Sun, 29 Aug 2021 11:07:54 -0700
-Subject: [PATCH] x86_64: Stop aliasing RSP and CFA
-
-RSP and CFA are different concepts. RSP refers to the physical
-register, CFA is a virtual register that serves as the base
-address for various other saved registers. It is true that
-in many frames these are set to alias, however this is not
-a requirement. For example, a function that performs a stack
-switch would likely change the rsp in the middle of the function,
-but would keep the CFA at the original RSP such that saved registers
-may be appropriately recovered.
-
-We are seeing incorrect unwinds in the Julia runtime when running
-julia under rr. This is because injects code (with correct CFI)
-that performs just such a stack switch [1]. GDB manages to unwind
-this correctly, but libunwind incorrectly sets the rsp to the CFA
-address, causing a misunwind.
-
-Tested on x86_64, patches for other architectures are ported, but
-not tested.
-
-[1] https://github.com/rr-debugger/rr/blob/469c22059a4a1798d33a8a224457faf22b2c178c/src/preload/syscall_hook.S#L454
----
- include/dwarf.h                 |  3 +-
- include/libunwind_i.h           |  4 ++
- include/tdep-x86/dwarf-config.h |  2 -
- include/tdep-x86/libunwind_i.h  | 73 ++++++++++++---------------------
- src/dwarf/Gparser.c             | 15 +++++--
- src/x86/Gos-freebsd.c           |  1 +
- src/x86/Gregs.c                 |  2 +-
- src/x86/Gstep.c                 |  4 +-
- src/x86_64/Gos-freebsd.c        |  1 +
- src/x86_64/Gregs.c              |  2 +-
- src/x86_64/Gstep.c              |  2 +-
- 11 files changed, 52 insertions(+), 57 deletions(-)
-
-diff --git a/include/dwarf.h b/include/dwarf.h
-index 175c419bb..23ff4c4f6 100644
---- a/include/dwarf.h
-+++ b/include/dwarf.h
-@@ -231,6 +231,7 @@ typedef enum
-     DWARF_WHERE_REG,            /* register saved in another register */
-     DWARF_WHERE_EXPR,           /* register saved */
-     DWARF_WHERE_VAL_EXPR,       /* register has computed value */
-+    DWARF_WHERE_CFA,            /* register is set to the computed cfa value */
-   }
- dwarf_where_t;
- 
-@@ -313,7 +314,7 @@ typedef struct dwarf_cursor
-     void *as_arg;               /* argument to address-space callbacks */
-     unw_addr_space_t as;        /* reference to per-address-space info */
- 
--    unw_word_t cfa;     /* canonical frame address; aka frame-/stack-pointer */
-+    unw_word_t cfa;     /* canonical frame address; aka frame-pointer */
-     unw_word_t ip;              /* instruction pointer */
-     unw_word_t args_size;       /* size of arguments */
-     unw_word_t eh_args[UNW_TDEP_NUM_EH_REGS];
-diff --git a/include/libunwind_i.h b/include/libunwind_i.h
-index fea5c2607..6c7dda9a8 100644
---- a/include/libunwind_i.h
-+++ b/include/libunwind_i.h
-@@ -346,6 +346,10 @@ static inline void invalidate_edi (struct elf_dyn_info *edi)
- 
- #include "tdep/libunwind_i.h"
- 
-+#ifndef TDEP_DWARF_SP
-+#define TDEP_DWARF_SP UNW_TDEP_SP
-+#endif
-+
- #ifndef tdep_get_func_addr
- # define tdep_get_func_addr(as,addr,v)          (*(v) = addr, 0)
- #endif
-diff --git a/include/tdep-x86/dwarf-config.h b/include/tdep-x86/dwarf-config.h
-index f76f9c1c4..11398e4e6 100644
---- a/include/tdep-x86/dwarf-config.h
-+++ b/include/tdep-x86/dwarf-config.h
-@@ -43,9 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
- typedef struct dwarf_loc
-   {
-     unw_word_t val;
--#ifndef UNW_LOCAL_ONLY
-     unw_word_t type;            /* see X86_LOC_TYPE_* macros.  */
--#endif
-   }
- dwarf_loc_t;
- 
-diff --git a/include/tdep-x86/libunwind_i.h b/include/tdep-x86/libunwind_i.h
-index d4c5ccdb1..ad4edc2f5 100644
---- a/include/tdep-x86/libunwind_i.h
-+++ b/include/tdep-x86/libunwind_i.h
-@@ -84,15 +84,26 @@ dwarf_get_uc(const struct dwarf_cursor *cursor)
- }
- 
- #define DWARF_GET_LOC(l)        ((l).val)
-+# define DWARF_LOC_TYPE_MEM     (0 << 0)
-+# define DWARF_LOC_TYPE_FP      (1 << 0)
-+# define DWARF_LOC_TYPE_REG     (1 << 1)
-+# define DWARF_LOC_TYPE_VAL     (1 << 2)
- 
--#ifdef UNW_LOCAL_ONLY
-+# define DWARF_IS_REG_LOC(l)    (((l).type & DWARF_LOC_TYPE_REG) != 0)
-+# define DWARF_IS_FP_LOC(l)     (((l).type & DWARF_LOC_TYPE_FP) != 0)
-+# define DWARF_IS_MEM_LOC(l)    ((l).type == DWARF_LOC_TYPE_MEM)
-+# define DWARF_IS_VAL_LOC(l)    (((l).type & DWARF_LOC_TYPE_VAL) != 0)
-+
-+# define DWARF_LOC(r, t)        ((dwarf_loc_t) { .val = (r), .type = (t) })
- # define DWARF_NULL_LOC         DWARF_LOC (0, 0)
--# define DWARF_IS_NULL_LOC(l)   (DWARF_GET_LOC (l) == 0)
--# define DWARF_LOC(r, t)        ((dwarf_loc_t) { .val = (r) })
--# define DWARF_IS_REG_LOC(l)    0
-+# define DWARF_IS_NULL_LOC(l)                                           \
-+                ({ dwarf_loc_t _l = (l); _l.val == 0 && _l.type == 0; })
-+# define DWARF_VAL_LOC(c,v)     DWARF_LOC ((v), DWARF_LOC_TYPE_VAL)
-+# define DWARF_MEM_LOC(c,m)     DWARF_LOC ((m), DWARF_LOC_TYPE_MEM)
-+
-+#ifdef UNW_LOCAL_ONLY
- # define DWARF_REG_LOC(c,r)     (DWARF_LOC((unw_word_t)                      \
-                                  tdep_uc_addr(dwarf_get_uc(c), (r)), 0))
--# define DWARF_MEM_LOC(c,m)     DWARF_LOC ((m), 0)
- # define DWARF_FPREG_LOC(c,r)   (DWARF_LOC((unw_word_t)                      \
-                                  tdep_uc_addr(dwarf_get_uc(c), (r)), 0))
- 
-@@ -114,35 +125,8 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
-   return 0;
- }
- 
--static inline int
--dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
--{
--  if (!DWARF_GET_LOC (loc))
--    return -1;
--  return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
--                                   0, c->as_arg);
--}
--
--static inline int
--dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
--{
--  if (!DWARF_GET_LOC (loc))
--    return -1;
--  return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
--                                   1, c->as_arg);
--}
--
- #else /* !UNW_LOCAL_ONLY */
--# define DWARF_LOC_TYPE_FP      (1 << 0)
--# define DWARF_LOC_TYPE_REG     (1 << 1)
--# define DWARF_NULL_LOC         DWARF_LOC (0, 0)
--# define DWARF_IS_NULL_LOC(l)                                           \
--                ({ dwarf_loc_t _l = (l); _l.val == 0 && _l.type == 0; })
--# define DWARF_LOC(r, t)        ((dwarf_loc_t) { .val = (r), .type = (t) })
--# define DWARF_IS_REG_LOC(l)    (((l).type & DWARF_LOC_TYPE_REG) != 0)
--# define DWARF_IS_FP_LOC(l)     (((l).type & DWARF_LOC_TYPE_FP) != 0)
- # define DWARF_REG_LOC(c,r)     DWARF_LOC((r), DWARF_LOC_TYPE_REG)
--# define DWARF_MEM_LOC(c,m)     DWARF_LOC ((m), 0)
- # define DWARF_FPREG_LOC(c,r)   DWARF_LOC((r), (DWARF_LOC_TYPE_REG      \
-                                                 | DWARF_LOC_TYPE_FP))
- 
-@@ -192,38 +176,33 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
-                                    1, c->as_arg);
- }
- 
-+#endif /* !UNW_LOCAL_ONLY */
-+
- static inline int
- dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
- {
-   if (DWARF_IS_NULL_LOC (loc))
-     return -UNW_EBADREG;
- 
--  /* If a code-generator were to save a value of type unw_word_t in a
--     floating-point register, we would have to support this case.  I
--     suppose it could happen with MMX registers, but does it really
--     happen?  */
--  assert (!DWARF_IS_FP_LOC (loc));
--
-   if (DWARF_IS_REG_LOC (loc))
-     return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
-                                      0, c->as_arg);
--  else
-+  if (DWARF_IS_MEM_LOC (loc))
-     return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
-                                      0, c->as_arg);
-+  assert(DWARF_IS_VAL_LOC (loc));
-+  *val = DWARF_GET_LOC (loc);
-+  return 0;
- }
- 
- static inline int
- dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
- {
-+  assert(!DWARF_IS_VAL_LOC (loc));
-+
-   if (DWARF_IS_NULL_LOC (loc))
-     return -UNW_EBADREG;
- 
--  /* If a code-generator were to save a value of type unw_word_t in a
--     floating-point register, we would have to support this case.  I
--     suppose it could happen with MMX registers, but does it really
--     happen?  */
--  assert (!DWARF_IS_FP_LOC (loc));
--
-   if (DWARF_IS_REG_LOC (loc))
-     return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
-                                      1, c->as_arg);
-@@ -232,7 +211,9 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
-                                      1, c->as_arg);
- }
- 
--#endif /* !UNW_LOCAL_ONLY */
-+// For historical reasons, the DWARF numbering does not match the libunwind
-+// numbering, necessitating this override
-+#define TDEP_DWARF_SP 4
- 
- #define tdep_getcontext_trace           unw_getcontext
- #define tdep_init_done                  UNW_OBJ(init_done)
-diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c
-index da170d4b3..70a62c505 100644
---- a/src/dwarf/Gparser.c
-+++ b/src/dwarf/Gparser.c
-@@ -508,6 +508,9 @@ setup_fde (struct dwarf_cursor *c, dwarf_state_record_t *sr)
-   for (i = 0; i < DWARF_NUM_PRESERVED_REGS + 2; ++i)
-     set_reg (sr, i, DWARF_WHERE_SAME, 0);
- 
-+  // SP defaults to CFA (but is overridable)
-+  set_reg (sr, TDEP_DWARF_SP, DWARF_WHERE_CFA, 0);
-+
-   struct dwarf_cie_info *dci = c->pi.unwind_info;
-   sr->rs_current.ret_addr_column  = dci->ret_addr_column;
-   unw_word_t addr = dci->cie_instr_start;
-@@ -792,14 +795,14 @@ apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs)
-       /* As a special-case, if the stack-pointer is the CFA and the
-          stack-pointer wasn't saved, popping the CFA implicitly pops
-          the stack-pointer as well.  */
--      if ((rs->reg.val[DWARF_CFA_REG_COLUMN] == UNW_TDEP_SP)
--          && (UNW_TDEP_SP < ARRAY_SIZE(rs->reg.val))
--          && (rs->reg.where[UNW_TDEP_SP] == DWARF_WHERE_SAME))
-+      if ((rs->reg.val[DWARF_CFA_REG_COLUMN] == TDEP_DWARF_SP)
-+          && (TDEP_DWARF_SP < ARRAY_SIZE(rs->reg.val))
-+          && (DWARF_IS_NULL_LOC(c->loc[TDEP_DWARF_SP])))
-           cfa = c->cfa;
-       else
-         {
-           regnum = dwarf_to_unw_regnum (rs->reg.val[DWARF_CFA_REG_COLUMN]);
--          if ((ret = unw_get_reg ((unw_cursor_t *) c, regnum, &cfa)) < 0)
-+          if ((ret = unw_get_reg (dwarf_to_cursor(c), regnum, &cfa)) < 0)
-             return ret;
-         }
-       cfa += rs->reg.val[DWARF_CFA_OFF_COLUMN];
-@@ -836,6 +839,10 @@ apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs)
-         case DWARF_WHERE_SAME:
-           break;
- 
-+        case DWARF_WHERE_CFA:
-+          new_loc[i] = DWARF_VAL_LOC (c, cfa);
-+          break;
-+
-         case DWARF_WHERE_CFAREL:
-           new_loc[i] = DWARF_MEM_LOC (c, cfa + rs->reg.val[i]);
-           break;
-diff --git a/src/x86/Gos-freebsd.c b/src/x86/Gos-freebsd.c
-index 7dd014046..1b251d027 100644
---- a/src/x86/Gos-freebsd.c
-+++ b/src/x86/Gos-freebsd.c
-@@ -138,6 +138,7 @@ x86_handle_signal_frame (unw_cursor_t *cursor)
-     c->dwarf.loc[ST0] = DWARF_NULL_LOC;
-   } else if (c->sigcontext_format == X86_SCF_FREEBSD_SYSCALL) {
-     c->dwarf.loc[EIP] = DWARF_LOC (c->dwarf.cfa, 0);
-+    c->dwarf.loc[ESP] = DWARF_VAL_LOC (c, c->dwarf.cfa + 4);
-     c->dwarf.loc[EAX] = DWARF_NULL_LOC;
-     c->dwarf.cfa += 4;
-     c->dwarf.use_prev_instr = 1;
-diff --git a/src/x86/Gregs.c b/src/x86/Gregs.c
-index 4a9592617..9446d6c62 100644
---- a/src/x86/Gregs.c
-+++ b/src/x86/Gregs.c
-@@ -53,7 +53,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp,
-       break;
- 
-     case UNW_X86_CFA:
--    case UNW_X86_ESP:
-       if (write)
-         return -UNW_EREADONLYREG;
-       *valp = c->dwarf.cfa;
-@@ -81,6 +80,7 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp,
-     case UNW_X86_ECX: loc = c->dwarf.loc[ECX]; break;
-     case UNW_X86_EBX: loc = c->dwarf.loc[EBX]; break;
- 
-+    case UNW_X86_ESP: loc = c->dwarf.loc[ESP]; break;
-     case UNW_X86_EBP: loc = c->dwarf.loc[EBP]; break;
-     case UNW_X86_ESI: loc = c->dwarf.loc[ESI]; break;
-     case UNW_X86_EDI: loc = c->dwarf.loc[EDI]; break;
-diff --git a/src/x86/Gstep.c b/src/x86/Gstep.c
-index 129b739a3..061dcbaaa 100644
---- a/src/x86/Gstep.c
-+++ b/src/x86/Gstep.c
-@@ -47,7 +47,7 @@ unw_step (unw_cursor_t *cursor)
-     {
-       /* DWARF failed, let's see if we can follow the frame-chain
-          or skip over the signal trampoline.  */
--      struct dwarf_loc ebp_loc, eip_loc;
-+      struct dwarf_loc ebp_loc, eip_loc, esp_loc;
- 
-       /* We could get here because of missing/bad unwind information.
-          Validate all addresses before dereferencing. */
-@@ -77,6 +77,7 @@ unw_step (unw_cursor_t *cursor)
-                  c->dwarf.cfa);
- 
-           ebp_loc = DWARF_LOC (c->dwarf.cfa, 0);
-+          esp_loc = DWARF_VAL_LOC (c, c->dwarf.cfa + 8);
-           eip_loc = DWARF_LOC (c->dwarf.cfa + 4, 0);
-           c->dwarf.cfa += 8;
- 
-@@ -87,6 +88,7 @@ unw_step (unw_cursor_t *cursor)
-             c->dwarf.loc[i] = DWARF_NULL_LOC;
- 
-           c->dwarf.loc[EBP] = ebp_loc;
-+          c->dwarf.loc[ESP] = esp_loc;
-           c->dwarf.loc[EIP] = eip_loc;
-           c->dwarf.use_prev_instr = 1;
-         }
-diff --git a/src/x86_64/Gos-freebsd.c b/src/x86_64/Gos-freebsd.c
-index 8f28d1d8c..0c5a17940 100644
---- a/src/x86_64/Gos-freebsd.c
-+++ b/src/x86_64/Gos-freebsd.c
-@@ -133,6 +133,7 @@ x86_64_handle_signal_frame (unw_cursor_t *cursor)
-     c->dwarf.loc[RCX] = c->dwarf.loc[R10];
-     /*  rsp_loc = DWARF_LOC(c->dwarf.cfa - 8, 0);       */
-     /*  rbp_loc = c->dwarf.loc[RBP];                    */
-+    c->dwarf.loc[RSP] = DWARF_VAL_LOC (c, c->dwarf.cfa + 8);
-     c->dwarf.loc[RIP] = DWARF_LOC (c->dwarf.cfa, 0);
-     ret = dwarf_get (&c->dwarf, c->dwarf.loc[RIP], &c->dwarf.ip);
-     Debug (1, "Frame Chain [RIP=0x%Lx] = 0x%Lx\n",
-diff --git a/src/x86_64/Gregs.c b/src/x86_64/Gregs.c
-index baf8a24f0..dff5bcbe7 100644
---- a/src/x86_64/Gregs.c
-+++ b/src/x86_64/Gregs.c
-@@ -79,7 +79,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp,
-       break;
- 
-     case UNW_X86_64_CFA:
--    case UNW_X86_64_RSP:
-       if (write)
-         return -UNW_EREADONLYREG;
-       *valp = c->dwarf.cfa;
-@@ -107,6 +106,7 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp,
-     case UNW_X86_64_RCX: loc = c->dwarf.loc[RCX]; break;
-     case UNW_X86_64_RBX: loc = c->dwarf.loc[RBX]; break;
- 
-+    case UNW_X86_64_RSP: loc = c->dwarf.loc[RSP]; break;
-     case UNW_X86_64_RBP: loc = c->dwarf.loc[RBP]; break;
-     case UNW_X86_64_RSI: loc = c->dwarf.loc[RSI]; break;
-     case UNW_X86_64_RDI: loc = c->dwarf.loc[RDI]; break;
-diff --git a/src/x86_64/Gstep.c b/src/x86_64/Gstep.c
-index 3c5c3830f..fdad298c7 100644
---- a/src/x86_64/Gstep.c
-+++ b/src/x86_64/Gstep.c
-@@ -223,7 +223,7 @@ unw_step (unw_cursor_t *cursor)
-                   Debug (2, "RIP fixup didn't work, falling back\n");
-                   unw_word_t rbp1 = 0;
-                   rbp_loc = DWARF_LOC(rbp, 0);
--                  rsp_loc = DWARF_NULL_LOC;
-+                  rsp_loc = DWARF_VAL_LOC(c, rbp + 16);
-                   rip_loc = DWARF_LOC (rbp + 8, 0);
-                   ret = dwarf_get (&c->dwarf, rbp_loc, &rbp1);
-                   Debug (1, "[RBP=0x%lx] = 0x%lx (cfa = 0x%lx) -> 0x%lx\n",
diff --git a/deps/patches/libunwind-configure-static-lzma.patch b/deps/patches/libunwind-configure-static-lzma.patch
new file mode 100644
index 0000000000000..16991d5cd2656
--- /dev/null
+++ b/deps/patches/libunwind-configure-static-lzma.patch
@@ -0,0 +1,20 @@
+--- configure.orig	2025-05-26 15:25:01
++++ configure	2025-05-26 15:25:41
+@@ -20878,7 +20878,7 @@
+   printf %s "(cached) " >&6
+ else $as_nop
+   ac_check_lib_save_LIBS=$LIBS
+-LIBS="-llzma  $LIBS"
++LIBS="-L${libdir} -l:liblzma.a  $LIBS"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ /* end confdefs.h.  */
+ 
+@@ -20908,7 +20908,7 @@
+ printf "%s\n" "$ac_cv_lib_lzma_lzma_mf_is_supported" >&6; }
+ if test "x$ac_cv_lib_lzma_lzma_mf_is_supported" = xyes
+ then :
+-  LIBLZMA=-llzma
++  LIBLZMA="-L${libdir} -l:liblzma.a"
+ 
+ printf "%s\n" "#define HAVE_LZMA 1" >>confdefs.h
+ 
diff --git a/deps/patches/libunwind-disable-initial-exec-tls.patch b/deps/patches/libunwind-disable-initial-exec-tls.patch
new file mode 100644
index 0000000000000..c6718ac2db98f
--- /dev/null
+++ b/deps/patches/libunwind-disable-initial-exec-tls.patch
@@ -0,0 +1,44 @@
+diff --git a/include/libunwind-common.h.in b/include/libunwind-common.h.in
+index 893fdd69..80ab9648 100644
+--- a/include/libunwind-common.h.in
++++ b/include/libunwind-common.h.in
+@@ -340,5 +340,6 @@ extern int unw_get_elf_filename_by_ip (unw_addr_space_t, unw_word_t, char *,
+ extern const char *unw_strerror (int);
+ extern int unw_backtrace (void **, int);
+ extern int unw_backtrace2 (void **, int, unw_context_t*, int);
++extern int unw_ensure_tls (void);
+ 
+ extern unw_addr_space_t unw_local_addr_space;
+diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c
+index 7a5d7e1f..8453ffb0 100644
+--- a/src/dwarf/Gparser.c
++++ b/src/dwarf/Gparser.c
+@@ -623,7 +623,7 @@ get_rs_cache (unw_addr_space_t as, intrmask_t *saved_maskp)
+ #if defined(HAVE___CACHE_PER_THREAD) && HAVE___CACHE_PER_THREAD
+   if (likely (caching == UNW_CACHE_PER_THREAD))
+     {
+-      static _Thread_local struct dwarf_rs_cache tls_cache __attribute__((tls_model("initial-exec")));
++      static _Thread_local struct dwarf_rs_cache tls_cache;
+       Debug (16, "using TLS cache\n");
+       cache = &tls_cache;
+     }
+diff --git a/src/mi/init.c b/src/mi/init.c
+index e4431eeb..07cae852 100644
+--- a/src/mi/init.c
++++ b/src/mi/init.c
+@@ -82,3 +82,15 @@ mi_init (void)
+   unw_init_page_size();
+   assert(sizeof(struct cursor) <= sizeof(unw_cursor_t));
+ }
++
++int
++unw_ensure_tls (void)
++{
++#if defined(HAVE___CACHE_PER_THREAD) && HAVE___CACHE_PER_THREAD
++  static _Thread_local int alloc_trigger;
++  alloc_trigger = 1;
++  return alloc_trigger;
++#else
++  return 0;
++#endif
++}
diff --git a/deps/patches/libunwind-dwarf-table.patch b/deps/patches/libunwind-dwarf-table.patch
deleted file mode 100644
index 5905982f9a349..0000000000000
--- a/deps/patches/libunwind-dwarf-table.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From a5b5fd28ed03cb1ab524d24dc534c1fa167bf5a1 Mon Sep 17 00:00:00 2001
-From: Alex Arslan <ararslan@comcast.net>
-Date: Fri, 5 Nov 2021 16:58:41 -0700
-Subject: [PATCH] Fix table indexing in `dwarf_search_unwind_table`
-
-`table_len` is used as an index into `table`, assuming it represents the
-number of entries. However, it is defined as the number of entries
-multiplied by `sizeof(unw_word_t)`. This is accounted for in other
-places that use `table_len`, e.g. in `lookup`, which divides out the
-size of `unw_word_t`, but the indexing expression uses `table_len`
-directly. So when `table` has say 2 entries, we're actually looking at
-index 15 rather than 1 in the comparison. This can cause the conditional
-to erroneously evaluate to true, allowing the following line to
-segfault.
-
-This was observed with JIT compiled code from Julia with LLVM on
-FreeBSD.
-
-Co-Authored-By: Jameson Nash <vtjnash@gmail.com>
----
- src/dwarf/Gfind_proc_info-lsb.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/dwarf/Gfind_proc_info-lsb.c b/src/dwarf/Gfind_proc_info-lsb.c
-index 5e27a501..af4cbce8 100644
---- a/src/dwarf/Gfind_proc_info-lsb.c
-+++ b/src/dwarf/Gfind_proc_info-lsb.c
-@@ -866,7 +866,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
-   if (as == unw_local_addr_space)
-     {
-       e = lookup (table, table_len, ip - ip_base);
--      if (e && &e[1] < &table[table_len])
-+      if (e && &e[1] < &table[table_len / sizeof (unw_word_t)])
- 	last_ip = e[1].start_ip_offset + ip_base;
-       else
- 	last_ip = di->end_ip;
diff --git a/deps/patches/libunwind-non-empty-structs.patch b/deps/patches/libunwind-non-empty-structs.patch
deleted file mode 100644
index 0c04709a13184..0000000000000
--- a/deps/patches/libunwind-non-empty-structs.patch
+++ /dev/null
@@ -1,108 +0,0 @@
-From 1f35cd8f2bdcc1876af7352cc3e87bb7277e8162 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Mos=C3=A8=20Giordano?= <mose@gnu.org>
-Date: Sat, 18 Jun 2022 10:35:36 +0100
-Subject: [PATCH 1/1] Make some structs non-empty
-
-Backport of <https://github.com/libunwind/libunwind/pull/332>.
----
- include/libunwind-aarch64.h | 6 ++++++
- include/libunwind-arm.h     | 6 ++++++
- include/libunwind-x86.h     | 6 ++++++
- 3 files changed, 18 insertions(+)
-
-diff --git a/include/libunwind-aarch64.h b/include/libunwind-aarch64.h
-index aeaef630..b7066c51 100644
---- a/include/libunwind-aarch64.h
-+++ b/include/libunwind-aarch64.h
-@@ -35,6 +35,10 @@ extern "C" {
- #include <stddef.h>
- #include <ucontext.h>
- 
-+#ifndef UNW_EMPTY_STRUCT
-+#  define UNW_EMPTY_STRUCT uint8_t unused;
-+#endif
-+
- #define UNW_TARGET      aarch64
- #define UNW_TARGET_AARCH64      1
- 
-@@ -60,6 +64,7 @@ typedef long double unw_tdep_fpreg_t;
- typedef struct
-   {
-     /* no aarch64-specific auxiliary proc-info */
-+    UNW_EMPTY_STRUCT
-   }
- unw_tdep_proc_info_t;
- 
-@@ -169,6 +174,7 @@ aarch64_regnum_t;
- typedef struct unw_tdep_save_loc
-   {
-     /* Additional target-dependent info on a save location.  */
-+    UNW_EMPTY_STRUCT
-   }
- unw_tdep_save_loc_t;
- 
-diff --git a/include/libunwind-arm.h b/include/libunwind-arm.h
-index 6709b7ab..7c7005d1 100644
---- a/include/libunwind-arm.h
-+++ b/include/libunwind-arm.h
-@@ -32,6 +32,10 @@ extern "C" {
- #include <inttypes.h>
- #include <stddef.h>
- 
-+#ifndef UNW_EMPTY_STRUCT
-+#  define UNW_EMPTY_STRUCT uint8_t unused;
-+#endif
-+
- #define UNW_TARGET      arm
- #define UNW_TARGET_ARM  1
- 
-@@ -247,6 +251,7 @@ arm_regnum_t;
- typedef struct unw_tdep_save_loc
-   {
-     /* Additional target-dependent info on a save location.  */
-+    UNW_EMPTY_STRUCT
-   }
- unw_tdep_save_loc_t;
- 
-@@ -288,6 +293,7 @@ unw_tdep_context_t;
- typedef struct
-   {
-     /* no arm-specific auxiliary proc-info */
-+    UNW_EMPTY_STRUCT
-   }
- unw_tdep_proc_info_t;
- 
-diff --git a/include/libunwind-x86.h b/include/libunwind-x86.h
-index 40fe0464..d3b741d3 100644
---- a/include/libunwind-x86.h
-+++ b/include/libunwind-x86.h
-@@ -34,6 +34,10 @@ extern "C" {
- #include <inttypes.h>
- #include <ucontext.h>
- 
-+#ifndef UNW_EMPTY_STRUCT
-+#  define UNW_EMPTY_STRUCT uint8_t unused;
-+#endif
-+
- #define UNW_TARGET      x86
- #define UNW_TARGET_X86  1
- 
-@@ -158,6 +162,7 @@ x86_regnum_t;
- typedef struct unw_tdep_save_loc
-   {
-     /* Additional target-dependent info on a save location.  */
-+    UNW_EMPTY_STRUCT
-   }
- unw_tdep_save_loc_t;
- 
-@@ -169,6 +174,7 @@ typedef ucontext_t unw_tdep_context_t;
- typedef struct
-   {
-     /* no x86-specific auxiliary proc-info */
-+    UNW_EMPTY_STRUCT
-   }
- unw_tdep_proc_info_t;
- 
--- 
-2.36.1
-
diff --git a/deps/patches/libunwind-prefer-extbl.patch b/deps/patches/libunwind-prefer-extbl.patch
deleted file mode 100644
index 07b172604d623..0000000000000
--- a/deps/patches/libunwind-prefer-extbl.patch
+++ /dev/null
@@ -1,194 +0,0 @@
-From 2d6a50435bb743be1e4d88eee002372344348349 Mon Sep 17 00:00:00 2001
-From: Yichao Yu <yyc1992@gmail.com>
-Date: Sun, 29 Aug 2021 13:43:01 -0700
-Subject: [PATCH] Prefer EXTBL unwinding on ARM
-
-It is part of the C++ ABI so a EXTBL unwind info that's not `CANT_UNWIND`
-should always be reliable/correct.
-Ignore `ESTOPUNWIND` so that a `CANT_UNWIND` info can fallback to unwinding
-using the debug info instead.
----
- include/tdep-arm/libunwind_i.h |  4 +++
- src/arm/Gex_tables.c           | 18 ++++++++---
- src/arm/Gstep.c                | 55 ++++++++++++++++++++--------------
- 3 files changed, 51 insertions(+), 26 deletions(-)
-
-diff --git a/include/tdep-arm/libunwind_i.h b/include/tdep-arm/libunwind_i.h
-index 88ebfb069..5bd28c953 100644
---- a/include/tdep-arm/libunwind_i.h
-+++ b/include/tdep-arm/libunwind_i.h
-@@ -256,6 +256,7 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
- #define tdep_init_done                  UNW_OBJ(init_done)
- #define tdep_init                       UNW_OBJ(init)
- #define arm_find_proc_info              UNW_OBJ(find_proc_info)
-+#define arm_find_proc_info2             UNW_OBJ(find_proc_info2)
- #define arm_put_unwind_info             UNW_OBJ(put_unwind_info)
- /* Platforms that support UNW_INFO_FORMAT_TABLE need to define
-    tdep_search_unwind_table.  */
-@@ -297,6 +298,9 @@ extern void tdep_init (void);
- extern int arm_find_proc_info (unw_addr_space_t as, unw_word_t ip,
-                                unw_proc_info_t *pi, int need_unwind_info,
-                                void *arg);
-+extern int arm_find_proc_info2 (unw_addr_space_t as, unw_word_t ip,
-+                                unw_proc_info_t *pi, int need_unwind_info,
-+                                void *arg, int methods);
- extern void arm_put_unwind_info (unw_addr_space_t as,
-                                   unw_proc_info_t *pi, void *arg);
- extern int tdep_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
-diff --git a/src/arm/Gex_tables.c b/src/arm/Gex_tables.c
-index efdcf2978..083d2b2f7 100644
---- a/src/arm/Gex_tables.c
-+++ b/src/arm/Gex_tables.c
-@@ -506,18 +506,20 @@ arm_phdr_cb (struct dl_phdr_info *info, size_t size, void *data)
- }
- 
- HIDDEN int
--arm_find_proc_info (unw_addr_space_t as, unw_word_t ip,
--                    unw_proc_info_t *pi, int need_unwind_info, void *arg)
-+arm_find_proc_info2 (unw_addr_space_t as, unw_word_t ip,
-+                     unw_proc_info_t *pi, int need_unwind_info, void *arg,
-+                     int methods)
- {
-   int ret = -1;
-   intrmask_t saved_mask;
- 
-   Debug (14, "looking for IP=0x%lx\n", (long) ip);
- 
--  if (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF))
-+  if (UNW_TRY_METHOD (UNW_ARM_METHOD_DWARF) && (methods & UNW_ARM_METHOD_DWARF))
-     ret = dwarf_find_proc_info (as, ip, pi, need_unwind_info, arg);
- 
--  if (ret < 0 && UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX))
-+  if (ret < 0 && UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX) &&
-+      (methods & UNW_ARM_METHOD_EXIDX))
-     {
-       struct arm_cb_data cb_data;
- 
-@@ -540,6 +542,14 @@ arm_find_proc_info (unw_addr_space_t as, unw_word_t ip,
-   return ret;
- }
- 
-+HIDDEN int
-+arm_find_proc_info (unw_addr_space_t as, unw_word_t ip,
-+                    unw_proc_info_t *pi, int need_unwind_info, void *arg)
-+{
-+    return arm_find_proc_info2 (as, ip, pi, need_unwind_info, arg,
-+                                UNW_ARM_METHOD_ALL);
-+}
-+
- HIDDEN void
- arm_put_unwind_info (unw_addr_space_t as, unw_proc_info_t *proc_info, void *arg)
- {
-diff --git a/src/arm/Gstep.c b/src/arm/Gstep.c
-index 895e8a892..e4ada651b 100644
---- a/src/arm/Gstep.c
-+++ b/src/arm/Gstep.c
-@@ -54,17 +54,22 @@ arm_exidx_step (struct cursor *c)
-                                      c->dwarf.as_arg);
-   if (ret == -UNW_ENOINFO)
-     {
-+#ifdef UNW_LOCAL_ONLY
-+      if ((ret = arm_find_proc_info2 (c->dwarf.as, ip, &c->dwarf.pi,
-+                                      1, c->dwarf.as_arg,
-+                                      UNW_ARM_METHOD_EXIDX)) < 0)
-+        return ret;
-+#else
-       if ((ret = tdep_find_proc_info (&c->dwarf, ip, 1)) < 0)
-         return ret;
-+#endif
-     }
- 
-   if (c->dwarf.pi.format != UNW_INFO_FORMAT_ARM_EXIDX)
-     return -UNW_ENOINFO;
- 
-   ret = arm_exidx_extract (&c->dwarf, buf);
--  if (ret == -UNW_ESTOPUNWIND)
--    return 0;
--  else if (ret < 0)
-+  if (ret < 0)
-     return ret;
- 
-   ret = arm_exidx_decode (buf, ret, &c->dwarf);
-@@ -88,6 +93,7 @@ unw_step (unw_cursor_t *cursor)
- {
-   struct cursor *c = (struct cursor *) cursor;
-   int ret = -UNW_EUNSPEC;
-+  int has_stopunwind = 0;
- 
-   Debug (1, "(cursor=%p)\n", c);
- 
-@@ -95,17 +101,31 @@ unw_step (unw_cursor_t *cursor)
-   if (unw_is_signal_frame (cursor) > 0)
-      return arm_handle_signal_frame (cursor);
- 
-+  /* First, try extbl-based unwinding. */
-+  if (UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX))
-+    {
-+      ret = arm_exidx_step (c);
-+      Debug(1, "arm_exidx_step()=%d\n", ret);
-+      if (ret > 0)
-+        return 1;
-+      if (ret == 0)
-+        return ret;
-+      if (ret == -UNW_ESTOPUNWIND)
-+        has_stopunwind = 1;
-+    }
-+
- #ifdef CONFIG_DEBUG_FRAME
--  /* First, try DWARF-based unwinding. */
-+  /* Second, try DWARF-based unwinding. */
-   if (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF))
-     {
-+      Debug (13, "%s(ret=%d), trying extbl\n",
-+             UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) ? "arm_exidx_step() failed " : "",
-+             ret);
-       ret = dwarf_step (&c->dwarf);
-       Debug(1, "dwarf_step()=%d\n", ret);
- 
-       if (likely (ret > 0))
-         return 1;
--      else if (unlikely (ret == -UNW_ESTOPUNWIND))
--        return ret;
- 
-       if (ret < 0 && ret != -UNW_ENOINFO)
-         {
-@@ -115,18 +135,9 @@ unw_step (unw_cursor_t *cursor)
-     }
- #endif /* CONFIG_DEBUG_FRAME */
- 
--  /* Next, try extbl-based unwinding. */
--  if (UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX))
--    {
--      Debug (13, "%s(ret=%d), trying extbl\n",
--             UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() failed " : "",
--             ret);
--      ret = arm_exidx_step (c);
--      if (ret > 0)
--        return 1;
--      if (ret == -UNW_ESTOPUNWIND || ret == 0)
--        return ret;
--    }
-+  // Before trying the fallback, if any unwind info tell us to stop, do that.
-+  if (has_stopunwind)
-+    return -UNW_ESTOPUNWIND;
- 
-   /* Fall back on APCS frame parsing.
-      Note: This won't work in case the ARM EABI is used. */
-@@ -139,13 +150,13 @@ unw_step (unw_cursor_t *cursor)
-       if (UNW_TRY_METHOD(UNW_ARM_METHOD_FRAME))
-         {
-           Debug (13, "%s%s%s%s(ret=%d), trying frame-chain\n",
--                 UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() " : "",
--                 (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) && UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX)) ? "and " : "",
-                  UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) ? "arm_exidx_step() " : "",
--                 (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) || UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX)) ? "failed " : "",
-+                 (UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) && UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) ? "and " : "",
-+                 UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() " : "",
-+                 (UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) || UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) ? "failed " : "",
-                  ret);
-           ret = UNW_ESUCCESS;
--          /* DWARF unwinding failed, try to follow APCS/optimized APCS frame chain */
-+          /* EXIDX and/or DWARF unwinding failed, try to follow APCS/optimized APCS frame chain */
-           unw_word_t instr, i;
-           dwarf_loc_t ip_loc, fp_loc;
-           unw_word_t frame;
diff --git a/deps/patches/libunwind-revert_prelink_unwind.patch b/deps/patches/libunwind-revert_prelink_unwind.patch
new file mode 100644
index 0000000000000..80de3c9ce4571
--- /dev/null
+++ b/deps/patches/libunwind-revert_prelink_unwind.patch
@@ -0,0 +1,187 @@
+From 3af39d34f576890e7f1f3e97cc1cb45b4b76aa47 Mon Sep 17 00:00:00 2001
+From: Tim Besard <tim.besard@gmail.com>
+Date: Tue, 16 Jan 2024 09:49:21 -0800
+Subject: [PATCH] Revert "Fix unwinding of pre-linked libraries"
+
+This reverts commit a4014f33775321b4106a1134b89020a7774902dd,
+which regresses unwinding on FreeBSD (JuliaLang/julia#51467).
+---
+ include/dwarf.h                 |  2 --
+ include/libunwind-dynamic.h     |  1 -
+ src/dwarf/Gfind_proc_info-lsb.c | 42 +++++++--------------------------
+ src/dwarf/Gfind_unwind_table.c  |  1 -
+ 4 files changed, 8 insertions(+), 38 deletions(-)
+
+diff --git a/include/dwarf.h b/include/dwarf.h
+index 4fd1dba0..3fc6bce2 100644
+--- a/include/dwarf.h
++++ b/include/dwarf.h
+@@ -371,8 +371,6 @@ struct unw_debug_frame_list
+     /* The start (inclusive) and end (exclusive) of the described region.  */
+     unw_word_t start;
+     unw_word_t end;
+-    /* ELF load offset */
+-    unw_word_t load_offset;
+     /* The debug frame itself.  */
+     char *debug_frame;
+     size_t debug_frame_size;
+diff --git a/include/libunwind-dynamic.h b/include/libunwind-dynamic.h
+index a26f2c99..c902ccd9 100644
+--- a/include/libunwind-dynamic.h
++++ b/include/libunwind-dynamic.h
+@@ -141,7 +141,6 @@ typedef struct unw_dyn_info
+     unw_word_t gp;              /* global-pointer in effect for this entry */
+     int32_t format;             /* real type: unw_dyn_info_format_t */
+     int32_t pad;
+-    unw_word_t load_offset;     /* ELF load offset */
+     union
+       {
+         unw_dyn_proc_info_t pi;
+diff --git a/src/dwarf/Gfind_proc_info-lsb.c b/src/dwarf/Gfind_proc_info-lsb.c
+index c11345e8..c701ccfb 100644
+--- a/src/dwarf/Gfind_proc_info-lsb.c
++++ b/src/dwarf/Gfind_proc_info-lsb.c
+@@ -108,17 +108,13 @@ linear_search (unw_addr_space_t as, unw_word_t ip,
+ 
+ static int
+ load_debug_frame (const char *file, char **buf, size_t *bufsize, int is_local,
+-                  unw_word_t segbase, unw_word_t *load_offset)
++                  unw_word_t segbase)
+ {
+   struct elf_image ei;
+-  Elf_W (Ehdr) *ehdr;
+-  Elf_W (Phdr) *phdr;
+   Elf_W (Shdr) *shdr;
+-  int i;
+   int ret;
+ 
+   ei.image = NULL;
+-  *load_offset = 0;
+ 
+   ret = elf_w (load_debuginfo) (file, &ei, is_local);
+   if (ret != 0)
+@@ -193,20 +189,6 @@ load_debug_frame (const char *file, char **buf, size_t *bufsize, int is_local,
+ #if defined(SHF_COMPRESSED)
+     }
+ #endif
+-
+-  ehdr = ei.image;
+-  phdr = (Elf_W (Phdr) *) ((char *) ei.image + ehdr->e_phoff);
+-
+-  for (i = 0; i < ehdr->e_phnum; ++i)
+-    if (phdr[i].p_type == PT_LOAD)
+-      {
+-        *load_offset = segbase - phdr[i].p_vaddr;
+-
+-        Debug (4, "%s load offset is 0x%zx\n", file, *load_offset);
+-
+-        break;
+-      }
+-
+   mi_munmap(ei.image, ei.size);
+   return 0;
+ }
+@@ -259,7 +241,6 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, unw_word_t segbase,
+   int err;
+   char *buf;
+   size_t bufsize;
+-  unw_word_t load_offset;
+ 
+   /* First, see if we loaded this frame already.  */
+ 
+@@ -287,7 +268,7 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, unw_word_t segbase,
+     name = (char*) dlname;
+ 
+   err = load_debug_frame (name, &buf, &bufsize, as == unw_local_addr_space,
+-                          segbase, &load_offset);
++                          segbase);
+ 
+   if (!err)
+     {
+@@ -300,7 +281,6 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, unw_word_t segbase,
+ 
+       fdesc->start = start;
+       fdesc->end = end;
+-      fdesc->load_offset = load_offset;
+       fdesc->debug_frame = buf;
+       fdesc->debug_frame_size = bufsize;
+       fdesc->index = NULL;
+@@ -497,7 +477,6 @@ dwarf_find_debug_frame (int found, unw_dyn_info_t *di_debug, unw_word_t ip,
+   di->format = UNW_INFO_FORMAT_TABLE;
+   di->start_ip = fdesc->start;
+   di->end_ip = fdesc->end;
+-  di->load_offset = fdesc->load_offset;
+   di->u.ti.name_ptr = (unw_word_t) (uintptr_t) obj_name;
+   di->u.ti.table_data = (unw_word_t *) fdesc;
+   di->u.ti.table_len = sizeof (*fdesc) / sizeof (unw_word_t);
+@@ -960,14 +939,12 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
+     ip_base = segbase;
+   }
+ 
+-  Debug (6, "lookup IP 0x%lx\n", (long) (ip - ip_base - di->load_offset));
+-
+ #ifndef UNW_REMOTE_ONLY
+   if (as == unw_local_addr_space)
+     {
+-      e = lookup (table, table_len, ip - ip_base - di->load_offset);
++      e = lookup (table, table_len, ip - ip_base);
+       if (e && &e[1] < &table[table_len / sizeof (struct table_entry)])
+-	last_ip = e[1].start_ip_offset + ip_base + di->load_offset;
++	last_ip = e[1].start_ip_offset + ip_base;
+       else
+ 	last_ip = di->end_ip;
+     }
+@@ -975,7 +952,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
+ #endif
+     {
+ #ifndef UNW_LOCAL_ONLY
+-      int32_t last_ip_offset = di->end_ip - ip_base - di->load_offset;
++      int32_t last_ip_offset = di->end_ip - ip_base;
+       segbase = di->u.rti.segbase;
+       if ((ret = remote_lookup (as, (uintptr_t) table, table_len,
+                                 ip - ip_base, &ent, &last_ip_offset, arg)) < 0)
+@@ -983,7 +960,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
+       if (ret)
+ 	{
+ 	  e = &ent;
+-	  last_ip = last_ip_offset + ip_base + di->load_offset;
++	  last_ip = last_ip_offset + ip_base;
+ 	}
+       else
+         e = NULL;       /* no info found */
+@@ -997,8 +974,8 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
+          unwind info.  */
+       return -UNW_ENOINFO;
+     }
+-  Debug (15, "ip=0x%lx, load_offset=0x%lx, start_ip=0x%lx\n",
+-         (long) ip, (long) di->load_offset, (long) (e->start_ip_offset));
++  Debug (15, "ip=0x%lx, start_ip=0x%lx\n",
++         (long) ip, (long) (e->start_ip_offset));
+   if (debug_frame_base)
+     fde_addr = e->fde_offset + debug_frame_base;
+   else
+@@ -1022,9 +999,6 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
+       pi->flags = UNW_PI_FLAG_DEBUG_FRAME;
+     }
+ 
+-  pi->start_ip += di->load_offset;
+-  pi->end_ip += di->load_offset;
+-
+ #if defined(NEED_LAST_IP)
+   pi->last_ip = last_ip;
+ #else
+diff --git a/src/dwarf/Gfind_unwind_table.c b/src/dwarf/Gfind_unwind_table.c
+index a7c4dfd3..2b503ea9 100644
+--- a/src/dwarf/Gfind_unwind_table.c
++++ b/src/dwarf/Gfind_unwind_table.c
+@@ -197,7 +197,6 @@ dwarf_find_unwind_table (struct elf_dyn_info *edi,
+ 
+       edi->di_cache.start_ip = start_ip;
+       edi->di_cache.end_ip = end_ip;
+-      edi->di_cache.load_offset = 0;
+       edi->di_cache.format = UNW_INFO_FORMAT_REMOTE_TABLE;
+       edi->di_cache.u.rti.name_ptr = 0;
+       /* two 32-bit values (ip_offset/fde_offset) per table-entry: */
+-- 
+2.43.0
+
diff --git a/deps/patches/libunwind-static-arm.patch b/deps/patches/libunwind-static-arm.patch
deleted file mode 100644
index 92544a003b8b9..0000000000000
--- a/deps/patches/libunwind-static-arm.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/src/arm/Gex_tables.c b/src/arm/Gex_tables.c
-index d6573a65..1d64803e 100644
---- a/src/arm/Gex_tables.c
-+++ b/src/arm/Gex_tables.c
-@@ -381,7 +381,7 @@ arm_exidx_extract (struct dwarf_cursor *c, uint8_t *buf)
-   return nbuf;
- }
- 
--int
-+static int
- arm_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
- 			 unw_dyn_info_t *di, unw_proc_info_t *pi,
- 			 int need_unwind_info, void *arg)
diff --git a/deps/patches/llvm-libunwind-force-dwarf.patch b/deps/patches/llvm-libunwind-force-dwarf.patch
index 2f4d31acb8a4a..494c5e77e187b 100644
--- a/deps/patches/llvm-libunwind-force-dwarf.patch
+++ b/deps/patches/llvm-libunwind-force-dwarf.patch
@@ -6,22 +6,23 @@ Date:   Tue Aug 27 15:01:22 2013 -0400
     Add option to step with DWARF
 
 ---
-diff -pur a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h
---- a/libunwind/include/libunwind.h	2021-06-28 18:23:38.000000000 +0200
-+++ b/libunwind/include/libunwind.h	2022-05-04 18:44:24.000000000 +0200
+diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h
+index b2dae8f..fc37afb 100644
+--- a/libunwind/include/libunwind.h
++++ b/libunwind/include/libunwind.h
 @@ -108,6 +108,7 @@ extern "C" {
- 
+
  extern int unw_getcontext(unw_context_t *) LIBUNWIND_AVAIL;
  extern int unw_init_local(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL;
 +extern int unw_init_local_dwarf(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL;
  extern int unw_step(unw_cursor_t *) LIBUNWIND_AVAIL;
  extern int unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *) LIBUNWIND_AVAIL;
  extern int unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *) LIBUNWIND_AVAIL;
-Only in b/libunwind/include: libunwind.h.orig
-diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
---- a/libunwind/src/UnwindCursor.hpp	2021-06-28 18:23:38.000000000 +0200
-+++ b/libunwind/src/UnwindCursor.hpp	2022-05-04 18:45:11.000000000 +0200
-@@ -437,6 +437,9 @@ public:
+diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
+index 7753936..26ca486 100644
+--- a/libunwind/src/UnwindCursor.hpp
++++ b/libunwind/src/UnwindCursor.hpp
+@@ -453,6 +453,9 @@ public:
    virtual bool isSignalFrame() {
      _LIBUNWIND_ABORT("isSignalFrame not implemented");
    }
@@ -31,7 +32,7 @@ diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
    virtual bool getFunctionName(char *, size_t, unw_word_t *) {
      _LIBUNWIND_ABORT("getFunctionName not implemented");
    }
-@@ -894,6 +897,7 @@ public:
+@@ -944,6 +947,7 @@ public:
    virtual void        getInfo(unw_proc_info_t *);
    virtual void        jumpto();
    virtual bool        isSignalFrame();
@@ -39,24 +40,23 @@ diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
    virtual bool        getFunctionName(char *buf, size_t len, unw_word_t *off);
    virtual void        setInfoBasedOnIPRegister(bool isReturnAddress = false);
    virtual const char *getRegisterName(int num);
-@@ -963,7 +967,7 @@ private:
+@@ -1031,7 +1035,7 @@ private:
                                              const UnwindInfoSections &sects);
-   int stepWithCompactEncoding() {
-   #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
+   int stepWithCompactEncoding(bool stage2 = false) {
+ #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
 -    if ( compactSaysUseDwarf() )
 +    if ( _forceDwarf || compactSaysUseDwarf() )
-       return stepWithDwarfFDE();
-   #endif
+       return stepWithDwarfFDE(stage2);
+ #endif
      R dummy;
-@@ -1198,6 +1202,7 @@ private:
-   unw_proc_info_t  _info;
-   bool             _unwindInfoMissing;
-   bool             _isSignalFrame;
-+  bool             _forceDwarf;
- #if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64)
+@@ -1317,13 +1321,14 @@ private:
+ #if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN)
    bool             _isSigReturn = false;
  #endif
-@@ -1207,7 +1212,7 @@ private:
++  bool             _forceDwarf;
+ };
+
+
  template <typename A, typename R>
  UnwindCursor<A, R>::UnwindCursor(unw_context_t *context, A &as)
      : _addressSpace(as), _registers(context), _unwindInfoMissing(false),
@@ -65,8 +65,8 @@ diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
    static_assert((check_fit<UnwindCursor<A, R>, unw_cursor_t>::does_fit),
                  "UnwindCursor<> does not fit in unw_cursor_t");
    static_assert((alignof(UnwindCursor<A, R>) <= alignof(unw_cursor_t)),
-@@ -1217,7 +1222,8 @@ UnwindCursor<A, R>::UnwindCursor(unw_con
- 
+@@ -1333,7 +1338,8 @@ UnwindCursor<A, R>::UnwindCursor(unw_context_t *context, A &as)
+
  template <typename A, typename R>
  UnwindCursor<A, R>::UnwindCursor(A &as, void *)
 -    : _addressSpace(as), _unwindInfoMissing(false), _isSignalFrame(false) {
@@ -75,18 +75,18 @@ diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
    memset(&_info, 0, sizeof(_info));
    // FIXME
    // fill in _registers from thread arg
-@@ -1273,6 +1279,10 @@ template <typename A, typename R> bool U
+@@ -1396,6 +1402,10 @@ template <typename A, typename R> bool UnwindCursor<A, R>::isSignalFrame() {
    return _isSignalFrame;
  }
- 
+
 +template <typename A, typename R> void UnwindCursor<A, R>::setForceDWARF(bool force) {
 +  _forceDwarf = force;
 +}
 +
  #endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND)
- 
+
  #if defined(_LIBUNWIND_ARM_EHABI)
-@@ -1941,7 +1951,13 @@ void UnwindCursor<A, R>::setInfoBasedOnI
+@@ -2611,7 +2621,12 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
          // record that we have no unwind info.
          if (_info.format == 0)
            _unwindInfoMissing = true;
@@ -96,14 +96,14 @@ diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
 +  #else
          return;
 +  #endif
-+
        }
      }
  #endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
-diff -pur a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp
---- a/libunwind/src/libunwind.cpp	2021-06-28 18:23:38.000000000 +0200
-+++ b/libunwind/src/libunwind.cpp	2022-05-04 18:44:24.000000000 +0200
-@@ -71,6 +71,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(u
+diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp
+index 217dde9..8e9a77a 100644
+--- a/libunwind/src/libunwind.cpp
++++ b/libunwind/src/libunwind.cpp
+@@ -86,6 +86,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor,
    new (reinterpret_cast<UnwindCursor<LocalAddressSpace, REGISTER_KIND> *>(cursor))
        UnwindCursor<LocalAddressSpace, REGISTER_KIND>(
            context, LocalAddressSpace::sThisAddressSpace);
@@ -111,10 +111,10 @@ diff -pur a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp
  #undef REGISTER_KIND
    AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
    co->setInfoBasedOnIPRegister();
-@@ -79,6 +80,54 @@ _LIBUNWIND_HIDDEN int __unw_init_local(u
+@@ -109,6 +110,54 @@ _LIBUNWIND_HIDDEN int __unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum,
  }
- _LIBUNWIND_WEAK_ALIAS(__unw_init_local, unw_init_local)
- 
+ _LIBUNWIND_WEAK_ALIAS(__unw_get_reg, unw_get_reg)
+
 +_LIBUNWIND_HIDDEN int __unw_init_local_dwarf(unw_cursor_t *cursor,
 +                                       unw_context_t *context) {
 +  _LIBUNWIND_TRACE_API("__unw_init_local_dwarf(cursor=%p, context=%p)",
@@ -163,14 +163,15 @@ diff -pur a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp
 +}
 +_LIBUNWIND_WEAK_ALIAS(__unw_init_local_dwarf, unw_init_local_dwarf)
 +
- /// Get value of specified register at cursor position in stack frame.
- _LIBUNWIND_HIDDEN int __unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum,
-                                     unw_word_t *value) {
-diff -pur a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h
---- a/libunwind/src/libunwind_ext.h	2021-06-28 18:23:38.000000000 +0200
-+++ b/libunwind/src/libunwind_ext.h	2022-05-04 18:44:24.000000000 +0200
+ /// Set value of specified register at cursor position in stack frame.
+ _LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum,
+                                     unw_word_t value) {
+diff --git a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h
+index 28db43a..c4f9767 100644
+--- a/libunwind/src/libunwind_ext.h
++++ b/libunwind/src/libunwind_ext.h
 @@ -25,6 +25,7 @@ extern "C" {
- 
+
  extern int __unw_getcontext(unw_context_t *);
  extern int __unw_init_local(unw_cursor_t *, unw_context_t *);
 +extern int __unw_init_local_dwarf(unw_cursor_t *, unw_context_t *);
diff --git a/deps/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch b/deps/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch
index afb4b941d5b92..0e517d8ec7aa8 100644
--- a/deps/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch
+++ b/deps/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch
@@ -16,7 +16,7 @@ single FDE. I suspect this was just an Apple bug, compensated by Apple-
 specific code in LLVM.
 
 See lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp and
-http://lists.llvm.org/pipermail/llvm-dev/2013-April/061737.html
+https://lists.llvm.org/pipermail/llvm-dev/2013-April/061737.html
 for more detail.
 
 This change is based on the LLVM RTDyldMemoryManager.cpp. It should
diff --git a/deps/patches/llvm-libunwind-prologue-epilogue.patch b/deps/patches/llvm-libunwind-prologue-epilogue.patch
index 7dadca728f9cf..b2618998905e4 100644
--- a/deps/patches/llvm-libunwind-prologue-epilogue.patch
+++ b/deps/patches/llvm-libunwind-prologue-epilogue.patch
@@ -14,7 +14,7 @@ index 1c3175dff50a..78a658ccbc27 100644
 @@ -310,6 +310,50 @@ int CompactUnwinder_x86_64<A>::stepWithCompactEncodingRBPFrame(
    uint32_t savedRegistersLocations =
        EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_REGISTERS);
- 
+
 +  // If we have not stored EBP yet
 +  if (functionStart == registers.getIP()) {
 +    uint64_t rsp = registers.getSP();
diff --git a/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch b/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch
deleted file mode 100644
index 4e3897dfb9801..0000000000000
--- a/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch
+++ /dev/null
@@ -1,156 +0,0 @@
-Upstream commit 8c03fdf34a659925a3f09c8f54016e47ea1c7519 changed the build such
-that it requires living inside the monorepo with libcxx available, only so that
-it can reuse a CMake file to simplify some build steps. This patch is a revert
-of that commit applied only to libunwind.
-
----
-diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
-index 570b8db90653..a383d7d77d6f 100644
---- a/libunwind/CMakeLists.txt
-+++ b/libunwind/CMakeLists.txt
-@@ -1,7 +1,3 @@
--if (NOT IS_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/../libcxx")
--  message(FATAL_ERROR "libunwind requires being built in a monorepo layout with libcxx available")
--endif()
--
- #===============================================================================
- # Setup Project
- #===============================================================================
-@@ -15,31 +11,103 @@ set(CMAKE_MODULE_PATH
-   ${CMAKE_MODULE_PATH}
-   )
- 
--set(LIBUNWIND_SOURCE_DIR  ${CMAKE_CURRENT_SOURCE_DIR})
--set(LIBUNWIND_BINARY_DIR  ${CMAKE_CURRENT_BINARY_DIR})
--set(LIBUNWIND_LIBCXX_PATH "${CMAKE_CURRENT_LIST_DIR}/../libcxx" CACHE PATH
--        "Specify path to libc++ source.")
--
- if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_BUILD)
-   project(libunwind LANGUAGES C CXX ASM)
- 
-+  # Rely on llvm-config.
-+  set(CONFIG_OUTPUT)
-+  if(NOT LLVM_CONFIG_PATH)
-+    find_program(LLVM_CONFIG_PATH "llvm-config")
-+  endif()
-+  if (DEFINED LLVM_PATH)
-+    set(LLVM_INCLUDE_DIR ${LLVM_INCLUDE_DIR} CACHE PATH "Path to llvm/include")
-+    set(LLVM_PATH ${LLVM_PATH} CACHE PATH "Path to LLVM source tree")
-+    set(LLVM_MAIN_SRC_DIR ${LLVM_PATH})
-+    set(LLVM_CMAKE_PATH "${LLVM_PATH}/cmake/modules")
-+  elseif(LLVM_CONFIG_PATH)
-+    message(STATUS "Found LLVM_CONFIG_PATH as ${LLVM_CONFIG_PATH}")
-+    set(CONFIG_COMMAND ${LLVM_CONFIG_PATH} "--includedir" "--prefix" "--src-root")
-+    execute_process(COMMAND ${CONFIG_COMMAND}
-+                    RESULT_VARIABLE HAD_ERROR
-+                    OUTPUT_VARIABLE CONFIG_OUTPUT)
-+    if (NOT HAD_ERROR)
-+      string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";"
-+             CONFIG_OUTPUT ${CONFIG_OUTPUT})
-+    else()
-+      string(REPLACE ";" " " CONFIG_COMMAND_STR "${CONFIG_COMMAND}")
-+      message(STATUS "${CONFIG_COMMAND_STR}")
-+      message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}")
-+    endif()
-+
-+    list(GET CONFIG_OUTPUT 0 INCLUDE_DIR)
-+    list(GET CONFIG_OUTPUT 1 LLVM_OBJ_ROOT)
-+    list(GET CONFIG_OUTPUT 2 MAIN_SRC_DIR)
-+
-+    set(LLVM_INCLUDE_DIR ${INCLUDE_DIR} CACHE PATH "Path to llvm/include")
-+    set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree")
-+    set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree")
-+    set(LLVM_LIT_PATH "${LLVM_PATH}/utils/lit/lit.py")
-+
-+    # --cmakedir is supported since llvm r291218 (4.0 release)
-+    execute_process(
-+      COMMAND ${LLVM_CONFIG_PATH} --cmakedir
-+      RESULT_VARIABLE HAD_ERROR
-+      OUTPUT_VARIABLE CONFIG_OUTPUT
-+      ERROR_QUIET)
-+    if(NOT HAD_ERROR)
-+      string(STRIP "${CONFIG_OUTPUT}" LLVM_CMAKE_PATH_FROM_LLVM_CONFIG)
-+      file(TO_CMAKE_PATH "${LLVM_CMAKE_PATH_FROM_LLVM_CONFIG}" LLVM_CMAKE_PATH)
-+    else()
-+      file(TO_CMAKE_PATH "${LLVM_BINARY_DIR}" LLVM_BINARY_DIR_CMAKE_STYLE)
-+      set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm")
-+    endif()
-+  else()
-+    message(WARNING "UNSUPPORTED LIBUNWIND CONFIGURATION DETECTED: "
-+                    "llvm-config not found and LLVM_MAIN_SRC_DIR not defined. "
-+                    "Reconfigure with -DLLVM_CONFIG=path/to/llvm-config "
-+                    "or -DLLVM_PATH=path/to/llvm-source-root.")
-+  endif()
-+
-+  if (EXISTS ${LLVM_CMAKE_PATH})
-+    list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}")
-+    include("${LLVM_CMAKE_PATH}/AddLLVM.cmake")
-+    include("${LLVM_CMAKE_PATH}/HandleLLVMOptions.cmake")
-+  else()
-+    message(WARNING "Not found: ${LLVM_CMAKE_PATH}")
-+  endif()
-+
-   set(PACKAGE_NAME libunwind)
-   set(PACKAGE_VERSION 12.0.1)
-   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
-   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
- 
--  # Add the CMake module path of libcxx so we can reuse HandleOutOfTreeLLVM.cmake
--  set(LIBUNWIND_LIBCXX_CMAKE_PATH "${LIBUNWIND_LIBCXX_PATH}/cmake/Modules")
--  list(APPEND CMAKE_MODULE_PATH "${LIBUNWIND_LIBCXX_CMAKE_PATH}")
-+  if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
-+    set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
-+  else()
-+    # Seek installed Lit.
-+    find_program(LLVM_LIT "lit.py" ${LLVM_MAIN_SRC_DIR}/utils/lit
-+                 DOC "Path to lit.py")
-+  endif()
- 
--  # In a standalone build, we don't have llvm to automatically generate the
--  # llvm-lit script for us.  So we need to provide an explicit directory that
--  # the configurator should write the script into.
--  set(LIBUNWIND_STANDALONE_BUILD 1)
--  set(LLVM_LIT_OUTPUT_DIR "${LIBUNWIND_BINARY_DIR}/bin")
-+  if (LLVM_LIT)
-+    # Define the default arguments to use with 'lit', and an option for the user
-+    # to override.
-+    set(LIT_ARGS_DEFAULT "-sv")
-+    if (MSVC OR XCODE)
-+      set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
-+    endif()
-+    set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit")
-+
-+    # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools.
-+    if (WIN32 AND NOT CYGWIN)
-+      set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools")
-+    endif()
-+  else()
-+    set(LLVM_INCLUDE_TESTS OFF)
-+  endif()
- 
--  # Find the LLVM sources and simulate LLVM CMake options.
--  include(HandleOutOfTreeLLVM)
-+  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
-+  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
- else()
-   set(LLVM_LIT "${CMAKE_SOURCE_DIR}/utils/lit/lit.py")
- endif()
-@@ -85,8 +153,6 @@ set(LIBUNWIND_TEST_COMPILER_FLAGS "" CACHE STRING
-     "Additional compiler flags for test programs.")
- set(LIBUNWIND_TEST_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test/lit.site.cfg.in" CACHE STRING
-     "The Lit testing configuration to use when running the tests.")
--set(LIBUNWIND_TEST_PARAMS "" CACHE STRING
--    "A list of parameters to run the Lit test suite with.")
- 
- if (NOT LIBUNWIND_ENABLE_SHARED AND NOT LIBUNWIND_ENABLE_STATIC)
-   message(FATAL_ERROR "libunwind must be built as either a shared or static library.")
-@@ -113,6 +179,9 @@ set(CMAKE_MODULE_PATH
-     "${CMAKE_CURRENT_SOURCE_DIR}/cmake"
-     ${CMAKE_MODULE_PATH})
- 
-+set(LIBUNWIND_SOURCE_DIR  ${CMAKE_CURRENT_SOURCE_DIR})
-+set(LIBUNWIND_BINARY_DIR  ${CMAKE_CURRENT_BINARY_DIR})
-+
- if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE)
-   set(LIBUNWIND_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++)
-   set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++)
diff --git a/deps/patches/llvm7-symver-jlprefix.patch b/deps/patches/llvm7-symver-jlprefix.patch
deleted file mode 100644
index bdfb8e523668b..0000000000000
--- a/deps/patches/llvm7-symver-jlprefix.patch
+++ /dev/null
@@ -1,18 +0,0 @@
-From f23277bb91a4925ba8763337137a3123a7600557 Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Tue, 16 Jan 2018 17:29:05 -0500
-Subject: [PATCH] add JL prefix to all LLVM version suffixes
-
----
- tools/llvm-shlib/simple_version_script.map.in | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/llvm-shlib/simple_version_script.map.in b/tools/llvm-shlib/simple_version_script.map.in
-index e9515fe7862..af082581627 100644
---- a/tools/llvm-shlib/simple_version_script.map.in
-+++ b/tools/llvm-shlib/simple_version_script.map.in
-@@ -1 +1 @@
--LLVM_@LLVM_VERSION_MAJOR@ { global: *; };
-+JL_LLVM_@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@ { global: *; };
---
-2.15.1
diff --git a/deps/patches/neoverse-generic-kernels.patch b/deps/patches/neoverse-generic-kernels.patch
deleted file mode 100644
index ab37e3783bf3e..0000000000000
--- a/deps/patches/neoverse-generic-kernels.patch
+++ /dev/null
@@ -1,19 +0,0 @@
-diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1
-index ea010db4..074d7215 100644
---- a/kernel/arm64/KERNEL.NEOVERSEN1
-+++ b/kernel/arm64/KERNEL.NEOVERSEN1
-@@ -91,10 +91,10 @@ IDAMAXKERNEL   = iamax_thunderx2t99.c
- ICAMAXKERNEL   = izamax_thunderx2t99.c
- IZAMAXKERNEL   = izamax_thunderx2t99.c
- 
--SNRM2KERNEL    = scnrm2_thunderx2t99.c
--DNRM2KERNEL    = dznrm2_thunderx2t99.c
--CNRM2KERNEL    = scnrm2_thunderx2t99.c
--ZNRM2KERNEL    = dznrm2_thunderx2t99.c
-+SNRM2KERNEL    = nrm2.S
-+DNRM2KERNEL    = nrm2.S
-+CNRM2KERNEL    = znrm2.S
-+ZNRM2KERNEL    = znrm2.S
- 
- DDOTKERNEL     = dot_thunderx2t99.c
- SDOTKERNEL     = dot_thunderx2t99.c
diff --git a/deps/patches/openblas-ofast-power.patch b/deps/patches/openblas-ofast-power.patch
index 405e3f7581331..01089286257f7 100644
--- a/deps/patches/openblas-ofast-power.patch
+++ b/deps/patches/openblas-ofast-power.patch
@@ -1,17 +1,29 @@
 diff --git a/Makefile.power b/Makefile.power
-index 28a0bae0..b4869fbd 100644
+index aa1ca080a..42c417a78 100644
 --- a/Makefile.power
 +++ b/Makefile.power
-@@ -11,7 +11,7 @@ endif
- 
- ifeq ($(CORE), POWER10)
+@@ -13,16 +13,16 @@ ifeq ($(CORE), POWER10)
  ifneq ($(C_COMPILER), PGI)
+ ifeq ($(C_COMPILER), GCC))
+ ifeq ($(GCCVERSIONGTEQ10), 1)
 -CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
 +CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
- ifeq ($(F_COMPILER), IBM)
- FCOMMON_OPT += -O2 -qrecur -qnosave
+ else ifneq ($(GCCVERSIONGT4), 1)
+ $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
+-CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
++CCOMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
+ else
+ $(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
+-CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
++CCOMMON_OPT += -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
+ endif
  else
-@@ -22,7 +22,7 @@ endif
+-CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
++CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
+ endif
+ ifeq ($(F_COMPILER), IBM)
+ FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
+@@ -34,7 +34,7 @@ endif
  
  ifeq ($(CORE), POWER9)
  ifneq ($(C_COMPILER), PGI)
@@ -20,7 +32,7 @@ index 28a0bae0..b4869fbd 100644
  ifeq ($(C_COMPILER), GCC)
  ifneq ($(GCCVERSIONGT4), 1)
  $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
-@@ -59,7 +59,7 @@ endif
+@@ -70,7 +70,7 @@ endif
  
  ifeq ($(CORE), POWER8)
  ifneq ($(C_COMPILER), PGI)
diff --git a/deps/pcre.mk b/deps/pcre.mk
index cd1180d992885..8fbdb1ba79a35 100644
--- a/deps/pcre.mk
+++ b/deps/pcre.mk
@@ -9,6 +9,9 @@ PCRE_LDFLAGS := $(RPATH_ESCAPED_ORIGIN)
 ifeq ($(OS),emscripten)
 PCRE_CFLAGS += -fPIC
 PCRE_JIT = --disable-jit
+else ifeq ($(OS),OpenBSD)
+# jit will need RWX memory
+PCRE_JIT = --disable-jit
 else
 PCRE_JIT = --enable-jit
 endif
@@ -18,7 +21,7 @@ $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2: | $(SRCCACHE)
 
 $(SRCCACHE)/pcre2-$(PCRE_VER)/source-extracted: $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2
 	$(JLCHECKSUM) $<
-	cd $(dir $<) && $(TAR) jxf $(notdir $<)
+	cd $(dir $<) && $(TAR) -jxf $(notdir $<)
 	echo 1 > $@
 
 checksum-pcre: $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2
diff --git a/deps/pcre.version b/deps/pcre.version
index ce27921435e1d..0f49d2577c1bc 100644
--- a/deps/pcre.version
+++ b/deps/pcre.version
@@ -1,5 +1,6 @@
+# -*- makefile -*-
 ## jll artifact
 PCRE_JLL_NAME := PCRE2
 
 ## source build
-PCRE_VER := 10.42
+PCRE_VER := 10.47
diff --git a/deps/sanitizers.mk b/deps/sanitizers.mk
index 81db75a4ee63e..2b685b8d80aef 100644
--- a/deps/sanitizers.mk
+++ b/deps/sanitizers.mk
@@ -1,19 +1,27 @@
 # Interrogate the compiler about where it is keeping its sanitizer libraries
 ifeq ($(USECLANG),1)
-SANITIZER_LIB_PATH := $(shell LANG=C $(CC) -print-runtime-dir)
+SANITIZER_LIB_PATH := $(shell LANG=C $(CC) -print-runtime-dir)\:$(subst $(eval ) ,:,$(abspath $(wildcard $(shell LANG=C $(CC) -print-resource-dir)/lib/*/)))
 else
 SANITIZER_LIB_PATH := $(dir $(shell LANG=C $(CC) -print-file-name=libasan.so))
 endif
 
 # Given a colon-separated list of paths in $(2), find the location of the library given in $(1)
-define pathsearch
-$(wildcard $(addsuffix /$(1),$(subst :, ,$(2))))
+define pathsearch_all
+$(firstword $(wildcard $(addsuffix /$(1),$(subst :, ,$(2)))))
 endef
 
 define copy_sanitizer_lib
-install-sanitizers: $$(addprefix $$(build_libdir)/, $$(notdir $$(call pathsearch,$(1),$$(SANITIZER_LIB_PATH)))) | $$(build_shlibdir)
-$$(addprefix $$(build_shlibdir)/,$(2)): $$(addprefix $$(SANITIZER_LIB_PATH)/,$(2)) | $$(build_shlibdir)
+install-sanitizers: $$(addprefix $$(build_libdir)/, $$(notdir $$(call pathsearch_all,$(1),$$(SANITIZER_LIB_PATH)))) | $$(build_shlibdir)
+	@result=$$(call pathsearch_all,$(1),$$(SANITIZER_LIB_PATH)); \
+	if [ -z "$$$$result" ]; then \
+		echo "Sanitizer library $(1) not found in $$(SANITIZER_LIB_PATH)"; \
+		exit 1; \
+	fi
+$$(addprefix $$(build_shlibdir)/,$(2)): $$(addprefix $$(dir $$(call pathsearch_all,$(1),$$(SANITIZER_LIB_PATH))),$(2)) $$(PATCHELF_MANIFEST) | $$(build_shlibdir)
 	-cp $$< $$@
+ifneq (,$(findstring $(OS),Linux))
+	-$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$$$ORIGIN' $$@
+endif
 endef
 
 ifeq ($(USECLANG),1)
diff --git a/deps/terminfo.mk b/deps/terminfo.mk
new file mode 100644
index 0000000000000..60865838a813e
--- /dev/null
+++ b/deps/terminfo.mk
@@ -0,0 +1,43 @@
+## TERMINFO-DB ##
+include $(SRCDIR)/terminfo.version
+
+$(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/JuliaBinaryWrappers/TermInfoDB_jll.jl/releases/download/$(TERMINFO_TAG)/TermInfoDB.v$(TERMINFO_VER).any.tar.gz
+	touch -c $@
+
+$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz
+	$(JLCHECKSUM) $<
+	rm -rf $(dir $@)
+	mkdir -p $(dir $@)
+	$(TAR) -C $(dir $@) --strip-components 1 -xf $<
+	echo 1 > $@
+
+checksum-terminfo: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz
+	$(JLCHECKSUM) $<
+
+$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted
+	echo 1 > $@
+
+$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-checked: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled
+	echo 1 > $@
+
+define TERMINFO_INSTALL
+	mkdir -p $2/$$(build_datarootdir)/julia
+	cp -R $1/terminfo $2/$$(build_datarootdir)/julia/
+endef
+$(eval $(call staged-install, \
+	terminfo,TermInfoDB-v$(TERMINFO_VER), \
+	TERMINFO_INSTALL,,,,))
+
+clean-terminfo:
+	-rm -f $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled
+
+distclean-terminfo:
+	rm -rf $(SRCCACHE)/TermInfoDB*.tar.gz $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER) $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)
+
+get-terminfo: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz
+extract-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted
+configure-terminfo: extract-terminfo
+compile-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled
+fastcheck-terminfo: check-terminfo
+check-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-checked
diff --git a/deps/terminfo.version b/deps/terminfo.version
new file mode 100644
index 0000000000000..b7c020b830517
--- /dev/null
+++ b/deps/terminfo.version
@@ -0,0 +1,3 @@
+# -*- makefile -*-
+TERMINFO_VER := 2023.12.9
+TERMINFO_TAG := TermInfoDB-v$(TERMINFO_VER)+0
diff --git a/deps/tools/bb-install.mk b/deps/tools/bb-install.mk
index ee7f833a8ac2b..66e3d716c90ff 100644
--- a/deps/tools/bb-install.mk
+++ b/deps/tools/bb-install.mk
@@ -55,9 +55,9 @@ ifneq (bsdtar,$(findstring bsdtar,$(TAR_TEST)))
 	@# work-around a gtar bug: they do some complicated work to avoid the mkdir
 	@# syscall, which is buggy when working with Tar.jl files so we manually do
 	@# the mkdir calls first in a pre-pass
-	$(TAR) -tzf $$< | xargs -n 1 dirname | sort -u | (cd $$(build_prefix) && xargs -t mkdir -p)
+	$$(TAR) -tzf $$< | xargs -n 1 dirname | sort -u | (cd $$(build_prefix) && xargs -t mkdir -p)
 endif
-	$(UNTAR) $$< -C $$(build_prefix)
+	$$(UNTAR) $$< -C $$(build_prefix)
 	echo '$$(UNINSTALL_$(strip $1))' > $$@
 
 # Special "checksum-foo" target to speed up `contrib/refresh_checksums.sh`
diff --git a/deps/tools/common.mk b/deps/tools/common.mk
index 3cefc253cec3d..30e0e105d283f 100644
--- a/deps/tools/common.mk
+++ b/deps/tools/common.mk
@@ -5,8 +5,15 @@
 # apparently not on FreeBSD). Ref PR #22352
 
 CONFIGURE_COMMON = --prefix=$(abspath $(build_prefix)) --build=$(BUILD_MACHINE) --libdir=$(abspath $(build_libdir)) --bindir=$(abspath $(build_depsbindir)) $(CUSTOM_LD_LIBRARY_PATH)
+
+CMAKE_COMMON := -DCMAKE_INSTALL_PREFIX:PATH=$(build_prefix) -DCMAKE_PREFIX_PATH=$(build_prefix)
+CMAKE_COMMON += -DLIB_INSTALL_DIR=$(build_shlibdir)
+
 ifneq ($(XC_HOST),)
 CONFIGURE_COMMON += --host=$(XC_HOST)
+else
+# Defeat bad automatic cross compile detection (e.g. clang on mingw)
+# CMAKE_COMMON += -DCMAKE_CROSSCOMPILING=0
 endif
 ifeq ($(OS),WINNT)
 CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) -Wl,--stack,8388608"
@@ -15,8 +22,6 @@ CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN) $(SANITIZE_LDFLA
 endif
 CONFIGURE_COMMON += F77="$(FC)" CC="$(CC) $(SANITIZE_OPTS)" CXX="$(CXX) $(SANITIZE_OPTS)" LD="$(LD)"
 
-CMAKE_COMMON := -DCMAKE_INSTALL_PREFIX:PATH=$(build_prefix) -DCMAKE_PREFIX_PATH=$(build_prefix)
-CMAKE_COMMON += -DLIB_INSTALL_DIR=$(build_shlibdir)
 ifneq ($(OS),WINNT)
 CMAKE_COMMON += -DCMAKE_INSTALL_LIBDIR=$(build_libdir)
 endif
@@ -36,8 +41,14 @@ CMAKE_COMMON += -DCMAKE_C_COMPILER_LAUNCHER=ccache
 CMAKE_COMMON += -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
 CMAKE_CC := "$$(which $(shell echo $(CC_ARG) | cut -d' ' -f1))"
 CMAKE_CXX := "$$(which $(shell echo $(CXX_ARG) | cut -d' ' -f1))"
-CMAKE_CC_ARG := $(shell echo $(CC_ARG) | cut -d' ' -f2-)
-CMAKE_CXX_ARG := $(shell echo $(CXX_ARG) | cut -d' ' -f2-)
+CMAKE_CC_ARG := $(shell echo $(CC_ARG) | cut -s -d' ' -f2-)
+CMAKE_CXX_ARG := $(shell echo $(CXX_ARG) | cut -s -d' ' -f2-)
+else ifneq (,$(findstring MINGW,$(RAW_BUILD_OS)))
+# `cmake` is mingw-native and needs `cygpath -w`, rather than `cygpath -m`, which is the msys2 conversion default
+CMAKE_CC := "$(shell echo $(call cygpath_w, $(shell which $(CC_BASE))))"
+CMAKE_CXX := "$(shell echo $(call cygpath_w, $(shell which $(CXX_BASE))))"
+CMAKE_CC_ARG := $(CC_ARG)
+CMAKE_CXX_ARG := $(CXX_ARG)
 else
 CMAKE_CC := "$$(which $(CC_BASE))"
 CMAKE_CXX := "$$(which $(CXX_BASE))"
@@ -55,11 +66,15 @@ endif
 CMAKE_COMMON += -DCMAKE_LINKER="$$(which $(LD))" -DCMAKE_AR="$$(which $(AR))" -DCMAKE_RANLIB="$$(which $(RANLIB))"
 
 ifeq ($(OS),WINNT)
+ifeq ($(BUILD_OS),WINNT)
+# Don't make CMake think we're cross compiling, but do make sure it knows we're Windows
+CMAKE_COMMON += -DCMAKE_HOST_SYSTEM_NAME=Windows
+else
 CMAKE_COMMON += -DCMAKE_SYSTEM_NAME=Windows
+endif
 CMAKE_COMMON += -DCMAKE_RC_COMPILER="$$(which $(CROSS_COMPILE)windres)"
 endif
 
-# For now this is LLVM specific, but I expect it won't be in the future
 ifeq ($(CMAKE_GENERATOR),Ninja)
 CMAKE_GENERATOR_COMMAND := -G Ninja
 else ifeq ($(CMAKE_GENERATOR),make)
@@ -68,6 +83,27 @@ else
 $(error Unknown CMake generator '$(CMAKE_GENERATOR)'. Options are 'Ninja' and 'make')
 endif
 
+ifneq (,$(findstring MINGW,$(RAW_BUILD_OS)))
+ifneq (,$(shell ldd $(shell which cmake) | grep msys-2.0.dll))
+# Detect MSYS2 with cygwin CMake rather than MinGW cmake - the former fails to
+# properly drive MinGW tools
+override CMAKE := echo "ERROR: CMake is Cygwin CMake, not MinGW CMake. Build will fail. Use 'pacman -S mingw-w64-{i686,x86_64}-cmake'."; exit 1; $(CMAKE)
+endif
+# In our setup, CMAKE_INSTALL_PREFIX is a relative path inside usr-staging.
+# We do not want this converted to a windows path, because our make system
+# assumes it to be relative to msys `/`.
+override CMAKE := MSYS2_ARG_CONV_EXCL="-DCMAKE_INSTALL_PREFIX" $(CMAKE)
+endif
+
+# Some dependencies' tarballs contains symlinks to non-existent targets. This breaks the
+# the default msys strategy `deepcopy` symlink strategy. To workaround this,
+# switch to `native` which tries native windows symlinks (possible if the
+# machine is in developer mode) - or if not, falls back to cygwin-style
+# symlinks. We don't particularly care either way - we just need to symlinks
+# to succeed. We could guard this by a uname check, but it's harmless elsewhere,
+# so let's not incur the additional overhead.
+MSYS_NONEXISTENT_SYMLINK_TARGET_FIX := winsymlinks:native
+
 # If the top-level Makefile is called with environment variables,
 # they will override the values passed above to ./configure
 MAKE_COMMON := DESTDIR="" prefix=$(build_prefix) bindir=$(build_depsbindir) libdir=$(build_libdir) shlibdir=$(build_shlibdir) libexecdir=$(build_libexecdir) datarootdir=$(build_datarootdir) includedir=$(build_includedir) sysconfdir=$(build_sysconfdir) O=
@@ -144,7 +180,7 @@ upper = $(shell echo $1 | tr a-z A-Z)
 # this rule ensures that make install is more nearly atomic
 # so it's harder to get half-installed (or half-reinstalled) dependencies
 # # and enables sharing deps compiles, uninstall, and fast reinstall
-MAKE_INSTALL = +$$(MAKE) -C $1 install $$(MAKE_COMMON) $3 DESTDIR="$2"
+MAKE_INSTALL = MSYS2_ARG_CONV_EXCL="prefix=" $$(MAKE) -C $1 install $$(MAKE_COMMON) $3 DESTDIR="$2"
 
 define SHLIBFILE_INSTALL
 	mkdir -p $2/$$(build_shlibdir)
@@ -187,7 +223,7 @@ UNINSTALL_$(strip $1) := $2 staged-uninstaller
 
 $$(build_prefix)/manifest/$(strip $1): $$(build_staging)/$2.tar | $(build_prefix)/manifest
 	-+[ ! -e $$@ ] || $$(MAKE) uninstall-$(strip $1)
-	$(UNTAR) $$< -C $$(build_prefix)
+	$$(UNTAR) $$< -C $$(build_prefix)
 	$6
 	echo '$$(UNINSTALL_$(strip $1))' > $$@
 .PHONY: $(addsuffix -$(strip $1),stage install distclean uninstall reinstall)
@@ -253,6 +289,26 @@ endif
 
 ## phony targets ##
 
-.PHONY: default get extract configure compile fastcheck check install uninstall reinstall cleanall distcleanall version-check \
-	get-* extract-* configure-* compile-* fastcheck-* check-* install-* uninstall-* reinstall-* clean-* distclean-* \
-	update-llvm
+.PHONY: check
+.PHONY: check-*
+.PHONY: clean-*
+.PHONY: cleanall
+.PHONY: compile
+.PHONY: compile-*
+.PHONY: configure
+.PHONY: configure-*
+.PHONY: default
+.PHONY: distclean-*
+.PHONY: distcleanall
+.PHONY: extract
+.PHONY: extract-*
+.PHONY: fastcheck
+.PHONY: fastcheck-*
+.PHONY: get
+.PHONY: get-*
+.PHONY: install
+.PHONY: install-*
+.PHONY: reinstall
+.PHONY: reinstall-*
+.PHONY: uninstall
+.PHONY: uninstall-*
diff --git a/deps/tools/jlchecksum b/deps/tools/jlchecksum
index 87db805dbfab3..513984ac0ad70 100755
--- a/deps/tools/jlchecksum
+++ b/deps/tools/jlchecksum
@@ -56,14 +56,14 @@ find_checksum()
 {
     for pack in "$DEPSDIR"/checksums/*; do
         if [ -f "$pack" ]; then
-            TRUE_CHECKSUM=$(awk -F / "{ if (\$1 == \"$BASENAME\" && \$2 == \"$CHECKSUM_TYPE\") print \$3 }" "$pack")
-            if [ $TRUE_CHECKSUM ]; then
+            TRUE_CHECKSUM=$(awk -F / "{ if (\$1 == \"$BASENAME\" && \$2 == \"$CHECKSUM_TYPE\") { print \$3; exit } }" "$pack")
+            if [ "$TRUE_CHECKSUM" ]; then
                 return
             fi
         fi
     done
     if [ ! -f "$DEPSDIR/checksums/$BASENAME/$CHECKSUM_TYPE" ]; then
-        if [ ${TAGGED_RELEASE_BANNER:-} ]; then
+        if [ "${TAGGED_RELEASE_BANNER:-}" ]; then
             echo "WARNING: $CHECKSUM_TYPE checksum for $BASENAME not found in deps/checksums/, failing release build." >&2
             exit 3
         fi
@@ -87,15 +87,17 @@ SHA512_PROG=""
 MD5_PROG=""
 find_checksum_progs()
 {
-    if [ ! -z $(which sha512sum) ]; then
+    if [ ! -z $(which sha512sum 2>/dev/null) ]; then
         SHA512_PROG="sha512sum $ARG1 | awk '{ print \$1; }'"
-    elif [ ! -z $(which shasum) ]; then
+    elif [ ! -z $(which shasum 2>/dev/null) ]; then
         SHA512_PROG="shasum -a 512 $ARG1 | awk '{ print \$1; }'"
+    elif [ ! -z $(which sha512 2>/dev/null) ]; then
+        SHA512_PROG="sha512 -q $ARG1"
     fi
 
-    if [ ! -z $(which md5sum) ]; then
+    if [ ! -z $(which md5sum 2>/dev/null) ]; then
         MD5_PROG="md5sum $ARG1 | awk '{ print \$1; }'"
-    elif [ ! -z $(which md5) ]; then
+    elif [ ! -z $(which md5 2>/dev/null) ]; then
         MD5_PROG="md5 -q $ARG1"
     fi
 }
diff --git a/deps/unwind.mk b/deps/unwind.mk
index 76593df1e5ef0..83cd8a8633c84 100644
--- a/deps/unwind.mk
+++ b/deps/unwind.mk
@@ -3,8 +3,9 @@ include $(SRCDIR)/unwind.version
 include $(SRCDIR)/llvmunwind.version
 
 ifneq ($(USE_BINARYBUILDER_LIBUNWIND),1)
-LIBUNWIND_CFLAGS := -U_FORTIFY_SOURCE $(fPIC) -lz $(SANITIZE_OPTS)
-LIBUNWIND_CPPFLAGS :=
+LIBUNWIND_CFLAGS := -U_FORTIFY_SOURCE $(fPIC) $(SANITIZE_OPTS)
+LIBUNWIND_CPPFLAGS := -I$(build_includedir)
+LIBUNWIND_LDFLAGS := -L$(build_shlibdir)
 
 ifeq ($(USE_SYSTEM_ZLIB),0)
 $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: | $(build_prefix)/manifest/zlib
@@ -19,40 +20,32 @@ $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz: | $(SRCCACHE)
 
 $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted: $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz
 	$(JLCHECKSUM) $<
-	cd $(dir $<) && $(TAR) xfz $<
+	cd $(dir $<) && $(TAR) -xzf $<
 	touch -c $(SRCCACHE)/libunwind-$(UNWIND_VER)/configure # old target
 	echo 1 > $@
 
 checksum-unwind: $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz
 	$(JLCHECKSUM) $<
 
-$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-prefer-extbl.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted
-	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f < $(SRCDIR)/patches/libunwind-prefer-extbl.patch
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-configure-static-lzma.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted
+	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p0 -f -u -l < $(SRCDIR)/patches/libunwind-configure-static-lzma.patch
 	echo 1 > $@
 
-$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-static-arm.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-prefer-extbl.patch-applied
-	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f < $(SRCDIR)/patches/libunwind-static-arm.patch
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-revert_prelink_unwind.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-configure-static-lzma.patch-applied
+	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-revert_prelink_unwind.patch
 	echo 1 > $@
 
-$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-static-arm.patch-applied
-	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u < $(SRCDIR)/patches/libunwind-cfa-rsp.patch
-	echo 1 > $@
-
-$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-dwarf-table.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied
-	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-dwarf-table.patch
-	echo 1 > $@
-
-$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-non-empty-structs.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-dwarf-table.patch-applied
-	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-non-empty-structs.patch
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-disable-initial-exec-tls.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-revert_prelink_unwind.patch-applied
+	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-disable-initial-exec-tls.patch
 	echo 1 > $@
 
 # note minidebuginfo requires liblzma, which we do not have a source build for
 # (it will be enabled in BinaryBuilder-based downloads however)
 # since https://github.com/JuliaPackaging/Yggdrasil/commit/0149e021be9badcb331007c62442a4f554f3003c
-$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-non-empty-structs.patch-applied
+$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-disable-initial-exec-tls.patch-applied
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo --disable-conservative-checks
+	$(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" LDFLAGS="$(LDFLAGS) $(LIBUNWIND_LDFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-debug_frame --enable-zlibdebuginfo --disable-conservative-checks --enable-per-thread-cache
 	echo 1 > $@
 
 $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-compiled: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured
@@ -89,50 +82,62 @@ check-unwind: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-checked
 
 ## LLVM libunwind ##
 
-LLVMUNWIND_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=MinSizeRel -DLIBUNWIND_ENABLE_PEDANTIC=OFF -DLLVM_CONFIG_PATH=$(build_depsbindir)/llvm-config
-
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/libunwind-$(LLVMUNWIND_VER).src.tar.xz
-
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
+LLVMUNWIND_OPTS := $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) \
+	-DCMAKE_BUILD_TYPE=MinSizeRel \
+	-DLIBUNWIND_ENABLE_PEDANTIC=OFF \
+	-DLIBUNWIND_INCLUDE_DOCS=OFF \
+	-DLIBUNWIND_INCLUDE_TESTS=OFF \
+	-DLIBUNWIND_INSTALL_HEADERS=ON \
+	-DLIBUNWIND_ENABLE_ASSERTIONS=OFF \
+	-DLLVM_CONFIG_PATH=$(build_depsbindir)/llvm-config \
+	-DLLVM_ENABLE_RUNTIMES="libunwind" \
+	-DLLVM_PATH=$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/llvm
+
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/llvm-project-$(LLVMUNWIND_VER).src.tar.xz
+
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz
 	$(JLCHECKSUM) $<
-	cd $(dir $<) && $(TAR) xf $<
-	mv $(SRCCACHE)/libunwind-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)
-	echo 1 > $@
-
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch
+	cd $(dir $<) && $(TAR) -xf $<
+	mv $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)
 	echo 1 > $@
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted
+	cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch
 	echo 1 > $@
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-revert-monorepo-requirement.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-revert-monorepo-requirement.patch
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-prologue-epilogue.patch-applied
+	cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch
 	echo 1 > $@
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-revert-monorepo-requirement.patch-applied
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-force-dwarf.patch-applied
+	cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch
 	echo 1 > $@
 
-checksum-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
+checksum-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz
 	$(JLCHECKSUM) $<
 
-$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied
+$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(CMAKE) $(dir $<) $(LLVMUNWIND_OPTS)
+	$(CMAKE) $(dir $<) -S $(dir $<)/runtimes $(LLVMUNWIND_OPTS)
 	echo 1 > $@
 
 $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured
-	$(MAKE) -C $(dir $<)
+	cd $(dir $<) && \
+	$(if $(filter $(CMAKE_GENERATOR),make), \
+		  $(MAKE), \
+		  $(CMAKE) --build . --target unwind)
 	echo 1 > $@
 
+LIBUNWIND_INSTALL = \
+	cd $1 && mkdir -p $2$$(build_depsbindir) && \
+	$$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P libunwind/cmake_install.cmake
+
 $(eval $(call staged-install, \
 	llvmunwind,llvmunwind-$(LLVMUNWIND_VER), \
-	MAKE_INSTALL,,, \
-	cp -fR $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/include/* $(build_includedir)))
+	LIBUNWIND_INSTALL,,, \
+	cp -fR $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/* $(build_includedir)))
 
 clean-llvmunwind:
 	-rm -f $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled
@@ -140,14 +145,14 @@ clean-llvmunwind:
 	-$(MAKE) -C $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER) clean
 
 distclean-llvmunwind:
-	rm -rf $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz \
+	rm -rf $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz \
 		$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) \
 		$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)
 
-get-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
-extract-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted
-configure-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured
-compile-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled
+get-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz
+extract-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted
+configure-llvmunwind: $(BUILDDIR)/llvm-project-$(LLVMUNWIND_VER)/build-configured
+compile-llvmunwind: $(BUILDDIR)/llvm-project-$(LLVMUNWIND_VER)/build-compiled
 fastcheck-llvmunwind: check-llvmunwind
 check-llvmunwind: # no test/check provided by Makefile
 
diff --git a/deps/unwind.version b/deps/unwind.version
index e17b2e91c2e51..682182179825c 100644
--- a/deps/unwind.version
+++ b/deps/unwind.version
@@ -1,6 +1,8 @@
+# -*- makefile -*-
+
 ## jll artifact
 UNWIND_JLL_NAME := LibUnwind
 
 ## source build
-UNWIND_VER_TAG := 1.5
-UNWIND_VER := 1.5.0
+UNWIND_VER_TAG := 1.8.3
+UNWIND_VER := 1.8.3
diff --git a/deps/utf8proc.version b/deps/utf8proc.version
index 659b995e8abaf..98ec437ea6f2b 100644
--- a/deps/utf8proc.version
+++ b/deps/utf8proc.version
@@ -1,2 +1,2 @@
-UTF8PROC_BRANCH=v2.8.0
-UTF8PROC_SHA1=1cb28a66ca79a0845e99433fd1056257456cef8b
+UTF8PROC_BRANCH=v2.11.1
+UTF8PROC_SHA1=53f0bac76f5d5bd3ed69d1a06ee5b200c8940610
diff --git a/deps/valgrind/valgrind.h b/deps/valgrind/valgrind.h
index 2e07a49d91dfa..b33fd70fab672 100644
--- a/deps/valgrind/valgrind.h
+++ b/deps/valgrind/valgrind.h
@@ -1065,7 +1065,7 @@ typedef
 
 /* Use these to write the name of your wrapper.  NOTE: duplicates
    VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h.  NOTE also: inserts
-   the default behaviour equivalance class tag "0000" into the name.
+   the default behaviour equivalence class tag "0000" into the name.
    See pub_tool_redir.h for details -- normally you don't need to
    think about this, though. */
 
diff --git a/deps/zlib.mk b/deps/zlib.mk
index 5548a0791f4d2..347b8d4cf53b6 100644
--- a/deps/zlib.mk
+++ b/deps/zlib.mk
@@ -10,7 +10,7 @@ ZLIB_BUILD_OPTS += -DCMAKE_POSITION_INDEPENDENT_CODE=ON
 
 $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-configured: $(SRCCACHE)/$(ZLIB_SRC_DIR)/source-extracted
 	mkdir -p $(dir $@)
-	cd $(dir $@) && $(CMAKE) $(ZLIB_BUILD_OPTS) $(dir $<)
+	cd $(dir $@) && $(CMAKE) -G"Unix Makefiles" $(ZLIB_BUILD_OPTS) $(dir $<)
 	echo 1 > $@
 
 $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-compiled: $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-configured
diff --git a/deps/zlib.version b/deps/zlib.version
index 89a304c49b6dc..27d862a4cc35b 100644
--- a/deps/zlib.version
+++ b/deps/zlib.version
@@ -3,6 +3,6 @@
 ZLIB_JLL_NAME := Zlib
 
 ## source build
-ZLIB_VER := 1.2.13
-ZLIB_BRANCH=v1.2.13
-ZLIB_SHA1=04f42ceca40f73e2978b50e93806c2a18c1281fc
+ZLIB_VER := 1.3.1
+ZLIB_BRANCH=v1.3.1
+ZLIB_SHA1=51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf
diff --git a/deps/zstd.mk b/deps/zstd.mk
new file mode 100644
index 0000000000000..ecce416ab3f38
--- /dev/null
+++ b/deps/zstd.mk
@@ -0,0 +1,66 @@
+## Zstd ##
+ifneq ($(USE_BINARYBUILDER_ZSTD), 1)
+ZSTD_GIT_URL := https://github.com/facebook/zstd.git
+ZSTD_TAR_URL = https://api.github.com/repos/facebook/zstd/tarball/$1
+$(eval $(call git-external,zstd,ZSTD,,,$(BUILDDIR)))
+$(BUILDDIR)/$(ZSTD_SRC_DIR)/source-extracted: export MSYS=$(MSYS_NONEXISTENT_SYMLINK_TARGET_FIX)
+
+ZSTD_BUILD_OPTS := MOREFLAGS="-DZSTD_MULTITHREAD $(fPIC)" bindir=$(build_private_libexecdir)
+ifeq ($(OS), WINNT)
+# Zstd detects "Windows" not WINNT, ordinarily from the inherited $(OS), but it expects the
+# override to be done using TARGET_SYSTEM.
+ZSTD_BUILD_OPTS += TARGET_SYSTEM="Windows"
+endif
+
+$(BUILDDIR)/$(ZSTD_SRC_DIR)/build-configured: $(BUILDDIR)/$(ZSTD_SRC_DIR)/source-extracted
+	echo 1 > $@
+
+$(BUILDDIR)/$(ZSTD_SRC_DIR)/build-compiled: $(BUILDDIR)/$(ZSTD_SRC_DIR)/build-configured
+	$(MAKE) -C $(dir $<) $(MAKE_COMMON) $(ZSTD_BUILD_OPTS)
+	echo 1 > $@
+
+$(eval $(call staged-install, \
+	zstd,$(ZSTD_SRC_DIR), \
+	MAKE_INSTALL,$(ZSTD_BUILD_OPTS) MT=1,, \
+	$(INSTALL_NAME_CMD)libzstd.$(SHLIB_EXT) $(build_private_libexecdir)/libzstd.$(SHLIB_EXT)))
+
+clean-zstd:
+	-rm -f $(BUILDDIR)/$(ZSTD_SRC_DIR)/build-configured $(BUILDDIR)/$(ZSTD_SRC_DIR)/build-compiled
+	-$(MAKE) -C $(BUILDDIR)/$(ZSTD_SRC_DIR) $(MAKE_COMMON) $(ZSTD_BUILD_OPTS) clean
+
+get-zstd: $(ZSTD_SRC_FILE)
+extract-zstd: $(BUILDDIR)/$(ZSTD_SRC_DIR)/source-extracted
+configure-zstd: $(BUILDDIR)/$(ZSTD_SRC_DIR)/build-configured
+compile-zstd: $(BUILDDIR)/$(ZSTD_SRC_DIR)/build-compiled
+fastcheck-zstd: check-zstd
+check-zstd: compile-zstd
+
+else # USE_BINARYBUILDER_ZSTD
+
+$(eval $(call bb-install,zstd,ZSTD,false))
+# move from bindir to shlibdir, where we expect to install it
+install-zstd: post-install-zstd
+uninstall-zstd: pre-uninstall-zstd
+post-install-zstd: $(build_prefix)/manifest/zstd $(PATCHELF_MANIFEST)
+	mkdir -p $(build_private_libexecdir)/
+	[ ! -e $(build_bindir)/zstdmt$(EXE) ] || mv $(build_bindir)/zstdmt$(EXE) $(build_private_libexecdir)/zstdmt$(EXE)
+	[ ! -e $(build_bindir)/zstd$(EXE) ] || mv $(build_bindir)/zstd$(EXE) $(build_private_libexecdir)/zstd$(EXE)
+	[ -e $(build_private_libexecdir)/zstd$(EXE) ]
+	[ -e $(build_private_libexecdir)/zstdmt$(EXE) ]
+ifeq ($(OS), Darwin)
+	for j in zstd zstdmt ; do \
+		[ -L $(build_private_libexecdir)/$$j ] && continue; \
+		install_name_tool -rpath @executable_path/$(reverse_build_private_libexecdir_rel) @loader_path/$(build_libdir_rel) $(build_private_libexecdir)/$$j 2>/dev/null || true; \
+		install_name_tool -rpath @loader_path/$(build_libdir_rel) @executable_path/$(reverse_build_private_libexecdir_rel) $(build_private_libexecdir)/$$j || exit 1; \
+	done
+else ifneq (,$(findstring $(OS),Linux FreeBSD))
+	for j in zstd zstdmt ; do \
+		[ -L $(build_private_libexecdir)/$$j ] && continue; \
+		$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN/$(reverse_build_private_libexecdir_rel)' $(build_private_libexecdir)/$$j || exit 1; \
+	done
+endif
+
+pre-uninstall-zstd:
+	-rm -f $(build_private_libexecdir)/zstd$(EXE) $(build_private_libexecdir)/zstdmt$(EXE)
+
+endif # USE_BINARYBUILDER_ZSTD
diff --git a/deps/zstd.version b/deps/zstd.version
new file mode 100644
index 0000000000000..d4d960aa6f04b
--- /dev/null
+++ b/deps/zstd.version
@@ -0,0 +1,8 @@
+# -*- makefile -*-
+## jll artifact
+ZSTD_JLL_NAME := Zstd
+
+## source build
+ZSTD_VER := 1.5.7
+ZSTD_BRANCH=v1.5.7
+ZSTD_SHA1=f8745da6ff1ad1e7bab384bd1f9d742439278e99
diff --git a/doc/Makefile b/doc/Makefile
index 4469a40f74248..0c5d94d0100fb 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -1,5 +1,6 @@
 # Makefile for building documentation
 
+.PHONY: default
 default: html
 
 # You can set these variables from the command line.
@@ -9,8 +10,7 @@ SRCCACHE         := $(abspath $(JULIAHOME)/deps/srccache)
 include $(JULIAHOME)/Make.inc
 JULIA_EXECUTABLE := $(call spawn,$(build_bindir)/julia) --startup-file=no
 
-.PHONY: help clean cleanall html pdf deps deploy
-
+.PHONY: help
 help:
 	@echo "Please use 'make <target>' where <target> is one of"
 	@echo "  html  to make standalone HTML files"
@@ -21,39 +21,63 @@ help:
 	@echo "To fix outdated doctests, use 'make <target> doctest=fix'"
 	@echo "To run doctests using Revise (to test changes without rebuilding the sysimage), use 'make <target> doctest=true revise=true'"
 
-
+VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION)
 DOCUMENTER_OPTIONS := linkcheck=$(linkcheck) doctest=$(doctest) buildroot=$(call cygpath_w,$(BUILDROOT)) \
-    texplatform=$(texplatform) revise=$(revise)
+    texplatform=$(texplatform) revise=$(revise) stdlibdir=$(call cygpath_w,$(build_datarootdir)/julia/stdlib/$(VERSDIR)/)
 
-UNICODE_DATA_VERSION=13.0.0
+UNICODE_DATA_VERSION=17.0.0
 $(SRCCACHE)/UnicodeData-$(UNICODE_DATA_VERSION).txt:
 	@mkdir -p "$(SRCCACHE)"
 	$(JLDOWNLOAD) "$@" https://www.unicode.org/Public/$(UNICODE_DATA_VERSION)/ucd/UnicodeData.txt
 
+# NEWS.md and stdlib are in-tree build artifacts - don't link them for oot builds.
+DOC_FILES=$(filter-out NEWS.md stdlib,$(notdir $(wildcard $(SRCDIR)/src/*)))
+src/%:
+	@mkdir -p src
+	ln -s $(SRCDIR)/src/$* $@
+.PHONY: src
+src: $(addprefix src/,$(DOC_FILES))
+
+.PHONY: deps
 deps: $(SRCCACHE)/UnicodeData-$(UNICODE_DATA_VERSION).txt
 	$(JLCHECKSUM) "$<"
 	cp "$<" UnicodeData.txt
 
+.PHONY: alldeps
+alldeps: deps
+	$(JULIA_EXECUTABLE) --color=yes $(call cygpath_w,$(SRCDIR)/make.jl) deps $(DOCUMENTER_OPTIONS)
+
+.PHONY: checksum-unicodedata
 checksum-unicodedata: $(SRCCACHE)/UnicodeData-$(UNICODE_DATA_VERSION).txt
 	$(JLCHECKSUM) "$<"
 
+.PHONY: clean
 clean:
 	rm -rf _build/* deps/* docbuild.log UnicodeData.txt
 
+.PHONY: cleanall
 cleanall: clean
 
+.PHONY: html
 html: deps
 	@echo "Building HTML documentation."
 	$(JULIA_EXECUTABLE) --color=yes $(call cygpath_w,$(SRCDIR)/make.jl) $(DOCUMENTER_OPTIONS)
 	@echo "Build finished. The HTML pages are in _build/html."
 
+.PHONY: pdf
 pdf: deps
 	@echo "Building PDF documentation."
 	$(JULIA_EXECUTABLE) --color=yes $(call cygpath_w,$(SRCDIR)/make.jl) -- pdf $(DOCUMENTER_OPTIONS)
 	@echo "Build finished."
 
 # The deploy target should only be called in CI builds
+.PHONY: deploy
 deploy: deps
 	@echo "Deploying HTML documentation."
 	$(JULIA_EXECUTABLE) --color=yes $(call cygpath_w,$(SRCDIR)/make.jl) -- deploy $(DOCUMENTER_OPTIONS)
 	@echo "Build & deploy of docs finished."
+
+.PHONY: update-documenter
+update-documenter:
+	@echo "Updating Documenter."
+	JULIA_PKG_PRECOMPILE_AUTO=0 $(JULIA_EXECUTABLE) --project=$(call cygpath_w,$(SRCDIR)/../deps/jlutilities/documenter/) --color=yes -e 'using Pkg; Pkg.update()'
diff --git a/doc/Manifest.toml b/doc/Manifest.toml
deleted file mode 100644
index cf50a1d41ddbd..0000000000000
--- a/doc/Manifest.toml
+++ /dev/null
@@ -1,98 +0,0 @@
-# This file is machine-generated - editing it directly is not advised
-
-julia_version = "1.9.0-DEV"
-manifest_format = "2.0"
-project_hash = "e0c77beb18dc1f6cce661ebd60658c0c1a77390f"
-
-[[deps.ANSIColoredPrinters]]
-git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
-uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9"
-version = "0.0.1"
-
-[[deps.Base64]]
-uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
-
-[[deps.Dates]]
-deps = ["Printf"]
-uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
-
-[[deps.DocStringExtensions]]
-deps = ["LibGit2"]
-git-tree-sha1 = "5158c2b41018c5f7eb1470d558127ac274eca0c9"
-uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
-version = "0.9.1"
-
-[[deps.Documenter]]
-deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"]
-git-tree-sha1 = "6030186b00a38e9d0434518627426570aac2ef95"
-uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
-version = "0.27.23"
-
-[[deps.IOCapture]]
-deps = ["Logging", "Random"]
-git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a"
-uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
-version = "0.2.2"
-
-[[deps.InteractiveUtils]]
-deps = ["Markdown"]
-uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
-
-[[deps.JSON]]
-deps = ["Dates", "Mmap", "Parsers", "Unicode"]
-git-tree-sha1 = "3c837543ddb02250ef42f4738347454f95079d4e"
-uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
-version = "0.21.3"
-
-[[deps.LibGit2]]
-deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
-uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
-
-[[deps.Logging]]
-uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
-
-[[deps.Markdown]]
-deps = ["Base64"]
-uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
-
-[[deps.Mmap]]
-uuid = "a63ad114-7e13-5084-954f-fe012c677804"
-
-[[deps.NetworkOptions]]
-uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
-version = "1.2.0"
-
-[[deps.Parsers]]
-deps = ["Dates"]
-git-tree-sha1 = "3d5bf43e3e8b412656404ed9466f1dcbf7c50269"
-uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
-version = "2.4.0"
-
-[[deps.Printf]]
-deps = ["Unicode"]
-uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-
-[[deps.REPL]]
-deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
-uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
-
-[[deps.Random]]
-deps = ["SHA", "Serialization"]
-uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-
-[[deps.SHA]]
-uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-version = "0.7.0"
-
-[[deps.Serialization]]
-uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-
-[[deps.Sockets]]
-uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
-
-[[deps.Test]]
-deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
-uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[[deps.Unicode]]
-uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
diff --git a/doc/NEWS-update.jl b/doc/NEWS-update.jl
index 4014c67645eb2..a5ffcd5753314 100644
--- a/doc/NEWS-update.jl
+++ b/doc/NEWS-update.jl
@@ -5,7 +5,7 @@ NEWS = get(ARGS, 1, "NEWS.md")
 
 s = read(NEWS, String)
 
-m = match(r"\[#[0-9]+\]:", s)
+m = match(r"^\[#[0-9]+\]:"m, s)
 if m !== nothing
     s = s[1:m.offset-1]
 end
diff --git a/doc/README.md b/doc/README.md
index be5426018084d..d282485bc584a 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -27,3 +27,11 @@ $ make -C doc doctest=true
 ```
 
 from the root directory.
+
+## Customizing Doctest Execution
+
+By default, doctests are run using the in-tree Julia executable.
+This behavior can be changed by setting the `JULIA_EXECUTABLE` Makefile variable.
+
+> [!WARNING]
+> Using a custom `JULIA_EXECUTABLE` will not pick up changes to docstrings for Base or any standard library built into the system image. To see the list of standard libraries that are part of the system image, you can run the `contrib/print_sorted_stdlibs.jl` script (e.g., `julia contrib/print_sorted_stdlibs.jl --only-sysimg`).
diff --git a/doc/make.jl b/doc/make.jl
index 087b033fcf79c..ef9548c22a1ed 100644
--- a/doc/make.jl
+++ b/doc/make.jl
@@ -1,13 +1,41 @@
+# Get the buildroot and stdlibdir from the make environment to make sure we're
+# generating docs for the current julia source tree, regardless of what julia
+# executable we're using. If these arguments are not passed, fall back to
+# assuming that we're running a just-built version of julia and generating docs
+# in tree.
+let r = r"buildroot=(.+)", i = findfirst(x -> occursin(r, x), ARGS)
+    if i === nothing
+        global const buildrootdoc = @__DIR__
+        global const buildroot = abspath(joinpath(buildrootdoc, ".."))
+    else
+        global const buildroot = first(match(r, ARGS[i]).captures)
+        global const buildrootdoc = joinpath(buildroot, "doc")
+    end
+end
+
+let r = r"stdlibdir=(.+)", i = findfirst(x -> occursin(r, x), ARGS)
+    if i === nothing
+        global const STDLIB_DIR = Sys.STDLIB
+    else
+        global const STDLIB_DIR = first(match(r, ARGS[i]).captures)
+    end
+end
+
 # Install dependencies needed to build the documentation.
-Base.ACTIVE_PROJECT[] = nothing
-empty!(LOAD_PATH)
-push!(LOAD_PATH, @__DIR__, "@stdlib")
+documenter_project_dir = joinpath(@__DIR__, "..", "deps", "jlutilities", "documenter")
 empty!(DEPOT_PATH)
-pushfirst!(DEPOT_PATH, joinpath(@__DIR__, "deps"))
+push!(DEPOT_PATH, joinpath(buildroot, "deps", "jlutilities", "depot"))
+push!(DEPOT_PATH, abspath(Sys.BINDIR, Base.DATAROOTDIR, "julia"))
 using Pkg
+Pkg.activate(documenter_project_dir)
 Pkg.instantiate()
 
+if "deps" in ARGS
+    exit()
+end
+
 using Documenter
+import LibGit2
 
 baremodule GenStdLib end
 
@@ -18,9 +46,8 @@ cp_q(src, dest) = isfile(dest) || cp(src, dest)
 
 # make links for stdlib package docs, this is needed until #552 in Documenter.jl is finished
 const STDLIB_DOCS = []
-const STDLIB_DIR = Sys.STDLIB
 const EXT_STDLIB_DOCS = ["Pkg"]
-cd(joinpath(@__DIR__, "src")) do
+cd(joinpath(buildrootdoc, "src")) do
     Base.rm("stdlib"; recursive=true, force=true)
     mkdir("stdlib")
     for dir in readdir(STDLIB_DIR)
@@ -42,6 +69,76 @@ cd(joinpath(@__DIR__, "src")) do
     end
 end
 
+# Because we have standard libraries that are hosted outside of the julia repo,
+# but their docs are included in the manual, we need to populate the remotes argument
+# of makedocs(), to make sure that Documenter knows how to resolve the directories
+# in stdlib/ to the correct remote Git repositories (for source and edit links).
+#
+# This function parses the *.version files in stdlib/, returning a dictionary with
+# all the key-value pairs from those files. *_GIT_URL and *_SHA1 fields are the ones
+# we will actually be interested in.
+function parse_stdlib_version_file(path)
+    values = Dict{String,String}()
+    for line in readlines(path)
+        m = match(r"^([A-Z0-9_]+)\s+:?=\s+(\S+)$", line)
+        if isnothing(m)
+            @warn "Unable to parse line in $(path)" line
+        else
+            values[m[1]] = m[2]
+        end
+    end
+    return values
+end
+# This generates the value that will be passed to the `remotes` argument of makedocs(),
+# by looking through all *.version files in stdlib/.
+documenter_stdlib_remotes = let stdlib_dir = realpath(joinpath(@__DIR__, "..", "stdlib")),
+                                stdlib_build_dir = joinpath(buildrootdoc, "..", "stdlib")
+    # Get a list of all *.version files in stdlib/..
+    version_files = filter(readdir(stdlib_dir)) do fname
+        isfile(joinpath(stdlib_dir, fname)) && endswith(fname, ".version")
+    end
+    # .. and then parse them, each becoming an entry for makedocs's remotes.
+    # The values for each are of the form path => (remote, sha1), where
+    #  - path: the path to the stdlib package's root directory, i.e. "stdlib/$PACKAGE"
+    #  - remote: a Documenter.Remote object, pointing to the Git repository where package is hosted
+    #  - sha1: the SHA1 of the commit that is included with the current Julia version
+    remotes_list = map(version_files) do version_fname
+        package = match(r"(.+)\.version", version_fname)[1]
+        versionfile = parse_stdlib_version_file(joinpath(stdlib_dir, version_fname))
+        # From the (all uppercase) $(package)_GIT_URL and $(package)_SHA1 fields, we'll determine
+        # the necessary information. If this logic happens to fail for some reason for any of the
+        # standard libraries, we'll crash the documentation build, so that it could be fixed.
+        remote = let git_url_key = "$(uppercase(package))_GIT_URL"
+            haskey(versionfile, git_url_key) || error("Missing $(git_url_key) in $version_fname")
+            m = match(LibGit2.GITHUB_REGEX, versionfile[git_url_key])
+            isnothing(m) && error("Unable to parse $(git_url_key)='$(versionfile[git_url_key])' in $version_fname")
+            Documenter.Remotes.GitHub(m[2], m[3])
+        end
+        package_sha = let sha_key = "$(uppercase(package))_SHA1"
+            haskey(versionfile, sha_key) || error("Missing $(sha_key) in $version_fname")
+            versionfile[sha_key]
+        end
+        # Construct the absolute (local) path to the stdlib package's root directory
+        package_root_dir = joinpath(stdlib_build_dir, "$(package)-$(package_sha)")
+        # Documenter needs package_root_dir to exist --- it's just a sanity check it does on the remotes= keyword.
+        # In normal (local) builds, this will be the case, since the Makefiles will have unpacked the standard
+        # libraries. However, on CI we do this thing where we actually build docs in a clean worktree, just
+        # unpacking the `usr/` directory from the main build, and the unpacked stdlibs will be missing, and this
+        # will cause Documenter to throw an error. However, we don't _actually_ need the source files of the standard
+        # libraries to be present, so we just generate empty root directories to satisfy the check in Documenter.
+        isdir(package_root_dir) || mkpath(package_root_dir)
+        package_root_dir => (remote, package_sha)
+    end
+    Dict(
+        # We also add the root of the repository to `remotes`, because we do not always build the docs in a
+        # checked out JuliaLang/julia repository. In particular, when building Julia from tarballs, there is no
+        # Git information available. And also the way the BuildKite CI is configured to check out the code means
+        # that in some circumstances the Git repository information is incorrect / no available via Git.
+        dirname(@__DIR__) => (Documenter.Remotes.GitHub("JuliaLang", "julia"), Base.GIT_VERSION_INFO.commit),
+        remotes_list...
+    )
+end
+
 # Check if we are building a PDF
 const render_pdf = "pdf" in ARGS
 
@@ -52,7 +149,7 @@ function generate_markdown(basename)
     @assert length(splitted) == 2
     replaced_links = replace(splitted[1], r"\[\#([0-9]*?)\]" => s"[#\g<1>](https://github.com/JuliaLang/julia/issues/\g<1>)")
     write(
-        joinpath(@__DIR__, "src", "$basename.md"),
+        joinpath(buildrootdoc, "src", "$basename.md"),
         """
         ```@meta
         EditURL = "https://github.com/JuliaLang/julia/blob/master/$basename.md"
@@ -63,6 +160,7 @@ generate_markdown("NEWS")
 
 Manual = [
     "manual/getting-started.md",
+    "manual/installation.md",
     "manual/variables.md",
     "manual/integers-and-floating-point-numbers.md",
     "manual/mathematical-operations.md",
@@ -94,6 +192,7 @@ Manual = [
     "manual/code-loading.md",
     "manual/profile.md",
     "manual/stacktraces.md",
+    "manual/memory-management.md",
     "manual/performance-tips.md",
     "manual/workflow-tips.md",
     "manual/style-guide.md",
@@ -101,6 +200,7 @@ Manual = [
     "manual/noteworthy-differences.md",
     "manual/unicode-input.md",
     "manual/command-line-interface.md",
+    "manual/worldage.md",
 ]
 
 BaseDocs = [
@@ -112,6 +212,7 @@ BaseDocs = [
     "base/arrays.md",
     "base/parallel.md",
     "base/multi-threading.md",
+    "base/scopedvalues.md",
     "base/constants.md",
     "base/file.md",
     "base/io-network.md",
@@ -127,6 +228,15 @@ BaseDocs = [
 
 StdlibDocs = [stdlib.targetfile for stdlib in STDLIB_DOCS]
 
+# HACK: get nicer sorting here, even though we don't have the header
+# of the .md files at hand.
+sort!(StdlibDocs, by=function(x)
+    x = replace(x, "stdlib/" => "")
+    startswith(x, "Libdl") && return lowercase("Dynamic Linker")
+    startswith(x, "Test") && return lowercase("Unit Testing")
+    return lowercase(x)
+end)
+
 DevDocs = [
     "Documentation of Julia's Internals" => [
         "devdocs/init.md",
@@ -156,7 +266,10 @@ DevDocs = [
         "devdocs/aot.md",
         "devdocs/gc-sa.md",
         "devdocs/gc.md",
+        "devdocs/gc-mmtk.md",
         "devdocs/jit.md",
+        "devdocs/builtins.md",
+        "devdocs/precompile_hang.md",
     ],
     "Developing/debugging Julia's C code" => [
         "devdocs/backtraces.md",
@@ -173,7 +286,18 @@ DevDocs = [
         "devdocs/build/windows.md",
         "devdocs/build/freebsd.md",
         "devdocs/build/arm.md",
+        "devdocs/build/riscv.md",
         "devdocs/build/distributing.md",
+    ],
+    "Contributor's Guide" => [
+        "devdocs/contributing/code-changes.md",
+        "devdocs/contributing/tests.md",
+        "devdocs/contributing/documentation.md",
+        "devdocs/contributing/jldoctests.md",
+        "devdocs/contributing/patch-releases.md",
+        "devdocs/contributing/formatting.md",
+        "devdocs/contributing/git-workflow.md",
+        "devdocs/contributing/aiagents.md"
     ]
 ]
 
@@ -199,16 +323,11 @@ end
 
 const use_revise = "revise=true" in ARGS
 if use_revise
-    let revise_env = joinpath(@__DIR__, "deps", "revise")
-        Pkg.activate(revise_env)
-        Pkg.add("Revise"; preserve=Pkg.PRESERVE_NONE)
-        Base.ACTIVE_PROJECT[] = nothing
-        pushfirst!(LOAD_PATH, revise_env)
-    end
+    Pkg.activate(joinpath(@__DIR__, "..", "deps", "jlutilities", "revise"))
+    Pkg.instantiate()
 end
 function maybe_revise(ex)
     use_revise || return ex
-    STDLIB_DIR = Sys.STDLIB
     STDLIBS = filter!(x -> isfile(joinpath(STDLIB_DIR, x, "src", "$(x).jl")), readdir(STDLIB_DIR))
     return quote
         $ex
@@ -269,10 +388,6 @@ DocMeta.setdocmeta!(
     recursive=true, warn=false,
 )
 
-let r = r"buildroot=(.+)", i = findfirst(x -> occursin(r, x), ARGS)
-    global const buildroot = i === nothing ? (@__DIR__) : first(match(r, ARGS[i]).captures)
-end
-
 const format = if render_pdf
     Documenter.LaTeX(
         platform = "texplatform=docker" in ARGS ? "docker" : "native"
@@ -289,23 +404,27 @@ else
         collapselevel = 1,
         sidebar_sitename = false,
         ansicolor = true,
+        size_threshold = 800 * 2^10, # 800 KiB
+        size_threshold_warn = 200 * 2^10, # the manual has quite a few large pages, so we warn at 200+ KiB only
+        inventory_version = VERSION,
     )
 end
 
-const output_path = joinpath(buildroot, "doc", "_build", (render_pdf ? "pdf" : "html"), "en")
+const output_path = joinpath(buildrootdoc, "_build", (render_pdf ? "pdf" : "html"), "en")
 makedocs(
+    source    = joinpath(buildrootdoc, "src"),
     build     = output_path,
     modules   = [Main, Base, Core, [Base.root_module(Base, stdlib.stdlib) for stdlib in STDLIB_DOCS]...],
     clean     = true,
     doctest   = ("doctest=fix" in ARGS) ? (:fix) : ("doctest=only" in ARGS) ? (:only) : ("doctest=true" in ARGS) ? true : false,
     linkcheck = "linkcheck=true" in ARGS,
     linkcheck_ignore = ["https://bugs.kde.org/show_bug.cgi?id=136779"], # fails to load from nanosoldier?
-    strict    = true,
     checkdocs = :none,
     format    = format,
     sitename  = "The Julia Language",
     authors   = "The Julia Project",
     pages     = PAGES,
+    remotes   = documenter_stdlib_remotes,
 )
 
 # Update URLs to external stdlibs (JuliaLang/julia#43199)
@@ -369,6 +488,7 @@ const devurl = "v$(VERSION.major).$(VERSION.minor)-dev"
 
 # Hack to make rc docs visible in the version selector
 struct Versions versions end
+Documenter.determine_deploy_subfolder(deploy_decision, ::Versions) = deploy_decision.subfolder
 function Documenter.Writers.HTMLWriter.expand_versions(dir::String, v::Versions)
     # Find all available docs
     available_folders = readdir(dir)
@@ -393,7 +513,7 @@ if "deploy" in ARGS
     deploydocs(
         repo = "github.com/JuliaLang/docs.julialang.org.git",
         deploy_config = BuildBotConfig(),
-        target = joinpath(buildroot, "doc", "_build", "html", "en"),
+        target = joinpath(buildrootdoc, "_build", "html", "en"),
         dirname = "en",
         devurl = devurl,
         versions = Versions(["v#.#", devurl => devurl]),
diff --git a/doc/man/julia.1 b/doc/man/julia.1
index fa9f641b1e76f..a1992ccafaafc 100644
--- a/doc/man/julia.1
+++ b/doc/man/julia.1
@@ -21,14 +21,14 @@
 .\" - diagnostics
 .\" - notes
 
-.TH JULIA 1 2022-02-17 JULIA
+.TH JULIA 1 2023-09-01 JULIA
 
 .\" from the front page of https://julialang.org/
 .SH NAME
 julia - a high-level, high-performance dynamic programming language for technical computing
 
 .SH SYNOPSIS
-\fBjulia\fR [OPTIONS...] \fB--\fR [PROGRAMMFILE] [ARGS...]
+\fBjulia\fR [OPTIONS...] \fB--\fR [PROGRAMFILE] [ARGS...]
 
 If a Julia source file is given as a \fIPROGRAMFILE\fP (optionally followed by
 arguments in \fIARGS\fP) Julia will execute the program and exit.
@@ -59,7 +59,7 @@ Display version information
 
 .TP
 -h, --help
-Print help message
+Print command-line options (this message)
 
 .TP
 --help-hidden
@@ -67,7 +67,7 @@ Print uncommon options not shown by `-h`
 
 .TP
 --project[=<dir>/@.]
-Set <dir> as the home project/environment. The default @. option will search
+Set <dir> as the active project/environment. The default @. option will search
 through parent directories until a Project.toml or JuliaProject.toml file is
 found.
 
@@ -77,7 +77,7 @@ Start up with the given system image file
 
 .TP
 -H, --home <dir>
-Set location of julia executable
+Set location of `julia` executable
 
 .TP
 --startup-file={yes*|no}
@@ -93,8 +93,16 @@ Enable or disable Julia's default signal handlers
 Use native code from system image if available
 
 .TP
---compiled-modules={yes*|no}
-Enable or disable incremental precompilation of modules
+--compiled-modules={yes*|no|existing|strict}
+Enable or disable incremental precompilation of modules.
+The `existing` option allows use of existing compiled modules that were
+previously precompiled, but disallows creation of new precompile files.
+The `strict` option is similar, but will error if no precompile file is found.
+
+.TP
+--pkgimages={yes*|no|existing}
+Enable or disable usage of native code caching in the form of pkgimages
+The `existing` option allows use of existing pkgimages but disallows creation of new ones
 
 .TP
 -e, --eval <expr>
@@ -104,24 +112,30 @@ Evaluate <expr>
 -E, --print <expr>
 Evaluate <expr> and display the result
 
+.TP
+-m, --module <Package> [args]
+Run entry point of `Package` (`@main` function) with `args'
+
 .TP
 -L, --load <file>
 Load <file> immediately on all processors
 
 .TP
--t, --threads <n>
-Enable n threads; "auto" tries to infer a useful default number
-of threads to use but the exact behavior might change in the future.
-Currently, "auto" uses the number of CPUs assigned to this julia
-process based on the OS-specific affinity assignment interface, if
-supported (Linux and Windows). If this is not supported (macOS) or
-process affinity is not configured, it uses the number of CPU
-threads.
+-t, --threads {auto|N[,auto|M]}
+Enable N[+M] threads; N threads are assigned to the `default`
+threadpool, and if M is specified, M threads are assigned to the
+`interactive` threadpool; `auto` tries to infer a useful
+default number of threads to use but the exact behavior might change
+in the future. Currently sets N to the number of CPUs assigned to
+this Julia process based on the OS-specific affinity assignment
+interface if supported (Linux and Windows) or to the number of CPU
+threads if not supported (MacOS) or if process affinity is not
+configured, and sets M to 1.
 
 .TP
---gcthreads <n>
-Enable n GC threads; If unspecified is set to half of the
-compute worker threads.
+--gcthreads=N[,M]
+Use N threads for the mark phase of GC and M (0 or 1) threads for the concurrent sweeping phase of GC.
+N is set to the number of compute threads and M is set to 0 if unspecified.
 
 .TP
 -p, --procs {N|auto}
@@ -133,7 +147,7 @@ as the number of local CPU threads (logical cores)
 Run processes on hosts listed in <file>
 
 .TP
--i
+-i, --interactive
 Interactive mode; REPL runs and `isinteractive()` is true
 
 .TP
@@ -141,7 +155,7 @@ Interactive mode; REPL runs and `isinteractive()` is true
 Quiet startup: no banner, suppress REPL warnings
 
 .TP
---banner={yes|no|auto*}
+--banner={yes|no|short|auto*}
 Enable or disable startup banner
 
 .TP
@@ -169,15 +183,15 @@ Enable or disable warning for ambiguous top-level scope
 Limit usage of CPU features up to <target>; set to `help` to see the available options
 
 .TP
--O, --optimize={0,1,2*,3}
+-O, --optimize={0|1|2*|3}
 Set the optimization level (level 3 if `-O` is used without a level)
 
 .TP
---min-optlevel={0*,1,2,3}
+--min-optlevel={0*|1|2|3}
 Set a lower bound on the optimization level
 
 .TP
--g {0,1*,2}
+-g, --debug-info={0|1*|2}
 Set the level of debug info generation (level 2 if `-g` is used without a level)
 
 .TP
@@ -189,8 +203,12 @@ Control whether inlining is permitted, including overriding @inline declarations
 Emit bounds checks always, never, or respect @inbounds declarations
 
 .TP
---math-mode={ieee|user}
-Disallow or enable unsafe floating point optimizations (overrides @fastmath declaration)
+--math-mode={ieee|user*}
+Always follow `ieee` floating point semantics or respect `@fastmath` declarations
+
+.TP
+--polly={yes*|no}
+Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)
 
 .TP
 --code-coverage[={none*|user|all}]
@@ -202,8 +220,8 @@ Count executions of source lines in a file or files under a given directory. A `
 be placed before the path to indicate this option. A `@` with no path will track the current directory.
 
 .TP
- --code-coverage=tracefile.info
- Append coverage information to the LCOV tracefile (filename supports format tokens)
+--code-coverage=tracefile.info
+Append coverage information to the LCOV tracefile (filename supports format tokens)
 
 .TP
 --track-allocation[={none*|user|all}]
@@ -211,8 +229,8 @@ Count bytes allocated by each source line (omitting setting is equivalent to `us
 
 .TP
 --track-allocation=@<path>
-Count bytes allocated by each source line in a file or files under a given directory. A `@`
-must be placed before the path to indicate this option. A `@` with no path will track the current directory.
+Count bytes but only in files that fall under the given file path/directory.
+The `@` prefix is required to select this option. A `@` with no path will track the current directory.
 
 .TP
 --bug-report=KIND
@@ -223,8 +241,9 @@ fallbacks to the latest compatible BugReporting.jl if not. For more information,
 
 .TP
 --heap-size-hint=<size>
-Forces garbage collection if memory usage is higher than that value. The memory hint might be
-specified in megabytes (500M) or gigabytes (1.5G)
+Forces garbage collection if memory usage is higher than the given value.
+The value may be specified as a number of bytes, optionally in units of
+KB, MB, GB, or TB, or as a percentage of physical memory with %.
 
 .TP
 --compile={yes*|no|all|min}
@@ -263,13 +282,37 @@ Generate an assembly file (.s)
 Generate an incremental output file (rather than complete)
 
 .TP
---trace-compile={stderr,name}
-Print precompile statements for methods compiled during execution or save to a path
+--trace-compile={stderr|name}
+Print precompile statements for methods compiled during execution or save to stderr or a path.
+Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported
+
+.TP
+--trace-compile-timing=
+If --trace-compile is enabled show how long each took to compile in ms
+
+.TP
+--trace-dispatch={stderr|name}
+Print precompile statements for methods dispatched during execution or save to stderr or a path.
+
+.TP
+--trace-eval[={no*|loc|full}]
+Show top-level expressions being evaluated. `loc` shows location info only,
+`full` shows full expressions (omitting setting is equivalent to `loc`).
+Only shows the outermost expression being evaluated, not internal function calls.
+Can be controlled programmatically via Base.TRACE_EVAL.
+
+.TP
+--task-metrics={yes|no*}
+Enable the collection of per-task metrics.
 
 .TP
 -image-codegen
 Force generate code in imaging mode
 
+.TP
+--permalloc-pkgimg={yes|no*}
+Copy the data section of package images into memory
+
 .SH FILES AND ENVIRONMENT
 See https://docs.julialang.org/en/v1/manual/environment-variables/
 
@@ -277,6 +320,15 @@ See https://docs.julialang.org/en/v1/manual/environment-variables/
 Please report any bugs using the GitHub issue tracker:
 https://github.com/julialang/julia/issues?state=open
 
-
 .SH AUTHORS
 Contributors: https://github.com/JuliaLang/julia/graphs/contributors
+
+.SH INTERNET RESOURCES
+Website:  https://julialang.org/
+.br
+Documentation:  https://docs.julialang.org/
+.br
+Install:  https://julialang.org/install/
+
+.SH LICENSING
+Julia is an open-source project. It is made available under the MIT license.
diff --git a/doc/src/assets/cover.tex b/doc/src/assets/cover.tex
index 67b77e520acd3..b959477913f59 100644
--- a/doc/src/assets/cover.tex
+++ b/doc/src/assets/cover.tex
@@ -15,7 +15,7 @@
 %% ---- reset page geometry for cover page
 \newgeometry{left=2cm,right=2cm,bottom=3cm}
 % ref: memman@v3.7q, P65, "4.1. Styling the titling"
-%   http://mirrors.ctan.org/macros/latex/contrib/memoir/memman.pdf
+%   https://mirrors.ctan.org/macros/latex/contrib/memoir/memman.pdf
 \begin{titlingpage}
     % set background image
     \BgThispage
diff --git a/doc/src/assets/custom.sty b/doc/src/assets/custom.sty
index 03e6ff805cd3f..ebc11f0414945 100644
--- a/doc/src/assets/custom.sty
+++ b/doc/src/assets/custom.sty
@@ -6,7 +6,7 @@
 \usepackage{geometry}
 % "some": use \BgThispage to change background
 % ref: background@v2.1,# 2.1 Options, "pages="
-%   http://mirrors.ctan.org/macros/latex/contrib/background/background.pdf
+%   https://mirrors.ctan.org/macros/latex/contrib/background/background.pdf
 \usepackage[pages=some]{background}
 
 %% Color definitions for Julia
@@ -27,7 +27,7 @@ contents={
 %% Place the background image `title-bg' in the right place via `tikz'.
 % tikz option "remember picture", "overlay"
 % ref: pgfmanual@3.1.9a, #17.13.1 Referencing a Node in a Different Picture\
-%   http://mirrors.ctan.org/graphics/pgf/base/doc/pgfmanual.pdf
+%   https://mirrors.ctan.org/graphics/pgf/base/doc/pgfmanual.pdf
 \begin{tikzpicture}[remember picture,overlay,draw=white]
   \draw [path picture={
     % ref: pgfmanual, 15.6, "Predefined node path picture bounding box"
diff --git a/doc/src/base/arrays.md b/doc/src/base/arrays.md
index 6585f98360585..f142f67532017 100644
--- a/doc/src/base/arrays.md
+++ b/doc/src/base/arrays.md
@@ -30,6 +30,11 @@ Base.StridedArray
 Base.StridedVector
 Base.StridedMatrix
 Base.StridedVecOrMat
+Base.GenericMemory
+Base.Memory
+Base.Memory(::UndefInitializer, ::Int)
+Base.memoryref
+Base.memoryindex
 Base.Slices
 Base.RowSlices
 Base.ColumnSlices
@@ -76,6 +81,7 @@ to operate on arrays, you should use `sin.(a)` to vectorize via `broadcast`.
 Base.broadcast
 Base.Broadcast.broadcast!
 Base.@__dot__
+Base.Broadcast.BroadcastFunction
 ```
 
 For specializing broadcast on custom types, see
@@ -95,6 +101,8 @@ Base.Broadcast.result_style
 ```@docs
 Base.getindex(::AbstractArray, ::Any...)
 Base.setindex!(::AbstractArray, ::Any, ::Any...)
+Base.nextind
+Base.prevind
 Base.copyto!(::AbstractArray, ::CartesianIndices, ::AbstractArray, ::CartesianIndices)
 Base.copy!
 Base.isassigned
@@ -109,6 +117,12 @@ Base.checkindex
 Base.elsize
 ```
 
+While most code can be written in an index-agnostic manner (see, e.g., [`eachindex`](@ref)), it can sometimes be useful to explicitly check for offset axes:
+```@docs
+Base.require_one_based_indexing
+Base.has_offset_axes
+```
+
 ## Views (SubArrays and other view types)
 
 A “view” is a data structure that acts like an array (it is a subtype of `AbstractArray`), but the underlying data is actually
@@ -120,7 +134,7 @@ accessing the first 10 elements of `x`. Writing to a view, e.g. `v[3] = 2`, writ
 
 Slicing operations like `x[1:10]` create a copy by default in Julia. `@view x[1:10]` changes it to make a view. The
 `@views` macro can be used on a whole block of code (e.g. `@views function foo() .... end` or `@views begin ... end`)
-to change all the slicing operations in that block to use views.  Sometimes making a copy of the data is faster and
+to change all the slicing operations in that block to use views. Sometimes making a copy of the data is faster and
 sometimes using a view is faster, as described in the [performance tips](@ref man-performance-views).
 
 ```@docs
@@ -132,6 +146,7 @@ Base.parentindices
 Base.selectdim
 Base.reinterpret
 Base.reshape
+Base.insertdims
 Base.dropdims
 Base.vec
 Base.SubArray
diff --git a/doc/src/base/base.md b/doc/src/base/base.md
index aa5d23281dd6b..a59689f4dc4de 100644
--- a/doc/src/base/base.md
+++ b/doc/src/base/base.md
@@ -3,8 +3,8 @@
 ## Introduction
 
 Julia Base contains a range of functions and macros appropriate for performing
-scientific and numerical computing, but is also as broad as those of many general purpose programming
-languages.  Additional functionality is available from a growing collection of
+scientific and numerical computing, but is also as broad as those of many general-purpose programming
+languages. Additional functionality is available from a growing collection of
 [available packages](https://julialang.org/packages/).
 Functions are grouped by topic below.
 
@@ -30,10 +30,13 @@ Base.isinteractive
 Base.summarysize
 Base.__precompile__
 Base.include
-Base.MainInclude.include
+Main.include
 Base.include_string
 Base.include_dependency
 __init__
+Base.OncePerProcess
+Base.OncePerTask
+Base.OncePerThread
 Base.which(::Any, ::Any)
 Base.methods
 Base.@show
@@ -41,6 +44,7 @@ ans
 err
 Base.active_project
 Base.set_active_project
+Base.active_manifest
 ```
 
 ## [Keywords](@id Keywords)
@@ -60,6 +64,7 @@ However, you can create variables with names:
 Finally:
 `where` is parsed as an infix operator for writing parametric method and type definitions;
 `in` and `isa` are parsed as infix operators;
+`public` is parsed as a keyword when beginning a toplevel statement;
 `outer` is parsed as a keyword when used to modify the scope of a variable in an iteration specification of a `for` loop;
 and `as` is used as a keyword to rename an identifier brought into scope by `import` or `using`.
 Creation of variables named `where`, `in`, `isa`, `outer` and `as` is allowed, though.
@@ -67,6 +72,7 @@ Creation of variables named `where`, `in`, `isa`, `outer` and `as` is allowed, t
 ```@docs
 module
 export
+public
 import
 using
 as
@@ -100,16 +106,22 @@ where
 ;
 =
 ?:
+.=
+.
+->
+Base.:(:)
+::
+[]
 ```
 
-## Standard Modules
+## [Standard Modules](@id standard-modules)
 ```@docs
 Main
 Core
 Base
 ```
 
-## Base Submodules
+## [Base Submodules](@id base-submodules)
 ```@docs
 Base.Broadcast
 Base.Docs
@@ -129,6 +141,8 @@ Core.:(===)
 Core.isa
 Base.isequal
 Base.isless
+Base.ispositive
+Base.isnegative
 Base.isunordered
 Base.ifelse
 Core.typeassert
@@ -146,6 +160,7 @@ Base.setproperty!
 Base.replaceproperty!
 Base.swapproperty!
 Base.modifyproperty!
+Base.setpropertyonce!
 Base.propertynames
 Base.hasproperty
 Core.getfield
@@ -153,9 +168,9 @@ Core.setfield!
 Core.modifyfield!
 Core.replacefield!
 Core.swapfield!
+Core.setfieldonce!
 Core.isdefined
-Core.getglobal
-Core.setglobal!
+Core.isdefinedglobal
 Base.@isdefined
 Base.convert
 Base.promote
@@ -180,6 +195,7 @@ Base.typeintersect
 Base.promote_type
 Base.promote_rule
 Base.promote_typejoin
+Base.iskindtype
 Base.isdispatchtuple
 ```
 
@@ -196,6 +212,7 @@ Base.isstructtype
 Base.nameof(::DataType)
 Base.fieldnames
 Base.fieldname
+Base.fieldindex
 Core.fieldtype
 Base.fieldtypes
 Base.fieldcount
@@ -237,6 +254,7 @@ Base.instances
 Core.Any
 Core.Union
 Union{}
+Core.TypeofBottom
 Core.UnionAll
 Core.Tuple
 Core.NTuple
@@ -275,15 +293,17 @@ Base.:(|>)
 Base.:(∘)
 Base.ComposedFunction
 Base.splat
+Base.Fix
 Base.Fix1
 Base.Fix2
+Returns
 ```
 
 ## Syntax
 
 ```@docs
 Core.eval
-Base.MainInclude.eval
+Main.eval
 Base.@eval
 Base.evalfile
 Base.esc
@@ -305,7 +325,12 @@ Base.@simd
 Base.@polly
 Base.@generated
 Base.@assume_effects
+```
+
+## Managing deprecations
+```@docs
 Base.@deprecate
+Base.depwarn
 ```
 
 ## Missing Values
@@ -336,6 +361,12 @@ Base.Cmd
 Base.setenv
 Base.addenv
 Base.withenv
+Base.shell_escape
+Base.shell_split
+Base.shell_escape_posixly
+Base.shell_escape_csh
+Base.shell_escape_wincmd
+Base.escape_microsoft_c_args
 Base.setcpuaffinity
 Base.pipeline(::Any, ::Any, ::Any, ::Any...)
 Base.pipeline(::Base.AbstractCmd)
@@ -350,6 +381,8 @@ Base.@timed
 Base.@elapsed
 Base.@allocated
 Base.@allocations
+Base.@lock_conflicts
+Base.TRACE_EVAL
 Base.EnvDict
 Base.ENV
 Base.Sys.STDLIB
@@ -368,9 +401,14 @@ Base.Sys.total_memory
 Base.Sys.free_physical_memory
 Base.Sys.total_physical_memory
 Base.Sys.uptime
+Base.Sys.sysimage_target
 Base.Sys.isjsvm
 Base.Sys.loadavg
 Base.Sys.isexecutable
+Base.Sys.isreadable
+Base.Sys.iswritable
+Base.Sys.which
+Base.Sys.username
 Base.@static
 ```
 
@@ -402,6 +440,7 @@ Core.DivideError
 Core.DomainError
 Base.EOFError
 Core.ErrorException
+Core.FieldError
 Core.InexactError
 Core.InterruptException
 Base.KeyError
@@ -446,15 +485,35 @@ Base.moduleroot
 __module__
 __source__
 Base.@__MODULE__
+Base.@__FUNCTION__
 Base.@__FILE__
 Base.@__DIR__
 Base.@__LINE__
 Base.fullname
 Base.names
+Base.isexported
+Base.ispublic
 Base.nameof(::Function)
 Base.functionloc(::Any, ::Any)
 Base.functionloc(::Method)
 Base.@locals
+Core.getglobal
+Core.setglobal!
+Core.modifyglobal!
+Core.swapglobal!
+Core.setglobalonce!
+Core.replaceglobal!
+Core.declare_const
+```
+
+## Documentation
+(See also the [documentation](@ref man-documentation) chapter.)
+```@docs
+Base.@doc
+Docs.HTML
+Docs.Text
+Docs.hasdoc
+Docs.undocumented_names
 ```
 
 ## Code loading
@@ -465,6 +524,7 @@ Base.locate_package
 Base.require
 Base.compilecache
 Base.isprecompiled
+Base.get_extension
 ```
 
 ## Internals
@@ -475,6 +535,7 @@ Base.GC.enable
 Base.GC.@preserve
 Base.GC.safepoint
 Base.GC.enable_logging
+Base.GC.logging_enabled
 Meta.lower
 Meta.@lower
 Meta.parse(::AbstractString, ::Int)
@@ -482,6 +543,7 @@ Meta.parse(::AbstractString)
 Meta.ParseError
 Core.QuoteNode
 Base.macroexpand
+Base.macroexpand!
 Base.@macroexpand
 Base.@macroexpand1
 Base.code_lowered
diff --git a/doc/src/base/c.md b/doc/src/base/c.md
index e221a6432542f..bf7e2577029fe 100644
--- a/doc/src/base/c.md
+++ b/doc/src/base/c.md
@@ -14,7 +14,7 @@ Base.unsafe_modify!
 Base.unsafe_replace!
 Base.unsafe_swap!
 Base.unsafe_copyto!{T}(::Ptr{T}, ::Ptr{T}, ::Any)
-Base.unsafe_copyto!{T}(::Array{T}, ::Any, ::Array{T}, ::Any, ::Any)
+Base.unsafe_copyto!(::Array, ::Any, ::Array, ::Any, ::Any)
 Base.copyto!
 Base.pointer
 Base.unsafe_wrap{T,N}(::Union{Type{Array},Type{Array{T}},Type{Array{T,N}}}, ::Ptr{T}, ::NTuple{N,Int})
diff --git a/doc/src/base/collections.md b/doc/src/base/collections.md
index 96f540086d021..55cf1ba5dd30d 100644
--- a/doc/src/base/collections.md
+++ b/doc/src/base/collections.md
@@ -36,15 +36,15 @@ Fully implemented by:
 
   * [`AbstractRange`](@ref)
   * [`UnitRange`](@ref)
-  * `Tuple`
-  * `Number`
+  * [`Tuple`](@ref)
+  * [`Number`](@ref)
   * [`AbstractArray`](@ref)
   * [`BitSet`](@ref)
   * [`IdDict`](@ref)
   * [`Dict`](@ref)
   * [`WeakKeyDict`](@ref)
   * `EachLine`
-  * `AbstractString`
+  * [`AbstractString`](@ref)
   * [`Set`](@ref)
   * [`Pair`](@ref)
   * [`NamedTuple`](@ref)
@@ -64,6 +64,7 @@ Base.LinRange
 
 ```@docs
 Base.isempty
+Base.isdone
 Base.empty!
 Base.length
 Base.checked_length
@@ -73,14 +74,14 @@ Fully implemented by:
 
   * [`AbstractRange`](@ref)
   * [`UnitRange`](@ref)
-  * `Tuple`
-  * `Number`
+  * [`Tuple`](@ref)
+  * [`Number`](@ref)
   * [`AbstractArray`](@ref)
   * [`BitSet`](@ref)
   * [`IdDict`](@ref)
   * [`Dict`](@ref)
   * [`WeakKeyDict`](@ref)
-  * `AbstractString`
+  * [`AbstractString`](@ref)
   * [`Set`](@ref)
   * [`NamedTuple`](@ref)
 
@@ -89,6 +90,7 @@ Fully implemented by:
 ```@docs
 Base.in
 Base.:∉
+Base.hasfastin
 Base.eltype
 Base.indexin
 Base.unique
@@ -164,8 +166,8 @@ Partially implemented by:
 
   * [`AbstractRange`](@ref)
   * [`UnitRange`](@ref)
-  * `Tuple`
-  * `AbstractString`
+  * [`Tuple`](@ref)
+  * [`AbstractString`](@ref)
   * [`Dict`](@ref)
   * [`IdDict`](@ref)
   * [`WeakKeyDict`](@ref)
@@ -193,7 +195,7 @@ Dictionaries may also be created with generators. For example, `Dict(i => f(i) f
 
 Given a dictionary `D`, the syntax `D[x]` returns the value of key `x` (if it exists) or throws
 an error, and `D[x] = y` stores the key-value pair `x => y` in `D` (replacing any existing value
-for the key `x`).  Multiple arguments to `D[...]` are converted to tuples; for example, the syntax
+for the key `x`). Multiple arguments to `D[...]` are converted to tuples; for example, the syntax
 `D[x,y]`  is equivalent to `D[(x,y)]`, i.e. it refers to the value keyed by the tuple `(x,y)`.
 
 ```@docs
@@ -202,6 +204,7 @@ Base.Dict
 Base.IdDict
 Base.WeakKeyDict
 Base.ImmutableDict
+Base.PersistentDict
 Base.haskey
 Base.get
 Base.get!
@@ -222,18 +225,20 @@ Base.valtype
 
 Fully implemented by:
 
-  * [`IdDict`](@ref)
   * [`Dict`](@ref)
+  * [`IdDict`](@ref)
   * [`WeakKeyDict`](@ref)
 
 Partially implemented by:
 
-  * [`BitSet`](@ref)
   * [`Set`](@ref)
+  * [`BitSet`](@ref)
+  * [`IdSet`](@ref)
   * [`EnvDict`](@ref Base.EnvDict)
   * [`Array`](@ref)
   * [`BitArray`](@ref)
   * [`ImmutableDict`](@ref Base.ImmutableDict)
+  * [`PersistentDict`](@ref Base.PersistentDict)
   * [`Iterators.Pairs`](@ref)
 
 ## Set-Like Collections
@@ -242,6 +247,7 @@ Partially implemented by:
 Base.AbstractSet
 Base.Set
 Base.BitSet
+Base.IdSet
 Base.union
 Base.union!
 Base.intersect
@@ -251,6 +257,7 @@ Base.symdiff
 Base.symdiff!
 Base.intersect!
 Base.issubset
+Base.in!
 Base.:⊈
 Base.:⊊
 Base.issetequal
@@ -259,14 +266,16 @@ Base.isdisjoint
 
 Fully implemented by:
 
-  * [`BitSet`](@ref)
   * [`Set`](@ref)
+  * [`BitSet`](@ref)
+  * [`IdSet`](@ref)
+
 
 Partially implemented by:
 
   * [`Array`](@ref)
 
-## Dequeues
+## Deques
 
 ```@docs
 Base.push!
diff --git a/doc/src/base/constants.md b/doc/src/base/constants.md
index 14ddbc02698d0..bd7cc1339a21e 100644
--- a/doc/src/base/constants.md
+++ b/doc/src/base/constants.md
@@ -10,6 +10,7 @@ Base.DEPOT_PATH
 Base.LOAD_PATH
 Base.Sys.BINDIR
 Base.Sys.CPU_THREADS
+Base.Sys.EFFECTIVE_CPU_THREADS
 Base.Sys.WORD_SIZE
 Base.Sys.KERNEL
 Base.Sys.ARCH
diff --git a/doc/src/base/file.md b/doc/src/base/file.md
index 9a9dc5d8a72f8..300738a39322d 100644
--- a/doc/src/base/file.md
+++ b/doc/src/base/file.md
@@ -1,6 +1,8 @@
 # Filesystem
 
 ```@docs
+Base.read(::String)
+Base.write(::String, ::Any)
 Base.Filesystem.pwd
 Base.Filesystem.cd(::AbstractString)
 Base.Filesystem.cd(::Function)
@@ -27,6 +29,7 @@ Base.Filesystem.operm
 Base.Filesystem.cp
 Base.download
 Base.Filesystem.mv
+Base.Filesystem.rename
 Base.Filesystem.rm
 Base.Filesystem.touch
 Base.Filesystem.tempname
diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md
index 68f144427a892..d82055e5b0921 100644
--- a/doc/src/base/io-network.md
+++ b/doc/src/base/io-network.md
@@ -3,13 +3,18 @@
 ## General I/O
 
 ```@docs
+IO
 Base.stdout
 Base.stderr
 Base.stdin
+Base.read(::AbstractString)
+Base.write(::AbstractString, ::Any)
 Base.open
 Base.IOStream
 Base.IOBuffer
 Base.take!(::Base.GenericIOBuffer)
+Base.Pipe
+Base.link_pipe!
 Base.fdio
 Base.flush
 Base.close
@@ -35,6 +40,7 @@ Base.eof
 Base.isreadonly
 Base.iswritable
 Base.isreadable
+Base.isexecutable
 Base.isopen
 Base.fd
 Base.redirect_stdio
@@ -110,17 +116,17 @@ PNG images in a window can register this capability with Julia, so that calling
 types with PNG representations will automatically display the image using the module's window.
 
 In order to define a new display backend, one should first create a subtype `D` of the abstract
-class [`AbstractDisplay`](@ref).  Then, for each MIME type (`mime` string) that can be displayed on `D`, one should
+class [`AbstractDisplay`](@ref). Then, for each MIME type (`mime` string) that can be displayed on `D`, one should
 define a function `display(d::D, ::MIME"mime", x) = ...` that displays `x` as that MIME type,
 usually by calling [`show(io, mime, x)`](@ref) or [`repr(io, mime, x)`](@ref).
 A [`MethodError`](@ref) should be thrown if `x` cannot be displayed
 as that MIME type; this is automatic if one calls `show` or `repr`. Finally, one should define a function
 `display(d::D, x)` that queries [`showable(mime, x)`](@ref) for the `mime` types supported by `D`
 and displays the "best" one; a `MethodError` should be thrown if no supported MIME types are found
-for `x`.  Similarly, some subtypes may wish to override [`redisplay(d::D, ...)`](@ref Base.Multimedia.redisplay). (Again, one should
+for `x`. Similarly, some subtypes may wish to override [`redisplay(d::D, ...)`](@ref Base.Multimedia.redisplay). (Again, one should
 `import Base.display` to add new methods to `display`.) The return values of these functions are
 up to the implementation (since in some cases it may be useful to return a display "handle" of
-some type).  The display functions for `D` can then be called directly, but they can also be invoked
+some type). The display functions for `D` can then be called directly, but they can also be invoked
 automatically from [`display(x)`](@ref) simply by pushing a new display onto the display-backend stack
 with:
 
diff --git a/doc/src/base/iterators.md b/doc/src/base/iterators.md
index 1c4831e52bc14..042c20f55a21d 100644
--- a/doc/src/base/iterators.md
+++ b/doc/src/base/iterators.md
@@ -10,6 +10,7 @@ Base.Iterators.take
 Base.Iterators.takewhile
 Base.Iterators.drop
 Base.Iterators.dropwhile
+Base.Iterators.findeach
 Base.Iterators.cycle
 Base.Iterators.repeated
 Base.Iterators.product
diff --git a/doc/src/base/libc.md b/doc/src/base/libc.md
index 08d2670123234..b598baaa16bab 100644
--- a/doc/src/base/libc.md
+++ b/doc/src/base/libc.md
@@ -17,6 +17,9 @@ Base.Libc.time(::Base.Libc.TmStruct)
 Base.Libc.strftime
 Base.Libc.strptime
 Base.Libc.TmStruct
+Base.Libc.FILE
+Base.Libc.dup
 Base.Libc.flush_cstdio
 Base.Libc.systemsleep
+Base.Libc.mkfifo
 ```
diff --git a/doc/src/base/math.md b/doc/src/base/math.md
index 62368424629c6..6ac6f36559130 100644
--- a/doc/src/base/math.md
+++ b/doc/src/base/math.md
@@ -14,10 +14,12 @@ Base.fma
 Base.muladd
 Base.inv(::Number)
 Base.div
+Base.div(::Any, ::Any, ::RoundingMode)
 Base.fld
 Base.cld
 Base.mod
 Base.rem
+Base.rem(::Any, ::Any, ::RoundingMode)
 Base.rem2pi
 Base.Math.mod2pi
 Base.divrem
@@ -33,10 +35,13 @@ Base.:(<<)
 Base.:(>>)
 Base.:(>>>)
 Base.bitrotate
-Base.:(:)
+Base.:(:)(::Any, ::Any, ::Any)
+Base.:(:)(::CartesianIndex, ::CartesianIndex, ::CartesianIndex)
 Base.range
 Base.OneTo
 Base.StepRangeLen
+Base.logrange
+Base.LogRange
 Base.:(==)
 Base.:(!=)
 Base.:(!==)
@@ -70,6 +75,7 @@ Base.Math.tand
 Base.Math.sincosd
 Base.Math.sinpi
 Base.Math.cospi
+Base.Math.tanpi
 Base.Math.sincospi
 Base.sinh(::Number)
 Base.cosh(::Number)
@@ -118,7 +124,7 @@ Base.exp10
 Base.Math.ldexp
 Base.Math.modf
 Base.expm1
-Base.round(::Type, ::Any)
+Base.round
 Base.Rounding.RoundingMode
 Base.Rounding.RoundNearest
 Base.Rounding.RoundNearestTiesAway
@@ -138,6 +144,7 @@ Base.minmax
 Base.Math.clamp
 Base.Math.clamp!
 Base.abs
+Base.Checked
 Base.Checked.checked_abs
 Base.Checked.checked_neg
 Base.Checked.checked_add
@@ -148,6 +155,7 @@ Base.Checked.checked_rem
 Base.Checked.checked_fld
 Base.Checked.checked_mod
 Base.Checked.checked_cld
+Base.Checked.checked_pow
 Base.Checked.add_with_overflow
 Base.Checked.sub_with_overflow
 Base.Checked.mul_with_overflow
@@ -158,7 +166,8 @@ Base.signbit
 Base.flipsign
 Base.sqrt(::Number)
 Base.isqrt
-Base.Math.cbrt
+Base.Math.cbrt(::AbstractFloat)
+Base.fourthroot(::Number)
 Base.real
 Base.imag
 Base.reim
@@ -179,6 +188,7 @@ Base.invmod
 Base.powermod
 Base.ndigits
 Base.add_sum
+Base.uabs
 Base.widemul
 Base.Math.evalpoly
 Base.Math.@evalpoly
diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md
index 45a60b14d541a..88dc2b7514a2a 100644
--- a/doc/src/base/multi-threading.md
+++ b/doc/src/base/multi-threading.md
@@ -25,19 +25,13 @@ atomic
 Base.@atomic
 Base.@atomicswap
 Base.@atomicreplace
+Base.@atomiconce
+Base.AtomicMemory
 ```
 
-!!! note
-
-    The following APIs are fairly primitive, and will likely be exposed through an `unsafe_*`-like wrapper.
-
-```
-Core.Intrinsics.atomic_pointerref(pointer::Ptr{T}, order::Symbol) --> T
-Core.Intrinsics.atomic_pointerset(pointer::Ptr{T}, new::T, order::Symbol) --> pointer
-Core.Intrinsics.atomic_pointerswap(pointer::Ptr{T}, new::T, order::Symbol) --> old
-Core.Intrinsics.atomic_pointermodify(pointer::Ptr{T}, function::(old::T,arg::S)->T, arg::S, order::Symbol) --> old
-Core.Intrinsics.atomic_pointerreplace(pointer::Ptr{T}, expected::Any, new::T, success_order::Symbol, failure_order::Symbol) --> (old, cmp)
-```
+There are also optional memory ordering parameters for the `unsafe` set of functions, that
+select the C/C++-compatible versions of these atomic operations, if that parameter is specified to
+[`unsafe_load`](@ref), [`unsafe_store!`](@ref), [`unsafe_swap!`](@ref), [`unsafe_replace!`](@ref), and [`unsafe_modify!`](@ref).
 
 !!! warning
 
@@ -69,5 +63,15 @@ Base.@threadcall
 These building blocks are used to create the regular synchronization objects.
 
 ```@docs
+Base.Threads.AbstractSpinLock
 Base.Threads.SpinLock
+Base.Threads.PaddedSpinLock
+```
+
+## Task metrics (Experimental)
+
+```@docs
+Base.Experimental.task_metrics
+Base.Experimental.task_running_time_ns
+Base.Experimental.task_wall_time_ns
 ```
diff --git a/doc/src/base/numbers.md b/doc/src/base/numbers.md
index 8167650ac17d1..0bd9d2d4c57d0 100644
--- a/doc/src/base/numbers.md
+++ b/doc/src/base/numbers.md
@@ -63,6 +63,8 @@ Core.Int64
 Core.UInt64
 Core.Int128
 Core.UInt128
+Base.Int
+Base.UInt
 Base.BigInt
 Base.Complex
 Base.Rational
@@ -146,7 +148,7 @@ Base.@uint128_str
 
 ## [BigFloats and BigInts](@id BigFloats-and-BigInts)
 
-The [`BigFloat`](@ref) and [`BigInt`](@ref) types implements
+The [`BigFloat`](@ref) and [`BigInt`](@ref) types implement
 arbitrary-precision floating point and integer arithmetic, respectively. For
 [`BigFloat`](@ref) the [GNU MPFR library](https://www.mpfr.org/) is used,
 and for [`BigInt`](@ref) the [GNU Multiple Precision Arithmetic Library (GMP)]
diff --git a/doc/src/base/parallel.md b/doc/src/base/parallel.md
index c9f24429fd0e5..e382e8edc56ee 100644
--- a/doc/src/base/parallel.md
+++ b/doc/src/base/parallel.md
@@ -13,6 +13,7 @@ Base.istaskfailed
 Base.task_local_storage(::Any)
 Base.task_local_storage(::Any, ::Any)
 Base.task_local_storage(::Function, ::Any, ::Any)
+Core.ConcurrencyViolationError
 ```
 
 ## Scheduling
@@ -30,6 +31,8 @@ Base.schedule
 Base.errormonitor
 Base.@sync
 Base.wait
+Base.waitany
+Base.waitall
 Base.fetch(t::Task)
 Base.fetch(x::Any)
 Base.timedwait
@@ -50,6 +53,8 @@ Base.unlock
 Base.trylock
 Base.islocked
 Base.ReentrantLock
+Base.@lock
+Base.Lockable
 ```
 
 ## Channels
@@ -60,7 +65,9 @@ Base.Channel
 Base.Channel(::Function)
 Base.put!(::Channel, ::Any)
 Base.take!(::Channel)
+Base.isfull(::Channel)
 Base.isready(::Channel)
+Base.isopen(::Channel)
 Base.fetch(::Channel)
 Base.close(::Channel)
 Base.bind(c::Channel, task::Task)
@@ -69,11 +76,11 @@ Base.bind(c::Channel, task::Task)
 ## [Low-level synchronization using `schedule` and `wait`](@id low-level-schedule-wait)
 
 The easiest correct use of [`schedule`](@ref) is on a `Task` that is not started (scheduled)
-yet.  However, it is possible to use [`schedule`](@ref) and [`wait`](@ref) as a very
-low-level building block for constructing synchronization interfaces.  A crucial
+yet. However, it is possible to use [`schedule`](@ref) and [`wait`](@ref) as a very
+low-level building block for constructing synchronization interfaces. A crucial
 pre-condition of calling `schedule(task)` is that the caller must "own" the `task`; i.e., it
 must know that the call to `wait` in the given `task` is happening at the locations known to
-the code calling `schedule(task)`.  One strategy for ensuring such pre-condition is to use
+the code calling `schedule(task)`. One strategy for ensuring such pre-condition is to use
 atomics, as demonstrated in the following example:
 
 ```jldoctest
@@ -118,8 +125,8 @@ function Base.wait(ev::OneWayEvent)
     state, ok = @atomicreplace(ev.state, OWE_EMPTY => OWE_WAITING)
     if ok
         # OWE_EMPTY -> OWE_WAITING transition means that the notifier task is guaranteed to
-        # invoke OWE_WAITING -> OWE_NOTIFYING transition.  The waiter task must call
-        # `wait()` immediately.  In particular, it MUST NOT invoke any function that may
+        # invoke OWE_WAITING -> OWE_NOTIFYING transition. The waiter task must call
+        # `wait()` immediately. In particular, it MUST NOT invoke any function that may
         # yield to the scheduler at this point in code.
         wait()
     else
@@ -132,7 +139,7 @@ end
 
 ev = OneWayEvent()
 @sync begin
-    @async begin
+    Threads.@spawn begin
         wait(ev)
         println("done")
     end
@@ -145,12 +152,12 @@ notifying...
 done
 ```
 
-`OneWayEvent` lets one task to `wait` for another task's `notify`.  It is a limited
+`OneWayEvent` lets one task to `wait` for another task's `notify`. It is a limited
 communication interface since `wait` can only be used once from a single task (note the
 non-atomic assignment of `ev.task`)
 
 In this example, `notify(ev::OneWayEvent)` is allowed to call `schedule(ev.task)` if and
-only if *it* modifies the state from `OWE_WAITING` to `OWE_NOTIFYING`.  This lets us know that
+only if *it* modifies the state from `OWE_WAITING` to `OWE_NOTIFYING`. This lets us know that
 the task executing `wait(ev::OneWayEvent)` is now in the `ok` branch and that there cannot be
-other tasks that tries to `schedule(ev.task)` since their
+other tasks that try to `schedule(ev.task)` since their
 `@atomicreplace(ev.state, state => OWE_NOTIFYING)` will fail.
diff --git a/doc/src/base/punctuation.md b/doc/src/base/punctuation.md
index dbea97e4e3cb5..8956cebd53971 100644
--- a/doc/src/base/punctuation.md
+++ b/doc/src/base/punctuation.md
@@ -22,6 +22,7 @@ Extended documentation for mathematical symbols & functions is [here](@ref math-
 | `'`         | a trailing apostrophe is the [`adjoint`](@ref) (that is, the complex transpose) operator Aᴴ |
 | [`*`](@ref) | the asterisk is used for multiplication, including matrix multiplication and [string concatenation](@ref man-concatenation) |
 | [`/`](@ref) | forward slash divides the argument on its left by the one on its right                      |
+| [`//`](@ref) | double forward slash performs exact, rational division                                     |
 | [`\`](@ref) | backslash operator divides the argument on its right by the one on its left, commonly used to solve matrix equations |
 | `()`        | parentheses with no arguments constructs an empty [`Tuple`](@ref)                           |
 | `(a,...)`   | parentheses with comma-separated arguments constructs a tuple containing its arguments      |
diff --git a/doc/src/base/reflection.md b/doc/src/base/reflection.md
index e9da82475fd68..fecc40e0b626d 100644
--- a/doc/src/base/reflection.md
+++ b/doc/src/base/reflection.md
@@ -4,9 +4,9 @@ Julia provides a variety of runtime reflection capabilities.
 
 ## Module bindings
 
-The exported names for a `Module` are available using [`names(m::Module)`](@ref), which will return
-an array of [`Symbol`](@ref) elements representing the exported bindings. `names(m::Module, all = true)`
-returns symbols for all bindings in `m`, regardless of export status.
+The public names for a `Module` are available using [`names(m::Module)`](@ref), which will return
+an array of [`Symbol`](@ref) elements representing the public bindings. `names(m::Module, all = true)`
+returns symbols for all bindings in `m`, regardless of public status.
 
 ## DataType fields
 
@@ -51,9 +51,10 @@ The *direct* subtypes of any `DataType` may be listed using [`subtypes`](@ref).
 the abstract `DataType` [`AbstractFloat`](@ref) has four (concrete) subtypes:
 
 ```jldoctest; setup = :(using InteractiveUtils)
-julia> subtypes(AbstractFloat)
-4-element Vector{Any}:
+julia> InteractiveUtils.subtypes(AbstractFloat)
+5-element Vector{Any}:
  BigFloat
+ Core.BFloat16
  Float16
  Float32
  Float64
@@ -62,6 +63,9 @@ julia> subtypes(AbstractFloat)
 Any abstract subtype will also be included in this list, but further subtypes thereof will not;
 recursive application of [`subtypes`](@ref) may be used to inspect the full type tree.
 
+Note that [`subtypes`](@ref) is located inside [`InteractiveUtils`](@ref man-interactive-utils) but
+is automatically exported when using the REPL.
+
 ## DataType layout
 
 The internal representation of a `DataType` is critically important when interfacing with C code
@@ -81,9 +85,9 @@ the unquoted and interpolated expression ([`Expr`](@ref)) form for a given macro
 `quote` the expression block itself (otherwise, the macro will be evaluated and the result will
 be passed instead!). For example:
 
-```jldoctest; setup = :(using InteractiveUtils)
-julia> macroexpand(@__MODULE__, :(@edit println("")) )
-:(InteractiveUtils.edit(println, (Base.typesof)("")))
+```jldoctest
+julia> macroexpand(@__MODULE__, :(@invoke identity(1::Int)))
+:(Core.invoke(identity, Base.Tuple{Int}, 1))
 ```
 
 The functions `Base.Meta.show_sexpr` and [`dump`](@ref) are used to display S-expr style views
@@ -93,14 +97,15 @@ Finally, the [`Meta.lower`](@ref) function gives the `lowered` form of any expre
 particular interest for understanding how language constructs map to primitive operations such
 as assignments, branches, and calls:
 
-```jldoctest
+```jldoctest; setup = (using Base: +, sin)
 julia> Meta.lower(@__MODULE__, :( [1+2, sin(0.5)] ))
 :($(Expr(:thunk, CodeInfo(
-    @ none within `top-level scope`
-1 ─ %1 = 1 + 2
-│   %2 = sin(0.5)
-│   %3 = Base.vect(%1, %2)
-└──      return %3
+1 ─ %1 = :+
+│   %2 =   dynamic (%1)(1, 2)
+│   %3 = sin
+│   %4 =   dynamic (%3)(0.5)
+│   %5 =   dynamic Base.vect(%2, %4)
+└──      return %5
 ))))
 ```
 
@@ -137,13 +142,13 @@ For more information see [`@code_lowered`](@ref), [`@code_typed`](@ref), [`@code
 ### Printing of debug information
 
 The aforementioned functions and macros take the keyword argument `debuginfo` that controls the level
-debug information printed.
+of debug information printed.
 
-```julia-repl
-julia> @code_typed debuginfo=:source +(1,1)
+```jldoctest; setup = :(using InteractiveUtils), filter = r"int.jl:\d+"
+julia> InteractiveUtils.@code_typed debuginfo=:source +(1,1)
 CodeInfo(
-    @ int.jl:53 within `+'
-1 ─ %1 = Base.add_int(x, y)::Int64
+    @ int.jl:87 within `+`
+1 ─ %1 = intrinsic Base.add_int(x, y)::Int64
 └──      return %1
 ) => Int64
 ```
diff --git a/doc/src/base/scopedvalues.md b/doc/src/base/scopedvalues.md
new file mode 100644
index 0000000000000..21d075daf9389
--- /dev/null
+++ b/doc/src/base/scopedvalues.md
@@ -0,0 +1,336 @@
+# [Scoped Values](@id scoped-values)
+
+Scoped values provide an implementation of dynamic scoping in Julia.
+
+!!! note "Lexical scoping vs dynamic scoping"
+    [Lexical scoping](@ref scope-of-variables) is the default behavior in Julia.
+    Under lexical scoping the scope of a variable is determined by the lexical
+    (textual) structure of a program.
+    Under dynamic scoping a variable is bound to the most recent assigned value
+    during the program's execution.
+
+The state of a scoped value is dependent on the execution path of the program.
+This means that for a scoped value you may observe multiple different values
+concurrently.
+
+!!! compat "Julia 1.11"
+    Scoped values were introduced in Julia 1.11. In Julia 1.8+ a compatible
+    implementation is available from the package ScopedValues.jl.
+
+In its simplest form you can create a [`ScopedValue`](@ref Base.ScopedValues.ScopedValue)
+with a default value and then use [`with`](@ref Base.ScopedValues.with) or
+[`@with`](@ref Base.ScopedValues.@with) to enter a new dynamic scope. The new scope will
+inherit all values from the parent scope (and recursively from all outer scopes) with the
+provided scoped value taking priority over previous definitions.
+
+Let's first look at an example of **lexical** scope. A `let` statement begins
+a new lexical scope within which the outer definition of `x` is shadowed by
+its inner definition.
+
+```jldoctest
+julia> x = 1
+1
+
+julia> let x = 5
+           @show x
+       end;
+x = 5
+
+julia> @show x;
+x = 1
+```
+
+In the following example, since Julia uses lexical scope, the variable `x` in the body
+of `f` refers to the `x` defined in the global scope, and entering a `let` scope does
+not change the value `f` observes.
+
+```jldoctest
+julia> x = 1
+1
+
+julia> f() = @show x
+f (generic function with 1 method)
+
+julia> let x = 5
+           f()
+       end;
+x = 1
+
+julia> f();
+x = 1
+```
+
+Now using a `ScopedValue` we can use **dynamic** scoping.
+
+```jldoctest
+julia> using Base.ScopedValues
+
+julia> x = ScopedValue(1)
+ScopedValue{Int64}(1)
+
+julia> f() = @show x[]
+f (generic function with 1 method)
+
+julia> with(x=>5) do
+           f()
+       end;
+x[] = 5
+
+julia> f();
+x[] = 1
+```
+
+Note that the observed value of the `ScopedValue` is dependent on the execution
+path of the program.
+
+It often makes sense to use a `const` variable to point to a scoped value,
+and you can set the value of multiple `ScopedValue`s with one call to `with`.
+
+
+```julia
+using Base.ScopedValues
+
+f() = @show a[]
+g() = @show b[]
+
+const a = ScopedValue(1)
+const b = ScopedValue(2)
+
+f() # a[] = 1
+g() # b[] = 2
+
+# Enter a new dynamic scope and set value.
+with(a => 3) do
+    f() # a[] = 3
+    g() # b[] = 2
+    with(a => 4, b => 5) do
+        f() # a[] = 4
+        g() # b[] = 5
+    end
+    f() # a[] = 3
+    g() # b[] = 2
+end
+
+f() # a[] = 1
+g() # b[] = 2
+```
+
+`ScopedValues` provides a macro version of `with`. The expression `@with var=>val expr`
+evaluates `expr` in a new dynamic scope with `var` set to `val`. `@with var=>val expr`
+is equivalent to `with(var=>val) do expr end`. However, `with` requires a zero-argument
+closure or function, which results in an extra call-frame. As an example, consider the
+following function `f`:
+
+```julia
+using Base.ScopedValues
+const a = ScopedValue(1)
+f(x) = a[] + x
+```
+
+If you wish to run `f` in a dynamic scope with `a` set to `2`, then you can use `with`:
+
+```julia
+with(() -> f(10), a=>2)
+```
+
+However, this requires wrapping `f` in a zero-argument function. If you wish to avoid
+the extra call-frame, then you can use the `@with` macro:
+
+```julia
+@with a=>2 f(10)
+```
+
+!!! note
+    Dynamic scopes are inherited by [`Task`](@ref)s, at the moment of task creation. Dynamic scopes are **not** propagated through `Distributed.jl` operations.
+
+In the example below we open a new dynamic scope before launching a task.
+The parent task and the two child tasks observe independent values of the
+same scoped value at the same time.
+
+```julia
+using Base.ScopedValues
+import Base.Threads: @spawn
+
+const scoped_val = ScopedValue(1)
+@sync begin
+    with(scoped_val => 2)
+        @spawn @show scoped_val[] # 2
+    end
+    with(scoped_val => 3)
+        @spawn @show scoped_val[] # 3
+    end
+    @show scoped_val[] # 1
+end
+```
+
+Scoped values are constant throughout a scope, but you can store mutable
+state in a scoped value. Just keep in mind that the usual caveats
+for global variables apply in the context of concurrent programming.
+
+Care is also required when storing references to mutable state in scoped
+values. You might want to explicitly [unshare mutable state](@ref unshare_mutable_state)
+when entering a new dynamic scope.
+
+```julia
+using Base.ScopedValues
+import Base.Threads: @spawn
+
+const sval_dict = ScopedValue(Dict())
+
+# Example of using a mutable value wrongly
+@sync begin
+    # `Dict` is not thread-safe the usage below is invalid
+    @spawn (sval_dict[][:a] = 3)
+    @spawn (sval_dict[][:b] = 3)
+end
+
+@sync begin
+    # If we instead pass a unique dictionary to each
+    # task we can access the dictionaries race free.
+    with(sval_dict => Dict()) do
+        @spawn (sval_dict[][:a] = 3)
+    end
+    with(sval_dict => Dict()) do
+        @spawn (sval_dict[][:b] = 3)
+    end
+end
+```
+
+## Example
+
+In the example below we use a scoped value to implement a permission check in
+a web-application. After determining the permissions of the request,
+a new dynamic scope is entered and the scoped value `LEVEL` is set.
+Other parts of the application can query the scoped value and will receive
+the appropriate value. Other alternatives like task-local storage and global variables
+are not well suited for this kind of propagation; our only alternative would have
+been to thread a value through the entire call-chain.
+
+```julia
+using Base.ScopedValues
+
+const LEVEL = ScopedValue(:GUEST)
+
+function serve(request, response)
+    level = isAdmin(request) ? :ADMIN : :GUEST
+    with(LEVEL => level) do
+        Threads.@spawn handle(request, response)
+    end
+end
+
+function open(connection::Database)
+    level = LEVEL[]
+    if level !== :ADMIN
+        error("Access disallowed")
+    end
+    # ... open connection
+end
+
+function handle(request, response)
+    # ...
+    open(Database(#=...=#))
+    # ...
+end
+```
+
+## Idioms
+### [Unshare mutable state](@id unshare_mutable_state)
+
+```julia
+using Base.ScopedValues
+import Base.Threads: @spawn
+
+const sval_dict = ScopedValue(Dict())
+
+# If you want to add new values to the dict, instead of replacing
+# it, unshare the values explicitly. In this example we use `merge`
+# to unshare the state of the dictionary in parent scope.
+@sync begin
+    with(sval_dict => merge(sval_dict[], Dict(:a => 10))) do
+        @spawn @show sval_dict[][:a]
+    end
+    @spawn sval_dict[][:a] = 3 # Not a race since they are unshared.
+end
+```
+
+### Scoped values as globals
+
+In order to access the value of a scoped value, the scoped value itself has to
+be in (lexical) scope. This means most often you likely want to use scoped values
+as constant globals.
+
+```julia
+using Base.ScopedValues
+const sval = ScopedValue(1)
+```
+
+Indeed one can think of scoped values as hidden function arguments.
+
+This does not preclude their use as non-globals.
+
+```julia
+using Base.ScopedValues
+import Base.Threads: @spawn
+
+function main()
+    role = ScopedValue(:client)
+
+    function launch()
+        #...
+        role[]
+    end
+
+    @with role => :server @spawn launch()
+    launch()
+end
+```
+
+But it might have been simpler to just directly pass the function argument
+in these cases.
+
+### Very many ScopedValues
+
+If you find yourself creating many `ScopedValue`'s for one given module,
+it may be better to use a dedicated struct to hold them.
+
+```julia
+using Base.ScopedValues
+
+Base.@kwdef struct Configuration
+    color::Bool = false
+    verbose::Bool = false
+end
+
+const CONFIG = ScopedValue(Configuration(color=true))
+
+@with CONFIG => Configuration(color=CONFIG[].color, verbose=true) begin
+    @show CONFIG[].color # true
+    @show CONFIG[].verbose # true
+end
+```
+
+## API docs
+
+```@docs
+Base.ScopedValues.ScopedValue
+Base.ScopedValues.with
+Base.ScopedValues.@with
+Base.isassigned(::Base.ScopedValues.ScopedValue)
+Base.ScopedValues.get
+```
+
+## Implementation notes and performance
+
+`Scope`s use a persistent dictionary. Lookup and insertion is `O(log(32, n))`,
+upon dynamic scope entry a small amount of data is copied and the unchanged
+data is shared among other scopes.
+
+The `Scope` object itself is not user-facing and may be changed in a future
+version of Julia.
+
+## Design inspiration
+
+This design was heavily inspired by [JEPS-429](https://openjdk.org/jeps/429),
+which in turn was inspired by dynamically scoped free variables in many Lisp dialects. In particular Interlisp-D and its deep binding strategy.
+
+A prior design discussed was context variables ala [PEPS-567](https://peps.python.org/pep-0567/) and implemented in Julia as [ContextVariablesX.jl](https://github.com/tkf/ContextVariablesX.jl).
diff --git a/doc/src/base/sort.md b/doc/src/base/sort.md
index 455a95d39617c..cde0d571424a0 100644
--- a/doc/src/base/sort.md
+++ b/doc/src/base/sort.md
@@ -1,7 +1,7 @@
 # Sorting and Related Functions
 
-Julia has an extensive, flexible API for sorting and interacting with already-sorted arrays of
-values. By default, Julia picks reasonable algorithms and sorts in standard ascending order:
+Julia has an extensive, flexible API for sorting and interacting with already-sorted arrays
+of values. By default, Julia picks reasonable algorithms and sorts in ascending order:
 
 ```jldoctest
 julia> sort([2,3,1])
@@ -11,7 +11,7 @@ julia> sort([2,3,1])
  3
 ```
 
-You can easily sort in reverse order as well:
+You can sort in reverse order as well:
 
 ```jldoctest
 julia> sort([2,3,1], rev=true)
@@ -36,12 +36,12 @@ julia> a
  3
 ```
 
-Instead of directly sorting an array, you can compute a permutation of the array's indices that
-puts the array into sorted order:
+Instead of directly sorting an array, you can compute a permutation of the array's
+indices that puts the array into sorted order:
 
-```julia-repl
-julia> v = randn(5)
-5-element Array{Float64,1}:
+```jldoctest sort_example
+julia> v = [0.297288, 0.382396, -0.597634, -0.0104452, -0.839027]
+5-element Vector{Float64}:
   0.297288
   0.382396
  -0.597634
@@ -49,7 +49,7 @@ julia> v = randn(5)
  -0.839027
 
 julia> p = sortperm(v)
-5-element Array{Int64,1}:
+5-element Vector{Int64}:
  5
  3
  4
@@ -57,7 +57,7 @@ julia> p = sortperm(v)
  2
 
 julia> v[p]
-5-element Array{Float64,1}:
+5-element Vector{Float64}:
  -0.839027
  -0.597634
  -0.0104452
@@ -65,11 +65,11 @@ julia> v[p]
   0.382396
 ```
 
-Arrays can easily be sorted according to an arbitrary transformation of their values:
+Arrays can be sorted according to an arbitrary transformation of their values:
 
-```julia-repl
+```jldoctest sort_example
 julia> sort(v, by=abs)
-5-element Array{Float64,1}:
+5-element Vector{Float64}:
  -0.0104452
   0.297288
   0.382396
@@ -79,9 +79,9 @@ julia> sort(v, by=abs)
 
 Or in reverse order by a transformation:
 
-```julia-repl
+```jldoctest sort_example
 julia> sort(v, by=abs, rev=true)
-5-element Array{Float64,1}:
+5-element Vector{Float64}:
  -0.839027
  -0.597634
   0.382396
@@ -91,9 +91,9 @@ julia> sort(v, by=abs, rev=true)
 
 If needed, the sorting algorithm can be chosen:
 
-```julia-repl
+```jldoctest sort_example
 julia> sort(v, alg=InsertionSort)
-5-element Array{Float64,1}:
+5-element Vector{Float64}:
  -0.839027
  -0.597634
  -0.0104452
@@ -101,9 +101,12 @@ julia> sort(v, alg=InsertionSort)
   0.382396
 ```
 
-All the sorting and order related functions rely on a "less than" relation defining a total order
-on the values to be manipulated. The `isless` function is invoked by default, but the relation
-can be specified via the `lt` keyword.
+All the sorting and order related functions rely on a "less than" relation defining a
+[strict weak order](https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings)
+on the values to be manipulated. The `isless` function is invoked by default, but the
+relation can be specified via the `lt` keyword, a function that takes two array elements
+and returns `true` if and only if the first argument is "less than" the second. See
+[`sort!`](@ref) and [Alternate Orderings](@ref) for more information.
 
 ## Sorting Functions
 
@@ -165,22 +168,17 @@ Base.Sort.defalg(::AbstractArray{<:Union{SmallInlineStrings, Missing}}) = Inline
 
 ## Alternate Orderings
 
-By default, `sort` and related functions use [`isless`](@ref) to compare two
-elements in order to determine which should come first. The
-[`Base.Order.Ordering`](@ref) abstract type provides a mechanism for defining
-alternate orderings on the same set of elements: when calling a sorting function like
-`sort`, an instance of `Ordering` can be provided with the keyword argument `order`.
-
-Instances of `Ordering` define a [total order](https://en.wikipedia.org/wiki/Total_order)
-on a set of elements, so that for any elements `a`, `b`, `c` the following hold:
-
-* Exactly one of the following is true: `a` is less than `b`, `b` is less than
-  `a`, or `a` and `b` are equal (according to [`isequal`](@ref)).
-* The relation is transitive - if `a` is less than `b` and `b` is less than `c`
-  then `a` is less than `c`.
-
-The [`Base.Order.lt`](@ref) function works as a generalization of `isless` to
-test whether `a` is less than `b` according to a given order.
+By default, `sort`, `searchsorted`, and related functions use [`isless`](@ref) to compare
+two elements in order to determine which should come first. The
+[`Base.Order.Ordering`](@ref) abstract type provides a mechanism for defining alternate
+orderings on the same set of elements: when calling a sorting function like
+`sort!`, an instance of `Ordering` can be provided with the keyword argument `order`.
+
+Instances of `Ordering` define an order through the [`Base.Order.lt`](@ref)
+function, which works as a generalization of `isless`.
+This function's behavior on custom `Ordering`s must satisfy all the conditions of a
+[strict weak order](https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings).
+See [`sort!`](@ref) for details and examples of valid and invalid `lt` functions.
 
 ```@docs
 Base.Order.Ordering
diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md
index 226e8aa92a989..3b415202559f4 100644
--- a/doc/src/base/strings.md
+++ b/doc/src/base/strings.md
@@ -29,11 +29,15 @@ Base.SubstitutionString
 Base.@s_str
 Base.@raw_str
 Base.@b_str
+Base.takestring!
 Base.Docs.@html_str
 Base.Docs.@text_str
 Base.isvalid(::Any)
 Base.isvalid(::Any, ::Any)
 Base.isvalid(::AbstractString, ::Integer)
+Base.ismalformed
+Base.isoverlong
+Base.show_invalid
 Base.match
 Base.eachmatch
 Base.RegexMatch
@@ -43,6 +47,9 @@ Base.:(==)(::AbstractString, ::AbstractString)
 Base.cmp(::AbstractString, ::AbstractString)
 Base.lpad
 Base.rpad
+Base.ltruncate
+Base.rtruncate
+Base.ctruncate
 Base.findfirst(::AbstractString, ::AbstractString)
 Base.findnext(::AbstractString, ::AbstractString, ::Integer)
 Base.findnext(::AbstractChar, ::AbstractString, ::Integer)
@@ -53,6 +60,7 @@ Base.occursin
 Base.reverse(::Union{String,SubString{String}})
 Base.replace(::IO, s::AbstractString, ::Pair...)
 Base.eachsplit
+Base.eachrsplit
 Base.split
 Base.rsplit
 Base.strip
@@ -74,8 +82,8 @@ Base.chopprefix
 Base.chopsuffix
 Base.chomp
 Base.thisind
-Base.nextind
-Base.prevind
+Base.nextind(::AbstractString, ::Integer, ::Integer)
+Base.prevind(::AbstractString, ::Integer, ::Integer)
 Base.textwidth
 Base.isascii
 Base.iscntrl
@@ -89,5 +97,20 @@ Base.isspace
 Base.isuppercase
 Base.isxdigit
 Base.escape_string
+Base.escape_raw_string
 Base.unescape_string
 ```
+
+## `AnnotatedString`s
+
+!!! note
+    The API for AnnotatedStrings is considered experimental and is subject to change between
+    Julia versions.
+
+```@docs
+Base.AnnotatedString
+Base.AnnotatedChar
+Base.annotatedstring
+Base.annotations
+Base.annotate!
+```
diff --git a/doc/src/devdocs/EscapeAnalysis.md b/doc/src/devdocs/EscapeAnalysis.md
index 983a6782ccc79..9c568541ab7c0 100644
--- a/doc/src/devdocs/EscapeAnalysis.md
+++ b/doc/src/devdocs/EscapeAnalysis.md
@@ -1,6 +1,6 @@
 # `EscapeAnalysis`
 
-`Core.Compiler.EscapeAnalysis` is a compiler utility module that aims to analyze
+`Compiler.EscapeAnalysis` is a compiler utility module that aims to analyze
 escape information of [Julia's SSA-form IR](@ref Julia-SSA-form-IR) a.k.a. `IRCode`.
 
 This escape analysis aims to:
@@ -18,9 +18,14 @@ This escape analysis aims to:
 ## Try it out!
 
 You can give a try to the escape analysis by loading the `EAUtils.jl` utility script that
-define the convenience entries `code_escapes` and `@code_escapes` for testing and debugging purposes:
+defines the convenience entries `code_escapes` and `@code_escapes` for testing and debugging purposes:
 ```@repl EAUtils
-include(normpath(Sys.BINDIR, "..", "share", "julia", "test", "compiler", "EscapeAnalysis", "EAUtils.jl")); using .EAUtils
+# InteractiveUtils.@activate Compiler # to use the stdlib version of the Compiler
+
+let JULIA_DIR = normpath(Sys.BINDIR, Base.DATAROOTDIR, "julia")
+    include(normpath(JULIA_DIR, "Compiler", "test", "EAUtils.jl"))
+    using .EAUtils
+end
 
 mutable struct SafeRef{T}
     x::T
@@ -30,29 +35,27 @@ Base.setindex!(x::SafeRef, v) = x.x = v;
 Base.isassigned(x::SafeRef) = true;
 get′(x) = isassigned(x) ? x[] : throw(x);
 
-result = code_escapes((String,String,String,String)) do s1, s2, s3, s4
-    r1 = Ref(s1)
+result = code_escapes((Base.RefValue{String},String,String,)) do r1, s2, s3
     r2 = Ref(s2)
     r3 = SafeRef(s3)
     try
         s1 = get′(r1)
         ret = sizeof(s1)
     catch err
-        global GV = err # will definitely escape `r1`
+        global GV = err # `r1` may escape
     end
-    s2 = get′(r2)       # still `r2` doesn't escape fully
-    s3 = get′(r3)       # still `r3` doesn't escape fully
-    s4 = sizeof(s4)     # the argument `s4` doesn't escape here
+    s2 = get′(r2)       # `r2` doesn't escape
+    s3 = get′(r3)       # `r3` doesn't escape
     return s2, s3, s4
 end
 ```
 
-The symbols in the side of each call argument and SSA statements represents the following meaning:
+The symbols on the side of each call argument and SSA statements represent the following meaning:
 - `◌` (plain): this value is not analyzed because escape information of it won't be used anyway (when the object is `isbitstype` for example)
 - `✓` (green or cyan): this value never escapes (`has_no_escape(result.state[x])` holds), colored blue if it has arg escape also (`has_arg_escape(result.state[x])` holds)
 - `↑` (blue or yellow): this value can escape to the caller via return (`has_return_escape(result.state[x])` holds), colored yellow if it has unhandled thrown escape also (`has_thrown_escape(result.state[x])` holds)
 - `X` (red): this value can escape to somewhere the escape analysis can't reason about like escapes to a global memory (`has_all_escape(result.state[x])` holds)
-- `*` (bold): this value's escape state is between the `ReturnEscape` and `AllEscape` in the partial order of [`EscapeInfo`](@ref Core.Compiler.EscapeAnalysis.EscapeInfo), colored yellow if it has unhandled thrown escape also (`has_thrown_escape(result.state[x])` holds)
+- `*` (bold): this value's escape state is between the `ReturnEscape` and `AllEscape` in the partial order of [`EscapeInfo`](@ref Base.Compiler.EscapeAnalysis.EscapeInfo), colored yellow if it has unhandled thrown escape also (`has_thrown_escape(result.state[x])` holds)
 - `′`: this value has additional object field / array element information in its `AliasInfo` property
 
 Escape information of each call argument and SSA value can be inspected programmatically as like:
@@ -67,7 +70,7 @@ result.state[Core.SSAValue(3)] # get EscapeInfo of `r3`
 ### Lattice Design
 
 `EscapeAnalysis` is implemented as a [data-flow analysis](https://en.wikipedia.org/wiki/Data-flow_analysis)
-that works on a lattice of [`x::EscapeInfo`](@ref Core.Compiler.EscapeAnalysis.EscapeInfo),
+that works on a lattice of [`x::EscapeInfo`](@ref Base.Compiler.EscapeAnalysis.EscapeInfo),
 which is composed of the following properties:
 - `x.Analyzed::Bool`: not formally part of the lattice, only indicates `x` has not been analyzed or not
 - `x.ReturnEscape::BitSet`: records SSA statements where `x` can escape to the caller via return
@@ -98,10 +101,10 @@ One distinctive design of this escape analysis is that it is fully _backward_,
 i.e. escape information flows _from usages to definitions_.
 For example, in the code snippet below, EA first analyzes the statement `return %1` and
 imposes `ReturnEscape` on `%1` (corresponding to `obj`), and then it analyzes
-`%1 = %new(Base.RefValue{String, _2}))` and propagates the `ReturnEscape` imposed on `%1`
-to the call argument `_2` (corresponding to `s`):
+`%1 = %new(Base.RefValue{Base.RefValue{String}, _2}))` and propagates the `ReturnEscape`
+imposed on `%1` to the call argument `_2` (corresponding to `s`):
 ```@repl EAUtils
-code_escapes((String,)) do s
+code_escapes((Base.RefValue{String},)) do s
     obj = Ref(s)
     return obj
 end
@@ -113,7 +116,7 @@ As a result this scheme enables a simple implementation of escape analysis,
 e.g. `PhiNode` for example can be handled simply by propagating escape information
 imposed on a `PhiNode` to its predecessor values:
 ```@repl EAUtils
-code_escapes((Bool, String, String)) do cnd, s, t
+code_escapes((Bool, Base.RefValue{String}, Base.RefValue{String})) do cnd, s, t
     if cnd
         obj = Ref(s)
     else
@@ -358,14 +361,10 @@ non-inlined callees that has been derived by previous `IPO EA`.
 More interestingly, it is also valid to use `IPO EA` escape information for type inference,
 e.g., inference accuracy can be improved by forming `Const`/`PartialStruct`/`MustAlias` of mutable object.
 
-Since the computational cost of `analyze_escapes` is not that cheap,
-both `IPO EA` and `Local EA` are better to run only when there is any profitability.
-Currently `EscapeAnalysis` provides the `is_ipo_profitable` heuristic to check a profitability of `IPO EA`.
 ```@docs
-Core.Compiler.EscapeAnalysis.analyze_escapes
-Core.Compiler.EscapeAnalysis.EscapeState
-Core.Compiler.EscapeAnalysis.EscapeInfo
-Core.Compiler.EscapeAnalysis.is_ipo_profitable
+Base.Compiler.EscapeAnalysis.analyze_escapes
+Base.Compiler.EscapeAnalysis.EscapeState
+Base.Compiler.EscapeAnalysis.EscapeInfo
 ```
 
 --------------------------------------------------------------------------------------------
diff --git a/doc/src/devdocs/aot.md b/doc/src/devdocs/aot.md
index 33a78c6a2ecae..cdaf1880ab927 100644
--- a/doc/src/devdocs/aot.md
+++ b/doc/src/devdocs/aot.md
@@ -23,7 +23,7 @@ Firstly, the methods that need to be compiled to native code must be identified.
 
 Once the methods to be compiled have been identified, they are passed to the `jl_create_system_image` function. This function sets up a number of data structures that will be used when serializing native code to a file, and then calls `jl_create_native` with the array of methods. `jl_create_native` runs codegen on the methods produces one or more LLVM modules. `jl_create_system_image` then records some useful information about what codegen produced from the module(s).
 
-The module(s) are then passed to `jl_dump_native`, along with the information recorded by `jl_create_system_image`. `jl_dump_native` contains the code necessary to serialize the module(s) to bitcode, object, or assembly files depending on the command-line options passed to Julia. The serialized code and information is then written to a file as an archive.
+The module(s) are then passed to `jl_dump_native`, along with the information recorded by `jl_create_system_image`. `jl_dump_native` contains the code necessary to serialize the module(s) to bitcode, object, or assembly files depending on the command-line options passed to Julia. The serialized code and information are then written to a file as an archive.
 
 The final step is to run a system linker on the object files in the archive produced by `jl_dump_native`. Once this step is complete, a shared library containing the compiled code is produced.
 
@@ -49,9 +49,9 @@ Julia has a command-line flag to record all of the methods that are compiled by
 
 `jl_dump_native` is responsible for serializing the LLVM module containing the native code to a file. In addition to the module, the system image data produced by `jl_create_system_image` is compiled as a global variable. The output of this method is bitcode, object, and/or assembly archives containing the code and system image data.
 
-`jl_dump_native` is typically one of the larger time sinks when emitting native code, with much of the time spent in optimizing LLVM IR and emitting machine code. Therefore, this function is capable of multithreading the optimization and machine code emission steps. This multithreading is parameterized on the size of the module, but can be explicitly overridden by setting the `JULIA_IMAGE_THREADS` environment variable. The default maximum number of threads is half the number of available threads, but setting it to be lower can reduce peak memory usage during compilation.
+`jl_dump_native` is typically one of the larger time sinks when emitting native code, with much of the time spent in optimizing LLVM IR and emitting machine code. Therefore, this function is capable of multithreading the optimization and machine code emission steps. This multithreading is parameterized on the size of the module, but can be explicitly overridden by setting the [`JULIA_IMAGE_THREADS`](@ref JULIA_IMAGE_THREADS) environment variable. The default maximum number of threads is half the number of available threads, but setting it to be lower can reduce peak memory usage during compilation.
 
-`jl_dump_native` can also produce native code optimized for multiple architectures, when integrated with the Julia loader. This is triggered by setting the `JULIA_CPU_TARGET` environment variable and mediated by the multiversioning pass in the optimization pipeline. To make this work with multithreading, an annotation step is added before the module is split into submodules that are emitted on their own threads, and this annotation step uses information available throughout the entire module to decide what functions are cloned for different architectures. Once the annotation has happened, individual threads can emit code for different architectures in parallel, knowing that a different submodule is guaranteed to produce the necessary functions that will be called by a cloned function.
+`jl_dump_native` can also produce native code optimized for multiple architectures, when integrated with the Julia loader. This is triggered by setting the [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) environment variable and mediated by the multiversioning pass in the optimization pipeline. To make this work with multithreading, an annotation step is added before the module is split into submodules that are emitted on their own threads, and this annotation step uses information available throughout the entire module to decide what functions are cloned for different architectures. Once the annotation has happened, individual threads can emit code for different architectures in parallel, knowing that a different submodule is guaranteed to produce the necessary functions that will be called by a cloned function.
 
 Some other metadata about how the module was serialized is also stored in the archive, such as the number of threads used to serialize the module and the number of functions that were compiled.
 
diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md
index 1a11a5918d091..52f9b98849c77 100644
--- a/doc/src/devdocs/ast.md
+++ b/doc/src/devdocs/ast.md
@@ -119,10 +119,18 @@ parses as `(macrocall (|.| Core '@doc) (line) "some docs" (= (call f x) (block x
 | `import Base: x`    | `(import (: (. Base) (. x)))`                |
 | `import Base: x, y` | `(import (: (. Base) (. x) (. y)))`          |
 | `export a, b`       | `(export a b)`                               |
+| `public a, b`       | `(public a b)`                               |
 
 `using` has the same representation as `import`, but with expression head `:using`
 instead of `:import`.
 
+To programmatically create a `public` statement, you can use `Expr(:public, :a, :b)` or,
+closer to regular code, `Meta.parse("public a, b")`. This approach is necessary due to
+[current limitations on `public`](@ref Export-lists). The `public` keyword is only
+recognized at the syntactic top level within a file (`parse_stmts`) or module. This
+restriction was implemented to prevent breaking existing code that used `public` as an
+identifier when it was introduced in Julia 1.11.
+
 ### Numbers
 
 Julia supports more number types than many scheme implementations, so not all numbers are represented
@@ -155,7 +163,7 @@ parses as:
 ```
 (if a (block (line 2) b)
     (elseif (block (line 3) c) (block (line 4) d)
-            (block (line 6 e))))
+            (block (line 6) e)))
 ```
 
 A `while` loop parses as `(while condition body)`.
@@ -228,9 +236,9 @@ These expressions are represented as `LineNumberNode`s in Julia.
 ### Macros
 
 Macro hygiene is represented through the expression head pair `escape` and `hygienic-scope`.
-The result of a macro expansion is automatically wrapped in `(hygienic-scope block module)`,
+The result of a macro expansion is automatically wrapped in `(hygienic-scope block module [lno])`,
 to represent the result of the new scope. The user can insert `(escape block)` inside
-to interpolate code from the caller.
+to interpolate code from the caller. The lno is the `__source__` argument of the macro, if included.
 
 
 ## Lowered form
@@ -254,11 +262,6 @@ types exist in lowered form:
     Identifies arguments and local variables by consecutive numbering. It has an
     integer-valued `id` field giving the slot index.
     The types of these slots can be found in the `slottypes` field of their `CodeInfo` object.
-    When a slot has different types at different uses and thus requires per-use type annotations,
-    they are converted to temporary `Core.Compiler.TypedSlot` object. This object has an
-    additional `typ` field as well as the `id` field. Note that `Core.Compiler.TypedSlot`
-    only appears in an unoptimized lowered form that is scheduled for optimization,
-    and it never appears elsewhere.
 
   * `Argument`
 
@@ -421,7 +424,7 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
   * `new`
 
     Allocates a new struct-like object. First argument is the type. The [`new`](@ref) pseudo-function is lowered
-    to this, and the type is always inserted by the compiler.  This is very much an internal-only
+    to this, and the type is always inserted by the compiler. This is very much an internal-only
     feature, and does no checking. Evaluating arbitrary `new` expressions can easily segfault.
 
   * `splatnew`
@@ -436,12 +439,12 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
   * `the_exception`
 
-    Yields the caught exception inside a `catch` block, as returned by `jl_current_exception()`.
+    Yields the caught exception inside a `catch` block, as returned by `jl_current_exception(ct)`.
 
   * `enter`
 
     Enters an exception handler (`setjmp`). `args[1]` is the label of the catch block to jump to on
-    error.  Yields a token which is consumed by `pop_exception`.
+    error. Yields a token which is consumed by `pop_exception`.
 
   * `leave`
 
@@ -503,9 +506,9 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
         The number of required arguments for a varargs function definition.
 
-      * `args[5]::QuoteNode{Symbol}` : calling convention
+      * `args[5]::QuoteNode{<:Union{Symbol,Tuple{Symbol,UInt16}, Tuple{Symbol,UInt16,Bool}}`: calling convention
 
-        The calling convention for the call.
+        The calling convention for the call, optionally with effects, and `gc_safe` (safe to execute concurrently to GC.).
 
       * `args[6:5+length(args[3])]` : arguments
 
@@ -524,18 +527,22 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
         The function signature of the opaque closure. Opaque closures don't participate in dispatch, but the input types can be restricted.
 
-      * `args[2]` : isva
-
-        Indicates whether the closure accepts varargs.
-
-      * `args[3]` : lb
+      * `args[2]` : lb
 
         Lower bound on the output type. (Defaults to `Union{}`)
 
-      * `args[4]` : ub
+      * `args[3]` : ub
 
         Upper bound on the output type. (Defaults to `Any`)
 
+      * `args[4]` : constprop
+
+        Indicates whether the opaque closure's identity may be used for constant
+        propagation. The `@opaque` macro enables this by default, but this will
+        cause additional inference which may be undesirable and prevents the
+        code from running during precompile.
+        If `args[4]` is a method, the argument is considered skipped.
+
       * `args[5]` : method
 
         The actual method as an `opaque_closure_method` expression.
@@ -606,15 +613,9 @@ for important details on how to modify these fields safely.
 
   * `sparam_vals`
 
-    The values of the static parameters in `specTypes` indexed by `def.sparam_syms`. For the
-    `MethodInstance` at `Method.unspecialized`, this is the empty `SimpleVector`. But for a
-    runtime `MethodInstance` from the `MethodTable` cache, this will always be defined and
-    indexable.
-
-  * `uninferred`
-
-    The uncompressed source code for a toplevel thunk. Additionally, for a generated function,
-    this is one of many places that the source code might be found.
+    The values of the static parameters in `specTypes`.
+    For the `MethodInstance` at `Method.unspecialized`, this is the empty `SimpleVector`.
+    But for a runtime `MethodInstance` from the `MethodTable` cache, this will always be defined and indexable.
 
   * `backedges`
 
@@ -633,6 +634,10 @@ for important details on how to modify these fields safely.
 
     The `MethodInstance` that this cache entry is derived from.
 
+  * `owner`
+
+    A token that represents the owner of this `CodeInstance`. Will use `jl_egal` to match.
+
 
   * `rettype`/`rettype_const`
 
@@ -653,10 +658,10 @@ for important details on how to modify these fields safely.
     The ABI to use when calling `fptr`. Some significant ones include:
 
       * 0 - Not compiled yet
-      * 1 - `JL_CALLABLE` `jl_value_t *(*)(jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)`
+      * 1 - `JL_CALLABLE` `jl_value_t *(*)(jl_value_t *f, jl_value_t *args[nargs], uint32_t nargs)`
       * 2 - Constant (value stored in `rettype_const`)
-      * 3 - With Static-parameters forwarded `jl_value_t *(*)(jl_svec_t *sparams, jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)`
-      * 4 - Run in interpreter `jl_value_t *(*)(jl_method_instance_t *meth, jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)`
+      * 3 - With Static-parameters forwarded `jl_value_t *(*)(jl_svec_t *sparams, jl_value_t *f, jl_value_t *args[nargs], uint32_t nargs)`
+      * 4 - Run in interpreter `jl_value_t *(*)(jl_method_instance_t *meth, jl_value_t *f, jl_value_t *args[nargs], uint32_t nargs)`
 
   * `min_world` / `max_world`
 
@@ -664,10 +669,30 @@ for important details on how to modify these fields safely.
     If max_world is the special token value `-1`, the value is not yet known.
     It may continue to be used until we encounter a backedge that requires us to reconsider.
 
+  * Timing fields
+
+    - `time_infer_total`: Total cost of computing `inferred` originally as wall-time from start to finish.
+
+    - `time_infer_cache_saved`: The cost saved from `time_infer_total` by having caching.
+      Adding this to `time_infer_total` should give a stable estimate for comparing the cost
+      of two implementations or one implementation over time. This is generally an
+      over-estimate of the time to infer something, since the cache is frequently effective
+      at handling repeated work.
+
+    - `time_infer_self`: Self cost of julia inference for `inferred` (a portion of
+      `time_infer_total`). This is simply the incremental cost of compiling this one method,
+      if given a fully populated cache of all call targets, even including constant
+      inference results and LimitedAccuracy results, which generally are not in a cache.
+
+    - `time_compile`: Self cost of llvm JIT compilation (e.g. of computing `invoke` from
+      `inferred`). A total cost estimate can be computed by walking all of the `edges`
+      contents and summing those, while accounting for cycles and duplicates. (This field
+      currently does not include any measured AOT compile times.)
+
 
 ### CodeInfo
 
-A (usually temporary) container for holding lowered source code.
+A (usually temporary) container for holding lowered (and possibly inferred) source code.
 
   * `code`
 
@@ -695,39 +720,21 @@ A (usually temporary) container for holding lowered source code.
 
   * `ssaflags`
 
-    Statement-level flags for each expression in the function. Many of these are reserved, but not yet implemented:
+    Statement-level 32 bits flags for each expression in the function.
+    See the definition of `jl_code_info_t` in julia.h for more details.
 
-    * 0x01 << 0 = statement is marked as `@inbounds`
-    * 0x01 << 1 = statement is marked as `@inline`
-    * 0x01 << 2 = statement is marked as `@noinline`
-    * 0x01 << 3 = statement is within a block that leads to `throw` call
-    * 0x01 << 4 = statement may be removed if its result is unused, in particular it is thus be both pure and effect free
-    * 0x01 << 5-6 = <unused>
-    * 0x01 << 7 = <reserved> has out-of-band info
+These are only populated after inference (or by generated functions in some cases):
 
-  * `linetable`
-
-    An array of source location objects
-
-  * `codelocs`
-
-    An array of integer indices into the `linetable`, giving the location associated
-    with each statement.
+  * `debuginfo`
 
-Optional Fields:
-
-  * `slottypes`
-
-    An array of types for the slots.
+    An object to retrieve source information for each statements, see
+    [How to interpret line numbers in a `CodeInfo` object](@ref).
 
   * `rettype`
 
-    The inferred return type of the lowered form (IR). Default value is `Any`.
-
-  * `method_for_inference_limit_heuristics`
-
-    The `method_for_inference_heuristics` will expand the given method's generator if
-    necessary during inference.
+    The inferred return type of the lowered form (IR). Default value is `Any`. This is
+    mostly present for convenience, as (due to the way OpaqueClosures work) it is not
+    necessarily the rettype used by codegen.
 
   * `parent`
 
@@ -741,16 +748,19 @@ Optional Fields:
 
     The range of world ages for which this code was valid at the time when it had been inferred.
 
+Optional Fields:
 
-Boolean properties:
+  * `slottypes`
 
-  * `inferred`
+    An array of types for the slots.
 
-    Whether this has been produced by type inference.
+  * `method_for_inference_limit_heuristics`
+
+    The `method_for_inference_heuristics` will expand the given method's generator if
+    necessary during inference.
 
-  * `inlineable`
 
-    Whether this should be eligible for inlining.
+Boolean properties:
 
   * `propagate_inbounds`
 
@@ -760,7 +770,7 @@ Boolean properties:
 
 `UInt8` settings:
 
-  * `constprop`
+  * `constprop`, `inlineable`
 
     * 0 = use heuristic
     * 1 = aggressive
@@ -776,3 +786,79 @@ Boolean properties:
     * 0x01 << 4 = the syntactic control flow within this method is guaranteed to terminate (`:terminates_locally`)
 
     See the documentation of `Base.@assume_effects` for more details.
+
+
+#### How to interpret line numbers in a `CodeInfo` object
+
+There are 2 common forms for this data: one used internally that compresses the data somewhat and one used in the compiler.
+They contain the same basic info, but the compiler version is all mutable while the version used internally is not.
+
+Many consumers may be able to call `Base.IRShow.buildLineInfoNode`,
+`Base.IRShow.append_scopes!`, or `Stacktraces.lookup(::InterpreterIP)` to avoid needing to
+(re-)implement these details specifically.
+
+The definitions of each of these are:
+
+```julia
+struct Core.DebugInfo
+    @noinline
+    def::Union{Method,MethodInstance,Symbol}
+    linetable::Union{Nothing,DebugInfo}
+    edges::SimpleVector{DebugInfo}
+    codelocs::String # compressed data
+end
+mutable struct Core.Compiler.DebugInfoStream
+    def::Union{Method,MethodInstance,Symbol}
+    linetable::Union{Nothing,DebugInfo}
+    edges::Vector{DebugInfo}
+    firstline::Int32 # the starting line for this block (specified by an index of 0)
+    codelocs::Vector{Int32} # for each statement:
+        # index into linetable (if defined), else a line number (in the file represented by def)
+        # then index into edges
+        # then index into edges[linetable]
+end
+```
+
+
+  * `def` : where this `DebugInfo` was defined (the `Method`, `MethodInstance`, or `Symbol` of file scope, for example)
+
+  * `linetable`
+
+    Another `DebugInfo` that this was derived from, which contains the actual line numbers,
+    such that this DebugInfo contains only the indexes into it. This avoids making copies,
+    as well as makes it possible to track how each individual statement transformed from
+    source to optimized, not just the separate line numbers. If `def` is not a Symbol, then
+    that object replaces the current function object for the metadata on what function is
+    conceptually being executed (e.g. think Cassette transforms here). The `codelocs` values
+    described below also are interpreted as an index into the `codelocs` in this object,
+    instead of being a line number itself.
+
+  * `edges` : Vector of the unique DebugInfo for every function inlined into this (which
+    recursively have the edges for everything inlined into them).
+
+  * `firstline` (when uncompressed to DebugInfoStream)
+
+    The line number associated with the `begin` statement (or other keyword such as
+    `function` or `quote`) that delineates where this code definition "starts".
+
+  * `codelocs` (when uncompressed to `DebugInfoStream`)
+
+    A vector of indices, with 3 values for each statement in the IR plus one for the
+    starting point of the block, that describe the stacktrace from that point:
+     1. the integer index into the `linetable.codelocs` field, giving the
+        original location associated with each statement (including its syntactic edges),
+        or zero indicating no change to the line number from the previously
+        executed statement (which is not necessarily syntactic or lexical prior),
+        or the line number itself if the `linetable` field is `nothing`.
+     2. the integer index into `edges`, giving the `DebugInfo` inlined there,
+        or zero if there are no edges.
+     3. (if entry 2 is non-zero) the integer index into `edges[].codelocs`,
+        to interpret recursively for each function in the inlining stack,
+        or zero indicating to use `edges[].firstline` as the line number.
+
+    Special codes include:
+     - `(zero, zero, *) `: no change to the line number or edges from the previous statement
+       (you may choose to interpret this either syntactically or lexically). The inlining
+       depth also might have changed, though most callers should ignore that.
+     - `(zero, non-zero, *)` : no line number, just edges (usually because of
+       macro-expansion into top-level code).
diff --git a/doc/src/devdocs/backtraces.md b/doc/src/devdocs/backtraces.md
index 4ed3ea47efbb5..7ecfa20f89780 100644
--- a/doc/src/devdocs/backtraces.md
+++ b/doc/src/devdocs/backtraces.md
@@ -1,12 +1,12 @@
 # Reporting and analyzing crashes (segfaults)
 
-So you managed to break Julia.  Congratulations!  Collected here are some general procedures you
-can undergo for common symptoms encountered when something goes awry.  Including the information
+So you managed to break Julia. Congratulations!  Collected here are some general procedures you
+can undergo for common symptoms encountered when something goes awry. Including the information
 from these debugging steps can greatly help the maintainers when tracking down a segfault or trying
 to figure out why your script is running slower than expected.
 
 If you've been directed to this page, find the symptom that best matches what you're experiencing
-and follow the instructions to generate the debugging information requested.  Table of symptoms:
+and follow the instructions to generate the debugging information requested. Table of symptoms:
 
   * [Segfaults during bootstrap (`sysimg.jl`)](@ref)
   * [Segfaults when running a script](@ref)
@@ -26,10 +26,10 @@ versioninfo()
 ## Segfaults during bootstrap (`sysimg.jl`)
 
 Segfaults toward the end of the `make` process of building Julia are a common symptom of something
-going wrong while Julia is preparsing the corpus of code in the `base/` folder.  Many factors
+going wrong while Julia is preparsing the corpus of code in the `base/` folder. Many factors
 can contribute toward this process dying unexpectedly, however it is as often as not due to an
 error in the C-code portion of Julia, and as such must typically be debugged with a debug build
-inside of `gdb`.  Explicitly:
+inside of `gdb`. Explicitly:
 
 Create a debug build of Julia:
 
@@ -40,7 +40,7 @@ $ make debug
 
 Note that this process will likely fail with the same error as a normal `make` incantation, however
 this will create a debug executable that will offer `gdb` the debugging symbols needed to get
-accurate backtraces.  Next, manually run the bootstrap process inside of `gdb`:
+accurate backtraces. Next, manually run the bootstrap process inside of `gdb`:
 
 ```
 $ cd base/
@@ -48,14 +48,14 @@ $ gdb -x ../contrib/debug_bootstrap.gdb
 ```
 
 This will start `gdb`, attempt to run the bootstrap process using the debug build of Julia, and
-print out a backtrace if (when) it segfaults.  You may need to hit `<enter>` a few times to get
-the full backtrace.  Create a [gist](https://gist.github.com) with the backtrace, the [version info](@ref dev-version-info),
+print out a backtrace if (when) it segfaults. You may need to hit `<enter>` a few times to get
+the full backtrace. Create a [gist](https://gist.github.com) with the backtrace, the [version info](@ref dev-version-info),
 and any other pertinent information you can think of and open a new [issue](https://github.com/JuliaLang/julia/issues?q=is%3Aopen)
 on Github with a link to the gist.
 
 ## Segfaults when running a script
 
-The procedure is very similar to [Segfaults during bootstrap (`sysimg.jl`)](@ref).  Create a debug
+The procedure is very similar to [Segfaults during bootstrap (`sysimg.jl`)](@ref). Create a debug
 build of Julia, and run your script inside of a debugged Julia process:
 
 ```
@@ -64,7 +64,7 @@ $ make debug
 $ gdb --args usr/bin/julia-debug <path_to_your_script>
 ```
 
-Note that `gdb` will sit there, waiting for instructions.  Type `r` to run the process, and `bt`
+Note that `gdb` will sit there, waiting for instructions. Type `r` to run the process, and `bt`
 to generate a backtrace once it segfaults:
 
 ```
@@ -128,4 +128,4 @@ Note that this is only works on Linux. The blog post on [Time Travelling Bug Rep
 A few terms have been used as shorthand in this guide:
 
   * `<julia_root>` refers to the root directory of the Julia source tree; e.g. it should contain folders
-    such as `base`, `deps`, `src`, `test`, etc.....
+    such as `base`, `deps`, `src`, `test`, etc.
diff --git a/doc/src/devdocs/boundscheck.md b/doc/src/devdocs/boundscheck.md
index 7acd32f04dc75..fa2cda2698bfe 100644
--- a/doc/src/devdocs/boundscheck.md
+++ b/doc/src/devdocs/boundscheck.md
@@ -105,7 +105,7 @@ checkbounds_indices(Bool, (IA1, IA...), (I1, I...)) = checkindex(Bool, IA1, I1)
                                                       checkbounds_indices(Bool, IA, I)
 ```
 
-so `checkindex` checks a single dimension.  All of these functions, including the unexported
+so `checkindex` checks a single dimension. All of these functions, including the unexported
 `checkbounds_indices` have docstrings accessible with `?` .
 
 If you have to customize bounds checking for a specific array type, you should specialize `checkbounds(Bool, A, I...)`.
@@ -113,10 +113,10 @@ However, in most cases you should be able to rely on `checkbounds_indices` as lo
 useful `axes` for your array type.
 
 If you have novel index types, first consider specializing `checkindex`, which handles a single
-index for a particular dimension of an array.  If you have a custom multidimensional index type
+index for a particular dimension of an array. If you have a custom multidimensional index type
 (similar to `CartesianIndex`), then you may have to consider specializing `checkbounds_indices`.
 
-Note this hierarchy has been designed to reduce the likelihood of method ambiguities.  We try
+Note this hierarchy has been designed to reduce the likelihood of method ambiguities. We try
 to make `checkbounds` the place to specialize on array type, and try to avoid specializations
 on index types; conversely, `checkindex` is intended to be specialized only on index type (especially,
 the last argument).
diff --git a/doc/src/devdocs/build/arm.md b/doc/src/devdocs/build/arm.md
index 747ee25d22a04..df9ede07d270f 100644
--- a/doc/src/devdocs/build/arm.md
+++ b/doc/src/devdocs/build/arm.md
@@ -55,18 +55,9 @@ due to unsupported inline assembly. In that case, add `MCPU=armv7-a` to
 
 ## AArch64 (ARMv8)
 
-Julia has been successfully built on the following ARMv8 devices:
+Julia is expected to work and build on ARMv8 cpus. One should follow the general [build instructions](https://github.com/JuliaLang/julia/blob/master/README.md). Julia expects to have around 8GB of ram or swap enabled to build itself.
 
-* [nVidia Jetson TX1 & TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html);
-* [X-Gene 1](https://www.apm.com/products/data-center/x-gene-family/x-gene/);
-* [Overdrive 3000](https://softiron.com/products/overdrive-3000/);
-* [Cavium ThunderX](https://www.cavium.com/ThunderX_ARM_Processors.html) on [packet.net](https://www.packet.net).
-
-Compilation on `ARMv8-A` requires that `Make.user` is configured as follows:
-
-```
-MCPU=armv8-a
-```
+### Known issues
 
 Starting from Julia v1.10, [JITLink](https://llvm.org/docs/JITLink.html) is automatically enabled on this architecture for all operating systems when linking to LLVM 15 or later versions.
 Due to a [bug in LLVM memory manager](https://github.com/llvm/llvm-project/issues/63236), non-trivial workloads may generate too many memory mappings that on Linux can exceed the limit of memory mappings (`mmap`) set in the file `/proc/sys/vm/max_map_count`, resulting in an error like
@@ -77,21 +68,3 @@ Should this happen, ask your system administrator to increase the limit of memor
 ```
 sysctl -w vm.max_map_count=262144
 ```
-
-### nVidia Jetson TX2
-
-Julia builds and runs on the [nVidia Jetson TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html)
-platform with minimal configuration changes.
-
-After configuring `Make.user` as per the `AArch64` instructions in this document,
-follow the general [build instructions](https://github.com/JuliaLang/julia/blob/master/README.md).
-The majority of the build dependencies specified in the instructions are installed by
-the default configuration flashed by [Jetpack 3.0](https://developer.nvidia.com/embedded/jetpack). The remaining tools can be installed by issuing the following command:
-
-```
-sudo apt-get install gfortran wget cmake
-```
-
-A full parallel build, including LLVM,
-will complete in around two hours. All tests pass and CUDA functionality is available
-through, e.g., [CUDAdrv](https://github.com/JuliaGPU/CUDAdrv.jl).
diff --git a/doc/src/devdocs/build/build.md b/doc/src/devdocs/build/build.md
index 51bcf7d0ee469..861fbba1d1f69 100644
--- a/doc/src/devdocs/build/build.md
+++ b/doc/src/devdocs/build/build.md
@@ -39,9 +39,16 @@ directory mirror, with all of the necessary Makefiles to build Julia,
 in the specified directory. These builds will share the source files
 in Julia and `deps/srccache`. Each out-of-tree build directory can
 have its own `Make.user` file to override the global `Make.user` file
-in the top-level folder.
+in the top-level folder. After modifying the `Make.user` file if necessary,
+build using: `make -C <build-directory>`.
 
-If everything works correctly, you will see a Julia banner and an
+If everything works correctly, there will be a symlink to
+the julia executable in the build directory which can be run as:
+```sh
+./julia
+```
+The actual executable is in `<build-directory>/usr/bin`.
+After running this, you will see a Julia banner and an
 interactive prompt into which you can enter expressions for
 evaluation. (Errors related to libraries might be caused by old,
 incompatible libraries sitting around in your PATH. In this case, try
@@ -49,9 +56,9 @@ moving the `julia` directory earlier in the PATH). Note that most of
 the instructions above apply to unix systems.
 
 To run julia from anywhere you can:
-- add an alias (in `bash`: `echo "alias julia='/path/to/install/folder/bin/julia'" >> ~/.bashrc && source ~/.bashrc`), or
+- add an alias (in `bash`: `echo "alias julia='<build-directory>/usr/bin/julia'" >> ~/.bashrc && source ~/.bashrc`), or
 
-- add a soft link to the `julia` executable in the `julia` directory to `/usr/local/bin` (or any suitable directory already in your path), or
+- add a soft link to the `julia` executable in the `<build-directory>/usr/bin` directory to `/usr/local/bin` (or any suitable directory already in your path), or
 
 - add the `julia` directory to your executable path for this shell session (in `bash`: `export PATH="$(pwd):$PATH"` ; in `csh` or `tcsh`:
 `set path= ( $path $cwd )` ), or
@@ -60,6 +67,16 @@ To run julia from anywhere you can:
 
 - write `prefix=/path/to/install/folder` into `Make.user` and then run `make install`. If there is a version of Julia already installed in this folder, you should delete it before running `make install`.
 
+Some of the options you can set to control the build of Julia are listed and documented at the beginning of the file `Make.inc`, but you should never edit it for this purpose, use `Make.user` instead.
+
+Julia's Makefiles define convenient automatic rules called `print-<VARNAME>` for printing the value of variables, replacing `<VARNAME>` with the name of the variable to print the value of.
+For example
+```console
+$ make print-JULIA_PRECOMPILE
+JULIA_PRECOMPILE=1
+```
+These rules are useful for debugging purposes.
+
 Now you should be able to run Julia like this:
 
     julia
@@ -138,92 +155,62 @@ Notes for various operating systems:
 Notes for various architectures:
 
 * [ARM](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/arm.md)
+* [RISC-V](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/riscv.md)
 
 ## Required Build Tools and External Libraries
 
 Building Julia requires that the following software be installed:
 
-- **[GNU make]**                — building dependencies.
-- **[gcc & g++][gcc]** (>= 7.1) or **[Clang][clang]** (>= 5.0, >= 9.3 for Apple Clang) — compiling and linking C, C++.
-- **[libatomic][gcc]**          — provided by **[gcc]** and needed to support atomic operations.
-- **[python]** (>=2.7)          — needed to build LLVM.
-- **[gfortran]**                — compiling and linking Fortran libraries.
-- **[perl]**                    — preprocessing of header files of libraries.
-- **[wget]**, **[curl]**, or **[fetch]** (FreeBSD) — to automatically download external libraries.
-- **[m4]**                      — needed to build GMP.
-- **[awk]**                     — helper tool for Makefiles.
-- **[patch]**                   — for modifying source code.
-- **[cmake]** (>= 3.4.3)        — needed to build `libgit2`.
-- **[pkg-config]**              — needed to build `libgit2` correctly, especially for proxy support.
-- **[powershell]** (>= 3.0)     — necessary only on Windows.
-- **[which]**                   — needed for checking build dependencies.
+- **[GNU make](https://www.gnu.org/software/make)**                — building dependencies.
+- **[gcc & g++](https://gcc.gnu.org)** (>= 7.1) or **[Clang](https://clang.llvm.org)** (>= 5.0, >= 9.3 for Apple Clang) — compiling and linking C, C++.
+- **[libatomic](https://gcc.gnu.org)**          — provided by **[gcc]** and needed to support atomic operations.
+- **[python](https://www.python.org/)** (>=2.7)          — needed to build LLVM.
+- **[gfortran](https://gcc.gnu.org/fortran/)**                — compiling and linking Fortran libraries.
+- **[perl](https://www.perl.org)**                    — preprocessing of header files of libraries.
+- **[wget](https://www.gnu.org/software/wget)**, **[curl](https://curl.se)**, or **[fetch](https://www.freebsd.org/cgi/man.cgi?fetch(1))** (FreeBSD) — to automatically download external libraries.
+- **[m4](https://www.gnu.org/software/m4)**                      — needed to build GMP.
+- **[awk](https://www.gnu.org/software/gawk)**                     — helper tool for Makefiles.
+- **[patch](https://www.gnu.org/software/patch)**                   — for modifying source code.
+- **[cmake](https://www.cmake.org)** (>= 3.4.3)        — needed to build `libgit2`.
+- **[pkg-config](https://www.freedesktop.org/wiki/Software/pkg-config/)**              — needed to build `libgit2` correctly, especially for proxy support.
+- **[powershell](https://docs.microsoft.com/en-us/powershell/scripting/wmf/overview)** (>= 3.0)     — necessary only on Windows.
+- **[which](https://carlowood.github.io/which/)**                   — needed for checking build dependencies.
 
 On Debian-based distributions (e.g. Ubuntu), you can easily install them with `apt-get`:
 ```
 sudo apt-get install build-essential libatomic1 python gfortran perl wget m4 cmake pkg-config curl
 ```
 
+On Red Hat-based distributions (e.g. Fedora, CentOS), you can install them with `yum`:
+```
+sudo dnf install gcc gcc-c++ gcc-gfortran python3 perl wget m4 cmake pkgconfig curl
+```
+
 Julia uses the following external libraries, which are automatically
 downloaded (or in a few cases, included in the Julia source
 repository) and then compiled from source the first time you run
 `make`. The specific version numbers of these libraries that Julia
 uses are listed in [`deps/$(libname).version`](https://github.com/JuliaLang/julia/blob/master/deps/):
 
-- **[LLVM]** (15.0 + [patches](https://github.com/JuliaLang/llvm-project/tree/julia-release/15.x)) — compiler infrastructure (see [note below](#llvm)).
-- **[FemtoLisp]**            — packaged with Julia source, and used to implement the compiler front-end.
-- **[libuv]**  (custom fork) — portable, high-performance event-based I/O library.
-- **[OpenLibm]**             — portable libm library containing elementary math functions.
-- **[DSFMT]**                — fast Mersenne Twister pseudorandom number generator library.
-- **[OpenBLAS]**             — fast, open, and maintained [basic linear algebra subprograms (BLAS)]
-- **[LAPACK]**               — library of linear algebra routines for solving systems of simultaneous linear equations, least-squares solutions of linear systems of equations, eigenvalue problems, and singular value problems.
-- **[MKL]** (optional)       – OpenBLAS and LAPACK may be replaced by Intel's MKL library.
-- **[SuiteSparse]**          — library of linear algebra routines for sparse matrices.
-- **[PCRE]**                 — Perl-compatible regular expressions library.
-- **[GMP]**                  — GNU multiple precision arithmetic library, needed for `BigInt` support.
-- **[MPFR]**                 — GNU multiple precision floating point library, needed for arbitrary precision floating point (`BigFloat`) support.
-- **[libgit2]**              — Git linkable library, used by Julia's package manager.
-- **[curl]**                 — libcurl provides download and proxy support.
-- **[libssh2]**              — library for SSH transport, used by libgit2 for packages with SSH remotes.
-- **[mbedtls]**              — library used for cryptography and transport layer security, used by libssh2
-- **[utf8proc]**             — a library for processing UTF-8 encoded Unicode strings.
-- **[LLVM libunwind]**       — LLVM's fork of [libunwind], a library that determines the call-chain of a program.
-- **[ITTAPI]**               — Intel's Instrumentation and Tracing Technology and Just-In-Time API.
-
-[GNU make]:     https://www.gnu.org/software/make
-[patch]:        https://www.gnu.org/software/patch
-[wget]:         https://www.gnu.org/software/wget
-[m4]:           https://www.gnu.org/software/m4
-[awk]:          https://www.gnu.org/software/gawk
-[gcc]:          https://gcc.gnu.org
-[clang]:        https://clang.llvm.org
-[python]:       https://www.python.org/
-[gfortran]:     https://gcc.gnu.org/fortran/
-[curl]:         https://curl.haxx.se
-[fetch]:        https://www.freebsd.org/cgi/man.cgi?fetch(1)
-[perl]:         https://www.perl.org
-[cmake]:        https://www.cmake.org
-[OpenLibm]:     https://github.com/JuliaLang/openlibm
-[DSFMT]:        https://github.com/MersenneTwister-Lab/dSFMT
-[OpenBLAS]:     https://github.com/xianyi/OpenBLAS
-[LAPACK]:       https://www.netlib.org/lapack
-[MKL]:          https://software.intel.com/en-us/articles/intel-mkl
-[SuiteSparse]:  https://people.engr.tamu.edu/davis/suitesparse.html
-[PCRE]:         https://www.pcre.org
-[LLVM]:         https://www.llvm.org
-[LLVM libunwind]: https://github.com/llvm/llvm-project/tree/main/libunwind
-[FemtoLisp]:    https://github.com/JeffBezanson/femtolisp
-[GMP]:          https://gmplib.org
-[MPFR]:         https://www.mpfr.org
-[libuv]:        https://github.com/JuliaLang/libuv
-[libgit2]:      https://libgit2.org/
-[utf8proc]:     https://julialang.org/utf8proc/
-[libunwind]:    https://www.nongnu.org/libunwind
-[libssh2]:      https://www.libssh2.org
-[mbedtls]:      https://tls.mbed.org/
-[pkg-config]:   https://www.freedesktop.org/wiki/Software/pkg-config/
-[powershell]:   https://docs.microsoft.com/en-us/powershell/scripting/wmf/overview
-[which]:        https://carlowood.github.io/which/
-[ITTAPI]:       https://github.com/intel/ittapi
+- **[LLVM](https://www.llvm.org)** (15.0 + [patches](https://github.com/JuliaLang/llvm-project/tree/julia-release/15.x)) — compiler infrastructure (see [note below](#llvm)).
+- **[FemtoLisp](https://github.com/JeffBezanson/femtolisp)**            — packaged with Julia source, and used to implement the compiler front-end.
+- **[libuv](https://github.com/JuliaLang/libuv)**  (custom fork) — portable, high-performance event-based I/O library.
+- **[OpenLibm](https://github.com/JuliaLang/openlibm)**             — portable libm library containing elementary math functions.
+- **[DSFMT](https://github.com/MersenneTwister-Lab/dSFMT)**                — fast Mersenne Twister pseudorandom number generator library.
+- **[OpenBLAS](https://github.com/xianyi/OpenBLAS)**             — fast, open, and maintained [basic linear algebra subprograms (BLAS)]
+- **[LAPACK](https://www.netlib.org/lapack)**               — library of linear algebra routines for solving systems of simultaneous linear equations, least-squares solutions of linear systems of equations, eigenvalue problems, and singular value problems.
+- **[MKL](https://software.intel.com/en-us/articles/intel-mkl)** (optional)       – OpenBLAS and LAPACK may be replaced by Intel's MKL library.
+- **[SuiteSparse](https://people.engr.tamu.edu/davis/suitesparse.html)**          — library of linear algebra routines for sparse matrices.
+- **[PCRE](https://www.pcre.org)**                 — Perl-compatible regular expressions library.
+- **[GMP](https://gmplib.org)**                  — GNU multiple precision arithmetic library, needed for `BigInt` support.
+- **[MPFR](https://www.mpfr.org)**                 — GNU multiple precision floating point library, needed for arbitrary precision floating point (`BigFloat`) support.
+- **[libgit2](https://libgit2.org/)**              — Git linkable library, used by Julia's package manager.
+- **[curl](https://curl.se)**                 — libcurl provides download and proxy support.
+- **[libssh2](https://www.libssh2.org)**              — library for SSH transport, used by libgit2 for packages with SSH remotes.
+- **[OpenSSL](https://www.openssl.org/)**              — library used for cryptography and transport layer security, used by libgit2 and libssh2.
+- **[utf8proc](https://julialang.org/utf8proc/)**             — a library for processing UTF-8 encoded Unicode strings.
+- **[LLVM libunwind](https://www.nongnu.org/libunwind)**       — LLVM's fork of [libunwind], a library that determines the call-chain of a program.
+- **[ITTAPI](https://github.com/intel/ittapi)**               — Intel's Instrumentation and Tracing Technology and Just-In-Time API.
 
 ## Build dependencies
 
@@ -238,11 +225,48 @@ The most complicated dependency is LLVM, for which we require additional patches
 For packaging Julia with LLVM, we recommend either:
  - bundling a Julia-only LLVM library inside the Julia package, or
  - adding the patches to the LLVM package of the distribution.
-   * A complete list of patches is available in on [Github](https://github.com/JuliaLang/llvm-project) see the `julia-release/15.x` branch.
-   * The only Julia-specific patch is the lib renaming (`llvm-symver-jlprefix.patch`), which should _not_ be applied to a system LLVM.
+   * A complete list of patches is available in on [Github](https://github.com/JuliaLang/llvm-project) see the `julia-release/18.x` branch.
    * The remaining patches are all upstream bug fixes, and have been contributed into upstream LLVM.
 
-Using an unpatched or different version of LLVM will result in errors and/or poor performance. Though Julia can be built with newer LLVM versions, support for this should be regarded as experimental and not suitable for packaging.
+Using an unpatched or different version of LLVM will result in errors and/or poor performance.
+You can build a different version of LLVM from a remote Git repository with the following options in the `Make.user` file:
+
+```make
+# Force source build of LLVM
+USE_BINARYBUILDER_LLVM = 0
+# Use Git for fetching LLVM source code
+# this is either `1` to get all of them
+DEPS_GIT = 1
+# or a space-separated list of specific dependencies to download with git
+DEPS_GIT = llvm
+
+# Other useful options:
+#URL of the Git repository you want to obtain LLVM from:
+#  LLVM_GIT_URL = ...
+#Name of the alternate branch to clone from git
+#  LLVM_BRANCH = julia-16.0.6-0
+#SHA hash of the alternate commit to check out automatically
+#  LLVM_SHA1 = $(LLVM_BRANCH)
+#List of LLVM targets to build. It is strongly recommended to keep at least all the
+#default targets listed in `deps/llvm.mk`, even if you don't necessarily need all of them.
+#  LLVM_TARGETS = ...
+#Use ccache for faster recompilation in case you need to restart a build.
+#  USECCACHE = 1
+#  CMAKE_GENERATOR=Ninja
+#  LLVM_ASSERTIONS=1
+#  LLVM_DEBUG=Symbols
+```
+
+The various build phases are controlled by specific files:
+ * `deps/llvm.version` : touch or change to checkout a new version, `make get-llvm check-llvm`
+ * `deps/srccache/llvm/source-extracted` : result of `make extract-llvm`
+ * `deps/llvm/build_Release*/build-configured` : result of `make configure-llvm`
+ * `deps/llvm/build_Release*/build-configured` : result of `make compile-llvm`
+ * `usr-staging/llvm/build_Release*.tgz` : result of `make stage-llvm` (regenerate with `make reinstall-llvm`)
+ * `usr/manifest/llvm` : result of `make install-llvm` (regenerate with `make uninstall-llvm`)
+ * `make version-check-llvm` : runs every time to warn the user if there are local modifications
+
+Though Julia can be built with newer LLVM versions, support for this should be regarded as experimental and not suitable for packaging.
 
 ### libuv
 
@@ -285,10 +309,24 @@ LLVM_ASSERTIONS=1
 
 Please note that assert builds of Julia will be slower than regular (non-assert) builds.
 
+## Building a debug build of Julia
+
+A full debug build of Julia can be built with `make debug`.  This builds a debug
+version of `libjulia` and uses it to bootstrap the compiler, before creating a
+system image with debug symbols enabled.  This can take more than 15 minutes.
+
+Although it may result in some differences, a debug build can be built much
+quicker by bootstrapping from a release build:
+
+```sh
+$ make julia-src-release julia-sysbase-release
+$ make julia-sysimg-debug CROSS_BOOTSTRAP_JULIA=$PWD/usr/bin/julia CROSS_BOOTSTRAP_SYSBASE=$PWD/usr/lib/julia/sysbase.so
+```
+
 ## Building 32-bit Julia on a 64-bit machine
 
-Occasionally, bugs specific to 32-bit architectures may arise, and when this happens it is useful to be able to debug the problem on your local machine.  Since most modern 64-bit systems support running programs built for 32-bit ones, if you don't have to recompile Julia from source (e.g. you mainly need to inspect the behavior of a 32-bit Julia without having to touch the C code), you can likely use a 32-bit build of Julia for your system that you can obtain from the [official downloads page](https://julialang.org/downloads/).
-However, if you do need to recompile Julia from source one option is to use a Docker container of a 32-bit system.  At least for now, building a 32-bit version of Julia is relatively straightforward using [ubuntu 32-bit docker images](https://hub.docker.com/r/i386/ubuntu). In brief, after setting up `docker` here are the required steps:
+Occasionally, bugs specific to 32-bit architectures may arise, and when this happens it is useful to be able to debug the problem on your local machine. Since most modern 64-bit systems support running programs built for 32-bit ones, if you don't have to recompile Julia from source (e.g. you mainly need to inspect the behavior of a 32-bit Julia without having to touch the C code), you can likely use a 32-bit build of Julia for your system that you can obtain from the [official downloads page](https://julialang.org/downloads/).
+However, if you do need to recompile Julia from source one option is to use a Docker container of a 32-bit system. At least for now, building a 32-bit version of Julia is relatively straightforward using [ubuntu 32-bit docker images](https://hub.docker.com/r/i386/ubuntu). In brief, after setting up `docker` here are the required steps:
 
 ```sh
 $ docker pull i386/ubuntu
diff --git a/doc/src/devdocs/build/distributing.md b/doc/src/devdocs/build/distributing.md
index c49f6f071224c..ed06c20fa0df3 100644
--- a/doc/src/devdocs/build/distributing.md
+++ b/doc/src/devdocs/build/distributing.md
@@ -2,9 +2,9 @@ Binary distributions
 =======================================
 
 These notes are for those wishing to compile a binary distribution of Julia
-for distribution on various platforms.  We love users spreading Julia as
+for distribution on various platforms. We love users spreading Julia as
 far and wide as they can, trying it out on as wide an array of
-operating systems and hardware configurations as possible.  As each
+operating systems and hardware configurations as possible. As each
 platform has specific gotchas and processes that must be followed in
 order to create a portable, working Julia distribution, we have
 separated most of the notes by OS.
@@ -53,7 +53,7 @@ as it will make Julia fail at startup on any machine with incompatible CPUs
 We therefore recommend that you pass the `MARCH` variable when calling `make`,
 setting it to the baseline target you intend to support. This will determine
 the target CPU for both the Julia executable and libraries, and the system
-image (the latter can also be set using `JULIA_CPU_TARGET`). Typically useful
+image (the latter can also be set using [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET)). Typically useful
 values for x86 CPUs are `x86-64` and `core2` (for 64-bit builds) and
 `pentium4` (for 32-bit builds). Unfortunately, CPUs older than Pentium 4
 are currently not supported (see
@@ -86,8 +86,8 @@ installation-wide initialization file. This file can be used by
 distribution managers to set up custom paths or initialization code.
 For Linux distribution packages, if `$prefix` is
 set to `/usr`, there is no `/usr/etc` to look into. This requires
-the path to Julia's private `etc` directory to be changed.  This can
-be done via the `sysconfdir` make variable when building.  Simply
+the path to Julia's private `etc` directory to be changed. This can
+be done via the `sysconfdir` make variable when building. Simply
 pass `sysconfdir=/etc` to `make` when building and Julia will first
 check `/etc/julia/startup.jl` before trying
 `$prefix/etc/julia/startup.jl`.
@@ -97,18 +97,18 @@ OS X
 
 To create a binary distribution on OSX, build Julia first, then cd to
 `contrib/mac/app`, and run `make` with the same makevars that were used
-with `make` when building Julia proper.  This will then
+with `make` when building Julia proper. This will then
 create a `.dmg` file in the `contrib/mac/app` directory holding a
 completely self-contained Julia.app.
 
 Alternatively, Julia may be built as a framework by invoking `make` with the
-`darwinframework` target and `DARWIN_FRAMEWORK=1` set.  For example,
+`darwinframework` target and `DARWIN_FRAMEWORK=1` set. For example,
 `make DARWIN_FRAMEWORK=1 darwinframework`.
 
 Windows
 -------
 
-Instructions for reating a Julia distribution on Windows are described in the
+Instructions for creating a Julia distribution on Windows are described in the
 [build devdocs for Windows](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/windows.md).
 
 Notes on BLAS and LAPACK
diff --git a/doc/src/devdocs/build/linux.md b/doc/src/devdocs/build/linux.md
index 4e596ef73341b..8c4773e4e41ad 100644
--- a/doc/src/devdocs/build/linux.md
+++ b/doc/src/devdocs/build/linux.md
@@ -8,7 +8,7 @@
 
 ## Architecture Customization
 
-Julia can be built for a non-generic architecture by configuring the `ARCH` Makefile variable in a `Make.user` file. See the appropriate section of `Make.inc` for additional customization options, such as `MARCH` and `JULIA_CPU_TARGET`.
+Julia can be built for a non-generic architecture by configuring the `ARCH` Makefile variable in a `Make.user` file. See the appropriate section of `Make.inc` for additional customization options, such as `MARCH` and [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET).
 
 For example, to build for Pentium 4, set `MARCH=pentium4` and install the necessary system libraries for linking. On Ubuntu, these may include lib32gfortran-6-dev, lib32gcc1, and lib32stdc++6, among others.
 
diff --git a/doc/src/devdocs/build/riscv.md b/doc/src/devdocs/build/riscv.md
new file mode 100644
index 0000000000000..51939c25e41a1
--- /dev/null
+++ b/doc/src/devdocs/build/riscv.md
@@ -0,0 +1,95 @@
+# RISC-V (Linux)
+
+Julia has experimental support for 64-bit RISC-V (RV64) processors running
+Linux. This file provides general guidelines for compilation, in addition to
+instructions for specific devices.
+
+A list of [known issues](https://github.com/JuliaLang/julia/labels/system:riscv)
+for RISC-V is available. If you encounter difficulties, please create an issue
+including the output from `cat /proc/cpuinfo`.
+
+
+## Compiling Julia
+
+To compilie Julia for RISC-V, you need to manually indicate what architecture, and
+optionally which CPU to build for. This can be done by setting the `MARCH` and `MCPU`
+variables in `Make.user`
+
+The `MARCH` variable needs to be set to a RISC-V ISA string, which can be found by
+looking at the documentation of your device, or by inspecting `/proc/cpuinfo`. Only
+use flags that your compiler supports, e.g., run `gcc -march=help` to see a list of
+supported flags. A common value is `rv64gc`, which is a good starting point.
+
+The `MCPU` variable is optional, and can be used to further optimize the
+generated code for a specific CPU. If you are unsure, it is recommended to leave
+it unset. You can find a list of supported values by running `gcc --target-help`.
+
+For example, if you are using a StarFive VisionFive2, which contains a JH7110
+processor based on the SiFive U74, you can set these flags as follows:
+
+```make
+MARCH := rv64gc_zba_zbb
+MCPU := sifive-u74
+```
+
+If you prefer a portable build, you could use:
+
+```make
+MARCH := rv64gc
+
+# also set JULIA_CPU_TARGET to the expanded form of rv64gc
+# (it normally copies the value of MCPU, which we don't set)
+JULIA_CPU_TARGET := generic-rv64,i,m,a,f,d,zicsr,zifencei,c
+```
+
+### Cross-compilation
+
+A native build on a RISC-V device may take a very long time, so it's also
+possible to cross-compile Julia on a faster machine.
+
+First, get a hold of a RISC-V cross-compilation toolchain that provides
+support for C, C++ and Fortran. This can be done by checking-out the
+[riscv-gnu-toolchain](https://github.com/riscv-collab/riscv-gnu-toolchain)
+repository and building it as follows:
+
+```sh
+sudo mkdir /opt/riscv && sudo chown $USER /opt/riscv
+./configure --prefix=/opt/riscv --with-languages=c,c++,fortran
+make linux -j$(nproc)
+```
+
+Then, install the QEMU user-mode emulator for RISC-V, along with `binfmt`
+support to enable execution of RISC-V binaries on the host machine. The
+exact steps depend on your distribution, e.g., on Arch Linux it involves
+installing the `qemu-user-static` and `qemu-user-static-binfmt` packages.
+Note that to actually execute RISC-V binaries, QEMU will need to be able to
+find the RISC-V system root, which can be achieved by setting the
+`QEMU_LD_PREFIX` environment variable to the path of the root filesystem.
+
+Finally, compile Julia with the following `Make.user` variables (in addition to
+the ones from the previous section):
+
+```make
+XC_HOST=riscv64-unknown-linux-gnu
+OS=Linux
+export QEMU_LD_PREFIX=/opt/riscv/sysroot
+```
+
+Note that you will have to execute `make` with `PATH` set to include the
+cross-compilation toolchain, e.g., by running:
+
+```sh
+PATH=/opt/riscv/bin:$PATH make -j$(nproc)
+```
+
+Because of the RISC-V sysroot we use being very barren, you may need to
+add additional libraries that the Julia build system currently expects
+to be available system-wide. For example, the build currently relies on
+a system-provided `libz`, so you may need to copy this library from the
+Julia build into the system root:
+
+```sh
+make -C deps install-zlib
+cp -v usr/lib/libz.*   /opt/riscv/sysroot/usr/lib
+cp -v usr/include/z*.h /opt/riscv/sysroot/usr/include
+```
diff --git a/doc/src/devdocs/build/windows.md b/doc/src/devdocs/build/windows.md
index 8f8f0c8bc676a..e5f1fa2caeb6c 100644
--- a/doc/src/devdocs/build/windows.md
+++ b/doc/src/devdocs/build/windows.md
@@ -32,7 +32,7 @@ or edit `%USERPROFILE%\.gitconfig` and add/edit the lines:
 ## Binary distribution
 
 For the binary distribution installation notes on Windows please see the instructions at
-[https://julialang.org/downloads/platform/#windows](https://julialang.org/downloads/platform/#windows).
+[https://julialang.org/downloads/platform/#windows](https://julialang.org/downloads/platform/#windows). Note, however, that on all platforms [using `juliaup`](https://julialang.org/install/) is recommended over manually installing binaries.
 
 ## Source distribution
 
@@ -50,7 +50,7 @@ MinGW-w64 compilers available through Cygwin's package manager.
     *Advanced*: you may skip steps 2-4 by running:
 
     ```sh
-    setup-x86_64.exe -s <url> -q -P cmake,gcc-g++,git,make,patch,curl,m4,python3,p7zip,mingw64-i686-gcc-g++,mingw64-i686-gcc-fortran,mingw64-x86_64-gcc-g++,mingw64-x86_64-gcc-fortran
+    setup-x86_64.exe -s <url> -q -P cmake,gcc-g++=12.5.0-1,git,make,patch,curl,m4,python3,p7zip,mingw64-i686-gcc-g++=12.5.0-1,mingw64-i686-gcc-fortran=12.5.0-1,mingw64-i686-gcc-core=12.5.0-1,mingw64-i686-headers=12.0.0-1,mingw64-i686-runtime=12.0.0-1,mingw64-i686-winpthreads=12.0.0-1,mingw64-x86_64-gcc-g++=12.5.0-1,mingw64-x86_64-gcc-fortran=12.5.0-1,mingw64-x86_64-gcc-core=12.5.0-1,mingw64-x86_64-headers=12.0.0-1,mingw64-x86_64-runtime=12.0.0-1,mingw64-x86_64-winpthreads=12.0.0-1
     ```
 
     replacing `<url>` with a site from [https://cygwin.com/mirrors.html](https://cygwin.com/mirrors.html)
@@ -60,14 +60,16 @@ MinGW-w64 compilers available through Cygwin's package manager.
 
  3. At the *Select Packages* step, select the following:
 
-    1.  From the *Devel* category: `cmake`, `gcc-g++`, `git`, `make`, `patch`
-    2.  From the *Net* category: `curl`
-    3.  From *Interpreters* (or *Python*) category: `m4`, `python3`
-    4.  From the *Archive* category: `p7zip`
-    5.  For 32 bit Julia, and also from the *Devel* category:
-        `mingw64-i686-gcc-g++` and `mingw64-i686-gcc-fortran`
-    6.  For 64 bit Julia, and also from the *Devel* category:
-        `mingw64-x86_64-gcc-g++` and `mingw64-x86_64-gcc-fortran`
+    1. From the *Devel* category: `cmake`, `gcc-g++`, `git`, `make`, `patch`
+    2. From the *Net* category: `curl`
+    3. From *Interpreters* (or *Python*) category: `m4`, `python3`
+    4. From the *Archive* category: `p7zip`
+    5. For 32 bit Julia, and also from the *Devel* category:
+       `mingw64-i686-gcc-g++` and `mingw64-i686-gcc-fortran` and `mingw64-i686-gcc-core` (version "12.5.0-1")
+       `mingw64-i686-headers` and `mingw64-i686-runtime` and `mingw64-i686-winpthreads` (version "12.0.0-1")
+    6. For 64 bit Julia, and also from the *Devel* category:
+       `mingw64-x86_64-gcc-g++` and `mingw64-x86_64-gcc-fortran` and `mingw64-x86_64-gcc-core` (version "12.5.0-1")
+       `mingw64-x86_64-headers` and `mingw64-x86_64-runtime` and `mingw64-x86_64-winpthreads` (version "12.0.0-1")
 
  4. Allow Cygwin installation to finish, then start from the installed shortcut
     *'Cygwin Terminal'*, or *'Cygwin64 Terminal'*, respectively.
@@ -129,32 +131,32 @@ Note: MSYS2 requires **64 bit** Windows 7 or newer.
 
     2. Open the MSYS2 shell. Update the package database and base packages:
 
-        ```
-        pacman -Syu
-        ```
+       ```
+       pacman -Syu
+       ```
     3. Exit and restart MSYS2. Update the rest of the base packages:
 
-        ```
-        pacman -Syu
-        ```
+       ```
+       pacman -Syu
+       ```
 
     4. Then install tools required to build julia:
 
-        ```
-        pacman -S cmake diffutils git m4 make patch tar p7zip curl python
-        ```
+       ```
+       pacman -S diffutils git m4 make patch tar p7zip curl python
+       ```
 
-        For 64 bit Julia, install the x86_64 version:
+       For 64 bit Julia, install the x86_64 version:
 
-        ```
-        pacman -S mingw-w64-x86_64-gcc
-        ```
+       ```
+       pacman -S mingw-w64-x86_64-gcc mingw-w64-x86_64-cmake mingw-w64-x86_64-clang
+       ```
 
-        For 32 bit Julia, install the i686 version:
+       For 32 bit Julia, install the i686 version:
 
-        ```
-        pacman -S mingw-w64-i686-gcc
-        ```
+       ```
+       pacman -S mingw-w64-i686-gcc mingw-w64-i686-cmake mingw-w64-i686-clang
+       ```
 
     5. Configuration of MSYS2 is complete. Now `exit` the MSYS2 shell.
  2. Build Julia and its dependencies with pre-build dependencies.
@@ -166,16 +168,24 @@ Note: MSYS2 requires **64 bit** Windows 7 or newer.
 
     2. Clone the Julia sources:
 
-        ```
-        git clone https://github.com/JuliaLang/julia.git
-        cd julia
-        ```
+       ```sh
+       git clone https://github.com/JuliaLang/julia.git
+       cd julia
+       ```
 
-    3. Start the build
+    3. If you want to use clang (currently required if building LLVM from source), put the following in your Make.user
+      ```
+      CC=/mingw64/bin/clang
+      CXX=/mingw64/bin/clang++
+      ```
+!!! warning "UCRT Unsupported"
+   Do not try to use any other clang that MSYS2 may install (which may not have the correct default target) or the "Clang" environment(which defaults to the currently unsupported ucrt).
+
+    4. Start the build
 
-        ```
-        make -j$(nproc)
-        ```
+       ```
+       make -j$(nproc)
+       ```
 
 !!! note "Pro tip: build in dir"
     ```sh
@@ -208,7 +218,7 @@ done
 
 **On Mac**: Install XCode, XCode command line tools, X11 (now
 [XQuartz](https://www.xquartz.org/)), and [MacPorts](https://www.macports.org/install.php)
-or [Homebrew](https://brew.sh/).  Then run `port install wine wget mingw-w64`, or `brew
+or [Homebrew](https://brew.sh/). Then run `port install wine wget mingw-w64`, or `brew
 install wine wget mingw-w64`, as appropriate.
 
 **Then run the build:**
@@ -259,14 +269,14 @@ Then run the resulting installer.
 ### GDB not attaching to the right process
 
  - Use the PID from the Windows task manager or `WINPID` from the `ps` command
-   instead of the PID from unix-style command line tools (e.g. `pgrep`).  You
+   instead of the PID from unix-style command line tools (e.g. `pgrep`). You
    may need to add the PID column if it is not shown by default in the Windows
    task manager.
 
 ### GDB not showing the right backtrace
 
  - When attaching to the julia process, GDB may not be attaching to the right
-   thread.  Use `info threads` command to show all the threads and
+   thread. Use `info threads` command to show all the threads and
    `thread <threadno>` to switch threads.
  - Be sure to use a 32 bit version of GDB to debug a 32 bit build of Julia, or
    a 64 bit version of GDB to debug a 64 bit build of Julia.
diff --git a/doc/src/devdocs/builtins.md b/doc/src/devdocs/builtins.md
new file mode 100644
index 0000000000000..ce56a7f9a6b91
--- /dev/null
+++ b/doc/src/devdocs/builtins.md
@@ -0,0 +1,48 @@
+# [Core.Builtins](@id lib-builtins)
+
+The following builtin functions are considered unstable, but provide the basic
+definitions for what defines the abilities and behaviors of a Julia
+program. They are typically accessed through a higher level generic API.
+
+## Raw access to memory
+
+```@docs
+Core.Intrinsics.pointerref
+Core.Intrinsics.pointerset
+Core.Intrinsics.atomic_pointerref
+Core.Intrinsics.atomic_pointerset
+Core.Intrinsics.atomic_pointerswap
+Core.Intrinsics.atomic_pointermodify
+Core.Intrinsics.atomic_pointerreplace
+```
+
+## Managed memory
+
+```@docs
+Core.memorynew
+Core.memoryrefnew
+Core.memoryrefoffset
+Core.memoryrefget
+Core.memoryrefset!
+Core.memoryref_isassigned
+Core.memoryrefswap!
+Core.memoryrefmodify!
+Core.memoryrefreplace!
+Core.memoryrefsetonce!
+```
+
+## Module bindings
+
+```@docs
+Core.get_binding_type
+```
+
+## Other
+
+```@docs
+Core.IntrinsicFunction
+Core.Intrinsics
+Core.IR
+Base.quoted
+Base.isa_ast_node
+```
diff --git a/doc/src/devdocs/callconv.md b/doc/src/devdocs/callconv.md
index 88158cb1eee84..2bf89fb92856b 100644
--- a/doc/src/devdocs/callconv.md
+++ b/doc/src/devdocs/callconv.md
@@ -18,10 +18,10 @@ signature.
   * LLVM scalars and vectors are passed by value.
   * LLVM aggregates (arrays and structs) are passed by reference.
 
-A small return values is returned as LLVM return values. A large return values is returned via
+A small return value is returned as LLVM return values. A large return value is returned via
 the "structure return" (`sret`) convention, where the caller provides a pointer to a return slot.
 
-An argument or return values that is a homogeneous tuple is sometimes represented as an LLVM vector
+An argument or return value that is a homogeneous tuple is sometimes represented as an LLVM vector
 instead of an LLVM array.
 
 ## JL Call Convention
diff --git a/doc/src/devdocs/cartesian.md b/doc/src/devdocs/cartesian.md
index 1d338cbd8fab3..8d5d6d1832e23 100644
--- a/doc/src/devdocs/cartesian.md
+++ b/doc/src/devdocs/cartesian.md
@@ -26,7 +26,7 @@ end
 ```
 
 In general, Cartesian allows you to write generic code that contains repetitive elements, like
-the nested loops in this example.  Other applications include repeated expressions (e.g., loop
+the nested loops in this example. Other applications include repeated expressions (e.g., loop
 unwinding) or creating function calls with variable numbers of arguments without using the "splat"
 construct (`i...`).
 
@@ -71,7 +71,7 @@ DocTestSetup = nothing
 
 The first argument to both of these macros is the number of expressions, which must be an integer.
 When you're writing a function that you intend to work in multiple dimensions, this may not be
-something you want to hard-code. The recommended approach is to use a `@generated function`.  Here's
+something you want to hard-code. The recommended approach is to use a `@generated function`. Here's
 an example:
 
 ```julia
@@ -91,7 +91,7 @@ Naturally, you can also prepare expressions or perform calculations before the `
 ### Anonymous-function expressions as macro arguments
 
 Perhaps the single most powerful feature in `Cartesian` is the ability to supply anonymous-function
-expressions that get evaluated at parsing time.  Let's consider a simple example:
+expressions that get evaluated at parsing time. Let's consider a simple example:
 
 ```julia
 @nexprs 2 j->(i_j = 1)
@@ -106,8 +106,8 @@ i_2 = 1
 ```
 
 In each generated statement, an "isolated" `j` (the variable of the anonymous function) gets replaced
-by values in the range `1:2`. Generally speaking, Cartesian employs a LaTeX-like syntax.  This
-allows you to do math on the index `j`.  Here's an example computing the strides of an array:
+by values in the range `1:2`. Generally speaking, Cartesian employs a LaTeX-like syntax. This
+allows you to do math on the index `j`. Here's an example computing the strides of an array:
 
 ```julia
 s_1 = 1
@@ -133,6 +133,7 @@ Base.Cartesian.@nref
 Base.Cartesian.@nextract
 Base.Cartesian.@nexprs
 Base.Cartesian.@ncall
+Base.Cartesian.@ncallkw
 Base.Cartesian.@ntuple
 Base.Cartesian.@nall
 Base.Cartesian.@nany
diff --git a/doc/src/devdocs/compiler.md b/doc/src/devdocs/compiler.md
index 0749eafd81bd3..c2ee5f9a375e1 100644
--- a/doc/src/devdocs/compiler.md
+++ b/doc/src/devdocs/compiler.md
@@ -94,22 +94,28 @@ Use appropriate care when copying.
 
 ## Specialized Calling Convention Signature Representation
 
-A `jl_returninfo_t` object describes the calling convention details of any callable.
+A `jl_returninfo_t` object describes the specialized calling convention details of any
+callable. It can be generated from any (specTypes, rettype) pair, such as a CodeInstance, or
+other place they are declared. This is the expected calling convention for specptr, but
+other data may be stored there. Only if the function pointer stored there has the
+expected specialized calling convention will the corresponding flag be set in specsigflags
+to indicate it is useable.
 
-If any of the arguments or return type of a method can be represented unboxed,
-and the method is not varargs, it'll be given an optimized calling convention
-signature based on its `specTypes` and `rettype` fields.
+If any of the arguments or return type of a method can be represented unboxed, and none are
+unable to be represented unboxed (such as an unbounded vararg), it will be given an
+optimized calling convention signature based on the `specTypes` and `rettype` values.
 
 The general principles are that:
 
 - Primitive types get passed in int/float registers.
 - Tuples of VecElement types get passed in vector registers.
 - Structs get passed on the stack.
-- Return values are handle similarly to arguments,
+- Return values are handled similarly to arguments,
   with a size-cutoff at which they will instead be returned via a hidden sret argument.
 
 The total logic for this is implemented by `get_specsig_function` and `deserves_sret`.
 
 Additionally, if the return type is a union, it may be returned as a pair of values (a pointer and a tag).
 If the union values can be stack-allocated, then sufficient space to store them will also be passed as a hidden first argument.
+If the struct to return needs gc roots, space for those will be passed as a hidden second argument.
 It is up to the callee whether the returned pointer will point to this space, a boxed object, or even other constant memory.
diff --git a/doc/src/devdocs/contributing/aiagents.md b/doc/src/devdocs/contributing/aiagents.md
new file mode 100644
index 0000000000000..6fc7b9a662071
--- /dev/null
+++ b/doc/src/devdocs/contributing/aiagents.md
@@ -0,0 +1,49 @@
+# Using AI agents to work on Julia
+
+> ![WARNING]
+> You are responsible for the code you submit in PRs. Do not submit PRs
+> containing AI-generated code that you do not understand or that does not
+> meet the ordinary quality bar for PRs to julia.
+
+This page documents best practices for setting up AI agents to work with Julia.
+If you find additional prompt instructions that work well for common tasks,
+consider submitting a PR to add these to AGENTS.md.
+
+## Google Jules
+
+Use the following for your `Initial Setup` configuration.
+
+```
+curl -fsSL https://install.julialang.org | sh -s -- -y --default-channel nightly
+. /home/swebot/.profile
+```
+
+Jules has access to the internet, so you can give it links to issues or additional
+documentation in your prompting.
+
+## OpenAI Codex
+
+Configure the following:
+
+Setup Script
+```
+apt update
+apt install less
+curl -fsSL https://install.julialang.org | sh -s -- -y --default-channel nightly
+source /root/.bashrc
+make -C /workspace/julia/doc alldeps JULIA_EXECUTABLE="/root/.juliaup/bin/julia"
+make -C /workspace/julia/test install-revise-deps JULIA_EXECUTABLE="/root/.juliaup/bin/julia"
+```
+
+Environment Variables
+```
+JULIA_PKG_OFFLINE=true
+```
+
+Codex does not have internet access after initial setup, so you cannot give it
+additional information as links - you will need to copy any relevant text into
+the prompt.
+
+Note that Codex rebuilds the environment after every invocation. This can
+add significant latency. Codex work best for well-defined tasks that can
+be solved in a single shot.
diff --git a/doc/src/devdocs/contributing/code-changes.md b/doc/src/devdocs/contributing/code-changes.md
new file mode 100644
index 0000000000000..41bb03f01409b
--- /dev/null
+++ b/doc/src/devdocs/contributing/code-changes.md
@@ -0,0 +1,114 @@
+# Code changes
+
+## Contributing to core functionality or base libraries
+
+*By contributing code to Julia, you are agreeing to release it under the [MIT License](https://github.com/JuliaLang/julia/tree/master/LICENSE.md).*
+
+The Julia community uses [GitHub issues](https://github.com/JuliaLang/julia/issues) to track and discuss problems, feature requests, and pull requests (PR).
+
+Issues and pull requests should have self explanatory titles such that they can be understood from the list of PRs and Issues.
+i.e. `Add {feature}` and `Fix {bug}` are good, `Fix #12345. Corrects the bug.` is bad.
+
+You can make pull requests for incomplete features to get code review. The convention is to open these as draft PRs and prefix
+the pull request title with "WIP:" for Work In Progress, or "RFC:" for Request for Comments when work is completed and ready
+for merging. This will prevent accidental merging of work that is in progress.
+
+Note: These instructions are for adding to or improving functionality in the base library. Before getting started, it can be helpful to discuss the proposed changes or additions on the [Julia Discourse forum](https://discourse.julialang.org) or in a GitHub issue---it's possible your proposed change belongs in a package rather than the core language. Also, keep in mind that changing stuff in the base can potentially break a lot of things. Finally, because of the time required to build Julia, note that it's usually faster to develop your code in stand-alone files, get it working, and then migrate it into the base libraries.
+
+Add new code to Julia's base libraries as follows (this is the "basic" approach; see a more efficient approach in the next section):
+
+ 1. Edit the appropriate file in the `base/` directory, or add new files if necessary. Create tests for your functionality and add them to files in the `test/` directory. If you're editing C or Scheme code, most likely it lives in `src/` or one of its subdirectories, although some aspects of Julia's REPL initialization live in `cli/`.
+
+ 2. Add any new files to `sysimg.jl` in order to build them into the Julia system image.
+
+ 3. Add any necessary export symbols in `exports.jl`.
+
+ 4. Include your tests in `test/Makefile` and `test/choosetests.jl`.
+
+Build as usual, and do `make clean testall` to test your contribution. If your contribution includes changes to Makefiles or external dependencies, make sure you can build Julia from a clean tree using `git clean -fdx` or equivalent (be careful – this command will delete any files lying around that aren't checked into git).
+
+### Running specific tests
+
+There are `make` targets for running specific tests:
+
+    make test-bitarray
+
+You can also use the `runtests.jl` script, e.g. to run `test/bitarray.jl` and `test/math.jl`:
+
+    ./usr/bin/julia test/runtests.jl bitarray math
+
+### Modifying base more efficiently with Revise.jl
+
+[Revise](https://github.com/timholy/Revise.jl) is a package that
+tracks changes in source files and automatically updates function
+definitions in your running Julia session. Using it, you can make
+extensive changes to Base without needing to rebuild in order to test
+your changes.
+
+Here is the standard procedure:
+
+1. If you are planning changes to any types or macros, make those
+   changes and build julia using `make`. (This is
+   necessary because `Revise` cannot handle changes to type
+   definitions or macros.) Unless it's
+   required to get Julia to build, you do not have to add any
+   functionality based on the new types, just the type definitions
+   themselves.
+
+2. Start a Julia REPL session. Then issue the following commands:
+
+```julia
+using Revise    # if you aren't launching it in your `.julia/config/startup.jl`
+Revise.track(Base)
+```
+
+3. Edit files in `base/`, save your edits, and test the
+   functionality.
+
+If you need to restart your Julia session, just start at step 2 above.
+`Revise.track(Base)` will note any changes from when Julia was last
+built and incorporate them automatically. You only need to rebuild
+Julia if you made code-changes that Revise cannot handle.
+
+For convenience, there are also `test-revise-*` targets for every [`test-*`
+target](https://github.com/JuliaLang/julia/blob/master/CONTRIBUTING.md#running-specific-tests) that use Revise to load any modifications to Base into the current
+system image before running the corresponding test. This can be useful as a shortcut
+on the command line (since tests aren't always designed to be run outside the
+runtest harness).
+
+## Contributing to the standard library
+
+The standard library (stdlib) packages are baked into the Julia system image.
+When running the ordinary test workflow on the stdlib packages, the system image
+version overrides the version you are developing.
+To test stdlib packages, you can do the following steps:
+
+1. Edit the UUID field of the `Project.toml` in the stdlib package
+2. Change the current directory to the directory of the stdlib you are developing
+3. Start julia with `julia --project=.`
+4. You can now test the package by running `pkg> test` in Pkg mode.
+
+Because you changed the UUID, the package manager treats the stdlib package as
+different from the one in the system image, and the system image version will
+not override the package.
+
+Be sure to change the UUID value back before making the pull request.
+
+### News-worthy changes
+
+For new functionality and other substantial changes, add a brief summary to `NEWS.md`. The news item should cross reference the pull request (PR) parenthetically, in the form `([#pr])`. To add the PR reference number, first create the PR, then push an additional commit updating `NEWS.md` with the PR reference number. We periodically run `./julia doc/NEWS-update.jl` from the julia directory to update the cross-reference links, but this should not be done in a typical PR in order to avoid conflicting commits.
+
+### Annotations for new features, deprecations and behavior changes
+
+API additions and deprecations, and minor behavior changes are allowed in minor version releases.
+For documented features that are part of the public API, a compatibility note should be added into
+the manual or the docstring. It should state the Julia minor version that changed the behavior
+and have a brief message describing the change.
+
+At the moment, this should always be done with the following `compat` admonition
+(so that it would be possible to programmatically find the annotations in the future):
+
+  ```
+  !!! compat "Julia 1.X"
+      This method was added in Julia 1.X.
+  ```
diff --git a/doc/src/devdocs/contributing/documentation.md b/doc/src/devdocs/contributing/documentation.md
new file mode 100644
index 0000000000000..e12ac1d482063
--- /dev/null
+++ b/doc/src/devdocs/contributing/documentation.md
@@ -0,0 +1,87 @@
+# Improving documentation
+
+*By contributing documentation to Julia, you are agreeing to release it under the [MIT License](https://github.com/JuliaLang/julia/tree/master/LICENSE.md).*
+
+Julia's documentation source files are stored in the `doc/` directory and all docstrings are found in `base/`. Like everything else these can be modified using `git`. Documentation is built with [Documenter.jl](https://github.com/JuliaDocs/Documenter.jl), which uses Markdown syntax. The HTML documentation can be built locally by running
+
+```
+make docs
+```
+
+from Julia's root directory. This will rebuild the Julia system image, then install or update the package dependencies required to build the documentation, and finally build the HTML documentation and place the resulting files in `doc/_build/html/`.
+
+> **Note**
+>
+> When making changes to any of Julia's documentation it is recommended that you run `make docs` to check that your changes are valid and do not produce any errors before opening a pull request.
+
+Below are outlined the three most common types of documentation changes and the steps required to perform them. Please note that the following instructions do not cover the full range of features provided by Documenter.jl. Refer to [Documenter's documentation](https://juliadocs.github.io/Documenter.jl/stable) if you encounter anything that is not covered by the sections below.
+
+## Modifying files in `doc/src/`
+
+Most of the source text for the Julia Manual is located in `doc/src/`. To update or add new text to any one of the existing files the following steps should be followed:
+
+1. update the text in whichever `.md` files are applicable;
+2. run `make docs` from the root directory;
+3. check the output in `doc/_build/html/` to make sure the changes are correct;
+4. commit your changes and open a pull request.
+
+> **Note**
+>
+> The contents of `doc/_build/` does **not** need to be committed when you make changes.
+
+To add a **new file** to `doc/src/` rather than updating a file replace step `1` above with
+
+1. add the file to the appropriate subdirectory in `doc/src/` and also add the file path to the `PAGES` vector in `doc/make.jl`.
+
+## Modifying an existing docstring in `base/`
+
+All docstrings are written inline above the methods or types they are associated with and can be found by clicking on the `source` link that appears below each docstring in the HTML file. The steps needed to make a change to an existing docstring are listed below:
+
+1. find the docstring in `base/`;
+2. update the text in the docstring;
+3. run `make docs` from the root directory;
+4. check the output in `doc/_build/html/` to make sure the changes are correct;
+5. commit your changes and open a pull request.
+
+## Adding a new docstring to `base/`
+
+The steps required to add a new docstring are listed below:
+
+1. find a suitable definition in `base/` that the docstring will be most applicable to;
+2. add a docstring above the definition;
+3. find a suitable `@docs` code block in one of the `doc/src/stdlib/` files where you would like the docstring to appear;
+4. add the name of the definition to the `@docs` code block. For example, with a docstring added to a function `bar`
+
+    ```julia
+    "..."
+    function bar(args...)
+        # ...
+    end
+    ```
+
+   you would add the name `bar` to a `@docs` block in `doc/src/stdlib/`
+
+        ```@docs
+        foo
+        bar # <-- Added this one.
+        baz
+        ```
+
+5. run `make docs` from the root directory;
+6. check the output in `doc/_build/html` to make sure the changes are correct;
+7. commit your changes and open a pull request.
+
+## Doctests
+
+Examples written within docstrings can be used as testcases known as "doctests" by annotating code blocks with `jldoctest`.
+
+    ```jldoctest
+    julia> uppercase("Docstring test")
+    "DOCSTRING TEST"
+    ```
+
+A doctest needs to match an interactive REPL including the `julia>` prompt. It is recommended to add the header `# Examples` above the doctests.
+
+See the documentation of [writing jldoctests](@ref writing-jldoctests) for best
+practices on how to write doctests for common scenarios and the `doc/README.md`
+file for how to run the doctests.
diff --git a/doc/src/devdocs/contributing/formatting.md b/doc/src/devdocs/contributing/formatting.md
new file mode 100644
index 0000000000000..e3fc8ec129908
--- /dev/null
+++ b/doc/src/devdocs/contributing/formatting.md
@@ -0,0 +1,22 @@
+# Code Formatting Guidelines
+
+## General Formatting Guidelines for Julia code contributions
+
+ - Follow the latest dev version of [Julia Style Guide](https://docs.julialang.org/en/v1/manual/style-guide/).
+ - Use whitespace to make the code more readable
+ - No whitespace at the end of a line (trailing whitespace)
+ - Comments are good, especially when they explain the algorithm
+ - Try to adhere to a 92 character line length limit
+ - It is generally preferred to use ASCII operators and identifiers over
+   Unicode equivalents whenever possible
+ - In docstrings refer to the language as "Julia" and the executable as "`julia`"
+
+## General Formatting Guidelines For C code contributions
+
+ - 4 spaces per indentation level, no tabs
+ - Space between `if` and `(` (`if (x) ...`)
+ - Newline before opening `{` in function definitions
+ - `f(void)` for 0-argument function declarations
+ - Newline between `}` and `else` instead of `} else {`
+ - If one part of an `if..else` chain uses `{ }` then all should
+ - No whitespace at the end of a line
diff --git a/doc/src/devdocs/contributing/git-workflow.md b/doc/src/devdocs/contributing/git-workflow.md
new file mode 100644
index 0000000000000..2142eed69632a
--- /dev/null
+++ b/doc/src/devdocs/contributing/git-workflow.md
@@ -0,0 +1,19 @@
+# Git workflow recommendations
+
+## Git Recommendations For Pull Requests
+
+ - Avoid working from the `master` branch of your fork. Create a new branch as it will make it easier to update your pull request if Julia's `master` changes.
+ - Try to [squash](https://gitready.com/advanced/2009/02/10/squashing-commits-with-rebase.html) together small commits that make repeated changes to the same section of code, so your pull request is easier to review. A reasonable number of separate well-factored commits is fine, especially for larger changes.
+ - If any conflicts arise due to changes in Julia's `master`, prefer updating your pull request branch with `git rebase` versus `git merge` or `git pull`, since the latter will introduce merge commits that clutter the git history with noise that makes your changes more difficult to review.
+ - Descriptive commit messages are good.
+ - Using `git add -p` or `git add -i` can be useful to avoid accidentally committing unrelated changes.
+ - When linking to specific lines of code in discussion of an issue or pull request, hit the `y` key while viewing code on GitHub to reload the page with a URL that includes the specific version that you're viewing. That way any lines of code that you refer to will still make sense in the future, even if the content of the file changes.
+ - Whitespace can be automatically removed from existing commits with `git rebase`.
+   - To remove whitespace for the previous commit, run
+     `git rebase --whitespace=fix HEAD~1`.
+   - To remove whitespace relative to the `master` branch, run
+     `git rebase --whitespace=fix master`.
+
+### Git Recommendations For Pull Request Reviewers
+
+- When merging, we generally like `squash+merge`. Unless it is the rare case of a PR with carefully staged individual commits that you want in the history separately, in which case `merge` is acceptable, but usually prefer `squash+merge`.
diff --git a/doc/src/devdocs/contributing/jldoctests.md b/doc/src/devdocs/contributing/jldoctests.md
new file mode 100644
index 0000000000000..6631390cf9ca3
--- /dev/null
+++ b/doc/src/devdocs/contributing/jldoctests.md
@@ -0,0 +1,114 @@
+# [Writing jldoctests](@id writing-jldoctests)
+
+This page describes how to write and maintain `jldoctest` blocks in the documentation. Following these guidelines helps keep doctests reliable and easy to read.
+
+## Filters
+
+Use `filter =` whenever output contains text that might vary across runs.
+The following are common situations where this may happen:
+
+- The output contains arrays with undefined memory (e.g. from `undef` or `similar`)
+- The output contains random numbers
+- The output contains timing information
+- The output contains file system paths
+
+
+### Common filter sequences
+
+The documentation relies on several recurring patterns:
+- `r"int.jl:\\d+"` — remove line numbers from introspection macros.
+- `r"Stacktrace:(\\n \\[0-9]+\\].*)*"` — hide stack traces when illustrating
+  errors.
+- `r"Closest candidates.*\\n  .*"` — skip the method suggestions printed by
+  `MethodError`.
+- `r"@ .*"` — strip file locations from the output of `methods` or
+  `@which`.
+- `r"\\@world\\(MyStruct, \\d+:\\d+\\)"` — filter world age numbers.
+- `r"with \\d+ methods"` — ignore method counts when redefining functions.
+- `r"[0-9\\.]+ seconds \\(.*?\\)"` — remove timing output with memory
+  information.
+- `r"[0-9\\.]+ seconds"` — remove simple timing results.
+- `r"[0-9\\.]+"` — filter digits from names such as anonymous functions.
+- `r"([A-B] [0-5])"` and `r"[A-B] [X-Z] [0-5]"` — account for non-deterministic
+  process output.
+- `r"(world\\nhello|hello\\nworld)"` — allow either ordering of interleaved
+  output.
+
+If none of these match your situation, craft a regular expression that
+removes the varying text. Using filters keeps doctests stable across
+platforms and Julia versions.
+
+!!! note "Double escaping in docstrings"
+    When writing regex filters inside docstrings, remember to double escape
+    backslashes. For example, use `r"[\\d\\.]+"` instead of `r"[\d\.]+"`.
+    This is necessary because the docstring itself processes escape sequences
+    before the regex is created.
+
+## Setup code
+
+Small setup expressions may be placed inline using the `setup =` option:
+
+````
+```jldoctest; setup = :(using InteractiveUtils)
+...
+```
+````
+
+For longer setup code or if multiple blocks require the same environment, use the
+`DocTestSetup` meta block:
+
+````
+```@meta
+DocTestSetup = :(import Random; Random.seed!(1234))
+```
+````
+
+and disable it afterwards with
+
+````
+```@meta
+DocTestSetup = nothing
+```
+````
+
+### Teardown code
+
+If you need teardown code (e.g. to delete created temporary files or to reset
+the current directory), you can use the `teardown =` option:
+
+````
+```jldoctest; setup = :(oldpath = pwd(); cd(mktempdir())), teardown = :(cd(oldpath))
+...
+```
+````
+
+## Maintaining state between snippets
+
+Related doctest blocks can share state by giving them the same label after the
+`jldoctest` keyword. The manual uses this pattern to demonstrate mutation:
+
+````
+```jldoctest mutation_vs_rebind
+julia> a = [1,2,3]
+...
+```
+````
+
+and later
+
+````
+```jldoctest mutation_vs_rebind
+julia> a[1] = 42
+...
+```
+````
+
+Blocks with the same name execute sequentially during doctesting, so variables
+created in the first block remain available in the following ones.
+
+When a snippet needs to preserve its result for later examples, give it a label
+and reuse that label. This avoids repeating setup code and mirrors a REPL
+session more closely.
+
+## Further reading
+For a complete reference of doctest syntax, see the [corresponding Documenter.jl docs](https://documenter.juliadocs.org/stable/man/doctests/).
diff --git a/doc/src/devdocs/contributing/patch-releases.md b/doc/src/devdocs/contributing/patch-releases.md
new file mode 100644
index 0000000000000..515c2cfd35225
--- /dev/null
+++ b/doc/src/devdocs/contributing/patch-releases.md
@@ -0,0 +1,44 @@
+# Contributing to patch releases
+
+The process of [creating a patch release](https://docs.julialang.org/en/v1/devdocs/build/distributing/#Point-releasing-101) is roughly as follows:
+
+1. Create a new branch (e.g. `backports-release-1.10`) against the relevant minor release
+   branch (e.g. `release-1.10`). Usually a corresponding pull request is created as well.
+
+2. Add commits, nominally from `master` (hence "backports"), to that branch.
+   See below for more information on this process.
+
+3. Run the [BaseBenchmarks.jl](https://github.com/JuliaCI/BaseBenchmarks.jl) benchmark
+   suite and [PkgEval.jl](https://github.com/JuliaCI/PkgEval.jl) package ecosystem
+   exerciser against that branch. Nominally BaseBenchmarks.jl and PkgEval.jl are
+   invoked via [Nanosoldier.jl](https://github.com/JuliaCI/Nanosoldier.jl) from
+   the pull request associated with the backports branch. Fix any issues.
+
+4. Once all test and benchmark reports look good, merge the backports branch into
+   the corresponding release branch (e.g. merge `backports-release-1.10` into
+   `release-1.10`).
+
+5. Open a pull request that bumps the version of the relevant minor release to the
+   next patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37718).
+
+6. Ping `@JuliaLang/releases` to tag the patch release and update the website.
+
+7. Open a pull request that bumps the version of the relevant minor release to the
+   next prerelease patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37724).
+
+Step 2 above, i.e. backporting commits to the `backports-release-X.Y` branch, has largely
+been automated via [`Backporter`](https://github.com/KristofferC/Backporter): Backporter
+searches for merged pull requests with the relevant `backport-X.Y` tag, and attempts to
+cherry-pick the commits from those pull requests onto the `backports-release-X.Y` branch.
+Some commits apply successfully without intervention, others not so much. The latter
+commits require "manual" backporting, with which help is generally much appreciated.
+Backporter generates a report identifying those commits it managed to backport automatically
+and those that require manual backporting; this report is usually copied into the first
+post of the pull request associated with `backports-release-X.Y` and maintained as
+additional commits are automatically and/or manually backported.
+
+When contributing a manual backport, if you have the necessary permissions, please push the
+backport directly to the `backports-release-X.Y` branch. If you lack the relevant
+permissions, please open a pull request against the `backports-release-X.Y` branch with the
+manual backport. Once the manual backport is live on the `backports-release-X.Y` branch,
+please remove the `backport-X.Y` tag from the originating pull request for the commits.
diff --git a/doc/src/devdocs/contributing/tests.md b/doc/src/devdocs/contributing/tests.md
new file mode 100644
index 0000000000000..c1f25da5f4a7c
--- /dev/null
+++ b/doc/src/devdocs/contributing/tests.md
@@ -0,0 +1,19 @@
+# Writing tests
+
+There are never enough tests. Track [code coverage at Codecov](https://codecov.io/github/JuliaLang/julia), and help improve it.
+
+1. Go visit https://codecov.io/github/JuliaLang/julia.
+
+2. Browse through the source files and find some untested functionality (highlighted in red) that you think you might be able to write a test for.
+
+3. Write a test that exercises this functionality---you can add your test to one of the existing files, or start a new one, whichever seems most appropriate to you. If you're adding a new test file, make sure you include it in the list of tests in `test/choosetests.jl`. https://docs.julialang.org/en/v1/stdlib/Test/ may be helpful in explaining how the testing infrastructure works.
+
+4. Run `make test-all` to rebuild Julia and run your new test(s). If you had to fix a bug or add functionality in `base`, this will ensure that your test passes and that you have not introduced extraneous whitespace.
+
+5. Submit the test as a pull request (PR).
+
+* Code for the buildbot configuration is maintained at: https://github.com/staticfloat/julia-buildbot
+* You can see the current buildbot setup at: https://build.julialang.org/builders
+* [Issue 9493](https://github.com/JuliaLang/julia/issues/9493) and [issue 11885](https://github.com/JuliaLang/julia/issues/11885) have more detailed discussion on code coverage.
+
+Code coverage shows functionality that still needs "proof of concept" tests. These are important, as are tests for tricky edge cases, such as converting between integer types when the number to convert is near the maximum of the range of one of the integer types. Even if a function already has some coverage on Codecov, it may still benefit from tests for edge cases.
diff --git a/doc/src/devdocs/debuggingtips.md b/doc/src/devdocs/debuggingtips.md
index 7639e8be2ef96..f9ece853bf8e1 100644
--- a/doc/src/devdocs/debuggingtips.md
+++ b/doc/src/devdocs/debuggingtips.md
@@ -41,11 +41,16 @@ useful.
 
 ## Useful Julia functions for Inspecting those variables
 
-  * `jl_gdblookup($rip)` :: For looking up the current function and line. (use `$eip` on i686 platforms)
+  * `jl_print_task_backtraces(0)` :: Similar to gdb's `thread apply all bt` or lldb's `thread backtrace
+    all`. Runs all threads while printing backtraces for all existing tasks.
+  * `jl_gdblookup($pc)` :: For looking up the current function and line.
+  * `jl_gdblookupinfo($pc)` :: For looking up the current method instance object.
+  * `jl_gdbdumpcode(mi)` :: For dumping all of `code_typed/code_llvm/code_asm` when the REPL is not working right.
   * `jlbacktrace()` :: For dumping the current Julia backtrace stack to stderr. Only usable after
     `record_backtrace()` has been called.
   * `jl_dump_llvm_value(Value*)` :: For invoking `Value->dump()` in gdb, where it doesn't work natively.
     For example, `f->linfo->functionObject`, `f->linfo->specFunctionObject`, and `to_function(f->linfo)`.
+  * `jl_dump_llvm_module(Module*)` :: For invoking `Module->dump()` in gdb, where it doesn't work natively.
   * `Type->dump()` :: only works in lldb. Note: add something like `;1` to prevent lldb from printing
     its prompt over the output
   * `jl_eval_string("expr")` :: for invoking side-effects to modify the current state or to lookup
@@ -123,7 +128,7 @@ The corresponding LLDB command is (after the process is started):
 (lldb) pro hand -p true -s false -n false SIGSEGV
 ```
 
-If you are debugging a segfault with threaded code, you can set a breakpoint on `jl_critical_error`
+If you are debugging a segfault with threaded code, you can set a breakpoint on `jl_fprint_critical_error`
 (`sigdie_handler` should also work on Linux and BSD) in order to only catch the actual segfault
 rather than the GC synchronization points.
 
@@ -172,7 +177,7 @@ $2 = void
 
 The most recent `jl_apply` is at frame #3, so we can go back there and look at the AST for the
 function `julia_convert_16886`. This is the uniqued name for some method of `convert`. `f` in
-this frame is a `jl_function_t*`, so we can look at the type signature, if any, from the `specTypes`
+this frame is a `jl_value_t*`, so we can look at the type signature, if any, from the `specTypes`
 field:
 
 ```
@@ -195,7 +200,7 @@ Expr(:return, Expr(:call, :box, :Float32, Expr(:call, :fptrunc, :Float32, :x)::A
 ```
 
 Finally, and perhaps most usefully, we can force the function to be recompiled in order to step
-through the codegen process. To do this, clear the cached `functionObject` from the `jl_lamdbda_info_t*`:
+through the codegen process. To do this, clear the cached `functionObject` from the `jl_lambda_info_t*`:
 
 ```
 (gdb) p f->linfo->functionObject
@@ -236,7 +241,7 @@ process)
 
 Julia now works out of the box with [rr](https://rr-project.org/), the lightweight recording and
 deterministic debugging framework from Mozilla. This allows you to replay the trace of an execution
-deterministically.  The replayed execution's address spaces, register contents, syscall data etc
+deterministically. The replayed execution's address spaces, register contents, syscall data etc
 are exactly the same in every run.
 
 A recent version of rr (3.1.0 or higher) is required.
diff --git a/doc/src/devdocs/diagnostics.md b/doc/src/devdocs/diagnostics.md
new file mode 100644
index 0000000000000..386f65db2e665
--- /dev/null
+++ b/doc/src/devdocs/diagnostics.md
@@ -0,0 +1,37 @@
+# Diagnostics used by the package ecosystem
+
+This page documents "hooks" embedded in Julia that are primarily used by
+external tools. Many of these tools are designed to perform analyses that are
+too complicated to be made part of Julia proper.
+
+## SnoopCompile
+
+SnoopCompile "snoops" on Julia's compiler to extract information for analysis
+about invalidations and type-inference. There are a few internals it uses for
+different purposes:
+
+- recording invalidations: `Base.StaticData.debug_method_invalidation` and
+  `ccall(:jl_debug_method_invalidation, ...)`: these record different modes of
+  invalidation. Users of SnoopCompile will transiently turn these on when, e.g.,
+  loading packages. Each produces a standard log format; messing with the log
+  format might require a complementary pull request to SnoopCompile.
+  SnoopCompile will process these logs and generate trees of invalidated
+  CodeInstances that are attributable to specific changes in the method tables
+  or bindings.
+- observing inference: `ccall(:jl_set_newly_inferred, ...)` and
+  `ccall(:jl_set_inference_entrance_backtraces, ...)`: these are used to
+  understand how inference gets triggered. The main purpose is to allow
+  performance diagnostics to understand sources of TTFX. The second of these
+  `ccall`s records a backtrace on every entrance to type-inference, so that
+  SnoopCompile can determine the caller of a dynamically-dispatched call. This
+  is needed to attribute "cause" for new type inference.
+
+  The `jl_set_inference_entrance_backtraces` function accepts an array where
+  inference entrance events will be recorded. Each inference event stores two
+  consecutive array elements: first the `CodeInstance` object, then the
+  backtrace representation. So for N inference events, the array will contain 2N
+  elements arranged as: `[ci₁, bt₁, ci₂, bt₂, ..., ciₙ, btₙ]`.
+
+  Note that the backtrace elements `btᵢ` contain raw backtrace data that
+  typically needs to be processed using `stacktrace(Base._reformat_bt(btᵢ...))`.
+  to convert them into a usable stack trace format for analysis.
diff --git a/doc/src/devdocs/eval.md b/doc/src/devdocs/eval.md
index 8f2fd68159676..889831ba6470d 100644
--- a/doc/src/devdocs/eval.md
+++ b/doc/src/devdocs/eval.md
@@ -15,7 +15,7 @@ function, and primitive function, before turning into the desired result (hopefu
         short.
       * AST
 
-        Abstract Syntax Tree The AST is the digital representation of the code structure. In this form
+        Abstract Syntax Tree. The AST is the digital representation of the code structure. In this form
         the code has been tokenized for meaning so that it is more suitable for manipulation and execution.
 
 
@@ -62,25 +62,11 @@ The 10,000 foot view of the whole process is as follows:
 
 ## [Parsing](@id dev-parsing)
 
-The Julia parser is a small lisp program written in femtolisp, the source-code for which is distributed
-inside Julia in [src/flisp](https://github.com/JuliaLang/julia/tree/master/src/flisp).
-
-The interface functions for this are primarily defined in [`jlfrontend.scm`](https://github.com/JuliaLang/julia/blob/master/src/jlfrontend.scm).
-The code in [`ast.c`](https://github.com/JuliaLang/julia/blob/master/src/ast.c) handles this handoff
-on the Julia side.
-
-The other relevant files at this stage are [`julia-parser.scm`](https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm),
-which handles tokenizing Julia code and turning it into an AST, and [`julia-syntax.scm`](https://github.com/JuliaLang/julia/blob/master/src/julia-syntax.scm),
-which handles transforming complex AST representations into simpler, "lowered" AST representations
-which are more suitable for analysis and execution.
-
-If you want to test the parser without re-building Julia in its entirety, you can run the frontend
-on its own as follows:
-
-    $ cd src
-    $ flisp/flisp
-    > (load "jlfrontend.scm")
-    > (jl-parse-file "<filename>")
+By default, Julia uses [JuliaSyntax.jl](https://github.com/JuliaLang/JuliaSyntax.jl) to produce the
+AST. Historically, it used a small lisp program written in femtolisp, the source-code for which is
+distributed inside Julia in [src/flisp](https://github.com/JuliaLang/julia/tree/master/src/flisp).
+If the `JULIA_USE_FLISP_PARSER` environment variable is set to `1`, the old parser will be used
+instead.
 
 ## [Macro Expansion](@id dev-macro-expansion)
 
@@ -89,7 +75,7 @@ the expression. Macro expansion involves a handoff from [`eval()`](@ref) (in Jul
 function `jl_macroexpand()` (written in `flisp`) to the Julia macro itself (written in - what
 else - Julia) via `fl_invoke_julia_macro()`, and back.
 
-Typically, macro expansion is invoked as a first step during a call to [`Meta.lower()`](@ref)/`jl_expand()`,
+Typically, macro expansion is invoked as a first step during a call to [`Meta.lower()`](@ref)/`Core._lower()`,
 although it can also be invoked directly by a call to [`macroexpand()`](@ref)/`jl_macroexpand()`.
 
 ## [Type Inference](@id dev-type-inference)
diff --git a/doc/src/devdocs/external_profilers.md b/doc/src/devdocs/external_profilers.md
index 956d66508fc89..5f54c2619559b 100644
--- a/doc/src/devdocs/external_profilers.md
+++ b/doc/src/devdocs/external_profilers.md
@@ -8,12 +8,24 @@ The currently supported profilers are:
 
 ### Adding New Zones
 
+#### From C/C++ code
+
 To add new zones, use the `JL_TIMING` macro. You can find numerous examples throughout the codebase by searching for `JL_TIMING`. To add a new type of zone
 you add it to `JL_TIMING_OWNERS` (and possibly `JL_TIMING_EVENTS`).
 
+#### From Julia code
+
+The `Compiler.@zone` macro can be used to add a zone from Julia code, it is used as:
+
+```julia
+Compiler.@zone "ZONE NAME" begin
+    ...
+end
+```
+
 ### Dynamically Enabling and Disabling Zones
 
-The `JULIA_TIMING_SUBSYSTEMS` environment variable allows you to enable or disable zones for a specific Julia run. For instance, setting the variable to `+GC,-INFERENCE` will enable the `GC` zones and disable the `INFERENCE`
+The [`JULIA_TIMING_SUBSYSTEMS`](@ref JULIA_TIMING_SUBSYSTEMS) environment variable allows you to enable or disable zones for a specific Julia run. For instance, setting the variable to `+GC,-INFERENCE` will enable the `GC` zones and disable the `INFERENCE`
 zones.
 
 ## Tracy Profiler
@@ -39,7 +51,13 @@ run(TracyProfiler_jll.tracy())
 !!! note
     On macOS, you may want to set the `TRACY_DPI_SCALE` environment variable to `1.0` if the UI elements in the profiler appear excessively large.
 
-To run a "headless" instance that saves the trace to disk, use `TracyProfiler_jll.capture() -o mytracefile.tracy` instead.
+To run a "headless" instance that saves the trace to disk, use
+
+```julia
+run(`$(TracyProfiler_jll.capture()) -o mytracefile.tracy`)
+```
+
+instead.
 
 For information on using the Tracy UI, refer to the Tracy manual.
 
diff --git a/doc/src/devdocs/functions.md b/doc/src/devdocs/functions.md
index 283f63b2d0dce..fb1123f9b19ae 100644
--- a/doc/src/devdocs/functions.md
+++ b/doc/src/devdocs/functions.md
@@ -1,28 +1,25 @@
 # Julia Functions
 
+
 This document will explain how functions, method definitions, and method tables work.
 
 ## Method Tables
 
 Every function in Julia is a generic function. A generic function is conceptually a single function,
-but consists of many definitions, or methods. The methods of a generic function are stored in
-a method table. Method tables (type `MethodTable`) are associated with `TypeName`s. A `TypeName`
-describes a family of parameterized types. For example `Complex{Float32}` and `Complex{Float64}`
-share the same `Complex` type name object.
-
-All objects in Julia are potentially callable, because every object has a type, which in turn
-has a `TypeName`.
+but consists of many definitions, or methods. The methods of a generic function are stored in a
+method table. There is one global method table (type `MethodTable`) named `Core.methodtable`. Any
+default operation on methods (such as calls) uses that table.
 
 ## [Function calls](@id Function-calls)
 
-Given the call `f(x,y)`, the following steps are performed: first, the method table to use is
-accessed as `typeof(f).name.mt`. Second, an argument tuple type is formed, `Tuple{typeof(f), typeof(x), typeof(y)}`.
-Note that the type of the function itself is the first element. This is because the type might
-have parameters, and so needs to take part in dispatch. This tuple type is looked up in the method
-table.
+Given the call `f(x, y)`, the following steps are performed: First, a tuple type is formed,
+`Tuple{typeof(f), typeof(x), typeof(y)}`. Note that the type of the function itself is the first
+element. This is because the function itself participates symmetrically in method lookup with the
+other arguments. This tuple type is looked up in the global method table. However, the system can
+then cache the results, so these steps can be skipped later for similar lookups.
 
 This dispatch process is performed by `jl_apply_generic`, which takes two arguments: a pointer
-to an array of the values f, x, and y, and the number of values (in this case 3).
+to an array of the values `f`, `x`, and `y`, and the number of values (in this case 3).
 
 Throughout the system, there are two kinds of APIs that handle functions and argument lists: those
 that accept the function and arguments separately, and those that accept a single argument structure.
@@ -41,22 +38,13 @@ This entry point for the same functionality accepts the function separately, so
 does not contain the function:
 
 ```c
-jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t nargs);
+jl_value_t *jl_call(jl_value_t *f, jl_value_t **args, int32_t nargs);
 ```
 
 ## Adding methods
 
 Given the above dispatch process, conceptually all that is needed to add a new method is (1) a
 tuple type, and (2) code for the body of the method. `jl_method_def` implements this operation.
-`jl_method_table_for` is called to extract the relevant method table from what would be
-the type of the first argument. This is much more complicated than the corresponding procedure
-during dispatch, since the argument tuple type might be abstract. For example, we can define:
-
-```julia
-(::Union{Foo{Int},Foo{Int8}})(x) = 0
-```
-
-which works since all possible matching methods would belong to the same method table.
 
 ## Creating generic functions
 
@@ -93,9 +81,7 @@ end
 
 ## Constructors
 
-A constructor call is just a call to a type. The method table for `Type` contains all
-constructor definitions. All subtypes of `Type` (`Type`, `UnionAll`, `Union`, and `DataType`)
-currently share a method table via special arrangement.
+A constructor call is just a call to a type, to a method defined on `Type{T}`.
 
 ## Builtins
 
@@ -117,7 +103,7 @@ function lines(words)
             n += length(w)+1
         end
     end
-    String(take!(io))
+    takestring!(io)
 end
 import Markdown
 [string(n) for n in names(Core;all=true)
@@ -127,18 +113,14 @@ import Markdown
     Markdown.parse
 ```
 
-These are all singleton objects whose types are subtypes of `Builtin`, which is a subtype of
-`Function`. Their purpose is to expose entry points in the run time that use the "jlcall" calling
-convention:
+These are mostly singleton objects all of whose types are subtypes of `Builtin`, which is a
+subtype of `Function`. Their purpose is to expose entry points in the run time that use the
+"jlcall" calling convention:
 
 ```c
 jl_value_t *(jl_value_t*, jl_value_t**, uint32_t)
 ```
 
-The method tables of builtins are empty. Instead, they have a single catch-all method cache entry
-(`Tuple{Vararg{Any}}`) whose jlcall fptr points to the correct function. This is kind of a hack
-but works reasonably well.
-
 ## Keyword arguments
 
 Keyword arguments work by adding methods to the kwcall function. This function
@@ -187,7 +169,7 @@ is absent.
 Finally there is the kwsorter definition:
 
 ```
-function (::Core.kwftype(typeof(circle)))(kws, circle, center, radius)
+function (::Core.kwcall)(kws, circle, center, radius)
     if haskey(kws, :color)
         color = kws.color
     else
@@ -205,30 +187,6 @@ function (::Core.kwftype(typeof(circle)))(kws, circle, center, radius)
 end
 ```
 
-The function `Core.kwftype(t)` creates the field `t.name.mt.kwsorter` (if it hasn't been created
-yet), and returns the type of that function.
-
-This design has the feature that call sites that don't use keyword arguments require no special
-handling; everything works as if they were not part of the language at all. Call sites that do
-use keyword arguments are dispatched directly to the called function's kwsorter. For example the
-call:
-
-```julia
-circle((0,0), 1.0, color = red; other...)
-```
-
-is lowered to:
-
-```julia
-kwcall(merge((color = red,), other), circle, (0,0), 1.0)
-```
-
- `kwcall` (also in`Core`) denotes a kwcall signature and dispatch.
-The keyword splatting operation (written as `other...`) calls the named tuple `merge` function.
-This function further unpacks each *element* of `other`, expecting each one to contain two values
-(a symbol and a value).
-Naturally, a more efficient implementation is available if all splatted arguments are named tuples.
-Notice that the original `circle` function is passed through, to handle closures.
 
 ## [Compiler efficiency issues](@id compiler-efficiency-issues)
 
@@ -251,23 +209,18 @@ sees an argument in the `Function` type hierarchy passed to a slot declared as `
 it behaves as if the `@nospecialize` annotation were applied. This heuristic seems to be extremely
 effective in practice.
 
-The next issue concerns the structure of method cache hash tables. Empirical studies show that
-the vast majority of dynamically-dispatched calls involve one or two arguments. In turn, many
-of these cases can be resolved by considering only the first argument. (Aside: proponents of single
-dispatch would not be surprised by this at all. However, this argument means "multiple dispatch
-is easy to optimize in practice", and that we should therefore use it, *not* "we should use single
-dispatch"!) So the method cache uses the type of the first argument as its primary key. Note,
-however, that this corresponds to the *second* element of the tuple type for a function call (the
-first element being the type of the function itself). Typically, type variation in head position
-is extremely low -- indeed, the majority of functions belong to singleton types with no parameters.
-However, this is not the case for constructors, where a single method table holds constructors
-for every type. Therefore the `Type` method table is special-cased to use the *first* tuple type
-element instead of the second.
+The next issue concerns the structure of method tables. Empirical studies show that the vast
+majority of dynamically-dispatched calls involve one or two arguments. In turn, many of these cases
+can be resolved by considering only the first argument. (Aside: proponents of single dispatch would
+not be surprised by this at all. However, this argument means "multiple dispatch is easy to optimize
+in practice", and that we should therefore use it, *not* "we should use single dispatch"!). So the
+method table and cache splits up on the structure based on a left-to-right decision tree so allow
+efficient nearest-neighbor searches.
 
 The front end generates type declarations for all closures. Initially, this was implemented by
 generating normal type declarations. However, this produced an extremely large number of constructors,
 all of which were trivial (simply passing all arguments through to [`new`](@ref)). Since methods are partially
-ordered, inserting all of these methods is O(n^2), plus there are just too many of them to keep
+ordered, inserting all of these methods is O(n²), plus there are just too many of them to keep
 around. This was optimized by generating `struct_type` expressions directly (bypassing default
 constructor generation), and using `new` directly to create closure instances. Not the prettiest
 thing ever, but you do what you gotta do.
diff --git a/doc/src/devdocs/gc-mmtk.md b/doc/src/devdocs/gc-mmtk.md
new file mode 100644
index 0000000000000..01119f1b39ac4
--- /dev/null
+++ b/doc/src/devdocs/gc-mmtk.md
@@ -0,0 +1,40 @@
+# Julia + MMTk
+
+There has been quite a lot of effort to refactor the GC code inside Julia to support external GCs. The first step to enable using different GC algorithms for Julia was the design and implementation of a [GC interface](https://docs.google.com/document/d/1v0jtSrIpdEDNOxj5S9g1jPqSpuAkNWhr_T8ToFC9RLI/edit?usp=sharing). To drive that interface, we added support for building Julia with [MMTk](https://www.mmtk.io) (Memory Management Toolkit). Using Julia + MMTk enables testing different GC implementations, allowing developers to choose a specific implementation when building Julia from source. The connection between Julia and MMTk is done via a *binding*, which links the language runtime with MMTk core. The mmtk-julia binding is written in Rust and can be found in [this repository](https://github.com/mmtk/mmtk-julia).
+
+> [!NOTE]
+> Using a different GC requires building Julia from source. It is not possible to switch implementations at runtime. To see what version of the GC is currently being used, run `versioninfo()` from the Julia REPL and it should show the version under `GC: ...`.
+
+## Building Julia with MMTk
+
+There are 3 different ways of building Julia with MMTk: building from source using a fixed release of the binding, checking out a custom version in the mmtk-julia [repository](https://github.com/mmtk/mmtk-julia) or using a precompiled binary from Julia's BinaryBuilder. The easiest way is to use the BinaryBuilder binary. First, to enable MMTk as a third-party GC, set the variable `WITH_THIRD_PARTY_GC` to `mmtk`. Then, for example, to use the Immix as the GC, simply set the variable `MMTK_PLAN=Immix` and build Julia as usual.
+
+There are different configurations supported by the following variables, which can be set in a `Make.user` file or as an environment variable. Note that at this time, setting `MMTK_PLAN=StickyImmix` (to use a generational version of Immix) or `MMTK_MOVING=1` (to enable object movement) will likely cause segmentation faults or other build failures, since we have not added support for these configurations yet. Setting `MMTK_BUILD=debug` will force a debug build of the binding, which will print some logging information that can be used to find errors that are specific to MMTk.
+
+| Variable      |       |        |
+|---------------|--------------|---------------|
+| `MMTK_PLAN`     | Immix        | StickyImmix   |
+| `MMTK_MOVING`   | 0            | 1             |
+| `MMTK_BUILD`    | release      | debug         |
+
+Note that when setting only `MMTK_PLAN`, then the default is to do a non-moving, release build.
+
+### Building mmtk-julia from source
+
+It is also possible to build the binding from source. To do so, set the variable `USE_BINARYBUILDER_MMTK_JULIA=0` and the latest release version of the binding will be downloaded and built as part of building Julia. Note that this requires an installation of the rust toolchain.
+
+It is also possible to build a custom version of binding by checking it out from the [git repository](https://github.com/mmtk/mmtk-julia) and setting a variable named `MMTK_JULIA_DIR` as the path that contains the binding.
+
+For more information on building Julia with MMTk, please refer to the [README](https://github.com/mmtk/mmtk-julia/blob/master/README.md) file in the binding repo.
+
+### I've got a build error when building Julia with MMTk, what should I do?
+
+If you try to build Julia with MMTk and get an error it is likely due to a change to Julia that has not been yet propagated to the binding or to the code in Julia that is specific to MMTk. Some changes include:
+
+(1) **Changing the memory layout of objects in Julia**. The binding relies on automatically generated Rust FFI bindings from Julia code. These files are generated using a crate named [`rust-bindgen`](https://github.com/rust-lang/rust-bindgen). To regenerate those files, check out the latest version of the `mmtk-julia` binding, set the variable `JULIA_PATH` to the path of the Julia version you are trying to build and run `make regen-bindgen-ffi` from the directory containing the binding. This should delete the current version of the FFI bindings and generate a new version based on the Julia code from `JULIA_PATH`.
+
+(2) **Changing the root objects passed to the GC**. Julia passes a set of objects to the GC as roots in the function [gc_mark_roots](https://github.com/JuliaLang/julia/blob/fbe865657942da7d73cc02f76064f9ba9cdef56c/src/gc-stock.c#L2846). At the moment, this set needs to be consistent between both the Stock GC and MMTk (in the function [`jl_gc_scan_vm_specific_roots`](https://github.com/JuliaLang/julia/blob/fbe865657942da7d73cc02f76064f9ba9cdef56c/src/gc-mmtk.c#L496)).
+
+(3) **Changing how objects are scanned**. MMTk uses the same strategy to find references in Julia objects as the stock GC (see [gc_mark_outrefs](https://github.com/JuliaLang/julia/blob/fbe865657942da7d73cc02f76064f9ba9cdef56c/src/gc-stock.c#L2227C19-L2227C34)). Changing the logic from this function should be reflected in the Rust code in the binding that [scan Julia objects](https://github.com/mmtk/mmtk-julia/blob/c9e046baf3a0d52fe75d6c8b28f6afd69b045d95/mmtk/src/julia_scanning.rs#L68).
+
+If your case is not included in one of the alternatives above, please create an issue in the Julia repository tagging it with the `GC: MMTK` label.
diff --git a/doc/src/devdocs/gc-sa.md b/doc/src/devdocs/gc-sa.md
index ffbb7451fce5f..89a285fe4ad3f 100644
--- a/doc/src/devdocs/gc-sa.md
+++ b/doc/src/devdocs/gc-sa.md
@@ -57,7 +57,7 @@ code base to make things work.
 
 ## GC Invariants
 
-There is two simple invariants correctness:
+There are two simple invariants for correctness:
 - All `GC_PUSH` calls need to be followed by an appropriate `GC_POP` (in practice we enforce this
   at the function level)
 - If a value was previously not rooted at any safepoint, it may no longer be referenced
@@ -101,7 +101,7 @@ we place on a given function are indeed correct given the implementation of said
 ## The analyzer annotations
 
 These annotations are found in src/support/analyzer_annotations.h.
-The are only active when the analyzer is being used and expand either
+They are only active when the analyzer is being used and expand either
 to nothing (for prototype annotations) or to no-ops (for function like annotations).
 
 ### `JL_NOTSAFEPOINT`
diff --git a/doc/src/devdocs/gc.md b/doc/src/devdocs/gc.md
index c072912e77c3f..a641944df3ab1 100644
--- a/doc/src/devdocs/gc.md
+++ b/doc/src/devdocs/gc.md
@@ -1,72 +1,74 @@
-# Garbage Collection in Julia
+# Julia Garbage Collector (GC) Internals
 
 ## Introduction
 
-Julia has a serial, stop-the-world, generational, non-moving mark-sweep garbage collector.
-Native objects are precisely scanned and foreign ones are conservatively marked.
+Julia implements a garbage collector (GC) to automate dynamic memory management. Julia's GC is:
 
-## Memory layout of objects and GC bits
+- **Mark-sweep**: the object graph is traced starting from a root-set (e.g., global variables and local variables on the stack) to determine the set of live objects.
+- **Non-moving**: objects are not relocated to a different memory address.
+- **Parallel**: multiple threads can be used during the marking and sweeping phases.
+- **Partially concurrent**: the runtime provides an option to scavenge pool-allocated memory blocks (e.g., call `madvise` on these blocks on Linux) concurrently with Julia user code.
+- **Generational**: objects are partitioned into generations according to how many collection cycles they've survived. Younger generations are collected more often.
+- **Mostly precise**: Julia optionally supports conservative stack scanning for users who inter-operate with foreign languages like C.
 
-An opaque tag is stored in the front of GC managed objects, and its lowest two bits are
-used for garbage collection.  The lowest bit is set for marked objects and the second
-lowest bit stores age information (e.g. it's only set for old objects).
+## Allocation
 
-Objects are aligned by a multiple of 4 bytes to ensure this pointer tagging is legal.
+Julia uses two types of allocators, depending on the size of the allocation request.
 
-## Pool allocation
+### Small Object Allocation
 
-Sufficiently small objects (up to 2032 bytes) are allocated on per-thread object
-pools.
+Sufficiently small objects, up to 2k bytes, are allocated through a per-thread free-list pool allocator.
 
-A three-level tree (analogous to a three-level page-table) is used to keep metadata
-(e.g. whether a page has been allocated, whether contains marked objects, number of free objects etc.)
-about address ranges spanning at least one page.
-Sweeping a pool allocated object consists of inserting it back into the free list
-maintained by its pool.
+Julia's pool allocator often has better runtime performance than `libc` `malloc` for small allocations. Additionally, using a custom pool allocator enables a few optimizations during the sweeping phase (e.g., concurrent scavenging).
 
-## Malloc'd arrays and big objects
+The pool allocator segregates objects on different size classes. Each large memory block (16k bytes) managed by the pool allocator only contains objects belonging to the same size class.
 
-Two lists are used to keep track of the remaining allocated objects:
-one for sufficiently large malloc'd arrays (`mallocarray_t`) and one for
-sufficiently large objects (`bigval_t`).
+Each pool-allocated memory block is paired with a metadata structure containing information such as whether the block has live objects at all, the number of free memory slots in the block, the offsets to the first and last objects in the block, etc. This metadata is used to aggregate statistics such as number of objects freed during a collection cycle. It's also used to optimize the sweeping phase of the GC: blocks that have no live objects whatsoever don't need to be linearly scanned during the sweeping phase.
 
-Sweeping these objects consists of unlinking them from their list and calling `free` on the
-corresponding address.
+Julia's pool allocator stores memory blocks into different global lock-free lists depending on whether the block has been mapped but never accessed (`page_pool_clean`),  whether the page has been lazily swept and it's waiting to be scavenged by a background GC thread (`page_pool_lazily_freed`), or whether the page has been scavenged (`page_pool_freed`).
 
-## Generational and remembered sets
+ The pool allocator uses this partitioning of blocks to implement a tiered allocation discipline. When it requests a fresh memory block, it will:
 
-Field writes into old objects trigger a write barrier if the written field
-points to a young object and if a write barrier has not been triggered on the old object yet.
-In this case, the old object being written to is enqueued into a remembered set, and
-its mark bit is set to indicate that a write barrier has already been triggered on it.
+- Try to claim a block from `page_pool_lazily_freed`, which contains blocks that were empty during the last stop-the-world phase, but haven't been madvised by a concurrent scavenger GC thread yet.
 
-There is no explicit flag to determine whether a marking pass will scan the
-entire heap or only through young objects and remembered set.
-The mark bits of the objects themselves are used to determine whether a full mark happens.
-The mark-sweep algorithm follows this sequence of steps:
+- If it failed to claim a block from `page_pool_lazily_freed`, it will try to claim a block from `page_pool_clean`, which contains blocks mapped on a previous block allocation request but never accessed.
 
-- Objects in the remembered set have their GC mark bits reset
-(these are set once write barrier is triggered, as described above) and are enqueued.
+- If it failed to claim a block from `page_pool_clean` and from `page_pool_lazily_freed`, it will try to claim a block from `page_pool_freed`, which contains blocks already scavenged by a concurrent scavenger GC thread and whose underlying virtual address can be recycled.
 
-- Roots (e.g. thread locals) are enqueued.
+- If it failed in all of the attempts mentioned above, it will map a batch of operating system pages, partition them into memory blocks, claim one block for itself, and insert the remaining blocks into `page_pool_clean`.
 
-- Object graph is traversed and mark bits are set.
+![Diagram of tiered pool allocation](./img/gc-tiered-allocation.jpg)
 
-- Object pools, malloc'd arrays and big objects are sweeped. On a full sweep,
-the mark bits of all marked objects are reset. On a generational sweep,
-only the mark bits of marked young objects are reset.
+### Large Object Allocation
 
-- Mark bits of objects in the remembered set are set,
-so we don't trigger the write barrier on them again.
+Sufficiently large objects, above the 2k byte threshold mentioned in the previous section, are allocated through `libc` `malloc`. Large allocations are typically less performance-critical than small allocations, as they occur less frequently.
 
-After these stages, old objects will be left with their mark bits set,
-so that references from them are not explored in a subsequent generational collection.
-This scheme eliminates the need of explicitly keeping a flag to indicate a full mark
-(though a flag to indicate a full sweep is necessary).
+Although Julia currently uses `libc` `malloc`, it also supports pre-loading other dynamic memory allocators (e.g., `jemalloc`).
 
-## Heuristics
+## Marking and Generational Collection
 
-GC heuristics tune the GC by changing the size of the allocation interval between garbage collections.
-If a GC was unproductive, then we increase the size of the allocation interval to allow objects more time to die.
-If a GC returns a lot of space we can shrink the interval. The goal is to find a steady state where we are
-allocating just about the same amount as we are collecting.
+Julia’s mark phase is implemented through a parallel depth-first-search that traverses the object graph to determine which objects are alive.
+
+Julia stores age information for its generational GC in the object header: the lowest two bits of an object’s header store a mark bit, set when an object is marked, and an age bit, set when the object is promoted. Because Julia’s GC is non-moving, object age information can’t be only determined through the object's memory address, such as in GC implementations that allocate young objects in certain memory regions and relocate them to other memory regions during object promotion.
+
+Generational collection is implemented through sticky bits: objects are only pushed to the mark-stack, and therefore traced, if their mark-bits have not been set. When objects reach the oldest generation, their mark-bits aren't reset during a quick sweep, so these objects aren't traced during a subsequent mark phase. A full sweep, however, resets the mark-bits of all objects, so all of them are traced in a subsequent collection.
+
+When the mutator is running, a write barrier intercepts field writes and pushes an object’s address into a per-thread remembered set if the reference crosses generations. Objects in this remembered set are then traced during the next mark phase.
+
+## Sweeping
+
+If a memory block managed by the pool allocator contains at least one live object, the sweeping phase creates a free-list from its dead objects; if it doesn't, then the block is scavenged and its underlying physical memory might be returned to the operating system through, for instance, `madvise` on Linux.
+
+The linear scan of memory blocks that have at least one live object can be run with multiple threads. If concurrent page sweeping is enabled through the flag `--gcthreads=X,1` the GC scavenges memory blocks concurrently with the mutator.
+
+During the stop-the-world phase of the collector, memory blocks containing no live objects are initially pushed into the `page_pool_lazily_freed`. The background scavenger thread is then woken up and removes blocks from `page_pool_lazily_freed`, scavenges them (e.g., `madvise` on Linux), and inserts them into `page_pool_freed`. `page_pool_lazily_freed` is also shared with mutator threads. This can improve performance in some applications because in allocation-heavy multithreaded workloads, mutator threads often avoid a page fault during allocation, which happens by accessing a freshly mapped operating system page or a madvised page, by directly allocating a block from `page_pool_lazily_freed`. In these workloads, the scavenger thread also needs to scavenge fewer blocks, since some have already been claimed by the mutators.
+
+## Memory Accounting
+
+The GC determines the heap size by adding the number of bytes in-use by pool-allocated memory blocks and bytes in-use by objects allocated through the large allocator. Previously, we measured the heap size by adding up the bytes for live objects, but not live memory blocks. This definition ignores fragmentation, which can lead to inaccurate GC decisions.
+
+## GC Trigger Heuristics
+
+Julia's GC heuristics are based on `MemBalancer` (https://dl.acm.org/doi/10.1145/3563323). They decide when to trigger a collection and which (quick or full) collection to trigger. The heuristics adjust the number of bytes the mutator can allocate before triggering a collection cycle by measuring metrics such as allocation rate, freeing rate, and current heap size.
+
+Independently of allocation rates, freeing rates, or GC times, Julia will always trigger full collections if the heap size exceeds 80% of a memory upper bound specified through `--heap-size-hint` or determined by reading system information.
diff --git a/doc/src/devdocs/img/gc-tiered-allocation.jpg b/doc/src/devdocs/img/gc-tiered-allocation.jpg
new file mode 100644
index 0000000000000..4ab0e1298364c
Binary files /dev/null and b/doc/src/devdocs/img/gc-tiered-allocation.jpg differ
diff --git a/doc/src/devdocs/img/invalidation-example.png b/doc/src/devdocs/img/invalidation-example.png
new file mode 100644
index 0000000000000..4a80869063761
Binary files /dev/null and b/doc/src/devdocs/img/invalidation-example.png differ
diff --git a/doc/src/devdocs/img/invalidation-example.svg b/doc/src/devdocs/img/invalidation-example.svg
new file mode 100644
index 0000000000000..678f5100ba5df
--- /dev/null
+++ b/doc/src/devdocs/img/invalidation-example.svg
@@ -0,0 +1,234 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 12.2.1 (0)
+ -->
+<!-- Title: G Pages: 1 -->
+<svg width="451pt" height="726pt"
+ viewBox="0.00 0.00 450.77 726.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 722)">
+<title>G</title>
+<polygon fill="white" stroke="none" points="-4,4 -4,-722 446.77,-722 446.77,4 -4,4"/>
+<!-- mt -->
+<g id="node1" class="node">
+<title>mt</title>
+<polygon fill="none" stroke="black" points="269.02,-718 169.02,-718 169.02,-682 269.02,-682 269.02,-718"/>
+<text text-anchor="start" x="177.02" y="-695.2" font-family="sans-serif" font-weight="bold" font-size="14.00">MethodTable</text>
+</g>
+<!-- f_m -->
+<g id="node3" class="node">
+<title>f_m</title>
+<polygon fill="none" stroke="black" points="150.39,-558 85.64,-558 85.64,-522 150.39,-522 150.39,-558"/>
+<text text-anchor="start" x="93.64" y="-541.7" font-family="sans-serif" font-weight="bold" font-size="14.00">Method</text>
+<text text-anchor="start" x="111.64" y="-527.7" font-family="sans-serif" font-size="14.00">f()</text>
+</g>
+<!-- mt&#45;&gt;f_m -->
+<g id="edge4" class="edge">
+<title>mt&#45;&gt;f_m</title>
+<path fill="none" stroke="black" d="M208.02,-681.79C190.41,-654.25 155.77,-600.06 134.98,-567.53"/>
+<polygon fill="black" stroke="black" points="138.16,-566 129.82,-559.46 132.26,-569.77 138.16,-566"/>
+</g>
+<!-- g_m -->
+<g id="node4" class="node">
+<title>g_m</title>
+<polygon fill="none" stroke="black" points="387.39,-645 322.64,-645 322.64,-609 387.39,-609 387.39,-645"/>
+<text text-anchor="start" x="330.64" y="-628.7" font-family="sans-serif" font-weight="bold" font-size="14.00">Method</text>
+<text text-anchor="start" x="346.77" y="-614.7" font-family="sans-serif" font-size="14.00">g()</text>
+</g>
+<!-- mt&#45;&gt;g_m -->
+<g id="edge5" class="edge">
+<title>mt&#45;&gt;g_m</title>
+<path fill="none" stroke="black" d="M252.29,-681.63C270.42,-672.17 293.05,-660.35 312.46,-650.22"/>
+<polygon fill="black" stroke="black" points="313.98,-653.38 321.22,-645.64 310.74,-647.17 313.98,-653.38"/>
+</g>
+<!-- m_mod -->
+<g id="node2" class="node">
+<title>m_mod</title>
+<polygon fill="none" stroke="black" points="368.02,-297 304.02,-297 304.02,-261 368.02,-261 368.02,-297"/>
+<text text-anchor="start" x="312.02" y="-280.7" font-family="sans-serif" font-weight="bold" font-size="14.00">Module</text>
+<text text-anchor="start" x="330.02" y="-266.7" font-family="sans-serif" font-size="14.00">M</text>
+</g>
+<!-- c2_bind -->
+<g id="node11" class="node">
+<title>c2_bind</title>
+<polygon fill="none" stroke="black" points="165.14,-210 98.89,-210 98.89,-174 165.14,-174 165.14,-210"/>
+<text text-anchor="start" x="106.89" y="-193.7" font-family="sans-serif" font-weight="bold" font-size="14.00">Binding</text>
+<text text-anchor="start" x="117.02" y="-179.7" font-family="sans-serif" font-size="14.00">M.c2</text>
+</g>
+<!-- m_mod&#45;&gt;c2_bind -->
+<g id="edge1" class="edge">
+<title>m_mod&#45;&gt;c2_bind</title>
+<path fill="none" stroke="black" d="M316.16,-260.51C303.02,-249.75 284.95,-236.45 267.02,-228 238.27,-214.45 203.52,-205.6 176.47,-200.18"/>
+<polygon fill="black" stroke="black" points="177.36,-196.79 166.88,-198.35 176.04,-203.67 177.36,-196.79"/>
+<text text-anchor="middle" x="316.41" y="-229.7" font-family="sans-serif" font-size="14.00">bindings</text>
+</g>
+<!-- f_mi -->
+<g id="node5" class="node">
+<title>f_mi</title>
+<polygon fill="none" stroke="black" points="177.77,-471 58.27,-471 58.27,-435 177.77,-435 177.77,-471"/>
+<text text-anchor="start" x="66.27" y="-454.7" font-family="sans-serif" font-weight="bold" font-size="14.00">MethodInstance</text>
+<text text-anchor="start" x="111.64" y="-440.7" font-family="sans-serif" font-size="14.00">f()</text>
+</g>
+<!-- f_m&#45;&gt;f_mi -->
+<g id="edge6" class="edge">
+<title>f_m&#45;&gt;f_mi</title>
+<path fill="none" stroke="black" d="M118.02,-521.8C118.02,-510.58 118.02,-495.67 118.02,-482.69"/>
+<polygon fill="black" stroke="black" points="121.52,-482.98 118.02,-472.98 114.52,-482.98 121.52,-482.98"/>
+<text text-anchor="middle" x="161.89" y="-490.7" font-family="sans-serif" font-size="14.00">specializations</text>
+</g>
+<!-- g_mi -->
+<g id="node6" class="node">
+<title>g_mi</title>
+<polygon fill="none" stroke="black" points="414.77,-558 295.27,-558 295.27,-522 414.77,-522 414.77,-558"/>
+<text text-anchor="start" x="303.27" y="-541.7" font-family="sans-serif" font-weight="bold" font-size="14.00">MethodInstance</text>
+<text text-anchor="start" x="346.77" y="-527.7" font-family="sans-serif" font-size="14.00">g()</text>
+</g>
+<!-- g_m&#45;&gt;g_mi -->
+<g id="edge7" class="edge">
+<title>g_m&#45;&gt;g_mi</title>
+<path fill="none" stroke="black" d="M355.02,-608.8C355.02,-597.58 355.02,-582.67 355.02,-569.69"/>
+<polygon fill="black" stroke="black" points="358.52,-569.98 355.02,-559.98 351.52,-569.98 358.52,-569.98"/>
+<text text-anchor="middle" x="398.89" y="-577.7" font-family="sans-serif" font-size="14.00">specializations</text>
+</g>
+<!-- f_ci2 -->
+<g id="node8" class="node">
+<title>f_ci2</title>
+<polygon fill="none" stroke="black" points="170.27,-384 65.77,-384 65.77,-348 170.27,-348 170.27,-384"/>
+<text text-anchor="start" x="73.77" y="-367.7" font-family="sans-serif" font-weight="bold" font-size="14.00">CodeInstance</text>
+<text text-anchor="start" x="83.52" y="-353.7" font-family="sans-serif" font-size="14.00">f() for [2, ∞)</text>
+</g>
+<!-- f_mi&#45;&gt;f_ci2 -->
+<g id="edge8" class="edge">
+<title>f_mi&#45;&gt;f_ci2</title>
+<path fill="none" stroke="black" d="M118.02,-434.8C118.02,-423.58 118.02,-408.67 118.02,-395.69"/>
+<polygon fill="black" stroke="black" points="121.52,-395.98 118.02,-385.98 114.52,-395.98 121.52,-395.98"/>
+<text text-anchor="middle" x="136.02" y="-403.7" font-family="sans-serif" font-size="14.00">cache</text>
+</g>
+<!-- g_ci1 -->
+<g id="node9" class="node">
+<title>g_ci1</title>
+<polygon fill="none" stroke="black" points="407.27,-384 302.77,-384 302.77,-348 407.27,-348 407.27,-384"/>
+<text text-anchor="start" x="310.77" y="-367.7" font-family="sans-serif" font-weight="bold" font-size="14.00">CodeInstance</text>
+<text text-anchor="start" x="320.14" y="-353.7" font-family="sans-serif" font-size="14.00">g() for [1, 1]</text>
+</g>
+<!-- f_mi&#45;&gt;g_ci1 -->
+<g id="edge10" class="edge">
+<title>f_mi&#45;&gt;g_ci1</title>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M136.7,-434.6C150.68,-421.92 168.86,-406.26 177.77,-402 213.27,-385.03 256.53,-376.29 291.42,-371.79"/>
+<polygon fill="black" stroke="black" points="291.43,-375.31 300.94,-370.65 290.59,-368.36 291.43,-375.31"/>
+<text text-anchor="middle" x="217.89" y="-403.7" font-family="sans-serif" font-size="14.00">backedges[1]</text>
+</g>
+<!-- g_ci2 -->
+<g id="node10" class="node">
+<title>g_ci2</title>
+<polygon fill="none" stroke="black" points="407.27,-471 302.77,-471 302.77,-435 407.27,-435 407.27,-471"/>
+<text text-anchor="start" x="310.77" y="-454.7" font-family="sans-serif" font-weight="bold" font-size="14.00">CodeInstance</text>
+<text text-anchor="start" x="318.64" y="-440.7" font-family="sans-serif" font-size="14.00">g() for [2, ∞)</text>
+</g>
+<!-- f_mi&#45;&gt;g_ci2 -->
+<g id="edge11" class="edge">
+<title>f_mi&#45;&gt;g_ci2</title>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M178.13,-453C212.78,-453 256.31,-453 291.51,-453"/>
+<polygon fill="black" stroke="black" points="291.12,-456.5 301.12,-453 291.11,-449.5 291.12,-456.5"/>
+<text text-anchor="middle" x="240.27" y="-457.7" font-family="sans-serif" font-size="14.00">backedges[2]</text>
+</g>
+<!-- g_mi&#45;&gt;g_ci2 -->
+<g id="edge9" class="edge">
+<title>g_mi&#45;&gt;g_ci2</title>
+<path fill="none" stroke="black" d="M355.02,-521.8C355.02,-510.58 355.02,-495.67 355.02,-482.69"/>
+<polygon fill="black" stroke="black" points="358.52,-482.98 355.02,-472.98 351.52,-482.98 358.52,-482.98"/>
+<text text-anchor="middle" x="373.02" y="-490.7" font-family="sans-serif" font-size="14.00">cache</text>
+</g>
+<!-- f_ci1 -->
+<g id="node7" class="node">
+<title>f_ci1</title>
+<polygon fill="none" stroke="black" points="170.27,-297 65.77,-297 65.77,-261 170.27,-261 170.27,-297"/>
+<text text-anchor="start" x="73.77" y="-280.7" font-family="sans-serif" font-weight="bold" font-size="14.00">CodeInstance</text>
+<text text-anchor="start" x="85.02" y="-266.7" font-family="sans-serif" font-size="14.00">f() for [1, 1]</text>
+</g>
+<!-- f_ci1&#45;&gt;c2_bind -->
+<g id="edge16" class="edge">
+<title>f_ci1&#45;&gt;c2_bind</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M120.85,-260.8C122.7,-249.58 125.16,-234.67 127.29,-221.69"/>
+<polygon fill="black" stroke="black" points="130.73,-222.4 128.9,-211.96 123.82,-221.26 130.73,-222.4"/>
+<text text-anchor="middle" x="151.73" y="-229.7" font-family="sans-serif" font-size="14.00">edges[1]</text>
+</g>
+<!-- f_ci2&#45;&gt;f_ci1 -->
+<g id="edge12" class="edge">
+<title>f_ci2&#45;&gt;f_ci1</title>
+<path fill="none" stroke="black" d="M118.02,-347.8C118.02,-336.58 118.02,-321.67 118.02,-308.69"/>
+<polygon fill="black" stroke="black" points="121.52,-308.98 118.02,-298.98 114.52,-308.98 121.52,-308.98"/>
+<text text-anchor="middle" x="130.77" y="-316.7" font-family="sans-serif" font-size="14.00">next</text>
+</g>
+<!-- f_ci2&#45;&gt;c2_bind -->
+<g id="edge17" class="edge">
+<title>f_ci2&#45;&gt;c2_bind</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M66.91,-347.64C43.74,-336.91 18.59,-320.59 5.27,-297 -2.6,-283.07 -0.07,-276.08 5.27,-261 17.32,-227 56.14,-209.72 87.67,-201.11"/>
+<polygon fill="black" stroke="black" points="88.14,-204.6 97,-198.78 86.45,-197.81 88.14,-204.6"/>
+<text text-anchor="middle" x="31.14" y="-273.2" font-family="sans-serif" font-size="14.00">edges[1]</text>
+</g>
+<!-- g_ci1&#45;&gt;f_ci1 -->
+<g id="edge14" class="edge">
+<title>g_ci1&#45;&gt;f_ci1</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M306.49,-347.59C269.02,-334.16 217.02,-315.51 177.13,-301.2"/>
+<polygon fill="black" stroke="black" points="178.51,-297.98 167.91,-297.89 176.14,-304.56 178.51,-297.98"/>
+<text text-anchor="middle" x="276.58" y="-316.7" font-family="sans-serif" font-size="14.00">edges[1]</text>
+</g>
+<!-- g_ci2&#45;&gt;f_ci2 -->
+<g id="edge15" class="edge">
+<title>g_ci2&#45;&gt;f_ci2</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M325.69,-434.62C306.86,-424.04 281.6,-410.89 258.02,-402 233.73,-392.85 206.18,-385.37 181.98,-379.71"/>
+<polygon fill="black" stroke="black" points="182.76,-376.3 172.23,-377.5 181.21,-383.13 182.76,-376.3"/>
+<text text-anchor="middle" x="316.85" y="-403.7" font-family="sans-serif" font-size="14.00">edges[1]</text>
+</g>
+<!-- g_ci2&#45;&gt;g_ci1 -->
+<g id="edge13" class="edge">
+<title>g_ci2&#45;&gt;g_ci1</title>
+<path fill="none" stroke="black" d="M355.02,-434.8C355.02,-423.58 355.02,-408.67 355.02,-395.69"/>
+<polygon fill="black" stroke="black" points="358.52,-395.98 355.02,-385.98 351.52,-395.98 358.52,-395.98"/>
+<text text-anchor="middle" x="367.77" y="-403.7" font-family="sans-serif" font-size="14.00">next</text>
+</g>
+<!-- c2_bind&#45;&gt;f_ci1 -->
+<g id="edge18" class="edge">
+<title>c2_bind&#45;&gt;f_ci1</title>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M98.48,-196.31C77.84,-200.28 52.91,-208.97 39.77,-228 30.83,-240.95 40.09,-251.3 54.96,-259.14"/>
+<polygon fill="black" stroke="black" points="53.49,-262.31 64.04,-263.3 56.41,-255.95 53.49,-262.31"/>
+<text text-anchor="middle" x="79.89" y="-229.7" font-family="sans-serif" font-size="14.00">backedges[1]</text>
+</g>
+<!-- c2_bind&#45;&gt;f_ci2 -->
+<g id="edge19" class="edge">
+<title>c2_bind&#45;&gt;f_ci2</title>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M165.64,-209.79C172.23,-214.71 178.25,-220.76 182.02,-228 196.2,-255.23 190.78,-268.65 179.02,-297 172.25,-313.31 159.84,-328.29 147.97,-340.01"/>
+<polygon fill="black" stroke="black" points="145.9,-337.15 141.01,-346.55 150.7,-342.25 145.9,-337.15"/>
+<text text-anchor="middle" x="230.44" y="-273.2" font-family="sans-serif" font-size="14.00">backedges[2]</text>
+</g>
+<!-- c2_bpart2 -->
+<g id="node13" class="node">
+<title>c2_bpart2</title>
+<polygon fill="none" stroke="black" points="199.27,-123 64.77,-123 64.77,-87 199.27,-87 199.27,-123"/>
+<text text-anchor="start" x="79.52" y="-106.7" font-family="sans-serif" font-weight="bold" font-size="14.00">BindingPartition</text>
+<text text-anchor="start" x="72.77" y="-92.7" font-family="sans-serif" font-size="14.00">constant 3 for [2, ∞)</text>
+</g>
+<!-- c2_bind&#45;&gt;c2_bpart2 -->
+<g id="edge2" class="edge">
+<title>c2_bind&#45;&gt;c2_bpart2</title>
+<path fill="none" stroke="black" d="M132.02,-173.8C132.02,-162.58 132.02,-147.67 132.02,-134.69"/>
+<polygon fill="black" stroke="black" points="135.52,-134.98 132.02,-124.98 128.52,-134.98 135.52,-134.98"/>
+<text text-anchor="middle" x="159.39" y="-142.7" font-family="sans-serif" font-size="14.00">partitions</text>
+</g>
+<!-- c2_bpart1 -->
+<g id="node12" class="node">
+<title>c2_bpart1</title>
+<polygon fill="none" stroke="black" points="197.77,-36 66.27,-36 66.27,0 197.77,0 197.77,-36"/>
+<text text-anchor="start" x="79.52" y="-19.7" font-family="sans-serif" font-weight="bold" font-size="14.00">BindingPartition</text>
+<text text-anchor="start" x="74.27" y="-5.7" font-family="sans-serif" font-size="14.00">constant 2 for [1, 1]</text>
+</g>
+<!-- c2_bpart2&#45;&gt;c2_bpart1 -->
+<g id="edge3" class="edge">
+<title>c2_bpart2&#45;&gt;c2_bpart1</title>
+<path fill="none" stroke="black" d="M132.02,-86.8C132.02,-75.58 132.02,-60.67 132.02,-47.69"/>
+<polygon fill="black" stroke="black" points="135.52,-47.98 132.02,-37.98 128.52,-47.98 135.52,-47.98"/>
+<text text-anchor="middle" x="144.77" y="-55.7" font-family="sans-serif" font-size="14.00">next</text>
+</g>
+</g>
+</svg>
diff --git a/doc/src/devdocs/img/precompilation_hang.png b/doc/src/devdocs/img/precompilation_hang.png
new file mode 100644
index 0000000000000..d076b7697f271
Binary files /dev/null and b/doc/src/devdocs/img/precompilation_hang.png differ
diff --git a/doc/src/devdocs/img/typeinf-promotion.png b/doc/src/devdocs/img/typeinf-promotion.png
new file mode 100644
index 0000000000000..044575745f56e
Binary files /dev/null and b/doc/src/devdocs/img/typeinf-promotion.png differ
diff --git a/doc/src/devdocs/img/typeinf-promotion.svg b/doc/src/devdocs/img/typeinf-promotion.svg
new file mode 100644
index 0000000000000..2bf859a70d06c
--- /dev/null
+++ b/doc/src/devdocs/img/typeinf-promotion.svg
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="485" height="210" version="1.1" viewBox="0 0 161.67 70" xmlns="http://www.w3.org/2000/svg">
+ <g transform="translate(-82.026 -119.5)">
+  <rect x="82.026" y="119.5" width="161.67" height="70" fill="#fff"/>
+  <g transform="translate(1.6667 1.6667)">
+   <g>
+    <path d="m90 125h10" fill="none" stroke="#000"/>
+    <path d="m150 125h90" fill="none" stroke="#000"/>
+    <text x="88.028793" y="126.785" fill="#000000" font-family="sans-serif" font-size="5px" xml:space="preserve"><tspan x="88.028793" y="126.785" font-size="5px" text-align="end" text-anchor="end">T1</tspan></text>
+   </g>
+   <path d="m100 120v10" fill="none" stroke="#000"/>
+   <path d="m125 120v10" fill="none" stroke="#000"/>
+   <g fill="none" font-family="sans-serif" stroke="#000000" text-anchor="middle">
+    <text x="100" y="135" font-size="5px" text-align="center" xml:space="preserve"><tspan x="100" y="135" fill="#000000" font-size="5px" stroke="none" text-align="center" text-anchor="middle">acq W</tspan></text>
+    <text x="137.68201" y="125.83799" font-size="4px" text-align="center" xml:space="preserve"><tspan x="137.68201" y="125.83799" fill="#000000" font-size="4px" stroke="none" text-align="center" text-anchor="middle">promote</tspan></text>
+    <text x="175" y="176.49998" font-size="4px" text-align="center" xml:space="preserve"><tspan x="175" y="176.49998" fill="#000000" font-size="4px" stroke="none" text-align="center" text-anchor="middle">new method</tspan></text>
+    <text x="125" y="135" font-size="5px" text-align="center" xml:space="preserve"><tspan x="125" y="135" fill="#000000" font-size="5px" stroke="none" text-align="center" text-anchor="middle">read W</tspan></text>
+   </g>
+   <g fill="none" stroke="#000">
+    <path d="m100 125h20" stroke-dasharray="1, 1"/>
+    <g>
+     <rect x="120" y="120" width="30" height="10"/>
+     <path d="m230 150 10-2e-5"/>
+     <path d="m205 145v10"/>
+    </g>
+   </g>
+   <text x="217.68199" y="148.798" fill="#000000" font-family="sans-serif" font-size="4px" text-align="center" text-anchor="middle" xml:space="preserve"><tspan x="217.68199" y="148.798" text-align="center">promote</tspan><tspan x="217.68199" y="153.798" text-align="center">fail</tspan></text>
+   <g fill="none" stroke="#000">
+    <path d="m135 150 65-1e-5" stroke-dasharray="1, 1"/>
+    <g>
+     <rect x="200" y="145" width="30" height="10"/>
+     <path d="m160 170v10"/>
+     <path d="m190 170v10"/>
+     <rect x="155" y="170" width="40" height="10"/>
+     <path d="m90 150h45"/>
+    </g>
+   </g>
+   <g>
+    <text x="87.616295" y="151.80998" fill="#000000" font-family="sans-serif" font-size="5px" xml:space="preserve"><tspan x="87.616295" y="151.80998" font-size="5px" text-align="end" text-anchor="end">T2</tspan></text>
+    <path d="m90 175h65" fill="none" stroke="#000"/>
+    <text x="87.616295" y="176.80998" fill="#000000" font-family="sans-serif" font-size="5px" xml:space="preserve"><tspan x="87.616295" y="176.80998" font-size="5px" text-align="end" text-anchor="end">T3</tspan></text>
+   </g>
+   <g fill="none">
+    <path d="m135 145v10" stroke="#000"/>
+    <g font-family="sans-serif" font-size="5px" stroke="#000000" text-anchor="middle">
+     <text x="135" y="160" text-align="center" xml:space="preserve"><tspan x="135" y="160" fill="#000000" font-size="5px" stroke="none" text-align="center" text-anchor="middle">acq W</tspan></text>
+     <text x="160" y="185" text-align="center" xml:space="preserve"><tspan x="160" y="185" fill="#000000" font-size="5px" stroke="none" text-align="center" text-anchor="middle">read W</tspan></text>
+     <text x="205" y="160" text-align="center" xml:space="preserve"><tspan x="205" y="160" fill="#000000" font-size="5px" stroke="none" text-align="center" text-anchor="middle">read W+1</tspan></text>
+     <text x="190" y="185" text-align="center" xml:space="preserve"><tspan x="190" y="185" fill="#000000" font-size="5px" stroke="none" text-align="center" text-anchor="middle">rel W+1</tspan></text>
+    </g>
+    <path d="m195 175h45" stroke="#000"/>
+   </g>
+  </g>
+ </g>
+</svg>
diff --git a/doc/src/devdocs/inference.md b/doc/src/devdocs/inference.md
index b6614d060a0c8..985b6ad6e8364 100644
--- a/doc/src/devdocs/inference.md
+++ b/doc/src/devdocs/inference.md
@@ -36,9 +36,9 @@ m = first(mths)
 # Create variables needed to call `typeinf_code`
 interp = Core.Compiler.NativeInterpreter()
 sparams = Core.svec()      # this particular method doesn't have type-parameters
-optimize = true            # run all inference optimizations
+run_optimizer = true       # run all inference optimizations
 types = Tuple{typeof(convert), atypes.parameters...} # Tuple{typeof(convert), Type{Int}, UInt}
-Core.Compiler.typeinf_code(interp, m, types, sparams, optimize)
+Core.Compiler.typeinf_code(interp, m, types, sparams, run_optimizer)
 ```
 
 If your debugging adventures require a `MethodInstance`, you can look it up by
@@ -96,18 +96,20 @@ Each statement gets analyzed for its total cost in a function called
 as follows:
 ```jldoctest; filter=r"tuple.jl:\d+"
 julia> Base.print_statement_costs(stdout, map, (typeof(sqrt), Tuple{Int},)) # map(sqrt, (2,))
-map(f, t::Tuple{Any}) @ Base tuple.jl:273
-  0 1 ─ %1  = Base.getfield(_3, 1, true)::Int64
-  1 │   %2  = Base.sitofp(Float64, %1)::Float64
-  2 │   %3  = Base.lt_float(%2, 0.0)::Bool
-  0 └──       goto #3 if not %3
-  0 2 ─       invoke Base.Math.throw_complex_domainerror(:sqrt::Symbol, %2::Float64)::Union{}
+map(f, t::Tuple{Any}) @ Base tuple.jl:358
+  0 1 ─ %1  = $(Expr(:boundscheck, true))::Bool
+  0 │   %2  =   builtin Base.getfield(_3, 1, %1)::Int64
+  1 │   %3  = intrinsic Base.sitofp(Float64, %2)::Float64
+  0 │   %4  = intrinsic Base.lt_float(%3, 0.0)::Bool
+  0 └──       goto #3 if not %4
+  0 2 ─          invoke Base.Math.throw_complex_domainerror(:sqrt::Symbol, %3::Float64)::Union{}
   0 └──       unreachable
- 20 3 ─ %7  = Base.Math.sqrt_llvm(%2)::Float64
+ 20 3 ─ %8  = intrinsic Base.Math.sqrt_llvm(%3)::Float64
   0 └──       goto #4
   0 4 ─       goto #5
-  0 5 ─ %10 = Core.tuple(%7)::Tuple{Float64}
-  0 └──       return %10
+  0 5 ─ %11 =   builtin Core.tuple(%8)::Tuple{Float64}
+  0 └──       return %11
+
 ```
 
 The line costs are in the left column. This includes the consequences of inlining and other forms of optimization.
diff --git a/doc/src/devdocs/init.md b/doc/src/devdocs/init.md
index 1e0e1173f8695..23012d6ba1eb7 100644
--- a/doc/src/devdocs/init.md
+++ b/doc/src/devdocs/init.md
@@ -63,7 +63,7 @@ the [LLVM library](https://llvm.org).
 If there is no sysimg file (`!jl_options.image_file`) then the `Core` and `Main` modules are
 created and `boot.jl` is evaluated:
 
-`jl_core_module = jl_new_module(jl_symbol("Core"))` creates the Julia `Core` module.
+`jl_core_module = jl_new_module(jl_symbol("Core"), NULL)` creates the Julia `Core` module.
 
 [`jl_init_intrinsic_functions()`](https://github.com/JuliaLang/julia/blob/master/src/intrinsics.cpp)
 creates a new Julia module `Intrinsics` containing constant `jl_intrinsic_type` symbols. These define
diff --git a/doc/src/devdocs/isbitsunionarrays.md b/doc/src/devdocs/isbitsunionarrays.md
index 2a25c033ec9fd..f01afe50985ec 100644
--- a/doc/src/devdocs/isbitsunionarrays.md
+++ b/doc/src/devdocs/isbitsunionarrays.md
@@ -18,6 +18,12 @@ Lastly, a value of `0x00` signals that the `nothing` value will be returned for
 type with a single type instance, it technically has a size of 0. The type tag byte for a type's Union field is stored
 directly after the field's computed Union memory.
 
-## isbits Union Arrays
+## isbits Union Memory
 
-Julia can now also store "isbits Union" values inline in an Array, as opposed to requiring an indirection box. The optimization is accomplished by storing an extra "type tag array" of bytes, one byte per array element, alongside the bytes of the actual array data. This type tag array serves the same function as the type field case: its value signals the type of the actual stored Union value in the array. In terms of layout, a Julia Array can include extra "buffer" space before and after its actual data values, which are tracked in the `a->offset` and `a->maxsize` fields of the `jl_array_t*` type. The "type tag array" is treated exactly as another `jl_array_t*`, but which shares the same `a->offset`, `a->maxsize`, and `a->len` fields. So the formula to access an isbits Union Array's type tag bytes is `a->data + (a->maxsize - a->offset) * a->elsize + a->offset`; i.e. the Array's `a->data` pointer is already shifted by `a->offset`, so correcting for that, we follow the data all the way to the max of what it can hold `a->maxsize`, then adjust by `a->offset` more bytes to account for any present "front buffering" the array might be doing. This layout in particular allows for very efficient resizing operations as the type tag data only ever has to move when the actual array's data has to move.
+Julia can now also store "isbits Union" values inline in a Memory, as opposed to requiring
+an indirection box. The optimization is accomplished by storing an extra "type tag memory"
+of bytes, one byte per element, alongside the bytes of the actual data. This type tag memory
+serves the same function as the type field case: its value signals the type of the actual
+stored Union value. The "type tag memory" directly follows the regular data space. So the
+formula to access an isbits Union Array's type tag bytes is `a->data + a->length *
+a->elsize`.
diff --git a/doc/src/devdocs/jit.md b/doc/src/devdocs/jit.md
index f33b968ad3948..96315c67b659f 100644
--- a/doc/src/devdocs/jit.md
+++ b/doc/src/devdocs/jit.md
@@ -59,20 +59,22 @@ In addition, there are a number of different transitional states that occur duri
 
 1. When writing `invoke`, `specsigflags`, and `specptr`:
       1. Perform an atomic compare-exchange operation of specptr assuming the old value was NULL. This compare-exchange operation should have at least acquire-release ordering, to provide ordering guarantees of the remaining memory operations in the write.
-      2. If `specptr` was non-null, cease the write operation and wait for bit 0b10 of `specsigflags` to be written.
+      2. If `specptr` was non-null, cease the write operation and wait for bit 0b10 of `specsigflags` to be written, then restart from step 1 if desired.
       3. Write the new low bit of `specsigflags` to its final value. This may be a relaxed write.
       4. Write the new `invoke` pointer to its final value. This must have at least a release memory ordering to synchronize with reads of `invoke`.
       5. Set the second bit of `specsigflags` to 1. This must be at least a release memory ordering to synchronize with reads of `specsigflags`. This step completes the write operation and announces to all other threads that all fields have been set.
 2. When reading all of `invoke`, `specsigflags`, and `specptr`:
-   1. Read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `initial_invoke`.
-   2. If `initial_invoke` is NULL, the codeinst is not yet executable. `invoke` is NULL, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL.
-   3. Read the `specptr` field with at least an acquire memory ordering.
+   1. Read the `specptr` field with any memory ordering.
+   2. Read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `initial_invoke`.
+   3. If `initial_invoke` is NULL, the codeinst is not yet executable. `invoke` is NULL, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL.
    4. If `specptr` is NULL, then the `initial_invoke` pointer must not be relying on `specptr` to guarantee correct execution. Therefore, `invoke` is non-null, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL.
    5. If `specptr` is non-null, then `initial_invoke` might not be the final `invoke` field that uses `specptr`. This can occur if `specptr` has been written, but `invoke` has not yet been written. Therefore, spin on the second bit of `specsigflags` until it is set to 1 with at least acquire memory ordering.
-   6. Re-read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `final_invoke`.
+   6. Re-read the `invoke` field with any memory ordering. This load will be referred to as `final_invoke`.
    7. Read the `specsigflags` field with any memory ordering.
    8. `invoke` is `final_invoke`, `specsigflags` is the value read in step 7, `specptr` is the value read in step 3.
 3. When updating a `specptr` to a different but equivalent function pointer:
    1. Perform a release store of the new function pointer to `specptr`. Races here must be benign, as the old function pointer is required to still be valid, and any new ones are also required to be valid as well. Once a pointer has been written to `specptr`, it must always be callable whether or not it is later overwritten.
 
+Correctly reading these fields is implemented in `jl_read_codeinst_invoke`.
+
 Although these write, read, and update steps are complicated, they ensure that the JIT can update codeinsts without invalidating existing codeinsts, and that the JIT can update codeinsts without invalidating existing `invoke` pointers. This allows the JIT to potentially reoptimize functions at higher optimization levels in the future, and also will allow the JIT to support concurrent compilation of functions in the future.
diff --git a/doc/src/devdocs/llvm-passes.md b/doc/src/devdocs/llvm-passes.md
index eec8b07c2701e..641abb864e703 100644
--- a/doc/src/devdocs/llvm-passes.md
+++ b/doc/src/devdocs/llvm-passes.md
@@ -37,15 +37,6 @@ This pass performs most of the GC rooting work required to track pointers betwee
 * Opt Name: `module(FinalLowerGC)`
 
 This pass lowers a few last intrinsics to their final form targeting functions in the `libjulia` library. Separating this from `LateGCLowering` enables other backends (GPU compilation) to supply their own custom lowerings for these intrinsics, enabling the Julia pipeline to be used on those backends as well.
-
-### LowerHandlers
-
-* Filename: `llvm-lower-handlers.cpp`
-* Class Name: `LowerExcHandlersPass`
-* Opt Name: `function(LowerExcHandlers)`
-
-This pass lowers exception handling intrinsics into calls to runtime functions that are actually called when handling exceptions.
-
 ### RemoveNI
 
 * Filename: `llvm-remove-ni.cpp`
@@ -58,9 +49,9 @@ This pass removes the non-integral address spaces from the module's datalayout s
 
 * Filename: `llvm-simdloop.cpp`
 * Class Name: `LowerSIMDLoopPass`
-* Opt Name: `module(LowerSIMDLoop)`
+* Opt Name: `loop(LowerSIMDLoop)`
 
-This pass acts as the main driver of the `@simd` annotation. Codegen inserts a call to a marker intrinsic (`julia.simdloop`), which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduce and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass does not preserve either loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop).
+This pass acts as the main driver of the `@simd` annotation. Codegen inserts a `!llvm.loopid` marker at the back branch of a loop, which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduce and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass does not preserve either loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop).
 
 ### LowerPTLS
 
@@ -94,11 +85,11 @@ This pass removes Julia-specific address spaces from LLVM IR. It is mostly used
 * Class Name: `MultiVersioningPass`
 * Opt Name: `module(JuliaMultiVersioning)`
 
-This pass performs modifications to a module to create functions that are optimized for running on different architectures (see sysimg.md and pkgimg.md for more details). Implementation-wise, it clones functions and applies different target-specific attributes to them to allow the optimizer to use advanced features such as vectorization and instruction scheduling for that platform. It also creates some infrastructure to enable the Julia image loader to select the appropriate version of the function to call based on the architecture the loader is running on. The target-specific attributes are controlled by the `julia.mv.specs` module flag, which during compilation is derived from the `JULIA_CPU_TARGET` environment variable. The pass must also be enabled by providing a `julia.mv.enable` module flag with a value of 1.
+This pass performs modifications to a module to create functions that are optimized for running on different architectures (see sysimg.md and pkgimg.md for more details). Implementation-wise, it clones functions and applies different target-specific attributes to them to allow the optimizer to use advanced features such as vectorization and instruction scheduling for that platform. It also creates some infrastructure to enable the Julia image loader to select the appropriate version of the function to call based on the architecture the loader is running on. The target-specific attributes are controlled by the `julia.mv.specs` module flag, which during compilation is derived from the [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) environment variable. The pass must also be enabled by providing a `julia.mv.enable` module flag with a value of 1.
 
 !!! warning
 
-   Use of `llvmcall` with multiversioning is dangerous. `llvmcall` enables access to features not typically exposed by the Julia APIs, and are therefore usually not available on all architectures. If multiversioning is enabled and code generation is requested for a target architecture that does not support the feature required by an `llvmcall` expression, LLVM will probably error out, likely with an abort and the message `LLVM ERROR: Do not know how to split the result of this operator!`.
+    Use of `llvmcall` with multiversioning is dangerous. `llvmcall` enables access to features not typically exposed by the Julia APIs, and are therefore usually not available on all architectures. If multiversioning is enabled and code generation is requested for a target architecture that does not support the feature required by an `llvmcall` expression, LLVM will probably error out, likely with an abort and the message `LLVM ERROR: Do not know how to split the result of this operator!`.
 
 ### GCInvariantVerifier
 
@@ -114,18 +105,6 @@ This pass is used to verify Julia's invariants about LLVM IR. This includes thin
 
 These passes are used to perform transformations on LLVM IR that LLVM will not perform itself, e.g. fast math flag propagation, escape analysis, and optimizations on Julia-specific internal functions. They use knowledge about Julia's semantics to perform these optimizations.
 
-### CombineMulAdd
-
-* Filename: `llvm-muladd.cpp`
-* Class Name: `CombineMulAddPass`
-* Opt Name: `function(CombineMulAdd)`
-
-This pass serves to optimize the particular combination of a regular `fmul` with a fast `fadd` into a contract `fmul` with a fast `fadd`. This is later optimized by the backend to a [fused multiply-add](https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation#Fused_multiply%E2%80%93add) instruction, which can provide significantly faster operations at the cost of more [unpredictable semantics](https://simonbyrne.github.io/notes/fastmath/).
-
-!!! note
-
-    This optimization only occurs when the `fmul` has a single use, which is the fast `fadd`.
-
 ### AllocOpt
 
 * Filename: `llvm-alloc-opt.cpp`
@@ -156,6 +135,6 @@ This pass is used to hoist Julia-specific intrinsics out of loops. Specifically,
 3. Hoist allocations out of loops when they do not escape the loop
    1. We use a very conservative definition of escape here, the same as the one used in `AllocOptPass`. This transformation can reduce the number of allocations in the IR, even when an allocation escapes the function altogether.
 
-!!!note
+!!! note
 
     This pass is required to preserve LLVM's [MemorySSA](https://llvm.org/docs/MemorySSA.html) ([Short Video](https://www.youtube.com/watch?v=bdxWmryoHak), [Longer Video](https://www.youtube.com/watch?v=1e5y6WDbXCQ)) and [ScalarEvolution](https://baziotis.cs.illinois.edu/compilers/introduction-to-scalar-evolution.html) ([Newer Slides](https://llvm.org/devmtg/2018-04/slides/Absar-ScalarEvolution.pdf) [Older Slides](https://llvm.org/devmtg/2009-10/ScalarEvolutionAndLoopOptimization.pdf)) analyses.
diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md
index 4e5e90d7cdbc6..3db57a59deb06 100644
--- a/doc/src/devdocs/llvm.md
+++ b/doc/src/devdocs/llvm.md
@@ -11,13 +11,13 @@ The code for lowering Julia AST to LLVM IR or interpreting it directly is in dir
 
 | File                             | Description                                                        |
 |:-------------------------------- |:------------------------------------------------------------------ |
-| `aotcompile.cpp`                 | Legacy pass manager pipeline, compiler C-interface entry           |
+| `aotcompile.cpp`                 | Compiler C-interface entry and object file emission                |
 | `builtins.c`                     | Builtin functions                                                  |
 | `ccall.cpp`                      | Lowering [`ccall`](@ref)                                           |
 | `cgutils.cpp`                    | Lowering utilities, notably for array and tuple accesses           |
 | `codegen.cpp`                    | Top-level of code generation, pass list, lowering builtins         |
 | `debuginfo.cpp`                  | Tracks debug information for JIT code                              |
-| `disasm.cpp`                     | Handles native object file and JIT code diassembly                 |
+| `disasm.cpp`                     | Handles native object file and JIT code disassembly                |
 | `gf.c`                           | Generic functions                                                  |
 | `intrinsics.cpp`                 | Lowering intrinsics                                                |
 | `jitlayers.cpp`                  | JIT-specific code, ORC compilation layers/utilities                |
@@ -29,8 +29,6 @@ The code for lowering Julia AST to LLVM IR or interpreting it directly is in dir
 | `llvm-gc-invariant-verifier.cpp` | Custom LLVM pass to verify Julia GC invariants                     |
 | `llvm-julia-licm.cpp`            | Custom LLVM pass to hoist/sink Julia-specific intrinsics           |
 | `llvm-late-gc-lowering.cpp`      | Custom LLVM pass to root GC-tracked values                         |
-| `llvm-lower-handlers.cpp`        | Custom LLVM pass to lower try-catch blocks                         |
-| `llvm-muladd.cpp`                | Custom LLVM pass for fast-match FMA                                |
 | `llvm-multiversioning.cpp`       | Custom LLVM pass to generate sysimg code on multiple architectures |
 | `llvm-propagate-addrspaces.cpp`  | Custom LLVM pass to canonicalize addrspaces                        |
 | `llvm-ptls.cpp`                  | Custom LLVM pass to lower TLS operations                           |
@@ -43,7 +41,7 @@ The code for lowering Julia AST to LLVM IR or interpreting it directly is in dir
 Some of the `.cpp` files form a group that compile to a single object.
 
 The difference between an intrinsic and a builtin is that a builtin is a first class function
-that can be used like any other Julia function.  An intrinsic can operate only on unboxed data,
+that can be used like any other Julia function. An intrinsic can operate only on unboxed data,
 and therefore its arguments must be statically typed.
 
 ### [Alias Analysis](@id LLVM-Alias-Analysis)
@@ -75,7 +73,7 @@ implies that option by default.
 
 ## Passing options to LLVM
 
-You can pass options to LLVM via the environment variable `JULIA_LLVM_ARGS`.
+You can pass options to LLVM via the environment variable [`JULIA_LLVM_ARGS`](@ref JULIA_LLVM_ARGS).
 Here are example settings using `bash` syntax:
 
   * `export JULIA_LLVM_ARGS=-print-after-all` dumps IR after each pass.
@@ -120,7 +118,14 @@ Here are example settings using `bash` syntax:
 On occasion, it can be useful to debug LLVM's transformations in isolation from
 the rest of the Julia system, e.g. because reproducing the issue inside `julia`
 would take too long, or because one wants to take advantage of LLVM's tooling
-(e.g. bugpoint). To get unoptimized IR for the entire system image, pass the
+(e.g. bugpoint).
+
+To start with, you can install the developer tools to work with LLVM via:
+```
+make -C deps install-llvm-tools
+```
+
+To get unoptimized IR for the entire system image, pass the
 `--output-unopt-bc unopt.bc` option to the system image build process, which will
 output the unoptimized IR to an `unopt.bc` file. This file can then be passed to
 LLVM tools as usual. `libjulia` can function as an LLVM pass plugin and can be
@@ -129,15 +134,15 @@ environment. In addition, it exposes the `-julia` meta-pass, which runs the
 entire Julia pass-pipeline over the IR. As an example, to generate a system
 image with the old pass manager, one could do:
 ```
-opt -enable-new-pm=0 -load libjulia-codegen.so -julia -o opt.bc unopt.bc
+
 llc -o sys.o opt.bc
 cc -shared -o sys.so sys.o
 ```
 To generate a system image with the new pass manager, one could do:
 ```
-opt -load-pass-plugin=libjulia-codegen.so --passes='julia' -o opt.bc unopt.bc
-llc -o sys.o opt.bc
-cc -shared -o sys.so sys.o
+./usr/tools/opt -load-pass-plugin=libjulia-codegen.so --passes='julia' -o opt.bc unopt.bc
+./usr/tools/llc -o sys.o opt.bc
+./usr/tools/cc -shared -o sys.so sys.o
 ```
 This system image can then be loaded by `julia` as usual.
 
@@ -147,11 +152,29 @@ using:
 fun, T = +, Tuple{Int,Int} # Substitute your function of interest here
 optimize = false
 open("plus.ll", "w") do file
-    println(file, InteractiveUtils._dump_function(fun, T, false, false, false, true, :att, optimize, :default))
+    code_llvm(file, fun, T; raw=true, dump_module=true, optimize)
 end
 ```
 These files can be processed the same way as the unoptimized sysimg IR shown
-above.
+above, or if you want to see the LLVM IR yourself and get extra verification run, you can use
+```
+./usr/tools/opt -load-pass-plugin=libjulia-codegen.so --passes='julia' -S -verify-each plus.ll
+```
+(note on MacOS this would be `libjulia-codegen.dylib` and on Windows `libjulia-codegen.dll`)
+
+## Running the LLVM test suite
+
+To run the llvm tests locally, you need to first install the tools, build julia, then you
+can run the tests:
+```
+make -C deps install-llvm-tools
+make -j julia-src-release
+make -C test/llvmpasses
+```
+
+If you want to run the individual test files directly, via the commands at the top of each
+test file, the first step here will have installed the tools into `./usr/tools/opt`. Then
+you'll want to manually replace `%s` with the name of the test file.
 
 ## Improving LLVM optimizations for Julia
 
@@ -167,7 +190,7 @@ study it and the pass of interest in isolation.
 3. Pick out the IR at the point just before the pass of interest runs.
 4. Strip the debug metadata and fix up the TBAA metadata by hand.
 
-The last step is labor intensive.  Suggestions on a better way would be appreciated.
+The last step is labor intensive. Suggestions on a better way would be appreciated.
 
 ## The jlcall calling convention
 
@@ -320,10 +343,10 @@ ccall(:foo, Cvoid, (Ptr{Float64},), A)
 In lowering, the compiler will insert a conversion from the array to the
 pointer which drops the reference to the array value. However, we of course
 need to make sure that the array does stay alive while we're doing the
-[`ccall`](@ref). To understand how this is done, first recall the lowering of the
-above code:
+[`ccall`](@ref). To understand how this is done, lets look at a hypothetical
+approximate possible lowering of the above code:
 ```julia
-return $(Expr(:foreigncall, :(:foo), Cvoid, svec(Ptr{Float64}), 0, :(:ccall), Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), 0, :(:ccall), :(A)), :(A)))
+return $(Expr(:foreigncall, Expr(:tuple, :(:foo)), Cvoid, svec(Ptr{Float64}), 0, :(:ccall), Expr(:foreigncall, Expr(:tuple, :(:jl_array_ptr)), Ptr{Float64}, svec(Any), 0, :(:ccall), :(A)), :(A)))
 ```
 The last `:(A)`, is an extra argument list inserted during lowering that informs
 the code generator which Julia level values need to be kept alive for the
diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md
index bef1419b1c8f8..b5cbe68c46435 100644
--- a/doc/src/devdocs/locks.md
+++ b/doc/src/devdocs/locks.md
@@ -3,177 +3,232 @@
 The following strategies are used to ensure that the code is dead-lock free (generally by addressing
 the 4th Coffman condition: circular wait).
 
-> 1. structure code such that only one lock will need to be acquired at a time
-> 2. always acquire shared locks in the same order, as given by the table below
-> 3. avoid constructs that expect to need unrestricted recursion
-
-## Locks
-
-Below are all of the locks that exist in the system and the mechanisms for using them that avoid
-the potential for deadlocks (no Ostrich algorithm allowed here):
-
-The following are definitely leaf locks (level 1), and must not try to acquire any other lock:
-
->   * safepoint
->
->     > Note that this lock is acquired implicitly by `JL_LOCK` and `JL_UNLOCK`. use the `_NOGC` variants
->     > to avoid that for level 1 locks.
->     >
->     > While holding this lock, the code must not do any allocation or hit any safepoints. Note that
->     > there are safepoints when doing allocation, enabling / disabling GC, entering / restoring exception
->     > frames, and taking / releasing locks.
->   * shared_map
->   * finalizers
->   * pagealloc
->   * gc_perm_lock
->   * flisp
->   * jl_in_stackwalk (Win32)
->   * ResourcePool<?>::mutex
->   * RLST_mutex
->   * jl_locked_stream::mutex
->   * debuginfo_asyncsafe
->   * inference_timing_mutex
->   * ExecutionEngine::SessionLock
->
->     > flisp itself is already threadsafe, this lock only protects the `jl_ast_context_list_t` pool
->     > likewise, the ResourcePool<?>::mutexes just protect the associated resource pool
-
-The following is a leaf lock (level 2), and only acquires level 1 locks (safepoint) internally:
-
->   * typecache
->   * Module->lock
->   * JLDebuginfoPlugin::PluginMutex
->   * newly_inferred_mutex
-
-The following is a level 3 lock, which can only acquire level 1 or level 2 locks internally:
-
->   * Method->writelock
-
-The following is a level 4 lock, which can only recurse to acquire level 1, 2, or 3 locks:
-
->   * MethodTable->writelock
+1. structure code such that only one lock will need to be acquired at a time
+2. always acquire shared locks in the same order, as given by the table below
+3. avoid constructs that expect to need unrestricted recursion
+
+## Types of locks
+
+`uv_mutex_t` (or `std::mutex`) is a wrapper around platform-specific locks
+(`pthread_mutex_t` on Unix, `CRITICAL_SECTION` on Windows).  It may cause the
+current OS thread to block, is not reentrant, and is not a safepoint.
+
+`jl_mutex_t` is a reentrant spinlock.  `jl_mutex_t`s acquired in a `try` block
+will be unlocked when we leave the block, either by reaching the end or catching
+an exception.  `JL_LOCK`/`JL_UNLOCK` are safepoints, while
+`JL_LOCK_NOGC`/`JL_UNLOCK_NOGC` are not.  `jl_mutex_t` must not be held across
+task switches.
+
+## Lock hierarchy
+
+Below are all of the locks that exist in the system and the mechanisms for using
+them that avoid the potential for deadlocks (no Ostrich algorithm allowed here).
+Except in the special cases where a rule for avoiding deadlock is given, no lock
+of a lower level may acquire a lock at a higher level.
+
+### Level 1
+
+No other lock may be acquired when one of these locks is held.  As a result, the
+code must not do any allocation or hit any safepoints. Note that there are
+safepoints when doing allocation, enabling/disabling GC, entering/restoring
+exception frames, and taking/releasing locks.
+
+* `safepoint_lock` (`uv_mutex_t`)
+  !!! danger
+
+      This lock is acquired implicitly by `JL_LOCK` and `JL_UNLOCK`. Use the
+      `_NOGC` variants to avoid that for level 1 locks.
+
+* `shared_map_lock.mtx` (`uv_mutex_t`)
+* `finalizers_lock` (`jl_mutex_t`)
+* `gc_pages_lock` (`uv_mutex_t`)
+* `gc_perm_lock` (`uv_mutex_t`)
+* `gc_queue_observer_lock` (`uv_mutex_t`)
+* `gc_threads_lock` (`uv_mutex_t`)
+* `flisp_lock` (`uv_mutex_t`)
+  !!! note
+      flisp itself is already threadsafe; this lock only protects the
+      `jl_ast_context_list_t` pool.  Likewise, the `ResourcePool<?>::mutexes`
+      just protect the associated resource pool.
+
+* `ResourcePool<?>.mutex` (`std::mutex`)
+* `RLST_mutex` (`std::mutex`)
+* `llvm_printing_mutex` (`std::mutex`)
+* `jl_locked_stream.mutex` (`std::mutex`)
+* `debuginfo_asyncsafe` (`uv_rwlock_t`) (can still acquire `jl_in_stackwalk` (`uv_mutex_t`, Win32 only))
+* `profile_show_peek_cond_lock` (`jl_mutex_t`)
+* `trampoline_lock` (`uv_mutex_t`)
+* `bt_data_prof_lock` (`uv_mutex_t`)
+* `jl_ptls_t.sleep_lock` (`uv_mutex_t`)
+* `tls_lock` (`uv_mutex_t`)
+* `page_profile_lock` (`uv_mutex_t`)
+* `symtab_lock` (`uv_mutex_t`)
+* `engine_lock` (`std::mutex`)
+
+### Level 2
+
+* `global_roots_lock`
+* `jl_module_t.lock`
+* `newly_inferred_mutex`
+* `JLDebuginfoPlugin.PluginMutex` (`std::mutex`)
+* `precompile_field_replace_lock`
+* `live_tasks_lock` (`uv_mutex_t`)
+* `heapsnapshot_lock`
+* `jitlock`
+* `jl_safepoint_suspend_all_threads` and `jl_safepoint_resume_all_threads`
+  !!! note
+      Inside a region protected by these functions, all other threads are
+      blocked inside a safepoint.  It is unsafe to take locks that may safepoint
+      in this region.
+
+### Level 3
+
+* `jl_method_t.writelock`
+* `typecache_lock`
+* `libmap_lock`
+
+### Level 4
+
+* `jl_methcache_t.writelock`
+
+### Level 5
+
+* `jl_methtable_t.writelock`
+
+### Level 6
 
 No Julia code may be called while holding a lock above this point.
 
-orc::ThreadSafeContext (TSCtx) locks occupy a special spot in the locking hierarchy. They are used to
-protect LLVM's global non-threadsafe state, but there may be an arbitrary number of them. By default,
-all of these locks may be treated as level 5 locks for the purposes of comparing with the rest of the
-hierarchy. Acquiring a TSCtx should only be done from the JIT's pool of TSCtx's, and all locks on
-that TSCtx should be released prior to returning it to the pool. If multiple TSCtx locks must be
-acquired at the same time (due to recursive compilation), then locks should be acquired in the order
-that the TSCtxs were borrowed from the pool.
+* `world_counter_lock`
 
-The following is a level 5 lock
+### Level 7
 
->   * JuliaOJIT::EmissionMutex
+* `JuliaOJIT::EmissionMutex` (`std::recursive_mutex`)
 
-The following are a level 6 lock, which can only recurse to acquire locks at lower levels:
+* `jl_modules_mutex`
 
->   * codegen
->   * jl_modules_mutex
+* `jl_uv_mutex` (known as `iolock` from Julia)
+  !!! danger
+      Doing any I/O (for example, printing warning messages or debug information)
+      while holding any other lock listed above may result in pernicious and
+      hard-to-find deadlocks.
 
-The following is an almost root lock (level end-1), meaning only the root look may be held when
-trying to acquire it:
+* Individual `ThreadSynchronizer` locks
+  !!! danger
+      This may continue to be held after releasing the iolock, or acquired
+      without it, but be very careful to never attempt to acquire the iolock
+      while holding it.
 
->   * typeinf
->
->     > this one is perhaps one of the most tricky ones, since type-inference can be invoked from many
->     > points
->     >
->     > currently the lock is merged with the codegen lock, since they call each other recursively
+* `Libdl.LazyLibrary.lock` (`ReentrantLock`)
 
-The following lock synchronizes IO operation. Be aware that doing any I/O (for example,
-printing warning messages or debug information) while holding any other lock listed above
-may result in pernicious and hard-to-find deadlocks. BE VERY CAREFUL!
+* `orc::ThreadSafeContext`
 
->   * iolock
->   * Individual ThreadSynchronizers locks
->
->     > this may continue to be held after releasing the iolock, or acquired without it,
->     > but be very careful to never attempt to acquire the iolock while holding it
+* `cfun_lock`
 
+### Level 8
 
-The following is the root lock, meaning no other lock shall be held when trying to acquire it:
+* `precomp_statement_out_lock`
+* `dispatch_statement_out_lock`
 
->   * toplevel
->
->     > this should be held while attempting a top-level action (such as making a new type or defining
->     > a new method): trying to obtain this lock inside a staged function will cause a deadlock condition!
->     >
->     >
->     > additionally, it's unclear if *any* code can safely run in parallel with an arbitrary toplevel
->     > expression, so it may require all threads to get to a safepoint first
+## Exceptions to the lock hierarchy
 
-## Broken Locks
+Ordinarily, it is forbidden to acquire locks of equal level to a lock already
+held.  In these specific cases we use a special protocol for acquiring locks at
+the same level:
 
-The following locks are broken:
-
-  * toplevel
-
-    > doesn't exist right now
-    >
-    > fix: create it
-
-  * Module->lock
-
-    > This is vulnerable to deadlocks since it can't be certain it is acquired in sequence.
-    > Some operations (such as `import_module`) are missing a lock.
-    >
-    > fix: replace with `jl_modules_mutex`?
-
-  * loading.jl: `require` and `register_root_module`
-
-    > This file potentially has numerous problems.
-    >
-    > fix: needs locks
-
-## Shared Global Data Structures
-
-These data structures each need locks due to being shared mutable global state. It is the inverse
-list for the above lock priority list. This list does not include level 1 leaf resources due to
-their simplicity.
-
-MethodTable modifications (def, cache) : MethodTable->writelock
+- `jl_method_t.writelock`
 
-Type declarations : toplevel lock
+  Invalidation acquires the lock for every method during its depth-first search
+  for backedges.  To avoid deadlocks, we must already hold `world_counter_lock`
+  before acquiring multiple `jl_method_t.writelock`s.
 
-Type application : typecache lock
+### Broken locks
 
-Global variable tables : Module->lock
-
-Module serializer : toplevel lock
-
-JIT & type-inference : codegen lock
-
-MethodInstance/CodeInstance updates : Method->writelock, codegen lock
-
->   * These are set at construction and immutable:
->       * specTypes
->       * sparam_vals
->       * def
-
->   * These are set by `jl_type_infer` (while holding codegen lock):
->       * cache
->       * rettype
->       * inferred
-        * valid ages
-
->   * `inInference` flag:
->       * optimization to quickly avoid recurring into `jl_type_infer` while it is already running
->       * actual state (of setting `inferred`, then `fptr`) is protected by codegen lock
-
->   * Function pointers:
->       * these transition once, from `NULL` to a value, while the codegen lock is held
->
->   * Code-generator cache (the contents of `functionObjectsDecls`):
->       * these can transition multiple times, but only while the codegen lock is held
->       * it is valid to use old version of this, or block for new versions of this, so races are benign,
->         as long as the code is careful not to reference other data in the method instance (such as `rettype`)
->         and assume it is coordinated, unless also holding the codegen lock
->
-LLVMContext : codegen lock
-
-Method : Method->writelock
+The following locks are broken:
 
-  * roots array (serializer and codegen)
-  * invoke / specializations / tfunc modifications
+* `loading.jl`: `require` and `register_root_module`
+
+   This file potentially has numerous problems. (fix: needs locks)
+
+## Updates to the world counter
+
+Thanks to the [world age](@ref man-world-age) mechanism, Julia can allow the
+replacement of both methods and bindings, yet remain amenable to optimization.
+Every compiled `CodeInstance` has a range of valid world ages; we could
+conservatively assume all CIs are stale after a world age increment.  However,
+to avoid spurious recompilation, we track dependencies, called "edges", while
+maintaining the following invariant:
+
+For every published `CodeInstance`, either:
+- `min_world` and `max_world` are finite, and the CI is valid for every world
+  in that range.
+- `max_world` is ∞ (`-1`), and this CI is ready for invalidation, meaning
+  for every forward edge:
+  - If the edge is a `CodeInstance` that is invoked or inlined into this CI,
+    the edge's `MethodInstance` `backedge` array has an entry pointing back.
+  - If the edge is a `Binding`:
+      - If the binding is in another module, it has an entry for this CI in its
+        `backedges` array.
+      - If the binding is in the same module, the `Method` for this CI is in the
+        module's `scanned_methods` array.
+
+For example, the following code replaces a constant in another module, causing a
+chain of invalidations:
+```julia
+const c1 = 1
+module M const c2 = 2 end
+f() = getfield(M, :c2)
+g() = f() + c1
+
+g()                   # compile g
+
+@eval M const c2 = 3  # invalidate f, g
+g()                   # recompile g
+```
+
+After compiling the two versions of `g()`, the global cache looks like this:
+![Global cache state after invalidation](./img/invalidation-example.png)
+
+The maximum world age, `jl_world_counter`, is protected by the
+`world_counter_lock`.  Julia uses a form of optimistic concurrency control to
+allow type inference without holding `world_counter_lock`.
+
+Publishing a new method or binding follows these steps:
+- Acquire `world_counter_lock`.
+- Relaxed-load `jl_world_counter` and let `new_world = jl_world_counter + 1`.
+- Publish the new binding partitions or method table entries with world range
+  `[new_world, ∞)`.  This step is described in the section on the [lock free
+  data structures](@ref man-lock-free-data).
+- Release-store `new_world` to `jl_world_counter`.
+- Release `world_counter_lock`.
+
+Type inference proceeds like so:
+- Acquire-load `jl_world_counter` (call this `validation_world`).
+- Perform type inference in that world, reading the bindings and method table in
+  that world using the lock-free data structures.
+- Store back edges for every inferred `CodeInstance`:
+  - For non-local bindings, this acquires the binding's module's lock.
+  - For CIs, this acquires the method's lock.
+- Acquire `world_counter_lock`.
+- Relaxed-load `jl_world_counter` and compare it to `validation_world`:
+  - If it is different, leave the valid world ranges for the inferred CIs
+    unchanged.
+  - If it is unchanged, our optimism was rewarded.  We can promote all the
+    inferred CIs valid in `validation_world` to `[validation_world, ∞)` and rely
+    on the backedges for invalidation.
+- Release `world_counter_lock`.
+
+![Two threads doing type inference while another adds a method](./img/typeinf-promotion.png)
+
+In the above diagram, threads 1 and 2 are doing type inference (the dotted
+line), while thread 3 is activating a new method.  The solid boxes represent
+critical sections where the `world_counter_lock` is held.  `acq`, `rel`, and
+`read`, are acquire loads, release stores, and relaxed loads respectively.
+
+T1 promotes its CI in time, but T2 takes too long, blocking on
+`world_counter_lock` until T3 has finished publishing the new method and
+incrementing the world counter.  It reads `W+1` and fails to promote its CI,
+leaving it with a maximum world of `W`.
+
+## [Lock free data structures](@id man-lock-free-data)
+TODO
diff --git a/doc/src/devdocs/meta.md b/doc/src/devdocs/meta.md
index 7a58578b3e53e..7b37ceaad068d 100644
--- a/doc/src/devdocs/meta.md
+++ b/doc/src/devdocs/meta.md
@@ -2,7 +2,7 @@
 
 In some circumstances, one might wish to provide hints or instructions that a given block of code
 has special properties: you might always want to inline it, or you might want to turn on special
-compiler optimization passes.  Starting with version 0.4, Julia has a convention that these instructions
+compiler optimization passes. Starting with version 0.4, Julia has a convention that these instructions
 can be placed inside a `:meta` expression, which is typically (but not necessarily) the first
 expression in the body of a function.
 
@@ -34,9 +34,8 @@ quote
 end
 ```
 
-`Base.pushmeta!(ex, :symbol, args...)` appends `:symbol` to the end of the `:meta` expression,
-creating a new `:meta` expression if necessary. If `args` is specified, a nested expression containing
-`:symbol` and these arguments is appended instead, which can be used to specify additional information.
+`Base.pushmeta!(ex, tag::Union{Symbol,Expr})` appends `:tag` to the end of the `:meta` expression,
+creating a new `:meta` expression if necessary.
 
 To use the metadata, you have to parse these `:meta` expressions. If your implementation can be
 performed within Julia, `Base.popmeta!` is very handy: `Base.popmeta!(body, :symbol)` will scan
diff --git a/doc/src/devdocs/object.md b/doc/src/devdocs/object.md
index caba6c3f12190..a9ec5f1aa256c 100644
--- a/doc/src/devdocs/object.md
+++ b/doc/src/devdocs/object.md
@@ -92,7 +92,7 @@ The corresponding global `jl_datatype_t` objects are created by [`jl_init_types`
 
 The garbage collector uses several bits from the metadata portion of the `jl_typetag_t` to track
 each object in the system. Further details about this algorithm can be found in the comments of
-the [garbage collector implementation in `gc.c`](https://github.com/JuliaLang/julia/blob/master/src/gc.c).
+the [garbage collector implementation in `gc-stock.c`](https://github.com/JuliaLang/julia/blob/master/src/gc-stock.c).
 
 ## Object allocation
 
@@ -155,7 +155,7 @@ jl_sym_t *jl_symbol(const char *str);
 Functions and MethodInstance:
 
 ```c
-jl_function_t *jl_new_generic_function(jl_sym_t *name);
+jl_value_t *jl_new_generic_function(jl_sym_t *name);
 jl_method_instance_t *jl_new_method_instance(jl_value_t *ast, jl_tuple_t *sparams);
 ```
 
@@ -163,11 +163,8 @@ Arrays:
 
 ```c
 jl_array_t *jl_new_array(jl_value_t *atype, jl_tuple_t *dims);
-jl_array_t *jl_new_arrayv(jl_value_t *atype, ...);
 jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr);
-jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, size_t nc);
-jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, size_t nc, size_t z);
-jl_array_t *jl_alloc_vec_any(size_t n);
+jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims);
 ```
 
 Note that many of these have alternative allocation functions for various special-purposes. The
@@ -182,7 +179,7 @@ jl_value_t *newstruct(jl_value_t *type);
 jl_value_t *newobj(jl_value_t *type, size_t nfields);
 ```
 
-And at the lowest level, memory is getting allocated by a call to the garbage collector (in `gc.c`),
+And at the lowest level, memory is getting allocated by a call to the garbage collector (in `gc-stock.c`),
 then tagged with its type:
 
 ```c
diff --git a/doc/src/devdocs/offset-arrays.md b/doc/src/devdocs/offset-arrays.md
index cc647eb1bd464..9a234288c6097 100644
--- a/doc/src/devdocs/offset-arrays.md
+++ b/doc/src/devdocs/offset-arrays.md
@@ -2,7 +2,7 @@
 
 Conventionally, Julia's
 arrays are indexed starting at 1, whereas some other languages start numbering at 0, and yet others
-(e.g., Fortran) allow you to specify arbitrary starting indices.  While there is much merit in
+(e.g., Fortran) allow you to specify arbitrary starting indices. While there is much merit in
 picking a standard (i.e., 1 for Julia), there are some algorithms which simplify considerably
 if you can index outside the range `1:size(A,d)` (and not just `0:size(A,d)-1`, either).
 To facilitate such computations, Julia supports arrays with arbitrary indices.
@@ -57,8 +57,8 @@ the cause try running julia with the option `--check-bounds=yes`.)
 ### Using `axes` for bounds checks and loop iteration
 
 `axes(A)` (reminiscent of `size(A)`) returns a tuple of `AbstractUnitRange{<:Integer}` objects, specifying
-the range of valid indices along each dimension of `A`.  When `A` has unconventional indexing,
-the ranges may not start at 1.  If you just want the range for a particular dimension `d`, there
+the range of valid indices along each dimension of `A`. When `A` has unconventional indexing,
+the ranges may not start at 1. If you just want the range for a particular dimension `d`, there
 is `axes(A, d)`.
 
 Base implements a custom range type, `OneTo`, where `OneTo(n)` means the same thing as `1:n` but
@@ -102,7 +102,7 @@ a convenient way of producing an all-zeros array that matches the indices of A i
 
 Let's walk through a couple of explicit examples. First, if `A` has conventional indices, then
 `similar(Array{Int}, axes(A))` would end up calling `Array{Int}(undef, size(A))`, and thus return
-an array.  If `A` is an `AbstractArray` type with unconventional indexing, then `similar(Array{Int}, axes(A))`
+an array. If `A` is an `AbstractArray` type with unconventional indexing, then `similar(Array{Int}, axes(A))`
 should return something that "behaves like" an `Array{Int}` but with a shape (including indices)
 that matches `A`.  (The most obvious implementation is to allocate an `Array{Int}(undef, size(A))` and
 then "wrap" it in a type that shifts the indices.)
@@ -118,7 +118,7 @@ This page focuses on the steps needed to define unconventional indexing.
 ### Custom `AbstractUnitRange` types
 
 If you're writing a non-1 indexed array type, you will want to specialize `axes` so it returns
-a `UnitRange`, or (perhaps better) a custom `AbstractUnitRange`.  The advantage of a custom type
+a `UnitRange`, or (perhaps better) a custom `AbstractUnitRange`. The advantage of a custom type
 is that it "signals" the allocation type for functions like `similar`. If we're writing an array
 type for which indexing will start at 0, we likely want to begin by creating a new `AbstractUnitRange`,
 `ZeroRange`, where `ZeroRange(n)` is equivalent to `0:n-1`.
@@ -150,7 +150,7 @@ axes(A::AbstractArray{T,N}, d) where {T,N} = d <= N ? axes(A)[d] : OneTo(1)
 ```
 
 may not be what you want: you may need to specialize it to return something other than `OneTo(1)`
-when `d > ndims(A)`.  Likewise, in `Base` there is a dedicated function `axes1` which is equivalent
+when `d > ndims(A)`. Likewise, in `Base` there is a dedicated function `axes1` which is equivalent
 to `axes(A, 1)` but which avoids checking (at runtime) whether `ndims(A) > 0`. (This is purely
 a performance optimization.)  It is defined as:
 
diff --git a/doc/src/devdocs/pkgimg.md b/doc/src/devdocs/pkgimg.md
index d9fc1a33a4d24..0bc28b07b0c29 100644
--- a/doc/src/devdocs/pkgimg.md
+++ b/doc/src/devdocs/pkgimg.md
@@ -9,7 +9,7 @@ In fact the underlying serialization format is the same, and the system image is
 Package images are shared libraries that contain both code and data. Like `.ji` cache files, they are generated per package. The data section contains both global data (global variables in the package) as well as the necessary metadata about what methods and types are defined by the package. The code section contains native objects that cache the final output of Julia's LLVM-based compiler.
 
 The command line option `--pkgimages=no` can be used to turn off object caching for this session. Note that this means that cache files have to likely be regenerated.
-See [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@ref env-max-num-precompile-files) for the upper limit of variants Julia caches per default.
+See [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@ref JULIA_MAX_NUM_PRECOMPILE_FILES) for the upper limit of variants Julia caches per default.
 
 !!! note
     While the package images present themselves as native shared libraries, they are only an approximation thereof. You will not be able to link against them from a native program and they must be loaded from Julia.
@@ -17,7 +17,7 @@ See [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@ref env-max-num-precompile-files) for th
 
 ## Linking
 
-Since the package images contain native code, we must run a linker over them before we can use them. You can set the environment variable `JULIA_VERBOSE_LINKING` to `true` to make the package image linking process verbose.
+Since the package images contain native code, we must run a linker over them before we can use them. You can set the environment variable [`JULIA_VERBOSE_LINKING`](@ref JULIA_VERBOSE_LINKING) to `true` to make the package image linking process verbose.
 
 Furthermore, we cannot assume that the user has a working system linker installed. Therefore, Julia ships with LLD, the LLVM linker, to provide a working out of the box experience. In `base/linking.jl`, we implement a limited interface to be able to link package images on all supported platforms.
 
@@ -33,8 +33,10 @@ Dynamic libraries on macOS need to link against `-lSystem`. On recent macOS vers
 To that effect we link with `-undefined dynamic_lookup`.
 
 ## [Package images optimized for multiple microarchitectures](@id pkgimgs-multi-versioning)
-Similar to [multi-versioning](@ref sysimg-multi-versioning) for system images, package images support multi-versioning. If you are in a heterogenous environment, with a unified cache,
-you can set the environment variable `JULIA_CPU_TARGET=generic` to multi-version the object caches.
+
+Similar to [multi-versioning](@ref sysimg-multi-versioning) for system images, package images support multi-versioning. This allows creating package caches that can run efficiently on different CPU architectures within the same environment.
+
+See the [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) environment variable for more information on how to set the CPU target for package images.
 
 ## Flags that impact package image creation and selection
 
diff --git a/doc/src/devdocs/precompile_hang.md b/doc/src/devdocs/precompile_hang.md
new file mode 100644
index 0000000000000..279ffec5360e8
--- /dev/null
+++ b/doc/src/devdocs/precompile_hang.md
@@ -0,0 +1,98 @@
+# Fixing precompilation hangs due to open tasks or IO
+
+On Julia 1.10 or higher, you might see the following message:
+
+![Screenshot of precompilation hang](./img/precompilation_hang.png)
+
+This may repeat. If it continues to repeat with no hints that it will
+resolve itself, you may have a "precompilation hang" that requires
+fixing. Even if it's transient, you might prefer to resolve it so that
+users will not be bothered by this warning. This page walks you
+through how to analyze and fix such issues.
+
+If you follow the advice and hit `Ctrl-C`, you might see
+
+```
+^C Interrupted: Exiting precompilation...
+
+  1 dependency had warnings during precompilation:
+┌ Test1 [ac89d554-e2ba-40bc-bc5c-de68b658c982]
+│  [pid 2745] Waiting for background task / IO / timer to finish:
+│   Handle type        uv_handle_t->data
+│   timer              0x55580decd1e0->0x7f94c3a4c340
+```
+
+This message conveys two key pieces of information:
+
+- the hang is occurring during precompilation of `Test1`, a dependency of `Test2` (the package we were trying to load with `using Test2`)
+- during precompilation of `Test1`, Julia created a `Timer` object (use `?Timer` if you're unfamiliar with Timers) which is still open; until that closes, the process is hung
+
+If this is enough of a hint for you to figure out how `timer = Timer(args...)` is being created, one good solution is to add `wait(timer)` if `timer` eventually finishes on its own, or `close(timer)` if you need to force-close it, before the final `end` of the module.
+
+However, there are cases that may not be that straightforward. Usually the best option is to start by determining whether the hang is due to code in Test1 or whether it is due to one of Test1's dependencies:
+
+- Option 1: `Pkg.add("Aqua")` and use [`Aqua.test_persistent_tasks`](https://juliatesting.github.io/Aqua.jl/dev/#Aqua.test_persistent_tasks-Tuple{Base.PkgId}). This should help you identify which package is causing the problem, after which the instructions [below](@ref pchang_fix) should be followed. If needed, you can create a `PkgId` as `Base.PkgId(UUID("..."), "Test1")`, where `...` comes from the `uuid` entry in `Test1/Project.toml`.
+- Option 2: manually diagnose the source of the hang.
+
+To manually diagnose:
+
+1. `Pkg.develop("Test1")`
+2. Comment out all the code `include`d or defined in `Test1`, *except* the `using/import` statements.
+3. Try `using Test2` (or even `using Test1` assuming that hangs too) again
+
+Now we arrive at a fork in the road: either
+
+- the hang persists, indicating it is [due to one of your dependencies](@ref pchang_deps)
+- the hang disappears, indicating that it is [due to something in your code](@ref pchang_fix).
+
+## [Diagnosing and fixing hangs due to a package dependency](@id pchang_deps)
+
+Use a binary search to identify the problematic dependency: start by commenting out half your dependencies, then when you isolate which half is responsible comment out half of that half, etc. (You don't have to remove them from the project, just comment out the `using`/`import` statements.)
+
+Once you've identified a suspect (here we'll call it `ThePackageYouThinkIsCausingTheProblem`), first try precompiling that package. If it also hangs during precompilation, continue chasing the problem backwards.
+
+However, most likely `ThePackageYouThinkIsCausingTheProblem` will precompile fine. This suggests it's in the function `ThePackageYouThinkIsCausingTheProblem.__init__`, which does not run during precompilation of `ThePackageYouThinkIsCausingTheProblem` but *does* in any package that loads `ThePackageYouThinkIsCausingTheProblem`. To test this theory, set up a minimal working example (MWE), something like
+
+```julia
+(@v1.10) pkg> generate MWE
+  Generating  project MWE:
+    MWE\Project.toml
+    MWE\src\MWE.jl
+```
+
+where the source code of `MWE.jl` is
+
+```julia
+module MWE
+using ThePackageYouThinkIsCausingTheProblem
+end
+```
+
+and you've added `ThePackageYouThinkIsCausingTheProblem` to MWE's dependencies.
+
+If that MWE reproduces the hang, you've found your culprit:
+`ThePackageYouThinkIsCausingTheProblem.__init__` must be creating the `Timer` object. If the timer object can be safely `close`d, that's a good option. Otherwise, the most common solution is to avoid creating the timer while *any* package is being precompiled: add
+
+```julia
+ccall(:jl_generating_output, Cint, ()) == 1 && return nothing
+```
+
+as the first line of `ThePackageYouThinkIsCausingTheProblem.__init__`, and it will avoid doing any initialization in any Julia process whose purpose is to precompile packages.
+
+## [Fixing package code to avoid hangs](@id pchang_fix)
+
+Search your package for suggestive words (here like "Timer") and see if you can identify where the problem is being created. Note that a method *definition* like
+
+```julia
+maketimer() = Timer(timer -> println("hi"), 0; interval=1)
+```
+
+is not problematic in and of itself: it can cause this problem only if `maketimer` gets called while the module is being defined. This might be happening from a top-level statement such as
+
+```julia
+const GLOBAL_TIMER = maketimer()
+```
+
+or it might conceivably occur in a [precompile workload](https://github.com/JuliaLang/PrecompileTools.jl).
+
+If you struggle to identify the causative lines, then consider doing a binary search: comment out sections of your package (or `include` lines to omit entire files) until you've reduced the problem in scope.
diff --git a/doc/src/devdocs/probes.md b/doc/src/devdocs/probes.md
index d15723e945462..b9616da513069 100644
--- a/doc/src/devdocs/probes.md
+++ b/doc/src/devdocs/probes.md
@@ -137,8 +137,8 @@ fib(x) = x <= 1 ? 1 : fib(x-1) + fib(x-2)
 beaver = @spawn begin
     while true
         fib(30)
-        # This safepoint is necessary until #41616, since otherwise this
-        # loop will never yield to GC.
+        # A manual safepoint is necessary since otherwise this loop
+        # may never yield to GC.
         GC.safepoint()
     end
 end
@@ -177,8 +177,8 @@ It's sometimes useful to know when a task is spawning other tasks. This is very
 easy to see with `rt__new__task`. The first argument to the probe, `parent`, is
 the existing task which is creating a new task. This means that if you know the
 address of the task you want to monitor, you can easily just look at the tasks
-that that specific task spawned. Let's see how to do this; first let's start a
-Julia session and get the PID and REPL's task address:
+that were spawned by that specific task. Let's see how to do this; first let's
+start a Julia session and get the PID and REPL's task address:
 
 ```
 > julia
@@ -188,7 +188,7 @@ Julia session and get the PID and REPL's task address:
    _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
   | | | | | | |/ _` |  |
   | | |_| | | | (_| |  |  Version 1.6.2 (2021-07-14)
- _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org/ release
+ _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org release
 |__/                   |
 
 1> getpid()
@@ -206,7 +206,7 @@ Now we can start `bpftrace` and have it monitor `rt__new__task` for *only* this
 
 And if we spawn a single task:
 
-`@async 1+1`
+`Threads.@spawn 1+1`
 
 we see this task being created:
 
@@ -215,8 +215,8 @@ we see this task being created:
 However, if we spawn a bunch of tasks from that newly-spawned task:
 
 ```julia
-@async for i in 1:10
-   @async 1+1
+Threads.@spawn for i in 1:10
+   Threads.@spawn 1+1
 end
 ```
 
@@ -264,7 +264,7 @@ We can see this problem illustrated with `bpftrace` quite easily. First, in one
    _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
   | | | | | | |/ _` |  |
   | | |_| | | | (_| |  |  Version 1.6.2 (2021-07-14)
- _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org/ release
+ _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org release
 |__/                   |
 
 1> getpid()
diff --git a/doc/src/devdocs/require.md b/doc/src/devdocs/require.md
index 5198a7425ee49..9f824e78a8653 100644
--- a/doc/src/devdocs/require.md
+++ b/doc/src/devdocs/require.md
@@ -7,26 +7,22 @@ precompilation cache. It is the implementation of the `import` statement.
 The features below are experimental and not part of the stable Julia API.
 Before building upon them inform yourself about the current thinking and whether they might change soon.
 
-### Module loading callbacks
+### Package loading callbacks
 
-It is possible to listen to the modules loaded by `Base.require`, by registering a callback.
+It is possible to listen to the packages loaded by `Base.require`, by registering a callback.
 
 ```julia
-loaded_packages = Channel{Symbol}()
-callback = (mod::Symbol) -> put!(loaded_packages, mod)
+loaded_packages = Base.PkgId[]
+callback = (pkg::Base.PkgId) -> push!(loaded_packages, pkg)
 push!(Base.package_callbacks, callback)
 ```
 
-Please note that the symbol given to the callback is a non-unique identifier and
-it is the responsibility of the callback provider to walk the module chain to
-determine the fully qualified name of the loaded binding.
+Using this would look something like:
 
-The callback below is an example of how to do that:
+```julia-repl
+julia> using Example
 
-```julia
-# Get the fully-qualified name of a module.
-function module_fqn(name::Symbol)
-    fqn = fullname(Base.root_module(name))
-    return join(fqn, '.')
-end
+julia> loaded_packages
+1-element Vector{Base.PkgId}:
+ Example [7876af07-990d-54b4-ab0e-23690620f79a]
 ```
diff --git a/doc/src/devdocs/sanitizers.md b/doc/src/devdocs/sanitizers.md
index 5eaf4b45d9f57..90d9c833052cd 100644
--- a/doc/src/devdocs/sanitizers.md
+++ b/doc/src/devdocs/sanitizers.md
@@ -21,14 +21,14 @@ If you require customization or further detail, see the documentation below.
 
 ## General considerations
 
-Using Clang's sanitizers obviously requires you to use Clang (`USECLANG=1`), but there's another
+Using Clang's sanitizers obviously requires you to use Clang, but there's another
 catch: most sanitizers require a run-time library, provided by the host compiler, while the instrumented
 code generated by Julia's JIT relies on functionality from that library. This implies that the
 LLVM version of your host compiler must match that of the LLVM library used within Julia.
 
 An easy solution is to have a dedicated build folder for providing a matching toolchain, by building
 with `BUILD_LLVM_CLANG=1`. You can then refer to this toolchain from another build
-folder by specifying `USECLANG=1` while overriding the `CC` and `CXX` variables.
+folder by overriding the `CC` and `CXX` variables.
 
 The sanitizers error out when they detect a shared library being opened using `RTLD_DEEPBIND`
 (ref: [google/sanitizers#611](https://github.com/google/sanitizers/issues/611)).
@@ -36,7 +36,7 @@ Since [libblastrampoline](https://github.com/staticfloat/libblastrampoline) by d
 uses `RTLD_DEEPBIND`, we need to set the environment variable `LBT_USE_RTLD_DEEPBIND=0`
 when using a sanitizer.
 
-To use one of of the sanitizers set `SANITIZE=1` and then the appropriate flag for the sanitizer you
+To use one of the sanitizers set `SANITIZE=1` and then the appropriate flag for the sanitizer you
 want to use.
 
 On macOS, this might need some extra flags also to work. Altogether, it might
@@ -44,7 +44,7 @@ look like this, plus one or more of the `SANITIZE_*` flags listed below:
 
     make -C deps USE_BINARYBUILDER_LLVM=0 LLVM_VER=svn stage-llvm
 
-    make -C src SANITIZE=1 USECLANG=1 \
+    make -C src SANITIZE=1 \
         CC=~+/deps/scratch/llvm-svn/build_Release/bin/clang \
         CXX=~+/deps/scratch/llvm-svn/build_Release/bin/clang++ \
         CPPFLAGS="-isysroot $(xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk" \
@@ -99,7 +99,6 @@ Checkout a Git worktree (or create out-of-tree build directory) at
 TOOLCHAIN=$(TOOLCHAIN_WORKTREE)/usr/tools
 
 # use our new toolchain
-USECLANG=1
 override CC=$(TOOLCHAIN)/clang
 override CXX=$(TOOLCHAIN)/clang++
 export ASAN_SYMBOLIZER_PATH=$(TOOLCHAIN)/llvm-symbolizer
diff --git a/doc/src/devdocs/ssair.md b/doc/src/devdocs/ssair.md
index 6d3de6d1f5758..2eb065a62e4bf 100644
--- a/doc/src/devdocs/ssair.md
+++ b/doc/src/devdocs/ssair.md
@@ -1,5 +1,53 @@
 # Julia SSA-form IR
 
+Julia uses a static single assignment intermediate representation ([SSA IR](https://en.wikipedia.org/wiki/Static_single-assignment_form)) to perform optimization.
+This IR is different from LLVM IR, and unique to Julia.
+It allows for Julia specific optimizations.
+
+1. Basic blocks (regions with no control flow) are explicitly annotated.
+2. if/else and loops are turned into `goto` statements.
+3. lines with multiple operations are split into multiple lines by introducing variables.
+
+For example the following Julia code:
+```julia
+function foo(x)
+    y = sin(x)
+    if x > 5.0
+        y = y + cos(x)
+    end
+    return exp(2) + y
+end
+```
+when called with a `Float64` argument is translated into:
+
+```julia
+using InteractiveUtils
+@code_typed foo(1.0)
+```
+
+```llvm
+CodeInfo(
+1 ─ %1 = invoke Main.sin(x::Float64)::Float64
+│   %2 = Base.lt_float(x, 5.0)::Bool
+└──      goto #3 if not %2
+2 ─ %4 = invoke Main.cos(x::Float64)::Float64
+└── %5 = Base.add_float(%1, %4)::Float64
+3 ┄ %6 = φ (#2 => %5, #1 => %1)::Float64
+│   %7 = Base.add_float(7.38905609893065, %6)::Float64
+└──      return %7
+) => Float64
+```
+
+In this example, we can see all of these changes.
+1. The first basic block is everything in
+```llvm
+1 ─ %1 = invoke Main.sin(x::Float64)::Float64
+│   %2 = Base.lt_float(x, 5.0)::Bool
+└──      goto #3 if not %2
+```
+2. The `if` statement is translated into `goto #3 if not %2` which goes to the 3rd basic block if `x>5` isn't met and otherwise goes to the second basic block.
+3. `%2` is an SSA value introduced to represent `x > 5`.
+
 ## Background
 
 Beginning in Julia 0.7, parts of the compiler use a new [SSA-form](https://en.wikipedia.org/wiki/Static_single_assignment_form)
@@ -11,11 +59,9 @@ linearized (i.e. turned into a form where function arguments could only be SSA v
 conditional control flow). This negated much of the usefulness of SSA form representation when performing
 middle end optimizations. Some heroic effort was put into making these optimizations work without a complete SSA
 form representation, but the lack of such a representation ultimately proved prohibitive.
+## Categories of IR nodes
 
-## New IR nodes
-
-With the new IR representation, the compiler learned to handle four new IR nodes, Phi nodes, Pi
-nodes as well as PhiC nodes and Upsilon nodes (the latter two are only used for exception handling).
+The SSA IR representation has four categories of IR nodes: Phi, Pi, PhiC, and Upsilon nodes (the latter two are only used for exception handling).
 
 ### Phi nodes and Pi nodes
 
@@ -37,6 +83,15 @@ may assume that any use of a Phi node will have an assigned value in the corresp
 for the mapping to be incomplete, i.e. for a Phi node to have missing incoming edges. In that case, it must
 be dynamically guaranteed that the corresponding value will not be used.
 
+Note that SSA uses semantically occur after the terminator of the corresponding predecessor ("on the edge").
+Consequently, if multiple Phi nodes appear at the start of a basic block, they are run simultaneously.
+This means that in the following IR snippet, if we came from block `23`, `%46` will take the value associated to
+`%45` _before_ we entered this block.
+```julia
+%45 = φ (#18 => %23, #23 => %50)
+%46 = φ (#18 => 1.0, #23 => %45)
+```
+
 PiNodes encode statically proven information that may be implicitly assumed in basic blocks dominated by a given
 pi node. They are conceptually equivalent to the technique introduced in the paper
 [ABCD: Eliminating Array Bounds Checks on Demand](https://dl.acm.org/citation.cfm?id=358438.349342) or the predicate info nodes in LLVM. To see how they work, consider,
@@ -144,7 +199,7 @@ The corresponding IR (with irrelevant types stripped) is:
 4 ┄ %13 = φᶜ (%3, %6, %9)::Bool
 │   %14 = φᶜ (%4, %7, %10)::Core.Compiler.MaybeUndef(Int64)
 │   %15 = φᶜ (%5)::Core.Const(1)
-└──       $(Expr(:leave, 1))
+└──       $(Expr(:leave, Core.SSAValue(2)))
 5 ─       $(Expr(:pop_exception, :(%2)))::Any
 │         $(Expr(:throw_undef_if_not, :y, :(%13)))::Any
 │   %19 = Core.tuple(%15, %14)
@@ -179,7 +234,7 @@ Instead, we do the following:
 - RAUW style operations are performed by setting the corresponding statement index to the replacement
   value.
 - Statements are erased by setting the corresponding statement to `nothing` (this is essentially just a special-case
-  convention of the above.
+  convention of the above).
 - If there are any uses of the statement being erased, they will be set to `nothing`.
 
 There is a `compact!` function that compacts the above data structure by performing the insertion of nodes in the appropriate place, trivial copy propagation, and renaming of uses to any changed SSA values. However, the clever part
diff --git a/doc/src/devdocs/stdio.md b/doc/src/devdocs/stdio.md
index 5ee4f0206ee0b..352420e25de77 100644
--- a/doc/src/devdocs/stdio.md
+++ b/doc/src/devdocs/stdio.md
@@ -36,7 +36,7 @@ Julia's `__init__()` function (in `base/sysimg.jl`) calls `reinit_stdio()` (in `
 to create Julia objects for [`Base.stdin`](@ref), [`Base.stdout`](@ref) and [`Base.stderr`](@ref).
 
 `reinit_stdio()` uses [`ccall`](@ref) to retrieve pointers to `JL_STD*` and calls `jl_uv_handle_type()`
-to inspect the type of each stream.  It then creates a Julia `Base.IOStream`, `Base.TTY` or `Base.PipeEndpoint`
+to inspect the type of each stream. It then creates a Julia `Base.IOStream`, `Base.TTY` or `Base.PipeEndpoint`
 object to represent each stream, e.g.:
 
 ```
@@ -63,7 +63,7 @@ stream.jl: function write(s::IO, p::Ptr, nb::Integer)
 ## printf() during initialization
 
 The libuv streams relied upon by `jl_printf()` etc., are not available until midway through
-initialization of the runtime (see `init.c`, `init_stdio()`).  Error messages or warnings that
+initialization of the runtime (see `init.c`, `init_stdio()`). Error messages or warnings that
 need to be printed before this are routed to the standard C library `fwrite()` function by the
 following mechanism:
 
diff --git a/doc/src/devdocs/subarrays.md b/doc/src/devdocs/subarrays.md
index cec7a64a65245..75b76bcb563a1 100644
--- a/doc/src/devdocs/subarrays.md
+++ b/doc/src/devdocs/subarrays.md
@@ -1,6 +1,6 @@
 # SubArrays
 
-Julia's `SubArray` type is a container encoding a "view" of a parent [`AbstractArray`](@ref).  This page
+Julia's `SubArray` type is a container encoding a "view" of a parent [`AbstractArray`](@ref). This page
 documents some of the design principles and implementation of `SubArray`s.
 
 One of the major design goals is to ensure high performance for views of both [`IndexLinear`](@ref) and
@@ -56,8 +56,8 @@ struct SubArray{T,N,P,I,L} <: AbstractArray{T,N}
 end
 ```
 
-`SubArray` has 5 type parameters.  The first two are the standard element type and dimensionality.
- The next is the type of the parent `AbstractArray`.  The most heavily-used is the fourth parameter,
+`SubArray` has 5 type parameters. The first two are the standard element type and dimensionality.
+ The next is the type of the parent `AbstractArray`. The most heavily-used is the fourth parameter,
 a `Tuple` of the types of the indices for each dimension. The final one, `L`, is only provided
 as a convenience for dispatch; it's a boolean that represents whether the index types support
 fast linear indexing. More on that later.
@@ -78,8 +78,8 @@ one to dispatch to efficient algorithms.
 ### Index translation
 
 Performing index translation requires that you do different things for different concrete `SubArray`
-types.  For example, for `S1`, one needs to apply the `i,j` indices to the first and third dimensions
-of the parent array, whereas for `S2` one needs to apply them to the second and third.  The simplest
+types. For example, for `S1`, one needs to apply the `i,j` indices to the first and third dimensions
+of the parent array, whereas for `S2` one needs to apply them to the second and third. The simplest
 approach to indexing would be to do the type-analysis at runtime:
 
 ```julia
@@ -161,7 +161,7 @@ julia> diff(A[2:2:4,:][:])
 ```
 
 A view constructed as `view(A, 2:2:4, :)` happens to have uniform stride, and therefore linear
-indexing indeed could be performed efficiently.  However, success in this case depends on the
+indexing indeed could be performed efficiently. However, success in this case depends on the
 size of the array: if the first dimension instead were odd,
 
 ```jldoctest
@@ -192,7 +192,7 @@ then `A[2:2:4,:]` does not have uniform stride, so we cannot guarantee efficient
     levels of indirection; they can simply re-compute the indices into the original parent array!
   * Hopefully by now it's fairly clear that supporting slices means that the dimensionality, given
     by the parameter `N`, is not necessarily equal to the dimensionality of the parent array or the
-    length of the `indices` tuple.  Neither do user-supplied indices necessarily line up with entries
+    length of the `indices` tuple. Neither do user-supplied indices necessarily line up with entries
     in the `indices` tuple (e.g., the second user-supplied index might correspond to the third dimension
     of the parent array, and the third element in the `indices` tuple).
 
diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md
index 40fcd3fa602f8..e8202736e57e1 100644
--- a/doc/src/devdocs/sysimg.md
+++ b/doc/src/devdocs/sysimg.md
@@ -3,15 +3,15 @@
 ## [Building the Julia system image](@id Building-the-Julia-system-image)
 
 Julia ships with a preparsed system image containing the contents of the `Base` module, named
-`sys.ji`.  This file is also precompiled into a shared library called `sys.{so,dll,dylib}` on
-as many platforms as possible, so as to give vastly improved startup times.  On systems that do
+`sys.ji`. This file is also precompiled into a shared library called `sys.{so,dll,dylib}` on
+as many platforms as possible, so as to give vastly improved startup times. On systems that do
 not ship with a precompiled system image file, one can be generated from the source files shipped
 in Julia's `DATAROOTDIR/julia/base` folder.
 
 Julia will by default generate its system image on half of the available system threads. This
-may be controlled by the [`JULIA_IMAGE_THREADS`](@ref env-image-threads) environment variable.
+may be controlled by the [`JULIA_IMAGE_THREADS`](@ref JULIA_IMAGE_THREADS) environment variable.
 
-This operation is useful for multiple reasons.  A user may:
+This operation is useful for multiple reasons. A user may:
 
   * Build a precompiled shared library system image on a platform that did not ship with one, thereby
     improving startup times.
@@ -34,7 +34,7 @@ based on available CPU features.
 ### Specifying multiple system image targets
 
 A multi-microarchitecture system image can be enabled by passing multiple targets
-during system image compilation. This can be done either with the `JULIA_CPU_TARGET` make option
+during system image compilation. This can be done either with the [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) make option
 or with the `-C` command line option when running the compilation command manually.
 Multiple targets are separated by `;` in the option string.
 The syntax for each target is a CPU name followed by multiple features separated by `,`.
@@ -84,11 +84,11 @@ generic;sandybridge,-xsaveopt,clone_all;haswell,-rdrnd,base(1)
 This creates a system image with three separate targets; one for a generic `x86_64`
 processor, one with a `sandybridge` ISA (explicitly excluding `xsaveopt`) that explicitly
 clones all functions, and one targeting the `haswell` ISA, based off of the `sandybridge`
-sysimg version, and also excluding `rdrnd`.  When a Julia implementation loads the
+sysimg version, and also excluding `rdrnd`. When a Julia implementation loads the
 generated sysimg, it will check the host processor for matching CPU capability flags,
-enabling the highest ISA level possible.  Note that the base level (`generic`) requires
+enabling the highest ISA level possible. Note that the base level (`generic`) requires
 the `cx16` instruction, which is disabled in some virtualization software and must be
-enabled for the `generic` target to be loaded.  Alternatively, a sysimg could be generated
+enabled for the `generic` target to be loaded. Alternatively, a sysimg could be generated
 with the target `generic,-cx16` for greater compatibility, however note that this may cause
 performance and stability problems in some code.
 
@@ -117,3 +117,82 @@ See code comments for each components for more implementation details.
     depending on the ISA. The target selection will prefer exact CPU name match,
     larger vector register size, and larger number of features.
     An overview of this process is in `src/processor.cpp`.
+
+## Trimming
+
+System images are typically quite large, since Base includes a lot of functionality, and by
+default system images also include several packages such as LinearAlgebra for convenience
+and backwards compatibility. Most programs will use only a fraction of the functions in
+these packages. Therefore it makes sense to build binaries that exclude unused functions
+to save space, referred to as "trimming".
+
+While the basic idea of trimming is sound, Julia has dynamic and reflective features that make it
+difficult (or impossible) to know in general which functions are unused. As an extreme example,
+consider code like
+
+```
+getglobal(Base, Symbol(readchomp(stdin)))(1)
+```
+
+This code reads a function name from `stdin` and calls the named function from Base on the value
+`1`. In this case it is impossible to predict which function will be called, so no functions
+can reliably be considered "unused". With some noteworthy exceptions (Julia's own REPL being
+one of them), most real-world programs do not do things like this.
+
+Less extreme cases occur, for example, when there are type instabilities that make it impossible
+for the compiler to predict which method will be called. However, if code is well-typed and does
+not use reflection, a complete and (hopefully) relatively small set of needed methods can be
+determined, and the rest can be removed. The `--trim` command-line option requests this kind of
+compilation.
+
+When `--trim` is specified in a command used to build a system image, the compiler begins
+tracing calls starting at methods marked using `Base.Experimental.entrypoint`. If a call is too
+dynamic to reasonably narrow down the possible call targets, an error is given at compile
+time showing the location of the call. For testing purposes, it is possible to skip these
+errors by specifying `--trim=unsafe` or `--trim=unsafe-warn`. Then you will get a system
+image built, but it may crash at run time if needed code is not present.
+
+It typically makes sense to specify `--strip-ir` along with `--trim`, since trimmed binaries
+are fully compiled and therefore don't need Julia IR. At some point we may make `--trim` imply
+`--strip-ir`, but for now we have kept them orthogonal.
+
+To get the smallest possible binary, it will also help to specify `--strip-metadata` and
+run the Unix `strip` utility. However, those steps remove Julia-specific and native (DWARF format)
+debug info, respectively, and so will make debugging more difficult.
+
+### Common problems
+
+- The Base global variables `stdin`, `stdout`, and `stderr` are non-constant and so their
+  types are not known. All printing should use a specific IO object with a known type.
+  The easiest substitution is to use `print(Core.stdout, x)` instead of `print(x)` or
+  `print(stdout, x)`.
+- Use tools like [JET.jl](https://github.com/aviatesk/JET.jl),
+  [Cthulhu.jl](https://github.com/JuliaDebug/Cthulhu.jl), and/or
+  [SnoopCompile](https://github.com/timholy/SnoopCompile.jl)
+  to identify failures of type-inference, and follow our [Performance Tips](@ref) to fix them.
+
+### Compatibility concerns
+
+We have identified many small changes to Base that significantly increase the set of programs
+that can be reliably trimmed. Unfortunately some of those changes would be considered breaking,
+and so are only applied when trimming is requested (this is done by an external build script,
+currently maintained inside the test suite as `contrib/juliac/juliac-buildscript.jl`).
+Therefore in many cases trimming will require you to opt in to new variants of Base and some
+standard libraries.
+
+If you want to use trimming, it is important to set up continuous integration testing that
+performs a trimmed build and fully tests the resulting program.
+Fortunately, if your program successfully compiles with `--trim` then it is very likely to work
+the same as it did before. However, CI is needed to ensure that your program continues to build
+with trimming as you develop it.
+
+Package authors may wish to test that their package is "trimming safe", however this is impossible
+in general. Trimming is only expected to work given concrete entry points such as `main()` and
+library entry points meant to be called from outside Julia. For generic packages, existing tests
+for type stability like `@inferred` and `JET.@report_call` are about as close as you can get to checking
+trim compatibility.
+
+Trimming also introduces new compatibility issues between minor versions of Julia. At this time,
+we are not able to guarantee that a program that can be trimmed in one version of Julia
+can also be trimmed in all future versions of Julia. However, breakage of that kind is expected
+to be rare. We also plan to try to *increase* the set of programs that can be trimmed over time.
diff --git a/doc/src/devdocs/types.md b/doc/src/devdocs/types.md
index c3afc26600c65..8344cd90da9af 100644
--- a/doc/src/devdocs/types.md
+++ b/doc/src/devdocs/types.md
@@ -1,6 +1,6 @@
 # More about types
 
-If you've used Julia for a while, you understand the fundamental role that types play.  Here we
+If you've used Julia for a while, you understand the fundamental role that types play. Here we
 try to get under the hood, focusing particularly on [Parametric Types](@ref).
 
 ## Types and sets (and `Any` and `Union{}`/`Bottom`)
@@ -52,7 +52,7 @@ julia> typejoin(Tuple{Integer, Float64}, Tuple{Int, Real})
 Tuple{Integer, Real}
 ```
 
-While these operations may seem abstract, they lie at the heart of Julia.  For example, method
+While these operations may seem abstract, they lie at the heart of Julia. For example, method
 dispatch is implemented by stepping through the items in a method list until reaching one for which
 the type of the argument tuple is a subtype of the method signature.
 For this algorithm to work, it's important that methods be sorted by their specificity, and that the
@@ -93,13 +93,15 @@ UnionAll
   var: TypeVar
     name: Symbol T
     lb: Union{}
-    ub: Any
+    ub: abstract type Any
   body: UnionAll
     var: TypeVar
       name: Symbol N
       lb: Union{}
-      ub: Any
-    body: Array{T, N} <: DenseArray{T, N}
+      ub: abstract type Any
+    body: mutable struct Array{T, N} <: DenseArray{T, N}
+      ref::MemoryRef{T}
+      size::NTuple{N, Int64}
 ```
 
 This indicates that `Array` actually names a `UnionAll` type. There is one `UnionAll` type for
@@ -174,33 +176,37 @@ julia> dump(Array{Int,1}.name)
 TypeName
   name: Symbol Array
   module: Module Core
-  names: empty SimpleVector
+  singletonname: Symbol Array
+  names: SimpleVector
+    1: Symbol ref
+    2: Symbol size
+  atomicfields: Ptr{Nothing}(0x0000000000000000)
+  constfields: Ptr{Nothing}(0x0000000000000000)
   wrapper: UnionAll
     var: TypeVar
       name: Symbol T
       lb: Union{}
-      ub: Any
+      ub: abstract type Any
     body: UnionAll
       var: TypeVar
         name: Symbol N
         lb: Union{}
-        ub: Any
-      body: Array{T, N} <: DenseArray{T, N}
+        ub: abstract type Any
+      body: mutable struct Array{T, N} <: DenseArray{T, N}
+  Typeofwrapper: abstract type Type{Array} <: Any
   cache: SimpleVector
     ...
-
   linearcache: SimpleVector
     ...
-
-  hash: Int64 -7900426068641098781
-  mt: MethodTable
-    name: Symbol Array
-    defs: Nothing nothing
-    cache: Nothing nothing
-    max_args: Int64 0
-    module: Module Core
-    : Int64 0
-    : Int64 0
+  hash: Int64 2594190783455944385
+  backedges: #undef
+  partial: #undef
+  max_args: Int32 0
+  n_uninitialized: Int32 0
+  flags: UInt8 0x02
+  cache_entry_count: UInt8 0x00
+  max_methods: UInt8 0x00
+  constprop_heuristic: UInt8 0x00
 ```
 
 In this case, the relevant field is `wrapper`, which holds a reference to the top-level type used
@@ -223,7 +229,7 @@ Ptr{Cvoid} @0x00007fcc7de64850
 The `wrapper` field of [`Array`](@ref) points to itself, but for `Array{TV,NV}` it points back
 to the original definition of the type.
 
-What about the other fields? `hash` assigns an integer to each type.  To examine the `cache`
+What about the other fields? `hash` assigns an integer to each type. To examine the `cache`
 field, it's helpful to pick a type that is less heavily used than Array. Let's first create our
 own type:
 
@@ -243,8 +249,8 @@ variables are not cached.
 
 ## Tuple types
 
-Tuple types constitute an interesting special case.  For dispatch to work on declarations like
-`x::Tuple`, the type has to be able to accommodate any tuple.  Let's check the parameters:
+Tuple types constitute an interesting special case. For dispatch to work on declarations like
+`x::Tuple`, the type has to be able to accommodate any tuple. Let's check the parameters:
 
 ```jldoctest
 julia> Tuple
@@ -489,7 +495,7 @@ julia> function mysubtype(a,b)
        end
 ```
 
-and then set a breakpoint in `jl_breakpoint`.  Once this breakpoint gets triggered, you can set
+and then set a breakpoint in `jl_breakpoint`. Once this breakpoint gets triggered, you can set
 breakpoints in other functions.
 
 As a warm-up, try the following:
@@ -519,10 +525,6 @@ than the other.)  Likewise, `Tuple{Int,Vararg{Int}}` is not a subtype of `Tuple{
 considered more specific. However, `morespecific` does get a bonus for length: in particular,
 `Tuple{Int,Int}` is more specific than `Tuple{Int,Vararg{Int}}`.
 
-If you're debugging how methods get sorted, it can be convenient to define the function:
-
-```julia
-type_morespecific(a, b) = ccall(:jl_type_morespecific, Cint, (Any,Any), a, b)
-```
-
-which allows you to test whether tuple type `a` is more specific than tuple type `b`.
+Additionally, if 2 methods are defined with identical signatures, per type-equal, then they
+will instead be compared by order of addition, such that the later method is more specific
+than the earlier one.
diff --git a/doc/src/devdocs/valgrind.md b/doc/src/devdocs/valgrind.md
index 7e62aeb176f3c..015c4a6d983ee 100644
--- a/doc/src/devdocs/valgrind.md
+++ b/doc/src/devdocs/valgrind.md
@@ -6,22 +6,22 @@ Julia.
 
 ## General considerations
 
-By default, Valgrind assumes that there is no self modifying code in the programs it runs.  This
+By default, Valgrind assumes that there is no self modifying code in the programs it runs. This
 assumption works fine in most instances but fails miserably for a just-in-time compiler like
-`julia`.  For this reason it is crucial to pass `--smc-check=all-non-file` to `valgrind`, else
+`julia`. For this reason it is crucial to pass `--smc-check=all-non-file` to `valgrind`, else
 code may crash or behave unexpectedly (often in subtle ways).
 
-In some cases, to better detect memory errors using Valgrind it can help to compile `julia` with
-memory pools disabled.  The compile-time flag `MEMDEBUG` disables memory pools in Julia, and
-`MEMDEBUG2` disables memory pools in FemtoLisp.  To build `julia` with both flags, add the following
+In some cases, to better detect memory errors using Valgrind, it can help to compile `julia` with
+memory pools disabled. The compile-time flag `MEMDEBUG` disables memory pools in Julia, and
+`MEMDEBUG2` disables memory pools in FemtoLisp. To build `julia` with both flags, add the following
 line to `Make.user`:
 
 ```make
 CFLAGS = -DMEMDEBUG -DMEMDEBUG2
 ```
 
-Another thing to note: if your program uses multiple workers processes, it is likely that you
-want all such worker processes to run under Valgrind, not just the parent process.  To do this,
+Another thing to note: if your program uses multiple worker processes, it is likely that you
+want all such worker processes to run under Valgrind, not just the parent process. To do this,
 pass `--trace-children=yes` to `valgrind`.
 
 Yet another thing to note: if using `valgrind` errors with `Unable to find compatible target in system image`,
@@ -29,9 +29,9 @@ try rebuilding the sysimage with target `generic` or julia with `JULIA_CPU_TARGE
 
 ## Suppressions
 
-Valgrind will typically display spurious warnings as it runs.  To reduce the number of such warnings,
+Valgrind will typically display spurious warnings as it runs. To reduce the number of such warnings,
 it helps to provide a [suppressions file](https://valgrind.org/docs/manual/manual-core.html#manual-core.suppress)
-to Valgrind.  A sample suppressions file is included in the Julia source distribution at `contrib/valgrind-julia.supp`.
+to Valgrind. A sample suppressions file is included in the Julia source distribution at `contrib/valgrind-julia.supp`.
 
 The suppressions file can be used from the `julia/` source directory as follows:
 
@@ -40,13 +40,13 @@ $ valgrind --smc-check=all-non-file --suppressions=contrib/valgrind-julia.supp .
 ```
 
 Any memory errors that are displayed should either be reported as bugs or contributed as additional
-suppressions.  Note that some versions of Valgrind are [shipped with insufficient default suppressions](https://github.com/JuliaLang/julia/issues/8314#issuecomment-55766210),
+suppressions. Note that some versions of Valgrind are [shipped with insufficient default suppressions](https://github.com/JuliaLang/julia/issues/8314#issuecomment-55766210),
 so that may be one thing to consider before submitting any bugs.
 
 ## Running the Julia test suite under Valgrind
 
 It is possible to run the entire Julia test suite under Valgrind, but it does take quite some
-time (typically several hours).  To do so, run the following command from the `julia/test/` directory:
+time (typically several hours). To do so, run the following command from the `julia/test/` directory:
 
 ```
 valgrind --smc-check=all-non-file --trace-children=yes --suppressions=$PWD/../contrib/valgrind-julia.supp ../julia runtests.jl all
@@ -57,7 +57,7 @@ to `valgrind` as well.
 
 ## Additional spurious warnings
 
-This section covers Valgrind warnings which cannot be added to the
+This section covers Valgrind warnings that cannot be added to the
 suppressions file yet are nonetheless safe to ignore.
 
 ### Unhandled rr system calls
@@ -65,7 +65,7 @@ suppressions file yet are nonetheless safe to ignore.
 Valgrind will emit a warning if it encounters any of the [system calls
 that are specific to
 rr](https://github.com/rr-debugger/rr/blob/master/src/preload/rrcalls.h),
-the [Record and Replay Framework](https://rr-project.org/).  In
+the [Record and Replay Framework](https://rr-project.org/). In
 particular, a warning about an unhandled `1008` syscall will be shown
 when julia tries to detect whether it is running under rr:
 
diff --git a/doc/src/index.md b/doc/src/index.md
index bb758d14b4cf2..dea0c11dc1c8b 100644
--- a/doc/src/index.md
+++ b/doc/src/index.md
@@ -32,18 +32,6 @@ Markdown.parse("""
 """)
 ```
 
-## [Important Links](@id man-important-links)
-
-Below is a non-exhasutive list of links that will be useful as you learn and use the Julia programming language.
-
-- [Julia Homepage](https://julialang.org)
-- [Download Julia](https://julialang.org/downloads/)
-- [Discussion forum](https://discourse.julialang.org)
-- [Julia YouTube](https://www.youtube.com/user/JuliaLanguage)
-- [Find Julia Packages](https://julialang.org/packages/)
-- [Learning Resources](https://julialang.org/learning/)
-- [Read and write blogs on Julia](https://forem.julialang.org)
-
 ## [Introduction](@id man-introduction)
 
 Scientific computing has traditionally required the highest performance, yet domain experts have
@@ -76,7 +64,7 @@ and [Ruby](https://en.wikipedia.org/wiki/Ruby_(programming_language)).
 
 The most significant departures of Julia from typical dynamic languages are:
 
-  * The core language imposes very little; Julia Base and the standard library are written in Julia itself, including
+  * The core language imposes very little; [Julia Base and the standard library](@ref man-core-base-and-stdlib) are written in Julia itself, including
     primitive operations like integer arithmetic
   * A rich language of types for constructing and describing objects, that can also optionally be
     used to make type declarations
@@ -126,3 +114,14 @@ language. In addition to the above, some advantages of Julia over comparable sys
   * Call C functions directly (no wrappers or special APIs needed)
   * Powerful shell-like capabilities for managing other processes
   * Lisp-like macros and other metaprogramming facilities
+
+## [Important Links](@id man-important-links)
+
+A non-exhaustive list of links that will be useful as you learn and use the Julia programming language:
+
+- [Julia Homepage](https://julialang.org)
+- [Install Julia](https://julialang.org/install/)
+- [Discussion forum](https://discourse.julialang.org)
+- [Julia YouTube](https://www.youtube.com/user/JuliaLanguage)
+- [Find Julia Packages](https://julialang.org/packages/)
+- [Learning Resources](https://julialang.org/learning/)
diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md
index 0b4532e1b423d..ba2d261301b40 100644
--- a/doc/src/manual/arrays.md
+++ b/doc/src/manual/arrays.md
@@ -103,7 +103,7 @@ same type, then that is its `eltype`. If they all have a common
 [promotion type](@ref conversion-and-promotion) then they get converted to that type using
 [`convert`](@ref) and that type is the array's `eltype`. Otherwise, a heterogeneous array
 that can hold anything — a `Vector{Any}` — is constructed; this includes the literal `[]`
-where no arguments are given. [Array literal can be typed](@ref man-array-typed-literal) with
+where no arguments are given. [Array literals can be typed](@ref man-array-typed-literal) with
 the syntax `T[A, B, C, ...]` where `T` is a type.
 
 ```jldoctest
@@ -326,8 +326,8 @@ These syntaxes are shorthands for function calls that themselves are convenience
 | Syntax                 | Function         | Description                                                                                                |
 |:---------------------- |:---------------- |:---------------------------------------------------------------------------------------------------------- |
 |                        | [`cat`](@ref)    | concatenate input arrays along dimension(s) `k`                                                            |
-| `[A; B; C; ...]`       | [`vcat`](@ref)   | shorthand for `cat(A...; dims=1)`                                                                           |
-| `[A B C ...]`          | [`hcat`](@ref)   | shorthand for `cat(A...; dims=2)`                                                                           |
+| `[A; B; C; ...]`       | [`vcat`](@ref)   | shorthand for `cat(A...; dims=1)`                                                                          |
+| `[A B C ...]`          | [`hcat`](@ref)   | shorthand for `cat(A...; dims=2)`                                                                          |
 | `[A B; C D; ...]`      | [`hvcat`](@ref)  | simultaneous vertical and horizontal concatenation                                                         |
 | `[A; C;; B; D;;; ...]` | [`hvncat`](@ref) | simultaneous n-dimensional concatenation, where number of semicolons indicate the dimension to concatenate |
 
@@ -355,7 +355,7 @@ julia> Int8[[1 2] [3 4]]
 Comprehensions provide a general and powerful way to construct arrays. Comprehension syntax is
 similar to set construction notation in mathematics:
 
-```
+```julia
 A = [ F(x, y, ...) for x=rx, y=ry, ... ]
 ```
 
@@ -366,28 +366,28 @@ The result is an N-d dense array with dimensions that are the concatenation of t
 of the variable ranges `rx`, `ry`, etc. and each `F(x,y,...)` evaluation returns a scalar.
 
 The following example computes a weighted average of the current element and its left and right
-neighbor along a 1-d grid. :
-
-```julia-repl
-julia> x = rand(8)
-8-element Array{Float64,1}:
- 0.843025
- 0.869052
- 0.365105
- 0.699456
- 0.977653
- 0.994953
- 0.41084
- 0.809411
+neighbor along a 1-d grid:
+
+```jldoctest
+julia> x = [4, 8, 2, 6, 10, 10, 2, 8]
+8-element Vector{Int64}:
+  4
+  8
+  2
+  6
+ 10
+ 10
+  2
+  8
 
 julia> [ 0.25*x[i-1] + 0.5*x[i] + 0.25*x[i+1] for i=2:length(x)-1 ]
-6-element Array{Float64,1}:
- 0.736559
- 0.57468
- 0.685417
- 0.912429
- 0.8446
- 0.656511
+6-element Vector{Float64}:
+ 5.5
+ 4.5
+ 6.0
+ 9.0
+ 8.0
+ 5.5
 ```
 
 The resulting array type depends on the types of the computed elements just like [array literals](@ref man-array-literals) do. In order to control the
@@ -398,7 +398,7 @@ the result in single precision by writing:
 Float32[ 0.25*x[i-1] + 0.5*x[i] + 0.25*x[i+1] for i=2:length(x)-1 ]
 ```
 
-## Generator Expressions
+## [Generator Expressions](@id man-generators)
 
 Comprehensions can also be written without the enclosing square brackets, producing an object
 known as a generator. This object can be iterated to produce values on demand, instead of allocating
@@ -413,9 +413,12 @@ julia> sum(1/n^2 for n=1:1000)
 When writing a generator expression with multiple dimensions inside an argument list, parentheses
 are needed to separate the generator from subsequent arguments:
 
-```julia-repl
+```jldoctest
 julia> map(tuple, 1/(i+j) for i=1:2, j=1:2, [1:4;])
-ERROR: syntax: invalid iteration specification
+ERROR: ParseError:
+# Error @ none:1:44
+map(tuple, 1/(i+j) for i=1:2, j=1:2, [1:4;])
+#                                          └ ── invalid iteration spec: expected one of `=` `in` or `∈`
 ```
 
 All comma-separated expressions after `for` are interpreted as ranges. Adding parentheses lets
@@ -430,7 +433,7 @@ julia> map(tuple, (1/(i+j) for i=1:2, j=1:2), [1 3; 2 4])
 
 Generators are implemented via inner functions. Just like
 inner functions used elsewhere in the language, variables from the enclosing scope can be
-"captured" in the inner function.  For example, `sum(p[i] - q[i] for i=1:n)`
+"captured" in the inner function. For example, `sum(p[i] - q[i] for i=1:n)`
 captures the three variables `p`, `q` and `n` from the enclosing scope.
 Captured variables can present performance challenges; see
 [performance tips](@ref man-performance-captured).
@@ -603,7 +606,7 @@ overwritten with the value of `X`, [`convert`](@ref)ing to the
 If any index `I_k` is itself an array, then the right hand side `X` must also be an
 array with the same shape as the result of indexing `A[I_1, I_2, ..., I_n]` or a vector with
 the same number of elements. The value in location `I_1[i_1], I_2[i_2], ..., I_n[i_n]` of
-`A` is overwritten with the value `X[I_1, I_2, ..., I_n]`, converting if necessary. The
+`A` is overwritten with the value `X[i_1, i_2, ..., i_n]`, converting if necessary. The
 element-wise assignment operator `.=` may be used to [broadcast](@ref Broadcasting) `X`
 across the selected locations:
 
@@ -714,7 +717,7 @@ julia> A[:, 3:3]
 ### Cartesian indices
 
 The special `CartesianIndex{N}` object represents a scalar index that behaves
-like an `N`-tuple of integers spanning multiple dimensions.  For example:
+like an `N`-tuple of integers spanning multiple dimensions. For example:
 
 ```jldoctest cartesianindex
 julia> A = reshape(1:32, 4, 4, 2);
@@ -793,38 +796,46 @@ Indexing by a boolean vector `B` is effectively the same as indexing by the
 vector of integers that is returned by [`findall(B)`](@ref). Similarly, indexing
 by a `N`-dimensional boolean array is effectively the same as indexing by the
 vector of `CartesianIndex{N}`s where its values are `true`. A logical index
-must be a vector of the same length as the dimension it indexes into, or it
-must be the only index provided and match the size and dimensionality of the
-array it indexes into. It is generally more efficient to use boolean arrays as
-indices directly instead of first calling [`findall`](@ref).
+must be an array of the same shape as the dimension(s) it indexes into, or it
+must be the only index provided and match the shape of the one-dimensional
+reshaped view of the array it indexes into. It is generally more efficient
+to use boolean arrays as indices directly instead of first calling [`findall`](@ref).
 
 ```jldoctest
-julia> x = reshape(1:16, 4, 4)
-4×4 reshape(::UnitRange{Int64}, 4, 4) with eltype Int64:
- 1  5   9  13
- 2  6  10  14
- 3  7  11  15
- 4  8  12  16
+julia> x = reshape(1:12, 2, 3, 2)
+2×3×2 reshape(::UnitRange{Int64}, 2, 3, 2) with eltype Int64:
+[:, :, 1] =
+ 1  3  5
+ 2  4  6
 
-julia> x[[false, true, true, false], :]
-2×4 Matrix{Int64}:
- 2  6  10  14
- 3  7  11  15
+[:, :, 2] =
+ 7   9  11
+ 8  10  12
+
+julia> x[:, [true false; false true; true false]]
+2×3 Matrix{Int64}:
+ 1  5   9
+ 2  6  10
 
 julia> mask = map(ispow2, x)
-4×4 Matrix{Bool}:
- 1  0  0  0
- 1  0  0  0
- 0  0  0  0
- 1  1  0  1
+2×3×2 Array{Bool, 3}:
+[:, :, 1] =
+ 1  0  0
+ 1  1  0
+
+[:, :, 2] =
+ 0  0  0
+ 1  0  0
 
 julia> x[mask]
-5-element Vector{Int64}:
-  1
-  2
-  4
-  8
- 16
+4-element Vector{Int64}:
+ 1
+ 2
+ 4
+ 8
+
+julia> x[vec(mask)] == x[mask] # we can also index with a single Boolean vector
+true
 ```
 
 ### Number of indices
@@ -881,7 +892,7 @@ in their implementations, other arrays — like [`Diagonal`](@ref) — need the
 full set of cartesian indices to do their lookup (see [`IndexStyle`](@ref) to
 introspect which is which).
 
-!!! warnings
+!!! warning
 
     When iterating over all the indices for an array, it is
     better to iterate over [`eachindex(A)`](@ref) instead of `1:length(A)`.
@@ -935,13 +946,13 @@ element of `axes(A, d)` where `d` is that particular dimension number). This
 allows vectors to be indexed like one-column matrices, for example:
 
 ```jldoctest
-julia> A = [8,6,7]
+julia> A = [8, 6, 7]
 3-element Vector{Int64}:
  8
  6
  7
 
-julia> A[2,1]
+julia> A[2, 1]
 6
 ```
 
@@ -1006,7 +1017,7 @@ The following operators are supported for arrays:
 
 To enable convenient vectorization of mathematical and other operations,
 Julia [provides the dot syntax](@ref man-vectorized) `f.(args...)`, e.g. `sin.(x)`
-or `min.(x,y)`, for elementwise operations over arrays or mixtures of arrays and
+or `min.(x, y)`, for elementwise operations over arrays or mixtures of arrays and
 scalars (a [Broadcasting](@ref) operation); these have the additional advantage of
 "fusing" into a single loop when combined with other dot calls, e.g. `sin.(cos.(x))`.
 
@@ -1020,7 +1031,7 @@ operations like `<`, *only* the elementwise `.<` version is applicable to arrays
 
 Also notice the difference between `max.(a,b)`, which [`broadcast`](@ref)s [`max`](@ref)
 elementwise over `a` and `b`, and [`maximum(a)`](@ref), which finds the largest value within
-`a`. The same relationship holds for `min.(a,b)` and `minimum(a)`.
+`a`. The same relationship holds for `min.(a, b)` and `minimum(a)`.
 
 ## Broadcasting
 
@@ -1028,33 +1039,33 @@ It is sometimes useful to perform element-by-element binary operations on arrays
 sizes, such as adding a vector to each column of a matrix. An inefficient way to do this would
 be to replicate the vector to the size of the matrix:
 
-```julia-repl
-julia> a = rand(2, 1); A = rand(2, 3);
+```jldoctest broadcast_example
+julia> a = [0.2, 0.5]; A = [1.0 1.6 1.05; 1.07 1.36 1.18];
 
 julia> repeat(a, 1, 3) + A
-2×3 Array{Float64,2}:
- 1.20813  1.82068  1.25387
- 1.56851  1.86401  1.67846
+2×3 Matrix{Float64}:
+ 1.2   1.8   1.25
+ 1.57  1.86  1.68
 ```
 
 This is wasteful when dimensions get large, so Julia provides [`broadcast`](@ref), which expands
 singleton dimensions in array arguments to match the corresponding dimension in the other array
 without using extra memory, and applies the given function elementwise:
 
-```julia-repl
+```jldoctest broadcast_example
 julia> broadcast(+, a, A)
-2×3 Array{Float64,2}:
- 1.20813  1.82068  1.25387
- 1.56851  1.86401  1.67846
+2×3 Matrix{Float64}:
+ 1.2   1.8   1.25
+ 1.57  1.86  1.68
 
-julia> b = rand(1,2)
-1×2 Array{Float64,2}:
- 0.867535  0.00457906
+julia> b = [0.9 0.1]
+1×2 Matrix{Float64}:
+ 0.9  0.1
 
 julia> broadcast(+, a, b)
-2×2 Array{Float64,2}:
- 1.71056  0.847604
- 1.73659  0.873631
+2×2 Matrix{Float64}:
+ 1.1  0.3
+ 1.4  0.6
 ```
 
 [Dotted operators](@ref man-dot-operators) such as `.+` and `.*` are equivalent
@@ -1066,7 +1077,7 @@ is equivalent to `broadcast(f, args...)`, providing a convenient syntax to broad
 [automatically fuse](@ref man-dot-operators) into a single `broadcast` call.
 
 Additionally, [`broadcast`](@ref) is not limited to arrays (see the function documentation);
-it also handles scalars, tuples and other collections.  By default, only some argument types are
+it also handles scalars, tuples and other collections. By default, only some argument types are
 considered scalars, including (but not limited to) `Number`s, `String`s, `Symbol`s, `Type`s, `Function`s
 and some common singletons like `missing` and `nothing`. All other arguments are
 iterated over or indexed into elementwise.
@@ -1111,10 +1122,10 @@ generally work correctly as a fallback for any specific array implementation.
 The `AbstractArray` type includes anything vaguely array-like, and implementations of it might
 be quite different from conventional arrays. For example, elements might be computed on request
 rather than stored. However, any concrete `AbstractArray{T,N}` type should generally implement
-at least [`size(A)`](@ref) (returning an `Int` tuple), [`getindex(A,i)`](@ref) and [`getindex(A,i1,...,iN)`](@ref getindex);
-mutable arrays should also implement [`setindex!`](@ref). It is recommended that these operations
-have nearly constant time complexity, as otherwise some array
-functions may be unexpectedly slow. Concrete types should also typically provide a [`similar(A,T=eltype(A),dims=size(A))`](@ref)
+at least [`size(A)`](@ref) (returning an `Int` tuple), [`getindex(A, i)`](@ref) and
+[`getindex(A, i1, ..., iN)`](@ref getindex); mutable arrays should also implement [`setindex!`](@ref).
+It is recommended that these operations have nearly constant time complexity, as otherwise some array
+functions may be unexpectedly slow. Concrete types should also typically provide a [`similar(A, T=eltype(A), dims=size(A))`](@ref)
 method, which is used to allocate a similar array for [`copy`](@ref) and other out-of-place
 operations. No matter how an `AbstractArray{T,N}` is represented internally, `T` is the type of
 object returned by *integer* indexing (`A[1, ..., 1]`, when `A` is not empty) and `N` should be
@@ -1136,7 +1147,7 @@ is created with the [`view`](@ref) function, which is called the same way as
 of [`view`](@ref) looks the same as the result of [`getindex`](@ref), except the
 data is left in place. [`view`](@ref) stores the input index vectors in a
 `SubArray` object, which can later be used to index the original array
-indirectly.  By putting the [`@views`](@ref) macro in front of an expression or
+indirectly. By putting the [`@views`](@ref) macro in front of an expression or
 block of code, any `array[...]` slice in that expression will be converted to
 create a `SubArray` view instead.
 
@@ -1152,7 +1163,7 @@ dimension `d`. For example, the builtin `Array` returned by `rand(5,7,2)` has it
 arranged contiguously in column major order. This means that the stride of the first
 dimension — the spacing between elements in the same column — is `1`:
 
-```julia-repl
+```jldoctest strides
 julia> A = rand(5, 7, 2);
 
 julia> stride(A, 1)
@@ -1161,10 +1172,10 @@ julia> stride(A, 1)
 
 The stride of the second dimension is the spacing between elements in the same row, skipping
 as many elements as there are in a single column (`5`). Similarly, jumping between the two
-"pages" (in the third dimension) requires skipping `5*7 == 35` elements.  The [`strides`](@ref)
+"pages" (in the third dimension) requires skipping `5*7 == 35` elements. The [`strides`](@ref)
 of this array is the tuple of these three numbers together:
 
-```julia-repl
+```jldoctest strides
 julia> strides(A)
 (1, 5, 35)
 ```
@@ -1177,7 +1188,7 @@ This view `V` refers to the same memory as `A` but is skipping and re-arranging
 elements. The stride of the first dimension of `V` is `3` because we're only selecting every
 third row from our original array:
 
-```julia-repl
+```jldoctest strides
 julia> V = @view A[1:3:4, 2:2:6, 2:-1:1];
 
 julia> stride(V, 1)
@@ -1188,7 +1199,7 @@ This view is similarly selecting every other column from our original `A` — an
 needs to skip the equivalent of two five-element columns when moving between indices in the
 second dimension:
 
-```julia-repl
+```jldoctest strides
 julia> stride(V, 2)
 10
 ```
@@ -1197,7 +1208,7 @@ The third dimension is interesting because its order is reversed! Thus to get fr
 "page" to the second one it must go _backwards_ in memory, and so its stride in this
 dimension is negative!
 
-```julia-repl
+```jldoctest strides
 julia> stride(V, 3)
 -35
 ```
diff --git a/doc/src/manual/asynchronous-programming.md b/doc/src/manual/asynchronous-programming.md
index 5b43ba971ee1c..eccd924aec6b3 100644
--- a/doc/src/manual/asynchronous-programming.md
+++ b/doc/src/manual/asynchronous-programming.md
@@ -64,8 +64,8 @@ the next input prompt appears. That is because the REPL is waiting for `t`
 to finish before proceeding.
 
 It is common to want to create a task and schedule it right away, so the
-macro [`@async`](@ref) is provided for that purpose --- `@async x` is
-equivalent to `schedule(@task x)`.
+macro [`Threads.@spawn`](@ref) is provided for that purpose --- `Threads.@spawn x` is
+equivalent to `task = @task x; task.sticky = false; schedule(task)`.
 
 ## Communicating with Channels
 
@@ -162,7 +162,7 @@ constructors to explicitly link a set of channels with a set of producer/consume
 
 ### More on Channels
 
-A channel can be visualized as a pipe, i.e., it has a write end and a read end :
+A channel can be visualized as a pipe, i.e., it has a write end and a read end:
 
   * Multiple writers in different tasks can write to the same channel concurrently via [`put!`](@ref)
     calls.
@@ -186,7 +186,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
 
     # we can schedule `n` instances of `foo` to be active concurrently.
     for _ in 1:n
-        errormonitor(@async foo())
+        errormonitor(Threads.@spawn foo())
     end
     ```
   * Channels are created via the `Channel{T}(sz)` constructor. The channel will only hold objects
@@ -194,15 +194,16 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
     to the maximum number of elements that can be held in the channel at any time. For example, `Channel(32)`
     creates a channel that can hold a maximum of 32 objects of any type. A `Channel{MyType}(64)` can
     hold up to 64 objects of `MyType` at any time.
-  * If a [`Channel`](@ref) is empty, readers (on a [`take!`](@ref) call) will block until data is available.
-  * If a [`Channel`](@ref) is full, writers (on a [`put!`](@ref) call) will block until space becomes available.
+  * If a [`Channel`](@ref) is empty, readers (on a [`take!`](@ref) call) will block until data is available (see [`isempty`](@ref)).
+  * If a [`Channel`](@ref) is full, writers (on a [`put!`](@ref) call) will block until space becomes available (see [`isfull`](@ref)).
   * [`isready`](@ref) tests for the presence of any object in the channel, while [`wait`](@ref)
     waits for an object to become available.
+  * Note that if another task is currently waiting to `put!` an object into a channel, a channel can have more items available than its capacity.
   * A [`Channel`](@ref) is in an open state initially. This means that it can be read from and written to
     freely via [`take!`](@ref) and [`put!`](@ref) calls. [`close`](@ref) closes a [`Channel`](@ref).
     On a closed [`Channel`](@ref), [`put!`](@ref) will fail. For example:
 
-    ```julia-repl
+    ```jldoctest channel_example
     julia> c = Channel(2);
 
     julia> put!(c, 1) # `put!` on an open channel succeeds
@@ -219,7 +220,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
   * [`take!`](@ref) and [`fetch`](@ref) (which retrieves but does not remove the value) on a closed
     channel successfully return any existing values until it is emptied. Continuing the above example:
 
-    ```julia-repl
+    ```jldoctest channel_example
     julia> fetch(c) # Any number of `fetch` calls succeed.
     1
 
@@ -263,10 +264,10 @@ julia> function make_jobs(n)
 
 julia> n = 12;
 
-julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs
+julia> errormonitor(Threads.@spawn make_jobs(n)); # feed the jobs channel with "n" jobs
 
 julia> for i in 1:4 # start 4 tasks to process requests in parallel
-           errormonitor(@async do_work())
+           errormonitor(Threads.@spawn do_work())
        end
 
 julia> @elapsed while n > 0 # print out results
diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md
index 7b889589c592d..70341bd180fd3 100644
--- a/doc/src/manual/calling-c-and-fortran-code.md
+++ b/doc/src/manual/calling-c-and-fortran-code.md
@@ -27,16 +27,20 @@ commonly passed in registers when using C or Julia calling conventions.
 The syntax for [`@ccall`](@ref) to generate a call to the library function is:
 
 ```julia
-  @ccall library.function_name(argvalue1::argtype1, ...)::returntype
-  @ccall function_name(argvalue1::argtype1, ...)::returntype
-  @ccall $function_pointer(argvalue1::argtype1, ...)::returntype
+@ccall library.function_name(argvalue1::argtype1, ...)::returntype
+@ccall function_name(argvalue1::argtype1, ...)::returntype
+@ccall $function_pointer(argvalue1::argtype1, ...)::returntype
 ```
 
-where `library` is a string constant or literal (but see [Non-constant Function
-Specifications](@ref) below). The library may be omitted, in which case the
-function name is resolved in the current process. This form can be used to call
-C library functions, functions in the Julia runtime, or functions in an
-application linked to Julia. The full path to the library may also be specified.
+where `library` is a string constant or global variable name (see [Non-constant
+Function -Specifications](@ref) below). The library can be just a name, or it
+can specify a full path to the library. The library may be omitted, in which
+case the function name is resolved in the current executable, the current libc,
+or libjulia(-internal). This form can be used to call C library functions,
+functions in the Julia runtime, or functions in an application linked to Julia.
+Omitting the library *cannot* be used to call a function in any library (like
+specifying `RTLD_DEFAULT` to `dlsym`) as such behavior is slow, complicated,
+and not implemented on all platforms.
 Alternatively, `@ccall` may also be used to call a function pointer
 `$function_pointer`, such as one returned by `Libdl.dlsym`. The `argtype`s
 corresponds to the C-function signature and the `argvalue`s are the actual
@@ -69,7 +73,7 @@ julia> unsafe_string(path)
 
 In practice, especially when providing reusable functionality, one generally wraps `@ccall`
 uses in Julia functions that set up arguments and then check for errors in whatever manner the
-C or Fortran function specifies. And if an error occurs it is thrown as a normal Julia exception. This is especially
+C or Fortran function specifies. If an error occurs it is thrown as a normal Julia exception. This is especially
 important since C and Fortran APIs are notoriously inconsistent about how they indicate error
 conditions. For example, the `getenv` C library function is wrapped in the following Julia function,
 which is a simplified version of the actual definition from [`env.jl`](https://github.com/JuliaLang/julia/blob/master/base/env.jl):
@@ -224,7 +228,7 @@ julia> A
 ```
 
 As the example shows, the original Julia array `A` has now been sorted: `[-2.7, 1.3, 3.1, 4.4]`. Note that Julia
-[takes care of converting the array to a `Ptr{Cdouble}`](@ref automatic-type-conversion)), computing
+[takes care of converting the array to a `Ptr{Cdouble}`](@ref automatic-type-conversion), computing
 the size of the element type in bytes, and so on.
 
 For fun, try inserting a `println("mycompare($a, $b)")` line into `mycompare`, which will allow
@@ -253,10 +257,14 @@ to the specified type. For example, the following call:
 will behave as if it were written like this:
 
 ```julia
-@ccall "libfoo".foo(
-    Base.unsafe_convert(Int32, Base.cconvert(Int32, x))::Int32,
-    Base.unsafe_convert(Float64, Base.cconvert(Float64, y))::Float64
+c_x = Base.cconvert(Int32, x)
+c_y = Base.cconvert(Float64, y)
+GC.@preserve c_x c_y begin
+    @ccall "libfoo".foo(
+        Base.unsafe_convert(Int32, c_x)::Int32,
+        Base.unsafe_convert(Float64, c_y)::Float64
     )::Cvoid
+end
 ```
 
 [`Base.cconvert`](@ref) normally just calls [`convert`](@ref), but can be defined to return an
@@ -272,17 +280,17 @@ it to be freed prematurely.
 
 First, let's review some relevant Julia type terminology:
 
-| Syntax / Keyword              | Example                                     | Description                                                                                                                                                                                                                                                                    |
-|:----------------------------- |:------------------------------------------- |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `mutable struct`              | `BitSet`                                    | "Leaf Type" :: A group of related data that includes a type-tag, is managed by the Julia GC, and is defined by object-identity. The type parameters of a leaf type must be fully defined (no `TypeVars` are allowed) in order for the instance to be constructed.              |
-| `abstract type`               | `Any`, `AbstractArray{T, N}`, `Complex{T}`  | "Super Type" :: A super-type (not a leaf-type) that cannot be instantiated, but can be used to describe a group of types.                                                                                                                                                      |
-| `T{A}`                        | `Vector{Int}`                               | "Type Parameter" :: A specialization of a type (typically used for dispatch or storage optimization).                                                                                                                                                                          |
-|                               |                                             | "TypeVar" :: The `T` in the type parameter declaration is referred to as a TypeVar (short for type variable).                                                                                                                                                                  |
-| `primitive type`              | `Int`, `Float64`                            | "Primitive Type" :: A type with no fields, but a size. It is stored and defined by-value.                                                                                                                                                                                           |
-| `struct`                      | `Pair{Int, Int}`                            | "Struct" :: A type with all fields defined to be constant. It is defined by-value, and may be stored with a type-tag.                                                                                                                                                       |
-|                               | `ComplexF64` (`isbits`)                     | "Is-Bits"   :: A `primitive type`, or a `struct` type where all fields are other `isbits` types. It is defined by-value, and is stored without a type-tag.                                                                                                                       |
-| `struct ...; end`             | `nothing`                                   | "Singleton" :: a Leaf Type or Struct with no fields.                                                                                                                                                                                                                        |
-| `(...)` or `tuple(...)`       | `(1, 2, 3)`                                 | "Tuple" :: an immutable data-structure similar to an anonymous struct type, or a constant array. Represented as either an array or a struct.                                                                                                                                |
+| Syntax / Keyword              | Example                                     | Description                                                                                                                                                                                                                                                                                                       |
+|:----------------------------- |:------------------------------------------- |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `mutable struct`              | `BitSet`                                    | "Concrete Type" :: A group of related data that includes a type-tag, is managed by the Julia GC, and is defined by object-identity. The type parameters of a concrete type must be fully defined (no `TypeVars` are allowed) in order for the instance to be constructed. Also see [`isconcretetype`](@ref).      |
+| `abstract type`               | `Any`, `AbstractArray{T, N}`, `Complex{T}`  | "Super Type" :: A super-type (not a concrete type) that cannot be instantiated, but can be used to describe a group of types. Also see [`isabstracttype`](@ref).                                                                                                                                                  |
+| `T{A}`                        | `Vector{Int}`                               | "Type Parameter" :: A specialization of a type (typically used for dispatch or storage optimization).                                                                                                                                                                                                             |
+|                               |                                             | "TypeVar" :: The `T` in the type parameter declaration is referred to as a TypeVar (short for type variable).                                                                                                                                                                                                     |
+| `primitive type`              | `Int`, `Float64`                            | "Primitive Type" :: A type with no fields, but a size. It is stored and defined by-value.                                                                                                                                                                                                                         |
+| `struct`                      | `Pair{Int, Int}`                            | "Struct" :: A type with all fields defined to be constant. It is defined by-value, and may be stored with a type-tag.                                                                                                                                                                                             |
+|                               | `ComplexF64` (`isbits`)                     | "Is-Bits"   :: A `primitive type`, or a `struct` type where all fields are other `isbits` types. It is defined by-value, and is stored without a type-tag.                                                                                                                                                        |
+| `struct ...; end`             | `nothing`                                   | "Singleton" :: a concrete Type or Struct with no fields.                                                                                                                                                                                                                                                          |
+| `(...)` or `tuple(...)`       | `(1, 2, 3)`                                 | "Tuple" :: an immutable data-structure similar to an anonymous struct type, or a constant array. Represented as either an array or a struct.                                                                                                                                                                      |
 
 ### [Bits Types](@id man-bits-types)
 
@@ -353,7 +361,7 @@ an `Int` in Julia).
 | `unsigned long long`                                    |                          | `Culonglong`         | `UInt64`                                                                                                       |
 | `intmax_t`                                              |                          | `Cintmax_t`          | `Int64`                                                                                                        |
 | `uintmax_t`                                             |                          | `Cuintmax_t`         | `UInt64`                                                                                                       |
-| `float`                                                 | `REAL*4i`                | `Cfloat`             | `Float32`                                                                                                      |
+| `float`                                                 | `REAL*4`                 | `Cfloat`             | `Float32`                                                                                                      |
 | `double`                                                | `REAL*8`                 | `Cdouble`            | `Float64`                                                                                                      |
 | `complex float`                                         | `COMPLEX*8`              | `ComplexF32`         | `Complex{Float32}`                                                                                             |
 | `complex double`                                        | `COMPLEX*16`             | `ComplexF64`         | `Complex{Float64}`                                                                                             |
@@ -543,15 +551,14 @@ is not valid, since the type layout of `T` is not known statically.
 
 ### SIMD Values
 
-Note: This feature is currently implemented on 64-bit x86 and AArch64 platforms only.
-
 If a C/C++ routine has an argument or return value that is a native SIMD type, the corresponding
 Julia type is a homogeneous tuple of `VecElement` that naturally maps to the SIMD type. Specifically:
 
->   * The tuple must be the same size as the SIMD type. For example, a tuple representing an `__m128`
->     on x86 must have a size of 16 bytes.
->   * The element type of the tuple must be an instance of `VecElement{T}` where `T` is a primitive type that
->     is 1, 2, 4 or 8 bytes.
+>   * The tuple must be the same size and elements as the SIMD type. For example, a tuple
+>     representing an `__m128` on x86 must have a size of 16 bytes and Float32 elements.
+>   * The element type of the tuple must be an instance of `VecElement{T}` where `T` is a
+>     primitive type with a power-of-two number of bytes (e.g. 1, 2, 4, 8, 16, etc) such as
+>     Int8 or Float64.
 
 For instance, consider this C routine that uses AVX intrinsics:
 
@@ -622,8 +629,12 @@ For translating a C argument list to Julia:
       * argument value will be copied (passed by value)
   * `struct T` (including typedef to a struct)
 
-      * `T`, where `T` is a Julia leaf type
+      * `T`, where `T` is a concrete Julia type
       * argument value will be copied (passed by value)
+  * `vector T` (or `__attribute__ vector_size`, or a typedef such as `__m128`)
+
+      * `NTuple{N, VecElement{T}}`, where `T` is a primitive Julia type of the correct size
+        and N is the number of elements in the vector (equal to `vector_size / sizeof T`).
   * `void*`
 
       * depends on how this parameter is used, first translate this to the intended pointer type, then
@@ -670,13 +681,16 @@ For translating a C return type to Julia:
   * `T`, where `T` is one of the primitive types: `char`, `int`, `long`, `short`, `float`, `double`,
     `complex`, `enum` or any of their `typedef` equivalents
 
-      * `T`, where `T` is an equivalent Julia Bits Type (per the table above)
-      * if `T` is an `enum`, the argument type should be equivalent to `Cint` or `Cuint`
+      * same as C argument list
       * argument value will be copied (returned by-value)
   * `struct T` (including typedef to a struct)
 
-      * `T`, where `T` is a Julia Leaf Type
+      * same as C argument list
       * argument value will be copied (returned by-value)
+
+  * `vector T`
+
+      * same as C argument list
   * `void*`
 
       * depends on how this parameter is used, first translate this to the intended pointer type, then
@@ -821,7 +835,7 @@ Instead define a [`Base.cconvert`](@ref) method and pass the variables directly
 automatically arranges that all of its arguments will be preserved from garbage collection until
 the call returns. If a C API will store a reference to memory allocated by Julia, after the `@ccall`
 returns, you must ensure that the object remains visible to the garbage collector. The suggested
-way to do this is to make a global variable of type `Array{Ref,1}` to hold these values until
+way to do this is to make a global variable of type `Vector{Ref}` to hold these values until
 the C library notifies you that it is finished with them.
 
 Whenever you have created a pointer to Julia data, you must ensure the original data exists until
@@ -838,16 +852,137 @@ it must be handled in other ways.
 
 ## Non-constant Function Specifications
 
-In some cases, the exact name or path of the needed library is not known in advance and must
-be computed at run time. To handle such cases, the library component
-specification can be a function call, e.g. `find_blas().dgemm`. The call expression will
-be executed when the `ccall` itself is executed. However, it is assumed that the library
-location does not change once it is determined, so the result of the call can be cached and
-reused. Therefore, the number of times the expression executes is unspecified, and returning
-different values for multiple calls results in unspecified behavior.
+In some cases, the exact name or path of the needed library is not known in
+advance and must be computed at run time. To handle such cases, the library
+component specification can be a value such as `Libdl.LazyLibrary`. The runtime
+will call `Libdl.dlopen` on that object when first used by a `ccall`.
+
+### [Using LazyLibrary for Lazy Loading](@id man-lazylibrary)
+
+[`Libdl.LazyLibrary`](@ref) provides a thread-safe mechanism for deferring library loading
+until first use. This is the recommended approach for library initialization in modern Julia code.
+
+A `LazyLibrary` represents a library that opens itself (and its dependencies) automatically
+on first use in a `ccall()`, `@ccall`, `dlopen()`, `dlsym()`, `dlpath()`, or `cglobal()`.
+The library is loaded exactly once in a thread-safe manner, and subsequent calls reuse the
+loaded library handle.
+
+#### Basic Usage
+
+```julia
+using Libdl
+
+# Define a LazyLibrary as a const for optimal performance
+const libz = LazyLibrary("libz")
+
+# Use directly in @ccall - library loads automatically on first call
+@ccall libz.deflate(strm::Ptr{Cvoid}, flush::Cint)::Cint
+
+# Also works with ccall
+ccall((:inflate, libz), Cint, (Ptr{Cvoid}, Cint), strm, flush)
+```
+
+#### Platform-Specific Libraries
+
+For code that needs to work across different platforms:
+
+```julia
+const mylib = LazyLibrary(
+    if Sys.iswindows()
+        "mylib.dll"
+    elseif Sys.isapple()
+        "libmylib.dylib"
+    else
+        "libmylib.so"
+    end
+)
+```
+
+#### Libraries with Dependencies
+
+When a library depends on other libraries, specify the dependencies to ensure
+they load in the correct order:
+
+```julia
+const libfoo = LazyLibrary("libfoo")
+const libbar = LazyLibrary("libbar"; dependencies=[libfoo])
+
+# When libbar is first used, libfoo is loaded first automatically
+@ccall libbar.bar_function(x::Cint)::Cint
+```
+
+#### Lazy Path Construction
+
+For libraries whose paths are determined at runtime, use `LazyLibraryPath`:
+
+```julia
+# Path is constructed when library is first accessed
+const mylib = LazyLibrary(LazyLibraryPath(artifact_dir, "lib", "libmylib.so"))
+```
+
+#### Initialization Callbacks
+
+If a library requires initialization after loading:
+
+```julia
+const mylib = LazyLibrary("libmylib";
+    on_load_callback = () -> @ccall mylib.initialize()::Cvoid
+)
+```
+
+!!! warning
+    The `on_load_callback` should be minimal and must not call `wait()` on any tasks.
+    It is called exactly once by the thread that loads the library.
+
+#### Conversion from `__init__()` Pattern
+
+Before `LazyLibrary`, library paths were often computed in `__init__()` functions.
+This pattern can be replaced with `LazyLibrary` for better performance and thread safety.
+
+Old pattern using `__init__()`:
+
+```julia
+# Old: Library path computed in __init__()
+libmylib_path = ""
+
+function __init__(
+    # Loads library on startup, whether it is used or not
+    global libmylib_path = find_library(["libmylib"])
+end
 
-If even more flexibility is needed, it is possible
-to use computed values as function names by staging through [`eval`](@ref) as follows:
+function myfunc(x)
+    ccall((:cfunc, libmylib_path), Cint, (Cint,), x)
+end
+```
+
+New pattern using `LazyLibrary`:
+
+```julia
+# New: Library as const, no __init__() needed
+const libmylib = LazyLibrary("libmylib")
+
+function myfunc(x)
+    # Library loads automatically just before calling `cfunc`
+    @ccall libmylib.cfunc(x::Cint)::Cint
+end
+```
+
+For more details, see the [`Libdl.LazyLibrary`](@ref) documentation.
+
+### Overloading `dlopen` for Custom Types
+
+The runtime will call `dlsym(:function, dlopen(library)::Ptr{Cvoid})` when a `@ccall` is executed.
+The `Libdl.dlopen` function can be overloaded for custom types to provide alternate behaviors.
+However, it is assumed that the library location and handle does not change
+once it is determined, so the result of the call may be cached and reused.
+Therefore, the number of times the `dlopen` expression executes is unspecified,
+and returning different values for multiple calls will results in unspecified
+(but valid) behavior.
+
+### Computed Function Names
+
+If even more flexibility is needed, it is possible to use computed values as
+function names by staging through [`eval`](@ref) as follows:
 
 ```julia
 @eval @ccall "lib".$(string("a", "b"))()::Cint
@@ -858,38 +993,37 @@ expression, which is then evaluated. Keep in mind that `eval` only operates at t
 so within this expression local variables will not be available (unless their values are substituted
 with `$`). For this reason, `eval` is typically only used to form top-level definitions, for example
 when wrapping libraries that contain many similar functions.
-A similar example can be constructed for [`@cfunction`](@ref).
-
-However, doing this will also be very slow and leak memory, so you should usually avoid this and instead keep
-reading.
-The next section discusses how to use indirect calls to efficiently achieve a similar effect.
 
-## Indirect Calls
+### Indirect Calls
 
-The first argument to `@ccall` can also be an expression evaluated at run time. In this
-case, the expression must evaluate to a `Ptr`, which will be used as the address of the native
-function to call. This behavior occurs when the first `@ccall` argument contains references
-to non-constants, such as local variables, function arguments, or non-constant globals.
+The first argument to `@ccall` can also be an expression to be evaluated at run
+time, each time it is used. In this case, the expression must evaluate to a
+`Ptr`, which will be used as the address of the native function to call. This
+behavior occurs when the first `@ccall` argument is marked with `$` and when
+the first `ccall` argument is not a simple constant literal or expression in
+`()`. The argument can be any expression and can use local variables and
+arguments and can return a different value every time.
 
-For example, you might look up the function via `dlsym`,
-then cache it in a shared reference for that session. For example:
+For example, you might implement a macro similar to `cglobal` that looks up the
+function via `dlsym`, then caches the pointer in a shared reference (which is
+auto reset to C_NULL during precompile saving).
+For example:
 
 ```julia
 macro dlsym(lib, func)
-    z = Ref{Ptr{Cvoid}}(C_NULL)
+    z = Ref(C_NULL)
     quote
-        let zlocal = $z[]
-            if zlocal == C_NULL
-                zlocal = dlsym($(esc(lib))::Ptr{Cvoid}, $(esc(func)))::Ptr{Cvoid}
-                $z[] = zlocal
-            end
-            zlocal
+        local zlocal = $z[]
+        if zlocal == C_NULL
+            zlocal = dlsym($(esc(lib))::Ptr{Cvoid}, $(esc(func)))::Ptr{Cvoid}
+            $z[] = zlocal
         end
+        zlocal
     end
 end
 
-mylibvar = Libdl.dlopen("mylib")
-@ccall $(@dlsym(mylibvar, "myfunc"))()::Cvoid
+const mylibvar = LazyLibrary("mylib")
+@ccall $(@dlsym(dlopen(mylibvar), "myfunc"))()::Cvoid
 ```
 
 ## Closure cfunctions
@@ -980,22 +1114,49 @@ The arguments to [`ccall`](@ref) are:
 
 
 !!! note
-    The `(:function, "library")` pair, return type, and input types must be literal constants
-    (i.e., they can't be variables, but see [Non-constant Function Specifications](@ref)).
-
-    The remaining parameters are evaluated at compile-time, when the containing method is defined.
-
+    The `(:function, "library")` pair and the input type list must be syntactic tuples
+    (i.e., they can't be variables or values with a type of Tuple.
+
+    The rettype and argument type values are evaluated at when the containing method is
+    defined, not runtime.
+
+!!! note "Function Name vs Pointer Syntax"
+    The syntax of the first argument to `ccall` determines whether you're calling by **name** or by **pointer**:
+    * **Name-based calls** (tuple literal syntax):
+    - Both the function and library names can be a quoted Symbol, a String, a
+      variable name (a GlobalRef), or a dotted expression ending with a variable
+      name.
+    - Single name: `(:function_name,)` or `"function_name"` - uses default library lookup.
+    - Name with library: `(:function_name, "library")` - specifies both function and library.
+    - Symbol, string, and tuple literal constants (not expressions that evaluate to those constants,
+      but actual literals) are automatically normalized to tuple form.
+    * **Pointer-based calls** (non-tuple syntax):
+    - Anything that is not a literal tuple expression specified above is assumed to be an
+      expression that evaluates to a function pointers at runtime.
+    - Function pointer variables: `fptr` where `fptr` is a runtime pointer value.
+    - Function pointer computations: `dlsym(:something)` where the result is computed at
+      runtime every time (usually along with some caching logic).
+    * **Library name expressions**:
+    - When given as a variable, the library name can resolve to a `Symbol`, a `String`, or
+      any other value. The runtime will call `Libdl.dlopen(name)` on the value an
+      unspecified number of times, caching the result. The result is not invalidated if the
+      value of the binding changes or if it becomes undefined, as long as there exists any
+      value for that binding in any past or future worlds, that value may be used.
+    - Dot expressions, such as `A.B().c`, will be executed at method definition
+      time up to the final `c`. The first part must resolve to a Module, and the
+      second part to a quoted symbol. The value of that global will be resolved at
+      runtime when the `ccall` is first executed.
 
 A table of translations between the macro and function interfaces is given below.
 
 | `@ccall`                                                                     | `ccall`                                                                     |
 |------------------------------------------------------------------------------|-----------------------------------------------------------------------------|
 | `@ccall clock()::Int32`                                                      | `ccall(:clock, Int32, ())`                                                  |
-| `@ccall f(a::Cint)::Cint`                                                    | `ccall(:a, Cint, (Cint,), a)`                                               |
-| `@ccall "mylib".f(a::Cint, b::Cdouble)::Cvoid`                               | `ccall((:f, "mylib"), Cvoid, (Cint, Cdouble), (a, b))`                      |
+| `@ccall f(a::Cint)::Cint`                                                    | `ccall(:f, Cint, (Cint,), a)`                                               |
+| `@ccall "mylib".f(a::Cint, b::Cdouble)::Cvoid`                               | `ccall((:f, "mylib"), Cvoid, (Cint, Cdouble), a, b)`                        |
 | `@ccall $fptr.f()::Cvoid`                                                    | `ccall(fptr, f, Cvoid, ())`                                                 |
 | `@ccall printf("%s = %d\n"::Cstring ; "foo"::Cstring, foo::Cint)::Cint`      | `<unavailable>`                                                             |
-| `@ccall printf("%s = %d\n"::Cstring ; "2 + 2"::Cstring, "5"::Cstring)::Cint` | `ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5")`    |
+| `@ccall printf("%s = %s\n"::Cstring ; "2 + 2"::Cstring, "5"::Cstring)::Cint` | `ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5")`    |
 | `<unavailable>`                                                              | `ccall(:gethostname, stdcall, Int32, (Ptr{UInt8}, UInt32), hn, length(hn))` |
 
 ## [Calling Convention](@id calling-convention)
@@ -1005,7 +1166,7 @@ be a calling convention specifier (the `@ccall` macro currently does not support
 giving a calling convention). Without any specifier, the platform-default C
 calling convention is used. Other supported conventions are: `stdcall`, `cdecl`,
 `fastcall`, and `thiscall` (no-op on 64-bit Windows). For example (from
-`base/libc.jl`) we see the same `gethostname``ccall` as above, but with the
+`base/libc.jl`) we see the same `gethostname` `ccall` as above, but with the
 correct signature for Windows:
 
 ```julia
diff --git a/doc/src/manual/code-loading.md b/doc/src/manual/code-loading.md
index 743ee83c333a4..dd3658e555d2a 100644
--- a/doc/src/manual/code-loading.md
+++ b/doc/src/manual/code-loading.md
@@ -14,7 +14,7 @@ Code inclusion is quite straightforward and simple: it evaluates the given sourc
 
 A *package* is a source tree with a standard layout providing functionality that can be reused by other Julia projects. A package is loaded by `import X` or  `using X` statements. These statements also make the module named `X`—which results from loading the package code—available within the module where the import statement occurs. The meaning of `X` in `import X` is context-dependent: which `X` package is loaded depends on what code the statement occurs in. Thus, handling of `import X` happens in two stages: first, it determines **what** package is defined to be `X` in this context; second, it determines **where** that particular `X` package is found.
 
-These questions are answered by searching through the project environments listed in [`LOAD_PATH`](@ref) for project files (`Project.toml` or `JuliaProject.toml`), manifest files (`Manifest.toml` or `JuliaManifest.toml`), or folders of source files.
+These questions are answered by searching through the project environments listed in [`LOAD_PATH`](@ref) for project files (`Project.toml` or `JuliaProject.toml`), manifest files (`Manifest.toml` or `JuliaManifest.toml`, or the same names suffixed by `-v{major}.{minor}.toml` for specific versions), or folders of source files.
 
 
 ## Federation of packages
@@ -36,12 +36,15 @@ An *environment* determines what `import X` and `using X` mean in various code c
 
 These can be intermixed to create **a stacked environment**: an ordered set of project environments and package directories, overlaid to make a single composite environment. The precedence and visibility rules then combine to determine which packages are available and where they get loaded from. Julia's load path forms a stacked environment, for example.
 
-These environment each serve a different purpose:
+These environments each serve a different purpose:
 
 * Project environments provide **reproducibility**. By checking a project environment into version control—e.g. a git repository—along with the rest of the project's source code, you can reproduce the exact state of the project and all of its dependencies. The manifest file, in particular, captures the exact version of every dependency, identified by a cryptographic hash of its source tree, which makes it possible for `Pkg` to retrieve the correct versions and be sure that you are running the exact code that was recorded for all dependencies.
 * Package directories provide **convenience** when a full carefully-tracked project environment is unnecessary. They are useful when you want to put a set of packages somewhere and be able to directly use them, without needing to create a project environment for them.
 * Stacked environments allow for **adding** tools to the primary environment. You can push an environment of development tools onto the end of the stack to make them available from the REPL and scripts, but not from inside packages.
 
+!!! note
+    When loading a package from another environment in the stack other than the active environment the package is loaded in the context of the active environment. This means that the package will be loaded as if it were imported in the active environment, which may affect how its dependencies versions are resolved. When such a package is precompiling it will be marked as a `(serial)` precompile job, which means that its dependencies will be precompiled in series within the same job, which will likely be slower.
+
 At a high-level, each environment conceptually defines three maps: roots, graph and paths. When resolving the meaning of `import X`, the roots and graph maps are used to determine the identity of `X`, while the paths map is used to locate the source code of `X`. The specific roles of the three maps are:
 
 - **roots:** `name::Symbol` ⟶ `uuid::UUID`
@@ -61,9 +64,9 @@ Each kind of environment defines these three maps differently, as detailed in th
 !!! note
     For ease of understanding, the examples throughout this chapter show full data structures for roots, graph and paths. However, Julia's package loading code does not explicitly create these. Instead, it lazily computes only as much of each structure as it needs to load a given package.
 
-### Project environments
+### [Project environments](@id project-environments)
 
-A project environment is determined by a directory containing a project file called `Project.toml`, and optionally a manifest file called `Manifest.toml`. These files may also be called `JuliaProject.toml` and `JuliaManifest.toml`, in which case `Project.toml` and `Manifest.toml` are ignored. This allows for coexistence with other tools that might consider files called `Project.toml` and `Manifest.toml` significant. For pure Julia projects, however, the names `Project.toml` and `Manifest.toml` are preferred.
+A project environment is determined by a directory containing a project file called `Project.toml`, and optionally a manifest file called `Manifest.toml`. These files may also be called `JuliaProject.toml` and `JuliaManifest.toml`, in which case `Project.toml` and `Manifest.toml` are ignored. This allows for coexistence with other tools that might consider files called `Project.toml` and `Manifest.toml` significant. For pure Julia projects, however, the names `Project.toml` and `Manifest.toml` are preferred. However, from Julia v1.10.8 onwards, `(Julia)Manifest-v{major}.{minor}.toml` is recognized as a format to make a given julia version use a specific manifest file i.e. in the same folder, a `Manifest-v1.11.toml` would be used by v1.11 and `Manifest.toml` by any other julia version.
 
 The roots, graph and paths maps of a project environment are defined as follows:
 
@@ -123,7 +126,7 @@ This manifest file describes a possible complete dependency graph for the `App`
 - There are two different packages named `Priv` that the application uses. It uses a private package, which is a root dependency, and a public one, which is an indirect dependency through `Pub`. These are differentiated by their distinct UUIDs, and they have different deps:
   * The private `Priv` depends on the `Pub` and `Zebra` packages.
   * The public `Priv` has no dependencies.
-- The application also depends on the `Pub` package, which in turn depends on the public `Priv ` and the same `Zebra` package that the private `Priv` package depends on.
+- The application also depends on the `Pub` package, which in turn depends on the public `Priv` and the same `Zebra` package that the private `Priv` package depends on.
 
 
 This dependency graph represented as a dictionary, looks like this:
@@ -155,16 +158,17 @@ graph[UUID("c07ecb7d-0dc9-4db7-8803-fadaaeaf08e1")][:Priv]
 
 and gets `2d15fe94-a1f7-436c-a4d8-07a9a496e01c`, which indicates that in the context of the `Pub` package, `import Priv` refers to the public `Priv` package, rather than the private one which the app depends on directly. This is how the name `Priv` can refer to different packages in the main project than it does in one of its package's dependencies, which allows for duplicate names in the package ecosystem.
 
-What happens if `import Zebra` is evaluated in the main `App` code base? Since `Zebra` does not appear in the project file, the import will fail even though `Zebra` *does* appear in the manifest file. Moreover, if `import Zebra` occurs in the public `Priv` package—the one with UUID `2d15fe94-a1f7-436c-a4d8-07a9a496e01c`—then that would also fail since that `Priv` package has no declared dependencies in the manifest file and therefore cannot load any packages. The `Zebra` package can only be loaded by packages for which it appear as an explicit dependency in the manifest file: the  `Pub` package and one of the `Priv` packages.
+What happens if `import Zebra` is evaluated in the main `App` code base? Since `Zebra` does not appear in the project file, the import will fail even though `Zebra` *does* appear in the manifest file. Moreover, if `import Zebra` occurs in the public `Priv` package—the one with UUID `2d15fe94-a1f7-436c-a4d8-07a9a496e01c`—then that would also fail since that `Priv` package has no declared dependencies in the manifest file and therefore cannot load any packages. The `Zebra` package can only be loaded by packages for which it appears as an explicit dependency in the manifest file: the `Pub` package and one of the `Priv` packages.
 
 **The paths map** of a project environment is extracted from the manifest file. The path of a package `uuid` named `X` is determined by these rules (in order):
 
 1. If the project file in the directory matches `uuid` and name `X`, then either:
-   - It has a toplevel `path` entry, then `uuid` will be mapped to that path, interpreted relative to the directory containing the project file.
-   - Otherwise, `uuid` is mapped to  `src/X.jl` relative to the directory containing the project file.
-2. If the above is not the case and the project file has a corresponding manifest file and the manifest contains a stanza matching `uuid` then:
-   - If it has a `path` entry, use that path (relative to the directory containing the manifest file).
-   - If it has a `git-tree-sha1` entry, compute a deterministic hash function of `uuid` and `git-tree-sha1`—call it `slug`—and look for a directory named `packages/X/$slug` in each directory in the Julia `DEPOT_PATH` global array. Use the first such directory that exists.
+   - It has a toplevel `entryfile` entry, then `uuid` will be mapped to that path, interpreted relative to the directory containing the project file.
+   - Otherwise, `uuid` is mapped to `src/X.jl` relative to the directory containing the project file.
+2. 1. If the above is not the case and the project file has a corresponding manifest file and the manifest contains a stanza matching `uuid` then:
+      - If it has a `path` entry, use that path (relative to the directory containing the manifest file).
+      - If it has a `git-tree-sha1` entry, compute a deterministic hash function of `uuid` and `git-tree-sha1`—call it `slug`—and look for a directory named `packages/X/$slug` in each directory in the Julia `DEPOT_PATH` global array. Use the first such directory that exists.
+   2. If this is a directory then `uuid` is mapped to `src/X.jl` unless the matching manifest stanza has an `entryfile` entry in which case this is used. In both cases, these are relative to the directory in 2.1.
 
 If any of these result in success, the path to the source code entry point will be either that result, the relative path from that result plus `src/X.jl`; otherwise, there is no path mapping for `uuid`. When loading `X`, if no source code path is found, the lookup will fail, and the user may be prompted to install the appropriate package version or to take other corrective action (e.g. declaring `X` as a dependency).
 
@@ -190,7 +194,7 @@ paths = Dict(
     # Priv – the public one:
     (UUID("2d15fe94-a1f7-436c-a4d8-07a9a496e01c"), :Priv) =>
         # package installed in the system depot:
-        "/usr/local/julia/packages/Priv/HDkr/src/Priv.jl",
+        "/usr/local/julia/packages/Priv/HDkrT/src/Priv.jl",
     # Pub:
     (UUID("c07ecb7d-0dc9-4db7-8803-fadaaeaf08e1"), :Pub) =>
         # package installed in the user depot:
@@ -208,7 +212,6 @@ This example map includes three different kinds of package locations (the first
 2. The public `Priv` and `Zebra` packages are in the system depot, where packages installed and managed by the system administrator live. These are available to all users on the system.
 3. The `Pub` package is in the user depot, where packages installed by the user live. These are only available to the user who installed them.
 
-
 ### Package directories
 
 Package directories provide a simpler kind of environment without the ability to handle name collisions. In a package directory, the set of top-level packages is the set of subdirectories that "look like" packages. A package `X` exists in a package directory if the directory contains one of the following "entry point" files:
@@ -351,7 +354,7 @@ Since the primary environment is typically the environment of a project you're w
 
 ### [Package Extensions](@id man-extensions)
 
-A package "extension" is a module that is automatically loaded when a specified set of other packages (its "extension dependencies") are loaded in the current Julia session. Extensions are defined under the `[extensions]` section in the project file. The extension dependencies of an extension are a subset of those packages listed under the `[weakdeps]` section of the project file. Those packages can have compat entries like other packages.
+A package "extension" is a module that is automatically loaded when a specified set of other packages (its "triggers") are loaded in the current Julia session. Extensions are defined under the `[extensions]` section in the project file. The triggers of an extension are a subset of those packages listed under the `[weakdeps]` (and possibly, but uncommonly the `[deps]`) section of the project file. Those packages can have compat entries like other packages.
 
 ```toml
 name = "MyPackage"
@@ -371,8 +374,8 @@ FooExt = "ExtDep"
 ```
 
 The keys under `extensions` are the names of the extensions.
-They are loaded when all the packages on the right hand side (the extension dependencies) of that extension are loaded.
-If an extension only has one extension dependency the list of extension dependencies can be written as just a string for brevity.
+They are loaded when all the packages on the right hand side (the triggers) of that extension are loaded.
+If an extension only has one trigger the list of triggers can be written as just a string for brevity.
 The location for the entry point of the extension is either in `ext/FooExt.jl` or `ext/FooExt/FooExt.jl` for
 extension `FooExt`.
 The content of an extension is often structured as:
@@ -380,10 +383,10 @@ The content of an extension is often structured as:
 ```
 module FooExt
 
-# Load main package and extension dependencies
+# Load main package and triggers
 using MyPackage, ExtDep
 
-# Extend functionality in main package with types from the extension dependencies
+# Extend functionality in main package with types from the triggers
 MyPackage.func(x::ExtDep.SomeStruct) = ...
 
 end
@@ -391,9 +394,33 @@ end
 
 When a package with extensions is added to an environment, the `weakdeps` and `extensions` sections
 are stored in the manifest file in the section for that package. The dependency lookup rules for
-a package are the same as for its "parent" except that the listed extension dependencies are also considered as
+a package are the same as for its "parent" except that the listed triggers are also considered as
 dependencies.
 
+### [Workspaces](@id workspaces)
+
+A project file can define a workspace by giving a set of projects that is part of that workspace:
+
+```toml
+[workspace]
+projects = ["test", "benchmarks", "docs", "SomePackage"]
+```
+
+Each project listed in the `projects` array is specified by its relative path from the workspace root. This can be a direct child directory (e.g., `"test"`) or a nested subdirectory (e.g., `"nested/subdir/MyPackage"`). Each project contains its own `Project.toml` file, which may include additional dependencies and compatibility constraints. In such cases, the package manager gathers all dependency information from all the projects in the workspace generating a single manifest file that combines the versions of all dependencies.
+
+When Julia loads a project, it searches upward through parent directories until it reaches the user's home directory to find a workspace that includes that project. This allows workspace projects to be nested at arbitrary depth within the workspace directory tree.
+
+Furthermore, workspaces can be "nested", meaning a project defining a workspace can also be part of another workspace. In this scenario, a single manifest file is still utilized, stored alongside the "root project" (the project that doesn't have another workspace including it). An example file structure could look like this:
+
+```
+Project.toml # projects = ["MyPackage"]
+Manifest.toml
+MyPackage/
+    Project.toml # projects = ["test"]
+    test/
+        Project.toml
+```
+
 ### [Package/Environment Preferences](@id preferences)
 
 Preferences are dictionaries of metadata that influence package behavior within an environment.
@@ -411,6 +438,32 @@ Preferences in environments higher up in the environment stack get overridden by
 This allows depot-wide preference defaults to exist, with active projects able to merge or even completely overwrite these inherited preferences.
 See the docstring for `Preferences.set_preferences!()` for the full details of how to set preferences to allow or disallow merging.
 
+### [Syntax Versioning](@id syntax-versioning)
+
+Syntax versioning allows packages to specify which version of Julia's syntax they use. In particular, different
+packages can use different versions of the Julia syntax. This allows evolution of Julia's syntax in a non-breaking
+way, while allowing packages to upgrade at their own pace. The syntax version is determined from the package's
+corresponding Project.toml and propagates to all modules defined in the package.
+
+#### Syntax Version Determination
+
+The syntax version for a package is determined by the loading mechanism in the following order of precedence:
+
+1. If a `syntax.julia_version` field is present in the project file, it is used directly:
+   ```toml
+   name = "MyPackage"
+   uuid = "..."
+   syntax.julia_version = "1.14"
+   ```
+
+2. Otherwise, if a `[compat]` section specifies a Julia version constraint, the minimum compatible version is used:
+   ```toml
+   [compat]
+   julia = "1.13-2"  # implies syntax version 1.13.0
+   ```
+
+3. If neither is specified, the current Julia version is used.
+
 ## Conclusion
 
 Federated package management and precise software reproducibility are difficult but worthy goals in a package system. In combination, these goals lead to a more complex package loading mechanism than most dynamic languages have, but it also yields scalability and reproducibility that is more commonly associated with static languages. Typically, Julia users should be able to use the built-in package manager to manage their projects without needing a precise understanding of these interactions. A call to `Pkg.add("X")` will add to the appropriate project and manifest files, selected via `Pkg.activate("Y")`, so that a future call to `import X` will load `X` without further thought.
diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md
index e1651c61a3ec3..b14557ba6ef20 100644
--- a/doc/src/manual/command-line-interface.md
+++ b/doc/src/manual/command-line-interface.md
@@ -39,6 +39,85 @@ $ julia --color=yes -O -- script.jl arg1 arg2..
 
 See also [Scripting](@ref man-scripting) for more information on writing Julia scripts.
 
+## The `Main.main` entry point
+
+As of Julia 1.11, `Base` exports the macro `@main`. This macro expands to the symbol `main`,
+but at the conclusion of executing a script or expression, `julia` will attempt to execute
+`Main.main(Base.ARGS)` if such a function `Main.main` has been defined and this behavior was opted into
+by using the `@main` macro.
+
+To see this feature in action, consider the following definition:
+```julia
+(@main)(args) = println("Hello $(args[1])!")
+```
+Executing the above script with `julia script.jl "Buddy"` will automatically run `(@main)` and print "Hello Buddy!",
+despite there being no explicit call to `(@main)`.
+
+The return value of the `(@main)` function must either be `nothing`, resulting in exit code
+`0`, or convertible to a `Cint` which will be the exit code:
+```
+$ julia -e "(@main)(args) = nothing"; echo $?0
+0
+$ julia -e "(@main)(args) = 1"; echo $?
+1
+```
+Typically exit codes are in the range `0:255`, although the interpretation of the return value might be OS dependent.
+
+This feature is intended to aid in the unification of compiled and interactive workflows. In compiled workflows, loading the code that defines the `main`
+function may be spatially and temporally separated from the invocation. However, for interactive workflows,
+the behavior is equivalent to explicitly calling `exit(main(ARGS))` at the end of the evaluated script or
+expression.
+
+!!! compat "Julia 1.11"
+    The special entry point `Main.main` was added in Julia 1.11. For compatibility with prior julia versions,
+    add an explicit `@isdefined(var"@main") ? (@main) : exit(main(ARGS))` at the end of your scripts.
+
+Only the `main` binding in the `Main` module has this behavior and only if
+the macro `@main` was used within the defining module.
+
+For example, using `hello` instead of `main` will not result in the `hello` function executing:
+
+```
+$ julia -e 'hello(args) = println("Hello World!")'
+$
+```
+
+and neither will a plain definition of `main`:
+```
+$ julia -e 'main(args) = println("Hello World!")'
+$
+```
+
+However, the opt-in need not occur at definition time:
+```
+$ julia -e 'main(args) = println("Hello World!"); @main'
+Hello World!
+$
+```
+
+The `main` binding may be imported from a package. A *hello world* package defined as
+
+```
+module Hello
+
+export main
+(@main)(args) = println("Hello from the package!")
+
+end
+```
+
+may be used as:
+
+```
+$ julia -e 'using Hello'
+Hello from the package!
+$ julia -e 'import Hello' # N.B.: Execution depends on the binding not whether the package is loaded
+$
+```
+
+However, note that the current best practice recommendation is to not mix application and reusable library
+code in the same package. Helper applications may be distributed as separate packages or as scripts with
+separate `main` entry points in a package's `bin` folder.
 
 ## Parallel mode
 
@@ -77,7 +156,8 @@ atreplinit() do repl
     # ...
 end
 ```
-
+If [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) is set, the startup file should be located there:
+`$JULIA_DEPOT_PATH/config/startup.jl`.
 
 ## [Command-line switches for Julia](@id command-line-interface)
 
@@ -93,44 +173,48 @@ The following is a complete list of command-line switches available when launchi
 |Switch                                 |Description|
 |:---                                   |:---|
 |`-v`, `--version`                      |Display version information|
-|`-h`, `--help`                         |Print command-line options (this message).|
-|`--help-hidden`                        |Uncommon options not shown by `-h`|
-|`--project[={<dir>\|@.}]`              |Set `<dir>` as the home project/environment. The default `@.` option will search through parent directories until a `Project.toml` or `JuliaProject.toml` file is found.|
+|`-h`, `--help`                         |Print command-line options (this message)|
+|`--help-hidden`                        |Print uncommon options not shown by `-h`|
+|`--project[={<dir>\|@temp\|@.}]`       |Set `<dir>` as the active project/environment. Or, create a temporary environment with `@temp`. The default `@.` option will search through parent directories until a `Project.toml` or `JuliaProject.toml` file is found.|
 |`-J`, `--sysimage <file>`              |Start up with the given system image file|
 |`-H`, `--home <dir>`                   |Set location of `julia` executable|
-|`--startup-file={yes*\|no}`            |Load `JULIA_DEPOT_PATH/config/startup.jl`; if `JULIA_DEPOT_PATH` environment variable is unset, load `~/.julia/config/startup.jl`|
+|`--startup-file={yes*\|no}`            |Load `JULIA_DEPOT_PATH/config/startup.jl`; if [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) environment variable is unset, load `~/.julia/config/startup.jl`|
 |`--handle-signals={yes*\|no}`          |Enable or disable Julia's default signal handlers|
 |`--sysimage-native-code={yes*\|no}`    |Use native code from system image if available|
-|`--compiled-modules={yes*\|no}`        |Enable or disable incremental precompilation of modules|
-|`--pkgimages={yes*\|no}`               |Enable or disable usage of native code caching in the form of pkgimages|
+|`--compiled-modules={yes*\|no\|existing\|strict}` |Enable or disable incremental precompilation of modules. The `existing` option allows use of existing compiled modules that were previously precompiled, but disallows creation of new precompile files. The `strict` option is similar, but will error if no precompile file is found. |
+|`--pkgimages={yes*\|no\|existing}`     |Enable or disable usage of native code caching in the form of pkgimages. The `existing` option allows use of existing pkgimages but disallows creation of new ones|
 |`-e`, `--eval <expr>`                  |Evaluate `<expr>`|
 |`-E`, `--print <expr>`                 |Evaluate `<expr>` and display the result|
+|`-m`, `--module <Package> [args]`      |Run entry point of `Package` (`@main` function) with `args`|
 |`-L`, `--load <file>`                  |Load `<file>` immediately on all processors|
-|`-t`, `--threads {N\|auto}`            |Enable N threads; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future.  Currently, `auto` uses the number of CPUs assigned to this julia process based on the OS-specific affinity assignment interface, if supported (Linux and Windows). If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads.|
-| `--gcthreads {N}`                     |Enable N GC threads; If unspecified is set to half of the compute worker threads.|
+|`-t`, `--threads {auto\|N[,auto\|M]}`  |Enable N[+M] threads; N threads are assigned to the `default` threadpool, and if M is specified, M threads are assigned to the `interactive` threadpool; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future. Currently sets N to the number of CPUs assigned to this Julia process based on the OS-specific affinity assignment interface if supported (Linux and Windows) or to the number of CPU threads if not supported (MacOS) or if process affinity is not configured, and sets M to 1.|
+| `--gcthreads=N[,M]`                   |Use N threads for the mark phase of GC and M (0 or 1) threads for the concurrent sweeping phase of GC. N is set to the number of compute threads and M is set to 0 if unspecified. See [Memory Management and Garbage Collection](@ref man-memory-management) for more details.|
 |`-p`, `--procs {N\|auto}`              |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)|
 |`--machine-file <file>`                |Run processes on hosts listed in `<file>`|
-|`-i`                                   |Interactive mode; REPL runs and `isinteractive()` is true|
+|`-i`, `--interactive`                  |Interactive mode; REPL runs and `isinteractive()` is true|
 |`-q`, `--quiet`                        |Quiet startup: no banner, suppress REPL warnings|
-|`--banner={yes\|no\|auto*}`            |Enable or disable startup banner|
+|`--banner={yes\|no\|short\|auto*}`     |Enable or disable startup banner|
 |`--color={yes\|no\|auto*}`             |Enable or disable color text|
 |`--history-file={yes*\|no}`            |Load or save history|
 |`--depwarn={yes\|no*\|error}`          |Enable or disable syntax and method deprecation warnings (`error` turns warnings into errors)|
 |`--warn-overwrite={yes\|no*}`          |Enable or disable method overwrite warnings|
 |`--warn-scope={yes*\|no}`              |Enable or disable warning for ambiguous top-level scope|
 |`-C`, `--cpu-target <target>`          |Limit usage of CPU features up to `<target>`; set to `help` to see the available options|
-|`-O`, `--optimize={0,1,2*,3}`          |Set the optimization level (level is 3 if `-O` is used without a level) ($)|
-|`--min-optlevel={0*,1,2,3}`            |Set the lower bound on per-module optimization|
-|`-g`, `--debug-info={0,1*,2}`          |Set the level of debug info generation (level is 2 if `-g` is used without a level) ($)|
-|`--inline={yes\|no}`                   |Control whether inlining is permitted, including overriding `@inline` declarations|
+|`-O`, `--optimize={0\|1\|2*\|3}`       |Set the optimization level (level is 3 if `-O` is used without a level) ($)|
+|`--min-optlevel={0*\|1\|2\|3}`         |Set the lower bound on per-module optimization|
+|`-g`, `--debug-info={0\|1*\|2}`        |Set the level of debug info generation (level is 2 if `-g` is used without a level) ($)|
+|`--inline={yes*\|no}`                  |Control whether inlining is permitted, including overriding `@inline` declarations|
 |`--check-bounds={yes\|no\|auto*}`      |Emit bounds checks always, never, or respect `@inbounds` declarations ($)|
-|`--math-mode={ieee,fast}`              |Disallow or enable unsafe floating point optimizations (overrides `@fastmath` declaration)|
+|`--math-mode={ieee\|user*}`            |Always follow `ieee` floating point semantics or respect `@fastmath` declarations|
+|`--polly={yes*\|no}`                   |Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)|
 |`--code-coverage[={none*\|user\|all}]` |Count executions of source lines (omitting setting is equivalent to `user`)|
 |`--code-coverage=@<path>`              |Count executions but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.|
 |`--code-coverage=tracefile.info`       |Append coverage information to the LCOV tracefile (filename supports format tokens).|
 |`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")|
 |`--track-allocation=@<path>`           |Count bytes but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.|
+|`--task-metrics={yes\|no*}`             |Enable the collection of per-task metrics|
 |`--bug-report=KIND`                    |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.|
+|`--heap-size-hint=<size>`              |Forces garbage collection if memory usage is higher than the given value. The value may be specified as a number of bytes, optionally in units of KB, MB, GB, or TB, or as a percentage of physical memory with %. See [Memory Management and Garbage Collection](@ref man-memory-management) for more details.|
 |`--compile={yes*\|no\|all\|min}`       |Enable or disable JIT compiler, or request exhaustive or minimal compilation|
 |`--output-o <name>`                    |Generate an object file (including system image data)|
 |`--output-ji <name>`                   |Generate a system image data file (.ji)|
@@ -140,9 +224,17 @@ The following is a complete list of command-line switches available when launchi
 |`--output-bc <name>`                   |Generate LLVM bitcode (.bc)|
 |`--output-asm <name>`                  |Generate an assembly file (.s)|
 |`--output-incremental={yes\|no*}`      |Generate an incremental output file (rather than complete)|
-|`--trace-compile={stderr,name}`        |Print precompile statements for methods compiled during execution or save to a path|
+|`--trace-compile={stderr\|name}`       |Print precompile statements for methods compiled during execution or save to stderr or a path. Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported|
+|`--trace-compile-timing`               |If `--trace-compile` is enabled show how long each took to compile in ms|
+|`--trace-dispatch={stderr\|name}`      |Print precompile statements for methods dispatched during execution or save to stderr or a path.|
+|`--trace-eval[={no*\|loc\|full}]`      |Show top-level expressions being evaluated. `loc` shows location info only, `full` shows full expressions (omitting setting is equivalent to `loc`). Only shows the outermost expression being evaluated, not internal function calls. See also [`Base.TRACE_EVAL`](@ref).|
 |`--image-codegen`                      |Force generate code in imaging mode|
+|`--permalloc-pkgimg={yes\|no*}`        |Copy the data section of package images into memory|
+|`--trim={no*\|safe\|unsafe\|unsafe-warn}` |Build a sysimage including only code provably reachable from methods marked by calling `entrypoint`. The three non-default options differ in how they handle dynamic call sites. In safe mode, such sites result in compile-time errors. In unsafe mode, such sites are allowed but the resulting binary might be missing needed code and can throw runtime errors. With unsafe-warn, such sites will trigger warnings at compile-time and might error at runtime.|
+
+Options that have the form `--option={...}` can be specified either as `--option=value` or as `--option value`. For example, `julia --banner=no` is equivalent to `julia --banner no`. This is especially relevant for options that take a filename for output, because forgetting to specifying the argument for (say) `--trace-compile` will cause the option following it to be interpreted as the filename, possibly unintentionally overwriting it.
 
+Note that options of the form `--option[=...]` can **not** be specified as `--option value`, but only as `--option=value` (or simply `--option`, when no argument is provided).
 
 !!! compat "Julia 1.1"
     In Julia 1.0, the default `--project=@.` option did not search up from the root
diff --git a/doc/src/manual/complex-and-rational-numbers.md b/doc/src/manual/complex-and-rational-numbers.md
index 9cab2ed1e4f24..d1d6ffeca245f 100644
--- a/doc/src/manual/complex-and-rational-numbers.md
+++ b/doc/src/manual/complex-and-rational-numbers.md
@@ -254,13 +254,30 @@ julia> float(3//4)
 ```
 
 Conversion from rational to floating-point respects the following identity for any integral values
-of `a` and `b`, with the exception of the two cases `b == 0` and `a == 0 && b < 0`:
+of `a` and `b`, except when `a==0 && b <= 0`:
 
 ```jldoctest
 julia> a = 1; b = 2;
 
 julia> isequal(float(a//b), a/b)
 true
+
+julia> a, b = 0, 0
+(0, 0)
+
+julia> float(a//b)
+ERROR: ArgumentError: invalid rational: zero(Int64)//zero(Int64)
+Stacktrace:
+[...]
+
+julia> a/b
+NaN
+
+julia> a, b = 0, -1
+(0, -1)
+
+julia> float(a//b), a/b
+(0.0, -0.0)
 ```
 
 Constructing infinite rational values is acceptable:
diff --git a/doc/src/manual/constructors.md b/doc/src/manual/constructors.md
index 6ec206dade335..ff354a923610c 100644
--- a/doc/src/manual/constructors.md
+++ b/doc/src/manual/constructors.md
@@ -293,6 +293,8 @@ Point{Float64}(1.0, 2.5)
 
 julia> Point(1,2.5) ## implicit T ##
 ERROR: MethodError: no method matching Point(::Int64, ::Float64)
+The type `Point` exists, but no method is defined for this combination of argument types when trying to construct it.
+
 Closest candidates are:
   Point(::T, ::T) where T<:Real at none:2
 
@@ -372,10 +374,13 @@ However, other similar calls still don't work:
 ```jldoctest parametric2
 julia> Point(1.5,2)
 ERROR: MethodError: no method matching Point(::Float64, ::Int64)
+The type `Point` exists, but no method is defined for this combination of argument types when trying to construct it.
 
 Closest candidates are:
   Point(::T, !Matched::T) where T<:Real
    @ Main none:1
+  Point(!Matched::Int64, !Matched::Float64)
+   @ Main none:1
 
 Stacktrace:
 [...]
@@ -491,6 +496,7 @@ operator, which provides a syntax for writing rationals (e.g. `1 ⊘ 2`). Julia'
 type uses the [`//`](@ref) operator for this purpose. Before these definitions, `⊘`
 is a completely undefined operator with only syntax and no meaning. Afterwards, it behaves just
 as described in [Rational Numbers](@ref) -- its entire behavior is defined in these few lines.
+Note that the infix use of `⊘` works because Julia has a set of symbols that are recognized to be infix operators.
 The first and most basic definition just makes `a ⊘ b` construct a `OurRational` by applying the
 `OurRational` constructor to `a` and `b` when they are integers. When one of the operands of `⊘`
 is already a rational number, we construct a new rational for the resulting ratio slightly differently;
@@ -555,6 +561,7 @@ julia> struct SummedArray{T<:Number,S<:Number}
 
 julia> SummedArray(Int32[1; 2; 3], Int32(6))
 ERROR: MethodError: no method matching SummedArray(::Vector{Int32}, ::Int32)
+The type `SummedArray` exists, but no method is defined for this combination of argument types when trying to construct it.
 
 Closest candidates are:
   SummedArray(::Vector{T}) where T
@@ -568,3 +575,32 @@ This constructor will be invoked by the syntax `SummedArray(a)`. The syntax `new
 specifying parameters for the type to be constructed, i.e. this call will return a `SummedArray{T,S}`.
 `new{T,S}` can be used in any constructor definition, but for convenience the parameters
 to `new{}` are automatically derived from the type being constructed when possible.
+
+## Constructors are just callable objects
+
+An object of any type may be [made callable](@ref "Function-like objects") by defining a
+method. This includes types, i.e., objects of type [`Type`](@ref); and constructors may,
+in fact, be viewed as just callable type objects. For example, there are many methods
+defined on `Bool` and various supertypes of it:
+
+```@repl
+methods(Bool)
+```
+
+The usual constructor syntax is exactly equivalent to the function-like object
+syntax, so trying to define a method with each syntax will cause the first method
+to be overwritten by the next one:
+
+```jldoctest
+julia> struct S
+           f::Int
+       end
+
+julia> S() = S(7)
+S
+
+julia> (::Type{S})() = S(8)  # overwrites the previous constructor method
+
+julia> S()
+S(8)
+```
diff --git a/doc/src/manual/control-flow.md b/doc/src/manual/control-flow.md
index 5d12530892b1e..04bb1ea621f33 100644
--- a/doc/src/manual/control-flow.md
+++ b/doc/src/manual/control-flow.md
@@ -139,7 +139,7 @@ julia> test(1,2)
 x is less than y.
 
 julia> test(2,1)
-ERROR: UndefVarError: `relation` not defined
+ERROR: UndefVarError: `relation` not defined in local scope
 Stacktrace:
  [1] test(::Int64, ::Int64) at ./none:7
 ```
@@ -248,7 +248,7 @@ no
 ## Short-Circuit Evaluation
 
 The `&&` and `||` operators in Julia correspond to logical “and” and “or” operations, respectively,
-and are typically used for this purpose.  However, they have an additional property of *short-circuit*
+and are typically used for this purpose. However, they have an additional property of *short-circuit*
 evaluation: they don't necessarily evaluate their second argument, as explained below.  (There
 are also bitwise `&` and `|` operators that can be used as logical “and” and “or” *without*
 short-circuit behavior, but beware that `&` and `|` have higher precedence than `&&` and `||` for evaluation order.)
@@ -397,7 +397,7 @@ julia> while i <= 3
 3
 ```
 
-The `while` loop evaluates the condition expression (`i <= 5` in this case), and as long it remains
+The `while` loop evaluates the condition expression (`i <= 3` in this case), and as long it remains
 `true`, keeps also evaluating the body of the `while` loop. If the condition expression is `false`
 when the `while` loop is first reached, the body is never evaluated.
 
@@ -414,8 +414,33 @@ julia> for i = 1:3
 3
 ```
 
-Here the `1:3` is a range object, representing the sequence of numbers 1, 2, 3. The `for`
-loop iterates through these values, assigning each one in turn to the variable `i`. One rather
+Here the `1:3` is a [`range`](@ref) object, representing the sequence of numbers 1, 2, 3. The `for`
+loop iterates through these values, assigning each one in turn to the variable `i`.
+In general, the `for` construct can loop over any "iterable" object (or "container"), from a  range like `1:3` or `1:3:13` (a [`StepRange`](@ref) indicating every 3rd integer 1, 4, 7, …, 13) to more generic containers like arrays, including [iterators defined by user code](@ref man-interface-iteration)
+or external packages. For containers other than ranges, the alternative
+(but fully equivalent) keyword `in` or `∈` is typically used instead of `=`, since it makes
+the code read more clearly:
+
+```jldoctest
+julia> for i in [1,4,0]
+           println(i)
+       end
+1
+4
+0
+
+julia> for s ∈ ["foo","bar","baz"]
+           println(s)
+       end
+foo
+bar
+baz
+```
+
+Various types of iterable containers will be introduced and discussed in later sections of the
+manual (see, e.g., [Multi-dimensional Arrays](@ref man-multi-dim-arrays)).
+
+One rather
 important distinction between the previous `while` loop form and the `for` loop form is the scope
 during which the variable is visible. A `for` loop always introduces a new iteration variable in
 its body, regardless of whether a variable of the same name exists in the enclosing scope.
@@ -433,7 +458,7 @@ julia> for j = 1:3
 3
 
 julia> j
-ERROR: UndefVarError: `j` not defined
+ERROR: UndefVarError: `j` not defined in `Main`
 ```
 
 ```jldoctest
@@ -455,29 +480,6 @@ Use `for outer` to modify the latter behavior and reuse an existing local variab
 See [Scope of Variables](@ref scope-of-variables) for a detailed explanation of variable scope, [`outer`](@ref), and how it works in
 Julia.
 
-In general, the `for` loop construct can iterate over any container. In these cases, the alternative
-(but fully equivalent) keyword `in` or `∈` is typically used instead of `=`, since it makes
-the code read more clearly:
-
-```jldoctest
-julia> for i in [1,4,0]
-           println(i)
-       end
-1
-4
-0
-
-julia> for s ∈ ["foo","bar","baz"]
-           println(s)
-       end
-foo
-bar
-baz
-```
-
-Various types of iterable containers will be introduced and discussed in later sections of the
-manual (see, e.g., [Multi-dimensional Arrays](@ref man-multi-dim-arrays)).
-
 It is sometimes convenient to terminate the repetition of a `while` before the test condition
 is falsified or stop iterating in a `for` loop before the end of the iterable object is reached.
 This can be accomplished with the `break` keyword:
@@ -599,6 +601,7 @@ below all interrupt the normal flow of control.
 | [`DomainError`](@ref)         |
 | [`EOFError`](@ref)            |
 | [`ErrorException`](@ref)      |
+| [`FieldError`](@ref)          |
 | [`InexactError`](@ref)        |
 | [`InitError`](@ref)           |
 | [`InterruptException`](@ref)  |
@@ -637,11 +640,11 @@ julia> struct MyCustomException <: Exception end
 ### The [`throw`](@ref) function
 
 Exceptions can be created explicitly with [`throw`](@ref). For example, a function defined only
-for nonnegative numbers could be written to [`throw`](@ref) a [`DomainError`](@ref) if the argument
+for non-negative numbers could be written to [`throw`](@ref) a [`DomainError`](@ref) if the argument
 is negative:
 
 ```jldoctest; filter = r"Stacktrace:(\n \[[0-9]+\].*)*"
-julia> f(x) = x>=0 ? exp(-x) : throw(DomainError(x, "argument must be nonnegative"))
+julia> f(x) = x>=0 ? exp(-x) : throw(DomainError(x, "argument must be non-negative"))
 f (generic function with 1 method)
 
 julia> f(1)
@@ -649,7 +652,7 @@ julia> f(1)
 
 julia> f(-1)
 ERROR: DomainError with -1:
-argument must be nonnegative
+argument must be non-negative
 Stacktrace:
  [1] f(::Int64) at ./none:1
 ```
@@ -782,6 +785,8 @@ julia> sqrt_second(x) = try
                sqrt(complex(x[2], 0))
            elseif isa(y, BoundsError)
                sqrt(x)
+           else
+               rethrow() # ensure other exceptions can bubble up the call stack
            end
        end
 sqrt_second (generic function with 1 method)
@@ -800,8 +805,18 @@ ERROR: DomainError with -9.0:
 sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
 Stacktrace:
 [...]
+
+julia> sqrt_second([1 nothing])
+ERROR: MethodError: no method matching sqrt(::Nothing)
+The function `sqrt` exists, but no method is defined for this combination of argument types.
+[...]
 ```
 
+Use [`rethrow`](@ref) as above to continue unwinding the stack with the original exception so that
+higher-level exception handlers can deal with the exception. When filtering by exception type
+as above, it is often important to include `else rethrow()` so that other types of exceptions
+are not hidden from the caller.
+
 Note that the symbol following `catch` will always be interpreted as a name for the exception,
 so care is needed when writing `try/catch` expressions on a single line. The following code will
 *not* work to return the value of `x` in case of an error:
@@ -824,7 +839,7 @@ end
 The power of the `try/catch` construct lies in the ability to unwind a deeply nested computation
 immediately to a much higher level in the stack of calling functions. There are situations where
 no error has occurred, but the ability to unwind the stack and pass a value to a higher level
-is desirable. Julia provides the [`rethrow`](@ref), [`backtrace`](@ref), [`catch_backtrace`](@ref)
+is desirable. Julia provides the [`backtrace`](@ref), [`catch_backtrace`](@ref)
 and [`current_exceptions`](@ref) functions for more advanced error handling.
 
 ### `else` Clauses
@@ -860,7 +875,8 @@ end
            else
                foo
            end
-    ERROR: UndefVarError: `foo` not defined
+    ERROR: UndefVarError: `foo` not defined in `Main`
+    Suggestion: check for spelling errors or missing imports.
     ```
     Use the [`local` keyword](@ref local-scope) outside the `try` block to make the variable
     accessible from anywhere within the outer scope.
@@ -889,6 +905,41 @@ When control leaves the `try` block (for example due to a `return`, or just fini
 continue propagating. A `catch` block may be combined with `try` and `finally` as well. In this
 case the `finally` block will run after `catch` has handled the error.
 
+When evaluating a `try/catch/else/finally` expression, the value of the entire
+expression is the value of the last block executed, excluding the `finally`
+block. For example:
+
+```jldoctest
+julia> try
+           1
+       finally
+           2
+       end
+1
+
+julia> try
+           error("")
+       catch
+           1
+       else
+           2
+       finally
+           3
+       end
+1
+
+julia> try
+           0
+       catch
+           1
+       else
+           2
+       finally
+           3
+       end
+2
+```
+
 ## [Tasks (aka Coroutines)](@id man-tasks)
 
 Tasks are a control flow feature that allows computations to be suspended and resumed in a flexible
diff --git a/doc/src/manual/conversion-and-promotion.md b/doc/src/manual/conversion-and-promotion.md
index f0c156f21ea62..9f785a560bfcc 100644
--- a/doc/src/manual/conversion-and-promotion.md
+++ b/doc/src/manual/conversion-and-promotion.md
@@ -165,6 +165,7 @@ constructor.
 Such a definition might look like this:
 
 ```julia
+import Base: convert
 convert(::Type{MyType}, x) = MyType(x)
 ```
 
@@ -195,6 +196,8 @@ convert(::Type{T}, x::T) where {T<:Number} = x
 
 Similar definitions exist for `AbstractString`, [`AbstractArray`](@ref), and [`AbstractDict`](@ref).
 
+
+
 ## Promotion
 
 Promotion refers to converting values of mixed types to a single common type. Although it is not
@@ -291,6 +294,7 @@ another type object, such that instances of the argument types will be promoted
 type. Thus, by defining the rule:
 
 ```julia
+import Base: promote_rule
 promote_rule(::Type{Float64}, ::Type{Float32}) = Float64
 ```
 
@@ -336,6 +340,7 @@ Finally, we finish off our ongoing case study of Julia's rational number type, w
 sophisticated use of the promotion mechanism with the following promotion rules:
 
 ```julia
+import Base: promote_rule
 promote_rule(::Type{Rational{T}}, ::Type{S}) where {T<:Integer,S<:Integer} = Rational{promote_type(T,S)}
 promote_rule(::Type{Rational{T}}, ::Type{Rational{S}}) where {T<:Integer,S<:Integer} = Rational{promote_type(T,S)}
 promote_rule(::Type{Rational{T}}, ::Type{S}) where {T<:Integer,S<:AbstractFloat} = promote_type(T,S)
diff --git a/doc/src/manual/distributed-computing.md b/doc/src/manual/distributed-computing.md
index 4531506d5c49d..43fb9405fdb3d 100644
--- a/doc/src/manual/distributed-computing.md
+++ b/doc/src/manual/distributed-computing.md
@@ -48,7 +48,7 @@ Generally it makes sense for `n` to equal the number of CPU threads (logical cor
 argument implicitly loads module [`Distributed`](@ref man-distributed).
 
 
-```julia
+```julia-repl
 $ julia -p 2
 
 julia> r = remotecall(rand, 2, 2, 2)
@@ -58,7 +58,7 @@ julia> s = @spawnat 2 1 .+ fetch(r)
 Future(2, 1, 5, nothing)
 
 julia> fetch(s)
-2×2 Array{Float64,2}:
+2×2 Matrix{Float64}:
  1.18526  1.50912
  1.16296  1.60607
 ```
@@ -106,7 +106,7 @@ julia> s = @spawnat :any 1 .+ fetch(r)
 Future(3, 1, 5, nothing)
 
 julia> fetch(s)
-2×2 Array{Float64,2}:
+2×2 Matrix{Float64}:
  1.38854  1.9098
  1.20939  1.57158
 ```
@@ -123,7 +123,7 @@ An important thing to remember is that, once fetched, a [`Future`](@ref Distribu
 locally. Further [`fetch`](@ref) calls do not entail a network hop. Once all referencing [`Future`](@ref Distributed.Future)s
 have fetched, the remote stored value is deleted.
 
-[`@async`](@ref) is similar to [`@spawnat`](@ref), but only runs tasks on the local process. We
+[`Threads.@spawn`](@ref) is similar to [`@spawnat`](@ref), but only runs tasks on the local process. We
 use it to create a "feeder" task for each process. Each task picks the next index that needs to
 be computed, then waits for its process to finish, then repeats until we run out of indices. Note
 that the feeder tasks do not begin to execute until the main task reaches the end of the [`@sync`](@ref)
@@ -153,12 +153,12 @@ julia> function rand2(dims...)
        end
 
 julia> rand2(2,2)
-2×2 Array{Float64,2}:
+2×2 Matrix{Float64}:
  0.153756  0.368514
  1.15119   0.918912
 
 julia> fetch(@spawnat :any rand2(2,2))
-ERROR: RemoteException(2, CapturedException(UndefVarError(Symbol("#rand2"))
+ERROR: RemoteException(2, CapturedException(UndefVarError(Symbol("#rand2"))))
 Stacktrace:
 [...]
 ```
@@ -186,7 +186,7 @@ end
 ```
 
 In order to refer to `MyType` across all processes, `DummyModule.jl` needs to be loaded on
-every process.  Calling `include("DummyModule.jl")` loads it only on a single process.  To
+every process. Calling `include("DummyModule.jl")` loads it only on a single process. To
 load it on every process, use the [`@everywhere`](@ref) macro (starting Julia with `julia -p
 2`):
 
@@ -197,8 +197,8 @@ loaded
       From worker 2:    loaded
 ```
 
-As usual, this does not bring `DummyModule` into scope on any of the process, which requires
-[`using`](@ref) or [`import`](@ref).  Moreover, when `DummyModule` is brought into scope on one process, it
+As usual, this does not bring `DummyModule` into scope on any of the processes, which requires
+[`using`](@ref) or [`import`](@ref). Moreover, when `DummyModule` is brought into scope on one process, it
 is not on any other:
 
 ```julia-repl
@@ -209,7 +209,7 @@ MyType(7)
 
 julia> fetch(@spawnat 2 MyType(7))
 ERROR: On worker 2:
-UndefVarError: `MyType` not defined
+UndefVarError: `MyType` not defined in `Main`
 ⋮
 
 julia> fetch(@spawnat 2 DummyModule.MyType(7))
@@ -262,7 +262,7 @@ as a programmatic means of adding, removing and querying the processes in a clus
 julia> using Distributed
 
 julia> addprocs(2)
-2-element Array{Int64,1}:
+2-element Vector{Int64}:
  2
  3
 ```
@@ -270,10 +270,11 @@ julia> addprocs(2)
 Module [`Distributed`](@ref man-distributed) must be explicitly loaded on the master process before invoking [`addprocs`](@ref).
 It is automatically made available on the worker processes.
 
-Note that workers do not run a `~/.julia/config/startup.jl` startup script, nor do they synchronize
-their global state (such as global variables, new method definitions, and loaded modules) with any
-of the other running processes. You may use `addprocs(exeflags="--project")` to initialize a worker with
-a particular environment, and then `@everywhere using <modulename>` or `@everywhere include("file.jl")`.
+!!! note
+    Note that workers do not run a `~/.julia/config/startup.jl` startup script, nor do they synchronize
+    their global state (such as command-line switches, global variables, new method definitions, and loaded modules) with any
+    of the other running processes. You may use `addprocs(exeflags="--project")` to initialize a worker with
+    a particular environment, and then `@everywhere using <modulename>` or `@everywhere include("file.jl")`.
 
 Other types of clusters can be supported by writing your own custom `ClusterManager`, as described
 below in the [ClusterManagers](@ref) section.
@@ -539,9 +540,72 @@ Methods [`put!`](@ref), [`take!`](@ref), [`fetch`](@ref), [`isready`](@ref) and
 on a [`RemoteChannel`](@ref) are proxied onto the backing store on the remote process.
 
 [`RemoteChannel`](@ref) can thus be used to refer to user implemented `AbstractChannel` objects.
-A simple example of this is provided in `dictchannel.jl` in the
-[Examples repository](https://github.com/JuliaAttic/Examples), which uses a dictionary as its
-remote store.
+A simple example of this is the following `DictChannel` which uses a dictionary as its
+remote store:
+
+```jldoctest
+julia> struct DictChannel{T} <: AbstractChannel{T}
+           d::Dict
+           cond_take::Threads.Condition    # waiting for data to become available
+           DictChannel{T}() where {T} = new(Dict(), Threads.Condition())
+           DictChannel() = DictChannel{Any}()
+       end
+
+julia> begin
+       function Base.put!(D::DictChannel, k, v)
+           @lock D.cond_take begin
+               D.d[k] = v
+               notify(D.cond_take)
+           end
+           return D
+       end
+       function Base.take!(D::DictChannel, k)
+           @lock D.cond_take begin
+               v = fetch(D, k)
+               delete!(D.d, k)
+               return v
+           end
+       end
+       Base.isready(D::DictChannel) = @lock D.cond_take !isempty(D.d)
+       Base.isready(D::DictChannel, k) = @lock D.cond_take haskey(D.d, k)
+       function Base.fetch(D::DictChannel, k)
+           @lock D.cond_take begin
+               wait(D, k)
+               return D.d[k]
+           end
+       end
+       function Base.wait(D::DictChannel, k)
+           @lock D.cond_take begin
+               while !isready(D, k)
+                   wait(D.cond_take)
+               end
+           end
+       end
+       end;
+
+julia> d = DictChannel();
+
+julia> isready(d)
+false
+
+julia> put!(d, :k, :v);
+
+julia> isready(d, :k)
+true
+
+julia> fetch(d, :k)
+:v
+
+julia> wait(d, :k)
+
+julia> take!(d, :k)
+:v
+
+julia> isready(d, :k)
+false
+```
+
+
 
 
 ## Channels and RemoteChannels
@@ -593,15 +657,15 @@ julia> function make_jobs(n)
 
 julia> n = 12;
 
-julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs
+julia> errormonitor(Threads.@spawn make_jobs(n)); # feed the jobs channel with "n" jobs
 
 julia> for p in workers() # start tasks on the workers to process requests in parallel
            remote_do(do_work, p, jobs, results)
        end
 
 julia> @elapsed while n > 0 # print out results
-           job_id, exec_time, where = take!(results)
-           println("$job_id finished in $(round(exec_time; digits=2)) seconds on worker $where")
+           job_id, exec_time, worker_id = take!(results)
+           println("$job_id finished in $(round(exec_time; digits=2)) seconds on worker $worker_id")
            global n = n - 1
        end
 1 finished in 0.18 seconds on worker 4
@@ -670,7 +734,7 @@ serialization/deserialization of data. Consequently, the call refers to the same
 as passed - no copies are created. This behavior is highlighted below:
 
 ```julia-repl
-julia> using Distributed;
+julia> using Distributed
 
 julia> rc = RemoteChannel(()->Channel(3));   # RemoteChannel created on local node
 
@@ -684,7 +748,7 @@ julia> for i in 1:3
 julia> result = [take!(rc) for _ in 1:3];
 
 julia> println(result);
-Array{Int64,1}[[3], [3], [3]]
+[[3], [3], [3]]
 
 julia> println("Num Unique objects : ", length(unique(map(objectid, result))));
 Num Unique objects : 1
@@ -703,7 +767,7 @@ julia> for i in 1:3
 julia> result = [take!(rc) for _ in 1:3];
 
 julia> println(result);
-Array{Int64,1}[[1], [2], [3]]
+[[1], [2], [3]]
 
 julia> println("Num Unique objects : ", length(unique(map(objectid, result))));
 Num Unique objects : 3
@@ -750,16 +814,18 @@ will always operate on copies of arguments.
 
 ## [Shared Arrays](@id man-shared-arrays)
 
-Shared Arrays use system shared memory to map the same array across many processes. While there
-are some similarities to a [`DArray`](https://github.com/JuliaParallel/DistributedArrays.jl), the
-behavior of a [`SharedArray`](@ref) is quite different. In a [`DArray`](https://github.com/JuliaParallel/DistributedArrays.jl),
-each process has local access to just a chunk of the data, and no two processes share the same
-chunk; in contrast, in a [`SharedArray`](@ref) each "participating" process has access to the
-entire array.  A [`SharedArray`](@ref) is a good choice when you want to have a large amount of
-data jointly accessible to two or more processes on the same machine.
+Shared Arrays use system shared memory to map the same array across many processes. A
+[`SharedArray`](@ref) is a good choice when you want to have a large amount of data jointly
+accessible to two or more processes on the same machine. Shared Array support is available via the
+module `SharedArrays`, which must be explicitly loaded on all participating workers.
 
-Shared Array support is available via module `SharedArrays` which must be explicitly loaded on
-all participating workers.
+A complementary data structure is provided by the external package
+[`DistributedArrays.jl`](https://github.com/JuliaParallel/DistributedArrays.jl) in the form of a
+`DArray`. While there are some similarities to a [`SharedArray`](@ref), the behavior of a
+[`DArray`](https://github.com/JuliaParallel/DistributedArrays.jl) is quite different. In a
+[`SharedArray`](@ref), each "participating" process has access to the entire array; in contrast, in
+a [`DArray`](https://github.com/JuliaParallel/DistributedArrays.jl), each process has local access
+to just a chunk of the data, and no two processes share the same chunk.
 
 [`SharedArray`](@ref) indexing (assignment and accessing values) works just as with regular arrays,
 and is efficient because the underlying memory is available to the local process. Therefore,
@@ -789,7 +855,7 @@ Here's a brief example:
 julia> using Distributed
 
 julia> addprocs(3)
-3-element Array{Int64,1}:
+3-element Vector{Int64}:
  2
  3
  4
@@ -797,7 +863,7 @@ julia> addprocs(3)
 julia> @everywhere using SharedArrays
 
 julia> S = SharedArray{Int,2}((3,4), init = S -> S[localindices(S)] = repeat([myid()], length(localindices(S))))
-3×4 SharedArray{Int64,2}:
+3×4 SharedMatrix{Int64}:
  2  2  3  4
  2  3  3  4
  2  3  4  4
@@ -806,7 +872,7 @@ julia> S[3,2] = 7
 7
 
 julia> S
-3×4 SharedArray{Int64,2}:
+3×4 SharedMatrix{Int64}:
  2  2  3  4
  2  3  3  4
  2  7  4  4
@@ -818,7 +884,7 @@ you wish:
 
 ```julia-repl
 julia> S = SharedArray{Int,2}((3,4), init = S -> S[indexpids(S):length(procs(S)):length(S)] = repeat([myid()], length( indexpids(S):length(procs(S)):length(S))))
-3×4 SharedArray{Int64,2}:
+3×4 SharedMatrix{Int64}:
  2  2  2  2
  3  3  3  3
  4  4  4  4
@@ -830,7 +896,7 @@ conflicts. For example:
 ```julia
 @sync begin
     for p in procs(S)
-        @async begin
+        Threads.@spawn begin
             remotecall_wait(fill!, p, S, p)
         end
     end
@@ -912,7 +978,7 @@ and one that delegates in chunks:
 julia> function advection_shared!(q, u)
            @sync begin
                for p in procs(q)
-                   @async remotecall_wait(advection_shared_chunk!, p, q, u)
+                   Threads.@spawn remotecall_wait(advection_shared_chunk!, p, q, u)
                end
            end
            q
@@ -1263,8 +1329,11 @@ in future releases.
 ## Noteworthy external packages
 
 Outside of Julia parallelism there are plenty of external packages that should be mentioned.
-For example [MPI.jl](https://github.com/JuliaParallel/MPI.jl) is a Julia wrapper for the `MPI` protocol, [Dagger.jl](https://github.com/JuliaParallel/Dagger.jl) provides functionality similar to Python's [Dask](https://dask.org/), and
-[DistributedArrays.jl](https://github.com/JuliaParallel/Distributedarrays.jl) provides array operations distributed across workers, as presented in [Shared Arrays](@ref).
+For example, [`MPI.jl`](https://github.com/JuliaParallel/MPI.jl) is a Julia wrapper for the `MPI`
+protocol, [`Dagger.jl`](https://github.com/JuliaParallel/Dagger.jl) provides functionality similar to
+Python's [Dask](https://dask.org/), and
+[`DistributedArrays.jl`](https://github.com/JuliaParallel/Distributedarrays.jl) provides array
+operations distributed across workers, as [outlined above](@ref man-shared-arrays).
 
 A mention must be made of Julia's GPU programming ecosystem, which includes:
 
@@ -1302,7 +1371,7 @@ julia> all(C .≈ 4*π)
 true
 
 julia> typeof(C)
-Array{Float64,1}
+Vector{Float64} (alias for Array{Float64, 1})
 
 julia> dB = distribute(B);
 
@@ -1314,7 +1383,7 @@ julia> all(dC .≈ 4*π)
 true
 
 julia> typeof(dC)
-DistributedArrays.DArray{Float64,1,Array{Float64,1}}
+DistributedArrays.DArray{Float64,1,Vector{Float64}}
 
 julia> cuB = CuArray(B);
 
@@ -1350,7 +1419,7 @@ function declaration, let's see if it works with the aforementioned datatypes:
 julia> M = [2. 1; 1 1];
 
 julia> v = rand(2)
-2-element Array{Float64,1}:
+2-element Vector{Float64}:
 0.40395
 0.445877
 
@@ -1373,7 +1442,7 @@ julia> dv = distribute(v);
 julia> dC = power_method(dM, dv);
 
 julia> typeof(dC)
-Tuple{DistributedArrays.DArray{Float64,1,Array{Float64,1}},Float64}
+Tuple{DistributedArrays.DArray{Float64,1,Vector{Float64}},Float64}
 ```
 
 To end this short exposure to external packages, we can consider `MPI.jl`, a Julia wrapper
diff --git a/doc/src/manual/documentation.md b/doc/src/manual/documentation.md
index 4c724e1deaaeb..dff410c74aa2e 100644
--- a/doc/src/manual/documentation.md
+++ b/doc/src/manual/documentation.md
@@ -19,7 +19,11 @@ environments provide a way to access documentation directly:
 - In [Juno](https://junolab.org) using `Ctrl-J, Ctrl-D` will show the documentation for the object
   under the cursor.
 
-## Writing Documentation
+
+`Docs.hasdoc(module, name)::Bool` tells whether a name has a docstring. `Docs.undocumented_names(module; all)`
+returns the undocumented names in a module.
+
+## [Writing Documentation](@id man-writing-documentation)
 
 Julia enables package developers and users to document functions, types and other objects easily
 via a built-in documentation system.
@@ -30,9 +34,11 @@ The basic syntax is simple: any string appearing just before an object
 the documented object. Here is a basic example:
 
 ```julia
-"Tell whether there are too foo items in the array."
+"Tell whether there are too many foo items in the array."
 foo(xs::Array) = ...
 ```
+!!! note "Reminder"
+    Any empty lines between the docstring and the object being documented detach the former from the latter, making the docstring ineffective.
 
 Documentation is interpreted as [Markdown](https://en.wikipedia.org/wiki/Markdown), so you can
 use indentation and code fences to delimit code examples from text. Technically, any object can
@@ -77,6 +83,35 @@ As in the example above, we recommend following some simple conventions when wri
    accepts many keyword arguments, only include a `<keyword arguments>` placeholder in the signature
    (i.e. `f(x; <keyword arguments>)`), and give the complete list under an `# Arguments` section
    (see point 4 below).
+
+   Use this style to document the return type or give the return value a name:
+
+   ```julia
+   # Naming the return value or its type is not necessary (this is the most common case)
+   """
+      sum(itr; [init])
+
+   ...
+   """
+
+   # The return type is easily documented and critical to the semantics of this function
+   """
+      vec(x::AbstractArray)::AbstractVector
+
+   ...
+   """
+
+   # Naming and/or destructuring the return value clarifies the semantics of this function
+   """
+      splitdir(path::AbstractString) -> (dir::AbstractString, file::AbstractString)
+   ...
+   """
+   ```
+   When included, a return type should be written after the signature, separated by `::`,
+   while a named return value should be separated by ` -> `, with a space on both sides.
+   Return types and return values should be valid Julia expressions when possible.
+   Macro docstring signatures that annotate return types or return values should use
+   parentheses to clarify where the macro arguments end and return type or return value begins.
 2. Include a single one-line sentence describing what the function does or what the object represents
    after the simplified signature block. If needed, provide more details in a second paragraph, after
    a blank line.
@@ -138,7 +173,7 @@ As in the example above, we recommend following some simple conventions when wri
    # Examples
    ```jldoctest
    julia> a = [1 2; 3 4]
-   2×2 Array{Int64,2}:
+   2×2 Matrix{Int64}:
     1  2
     3  4
    ```
@@ -213,12 +248,12 @@ As in the example above, we recommend following some simple conventions when wri
 
    Docstrings are edited using the same tools as code. Therefore, the same conventions should apply.
    It is recommended that lines are at most 92 characters wide.
-6. Provide information allowing custom types to implement the function in an
+10. Provide information allowing custom types to implement the function in an
    `# Implementation` section. These implementation details are intended for developers
    rather than users, explaining e.g. which functions should be overridden and which
    functions automatically use appropriate fallbacks. Such details are best kept separate
    from the main description of the function's behavior.
-5. For long docstrings, consider splitting the documentation with an
+11. For long docstrings, consider splitting the documentation with an
    `# Extended help` header. The typical help-mode will show only the
    material above the header; you can access the full help by adding a '?'
    at the beginning of the expression (i.e., "??foo" rather than "?foo").
@@ -298,20 +333,25 @@ documentation between different versions of a function:
 @doc (@doc foo!) foo
 ```
 
+!!! compat "Julia 1.11"
+    In Julia 1.11 and newer, retrieving documentation with the `@doc` macro requires that
+    the `REPL` stdlib is loaded.
+
 Or for use with Julia's metaprogramming functionality:
 
 ```julia
 for (f, op) in ((:add, :+), (:subtract, :-), (:multiply, :*), (:divide, :/))
     @eval begin
-        $f(a,b) = $op(a,b)
+        $f(a, b) = $op(a, b)
     end
 end
-@doc "`add(a,b)` adds `a` and `b` together" add
-@doc "`subtract(a,b)` subtracts `b` from `a`" subtract
+@doc "`add(a, b)` adds `a` and `b` together" add
+@doc "`subtract(a, b)` subtracts `b` from `a`" subtract
 ```
 
-Documentation in non-toplevel blocks, such as `begin`, `if`, `for`, and `let`, should be
-added to the documentation system via `@doc` as well. For example:
+Documentation in non-toplevel blocks, such as `begin`, `if`, `for`, `let`, and
+inner constructors, should be added to the documentation system via `@doc` as
+well. For example:
 
 ```julia
 if condition()
@@ -402,7 +442,7 @@ f(x) = x
 
 "..."
 function f(x)
-    x
+    return x
 end
 
 "..."
@@ -429,10 +469,13 @@ Adds docstring `"..."` to the `@m(::Any)` macro definition.
 
 ```julia
 "..."
-:(@m)
+:(@m1)
+
+"..."
+macro m2 end
 ```
 
-Adds docstring `"..."` to the macro named `@m`.
+Adds docstring `"..."` to the macros named `@m1` and `@m2`.
 
 ### Types
 
@@ -453,6 +496,20 @@ end
 
 Adds the docstring `"..."` to types `T1`, `T2`, and `T3`.
 
+```
+"..."
+T1
+
+"..."
+T2
+
+"..."
+T3
+```
+
+Adds the docstring `"..."` to types `T1`, `T2`, and `T3`.
+The previous version is the preferred syntax, however both are equivalent.
+
 ```julia
 "..."
 struct T
@@ -460,11 +517,17 @@ struct T
     x
     "y"
     y
+
+    @doc "Inner constructor"
+    function T()
+        new(...)
+    end
 end
 ```
 
-Adds docstring `"..."` to type `T`, `"x"` to field `T.x` and `"y"` to field `T.y`. Also applicable
-to `mutable struct` types.
+Adds docstring `"..."` to type `T`, `"x"` to field `T.x`, `"y"` to field `T.y`,
+and `"Inner constructor"` to the inner constructor `T()`. Also applicable to
+`mutable struct` types.
 
 ### Modules
 
@@ -483,6 +546,20 @@ end
 Adds docstring `"..."` to the `Module` `M`. Adding the docstring above the `Module` is the preferred
 syntax, however both are equivalent.
 
+The module docstring is evaluated *inside* the scope of the module, allowing
+access to all the symbols defined in and imported into the module:
+
+```julia
+"The magic number is $(MAGIC)."
+module DocStringEval
+const MAGIC = 42
+end
+```
+
+Documenting a `baremodule` by placing a docstring above the expression automatically imports
+`@doc` into the module. These imports must be done manually when the module expression is not
+documented:
+
 ```julia
 "..."
 baremodule M
@@ -499,10 +576,6 @@ f(x) = x
 end
 ```
 
-Documenting a `baremodule` by placing a docstring above the expression automatically imports
-`@doc` into the module. These imports must be done manually when the module expression is not
-documented.
-
 ### Global Variables
 
 ```julia
diff --git a/doc/src/manual/embedding.md b/doc/src/manual/embedding.md
index 2b6e48c533849..1c47777c9777c 100644
--- a/doc/src/manual/embedding.md
+++ b/doc/src/manual/embedding.md
@@ -54,7 +54,7 @@ linking against `libjulia`.
 The first thing that must be done before calling any other Julia C function is to
 initialize Julia. This is done by calling `jl_init`, which tries to automatically determine
 Julia's install location. If you need to specify a custom location, or specify which system
-image to load, use `jl_init_with_image` instead.
+image to load, use `jl_init_with_image_file` or `jl_init_with_image_handle` instead.
 
 The second statement in the test program evaluates a Julia statement using a call to `jl_eval_string`.
 
@@ -228,7 +228,7 @@ passing arguments computed in C to Julia. For this you will need to invoke Julia
 using `jl_call`:
 
 ```c
-jl_function_t *func = jl_get_function(jl_base_module, "sqrt");
+jl_value_t *func = jl_get_function(jl_base_module, "sqrt");
 jl_value_t *argument = jl_box_float64(2.0);
 jl_value_t *ret = jl_call1(func, argument);
 ```
@@ -240,14 +240,14 @@ the function is called using `jl_call1`. `jl_call0`, `jl_call2`, and `jl_call3`
 exist, to conveniently handle different numbers of arguments. To pass more arguments, use `jl_call`:
 
 ```
-jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t nargs)
+jl_value_t *jl_call(jl_value_t *f, jl_value_t **args, int32_t nargs)
 ```
 
 Its second argument `args` is an array of `jl_value_t*` arguments and `nargs` is the number of
 arguments.
 
 There is also an alternative, possibly simpler, way of calling Julia functions and that is via [`@cfunction`](@ref).
-Using `@cfunction` allows you to do the type conversions on the Julia side which typically is easier than doing it on
+Using `@cfunction` allows you to do the type conversions on the Julia side, which is typically easier than doing it on
 the C side. The `sqrt` example above would with `@cfunction` be written as:
 
 ```c
@@ -255,7 +255,10 @@ double (*sqrt_jl)(double) = jl_unbox_voidpointer(jl_eval_string("@cfunction(sqrt
 double ret = sqrt_jl(2.0);
 ```
 
-where we first define a C callable function in Julia, extract the function pointer from it and finally call it.
+where we first define a C callable function in Julia, extract the function pointer from it, and finally call it.
+In addition to simplifying type conversions by doing them in the higher-level language, calling Julia functions
+via `@cfunction` pointers eliminates the dynamic-dispatch overhead required by `jl_call` (for which all of the
+arguments are "boxed"), and should have performance equivalent to native C function pointers.
 
 ## Memory Management
 
@@ -316,7 +319,7 @@ jl_value_t *ret1 = jl_eval_string("sqrt(2.0)");
 JL_GC_PUSH1(&ret1);
 jl_value_t *ret2 = 0;
 {
-    jl_function_t *func = jl_get_function(jl_base_module, "exp");
+    jl_value_t *func = jl_get_function(jl_base_module, "exp");
     ret2 = jl_call1(func, ret1);
     JL_GC_PUSH1(&ret2);
     // Do something with ret2.
@@ -347,7 +350,7 @@ properly with mutable types.
 ```c
 // This functions shall be executed only once, during the initialization.
 jl_value_t* refs = jl_eval_string("refs = IdDict()");
-jl_function_t* setindex = jl_get_function(jl_base_module, "setindex!");
+jl_value_t* setindex = jl_get_function(jl_base_module, "setindex!");
 
 ...
 
@@ -371,7 +374,7 @@ container is created by `jl_call*`, then you will need to reload the pointer to
 ```c
 // This functions shall be executed only once, during the initialization.
 jl_value_t* refs = jl_eval_string("refs = IdDict()");
-jl_function_t* setindex = jl_get_function(jl_base_module, "setindex!");
+jl_value_t* setindex = jl_get_function(jl_base_module, "setindex!");
 jl_datatype_t* reft = (jl_datatype_t*)jl_eval_string("Base.RefValue{Any}");
 
 ...
@@ -398,7 +401,7 @@ The GC can be allowed to deallocate a variable by removing the reference to it f
 the function `delete!`, provided that no other reference to the variable is kept anywhere:
 
 ```c
-jl_function_t* delete = jl_get_function(jl_base_module, "delete!");
+jl_value_t* delete = jl_get_function(jl_base_module, "delete!");
 jl_call2(delete, refs, rvar);
 ```
 
@@ -409,7 +412,7 @@ per pointer using
 ```c
 jl_module_t *mod = jl_main_module;
 jl_sym_t *var = jl_symbol("var");
-jl_binding_t *bp = jl_get_binding_wr(mod, var);
+jl_binding_t *bp = jl_get_binding_wr(mod, var, 1);
 jl_checked_assignment(bp, mod, var, val);
 ```
 
@@ -432,26 +435,28 @@ object has just been allocated and no garbage collection has run since then. Not
 `jl_...` functions can sometimes invoke garbage collection.
 
 The write barrier is also necessary for arrays of pointers when updating their data directly.
-For example:
+Calling `jl_array_ptr_set` is usually much preferred. But direct updates can be done. For example:
 
 ```c
 jl_array_t *some_array = ...; // e.g. a Vector{Any}
-void **data = (void**)jl_array_data(some_array);
+void **data = jl_array_data(some_array, void*);
 jl_value_t *some_value = ...;
 data[0] = some_value;
-jl_gc_wb(some_array, some_value);
+jl_gc_wb(jl_array_owner(some_array), some_value);
 ```
 
 ### Controlling the Garbage Collector
 
 There are some functions to control the GC. In normal use cases, these should not be necessary.
 
-| Function             | Description                                  |
-|:-------------------- |:-------------------------------------------- |
-| `jl_gc_collect()`    | Force a GC run                               |
-| `jl_gc_enable(0)`    | Disable the GC, return previous state as int |
-| `jl_gc_enable(1)`    | Enable the GC,  return previous state as int |
-| `jl_gc_is_enabled()` | Return current state as int                  |
+| Function                           | Description                                                         |
+| :--------------------------------- | :------------------------------------------------------------------ |
+| `jl_gc_collect(JL_GC_FULL)`        | Force a GC run on all objects                                       |
+| `jl_gc_collect(JL_GC_INCREMENTAL)` | Force a GC run only on young objects                                |
+| `jl_gc_collect(JL_GC_AUTO)`        | Force a GC run, automatically choosing between full and incremental |
+| `jl_gc_enable(0)`                  | Disable the GC, return previous state as int                        |
+| `jl_gc_enable(1)`                  | Enable the GC, return previous state as int                         |
+| `jl_gc_is_enabled()`               | Return current state as int                                         |
 
 ## Working with Arrays
 
@@ -487,20 +492,20 @@ referenced.
 In order to access the data of `x`, we can use `jl_array_data`:
 
 ```c
-double *xData = (double*)jl_array_data(x);
+double *xData = jl_array_data(x, double);
 ```
 
 Now we can fill the array:
 
 ```c
-for(size_t i=0; i<jl_array_len(x); i++)
+for (size_t i = 0; i < jl_array_nrows(x); i++)
     xData[i] = i;
 ```
 
 Now let us call a Julia function that performs an in-place operation on `x`:
 
 ```c
-jl_function_t *func = jl_get_function(jl_base_module, "reverse!");
+jl_value_t *func = jl_get_function(jl_base_module, "reverse!");
 jl_call1(func, (jl_value_t*)x);
 ```
 
@@ -512,7 +517,7 @@ If a Julia function returns an array, the return value of `jl_eval_string` and `
 cast to a `jl_array_t*`:
 
 ```c
-jl_function_t *func  = jl_get_function(jl_base_module, "reverse");
+jl_value_t *func  = jl_get_function(jl_base_module, "reverse");
 jl_array_t *y = (jl_array_t*)jl_call1(func, (jl_value_t*)x);
 ```
 
@@ -527,10 +532,11 @@ that creates a 2D array and accesses its properties:
 ```c
 // Create 2D array of float64 type
 jl_value_t *array_type = jl_apply_array_type((jl_value_t*)jl_float64_type, 2);
-jl_array_t *x  = jl_alloc_array_2d(array_type, 10, 5);
+int dims[] = {10,5};
+jl_array_t *x  = jl_alloc_array_nd(array_type, dims, 2);
 
 // Get array pointer
-double *p = (double*)jl_array_data(x);
+double *p = jl_array_data(x, double);
 // Get number of dimensions
 int ndims = jl_array_ndims(x);
 // Get the size of the i-th dim
@@ -658,7 +664,7 @@ double c_func(int i)
     printf("[C %08x] i = %d\n", pthread_self(), i);
 
     // Call the Julia sqrt() function to compute the square root of i, and return it
-    jl_function_t *sqrt = jl_get_function(jl_base_module, "sqrt");
+    jl_value_t *sqrt = jl_get_function(jl_base_module, "sqrt");
     jl_value_t* arg = jl_box_int32(i);
     double ret = jl_unbox_float64(jl_call1(sqrt, arg));
 
diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md
index eb26063a5e61e..3bf88afcc8992 100644
--- a/doc/src/manual/environment-variables.md
+++ b/doc/src/manual/environment-variables.md
@@ -2,7 +2,7 @@
 
 Julia can be configured with a number of environment variables, set either in
 the usual way for each operating system, or in a portable way from within Julia.
-Supposing that you want to set the environment variable `JULIA_EDITOR` to `vim`,
+Supposing that you want to set the environment variable [`JULIA_EDITOR`](@ref JULIA_EDITOR) to `vim`,
 you can type `ENV["JULIA_EDITOR"] = "vim"` (for instance, in the REPL) to make
 this change on a case by case basis, or add the same to the user configuration
 file `~/.julia/config/startup.jl` in the user's home directory to have a
@@ -16,15 +16,26 @@ including those which include `JULIA` in their names.
 
 !!! note
 
-    Some variables, such as `JULIA_NUM_THREADS` and `JULIA_PROJECT`, need to be set before Julia
-    starts, therefore adding these to `~/.julia/config/startup.jl` is too late in the startup process.
+    It is recommended to avoid changing environment variables during runtime,
+    such as within a `~/.julia/config/startup.jl`.
+
+    One reason is that some julia language variables, such as [`JULIA_NUM_THREADS`](@ref JULIA_NUM_THREADS)
+    and [`JULIA_PROJECT`](@ref JULIA_PROJECT), need to be set before Julia starts.
+
+    Similarly, `__init__()` functions of user modules in the sysimage (via PackageCompiler) are
+    run before `startup.jl`, so setting environment variables in a `startup.jl` may be too late for
+    user code.
+
+    Further, changing environment variables during runtime can introduce data races into
+    otherwise benign code.
+
     In Bash, environment variables can either be set manually by running, e.g.,
     `export JULIA_NUM_THREADS=4` before starting Julia, or by adding the same command to
     `~/.bashrc` or `~/.bash_profile` to set the variable each time Bash is started.
 
 ## File locations
 
-### `JULIA_BINDIR`
+### [`JULIA_BINDIR`](@id JULIA_BINDIR)
 
 The absolute path of the directory containing the Julia executable, which sets
 the global variable [`Sys.BINDIR`](@ref). If `$JULIA_BINDIR` is not set, then
@@ -61,7 +72,7 @@ by default (via `Base.load_julia_startup()`).
 
 For example, a Linux installation with a Julia executable located at
 `/bin/julia`, a `DATAROOTDIR` of `../share`, and a `SYSCONFDIR` of `../etc` will
-have `JULIA_BINDIR` set to `/bin`, a source-file search path of
+have [`JULIA_BINDIR`](@ref JULIA_BINDIR) set to `/bin`, a source-file search path of
 
 ```
 /share/julia/base
@@ -73,7 +84,7 @@ and a global configuration search path of
 /etc/julia/startup.jl
 ```
 
-### `JULIA_PROJECT`
+### [`JULIA_PROJECT`](@id JULIA_PROJECT)
 
 A directory path that indicates which project should be the initial active project.
 Setting this environment variable has the same effect as specifying the `--project`
@@ -85,27 +96,27 @@ the chapter on [Code Loading](@ref code-loading).
 
 !!! note
 
-    `JULIA_PROJECT` must be defined before starting julia; defining it in `startup.jl`
+    [`JULIA_PROJECT`](@ref JULIA_PROJECT) must be defined before starting julia; defining it in `startup.jl`
     is too late in the startup process.
 
-### `JULIA_LOAD_PATH`
+### [`JULIA_LOAD_PATH`](@id JULIA_LOAD_PATH)
 
-The `JULIA_LOAD_PATH` environment variable is used to populate the global Julia
+The [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) environment variable is used to populate the global Julia
 [`LOAD_PATH`](@ref) variable, which determines which packages can be loaded via
 `import` and `using` (see [Code Loading](@ref code-loading)).
 
-Unlike the shell `PATH` variable, empty entries in `JULIA_LOAD_PATH` are expanded to
+Unlike the shell `PATH` variable, empty entries in [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) are expanded to
 the default value of `LOAD_PATH`, `["@", "@v#.#", "@stdlib"]` when populating
 `LOAD_PATH`. This allows easy appending, prepending, etc. of the load path value in
-shell scripts regardless of whether `JULIA_LOAD_PATH` is already set or not. For
+shell scripts regardless of whether [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) is already set or not. For
 example, to prepend the directory `/foo/bar` to `LOAD_PATH` just do
 ```sh
 export JULIA_LOAD_PATH="/foo/bar:$JULIA_LOAD_PATH"
 ```
-If the `JULIA_LOAD_PATH` environment variable is already set, its old value will be
-prepended with `/foo/bar`. On the other hand, if `JULIA_LOAD_PATH` is not set, then
+If the [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) environment variable is already set, its old value will be
+prepended with `/foo/bar`. On the other hand, if [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) is not set, then
 it will be set to `/foo/bar:` which will expand to a `LOAD_PATH` value of
-`["/foo/bar", "@", "@v#.#", "@stdlib"]`. If `JULIA_LOAD_PATH` is set to the empty
+`["/foo/bar", "@", "@v#.#", "@stdlib"]`. If [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) is set to the empty
 string, it expands to an empty `LOAD_PATH` array. In other words, the empty string
 is interpreted as a zero-element array, not a one-element array of the empty string.
 This behavior was chosen so that it would be possible to set an empty load path via
@@ -117,31 +128,46 @@ environment variable or if it must have a value, set it to the string `:`.
     On Windows, path elements are separated by the `;` character, as is the case with
     most path lists on Windows. Replace `:` with `;` in the above paragraph.
 
-### `JULIA_DEPOT_PATH`
-
-The `JULIA_DEPOT_PATH` environment variable is used to populate the global Julia
-[`DEPOT_PATH`](@ref) variable, which controls where the package manager, as well
-as Julia's code loading mechanisms, look for package registries, installed
-packages, named environments, repo clones, cached compiled package images,
-configuration files, and the default location of the REPL's history file.
-
-Unlike the shell `PATH` variable but similar to `JULIA_LOAD_PATH`, empty entries in
-`JULIA_DEPOT_PATH` are expanded to the default value of `DEPOT_PATH`. This allows
-easy appending, prepending, etc. of the depot path value in shell scripts regardless
-of whether `JULIA_DEPOT_PATH` is already set or not. For example, to prepend the
-directory `/foo/bar` to `DEPOT_PATH` just do
+### [`JULIA_DEPOT_PATH`](@id JULIA_DEPOT_PATH)
+
+The [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) environment variable is used to populate the
+global Julia [`DEPOT_PATH`](@ref) variable, which controls where the package manager, as well
+as Julia's code loading mechanisms, look for package registries, installed packages, named
+environments, repo clones, cached compiled package images, configuration files, and the default
+location of the REPL's history file.
+
+Unlike the shell `PATH` variable but similar to [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH),
+empty entries in [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) have special behavior:
+- At the end, it is expanded to the default value of `DEPOT_PATH`, *excluding* the user depot.
+- At the start, it is expanded to the default value of `DEPOT_PATH`, *including* the user depot.
+This allows easy overriding of the user depot, while still retaining access to resources that
+are bundled with Julia, like cache files, artifacts, etc. For example, to switch the user depot
+to `/foo/bar` use a trailing `:`
 ```sh
-export JULIA_DEPOT_PATH="/foo/bar:$JULIA_DEPOT_PATH"
+export JULIA_DEPOT_PATH="/foo/bar:"
 ```
-If the `JULIA_DEPOT_PATH` environment variable is already set, its old value will be
-prepended with `/foo/bar`. On the other hand, if `JULIA_DEPOT_PATH` is not set, then
-it will be set to `/foo/bar:` which will have the effect of prepending `/foo/bar` to
-the default depot path. If `JULIA_DEPOT_PATH` is set to the empty string, it expands
-to an empty `DEPOT_PATH` array. In other words, the empty string is interpreted as a
-zero-element array, not a one-element array of the empty string. This behavior was
-chosen so that it would be possible to set an empty depot path via the environment
-variable. If you want the default depot path, either unset the environment variable
-or if it must have a value, set it to the string `:`.
+All package operations, like cloning registries or installing packages, will now write to
+`/foo/bar`, but since the empty entry is expanded to the default system depot, any bundled
+resources will still be available. If you really only want to use the depot at `/foo/bar`,
+and not load any bundled resources, simply set the environment variable to `/foo/bar`
+without the trailing colon.
+
+To append a depot at the end of the full default list, including the default user depot, use a
+leading `:`
+```sh
+export JULIA_DEPOT_PATH=":/foo/bar"
+```
+
+There are two exceptions to the above rule. First, if [`JULIA_DEPOT_PATH`](@ref
+JULIA_DEPOT_PATH) is set to the empty string, it expands to an empty `DEPOT_PATH` array. In
+other words, the empty string is interpreted as a zero-element array, not a one-element
+array of the empty string. This behavior was chosen so that it would be possible to set an
+empty depot path via the environment variable.
+
+Second, if no user depot is specified in [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH), then
+the empty entry is expanded to the default depot *including* the user depot. This makes
+it possible to use the default depot, as if the environment variable was unset, by setting
+it to the string `:`.
 
 !!! note
 
@@ -149,12 +175,12 @@ or if it must have a value, set it to the string `:`.
     most path lists on Windows. Replace `:` with `;` in the above paragraph.
 
 !!! note
-    `JULIA_DEPOT_PATH` must be defined before starting julia; defining it in
+    [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) must be defined before starting julia; defining it in
     `startup.jl` is too late in the startup process; at that point you can instead
     directly modify the `DEPOT_PATH` array, which is populated from the environment
     variable.
 
-### `JULIA_HISTORY`
+### [`JULIA_HISTORY`](@id JULIA_HISTORY)
 
 The absolute path `REPL.find_hist_file()` of the REPL's history file. If
 `$JULIA_HISTORY` is not set, then `REPL.find_hist_file()` defaults to
@@ -163,96 +189,116 @@ The absolute path `REPL.find_hist_file()` of the REPL's history file. If
 $(DEPOT_PATH[1])/logs/repl_history.jl
 ```
 
-### [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@id env-max-num-precompile-files)
+### [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@id JULIA_MAX_NUM_PRECOMPILE_FILES)
 
 Sets the maximum number of different instances of a single package that are to be stored in the precompile cache (default = 10).
 
-### `JULIA_VERBOSE_LINKING`
+### [`JULIA_VERBOSE_LINKING`](@id JULIA_VERBOSE_LINKING)
 
 If set to true, linker commands will be displayed during precompilation.
 
 ## Pkg.jl
 
-### `JULIA_CI`
+### [`JULIA_CI`](@id JULIA_CI)
 
 If set to `true`, this indicates to the package server that any package operations are part of a continuous integration (CI) system for the purposes of gathering package usage statistics.
 
-### `JULIA_NUM_PRECOMPILE_TASKS`
+### [`JULIA_NUM_PRECOMPILE_TASKS`](@id JULIA_NUM_PRECOMPILE_TASKS)
 
 The number of parallel tasks to use when precompiling packages. See [`Pkg.precompile`](https://pkgdocs.julialang.org/v1/api/#Pkg.precompile).
 
-### `JULIA_PKG_DEVDIR`
+### [`JULIA_PKG_DEVDIR`](@id JULIA_PKG_DEVDIR)
 
 The default directory used by [`Pkg.develop`](https://pkgdocs.julialang.org/v1/api/#Pkg.develop) for downloading packages.
 
-### `JULIA_PKG_IGNORE_HASHES`
+### [`JULIA_PKG_IGNORE_HASHES`](@id JULIA_PKG_IGNORE_HASHES)
 
 If set to `1`, this will ignore incorrect hashes in artifacts. This should be used carefully, as it disables verification of downloads, but can resolve issues when moving files across different types of file systems. See [Pkg.jl issue #2317](https://github.com/JuliaLang/Pkg.jl/issues/2317) for more details.
 
 !!! compat "Julia 1.6"
     This is only supported in Julia 1.6 and above.
 
-### `JULIA_PKG_OFFLINE`
+### [`JULIA_PKG_OFFLINE`](@id JULIA_PKG_OFFLINE)
 
 If set to `true`, this will enable offline mode: see [`Pkg.offline`](https://pkgdocs.julialang.org/v1/api/#Pkg.offline).
 
 !!! compat "Julia 1.5"
     Pkg's offline mode requires Julia 1.5 or later.
 
-### `JULIA_PKG_PRECOMPILE_AUTO`
+### [`JULIA_PKG_PRECOMPILE_AUTO`](@id JULIA_PKG_PRECOMPILE_AUTO)
 
 If set to `0`, this will disable automatic precompilation by package actions which change the manifest. See [`Pkg.precompile`](https://pkgdocs.julialang.org/v1/api/#Pkg.precompile).
 
-### `JULIA_PKG_SERVER`
+### [`JULIA_PKG_SERVER`](@id JULIA_PKG_SERVER)
 
 Specifies the URL of the package registry to use. By default, `Pkg` uses
 `https://pkg.julialang.org` to fetch Julia packages. In addition, you can disable the use of the PkgServer
 protocol, and instead access the packages directly from their hosts (GitHub, GitLab, etc.)
 by setting: ``` export JULIA_PKG_SERVER="" ```
 
-### `JULIA_PKG_SERVER_REGISTRY_PREFERENCE`
+### [`JULIA_PKG_SERVER_REGISTRY_PREFERENCE`](@id JULIA_PKG_SERVER_REGISTRY_PREFERENCE)
 
 Specifies the preferred registry flavor. Currently supported values are `conservative`
 (the default), which will only publish resources that have been processed by the storage
 server (and thereby have a higher probability of being available from the PkgServers),
 whereas `eager` will publish registries whose resources have not necessarily been
-processed by the storage servers.  Users behind restrictive firewalls that do not allow
+processed by the storage servers. Users behind restrictive firewalls that do not allow
 downloading from arbitrary servers should not use the `eager` flavor.
 
 !!! compat "Julia 1.7"
     This only affects Julia 1.7 and above.
 
-### `JULIA_PKG_UNPACK_REGISTRY`
+### [`JULIA_PKG_UNPACK_REGISTRY`](@id JULIA_PKG_UNPACK_REGISTRY)
 
 If set to `true`, this will unpack the registry instead of storing it as a compressed tarball.
 
 !!! compat "Julia 1.7"
     This only affects Julia 1.7 and above. Earlier versions will always unpack the registry.
 
-### `JULIA_PKG_USE_CLI_GIT`
+### [`JULIA_PKG_USE_CLI_GIT`](@id JULIA_PKG_USE_CLI_GIT)
 
 If set to `true`, Pkg operations which use the git protocol will use an external `git` executable instead of the default libgit2 library.
 
 !!! compat "Julia 1.7"
     Use of the `git` executable is only supported on Julia 1.7 and above.
 
-### `JULIA_PKGRESOLVE_ACCURACY`
+### [`JULIA_PKGRESOLVE_ACCURACY`](@id JULIA_PKGRESOLVE_ACCURACY)
 
 The accuracy of the package resolver. This should be a positive integer, the default is `1`.
 
+### [`JULIA_PKG_PRESERVE_TIERED_INSTALLED`](@id JULIA_PKG_PRESERVE_TIERED_INSTALLED)
+
+Change the default package installation strategy to `Pkg.PRESERVE_TIERED_INSTALLED`
+to let the package manager try to install versions of packages while keeping as many
+versions of packages already installed as possible.
+
+!!! compat "Julia 1.9"
+    This only affects Julia 1.9 and above.
+
+### [`JULIA_PKG_GC_AUTO`](@id JULIA_PKG_GC_AUTO)
+
+If set to `false`, automatic garbage collection of packages and artifacts will be disabled;
+see [`Pkg.gc`](https://pkgdocs.julialang.org/v1/api/#Pkg.gc) for more details.
+
+!!! compat "Julia 1.12"
+    This environment variable is only supported on Julia 1.12 and above.
+
 ## Network transport
 
-### `JULIA_NO_VERIFY_HOSTS` / `JULIA_SSL_NO_VERIFY_HOSTS` / `JULIA_SSH_NO_VERIFY_HOSTS` / `JULIA_ALWAYS_VERIFY_HOSTS`
+### [`JULIA_NO_VERIFY_HOSTS`](@id JULIA_NO_VERIFY_HOSTS)
+### [`JULIA_SSL_NO_VERIFY_HOSTS`](@id JULIA_SSL_NO_VERIFY_HOSTS)
+### [`JULIA_SSH_NO_VERIFY_HOSTS`](@id JULIA_SSH_NO_VERIFY_HOSTS)
+### [`JULIA_ALWAYS_VERIFY_HOSTS`](@id JULIA_ALWAYS_VERIFY_HOSTS)
 
 Specify hosts whose identity should or should not be verified for specific transport layers. See [`NetworkOptions.verify_host`](https://github.com/JuliaLang/NetworkOptions.jl#verify_host)
 
-### `JULIA_SSL_CA_ROOTS_PATH`
+### [`JULIA_SSL_CA_ROOTS_PATH`](@id JULIA_SSL_CA_ROOTS_PATH)
 
 Specify the file or directory containing the certificate authority roots. See [`NetworkOptions.ca_roots`](https://github.com/JuliaLang/NetworkOptions.jl#ca_roots)
 
 ## External applications
 
-### `JULIA_SHELL`
+### [`JULIA_SHELL`](@id JULIA_SHELL)
 
 The absolute path of the shell with which Julia should execute external commands
 (via `Base.repl_cmd()`). Defaults to the environment variable `$SHELL`, and
@@ -263,7 +309,7 @@ falls back to `/bin/sh` if `$SHELL` is unset.
     On Windows, this environment variable is ignored, and external commands are
     executed directly.
 
-### `JULIA_EDITOR`
+### [`JULIA_EDITOR`](@id JULIA_EDITOR)
 
 The editor returned by `InteractiveUtils.editor()` and used in, e.g., [`InteractiveUtils.edit`](@ref),
 referring to the command of the preferred editor, for instance `vim`.
@@ -277,12 +323,12 @@ To use Visual Studio Code on Windows, set `$JULIA_EDITOR` to `code.cmd`.
 
 ## Parallelization
 
-### [`JULIA_CPU_THREADS`](@id env-cpu-threads)
+### [`JULIA_CPU_THREADS`](@id JULIA_CPU_THREADS)
 
 Overrides the global variable [`Base.Sys.CPU_THREADS`](@ref), the number of
 logical CPU cores available.
 
-### `JULIA_WORKER_TIMEOUT`
+### [`JULIA_WORKER_TIMEOUT`](@id JULIA_WORKER_TIMEOUT)
 
 A [`Float64`](@ref) that sets the value of `Distributed.worker_timeout()` (default: `60.0`).
 This function gives the number of seconds a worker process will wait for
@@ -290,16 +336,25 @@ a master process to establish a connection before dying.
 
 ### [`JULIA_NUM_THREADS`](@id JULIA_NUM_THREADS)
 
-An unsigned 64-bit integer (`uint64_t`) that sets the maximum number of threads
-available to Julia.  If `$JULIA_NUM_THREADS` is not positive or is not set, or
-if the number of CPU threads cannot be determined through system calls, then the
-number of threads is set to `1`.
+An unsigned 64-bit integer (`uint64_t`) or string that sets the maximum number
+of threads available to Julia. If `$JULIA_NUM_THREADS` is not set or is a
+non-positive integer, or if the number of CPU threads cannot be determined
+through system calls, then the number of threads is set to `1`.
 
 If `$JULIA_NUM_THREADS` is set to `auto`, then the number of threads will be set
-to the number of CPU threads.
+to the number of CPU threads. It can also be set to a comma-separated string to
+specify the size of the `:default` and `:interactive` [threadpools](@ref
+man-threadpools), respectively:
+```bash
+# 5 threads in the :default pool and 2 in the :interactive pool
+export JULIA_NUM_THREADS=5,2
+
+# `auto` threads in the :default pool and 1 in the :interactive pool
+export JULIA_NUM_THREADS=auto,1
+```
 
 !!! note
-    `JULIA_NUM_THREADS` must be defined before starting julia; defining it in
+    `JULIA_NUM_THREADS` must be defined before starting Julia; defining it in
     `startup.jl` is too late in the startup process.
 
 !!! compat "Julia 1.5"
@@ -309,76 +364,111 @@ to the number of CPU threads.
 !!! compat "Julia 1.7"
     The `auto` value for `$JULIA_NUM_THREADS` requires Julia 1.7 or above.
 
-### `JULIA_THREAD_SLEEP_THRESHOLD`
+!!! compat "Julia 1.9"
+    The `x,y` format for threadpools requires Julia 1.9 or above.
+
+### [`JULIA_THREAD_SLEEP_THRESHOLD`](@id JULIA_THREAD_SLEEP_THRESHOLD)
 
 If set to a string that starts with the case-insensitive substring `"infinite"`,
 then spinning threads never sleep. Otherwise, `$JULIA_THREAD_SLEEP_THRESHOLD` is
 interpreted as an unsigned 64-bit integer (`uint64_t`) and gives, in
 nanoseconds, the amount of time after which spinning threads should sleep.
 
-### [`JULIA_NUM_GC_THREADS`](@id env-gc-threads)
+### [`JULIA_NUM_GC_THREADS`](@id JULIA_NUM_GC_THREADS)
 
-Sets the number of threads used by Garbage Collection. If unspecified is set to
-half of the number of worker threads.
+Sets the number of threads used by Garbage Collection. If unspecified is set to the number of worker threads.
 
 !!! compat "Julia 1.10"
     The environment variable was added in 1.10
 
-### [`JULIA_IMAGE_THREADS`](@id env-image-threads)
+### [`JULIA_IMAGE_THREADS`](@id JULIA_IMAGE_THREADS)
 
 An unsigned 32-bit integer that sets the number of threads used by image
 compilation in this Julia process. The value of this variable may be
 ignored if the module is a small module. If left unspecified, the smaller
-of the value of [`JULIA_CPU_THREADS`](@ref env-cpu-threads) or half the
+of the value of [`JULIA_CPU_THREADS`](@ref JULIA_CPU_THREADS) or half the
 number of logical CPU cores is used in its place.
 
-### `JULIA_IMAGE_TIMINGS`
+### [`JULIA_IMAGE_TIMINGS`](@id JULIA_IMAGE_TIMINGS)
 
 A boolean value that determines if detailed timing information is printed during
 during image compilation. Defaults to 0.
 
-### `JULIA_EXCLUSIVE`
+### [`JULIA_EXCLUSIVE`](@id JULIA_EXCLUSIVE)
 
 If set to anything besides `0`, then Julia's thread policy is consistent with
-running on a dedicated machine: the master thread is on proc 0, and threads are
-affinitized. Otherwise, Julia lets the operating system handle thread policy.
+running on a dedicated machine: each thread in the default threadpool is
+affinitized.  [Interactive threads](@ref man-threadpools) remain under the
+control of the operating system scheduler.
+
+Otherwise, Julia lets the operating system handle thread policy.
+
+## Garbage Collection
+
+### [`JULIA_HEAP_SIZE_HINT`](@id JULIA_HEAP_SIZE_HINT)
+
+Environment variable equivalent to the `--heap-size-hint=<size>[<unit>]` command line option.
+
+Forces garbage collection if memory usage is higher than the given value. The value may be specified as a number of bytes, optionally in units of:
+
+    - B  (bytes)
+    - K  (kibibytes)
+    - M  (mebibytes)
+    - G  (gibibytes)
+    - T  (tebibytes)
+    - %  (percentage of physical memory)
+
+For example, `JULIA_HEAP_SIZE_HINT=1G` would provide a 1 GB heap size hint to the garbage collector.
 
 ## REPL formatting
 
 Environment variables that determine how REPL output should be formatted at the
-terminal. Generally, these variables should be set to [ANSI terminal escape
+terminal. The `JULIA_*_COLOR` variables should be set to [ANSI terminal escape
 sequences](https://en.wikipedia.org/wiki/ANSI_escape_code). Julia provides
 a high-level interface with much of the same functionality; see the section on
 [The Julia REPL](@ref).
 
-### `JULIA_ERROR_COLOR`
+### [`JULIA_ERROR_COLOR`](@id JULIA_ERROR_COLOR)
 
 The formatting `Base.error_color()` (default: light red, `"\033[91m"`) that
 errors should have at the terminal.
 
-### `JULIA_WARN_COLOR`
+### [`JULIA_WARN_COLOR`](@id JULIA_WARN_COLOR)
 
 The formatting `Base.warn_color()` (default: yellow, `"\033[93m"`) that warnings
 should have at the terminal.
 
-### `JULIA_INFO_COLOR`
+### [`JULIA_INFO_COLOR`](@id JULIA_INFO_COLOR)
 
 The formatting `Base.info_color()` (default: cyan, `"\033[36m"`) that info
 should have at the terminal.
 
-### `JULIA_INPUT_COLOR`
+### [`JULIA_INPUT_COLOR`](@id JULIA_INPUT_COLOR)
 
 The formatting `Base.input_color()` (default: normal, `"\033[0m"`) that input
 should have at the terminal.
 
-### `JULIA_ANSWER_COLOR`
+### [`JULIA_ANSWER_COLOR`](@id JULIA_ANSWER_COLOR)
 
 The formatting `Base.answer_color()` (default: normal, `"\033[0m"`) that output
 should have at the terminal.
 
+### [`NO_COLOR`](@id NO_COLOR)
+
+When this variable is present and not an empty string (regardless of its value) then colored
+text will be disabled on the REPL. Can be overridden with the flag `--color=yes` or with the
+environment variable [`FORCE_COLOR`](@ref FORCE_COLOR). This environment variable is
+[commonly recognized by command-line applications](https://no-color.org/).
+
+### [`FORCE_COLOR`](@id FORCE_COLOR)
+
+When this variable is present and not an empty string (regardless of its value) then
+colored text will be enabled on the REPL. Can be overridden with the flag `--color=no`. This
+environment variable is [commonly recognized by command-line applications](https://force-color.org/).
+
 ## System and Package Image Building
 
-### `JULIA_CPU_TARGET`
+### [`JULIA_CPU_TARGET`](@id JULIA_CPU_TARGET)
 
 Modify the target machine architecture for (pre)compiling
 [system](@ref sysimg-multi-versioning) and [package images](@ref pkgimgs-multi-versioning).
@@ -387,11 +477,17 @@ Unlike the `--cpu-target`, or `-C`, [command line option](@ref cli), it does not
 just-in-time (JIT) code generation within a Julia session where machine code is only
 stored in memory.
 
-Valid values for `JULIA_CPU_TARGET` can be obtained by executing `julia -C help`.
+Valid values for [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) can be obtained by executing `julia -C help`.
 
-Setting `JULIA_CPU_TARGET` is important for heterogeneous compute systems where processors of
+To get the CPU target string that was used to build the current system image,
+use [`Sys.sysimage_target()`](@ref). This can be useful for reproducing
+the same system image or understanding what CPU features were enabled during compilation.
+
+Setting [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) is important for heterogeneous compute systems where processors of
 distinct types or features may be present. This is commonly encountered in high performance
-computing (HPC) clusters since the component nodes may be using distinct processors.
+computing (HPC) clusters since the component nodes may be using distinct processors. In this case,
+you may want to use the `sysimage` CPU target to maintain the same configuration as the sysimage.
+See below for more details.
 
 The CPU target string is a list of strings separated by `;` each string starts with a CPU
 or architecture name and followed by an optional list of features separated by `,`.
@@ -399,14 +495,26 @@ A `generic` or empty CPU name means the basic required feature set of the target
 which is at least the architecture the C/C++ runtime is compiled with. Each string
 is interpreted by LLVM.
 
+!!! note
+    Package images can only target the same or more specific CPU features than
+    their base system image.
+
 A few special features are supported:
-1. `clone_all`
+
+1. `sysimage`
+
+     A special keyword that can be used as a CPU target name, which will be replaced
+     with the CPU target string that was used to build the current system image. This allows
+     you to specify CPU targets that build upon or extend the current sysimage's target, which
+     is particularly helpful for creating package images that are as flexible as the sysimage.
+
+2. `clone_all`
 
      This forces the target to have all functions in sysimg cloned.
      When used in negative form (i.e. `-clone_all`), this disables full clone that's
      enabled by default for certain targets.
 
-2. `base([0-9]*)`
+3. `base([0-9]*)`
 
      This specifies the (0-based) base target index. The base target is the target
      that the current target is based on, i.e. the functions that are not being cloned
@@ -414,60 +522,42 @@ A few special features are supported:
      fully cloned (as if `clone_all` is specified for it) if it is not the default target (0).
      The index can only be smaller than the current index.
 
-3. `opt_size`
+4. `opt_size`
 
      Optimize for size with minimum performance impact. Clang/GCC's `-Os`.
 
-4. `min_size`
+5. `min_size`
 
      Optimize only for size. Clang's `-Oz`.
 
 
 ## Debugging and profiling
 
-### `JULIA_DEBUG`
+### [`JULIA_DEBUG`](@id JULIA_DEBUG)
 
 Enable debug logging for a file or module, see [`Logging`](@ref man-logging) for more information.
 
-### `JULIA_GC_ALLOC_POOL`, `JULIA_GC_ALLOC_OTHER`, `JULIA_GC_ALLOC_PRINT`
+### CI Debug Environment Variables
 
-If set, these environment variables take strings that optionally start with the
-character `'r'`, followed by a string interpolation of a colon-separated list of
-three signed 64-bit integers (`int64_t`). This triple of integers `a:b:c`
-represents the arithmetic sequence `a`, `a + b`, `a + 2*b`, ... `c`.
+Julia automatically enables verbose debugging options when certain continuous integration (CI) debug environment variables are set. This improves the debugging experience when CI jobs are re-run with debug logging enabled, by automatically:
 
-*   If it's the `n`th time that `jl_gc_pool_alloc()` has been called, and `n`
-    belongs to the arithmetic sequence represented by `$JULIA_GC_ALLOC_POOL`,
-    then garbage collection is forced.
-*   If it's the `n`th time that `maybe_collect()` has been called, and `n` belongs
-    to the arithmetic sequence represented by `$JULIA_GC_ALLOC_OTHER`, then garbage
-    collection is forced.
-*   If it's the `n`th time that `jl_gc_collect()` has been called, and `n` belongs
-    to the arithmetic sequence represented by `$JULIA_GC_ALLOC_PRINT`, then counts
-    for the number of calls to `jl_gc_pool_alloc()` and `maybe_collect()` are
-    printed.
+- Enabling `--trace-eval` (location mode) to show expressions being evaluated
+- Setting `JULIA_TEST_VERBOSE=true` to enable verbose test output
 
-If the value of the environment variable begins with the character `'r'`, then
-the interval between garbage collection events is randomized.
+This allows developers to get detailed debugging information from CI runs without modifying their scripts or workflow files.
 
-!!! note
+### [`JULIA_PROFILE_PEEK_HEAP_SNAPSHOT`](@id JULIA_PROFILE_PEEK_HEAP_SNAPSHOT)
 
-    These environment variables only have an effect if Julia was compiled with
-    garbage-collection debugging (that is, if `WITH_GC_DEBUG_ENV` is set to `1`
-    in the build configuration).
+Enable collecting of a heap snapshot during execution via the profiling peek mechanism.
+See [Triggered During Execution](@ref).
 
-### `JULIA_GC_NO_GENERATIONAL`
+### [`JULIA_TIMING_SUBSYSTEMS`](@id JULIA_TIMING_SUBSYSTEMS)
 
-If set to anything besides `0`, then the Julia garbage collector never performs
-"quick sweeps" of memory.
+Allows you to enable or disable zones for a specific Julia run.
+For instance, setting the variable to `+GC,-INFERENCE` will enable the `GC` zones and disable
+the `INFERENCE` zones. See [Dynamically Enabling and Disabling Zones](@ref).
 
-!!! note
-
-    This environment variable only has an effect if Julia was compiled with
-    garbage-collection debugging (that is, if `WITH_GC_DEBUG_ENV` is set to `1`
-    in the build configuration).
-
-### `JULIA_GC_WAIT_FOR_DEBUGGER`
+### [`JULIA_GC_WAIT_FOR_DEBUGGER`](@id JULIA_GC_WAIT_FOR_DEBUGGER)
 
 If set to anything besides `0`, then the Julia garbage collector will wait for
 a debugger to attach instead of aborting whenever there's a critical error.
@@ -478,7 +568,7 @@ a debugger to attach instead of aborting whenever there's a critical error.
     garbage-collection debugging (that is, if `WITH_GC_DEBUG_ENV` is set to `1`
     in the build configuration).
 
-### `ENABLE_JITPROFILING`
+### [`ENABLE_JITPROFILING`](@id ENABLE_JITPROFILING)
 
 If set to anything besides `0`, then the compiler will create and register an
 event listener for just-in-time (JIT) profiling.
@@ -494,12 +584,16 @@ event listener for just-in-time (JIT) profiling.
     * [Perf](https://perf.wiki.kernel.org) (`USE_PERF_JITEVENTS` set to `1`
       in the build configuration). This integration is enabled by default.
 
-### `ENABLE_GDBLISTENER`
+### [`ENABLE_GDBLISTENER`](@id ENABLE_GDBLISTENER)
 
 If set to anything besides `0` enables GDB registration of Julia code on release builds.
 On debug builds of Julia this is always enabled. Recommended to use with `-g 2`.
 
 
-### `JULIA_LLVM_ARGS`
+### [`JULIA_LLVM_ARGS`](@id JULIA_LLVM_ARGS)
 
 Arguments to be passed to the LLVM backend.
+
+### `JULIA_FALLBACK_REPL`
+
+Forces the fallback repl instead of REPL.jl.
diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md
index bdecb5ecf106f..5515d65773127 100644
--- a/doc/src/manual/faq.md
+++ b/doc/src/manual/faq.md
@@ -8,34 +8,51 @@ No.
 
 ### Why don't you compile Matlab/Python/R/… code to Julia?
 
-Since many people are familiar with the syntax of other dynamic languages, and lots of code has already been written in those languages, it is natural to wonder why we didn't just plug a Matlab or Python front-end into a Julia back-end (or “transpile” code to Julia) in order to get all the performance benefits of Julia without requiring programmers to learn a new language.  Simple, right?
+Since many people are familiar with the syntax of other dynamic languages, and lots of code has already been written in those languages, it is natural to wonder why we didn't just plug a Matlab or Python front-end into a Julia back-end (or “transpile” code to Julia) in order to get all the performance benefits of Julia without requiring programmers to learn a new language. Simple, right?
 
-The basic issue is that there is *nothing special about Julia's compiler*: we use a commonplace compiler (LLVM) with no “secret sauce” that other language developers don't know about.  Indeed, Julia's compiler is in many ways much simpler than those of other dynamic languages (e.g. PyPy or LuaJIT).   Julia's performance advantage derives almost entirely from its front-end: its language semantics allow a [well-written Julia program](@ref man-performance-tips) to *give more opportunities to the compiler* to generate efficient code and memory layouts.  If you tried to compile Matlab or Python code to Julia, our compiler would be limited by the semantics of Matlab or Python to producing code no better than that of existing compilers for those languages (and probably worse).  The key role of semantics is also why several existing Python compilers (like Numba and Pythran) only attempt to optimize a small subset of the language (e.g. operations on Numpy arrays and scalars), and for this subset they are already doing at least as well as we could for the same semantics.  The people working on those projects are incredibly smart and have accomplished amazing things, but retrofitting a compiler onto a language that was designed to be interpreted is a very difficult problem.
+The basic issue is that there is *nothing special about Julia's compiler*: we use a commonplace compiler (LLVM) with no “secret sauce” that other language developers don't know about. Indeed, Julia's compiler is in many ways much simpler than those of other dynamic languages (e.g. PyPy or LuaJIT). Julia's performance advantage derives almost entirely from its front-end: its language semantics allow a [well-written Julia program](@ref man-performance-tips) to *give more opportunities to the compiler* to generate efficient code and memory layouts. If you tried to compile Matlab or Python code to Julia, our compiler would be limited by the semantics of Matlab or Python to producing code no better than that of existing compilers for those languages (and probably worse). The key role of semantics is also why several existing Python compilers (like Numba and Pythran) only attempt to optimize a small subset of the language (e.g. operations on Numpy arrays and scalars), and for this subset they are already doing at least as well as we could for the same semantics. The people working on those projects are incredibly smart and have accomplished amazing things, but retrofitting a compiler onto a language that was designed to be interpreted is a very difficult problem.
 
-Julia's advantage is that good performance is not limited to a small subset of “built-in” types and operations, and one can write high-level type-generic code that works on arbitrary user-defined types while remaining fast and memory-efficient.  Types in languages like Python simply don't provide enough information to the compiler for similar capabilities, so as soon as you used those languages as a Julia front-end you would be stuck.
+Julia's advantage is that good performance is not limited to a small subset of “built-in” types and operations, and one can write high-level type-generic code that works on arbitrary user-defined types while remaining fast and memory-efficient. Types in languages like Python simply don't provide enough information to the compiler for similar capabilities, so as soon as you used those languages as a Julia front-end you would be stuck.
 
 For similar reasons, automated translation to Julia would also typically generate unreadable, slow, non-idiomatic code that would not be a good starting point for a native Julia port from another language.
 
-On the other hand, language *interoperability* is extremely useful: we want to exploit existing high-quality code in other languages from Julia (and vice versa)!  The best way to enable this is not a transpiler, but rather via easy inter-language calling facilities.  We have worked hard on this, from the built-in `ccall` intrinsic (to call C and Fortran libraries) to [JuliaInterop](https://github.com/JuliaInterop) packages that connect Julia to Python, Matlab, C++, and more.
+On the other hand, language *interoperability* is extremely useful: we want to exploit existing high-quality code in other languages from Julia (and vice versa)!  The best way to enable this is not a transpiler, but rather via easy inter-language calling facilities. We have worked hard on this, from the built-in `ccall` intrinsic (to call C and Fortran libraries) to [JuliaInterop](https://github.com/JuliaInterop) packages that connect Julia to Python, Matlab, C++, and more.
 
 ## [Public API](@id man-api)
 
 ### How does Julia define its public API?
 
-Julia `Base` and standard library functionality described in the
-[the documentation](https://docs.julialang.org/) that is not marked as unstable
-(e.g. experimental and internal) is covered by [SemVer](https://semver.org/).
-Functions, types, and constants are not part of the public API if they are not
-included in the documentation, _even if they have docstrings_.
+Julia's public [API](https://en.wikipedia.org/wiki/API) is the behavior described in
+documentation of public bindings from `Base` and the standard libraries. Functions,
+types, and constants are not part of the public API if they are not public, even if
+they have docstrings or are described in the documentation. Further, only the documented
+behavior of public bindings is part of the public API. Undocumented behavior of public
+bindings is internal.
+
+Public bindings are those marked with either `public foo` or `export foo`.
+
+In other words:
+
+- Documented behavior of public bindings is part of the public API.
+- Undocumented behavior of public bindings is not part of the public API.
+- Documented behavior of private bindings is not part of the public API.
+- Undocumented behavior of private bindings is not part of the public API.
+
+You can get a complete list of the public bindings from a module with `names(MyModule)`.
+
+Package authors are encouraged to define their public API similarly.
+
+Anything in Julia's Public API is covered by [SemVer](https://semver.org/) and therefore
+will not be removed or receive meaningful breaking changes before Julia 2.0.
 
 ### There is a useful undocumented function/type/constant. Can I use it?
 
-Updating Julia may break your code if you use non-public API.  If the code is
-self-contained, it may be a good idea to copy it into your project.  If you want to rely on
+Updating Julia may break your code if you use non-public API. If the code is
+self-contained, it may be a good idea to copy it into your project. If you want to rely on
 a complex non-public API, especially when using it from a stable package, it is a good idea
 to open an [issue](https://github.com/JuliaLang/julia/issues) or
 [pull request](https://github.com/JuliaLang/julia/pulls) to start a discussion for turning it
-into a public API.  However, we do not discourage the attempt to create packages that expose
+into a public API. However, we do not discourage the attempt to create packages that expose
 stable public interfaces while relying on non-public implementation details of Julia and
 buffering the differences across different Julia versions.
 
@@ -54,38 +71,9 @@ session (technically, in module `Main`), it is always present.
 
 If memory usage is your concern, you can always replace objects with ones that consume less memory.
  For example, if `A` is a gigabyte-sized array that you no longer need, you can free the memory
-with `A = nothing`.  The memory will be released the next time the garbage collector runs; you can force
+with `A = nothing`. The memory will be released the next time the garbage collector runs; you can force
 this to happen with [`GC.gc()`](@ref Base.GC.gc). Moreover, an attempt to use `A` will likely result in an error, because most methods are not defined on type `Nothing`.
 
-### How can I modify the declaration of a type in my session?
-
-Perhaps you've defined a type and then realize you need to add a new field.  If you try this at
-the REPL, you get the error:
-
-```
-ERROR: invalid redefinition of constant MyType
-```
-
-Types in module `Main` cannot be redefined.
-
-While this can be inconvenient when you are developing new code, there's an excellent workaround.
- Modules can be replaced by redefining them, and so if you wrap all your new code inside a module
-you can redefine types and constants.  You can't import the type names into `Main` and then expect
-to be able to redefine them there, but you can use the module name to resolve the scope.  In other
-words, while developing you might use a workflow something like this:
-
-```julia
-include("mynewcode.jl")              # this defines a module MyModule
-obj1 = MyModule.ObjConstructor(a, b)
-obj2 = MyModule.somefunction(obj1)
-# Got an error. Change something in "mynewcode.jl"
-include("mynewcode.jl")              # reload the module
-obj1 = MyModule.ObjConstructor(a, b) # old objects are no longer valid, must reconstruct
-obj2 = MyModule.somefunction(obj1)   # this time it worked!
-obj3 = MyModule.someotherfunction(obj2, c)
-...
-```
-
 ## [Scripting](@id man-scripting)
 
 ### How do I check if the current file is being run as the main script?
@@ -101,7 +89,7 @@ If one needs functionality both available as a library and a script, it is bette
 
 Running a Julia script using `julia file.jl` does not throw
 [`InterruptException`](@ref) when you try to terminate it with CTRL-C
-(SIGINT).  To run a certain code before terminating a Julia script,
+(SIGINT). To run a certain code before terminating a Julia script,
 which may or may not be caused by CTRL-C, use [`atexit`](@ref).
 Alternatively, you can use `julia -e 'include(popfirst!(ARGS))'
 file.jl` to execute a script while being able to catch
@@ -134,7 +122,7 @@ invoking an [operating-system shell](https://en.wikipedia.org/wiki/Shell_(comput
 That means that `run` does not perform wildcard expansion of `*` (["globbing"](https://en.wikipedia.org/wiki/Glob_(programming))),
 nor does it interpret [shell pipelines](https://en.wikipedia.org/wiki/Pipeline_(Unix)) like `|` or `>`.
 
-You can still do globbing and pipelines using Julia features, however.  For example, the built-in
+You can still do globbing and pipelines using Julia features, however. For example, the built-in
 [`pipeline`](@ref) function allows you to chain external programs and files, similar to shell pipes, and
 the [Glob.jl package](https://github.com/vtjnash/Glob.jl) implements POSIX-compatible globbing.
 
@@ -158,7 +146,7 @@ end
 ```
 and notice that it works fine in an interactive environment (like the Julia REPL),
 but gives ```UndefVarError: `x` not defined``` when you try to run it in script or other
-file.   What is going on is that Julia generally requires you to **be explicit about assigning to global variables in a local scope**.
+file. What is going on is that Julia generally requires you to **be explicit about assigning to global variables in a local scope**.
 
 Here, `x` is a global variable, `while` defines a [local scope](@ref scope-of-variables), and `x += 1` is
 an assignment to a global in that local scope.
@@ -167,7 +155,7 @@ As mentioned above, Julia (version 1.5 or later) allows you to omit the `global`
 keyword for code in the REPL (and many other interactive environments), to simplify
 exploration (e.g. copy-pasting code from a function to run interactively).
 However, once you move to code in files, Julia requires a more disciplined approach
-to global variables.  You have least three options:
+to global variables. You have least three options:
 
 1. Put the code into a function (so that `x` is a *local* variable in a function). In general, it is good software engineering to use functions rather than global scripts (search online for "why global variables bad" to see many explanations). In Julia, global variables are also [slow](@ref man-performance-tips).
 2. Wrap the code in a [`let`](@ref) block.  (This makes `x` a local variable within the `let ... end` statement, again eliminating the need for `global`).
@@ -235,8 +223,8 @@ the variables `A` and `x` were distinct bindings referring to the same mutable `
 
 ### Can I use `using` or `import` inside a function?
 
-No, you are not allowed to have a `using` or `import` statement inside a function.  If you want
-to import a module but only use its symbols inside a specific function or set of functions, you
+No, you are not allowed to have a `using` or `import` statement inside a function. If you want
+to import a module but only use its bindings inside a specific function or set of functions, you
 have two options:
 
 1. Use `import`:
@@ -244,13 +232,13 @@ have two options:
    ```julia
    import Foo
    function bar(...)
-       # ... refer to Foo symbols via Foo.baz ...
+       # ... refer to Foo bindings via Foo.baz ...
    end
    ```
 
    This loads the module `Foo` and defines a variable `Foo` that refers to the module, but does not
-   import any of the other symbols from the module into the current namespace.  You refer to the
-   `Foo` symbols by their qualified names `Foo.bar` etc.
+   import any of the other bindings from the module into the current namespace. You refer to the
+   `Foo` bindings by their qualified names `Foo.bar` etc.
 2. Wrap your function in a module:
 
    ```julia
@@ -264,7 +252,7 @@ have two options:
    using Bar
    ```
 
-   This imports all the symbols from `Foo`, but only inside the module `Bar`.
+   This imports all the bindings from `Foo`, but only inside the module `Bar`.
 
 ### What does the `...` operator do?
 
@@ -374,11 +362,17 @@ julia> twothreearr()
  3
 ```
 
+### Is a function that ends with `!` allowed to allocate?
+
+Yes! A function name ending with `!` indicates that the function mutates at
+least one of its arguments (typically the first argument). However, it may
+still allocate a scratch space to expedite computation or produce that result.
+
 ## Types, type declarations, and constructors
 
 ### [What does "type-stable" mean?](@id man-type-stability)
 
-It means that the type of the output is predictable from the types of the inputs.  In particular,
+It means that the type of the output is predictable from the types of the inputs. In particular,
 it means that the type of the output cannot vary depending on the *values* of the inputs. The
 following code is *not* type-stable:
 
@@ -410,9 +404,9 @@ Stacktrace:
 [...]
 ```
 
-This behavior is an inconvenient consequence of the requirement for type-stability.  In the case
+This behavior is an inconvenient consequence of the requirement for type-stability. In the case
 of [`sqrt`](@ref), most users want `sqrt(2.0)` to give a real number, and would be unhappy if
-it produced the complex number `1.4142135623730951 + 0.0im`.  One could write the [`sqrt`](@ref)
+it produced the complex number `1.4142135623730951 + 0.0im`. One could write the [`sqrt`](@ref)
 function to switch to a complex-valued output only when passed a negative number (which is what
 [`sqrt`](@ref) does in some other languages), but then the result would not be [type-stable](@ref man-type-stability)
 and the [`sqrt`](@ref) function would have poor performance.
@@ -430,14 +424,14 @@ julia> sqrt(-2.0+0im)
 The parameters of a [parametric type](@ref Parametric-Types) can hold either
 types or bits values, and the type itself chooses how it makes use of these parameters.
 For example, `Array{Float64, 2}` is parameterized by the type `Float64` to express its
-element type and the integer value `2` to express its number of dimensions.  When
+element type and the integer value `2` to express its number of dimensions. When
 defining your own parametric type, you can use subtype constraints to declare that a
 certain parameter must be a subtype ([`<:`](@ref)) of some abstract type or a previous
-type parameter.  There is not, however, a dedicated syntax to declare that a parameter
+type parameter. There is not, however, a dedicated syntax to declare that a parameter
 must be a _value_ of a given type — that is, you cannot directly declare that a
 dimensionality-like parameter [`isa`](@ref) `Int` within the `struct` definition, for
-example.  Similarly, you cannot do computations (including simple things like addition
-or subtraction) on type parameters.  Instead, these sorts of constraints and
+example. Similarly, you cannot do computations (including simple things like addition
+or subtraction) on type parameters. Instead, these sorts of constraints and
 relationships may be expressed through additional type parameters that are computed
 and enforced within the type's [constructors](@ref man-constructors).
 
@@ -552,9 +546,9 @@ way that is compatible with C and Fortran. Saturated integer arithmetic, however
 The first and most obvious issue is that this is not the way machine integer arithmetic works,
 so implementing saturated operations requires emitting instructions after each machine integer
 operation to check for underflow or overflow and replace the result with [`typemin(Int)`](@ref)
-or [`typemax(Int)`](@ref) as appropriate. This alone expands each integer operation from a single,
-fast instruction into half a dozen instructions, probably including branches. Ouch. But it gets
-worse – saturating integer arithmetic isn't associative. Consider this Matlab computation:
+or [`typemax(Int)`](@ref) as appropriate. This expands each integer operation from a single, fast
+instruction into a few instructions. But it gets worse – saturating integer arithmetic isn't
+associative. Consider this Matlab computation:
 
 ```
 >> n = int64(2)^62
@@ -708,7 +702,7 @@ julia> module Foo
 
 julia> Foo.foo()
 ERROR: On worker 2:
-UndefVarError: `Foo` not defined
+UndefVarError: `Foo` not defined in `Main`
 Stacktrace:
 [...]
 ```
@@ -729,7 +723,7 @@ julia> @everywhere module Foo
 
 julia> Foo.foo()
 ERROR: On worker 2:
-UndefVarError: `gvar` not defined
+UndefVarError: `gvar` not defined in `Main.Foo`
 Stacktrace:
 [...]
 ```
@@ -765,7 +759,7 @@ bar (generic function with 1 method)
 
 julia> remotecall_fetch(bar, 2)
 ERROR: On worker 2:
-UndefVarError: `#bar` not defined
+UndefVarError: `#bar` not defined in `Main`
 [...]
 
 julia> anon_bar  = ()->1
@@ -787,6 +781,7 @@ foo (generic function with 1 method)
 
 julia> foo([1])
 ERROR: MethodError: no method matching foo(::Vector{Int64})
+The function `foo` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   foo(!Matched::Vector{Real})
@@ -882,7 +877,7 @@ array to store the result. If you prefer to mutate `x`, use `x .+= y` to update
 individually.
 
 While this behavior might surprise some, the choice is deliberate. The main reason is the presence
-of immutable objects within Julia, which cannot change their value once created.  Indeed, a
+of immutable objects within Julia, which cannot change their value once created. Indeed, a
 number is an immutable object; the statements `x = 5; x += 1` do not modify the meaning of `5`,
 they modify the value bound to `x`. For an immutable, the only way to change the value is to reassign
 it.
@@ -923,9 +918,9 @@ While the streaming I/O API is synchronous, the underlying implementation is ful
 
 Consider the printed output from the following:
 
-```jldoctest
+```
 julia> @sync for i in 1:3
-           @async write(stdout, string(i), " Foo ", " Bar ")
+           Threads.@spawn write(stdout, string(i), " Foo ", " Bar ")
        end
 123 Foo  Foo  Foo  Bar  Bar  Bar
 ```
@@ -936,9 +931,9 @@ yields to other tasks while waiting for that part of the I/O to complete.
 `print` and `println` "lock" the stream during a call. Consequently changing `write` to `println`
 in the above example results in:
 
-```jldoctest
+```
 julia> @sync for i in 1:3
-           @async println(stdout, string(i), " Foo ", " Bar ")
+           Threads.@spawn println(stdout, string(i), " Foo ", " Bar ")
        end
 1 Foo  Bar
 2 Foo  Bar
@@ -947,11 +942,11 @@ julia> @sync for i in 1:3
 
 You can lock your writes with a `ReentrantLock` like this:
 
-```jldoctest
+```
 julia> l = ReentrantLock();
 
 julia> @sync for i in 1:3
-           @async begin
+           Threads.@spawn begin
                lock(l)
                try
                    write(stdout, string(i), " Foo ", " Bar ")
@@ -1035,17 +1030,15 @@ Modifying OpenBLAS settings or compiling Julia with a different BLAS library, eg
 
 ### How do I manage precompilation caches in distributed file systems?
 
-When using `julia` in high-performance computing (HPC) facilities, invoking
-_n_ `julia` processes simultaneously creates at most _n_ temporary copies of
-precompilation cache files. If this is an issue (slow and/or small distributed
-file system), you may:
+When using Julia in high-performance computing (HPC) facilities with shared filesystems, it is recommended to use a shared
+depot (via the [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) environment variable). Since Julia v1.10, multiple Julia processes on functionally similar
+workers and using the same depot will coordinate via pidfile locks to only spend effort precompiling on one process while the
+others wait. The precompilation process will indicate when the process is precompiling or waiting for another that is
+precompiling. If non-interactive the messages are via `@debug`.
 
-1. Use `julia` with `--compiled-modules=no` flag to turn off precompilation.
-2. Configure a private writable depot using `pushfirst!(DEPOT_PATH, private_path)`
-   where `private_path` is a path unique to this `julia` process.  This
-   can also be done by setting environment variable `JULIA_DEPOT_PATH` to
-   `$private_path:$HOME/.julia`.
-3. Create a symlink from `~/.julia/compiled` to a directory in a scratch space.
+However, due to caching of binary code, the cache rejection since v1.9 is more strict and users may need to set the
+[`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) environment variable appropriately to get a single cache that is usable throughout the HPC
+environment.
 
 ## Julia Releases
 
@@ -1074,8 +1067,7 @@ You may wish to test against the nightly version to ensure that such regressions
 Finally, you may also consider building Julia from source for yourself. This option is mainly for those individuals who are comfortable at the command line, or interested in learning.
 If this describes you, you may also be interested in reading our [guidelines for contributing](https://github.com/JuliaLang/julia/blob/master/CONTRIBUTING.md).
 
-Links to each of these download types can be found on the download page at [https://julialang.org/downloads/](https://julialang.org/downloads/).
-Note that not all versions of Julia are available for all platforms.
+The [`juliaup` install manager](https://julialang.org/install/) has pre-defined channels named `release` and `lts` for the latest stable release and the current LTS release, as well as version-specific channels.
 
 ### How can I transfer the list of installed packages after updating my version of Julia?
 
diff --git a/doc/src/manual/functions.md b/doc/src/manual/functions.md
index a724f450dccfa..b2c72cb648d6b 100644
--- a/doc/src/manual/functions.md
+++ b/doc/src/manual/functions.md
@@ -5,7 +5,7 @@ functions are not pure mathematical functions, because they can alter and be aff
 by the global state of the program. The basic syntax for defining functions in Julia is:
 
 ```jldoctest
-julia> function f(x,y)
+julia> function f(x, y)
            x + y
        end
 f (generic function with 1 method)
@@ -18,7 +18,7 @@ There is a second, more terse syntax for defining a function in Julia. The tradi
 declaration syntax demonstrated above is equivalent to the following compact "assignment form":
 
 ```jldoctest fofxy
-julia> f(x,y) = x + y
+julia> f(x, y) = x + y
 f (generic function with 1 method)
 ```
 
@@ -30,7 +30,7 @@ both typing and visual noise.
 A function is called using the traditional parenthesis syntax:
 
 ```jldoctest fofxy
-julia> f(2,3)
+julia> f(2, 3)
 5
 ```
 
@@ -40,14 +40,14 @@ like any other value:
 ```jldoctest fofxy
 julia> g = f;
 
-julia> g(2,3)
+julia> g(2, 3)
 5
 ```
 
 As with variables, Unicode can also be used for function names:
 
 ```jldoctest
-julia> ∑(x,y) = x + y
+julia> ∑(x, y) = x + y
 ∑ (generic function with 1 method)
 
 julia> ∑(2, 3)
@@ -65,19 +65,23 @@ a function will be visible to the caller. (This is the same behavior found in Sc
 Python, Ruby and Perl, among other dynamic languages.)
 
 For example, in the function
-```julia
+```jldoctest argpassing; output = false
 function f(x, y)
     x[1] = 42    # mutates x
     y = 7 + y    # new binding for y, no mutation
     return y
 end
+
+# output
+
+f (generic function with 1 method)
 ```
 The statement `x[1] = 42` *mutates* the object `x`, and hence this change *will* be visible in the array passed
-by the caller for this argument.   On the other hand, the assignment `y = 7 + y` changes the *binding* ("name")
+by the caller for this argument. On the other hand, the assignment `y = 7 + y` changes the *binding* ("name")
 `y` to refer to a new value `7 + y`, rather than mutating the *original* object referred to by `y`,
-and hence does *not* change the corresponding argument passed by the caller.   This can be seen if we call `f(x, y)`:
-```julia-repl
-julia> a = [4,5,6]
+and hence does *not* change the corresponding argument passed by the caller. This can be seen if we call `f(x, y)`:
+```jldoctest argpassing
+julia> a = [4, 5, 6]
 3-element Vector{Int64}:
  4
  5
@@ -102,6 +106,9 @@ As a common convention in Julia (not a syntactic requirement), such a function w
 [typically be named `f!(x, y)`](@ref man-punctuation) rather than `f(x, y)`, as a visual reminder at
 the call site that at least one of the arguments (often the first one) is being mutated.
 
+!!! warning "Shared memory between arguments"
+    The behavior of a mutating function can be unexpected when a mutated argument shares memory with another argument, a situation known as aliasing (e.g. when one is a view of the other).
+    Unless the function docstring explicitly indicates that aliasing produces the expected result, it is the responsibility of the caller to ensure proper behavior on such inputs.
 
 ## Argument-type declarations
 
@@ -112,13 +119,13 @@ fib(n::Integer) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)
 ```
 and the `::Integer` specification means that it will only be callable when `n` is a subtype of the [abstract](@ref man-abstract-types) `Integer` type.
 
-Argument-type declarations **normally have no impact on performance**: regardless of what argument types (if any) are declared, Julia compiles a specialized version of the function for the actual argument types passed by the caller.   For example, calling `fib(1)` will trigger the compilation of specialized version of `fib` optimized specifically for `Int` arguments, which is then re-used if `fib(7)` or `fib(15)` are called.  (There are rare exceptions when an argument-type declaration can trigger additional compiler specializations; see: [Be aware of when Julia avoids specializing](@ref).)  The most common reasons to declare argument types in Julia are, instead:
+Argument-type declarations **normally have no impact on performance**: regardless of what argument types (if any) are declared, Julia compiles a specialized version of the function for the actual argument types passed by the caller. For example, calling `fib(1)` will trigger the compilation of specialized version of `fib` optimized specifically for `Int` arguments, which is then re-used if `fib(7)` or `fib(15)` are called.  (There are rare exceptions when an argument-type declaration can trigger additional compiler specializations; see: [Be aware of when Julia avoids specializing](@ref).)  The most common reasons to declare argument types in Julia are, instead:
 
-* **Dispatch:** As explained in [Methods](@ref), you can have different versions ("methods") of a function for different argument types, in which case the argument types are used to determine which implementation is called for which arguments.  For example, you might implement a completely different algorithm `fib(x::Number) = ...` that works for any `Number` type by using [Binet's formula](https://en.wikipedia.org/wiki/Fibonacci_number#Binet%27s_formula) to extend it to non-integer values.
-* **Correctness:** Type declarations can be useful if your function only returns correct results for certain argument types.  For example, if we omitted argument types and wrote `fib(n) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)`, then `fib(1.5)` would silently give us the nonsensical answer `1.0`.
+* **Dispatch:** As explained in [Methods](@ref), you can have different versions ("methods") of a function for different argument types, in which case the argument types are used to determine which implementation is called for which arguments. For example, you might implement a completely different algorithm `fib(x::Number) = ...` that works for any `Number` type by using [Binet's formula](https://en.wikipedia.org/wiki/Fibonacci_number#Binet%27s_formula) to extend it to non-integer values.
+* **Correctness:** Type declarations can be useful if your function only returns correct results for certain argument types. For example, if we omitted argument types and wrote `fib(n) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)`, then `fib(1.5)` would silently give us the nonsensical answer `1.0`.
 * **Clarity:** Type declarations can serve as a form of documentation about the expected arguments.
 
-However, it is a **common mistake to overly restrict the argument types**, which can unnecessarily limit the applicability of the function and prevent it from being re-used in circumstances you did not anticipate.    For example, the `fib(n::Integer)` function above works equally well for `Int` arguments (machine integers) and `BigInt` arbitrary-precision integers (see [BigFloats and BigInts](@ref BigFloats-and-BigInts)), which is especially useful because Fibonacci numbers grow exponentially rapidly and will quickly overflow any fixed-precision type like `Int` (see [Overflow behavior](@ref)).  If we had declared our function as `fib(n::Int)`, however, the application to `BigInt` would have been prevented for no reason.   In general, you should use the most general applicable abstract types for arguments, and **when in doubt, omit the argument types**.  You can always add argument-type specifications later if they become necessary, and you don't sacrifice performance or functionality by omitting them.
+However, it is a **common mistake to overly restrict the argument types**, which can unnecessarily limit the applicability of the function and prevent it from being re-used in circumstances you did not anticipate. For example, the `fib(n::Integer)` function above works equally well for `Int` arguments (machine integers) and `BigInt` arbitrary-precision integers (see [BigFloats and BigInts](@ref BigFloats-and-BigInts)), which is especially useful because Fibonacci numbers grow exponentially rapidly and will quickly overflow any fixed-precision type like `Int` (see [Overflow behavior](@ref)). If we had declared our function as `fib(n::Int)`, however, the application to `BigInt` would have been prevented for no reason. In general, you should use the most general applicable abstract types for arguments, and **when in doubt, omit the argument types**. You can always add argument-type specifications later if they become necessary, and you don't sacrifice performance or functionality by omitting them.
 
 ## The `return` Keyword
 
@@ -130,7 +137,7 @@ the `return` keyword causes a function to return immediately, providing
 an expression whose value is returned:
 
 ```julia
-function g(x,y)
+function g(x, y)
     return x * y
     x + y
 end
@@ -140,19 +147,19 @@ Since function definitions can be entered into interactive sessions, it is easy
 definitions:
 
 ```jldoctest
-julia> f(x,y) = x + y
+julia> f(x, y) = x + y
 f (generic function with 1 method)
 
-julia> function g(x,y)
+julia> function g(x, y)
            return x * y
            x + y
        end
 g (generic function with 1 method)
 
-julia> f(2,3)
+julia> f(2, 3)
 5
 
-julia> g(2,3)
+julia> g(2, 3)
 6
 ```
 
@@ -163,18 +170,18 @@ is of real use. Here, for example, is a function that computes the hypotenuse le
 triangle with sides of length `x` and `y`, avoiding overflow:
 
 ```jldoctest
-julia> function hypot(x,y)
+julia> function hypot(x, y)
            x = abs(x)
            y = abs(y)
            if x > y
                r = y/x
-               return x*sqrt(1+r*r)
+               return x*sqrt(1 + r*r)
            end
            if y == 0
-               return zero(x)
+               return float(x)
            end
            r = x/y
-           return y*sqrt(1+r*r)
+           return y*sqrt(1 + r*r)
        end
 hypot (generic function with 1 method)
 
@@ -186,7 +193,7 @@ There are three possible points of return from this function, returning the valu
 expressions, depending on the values of `x` and `y`. The `return` on the last line could be omitted
 since it is the last expression.
 
-### Return type
+### [Return type](@id man-functions-return-type)
 
 A return type can be specified in the function declaration using the `::` operator. This converts
 the return value to the specified type.
@@ -205,7 +212,7 @@ See [Type Declarations](@ref) for more on return types.
 
 Return type declarations are **rarely used** in Julia: in general, you should
 instead write "type-stable" functions in which Julia's compiler can automatically
-infer the return type.  For more information, see the [Performance Tips](@ref man-performance-tips) chapter.
+infer the return type. For more information, see the [Performance Tips](@ref man-performance-tips) chapter.
 
 ### Returning nothing
 
@@ -243,7 +250,7 @@ as you would any other function:
 julia> 1 + 2 + 3
 6
 
-julia> +(1,2,3)
+julia> +(1, 2, 3)
 6
 ```
 
@@ -254,7 +261,7 @@ operators such as [`+`](@ref) and [`*`](@ref) just like you would with other fun
 ```jldoctest
 julia> f = +;
 
-julia> f(1,2,3)
+julia> f(1, 2, 3)
 6
 ```
 
@@ -289,15 +296,15 @@ syntaxes:
 
 ```jldoctest
 julia> x -> x^2 + 2x - 1
-#1 (generic function with 1 method)
+#2 (generic function with 1 method)
 
 julia> function (x)
            x^2 + 2x - 1
        end
-#3 (generic function with 1 method)
+#5 (generic function with 1 method)
 ```
 
-This creates a function taking one argument `x` and returning the value of the polynomial `x^2 +
+Each statement creates a function taking one argument `x` and returning the value of the polynomial `x^2 +
 2x - 1` at that value. Notice that the result is a generic function, but with a compiler-generated
 name based on consecutive numbering.
 
@@ -327,28 +334,17 @@ julia> map(x -> x^2 + 2x - 1, [1, 3, -1])
 ```
 
 An anonymous function accepting multiple arguments can be written using the syntax `(x,y,z)->2x+y-z`.
-A zero-argument anonymous function is written as `()->3`. The idea of a function with no arguments
-may seem strange, but is useful for "delaying" a computation. In this usage, a block of code is
-wrapped in a zero-argument function, which is later invoked by calling it as `f`.
-
-As an example, consider this call to [`get`](@ref):
-
-```julia
-get(dict, key) do
-    # default value calculated here
-    time()
-end
-```
 
-The code above is equivalent to calling `get` with an anonymous function containing the code
-enclosed between `do` and `end`, like so:
+Argument-type declarations for anonymous functions work as for named functions, for example `x::Integer->2x`.
+The return type of an anonymous function cannot be specified.
 
-```julia
-get(()->time(), dict, key)
-```
-
-The call to [`time`](@ref) is delayed by wrapping it in a 0-argument anonymous function
-that is called only if the requested key is absent from `dict`.
+A zero-argument anonymous function can be written as `()->2+2`. The idea of a function with
+no arguments may seem strange, but is useful in cases where a result cannot (or should not)
+be precomputed. For example, Julia has a zero-argument [`time`](@ref) function that returns
+the current time in seconds, and thus `seconds = ()->round(Int, time())` is an anonymous
+function that returns this time rounded to the nearest integer assigned to the variable
+`seconds`. Each time this anonymous function is called as `seconds()` the current time will
+be calculated and returned.
 
 ## Tuples
 
@@ -402,7 +398,7 @@ left side of an assignment: the value on the right side is _destructured_ by ite
 over and assigning to each variable in turn:
 
 ```jldoctest
-julia> (a,b,c) = 1:3
+julia> (a, b, c) = 1:3
 1:3
 
 julia> b
@@ -417,7 +413,7 @@ This can be used to return multiple values from functions by returning a tuple o
 other iterable value. For example, the following function returns two values:
 
 ```jldoctest foofunc
-julia> function foo(a,b)
+julia> function foo(a, b)
            a+b, a*b
        end
 foo (generic function with 1 method)
@@ -427,14 +423,14 @@ If you call it in an interactive session without assigning the return value anyw
 see the tuple returned:
 
 ```jldoctest foofunc
-julia> foo(2,3)
+julia> foo(2, 3)
 (5, 6)
 ```
 
 Destructuring assignment extracts each value into a variable:
 
 ```jldoctest foofunc
-julia> x, y = foo(2,3)
+julia> x, y = foo(2, 3)
 (5, 6)
 
 julia> x
@@ -473,7 +469,7 @@ Other valid left-hand side expressions can be used as elements of the assignment
 ```jldoctest
 julia> X = zeros(3);
 
-julia> X[1], (a,b) = (1, (2, 3))
+julia> X[1], (a, b) = (1, (2, 3))
 (1, (2, 3))
 
 julia> X
@@ -592,10 +588,12 @@ The destructuring feature can also be used within a function argument.
 If a function argument name is written as a tuple (e.g. `(x, y)`) instead of just
 a symbol, then an assignment `(x, y) = argument` will be inserted for you:
 
-```julia-repl
+```jldoctest
 julia> minmax(x, y) = (y < x) ? (y, x) : (x, y)
+minmax (generic function with 1 method)
 
 julia> gap((min, max)) = max - min
+gap (generic function with 1 method)
 
 julia> gap(minmax(10, 2))
 8
@@ -606,7 +604,7 @@ would be a two-argument function, and this example would not work.
 
 Similarly, property destructuring can also be used for function arguments:
 
-```julia-repl
+```jldoctest
 julia> foo((; x, y)) = x + y
 foo (generic function with 1 method)
 
@@ -624,9 +622,9 @@ julia> foo(A(3, 4))
 
 For anonymous functions, destructuring a single argument requires an extra comma:
 
-```
-julia> map(((x,y),) -> x + y, [(1,2), (3,4)])
-2-element Array{Int64,1}:
+```jldoctest
+julia> map(((x, y),) -> x + y, [(1, 2), (3, 4)])
+2-element Vector{Int64}:
  3
  7
 ```
@@ -638,7 +636,7 @@ Such functions are traditionally known as "varargs" functions, which is short fo
 of arguments". You can define a varargs function by following the last positional argument with an ellipsis:
 
 ```jldoctest barfunc
-julia> bar(a,b,x...) = (a,b,x)
+julia> bar(a, b, x...) = (a, b, x)
 bar (generic function with 1 method)
 ```
 
@@ -647,16 +645,16 @@ The variables `a` and `b` are bound to the first two argument values as usual, a
 two arguments:
 
 ```jldoctest barfunc
-julia> bar(1,2)
+julia> bar(1, 2)
 (1, 2, ())
 
-julia> bar(1,2,3)
+julia> bar(1, 2, 3)
 (1, 2, (3,))
 
 julia> bar(1, 2, 3, 4)
 (1, 2, (3, 4))
 
-julia> bar(1,2,3,4,5,6)
+julia> bar(1, 2, 3, 4, 5, 6)
 (1, 2, (3, 4, 5, 6))
 ```
 
@@ -673,7 +671,7 @@ call instead:
 julia> x = (3, 4)
 (3, 4)
 
-julia> bar(1,2,x...)
+julia> bar(1, 2, x...)
 (1, 2, (3, 4))
 ```
 
@@ -684,7 +682,7 @@ of arguments go. This need not be the case, however:
 julia> x = (2, 3, 4)
 (2, 3, 4)
 
-julia> bar(1,x...)
+julia> bar(1, x...)
 (1, 2, (3, 4))
 
 julia> x = (1, 2, 3, 4)
@@ -697,15 +695,15 @@ julia> bar(x...)
 Furthermore, the iterable object splatted into a function call need not be a tuple:
 
 ```jldoctest barfunc
-julia> x = [3,4]
+julia> x = [3, 4]
 2-element Vector{Int64}:
  3
  4
 
-julia> bar(1,2,x...)
+julia> bar(1, 2, x...)
 (1, 2, (3, 4))
 
-julia> x = [1,2,3,4]
+julia> x = [1, 2, 3, 4]
 4-element Vector{Int64}:
  1
  2
@@ -720,9 +718,9 @@ Also, the function that arguments are splatted into need not be a varargs functi
 often is):
 
 ```jldoctest
-julia> baz(a,b) = a + b;
+julia> baz(a, b) = a + b;
 
-julia> args = [1,2]
+julia> args = [1, 2]
 2-element Vector{Int64}:
  1
  2
@@ -730,7 +728,7 @@ julia> args = [1,2]
 julia> baz(args...)
 3
 
-julia> args = [1,2,3]
+julia> args = [1, 2, 3]
 3-element Vector{Int64}:
  1
  2
@@ -738,6 +736,7 @@ julia> args = [1,2,3]
 
 julia> baz(args...)
 ERROR: MethodError: no method matching baz(::Int64, ::Int64, ::Int64)
+The function `baz` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   baz(::Any, ::Any)
@@ -791,12 +790,15 @@ Optional arguments are actually just a convenient syntax for writing multiple me
 with different numbers of arguments (see [Note on Optional and keyword Arguments](@ref)).
 This can be checked for our `date` function example by calling the `methods` function:
 
-```julia-repl
+```jldoctest date_default_args; filter = r"@ .*"a
 julia> methods(date)
-# 3 methods for generic function "date":
-[1] date(y::Int64) in Main at REPL[1]:1
-[2] date(y::Int64, m::Int64) in Main at REPL[1]:1
-[3] date(y::Int64, m::Int64, d::Int64) in Main at REPL[1]:1
+# 3 methods for generic function "date" from Main:
+ [1] date(y::Int64, m::Int64, d::Int64)
+     @ REPL[2]:1
+ [2] date(y::Int64, m::Int64)
+     @ REPL[2]:1
+ [3] date(y::Int64)
+     @ REPL[2]:1
 ```
 
 ## Keyword Arguments
@@ -831,7 +833,7 @@ prior keyword arguments.
 The types of keyword arguments can be made explicit as follows:
 
 ```julia
-function f(;x::Int=1)
+function f(; x::Int=1)
     ###
 end
 ```
@@ -931,8 +933,10 @@ map([A, B, C]) do x
 end
 ```
 
-The `do x` syntax creates an anonymous function with argument `x` and passes it as the first argument
-to [`map`](@ref). Similarly, `do a,b` would create a two-argument anonymous function. Note that `do (a,b)` would create a one-argument anonymous function,
+The `do x` syntax creates an anonymous function with argument `x` and passes
+the anonymous function as the first argument
+to the "outer" function - [`map`](@ref) in this example.
+Similarly, `do a,b` would create a two-argument anonymous function. Note that `do (a,b)` would create a one-argument anonymous function,
 whose argument is a tuple to be deconstructed. A plain `do` would declare that what follows is an anonymous function of the form `() -> ...`.
 
 How these arguments are initialized depends on the "outer" function; here, [`map`](@ref) will
@@ -981,7 +985,7 @@ can create performance challenges as discussed in [performance tips](@ref man-pe
 Functions in Julia can be combined by composing or piping (chaining) them together.
 
 Function composition is when you combine functions together and apply the resulting composition to arguments.
-You use the function composition operator (`∘`) to compose the functions, so `(f ∘ g)(args...)` is the same as `f(g(args...))`.
+You use the function composition operator (`∘`) to compose the functions, so `(f ∘ g)(args...; kw...)` is the same as `f(g(args...; kw...))`.
 
 You can type the composition operator at the REPL and suitably-configured editors using `\circ<tab>`.
 
@@ -1077,13 +1081,13 @@ in advance by the library writer.
 
 More generally, `f.(args...)` is actually equivalent to `broadcast(f, args...)`, which allows
 you to operate on multiple arrays (even of different shapes), or a mix of arrays and scalars (see
-[Broadcasting](@ref)). For example, if you have `f(x,y) = 3x + 4y`, then `f.(pi,A)` will return
-a new array consisting of `f(pi,a)` for each `a` in `A`, and `f.(vector1,vector2)` will return
-a new vector consisting of `f(vector1[i],vector2[i])` for each index `i` (throwing an exception
+[Broadcasting](@ref)). For example, if you have `f(x, y) = 3x + 4y`, then `f.(pi, A)` will return
+a new array consisting of `f(pi,a)` for each `a` in `A`, and `f.(vector1, vector2)` will return
+a new vector consisting of `f(vector1[i], vector2[i])` for each index `i` (throwing an exception
 if the vectors have different length).
 
 ```jldoctest
-julia> f(x,y) = 3x + 4y;
+julia> f(x, y) = 3x + 4y;
 
 julia> A = [1.0, 2.0, 3.0];
 
@@ -1103,7 +1107,7 @@ julia> f.(A, B)
 ```
 
 Keyword arguments are not broadcasted over, but are simply passed through to each call of
-the function.  For example, `round.(x, digits=3)` is equivalent to `broadcast(x -> round(x, digits=3), x)`.
+the function. For example, `round.(x, digits=3)` is equivalent to `broadcast(x -> round(x, digits=3), x)`.
 
 Moreover, *nested* `f.(args...)` calls are *fused* into a single `broadcast` loop. For example,
 `sin.(cos.(X))` is equivalent to `broadcast(x -> sin(cos(x)), X)`, similar to `[sin(cos(x)) for x in X]`:
@@ -1159,6 +1163,8 @@ julia> 1:5 .|> [x->x^2, inv, x->2*x, -, isodd]
  true
 ```
 
+All functions in the fused broadcast are always called for every element of the result. Thus `X .+ σ .* randn.()` will add a mask of independent and identically sampled random values to each element of the array `X`, but `X .+ σ .* randn()` will add the *same* random sample to each element. In cases where the fused computation is constant along one or more axes of the broadcast iteration, it may be possible to leverage a space-time tradeoff and allocate intermediate values to reduce the number of computations. See more at [performance tips](@ref man-performance-unfuse).
+
 ## Further Reading
 
 We should mention here that this is far from a complete picture of defining functions. Julia has
diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md
index e972788022de6..502fbd59166f7 100644
--- a/doc/src/manual/getting-started.md
+++ b/doc/src/manual/getting-started.md
@@ -1,7 +1,7 @@
 # [Getting Started](@id man-getting-started)
 
 Julia installation is straightforward, whether using precompiled binaries or compiling from source.
-Download and install Julia by following the instructions at [https://julialang.org/downloads/](https://julialang.org/downloads/).
+Download and install Julia by following the instructions at [https://julialang.org/install/](https://julialang.org/install/).
 
 If you are coming to Julia from one of the following languages, then you should start by reading the section on noteworthy differences from [MATLAB](@ref Noteworthy-differences-from-MATLAB), [R](@ref Noteworthy-differences-from-R), [Python](@ref Noteworthy-differences-from-Python), [C/C++](@ref Noteworthy-differences-from-C/C) or [Common Lisp](@ref Noteworthy-differences-from-Common-Lisp). This will help you avoid some common pitfalls since Julia differs from those languages in many subtle ways.
 
@@ -10,9 +10,10 @@ known as a read-eval-print loop or "REPL") by double-clicking the Julia executab
 `julia` from the command line:
 
 ```@eval
+using REPL
 io = IOBuffer()
-Base.banner(io)
-banner = String(take!(io))
+REPL.banner(io)
+banner = takestring!(io)
 import Markdown
 Markdown.parse("```\n\$ julia\n\n$(banner)\njulia> 1 + 2\n3\n\njulia> ans\n3\n```")
 ```
@@ -55,4 +56,4 @@ search: begin disable_sigint reenable_sigint
   begin...end denotes a block of code.
 ```
 
-If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips).
+If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips), or check out the comprehensive [ModernJuliaWorkflows](https://modernjuliaworkflows.org/) blog.
diff --git a/doc/src/manual/img/cpu-profile.png b/doc/src/manual/img/cpu-profile.png
new file mode 100644
index 0000000000000..ec48b41f6e78b
Binary files /dev/null and b/doc/src/manual/img/cpu-profile.png differ
diff --git a/doc/src/manual/img/task-sampling-failure.png b/doc/src/manual/img/task-sampling-failure.png
new file mode 100644
index 0000000000000..46bbd9b33b6ce
Binary files /dev/null and b/doc/src/manual/img/task-sampling-failure.png differ
diff --git a/doc/src/manual/img/wall-time-profiler-channel-example.png b/doc/src/manual/img/wall-time-profiler-channel-example.png
new file mode 100644
index 0000000000000..26cb4a4522621
Binary files /dev/null and b/doc/src/manual/img/wall-time-profiler-channel-example.png differ
diff --git a/doc/src/manual/img/wall-time-profiler-compute-bound-example.png b/doc/src/manual/img/wall-time-profiler-compute-bound-example.png
new file mode 100644
index 0000000000000..983b01bcc2dea
Binary files /dev/null and b/doc/src/manual/img/wall-time-profiler-compute-bound-example.png differ
diff --git a/doc/src/manual/installation.md b/doc/src/manual/installation.md
new file mode 100644
index 0000000000000..60a52e8cb6e19
--- /dev/null
+++ b/doc/src/manual/installation.md
@@ -0,0 +1,126 @@
+# [Installation](@id man-installation)
+
+There are many ways to install Julia. The following sections highlight the
+recommended method for each of the main supported platforms, and then present
+alternative ways that might be useful in specialized situations.
+
+The current installation recommendation is a solution based on Juliaup. If you
+installed Julia previously with a method that is _not_ based on Juliaup and want
+to switch your system to an installation that is based on Juliaup, we recommend
+that you uninstall all previous Julia versions, ensure that you remove anything
+Julia related from your `PATH` variable and then install Julia with one of the
+methods described below.
+
+## Windows
+
+On Windows Julia can be installed directly from the Windows store
+[here](https://www.microsoft.com/store/apps/9NJNWW8PVKMN). One can also install
+exactly the same version by executing
+
+```
+winget install --name Julia --id 9NJNWW8PVKMN -e -s msstore
+```
+
+in any shell.
+
+## Mac and Linux
+
+Julia can be installed on Linux or Mac by executing
+
+```
+curl -fsSL https://install.julialang.org | sh
+```
+
+in a shell.
+
+### Command line arguments
+
+One can pass various command line arguments to the Julia installer. The syntax
+for installer arguments is
+
+```bash
+curl -fsSL https://install.julialang.org | sh -s -- <ARGS>
+```
+
+Here `<ARGS>` should be replaced with one or more of the following arguments:
+- `--yes` (or `-y`): Run the installer in a non-interactive mode. All
+  configuration values use their default or a value supplied as a command line
+  argument.
+- `--default-channel=<NAME>`: Configure the default Juliaup channel. For
+  example `--default-channel lts` would install the `lts` channel and configure it
+  as the default.
+- `--add-to-path=<yes|no>`: Configure whether Julia should be added to the `PATH`
+  environment variable. Valid values are `yes` (default) and `no`.
+- `--background-selfupdate=<SECONDS>`: Configure an optional CRON job that
+  auto-updates Juliaup if `<SECONDS>` has a value larger than 0. The actual value
+  controls how often the CRON job will run to check for a new Juliaup version in
+  seconds. The default value is 0, i.e. no CRON job will be created.
+- `--startup-selfupdate=<MINUTES>`: Configure how often Julia will check for new
+  versions of Juliaup when Julia is started. The default is every 1440 minutes.
+- `-p=<PATH>` (or `--path`): Configure where the Julia and Juliaup binaries are
+  installed. The default is `~/.juliaup`.
+
+## Alternative installation methods
+
+Note that we recommend the following methods _only_ if none of the installation
+methods described above work for your system.
+
+Some of the installation methods described below recommend installing a package
+called `juliaup`. Note that this nevertheless installs a fully functional
+Julia system, not just Juliaup.
+
+### App Installer (Windows)
+
+If the Windows Store is blocked on a system, we have an alternative
+[MSIX App Installer](https://learn.microsoft.com/en-us/windows/msix/app-installer/app-installer-file-overview)
+based setup. To use the App Installer version, download
+[this](https://install.julialang.org/Julia.appinstaller) file and open it by
+double clicking on it.
+
+### MSI Installer (Windows)
+
+If neither the Windows Store nor the App Installer version work on your Windows
+system, you can also use a MSI based installer. Note that this installation
+methods comes with serious limitations and is generally not recommended unless
+no other method works. For example, there is no automatic update mechanism for
+Juliaup with this installation method. The 64 bit version of the MSI installer
+can be downloaded from [here](https://install.julialang.org/Julia-x64.msi) and
+the 32 bit version from [here](https://install.julialang.org/Julia-x86.msi).
+
+ By default the install will be a per-user install that does not require
+ elevation. You can also do a system install by running the following command
+ from a shell:
+
+```
+msiexec /i <PATH_TO_JULIA_MSI> ALLUSERS=1
+```
+
+### [Homebrew](https://brew.sh) (Mac and Linux)
+
+On systems with brew, you can install Julia by running
+```
+brew install juliaup
+```
+in a shell. Note that you will have to update Juliaup with standard brew
+commands.
+
+### [Arch Linux - AUR](https://aur.archlinux.org/packages/juliaup/) (Linux)
+
+On Arch Linux, Juliaup is available [in the Arch User Repository (AUR)](https://aur.archlinux.org/packages/juliaup/).
+
+### [openSUSE Tumbleweed](https://get.opensuse.org/tumbleweed/) (Linux)
+
+On openSUSE Tumbleweed, you can install Julia by running
+
+```sh
+zypper install juliaup
+```
+in a shell with root privileges.
+
+### [cargo](https://crates.io/crates/juliaup/) (Windows, Mac and Linux)
+
+To install Julia via Rust's cargo, run:
+
+```sh
+cargo install juliaup
+```
diff --git a/doc/src/manual/integers-and-floating-point-numbers.md b/doc/src/manual/integers-and-floating-point-numbers.md
index 173ca7847616e..845d42e33abfb 100644
--- a/doc/src/manual/integers-and-floating-point-numbers.md
+++ b/doc/src/manual/integers-and-floating-point-numbers.md
@@ -59,7 +59,7 @@ julia> 1234
 The default type for an integer literal depends on whether the target system has a 32-bit architecture
 or a 64-bit architecture:
 
-```julia-repl
+```julia-repl ; nodoctest = "Results depend on system word size"
 # 32-bit system:
 julia> typeof(1)
 Int32
@@ -72,7 +72,7 @@ Int64
 The Julia internal variable [`Sys.WORD_SIZE`](@ref) indicates whether the target system is 32-bit
 or 64-bit:
 
-```julia-repl
+```julia-repl ; nodoctest = "Results depend on system word size"
 # 32-bit system:
 julia> Sys.WORD_SIZE
 32
@@ -85,7 +85,7 @@ julia> Sys.WORD_SIZE
 Julia also defines the types `Int` and `UInt`, which are aliases for the system's signed and unsigned
 native integer types respectively:
 
-```julia-repl
+```julia-repl ; nodoctest = "Results depend on system word size"
 # 32-bit system:
 julia> Int
 Int32
@@ -243,11 +243,10 @@ julia> x + 1 == typemin(Int64)
 true
 ```
 
-Thus, arithmetic with Julia integers is actually a form of [modular arithmetic](https://en.wikipedia.org/wiki/Modular_arithmetic).
-This reflects the characteristics of the underlying arithmetic of integers as implemented on modern
-computers. In applications where overflow is possible, explicit checking for wraparound produced
-by overflow is essential; otherwise, the [`BigInt`](@ref) type in [Arbitrary Precision Arithmetic](@ref)
-is recommended instead.
+Arithmetic operations with Julia's integer types inherently perform [modular arithmetic](https://en.wikipedia.org/wiki/Modular_arithmetic),
+mirroring the characteristics of integer arithmetic on modern computer hardware. In scenarios where overflow is a possibility,
+it is crucial to explicitly check for wraparound effects that can result from such overflows.
+The [`Base.Checked`](@ref) module provides a suite of arithmetic operations equipped with overflow checks, which trigger errors if an overflow occurs. For use cases where overflow cannot be tolerated under any circumstances, utilizing the [`BigInt`](@ref) type, as detailed in [Arbitrary Precision Arithmetic](@ref), is advisable.
 
 An example of overflow behavior and how to potentially resolve it is as follows:
 
@@ -335,8 +334,8 @@ julia> typeof(x)
 Float64
 ```
 
-Half-precision floating-point numbers are also supported ([`Float16`](@ref)), but they are
-implemented in software and use [`Float32`](@ref) for calculations.
+Half-precision floating-point numbers are also supported ([`Float16`](@ref)) on all platforms, with native instructions used on hardware which supports this number format. Otherwise, operations are implemented in software, and use [`Float32`](@ref) for intermediate calculations.
+As an internal implementation detail, this is achieved under the hood by using LLVM's [`half`](https://llvm.org/docs/LangRef.html#half-precision-floating-point-intrinsics) type, which behaves similarly to what the GCC [`-fexcess-precision=16`](https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#index-fexcess-precision) flag does for C/C++ code.
 
 ```jldoctest
 julia> sizeof(Float16(4.))
@@ -601,7 +600,7 @@ julia> parse(BigFloat, "1.23456789012345678901")
 1.234567890123456789010000000000000000000000000000000000000000000000000000000004
 
 julia> BigFloat(2.0^66) / 3
-2.459565876494606882133333333333333333333333333333333333333333333333333333333344e+19
+2.459565876494606882133333333333333333333333333333333333333333333333333333333344e19
 
 julia> factorial(BigInt(40))
 815915283247897734345611269596115894272000000000
@@ -632,7 +631,7 @@ BigInt
 
 The default precision (in number of bits of the significand) and rounding mode of [`BigFloat`](@ref)
 operations can be changed globally by calling [`setprecision`](@ref) and [`setrounding`](@ref),
-and all further calculations will take these changes in account.  Alternatively, the precision
+and all further calculations will take these changes in account. Alternatively, the precision
 or the rounding can be changed only within the execution of a particular block of code by using
 the same functions with a `do` block:
 
@@ -653,6 +652,13 @@ julia> setprecision(40) do
 1.1000000000004
 ```
 
+!!! warning
+    The relation between [`setprecision`](@ref) or [`setrounding`](@ref) and
+    [`@big_str`](@ref), the macro used for `big` string literals (such as
+    `big"0.3"`), might not be intuitive, as a consequence of the fact that
+    `@big_str` is a macro. See the [`@big_str`](@ref) documentation for
+    details.
+
 ## [Numeric Literal Coefficients](@id man-numeric-literal-coefficients)
 
 To make common numeric formulae and expressions clearer, Julia allows variables to be immediately
@@ -693,7 +699,7 @@ julia> 2(x-1)^2 - 3(x-1) + 1
 !!! note
     The precedence of numeric literal coefficients used for implicit
     multiplication is higher than other binary operators such as multiplication
-    (`*`), and division (`/`, `\`, and `//`).  This means, for example, that
+    (`*`), and division (`/`, `\`, and `//`). This means, for example, that
     `1 / 2im` equals `-0.5im` and `6 // 2(2 + 1)` equals `1 // 1`.
 
 Additionally, parenthesized expressions can be used as coefficients to variables, implying multiplication
diff --git a/doc/src/manual/interfaces.md b/doc/src/manual/interfaces.md
index bcb15da69dedf..a92c15b1aea73 100644
--- a/doc/src/manual/interfaces.md
+++ b/doc/src/manual/interfaces.md
@@ -7,29 +7,24 @@ to generically build upon those behaviors.
 
 ## [Iteration](@id man-interface-iteration)
 
-| Required methods               |                        | Brief description                                                                     |
-|:------------------------------ |:---------------------- |:------------------------------------------------------------------------------------- |
-| `iterate(iter)`                |                        | Returns either a tuple of the first item and initial state or [`nothing`](@ref) if empty        |
-| `iterate(iter, state)`         |                        | Returns either a tuple of the next item and next state or `nothing` if no items remain  |
-| **Important optional methods** | **Default definition** | **Brief description**                                                                 |
-| `Base.IteratorSize(IterType)`  | `Base.HasLength()`     | One of `Base.HasLength()`, `Base.HasShape{N}()`, `Base.IsInfinite()`, or `Base.SizeUnknown()` as appropriate |
-| `Base.IteratorEltype(IterType)`| `Base.HasEltype()`     | Either `Base.EltypeUnknown()` or `Base.HasEltype()` as appropriate                    |
-| `eltype(IterType)`             | `Any`                  | The type of the first entry of the tuple returned by `iterate()`                      |
-| `length(iter)`                 | (*undefined*)          | The number of items, if known                                                         |
-| `size(iter, [dim])`            | (*undefined*)          | The number of items in each dimension, if known                                       |
-| `Base.isdone(iter[, state])`   | `missing`              | Fast-path hint for iterator completion. Should be defined for stateful iterators, or else `isempty(iter)` may call `iterate(iter[, state])` and mutate the iterator. |
-
-| Value returned by `IteratorSize(IterType)` | Required Methods                           |
-|:------------------------------------------ |:------------------------------------------ |
-| `Base.HasLength()`                         | [`length(iter)`](@ref)                     |
-| `Base.HasShape{N}()`                       | `length(iter)`  and `size(iter, [dim])`    |
-| `Base.IsInfinite()`                        | (*none*)                                   |
-| `Base.SizeUnknown()`                       | (*none*)                                   |
-
-| Value returned by `IteratorEltype(IterType)` | Required Methods   |
-|:-------------------------------------------- |:------------------ |
-| `Base.HasEltype()`                           | `eltype(IterType)` |
-| `Base.EltypeUnknown()`                       | (*none*)           |
+There are two methods that are always required:
+
+| Required method         | Brief description                                                                        |
+|:----------------------- |:---------------------------------------------------------------------------------------- |
+| [`iterate(iter)`](@ref) | Returns either a tuple of the first item and initial state or [`nothing`](@ref) if empty |
+| `iterate(iter, state)`  | Returns either a tuple of the next item and next state or `nothing` if no items remain   |
+
+There are several more methods that should be defined in some circumstances.
+Please note that you should always define at least one of `Base.IteratorSize(IterType)` and `length(iter)` because the default definition of `Base.IteratorSize(IterType)` is `Base.HasLength()`.
+
+| Method                                  | When should this method be defined?                                         | Default definition | Brief description |
+|:--- |:--- |:--- |:--- |
+| [`Base.IteratorSize(IterType)`](@ref)   | If default is not appropriate                                               | `Base.HasLength()` | One of `Base.HasLength()`, `Base.HasShape{N}()`, `Base.IsInfinite()`, or `Base.SizeUnknown()` as appropriate |
+| [`length(iter)`](@ref)                  | If `Base.IteratorSize()` returns `Base.HasLength()` or `Base.HasShape{N}()` | (*undefined*)      | The number of items, if known |
+| [`size(iter, [dim])`](@ref)             | If `Base.IteratorSize()` returns `Base.HasShape{N}()`                       | (*undefined*)      | The number of items in each dimension, if known |
+| [`Base.IteratorEltype(IterType)`](@ref) | If default is not appropriate                                               | `Base.HasEltype()` | Either `Base.EltypeUnknown()` or `Base.HasEltype()` as appropriate |
+| [`eltype(IterType)`](@ref)              | If default is not appropriate                                               | `Any`              | The type of the first entry of the tuple returned by `iterate()` |
+| [`Base.isdone(iter, [state])`](@ref)    | **Must** be defined if iterator is stateful                                 | `missing`          | Fast-path hint for iterator completion. If not defined for a stateful iterator then functions that check for done-ness, like `isempty()` and `zip()`, may mutate the iterator and cause buggy behaviour! |
 
 Sequential iteration is implemented by the [`iterate`](@ref) function. Instead
 of mutating objects as they are iterated over, Julia iterators may keep track
@@ -96,7 +91,7 @@ julia> sum(Squares(100))
 ```
 
 There are a few more methods we can extend to give Julia more information about this iterable
-collection.  We know that the elements in a `Squares` sequence will always be `Int`. By extending
+collection. We know that the elements in a `Squares` sequence will always be `Int`. By extending
 the [`eltype`](@ref) method, we can give that information to Julia and help it make more specialized
 code in the more complicated methods. We also know the number of elements in our sequence, so
 we can extend [`length`](@ref), too:
@@ -136,7 +131,7 @@ to additionally specialize those extra behaviors when they know a more efficient
 be used in their specific case.
 
 It is also often useful to allow iteration over a collection in *reverse order*
-by iterating over [`Iterators.reverse(iterator)`](@ref).  To actually support
+by iterating over [`Iterators.reverse(iterator)`](@ref). To actually support
 reverse-order iteration, however, an iterator
 type `T` needs to implement `iterate` for `Iterators.Reverse{T}`.
 (Given `r::Iterators.Reverse{T}`, the underling iterator of type `T` is `r.itr`.)
@@ -157,13 +152,13 @@ julia> collect(Iterators.reverse(Squares(4)))
 
 | Methods to implement | Brief description                |
 |:-------------------- |:-------------------------------- |
-| `getindex(X, i)`     | `X[i]`, indexed element access   |
-| `setindex!(X, v, i)` | `X[i] = v`, indexed assignment   |
+| `getindex(X, i)`     | `X[i]`, indexed access, non-scalar `i` should allocate a copy  |
+| `setindex!(X, v, i)` | `X[i] = v`, indexed assignment         |
 | `firstindex(X)`         | The first index, used in `X[begin]` |
-| `lastindex(X)`           | The last index, used in `X[end]` |
+| `lastindex(X)`           | The last index, used in `X[end]`   |
 
 For the `Squares` iterable above, we can easily compute the `i`th element of the sequence by squaring
-it.  We can expose this as an indexing expression `S[i]`. To opt into this behavior, `Squares`
+it. We can expose this as an indexing expression `S[i]`. To opt into this behavior, `Squares`
 simply needs to define [`getindex`](@ref):
 
 ```jldoctest squaretype
@@ -238,12 +233,12 @@ ourselves, we can officially define it as a subtype of an [`AbstractArray`](@ref
 | `similar(T::Union{Type,Function}, inds)`   | `T(Base.to_shape(inds))`               | Return an array similar to `T` with the specified indices `inds` (see below)          |
 
 If a type is defined as a subtype of `AbstractArray`, it inherits a very large set of rich behaviors
-including iteration and multidimensional indexing built on top of single-element access.  See
+including iteration and multidimensional indexing built on top of single-element access. See
 the [arrays manual page](@ref man-multi-dim-arrays) and the [Julia Base section](@ref lib-arrays) for more supported methods.
 
 A key part in defining an `AbstractArray` subtype is [`IndexStyle`](@ref). Since indexing is
 such an important part of an array and often occurs in hot loops, it's important to make both
-indexing and indexed assignment as efficient as possible.  Array data structures are typically
+indexing and indexed assignment as efficient as possible. Array data structures are typically
 defined in one of two ways: either it most efficiently accesses its elements using just one index
 (linear indexing) or it intrinsically accesses the elements with indices specified for every dimension.
  These two modalities are identified by Julia as `IndexLinear()` and `IndexCartesian()`.
@@ -251,7 +246,7 @@ defined in one of two ways: either it most efficiently accesses its elements usi
 provides a traits-based mechanism to enable efficient generic code for all array types.
 
 This distinction determines which scalar indexing methods the type must define. `IndexLinear()`
-arrays are simple: just define `getindex(A::ArrayType, i::Int)`.  When the array is subsequently
+arrays are simple: just define `getindex(A::ArrayType, i::Int)`. When the array is subsequently
 indexed with a multidimensional set of indices, the fallback `getindex(A::AbstractArray, I...)`
 efficiently converts the indices into one linear index and then calls the above method. `IndexCartesian()`
 arrays, on the other hand, require methods to be defined for each supported dimensionality with
@@ -260,10 +255,10 @@ library module, only supports two dimensions, so it just defines
 `getindex(A::SparseMatrixCSC, i::Int, j::Int)`. The same holds for [`setindex!`](@ref).
 
 Returning to the sequence of squares from above, we could instead define it as a subtype of an
-`AbstractArray{Int, 1}`:
+`AbstractVector{Int}`:
 
 ```jldoctest squarevectype
-julia> struct SquaresVector <: AbstractArray{Int, 1}
+julia> struct SquaresVector <: AbstractVector{Int}
            count::Int
        end
 
@@ -406,18 +401,46 @@ perhaps range-types `Ind` of your own design. For more information, see
 |:----------------------------------------------- |:-------------------------------------- |:------------------------------------------------------------------------------------- |
 | `strides(A)`                                    |                                        | Return the distance in memory (in number of elements) between adjacent elements in each dimension as a tuple. If `A` is an `AbstractArray{T,0}`, this should return an empty tuple.    |
 | `Base.unsafe_convert(::Type{Ptr{T}}, A)`        |                                        | Return the native address of an array.                                                             |
-| `Base.elsize(::Type{<:A})`                      |                                        | Return the stride between consecutive elements in the array.                                       |
+| `Base.elsize(::Type{<:A})`                      |                                        | Return the stride (in number of bytes) between consecutive elements in the array.                                       |
 | **Optional methods**                            | **Default definition**                 | **Brief description**                                                                              |
-| `stride(A, i::Int)`                             |     `strides(A)[i]`                    | Return the distance in memory (in number of elements) between adjacent elements in dimension k.    |
+| `stride(A, i::Int)`                             |     `strides(A)[i]`                    | Return the distance in memory (in number of elements) between adjacent elements in dimension i.    |
 
 A strided array is a subtype of `AbstractArray` whose entries are stored in memory with fixed strides.
 Provided the element type of the array is compatible with BLAS, a strided array can utilize BLAS and LAPACK routines
-for more efficient linear algebra routines.  A typical example of a user-defined strided array is one
+for more efficient linear algebra routines. A typical example of a user-defined strided array is one
 that wraps a standard `Array` with additional structure.
 
 Warning: do not implement these methods if the underlying storage is not actually strided, as it
 may lead to incorrect results or segmentation faults.
 
+The following function demonstrates how an element at indices `I` in a strided array `A` can be accessed.
+This function assumes the element type `isbitstype` and the indices are inbounds.
+
+```jldoctest
+julia> function unsafe_strided_getindex(A::AbstractArray{T,N}, I::Vararg{Int, N})::T where {T, N}
+           A_cconv = Base.cconvert(Ptr{T}, A)
+           GC.@preserve A_cconv begin
+               A_ptr = Base.unsafe_convert(Ptr{T}, A_cconv)
+               for d in 1:N
+                   stride_in_bytes = stride(A, d) * Base.elsize(typeof(A))
+                   first_idx = first(axes(A, d))
+                   A_ptr += (I[d] - first_idx) * stride_in_bytes
+               end
+               unsafe_load(A_ptr)
+           end
+       end;
+
+julia> A = [1 5; 2 6; 3 7; 4 8];
+
+julia> unsafe_strided_getindex(A, 3, 2)
+7
+
+julia> V = view(A, 1:2:3, 1:2);
+
+julia> unsafe_strided_getindex(V, 2, 2)
+7
+```
+
 Here are some examples to demonstrate which type of arrays are strided and which are not:
 ```julia
 1:5   # not strided (there is no storage associated with this array.)
@@ -469,7 +492,7 @@ container for broadcasting, then the following method should be defined:
 ```julia
 Base.broadcastable(o::MyType) = Ref(o)
 ```
-that returns the argument wrapped in a 0-dimensional [`Ref`](@ref) container.   For example, such a wrapper
+that returns the argument wrapped in a 0-dimensional [`Ref`](@ref) container. For example, such a wrapper
 method is defined for types themselves, functions, special singletons like [`missing`](@ref) and [`nothing`](@ref), and dates.
 
 Custom array-like types can specialize
@@ -531,8 +554,8 @@ similar(bc::Broadcasted{DefaultArrayStyle{N}}, ::Type{ElType}) where {N,ElType}
 
 However, if needed you can specialize on any or all of these arguments. The final argument
 `bc` is a lazy representation of a (potentially fused) broadcast operation, a `Broadcasted`
-object.  For these purposes, the most important fields of the wrapper are
-`f` and `args`, describing the function and argument list, respectively.  Note that the argument
+object. For these purposes, the most important fields of the wrapper are
+`f` and `args`, describing the function and argument list, respectively. Note that the argument
 list can — and often does — include other nested `Broadcasted` wrappers.
 
 For a complete example, let's say you have created a type, `ArrayAndChar`, that stores an
@@ -841,3 +864,51 @@ julia> p.r
 Finally, it is worth noting that adding instance properties like this is quite
 rarely done in Julia and should in general only be done if there is a good
 reason for doing so.
+
+## [Rounding](@id man-rounding-interface)
+
+| Methods to implement                          | Default definition        | Brief description                                                                                   |
+|:--------------------------------------------- |:------------------------- |:--------------------------------------------------------------------------------------------------- |
+| `round(x::ObjType, r::RoundingMode)`          | none                      | Round `x` and return the result. If possible, round should return an object of the same type as `x` |
+| `round(T::Type, x::ObjType, r::RoundingMode)` | `convert(T, round(x, r))` | Round `x`, returning the result as a `T`                                                            |
+
+To support rounding on a new type it is typically sufficient to define the single method
+`round(x::ObjType, r::RoundingMode)`. The passed rounding mode determines in which direction
+the value should be rounded. The most commonly used rounding modes are `RoundNearest`,
+`RoundToZero`, `RoundDown`, and `RoundUp`, as these rounding modes are used in the
+definitions of the one argument `round`, method, and `trunc`, `floor`, and `ceil`,
+respectively.
+
+In some cases, it is possible to define a three-argument `round` method that is more
+accurate or performant than the two-argument method followed by conversion. In this case it
+is acceptable to define the three argument method in addition to the two argument method.
+If it is impossible to represent the rounded result as an object of the type `T`,
+then the three argument method should throw an `InexactError`.
+
+For example, if we have an `Interval` type which represents a range of possible values
+similar to https://github.com/JuliaPhysics/Measurements.jl, we may define rounding on that
+type with the following
+
+```jldoctest
+julia> struct Interval{T}
+           min::T
+           max::T
+       end
+
+julia> Base.round(x::Interval, r::RoundingMode) = Interval(round(x.min, r), round(x.max, r))
+
+julia> x = Interval(1.7, 2.2)
+Interval{Float64}(1.7, 2.2)
+
+julia> round(x)
+Interval{Float64}(2.0, 2.0)
+
+julia> floor(x)
+Interval{Float64}(1.0, 2.0)
+
+julia> ceil(x)
+Interval{Float64}(2.0, 3.0)
+
+julia> trunc(x)
+Interval{Float64}(1.0, 2.0)
+```
diff --git a/doc/src/manual/mathematical-operations.md b/doc/src/manual/mathematical-operations.md
index 0c33b45b609b7..20abf40917a18 100644
--- a/doc/src/manual/mathematical-operations.md
+++ b/doc/src/manual/mathematical-operations.md
@@ -22,7 +22,7 @@ are supported on all primitive numeric types:
 | `x ^ y`    | power          | raises `x` to the `y`th power           |
 | `x % y`    | remainder      | equivalent to `rem(x, y)`               |
 
-A numeric literal placed directly before an identifier or parentheses, e.g. `2x` or `2(x+y)`, is treated as a multiplication, except with higher precedence than other binary operations.  See [Numeric Literal Coefficients](@ref man-numeric-literal-coefficients) for details.
+A numeric literal placed directly before an identifier or parentheses, e.g. `2x` or `2(x + y)`, is treated as a multiplication, except with higher precedence than other binary operations. See [Numeric Literal Coefficients](@ref man-numeric-literal-coefficients) for details.
 
 Julia's promotion system makes arithmetic operations on mixtures of argument types "just work"
 naturally and automatically. See [Conversion and Promotion](@ref conversion-and-promotion) for details of the promotion
@@ -47,18 +47,6 @@ julia> 3*2/12
 operators. For instance, we would generally write `-x + 2` to reflect that first `x` gets negated,
 and then `2` is added to that result.)
 
-When used in multiplication, `false` acts as a *strong zero*:
-
-```jldoctest
-julia> NaN * false
-0.0
-
-julia> false * Inf
-0.0
-```
-
-This is useful for preventing the propagation of `NaN` values in quantities that are known to be zero. See [Knuth (1992)](https://arxiv.org/abs/math/9205211) for motivation.
-
 ## Boolean Operators
 
 The following [Boolean operators](https://en.wikipedia.org/wiki/Boolean_algebra#Operations) are supported on [`Bool`](@ref) types:
@@ -71,7 +59,29 @@ The following [Boolean operators](https://en.wikipedia.org/wiki/Boolean_algebra#
 
 Negation changes `true` to `false` and vice versa. The short-circuiting operations are explained on the linked page.
 
-Note that `Bool` is an integer type and all the usual promotion rules and numeric operators are also defined on it.
+## Arithmetic operations with `Bool` values
+
+Note that `Bool` is an integer type, such that `false` is numerically equal to `0` and `true` is numerically equal to `1`. All the usual promotion rules and numeric operators are also defined on it, with a special behavior of arithmetic (non-Boolean) operations when all the arguments are `Bool`: in those cases, the arguments are promoted to `Int` instead of keeping their type. Compare e.g. the following equivalent operations with `Bool` and with a different numeric type (`UInt8`):
+
+```jldoctest
+julia> true - true
+0
+
+julia> 0x01 - 0x01
+0x00
+```
+
+Also, when used in multiplication, `false` acts as a *strong zero*:
+
+```jldoctest
+julia> NaN * false
+0.0
+
+julia> false * Inf
+0.0
+```
+
+This is useful for preventing the propagation of `NaN` values in quantities that are known to be zero. See [Knuth (1992)](https://arxiv.org/abs/math/9205211) for motivation.
 
 ## Bitwise Operators
 
@@ -174,7 +184,7 @@ to perform `^` element-by-element on arrays. For example,
 `[1, 2, 3] ^ 3` is not defined, since there is no standard
 mathematical meaning to "cubing" a (non-square) array, but
 `[1, 2, 3] .^ 3` is defined as computing the elementwise
-(or "vectorized") result `[1^3, 2^3, 3^3]`.  Similarly for unary
+(or "vectorized") result `[1^3, 2^3, 3^3]`. Similarly for unary
 operators like `!` or `√`, there is a corresponding `.√` that
 applies the operator elementwise.
 
@@ -204,9 +214,9 @@ as `a .= a .+ b`, where `.=` is a fused *in-place* assignment operation
 (see the [dot syntax documentation](@ref man-vectorized)).
 
 Note the dot syntax is also applicable to user-defined operators.
-For example, if you define `⊗(A,B) = kron(A,B)` to give a convenient
+For example, if you define `⊗(A, B) = kron(A, B)` to give a convenient
 infix syntax `A ⊗ B` for Kronecker products ([`kron`](@ref)), then
-`[A,B] .⊗ [C,D]` will compute `[A⊗C, B⊗D]` with no additional coding.
+`[A, B] .⊗ [C, D]` will compute `[A⊗C, B⊗D]` with no additional coding.
 
 Combining dot operators with numeric literals can be ambiguous.
 For example, it is not clear whether `1.+x` means `1. + x` or `1 .+ x`.
@@ -332,7 +342,7 @@ Mixed-type comparisons between signed integers, unsigned integers, and floats ca
 great deal of care has been taken to ensure that Julia does them correctly.
 
 For other types, `isequal` defaults to calling [`==`](@ref), so if you want to define
-equality for your own types then you only need to add a [`==`](@ref) method.  If you define
+equality for your own types then you only need to add a [`==`](@ref) method. If you define
 your own equality function, you should probably define a corresponding [`hash`](@ref) method
 to ensure that `isequal(x,y)` implies `hash(x) == hash(y)`.
 
@@ -394,7 +404,7 @@ Julia applies the following order and associativity of operations, from highest
 |:-------------- |:------------------------------------------------------------------------------------------------- |:-------------------------- |
 | Syntax         | `.` followed by `::`                                                                              | Left                       |
 | Exponentiation | `^`                                                                                               | Right                      |
-| Unary          | `+ - √`                                                                                           | Right[^1]                  |
+| Unary          | `+ - ! ~ ¬ √ ∛ ∜ ⋆ ± ∓ <: >:`                                                                     | Right[^1]                  |
 | Bitshifts      | `<< >> >>>`                                                                                       | Left                       |
 | Fractions      | `//`                                                                                              | Left                       |
 | Multiplication | `* / % & \ ÷`                                                                                     | Left[^2]                   |
@@ -457,7 +467,7 @@ Juxtaposition parses like a unary operator, which has the same natural asymmetry
 Julia supports three forms of numerical conversion, which differ in their handling of inexact
 conversions.
 
-  * The notation `T(x)` or `convert(T,x)` converts `x` to a value of type `T`.
+  * The notation `T(x)` or `convert(T, x)` converts `x` to a value of type `T`.
 
       * If `T` is a floating-point type, the result is the nearest representable value, which could be
         positive or negative infinity.
@@ -502,7 +512,7 @@ julia> round(Int8,127.4)
 127
 
 julia> round(Int8,127.6)
-ERROR: InexactError: trunc(Int8, 128.0)
+ERROR: InexactError: Int8(128.0)
 Stacktrace:
 [...]
 ```
@@ -534,7 +544,7 @@ See [Conversion and Promotion](@ref conversion-and-promotion) for how to define
 | [`mod1(x, y)`](@ref)       | `mod` with offset 1; returns `r∈(0, y]` for `y>0` or `r∈[y, 0)` for `y<0`, where `mod(r, y) == mod(x, y)` |
 | [`mod2pi(x)`](@ref)        | modulus with respect to 2pi;  `0 <= mod2pi(x) < 2pi`                                                      |
 | [`divrem(x, y)`](@ref)     | returns `(div(x, y),rem(x, y))`                                                                           |
-| [`fldmod(x, y)`](@ref)     | returns `(fld(x, y),mod(x, y ))`                                                                          |
+| [`fldmod(x, y)`](@ref)     | returns `(fld(x, y), mod(x, y))`                                                                          |
 | [`gcd(x, y...)`](@ref)     | greatest positive common divisor of `x`, `y`,...                                                          |
 | [`lcm(x, y...)`](@ref)     | least positive common multiple of `x`, `y`,...                                                            |
 
@@ -551,21 +561,22 @@ See [Conversion and Promotion](@ref conversion-and-promotion) for how to define
 
 ### Powers, logs and roots
 
-| Function                 | Description                                                                |
-|:------------------------ |:-------------------------------------------------------------------------- |
-| [`sqrt(x)`](@ref), `√x`  | square root of `x`                                                         |
-| [`cbrt(x)`](@ref), `∛x`  | cube root of `x`                                                           |
-| [`hypot(x, y)`](@ref)    | hypotenuse of right-angled triangle with other sides of length `x` and `y` |
-| [`exp(x)`](@ref)         | natural exponential function at `x`                                        |
-| [`expm1(x)`](@ref)       | accurate `exp(x)-1` for `x` near zero                                      |
-| [`ldexp(x, n)`](@ref)    | `x*2^n` computed efficiently for integer values of `n`                     |
-| [`log(x)`](@ref)         | natural logarithm of `x`                                                   |
-| [`log(b, x)`](@ref)      | base `b` logarithm of `x`                                                  |
-| [`log2(x)`](@ref)        | base 2 logarithm of `x`                                                    |
-| [`log10(x)`](@ref)       | base 10 logarithm of `x`                                                   |
-| [`log1p(x)`](@ref)       | accurate `log(1+x)` for `x` near zero                                      |
-| [`exponent(x)`](@ref)    | binary exponent of `x`                                                     |
-| [`significand(x)`](@ref) | binary significand (a.k.a. mantissa) of a floating-point number `x`        |
+| Function                      | Description                                                                |
+|:----------------------------- |:-------------------------------------------------------------------------- |
+| [`sqrt(x)`](@ref), `√x`       | square root of `x`                                                         |
+| [`cbrt(x)`](@ref), `∛x`       | cube root of `x`                                                           |
+| [`fourthroot(x)`](@ref), `∜x` | fourth root of `x`                                                         |
+| [`hypot(x, y)`](@ref)         | hypotenuse of right-angled triangle with other sides of length `x` and `y` |
+| [`exp(x)`](@ref)              | natural exponential function at `x`                                        |
+| [`expm1(x)`](@ref)            | accurate `exp(x) - 1` for `x` near zero                                    |
+| [`ldexp(x, n)`](@ref)         | `x * 2^n` computed efficiently for integer values of `n`                   |
+| [`log(x)`](@ref)              | natural logarithm of `x`                                                   |
+| [`log(b, x)`](@ref)           | base `b` logarithm of `x`                                                  |
+| [`log2(x)`](@ref)             | base 2 logarithm of `x`                                                    |
+| [`log10(x)`](@ref)            | base 10 logarithm of `x`                                                   |
+| [`log1p(x)`](@ref)            | accurate `log(1 + x)` for `x` near zero                                    |
+| [`exponent(x)`](@ref)         | binary exponent of `x`                                                     |
+| [`significand(x)`](@ref)      | binary significand (a.k.a. mantissa) of a floating-point number `x`        |
 
 For an overview of why functions like [`hypot`](@ref), [`expm1`](@ref), and [`log1p`](@ref)
 are necessary and useful, see John D. Cook's excellent pair of blog posts on the subject: [expm1, log1p, erfc](https://www.johndcook.com/blog/2010/06/07/math-library-functions-that-seem-unnecessary/),
@@ -587,7 +598,7 @@ These are all single-argument functions, with [`atan`](@ref) also accepting two
 corresponding to a traditional [`atan2`](https://en.wikipedia.org/wiki/Atan2) function.
 
 Additionally, [`sinpi(x)`](@ref) and [`cospi(x)`](@ref) are provided for more accurate computations
-of [`sin(pi*x)`](@ref) and [`cos(pi*x)`](@ref) respectively.
+of [`sin(pi * x)`](@ref) and [`cos(pi * x)`](@ref) respectively.
 
 In order to compute trigonometric functions with degrees instead of radians, suffix the function
 with `d`. For example, [`sind(x)`](@ref) computes the sine of `x` where `x` is specified in degrees.
diff --git a/doc/src/manual/memory-management.md b/doc/src/manual/memory-management.md
new file mode 100644
index 0000000000000..4efa683e3f249
--- /dev/null
+++ b/doc/src/manual/memory-management.md
@@ -0,0 +1,177 @@
+# [Memory Management and Garbage Collection](@id man-memory-management)
+
+Julia uses automatic memory management through its built-in garbage collector (GC). This section provides an overview of how Julia manages memory and how you can configure and optimize memory usage for your applications.
+
+## [Garbage Collection Overview](@id man-gc-overview)
+
+Julia features a garbage collector with the following characteristics:
+
+* **Non-moving**: Objects are not relocated in memory during garbage collection
+* **Generational**: Younger objects are collected more frequently than older ones
+* **Parallel and partially concurrent**: The GC can use multiple threads and run concurrently with your program
+* **Mostly precise**: The GC accurately identifies object references for pure Julia code, and it provides conservative scanning APIs for users calling Julia from C
+
+The garbage collector automatically reclaims memory used by objects that are no longer reachable from your program, freeing you from manual memory management in most cases.
+
+## [Memory Architecture](@id man-memory-architecture)
+
+Julia uses a two-tier allocation strategy:
+
+* **Small objects** (currently ≤ 2032 bytes but may change): Allocated using a fast per-thread pool allocator
+* **Large objects** : Allocated directly through the system's `malloc`
+
+This hybrid approach optimizes for both allocation speed and memory efficiency, with the pool allocator providing fast allocation for the many small objects typical in Julia programs.
+
+## [System Memory Requirements](@id man-system-memory)
+
+### Swap Space
+
+Julia's garbage collector is designed with the expectation that your system has adequate swap space configured. The GC uses heuristics that assume it can allocate memory beyond physical RAM when needed, relying on the operating system's virtual memory management.
+
+If your system has limited or no swap space, you may experience out-of-memory errors during garbage collection. In such cases, you can use the `--heap-size-hint` option to limit Julia's memory usage.
+
+### Memory Hints
+
+You can provide a hint to Julia about the maximum amount of memory to use:
+
+```bash
+julia --heap-size-hint=4G  # To set the hint to ~4GB
+julia --heap-size-hint=50% # or to 50% of physical memory
+```
+
+The `--heap-size-hint` option tells the garbage collector to trigger collection more aggressively when approaching the specified limit. This is particularly useful in:
+
+* Containers with memory limits
+* Systems without swap space
+* Shared systems where you want to limit Julia's memory footprint
+
+You can also set this via the `JULIA_HEAP_SIZE_HINT` environment variable:
+
+```bash
+export JULIA_HEAP_SIZE_HINT=2G
+julia
+```
+
+## [Multithreaded Garbage Collection](@id man-gc-multithreading)
+
+Julia's garbage collector can leverage multiple threads to improve performance on multi-core systems.
+
+### GC Thread Configuration
+
+By default, Julia uses multiple threads for garbage collection:
+
+* **Mark threads**: Used during the mark phase to trace object references (default: 1, which is shared with the compute thread if there is only one, otherwise half the number of compute threads)
+* **Sweep threads**: Used for concurrent sweeping of freed memory (default: 0, disabled)
+
+You can configure GC threading using:
+
+```bash
+julia --gcthreads=4,1  # 4 mark threads, 1 sweep thread
+julia --gcthreads=8    # 8 mark threads, 0 sweep threads
+```
+
+Or via environment variable:
+
+```bash
+export JULIA_NUM_GC_THREADS=4,1
+julia
+```
+
+### Recommendations
+
+For compute-intensive workloads:
+
+* Use multiple mark threads (the default configuration is usually appropriate)
+* Consider enabling concurrent sweeping with 1 sweep thread for allocation-heavy workloads
+
+For memory-intensive workloads:
+
+* Enable concurrent sweeping to reduce GC pauses
+* Monitor GC time using `@time` and adjust thread counts accordingly
+
+## [Monitoring and Debugging](@id man-gc-monitoring)
+
+### Basic Memory Monitoring
+
+Use the `@time` macro to see memory allocation and GC overhead:
+
+```julia
+julia> @time some_computation()
+  2.123456 seconds (1.50 M allocations: 58.725 MiB, 17.17% gc time)
+```
+
+### GC Logging
+
+Enable detailed GC logging to understand collection patterns:
+
+```julia
+julia> GC.enable_logging(true)
+julia> # Run your code
+julia> GC.enable_logging(false)
+```
+
+This logs each garbage collection event with timing and memory statistics.
+
+### Manual GC Control
+
+While generally not recommended, you can manually trigger garbage collection:
+
+```julia
+GC.gc()          # Force a garbage collection
+GC.enable(false) # Disable automatic GC (use with caution!)
+GC.enable(true)  # Re-enable automatic GC
+```
+
+**Warning**: Disabling GC can lead to memory exhaustion. Only use this for specific performance measurements or debugging.
+
+## [Performance Considerations](@id man-gc-performance)
+
+### Reducing Allocations
+
+The best way to minimize GC impact is to reduce unnecessary allocations:
+
+* Use in-place operations when possible (e.g., `x .+= y` instead of `x = x + y`)
+* Pre-allocate arrays and reuse them
+* Avoid creating temporary objects in tight loops
+* Consider using `StaticArrays.jl` for small, fixed-size arrays
+
+### Memory-Efficient Patterns
+
+* Avoid global variables that change type
+* Use `const` for global constants
+
+### Profiling Memory Usage
+
+For detailed guidance on profiling memory allocations and identifying performance bottlenecks, see the [Profiling](@ref man-profiling) section.
+
+## [Advanced Configuration](@id man-gc-advanced)
+
+### Integration with System Memory Management
+
+Julia works best when:
+
+* The system has adequate swap space (recommended: 2x physical RAM)
+* Virtual memory is properly configured
+* Other processes leave sufficient memory available
+* Container memory limits are set appropriately with `--heap-size-hint`
+
+## [Troubleshooting Memory Issues](@id man-gc-troubleshooting)
+
+### High GC Overhead
+
+If garbage collection is taking too much time:
+
+1. **Reduce allocation rate**: Focus on algorithmic improvements
+2. **Adjust GC threads**: Experiment with different `--gcthreads` settings
+3. **Use concurrent sweeping**: Enable background sweeping with `--gcthreads=N,1`
+4. **Profile memory patterns**: Identify allocation hotspots and optimize them
+
+### Memory Leaks
+
+While Julia's GC prevents most memory leaks, issues can still occur:
+
+* **Global references**: Avoid holding references to large objects in global variables
+* **Closures**: Be careful with closures that capture large amounts of data
+* **C interop**: Ensure proper cleanup when interfacing with C libraries
+
+For more detailed information about Julia's garbage collector internals, see the Garbage Collection section in the Developer Documentation.
diff --git a/doc/src/manual/metaprogramming.md b/doc/src/manual/metaprogramming.md
index 2d7deae0f1c54..38d688f427078 100644
--- a/doc/src/manual/metaprogramming.md
+++ b/doc/src/manual/metaprogramming.md
@@ -363,6 +363,14 @@ QuoteNode
 
 `QuoteNode` can also be used for certain advanced metaprogramming tasks.
 
+Note that while it does not support `$`, it also does not prevent it, nor does
+it preserve the identity of the wrapped object:
+
+```jldoctest
+julia> b = 2; eval(Expr(:quote, QuoteNode(Expr(:$, :b))))
+:($(QuoteNode(2)))
+```
+
 ### Evaluating expressions
 
 Given an expression object, one can cause Julia to evaluate (execute) it at global scope using
@@ -379,7 +387,7 @@ julia> ex = :(a + b)
 :(a + b)
 
 julia> eval(ex)
-ERROR: UndefVarError: `b` not defined
+ERROR: UndefVarError: `b` not defined in `Main`
 [...]
 
 julia> a = 1; b = 2;
@@ -397,7 +405,7 @@ julia> ex = :(x = 1)
 :(x = 1)
 
 julia> x
-ERROR: UndefVarError: `x` not defined
+ERROR: UndefVarError: `x` not defined in `Main`
 
 julia> eval(ex)
 1
@@ -413,7 +421,7 @@ Since expressions are just `Expr` objects which can be constructed programmatica
 it is possible to dynamically generate arbitrary code which can then be run using [`eval`](@ref).
 Here is a simple example:
 
-```julia-repl
+```jldoctest
 julia> a = 1;
 
 julia> ex = Expr(:call, :+, a, :b)
@@ -629,6 +637,15 @@ julia> @showarg(1+1)
 
 julia> @showarg(println("Yo!"))
 :(println("Yo!"))
+
+julia> @showarg(1)        # Numeric literal
+1
+
+julia> @showarg("Yo!")    # String literal
+"Yo!"
+
+julia> @showarg("Yo! $("hello")")    # String with interpolation is an Expr rather than a String
+:("Yo! $("hello")")
 ```
 
 In addition to the given argument list, every macro is passed extra arguments named `__source__` and `__module__`.
@@ -702,7 +719,7 @@ user to optionally specify their own error message, instead of just printing the
 Just like in functions with a variable number of arguments ([Varargs Functions](@ref)), this is specified with an ellipses
 following the last argument:
 
-```jldoctest assert2
+```julia-repl assert2
 julia> macro assert(ex, msgs...)
            msg_body = isempty(msgs) ? ex : msgs[1]
            msg = string(msg_body)
@@ -1340,8 +1357,7 @@ julia> function sub2ind_loop(dims::NTuple{N}, I::Integer...) where N
                ind = I[i]-1 + dims[i]*ind
            end
            return ind + 1
-       end
-sub2ind_loop (generic function with 1 method)
+       end;
 
 julia> sub2ind_loop((3, 5), 1, 2)
 4
@@ -1380,8 +1396,7 @@ julia> @generated function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N
                ex = :(I[$i] - 1 + dims[$i] * $ex)
            end
            return :($ex + 1)
-       end
-sub2ind_gen (generic function with 1 method)
+       end;
 
 julia> sub2ind_gen((3, 5), 1, 2)
 4
@@ -1392,11 +1407,6 @@ julia> sub2ind_gen((3, 5), 1, 2)
 An easy way to find out is to extract the body into another (regular) function:
 
 ```jldoctest sub2ind_gen2
-julia> @generated function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N
-           return sub2ind_gen_impl(dims, I...)
-       end
-sub2ind_gen (generic function with 1 method)
-
 julia> function sub2ind_gen_impl(dims::Type{T}, I...) where T <: NTuple{N,Any} where N
            length(I) == N || return :(error("partial indexing is unsupported"))
            ex = :(I[$N] - 1)
@@ -1404,8 +1414,14 @@ julia> function sub2ind_gen_impl(dims::Type{T}, I...) where T <: NTuple{N,Any} w
                ex = :(I[$i] - 1 + dims[$i] * $ex)
            end
            return :($ex + 1)
-       end
-sub2ind_gen_impl (generic function with 1 method)
+       end;
+
+julia> @generated function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N
+           return sub2ind_gen_impl(dims, I...)
+       end;
+
+julia> sub2ind_gen((3, 5), 1, 2)
+4
 ```
 
 We can now execute `sub2ind_gen_impl` and examine the expression it returns:
@@ -1434,25 +1450,34 @@ To solve this problem, the language provides syntax for writing normal, non-gene
 alternative implementations of generated functions.
 Applied to the `sub2ind` example above, it would look like this:
 
-```julia
-function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N
-    if N != length(I)
-        throw(ArgumentError("Number of dimensions must match number of indices."))
-    end
-    if @generated
-        ex = :(I[$N] - 1)
-        for i = (N - 1):-1:1
-            ex = :(I[$i] - 1 + dims[$i] * $ex)
-        end
-        return :($ex + 1)
-    else
-        ind = I[N] - 1
-        for i = (N - 1):-1:1
-            ind = I[i] - 1 + dims[i]*ind
-        end
-        return ind + 1
-    end
-end
+```jldoctest sub2ind_gen_opt
+julia> function sub2ind_gen_impl(dims::Type{T}, I...) where T <: NTuple{N,Any} where N
+           ex = :(I[$N] - 1)
+           for i = (N - 1):-1:1
+               ex = :(I[$i] - 1 + dims[$i] * $ex)
+           end
+           return :($ex + 1)
+       end;
+
+julia> function sub2ind_gen_fallback(dims::NTuple{N}, I) where N
+           ind = I[N] - 1
+           for i = (N - 1):-1:1
+               ind = I[i] - 1 + dims[i]*ind
+           end
+           return ind + 1
+       end;
+
+julia> function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N
+           length(I) == N || error("partial indexing is unsupported")
+           if @generated
+               return sub2ind_gen_impl(dims, I...)
+           else
+               return sub2ind_gen_fallback(dims, I)
+           end
+       end;
+
+julia> sub2ind_gen((3, 5), 1, 2)
+4
 ```
 
 Internally, this code creates two implementations of the function: a generated one where
diff --git a/doc/src/manual/methods.md b/doc/src/manual/methods.md
index 8ca00aa1cfe76..bbba8b5473630 100644
--- a/doc/src/manual/methods.md
+++ b/doc/src/manual/methods.md
@@ -76,6 +76,7 @@ Applying it to any other types of arguments will result in a [`MethodError`](@re
 ```jldoctest fofxy
 julia> f(2.0, 3)
 ERROR: MethodError: no method matching f(::Float64, ::Int64)
+The function `f` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   f(::Float64, !Matched::Float64)
@@ -86,6 +87,7 @@ Stacktrace:
 
 julia> f(Float32(2.0), 3.0)
 ERROR: MethodError: no method matching f(::Float32, ::Float64)
+The function `f` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   f(!Matched::Float64, ::Float64)
@@ -96,6 +98,7 @@ Stacktrace:
 
 julia> f(2.0, "3.0")
 ERROR: MethodError: no method matching f(::Float64, ::String)
+The function `f` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   f(::Float64, !Matched::Float64)
@@ -106,6 +109,7 @@ Stacktrace:
 
 julia> f("2.0", "3.0")
 ERROR: MethodError: no method matching f(::String, ::String)
+The function `f` exists, but no method is defined for this combination of argument types.
 ```
 
 As you can see, the arguments must be precisely of type [`Float64`](@ref). Other numeric
@@ -164,16 +168,20 @@ and applying it will still result in a [`MethodError`](@ref):
 ```jldoctest fofxy
 julia> f("foo", 3)
 ERROR: MethodError: no method matching f(::String, ::Int64)
+The function `f` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   f(!Matched::Number, ::Number)
    @ Main none:1
+  f(!Matched::Float64, !Matched::Float64)
+   @ Main none:1
 
 Stacktrace:
 [...]
 
 julia> f()
 ERROR: MethodError: no method matching f()
+The function `f` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   f(!Matched::Float64, !Matched::Float64)
@@ -280,13 +288,17 @@ Such specializations are *not* listed by `methods`, as this doesn't create new `
 
 For example, if you create a method
 
-```
+```jldoctest mysum_example; output = false
 mysum(x::Real, y::Real) = x + y
+
+# output
+
+mysum (generic function with 1 method)
 ```
 
 you've given the function `mysum` one new method (possibly its only method), and that method takes any pair of `Real` number inputs. But if you then execute
 
-```julia-repl
+```jldoctest mysum_example
 julia> mysum(1, 2)
 3
 
@@ -334,10 +346,11 @@ Stacktrace:
 [...]
 ```
 
-Here the call `g(2.0, 3.0)` could be handled by either the `g(Float64, Any)` or the `g(Any, Float64)`
-method, and neither is more specific than the other. In such cases, Julia raises a [`MethodError`](@ref)
-rather than arbitrarily picking a method. You can avoid method ambiguities by specifying an appropriate
-method for the intersection case:
+Here the call `g(2.0, 3.0)` could be handled by either the `g(::Float64, ::Any)` or the
+`g(::Any, ::Float64)` method. The order in which the methods are defined does not matter and
+neither is more specific than the other. In such cases, Julia raises a
+[`MethodError`](@ref) rather than arbitrarily picking a method. You can avoid method
+ambiguities by specifying an appropriate method for the intersection case:
 
 ```jldoctest gofxy
 julia> g(x::Float64, y::Float64) = 2x + 2y
@@ -406,7 +419,20 @@ Here's an example where the method type parameter `T` is used as the type parame
 type `Vector{T}` in the method signature:
 
 ```jldoctest
-julia> myappend(v::Vector{T}, x::T) where {T} = [v..., x]
+julia> function myappend(v::Vector{T}, x::T) where {T}
+           return [v..., x]
+       end
+myappend (generic function with 1 method)
+```
+
+The type parameter `T` in this example ensures that the added element `x` is a subtype of the
+existing eltype of the vector `v`.
+The `where` keyword introduces a list of those constraints after the method signature definition.
+This works the same for one-line definitions, as seen above, and must appear _before_ the [return
+type declaration](@ref man-functions-return-type), if present, as illustrated below:
+
+```jldoctest
+julia> (myappend(v::Vector{T}, x::T)::Vector) where {T} = [v..., x]
 myappend (generic function with 1 method)
 
 julia> myappend([1,2,3],4)
@@ -418,6 +444,7 @@ julia> myappend([1,2,3],4)
 
 julia> myappend([1,2,3],2.5)
 ERROR: MethodError: no method matching myappend(::Vector{Int64}, ::Float64)
+The function `myappend` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   myappend(::Vector{T}, !Matched::T) where T
@@ -435,6 +462,7 @@ julia> myappend([1.0,2.0,3.0],4.0)
 
 julia> myappend([1.0,2.0,3.0],4)
 ERROR: MethodError: no method matching myappend(::Vector{Float64}, ::Int64)
+The function `myappend` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   myappend(::Vector{T}, !Matched::T) where T
@@ -444,9 +472,9 @@ Stacktrace:
 [...]
 ```
 
-As you can see, the type of the appended element must match the element type of the vector it
-is appended to, or else a [`MethodError`](@ref) is raised. In the following example, the method type parameter
-`T` is used as the return value:
+If the type of the appended element does not match the element type of the vector it is appended to,
+a [`MethodError`](@ref) is raised.
+In the following example, the method's type parameter `T` is used as the return value:
 
 ```jldoctest
 julia> mytypeof(x::T) where {T} = T
@@ -480,6 +508,7 @@ true
 
 julia> same_type_numeric("foo", 2.0)
 ERROR: MethodError: no method matching same_type_numeric(::String, ::Float64)
+The function `same_type_numeric` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   same_type_numeric(!Matched::T, ::T) where T<:Number
@@ -492,6 +521,7 @@ Stacktrace:
 
 julia> same_type_numeric("foo", "bar")
 ERROR: MethodError: no method matching same_type_numeric(::String, ::String)
+The function `same_type_numeric` exists, but no method is defined for this combination of argument types.
 
 julia> same_type_numeric(Int32(1), Int64(2))
 false
@@ -552,72 +582,8 @@ However, future calls to `tryeval` will continue to see the definition of `newfu
 
 You may want to try this for yourself to see how it works.
 
-The implementation of this behavior is a "world age counter".
-This monotonically increasing value tracks each method definition operation.
-This allows describing "the set of method definitions visible to a given runtime environment"
-as a single number, or "world age".
-It also allows comparing the methods available in two worlds just by comparing their ordinal value.
-In the example above, we see that the "current world" (in which the method `newfun` exists),
-is one greater than the task-local "runtime world" that was fixed when the execution of `tryeval` started.
-
-Sometimes it is necessary to get around this (for example, if you are implementing the above REPL).
-Fortunately, there is an easy solution: call the function using [`Base.invokelatest`](@ref):
-
-```jldoctest
-julia> function tryeval2()
-           @eval newfun2() = 2
-           Base.invokelatest(newfun2)
-       end
-tryeval2 (generic function with 1 method)
-
-julia> tryeval2()
-2
-```
-
-Finally, let's take a look at some more complex examples where this rule comes into play.
-Define a function `f(x)`, which initially has one method:
-
-```jldoctest redefinemethod
-julia> f(x) = "original definition"
-f (generic function with 1 method)
-```
-
-Start some other operations that use `f(x)`:
-
-```jldoctest redefinemethod
-julia> g(x) = f(x)
-g (generic function with 1 method)
-
-julia> t = @async f(wait()); yield();
-```
-
-Now we add some new methods to `f(x)`:
-
-```jldoctest redefinemethod
-julia> f(x::Int) = "definition for Int"
-f (generic function with 2 methods)
-
-julia> f(x::Type{Int}) = "definition for Type{Int}"
-f (generic function with 3 methods)
-```
-
-Compare how these results differ:
-
-```jldoctest redefinemethod
-julia> f(1)
-"definition for Int"
-
-julia> g(1)
-"definition for Int"
-
-julia> fetch(schedule(t, 1))
-"original definition"
-
-julia> t = @async f(wait()); yield();
-
-julia> fetch(schedule(t, 1))
-"definition for Int"
-```
+The implementation of this behavior is a "world age counter", which is further described in the [World Age](@ref World-age-in-general)
+manual chapter.
 
 ## Design Patterns with Parametric Methods
 
@@ -638,7 +604,7 @@ abstract type AbstractArray{T, N} end
 eltype(::Type{<:AbstractArray{T}}) where {T} = T
 ```
 
-using so-called triangular dispatch.  Note that `UnionAll` types, for
+using so-called triangular dispatch. Note that `UnionAll` types, for
 example `eltype(AbstractArray{T} where T <: Integer)`, do not match the
 above method. The implementation of `eltype` in `Base` adds a fallback
 method to `Any` for such cases.
@@ -672,11 +638,14 @@ While this works for declared types, it fails for types without
 supertypes:
 
 ```julia-repl
-julia> eltype_wrong(Union{AbstractArray{Int}, AbstractArray{Float64}})
-ERROR: MethodError: no method matching supertype(::Type{Union{AbstractArray{Float64,N} where N, AbstractArray{Int64,N} where N}})
+julia> eltype_wrong(Union{Vector{Int}, Matrix{Int}})
+ERROR: MethodError: no method matching supertype(::Type{VecOrMat{Int64}})
+
 Closest candidates are:
-  supertype(::DataType) at operators.jl:43
-  supertype(::UnionAll) at operators.jl:48
+  supertype(::UnionAll)
+   @ Base operators.jl:44
+  supertype(::DataType)
+   @ Base operators.jl:43
 ```
 
 ### Building a similar type with a different type parameter
@@ -719,8 +688,8 @@ often it is best to separate each level of dispatch into distinct functions.
 This may sound similar in approach to single-dispatch, but as we shall see below, it is still more flexible.
 
 For example, trying to dispatch on the element-type of an array will often run into ambiguous situations.
-Instead, commonly code will dispatch first on the container type,
-then recurse down to a more specific method based on eltype.
+Instead, common code will dispatch first on the container type,
+then recurse down to a more specific method based on `eltype`.
 In most cases, the algorithms lend themselves conveniently to this hierarchical approach,
 while in other cases, this rigor must be resolved manually.
 This dispatching branching can be observed, for example, in the logic to sum two matrices:
@@ -750,7 +719,7 @@ often referred to as a
 
 This pattern is implemented by defining a generic function which
 computes a different singleton value (or type) for each trait-set to which the
-function arguments may belong to.  If this function is pure there is
+function arguments may belong to. If this function is pure there is
 no impact on performance compared to normal dispatch.
 
 The example in the previous section glossed over the implementation details of
@@ -865,8 +834,8 @@ matmul(a, b) = matmul(promote(a, b)...)
 ## Parametrically-constrained Varargs methods
 
 Function parameters can also be used to constrain the number of arguments that may be supplied
-to a "varargs" function ([Varargs Functions](@ref)).  The notation `Vararg{T,N}` is used to indicate
-such a constraint.  For example:
+to a "varargs" function ([Varargs Functions](@ref)). The notation `Vararg{T,N}` is used to indicate
+such a constraint. For example:
 
 ```jldoctest
 julia> bar(a,b,x::Vararg{Any,2}) = (a,b,x)
@@ -874,6 +843,7 @@ bar (generic function with 1 method)
 
 julia> bar(1,2,3)
 ERROR: MethodError: no method matching bar(::Int64, ::Int64, ::Int64)
+The function `bar` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   bar(::Any, ::Any, ::Any, !Matched::Any)
@@ -887,6 +857,7 @@ julia> bar(1,2,3,4)
 
 julia> bar(1,2,3,4,5)
 ERROR: MethodError: no method matching bar(::Int64, ::Int64, ::Int64, ::Int64, ::Int64)
+The function `bar` exists, but no method is defined for this combination of argument types.
 
 Closest candidates are:
   bar(::Any, ::Any, ::Any, ::Any)
@@ -944,7 +915,7 @@ with keyword arguments processed after the matching method is identified.
 ## Function-like objects
 
 Methods are associated with types, so it is possible to make any arbitrary Julia object "callable"
-by adding methods to its type. (Such "callable" objects are sometimes called "functors.")
+by adding methods to its type.
 
 For example, you can define a type that stores the coefficients of a polynomial, but behaves like
 a function evaluating the polynomial:
@@ -970,13 +941,13 @@ there is a terse syntax form. In the function body, `p` will refer to the object
 called. A `Polynomial` can be used as follows:
 
 ```jldoctest polynomial
-julia> p = Polynomial([1,10,100])
+julia> poly = Polynomial([1,10,100])
 Polynomial{Int64}([1, 10, 100])
 
-julia> p(3)
+julia> poly(3)
 931
 
-julia> p()
+julia> poly()
 2551
 ```
 
@@ -997,7 +968,7 @@ function emptyfunc end
 ## [Method design and the avoidance of ambiguities](@id man-method-design-ambiguities)
 
 Julia's method polymorphism is one of its most powerful features, yet
-exploiting this power can pose design challenges.  In particular, in
+exploiting this power can pose design challenges. In particular, in
 more complex method hierarchies it is not uncommon for
 [ambiguities](@ref man-ambiguities) to arise.
 
@@ -1140,7 +1111,7 @@ sure this method is implemented with generic calls (like `similar` and
 When this approach is not possible, it may be worth starting a
 discussion with other developers about resolving the ambiguity; just
 because one method was defined first does not necessarily mean that it
-can't be modified or eliminated.  As a last resort, one developer can
+can't be modified or eliminated. As a last resort, one developer can
 define the "band-aid" method
 
 ```julia
diff --git a/doc/src/manual/modules.md b/doc/src/manual/modules.md
index 4ffb1bca26e50..65ea9ff0233dc 100644
--- a/doc/src/manual/modules.md
+++ b/doc/src/manual/modules.md
@@ -7,7 +7,8 @@ Modules in Julia help organize code into coherent units. They are delimited synt
    allows the same name to be used for different functions or global variables without conflict, as long as they are in separate modules.
 
 2. Modules have facilities for detailed namespace management: each defines a set of names it
-   `export`s, and can import names from other modules with `using` and `import` (we explain these below).
+   `export`s and marks as `public`, and can import names from other modules with `using` and
+   `import` (we explain these below).
 
 3. Modules can be precompiled for faster loading, and may contain code for runtime initialization.
 
@@ -16,7 +17,7 @@ Typically, in larger Julia packages you will see module code organized into file
 ```julia
 module SomeModule
 
-# export, using, import statements are usually here; we discuss these below
+# export, public, using, import statements are usually here; we discuss these below
 
 include("file1.jl")
 include("file2.jl")
@@ -103,9 +104,25 @@ Also, some modules don't export names at all. This is usually done if they use c
 words, such as `derivative`, in their API, which could easily clash with the export lists of other
 modules. We will see how to manage name clashes below.
 
+To mark a name as public without exporting it into the namespace of folks who call `using NiceStuff`,
+one can use `public` instead of `export`. This marks the public name(s) as part of the public API,
+but does not have any namespace implications. The `public` keyword is only available in Julia 1.11
+and above. To maintain compatibility with Julia 1.10 and below, use the `@compat` macro from the
+[Compat](https://github.com/JuliaLang/Compat.jl) package, or the version-aware construct
+
+```julia
+VERSION >= v"1.11.0-DEV.469" && eval(Meta.parse("public a, b, c"))
+```
+
+`export` is a keyword wherever it occurs whereas the `public` keyword is currently limited to the
+syntactic top level within a file or module. This limitation exists for compatibility reasons,
+as `public` was introduced as a new keyword in Julia 1.11 while `export` has existed since Julia
+1.0. However, this restriction on `public` may be lifted in future releases, so do not use `public`
+as an identifier.
+
 ### Standalone `using` and `import`
 
-Possibly the most common way of loading a module is `using ModuleName`. This [loads](@ref
+For interactive use, the most common way of loading a module is `using ModuleName`. This [loads](@ref
 code-loading) the code associated with `ModuleName`, and brings
 
 1. the module name
@@ -137,13 +154,13 @@ In contrast,
 julia> import .NiceStuff
 ```
 
-brings *only* the module name into scope. Users would need to use `NiceStuff.DOG`, `NiceStuff.Dog`, and `NiceStuff.nice` to access its contents. Usually, `import ModuleName` is used in contexts when the user wants to keep the namespace clean.
-As we will see in the next section `import .NiceStuff` is equivalent to `using .NiceStuff: NiceStuff`.
+brings *only* the module name into scope. Users would need to use `NiceStuff.DOG`, `NiceStuff.Dog`, and `NiceStuff.nice` to access its contents.
+As we will see in the next section `import .NiceStuff` is equivalent to `using .NiceStuff: NiceStuff`. Usually, `import ModuleName` or `using ModuleName: ModuleName` is used in contexts when the user wants to keep the namespace clean.
 
 You can combine multiple `using` and `import` statements of the same kind in a comma-separated expression, e.g.
 
 ```jldoctest module_manual
-julia> using LinearAlgebra, Statistics
+julia> using LinearAlgebra, Random
 ```
 
 ### `using` and `import` with specific identifiers, and adding methods
@@ -161,6 +178,13 @@ Importantly, the module name `NiceStuff` will *not* be in the namespace. If you
 julia> using .NiceStuff: nice, DOG, NiceStuff
 ```
 
+When two or more packages/modules export a name and that name does not refer to the
+same thing in each of the packages, and the packages are loaded via `using` without
+an explicit list of names, it is an error to reference that name without qualification.
+It is thus recommended that code intended to be forward-compatible with future versions
+of its dependencies and of Julia, e.g., code in released packages, list the names it
+uses from each loaded package, e.g., `using Foo: Foo, f` rather than `using Foo`.
+
 Julia has two forms for seemingly the same thing because only `import ModuleName: f` allows adding methods to `f`
 *without a module path*.
 That is to say, the following example will give an error:
@@ -174,10 +198,7 @@ julia> nice(::Cat) = "nice 😸"
 ERROR: invalid method definition in Main: function NiceStuff.nice must be explicitly imported to be extended
 Stacktrace:
  [1] top-level scope
-   @ none:0
- [2] top-level scope
    @ none:1
-
 ```
 
 This error prevents accidentally adding methods to functions in other modules that you only intended to use.
@@ -189,17 +210,16 @@ julia> using .NiceStuff
 julia> struct Cat end
 
 julia> NiceStuff.nice(::Cat) = "nice 😸"
-
 ```
 
 Alternatively, you can `import` the specific function name:
 ```jldoctest module_manual
 julia> import .NiceStuff: nice
 
-julia> struct Cat end
+julia> struct Mouse end
 
-julia> nice(::Cat) = "nice 😸"
-nice (generic function with 2 methods)
+julia> nice(::Mouse) = "nice 🐭"
+nice (generic function with 3 methods)
 ```
 
 Which one you choose is a matter of style. The first form makes it clear that you are adding a
@@ -274,14 +294,14 @@ julia> module B
 B
 ```
 
-The statement `using .A, .B` works, but when you try to call `f`, you get a warning
+The statement `using .A, .B` works, but when you try to call `f`, you get an error with a hint
 
 ```jldoctest module_manual
 julia> using .A, .B
 
 julia> f
-WARNING: both B and A export "f"; uses of it in module Main must be qualified
-ERROR: UndefVarError: `f` not defined
+ERROR: UndefVarError: `f` not defined in `Main`
+Hint: It looks like two or more modules export different bindings with this name, resulting in ambiguity. Try explicitly importing it from a particular module, or qualifying the name with the module it should come from.
 ```
 
 Here, Julia cannot decide which `f` you are referring to, so you have to make a choice. The following solutions are commonly used:
@@ -302,6 +322,68 @@ Here, Julia cannot decide which `f` you are referring to, so you have to make a
 
 3. When the names in question *do* share a meaning, it is common for one module to import it from another, or have a lightweight “base” package with the sole function of defining an interface like this, which can be used by other packages. It is conventional to have such package names end in `...Base` (which has nothing to do with Julia's `Base` module).
 
+### Precedence order of definitions
+
+There are in general four kinds of binding definitions:
+   1. Those provided via implicit import through `using M`
+   2. Those provided via explicit import (e.g. `using M: x`, `import M: x`)
+   3. Those declared implicitly as global (via `global x` without type specification)
+   4. Those declared explicitly using definition syntax (`const`, `global x::T`, `struct`, etc.)
+
+Syntactically, we divide these into three precedence levels (from weakest to strongest)
+   1. Implicit imports
+   2. Implicit declarations
+   3. Explicit declarations and imports
+
+In general, we permit replacement of weaker bindings by stronger ones:
+
+```julia-repl
+julia> module M1; const x = 1; export x; end
+Main.M1
+
+julia> using .M1
+
+julia> x # Implicit import from M1
+1
+
+julia> begin; f() = (global x; x = 1) end
+
+julia> x # Implicit declaration
+ERROR: UndefVarError: `x` not defined in `Main`
+Suggestion: add an appropriate import or assignment. This global was declared but not assigned.
+
+julia> const x = 2 # Explicit declaration
+2
+```
+
+However, within the explicit precedence level, replacement is syntactically disallowed:
+```julia-repl
+julia> module M1; const x = 1; export x; end
+Main.M1
+
+julia> import .M1: x
+
+julia> const x = 2
+ERROR: cannot declare Main.x constant; it was already declared as an import
+Stacktrace:
+ [1] top-level scope
+   @ REPL[3]:1
+```
+
+or ignored:
+
+```julia-repl
+julia> const y = 2
+2
+
+julia> import .M1: x as y
+WARNING: import of M1.x into Main conflicts with an existing identifier; ignored.
+```
+
+The resolution of an implicit binding depends on the set of all `using`'d modules visible
+in the current world age. See [the manual chapter on world age](@ref man-worldage) for more
+details.
+
 ### Default top-level definitions and bare modules
 
 Modules automatically contain `using Core`, `using Base`, and definitions of the [`eval`](@ref)
@@ -357,7 +439,7 @@ There are three important standard modules:
 
 Modules can contain *submodules*, nesting the same syntax `module ... end`. They can be used to introduce separate namespaces, which can be helpful for organizing complex codebases. Note that each `module` introduces its own [scope](@ref scope-of-variables), so submodules do not automatically “inherit” names from their parent.
 
-It is recommended that submodules refer to other modules within the enclosing parent module (including the latter) using *relative module qualifiers* in `using` and `import` statements. A relative module qualifier starts with a period (`.`), which corresponds to the current module, and each successive `.` leads to the parent of the current module. This should be followed by modules if necessary, and eventually the actual name to access, all separated by `.`s.
+It is recommended that submodules refer to other modules within the enclosing parent module (including the latter) using *relative module qualifiers* in `using` and `import` statements. A relative module qualifier starts with a period (`.`), which corresponds to the current module, and each successive `.` leads to the parent of the current module. This should be followed by modules if necessary, and eventually the actual name to access, all separated by `.`s. As a special case, however, referring to the module root can be written without `.`, avoiding the need to count the depth to reach that module.
 
 Consider the following example, where the submodule `SubA` defines a function, which is then extended in its “sibling” module:
 
@@ -372,6 +454,7 @@ julia> module ParentModule
        export add_D # export it from ParentModule too
        module SubB
        import ..SubA: add_D # relative path for a “sibling” module
+       # import ParentModule.SubA: add_D # when in a package, such as when this is loaded by using or import, this would be equivalent to the previous import, but not at the REPL
        struct Infinity end
        add_D(x::Infinity) = x
        end
@@ -379,12 +462,16 @@ julia> module ParentModule
 
 ```
 
-You may see code in packages, which, in a similar situation, uses
+You may see code in packages, which, in a similar situation, uses import without the `.`:
+```jldoctest
+julia> import ParentModule.SubA: add_D
+ERROR: ArgumentError: Package ParentModule not found in current path.
+```
+However, since this operates through [code loading](@ref code-loading), it only works if `ParentModule` is in a package in a file. If `ParentModule` was defined at the REPL, it is necessary to use use relative paths:
 ```jldoctest module_manual
 julia> import .ParentModule.SubA: add_D
 
 ```
-However, this operates through [code loading](@ref code-loading), and thus only works if `ParentModule` is in a package. It is better to use relative paths.
 
 Note that the order of definitions also matters if you are evaluating values. Consider
 
@@ -397,7 +484,7 @@ x = 0
 
 module Sub
 using ..TestPackage
-z = y # ERROR: UndefVarError: `y` not defined
+z = y # ERROR: UndefVarError: `y` not defined in `Main`
 end
 
 y = 1
@@ -413,7 +500,7 @@ For similar reasons, you cannot use a cyclic ordering:
 module A
 
 module B
-using ..C # ERROR: UndefVarError: `C` not defined
+using ..C # ERROR: UndefVarError: `C` not defined in `Main.A`
 end
 
 module C
@@ -429,7 +516,7 @@ Large modules can take several seconds to load because executing all of the stat
 often involves compiling a large amount of code.
 Julia creates precompiled caches of the module to reduce this time.
 
-Precompiled module files (sometimes called "cache files") are created and used automatically when `import` or `using` loads a module.  If the cache file(s) do not yet exist, the module will be compiled and saved for future reuse. You can also manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref) to create these files without loading the module. The resulting
+Precompiled module files (sometimes called "cache files") are created and used automatically when `import` or `using` loads a module. If the cache file(s) do not yet exist, the module will be compiled and saved for future reuse. You can also manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref) to create these files without loading the module. The resulting
 cache files will be stored in the `compiled` subfolder of `DEPOT_PATH[1]`. If nothing about your system changes,
 such cache files will be used when you load the module with `import` or `using`.
 
@@ -440,10 +527,12 @@ recompiled upon `using` or `import`. Dependencies are modules it
 imports, the Julia build, files it includes, or explicit dependencies declared by [`include_dependency(path)`](@ref)
 in the module file(s).
 
-For file dependencies, a change is determined by examining whether the modification time (`mtime`)
-of each file loaded by `include` or added explicitly by `include_dependency` is unchanged, or equal
-to the modification time truncated to the nearest second (to accommodate systems that can't copy
-mtime with sub-second accuracy). It also takes into account whether the path to the file chosen
+For file dependencies loaded by `include`, a change is determined by examining whether the
+file size (`fsize`) or content (condensed into a hash) is unchanged.
+For file dependencies loaded by `include_dependency` a change is determined by examining whether the modification time (`mtime`)
+is unchanged, or equal to the modification time truncated to the nearest second
+(to accommodate systems that can't copy mtime with sub-second accuracy).
+It also takes into account whether the path to the file chosen
 by the search logic in `require` matches the path that had created the precompile file. It also takes
 into account the set of dependencies already loaded into the current process and won't recompile those
 modules, even if their files change or disappear, in order to avoid creating incompatibilities between
@@ -475,16 +564,20 @@ In particular, if you define a `function __init__()` in a module, then Julia wil
 immediately *after* the module is loaded (e.g., by `import`, `using`, or `require`) at runtime
 for the *first* time (i.e., `__init__` is only called once, and only after all statements in the
 module have been executed). Because it is called after the module is fully imported, any submodules
-or other imported modules have their `__init__` functions called *before* the `__init__` of the
-enclosing module.
+or other imported modules have their `__init__` functions called *before* the `__init__` of
+the enclosing module. This is also synchronized across threads, so that code can safely rely upon
+this ordering of effects, such that all `__init__` will have run, in dependency ordering,
+before the `using` result is completed. They may run concurrently with other `__init__`
+methods which are not dependencies however, so be careful when accessing any shared state
+outside the current module to use locks when needed.
 
 Two typical uses of `__init__` are calling runtime initialization functions of external C libraries
-and initializing global constants that involve pointers returned by external libraries.  For example,
+and initializing global constants that involve pointers returned by external libraries. For example,
 suppose that we are calling a C library `libfoo` that requires us to call a `foo_init()` initialization
 function at runtime. Suppose that we also want to define a global constant `foo_data_ptr` that
 holds the return value of a `void *foo_data()` function defined by `libfoo` -- this constant must
 be initialized at runtime (not at compile time) because the pointer address will change from run
-to run.  You could accomplish this by defining the following `__init__` function in your module:
+to run. You could accomplish this by defining the following `__init__` function in your module:
 
 ```julia
 const foo_data_ptr = Ref{Ptr{Cvoid}}(0)
@@ -508,17 +601,6 @@ pointer value must be called at runtime for precompilation to work ([`Ptr`](@ref
 null pointers unless they are hidden inside an [`isbits`](@ref) object). This includes the return values
 of the Julia functions [`@cfunction`](@ref) and [`pointer`](@ref).
 
-Dictionary and set types, or in general anything that depends on the output of a `hash(key)` method,
-are a trickier case.  In the common case where the keys are numbers, strings, symbols, ranges,
-`Expr`, or compositions of these types (via arrays, tuples, sets, pairs, etc.) they are safe to
-precompile.  However, for a few other key types, such as `Function` or `DataType` and generic
-user-defined types where you haven't defined a `hash` method, the fallback `hash` method depends
-on the memory address of the object (via its `objectid`) and hence may change from run to run.
-If you have one of these key types, or if you aren't sure, to be safe you can initialize this
-dictionary from within your `__init__` function. Alternatively, you can use the [`IdDict`](@ref)
-dictionary type, which is specially handled by precompilation so that it is safe to initialize
-at compile-time.
-
 When using precompilation, it is important to keep a clear sense of the distinction between the
 compilation phase and the execution phase. In this mode, it will often be much more clearly apparent
 that Julia is a compiler which allows execution of arbitrary Julia code, not a standalone interpreter
@@ -590,15 +672,19 @@ A few other points to be aware of:
    an error to do this, but you simply need to be prepared that the system will try to copy some
    of these and to create a single unique instance of others.
 
-It is sometimes helpful during module development to turn off incremental precompilation. The
-command line flag `--compiled-modules={yes|no}` enables you to toggle module precompilation on and
-off. When Julia is started with `--compiled-modules=no` the serialized modules in the compile cache
-are ignored when loading modules and module dependencies.
-More fine-grained control is available with `--pkgimages=no`, which suppresses only
-native-code storage during precompilation. `Base.compilecache` can still be called
-manually. The state of this command line flag is passed to `Pkg.build` to disable automatic
-precompilation triggering when installing, updating, and explicitly building packages.
+It is sometimes helpful during module development to turn off incremental precompilation.
+The command line flag `--compiled-modules={yes|no|existing}` enables you to toggle module
+precompilation on and off. When Julia is started with `--compiled-modules=no` the serialized
+modules in the compile cache are ignored when loading modules and module dependencies. In
+some cases, you may want to load existing precompiled modules, but not create new ones. This
+can be done by starting Julia with `--compiled-modules=existing`. More fine-grained control
+is available with `--pkgimages={yes|no|existing}`, which only affects native-code storage
+during precompilation. `Base.compilecache` can still be called manually. The state of this
+command line flag is passed to `Pkg.build` to disable automatic precompilation triggering
+when installing, updating, and explicitly building packages.
 
 You can also debug some precompilation failures with environment variables. Setting
-`JULIA_VERBOSE_LINKING=true` may help resolve failures in linking shared libraries of compiled
-native code. See the **Developer Documentation** part of the Julia manual, where you will find further details in the section documenting Julia's internals under "Package Images".
+`JULIA_VERBOSE_LINKING=true` may help resolve failures in linking shared libraries of
+compiled native code. See the **Developer Documentation** part of the Julia manual, where
+you will find further details in the section documenting Julia's internals under "Package
+Images".
diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md
index 63902f1b7902b..96d3790eef0af 100644
--- a/doc/src/manual/multi-threading.md
+++ b/doc/src/manual/multi-threading.md
@@ -5,11 +5,13 @@ of Julia multi-threading features.
 
 ## Starting Julia with multiple threads
 
-By default, Julia starts up with a single thread of execution. This can be verified by using the
-command [`Threads.nthreads()`](@ref):
+By default, Julia starts up with 2 threads of execution; 1 worker thread and 1 interactive thread.
+This can be verified by using the command [`Threads.nthreads()`](@ref):
 
-```jldoctest
-julia> Threads.nthreads()
+```julia
+julia> Threads.nthreads(:default)
+1
+julia> Threads.nthreads(:interactive)
 1
 ```
 
@@ -22,13 +24,21 @@ The number of threads can either be specified as an integer (`--threads=4`) or a
 (`--threads=auto`), where `auto` tries to infer a useful default number of threads to use
 (see [Command-line Options](@ref command-line-interface) for more details).
 
+See [threadpools](@ref man-threadpools) for how to control how many `:default` and `:interactive` threads are in
+each threadpool.
+
 !!! compat "Julia 1.5"
     The `-t`/`--threads` command line argument requires at least Julia 1.5.
     In older versions you must use the environment variable instead.
 
 !!! compat "Julia 1.7"
-    Using `auto` as value of the environment variable `JULIA_NUM_THREADS` requires at least Julia 1.7.
+    Using `auto` as value of the environment variable [`JULIA_NUM_THREADS`](@ref JULIA_NUM_THREADS) requires at least Julia 1.7.
     In older versions, this value is ignored.
+
+!!! compat "Julia 1.12"
+    Starting by default with 1 interactive thread, as well as the 1 worker thread, was made as such in Julia 1.12
+    If the number of threads is set to 1 by either doing `-t1` or `JULIA_NUM_THREADS=1` an interactive thread will not be spawned.
+
 Lets start Julia with 4 threads:
 
 ```bash
@@ -37,7 +47,7 @@ $ julia --threads 4
 
 Let's verify there are 4 threads at our disposal.
 
-```julia-repl
+```jldoctest; filter = r"[0-9]+"
 julia> Threads.nthreads()
 4
 ```
@@ -74,13 +84,15 @@ julia> Threads.threadid()
 
 ### Multiple GC Threads
 
-The Garbage Collector (GC) can use multiple threads. The amount used is either half the number
-of compute worker threads or configured by either the `--gcthreads` command line argument or by using the
-[`JULIA_NUM_GC_THREADS`](@ref env-gc-threads) environment variable.
+The Garbage Collector (GC) can use multiple threads. The amount used by default matches the compute
+worker threads or can configured by either the `--gcthreads` command line argument or by using the
+[`JULIA_NUM_GC_THREADS`](@ref JULIA_NUM_GC_THREADS) environment variable.
 
 !!! compat "Julia 1.10"
     The `--gcthreads` command line argument requires at least Julia 1.10.
 
+For more details about garbage collection configuration and performance tuning, see [Memory Management and Garbage Collection](@ref man-memory-management).
+
 ## [Threadpools](@id man-threadpools)
 
 When a program's threads are busy with many tasks to run, tasks may experience
@@ -96,13 +108,20 @@ using Base.Threads
 Interactive tasks should avoid performing high latency operations, and if they
 are long duration tasks, should yield frequently.
 
-Julia may be started with one or more threads reserved to run interactive tasks:
+By default Julia starts with one interactive thread reserved to run interactive tasks, but that number can
+be controlled with:
 
 ```bash
 $ julia --threads 3,1
+julia> Threads.nthreads(:interactive)
+1
+
+$ julia --threads 3,0
+julia> Threads.nthreads(:interactive)
+0
 ```
 
-The environment variable `JULIA_NUM_THREADS` can also be used similarly:
+The environment variable [`JULIA_NUM_THREADS`](@ref JULIA_NUM_THREADS) can also be used similarly:
 ```bash
 export JULIA_NUM_THREADS=3,1
 ```
@@ -116,8 +135,8 @@ julia> using Base.Threads
 julia> nthreadpools()
 2
 
-julia> threadpool()
-:default
+julia> threadpool() # the main thread is in the interactive thread pool
+:interactive
 
 julia> nthreads(:default)
 3
@@ -128,66 +147,20 @@ julia> nthreads(:interactive)
 julia> nthreads()
 3
 ```
+!!! note
+    Explicitly asking for 1 thread by doing `-t1` or `JULIA_NUM_THREADS=1` does not add an interactive thread.
 
 !!! note
     The zero-argument version of `nthreads` returns the number of threads
     in the default pool.
 
+!!! note
+    Depending on whether Julia has been started with interactive threads,
+    the main thread is either in the default or interactive thread pool.
+
 Either or both numbers can be replaced with the word `auto`, which causes
 Julia to choose a reasonable default.
 
-## Communication and synchronization
-
-Although Julia's threads can communicate through shared memory, it is notoriously
-difficult to write correct and data-race free multi-threaded code. Julia's
-[`Channel`](@ref)s are thread-safe and may be used to communicate safely.
-
-### Data-race freedom
-
-You are entirely responsible for ensuring that your program is data-race free,
-and nothing promised here can be assumed if you do not observe that
-requirement. The observed results may be highly unintuitive.
-
-The best way to ensure this is to acquire a lock around any access to data that
-can be observed from multiple threads. For example, in most cases you should
-use the following code pattern:
-
-```julia-repl
-julia> lock(lk) do
-           use(a)
-       end
-
-julia> begin
-           lock(lk)
-           try
-               use(a)
-           finally
-               unlock(lk)
-           end
-       end
-```
-where `lk` is a lock (e.g. `ReentrantLock()`) and `a` data.
-
-Additionally, Julia is not memory safe in the presence of a data race. Be very
-careful about reading _any_ data if another thread might write to it!
-Instead, always use the lock pattern above when changing data (such as assigning
-to a global or closure variable) accessed by other threads.
-
-```julia
-Thread 1:
-global b = false
-global a = rand()
-global b = true
-
-Thread 2:
-while !b; end
-bad_read1(a) # it is NOT safe to access `a` here!
-
-Thread 3:
-while !@isdefined(a); end
-bad_read2(a) # it is NOT safe to access `a` here
-```
-
 ## The `@threads` Macro
 
 Let's work a simple example using our native threads. Let us create an array of zeros:
@@ -239,10 +212,11 @@ julia> a
 
 Note that [`Threads.@threads`](@ref) does not have an optional reduction parameter like [`@distributed`](@ref).
 
-### Using `@threads` without data races
+### Using `@threads` without data-races
 
-Taking the example of a naive sum
+The concept of a data-race is elaborated on in ["Communication and data races between threads"](@ref man-communication-and-data-races). For now, just know that a data race can result in incorrect results and dangerous errors.
 
+Lets say we want to make the function `sum_single` below multithreaded.
 ```julia-repl
 julia> function sum_single(a)
            s = 0
@@ -275,12 +249,11 @@ julia> sum_multi_bad(1:1_000_000)
 Note that the result is not `500000500000` as it should be, and will most likely change each evaluation.
 
 To fix this, buffers that are specific to the task may be used to segment the sum into chunks that are race-free.
-Here `sum_single` is reused, with its own internal buffer `s`, and vector `a` is split into `nthreads()`
-chunks for parallel work via `nthreads()` `@spawn`-ed tasks.
-
+Here `sum_single` is reused, with its own internal buffer `s`. The input vector `a` is split into at most `nthreads()`
+chunks for parallel work. We then use `Threads.@spawn` to create tasks that individually sum each chunk. Finally, we sum the results from each task using `sum_single` again:
 ```julia-repl
 julia> function sum_multi_good(a)
-           chunks = Iterators.partition(a, length(a) ÷ Threads.nthreads())
+           chunks = Iterators.partition(a, cld(length(a), Threads.nthreads()))
            tasks = map(chunks) do chunk
                Threads.@spawn sum_single(chunk)
            end
@@ -301,7 +274,119 @@ julia> sum_multi_good(1:1_000_000)
 Another option is the use of atomic operations on variables shared across tasks/threads, which may be more performant
 depending on the characteristics of the operations.
 
-## Atomic Operations
+## [Communication and data-races between threads](@id man-communication-and-data-races)
+
+Although Julia's threads can communicate through shared memory, it is notoriously difficult to write correct and data-race free multi-threaded code. Julia's
+[`Channel`](@ref)s are thread-safe and may be used to communicate safely. There are also sections below that explain how to use [locks](@ref man-using-locks) and [atomics](@ref man-atomic-operations) to avoid data-races.
+
+In certain cases, Julia is able to detect safety violations, in particular in regards to deadlocks or other known-unsafe operations such as yielding
+to the currently running task. In these cases, a [`ConcurrencyViolationError`](@ref) is thrown.
+
+### Data-race freedom
+
+You are entirely responsible for ensuring that your program is data-race free,
+and nothing promised here can be assumed if you do not observe that
+requirement. The observed results may be highly unintuitive.
+
+If data-races are introduced, Julia is not memory safe. **Be very
+careful about reading _any_ data if another thread might write to it, as it could result in segmentation faults or worse**. Below are a couple of unsafe ways to access global variables from different threads:
+```julia
+Thread 1:
+global b = false
+global a = rand()
+global b = true
+
+Thread 2:
+while !b; end
+bad_read1(a) # it is NOT safe to access `a` here!
+
+Thread 3:
+while !@isdefined(a); end
+bad_read2(a) # it is NOT safe to access `a` here
+```
+
+### [Using locks to avoid data-races](@id man-using-locks)
+An important tool for avoiding data races, and writing thread-safe code in general, is the concept of a "lock". A lock can be locked and unlocked. If a thread has locked a lock, and not unlocked it, it is said to "hold" the lock. If there is only one lock, and we write code that requires holding the lock to access some data, we can ensure that multiple threads will never access the same data simultaneously.
+
+Note that the link between a lock and a variable is made by the programmer, and not the program. A helper-type [`Base.Lockable`](@ref) exists that helps you associate a lock and a value. This is often more safe than keeping track yourself, and is detailed under [Using Base.Lockable to associate a lock and a value](@ref man-lockable).
+
+For example, we can create a lock `my_lock`, and lock it while we mutate a variable `my_variable`. This is done most simply with the `@lock` macro:
+
+```julia-repl
+julia> my_lock = ReentrantLock();
+
+julia> my_variable = [1, 2, 3];
+
+julia> @lock my_lock my_variable[1] = 100
+100
+```
+
+By using a similar pattern with the same lock and variable, but on another thread, the operations are free from data-races.
+
+We could have performed the operation above with the functional version of `lock`, in the following two ways:
+```julia-repl
+julia> lock(my_lock) do
+           my_variable[1] = 100
+       end
+100
+
+julia> begin
+           lock(my_lock)
+           try
+               my_variable[1] = 100
+           finally
+               unlock(my_lock)
+           end
+       end
+100
+```
+
+All three options are equivalent. Note how the final version requires an explicit `try`-block to ensure that the lock is always unlocked, whereas the first two version do this internally. One should always use the lock pattern above when changing data (such as assigning
+to a global or closure variable) accessed by other threads. Failing to do this could have unforeseen and serious consequences.
+
+#### [Using Base.Lockable to associate a lock and a value](@id man-lockable)
+As mentioned in the previous section, the helper-type [`Base.Lockable`](@ref) can be used to programmatically ensure the association between a lock and a value. This is generally recommended, as it is both less prone to error and more readable for others compared to having the association only by convention.
+
+Any object can be wrapped in `Base.Lockable`:
+```julia-repl
+julia> my_array = [];
+
+julia> my_locked_array = Base.Lockable(my_array);
+```
+
+If the lock is held, the underlying object can be accessed with the empty indexing notation:
+```julia-repl
+julia> begin
+           lock(my_locked_array)
+           try
+               push!(my_locked_array[], 1)
+           finally
+               unlock(my_locked_array)
+           end
+       end
+1-element Vector{Any}:
+ 1
+```
+
+It is usually easier and safer to pass a function as the first argument to `lock`. The function is applied to the unlocked object, and the locking/unlocking is handled automatically:
+```julia-repl
+julia> lock(x -> push!(x, 2), my_locked_array);
+
+julia> lock(display, my_locked_array)
+2-element Vector{Any}:
+ 1
+ 2
+
+julia> lock(my_locked_array) do x
+           x[1] = π
+           display(x)
+       end
+2-element Vector{Any}:
+ π = 3.1415926535897...
+ 2
+```
+
+### [Atomic Operations](@id man-atomic-operations)
 
 Julia supports accessing and modifying values *atomically*, that is, in a thread-safe way to avoid
 [race conditions](https://en.wikipedia.org/wiki/Race_condition). A value (which must be of a primitive
@@ -370,11 +455,12 @@ julia> acc[]
 ```
 
 
-## [Per-field atomics](@id man-atomics)
+#### [Per-field atomics](@id man-atomics)
 
 We can also use atomics on a more granular level using the [`@atomic`](@ref
-Base.@atomic), [`@atomicswap`](@ref Base.@atomicswap), and
-[`@atomicreplace`](@ref Base.@atomicreplace) macros.
+Base.@atomic), [`@atomicswap`](@ref Base.@atomicswap),
+[`@atomicreplace`](@ref Base.@atomicreplace) macros, and
+[`@atomiconce`](@ref Base.@atomiconce) macros.
 
 Specific details of the memory model and other details of the design are written
 in the [Julia Atomics
@@ -439,7 +525,7 @@ threads in Julia:
     multiple threads where at least one thread modifies the collection
     (common examples include `push!` on arrays, or inserting
     items into a `Dict`).
-  * The schedule used by `@spawn` is nondeterministic and should not be relied on.
+  * The schedule used by [`@spawn`](@ref Threads.@spawn) is nondeterministic and should not be relied on.
   * Compute-bound, non-memory-allocating tasks can prevent garbage collection from
     running in other threads that are allocating memory. In these cases it may
     be necessary to insert a manual call to `GC.safepoint()` to allow GC to run.
@@ -448,7 +534,8 @@ threads in Julia:
     method, and module definitions in parallel.
   * Be aware that finalizers registered by a library may break if threads are enabled.
     This may require some transitional work across the ecosystem before threading
-    can be widely adopted with confidence. See the next section for further details.
+    can be widely adopted with confidence. See the section on
+    [the safe use of finalizers](@ref man-finalizers) for further details.
 
 ## [Task Migration](@id man-task-migration)
 
@@ -464,7 +551,7 @@ and therefore should not be used to index into a vector of buffers or stateful o
     Task migration was introduced in Julia 1.7. Before this tasks always remained on the same thread that they were
     started on.
 
-## Safe use of Finalizers
+## [Safe use of Finalizers](@id man-finalizers)
 
 Because finalizers can interrupt any code, they must be very careful in how
 they interact with any global state. Unfortunately, the main reason that
diff --git a/doc/src/manual/networking-and-streams.md b/doc/src/manual/networking-and-streams.md
index 00a10177b2155..3ef41754c1e07 100644
--- a/doc/src/manual/networking-and-streams.md
+++ b/doc/src/manual/networking-and-streams.md
@@ -1,9 +1,10 @@
 # Networking and Streams
 
 Julia provides a rich interface to deal with streaming I/O objects such as terminals, pipes and
-TCP sockets. This interface, though asynchronous at the system level, is presented in a synchronous
-manner to the programmer and it is usually unnecessary to think about the underlying asynchronous
-operation. This is achieved by making heavy use of Julia cooperative threading ([coroutine](@ref man-tasks))
+TCP sockets.
+These objects allow data to be sent and received in a stream-like fashion, which means that data is processed sequentially as it becomes available.
+This interface, though asynchronous at the system level, is presented in a synchronous manner to the programmer.
+This is achieved by making heavy use of Julia cooperative threading ([coroutine](@ref man-tasks))
 functionality.
 
 ## Basic Stream I/O
@@ -30,7 +31,7 @@ For example, to read a simple byte array, we could do:
 
 ```julia-repl
 julia> x = zeros(UInt8, 4)
-4-element Array{UInt8,1}:
+4-element Vector{UInt8}:
  0x00
  0x00
  0x00
@@ -38,7 +39,7 @@ julia> x = zeros(UInt8, 4)
 
 julia> read!(stdin, x)
 abcd
-4-element Array{UInt8,1}:
+4-element Vector{UInt8}:
  0x61
  0x62
  0x63
@@ -51,7 +52,7 @@ example, we could have written the above as:
 ```julia-repl
 julia> read(stdin, 4)
 abcd
-4-element Array{UInt8,1}:
+4-element Vector{UInt8}:
  0x61
  0x62
  0x63
@@ -66,8 +67,8 @@ abcd
 "abcd"
 ```
 
-Note that depending on your terminal settings, your TTY may be line buffered and might thus require
-an additional enter before the data is sent to Julia.
+Note that depending on your terminal settings, your TTY ("teletype terminal") may be line buffered and might thus require an additional enter before `stdin` data is sent to Julia.
+When running Julia from the command line in a TTY, output is sent to the console by default, and standard input is read from the keyboard.
 
 To read every line from [`stdin`](@ref) you can use [`eachline`](@ref):
 
@@ -150,7 +151,7 @@ julia> f = open("hello.txt")
 IOStream(<file hello.txt>)
 
 julia> readlines(f)
-1-element Array{String,1}:
+1-element Vector{String}:
  "Hello, World!"
 ```
 
@@ -205,6 +206,24 @@ julia> open("hello.txt") do f
 "HELLO AGAIN."
 ```
 
+If you want to redirect stdout to a file
+
+```# Open file for writing
+out_file = open("output.txt", "w")
+
+# Redirect stdout to file
+redirect_stdout(out_file) do
+    # Your code here
+    println("This output goes to `out_file` via the `stdout` variable.")
+end
+
+# Close file
+close(out_file)
+
+```
+
+Redirecting stdout to a file can help you save and analyze program output, automate processes, and meet compliance requirements.
+
 ## A simple TCP example
 
 Let's jump right in with a simple example involving TCP sockets.
@@ -214,7 +233,7 @@ Let's first create a simple server:
 ```julia-repl
 julia> using Sockets
 
-julia> errormonitor(@async begin
+julia> errormonitor(Threads.@spawn begin
            server = listen(2000)
            while true
                sock = accept(server)
@@ -286,11 +305,11 @@ printed the message and waited for the next client. Reading and writing works in
 To see this, consider the following simple echo server:
 
 ```julia-repl
-julia> errormonitor(@async begin
+julia> errormonitor(Threads.@spawn begin
            server = listen(2001)
            while true
                sock = accept(server)
-               @async while isopen(sock)
+               Threads.@spawn while isopen(sock)
                    write(sock, readline(sock, keep=true))
                end
            end
@@ -300,7 +319,7 @@ Task (runnable) @0x00007fd31dc12e60
 julia> clientside = connect(2001)
 TCPSocket(RawFD(28) open, 0 bytes waiting)
 
-julia> errormonitor(@async while isopen(clientside)
+julia> errormonitor(Threads.@spawn while isopen(clientside)
            write(stdout, readline(clientside, keep=true))
        end)
 Task (runnable) @0x00007fd31dc11870
@@ -336,20 +355,19 @@ ip"74.125.226.225"
 
 ## Asynchronous I/O
 
-
 All I/O operations exposed by [`Base.read`](@ref) and [`Base.write`](@ref) can be performed
 asynchronously through the use of [coroutines](@ref man-tasks). You can create a new coroutine to
-read from or write to a stream using the [`@async`](@ref) macro:
+read from or write to a stream using the [`Threads.@spawn`](@ref) macro:
 
 ```julia-repl
-julia> task = @async open("foo.txt", "w") do io
+julia> task = Threads.@spawn open("foo.txt", "w") do io
            write(io, "Hello, World!")
        end;
 
 julia> wait(task)
 
 julia> readlines("foo.txt")
-1-element Array{String,1}:
+1-element Vector{String}:
  "Hello, World!"
 ```
 
@@ -361,7 +379,7 @@ your program to block until all of the coroutines it wraps around have exited:
 julia> using Sockets
 
 julia> @sync for hostname in ("google.com", "github.com", "julialang.org")
-           @async begin
+           Threads.@spawn begin
                conn = connect(hostname, 80)
                write(conn, "GET / HTTP/1.1\r\nHost:$(hostname)\r\n\r\n")
                readline(conn, keep=true)
@@ -418,6 +436,7 @@ close(socket)
 This example gives the same functionality as the previous program, but uses IPv6 as the network-layer protocol.
 
 Listener:
+
 ```julia
 using Sockets
 group = Sockets.IPv6("ff05::5:6:7")
@@ -430,6 +449,7 @@ close(socket)
 ```
 
 Sender:
+
 ```julia
 using Sockets
 group = Sockets.IPv6("ff05::5:6:7")
diff --git a/doc/src/manual/noteworthy-differences.md b/doc/src/manual/noteworthy-differences.md
index 7a2bb0e9ace03..a168d051ef30c 100644
--- a/doc/src/manual/noteworthy-differences.md
+++ b/doc/src/manual/noteworthy-differences.md
@@ -56,11 +56,15 @@ may trip up Julia users accustomed to MATLAB:
   * In Julia, if `A` and `B` are arrays, logical comparison operations like `A == B` do not return
     an array of booleans. Instead, use `A .== B`, and similarly for the other boolean operators like
     [`<`](@ref), [`>`](@ref).
+  * In Julia, when you want to apply a scalar-valued function elementwise to an array, use broadcasting
+    syntax: `f.(A)` instead of `f(A)`. In some cases, both operations are defined but mean different things:
+    in MATLAB `exp(A)` applies elementwise and `expm(A)` is the [matrix exponential](https://en.wikipedia.org/wiki/Matrix_exponential),
+    but in Julia `exp.(A)` applies elementwise and `exp(A)` is the matrix exponential.
   * In Julia, the operators [`&`](@ref), [`|`](@ref), and [`⊻`](@ref xor) ([`xor`](@ref)) perform the
     bitwise operations equivalent to `and`, `or`, and `xor` respectively in MATLAB, and have precedence
-    similar to Python's bitwise operators (unlike C). They can operate on scalars or element-wise
-    across arrays and can be used to combine logical arrays, but note the difference in order of operations:
-    parentheses may be required (e.g., to select elements of `A` equal to 1 or 2 use `(A .== 1) .| (A .== 2)`).
+    similar to Python's bitwise operators (unlike C). To apply logical boolean operators over an array
+    (like common uses of MATLAB's `&` and `|`), broadcast Julia's short-circuiting operators `.&&` and `.||`.
+    For example, to test if the elements in an array `A` are equal to 1 or 2, you can use `A .== 1 .|| A .== 2`.
   * In Julia, the elements of a collection can be passed as arguments to a function using the splat
     operator `...`, as in `xs=[1,2]; f(xs...)`.
   * Julia's [`svd`](@ref) returns singular values as a vector instead of as a dense diagonal matrix.
@@ -78,6 +82,9 @@ may trip up Julia users accustomed to MATLAB:
     provides the higher order functions [`filter`](@ref) and [`filter!`](@ref), allowing users
     to write `filter(z->z>3, x)` and `filter!(z->z>3, x)` as alternatives to the corresponding transliterations
     `x[x.>3]` and `x = x[x.>3]`. Using [`filter!`](@ref) reduces the use of temporary arrays.
+  * Following on from the previous point, to replace values that meet specific criteria, for example a
+    thresholding operation on all elements in a matrix, could be achieved in Matlab as follows `A(A < threshold) = 0`.
+    The Julia equivalent would be `A[A .< threshold] .= 0`.
   * The analogue of extracting (or "dereferencing") all elements of a cell array, e.g. in `vertcat(A{:})`
     in MATLAB, is written using the splat operator in Julia, e.g. as `vcat(A...)`.
   * In Julia, the `adjoint` function performs conjugate transposition; in MATLAB, `adjoint` provides the
@@ -106,7 +113,7 @@ For users coming to Julia from R, these are some noteworthy differences:
       * In Julia, `[1, 2, 3, 4][[true, false]]` throws a [`BoundsError`](@ref).
       * In Julia, `[1, 2, 3, 4][[true, false, true, false]]` produces `[1, 3]`.
   * Like many languages, Julia does not always allow operations on vectors of different lengths, unlike
-    R where the vectors only need to share a common index range.  For example, `c(1, 2, 3, 4) + c(1, 2)`
+    R where the vectors only need to share a common index range. For example, `c(1, 2, 3, 4) + c(1, 2)`
     is valid R but the equivalent `[1, 2, 3, 4] + [1, 2]` will throw an error in Julia.
   * Julia allows an optional trailing comma when that comma does not change the meaning of code.
     This can cause confusion among R users when indexing into arrays. For example, `x[1,]` in R
@@ -137,7 +144,7 @@ For users coming to Julia from R, these are some noteworthy differences:
     or `if 1==1`.
   * Julia does not provide `nrow` and `ncol`. Instead, use `size(M, 1)` for `nrow(M)` and `size(M, 2)`
     for `ncol(M)`.
-  * Julia is careful to distinguish scalars, vectors and matrices.  In R, `1` and `c(1)` are the same.
+  * Julia is careful to distinguish scalars, vectors and matrices. In R, `1` and `c(1)` are the same.
     In Julia, they cannot be used interchangeably.
   * Julia's [`diag`](@ref) and [`diagm`](@ref) are not like R's.
   * Julia cannot assign to the results of function calls on the left hand side of an assignment operation:
@@ -167,12 +174,12 @@ For users coming to Julia from R, these are some noteworthy differences:
     have higher precedence than the `:` operator, whereas the reverse is true in R. For example, `1:n-1` in
     Julia is equivalent to `1:(n-1)` in R.
   * Julia's [`max`](@ref) and [`min`](@ref) are the equivalent of `pmax` and `pmin` respectively
-    in R, but both arguments need to have the same dimensions.  While [`maximum`](@ref) and [`minimum`](@ref)
+    in R, but both arguments need to have the same dimensions. While [`maximum`](@ref) and [`minimum`](@ref)
     replace `max` and `min` in R, there are important differences.
   * Julia's [`sum`](@ref), [`prod`](@ref), [`maximum`](@ref), and [`minimum`](@ref) are different
     from their counterparts in R. They all accept an optional keyword argument `dims`, which indicates the
-    dimensions, over which the operation is carried out.  For instance, let `A = [1 2; 3 4]` in Julia
-    and `B <- rbind(c(1,2),c(3,4))` be the same matrix in R.  Then `sum(A)` gives the same result as
+    dimensions, over which the operation is carried out. For instance, let `A = [1 2; 3 4]` in Julia
+    and `B <- rbind(c(1,2),c(3,4))` be the same matrix in R. Then `sum(A)` gives the same result as
     `sum(B)`, but `sum(A, dims=1)` is a row vector containing the sum over each column and `sum(A, dims=2)`
     is a column vector containing the sum over each row. This contrasts to the behavior of R, where separate
     `colSums(B)` and `rowSums(B)` functions provide these functionalities. If the `dims` keyword argument is a
@@ -213,11 +220,11 @@ For users coming to Julia from R, these are some noteworthy differences:
   * Unlike Python, Julia allows [AbstractArrays with arbitrary indexes](https://julialang.org/blog/2017/04/offset-arrays/).
     Python's special interpretation of negative indexing, `a[-1]` and `a[-2]`, should be written
     `a[end]` and `a[end-1]` in Julia.
-  * Julia requires `end` for indexing until the last element. `x[1:]` in Python is equivalent to `x[2:end]` in Julia.
-  * In Julia, `:` before any object creates a [`Symbol`](@ref) or *quotes* an expression; so, `x[:5]` is same as `x[5]`. If you want to get the first `n` elements of an array, then use range indexing.
+  * Julia requires `end` for indexing until the last element. `x[2:end]` in Julia is equivalent to `x[1:]` in Python.
+  * In Julia, `:` before any object creates a [`Symbol`](@ref) or *quotes* an expression; so, `x[:5]` is the same as `x[5]`. If you want to get the first `n` elements of an array, then use range indexing.
   * Julia's range indexing has the format of `x[start:step:stop]`, whereas Python's format is `x[start:(stop+1):step]`. Hence, `x[0:10:2]` in Python is equivalent to `x[1:2:10]` in Julia. Similarly, `x[::-1]` in Python, which refers to the reversed array, is equivalent to `x[end:-1:1]` in Julia.
   * In Julia, ranges can be constructed independently as `start:step:stop`, the same syntax it uses
-    in array-indexing.  The `range` function is also supported.
+    in array-indexing. The `range` function is also supported.
   * In Julia, indexing a matrix with arrays like `X[[1,2], [1,3]]` refers to a sub-matrix that contains the intersections of the first and second rows with the first and third columns. In Python, `X[[1,2], [1,3]]` refers to a vector that contains the values of cell `[1,1]` and `[2,3]` in the matrix. `X[[1,2], [1,3]]` in Julia is equivalent with `X[np.ix_([0,1],[0,2])]` in Python. `X[[0,1], [0,2]]` in Python is equivalent with `X[[CartesianIndex(1,1), CartesianIndex(2,3)]]` in Julia.
   * Julia has no line continuation syntax: if, at the end of a line, the input so far is a complete
     expression, it is considered done; otherwise the input continues. One way to force an expression
@@ -245,12 +252,17 @@ For users coming to Julia from R, these are some noteworthy differences:
   * In Julia, the exponentiation operator is `^`, not `**` as in Python.
   * Julia uses `nothing` of type `Nothing` to represent a null value, whereas Python uses `None` of type `NoneType`.
   * In Julia, the standard operators over a matrix type are matrix operations, whereas, in Python, the standard operators are element-wise operations. When both `A` and `B` are matrices, `A * B` in Julia performs matrix multiplication, not element-wise multiplication as in Python. `A * B` in Julia is equivalent with `A @ B` in Python, whereas `A * B` in Python is equivalent with `A .* B` in Julia.
+  * In Julia, when you want to apply a scalar-valued function elementwise to an array, use broadcasting
+    syntax: `f.(A)` instead of `f(A)`. In some cases, both operations are defined but mean different things:
+    `numpy.exp(A)` applies elementwise and `scipy.linalg.expm(A)` is the [matrix exponential](https://en.wikipedia.org/wiki/Matrix_exponential),
+    but in Julia `exp.(A)` applies elementwise and `exp(A)` is the matrix exponential.
   * The adjoint operator `'` in Julia returns an adjoint of a vector (a lazy representation of row vector), whereas the transpose operator `.T` over a vector in Python returns the original vector (non-op).
   * In Julia, a function may contain multiple concrete implementations (called *methods*), which are selected via multiple dispatch based on the types of all arguments to the call, as compared to functions in Python, which have a single implementation and no polymorphism (as opposed to Python method calls which use a different syntax and allows dispatch on the receiver of the method).
   * There are no classes in Julia. Instead there are structures (mutable or immutable), containing data but no methods.
   * Calling a method of a class instance in Python (`x = MyClass(*args); x.f(y)`) corresponds to a function call in Julia, e.g. `x = MyType(args...); f(x, y)`. In general, multiple dispatch is more flexible and powerful than the Python class system.
   * Julia structures may have exactly one abstract supertype, whereas Python classes can inherit from one or more (abstract or concrete) superclasses.
-  * The logical Julia program structure (Packages and Modules) is independent of the file structure (`include` for additional files), whereas the Python code structure is defined by directories (Packages) and files (Modules).
+  * The logical Julia program structure (Packages and Modules) is independent of the file structure, whereas the Python code structure is defined by directories (Packages) and files (Modules).
+  * In Julia, it is idiomatic to split the text of large modules into multiple files, without introducing a new module per file. The code is reassembled inside a single module in a main file via `include`. While the Python equivalent (`exec`) is not typical for this use (it will silently clobber prior definitions), Julia programs are defined as a unit at the `module` level with `using` or `import`, which will only get executed once when first needed--like `include` in Python. Within those modules, the individual files that make up that module are loaded with `include` by listing them once in the intended order.
   * The ternary operator `x > 0 ? 1 : -1` in Julia corresponds to a conditional expression in Python `1 if x > 0 else -1`.
   * In Julia the `@` symbol refers to a macro, whereas in Python it refers to a decorator.
   * Exception handling in Julia is done using `try` — `catch` — `finally`, instead of `try` — `except` — `finally`. In contrast to Python, it is not recommended to use exception handling as part of the normal workflow in Julia (compared with Python, Julia is faster at ordinary control flow but slower at exception-catching).
@@ -258,7 +270,7 @@ For users coming to Julia from R, these are some noteworthy differences:
   * Be careful with non-constant global variables in Julia, especially in tight loops. Since you can write close-to-metal code in Julia (unlike Python), the effect of globals can be drastic (see [Performance Tips](@ref man-performance-tips)).
   * In Julia, rounding and truncation are explicit. Python's `int(3.7)` should be `floor(Int, 3.7)` or `Int(floor(3.7))` and is distinguished from `round(Int, 3.7)`. `floor(x)` and `round(x)` on their own return an integer value of the same type as `x` rather than always returning `Int`.
   * In Julia, parsing is explicit. Python's `float("3.7")` would be `parse(Float64, "3.7")` in Julia.
-  * In Python, the majority of values can be used in logical contexts (e.g. `if "a":` means the following block is executed, and `if "":` means it is not). In Julia, you need explicit conversion to `Bool` (e.g. `if "a"` throws an exception). If you want to test for a non-empty string in Julia, you would explicitly write `if !isempty("")`.  Perhaps surprisingly, in Python `if "False"` and `bool("False")` both evaluate to `True` (because `"False"` is a non-empty string); in Julia, `parse(Bool, "false")` returns `false`.
+  * In Python, the majority of values can be used in logical contexts (e.g. `if "a":` means the following block is executed, and `if "":` means it is not). In Julia, you need explicit conversion to `Bool` (e.g. `if "a"` throws an exception). If you want to test for a non-empty string in Julia, you would explicitly write `if !isempty("")`. Perhaps surprisingly, in Python `if "False"` and `bool("False")` both evaluate to `True` (because `"False"` is a non-empty string); in Julia, `parse(Bool, "false")` returns `false`.
   * In Julia, a new local scope is introduced by most code blocks, including loops and `try` — `catch` — `finally`. Note that comprehensions (list, generator, etc.) introduce a new local scope both in Python and Julia, whereas `if` blocks do not introduce a new local scope in both languages.
 
 ## Noteworthy differences from C/C++
@@ -295,7 +307,7 @@ For users coming to Julia from R, these are some noteworthy differences:
      Floating point literals are closer in behavior to C/C++. Octal (prefixed with `0o`) and binary
     (prefixed with `0b`) literals are also treated as unsigned (or `BigInt` for more than 128 bits).
   * In Julia, the division operator [`/`](@ref) returns a floating point number when both operands
-    are of integer type.  To perform integer division, use [`div`](@ref) or [`÷`](@ref div).
+    are of integer type. To perform integer division, use [`div`](@ref) or [`÷`](@ref div).
   * Indexing an `Array` with floating point types is generally an error in Julia. The Julia
     equivalent of the C expression `a[i / 2]` is `a[i ÷ 2 + 1]`, where `i` is of integer type.
   * String literals can be delimited with either `"`  or `"""`, `"""` delimited literals can contain
@@ -304,7 +316,7 @@ For users coming to Julia from R, these are some noteworthy differences:
     evaluates the variable name or the expression in the context of the function.
   * `//` indicates a [`Rational`](@ref) number, and not a single-line comment (which is `#` in Julia)
   * `#=` indicates the start of a multiline comment, and `=#` ends it.
-  * Functions in Julia return values from their last expression(s) or the `return` keyword.  Multiple
+  * Functions in Julia return values from their last expression(s) or the `return` keyword. Multiple
     values can be returned from functions and assigned as tuples, e.g. `(a, b) = myfunction()` or
     `a, b = myfunction()`, instead of having to pass pointers to values as one would have to do in
     C/C++ (i.e. `a = myfunction(&b)`.
@@ -315,7 +327,7 @@ For users coming to Julia from R, these are some noteworthy differences:
     meaning within `[ ]`, something to watch out for. `;` can be used to separate expressions on a
     single line, but are not strictly necessary in many cases, and are more an aid to readability.
   * In Julia, the operator [`⊻`](@ref xor) ([`xor`](@ref)) performs the bitwise XOR operation, i.e.
-    [`^`](@ref) in C/C++.  Also, the bitwise operators do not have the same precedence as C/C++, so
+    [`^`](@ref) in C/C++. Also, the bitwise operators do not have the same precedence as C/C++, so
     parenthesis may be required.
   * Julia's [`^`](@ref) is exponentiation (pow), not bitwise XOR as in C/C++ (use [`⊻`](@ref xor), or
     [`xor`](@ref), in Julia)
@@ -352,9 +364,9 @@ For users coming to Julia from R, these are some noteworthy differences:
     it's more general than that since methods are dispatched on every argument type, not only `this`,
     using the most-specific-declaration rule).
 
-### Julia &hArr; C/C++: Namespaces
+### Julia ⇔ C/C++: Namespaces
   * C/C++ `namespace`s correspond roughly to Julia `module`s.
-  * There are no private globals or fields in Julia.  Everything is publicly accessible
+  * There are no private globals or fields in Julia. Everything is publicly accessible
     through fully qualified paths (or relative paths, if desired).
   * `using MyNamespace::myfun` (C++) corresponds roughly to `import MyModule: myfun` (Julia).
   * `using namespace MyNamespace` (C++) corresponds roughly to `using MyModule` (Julia)
@@ -364,7 +376,7 @@ For users coming to Julia from R, these are some noteworthy differences:
   * Caveat: `import`/`using` (Julia) works only at the global scope level (`module`s)
     * In C++, `using namespace X` works within arbitrary scopes (ex: function scope).
 
-### Julia &hArr; C/C++: Module loading
+### Julia ⇔ C/C++: Module loading
   * When you think of a C/C++ "**library**", you are likely looking for a Julia "**package**".
     * Caveat: C/C++ libraries often house multiple "software modules" whereas Julia
       "packages" typically house one.
@@ -392,10 +404,10 @@ For users coming to Julia from R, these are some noteworthy differences:
     paths to the `Base.LOAD_PATH` array.
     * Packages from directory-based repositories do not require the `Pkg.add()` tool prior to
       being loaded with `import` or `using`. They are simply available to the project.
-    * Directory-based package repositories are the **quickest solution** to developping local
+    * Directory-based package repositories are the **quickest solution** to developing local
       libraries of "software modules".
 
-### Julia &hArr; C/C++: Assembling modules
+### Julia ⇔ C/C++: Assembling modules
   * In C/C++, `.c`/`.cpp` files are compiled & added to a library with build/`make` scripts.
     * In Julia, `import [PkgName]`/`using [PkgName]` statements load `[PkgName].jl` located
       in a package's `[PkgName]/src/` subdirectory.
@@ -412,20 +424,21 @@ For users coming to Julia from R, these are some noteworthy differences:
       Julia package* ("software module"). It is therefore relatively straightforward to ensure
       file are `include`d only once (No `#ifdef` confusion).
 
-### Julia &hArr; C/C++: Module interface
-  * C++ exposes interfaces using "public" `.h`/`.hpp` files whereas Julia `module`s `export`
-    symbols that are intended for their users.
+### Julia ⇔ C/C++: Module interface
+  * C++ exposes interfaces using "public" `.h`/`.hpp` files whereas Julia `module`s mark
+    specific symbols that are intended for their users as `public`or `export`ed.
     * Often, Julia `module`s simply add functionality by generating new "methods" to existing
       functions (ex: `Base.push!`).
     * Developers of Julia packages therefore cannot rely on header files for interface
       documentation.
     * Interfaces for Julia packages are typically described using docstrings, README.md,
       static web pages, ...
-  * Some developers choose not to `export` all symbols required to use their package/module.
+  * Some developers choose not to `export` all symbols required to use their package/module,
+    but should still mark unexported user facing symbols as `public`.
     * Users might be expected to access these components by qualifying functions/structs/...
       with the package/module name (ex: `MyModule.run_this_task(...)`).
 
-### Julia &hArr; C/C++: Quick reference
+### Julia ⇔ C/C++: Quick reference
 
 | Software Concept   | Julia | C/C++ |
 | :---               | :---  | :---  |
@@ -433,7 +446,7 @@ For users coming to Julia from R, these are some noteworthy differences:
 | function scope     | `function x()` ... `end` | `int x() {` ... `}`                          |
 | global scope       | `module MyMod` ... `end` | `namespace MyNS {` ... `}`                   |
 | software module    | A Julia "package"        | `.h`/`.hpp` files<br>+compiled `somelib.a`   |
-| assembling<br>software modules | `SomePkg.jl`: ...<br>`import("subfile1.jl")`<br>`import("subfile2.jl")`<br>... | `$(AR) *.o` &rArr; `somelib.a` |
+| assembling<br>software modules | `SomePkg.jl`: ...<br>`include("subfile1.jl")`<br>`include("subfile2.jl")`<br>... | `$(AR) *.o` ⇒ `somelib.a` |
 | import<br>software module | `import SomePkg`  | `#include <somelib>`<br>+link in `somelib.a` |
 | module library     | `LOAD_PATH[]`, \*Git repository,<br>\*\*custom package registry  | more `.h`/`.hpp` files<br>+bigger compiled `somebiglib.a` |
 
diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md
index 069c3b2d424e5..f821738d3b161 100644
--- a/doc/src/manual/performance-tips.md
+++ b/doc/src/manual/performance-tips.md
@@ -3,7 +3,16 @@
 In the following sections, we briefly go through a few techniques that can help make your Julia
 code run as fast as possible.
 
-## Performance critical code should be inside a function
+## [Table of contents](@id man-performance-tips-toc)
+
+```@contents
+Pages = ["performance-tips.md"]
+Depth = 3
+```
+
+## General advice
+
+### Performance critical code should be inside a function
 
 Any code that is performance critical should be inside a function. Code inside functions tends to run much faster than top level code, due to how Julia's compiler works.
 
@@ -11,7 +20,7 @@ The use of functions is not only important for performance: functions are more r
 
 The functions should take arguments, instead of operating directly on global variables, see the next point.
 
-## Avoid untyped global variables
+### Avoid untyped global variables
 
 The value of an untyped global variable might change at any point, possibly leading to a change of its type. This makes
 it difficult for the compiler to optimize code using global variables. This also applies to type-valued variables,
@@ -24,7 +33,7 @@ performance:
 const DEFAULT_VAL = 0
 ```
 
-If a global is known to always be of the same type, [the type should be annotated](@ref man-typed-globals).
+If a non-constant global is known to always be of the same type, [the type should be annotated](@ref man-typed-globals); `const` globals need not be annotated because their type is inferred from their initialization value.
 
 Uses of untyped globals can be optimized by annotating their types at the point of use:
 
@@ -49,19 +58,21 @@ Passing arguments to functions is better style. It leads to more reusable code a
 
 In the following REPL session:
 
-```julia-repl
+```jldoctest
 julia> x = 1.0
+1.0
 ```
 
 is equivalent to:
 
-```julia-repl
+```jldoctest
 julia> global x = 1.0
+1.0
 ```
 
 so all the performance issues discussed previously apply.
 
-## Measure performance with [`@time`](@ref) and pay attention to memory allocation
+### Measure performance with [`@time`](@ref) and pay attention to memory allocation
 
 A useful tool for measuring performance is the [`@time`](@ref) macro. We here repeat the example
 with the global variable above, but this time with the type annotation removed:
@@ -94,8 +105,8 @@ a vector of 64-bit floats so there should be no need to allocate (heap) memory.
 
 We should clarify that what `@time` reports is specifically *heap* allocations, which are typically needed for either
 mutable objects or for creating/growing variable-sized containers (such as `Array` or `Dict`, strings, or "type-unstable"
-objects whose type is only known at runtime).  Allocating (or deallocating) such blocks of memory may require an expensive
-system call (e.g. via `malloc` in C), and they must be tracked for garbage collection.  In contrast, immutable values like
+objects whose type is only known at runtime). Allocating (or deallocating) such blocks of memory may require an expensive function
+call to libc (e.g. via `malloc` in C), and they must be tracked for garbage collection. In contrast, immutable values like
 numbers (except bignums), tuples, and immutable `struct`s can be stored much more cheaply, e.g. in stack or CPU-register
 memory, so one doesn’t typically worry about the performance cost of "allocating" them.
 
@@ -105,6 +116,8 @@ Consequently, in addition to the allocation itself, it's very likely
 that the code generated for your function is far from optimal. Take such indications seriously
 and follow the advice below.
 
+For more information about memory management and garbage collection in Julia, see [Memory Management and Garbage Collection](@ref man-memory-management).
+
 In this particular case, the memory allocation is due to the usage of a type-unstable global variable `x`, so if we instead pass `x` as an argument to the function it no longer allocates memory
 (the remaining allocation reported below is due to running the `@time` macro in global scope)
 and is significantly faster after the first call:
@@ -149,7 +162,38 @@ its algorithmic aspects (see [Pre-allocating outputs](@ref)).
     For more serious benchmarking, consider the [BenchmarkTools.jl](https://github.com/JuliaCI/BenchmarkTools.jl)
     package which among other things evaluates the function multiple times in order to reduce noise.
 
-## [Tools](@id tools)
+### Break functions into multiple definitions
+
+Writing a function as many small definitions allows the compiler to directly call the most applicable
+code, or even inline it.
+
+Here is an example of a "compound function" that should really be written as multiple definitions:
+
+```julia
+using LinearAlgebra
+
+function mynorm(A)
+    if isa(A, Vector)
+        return sqrt(real(dot(A,A)))
+    elseif isa(A, Matrix)
+        return maximum(svdvals(A))
+    else
+        error("mynorm: invalid argument")
+    end
+end
+```
+
+This can be written more concisely and efficiently as:
+
+```julia
+mynorm(x::Vector) = sqrt(real(dot(x, x)))
+mynorm(A::Matrix) = maximum(svdvals(A))
+```
+
+It should however be noted that the compiler is quite efficient at optimizing away the dead branches in code
+written as the `mynorm` example.
+
+### [Tools](@id tools)
 
 Julia and its package ecosystem includes tools that may help you diagnose problems and improve
 the performance of your code:
@@ -157,7 +201,7 @@ the performance of your code:
   * [Profiling](@ref) allows you to measure the performance of your running code and identify lines
     that serve as bottlenecks. For complex projects, the [ProfileView](https://github.com/timholy/ProfileView.jl)
     package can help you visualize your profiling results.
-  * The [Traceur](https://github.com/JunoLab/Traceur.jl) package can help you find common performance problems in your code.
+  * The [JET](https://github.com/aviatesk/JET.jl) package can help you find common performance problems in your code.
   * Unexpectedly-large memory allocations--as reported by [`@time`](@ref), [`@allocated`](@ref), or
     the profiler (through calls to the garbage-collection routines)--hint that there might be issues
     with your code. If you don't see another reason for the allocations, suspect a type problem.
@@ -166,7 +210,14 @@ the performance of your code:
   * `@code_warntype` generates a representation of your code that can be helpful in finding expressions
     that result in type uncertainty. See [`@code_warntype`](@ref) below.
 
-## [Avoid containers with abstract type parameters](@id man-performance-abstract-container)
+## Type inference
+
+In many languages with optional type declarations, adding declarations is the principal way to
+make code run faster. This is *not* the case in Julia. In Julia, the compiler generally knows
+the types of all function arguments, local variables, and expressions. However, there are a few
+specific instances where declarations are helpful.
+
+### [Avoid containers with abstract type parameters](@id man-performance-abstract-container)
 
 When working with parameterized types, including arrays, it is best to avoid parameterizing with
 abstract types where possible.
@@ -210,13 +261,6 @@ better than `IdDict{Type, Vector}`
 
 See also the discussion under [Parametric Types](@ref).
 
-## Type declarations
-
-In many languages with optional type declarations, adding declarations is the principal way to
-make code run faster. This is *not* the case in Julia. In Julia, the compiler generally knows
-the types of all function arguments, local variables, and expressions. However, there are a few
-specific instances where declarations are helpful.
-
 ### Avoid fields with abstract type
 
 Types can be declared without specifying the types of their fields:
@@ -497,7 +541,7 @@ of type `Array{Any}`). But, if you're using one of these structures and happen t
 of an element, it helps to share this knowledge with the compiler:
 
 ```julia
-function foo(a::Array{Any,1})
+function foo(a::Vector{Any})
     x = a[1]::Int32
     b = x+1
     ...
@@ -608,38 +652,7 @@ would not normally specialize that method call. You need to check the
 when argument types are changed, i.e., if `Base.specializations(@which f(...))` contains specializations
 for the argument in question.
 
-## Break functions into multiple definitions
-
-Writing a function as many small definitions allows the compiler to directly call the most applicable
-code, or even inline it.
-
-Here is an example of a "compound function" that should really be written as multiple definitions:
-
-```julia
-using LinearAlgebra
-
-function mynorm(A)
-    if isa(A, Vector)
-        return sqrt(real(dot(A,A)))
-    elseif isa(A, Matrix)
-        return maximum(svdvals(A))
-    else
-        error("mynorm: invalid argument")
-    end
-end
-```
-
-This can be written more concisely and efficiently as:
-
-```julia
-mynorm(x::Vector) = sqrt(real(dot(x, x)))
-mynorm(A::Matrix) = maximum(svdvals(A))
-```
-
-It should however be noted that the compiler is quite efficient at optimizing away the dead branches in code
-written as the `mynorm` example.
-
-## Write "type-stable" functions
+### Write "type-stable" functions
 
 When possible, it helps to ensure that a function always returns a value of the same type. Consider
 the following definition:
@@ -660,7 +673,7 @@ pos(x) = x < 0 ? zero(x) : x
 There is also a [`oneunit`](@ref) function, and a more general [`oftype(x, y)`](@ref) function, which
 returns `y` converted to the type of `x`.
 
-## Avoid changing the type of a variable
+### Avoid changing the type of a variable
 
 An analogous "type-stability" problem exists for variables used repeatedly within a function:
 
@@ -683,7 +696,7 @@ optimize the body of the loop. There are several possible fixes:
   * Use an explicit conversion by `x = oneunit(Float64)`
   * Initialize with the first loop iteration, to `x = 1 / rand()`, then loop `for i = 2:10`
 
-## [Separate kernel functions (aka, function barriers)](@id kernel-functions)
+### [Separate kernel functions (aka, function barriers)](@id kernel-functions)
 
 Many functions follow a pattern of performing some set-up work, and then running many iterations
 to perform a core computation. Where possible, it is a good idea to put these core computations
@@ -742,112 +755,311 @@ or the [`fill!`](@ref) function, which we could have used instead of writing our
 Functions like `strange_twos` occur when dealing with data of uncertain type, for example data
 loaded from an input file that might contain either integers, floats, strings, or something else.
 
-## [Types with values-as-parameters](@id man-performance-value-type)
-
-Let's say you want to create an `N`-dimensional array that has size 3 along each axis. Such arrays
-can be created like this:
-
-```jldoctest
-julia> A = fill(5.0, (3, 3))
-3×3 Matrix{Float64}:
- 5.0  5.0  5.0
- 5.0  5.0  5.0
- 5.0  5.0  5.0
-```
+### [[`@code_warntype`](@ref)](@id man-code-warntype)
 
-This approach works very well: the compiler can figure out that `A` is an `Array{Float64,2}` because
-it knows the type of the fill value (`5.0::Float64`) and the dimensionality (`(3, 3)::NTuple{2,Int}`).
-This implies that the compiler can generate very efficient code for any future usage of `A` in
-the same function.
+The macro [`@code_warntype`](@ref) (or its function variant [`code_warntype`](@ref)) can sometimes
+be helpful in diagnosing type-related problems. Here's an example:
 
-But now let's say you want to write a function that creates a 3×3×... array in arbitrary dimensions;
-you might be tempted to write a function
+```julia-repl
+julia> @noinline pos(x) = x < 0 ? 0 : x;
 
-```jldoctest
-julia> function array3(fillval, N)
-           fill(fillval, ntuple(d->3, N))
-       end
-array3 (generic function with 1 method)
+julia> function f(x)
+           y = pos(x)
+           return sin(y*x + 1)
+       end;
 
-julia> array3(5.0, 2)
-3×3 Matrix{Float64}:
- 5.0  5.0  5.0
- 5.0  5.0  5.0
- 5.0  5.0  5.0
+julia> @code_warntype f(3.2)
+MethodInstance for f(::Float64)
+  from f(x) @ Main REPL[9]:1
+Arguments
+  #self#::Core.Const(f)
+  x::Float64
+Locals
+  y::Union{Float64, Int64}
+Body::Float64
+1 ─      (y = Main.pos(x))
+│   %2 = (y * x)::Float64
+│   %3 = (%2 + 1)::Float64
+│   %4 = Main.sin(%3)::Float64
+└──      return %4
 ```
 
-This works, but (as you can verify for yourself using `@code_warntype array3(5.0, 2)`) the problem
-is that the output type cannot be inferred: the argument `N` is a *value* of type `Int`, and type-inference
-does not (and cannot) predict its value in advance. This means that code using the output of this
-function has to be conservative, checking the type on each access of `A`; such code will be very
-slow.
+Interpreting the output of [`@code_warntype`](@ref), like that of its cousins [`@code_lowered`](@ref),
+[`@code_typed`](@ref), [`@code_llvm`](@ref), and [`@code_native`](@ref), takes a little practice.
+Your code is being presented in form that has been heavily digested on its way to generating
+compiled machine code. Most of the expressions are annotated by a type, indicated by the `::T`
+(where `T` might be [`Float64`](@ref), for example). The most important characteristic of [`@code_warntype`](@ref)
+is that non-concrete types are displayed in red; since this document is written in Markdown, which has no color,
+in this document, red text is denoted by uppercase.
 
-Now, one very good way to solve such problems is by using the [function-barrier technique](@ref kernel-functions).
-However, in some cases you might want to eliminate the type-instability altogether. In such cases,
-one approach is to pass the dimensionality as a parameter, for example through `Val{T}()` (see
-["Value types"](@ref)):
+At the top, the inferred return type of the function is shown as `Body::Float64`.
+The next lines represent the body of `f` in Julia's SSA IR form.
+The numbered boxes are labels and represent targets for jumps (via `goto`) in your code.
+Looking at the body, you can see that the first thing that happens is that `pos` is called and the
+return value has been inferred as the `Union` type `Union{Float64, Int64}` shown in uppercase since
+it is a non-concrete type. This means that we cannot know the exact return type of `pos` based on the
+input types. However, the result of `y*x`is a `Float64` no matter if `y` is a `Float64` or `Int64`
+The net result is that `f(x::Float64)` will not be type-unstable
+in its output, even if some of the intermediate computations are type-unstable.
 
-```jldoctest
-julia> function array3(fillval, ::Val{N}) where N
-           fill(fillval, ntuple(d->3, Val(N)))
-       end
-array3 (generic function with 1 method)
+How you use this information is up to you. Obviously, it would be far and away best to fix `pos`
+to be type-stable: if you did so, all of the variables in `f` would be concrete, and its performance
+would be optimal. However, there are circumstances where this kind of *ephemeral* type instability
+might not matter too much: for example, if `pos` is never used in isolation, the fact that `f`'s
+output is type-stable (for [`Float64`](@ref) inputs) will shield later code from the propagating
+effects of type instability. This is particularly relevant in cases where fixing the type instability
+is difficult or impossible. In such cases, the tips above (e.g., adding type annotations and/or
+breaking up functions) are your best tools to contain the "damage" from type instability.
+Also, note that even Julia Base has functions that are type unstable.
+For example, the function [`findfirst`](@ref) returns the index into an array where a key is found,
+or `nothing` if it is not found, a clear type instability. In order to make it easier to find the
+type instabilities that are likely to be important, `Union`s containing either `missing` or `nothing`
+are color highlighted in yellow, instead of red.
 
-julia> array3(5.0, Val(2))
-3×3 Matrix{Float64}:
- 5.0  5.0  5.0
- 5.0  5.0  5.0
- 5.0  5.0  5.0
-```
+The following examples may help you interpret expressions marked as containing non-concrete types:
 
-Julia has a specialized version of `ntuple` that accepts a `Val{::Int}` instance as the second
-parameter; by passing `N` as a type-parameter, you make its "value" known to the compiler.
-Consequently, this version of `array3` allows the compiler to predict the return type.
+  * Function body starting with `Body::Union{T1,T2})`
+      * Interpretation: function with unstable return type
+      * Suggestion: make the return value type-stable, even if you have to annotate it
 
-However, making use of such techniques can be surprisingly subtle. For example, it would be of
-no help if you called `array3` from a function like this:
+  * `invoke Main.g(%%x::Int64)::Union{Float64, Int64}`
+      * Interpretation: call to a type-unstable function `g`.
+      * Suggestion: fix the function, or if necessary annotate the return value
 
-```julia
-function call_array3(fillval, n)
-    A = array3(fillval, Val(n))
-end
-```
+  * `invoke Base.getindex(%%x::Array{Any,1}, 1::Int64)::Any`
+      * Interpretation: accessing elements of poorly-typed arrays
+      * Suggestion: use arrays with better-defined types, or if necessary annotate the type of individual
+        element accesses
 
-Here, you've created the same problem all over again: the compiler can't guess what `n` is,
-so it doesn't know the *type* of `Val(n)`. Attempting to use `Val`, but doing so incorrectly, can
-easily make performance *worse* in many situations. (Only in situations where you're effectively
-combining `Val` with the function-barrier trick, to make the kernel function more efficient, should
-code like the above be used.)
+  * `Base.getfield(%%x, :(:data))::Array{Float64,N} where N`
+      * Interpretation: getting a field that is of non-concrete type. In this case, the type of `x`, say `ArrayContainer`, had a
+        field `data::Array{T}`. But `Array` needs the dimension `N`, too, to be a concrete type.
+      * Suggestion: use concrete types like `Array{T,3}` or `Array{T,N}`, where `N` is now a parameter
+        of `ArrayContainer`
 
-An example of correct usage of `Val` would be:
+### [Performance of captured variable](@id man-performance-captured)
 
+Consider the following example that defines an inner function:
 ```julia
-function filter3(A::AbstractArray{T,N}) where {T,N}
-    kernel = array3(1, Val(N))
-    filter(A, kernel)
+function abmult(r::Int)
+    if r < 0
+        r = -r
+    end
+    f = x -> x * r
+    return f
 end
 ```
 
-In this example, `N` is passed as a parameter, so its "value" is known to the compiler. Essentially,
-`Val(T)` works only when `T` is either hard-coded/literal (`Val(3)`) or already specified in the
-type-domain.
-
-## The dangers of abusing multiple dispatch (aka, more on types with values-as-parameters)
-
-Once one learns to appreciate multiple dispatch, there's an understandable tendency to go overboard
-and try to use it for everything. For example, you might imagine using it to store information,
-e.g.
+Function `abmult` returns a function `f` that multiplies its argument by
+the absolute value of `r`. The inner function assigned to `f` is called a
+"closure". Inner functions are also used by the
+language for `do`-blocks and for generator expressions.
 
-```
-struct Car{Make, Model}
-    year::Int
-    ...more fields...
-end
-```
+This style of code presents performance challenges for the language.
+The parser, when translating it into lower-level instructions,
+substantially reorganizes the above code by extracting the
+inner function to a separate code block.  "Captured" variables such as `r`
+that are shared by inner functions and their enclosing scope are
+also extracted into a heap-allocated "box" accessible to both inner and
+outer functions because the language specifies that `r` in the
+inner scope must be identical to `r` in the outer scope even after the
+outer scope (or another inner function) modifies `r`.
 
-and then dispatch on objects like `Car{:Honda,:Accord}(year, args...)`.
+The discussion in the preceding paragraph referred to the "parser", that is, the phase
+of compilation that takes place when the module containing `abmult` is first loaded,
+as opposed to the later phase when it is first invoked. The parser does not "know" that
+`Int` is a fixed type, or that the statement `r = -r` transforms an `Int` to another `Int`.
+The magic of type inference takes place in the later phase of compilation.
 
-This might be worthwhile when either of the following are true:
+Thus, the parser does not know that `r` has a fixed type (`Int`).
+Nor that `r` does not change value once the inner function is created (so that
+the box is unneeded). Therefore, the parser emits code for
+box that holds an object with an abstract type such as `Any`, which
+requires run-time type dispatch for each occurrence of `r`. This can be
+verified by applying `@code_warntype` to the above function. Both the boxing
+and the run-time type dispatch can cause loss of performance.
+
+If captured variables are used in a performance-critical section of the code,
+then the following tips help ensure that their use is performant. First, if
+it is known that a captured variable does not change its type, then this can
+be declared explicitly with a type annotation (on the variable, not the
+right-hand side):
+```julia
+function abmult2(r0::Int)
+    r::Int = r0
+    if r < 0
+        r = -r
+    end
+    f = x -> x * r
+    return f
+end
+```
+The type annotation partially recovers lost performance due to capturing because
+the parser can associate a concrete type to the object in the box.
+Going further, if the captured variable does not need to be boxed at all (because it
+will not be reassigned after the closure is created), this can be indicated
+with `let` blocks as follows.
+```julia
+function abmult3(r::Int)
+    if r < 0
+        r = -r
+    end
+    f = let r = r
+            x -> x * r
+    end
+    return f
+end
+```
+The `let` block creates a new variable `r` whose scope is only the
+inner function. The second technique recovers full language performance
+in the presence of captured variables. Note that this is a rapidly
+evolving aspect of the compiler, and it is likely that future releases
+will not require this degree of programmer annotation to attain performance.
+In the mean time, some user-contributed packages like
+[FastClosures](https://github.com/c42f/FastClosures.jl) automate the
+insertion of `let` statements as in `abmult3`.
+
+#### Use `@__FUNCTION__` for recursive closures
+
+For recursive closures specifically, the [`@__FUNCTION__`](@ref) macro can avoid both type instability and boxing.
+
+First, let's see the unoptimized version:
+
+```julia
+function make_fib_unoptimized()
+    fib(n) = n <= 1 ? 1 : fib(n - 1) + fib(n - 2)  # fib is boxed
+    return fib
+end
+```
+
+The `fib` function is boxed, meaning the return type is inferred as `Any`:
+
+```julia
+@code_warntype make_fib_unoptimized()
+```
+
+Now, to eliminate this type instability, we can instead use `@__FUNCTION__` to refer to the concrete function object:
+
+```julia
+function make_fib_optimized()
+    fib(n) = n <= 1 ? 1 : (@__FUNCTION__)(n - 1) + (@__FUNCTION__)(n - 2)
+    return fib
+end
+```
+
+This gives us a concrete return type:
+
+```julia
+@code_warntype make_fib_optimized()
+```
+
+
+### [Types with values-as-parameters](@id man-performance-value-type)
+
+Let's say you want to create an `N`-dimensional array that has size 3 along each axis. Such arrays
+can be created like this:
+
+```jldoctest
+julia> A = fill(5.0, (3, 3))
+3×3 Matrix{Float64}:
+ 5.0  5.0  5.0
+ 5.0  5.0  5.0
+ 5.0  5.0  5.0
+```
+
+This approach works very well: the compiler can figure out that `A` is an `Array{Float64,2}` because
+it knows the type of the fill value (`5.0::Float64`) and the dimensionality (`(3, 3)::NTuple{2,Int}`).
+This implies that the compiler can generate very efficient code for any future usage of `A` in
+the same function.
+
+But now let's say you want to write a function that creates a 3×3×... array in arbitrary dimensions;
+you might be tempted to write a function
+
+```jldoctest
+julia> function array3(fillval, N)
+           fill(fillval, ntuple(d->3, N))
+       end
+array3 (generic function with 1 method)
+
+julia> array3(5.0, 2)
+3×3 Matrix{Float64}:
+ 5.0  5.0  5.0
+ 5.0  5.0  5.0
+ 5.0  5.0  5.0
+```
+
+This works, but (as you can verify for yourself using `@code_warntype array3(5.0, 2)`) the problem
+is that the output type cannot be inferred: the argument `N` is a *value* of type `Int`, and type-inference
+does not (and cannot) predict its value in advance. This means that code using the output of this
+function has to be conservative, checking the type on each access of `A`; such code will be very
+slow.
+
+Now, one very good way to solve such problems is by using the [function-barrier technique](@ref kernel-functions).
+However, in some cases you might want to eliminate the type-instability altogether. In such cases,
+one approach is to pass the dimensionality as a parameter, for example through `Val{T}()` (see
+["Value types"](@ref)):
+
+```jldoctest
+julia> function array3(fillval, ::Val{N}) where N
+           fill(fillval, ntuple(d->3, Val(N)))
+       end
+array3 (generic function with 1 method)
+
+julia> array3(5.0, Val(2))
+3×3 Matrix{Float64}:
+ 5.0  5.0  5.0
+ 5.0  5.0  5.0
+ 5.0  5.0  5.0
+```
+
+Julia has a specialized version of `ntuple` that accepts a `Val{::Int}` instance as the second
+parameter; by passing `N` as a type-parameter, you make its "value" known to the compiler.
+Consequently, this version of `array3` allows the compiler to predict the return type.
+
+However, making use of such techniques can be surprisingly subtle. For example, it would be of
+no help if you called `array3` from a function like this:
+
+```julia
+function call_array3(fillval, n)
+    A = array3(fillval, Val(n))
+end
+```
+
+Here, you've created the same problem all over again: the compiler can't guess what `n` is,
+so it doesn't know the *type* of `Val(n)`. Attempting to use `Val`, but doing so incorrectly, can
+easily make performance *worse* in many situations. (Only in situations where you're effectively
+combining `Val` with the function-barrier trick, to make the kernel function more efficient, should
+code like the above be used.)
+
+An example of correct usage of `Val` would be:
+
+```julia
+function filter3(A::AbstractArray{T,N}) where {T,N}
+    kernel = array3(1, Val(N))
+    filter(A, kernel)
+end
+```
+
+In this example, `N` is passed as a parameter, so its "value" is known to the compiler. Essentially,
+`Val(T)` works only when `T` is either hard-coded/literal (`Val(3)`) or already specified in the
+type-domain.
+
+### The dangers of abusing multiple dispatch (aka, more on types with values-as-parameters)
+
+Once one learns to appreciate multiple dispatch, there's an understandable tendency to go overboard
+and try to use it for everything. For example, you might imagine using it to store information,
+e.g.
+
+```
+struct Car{Make, Model}
+    year::Int
+    ...more fields...
+end
+```
+
+and then dispatch on objects like `Car{:Honda,:Accord}(year, args...)`.
+
+This might be worthwhile when either of the following are true:
 
   * You require CPU-intensive processing on each `Car`, and it becomes vastly more efficient if you
     know the `Make` and `Model` at compile time and the total number of different `Make` or `Model`
@@ -869,7 +1081,7 @@ JIT-compilation machinery to basically execute the equivalent of a switch statem
 lookup in your own code.
 
 Some run-time benchmarks comparing (1) type dispatch, (2) dictionary lookup, and (3) a "switch"
-statement can be found [on the mailing list](https://groups.google.com/forum/#!msg/julia-users/jUMu9A3QKQQ/qjgVWr7vAwAJ).
+statement can be found [on discourse](https://discourse.julialang.org/t/suggestion-updating-the-dispatch-vs-dict-switch-benchmark-link-in-the-manual/132159).
 
 Perhaps even worse than the run-time impact is the compile-time impact: Julia will compile specialized
 functions for each different `Car{Make, Model}`; if you have hundreds or thousands of such types,
@@ -879,98 +1091,9 @@ or thousands of variants compiled for it. Each of these increases the size of th
 code, the length of internal lists of methods, etc. Excess enthusiasm for values-as-parameters
 can easily waste enormous resources.
 
-## [Access arrays in memory order, along columns](@id man-performance-column-major)
-
-Multidimensional arrays in Julia are stored in column-major order. This means that arrays are
-stacked one column at a time. This can be verified using the `vec` function or the syntax `[:]`
-as shown below (notice that the array is ordered `[1 3 2 4]`, not `[1 2 3 4]`):
-
-```jldoctest
-julia> x = [1 2; 3 4]
-2×2 Matrix{Int64}:
- 1  2
- 3  4
-
-julia> x[:]
-4-element Vector{Int64}:
- 1
- 3
- 2
- 4
-```
-
-This convention for ordering arrays is common in many languages like Fortran, Matlab, and R (to
-name a few). The alternative to column-major ordering is row-major ordering, which is the convention
-adopted by C and Python (`numpy`) among other languages. Remembering the ordering of arrays can
-have significant performance effects when looping over arrays. A rule of thumb to keep in mind
-is that with column-major arrays, the first index changes most rapidly. Essentially this means
-that looping will be faster if the inner-most loop index is the first to appear in a slice expression.
-Keep in mind that indexing an array with `:` is an implicit loop that iteratively accesses all elements within a particular dimension; it can be faster to extract columns than rows, for example.
-
-Consider the following contrived example. Imagine we wanted to write a function that accepts a
-[`Vector`](@ref) and returns a square [`Matrix`](@ref) with either the rows or the columns filled with copies
-of the input vector. Assume that it is not important whether rows or columns are filled with these
-copies (perhaps the rest of the code can be easily adapted accordingly). We could conceivably
-do this in at least four ways (in addition to the recommended call to the built-in [`repeat`](@ref)):
-
-```julia
-function copy_cols(x::Vector{T}) where T
-    inds = axes(x, 1)
-    out = similar(Array{T}, inds, inds)
-    for i = inds
-        out[:, i] = x
-    end
-    return out
-end
-
-function copy_rows(x::Vector{T}) where T
-    inds = axes(x, 1)
-    out = similar(Array{T}, inds, inds)
-    for i = inds
-        out[i, :] = x
-    end
-    return out
-end
-
-function copy_col_row(x::Vector{T}) where T
-    inds = axes(x, 1)
-    out = similar(Array{T}, inds, inds)
-    for col = inds, row = inds
-        out[row, col] = x[row]
-    end
-    return out
-end
-
-function copy_row_col(x::Vector{T}) where T
-    inds = axes(x, 1)
-    out = similar(Array{T}, inds, inds)
-    for row = inds, col = inds
-        out[row, col] = x[col]
-    end
-    return out
-end
-```
-
-Now we will time each of these functions using the same random `10000` by `1` input vector:
+## Memory management and arrays
 
-```julia-repl
-julia> x = randn(10000);
-
-julia> fmt(f) = println(rpad(string(f)*": ", 14, ' '), @elapsed f(x))
-
-julia> map(fmt, [copy_cols, copy_rows, copy_col_row, copy_row_col]);
-copy_cols:    0.331706323
-copy_rows:    1.799009911
-copy_col_row: 0.415630047
-copy_row_col: 1.721531501
-```
-
-Notice that `copy_cols` is much faster than `copy_rows`. This is expected because `copy_cols`
-respects the column-based memory layout of the `Matrix` and fills it one column at a time. Additionally,
-`copy_col_row` is much faster than `copy_row_col` because it follows our rule of thumb that the
-first element to appear in a slice expression should be coupled with the inner-most loop.
-
-## Pre-allocating outputs
+### Pre-allocate outputs
 
 If your function returns an `Array` or some other complex type, it may have to allocate memory.
 Unfortunately, oftentimes allocation and its converse, garbage collection, are substantial bottlenecks.
@@ -980,12 +1103,12 @@ the output. As a trivial example, compare
 
 ```jldoctest prealloc
 julia> function xinc(x)
-           return [x, x+1, x+2]
+           return [x + i for i  in 1:3000]
        end;
 
 julia> function loopinc()
            y = 0
-           for i = 1:10^7
+           for i = 1:10^5
                ret = xinc(i)
                y += ret[2]
            end
@@ -997,16 +1120,16 @@ with
 
 ```jldoctest prealloc
 julia> function xinc!(ret::AbstractVector{T}, x::T) where T
-           ret[1] = x
-           ret[2] = x+1
-           ret[3] = x+2
+           for i in 1:3000
+               ret[i] = x+i
+           end
            nothing
        end;
 
 julia> function loopinc_prealloc()
-           ret = Vector{Int}(undef, 3)
+           ret = Vector{Int}(undef, 3000)
            y = 0
-           for i = 1:10^7
+           for i = 1:10^5
                xinc!(ret, i)
                y += ret[2]
            end
@@ -1018,12 +1141,12 @@ Timing results:
 
 ```jldoctest prealloc; filter = r"[0-9\.]+ seconds \(.*?\)"
 julia> @time loopinc()
-  0.529894 seconds (40.00 M allocations: 1.490 GiB, 12.14% gc time)
-50000015000000
+  0.297454 seconds (200.00 k allocations: 2.239 GiB, 39.80% gc time)
+5000250000
 
 julia> @time loopinc_prealloc()
-  0.030850 seconds (6 allocations: 288 bytes)
-50000015000000
+  0.009410 seconds (2 allocations: 23.477 KiB)
+5000250000
 ```
 
 Preallocation has other advantages, for example by allowing the caller to control the "output"
@@ -1035,7 +1158,55 @@ some judgment may be required. However, for "vectorized" (element-wise) function
 syntax `x .= f.(y)` can be used for in-place operations with fused loops and no temporary arrays
 (see the [dot syntax for vectorizing functions](@ref man-vectorized)).
 
-## More dots: Fuse vectorized operations
+### [Consider using views for slices](@id man-performance-views)
+
+In Julia, an array "slice" expression like `array[1:5, :]` creates
+a copy of that data (except on the left-hand side of an assignment,
+where `array[1:5, :] = ...` assigns in-place to that portion of `array`).
+If you are doing many operations on the slice, this can be good for
+performance because it is more efficient to work with a smaller
+contiguous copy than it would be to index into the original array.
+On the other hand, if you are just doing a few simple operations on
+the slice, the cost of the allocation and copy operations can be
+substantial.
+
+An alternative is to create a "view" of the array, which is
+an array object (a `SubArray`) that actually references the data
+of the original array in-place, without making a copy. (If you
+write to a view, it modifies the original array's data as well.)
+This can be done for individual slices by calling [`view`](@ref),
+or more simply for a whole expression or block of code by putting
+[`@views`](@ref) in front of that expression. For example:
+
+```jldoctest; filter = r"[0-9\.]+ seconds \(.*?\)"
+julia> fcopy(x) = sum(x[2:end-1]);
+
+julia> @views fview(x) = sum(x[2:end-1]);
+
+julia> x = rand(10^6);
+
+julia> @time fcopy(x);
+  0.003051 seconds (3 allocations: 7.629 MB)
+
+julia> @time fview(x);
+  0.001020 seconds (1 allocation: 16 bytes)
+```
+
+Notice both the 3× speedup and the decreased memory allocation
+of the `fview` version of the function.
+
+### Consider StaticArrays.jl for small fixed-size vector/matrix operations
+
+If your application involves many small (`< 100` element) arrays of fixed sizes (i.e. the size is
+known prior to execution), then you might want to consider using the [StaticArrays.jl package](https://github.com/JuliaArrays/StaticArrays.jl).
+This package allows you to represent such arrays in a way that avoids unnecessary heap allocations and allows the compiler to
+specialize code for the *size* of the array, e.g. by completely unrolling vector operations (eliminating the loops) and storing elements in CPU registers.
+
+For example, if you are doing computations with 2d geometries, you might have many computations with 2-component vectors. By
+using the `SVector` type from StaticArrays.jl, you can use convenient vector notation and operations like `norm(3v - w)` on
+vectors `v` and `w`, while allowing the compiler to unroll the code to a minimal computation equivalent to `@inbounds hypot(3v[1]-w[1], 3v[2]-w[2])`.
+
+### More dots: Fuse vectorized operations
 
 Julia has a special [dot syntax](@ref man-vectorized) that converts
 any scalar function into a "vectorized" function call, and any operator
@@ -1081,51 +1252,132 @@ a new temporary array and executes in a separate loop. In this example
 convenient to sprinkle some dots in your expressions than to
 define a separate function for each vectorized operation.
 
-## [Consider using views for slices](@id man-performance-views)
+### [Fewer dots: Unfuse certain intermediate broadcasts](@id man-performance-unfuse)
 
-In Julia, an array "slice" expression like `array[1:5, :]` creates
-a copy of that data (except on the left-hand side of an assignment,
-where `array[1:5, :] = ...` assigns in-place to that portion of `array`).
-If you are doing many operations on the slice, this can be good for
-performance because it is more efficient to work with a smaller
-contiguous copy than it would be to index into the original array.
-On the other hand, if you are just doing a few simple operations on
-the slice, the cost of the allocation and copy operations can be
-substantial.
+The dot loop fusion mentioned above enables concise and idiomatic code to express highly performant operations. However, it is important to remember that the fused operation will be computed at every iteration of the broadcast. This means that in some situations, particularly in the presence of composed or multidimensional broadcasts, an expression with dot calls may be computing a function more times than intended. As an example, say we want to build a random matrix whose rows have Euclidean norm one. We might write something like the following:
+```
+julia> x = rand(1000, 1000);
 
-An alternative is to create a "view" of the array, which is
-an array object (a `SubArray`) that actually references the data
-of the original array in-place, without making a copy. (If you
-write to a view, it modifies the original array's data as well.)
-This can be done for individual slices by calling [`view`](@ref),
-or more simply for a whole expression or block of code by putting
-[`@views`](@ref) in front of that expression. For example:
+julia> d = sum(abs2, x; dims=2);
 
-```jldoctest; filter = r"[0-9\.]+ seconds \(.*?\)"
-julia> fcopy(x) = sum(x[2:end-1]);
+julia> @time x ./= sqrt.(d);
+  0.002049 seconds (4 allocations: 96 bytes)
+```
+This will work. However, this expression will actually recompute `sqrt(d[i])` for *every* element in the row `x[i, :]`, meaning that many more square roots are computed than necessary. To see precisely over which indices the broadcast will iterate, we can call `Broadcast.combine_axes` on the arguments of the fused expression. This will return a tuple of ranges whose entries correspond to the axes of iteration; the product of lengths of these ranges will be the total number of calls to the fused operation.
 
-julia> @views fview(x) = sum(x[2:end-1]);
+It follows that when some components of the broadcast expression are constant along an axis—like the `sqrt` along the second dimension in the preceding example—there is potential for a performance improvement by forcibly "unfusing" those components, i.e. allocating the result of the broadcasted operation in advance and reusing the cached value along its constant axis. Some such potential approaches are to use temporary variables, wrap components of a dot expression in `identity`, or use an equivalent intrinsically vectorized (but non-fused) function.
+```
+julia> @time let s = sqrt.(d); x ./= s end;
+  0.000809 seconds (5 allocations: 8.031 KiB)
 
-julia> x = rand(10^6);
+julia> @time x ./= identity(sqrt.(d));
+  0.000608 seconds (5 allocations: 8.031 KiB)
 
-julia> @time fcopy(x);
-  0.003051 seconds (3 allocations: 7.629 MB)
+julia> @time x ./= map(sqrt, d);
+  0.000611 seconds (4 allocations: 8.016 KiB)
+```
 
-julia> @time fview(x);
-  0.001020 seconds (1 allocation: 16 bytes)
+Any of these options yields approximately a three-fold speedup at the cost of an allocation; for large broadcastables this speedup can be asymptotically very large.
+
+### [Access arrays in memory order, along columns](@id man-performance-column-major)
+
+Multidimensional arrays in Julia are stored in column-major order. This means that arrays are
+stacked one column at a time. This can be verified using the `vec` function or the syntax `[:]`
+as shown below (notice that the array is ordered `[1 3 2 4]`, not `[1 2 3 4]`):
+
+```jldoctest
+julia> x = [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1  2
+ 3  4
+
+julia> x[:]
+4-element Vector{Int64}:
+ 1
+ 3
+ 2
+ 4
 ```
 
-Notice both the 3× speedup and the decreased memory allocation
-of the `fview` version of the function.
+This convention for ordering arrays is common in many languages like Fortran, Matlab, and R (to
+name a few). The alternative to column-major ordering is row-major ordering, which is the convention
+adopted by C and Python (`numpy`) among other languages. Remembering the ordering of arrays can
+have significant performance effects when looping over arrays. A rule of thumb to keep in mind
+is that with column-major arrays, the first index changes most rapidly. Essentially this means
+that looping will be faster if the inner-most loop index is the first to appear in a slice expression.
+Keep in mind that indexing an array with `:` is an implicit loop that iteratively accesses all elements within a particular dimension; it can be faster to extract columns than rows, for example.
+
+Consider the following contrived example. Imagine we wanted to write a function that accepts a
+[`Vector`](@ref) and returns a square [`Matrix`](@ref) with either the rows or the columns filled with copies
+of the input vector. Assume that it is not important whether rows or columns are filled with these
+copies (perhaps the rest of the code can be easily adapted accordingly). We could conceivably
+do this in at least four ways (in addition to the recommended call to the built-in [`repeat`](@ref)):
+
+```julia
+function copy_cols(x::Vector{T}) where T
+    inds = axes(x, 1)
+    out = similar(Array{T}, inds, inds)
+    for i = inds
+        out[:, i] = x
+    end
+    return out
+end
+
+function copy_rows(x::Vector{T}) where T
+    inds = axes(x, 1)
+    out = similar(Array{T}, inds, inds)
+    for i = inds
+        out[i, :] = x
+    end
+    return out
+end
 
-## Copying data is not always bad
+function copy_col_row(x::Vector{T}) where T
+    inds = axes(x, 1)
+    out = similar(Array{T}, inds, inds)
+    for col = inds, row = inds
+        out[row, col] = x[row]
+    end
+    return out
+end
+
+function copy_row_col(x::Vector{T}) where T
+    inds = axes(x, 1)
+    out = similar(Array{T}, inds, inds)
+    for row = inds, col = inds
+        out[row, col] = x[col]
+    end
+    return out
+end
+```
+
+Now we will time each of these functions using the same random `10000` by `1` input vector:
+
+```julia-repl
+julia> x = randn(10000);
+
+julia> fmt(f) = println(rpad(string(f)*": ", 14, ' '), @elapsed f(x))
+
+julia> map(fmt, [copy_cols, copy_rows, copy_col_row, copy_row_col]);
+copy_cols:    0.331706323
+copy_rows:    1.799009911
+copy_col_row: 0.415630047
+copy_row_col: 1.721531501
+```
+
+Notice that `copy_cols` is much faster than `copy_rows`. This is expected because `copy_cols`
+respects the column-based memory layout of the `Matrix` and fills it one column at a time. Additionally,
+`copy_col_row` is much faster than `copy_row_col` because it follows our rule of thumb that the
+first element to appear in a slice expression should be coupled with the inner-most loop.
+
+### Copying data is not always bad
 
 Arrays are stored contiguously in memory, lending themselves to CPU vectorization
 and fewer memory accesses due to caching. These are the same reasons that it is recommended
 to access arrays in column-major order (see above). Irregular access patterns and non-contiguous
 views can drastically slow down computations on arrays because of non-sequential memory access.
 
-Copying irregularly-accessed data into a contiguous array before repeated access it can result
+Copying irregularly-accessed data into a contiguous array before repeatedly accessing it can result
 in a large speedup, such as in the example below. Here, a matrix is being accessed at
 randomly-shuffled indices before being multiplied. Copying into plain arrays speeds up the
 multiplication even with the added cost of copying and allocation.
@@ -1158,85 +1410,188 @@ julia> @time iterated_neural_network(A[inds, inds], x, 10)
 Provided there is enough memory, the cost of copying the view to an array is outweighed
 by the speed boost from doing the repeated matrix multiplications on a contiguous array.
 
-## Consider StaticArrays.jl for small fixed-size vector/matrix operations
+### [Multithreading and linear algebra](@id man-multithreading-linear-algebra)
 
-If your application involves many small (`< 100` element) arrays of fixed sizes (i.e. the size is
-known prior to execution), then you might want to consider using the [StaticArrays.jl package](https://github.com/JuliaArrays/StaticArrays.jl).
-This package allows you to represent such arrays in a way that avoids unnecessary heap allocations and allows the compiler to
-specialize code for the *size* of the array, e.g. by completely unrolling vector operations (eliminating the loops) and storing elements in CPU registers.
+This section applies to multithreaded Julia code which, in each thread, performs linear algebra operations.
+Indeed, these linear algebra operations involve BLAS / LAPACK calls, which are themselves multithreaded.
+In this case, one must ensure that cores aren't oversubscribed due to the two different types of multithreading.
+
+Julia compiles and uses its own copy of OpenBLAS for linear algebra, whose number of threads is controlled by the environment variable `OPENBLAS_NUM_THREADS`.
+It can either be set as a command line option when launching Julia, or modified during the Julia session with `BLAS.set_num_threads(N)` (the submodule `BLAS` is exported by `using LinearAlgebra`).
+Its current value can be accessed with `BLAS.get_num_threads()`.
+
+When the user does not specify anything, Julia tries to choose a reasonable value for the number of OpenBLAS threads (e.g. based on the platform, the Julia version, etc.).
+However, it is generally recommended to check and set the value manually.
+The OpenBLAS behavior is as follows:
+
+* If `OPENBLAS_NUM_THREADS=1`, OpenBLAS uses the calling Julia thread(s), i.e. it "lives in" the Julia thread that runs the computation.
+* If `OPENBLAS_NUM_THREADS=N>1`, OpenBLAS creates and manages its own pool of threads (`N` in total). There is just one OpenBLAS thread pool shared among all Julia threads.
+
+When you start Julia in multithreaded mode with `JULIA_NUM_THREADS=X`, it is generally recommended to set `OPENBLAS_NUM_THREADS=1`.
+Given the behavior described above, increasing the number of BLAS threads to `N>1` can very easily lead to worse performance, in particular when `N<<X`.
+However this is just a rule of thumb, and the best way to set each number of threads is to experiment on your specific application.
+
+### [Alternative linear algebra backends](@id man-backends-linear-algebra)
+
+As an alternative to OpenBLAS, there exist several other backends that can help with linear algebra performance.
+Prominent examples include [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl) and [AppleAccelerate.jl](https://github.com/JuliaMath/AppleAccelerate.jl).
+
+These are external packages, so we will not discuss them in detail here.
+Please refer to their respective documentations (especially because they have different behaviors than OpenBLAS with respect to multithreading).
+
+## Execution latency, package loading and package precompiling time
+
+### Reducing time to first plot etc.
+
+The first time a julia method is called it (and any methods it calls, or ones that can be statically determined) will be
+compiled. The [`@time`](@ref) macro family illustrates this.
+
+```
+julia> foo() = rand(2,2) * rand(2,2)
+foo (generic function with 1 method)
+
+julia> @time @eval foo();
+  0.252395 seconds (1.12 M allocations: 56.178 MiB, 2.93% gc time, 98.12% compilation time)
+
+julia> @time @eval foo();
+  0.000156 seconds (63 allocations: 2.453 KiB)
+```
+
+Note that `@time @eval` is better for measuring compilation time because without [`@eval`](@ref), some compilation may
+already be done before timing starts.
+
+When developing a package, you may be able to improve the experience of your users with *precompilation*
+so that when they use the package, the code they use is already compiled. To precompile package code effectively, it's
+recommended to use [`PrecompileTools.jl`](https://julialang.github.io/PrecompileTools.jl/stable/) to run a
+"precompile workload" during precompilation time that is representative of typical package usage, which will cache the
+native compiled code into the package `pkgimage` cache, greatly reducing "time to first execution" (often referred to as
+TTFX) for such usage.
+
+Note that [`PrecompileTools.jl`](https://julialang.github.io/PrecompileTools.jl/stable/) workloads can be
+disabled and sometimes configured via Preferences if you do not want to spend the extra time precompiling, which
+may be the case during development of a package.
+
+### Reducing package loading time
+
+Keeping the time taken to load the package down is usually helpful.
+General good practice for package developers includes:
 
-For example, if you are doing computations with 2d geometries, you might have many computations with 2-component vectors.  By
-using the `SVector` type from StaticArrays.jl, you can use convenient vector notation and operations like `norm(3v - w)` on
-vectors `v` and `w`, while allowing the compiler to unroll the code to a minimal computation equivalent to `@inbounds hypot(3v[1]-w[1], 3v[2]-w[2])`.
+1. Reduce your dependencies to those you really need. Consider using [package extensions](@ref) to support interoperability with other packages without bloating your essential dependencies.
+3. Avoid use of [`__init__()`](@ref) functions unless there is no alternative, especially those which might trigger a lot
+   of compilation, or just take a long time to execute.
+4. Where possible, fix [invalidations](https://julialang.org/blog/2020/08/invalidations/) among your dependencies and from your package code.
 
-## Avoid string interpolation for I/O
+The tool [`@time_imports`](@ref) can be useful in the REPL to review the above factors.
 
-When writing data to a file (or other I/O device), forming extra intermediate strings is a source
-of overhead. Instead of:
+```julia-repl
+julia> @time @time_imports using Plots
+      0.5 ms  Printf
+     16.4 ms  Dates
+      0.7 ms  Statistics
+               ┌ 23.8 ms SuiteSparse_jll.__init__() 86.11% compilation time (100% recompilation)
+     90.1 ms  SuiteSparse_jll 91.57% compilation time (82% recompilation)
+      0.9 ms  Serialization
+               ┌ 39.8 ms SparseArrays.CHOLMOD.__init__() 99.47% compilation time (100% recompilation)
+    166.9 ms  SparseArrays 23.74% compilation time (100% recompilation)
+      0.4 ms  Statistics → SparseArraysExt
+      0.5 ms  TOML
+      8.0 ms  Preferences
+      0.3 ms  PrecompileTools
+      0.2 ms  Reexport
+... many deps omitted for example ...
+      1.4 ms  Tar
+               ┌ 73.8 ms p7zip_jll.__init__() 99.93% compilation time (100% recompilation)
+     79.4 ms  p7zip_jll 92.91% compilation time (100% recompilation)
+               ┌ 27.7 ms GR.GRPreferences.__init__() 99.77% compilation time (100% recompilation)
+     43.0 ms  GR 64.26% compilation time (100% recompilation)
+               ┌ 2.1 ms Plots.__init__() 91.80% compilation time (100% recompilation)
+    300.9 ms  Plots 0.65% compilation time (100% recompilation)
+  1.795602 seconds (3.33 M allocations: 190.153 MiB, 7.91% gc time, 39.45% compilation time: 97% of which was recompilation)
 
-```julia
-println(file, "$a $b")
 ```
 
-use:
+Notice that in this example there are multiple packages loaded, some with `__init__()` functions, some of which cause
+compilation of which some is recompilation. Recompilation is caused by earlier packages invalidating methods, then in
+these cases when the following packages run their `__init__()` function some hit recompilation before the code can be run.
 
-```julia
-println(file, a, " ", b)
-```
+Further, note the `Statistics` extension `SparseArraysExt` has been activated because `SparseArrays` is in the dependency
+tree. i.e. see `0.4 ms  Statistics → SparseArraysExt`.
 
-The first version of the code forms a string, then writes it to the file, while the second version
-writes values directly to the file. Also notice that in some cases string interpolation can be
-harder to read. Consider:
+This report gives a good opportunity to review whether the cost of dependency load time is worth the functionality it brings.
+Also the `Pkg` utility `why` can be used to report why an indirect dependency exists.
 
-```julia
-println(file, "$(f(a))$(f(b))")
+```
+(CustomPackage) pkg> why FFMPEG_jll
+  Plots → FFMPEG → FFMPEG_jll
+  Plots → GR → GR_jll → FFMPEG_jll
 ```
 
-versus:
+or to see the indirect dependencies that a package brings in, you can `pkg> rm` the package, see the deps that are removed
+from the manifest, then revert the change with `pkg> undo`.
 
-```julia
-println(file, f(a), f(b))
+If loading time is dominated by slow `__init__()` methods having compilation, one verbose way to identify what is being
+compiled is to use the julia args `--trace-compile=stderr --trace-compile-timing` which will report a [`precompile`](@ref)
+statement each time a method is compiled, along with how long compilation took. The InteractiveUtils macro
+[`@trace_compile`](@ref) provides a way to enable those args for a specific call. So a call for a complete report report would look like:
+
+```
+julia> @time @time_imports @trace_compile using CustomPackage
+...
 ```
 
-## Optimize network I/O during parallel execution
+Note the `--startup-file=no` which helps isolate the test from packages you may have in your `startup.jl`.
 
-When executing a remote function in parallel:
+More analysis of the reasons for recompilation can be achieved with the
+[`SnoopCompile`](https://github.com/timholy/SnoopCompile.jl) package.
 
-```julia
-using Distributed
+### Tracing expression evaluation
 
-responses = Vector{Any}(undef, nworkers())
-@sync begin
-    for (idx, pid) in enumerate(workers())
-        @async responses[idx] = remotecall_fetch(foo, pid, args...)
-    end
-end
+If you need to understand what code is being evaluated during test or script execution,
+you can use the `--trace-eval` command-line option or the [`Base.TRACE_EVAL`](@ref) global control to trace the outermost expressions being evaluated (top-level statements). Note this does not individually report the contents of function calls or code blocks:
+
+```bash
+# Show only location information during evaluation
+julia --trace-eval=loc script.jl
+
+# Show full expressions being evaluated
+julia --trace-eval=full script.jl
 ```
 
-is faster than:
+You can also control this programmatically:
 
 ```julia
-using Distributed
+# Enable full expression tracing
+Base.TRACE_EVAL = :full
 
-refs = Vector{Any}(undef, nworkers())
-for (idx, pid) in enumerate(workers())
-    refs[idx] = @spawnat pid foo(args...)
-end
-responses = [fetch(r) for r in refs]
+# Show only locations
+Base.TRACE_EVAL = :loc
+
+# Disable tracing
+Base.TRACE_EVAL = :no
+
+# Reset to use command-line setting
+Base.TRACE_EVAL = nothing
 ```
 
-The former results in a single network round-trip to every worker, while the latter results in
-two network calls - first by the [`@spawnat`](@ref) and the second due to the [`fetch`](@ref)
-(or even a [`wait`](@ref)).
-The [`fetch`](@ref)/[`wait`](@ref) is also being executed serially resulting in an overall poorer performance.
 
-## Fix deprecation warnings
+### Reducing precompilation time
 
-A deprecated function internally performs a lookup in order to print a relevant warning only once.
-This extra lookup can cause a significant slowdown, so all uses of deprecated functions should
-be modified as suggested by the warnings.
+If package precompilation is taking a long time, one option is to set the following internal and then precompile.
+```
+julia> Base.PRECOMPILE_TRACE_COMPILE[] = "stderr"
+
+pkg> precompile
+```
+
+This has the effect of setting `--trace-compile=stderr --trace-compile-timing` in the precompilation processes themselves,
+so will show which methods are precompiled and how long they took to precompile.
 
-## Tweaks
+There are also profiling options such as [using the external profiler Tracy to profile the precompilation process](@ref Profiling-package-precompilation-with-Tracy).
+
+
+## Miscellaneous
+
+### Tweaks
 
 These are some minor points that might help in tight inner loops.
 
@@ -1246,17 +1601,23 @@ These are some minor points that might help in tight inner loops.
   * Use [`div(x,y)`](@ref) for truncating division of integers instead of [`trunc(x/y)`](@ref), [`fld(x,y)`](@ref)
     instead of [`floor(x/y)`](@ref), and [`cld(x,y)`](@ref) instead of [`ceil(x/y)`](@ref).
 
-## [Performance Annotations](@id man-performance-annotations)
+### Fix deprecation warnings
+
+A deprecated function internally performs a lookup in order to print a relevant warning only once.
+This extra lookup can cause a significant slowdown, so all uses of deprecated functions should
+be modified as suggested by the warnings.
+
+### [Performance Annotations](@id man-performance-annotations)
 
 Sometimes you can enable better optimization by promising certain program properties.
 
   * Use [`@inbounds`](@ref) to eliminate array bounds checking within expressions. Be certain before doing
-    this. If the subscripts are ever out of bounds, you may suffer crashes or silent corruption.
+    this. If the indices are ever out of bounds, you may suffer crashes or silent corruption.
   * Use [`@fastmath`](@ref) to allow floating point optimizations that are correct for real numbers, but lead
     to differences for IEEE numbers. Be careful when doing this, as this may change numerical results.
     This corresponds to the `-ffast-math` option of clang.
   * Write [`@simd`](@ref) in front of `for` loops to promise that the iterations are independent and may be
-    reordered.  Note that in many cases, Julia can automatically vectorize code without the `@simd` macro;
+    reordered. Note that in many cases, Julia can automatically vectorize code without the `@simd` macro;
     it is only beneficial in cases where such a transformation would otherwise be illegal, including cases
     like allowing floating-point re-associativity and ignoring dependent memory accesses (`@simd ivdep`).
     Again, be very careful when asserting `@simd` as erroneously annotating a loop with dependent iterations
@@ -1279,7 +1640,7 @@ the optimizer from trying to be too clever and defeat our benchmark):
 ```julia
 @noinline function inner(x, y)
     s = zero(eltype(x))
-    for i=eachindex(x)
+    for i in eachindex(x, y)
         @inbounds s += x[i]*y[i]
     end
     return s
@@ -1287,7 +1648,7 @@ end
 
 @noinline function innersimd(x, y)
     s = zero(eltype(x))
-    @simd for i = eachindex(x)
+    @simd for i in eachindex(x, y)
         @inbounds s += x[i] * y[i]
     end
     return s
@@ -1404,7 +1765,7 @@ in generated code by using Julia's [`code_native`](@ref) function.
 
 Note that `@fastmath` also assumes that `NaN`s will not occur during the computation, which can lead to surprising behavior:
 
-```julia-repl
+```jldoctest
 julia> f(x) = isnan(x);
 
 julia> f(NaN)
@@ -1416,7 +1777,7 @@ julia> f_fast(NaN)
 false
 ```
 
-## Treat Subnormal Numbers as Zeros
+### Treat Subnormal Numbers as Zeros
 
 Subnormal numbers, formerly called [denormal numbers](https://en.wikipedia.org/wiki/Denormal_number),
 are useful in many contexts, but incur a performance penalty on some hardware. A call [`set_zero_subnormals(true)`](@ref)
@@ -1489,195 +1850,105 @@ In some applications, an alternative to zeroing subnormal numbers is to inject a
 a = rand(Float32,1000) * 1.f-9
 ```
 
-## [[`@code_warntype`](@ref)](@id man-code-warntype)
+### Avoid string interpolation for I/O
 
-The macro [`@code_warntype`](@ref) (or its function variant [`code_warntype`](@ref)) can sometimes
-be helpful in diagnosing type-related problems. Here's an example:
+When writing data to a file (or other I/O device), forming extra intermediate strings is a source
+of overhead. Instead of:
 
-```julia-repl
-julia> @noinline pos(x) = x < 0 ? 0 : x;
+```julia
+println(file, "$a $b")
+```
 
-julia> function f(x)
-           y = pos(x)
-           return sin(y*x + 1)
-       end;
+use:
 
-julia> @code_warntype f(3.2)
-MethodInstance for f(::Float64)
-  from f(x) @ Main REPL[9]:1
-Arguments
-  #self#::Core.Const(f)
-  x::Float64
-Locals
-  y::Union{Float64, Int64}
-Body::Float64
-1 ─      (y = Main.pos(x))
-│   %2 = (y * x)::Float64
-│   %3 = (%2 + 1)::Float64
-│   %4 = Main.sin(%3)::Float64
-└──      return %4
+```julia
+println(file, a, " ", b)
 ```
 
-Interpreting the output of [`@code_warntype`](@ref), like that of its cousins [`@code_lowered`](@ref),
-[`@code_typed`](@ref), [`@code_llvm`](@ref), and [`@code_native`](@ref), takes a little practice.
-Your code is being presented in form that has been heavily digested on its way to generating
-compiled machine code. Most of the expressions are annotated by a type, indicated by the `::T`
-(where `T` might be [`Float64`](@ref), for example). The most important characteristic of [`@code_warntype`](@ref)
-is that non-concrete types are displayed in red; since this document is written in Markdown, which has no color,
-in this document, red text is denoted by uppercase.
+The first version of the code forms a string, then writes it to the file, while the second version
+writes values directly to the file. Also notice that in some cases string interpolation can be
+harder to read. Consider:
 
-At the top, the inferred return type of the function is shown as `Body::Float64`.
-The next lines represent the body of `f` in Julia's SSA IR form.
-The numbered boxes are labels and represent targets for jumps (via `goto`) in your code.
-Looking at the body, you can see that the first thing that happens is that `pos` is called and the
-return value has been inferred as the `Union` type `Union{Float64, Int64}` shown in uppercase since
-it is a non-concrete type. This means that we cannot know the exact return type of `pos` based on the
-input types. However, the result of `y*x`is a `Float64` no matter if `y` is a `Float64` or `Int64`
-The net result is that `f(x::Float64)` will not be type-unstable
-in its output, even if some of the intermediate computations are type-unstable.
+```julia
+println(file, "$(f(a))$(f(b))")
+```
 
-How you use this information is up to you. Obviously, it would be far and away best to fix `pos`
-to be type-stable: if you did so, all of the variables in `f` would be concrete, and its performance
-would be optimal. However, there are circumstances where this kind of *ephemeral* type instability
-might not matter too much: for example, if `pos` is never used in isolation, the fact that `f`'s
-output is type-stable (for [`Float64`](@ref) inputs) will shield later code from the propagating
-effects of type instability. This is particularly relevant in cases where fixing the type instability
-is difficult or impossible. In such cases, the tips above (e.g., adding type annotations and/or
-breaking up functions) are your best tools to contain the "damage" from type instability.
-Also, note that even Julia Base has functions that are type unstable.
-For example, the function [`findfirst`](@ref) returns the index into an array where a key is found,
-or `nothing` if it is not found, a clear type instability. In order to make it easier to find the
-type instabilities that are likely to be important, `Union`s containing either `missing` or `nothing`
-are color highlighted in yellow, instead of red.
+versus:
 
-The following examples may help you interpret expressions marked as containing non-leaf types:
+```julia
+println(file, f(a), f(b))
+```
 
-  * Function body starting with `Body::Union{T1,T2})`
-      * Interpretation: function with unstable return type
-      * Suggestion: make the return value type-stable, even if you have to annotate it
+### Avoid eager string materialization
 
-  * `invoke Main.g(%%x::Int64)::Union{Float64, Int64}`
-      * Interpretation: call to a type-unstable function `g`.
-      * Suggestion: fix the function, or if necessary annotate the return value
+In settings where a string representation of an object is only needed
+conditionally (e.g. in error paths of functions or conditional warnings such as
+deprecations), it is advisable to avoid the overhead of eagerly materializing
+the string. Since Julia 1.8, this can be achieved via
+[`LazyString`](@ref) and the corresponding string macro [`@lazy_str`](@ref).
 
-  * `invoke Base.getindex(%%x::Array{Any,1}, 1::Int64)::Any`
-      * Interpretation: accessing elements of poorly-typed arrays
-      * Suggestion: use arrays with better-defined types, or if necessary annotate the type of individual
-        element accesses
+For example, instead of:
 
-  * `Base.getfield(%%x, :(:data))::Array{Float64,N} where N`
-      * Interpretation: getting a field that is of non-leaf type. In this case, the type of `x`, say `ArrayContainer`, had a
-        field `data::Array{T}`. But `Array` needs the dimension `N`, too, to be a concrete type.
-      * Suggestion: use concrete types like `Array{T,3}` or `Array{T,N}`, where `N` is now a parameter
-        of `ArrayContainer`
+```julia
+Base.depwarn("`foo` is deprecated for type $(typeof(x))", :bar)
+```
 
-## [Performance of captured variable](@id man-performance-captured)
+use:
 
-Consider the following example that defines an inner function:
 ```julia
-function abmult(r::Int)
-    if r < 0
-        r = -r
-    end
-    f = x -> x * r
-    return f
-end
+Base.depwarn(lazy"`foo` is deprecated for type $(typeof(x))", :bar)
 ```
 
-Function `abmult` returns a function `f` that multiplies its argument by
-the absolute value of `r`. The inner function assigned to `f` is called a
-"closure". Inner functions are also used by the
-language for `do`-blocks and for generator expressions.
+or the equivalent macro-free version:
 
-This style of code presents performance challenges for the language.
-The parser, when translating it into lower-level instructions,
-substantially reorganizes the above code by extracting the
-inner function to a separate code block.  "Captured" variables such as `r`
-that are shared by inner functions and their enclosing scope are
-also extracted into a heap-allocated "box" accessible to both inner and
-outer functions because the language specifies that `r` in the
-inner scope must be identical to `r` in the outer scope even after the
-outer scope (or another inner function) modifies `r`.
+```julia
+Base.depwarn(LazyString("`foo` is deprecated for type ", typeof(x)), :bar)
+```
 
-The discussion in the preceding paragraph referred to the "parser", that is, the phase
-of compilation that takes place when the module containing `abmult` is first loaded,
-as opposed to the later phase when it is first invoked. The parser does not "know" that
-`Int` is a fixed type, or that the statement `r = -r` transforms an `Int` to another `Int`.
-The magic of type inference takes place in the later phase of compilation.
+Through this approach, the interpolated string will only be constructed when it is actually displayed.
 
-Thus, the parser does not know that `r` has a fixed type (`Int`).
-nor that `r` does not change value once the inner function is created (so that
-the box is unneeded).  Therefore, the parser emits code for
-box that holds an object with an abstract type such as `Any`, which
-requires run-time type dispatch for each occurrence of `r`.  This can be
-verified by applying `@code_warntype` to the above function.  Both the boxing
-and the run-time type dispatch can cause loss of performance.
+### Optimize network I/O during parallel execution
+
+When executing a remote function in parallel:
 
-If captured variables are used in a performance-critical section of the code,
-then the following tips help ensure that their use is performant. First, if
-it is known that a captured variable does not change its type, then this can
-be declared explicitly with a type annotation (on the variable, not the
-right-hand side):
-```julia
-function abmult2(r0::Int)
-    r::Int = r0
-    if r < 0
-        r = -r
-    end
-    f = x -> x * r
-    return f
-end
-```
-The type annotation partially recovers lost performance due to capturing because
-the parser can associate a concrete type to the object in the box.
-Going further, if the captured variable does not need to be boxed at all (because it
-will not be reassigned after the closure is created), this can be indicated
-with `let` blocks as follows.
 ```julia
-function abmult3(r::Int)
-    if r < 0
-        r = -r
-    end
-    f = let r = r
-            x -> x * r
+using Distributed
+
+responses = Vector{Any}(undef, nworkers())
+@sync begin
+    for (idx, pid) in enumerate(workers())
+        Threads.@spawn responses[idx] = remotecall_fetch(foo, pid, args...)
     end
-    return f
 end
 ```
-The `let` block creates a new variable `r` whose scope is only the
-inner function. The second technique recovers full language performance
-in the presence of captured variables. Note that this is a rapidly
-evolving aspect of the compiler, and it is likely that future releases
-will not require this degree of programmer annotation to attain performance.
-In the mean time, some user-contributed packages like
-[FastClosures](https://github.com/c42f/FastClosures.jl) automate the
-insertion of `let` statements as in `abmult3`.
-
-## [Multithreading and linear algebra](@id man-multithreading-linear-algebra)
-
-This section applies to multithreaded Julia code which, in each thread, performs linear algebra operations.
-Indeed, these linear algebra operations involve BLAS / LAPACK calls, which are themselves multithreaded.
-In this case, one must ensure that cores aren't oversubscribed due to the two different types of multithreading.
-
-Julia compiles and uses its own copy of OpenBLAS for linear algebra, whose number of threads is controlled by the environment variable `OPENBLAS_NUM_THREADS`.
-It can either be set as a command line option when launching Julia, or modified during the Julia session with `BLAS.set_num_threads(N)` (the submodule `BLAS` is exported by `using LinearAlgebra`).
-Its current value can be accessed with `BLAS.get_num_threads()`.
-
-When the user does not specify anything, Julia tries to choose a reasonable value for the number of OpenBLAS threads (e.g. based on the platform, the Julia version, etc.).
-However, it is generally recommended to check and set the value manually.
-The OpenBLAS behavior is as follows:
 
-* If `OPENBLAS_NUM_THREADS=1`, OpenBLAS uses the calling Julia thread(s), i.e. it "lives in" the Julia thread that runs the computation.
-* If `OPENBLAS_NUM_THREADS=N>1`, OpenBLAS creates and manages its own pool of threads (`N` in total). There is just one OpenBLAS thread pool shared among all Julia threads.
+is faster than:
 
-When you start Julia in multithreaded mode with `JULIA_NUM_THREADS=X`, it is generally recommended to set `OPENBLAS_NUM_THREADS=1`.
-Given the behavior described above, increasing the number of BLAS threads to `N>1` can very easily lead to worse performance, in particular when `N<<X`.
-However this is just a rule of thumb, and the best way to set each number of threads is to experiment on your specific application.
+```julia
+using Distributed
 
-## [Alternative linear algebra backends](@id man-backends-linear-algebra)
+refs = Vector{Any}(undef, nworkers())
+for (idx, pid) in enumerate(workers())
+    refs[idx] = @spawnat pid foo(args...)
+end
+responses = [fetch(r) for r in refs]
+```
 
-As an alternative to OpenBLAS, there exist several other backends that can help with linear algebra performance.
-Prominent examples include [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl) and [AppleAccelerate.jl](https://github.com/JuliaMath/AppleAccelerate.jl).
+The former results in a single network round-trip to every worker, while the latter results in
+two network calls - first by the [`@spawnat`](@ref) and the second due to the [`fetch`](@ref)
+(or even a [`wait`](@ref)).
+The [`fetch`](@ref)/[`wait`](@ref) is also being executed serially resulting in an overall poorer performance.
 
-These are external packages, so we will not discuss them in detail here.
-Please refer to their respective documentations (especially because they have different behaviors than OpenBLAS with respect to multithreading).
+### [Use `MutableArithmetics` for more control over allocation for mutable arithmetic types](@id man-perftips-mutablearithmetics)
+
+Some [`Number`](@ref) subtypes, such as [`BigInt`](@ref) or [`BigFloat`](@ref), may
+be implemented as [`mutable struct`](@ref) types, or they may have mutable
+components. The arithmetic interfaces in Julia `Base` usually opt for convenience
+over efficiency in such cases, so using them in a naive manner may result in
+suboptimal performance. The abstractions of the
+[`MutableArithmetics`](https://juliahub.com/ui/Packages/General/MutableArithmetics)
+package, on the other hand, make it possible to exploit the mutability of such types
+for writing fast code that allocates only as much as necessary. `MutableArithmetics`
+also makes it possible to copy values of mutable arithmetic types explicitly when
+necessary. `MutableArithmetics` is a user package and is not affiliated with the
+Julia project.
diff --git a/doc/src/manual/profile.md b/doc/src/manual/profile.md
index e5f1d6c417fa6..77f9fb44e2a29 100644
--- a/doc/src/manual/profile.md
+++ b/doc/src/manual/profile.md
@@ -297,10 +297,224 @@ Of course, you can decrease the delay as well as increase it; however, the overh
 grows once the delay becomes similar to the amount of time needed to take a backtrace (~30 microseconds
 on the author's laptop).
 
+## Wall-time Profiler
+
+### Introduction & Problem Motivation
+
+The profiler described in the previous section is a sampling CPU profiler. At a high level, the profiler periodically stops all Julia compute threads to collect their backtraces and estimates the time spent in each function based on the number of backtrace samples that include a frame from that function. However, note that only tasks currently running on system threads just before the profiler stops them will have their backtraces collected.
+
+While this profiler is typically well-suited for workloads where the majority of tasks are compute-bound, it is less helpful for systems where most tasks are IO-heavy or for diagnosing contention on synchronization primitives in your code.
+
+Let's consider this simple workload:
+
+```Julia
+using Base.Threads
+using Profile
+using PProf
+
+ch = Channel(1)
+
+const N_SPAWNED_TASKS = (1 << 10)
+const WAIT_TIME_NS = 10_000_000
+
+function spawn_a_bunch_of_tasks_waiting_on_channel()
+    for i in 1:N_SPAWNED_TASKS
+        Threads.@spawn begin
+            take!(ch)
+        end
+    end
+end
+
+function busywait()
+    t0 = time_ns()
+    while true
+        if time_ns() - t0 > WAIT_TIME_NS
+            break
+        end
+    end
+end
+
+function main()
+    spawn_a_bunch_of_tasks_waiting_on_channel()
+    for i in 1:N_SPAWNED_TASKS
+        put!(ch, i)
+        busywait()
+    end
+end
+
+Profile.@profile main()
+```
+
+Our goal is to detect whether there is contention on the `ch` channel—i.e., whether the number of waiters is excessive given the rate at which work items are being produced in the channel.
+
+If we run this, we obtain the following [PProf](https://github.com/JuliaPerf/PProf.jl) flame graph:
+
+![CPU Profile](./img/cpu-profile.png)
+
+This profile provides no information to help determine where contention occurs in the system’s synchronization primitives. Waiters on a channel will be blocked and descheduled, meaning no system thread will be running the tasks assigned to those waiters, and as a result, they won't be sampled by the profiler.
+
+### Wall-time Profiler
+
+Instead of sampling threads—and thus only sampling tasks that are running—a wall-time task profiler samples tasks independently of their scheduling state. For example, tasks that are sleeping on a synchronization primitive at the time the profiler is running will be sampled with the same probability as tasks that were actively running when the profiler attempted to capture backtraces.
+
+This approach allows us to construct a profile where backtraces from tasks blocked on the `ch` channel, as in the example above, are actually represented.
+
+Let's run the same example, but now with a wall-time profiler:
+
+
+```Julia
+using Base.Threads
+using Profile
+using PProf
+
+ch = Channel(1)
+
+const N_SPAWNED_TASKS = (1 << 10)
+const WAIT_TIME_NS = 10_000_000
+
+function spawn_a_bunch_of_tasks_waiting_on_channel()
+    for i in 1:N_SPAWNED_TASKS
+        Threads.@spawn begin
+            take!(ch)
+        end
+    end
+end
+
+function busywait()
+    t0 = time_ns()
+    while true
+        if time_ns() - t0 > WAIT_TIME_NS
+            break
+        end
+    end
+end
+
+function main()
+    spawn_a_bunch_of_tasks_waiting_on_channel()
+    for i in 1:N_SPAWNED_TASKS
+        put!(ch, i)
+        busywait()
+    end
+end
+
+Profile.@profile_walltime main()
+```
+
+We obtain the following flame graph:
+
+![Wall-time Profile Channel](./img/wall-time-profiler-channel-example.png)
+
+We see that a large number of samples come from channel-related `take!` functions, which allows us to determine that there is indeed an excessive number of waiters in `ch`.
+
+### A Compute-Bound Workload
+
+Despite the wall-time profiler sampling all live tasks in the system and not just the currently running ones, it can still be helpful for identifying performance hotspots, even if your code is compute-bound. Let’s consider a simple example:
+
+```Julia
+using Base.Threads
+using Profile
+using PProf
+
+ch = Channel(1)
+
+const MAX_ITERS = (1 << 22)
+const N_TASKS = (1 << 12)
+
+function spawn_a_task_waiting_on_channel()
+    Threads.@spawn begin
+        take!(ch)
+    end
+end
+
+function sum_of_sqrt()
+    sum_of_sqrt = 0.0
+    for i in 1:MAX_ITERS
+        sum_of_sqrt += sqrt(i)
+    end
+    return sum_of_sqrt
+end
+
+function spawn_a_bunch_of_compute_heavy_tasks()
+    Threads.@sync begin
+        for i in 1:N_TASKS
+            Threads.@spawn begin
+                sum_of_sqrt()
+            end
+        end
+    end
+end
+
+function main()
+    spawn_a_task_waiting_on_channel()
+    spawn_a_bunch_of_compute_heavy_tasks()
+end
+
+Profile.@profile_walltime main()
+```
+
+After collecting a wall-time profile, we get the following flame graph:
+
+![Wall-time Profile Compute-Bound](./img/wall-time-profiler-compute-bound-example.png)
+
+Notice how many of the samples contain `sum_of_sqrt`, which is the expensive compute function in our example.
+
+### Identifying Task Sampling Failures in your Profile
+
+In the current implementation, the wall-time profiler attempts to sample from tasks that have been alive since the last garbage collection, along with those created afterward. However, if most tasks are extremely short-lived, you may end up sampling tasks that have already completed, resulting in missed backtrace captures.
+
+If you encounter samples containing `failed_to_sample_task_fun` or `failed_to_stop_thread_fun`, this likely indicates a high volume of short-lived tasks, which prevented their backtraces from being collected.
+
+Let's consider this simple example:
+
+```Julia
+using Base.Threads
+using Profile
+using PProf
+
+const N_SPAWNED_TASKS = (1 << 16)
+const WAIT_TIME_NS = 100_000
+
+function spawn_a_bunch_of_short_lived_tasks()
+    for i in 1:N_SPAWNED_TASKS
+        Threads.@spawn begin
+            # Do nothing
+        end
+    end
+end
+
+function busywait()
+    t0 = time_ns()
+    while true
+        if time_ns() - t0 > WAIT_TIME_NS
+            break
+        end
+    end
+end
+
+function main()
+    GC.enable(false)
+    spawn_a_bunch_of_short_lived_tasks()
+    for i in 1:N_SPAWNED_TASKS
+        busywait()
+    end
+    GC.enable(true)
+end
+
+Profile.@profile_walltime main()
+```
+
+Notice that the tasks spawned in `spawn_a_bunch_of_short_lived_tasks` are extremely short-lived. Since these tasks constitute the majority in the system, we will likely miss capturing a backtrace for most sampled tasks.
+
+After collecting a wall-time profile, we obtain the following flame graph:
+
+![Task Sampling Failure](./img/task-sampling-failure.png)
+
+The large number of samples from `failed_to_stop_thread_fun` confirms that we have a significant number of short-lived tasks in the system.
+
 ## Memory allocation analysis
 
 One of the most common techniques to improve performance is to reduce memory allocation. Julia
-provides several tools measure this:
+provides several tools to measure this:
 
 ### `@time`
 
@@ -338,15 +552,90 @@ argument can be passed to speed it up by making it skip some allocations.
 Passing `sample_rate=1.0` will make it record everything (which is slow);
 `sample_rate=0.1` will record only 10% of the allocations (faster), etc.
 
-!!! note
+!!! compat "Julia 1.11"
+
+    Older versions of Julia could not capture types in all cases. In older versions of
+    Julia, if you see an allocation of type `Profile.Allocs.UnknownType`, it means that
+    the profiler doesn't know what type of object was allocated. This mainly happened when
+    the allocation was coming from generated code produced by the compiler. See
+    [issue #43688](https://github.com/JuliaLang/julia/issues/43688) for more info.
+
+    Since Julia 1.11, all allocations should have a type reported.
+
+For more details on how to use this tool, please see [the talk from JuliaCon 2022](https://www.youtube.com/watch?v=BFvpwC8hEWQ).
+
+##### Allocation Profiler Example
+
+In this simple example, we use PProf to visualize the alloc profile. You could use another
+visualization tool instead. We collect the profile (specifying a sample rate), then we visualize it.
+```julia
+using Profile, PProf
+Profile.Allocs.clear()
+Profile.Allocs.@profile sample_rate=0.0001 my_function()
+PProf.Allocs.pprof()
+```
+
+Here is a more in-depth example, showing how we can tune the sample rate. A
+good number of samples to aim for is around 1 - 10 thousand. Too many, and the
+profile visualizer can get overwhelmed, and profiling will be slow. Too few,
+and you don't have a representative sample.
+
+
+```julia-repl
+julia> import Profile
+
+julia> @time my_function()  # Estimate allocations from a (second-run) of the function
+  0.110018 seconds (1.50 M allocations: 58.725 MiB, 17.17% gc time)
+500000
+
+julia> Profile.Allocs.clear()
+
+julia> Profile.Allocs.@profile sample_rate=0.001 begin   # 1.5 M * 0.001 = ~1.5K allocs.
+           my_function()
+       end
+500000
+
+julia> prof = Profile.Allocs.fetch();  # If you want, you can also manually inspect the results.
+
+julia> length(prof.allocs)  # Confirm we have expected number of allocations.
+1515
+
+julia> using PProf  # Now, visualize with an external tool, like PProf or ProfileCanvas.
+
+julia> PProf.Allocs.pprof(prof; from_c=false)  # You can optionally pass in a previously fetched profile result.
+Analyzing 1515 allocation samples... 100%|████████████████████████████████| Time: 0:00:00
+Main binary filename not available.
+Serving web UI on http://localhost:62261
+"alloc-profile.pb.gz"
+```
+Then you can view the profile by navigating to http://localhost:62261, and the profile is saved to disk.
+See PProf package for more options.
+
+##### Allocation Profiling Tips
+
+As stated above, aim for around 1-10 thousand samples in your profile.
+
+Note that we are uniformly sampling in the space of _all allocations_, and are not weighting
+our samples by the size of the allocation. So a given allocation profile may not give a
+representative profile of where most bytes are allocated in your program, unless you had set
+`sample_rate=1`.
+
+Allocations can come from users directly constructing objects, but can also come from inside
+the runtime or be inserted into compiled code to handle type instability. Looking at the
+"source code" view can be helpful to isolate them, and then other external tools such as
+[`Cthulhu.jl`](https://github.com/JuliaDebug/Cthulhu.jl) can be useful for identifying the
+cause of the allocation.
 
-    The current implementation of the Allocations Profiler _does not
-    capture types for all allocations._ Allocations for which the profiler
-    could not capture the type are represented as having type
-    `Profile.Allocs.UnknownType`.
+##### Allocation Profile Visualization Tools
 
-    You can read more about the missing types and the plan to improve this, here:
-    [issue #43688](https://github.com/JuliaLang/julia/issues/43688).
+There are several profiling visualization tools now that can all display Allocation
+Profiles. Here is a small list of some of the main ones we know about:
+- [PProf.jl](https://github.com/JuliaPerf/PProf.jl)
+- [ProfileCanvas.jl](https://github.com/pfitzseb/ProfileCanvas.jl)
+- VSCode's built-in profile visualizer (`@profview_allocs`) [docs needed]
+- Viewing the results directly in the REPL
+  - You can inspect the results in the REPL via [`Profile.Allocs.fetch()`](@ref), to view
+    the stacktrace and type of each allocation.
 
 #### Line-by-Line Allocation Tracking
 
@@ -381,7 +670,7 @@ Currently Julia supports `Intel VTune`, `OProfile` and `perf` as external profil
 Depending on the tool you choose, compile with `USE_INTEL_JITEVENTS`, `USE_OPROFILE_JITEVENTS` and
 `USE_PERF_JITEVENTS` set to 1 in `Make.user`. Multiple flags are supported.
 
-Before running Julia set the environment variable `ENABLE_JITPROFILING` to 1.
+Before running Julia set the environment variable [`ENABLE_JITPROFILING`](@ref ENABLE_JITPROFILING) to 1.
 
 Now you have a multitude of ways to employ those tools!
 For example with `OProfile` you can try a simple recording :
diff --git a/doc/src/manual/running-external-programs.md b/doc/src/manual/running-external-programs.md
index ed3fe85194d93..1f9f3129ca16b 100644
--- a/doc/src/manual/running-external-programs.md
+++ b/doc/src/manual/running-external-programs.md
@@ -79,6 +79,18 @@ julia> `echo "foo bar"`[2]
 "foo bar"
 ```
 
+You can also pass a `IOBuffer`, and later read from it:
+
+```jldoctest
+julia> io = PipeBuffer(); # PipeBuffer is a type of IOBuffer
+
+julia> run(`echo world`, devnull, io, stderr);
+
+julia> readlines(io)
+1-element Vector{String}:
+ "world"
+```
+
 ## [Interpolation](@id command-interpolation)
 
 Suppose you want to do something a bit more complicated and use the name of a file in the variable
@@ -320,8 +332,8 @@ will attempt to store the data in the kernel's buffers while waiting for a reade
 Another common solution is to separate the reader and writer of the pipeline into separate [`Task`](@ref)s:
 
 ```julia
-writer = @async write(process, "data")
-reader = @async do_compute(read(process, String))
+writer = Threads.@spawn write(process, "data")
+reader = Threads.@spawn do_compute(read(process, String))
 wait(writer)
 fetch(reader)
 ```
diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md
index fca4fc75d9e0f..2948c8aad0335 100644
--- a/doc/src/manual/strings.md
+++ b/doc/src/manual/strings.md
@@ -25,7 +25,7 @@ There are a few noteworthy high-level features about Julia's strings:
     the [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding. (A [`transcode`](@ref) function is
     provided to convert to/from other Unicode encodings.)
   * All string types are subtypes of the abstract type `AbstractString`, and external packages define
-    additional `AbstractString` subtypes (e.g. for other encodings).  If you define a function expecting
+    additional `AbstractString` subtypes (e.g. for other encodings). If you define a function expecting
     a string argument, you should declare the type as `AbstractString` in order to accept any string
     type.
   * Like C and Java, but unlike most dynamic languages, Julia has a first-class type for representing
@@ -244,7 +244,7 @@ happens to contain only a single character. In Julia these are very different th
 Range indexing makes a copy of the selected part of the original string.
 Alternatively, it is possible to create a view into a string using the type [`SubString`](@ref).
 More simply, using the [`@views`](@ref) macro on a block of code converts all string slices
-into substrings.  For example:
+into substrings. For example:
 
 ```jldoctest
 julia> str = "long string"
@@ -402,7 +402,7 @@ julia> collect(eachindex(s))
 ```
 
 To access the raw code units (bytes for UTF-8) of the encoding, you can use the [`codeunit(s,i)`](@ref)
-function, where the index `i` runs consecutively from `1` to [`ncodeunits(s)`](@ref).  The [`codeunits(s)`](@ref)
+function, where the index `i` runs consecutively from `1` to [`ncodeunits(s)`](@ref). The [`codeunits(s)`](@ref)
 function returns an `AbstractVector{UInt8}` wrapper that lets you access these raw codeunits (bytes) as an array.
 
 Strings in Julia can contain invalid UTF-8 code unit sequences. This convention allows to
@@ -482,7 +482,7 @@ The resulting string may contain different characters than the input strings,
 and its number of characters may be lower than sum of numbers of characters
 of the concatenated strings, e.g.:
 
-```julia-repl
+```jldoctest
 julia> a, b = "\xe2\x88", "\x80"
 ("\xe2\x88", "\x80")
 
@@ -832,7 +832,7 @@ of the substring that matches, but perhaps we want to capture any non-blank text
 character. We could do the following:
 
 ```jldoctest
-julia> m = match(r"^\s*(?:#\s*(.*?)\s*$|$)", "# a comment ")
+julia> m = match(r"^\s*(?:#\s*(.*?)\s*$)", "# a comment ")
 RegexMatch("# a comment ", 1="a comment")
 ```
 
@@ -957,11 +957,11 @@ i   Do case-insensitive pattern matching.
     that would cross the Unicode rules/non-Unicode rules boundary
     (ords 255/256) will not succeed.
 
-m   Treat string as multiple lines.  That is, change "^" and "$"
+m   Treat string as multiple lines. That is, change "^" and "$"
     from matching the start or end of the string to matching the
     start or end of any line anywhere within the string.
 
-s   Treat string as single line.  That is, change "." to match any
+s   Treat string as single line. That is, change "." to match any
     character whatsoever, even a newline, which normally it would
     not match.
 
@@ -981,10 +981,10 @@ x   Tells the regular expression parser to ignore most whitespace
 For example, the following regex has all three flags turned on:
 
 ```jldoctest
-julia> r"a+.*b+.*?d$"ism
-r"a+.*b+.*?d$"ims
+julia> r"a+.*b+.*d$"ism
+r"a+.*b+.*d$"ims
 
-julia> match(r"a+.*b+.*?d$"ism, "Goodbye,\nOh, angry,\nBad world\n")
+julia> match(r"a+.*b+.*d$"ism, "Goodbye,\nOh, angry,\nBad world\n")
 RegexMatch("angry,\nBad world")
 ```
 
@@ -992,7 +992,7 @@ The `r"..."` literal is constructed without interpolation and unescaping (except
 quotation mark `"` which still has to be escaped). Here is an example
 showing the difference from standard string literals:
 
-```julia-repl
+```jldoctest
 julia> x = 10
 10
 
@@ -1006,13 +1006,16 @@ julia> r"\x"
 r"\x"
 
 julia> "\x"
-ERROR: syntax: invalid escape sequence
+ERROR: ParseError:
+# Error @ none:1:2
+"\x"
+#└┘ ── invalid hex escape sequence
 ```
 
 Triple-quoted regex strings, of the form `r"""..."""`, are also supported (and may be convenient
 for regular expressions containing quotation marks or newlines).
 
-The `Regex()` constructor may be used to create a valid regex string programmatically.  This permits using the contents of string variables and other string operations when constructing the regex string. Any of the regex codes above can be used within the single string argument to `Regex()`. Here are some examples:
+The `Regex()` constructor may be used to create a valid regex string programmatically. This permits using the contents of string variables and other string operations when constructing the regex string. Any of the regex codes above can be used within the single string argument to `Regex()`. Here are some examples:
 
 ```jldoctest
 julia> using Dates
@@ -1142,9 +1145,9 @@ some confusion regarding the matter.
 
 Version numbers can easily be expressed with non-standard string literals of the form [`v"..."`](@ref @v_str).
 Version number literals create [`VersionNumber`](@ref) objects which follow the
-specifications of [semantic versioning](https://semver.org/),
+specifications of [semantic versioning 2.0.0-rc2](https://semver.org/spec/v2.0.0-rc.2.html),
 and therefore are composed of major, minor and patch numeric values, followed by pre-release and
-build alpha-numeric annotations. For example, `v"0.2.1-rc1+win64"` is broken into major version
+build alphanumeric annotations. For example, `v"0.2.1-rc1+win64"` is broken into major version
 `0`, minor version `2`, patch version `1`, pre-release `rc1` and build `win64`. When entering
 a version literal, everything except the major version number is optional, therefore e.g.  `v"0.2"`
 is equivalent to `v"0.2.0"` (with empty pre-release/build annotations), `v"2"` is equivalent to
@@ -1203,3 +1206,55 @@ Notice that the first two backslashes appear verbatim in the output, since they
 precede a quote character.
 However, the next backslash character escapes the backslash that follows it, and the
 last backslash escapes a quote, since these backslashes appear before a quote.
+
+
+## [Annotated Strings](@id man-annotated-strings)
+
+!!! note
+    The API for AnnotatedStrings is considered experimental and is subject to change between
+    Julia versions.
+
+It is sometimes useful to be able to hold metadata relating to regions of a
+string. A [`AnnotatedString`](@ref Base.AnnotatedString) wraps another string and
+allows for regions of it to be annotated with labelled values (`:label => value`).
+All generic string operations are applied to the underlying string. However,
+when possible, styling information is preserved. This means you can manipulate a
+[`AnnotatedString`](@ref Base.AnnotatedString) —taking substrings, padding them,
+concatenating them with other strings— and the metadata annotations will "come
+along for the ride".
+
+This string type is fundamental to the [StyledStrings stdlib](@ref
+stdlib-styledstrings), which uses `:face`-labelled annotations to hold styling
+information.
+
+When concatenating a [`AnnotatedString`](@ref Base.AnnotatedString), take care to use
+[`annotatedstring`](@ref Base.annotatedstring) instead of [`string`](@ref) if you want
+to keep the string annotations.
+
+```jldoctest
+julia> str = Base.AnnotatedString("hello there",
+               [(1:5, :word, :greeting), (7:11, :label, 1)])
+"hello there"
+
+julia> length(str)
+11
+
+julia> lpad(str, 14)
+"   hello there"
+
+julia> typeof(lpad(str, 7))
+Base.AnnotatedString{String}
+
+julia> str2 = Base.AnnotatedString(" julia", [(2:6, :face, :magenta)])
+" julia"
+
+julia> Base.annotatedstring(str, str2)
+"hello there julia"
+
+julia> str * str2 == Base.annotatedstring(str, str2) # *-concatenation still works
+true
+```
+
+The annotations of a [`AnnotatedString`](@ref Base.AnnotatedString) can be accessed
+and modified via the [`annotations`](@ref Base.annotations) and
+[`annotate!`](@ref Base.annotate!) functions.
diff --git a/doc/src/manual/style-guide.md b/doc/src/manual/style-guide.md
index d567bf7627073..d683146a595bf 100644
--- a/doc/src/manual/style-guide.md
+++ b/doc/src/manual/style-guide.md
@@ -19,6 +19,11 @@ Julia's compiler works.
 It is also worth emphasizing that functions should take arguments, instead of operating directly
 on global variables (aside from constants like [`pi`](@ref)).
 
+## Write docstrings
+
+Comments describing an object should typically be written as [docstrings](@ref man-writing-documentation) for editor and REPL accessibility.
+Inline comments (`# comment`) and multiline comments (`#= comment =#`) are appropriate for information that is intended only for the reader of the code (as opposed to a user).
+
 ## Avoid writing overly-specific types
 
 Code should be as generic as possible. Instead of writing:
@@ -96,7 +101,7 @@ Instead of:
 
 ```julia
 function double(a::AbstractArray{<:Number})
-    for i = firstindex(a):lastindex(a)
+    for i in eachindex(a)
         a[i] *= 2
     end
     return a
@@ -107,7 +112,7 @@ use:
 
 ```julia
 function double!(a::AbstractArray{<:Number})
-    for i = firstindex(a):lastindex(a)
+    for i in eachindex(a)
         a[i] *= 2
     end
     return a
@@ -116,7 +121,7 @@ end
 
 Julia Base uses this convention throughout and contains examples of functions
 with both copying and modifying forms (e.g., [`sort`](@ref) and [`sort!`](@ref)), and others
-which are just modifying (e.g., [`push!`](@ref), [`pop!`](@ref), [`splice!`](@ref)).  It
+which are just modifying (e.g., [`push!`](@ref), [`pop!`](@ref), [`splice!`](@ref)). It
 is typical for such functions to also return the modified array for convenience.
 
 Functions related to IO or making use of random number generators (RNG) are notable exceptions:
@@ -167,10 +172,9 @@ Counter-examples to this rule include [`NamedTuple`](@ref), [`RegexMatch`](@ref
 ## Use naming conventions consistent with Julia `base/`
 
   * modules and type names use capitalization and camel case: `module SparseArrays`, `struct UnitRange`.
-  * functions are lowercase ([`maximum`](@ref), [`convert`](@ref)) and, when readable, with multiple
-    words squashed together ([`isequal`](@ref), [`haskey`](@ref)). When necessary, use underscores
-    as word separators. Underscores are also used to indicate a combination of concepts ([`remotecall_fetch`](@ref)
-    as a more efficient implementation of `fetch(remotecall(...))`) or as modifiers.
+  * constants use all uppercase and underscores ([`LOAD_PATH`](@ref), [`VERSION`](@ref)).
+  * while anything not marked with `public` or `export` is considered internal, a prefix of
+    `_` also indicates that an object is not intended for public use.
   * functions mutating at least one of their arguments end in `!`.
   * conciseness is valued, but avoid abbreviation ([`indexin`](@ref) rather than `indxin`) as
     it becomes difficult to remember whether and how particular words are abbreviated.
@@ -178,6 +182,9 @@ Counter-examples to this rule include [`NamedTuple`](@ref), [`RegexMatch`](@ref
 If a function name requires multiple words, consider whether it might represent more than one
 concept and might be better split into pieces.
 
+Function names should be written in snake case ([`minimum`](@ref), [`count_zeros`](@ref), [`escape_string`](@ref)).
+Base often breaks this convention by squashing words together ([`splitpath`](@ref), [`readeach`](@ref)) but this style is not recommended for packages.
+
 ## Write functions with argument ordering similar to Julia Base
 
 As a general rule, the Base library uses the following order of arguments to functions,
@@ -262,6 +269,29 @@ Splicing function arguments can be addictive. Instead of `[a..., b...]`, use sim
 which already concatenates arrays. [`collect(a)`](@ref) is better than `[a...]`, but since `a`
 is already iterable it is often even better to leave it alone, and not convert it to an array.
 
+## Ensure constructors return an instance of their own type
+
+When a method `T(x)` is called on a type `T`, it is generally expected to return a value of type T.
+Defining a [constructor](@ref man-constructors) that returns an unexpected type can lead to confusing and unpredictable behavior:
+
+```jldoctest
+julia> struct Foo{T}
+           x::T
+       end
+
+julia> Base.Float64(foo::Foo) = Foo(Float64(foo.x))  # Do not define methods like this
+
+julia> Float64(Foo(3))  # Should return `Float64`
+Foo{Float64}(3.0)
+
+julia> Foo{Int}(x) = Foo{Float64}(x)  # Do not define methods like this
+
+julia> Foo{Int}(3)  # Should return `Foo{Int}`
+Foo{Float64}(3.0)
+```
+
+To maintain code clarity and ensure type consistency, always design constructors to return an instance of the type they are supposed to construct.
+
 ## Don't use unnecessary static parameters
 
 A function signature:
@@ -346,7 +376,7 @@ This would provide custom showing of vectors with a specific new element type. W
 this should be avoided. The trouble is that users will expect a well-known type like `Vector()`
 to behave in a certain way, and overly customizing its behavior can make it harder to work with.
 
-## Avoid type piracy
+## [Avoid type piracy](@id avoid-type-piracy)
 
 "Type piracy" refers to the practice of extending or redefining methods in Base
 or other packages on types that you have not defined. In extreme cases, you can crash Julia
diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md
index 3510dfe7a7042..82319563ae89f 100644
--- a/doc/src/manual/types.md
+++ b/doc/src/manual/types.md
@@ -31,8 +31,11 @@ each other: all concrete types are final and may only have abstract types as the
 While this might at first seem unduly restrictive, it has many beneficial consequences with surprisingly
 few drawbacks. It turns out that being able to inherit behavior is much more important than being
 able to inherit structure, and inheriting both causes significant difficulties in traditional
-object-oriented languages. Other high-level aspects of Julia's type system that should be mentioned
-up front are:
+object-oriented languages. While concrete types do have abstract subtypes, there are only two examples of this
+([`Union{}`](@ref man-abstract-types) and [`Type{T}`](@ref man-typet-type))) and additional subtypes
+of concrete types cannot be declared.
+
+Other high-level aspects of Julia's type system that should be mentioned up front are:
 
   * There is no division between object and non-object values: all values in Julia are true objects
     having a type that belongs to a single, fully connected type graph, all nodes of which are equally
@@ -182,7 +185,7 @@ When no supertype is given, the default supertype is `Any` -- a predefined abstr
 all objects are instances of and all types are subtypes of. In type theory, `Any` is commonly
 called "top" because it is at the apex of the type graph. Julia also has a predefined abstract
 "bottom" type, at the nadir of the type graph, which is written as `Union{}`. It is the exact
-opposite of `Any`: no object is an instance of `Union{}` and all types are supertypes of `Union{}`.
+opposite of `Any`: no object is an instance of `Union{}` and all types (including concrete types) are supertypes of `Union{}`.
 
 Let's consider some of the abstract types that make up Julia's numerical hierarchy:
 
@@ -298,7 +301,7 @@ a name. A primitive type can optionally be declared to be a subtype of some supe
 is omitted, then the type defaults to having `Any` as its immediate supertype. The declaration
 of [`Bool`](@ref) above therefore means that a boolean value takes eight bits to store, and has
 [`Integer`](@ref) as its immediate supertype. Currently, only sizes that are multiples of
-8 bits are supported and you are likely to experience LLVM bugs with sizes other than those used above.
+8 bits are supported and you are more likely to experience bugs with sizes other than those used above.
 Therefore, boolean values, although they really need just a single bit, cannot be declared to be any
 smaller than eight bits.
 
@@ -713,10 +716,12 @@ For the default constructor, exactly one argument must be supplied for each fiel
 ```jldoctest pointtype
 julia> Point{Float64}(1.0)
 ERROR: MethodError: no method matching Point{Float64}(::Float64)
+The type `Point{Float64}` exists, but no method is defined for this combination of argument types when trying to construct it.
 [...]
 
-julia> Point{Float64}(1.0,2.0,3.0)
+julia> Point{Float64}(1.0, 2.0, 3.0)
 ERROR: MethodError: no method matching Point{Float64}(::Float64, ::Float64, ::Float64)
+The type `Point{Float64}` exists, but no method is defined for this combination of argument types when trying to construct it.
 [...]
 ```
 
@@ -748,6 +753,7 @@ to `Point` have the same type. When this isn't the case, the constructor will fa
 ```jldoctest pointtype
 julia> Point(1,2.5)
 ERROR: MethodError: no method matching Point(::Int64, ::Float64)
+The type `Point` exists, but no method is defined for this combination of argument types when trying to construct it.
 
 Closest candidates are:
   Point(::T, !Matched::T) where T
@@ -1099,7 +1105,7 @@ Array{Vector{T}, 1} where T
 Type `T1` defines a 1-dimensional array of 1-dimensional arrays; each
 of the inner arrays consists of objects of the same type, but this type may vary from one inner array to the next.
 On the other hand, type `T2` defines a 1-dimensional array of 1-dimensional arrays all of whose inner arrays must have the
-same type.  Note that `T2` is an abstract type, e.g., `Array{Array{Int,1},1} <: T2`, whereas `T1` is a concrete type. As a consequence, `T1` can be constructed with a zero-argument constructor `a=T1()` but `T2` cannot.
+same type. Note that `T2` is an abstract type, e.g., `Array{Array{Int,1},1} <: T2`, whereas `T1` is a concrete type. As a consequence, `T1` can be constructed with a zero-argument constructor `a=T1()` but `T2` cannot.
 
 There is a convenient syntax for naming such types, similar to the short form of function
 definition syntax:
@@ -1303,6 +1309,9 @@ julia> WrapType(Float64) # sharpened constructor, note more precise Type{Float64
 WrapType{Type{Float64}}(Float64)
 ```
 
+This behavior of `Type{Float64}` is an example of an abstract type subtyping a
+concrete type (here `DataType`).
+
 ## Type Aliases
 
 Sometimes it is convenient to introduce a new name for an already expressible type.
@@ -1338,6 +1347,16 @@ type -- either [`Int32`](@ref) or [`Int64`](@ref).
 reflects the size of a native pointer on that machine, the floating point register sizes
 are specified by the IEEE-754 standard.)
 
+Type aliases may be parametrized:
+
+```jldoctest
+julia> const Family{T} = Set{T}
+Set
+
+julia> Family{Char} === Set{Char}
+true
+```
+
 ## Operations on Types
 
 Since types in Julia are themselves objects, ordinary functions can operate on them. Some functions
@@ -1403,14 +1422,16 @@ is raised:
 ```jldoctest; filter = r"Closest candidates.*"s
 julia> supertype(Union{Float64,Int64})
 ERROR: MethodError: no method matching supertype(::Type{Union{Float64, Int64}})
+The function `supertype` exists, but no method is defined for this combination of argument types.
+
 Closest candidates are:
 [...]
 ```
 
 ## [Custom pretty-printing](@id man-custom-pretty-printing)
 
-Often, one wants to customize how instances of a type are displayed.  This is accomplished by
-overloading the [`show`](@ref) function.  For example, suppose we define a type to represent
+Often, one wants to customize how instances of a type are displayed. This is accomplished by
+overloading the [`show`](@ref) function. For example, suppose we define a type to represent
 complex numbers in polar form:
 
 ```jldoctest polartype
@@ -1465,13 +1486,13 @@ julia> [Polar(3, 4.0), Polar(4.0,5.3)]
  4.0 * exp(5.3im)
 ```
 
-where the single-line `show(io, z)` form is still used for an array of `Polar` values.   Technically,
-the REPL calls `display(z)` to display the result of executing a line, which defaults to `show(stdout, MIME("text/plain"), z)`,
-which in turn defaults to `show(stdout, z)`, but you should *not* define new [`display`](@ref)
+where the single-line `show(io, z)` form is still used for an array of `Polar` values. Technically,
+the REPL calls `display(z)` to display the result `z` of executing a line, which defaults to `show(io, MIME("text/plain"), z)` (where `io` is an [`IOContext`](@ref) wrapper around [`stdout`](@ref)),
+which in turn defaults to `show(io, z)`, but you should *not* define new [`display`](@ref)
 methods unless you are defining a new multimedia display handler (see [Multimedia I/O](@ref Multimedia-I/O)).
 
 Moreover, you can also define `show` methods for other MIME types in order to enable richer display
-(HTML, images, etcetera) of objects in environments that support this (e.g. IJulia).   For example,
+(HTML, images, etcetera) of objects in environments that support this (e.g. IJulia). For example,
 we can define formatted HTML display of `Polar` objects, with superscripts and italics, via:
 
 ```jldoctest polartype
@@ -1493,9 +1514,9 @@ julia> show(stdout, "text/html", Polar(3.0,4.0))
 ```
 
 As a rule of thumb, the single-line `show` method should print a valid Julia expression for creating
-the shown object.  When this `show` method contains infix operators, such as the multiplication
+the shown object. When this `show` method contains infix operators, such as the multiplication
 operator (`*`) in our single-line `show` method for `Polar` above, it may not parse correctly when
-printed as part of another object.  To see this, consider the expression object (see [Program
+printed as part of another object. To see this, consider the expression object (see [Program
 representation](@ref)) which takes the square of a specific instance of our `Polar` type:
 
 ```jldoctest polartype
@@ -1509,7 +1530,7 @@ julia> print(:($a^2))
 
 Because the operator `^` has higher precedence than `*` (see [Operator Precedence and Associativity](@ref)), this
 output does not faithfully represent the expression `a ^ 2` which should be equal to `(3.0 *
-exp(4.0im)) ^ 2`.  To solve this issue, we must make a custom method for `Base.show_unquoted(io::IO,
+exp(4.0im)) ^ 2`. To solve this issue, we must make a custom method for `Base.show_unquoted(io::IO,
 z::Polar, indent::Int, precedence::Int)`, which is called internally by the expression object when
 printing:
 
@@ -1529,7 +1550,7 @@ julia> :($a^2)
 ```
 
 The method defined above adds parentheses around the call to `show` when the precedence of the
-calling operator is higher than or equal to the precedence of multiplication.  This check allows
+calling operator is higher than or equal to the precedence of multiplication. This check allows
 expressions which parse correctly without the parentheses (such as `:($a + 2)` and `:($a == 2)`) to
 omit them when printing:
 
@@ -1572,11 +1593,24 @@ julia> [Polar(3, 4.0) Polar(4.0,5.3)]
 See the [`IOContext`](@ref) documentation for a list of common properties which can be used
 to adjust printing.
 
+### Output-function summary
+
+Here is a brief summary of the different output functions in Julia and how they are related.
+Most new types should only need to define `show` methods, if anything.
+
+* [`display(x)`](@ref) tells the current environment to display `x` in whatever way it thinks best. (This might even be a graphical display in something like a Jupyter or Pluto notebook.) By default (e.g. in scripts or in the text REPL), it calls `show(io, "text/plain", x)`, or equivalently `show(io, MIME"text/plain"(), x)`, for an appropriate `io` stream. (In the REPL, `io` is an [`IOContext`](@ref) wrapper around [`stdout`](@ref).) The REPL uses `display` to output the result of an evaluated expression.
+* The 3-argument [`show(io, ::MIME"text/plain", x)`](@ref) method performs verbose pretty-printing of `x`. By default (if no 3-argument method is defined for `typeof(x)`), it calls the 2-argument `show(io, x)`. It is called by the 2-argument `repr("text/plain", x)`. Other 3-argument `show` methods can be defined for additional MIME types as discussed above, to enable richer display of `x` in some interactive environments.
+* The 2-argument [`show(io, x)`](@ref) is the default simple text representation of `x`. It is called by the 1-argument [`repr(x)`](@ref), and is typically the format you might employ to input `x` into Julia. The 1-argument `show(x)` calls `show(stdout, x)`.
+* [`print(io, x)`](@ref) by default calls `show(io, x)`, but a few types have a distinct `print` format — most notably, when `x` is a string, `print` outputs the raw text whereas `show` outputs an escaped string enclosed in quotation marks. The 1-argument `print(x)` calls `print(stdout, x)`. `print` is also called by [`string(x)`](@ref).  See also [`println`](@ref) (to append a newline) and [`printstyled`](@ref) (to add colors etc.), both of which call `print`.
+* [`write(io, x)`](@ref), if it is defined (it generally has *no* default definition for new types), writes a "raw" binary representation of `x` to `io`, e.g. an `x::Int32` will be written as 4 bytes.
+
+It is also helpful to be familiar with the metadata that can be attached to an `io` stream by an [`IOContext`](@ref) wrapper. For example, the REPL sets the `:limit => true` flag from `display` for an evaluated expression, in order to limit the output to fit in the terminal; you can query this flag with `get(io, :limit, false)`. And when displaying an object contained within, for example, a multi-column matrix, the `:compact => true` flag could be set, which you can query with `get(io, :compact, false)`.
+
 ## "Value types"
 
 In Julia, you can't dispatch on a *value* such as `true` or `false`. However, you can dispatch
 on parametric types, and Julia allows you to include "plain bits" values (Types, Symbols, Integers,
-floating-point numbers, tuples, etc.) as type parameters.  A common example is the dimensionality
+floating-point numbers, tuples, etc.) as type parameters. A common example is the dimensionality
 parameter in `Array{T,N}`, where `T` is a type (e.g., [`Float64`](@ref)) but `N` is just an `Int`.
 
 You can create your own custom types that take values as parameters, and use them to control dispatch
@@ -1594,7 +1628,7 @@ julia> Val(x) = Val{x}()
 Val
 ```
 
-There is no more to the implementation of `Val` than this.  Some functions in Julia's standard
+There is no more to the implementation of `Val` than this. Some functions in Julia's standard
 library accept `Val` instances as arguments, and you can also use it to write your own functions.
  For example:
 
@@ -1617,7 +1651,7 @@ a *type*, i.e., use `foo(Val(:bar))` rather than `foo(Val{:bar})`.
 
 It's worth noting that it's extremely easy to mis-use parametric "value" types, including `Val`;
 in unfavorable cases, you can easily end up making the performance of your code much *worse*.
- In particular, you would never want to write actual code as illustrated above.  For more information
+ In particular, you would never want to write actual code as illustrated above. For more information
 about the proper (and improper) uses of `Val`, please read [the more extensive discussion in the performance tips](@ref man-performance-value-type).
 
 [^1]: "Small" is defined by the `max_union_splitting` configuration, which currently defaults to 4.
diff --git a/doc/src/manual/unicode-input.md b/doc/src/manual/unicode-input.md
index 7539e75bb4f24..eba970c051f1e 100644
--- a/doc/src/manual/unicode-input.md
+++ b/doc/src/manual/unicode-input.md
@@ -2,7 +2,7 @@
 
 The following table lists Unicode characters that can be entered via
 tab completion of LaTeX-like abbreviations in the Julia REPL (and
-in various other editing environments).  You can also get information on how to
+in various other editing environments). You can also get information on how to
 type a symbol by entering it in the REPL help, i.e. by typing `?` and then
 entering the symbol in the REPL (e.g., by copy-paste from somewhere you saw
 the symbol).
@@ -52,11 +52,12 @@ function fix_combining_chars(char)
     return cat == 6 || cat == 8 ? "$NBSP$char$NBSP" : "$char"
 end
 
-
 function table_entries(completions, unicode_dict)
-    entries = [[
-        "Code point(s)", "Character(s)",
-        "Tab completion sequence(s)", "Unicode name(s)"
+    entries = Any[Any[
+        ["Code point(s)"],
+        ["Character(s)"],
+        ["Tab completion sequence(s)"],
+        ["Unicode name(s)"],
     ]]
     for (chars, inputs) in sort!(collect(completions), by = first)
         code_points, unicode_names, characters = String[], String[], String[]
@@ -65,12 +66,21 @@ function table_entries(completions, unicode_dict)
             push!(unicode_names, get(unicode_dict, UInt32(char), "(No Unicode name)"))
             push!(characters, isempty(characters) ? fix_combining_chars(char) : "$char")
         end
+        inputs_md = []
+        for (i, input) in enumerate(inputs)
+            i > 1 && push!(inputs_md, ", ")
+            push!(inputs_md, Markdown.Code("", input))
+        end
         push!(entries, [
-            join(code_points, " + "), join(characters),
-            join(inputs, ", "), join(unicode_names, " + ")
+            [join(code_points, " + ")],
+            [join(characters)],
+            inputs_md,
+            [join(unicode_names, " + ")],
         ])
     end
-    return Markdown.Table(entries, [:l, :l, :l, :l])
+    table = Markdown.Table(entries, [:l, :c, :l, :l])
+    # We also need to wrap the Table in a Markdown.MD "document"
+    return Markdown.MD([table])
 end
 
 table_entries(
diff --git a/doc/src/manual/variables-and-scoping.md b/doc/src/manual/variables-and-scoping.md
index c763d62680091..9756b4ec5bd87 100644
--- a/doc/src/manual/variables-and-scoping.md
+++ b/doc/src/manual/variables-and-scoping.md
@@ -16,17 +16,58 @@ introduce a "soft scope", which affects whether
 [shadowing](https://en.wikipedia.org/wiki/Variable_shadowing)
 a global variable by the same name is allowed or not.
 
-### [Scope constructs](@id man-scope-table)
+!!! info "Summary"
+    Variables defined in global scope may be undefined in inner local scopes,
+    depending on where the code is run, in order to balance safety and convenience.
+    The hard and soft local scoping rules define the interplay between global and local variables.
+
+    However, variables defined only in local scope behave consistently in all contexts.
+    If the variable is already defined, it will be reused. If the variable is not defined,
+    it will be made available to the current and inner scopes (but not outer scopes).
+
+!!! tip "A Common Confusion"
+    If you run into an unexpectedly undefined variable,
+
+    ```julia ; nodoctest = "Pseudocode"
+    # Print the numbers 1 through 5
+    i = 0
+    while i < 5
+        i += 1     # ERROR: UndefVarError: `i` not defined
+        println(i)
+    end
+    ```
+
+    a simple fix is to change all global variable definitions into local definitions
+    by wrapping the code in a `let` block or `function`.
+
+    ```julia ; nodoctest = "Pseudocode"
+    # Print the numbers 1 through 5
+    let i = 0
+        while i < 5
+            i += 1     # Now outer `i` is defined in the inner scope of the while loop
+            println(i)
+        end
+    end
+    ```
+
+    This is a common source of confusion when writing procedural scripts,
+    but it becomes a non-issue if code is moved inside functions
+    or executed interactively in the REPL.
+
+    See also the [`global`](@ref) and [`local`](@ref) keywords
+    to explicitly achieve any desired scoping behavior.
+
+### [Scope Constructs](@id man-scope-table)
 
 The constructs introducing scope blocks are:
 
-| Construct | Scope type | Allowed within |
-|:----------|:-----------|:---------------|
+| Construct | Scope Type Introduced | Scope Types Able to Contain Construct |
+|:----------|:----------------------|:--------------------------------------|
 | [`module`](@ref), [`baremodule`](@ref) | global | global |
-| [`struct`](@ref) | local (soft) | global |
-| [`for`](@ref), [`while`](@ref), [`try`](@ref try) | local (soft) | global, local |
+| [`struct`](@ref) | local (hard) | global |
 | [`macro`](@ref) | local (hard) | global |
-| functions, [`do`](@ref) blocks, [`let`](@ref) blocks, comprehensions, generators | local (hard) | global, local |
+| [`for`](@ref), [`while`](@ref), [`try`](@ref try) | local (soft) | global, local |
+| [`function`](@ref), [`do`](@ref), [`let`](@ref), [comprehensions](@ref man-comprehensions), [generators](@ref man-generators) | local (hard) | global, local |
 
 Notably missing from this table are
 [begin blocks](@ref man-compound-expressions) and [if blocks](@ref man-conditional-evaluation)
@@ -67,31 +108,7 @@ Each module introduces a new global scope, separate from the global scope of all
 is no all-encompassing global scope. Modules can introduce variables of other modules into their
 scope through the [using or import](@ref modules) statements or through qualified access using the
 dot-notation, i.e. each module is a so-called *namespace* as well as a first-class data structure
-associating names with values. Note that while variable bindings can be read externally, they can only
-be changed within the module to which they belong. As an escape hatch, you can always evaluate code
-inside that module to modify a variable; this guarantees, in particular, that module bindings cannot
-be modified externally by code that never calls `eval`.
-
-```jldoctest
-julia> module A
-           a = 1 # a global in A's scope
-       end;
-
-julia> module B
-           module C
-               c = 2
-           end
-           b = C.c    # can access the namespace of a nested global scope
-                      # through a qualified access
-           import ..A # makes module A available
-           d = A.a
-       end;
-
-julia> module D
-           b = a # errors as D's global scope is separate from A's
-       end;
-ERROR: UndefVarError: `a` not defined
-```
+associating names with values.
 
 If a top-level expression contains a variable declaration with keyword `local`,
 then that variable is not accessible outside that expression.
@@ -119,12 +136,14 @@ inside of another local scope, the scope it creates is nested inside of all the
 local scopes that it appears within, which are all ultimately nested inside of
 the global scope of the module in which the code is evaluated. Variables in
 outer scopes are visible from any scope they contain — meaning that they can be
-read and written in inner scopes — unless there is a local variable with the
-same name that "shadows" the outer variable of the same name. This is true even
-if the outer local is declared after (in the sense of textually below) an inner
+read and written in inner scopes — unless there is a variable with the same name
+that "shadows" the outer variable of the same name. This is true even if the
+outer local is declared after (in the sense of textually below) an inner
 block. When we say that a variable "exists" in a given scope, this means that a
 variable by that name exists in any of the scopes that the current scope is
-nested inside of, including the current one.
+nested inside of, including the current one. If a variable's value is used in a
+local scope, but nothing with its name exists in this scope, it is assumed to be
+a global.
 
 Some programming languages require explicitly declaring new variables before
 using them. Explicit declaration works in Julia too: in any local scope, writing
@@ -152,10 +171,10 @@ that location:
 1. **Existing local:** If `x` is *already a local variable*, then the existing local `x` is
    assigned;
 2. **Hard scope:** If `x` is *not already a local variable* and assignment occurs inside of any
-   hard scope construct (i.e. within a `let` block, function or macro body, comprehension, or
+   hard scope construct (i.e. within a `let` block, function, struct or macro body, comprehension, or
    generator), a new local named `x` is created in the scope of the assignment;
 3. **Soft scope:** If `x` is *not already a local variable* and all of the scope constructs
-   containing the assignment are soft scopes (loops, `try`/`catch` blocks, or `struct` blocks), the
+   containing the assignment are soft scopes (loops, `try`/`catch` blocks), the
    behavior depends on whether the global variable `x` is defined:
    * if global `x` is *undefined*, a new local named `x` is created in the scope of the
      assignment;
@@ -187,7 +206,7 @@ julia> greet()
 hello
 
 julia> x # global
-ERROR: UndefVarError: `x` not defined
+ERROR: UndefVarError: `x` not defined in `Main`
 ```
 
 Inside of the `greet` function, the assignment `x = "hello"` causes `x` to be a new local variable
@@ -256,7 +275,7 @@ julia> sum_to(10)
 55
 
 julia> s # global
-ERROR: UndefVarError: `s` not defined
+ERROR: UndefVarError: `s` not defined in `Main`
 ```
 
 Since `s` is local to the function `sum_to`, calling the function has no effect on the global
@@ -343,7 +362,7 @@ hello
 hello
 
 julia> x
-ERROR: UndefVarError: `x` not defined
+ERROR: UndefVarError: `x` not defined in `Main`
 ```
 
 Since the global `x` is not defined when the `for` loop is evaluated, the first clause of the soft
@@ -351,7 +370,7 @@ scope rule applies and `x` is created as local to the `for` loop and therefore g
 undefined after the loop executes. Next, let's consider the body of `sum_to_def` extracted into global
 scope, fixing its argument to `n = 10`
 
-```julia
+```julia ; nodoctest = "Specifically shows scope differences"
 s = 0
 for i = 1:10
     t = s + i
@@ -408,7 +427,7 @@ julia> code = """
 julia> include_string(Main, code)
 ┌ Warning: Assignment to `s` in soft scope is ambiguous because a global variable by the same name exists: `s` will be treated as a new local. Disambiguate by using `local s` to suppress this warning or `global s` to assign to the existing global variable.
 └ @ string:4
-ERROR: LoadError: UndefVarError: `s` not defined
+ERROR: LoadError: UndefVarError: `s` not defined in local scope
 ```
 
 Here we use [`include_string`](@ref), to evaluate `code` as though it were the contents of a file.
@@ -453,7 +472,7 @@ years were confused about this behavior and complained that it was complicated a
 explain and understand. Fair point. Second, and arguably worse, is that it's bad for programming "at
 scale." When you see a small piece of code in one place like this, it's quite clear what's going on:
 
-```julia
+```julia ; nodoctest="Expects global file scope"
 s = 0
 for i = 1:10
     s += i
@@ -464,7 +483,7 @@ Obviously the intention is to modify the existing global variable `s`. What else
 However, not all real world code is so short or so clear. We found that code like the following
 often occurs in the wild:
 
-```julia
+```julia ; nodoctest="Expects global file scope"
 x = 123
 
 # much later
@@ -559,7 +578,7 @@ julia> let x = 1, z
            println("z: $z") # errors as z has not been assigned yet but is local
        end
 x: 1, y: -1
-ERROR: UndefVarError: `z` not defined
+ERROR: UndefVarError: `z` not defined in local scope
 ```
 
 The assignments are evaluated in order, with each right-hand side evaluated in the scope before
@@ -713,86 +732,31 @@ Note that `const` only affects the variable binding; the variable may be bound t
 object (such as an array), and that object may still be modified. Additionally when one tries
 to assign a value to a variable that is declared constant the following scenarios are possible:
 
-* if a new value has a different type than the type of the constant then an error is thrown:
-```jldoctest
-julia> const x = 1.0
-1.0
-
-julia> x = 1
-ERROR: invalid redefinition of constant x
-```
-* if a new value has the same type as the constant then a warning is printed:
-```jldoctest
-julia> const y = 1.0
-1.0
-
-julia> y = 2.0
-WARNING: redefinition of constant y. This may fail, cause incorrect answers, or produce other errors.
-2.0
-```
-* if an assignment would not result in the change of variable value no message is given:
-```jldoctest
-julia> const z = 100
-100
-
-julia> z = 100
-100
-```
-The last rule applies to immutable objects even if the variable binding would change, e.g.:
-```julia-repl
-julia> const s1 = "1"
-"1"
-
-julia> s2 = "1"
-"1"
-
-julia> pointer.([s1, s2], 1)
-2-element Array{Ptr{UInt8},1}:
- Ptr{UInt8} @0x00000000132c9638
- Ptr{UInt8} @0x0000000013dd3d18
-
-julia> s1 = s2
-"1"
-
-julia> pointer.([s1, s2], 1)
-2-element Array{Ptr{UInt8},1}:
- Ptr{UInt8} @0x0000000013dd3d18
- Ptr{UInt8} @0x0000000013dd3d18
-```
-However, for mutable objects the warning is printed as expected:
-```jldoctest
-julia> const a = [1]
-1-element Vector{Int64}:
- 1
-
-julia> a = [1]
-WARNING: redefinition of constant a. This may fail, cause incorrect answers, or produce other errors.
-1-element Vector{Int64}:
- 1
-```
+* Attempting to replace a constant without the const `keyword` is disallowed:
 
-Note that although sometimes possible, changing the value of a `const` variable is strongly
-discouraged, and is intended only for convenience during interactive use. Changing constants can
-cause various problems or unexpected behaviors. For instance, if a method references a constant and
-is already compiled before the constant is changed, then it might keep using the old value:
+  ```jldoctest
+  julia> const x = 1.0
+  1.0
 
-```jldoctest
-julia> const x = 1
-1
+  julia> x = 1
+  ERROR: invalid assignment to constant x. This redefinition may be permitted using the `const` keyword.
+  ```
 
-julia> f() = x
-f (generic function with 1 method)
+* All other definitions of constants are permitted, but may cause significant re-compilation:
 
-julia> f()
-1
+  ```jldoctest
+  julia> const y = 1.0
+  1.0
 
-julia> x = 2
-WARNING: redefinition of constant x. This may fail, cause incorrect answers, or produce other errors.
-2
+  julia> const y = 2.0
+  2.0
+  ```
 
-julia> f()
-1
-```
+!!! compat "Julia 1.12"
+    Prior to julia 1.12, redefinition of constants was poorly supported. It was restricted to
+    redefinition of constants of the same type and could lead to observably incorrect behavior
+    or crashes. Constant redefinition is highly discouraged in versions of julia prior to 1.12.
+    See the manual for prior julia versions for further information.
 
 ## [Typed Globals](@id man-typed-globals)
 
diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md
index 6c22719c1ce86..074a7207698d1 100644
--- a/doc/src/manual/variables.md
+++ b/doc/src/manual/variables.md
@@ -3,21 +3,17 @@
 A variable, in Julia, is a name associated (or bound) to a value. It's useful when you want to
 store a value (that you obtained after some math, for example) for later use. For example:
 
-```julia-repl
-# Assign the value 10 to the variable x
-julia> x = 10
+```jldoctest
+julia> x = 10   # Assign the value 10 to the variable x
 10
 
-# Doing math with x's value
-julia> x + 1
+julia> x + 1    # Doing math with x's value
 11
 
-# Reassign x's value
-julia> x = 1 + 1
+julia> x = 1 + 1   # Reassign x's value
 2
 
-# You can assign values of other types, like strings of text
-julia> x = "Hello World!"
+julia> x = "Hello World!"   # You can assign values of other types, like strings of text
 "Hello World!"
 ```
 
@@ -59,10 +55,10 @@ name `δ` can be entered by typing `\delta`-*tab*, or even `α̂⁽²⁾` by `\a
 that you don't know how to type, the REPL help will tell you: just type `?` and
 then paste the symbol.)
 
-Julia will even let you redefine built-in constants and functions if needed (although
-this is not recommended to avoid potential confusions):
+Julia will even let you shadow existing exported constants and functions with local ones
+(although this is not recommended to avoid potential confusions):
 
-```jldoctest
+```jldoctest; filter = r"with \d+ methods"
 julia> pi = 3
 3
 
@@ -71,12 +67,20 @@ julia> pi
 
 julia> sqrt = 4
 4
+
+julia> length() = 5
+length (generic function with 1 method)
+
+julia> Base.length
+length (generic function with 79 methods)
 ```
 
-However, if you try to redefine a built-in constant or function already in use, Julia will give
-you an error:
+However, if you try to redefine a built-in constant or function that you
+have explicitly imported, Julia will give you an error:
 
 ```jldoctest
+julia> using Base: pi, sqrt
+
 julia> pi
 π = 3.1415926535897...
 
@@ -90,6 +94,10 @@ julia> sqrt = 4
 ERROR: cannot assign a value to imported variable Base.sqrt from module Main
 ```
 
+!!! compat "Julia 1.12"
+  Note that in versions prior to Julia 1.12, these errors depended on *use* rather than definition of
+  the conflicting binding.
+
 ## [Allowed Variable Names](@id man-allowed-variable-names)
 
 Variable names must begin with a letter (A-Z or a-z), underscore, or a subset of Unicode code
@@ -104,35 +112,40 @@ Operators like `+` are also valid identifiers, but are parsed specially. In some
 can be used just like variables; for example `(+)` refers to the addition function, and `(+) = f`
 will reassign it. Most of the Unicode infix operators (in category Sm), such as `⊕`, are parsed
 as infix operators and are available for user-defined methods (e.g. you can use `const ⊗ = kron`
-to define `⊗` as an infix Kronecker product).  Operators can also be suffixed with modifying marks,
+to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks,
 primes, and sub/superscripts, e.g. `+̂ₐ″` is parsed as an infix operator with the same precedence as `+`.
 A space is required between an operator that ends with a subscript/superscript letter and a subsequent
 variable name. For example, if `+ᵃ` is an operator, then `+ᵃx` must be written as `+ᵃ x` to distinguish
 it from `+ ᵃx` where `ᵃx` is the variable name.
 
 
-A particular class of variable names is one that contains only underscores. These identifiers can only be assigned values, which are immediately discarded, and cannot therefore be used to assign values to other variables (i.e., they cannot be used as [`rvalues`](https://en.wikipedia.org/wiki/Value_(computer_science)#Assignment:_l-values_and_r-values)) or use the last value
-assigned to them in any way.
+A particular class of variable names is one that contains only underscores. These identifiers are write-only. I.e. they can only be assigned values, which are immediately discarded, and their values cannot be used in any way.
 
-```julia-repl
+```jldoctest
 julia> x, ___ = size([2 2; 1 1])
 (2, 2)
 
 julia> y = ___
-ERROR: syntax: all-underscore identifier used as rvalue
+ERROR: syntax: all-underscore identifiers are write-only and their values cannot be used in expressions
 
 julia> println(___)
-ERROR: syntax: all-underscore identifier used as rvalue
+ERROR: syntax: all-underscore identifiers are write-only and their values cannot be used in expressions
 ```
 
 The only explicitly disallowed names for variables are the names of the built-in [Keywords](@ref Keywords):
 
-```julia-repl
+```jldoctest
 julia> else = false
-ERROR: syntax: unexpected "else"
+ERROR: ParseError:
+# Error @ none:1:1
+else = false
+└──┘ ── invalid identifier
 
 julia> try = "No"
-ERROR: syntax: unexpected "="
+ERROR: ParseError:
+# Error @ none:1:1
+try = "No"
+└────────┘ ── try without catch or finally
 ```
 
 Some Unicode characters are considered to be equivalent in identifiers.
@@ -151,7 +164,7 @@ The minus sign `−` (U+2212) is treated as equivalent to the hyphen-minus sign
 
 An assignment `variable = value` "binds" the name `variable` to the `value` computed
 on the right-hand side, and the whole assignment is treated by Julia as an expression
-equal to the right-hand-side `value`.  This means that assignments can be *chained*
+equal to the right-hand-side `value`. This means that assignments can be *chained*
 (the same `value` assigned to multiple variables with `variable1 = variable2 = value`)
 or used in other expressions, and is also why their result is shown in the REPL as
 the value of the right-hand side.  (In general, the REPL displays the value of whatever
@@ -170,7 +183,7 @@ julia> b
 ```
 
 A common confusion is the distinction between *assignment* (giving a new "name" to a value)
-and *mutation* (changing a value).  If you run `a = 2` followed by `a = 3`, you have changed
+and *mutation* (changing a value). If you run `a = 2` followed by `a = 3`, you have changed
 the "name" `a` to refer to a new value `3` … you haven't changed the number `2`, so `2+2`
 will still give `4` and not `6`!   This distinction becomes more clear when dealing with
 *mutable* types like [arrays](@ref lib-arrays), whose contents *can* be changed:
@@ -208,11 +221,13 @@ julia> b   # b refers to the original array object, which has been mutated
   3
 ```
 That is, `a[i] = value` (an alias for [`setindex!`](@ref)) *mutates* an existing array object
-in memory, accessible via either `a` or `b`.  Subsequently setting `a = 3.14159`
+in memory, accessible via either `a` or `b`. Subsequently setting `a = 3.14159`
 does not change this array, it simply binds `a` to a different object; the array is still
-accessible via `b`. The other common syntax to mutate an existing object is
+accessible via `b`. Another common syntax to mutate an existing object is
 `a.field = value` (an alias for [`setproperty!`](@ref)), which can be used to change
-a [`mutable struct`](@ref).
+a [`mutable struct`](@ref). There is also mutation via dot assignment, for example
+`b .= 5:7` (which mutates our array `b` in-place to contain `[5,6,7]`), as part of Julia's
+[vectorized "dot" syntax](@ref man-dot-operators).
 
 When you call a [function](@ref man-functions) in Julia, it behaves as if you *assigned*
 the argument values to new variable names corresponding to the function arguments, as discussed
diff --git a/doc/src/manual/workflow-tips.md b/doc/src/manual/workflow-tips.md
index 4085a51ff9131..bfc526edbf8dd 100644
--- a/doc/src/manual/workflow-tips.md
+++ b/doc/src/manual/workflow-tips.md
@@ -10,57 +10,40 @@ your experience at the command line.
 
 ### A basic editor/REPL workflow
 
-The most basic Julia workflows involve using a text editor in conjunction with the `julia` command
-line. A common pattern includes the following elements:
+The most basic Julia workflows involve using a text editor in conjunction with the `julia` command line.
 
-  * **Put code under development in a temporary module.** Create a file, say `Tmp.jl`, and include
-    within it
+Create a file, say `Tmp.jl`, and include within it
+```julia
+module Tmp
 
-    ```julia
-    module Tmp
-    export say_hello
+say_hello() = println("Hello!")
 
-    say_hello() = println("Hello!")
+# Your other definitions here
 
-    # your other definitions here
+end # module
 
-    end
-    ```
-  * **Put your test code in another file.** Create another file, say `tst.jl`, which looks like
+using .Tmp
+```
+Then, in the same directory, start the Julia REPL (using the `julia` command).
+Run the new file as follows:
+```
+julia> include("Tmp.jl")
 
-    ```julia
-    include("Tmp.jl")
-    import .Tmp
-    # using .Tmp # we can use `using` to bring the exported symbols in `Tmp` into our namespace
+julia> Tmp.say_hello()
+Hello!
+```
+Explore ideas in the REPL. Save good ideas in `Tmp.jl`.
+To reload the file after it has been changed, just `include` it again.
 
-    Tmp.say_hello()
-    # say_hello()
+The key in the above is that your code is encapsulated in a module.
+That allows you to edit `struct` definitions and remove methods, without restarting Julia.
 
-    # your other test code here
-    ```
+(Explanation: `struct`s cannot be edited after definition, nor can methods be deleted.
+But you _can_ overwrite the definition of a module, which is what we do when we re-`include("Tmp.jl")`).
 
-    and includes tests for the contents of `Tmp`.
-    Alternatively, you can wrap the contents of your test file in a module, as
+In addition, the encapsulation of code in a module protects it from being influenced
+by previous state in the REPL, protecting you from hard-to-detect errors.
 
-    ```julia
-    module Tst
-        include("Tmp.jl")
-        import .Tmp
-        #using .Tmp
-
-        Tmp.say_hello()
-        # say_hello()
-
-        # your other test code here
-    end
-    ```
-
-    The advantage is that your testing code is now contained in a module and does not use the global scope in `Main` for
-    definitions, which is a bit more tidy.
-
-  * `include` the `tst.jl` file in the Julia REPL with `include("tst.jl")`.
-
-  * **Lather. Rinse. Repeat.** Explore ideas at the `julia` command prompt. Save good ideas in `tst.jl`. To execute `tst.jl` after it has been changed, just `include` it again.
 
 ## Browser-based workflow
 
@@ -131,5 +114,5 @@ the following modifications:
    ```
 
    You can iteratively modify the code in MyPkg in your editor and re-run the
-   tests with `include("runtests.jl")`.  You generally should not need to restart
+   tests with `include("runtests.jl")`. You generally should not need to restart
    your Julia session to see the changes take effect (subject to a few [limitations](https://timholy.github.io/Revise.jl/stable/limitations/)).
diff --git a/doc/src/manual/worldage.md b/doc/src/manual/worldage.md
new file mode 100644
index 0000000000000..41cceec3b87c6
--- /dev/null
+++ b/doc/src/manual/worldage.md
@@ -0,0 +1,295 @@
+# [The World Age mechanism](@id man-world-age)
+
+!!! note
+    World age is an advanced concept. For the vast majority of Julia users, the world age
+    mechanism operates invisibly in the background. This documentation is intended for the
+    few users who may encounter world-age related issues or error messages.
+
+!!! compat "Julia 1.12"
+    Prior to Julia 1.12, the world age mechanism did not apply to changes to the global binding table.
+    The documentation in this chapter is specific to Julia 1.12+.
+
+!!! warning
+    This manual chapter uses internal functions to introspect world age and runtime data structures
+    as an explanatory aid. In general, unless otherwise noted the world age mechanism is not a stable
+    interface and should be interacted with in packages through stable APIs (e.g. `invokelatest`) only.
+    In particular, do not assume that world ages are always integers or that they have a linear order.
+
+## World age in general
+
+The "world age counter" is a monotonically increasing counter that is incremented for every
+change to the global method table or the global binding table (e.g. through method definition,
+type definition, `import`/`using` declaration, creation of (typed) globals or definition of constants).
+
+The current value of the global world age counter can be retrieved using the (internal) function [`Base.get_world_counter`](@ref).
+
+```julia-repl
+julia> Base.get_world_counter()
+0x0000000000009632
+
+julia> const x = 1
+
+julia> Base.get_world_counter()
+0x0000000000009633
+```
+
+In addition, each [`Task`](@ref) stores a local world age that determines which modifications to
+the global binding and method tables are currently visible to the running task. The world age of
+the running task will never exceed the global world age counter, but may run arbitrarily behind it.
+In general the term "current world age" refers to the local world age of the currently running task.
+The current world age may be retrieved using the (internal) function [`Base.tls_world_age`](@ref)
+
+```julia-repl
+julia> function f end
+f (generic function with 0 methods)
+
+julia> begin
+           @show (Int(Base.get_world_counter()), Int(Base.tls_world_age()))
+           Core.eval(@__MODULE__, :(f() = 1))
+           @show (Int(Base.get_world_counter()), Int(Base.tls_world_age()))
+           f()
+       end
+(Int(Base.get_world_counter()), Int(Base.tls_world_age())) = (38452, 38452)
+(Int(Base.get_world_counter()), Int(Base.tls_world_age())) = (38453, 38452)
+ERROR: MethodError: no method matching f()
+The applicable method may be too new: running in current world age 38452, while global world is 38453.
+
+Closest candidates are:
+  f() (method too new to be called from this world context.)
+   @ Main REPL[2]:3
+
+Stacktrace:
+ [1] top-level scope
+   @ REPL[2]:5
+
+julia> (f(), Int(Base.tls_world_age()))
+(1, 38453)
+```
+
+Here the definition of the method `f` raised the global world counter, but the current world
+age did not change. As a result, the definition of `f` was not visible in the currently
+executing task and a [`MethodError`](@ref) resulted.
+
+!!! note
+    The method error printing provided additional information that `f()` is available in a newer world age.
+    This information is added by the error display, not the task that threw the `MethodError`.
+    The thrown `MethodError` is identical whether or not a matching definition of `f()` exists
+    in a newer world age.
+
+However, note that the definition of `f()` was subsequently available at the next REPL prompt, because
+the current task's world age had been raised. In general, certain syntactic constructs (in particular most definitions)
+will raise the current task's world age to the latest global world age, thus making all changes
+(both from the current task and any concurrently executing other tasks) visible. The following statements
+raise the current world age:
+
+1. An explicit invocation of `Core.@latestworld`
+2. The start of every top-level statement
+3. The start of every REPL prompt
+4. Any type or struct definition
+5. Any method definition
+6. Any constant declaration
+7. Any global variable declaration (but not a global variable assignment)
+8. Any `using`, `import`, `export` or `public` statement
+9. Certain other macros like [`@eval`](@ref) (depends on the macro implementation)
+
+Note, however, that the current task's world age may only ever be permanently incremented at
+top level. As a general rule, using any of the above statements in non-top-level scope is a syntax error:
+
+```julia-repl
+julia> f() = Core.@latestworld
+ERROR: syntax: World age increment not at top level
+Stacktrace:
+ [1] top-level scope
+   @ REPL[5]:1
+```
+
+When it isn't (for example for `@eval`), the world age side effect is ignored.
+
+As a result of these rules, Julia may assume that the world age does not change
+within the execution of an ordinary function.
+
+```julia
+function my_function()
+    before = Base.tls_world_age()
+    # Any arbitrary code
+    after = Base.tls_world_age()
+    @assert before === after # always true
+end
+```
+
+This is the key invariant that allows Julia to optimize based on the current state
+of its global data structures, while still having the well-defined ability to change
+these data structures.
+
+## Temporarily raising the world age using `invokelatest`
+
+As described above, it is not possible to permanently raise the world age for the remainder of
+a `Task`'s execution unless the task is executing top-level statements. However, it is possible to
+temporarily change the world age in a scoped manner using `invokelatest`:
+
+```jldoctest
+julia> function f end
+f (generic function with 0 methods)
+
+julia> begin
+           Core.eval(@__MODULE__, :(f() = 1))
+           invokelatest(f)
+       end
+1
+```
+
+`invokelatest` will temporarily raise the current task's world age to the latest global world age (at
+entry to `invokelatest`) and execute the provided function. Note that the world age will return
+to its prior value upon exit from `invokelatest`.
+
+## World age and const struct redefinitions
+
+The semantics described above for method redefinition also apply to redefinition of constants:
+
+```jldoctest
+julia> const x = 1
+1
+
+julia> get_const() = x
+get_const (generic function with 1 method)
+
+julia> begin
+           @show get_const()
+           Core.eval(@__MODULE__, :(const x = 2))
+           @show get_const()
+           Core.@latestworld
+           @show get_const()
+       end
+get_const() = 1
+get_const() = 1
+get_const() = 2
+2
+```
+
+However, for the avoidance of doubt, they do not apply to ordinary assignment to global variables, which becomes visible immediately:
+```jldoctest
+julia> global y = 1
+1
+
+julia> get_global() = y
+get_global (generic function with 1 method)
+
+julia> begin
+           @show get_global()
+           Core.eval(@__MODULE__, :(y = 2))
+           @show get_global()
+       end
+get_global() = 1
+get_global() = 2
+2
+```
+
+One particular special case of constant reassignment is the redefinition of struct types:
+
+```jldoctest; filter = r"\@world\(MyStruct, \d+\:\d+\)"
+julia> struct MyStruct
+           x::Int
+       end
+
+julia> const one_field = MyStruct(1)
+MyStruct(1)
+
+julia> struct MyStruct
+           x::Int
+           y::Float64
+       end
+
+julia> const two_field = MyStruct(1, 2.0)
+MyStruct(1, 2.0)
+
+julia> one_field
+@world(MyStruct, 38452:38455)(1)
+
+julia> two_field
+MyStruct(1, 2.0)
+```
+
+Internally the two definitions of `MyStruct` are entirely separate types. However,
+after the new `MyStruct` type is defined, there is no longer any default binding
+for the original definition of `MyStruct`. To nevertheless facilitate access to
+these types, the special [`@world`](@ref) macro may be used to access the meaning
+of a name in a previous world. However, this facility is intended for introspection
+only and in particular note that world age numbers are not stable across precompilation
+and should in general be treated opaquely.
+
+### Binding partition introspection
+
+In certain cases, it can be helpful to introspect the system's understanding of what
+a binding means in any particular world age. The default display printing of `Core.Binding`
+provides a helpful summary (e.g. on the `MyStruct` example from above):
+
+```julia-repl
+julia> convert(Core.Binding, GlobalRef(@__MODULE__, :MyStruct))
+Binding Main.MyStruct
+   38456:∞ - constant binding to MyStruct
+   38452:38455 - constant binding to @world(MyStruct, 38452:38455)
+   38451:38451 - backdated constant binding to @world(MyStruct, 38452:38455)
+   0:38450 - backdated constant binding to @world(MyStruct, 38452:38455)
+```
+
+## World age and `using`/`import`
+
+Bindings provided via `using` and `import` also operate via the world age mechanism.
+Binding resolution is a stateless function of the `import` and `using` definitions
+visible in the current world age. For example:
+
+```julia-repl
+julia> module M1; const x = 1; export x; end
+
+julia> module M2; const x = 2; export x; end
+
+julia> using .M1
+
+julia> x
+1
+
+julia> using .M2
+
+julia> x
+ERROR: UndefVarError: `x` not defined in `Main`
+Hint: It looks like two or more modules export different bindings with this name, resulting in ambiguity. Try explicitly importing it from a particular module, or qualifying the name with the module it should come from.
+
+julia> convert(Core.Binding, GlobalRef(@__MODULE__, :x))
+Binding Main.x
+   38458:∞ - ambiguous binding - guard entry
+   38457:38457 - implicit `using` resolved to constant 1
+```
+
+## World age capture
+
+Certain language features capture the current task's world age. Perhaps the most common of
+these is creation of new tasks. Newly created tasks will inherit the creating task's local
+world age at creation time and will retain said world age (unless explicitly raised) even
+if the originating tasks raises its world age:
+
+```julia-repl
+julia> const x = 1
+
+julia> t = @task (wait(); println("Running now"); x);
+
+julia> const x = 2
+
+julia> schedule(t);
+Running now
+
+julia> x
+2
+
+julia> fetch(t)
+1
+```
+
+In addition to tasks, opaque closures also capture their world age at creation. See [`Base.Experimental.@opaque`](@ref).
+
+```@docs
+Base.@world
+Base.get_world_counter
+Base.tls_world_age
+Base.invoke_in_world
+Base.Experimental.@opaque
+```
diff --git a/julia.spdx.json b/julia.spdx.json
index bea7bdc6c3a5d..8664b2c653386 100644
--- a/julia.spdx.json
+++ b/julia.spdx.json
@@ -86,6 +86,18 @@
             "copyrightText": "Copyright (c) 2020 Stefan Karpinski <stefan@karpinski.org> and contributors",
             "summary": "ArgTools provides tools for creating consistent, flexible APIs that work with various kinds of function arguments."
         },
+        {
+            "name": "LinearAlgebra.jl",
+            "SPDXID": "SPDXRef-JuliaLinearAlgebra",
+            "downloadLocation": "git+https://github.com/JuliaLang/LinearAlgebra.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://juliastats.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/LinearAlgebra.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2009-2024: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors",
+            "summary": "Development repository for the LinearAlgebra standard library (stdlib) that ships with Julia."
+        },
         {
             "name": "Tar.jl",
             "SPDXID": "SPDXRef-JuliaTar",
@@ -219,16 +231,16 @@
             "summary": "libssh2 is a library implementing the SSH2 protocol, available under the revised BSD license."
         },
         {
-            "name": "mbedtls",
-            "SPDXID": "SPDXRef-mbedtls",
-            "downloadLocation": "git+https://github.com/ARMmbed/mbedtls.git",
+            "name": "OpenSSL",
+            "SPDXID": "SPDXRef-OpenSSL",
+            "downloadLocation": "git+https://github.com/openssl/openssl.git",
             "filesAnalyzed": false,
-            "homepage": "https://tls.mbed.org",
-            "sourceInfo": "The version in use can be found in the file deps/mbedtls.version",
+            "homepage": "https://www.openssl.org",
+            "sourceInfo": "The version in use can be found in the file deps/openssl.version",
             "licenseConcluded": "Apache-2.0",
             "licenseDeclared": "Apache-2.0",
-            "copyrightText": "NOASSERTION",
-            "summary": "An open source, portable, easy to use, readable and flexible SSL library."
+            "copyrightText": "Copyright (c) 1998-2024 The OpenSSL Project Authors. Copyright (c) 1995-1998 Eric A. Young, Tim J. Hudson.",
+            "summary": "OpenSSL is a robust, commercial-grade, full-featured Open Source Toolkit for the TLS (formerly SSL), DTLS and QUIC (currently client side only) protocols."
         },
         {
             "name": "mpfr",
@@ -370,6 +382,32 @@
             "copyrightText": "Copyright © 2014-2019 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.",
             "summary": "utf8proc is a small, clean C library that provides Unicode normalization, case-folding, and other operations for data in the UTF-8 encoding."
         },
+        {
+            "name": "LibTracyClient",
+            "SPDXID": "SPDXRef-LibTracyClient",
+            "downloadLocation": "git+https://github.com/wolfpld/tracy.git",
+            "filesAnalyzed": false,
+            "homepage": "https://github.com/wolfpld/tracy",
+            "sourceInfo": "The git hash of the version in use can be found in the file deps/libtracyclient.version",
+            "licenseConcluded": "BSD-3-Clause",
+            "licenseDeclared": "BSD-3-Clause",
+            "copyrightText": "Copyright (c) 2017-2024, Bartosz Taudul <wolf@nereid.pl>",
+            "summary": "A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.",
+            "comment": "LibTracyClient is an optional dependency that is not built by default"
+        },
+        {
+            "name": "ittapi",
+            "SPDXID": "SPDXRef-ittapi",
+            "downloadLocation": "git+https://github.com/intel/ittapi.git",
+            "filesAnalyzed": false,
+            "homepage": "https://github.com/intel/ittapi",
+            "sourceInfo": "The git hash of the version in use can be found in the file deps/ittapi.version",
+            "licenseConcluded": "BSD-3-Clause AND GPL-2.0-only",
+            "licenseDeclared": "BSD-3-Clause AND GPL-2.0-only",
+            "copyrightText": "Copyright (c) 2019 Intel Corporation",
+            "summary": "The Instrumentation and Tracing Technology (ITT) API enables your application to generate and control the collection of trace data during its execution across different Intel tools.",
+            "comment": "ITTAPI is an optional dependency that is not built by default"
+        },
         {
             "name": "7-Zip",
             "SPDXID": "SPDXRef-7zip",
@@ -394,6 +432,18 @@
             "copyrightText": "Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler",
             "summary": "A massively spiffy yet delicately unobtrusive compression library."
         },
+        {
+            "name": "zstd",
+            "SPDXID": "SPDXRef-zstd",
+            "downloadLocation": "git+https://github.com/facebook/zstd.git",
+            "filesAnalyzed": false,
+            "homepage": "https://www.zstd.net",
+            "sourceInfo": "The git hash of the version in use can be found in the file deps/zstd.version",
+            "licenseConcluded": "BSD-3-Clause",
+            "licenseDeclared": "GPL-2.0+ OR BSD-3-Clause",
+            "copyrightText": "Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.",
+            "summary": "Zstandard, or zstd as short version, is a fast lossless compression algorithm."
+        },
         {
             "name": "patchelf",
             "SPDXID": "SPDXRef-patchelf",
@@ -522,7 +572,7 @@
             "relatedSpdxElement": "SPDXRef-JuliaMain"
         },
         {
-            "spdxElementId": "SPDXRef-mbedtls",
+            "spdxElementId": "SPDXRef-OpenSSL",
             "relationshipType": "BUILD_DEPENDENCY_OF",
             "relatedSpdxElement": "SPDXRef-JuliaMain"
         },
@@ -581,6 +631,16 @@
             "relationshipType": "BUILD_DEPENDENCY_OF",
             "relatedSpdxElement": "SPDXRef-JuliaMain"
         },
+        {
+            "spdxElementId": "SPDXRef-LibTracyClient",
+            "relationshipType": "OPTIONAL_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-ittapi",
+            "relationshipType": "OPTIONAL_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
         {
             "spdxElementId": "SPDXRef-7zip",
             "relationshipType": "RUNTIME_DEPENDENCY_OF",
@@ -591,6 +651,11 @@
             "relationshipType": "BUILD_DEPENDENCY_OF",
             "relatedSpdxElement": "SPDXRef-JuliaMain"
         },
+        {
+            "spdxElementId": "SPDXRef-zstd",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
         {
             "spdxElementId": "SPDXRef-patchelf",
             "relationshipType": "BUILD_TOOL_OF",
diff --git a/pkgimage.mk b/pkgimage.mk
index 0803a188851bb..ed5e1095c0229 100644
--- a/pkgimage.mk
+++ b/pkgimage.mk
@@ -2,132 +2,37 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
 BUILDDIR := .
 JULIAHOME := $(SRCDIR)
 include $(JULIAHOME)/Make.inc
+include $(JULIAHOME)/stdlib/stdlib.mk
 
-VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION)
+DEPOTDIR := $(build_prefix)/share/julia
 
 # set some influential environment variables
-export JULIA_DEPOT_PATH := $(build_prefix)/share/julia
-export JULIA_LOAD_PATH := @stdlib
+export JULIA_DEPOT_PATH := $(shell echo $(call cygpath_w,$(DEPOTDIR)))
+export JULIA_LOAD_PATH := @stdlib$(PATHSEP)$(shell echo $(call cygpath_w,$(JULIAHOME)/stdlib))
 unexport JULIA_PROJECT :=
 unexport JULIA_BINDIR :=
 
+export JULIA_FALLBACK_REPL := true
+
 default: release
-release: all-release
-debug: all-debug
+release: $(BUILDDIR)/stdlib/release.image
+debug: $(BUILDDIR)/stdlib/debug.image
 all: release debug
 
-$(JULIA_DEPOT_PATH):
+$(DEPOTDIR)/compiled:
 	mkdir -p $@
 
 print-depot-path:
 	@$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e '@show Base.DEPOT_PATH')
 
-STDLIBS := ArgTools Artifacts Base64 CRC32c FileWatching Libdl NetworkOptions SHA Serialization \
-		   GMP_jll LLVMLibUnwind_jll LibUV_jll LibUnwind_jll MbedTLS_jll OpenLibm_jll PCRE2_jll \
-		   Zlib_jll dSFMT_jll libLLVM_jll libblastrampoline_jll OpenBLAS_jll Printf Random Tar \
-		   LibSSH2_jll MPFR_jll LinearAlgebra Dates Distributed Future LibGit2 Profile SparseArrays UUIDs \
-		   SharedArrays TOML Test LibCURL Downloads Pkg Dates LazyArtifacts Sockets Unicode Markdown \
-		   InteractiveUtils REPL DelimitedFiles
-
-all-release: $(addprefix cache-release-, $(STDLIBS))
-all-debug:   $(addprefix cache-debug-, $(STDLIBS))
-
-define pkgimg_builder
-$1_SRCS := $$(shell find $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/src -name \*.jl) \
-    $$(wildcard $$(build_prefix)/manifest/$$(VERSDIR)/$1)
-$$(BUILDDIR)/stdlib/$1.release.image: $$($1_SRCS) $$(addsuffix .release.image,$$(addprefix $$(BUILDDIR)/stdlib/,$2)) $(build_private_libdir)/sys.$(SHLIB_EXT)
-	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'Base.compilecache(Base.identify_package("$1"))')
-	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.compilecache(Base.identify_package("$1"))')
-	touch $$@
-cache-release-$1: $$(BUILDDIR)/stdlib/$1.release.image
-$$(BUILDDIR)/stdlib/$1.debug.image: $$($1_SRCS) $$(addsuffix .debug.image,$$(addprefix $$(BUILDDIR)/stdlib/,$2)) $(build_private_libdir)/sys-debug.$(SHLIB_EXT)
-	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'Base.compilecache(Base.identify_package("$1"))')
-	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.compilecache(Base.identify_package("$1"))')
-cache-debug-$1: $$(BUILDDIR)/stdlib/$1.debug.image
-.SECONDARY: $$(BUILDDIR)/stdlib/$1.release.image $$(BUILDDIR)/stdlib/$1.debug.image
-endef
-
-# Used to just define them in the dependency graph
-# reside in the system image
-define sysimg_builder
-$$(BUILDDIR)/stdlib/$1.release.image:
-	touch $$@
-cache-release-$1: $$(BUILDDIR)/stdlib/$1.release.image
-$$(BUILDDIR)/stdlib/$1.debug.image:
-	touch $$@
-cache-debug-$1: $$(BUILDDIR)/stdlib/$1.debug.image
-.SECONDARY: $$(BUILDDIR)/stdlib/$1.release.image $$(BUILDDIR)/stdlib/$1.debug.image
-endef
-
-# no dependencies
-$(eval $(call pkgimg_builder,MozillaCACerts_jll,))
-$(eval $(call sysimg_builder,ArgTools,))
-$(eval $(call sysimg_builder,Artifacts,))
-$(eval $(call sysimg_builder,Base64,))
-$(eval $(call sysimg_builder,CRC32c,))
-$(eval $(call sysimg_builder,FileWatching,))
-$(eval $(call sysimg_builder,Libdl,))
-$(eval $(call sysimg_builder,Logging,))
-$(eval $(call sysimg_builder,Mmap,))
-$(eval $(call sysimg_builder,NetworkOptions,))
-$(eval $(call sysimg_builder,SHA,))
-$(eval $(call sysimg_builder,Serialization,))
-$(eval $(call sysimg_builder,Sockets,))
-$(eval $(call sysimg_builder,Unicode,))
-$(eval $(call pkgimg_builder,Profile,))
-
-# 1-depth packages
-$(eval $(call pkgimg_builder,GMP_jll,Artifacts Libdl))
-$(eval $(call pkgimg_builder,LLVMLibUnwind_jll,Artifacts Libdl))
-$(eval $(call pkgimg_builder,LibUV_jll,Artifacts Libdl))
-$(eval $(call pkgimg_builder,LibUnwind_jll,Artifacts Libdl))
-$(eval $(call pkgimg_builder,MbedTLS_jll,Artifacts Libdl))
-$(eval $(call pkgimg_builder,nghttp2_jll,Artifacts Libdl))
-$(eval $(call pkgimg_builder,OpenLibm_jll,Artifacts Libdl))
-$(eval $(call pkgimg_builder,PCRE2_jll,Artifacts Libdl))
-$(eval $(call pkgimg_builder,Zlib_jll,Artifacts Libdl))
-$(eval $(call pkgimg_builder,dSFMT_jll,Artifacts Libdl))
-$(eval $(call pkgimg_builder,libLLVM_jll,Artifacts Libdl))
-$(eval $(call sysimg_builder,libblastrampoline_jll,Artifacts Libdl))
-$(eval $(call sysimg_builder,OpenBLAS_jll,Artifacts Libdl))
-$(eval $(call sysimg_builder,Markdown,Base64))
-$(eval $(call sysimg_builder,Printf,Unicode))
-$(eval $(call sysimg_builder,Random,SHA))
-$(eval $(call sysimg_builder,Tar,ArgTools,SHA))
-$(eval $(call pkgimg_builder,DelimitedFiles,Mmap))
-
-# 2-depth packages
-$(eval $(call pkgimg_builder,LLD_jll,Zlib_jll libLLVM_jll Artifacts Libdl))
-$(eval $(call pkgimg_builder,LibSSH2_jll,Artifacts Libdl MbedTLS_jll))
-$(eval $(call pkgimg_builder,MPFR_jll,Artifacts Libdl GMP_jll))
-$(eval $(call sysimg_builder,LinearAlgebra,Libdl libblastrampoline_jll OpenBLAS_jll))
-$(eval $(call sysimg_builder,Dates,Printf))
-$(eval $(call pkgimg_builder,Distributed,Random Serialization Sockets))
-$(eval $(call sysimg_builder,Future,Random))
-$(eval $(call sysimg_builder,InteractiveUtils,Markdown))
-$(eval $(call sysimg_builder,LibGit2,NetworkOptions Printf SHA Base64))
-$(eval $(call sysimg_builder,UUIDs,Random SHA))
-
- # 3-depth packages
- # LibGit2_jll
-$(eval $(call pkgimg_builder,LibCURL_jll,LibSSH2_jll nghttp2_jll MbedTLS_jll Zlib_jll Artifacts Libdl))
-$(eval $(call sysimg_builder,REPL,InteractiveUtils Markdown Sockets Unicode))
-$(eval $(call pkgimg_builder,SharedArrays,Distributed Mmap Random Serialization))
-$(eval $(call sysimg_builder,TOML,Dates))
-$(eval $(call pkgimg_builder,Test,Logging Random Serialization InteractiveUtils))
-
-# 4-depth packages
-$(eval $(call sysimg_builder,LibCURL,LibCURL_jll MozillaCACerts_jll))
-
-# 5-depth packages
-$(eval $(call sysimg_builder,Downloads,ArgTools FileWatching LibCURL NetworkOptions))
-
-# 6-depth packages
-$(eval $(call sysimg_builder,Pkg,Dates LibGit2 Libdl Logging Printf Random SHA UUIDs)) # Markdown REPL
+$(BUILDDIR)/stdlib/%.image: $(JULIAHOME)/stdlib/Project.toml $(JULIAHOME)/stdlib/Manifest.toml $(INDEPENDENT_STDLIBS_SRCS) $(DEPOTDIR)/compiled
+	@$(call PRINT_JULIA, JULIA_CPU_TARGET="sysimage" $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e \
+		'Base.Precompilation.precompilepkgs(configs=[``=>Base.CacheFlags(debug_level=2, opt_level=3), ``=>Base.CacheFlags(check_bounds=1, debug_level=2, opt_level=3)])')
+	touch $@
 
-# 7-depth packages
-$(eval $(call pkgimg_builder,LazyArtifacts,Artifacts Pkg))
+$(BUILDDIR)/stdlib/release.image: $(build_private_libdir)/sys.$(SHLIB_EXT)
+$(BUILDDIR)/stdlib/debug.image: $(build_private_libdir)/sys-debug.$(SHLIB_EXT)
 
-$(eval $(call pkgimg_builder,SparseArrays,Libdl LinearAlgebra Random Serialization))
-# SuiteSparse_jll
-# Statistics
+clean:
+	rm -rf $(DEPOTDIR)/compiled
+	rm -f $(BUILDDIR)/stdlib/*.image
diff --git a/src/.clang-tidy b/src/.clang-tidy
new file mode 100644
index 0000000000000..c4a5137980bce
--- /dev/null
+++ b/src/.clang-tidy
@@ -0,0 +1,2 @@
+---
+Checks: -clang-analyzer-optin.performance.Padding
diff --git a/src/.gitignore b/src/.gitignore
index 4ddd75fbb5d62..70f3afe9eab15 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -10,6 +10,7 @@
 /julia_flisp.boot
 /julia_flisp.boot.inc
 /flisp.boot.inc
+/jl_data_globals_defs.inc
 /jl_internal_funcs.inc
 
 /libjulia-debug.a
@@ -24,5 +25,5 @@
 /base/
 
 # Clang compilation database
-/compile_commands*.json
+compile_commands*.json
 .clangd/
diff --git a/src/APInt-C.cpp b/src/APInt-C.cpp
index f06d4362bf958..64b87a1096d44 100644
--- a/src/APInt-C.cpp
+++ b/src/APInt-C.cpp
@@ -7,16 +7,11 @@
 #include <llvm/Support/MathExtras.h>
 
 #include "APInt-C.h"
-#include "julia.h"
 #include "julia_assert.h"
 #include "julia_internal.h"
 
 using namespace llvm;
 
-inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
-    return alignTo(Value, Align, Skew);
-}
-
 const unsigned int integerPartWidth = llvm::APInt::APINT_BITS_PER_WORD;
 const unsigned int host_char_bit = 8;
 
@@ -25,15 +20,15 @@ const unsigned int host_char_bit = 8;
     APInt s; \
     if ((numbits % integerPartWidth) != 0) { \
         /* use LLT_ALIGN to round the memory area up to the nearest integerPart-sized chunk */ \
-        unsigned nbytes = RoundUpToAlignment(numbits, integerPartWidth) / host_char_bit; \
+        unsigned nbytes = alignTo(numbits, integerPartWidth) / host_char_bit; \
         integerPart *data_a64 = (integerPart*)alloca(nbytes); \
         /* TODO: this memcpy assumes little-endian,
          * for big-endian, need to align the copy to the other end */ \
-        memcpy(data_a64, p##s, RoundUpToAlignment(numbits, host_char_bit) / host_char_bit); \
-        s = APInt(numbits, makeArrayRef(data_a64, nbytes / sizeof(integerPart))); \
+        memcpy(data_a64, p##s, alignTo(numbits, host_char_bit) / host_char_bit); \
+        s = APInt(numbits, ArrayRef<uint64_t>(data_a64, nbytes / sizeof(integerPart))); \
     } \
     else { \
-        s = APInt(numbits, makeArrayRef(p##s, numbits / integerPartWidth)); \
+        s = APInt(numbits, ArrayRef<uint64_t>(p##s, numbits / integerPartWidth)); \
     }
 
 /* assign to "integerPart *pr" from "APInt a" */
@@ -47,7 +42,7 @@ const unsigned int host_char_bit = 8;
     else if (numbits <= 64) \
         *(uint64_t*)p##r = a.getZExtValue(); \
     else \
-        memcpy(p##r, a.getRawData(), RoundUpToAlignment(numbits, host_char_bit) / host_char_bit); \
+        memcpy(p##r, a.getRawData(), alignTo(numbits, host_char_bit) / host_char_bit); \
 
 extern "C" JL_DLLEXPORT
 void LLVMNeg(unsigned numbits, integerPart *pa, integerPart *pr) {
@@ -313,17 +308,25 @@ void LLVMByteSwap(unsigned numbits, integerPart *pa, integerPart *pr) {
     ASSIGN(r, a)
 }
 
-void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, bool isSigned, bool *isExact) {
+extern "C" float julia_half_to_float(uint16_t ival) JL_NOTSAFEPOINT;
+extern "C" uint16_t julia_float_to_half(float param) JL_NOTSAFEPOINT;
+extern "C" float julia_bfloat_to_float(uint16_t ival) JL_NOTSAFEPOINT;
+extern "C" uint16_t julia_float_to_bfloat(float param) JL_NOTSAFEPOINT;
+
+void LLVMFPtoInt(jl_datatype_t *ty, void *pa, jl_datatype_t *oty, integerPart *pr, bool isSigned, bool *isExact) {
     double Val;
-    if (numbits == 16)
-        Val = julia__gnu_h2f_ieee(*(uint16_t*)pa);
-    else if (numbits == 32)
+    if (ty == jl_float16_type)
+        Val = julia_half_to_float(*(uint16_t*)pa);
+    else if (ty == jl_bfloat16_type)
+        Val = julia_bfloat_to_float(*(uint16_t*)pa);
+    else if (ty == jl_float32_type)
         Val = *(float*)pa;
-    else if (numbits == 64)
+    else if (ty == jl_float64_type)
         Val = *(double*)pa;
     else
         jl_error("FPtoSI: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64");
-    unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit;
+    unsigned onumbytes = jl_datatype_size(oty);
+    unsigned onumbits = onumbytes * host_char_bit;
     if (onumbits <= 64) { // fast-path, if possible
         if (isSigned) {
             int64_t ia = Val;
@@ -349,8 +352,8 @@ void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr,
     else {
         APFloat a(Val);
         bool isVeryExact;
-        APFloat::roundingMode rounding_mode = APFloat::rmNearestTiesToEven;
-        unsigned nbytes = RoundUpToAlignment(onumbits, integerPartWidth) / host_char_bit;
+        APFloat::roundingMode rounding_mode = RoundingMode::TowardZero;
+        unsigned nbytes = alignTo(onumbits, integerPartWidth) / host_char_bit;
         integerPart *parts = (integerPart*)alloca(nbytes);
         APFloat::opStatus status = a.convertToInteger(MutableArrayRef<integerPart>(parts, nbytes), onumbits, isSigned, rounding_mode, &isVeryExact);
         memcpy(pr, parts, onumbytes);
@@ -360,69 +363,78 @@ void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr,
 }
 
 extern "C" JL_DLLEXPORT
-void LLVMFPtoSI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) {
-    LLVMFPtoInt(numbits, pa, onumbits, pr, true, NULL);
+void LLVMFPtoSI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) {
+    LLVMFPtoInt(ty, pa, oty, pr, true, NULL);
 }
 
 extern "C" JL_DLLEXPORT
-void LLVMFPtoUI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) {
-    LLVMFPtoInt(numbits, pa, onumbits, pr, false, NULL);
+void LLVMFPtoUI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) {
+    LLVMFPtoInt(ty, pa, oty, pr, false, NULL);
 }
 
 extern "C" JL_DLLEXPORT
-int LLVMFPtoSI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) {
+int LLVMFPtoSI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) {
     bool isExact;
-    LLVMFPtoInt(numbits, pa, onumbits, pr, true, &isExact);
+    LLVMFPtoInt(ty, pa, oty, pr, true, &isExact);
     return isExact;
 }
 
 extern "C" JL_DLLEXPORT
-int LLVMFPtoUI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) {
+int LLVMFPtoUI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) {
     bool isExact;
-    LLVMFPtoInt(numbits, pa, onumbits, pr, false, &isExact);
+    LLVMFPtoInt(ty, pa, oty, pr, false, &isExact);
     return isExact;
 }
 
 extern "C" JL_DLLEXPORT
-void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) {
+void LLVMSItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) {
     double val;
     { // end scope before jl_error call
+        unsigned numbytes = jl_datatype_size(ty);
+        unsigned numbits = numbytes * host_char_bit;
         CREATE(a)
         val = a.roundToDouble(true);
     }
-    if (onumbits == 16)
-        *(uint16_t*)pr = julia__gnu_f2h_ieee(val);
-    else if (onumbits == 32)
+    if (oty == jl_float16_type)
+        *(uint16_t*)pr = julia_float_to_half(val);
+    else if (oty == jl_bfloat16_type)
+        *(uint16_t*)pr = julia_float_to_bfloat(val);
+    else if (oty == jl_float32_type)
         *(float*)pr = val;
-    else if (onumbits == 64)
+    else if (oty == jl_float64_type)
         *(double*)pr = val;
     else
         jl_error("SItoFP: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64");
 }
 
 extern "C" JL_DLLEXPORT
-void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) {
+void LLVMUItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) {
     double val;
     { // end scope before jl_error call
+        unsigned numbytes = jl_datatype_size(ty);
+        unsigned numbits = numbytes * host_char_bit;
         CREATE(a)
         val = a.roundToDouble(false);
     }
-    if (onumbits == 16)
-        *(uint16_t*)pr = julia__gnu_f2h_ieee(val);
-    else if (onumbits == 32)
+    if (oty == jl_float16_type)
+        *(uint16_t*)pr = julia_float_to_half(val);
+    else if (oty == jl_bfloat16_type)
+        *(uint16_t*)pr = julia_float_to_bfloat(val);
+    else if (oty == jl_float32_type)
         *(float*)pr = val;
-    else if (onumbits == 64)
+    else if (oty == jl_float64_type)
         *(double*)pr = val;
     else
         jl_error("UItoFP: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
 }
 
 extern "C" JL_DLLEXPORT
-void LLVMSExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart *pr) {
-    if (!(onumbits > inumbits))
+void LLVMSExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerPart *pr) {
+    unsigned inumbytes = jl_datatype_size(ty);
+    unsigned onumbytes = jl_datatype_size(otys);
+    if (!(onumbytes > inumbytes))
         jl_error("SExt: output bitsize must be > input bitsize");
-    unsigned inumbytes = RoundUpToAlignment(inumbits, host_char_bit) / host_char_bit;
-    unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit;
+    unsigned inumbits = inumbytes * host_char_bit;
     int bits = (0 - inumbits) % host_char_bit;
     int signbit = (inumbits - 1) % host_char_bit;
     int sign = ((unsigned char*)pa)[inumbytes - 1] & (1 << signbit) ? -1 : 0;
@@ -437,11 +449,12 @@ void LLVMSExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart
 }
 
 extern "C" JL_DLLEXPORT
-void LLVMZExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart *pr) {
-    if (!(onumbits > inumbits))
+void LLVMZExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerPart *pr) {
+    unsigned inumbytes = jl_datatype_size(ty);
+    unsigned onumbytes = jl_datatype_size(otys);
+    if (!(onumbytes > inumbytes))
         jl_error("ZExt: output bitsize must be > input bitsize");
-    unsigned inumbytes = RoundUpToAlignment(inumbits, host_char_bit) / host_char_bit;
-    unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit;
+    unsigned inumbits = inumbytes * host_char_bit;
     int bits = (0 - inumbits) % host_char_bit;
     // copy over the input bytes
     memcpy(pr, pa, inumbytes);
@@ -454,31 +467,32 @@ void LLVMZExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart
 }
 
 extern "C" JL_DLLEXPORT
-void LLVMTrunc(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart *pr) {
-    if (!(onumbits < inumbits))
+void LLVMTrunc(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerPart *pr) {
+    unsigned inumbytes = jl_datatype_size(ty);
+    unsigned onumbytes = jl_datatype_size(otys);
+    if (!(onumbytes < inumbytes))
         jl_error("Trunc: output bitsize must be < input bitsize");
-    unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit;
     memcpy(pr, pa, onumbytes);
 }
 
 extern "C" JL_DLLEXPORT
-unsigned countTrailingZeros_8(uint8_t Val) {
-    return countTrailingZeros(Val);
+unsigned countr_zero_8(uint8_t Val) {
+    return countr_zero(Val);
 }
 
 extern "C" JL_DLLEXPORT
-unsigned countTrailingZeros_16(uint16_t Val) {
-    return countTrailingZeros(Val);
+unsigned countr_zero_16(uint16_t Val) {
+    return countr_zero(Val);
 }
 
 extern "C" JL_DLLEXPORT
-unsigned countTrailingZeros_32(uint32_t Val) {
-    return countTrailingZeros(Val);
+unsigned countr_zero_32(uint32_t Val) {
+    return countr_zero(Val);
 }
 
 extern "C" JL_DLLEXPORT
-unsigned countTrailingZeros_64(uint64_t Val) {
-    return countTrailingZeros(Val);
+unsigned countr_zero_64(uint64_t Val) {
+    return countr_zero(Val);
 }
 
 extern "C" JL_DLLEXPORT
@@ -510,31 +524,31 @@ void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integer
 }
 
 extern "C" JL_DLLEXPORT
-unsigned LLVMCountPopulation(unsigned numbits, integerPart *pa) {
+unsigned LLVMPopcount(unsigned numbits, integerPart *pa) {
     CREATE(a)
-    return a.countPopulation();
+    return a.popcount();
 }
 
 extern "C" JL_DLLEXPORT
-unsigned LLVMCountTrailingOnes(unsigned numbits, integerPart *pa) {
+unsigned LLVMCountr_one(unsigned numbits, integerPart *pa) {
     CREATE(a)
-    return a.countTrailingOnes();
+    return a.countr_one();
 }
 
 extern "C" JL_DLLEXPORT
-unsigned LLVMCountTrailingZeros(unsigned numbits, integerPart *pa) {
+unsigned LLVMCountr_zero(unsigned numbits, integerPart *pa) {
     CREATE(a)
-    return a.countTrailingZeros();
+    return a.countr_zero();
 }
 
 extern "C" JL_DLLEXPORT
-unsigned LLVMCountLeadingOnes(unsigned numbits, integerPart *pa) {
+unsigned LLVMCountl_one(unsigned numbits, integerPart *pa) {
     CREATE(a)
-    return a.countLeadingOnes();
+    return a.countl_one();
 }
 
 extern "C" JL_DLLEXPORT
-unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa) {
+unsigned LLVMCountl_zero(unsigned numbits, integerPart *pa) {
     CREATE(a)
-    return a.countLeadingZeros();
+    return a.countl_zero();
 }
diff --git a/src/APInt-C.h b/src/APInt-C.h
index e71d49e82e99a..59ce3c765eeec 100644
--- a/src/APInt-C.h
+++ b/src/APInt-C.h
@@ -3,12 +3,15 @@
 #ifndef JL_APINT_C_H
 #define JL_APINT_C_H
 
+#include "julia.h"
+#include "dtypes.h"
+#include "llvm-version.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
-#include "dtypes.h"
 
-#ifdef LLVM_VERSION_MAJOR
+#if defined(__cplusplus) && defined(LLVM_VERSION_MAJOR)
 using integerPart = llvm::APInt::WordType;
 #else
 typedef void integerPart;
@@ -51,30 +54,30 @@ JL_DLLEXPORT int LLVMDiv_uov(unsigned numbits, integerPart *pa, integerPart *pb,
 JL_DLLEXPORT int LLVMRem_sov(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr);
 JL_DLLEXPORT int LLVMRem_uov(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr);
 
-JL_DLLEXPORT unsigned LLVMCountPopulation(unsigned numbits, integerPart *pa);
-JL_DLLEXPORT unsigned LLVMCountTrailingOnes(unsigned numbits, integerPart *pa);
-JL_DLLEXPORT unsigned LLVMCountTrailingZeros(unsigned numbits, integerPart *pa);
-JL_DLLEXPORT unsigned LLVMCountLeadingOnes(unsigned numbits, integerPart *pa);
-JL_DLLEXPORT unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa);
+JL_DLLEXPORT unsigned LLVMPopcount(unsigned numbits, integerPart *pa);
+JL_DLLEXPORT unsigned LLVMCountr_one(unsigned numbits, integerPart *pa);
+JL_DLLEXPORT unsigned LLVMCountr_zero(unsigned numbits, integerPart *pa);
+JL_DLLEXPORT unsigned LLVMCountl_one(unsigned numbits, integerPart *pa);
+JL_DLLEXPORT unsigned LLVMCountl_zero(unsigned numbits, integerPart *pa);
 
-JL_DLLEXPORT void LLVMFPtoSI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr);
-JL_DLLEXPORT void LLVMFPtoUI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr);
-JL_DLLEXPORT void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr);
-JL_DLLEXPORT void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr);
-JL_DLLEXPORT void LLVMSExt(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr);
-JL_DLLEXPORT void LLVMZExt(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr);
-JL_DLLEXPORT void LLVMTrunc(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr);
+JL_DLLEXPORT void LLVMFPtoSI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
+JL_DLLEXPORT void LLVMFPtoUI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
+JL_DLLEXPORT void LLVMSItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
+JL_DLLEXPORT void LLVMUItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
+JL_DLLEXPORT void LLVMSExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
+JL_DLLEXPORT void LLVMZExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
+JL_DLLEXPORT void LLVMTrunc(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
 
-JL_DLLEXPORT int LLVMFPtoSI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr);
-JL_DLLEXPORT int LLVMFPtoUI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr);
+JL_DLLEXPORT int LLVMFPtoSI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
+JL_DLLEXPORT int LLVMFPtoUI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr);
 
 JL_DLLEXPORT void jl_LLVMSMod(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr);
 JL_DLLEXPORT void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr);
 
-JL_DLLEXPORT unsigned countTrailingZeros_8(uint8_t Val);
-JL_DLLEXPORT unsigned countTrailingZeros_16(uint16_t Val);
-JL_DLLEXPORT unsigned countTrailingZeros_32(uint32_t Val);
-JL_DLLEXPORT unsigned countTrailingZeros_64(uint64_t Val);
+JL_DLLEXPORT unsigned countr_zero_8(uint8_t Val);
+JL_DLLEXPORT unsigned countr_zero_16(uint16_t Val);
+JL_DLLEXPORT unsigned countr_zero_32(uint32_t Val);
+JL_DLLEXPORT unsigned countr_zero_64(uint64_t Val);
 
 //uint8_t getSwappedBytes_8(uint8_t Value); // no-op
 //uint16_t getSwappedBytes_16(uint16_t Value);
diff --git a/src/Makefile b/src/Makefile
index 9e34dfda1c4ed..a8e498586e9cc 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,80 +4,109 @@ BUILDDIR := .
 include $(JULIAHOME)/Make.inc
 include $(JULIAHOME)/deps/llvm-ver.make
 
-JCFLAGS += $(CFLAGS)
-JCXXFLAGS += $(CXXFLAGS)
-JCPPFLAGS += $(CPPFLAGS)
-JLDFLAGS += $(LDFLAGS)
+# Users may set CFLAGS (affects dependencies) or JL_CFLAGS (does not affect dependencies) for customizations to the build
+JCFLAGS_COMMON += $(CFLAGS) $(JL_CFLAGS)
+JCXXFLAGS_COMMON += $(CXXFLAGS) $(JL_CXXFLAGS)
+JCPPFLAGS_COMMON += $(CPPFLAGS) $(JL_CPPFLAGS)
+JLDFLAGS += $(LDFLAGS) $(JL_LDFLAGS)
 
 # -I BUILDDIR comes before -I SRCDIR so that the user can override <options.h> on a per-build-directory basis
 #  for gcc/clang, suggested content is:
 #  #include_next <options.h>
 #  #define ARGUMENT_TO_OVERRIDE 1
-FLAGS := \
+FLAGS_COMMON := \
 	-D_GNU_SOURCE -I$(BUILDDIR) -I$(SRCDIR) \
 	-I$(SRCDIR)/flisp -I$(SRCDIR)/support \
 	-I$(LIBUV_INC) -I$(build_includedir) \
 	-I$(JULIAHOME)/deps/valgrind
-FLAGS += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden -fno-common \
+FLAGS_COMMON += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden -fno-common \
 		 -Wno-comment -Wpointer-arith -Wundef
-ifeq ($(USEGCC),1) # GCC bug #25509 (void)__attribute__((warn_unused_result))
-FLAGS += -Wno-unused-result
-endif
-JCFLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
 
-ifeq ($(USECLANG),1)
-FLAGS += -Wno-return-type-c-linkage
+# GCC-specific flags
+FLAGS_GCC  :=
+# GCC bug #25509 (void)__attribute__((warn_unused_result))
+FLAGS_GCC += -Wno-unused-result
+# GCC is throwing warnings like `warning: 'int __builtin_memcmp_eq(const void*, const void*, long unsigned int)' specified bound 18446744073709551615 exceeds maximum object size 9223372036854775807` in llvm's StringRef.h == seemingly because it doesn't realise the size can't be 0.
+FLAGS_GCC += -Wno-stringop-overflow
+FLAGS_GCC += -Wno-stringop-overread
+
+# Required to be allowed to use nullability extension and not be required to annotate all of everything
+JCFLAGS_CLANG += -Wno-atomic-alignment -Wno-nullability-extension -Wno-nullability-completeness
+JCXXFLAGS_CLANG += -Wno-return-type-c-linkage -Wno-atomic-alignment -Wno-nullability-extension -Wno-nullability-completeness
+JCFLAGS_COMMON += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
+
+# Add common flags that apply to both compilers
+ifeq (${USE_THIRD_PARTY_GC},mmtk)
+FLAGS_COMMON += -I$(MMTK_API_INC)
 endif
 
-FLAGS += -DJL_BUILD_ARCH='"$(ARCH)"'
+FLAGS_COMMON += -DJL_BUILD_ARCH='"$(ARCH)"'
 ifeq ($(OS),WINNT)
-FLAGS += -DJL_BUILD_UNAME='"NT"'
+FLAGS_COMMON += -DJL_BUILD_UNAME='"NT"'
 else
-FLAGS += -DJL_BUILD_UNAME='"$(OS)"'
+FLAGS_COMMON += -DJL_BUILD_UNAME='"$(OS)"'
 endif
 
 ifeq ($(OS),FreeBSD)
-FLAGS += -I$(LOCALBASE)/include
+FLAGS_COMMON += -I$(LOCALBASE)/include
+endif
+
+# GC source code. It depends on which GC implementation to use.
+GC_SRCS := gc-common gc-stacks gc-alloc-profiler gc-heap-snapshot
+ifeq (${USE_THIRD_PARTY_GC},mmtk)
+GC_SRCS += gc-mmtk
+else
+GC_SRCS += gc-stock gc-debug gc-pages gc-page-profiler
 endif
 
 SRCS := \
 	jltypes gf typemap smallintset ast builtins module interpreter symbol \
-	dlload sys init task array staticdata toplevel jl_uv datatype \
-	simplevector runtime_intrinsics precompile jloptions \
-	threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \
-	jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \
-	crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall
+	dlload sys init task array genericmemory staticdata toplevel jl_uv datatype \
+	simplevector runtime_intrinsics precompile jloptions mtarraylist \
+	threading scheduler stackwalk null_sysimage \
+	method jlapi signal-handling safepoint timing subtype rtutils \
+	crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall engine \
+	$(GC_SRCS)
 
 RT_LLVMLINK :=
 CG_LLVMLINK :=
 
 ifeq ($(JULIACODEGEN),LLVM)
-CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \
-	llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \
-	llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \
+# Currently these files are used by both GCs. But we should make the list specific to stock, and MMTk should have its own implementation.
+GC_CODEGEN_SRCS := llvm-final-gc-lowering llvm-late-gc-lowering llvm-gc-invariant-verifier
+ifeq (${USE_THIRD_PARTY_GC},mmtk)
+FLAGS_COMMON += -I$(MMTK_API_INC)
+GC_CODEGEN_SRCS += llvm-late-gc-lowering-mmtk
+else
+GC_CODEGEN_SRCS += llvm-late-gc-lowering-stock
+endif
+CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop \
+	llvm-pass-helpers llvm-ptls llvm-propagate-addrspaces \
 	llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \
-	llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures pipeline llvm_api
-FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
+	llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures llvm-expand-atomic-modify \
+	pipeline llvm_api \
+	$(GC_CODEGEN_SRCS)
+FLAGS_COMMON += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
 CG_LLVM_LIBS := all
 ifeq ($(USE_POLLY),1)
 CG_LLVMLINK += -lPolly -lPollyISL
-FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --src-root)/tools/polly/include
-FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --obj-root)/tools/polly/include
-FLAGS += -DUSE_POLLY
+FLAGS_COMMON += -I$(shell $(LLVM_CONFIG_HOST) --src-root)/tools/polly/include
+FLAGS_COMMON += -I$(shell $(LLVM_CONFIG_HOST) --obj-root)/tools/polly/include
+FLAGS_COMMON += -DUSE_POLLY
 ifeq ($(USE_POLLY_OPENMP),1)
-FLAGS += -fopenmp
+FLAGS_COMMON += -fopenmp
 endif
 ifeq ($(USE_POLLY_ACC),1)
 CG_LLVMLINK += -lPollyPPCG -lGPURuntime
-FLAGS += -DUSE_POLLY_ACC
-FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --src-root)/tools/polly/tools # Required to find GPURuntime/GPUJIT.h
+FLAGS_COMMON += -DUSE_POLLY_ACC
+FLAGS_COMMON += -I$(shell $(LLVM_CONFIG_HOST) --src-root)/tools/polly/tools # Required to find GPURuntime/GPUJIT.h
 endif
 endif
 else
 # JULIACODEGEN != LLVM
 endif
 
-RT_LLVM_LIBS := support
+RT_LLVM_LIBS := support targetparser
 
 ifeq ($(OS),WINNT)
 SRCS += win32_ucontext
@@ -99,16 +128,26 @@ ifeq ($(USE_SYSTEM_LIBUV),0)
 UV_HEADERS += uv.h
 UV_HEADERS += uv/*.h
 endif
-PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
+PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h jl_exported_data.inc)
+ifeq (${USE_THIRD_PARTY_GC},mmtk)
+	PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-mmtk.h gc-wb-mmtk.h)
+else
+	PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-stock.h gc-wb-stock.h)
+endif
 ifeq ($(OS),WINNT)
 PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
 endif
-HEADERS := $(PUBLIC_HEADERS) $(addprefix $(SRCDIR)/,julia_internal.h options.h timing.h passes.h) $(addprefix $(BUILDDIR)/,$(DTRACE_HEADERS) jl_internal_funcs.inc)
+HEADERS := $(PUBLIC_HEADERS) $(addprefix $(SRCDIR)/,julia_internal.h options.h timing.h passes.h jl_internal_data.inc) $(addprefix $(BUILDDIR)/,$(DTRACE_HEADERS) jl_internal_funcs.inc jl_data_globals_defs.inc)
 PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,julia_gcext.h)
 PUBLIC_HEADER_TARGETS := $(addprefix $(build_includedir)/julia/,$(notdir $(PUBLIC_HEADERS)) $(UV_HEADERS))
 
 LLVM_LDFLAGS := $(shell $(LLVM_CONFIG_HOST) --ldflags)
 LLVM_CXXFLAGS := $(shell $(LLVM_CONFIG_HOST) --cxxflags)
+LLVM_CFLAGS := $(shell $(LLVM_CONFIG_HOST) --cflags)
+
+ifeq ($(OS)_$(BINARY),WINNT_32)
+LLVM_CXXFLAGS += -I$(SRCDIR)/support/win32-clang-ABI-bug
+endif
 
 # llvm-config --cxxflags does not return -DNDEBUG
 ifeq ($(shell $(LLVM_CONFIG_HOST) --assertion-mode),OFF)
@@ -119,8 +158,9 @@ ifeq ($(JULIACODEGEN),LLVM)
 ifneq ($(USE_SYSTEM_LLVM),0)
 # USE_SYSTEM_LLVM != 0
 CG_LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs --system-libs)
-LLVM_SHLIB_SYMBOL_VERSION := $(shell nm -D --with-symbol-versions $(shell $(LLVM_CONFIG_HOST) --libfiles --link-shared | awk '{print $1; exit}') | \
-                               grep _ZN4llvm3Any6TypeId | head -n 1 | sed -e 's/.*@//')
+LLVM_SHLIB_SYMBOL_VERSION := $(shell readelf -W --dyn-syms $(shell $(LLVM_CONFIG_HOST) --libfiles --link-shared | awk '{print $1; exit}') | \
+                               grep _ZN4llvm3Any6TypeId | head -n 1 | sed -ne 's/.*@//p')
+CG_LLVMLINK += -lz -lzstd
 
 # HACK: llvm-config doesn't correctly point to shared libs on all platforms
 #       https://github.com/JuliaLang/julia/issues/29981
@@ -136,15 +176,18 @@ CG_LLVMLINK += $(LLVM_LDFLAGS) -lLLVM
 else
 CG_LLVMLINK += $(LLVM_LDFLAGS) $(LLVM_SHARED_LINK_FLAG)
 endif # OS
+CG_LLVMLINK += -lz -lzstd
 endif # USE_LLVM_SHLIB
 endif # USE_SYSTEM_LLVM
 
 ifeq ($(USE_LLVM_SHLIB),1)
-FLAGS += -DLLVM_SHLIB
+FLAGS_COMMON += -DLLVM_SHLIB
 endif # USE_LLVM_SHLIB == 1
 endif # JULIACODEGEN == LLVM
 
-RT_LLVM_LINK_ARGS := $(shell $(LLVM_CONFIG_HOST) --libs $(RT_LLVM_LIBS) --system-libs --link-static)
+# Use subst to work around llvm-configure bug in Yggdrasil build: https://github.com/llvm/llvm-project/pull/139945
+CG_LLVMLINK := $(subst /workspace/destdir/lib/libzstd.dll.a,-lzstd,$(CG_LLVMLINK))
+RT_LLVM_LINK_ARGS := $(subst /workspace/destdir/lib/libzstd.dll.a,-lzstd,$(shell $(LLVM_CONFIG_HOST) --libs $(RT_LLVM_LIBS) --system-libs --link-static))
 RT_LLVMLINK += $(LLVM_LDFLAGS) $(RT_LLVM_LINK_ARGS)
 ifeq ($(OS), WINNT)
 RT_LLVMLINK += -luuid -lole32
@@ -160,11 +203,27 @@ LIBJULIA_PATH_REL := libjulia
 endif
 
 COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir)
-RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI)
+RT_LIBS := $(call whole_archive,$(LIBUV)) $(call whole_archive,$(LIBUTF8PROC)) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) -lzstd
+# NB: CG needs uv_mutex_* symbols, but we expect to export them from libjulia-internal
 CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI)
-RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS)
+
+ifeq ($(USEGCC),1)
+ifeq ($(USE_RT_STATIC_LIBGCC),1)
+RT_LIBS += -static-libgcc
+endif
+ifeq ($(USE_RT_STATIC_LIBSTDCXX),1)
+RT_LIBS += -static-libstdc++
+endif
+endif
+
+ifeq (${USE_THIRD_PARTY_GC},mmtk)
+RT_LIBS += $(MMTK_LIB)
+CG_LIBS += $(MMTK_LIB)
+endif
+
+RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(call whole_archive,$(BUILDDIR)/flisp/libflisp-debug.a) $(call whole_archive,$(BUILDDIR)/support/libsupport-debug.a) -ljulia-debug $(RT_LIBS)
 CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug
-RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS)
+RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(call whole_archive,$(BUILDDIR)/flisp/libflisp.a) $(call whole_archive,$(BUILDDIR)/support/libsupport.a) -ljulia $(RT_LIBS)
 CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia -ljulia-internal
 
 OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
@@ -173,18 +232,23 @@ DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
 CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o)
 CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj)
 
+ifeq ($(OS)_$(BINARY),WINNT_32)
+OBJS += $(BUILDDIR)/llvm-Compression.o
+DOBJS += $(BUILDDIR)/llvm-Compression.dbg.obj
+CODEGEN_OBJS += $(BUILDDIR)/llvm-Compression.o
+CODEGEN_DOBJS += $(BUILDDIR)/llvm-Compression.dbg.obj
+endif
+
 # Add SONAME defines so we can embed proper `dlopen()` calls.
-ADDL_SHIPFLAGS  := "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\"" \
-                   "-DJL_LIBJULIA_SONAME=\"$(LIBJULIA_PATH_REL).$(JL_MAJOR_SHLIB_EXT)\""
-ADDL_DEBUGFLAGS := "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys-debug.$(SHLIB_EXT)\"" \
-                   "-DJL_LIBJULIA_SONAME=\"$(LIBJULIA_PATH_REL)-debug.$(JL_MAJOR_SHLIB_EXT)\""
-
-SHIPFLAGS         += $(FLAGS) $(ADDL_SHIPFLAGS)
-DEBUGFLAGS        += $(FLAGS) $(ADDL_DEBUGFLAGS)
-SHIPFLAGS_GCC     += $(FLAGS) $(ADDL_SHIPFLAGS)
-DEBUGFLAGS_GCC    += $(FLAGS) $(ADDL_DEBUGFLAGS)
-SHIPFLAGS_CLANG   += $(FLAGS) $(ADDL_SHIPFLAGS)
-DEBUGFLAGS_CLANG  += $(FLAGS) $(ADDL_DEBUGFLAGS)
+SHIPFLAGS_COMMON  += -DJL_SYSTEM_IMAGE_PATH=$(call shell_escape,$(call c_escape,$(call normalize_path,$(build_private_libdir_rel)/sys.$(SHLIB_EXT)))) \
+                     -DJL_LIBJULIA_SONAME=$(call shell_escape,$(call c_escape,$(LIBJULIA_PATH_REL).$(JL_MAJOR_SHLIB_EXT)))
+DEBUGFLAGS_COMMON += -DJL_SYSTEM_IMAGE_PATH=$(call shell_escape,$(call c_escape,$(call normalize_path,$(build_private_libdir_rel)/sys-debug.$(SHLIB_EXT)))) \
+                     -DJL_LIBJULIA_SONAME=$(call shell_escape,$(call c_escape,$(LIBJULIA_PATH_REL)-debug.$(JL_MAJOR_SHLIB_EXT)))
+
+SHIPFLAGS_COMMON  += $(FLAGS_COMMON)
+DEBUGFLAGS_COMMON += $(FLAGS_COMMON)
+SHIPFLAGS_GCC  += $(FLAGS_GCC)
+DEBUGFLAGS_GCC += $(FLAGS_GCC)
 
 ifeq ($(USE_CROSS_FLISP), 1)
 FLISPDIR := $(BUILDDIR)/flisp/host
@@ -201,10 +265,14 @@ else
 FLISP_EXECUTABLE := $(FLISP_EXECUTABLE_$(JULIA_BUILD_MODE))
 endif
 
+.PHONY: default
 default: $(JULIA_BUILD_MODE) # contains either "debug" or "release"
+
+.PHONY: all
 all: debug release
 
-release debug: %: libjulia-internal-% libjulia-codegen-%
+.PHONY: release debug
+release debug: %: libjulia-internal-% libjulia-codegen-% regenerate-compile_commands
 
 $(BUILDDIR):
 	mkdir -p $(BUILDDIR)
@@ -223,17 +291,24 @@ $(BUILDDIR)/jl_internal_funcs.inc: $(SRCDIR)/jl_exported_funcs.inc
 	# to have a `ijl_` prefix instead of `jl_`, to denote that they are coming from `libjulia-internal`.  This avoids
 	# potential confusion with debugging tools, when inspecting a process that has both `libjulia` and `libjulia-internal`
 	# loaded at the same time.
-	grep 'XX(.\+)' $< | sed -E 's/.*XX\((.+)\).*/#define \1 i\1/g' >$@
+	grep 'XX(..*)' $< | sed -E 's/.*XX\((.+)\).*/#define \1 i\1/g' >$@
+
+$(BUILDDIR)/jl_data_globals_defs.inc: $(SRCDIR)/jl_exported_data.inc
+	# Generate `.inc` file that contains a list of `#define` macros to access global data through struct instances
+	{ \
+		grep 'XX(.*)' $< | sed -E 's/.*XX\(([^,]+), .*\).*/#define jl_\1 (sysimg_global.\1)/g'; \
+		grep 'YY(.*)' $< | sed -E 's/.*YY\(([^,]+), .*\).*/#define jl_\1 (const_globals.jl\1)/g'; \
+	} >$@
 
 # source file rules
 $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
-	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
+	@$(call PRINT_CC, $(CC) $(LLVM_CFLAGS) $(SHIPFLAGS) $(JCPPFLAGS) $(JCFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
-	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -c $< -o $@)
+	@$(call PRINT_CC, $(CC) $(LLVM_CFLAGS) $(DEBUGFLAGS) $(JCPPFLAGS) $(JCFLAGS) -c $< -o $@)
 $(BUILDDIR)/%.o: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONFIG_ABSOLUTE) | $(BUILDDIR)
-	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(SHIPFLAGS) $(CXX_DISABLE_ASSERTION) -c $< -o $@)
+	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(SHIPFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(CXX_DISABLE_ASSERTION) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONFIG_ABSOLUTE) | $(BUILDDIR)
-	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -c $< -o $@)
+	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(DEBUGFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) -c $< -o $@)
 $(BUILDDIR)/%.o : $(SRCDIR)/%.d
 	@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
 $(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d
@@ -251,7 +326,13 @@ $(eval $(call dir_target,$(build_includedir)/julia/uv))
 $(build_includedir)/julia/uv/*.h: $(LIBUV_INC)/uv/*.h | $(build_includedir)/julia/uv
 	$(INSTALL_F) $^ $(build_includedir)/julia/uv
 
+.PHONY: libccalltest
 libccalltest: $(build_shlibdir)/libccalltest.$(SHLIB_EXT)
+.PHONY: libccalllazyfoo
+libccalllazyfoo: $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT)
+.PHONY: libccalllazybar
+libccalllazybar: $(build_shlibdir)/libccalllazybar.$(SHLIB_EXT)
+.PHONY: libllvmcalltest
 libllvmcalltest: $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT)
 
 ifeq ($(OS), Linux)
@@ -260,7 +341,7 @@ else
 JULIA_SPLITDEBUG := 0
 endif
 $(build_shlibdir)/libccalltest.$(SHLIB_EXT): $(SRCDIR)/ccalltest.c
-	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@.tmp $(LDFLAGS))
+	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JCPPFLAGS) $(FLAGS_COMMON) -O3 $< $(fPIC) -shared -o $@.tmp $(LDFLAGS))
 	$(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@.tmp
 ifeq ($(JULIA_SPLITDEBUG),1)
 	@# Create split debug info file for libccalltest stacktraces test
@@ -276,9 +357,16 @@ endif
 	mv $@.tmp $@
 	$(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@
 
+$(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT): $(SRCDIR)/ccalllazyfoo.c
+	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JCPPFLAGS) $(FLAGS_COMMON) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,libccalllazyfoo.$(SHLIB_EXT)))
+
+$(build_shlibdir)/libccalllazybar.$(SHLIB_EXT): $(SRCDIR)/ccalllazybar.c $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT)
+	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JCPPFLAGS) $(FLAGS_COMMON) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,libccalllazybar.$(SHLIB_EXT)) -lccalllazyfoo)
+
 $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvmcalltest.cpp $(LLVM_CONFIG_ABSOLUTE)
-	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(FLAGS) $(CPPFLAGS) $(CXXFLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(NO_WHOLE_ARCHIVE) $(CG_LLVMLINK)) -lpthread
+	@$(call PRINT_CC, $(CXX) $(JCXXFLAGS) $(LLVM_CXXFLAGS) $(FLAGS_COMMON) $(CPPFLAGS) $(CXXFLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(CG_LLVMLINK)) -lpthread
 
+.PHONY: julia_flisp.boot.inc.phony
 julia_flisp.boot.inc.phony: $(BUILDDIR)/julia_flisp.boot.inc
 
 $(BUILDDIR)/julia_flisp.boot.inc: $(BUILDDIR)/julia_flisp.boot $(FLISP_EXECUTABLE_release)
@@ -291,34 +379,36 @@ $(BUILDDIR)/julia_flisp.boot: $(addprefix $(SRCDIR)/,jlfrontend.scm flisp/aliase
 		$(call cygpath_w,$(SRCDIR)/mk_julia_flisp_boot.scm) $(call cygpath_w,$(dir $<)) $(notdir $<) $(call cygpath_w,$@))
 
 # additional dependency links
-$(BUILDDIR)/codegen-stubs.o $(BUILDDIR)/codegen-stubs.dbg.obj: $(SRCDIR)/intrinsics.h
+$(BUILDDIR)/codegen-stubs.o $(BUILDDIR)/codegen-stubs.dbg.obj: $(addprefix $(SRCDIR)/,intrinsics.h llvm-julia-passes.inc)
 $(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h
 $(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h
-$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/builtin_proto.h
+$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/idset.c $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
 	intrinsics.cpp jitlayers.h intrinsics.h llvm-codegen-shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h)
 $(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc
 $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h)
 $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h
-$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
-$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
-$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
-$(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h
-$(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h
+$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h
+$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h
+$(BUILDDIR)/gc-mmtk.o $(BUILDDIR)/gc-mmtk.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
+$(BUILDDIR)/gc-stacks.o $(BUILDDIR)/gc-stacks.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h
+$(BUILDDIR)/gc-stock.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(SRCDIR)/gc-page-profiler.h
+$(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc-heap-snapshot.h
+$(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc-alloc-profiler.h
+$(BUILDDIR)/gc-page-profiler.o $(BUILDDIR)/gc-page-profiler.dbg.obj: $(SRCDIR)/gc-page-profiler.h
 $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h
-$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h
+$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-julia-task-dispatcher.h
 $(BUILDDIR)/jltypes.o $(BUILDDIR)/jltypes.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/julia_version.h
 $(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
 $(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
 $(BUILDDIR)/llvm-cpufeatures.o $(BUILDDIR)/llvm-cpufeatures.dbg.obj: $(SRCDIR)/jitlayers.h
 $(BUILDDIR)/llvm-demote-float16.o $(BUILDDIR)/llvm-demote-float16.dbg.obj: $(SRCDIR)/jitlayers.h
-$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h
+$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-gc-interface-passes.h
 $(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h
 $(BUILDDIR)/llvm-julia-licm.o $(BUILDDIR)/llvm-julia-licm.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-alloc-helpers.h $(SRCDIR)/llvm-pass-helpers.h
-$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h
-$(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h
+$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-gc-interface-passes.h
 $(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h
 $(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h
 $(BUILDDIR)/llvm-propagate-addrspaces.o $(BUILDDIR)/llvm-propagate-addrspaces.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h
@@ -328,10 +418,11 @@ $(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,pr
 $(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c)
 $(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/staticdata_utils.c $(SRCDIR)/precompile_utils.c $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h
-$(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h
-$(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h
+$(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h $(SRCDIR)/common_symbols1.inc $(SRCDIR)/common_symbols2.inc
+$(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(addprefix $(SRCDIR)/,passes.h jitlayers.h llvm-julia-passes.inc)
+$(BUILDDIR)/llvm_api.o $(BUILDDIR)/llvm_api.dbg.obj: $(SRCDIR)/llvm-julia-passes.inc
 
-$(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h)
+$(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc-common.o gc-stock.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h)
 $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h
 
 # archive library file rules
@@ -368,18 +459,18 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION
 
 CXXLD = $(CXX) -shared
 
-$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in
+$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in $(JULIAHOME)/VERSION $(LLVM_CONFIG_ABSOLUTE)
 	sed <'$<' >'$@' -e "s/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/" \
 		        -e "s/@LLVM_SHLIB_SYMBOL_VERSION@/$(LLVM_SHLIB_SYMBOL_VERSION)/"
 
 $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV)
-	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \
-		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(SHIPFLAGS) $(JCXXFLAGS) $(CXXLDFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \
+		$(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
 
 $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV)
-	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ \
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(DEBUGFLAGS) $(JCXXFLAGS) $(CXXLDFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ \
 		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
@@ -401,13 +492,13 @@ libjulia-internal-debug: $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MI
 libjulia-internal-debug libjulia-internal-release: $(PUBLIC_HEADER_TARGETS)
 
 $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT)
-	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(CODEGEN_OBJS) $(RPATH_LIB) -o $@ \
-		$(JLDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT)))
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(SHIPFLAGS) $(JCXXFLAGS) $(CXXLDFLAGS) $(CODEGEN_OBJS) $(RPATH_LIB) -o $@ \
+		$(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-codegen.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
 
 $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(CODEGEN_DOBJS) $(BUILDDIR)/support/libsupport-debug.a $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)
-	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(CODEGEN_DOBJS) $(RPATH_LIB) -o $@ \
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(DEBUGFLAGS) $(JCXXFLAGS) $(CXXLDFLAGS) $(CODEGEN_DOBJS) $(RPATH_LIB) -o $@ \
 		$(JLDFLAGS) $(JLIBLDFLAGS) $(CG_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-codegen-debug.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
@@ -427,24 +518,76 @@ libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINO
 libjulia-codegen-debug libjulia-codegen-release: $(PUBLIC_HEADER_TARGETS)
 
 # set the exports for the source files based on where they are getting linked
-$(OBJS): SHIPFLAGS += -DJL_LIBRARY_EXPORTS_INTERNAL
-$(DOBJS): DEBUGFLAGS += -DJL_LIBRARY_EXPORTS_INTERNAL
-$(CODEGEN_OBJS): SHIPFLAGS += -DJL_LIBRARY_EXPORTS_CODEGEN
-$(CODEGEN_DOBJS): DEBUGFLAGS += -DJL_LIBRARY_EXPORTS_CODEGEN
-
+$(OBJS) $(DOBJS): FLAGS_COMMON += -DJL_LIBRARY_EXPORTS_INTERNAL -DBUILDING_UV_SHARED
+$(CODEGEN_OBJS) $(CODEGEN_DOBJS): FLAGS_COMMON += -DJL_LIBRARY_EXPORTS_CODEGEN -DUSING_UV_SHARED
+# set the exports for the source files based on where they are getting linked
+$(addprefix clang-sa-,$(SRCS)):      FLAGS_COMMON += -DJL_LIBRARY_EXPORTS_INTERNAL
+$(addprefix clang-sagc-,$(SRCS)):    FLAGS_COMMON += -DJL_LIBRARY_EXPORTS_INTERNAL
+$(addprefix clang-tidy-,$(SRCS)):    FLAGS_COMMON += -DJL_LIBRARY_EXPORTS_INTERNAL
+$(addprefix clang-sa-,$(CODEGEN_SRCS)):   FLAGS_COMMON += -DJL_LIBRARY_EXPORTS_CODEGEN
+$(addprefix clang-sagc-,$(CODEGEN_SRCS)): FLAGS_COMMON += -DJL_LIBRARY_EXPORTS_CODEGEN
+$(addprefix clang-tidy-,$(CODEGEN_SRCS)): FLAGS_COMMON += -DJL_LIBRARY_EXPORTS_CODEGEN
+
+# Common flag patterns for all clang tooling (clang-sa, clang-tidy, compile-database)
+CLANG_TOOLING_C_FLAGS = $(CLANGSA_FLAGS) $(LLVM_CFLAGS) $(DEBUGFLAGS_CLANG) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG)
+CLANG_TOOLING_CXX_FLAGS = $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(DEBUGFLAGS_CLANG) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG)
+
+# Additional headers from dependency links, categorized by usage, for adding as fake compiler targets.
+# Adding these as compiler inputs doesn't always work (due to missing includes), but if we don't add them, they won't work at all.
+ADDITIONAL_C_HEADERS := builtin_proto.h common_symbols1.inc common_symbols2.inc gc-alloc-profiler.h \
+	gc-common.h gc-heap-snapshot.h gc-page-profiler.h gc-stock.h serialize.h \
+	support/htable.h support/htable.inc
+ADDITIONAL_CXX_HEADERS := debug-registry.h debuginfo.h intrinsics.h jitlayers.h llvm-alloc-helpers.h \
+	llvm-codegen-shared.h llvm-gc-interface-passes.h llvm-julia-passes.inc \
+	llvm-julia-task-dispatcher.h llvm-pass-helpers.h passes.h processor.h
+
+# Included files that should be compiled using their including file's compiler settings
+INCLUDED_C_FILES := \
+	builtins.c:iddict.c \
+	builtins.c:idset.c \
+	crc32c.c:crc32c-tables.c \
+	signal-handling.c:signals-win.c \
+	signal-handling.c:signals-unix.c \
+	signal-handling.c:mach_excServer.c \
+	signal-handling.c:signals-mach.c \
+	staticdata.c:staticdata_utils.c \
+	staticdata.c:precompile_utils.c
+INCLUDED_CXX_FILES := \
+	codegen.cpp:abi_llvm.cpp \
+	codegen.cpp:abi_arm.cpp \
+	codegen.cpp:abi_aarch64.cpp \
+	codegen.cpp:abi_riscv.cpp \
+	codegen.cpp:abi_ppc64le.cpp \
+	codegen.cpp:abi_win32.cpp \
+	codegen.cpp:abi_win64.cpp \
+	codegen.cpp:abi_x86_64.cpp \
+	codegen.cpp:abi_x86.cpp \
+	codegen.cpp:cgutils.cpp \
+	codegen.cpp:intrinsics.cpp \
+	codegen.cpp:ccall.cpp \
+	processor.cpp:processor_x86.cpp \
+	processor.cpp:processor_arm.cpp \
+	processor.cpp:processor_fallback.cpp
+
+.PHONY: clean
 clean:
-	-rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest*
-	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc
+	-rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen*
+	-rm -rf $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest* $(build_shlibdir)/libccalllazyfoo* $(build_shlibdir)/libccalllazybar*
+	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc $(BUILDDIR)/jl_data_globals_defs.inc
 	-rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen
 	-rm -f $(BUILDDIR)/julia.expmap
 	-rm -f $(BUILDDIR)/julia_version.h
+	-rm -f $(BUILDDIR)/compile_commands.json*
 
+.PHONY: clean-flisp
 clean-flisp:
 	-$(MAKE) -C $(SRCDIR)/flisp clean BUILDDIR='$(abspath $(BUILDDIR)/flisp)'
 
+.PHONY: clean-support
 clean-support:
 	-$(MAKE) -C $(SRCDIR)/support clean BUILDDIR='$(abspath $(BUILDDIR)/support)'
 
+.PHONY: cleanall
 cleanall: clean clean-flisp clean-support clean-analyzegc
 
 $(build_shlibdir)/lib%Plugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/%.cpp $(LLVM_CONFIG_ABSOLUTE)
@@ -456,16 +599,20 @@ $(build_shlibdir)/lib%Plugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/%.cpp $(LLVM_CONFIG
 # before attempting this static analysis, so that all necessary headers
 # and dependencies are properly installed:
 #   make -C src install-analysis-deps
-ANALYSIS_DEPS := llvm clang llvm-tools libuv utf8proc
+ANALYSIS_DEPS := llvm clang llvm-tools libuv utf8proc zstd
 ifeq ($(OS),Darwin)
 ANALYSIS_DEPS += llvmunwind
+else ifeq ($(OS),OpenBSD)
+ANALYSIS_DEPS += llvmunwind
 else ifneq ($(OS),WINNT)
 ANALYSIS_DEPS += unwind
 endif
+.PHONY: install-analysis-deps
 install-analysis-deps:
 	$(MAKE) -C $(JULIAHOME)/deps $(addprefix install-,$(ANALYSIS_DEPS))
 
-analyzegc-deps-check: $(BUILDDIR)/julia_version.h $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc
+.PHONY: analyzegc-deps-check
+analyzegc-deps-check: $(addprefix $(BUILDDIR)/,julia_version.h julia_flisp.boot.inc jl_internal_funcs.inc jl_data_globals_defs.inc)
 ifeq ($(USE_BINARYBUILDER_LLVM),0)
 ifneq ($(BUILD_LLVM_CLANG),1)
 	$(error Clang must be available to use the clang analyzer. Either build it (BUILD_LLVM_CLANG=1) or use BinaryBuilder)
@@ -481,56 +628,133 @@ SA_EXCEPTIONS-jloptions.c                   := -Xanalyzer -analyzer-config -Xana
 SA_EXCEPTIONS-subtype.c                     := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core.uninitialized.Assign;core.UndefinedBinaryOperatorResult"
 SA_EXCEPTIONS-codegen.c                     := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core"
  # these need to be annotated (and possibly fixed)
-SKIP_IMPLICIT_ATOMICS := staticdata.c
- # these need to be annotated (and possibly fixed)
 SKIP_GC_CHECK := codegen.cpp rtutils.c
 
 # make sure LLVM's invariant information is not discarded with -DNDEBUG
-clang-sagc-%: JL_CXXFLAGS += -UNDEBUG
+clang-sagc-%: JCXXFLAGS_COMMON += -UNDEBUG
 clang-sagc-%: $(SRCDIR)/%.c $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
 	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \
 		-Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \
 		$(SA_EXCEPTIONS-$(notdir $<)) \
-		$(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -x c $<)
+		$(CLANG_TOOLING_C_FLAGS) -fcolor-diagnostics -x c $<)
 clang-sagc-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
 	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \
 		-Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \
 		$(SA_EXCEPTIONS-$(notdir $<)) \
-		$(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -x c++ $<)
+		$(CLANG_TOOLING_CXX_FLAGS) -fcolor-diagnostics -x c++ $<)
 
-clang-sa-%: JL_CXXFLAGS += -UNDEBUG
+clang-sa-%: JCXXFLAGS_COMMON += -UNDEBUG
 clang-sa-%: $(SRCDIR)/%.c .FORCE | analyzegc-deps-check
 	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \
 		-Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \
 		$(SA_EXCEPTIONS-$(notdir $<)) \
-		$(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -Werror -x c $<)
+		$(CLANG_TOOLING_C_FLAGS) -fcolor-diagnostics -Werror -x c $<)
 clang-sa-%: $(SRCDIR)/%.cpp .FORCE | analyzegc-deps-check
 	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \
 		-Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \
 		$(SA_EXCEPTIONS-$(notdir $<)) \
-		$(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -Werror -x c++ $<)
+		$(CLANG_TOOLING_CXX_FLAGS) -fcolor-diagnostics -Werror -x c++ $<)
 
 clang-tidy-%: $(SRCDIR)/%.c $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
 	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \
 		-load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \
-		-- $(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -fno-caret-diagnostics -x c)
+		-- $(CLANG_TOOLING_C_FLAGS) -fcolor-diagnostics -fno-caret-diagnostics -x c)
 clang-tidy-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
 	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \
 		-load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \
-		-- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++)
-
-# set the exports for the source files based on where they are getting linked
-clang-sa-% clang-sagc-% clang-tidy-%: DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS
+		-- $(CLANG_TOOLING_CXX_FLAGS) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++)
 
 # Add C files as a target of `analyzesrc` and `analyzegc` and `tidysrc`
-tidysrc: $(addprefix clang-tidy-,$(filter-out $(basename $(SKIP_IMPLICIT_ATOMICS)),$(CODEGEN_SRCS) $(SRCS)))
+.PHONY: tidysrc
+tidysrc: $(addprefix clang-tidy-,$(CODEGEN_SRCS) $(SRCS))
+.PHONY: analyzesrc
 analyzesrc: $(addprefix clang-sa-,$(CODEGEN_SRCS) $(SRCS))
+.PHONY: analyzegc
 analyzegc: $(addprefix clang-sagc-,$(filter-out $(basename $(SKIP_GC_CHECK)),$(CODEGEN_SRCS) $(SRCS)))
+.PHONY: analyze
 analyze: analyzesrc analyzegc tidysrc
 
+$(addprefix analyze-,$(CODEGEN_SRCS) $(SRCS)) : analyze-% : clang-sa-%
+$(addprefix analyze-,$(CODEGEN_SRCS) $(SRCS)) : analyze-% : clang-tidy-%
+$(addprefix analyze-,$(filter-out $(basename $(SKIP_GC_CHECK)),$(CODEGEN_SRCS) $(SRCS))) : analyze-% : clang-sagc-%
+
+.PHONY: clean-analyzegc
 clean-analyzegc:
 	rm -f $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT)
 	rm -f $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT)
 
+# Compilation database generation using existing clang infrastructure
+.PHONY: regenerate-compile_commands
+regenerate-compile_commands:
+	@TMPFILE=$$(mktemp $(abspath $(BUILDDIR)/compile_commands.json.XXXXXX)); \
+	{ \
+		CLANG_TOOLING_C_FLAGS="$$($(JULIAHOME)/contrib/escape_json.sh clang $(CLANG_TOOLING_C_FLAGS))"; \
+		CLANG_TOOLING_CXX_FLAGS="$$($(JULIAHOME)/contrib/escape_json.sh clang $(CLANG_TOOLING_CXX_FLAGS))"; \
+		echo "["; \
+		first=true; \
+		for src in $(SRCS); do \
+			[ "$$first" = "true" ] && first=false || echo ","; \
+			if [ -f $(SRCDIR)/$$src.cpp ]; then \
+				cmd="$${CLANG_TOOLING_CXX_FLAGS}, \"-DJL_LIBRARY_EXPORTS_INTERNAL\", \"$$src.cpp\""; \
+				file_path="$$src.cpp"; \
+			else \
+				cmd="$${CLANG_TOOLING_C_FLAGS}, \"-DJL_LIBRARY_EXPORTS_INTERNAL\", \"$$src.c\""; \
+				file_path="$$src.c"; \
+			fi; \
+			printf '{\n  "directory": "%s",\n  "file": "%s",\n  "arguments": [%s]\n}' "$(abspath $(SRCDIR))" "$$file_path" "$$cmd"; \
+		done; \
+		for src in $(CODEGEN_SRCS); do \
+			[ "$$first" = "true" ] && first=false || echo ","; \
+			if [ -f $(SRCDIR)/$$src.cpp ]; then \
+				cmd="$${CLANG_TOOLING_CXX_FLAGS}, \"-DJL_LIBRARY_EXPORTS_CODEGEN\", \"$$src.cpp\""; \
+				file_path="$$src.cpp"; \
+			else \
+				cmd="$${CLANG_TOOLING_C_FLAGS}, \"-DJL_LIBRARY_EXPORTS_CODEGEN\", \"$$src.c\""; \
+				file_path="$$src.c"; \
+			fi; \
+			printf '{\n  "directory": "%s",\n  "file": "%s",\n  "arguments": [%s]\n}' "$(abspath $(SRCDIR))" "$$file_path" "$$cmd"; \
+		done; \
+		for header in $(HEADERS) $(ADDITIONAL_C_HEADERS); do \
+			[ "$$first" = "true" ] && first=false || echo ","; \
+			file_path="$$header"; \
+			cmd="$${CLANG_TOOLING_C_FLAGS}, \"-DJL_LIBRARY_EXPORTS_INTERNAL\", \"-x\", \"c-header\", \"$$file_path\""; \
+			printf '{\n  "directory": "%s",\n  "file": "%s",\n  "arguments": [%s]\n}' "$(abspath $(SRCDIR))" "$$file_path" "$$cmd"; \
+		done; \
+		for header in $(ADDITIONAL_CXX_HEADERS); do \
+			[ "$$first" = "true" ] && first=false || echo ","; \
+			file_path="$$header"; \
+			cmd="$${CLANG_TOOLING_CXX_FLAGS}, \"-DJL_LIBRARY_EXPORTS_CODEGEN\", \"-x\", \"c++-header\", \"$$file_path\""; \
+			printf '{\n  "directory": "%s",\n  "file": "%s",\n  "arguments": [%s]\n}' "$(abspath $(SRCDIR))" "$$file_path" "$$cmd"; \
+		done; \
+		for included_pair in $(INCLUDED_C_FILES); do \
+			[ "$$first" = "true" ] && first=false || echo ","; \
+			including_file=$${included_pair%%:*}; \
+			included_file=$${included_pair##*:}; \
+			case " $(CODEGEN_SRCS) " in *" $${including_file%.c} "*) export_flag="-DJL_LIBRARY_EXPORTS_CODEGEN";; *) export_flag="-DJL_LIBRARY_EXPORTS_INTERNAL";; esac; \
+			cmd="$${CLANG_TOOLING_C_FLAGS}, \"$$export_flag\", \"$$including_file\""; \
+			printf '{\n  "directory": "%s",\n  "file": "%s",\n  "arguments": [%s]\n}' "$(abspath $(SRCDIR))" "$$included_file" "$$cmd"; \
+		done; \
+		for included_pair in $(INCLUDED_CXX_FILES); do \
+			[ "$$first" = "true" ] && first=false || echo ","; \
+			including_file=$${included_pair%%:*}; \
+			included_file=$${included_pair##*:}; \
+			case " $(CODEGEN_SRCS) " in *" $${including_file%.cpp} "*) export_flag="-DJL_LIBRARY_EXPORTS_CODEGEN";; *) export_flag="-DJL_LIBRARY_EXPORTS_INTERNAL";; esac; \
+			cmd="$${CLANG_TOOLING_CXX_FLAGS}, \"$$export_flag\", \"$$including_file\""; \
+			printf '{\n  "directory": "%s",\n  "file": "%s",\n  "arguments": [%s]\n}' "$(abspath $(SRCDIR))" "$$included_file" "$$cmd"; \
+		done; \
+		echo "]"; \
+	} > $$TMPFILE; \
+	if ! cmp -s $$TMPFILE $(BUILDDIR)/compile_commands.json; then \
+		mv $$TMPFILE $(BUILDDIR)/compile_commands.json; \
+	else \
+		rm -f $$TMPFILE; \
+	fi
+
+.PHONY: compile-database
+compile-database: regenerate-compile_commands
+	$(MAKE) -C $(SRCDIR)/flisp compile-database BUILDDIR='$(abspath $(BUILDDIR)/flisp)'
+	$(MAKE) -C $(SRCDIR)/support compile-database BUILDDIR='$(abspath $(BUILDDIR)/support)'
+	@echo "Compilation database created for src"
+
 .FORCE:
-.PHONY: default all debug release clean cleanall clean-* libccalltest libllvmcalltest julia_flisp.boot.inc.phony analyzegc analyzesrc .FORCE
+.PHONY: .FORCE
diff --git a/src/abi_aarch64.cpp b/src/abi_aarch64.cpp
index 514c3c5a81a6d..0a193ee132556 100644
--- a/src/abi_aarch64.cpp
+++ b/src/abi_aarch64.cpp
@@ -16,7 +16,7 @@ struct ABI_AArch64Layout : AbiLayout {
 Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
+    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields > 0`
     if (dt->layout == NULL || jl_is_layout_opaque(dt->layout))
         return nullptr;
     size_t nfields = dt->layout->nfields;
@@ -62,7 +62,7 @@ Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
+    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields == 0`
     Type *lltype;
     // Check size first since it's cheaper.
     switch (jl_datatype_size(dt)) {
@@ -88,7 +88,7 @@ Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
 Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    if (dt->name->mutabl || dt->layout->npointers || dt->layout->haspadding)
+    if (dt->name->mutabl || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
         return nullptr;
     return dt->layout->nfields ? get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx);
 }
@@ -184,7 +184,7 @@ Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const
     // uniquely addressable members.
     // Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector)
     size_t dsz = jl_datatype_size(dt);
-    if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->haspadding)
+    if (dsz > 64 || !dt->layout || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
         return NULL;
     nele = 0;
     ElementType eltype;
diff --git a/src/abi_arm.cpp b/src/abi_arm.cpp
index 441aa95b1fdf6..8839a37da6e13 100644
--- a/src/abi_arm.cpp
+++ b/src/abi_arm.cpp
@@ -82,7 +82,7 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const
     if (jl_is_structtype(dt)) {
         // Fast path checks before descending the type hierarchy
         // (4 x 128b vector == 64B max size)
-        if (jl_datatype_size(dt) > 64 || dt->layout->npointers || dt->layout->haspadding)
+        if (jl_datatype_size(dt) > 64 || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
             return 0;
 
         base = NULL;
diff --git a/src/abi_ppc64le.cpp b/src/abi_ppc64le.cpp
index 2e18acdbd4f4b..f02e1022ddc2d 100644
--- a/src/abi_ppc64le.cpp
+++ b/src/abi_ppc64le.cpp
@@ -44,7 +44,7 @@ struct ABI_PPC64leLayout : AbiLayout {
 // count the homogeneous floating aggregate size (saturating at max count of 8)
 unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const
 {
-    if (jl_datatype_size(ty) > 128 || ty->layout->npointers || ty->layout->haspadding)
+    if (jl_datatype_size(ty) > 128 || ty->layout->npointers || !ty->layout->flags.isbitsegal || ty->layout->flags.haspadding)
         return 9;
 
     size_t i, l = ty->layout->nfields;
@@ -118,7 +118,12 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *T
 Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     // Arguments are either scalar or passed by value
-    size_t size = jl_datatype_size(dt);
+
+    // LLVM passes Float16 in floating-point registers, but this doesn't match the ABI.
+    // No C compiler seems to support _Float16 yet, so in the meantime, pass as i16
+    if (dt == jl_float16_type || dt == jl_bfloat16_type)
+        return Type::getInt16Ty(ctx);
+
     // don't need to change bitstypes
     if (!jl_datatype_nfields(dt))
         return NULL;
@@ -143,6 +148,7 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const
     }
     // rewrite integer-sized (non-HFA) struct to an array
     // the bitsize of the integer gives the desired alignment
+    size_t size = jl_datatype_size(dt);
     if (size > 8) {
         if (jl_datatype_align(dt) <= 8) {
             Type  *T_int64 = Type::getInt64Ty(ctx);
diff --git a/src/abi_riscv.cpp b/src/abi_riscv.cpp
new file mode 100644
index 0000000000000..cbd85892801c8
--- /dev/null
+++ b/src/abi_riscv.cpp
@@ -0,0 +1,315 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+//===----------------------------------------------------------------------===//
+//
+// The ABI implementation used for RISC-V targets.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Procedure Call Standard can be found here:
+// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
+//
+// This code is based on:
+// - The Rust implementation:
+//    https://github.com/rust-lang/rust/blob/master/compiler/rustc_target/src/abi/call/riscv.rs
+// - The LLVM RISC-V backend:
+//   https://github.com/llvm/llvm-project/blob/78533528cf5ed04ac78722afff7c9f2f91aa8359/llvm/lib/Target/RISCV/RISCVISelLowering.cpp#L10865
+//
+//===----------------------------------------------------------------------===//
+
+
+struct ABI_RiscvLayout : AbiLayout {
+
+static const size_t XLen = 8;
+static const size_t FLen = 8;
+static const int NumArgGPRs = 8;
+static const int NumArgFPRs = 8;
+
+// available register num is needed to determine if fp pair or int-fp pair in a struct should be unpacked
+// WARN: with this, use_sret must only be called once before the next
+// needPassByRef call, otherwise avail_gprs is wrong
+int avail_gprs, avail_fprs;
+
+// preferred type is determined in the same time of use_sret & needPassByRef
+// cache it here to avoid computing it again in preferred_llvm_type
+Type *cached_llvmtype = NULL;
+
+ABI_RiscvLayout() : avail_gprs(NumArgGPRs), avail_fprs(NumArgFPRs) {}
+
+enum RegPassKind { UNKNOWN = 0, INTEGER = 1, FLOAT = 2 };
+
+struct ElementType {
+    RegPassKind type;
+    jl_datatype_t *dt;
+    ElementType() : type(RegPassKind::UNKNOWN), dt(NULL) {};
+};
+
+bool is_floattype(jl_datatype_t *dt) const
+{
+    return dt == jl_float16_type || dt == jl_float32_type || dt == jl_float64_type;
+}
+
+Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
+{
+    assert(is_floattype(dt));
+    switch (jl_datatype_size(dt)) {
+    case 2: return Type::getHalfTy(ctx);
+    case 4: return Type::getFloatTy(ctx);
+    case 8: return Type::getDoubleTy(ctx);
+    case 16: return Type::getFP128Ty(ctx);
+    default: assert(0 && "abi_riscv: unsupported floating point type"); return NULL;
+    }
+}
+
+// for primitive types that can be passed as integer
+// includes integer, bittypes, pointer
+Type *get_llvm_inttype(jl_datatype_t *dt, LLVMContext &ctx) const
+{
+    assert(jl_is_primitivetype(dt));
+    // XXX: without Zfh, Float16 is passed in integer registers
+    if (dt == jl_float16_type)
+        return Type::getInt32Ty(ctx);
+    assert(!is_floattype(dt));
+    if (dt == jl_bool_type)
+        return getInt8Ty(ctx);
+    if (dt == jl_int32_type)
+        return getInt32Ty(ctx);
+    if (dt == jl_int64_type)
+        return getInt64Ty(ctx);
+    int nb = jl_datatype_size(dt);
+    return Type::getIntNTy(ctx, nb * 8);
+}
+
+bool should_use_fp_conv(jl_datatype_t *dt, ElementType &ele1, ElementType &ele2) const
+{
+    if (jl_is_primitivetype(dt)) {
+        size_t dsz = jl_datatype_size(dt);
+        if (dsz > FLen) {
+            return false;
+        }
+        if (is_floattype(dt)) {
+            if (ele1.type == RegPassKind::UNKNOWN) {
+                ele1.type = RegPassKind::FLOAT;
+                ele1.dt = dt;
+            }
+            else if (ele2.type == RegPassKind::UNKNOWN) {
+                ele2.type = RegPassKind::FLOAT;
+                ele2.dt = dt;
+            }
+            else {
+                // 3 elements not eligible, must be a pair
+                return false;
+            }
+        }
+        // integer or pointer type or bitstypes
+        else {
+            if (ele1.type == RegPassKind::UNKNOWN) {
+                ele1.type = RegPassKind::INTEGER;
+                ele1.dt = dt;
+            }
+            else if (ele1.type == RegPassKind::INTEGER) {
+                // two integers not eligible
+                return false;
+            }
+            // ele1.type == RegPassKind::FLOAT
+            else {
+                if (ele2.type == RegPassKind::UNKNOWN) {
+                    ele2.type = RegPassKind::INTEGER;
+                    ele2.dt = dt;
+                }
+                else {
+                    // 3 elements not eligible, must be a pair
+                    return false;
+                }
+            }
+        }
+    }
+    else { // aggregates
+        while (size_t nfields = jl_datatype_nfields(dt)) {
+            size_t i;
+            size_t fieldsz;
+            for (i = 0; i < nfields; i++) {
+                if ((fieldsz = jl_field_size(dt, i))) {
+                    break;
+                }
+            }
+            assert(i < nfields);
+            // If there's only one non zero sized member, try again on this member
+            if (fieldsz == jl_datatype_size(dt)) {
+                dt = (jl_datatype_t *)jl_field_type(dt, i);
+                if (!jl_is_datatype(dt)) // could be inline union #46787
+                    return false;
+                continue;
+            }
+            for (; i < nfields; i++) {
+                size_t fieldsz = jl_field_size(dt, i);
+                if (fieldsz == 0)
+                    continue;
+                jl_datatype_t *fieldtype = (jl_datatype_t *)jl_field_type(dt, i);
+                if (!jl_is_datatype(dt)) // could be inline union
+                    return false;
+                // This needs to be done after the zero size member check
+                if (ele2.type != RegPassKind::UNKNOWN) {
+                    // we already have a pair and can't accept more elements
+                    return false;
+                }
+                if (!should_use_fp_conv(fieldtype, ele1, ele2)) {
+                    return false;
+                }
+            }
+            break;
+        }
+    }
+    // Tuple{Int,} can reach here as well, but doesn't really hurt
+    return true;
+}
+
+Type *get_llvm_inttype_byxlen(size_t xlen, LLVMContext &ctx) const
+{
+    if (xlen == 8) {
+        return getInt64Ty(ctx);
+    }
+    else if (xlen == 4) {
+        return getInt32Ty(ctx);
+    }
+    else {
+        assert(0 && "abi_riscv: unsupported xlen");
+        return NULL;
+    }
+}
+
+Type *classify_arg(jl_datatype_t *ty, int &avail_gprs, int &avail_fprs, bool &onstack,
+                   LLVMContext &ctx) const
+{
+    onstack = false;
+    if (ty == jl_nothing_type) {
+        return NULL;
+    }
+    ElementType ele1, ele2;
+    if (should_use_fp_conv(ty, ele1, ele2)) {
+        if (ele1.type == RegPassKind::FLOAT) {
+            if (ele2.type == RegPassKind::FLOAT) {
+                if (avail_fprs >= 2) {
+                    avail_fprs -= 2;
+                    SmallVector<Type *, 2> eles;
+                    eles.push_back(get_llvm_fptype(ele1.dt, ctx));
+                    eles.push_back(get_llvm_fptype(ele2.dt, ctx));
+                    return StructType::get(ctx, eles);
+                }
+            }
+            else if (ele2.type == RegPassKind::INTEGER) {
+                if (avail_fprs >= 1 && avail_gprs >= 1) {
+                    avail_fprs -= 1;
+                    avail_gprs -= 1;
+                    SmallVector<Type *, 2> eles;
+                    eles.push_back(get_llvm_fptype(ele1.dt, ctx));
+                    eles.push_back(get_llvm_inttype(ele2.dt, ctx));
+                    return StructType::get(ctx, eles);
+                }
+            }
+            else {
+                // A struct containing just one floating-point real is passed
+                // as though it were a standalone floating-point real.
+                if (avail_fprs >= 1) {
+                    avail_fprs -= 1;
+                    return get_llvm_fptype(ele1.dt, ctx);
+                }
+            }
+        }
+        else if (ele1.type == RegPassKind::INTEGER) {
+            if (ele2.type == RegPassKind::FLOAT) {
+                if (avail_fprs >= 1 && avail_gprs >= 1) {
+                    avail_fprs -= 1;
+                    avail_gprs -= 1;
+                    return StructType::get(get_llvm_inttype(ele1.dt, ctx),
+                                           get_llvm_fptype(ele2.dt, ctx));
+                }
+            }
+        }
+    }
+    size_t dsz = jl_datatype_size(ty);
+    if (dsz > 2 * XLen) {
+        if (!jl_is_primitivetype(ty)) {
+            onstack = true;
+        }
+        // else let llvm backend handle scalars
+        if (avail_gprs >= 1) {
+            avail_gprs -= 1;
+        }
+        return NULL;
+    }
+
+    if (dsz > XLen) {
+        size_t alignment = jl_datatype_align(ty);
+        bool align_regs = alignment > XLen;
+        if (avail_gprs >= 2) {
+            avail_gprs -= 2;
+        }
+        // should we handle variadic as well?
+        // Variadic arguments with 2×XLEN-bit alignment and size at most 2×XLEN
+        // bits are passed in an aligned register pair
+        else {
+            avail_gprs = 0;
+        }
+
+        if (!jl_is_primitivetype(ty)) {
+            // Aggregates or scalars passed on the stack are aligned to the
+            // greater of the type alignment and XLen bits, but never more than
+            // the stack alignment.
+            if (align_regs) {
+                if (alignment == 16) {
+                    return Type::getInt128Ty(ctx);
+                }
+                else {
+                    return Type::getInt64Ty(ctx);
+                }
+            }
+            else {
+                return ArrayType::get(get_llvm_inttype_byxlen(XLen, ctx), 2);
+            }
+        }
+        // let llvm backend handle scalars
+        return NULL;
+    }
+
+    //else dsz <= XLen
+    if (avail_gprs >= 1) {
+        avail_gprs -= 1;
+    }
+    if (!jl_is_primitivetype(ty)) {
+        return get_llvm_inttype_byxlen(XLen, ctx);
+    }
+    return get_llvm_inttype(ty, ctx);
+}
+
+bool use_sret(jl_datatype_t *ty, LLVMContext &ctx) override
+{
+    bool onstack = false;
+    int gprs = 2;
+    int fprs = FLen ? 2 : 0;
+    this->cached_llvmtype = classify_arg(ty, gprs, fprs, onstack, ctx);
+    if (onstack) {
+        this->avail_gprs -= 1;
+        return true;
+    }
+    else {
+        return false;
+    }
+}
+
+bool needPassByRef(jl_datatype_t *ty, AttrBuilder &ab, LLVMContext &ctx,
+                   Type *Ty) override
+{
+    bool onstack = false;
+    this->cached_llvmtype =
+        classify_arg(ty, this->avail_gprs, this->avail_fprs, onstack, ctx);
+    return onstack;
+}
+
+Type *preferred_llvm_type(jl_datatype_t *ty, bool isret,
+                          LLVMContext &ctx) const override
+{
+    return this->cached_llvmtype;
+}
+
+};
diff --git a/src/abi_win32.cpp b/src/abi_win32.cpp
index 078d9b6df4e44..ccfc6a16ebee3 100644
--- a/src/abi_win32.cpp
+++ b/src/abi_win32.cpp
@@ -52,7 +52,7 @@ bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override
 {
     // Use pass by reference for all structs
-    if (dt->layout->nfields > 0) {
+    if (dt->layout->nfields > 0 || dt->layout->npointers) {
         ab.addByValAttr(Ty);
         return true;
     }
@@ -63,7 +63,7 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const
 {
     // Arguments are either scalar or passed by value
     // rewrite integer sized (non-sret) struct to the corresponding integer
-    if (!dt->layout->nfields)
+    if (!dt->layout->nfields && !dt->layout->npointers)
         return NULL;
     return Type::getIntNTy(ctx, jl_datatype_nbits(dt));
 }
diff --git a/src/abi_x86_64.cpp b/src/abi_x86_64.cpp
index c3d12417e6de8..6a853421dbccd 100644
--- a/src/abi_x86_64.cpp
+++ b/src/abi_x86_64.cpp
@@ -118,7 +118,8 @@ struct Classification {
 void classifyType(Classification& accum, jl_datatype_t *dt, uint64_t offset) const
 {
     // Floating point types
-    if (dt == jl_float64_type || dt == jl_float32_type) {
+    if (dt == jl_float64_type || dt == jl_float32_type || dt == jl_float16_type ||
+        dt == jl_bfloat16_type) {
         accum.addField(offset, Sse);
     }
     // Misc types
@@ -147,7 +148,7 @@ void classifyType(Classification& accum, jl_datatype_t *dt, uint64_t offset) con
         accum.addField(offset, Sse);
     }
     // Other struct types
-    else if (jl_datatype_size(dt) <= 16 && dt->layout) {
+    else if (jl_datatype_size(dt) <= 16 && dt->layout && !jl_is_layout_opaque(dt->layout)) {
         size_t i;
         for (i = 0; i < jl_datatype_nfields(dt); ++i) {
             jl_value_t *ty = jl_field_type(dt, i);
@@ -239,7 +240,9 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const
                 types[0] = Type::getIntNTy(ctx, nbits);
             break;
         case Sse:
-            if (size <= 4)
+            if (size <= 2)
+                types[0] = Type::getHalfTy(ctx);
+            else if (size <= 4)
                 types[0] = Type::getFloatTy(ctx);
             else
                 types[0] = Type::getDoubleTy(ctx);
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index 1f02a014175b4..fe8b97a052f03 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -4,7 +4,8 @@
 #include "platform.h"
 
 // target support
-#include <llvm/ADT/Triple.h>
+#include <llvm/TargetParser/Triple.h>
+#include "llvm/Support/CodeGen.h"
 #include <llvm/ADT/Statistic.h>
 #include <llvm/Analysis/TargetLibraryInfo.h>
 #include <llvm/Analysis/TargetTransformInfo.h>
@@ -14,24 +15,9 @@
 
 // analysis passes
 #include <llvm/Analysis/Passes.h>
-#include <llvm/Analysis/BasicAliasAnalysis.h>
-#include <llvm/Analysis/TypeBasedAliasAnalysis.h>
-#include <llvm/Analysis/ScopedNoAliasAA.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/PassManager.h>
 #include <llvm/IR/Verifier.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/Scalar.h>
-#include <llvm/Transforms/Vectorize.h>
-#include <llvm/Transforms/Instrumentation/AddressSanitizer.h>
-#include <llvm/Transforms/Instrumentation/MemorySanitizer.h>
-#include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
-#include <llvm/Transforms/Scalar/GVN.h>
-#include <llvm/Transforms/IPO/AlwaysInliner.h>
-#include <llvm/Transforms/InstCombine/InstCombine.h>
-#include <llvm/Transforms/Scalar/InstSimplifyPass.h>
-#include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
-#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
 #include <llvm/Transforms/Utils/ModuleUtils.h>
 #include <llvm/Passes/PassBuilder.h>
 #include <llvm/Passes/PassPlugin.h>
@@ -56,18 +42,17 @@
 #include <llvm/Support/FormatAdapters.h>
 #include <llvm/Linker/Linker.h>
 
-
 using namespace llvm;
 
+#include <zstd.h>
+
 #include "jitlayers.h"
 #include "serialize.h"
 #include "julia_assert.h"
-#include "llvm-codegen-shared.h"
 #include "processor.h"
 
 #define DEBUG_TYPE "julia_aotcompile"
 
-STATISTIC(CICacheLookups, "Number of codeinst cache lookups");
 STATISTIC(CreateNativeCalls, "Number of jl_create_native calls made");
 STATISTIC(CreateNativeMethods, "Number of methods compiled for jl_create_native");
 STATISTIC(CreateNativeMax, "Max number of methods compiled at once for jl_create_native");
@@ -85,11 +70,11 @@ static void addComdat(GlobalValue *G, Triple &T)
 
 typedef struct {
     orc::ThreadSafeModule M;
-    std::vector<GlobalValue*> jl_sysimg_fvars;
-    std::vector<GlobalValue*> jl_sysimg_gvars;
+    SmallVector<GlobalValue*, 0> jl_sysimg_fvars;
+    SmallVector<GlobalValue*, 0> jl_sysimg_gvars;
     std::map<jl_code_instance_t*, std::tuple<uint32_t, uint32_t>> jl_fvar_map;
-    std::vector<void*> jl_value_to_llvm;
-    std::vector<jl_code_instance_t*> jl_external_to_llvm;
+    SmallVector<void*, 0> jl_value_to_llvm;
+    SmallVector<jl_code_instance_t*, 0> jl_external_to_llvm;
 } jl_native_code_desc_t;
 
 extern "C" JL_DLLEXPORT_CODEGEN
@@ -106,22 +91,72 @@ void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst,
     }
 }
 
-extern "C" JL_DLLEXPORT_CODEGEN
-void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs)
+extern "C" JL_DLLEXPORT_CODEGEN void
+jl_get_llvm_cis_impl(void *native_code, size_t *num_elements, jl_code_instance_t **data)
 {
-    // map a memory location (jl_value_t or jl_binding_t) to a GlobalVariable
-    jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
-    arraylist_grow(gvs, data->jl_value_to_llvm.size());
-    memcpy(gvs->items, data->jl_value_to_llvm.data(), gvs->len * sizeof(void*));
+    jl_native_code_desc_t *desc = (jl_native_code_desc_t *)native_code;
+    auto &map = desc->jl_fvar_map;
+
+    if (data == NULL) {
+        *num_elements = map.size();
+        return;
+    }
+
+    assert(*num_elements == map.size());
+    size_t i = 0;
+    for (auto &ci : map) {
+        data[i++] = ci.first;
+    }
 }
 
-extern "C" JL_DLLEXPORT_CODEGEN
-void jl_get_llvm_external_fns_impl(void *native_code, arraylist_t *external_fns)
+// get the list of global variables managed by the compiler
+extern "C" JL_DLLEXPORT_CODEGEN void jl_get_llvm_gvs_impl(void *native_code,
+                                                          size_t *num_elements, void **data)
 {
-    jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
-    arraylist_grow(external_fns, data->jl_external_to_llvm.size());
-    memcpy(external_fns->items, data->jl_external_to_llvm.data(),
-        external_fns->len * sizeof(jl_code_instance_t*));
+    jl_native_code_desc_t *desc = (jl_native_code_desc_t *)native_code;
+    auto &gvars = desc->jl_sysimg_gvars;
+
+    if (data == NULL) {
+        *num_elements = gvars.size();
+        return;
+    }
+
+    assert(*num_elements == gvars.size());
+    memcpy(data, gvars.data(), *num_elements * sizeof(void *));
+}
+
+// get the initializer values (jl_value_t or jl_binding_t ptr) of managed global variables
+extern "C" JL_DLLEXPORT_CODEGEN void jl_get_llvm_gv_inits_impl(void *native_code,
+                                                               size_t *num_elements,
+                                                               void **data)
+{
+    jl_native_code_desc_t *desc = (jl_native_code_desc_t *)native_code;
+    auto &inits = desc->jl_value_to_llvm;
+
+    if (data == NULL) {
+        *num_elements = inits.size();
+        return;
+    }
+
+    assert(*num_elements == inits.size());
+    memcpy(data, inits.data(), *num_elements * sizeof(void *));
+}
+
+extern "C" JL_DLLEXPORT_CODEGEN void jl_get_llvm_external_fns_impl(void *native_code,
+                                                                   size_t *num_elements,
+                                                                   jl_code_instance_t *data)
+{
+    jl_native_code_desc_t *desc = (jl_native_code_desc_t *)native_code;
+    auto &external_map = desc->jl_external_to_llvm;
+
+    if (data == NULL) {
+        *num_elements = external_map.size();
+        return;
+    }
+
+    assert(*num_elements == external_map.size());
+    memcpy((void *)data, (const void *)external_map.data(),
+           *num_elements * sizeof(jl_code_instance_t *));
 }
 
 extern "C" JL_DLLEXPORT_CODEGEN
@@ -145,12 +180,78 @@ GlobalValue* jl_get_llvm_function_impl(void *native_code, uint32_t idx)
 }
 
 
-static void emit_offset_table(Module &mod, const std::vector<GlobalValue*> &vars, StringRef name, Type *T_psize)
+
+template<typename T>
+static inline SmallVector<T*, 0> consume_gv(Module &M, const char *name, bool allow_bad_fvars)
+{
+    // Get information about sysimg export functions from the two global variables.
+    // Strip them from the Module so that it's easier to handle the uses.
+    GlobalVariable *gv = M.getGlobalVariable(name);
+    assert(gv && gv->hasInitializer());
+    ArrayType *Ty = cast<ArrayType>(gv->getInitializer()->getType());
+    unsigned nele = Ty->getArrayNumElements();
+    SmallVector<T*, 0> res(nele);
+    ConstantArray *ary = nullptr;
+    if (gv->getInitializer()->isNullValue()) {
+        for (unsigned i = 0; i < nele; ++i)
+            res[i] = cast<T>(Constant::getNullValue(Ty->getArrayElementType()));
+    }
+    else {
+        ary = cast<ConstantArray>(gv->getInitializer());
+        unsigned i = 0;
+        while (i < nele) {
+            llvm::Value *val = ary->getOperand(i)->stripPointerCasts();
+            if (allow_bad_fvars && (!isa<T>(val) || (isa<Function>(val) && cast<Function>(val)->isDeclaration()))) {
+                // Shouldn't happen in regular use, but can happen in bugpoint.
+                nele--;
+                continue;
+            }
+            res[i++] = cast<T>(val);
+        }
+        res.resize(nele);
+    }
+    assert(gv->use_empty());
+    gv->eraseFromParent();
+    if (ary && ary->use_empty())
+        ary->destroyConstant();
+    return res;
+}
+
+static Constant *get_ptrdiff32(Type *T_size, Constant *ptr, Constant *base)
+{
+    if (ptr->getType()->isPointerTy())
+        ptr = ConstantExpr::getPtrToInt(ptr, T_size);
+    auto ptrdiff = ConstantExpr::getSub(ptr, base);
+    return T_size->getPrimitiveSizeInBits() > 32 ? ConstantExpr::getTrunc(ptrdiff, Type::getInt32Ty(ptr->getContext())) : ptrdiff;
+}
+
+static Constant *emit_offset_table(Module &M, Type *T_size, ArrayRef<Constant*> vars,
+                                   StringRef name, StringRef suffix)
+{
+    auto T_int32 = Type::getInt32Ty(M.getContext());
+    uint32_t nvars = vars.size();
+    ArrayType *vars_type = ArrayType::get(T_int32, nvars + 1);
+    auto gv = new GlobalVariable(M, vars_type, true,
+                                 GlobalVariable::ExternalLinkage,
+                                 nullptr,
+                                 name + "_offsets" + suffix);
+    auto vbase = ConstantExpr::getPtrToInt(gv, T_size);
+    SmallVector<Constant*, 0> offsets(nvars + 1);
+    offsets[0] = ConstantInt::get(T_int32, nvars);
+    for (uint32_t i = 0; i < nvars; i++)
+        offsets[i + 1] = get_ptrdiff32(T_size, vars[i], vbase);
+    gv->setInitializer(ConstantArray::get(vars_type, offsets));
+    gv->setVisibility(GlobalValue::HiddenVisibility);
+    gv->setDSOLocal(true);
+    return vbase;
+}
+
+static void emit_table(Module &mod, ArrayRef<GlobalValue*> vars,
+                       StringRef name, Type *T_psize)
 {
     // Emit a global variable with all the variable addresses.
-    // The cloning pass will convert them into offsets.
     size_t nvars = vars.size();
-    std::vector<Constant*> addrs(nvars);
+    SmallVector<Constant*, 0> addrs(nvars);
     for (size_t i = 0; i < nvars; i++) {
         Constant *var = vars[i];
         addrs[i] = ConstantExpr::getBitCast(var, T_psize);
@@ -223,152 +324,572 @@ static void makeSafeName(GlobalObject &G)
         G.setName(StringRef(SafeName.data(), SafeName.size()));
 }
 
-static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance_t *mi, size_t world, jl_code_instance_t **ci_out, jl_code_info_t **src_out)
+namespace { // file-local namespace
+class egal_set {
+public:
+    jl_genericmemory_t *list = (jl_genericmemory_t*)jl_an_empty_memory_any;
+    jl_genericmemory_t *keyset = (jl_genericmemory_t*)jl_an_empty_memory_any;
+    egal_set(egal_set&) = delete;
+    egal_set(egal_set&&) = delete;
+    egal_set() = default;
+    void insert(jl_value_t *val)
+    {
+        jl_value_t *rval = jl_idset_get(list, keyset, val);
+        if (rval == NULL) {
+            ssize_t idx;
+            list = jl_idset_put_key(list, val, &idx);
+            keyset = jl_idset_put_idx(list, keyset, idx);
+        }
+    }
+    jl_value_t *get(jl_value_t *val)
+    {
+        return jl_idset_get(list, keyset, val);
+    }
+};
+}
+using ::egal_set;
+struct jl_compiled_function_t {
+   orc::ThreadSafeModule TSM;
+   jl_llvm_functions_t decls;
+};
+typedef DenseMap<jl_code_instance_t*, jl_compiled_function_t> jl_compiled_functions_t;
+
+static void record_method_roots(egal_set &method_roots, jl_method_instance_t *mi)
+{
+    jl_method_t *m = mi->def.method;
+    if (!jl_is_method(m))
+        return;
+    // the method might have a root for this already; use it if so
+    JL_LOCK(&m->writelock);
+    if (m->roots) {
+        size_t j, len = jl_array_dim0(m->roots);
+        for (j = 0; j < len; j++) {
+            jl_value_t *v = jl_array_ptr_ref(m->roots, j);
+            if (jl_is_globally_rooted(v))
+                continue;
+            method_roots.insert(v);
+        }
+    }
+    JL_UNLOCK(&m->writelock);
+}
+
+static void aot_optimize_roots(jl_codegen_params_t &params, egal_set &method_roots, jl_compiled_functions_t &compiled_functions)
+{
+    for (size_t i = 0; i < jl_array_dim0(params.temporary_roots); i++) {
+        jl_value_t *val = jl_array_ptr_ref(params.temporary_roots, i);
+        auto ref = params.global_targets.find((void*)val);
+        if (ref == params.global_targets.end())
+            continue;
+        auto get_global_root = [val, &method_roots]() {
+            if (jl_is_globally_rooted(val))
+                return val;
+            jl_value_t *mval = method_roots.get(val);
+            if (mval)
+                return mval;
+            return jl_as_global_root(val, 1);
+        };
+        jl_value_t *mval = get_global_root();
+        if (mval != val) {
+            GlobalVariable *GV = ref->second;
+            params.global_targets.erase(ref);
+            auto mref = params.global_targets.find((void*)mval);
+            if (mref != params.global_targets.end()) {
+                // replace ref with mref in all Modules
+                std::string OldName(GV->getName());
+                StringRef NewName(mref->second->getName());
+                for (auto &def : compiled_functions) {
+                    orc::ThreadSafeModule &TSM = def.second.TSM;
+                    Module &M = *TSM.getModuleUnlocked();
+                    if (GlobalValue *GV2 = M.getNamedValue(OldName)) {
+                        if (GV2 == GV)
+                            GV = nullptr;
+                        // either replace or rename the old value to use the other equivalent name
+                        if (GlobalValue *GV3 = M.getNamedValue(NewName)) {
+                            GV2->replaceAllUsesWith(GV3);
+                            GV2->eraseFromParent();
+                        }
+                        else {
+                            GV2->setName(NewName);
+                        }
+                    }
+                }
+                assert(GV == nullptr);
+            }
+            else {
+                params.global_targets[(void*)mval] = GV;
+            }
+        }
+    }
+}
+
+static jl_compiled_functions_t::iterator get_ci_equiv_compiled(jl_code_instance_t *ci JL_PROPAGATES_ROOT, jl_compiled_functions_t &compiled_functions) JL_NOTSAFEPOINT
 {
-    ++CICacheLookups;
-    jl_value_t *ci = cgparams.lookup(mi, world, world);
-    JL_GC_PROMISE_ROOTED(ci);
+    jl_value_t *def = ci->def;
+    jl_value_t *owner = ci->owner;
+    jl_value_t *rettype = ci->rettype;
+    size_t min_world = jl_atomic_load_relaxed(&ci->min_world);
+    size_t max_world = jl_atomic_load_relaxed(&ci->max_world);
+    for (auto it = compiled_functions.begin(), E = compiled_functions.end(); it != E; ++it) {
+        auto codeinst = it->first;
+        if (codeinst != ci &&
+            jl_atomic_load_relaxed(&codeinst->inferred) != NULL &&
+            jl_atomic_load_relaxed(&codeinst->min_world) <= min_world &&
+            jl_atomic_load_relaxed(&codeinst->max_world) >= max_world &&
+            jl_egal(codeinst->def, def) &&
+            jl_egal(codeinst->owner, owner) &&
+            jl_egal(codeinst->rettype, rettype)) {
+            return it;
+        }
+    }
+    return compiled_functions.end();
+}
+
+static void resolve_workqueue(jl_codegen_params_t &params, egal_set &method_roots, jl_compiled_functions_t &compiled_functions)
+{
+    jl_workqueue_t workqueue;
+    std::swap(params.workqueue, workqueue);
     jl_code_instance_t *codeinst = NULL;
-    if (ci != jl_nothing) {
-        codeinst = (jl_code_instance_t*)ci;
-        *src_out = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
-        jl_method_t *def = codeinst->def->def.method;
-        if ((jl_value_t*)*src_out == jl_nothing)
-            *src_out = NULL;
-        if (*src_out && jl_is_method(def))
-            *src_out = jl_uncompress_ir(def, codeinst, (jl_value_t*)*src_out);
-    }
-    if (*src_out == NULL || !jl_is_code_info(*src_out)) {
-        if (cgparams.lookup != jl_rettype_inferred_addr) {
-            jl_error("Refusing to automatically run type inference with custom cache lookup.");
+    JL_GC_PUSH1(&codeinst);
+    assert(!params.cache);
+    while (!workqueue.empty()) {
+        auto it = workqueue.pop_back_val();
+        codeinst = it.first;
+        auto &proto = it.second;
+        // try to emit code for this item from the workqueue
+        StringRef invokeName = "";
+        StringRef preal_decl = "";
+        bool preal_specsig = false;
+        {
+            auto it = compiled_functions.find(codeinst);
+            if (it == compiled_functions.end())
+                it = get_ci_equiv_compiled(codeinst, compiled_functions);
+            if (it != compiled_functions.end()) {
+                auto &decls = it->second.decls;
+                invokeName = decls.functionObject;
+                if (decls.functionObject == "jl_fptr_args") {
+                    preal_decl = decls.specFunctionObject;
+                }
+                else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call" && decls.functionObject != "jl_fptr_const_return") {
+                    preal_decl = decls.specFunctionObject;
+                    preal_specsig = true;
+                }
+            }
         }
-        else {
-            *src_out = jl_type_infer(mi, world, 0);
-            if (*src_out) {
-                codeinst = jl_get_method_inferred(mi, (*src_out)->rettype, (*src_out)->min_world, (*src_out)->max_world);
-                if ((*src_out)->inferred) {
-                    jl_value_t *null = nullptr;
-                    jl_atomic_cmpswap_relaxed(&codeinst->inferred, &null, jl_nothing);
+        // patch up the prototype we emitted earlier
+        Module *mod = proto.decl->getParent();
+        assert(proto.decl->isDeclaration());
+        Function *pinvoke = nullptr;
+        if (preal_decl.empty() && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) {
+            std::string gf_thunk_name = emit_abi_constreturn(mod, params, proto.specsig, codeinst);
+            preal_specsig = proto.specsig;
+            if (invokeName.empty())
+                invokeName = "jl_fptr_const_return";
+            preal_decl = mod->getNamedValue(gf_thunk_name)->getName();
+        }
+        if (preal_decl.empty()) {
+            pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params);
+            if (!proto.specsig) {
+                proto.decl->replaceAllUsesWith(pinvoke);
+                proto.decl->eraseFromParent();
+                proto.decl = pinvoke;
+            }
+        }
+        if (proto.specsig && !preal_specsig) {
+            // get or build an fptr1 that can invoke codeinst
+            if (pinvoke == nullptr)
+                pinvoke = get_or_emit_fptr1(preal_decl, mod);
+            // emit specsig-to-(jl)invoke conversion
+            proto.decl->setLinkage(GlobalVariable::InternalLinkage);
+            //protodecl->setAlwaysInline();
+            jl_init_function(proto.decl, params);
+            jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+            size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed
+            bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
+            // TODO: maybe this can be cached in codeinst->specfptr?
+            emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke);
+            preal_decl = ""; // no need to fixup the name
+        }
+        if (!preal_decl.empty()) {
+            // merge and/or rename this prototype to the real function
+            if (Function *specfun = cast_or_null<Function>(mod->getNamedValue(preal_decl))) {
+                if (proto.decl != specfun) {
+                    proto.decl->replaceAllUsesWith(specfun);
+                    proto.decl->eraseFromParent();
+                    proto.decl = specfun;
+                }
+            }
+            else {
+                proto.decl->setName(preal_decl);
+            }
+        }
+        if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too
+            assert(proto.specsig);
+            StringRef ocinvokeDecl = invokeName;
+            // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too
+            // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure
+            if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return")
+                ocinvokeDecl = pinvoke->getName();
+            assert(!ocinvokeDecl.empty());
+            assert(ocinvokeDecl != "jl_fptr_args");
+            assert(ocinvokeDecl != "jl_fptr_const_return");
+            assert(ocinvokeDecl != "jl_fptr_sparam");
+            // merge and/or rename this prototype to the real function
+            if (Function *specfun = cast_or_null<Function>(mod->getNamedValue(ocinvokeDecl))) {
+                if (proto.oc != specfun) {
+                    proto.oc->replaceAllUsesWith(specfun);
+                    proto.oc->eraseFromParent();
+                    proto.oc = specfun;
                 }
             }
+            else {
+                proto.oc->setName(ocinvokeDecl);
+            }
         }
+        workqueue.append(params.workqueue);
+        params.workqueue.clear();
     }
-    *ci_out = codeinst;
+    JL_GC_POP();
 }
 
-// takes the running content that has collected in the shadow module and dump it to disk
-// this builds the object file portion of the sysimage files for fast startup, and can
+
+/// Link the function in the source module into the destination module if
+/// needed, setting up mapping information.
+/// Similar to orc::cloneFunctionDecl, but more complete for greater correctness
+Function *IRLinker_copyFunctionProto(Module *DstM, Function *SF) {
+  // If there is no linkage to be performed or we are linking from the source,
+  // bring SF over, if we haven't already.
+  if (SF->getParent() == DstM)
+    return SF;
+  if (auto *F = DstM->getNamedValue(SF->getName()))
+      return cast<Function>(F);
+  auto *F = Function::Create(SF->getFunctionType(), SF->getLinkage(),
+                             SF->getAddressSpace(), SF->getName(), DstM);
+  F->copyAttributesFrom(SF);
+  F->IsNewDbgInfoFormat = SF->IsNewDbgInfoFormat;
+
+  // Remove these copied constants since they point to the source module.
+  F->setPersonalityFn(nullptr);
+  F->setPrefixData(nullptr);
+  F->setPrologueData(nullptr);
+  return F;
+}
+
+static Function *aot_abi_converter(jl_codegen_params_t &params, Module *M, jl_abi_t from_abi, jl_code_instance_t *codeinst, Module *defM, StringRef func, StringRef specfunc, bool target_specsig)
+{
+    std::string gf_thunk_name;
+    if (!specfunc.empty()) {
+        Value *llvmtarget = IRLinker_copyFunctionProto(M, defM->getFunction(specfunc));
+        gf_thunk_name = emit_abi_converter(M, params, from_abi, codeinst, llvmtarget, target_specsig);
+    }
+    else {
+        Value *llvmtarget = func.empty() ? nullptr : IRLinker_copyFunctionProto(M, defM->getFunction(func));
+        gf_thunk_name = emit_abi_dispatcher(M, params, from_abi, codeinst, llvmtarget);
+    }
+    auto F = M->getFunction(gf_thunk_name);
+    assert(F);
+    return F;
+}
+
+static void generate_cfunc_thunks(jl_codegen_params_t &params, jl_compiled_functions_t &compiled_functions)
+{
+    DenseMap<jl_method_instance_t*, jl_code_instance_t*> compiled_mi;
+    for (auto &def : compiled_functions) {
+        jl_code_instance_t *this_code = def.first;
+        jl_method_instance_t *mi = jl_get_ci_mi(this_code);
+        if (this_code->owner == jl_nothing && jl_atomic_load_relaxed(&this_code->max_world) == ~(size_t)0 && this_code->def == (jl_value_t*)mi)
+            compiled_mi[mi] = this_code;
+    }
+    size_t latestworld = jl_atomic_load_acquire(&jl_world_counter);
+    for (cfunc_decl_t &cfunc : params.cfuncs) {
+        Module *M = cfunc.cfuncdata->getParent();
+        jl_value_t *sigt = cfunc.abi.sigt;
+        JL_GC_PROMISE_ROOTED(sigt);
+        jl_value_t *declrt = cfunc.abi.rt;
+        JL_GC_PROMISE_ROOTED(declrt);
+        Function *unspec = aot_abi_converter(params, M, cfunc.abi, nullptr, nullptr, "", "", false);
+        jl_code_instance_t *codeinst = nullptr;
+        auto assign_fptr = [&params, &cfunc, &codeinst, &unspec](Function *f) {
+            ConstantArray *init = cast<ConstantArray>(cfunc.cfuncdata->getInitializer());
+            SmallVector<Constant*,8> initvals;
+            for (unsigned i = 0; i < init->getNumOperands(); ++i)
+                initvals.push_back(init->getOperand(i));
+            assert(initvals.size() == 8);
+            assert(initvals[0]->isNullValue());
+            assert(initvals[2]->isNullValue());
+            if (codeinst) {
+                Constant *llvmcodeinst = literal_pointer_val_slot(params, f->getParent(), (jl_value_t*)codeinst);
+                initvals[2] = llvmcodeinst; // plast_codeinst
+            }
+            assert(initvals[4]->isNullValue());
+            initvals[4] = unspec;
+            initvals[0] = f;
+            cfunc.cfuncdata->setInitializer(ConstantArray::get(init->getType(), initvals));
+        };
+        Module *defM = nullptr;
+        StringRef func;
+        jl_method_instance_t *mi = (jl_method_instance_t*)jl_get_specialization1((jl_tupletype_t*)sigt, latestworld, 0);
+        if ((jl_value_t*)mi != jl_nothing) {
+            auto it = compiled_mi.find(mi);
+            if (it != compiled_mi.end()) {
+                codeinst = it->second;
+                JL_GC_PROMISE_ROOTED(codeinst);
+                auto defs = compiled_functions.find(codeinst);
+                defM = defs->second.TSM.getModuleUnlocked();
+                const jl_llvm_functions_t &decls = defs->second.decls;
+                func = decls.functionObject;
+                StringRef specfunc = decls.specFunctionObject;
+                jl_value_t *astrt = codeinst->rettype;
+                if (astrt != (jl_value_t*)jl_bottom_type &&
+                    jl_type_intersection(astrt, declrt) == jl_bottom_type) {
+                    // Do not warn if the function never returns since it is
+                    // occasionally required by the C API (typically error callbacks)
+                    // even though we're likely to encounter memory errors in that case
+                    jl_printf(JL_STDERR, "WARNING: cfunction: return type of %s does not match\n", name_from_method_instance(mi));
+                }
+                if (func == "jl_fptr_const_return") {
+                    std::string gf_thunk_name = emit_abi_constreturn(M, params, cfunc.abi, codeinst->rettype_const);
+                    auto F = M->getFunction(gf_thunk_name);
+                    assert(F);
+                    assign_fptr(F);
+                    continue;
+                }
+                else if (func == "jl_fptr_args") {
+                    assert(!specfunc.empty());
+                    if (!cfunc.abi.specsig && jl_subtype(astrt, declrt)) {
+                        assign_fptr(IRLinker_copyFunctionProto(M, defM->getFunction(specfunc)));
+                        continue;
+                    }
+                    assign_fptr(aot_abi_converter(params, M, cfunc.abi, codeinst, defM, func, specfunc, false));
+                    continue;
+                }
+                else if (func == "jl_fptr_sparam" || func == "jl_f_opaque_closure_call") {
+                    func = ""; // use jl_invoke instead for these, since we don't declare these prototypes
+                }
+                else {
+                    assert(!specfunc.empty());
+                    if (jl_egal(mi->specTypes, sigt) && jl_egal(declrt, astrt)) {
+                        assign_fptr(IRLinker_copyFunctionProto(M, defM->getFunction(specfunc)));
+                        continue;
+                    }
+                    assign_fptr(aot_abi_converter(params, M, cfunc.abi, codeinst, defM, func, specfunc, true));
+                    continue;
+                }
+            }
+        }
+        Function *f = codeinst ? aot_abi_converter(params, M, cfunc.abi, codeinst, defM, func, "", false) : unspec;
+        assign_fptr(f);
+    }
+}
+
+// destructively move the contents of src into dest
+// this assumes that the targets of the two modules are the same
+// including the DataLayout and ModuleFlags (for example)
+// and that there is no module-level assembly
+// Comdat is also removed, since this needs to be re-added later
+static void jl_merge_module(Linker &L, orc::ThreadSafeModule srcTSM) JL_NOTSAFEPOINT
+{
+    srcTSM.consumingModuleDo([&L](std::unique_ptr<Module> src) JL_NOTSAFEPOINT {
+        bool error = L.linkInModule(std::move(src));
+        assert(!error && "linking llvmcall modules failed");
+        (void)error;
+    });
+}
+
+static bool canPartition(const Function &F)
+{
+    return !F.hasFnAttribute(Attribute::AlwaysInline) &&
+           !F.hasFnAttribute(Attribute::InlineHint);
+}
+
+// this builds the object file portion of the sysimage files for fast startup
+// `external_linkage` create linkages between pkgimages.
+extern "C" JL_DLLEXPORT_CODEGEN
+void *jl_create_native_impl(LLVMOrcThreadSafeModuleRef llvmmod, int trim, int external_linkage, size_t world,
+                           jl_array_t *mod_array, jl_array_t *worklist, int all, jl_array_t *module_init_order)
+{
+    JL_TIMING(INFERENCE, INFERENCE);
+    auto ct = jl_current_task;
+    if (!jl_compile_and_emit_func) {
+        jl_error("inference not available for generating compiled output");
+    }
+    bool timed = (ct->reentrant_timing & 1) == 0;
+    if (timed)
+        ct->reentrant_timing |= 1;
+    uint64_t compiler_start_time = 0;
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
+        compiler_start_time = jl_hrtime();
+
+    jl_value_t **fargs;
+    JL_GC_PUSHARGS(fargs, 8);
+#ifdef _P64
+    jl_value_t *jl_array_ulong_type = jl_array_uint64_type;
+#else
+    jl_value_t *jl_array_ulong_type = jl_array_uint32_type;
+#endif
+    jl_array_t *worlds = jl_alloc_array_1d(jl_array_ulong_type, 2);
+    fargs[0] = jl_compile_and_emit_func;
+    fargs[1] = (jl_value_t*)worlds;
+    jl_array_data(worlds, size_t)[0] = jl_typeinf_world;
+    int compiler_world = 1;
+    if (trim || jl_array_data(worlds, size_t)[0] == 0 || external_linkage)
+        compiler_world = 0;
+    jl_array_data(worlds, size_t)[compiler_world] = world; // might overwrite previous
+    worlds->dimsize[0] = 1 + compiler_world;
+    fargs[2] = jl_box_uint8(trim);
+    fargs[3] = jl_box_bool(external_linkage);
+    fargs[4] = worklist ? (jl_value_t*)worklist : jl_nothing; // worklist (or nothing)
+    fargs[5] = mod_array ? (jl_value_t*)mod_array : jl_nothing; // mod_array (or nothing)
+    fargs[6] = jl_box_bool(all);
+    fargs[7] = module_init_order ? (jl_value_t*)module_init_order : jl_nothing; // module_init_order (or nothing)
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_typeinf_world;
+    fargs[0] = jl_apply(fargs, 8);
+    fargs[1] = fargs[2] = fargs[3] = fargs[4] = fargs[5] = fargs[6] = fargs[7] = NULL;
+    ct->world_age = last_age;
+    jl_value_t *codeinfos = fargs[0];
+    JL_TYPECHK(jl_create_native, array_any, codeinfos);
+    void *data = jl_emit_native((jl_array_t*)codeinfos, llvmmod, NULL, external_linkage ? 1 : 0);
+    JL_GC_POP();
+
+    // move everything inside, now that we've merged everything
+    // (before adding the exported headers)
+    ((jl_native_code_desc_t*)data)->M.withModuleDo([&](Module &M) {
+        auto TT = Triple(M.getTargetTriple());
+        Function *juliapersonality_func = nullptr;
+        if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
+            // setting the function personality enables stack unwinding and catching exceptions
+            // so make sure everything has something set
+            Type *T_int32 = Type::getInt32Ty(M.getContext());
+            juliapersonality_func = Function::Create(FunctionType::get(T_int32, true),
+                Function::ExternalLinkage, "__julia_personality", M);
+            juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass);
+        }
+        for (GlobalObject &G : M.global_objects()) {
+            if (!G.isDeclaration()) {
+                G.setLinkage(GlobalValue::InternalLinkage);
+                G.setDSOLocal(true);
+                makeSafeName(G);
+                if (Function *F = dyn_cast<Function>(&G)) {
+                    if (juliapersonality_func) {
+                        // Add unwind exception personalities to functions to handle async exceptions
+                        F->setPersonalityFn(juliapersonality_func);
+                    }
+                }
+            }
+        }
+    });
+
+    if (timed) {
+        if (measure_compile_time_enabled) {
+            auto end = jl_hrtime();
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+        }
+        ct->reentrant_timing &= ~1ull;
+    }
+    return data;
+}
+
+
 // also be used be extern consumers like GPUCompiler.jl to obtain a module containing
 // all reachable & inferrrable functions.
-// The `policy` flag switches between the default mode `0` and the extern mode `1` used by GPUCompiler.
-// `_imaging_mode` controls if raw pointers can be embedded (e.g. the code will be loaded into the same session).
-// `_external_linkage` create linkages between pkgimages.
 extern "C" JL_DLLEXPORT_CODEGEN
-void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world)
+void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int external_linkage)
 {
     JL_TIMING(NATIVE_AOT, NATIVE_Create);
     ++CreateNativeCalls;
-    CreateNativeMax.updateMax(jl_array_len(methods));
+    CreateNativeMax.updateMax(jl_array_nrows(codeinfos));
     if (cgparams == NULL)
         cgparams = &jl_default_cgparams;
+    jl_cgparams_t target_cgparams = *cgparams;
+    target_cgparams.sanitize_memory = jl_options.target_sanitize_memory;
+    target_cgparams.sanitize_thread = jl_options.target_sanitize_thread;
+    target_cgparams.sanitize_address = jl_options.target_sanitize_address;
     jl_native_code_desc_t *data = new jl_native_code_desc_t;
-    CompilationPolicy policy = (CompilationPolicy) _policy;
-    bool imaging = imaging_default() || _imaging_mode == 1;
-    jl_workqueue_t emitted;
-    jl_method_instance_t *mi = NULL;
-    jl_code_info_t *src = NULL;
-    JL_GC_PUSH1(&src);
-    auto ct = jl_current_task;
-    bool timed = (ct->reentrant_timing & 1) == 0;
-    if (timed)
-        ct->reentrant_timing |= 1;
     orc::ThreadSafeContext ctx;
     orc::ThreadSafeModule backing;
     if (!llvmmod) {
-        ctx = jl_ExecutionEngine->acquireContext();
-        backing = jl_create_ts_module("text", ctx, imaging);
+        ctx = jl_ExecutionEngine->makeContext();
+        backing = jl_create_ts_module("text", ctx, jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple());
     }
     orc::ThreadSafeModule &clone = llvmmod ? *unwrap(llvmmod) : backing;
     auto ctxt = clone.getContext();
 
-    uint64_t compiler_start_time = 0;
-    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
-    if (measure_compile_time_enabled)
-        compiler_start_time = jl_hrtime();
-
     // compile all methods for the current world and type-inference world
-
-    JL_LOCK(&jl_codegen_lock);
     auto target_info = clone.withModuleDo([&](Module &M) {
         return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
     });
+    egal_set method_roots;
     jl_codegen_params_t params(ctxt, std::move(target_info.first), std::move(target_info.second));
-    params.params = cgparams;
-    params.imaging = imaging;
-    params.debug_level = jl_options.debug_level;
-    params.external_linkage = _external_linkage;
-    size_t compile_for[] = { jl_typeinf_world, _world };
-    for (int worlds = 0; worlds < 2; worlds++) {
-        params.world = compile_for[worlds];
-        if (!params.world)
-            continue;
-        // Don't emit methods for the typeinf_world with extern policy
-        if (policy != CompilationPolicy::Default && params.world == jl_typeinf_world)
-            continue;
-        size_t i, l;
-        for (i = 0, l = jl_array_len(methods); i < l; i++) {
-            // each item in this list is either a MethodInstance indicating something
-            // to compile, or an svec(rettype, sig) describing a C-callable alias to create.
-            jl_value_t *item = jl_array_ptr_ref(methods, i);
-            if (jl_is_simplevector(item)) {
-                if (worlds == 1)
-                    jl_compile_extern_c(wrap(&clone), &params, NULL, jl_svecref(item, 0), jl_svecref(item, 1));
-                continue;
-            }
-            mi = (jl_method_instance_t*)item;
-            src = NULL;
-            // if this method is generally visible to the current compilation world,
-            // and this is either the primary world, or not applicable in the primary world
-            // then we want to compile and emit this
-            if (mi->def.method->primary_world <= params.world && params.world <= mi->def.method->deleted_world) {
-                // find and prepare the source code to compile
-                jl_code_instance_t *codeinst = NULL;
-                jl_ci_cache_lookup(*cgparams, mi, params.world, &codeinst, &src);
-                if (src && !emitted.count(codeinst)) {
-                    // now add it to our compilation results
-                    JL_GC_PROMISE_ROOTED(codeinst->rettype);
-                    orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(codeinst->def),
-                            params.tsctx, params.imaging,
-                            clone.getModuleUnlocked()->getDataLayout(),
-                            Triple(clone.getModuleUnlocked()->getTargetTriple()));
-                    jl_llvm_functions_t decls = jl_emit_code(result_m, mi, src, codeinst->rettype, params);
-                    if (result_m)
-                        emitted[codeinst] = {std::move(result_m), std::move(decls)};
-                }
-            }
+    if (!llvmmod)
+        params.getContext().setDiscardValueNames(true);
+    params.params = &target_cgparams;
+    assert(params.imaging_mode); // `_imaging_mode` controls if broken features like code-coverage are disabled
+    params.external_linkage = external_linkage;
+    params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
+    bool safepoint_on_entry = params.safepoint_on_entry;
+    JL_GC_PUSH3(&params.temporary_roots, &method_roots.list, &method_roots.keyset);
+    jl_compiled_functions_t compiled_functions;
+    size_t i, l;
+    for (i = 0, l = jl_array_nrows(codeinfos); i < l; i++) {
+        // each item in this list is either a CodeInstance followed by a CodeInfo indicating something
+        // to compile, or a rettype followed by a sig describing a C-callable alias to create.
+        jl_value_t *item = jl_array_ptr_ref(codeinfos, i);
+        if (jl_is_code_instance(item)) {
+            // now add it to our compilation results
+            jl_code_instance_t *codeinst = (jl_code_instance_t*)item;
+            jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i);
+            assert(jl_is_code_info(src));
+            if (compiled_functions.count(codeinst))
+                continue; // skip any duplicates that accidentally made there way in here (or make this an error?)
+            if (jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX)
+                params.safepoint_on_entry = false; // ensure we don't block ExpandAtomicModifyPass from inlining this code if applicable
+            orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)),
+                    params.tsctx, clone.getModuleUnlocked()->getDataLayout(),
+                    Triple(clone.getModuleUnlocked()->getTargetTriple()));
+            jl_llvm_functions_t decls;
+            if (!(params.params->force_emit_all) && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr)
+                decls.functionObject = "jl_fptr_const_return";
+            else
+                decls = jl_emit_codeinst(result_m, codeinst, src, params);
+            params.safepoint_on_entry = safepoint_on_entry;
+            record_method_roots(method_roots, jl_get_ci_mi(codeinst));
+            if (result_m)
+                compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
+        }
+        else {
+            assert(jl_is_simplevector(item));
+            jl_value_t *rt = jl_svecref(item, 0);
+            jl_value_t *sig = jl_svecref(item, 1);
+            jl_value_t *nameval = jl_svec_len(item) == 2 ? jl_nothing : jl_svecref(item, 2);
+            assert(jl_is_type(rt) && jl_is_type(sig));
+            jl_generate_ccallable(clone.getModuleUnlocked(), nameval, rt, sig, params);
         }
-
-        // finally, make sure all referenced methods also get compiled or fixed up
-        jl_compile_workqueue(emitted, *clone.getModuleUnlocked(), params, policy);
     }
-    JL_UNLOCK(&jl_codegen_lock); // Might GC
+    // finally, make sure all referenced methods get fixed up, particularly if the user declined to compile them
+    resolve_workqueue(params, method_roots, compiled_functions);
+    // including generating cfunction thunks
+    generate_cfunc_thunks(params, compiled_functions);
+    aot_optimize_roots(params, method_roots, compiled_functions);
+    params.temporary_roots = nullptr;
+    params.temporary_roots_set.clear();
     JL_GC_POP();
 
     // process the globals array, before jl_merge_module destroys them
-    std::vector<std::string> gvars(params.globals.size());
-    data->jl_value_to_llvm.resize(params.globals.size());
+    SmallVector<std::string, 0> gvars(params.global_targets.size());
+    data->jl_value_to_llvm.resize(params.global_targets.size());
     StringSet<> gvars_names;
     DenseSet<GlobalValue *> gvars_set;
 
     size_t idx = 0;
-    for (auto &global : params.globals) {
+    for (auto &global : params.global_targets) {
         gvars[idx] = global.second->getName().str();
         assert(gvars_set.insert(global.second).second && "Duplicate gvar in params!");
         assert(gvars_names.insert(gvars[idx]).second && "Duplicate gvar name in params!");
         data->jl_value_to_llvm[idx] = global.first;
         idx++;
     }
-    CreateNativeMethods += emitted.size();
+    CreateNativeMethods += compiled_functions.size();
 
     size_t offset = gvars.size();
     data->jl_external_to_llvm.resize(params.external_fns.size());
@@ -390,94 +911,142 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
 
     // clones the contents of the module `m` to the shadow_output collector
     // while examining and recording what kind of function pointer we have
-    Linker L(*clone.getModuleUnlocked());
-    for (auto &def : emitted) {
-        jl_merge_module(clone, std::move(std::get<0>(def.second)));
-        jl_code_instance_t *this_code = def.first;
-        jl_llvm_functions_t decls = std::get<1>(def.second);
-        StringRef func = decls.functionObject;
-        StringRef cfunc = decls.specFunctionObject;
-        uint32_t func_id = 0;
-        uint32_t cfunc_id = 0;
-        if (func == "jl_fptr_args") {
-            func_id = -1;
+    {
+        Linker L(*clone.getModuleUnlocked());
+        for (auto &def : compiled_functions) {
+            jl_code_instance_t *this_code = def.first;
+            JL_GC_PROMISE_ROOTED(this_code);
+            jl_llvm_functions_t &decls = def.second.decls;
+            StringRef func = decls.functionObject;
+            StringRef cfunc = decls.specFunctionObject;
+            orc::ThreadSafeModule &M = def.second.TSM;
+            if (external_linkage) {
+                uint8_t specsigflags;
+                jl_callptr_t invoke;
+                void *fptr;
+                jl_read_codeinst_invoke(this_code, &specsigflags, &invoke, &fptr, 0);
+                if (invoke != NULL && (specsigflags & JL_CI_FLAGS_FROM_IMAGE)) {
+                    // this codeinst is already available externally: keep it only if canPartition demands it for local use
+                    // TODO: for performance, avoid generating the src code when we know it would reach here anyways?
+                    if (M.withModuleDo([&](Module &M) { return !canPartition(*cast<Function>(M.getNamedValue(cfunc))); })) {
+                        jl_merge_module(L, std::move(M));
+                    }
+                    continue;
+                }
+            }
+            jl_merge_module(L, std::move(M));
+            uint32_t func_id = 0;
+            uint32_t cfunc_id = 0;
+            if (func == "jl_fptr_args") {
+                func_id = -1;
+            }
+            else if (func == "jl_fptr_sparam") {
+                func_id = -2;
+            }
+            else if (func == "jl_f_opaque_closure_call") {
+                func_id = -4;
+            }
+            else if (func == "jl_fptr_const_return") {
+                func_id = -5;
+            }
+            else {
+                //Safe b/c context is locked by params
+                data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(func)));
+                func_id = data->jl_sysimg_fvars.size();
+            }
+            if (!cfunc.empty()) {
+                //Safe b/c context is locked by params
+                data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(cfunc)));
+                cfunc_id = data->jl_sysimg_fvars.size();
+            }
+            data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id);
         }
-        else if (func == "jl_fptr_sparam") {
-            func_id = -2;
+        bool Changed = true;
+        while (Changed) {
+            Changed = false;
+            // make sure everything referenced got included though, since some functions aren't
+            // correctly implemented by staticdata for external use, and so codegen won't emit
+            // an external reference but expects a private copy here instead
+            for (auto &def : compiled_functions) {
+                orc::ThreadSafeModule &M = def.second.TSM;
+                if (!M)
+                    continue;
+                jl_llvm_functions_t &decls = def.second.decls;
+                StringRef func = decls.functionObject;
+                StringRef cfunc = decls.specFunctionObject;
+                if (func != "jl_fptr_args" &&
+                    func != "jl_fptr_sparam" &&
+                    func != "jl_f_opaque_closure_call" &&
+                    clone.getModuleUnlocked()->getNamedValue(func)) {
+                    jl_merge_module(L, std::move(M));
+                    Changed = true;
+                    continue;
+                }
+                if (!cfunc.empty() && clone.getModuleUnlocked()->getNamedValue(cfunc)) {
+                    Changed = true;
+                    jl_merge_module(L, std::move(M));
+                }
+            }
         }
-        else {
-            //Safe b/c context is locked by params
-            data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(func)));
-            func_id = data->jl_sysimg_fvars.size();
+#ifndef NDEBUG
+        // make sure we didn't forget anything that we promised to include in here
+        for (auto &def : compiled_functions) {
+            jl_llvm_functions_t &decls = def.second.decls;
+            StringRef func = decls.functionObject;
+            StringRef cfunc = decls.specFunctionObject;
+            if (func != "jl_fptr_args" &&
+                func != "jl_fptr_sparam" &&
+                func != "jl_f_opaque_closure_call") {
+                GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(func);
+                assert(!F || !F->isDeclaration());
+            }
+            if (!cfunc.empty()) {
+                GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(cfunc);
+                assert(!F || !F->isDeclaration());
+            }
         }
-        if (!cfunc.empty()) {
-            //Safe b/c context is locked by params
-            data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(cfunc)));
-            cfunc_id = data->jl_sysimg_fvars.size();
+#endif
+        compiled_functions.clear();
+        if (params._shared_module) {
+            bool error = L.linkInModule(std::move(params._shared_module));
+            assert(!error && "Error linking in shared module");
+            (void)error;
         }
-        data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id);
-    }
-    if (params._shared_module) {
-        bool error = L.linkInModule(std::move(params._shared_module));
-        assert(!error && "Error linking in shared module");
-        (void)error;
     }
 
     // now get references to the globals in the merged module
     // and set them to be internalized and initialized at startup
+    // filter out any gvars that got optimized away
+    idx = 0;
+    size_t newoffset = 0;
+    size_t newidx = 0;
     for (auto &global : gvars) {
         //Safe b/c context is locked by params
-        GlobalVariable *G = cast<GlobalVariable>(clone.getModuleUnlocked()->getNamedValue(global));
-        G->setInitializer(ConstantPointerNull::get(cast<PointerType>(G->getValueType())));
-        G->setLinkage(GlobalValue::ExternalLinkage);
-        G->setVisibility(GlobalValue::HiddenVisibility);
-        G->setDSOLocal(true);
-        data->jl_sysimg_gvars.push_back(G);
-    }
-    CreateNativeGlobals += gvars.size();
-
-    //Safe b/c context is locked by params
-    auto TT = Triple(clone.getModuleUnlocked()->getTargetTriple());
-    Function *juliapersonality_func = nullptr;
-    if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
-        // setting the function personality enables stack unwinding and catching exceptions
-        // so make sure everything has something set
-        Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext());
-        juliapersonality_func = Function::Create(FunctionType::get(T_int32, true),
-            Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked());
-        juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass);
-    }
-
-    // move everything inside, now that we've merged everything
-    // (before adding the exported headers)
-    if (policy == CompilationPolicy::Default) {
-        //Safe b/c context is locked by params
-        for (GlobalObject &G : clone.getModuleUnlocked()->global_objects()) {
-            if (!G.isDeclaration()) {
-                G.setLinkage(GlobalValue::ExternalLinkage);
-                G.setVisibility(GlobalValue::HiddenVisibility);
-                G.setDSOLocal(true);
-                makeSafeName(G);
-                if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
-                    // Add unwind exception personalities to functions to handle async exceptions
-                    if (Function *F = dyn_cast<Function>(&G))
-                        F->setPersonalityFn(juliapersonality_func);
-                }
+        GlobalVariable *G = cast_or_null<GlobalVariable>(clone.getModuleUnlocked()->getNamedValue(global));
+        if (G != nullptr) {
+            assert(!G->hasInitializer());
+            G->setInitializer(Constant::getNullValue(G->getValueType()));
+            G->setLinkage(GlobalValue::InternalLinkage);
+            G->setDSOLocal(true);
+            assert(newidx == data->jl_sysimg_gvars.size());
+            if (idx < offset) {
+                data->jl_value_to_llvm[newidx] = data->jl_value_to_llvm[idx];
+                newoffset = newidx + 1;
+            }
+            else {
+                data->jl_external_to_llvm[newidx - newoffset] = data->jl_external_to_llvm[idx - offset];
             }
+            data->jl_sysimg_gvars.push_back(G);
+            newidx++;
         }
+        idx++;
     }
+    data->jl_value_to_llvm.resize(newoffset);
+    data->jl_external_to_llvm.resize(newidx - newoffset);
+    gvars.clear();
+    CreateNativeGlobals += idx;
 
     data->M = std::move(clone);
-    if (timed) {
-        if (measure_compile_time_enabled) {
-            auto end = jl_hrtime();
-            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
-        }
-        ct->reentrant_timing &= ~1ull;
-    }
-    if (ctx.getContext()) {
-        jl_ExecutionEngine->releaseContext(std::move(ctx));
-    }
     return (void*)data;
 }
 
@@ -495,7 +1064,6 @@ static void reportWriterError(const ErrorInfoBase &E)
     jl_safe_printf("ERROR: failed to emit output file %s\n", err.c_str());
 }
 
-#if JULIA_FLOAT16_ABI == 1
 static void injectCRTAlias(Module &M, StringRef name, StringRef alias, FunctionType *FT)
 {
     Function *target = M.getFunction(alias);
@@ -512,7 +1080,7 @@ static void injectCRTAlias(Module &M, StringRef name, StringRef alias, FunctionT
     auto val = builder.CreateCall(target, CallArgs);
     builder.CreateRet(val);
 }
-#endif
+
 void multiversioning_preannotate(Module &M);
 
 // See src/processor.h for documentation about this table. Corresponds to jl_image_shard_t.
@@ -528,14 +1096,13 @@ static GlobalVariable *emit_shard_table(Module &M, Type *T_size, Type *T_psize,
             return gv;
         };
         auto table = tables.data() + i * sizeof(jl_image_shard_t) / sizeof(void *);
-        table[offsetof(jl_image_shard_t, fvar_base) / sizeof(void*)] = create_gv("jl_fvar_base", false);
-        table[offsetof(jl_image_shard_t, fvar_offsets) / sizeof(void*)] = create_gv("jl_fvar_offsets", true);
+        table[offsetof(jl_image_shard_t, fvar_count) / sizeof(void*)] = create_gv("jl_fvar_count", true);
+        table[offsetof(jl_image_shard_t, fvar_ptrs) / sizeof(void*)] = create_gv("jl_fvar_ptrs", true);
         table[offsetof(jl_image_shard_t, fvar_idxs) / sizeof(void*)] = create_gv("jl_fvar_idxs", true);
-        table[offsetof(jl_image_shard_t, gvar_base) / sizeof(void*)] = create_gv("jl_gvar_base", false);
         table[offsetof(jl_image_shard_t, gvar_offsets) / sizeof(void*)] = create_gv("jl_gvar_offsets", true);
         table[offsetof(jl_image_shard_t, gvar_idxs) / sizeof(void*)] = create_gv("jl_gvar_idxs", true);
         table[offsetof(jl_image_shard_t, clone_slots) / sizeof(void*)] = create_gv("jl_clone_slots", true);
-        table[offsetof(jl_image_shard_t, clone_offsets) / sizeof(void*)] = create_gv("jl_clone_offsets", true);
+        table[offsetof(jl_image_shard_t, clone_ptrs) / sizeof(void*)] = create_gv("jl_clone_ptrs", true);
         table[offsetof(jl_image_shard_t, clone_idxs) / sizeof(void*)] = create_gv("jl_clone_idxs", true);
     }
     auto tables_arr = ConstantArray::get(ArrayType::get(T_psize, tables.size()), tables);
@@ -546,10 +1113,18 @@ static GlobalVariable *emit_shard_table(Module &M, Type *T_size, Type *T_psize,
     return tables_gv;
 }
 
+static Function *emit_pgcstack_default_func(Module &M, Type *T_ptr) {
+    auto FT = FunctionType::get(T_ptr, false);
+    auto F = Function::Create(FT, GlobalValue::InternalLinkage, "pgcstack_default_func", &M);
+    llvm::IRBuilder<> builder(BasicBlock::Create(M.getContext(), "top", F));
+    builder.CreateRet(Constant::getNullValue(T_ptr));
+    return F;
+}
+
 // See src/processor.h for documentation about this table. Corresponds to jl_image_ptls_t.
-static GlobalVariable *emit_ptls_table(Module &M, Type *T_size, Type *T_psize) {
+static GlobalVariable *emit_ptls_table(Module &M, Type *T_size, Type *T_ptr) {
     std::array<Constant *, 3> ptls_table{
-        new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), "jl_pgcstack_func_slot"),
+        new GlobalVariable(M, T_ptr, false, GlobalValue::ExternalLinkage, emit_pgcstack_default_func(M, T_ptr), "jl_pgcstack_func_slot"),
         new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), "jl_pgcstack_key_slot"),
         new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), "jl_tls_offset"),
     };
@@ -557,7 +1132,7 @@ static GlobalVariable *emit_ptls_table(Module &M, Type *T_size, Type *T_psize) {
         cast<GlobalVariable>(gv)->setVisibility(GlobalValue::HiddenVisibility);
         cast<GlobalVariable>(gv)->setDSOLocal(true);
     }
-    auto ptls_table_arr = ConstantArray::get(ArrayType::get(T_psize, ptls_table.size()), ptls_table);
+    auto ptls_table_arr = ConstantArray::get(ArrayType::get(T_ptr, ptls_table.size()), ptls_table);
     auto ptls_table_gv = new GlobalVariable(M, ptls_table_arr->getType(), false,
                                             GlobalValue::ExternalLinkage, ptls_table_arr, "jl_ptls_table");
     ptls_table_gv->setVisibility(GlobalValue::HiddenVisibility);
@@ -638,7 +1213,7 @@ static FunctionInfo getFunctionWeight(const Function &F)
         auto val = F.getFnAttribute("julia.mv.clones").getValueAsString();
         // base16, so must be at most 4 * length bits long
         // popcount gives number of clones
-        info.clones = APInt(val.size() * 4, val, 16).countPopulation() + 1;
+        info.clones = APInt(val.size() * 4, val, 16).popcount() + 1;
     }
     info.weight += info.insts;
     // more basic blocks = more complex than just sum of insts,
@@ -685,17 +1260,19 @@ ModuleInfo compute_module_info(Module &M) {
 }
 
 struct Partition {
-    StringSet<> globals;
+    StringMap<bool> globals;
     StringMap<unsigned> fvars;
     StringMap<unsigned> gvars;
     size_t weight;
 };
 
-static inline bool verify_partitioning(const SmallVectorImpl<Partition> &partitions, const Module &M, size_t fvars_size, size_t gvars_size) {
+static inline bool verify_partitioning(const SmallVectorImpl<Partition> &partitions, const Module &M, DenseMap<GlobalValue *, unsigned> &fvars, DenseMap<GlobalValue *, unsigned> &gvars) {
     bool bad = false;
 #ifndef JL_NDEBUG
-    SmallVector<uint32_t> fvars(fvars_size);
-    SmallVector<uint32_t> gvars(gvars_size);
+    size_t fvars_size = fvars.size();
+    size_t gvars_size = gvars.size();
+    SmallVector<uint32_t, 0> fvars_partition(fvars_size);
+    SmallVector<uint32_t, 0> gvars_partition(gvars_size);
     StringMap<uint32_t> GVNames;
     for (uint32_t i = 0; i < partitions.size(); i++) {
         for (auto &name : partitions[i].globals) {
@@ -706,45 +1283,55 @@ static inline bool verify_partitioning(const SmallVectorImpl<Partition> &partiti
             GVNames[name.getKey()] = i;
         }
         for (auto &fvar : partitions[i].fvars) {
-            if (fvars[fvar.second] != 0) {
+            if (fvars_partition[fvar.second] != 0) {
                 bad = true;
-                dbgs() << "Duplicate fvar " << fvar.first() << " in partitions " << i << " and " << fvars[fvar.second] - 1 << "\n";
+                dbgs() << "Duplicate fvar " << fvar.first() << " in partitions " << i << " and " << fvars_partition[fvar.second] - 1 << "\n";
             }
-            fvars[fvar.second] = i+1;
+            fvars_partition[fvar.second] = i+1;
         }
         for (auto &gvar : partitions[i].gvars) {
-            if (gvars[gvar.second] != 0) {
+            if (gvars_partition[gvar.second] != 0) {
                 bad = true;
-                dbgs() << "Duplicate gvar " << gvar.first() << " in partitions " << i << " and " << gvars[gvar.second] - 1 << "\n";
+                dbgs() << "Duplicate gvar " << gvar.first() << " in partitions " << i << " and " << gvars_partition[gvar.second] - 1 << "\n";
             }
-            gvars[gvar.second] = i+1;
+            gvars_partition[gvar.second] = i+1;
         }
     }
-    for (auto &GV : M.globals()) {
+    for (auto &GV : M.global_values()) {
         if (GV.isDeclaration()) {
             if (GVNames.count(GV.getName())) {
                 bad = true;
                 dbgs() << "Global " << GV.getName() << " is a declaration but is in partition " << GVNames[GV.getName()] << "\n";
             }
         } else {
+            // Local global values are not partitioned
             if (!GVNames.count(GV.getName())) {
                 bad = true;
                 dbgs() << "Global " << GV << " not in any partition\n";
             }
-            if (!GV.hasExternalLinkage()) {
-                bad = true;
-                dbgs() << "Global " << GV << " has non-external linkage " << GV.getLinkage() << " but is in partition " << GVNames[GV.getName()] << "\n";
+            for (ConstantUses<GlobalValue> uses(const_cast<GlobalValue*>(&GV), const_cast<Module&>(M)); !uses.done(); uses.next()) {
+                auto val = uses.get_info().val;
+                if (!GVNames.count(val->getName())) {
+                    bad = true;
+                    dbgs() << "Global " << val->getName() << " used by " << GV.getName() << ", which is not in any partition\n";
+                    continue;
+                }
+                if (GVNames[val->getName()] != GVNames[GV.getName()]) {
+                    bad = true;
+                    dbgs() << "Global " << val->getName() << " used by " << GV.getName() << ", which is in partition " << GVNames[GV.getName()] << " but " << val->getName() << " is in partition " << GVNames[val->getName()] << "\n";
+                }
             }
         }
     }
     for (uint32_t i = 0; i < fvars_size; i++) {
-        if (fvars[i] == 0) {
+        if (fvars_partition[i] == 0) {
+            auto gv = find_if(fvars.begin(), fvars.end(), [i](auto var) { return var.second == i; });
             bad = true;
-            dbgs() << "fvar " << i << " not in any partition\n";
+            dbgs() << "fvar " << gv->first->getName() << " at " << i << " not in any partition\n";
         }
     }
     for (uint32_t i = 0; i < gvars_size; i++) {
-        if (gvars[i] == 0) {
+        if (gvars_partition[i] == 0) {
             bad = true;
             dbgs() << "gvar " << i << " not in any partition\n";
         }
@@ -767,7 +1354,7 @@ static SmallVector<Partition, 32> partitionModule(Module &M, unsigned threads) {
             unsigned size;
             size_t weight;
         };
-        std::vector<Node> nodes;
+        SmallVector<Node, 0> nodes;
         DenseMap<GlobalValue *, unsigned> node_map;
         unsigned merged;
 
@@ -806,9 +1393,16 @@ static SmallVector<Partition, 32> partitionModule(Module &M, unsigned threads) {
     for (auto &G : M.global_values()) {
         if (G.isDeclaration())
             continue;
-        if (isa<Function>(G)) {
-            partitioner.make(&G, getFunctionWeight(cast<Function>(G)).weight);
-        } else {
+        // Currently ccallable global aliases have extern linkage, we only want to make the
+        // internally linked functions/global variables extern+hidden
+        if (G.hasLocalLinkage()) {
+            G.setLinkage(GlobalValue::ExternalLinkage);
+            G.setVisibility(GlobalValue::HiddenVisibility);
+        }
+        if (auto F = dyn_cast<Function>(&G)) {
+            partitioner.make(&G, getFunctionWeight(*F).weight);
+        }
+        else {
             partitioner.make(&G, 1);
         }
     }
@@ -818,6 +1412,8 @@ static SmallVector<Partition, 32> partitionModule(Module &M, unsigned threads) {
         for (ConstantUses<GlobalValue> uses(partitioner.nodes[i].GV, M); !uses.done(); uses.next()) {
             auto val = uses.get_info().val;
             auto idx = partitioner.node_map.find(val);
+            // This can fail if we can't partition a global, but it uses something we can partition
+            // This should be fixed by altering canPartition to not permit partitioning this global
             assert(idx != partitioner.node_map.end());
             partitioner.merge(i, idx->second);
         }
@@ -828,12 +1424,12 @@ static SmallVector<Partition, 32> partitionModule(Module &M, unsigned threads) {
     auto pcomp = [](const Partition *p1, const Partition *p2) {
         return p1->weight > p2->weight;
     };
-    std::priority_queue<Partition *, std::vector<Partition *>, decltype(pcomp)> pq(pcomp);
+    std::priority_queue<Partition *, SmallVector<Partition *, 0>, decltype(pcomp)> pq(pcomp);
     for (unsigned i = 0; i < threads; ++i) {
         pq.push(&partitions[i]);
     }
 
-    std::vector<unsigned> idxs(partitioner.nodes.size());
+    SmallVector<unsigned, 0> idxs(partitioner.nodes.size());
     std::iota(idxs.begin(), idxs.end(), 0);
     std::sort(idxs.begin(), idxs.end(), [&](unsigned a, unsigned b) {
         //because roots have more weight than their children,
@@ -845,40 +1441,42 @@ static SmallVector<Partition, 32> partitionModule(Module &M, unsigned threads) {
     for (unsigned idx = 0; idx < idxs.size(); ++idx) {
         auto i = idxs[idx];
         auto root = partitioner.find(i);
-        assert(root == i || partitioner.nodes[root].GV == nullptr);
-        if (partitioner.nodes[root].GV) {
+        assert(root == i || partitioner.nodes[root].weight == 0);
+        if (partitioner.nodes[root].weight) {
             auto &node = partitioner.nodes[root];
             auto &P = *pq.top();
             pq.pop();
             auto name = node.GV->getName();
-            P.globals.insert(name);
+            P.globals.insert({name, true});
             if (fvars.count(node.GV))
                 P.fvars[name] = fvars[node.GV];
             if (gvars.count(node.GV))
                 P.gvars[name] = gvars[node.GV];
             P.weight += node.weight;
-            node.GV = nullptr;
+            node.weight = 0;
             node.size = &P - partitions.data();
             pq.push(&P);
         }
         if (root != i) {
             auto &node = partitioner.nodes[i];
-            assert(node.GV != nullptr);
+            assert(node.weight != 0);
             // we assigned its root already, so just add it to the root's partition
             // don't touch the priority queue, since we're not changing the weight
             auto &P = partitions[partitioner.nodes[root].size];
             auto name = node.GV->getName();
-            P.globals.insert(name);
+            P.globals.insert({name, true});
             if (fvars.count(node.GV))
                 P.fvars[name] = fvars[node.GV];
             if (gvars.count(node.GV))
                 P.gvars[name] = gvars[node.GV];
-            node.GV = nullptr;
+            node.weight = 0;
             node.size = partitioner.nodes[root].size;
         }
     }
 
-    bool verified = verify_partitioning(partitions, M, fvars.size(), gvars.size());
+    bool verified = verify_partitioning(partitions, M, fvars, gvars);
+    if (!verified)
+        llvm_dump(&M);
     assert(verified && "Partitioning failed to partition globals correctly");
     (void) verified;
 
@@ -920,7 +1518,6 @@ struct ShardTimers {
     ImageTimer deserialize;
     ImageTimer materialize;
     ImageTimer construct;
-    ImageTimer deletion;
     // impl timers
     ImageTimer unopt;
     ImageTimer optimize;
@@ -934,13 +1531,12 @@ struct ShardTimers {
     void print(raw_ostream &out, bool clear=false) {
         StringRef sep = "===-------------------------------------------------------------------------===";
         out << formatv("{0}\n{1}\n{0}\n", sep, fmt_align(name + " : " + desc, AlignStyle::Center, sep.size()));
-        auto total = deserialize.elapsed + materialize.elapsed + construct.elapsed + deletion.elapsed +
+        auto total = deserialize.elapsed + materialize.elapsed + construct.elapsed +
             unopt.elapsed + optimize.elapsed + opt.elapsed + obj.elapsed + asm_.elapsed;
         out << "Time (s)  Name  Description\n";
         deserialize.print(out, clear);
         materialize.print(out, clear);
         construct.print(out, clear);
-        deletion.print(out, clear);
         unopt.print(out, clear);
         optimize.print(out, clear);
         opt.print(out, clear);
@@ -950,8 +1546,6 @@ struct ShardTimers {
     }
 };
 
-void emitFloat16Wrappers(Module &M, bool external);
-
 struct AOTOutputs {
     SmallVector<char, 0> unopt, opt, obj, asm_;
 };
@@ -970,7 +1564,7 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
             SourceTM.getRelocationModel(),
             SourceTM.getCodeModel(),
             SourceTM.getOptLevel()));
-
+    fixupTM(*TM);
     if (unopt) {
         timers.unopt.startTimer();
         raw_svector_ostream OS(out.unopt);
@@ -984,60 +1578,77 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
     if (!opt && !obj && !asm_) {
         return out;
     }
-    assert(!verifyModule(M, &errs()));
-
-    timers.optimize.startTimer();
-
-#ifndef JL_USE_NEW_PM
-    legacy::PassManager optimizer;
-    addTargetPasses(&optimizer, TM->getTargetTriple(), TM->getTargetIRAnalysis());
-    addOptimizationPasses(&optimizer, jl_options.opt_level, true, true);
-    addMachinePasses(&optimizer, jl_options.opt_level);
-#else
+    assert(!verifyLLVMIR(M));
 
-    auto PMTM = std::unique_ptr<TargetMachine>(
-        SourceTM.getTarget().createTargetMachine(
-            SourceTM.getTargetTriple().str(),
-            SourceTM.getTargetCPU(),
-            SourceTM.getTargetFeatureString(),
-            SourceTM.Options,
-            SourceTM.getRelocationModel(),
-            SourceTM.getCodeModel(),
-            SourceTM.getOptLevel()));
-    NewPM optimizer{std::move(PMTM), getOptLevel(jl_options.opt_level), OptimizationOptions::defaults(true, true)};
-#endif
-    optimizer.run(M);
-    assert(!verifyModule(M, &errs()));
-    bool inject_aliases = false;
-    for (auto &F : M.functions()) {
-        if (!F.isDeclaration() && F.getName() != "_DllMainCRTStartup") {
-            inject_aliases = true;
-            break;
-        }
-    }
-    // no need to inject aliases if we have no functions
-
-    if (inject_aliases) {
-#if JULIA_FLOAT16_ABI == 1
-        // We would like to emit an alias or an weakref alias to redirect these symbols
-        // but LLVM doesn't let us emit a GlobalAlias to a declaration...
-        // So for now we inject a definition of these functions that calls our runtime
-        // functions. We do so after optimization to avoid cloning these functions.
-        injectCRTAlias(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee",
-                FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false));
-        injectCRTAlias(M, "__extendhfsf2", "julia__gnu_h2f_ieee",
-                FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false));
-        injectCRTAlias(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee",
-                FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
-        injectCRTAlias(M, "__truncsfhf2", "julia__gnu_f2h_ieee",
-                FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
-        injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2",
-                FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false));
+    {
+        timers.optimize.startTimer();
+
+        auto PMTM = std::unique_ptr<TargetMachine>(
+            SourceTM.getTarget().createTargetMachine(
+                SourceTM.getTargetTriple().str(),
+                SourceTM.getTargetCPU(),
+                SourceTM.getTargetFeatureString(),
+                SourceTM.Options,
+                SourceTM.getRelocationModel(),
+                SourceTM.getCodeModel(),
+                SourceTM.getOptLevel()));
+        fixupTM(*PMTM);
+        auto options = OptimizationOptions::defaults(true, true);
+        options.sanitize_memory = jl_options.target_sanitize_memory;
+        options.sanitize_thread = jl_options.target_sanitize_thread;
+        options.sanitize_address = jl_options.target_sanitize_address;
+        NewPM optimizer{std::move(PMTM), getOptLevel(jl_options.opt_level), options};
+        optimizer.run(M);
+        assert(!verifyLLVMIR(M));
+        bool inject_aliases = false;
+        for (auto &F : M.functions()) {
+            if (!F.isDeclaration() && F.getName() != "_DllMainCRTStartup") {
+                inject_aliases = true;
+                break;
+            }
+        }
+        // no need to inject aliases if we have no functions
+
+        if (inject_aliases) {
+            // We would like to emit an alias or an weakref alias to redirect these symbols
+            // but LLVM doesn't let us emit a GlobalAlias to a declaration...
+            // So for now we inject a definition of these functions that calls our runtime
+            // functions. We do so after optimization to avoid cloning these functions.
+            // Float16 conversion routines
+#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_)
+            // LLVM 16 reverted to soft-float ABI for passing half on x86_64 Darwin
+            // https://github.com/llvm/llvm-project/commit/2bcf51c7f82ca7752d1bba390a2e0cb5fdd05ca9
+            injectCRTAlias(M, "__gnu_h2f_ieee", "julia_half_to_float",
+                    FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getInt16Ty(M.getContext()) }, false));
+            injectCRTAlias(M, "__extendhfsf2", "julia_half_to_float",
+                    FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getInt16Ty(M.getContext()) }, false));
+            injectCRTAlias(M, "__gnu_f2h_ieee", "julia_float_to_half",
+                    FunctionType::get(Type::getInt16Ty(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
+            injectCRTAlias(M, "__truncsfhf2", "julia_float_to_half",
+                    FunctionType::get(Type::getInt16Ty(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
+            injectCRTAlias(M, "__truncdfhf2", "julia_double_to_half",
+                    FunctionType::get(Type::getInt16Ty(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false));
 #else
-        emitFloat16Wrappers(M, false);
+            injectCRTAlias(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee",
+                    FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false));
+            injectCRTAlias(M, "__extendhfsf2", "julia__gnu_h2f_ieee",
+                    FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false));
+            injectCRTAlias(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee",
+                    FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
+            injectCRTAlias(M, "__truncsfhf2", "julia__gnu_f2h_ieee",
+                    FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
+            injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2",
+                    FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false));
 #endif
+
+            // BFloat16 conversion routines
+            injectCRTAlias(M, "__truncsfbf2", "julia__truncsfbf2",
+                    FunctionType::get(Type::getBFloatTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
+            injectCRTAlias(M, "__truncsdbf2", "julia__truncdfbf2",
+                    FunctionType::get(Type::getBFloatTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false));
+        }
+        timers.optimize.stopTimer();
     }
-    timers.optimize.stopTimer();
 
     if (opt) {
         timers.opt.startTimer();
@@ -1055,7 +1666,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
         raw_svector_ostream OS(out.obj);
         legacy::PassManager emitter;
         addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
+#if JL_LLVM_VERSION >= 180000
+        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CodeGenFileType::ObjectFile, false))
+#else
         if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_ObjectFile, false))
+#endif
             jl_safe_printf("ERROR: target does not support generation of object files\n");
         emitter.run(M);
         timers.obj.stopTimer();
@@ -1066,7 +1681,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
         raw_svector_ostream OS(out.asm_);
         legacy::PassManager emitter;
         addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
+#if JL_LLVM_VERSION >= 180000
+        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CodeGenFileType::AssemblyFile, false))
+#else
         if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_AssemblyFile, false))
+#endif
             jl_safe_printf("ERROR: target does not support generation of assembly files\n");
         emitter.run(M);
         timers.asm_.stopTimer();
@@ -1077,7 +1696,7 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
 
 // serialize module to bitcode
 static auto serializeModule(const Module &M) {
-    assert(!verifyModule(M, &errs()) && "Serializing invalid module!");
+    assert(!verifyLLVMIR(M) && "Serializing invalid module!");
     SmallVector<char, 0> ClonedModuleBuffer;
     BitcodeWriter BCWriter(ClonedModuleBuffer);
     BCWriter.writeModule(M);
@@ -1089,38 +1708,60 @@ static auto serializeModule(const Module &M) {
 // Modules are deserialized lazily by LLVM, to avoid deserializing
 // unnecessary functions. We take advantage of this by serializing
 // the entire module once, then deleting the bodies of functions
-// that are not in this partition. Once unnecesary functions are
+// that are not in this partition. Once unnecessary functions are
 // deleted, we then materialize the entire module to make use-lists
 // consistent.
 static void materializePreserved(Module &M, Partition &partition) {
     DenseSet<GlobalValue *> Preserve;
-    for (auto &GV : M.global_values()) {
-        if (!GV.isDeclaration()) {
-            if (partition.globals.count(GV.getName())) {
-                Preserve.insert(&GV);
-            }
+    for (auto &Name : partition.globals) {
+        auto *GV = M.getNamedValue(Name.first());
+        assert(GV && !GV->isDeclaration() && !GV->hasLocalLinkage());
+        if (!Name.second) {
+            // We skip partitioning for internal variables, so this has
+            // the same effect as putting it in preserve.
+            // This just avoids a hashtable lookup.
+            GV->setLinkage(GlobalValue::InternalLinkage);
+            assert(GV->hasDefaultVisibility());
+        }
+        else {
+            Preserve.insert(GV);
         }
     }
+
     for (auto &F : M.functions()) {
-        if (!F.isDeclaration()) {
-            if (!Preserve.contains(&F)) {
-                F.deleteBody();
-                F.setLinkage(GlobalValue::ExternalLinkage);
-                F.setVisibility(GlobalValue::HiddenVisibility);
-                F.setDSOLocal(true);
-            }
+        if (F.isDeclaration())
+            continue;
+        if (F.hasLocalLinkage())
+            continue;
+        if (Preserve.contains(&F))
+            continue;
+        if (!canPartition(F)) {
+            F.setLinkage(GlobalValue::AvailableExternallyLinkage);
+            F.setVisibility(GlobalValue::HiddenVisibility);
+            F.setDSOLocal(true);
+            continue;
         }
+        F.deleteBody();
+        F.setLinkage(GlobalValue::ExternalLinkage);
+        F.setVisibility(GlobalValue::HiddenVisibility);
+        F.setDSOLocal(true);
     }
+
     for (auto &GV : M.globals()) {
-        if (!GV.isDeclaration()) {
-            if (!Preserve.contains(&GV)) {
-                GV.setInitializer(nullptr);
-                GV.setLinkage(GlobalValue::ExternalLinkage);
-                GV.setVisibility(GlobalValue::HiddenVisibility);
-                GV.setDSOLocal(true);
-            }
-        }
+        if (GV.isDeclaration())
+            continue;
+        if (Preserve.contains(&GV))
+            continue;
+        if (GV.hasLocalLinkage())
+            continue;
+        GV.setInitializer(nullptr);
+        GV.setLinkage(GlobalValue::ExternalLinkage);
+        GV.setVisibility(GlobalValue::HiddenVisibility);
+        if (GV.getDLLStorageClass() != GlobalValue::DLLStorageClassTypes::DefaultStorageClass)
+            continue; // Don't mess with exported or imported globals
+        GV.setDSOLocal(true);
     }
+
     // Global aliases are a pain to deal with. It is illegal to have an alias to a declaration,
     // so we need to replace them with either a function or a global variable declaration. However,
     // we can't just delete the alias, because that would break the users of the alias. Therefore,
@@ -1129,25 +1770,27 @@ static void materializePreserved(Module &M, Partition &partition) {
     // to deleting the old alias.
     SmallVector<std::pair<GlobalAlias *, GlobalValue *>> DeletedAliases;
     for (auto &GA : M.aliases()) {
-        if (!GA.isDeclaration()) {
-            if (!Preserve.contains(&GA)) {
-                if (GA.getValueType()->isFunctionTy()) {
-                    auto F = Function::Create(cast<FunctionType>(GA.getValueType()), GlobalValue::ExternalLinkage, "", &M);
-                    // This is an extremely sad hack to make sure the global alias never points to an extern function
-                    auto BB = BasicBlock::Create(M.getContext(), "", F);
-                    new UnreachableInst(M.getContext(), BB);
-                    GA.setAliasee(F);
-
-                    DeletedAliases.push_back({ &GA, F });
-                }
-                else {
-                    auto GV = new GlobalVariable(M, GA.getValueType(), false, GlobalValue::ExternalLinkage, Constant::getNullValue(GA.getValueType()));
-                    DeletedAliases.push_back({ &GA, GV });
-                }
-            }
+        assert(!GA.isDeclaration() && "Global aliases can't be declarations!"); // because LLVM says so
+        if (Preserve.contains(&GA))
+            continue;
+        if (GA.hasLocalLinkage())
+            continue;
+        if (GA.getValueType()->isFunctionTy()) {
+            auto F = Function::Create(cast<FunctionType>(GA.getValueType()), GlobalValue::ExternalLinkage, "", &M);
+            // This is an extremely sad hack to make sure the global alias never points to an extern function
+            auto BB = BasicBlock::Create(M.getContext(), "", F);
+            new UnreachableInst(M.getContext(), BB);
+            GA.setAliasee(F);
+            DeletedAliases.push_back({ &GA, F });
+        }
+        else {
+            auto GV = new GlobalVariable(M, GA.getValueType(), false, GlobalValue::ExternalLinkage, Constant::getNullValue(GA.getValueType()));
+            DeletedAliases.push_back({ &GA, GV });
         }
     }
+
     cantFail(M.materializeAll());
+
     for (auto &Deleted : DeletedAliases) {
         Deleted.second->takeName(Deleted.first);
         Deleted.first->replaceAllUsesWith(Deleted.second);
@@ -1162,8 +1805,8 @@ static void materializePreserved(Module &M, Partition &partition) {
 }
 
 // Reconstruct jl_fvars, jl_gvars, jl_fvars_idxs, and jl_gvars_idxs from the partition
-static void construct_vars(Module &M, Partition &partition) {
-    std::vector<std::pair<uint32_t, GlobalValue *>> fvar_pairs;
+static void construct_vars(Module &M, Partition &partition, StringRef suffix) {
+    SmallVector<std::pair<uint32_t, GlobalValue *>> fvar_pairs;
     fvar_pairs.reserve(partition.fvars.size());
     for (auto &fvar : partition.fvars) {
         auto F = M.getFunction(fvar.first());
@@ -1171,8 +1814,8 @@ static void construct_vars(Module &M, Partition &partition) {
         assert(!F->isDeclaration());
         fvar_pairs.push_back({ fvar.second, F });
     }
-    std::vector<GlobalValue *> fvars;
-    std::vector<uint32_t> fvar_idxs;
+    SmallVector<GlobalValue *, 0> fvars;
+    SmallVector<uint32_t, 0> fvar_idxs;
     fvars.reserve(fvar_pairs.size());
     fvar_idxs.reserve(fvar_pairs.size());
     std::sort(fvar_pairs.begin(), fvar_pairs.end());
@@ -1180,16 +1823,16 @@ static void construct_vars(Module &M, Partition &partition) {
         fvars.push_back(fvar.second);
         fvar_idxs.push_back(fvar.first);
     }
-    std::vector<std::pair<uint32_t, GlobalValue *>> gvar_pairs;
+    SmallVector<std::pair<uint32_t, GlobalValue *>, 0> gvar_pairs;
     gvar_pairs.reserve(partition.gvars.size());
     for (auto &gvar : partition.gvars) {
-        auto GV = M.getGlobalVariable(gvar.first());
+        auto GV = M.getNamedGlobal(gvar.first());
         assert(GV);
         assert(!GV->isDeclaration());
         gvar_pairs.push_back({ gvar.second, GV });
     }
-    std::vector<GlobalValue *> gvars;
-    std::vector<uint32_t> gvar_idxs;
+    SmallVector<Constant*, 0> gvars;
+    SmallVector<uint32_t, 0> gvar_idxs;
     gvars.reserve(gvar_pairs.size());
     gvar_idxs.reserve(gvar_pairs.size());
     std::sort(gvar_pairs.begin(), gvar_pairs.end());
@@ -1199,9 +1842,9 @@ static void construct_vars(Module &M, Partition &partition) {
     }
 
     // Now commit the fvars, gvars, and idxs
-    auto T_psize = M.getDataLayout().getIntPtrType(M.getContext())->getPointerTo();
-    emit_offset_table(M, fvars, "jl_fvars", T_psize);
-    emit_offset_table(M, gvars, "jl_gvars", T_psize);
+    auto T_size = M.getDataLayout().getIntPtrType(M.getContext());
+    emit_table(M, fvars, "jl_fvars", PointerType::getUnqual(T_size->getContext()));
+    emit_offset_table(M, T_size, gvars, "jl_gvar", suffix);
     auto fidxs = ConstantDataArray::get(M.getContext(), fvar_idxs);
     auto fidxs_var = new GlobalVariable(M, fidxs->getType(), true,
                                         GlobalVariable::ExternalLinkage,
@@ -1211,23 +1854,21 @@ static void construct_vars(Module &M, Partition &partition) {
     auto gidxs = ConstantDataArray::get(M.getContext(), gvar_idxs);
     auto gidxs_var = new GlobalVariable(M, gidxs->getType(), true,
                                         GlobalVariable::ExternalLinkage,
-                                        gidxs, "jl_gvar_idxs");
+                                        gidxs, "jl_gvar_idxs" + suffix);
     gidxs_var->setVisibility(GlobalValue::HiddenVisibility);
     gidxs_var->setDSOLocal(true);
 }
 
-// Materialization will leave many unused declarations, which multiversioning would otherwise clone.
-// This function removes them to avoid unnecessary cloning of declarations.
-// The GlobalDCEPass is much better at this, but we only care about removing unused
-// declarations, not actually about seeing if code is dead (codegen knows it is live, by construction).
-static void dropUnusedGlobals(Module &M) {
-    std::vector<GlobalValue *> unused;
-    for (auto &G : M.global_values()) {
-        if (G.isDeclaration() && G.use_empty())
-            unused.push_back(&G);
-    }
-    for (auto &G : unused)
-        G->eraseFromParent();
+template<typename CB>
+static inline void schedule_uv_thread(uv_thread_t *worker, CB &&cb)
+{
+    auto func = new CB(std::move(cb));
+    // Use libuv thread to avoid issues with stack sizes
+    uv_thread_create(worker, [] (void *arg) {
+        auto func = static_cast<CB*>(arg);
+        (*func)();
+        delete func;
+    }, func);
 }
 
 // Entrypoint to optionally-multithreaded image compilation. This handles global coordination of the threading,
@@ -1248,7 +1889,6 @@ static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, Stri
         timers[i].deserialize.init("deserialize_" + idx, "Deserialize module");
         timers[i].materialize.init("materialize_" + idx, "Materialize declarations");
         timers[i].construct.init("construct_" + idx, "Construct partitioned definitions");
-        timers[i].deletion.init("deletion_" + idx, "Delete dead declarations");
         timers[i].unopt.init("unopt_" + idx, "Emit unoptimized bitcode");
         timers[i].optimize.init("optimize_" + idx, "Optimize shard");
         timers[i].opt.init("opt_" + idx, "Emit optimized bitcode");
@@ -1276,7 +1916,17 @@ static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, Stri
     // Single-threaded case
     if (threads == 1) {
         output_timer.startTimer();
-        outputs[0] = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out);
+        {
+            JL_TIMING(NATIVE_AOT, NATIVE_Opt);
+            // convert gvars to the expected offset table format for shard 0
+            if (M.getGlobalVariable("jl_gvars")) {
+                auto gvars = consume_gv<Constant>(M, "jl_gvars", false);
+                Type *T_size = M.getDataLayout().getIntPtrType(M.getContext());
+                emit_offset_table(M, T_size, gvars, "jl_gvar", "_0"); // module flag "julia.mv.suffix"
+                M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs_0");
+            }
+            outputs[0] = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out);
+        }
         output_timer.stopTimer();
         // Don't need M anymore
         module_released(M);
@@ -1314,40 +1964,48 @@ static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, Stri
     output_timer.startTimer();
 
     // Start all of the worker threads
-    std::vector<std::thread> workers(threads);
-    for (unsigned i = 0; i < threads; i++) {
-        workers[i] = std::thread([&, i]() {
-            LLVMContext ctx;
-            // Lazily deserialize the entire module
-            timers[i].deserialize.startTimer();
-            auto M = cantFail(getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx), "Error loading module");
-            timers[i].deserialize.stopTimer();
-
-            timers[i].materialize.startTimer();
-            materializePreserved(*M, partitions[i]);
-            timers[i].materialize.stopTimer();
-
-            timers[i].construct.startTimer();
-            construct_vars(*M, partitions[i]);
-            M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), "_" + std::to_string(i)));
-            // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file
-            // or it may skip emitting debug info for that file. Here set it to ./julia#N
-            DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), ".");
-            for (DICompileUnit *CU : M->debug_compile_units())
-                CU->replaceOperandWith(0, topfile);
-            timers[i].construct.stopTimer();
-
-            timers[i].deletion.startTimer();
-            dropUnusedGlobals(*M);
-            timers[i].deletion.stopTimer();
-
-            outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out);
-        });
-    }
+    {
+        JL_TIMING(NATIVE_AOT, NATIVE_Opt);
+        std::vector<uv_thread_t> workers(threads);
+        for (unsigned i = 0; i < threads; i++) {
+            schedule_uv_thread(&workers[i], [&, i]() {
+                LLVMContext ctx;
+                ctx.setDiscardValueNames(true);
+                // Lazily deserialize the entire module
+                timers[i].deserialize.startTimer();
+                auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx);
+                // Make sure this also fails with only julia, but not LLVM assertions enabled,
+                // otherwise, the first error we hit is the LLVM module verification failure,
+                // which will look very confusing, because the module was partially deserialized.
+                bool deser_succeeded = (bool)EM;
+                auto M = cantFail(std::move(EM), "Error loading module");
+                assert(deser_succeeded); (void)deser_succeeded;
+                timers[i].deserialize.stopTimer();
+
+                timers[i].materialize.startTimer();
+                materializePreserved(*M, partitions[i]);
+                timers[i].materialize.stopTimer();
+
+                timers[i].construct.startTimer();
+                std::string suffix = "_" + std::to_string(i);
+                construct_vars(*M, partitions[i], suffix);
+                M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), suffix));
+                // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file
+                // or it may skip emitting debug info for that file. Here set it to ./julia#N
+                DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), ".");
+                if (M->getNamedMetadata("llvm.dbg.cu"))
+                    for (auto CU: M->getNamedMetadata("llvm.dbg.cu")->operands())
+                        CU->replaceOperandWith(0, topfile);
+                timers[i].construct.stopTimer();
+
+                outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out);
+            });
+        }
 
-    // Wait for all of the worker threads to finish
-    for (auto &w : workers)
-        w.join();
+        // Wait for all of the worker threads to finish
+        for (unsigned i = 0; i < threads; i++)
+            uv_thread_join(&workers[i]);
+    }
 
     output_timer.stopTimer();
 
@@ -1372,12 +2030,15 @@ static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, Stri
     return outputs;
 }
 
+extern int jl_is_timing_passes;
 static unsigned compute_image_thread_count(const ModuleInfo &info) {
     // 32-bit systems are very memory-constrained
 #ifdef _P32
     LLVM_DEBUG(dbgs() << "32-bit systems are restricted to a single thread\n");
     return 1;
 #endif
+    if (jl_is_timing_passes) // LLVM isn't thread safe when timing the passes https://github.com/llvm/llvm-project/issues/44417
+        return 1;
     // This is not overridable because empty modules do occasionally appear, but they'll be very small and thus exit early to
     // known easy behavior. Plus they really don't warrant multiple threads
     if (info.weight < 1000) {
@@ -1385,7 +2046,7 @@ static unsigned compute_image_thread_count(const ModuleInfo &info) {
         return 1;
     }
 
-    unsigned threads = std::max(jl_cpu_threads() / 2, 1);
+    unsigned threads = std::max(jl_effective_threads() / 2, 1);
 
     auto max_threads = info.globals / 100;
     if (max_threads < threads) {
@@ -1427,13 +2088,16 @@ static unsigned compute_image_thread_count(const ModuleInfo &info) {
     return threads;
 }
 
+jl_emission_params_t default_emission_params = { 1 };
+
 // takes the running content that has collected in the shadow module and dump it to disk
 // this builds the object file portion of the sysimage files for fast startup
 extern "C" JL_DLLEXPORT_CODEGEN
 void jl_dump_native_impl(void *native_code,
         const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname,
         const char *asm_fname,
-        ios_t *z, ios_t *s)
+        ios_t *z, ios_t *s,
+        jl_emission_params_t *params)
 {
     JL_TIMING(NATIVE_AOT, NATIVE_Dump);
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
@@ -1442,6 +2106,11 @@ void jl_dump_native_impl(void *native_code,
         delete data;
         return;
     }
+
+    if (!params) {
+        params = &default_emission_params;
+    }
+
     // We don't want to use MCJIT's target machine because
     // it uses the large code model and we may potentially
     // want less optimizations there.
@@ -1451,16 +2120,24 @@ void jl_dump_native_impl(void *native_code,
         TheTriple.setObjectFormat(Triple::COFF);
     } else if (TheTriple.isOSDarwin()) {
         TheTriple.setObjectFormat(Triple::MachO);
-        TheTriple.setOS(llvm::Triple::MacOSX);
-    }
-    Optional<Reloc::Model> RelocModel;
-    if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD()) {
+        SmallString<16> Str;
+        Str += "macosx";
+        if (TheTriple.isAArch64())
+            Str += "11.0.0"; // Update this if MACOSX_VERSION_MIN changes
+        else
+            Str += "10.14.0";
+        TheTriple.setOSName(Str);
+    }
+    std::optional<Reloc::Model> RelocModel;
+    if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD() || TheTriple.isOSOpenBSD()) {
         RelocModel = Reloc::PIC_;
     }
+
     CodeModel::Model CMModel = CodeModel::Small;
-    if (TheTriple.isPPC()) {
-        // On PPC the small model is limited to 16bit offsets
-        CMModel = CodeModel::Medium;
+    if (TheTriple.isPPC() || TheTriple.isRISCV() ||
+        (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux())) {
+        // On PPC the small model is limited to 16bit offsets. For very large images the small code model
+        CMModel = CodeModel::Medium; //  isn't good enough on x86 so use Medium, it has no cost because only the image goes in .ldata
     }
     std::unique_ptr<TargetMachine> SourceTM(
         jl_ExecutionEngine->getTarget().createTargetMachine(
@@ -1470,8 +2147,13 @@ void jl_dump_native_impl(void *native_code,
             jl_ExecutionEngine->getTargetOptions(),
             RelocModel,
             CMModel,
+#if JL_LLVM_VERSION >= 180000
+            CodeGenOptLevel::Aggressive // -O3 TODO: respect command -O0 flag?
+#else
             CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag?
+#endif
             ));
+    fixupTM(*SourceTM);
     auto DL = jl_create_datalayout(*SourceTM);
     std::string StackProtectorGuard;
     unsigned OverrideStackAlignment;
@@ -1488,35 +2170,68 @@ void jl_dump_native_impl(void *native_code,
     SmallVector<AOTOutputs, 16> data_outputs;
     SmallVector<AOTOutputs, 16> metadata_outputs;
     if (z) {
+        JL_TIMING(NATIVE_AOT, NATIVE_Sysimg);
         LLVMContext Context;
+        Context.setDiscardValueNames(true);
         Module sysimgM("sysimg", Context);
         sysimgM.setTargetTriple(TheTriple.str());
         sysimgM.setDataLayout(DL);
         sysimgM.setStackProtectorGuard(StackProtectorGuard);
         sysimgM.setOverrideStackAlignment(OverrideStackAlignment);
-        Constant *data = ConstantDataArray::get(Context,
-            ArrayRef<uint8_t>((const unsigned char*)z->buf, z->size));
+
+        int compression = jl_options.compress_sysimage ? 15 : 0;
+        ArrayRef<char> sysimg_data{z->buf, (size_t)z->size};
+        SmallVector<char, 0> compressed_data;
+        if (compression) {
+            compressed_data.resize(ZSTD_compressBound(z->size));
+            size_t comp_size = ZSTD_compress(compressed_data.data(), compressed_data.size(),
+                                             z->buf, z->size, compression);
+            compressed_data.resize(comp_size);
+            sysimg_data = compressed_data;
+            ios_close(z);
+            free(z);
+        }
+
+        Constant *data = ConstantDataArray::get(Context, sysimg_data);
         auto sysdata = new GlobalVariable(sysimgM, data->getType(), false,
                                      GlobalVariable::ExternalLinkage,
                                      data, "jl_system_image_data");
-        sysdata->setAlignment(Align(64));
+        sysdata->setAlignment(Align(jl_page_size));
+#if JL_LLVM_VERSION >= 180000
+        sysdata->setCodeModel(CodeModel::Large);
+#else
+        if (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux())
+            sysdata->setSection(".ldata");
+#endif
         addComdat(sysdata, TheTriple);
-        Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), z->size);
+        Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), sysimg_data.size());
         addComdat(new GlobalVariable(sysimgM, len->getType(), true,
                                      GlobalVariable::ExternalLinkage,
                                      len, "jl_system_image_size"), TheTriple);
-        // Free z here, since we've copied out everything into data
-        // Results in serious memory savings
-        ios_close(z);
-        free(z);
+
+        const char *unpack_func = compression ? "jl_image_unpack_zstd" : "jl_image_unpack_uncomp";
+        auto unpack = new GlobalVariable(sysimgM, DL.getIntPtrType(Context), true,
+                                         GlobalVariable::ExternalLinkage, nullptr,
+                                         unpack_func);
+        addComdat(new GlobalVariable(sysimgM, PointerType::getUnqual(Context), true,
+                                     GlobalVariable::ExternalLinkage, unpack,
+                                     "jl_image_unpack"),
+                  TheTriple);
+
+        if (!compression) {
+            // Free z here, since we've copied out everything into data
+            // Results in serious memory savings
+            ios_close(z);
+            free(z);
+        }
+        compressed_data.clear();
         // Note that we don't set z to null, this allows the check in WRITE_ARCHIVE
         // to function as expected
         // no need to free the module/context, destructor handles that
         sysimg_outputs = compile(sysimgM, "sysimg", 1, [](Module &) {});
     }
 
-    bool imaging_mode = imaging_default() || jl_options.outputo;
-
+    const bool imaging_mode = true;
     unsigned threads = 1;
     unsigned nfvars = 0;
     unsigned ngvars = 0;
@@ -1526,11 +2241,23 @@ void jl_dump_native_impl(void *native_code,
     bool has_veccall = false;
 
     data->M.withModuleDo([&](Module &dataM) {
+        JL_TIMING(NATIVE_AOT, NATIVE_Setup);
         dataM.setTargetTriple(TheTriple.str());
         dataM.setDataLayout(DL);
+        dataM.setPICLevel(PICLevel::BigPIC);
         auto &Context = dataM.getContext();
 
-        Type *T_psize = dataM.getDataLayout().getIntPtrType(Context)->getPointerTo();
+        Type *T_psize = PointerType::getUnqual(Context);
+
+        // This should really be in jl_create_native, but we haven't
+        // yet set the target triple binary format correctly at that
+        // point. This should be resolved when we start JITting for
+        // COFF when we switch over to JITLink.
+        for (auto &GA : dataM.aliases()) {
+            // Global aliases are only used for ccallable things, so we should
+            // mark them as dllexport
+            addComdat(&GA, TheTriple);
+        }
 
         // add metadata information
         if (imaging_mode) {
@@ -1560,9 +2287,9 @@ void jl_dump_native_impl(void *native_code,
             LLVM_DEBUG(dbgs() << "Using " << threads << " to emit aot image\n");
             nfvars = data->jl_sysimg_fvars.size();
             ngvars = data->jl_sysimg_gvars.size();
-            emit_offset_table(dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize);
-            emit_offset_table(dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize);
-            std::vector<uint32_t> idxs;
+            emit_table(dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize);
+            emit_table(dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize);
+            SmallVector<uint32_t, 0> idxs;
             idxs.resize(data->jl_sysimg_gvars.size());
             std::iota(idxs.begin(), idxs.end(), 0);
             auto gidxs = ConstantDataArray::get(Context, idxs);
@@ -1582,22 +2309,13 @@ void jl_dump_native_impl(void *native_code,
             fidxs_var->setDSOLocal(true);
             dataM.addModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(Context, "_0"));
 
-            // reflect the address of the jl_RTLD_DEFAULT_handle variable
-            // back to the caller, so that we can check for consistency issues
-            GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(&dataM);
-            addComdat(new GlobalVariable(dataM,
-                                        jlRTLD_DEFAULT_var->getType(),
-                                        true,
-                                        GlobalVariable::ExternalLinkage,
-                                        jlRTLD_DEFAULT_var,
-                                        "jl_RTLD_DEFAULT_handle_pointer"), TheTriple);
-
             // let the compiler know we are going to internalize a copy of this,
             // if it has a current usage with ExternalLinkage
-            auto small_typeof_copy = dataM.getGlobalVariable("small_typeof");
-            if (small_typeof_copy) {
-                small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility);
-                small_typeof_copy->setDSOLocal(true);
+            auto jl_small_typeof_copy = dataM.getGlobalVariable("jl_small_typeof");
+            if (jl_small_typeof_copy) {
+                jl_small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility);
+                jl_small_typeof_copy->setDSOLocal(true);
+                jl_small_typeof_copy->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DefaultStorageClass);
             }
         }
 
@@ -1610,27 +2328,47 @@ void jl_dump_native_impl(void *native_code,
         auto lock = TSCtx.getLock();
         auto dataM = data->M.getModuleUnlocked();
 
-        // Delete data when add_output thinks it's done with it
-        // Saves memory for use when multithreading
-        data_outputs = compile(*dataM, "text", threads, [data](Module &) { delete data; });
+        data_outputs = compile(*dataM, "text", threads, [data, &lock, &TSCtx](Module &) {
+            // Delete data when add_output thinks it's done with it
+            // Saves memory for use when multithreading
+            auto lock2 = std::move(lock);
+            delete data;
+            // Drop last reference to shared LLVM::Context
+            auto TSCtx2 = std::move(TSCtx);
+        });
     }
 
-    {
+    if (params->emit_metadata) {
+        JL_TIMING(NATIVE_AOT, NATIVE_Metadata);
         LLVMContext Context;
+        Context.setDiscardValueNames(true);
         Module metadataM("metadata", Context);
         metadataM.setTargetTriple(TheTriple.str());
         metadataM.setDataLayout(DL);
         metadataM.setStackProtectorGuard(StackProtectorGuard);
         metadataM.setOverrideStackAlignment(OverrideStackAlignment);
 
+        // reflect the address of the jl_RTLD_DEFAULT_handle variable
+        // back to the caller, so that we can check for consistency issues
+        GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(&metadataM);
+
         Type *T_size = DL.getIntPtrType(Context);
-        Type *T_psize = T_size->getPointerTo();
+        Type *T_psize = PointerType::getUnqual(T_size->getContext());
+        Type *T_ptr = PointerType::get(Context, 0);
+
+        auto FT = FunctionType::get(PointerType::getUnqual(Context), {}, false);
+        auto F = Function::Create(FT, Function::ExternalLinkage, "get_jl_RTLD_DEFAULT_handle_addr", metadataM);
+        llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F));
+        builder.CreateRet(jlRTLD_DEFAULT_var);
+        F->setLinkage(GlobalValue::ExternalLinkage);
+        if (TheTriple.isOSBinFormatCOFF())
+            F->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass);
 
         if (TheTriple.isOSWindows()) {
             // Windows expect that the function `_DllMainStartup` is present in an dll.
             // Normal compilers use something like Zig's crtdll.c instead we provide a
             // a stub implementation.
-            auto T_pvoid = Type::getInt8Ty(Context)->getPointerTo();
+            auto T_pvoid = PointerType::getUnqual(Context);
             auto T_int32 = Type::getInt32Ty(Context);
             auto FT = FunctionType::get(T_int32, {T_pvoid, T_int32, T_pvoid}, false);
             auto F = Function::Create(FT, Function::ExternalLinkage, "_DllMainCRTStartup", metadataM);
@@ -1640,9 +2378,9 @@ void jl_dump_native_impl(void *native_code,
             builder.CreateRet(ConstantInt::get(T_int32, 1));
         }
         if (imaging_mode) {
-            auto specs = jl_get_llvm_clone_targets();
+            auto specs = jl_get_llvm_clone_targets(jl_options.cpu_target);
             const uint32_t base_flags = has_veccall ? JL_TARGET_VEC_CALL : 0;
-            std::vector<uint8_t> data;
+            SmallVector<uint8_t, 0> data;
             auto push_i32 = [&] (uint32_t v) {
                 uint8_t buff[4];
                 memcpy(buff, &v, 4);
@@ -1659,24 +2397,33 @@ void jl_dump_native_impl(void *native_code,
                                         GlobalVariable::InternalLinkage,
                                         value, "jl_dispatch_target_ids");
             auto shards = emit_shard_table(metadataM, T_size, T_psize, threads);
-            auto ptls = emit_ptls_table(metadataM, T_size, T_psize);
+            auto ptls = emit_ptls_table(metadataM, T_size, T_ptr);
             auto header = emit_image_header(metadataM, threads, nfvars, ngvars);
-            auto AT = ArrayType::get(T_size, sizeof(small_typeof) / sizeof(void*));
-            auto small_typeof_copy = new GlobalVariable(metadataM, AT, false,
+            auto AT = ArrayType::get(T_size, sizeof(jl_small_typeof) / sizeof(void*));
+            auto jl_small_typeof_copy = new GlobalVariable(metadataM, AT, false,
                                                         GlobalVariable::ExternalLinkage,
                                                         Constant::getNullValue(AT),
-                                                        "small_typeof");
-            small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility);
-            small_typeof_copy->setDSOLocal(true);
-            AT = ArrayType::get(T_psize, 5);
+                                                        "jl_small_typeof");
+            jl_small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility);
+            jl_small_typeof_copy->setDSOLocal(true);
+
+            // Create CPU target string constant
+            auto cpu_target_str = jl_options.cpu_target ? jl_options.cpu_target : "native";
+            auto cpu_target_data = ConstantDataArray::getString(Context, cpu_target_str, true);
+            auto cpu_target_global = new GlobalVariable(metadataM, cpu_target_data->getType(), true,
+                                                       GlobalVariable::InternalLinkage,
+                                                       cpu_target_data, "jl_cpu_target_string");
+
+            AT = ArrayType::get(T_psize, 6);
             auto pointers = new GlobalVariable(metadataM, AT, false,
                                             GlobalVariable::ExternalLinkage,
                                             ConstantArray::get(AT, {
                                                     ConstantExpr::getBitCast(header, T_psize),
                                                     ConstantExpr::getBitCast(shards, T_psize),
                                                     ConstantExpr::getBitCast(ptls, T_psize),
-                                                    ConstantExpr::getBitCast(small_typeof_copy, T_psize),
-                                                    ConstantExpr::getBitCast(target_ids, T_psize)
+                                                    ConstantExpr::getBitCast(jl_small_typeof_copy, T_psize),
+                                                    ConstantExpr::getBitCast(target_ids, T_psize),
+                                                    ConstantExpr::getBitCast(cpu_target_global, T_psize)
                                             }),
                                             "jl_image_pointers");
             addComdat(pointers, TheTriple);
@@ -1690,10 +2437,18 @@ void jl_dump_native_impl(void *native_code,
         metadata_outputs = compile(metadataM, "data", 1, [](Module &) {});
     }
 
-    object::Archive::Kind Kind = getDefaultForHost(TheTriple);
+    {
+        JL_TIMING(NATIVE_AOT, NATIVE_Write);
+
+        object::Archive::Kind Kind = getDefaultForHost(TheTriple);
+#if JL_LLVM_VERSION >= 180000
+#define WritingMode SymtabWritingMode::NormalSymtab
+#else
+#define WritingMode true
+#endif
 #define WRITE_ARCHIVE(fname, field, prefix, suffix) \
     if (fname) {\
-        std::vector<NewArchiveMember> archive; \
+        SmallVector<NewArchiveMember, 0> archive; \
         SmallVector<std::string, 16> filenames; \
         SmallVector<StringRef, 16> buffers; \
         for (size_t i = 0; i < threads; i++) { \
@@ -1709,432 +2464,166 @@ void jl_dump_native_impl(void *native_code,
         for (size_t i = 0; i < filenames.size(); i++) { \
             archive.push_back(NewArchiveMember(MemoryBufferRef(buffers[i], filenames[i]))); \
         } \
-        handleAllErrors(writeArchive(fname, archive, true, Kind, true, false), reportWriterError); \
+        handleAllErrors(writeArchive(fname, archive, WritingMode, Kind, true, false), reportWriterError); \
     }
 
-    WRITE_ARCHIVE(unopt_bc_fname, unopt, "_unopt", ".bc");
-    WRITE_ARCHIVE(bc_fname, opt, "_opt", ".bc");
-    WRITE_ARCHIVE(obj_fname, obj, "", ".o");
-    WRITE_ARCHIVE(asm_fname, asm_, "", ".s");
-}
-
-void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
-{
-    PM->add(new TargetLibraryInfoWrapperPass(triple));
-    PM->add(createTargetTransformInfoWrapperPass(std::move(analysis)));
+        WRITE_ARCHIVE(unopt_bc_fname, unopt, "_unopt", ".bc");
+        WRITE_ARCHIVE(bc_fname, opt, "_opt", ".bc");
+        WRITE_ARCHIVE(obj_fname, obj, "", ".o");
+        WRITE_ARCHIVE(asm_fname, asm_, "", ".s");
+#undef WRITE_ARCHIVE
+    }
 }
 
 
-void addMachinePasses(legacy::PassManagerBase *PM, int optlevel)
+// sometimes in GDB you want to find out what code would be created from a mi
+extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi)
 {
-    // TODO: don't do this on CPUs that natively support Float16
-    PM->add(createDemoteFloat16Pass());
-    if (optlevel > 1)
-        PM->add(createGVNPass());
-}
+    jl_llvmf_dump_t llvmf_dump;
+    size_t world = jl_current_task->world_age;
+    JL_STREAM *stream = (JL_STREAM*)STDERR_FILENO;
 
-// this defines the set of optimization passes defined for Julia at various optimization levels.
-// it assumes that the TLI and TTI wrapper passes have already been added.
-void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
-                           bool lower_intrinsics, bool dump_native,
-                           bool external_use)
-{
-    // Note: LLVM 12 disabled the hoisting of common instruction
-    //       before loop vectorization (https://reviews.llvm.org/D84108).
-    //
-    // TODO: CommonInstruction hoisting/sinking enables AllocOpt
-    //       to merge allocations and sometimes eliminate them,
-    //       since AllocOpt does not handle PhiNodes.
-    //       Enable this instruction hoisting because of this and Union benchmarks.
-    auto basicSimplifyCFGOptions = SimplifyCFGOptions()
-        .convertSwitchRangeToICmp(true)
-        .convertSwitchToLookupTable(true)
-        .forwardSwitchCondToPhi(true);
-    auto aggressiveSimplifyCFGOptions = SimplifyCFGOptions()
-        .convertSwitchRangeToICmp(true)
-        .convertSwitchToLookupTable(true)
-        .forwardSwitchCondToPhi(true)
-        //These mess with loop rotation, so only do them after that
-        .hoistCommonInsts(true)
-        // Causes an SRET assertion error in late-gc-lowering
-        // .sinkCommonInsts(true)
-        ;
-#ifdef JL_DEBUG_BUILD
-    PM->add(createGCInvariantVerifierPass(true));
-    PM->add(createVerifierPass());
-#endif
+    jl_code_info_t *src = jl_gdbcodetyped1(mi, world);
+    JL_GC_PUSH1(&src);
 
-    PM->add(createConstantMergePass());
-    if (opt_level < 2) {
-        if (!dump_native) {
-            // we won't be multiversioning, so lower CPU feature checks early on
-            // so that we can avoid an additional CFG simplification pass at the end.
-            PM->add(createCPUFeaturesPass());
-            if (opt_level == 1)
-                PM->add(createInstSimplifyLegacyPass());
-        }
-        PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions));
-        if (opt_level == 1) {
-            PM->add(createSROAPass());
-            PM->add(createInstructionCombiningPass());
-            PM->add(createEarlyCSEPass());
-            // maybe add GVN?
-            // also try GVNHoist and GVNSink
-        }
-        PM->add(createMemCpyOptPass());
-        PM->add(createAlwaysInlinerLegacyPass()); // Respect always_inline
-        PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop
-        if (lower_intrinsics) {
-            PM->add(createBarrierNoopPass());
-            PM->add(createLowerExcHandlersPass());
-            PM->add(createGCInvariantVerifierPass(false));
-            PM->add(createRemoveNIPass());
-            PM->add(createLateLowerGCFramePass());
-            PM->add(createFinalLowerGCPass());
-            PM->add(createLowerPTLSPass(dump_native));
-        }
-        else {
-            PM->add(createRemoveNIPass());
-        }
-        PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop
-        if (dump_native) {
-            PM->add(createMultiVersioningPass(external_use));
-            PM->add(createCPUFeaturesPass());
-            // minimal clean-up to get rid of CPU feature checks
-            if (opt_level == 1) {
-                PM->add(createInstSimplifyLegacyPass());
-                PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions));
-            }
-        }
-#if JL_LLVM_VERSION < 150000
-#if defined(_COMPILER_ASAN_ENABLED_)
-        PM->add(createAddressSanitizerFunctionPass());
-#endif
-#if defined(_COMPILER_MSAN_ENABLED_)
-        PM->add(createMemorySanitizerLegacyPassPass());
-#endif
-#if defined(_COMPILER_TSAN_ENABLED_)
-        PM->add(createThreadSanitizerLegacyPassPass());
-#endif
-#endif
-        return;
-    }
-    PM->add(createPropagateJuliaAddrspaces());
-    PM->add(createScopedNoAliasAAWrapperPass());
-    PM->add(createTypeBasedAAWrapperPass());
-    if (opt_level >= 3) {
-        PM->add(createBasicAAWrapperPass());
-    }
-
-    PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions));
-    PM->add(createDeadCodeEliminationPass());
-    PM->add(createSROAPass());
-
-    //PM->add(createMemCpyOptPass());
-
-    PM->add(createAlwaysInlinerLegacyPass()); // Respect always_inline
-
-    // Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
-    // merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
-    // pass.
-    PM->add(createAllocOptPass());
-    // consider AggressiveInstCombinePass at optlevel > 2
-    PM->add(createInstructionCombiningPass());
-    PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions));
-    if (dump_native)
-        PM->add(createMultiVersioningPass(external_use));
-    PM->add(createCPUFeaturesPass());
-    PM->add(createSROAPass());
-    PM->add(createInstSimplifyLegacyPass());
-    PM->add(createJumpThreadingPass());
-    PM->add(createCorrelatedValuePropagationPass());
-
-    PM->add(createReassociatePass());
-
-    PM->add(createEarlyCSEPass());
-
-    // Load forwarding above can expose allocations that aren't actually used
-    // remove those before optimizing loops.
-    PM->add(createAllocOptPass());
-    PM->add(createLoopRotatePass());
-    // moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1)
-#ifdef USE_POLLY
-    // LCSSA (which has already run at this point due to the dependencies of the
-    // above passes) introduces redundant phis that hinder Polly. Therefore we
-    // run InstCombine here to remove them.
-    PM->add(createInstructionCombiningPass());
-    PM->add(polly::createCodePreparationPass());
-    polly::registerPollyPasses(*PM);
-    PM->add(polly::createCodegenCleanupPass());
-#endif
-    // LoopRotate strips metadata from terminator, so run LowerSIMD afterwards
-    PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop
-    PM->add(createLICMPass());
-    PM->add(createJuliaLICMPass());
-#if JL_LLVM_VERSION >= 150000
-    PM->add(createSimpleLoopUnswitchLegacyPass());
-#else
-    PM->add(createLoopUnswitchPass());
-#endif
-    PM->add(createLICMPass());
-    PM->add(createJuliaLICMPass());
-    PM->add(createInductiveRangeCheckEliminationPass()); // Must come before indvars
-    // Subsequent passes not stripping metadata from terminator
-    PM->add(createInstSimplifyLegacyPass());
-    PM->add(createLoopIdiomPass());
-    PM->add(createIndVarSimplifyPass());
-    PM->add(createLoopDeletionPass());
-    PM->add(createSimpleLoopUnrollPass());
-
-    // Run our own SROA on heap objects before LLVM's
-    PM->add(createAllocOptPass());
-    // Re-run SROA after loop-unrolling (useful for small loops that operate,
-    // over the structure of an aggregate)
-    PM->add(createSROAPass());
-    // might not be necessary:
-    PM->add(createInstSimplifyLegacyPass());
-
-    PM->add(createGVNPass());
-    PM->add(createMemCpyOptPass());
-    PM->add(createSCCPPass());
-
-    //These next two passes must come before IRCE to eliminate the bounds check in #43308
-    PM->add(createCorrelatedValuePropagationPass());
-    PM->add(createDeadCodeEliminationPass());
-
-    PM->add(createInductiveRangeCheckEliminationPass()); // Must come between the two GVN passes
-
-    // Run instcombine after redundancy elimination to exploit opportunities
-    // opened up by them.
-    // This needs to be InstCombine instead of InstSimplify to allow
-    // loops over Union-typed arrays to vectorize.
-    PM->add(createInstructionCombiningPass());
-    PM->add(createJumpThreadingPass());
-    if (opt_level >= 3) {
-        PM->add(createGVNPass()); // Must come after JumpThreading and before LoopVectorize
-    }
-    PM->add(createDeadStoreEliminationPass());
-    // see if all of the constant folding has exposed more loops
-    // to simplification and deletion
-    // this helps significantly with cleaning up iteration
-    PM->add(createCFGSimplificationPass(aggressiveSimplifyCFGOptions));
-
-    // More dead allocation (store) deletion before loop optimization
-    // consider removing this:
-    // Moving this after aggressive CFG simplification helps deallocate when allocations are hoisted
-    PM->add(createAllocOptPass());
-    PM->add(createLoopDeletionPass());
-    PM->add(createInstructionCombiningPass());
-    PM->add(createLoopVectorizePass());
-    PM->add(createLoopLoadEliminationPass());
-    // Cleanup after LV pass
-    PM->add(createInstructionCombiningPass());
-    PM->add(createCFGSimplificationPass( // Aggressive CFG simplification
-        aggressiveSimplifyCFGOptions
-    ));
-    PM->add(createSLPVectorizerPass());
-    // might need this after LLVM 11:
-    //PM->add(createVectorCombinePass());
-
-    PM->add(createAggressiveDCEPass());
-
-    if (lower_intrinsics) {
-        // LowerPTLS removes an indirect call. As a result, it is likely to trigger
-        // LLVM's devirtualization heuristics, which would result in the entire
-        // pass pipeline being re-executed. Prevent this by inserting a barrier.
-        PM->add(createBarrierNoopPass());
-        PM->add(createLowerExcHandlersPass());
-        PM->add(createGCInvariantVerifierPass(false));
-        // Needed **before** LateLowerGCFrame on LLVM < 12
-        // due to bug in `CreateAlignmentAssumption`.
-        PM->add(createRemoveNIPass());
-        PM->add(createLateLowerGCFramePass());
-        PM->add(createFinalLowerGCPass());
-        // We need these two passes and the instcombine below
-        // after GC lowering to let LLVM do some constant propagation on the tags.
-        // and remove some unnecessary write barrier checks.
-        PM->add(createGVNPass());
-        PM->add(createSCCPPass());
-        // Remove dead use of ptls
-        PM->add(createDeadCodeEliminationPass());
-        PM->add(createLowerPTLSPass(dump_native));
-        PM->add(createInstructionCombiningPass());
-        // Clean up write barrier and ptls lowering
-        PM->add(createCFGSimplificationPass());
+    jl_printf(stream, "---- dumping IR for ----\n");
+    jl_static_show(stream, (jl_value_t*)mi);
+    jl_printf(stream, "\n----\n");
+
+    jl_printf(stream, "\n---- unoptimized IR ----\n");
+    jl_get_llvmf_defn(&llvmf_dump, mi, src, 0, false, jl_default_cgparams);
+    if (llvmf_dump.F) {
+        jl_value_t *ir = jl_dump_function_ir(&llvmf_dump, 0, 1, "source");
+        if (ir != NULL && jl_is_string(ir))
+            jl_printf(stream, "%s", jl_string_data(ir));
     }
-    else {
-        PM->add(createRemoveNIPass());
+    jl_printf(stream, "\n----\n");
+
+    jl_printf(stream, "\n---- optimized IR ----\n");
+    jl_get_llvmf_defn(&llvmf_dump, mi, src, 0, true, jl_default_cgparams);
+    if (llvmf_dump.F) {
+        jl_value_t *ir = jl_dump_function_ir(&llvmf_dump, 0, 1, "source");
+        if (ir != NULL && jl_is_string(ir))
+            jl_printf(stream, "%s", jl_string_data(ir));
     }
-    PM->add(createCombineMulAddPass());
-    PM->add(createDivRemPairsPass());
-#if JL_LLVM_VERSION < 150000
-#if defined(_COMPILER_ASAN_ENABLED_)
-    PM->add(createAddressSanitizerFunctionPass());
-#endif
-#if defined(_COMPILER_MSAN_ENABLED_)
-    PM->add(createMemorySanitizerLegacyPassPass());
-#endif
-#if defined(_COMPILER_TSAN_ENABLED_)
-    PM->add(createThreadSanitizerLegacyPassPass());
-#endif
-#endif
-}
+    jl_printf(stream, "\n----\n");
 
-// An LLVM module pass that just runs all julia passes in order. Useful for
-// debugging
-template <int OptLevel, bool dump_native>
-class JuliaPipeline : public Pass {
-public:
-    static char ID;
-    // A bit of a hack, but works
-    struct TPMAdapter : public PassManagerBase {
-        PMTopLevelManager *TPM;
-        TPMAdapter(PMTopLevelManager *TPM) : TPM(TPM) {}
-        void add(Pass *P) { TPM->schedulePass(P); }
-    };
-    void preparePassManager(PMStack &Stack) override {
-        (void)jl_init_llvm();
-        PMTopLevelManager *TPM = Stack.top()->getTopLevelManager();
-        TPMAdapter Adapter(TPM);
-        addTargetPasses(&Adapter, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis());
-        addOptimizationPasses(&Adapter, OptLevel, true, dump_native, true);
-        addMachinePasses(&Adapter, OptLevel);
-    }
-    JuliaPipeline() : Pass(PT_PassManager, ID) {}
-    Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const override {
-        return createPrintModulePass(O, Banner);
+    jl_printf(stream, "\n---- assembly ----\n");
+    jl_get_llvmf_defn(&llvmf_dump, mi, src, 0, true, jl_default_cgparams);
+    if (llvmf_dump.F) {
+        jl_value_t *ir = jl_dump_function_asm(&llvmf_dump, 0, "", "source", 0, true);
+        if (ir != NULL && jl_is_string(ir))
+            jl_printf(stream, "%s", jl_string_data(ir));
     }
-};
-template<> char JuliaPipeline<0,false>::ID = 0;
-template<> char JuliaPipeline<2,false>::ID = 0;
-template<> char JuliaPipeline<3,false>::ID = 0;
-template<> char JuliaPipeline<0,true>::ID = 0;
-template<> char JuliaPipeline<2,true>::ID = 0;
-template<> char JuliaPipeline<3,true>::ID = 0;
-static RegisterPass<JuliaPipeline<0,false>> X("juliaO0", "Runs the entire julia pipeline (at -O0)", false, false);
-static RegisterPass<JuliaPipeline<2,false>> Y("julia", "Runs the entire julia pipeline (at -O2)", false, false);
-static RegisterPass<JuliaPipeline<3,false>> Z("juliaO3", "Runs the entire julia pipeline (at -O3)", false, false);
-
-static RegisterPass<JuliaPipeline<0,true>> XS("juliaO0-sysimg", "Runs the entire julia pipeline (at -O0/sysimg mode)", false, false);
-static RegisterPass<JuliaPipeline<2,true>> YS("julia-sysimg", "Runs the entire julia pipeline (at -O2/sysimg mode)", false, false);
-static RegisterPass<JuliaPipeline<3,true>> ZS("juliaO3-sysimg", "Runs the entire julia pipeline (at -O3/sysimg mode)", false, false);
+    jl_printf(stream, "\n----\n");
+    JL_GC_POP();
 
-extern "C" JL_DLLEXPORT_CODEGEN
-void jl_add_optimization_passes_impl(LLVMPassManagerRef PM, int opt_level, int lower_intrinsics) {
-    addOptimizationPasses(unwrap(PM), opt_level, lower_intrinsics);
+    return src;
 }
 
 // --- native code info, and dump function to IR and ASM ---
 // Get pointer to llvm::Function instance, compiling if necessary
 // for use in reflection from Julia.
-// this is paired with jl_dump_function_ir, jl_dump_function_asm, jl_dump_method_asm in particular ways:
-// misuse will leak memory or cause read-after-free
+// This is paired with jl_dump_function_ir and jl_dump_function_asm, either of which will free all memory allocated here
 extern "C" JL_DLLEXPORT_CODEGEN
-void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, size_t world, char getwrapper, char optimize, const jl_cgparams_t params)
+void jl_get_llvmf_defn_impl(jl_llvmf_dump_t *dump, jl_method_instance_t *mi, jl_code_info_t *src, char getwrapper, char optimize, const jl_cgparams_t params)
 {
-    if (jl_is_method(mi->def.method) && mi->def.method->source == NULL &&
-            mi->def.method->generator == NULL) {
-        // not a generic function
-        dump->F = NULL;
-        return;
-    }
-
-    // get the source code for this function
-    jl_value_t *jlrettype = (jl_value_t*)jl_any_type;
-    jl_code_info_t *src = NULL;
-    jl_code_instance_t *codeinst = NULL;
-    JL_GC_PUSH3(&src, &jlrettype, &codeinst);
-    if (jl_is_method(mi->def.method) && mi->def.method->source != NULL && mi->def.method->source != jl_nothing && jl_ir_flag_inferred(mi->def.method->source)) {
-        src = (jl_code_info_t*)mi->def.method->source;
-        if (src && !jl_is_code_info(src))
-            src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src);
-    }
-    else {
-        jl_value_t *ci = jl_rettype_inferred_addr(mi, world, world);
-        if (ci != jl_nothing) {
-            codeinst = (jl_code_instance_t*)ci;
-            src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
-            if ((jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method))
-                src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src);
-            jlrettype = codeinst->rettype;
-            codeinst = NULL; // not needed outside of this branch
-        }
-        if (!src || (jl_value_t*)src == jl_nothing) {
-            src = jl_type_infer(mi, world, 0);
-            if (src)
-                jlrettype = src->rettype;
-            else if (jl_is_method(mi->def.method)) {
-                src = mi->def.method->generator ? jl_code_for_staged(mi, world) : (jl_code_info_t*)mi->def.method->source;
-                if (src && (jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method))
-                    src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src);
-            }
-            // TODO: use mi->uninferred
-        }
-    }
-
     // emit this function into a new llvm module
+    dump->F = nullptr;
+    dump->TSM = nullptr;
     if (src && jl_is_code_info(src)) {
-        auto ctx = jl_ExecutionEngine->getContext();
-        orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx, imaging_default());
-        uint64_t compiler_start_time = 0;
-        uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
-        if (measure_compile_time_enabled)
-            compiler_start_time = jl_hrtime();
-        JL_LOCK(&jl_codegen_lock);
-        auto target_info = m.withModuleDo([&](Module &M) {
-            return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
-        });
-        jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second));
-        output.world = world;
-        output.params = &params;
-        output.imaging = imaging_default();
-        // This would be nice, but currently it causes some assembly regressions that make printed output
-        // differ very significantly from the actual non-imaging mode code.
-        // // Force imaging mode for names of pointers
-        // output.imaging = true;
-        // This would also be nice, but it seems to cause OOMs on the windows32 builder
-        // // Force at least medium debug info for introspection
-        // No debug info = no variable names,
-        // max debug info = llvm.dbg.declare/value intrinsics which clutter IR output
-        output.debug_level = jl_options.debug_level;
-        auto decls = jl_emit_code(m, mi, src, jlrettype, output);
-        JL_UNLOCK(&jl_codegen_lock); // Might GC
-
-        Function *F = NULL;
-        if (m) {
-            // if compilation succeeded, prepare to return the result
-            // For imaging mode, global constants are currently private without initializer
-            // which isn't legal. Convert them to extern linkage so that the code can compile
-            // and will better match what's actually in sysimg.
-            for (auto &global : output.globals)
-                global.second->setLinkage(GlobalValue::ExternalLinkage);
-            assert(!verifyModule(*m.getModuleUnlocked(), &errs()));
-            if (optimize) {
-#ifndef JL_USE_NEW_PM
-                legacy::PassManager PM;
-                addTargetPasses(&PM, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis());
-                addOptimizationPasses(&PM, jl_options.opt_level);
-                addMachinePasses(&PM, jl_options.opt_level);
-#else
-                NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level)};
-#endif
-                //Safe b/c context lock is held by output
-                PM.run(*m.getModuleUnlocked());
-                assert(!verifyModule(*m.getModuleUnlocked(), &errs()));
+        auto ctx = jl_ExecutionEngine->makeContext();
+        const auto &DL = jl_ExecutionEngine->getDataLayout();
+        const auto &TT = jl_ExecutionEngine->getTargetTriple();
+        orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx, DL, TT);
+        Function *F = nullptr;
+        {
+            uint64_t compiler_start_time = 0;
+            uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+            if (measure_compile_time_enabled)
+                compiler_start_time = jl_hrtime();
+            jl_codegen_params_t output(ctx, DL, TT);
+            output.params = &params;
+            output.imaging_mode = jl_options.image_codegen;
+            output.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
+            JL_GC_PUSH1(&output.temporary_roots);
+            jl_llvm_functions_t decls = jl_emit_code(m, mi, src, mi->specTypes, src->rettype, output);
+            // while not required, also emit the cfunc thunks, based on the
+            // inferred ABIs of their targets in the current latest world,
+            // since otherwise it is challenging to see all relevant codes
+            jl_compiled_functions_t compiled_functions;
+            size_t latestworld = jl_atomic_load_acquire(&jl_world_counter);
+            for (cfunc_decl_t &cfunc : output.cfuncs) {
+                jl_value_t *sigt = cfunc.abi.sigt;
+                JL_GC_PROMISE_ROOTED(sigt);
+                jl_value_t *mi = jl_get_specialization1((jl_tupletype_t*)sigt, latestworld, 0);
+                if (mi == jl_nothing)
+                    continue;
+                jl_code_instance_t *codeinst = jl_type_infer((jl_method_instance_t*)mi, latestworld, SOURCE_MODE_NOT_REQUIRED, jl_options.trim);
+                if (codeinst == nullptr || compiled_functions.count(codeinst))
+                    continue;
+                orc::ThreadSafeModule decl_m = jl_create_ts_module("extern", ctx, DL, TT);
+                jl_llvm_functions_t decls;
+                if (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr)
+                    decls.functionObject = "jl_fptr_const_return";
+                else
+                    decls = jl_emit_codedecls(decl_m, codeinst, output);
+                compiled_functions[codeinst] = {std::move(decl_m), std::move(decls)};
+            }
+            generate_cfunc_thunks(output, compiled_functions);
+            emit_always_inline(m, output);
+            output.workqueue.clear();
+            compiled_functions.clear();
+            output.temporary_roots = nullptr;
+            JL_GC_POP(); // GC the global_targets array contents now since reflection doesn't need it
+
+            if (m) {
+                // if compilation succeeded, prepare to return the result
+                // Similar to jl_link_global from jitlayers.cpp,
+                // so that code_llvm shows similar codegen to the jit
+                for (auto &global : output.global_targets) {
+                    if (jl_options.image_codegen) {
+                        global.second->setLinkage(GlobalValue::ExternalLinkage);
+                    }
+                    else {
+                        auto p = literal_static_pointer_val(global.first, global.second->getValueType());
+                        Type *elty = PointerType::get(p->getContext(), 0);
+                        // For pretty printing, when LLVM inlines the global initializer into its loads
+                        auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent());
+                        global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType()));
+                        global.second->setConstant(true);
+                        global.second->setLinkage(GlobalValue::PrivateLinkage);
+                        global.second->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+                        global.second->setVisibility(GlobalValue::DefaultVisibility);
+                    }
+                }
+                if (!jl_options.image_codegen) {
+                    optimizeDLSyms(*m.getModuleUnlocked());
+                }
+                assert(!verifyLLVMIR(*m.getModuleUnlocked()));
+                if (optimize) {
+                    auto opts = OptimizationOptions::defaults();
+                    opts.sanitize_memory = params.sanitize_memory;
+                    opts.sanitize_thread = params.sanitize_thread;
+                    opts.sanitize_address = params.sanitize_address;
+                    NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level), opts};
+                    //Safe b/c context lock is held by output
+                    PM.run(*m.getModuleUnlocked());
+                    assert(!verifyLLVMIR(*m.getModuleUnlocked()));
+                }
+                const std::string *fname;
+                if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam")
+                    getwrapper = false;
+                if (!getwrapper)
+                    fname = &decls.specFunctionObject;
+                else
+                    fname = &decls.functionObject;
+                F = cast<Function>(m.getModuleUnlocked()->getNamedValue(*fname));
+            }
+            if (measure_compile_time_enabled) {
+                auto end = jl_hrtime();
+                jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
             }
-            const std::string *fname;
-            if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam")
-                getwrapper = false;
-            if (!getwrapper)
-                fname = &decls.specFunctionObject;
-            else
-                fname = &decls.functionObject;
-            F = cast<Function>(m.getModuleUnlocked()->getNamedValue(*fname));
-        }
-        JL_GC_POP();
-        if (measure_compile_time_enabled) {
-            auto end = jl_hrtime();
-            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
         }
         if (F) {
             dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m)));
@@ -2142,7 +2631,4 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz
             return;
         }
     }
-
-    const char *mname = name_from_method_instance(mi);
-    jl_errorf("unable to compile source for function %s", mname);
 }
diff --git a/src/array.c b/src/array.c
index 5226c729d32e7..da9cb24b4d0e9 100644
--- a/src/array.c
+++ b/src/array.c
@@ -16,200 +16,28 @@
 extern "C" {
 #endif
 
-#define JL_ARRAY_IMPL_NUL 1
-
-#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
-
-static inline void arrayassign_safe(int hasptr, jl_value_t *parent, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT
-{
-    // array can assume more alignment than a field would normally have
-    assert(nb >= jl_datatype_size(jl_typeof(src))); // nb might move some undefined bits, but we should be okay with that
-    if (hasptr) {
-        size_t nptr = nb / sizeof(void*);
-        memmove_refs((void**)dst, (void* const*)src, nptr);
-        jl_gc_multi_wb(parent, src);
-    }
-    else {
-        switch (nb) {
-        case  0: break;
-        case  1: *(uint8_t*)dst  = *(uint8_t*)src;  break;
-        case  2: *(uint16_t*)dst = *(uint16_t*)src; break;
-        case  4: *(uint32_t*)dst = *(uint32_t*)src; break;
-        case  8: *(uint64_t*)dst = *(uint64_t*)src; break;
-        case 16:
-            memcpy(jl_assume_aligned(dst, 16), jl_assume_aligned(src, 16), 16);
-            break;
-        default: memcpy(dst, src, nb);
-        }
-    }
-}
-
-static inline void memmove_safe(int hasptr, char *dst, const char *src, size_t nb) JL_NOTSAFEPOINT
-{
-    if (hasptr)
-        memmove_refs((void**)dst, (void**)src, nb / sizeof(void*));
-    else
-        memmove(dst, src, nb);
-}
-
-// array constructors ---------------------------------------------------------
-JL_DLLEXPORT char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT
-{
-    assert(jl_array_isbitsunion(a));
-    return ((char*)jl_array_data(a)) + ((jl_array_ndims(a) == 1 ? (a->maxsize - a->offset) : jl_array_len(a)) * a->elsize) + a->offset;
-}
-
-STATIC_INLINE jl_value_t *jl_array_owner(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
-{
-    if (a->flags.how == 3) {
-        a = (jl_array_t*)jl_array_data_owner(a);
-        assert(jl_is_string(a) || a->flags.how != 3);
-    }
-    return (jl_value_t*)a;
-}
-
-#if defined(_P64) && defined(UINT128MAX)
-typedef __uint128_t wideint_t;
-#else
-typedef uint64_t wideint_t;
-#endif
-
 #define MAXINTVAL (((size_t)-1)>>1)
 
-JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, size_t *tot, uint32_t ndims, size_t *dims, size_t elsz)
+JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dims)
 {
     size_t i;
     size_t _nel = 1;
-    for(i=0; i < ndims; i++) {
+    for (i = 0; i < ndims; i++) {
         size_t di = dims[i];
-        wideint_t prod = (wideint_t)_nel * (wideint_t)di;
-        if (prod >= (wideint_t) MAXINTVAL || di >= MAXINTVAL)
+        int overflow = __builtin_mul_overflow(_nel, di, &_nel);
+        if (overflow || di >= MAXINTVAL)
             return 1;
-        _nel = prod;
     }
-    wideint_t prod = (wideint_t)elsz * (wideint_t)_nel;
-    if (prod >= (wideint_t) MAXINTVAL)
-        return 2;
     *nel = _nel;
-    *tot = (size_t)prod;
     return 0;
 }
 
-static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
-                               int8_t isunboxed, int8_t hasptr, int8_t isunion, int8_t zeroinit, size_t elsz)
-{
-    jl_task_t *ct = jl_current_task;
-    size_t i, tot, nel;
-    void *data;
-    jl_array_t *a;
-    assert(isunboxed || elsz == sizeof(void*));
-    assert(atype == NULL || isunion == jl_is_uniontype(jl_tparam0(atype)));
-    int validated = jl_array_validate_dims(&nel, &tot, ndims, dims, elsz);
-    if (validated == 1)
-        jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
-    else if (validated == 2)
-        jl_error("invalid Array size");
-    if (isunboxed) {
-        if (elsz == 1 && !isunion) {
-            // extra byte for all julia allocated byte arrays
-            tot++;
-        }
-        if (isunion) {
-            // an extra byte for each isbits union array element, stored after a->maxsize
-            tot += nel;
-        }
-    }
-
-    int ndimwords = jl_array_ndimwords(ndims);
-    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
-    if (tot <= ARRAY_INLINE_NBYTES) {
-        // align data area
-        if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD)
-            tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT);
-        else if (isunboxed && elsz >= 4)
-            tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT);
-        size_t doffs = tsz;
-        tsz += tot;
-        // jl_array_t is large enough that objects will always be aligned 16
-        a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
-        assert(((size_t)a & 15) == 0);
-        // No allocation or safepoint allowed after this
-        a->flags.how = 0;
-        data = (char*)a + doffs;
-    }
-    else {
-        data = jl_gc_managed_malloc(tot);
-        // Allocate the Array **after** allocating the data
-        // to make sure the array is still young
-        a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
-        // No allocation or safepoint allowed after this
-        a->flags.how = 2;
-        jl_gc_track_malloced_array(ct->ptls, a);
-    }
-    a->flags.pooled = tsz <= GC_MAX_SZCLASS;
-
-    if (zeroinit)
-        memset(data, 0, tot);
-    a->data = data;
-    if (JL_ARRAY_IMPL_NUL && elsz == 1)
-        ((char*)data)[tot - 1] = '\0';
-    a->length = nel;
-    a->flags.ndims = ndims;
-    a->flags.ptrarray = !isunboxed;
-    a->flags.hasptr = hasptr;
-    a->elsize = elsz;
-    a->flags.isshared = 0;
-    a->flags.isaligned = 1;
-    a->offset = 0;
-    if (ndims == 1) {
-        a->nrows = nel;
-        a->maxsize = nel;
-    }
-    else if (a->flags.ndims != ndims) {
-        jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
-    }
-    else {
-        size_t *adims = &a->nrows;
-        for (i = 0; i < ndims; i++)
-            adims[i] = dims[i];
-    }
-
-    return a;
-}
-
-static inline jl_array_t *_new_array(jl_value_t *atype, uint32_t ndims, size_t *dims)
-{
-    jl_value_t *eltype = jl_tparam0(atype);
-    size_t elsz = 0, al = 0;
-    if (!jl_is_kind(jl_typeof(eltype)))
-        jl_type_error_rt("Array", "element type", (jl_value_t*)jl_type_type, eltype);
-    int isunboxed = jl_islayout_inline(eltype, &elsz, &al);
-    int isunion = jl_is_uniontype(eltype);
-    int hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0);
-    if (!isunboxed) {
-        elsz = sizeof(void*);
-        al = elsz;
-    }
-    else {
-        elsz = LLT_ALIGN(elsz, al);
-    }
-    int zi = !isunboxed || hasptr || isunion || (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->zeroinit);
-
-    return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, zi, elsz);
-}
-
-jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims,
-                                             int isunboxed, int hasptr, int isunion, int elsz)
-{
-    return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, 0, (size_t)elsz);
-}
-
 #ifndef JL_NDEBUG
 static inline int is_ntuple_long(jl_value_t *v)
 {
     if (!jl_is_tuple(v))
         return 0;
-    jl_value_t *tt = jl_typeof(v);
+    jl_value_t *tt = (jl_value_t*)jl_typetagof(v);
     size_t i, nfields = jl_nparams(tt);
     for (i = 0; i < nfields; i++) {
         if (jl_tparam(tt, i) != (jl_value_t*)jl_long_type) {
@@ -220,313 +48,130 @@ static inline int is_ntuple_long(jl_value_t *v)
 }
 #endif
 
-JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
-                                          jl_value_t *_dims)
+#define jl_array_elsize(a) (((jl_datatype_t*)jl_typetagof((a)->ref.mem))->layout->size)
+
+static char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT
 {
-    jl_task_t *ct = jl_current_task;
-    assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype)));
+    assert(jl_genericmemory_isbitsunion(a->ref.mem));
+    return jl_genericmemory_typetagdata(a->ref.mem) + (uintptr_t)a->ref.ptr_or_offset;
+}
 
-    size_t ndims = jl_nfields(_dims);
-    assert(is_ntuple_long(_dims));
-    size_t *dims = (size_t*)_dims;
-    int ndimwords = jl_array_ndimwords(ndims);
-    int tsz = sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*);
+STATIC_INLINE jl_array_t *_new_array(jl_value_t *atype, jl_genericmemory_t *mem, const jl_datatype_layout_t *layout, uint32_t ndims, size_t *dims)
+{
+    jl_task_t *ct = jl_current_task;
+    size_t i;
+    int tsz = sizeof(jl_array_t) + ndims*sizeof(size_t);
     jl_array_t *a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
-    // No allocation or safepoint allowed after this
-    // copy data (except dims) from the old object
-    a->flags.pooled = tsz <= GC_MAX_SZCLASS;
-    a->flags.ndims = ndims;
-    a->offset = 0;
-    a->data = NULL;
-    a->flags.isaligned = data->flags.isaligned;
-    a->elsize = data->elsize;
-    a->flags.ptrarray = data->flags.ptrarray;
-    a->flags.hasptr = data->flags.hasptr;
-
-    // if data is itself a shared wrapper,
-    // owner should point back to the original array
-    jl_array_t *owner = (jl_array_t*)jl_array_owner(data);
-    jl_array_data_owner(a) = (jl_value_t*)owner;
-
-    a->flags.how = 3;
-    a->data = data->data;
-    a->flags.isshared = 1;
-    data->flags.isshared = 1;
-
-    if (ndims == 1) {
-        size_t l = dims[0];
-        a->length = l;
-        a->nrows = l;
-        a->maxsize = l;
-    }
-    else if (a->flags.ndims != ndims) {
-        jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
-    }
-    else {
-        size_t *adims = &a->nrows;
-        size_t l = 1;
-        wideint_t prod;
-        for (size_t i = 0; i < ndims; i++) {
-            adims[i] = dims[i];
-            prod = (wideint_t)l * (wideint_t)adims[i];
-            if (prod > (wideint_t) MAXINTVAL)
-                jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
-            l = prod;
-        }
-        a->length = l;
-    }
-
+    a->ref.mem = mem;
+    if (layout->flags.arrayelem_isunion || layout->size == 0)
+        a->ref.ptr_or_offset = 0;
+    else
+        a->ref.ptr_or_offset = mem->ptr;
+    for (i = 0; i < ndims; i++)
+        a->dimsize[i] = dims[i];
     return a;
 }
 
-JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str)
+STATIC_INLINE jl_array_t *new_array(jl_value_t *atype, uint32_t ndims, size_t *dims)
 {
-    jl_task_t *ct = jl_current_task;
-    jl_array_t *a;
-
-    int ndimwords = jl_array_ndimwords(1);
-    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*);
-    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type);
-    a->flags.pooled = tsz <= GC_MAX_SZCLASS;
-    a->flags.ndims = 1;
-    a->offset = 0;
-    a->data = jl_string_data(str);
-    a->flags.isaligned = 0;
-    a->elsize = 1;
-    a->flags.ptrarray = 0;
-    a->flags.hasptr = 0;
-    jl_array_data_owner(a) = str;
-    a->flags.how = 3;
-    a->flags.isshared = 1;
-    size_t l = jl_string_len(str);
-    a->length = l;
-    a->nrows = a->maxsize = l;
+    size_t nel;
+    if (jl_array_validate_dims(&nel, ndims, dims))
+        jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions: too large for system address width");
+    if (*(size_t*)jl_tparam1(atype) != ndims)
+        jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
+    jl_value_t *mtype = jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)atype, 0), 1);
+    // extra byte for all julia allocated byte vectors
+    jl_genericmemory_t *mem = jl_alloc_genericmemory(mtype, nel);
+    JL_GC_PUSH1(&mem);
+    jl_array_t *a = _new_array(atype, mem, ((jl_datatype_t*)mtype)->layout, ndims, dims);
+    JL_GC_POP();
     return a;
 }
 
-// own_buffer != 0 iff GC should call free() on this pointer eventually
+jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz);
+
+JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str);
+
 JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
                                             size_t nel, int own_buffer)
 {
-    jl_task_t *ct = jl_current_task;
-    jl_array_t *a;
-    jl_value_t *eltype = jl_tparam0(atype);
-
-    int isunboxed = jl_stored_inline(eltype);
-    if (isunboxed && jl_is_uniontype(eltype))
-        jl_exceptionf(jl_argumenterror_type,
-                      "unsafe_wrap: unspecified layout for union element type");
-    size_t elsz;
-    unsigned align;
-    if (isunboxed) {
-        elsz = jl_datatype_size(eltype);
-        align = jl_datatype_align(eltype);
-    }
-    else {
-        align = elsz = sizeof(void*);
-    }
-    if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1))
-        jl_exceptionf(jl_argumenterror_type,
-                      "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
-
-    int ndimwords = jl_array_ndimwords(1);
-    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
-    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
-    // No allocation or safepoint allowed after this
-    a->flags.pooled = tsz <= GC_MAX_SZCLASS;
-    a->data = data;
-    a->length = nel;
-    a->elsize = LLT_ALIGN(elsz, align);
-    a->flags.ptrarray = !isunboxed;
-    a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0);
-    a->flags.ndims = 1;
-    a->flags.isshared = 1;
-    a->flags.isaligned = 0;  // TODO: allow passing memalign'd buffers
-    if (own_buffer) {
-        a->flags.how = 2;
-        jl_gc_track_malloced_array(ct->ptls, a);
-        jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0));
-    }
-    else {
-        a->flags.how = 0;
-    }
-
-    a->nrows = nel;
-    a->maxsize = nel;
-    a->offset = 0;
+    if (*(size_t*)jl_tparam1(atype) != 1)
+        jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
+    jl_value_t *mtype = jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)atype, 0), 1);
+    jl_genericmemory_t *mem = jl_ptr_to_genericmemory(mtype, data, nel, own_buffer);
+    JL_GC_PUSH1(&mem);
+    jl_array_t *a = _new_array(atype, mem, ((jl_datatype_t*)mtype)->layout, 1, &nel);
+    JL_GC_POP();
     return a;
 }
 
 JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
                                          jl_value_t *_dims, int own_buffer)
 {
-    jl_task_t *ct = jl_current_task;
-    size_t nel = 1;
-    jl_array_t *a;
     size_t ndims = jl_nfields(_dims);
-    wideint_t prod;
     assert(is_ntuple_long(_dims));
     size_t *dims = (size_t*)_dims;
-    for (size_t i = 0; i < ndims; i++) {
-        prod = (wideint_t)nel * (wideint_t)dims[i];
-        if (prod > (wideint_t) MAXINTVAL)
-            jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
-        nel = prod;
-    }
-    if (__unlikely(ndims == 1))
-        return jl_ptr_to_array_1d(atype, data, nel, own_buffer);
-    jl_value_t *eltype = jl_tparam0(atype);
-
-    int isunboxed = jl_stored_inline(eltype);
-    if (isunboxed && jl_is_uniontype(eltype))
-        jl_exceptionf(jl_argumenterror_type,
-                      "unsafe_wrap: unspecified layout for union element type");
-    size_t elsz;
-    unsigned align;
-    if (isunboxed) {
-        elsz = jl_datatype_size(eltype);
-        align = jl_datatype_align(eltype);
-    }
-    else {
-        align = elsz = sizeof(void*);
-    }
-    if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1))
-        jl_exceptionf(jl_argumenterror_type,
-                      "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
-
-    int ndimwords = jl_array_ndimwords(ndims);
-    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
-    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
-    // No allocation or safepoint allowed after this
-    a->flags.pooled = tsz <= GC_MAX_SZCLASS;
-    a->data = data;
-    a->length = nel;
-    a->elsize = LLT_ALIGN(elsz, align);
-    a->flags.ptrarray = !isunboxed;
-    a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0);
-    a->flags.ndims = ndims;
-    a->offset = 0;
-    a->flags.isshared = 1;
-    a->flags.isaligned = 0;
-    if (own_buffer) {
-        a->flags.how = 2;
-        jl_gc_track_malloced_array(ct->ptls, a);
-        jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0));
-    }
-    else {
-        a->flags.how = 0;
-    }
-
-    assert(ndims != 1); // handled above
-    if (a->flags.ndims != ndims)
+    size_t nel;
+    if (jl_array_validate_dims(&nel, ndims, dims))
+        jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions: too large for system address width");
+    if (*(size_t*)jl_tparam1(atype) != ndims)
         jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
-    memcpy(&a->nrows, dims, ndims * sizeof(size_t));
-    return a;
-}
-
-JL_DLLEXPORT jl_array_t *jl_new_array(jl_value_t *atype, jl_value_t *_dims)
-{
-    size_t ndims = jl_nfields(_dims);
-    assert(is_ntuple_long(_dims));
-    return _new_array(atype, ndims, (size_t*)_dims);
-}
-
-JL_DLLEXPORT jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr)
-{
-    return _new_array(atype, 1, &nr);
-}
-
-JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr,
-                                           size_t nc)
-{
-    size_t d[2] = {nr, nc};
-    return _new_array(atype, 2, &d[0]);
-}
-
-JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr,
-                                           size_t nc, size_t z)
-{
-    size_t d[3] = {nr, nc, z};
-    return _new_array(atype, 3, &d[0]);
-}
-
-JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len)
-{
-    jl_array_t *a = jl_alloc_array_1d(jl_array_uint8_type, len);
-    memcpy(a->data, str, len);
+    jl_value_t *mtype = jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)atype, 0), 1);
+    jl_genericmemory_t *mem = jl_ptr_to_genericmemory(mtype, data, nel, own_buffer);
+    JL_GC_PUSH1(&mem);
+    jl_array_t *a = _new_array(atype, mem, ((jl_datatype_t*)mtype)->layout, ndims, dims);
+    JL_GC_POP();
     return a;
 }
 
 JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a)
 {
-    size_t len = jl_array_len(a);
+    size_t len = jl_array_nrows(a); // only for Vector
     if (len == 0) {
         // this may seem like purely an optimization (which it also is), but it
         // also ensures that calling `String(a)` doesn't corrupt a previous
         // string also created the same way, where `a = StringVector(_)`.
         return jl_an_empty_string;
     }
-    if (a->flags.how == 3 && a->offset == 0 && a->elsize == 1 &&
-        (jl_array_ndims(a) != 1 ||
-         ((a->maxsize + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS)))) {
-        jl_value_t *o = jl_array_data_owner(a);
-        if (jl_is_string(o)) {
-            a->flags.isshared = 1;
-            *(size_t*)o = len;
-            a->nrows = 0;
-            a->length = 0;
-            a->maxsize = 0;
-            return o;
-        }
-    }
-    a->nrows = 0;
-    a->length = 0;
-    a->maxsize = 0;
-    return jl_pchar_to_string((const char*)jl_array_data(a), len);
+    jl_value_t *str;
+    if (a->ref.ptr_or_offset == a->ref.mem->ptr)
+        str = jl_genericmemory_to_string(a->ref.mem, len);
+    else
+        str = jl_pchar_to_string(jl_array_data(a, char), len);
+    a->ref.mem = (jl_genericmemory_t*)((jl_datatype_t*)jl_memory_uint8_type)->instance;
+    a->ref.ptr_or_offset = a->ref.mem->ptr;
+    a->dimsize[0] = 0;
+    return str;
 }
 
-JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
+JL_DLLEXPORT jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr)
 {
-    if (len == 0)
-        return jl_an_empty_string;
-    size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size
-    if (sz < len) // overflow
-        jl_throw(jl_memory_exception);
-    jl_task_t *ct = jl_current_task;
-    jl_value_t *s;
-    jl_ptls_t ptls = ct->ptls;
-    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    if (sz <= GC_MAX_SZCLASS) {
-        int pool_id = jl_gc_szclass_align8(allocsz);
-        jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
-        int osize = jl_gc_sizeclasses[pool_id];
-        // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
-        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
-        s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
-    }
-    else {
-        if (allocsz < sz) // overflow in adding offs, size was "negative"
-            jl_throw(jl_memory_exception);
-        s = jl_gc_big_alloc_noinline(ptls, allocsz);
-    }
-    jl_set_typetagof(s, jl_string_tag, 0);
-    maybe_record_alloc_to_profile(s, len, jl_string_type);
-    *(size_t*)s = len;
-    jl_string_data(s)[len] = 0;
-    return s;
+    return new_array(atype, 1, &nr);
 }
 
-JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len)
+JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, size_t nc)
 {
-    jl_value_t *s = jl_alloc_string(len);
-    if (len > 0)
-        memcpy(jl_string_data(s), str, len);
-    return s;
+    size_t dims[2] = {nr, nc};
+    return new_array(atype, 2, &dims[0]);
 }
 
-JL_DLLEXPORT jl_value_t *jl_cstr_to_string(const char *str)
+JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, size_t nc, size_t z)
 {
-    return jl_pchar_to_string(str, strlen(str));
+    size_t dims[3] = {nr, nc, z};
+    return new_array(atype, 3, &dims[0]);
+}
+
+JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims)
+{
+    return new_array(atype, ndims, dims);
+}
+
+JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len)
+{
+    jl_array_t *a = jl_alloc_array_1d(jl_array_uint8_type, len);
+    assert(jl_array_data(a, char));
+    memcpy(jl_array_data(a, char), str, len);
+    return a;
 }
 
 JL_DLLEXPORT jl_array_t *jl_alloc_vec_any(size_t n)
@@ -543,714 +188,70 @@ JL_DLLEXPORT jl_value_t *jl_apply_array_type(jl_value_t *type, size_t dim)
     return ret;
 }
 
-// array primitives -----------------------------------------------------------
-
-JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT
-{
-    assert(i < jl_array_len(a));
-    assert(a->flags.ptrarray);
-    jl_value_t *elt = jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)a->data) + i);
-    if (elt == NULL)
-        jl_throw(jl_undefref_exception);
-    return elt;
-}
-
-
-JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i)
-{
-    if (a->flags.ptrarray)
-        return jl_ptrarrayref(a, i);
-    assert(i < jl_array_len(a));
-    jl_value_t *eltype = (jl_value_t*)jl_tparam0(jl_typeof(a));
-    if (jl_is_uniontype(eltype)) {
-        // isbits union selector bytes are always stored directly after the last array element
-        uint8_t sel = jl_array_typetagdata(a)[i];
-        eltype = jl_nth_union_component(eltype, sel);
-        if (jl_is_datatype_singleton((jl_datatype_t*)eltype))
-            return ((jl_datatype_t*)eltype)->instance;
-    }
-    jl_value_t *r = undefref_check((jl_datatype_t*)eltype, jl_new_bits(eltype, &((char*)a->data)[i * a->elsize]));
-    if (__unlikely(r == NULL))
-        jl_throw(jl_undefref_exception);
-    return r;
-}
-
-JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i)
-{
-    if (a->flags.ptrarray) {
-        return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)jl_array_data(a)) + i) != NULL;
-    }
-    else if (a->flags.hasptr) {
-         jl_datatype_t *eltype = (jl_datatype_t*)jl_tparam0(jl_typeof(a));
-         assert(eltype->layout->first_ptr >= 0);
-         jl_value_t **elem = (jl_value_t**)((char*)a->data + i * a->elsize);
-         return elem[eltype->layout->first_ptr] != NULL;
-    }
-    return 1;
-}
-
-JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, size_t i)
-{
-    assert(i < jl_array_len(a));
-    jl_value_t *eltype = jl_tparam0(jl_typeof(a));
-    if (eltype != (jl_value_t*)jl_any_type) {
-        JL_GC_PUSH1(&rhs);
-        if (!jl_isa(rhs, eltype))
-            jl_type_error("arrayset", eltype, rhs);
-        JL_GC_POP();
-    }
-    if (!a->flags.ptrarray) {
-        int hasptr;
-        if (jl_is_uniontype(eltype)) {
-            uint8_t *psel = &((uint8_t*)jl_array_typetagdata(a))[i];
-            unsigned nth = 0;
-            if (!jl_find_union_component(eltype, jl_typeof(rhs), &nth))
-                assert(0 && "invalid arrayset to isbits union");
-            *psel = nth;
-            if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs)))
-                return;
-            hasptr = 0;
-        }
-        else {
-            hasptr = a->flags.hasptr;
-        }
-        arrayassign_safe(hasptr, jl_array_owner(a), &((char*)a->data)[i * a->elsize], rhs, a->elsize);
-    }
-    else {
-        jl_atomic_store_release(((_Atomic(jl_value_t*)*)a->data) + i, rhs);
-        jl_gc_wb(jl_array_owner(a), rhs);
-    }
-}
-
-JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i)
-{
-    if (i >= jl_array_len(a))
-        jl_bounds_error_int((jl_value_t*)a, i + 1);
-    if (a->flags.ptrarray)
-        jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)a->data) + i, NULL);
-    else if (a->flags.hasptr) {
-        size_t elsize = a->elsize;
-        jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0);
-        memset((char*)a->data + elsize * i, 0, elsize);
-    }
-}
-
-// at this size and bigger, allocate resized array data with malloc directly
-// instead of managing them separately as gc objects
-#define MALLOC_THRESH 1048576
-
-// Resize the buffer to a max size of `newlen`
-// The buffer can either be newly allocated or realloc'd, the return
-// value is 1 if a new buffer is allocated and 0 if it is realloc'd.
-// the caller needs to take care of moving the data from the old buffer
-// to the new one if necessary.
-// When this function returns, the `->data` pointer always points to
-// the **beginning** of the new buffer.
-static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen)
-{
-    jl_task_t *ct = jl_current_task;
-    assert(!a->flags.isshared || a->flags.how == 3);
-    size_t elsz = a->elsize;
-    size_t nbytes = newlen * elsz;
-    size_t oldnbytes = a->maxsize * elsz;
-    size_t oldoffsnb = a->offset * elsz;
-    size_t oldlen = a->nrows;
-    int isbitsunion = jl_array_isbitsunion(a);
-    assert(nbytes >= oldnbytes);
-    if (elsz == 1 && !isbitsunion) {
-        nbytes++;
-        oldnbytes++;
-    }
-    if (isbitsunion) {
-        nbytes += newlen;
-        oldnbytes += a->maxsize;
-    }
-    int newbuf = 0;
-    if (a->flags.how == 2) {
-        // already malloc'd - use realloc
-        char *olddata = (char*)a->data - oldoffsnb;
-        a->data = jl_gc_managed_realloc(olddata, nbytes, oldnbytes,
-                                        a->flags.isaligned, (jl_value_t*)a);
-    }
-    else if (a->flags.how == 3 && jl_is_string(jl_array_data_owner(a)) && !isbitsunion) {
-        // if data is in a String, keep it that way
-        jl_value_t *s;
-        if (a->flags.isshared) {
-            s = jl_alloc_string(nbytes - (elsz == 1));
-            newbuf = 1;
-        }
-        else {
-            s = jl_gc_realloc_string(jl_array_data_owner(a), nbytes - (elsz == 1));
-        }
-        jl_array_data_owner(a) = s;
-        jl_gc_wb(a, s);
-        a->data = jl_string_data(s);
-    }
-    else {
-        newbuf = 1;
-        if (nbytes >= MALLOC_THRESH) {
-            a->data = jl_gc_managed_malloc(nbytes);
-            jl_gc_track_malloced_array(ct->ptls, a);
-            a->flags.how = 2;
-            a->flags.isaligned = 1;
-        }
-        else {
-            a->data = jl_gc_alloc_buf(ct->ptls, nbytes);
-            a->flags.how = 1;
-            jl_gc_wb_buf(a, a->data, nbytes);
-        }
-    }
-    if (JL_ARRAY_IMPL_NUL && elsz == 1 && !isbitsunion)
-        memset((char*)a->data + oldnbytes - 1, 0, nbytes - oldnbytes + 1);
-    (void)oldlen;
-    assert(oldlen == a->nrows &&
-           "Race condition detected: recursive resizing on the same array.");
-    a->flags.isshared = 0;
-    a->maxsize = newlen;
-    return newbuf;
-}
-
-static void NOINLINE array_try_unshare(jl_array_t *a)
-{
-    if (a->flags.isshared) {
-        if (a->flags.how != 3)
-            jl_error("cannot resize array with shared data");
-        // allow resizing when data is shared with a String
-        if (jl_is_string(jl_array_data_owner(a)))
-            return;
-        assert(a->offset == 0);
-        size_t len = a->maxsize;
-        size_t nbytes = len * a->elsize;
-        if (jl_array_isbitsunion(a)) {
-            nbytes += len;
-        }
-        char *olddata = (char*)a->data;
-        int newbuf = array_resize_buffer(a, len);
-        assert(newbuf);
-        (void)newbuf;
-        memcpy(a->data, olddata, nbytes);
-    }
-}
-
-size_t overallocation(size_t maxsize)
-{
-    if (maxsize < 8)
-        return 8;
-    // compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8
-    // for small n, we grow faster than O(n)
-    // for large n, we grow at O(n/8)
-    // and as we reach O(memory) for memory>>1MB,
-    // this means we end by adding about 10% of memory each time
-    int exp2 = sizeof(maxsize) * 8 -
-#ifdef _P64
-        __builtin_clzll(maxsize);
-#else
-        __builtin_clz(maxsize);
-#endif
-    maxsize += ((size_t)1 << (exp2 * 7 / 8)) * 4 + maxsize / 8;
-    return maxsize;
-}
-
-STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
-                                        size_t n)
+JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc)
 {
-    // designed to handle the case of growing and shrinking at both ends
-    if (__unlikely(a->flags.isshared)) {
-        if (a->flags.how != 3)
-            jl_error("cannot resize array with shared data");
-        if (inc == 0) {
-            // If inc > 0, it will always trigger the slow path and unshare the
-            // buffer
-            array_try_unshare(a);
-            return;
-        }
-    }
+    size_t n = jl_array_nrows(a);
+    size_t elsz = jl_array_elsize(a);
+    char *data = jl_array_data(a,char);
+    jl_value_t *mtype = (jl_value_t*)jl_typetagof(a->ref.mem);
+    int isbitsunion = jl_genericmemory_isbitsunion(a->ref.mem);
     size_t newnrows = n + inc;
-    size_t elsz = a->elsize;
-    size_t nbinc = inc * elsz;
-    char *data = (char*)a->data;
-    char *newdata;
-    char *typetagdata;
-    char *newtypetagdata = NULL;
-    int isbitsunion = jl_array_isbitsunion(a);
-    if (isbitsunion) typetagdata = jl_array_typetagdata(a);
-    if (a->offset >= inc) {
-        // already have enough space in a->offset
-        newdata = data - nbinc;
-        a->offset -= inc;
-        if (isbitsunion) newtypetagdata = typetagdata - inc;
-        if (idx > 0) {
-            // inserting new elements after 1st element
-            memmove_safe(a->flags.hasptr, newdata, data, idx * elsz);
-            if (isbitsunion) {
-                memmove(newtypetagdata, typetagdata, idx);
-                memset(newtypetagdata + idx, 0, inc);
-            }
-        }
-    }
-    else {
-        // not enough room for requested growth from existing a->offset
-        size_t oldoffset = a->offset;
-        size_t oldoffsnb = oldoffset * elsz;
-        size_t oldmaxsize = a->maxsize;
-        size_t nb1 = idx * elsz;
-        if (inc > (a->maxsize - n) / 2 - (a->maxsize - n) / 20) {
-            // not enough room for requested growth from end of array
-            size_t newlen = inc * 2;
-            while (n + 2 * inc > newlen - a->offset)
-                newlen *= 2;
-            size_t newmaxsize = overallocation(a->maxsize);
-            if (newlen < newmaxsize)
-                newlen = newmaxsize;
-            size_t newoffset = (newlen - newnrows) / 2;
-            if (!array_resize_buffer(a, newlen)) {
-                data = (char*)a->data + oldoffsnb;
-            }
-            newdata = (char*)a->data + newoffset * elsz;
-            if (isbitsunion) {
-                typetagdata = data + (oldmaxsize - oldoffset) * elsz + oldoffset;
-                newtypetagdata = newdata + (a->maxsize - newoffset) * elsz + newoffset;
-                memmove(newtypetagdata, typetagdata, idx);
-                memset(newtypetagdata + idx, 0, inc);
-                memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx);
-            }
-            // We could use memcpy if resizing allocates a new buffer,
-            // hopefully it's not a particularly important optimization.
-            if (idx > 0 && newdata < data) {
-                memmove_safe(a->flags.hasptr, newdata, data, nb1);
-            }
-            memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
-            if (idx > 0 && newdata > data) {
-                memmove_safe(a->flags.hasptr, newdata, data, nb1);
-            }
-            a->offset = newoffset;
-        }
-        else {
-            // use extra space between a->nrows & a->maxsize
-            a->offset = (a->maxsize - newnrows) / 2;
-            newdata = data - oldoffsnb + a->offset * elsz;
-            if (isbitsunion) newtypetagdata = newdata + (a->maxsize - a->offset) * elsz + a->offset;
-            if (idx > 0 && newdata < data) {
-                memmove_safe(a->flags.hasptr, newdata, data, nb1);
-                if (isbitsunion) {
-                    memmove(newtypetagdata, typetagdata, idx);
-                    memset(newtypetagdata + idx, 0, inc);
-                }
-            }
-            memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
-            if (isbitsunion) memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx);
-            if (idx > 0 && newdata > data) {
-                memmove_safe(a->flags.hasptr, newdata, data, nb1);
-                if (isbitsunion) {
-                    memmove(newtypetagdata, typetagdata, idx);
-                    memset(newtypetagdata + idx, 0, inc);
-                }
-            }
-        }
-    }
-    a->length = newnrows;
-    a->nrows = newnrows;
-    a->data = newdata;
-    if (jl_is_array_zeroinit(a)) {
-        memset(newdata + idx * elsz, 0, nbinc);
-    }
-    if (newtypetagdata) {
-        memset(newtypetagdata + idx, 0, inc);
-    }
-}
-
-STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx,
-                                        size_t inc, size_t n)
-{
-    // optimized for the case of only growing and shrinking at the end
-    if (__unlikely(a->flags.isshared)) {
-        if (a->flags.how != 3)
-            jl_error("cannot resize array with shared data");
-        if (inc == 0) {
-            // If inc > 0, it will always trigger the slow path and unshare the
-            // buffer
-            array_try_unshare(a);
-            return;
-        }
+    if (!isbitsunion && elsz == 0) {
+        jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 2);
+        a->ref.mem = newmem;
+        jl_gc_wb(a, newmem);
+        a->dimsize[0] = newnrows;
+        return;
     }
-    size_t elsz = a->elsize;
-    char *data = (char*)a->data;
-    char *typetagdata;
-    char *newtypetagdata;
-    int isbitsunion = jl_array_isbitsunion(a);
-    if (isbitsunion) typetagdata = jl_array_typetagdata(a);
-    int has_gap = n > idx;
-    size_t reqmaxsize = a->offset + n + inc;
-    if (__unlikely(reqmaxsize > a->maxsize)) {
-        size_t nb1 = idx * elsz;
-        size_t nbinc = inc * elsz;
-        // grow either by our computed overallocation factor or exactly the requested size,
-        // whichever is larger
-        size_t newmaxsize = overallocation(a->maxsize);
+    size_t oldoffset = isbitsunion ? (size_t)data : (data - (char*)a->ref.mem->ptr) / elsz;
+    if (isbitsunion)
+        data = (char*)a->ref.mem->ptr + oldoffset * elsz;
+    size_t oldmaxsize = a->ref.mem->length;
+    size_t reqmaxsize = oldoffset + newnrows;
+    if (__unlikely(reqmaxsize > oldmaxsize)) {
+        size_t newmaxsize;
+        if (oldmaxsize < 4) // typical sequence: 0, // 4, // 6, 9, 13, 19, 28, 42, // 50, 60, 72, ...
+            newmaxsize = 4;
+        else if (oldmaxsize < 48)
+            newmaxsize = oldmaxsize*3/2; // grow by 50%
+        else
+            newmaxsize = oldmaxsize*6/5; // grow by 20%
         if (newmaxsize < reqmaxsize)
             newmaxsize = reqmaxsize;
-        size_t oldmaxsize = a->maxsize;
-        int newbuf = array_resize_buffer(a, newmaxsize);
-        char *newdata = (char*)a->data + a->offset * elsz;
-        if (isbitsunion) newtypetagdata = newdata + (a->maxsize - a->offset) * elsz + a->offset;
-        if (newbuf) {
-            memcpy(newdata, data, nb1);
-            if (isbitsunion) {
-                memcpy(newtypetagdata, typetagdata, idx);
-                if (has_gap) memcpy(newtypetagdata + idx + inc, typetagdata + idx, n - idx);
-                memset(newtypetagdata + idx, 0, inc);
-            }
-            if (has_gap) memcpy(newdata + nb1 + nbinc, data + nb1, n * elsz - nb1);
-        }
-        else {
-            if (isbitsunion) {
-                typetagdata = newdata + (oldmaxsize - a->offset) * elsz + a->offset;
-                if (has_gap) memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx);
-                memmove(newtypetagdata, typetagdata, idx);
-                memset(newtypetagdata + idx, 0, inc);
-            }
-            if (has_gap) memmove_safe(a->flags.hasptr, newdata + nb1 + nbinc, newdata + nb1, n * elsz - nb1);
-        }
-        a->data = data = newdata;
-    }
-    else if (has_gap) {
-        if (isbitsunion) {
-            memmove(typetagdata + idx + inc, typetagdata + idx, n - idx);
-            memset(typetagdata + idx, 0, inc);
-        }
-        size_t nb1 = idx * elsz;
-        memmove_safe(a->flags.hasptr, data + nb1 + inc * elsz, data + nb1, n * elsz - nb1);
-    }
-    else {
-        // there was enough room for requested growth already in a->maxsize
-        if (isbitsunion)
-            memset(typetagdata + idx, 0, inc);
-    }
-    size_t newnrows = n + inc;
-    a->length = newnrows;
-    a->nrows = newnrows;
-    if (jl_is_array_zeroinit(a)) {
-        memset(data + idx * elsz, 0, inc * elsz);
-    }
-}
-
-JL_DLLEXPORT void jl_array_grow_at(jl_array_t *a, ssize_t idx, size_t inc)
-{
-    // No need to explicitly unshare.
-    // Shared arrays are guaranteed to trigger the slow path for growing.
-    size_t n = jl_array_nrows(a);
-    if (idx < 0 || idx > n)
-        jl_bounds_error_int((jl_value_t*)a, idx + 1);
-    if (idx + 1 < n / 2) {
-        jl_array_grow_at_beg(a, idx, inc, n);
-    }
-    else {
-        jl_array_grow_at_end(a, idx, inc, n);
-    }
-}
-
-JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc)
-{
-    size_t n = jl_array_nrows(a);
-    jl_array_grow_at_end(a, n, inc, n);
-}
-
-JL_DLLEXPORT void jl_array_grow_beg(jl_array_t *a, size_t inc)
-{
-    size_t n = jl_array_nrows(a);
-    jl_array_grow_at_beg(a, 0, inc, n);
-}
-
-STATIC_INLINE void jl_array_shrink(jl_array_t *a, size_t dec)
-{
-    //if we don't manage this array return
-    if (a->flags.how == 0) return;
-
-    size_t elsz = a->elsize;
-    size_t newbytes = (a->maxsize - dec) * a->elsize;
-    size_t oldnbytes = (a->maxsize) * a->elsize;
-    int isbitsunion = jl_array_isbitsunion(a);
-    if (isbitsunion) {
-        newbytes += a->maxsize - dec;
-        oldnbytes += a->maxsize;
-    }
-
-    if (elsz == 1 && !isbitsunion) {
-        newbytes++;
-        oldnbytes++;
-    }
-    char *originalptr = ((char*) a->data) - a->offset * a->elsize;
-    if (a->flags.how == 1) {
-        //this is a julia-allocated buffer that needs to be marked
-        char *typetagdata;
-        char *newtypetagdata;
-        if (isbitsunion) {
-            typetagdata = (char*)malloc_s(a->nrows);
-            memcpy(typetagdata, jl_array_typetagdata(a), a->nrows);
-        }
-        jl_task_t *ct = jl_current_task;
-        char *originaldata = (char*) a->data - a->offset * a->elsize;
-        char *newdata = (char*)jl_gc_alloc_buf(ct->ptls, newbytes);
-        jl_gc_wb_buf(a, newdata, newbytes);
-        a->maxsize -= dec;
-        if (isbitsunion) {
-            newtypetagdata = jl_array_typetagdata(a);
-            memcpy(newtypetagdata, typetagdata, a->nrows);
-            free(typetagdata);
-        }
-        memcpy(newdata, originaldata, newbytes);
-        a->data = newdata + a->offset * elsz;
-    }
-    else if (a->flags.how == 2) {
-        //malloc-allocated pointer this array object manages
-        char *typetagdata;
-        char *newtypetagdata;
-        if (isbitsunion) {
-            typetagdata = (char*)malloc_s(a->nrows);
-            memcpy(typetagdata, jl_array_typetagdata(a), a->nrows);
-        }
-        size_t oldoffsnb = a->offset * elsz;
-        a->data = ((char*)jl_gc_managed_realloc(originalptr, newbytes, oldnbytes,
-                a->flags.isaligned, (jl_value_t*) a)) + oldoffsnb;
-        a->maxsize -= dec;
-        if (isbitsunion) {
-            newtypetagdata = jl_array_typetagdata(a);
-            memcpy(newtypetagdata, typetagdata, a->nrows);
-            free(typetagdata);
-        }
-    }
-    else if (a->flags.how == 3) {
-        //this has has a pointer to the object that owns the data
-    }
-}
-
-static size_t jl_array_limit_offset(jl_array_t *a, size_t offset)
-{
-    // make sure offset doesn't grow forever due to deleting at beginning
-    // and growing at end
-    if (offset >= 13 * a->maxsize / 20)
-        offset = 17 * (a->maxsize - a->nrows) / 100;
-#ifdef _P64
-    while (offset > (size_t)UINT32_MAX) {
-        offset /= 2;
-    }
-#endif
-    return offset;
-}
-
-STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec,
-                                       size_t n)
-{
-    // no error checking
-    // assume inbounds, assume unshared
-    size_t elsz = a->elsize;
-    size_t offset = a->offset;
-    int isbitsunion = jl_array_isbitsunion(a);
-    offset += dec;
-    a->length = n - dec;
-    a->nrows = n - dec;
-    size_t newoffs = jl_array_limit_offset(a, offset);
-    assert(newoffs <= offset);
-    size_t nbdec = dec * elsz;
-    if (__unlikely(newoffs != offset) || idx > 0) {
-        char *olddata = (char*)a->data;
-        char *newdata = olddata - (a->offset - newoffs) * elsz;
-        char *typetagdata;
-        char *newtypetagdata;
-        if (isbitsunion) {
-            typetagdata = jl_array_typetagdata(a);
-            newtypetagdata = typetagdata - (a->offset - newoffs);
-        }
-
-        size_t nb1 = idx * elsz; // size in bytes of the first block
-        size_t nbtotal = a->nrows * elsz; // size in bytes of the new array
-        // Implicit '\0' for byte arrays
-        if (elsz == 1 && !isbitsunion)
-            nbtotal++;
-        if (idx > 0) {
-            memmove_safe(a->flags.hasptr, newdata, olddata, nb1);
-            if (isbitsunion) memmove(newtypetagdata, typetagdata, idx);
-        }
-        // Move the rest of the data if the offset changed
-        if (newoffs != offset) {
-            memmove_safe(a->flags.hasptr, newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1);
-            if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, a->nrows - idx);
-        }
-        a->data = newdata;
-    }
-    else {
-        char *data = (char*)a->data;
-        a->data = data + nbdec;
-    }
-    a->offset = newoffs;
-}
-
-STATIC_INLINE void jl_array_del_at_end(jl_array_t *a, size_t idx, size_t dec,
-                                       size_t n)
-{
-    // no error checking
-    // assume inbounds, assume unshared
-    char *data = (char*)a->data;
-    size_t elsz = a->elsize;
-    int isbitsunion = jl_array_isbitsunion(a);
-    size_t last = idx + dec;
-    if (n > last) {
-        memmove_safe(a->flags.hasptr, data + idx * elsz, data + last * elsz, (n - last) * elsz);
+        // TODO: round this up to newmaxsize < GC_MAX_SZCLASS ? jl_gc_sizeclasses[jl_gc_szclass(newmaxsize)] : LLT_ALIGN(newmaxsize, 4096), after accounting for the object header (24 bytes)
+        jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, newmaxsize);
+        char *newdata = (char*)newmem->ptr + oldoffset * elsz;
+        memcpy(newdata, data, n * elsz);
         if (isbitsunion) {
             char *typetagdata = jl_array_typetagdata(a);
-            memmove(typetagdata + idx, typetagdata + last, n - last);
+            char *newtypetagdata = (char*)newmem->ptr + newmaxsize * elsz + oldoffset;
+            memcpy(newtypetagdata, typetagdata, n);
         }
+        a->ref.mem = newmem;
+        jl_gc_wb(a, newmem);
+        if (isbitsunion)
+            a->ref.ptr_or_offset = (void*)oldoffset;
+        else
+            a->ref.ptr_or_offset = newdata;
     }
-    n -= dec;
-    if (elsz == 1 && !isbitsunion)
-        data[n] = 0;
-    a->nrows = n;
-    a->length = n;
-}
-
-JL_DLLEXPORT void jl_array_del_at(jl_array_t *a, ssize_t idx, size_t dec)
-{
-    size_t n = jl_array_nrows(a);
-    size_t last = idx + dec;
-    if (__unlikely(idx < 0))
-        jl_bounds_error_int((jl_value_t*)a, idx + 1);
-    if (__unlikely(last > n))
-        jl_bounds_error_int((jl_value_t*)a, last);
-    // The unsharing needs to happen before we modify the buffer
-    if (__unlikely(a->flags.isshared))
-        array_try_unshare(a);
-    if (idx < n - last) {
-        jl_array_del_at_beg(a, idx, dec, n);
-    }
-    else {
-        jl_array_del_at_end(a, idx, dec, n);
-    }
-}
-
-JL_DLLEXPORT void jl_array_del_beg(jl_array_t *a, size_t dec)
-{
-    size_t n = jl_array_nrows(a);
-    if (__unlikely(dec > n))
-        jl_bounds_error_int((jl_value_t*)a, dec);
-    if (__unlikely(a->flags.isshared))
-        array_try_unshare(a);
-    if (dec == 0)
-        return;
-    jl_array_del_at_beg(a, 0, dec, n);
+    a->dimsize[0] = newnrows;
 }
 
 JL_DLLEXPORT void jl_array_del_end(jl_array_t *a, size_t dec)
 {
+    // assume inbounds, assume unshared
     size_t n = jl_array_nrows(a);
     if (__unlikely(n < dec))
         jl_bounds_error_int((jl_value_t*)a, 0);
-    if (__unlikely(a->flags.isshared))
-        array_try_unshare(a);
-    if (dec == 0)
+    if (__unlikely(dec == 0))
         return;
-    jl_array_del_at_end(a, n - dec, dec, n);
-}
-
-JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz)
-{
-    size_t n = jl_array_nrows(a);
-
-    size_t min = a->offset + a->length;
-    sz = (sz < min) ? min : sz;
-
-    if (sz <= a->maxsize) {
-        size_t dec = a->maxsize - sz;
-        //if we don't save at least an eighth of maxsize then its not worth it to shrink
-        if (dec <= a->maxsize / 8) return;
-        jl_array_shrink(a, dec);
-    }
-    else {
-        size_t inc = sz - n;
-        jl_array_grow_end(a, inc);
-
-        a->nrows = n;
-        a->length = n;
-    }
-}
-
-JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary)
-{
-    size_t elsz = ary->elsize;
-    size_t len = jl_array_len(ary);
-    int isunion = jl_is_uniontype(jl_tparam0(jl_typeof(ary)));
-    jl_array_t *new_ary = _new_array_(jl_typeof(ary), jl_array_ndims(ary),
-                                      &ary->nrows, !ary->flags.ptrarray,
-                                      ary->flags.hasptr, isunion, 0, elsz);
-    memcpy(new_ary->data, ary->data, len * elsz);
-    // ensure isbits union arrays copy their selector bytes correctly
-    if (jl_array_isbitsunion(ary))
-        memcpy(jl_array_typetagdata(new_ary), jl_array_typetagdata(ary), len);
-    return new_ary;
-}
-
-// Copy element by element until we hit a young object, at which point
-// we can finish by using `memmove`.
-static NOINLINE ssize_t jl_array_ptr_copy_forward(jl_value_t *owner,
-                                                  void **src_p, void **dest_p,
-                                                  ssize_t n) JL_NOTSAFEPOINT
-{
-    _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p;
-    _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p;
-    for (ssize_t i = 0; i < n; i++) {
-        void *val = jl_atomic_load_relaxed(src_pa + i);
-        jl_atomic_store_release(dest_pa + i, val);
-        // `val` is young or old-unmarked
-        if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
-            jl_gc_queue_root(owner);
-            return i;
-        }
-    }
-    return n;
-}
-
-static NOINLINE ssize_t jl_array_ptr_copy_backward(jl_value_t *owner,
-                                                   void **src_p, void **dest_p,
-                                                   ssize_t n) JL_NOTSAFEPOINT
-{
-    _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p;
-    _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p;
-    for (ssize_t i = 0; i < n; i++) {
-        void *val = jl_atomic_load_relaxed(src_pa + n - i - 1);
-        jl_atomic_store_release(dest_pa + n - i - 1, val);
-        // `val` is young or old-unmarked
-        if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
-            jl_gc_queue_root(owner);
-            return i;
-        }
-    }
-    return n;
-}
-
-// Unsafe, assume inbounds and that dest and src have the same eltype
-JL_DLLEXPORT void jl_array_ptr_copy(jl_array_t *dest, void **dest_p,
-                                    jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT
-{
-    assert(dest->flags.ptrarray && src->flags.ptrarray);
-    jl_value_t *owner = jl_array_owner(dest);
-    // Destination is old and doesn't refer to any young object
-    if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
-        jl_value_t *src_owner = jl_array_owner(src);
-        // Source is young or being promoted or might refer to young objects
-        // (i.e. source is not an old object that doesn't have wb triggered)
-        if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) {
-            ssize_t done;
-            if (dest_p < src_p || dest_p > src_p + n) {
-                done = jl_array_ptr_copy_forward(owner, src_p, dest_p, n);
-                dest_p += done;
-                src_p += done;
-            }
-            else {
-                done = jl_array_ptr_copy_backward(owner, src_p, dest_p, n);
-            }
-            n -= done;
-        }
+    n -= dec;
+    a->dimsize[0] = n;
+    // don't leave behind deleted data
+    if (jl_is_genericmemory_zeroinit(a->ref.mem) && !jl_genericmemory_isbitsunion(a->ref.mem)) {
+        size_t elsz = jl_array_elsize(a);
+        memset(jl_array_data(a,char) + n * elsz, 0, elsz * dec);
     }
-    memmove_refs(dest_p, src_p, n);
 }
 
 JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item)
@@ -1274,50 +275,63 @@ JL_DLLEXPORT void jl_array_ptr_1d_append(jl_array_t *a, jl_array_t *a2)
     }
 }
 
-JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a) JL_NOTSAFEPOINT
+JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy_slice(jl_genericmemory_t *mem, void *data, size_t len);
+
+JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary)
 {
-    return jl_array_data_owner(a);
+    size_t len = jl_array_len(ary);
+    jl_genericmemory_t *mem = jl_genericmemory_copy_slice(ary->ref.mem, ary->ref.ptr_or_offset, len);
+    JL_GC_PUSH1(&mem);
+    jl_array_t *new_ary = _new_array((jl_value_t*)jl_typetagof(ary), mem, ((jl_datatype_t*)jl_typetagof(ary->ref.mem))->layout, jl_array_ndims(ary), &ary->dimsize[0]);
+    JL_GC_POP();
+    return new_ary;
 }
 
-STATIC_INLINE int jl_has_implicit_byte_owned(jl_array_t *a)
+JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
 {
-    assert(a->flags.how != 3);
-    if (!a->flags.isshared)
-        return 1;
-    return a->flags.how == 1;
+    if (len == 0)
+        return jl_an_empty_string;
+    size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size
+    if (sz < len) // overflow
+        jl_throw(jl_memory_exception);
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *s;
+    jl_ptls_t ptls = ct->ptls;
+    s = (jl_value_t*)jl_gc_alloc(ptls, sz, jl_string_type);
+    jl_set_typetagof(s, jl_string_tag, 0);
+    *(size_t*)s = len;
+    jl_string_data(s)[len] = 0;
+    return s;
 }
 
-STATIC_INLINE int jl_has_implicit_byte(jl_array_t *a)
+JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len)
 {
-    // * unshared:
-    //   * how: 0-2
-    //     We own and allocated the data.
-    //     It should have the extra byte.
-    // * shared:
-    //   * how: 0, 2
-    //     The data might come from external source without implicit NUL byte.
-    //     There could be an entra byte for a `reinterpreted` array
-    //     but that should be unlikely for strings.
-    //   * how: 1
-    //     We allocated the data with the extra byte.
-    //   * how: 3
-    //     We should check the owner.
-    if (a->flags.how == 3) {
-        a = (jl_array_t*)jl_array_data_owner(a);
-        if (jl_is_string(a)) return 1;
-        return a->elsize == 1 && jl_has_implicit_byte_owned(a);
-    }
-    return jl_has_implicit_byte_owned(a);
+    jl_value_t *s = jl_alloc_string(len);
+    if (len > 0)
+        memcpy(jl_string_data(s), str, len);
+    return s;
 }
 
-// Create an array with the same content
-JL_DLLEXPORT jl_array_t *jl_array_cconvert_cstring(jl_array_t *a)
+JL_DLLEXPORT jl_value_t *jl_cstr_to_string(const char *str)
 {
-    assert(jl_typeof(a) == jl_array_uint8_type);
-    if (!jl_has_implicit_byte(a))
-        a = jl_array_copy(a);
-    ((char*)a->data)[a->nrows] = 0;
-    return a;
+    return jl_pchar_to_string(str, strlen(str));
+}
+
+
+// deprecated and unused internally, but some packages (notably OrderedCollections.jl) have not yet started to use the modern Base.unsetindex API
+JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i)
+{
+    if (i >= jl_array_len(a))
+        jl_bounds_error_int((jl_value_t*)a, i + 1);
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout;
+    if (layout->flags.arrayelem_isboxed) {
+        jl_atomic_store_relaxed(jl_array_data(a,_Atomic(jl_value_t*)) + i, NULL);
+    }
+    else if (layout->first_ptr >= 0) {
+        size_t elsize = layout->size;
+        jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0);
+        memset(jl_array_data(a,char) + elsize * i, 0, elsize);
+    }
 }
 
 #ifdef __cplusplus
diff --git a/src/ast.c b/src/ast.c
index 06727b453d6a3..d6e3893751c9f 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -7,6 +7,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+
 #ifdef _OS_WINDOWS_
 #include <malloc.h>
 #endif
@@ -19,100 +20,9 @@
 extern "C" {
 #endif
 
-// head symbols for each expression type
-JL_DLLEXPORT jl_sym_t *jl_call_sym;
-JL_DLLEXPORT jl_sym_t *jl_invoke_sym;
-JL_DLLEXPORT jl_sym_t *jl_invoke_modify_sym;
-JL_DLLEXPORT jl_sym_t *jl_empty_sym;
-JL_DLLEXPORT jl_sym_t *jl_top_sym;
-JL_DLLEXPORT jl_sym_t *jl_module_sym;
-JL_DLLEXPORT jl_sym_t *jl_slot_sym;
-JL_DLLEXPORT jl_sym_t *jl_export_sym;
-JL_DLLEXPORT jl_sym_t *jl_import_sym;
-JL_DLLEXPORT jl_sym_t *jl_toplevel_sym;
-JL_DLLEXPORT jl_sym_t *jl_quote_sym;
-JL_DLLEXPORT jl_sym_t *jl_line_sym;
-JL_DLLEXPORT jl_sym_t *jl_incomplete_sym;
-JL_DLLEXPORT jl_sym_t *jl_goto_sym;
-JL_DLLEXPORT jl_sym_t *jl_goto_ifnot_sym;
-JL_DLLEXPORT jl_sym_t *jl_return_sym;
-JL_DLLEXPORT jl_sym_t *jl_lineinfo_sym;
-JL_DLLEXPORT jl_sym_t *jl_lambda_sym;
-JL_DLLEXPORT jl_sym_t *jl_assign_sym;
-JL_DLLEXPORT jl_sym_t *jl_globalref_sym;
-JL_DLLEXPORT jl_sym_t *jl_do_sym;
-JL_DLLEXPORT jl_sym_t *jl_method_sym;
-JL_DLLEXPORT jl_sym_t *jl_core_sym;
-JL_DLLEXPORT jl_sym_t *jl_enter_sym;
-JL_DLLEXPORT jl_sym_t *jl_leave_sym;
-JL_DLLEXPORT jl_sym_t *jl_pop_exception_sym;
-JL_DLLEXPORT jl_sym_t *jl_exc_sym;
-JL_DLLEXPORT jl_sym_t *jl_error_sym;
-JL_DLLEXPORT jl_sym_t *jl_new_sym;
-JL_DLLEXPORT jl_sym_t *jl_using_sym;
-JL_DLLEXPORT jl_sym_t *jl_splatnew_sym;
-JL_DLLEXPORT jl_sym_t *jl_block_sym;
-JL_DLLEXPORT jl_sym_t *jl_new_opaque_closure_sym;
-JL_DLLEXPORT jl_sym_t *jl_opaque_closure_method_sym;
-JL_DLLEXPORT jl_sym_t *jl_const_sym;
-JL_DLLEXPORT jl_sym_t *jl_thunk_sym;
-JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym;
-JL_DLLEXPORT jl_sym_t *jl_as_sym;
-JL_DLLEXPORT jl_sym_t *jl_global_sym;
-JL_DLLEXPORT jl_sym_t *jl_list_sym;
-JL_DLLEXPORT jl_sym_t *jl_dot_sym;
-JL_DLLEXPORT jl_sym_t *jl_newvar_sym;
-JL_DLLEXPORT jl_sym_t *jl_boundscheck_sym;
-JL_DLLEXPORT jl_sym_t *jl_inbounds_sym;
-JL_DLLEXPORT jl_sym_t *jl_copyast_sym;
-JL_DLLEXPORT jl_sym_t *jl_cfunction_sym;
-JL_DLLEXPORT jl_sym_t *jl_loopinfo_sym;
-JL_DLLEXPORT jl_sym_t *jl_meta_sym;
-JL_DLLEXPORT jl_sym_t *jl_inert_sym;
-JL_DLLEXPORT jl_sym_t *jl_polly_sym;
-JL_DLLEXPORT jl_sym_t *jl_unused_sym;
-JL_DLLEXPORT jl_sym_t *jl_static_parameter_sym;
-JL_DLLEXPORT jl_sym_t *jl_inline_sym;
-JL_DLLEXPORT jl_sym_t *jl_noinline_sym;
-JL_DLLEXPORT jl_sym_t *jl_generated_sym;
-JL_DLLEXPORT jl_sym_t *jl_generated_only_sym;
-JL_DLLEXPORT jl_sym_t *jl_isdefined_sym;
-JL_DLLEXPORT jl_sym_t *jl_propagate_inbounds_sym;
-JL_DLLEXPORT jl_sym_t *jl_specialize_sym;
-JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym;
-JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym;
-JL_DLLEXPORT jl_sym_t *jl_purity_sym;
-JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym;
-JL_DLLEXPORT jl_sym_t *jl_nospecializeinfer_sym;
-JL_DLLEXPORT jl_sym_t *jl_macrocall_sym;
-JL_DLLEXPORT jl_sym_t *jl_colon_sym;
-JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym;
-JL_DLLEXPORT jl_sym_t *jl_throw_undef_if_not_sym;
-JL_DLLEXPORT jl_sym_t *jl_getfield_undefref_sym;
-JL_DLLEXPORT jl_sym_t *jl_gc_preserve_begin_sym;
-JL_DLLEXPORT jl_sym_t *jl_gc_preserve_end_sym;
-JL_DLLEXPORT jl_sym_t *jl_coverageeffect_sym;
-JL_DLLEXPORT jl_sym_t *jl_escape_sym;
-JL_DLLEXPORT jl_sym_t *jl_aliasscope_sym;
-JL_DLLEXPORT jl_sym_t *jl_popaliasscope_sym;
-JL_DLLEXPORT jl_sym_t *jl_optlevel_sym;
-JL_DLLEXPORT jl_sym_t *jl_thismodule_sym;
-JL_DLLEXPORT jl_sym_t *jl_atom_sym;
-JL_DLLEXPORT jl_sym_t *jl_statement_sym;
-JL_DLLEXPORT jl_sym_t *jl_all_sym;
-JL_DLLEXPORT jl_sym_t *jl_compile_sym;
-JL_DLLEXPORT jl_sym_t *jl_force_compile_sym;
-JL_DLLEXPORT jl_sym_t *jl_infer_sym;
-JL_DLLEXPORT jl_sym_t *jl_max_methods_sym;
-JL_DLLEXPORT jl_sym_t *jl_atomic_sym;
-JL_DLLEXPORT jl_sym_t *jl_not_atomic_sym;
-JL_DLLEXPORT jl_sym_t *jl_unordered_sym;
-JL_DLLEXPORT jl_sym_t *jl_monotonic_sym;
-JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
-JL_DLLEXPORT jl_sym_t *jl_release_sym;
-JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
-JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
-
+#define XX(name) JL_DLLEXPORT jl_sym_t *jl_##name;
+JL_COMMON_SYMBOLS(XX)
+#undef XX
 
 static const uint8_t flisp_system_image[] = {
 #include <julia_flisp.boot.inc>
@@ -149,32 +59,71 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo
 static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v);
 static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world, int throw_load_error);
 
+static jl_sym_t *scmsym_to_julia(fl_context_t *fl_ctx, value_t s)
+{
+    assert(issymbol(s));
+    if (fl_isgensym(fl_ctx, s)) {
+        char gsname[16];
+        char *n = uint2str(&gsname[1], sizeof(gsname)-1,
+                           ((gensym_t*)ptr(s))->id, 10);
+        *(--n) = '#';
+        return jl_symbol(n);
+    }
+    return jl_symbol(symbol_name(fl_ctx, s));
+}
+
 static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
     // tells whether a var is defined in and *by* the current module
     argcount(fl_ctx, "defined-julia-global", nargs, 1);
     (void)tosymbol(fl_ctx, args[0], "defined-julia-global");
     jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx);
-    jl_sym_t *var = jl_symbol(symbol_name(fl_ctx, args[0]));
+    jl_sym_t *var = scmsym_to_julia(fl_ctx, args[0]);
     jl_binding_t *b = jl_get_module_binding(ctx->module, var, 0);
-    return (b != NULL && jl_atomic_load_relaxed(&b->owner) == b) ? fl_ctx->T : fl_ctx->F;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return (bpart != NULL && jl_binding_kind(bpart) == PARTITION_KIND_GLOBAL) ? fl_ctx->T : fl_ctx->F;
 }
 
-static value_t fl_current_module_counter(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
+// Used to generate a unique suffix for a given symbol (e.g. variable or type name)
+// first argument contains a stack of method definitions seen so far by `closure-convert` in flisp.
+// if the top of the stack is non-NIL, we use it to augment the suffix so that it becomes
+// of the form $top_level_method_name##$counter, where `counter` is the smallest integer
+// such that the resulting name is not already defined in the current module's bindings.
+// If the top of the stack is NIL, we simply return the current module's counter.
+// This ensures that precompile statements are a bit more stable across different versions
+// of a codebase. see #53719
+static value_t fl_module_unique_name(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
+    argcount(fl_ctx, "julia-module-unique-name", nargs, 1);
     jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx);
-    assert(ctx->module);
-    return fixnum(jl_module_next_counter(ctx->module));
-}
-
-static value_t fl_julia_current_file(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
-{
-    return symbol(fl_ctx, jl_filename);
-}
-
-static value_t fl_julia_current_line(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
-{
-    return fixnum(jl_lineno);
+    jl_module_t *m = ctx->module;
+    assert(m != NULL);
+    // Get the outermost function name from the `parsed_method_stack` top
+    char *funcname = NULL;
+    value_t parsed_method_stack = args[0];
+    if (parsed_method_stack != fl_ctx->NIL) {
+        value_t bottom_stack_symbol = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "last")), parsed_method_stack);
+        funcname = tosymbol(fl_ctx, bottom_stack_symbol, "julia-module-unique-name")->name;
+    }
+    size_t sz = funcname != NULL ? strlen(funcname) + 32 : 32; // 32 is enough for the suffix
+    char *buf = (char*)alloca(sz);
+    if (funcname != NULL && strchr(funcname, '#') == NULL) {
+        for (int i = 0; ; i++) {
+            snprintf(buf, sz, "%s##%d", funcname, i);
+            jl_sym_t *sym = jl_symbol(buf);
+            JL_LOCK(&m->lock);
+            if (jl_get_module_binding(m, sym, 0) == NULL) { // make sure this name is not already taken
+                jl_get_module_binding(m, sym, 1); // create the binding
+                JL_UNLOCK(&m->lock);
+                return symbol(fl_ctx, buf);
+            }
+            JL_UNLOCK(&m->lock);
+        }
+    }
+    else {
+        snprintf(buf, sz, "%d", jl_module_next_counter(m));
+    }
+    return symbol(fl_ctx, buf);
 }
 
 static int jl_is_number(jl_value_t *v)
@@ -206,10 +155,8 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
 
 static const builtinspec_t julia_flisp_ast_ext[] = {
     { "defined-julia-global", fl_defined_julia_global }, // TODO: can we kill this safepoint
-    { "current-julia-module-counter", fl_current_module_counter },
+    { "current-julia-module-counter", fl_module_unique_name },
     { "julia-scalar?", fl_julia_scalar },
-    { "julia-current-file", fl_julia_current_file },
-    { "julia-current-line", fl_julia_current_line },
     { NULL, NULL }
 };
 
@@ -304,8 +251,7 @@ void jl_init_common_symbols(void)
     jl_lambda_sym = jl_symbol("lambda");
     jl_module_sym = jl_symbol("module");
     jl_export_sym = jl_symbol("export");
-    jl_import_sym = jl_symbol("import");
-    jl_using_sym = jl_symbol("using");
+    jl_public_sym = jl_symbol("public");
     jl_assign_sym = jl_symbol("=");
     jl_method_sym = jl_symbol("method");
     jl_exc_sym = jl_symbol("the_exception");
@@ -318,6 +264,7 @@ void jl_init_common_symbols(void)
     jl_opaque_closure_method_sym = jl_symbol("opaque_closure_method");
     jl_const_sym = jl_symbol("const");
     jl_global_sym = jl_symbol("global");
+    jl_local_sym = jl_symbol("local");
     jl_thunk_sym = jl_symbol("thunk");
     jl_toplevel_sym = jl_symbol("toplevel");
     jl_dot_sym = jl_symbol(".");
@@ -363,6 +310,8 @@ void jl_init_common_symbols(void)
     jl_aliasscope_sym = jl_symbol("aliasscope");
     jl_popaliasscope_sym = jl_symbol("popaliasscope");
     jl_thismodule_sym = jl_symbol("thismodule");
+    jl_eval_sym = jl_symbol("eval");
+    jl_include_sym = jl_symbol("include");
     jl_block_sym = jl_symbol("block");
     jl_atom_sym = jl_symbol("atom");
     jl_statement_sym = jl_symbol("statement");
@@ -375,6 +324,8 @@ void jl_init_common_symbols(void)
     jl_release_sym = jl_symbol("release");
     jl_acquire_release_sym = jl_symbol("acquire_release");
     jl_sequentially_consistent_sym = jl_symbol("sequentially_consistent");
+    jl_uninferred_sym = jl_symbol("uninferred");
+    jl_latestworld_sym = jl_symbol("latestworld");
 }
 
 JL_DLLEXPORT void jl_lisp_prompt(void)
@@ -413,20 +364,6 @@ JL_DLLEXPORT void fl_profile(const char *fname)
     jl_ast_ctx_leave(ctx);
 }
 
-
-static jl_sym_t *scmsym_to_julia(fl_context_t *fl_ctx, value_t s)
-{
-    assert(issymbol(s));
-    if (fl_isgensym(fl_ctx, s)) {
-        char gsname[16];
-        char *n = uint2str(&gsname[1], sizeof(gsname)-1,
-                           ((gensym_t*)ptr(s))->id, 10);
-        *(--n) = '#';
-        return jl_symbol(n);
-    }
-    return jl_symbol(symbol_name(fl_ctx, s));
-}
-
 static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mod)
 {
     jl_value_t *v = NULL;
@@ -436,7 +373,7 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo
     }
     JL_CATCH {
         // if expression cannot be converted, replace with error expr
-        //jl_(jl_current_exception());
+        //jl_(jl_current_exception(jl_current_task));
         //jlbacktrace();
         jl_expr_t *ex = jl_exprn(jl_error_sym, 1);
         v = (jl_value_t*)ex;
@@ -537,20 +474,16 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
             JL_GC_POP();
             return temp;
         }
-        else if (sym == jl_lineinfo_sym && n == 5) {
-            jl_value_t *modu=NULL, *name=NULL, *file=NULL, *linenum=NULL, *inlinedat=NULL;
-            JL_GC_PUSH5(&modu, &name, &file, &linenum, &inlinedat);
+        else if (sym == jl_lineinfo_sym && n == 3) {
+            jl_value_t *file=NULL, *linenum=NULL, *inlinedat=NULL;
+            JL_GC_PUSH3(&file, &linenum, &inlinedat);
             value_t lst = e;
-            modu = scm_to_julia_(fl_ctx, car_(lst), mod);
-            lst = cdr_(lst);
-            name = scm_to_julia_(fl_ctx, car_(lst), mod);
-            lst = cdr_(lst);
             file = scm_to_julia_(fl_ctx, car_(lst), mod);
             lst = cdr_(lst);
             linenum = scm_to_julia_(fl_ctx, car_(lst), mod);
             lst = cdr_(lst);
             inlinedat = scm_to_julia_(fl_ctx, car_(lst), mod);
-            temp = jl_new_struct(jl_lineinfonode_type, modu, name, file, linenum, inlinedat);
+            temp = jl_new_struct(jl_lineinfonode_type, file, linenum, inlinedat);
             JL_GC_POP();
             return temp;
         }
@@ -564,6 +497,15 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
             temp = scm_to_julia(fl_ctx, car_(cdr_(e)), mod);
             temp = jl_new_struct(jl_gotoifnot_type, ex, temp);
         }
+        else if (sym == jl_enter_sym) {
+            ex = scm_to_julia_(fl_ctx, car_(e), mod);
+            temp = jl_new_struct_uninit(jl_enternode_type);
+            jl_enternode_scope(temp) = NULL;
+            jl_enternode_catch_dest(temp) = jl_unbox_long(ex);
+            if (n == 2) {
+                jl_enternode_scope(temp) = scm_to_julia(fl_ctx, car_(cdr_(e)), mod);
+            }
+        }
         else if (sym == jl_newvar_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = jl_new_struct(jl_newvarnode_type, ex);
@@ -627,6 +569,8 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
     if (iscvalue(e) && cv_class((cvalue_t*)ptr(e)) == jl_ast_ctx(fl_ctx)->jvtype) {
         return *(jl_value_t**)cv_data((cvalue_t*)ptr(e));
     }
+    fl_print(fl_ctx, ios_stderr, e);
+    ios_putc('\n', ios_stderr);
     jl_error("malformed tree");
 }
 
@@ -648,9 +592,9 @@ static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v)
 static void array_to_list(fl_context_t *fl_ctx, jl_array_t *a, value_t *pv, int check_valid)
 {
     value_t temp;
-    for(long i=jl_array_len(a)-1; i >= 0; i--) {
+    for (long i = jl_array_nrows(a) - 1; i >= 0; i--) {
         *pv = fl_cons(fl_ctx, fl_ctx->NIL, *pv);
-        temp = julia_to_scm_(fl_ctx, jl_array_ptr_ref(a,i), check_valid);
+        temp = julia_to_scm_(fl_ctx, jl_array_ptr_ref(a, i), check_valid);
         // note: must be separate statement
         car_(*pv) = temp;
     }
@@ -685,8 +629,20 @@ static int julia_to_scm_noalloc1(fl_context_t *fl_ctx, jl_value_t *v, value_t *r
 
 static value_t julia_to_scm_noalloc2(fl_context_t *fl_ctx, jl_value_t *v, int check_valid) JL_NOTSAFEPOINT
 {
-    if (jl_is_long(v) && fits_fixnum(jl_unbox_long(v)))
-        return fixnum(jl_unbox_long(v));
+    if (jl_is_long(v)) {
+        if (fits_fixnum(jl_unbox_long(v))) {
+            return fixnum(jl_unbox_long(v));
+        } else {
+#ifdef _P64
+            value_t prim = cprim(fl_ctx, fl_ctx->int64type, sizeof(int64_t));
+            *((int64_t*)cp_data((cprim_t*)ptr(prim))) = jl_unbox_long(v);
+#else
+            value_t prim = cprim(fl_ctx, fl_ctx->int32type, sizeof(int32_t));
+            *((int32_t*)cp_data((cprim_t*)ptr(prim))) = jl_unbox_long(v);
+#endif
+            return prim;
+        }
+    }
     if (check_valid) {
         if (jl_is_ssavalue(v))
             lerror(fl_ctx, symbol(fl_ctx, "error"), "SSAValue objects should not occur in an AST");
@@ -878,7 +834,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
         JL_GC_PUSH2(&new_ci, &new_code);
         new_ci = jl_copy_code_info(new_ci);
         new_code = jl_array_copy(new_ci->code);
-        size_t clen = jl_array_len(new_code);
+        size_t clen = jl_array_nrows(new_code);
         for (int i = 0; i < clen; ++i) {
             jl_array_ptr_set(new_code, i, jl_copy_ast(
                 jl_array_ptr_ref(new_code, i)
@@ -890,18 +846,9 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
         jl_gc_wb(new_ci, new_ci->slotnames);
         new_ci->slotflags = jl_array_copy(new_ci->slotflags);
         jl_gc_wb(new_ci, new_ci->slotflags);
-        new_ci->codelocs = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->codelocs);
-        jl_gc_wb(new_ci, new_ci->codelocs);
-        new_ci->linetable = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->linetable);
-        jl_gc_wb(new_ci, new_ci->linetable);
         new_ci->ssaflags = jl_array_copy(new_ci->ssaflags);
         jl_gc_wb(new_ci, new_ci->ssaflags);
 
-        if (new_ci->edges != jl_nothing) {
-            new_ci->edges = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->edges);
-            jl_gc_wb(new_ci, new_ci->edges);
-        }
-
         if (jl_is_array(new_ci->ssavaluetypes)) {
             new_ci->ssavaluetypes = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->ssavaluetypes);
             jl_gc_wb(new_ci, new_ci->ssavaluetypes);
@@ -911,7 +858,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
     }
     if (jl_is_expr(expr)) {
         jl_expr_t *e = (jl_expr_t*)expr;
-        size_t i, l = jl_array_len(e->args);
+        size_t i, l = jl_array_nrows(e->args);
         jl_expr_t *ne = jl_exprn(e->head, l);
         JL_GC_PUSH2(&ne, &expr);
         for (i = 0; i < l; i++) {
@@ -942,7 +889,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
     return expr;
 }
 
-JL_DLLEXPORT int jl_is_operator(char *sym)
+JL_DLLEXPORT int jl_is_operator(const char *sym)
 {
     jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
@@ -951,7 +898,7 @@ JL_DLLEXPORT int jl_is_operator(char *sym)
     return res;
 }
 
-JL_DLLEXPORT int jl_is_unary_operator(char *sym)
+JL_DLLEXPORT int jl_is_unary_operator(const char *sym)
 {
     jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
@@ -960,7 +907,7 @@ JL_DLLEXPORT int jl_is_unary_operator(char *sym)
     return res;
 }
 
-JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym)
+JL_DLLEXPORT int jl_is_unary_and_binary_operator(const char *sym)
 {
     jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
@@ -969,7 +916,7 @@ JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym)
     return res;
 }
 
-JL_DLLEXPORT int jl_is_syntactic_operator(char *sym)
+JL_DLLEXPORT int jl_is_syntactic_operator(const char *sym)
 {
     jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
@@ -978,7 +925,7 @@ JL_DLLEXPORT int jl_is_syntactic_operator(char *sym)
     return res;
 }
 
-JL_DLLEXPORT int jl_operator_precedence(char *sym)
+JL_DLLEXPORT int jl_operator_precedence(const char *sym)
 {
     jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
@@ -989,11 +936,11 @@ JL_DLLEXPORT int jl_operator_precedence(char *sym)
 
 int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT
 {
-    size_t i, l = jl_array_len(body);
+    size_t i, l = jl_array_nrows(body);
     for (i = 0; i < l; i++) {
         jl_expr_t *stmt = (jl_expr_t*)jl_array_ptr_ref(body, i);
         if (jl_is_expr((jl_value_t*)stmt) && stmt->head == jl_meta_sym) {
-            size_t i, l = jl_array_len(stmt->args);
+            size_t i, l = jl_array_nrows(stmt->args);
             for (i = 0; i < l; i++)
                 if (jl_array_ptr_ref(stmt->args, i) == (jl_value_t*)sym)
                     return 1;
@@ -1004,14 +951,15 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT
 
 // Utility function to return whether `e` is any of the special AST types or
 // will always evaluate to itself exactly unchanged. This corresponds to
-// `is_self_quoting` in Core.Compiler utilities.
-int jl_is_ast_node(jl_value_t *e) JL_NOTSAFEPOINT
+// `isa_ast_node` in Core.Compiler utilities.
+int jl_isa_ast_node(jl_value_t *e) JL_NOTSAFEPOINT
 {
     return jl_is_newvarnode(e)
         || jl_is_code_info(e)
         || jl_is_linenode(e)
         || jl_is_gotonode(e)
         || jl_is_gotoifnot(e)
+        || jl_is_enternode(e)
         || jl_is_returnnode(e)
         || jl_is_ssavalue(e)
         || jl_is_slotnumber(e)
@@ -1026,9 +974,10 @@ int jl_is_ast_node(jl_value_t *e) JL_NOTSAFEPOINT
         || jl_is_expr(e);
 }
 
-static int is_self_quoting_expr(jl_expr_t *e) JL_NOTSAFEPOINT
+static int is_self_escaping_expr(jl_expr_t *e) JL_NOTSAFEPOINT
 {
     return (e->head == jl_inert_sym ||
+            e->head == jl_leave_sym ||
             e->head == jl_core_sym ||
             e->head == jl_line_sym ||
             e->head == jl_lineinfo_sym ||
@@ -1046,19 +995,20 @@ int need_esc_node(jl_value_t *e) JL_NOTSAFEPOINT
         || jl_is_ssavalue(e)
         || jl_is_slotnumber(e)
         || jl_is_argument(e)
+        || jl_is_enternode(e)
         || jl_is_quotenode(e))
         return 0;
     if (jl_is_expr(e))
-        return !is_self_quoting_expr((jl_expr_t*)e);
+        return !is_self_escaping_expr((jl_expr_t*)e);
     // note: jl_is_globalref(e) is not included here, since we care a little about about having a line number for it
-    return jl_is_ast_node(e);
+    return jl_isa_ast_node(e);
 }
 
 static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, jl_value_t **lineinfo, size_t world, int throw_load_error)
 {
     jl_task_t *ct = jl_current_task;
     JL_TIMING(MACRO_INVOCATION, MACRO_INVOCATION);
-    size_t nargs = jl_array_len(args) + 1;
+    size_t nargs = jl_array_nrows(args) + 1;
     JL_NARGSV("macrocall", 3); // macro name, location, and module
     jl_value_t **margs;
     JL_GC_PUSHARGS(margs, nargs);
@@ -1066,8 +1016,21 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
     margs[0] = jl_array_ptr_ref(args, 0);
     // __source__ argument
     jl_value_t *lno = jl_array_ptr_ref(args, 1);
-    if (!jl_is_linenode(lno))
+    jl_value_t *retry_lno = NULL;
+    if (!jl_is_linenumbernode(lno)) {
+        if (lno != jl_nothing) {
+            // Special case: The magic @VERSION macro currently gets a special
+            // Core.MacroSource for its __source__ argument. However, to avoid
+            // giving this to macros that do not expect it, we check for that
+            // special case and retry with just the LineNumberNode if needed.
+            if (jl_typeof(lno) == jl_get_global(jl_core_module, jl_symbol("MacroSource"))) {
+                retry_lno = jl_fieldref_noalloc(lno, 0);
+                goto lno_ok;
+            }
+        }
         lno = jl_new_struct(jl_linenumbernode_type, jl_box_long(0), jl_nothing);
+    }
+lno_ok:
     margs[1] = lno;
     margs[2] = (jl_value_t*)inmodule;
     for (i = 3; i < nargs; i++)
@@ -1079,9 +1042,17 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
         ct->world_age = world;
     jl_value_t *result;
     JL_TRY {
-        margs[0] = jl_toplevel_eval(*ctx, margs[0]);
-        jl_method_instance_t *mfunc = jl_method_lookup(margs, nargs, ct->world_age);
+        jl_module_t *ctx_module = *ctx;
+        JL_GC_PROMISE_ROOTED(ctx_module);
+        margs[0] = jl_toplevel_eval(ctx_module, margs[0]);
+        jl_method_instance_t *mfunc = NULL;
+        mfunc = jl_method_lookup(margs, nargs, ct->world_age);
         JL_GC_PROMISE_ROOTED(mfunc);
+        if (mfunc == NULL && retry_lno != NULL) {
+            margs[1] = retry_lno;
+            mfunc = jl_method_lookup(margs, nargs, ct->world_age);
+            JL_GC_PROMISE_ROOTED(mfunc);
+        }
         if (mfunc == NULL) {
             jl_method_error(margs[0], &margs[1], nargs, ct->world_age);
             // unreachable
@@ -1103,7 +1074,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
                 margs[0] = jl_cstr_to_string("<macrocall>");
             margs[1] = jl_fieldref(lno, 0); // extract and allocate line number
             jl_rethrow_other(jl_new_struct(jl_loaderror_type, margs[0], margs[1],
-                                           jl_current_exception()));
+                                           jl_current_exception(ct)));
         }
     }
     ct->world_age = last_age;
@@ -1119,7 +1090,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
     jl_expr_t *e = (jl_expr_t*)expr;
     if (e->head == jl_inert_sym ||
         e->head == jl_module_sym ||
-        //e->head == jl_toplevel_sym || // TODO: enable this once julia-expand-macroscope is fixed / removed
+        e->head == jl_toplevel_sym ||
         e->head == jl_meta_sym) {
         return expr;
     }
@@ -1197,7 +1168,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
     }
 
     size_t i;
-    for (i = 0; i < jl_array_len(e->args); i++) {
+    for (i = 0; i < jl_array_nrows(e->args); i++) {
         jl_value_t *a = jl_array_ptr_ref(e->args, i);
         jl_value_t *a2 = jl_expand_macros(a, inmodule, macroctx, onelevel, world, throw_load_error);
         if (a != a2)
@@ -1206,120 +1177,131 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
     return expr;
 }
 
-JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule)
-{
-    JL_TIMING(LOWERING, LOWERING);
-    JL_GC_PUSH1(&expr);
-    expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_atomic_load_acquire(&jl_world_counter), 0);
-    expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule);
-    JL_GC_POP();
-    return expr;
-}
-
-JL_DLLEXPORT jl_value_t *jl_macroexpand1(jl_value_t *expr, jl_module_t *inmodule)
-{
-    JL_TIMING(LOWERING, LOWERING);
-    JL_GC_PUSH1(&expr);
-    expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 1, jl_atomic_load_acquire(&jl_world_counter), 0);
-    expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule);
-    JL_GC_POP();
-    return expr;
-}
-
-// Lower an expression tree into Julia's intermediate-representation.
-JL_DLLEXPORT jl_value_t *jl_expand(jl_value_t *expr, jl_module_t *inmodule)
-{
-    return jl_expand_with_loc(expr, inmodule, "none", 0);
-}
-
-// Lowering, with starting program location specified
-JL_DLLEXPORT jl_value_t *jl_expand_with_loc(jl_value_t *expr, jl_module_t *inmodule,
-                                            const char *file, int line)
-{
-    return jl_expand_in_world(expr, inmodule, file, line, ~(size_t)0);
-}
-
-// Lowering, with starting program location and worldage specified
-JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmodule,
-                                            const char *file, int line, size_t world)
+JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule, int recursive, int inplace, int expand_scope)
 {
     JL_TIMING(LOWERING, LOWERING);
-    jl_timing_show_location(file, line, inmodule, JL_TIMING_DEFAULT_BLOCK);
     JL_GC_PUSH1(&expr);
-    expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0, world, 1);
-    expr = jl_call_scm_on_ast_and_loc("jl-expand-to-thunk", expr, inmodule, file, line);
+    if (!inplace)
+        expr = jl_copy_ast(expr);
+    expr = jl_expand_macros(expr, inmodule, NULL, !recursive, jl_atomic_load_acquire(&jl_world_counter), 0);
+    if (expand_scope)
+        expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule);
     JL_GC_POP();
     return expr;
 }
 
-// Same as the above, but printing warnings when applicable
-JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *inmodule,
-                                                 const char *file, int line)
+// warn: Print any lowering warnings returned; otherwise ignore
+JL_DLLEXPORT jl_value_t *jl_fl_lower(jl_value_t *expr, jl_module_t *inmodule,
+                                     const char *filename, int line, size_t world, bool_t warn)
 {
     JL_TIMING(LOWERING, LOWERING);
-    jl_timing_show_location(file, line, inmodule, JL_TIMING_DEFAULT_BLOCK);
+    jl_timing_show_location(filename, line, inmodule, JL_TIMING_DEFAULT_BLOCK);
     jl_array_t *kwargs = NULL;
-    JL_GC_PUSH2(&expr, &kwargs);
+    JL_GC_PUSH3(&expr, &kwargs, &inmodule);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0, 1);
+    expr = jl_expand_macros(expr, inmodule, NULL, 0, world, 1);
     jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule);
     fl_context_t *fl_ctx = &ctx->fl;
     value_t arg = julia_to_scm(fl_ctx, expr);
-    value_t e = fl_applyn(fl_ctx, 4, symbol_value(symbol(fl_ctx, "jl-expand-to-thunk-warn")), arg,
-                          symbol(fl_ctx, file), fixnum(line), fl_ctx->F);
-    expr = scm_to_julia(fl_ctx, e, inmodule);
+    value_t e = fl_applyn(fl_ctx, 3, symbol_value(symbol(fl_ctx, "jl-lower-to-thunk")), arg,
+                          symbol(fl_ctx, filename), fixnum(line));
+    value_t lwr = car_(e);
+    value_t warnings = car_(cdr_(e));
+    expr = scm_to_julia(fl_ctx, lwr, inmodule);
     jl_ast_ctx_leave(ctx);
     jl_sym_t *warn_sym = jl_symbol("warn");
-    if (jl_is_expr(expr) && ((jl_expr_t*)expr)->head == warn_sym) {
-        size_t nargs = jl_expr_nargs(expr);
-        for (int i = 0; i < nargs - 1; i++) {
-            jl_value_t *warning = jl_exprarg(expr, i);
-            size_t nargs = 0;
-            if (jl_is_expr(warning) && ((jl_expr_t*)warning)->head == warn_sym)
-                 nargs = jl_expr_nargs(warning);
-            int kwargs_len = (int)nargs - 6;
-            if (nargs < 6 || kwargs_len % 2 != 0) {
-                jl_error("julia-logmsg: bad argument list - expected "
-                         ":warn level (symbol) group (symbol) id file line msg . kwargs");
-            }
-            jl_value_t *level = jl_exprarg(warning, 0);
-            jl_value_t *group = jl_exprarg(warning, 1);
-            jl_value_t *id = jl_exprarg(warning, 2);
-            jl_value_t *file = jl_exprarg(warning, 3);
-            jl_value_t *line = jl_exprarg(warning, 4);
-            jl_value_t *msg = jl_exprarg(warning, 5);
-            kwargs = jl_alloc_vec_any(kwargs_len);
-            for (int i = 0; i < kwargs_len; ++i) {
-                jl_array_ptr_set(kwargs, i, jl_exprarg(warning, i + 6));
-            }
-            JL_TYPECHK(logmsg, long, level);
-            jl_log(jl_unbox_long(level), NULL, group, id, file, line, (jl_value_t*)kwargs, msg);
+    for (; warn && iscons(warnings); warnings = cdr_(warnings)) {
+        jl_value_t *warning = scm_to_julia(fl_ctx, car_(warnings), inmodule);
+        size_t nargs = 0;
+        if (jl_is_expr(warning) && ((jl_expr_t*)warning)->head == warn_sym)
+            nargs = jl_expr_nargs(warning);
+        int kwargs_len = (int)nargs - 6;
+        if (nargs < 6 || kwargs_len % 2 != 0) {
+            jl_error("julia-logmsg: bad argument list - expected "
+                     ":warn level (symbol) group (symbol) id file line msg . kwargs");
+        }
+        JL_GC_PUSH1(&warning);
+        jl_value_t *level = jl_exprarg(warning, 0);
+        jl_value_t *group = jl_exprarg(warning, 1);
+        jl_value_t *id = jl_exprarg(warning, 2);
+        jl_value_t *file = jl_exprarg(warning, 3);
+        jl_value_t *line = jl_exprarg(warning, 4);
+        jl_value_t *msg = jl_exprarg(warning, 5);
+        kwargs = jl_alloc_vec_any(kwargs_len);
+        for (int i = 0; i < kwargs_len; ++i) {
+            jl_array_ptr_set(kwargs, i, jl_exprarg(warning, i + 6));
         }
-        expr = jl_exprarg(expr, nargs - 1);
+        JL_TYPECHK(logmsg, long, level);
+        jl_log(jl_unbox_long(level), NULL, group, id, file, line, (jl_value_t*)kwargs, msg);
+        JL_GC_POP();
     }
+    jl_value_t *result = (jl_value_t *)jl_svec1(expr);
     JL_GC_POP();
-    return expr;
+    return result;
+}
+
+// Main C entry point to lowering.  Calls jl_fl_lower during bootstrap, and
+// Core._lower otherwise (this is also jl_fl_lower unless we have JuliaLowering)
+JL_DLLEXPORT jl_value_t *jl_lower(jl_value_t *expr, jl_module_t *inmodule,
+                                  const char *filename, int line, size_t world, bool_t warn)
+{
+    jl_value_t *julia_lower = NULL;
+    if (inmodule) {
+        julia_lower = jl_get_global(inmodule, jl_symbol("_internal_julia_lower"));
+    }
+    if ((!julia_lower || julia_lower == jl_nothing) && jl_core_module)
+        julia_lower = jl_get_global_value(jl_core_module, jl_symbol("_lower"), jl_current_task->world_age);
+    if (!julia_lower || julia_lower == jl_nothing) {
+        return jl_fl_lower(expr, inmodule, filename, line, world, warn);
+    }
+    jl_value_t **args;
+    JL_GC_PUSHARGS(args, 7);
+    args[0] = julia_lower;
+    args[1] = expr;
+    args[2] = (jl_value_t*)inmodule;
+    args[3] = jl_cstr_to_string(filename);
+    args[4] = jl_box_ulong(line);
+    args[5] = jl_box_ulong(world);
+    args[6] = warn ? jl_true : jl_false;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+    jl_value_t *result = jl_apply(args, 7);
+    ct->world_age = last_age;
+    args[0] = result; // root during error check below
+    JL_TYPECHK(parse, simplevector, result);
+    if (jl_svec_len(result) < 1)
+        jl_error("Result from lowering should be `svec(a::Any, x::Any...)`");
+    JL_GC_POP();
+    return result;
 }
 
-// expand in a context where the expression value is unused
-JL_DLLEXPORT jl_value_t *jl_expand_stmt_with_loc(jl_value_t *expr, jl_module_t *inmodule,
-                                                 const char *file, int line)
+jl_code_info_t *jl_outer_ctor_body(jl_value_t *thistype, size_t nfields, size_t nsparams, jl_module_t *inmodule, const char *file, int line)
 {
     JL_TIMING(LOWERING, LOWERING);
+    jl_timing_show_location(file, line, inmodule, JL_TIMING_DEFAULT_BLOCK);
+    jl_expr_t *expr = jl_exprn(jl_empty_sym, 3);
     JL_GC_PUSH1(&expr);
-    expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0, 1);
-    expr = jl_call_scm_on_ast_and_loc("jl-expand-to-thunk-stmt", expr, inmodule, file, line);
+    jl_exprargset(expr, 0, thistype);
+    jl_exprargset(expr, 1, jl_box_long(nfields));
+    jl_exprargset(expr, 2, jl_box_long(nsparams));
+    jl_code_info_t *ci = (jl_code_info_t*)jl_call_scm_on_ast_and_loc("jl-default-outer-ctor-body", (jl_value_t*)expr, inmodule, file, line);
     JL_GC_POP();
-    return expr;
+    assert(jl_is_code_info(ci));
+    return ci;
 }
 
-JL_DLLEXPORT jl_value_t *jl_expand_stmt(jl_value_t *expr, jl_module_t *inmodule)
+jl_code_info_t *jl_inner_ctor_body(jl_array_t *fieldkinds, jl_module_t *inmodule, const char *file, int line)
 {
-    return jl_expand_stmt_with_loc(expr, inmodule, "none", 0);
+    JL_TIMING(LOWERING, LOWERING);
+    jl_timing_show_location(file, line, inmodule, JL_TIMING_DEFAULT_BLOCK);
+    jl_expr_t *expr = jl_exprn(jl_empty_sym, 0);
+    JL_GC_PUSH1(&expr);
+    expr->args = fieldkinds;
+    jl_code_info_t *ci = (jl_code_info_t*)jl_call_scm_on_ast_and_loc("jl-default-inner-ctor-body", (jl_value_t*)expr, inmodule, file, line);
+    JL_GC_POP();
+    assert(jl_is_code_info(ci));
+    return ci;
 }
 
 
@@ -1330,20 +1312,23 @@ JL_DLLEXPORT jl_value_t *jl_expand_stmt(jl_value_t *expr, jl_module_t *inmodule)
 // `text` is passed as a pointer to allow raw non-String buffers to be used
 // without copying.
 jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
-                     size_t lineno, size_t offset, jl_value_t *options)
+                     size_t lineno, size_t offset, jl_value_t *options, jl_module_t *inmodule)
 {
-    jl_value_t *core_parse = NULL;
-    if (jl_core_module) {
-        core_parse = jl_get_global(jl_core_module, jl_symbol("_parse"));
+    jl_value_t *parser = NULL;
+    if (inmodule) {
+        parser = jl_get_global(inmodule, jl_symbol("_internal_julia_parse"));
+    }
+    if ((!parser || parser == jl_nothing) && jl_core_module) {
+        parser = jl_get_global(jl_core_module, jl_symbol("_parse"));
     }
-    if (!core_parse || core_parse == jl_nothing) {
+    if (!parser || parser == jl_nothing) {
         // In bootstrap, directly call the builtin parser.
         jl_value_t *result = jl_fl_parse(text, text_len, filename, lineno, offset, options);
         return result;
     }
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 6);
-    args[0] = core_parse;
+    args[0] = parser;
     args[1] = (jl_value_t*)jl_alloc_svec(2);
     jl_svecset(args[1], 0, jl_box_uint8pointer((uint8_t*)text));
     jl_svecset(args[1], 1, jl_box_long(text_len));
@@ -1372,7 +1357,7 @@ JL_DLLEXPORT jl_value_t *jl_parse_all(const char *text, size_t text_len,
 {
     jl_value_t *fname = jl_pchar_to_string(filename, filename_len);
     JL_GC_PUSH1(&fname);
-    jl_value_t *p = jl_parse(text, text_len, fname, lineno, 0, (jl_value_t*)jl_all_sym);
+    jl_value_t *p = jl_parse(text, text_len, fname, lineno, 0, (jl_value_t*)jl_all_sym, NULL);
     JL_GC_POP();
     return jl_svecref(p, 0);
 }
@@ -1385,7 +1370,7 @@ JL_DLLEXPORT jl_value_t *jl_parse_string(const char *text, size_t text_len,
     jl_value_t *fname = jl_cstr_to_string("none");
     JL_GC_PUSH1(&fname);
     jl_value_t *result = jl_parse(text, text_len, fname, 1, offset,
-                                  (jl_value_t*)(greedy ? jl_statement_sym : jl_atom_sym));
+                                  (jl_value_t*)(greedy ? jl_statement_sym : jl_atom_sym), NULL);
     JL_GC_POP();
     return result;
 }
diff --git a/src/ast.scm b/src/ast.scm
index 87db8449b3992..ea538e0aede4e 100644
--- a/src/ast.scm
+++ b/src/ast.scm
@@ -114,7 +114,7 @@
                    (deparse-prefix-call (cadr e) (cddr e) #\( #\)))))
            (($ &)          (if (and (pair? (cadr e))
                                     (not (memq (caadr e)
-                                               '(outerref null true false tuple $ vect braces))))
+                                               '(null true false tuple $ vect braces))))
                                (string (car e) "(" (deparse (cadr e)) ")")
                                (string (car e) (deparse (cadr e)))))
            ((|::|)         (if (length= e 2)
@@ -249,12 +249,11 @@
            ;; misc syntax forms
            ((import using)
             (string (car e) " " (string.join (map deparse-import-path (cdr e)) ", ")))
-           ((global local export) (string (car e) " " (string.join (map deparse (cdr e)) ", ")))
+           ((global local export public) (string (car e) " " (string.join (map deparse (cdr e)) ", ")))
            ((const)        (string "const " (deparse (cadr e))))
            ((top)          (deparse (cadr e)))
            ((core)         (string "Core." (deparse (cadr e))))
            ((globalref)    (string (deparse (cadr e)) "." (deparse-colon-dot (caddr e))))
-           ((outerref)     (string (deparse (cadr e))))
            ((ssavalue)     (string "SSAValue(" (cadr e) ")"))
            ((line)         (if (length= e 2)
                                (string "# line " (cadr e))
@@ -298,7 +297,7 @@
 ;; predicates and accessors
 
 (define (quoted? e)
-  (memq (car e) '(quote top core globalref outerref line break inert meta inbounds inline noinline loopinfo)))
+  (memq (car e) '(quote top core globalref line break inert meta inbounds inline noinline loopinfo)))
 (define (quotify e) `',e)
 (define (unquote e)
   (if (and (pair? e) (memq (car e) '(quote inert)))
@@ -393,9 +392,6 @@
 (define (globalref? e)
   (and (pair? e) (eq? (car e) 'globalref)))
 
-(define (outerref? e)
-  (and (pair? e) (eq? (car e) 'outerref)))
-
 (define (nothing? e)
   (and (pair? e) (eq? (car e) 'null)))
 
@@ -453,7 +449,7 @@
                   (check-dotop (cadr e))))))
   e)
 
-(define (vararg? x) (and (pair? x) (eq? (car x) '...)))
+(define (vararg? x) (and (pair? x) (eq? (car x) '...) (length= x 2)))
 (define (vararg-type-expr? x)
   (or (eq? x 'Vararg)
       (and (length> x 1)
@@ -470,6 +466,7 @@
 (define (make-assignment l r) `(= ,l ,r))
 (define (assignment? e) (and (pair? e) (eq? (car e) '=)))
 (define (return? e) (and (pair? e) (eq? (car e) 'return)))
+(define (thisfunction? e) (and (pair? e) (eq? (car e) 'thisfunction)))
 
 (define (tuple-call? e)
   (and (length> e 1)
@@ -497,6 +494,7 @@
 (define (vinfo:never-undef v) (< 0 (logand (caddr v) 4)))
 (define (vinfo:read v) (< 0 (logand (caddr v) 8)))
 (define (vinfo:sa v) (< 0 (logand (caddr v) 16)))
+(define (vinfo:nospecialize v) (< 0 (logand (caddr v) 128)))
 (define (set-bit x b val) (if val (logior x b) (logand x (lognot b))))
 ;; record whether var is captured
 (define (vinfo:set-capt! v c)  (set-car! (cddr v) (set-bit (caddr v) 1 c)))
@@ -511,6 +509,7 @@
 ;; occurs undef: mask 32
 ;; whether var is called (occurs in function call head position)
 (define (vinfo:set-called! v a)  (set-car! (cddr v) (set-bit (caddr v) 64 a)))
+(define (vinfo:set-nospecialize! v c)  (set-car! (cddr v) (set-bit (caddr v) 128 c)))
 
 (define var-info-for assq)
 
diff --git a/src/builtin_proto.h b/src/builtin_proto.h
index 64e3fbd1af366..ff634149a06f6 100644
--- a/src/builtin_proto.h
+++ b/src/builtin_proto.h
@@ -8,69 +8,93 @@ extern "C" {
 #endif
 
 // declarations for julia-callable builtin functions
+#define JL_BUILTIN_FUNCTIONS(XX) \
+    XX(_abstracttype,"_abstracttype") \
+    XX(_apply_iterate,"_apply_iterate") \
+    XX(_call_in_world_total,"_call_in_world_total") \
+    XX(_compute_sparams,"_compute_sparams") \
+    XX(_defaultctors,"_defaultctors") \
+    XX(_equiv_typedef,"_equiv_typedef") \
+    XX(_expr,"_expr") \
+    XX(_import, "_import") \
+    XX(_primitivetype,"_primitivetype") \
+    XX(_setsuper,"_setsuper!") \
+    XX(_structtype,"_structtype") \
+    XX(_svec_len,"_svec_len") \
+    XX(_svec_ref,"_svec_ref") \
+    XX(_typebody,"_typebody!") \
+    XX(_typevar,"_typevar") \
+    XX(_using, "_using") \
+    XX(applicable,"applicable") \
+    XX(apply_type,"apply_type") \
+    XX(compilerbarrier,"compilerbarrier") \
+    XX(current_scope,"current_scope") \
+    XX(donotdelete,"donotdelete") \
+    XX(fieldtype,"fieldtype") \
+    XX(finalizer,"finalizer") \
+    XX(get_binding_type,"get_binding_type") \
+    XX(getfield,"getfield") \
+    XX(getglobal,"getglobal") \
+    XX(declare_global,"declare_global") \
+    XX(ifelse,"ifelse") \
+    XX(intrinsic_call,"intrinsic_call") \
+    XX(invoke,"invoke") \
+    XX(invoke_in_world,"invoke_in_world") \
+    XX(invokelatest,"invokelatest") \
+    XX(is,"===") \
+    XX(isa,"isa") \
+    XX(isdefined,"isdefined") \
+    XX(isdefinedglobal,"isdefinedglobal") \
+    XX(issubtype,"<:") \
+    XX(memorynew,"memorynew") \
+    XX(memoryrefnew,"memoryrefnew") \
+    XX(memoryref_isassigned,"memoryref_isassigned") \
+    XX(memoryrefget,"memoryrefget") \
+    XX(memoryrefmodify,"memoryrefmodify!") \
+    XX(memoryrefoffset,"memoryrefoffset") \
+    XX(memoryrefreplace,"memoryrefreplace!") \
+    XX(memoryrefset,"memoryrefset!") \
+    XX(memoryrefsetonce,"memoryrefsetonce!") \
+    XX(memoryrefswap,"memoryrefswap!") \
+    XX(modifyfield,"modifyfield!") \
+    XX(modifyglobal,"modifyglobal!") \
+    XX(nfields,"nfields") \
+    XX(opaque_closure_call,"opaque_closure_call") \
+    XX(replacefield,"replacefield!") \
+    XX(replaceglobal,"replaceglobal!") \
+    XX(declare_const,"declare_const") \
+    XX(setfield,"setfield!") \
+    XX(setfieldonce,"setfieldonce!") \
+    XX(setglobal,"setglobal!") \
+    XX(setglobalonce,"setglobalonce!") \
+    XX(sizeof,"sizeof") \
+    XX(svec,"svec") \
+    XX(swapfield,"swapfield!") \
+    XX(swapglobal,"swapglobal!") \
+    XX(throw,"throw") \
+    XX(throw_methoderror,"throw_methoderror") \
+    XX(tuple,"tuple") \
+    XX(typeassert,"typeassert") \
+    XX(typeof,"typeof") \
 
-#ifdef DEFINE_BUILTIN_GLOBALS
-#define DECLARE_BUILTIN(name) \
-    JL_CALLABLE(jl_f_##name); \
-    JL_DLLEXPORT jl_value_t *jl_builtin_##name; \
-    JL_DLLEXPORT jl_fptr_args_t jl_f_##name##_addr = &jl_f_##name
-#else
-#define DECLARE_BUILTIN(name) \
-    JL_CALLABLE(jl_f_##name); \
-    JL_DLLEXPORT extern jl_value_t *jl_builtin_##name; \
-    JL_DLLEXPORT extern jl_fptr_args_t jl_f_##name##_addr
-#endif
+#define DECLARE_BUILTIN(cname,jlname) \
+    JL_CALLABLE(jl_f_##cname);
+JL_BUILTIN_FUNCTIONS(DECLARE_BUILTIN)
+#undef DECLARE_BUILTIN
+
+#define BUILTIN(cname) (jl_builtin_instances[jl_builtin_id_##cname])
+
+enum jl_builtin_ids {
+#define BUILTIN_IDS(cname,jlname) jl_builtin_id_##cname,
+JL_BUILTIN_FUNCTIONS(BUILTIN_IDS)
+#undef BUILTIN_IDS
+    jl_n_builtins
+};
 
-DECLARE_BUILTIN(applicable);
-DECLARE_BUILTIN(_apply_iterate);
-DECLARE_BUILTIN(_apply_pure);
-DECLARE_BUILTIN(apply_type);
-DECLARE_BUILTIN(arrayref);
-DECLARE_BUILTIN(arrayset);
-DECLARE_BUILTIN(arraysize);
-DECLARE_BUILTIN(_call_in_world);
-DECLARE_BUILTIN(_call_in_world_total);
-DECLARE_BUILTIN(_call_latest);
-DECLARE_BUILTIN(replacefield);
-DECLARE_BUILTIN(const_arrayref);
-DECLARE_BUILTIN(_expr);
-DECLARE_BUILTIN(fieldtype);
-DECLARE_BUILTIN(getfield);
-DECLARE_BUILTIN(ifelse);
-DECLARE_BUILTIN(invoke);
-DECLARE_BUILTIN(is);
-DECLARE_BUILTIN(isa);
-DECLARE_BUILTIN(isdefined);
-DECLARE_BUILTIN(issubtype);
-DECLARE_BUILTIN(modifyfield);
-DECLARE_BUILTIN(nfields);
-DECLARE_BUILTIN(setfield);
-DECLARE_BUILTIN(sizeof);
-DECLARE_BUILTIN(svec);
-DECLARE_BUILTIN(swapfield);
-DECLARE_BUILTIN(throw);
-DECLARE_BUILTIN(tuple);
-DECLARE_BUILTIN(typeassert);
-DECLARE_BUILTIN(_typebody);
-DECLARE_BUILTIN(typeof);
-DECLARE_BUILTIN(_typevar);
-DECLARE_BUILTIN(donotdelete);
-DECLARE_BUILTIN(compilerbarrier);
-DECLARE_BUILTIN(getglobal);
-DECLARE_BUILTIN(setglobal);
-DECLARE_BUILTIN(finalizer);
-DECLARE_BUILTIN(_compute_sparams);
-DECLARE_BUILTIN(_svec_ref);
+JL_DLLEXPORT extern jl_fptr_args_t const jl_builtin_f_addrs[];
+JL_DLLEXPORT extern const char *const jl_builtin_f_names[];
+JL_DLLEXPORT extern jl_value_t *jl_builtin_instances[];
 
-JL_CALLABLE(jl_f__structtype);
-JL_CALLABLE(jl_f__abstracttype);
-JL_CALLABLE(jl_f__primitivetype);
-JL_CALLABLE(jl_f__setsuper);
-JL_CALLABLE(jl_f__equiv_typedef);
-JL_CALLABLE(jl_f_get_binding_type);
-JL_CALLABLE(jl_f_set_binding_type);
-JL_CALLABLE(jl_f__compute_sparams);
-JL_CALLABLE(jl_f__svec_ref);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/builtins.c b/src/builtins.c
index b664b8d73710f..7b29580c76086 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -3,6 +3,8 @@
 /*
   implementations of built-in functions
 */
+#include "dtypes.h"
+#include "julia_atomics.h"
 #include "platform.h"
 
 #include <stdlib.h>
@@ -30,6 +32,27 @@
 extern "C" {
 #endif
 
+jl_fptr_args_t const jl_builtin_f_addrs[jl_n_builtins] = {
+#define BUILTIN_ADDRS(cname,jlname) &jl_f_##cname,
+JL_BUILTIN_FUNCTIONS(BUILTIN_ADDRS)
+#undef BUILTIN_ADDRS
+};
+
+const char *const jl_builtin_f_names[jl_n_builtins] = {
+#define BUILTIN_F_NAMES(cname,jlname) XSTR(jl_f_##cname),
+JL_BUILTIN_FUNCTIONS(BUILTIN_F_NAMES)
+#undef BUILTIN_F_NAMES
+};
+
+jl_value_t *jl_builtin_instances[jl_n_builtins];
+
+static const char *const jl_builtin_names[jl_n_builtins] = {
+#define BUILTIN_NAMES(cname,jlname) jlname,
+JL_BUILTIN_FUNCTIONS(BUILTIN_NAMES)
+#undef BUILTIN_NAMES
+};
+
+
 // egal and object_id ---------------------------------------------------------
 
 static int bits_equal(const void *a, const void *b, int sz) JL_NOTSAFEPOINT
@@ -115,7 +138,7 @@ static int NOINLINE compare_fields(const jl_value_t *a, const jl_value_t *b, jl_
                     continue; // skip this field (it is #undef)
                 }
             }
-            if (!ft->layout->haspadding) {
+            if (!ft->layout->flags.haspadding && ft->layout->flags.isbitsegal) {
                 if (!bits_equal(ao, bo, ft->layout->size))
                     return 0;
             }
@@ -222,9 +245,10 @@ JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const j
 JL_DLLEXPORT int jl_egal__bitstag(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT
 {
     if (dtag < jl_max_tags << 4) {
-        switch ((enum jlsmall_typeof_tags)(dtag >> 4)) {
+        switch ((enum jl_small_typeof_tags)(dtag >> 4)) {
         case jl_int8_tag:
         case jl_uint8_tag:
+        case jl_addrspacecore_tag:
             return *(uint8_t*)a == *(uint8_t*)b;
         case jl_int16_tag:
         case jl_uint16_tag:
@@ -232,10 +256,14 @@ JL_DLLEXPORT int jl_egal__bitstag(const jl_value_t *a JL_MAYBE_UNROOTED, const j
         case jl_int32_tag:
         case jl_uint32_tag:
         case jl_char_tag:
+        case jl_intrinsic_tag:
             return *(uint32_t*)a == *(uint32_t*)b;
         case jl_int64_tag:
         case jl_uint64_tag:
             return *(uint64_t*)a == *(uint64_t*)b;
+        case jl_ssavalue_tag:
+        case jl_slotnumber_tag:
+            return *(size_t*)a == *(size_t*)b;
         case jl_unionall_tag:
             return egal_types(a, b, NULL, 1);
         case jl_uniontype_tag:
@@ -247,6 +275,7 @@ JL_DLLEXPORT int jl_egal__bitstag(const jl_value_t *a JL_MAYBE_UNROOTED, const j
         case jl_symbol_tag:
         case jl_module_tag:
         case jl_bool_tag:
+        case jl_nothing_tag:
             return 0;
         case jl_simplevector_tag:
             return compare_svec((jl_svec_t*)a, (jl_svec_t*)b);
@@ -284,7 +313,7 @@ inline int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t
     if (sz == 0)
         return 1;
     size_t nf = jl_datatype_nfields(dt);
-    if (nf == 0 || !dt->layout->haspadding)
+    if (nf == 0 || (!dt->layout->flags.haspadding && dt->layout->flags.isbitsegal))
         return bits_equal(a, b, sz);
     return compare_fields(a, b, dt);
 }
@@ -344,6 +373,9 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN
             i++;
             pe = pe->prev;
         }
+        uintptr_t bits = jl_astaggedvalue(v)->header;
+        if (bits & GC_IN_IMAGE)
+            return ((uintptr_t*)v)[-2];
         return inthash((uintptr_t)v);
     }
     if (tv == jl_uniontype_type) {
@@ -391,7 +423,7 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT
     if (sz == 0)
         return ~h;
     size_t f, nf = jl_datatype_nfields(dt);
-    if (nf == 0 || (!dt->layout->haspadding && dt->layout->npointers == 0)) {
+    if (nf == 0 || (!dt->layout->flags.haspadding && dt->layout->flags.isbitsegal && dt->layout->npointers == 0)) {
         // operate element-wise if there are unused bits inside,
         // otherwise just take the whole data block at once
         // a few select pointers (notably symbol) also have special hash values
@@ -432,55 +464,62 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT
     return h;
 }
 
-static uintptr_t NOINLINE jl_object_id__cold(jl_datatype_t *dt, jl_value_t *v) JL_NOTSAFEPOINT
+static uintptr_t NOINLINE jl_object_id__cold(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT
 {
-    if (dt == jl_simplevector_type)
-        return hash_svec((jl_svec_t*)v);
-    if (dt == jl_datatype_type) {
-        jl_datatype_t *dtv = (jl_datatype_t*)v;
-        uintptr_t h = ~dtv->name->hash;
-        return bitmix(h, hash_svec(dtv->parameters));
-    }
-    if (dt == jl_string_type) {
+    jl_datatype_t *dt = (jl_datatype_t*)jl_to_typeof(tv);
+    if (dt->name->mutabl) {
+        if (dt == jl_string_type) {
 #ifdef _P64
-        return memhash_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
+            return memhash_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
 #else
-        return memhash32_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
+            return memhash32_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
 #endif
-    }
-    if (dt == jl_module_type) {
-        jl_module_t *m = (jl_module_t*)v;
-        return m->hash;
-    }
-    if (dt->name->mutabl)
+        }
+        if (dt == jl_simplevector_type)
+            return hash_svec((jl_svec_t*)v);
+        if (dt == jl_datatype_type) {
+            jl_datatype_t *dtv = (jl_datatype_t*)v;
+            uintptr_t h = ~dtv->name->hash;
+            return bitmix(h, hash_svec(dtv->parameters));
+        }
+        if (dt == jl_module_type) {
+            jl_module_t *m = (jl_module_t*)v;
+            return m->hash;
+        }
+        uintptr_t bits = jl_astaggedvalue(v)->header;
+        if (bits & GC_IN_IMAGE)
+            return ((uintptr_t*)v)[-2];
         return inthash((uintptr_t)v);
+    }
     return immut_id_(dt, v, dt->hash);
 }
 
-JL_DLLEXPORT inline uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT
+JL_DLLEXPORT inline uintptr_t jl_object_id_(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT
 {
-    jl_datatype_t *dt = (jl_datatype_t*)tv;
-    if (dt == jl_symbol_type)
+    if (tv == jl_symbol_tag << 4) {
         return ((jl_sym_t*)v)->hash;
-    if (dt == jl_typename_type)
-        return ((jl_typename_t*)v)->hash;
-    if (dt == jl_datatype_type) {
+    }
+    else if (tv == jl_datatype_tag << 4) {
         jl_datatype_t *dtv = (jl_datatype_t*)v;
         if (dtv->isconcretetype)
             return dtv->hash;
     }
-    return jl_object_id__cold(dt, v);
+    else if (tv == (uintptr_t)jl_typename_type) {
+        return ((jl_typename_t*)v)->hash;
+    }
+    return jl_object_id__cold(tv, v);
 }
 
 
 JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT
 {
-    return jl_object_id_(jl_typeof(v), v);
+    return jl_object_id_(jl_typetagof(v), v);
 }
 
 // eq hash table --------------------------------------------------------------
 
 #include "iddict.c"
+#include "idset.c"
 
 // object model and type primitives -------------------------------------------
 
@@ -511,21 +550,18 @@ JL_CALLABLE(jl_f_sizeof)
     }
     if (jl_is_datatype(x)) {
         jl_datatype_t *dx = (jl_datatype_t*)x;
-        if (dx->layout == NULL) {
+        if (!jl_struct_try_layout(dx)) {
             if (dx->name->abstract)
                 jl_errorf("Abstract type %s does not have a definite size.", jl_symbol_name(dx->name->name));
             else
                 jl_errorf("Argument is an incomplete %s type and does not have a definite size.", jl_symbol_name(dx->name->name));
         }
-        if (jl_is_layout_opaque(dx->layout))
+        if (jl_is_layout_opaque(dx->layout)) // includes all GenericMemory{kind,T}
             jl_errorf("Type %s does not have a definite size.", jl_symbol_name(dx->name->name));
         return jl_box_long(jl_datatype_size(x));
     }
     if (x == jl_bottom_type)
         jl_error("The empty type does not have a definite size since it does not have instances.");
-    if (jl_is_array(x)) {
-        return jl_box_long(jl_array_len(x) * ((jl_array_t*)x)->elsize);
-    }
     if (jl_is_string(x))
         return jl_box_long(jl_string_len(x));
     if (jl_is_symbol(x))
@@ -535,7 +571,10 @@ JL_CALLABLE(jl_f_sizeof)
     jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(x);
     assert(jl_is_datatype(dt));
     assert(!dt->name->abstract);
-    return jl_box_long(jl_datatype_size(dt));
+    size_t sz = dt->layout->size;
+    if (jl_is_genericmemory(x))
+        sz = (sz + (dt->layout->flags.arrayelem_isunion ? 1 : 0)) * ((jl_genericmemory_t*)x)->length;
+    return jl_box_long(sz);
 }
 
 JL_CALLABLE(jl_f_issubtype)
@@ -570,6 +609,14 @@ JL_CALLABLE(jl_f_throw)
     return jl_nothing;
 }
 
+JL_CALLABLE(jl_f_throw_methoderror)
+{
+    JL_NARGSV(throw_methoderror, 1);
+    size_t world = jl_get_tls_world_age();
+    jl_method_error(args[0], &args[1], nargs, world);
+    return jl_nothing;
+}
+
 JL_CALLABLE(jl_f_ifelse)
 {
     JL_NARGS(ifelse, 3, 3);
@@ -577,6 +624,12 @@ JL_CALLABLE(jl_f_ifelse)
     return (args[0] == jl_false ? args[2] : args[1]);
 }
 
+JL_CALLABLE(jl_f_current_scope)
+{
+    JL_NARGS(current_scope, 0, 0);
+    return jl_current_task->scope;
+}
+
 // apply ----------------------------------------------------------------------
 
 static NOINLINE jl_svec_t *_copy_to(size_t newalloc, jl_value_t **oldargs, size_t oldalloc)
@@ -607,14 +660,36 @@ STATIC_INLINE void _grow_to(jl_value_t **root, jl_value_t ***oldargs, jl_svec_t
     *n_alloc = newalloc;
 }
 
-static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *iterate)
+
+static jl_value_t *jl_arrayref(jl_array_t *a, size_t i)
+{
+    return jl_memoryrefget(jl_memoryrefindex(a->ref, i), 0);
+}
+
+JL_CALLABLE(jl_f__apply_iterate)
 {
-    jl_function_t *f = args[0];
+    JL_NARGSV(_apply_iterate, 2);
+    jl_value_t *iterate = args[0];
+    jl_value_t *f = args[1];
+    assert(iterate);
+    args += 1;
+    nargs -= 1;
     if (nargs == 2) {
         // some common simple cases
-        if (f == jl_builtin_svec) {
+        if (f == BUILTIN(svec)) {
             if (jl_is_svec(args[1]))
                 return args[1];
+            if (jl_is_genericmemory(args[1])) {
+                jl_genericmemory_t *mem = (jl_genericmemory_t*)args[1];
+                size_t n = mem->length;
+                jl_svec_t *t = jl_alloc_svec(n);
+                JL_GC_PUSH1(&t);
+                for (size_t i = 0; i < n; i++) {
+                    jl_svecset(t, i, jl_genericmemoryref(mem, i));
+                }
+                JL_GC_POP();
+                return (jl_value_t*)t;
+            }
             if (jl_is_array(args[1])) {
                 size_t n = jl_array_len(args[1]);
                 jl_svec_t *t = jl_alloc_svec(n);
@@ -626,9 +701,15 @@ static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *itera
                 return (jl_value_t*)t;
             }
         }
-        else if (f == jl_builtin_tuple && jl_is_tuple(args[1])) {
-            return args[1];
+        else if (f == BUILTIN(tuple)) {
+            if (jl_is_tuple(args[1]))
+                return args[1];
+            if (jl_is_svec(args[1]))
+                return jl_f_tuple(NULL, jl_svec_data(args[1]), jl_svec_len(args[1]));
         }
+        // optimization for `f(svec...)`
+        if (jl_is_svec(args[1]))
+            return jl_apply_generic(f, jl_svec_data(args[1]), jl_svec_len(args[1]));
     }
     // estimate how many real arguments we appear to have
     size_t precount = 1;
@@ -641,6 +722,9 @@ static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *itera
         else if (jl_is_tuple(args[i]) || jl_is_namedtuple(args[i])) {
             precount += jl_nfields(args[i]);
         }
+        else if (jl_is_genericmemory(args[i])) {
+            precount += ((jl_genericmemory_t*)args[i])->length;
+        }
         else if (jl_is_array(args[i])) {
             precount += jl_array_len(args[i]);
         }
@@ -648,9 +732,6 @@ static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *itera
             extra += 1;
         }
     }
-    if (extra && iterate == NULL) {
-        jl_undefined_var_error(jl_symbol("iterate"));
-    }
     // allocate space for the argument array and gc roots for it
     // based on our previous estimates
     // use the stack if we have a good estimate that it is small
@@ -709,13 +790,40 @@ static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *itera
                     jl_gc_wb(arg_heap, newargs[n - 1]);
             }
         }
+        else if (jl_is_genericmemory(ai)) {
+            jl_genericmemory_t *mem = (jl_genericmemory_t*)ai;
+            size_t j, al = mem->length;
+            precount = (precount > al) ? precount - al : 0;
+            _grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, n + precount + al, extra);
+            assert(newargs != NULL); // inform GCChecker that we didn't write a NULL here
+            const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(mem))->layout;
+            if (layout->flags.arrayelem_isboxed) {
+                for (j = 0; j < al; j++) {
+                    jl_value_t *arg = jl_genericmemory_ptr_ref(mem, j);
+                    // apply with array splatting may have embedded NULL value (#11772)
+                    if (__unlikely(arg == NULL))
+                        jl_throw(jl_undefref_exception);
+                    newargs[n++] = arg;
+                    if (arg_heap)
+                        jl_gc_wb(arg_heap, arg);
+                }
+            }
+            else {
+                for (j = 0; j < al; j++) {
+                    newargs[n++] = jl_genericmemoryref(mem, j);
+                    if (arg_heap)
+                        jl_gc_wb(arg_heap, newargs[n - 1]);
+                }
+            }
+        }
         else if (jl_is_array(ai)) {
             jl_array_t *aai = (jl_array_t*)ai;
             size_t j, al = jl_array_len(aai);
             precount = (precount > al) ? precount - al : 0;
             _grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, n + precount + al, extra);
             assert(newargs != NULL); // inform GCChecker that we didn't write a NULL here
-            if (aai->flags.ptrarray) {
+            const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(aai->ref.mem))->layout;
+            if (layout->flags.arrayelem_isboxed) {
                 for (j = 0; j < al; j++) {
                     jl_value_t *arg = jl_array_ptr_ref(aai, j);
                     // apply with array splatting may have embedded NULL value (#11772)
@@ -770,41 +878,10 @@ static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *itera
     return result;
 }
 
-JL_CALLABLE(jl_f__apply_iterate)
-{
-    JL_NARGSV(_apply_iterate, 2);
-    return do_apply(args + 1, nargs - 1, args[0]);
-}
-
-// this is like `_apply`, but with quasi-exact checks to make sure it is pure
-JL_CALLABLE(jl_f__apply_pure)
-{
-    jl_task_t *ct = jl_current_task;
-    int last_in = ct->ptls->in_pure_callback;
-    jl_value_t *ret = NULL;
-    JL_TRY {
-        ct->ptls->in_pure_callback = 1;
-        // because this function was declared pure,
-        // we should be allowed to run it in any world
-        // so we run it in the newest world;
-        // because, why not :)
-        // and `promote` works better this way
-        size_t last_age = ct->world_age;
-        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        ret = do_apply(args, nargs, NULL);
-        ct->world_age = last_age;
-        ct->ptls->in_pure_callback = last_in;
-    }
-    JL_CATCH {
-        ct->ptls->in_pure_callback = last_in;
-        jl_rethrow();
-    }
-    return ret;
-}
-
 // this is like a regular call, but always runs in the newest world
-JL_CALLABLE(jl_f__call_latest)
+JL_CALLABLE(jl_f_invokelatest)
 {
+    JL_NARGSV(invokelatest, 1);
     jl_task_t *ct = jl_current_task;
     size_t last_age = ct->world_age;
     if (!ct->ptls->in_pure_callback)
@@ -814,14 +891,14 @@ JL_CALLABLE(jl_f__call_latest)
     return ret;
 }
 
-// Like call_in_world, but runs in the specified world.
+// Like invokelatest, but runs in the specified world.
 // If world > jl_atomic_load_acquire(&jl_world_counter), run in the latest world.
-JL_CALLABLE(jl_f__call_in_world)
+JL_CALLABLE(jl_f_invoke_in_world)
 {
-    JL_NARGSV(_apply_in_world, 2);
+    JL_NARGSV(invoke_in_world, 2);
     jl_task_t *ct = jl_current_task;
     size_t last_age = ct->world_age;
-    JL_TYPECHK(_apply_in_world, ulong, args[0]);
+    JL_TYPECHK(invoke_in_world, ulong, args[0]);
     size_t world = jl_unbox_ulong(args[0]);
     if (!ct->ptls->in_pure_callback) {
         ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
@@ -836,7 +913,7 @@ JL_CALLABLE(jl_f__call_in_world)
 JL_CALLABLE(jl_f__call_in_world_total)
 {
     JL_NARGSV(_call_in_world_total, 2);
-    JL_TYPECHK(_apply_in_world, ulong, args[0]);
+    JL_TYPECHK(_call_in_world_total, ulong, args[0]);
     jl_task_t *ct = jl_current_task;
     int last_in = ct->ptls->in_pure_callback;
     jl_value_t *ret = NULL;
@@ -860,22 +937,27 @@ JL_CALLABLE(jl_f__call_in_world_total)
 
 // tuples ---------------------------------------------------------------------
 
-JL_CALLABLE(jl_f_tuple)
+static jl_value_t *arg_tuple(jl_value_t *a1, jl_value_t **args, size_t nargs)
 {
     size_t i;
-    if (nargs == 0)
-        return (jl_value_t*)jl_emptytuple;
-    jl_datatype_t *tt = jl_inst_arg_tuple_type(args[0], &args[1], nargs, 0);
+    jl_datatype_t *tt = jl_inst_arg_tuple_type(a1, args, nargs, 0);
     JL_GC_PROMISE_ROOTED(tt); // it is a concrete type
     if (tt->instance != NULL)
         return tt->instance;
     jl_task_t *ct = jl_current_task;
     jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(tt), tt);
     for (i = 0; i < nargs; i++)
-        set_nth_field(tt, jv, i, args[i], 0);
+        set_nth_field(tt, jv, i, i == 0 ? a1 : args[i - 1], 0);
     return jv;
 }
 
+JL_CALLABLE(jl_f_tuple)
+{
+    if (nargs == 0)
+        return (jl_value_t*)jl_emptytuple;
+    return arg_tuple(args[0], &args[1], nargs);
+}
+
 JL_CALLABLE(jl_f_svec)
 {
     size_t i;
@@ -937,11 +1019,11 @@ static inline size_t get_checked_fieldindex(const char *name, jl_datatype_t *st,
     else {
         jl_value_t *ts[2] = {(jl_value_t*)jl_long_type, (jl_value_t*)jl_symbol_type};
         jl_value_t *t = jl_type_union(ts, 2);
-        jl_type_error("getfield", t, arg);
+        jl_type_error(name, t, arg);
     }
     if (mutabl && jl_field_isconst(st, idx)) {
         jl_errorf("%s: const field .%s of type %s cannot be changed", name,
-                jl_symbol_name((jl_sym_t*)jl_svec_ref(jl_field_names(st), idx)), jl_symbol_name(st->name->name));
+                jl_symbol_name((jl_sym_t*)jl_svecref(jl_field_names(st), idx)), jl_symbol_name(st->name->name));
     }
     return idx;
 }
@@ -972,9 +1054,11 @@ JL_CALLABLE(jl_f_getfield)
         jl_atomic_error("getfield: non-atomic field cannot be accessed atomically");
     if (isatomic && order == jl_memory_order_notatomic)
         jl_atomic_error("getfield: atomic field cannot be accessed non-atomically");
-    v = jl_get_nth_field_checked(v, idx);
-    if (order >= jl_memory_order_acq_rel || order == jl_memory_order_acquire)
-        jl_fence(); // `v` already had at least consume ordering
+    if (order >= jl_memory_order_seq_cst)
+        jl_fence();
+    v = jl_get_nth_field_checked(v, idx); // `v` already had at least consume ordering
+    if (order >= jl_memory_order_acquire)
+        jl_fence();
     return v;
 }
 
@@ -996,7 +1080,7 @@ JL_CALLABLE(jl_f_setfield)
     jl_value_t *ft = jl_field_type_concrete(st, idx);
     if (!jl_isa(args[2], ft))
         jl_type_error("setfield!", ft, args[2]);
-    if (order >= jl_memory_order_acq_rel || order == jl_memory_order_release)
+    if (order >= jl_memory_order_release)
         jl_fence(); // `st->[idx]` will have at least relaxed ordering
     set_nth_field(st, v, idx, args[2], isatomic);
     return args[2];
@@ -1070,6 +1154,35 @@ JL_CALLABLE(jl_f_replacefield)
     return v;
 }
 
+JL_CALLABLE(jl_f_setfieldonce)
+{
+    enum jl_memory_order success_order = jl_memory_order_notatomic;
+    JL_NARGS(setfieldonce!, 3, 5);
+    if (nargs >= 4) {
+        JL_TYPECHK(setfieldonce!, symbol, args[3]);
+        success_order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1);
+    }
+    enum jl_memory_order failure_order = success_order;
+    if (nargs == 5) {
+        JL_TYPECHK(setfieldonce!, symbol, args[4]);
+        failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 0);
+    }
+    if (failure_order > success_order)
+        jl_atomic_error("invalid atomic ordering");
+    // TODO: filter more invalid ordering combinations?
+    jl_value_t *v = args[0];
+    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
+    size_t idx = get_checked_fieldindex("setfieldonce!", st, v, args[1], 1);
+    int isatomic = !!jl_field_isatomic(st, idx);
+    if (isatomic == (success_order == jl_memory_order_notatomic))
+        jl_atomic_error(isatomic ? "setfieldonce!: atomic field cannot be written non-atomically"
+                                 : "setfieldonce!: non-atomic field cannot be written atomically");
+    if (isatomic == (failure_order == jl_memory_order_notatomic))
+        jl_atomic_error(isatomic ? "setfieldonce!: atomic field cannot be accessed non-atomically"
+                                 : "setfieldonce!: non-atomic field cannot be accessed atomically");
+    int success = set_nth_fieldonce(st, v, idx, args[2], isatomic); // always seq_cst, if isatomic needed at all
+    return success ? jl_true : jl_false;
+}
 
 static jl_value_t *get_fieldtype(jl_value_t *t, jl_value_t *f, int dothrow)
 {
@@ -1125,6 +1238,8 @@ static jl_value_t *get_fieldtype(jl_value_t *t, jl_value_t *f, int dothrow)
             tt = ((jl_tvar_t*)tt)->ub;
         if (tt == (jl_value_t*)jl_any_type)
             return (jl_value_t*)jl_any_type;
+        if (tt == (jl_value_t*)jl_bottom_type)
+            return (jl_value_t*)jl_bottom_type;
         JL_GC_PUSH1(&f);
         if (jl_is_symbol(f))
             f = jl_box_long(field_index+1);
@@ -1178,7 +1293,12 @@ JL_CALLABLE(jl_f_isdefined)
         JL_TYPECHK(isdefined, symbol, args[1]);
         m = (jl_module_t*)args[0];
         s = (jl_sym_t*)args[1];
-        return jl_boundp(m, s) ? jl_true : jl_false; // is seq_cst already
+        if (order == jl_memory_order_unspecified)
+            order = jl_memory_order_unordered;
+        if (order < jl_memory_order_unordered)
+            jl_atomic_error("isdefined: module binding cannot be accessed non-atomically");
+        int bound = jl_boundp(m, s, 1); // seq_cst always
+        return bound ? jl_true : jl_false;
     }
     jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(args[0]);
     assert(jl_is_datatype(vt));
@@ -1205,15 +1325,11 @@ JL_CALLABLE(jl_f_isdefined)
         jl_atomic_error("isdefined: non-atomic field cannot be accessed atomically");
     if (isatomic && order == jl_memory_order_notatomic)
         jl_atomic_error("isdefined: atomic field cannot be accessed non-atomically");
-    int v = jl_field_isdefined(args[0], idx);
-    if (v == 2) {
-        if (order > jl_memory_order_notatomic)
-            jl_fence(); // isbits case has no ordering already
-    }
-    else {
-        if (order >= jl_memory_order_acq_rel || order == jl_memory_order_acquire)
-            jl_fence(); // `v` already gave at least consume ordering
-    }
+    if (order >= jl_memory_order_seq_cst)
+        jl_fence();
+    int v = jl_field_isdefined(args[0], idx); // relaxed ordering
+    if (order >= jl_memory_order_acquire)
+        jl_fence();
     return v ? jl_true : jl_false;
 }
 
@@ -1234,11 +1350,41 @@ JL_CALLABLE(jl_f_getglobal)
     JL_TYPECHK(getglobal, symbol, (jl_value_t*)sym);
     if (order == jl_memory_order_notatomic)
         jl_atomic_error("getglobal: module binding cannot be read non-atomically");
-    jl_value_t *v = jl_eval_global_var(mod, sym);
-    // is seq_cst already, no fence needed
+    else if (order >= jl_memory_order_seq_cst)
+        jl_fence();
+    jl_value_t *v = jl_eval_global_var(mod, sym, jl_current_task->world_age); // relaxed load
+    if (order >= jl_memory_order_acquire)
+        jl_fence();
     return v;
 }
 
+JL_CALLABLE(jl_f_isdefinedglobal)
+{
+    jl_module_t *m = NULL;
+    jl_sym_t *s = NULL;
+    JL_NARGS(isdefined, 2, 3);
+    int allow_import = 1;
+    enum jl_memory_order order = jl_memory_order_unspecified;
+    JL_TYPECHK(isdefined, module, args[0]);
+    JL_TYPECHK(isdefined, symbol, args[1]);
+    if (nargs == 3) {
+        JL_TYPECHK(isdefined, bool, args[2]);
+        allow_import = jl_unbox_bool(args[2]);
+    }
+    if (nargs == 4) {
+        JL_TYPECHK(isdefined, symbol, args[3]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 0);
+    }
+    m = (jl_module_t*)args[0];
+    s = (jl_sym_t*)args[1];
+    if (order == jl_memory_order_unspecified)
+        order = jl_memory_order_unordered;
+    if (order < jl_memory_order_unordered)
+        jl_atomic_error("isdefined: module binding cannot be accessed non-atomically");
+    int bound = jl_boundp(m, s, allow_import); // seq_cst always
+    return bound ? jl_true : jl_false;
+}
+
 JL_CALLABLE(jl_f_setglobal)
 {
     enum jl_memory_order order = jl_memory_order_release;
@@ -1253,9 +1399,12 @@ JL_CALLABLE(jl_f_setglobal)
     JL_TYPECHK(setglobal!, symbol, (jl_value_t*)var);
     if (order == jl_memory_order_notatomic)
         jl_atomic_error("setglobal!: module binding cannot be written non-atomically");
-    // is seq_cst already, no fence needed
+    else if (order >= jl_memory_order_seq_cst)
+        jl_fence();
     jl_binding_t *b = jl_get_binding_wr(mod, var);
-    jl_checked_assignment(b, mod, var, args[2]);
+    jl_checked_assignment(b, mod, var, args[2]); // release store
+    if (order >= jl_memory_order_seq_cst)
+        jl_fence();
     return args[2];
 }
 
@@ -1266,42 +1415,181 @@ JL_CALLABLE(jl_f_get_binding_type)
     jl_sym_t *var = (jl_sym_t*)args[1];
     JL_TYPECHK(get_binding_type, module, (jl_value_t*)mod);
     JL_TYPECHK(get_binding_type, symbol, (jl_value_t*)var);
-    jl_value_t *ty = jl_get_binding_type(mod, var);
-    if (ty == (jl_value_t*)jl_nothing) {
-        jl_binding_t *b = jl_get_module_binding(mod, var, 0);
-        if (b == NULL)
-            return (jl_value_t*)jl_any_type;
-        jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-        if (b2 != b)
-            return (jl_value_t*)jl_any_type;
-        jl_value_t *old_ty = NULL;
-        jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type);
-        return jl_atomic_load_relaxed(&b->ty);
-    }
-    return ty;
-}
-
-JL_CALLABLE(jl_f_set_binding_type)
-{
-    JL_NARGS(set_binding_type!, 2, 3);
-    jl_module_t *m = (jl_module_t*)args[0];
-    jl_sym_t *s = (jl_sym_t*)args[1];
-    JL_TYPECHK(set_binding_type!, module, (jl_value_t*)m);
-    JL_TYPECHK(set_binding_type!, symbol, (jl_value_t*)s);
-    jl_value_t *ty = nargs == 2 ? (jl_value_t*)jl_any_type : args[2];
-    JL_TYPECHK(set_binding_type!, type, ty);
-    jl_binding_t *b = jl_get_binding_wr(m, s);
-    jl_value_t *old_ty = NULL;
-    if (!jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, ty) && ty != old_ty) {
-        if (nargs == 2)
-            return jl_nothing;
-        jl_errorf("cannot set type for global %s.%s. It already has a value or is already set to a different type.",
-                  jl_symbol_name(m->name), jl_symbol_name(s));
-    }
-    jl_gc_wb_binding(b, ty);
+    jl_value_t *ret = jl_get_binding_type(mod, var);
+    if (ret == jl_nothing)
+        return (jl_value_t*)jl_any_type;
+    return ret;
+}
+
+JL_CALLABLE(jl_f_swapglobal)
+{
+    enum jl_memory_order order = jl_memory_order_release;
+    JL_NARGS(swapglobal!, 3, 4);
+    if (nargs == 4) {
+        JL_TYPECHK(swapglobal!, symbol, args[3]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1);
+    }
+    jl_module_t *mod = (jl_module_t*)args[0];
+    jl_sym_t *var = (jl_sym_t*)args[1];
+    JL_TYPECHK(swapglobal!, module, (jl_value_t*)mod);
+    JL_TYPECHK(swapglobal!, symbol, (jl_value_t*)var);
+    if (order == jl_memory_order_notatomic)
+        jl_atomic_error("swapglobal!: module binding cannot be written non-atomically");
+    // is seq_cst already, no fence needed
+    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    return jl_checked_swap(b, mod, var, args[2]);
+}
+
+JL_CALLABLE(jl_f_modifyglobal)
+{
+    enum jl_memory_order order = jl_memory_order_release;
+    JL_NARGS(modifyglobal!, 4, 5);
+    if (nargs == 5) {
+        JL_TYPECHK(modifyglobal!, symbol, args[4]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 1);
+    }
+    jl_module_t *mod = (jl_module_t*)args[0];
+    jl_sym_t *var = (jl_sym_t*)args[1];
+    JL_TYPECHK(modifyglobal!, module, (jl_value_t*)mod);
+    JL_TYPECHK(modifyglobal!, symbol, (jl_value_t*)var);
+    if (order == jl_memory_order_notatomic)
+        jl_atomic_error("modifyglobal!: module binding cannot be written non-atomically");
+    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    // is seq_cst already, no fence needed
+    return jl_checked_modify(b, mod, var, args[2], args[3]);
+}
+
+JL_CALLABLE(jl_f_replaceglobal)
+{
+    enum jl_memory_order success_order = jl_memory_order_release;
+    JL_NARGS(replaceglobal!, 4, 6);
+    if (nargs >= 5) {
+        JL_TYPECHK(replaceglobal!, symbol, args[4]);
+        success_order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 1);
+    }
+    enum jl_memory_order failure_order = success_order;
+    if (nargs == 6) {
+        JL_TYPECHK(replaceglobal!, symbol, args[5]);
+        failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[5], 1, 0);
+    }
+    if (failure_order > success_order)
+        jl_atomic_error("invalid atomic ordering");
+    // TODO: filter more invalid ordering combinations?
+    jl_module_t *mod = (jl_module_t*)args[0];
+    jl_sym_t *var = (jl_sym_t*)args[1];
+    JL_TYPECHK(replaceglobal!, module, (jl_value_t*)mod);
+    JL_TYPECHK(replaceglobal!, symbol, (jl_value_t*)var);
+    if (success_order == jl_memory_order_notatomic)
+        jl_atomic_error("replaceglobal!: module binding cannot be written non-atomically");
+    if (failure_order == jl_memory_order_notatomic)
+        jl_atomic_error("replaceglobal!: module binding cannot be accessed non-atomically");
+    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    // is seq_cst already, no fence needed
+    return jl_checked_replace(b, mod, var, args[2], args[3]);
+}
+
+JL_CALLABLE(jl_f_setglobalonce)
+{
+    enum jl_memory_order success_order = jl_memory_order_release;
+    JL_NARGS(setglobalonce!, 3, 5);
+    if (nargs >= 4) {
+        JL_TYPECHK(setglobalonce!, symbol, args[3]);
+        success_order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1);
+    }
+    enum jl_memory_order failure_order = success_order;
+    if (nargs == 5) {
+        JL_TYPECHK(setglobalonce!, symbol, args[4]);
+        failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 0);
+    }
+    if (failure_order > success_order)
+        jl_atomic_error("invalid atomic ordering");
+    // TODO: filter more invalid ordering combinations?
+    jl_module_t *mod = (jl_module_t*)args[0];
+    jl_sym_t *var = (jl_sym_t*)args[1];
+    JL_TYPECHK(setglobalonce!, module, (jl_value_t*)mod);
+    JL_TYPECHK(setglobalonce!, symbol, (jl_value_t*)var);
+    if (success_order == jl_memory_order_notatomic)
+        jl_atomic_error("setglobalonce!: module binding cannot be written non-atomically");
+    if (failure_order == jl_memory_order_notatomic)
+        jl_atomic_error("setglobalonce!: module binding cannot be accessed non-atomically");
+    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    // is seq_cst already, no fence needed
+    jl_value_t *old = jl_checked_assignonce(b, mod, var, args[2]);
+    return old == NULL ? jl_true : jl_false;
+}
+
+// declare_global(module::Module, name::Symbol, [strong::Bool=false, [ty::Type]])
+JL_CALLABLE(jl_f_declare_global)
+{
+    JL_NARGS(declare_global, 3, 4);
+    JL_TYPECHK(declare_global, module, args[0]);
+    JL_TYPECHK(declare_global, symbol, args[1]);
+    JL_TYPECHK(declare_global, bool, args[2]);
+    int strong = args[2] == jl_true;
+    jl_value_t *set_type = NULL;
+    if (nargs >= 4) {
+        JL_TYPECHK(declare_global, type, args[3]);
+        set_type = args[3];
+    }
+    jl_declare_global((jl_module_t *)args[0], args[1], set_type, strong);
+    return jl_nothing;
+}
+
+JL_CALLABLE(jl_f_declare_const)
+{
+    JL_NARGS(declare_const, 2, 3);
+    JL_TYPECHK(declare_const, module, args[0]);
+    if (nargs == 3)
+        JL_TYPECHK(declare_const, symbol, args[1]);
+    jl_binding_t *b = jl_get_module_binding((jl_module_t *)args[0], (jl_sym_t *)args[1], 1);
+    jl_value_t *val = nargs == 3 ? args[2] : NULL;
+    jl_declare_constant_val(b, (jl_module_t *)args[0], (jl_sym_t *)args[1], val);
+    return nargs > 2 ? args[2] : jl_nothing;
+}
+
+// import, using --------------------------------------------------------------
+
+// Import binding `from.sym` as `asname` into `to`:
+//     _import(to::Module, from::Module, asname::Symbol, sym::Symbol, imported::Bool)
+//
+// Create const binding to `mod` in `to` with name `asname`:
+//     _import(to::Module, mod::Module, asname::Symbol)
+JL_CALLABLE(jl_f__import)
+{
+    JL_NARGS(_import, 3, 5);
+    JL_TYPECHK(_import, module, args[0]);
+    JL_TYPECHK(_import, module, args[1]);
+    JL_TYPECHK(_import, symbol, args[2]);
+    if (nargs == 3) {
+        jl_import_module(jl_current_task, (jl_module_t *)args[0], (jl_module_t *)args[1],
+                         (jl_sym_t *)args[2]);
+    }
+    else if (nargs == 4) {
+        jl_too_few_args("_import", 5);
+    }
+    else if (nargs == 5) {
+        JL_TYPECHK(_import, symbol, args[3]);
+        JL_TYPECHK(_import, bool, args[4]);
+        jl_module_import(jl_current_task, (jl_module_t *)args[0], (jl_module_t *)args[1],
+                         (jl_sym_t *)args[2], (jl_sym_t *)args[3], args[4] == jl_true);
+    }
     return jl_nothing;
 }
 
+// _using(to::Module, from::Module)
+JL_CALLABLE(jl_f__using)
+{
+    JL_NARGS(_using, 2, 3);
+    JL_TYPECHK(_using, module, args[0]);
+    JL_TYPECHK(_using, module, args[1]);
+    size_t flags = 0;
+    if (nargs == 3) {
+        JL_TYPECHK(_using, uint8, args[2]);
+        flags = jl_unbox_uint8(args[2]);
+    }
+    jl_module_using((jl_module_t *)args[0], (jl_module_t *)args[1], flags);
+    return jl_nothing;
+}
 
 // apply_type -----------------------------------------------------------------
 
@@ -1363,11 +1651,11 @@ JL_CALLABLE(jl_f_apply_type)
         jl_vararg_t *vm = (jl_vararg_t*)args[0];
         if (!vm->T) {
             JL_NARGS(apply_type, 2, 3);
-            return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL, 1);
+            return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL, 1, 0);
         }
         else if (!vm->N) {
             JL_NARGS(apply_type, 2, 2);
-            return (jl_value_t*)jl_wrap_vararg(vm->T, args[1], 1);
+            return (jl_value_t*)jl_wrap_vararg(vm->T, args[1], 1, 0);
         }
     }
     else if (jl_is_unionall(args[0])) {
@@ -1398,14 +1686,49 @@ JL_CALLABLE(jl_f_invoke)
 {
     JL_NARGSV(invoke, 2);
     jl_value_t *argtypes = args[1];
-    JL_GC_PUSH1(&argtypes);
-    if (!jl_is_tuple_type(jl_unwrap_unionall(args[1])))
-        jl_type_error("invoke", (jl_value_t*)jl_anytuple_type_type, args[1]);
+    if (jl_is_method(argtypes)) {
+        jl_method_t *m = (jl_method_t*)argtypes;
+        if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)m->sig))
+            jl_type_error("invoke: argument type error", argtypes, arg_tuple(args[0], &args[2], nargs - 1));
+        return jl_gf_invoke_by_method(m, args[0], &args[2], nargs - 1);
+    } else if (jl_is_code_instance(argtypes)) {
+        jl_code_instance_t *codeinst = (jl_code_instance_t*)args[1];
+        jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+        jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
+        // N.B.: specTypes need not be a subtype of the method signature. We need to check both.
+        if (jl_is_abioverride(codeinst->def)) {
+            jl_datatype_t *abi = (jl_datatype_t*)((jl_abi_override_t*)(codeinst->def))->abi;
+            if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, abi)) {
+                jl_type_error("invoke: argument type error (ABI overwrite)", (jl_value_t*)abi, arg_tuple(args[0], &args[2], nargs - 1));
+            }
+        } else {
+            if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)mi->specTypes) ||
+                (jl_is_method(mi->def.value) && !jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)mi->def.method->sig))) {
+                jl_type_error("invoke: argument type error", mi->specTypes, arg_tuple(args[0], &args[2], nargs - 1));
+            }
+        }
+        if (jl_atomic_load_relaxed(&codeinst->min_world) > jl_current_task->world_age ||
+            jl_current_task->world_age > jl_atomic_load_relaxed(&codeinst->max_world)) {
+            jl_error("invoke: CodeInstance not valid for this world");
+        }
+        if (!invoke) {
+            jl_compile_codeinst(codeinst);
+            invoke = jl_atomic_load_acquire(&codeinst->invoke);
+        }
+        if (invoke) {
+            return invoke(args[0], &args[2], nargs - 2, codeinst);
+        } else {
+            if (codeinst->owner != jl_nothing) {
+                jl_error("Failed to invoke or compile external codeinst");
+            }
+            return jl_invoke(args[0], &args[2], nargs - 1, mi);
+        }
+    }
+    if (!jl_is_tuple_type(jl_unwrap_unionall(argtypes)))
+        jl_type_error("invoke", (jl_value_t*)jl_anytuple_type_type, argtypes);
     if (!jl_tuple_isa(&args[2], nargs - 2, (jl_datatype_t*)argtypes))
-        jl_error("invoke: argument type error");
-    jl_value_t *res = jl_gf_invoke(argtypes, args[0], &args[2], nargs - 1);
-    JL_GC_POP();
-    return res;
+        jl_type_error("invoke: argument type error", argtypes, jl_f_tuple(NULL, &args[2], nargs - 2));
+    return jl_gf_invoke(argtypes, args[0], &args[2], nargs - 1);
 }
 
 // Expr constructor for internal use ------------------------------------------
@@ -1463,72 +1786,287 @@ JL_CALLABLE(jl_f__typevar)
     return (jl_value_t *)jl_new_typevar((jl_sym_t*)args[0], args[1], args[2]);
 }
 
-// arrays ---------------------------------------------------------------------
-
-JL_CALLABLE(jl_f_arraysize)
+// genericmemory ---------------------------------------------------------------------
+JL_CALLABLE(jl_f_memorynew)
 {
-    JL_NARGS(arraysize, 2, 2);
-    JL_TYPECHK(arraysize, array, args[0]);
-    jl_array_t *a = (jl_array_t*)args[0];
-    size_t nd = jl_array_ndims(a);
-    JL_TYPECHK(arraysize, long, args[1]);
-    int dno = jl_unbox_long(args[1]);
-    if (dno < 1)
-        jl_error("arraysize: dimension out of range");
-    if (dno > nd)
-        return jl_box_long(1);
-    return jl_box_long((&a->nrows)[dno-1]);
+    JL_NARGS(memorynew, 2, 2);
+    jl_datatype_t *jl_genericmemory_type_type = jl_datatype_type;
+    JL_TYPECHK(memorynew, genericmemory_type, args[0]);
+    JL_TYPECHK(memorynew, long, args[1]);
+    size_t nel = jl_unbox_long(args[1]);
+    return (jl_value_t*)jl_alloc_genericmemory(args[0], nel);
 }
 
-static size_t array_nd_index(jl_array_t *a, jl_value_t **args, size_t nidxs,
-                             const char *fname)
+JL_CALLABLE(jl_f_memoryrefnew)
 {
-    size_t i = 0;
-    size_t k, stride = 1;
-    size_t nd = jl_array_ndims(a);
-    for (k = 0; k < nidxs; k++) {
-        if (!jl_is_long(args[k]))
-            jl_type_error(fname, (jl_value_t*)jl_long_type, args[k]);
-        size_t ii = jl_unbox_long(args[k]) - 1;
-        i += ii * stride;
-        size_t d = (k >= nd) ? 1 : jl_array_dim(a, k);
-        if (k < nidxs - 1 && ii >= d)
-            jl_bounds_error_v((jl_value_t*)a, args, nidxs);
-        stride *= d;
+    JL_NARGS(memoryrefnew, 1, 3);
+    if (nargs == 1) {
+        JL_TYPECHK(memoryrefnew, genericmemory, args[0]);
+        jl_genericmemory_t *m = (jl_genericmemory_t*)args[0];
+        jl_value_t *typ = jl_apply_type((jl_value_t*)jl_genericmemoryref_type, jl_svec_data(((jl_datatype_t*)jl_typetagof(m))->parameters), 3);
+        JL_GC_PROMISE_ROOTED(typ); // it is a concrete type
+        const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout;
+        if (layout->flags.arrayelem_isunion || layout->size == 0)
+            return (jl_value_t*)jl_new_memoryref(typ, m, 0);
+        return (jl_value_t*)jl_new_memoryref(typ, m, m->ptr);
+    }
+    else {
+        JL_TYPECHK(memoryrefnew, long, args[1]);
+        if (nargs == 3)
+            JL_TYPECHK(memoryrefnew, bool, args[2]);
+        size_t i = (size_t) jl_unbox_long(args[1]) - 1;
+        char *data;
+        if (jl_is_genericmemory(args[0])) {
+            jl_genericmemory_t *m = (jl_genericmemory_t*)args[0];
+            jl_value_t *typ = jl_apply_type((jl_value_t*)jl_genericmemoryref_type, jl_svec_data(((jl_datatype_t*)jl_typetagof(m))->parameters), 3);
+            JL_GC_PROMISE_ROOTED(typ); // it is a concrete type
+            if (i >= m->length)
+                jl_bounds_error((jl_value_t*)m, args[1]);
+            const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout;
+            if (layout->flags.arrayelem_isunion || layout->size == 0)
+                return (jl_value_t*)jl_new_memoryref(typ, m, (char*)i);
+            else if (layout->flags.arrayelem_isboxed)
+                return (jl_value_t*)jl_new_memoryref(typ, m, (char*)m->ptr + sizeof(jl_value_t*)*i);
+            return (jl_value_t*)jl_new_memoryref(typ, m, (char*)m->ptr + layout->size*i);
+        }
+        JL_TYPECHK(memoryrefnew, genericmemoryref, args[0]);
+        jl_genericmemoryref_t *m = (jl_genericmemoryref_t*)args[0];
+        jl_genericmemory_t *mem = m->mem;
+        data = (char*)m->ptr_or_offset;
+        const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(mem))->layout;
+        if (layout->flags.arrayelem_isboxed) {
+            if (((data - (char*)mem->ptr) / sizeof(jl_value_t*)) + i >= mem->length)
+                jl_bounds_error((jl_value_t*)m, args[1]);
+            data += sizeof(jl_value_t*) * i;
+        }
+        else if (layout->flags.arrayelem_isunion || layout->size == 0) {
+            if ((size_t)data + i >= mem->length)
+                jl_bounds_error((jl_value_t*)m, args[1]);
+            data += i;
+        }
+        else {
+            if (((data - (char*)mem->ptr) / layout->size) + i >= mem->length)
+                jl_bounds_error((jl_value_t*)m, args[1]);
+            data += layout->size * i;
+        }
+        return (jl_value_t*)jl_new_memoryref((jl_value_t*)jl_typetagof(m), mem, data);
     }
-    for (; k < nd; k++)
-        stride *= jl_array_dim(a, k);
-    if (i >= stride)
-        jl_bounds_error_v((jl_value_t*)a, args, nidxs);
-    return i;
 }
 
-JL_CALLABLE(jl_f_arrayref)
+JL_CALLABLE(jl_f_memoryrefoffset)
 {
-    JL_NARGSV(arrayref, 3);
-    JL_TYPECHK(arrayref, bool, args[0]);
-    JL_TYPECHK(arrayref, array, args[1]);
-    jl_array_t *a = (jl_array_t*)args[1];
-    size_t i = array_nd_index(a, &args[2], nargs - 2, "arrayref");
-    return jl_arrayref(a, i);
+    JL_NARGS(memoryrefoffset, 1, 1);
+    JL_TYPECHK(memoryrefoffest, genericmemoryref, args[0]);
+    jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0];
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    size_t offset;
+    if (layout->flags.arrayelem_isboxed) {
+        offset = (((char*)m.ptr_or_offset - (char*)m.mem->ptr) / sizeof(jl_value_t*));
+    }
+    else if (layout->flags.arrayelem_isunion || layout->size == 0) {
+        offset = (size_t)m.ptr_or_offset;
+    }
+    else {
+        offset = ((char*)m.ptr_or_offset - (char*)m.mem->ptr) / layout->size;
+    }
+    return (jl_value_t*)jl_box_long(offset + 1);
 }
 
-JL_CALLABLE(jl_f_const_arrayref)
+JL_CALLABLE(jl_f_memoryrefget)
 {
-    return jl_f_arrayref(F, args, nargs);
+    enum jl_memory_order order = jl_memory_order_notatomic;
+    JL_NARGS(memoryrefget, 3, 3);
+    JL_TYPECHK(memoryrefget, genericmemoryref, args[0]);
+    JL_TYPECHK(memoryrefget, symbol, args[1]);
+    JL_TYPECHK(memoryrefget, bool, args[2]);
+    jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0];
+    jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem));
+    if (kind == (jl_value_t*)jl_not_atomic_sym) {
+        if (args[1] != kind) {
+            order = jl_get_atomic_order_checked((jl_sym_t*)args[1], 1, 0);
+            jl_atomic_error("memoryrefget: non-atomic memory cannot be accessed atomically");
+        }
+    }
+    else if (kind == (jl_value_t*)jl_atomic_sym) {
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[1], 1, 0);
+        if (order == jl_memory_order_notatomic)
+            jl_atomic_error("memoryrefget: atomic memory cannot be accessed non-atomically");
+    }
+    if (m.mem->length == 0)
+        jl_bounds_error_int((jl_value_t*)m.mem, 1);
+    return jl_memoryrefget(m, kind == (jl_value_t*)jl_atomic_sym);
 }
 
-JL_CALLABLE(jl_f_arrayset)
+JL_CALLABLE(jl_f_memoryrefset)
 {
-    JL_NARGSV(arrayset, 4);
-    JL_TYPECHK(arrayset, bool, args[0]);
-    JL_TYPECHK(arrayset, array, args[1]);
-    jl_array_t *a = (jl_array_t*)args[1];
-    size_t i = array_nd_index(a, &args[3], nargs - 3, "arrayset");
-    jl_arrayset(a, args[2], i);
+    enum jl_memory_order order = jl_memory_order_notatomic;
+    JL_NARGS(memoryrefset!, 4, 4);
+    JL_TYPECHK(memoryrefset!, genericmemoryref, args[0]);
+    JL_TYPECHK(memoryrefset!, symbol, args[2]);
+    JL_TYPECHK(memoryrefset!, bool, args[3]);
+    jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0];
+    jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem));
+    if (kind == (jl_value_t*)jl_not_atomic_sym) {
+        if (args[2] != kind) {
+            order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 0, 1);
+            jl_atomic_error("memoryrefset!: non-atomic memory cannot be written atomically");
+        }
+    }
+    else if (kind == (jl_value_t*)jl_atomic_sym) {
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 0, 1);
+        if (order == jl_memory_order_notatomic)
+            jl_atomic_error("memoryrefset!: atomic memory cannot be written non-atomically");
+    }
+    if (m.mem->length == 0)
+        jl_bounds_error_int((jl_value_t*)m.mem, 1);
+    jl_memoryrefset(m, args[1], kind == (jl_value_t*)jl_atomic_sym);
     return args[1];
 }
 
+JL_CALLABLE(jl_f_memoryref_isassigned)
+{
+    enum jl_memory_order order = jl_memory_order_notatomic;
+    JL_NARGS(memoryref_isassigned, 3, 3);
+    JL_TYPECHK(memoryref_isassigned, genericmemoryref, args[0]);
+    JL_TYPECHK(memoryref_isassigned, symbol, args[1]);
+    JL_TYPECHK(memoryref_isassigned, bool, args[2]);
+    jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0];
+    jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem));
+    if (kind == (jl_value_t*)jl_not_atomic_sym) {
+        if (args[1] != kind) {
+            order = jl_get_atomic_order_checked((jl_sym_t*)args[1], 1, 0);
+            jl_atomic_error("memoryref_isassigned: non-atomic memory cannot be accessed atomically");
+        }
+    }
+    else if (kind == (jl_value_t*)jl_atomic_sym) {
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[1], 1, 0);
+        if (order == jl_memory_order_notatomic)
+            jl_atomic_error("memoryref_isassigned: atomic memory cannot be accessed non-atomically");
+    }
+    if (m.mem->length == 0)
+        // TODO(jwn): decide on the fences required for ordering here
+        return jl_false;
+    return jl_memoryref_isassigned(m, kind == (jl_value_t*)jl_atomic_sym);
+}
+
+JL_CALLABLE(jl_f_memoryrefswap)
+{
+    enum jl_memory_order order = jl_memory_order_notatomic;
+    JL_NARGS(memoryrefswap!, 4, 4);
+    JL_TYPECHK(memoryrefswap!, genericmemoryref, args[0]);
+    JL_TYPECHK(memoryrefswap!, symbol, args[2]);
+    JL_TYPECHK(memoryrefswap!, bool, args[3]);
+    jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0];
+    jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem));
+    if (kind == (jl_value_t*)jl_not_atomic_sym) {
+        if (args[2] != kind) {
+            order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 1);
+            jl_atomic_error("memoryrefswap!: non-atomic memory cannot be written atomically");
+        }
+    }
+    else if (kind == (jl_value_t*)jl_atomic_sym) {
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 1);
+        if (order == jl_memory_order_notatomic)
+            jl_atomic_error("memoryrefswap!: atomic memory cannot be written non-atomically");
+    }
+    if (m.mem->length == 0)
+        jl_bounds_error_int((jl_value_t*)m.mem, 1);
+    return jl_memoryrefswap(m, args[1], kind == (jl_value_t*)jl_atomic_sym);
+}
+
+JL_CALLABLE(jl_f_memoryrefmodify)
+{
+    enum jl_memory_order order = jl_memory_order_notatomic;
+    JL_NARGS(memoryrefmodify!, 5, 5);
+    JL_TYPECHK(memoryrefmodify!, genericmemoryref, args[0]);
+    JL_TYPECHK(memoryrefmodify!, symbol, args[3]);
+    JL_TYPECHK(memoryrefmodify!, bool, args[4]);
+    jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0];
+    jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem));
+    if (kind == (jl_value_t*)jl_not_atomic_sym) {
+        if (args[3] != kind) {
+            order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1);
+            jl_atomic_error("memoryrefmodify!: non-atomic memory cannot be written atomically");
+        }
+    }
+    else if (kind == (jl_value_t*)jl_atomic_sym) {
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1);
+        if (order == jl_memory_order_notatomic)
+            jl_atomic_error("memoryrefmodify!: atomic memory cannot be written non-atomically");
+    }
+    if (m.mem->length == 0)
+        jl_bounds_error_int((jl_value_t*)m.mem, 1);
+    return jl_memoryrefmodify(m, args[1], args[2], kind == (jl_value_t*)jl_atomic_sym);
+}
+
+JL_CALLABLE(jl_f_memoryrefreplace)
+{
+    enum jl_memory_order success_order = jl_memory_order_notatomic;
+    enum jl_memory_order failure_order = jl_memory_order_notatomic;
+    JL_NARGS(memoryrefreplace!, 6, 6);
+    JL_TYPECHK(memoryrefreplace!, genericmemoryref, args[0]);
+    JL_TYPECHK(memoryrefreplace!, symbol, args[3]);
+    JL_TYPECHK(memoryrefreplace!, symbol, args[4]);
+    JL_TYPECHK(memoryrefreplace!, bool, args[5]);
+    jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0];
+    jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem));
+    if (kind == (jl_value_t*)jl_not_atomic_sym) {
+        if (args[4] != kind)
+            jl_atomic_error("invalid atomic ordering"); // because either it is invalid, or failure_order > success_order
+        if (args[3] != kind) {
+            success_order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1);
+            jl_atomic_error("memoryrefreplace!: non-atomic memory cannot be written atomically");
+        }
+    }
+    else if (kind == (jl_value_t*)jl_atomic_sym) {
+        success_order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1);
+        failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 0);
+        if (failure_order > success_order)
+            jl_atomic_error("invalid atomic ordering"); // because either it is invalid, or failure_order > success_order
+        if (success_order == jl_memory_order_notatomic)
+            jl_atomic_error("memoryrefreplace!: atomic memory cannot be written non-atomically");
+        if (failure_order == jl_memory_order_notatomic)
+            jl_atomic_error("memoryrefreplace!: atomic memory cannot be accessed non-atomically");
+    }
+    if (m.mem->length == 0)
+        jl_bounds_error_int((jl_value_t*)m.mem, 1);
+    return jl_memoryrefreplace(m, args[1], args[2], kind == (jl_value_t*)jl_atomic_sym);
+}
+
+JL_CALLABLE(jl_f_memoryrefsetonce)
+{
+    enum jl_memory_order success_order = jl_memory_order_notatomic;
+    enum jl_memory_order failure_order = jl_memory_order_notatomic;
+    JL_NARGS(memoryrefsetonce!, 5, 5);
+    JL_TYPECHK(memoryrefsetonce!, genericmemoryref, args[0]);
+    JL_TYPECHK(memoryrefsetonce!, symbol, args[2]);
+    JL_TYPECHK(memoryrefsetonce!, symbol, args[3]);
+    JL_TYPECHK(memoryrefsetonce!, bool, args[4]);
+    jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0];
+    jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem));
+    if (kind == (jl_value_t*)jl_not_atomic_sym) {
+        if (args[3] != kind)
+            jl_atomic_error("invalid atomic ordering"); // because either it is invalid, or failure_order > success_order
+        if (args[2] != kind) {
+            success_order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 1);
+            jl_atomic_error("memoryrefsetonce!: non-atomic memory cannot be written atomically");
+        }
+    }
+    else if (kind == (jl_value_t*)jl_atomic_sym) {
+        success_order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 1);
+        failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 0);
+        if (failure_order > success_order)
+            jl_atomic_error("invalid atomic ordering"); // because either it is invalid, or failure_order > success_order
+        if (success_order == jl_memory_order_notatomic)
+            jl_atomic_error("memoryrefsetonce!: atomic memory cannot be written non-atomically");
+        if (failure_order == jl_memory_order_notatomic)
+            jl_atomic_error("memoryrefsetonce!: atomic memory cannot be accessed non-atomically");
+    }
+    if (m.mem->length == 0)
+        jl_bounds_error_int((jl_value_t*)m.mem, 1);
+    return jl_memoryrefsetonce(m, args[1], kind == (jl_value_t*)jl_atomic_sym);
+}
+
 // type definition ------------------------------------------------------------
 
 JL_CALLABLE(jl_f__structtype)
@@ -1653,13 +2191,19 @@ JL_CALLABLE(jl_f__compute_sparams)
     return (jl_value_t*)env;
 }
 
+JL_CALLABLE(jl_f__svec_len)
+{
+    JL_NARGS(_svec_len, 1, 1);
+    jl_svec_t *s = (jl_svec_t*)args[0];
+    JL_TYPECHK(_svec_len, simplevector, (jl_value_t*)s);
+    return jl_box_long(jl_svec_len(s));
+}
+
 JL_CALLABLE(jl_f__svec_ref)
 {
-    JL_NARGS(_svec_ref, 3, 3);
-    jl_value_t *b = args[0];
-    jl_svec_t *s = (jl_svec_t*)args[1];
-    jl_value_t *i = (jl_value_t*)args[2];
-    JL_TYPECHK(_svec_ref, bool, b);
+    JL_NARGS(_svec_ref, 2, 2);
+    jl_svec_t *s = (jl_svec_t*)args[0];
+    jl_value_t *i = (jl_value_t*)args[1];
     JL_TYPECHK(_svec_ref, simplevector, (jl_value_t*)s);
     JL_TYPECHK(_svec_ref, long, i);
     size_t len = jl_svec_len(s);
@@ -1667,7 +2211,7 @@ JL_CALLABLE(jl_f__svec_ref)
     if (idx < 1 || idx > len) {
         jl_bounds_error_int((jl_value_t*)s, idx);
     }
-    return jl_svec_ref(s, idx-1);
+    return jl_svecref(s, idx-1);
 }
 
 static int equiv_field_types(jl_value_t *old, jl_value_t *ft)
@@ -1680,7 +2224,7 @@ static int equiv_field_types(jl_value_t *old, jl_value_t *ft)
         jl_value_t *ta = jl_svecref(old, i);
         jl_value_t *tb = jl_svecref(ft, i);
         if (jl_has_free_typevars(ta)) {
-            if (!jl_has_free_typevars(tb) || !jl_egal(ta, tb))
+            if (!jl_has_free_typevars(tb) || !jl_types_egal(ta, tb))
                 return 0;
         }
         else if (jl_has_free_typevars(tb) || jl_typetagof(ta) != jl_typetagof(tb) ||
@@ -1696,36 +2240,54 @@ static int equiv_field_types(jl_value_t *old, jl_value_t *ft)
 // inline it. The only way fields can reference this type (due to
 // syntax-enforced restrictions) is via being passed as a type parameter. Thus
 // we can conservatively check this by examining only the parameters of the
-// dependent types.
-// affects_layout is a hack introduced by #35275 to workaround a problem
-// introduced by #34223: it checks whether we will potentially need to
-// compute the layout of the object before we have fully computed the types of
-// the fields during recursion over the allocation of the parameters for the
-// field types (of the concrete subtypes)
-static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layout) JL_NOTSAFEPOINT
-{
-    if (jl_is_uniontype(p))
-        return references_name(((jl_uniontype_t*)p)->a, name, affects_layout) ||
-               references_name(((jl_uniontype_t*)p)->b, name, affects_layout);
-    if (jl_is_unionall(p))
-        return references_name((jl_value_t*)((jl_unionall_t*)p)->var->lb, name, 0) ||
-               references_name((jl_value_t*)((jl_unionall_t*)p)->var->ub, name, 0) ||
-               references_name(((jl_unionall_t*)p)->body, name, affects_layout);
+// dependent types. Additionally, a field might have already observed this
+// object for layout purposes before we got around to deciding if inlining
+// would be possible, so we cannot change the layout now if so.
+// affects_layout is a (conservative) analysis of layout_uses_free_typevars
+// freevars is a (conservative) analysis of what calling jl_has_bound_typevars from name->wrapper gives (TODO: just call this instead?)
+static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layout, int freevars) JL_NOTSAFEPOINT
+{
+    if (freevars && !jl_has_free_typevars(p))
+        freevars = 0;
+    while (jl_is_unionall(p)) {
+        if (references_name((jl_value_t*)((jl_unionall_t*)p)->var->lb, name, 0, freevars) ||
+            references_name((jl_value_t*)((jl_unionall_t*)p)->var->ub, name, 0, freevars))
+            return 1;
+       p = ((jl_unionall_t*)p)->body;
+    }
+    if (jl_is_uniontype(p)) {
+        return references_name(((jl_uniontype_t*)p)->a, name, affects_layout, freevars) ||
+               references_name(((jl_uniontype_t*)p)->b, name, affects_layout, freevars);
+    }
+    if (jl_is_vararg(p)) {
+        jl_value_t *T = ((jl_vararg_t*)p)->T;
+        jl_value_t *N = ((jl_vararg_t*)p)->N;
+        return (T && references_name(T, name, affects_layout, freevars)) ||
+               (N && references_name(N, name, affects_layout, freevars));
+    }
     if (jl_is_typevar(p))
         return 0; // already checked by unionall, if applicable
     if (jl_is_datatype(p)) {
         jl_datatype_t *dp = (jl_datatype_t*)p;
         if (affects_layout && dp->name == name)
             return 1;
-        // affects_layout checks whether we will need to attempt to layout this
-        // type (based on whether all copies of it have the same layout) in
-        // that case, we still need to check the recursive parameters for
-        // layout recursion happening also, but we know it won't itself cause
-        // problems for the layout computation
-        affects_layout = ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->layout == NULL;
+        affects_layout = jl_is_genericmemory_type(dp) || ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->layout == NULL;
+        // and even if it has a layout, the fields themselves might trigger layouts if they use tparam i
+        // rather than checking this for each field, we just assume it applies
+        if (!affects_layout && freevars && jl_field_names(dp) != jl_emptysvec) {
+            jl_svec_t *types = ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->types;
+            size_t i, l = jl_svec_len(types);
+            for (i = 0; i < l; i++) {
+                jl_value_t *ft = jl_svecref(types, i);
+                if (!jl_is_typevar(ft) && jl_has_free_typevars(ft)) {
+                    affects_layout = 1;
+                    break;
+                }
+            }
+        }
         size_t i, l = jl_nparams(p);
         for (i = 0; i < l; i++) {
-            if (references_name(jl_tparam(p, i), name, affects_layout))
+            if (references_name(jl_tparam(p, i), name, affects_layout, freevars))
                 return 1;
         }
     }
@@ -1735,11 +2297,13 @@ static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layou
 
 JL_CALLABLE(jl_f__typebody)
 {
-    JL_NARGS(_typebody!, 1, 2);
-    jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(args[0]);
+    JL_NARGS(_typebody!, 2, 3);
+    jl_value_t *prev = args[0];
+    jl_value_t *tret = args[1];
+    jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(args[1]);
     JL_TYPECHK(_typebody!, datatype, (jl_value_t*)dt);
-    if (nargs == 2) {
-        jl_value_t *ft = args[1];
+    if (nargs == 3) {
+        jl_value_t *ft = args[2];
         JL_TYPECHK(_typebody!, simplevector, ft);
         size_t nf = jl_svec_len(ft);
         for (size_t i = 0; i < nf; i++) {
@@ -1750,43 +2314,67 @@ JL_CALLABLE(jl_f__typebody)
                                  (jl_value_t*)jl_type_type, elt);
             }
         }
-        if (dt->types != NULL) {
-            if (!equiv_field_types((jl_value_t*)dt->types, ft))
-                jl_errorf("invalid redefinition of type %s", jl_symbol_name(dt->name->name));
-        }
-        else {
-            dt->types = (jl_svec_t*)ft;
-            jl_gc_wb(dt, ft);
-            // If a supertype can reference the same type, then we may not be
-            // able to compute the layout of the object before needing to
-            // publish it, so we must assume it cannot be inlined, if that
-            // check passes, then we also still need to check the fields too.
-            if (!dt->name->mutabl && (nf == 0 || !references_name((jl_value_t*)dt->super, dt->name, 1))) {
-                int mayinlinealloc = 1;
-                size_t i;
-                for (i = 0; i < nf; i++) {
-                    jl_value_t *fld = jl_svecref(ft, i);
-                    if (references_name(fld, dt->name, 1)) {
-                        mayinlinealloc = 0;
-                        break;
+        // Optimization: To avoid lots of unnecessary churning, lowering contains an optimization
+        // that re-uses the typevars of an existing definition (if any exists) for compute the field
+        // types. If such a previous type exists, there are two possibilities:
+        //  1. The field types are identical, we don't need to do anything and can proceed with the
+        //     old type as if it was the new one.
+        //  2. The field types are not identical, in which case we need to rename the typevars
+        //     back to their equivalents in the new type before proceeding.
+        if (prev == jl_false) {
+            if (dt->types != NULL)
+                jl_errorf("Internal Error: Expected type fields to be unset");
+        } else {
+            jl_datatype_t *prev_dt = (jl_datatype_t*)jl_unwrap_unionall(prev);
+            JL_TYPECHK(_typebody!, datatype, (jl_value_t*)prev_dt);
+            if (equiv_field_types((jl_value_t*)prev_dt->types, ft)) {
+                tret = prev;
+                goto have_type;
+            } else {
+                if (jl_svec_len(prev_dt->parameters) != jl_svec_len(dt->parameters))
+                    jl_errorf("Internal Error: Types should not have been considered equivalent");
+                for (size_t i = 0; i < nf; i++) {
+                    jl_value_t *elt = jl_svecref(ft, i);
+                    for (int j = 0; j < jl_svec_len(prev_dt->parameters); ++j) {
+                        // Only the last svecset matters for semantics, but we re-use the GC root
+                        elt = jl_substitute_var(elt, (jl_tvar_t *)jl_svecref(prev_dt->parameters, j), jl_svecref(dt->parameters, j));
+                        jl_svecset(ft, i, elt);
                     }
                 }
-                dt->name->mayinlinealloc = mayinlinealloc;
             }
         }
+        dt->types = (jl_svec_t*)ft;
+        jl_gc_wb(dt, ft);
+        // If a supertype can reference the same type, then we may not be
+        // able to compute the layout of the object before needing to
+        // publish it, so we must assume it cannot be inlined, if that
+        // check passes, then we also still need to check the fields too.
+        if (!dt->name->mutabl && (nf == 0 || !references_name((jl_value_t*)dt->super, dt->name, 0, 1))) {
+            int mayinlinealloc = 1;
+            size_t i;
+            for (i = 0; i < nf; i++) {
+                jl_value_t *fld = jl_svecref(ft, i);
+                if (references_name(fld, dt->name, 1, 1)) {
+                    mayinlinealloc = 0;
+                    break;
+                }
+            }
+            dt->name->mayinlinealloc = mayinlinealloc;
+        }
     }
-
-    JL_TRY {
-        jl_reinstantiate_inner_types(dt);
-    }
-    JL_CATCH {
-        dt->name->partial = NULL;
-        jl_rethrow();
+    {
+        JL_TRY {
+            jl_reinstantiate_inner_types(dt);
+        }
+        JL_CATCH {
+            dt->name->partial = NULL;
+            jl_rethrow();
+        }
     }
-
     if (jl_is_structtype(dt))
         jl_compute_field_offsets(dt);
-    return jl_nothing;
+have_type:
+    return tret;
 }
 
 // this is a heuristic for allowing "redefining" a type to something identical
@@ -1819,13 +2407,18 @@ static int equiv_type(jl_value_t *ta, jl_value_t *tb)
     JL_GC_PUSH2(&a, &b);
     a = jl_rewrap_unionall((jl_value_t*)dta->super, dta->name->wrapper);
     b = jl_rewrap_unionall((jl_value_t*)dtb->super, dtb->name->wrapper);
+    // if tb recursively refers to itself in its supertype, assume that it refers to ta
+    // before checking whether the supertypes are equal
+    b = jl_substitute_datatype(b, dtb, dta);
     if (!jl_types_equal(a, b))
         goto no;
-    JL_TRY {
-        a = jl_apply_type(dtb->name->wrapper, jl_svec_data(dta->parameters), jl_nparams(dta));
-    }
-    JL_CATCH {
-        ok = 0;
+    {
+        JL_TRY {
+            a = jl_apply_type(dtb->name->wrapper, jl_svec_data(dta->parameters), jl_nparams(dta));
+        }
+        JL_CATCH {
+            ok = 0;
+        }
     }
     if (!ok)
         goto no;
@@ -1854,6 +2447,13 @@ JL_CALLABLE(jl_f__equiv_typedef)
     return equiv_type(args[0], args[1]) ? jl_true : jl_false;
 }
 
+JL_CALLABLE(jl_f__defaultctors)
+{
+    JL_NARGS(_defaultctors, 2, 2);
+    jl_ctor_def(args[0], args[1]);
+    return jl_nothing;
+}
+
 // IntrinsicFunctions ---------------------------------------------------------
 
 static void (*runtime_fp[num_intrinsics])(void);
@@ -1861,13 +2461,12 @@ static unsigned intrinsic_nargs[num_intrinsics];
 
 JL_CALLABLE(jl_f_intrinsic_call)
 {
-    JL_TYPECHK(intrinsic_call, intrinsic, F);
     enum intrinsic f = (enum intrinsic)*(uint32_t*)jl_data_ptr(F);
     if (f == cglobal && nargs == 1)
         f = cglobal_auto;
     unsigned fargs = intrinsic_nargs[f];
     if (!fargs)
-        jl_errorf("`%s` must be compiled to be called", jl_intrinsic_name(f));
+        jl_errorf("`%s` requires the compiler", jl_intrinsic_name(f));
     JL_NARGS(intrinsic_call, fargs, fargs);
 
     union {
@@ -1893,7 +2492,7 @@ JL_CALLABLE(jl_f_intrinsic_call)
         default:
             assert(0 && "unexpected number of arguments to an intrinsic function");
     }
-    jl_gc_debug_critical_error();
+    jl_gc_debug_fprint_critical_error(ios_safe_stderr);
     abort();
 }
 
@@ -1920,16 +2519,16 @@ unsigned jl_intrinsic_nargs(int f)
 
 static void add_intrinsic_properties(enum intrinsic f, unsigned nargs, void (*pfunc)(void))
 {
+    assert(nargs <= 5 && "jl_f_intrinsic_call only implements up to 5 args");
     intrinsic_nargs[f] = nargs;
     runtime_fp[f] = pfunc;
 }
 
 static void add_intrinsic(jl_module_t *inm, const char *name, enum intrinsic f) JL_GC_DISABLED
 {
-    jl_value_t *i = jl_permbox32(jl_intrinsic_type, 0, (int32_t)f);
+    jl_value_t *i = jl_permbox32(jl_intrinsic_type, jl_intrinsic_tag, (int32_t)f);
     jl_sym_t *sym = jl_symbol(name);
-    jl_set_const(inm, sym, i);
-    jl_module_export(inm, sym);
+    jl_set_initial_const(inm, sym, i, 1);
 }
 
 void jl_init_intrinsic_properties(void) JL_GC_DISABLED
@@ -1945,18 +2544,14 @@ void jl_init_intrinsic_properties(void) JL_GC_DISABLED
 
 void jl_init_intrinsic_functions(void) JL_GC_DISABLED
 {
-    jl_module_t *inm = jl_new_module(jl_symbol("Intrinsics"), NULL);
-    inm->parent = jl_core_module;
-    jl_set_const(jl_core_module, jl_symbol("Intrinsics"), (jl_value_t*)inm);
-    jl_mk_builtin_func(jl_intrinsic_type, "IntrinsicFunction", jl_f_intrinsic_call);
-    jl_mk_builtin_func(
-        (jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type),
-        "OpaqueClosure", jl_f_opaque_closure_call);
+    jl_module_t *inm = jl_new_module_(jl_symbol("Intrinsics"), jl_core_module, 0, 1);
+    jl_set_initial_const(jl_core_module, jl_symbol("Intrinsics"), (jl_value_t*)inm, 0);
+    jl_mk_builtin_func(jl_intrinsic_type, jl_symbol("IntrinsicFunction"), jl_f_intrinsic_call);
 
+    jl_datatype_t *oc = (jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type);
     // Save a reference to the just created OpaqueClosure method, so we can provide special
     // codegen for it later.
-    jl_opaque_closure_method = (jl_method_t*)jl_methtable_lookup(jl_opaque_closure_typename->mt,
-        (jl_value_t*)jl_anytuple_type, 1);
+    jl_opaque_closure_method = jl_mk_builtin_func(oc, jl_symbol("OpaqueClosure"), jl_f_opaque_closure_call); // TODO: awkwardly not actually declared a Builtin, even though it relies on being handled by the special cases for Builtin everywhere else
 
 #define ADD_I(name, nargs) add_intrinsic(inm, #name, name);
 #define ADD_HIDDEN(name, nargs)
@@ -1969,82 +2564,24 @@ void jl_init_intrinsic_functions(void) JL_GC_DISABLED
 
 static void add_builtin(const char *name, jl_value_t *v)
 {
-    jl_set_const(jl_core_module, jl_symbol(name), v);
-}
-
-jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b)
-{
-    assert(jl_isa(b, (jl_value_t*)jl_builtin_type));
-    jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_atomic_load_relaxed(&jl_gf_mtable(b)->defs);
-    jl_method_instance_t *mi = jl_atomic_load_relaxed(&entry->func.method->unspecialized);
-    jl_code_instance_t *ci = jl_atomic_load_relaxed(&mi->cache);
-    return jl_atomic_load_relaxed(&ci->specptr.fptr1);
-}
-
-static jl_value_t *add_builtin_func(const char *name, jl_fptr_args_t fptr)
-{
-    return jl_mk_builtin_func(NULL, name, fptr)->instance;
+    jl_set_initial_const(jl_core_module, jl_symbol(name), v, 0);
 }
 
 void jl_init_primitives(void) JL_GC_DISABLED
 {
-    jl_builtin_is = add_builtin_func("===", jl_f_is);
-    jl_builtin_typeof = add_builtin_func("typeof", jl_f_typeof);
-    jl_builtin_sizeof = add_builtin_func("sizeof", jl_f_sizeof);
-    jl_builtin_issubtype = add_builtin_func("<:", jl_f_issubtype);
-    jl_builtin_isa = add_builtin_func("isa", jl_f_isa);
-    jl_builtin_typeassert = add_builtin_func("typeassert", jl_f_typeassert);
-    jl_builtin_throw = add_builtin_func("throw", jl_f_throw);
-    jl_builtin_tuple = add_builtin_func("tuple", jl_f_tuple);
-    jl_builtin_ifelse = add_builtin_func("ifelse", jl_f_ifelse);
-
-    // field access
-    jl_builtin_getfield = add_builtin_func("getfield",  jl_f_getfield);
-    jl_builtin_setfield = add_builtin_func("setfield!",  jl_f_setfield);
-    jl_builtin_swapfield = add_builtin_func("swapfield!",  jl_f_swapfield);
-    jl_builtin_modifyfield = add_builtin_func("modifyfield!",  jl_f_modifyfield);
-    jl_builtin_replacefield = add_builtin_func("replacefield!",  jl_f_replacefield);
-    jl_builtin_fieldtype = add_builtin_func("fieldtype", jl_f_fieldtype);
-    jl_builtin_nfields = add_builtin_func("nfields", jl_f_nfields);
-    jl_builtin_isdefined = add_builtin_func("isdefined", jl_f_isdefined);
-
-    // module bindings
-    jl_builtin_getglobal = add_builtin_func("getglobal", jl_f_getglobal);
-    jl_builtin_setglobal = add_builtin_func("setglobal!", jl_f_setglobal);
-    add_builtin_func("get_binding_type", jl_f_get_binding_type);
-    add_builtin_func("set_binding_type!", jl_f_set_binding_type);
-
-    // array primitives
-    jl_builtin_arrayref = add_builtin_func("arrayref", jl_f_arrayref);
-    jl_builtin_const_arrayref = add_builtin_func("const_arrayref", jl_f_arrayref);
-    jl_builtin_arrayset = add_builtin_func("arrayset", jl_f_arrayset);
-    jl_builtin_arraysize = add_builtin_func("arraysize", jl_f_arraysize);
-
-    // method table utils
-    jl_builtin_applicable = add_builtin_func("applicable", jl_f_applicable);
-    jl_builtin_invoke = add_builtin_func("invoke", jl_f_invoke);
-
-    // internal functions
-    jl_builtin_apply_type = add_builtin_func("apply_type", jl_f_apply_type);
-    jl_builtin__apply_iterate = add_builtin_func("_apply_iterate", jl_f__apply_iterate);
-    jl_builtin__expr = add_builtin_func("_expr", jl_f__expr);
-    jl_builtin_svec = add_builtin_func("svec", jl_f_svec);
-    add_builtin_func("_apply_pure", jl_f__apply_pure);
-    add_builtin_func("_call_latest", jl_f__call_latest);
-    add_builtin_func("_call_in_world", jl_f__call_in_world);
-    add_builtin_func("_call_in_world_total", jl_f__call_in_world_total);
-    add_builtin_func("_typevar", jl_f__typevar);
-    add_builtin_func("_structtype", jl_f__structtype);
-    add_builtin_func("_abstracttype", jl_f__abstracttype);
-    add_builtin_func("_primitivetype", jl_f__primitivetype);
-    add_builtin_func("_setsuper!", jl_f__setsuper);
-    jl_builtin__typebody = add_builtin_func("_typebody!", jl_f__typebody);
-    add_builtin_func("_equiv_typedef", jl_f__equiv_typedef);
-    jl_builtin_donotdelete = add_builtin_func("donotdelete", jl_f_donotdelete);
-    jl_builtin_compilerbarrier = add_builtin_func("compilerbarrier", jl_f_compilerbarrier);
-    add_builtin_func("finalizer", jl_f_finalizer);
-    add_builtin_func("_compute_sparams", jl_f__compute_sparams);
-    add_builtin_func("_svec_ref", jl_f__svec_ref);
+    // Builtins are specially considered available from world 0
+    for (int i = 0; i < jl_n_builtins; i++) {
+        if (i == jl_builtin_id_intrinsic_call ||
+            i == jl_builtin_id_opaque_closure_call)
+            continue;
+        jl_sym_t *sname = jl_symbol(jl_builtin_names[i]);
+        jl_value_t *builtin = jl_new_generic_function_with_supertype(sname, jl_core_module, jl_builtin_type, 0);
+        jl_set_initial_const(jl_core_module, sname, builtin, 0);
+        jl_mk_builtin_func((jl_datatype_t*)jl_typeof(builtin), sname, jl_builtin_f_addrs[i]);
+        jl_builtin_instances[i] = builtin;
+    }
+    add_builtin("OpaqueClosure", (jl_value_t*)jl_opaque_closure_type);
+    add_builtin("IntrinsicFunction", (jl_value_t*)jl_intrinsic_type);
 
     // builtin types
     add_builtin("Any", (jl_value_t*)jl_any_type);
@@ -2060,10 +2597,12 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("Tuple", (jl_value_t*)jl_anytuple_type);
     add_builtin("TypeofVararg", (jl_value_t*)jl_vararg_type);
     add_builtin("SimpleVector", (jl_value_t*)jl_simplevector_type);
-    add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0));
+    add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0, 0));
 
     add_builtin("Module", (jl_value_t*)jl_module_type);
     add_builtin("MethodTable", (jl_value_t*)jl_methtable_type);
+    add_builtin("methodtable", (jl_value_t*)jl_method_table);
+    add_builtin("MethodCache", (jl_value_t*)jl_methcache_type);
     add_builtin("Method", (jl_value_t*)jl_method_type);
     add_builtin("CodeInstance", (jl_value_t*)jl_code_instance_type);
     add_builtin("TypeMapEntry", (jl_value_t*)jl_typemap_entry_type);
@@ -2072,31 +2611,31 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("SSAValue", (jl_value_t*)jl_ssavalue_type);
     add_builtin("SlotNumber", (jl_value_t*)jl_slotnumber_type);
     add_builtin("Argument", (jl_value_t*)jl_argument_type);
-    add_builtin("Const", (jl_value_t*)jl_const_type);
-    add_builtin("PartialStruct", (jl_value_t*)jl_partial_struct_type);
-    add_builtin("PartialOpaque", (jl_value_t*)jl_partial_opaque_type);
-    add_builtin("InterConditional", (jl_value_t*)jl_interconditional_type);
     add_builtin("MethodMatch", (jl_value_t*)jl_method_match_type);
-    add_builtin("IntrinsicFunction", (jl_value_t*)jl_intrinsic_type);
     add_builtin("Function", (jl_value_t*)jl_function_type);
     add_builtin("Builtin", (jl_value_t*)jl_builtin_type);
     add_builtin("MethodInstance", (jl_value_t*)jl_method_instance_type);
     add_builtin("CodeInfo", (jl_value_t*)jl_code_info_type);
-    add_builtin("Ref", (jl_value_t*)jl_ref_type);
-    add_builtin("Ptr", (jl_value_t*)jl_pointer_type);
     add_builtin("LLVMPtr", (jl_value_t*)jl_llvmpointer_type);
     add_builtin("Task", (jl_value_t*)jl_task_type);
-    add_builtin("OpaqueClosure", (jl_value_t*)jl_opaque_closure_type);
 
+    add_builtin("AddrSpace", (jl_value_t*)jl_addrspace_type);
+    add_builtin("Ref", (jl_value_t*)jl_ref_type);
+    add_builtin("Ptr", (jl_value_t*)jl_pointer_type);
+    //add_builtin("GenericPtr", (jl_value_t*)jl_genericpointer_type);
     add_builtin("AbstractArray", (jl_value_t*)jl_abstractarray_type);
     add_builtin("DenseArray", (jl_value_t*)jl_densearray_type);
     add_builtin("Array", (jl_value_t*)jl_array_type);
+    add_builtin("GenericMemory", (jl_value_t*)jl_genericmemory_type);
+    add_builtin("GenericMemoryRef", (jl_value_t*)jl_genericmemoryref_type);
 
     add_builtin("Expr", (jl_value_t*)jl_expr_type);
     add_builtin("LineNumberNode", (jl_value_t*)jl_linenumbernode_type);
-    add_builtin("LineInfoNode", (jl_value_t*)jl_lineinfonode_type);
+    add_builtin("LegacyLineInfoNode", (jl_value_t*)jl_lineinfonode_type);
+    add_builtin("DebugInfo", (jl_value_t*)jl_debuginfo_type);
     add_builtin("GotoNode", (jl_value_t*)jl_gotonode_type);
     add_builtin("GotoIfNot", (jl_value_t*)jl_gotoifnot_type);
+    add_builtin("EnterNode", (jl_value_t*)jl_enternode_type);
     add_builtin("ReturnNode", (jl_value_t*)jl_returnnode_type);
     add_builtin("PiNode", (jl_value_t*)jl_pinode_type);
     add_builtin("PhiNode", (jl_value_t*)jl_phinode_type);
@@ -2105,6 +2644,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("QuoteNode", (jl_value_t*)jl_quotenode_type);
     add_builtin("NewvarNode", (jl_value_t*)jl_newvarnode_type);
     add_builtin("Binding", (jl_value_t*)jl_binding_type);
+    add_builtin("BindingPartition", (jl_value_t*)jl_binding_partition_type);
     add_builtin("GlobalRef", (jl_value_t*)jl_globalref_type);
     add_builtin("NamedTuple", (jl_value_t*)jl_namedtuple_type);
 
@@ -2123,6 +2663,26 @@ void jl_init_primitives(void) JL_GC_DISABLED
 
     add_builtin("AbstractString", (jl_value_t*)jl_abstractstring_type);
     add_builtin("String", (jl_value_t*)jl_string_type);
+
+    // ensure that primitive types are fully allocated (since jl_init_types is incomplete)
+    assert(jl_atomic_load_relaxed(&jl_world_counter) == 1);
+    jl_module_t *core = jl_core_module;
+    jl_svec_t *bindings = jl_atomic_load_relaxed(&core->bindings);
+    jl_value_t **table = jl_svec_data(bindings);
+    for (size_t i = 0; i < jl_svec_len(bindings); i++) {
+        if (table[i] != jl_nothing) {
+            jl_binding_t *b = (jl_binding_t*)table[i];
+            jl_value_t *v = jl_get_binding_value_in_world(b, 1);
+            if (v) {
+                if (jl_is_unionall(v))
+                    v = jl_unwrap_unionall(v);
+                if (jl_is_datatype(v)) {
+                    jl_datatype_t *tt = (jl_datatype_t*)v;
+                    tt->name->module = core;
+                }
+            }
+        }
+    }
 }
 
 #ifdef __cplusplus
diff --git a/src/ccall.cpp b/src/ccall.cpp
index 47496a3a91ba6..f67268b1a0007 100644
--- a/src/ccall.cpp
+++ b/src/ccall.cpp
@@ -22,21 +22,25 @@ TRANSFORMED_CCALL_STAT(jl_cpu_wake);
 TRANSFORMED_CCALL_STAT(jl_gc_safepoint);
 TRANSFORMED_CCALL_STAT(jl_get_ptls_states);
 TRANSFORMED_CCALL_STAT(jl_threadid);
+TRANSFORMED_CCALL_STAT(jl_get_ptls_rng);
+TRANSFORMED_CCALL_STAT(jl_set_ptls_rng);
+TRANSFORMED_CCALL_STAT(jl_get_tls_world_age);
+TRANSFORMED_CCALL_STAT(jl_get_world_counter);
 TRANSFORMED_CCALL_STAT(jl_gc_enable_disable_finalizers_internal);
 TRANSFORMED_CCALL_STAT(jl_get_current_task);
 TRANSFORMED_CCALL_STAT(jl_set_next_task);
 TRANSFORMED_CCALL_STAT(jl_sigatomic_begin);
 TRANSFORMED_CCALL_STAT(jl_sigatomic_end);
-TRANSFORMED_CCALL_STAT(jl_svec_len);
-TRANSFORMED_CCALL_STAT(jl_svec_ref);
-TRANSFORMED_CCALL_STAT(jl_array_isassigned);
 TRANSFORMED_CCALL_STAT(jl_string_ptr);
 TRANSFORMED_CCALL_STAT(jl_symbol_name);
+TRANSFORMED_CCALL_STAT(jl_genericmemory_owner);
+TRANSFORMED_CCALL_STAT(jl_alloc_genericmemory);
 TRANSFORMED_CCALL_STAT(memcpy);
 TRANSFORMED_CCALL_STAT(memset);
 TRANSFORMED_CCALL_STAT(memmove);
 TRANSFORMED_CCALL_STAT(jl_object_id);
 #undef TRANSFORMED_CCALL_STAT
+extern "C" JL_DLLEXPORT jl_value_t *ijl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 
 STATISTIC(EmittedCCalls, "Number of ccalls emitted");
 STATISTIC(DeferredCCallLookups, "Number of ccalls looked up at runtime");
@@ -50,13 +54,43 @@ GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M)
     return prepare_global_in(M, jlRTLD_DEFAULT_var);
 }
 
+typedef struct {
+    jl_value_t *gcroot[2];     // GC roots for strings [f_name, f_lib]
+
+    // Static name resolution (compile-time known)
+    const char *f_name;        // static function name
+    const char *f_lib;         // static library name
+
+    // Dynamic name resolution (simple runtime expressions)
+    jl_value_t *f_name_expr;   // expression for function name
+    jl_value_t *f_lib_expr;    // expression for library name
+
+    // Runtime pointer
+    Value *jl_ptr;             // callable pointer expression result
+} native_sym_arg_t;
 
 // Find or create the GVs for the library and symbol lookup.
-// Return `runtime_lib` (whether the library name is a string)
+// Return `runtime_lib` (whether the library name is a string) if it returns `lib`.
 // The `lib` and `sym` GV returned may not be in the current module.
-static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_name,
+static bool runtime_sym_gvs(jl_codectx_t &ctx, const native_sym_arg_t &symarg,
                             GlobalVariable *&lib, GlobalVariable *&sym)
 {
+    const auto &f_lib = symarg.f_lib;
+    const auto &f_name = symarg.f_name;
+    // If f_name isn't constant or f_lib_expr is present but not present,
+    // emit a local cache for sym, but do not cache lib
+    if (!((f_lib || symarg.f_lib_expr == NULL) && f_name)) {
+        std::string name = "dynccall_";
+        name += std::to_string(jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1));
+        Module *M = jl_Module;
+        auto T_pvoidfunc = getPointerTy(M->getContext());
+        lib = nullptr;
+        sym = new GlobalVariable(*M, T_pvoidfunc, false,
+                                 GlobalVariable::InternalLinkage,
+                                 Constant::getNullValue(T_pvoidfunc), name);
+        return false;
+    }
+
     auto M = &ctx.emission_context.shared_module();
     bool runtime_lib = false;
     GlobalVariable *libptrgv;
@@ -80,13 +114,13 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_
     else {
         std::string name = "ccalllib_";
         name += llvm::sys::path::filename(f_lib);
-        name += std::to_string(jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1));
+        name += std::to_string(jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1));
         runtime_lib = true;
         auto &libgv = ctx.emission_context.libMapGV[f_lib];
         if (libgv.first == NULL) {
-            libptrgv = new GlobalVariable(*M, getInt8PtrTy(M->getContext()), false,
+            libptrgv = new GlobalVariable(*M, getPointerTy(M->getContext()), false,
                                           GlobalVariable::ExternalLinkage,
-                                          Constant::getNullValue(getInt8PtrTy(M->getContext())), name);
+                                          Constant::getNullValue(getPointerTy(M->getContext())), name);
             libgv.first = libptrgv;
         }
         else {
@@ -100,8 +134,8 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_
         std::string name = "ccall_";
         name += f_name;
         name += "_";
-        name += std::to_string(jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1));
-        auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(M->getContext());
+        name += std::to_string(jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1));
+        auto T_pvoidfunc = getPointerTy(M->getContext());
         llvmgv = new GlobalVariable(*M, T_pvoidfunc, false,
                                     GlobalVariable::ExternalLinkage,
                                     Constant::getNullValue(T_pvoidfunc), name);
@@ -115,27 +149,29 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_
 static Value *runtime_sym_lookup(
         jl_codegen_params_t &emission_context,
         IRBuilder<> &irbuilder,
-        jl_codectx_t *ctx,
-        PointerType *funcptype, const char *f_lib, jl_value_t *lib_expr,
-        const char *f_name, Function *f,
+        jl_codectx_t *pctx,
+        const native_sym_arg_t &symarg, Function *f,
         GlobalVariable *libptrgv,
         GlobalVariable *llvmgv, bool runtime_lib)
 {
     ++RuntimeSymLookups;
-    // in pseudo-code, this function emits the following:
+    // in pseudo-code, this function emits the following if libptrgv is set:
     //   global HMODULE *libptrgv
     //   global void **llvmgv
-    //   if (*llvmgv == NULL) {
+    //   if (*llvmgv == NULL)
     //       *llvmgv = jl_load_and_lookup(f_lib, f_name, libptrgv);
-    //   }
     //   return (*llvmgv)
-    auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(irbuilder.getContext());
+    // otherwise it emits:
+    //   global void **llvmgv
+    //   if (*llvmgv == NULL)
+    //       *llvmgv = jl_lazy_load_and_lookup(f_lib_expr, f_name_expr);
+    //   return (*llvmgv)
+    auto T_pvoidfunc = getPointerTy(irbuilder.getContext());
     BasicBlock *enter_bb = irbuilder.GetInsertBlock();
     BasicBlock *dlsym_lookup = BasicBlock::Create(irbuilder.getContext(), "dlsym");
     BasicBlock *ccall_bb = BasicBlock::Create(irbuilder.getContext(), "ccall");
     Constant *initnul = ConstantPointerNull::get(T_pvoidfunc);
     LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(T_pvoidfunc, llvmgv, Align(sizeof(void*)));
-    setName(emission_context, llvmf_orig, f_name + StringRef(".cached"));
     // This in principle needs a consume ordering so that load from
     // this pointer sees a valid value. However, this is not supported by
     // LLVM (or agreed on in the C/C++ standard FWIW) and should be
@@ -152,80 +188,69 @@ static Value *runtime_sym_lookup(
             dlsym_lookup);
 
     assert(f->getParent() != NULL);
-    f->getBasicBlockList().push_back(dlsym_lookup);
+    dlsym_lookup->insertInto(f);
     irbuilder.SetInsertPoint(dlsym_lookup);
     Instruction *llvmf;
-    Value *nameval = stringConstPtr(emission_context, irbuilder, f_name);
-    if (lib_expr) {
-        jl_cgval_t libval = emit_expr(*ctx, lib_expr);
-        llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jllazydlsym_func),
-                    { boxed(*ctx, libval), nameval });
-    }
-    else {
+    if (libptrgv) {
+        // Call jl_load_and_lookup
+        assert(symarg.f_name);
         Value *libname;
-        if (runtime_lib) {
-            libname = stringConstPtr(emission_context, irbuilder, f_lib);
-        }
-        else {
+        if (runtime_lib)
+            libname = stringConstPtr(emission_context, irbuilder, symarg.f_lib);
+        else
             // f_lib is actually one of the special sentinel values
-            libname = ConstantExpr::getIntToPtr(ConstantInt::get(emission_context.DL.getIntPtrType(irbuilder.getContext()), (uintptr_t)f_lib), getInt8PtrTy(irbuilder.getContext()));
-        }
-        llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func),
+            libname = ConstantExpr::getIntToPtr(ConstantInt::get(emission_context.DL.getIntPtrType(irbuilder.getContext()), (uintptr_t)symarg.f_lib), getPointerTy(irbuilder.getContext()));
+        Value *nameval = stringConstPtr(emission_context, irbuilder, symarg.f_name);
+        auto lookup = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func),
                     { libname, nameval, libptrgv });
+        llvmf = lookup;
+        setName(emission_context, llvmf, symarg.f_name + StringRef(".found"));
+    }
+    else {
+        // Call jl_lazy_load_and_lookup
+        assert(pctx);
+        jl_codectx_t &ctx = *pctx;
+        Value *fname_val;
+        if (symarg.f_name)
+            fname_val = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_symbol(symarg.f_name)));
+        else
+            fname_val = boxed(ctx, emit_expr(ctx, symarg.f_name_expr));
+        Value *lib_val;
+        if (symarg.f_lib)
+            lib_val = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_symbol(symarg.f_lib)));
+        else if (symarg.f_lib_expr)
+            // n.b. f_lib_expr is required to be something simple here (from
+            // resolve_definition_effects validation) such as a globalref or a
+            // quote node for example, not a general expression
+            lib_val = boxed(ctx, emit_expr(ctx, symarg.f_lib_expr));
+        else
+            lib_val = ConstantPointerNull::get(ctx.types().T_prjlvalue);
+        llvmf = irbuilder.CreateCall(prepare_call(jllazydlsym_func), {lib_val, fname_val});
     }
-    setName(emission_context, llvmf, f_name + StringRef(".found"));
     StoreInst *store = irbuilder.CreateAlignedStore(llvmf, llvmgv, Align(sizeof(void*)));
     store->setAtomic(AtomicOrdering::Release);
     irbuilder.CreateBr(ccall_bb);
 
-    f->getBasicBlockList().push_back(ccall_bb);
+    ccall_bb->insertInto(f);
     irbuilder.SetInsertPoint(ccall_bb);
     PHINode *p = irbuilder.CreatePHI(T_pvoidfunc, 2);
     p->addIncoming(llvmf_orig, enter_bb);
     p->addIncoming(llvmf, llvmf->getParent());
-    setName(emission_context, p, f_name);
-    return irbuilder.CreateBitCast(p, funcptype);
+    return p;
 }
 
 static Value *runtime_sym_lookup(
         jl_codectx_t &ctx,
-        PointerType *funcptype, const char *f_lib, jl_value_t *lib_expr,
-        const char *f_name, Function *f,
-        GlobalVariable *libptrgv,
-        GlobalVariable *llvmgv, bool runtime_lib)
+        const native_sym_arg_t &symarg, Function *f)
 {
-    return runtime_sym_lookup(ctx.emission_context, ctx.builder, &ctx, funcptype, f_lib, lib_expr,
-                              f_name, f, libptrgv, llvmgv, runtime_lib);
-}
-
-static Value *runtime_sym_lookup(
-        jl_codectx_t &ctx,
-        PointerType *funcptype, const char *f_lib, jl_value_t *lib_expr,
-        const char *f_name, Function *f)
-{
-    auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(ctx.builder.getContext());
     GlobalVariable *libptrgv;
     GlobalVariable *llvmgv;
-    bool runtime_lib;
-    if (lib_expr) {
-        // for computed library names, generate a global variable to cache the function
-        // pointer just for this call site.
-        runtime_lib = true;
-        libptrgv = NULL;
-        std::string gvname = "libname_";
-        gvname += f_name;
-        gvname += "_";
-        gvname += std::to_string(jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1));
-        llvmgv = new GlobalVariable(*jl_Module, T_pvoidfunc, false,
-                                    GlobalVariable::ExternalLinkage,
-                                    Constant::getNullValue(T_pvoidfunc), gvname);
-    }
-    else {
-        runtime_lib = runtime_sym_gvs(ctx, f_lib, f_name, libptrgv, llvmgv);
+    bool runtime_lib = runtime_sym_gvs(ctx, symarg, libptrgv, llvmgv);
+    if (libptrgv) {
         libptrgv = prepare_global_in(jl_Module, libptrgv);
+        llvmgv = prepare_global_in(jl_Module, llvmgv);
     }
-    llvmgv = prepare_global_in(jl_Module, llvmgv);
-    return runtime_sym_lookup(ctx, funcptype, f_lib, lib_expr, f_name, f, libptrgv, llvmgv, runtime_lib);
+    return runtime_sym_lookup(ctx.emission_context, ctx.builder, &ctx, symarg, f, libptrgv, llvmgv, runtime_lib);
 }
 
 // Emit a "PLT" entry that will be lazily initialized
@@ -233,39 +258,51 @@ static Value *runtime_sym_lookup(
 static GlobalVariable *emit_plt_thunk(
         jl_codectx_t &ctx,
         FunctionType *functype, const AttributeList &attrs,
-        CallingConv::ID cc, const char *f_lib, const char *f_name,
+        CallingConv::ID cc, const native_sym_arg_t &symarg,
         GlobalVariable *libptrgv, GlobalVariable *llvmgv,
         bool runtime_lib)
 {
     ++PLTThunks;
-    auto M = &ctx.emission_context.shared_module();
-    PointerType *funcptype = PointerType::get(functype, 0);
-    libptrgv = prepare_global_in(M, libptrgv);
-    llvmgv = prepare_global_in(M, llvmgv);
+    bool shared = libptrgv != nullptr;
+    assert(shared && "not yet supported by runtime_sym_lookup");
+    Module *M = shared ? &ctx.emission_context.shared_module() : jl_Module;
+    if (shared) {
+        assert(symarg.f_name);
+        libptrgv = prepare_global_in(M, libptrgv);
+        llvmgv = prepare_global_in(M, llvmgv);
+    }
     std::string fname;
-    raw_string_ostream(fname) << "jlplt_" << f_name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
+    if (symarg.f_name)
+        raw_string_ostream(fname) << "jlplt_" << symarg.f_name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
+    else
+        raw_string_ostream(fname) << "jldynplt_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
     Function *plt = Function::Create(functype,
-                                     GlobalVariable::ExternalLinkage,
+                                     GlobalVariable::PrivateLinkage,
                                      fname, M);
     plt->setAttributes(attrs);
     if (cc != CallingConv::C)
         plt->setCallingConv(cc);
-    fname += "_got";
-    auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(M->getContext());
+    auto T_pvoidfunc = getPointerTy(M->getContext());
     GlobalVariable *got = new GlobalVariable(*M, T_pvoidfunc, false,
-                                             GlobalVariable::ExternalLinkage,
-                                             ConstantExpr::getBitCast(plt, T_pvoidfunc),
-                                             fname);
+                                             shared ? GlobalVariable::ExternalLinkage : GlobalVariable::PrivateLinkage,
+                                             plt,
+                                             fname + "_got");
+    if (shared) {
+        if (runtime_lib)
+            got->addAttribute("julia.libname", symarg.f_lib);
+        else
+            got->addAttribute("julia.libidx", std::to_string((uintptr_t) symarg.f_lib));
+        got->addAttribute("julia.fname", symarg.f_name);
+    }
     BasicBlock *b0 = BasicBlock::Create(M->getContext(), "top", plt);
     IRBuilder<> irbuilder(b0);
-    Value *ptr = runtime_sym_lookup(ctx.emission_context, irbuilder, NULL, funcptype, f_lib, NULL, f_name, plt, libptrgv,
+    Value *ptr = runtime_sym_lookup(ctx.emission_context, irbuilder, NULL, symarg, plt, libptrgv,
                                     llvmgv, runtime_lib);
-    StoreInst *store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, Align(sizeof(void*)));
+    StoreInst *store = irbuilder.CreateAlignedStore(ptr, got, Align(sizeof(void*)));
     store->setAtomic(AtomicOrdering::Release);
     SmallVector<Value*, 16> args;
-    for (Function::arg_iterator arg = plt->arg_begin(), arg_e = plt->arg_end(); arg != arg_e; ++arg)
-        args.push_back(&*arg);
-    assert(cast<PointerType>(ptr->getType())->isOpaqueOrPointeeTypeMatches(functype));
+    for (auto &arg : plt->args())
+        args.push_back(&arg);
     CallInst *ret = irbuilder.CreateCall(
         functype,
         ptr, ArrayRef<Value*>(args));
@@ -304,34 +341,34 @@ static Value *emit_plt(
         jl_codectx_t &ctx,
         FunctionType *functype,
         const AttributeList &attrs,
-        CallingConv::ID cc, const char *f_lib, const char *f_name)
+        CallingConv::ID cc, const native_sym_arg_t &symarg)
 {
     ++PLT;
-    assert(ctx.emission_context.imaging);
     // Don't do this for vararg functions so that the `musttail` is only
     // an optimization and is not required to function correctly.
     assert(!functype->isVarArg());
     GlobalVariable *libptrgv;
     GlobalVariable *llvmgv;
-    bool runtime_lib = runtime_sym_gvs(ctx, f_lib, f_name, libptrgv, llvmgv);
-    PointerType *funcptype = PointerType::get(functype, 0);
+    bool runtime_lib = runtime_sym_gvs(ctx, symarg, libptrgv, llvmgv);
+    if (!libptrgv)
+        return runtime_sym_lookup(ctx, symarg, ctx.f);
 
     auto &pltMap = ctx.emission_context.allPltMap[attrs];
     auto key = std::make_tuple(llvmgv, functype, cc);
     GlobalVariable *&sharedgot = pltMap[key];
     if (!sharedgot) {
         sharedgot = emit_plt_thunk(ctx,
-                functype, attrs, cc, f_lib, f_name, libptrgv, llvmgv, runtime_lib);
+                functype, attrs, cc, symarg, libptrgv, llvmgv, runtime_lib);
     }
     GlobalVariable *got = prepare_global_in(jl_Module, sharedgot);
     LoadInst *got_val = ctx.builder.CreateAlignedLoad(got->getValueType(), got, Align(sizeof(void*)));
-    setName(ctx.emission_context, got_val, f_name);
+    setName(ctx.emission_context, got_val, symarg.f_name);
     // See comment in `runtime_sym_lookup` above. This in principle needs a
     // consume ordering too. This is even less likely to cause issues though
     // since the only thing we do to this loaded pointer is to call it
     // immediately.
     got_val->setAtomic(AtomicOrdering::Unordered);
-    return ctx.builder.CreateBitCast(got_val, funcptype);
+    return got_val;
 }
 
 // --- ABI Implementations ---
@@ -368,6 +405,7 @@ static bool is_native_simd_type(jl_datatype_t *dt) {
 
 #include "abi_arm.cpp"
 #include "abi_aarch64.cpp"
+#include "abi_riscv.cpp"
 #include "abi_ppc64le.cpp"
 #include "abi_win32.cpp"
 #include "abi_win64.cpp"
@@ -376,22 +414,24 @@ static bool is_native_simd_type(jl_datatype_t *dt) {
 
 #if defined ABI_LLVM
   typedef ABI_LLVMLayout DefaultAbiState;
-#elif defined _CPU_X86_64_
-#  if defined _OS_WINDOWS_
+#elif defined _OS_WINDOWS_
+#  if defined _CPU_X86_64_
      typedef ABI_Win64Layout DefaultAbiState;
-#  else
-     typedef ABI_x86_64Layout DefaultAbiState;
-#  endif
-#elif defined _CPU_X86_
-#  if defined _OS_WINDOWS_
+#  elif defined _CPU_X86_
      typedef ABI_Win32Layout DefaultAbiState;
 #  else
-     typedef ABI_x86Layout DefaultAbiState;
+#    error Windows is currently only supported on x86 and x86_64
 #  endif
+#elif defined _CPU_X86_64_
+  typedef ABI_x86_64Layout DefaultAbiState;
+#elif defined _CPU_X86_
+  typedef ABI_x86Layout DefaultAbiState;
 #elif defined _CPU_ARM_
   typedef ABI_ARMLayout DefaultAbiState;
 #elif defined _CPU_AARCH64_
   typedef ABI_AArch64Layout DefaultAbiState;
+#elif defined _CPU_RISCV64_
+  typedef ABI_RiscvLayout DefaultAbiState;
 #elif defined _CPU_PPC64_
   typedef ABI_PPC64leLayout DefaultAbiState;
 #else
@@ -414,7 +454,7 @@ static Value *llvm_type_rewrite(
 
     assert(from_type->isPointerTy() == target_type->isPointerTy()); // expect that all ABIs consider all pointers to be equivalent
     if (target_type->isPointerTy())
-        return emit_bitcast(ctx, v, target_type);
+        return v;
 
     // simple integer and float widening & conversion cases
     if (from_type->getPrimitiveSizeInBits() > 0 &&
@@ -442,24 +482,13 @@ static Value *llvm_type_rewrite(
     // we need to use this alloca copy trick instead
     // On ARM and AArch64, the ABI requires casting through memory to different
     // sizes.
-    Value *from;
-    Value *to;
     const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
-    unsigned align = std::max(DL.getPrefTypeAlignment(target_type), DL.getPrefTypeAlignment(from_type));
-    if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) {
-        to = emit_static_alloca(ctx, target_type);
-        setName(ctx.emission_context, to, "type_rewrite_buffer");
-        cast<AllocaInst>(to)->setAlignment(Align(align));
-        from = emit_bitcast(ctx, to, from_type->getPointerTo());
-    }
-    else {
-        from = emit_static_alloca(ctx, from_type);
-        setName(ctx.emission_context, from, "type_rewrite_buffer");
-        cast<AllocaInst>(from)->setAlignment(Align(align));
-        to = emit_bitcast(ctx, from, target_type->getPointerTo());
-    }
-    ctx.builder.CreateAlignedStore(v, from, Align(align));
-    auto pun = ctx.builder.CreateAlignedLoad(target_type, to, Align(align));
+    Align align = std::max(DL.getPrefTypeAlign(target_type), DL.getPrefTypeAlign(from_type));
+    size_t nb = std::max(DL.getTypeAllocSize(target_type), DL.getTypeAllocSize(from_type));
+    AllocaInst *cast = emit_static_alloca(ctx, nb, align);
+    setName(ctx.emission_context, cast, "type_rewrite_buffer");
+    ctx.builder.CreateAlignedStore(v, cast, align);
+    auto pun = ctx.builder.CreateAlignedLoad(target_type, cast, align);
     setName(ctx.emission_context, pun, "type_rewrite");
     return pun;
 }
@@ -473,12 +502,9 @@ static Value *runtime_apply_type_env(jl_codectx_t &ctx, jl_value_t *ty)
     Value *args[] = {
         literal_pointer_val(ctx, ty),
         literal_pointer_val(ctx, (jl_value_t*)ctx.linfo->def.method->sig),
-        ctx.builder.CreateInBoundsGEP(
-                ctx.types().T_prjlvalue,
-                ctx.spvals_ptr,
-                ConstantInt::get(ctx.types().T_size, sizeof(jl_svec_t) / sizeof(jl_value_t*)))
+        emit_ptrgep(ctx, maybe_decay_tracked(ctx, ctx.spvals_ptr), sizeof(jl_svec_t))
     };
-    auto call = ctx.builder.CreateCall(prepare_call(jlapplytype_func), makeArrayRef(args));
+    auto call = ctx.builder.CreateCall(prepare_call(jlapplytype_func), ArrayRef<Value*>(args));
     addRetAttr(call, Attribute::getWithAlignment(ctx.builder.getContext(), Align(16)));
     return call;
 }
@@ -488,15 +514,16 @@ static const std::string make_errmsg(const char *fname, int n, const char *err)
     std::string _msg;
     raw_string_ostream msg(_msg);
     msg << fname;
-    if (n > 0)
-        msg << " argument " << n;
-    else
+    if (n > 0) {
+        msg << " argument ";
+        msg << n;
+    } else
         msg << " return";
     msg << err;
     return msg.str();
 }
 
-static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_value_t *jlto, jl_unionall_t *jlto_env, int argn)
+static jl_cgval_t typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_value_t *jlto, jl_unionall_t *jlto_env, int argn)
 {
     if (jlto != (jl_value_t*)jl_any_type && !jl_subtype(jvinfo.typ, jlto)) {
         if (jlto == (jl_value_t*)jl_voidpointer_type) {
@@ -504,6 +531,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val
             if (!jl_is_cpointer_type(jvinfo.typ)) {
                 // emit a typecheck, if not statically known to be correct
                 emit_cpointercheck(ctx, jvinfo, make_errmsg("ccall", argn + 1, ""));
+                return update_julia_type(ctx, jvinfo, (jl_value_t*)jl_pointer_type);
             }
         }
         else {
@@ -528,8 +556,10 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val
                 ctx.builder.CreateUnreachable();
                 ctx.builder.SetInsertPoint(passBB);
             }
+            return update_julia_type(ctx, jvinfo, jlto);
         }
     }
+    return jvinfo;
 }
 
 // Emit code to convert argument to form expected by C ABI
@@ -539,7 +569,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val
 static Value *julia_to_native(
         jl_codectx_t &ctx,
         Type *to, bool toboxed, jl_value_t *jlto, jl_unionall_t *jlto_env,
-        const jl_cgval_t &jvinfo,
+        jl_cgval_t jvinfo,
         bool byRef, int argn)
 {
     // We're passing Any
@@ -549,82 +579,55 @@ static Value *julia_to_native(
     }
     assert(jl_is_datatype(jlto) && jl_struct_try_layout((jl_datatype_t*)jlto));
 
-    typeassert_input(ctx, jvinfo, jlto, jlto_env, argn);
+    jvinfo = typeassert_input(ctx, jvinfo, jlto, jlto_env, argn);
     if (!byRef)
         return emit_unbox(ctx, to, jvinfo, jlto);
 
     // pass the address of an alloca'd thing, not a box
     // since those are immutable.
-    Value *slot = emit_static_alloca(ctx, to);
+    Align align(julia_alignment(jlto));
+    Value *slot = emit_static_alloca(ctx, to, align);
     setName(ctx.emission_context, slot, "native_convert_buffer");
-    if (!jvinfo.ispointer()) {
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa);
-        ai.decorateInst(ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot));
-    }
-    else {
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa);
-        emit_memcpy(ctx, slot, ai, jvinfo, jl_datatype_size(jlto), julia_alignment(jlto));
-    }
+    emit_unbox_store(ctx, jvinfo, slot, ctx.tbaa().tbaa_stack, align, align);
     return slot;
 }
 
-typedef struct {
-    Value *jl_ptr;  // if the argument is a run-time computed pointer
-    void (*fptr)(void);     // if the argument is a constant pointer
-    const char *f_name;   // if the symbol name is known
-    const char *f_lib;    // if a library name is specified
-    jl_value_t *lib_expr; // expression to compute library path lazily
-    jl_value_t *gcroot;
-} native_sym_arg_t;
-
-static inline const char *invalid_symbol_err_msg(bool ccall)
-{
-    return ccall ?
-        "ccall: first argument not a pointer or valid constant expression" :
-        "cglobal: first argument not a pointer or valid constant expression";
-}
-
 // --- parse :sym or (:sym, :lib) argument into address info ---
-static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_value_t *arg, bool ccall, bool llvmcall)
+static void interpret_cglobal_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_value_t *arg)
 {
     Value *&jl_ptr = out.jl_ptr;
-    void (*&fptr)(void) = out.fptr;
     const char *&f_name = out.f_name;
     const char *&f_lib = out.f_lib;
-
     jl_value_t *ptr = static_eval(ctx, arg);
     if (ptr == NULL) {
         if (jl_is_expr(arg) && ((jl_expr_t*)arg)->head == jl_call_sym && jl_expr_nargs(arg) == 3 &&
             jl_is_globalref(jl_exprarg(arg,0)) && jl_globalref_mod(jl_exprarg(arg,0)) == jl_core_module &&
             jl_globalref_name(jl_exprarg(arg,0)) == jl_symbol("tuple")) {
-            // attempt to interpret a non-constant 2-tuple expression as (func_name, lib_name()), where
-            // `lib_name()` will be executed when first used.
-            jl_value_t *name_val = static_eval(ctx, jl_exprarg(arg,1));
-            if (name_val && jl_is_symbol(name_val)) {
-                f_name = jl_symbol_name((jl_sym_t*)name_val);
-                out.lib_expr = jl_exprarg(arg, 2);
-                return;
+            // attempt to interpret a non-constant 2-tuple expression as (func_name, lib_name)
+            out.f_name_expr = jl_exprarg(arg, 1);
+            out.f_lib_expr = jl_exprarg(arg, 2);
+            jl_value_t *name_val = static_eval(ctx, out.f_name_expr);
+            out.gcroot[0] = name_val;
+            if (name_val) {
+                if (jl_is_symbol(name_val))
+                    f_name = jl_symbol_name((jl_sym_t*)name_val);
+                else if (jl_is_string(name_val))
+                    f_name = jl_string_data(name_val);
             }
-            else if (name_val && jl_is_string(name_val)) {
-                f_name = jl_string_data(name_val);
-                out.gcroot = name_val;
-                out.lib_expr = jl_exprarg(arg, 2);
-                return;
+            jl_value_t *lib_val = static_eval(ctx, out.f_lib_expr);
+            out.gcroot[1] = lib_val;
+            if (lib_val) {
+                if (jl_is_symbol(lib_val))
+                    f_lib = jl_symbol_name((jl_sym_t*)lib_val);
+                else if (jl_is_string(lib_val))
+                    f_lib = jl_string_data(lib_val);
             }
         }
-        jl_cgval_t arg1 = emit_expr(ctx, arg);
-        jl_value_t *ptr_ty = arg1.typ;
-        if (!jl_is_cpointer_type(ptr_ty)) {
-            const char *errmsg = invalid_symbol_err_msg(ccall);
-            emit_cpointercheck(ctx, arg1, errmsg);
-        }
-        arg1 = update_julia_type(ctx, arg1, (jl_value_t*)jl_voidpointer_type);
-        jl_ptr = emit_unbox(ctx, ctx.types().T_size, arg1, (jl_value_t*)jl_voidpointer_type);
     }
     else {
-        out.gcroot = ptr;
         if (jl_is_tuple(ptr) && jl_nfields(ptr) == 1) {
             ptr = jl_fieldref(ptr, 0);
+            out.gcroot[0] = ptr;
         }
 
         if (jl_is_symbol(ptr))
@@ -635,23 +638,15 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
         if (f_name != NULL) {
             // just symbol, default to JuliaDLHandle
             // will look in process symbol table
-            if (!llvmcall) {
-                void *symaddr;
-                std::string iname("i");
-                iname += f_name;
-                if (jl_dlsym(jl_libjulia_internal_handle, iname.c_str(), &symaddr, 0)) {
-                    f_lib = JL_LIBJULIA_INTERNAL_DL_LIBNAME;
-                    f_name = jl_symbol_name(jl_symbol(iname.c_str()));
-                }
-                else {
-                    f_lib = jl_dlfind(f_name);
-                }
-            }
+            f_lib = jl_dlfind(f_name);
+            out.f_name_expr = jl_new_struct(jl_quotenode_type, ptr);
+            out.gcroot[0] = out.f_name_expr;
         }
         else if (jl_is_cpointer_type(jl_typeof(ptr))) {
-            fptr = *(void(**)(void))jl_data_ptr(ptr);
+            uint64_t fptr = (uintptr_t)*(void(**)(void))jl_data_ptr(ptr);
+            jl_ptr = ConstantExpr::getIntToPtr(ConstantInt::get(ctx.types().T_size, fptr), ctx.types().T_ptr);
         }
-        else if (jl_is_tuple(ptr) && jl_nfields(ptr) > 1) {
+        else if (jl_is_tuple(ptr) && jl_nfields(ptr) == 2) {
             jl_value_t *t0 = jl_fieldref(ptr, 0);
             if (jl_is_symbol(t0))
                 f_name = jl_symbol_name((jl_sym_t*)t0);
@@ -663,15 +658,110 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
                 f_lib = jl_symbol_name((jl_sym_t*)t1);
             else if (jl_is_string(t1))
                 f_lib = jl_string_data(t1);
-            else
-                f_name = NULL;
+
+            out.f_name_expr = jl_new_struct(jl_quotenode_type, t0);
+            out.gcroot[0] = out.f_name_expr;
+            out.f_lib_expr = jl_new_struct(jl_quotenode_type, t1);
+            out.gcroot[1] = out.f_lib_expr;
+        }
+    }
+}
+
+static void interpret_ccall_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_value_t *arg)
+{
+    // Initialize all fields to safe defaults
+    out.f_name = nullptr;
+    out.f_lib = nullptr;
+    out.f_name_expr = nullptr;
+    out.f_lib_expr = nullptr;
+    out.jl_ptr = nullptr;
+    out.gcroot[0] = nullptr;
+    out.gcroot[1] = nullptr;
+
+    // Check if this is a tuple (normalized by julia-syntax.scm)
+    if (jl_is_expr(arg) && ((jl_expr_t*)arg)->head == jl_symbol("tuple")) {
+        size_t nargs = jl_expr_nargs(arg);
+        jl_array_t *tuple_args = ((jl_expr_t*)arg)->args;
+
+        if (nargs == 1) {
+            // Single element tuple: (func_name,) - use default library
+            jl_value_t *fname_arg = jl_array_ptr_ref(tuple_args, 0);
+            jl_value_t *fname_val = static_eval(ctx, fname_arg);
+            // Dynamic resolution - single function name expression, will use default library at runtime
+            out.f_name_expr = fname_arg;
+
+            if (fname_val != nullptr) {
+                // Static resolution succeeded
+                out.gcroot[0] = fname_val;
+                if (jl_is_symbol(fname_val)) {
+                    out.f_name = jl_symbol_name((jl_sym_t*)fname_val);
+                }
+                else if (jl_is_string(fname_val)) {
+                    out.f_name = jl_string_data(fname_val);
+                }
+             }
+        }
+        else if (nargs == 2) {
+            // Two element tuple: (func_name, lib_name)
+            jl_value_t *fname_arg = jl_array_ptr_ref(tuple_args, 0);
+            jl_value_t *lib_arg = jl_array_ptr_ref(tuple_args, 1);
+            out.f_name_expr = fname_arg;
+            out.f_lib_expr = lib_arg;
+
+            jl_value_t *fname_val = static_eval(ctx, fname_arg);
+            jl_value_t *lib_val = static_eval(ctx, lib_arg);
+            if (fname_val != nullptr) {
+                // Static resolution for both
+                out.gcroot[0] = fname_val; // Keep function name for GC
+                if (jl_is_symbol(fname_val)) {
+                    out.f_name = jl_symbol_name((jl_sym_t*)fname_val);
+                }
+                else if (jl_is_string(fname_val)) {
+                    out.f_name = jl_string_data(fname_val);
+                }
+            }
+
+            if (lib_val != nullptr) {
+                out.gcroot[1] = lib_val;   // Keep library name for GC
+                if (jl_is_symbol(lib_val)) {
+                    out.f_lib = jl_symbol_name((jl_sym_t*)lib_val);
+                }
+                else if (jl_is_string(lib_val)) {
+                    out.f_lib = jl_string_data(lib_val);
+                }
+            }
+        }
+    }
+    else {
+        // Not a tuple - pointer expression
+        jl_cgval_t arg1 = emit_expr(ctx, arg);
+        jl_value_t *ptr_ty = arg1.typ;
+        if (!jl_is_cpointer_type(ptr_ty)) {
+            const char *errmsg = "ccall: first argument not a pointer or valid constant expression";
+            emit_cpointercheck(ctx, arg1, errmsg);
+        }
+        arg1 = update_julia_type(ctx, arg1, (jl_value_t*)jl_voidpointer_type);
+        out.jl_ptr = emit_unbox(ctx, ctx.types().T_ptr, arg1, (jl_value_t*)jl_voidpointer_type);
+    }
+
+    // Handle Julia internal symbol lookup for static function names
+    if (out.f_name != nullptr && out.f_lib_expr == nullptr) {
+        void *symaddr;
+        std::string iname("i");
+        iname += out.f_name;
+        if (jl_dlsym(jl_libjulia_internal_handle, iname.c_str(), &symaddr, 0, 0)) {
+            out.f_lib = JL_LIBJULIA_INTERNAL_DL_LIBNAME;
+            out.f_name = jl_symbol_name(jl_symbol(iname.c_str()));
+        }
+        else {
+            out.f_lib = jl_dlfind(out.f_name);
         }
     }
 }
 
 // --- code generator for cglobal ---
 
-static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, const jl_cgval_t *argv, size_t nargs);
+static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, ArrayRef<jl_cgval_t> argv, size_t nargs);
 
 static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
 {
@@ -680,13 +770,13 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg
     jl_value_t *rt = NULL;
     Value *res;
     native_sym_arg_t sym = {};
-    JL_GC_PUSH2(&rt, &sym.gcroot);
+    JL_GC_PUSH3(&rt, &sym.gcroot[0], &sym.gcroot[1]);
 
     if (nargs == 2) {
         rt = static_eval(ctx, args[2]);
         if (rt == NULL) {
             JL_GC_POP();
-            jl_cgval_t argv[2] = {jl_cgval_t(), jl_cgval_t()};
+            jl_cgval_t argv[2];
             argv[0] = emit_expr(ctx, args[1]);
             argv[1] = emit_expr(ctx, args[2]);
             return emit_runtime_call(ctx, JL_I::cglobal, argv, nargs);
@@ -698,51 +788,25 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg
     else {
         rt = (jl_value_t*)jl_voidpointer_type;
     }
-    Type *lrt = ctx.types().T_size;
-    assert(lrt == julia_type_to_llvm(ctx, rt));
-
-    interpret_symbol_arg(ctx, sym, args[1], /*ccall=*/false, false);
-
-    if (sym.f_name == NULL && sym.fptr == NULL && sym.jl_ptr == NULL && sym.gcroot != NULL) {
-        const char *errmsg = invalid_symbol_err_msg(/*ccall=*/false);
-        jl_cgval_t arg1 = emit_expr(ctx, args[1]);
-        emit_type_error(ctx, arg1, literal_pointer_val(ctx, (jl_value_t *)jl_pointer_type), errmsg);
-        JL_GC_POP();
-        return jl_cgval_t();
-    }
-
+    interpret_cglobal_symbol_arg(ctx, sym, args[1]);
     if (sym.jl_ptr != NULL) {
-        res = ctx.builder.CreateBitCast(sym.jl_ptr, lrt);
+        res = sym.jl_ptr;
     }
-    else if (sym.fptr != NULL) {
-        res = ConstantInt::get(lrt, (uint64_t)sym.fptr);
-        if (ctx.emission_context.imaging)
-            jl_printf(JL_STDERR,"WARNING: literal address used in cglobal for %s; code cannot be statically compiled\n", sym.f_name);
+    else if (sym.f_name_expr != NULL) {
+        res = runtime_sym_lookup(ctx, sym, ctx.f);
     }
     else {
-        if (sym.lib_expr) {
-            res = runtime_sym_lookup(ctx, cast<PointerType>(getInt8PtrTy(ctx.builder.getContext())), NULL, sym.lib_expr, sym.f_name, ctx.f);
-        }
-        else if (ctx.emission_context.imaging) {
-            res = runtime_sym_lookup(ctx, cast<PointerType>(getInt8PtrTy(ctx.builder.getContext())), sym.f_lib, NULL, sym.f_name, ctx.f);
-            res = ctx.builder.CreatePtrToInt(res, lrt);
-        }
-        else {
-            void *symaddr;
-
-            void* libsym = jl_get_library_(sym.f_lib, 0);
-            int symbol_found = jl_dlsym(libsym, sym.f_name, &symaddr, 0);
-            if (!libsym || !symbol_found) {
-                // Error mode, either the library or the symbol couldn't be find during compiletime.
-                // Fallback to a runtime symbol lookup.
-                res = runtime_sym_lookup(ctx, cast<PointerType>(getInt8PtrTy(ctx.builder.getContext())), sym.f_lib, NULL, sym.f_name, ctx.f);
-                res = ctx.builder.CreatePtrToInt(res, lrt);
-            } else {
-                // since we aren't saving this code, there's no sense in
-                // putting anything complicated here: just JIT the address of the cglobal
-                res = ConstantInt::get(lrt, (uint64_t)symaddr);
-            }
-        }
+        // Fall back to runtime intrinsic
+        JL_GC_POP();
+        jl_cgval_t argv[2];
+        argv[0] = emit_expr(ctx, args[1]);
+        if (nargs == 2)
+            argv[1] = emit_expr(ctx, args[2]);
+        if (!jl_is_cpointer_type(argv[0].typ))
+            return emit_runtime_call(ctx, nargs == 1 ? JL_I::cglobal_auto : JL_I::cglobal, argv, nargs);
+        argv[0] = update_julia_type(ctx, argv[0], (jl_value_t*)jl_voidpointer_type);
+        sym.jl_ptr = emit_unbox(ctx, ctx.types().T_ptr, argv[0], (jl_value_t*)jl_voidpointer_type);
+        res = sym.jl_ptr;
     }
 
     JL_GC_POP();
@@ -767,7 +831,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     jl_value_t *ir_arg = args[1];
     JL_GC_PUSH4(&ir, &rt, &at, &entry);
     if (jl_is_ssavalue(ir_arg))
-        ir_arg = jl_arrayref((jl_array_t*)ctx.source->code, ((jl_ssavalue_t*)ir_arg)->id - 1);
+        ir_arg = jl_array_ptr_ref((jl_array_t*)ctx.source->code, ((jl_ssavalue_t*)ir_arg)->id - 1);
     ir = static_eval(ctx, ir_arg);
     if (!ir) {
         emit_error(ctx, "error statically evaluating llvm IR argument");
@@ -775,7 +839,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         return jl_cgval_t();
     }
     if (jl_is_ssavalue(args[2]) && !jl_is_long(ctx.source->ssavaluetypes)) {
-        jl_value_t *rtt = jl_arrayref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[2])->id - 1);
+        jl_value_t *rtt = jl_array_ptr_ref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[2])->id - 1);
         if (jl_is_type_type(rtt))
             rt = jl_tparam0(rtt);
     }
@@ -788,7 +852,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         }
     }
     if (jl_is_ssavalue(args[3]) && !jl_is_long(ctx.source->ssavaluetypes)) {
-        jl_value_t *att = jl_arrayref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[3])->id - 1);
+        jl_value_t *att = jl_array_ptr_ref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[3])->id - 1);
         if (jl_is_type_type(att))
             at = jl_tparam0(att);
     }
@@ -832,19 +896,14 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     JL_TYPECHK(llvmcall, type, rt);
     JL_TYPECHK(llvmcall, type, at);
 
-    // Generate arguments
-    std::string arguments;
-    raw_string_ostream argstream(arguments);
-    jl_svec_t *tt = ((jl_datatype_t*)at)->parameters;
-    jl_value_t *rtt = rt;
+    // Determine argument types
+    //
+    // Semantics for arguments are as follows:
+    // If the argument type is immutable (including bitstype), we pass the loaded llvm value
+    // type. Otherwise we pass a pointer to a jl_value_t.
+    jl_svec_t *tt = ((jl_datatype_t *)at)->parameters;
     size_t nargt = jl_svec_len(tt);
-
-    /*
-     * Semantics for arguments are as follows:
-     * If the argument type is immutable (including bitstype), we pass the loaded llvm value
-     * type. Otherwise we pass a pointer to a jl_value_t.
-     */
-    std::vector<llvm::Type*> argtypes;
+    SmallVector<llvm::Type*, 0> argtypes;
     SmallVector<Value *, 8> argvals(nargt);
     for (size_t i = 0; i < nargt; ++i) {
         jl_value_t *tti = jl_svecref(tt,i);
@@ -864,45 +923,91 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         argvals[i] = llvm_type_rewrite(ctx, v, t, issigned);
     }
 
+    // Determine return type
+    jl_value_t *rtt = rt;
     bool retboxed;
     Type *rettype = julia_type_to_llvm(ctx, rtt, &retboxed);
 
     // Make sure to find a unique name
     std::string ir_name;
     while (true) {
-        raw_string_ostream(ir_name) << (ctx.f->getName().str()) << "u" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
+        raw_string_ostream(ir_name)
+            << (ctx.f->getName().str()) << "u"
+            << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
         if (jl_Module->getFunction(ir_name) == NULL)
             break;
     }
 
     // generate a temporary module that contains our IR
     std::unique_ptr<Module> Mod;
+    bool shouldDiscardValueNames = ctx.builder.getContext().shouldDiscardValueNames();
+    Function *f;
     if (entry == NULL) {
         // we only have function IR, which we should put in a function
 
-        bool first = true;
-        for (std::vector<Type *>::iterator it = argtypes.begin(); it != argtypes.end(); ++it) {
-            if (!first)
+        // stringify arguments
+        std::string arguments;
+        raw_string_ostream argstream(arguments);
+        for (SmallVector<Type *, 0>::iterator it = argtypes.begin(); it != argtypes.end(); ++it) {
+            if (it != argtypes.begin())
                 argstream << ",";
-            else
-                first = false;
             (*it)->print(argstream);
             argstream << " ";
         }
 
+        // stringify return type
         std::string rstring;
         raw_string_ostream rtypename(rstring);
         rettype->print(rtypename);
-        std::map<uint64_t,std::string> localDecls;
 
+        // generate IR function definition
         std::string ir_string;
         raw_string_ostream ir_stream(ir_string);
-        ir_stream << "; Number of arguments: " << nargt << "\n"
-        << "define "<<rtypename.str()<<" @\"" << ir_name << "\"("<<argstream.str()<<") {\n"
-        << jl_string_data(ir) << "\n}";
+        ir_stream << "define " << rtypename.str() << " @\"" << ir_name << "\"("
+                  << argstream.str() << ") {\n"
+                  << jl_string_data(ir) << "\n}";
 
         SMDiagnostic Err = SMDiagnostic();
+        ctx.builder.getContext().setDiscardValueNames(false);
         Mod = parseAssemblyString(ir_stream.str(), Err, ctx.builder.getContext());
+        ctx.builder.getContext().setDiscardValueNames(shouldDiscardValueNames);
+
+        // backwards compatibility: support for IR with integer pointers
+        if (!Mod) {
+            std::string compat_arguments;
+            raw_string_ostream compat_argstream(compat_arguments);
+            for (size_t i = 0; i < nargt; ++i) {
+                if (i > 0)
+                    compat_argstream << ",";
+                jl_value_t *tti = jl_svecref(tt, i);
+                Type *t;
+                if (jl_is_cpointer_type(tti))
+                    t = ctx.types().T_size;
+                else
+                    t = argtypes[i];
+                t->print(compat_argstream);
+                compat_argstream << " ";
+            }
+
+            std::string compat_rstring;
+            raw_string_ostream compat_rtypename(compat_rstring);
+            if (jl_is_cpointer_type(rtt))
+                ctx.types().T_size->print(compat_rtypename);
+            else
+                rettype->print(compat_rtypename);
+
+            std::string compat_ir_string;
+            raw_string_ostream compat_ir_stream(compat_ir_string);
+            compat_ir_stream << "define " << compat_rtypename.str() << " @\"" << ir_name
+                             << "\"(" << compat_argstream.str() << ") {\n"
+                             << jl_string_data(ir) << "\n}";
+
+            SMDiagnostic Err = SMDiagnostic();
+            ctx.builder.getContext().setDiscardValueNames(false);
+            Mod = parseAssemblyString(compat_ir_stream.str(), Err, ctx.builder.getContext());
+            ctx.builder.getContext().setDiscardValueNames(shouldDiscardValueNames);
+        }
+
         if (!Mod) {
             std::string message = "Failed to parse LLVM assembly: \n";
             raw_string_ostream stream(message);
@@ -912,7 +1017,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
             return jl_cgval_t();
         }
 
-        Function *f = Mod->getFunction(ir_name);
+        f = Mod->getFunction(ir_name);
         f->addFnAttr(Attribute::AlwaysInline);
     }
     else {
@@ -920,7 +1025,9 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
 
         if (jl_is_string(ir)) {
             SMDiagnostic Err = SMDiagnostic();
+            ctx.builder.getContext().setDiscardValueNames(false);
             Mod = parseAssemblyString(jl_string_data(ir), Err, ctx.builder.getContext());
+            ctx.builder.getContext().setDiscardValueNames(shouldDiscardValueNames);
             if (!Mod) {
                 std::string message = "Failed to parse LLVM assembly: \n";
                 raw_string_ostream stream(message);
@@ -932,7 +1039,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         }
         else {
             auto Buf = MemoryBuffer::getMemBuffer(
-                StringRef((char *)jl_array_data(ir), jl_array_len(ir)), "llvmcall",
+                StringRef(jl_array_data(ir, char), jl_array_nrows(ir)), "llvmcall",
                 /*RequiresNullTerminator*/ false);
             Expected<std::unique_ptr<Module>> ModuleOrErr =
                 parseBitcodeFile(*Buf, ctx.builder.getContext());
@@ -950,21 +1057,96 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
             Mod = std::move(ModuleOrErr.get());
         }
 
-        Function *f = Mod->getFunction(jl_string_data(entry));
+        f = Mod->getFunction(jl_string_data(entry));
         if (!f) {
             emit_error(ctx, "Module IR does not contain specified entry function");
             JL_GC_POP();
             return jl_cgval_t();
         }
+        assert(!f->isDeclaration());
         f->setName(ir_name);
+    }
 
-        // verify the function type
-        assert(!f->isDeclaration());
-        assert(f->getReturnType() == rettype);
-        int i = 0;
-        for (std::vector<Type *>::iterator it = argtypes.begin();
-            it != argtypes.end(); ++it, ++i)
-            assert(*it == f->getFunctionType()->getParamType(i));
+    // backwards compatibility: support for IR with integer pointers
+    bool mismatched_pointers = false;
+    for (size_t i = 0; i < nargt; ++i) {
+        jl_value_t *tti = jl_svecref(tt, i);
+        if (jl_is_cpointer_type(tti) &&
+            !f->getFunctionType()->getParamType(i)->isPointerTy()) {
+            mismatched_pointers = true;
+            break;
+        }
+    }
+    if (mismatched_pointers) {
+        if (jl_options.depwarn) {
+            if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR)
+                jl_error("llvmcall with integer pointers is deprecated, "
+                         "use an actual pointer type instead.");
+
+            // ensure we only depwarn once per method
+            // TODO: lift this into a reusable codegen-level depwarn utility
+            static std::set<jl_method_t*> llvmcall_depwarns;
+            jl_method_t *m = ctx.linfo->def.method;
+            if (llvmcall_depwarns.find(m) == llvmcall_depwarns.end()) {
+                llvmcall_depwarns.insert(m);
+                jl_printf(JL_STDERR,
+                        "WARNING: llvmcall with integer pointers is deprecated.\n"
+                        "Use actual pointers instead, replacing i32 or i64 with i8* or ptr\n"
+                        "in ");
+                jl_static_show(JL_STDERR, (jl_value_t*) ctx.linfo->def.method);
+                jl_printf(JL_STDERR, " at %s\n", ctx.file.str().c_str());
+            }
+        }
+
+        // wrap the function, performing the necessary pointer conversion
+
+        Function *inner = f;
+        inner->setName(ir_name + ".inner");
+
+        FunctionType *wrapper_ft = FunctionType::get(rettype, argtypes, false);
+        Function *wrapper =
+            Function::Create(wrapper_ft, inner->getLinkage(), ir_name, *Mod);
+
+        wrapper->copyAttributesFrom(inner);
+        inner->addFnAttr(Attribute::AlwaysInline);
+
+        BasicBlock *entry = BasicBlock::Create(ctx.builder.getContext(), "", wrapper);
+        IRBuilder<> irbuilder(entry);
+        SmallVector<Value *, 0> wrapper_args;
+        for (size_t i = 0; i < nargt; ++i) {
+            jl_value_t *tti = jl_svecref(tt, i);
+            Value *v = wrapper->getArg(i);
+            if (jl_is_cpointer_type(tti))
+                v = irbuilder.CreatePtrToInt(v, ctx.types().T_size);
+            wrapper_args.push_back(v);
+        }
+        Value *call = irbuilder.CreateCall(inner, wrapper_args);
+        // check if void
+        if (rettype->isVoidTy())
+            irbuilder.CreateRetVoid();
+        else {
+            if (jl_is_cpointer_type(rtt))
+                call = irbuilder.CreateIntToPtr(call, ctx.types().T_ptr);
+            irbuilder.CreateRet(call);
+        }
+
+        f = wrapper;
+    }
+
+    // verify the function type
+    assert(f->getReturnType() == rettype);
+    int i = 0;
+    for (SmallVector<Type *, 0>::iterator it = argtypes.begin(); it != argtypes.end();
+         ++it, ++i) {
+        if (*it != f->getFunctionType()->getParamType(i)) {
+            std::string message;
+            raw_string_ostream stream(message);
+            stream << "Malformed llvmcall: argument " << i + 1 << " type "
+                   << *f->getFunctionType()->getParamType(i)
+                   << " does not match expected argument type " << **it;
+            emit_error(ctx, stream.str());
+            return jl_cgval_t();
+        }
     }
 
     // copy module properties that should always match
@@ -1002,7 +1184,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     if (inst->getType() != rettype) {
         std::string message;
         raw_string_ostream stream(message);
-        stream << "llvmcall return type " << *inst->getType()
+        stream << "Malformed llvmcall: return type " << *inst->getType()
                << " does not match declared return type" << *rettype;
         emit_error(ctx, stream.str());
         return jl_cgval_t();
@@ -1019,8 +1201,9 @@ static Value *box_ccall_result(jl_codectx_t &ctx, Value *result, Value *runtime_
     // XXX: need to handle parameterized zero-byte types (singleton)
     const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
     unsigned nb = DL.getTypeStoreSize(result->getType());
+    unsigned align = sizeof(void*); // Allocations are at least pointer aligned
     MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut;
-    Value *strct = emit_allocobj(ctx, nb, runtime_dt);
+    Value *strct = emit_allocobj(ctx, nb, runtime_dt, true, align);
     setName(ctx.emission_context, strct, "ccall_result_box");
     init_bits_value(ctx, strct, result, tbaa);
     return strct;
@@ -1041,13 +1224,14 @@ static jl_cgval_t mark_or_box_ccall_result(jl_codectx_t &ctx, Value *result, boo
 
 class function_sig_t {
 public:
-    std::vector<Type*> fargt; // vector of llvm output types (julia_struct_to_llvm) for arguments
-    std::vector<Type*> fargt_sig; // vector of ABI coercion types for call signature
-    std::vector<bool> fargt_isboxed; // vector of whether the llvm output type is a Julia-box for each argument
-    std::vector<bool> byRefList; // vector of "byref" parameters
+    SmallVector<Type*, 0> fargt; // vector of llvm output types (julia_struct_to_llvm) for arguments
+    SmallVector<Type*, 0> fargt_sig; // vector of ABI coercion types for call signature
+    SmallVector<bool, 0> fargt_isboxed; // vector of whether the llvm output type is a Julia-box for each argument
+    SmallVector<bool, 0> byRefList; // vector of "byref" parameters
     AttributeList attributes; // vector of function call site attributes
     Type *lrt; // input parameter of the llvm return type (from julia_struct_to_llvm)
     bool retboxed; // input parameter indicating whether lrt is jl_value_t*
+    bool gc_safe; // input parameter indicating whether the call is safe to execute concurrently to GC
     Type *prt; // out parameter of the llvm return type for the function signature
     int sret; // out parameter for indicating whether return value has been moved to the first argument position
     std::string err_msg;
@@ -1060,8 +1244,8 @@ class function_sig_t {
     size_t nreqargs; // number of required arguments in ccall function definition
     jl_codegen_params_t *ctx;
 
-    function_sig_t(const char *fname, Type *lrt, jl_value_t *rt, bool retboxed, jl_svec_t *at, jl_unionall_t *unionall_env, size_t nreqargs, CallingConv::ID cc, bool llvmcall, jl_codegen_params_t *ctx)
-      : lrt(lrt), retboxed(retboxed),
+    function_sig_t(const char *fname, Type *lrt, jl_value_t *rt, bool retboxed, bool gc_safe, jl_svec_t *at, jl_unionall_t *unionall_env, size_t nreqargs, CallingConv::ID cc, bool llvmcall, jl_codegen_params_t *ctx)
+      : lrt(lrt), retboxed(retboxed), gc_safe(gc_safe),
         prt(NULL), sret(0), cc(cc), llvmcall(llvmcall),
         at(at), rt(rt), unionall_env(unionall_env),
         nccallargs(jl_svec_len(at)), nreqargs(nreqargs),
@@ -1073,7 +1257,7 @@ class function_sig_t {
     FunctionType *functype(LLVMContext &ctxt) const {
         assert(err_msg.empty());
         if (nreqargs > 0)
-            return FunctionType::get(sret ? getVoidTy(ctxt) : prt, makeArrayRef(fargt_sig).slice(0, nreqargs), true);
+            return FunctionType::get(sret ? getVoidTy(ctxt) : prt, ArrayRef<Type*>(fargt_sig).slice(0, nreqargs), true);
         else
             return FunctionType::get(sret ? getVoidTy(ctxt) : prt, fargt_sig, false);
     }
@@ -1082,7 +1266,7 @@ class function_sig_t {
             jl_codectx_t &ctx,
             const native_sym_arg_t &symarg,
             jl_cgval_t *argv,
-            SmallVector<Value*, 16> &gc_uses,
+            SmallVectorImpl<Value*> &gc_uses,
             bool static_rt) const;
 
 private:
@@ -1090,7 +1274,7 @@ std::string generate_func_sig(const char *fname)
 {
     assert(rt && !jl_is_abstract_ref_type(rt));
 
-    std::vector<AttributeSet> paramattrs;
+    SmallVector<AttributeSet, 0> paramattrs;
     std::unique_ptr<AbiLayout> abi;
     if (llvmcall)
         abi.reset(new ABI_LLVMLayout());
@@ -1115,7 +1299,7 @@ std::string generate_func_sig(const char *fname)
             }
             retattrs.addAttribute(Attribute::NoAlias);
             paramattrs.push_back(AttributeSet::get(LLVMCtx, retattrs));
-            fargt_sig.push_back(PointerType::get(lrt, 0));
+            fargt_sig.push_back(PointerType::get(LLVMCtx, 0));
             sret = 1;
             prt = lrt;
         }
@@ -1133,7 +1317,7 @@ std::string generate_func_sig(const char *fname)
         bool isboxed;
         if (jl_is_abstract_ref_type(tti)) {
             tti = (jl_value_t*)jl_voidpointer_type;
-            t = getInt8PtrTy(LLVMCtx);
+            t = getPointerTy(LLVMCtx);
             isboxed = false;
         }
         else if (llvmcall && jl_is_llvmpointer_type(tti)) {
@@ -1142,26 +1326,26 @@ std::string generate_func_sig(const char *fname)
             isboxed = false;
         }
         else {
-            if (jl_is_primitivetype(tti)) {
+            t = _julia_struct_to_llvm(ctx, LLVMCtx, tti, &isboxed, llvmcall);
+            if (t == getVoidTy(LLVMCtx)) {
+                return make_errmsg(fname, i + 1, " type doesn't correspond to a C type");
+            }
+            if (jl_is_primitivetype(tti) && t->isIntegerTy()) {
                 // see pull req #978. need to annotate signext/zeroext for
                 // small integer arguments.
                 jl_datatype_t *bt = (jl_datatype_t*)tti;
-                if (jl_datatype_size(bt) < 4 && bt != jl_float16_type) {
+                if (jl_datatype_size(bt) < 4) {
                     if (jl_signed_type && jl_subtype(tti, (jl_value_t*)jl_signed_type))
                         ab.addAttribute(Attribute::SExt);
                     else
                         ab.addAttribute(Attribute::ZExt);
                 }
             }
-
-            t = _julia_struct_to_llvm(ctx, LLVMCtx, tti, &isboxed, llvmcall);
-            if (t == getVoidTy(LLVMCtx)) {
-                return make_errmsg(fname, i + 1, " type doesn't correspond to a C type");
-            }
         }
 
         Type *pat;
-        if (!jl_is_datatype(tti) || ((jl_datatype_t*)tti)->layout == NULL || jl_is_layout_opaque(((jl_datatype_t*)tti)->layout)) {
+        // n.b. `Array` used as argument type just passes a julia object reference
+        if (!jl_is_datatype(tti) || ((jl_datatype_t*)tti)->layout == NULL || jl_is_array_type(tti) || jl_is_layout_opaque(((jl_datatype_t*)tti)->layout)) {
             tti = (jl_value_t*)jl_voidpointer_type; // passed as pointer
         }
 
@@ -1173,7 +1357,7 @@ std::string generate_func_sig(const char *fname)
             pat = t;
         }
         else if (byRef) {
-            pat = PointerType::get(t, AddressSpace::Derived);
+            pat = PointerType::get(LLVMCtx, AddressSpace::Derived);
         }
         else {
             pat = abi->preferred_llvm_type((jl_datatype_t*)tti, false, LLVMCtx);
@@ -1209,6 +1393,7 @@ std::string generate_func_sig(const char *fname)
         RetAttrs = RetAttrs.addAttribute(LLVMCtx, Attribute::NonNull);
     if (rt == jl_bottom_type)
         FnAttrs = FnAttrs.addAttribute(LLVMCtx, Attribute::NoReturn);
+
     assert(attributes.isEmpty());
     attributes = AttributeList::get(LLVMCtx, FnAttrs, RetAttrs, paramattrs);
     return "";
@@ -1292,7 +1477,7 @@ static const std::string verify_ccall_sig(jl_value_t *&rt, jl_value_t *at,
     JL_TYPECHK(ccall, type, rt);
     JL_TYPECHK(ccall, simplevector, at);
 
-    if (rt == (jl_value_t*)jl_any_type || jl_is_array_type(rt) ||
+    if (rt == (jl_value_t*)jl_any_type || jl_is_array_type(rt) || jl_is_genericmemory_type(rt) ||
             (jl_is_datatype(rt) && ((jl_datatype_t*)rt)->layout != NULL &&
              jl_is_layout_opaque(((jl_datatype_t*)rt)->layout))) {
         // n.b. `Array` used as return type just returns a julia object reference
@@ -1326,7 +1511,7 @@ static const std::string verify_ccall_sig(jl_value_t *&rt, jl_value_t *at,
 
 const int fc_args_start = 6;
 
-// Expr(:foreigncall, pointer, rettype, (argtypes...), nreq, [cconv | (cconv, effects)], args..., roots...)
+// Expr(:foreigncall, pointer, rettype, (argtypes...), nreq, gc_safe, [cconv | (cconv, effects)], args..., roots...)
 static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
 {
     JL_NARGSV(ccall, 5);
@@ -1338,46 +1523,27 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     assert(jl_is_quotenode(args[5]));
     jl_value_t *jlcc = jl_quotenode_value(args[5]);
     jl_sym_t *cc_sym = NULL;
+    bool gc_safe = false;
     if (jl_is_symbol(jlcc)) {
         cc_sym = (jl_sym_t*)jlcc;
     }
     else if (jl_is_tuple(jlcc)) {
         cc_sym = (jl_sym_t*)jl_get_nth_field_noalloc(jlcc, 0);
+        gc_safe = jl_unbox_bool(jl_get_nth_field_checked(jlcc, 2));
     }
     assert(jl_is_symbol(cc_sym));
     native_sym_arg_t symarg = {};
-    JL_GC_PUSH3(&rt, &at, &symarg.gcroot);
+    JL_GC_PUSH4(&rt, &at, &symarg.gcroot[0], &symarg.gcroot[1]);
 
     CallingConv::ID cc = CallingConv::C;
     bool llvmcall = false;
     std::tie(cc, llvmcall) = convert_cconv(cc_sym);
 
-    interpret_symbol_arg(ctx, symarg, args[1], /*ccall=*/true, llvmcall);
-    Value *&jl_ptr = symarg.jl_ptr;
-    void (*&fptr)(void) = symarg.fptr;
+    interpret_ccall_symbol_arg(ctx, symarg, args[1]);
     const char *&f_name = symarg.f_name;
     const char *&f_lib = symarg.f_lib;
 
-    if (f_name == NULL && fptr == NULL && jl_ptr == NULL) {
-        if (symarg.gcroot != NULL) { // static_eval(ctx, args[1]) could not be interpreted to a function pointer
-            const char *errmsg = invalid_symbol_err_msg(/*ccall=*/true);
-            jl_cgval_t arg1 = emit_expr(ctx, args[1]);
-            emit_type_error(ctx, arg1, literal_pointer_val(ctx, (jl_value_t *)jl_pointer_type), errmsg);
-        } else {
-            emit_error(ctx, "ccall: null function pointer");
-        }
-        JL_GC_POP();
-        return jl_cgval_t();
-    }
-
-    auto ccallarg = [=] (size_t i) {
-        assert(i < nccallargs && i + fc_args_start <= nargs);
-        return args[fc_args_start + i];
-    };
-
-    auto _is_libjulia_func = [&] (uintptr_t ptr, StringRef name) {
-        if ((uintptr_t)fptr == ptr)
-            return true;
+    auto _is_libjulia_func = [&f_lib, &f_name] (StringRef name) {
         if (f_lib) {
             if ((f_lib == JL_EXE_LIBNAME) || // preventing invalid pointer access
                 (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) ||
@@ -1395,27 +1561,30 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         }
         return f_name && f_name == name;
     };
-#define is_libjulia_func(name) _is_libjulia_func((uintptr_t)&(name), StringRef(XSTR(name)))
+#define is_libjulia_func(name) _is_libjulia_func(StringRef(XSTR(name)))
 
     // emit arguments
     SmallVector<jl_cgval_t, 4> argv(nccallargs);
     for (size_t i = 0; i < nccallargs; i++) {
         // Julia (expression) value of current parameter
-        jl_value_t *argi = ccallarg(i);
+        assert(i < nccallargs && i + fc_args_start <= nargs);
+        jl_value_t *argi = args[fc_args_start + i];
         argv[i] = emit_expr(ctx, argi);
+        if (argv[i].typ == jl_bottom_type) {
+            JL_GC_POP();
+            return jl_cgval_t();
+        }
     }
 
     // emit roots
-    SmallVector<Value*, 16> gc_uses;
+    SmallVector<Value*> gc_uses;
     for (size_t i = nccallargs + fc_args_start; i <= nargs; i++) {
         // Julia (expression) value of current parameter gcroot
         jl_value_t *argi_root = args[i];
         if (jl_is_long(argi_root))
             continue;
         jl_cgval_t arg_root = emit_expr(ctx, argi_root);
-        Value *gc_root = get_gc_root_for(arg_root);
-        if (gc_root)
-            gc_uses.push_back(gc_root);
+        gc_uses.append(get_gc_roots_for(ctx, arg_root));
     }
 
     jl_unionall_t *unionall = (jl_is_method(ctx.linfo->def.method) && jl_is_unionall(ctx.linfo->def.method->sig))
@@ -1462,8 +1631,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         return jl_cgval_t();
     }
     if (rt != args[2] && rt != (jl_value_t*)jl_any_type)
-        rt = jl_ensure_rooted(ctx, rt);
-    function_sig_t sig("ccall", lrt, rt, retboxed,
+        jl_temporary_root(ctx, rt);
+    function_sig_t sig("ccall", lrt, rt, retboxed, gc_safe,
                        (jl_svec_t*)at, unionall, nreqargs,
                        cc, llvmcall, &ctx.emission_context);
     for (size_t i = 0; i < nccallargs; i++) {
@@ -1479,25 +1648,16 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     // some special functions
     bool isVa = nreqargs > 0;
     (void)isVa; // prevent compiler warning
-    if (is_libjulia_func(jl_array_ptr)) {
-        ++CCALL_STAT(jl_array_ptr);
-        assert(lrt == ctx.types().T_size);
-        assert(!isVa && !llvmcall && nccallargs == 1);
-        const jl_cgval_t &ary = argv[0];
-        JL_GC_POP();
-        return mark_or_box_ccall_result(ctx, ctx.builder.CreatePtrToInt(emit_unsafe_arrayptr(ctx, ary), lrt),
-                                        retboxed, rt, unionall, static_rt);
-    }
-    else if (is_libjulia_func(jl_value_ptr)) {
+    if (is_libjulia_func(jl_value_ptr)) {
         ++CCALL_STAT(jl_value_ptr);
-        assert(retboxed ? lrt == ctx.types().T_prjlvalue : lrt == ctx.types().T_size);
+        assert(retboxed ? lrt == ctx.types().T_prjlvalue : lrt == ctx.types().T_ptr);
         assert(!isVa && !llvmcall && nccallargs == 1);
         jl_value_t *tti = jl_svecref(at, 0);
         Type *largty;
         bool isboxed;
         if (jl_is_abstract_ref_type(tti)) {
             tti = (jl_value_t*)jl_voidpointer_type;
-            largty = ctx.types().T_size;
+            largty = ctx.types().T_ptr;
             isboxed = false;
         }
         else {
@@ -1506,11 +1666,10 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         Value *retval;
         if (isboxed) {
             retval = boxed(ctx, argv[0]);
-            retval = emit_pointer_from_objref(ctx, emit_bitcast(ctx, retval, ctx.types().T_prjlvalue));
+            retval = emit_pointer_from_objref(ctx, retval /*T_prjlvalue*/);
         }
         else {
             retval = emit_unbox(ctx, largty, argv[0], tti);
-            retval = emit_inttoptr(ctx, retval, ctx.types().T_pjlvalue);
         }
         // retval is now an untracked jl_value_t*
         if (retboxed)
@@ -1584,23 +1743,20 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const);
         return ghostValue(ctx, jl_nothing_type);
     }
-    else if (is_libjulia_func("jl_get_ptls_states")) {
+    else if (is_libjulia_func(jl_get_ptls_states)) {
         ++CCALL_STAT(jl_get_ptls_states);
-        assert(lrt == ctx.types().T_size);
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
-        return mark_or_box_ccall_result(ctx,
-            ctx.builder.CreatePtrToInt(get_current_ptls(ctx), lrt),
-            retboxed, rt, unionall, static_rt);
+        return mark_or_box_ccall_result(ctx, get_current_ptls(ctx), retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_threadid)) {
         ++CCALL_STAT(jl_threadid);
         assert(lrt == getInt16Ty(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
-        Value *ptask_i16 = emit_bitcast(ctx, get_current_task(ctx), getInt16PtrTy(ctx.builder.getContext()));
+        Value *ptask = get_current_task(ctx);
         const int tid_offset = offsetof(jl_task_t, tid);
-        Value *ptid = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), ptask_i16, ConstantInt::get(ctx.types().T_size, tid_offset / sizeof(int16_t)));
+        Value *ptid = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptask, ConstantInt::get(ctx.types().T_size, tid_offset / sizeof(int8_t)));
         setName(ctx.emission_context, ptid, "thread_id_ptr");
         LoadInst *tid = ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), ptid, Align(sizeof(int16_t)));
         setName(ctx.emission_context, tid, "thread_id");
@@ -1608,15 +1764,77 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         ai.decorateInst(tid);
         return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt);
     }
+    else if (is_libjulia_func(jl_get_ptls_rng)) {
+        ++CCALL_STAT(jl_get_ptls_rng);
+        assert(lrt == getInt64Ty(ctx.builder.getContext()));
+        assert(!isVa && !llvmcall && nccallargs == 0);
+        JL_GC_POP();
+        Value *ptls_p = get_current_ptls(ctx);
+        const int rng_offset = offsetof(jl_tls_states_t, rngseed);
+        Value *rng_ptr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, rng_offset / sizeof(int8_t)));
+        setName(ctx.emission_context, rng_ptr, "rngseed_ptr");
+        LoadInst *rng_value = ctx.builder.CreateAlignedLoad(getInt64Ty(ctx.builder.getContext()), rng_ptr, Align(sizeof(void*)));
+        setName(ctx.emission_context, rng_value, "rngseed");
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+        ai.decorateInst(rng_value);
+        return mark_or_box_ccall_result(ctx, rng_value, retboxed, rt, unionall, static_rt);
+    }
+    else if (is_libjulia_func(jl_set_ptls_rng)) {
+        ++CCALL_STAT(jl_set_ptls_rng);
+        assert(lrt == getVoidTy(ctx.builder.getContext()));
+        assert(!isVa && !llvmcall && nccallargs == 1);
+        JL_GC_POP();
+        Value *ptls_p = get_current_ptls(ctx);
+        const int rng_offset = offsetof(jl_tls_states_t, rngseed);
+        Value *rng_ptr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, rng_offset / sizeof(int8_t)));
+        setName(ctx.emission_context, rng_ptr, "rngseed_ptr");
+        Value *val64 = emit_unbox(ctx, getInt64Ty(ctx.builder.getContext()), argv[0], (jl_value_t*)jl_uint64_type);
+        auto store = ctx.builder.CreateAlignedStore(val64, rng_ptr, Align(sizeof(void*)));
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+        ai.decorateInst(store);
+        return ghostValue(ctx, jl_nothing_type);
+    }
+    else if (is_libjulia_func(jl_get_tls_world_age)) {
+        ++CCALL_STAT(jl_get_tls_world_age);
+        assert(lrt == ctx.types().T_size);
+        assert(!isVa && !llvmcall && nccallargs == 0);
+        JL_GC_POP();
+        Value *world_age = get_tls_world_age(ctx);
+        return mark_or_box_ccall_result(ctx, world_age, retboxed, rt, unionall, static_rt);
+    }
+    else if (is_libjulia_func(jl_get_world_counter)) {
+        ++CCALL_STAT(jl_get_world_counter);
+        assert(lrt == ctx.types().T_size);
+        assert(!isVa && !llvmcall && nccallargs == 0);
+        JL_GC_POP();
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+
+        // jl_task_t *ct = jl_current_task;
+        // if (ct->ptls->in_pure_callback)
+        //     return ~(size_t)0;
+        // return jl_atomic_load_acquire(&jl_world_counter);
+        Type *T_int16 = getInt16Ty(ctx.builder.getContext());
+        Value *offset = ConstantInt::get(ctx.types().T_size, offsetof(jl_tls_states_t, in_pure_callback) / sizeof(int16_t));
+        Value *field_ptr = ctx.builder.CreateInBoundsGEP(T_int16, get_current_ptls(ctx), offset);
+        Instruction *in_pure_callback = ai.decorateInst(ctx.builder.CreateAlignedLoad(T_int16,
+            field_ptr, Align(sizeof(int16_t)), "in_pure_callback"));
+        Value *cond = ctx.builder.CreateICmpEQ(in_pure_callback, ConstantInt::get(T_int16, 0));
+
+        Value *world_counter = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
+            prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr);
+        cast<LoadInst>(world_counter)->setOrdering(AtomicOrdering::Acquire);
+        Value *ret = ctx.builder.CreateSelect(cond, world_counter, ConstantInt::get(ctx.types().T_size, ~(size_t)0));
+        return mark_or_box_ccall_result(ctx, ret, retboxed, rt, unionall, static_rt);
+    }
     else if (is_libjulia_func(jl_gc_disable_finalizers_internal)
 #ifdef NDEBUG
              || is_libjulia_func(jl_gc_enable_finalizers_internal)
 #endif
              ) {
         JL_GC_POP();
-        Value *ptls_i32 = emit_bitcast(ctx, get_current_ptls(ctx), getInt32PtrTy(ctx.builder.getContext()));
+        Value *ptls_p = get_current_ptls(ctx);
         const int finh_offset = offsetof(jl_tls_states_t, finalizers_inhibited);
-        Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), ptls_i32, ConstantInt::get(ctx.types().T_size, finh_offset / 4));
+        Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, finh_offset / sizeof(int8_t)));
         setName(ctx.emission_context, pfinh, "finalizers_inhibited_ptr");
         LoadInst *finh = ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), pfinh, Align(sizeof(int32_t)));
         setName(ctx.emission_context, finh, "finalizers_inhibited");
@@ -1639,7 +1857,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         assert(lrt == ctx.types().T_prjlvalue);
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
-        auto ct = track_pjlvalue(ctx, emit_bitcast(ctx, get_current_task(ctx), ctx.types().T_pjlvalue));
+        auto ct = track_pjlvalue(ctx, get_current_task(ctx));
         return mark_or_box_ccall_result(ctx, ct, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_set_next_task)) {
@@ -1647,7 +1865,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         assert(lrt == getVoidTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 1);
         JL_GC_POP();
-        Value *ptls_pv = emit_bitcast(ctx, get_current_ptls(ctx), ctx.types().T_ppjlvalue);
+        Value *ptls_pv = get_current_ptls(ctx);
         const int nt_offset = offsetof(jl_tls_states_t, next_task);
         Value *pnt = ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, ptls_pv, ConstantInt::get(ctx.types().T_size, nt_offset / sizeof(void*)));
         setName(ctx.emission_context, pnt, "next_task_ptr");
@@ -1700,154 +1918,57 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         ctx.builder.SetInsertPoint(checkBB);
         auto signal_page_load = ctx.builder.CreateLoad(
                 ctx.types().T_size,
-                ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size,
-                    get_current_signal_page_from_ptls(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1),
+                emit_ptrgep(ctx, get_current_signal_page_from_ptls(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const),
+                    -sizeof(size_t)),
                 true);
         setName(ctx.emission_context, signal_page_load, "signal_page_load");
         ctx.builder.CreateBr(contBB);
-        ctx.f->getBasicBlockList().push_back(contBB);
+        contBB->insertInto(ctx.f);
         ctx.builder.SetInsertPoint(contBB);
         return ghostValue(ctx, jl_nothing_type);
     }
-    else if (is_libjulia_func(jl_svec_len)) {
-        ++CCALL_STAT(jl_svec_len);
-        assert(!isVa && !llvmcall && nccallargs == 1);
-        const jl_cgval_t &svecv = argv[0];
-        Value *len;
-        if (svecv.constant && svecv.typ == (jl_value_t*)jl_simplevector_type) {
-            // Check the type as well before we call
-            len = ConstantInt::get(ctx.types().T_size, jl_svec_len(svecv.constant));
-        }
-        else {
-            auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_size->getPointerTo());
-            setName(ctx.emission_context, ptr, "svec_len_ptr");
-            len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ptr, ctx.types().alignof_ptr);
-            setName(ctx.emission_context, len, "svec_len");
-            // Only mark with TBAA if we are sure about the type.
-            // This could otherwise be in a dead branch
-            if (svecv.typ == (jl_value_t*)jl_simplevector_type) {
-                jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-                ai.decorateInst(cast<Instruction>(len));
-            }
-            MDBuilder MDB(ctx.builder.getContext());
-            auto rng = MDB.createRange(
-                Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, INTPTR_MAX / sizeof(void*) - 1));
-            cast<LoadInst>(len)->setMetadata(LLVMContext::MD_range, rng);
-        }
-        JL_GC_POP();
-        return mark_or_box_ccall_result(ctx, len, retboxed, rt, unionall, static_rt);
-    }
-    else if (is_libjulia_func(jl_svec_ref) && argv[1].typ == (jl_value_t*)jl_long_type) {
-        ++CCALL_STAT(jl_svec_ref);
-        assert(lrt == ctx.types().T_prjlvalue);
-        assert(!isVa && !llvmcall && nccallargs == 2);
-        const jl_cgval_t &svecv = argv[0];
-        const jl_cgval_t &idxv = argv[1];
-        Value *idx = emit_unbox(ctx, ctx.types().T_size, idxv, (jl_value_t*)jl_long_type);
-        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, 1));
-        setName(ctx.emission_context, idx, "svec_idx");
-        auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_pprjlvalue);
-        setName(ctx.emission_context, ptr, "svec_data_ptr");
-        Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue,
-                                                         decay_derived(ctx, ptr), idx);
-        setName(ctx.emission_context, slot_addr, "svec_slot_addr");
-        LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr,
-                                                       Align(sizeof(void*)));
-        setName(ctx.emission_context, load, "svec_slot");
-        load->setAtomic(AtomicOrdering::Unordered);
-        // Only mark with TBAA if we are sure about the type.
-        // This could otherwise be in a dead branch
-        if (svecv.typ == (jl_value_t*)jl_simplevector_type) {
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-            ai.decorateInst(load);
-        }
-        JL_GC_POP();
-        return mark_or_box_ccall_result(ctx, load, retboxed, rt, unionall, static_rt);
-    }
-    else if (is_libjulia_func(jl_array_isassigned) &&
-             argv[1].typ == (jl_value_t*)jl_ulong_type) {
-        ++CCALL_STAT(jl_array_isassigned);
-        assert(!isVa && !llvmcall && nccallargs == 2);
-        jl_value_t *aryex = ccallarg(0);
-        const jl_cgval_t &aryv = argv[0];
-        const jl_cgval_t &idxv = argv[1];
-        jl_datatype_t *arydt = (jl_datatype_t*)jl_unwrap_unionall(aryv.typ);
-        if (jl_is_array_type(arydt)) {
-            jl_value_t *ety = jl_tparam0(arydt);
-            bool ptrarray = !jl_stored_inline(ety);
-            if (!ptrarray && !jl_type_hasptr(ety)) {
-                JL_GC_POP();
-                return mark_or_box_ccall_result(ctx, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1),
-                                                false, rt, unionall, static_rt);
-            }
-            else if (!jl_has_free_typevars(ety)) {
-                Value *idx = emit_unbox(ctx, ctx.types().T_size, idxv, (jl_value_t*)jl_ulong_type);
-                Value *arrayptr = emit_bitcast(ctx, emit_arrayptr(ctx, aryv, aryex), ctx.types().T_pprjlvalue);
-                if (!ptrarray) {
-                    size_t elsz = jl_datatype_size(ety);
-                    unsigned align = jl_datatype_align(ety);
-                    size_t stride = LLT_ALIGN(elsz, align) / sizeof(jl_value_t*);
-                    if (stride != 1)
-                        idx = ctx.builder.CreateMul(idx, ConstantInt::get(ctx.types().T_size, stride));
-                    idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, ((jl_datatype_t*)ety)->layout->first_ptr));
-                    setName(ctx.emission_context, idx, "array_idx");
-                }
-                Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, arrayptr, idx);
-                setName(ctx.emission_context, slot_addr, "array_slot_addr");
-                LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, Align(sizeof(void*)));
-                setName(ctx.emission_context, load, "array_slot");
-                load->setAtomic(AtomicOrdering::Unordered);
-                jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_ptrarraybuf);
-                ai.decorateInst(load);
-                Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, Constant::getNullValue(ctx.types().T_prjlvalue)), getInt32Ty(ctx.builder.getContext()));
-                JL_GC_POP();
-                return mark_or_box_ccall_result(ctx, res, retboxed, rt, unionall, static_rt);
-            }
-        }
-    }
     else if (is_libjulia_func(jl_string_ptr)) {
         ++CCALL_STAT(jl_string_ptr);
-        assert(lrt == ctx.types().T_size);
+        assert(lrt == ctx.types().T_ptr);
         assert(!isVa && !llvmcall && nccallargs == 1);
-        auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])),
-                                ctx.types().T_pprjlvalue);
+        auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue
         // The inbounds gep makes it more clear to LLVM that the resulting value is not
         // a null pointer.
-        auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1);
-        strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size);
-        setName(ctx.emission_context, strp, "string_ptr");
+        auto strp = emit_ptrgep(ctx, obj, ctx.types().sizeof_ptr, "string_ptr");
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_symbol_name)) {
         ++CCALL_STAT(jl_symbol_name);
-        assert(lrt == ctx.types().T_size);
+        assert(lrt == ctx.types().T_ptr);
         assert(!isVa && !llvmcall && nccallargs == 1);
-        auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])),
-                                ctx.types().T_pprjlvalue);
+        auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue
         // The inbounds gep makes it more clear to LLVM that the resulting value is not
         // a null pointer.
-        auto strp = ctx.builder.CreateConstInBoundsGEP1_32(
-            ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*));
-        strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size);
-        setName(ctx.emission_context, strp, "symbol_name");
+        auto strp = emit_ptrgep(ctx, obj, sizeof(jl_sym_t), "symbol_name");
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
     }
+    else if (is_libjulia_func(jl_genericmemory_owner) || is_libjulia_func(ijl_genericmemory_owner)) {
+        ++CCALL_STAT(jl_genericmemory_owner);
+        assert(lrt == ctx.types().T_prjlvalue);
+        assert(!isVa && !llvmcall && nccallargs == 1);
+        Value *obj = emit_genericmemoryowner(ctx, boxed(ctx, argv[0]));
+        JL_GC_POP();
+        return mark_julia_type(ctx, obj, true, jl_any_type);
+    }
     else if (is_libjulia_func(memcpy) && (rt == (jl_value_t*)jl_nothing_type || jl_is_cpointer_type(rt))) {
         ++CCALL_STAT(memcpy);
         const jl_cgval_t &dst = argv[0];
         const jl_cgval_t &src = argv[1];
         const jl_cgval_t &n = argv[2];
-        Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type);
+        Value *destp = emit_unbox(ctx, ctx.types().T_ptr, dst, (jl_value_t*)jl_voidpointer_type);
 
         ctx.builder.CreateMemCpy(
-                emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
+                destp,
+                MaybeAlign(1),
+                emit_unbox(ctx, ctx.types().T_ptr, src, (jl_value_t*)jl_voidpointer_type),
                 MaybeAlign(1),
-                emit_inttoptr(ctx,
-                    emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type),
-                    getInt8PtrTy(ctx.builder.getContext())),
-                MaybeAlign(0),
                 emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type),
                 false);
         JL_GC_POP();
@@ -1859,11 +1980,11 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         const jl_cgval_t &dst = argv[0];
         const jl_cgval_t &val = argv[1];
         const jl_cgval_t &n = argv[2];
-        Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type);
+        Value *destp = emit_unbox(ctx, ctx.types().T_ptr, dst, (jl_value_t*)jl_voidpointer_type);
         Value *val32 = emit_unbox(ctx, getInt32Ty(ctx.builder.getContext()), val, (jl_value_t*)jl_uint32_type);
         Value *val8 = ctx.builder.CreateTrunc(val32, getInt8Ty(ctx.builder.getContext()), "memset_val");
         ctx.builder.CreateMemSet(
-            emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
+            destp,
             val8,
             emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type),
             MaybeAlign(1)
@@ -1877,14 +1998,12 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         const jl_cgval_t &dst = argv[0];
         const jl_cgval_t &src = argv[1];
         const jl_cgval_t &n = argv[2];
-        Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type);
+        Value *destp = emit_unbox(ctx, ctx.types().T_ptr, dst, (jl_value_t*)jl_voidpointer_type);
 
         ctx.builder.CreateMemMove(
-                emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
+                destp,
                 MaybeAlign(0),
-                emit_inttoptr(ctx,
-                    emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type),
-                    getInt8PtrTy(ctx.builder.getContext())),
+                emit_unbox(ctx, ctx.types().T_ptr, src, (jl_value_t*)jl_voidpointer_type),
                 MaybeAlign(0),
                 emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type),
                 false);
@@ -1899,7 +2018,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         if (val.typ == (jl_value_t*)jl_symbol_type) {
             JL_GC_POP();
             const int hash_offset = offsetof(jl_sym_t, hash);
-            Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), ctx.types().T_size->getPointerTo());
+            Value *ph1 = decay_derived(ctx, boxed(ctx, val));
             Value *ph2 = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, ph1, ConstantInt::get(ctx.types().T_size, hash_offset / ctx.types().sizeof_ptr));
             setName(ctx.emission_context, ph2, "object_id_ptr");
             LoadInst *hashval = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ph2, ctx.types().alignof_ptr);
@@ -1911,17 +2030,15 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         else if (!val.isboxed) {
             // If the value is not boxed, try to compute the object id without
             // reboxing it.
-            auto T_pint8_derived = PointerType::get(getInt8Ty(ctx.builder.getContext()), AddressSpace::Derived);
-            if (!val.isghost && !val.ispointer())
+            auto T_p_derived = PointerType::get(ctx.builder.getContext(), AddressSpace::Derived);
+            if (!val.isghost)
                 val = value_to_pointer(ctx, val);
             Value *args[] = {
-                emit_typeof(ctx, val),
-                val.isghost ? ConstantPointerNull::get(T_pint8_derived) :
-                    ctx.builder.CreateBitCast(
-                        decay_derived(ctx, data_pointer(ctx, val)),
-                        T_pint8_derived)
+                emit_typeof(ctx, val, false, true),
+                val.isghost ? ConstantPointerNull::get(T_p_derived) :
+                        decay_derived(ctx, data_pointer(ctx, val))
             };
-            Value *ret = ctx.builder.CreateCall(prepare_call(jl_object_id__func), makeArrayRef(args));
+            Value *ret = ctx.builder.CreateCall(prepare_call(jl_object_id__func), ArrayRef<Value*>(args));
             setName(ctx.emission_context, ret, "object_id");
             JL_GC_POP();
             return mark_or_box_ccall_result(ctx, ret, retboxed, rt, unionall, static_rt);
@@ -1938,11 +2055,13 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     return retval;
 }
 
+static inline Constant *literal_static_pointer_val(const void *p, Type *T);
+
 jl_cgval_t function_sig_t::emit_a_ccall(
         jl_codectx_t &ctx,
         const native_sym_arg_t &symarg,
         jl_cgval_t *argv,
-        SmallVector<Value*, 16> &gc_uses,
+        SmallVectorImpl<Value*> &gc_uses,
         bool static_rt) const
 {
     ++EmittedCCalls;
@@ -1958,10 +2077,10 @@ jl_cgval_t function_sig_t::emit_a_ccall(
         // Current C function parameter
         jl_cgval_t &arg = argv[ai];
         jl_value_t *jargty = jl_svecref(at, ai); // Julia type of the current parameter
-        Type *largty = fargt.at(ai); // LLVM type of the current parameter
-        bool toboxed = fargt_isboxed.at(ai);
-        Type *pargty = fargt_sig.at(ai + sret); // LLVM coercion type
-        bool byRef = byRefList.at(ai); // Argument attributes
+        Type *largty = fargt[ai]; // LLVM type of the current parameter
+        bool toboxed = fargt_isboxed[ai];
+        Type *pargty = fargt_sig[ai + sret]; // LLVM coercion type
+        bool byRef = byRefList[ai]; // Argument attributes
 
         // if we know the function sparams, try to fill those in now
         // so that the julia_to_native type checks are more likely to be doable (e.g. concrete types) at compile-time
@@ -1969,8 +2088,11 @@ jl_cgval_t function_sig_t::emit_a_ccall(
         if (ctx.spvals_ptr == NULL && !toboxed && unionall_env && jl_has_typevar_from_unionall(jargty, unionall_env) &&
                 jl_svec_len(ctx.linfo->sparam_vals) > 0) {
             jargty_in_env = jl_instantiate_type_in_env(jargty_in_env, unionall_env, jl_svec_data(ctx.linfo->sparam_vals));
-            if (jargty_in_env != jargty)
-                jargty_in_env = jl_ensure_rooted(ctx, jargty_in_env);
+            if (jargty_in_env != jargty) {
+                JL_GC_PUSH1(&jargty_in_env);
+                jl_temporary_root(ctx, jargty_in_env);
+                JL_GC_POP();
+            }
         }
 
         Value *v;
@@ -2010,21 +2132,21 @@ jl_cgval_t function_sig_t::emit_a_ccall(
     if (sret) {
         assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid");
         if (jl_is_pointerfree(rt)) {
-            result = emit_static_alloca(ctx, lrt);
+            result = emit_static_alloca(ctx, lrt, Align(julia_alignment(rt)));
             setName(ctx.emission_context, result, "ccall_sret");
             sretty = lrt;
-            argvals[0] = ctx.builder.CreateBitCast(result, fargt_sig.at(0));
+            argvals[0] = result;
         }
         else {
             // XXX: result needs to be zero'd and given a GC root here
             // and has incorrect write barriers.
             // instead this code path should behave like `unsafe_load`
-            result = emit_allocobj(ctx, (jl_datatype_t*)rt);
+            result = emit_allocobj(ctx, (jl_datatype_t*)rt, true);
             setName(ctx.emission_context, result, "ccall_sret_box");
             sretty = ctx.types().T_jlvalue;
             sretboxed = true;
             gc_uses.push_back(result);
-            argvals[0] = ctx.builder.CreateBitCast(emit_pointer_from_objref(ctx, result), fargt_sig.at(0));
+            argvals[0] = emit_pointer_from_objref(ctx, result);
         }
     }
 
@@ -2034,20 +2156,16 @@ jl_cgval_t function_sig_t::emit_a_ccall(
     Value *llvmf;
     if (llvmcall) {
         ++EmittedLLVMCalls;
-        if (symarg.jl_ptr != NULL) {
-            emit_error(ctx, "llvmcall doesn't support dynamic pointers");
+        if (symarg.f_name == NULL) {
+            // TODO: this should be checked/enforced a bit better (less dynamically)
+            emit_error(ctx, "llvmcall doesn't support dynamic names");
             return jl_cgval_t();
         }
-        else if (symarg.fptr != NULL) {
-            emit_error(ctx, "llvmcall doesn't support static pointers");
-            return jl_cgval_t();
-        }
-        else if (symarg.f_lib != NULL) {
+        else if (symarg.f_lib_expr != NULL) {
             emit_error(ctx, "llvmcall doesn't support dynamic libraries");
             return jl_cgval_t();
         }
         else {
-            assert(symarg.f_name != NULL);
             StringRef f_name(symarg.f_name);
             bool f_extern = f_name.consume_front("extern ");
             llvmf = NULL;
@@ -2056,9 +2174,13 @@ jl_cgval_t function_sig_t::emit_a_ccall(
                 if (!isa<Function>(llvmf) || cast<Function>(llvmf)->isIntrinsic() || cast<Function>(llvmf)->getFunctionType() != functype)
                     llvmf = NULL;
             }
-            else if (f_name.startswith("llvm.")) {
+            else if (f_name.starts_with("llvm.")) {
                 // compute and verify auto-mangling for intrinsic name
+#if JL_LLVM_VERSION >= 200000
+                auto ID = Intrinsic::lookupIntrinsicID(f_name);
+#else
                 auto ID = Function::lookupIntrinsicID(f_name);
+#endif
                 if (ID != Intrinsic::not_intrinsic) {
                     // Accumulate an array of overloaded types for the given intrinsic
                     // and compute the new name mangling schema
@@ -2070,7 +2192,11 @@ jl_cgval_t function_sig_t::emit_a_ccall(
                     if (res == Intrinsic::MatchIntrinsicTypes_Match) {
                         bool matchvararg = !Intrinsic::matchIntrinsicVarArg(functype->isVarArg(), TableRef);
                         if (matchvararg) {
+#if JL_LLVM_VERSION >= 200000
+                            Function *intrinsic = Intrinsic::getOrInsertDeclaration(jl_Module, ID, overloadTys);
+#else
                             Function *intrinsic = Intrinsic::getDeclaration(jl_Module, ID, overloadTys);
+#endif
                             assert(intrinsic->getFunctionType() == functype);
                             if (intrinsic->getName() == f_name || Intrinsic::getBaseName(ID) == f_name)
                                 llvmf = intrinsic;
@@ -2086,56 +2212,40 @@ jl_cgval_t function_sig_t::emit_a_ccall(
     }
     else if (symarg.jl_ptr != NULL) {
         ++LiteralCCalls;
-        null_pointer_check(ctx, symarg.jl_ptr);
-        Type *funcptype = PointerType::get(functype, 0);
-        llvmf = emit_inttoptr(ctx, symarg.jl_ptr, funcptype);
+        null_pointer_check(ctx, symarg.jl_ptr, nullptr);
+        llvmf = symarg.jl_ptr;
     }
-    else if (symarg.fptr != NULL) {
-        ++LiteralCCalls;
-        Type *funcptype = PointerType::get(functype, 0);
-        llvmf = literal_static_pointer_val((void*)(uintptr_t)symarg.fptr, funcptype);
-        if (ctx.emission_context.imaging)
-            jl_printf(JL_STDERR,"WARNING: literal address used in ccall for %s; code cannot be statically compiled\n", symarg.f_name);
+    else if (!ctx.params->use_jlplt) {
+        if ((symarg.f_lib && !((symarg.f_lib == JL_EXE_LIBNAME) ||
+              (symarg.f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) ||
+              (symarg.f_lib == JL_LIBJULIA_DL_LIBNAME))) || symarg.f_lib_expr) {
+            // n.b. this is not semantically valid, but use_jlplt=1 when semantic correctness is desired
+            emit_error(ctx, "ccall: Had library expression, but symbol lookup was disabled");
+        }
+        if (symarg.f_name == nullptr)
+            emit_error(ctx, "ccall: Had name expression, but symbol lookup was disabled");
+        llvmf = jl_Module->getOrInsertFunction(symarg.f_name, functype).getCallee();
     }
     else {
-        assert(symarg.f_name != NULL);
-        PointerType *funcptype = PointerType::get(functype, 0);
-        if (symarg.lib_expr) {
-            ++DeferredCCallLookups;
-            llvmf = runtime_sym_lookup(ctx, funcptype, NULL, symarg.lib_expr, symarg.f_name, ctx.f);
-        }
-        else if (ctx.emission_context.imaging) {
-            ++DeferredCCallLookups;
-            // vararg requires musttail,
-            // but musttail is incompatible with noreturn.
-            if (functype->isVarArg())
-                llvmf = runtime_sym_lookup(ctx, funcptype, symarg.f_lib, NULL, symarg.f_name, ctx.f);
-            else
-                llvmf = emit_plt(ctx, functype, attributes, cc, symarg.f_lib, symarg.f_name);
-        }
-        else {
-            void *symaddr;
-            void *libsym = jl_get_library_(symarg.f_lib, 0);
-            int symbol_found = jl_dlsym(libsym, symarg.f_name, &symaddr, 0);
-            if (!libsym || !symbol_found) {
-                ++DeferredCCallLookups;
-                // either the library or the symbol could not be found, place a runtime
-                // lookup here instead.
-                llvmf = runtime_sym_lookup(ctx, funcptype, symarg.f_lib, NULL, symarg.f_name, ctx.f);
-            } else {
-                ++LiteralCCalls;
-                // since we aren't saving this code, there's no sense in
-                // putting anything complicated here: just JIT the function address
-                llvmf = literal_static_pointer_val(symaddr, funcptype);
-            }
-        }
+        ++DeferredCCallLookups;
+        // vararg requires musttail,
+        // but musttail is incompatible with noreturn.
+        if (functype->isVarArg())
+            llvmf = runtime_sym_lookup(ctx, symarg, ctx.f);
+        else
+            llvmf = emit_plt(ctx, functype, attributes, cc, symarg);
     }
 
-    OperandBundleDef OpBundle("jl_roots", gc_uses);
+    // Potentially we could add gc_uses to `gc-transition`, instead of emitting them separately as jl_roots
+    SmallVector<OperandBundleDef, 2> bundles;
+    if (!gc_uses.empty())
+        bundles.push_back(OperandBundleDef("jl_roots", gc_uses));
+    if (gc_safe)
+        bundles.push_back(OperandBundleDef("gc-transition", get_current_ptls(ctx)));
     // the actual call
     CallInst *ret = ctx.builder.CreateCall(functype, llvmf,
             argvals,
-            ArrayRef<OperandBundleDef>(&OpBundle, gc_uses.empty() ? 0 : 1));
+            bundles);
     ((CallInst*)ret)->setAttributes(attributes);
 
     if (cc != CallingConv::C)
@@ -2183,10 +2293,10 @@ jl_cgval_t function_sig_t::emit_a_ccall(
         else if (jlretboxed && !retboxed) {
             assert(jl_is_datatype(rt));
             if (static_rt) {
-                Value *strct = emit_allocobj(ctx, (jl_datatype_t*)rt);
+                Value *strct = emit_allocobj(ctx, (jl_datatype_t*)rt, true);
                 setName(ctx.emission_context, strct, "ccall_ret_box");
                 MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut;
-                int boxalign = julia_alignment(rt);
+                Align boxalign(julia_alignment(rt));
                 // copy the data from the return value to the new struct
                 const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
                 auto resultTy = result->getType();
@@ -2194,12 +2304,11 @@ jl_cgval_t function_sig_t::emit_a_ccall(
                 if (DL.getTypeStoreSize(resultTy) > rtsz) {
                     // ARM and AArch64 can use a LLVM type larger than the julia type.
                     // When this happens, cast through memory.
-                    auto slot = emit_static_alloca(ctx, resultTy);
+                    auto slot = emit_static_alloca(ctx, resultTy, boxalign);
                     setName(ctx.emission_context, slot, "type_pun_slot");
-                    slot->setAlignment(Align(boxalign));
-                    ctx.builder.CreateAlignedStore(result, slot, Align(boxalign));
+                    ctx.builder.CreateAlignedStore(result, slot, boxalign);
                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
-                    emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign);
+                    emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign, boxalign);
                 }
                 else {
                     init_bits_value(ctx, strct, result, tbaa, boxalign);
diff --git a/src/ccalllazybar.c b/src/ccalllazybar.c
new file mode 100644
index 0000000000000..84bf9763fffa5
--- /dev/null
+++ b/src/ccalllazybar.c
@@ -0,0 +1,10 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "ccalltest_common.h"
+
+// We expect this to come from `libccalllazyfoo`
+extern int foo(int);
+
+DLLEXPORT int bar(int a) {
+    return foo(a + 1);
+}
diff --git a/src/ccalllazyfoo.c b/src/ccalllazyfoo.c
new file mode 100644
index 0000000000000..d68421adef67b
--- /dev/null
+++ b/src/ccalllazyfoo.c
@@ -0,0 +1,7 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "ccalltest_common.h"
+
+DLLEXPORT int foo(int a) {
+    return a*2;
+}
diff --git a/src/ccalltest.c b/src/ccalltest.c
index e35ff38eb7dc8..0c7c85b328415 100644
--- a/src/ccalltest.c
+++ b/src/ccalltest.c
@@ -1,41 +1,10 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <complex.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#include "../src/support/platform.h"
-#include "../src/support/dtypes.h"
-
-// Borrow definition from `support/dtypes.h`
-#ifdef _OS_WINDOWS_
-#  define DLLEXPORT __declspec(dllexport)
-#else
-# if defined(_OS_LINUX_) && !defined(_COMPILER_CLANG_)
-// Clang and ld disagree about the proper relocation for STV_PROTECTED, causing
-// linker errors.
-#  define DLLEXPORT __attribute__ ((visibility("protected")))
-# else
-#  define DLLEXPORT __attribute__ ((visibility("default")))
-# endif
-#endif
-
-
-#ifdef _P64
-#define jint int64_t
-#define PRIjint PRId64
-#else
-#define jint int32_t
-#define PRIjint PRId32
-#endif
+#include "ccalltest_common.h"
 
 int verbose = 1;
-
 int c_int = 0;
 
-
 //////////////////////////////////
 // Test for proper argument register truncation
 
diff --git a/src/ccalltest_common.h b/src/ccalltest_common.h
new file mode 100644
index 0000000000000..484cbde593369
--- /dev/null
+++ b/src/ccalltest_common.h
@@ -0,0 +1,30 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+#include <stdio.h>
+#include <stdlib.h>
+#include <complex.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "../src/support/platform.h"
+#include "../src/support/dtypes.h"
+
+// Borrow definition from `support/dtypes.h`
+#ifdef _OS_WINDOWS_
+#  define DLLEXPORT __declspec(dllexport)
+#else
+# if defined(_OS_LINUX_) && !defined(_COMPILER_CLANG_)
+// Clang and ld disagree about the proper relocation for STV_PROTECTED, causing
+// linker errors.
+#  define DLLEXPORT __attribute__ ((visibility("protected")))
+# else
+#  define DLLEXPORT __attribute__ ((visibility("default")))
+# endif
+#endif
+
+#ifdef _P64
+#define jint int64_t
+#define PRIjint PRId64
+#else
+#define jint int32_t
+#define PRIjint PRId32
+#endif
diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp
index b627224e027a9..e570834b8500e 100644
--- a/src/cgmemmgr.cpp
+++ b/src/cgmemmgr.cpp
@@ -3,7 +3,11 @@
 #include "llvm-version.h"
 #include "platform.h"
 
+#include <llvm/ExecutionEngine/JITLink/JITLink.h>
+#include <llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h>
+#include <llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h>
 #include <llvm/ExecutionEngine/SectionMemoryManager.h>
+
 #include "julia.h"
 #include "julia_internal.h"
 
@@ -25,18 +29,21 @@
 #  include <sys/types.h>
 #  include <sys/resource.h>
 #endif
+#ifdef _OS_OPENBSD_
+#  include <sys/resource.h>
+#endif
 #include "julia_assert.h"
 
 namespace {
 
-static size_t get_block_size(size_t size)
+static size_t get_block_size(size_t size) JL_NOTSAFEPOINT
 {
     return (size > jl_page_size * 256 ? LLT_ALIGN(size, jl_page_size) :
             jl_page_size * 256);
 }
 
 // Wrapper function to mmap/munmap/mprotect pages...
-static void *map_anon_page(size_t size)
+static void *map_anon_page(size_t size) JL_NOTSAFEPOINT
 {
 #ifdef _OS_WINDOWS_
     char *mem = (char*)VirtualAlloc(NULL, size + jl_page_size,
@@ -51,7 +58,7 @@ static void *map_anon_page(size_t size)
     return mem;
 }
 
-static void unmap_page(void *ptr, size_t size)
+static void unmap_page(void *ptr, size_t size) JL_NOTSAFEPOINT
 {
 #ifdef _OS_WINDOWS_
     VirtualFree(ptr, size, MEM_DECOMMIT);
@@ -68,7 +75,7 @@ enum class Prot : int {
     NO = PAGE_NOACCESS
 };
 
-static void protect_page(void *ptr, size_t size, Prot flags)
+static void protect_page(void *ptr, size_t size, Prot flags) JL_NOTSAFEPOINT
 {
     DWORD old_prot;
     if (!VirtualProtect(ptr, size, (DWORD)flags, &old_prot)) {
@@ -86,7 +93,7 @@ enum class Prot : int {
     NO = PROT_NONE
 };
 
-static void protect_page(void *ptr, size_t size, Prot flags)
+static void protect_page(void *ptr, size_t size, Prot flags) JL_NOTSAFEPOINT
 {
     int ret = mprotect(ptr, size, (int)flags);
     if (ret != 0) {
@@ -95,7 +102,7 @@ static void protect_page(void *ptr, size_t size, Prot flags)
     }
 }
 
-static bool check_fd_or_close(int fd)
+static bool check_fd_or_close(int fd) JL_NOTSAFEPOINT
 {
     if (fd == -1)
         return false;
@@ -126,7 +133,7 @@ static intptr_t anon_hdl = -1;
 // Also, creating big file mapping and then map pieces of it seems to
 // consume too much global resources. Therefore, we use each file mapping
 // as a block on windows
-static void *create_shared_map(size_t size, size_t id)
+static void *create_shared_map(size_t size, size_t id) JL_NOTSAFEPOINT
 {
     void *addr = MapViewOfFile((HANDLE)id, FILE_MAP_ALL_ACCESS,
                                0, 0, size);
@@ -134,13 +141,13 @@ static void *create_shared_map(size_t size, size_t id)
     return addr;
 }
 
-static intptr_t init_shared_map()
+static intptr_t init_shared_map() JL_NOTSAFEPOINT
 {
     anon_hdl = 0;
     return 0;
 }
 
-static void *alloc_shared_page(size_t size, size_t *id, bool exec)
+static void *alloc_shared_page(size_t size, size_t *id, bool exec) JL_NOTSAFEPOINT
 {
     assert(size % jl_page_size == 0);
     DWORD file_mode = exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
@@ -159,7 +166,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec)
 }
 #else // _OS_WINDOWS_
 // For shared mapped region
-static intptr_t get_anon_hdl(void)
+static intptr_t get_anon_hdl(void) JL_NOTSAFEPOINT
 {
     int fd = -1;
 
@@ -179,15 +186,17 @@ static intptr_t get_anon_hdl(void)
     pid_t pid = getpid();
     // `shm_open` can't be mapped exec on mac
 #  ifndef _OS_DARWIN_
+    int shm_open_errno;
     do {
         snprintf(shm_name, sizeof(shm_name),
                  "julia-codegen-%d-%d", (int)pid, rand());
         fd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRWXU);
+        shm_open_errno = errno; // check_fd_or_close trashes errno, so save beforehand
         if (check_fd_or_close(fd)) {
             shm_unlink(shm_name);
             return fd;
         }
-    } while (errno == EEXIST);
+    } while (shm_open_errno == EEXIST);
 #  endif
     FILE *tmpf = tmpfile();
     if (tmpf) {
@@ -225,7 +234,7 @@ static struct _make_shared_map_lock {
     };
 } shared_map_lock;
 
-static size_t get_map_size_inc()
+static size_t get_map_size_inc() JL_NOTSAFEPOINT
 {
     rlimit rl;
     if (getrlimit(RLIMIT_FSIZE, &rl) != -1) {
@@ -239,7 +248,7 @@ static size_t get_map_size_inc()
     return map_size_inc_default;
 }
 
-static void *create_shared_map(size_t size, size_t id)
+static void *create_shared_map(size_t size, size_t id) JL_NOTSAFEPOINT
 {
     void *addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                       anon_hdl, id);
@@ -247,7 +256,7 @@ static void *create_shared_map(size_t size, size_t id)
     return addr;
 }
 
-static intptr_t init_shared_map()
+[[maybe_unused]] static intptr_t init_shared_map() JL_NOTSAFEPOINT
 {
     anon_hdl = get_anon_hdl();
     if (anon_hdl == -1)
@@ -262,7 +271,7 @@ static intptr_t init_shared_map()
     return anon_hdl;
 }
 
-static void *alloc_shared_page(size_t size, size_t *id, bool exec)
+static void *alloc_shared_page(size_t size, size_t *id, bool exec) JL_NOTSAFEPOINT
 {
     assert(size % jl_page_size == 0);
     size_t off = jl_atomic_fetch_add(&map_offset, size);
@@ -289,7 +298,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec)
 #ifdef _OS_LINUX_
 // Using `/proc/self/mem`, A.K.A. Keno's remote memory manager.
 
-ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr)
+ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) JL_NOTSAFEPOINT
 {
     static_assert(sizeof(off_t) >= 8, "off_t is smaller than 64bits");
 #ifdef _P64
@@ -316,7 +325,7 @@ ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr)
 
 // Do not call this directly.
 // Use `get_self_mem_fd` which has a guard to call this only once.
-static int _init_self_mem()
+static int _init_self_mem() JL_NOTSAFEPOINT
 {
     struct utsname kernel;
     uname(&kernel);
@@ -356,13 +365,13 @@ static int _init_self_mem()
     return fd;
 }
 
-static int get_self_mem_fd()
+static int get_self_mem_fd() JL_NOTSAFEPOINT
 {
     static int fd = _init_self_mem();
     return fd;
 }
 
-static void write_self_mem(void *dest, void *ptr, size_t size)
+static void write_self_mem(void *dest, void *ptr, size_t size) JL_NOTSAFEPOINT
 {
     while (size > 0) {
         ssize_t ret = pwrite_addr(get_self_mem_fd(), ptr, size, (uintptr_t)dest);
@@ -421,7 +430,7 @@ struct Block {
 
     Block(const Block&) = delete;
     Block &operator=(const Block&) = delete;
-    Block(Block &&other)
+    Block(Block &&other) JL_NOTSAFEPOINT
         : ptr(other.ptr),
           total(other.total),
           avail(other.avail)
@@ -430,9 +439,9 @@ struct Block {
         other.total = other.avail = 0;
     }
 
-    Block() = default;
+    Block() JL_NOTSAFEPOINT = default;
 
-    void *alloc(size_t size, size_t align)
+    void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT
     {
         size_t aligned_avail = avail & (-align);
         if (aligned_avail < size)
@@ -441,7 +450,7 @@ struct Block {
         avail = aligned_avail - size;
         return p;
     }
-    void reset(void *addr, size_t size)
+    void reset(void *addr, size_t size) JL_NOTSAFEPOINT
     {
         if (avail >= jl_page_size) {
             uintptr_t end = uintptr_t(ptr) + total;
@@ -455,17 +464,27 @@ struct Block {
     }
 };
 
+struct Allocation {
+    // Address to write to (the one returned by the allocation function)
+    void *wr_addr;
+    // Runtime address
+    void *rt_addr;
+    size_t sz;
+    bool relocated;
+};
+
 class RWAllocator {
     static constexpr int nblocks = 8;
     Block blocks[nblocks]{};
 public:
-    void *alloc(size_t size, size_t align)
+    RWAllocator() JL_NOTSAFEPOINT = default;
+    Allocation alloc(size_t size, size_t align) JL_NOTSAFEPOINT
     {
         size_t min_size = (size_t)-1;
         int min_id = 0;
         for (int i = 0;i < nblocks && blocks[i].ptr;i++) {
             if (void *ptr = blocks[i].alloc(size, align))
-                return ptr;
+                return {ptr, ptr, size, false};
             if (blocks[i].avail < min_size) {
                 min_size = blocks[i].avail;
                 min_id = i;
@@ -473,7 +492,8 @@ class RWAllocator {
         }
         size_t block_size = get_block_size(size);
         blocks[min_id].reset(map_anon_page(block_size), block_size);
-        return blocks[min_id].alloc(size, align);
+        void *ptr = blocks[min_id].alloc(size, align);
+        return {ptr, ptr, size, false};
     }
 };
 
@@ -495,9 +515,9 @@ struct SplitPtrBlock : public Block {
 
     uintptr_t wr_ptr{0};
     uint32_t state{0};
-    SplitPtrBlock() = default;
+    SplitPtrBlock() JL_NOTSAFEPOINT = default;
 
-    void swap(SplitPtrBlock &other)
+    void swap(SplitPtrBlock &other) JL_NOTSAFEPOINT
     {
         std::swap(ptr, other.ptr);
         std::swap(total, other.total);
@@ -506,23 +526,13 @@ struct SplitPtrBlock : public Block {
         std::swap(state, other.state);
     }
 
-    SplitPtrBlock(SplitPtrBlock &&other)
+    SplitPtrBlock(SplitPtrBlock &&other) JL_NOTSAFEPOINT
         : SplitPtrBlock()
     {
         swap(other);
     }
 };
 
-struct Allocation {
-    // Address to write to (the one returned by the allocation function)
-    void *wr_addr;
-    // Runtime address
-    void *rt_addr;
-    size_t sz;
-    bool relocated;
-};
-
-template<bool exec>
 class ROAllocator {
 protected:
     static constexpr int nblocks = 8;
@@ -531,25 +541,25 @@ class ROAllocator {
     // but might not have all the permissions set or data copied yet.
     SmallVector<SplitPtrBlock, 16> completed;
     virtual void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr,
-                             size_t size, size_t align) = 0;
-    virtual SplitPtrBlock alloc_block(size_t size) = 0;
+                             size_t size, size_t align) JL_NOTSAFEPOINT = 0;
+    virtual SplitPtrBlock alloc_block(size_t size) JL_NOTSAFEPOINT = 0;
 public:
-    virtual ~ROAllocator() {}
-    virtual void finalize()
+    ROAllocator() JL_NOTSAFEPOINT = default;
+    virtual ~ROAllocator() JL_NOTSAFEPOINT {}
+    virtual void finalize() JL_NOTSAFEPOINT
     {
-        for (auto &alloc: allocations) {
-            // ensure the mapped pages are consistent
-            sys::Memory::InvalidateInstructionCache(alloc.wr_addr,
-                                                    alloc.sz);
-            sys::Memory::InvalidateInstructionCache(alloc.rt_addr,
-                                                    alloc.sz);
-        }
+        // Note: on some aarch64 platforms, like Apple CPUs, we need read
+        // permission in order to invalidate instruction cache lines.  We are
+        // not guaranteed to have read permission on the wr_addr when using
+        // DualMapAllocator.
+        for (auto &alloc : allocations)
+            sys::Memory::InvalidateInstructionCache(alloc.rt_addr, alloc.sz);
         completed.clear();
         allocations.clear();
     }
     // Allocations that have not been finalized yet.
     SmallVector<Allocation, 16> allocations;
-    void *alloc(size_t size, size_t align)
+    Allocation alloc(size_t size, size_t align) JL_NOTSAFEPOINT
     {
         size_t min_size = (size_t)-1;
         int min_id = 0;
@@ -565,8 +575,9 @@ class ROAllocator {
                     wr_ptr = get_wr_ptr(block, ptr, size, align);
                 }
                 block.state |= SplitPtrBlock::Alloc;
-                allocations.push_back(Allocation{wr_ptr, ptr, size, false});
-                return wr_ptr;
+                Allocation a{wr_ptr, ptr, size, false};
+                allocations.push_back(a);
+                return a;
             }
             if (block.avail < min_size) {
                 min_size = block.avail;
@@ -587,20 +598,23 @@ class ROAllocator {
 #ifdef _OS_WINDOWS_
         block.state = SplitPtrBlock::Alloc;
         void *wr_ptr = get_wr_ptr(block, ptr, size, align);
-        allocations.push_back(Allocation{wr_ptr, ptr, size, false});
+        Allocation a{wr_ptr, ptr, size, false};
+        allocations.push_back(a);
         ptr = wr_ptr;
 #else
         block.state = SplitPtrBlock::Alloc | SplitPtrBlock::InitAlloc;
-        allocations.push_back(Allocation{ptr, ptr, size, false});
+        Allocation a{ptr, ptr, size, false};
+        allocations.push_back(a);
 #endif
-        return ptr;
+        return a;
     }
 };
 
-template<bool exec>
-class DualMapAllocator : public ROAllocator<exec> {
+class DualMapAllocator : public ROAllocator {
+    bool exec;
+
 protected:
-    void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override
+    void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override JL_NOTSAFEPOINT
     {
         assert((char*)rt_ptr >= block.ptr &&
                (char*)rt_ptr < (block.ptr + block.total));
@@ -615,7 +629,7 @@ class DualMapAllocator : public ROAllocator<exec> {
         }
         return (char*)rt_ptr + (block.wr_ptr - uintptr_t(block.ptr));
     }
-    SplitPtrBlock alloc_block(size_t size) override
+    SplitPtrBlock alloc_block(size_t size) override JL_NOTSAFEPOINT
     {
         SplitPtrBlock new_block;
         // use `wr_ptr` to record the id initially
@@ -623,7 +637,7 @@ class DualMapAllocator : public ROAllocator<exec> {
         new_block.reset(ptr, size);
         return new_block;
     }
-    void finalize_block(SplitPtrBlock &block, bool reset)
+    void finalize_block(SplitPtrBlock &block, bool reset) JL_NOTSAFEPOINT
     {
         // This function handles setting the block to the right mode
         // and free'ing maps that are not needed anymore.
@@ -659,11 +673,11 @@ class DualMapAllocator : public ROAllocator<exec> {
         }
     }
 public:
-    DualMapAllocator()
+    DualMapAllocator(bool exec) JL_NOTSAFEPOINT : exec(exec)
     {
         assert(anon_hdl != -1);
     }
-    void finalize() override
+    void finalize() override JL_NOTSAFEPOINT
     {
         for (auto &block : this->blocks) {
             finalize_block(block, false);
@@ -672,17 +686,17 @@ class DualMapAllocator : public ROAllocator<exec> {
             finalize_block(block, true);
             block.reset(nullptr, 0);
         }
-        ROAllocator<exec>::finalize();
+        ROAllocator::finalize();
     }
 };
 
 #ifdef _OS_LINUX_
-template<bool exec>
-class SelfMemAllocator : public ROAllocator<exec> {
+class SelfMemAllocator : public ROAllocator {
+    bool exec;
     SmallVector<Block, 16> temp_buff;
 protected:
     void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr,
-                     size_t size, size_t align) override
+                     size_t size, size_t align) override JL_NOTSAFEPOINT
     {
         assert(!(block.state & SplitPtrBlock::InitAlloc));
         for (auto &wr_block: temp_buff) {
@@ -696,13 +710,13 @@ class SelfMemAllocator : public ROAllocator<exec> {
         new_block.reset(map_anon_page(block_size), block_size);
         return new_block.alloc(size, align);
     }
-    SplitPtrBlock alloc_block(size_t size) override
+    SplitPtrBlock alloc_block(size_t size) override JL_NOTSAFEPOINT
     {
         SplitPtrBlock new_block;
         new_block.reset(map_anon_page(size), size);
         return new_block;
     }
-    void finalize_block(SplitPtrBlock &block, bool reset)
+    void finalize_block(SplitPtrBlock &block, bool reset) JL_NOTSAFEPOINT
     {
         if (!(block.state & SplitPtrBlock::Alloc))
             return;
@@ -715,13 +729,11 @@ class SelfMemAllocator : public ROAllocator<exec> {
         }
     }
 public:
-    SelfMemAllocator()
-        : ROAllocator<exec>(),
-          temp_buff()
+    SelfMemAllocator(bool exec) JL_NOTSAFEPOINT : exec(exec), temp_buff()
     {
         assert(get_self_mem_fd() != -1);
     }
-    void finalize() override
+    void finalize() override JL_NOTSAFEPOINT
     {
         for (auto &block : this->blocks) {
             finalize_block(block, false);
@@ -751,11 +763,27 @@ class SelfMemAllocator : public ROAllocator<exec> {
         }
         if (cached)
             temp_buff.resize(1);
-        ROAllocator<exec>::finalize();
+        ROAllocator::finalize();
     }
 };
 #endif // _OS_LINUX_
 
+std::pair<std::unique_ptr<ROAllocator>, std::unique_ptr<ROAllocator>>
+get_preferred_allocators() JL_NOTSAFEPOINT
+{
+#if !(defined(_CPU_AARCH64_) || defined(_CPU_RISCV64_))
+#ifdef _OS_LINUX_
+    if (get_self_mem_fd() != -1)
+        return {std::make_unique<SelfMemAllocator>(false),
+                std::make_unique<SelfMemAllocator>(true)};
+#endif
+    if (init_shared_map() != -1)
+        return {std::make_unique<DualMapAllocator>(false),
+                std::make_unique<DualMapAllocator>(true)};
+#endif
+    return {};
+}
+
 class RTDyldMemoryManagerJL : public SectionMemoryManager {
     struct EHFrame {
         uint8_t *addr;
@@ -765,38 +793,25 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
     void operator=(const RTDyldMemoryManagerJL&) = delete;
     SmallVector<EHFrame, 16> pending_eh;
     RWAllocator rw_alloc;
-    std::unique_ptr<ROAllocator<false>> ro_alloc;
-    std::unique_ptr<ROAllocator<true>> exe_alloc;
-    bool code_allocated;
+    std::unique_ptr<ROAllocator> ro_alloc;
+    std::unique_ptr<ROAllocator> exe_alloc;
     size_t total_allocated;
 
 public:
-    RTDyldMemoryManagerJL()
+    RTDyldMemoryManagerJL() JL_NOTSAFEPOINT
         : SectionMemoryManager(),
           pending_eh(),
           rw_alloc(),
-          ro_alloc(),
-          exe_alloc(),
-          code_allocated(false),
           total_allocated(0)
     {
-#ifdef _OS_LINUX_
-        if (!ro_alloc && get_self_mem_fd() != -1) {
-            ro_alloc.reset(new SelfMemAllocator<false>());
-            exe_alloc.reset(new SelfMemAllocator<true>());
-        }
-#endif
-        if (!ro_alloc && init_shared_map() != -1) {
-            ro_alloc.reset(new DualMapAllocator<false>());
-            exe_alloc.reset(new DualMapAllocator<true>());
-        }
+        std::tie(ro_alloc, exe_alloc) = get_preferred_allocators();
     }
-    ~RTDyldMemoryManagerJL() override
+    ~RTDyldMemoryManagerJL() override JL_NOTSAFEPOINT
     {
     }
-    size_t getTotalBytes() { return total_allocated; }
+    size_t getTotalBytes() JL_NOTSAFEPOINT { return total_allocated; }
     void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
-                          size_t Size) override;
+                          size_t Size) override JL_NOTSAFEPOINT;
 #if 0
     // Disable for now since we are not actually using this.
     void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
@@ -804,16 +819,16 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
 #endif
     uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                                  unsigned SectionID,
-                                 StringRef SectionName) override;
+                                 StringRef SectionName) override JL_NOTSAFEPOINT;
     uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
                                  unsigned SectionID, StringRef SectionName,
-                                 bool isReadOnly) override;
+                                 bool isReadOnly) override JL_NOTSAFEPOINT;
     using SectionMemoryManager::notifyObjectLoaded;
     void notifyObjectLoaded(RuntimeDyld &Dyld,
-                            const object::ObjectFile &Obj) override;
-    bool finalizeMemory(std::string *ErrMsg = nullptr) override;
+                            const object::ObjectFile &Obj) override JL_NOTSAFEPOINT;
+    bool finalizeMemory(std::string *ErrMsg = nullptr) override JL_NOTSAFEPOINT;
     template <typename DL, typename Alloc>
-    void mapAddresses(DL &Dyld, Alloc &&allocator)
+    void mapAddresses(DL &Dyld, Alloc &&allocator) JL_NOTSAFEPOINT
     {
         for (auto &alloc: allocator->allocations) {
             if (alloc.rt_addr == alloc.wr_addr || alloc.relocated)
@@ -823,53 +838,26 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
         }
     }
     template <typename DL>
-    void mapAddresses(DL &Dyld)
+    void mapAddresses(DL &Dyld) JL_NOTSAFEPOINT
     {
         if (!ro_alloc)
             return;
         mapAddresses(Dyld, ro_alloc);
         mapAddresses(Dyld, exe_alloc);
     }
-#ifdef _OS_WINDOWS_
-    template <typename Alloc>
-    void *lookupWriteAddressFor(void *rt_addr, Alloc &&allocator)
-    {
-        for (auto &alloc: allocator->allocations) {
-            if (alloc.rt_addr == rt_addr) {
-                return alloc.wr_addr;
-            }
-        }
-        return nullptr;
-    }
-    void *lookupWriteAddressFor(void *rt_addr)
-    {
-        if (!ro_alloc)
-            return rt_addr;
-        if (void *ptr = lookupWriteAddressFor(rt_addr, ro_alloc))
-            return ptr;
-        if (void *ptr = lookupWriteAddressFor(rt_addr, exe_alloc))
-            return ptr;
-        return rt_addr;
-    }
-#endif // _OS_WINDOWS_
 };
 
 uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
                                                     unsigned Alignment,
                                                     unsigned SectionID,
-                                                    StringRef SectionName)
+                                                    StringRef SectionName) JL_NOTSAFEPOINT
 {
     // allocating more than one code section can confuse libunwind.
-#if !defined(_COMPILER_MSAN_ENABLED_) && !defined(_COMPILER_ASAN_ENABLED_)
-    // TODO: Figure out why msan and now asan too need this.
-    assert(!code_allocated);
-    code_allocated = true;
-#endif
     total_allocated += Size;
     jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
     jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size);
     if (exe_alloc)
-        return (uint8_t*)exe_alloc->alloc(Size, Alignment);
+        return (uint8_t*)exe_alloc->alloc(Size, Alignment).wr_addr;
     return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
                                                      SectionName);
 }
@@ -878,21 +866,21 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size,
                                                     unsigned Alignment,
                                                     unsigned SectionID,
                                                     StringRef SectionName,
-                                                    bool isReadOnly)
+                                                    bool isReadOnly) JL_NOTSAFEPOINT
 {
     total_allocated += Size;
     jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
     jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, Size);
     if (!isReadOnly)
-        return (uint8_t*)rw_alloc.alloc(Size, Alignment);
+        return (uint8_t*)rw_alloc.alloc(Size, Alignment).wr_addr;
     if (ro_alloc)
-        return (uint8_t*)ro_alloc->alloc(Size, Alignment);
+        return (uint8_t*)ro_alloc->alloc(Size, Alignment).wr_addr;
     return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID,
                                                      SectionName, isReadOnly);
 }
 
 void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld,
-                                               const object::ObjectFile &Obj)
+                                               const object::ObjectFile &Obj) JL_NOTSAFEPOINT
 {
     if (!ro_alloc) {
         assert(!exe_alloc);
@@ -903,9 +891,8 @@ void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld,
     mapAddresses(Dyld);
 }
 
-bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg)
+bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) JL_NOTSAFEPOINT
 {
-    code_allocated = false;
     if (ro_alloc) {
         ro_alloc->finalize();
         assert(exe_alloc);
@@ -923,7 +910,7 @@ bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg)
 
 void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr,
                                              uint64_t LoadAddr,
-                                             size_t Size)
+                                             size_t Size) JL_NOTSAFEPOINT
 {
     if (uintptr_t(Addr) == LoadAddr) {
         register_eh_frames(Addr, Size);
@@ -936,27 +923,156 @@ void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr,
 #if 0
 void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr,
                                                uint64_t LoadAddr,
-                                               size_t Size)
+                                               size_t Size) JL_NOTSAFEPOINT
 {
     deregister_eh_frames((uint8_t*)LoadAddr, Size);
 }
 #endif
 
-}
+class JLJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
+    using OnFinalizedFunction =
+        jitlink::JITLinkMemoryManager::InFlightAlloc::OnFinalizedFunction;
 
-#ifdef _OS_WINDOWS_
-void *lookupWriteAddressFor(RTDyldMemoryManager *memmgr, void *rt_addr)
+    std::mutex Mutex;
+    RWAllocator RWAlloc;
+    std::unique_ptr<ROAllocator> ROAlloc;
+    std::unique_ptr<ROAllocator> ExeAlloc;
+    SmallVector<OnFinalizedFunction> FinalizedCallbacks;
+    uint32_t InFlight{0};
+
+public:
+    class InFlightAlloc;
+
+    static std::unique_ptr<JITLinkMemoryManager> Create()
+    {
+        auto [ROAlloc, ExeAlloc] = get_preferred_allocators();
+        if (ROAlloc && ExeAlloc)
+            return std::unique_ptr<JLJITLinkMemoryManager>(
+                new JLJITLinkMemoryManager(std::move(ROAlloc), std::move(ExeAlloc)));
+
+        return cantFail(
+            orc::MapperJITLinkMemoryManager::CreateWithMapper<orc::InProcessMemoryMapper>(
+                /*Reservation Granularity*/ 16 * 1024 * 1024));
+    }
+
+    void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G,
+                  OnAllocatedFunction OnAllocated) override;
+
+    void deallocate(std::vector<FinalizedAlloc> Allocs,
+                    OnDeallocatedFunction OnDeallocated) override
+    {
+        // This shouldn't be reachable, but we will get a better error message
+        // from JITLink if we leak this allocation and fail elsewhere.
+    }
+
+protected:
+    JLJITLinkMemoryManager(std::unique_ptr<ROAllocator> ROAlloc,
+                           std::unique_ptr<ROAllocator> ExeAlloc)
+      : ROAlloc(std::move(ROAlloc)), ExeAlloc(std::move(ExeAlloc))
+    {
+    }
+
+    void finalize(OnFinalizedFunction OnFinalized)
+    {
+        SmallVector<OnFinalizedFunction> Callbacks;
+        {
+            std::unique_lock Lock{Mutex};
+            FinalizedCallbacks.push_back(std::move(OnFinalized));
+
+            if (--InFlight > 0)
+                return;
+
+            ROAlloc->finalize();
+            ExeAlloc->finalize();
+            Callbacks = std::move(FinalizedCallbacks);
+        }
+
+        for (auto &CB : Callbacks)
+            std::move(CB)(FinalizedAlloc{});
+    }
+};
+
+class JLJITLinkMemoryManager::InFlightAlloc
+  : public jitlink::JITLinkMemoryManager::InFlightAlloc {
+    JLJITLinkMemoryManager &MM;
+    jitlink::LinkGraph &G;
+
+public:
+    InFlightAlloc(JLJITLinkMemoryManager &MM, jitlink::LinkGraph &G) : MM(MM), G(G) {}
+
+    void abandon(OnAbandonedFunction OnAbandoned) override {
+        // This shouldn't be reachable, but we will get a better error message
+        // from JITLink if we leak this allocation and fail elsewhere.
+    }
+
+    void finalize(OnFinalizedFunction OnFinalized) override
+    {
+        auto *GP = &G;
+        MM.finalize([GP, OnFinalized =
+                             std::move(OnFinalized)](Expected<FinalizedAlloc> FA) mutable {
+            if (!FA)
+                return OnFinalized(FA.takeError());
+            // Need to handle dealloc actions when we GC code
+            auto E = orc::shared::runFinalizeActions(GP->allocActions());
+            if (!E)
+                return OnFinalized(E.takeError());
+            OnFinalized(std::move(FA));
+        });
+    }
+};
+
+using orc::MemProt;
+
+void JLJITLinkMemoryManager::allocate(const jitlink::JITLinkDylib *JD,
+                                      jitlink::LinkGraph &G,
+                                      OnAllocatedFunction OnAllocated)
 {
-    return ((RTDyldMemoryManagerJL*)memmgr)->lookupWriteAddressFor(rt_addr);
+    jitlink::BasicLayout BL{G};
+
+    {
+        std::unique_lock Lock{Mutex};
+        for (auto &[AG, Seg] : BL.segments()) {
+            if (AG.getMemLifetime() == orc::MemLifetime::NoAlloc)
+                continue;
+            assert(AG.getMemLifetime() == orc::MemLifetime::Standard);
+
+            auto Prot = AG.getMemProt();
+            uint64_t Alignment = Seg.Alignment.value();
+            uint64_t Size = Seg.ContentSize + Seg.ZeroFillSize;
+            Allocation Alloc;
+            if (Prot == (MemProt::Read | MemProt::Write))
+                Alloc = RWAlloc.alloc(Size, Alignment);
+            else if (Prot == MemProt::Read)
+                Alloc = ROAlloc->alloc(Size, Alignment);
+            else if (Prot == (MemProt::Read | MemProt::Exec))
+                Alloc = ExeAlloc->alloc(Size, Alignment);
+            else
+                abort();
+
+            Seg.Addr = orc::ExecutorAddr::fromPtr(Alloc.rt_addr);
+            Seg.WorkingMem = (char *)Alloc.wr_addr;
+        }
+    }
+
+    if (auto Err = BL.apply())
+        return OnAllocated(std::move(Err));
+
+    ++InFlight;
+    OnAllocated(std::make_unique<InFlightAlloc>(*this, G));
+}
 }
-#endif
 
-RTDyldMemoryManager* createRTDyldMemoryManager()
+RTDyldMemoryManager* createRTDyldMemoryManager() JL_NOTSAFEPOINT
 {
     return new RTDyldMemoryManagerJL();
 }
 
-size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm)
+size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT
 {
     return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes();
 }
+
+std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager()
+{
+    return JLJITLinkMemoryManager::Create();
+}
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 8442ba99bb411..62f31e237f4b6 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -28,13 +28,8 @@ STATISTIC(EmittedGetfieldKnowns, "Number of known getfield calls emitted");
 STATISTIC(EmittedSetfield, "Number of setfield calls emitted");
 STATISTIC(EmittedUnionLoads, "Number of union loads emitted");
 STATISTIC(EmittedVarargsLength, "Number of varargs length calls emitted");
-STATISTIC(EmittedArraysize, "Number of arraysize calls emitted");
-STATISTIC(EmittedArraylen, "Number of array length calls emitted");
-STATISTIC(EmittedArrayptr, "Number of array data pointer loads emitted");
-STATISTIC(EmittedArrayflags, "Number of arrayflags calls emitted");
-STATISTIC(EmittedArrayNDims, "Number of array ndims calls emitted");
+STATISTIC(EmittedArrayptr, "Number of array ptr calls emitted");
 STATISTIC(EmittedArrayElsize, "Number of array elsize calls emitted");
-STATISTIC(EmittedArrayOffset, "Number of array offset calls emitted");
 STATISTIC(EmittedArrayNdIndex, "Number of array nd index calls emitted");
 STATISTIC(EmittedBoxes, "Number of box operations emitted");
 STATISTIC(EmittedCPointerChecks, "Number of C pointer checks emitted");
@@ -62,10 +57,10 @@ static Value *maybe_decay_untracked(jl_codectx_t &ctx, Value *V)
 static Value *decay_derived(jl_codectx_t &ctx, Value *V)
 {
     Type *T = V->getType();
-    if (cast<PointerType>(T)->getAddressSpace() == AddressSpace::Derived)
+    if (T->getPointerAddressSpace() == AddressSpace::Derived)
         return V;
     // Once llvm deletes pointer element types, we won't need it here any more either.
-    Type *NewT = PointerType::getWithSamePointeeType(cast<PointerType>(T), AddressSpace::Derived);
+    Type *NewT = PointerType::get(T->getContext(), AddressSpace::Derived);
     return ctx.builder.CreateAddrSpaceCast(V, NewT);
 }
 
@@ -73,9 +68,9 @@ static Value *decay_derived(jl_codectx_t &ctx, Value *V)
 static Value *maybe_decay_tracked(jl_codectx_t &ctx, Value *V)
 {
     Type *T = V->getType();
-    if (cast<PointerType>(T)->getAddressSpace() != AddressSpace::Tracked)
+    if (T->getPointerAddressSpace() != AddressSpace::Tracked)
         return V;
-    Type *NewT = PointerType::getWithSamePointeeType(cast<PointerType>(T), AddressSpace::Derived);
+    Type *NewT = PointerType::get(T->getContext(), AddressSpace::Derived);
     return ctx.builder.CreateAddrSpaceCast(V, NewT);
 }
 
@@ -83,7 +78,7 @@ static Value *mark_callee_rooted(jl_codectx_t &ctx, Value *V)
 {
     assert(V->getType() == ctx.types().T_pjlvalue || V->getType() == ctx.types().T_prjlvalue);
     return ctx.builder.CreateAddrSpaceCast(V,
-        PointerType::get(ctx.types().T_jlvalue, AddressSpace::CalleeRooted));
+        PointerType::get(V->getContext(), AddressSpace::CalleeRooted));
 }
 
 AtomicOrdering get_llvm_atomic_order(enum jl_memory_order order)
@@ -111,20 +106,24 @@ AtomicOrdering get_llvm_atomic_order(enum jl_memory_order order)
 static Value *stringConstPtr(
         jl_codegen_params_t &emission_context,
         IRBuilder<> &irbuilder,
-        const std::string &txt)
+        const Twine &txt)
 {
     Module *M = jl_builderModule(irbuilder);
-    StringRef ctxt(txt.c_str(), txt.size() + 1);
-    Constant *Data = ConstantDataArray::get(irbuilder.getContext(), arrayRefFromStringRef(ctxt));
-    GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, "_j_str", *M);
-    Value *zero = ConstantInt::get(Type::getInt32Ty(irbuilder.getContext()), 0);
-    Value *Args[] = { zero, zero };
-    auto gep = irbuilder.CreateInBoundsGEP(gv->getValueType(),
-                                       // Addrspacecast in case globals are in non-0 AS
-                                       irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0)),
-                                       Args);
-    setName(emission_context, gep, "string_const_ptr");
-    return gep;
+    SmallVector<char, 128> ctxt;
+    txt.toVector(ctxt);
+    // null-terminate the string
+    ctxt.push_back(0);
+    Constant *Data = ConstantDataArray::get(irbuilder.getContext(), ctxt);
+    ctxt.pop_back();
+    // We use this for the name of the gv, so cap its size to avoid memory blowout
+    if (ctxt.size() > 28) {
+        ctxt.resize(28);
+        ctxt[25] = ctxt[26] = ctxt[27] = '.';
+    }
+    // Doesn't need to be aligned, we shouldn't operate on these like julia objects
+    GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, Align(1), "_j_str_" + StringRef(ctxt.data(), ctxt.size()), *M);
+    // AddrSpaceCast in case globals are in non-0 AS
+    return irbuilder.CreateAddrSpaceCast(gv, PointerType::getUnqual(gv->getContext()));
 }
 
 
@@ -203,9 +202,9 @@ static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_debugcache_t &debu
         uint64_t SizeInBits = jl_datatype_nbits(jdt);
         ditype = dbuilder->createBasicType(tname, SizeInBits, llvm::dwarf::DW_ATE_unsigned);
     }
-    else if (jl_is_structtype(jt) && !jl_is_layout_opaque(jdt->layout)) {
+    else if (jl_is_structtype(jt) && !jl_is_layout_opaque(jdt->layout) && !jl_is_array_type(jdt)) {
         size_t ntypes = jl_datatype_nfields(jdt);
-        std::vector<llvm::Metadata*> Elements(ntypes);
+        SmallVector<llvm::Metadata*, 0> Elements(ntypes);
         for (unsigned i = 0; i < ntypes; i++) {
             jl_value_t *el = jl_field_type_concrete(jdt, i);
             DIType *di;
@@ -268,7 +267,7 @@ void jl_debugcache_t::initialize(Module *m) {
                                                 __alignof__(jl_value_t*) * 8);
 
     SmallVector<llvm::Metadata *, 1> Elts;
-    std::vector<Metadata*> diargs(0);
+    SmallVector<Metadata*, 0> diargs(0);
     Elts.push_back(jl_pvalue_dillvmt);
     dbuilder.replaceArrays(jl_value_dillvmt,
     dbuilder.getOrCreateArray(Elts));
@@ -290,13 +289,10 @@ void jl_debugcache_t::initialize(Module *m) {
 
 static Value *emit_pointer_from_objref(jl_codectx_t &ctx, Value *V)
 {
-    unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+    unsigned AS = V->getType()->getPointerAddressSpace();
     if (AS != AddressSpace::Tracked && AS != AddressSpace::Derived)
         return V;
     V = decay_derived(ctx, V);
-    Type *T = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
-    if (V->getType() != T)
-        V = ctx.builder.CreateBitCast(V, T);
     Function *F = prepare_call(pointer_from_objref_func);
     CallInst *Call = ctx.builder.CreateCall(F, V);
     Call->setAttributes(F->getAttributes());
@@ -304,38 +300,108 @@ static Value *emit_pointer_from_objref(jl_codectx_t &ctx, Value *V)
     return Call;
 }
 
-static Value *get_gc_root_for(const jl_cgval_t &x)
+static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt);
+static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value* dest, MDNode *tbaa_dest, MaybeAlign align_src, Align align_dst, bool isVolatile=false);
+
+static bool type_is_permalloc(jl_value_t *typ)
 {
-    if (x.Vboxed)
-        return x.Vboxed;
-    if (x.ispointer() && !x.constant) {
-        assert(x.V);
-        if (PointerType *T = dyn_cast<PointerType>(x.V->getType())) {
-            if (T->getAddressSpace() == AddressSpace::Tracked ||
-                T->getAddressSpace() == AddressSpace::Derived) {
-                return x.V;
-            }
+    // Singleton should almost always be handled by the later optimization passes.
+    // Also do it here since it is cheap and save some effort in LLVM passes.
+    if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ))
+        return true;
+    return typ == (jl_value_t*)jl_symbol_type ||
+        typ == (jl_value_t*)jl_int8_type ||
+        typ == (jl_value_t*)jl_uint8_type;
+}
+
+
+// find the offset of pointer fields which never need a write barrier since their type-analysis
+// shows they are permanently rooted
+static void find_perm_offsets(jl_datatype_t *typ, SmallVectorImpl<unsigned> &res, unsigned offset)
+{
+    // This is a inlined field at `offset`.
+    if (!typ->layout || typ->layout->npointers == 0)
+        return;
+    jl_svec_t *types = jl_get_fieldtypes(typ);
+    size_t nf = jl_svec_len(types);
+    for (size_t i = 0; i < nf; i++) {
+        jl_value_t *_fld = jl_svecref(types, i);
+        if (!jl_is_datatype(_fld))
+            continue;
+        jl_datatype_t *fld = (jl_datatype_t*)_fld;
+        if (jl_field_isptr(typ, i)) {
+            // pointer field, check if field is perm-alloc
+            if (type_is_permalloc((jl_value_t*)fld))
+                res.push_back(offset + jl_field_offset(typ, i));
+            continue;
         }
+        // inline field
+        find_perm_offsets(fld, res, offset + jl_field_offset(typ, i));
     }
-    return nullptr;
 }
 
-// --- emitting pointers directly into code ---
+// load a pointer to N inlined_roots into registers (as a SmallVector)
+static llvm::SmallVector<Value*,0> load_gc_roots(jl_codectx_t &ctx, Value *inline_roots_ptr, size_t npointers, MDNode *tbaa, bool isVolatile=false)
+{
+    SmallVector<Value*,0> gcroots(npointers);
+    Type *T_prjlvalue = ctx.types().T_prjlvalue;
+    auto roots_ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
+    for (size_t i = 0; i < npointers; i++) {
+        auto *ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(jl_value_t*)), Align(sizeof(void*)), isVolatile);
+        roots_ai.decorateInst(ptr);
+        gcroots[i] = ptr;
+    }
+    return gcroots;
+}
+
+// inlined bool indicates whether this must return the inlined roots inside x separately, or whether x itself may be used as the root (if x is already isboxed)
+static llvm::SmallVector<Value*,0> get_gc_roots_for(jl_codectx_t &ctx, const jl_cgval_t &x, bool inlined=false)
+{
+    if (x.constant || x.typ == jl_bottom_type)
+        return {};
+    if (!inlined && x.Vboxed) // superset of x.isboxed
+        return {x.Vboxed};
+    assert(!x.isboxed || !inlined);
+    if (!x.inline_roots.empty()) {
+        // if (!inlined) { // TODO: implement this filter operation
+        //     SmallVector<unsigned,4> perm_offsets;
+        //     find_perm_offsets(typ, perm_offsets, 0);
+        //     return filter(!in(perm_offsets), x.inline_roots)
+        // }
+        return x.inline_roots;
+    }
+    if (!inlined && x.ispointer()) {
+        assert(x.V);
+        assert(x.V->getType()->getPointerAddressSpace() != AddressSpace::Tracked);
+        return {x.V};
+    }
+    else if (jl_is_concrete_immutable(x.typ) && !jl_is_pointerfree(x.typ)) {
+        jl_value_t *jltype = x.typ;
+        Type *T = julia_type_to_llvm(ctx, jltype);
+        Value *agg = emit_unbox(ctx, T, x, jltype);
+        SmallVector<unsigned,4> perm_offsets;
+        find_perm_offsets((jl_datatype_t*)jltype, perm_offsets, 0);
+        return ExtractTrackedValues(agg, agg->getType(), false, ctx.builder, perm_offsets);
+    }
+    // nothing here to root, move along
+    return {};
+}
 
+// --- emitting pointers directly into code ---
 
-static inline Constant *literal_static_pointer_val(const void *p, Type *T);
+static void jl_temporary_root(jl_codegen_params_t &ctx, jl_value_t *val);
+static void jl_temporary_root(jl_codectx_t &ctx, jl_value_t *val);
 
-static Constant *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr)
+static Constant *julia_pgv(jl_codegen_params_t &params, Module *M, const char *cname, void *addr)
 {
     // emit a GlobalVariable for a jl_value_t named "cname"
     // store the name given so we can reuse it (facilitating merging later)
     // so first see if there already is a GlobalVariable for this address
-    GlobalVariable* &gv = ctx.global_targets[addr];
-    Module *M = jl_Module;
+    GlobalVariable* &gv = params.global_targets[addr];
     StringRef localname;
     std::string gvname;
     if (!gv) {
-        uint64_t id = ctx.emission_context.imaging ? jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1) : ctx.global_targets.size();
+        uint64_t id = jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); // TODO: use params.global_targets.size()
         raw_string_ostream(gvname) << cname << id;
         localname = StringRef(gvname);
     }
@@ -345,9 +411,9 @@ static Constant *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr)
             gv = cast_or_null<GlobalVariable>(M->getNamedValue(localname));
     }
     if (gv == nullptr)
-        gv = new GlobalVariable(*M, ctx.types().T_pjlvalue,
+        gv = new GlobalVariable(*M, getPointerTy(M->getContext()),
                                 false, GlobalVariable::ExternalLinkage,
-                                NULL, localname);
+                                nullptr, localname);
     // LLVM passes sometimes strip metadata when moving load around
     // since the load at the new location satisfy the same condition as the original one.
     // Mark the global as constant to LLVM code using our own metadata
@@ -358,7 +424,7 @@ static Constant *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr)
     return gv;
 }
 
-static Constant *julia_pgv(jl_codectx_t &ctx, const char *prefix, jl_sym_t *name, jl_module_t *mod, void *addr)
+static Constant *julia_pgv(jl_codegen_params_t &params, Module *M, const char *prefix, jl_sym_t *name, jl_module_t *mod, void *addr)
 {
     // emit a GlobalVariable for a jl_value_t, using the prefix, name, and module to
     // to create a readable name of the form prefixModA.ModB.name#
@@ -383,66 +449,54 @@ static Constant *julia_pgv(jl_codectx_t &ctx, const char *prefix, jl_sym_t *name
     finalname.resize(orig_end + prefix_name.size());
     std::reverse_copy(prefix_name.begin(), prefix_name.end(), finalname.begin() + orig_end);
     std::reverse(finalname.begin(), finalname.end());
-    return julia_pgv(ctx, finalname.c_str(), addr);
+    return julia_pgv(params, M, finalname.c_str(), addr);
 }
 
 static JuliaVariable *julia_const_gv(jl_value_t *val);
-static Constant *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p)
+Constant *literal_pointer_val_slot(jl_codegen_params_t &params, Module *M, jl_value_t *p)
 {
     // emit a pointer to a jl_value_t* which will allow it to be valid across reloading code
     // also, try to give it a nice name for gdb, for easy identification
-    if (!ctx.emission_context.imaging) {
-        // TODO: this is an optimization, but is it useful or premature
-        // (it'll block any attempt to cache these, but can be simply deleted)
-        Module *M = jl_Module;
-        GlobalVariable *gv = new GlobalVariable(
-                *M, ctx.types().T_pjlvalue, true, GlobalVariable::PrivateLinkage,
-                literal_static_pointer_val(p, ctx.types().T_pjlvalue));
-        gv->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
-        return gv;
-    }
     if (JuliaVariable *gv = julia_const_gv(p)) {
         // if this is a known special object, use the existing GlobalValue
-        return prepare_global_in(jl_Module, gv);
+        return prepare_global_in(M, gv);
     }
     if (jl_is_datatype(p)) {
         jl_datatype_t *addr = (jl_datatype_t*)p;
         if (addr->smalltag) {
             // some common builtin datatypes have a special pool for accessing them by smalltag id
-            Constant *tag = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), addr->smalltag << 4);
-            Constant *smallp = ConstantExpr::getInBoundsGetElementPtr(getInt8Ty(ctx.builder.getContext()), prepare_global_in(jl_Module, jlsmall_typeof_var), tag);
-            return ConstantExpr::getBitCast(smallp, ctx.types().T_ppjlvalue);
+            Constant *tag = ConstantInt::get(getInt32Ty(M->getContext()), addr->smalltag << 4);
+            Constant *smallp = ConstantExpr::getInBoundsGetElementPtr(getInt8Ty(M->getContext()), prepare_global_in(M, jl_small_typeof_var), tag);
+            if (smallp->getType()->getPointerAddressSpace() != 0)
+                smallp = ConstantExpr::getAddrSpaceCast(smallp, getPointerTy(M->getContext()));
+            return smallp;
         }
         // DataTypes are prefixed with a +
-        return julia_pgv(ctx, "+", addr->name->name, addr->name->module, p);
+        return julia_pgv(params, M, "+", addr->name->name, addr->name->module, p);
     }
     if (jl_is_method(p)) {
         jl_method_t *m = (jl_method_t*)p;
         // functions are prefixed with a -
-        return julia_pgv(ctx, "-", m->name, m->module, p);
+        return julia_pgv(params, M, "-", m->name, m->module, p);
     }
     if (jl_is_method_instance(p)) {
         jl_method_instance_t *linfo = (jl_method_instance_t*)p;
         // Type-inferred functions are also prefixed with a -
         if (jl_is_method(linfo->def.method))
-            return julia_pgv(ctx, "-", linfo->def.method->name, linfo->def.method->module, p);
+            return julia_pgv(params, M, "-", linfo->def.method->name, linfo->def.method->module, p);
     }
     if (jl_is_symbol(p)) {
         jl_sym_t *addr = (jl_sym_t*)p;
         // Symbols are prefixed with jl_sym#
-        return julia_pgv(ctx, "jl_sym#", addr, NULL, p);
+        return julia_pgv(params, M, "jl_sym#", addr, NULL, p);
     }
     // something else gets just a generic name
-    return julia_pgv(ctx, "jl_global#", p);
+    return julia_pgv(params, M, "jl_global#", p);
 }
 
 static size_t dereferenceable_size(jl_value_t *jt)
 {
-    if (jl_is_array_type(jt)) {
-        // Array has at least this much data
-        return sizeof(jl_array_t);
-    }
-    else if (jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)) {
+    if (jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)) {
         return jl_datatype_size(jt);
     }
     return 0;
@@ -451,15 +505,12 @@ static size_t dereferenceable_size(jl_value_t *jt)
 // Return the min required / expected alignment of jltype (on the stack or heap)
 static unsigned julia_alignment(jl_value_t *jt)
 {
-    if (jl_is_array_type(jt)) {
-        // Array always has this alignment
-        return JL_SMALL_BYTE_ALIGNMENT;
-    }
     if (jt == (jl_value_t*)jl_datatype_type) {
         // types are never allocated in julia code/on the stack
         // and this is the guarantee we have for the GC bits
         return 16;
     }
+
     assert(jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt));
     unsigned alignment = jl_datatype_align(jt);
     if (alignment > JL_HEAP_ALIGNMENT)
@@ -513,9 +564,7 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p)
 {
     if (p == NULL)
         return Constant::getNullValue(ctx.types().T_pjlvalue);
-    if (!ctx.emission_context.imaging)
-        return literal_static_pointer_val(p, ctx.types().T_pjlvalue);
-    Value *pgv = literal_pointer_val_slot(ctx, p);
+    Value *pgv = literal_pointer_val_slot(ctx.emission_context, jl_Module, p);
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     auto load = ai.decorateInst(maybe_mark_load_dereferenceable(
             ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))),
@@ -524,49 +573,28 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p)
     return load;
 }
 
-// Returns ctx.types().T_pjlvalue
-static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p)
-{
-    // emit a pointer to any jl_value_t which will be valid across reloading code
-    if (p == NULL)
-        return Constant::getNullValue(ctx.types().T_pjlvalue);
-    if (!ctx.emission_context.imaging)
-        return literal_static_pointer_val(p, ctx.types().T_pjlvalue);
-    // bindings are prefixed with jl_bnd#
-    jl_globalref_t *gr = p->globalref;
-    Value *pgv = gr ? julia_pgv(ctx, "jl_bnd#", gr->name, gr->mod, p) : julia_pgv(ctx, "jl_bnd#", p);
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-    auto load = ai.decorateInst(maybe_mark_load_dereferenceable(
-            ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))),
-            false, sizeof(jl_binding_t), alignof(jl_binding_t)));
-    setName(ctx.emission_context, load, pgv->getName());
-    return load;
-}
-
 // bitcast a value, but preserve its address space when dealing with pointer types
 static Value *emit_bitcast(jl_codectx_t &ctx, Value *v, Type *jl_value)
 {
-    if (isa<PointerType>(jl_value) &&
-        v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) {
-        // Cast to the proper address space
-        Type *jl_value_addr = PointerType::getWithSamePointeeType(cast<PointerType>(jl_value), v->getType()->getPointerAddressSpace());
-        ++EmittedPointerBitcast;
-        return ctx.builder.CreateBitCast(v, jl_value_addr);
+    if (isa<PointerType>(jl_value)) {
+        return v;
     }
     else {
         return ctx.builder.CreateBitCast(v, jl_value);
     }
 }
 
-static Value *maybe_bitcast(jl_codectx_t &ctx, Value *V, Type *to) {
-    if (to != V->getType())
-        return emit_bitcast(ctx, V, to);
-    return V;
-}
+// static Value *maybe_bitcast(jl_codectx_t &ctx, Value *V, Type *to) {
+//     if (isa<PointerType>(to)) {
+//         return V;
+//     }
+//     if (to != V->getType())
+//         return emit_bitcast(ctx, V, to);
+//     return V;
+// }
 
 static Value *julia_binding_pvalue(jl_codectx_t &ctx, Value *bv)
 {
-    bv = emit_bitcast(ctx, bv, ctx.types().T_pprjlvalue);
     Value *offset = ConstantInt::get(ctx.types().T_size, offsetof(jl_binding_t, value) / ctx.types().sizeof_ptr);
     return ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, bv, offset);
 }
@@ -575,32 +603,16 @@ static Value *julia_binding_gv(jl_codectx_t &ctx, jl_binding_t *b)
 {
     // emit a literal_pointer_val to a jl_binding_t
     // binding->value are prefixed with *
-    if (ctx.emission_context.imaging) {
-        jl_globalref_t *gr = b->globalref;
-        Value *pgv = gr ? julia_pgv(ctx, "*", gr->name, gr->mod, b) : julia_pgv(ctx, "*jl_bnd#", b);
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-        auto load = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))));
-        setName(ctx.emission_context, load, pgv->getName());
-        return load;
-    }
-    else {
-        return literal_static_pointer_val(b, ctx.types().T_pjlvalue);
-    }
+    jl_globalref_t *gr = b->globalref;
+    Value *pgv = gr ? julia_pgv(ctx.emission_context, jl_Module, "*", gr->name, gr->mod, b) : julia_pgv(ctx.emission_context, jl_Module, "*jl_bnd#", b);
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+    auto load = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))));
+    setName(ctx.emission_context, load, pgv->getName());
+    return load;
 }
 
 // --- mapping between julia and llvm types ---
 
-static bool type_is_permalloc(jl_value_t *typ)
-{
-    // Singleton should almost always be handled by the later optimization passes.
-    // Also do it here since it is cheap and save some effort in LLVM passes.
-    if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ))
-        return true;
-    return typ == (jl_value_t*)jl_symbol_type ||
-        typ == (jl_value_t*)jl_int8_type ||
-        typ == (jl_value_t*)jl_uint8_type;
-}
-
 static unsigned convert_struct_offset(const llvm::DataLayout &DL, Type *lty, unsigned byte_offset)
 {
     const StructLayout *SL = DL.getStructLayout(cast<StructType>(lty));
@@ -614,19 +626,13 @@ static unsigned convert_struct_offset(jl_codectx_t &ctx, Type *lty, unsigned byt
     return convert_struct_offset(ctx.builder.GetInsertBlock()->getModule()->getDataLayout(), lty, byte_offset);
 }
 
-static Value *emit_struct_gep(jl_codectx_t &ctx, Type *lty, Value *base, unsigned byte_offset)
-{
-    unsigned idx = convert_struct_offset(ctx, lty, byte_offset);
-    return ctx.builder.CreateConstInBoundsGEP2_32(lty, base, 0, idx);
-}
-
 static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall=false);
 
 static Type *_julia_type_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed)
 {
     // this function converts a Julia Type into the equivalent LLVM type
     if (isboxed) *isboxed = false;
-    if (jt == (jl_value_t*)jl_bottom_type)
+    if (jt == (jl_value_t*)jl_bottom_type || jt == (jl_value_t*)jl_typeofbottom_type || jt == (jl_value_t*)jl_typeofbottom_type->super)
         return getVoidTy(ctxt);
     if (jl_is_concrete_immutable(jt)) {
         if (jl_datatype_nbits(jt) == 0)
@@ -667,6 +673,10 @@ static Type *bitstype_to_llvm(jl_value_t *bt, LLVMContext &ctxt, bool llvmcall =
         return getFloatTy(ctxt);
     if (bt == (jl_value_t*)jl_float64_type)
         return getDoubleTy(ctxt);
+    if (bt == (jl_value_t*)jl_bfloat16_type)
+        return getBFloatTy(ctxt);
+    if (jl_is_cpointer_type(bt))
+        return PointerType::get(ctxt, 0);
     if (jl_is_llvmpointer_type(bt)) {
         jl_value_t *as_param = jl_tparam1(bt);
         int as;
@@ -676,7 +686,7 @@ static Type *bitstype_to_llvm(jl_value_t *bt, LLVMContext &ctxt, bool llvmcall =
             as = jl_unbox_int64(as_param);
         else
             jl_error("invalid pointer address space");
-        return PointerType::get(getInt8Ty(ctxt), as);
+        return PointerType::get(ctxt, as);
     }
     int nb = jl_datatype_size(bt);
     return Type::getIntNTy(ctxt, nb * 8);
@@ -695,18 +705,55 @@ static unsigned jl_field_align(jl_datatype_t *dt, size_t i)
     return std::min({al, (unsigned)jl_datatype_align(dt), (unsigned)JL_HEAP_ALIGNMENT});
 }
 
+static llvm::StructType* get_jlmemoryref(llvm::LLVMContext &C, unsigned AS) {
+    return llvm::StructType::get(C, {
+            llvm::PointerType::get(C, AS),
+            JuliaType::get_prjlvalue_ty(C),
+            });
+}
+static llvm::StructType* get_jlmemoryboxedref(llvm::LLVMContext &C, unsigned AS) {
+    return llvm::StructType::get(C, {
+            llvm::PointerType::get(C, AS),
+            JuliaType::get_prjlvalue_ty(C),
+            });
+}
+static llvm::StructType* get_jlmemoryunionref(llvm::LLVMContext &C, llvm::Type *T_size) {
+    return llvm::StructType::get(C, {
+            T_size, // offset
+            JuliaType::get_prjlvalue_ty(C),
+            });
+}
+static StructType *get_memoryref_type(LLVMContext &ctxt, Type *T_size, const jl_datatype_layout_t *layout, unsigned AS)
+{
+    // TODO: try to remove this slightly odd special case
+    bool isboxed = layout->flags.arrayelem_isboxed;
+    bool isunion = layout->flags.arrayelem_isunion;
+    bool isghost = layout->size == 0;
+    if (isboxed)
+        return get_jlmemoryboxedref(ctxt, AS);
+    if (isunion || isghost)
+        return get_jlmemoryunionref(ctxt, T_size);
+    return get_jlmemoryref(ctxt, AS);
+}
+
 static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall)
 {
     // this function converts a Julia Type into the equivalent LLVM struct
     // use this where C-compatible (unboxed) structs are desired
     // use julia_type_to_llvm directly when you want to preserve Julia's type semantics
     if (isboxed) *isboxed = false;
-    if (jt == (jl_value_t*)jl_bottom_type)
+    if (jt == (jl_value_t*)jl_bottom_type || jt == (jl_value_t*)jl_typeofbottom_type || jt == (jl_value_t*)jl_typeofbottom_type->super)
         return getVoidTy(ctxt);
     if (jl_is_primitivetype(jt))
         return bitstype_to_llvm(jt, ctxt, llvmcall);
     jl_datatype_t *jst = (jl_datatype_t*)jt;
-    if (jl_is_structtype(jt) && !(jst->layout && jl_is_layout_opaque(jst->layout))) {
+    if (jl_is_structtype(jt) && !(jst->layout && jl_is_layout_opaque(jst->layout)) && !jl_is_array_type(jst) && !jl_is_genericmemory_type(jst)) {
+        if (jl_is_genericmemoryref_type(jst)) {
+            jl_value_t *mty_dt = jl_field_type_concrete(jst, 1);
+            const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
+            Type *T_size = bitstype_to_llvm((jl_value_t*)jl_long_type, ctxt);
+            return get_memoryref_type(ctxt, T_size, layout, 0);
+        }
         bool isTuple = jl_is_tuple_type(jt);
         jl_svec_t *ftypes = jl_get_fieldtypes(jst);
         size_t i, ntypes = jl_svec_len(ftypes);
@@ -717,12 +764,13 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt,
         if (ntypes == 0 || jl_datatype_nbits(jst) == 0)
             return getVoidTy(ctxt);
         Type *_struct_decl = NULL;
-        // TODO: we should probably make a temporary root for `jst` somewhere
+        if (ctx)
+            jl_temporary_root(*ctx, jt);
         // don't use pre-filled struct_decl for llvmcall (f16, etc. may be different)
         Type *&struct_decl = (ctx && !llvmcall ? ctx->llvmtypes[jst] : _struct_decl);
         if (struct_decl)
             return struct_decl;
-        std::vector<Type*> latypes(0);
+        SmallVector<Type*, 0> latypes(0);
         bool isarray = true;
         bool isvector = true;
         jl_value_t *jlasttype = NULL;
@@ -744,17 +792,15 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt,
                 lty = JuliaType::get_prjlvalue_ty(ctxt);
                 isvector = false;
             }
-            else if (ty == (jl_value_t*)jl_bool_type) {
-                lty = getInt8Ty(ctxt);
-            }
             else if (jl_is_uniontype(ty)) {
                 // pick an Integer type size such that alignment will generally be correct,
                 // and always end with an Int8 (selector byte).
                 // We may need to insert padding first to get to the right offset
                 size_t fsz = 0, al = 0;
                 bool isptr = !jl_islayout_inline(ty, &fsz, &al);
-                assert(!isptr && fsz == jl_field_size(jst, i) - 1); (void)isptr;
-                if (fsz > 0) {
+                assert(!isptr && fsz < jl_field_size(jst, i)); (void)isptr;
+                size_t fsz1 = jl_field_size(jst, i) - 1;
+                if (fsz1 > 0) {
                     if (al > MAX_ALIGN) {
                         Type *AlignmentType;
                         AlignmentType = ArrayType::get(FixedVectorType::get(getInt8Ty(ctxt), al), 0);
@@ -762,8 +808,8 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt,
                         al = MAX_ALIGN;
                     }
                     Type *AlignmentType = IntegerType::get(ctxt, 8 * al);
-                    unsigned NumATy = fsz / al;
-                    unsigned remainder = fsz % al;
+                    unsigned NumATy = fsz1 / al;
+                    unsigned remainder = fsz1 % al;
                     assert(al == 1 || NumATy > 0);
                     while (NumATy--)
                         latypes.push_back(AlignmentType);
@@ -821,13 +867,13 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt,
     //  // pick an Integer type size such that alignment will be correct
     //  // and always end with an Int8 (selector byte)
     //  lty = ArrayType::get(IntegerType::get(lty->getContext(), 8 * al), fsz / al);
-    //  std::vector<Type*> Elements(2);
+    //  SmallVector<Type*, 0> Elements(2);
     //  Elements[0] = lty;
     //  Elements[1] = getInt8Ty(ctxt);
     //  unsigned remainder = fsz % al;
     //  while (remainder--)
     //      Elements.push_back(getInt8Ty(ctxt));
-    //  lty = StructType::get(lty->getContext(), makeArrayRef(Elements));
+    //  lty = StructType::get(lty->getContext(),ArrayRef<Type*>(Elements));
     // }
     if (isboxed) *isboxed = true;
     return JuliaType::get_prjlvalue_ty(ctxt);
@@ -874,7 +920,7 @@ static bool is_tupletype_homogeneous(jl_svec_t *t, bool allow_va = false)
 }
 
 static bool for_each_uniontype_small(
-        std::function<void(unsigned, jl_datatype_t*)> f,
+        llvm::function_ref<void(unsigned, jl_datatype_t*)> f,
         jl_value_t *ty,
         unsigned &counter)
 {
@@ -885,6 +931,9 @@ static bool for_each_uniontype_small(
         allunbox &= for_each_uniontype_small(f, ((jl_uniontype_t*)ty)->b, counter);
         return allunbox;
     }
+    else if (ty == (jl_value_t*)jl_typeofbottom_type->super) {
+        f(++counter, jl_typeofbottom_type); // treat Tuple{union{}} as identical to typeof(Union{})
+    }
     else if (jl_is_pointerfree(ty)) {
         f(++counter, (jl_datatype_t*)ty);
         return true;
@@ -928,7 +977,7 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x)
     if (x.constant) {
         Constant *val = julia_const_to_llvm(ctx, x.constant);
         if (val)
-            data = get_pointer_to_constant(ctx.emission_context, val, "_j_const", *jl_Module);
+            data = get_pointer_to_constant(ctx.emission_context, val, Align(julia_alignment(jl_typeof(x.constant))), "_j_const", *jl_Module);
         else
             data = literal_pointer_val(ctx, x.constant);
     }
@@ -943,57 +992,10 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x)
 }
 
 static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
-                             jl_aliasinfo_t const &src_ai, uint64_t sz, unsigned align, bool is_volatile)
+                             jl_aliasinfo_t const &src_ai, uint64_t sz, Align align_dst, Align align_src, bool is_volatile)
 {
     if (sz == 0)
         return;
-    assert(align && "align must be specified");
-    // If the types are small and simple, use load and store directly.
-    // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int
-    // that interferes with other optimizations.
-    // TODO: Restore this for opaque pointers? Needs extra type information from the caller.
-    if (ctx.builder.getContext().supportsTypedPointers() && sz <= 64) {
-        // The size limit is arbitrary but since we mainly care about floating points and
-        // machine size vectors this should be enough.
-        const DataLayout &DL = jl_Module->getDataLayout();
-        auto srcty = cast<PointerType>(src->getType());
-        //TODO unsafe nonopaque pointer
-        auto srcel = srcty->getNonOpaquePointerElementType();
-        auto dstty = cast<PointerType>(dst->getType());
-        //TODO unsafe nonopaque pointer
-        auto dstel = dstty->getNonOpaquePointerElementType();
-        while (srcel->isArrayTy() && srcel->getArrayNumElements() == 1) {
-            src = ctx.builder.CreateConstInBoundsGEP2_32(srcel, src, 0, 0);
-            srcel = srcel->getArrayElementType();
-            srcty = srcel->getPointerTo();
-        }
-        while (dstel->isArrayTy() && dstel->getArrayNumElements() == 1) {
-            dst = ctx.builder.CreateConstInBoundsGEP2_32(dstel, dst, 0, 0);
-            dstel = dstel->getArrayElementType();
-            dstty = dstel->getPointerTo();
-        }
-
-        llvm::Type *directel = nullptr;
-        if (srcel->isSized() && srcel->isSingleValueType() && DL.getTypeStoreSize(srcel) == sz) {
-            directel = srcel;
-            dst = emit_bitcast(ctx, dst, srcty);
-        }
-        else if (dstel->isSized() && dstel->isSingleValueType() &&
-                 DL.getTypeStoreSize(dstel) == sz) {
-            directel = dstel;
-            src = emit_bitcast(ctx, src, dstty);
-        }
-        if (directel) {
-            if (isa<Instruction>(src) && !src->hasName())
-                setName(ctx.emission_context, src, "memcpy_refined_src");
-            if (isa<Instruction>(dst) && !dst->hasName())
-                setName(ctx.emission_context, dst, "memcpy_refined_dst");
-            auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, Align(align), is_volatile));
-            dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align), is_volatile));
-            ++SkippedMemcpys;
-            return;
-        }
-    }
     ++EmittedMemcpys;
 
     // the memcpy intrinsic does not allow to specify different alias tags
@@ -1007,51 +1009,278 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
     // above problem won't be as serious.
 
     auto merged_ai = dst_ai.merge(src_ai);
-    ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile,
+    ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile,
                              merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
 }
 
 static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
-                             jl_aliasinfo_t const &src_ai, Value *sz, unsigned align, bool is_volatile)
+                             jl_aliasinfo_t const &src_ai, Value *sz, Align align_dst, Align align_src, bool is_volatile)
 {
     if (auto const_sz = dyn_cast<ConstantInt>(sz)) {
-        emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align, is_volatile);
+        emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align_dst, align_src, is_volatile);
         return;
     }
     ++EmittedMemcpys;
 
     auto merged_ai = dst_ai.merge(src_ai);
-    ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile,
+    ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile,
                              merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
 }
 
 template<typename T1>
 static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
-                        jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align, bool is_volatile=false)
+                        jl_aliasinfo_t const &src_ai, T1 &&sz, Align align_dst, Align align_src, bool is_volatile=false)
 {
-    emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align, is_volatile);
+    emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align_dst, align_src, is_volatile);
 }
 
 template<typename T1>
 static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, const jl_cgval_t &src,
-                        T1 &&sz, unsigned align, bool is_volatile=false)
+                        T1 &&sz, Align align_dst, Align align_src, bool is_volatile=false)
 {
     auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, src.tbaa);
-    emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align, is_volatile);
+    emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, align_src, is_volatile);
+}
+
+static bool allpointers(jl_datatype_t *typ)
+{
+    return jl_datatype_size(typ) == typ->layout->npointers * sizeof(void*);
+}
+
+// compute the space required by split_value_into, by simulating it
+// returns (sizeof(split_value), n_pointers)
+static std::pair<size_t,size_t> split_value_size(jl_datatype_t *typ)
+{
+    assert(jl_is_datatype(typ));
+    size_t dst_off = 0;
+    bool hasptr = typ->layout->first_ptr >= 0;
+    size_t npointers = hasptr ? typ->layout->npointers : 0;
+    // drop the data pointer if the entire structure is just pointers
+    // TODO: eventually we could drop the slots for the pointers from inside the
+    //       types to pack it together, but this can change the alignment of the bits
+    //       in the fields inside, even if those bits have no pointers themselves. So
+    //       we would actually need to compute, for each pointer, whether any
+    //       subsequent field needed the extra alignment (for example, we can
+    //       drop space for any runs of two/four pointer).  Some of these
+    //       functions are already written in a way to support that, but not
+    //       fully implemented yet.
+    bool nodata = allpointers(typ);
+    if (nodata)
+        dst_off = 0;
+    else
+        dst_off = jl_datatype_size(typ);
+    return std::make_pair(dst_off, npointers);
+}
+
+// take a value `x` and split its bits into dst and the roots into inline_roots
+static void split_value_into(jl_codectx_t &ctx, const jl_cgval_t &x, Align align_src, Value *dst, Align align_dst, jl_aliasinfo_t const &dst_ai, Value *inline_roots_ptr, jl_aliasinfo_t const &roots_ai, bool isVolatileStore=false)
+{
+    jl_datatype_t *typ = (jl_datatype_t*)x.typ;
+    assert(jl_is_concrete_type(x.typ));
+    auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    Type *T_prjlvalue = ctx.types().T_prjlvalue;
+    if (!x.inline_roots.empty()) {
+        auto sizes = split_value_size(typ);
+        if (sizes.first > 0)
+            emit_memcpy(ctx, dst, dst_ai, x.V, src_ai, sizes.first, align_dst, align_src, isVolatileStore);
+        for (size_t i = 0; i < sizes.second; i++) {
+            Value *unbox = x.inline_roots[i];
+            roots_ai.decorateInst(ctx.builder.CreateAlignedStore(unbox, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(void*)), Align(sizeof(void*)), isVolatileStore));
+        }
+        return;
+    }
+    if (inline_roots_ptr == nullptr) {
+        emit_unbox_store(ctx, x, dst, ctx.tbaa().tbaa_stack, align_src, align_dst, isVolatileStore);
+        return;
+    }
+    Value *src = data_pointer(ctx, value_to_pointer(ctx, x));
+    bool isstack = isa<AllocaInst>(src->stripInBoundsOffsets()) || src_ai.tbaa == ctx.tbaa().tbaa_stack;
+    size_t dst_off = 0;
+    size_t src_off = 0;
+    bool hasptr = typ->layout->first_ptr >= 0;
+    size_t npointers = hasptr ? typ->layout->npointers : 0;
+    bool nodata = allpointers(typ);
+    for (size_t i = 0; true; i++) {
+        bool last = i == npointers;
+        size_t ptr = last ? jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*));
+        if (ptr > src_off) {
+            emit_memcpy(ctx,
+                emit_ptrgep(ctx, dst, dst_off),
+                dst_ai,
+                emit_ptrgep(ctx, src, src_off),
+                src_ai,
+                ptr - src_off,
+                align_dst,
+                align_src,
+                isVolatileStore);
+            dst_off += ptr - src_off;
+        }
+        if (last)
+            break;
+        auto *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, src, ptr), Align(sizeof(void*)));
+        if (!isstack)
+            load->setOrdering(AtomicOrdering::Unordered);
+        src_ai.decorateInst(load);
+        roots_ai.decorateInst(ctx.builder.CreateAlignedStore(load, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(void*)), Align(sizeof(void*)), isVolatileStore));
+        align_src = align_dst = Align(sizeof(void*));
+        src_off = ptr + sizeof(void*);
+        if (!nodata) {
+            // store an undef pointer here, to make sure nobody looks at this
+            dst_ai.decorateInst(ctx.builder.CreateAlignedStore(
+                ctx.builder.getIntN(sizeof(void*) * 8, (uint64_t)-1),
+                emit_ptrgep(ctx, dst, dst_off),
+                align_src,
+                isVolatileStore));
+            dst_off += sizeof(void*);
+            assert(dst_off == src_off);
+        }
+    }
 }
 
-static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *type)
+static void split_value_into(jl_codectx_t &ctx, const jl_cgval_t &x, Align align_src, Value *dst, Align align_dst, jl_aliasinfo_t const &dst_ai, MutableArrayRef<Value*> inline_roots)
 {
-    // p = (jl_value_t**)v; *(type*)&p[n]
-    Value *vptr = ctx.builder.CreateInBoundsGEP(
-            ctx.types().T_prjlvalue,
-            emit_bitcast(ctx, maybe_decay_tracked(ctx, v), ctx.types().T_pprjlvalue),
-            idx);
-    setName(ctx.emission_context, vptr, "arraysize_ptr");
-    LoadInst *load = ctx.builder.CreateLoad(type, emit_bitcast(ctx, vptr, PointerType::get(type, 0)));
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
-    ai.decorateInst(load);
-    return load;
+    jl_datatype_t *typ = (jl_datatype_t*)x.typ;
+    assert(jl_is_concrete_type(x.typ));
+    auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    Type *T_prjlvalue = ctx.types().T_prjlvalue;
+    if (!x.inline_roots.empty()) {
+        auto sizes = split_value_size(typ);
+        if (sizes.first > 0)
+            emit_memcpy(ctx, dst, dst_ai, x.V, src_ai, sizes.first, align_dst, align_src);
+        for (size_t i = 0; i < sizes.second; i++)
+            inline_roots[i] = x.inline_roots[i];
+        return;
+    }
+    if (inline_roots.empty()) {
+        emit_unbox_store(ctx, x, dst, ctx.tbaa().tbaa_stack, align_src, align_dst, false);
+        return;
+    }
+    Value *src = data_pointer(ctx, value_to_pointer(ctx, x));
+    bool isstack = isa<AllocaInst>(src->stripInBoundsOffsets()) || src_ai.tbaa == ctx.tbaa().tbaa_stack;
+    size_t dst_off = 0;
+    size_t src_off = 0;
+    bool hasptr = typ->layout->first_ptr >= 0;
+    size_t npointers = hasptr ? typ->layout->npointers : 0;
+    bool nodata = allpointers(typ);
+    for (size_t i = 0; true; i++) {
+        bool last = i == npointers;
+        size_t ptr = last ? jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*));
+        if (ptr > src_off) {
+            emit_memcpy(ctx,
+                emit_ptrgep(ctx, dst, dst_off),
+                dst_ai,
+                emit_ptrgep(ctx, src, src_off),
+                src_ai,
+                ptr - src_off,
+                align_dst,
+                align_src);
+            dst_off += ptr - src_off;
+        }
+        if (last)
+            break;
+        auto *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, src, ptr), Align(sizeof(void*)));
+        if (!isstack)
+            load->setOrdering(AtomicOrdering::Unordered);
+        src_ai.decorateInst(load);
+        inline_roots[i] = load;
+        align_src = align_dst = Align(sizeof(void*));
+        src_off = ptr + sizeof(void*);
+        if (!nodata) {
+            // store an undef pointer here, to make sure nobody looks at this
+            dst_ai.decorateInst(ctx.builder.CreateAlignedStore(
+                ctx.builder.getIntN(sizeof(void*) * 8, (uint64_t)-1),
+                emit_ptrgep(ctx, dst, dst_off),
+                align_src));
+            dst_off += sizeof(void*);
+            assert(dst_off == src_off);
+        }
+    }
+}
+
+static std::pair<AllocaInst*, SmallVector<Value*,0>> split_value(jl_codectx_t &ctx, const jl_cgval_t &x, Align x_alignment)
+{
+    jl_datatype_t *typ = (jl_datatype_t*)x.typ;
+    auto sizes = split_value_size(typ);
+    Align align_dst(julia_alignment((jl_value_t*)typ));
+    AllocaInst *bits = sizes.first > 0 ? emit_static_alloca(ctx, sizes.first, align_dst) : nullptr;
+    SmallVector<Value*,0> roots(sizes.second);
+    auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+    split_value_into(ctx, x, x_alignment, bits, align_dst, stack_ai, MutableArrayRef(roots));
+    return std::make_pair(bits, roots);
+}
+
+// Return the offset values corresponding to jl_field_offset, but into the two buffers for a split value (or -1)
+static std::pair<ssize_t,ssize_t> split_value_field(jl_datatype_t *typ, unsigned idx)
+{
+    size_t fldoff = jl_field_offset(typ, idx);
+    size_t src_off = 0;
+    size_t dst_off = 0;
+    assert(typ->layout->first_ptr >= 0);
+    size_t npointers = typ->layout->npointers;
+    bool nodata = allpointers(typ);
+    for (size_t i = 0; i < npointers; i++) {
+        size_t ptr = jl_ptr_offset(typ, i) * sizeof(void*);
+        if (ptr >= fldoff) {
+            if (ptr >= fldoff + jl_field_size(typ, idx))
+                break;
+            bool onlyptr = jl_field_isptr(typ, idx) || allpointers((jl_datatype_t*)jl_field_type(typ, idx));
+            return std::make_pair(onlyptr ? -1 : dst_off + fldoff - src_off, i);
+        }
+        dst_off += ptr - src_off;
+        src_off = ptr + sizeof(void*);
+        if (!nodata) {
+            assert(dst_off + sizeof(void*) == src_off);
+            dst_off = src_off;
+        }
+    }
+    return std::make_pair(dst_off + fldoff - src_off, -1);
+}
+
+// Copy `x` to `dst`, where `x` was a split value and dst needs to have a native layout, copying any inlined roots back into their native location.
+// This does not respect roots, so you must call emit_write_multibarrier afterwards.
+static void recombine_value(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dst, jl_aliasinfo_t const &dst_ai, Align alignment, bool isVolatileStore)
+{
+    jl_datatype_t *typ = (jl_datatype_t*)x.typ;
+    assert(jl_is_concrete_type(x.typ));
+    assert(typ->layout->first_ptr >= 0 && !x.inline_roots.empty());
+    Align align_dst = alignment;
+    Align align_src(julia_alignment(x.typ));
+    Value *src = x.V;
+    auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    size_t dst_off = 0;
+    size_t src_off = 0;
+    size_t npointers = typ->layout->npointers;
+    bool nodata = allpointers(typ);
+    bool isstack = isa<AllocaInst>(dst->stripInBoundsOffsets()) || dst_ai.tbaa == ctx.tbaa().tbaa_stack;
+    for (size_t i = 0; true; i++) {
+        bool last = i == npointers;
+        size_t ptr = last ? jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*));
+        if (ptr > dst_off) {
+            emit_memcpy(ctx,
+                emit_ptrgep(ctx, dst, dst_off),
+                dst_ai,
+                emit_ptrgep(ctx, src, src_off),
+                src_ai,
+                ptr - dst_off,
+                align_dst,
+                align_src,
+                isVolatileStore);
+            src_off += ptr - dst_off;
+        }
+        if (last)
+            break;
+        auto *root = x.inline_roots[i];
+        auto *store = ctx.builder.CreateAlignedStore(root, emit_ptrgep(ctx, dst, ptr), Align(sizeof(void*)), isVolatileStore);
+        if (!isstack)
+            store->setOrdering(AtomicOrdering::Unordered);
+        dst_ai.decorateInst(store);
+        align_dst = align_src = Align(sizeof(void*));
+        dst_off = ptr + sizeof(void*);
+        if (!nodata) {
+            assert(src_off + sizeof(void*) == dst_off);
+            src_off = dst_off;
+        }
+    }
 }
 
 static Value *emit_tagfrom(jl_codectx_t &ctx, jl_datatype_t *dt)
@@ -1095,7 +1324,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull
         if (jl_has_intersect_type_not_kind(typ))
             return false;
         for (size_t i = 0; i < jl_tags_count; i++) {
-            jl_datatype_t *dt = small_typeof[(i << 4) / sizeof(*small_typeof)];
+            jl_datatype_t *dt = jl_small_typeof[(i << 4) / sizeof(*jl_small_typeof)];
             if (dt && !jl_has_empty_intersection((jl_value_t*)dt, typ))
                 return false;
         }
@@ -1106,24 +1335,19 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull
     if (p.TIndex) {
         Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
         bool allunboxed = is_uniontype_allunboxed(p.typ);
-        Type *expr_type = justtag ? ctx.types().T_size : ctx.emission_context.imaging ? ctx.types().T_pjlvalue : ctx.types().T_prjlvalue;
-        Value *datatype_or_p = Constant::getNullValue(ctx.emission_context.imaging ? expr_type->getPointerTo() : expr_type);
+        Type *expr_type = justtag ? ctx.types().T_size : ctx.types().T_pjlvalue;
+        Value *datatype_or_p = Constant::getNullValue(PointerType::getUnqual(expr_type->getContext()));
         unsigned counter = 0;
         for_each_uniontype_small(
             [&](unsigned idx, jl_datatype_t *jt) {
                 Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx));
                 Constant *ptr;
                 if (justtag && jt->smalltag) {
-                    ptr = ConstantInt::get(expr_type, jt->smalltag << 4);
-                    if (ctx.emission_context.imaging)
-                        ptr = get_pointer_to_constant(ctx.emission_context, ptr, "_j_tag", *jl_Module);
+                    ptr = get_pointer_to_constant(ctx.emission_context, ConstantInt::get(expr_type, jt->smalltag << 4), Align(sizeof(jl_value_t*)), StringRef("_j_smalltag_") + jl_symbol_name(jt->name->name), *jl_Module);
+                }
+                else {
+                    ptr = ConstantExpr::getBitCast(literal_pointer_val_slot(ctx.emission_context, jl_Module, (jl_value_t*)jt), datatype_or_p->getType());
                 }
-                else if (ctx.emission_context.imaging)
-                    ptr = ConstantExpr::getBitCast(literal_pointer_val_slot(ctx, (jl_value_t*)jt), datatype_or_p->getType());
-                else if (justtag)
-                    ptr = ConstantInt::get(expr_type, (uintptr_t)jt);
-                else
-                    ptr = ConstantExpr::getAddrSpaceCast(literal_static_pointer_val((jl_value_t*)jt, ctx.types().T_pjlvalue), expr_type);
                 datatype_or_p = ctx.builder.CreateSelect(cmp, ptr, datatype_or_p);
                 setName(ctx.emission_context, datatype_or_p, "typetag_ptr");
             },
@@ -1131,12 +1355,9 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull
             counter);
         auto emit_unboxty = [&] () -> Value* {
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-            if (ctx.emission_context.imaging) {
-                Value *datatype = ai.decorateInst(ctx.builder.CreateAlignedLoad(expr_type, datatype_or_p, Align(sizeof(void*))));
-                setName(ctx.emission_context, datatype, "typetag");
-                return justtag ? datatype : track_pjlvalue(ctx, datatype);
-            }
-            return datatype_or_p;
+            Value *datatype = ai.decorateInst(ctx.builder.CreateAlignedLoad(expr_type, datatype_or_p, Align(sizeof(void*))));
+            setName(ctx.emission_context, datatype, "typetag");
+            return justtag ? datatype : track_pjlvalue(ctx, datatype);
         };
         Value *res;
         if (!allunboxed) {
@@ -1171,35 +1392,42 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull
 
 static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt)
 {
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppjlvalue);
-    Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, types) / sizeof(void*));
+    Value *Ptr = decay_derived(ctx, dt);
+    unsigned Idx = offsetof(jl_datatype_t, types);
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     auto types = ai.decorateInst(ctx.builder.CreateAlignedLoad(
-                ctx.types().T_pjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, Ptr, Idx), Align(sizeof(void*))));
+                ctx.types().T_pjlvalue, emit_ptrgep(ctx, Ptr, Idx), Align(sizeof(void*))));
     setName(ctx.emission_context, types, "datatype_types");
     return types;
 }
 
 static Value *emit_datatype_nfields(jl_codectx_t &ctx, Value *dt)
 {
-    Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), ctx.types().T_size->getPointerTo());
+    Value *type_svec = emit_datatype_types(ctx, dt);
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     auto nfields = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, type_svec, Align(sizeof(void*))));
     setName(ctx.emission_context, nfields, "datatype_nfields");
     return nfields;
 }
 
-static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt)
+// emit the size field from the layout of a dt
+static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt, bool add_isunion=false)
 {
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), getInt32PtrTy(ctx.builder.getContext())->getPointerTo());
-    Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, layout) / sizeof(int32_t*));
-    Ptr = ctx.builder.CreateInBoundsGEP(getInt32PtrTy(ctx.builder.getContext()), Ptr, Idx);
-    Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32PtrTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*))));
-    Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, size) / sizeof(int32_t));
-    Ptr = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx);
-    auto Size = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t))));
+    Value *Ptr = decay_derived(ctx, dt);
+    Ptr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_t, layout));
+    Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getPointerTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*))));
+    Value *SizePtr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_layout_t, size));
+    Value *Size = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), SizePtr, Align(sizeof(int32_t))));
     setName(ctx.emission_context, Size, "datatype_size");
+    if (add_isunion) {
+        Value *FlagPtr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_layout_t, flags));
+        Value *Flag = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), FlagPtr, Align(sizeof(int16_t))));
+        Flag = ctx.builder.CreateLShr(Flag, 4);
+        Flag = ctx.builder.CreateAnd(Flag, ConstantInt::get(Flag->getType(), 1));
+        Flag = ctx.builder.CreateZExt(Flag, Size->getType());
+        Size = ctx.builder.CreateAdd(Size, Flag);
+    }
     return Size;
 }
 
@@ -1222,7 +1450,7 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p)
             BasicBlock *dynloadBB = BasicBlock::Create(ctx.builder.getContext(), "dyn_sizeof", ctx.f);
             BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_sizeof", ctx.f);
             Value *isboxed = ctx.builder.CreateICmpNE(
-                    ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                    ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                     ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
             ctx.builder.CreateCondBr(isboxed, dynloadBB, postBB);
             ctx.builder.SetInsertPoint(dynloadBB);
@@ -1251,26 +1479,26 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p)
         return dyn_size;
     }
 }
+*/
 
 static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt)
 {
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppint8);
+    Value *Ptr = decay_derived(ctx, dt);
     Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, name));
     Value *Nam = ai.decorateInst(
-            ctx.builder.CreateAlignedLoad(getInt8PtrTy(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8PtrTy(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int8_t*))));
+            ctx.builder.CreateAlignedLoad(getPointerTy(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getPointerTy(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int8_t*))));
     Value *Idx2 = ConstantInt::get(ctx.types().T_size, offsetof(jl_typename_t, n_uninitialized) + sizeof(((jl_typename_t*)nullptr)->n_uninitialized));
     Value *mutabl = ai.decorateInst(
             ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), Nam, Idx2), Align(1)));
     mutabl = ctx.builder.CreateLShr(mutabl, 1);
     return ctx.builder.CreateTrunc(mutabl, getInt1Ty(ctx.builder.getContext()));
 }
-*/
 
 static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ)
 {
     Value *isprimitive;
-    isprimitive = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, decay_derived(ctx, typ), getInt8PtrTy(ctx.builder.getContext())), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
+    isprimitive = emit_ptrgep(ctx, decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     isprimitive = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isprimitive, Align(1)));
     isprimitive = ctx.builder.CreateLShr(isprimitive, 7);
@@ -1282,10 +1510,7 @@ static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ)
 static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt)
 {
     unsigned n = offsetof(jl_datatype_t, name) / sizeof(char*);
-    Value *vptr = ctx.builder.CreateInBoundsGEP(
-            ctx.types().T_pjlvalue,
-            emit_bitcast(ctx, maybe_decay_tracked(ctx, dt), ctx.types().T_ppjlvalue),
-            ConstantInt::get(ctx.types().T_size, n));
+    Value *vptr = emit_ptrgep(ctx, maybe_decay_tracked(ctx, dt), n * sizeof(jl_value_t*));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     auto name = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*))));
     setName(ctx.emission_context, name, "datatype_name");
@@ -1297,13 +1522,13 @@ static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt)
 // the error is always thrown. This may cause non dominated use
 // of SSA value error in the verifier.
 
-static void just_emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt)
+static void just_emit_error(jl_codectx_t &ctx, Function *F, const Twine &txt)
 {
     ++EmittedErrors;
     ctx.builder.CreateCall(F, stringConstPtr(ctx.emission_context, ctx.builder, txt));
 }
 
-static void emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt)
+static void emit_error(jl_codectx_t &ctx, Function *F, const Twine &txt)
 {
     just_emit_error(ctx, F, txt);
     ctx.builder.CreateUnreachable();
@@ -1311,25 +1536,30 @@ static void emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt)
     ctx.builder.SetInsertPoint(cont);
 }
 
-static void emit_error(jl_codectx_t &ctx, const std::string &txt)
+static void emit_error(jl_codectx_t &ctx, const Twine &txt)
 {
     emit_error(ctx, prepare_call(jlerror_func), txt);
 }
 
 // DO NOT PASS IN A CONST CONDITION!
-static void error_unless(jl_codectx_t &ctx, Value *cond, const std::string &msg)
+static void error_unless(jl_codectx_t &ctx, Function *F, Value *cond, const Twine &msg)
 {
     ++EmittedConditionalErrors;
     BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f);
     BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass");
     ctx.builder.CreateCondBr(cond, passBB, failBB);
     ctx.builder.SetInsertPoint(failBB);
-    just_emit_error(ctx, prepare_call(jlerror_func), msg);
+    just_emit_error(ctx, F, msg);
     ctx.builder.CreateUnreachable();
-    ctx.f->getBasicBlockList().push_back(passBB);
+    passBB->insertInto(ctx.f);
     ctx.builder.SetInsertPoint(passBB);
 }
 
+static void error_unless(jl_codectx_t &ctx, Value *cond, const Twine &msg)
+{
+    error_unless(ctx, prepare_call(jlerror_func), cond, msg);
+}
+
 static void raise_exception(jl_codectx_t &ctx, Value *exc,
                             BasicBlock *contBB=nullptr)
 {
@@ -1340,7 +1570,7 @@ static void raise_exception(jl_codectx_t &ctx, Value *exc,
         contBB = BasicBlock::Create(ctx.builder.getContext(), "after_throw", ctx.f);
     }
     else {
-        ctx.f->getBasicBlockList().push_back(contBB);
+        contBB->insertInto(ctx.f);
     }
     ctx.builder.SetInsertPoint(contBB);
 }
@@ -1356,16 +1586,51 @@ static void raise_exception_unless(jl_codectx_t &ctx, Value *cond, Value *exc)
     raise_exception(ctx, exc, passBB);
 }
 
+static void undef_var_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *name, jl_value_t *scope)
+{
+    ++EmittedUndefVarErrors;
+    BasicBlock *err = BasicBlock::Create(ctx.builder.getContext(), "err", ctx.f);
+    BasicBlock *ifok = BasicBlock::Create(ctx.builder.getContext(), "ok");
+    ctx.builder.CreateCondBr(ok, ifok, err);
+    ctx.builder.SetInsertPoint(err);
+    ctx.builder.CreateCall(prepare_call(jlundefvarerror_func), {
+            mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)name)),
+            mark_callee_rooted(ctx, literal_pointer_val(ctx, scope))});
+    ctx.builder.CreateUnreachable();
+    ifok->insertInto(ctx.f);
+    ctx.builder.SetInsertPoint(ifok);
+}
+
+
+static bool has_known_null_nullptr(Type *T)
+{
+    if (auto PT = cast<PointerType>(T)) {
+        auto addrspace = PT->getAddressSpace();
+        if (addrspace == AddressSpace::Generic || (AddressSpace::FirstSpecial <= addrspace && addrspace <= AddressSpace::LastSpecial)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// ctx.builder.CreateIsNotNull(v) lowers incorrectly in non-standard
+// address spaces where null is not zero
+// TODO: adapt to https://github.com/llvm/llvm-project/pull/131557 once merged
 static Value *null_pointer_cmp(jl_codectx_t &ctx, Value *v)
 {
     ++EmittedNullchecks;
-    return ctx.builder.CreateICmpNE(v, Constant::getNullValue(v->getType()));
+    Type *T = v->getType();
+    if (has_known_null_nullptr(T))
+        return ctx.builder.CreateIsNotNull(v);
+    else
+        return ctx.builder.CreateICmpNE(v, ctx.builder.CreateAddrSpaceCast(
+            Constant::getNullValue(ctx.builder.getPtrTy(0)), T));
 }
 
 
 // If `nullcheck` is not NULL and a pointer NULL check is necessary
 // store the pointer to be checked in `*nullcheck` instead of checking it
-static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck = nullptr)
+static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck)
 {
     if (nullcheck) {
         *nullcheck = v;
@@ -1375,13 +1640,34 @@ static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck =
             literal_pointer_val(ctx, jl_undefref_exception));
 }
 
+
+static void null_load_check(jl_codectx_t &ctx, Value *v, jl_module_t *scope, jl_sym_t *name)
+{
+    Value *notnull = null_pointer_cmp(ctx, v);
+    if (name && scope)
+        undef_var_error_ifnot(ctx, notnull, name, (jl_value_t*)scope);
+    else
+        raise_exception_unless(ctx, notnull, literal_pointer_val(ctx, jl_undefref_exception));
+}
+
 template<typename Func>
-static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func)
+static void emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, MutableArrayRef<Value*> defval, Func &&func)
 {
+    if (ifnot == nullptr) {
+        auto res = func();
+        assert(res.size() == defval.size());
+        for (size_t i = 0; i < defval.size(); i++)
+            defval[i] = res[i];
+        return;
+    }
     if (auto Cond = dyn_cast<ConstantInt>(ifnot)) {
         if (Cond->isZero())
-            return defval;
-        return func();
+            return;
+        auto res = func();
+        assert(res.size() == defval.size());
+        for (size_t i = 0; i < defval.size(); i++)
+            defval[i] = res[i];
+        return;
     }
     ++EmittedGuards;
     BasicBlock *currBB = ctx.builder.GetInsertBlock();
@@ -1390,16 +1676,33 @@ static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval,
     ctx.builder.CreateCondBr(ifnot, passBB, exitBB);
     ctx.builder.SetInsertPoint(passBB);
     auto res = func();
+    assert(res.size() == defval.size());
     passBB = ctx.builder.GetInsertBlock();
     ctx.builder.CreateBr(exitBB);
     ctx.builder.SetInsertPoint(exitBB);
-    if (defval == nullptr)
+    for (size_t i = 0; i < defval.size(); i++) {
+        PHINode *phi = ctx.builder.CreatePHI(defval[i]->getType(), 2);
+        phi->addIncoming(defval[i], currBB);
+        phi->addIncoming(res[i], passBB);
+        setName(ctx.emission_context, phi, "guard_res");
+        defval[i] = phi;
+    }
+}
+
+template<typename Func>
+static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func)
+{
+    MutableArrayRef res(&defval, defval == nullptr ? 0 : 1);
+    auto funcwrap = [&func] () -> SmallVector<Value*,1> {
+        auto res = func();
+        if (res == nullptr)
+            return {};
+        return {res};
+    };
+    emit_guarded_test(ctx, ifnot, res, funcwrap);
+    if (res.empty())
         return nullptr;
-    PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2);
-    phi->addIncoming(defval, currBB);
-    phi->addIncoming(res, passBB);
-    setName(ctx.emission_context, phi, "guard_res");
-    return phi;
+    return res[0];
 }
 
 template<typename Func>
@@ -1456,8 +1759,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just
             // we lied a bit: this wasn't really an object (though it was valid for GC rooting)
             // and we need to use it as an index to get the real object now
             Module *M = jl_Module;
-            Value *smallp = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), prepare_global_in(M, jlsmall_typeof_var), tag);
-            smallp = ctx.builder.CreateBitCast(smallp, typetag->getType()->getPointerTo(0));
+            Value *smallp = emit_ptrgep(ctx, prepare_global_in(M, jl_small_typeof_var), tag);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
             auto small = ctx.builder.CreateAlignedLoad(typetag->getType(), smallp, M->getDataLayout().getPointerABIAlignment(0));
             small->setMetadata(LLVMContext::MD_nonnull, MDNode::get(M->getContext(), None));
@@ -1470,14 +1772,14 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just
 
 static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v,  bool is_promotable=false);
 
-static void just_emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg)
+static void just_emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const Twine &msg)
 {
     Value *msg_val = stringConstPtr(ctx.emission_context, ctx.builder, msg);
     ctx.builder.CreateCall(prepare_call(jltypeerror_func),
                        { msg_val, maybe_decay_untracked(ctx, type), mark_callee_rooted(ctx, boxed(ctx, x))});
 }
 
-static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg)
+static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const Twine &msg)
 {
     just_emit_type_error(ctx, x, type, msg);
     ctx.builder.CreateUnreachable();
@@ -1516,28 +1818,37 @@ static bool can_optimize_isa_union(jl_uniontype_t *type)
 }
 
 // a simple case of emit_isa that is obvious not to include a safe-point
-static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_datatype_t *dt)
+static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_datatype_t *dt, bool could_be_null=false)
 {
-    assert(jl_is_concrete_type((jl_value_t*)dt));
+    assert(jl_is_concrete_type((jl_value_t*)dt) || is_uniquerep_Type((jl_value_t*)dt));
     if (arg.TIndex) {
         unsigned tindex = get_box_tindex(dt, arg.typ);
         if (tindex > 0) {
             // optimize more when we know that this is a split union-type where tindex = 0 is invalid
-            Value *xtindex = ctx.builder.CreateAnd(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
+            Value *xtindex = ctx.builder.CreateAnd(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), ~UNION_BOX_MARKER));
             auto isa = ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex));
             setName(ctx.emission_context, isa, "exactly_isa");
             return isa;
         }
         else if (arg.Vboxed) {
-            // test for (arg.TIndex == 0x80 && typeof(arg.V) == type)
-            Value *isboxed = ctx.builder.CreateICmpEQ(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
+            // test for (arg.TIndex == UNION_BOX_MARKER && typeof(arg.V) == type)
+            Value *isboxed = ctx.builder.CreateICmpEQ(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER));
+            if (could_be_null) {
+                isboxed = ctx.builder.CreateAnd(isboxed,
+                    ctx.builder.CreateNot(null_pointer_cmp(ctx, arg.Vboxed)));
+            }
             setName(ctx.emission_context, isboxed, "isboxed");
             BasicBlock *currBB = ctx.builder.GetInsertBlock();
             BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f);
             BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f);
             ctx.builder.CreateCondBr(isboxed, isaBB, postBB);
             ctx.builder.SetInsertPoint(isaBB);
-            Value *istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg.Vboxed, false, true), emit_tagfrom(ctx, dt));
+            Value *istype_boxed = NULL;
+            if (is_uniquerep_Type((jl_value_t*)dt)) {
+                istype_boxed = ctx.builder.CreateICmpEQ(decay_derived(ctx, arg.Vboxed), decay_derived(ctx, literal_pointer_val(ctx, jl_tparam0(dt))));
+            } else {
+                istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg.Vboxed, false, true), emit_tagfrom(ctx, dt));
+            }
             ctx.builder.CreateBr(postBB);
             isaBB = ctx.builder.GetInsertBlock(); // could have changed
             ctx.builder.SetInsertPoint(postBB);
@@ -1551,13 +1862,20 @@ static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_data
             return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
         }
     }
-    auto isa = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg, false, true), emit_tagfrom(ctx, dt));
-    setName(ctx.emission_context, isa, "exactly_isa");
-    return isa;
+    Value *isnull = NULL;
+    if (could_be_null && arg.isboxed) {
+        isnull = null_pointer_cmp(ctx, arg.Vboxed);
+    }
+    Constant *Vfalse = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
+    return emit_guarded_test(ctx, isnull, Vfalse, [&]{
+        auto isa = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg, false, true), emit_tagfrom(ctx, dt));
+        setName(ctx.emission_context, isa, "exactly_isa");
+        return isa;
+    });
 }
 
 static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
-                                        jl_value_t *type, const std::string *msg);
+                                        jl_value_t *type, const Twine &msg);
 
 static void emit_isa_union(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type,
                            SmallVectorImpl<std::pair<std::pair<BasicBlock*,BasicBlock*>,Value*>> &bbs)
@@ -1569,7 +1887,7 @@ static void emit_isa_union(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t
         return;
     }
     BasicBlock *enter = ctx.builder.GetInsertBlock();
-    Value *v = emit_isa(ctx, x, type, nullptr).first;
+    Value *v = emit_isa(ctx, x, type, Twine()).first;
     BasicBlock *exit = ctx.builder.GetInsertBlock();
     bbs.emplace_back(std::make_pair(enter, exit), v);
     BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f);
@@ -1577,7 +1895,7 @@ static void emit_isa_union(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t
 }
 
 // Should agree with `_can_optimize_isa` above
-static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const std::string *msg)
+static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const Twine &msg)
 {
     ++EmittedIsa;
     // TODO: The subtype check below suffers from incorrectness issues due to broken
@@ -1585,7 +1903,7 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
     // actual `isa` calls, this optimization should already have been performed upstream
     // anyway, but having this optimization in codegen might still be beneficial for
     // `typeassert`s if we can make it correct.
-    Optional<bool> known_isa;
+    std::optional<bool> known_isa;
     jl_value_t *intersected_type = type;
     if (x.constant)
         known_isa = jl_isa(x.constant, type);
@@ -1596,9 +1914,11 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
         if (intersected_type == (jl_value_t*)jl_bottom_type)
             known_isa = false;
     }
+    if (intersected_type == (jl_value_t*)jl_typeofbottom_type->super)
+        intersected_type = (jl_value_t*)jl_typeofbottom_type; // swap abstract Type{Union{}} for concrete typeof(Union{})
     if (known_isa) {
-        if (!*known_isa && msg) {
-            emit_type_error(ctx, x, literal_pointer_val(ctx, type), *msg);
+        if (!*known_isa && !msg.isTriviallyEmpty()) {
+            emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg);
         }
         return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), *known_isa), true);
     }
@@ -1630,7 +1950,7 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
     if (jl_has_intersect_type_not_kind(type) || jl_has_intersect_type_not_kind(intersected_type)) {
         Value *vx = boxed(ctx, x);
         Value *vtyp = track_pjlvalue(ctx, literal_pointer_val(ctx, type));
-        if (msg && *msg == "typeassert") {
+        if (msg.isSingleStringRef() && msg.getSingleStringRef() == "typeassert") {
             ctx.builder.CreateCall(prepare_call(jltypeassert_func), { vx, vtyp });
             return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1), true);
         }
@@ -1690,17 +2010,17 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
 // declare that the pointer is legal (for zero bytes) even though it might be undef.
 static Value *emit_isa_and_defined(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ)
 {
-    return emit_nullcheck_guard(ctx, val.ispointer() ? val.V : nullptr, [&] {
-        return emit_isa(ctx, val, typ, nullptr).first;
+    return emit_nullcheck_guard(ctx, val.inline_roots.empty() && val.ispointer() ? val.V : nullptr, [&] {
+        return emit_isa(ctx, val, typ, Twine()).first;
     });
 }
 
 
-static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const std::string &msg)
+static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const Twine &msg)
 {
     Value *istype;
     bool handled_msg;
-    std::tie(istype, handled_msg) = emit_isa(ctx, x, type, &msg);
+    std::tie(istype, handled_msg) = emit_isa(ctx, x, type, msg);
     if (!handled_msg) {
         ++EmittedTypechecks;
         BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f);
@@ -1711,7 +2031,7 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t
         just_emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg);
         ctx.builder.CreateUnreachable();
 
-        ctx.f->getBasicBlockList().push_back(passBB);
+        passBB->insertInto(ctx.f);
         ctx.builder.SetInsertPoint(passBB);
     }
 }
@@ -1719,7 +2039,7 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t
 static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ)
 {
     Value *isconcrete;
-    isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, decay_derived(ctx, typ), getInt8PtrTy(ctx.builder.getContext())), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
+    isconcrete = emit_ptrgep(ctx, decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     isconcrete = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isconcrete, Align(1)));
     isconcrete = ctx.builder.CreateLShr(isconcrete, 1);
@@ -1728,7 +2048,7 @@ static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ)
     return isconcrete;
 }
 
-static void emit_concretecheck(jl_codectx_t &ctx, Value *typ, const std::string &msg)
+static void emit_concretecheck(jl_codectx_t &ctx, Value *typ, const Twine &msg)
 {
     ++EmittedConcretechecks;
     assert(typ->getType() == ctx.types().T_prjlvalue);
@@ -1754,7 +2074,6 @@ static bool bounds_check_enabled(jl_codectx_t &ctx, jl_value_t *inbounds) {
 static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_value_t *ty, Value *i, Value *len, jl_value_t *boundscheck)
 {
     Value *im1 = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1));
-#if CHECK_BOUNDS==1
     if (bounds_check_enabled(ctx, boundscheck)) {
         ++EmittedBoundschecks;
         Value *ok = ctx.builder.CreateICmpULT(im1, len);
@@ -1772,7 +2091,10 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
         else { // unboxed jl_value_t*
             Value *a = ainfo.V;
             if (ainfo.isghost) {
-                a = Constant::getNullValue(getInt8PtrTy(ctx.builder.getContext()));
+                a = Constant::getNullValue(getPointerTy(ctx.builder.getContext()));
+            }
+            else if (!ainfo.inline_roots.empty()) {
+                a = value_to_pointer(ctx, ainfo).V;
             }
             else if (!ainfo.ispointer()) {
                 // CreateAlloca is OK here since we are on an error branch
@@ -1782,26 +2104,32 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
                 a = tempSpace;
             }
             ctx.builder.CreateCall(prepare_call(jluboundserror_func), {
-                    emit_bitcast(ctx, decay_derived(ctx, a), getInt8PtrTy(ctx.builder.getContext())),
+                    decay_derived(ctx, a),
                     literal_pointer_val(ctx, ty),
                     i });
         }
         ctx.builder.CreateUnreachable();
-        ctx.f->getBasicBlockList().push_back(passBB);
+        passBB->insertInto(ctx.f);
         ctx.builder.SetInsertPoint(passBB);
     }
-#endif
     return im1;
 }
 
-static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt);
-static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value* dest, MDNode *tbaa_dest, unsigned alignment, bool isVolatile=false);
+static Value *CreateSimplifiedExtractValue(jl_codectx_t &ctx, Value *Agg, ArrayRef<unsigned> Idxs)
+{
+    // aka IRBuilder<InstSimplifyFolder>
+    SimplifyQuery SQ(jl_Module->getDataLayout()); // not actually used, but required by API
+    if (Value *Inst = simplifyExtractValueInst(Agg, Idxs, SQ))
+        return Inst;
+    return ctx.builder.CreateExtractValue(Agg, Idxs);
+}
 
 static void emit_write_barrier(jl_codectx_t&, Value*, ArrayRef<Value*>);
 static void emit_write_barrier(jl_codectx_t&, Value*, Value*);
 static void emit_write_multibarrier(jl_codectx_t&, Value*, Value*, jl_value_t*);
+static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, const jl_cgval_t &x);
 
-std::vector<unsigned> first_ptr(Type *T)
+SmallVector<unsigned, 0> first_ptr(Type *T)
 {
     if (isa<StructType>(T) || isa<ArrayType>(T) || isa<VectorType>(T)) {
         if (!isa<StructType>(T)) {
@@ -1819,7 +2147,7 @@ std::vector<unsigned> first_ptr(Type *T)
         unsigned i = 0;
         for (Type *ElTy : T->subtypes()) {
             if (isa<PointerType>(ElTy) && ElTy->getPointerAddressSpace() == AddressSpace::Tracked) {
-                return std::vector<unsigned>{i};
+                return SmallVector<unsigned, 0>{i};
             }
             auto path = first_ptr(ElTy);
             if (!path.empty()) {
@@ -1837,23 +2165,23 @@ Value *extract_first_ptr(jl_codectx_t &ctx, Value *V)
     if (path.empty())
         return NULL;
     std::reverse(std::begin(path), std::end(path));
-    return ctx.builder.CreateExtractValue(V, path);
+    return CreateSimplifiedExtractValue(ctx, V, path);
 }
 
 
 static void emit_lockstate_value(jl_codectx_t &ctx, Value *strct, bool newstate)
 {
     ++EmittedLockstates;
-    Value *v = mark_callee_rooted(ctx, strct);
-    ctx.builder.CreateCall(prepare_call(newstate ? jllockvalue_func : jlunlockvalue_func), v);
-}
-static void emit_lockstate_value(jl_codectx_t &ctx, const jl_cgval_t &strct, bool newstate)
-{
-    assert(strct.isboxed);
-    emit_lockstate_value(ctx, boxed(ctx, strct), newstate);
+    if (strct->getType()->getPointerAddressSpace() == AddressSpace::Loaded) {
+        Value *v = strct;
+        ctx.builder.CreateCall(prepare_call(newstate ? jllockfield_func : jlunlockfield_func), v);
+    }
+    else {
+        Value *v = mark_callee_rooted(ctx, strct);
+        ctx.builder.CreateCall(prepare_call(newstate ? jllockvalue_func : jlunlockvalue_func), v);
+    }
 }
 
-
 // If `nullcheck` is not NULL and a pointer NULL check is necessary
 // store the pointer to be checked in `*nullcheck` instead of checking it
 static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, jl_value_t *jltype,
@@ -1861,50 +2189,67 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
                              bool maybe_null_if_boxed = true, unsigned alignment = 0,
                              Value **nullcheck = nullptr)
 {
-    // TODO: we should use unordered loads for anything with CountTrackedPointers(elty).count > 0 (if not otherwise locked)
     Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype);
-    if (type_is_ghost(elty))
+    if (type_is_ghost(elty)) {
+        if (isStrongerThanMonotonic(Order))
+            ctx.builder.CreateFence(Order);
         return ghostValue(ctx, jltype);
+    }
+    if (isboxed)
+        alignment = sizeof(void*);
+    else if (!alignment)
+        alignment = julia_alignment(jltype);
+    if (idx_0based)
+        ptr = ctx.builder.CreateInBoundsGEP(elty, ptr, idx_0based);
     unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype);
     // note that nb == jl_Module->getDataLayout().getTypeAllocSize(elty) or getTypeStoreSize, depending on whether it is a struct or primitive type
     AllocaInst *intcast = NULL;
-    if (Order == AtomicOrdering::NotAtomic) {
-        if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) {
-            intcast = emit_static_alloca(ctx, elty);
-            setName(ctx.emission_context, intcast, "aggregate_load_box");
+    if (Order == AtomicOrdering::NotAtomic && !isboxed && !aliasscope && elty->isAggregateType() && !jl_is_genericmemoryref_type(jltype)) {
+        // use split_value to do this load
+        auto src = mark_julia_slot(ptr, jltype, NULL, tbaa);
+        auto copy = split_value(ctx, src, Align(alignment));
+        if (maybe_null_if_boxed && !copy.second.empty()) {
+            null_pointer_check(ctx, copy.second[0], nullcheck);
         }
+        return mark_julia_slot(copy.first, jltype, NULL, ctx.tbaa().tbaa_stack, copy.second);
     }
-    else {
+    Type *realelty = elty;
+    if (Order != AtomicOrdering::NotAtomic) {
         if (!isboxed && !elty->isIntOrPtrTy()) {
-            intcast = emit_static_alloca(ctx, elty);
+            intcast = emit_static_alloca(ctx, elty, Align(alignment));
             setName(ctx.emission_context, intcast, "atomic_load_box");
-            elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
+            realelty = elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
+        }
+        if (isa<IntegerType>(elty)) {
+            unsigned nb2 = PowerOf2Ceil(nb);
+            if (nb != nb2)
+                elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2);
         }
     }
-    Type *realelty = elty;
-    if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
-        unsigned nb2 = PowerOf2Ceil(nb);
-        if (nb != nb2)
-            elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2);
-    }
-    Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
-    Value *data;
-    if (ptr->getType() != ptrty)
-        data = emit_bitcast(ctx, ptr, ptrty);
-    else
-        data = ptr;
-    if (idx_0based)
-        data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based);
     Value *instr = nullptr;
-    if (isboxed)
-        alignment = sizeof(void*);
-    else if (!alignment)
-        alignment = julia_alignment(jltype);
-    if (intcast && Order == AtomicOrdering::NotAtomic) {
-        emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, alignment);
+    if (!isboxed && jl_is_genericmemoryref_type(jltype)) {
+        //We don't specify the stronger expected memory ordering here because of fears it may interfere with vectorization and other optimizations
+        //if (Order == AtomicOrdering::NotAtomic)
+        //    Order = AtomicOrdering::Monotonic;
+        // load these FCA as individual fields, so LLVM does not need to split them later
+        Value *fld0 = ctx.builder.CreateStructGEP(elty, ptr, 0);
+        LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false);
+        load0->setOrdering(Order);
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
+        ai.scope = MDNode::concatenate(aliasscope, ai.scope);
+        ai.decorateInst(load0);
+        Value *fld1 = ctx.builder.CreateStructGEP(elty, ptr, 1);
+        LoadInst *load1 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(1), fld1, Align(alignment), false);
+        static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order");
+        maybe_mark_load_dereferenceable(load1, true, sizeof(void*)*2, alignof(void*));
+        load1->setOrdering(Order);
+        ai.decorateInst(load1);
+        instr = Constant::getNullValue(elty);
+        instr = ctx.builder.CreateInsertValue(instr, load0, 0);
+        instr = ctx.builder.CreateInsertValue(instr, load1, 1);
     }
     else {
-        LoadInst *load = ctx.builder.CreateAlignedLoad(elty, data, Align(alignment), false);
+        LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment), false);
         load->setOrdering(Order);
         if (isboxed)
             maybe_mark_load_dereferenceable(load, true, jltype);
@@ -1912,16 +2257,16 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         ai.scope = MDNode::concatenate(aliasscope, ai.scope);
         ai.decorateInst(load);
         instr = load;
-        if (elty != realelty)
-            instr = ctx.builder.CreateTrunc(instr, realelty);
-        if (intcast) {
-            ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
-            instr = nullptr;
-        }
+    }
+    if (elty != realelty)
+        instr = ctx.builder.CreateTrunc(instr, realelty);
+    if (intcast) {
+        ctx.builder.CreateAlignedStore(instr, intcast, Align(alignment));
+        instr = nullptr;
     }
     if (maybe_null_if_boxed) {
         if (intcast)
-            instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+            instr = ctx.builder.CreateAlignedLoad(intcast->getAllocatedType(), intcast, Align(alignment));
         Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
         if (first_ptr)
             null_pointer_check(ctx, first_ptr, nullcheck);
@@ -1934,7 +2279,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 2)) }));
         if (intcast)
-            instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+            instr = ctx.builder.CreateAlignedLoad(intcast->getAllocatedType(), intcast, Align(alignment));
         instr = ctx.builder.CreateTrunc(instr, getInt1Ty(ctx.builder.getContext()));
     }
     if (instr)
@@ -1943,19 +2288,22 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         return mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
 }
 
+static Function *emit_modifyhelper(jl_codectx_t &ctx2, const jl_cgval_t &op, const jl_cgval_t &modifyop, jl_value_t *jltype, Type *elty, jl_cgval_t rhs, const Twine &fname, bool gcstack_arg);
+
 static jl_cgval_t typed_store(jl_codectx_t &ctx,
-        Value *ptr, Value *idx_0based, jl_cgval_t rhs, jl_cgval_t cmp,
+        Value *ptr, jl_cgval_t rhs, jl_cgval_t cmpop,
         jl_value_t *jltype, MDNode *tbaa, MDNode *aliasscope,
         Value *parent,  // for the write barrier, NULL if no barrier needed
         bool isboxed, AtomicOrdering Order, AtomicOrdering FailOrder, unsigned alignment,
-        bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
-        bool maybe_null_if_boxed, const jl_cgval_t *modifyop, const std::string &fname)
+        Value *needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, bool issetfieldonce,
+        bool maybe_null_if_boxed, const jl_cgval_t *modifyop, const Twine &fname,
+        jl_module_t *mod, jl_sym_t *var)
 {
     auto newval = [&](const jl_cgval_t &lhs) {
-        const jl_cgval_t argv[3] = { cmp, lhs, rhs };
+        const jl_cgval_t argv[3] = { cmpop, lhs, rhs };
         jl_cgval_t ret;
         if (modifyop) {
-            ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
+            ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type, true);
         }
         else {
             Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call);
@@ -1965,16 +2313,21 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         ret = update_julia_type(ctx, ret, jltype);
         return ret;
     };
-    assert(!needlock || parent != nullptr);
+    if (isboxed)
+        alignment = sizeof(void*);
+    else if (!alignment)
+        alignment = julia_alignment(jltype);
     Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype);
-    if (type_is_ghost(elty)) {
+    if (type_is_ghost(elty) ||
+            (issetfieldonce && !maybe_null_if_boxed) ||
+            (issetfieldonce && !isboxed && !jl_type_hasptr(jltype))) {
         if (isStrongerThanMonotonic(Order))
             ctx.builder.CreateFence(Order);
         if (issetfield) {
             return rhs;
         }
         else if (isreplacefield) {
-            Value *Success = emit_f_is(ctx, cmp, ghostValue(ctx, jltype));
+            Value *Success = emit_f_is(ctx, cmpop, ghostValue(ctx, jltype));
             Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
             const jl_cgval_t argv[2] = {ghostValue(ctx, jltype), mark_julia_type(ctx, Success, false, jl_bool_type)};
             jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
@@ -1983,21 +2336,32 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         else if (isswapfield) {
             return ghostValue(ctx, jltype);
         }
-        else { // modifyfield
+        else if (ismodifyfield) {
             jl_cgval_t oldval = ghostValue(ctx, jltype);
             const jl_cgval_t argv[2] = { oldval, newval(oldval) };
             jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
             return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
         }
+        else { // issetfieldonce
+            return mark_julia_const(ctx, jl_false);
+        }
     }
+    // if FailOrder was inherited from Order, may need to remove Load-only effects now
+    if (FailOrder == AtomicOrdering::AcquireRelease)
+        FailOrder = AtomicOrdering::Acquire;
+    if (FailOrder == AtomicOrdering::Release)
+        FailOrder = AtomicOrdering::Monotonic;
     unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype);
     AllocaInst *intcast = nullptr;
+    Type *intcast_eltyp = nullptr;
+    bool tracked_pointers = isboxed || CountTrackedPointers(elty).count > 0;
     if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
+        intcast_eltyp = elty;
+        elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
         if (!issetfield) {
-            intcast = emit_static_alloca(ctx, elty);
+            intcast = emit_static_alloca(ctx, elty, Align(alignment));
             setName(ctx.emission_context, intcast, "atomic_store_box");
         }
-        elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
     }
     Type *realelty = elty;
     if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
@@ -2006,35 +2370,33 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2);
     }
     Value *r = nullptr;
-    if (issetfield || isswapfield || isreplacefield)  {
-        if (isboxed)
+    if (issetfield || isswapfield || isreplacefield || issetfieldonce)  { // e.g. !ismodifyfield
+        assert(isboxed || rhs.typ == jltype);
+        if (isboxed) {
             r = boxed(ctx, rhs);
-        else if (aliasscope || Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) {
+        }
+        else if (intcast) {
+            emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, MaybeAlign(), intcast->getAlign());
+            r = ctx.builder.CreateLoad(realelty, intcast);
+        }
+        else if (aliasscope || Order != AtomicOrdering::NotAtomic || (tracked_pointers && rhs.inline_roots.empty())) {
             r = emit_unbox(ctx, realelty, rhs, jltype);
-            if (realelty != elty)
-                r = ctx.builder.CreateZExt(r, elty);
         }
+        if (realelty != elty)
+            r = ctx.builder.CreateZExt(r, elty);
     }
-    Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
-    if (ptr->getType() != ptrty)
-        ptr = ctx.builder.CreateBitCast(ptr, ptrty);
-    if (idx_0based)
-        ptr = ctx.builder.CreateInBoundsGEP(elty, ptr, idx_0based);
-    if (isboxed)
-        alignment = sizeof(void*);
-    else if (!alignment)
-        alignment = julia_alignment(jltype);
     Value *instr = nullptr;
     Value *Compare = nullptr;
     Value *Success = nullptr;
     BasicBlock *DoneBB = nullptr;
     if (needlock)
-        emit_lockstate_value(ctx, parent, true);
+        emit_lockstate_value(ctx, needlock, true);
     jl_cgval_t oldval = rhs;
+    // TODO: we should do Release ordering for anything with CountTrackedPointers(elty).count > 0, instead of just isboxed
     if (issetfield || (Order == AtomicOrdering::NotAtomic && isswapfield)) {
         if (isswapfield) {
             auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
-            setName(ctx.emission_context, load, "swapfield_load");
+            setName(ctx.emission_context, load, "swap_load");
             if (isboxed)
                 load->setOrdering(AtomicOrdering::Unordered);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
@@ -2044,28 +2406,85 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             instr = load;
         }
         if (r) {
-            StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
-            store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Release : Order);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
             ai.noalias = MDNode::concatenate(aliasscope, ai.noalias);
-            ai.decorateInst(store);
+            if (false && !isboxed && Order == AtomicOrdering::NotAtomic && jl_is_genericmemoryref_type(jltype)) {
+                // if enabled, store these FCA as individual fields, so LLVM does not need to split them later and they can use release ordering
+                assert(r->getType() == ctx.types().T_jlgenericmemory);
+                Value *f1 = ctx.builder.CreateExtractValue(r, 0);
+                Value *f2 = ctx.builder.CreateExtractValue(r, 1);
+                static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order");
+                StoreInst *store = ctx.builder.CreateAlignedStore(f1, ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, ptr, 0), Align(alignment));
+                store->setOrdering(AtomicOrdering::Release);
+                ai.decorateInst(store);
+                store = ctx.builder.CreateAlignedStore(f2, ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, ptr, 1), Align(alignment));
+                store->setOrdering(AtomicOrdering::Release);
+                ai.decorateInst(store);
+            }
+            else {
+                StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
+                store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Release : Order);
+                ai.decorateInst(store);
+            }
         }
         else {
             assert(Order == AtomicOrdering::NotAtomic && !isboxed && rhs.typ == jltype);
-            emit_unbox_store(ctx, rhs, ptr, tbaa, alignment);
+            emit_unbox_store(ctx, rhs, ptr, tbaa, MaybeAlign(), Align(alignment));
         }
     }
-    else if (isswapfield && isStrongerThanMonotonic(Order)) {
+    else if (isswapfield) {
+        if (Order == AtomicOrdering::Unordered)
+            Order = AtomicOrdering::Monotonic;
         assert(Order != AtomicOrdering::NotAtomic && r);
         auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order);
-        setName(ctx.emission_context, store, "swapfield_atomicrmw");
+        setName(ctx.emission_context, store, "swap_atomicrmw");
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
         ai.noalias = MDNode::concatenate(aliasscope, ai.noalias);
         ai.decorateInst(store);
         instr = store;
     }
-    else {
-        // replacefield, modifyfield, or swapfield (isboxed && atomic)
+    else if (ismodifyfield && modifyop && !needlock && Order != AtomicOrdering::NotAtomic && !isboxed && realelty == elty && !intcast && elty->isIntegerTy() && !jl_type_hasptr(jltype)) {
+        // emit this only if we have a possibility of optimizing it
+        if (Order == AtomicOrdering::Unordered)
+            Order = AtomicOrdering::Monotonic;
+        if (jl_is_pointerfree(rhs.typ) && !rhs.isghost && (rhs.constant || rhs.isboxed || rhs.ispointer())) {
+            // if this value can be loaded from memory, do that now so that it is sequenced before the atomicmodify
+            // and the IR is less dependent on what was emitted before now to create this rhs.
+            // Inlining should do okay to clean this up later if there are parts we don't need.
+            rhs = jl_cgval_t(emit_unbox(ctx, julia_type_to_llvm(ctx, rhs.typ), rhs, rhs.typ), rhs.typ, NULL);
+        }
+        bool gcstack_arg = JL_FEAT_TEST(ctx,gcstack_arg);
+        Function *op = emit_modifyhelper(ctx, cmpop, *modifyop, jltype, elty, rhs, fname, gcstack_arg);
+        std::string intr_name = "julia.atomicmodify.i";
+        intr_name += utostr(cast<IntegerType>(elty)->getBitWidth());
+        intr_name += ".p";
+        intr_name += utostr(ptr->getType()->getPointerAddressSpace());
+        FunctionCallee intr = jl_Module->getOrInsertFunction(intr_name,
+                FunctionType::get(StructType::get(elty, elty), {ptr->getType(), ctx.builder.getPtrTy(), ctx.builder.getInt8Ty(), ctx.builder.getInt8Ty()}, true),
+                AttributeList::get(elty->getContext(),
+                  Attributes(elty->getContext(), {Attribute::NoMerge}), // prevent llvm from merging calls to different functions
+                  AttributeSet(),
+                  None));
+        SmallVector<Value*,0> Args = {ptr, op, ctx.builder.getInt8((unsigned)Order), ctx.builder.getInt8(SyncScope::System)};
+        if (rhs.V)
+            Args.push_back(rhs.V);
+        if (rhs.Vboxed)
+            Args.push_back(rhs.Vboxed);
+        if (rhs.TIndex)
+            Args.push_back(rhs.TIndex);
+        Args.append(rhs.inline_roots);
+        if (gcstack_arg)
+            Args.push_back(ctx.pgcstack);
+        auto oldnew = ctx.builder.CreateCall(intr, Args);
+        oldnew->addParamAttr(0, Attribute::getWithAlignment(oldnew->getContext(), Align(alignment)));
+        //jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
+        //ai.noalias = MDNode::concatenate(aliasscope, ai.noalias);
+        //ai.decorateInst(oldnew);
+        oldval = mark_julia_type(ctx, ctx.builder.CreateExtractValue(oldnew, 0), isboxed, jltype);
+        rhs = mark_julia_type(ctx, ctx.builder.CreateExtractValue(oldnew, 1), isboxed, jltype);
+    }
+    else {
+        // replacefield, modifyfield, swapfield, setfieldonce (isboxed && atomic)
         DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg", ctx.f);
         bool needloop;
         PHINode *Succ = nullptr, *Current = nullptr;
@@ -2075,15 +2494,16 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             }
             else if (!isboxed) {
                 assert(jl_is_concrete_type(jltype));
-                needloop = ((jl_datatype_t*)jltype)->layout->haspadding;
-                Value *SameType = emit_isa(ctx, cmp, jltype, nullptr).first;
+                needloop = ((jl_datatype_t*)jltype)->layout->flags.haspadding ||
+                          !((jl_datatype_t*)jltype)->layout->flags.isbitsegal;
+                Value *SameType = emit_isa(ctx, cmpop, jltype, Twine()).first;
                 if (SameType != ConstantInt::getTrue(ctx.builder.getContext())) {
                     BasicBlock *SkipBB = BasicBlock::Create(ctx.builder.getContext(), "skip_xchg", ctx.f);
                     BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "ok_xchg", ctx.f);
                     ctx.builder.CreateCondBr(SameType, BB, SkipBB);
                     ctx.builder.SetInsertPoint(SkipBB);
                     LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
-                    setName(ctx.emission_context, load, "atomic_replacefield_initial");
+                    setName(ctx.emission_context, load, "atomic_replace_initial");
                     load->setOrdering(FailOrder == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Monotonic : FailOrder);
                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
                     ai.noalias = MDNode::concatenate(aliasscope, ai.noalias);
@@ -2096,21 +2516,33 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                     Current->addIncoming(instr, SkipBB);
                     ctx.builder.SetInsertPoint(BB);
                 }
-                Compare = emit_unbox(ctx, realelty, cmp, jltype);
+                cmpop = update_julia_type(ctx, cmpop, jltype);
+                if (intcast) {
+                    emit_unbox_store(ctx, cmpop, intcast, ctx.tbaa().tbaa_stack, MaybeAlign(), intcast->getAlign());
+                    Compare = ctx.builder.CreateLoad(realelty, intcast);
+                }
+                else {
+                    Compare = emit_unbox(ctx, realelty, cmpop, jltype);
+                }
                 if (realelty != elty)
                     Compare = ctx.builder.CreateZExt(Compare, elty);
             }
-            else if (cmp.isboxed || cmp.constant || jl_pointer_egal(jltype)) {
-                Compare = boxed(ctx, cmp);
-                needloop = !jl_pointer_egal(jltype) && !jl_pointer_egal(cmp.typ);
-                if (needloop && !cmp.isboxed) // try to use the same box in the compare now and later
-                    cmp = mark_julia_type(ctx, Compare, true, cmp.typ);
+            else if (cmpop.isboxed || cmpop.constant || jl_pointer_egal(jltype)) {
+                Compare = boxed(ctx, cmpop);
+                needloop = !jl_pointer_egal(jltype) && !jl_pointer_egal(cmpop.typ);
+                if (needloop && !cmpop.isboxed) // try to use the same box in the compare now and later
+                    cmpop = mark_julia_type(ctx, Compare, true, cmpop.typ);
             }
             else {
                 Compare = Constant::getNullValue(ctx.types().T_prjlvalue); // TODO: does this need to be an invalid bit pattern?
                 needloop = true;
             }
         }
+        else if (issetfieldonce) {
+            needloop = !isboxed && Order != AtomicOrdering::NotAtomic && nb > sizeof(void*);
+            if (Order != AtomicOrdering::NotAtomic)
+                Compare = Constant::getNullValue(elty);
+        }
         else { // swap or modify
             LoadInst *Current = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
             Current->setOrdering(Order == AtomicOrdering::NotAtomic && !isboxed ? Order : AtomicOrdering::Monotonic);
@@ -2133,21 +2565,22 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         }
         if (ismodifyfield) {
             if (needlock)
-                emit_lockstate_value(ctx, parent, false);
+                emit_lockstate_value(ctx, needlock, false); // unlock
             Value *realCompare = Compare;
             if (realelty != elty)
                 realCompare = ctx.builder.CreateTrunc(realCompare, realelty);
             if (intcast) {
-                ctx.builder.CreateStore(realCompare, ctx.builder.CreateBitCast(intcast, realCompare->getType()->getPointerTo()));
-                if (maybe_null_if_boxed)
-                    realCompare = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+                assert(!isboxed);
+                ctx.builder.CreateStore(realCompare, intcast);
+                if (tracked_pointers)
+                    realCompare = ctx.builder.CreateLoad(intcast_eltyp, intcast);
             }
-            if (maybe_null_if_boxed) {
-                Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare);
-                if (first_ptr)
-                    null_pointer_check(ctx, first_ptr, nullptr);
+            if (maybe_null_if_boxed && tracked_pointers) {
+                Value *first_ptr = isboxed ? realCompare : extract_first_ptr(ctx, realCompare);
+                assert(first_ptr);
+                null_load_check(ctx, first_ptr, mod, var);
             }
-            if (intcast)
+            if (intcast && !tracked_pointers)
                 oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
             else
                 oldval = mark_julia_type(ctx, realCompare, isboxed, jltype);
@@ -2155,18 +2588,24 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             if (isboxed) {
                 r = boxed(ctx, rhs);
             }
-            else if (Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) {
+            else if (intcast) {
+                emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, MaybeAlign(), intcast->getAlign());
+                r = ctx.builder.CreateLoad(realelty, intcast);
+                if (!tracked_pointers) // oldval is a slot, so put the oldval back
+                    ctx.builder.CreateStore(realCompare, intcast);
+            }
+            else if (Order != AtomicOrdering::NotAtomic || (tracked_pointers && rhs.inline_roots.empty())) {
                 r = emit_unbox(ctx, realelty, rhs, jltype);
-                if (realelty != elty)
-                    r = ctx.builder.CreateZExt(r, elty);
             }
+            if (realelty != elty)
+                r = ctx.builder.CreateZExt(r, elty);
             if (needlock)
-                emit_lockstate_value(ctx, parent, true);
-            cmp = oldval;
+                emit_lockstate_value(ctx, needlock, true); // relock
+            cmpop = oldval;
         }
         Value *Done;
         if (Order == AtomicOrdering::NotAtomic) {
-            // modifyfield or replacefield
+            // modifyfield or replacefield or setfieldonce
             assert(elty == realelty && !intcast);
             auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
@@ -2178,9 +2617,11 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             if (maybe_null_if_boxed && !ismodifyfield)
                 first_ptr = isboxed ? load : extract_first_ptr(ctx, load);
             oldval = mark_julia_type(ctx, load, isboxed, jltype);
-            Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
-                return emit_f_is(ctx, oldval, cmp);
-            });
+            assert(!issetfieldonce || first_ptr != nullptr);
+            if (issetfieldonce)
+                Success = ctx.builder.CreateIsNull(first_ptr);
+            else
+                Success = emit_f_is(ctx, oldval, cmpop, first_ptr, nullptr);
             if (needloop && ismodifyfield)
                 CmpPhi->addIncoming(load, ctx.builder.GetInsertBlock());
             assert(Succ == nullptr);
@@ -2195,18 +2636,18 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             }
             else {
                 assert(!isboxed && rhs.typ == jltype);
-                emit_unbox_store(ctx, rhs, ptr, tbaa, alignment);
+                emit_unbox_store(ctx, rhs, ptr, tbaa, MaybeAlign(), Align(alignment));
             }
             ctx.builder.CreateBr(DoneBB);
             instr = load;
         }
-        else {
+        else { // something atomic
             assert(r);
             if (Order == AtomicOrdering::Unordered)
                 Order = AtomicOrdering::Monotonic;
             if (Order == AtomicOrdering::Monotonic && isboxed)
                 Order = AtomicOrdering::Release;
-            if (!isreplacefield)
+            if (!isreplacefield && !issetfieldonce)
                 FailOrder = AtomicOrdering::Monotonic;
             else if (FailOrder == AtomicOrdering::Unordered)
                 FailOrder = AtomicOrdering::Monotonic;
@@ -2217,28 +2658,38 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0));
             Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1));
             Done = Success;
-            if (isreplacefield && needloop) {
+            if ((isreplacefield || issetfieldonce) && needloop) {
                 Value *realinstr = instr;
                 if (realelty != elty)
                     realinstr = ctx.builder.CreateTrunc(realinstr, realelty);
                 if (intcast) {
-                    ctx.builder.CreateStore(realinstr, ctx.builder.CreateBitCast(intcast, realinstr->getType()->getPointerTo()));
+                    ctx.builder.CreateStore(realinstr, intcast);
+                    // n.b. this oldval is only used for emit_f_is in this branch, so we know a priori that it does not need a gc-root
                     oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
                     if (maybe_null_if_boxed)
-                        realinstr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+                        realinstr = ctx.builder.CreateLoad(intcast_eltyp, intcast);
                 }
                 else {
                     oldval = mark_julia_type(ctx, realinstr, isboxed, jltype);
                 }
-                Done = emit_guarded_test(ctx, ctx.builder.CreateNot(Success), false, [&] {
-                    Value *first_ptr = nullptr;
-                    if (maybe_null_if_boxed)
-                        first_ptr = isboxed ? realinstr : extract_first_ptr(ctx, realinstr);
-                    return emit_nullcheck_guard(ctx, first_ptr, [&] {
-                        return emit_f_is(ctx, oldval, cmp);
+                if (issetfieldonce) {
+                    assert(!isboxed && maybe_null_if_boxed);
+                    Value *first_ptr = extract_first_ptr(ctx, realinstr);
+                    assert(first_ptr != nullptr);
+                    // Done = Success || first_ptr != NULL
+                    Done = ctx.builder.CreateOr(Success, ctx.builder.CreateIsNotNull(first_ptr));
+                }
+                else {
+                    // Done = Success || first_ptr == NULL || oldval == cmpop)
+                    // Done = !(!Success && (first_ptr != NULL && oldval == cmpop))
+                    Done = emit_guarded_test(ctx, ctx.builder.CreateNot(Success), false, [&] {
+                        Value *first_ptr = nullptr;
+                        if (maybe_null_if_boxed)
+                            first_ptr = isboxed ? realinstr : extract_first_ptr(ctx, realinstr);
+                        return emit_f_is(ctx, oldval, cmpop, first_ptr, nullptr);
                     });
-                });
-                Done = ctx.builder.CreateNot(Done);
+                    Done = ctx.builder.CreateNot(Done);
+                }
             }
             if (needloop)
                 ctx.builder.CreateCondBr(Done, DoneBB, BB);
@@ -2257,22 +2708,37 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
     if (DoneBB)
         ctx.builder.SetInsertPoint(DoneBB);
     if (needlock)
-        emit_lockstate_value(ctx, parent, false);
-    if (parent != NULL) {
-        if (isreplacefield) {
-            // TODO: avoid this branch if we aren't making a write barrier
+        emit_lockstate_value(ctx, needlock, false);
+    if (parent != NULL && tracked_pointers && (!isboxed || !type_is_permalloc(rhs.typ))) {
+        if (isreplacefield || issetfieldonce) {
             BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "xchg_wb", ctx.f);
             DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg_wb", ctx.f);
             ctx.builder.CreateCondBr(Success, BB, DoneBB);
             ctx.builder.SetInsertPoint(BB);
         }
         if (r) {
+            if (realelty != elty)
+                r = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, r, realelty));
+            if (intcast) {
+                ctx.builder.CreateStore(r, intcast);
+                r = ctx.builder.CreateLoad(intcast_eltyp, intcast);
+            }
+            else if (!isboxed && intcast_eltyp) {
+                assert(issetfield);
+                // issetfield doesn't use intcast, so need to reload rhs with the correct type
+                r = emit_unbox(ctx, intcast_eltyp, rhs, jltype);
+            }
             if (!isboxed)
                 emit_write_multibarrier(ctx, parent, r, rhs.typ);
-            else if (!type_is_permalloc(rhs.typ))
+            else
                 emit_write_barrier(ctx, parent, r);
         }
-        if (isreplacefield) {
+        else {
+            assert(!isboxed);
+            assert(!rhs.inline_roots.empty());
+            emit_write_multibarrier(ctx, parent, rhs);
+        }
+        if (isreplacefield || issetfieldonce) {
             ctx.builder.CreateBr(DoneBB);
             ctx.builder.SetInsertPoint(DoneBB);
         }
@@ -2282,26 +2748,26 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
         oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
     }
+    else if (issetfieldonce) {
+        oldval = mark_julia_type(ctx, Success, false, jl_bool_type);
+    }
     else if (!issetfield) { // swapfield or replacefield
         if (realelty != elty)
             instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
         if (intcast) {
-            ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
-            instr = nullptr;
+            ctx.builder.CreateStore(instr, intcast);
+            if (tracked_pointers)
+                instr = ctx.builder.CreateLoad(intcast_eltyp, intcast);
         }
-        if (maybe_null_if_boxed) {
-            if (intcast)
-                instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+        if (maybe_null_if_boxed && tracked_pointers) {
             Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
-            if (first_ptr)
-                null_pointer_check(ctx, first_ptr, nullptr);
-            if (intcast && !first_ptr)
-                instr = nullptr;
+            assert(first_ptr);
+            null_load_check(ctx, first_ptr, mod, var);
         }
-        if (instr)
-            oldval = mark_julia_type(ctx, instr, isboxed, jltype);
-        else
+        if (intcast && !tracked_pointers)
             oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
+        else
+            oldval = mark_julia_type(ctx, instr, isboxed, jltype);
         if (isreplacefield) {
             Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
             const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
@@ -2325,7 +2791,7 @@ static Value *julia_bool(jl_codectx_t &ctx, Value *cond)
 
 // --- accessing the representations of built-in data types ---
 
-static void emit_atomic_error(jl_codectx_t &ctx, const std::string &msg)
+static void emit_atomic_error(jl_codectx_t &ctx, const Twine &msg)
 {
     emit_error(ctx, prepare_call(jlatomicerror_func), msg);
 }
@@ -2334,6 +2800,32 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                                          unsigned idx, jl_datatype_t *jt,
                                          enum jl_memory_order order, Value **nullcheck=nullptr);
 
+static bool field_may_be_null(const jl_cgval_t &strct, jl_datatype_t *stt, size_t idx)
+{
+    size_t nfields = jl_datatype_nfields(stt);
+    if (idx < nfields - (unsigned)stt->name->n_uninitialized)
+        return false;
+    if (!jl_field_isptr(stt, idx) && !jl_type_hasptr(jl_field_type(stt, idx)))
+        return false;
+    if (strct.constant) {
+        if ((jl_is_immutable(stt) || jl_field_isconst(stt, idx)) && jl_field_isdefined(strct.constant, idx))
+            return false;
+    }
+    return true;
+}
+
+static bool field_may_be_null(const jl_cgval_t &strct, jl_datatype_t *stt)
+{
+    size_t nfields = jl_datatype_nfields(stt);
+    for (size_t i = 0; i < (unsigned)stt->name->n_uninitialized; i++) {
+        size_t idx = nfields - i - 1;
+        if (field_may_be_null(strct, stt, idx))
+            return true;
+    }
+    return false;
+}
+
+
 static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
         jl_cgval_t *ret, jl_cgval_t strct,
         Value *idx, jl_datatype_t *stt, jl_value_t *inbounds,
@@ -2341,7 +2833,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
 {
     ++EmittedGetfieldUnknowns;
     size_t nfields = jl_datatype_nfields(stt);
-    bool maybe_null = (unsigned)stt->name->n_uninitialized != 0;
+    bool maybe_null = field_may_be_null(strct, stt);
     auto idx0 = [&]() {
         return emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(ctx.types().T_size, nfields), inbounds);
     };
@@ -2360,7 +2852,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
     }
     assert(!jl_is_vecelement_type((jl_value_t*)stt));
 
-    if (!strct.ispointer()) { // unboxed
+    if (strct.inline_roots.empty() && !strct.ispointer()) { // unboxed
         assert(jl_is_concrete_immutable((jl_value_t*)stt));
         bool isboxed = is_datatype_all_pointers(stt);
         jl_svec_t *types = stt->types;
@@ -2399,24 +2891,25 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             assert((cast<ArrayType>(strct.V->getType())->getElementType() == ctx.types().T_prjlvalue) == isboxed);
             Value *idx = idx0();
             unsigned i = 0;
-            Value *fld = ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i));
+            Value *fld = ctx.builder.CreateExtractValue(strct.V, ArrayRef<unsigned>(i));
             for (i = 1; i < nfields; i++) {
                 fld = ctx.builder.CreateSelect(
                         ctx.builder.CreateICmpEQ(idx, ConstantInt::get(idx->getType(), i)),
-                        ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i)),
+                        ctx.builder.CreateExtractValue(strct.V, ArrayRef<unsigned>(i)),
                         fld);
             }
             setName(ctx.emission_context, fld, "getfield");
             jl_value_t *jft = issame ? jl_svecref(types, 0) : (jl_value_t*)jl_any_type;
             if (isboxed && maybe_null)
-                null_pointer_check(ctx, fld);
+                null_pointer_check(ctx, fld, nullptr);
             *ret = mark_julia_type(ctx, fld, isboxed, jft);
             return true;
         }
     }
 
     bool maybeatomic = stt->name->atomicfields != NULL;
-    if (strct.ispointer() && !maybeatomic) { // boxed or stack
+    if ((strct.inline_roots.empty() && strct.ispointer()) && !maybeatomic) { // boxed or stack
+        // COMBAK: inline_roots support could be implemented for this
         if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
             emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
             *ret = jl_cgval_t(); // unreachable
@@ -2438,7 +2931,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             }
             Value *fldptr = ctx.builder.CreateInBoundsGEP(
                     ctx.types().T_prjlvalue,
-                    emit_bitcast(ctx, data_pointer(ctx, strct), ctx.types().T_pprjlvalue),
+                    data_pointer(ctx, strct),
                     idx0());
             setName(ctx.emission_context, fldptr, "getfield_ptr");
             LoadInst *fld = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fldptr, Align(sizeof(void*)));
@@ -2448,7 +2941,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             ai.decorateInst(fld);
             maybe_mark_load_dereferenceable(fld, maybe_null, minimum_field_size, minimum_align);
             if (maybe_null)
-                null_pointer_check(ctx, fld);
+                null_pointer_check(ctx, fld, nullptr);
             *ret = mark_julia_type(ctx, fld, true, jl_any_type);
             return true;
         }
@@ -2461,8 +2954,8 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             if (!stt->name->mutabl && !(maybe_null && (jft == (jl_value_t*)jl_bool_type ||
                                                  ((jl_datatype_t*)jft)->layout->npointers))) {
                 // just compute the pointer and let user load it when necessary
-                Type *fty = julia_type_to_llvm(ctx, jft);
-                Value *addr = ctx.builder.CreateInBoundsGEP(fty, emit_bitcast(ctx, ptr, PointerType::get(fty, 0)), idx);
+                Type *fty = julia_type_to_llvm(ctx, jft); //TODO: move this to a int8 GEP
+                Value *addr = ctx.builder.CreateInBoundsGEP(fty, ptr, idx);
                 *ret = mark_julia_slot(addr, jft, NULL, strct.tbaa);
                 return true;
             }
@@ -2492,27 +2985,150 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex,
     Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1), tindex0);
     if (fsz > 0 && mutabl) {
         // move value to an immutable stack slot (excluding tindex)
-        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al);
-        AllocaInst *lv = emit_static_alloca(ctx, AT);
+        AllocaInst *lv = emit_static_alloca(ctx, fsz, Align(al));
         setName(ctx.emission_context, lv, "immutable_union");
-        if (al > 1)
-            lv->setAlignment(Align(al));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
-        emit_memcpy(ctx, lv, ai, addr, ai, fsz, al);
+        emit_memcpy(ctx, lv, ai, addr, ai, fsz, Align(al), Align(al));
         addr = lv;
     }
     return mark_julia_slot(fsz > 0 ? addr : nullptr, jfty, tindex, tbaa);
 }
 
+static bool isTBAA(MDNode *TBAA, std::initializer_list<const char*> const strset)
+{
+    if (!TBAA)
+        return false;
+    while (TBAA->getNumOperands() > 1) {
+        TBAA = cast<MDNode>(TBAA->getOperand(1).get());
+        auto str = cast<MDString>(TBAA->getOperand(0))->getString();
+        for (auto str2 : strset) {
+            if (str == str2) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+// Check if this is a load from an immutable value. The easiest
+// way to do so is to look at the tbaa and see if it derives from
+// jtbaa_immut.
+static bool isLoadFromImmut(LoadInst *LI)
+{
+    if (LI->getMetadata(LLVMContext::MD_invariant_load))
+        return true;
+    MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa);
+    if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype", "jtbaa_memoryptr", "jtbaa_memorylen", "jtbaa_memoryown"}))
+        return true;
+    return false;
+}
+
+static bool isConstGV(GlobalVariable *gv)
+{
+    return gv->isConstant() || gv->getMetadata("julia.constgv");
+}
+
+// Check if this is can be traced through constant loads to an constant global
+// or otherwise globally rooted value.
+// Almost all `tbaa_const` loads satisfies this with the exception of
+// task local constants which are constant as far as the code is concerned but aren't
+// global constants. For task local constant `task_local` will be true when this function
+// returns.
+// Unlike this function in llvm-late-gc-lowering, we do not examine PhiNode, as those are not emitted yet
+static bool isLoadFromConstGV(LoadInst *LI);
+static bool isLoadFromConstGV(Value *v)
+{
+    v = v->stripInBoundsOffsets();
+    if (auto LI = dyn_cast<LoadInst>(v))
+        return isLoadFromConstGV(LI);
+    if (auto gv = dyn_cast<GlobalVariable>(v))
+        return isConstGV(gv);
+    // null pointer
+    if (isa<ConstantData>(v))
+        return true;
+    // literal pointers
+    if (auto CE = dyn_cast<ConstantExpr>(v))
+        return (CE->getOpcode() == Instruction::IntToPtr &&
+                isa<ConstantData>(CE->getOperand(0)));
+    if (auto SL = dyn_cast<SelectInst>(v))
+        return (isLoadFromConstGV(SL->getTrueValue()) &&
+                isLoadFromConstGV(SL->getFalseValue()));
+    if (auto call = dyn_cast<CallInst>(v)) {
+        auto callee = call->getCalledFunction();
+        if (callee && callee->getName() == "julia.typeof") {
+            return true;
+        }
+        if (callee && callee->getName() == "julia.get_pgcstack") {
+            return true;
+        }
+        if (callee && callee->getName() == "julia.gc_loaded") {
+            return isLoadFromConstGV(call->getArgOperand(0)) &&
+                   isLoadFromConstGV(call->getArgOperand(1));
+        }
+    }
+    if (isa<Argument>(v)) {
+        return true;
+    }
+    return false;
+}
+
+// The white list implemented here and above in `isLoadFromConstGV(Value*)` should
+// cover all the cases we and LLVM generates.
+static bool isLoadFromConstGV(LoadInst *LI)
+{
+    // We only emit single slot GV in codegen
+    // but LLVM global merging can change the pointer operands to GEPs/bitcasts
+    auto load_base = LI->getPointerOperand()->stripInBoundsOffsets();
+    assert(load_base); // Static analyzer
+    auto gv = dyn_cast<GlobalVariable>(load_base);
+    if (isLoadFromImmut(LI)) {
+        if (gv)
+            return true;
+        return isLoadFromConstGV(load_base);
+    }
+    if (gv)
+        return isConstGV(gv);
+    return false;
+}
+
+
+static MDNode *best_field_tbaa(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_datatype_t *jt, unsigned idx, size_t byte_offset)
+{
+    auto tbaa = strct.tbaa;
+    if (tbaa == ctx.tbaa().tbaa_datatype)
+        if (byte_offset != offsetof(jl_datatype_t, types))
+            return ctx.tbaa().tbaa_const;
+    if (tbaa == ctx.tbaa().tbaa_array) {
+        if (jl_is_genericmemory_type(jt)) {
+            if (idx == 0)
+                return ctx.tbaa().tbaa_memorylen;
+            if (idx == 1)
+                return ctx.tbaa().tbaa_memoryptr;
+        }
+        else if (jl_is_array_type(jt)) {
+            if (idx == 0)
+                return ctx.tbaa().tbaa_arrayptr;
+            if (idx == 1)
+                return ctx.tbaa().tbaa_arraysize;
+        }
+    }
+    if (strct.V && jl_field_isconst(jt, idx) && isLoadFromConstGV(strct.V))
+        return ctx.tbaa().tbaa_const; //TODO: it seems odd to have a field with a tbaa that doesn't alias it's containing struct's tbaa
+                                      //Does the fact that this is marked as constant make this fine?
+    return tbaa;
+}
+
 // If `nullcheck` is not NULL and a pointer NULL check is necessary
 // store the pointer to be checked in `*nullcheck` instead of checking it
 static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &strct,
                                          unsigned idx, jl_datatype_t *jt,
                                          enum jl_memory_order order, Value **nullcheck)
 {
+    auto get_objname = [&]() {
+        return strct.V ? strct.V->getName() : StringRef("");
+    };
     jl_value_t *jfty = jl_field_type(jt, idx);
     bool isatomic = jl_field_isatomic(jt, idx);
-    bool needlock = isatomic && !jl_field_isptr(jt, idx) && jl_datatype_size(jfty) > MAX_ATOMIC_SIZE;
     if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
         emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
         return jl_cgval_t(); // unreachable
@@ -2530,44 +3146,56 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
     }
     if (type_is_ghost(julia_type_to_llvm(ctx, jfty)))
         return ghostValue(ctx, jfty);
-    size_t nfields = jl_datatype_nfields(jt);
-    bool maybe_null = idx >= nfields - (unsigned)jt->name->n_uninitialized;
+    Value *needlock = nullptr;
+    if (isatomic && !jl_field_isptr(jt, idx) && jl_datatype_size(jfty) > MAX_ATOMIC_SIZE) {
+        assert(strct.isboxed);
+        needlock = boxed(ctx, strct);
+    }
+    bool maybe_null = field_may_be_null(strct, jt, idx);
     size_t byte_offset = jl_field_offset(jt, idx);
-    auto tbaa = strct.tbaa;
-    if (tbaa == ctx.tbaa().tbaa_datatype && byte_offset != offsetof(jl_datatype_t, types))
-        tbaa = ctx.tbaa().tbaa_const;
-    if (strct.ispointer()) {
-        Value *staddr = data_pointer(ctx, strct);
-        bool isboxed;
-        Type *lt = julia_type_to_llvm(ctx, (jl_value_t*)jt, &isboxed);
-        Value *addr;
-        if (isboxed) {
-            // byte_offset == 0 is an important special case here, e.g.
-            // for single field wrapper types. Introducing the bitcast
-            // can pessimize mem2reg
-            if (byte_offset > 0) {
-                addr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()),
-                        emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())),
-                        ConstantInt::get(ctx.types().T_size, byte_offset));
-            }
-            else {
-                addr = staddr;
-            }
+    if (!strct.inline_roots.empty()) {
+        assert(!isatomic && !needlock);
+        auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset);
+        auto offsets = split_value_field(jt, idx);
+        bool hasptr = offsets.second >= 0;
+        assert(hasptr == jl_field_isptr(jt, idx) || jl_type_hasptr(jfty));
+        ArrayRef<Value*> roots;
+        if (hasptr) {
+            roots = ArrayRef(strct.inline_roots).slice(offsets.second, jl_field_isptr(jt, idx) ? 1 : ((jl_datatype_t*)jfty)->layout->npointers);
+            if (maybe_null)
+                null_pointer_check(ctx, roots[0], nullcheck);
+        }
+        if (jl_field_isptr(jt, idx)) {
+            return mark_julia_type(ctx, roots[0], true, jfty);
+        }
+        Value *addr = offsets.first < 0 ? nullptr : offsets.first == 0 ? strct.V : emit_ptrgep(ctx, strct.V, offsets.first);
+        if (jl_is_uniontype(jfty)) {
+            size_t fsz = 0, al = 0;
+            int union_max = jl_islayout_inline(jfty, &fsz, &al);
+            size_t fsz1 = jl_field_size(jt, idx) - 1;
+            bool isptr = (union_max == 0);
+            assert(!isptr && fsz < jl_field_size(jt, idx)); (void)isptr;
+            Value *ptindex = emit_ptrgep(ctx, addr, fsz1);
+            return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, false, union_max, strct.tbaa);
+        }
+        else if (jfty == (jl_value_t*)jl_bool_type) {
+            unsigned align = jl_field_align(jt, idx);
+            return typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false,
+                    AtomicOrdering::NotAtomic, maybe_null, align, nullcheck);
         }
         else {
-            staddr = maybe_bitcast(ctx, staddr, lt->getPointerTo());
-            if (jl_is_vecelement_type((jl_value_t*)jt))
-                addr = staddr; // VecElement types are unwrapped in LLVM.
-            else if (isa<StructType>(lt))
-                addr = emit_struct_gep(ctx, lt, staddr, byte_offset);
-            else
-                addr = ctx.builder.CreateConstInBoundsGEP2_32(lt, staddr, 0, idx);
+            return mark_julia_slot(addr, jfty, nullptr, tbaa, roots);
         }
+    }
+    else if (strct.ispointer()) {
+        auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset);
+        Value *staddr = data_pointer(ctx, strct);
+        Value *addr = (byte_offset == 0 ? staddr : emit_ptrgep(ctx, staddr, byte_offset));
+        if (addr != staddr)
+            setNameWithField(ctx.emission_context, addr, get_objname, jt, idx, Twine("_ptr"));
         if (jl_field_isptr(jt, idx)) {
-            setName(ctx.emission_context, addr, "getfield_addr");
-            LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, maybe_bitcast(ctx, addr, ctx.types().T_pprjlvalue), Align(sizeof(void*)));
-            setName(ctx.emission_context, Load, "getfield");
+            LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(void*)));
+            setNameWithField(ctx.emission_context, Load, get_objname, jt, idx, Twine());
             Load->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order));
             maybe_mark_load_dereferenceable(Load, maybe_null, jl_field_type(jt, idx));
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
@@ -2580,16 +3208,14 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
             size_t fsz = 0, al = 0;
             int union_max = jl_islayout_inline(jfty, &fsz, &al);
             bool isptr = (union_max == 0);
-            assert(!isptr && fsz == jl_field_size(jt, idx) - 1); (void)isptr;
-            Value *ptindex;
-            if (isboxed) {
-                ptindex = ctx.builder.CreateConstInBoundsGEP1_32(
-                    getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())), byte_offset + fsz);
-            }
-            else {
-                ptindex = emit_struct_gep(ctx, cast<StructType>(lt), staddr, byte_offset + fsz);
+            assert(!isptr && fsz < jl_field_size(jt, idx)); (void)isptr;
+            size_t fsz1 = jl_field_size(jt, idx) - 1;
+            Value *ptindex = emit_ptrgep(ctx, staddr, byte_offset + fsz1);
+            auto val = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, strct.tbaa);
+            if (val.V && val.V != addr) {
+                setNameWithField(ctx.emission_context, val.V, get_objname, jt, idx, Twine());
             }
-            return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, ctx.tbaa().tbaa_unionselbyte);
+            return val;
         }
         assert(jl_is_concrete_type(jfty));
         if (jl_field_isconst(jt, idx) && !(maybe_null && (jfty == (jl_value_t*)jl_bool_type ||
@@ -2599,12 +3225,15 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
         }
         unsigned align = jl_field_align(jt, idx);
         if (needlock)
-            emit_lockstate_value(ctx, strct, true);
+            emit_lockstate_value(ctx, needlock, true);
         jl_cgval_t ret = typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false,
                 needlock ? AtomicOrdering::NotAtomic : get_llvm_atomic_order(order),
                 maybe_null, align, nullcheck);
+        if (ret.V) {
+            setNameWithField(ctx.emission_context, ret.V, get_objname, jt, idx, Twine());
+        }
         if (needlock)
-            emit_lockstate_value(ctx, strct, false);
+            emit_lockstate_value(ctx, needlock, false);
         return ret;
     }
     else if (isa<UndefValue>(strct.V)) {
@@ -2620,6 +3249,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
         }
         else if (isa<VectorType>(T)) {
             fldv = ctx.builder.CreateExtractElement(obj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), idx));
+            setNameWithField(ctx.emission_context, fldv, get_objname, jt, idx, Twine());
         }
         else if (!jl_field_isptr(jt, idx) && jl_is_uniontype(jfty)) {
             int fsz = jl_field_size(jt, idx) - 1;
@@ -2629,30 +3259,29 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                 unsigned st_idx = convert_struct_offset(ctx, T, byte_offset);
                 IntegerType *ET = cast<IntegerType>(T->getStructElementType(st_idx));
                 unsigned align = (ET->getBitWidth() + 7) / 8;
-                lv = emit_static_alloca(ctx, ET);
-                setName(ctx.emission_context, lv, "union_split");
-                lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + align - 1) / align));
+                lv = emit_static_alloca(ctx, fsz, Align(align));
                 // emit all of the align-sized words
                 unsigned i = 0;
                 for (; i < fsz / align; i++) {
                     unsigned fld = st_idx + i;
-                    Value *fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(fld));
-                    Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
+                    Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef<unsigned>(fld));
+                    Value *fldp = emit_ptrgep(ctx, lv, i * align);
                     ctx.builder.CreateAlignedStore(fldv, fldp, Align(align));
                 }
                 // emit remaining bytes up to tindex
                 if (i < ptindex - st_idx) {
-                    Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
-                    staddr = ctx.builder.CreateBitCast(staddr, getInt8PtrTy(ctx.builder.getContext()));
+                    Value *staddr = emit_ptrgep(ctx, lv, i * align);
                     for (; i < ptindex - st_idx; i++) {
-                        Value *fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(st_idx + i));
-                        Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i);
+                        Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef<unsigned>(st_idx + i));
+                        Value *fldp = emit_ptrgep(ctx, staddr, i);
                         ctx.builder.CreateAlignedStore(fldv, fldp, Align(1));
                     }
                 }
+                setNameWithField(ctx.emission_context, lv, get_objname, jt, idx, Twine());
             }
-            Value *tindex0 = ctx.builder.CreateExtractValue(obj, makeArrayRef(ptindex));
+            Value *tindex0 = ctx.builder.CreateExtractValue(obj, ArrayRef<unsigned>(ptindex));
             Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1), tindex0);
+            setNameWithField(ctx.emission_context, tindex, get_objname, jt, idx, Twine(".tindex"));
             return mark_julia_slot(lv, jfty, tindex, ctx.tbaa().tbaa_stack);
         }
         else {
@@ -2663,7 +3292,8 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                 st_idx = convert_struct_offset(ctx, T, byte_offset);
             else
                 llvm_unreachable("encountered incompatible type for a struct");
-            fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(st_idx));
+            fldv = ctx.builder.CreateExtractValue(obj, ArrayRef<unsigned>(st_idx));
+            setNameWithField(ctx.emission_context, fldv, get_objname, jt, idx, Twine());
         }
         if (maybe_null) {
             Value *first_ptr = jl_field_isptr(jt, idx) ? fldv : extract_first_ptr(ctx, fldv);
@@ -2694,394 +3324,125 @@ static Value *emit_n_varargs(jl_codectx_t &ctx)
 #endif
 }
 
-static bool arraytype_constdim(jl_value_t *ty, size_t *dim)
+static Value *emit_genericmemoryelsize(jl_codectx_t &ctx, Value *v, jl_value_t *typ, bool add_isunion)
 {
-    if (jl_is_array_type(ty) && jl_is_long(jl_tparam1(ty))) {
-        *dim = jl_unbox_long(jl_tparam1(ty));
-        return true;
+    ++EmittedArrayElsize;
+    jl_datatype_t *sty = (jl_datatype_t*)jl_unwrap_unionall(typ);
+    if (jl_is_datatype(sty) && !jl_has_free_typevars((jl_value_t*)sty) && sty->layout) {
+        if (jl_is_genericmemoryref_type(sty))
+            sty = (jl_datatype_t*)jl_field_type_concrete(sty, 1);
+        size_t sz = sty->layout->size;
+        if (sty->layout->flags.arrayelem_isunion && add_isunion)
+            sz++;
+        auto elsize = ConstantInt::get(ctx.types().T_size, sz);
+        return elsize;
+    }
+    else {
+        Value *t = emit_typeof(ctx, v, false, false, true);
+        Value *elsize = emit_datatype_size(ctx, t, add_isunion);
+        elsize = ctx.builder.CreateZExt(elsize, ctx.types().T_size);
+        setName(ctx.emission_context, elsize, "elsize");
+        return elsize;
     }
-    return false;
-}
-
-static bool arraytype_constshape(jl_value_t *ty)
-{
-    size_t dim;
-    if (!arraytype_constdim(ty, &dim))
-        return false;
-    return dim != 1;
 }
 
-static bool arraytype_constelsize(jl_datatype_t *ty, size_t *elsz)
+static ssize_t genericmemoryype_constelsize(jl_value_t *typ)
 {
-    assert(jl_is_array_type(ty));
-    jl_value_t *ety = jl_tparam0(ty);
-    if (jl_has_free_typevars(ety))
-        return false;
-    // `jl_islayout_inline` requires `*elsz` and `al` to be initialized.
-    size_t al = 0;
-    *elsz = 0;
-    int union_max = jl_islayout_inline(ety, elsz, &al);
-    bool isboxed = (union_max == 0);
-    if (isboxed) {
-        *elsz = sizeof(void*);
-    }
-    else if (jl_is_primitivetype(ety)) {
-        // Primitive types should use the array element size, but
-        // this can be different from the type's size
-        *elsz = LLT_ALIGN(*elsz, al);
+    jl_datatype_t *sty = (jl_datatype_t*)jl_unwrap_unionall(typ);
+    if (jl_is_datatype(sty) && !jl_has_free_typevars((jl_value_t*)sty) && sty->layout) {
+        if (jl_is_array_type(sty))
+            sty = (jl_datatype_t*)jl_field_type_concrete(sty, 0);
+        if (jl_is_genericmemoryref_type(sty))
+            sty = (jl_datatype_t*)jl_field_type_concrete(sty, 1);
+        return sty->layout->size;
     }
-    return true;
+    return -1;
 }
 
-static intptr_t arraytype_maxsize(jl_value_t *ty)
+static intptr_t genericmemoryype_maxsize(jl_value_t *ty) // the maxsize is strictly less than the return value
 {
-    if (!jl_is_array_type(ty))
-        return INTPTR_MAX;
-    size_t elsz;
-    if (arraytype_constelsize((jl_datatype_t*)ty, &elsz) || elsz == 0)
+    ssize_t elsz = genericmemoryype_constelsize(ty);
+    if (elsz <= 1)
         return INTPTR_MAX;
     return INTPTR_MAX / elsz;
 }
 
-static Value *emit_arraylen(jl_codectx_t &ctx, const jl_cgval_t &tinfo);
-
-static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *dim)
+static Value *emit_genericmemorylen(jl_codectx_t &ctx, Value *addr, jl_value_t *typ)
 {
-    size_t ndim;
-    MDNode *tbaa = ctx.tbaa().tbaa_arraysize;
-    if (arraytype_constdim(tinfo.typ, &ndim)) {
-        if (ndim == 0)
-            return ConstantInt::get(ctx.types().T_size, 1);
-        if (ndim == 1) {
-            if (auto d = dyn_cast<ConstantInt>(dim)) {
-                if (d->getZExtValue() == 1) {
-                    return emit_arraylen(ctx, tinfo);
-                }
-            }
-        }
-        if (ndim > 1) {
-            if (tinfo.constant && isa<ConstantInt>(dim)) {
-                auto n = cast<ConstantInt>(dim)->getZExtValue() - 1;
-                return ConstantInt::get(ctx.types().T_size, jl_array_dim(tinfo.constant, n));
-            }
-            tbaa = ctx.tbaa().tbaa_const;
-        }
-    }
-    ++EmittedArraysize;
-    Value *t = boxed(ctx, tinfo);
-    int o = offsetof(jl_array_t, nrows) / sizeof(void*) - 1;
-    auto load = emit_nthptr_recast(ctx,
-            t,
-            ctx.builder.CreateAdd(dim, ConstantInt::get(dim->getType(), o)),
-            tbaa, ctx.types().T_size);
-    setName(ctx.emission_context, load, "arraysize");
+    addr = decay_derived(ctx, addr);
+    addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, addr, 0);
+    LoadInst *LI = ctx.builder.CreateAlignedLoad(ctx.types().T_jlgenericmemory->getElementType(0), addr, Align(sizeof(size_t)));
+    jl_aliasinfo_t aliasinfo_mem = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen);
+    aliasinfo_mem.decorateInst(LI);
     MDBuilder MDB(ctx.builder.getContext());
-    auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, arraytype_maxsize(tinfo.typ)));
-    load->setMetadata(LLVMContext::MD_range, rng);
-    return load;
-}
-
-static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int dim)
-{
-    return emit_arraysize(ctx, tinfo, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), dim));
-}
-
-static Value *emit_vectormaxsize(jl_codectx_t &ctx, const jl_cgval_t &ary)
-{
-    return emit_arraysize(ctx, ary, 2); // maxsize aliases ncols in memory layout for vector
-}
-
-static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
-{
-    size_t ndim;
-    jl_value_t *ty = tinfo.typ;
-    MDNode *tbaa = ctx.tbaa().tbaa_arraylen;
-    if (arraytype_constdim(ty, &ndim)) {
-        if (ndim == 0)
-            return ConstantInt::get(ctx.types().T_size, 1);
-        if (ndim != 1) {
-            if (tinfo.constant)
-                return ConstantInt::get(ctx.types().T_size, jl_array_len(tinfo.constant));
-            tbaa = ctx.tbaa().tbaa_const;
-        }
-    }
-    ++EmittedArraylen;
-    Value *t = boxed(ctx, tinfo);
-    Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray,
-            emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray),
-            1); //index (not offset) of length field in ctx.types().T_pjlarray
-    setName(ctx.emission_context, addr, "arraylen_ptr");
-    LoadInst *len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, addr, ctx.types().alignof_ptr);
-    setName(ctx.emission_context, len, "arraylen");
-    len->setOrdering(AtomicOrdering::NotAtomic);
-    MDBuilder MDB(ctx.builder.getContext());
-    auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, arraytype_maxsize(tinfo.typ)));
-    len->setMetadata(LLVMContext::MD_range, rng);
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
-    return ai.decorateInst(len);
-}
-
-static Value *emit_arraylen(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
-{
-    return emit_arraylen_prim(ctx, tinfo);
+    auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, genericmemoryype_maxsize(typ)));
+    LI->setMetadata(LLVMContext::MD_range, rng);
+    setName(ctx.emission_context, LI, "memory_len");
+    return LI;
 }
 
-static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *t, unsigned AS, bool isboxed)
+static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_datatype_layout_t *layout, unsigned AS)
 {
     ++EmittedArrayptr;
-    Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray,
-                                              emit_bitcast(ctx, t, ctx.types().T_pjlarray), 0);
-    setName(ctx.emission_context, addr, "arrayptr_ptr");
-    // Normally allocated array of 0 dimension always have a inline pointer.
-    // However, we can't rely on that here since arrays can also be constructed from C pointers.
-    PointerType *PT = cast<PointerType>(addr->getType());
-    PointerType *PPT = cast<PointerType>(ctx.types().T_jlarray->getElementType(0));
-    PointerType *LoadT = PPT;
-
-    if (isboxed) {
-        LoadT = PointerType::get(ctx.types().T_prjlvalue, AS);
-    }
-    else if (AS != PPT->getAddressSpace()) {
-        LoadT = PointerType::getWithSamePointeeType(PPT, AS);
-    }
-    if (LoadT != PPT) {
-        const auto Ty = PointerType::get(LoadT, PT->getAddressSpace());
-        addr = ctx.builder.CreateBitCast(addr, Ty);
-    }
-
-    LoadInst *LI = ctx.builder.CreateAlignedLoad(LoadT, addr, Align(sizeof(char *)));
-    setName(ctx.emission_context, LI, "arrayptr");
+    Value *addr = mem;
+    addr = decay_derived(ctx, addr);
+    addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, addr, 1);
+    setName(ctx.emission_context, addr, "memory_data_ptr");
+    PointerType *PPT = cast<PointerType>(ctx.types().T_jlgenericmemory->getElementType(1));
+    LoadInst *LI = ctx.builder.CreateAlignedLoad(PPT, addr, Align(sizeof(char*)));
     LI->setOrdering(AtomicOrdering::NotAtomic);
     LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None));
-    jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, arraytype_constshape(tinfo.typ) ? ctx.tbaa().tbaa_const : ctx.tbaa().tbaa_arrayptr);
+    jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr);
     aliasinfo.decorateInst(LI);
-
-    return LI;
-}
-
-static Value *emit_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, bool isboxed = false)
-{
-    Value *t = boxed(ctx, tinfo);
-    return emit_arrayptr_internal(ctx, tinfo, decay_derived(ctx, t), AddressSpace::Loaded, isboxed);
-}
-
-static Value *emit_unsafe_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, bool isboxed = false)
-{
-    Value *t = boxed(ctx, tinfo);
-    t = emit_pointer_from_objref(ctx, decay_derived(ctx, t));
-    return emit_arrayptr_internal(ctx, tinfo, t, 0, isboxed);
-}
-
-static Value *emit_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, jl_value_t *ex, bool isboxed = false)
-{
-    return emit_arrayptr(ctx, tinfo, isboxed);
-}
-
-static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, jl_value_t *ex, int dim)
-{
-    return emit_arraysize(ctx, tinfo, dim);
-}
-
-static Value *emit_arrayflags(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
-{
-    ++EmittedArrayflags;
-    Value *t = boxed(ctx, tinfo);
-    int arrayflag_field = 2;
-    Value *addr = ctx.builder.CreateStructGEP(
-            ctx.types().T_jlarray,
-            emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray),
-            arrayflag_field);
-    setName(ctx.emission_context, addr, "arrayflags_ptr");
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayflags);
-    auto flags = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t))));
-    setName(ctx.emission_context, flags, "arrayflags");
-    return flags;
-}
-
-static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary)
-{
-    ++EmittedArrayNDims;
-    Value *flags = emit_arrayflags(ctx, ary);
-    cast<LoadInst>(flags)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(ctx.builder.getContext(), None));
-    flags = ctx.builder.CreateLShr(flags, 2);
-    flags = ctx.builder.CreateAnd(flags, 0x1FF); // (1<<9) - 1
-    setName(ctx.emission_context, flags, "arrayndims");
-    return flags;
-}
-
-static Value *emit_arrayelsize(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
-{
-    ++EmittedArrayElsize;
-    Value *t = boxed(ctx, tinfo);
-    int elsize_field = 3;
-    Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray,
-            emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray),
-            elsize_field);
-    setName(ctx.emission_context, addr, "arrayelsize_ptr");
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-    auto elsize = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t))));
-    setName(ctx.emission_context, elsize, "arrayelsize");
-    return elsize;
-}
-
-static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int nd)
-{
-    ++EmittedArrayOffset;
-    if (nd != -1 && nd != 1) // only Vector can have an offset
-        return ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0);
-    Value *t = boxed(ctx, tinfo);
-    int offset_field = 4;
-
-    Value *addr = ctx.builder.CreateStructGEP(
-            ctx.types().T_jlarray,
-            emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray),
-            offset_field);
-    setName(ctx.emission_context, addr, "arrayoffset_ptr");
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayoffset);
-    auto offset = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), addr, Align(sizeof(int32_t))));
-    setName(ctx.emission_context, offset, "arrayoffset");
-    return offset;
-}
-
-// Returns the size of the array represented by `tinfo` for the given dimension `dim` if
-// `dim` is a valid dimension, otherwise returns constant one.
-static Value *emit_arraysize_for_unsafe_dim(jl_codectx_t &ctx,
-        const jl_cgval_t &tinfo, jl_value_t *ex, size_t dim, size_t nd)
-{
-    return dim > nd ? ConstantInt::get(ctx.types().T_size, 1) : emit_arraysize(ctx, tinfo, ex, dim);
+    Value *ptr = LI;
+    if (AS) {
+        assert(AS == AddressSpace::Loaded);
+        ptr = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, ptr });
+    }
+    setName(ctx.emission_context, ptr, "memory_data");
+    return ptr;
 }
 
-// `nd == -1` means the dimension is unknown.
-static Value *emit_array_nd_index(
-        jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_value_t *ex, ssize_t nd,
-        const jl_cgval_t *argv, size_t nidxs, jl_value_t *inbounds)
+static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t)
 {
-    ++EmittedArrayNdIndex;
-    Value *a = boxed(ctx, ainfo);
-    Value *i = Constant::getNullValue(ctx.types().T_size);
-    Value *stride = ConstantInt::get(ctx.types().T_size, 1);
-#if CHECK_BOUNDS==1
-    bool bc = bounds_check_enabled(ctx, inbounds);
-    BasicBlock *failBB = NULL, *endBB = NULL;
-    if (bc) {
-        failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
-        endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend");
-    }
-#endif
-    SmallVector<Value *> idxs(nidxs);
-    for (size_t k = 0; k < nidxs; k++) {
-        idxs[k] = emit_unbox(ctx, ctx.types().T_size, argv[k], (jl_value_t*)jl_long_type); // type asserted by caller
-    }
-    Value *ii = NULL;
-    for (size_t k = 0; k < nidxs; k++) {
-        ii = ctx.builder.CreateSub(idxs[k], ConstantInt::get(ctx.types().T_size, 1));
-        i = ctx.builder.CreateAdd(i, ctx.builder.CreateMul(ii, stride));
-        if (k < nidxs - 1) {
-            assert(nd >= 0);
-            Value *d = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k + 1, nd);
-#if CHECK_BOUNDS==1
-            if (bc) {
-                BasicBlock *okBB = BasicBlock::Create(ctx.builder.getContext(), "ib");
-                // if !(i < d) goto error
-                auto bc = ctx.builder.CreateICmpULT(ii, d);
-                setName(ctx.emission_context, bc, "inbounds");
-                ctx.builder.CreateCondBr(bc, okBB, failBB);
-                ctx.f->getBasicBlockList().push_back(okBB);
-                ctx.builder.SetInsertPoint(okBB);
-            }
-#endif
-            stride = ctx.builder.CreateMul(stride, d);
-            setName(ctx.emission_context, stride, "stride");
-        }
-    }
-#if CHECK_BOUNDS==1
-    if (bc) {
-        // We have already emitted a bounds check for each index except for
-        // the last one which we therefore have to do here.
-        if (nidxs == 1) {
-            // Linear indexing: Check against the entire linear span of the array
-            Value *alen = emit_arraylen(ctx, ainfo);
-            auto bc = ctx.builder.CreateICmpULT(i, alen);
-            setName(ctx.emission_context, bc, "inbounds");
-            ctx.builder.CreateCondBr(bc, endBB, failBB);
-        } else if (nidxs >= (size_t)nd){
-            // No dimensions were omitted; just check the last remaining index
-            assert(nd >= 0);
-            Value *last_index = ii;
-            Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd);
-            auto bc = ctx.builder.CreateICmpULT(last_index, last_dimension);
-            setName(ctx.emission_context, bc, "inbounds");
-            ctx.builder.CreateCondBr(bc, endBB, failBB);
-        } else {
-            // There were fewer indices than dimensions; check the last remaining index
-            BasicBlock *checktrailingdimsBB = BasicBlock::Create(ctx.builder.getContext(), "dimsib");
-            assert(nd >= 0);
-            Value *last_index = ii;
-            Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd);
-            auto bc = ctx.builder.CreateICmpULT(last_index, last_dimension);
-            setName(ctx.emission_context, bc, "inbounds");
-            ctx.builder.CreateCondBr(bc, checktrailingdimsBB, failBB);
-            ctx.f->getBasicBlockList().push_back(checktrailingdimsBB);
-            ctx.builder.SetInsertPoint(checktrailingdimsBB);
-            // And then also make sure that all dimensions that weren't explicitly
-            // indexed into have size 1
-            for (size_t k = nidxs+1; k < (size_t)nd; k++) {
-                BasicBlock *dimsokBB = BasicBlock::Create(ctx.builder.getContext(), "dimsok");
-                Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k, nd);
-                auto bc = ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1));
-                setName(ctx.emission_context, bc, "inbounds");
-                ctx.builder.CreateCondBr(bc, dimsokBB, failBB);
-                ctx.f->getBasicBlockList().push_back(dimsokBB);
-                ctx.builder.SetInsertPoint(dimsokBB);
-            }
-            Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nd, nd);
-            auto bc2 = ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1));
-            setName(ctx.emission_context, bc2, "inbounds");
-            ctx.builder.CreateCondBr(bc2, endBB, failBB);
-        }
-
-        ctx.f->getBasicBlockList().push_back(failBB);
-        ctx.builder.SetInsertPoint(failBB);
-        // CreateAlloca is OK here since we are on an error branch
-        Value *tmp = ctx.builder.CreateAlloca(ctx.types().T_size, ConstantInt::get(ctx.types().T_size, nidxs));
-        setName(ctx.emission_context, tmp, "errorbox");
-        for (size_t k = 0; k < nidxs; k++) {
-            ctx.builder.CreateAlignedStore(idxs[k], ctx.builder.CreateInBoundsGEP(ctx.types().T_size, tmp, ConstantInt::get(ctx.types().T_size, k)), ctx.types().alignof_ptr);
-        }
-        ctx.builder.CreateCall(prepare_call(jlboundserrorv_func),
-            { mark_callee_rooted(ctx, a), tmp, ConstantInt::get(ctx.types().T_size, nidxs) });
-        ctx.builder.CreateUnreachable();
-
-        ctx.f->getBasicBlockList().push_back(endBB);
-        ctx.builder.SetInsertPoint(endBB);
-    }
-#endif
-
-    return i;
+    Value *m = decay_derived(ctx, t);
+    Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, m, 1);
+    Type *T_data = ctx.types().T_jlgenericmemory->getElementType(1);
+    LoadInst *LI = ctx.builder.CreateAlignedLoad(T_data, addr, Align(sizeof(char*)));
+    LI->setOrdering(AtomicOrdering::NotAtomic);
+    LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None));
+    jl_aliasinfo_t aliasinfo_mem = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryown);
+    aliasinfo_mem.decorateInst(LI);
+    addr = emit_ptrgep(ctx, m, JL_SMALL_BYTE_ALIGNMENT);
+    Value *foreign = ctx.builder.CreateICmpNE(addr, decay_derived(ctx, LI));
+    return emit_guarded_test(ctx, foreign, t, [&] {
+            addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_jlgenericmemory, m, 1);
+            LoadInst *owner = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(void*)));
+            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr);
+            ai.decorateInst(owner);
+            return ctx.builder.CreateSelect(ctx.builder.CreateIsNull(owner), t, owner);
+        });
 }
 
 // --- boxing ---
 
-static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt);
+static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt, bool fully_initialized);
 
 static void init_bits_value(jl_codectx_t &ctx, Value *newv, Value *v, MDNode *tbaa,
-                            unsigned alignment = sizeof(void*)) // min alignment in julia's gc is pointer-aligned
+                            Align alignment = Align(sizeof(void*))) // min alignment in julia's gc is pointer-aligned
 {
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
     // newv should already be tagged
-    ai.decorateInst(ctx.builder.CreateAlignedStore(v, emit_bitcast(ctx, newv,
-        PointerType::get(v->getType(), 0)), Align(alignment)));
+    ai.decorateInst(ctx.builder.CreateAlignedStore(v, newv, alignment));
 }
 
-static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v, MDNode *tbaa)
+static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t &v)
 {
-    // newv should already be tagged
-    if (v.ispointer()) {
-        emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), sizeof(void*));
-    }
-    else {
-        init_bits_value(ctx, newv, v.V, tbaa);
-    }
+    MDNode *tbaa = jl_is_mutable(v.typ) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut;
+    unsigned alignment = julia_alignment(v.typ);
+    unsigned newv_align = std::max(alignment, (unsigned)sizeof(void*));
+    newv = maybe_decay_tracked(ctx, newv);
+    emit_unbox_store(ctx, v, newv, tbaa, Align(alignment), Align(newv_align));
 }
 
 static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant *constant, jl_value_t *jt)
@@ -3125,14 +3486,14 @@ static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant
     if (const auto *CC = dyn_cast<ConstantAggregate>(constant))
         nargs = CC->getNumOperands();
     else if (const auto *CAZ = dyn_cast<ConstantAggregateZero>(constant)) {
-        // SVE: Elsewhere we use `getMinKownValue`
+        // SVE: Elsewhere we use `getMinKnownValue`
         nargs = CAZ->getElementCount().getFixedValue();
     }
     else if (const auto *CDS = dyn_cast<ConstantDataSequential>(constant))
         nargs = CDS->getNumElements();
     else
         return NULL;
-    assert(nargs > 0 && jst->instance == NULL);
+    assert(nargs > 0 && !jl_is_datatype_singleton(jst));
     if (nargs != jl_datatype_nfields(jst))
         return NULL;
 
@@ -3168,8 +3529,6 @@ static Value *call_with_attrs(jl_codectx_t &ctx, JuliaFunction<TypeFn_t> *intr,
     return Call;
 }
 
-static jl_value_t *jl_ensure_rooted(jl_codectx_t &ctx, jl_value_t *val);
-
 static Value *as_value(jl_codectx_t &ctx, Type *to, const jl_cgval_t &v)
 {
     assert(!v.isboxed);
@@ -3198,11 +3557,13 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t
     if (t == getInt1Ty(ctx.builder.getContext()))
         return track_pjlvalue(ctx, julia_bool(ctx, as_value(ctx, t, vinfo)));
 
-    if (ctx.linfo && jl_is_method(ctx.linfo->def.method) && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel
+    if (ctx.linfo && jl_is_method(ctx.linfo->def.method) && vinfo.inline_roots.empty() && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel
         if (Constant *c = dyn_cast<Constant>(vinfo.V)) {
             jl_value_t *s = static_constant_instance(jl_Module->getDataLayout(), c, jt);
             if (s) {
-                s = jl_ensure_rooted(ctx, s);
+                JL_GC_PUSH1(&s);
+                jl_temporary_root(ctx, s);
+                JL_GC_POP();
                 return track_pjlvalue(ctx, literal_pointer_val(ctx, s));
             }
         }
@@ -3235,17 +3596,19 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t
     else if (jb == jl_char_type)
         box = call_with_attrs(ctx, box_char_func, as_value(ctx, t, vinfo));
     else if (jb == jl_ssavalue_type) {
-        unsigned zero = 0;
         Value *v = as_value(ctx, t, vinfo);
         assert(v->getType() == ctx.emission_context.llvmtypes[jl_ssavalue_type]);
-        v = ctx.builder.CreateExtractValue(v, makeArrayRef(&zero, 1));
+        v = ctx.builder.CreateExtractValue(v, 0);
         box = call_with_attrs(ctx, box_ssavalue_func, v);
     }
     else if (!jb->name->abstract && jl_datatype_nbits(jb) == 0) {
         // singleton
-        assert(jb->instance != NULL);
+        assert(jl_is_datatype_singleton(jb));
         return track_pjlvalue(ctx, literal_pointer_val(ctx, jb->instance));
     }
+    if (box) {
+        setName(ctx.emission_context, box, [&]() {return "box_" + std::string(jl_symbol_name(jb->name->name));});
+    }
     return box;
 }
 
@@ -3262,7 +3625,7 @@ static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype_tag, jl_valu
             },
             ut,
             counter);
-    setName(ctx.emission_context, tindex, "tindex");
+    setName(ctx.emission_context, tindex, datatype_tag->getName() + ".tindex");
     return tindex;
 }
 
@@ -3279,6 +3642,7 @@ static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, j
     return compute_box_tindex(ctx, typof, val.typ, typ);
 }
 
+
 static void union_alloca_type(jl_uniontype_t *ut,
         bool &allunbox, size_t &nbytes, size_t &align, size_t &min_align)
 {
@@ -3291,7 +3655,7 @@ static void union_alloca_type(jl_uniontype_t *ut,
             [&](unsigned idx, jl_datatype_t *jt) {
                 if (!jl_is_datatype_singleton(jt)) {
                     size_t nb1 = jl_datatype_size(jt);
-                    size_t align1 = jl_datatype_align(jt);
+                    size_t align1 = julia_alignment((jl_value_t*)jt);
                     if (nb1 > nbytes)
                         nbytes = nb1;
                     if (align1 > align)
@@ -3302,6 +3666,8 @@ static void union_alloca_type(jl_uniontype_t *ut,
             },
             (jl_value_t*)ut,
             counter);
+    if (align > JL_HEAP_ALIGNMENT)
+        align = JL_HEAP_ALIGNMENT;
 }
 
 static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, bool &allunbox, size_t &min_align, size_t &nbytes)
@@ -3310,12 +3676,9 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut,
     union_alloca_type(ut, allunbox, nbytes, align, min_align);
     if (nbytes > 0) {
         // at least some of the values can live on the stack
-        // try to pick an Integer type size such that SROA will emit reasonable code
-        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align);
-        AllocaInst *lv = emit_static_alloca(ctx, AT);
+        assert(align % min_align == 0);
+        AllocaInst *lv = emit_static_alloca(ctx, nbytes, Align(align));
         setName(ctx.emission_context, lv, "unionalloca");
-        if (align > 1)
-            lv->setAlignment(Align(align));
         return lv;
     }
     return NULL;
@@ -3326,7 +3689,7 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut,
  * returning `Constant::getNullValue(ctx.types().T_pjlvalue)` in one of the skipped cases. If `skip` is not empty,
  * skip[0] (corresponding to unknown boxed) must always be set. In that
  * case, the calling code must separately deal with the case where
- * `vinfo` is already an unknown boxed union (union tag 0x80).
+ * `vinfo` is already an unknown boxed union (union tag UNION_BOX_MARKER).
  */
 // Returns ctx.types().T_prjlvalue
 static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip)
@@ -3369,9 +3732,9 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB
                     jl_cgval_t vinfo_r = jl_cgval_t(vinfo, (jl_value_t*)jt, NULL);
                     box = _boxed_special(ctx, vinfo_r, t);
                     if (!box) {
-                        box = emit_allocobj(ctx, jt);
+                        box = emit_allocobj(ctx, jt, true);
                         setName(ctx.emission_context, box, "unionbox");
-                        init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut);
+                        init_bits_cgval(ctx, box, vinfo_r);
                     }
                 }
                 tempBB = ctx.builder.GetInsertBlock(); // could have changed
@@ -3387,7 +3750,11 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB
         ctx.builder.CreateBr(postBB);
     }
     else if (!vinfo.Vboxed) {
+#if JL_LLVM_VERSION >= 200000
+        Function *trap_func = Intrinsic::getOrInsertDeclaration(
+#else
         Function *trap_func = Intrinsic::getDeclaration(
+#endif
                 ctx.f->getParent(),
                 Intrinsic::trap);
         ctx.builder.CreateCall(trap_func);
@@ -3414,7 +3781,7 @@ static Function *mangleIntrinsic(IntrinsicInst *call) //mangling based on replac
 
     auto newfType = FunctionType::get(
             oldfType->getReturnType(),
-            makeArrayRef(argTys).slice(0, oldfType->getNumParams()),
+            ArrayRef<Type*>(argTys).slice(0, oldfType->getNumParams()),
             oldfType->isVarArg());
 
     // Accumulate an array of overloaded types for the given intrinsic
@@ -3431,7 +3798,11 @@ static Function *mangleIntrinsic(IntrinsicInst *call) //mangling based on replac
         assert(matchvararg);
         (void)matchvararg;
     }
+#if JL_LLVM_VERSION >= 200000
+    auto newF = Intrinsic::getOrInsertDeclaration(call->getModule(), ID, overloadTys);
+#else
     auto newF = Intrinsic::getDeclaration(call->getModule(), ID, overloadTys);
+#endif
     assert(newF->getFunctionType() == newfType);
     newF->setCallingConv(call->getCallingConv());
     return newF;
@@ -3444,7 +3815,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig
     for (auto *User : Val->users()) {
         if (isa<GetElementPtrInst>(User)) {
             GetElementPtrInst *Inst = cast<GetElementPtrInst>(User);
-            Inst->mutateType(PointerType::getWithSamePointeeType(cast<PointerType>(Inst->getType()), ToAS));
+            Inst->mutateType(PointerType::get(Inst->getContext(), ToAS));
             recursively_adjust_ptr_type(Inst, FromAS, ToAS);
         }
         else if (isa<IntrinsicInst>(User)) {
@@ -3453,7 +3824,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig
         }
         else if (isa<BitCastInst>(User)) {
             BitCastInst *Inst = cast<BitCastInst>(User);
-            Inst->mutateType(PointerType::getWithSamePointeeType(cast<PointerType>(Inst->getType()), ToAS));
+            Inst->mutateType(PointerType::get(Inst->getContext(), ToAS));
             recursively_adjust_ptr_type(Inst, FromAS, ToAS);
         }
     }
@@ -3486,21 +3857,20 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab
         box = box_union(ctx, vinfo, skip_none);
     }
     else {
-        assert(vinfo.V && "Missing data for unboxed value.");
+        assert((vinfo.V || !vinfo.inline_roots.empty()) && "Missing data for unboxed value.");
         assert(jl_is_concrete_immutable(jt) && "This type shouldn't have been unboxed.");
         Type *t = julia_type_to_llvm(ctx, jt);
         assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above!
         box = _boxed_special(ctx, vinfo, t);
         if (!box) {
             bool do_promote = vinfo.promotion_point;
-            if (do_promote && is_promotable) {
+            if (do_promote && is_promotable && vinfo.inline_roots.empty()) {
                 auto IP = ctx.builder.saveIP();
                 ctx.builder.SetInsertPoint(vinfo.promotion_point);
-                box = emit_allocobj(ctx, (jl_datatype_t*)jt);
+                box = emit_allocobj(ctx, (jl_datatype_t*)jt, true);
                 Value *decayed = decay_derived(ctx, box);
                 AllocaInst *originalAlloca = cast<AllocaInst>(vinfo.V);
                 box->takeName(originalAlloca);
-                decayed = maybe_bitcast(ctx, decayed, PointerType::getWithSamePointeeType(originalAlloca->getType(), AddressSpace::Derived));
                 // Warning: Very illegal IR here temporarily
                 originalAlloca->mutateType(decayed->getType());
                 recursively_adjust_ptr_type(originalAlloca, 0, AddressSpace::Derived);
@@ -3508,10 +3878,14 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab
                 // end illegal IR
                 originalAlloca->eraseFromParent();
                 ctx.builder.restoreIP(IP);
-            } else {
-                box = emit_allocobj(ctx, (jl_datatype_t*)jt);
-                setName(ctx.emission_context, box, "box");
-                init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut);
+            }
+            else {
+                auto arg_typename = [&] JL_NOTSAFEPOINT {
+                    return "box::" + std::string(jl_symbol_name(((jl_datatype_t*)(jt))->name->name));
+                };
+                box = emit_allocobj(ctx, (jl_datatype_t*)jt, true);
+                setName(ctx.emission_context, box, arg_typename);
+                init_bits_cgval(ctx, box, vinfo);
             }
         }
     }
@@ -3524,30 +3898,25 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
     if (AllocaInst *ai = dyn_cast<AllocaInst>(dest))
         // TODO: make this a lifetime_end & dereferenceable annotation?
         ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai, ai->getAlign());
-    if (jl_is_concrete_type(src.typ) || src.constant) {
-        jl_value_t *typ = src.constant ? jl_typeof(src.constant) : src.typ;
+    if (src.constant) {
+        jl_value_t *typ = jl_typeof(src.constant);
         assert(skip || jl_is_pointerfree(typ));
         if (jl_is_pointerfree(typ)) {
-            unsigned alignment = julia_alignment(typ);
-            if (!src.ispointer() || src.constant) {
-                emit_unbox_store(ctx, src, dest, tbaa_dst, alignment, isVolatile);
-            }
-            else {
-                Value *src_ptr = data_pointer(ctx, src);
-                unsigned nb = jl_datatype_size(typ);
-                // TODO: this branch may be bad for performance, but is necessary to work around LLVM bugs with the undef option that we want to use:
-                //   select copy dest -> dest to simulate an undef value / conditional copy
-                // if (skip) src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr);
-                auto f = [&] {
-                    (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
-                                      jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, isVolatile);
-                    return nullptr;
-                };
-                if (skip)
-                    emit_guarded_test(ctx, skip, nullptr, f);
-                else
-                    f();
-            }
+            emit_guarded_test(ctx, skip, nullptr, [&] {
+                unsigned alignment = julia_alignment(typ);
+                emit_unbox_store(ctx, mark_julia_const(ctx, src.constant), dest, tbaa_dst, Align(alignment), Align(alignment), isVolatile);
+                return nullptr;
+            });
+        }
+    }
+    else if (jl_is_concrete_type(src.typ)) {
+        assert(skip || jl_is_pointerfree(src.typ));
+        if (jl_is_pointerfree(src.typ)) {
+            emit_guarded_test(ctx, skip, nullptr, [&] {
+                unsigned alignment = julia_alignment(src.typ);
+                emit_unbox_store(ctx, src, dest, tbaa_dst, Align(alignment), Align(alignment), isVolatile);
+                return nullptr;
+            });
         }
     }
     else if (src.TIndex) {
@@ -3555,8 +3924,6 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
         if (skip)
             tindex = ctx.builder.CreateSelect(skip, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), tindex);
         Value *src_ptr = data_pointer(ctx, src);
-        src_ptr = src_ptr ? maybe_bitcast(ctx, src_ptr, getInt8PtrTy(ctx.builder.getContext())) : src_ptr;
-        dest = maybe_bitcast(ctx, dest, getInt8PtrTy(ctx.builder.getContext()));
         BasicBlock *defaultBB = BasicBlock::Create(ctx.builder.getContext(), "union_move_skip", ctx.f);
         SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB);
         BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_union_move", ctx.f);
@@ -3571,13 +3938,17 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
                     if (nb > 0) {
                         if (!src_ptr) {
                             Function *trap_func =
+#if JL_LLVM_VERSION >= 200000
+                                Intrinsic::getOrInsertDeclaration(ctx.f->getParent(), Intrinsic::trap);
+#else
                                 Intrinsic::getDeclaration(ctx.f->getParent(), Intrinsic::trap);
+#endif
                             ctx.builder.CreateCall(trap_func);
                             ctx.builder.CreateUnreachable();
                             return;
                         } else {
                             emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
-                                        jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, isVolatile);
+                                        jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, Align(alignment), Align(alignment), isVolatile);
                         }
                     }
                     ctx.builder.CreateBr(postBB);
@@ -3586,7 +3957,11 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
                 counter);
         ctx.builder.SetInsertPoint(defaultBB);
         if (!skip && allunboxed && (src.V == NULL || isa<AllocaInst>(src.V))) {
+#if JL_LLVM_VERSION >= 200000
+            Function *trap_func = Intrinsic::getOrInsertDeclaration(
+#else
             Function *trap_func = Intrinsic::getDeclaration(
+#endif
                     ctx.f->getParent(),
                     Intrinsic::trap);
             ctx.builder.CreateCall(trap_func);
@@ -3599,21 +3974,18 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
     }
     else {
         assert(src.isboxed && "expected boxed value for sizeof/alignment computation");
-        auto f = [&] {
+        emit_guarded_test(ctx, skip, nullptr, [&] {
             Value *datatype = emit_typeof(ctx, src, false, false);
             Value *copy_bytes = emit_datatype_size(ctx, datatype);
-            emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src, copy_bytes, /*TODO: min-align*/1, isVolatile);
+            emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src),
+                        jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, Align(1), Align(1), isVolatile);
             return nullptr;
-        };
-        if (skip)
-            emit_guarded_test(ctx, skip, nullptr, f);
-        else
-            f();
+        });
     }
 }
 
 
-static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std::string &msg)
+static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const Twine &msg)
 {
     ++EmittedCPointerChecks;
     Value *t = emit_typeof(ctx, x, false, false);
@@ -3630,13 +4002,14 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std
     just_emit_type_error(ctx, x, literal_pointer_val(ctx, (jl_value_t*)jl_pointer_type), msg);
     ctx.builder.CreateUnreachable();
 
-    ctx.f->getBasicBlockList().push_back(passBB);
+    passBB->insertInto(ctx.f);
     ctx.builder.SetInsertPoint(passBB);
 }
 
 // allocation for known size object
 // returns a prjlvalue
-static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
+static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt,
+                            bool fully_initialized, unsigned align)
 {
     ++EmittedAllocObjs;
     Value *current_task = get_current_task(ctx);
@@ -3644,19 +4017,22 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
     auto call = ctx.builder.CreateCall(F, {current_task, ConstantInt::get(ctx.types().T_size, static_size), maybe_decay_untracked(ctx, jt)});
     call->setAttributes(F->getAttributes());
     if (static_size > 0)
-        call->addRetAttr(Attribute::getWithDereferenceableBytes(ctx.builder.getContext(), static_size));
+        call->addRetAttr(Attribute::getWithDereferenceableBytes(call->getContext(), static_size));
+    call->addRetAttr(Attribute::getWithAlignment(call->getContext(), Align(align)));
+    if (fully_initialized)
+        call->addFnAttr(Attribute::get(call->getContext(), Attribute::AllocKind, uint64_t(AllocFnKind::Alloc | AllocFnKind::Uninitialized)));
     return call;
 }
 
-static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt)
+static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt, bool fully_initialized)
 {
-    return emit_allocobj(ctx, jl_datatype_size(jt), ctx.builder.CreateIntToPtr(emit_tagfrom(ctx, jt), ctx.types().T_pjlvalue));
+    return emit_allocobj(ctx, jl_datatype_size(jt), ctx.builder.CreateIntToPtr(emit_tagfrom(ctx, jt), ctx.types().T_pjlvalue),
+                         fully_initialized, julia_alignment((jl_value_t*)jt));
 }
 
 // allocation for unknown object from an untracked pointer
 static Value *emit_new_bits(jl_codectx_t &ctx, Value *jt, Value *pval)
 {
-    pval = ctx.builder.CreateBitCast(pval, getInt8PtrTy(ctx.builder.getContext()));
     Function *F = prepare_call(jl_newbits_func);
     auto call = ctx.builder.CreateCall(F, { jt, pval });
     call->setAttributes(F->getAttributes());
@@ -3666,7 +4042,7 @@ static Value *emit_new_bits(jl_codectx_t &ctx, Value *jt, Value *pval)
 // if ptr is NULL this emits a write barrier _back_
 static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, Value *ptr)
 {
-    emit_write_barrier(ctx, parent, makeArrayRef(ptr));
+    emit_write_barrier(ctx, parent, ArrayRef<Value*>(ptr));
 }
 
 static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef<Value*> ptrs)
@@ -3676,36 +4052,13 @@ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef<Value*
     if (ptrs.empty())
         return;
     SmallVector<Value*, 8> decay_ptrs;
-    decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, parent, ctx.types().T_prjlvalue)));
+    decay_ptrs.push_back(maybe_decay_untracked(ctx, parent));
     for (auto ptr : ptrs) {
-        decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, ptr, ctx.types().T_prjlvalue)));
+        decay_ptrs.push_back(maybe_decay_untracked(ctx, ptr));
     }
     ctx.builder.CreateCall(prepare_call(jl_write_barrier_func), decay_ptrs);
 }
 
-static void find_perm_offsets(jl_datatype_t *typ, SmallVector<unsigned,4> &res, unsigned offset)
-{
-    // This is a inlined field at `offset`.
-    if (!typ->layout || typ->layout->npointers == 0)
-        return;
-    jl_svec_t *types = jl_get_fieldtypes(typ);
-    size_t nf = jl_svec_len(types);
-    for (size_t i = 0; i < nf; i++) {
-        jl_value_t *_fld = jl_svecref(types, i);
-        if (!jl_is_datatype(_fld))
-            continue;
-        jl_datatype_t *fld = (jl_datatype_t*)_fld;
-        if (jl_field_isptr(typ, i)) {
-            // pointer field, check if field is perm-alloc
-            if (type_is_permalloc((jl_value_t*)fld))
-                res.push_back(offset + jl_field_offset(typ, i));
-            continue;
-        }
-        // inline field
-        find_perm_offsets(fld, res, offset + jl_field_offset(typ, i));
-    }
-}
-
 static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg,
                                     jl_value_t *jltype)
 {
@@ -3716,156 +4069,192 @@ static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg
     emit_write_barrier(ctx, parent, ptrs);
 }
 
+static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, const jl_cgval_t &x)
+{
+    auto ptrs = get_gc_roots_for(ctx, x, true);
+    emit_write_barrier(ctx, parent, ptrs);
+}
+
+static jl_cgval_t union_store(jl_codectx_t &ctx,
+        Value *ptr, Value *ptindex, jl_cgval_t rhs, jl_cgval_t cmp,
+        jl_value_t *jltype, MDNode *tbaa, MDNode *tbaa_tindex,
+        AtomicOrdering Order, AtomicOrdering FailOrder,
+        Value *needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, bool issetfieldonce,
+        const jl_cgval_t *modifyop, const Twine &fname)
+{
+    assert(Order == AtomicOrdering::NotAtomic);
+    if (issetfieldonce)
+        return mark_julia_const(ctx, jl_false);
+    size_t fsz = 0, al = 0;
+    int union_max = jl_islayout_inline(jltype, &fsz, &al);
+    assert(union_max > 0);
+    // compute tindex from rhs
+    jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jltype);
+    if (rhs_union.typ == jl_bottom_type)
+        return jl_cgval_t();
+    if (needlock)
+        emit_lockstate_value(ctx, needlock, true);
+    BasicBlock *ModifyBB = NULL;
+    if (ismodifyfield) {
+        ModifyBB = BasicBlock::Create(ctx.builder.getContext(), "modify_xchg", ctx.f);
+        ctx.builder.CreateBr(ModifyBB);
+        ctx.builder.SetInsertPoint(ModifyBB);
+    }
+    jl_cgval_t oldval = rhs;
+    if (!issetfield)
+        oldval = emit_unionload(ctx, ptr, ptindex, jltype, fsz, al, tbaa, true, union_max, tbaa_tindex);
+    Value *Success = NULL;
+    BasicBlock *DoneBB = NULL;
+    if (isreplacefield || ismodifyfield) {
+        if (ismodifyfield) {
+            if (needlock)
+                emit_lockstate_value(ctx, needlock, false);
+            const jl_cgval_t argv[3] = { cmp, oldval, rhs };
+            if (modifyop) {
+                rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type, true);
+            }
+            else {
+                Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call);
+                rhs = mark_julia_type(ctx, callval, true, jl_any_type);
+            }
+            emit_typecheck(ctx, rhs, jltype, fname);
+            rhs = update_julia_type(ctx, rhs, jltype);
+            rhs_union = convert_julia_type(ctx, rhs, jltype);
+            if (rhs_union.typ == jl_bottom_type)
+                return jl_cgval_t();
+            if (needlock)
+                emit_lockstate_value(ctx, needlock, true);
+            cmp = oldval;
+            oldval = emit_unionload(ctx, ptr, ptindex, jltype, fsz, al, tbaa, true, union_max, tbaa_tindex);
+        }
+        BasicBlock *XchgBB = BasicBlock::Create(ctx.builder.getContext(), "xchg", ctx.f);
+        DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg", ctx.f);
+        Success = emit_f_is(ctx, oldval, cmp);
+        ctx.builder.CreateCondBr(Success, XchgBB, ismodifyfield ? ModifyBB : DoneBB);
+        ctx.builder.SetInsertPoint(XchgBB);
+    }
+    Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jltype);
+    tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1));
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_tindex);
+    ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
+    // copy data
+    if (!rhs.isghost) {
+        emit_unionmove(ctx, ptr, tbaa, rhs, nullptr);
+    }
+    if (isreplacefield || ismodifyfield) {
+        ctx.builder.CreateBr(DoneBB);
+        ctx.builder.SetInsertPoint(DoneBB);
+    }
+    if (needlock)
+        emit_lockstate_value(ctx, needlock, false);
+    if (isreplacefield) {
+        Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
+        jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
+        jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
+        oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+    }
+    else if (ismodifyfield) {
+        jl_cgval_t argv[2] = {oldval, rhs};
+        jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
+        oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+    }
+    return oldval;
+}
+
 static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         jl_datatype_t *sty, const jl_cgval_t &strct, size_t idx0,
         jl_cgval_t rhs, jl_cgval_t cmp,
         bool wb, AtomicOrdering Order, AtomicOrdering FailOrder,
-        bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
-        const jl_cgval_t *modifyop, const std::string &fname)
+        Value *needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, bool issetfieldonce,
+        const jl_cgval_t *modifyop, const Twine &fname)
 {
+    auto get_objname = [&]() {
+        return strct.V ? strct.V->getName() : StringRef("");
+    };
     ++EmittedSetfield;
     assert(strct.ispointer());
     size_t byte_offset = jl_field_offset(sty, idx0);
+    auto tbaa = best_field_tbaa(ctx, strct, sty, idx0, byte_offset);
     Value *addr = data_pointer(ctx, strct);
     if (byte_offset > 0) {
-        addr = ctx.builder.CreateInBoundsGEP(
-                getInt8Ty(ctx.builder.getContext()),
-                emit_bitcast(ctx, addr, getInt8PtrTy(ctx.builder.getContext())),
-                ConstantInt::get(ctx.types().T_size, byte_offset)); // TODO: use emit_struct_gep
+        addr = emit_ptrgep(ctx, addr, byte_offset);
+        setNameWithField(ctx.emission_context, addr, get_objname, sty, idx0, Twine("_ptr"));
     }
     jl_value_t *jfty = jl_field_type(sty, idx0);
-    if (!jl_field_isptr(sty, idx0) && jl_is_uniontype(jfty)) {
-        size_t fsz = 0, al = 0;
-        int union_max = jl_islayout_inline(jfty, &fsz, &al);
-        bool isptr = (union_max == 0);
-        assert(!isptr && fsz == jl_field_size(sty, idx0) - 1); (void)isptr;
-        // compute tindex from rhs
-        jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jfty);
-        if (rhs_union.typ == jl_bottom_type)
-            return jl_cgval_t();
-        Value *ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()),
-                emit_bitcast(ctx, addr, getInt8PtrTy(ctx.builder.getContext())),
-                ConstantInt::get(ctx.types().T_size, fsz));
-        if (needlock)
-            emit_lockstate_value(ctx, strct, true);
-        BasicBlock *ModifyBB = NULL;
-        if (ismodifyfield) {
-            ModifyBB = BasicBlock::Create(ctx.builder.getContext(), "modify_xchg", ctx.f);
-            ctx.builder.CreateBr(ModifyBB);
-            ctx.builder.SetInsertPoint(ModifyBB);
-        }
-        jl_cgval_t oldval = rhs;
-        if (!issetfield)
-            oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true, union_max, ctx.tbaa().tbaa_unionselbyte);
-        Value *Success = NULL;
-        BasicBlock *DoneBB = NULL;
-        if (isreplacefield || ismodifyfield) {
-            if (ismodifyfield) {
-                if (needlock)
-                    emit_lockstate_value(ctx, strct, false);
-                const jl_cgval_t argv[3] = { cmp, oldval, rhs };
-                if (modifyop) {
-                    rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
-                }
-                else {
-                    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call);
-                    rhs = mark_julia_type(ctx, callval, true, jl_any_type);
-                }
-                emit_typecheck(ctx, rhs, jfty, fname);
-                rhs = update_julia_type(ctx, rhs, jfty);
-                rhs_union = convert_julia_type(ctx, rhs, jfty);
-                if (rhs_union.typ == jl_bottom_type)
-                    return jl_cgval_t();
-                if (needlock)
-                    emit_lockstate_value(ctx, strct, true);
-                cmp = oldval;
-                oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true, union_max, ctx.tbaa().tbaa_unionselbyte);
-            }
-            BasicBlock *XchgBB = BasicBlock::Create(ctx.builder.getContext(), "xchg", ctx.f);
-            DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg", ctx.f);
-            Success = emit_f_is(ctx, oldval, cmp);
-            ctx.builder.CreateCondBr(Success, XchgBB, ismodifyfield ? ModifyBB : DoneBB);
-            ctx.builder.SetInsertPoint(XchgBB);
-        }
-        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty);
-        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1));
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
-        ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
-        // copy data
-        if (!rhs.isghost) {
-            emit_unionmove(ctx, addr, strct.tbaa, rhs, nullptr);
-        }
-        if (isreplacefield || ismodifyfield) {
-            ctx.builder.CreateBr(DoneBB);
-            ctx.builder.SetInsertPoint(DoneBB);
-        }
-        if (needlock)
-            emit_lockstate_value(ctx, strct, false);
-        if (isreplacefield) {
-            Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
-            jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
-            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jfty);
-            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
-        }
-        else if (ismodifyfield) {
-            jl_cgval_t argv[2] = {oldval, rhs};
-            jl_datatype_t *rettyp = jl_apply_modify_type(jfty);
-            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
-        }
-        return oldval;
-    }
-    else {
-        unsigned align = jl_field_align(sty, idx0);
-        bool isboxed = jl_field_isptr(sty, idx0);
-        size_t nfields = jl_datatype_nfields(sty);
-        bool maybe_null = idx0 >= nfields - (unsigned)sty->name->n_uninitialized;
-        return typed_store(ctx, addr, NULL, rhs, cmp, jfty, strct.tbaa, nullptr,
-            wb ? boxed(ctx, strct) : nullptr,
-            isboxed, Order, FailOrder, align,
-            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, maybe_null, modifyop, fname);
-    }
-}
-
-static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool is_promotable)
+    bool isboxed = jl_field_isptr(sty, idx0);
+    if (!isboxed && jl_is_uniontype(jfty)) {
+        size_t fsz1 = jl_field_size(sty, idx0) - 1;
+        Value *ptindex = emit_ptrgep(ctx, addr, fsz1);
+        setNameWithField(ctx.emission_context, ptindex, get_objname, sty, idx0, Twine(".tindex_ptr"));
+        return union_store(ctx, addr, ptindex, rhs, cmp, jfty, tbaa, strct.tbaa,
+            Order, FailOrder,
+            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, issetfieldonce,
+            modifyop, fname);
+    }
+    unsigned align = jl_field_align(sty, idx0);
+    bool maybe_null = field_may_be_null(strct, sty, idx0);
+    return typed_store(ctx, addr, rhs, cmp, jfty, tbaa, nullptr,
+        wb ? boxed(ctx, strct) : nullptr,
+        isboxed, Order, FailOrder, align,
+        needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, issetfieldonce,
+        maybe_null, modifyop, fname, nullptr, nullptr);
+}
+
+static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, ArrayRef<jl_cgval_t> argv, bool is_promotable)
 {
     ++EmittedNewStructs;
     assert(jl_is_datatype(ty));
     assert(jl_is_concrete_type(ty));
     jl_datatype_t *sty = (jl_datatype_t*)ty;
+    auto arg_typename = [&] JL_NOTSAFEPOINT {
+        return "new::" + std::string(jl_symbol_name((sty)->name->name));
+    };
     size_t nf = jl_datatype_nfields(sty);
     if (nf > 0 || sty->name->mutabl) {
         if (deserves_stack(ty)) {
             Type *lt = julia_type_to_llvm(ctx, ty);
             unsigned na = nargs < nf ? nargs : nf;
 
-            // whether we should perform the initialization with the struct as a IR value
-            // or instead initialize the stack buffer with stores
-            auto tracked = CountTrackedPointers(lt);
+            // choose whether we should perform the initialization with the struct as a IR value
+            // or instead initialize the stack buffer with stores (the later is nearly always better)
+            // although we do the former if it is a vector or could be a vector element
+            auto tracked = split_value_size(sty);
+            assert(CountTrackedPointers(lt).count == tracked.second);
             bool init_as_value = false;
-            if (lt->isVectorTy() || jl_is_vecelement_type(ty)) { // maybe also check the size ?
-                init_as_value = true;
-            }
-            else if (tracked.count) {
+            if (lt->isVectorTy() || jl_special_vector_alignment(1, ty) != 0) {
                 init_as_value = true;
             }
 
             Instruction *promotion_point = nullptr;
             ssize_t promotion_ssa = -1;
             Value *strct;
+            SmallVector<Value*,0> inline_roots;
             if (type_is_ghost(lt)) {
-                strct = NULL;
+                strct = nullptr;
             }
             else if (init_as_value) {
-                if (tracked.count)
+                if (tracked.second) {
                     strct = Constant::getNullValue(lt);
-                else
+                }
+                else {
                     strct = UndefValue::get(lt);
+                    if (nargs < nf)
+                        strct = ctx.builder.CreateFreeze(strct); // Change this to zero initialize instead?
+                }
+            }
+            else if (tracked.second) {
+                inline_roots.resize(tracked.second, Constant::getNullValue(ctx.types().T_prjlvalue));
+                strct = nullptr;
+                if (tracked.first) {
+                    AllocaInst *bits = emit_static_alloca(ctx, tracked.first, Align(julia_alignment(ty)));
+                    strct = bits;
+                    setName(ctx.emission_context, bits, arg_typename);
+                    is_promotable = false; // wrong layout for promotion
+                }
             }
             else {
-                strct = emit_static_alloca(ctx, lt);
-                setName(ctx.emission_context, strct, "newstruct");
-                if (tracked.count)
-                    undef_derived_strct(ctx, strct, sty, ctx.tbaa().tbaa_stack);
+                strct = emit_static_alloca(ctx, lt, Align(julia_alignment(ty)));
+                setName(ctx.emission_context, strct, arg_typename);
             }
 
             for (unsigned i = 0; i < na; i++) {
@@ -3877,26 +4266,33 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 fval_info = update_julia_type(ctx, fval_info, jtype);
                 if (fval_info.typ == jl_bottom_type)
                     return jl_cgval_t();
+                if (type_is_ghost(lt))
+                    continue;
+                Type *fty = julia_type_to_llvm(ctx, jtype);
+                if (type_is_ghost(fty))
+                    continue;
+                Instruction *dest = nullptr;
+                MutableArrayRef<Value*> roots;
+                ssize_t offs = jl_field_offset(sty, i);
+                ssize_t ptrsoffs = -1;
+                if (!inline_roots.empty())
+                    std::tie(offs, ptrsoffs) = split_value_field(sty, i);
+                unsigned llvm_idx = init_as_value ? ((i > 0 && isa<StructType>(lt)) ? convert_struct_offset(ctx, lt, offs) : i) : -1u;
                 // TODO: Use (post-)domination instead.
                 bool field_promotable = !jl_is_uniontype(jtype) && !init_as_value && fval_info.promotion_ssa != -1 &&
+                    fval_info.inline_roots.empty() && inline_roots.empty() && // these need to be compatible, if they were to be implemented
                     fval_info.promotion_point && fval_info.promotion_point->getParent() == ctx.builder.GetInsertBlock();
                 if (field_promotable) {
                     savedIP = ctx.builder.saveIP();
                     ctx.builder.SetInsertPoint(fval_info.promotion_point);
                 }
-                if (type_is_ghost(lt))
-                    continue;
-                Type *fty = julia_type_to_llvm(ctx, jtype);
-                if (type_is_ghost(fty))
-                    continue;
-                Value *dest = NULL;
-                unsigned offs = jl_field_offset(sty, i);
-                unsigned llvm_idx = (i > 0 && isa<StructType>(lt)) ? convert_struct_offset(ctx, lt, offs) : i;
                 if (!init_as_value) {
                     // avoid unboxing the argument explicitly
                     // and use memcpy instead
-                    Instruction *inst;
-                    dest = inst = cast<Instruction>(ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx));
+                    Instruction *inst = strct && offs >= 0 ? cast<Instruction>(emit_ptrgep(ctx, strct, offs)) : nullptr;
+                    if (!inline_roots.empty() && ptrsoffs >= 0)
+                        roots = MutableArrayRef(inline_roots).slice(ptrsoffs, jl_field_isptr(sty, i) ? 1 : ((jl_datatype_t*)jtype)->layout->npointers);
+                    dest = inst;
                     // Our promotion point needs to come before
                     //  A) All of our arguments' promotion points
                     //  B) Any instructions we insert at any of our arguments' promotion points
@@ -3916,10 +4312,13 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 if (jl_field_isptr(sty, i)) {
                     fval = boxed(ctx, fval_info, field_promotable);
                     if (!init_as_value) {
-                        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
-                        StoreInst *SI = cast<StoreInst>(ai.decorateInst(
-                                ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i)))));
-                        SI->setOrdering(AtomicOrdering::Unordered);
+                        if (dest) {
+                            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+                            ai.decorateInst(ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i))));
+                        }
+                        else {
+                            roots[0] = fval;
+                        }
                     }
                 }
                 else if (jl_is_uniontype(jtype)) {
@@ -3931,47 +4330,46 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                     tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1));
                     size_t fsz = 0, al = 0;
                     bool isptr = !jl_islayout_inline(jtype, &fsz, &al);
-                    assert(!isptr && fsz == jl_field_size(sty, i) - 1); (void)isptr;
+                    assert(!isptr && fsz < jl_field_size(sty, i)); (void)isptr;
+                    size_t fsz1 = jl_field_size(sty, i) - 1;
                     if (init_as_value) {
                         // If you wanted to implement init_as_value,
                         // would need to emit the union-move into temporary memory,
                         // then load it and combine with the tindex.
                         // But more efficient to just store it directly.
-                        unsigned ptindex = convert_struct_offset(ctx, lt, offs + fsz);
-                        if (fsz > 0 && !fval_info.isghost) {
+                        unsigned ptindex = convert_struct_offset(ctx, lt, offs + fsz1);
+                        if (fsz1 > 0 && !fval_info.isghost) {
                             Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al);
                             assert(lt->getStructElementType(llvm_idx) == ET);
-                            AllocaInst *lv = emit_static_alloca(ctx, ET);
+                            AllocaInst *lv = emit_static_alloca(ctx, fsz1, Align(al));
                             setName(ctx.emission_context, lv, "unioninit");
-                            lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + al - 1) / al));
                             emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr);
                             // emit all of the align-sized words
                             unsigned i = 0;
-                            for (; i < fsz / al; i++) {
-                                Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
+                            for (; i < fsz1 / al; i++) {
+                                Value *fldp = emit_ptrgep(ctx, lv, i * al);
                                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
                                 Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(ET, fldp, Align(al)));
-                                strct = ctx.builder.CreateInsertValue(strct, fldv, makeArrayRef(llvm_idx + i));
+                                strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef<unsigned>(llvm_idx + i));
                             }
                             // emit remaining bytes up to tindex
                             if (i < ptindex - llvm_idx) {
-                                Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
-                                staddr = ctx.builder.CreateBitCast(staddr, getInt8PtrTy(ctx.builder.getContext()));
+                                Value *staddr = emit_ptrgep(ctx, lv, i * al);
                                 for (; i < ptindex - llvm_idx; i++) {
-                                    Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i);
+                                    Value *fldp = emit_ptrgep(ctx, staddr, i);
                                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
                                     Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), fldp, Align(1)));
-                                    strct = ctx.builder.CreateInsertValue(strct, fldv, makeArrayRef(llvm_idx + i));
+                                    strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef<unsigned>(llvm_idx + i));
                                 }
                             }
                         }
                         llvm_idx = ptindex;
                         fval = tindex;
                         if (jl_is_vecelement_type(ty))
-                            fval = ctx.builder.CreateInsertValue(strct, fval, makeArrayRef(llvm_idx));
+                            fval = ctx.builder.CreateInsertValue(strct, fval, ArrayRef<unsigned>(llvm_idx));
                     }
                     else {
-                        Value *ptindex = emit_struct_gep(ctx, lt, strct, offs + fsz);
+                        Value *ptindex = emit_ptrgep(ctx, strct, offs + fsz1);
                         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
                         ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
                         if (!rhs_union.isghost)
@@ -3979,13 +4377,20 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                     }
                 }
                 else {
+                    Align align_dst(jl_field_align(sty, i));
+                    Align align_src(julia_alignment(jtype));
                     if (field_promotable) {
                         fval_info.V->replaceAllUsesWith(dest);
                         cast<Instruction>(fval_info.V)->eraseFromParent();
-                    } else if (init_as_value) {
+                    }
+                    else if (init_as_value) {
                         fval = emit_unbox(ctx, fty, fval_info, jtype);
-                    } else {
-                        emit_unbox_store(ctx, fval_info, dest, ctx.tbaa().tbaa_stack, jl_field_align(sty, i));
+                    }
+                    else if (!roots.empty()) {
+                        split_value_into(ctx, fval_info, align_src, dest, align_dst, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), roots);
+                    }
+                    else {
+                        emit_unbox_store(ctx, fval_info, dest, ctx.tbaa().tbaa_stack, align_src, align_dst);
                     }
                 }
                 if (init_as_value) {
@@ -3995,7 +4400,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                     else if (lt->isVectorTy())
                         strct = ctx.builder.CreateInsertElement(strct, fval, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), llvm_idx));
                     else if (lt->isAggregateType())
-                        strct = ctx.builder.CreateInsertValue(strct, fval, makeArrayRef(llvm_idx));
+                        strct = ctx.builder.CreateInsertValue(strct, fval, ArrayRef<unsigned>(llvm_idx));
                     else
                         assert(false);
                 }
@@ -4003,28 +4408,38 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                     ctx.builder.restoreIP(savedIP);
                 }
             }
-            for (size_t i = nargs; i < nf; i++) {
-                if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
-                    unsigned offs = jl_field_offset(sty, i);
-                    int fsz = jl_field_size(sty, i) - 1;
-                    unsigned llvm_idx = convert_struct_offset(ctx, cast<StructType>(lt), offs + fsz);
-                    if (init_as_value)
-                        strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), makeArrayRef(llvm_idx));
-                    else {
-                        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
-                        ai.decorateInst(ctx.builder.CreateAlignedStore(
-                                ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
-                                ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx),
-                                Align(1)));
+            if (init_as_value) {
+                for (size_t i = nargs; i < nf; i++) {
+                    if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
+                        ssize_t offs = jl_field_offset(sty, i);
+                        ssize_t ptrsoffs = -1;
+                        if (!inline_roots.empty())
+                            std::tie(offs, ptrsoffs) = split_value_field(sty, i);
+                        assert(ptrsoffs < 0 && offs >= 0);
+                        int fsz = jl_field_size(sty, i) - 1;
+                        unsigned llvm_idx = convert_struct_offset(ctx, cast<StructType>(lt), offs + fsz);
+                        strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ArrayRef<unsigned>(llvm_idx));
                     }
                 }
             }
+            if (nargs < nf) {
+                assert(!init_as_value);
+                IRBuilderBase::InsertPoint savedIP = ctx.builder.saveIP();
+                if (promotion_point)
+                    ctx.builder.SetInsertPoint(promotion_point);
+                if (strct) {
+                    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+                    promotion_point = ai.decorateInst(ctx.builder.CreateMemSet(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
+                                                                jl_datatype_size(ty), Align(julia_alignment(ty))));
+                }
+                ctx.builder.restoreIP(savedIP);
+            }
             if (type_is_ghost(lt))
                 return mark_julia_const(ctx, sty->instance);
             else if (init_as_value)
                 return mark_julia_type(ctx, strct, false, ty);
             else {
-                jl_cgval_t ret = mark_julia_slot(strct, ty, NULL, ctx.tbaa().tbaa_stack);
+                jl_cgval_t ret = mark_julia_slot(strct, ty, NULL, ctx.tbaa().tbaa_stack, inline_roots);
                 if (is_promotable && promotion_point) {
                     ret.promotion_point = promotion_point;
                     ret.promotion_ssa = promotion_ssa;
@@ -4032,19 +4447,19 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 return ret;
             }
         }
-        Value *strct = emit_allocobj(ctx, sty);
-        setName(ctx.emission_context, strct, "newstruct");
+        Value *strct = emit_allocobj(ctx, sty, nargs >= nf);
+        setName(ctx.emission_context, strct, arg_typename);
         jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty);
         strct = decay_derived(ctx, strct);
         undef_derived_strct(ctx, strct, sty, strctinfo.tbaa);
         for (size_t i = nargs; i < nf; i++) {
             if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
-                jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
-                ai.decorateInst(ctx.builder.CreateAlignedStore(
+                jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, strctinfo.tbaa);
+                auto *store = ctx.builder.CreateAlignedStore(
                         ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
-                        ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, strct, getInt8PtrTy(ctx.builder.getContext())),
-                                ConstantInt::get(ctx.types().T_size, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)),
-                        Align(1)));
+                        emit_ptrgep(ctx, strct, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1),
+                        Align(1));
+                ai.decorateInst(store);
             }
         }
         // TODO: verify that nargs <= nf (currently handled by front-end)
@@ -4060,18 +4475,19 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
             rhs = update_julia_type(ctx, rhs, ft);
             if (rhs.typ == jl_bottom_type)
                 return jl_cgval_t();
-            emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, false, true, false, false, false, nullptr, "");
+            emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, nullptr, true, false, false, false, false, nullptr, "new");
         }
         return strctinfo;
     }
     else {
-        // 0 fields, ghost or bitstype
+        // 0 fields, ghost or primitive type
         if (jl_datatype_nbits(sty) == 0)
             return ghostValue(ctx, sty);
+        // n.b. this is not valid IR form to construct a primitive type (use bitcast for example)
         bool isboxed;
         Type *lt = julia_type_to_llvm(ctx, ty, &isboxed);
         assert(!isboxed);
-        return mark_julia_type(ctx, UndefValue::get(lt), false, ty);
+        return mark_julia_type(ctx, ctx.builder.CreateFreeze(UndefValue::get(lt)), false, ty);
     }
 }
 
@@ -4083,11 +4499,8 @@ static void emit_signal_fence(jl_codectx_t &ctx)
 static Value *emit_defer_signal(jl_codectx_t &ctx)
 {
     ++EmittedDeferSignal;
-    Value *ptls = emit_bitcast(ctx, get_current_ptls(ctx),
-                               PointerType::get(ctx.types().T_sigatomic, 0));
-    Constant *offset = ConstantInt::getSigned(getInt32Ty(ctx.builder.getContext()),
-            offsetof(jl_tls_states_t, defer_signal) / sizeof(sig_atomic_t));
-    return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef<Value*>(offset), "jl_defer_signal");
+    Value *ptls = get_current_ptls(ctx);
+    return emit_ptrgep(ctx, ptls, offsetof(jl_tls_states_t, defer_signal));
 }
 
 #ifndef JL_NDEBUG
@@ -4099,10 +4512,462 @@ static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b)
            (a->prefer_specsig == b->prefer_specsig) &&
            (a->gnu_pubnames == b->gnu_pubnames) &&
            (a->debug_info_kind == b->debug_info_kind) &&
-           (a->lookup == b->lookup) &&
-           (a->generic_context == b->generic_context);
+           (a->safepoint_on_entry == b->safepoint_on_entry) &&
+           (a->gcstack_arg == b->gcstack_arg) &&
+           (a->use_jlplt == b->use_jlplt) &&
+           (a->force_emit_all == b->force_emit_all);
+}
+#endif
+
+static auto *emit_genericmemory_unchecked(jl_codectx_t &ctx, Value *cg_nbytes, Value *cg_typ)
+{
+    auto ptls = get_current_ptls(ctx);
+    auto call = prepare_call(jl_alloc_genericmemory_unchecked_func);
+    auto *alloc = ctx.builder.CreateCall(call, { ptls, cg_nbytes, cg_typ});
+    alloc->setAttributes(call->getAttributes());
+    alloc->addRetAttr(Attribute::getWithAlignment(alloc->getContext(), Align(JL_HEAP_ALIGNMENT)));
+    call->addRetAttr(Attribute::getWithDereferenceableBytes(call->getContext(), sizeof(jl_genericmemory_t)));
+    return alloc;
+}
+
+static void emit_memory_zeroinit_and_stores(jl_codectx_t &ctx, jl_datatype_t *typ, Value* alloc, Value* nbytes, Value* nel, int zi)
+{
+    auto arg_typename = [&] JL_NOTSAFEPOINT {
+        std::string type_str;
+        auto eltype = jl_tparam1(typ);
+        if (jl_is_datatype(eltype))
+            type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name);
+        else if (jl_is_uniontype(eltype))
+            type_str = "Union";
+        else
+            type_str = "<unknown type>";
+        return "Memory{" + type_str + "}[]";
+    };
+    setName(ctx.emission_context, alloc, arg_typename);
+    // set length (jl_alloc_genericmemory_unchecked_func doesn't have it)
+    Value *decay_alloc = decay_derived(ctx, alloc);
+    Value *len_field = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 0);
+    auto len_store = ctx.builder.CreateAlignedStore(nel, len_field, Align(sizeof(void*)));
+    auto aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen);
+    aliasinfo.decorateInst(len_store);
+    // zeroinit pointers and unions
+    if (zi) {
+        Value *memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1);
+        auto *load = ctx.builder.CreateAlignedLoad(ctx.types().T_ptr, memory_ptr, Align(sizeof(void*)));
+        aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr);
+        aliasinfo.decorateInst(load);
+        auto int8t = getInt8Ty(ctx.builder.getContext());
+        ctx.builder.CreateMemSet(load, ConstantInt::get(int8t, 0), nbytes, Align(sizeof(void*)));
+    }
+    return;
+}
+
+
+static jl_cgval_t emit_const_len_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, size_t nel, jl_genericmemory_t *inst)
+{
+    if (nel == 0) {
+        Value *empty_alloc = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)inst));
+        return mark_julia_type(ctx, empty_alloc, true, typ);
+    }
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)typ)->layout;
+    assert(((jl_datatype_t*)typ)->has_concrete_subtype && layout != NULL);
+    size_t elsz = layout->size;
+    int isboxed = layout->flags.arrayelem_isboxed;
+    int isunion = layout->flags.arrayelem_isunion;
+    int zi = ((jl_datatype_t*)typ)->zeroinit;
+    if (isboxed)
+        elsz = sizeof(void*);
+    size_t nbytes;
+    bool overflow = __builtin_mul_overflow(nel, elsz, &nbytes);
+    if (isunion) {
+        // an extra byte for each isbits union memory element, stored at m->ptr + m->length
+        overflow |= __builtin_add_overflow(nbytes, nel, &nbytes);
+    }
+    // overflow if signed size is too big or nel is too big (the latter matters iff elsz==0)
+    ssize_t tmp=1;
+    overflow |= __builtin_add_overflow(nel, 1, &tmp) || __builtin_add_overflow(nbytes, 1, &tmp);
+    if (overflow)
+        emit_error(ctx, prepare_call(jlargumenterror_func), "invalid GenericMemory size: the number of elements is either negative or too large for system address width");
+
+    auto T_size = ctx.types().T_size;
+    auto cg_typ = literal_pointer_val(ctx, (jl_value_t*) typ);
+    auto cg_nbytes = ConstantInt::get(T_size, nbytes);
+    auto cg_nel = ConstantInt::get(T_size, nel);
+    size_t tot = nbytes + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT);
+    // if allocation fits within GC pools
+    int pooled = tot <= GC_MAX_SZCLASS;
+    Value *alloc, *decay_alloc, *memory_ptr;
+    jl_aliasinfo_t aliasinfo;
+    if (pooled) {
+        alloc = emit_allocobj(ctx, tot, cg_typ, false, JL_SMALL_BYTE_ALIGNMENT);
+        decay_alloc = decay_derived(ctx, alloc);
+        memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1);
+        setName(ctx.emission_context, memory_ptr, "memory_ptr");
+        auto objref = emit_pointer_from_objref(ctx, alloc);
+        Value *memory_data = emit_ptrgep(ctx, objref, JL_SMALL_BYTE_ALIGNMENT);
+        auto *store = ctx.builder.CreateAlignedStore(memory_data, memory_ptr, Align(sizeof(void*)));
+        aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr);
+        aliasinfo.decorateInst(store);
+        setName(ctx.emission_context, memory_data, "memory_data");
+    } else { // just use the dynamic length version since the malloc will be slow anyway
+        alloc = emit_genericmemory_unchecked(ctx, cg_nbytes, cg_typ);
+    }
+    emit_memory_zeroinit_and_stores(ctx, typ, alloc, cg_nbytes, cg_nel, zi);
+    return mark_julia_type(ctx, alloc, true, typ);
+}
+
+static jl_cgval_t emit_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, jl_cgval_t nel, jl_genericmemory_t *inst)
+{
+    emit_typecheck(ctx, nel, (jl_value_t*)jl_long_type, "memorynew");
+    nel = update_julia_type(ctx, nel, (jl_value_t*)jl_long_type);
+    if (nel.typ == jl_bottom_type)
+        return jl_cgval_t();
+
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)typ)->layout;
+    assert(((jl_datatype_t*)typ)->has_concrete_subtype && layout != NULL);
+    size_t elsz = layout->size;
+    int isboxed = layout->flags.arrayelem_isboxed;
+    int isunion = layout->flags.arrayelem_isunion;
+    int zi = ((jl_datatype_t*)typ)->zeroinit;
+    if (isboxed)
+        elsz = sizeof(void*);
+
+    auto T_size = ctx.types().T_size;
+    BasicBlock *emptymemBB, *nonemptymemBB, *retvalBB;
+    emptymemBB = BasicBlock::Create(ctx.builder.getContext(), "emptymem");
+    nonemptymemBB = BasicBlock::Create(ctx.builder.getContext(), "nonemptymem");
+    retvalBB = BasicBlock::Create(ctx.builder.getContext(), "retval");
+    auto nel_unboxed = emit_unbox(ctx, ctx.types().T_size, nel, (jl_value_t*)jl_long_type);
+    Value *memorynew_empty = ctx.builder.CreateICmpEQ(nel_unboxed, ConstantInt::get(T_size, 0));
+    setName(ctx.emission_context, memorynew_empty, "memorynew_empty");
+    ctx.builder.CreateCondBr(memorynew_empty, emptymemBB, nonemptymemBB);
+    // if nel == 0
+    emptymemBB->insertInto(ctx.f);
+    ctx.builder.SetInsertPoint(emptymemBB);
+    auto emptyalloc = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)inst));
+    ctx.builder.CreateBr(retvalBB);
+    nonemptymemBB->insertInto(ctx.f);
+    ctx.builder.SetInsertPoint(nonemptymemBB);
+
+    auto cg_typ = literal_pointer_val(ctx, (jl_value_t*) typ);
+    auto cg_elsz = ConstantInt::get(T_size, elsz);
+
+#if JL_LLVM_VERSION >= 200000
+    FunctionCallee intr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::smul_with_overflow, ArrayRef<Type*>(T_size));
+#else
+    FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, Intrinsic::smul_with_overflow, ArrayRef<Type*>(T_size));
+#endif
+    // compute nbytes with possible overflow
+    Value *prod_with_overflow = ctx.builder.CreateCall(intr, {nel_unboxed, cg_elsz});
+    Value *nbytes = ctx.builder.CreateExtractValue(prod_with_overflow, 0);
+    Value *overflow = ctx.builder.CreateExtractValue(prod_with_overflow, 1);
+    if (isunion) {
+        // if isunion, we need to allocate the union selector bytes as well
+#if JL_LLVM_VERSION >= 200000
+        intr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::sadd_with_overflow, ArrayRef<Type*>(T_size));
+#else
+        intr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sadd_with_overflow, ArrayRef<Type*>(T_size));
+#endif
+        Value *add_with_overflow = ctx.builder.CreateCall(intr, {nel_unboxed, nbytes});
+        nbytes = ctx.builder.CreateExtractValue(add_with_overflow, 0);
+        Value *overflow1 = ctx.builder.CreateExtractValue(add_with_overflow, 1);
+        overflow = ctx.builder.CreateOr(overflow, overflow1);
+    }
+    Value *negnel = ctx.builder.CreateICmpSLT(nel_unboxed, ConstantInt::get(T_size, 0));
+    overflow = ctx.builder.CreateOr(overflow, negnel);
+    auto cg_typemax_int = ConstantInt::get(T_size, (((size_t)-1)>>1)-1);
+    Value *tobignel = ctx.builder.CreateICmpSLT(cg_typemax_int, elsz == 0 ? nel_unboxed: nbytes);
+    overflow = ctx.builder.CreateOr(overflow, tobignel);
+    Value *notoverflow = ctx.builder.CreateNot(overflow);
+    error_unless(ctx, prepare_call(jlargumenterror_func), notoverflow, "invalid GenericMemory size: the number of elements is either negative or too large for system address width");
+    // actually allocate the memory
+
+    Value *alloc = emit_genericmemory_unchecked(ctx, nbytes, cg_typ);
+    emit_memory_zeroinit_and_stores(ctx, typ, alloc, nbytes, nel_unboxed, zi);
+    ctx.builder.CreateBr(retvalBB);
+    nonemptymemBB = ctx.builder.GetInsertBlock();
+    // phi node to choose which side of branch
+    retvalBB->insertInto(ctx.f);
+    ctx.builder.SetInsertPoint(retvalBB);
+    auto phi = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2);
+    phi->addIncoming(emptyalloc, emptymemBB);
+    phi->addIncoming(alloc, nonemptymemBB);
+    return mark_julia_type(ctx, phi, true, typ);
+}
+
+static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, Value *mem, Value *data, const jl_datatype_layout_t *layout, jl_value_t *typ)
+{
+    //jl_cgval_t argv[] = {
+    //    mark_julia_type(ctx, mem, true, jl_any_type),
+    //    mark_julia_type(ctx, data, false, jl_voidpointer_type)
+    //};
+    //return emit_new_struct(ctx, typ, 3, argv);
+    Value *ref = Constant::getNullValue(get_memoryref_type(ctx.builder.getContext(), ctx.types().T_size, layout, 0));
+    ref = ctx.builder.CreateInsertValue(ref, data, 0);
+    ref = ctx.builder.CreateInsertValue(ref, mem, 1);
+    setName(ctx.emission_context, ref, "memory_ref");
+    return mark_julia_type(ctx, ref, false, typ);
+}
+
+static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &mem, const jl_datatype_layout_t *layout, jl_value_t *typ)
+{
+    bool isboxed = layout->flags.arrayelem_isboxed;
+    bool isunion = layout->flags.arrayelem_isunion;
+    bool isghost = layout->size == 0;
+    Value *data = (!isboxed && isunion) || isghost ? ConstantInt::get(ctx.types().T_size, 0) : emit_genericmemoryptr(ctx, boxed(ctx, mem), layout, 0);
+    return _emit_memoryref(ctx, boxed(ctx, mem), data, layout, typ);
+}
+
+static jl_cgval_t emit_memoryref_direct(jl_codectx_t &ctx, const jl_cgval_t &mem, jl_cgval_t idx, jl_value_t *typ, jl_value_t *inbounds, const jl_datatype_layout_t *layout)
+{
+    bool isboxed = layout->flags.arrayelem_isboxed;
+    bool isunion = layout->flags.arrayelem_isunion;
+    bool isghost = layout->size == 0;
+    Value *boxmem = boxed(ctx, mem);
+    Value *i = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type);
+    Value *idx0 = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1));
+    bool bc = bounds_check_enabled(ctx, inbounds);
+    if (bc) {
+        BasicBlock *failBB, *endBB;
+        failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
+        endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend");
+        Value *mlen = emit_genericmemorylen(ctx, boxmem, typ);
+        Value *inbound = ctx.builder.CreateICmpULT(idx0, mlen);
+        setName(ctx.emission_context, inbound, "memoryref_isinbounds");
+        ctx.builder.CreateCondBr(inbound, endBB, failBB);
+        failBB->insertInto(ctx.f);
+        ctx.builder.SetInsertPoint(failBB);
+        ctx.builder.CreateCall(prepare_call(jlboundserror_func),
+            { mark_callee_rooted(ctx, boxmem), i });
+        ctx.builder.CreateUnreachable();
+        endBB->insertInto(ctx.f);
+        ctx.builder.SetInsertPoint(endBB);
+    }
+    Value *data;
+
+    if ((!isboxed && isunion) || isghost) {
+        data = idx0;
+
+    } else {
+        data = emit_genericmemoryptr(ctx, boxmem, layout, 0);
+        idx0 = ctx.builder.CreateMul(idx0, emit_genericmemoryelsize(ctx, boxmem, mem.typ, false), "", true, true);
+        data = ctx.builder.CreatePtrAdd(data, idx0);
+    }
+
+    return _emit_memoryref(ctx, boxmem, data, layout, typ);
+}
+
+static Value *emit_memoryref_FCA(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
+{
+    if (!ref.inline_roots.empty()) {
+        LLVMContext &C = ctx.builder.getContext();
+        StructType *type = get_memoryref_type(C, ctx.types().T_size, layout, 0);
+        LoadInst *load0 = ctx.builder.CreateLoad(type->getElementType(0), ref.V);
+        jl_aliasinfo_t ai0 = jl_aliasinfo_t::fromTBAA(ctx, ref.tbaa);
+        ai0.decorateInst(load0);
+        setName(ctx.emission_context, load0, "memory_ref_FCA0");
+        Value *root = ctx.builder.CreateBitCast(ref.inline_roots[0], type->getElementType(1));
+        Value *load = Constant::getNullValue(type);
+        load = ctx.builder.CreateInsertValue(load, load0, 0);
+        load = ctx.builder.CreateInsertValue(load, root, 1);
+        return load;
+    }
+    else if (ref.ispointer()) {
+        LLVMContext &C = ctx.builder.getContext();
+        Type *type = get_memoryref_type(C, ctx.types().T_size, layout, 0);
+        LoadInst *load = ctx.builder.CreateLoad(type, data_pointer(ctx, ref));
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ref.tbaa);
+        ai.decorateInst(load);
+        setName(ctx.emission_context, load, "memory_ref_FCA");
+        return load;
+    }
+    else {
+        return ref.V;
+    }
 }
+
+static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cgval_t idx, jl_value_t *inbounds, const jl_datatype_layout_t *layout)
+{
+    ++EmittedArrayNdIndex;
+    emit_typecheck(ctx, idx, (jl_value_t*)jl_long_type, "memoryrefnew");
+    idx = update_julia_type(ctx, idx, (jl_value_t*)jl_long_type);
+    if (idx.typ == jl_bottom_type)
+        return jl_cgval_t();
+    Value *V = emit_memoryref_FCA(ctx, ref, layout);
+    Value *data = CreateSimplifiedExtractValue(ctx, V, 0);
+    maybeSetName(ctx.emission_context, data, "memoryref_data");
+    Value *mem = CreateSimplifiedExtractValue(ctx, V, 1);
+    maybeSetName(ctx.emission_context, mem, "memoryref_mem");
+    Value *i = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type);
+    Value *offset = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1));
+    setName(ctx.emission_context, offset, "memoryref_offset");
+    Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false);
+    bool bc = bounds_check_enabled(ctx, inbounds);
+#if 1
+    Value *ovflw = nullptr;
 #endif
+    Value *newdata;
+    bool isboxed = layout->flags.arrayelem_isboxed;
+    bool isunion = layout->flags.arrayelem_isunion;
+    bool isghost = layout->size == 0;
+    if ((!isboxed && isunion) || isghost) {
+        newdata = ctx.builder.CreateAdd(data, offset);
+        setName(ctx.emission_context, newdata, "memoryref_data+offset");
+        if (bc) {
+            BasicBlock *failBB, *endBB;
+            failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
+            endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend");
+            Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
+            Value *inbound = ctx.builder.CreateICmpULT(newdata, mlen);
+            setName(ctx.emission_context, offset, "memoryref_isinbounds");
+            ctx.builder.CreateCondBr(inbound, endBB, failBB);
+            failBB->insertInto(ctx.f);
+            ctx.builder.SetInsertPoint(failBB);
+            ctx.builder.CreateCall(prepare_call(jlboundserror_func),
+                { mark_callee_rooted(ctx, boxed(ctx, ref)), i });
+            ctx.builder.CreateUnreachable();
+            endBB->insertInto(ctx.f);
+            ctx.builder.SetInsertPoint(endBB);
+        }
+    }
+    else {
+        Value *boffset;
+#if 0
+        if (bc) {
+#if JL_LLVM_VERSION >= 200000
+            auto *MulF = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::smul_with_overflow, offset->getType());
+#else
+            auto *MulF = Intrinsic::getDeclaration(jl_Module, Intrinsic::smul_with_overflow, offset->getType());
+#endif
+            CallInst *Mul = ctx.builder.CreateCall(MulF, {offset, elsz});
+            boffset = ctx.builder.CreateExtractValue(Mul, 0);
+            ovflw = ctx.builder.CreateExtractValue(Mul, 1);
+        }
+        else
+#else
+        if (bc) {
+            // n.b. we could boundscheck that -len<=offset<=len instead of using smul.ovflw,
+            // since we know that len*elsz does not overflow,
+            // and we can further rearrange that as ovflw = !( offset+len < len+len ) as unsigned math
+            Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
+            ovflw = ctx.builder.CreateICmpUGE(ctx.builder.CreateAdd(offset, mlen), ctx.builder.CreateNUWAdd(mlen, mlen));
+            setName(ctx.emission_context, ovflw, "memoryref_ovflw");
+        }
+#endif
+        boffset = ctx.builder.CreateMul(offset, elsz);
+        setName(ctx.emission_context, boffset, "memoryref_byteoffset");
+        newdata = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), data, boffset);
+        setName(ctx.emission_context, newdata, "memoryref_data_byteoffset");
+        (void)boffset; // LLVM is very bad at handling GEP with types different from the load
+        if (bc) {
+            BasicBlock *failBB, *endBB;
+            failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
+            endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend");
+            Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
+            Value *mptr = emit_genericmemoryptr(ctx, mem, layout, 0);
+#if 0
+            Value *mend = mptr;
+            Value *blen = ctx.builder.CreateMul(mlen, elsz, "", true, true);
+            mend = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), mptr, blen);
+            Value *inbound = ctx.builder.CreateAnd(
+                    ctx.builder.CreateICmpULE(mptr, newdata),
+                    ctx.builder.CreateICmpULT(newdata, mend));
+            inbound = ctx.builder.CreateAnd(
+                    ctx.builder.CreateNot(ovflw),
+                    inbound);
+#elif 1
+            Value *bidx0 = ctx.builder.CreateSub(
+                ctx.builder.CreatePtrToInt(newdata, ctx.types().T_size),
+                ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size));
+            Value *blen = ctx.builder.CreateMul(mlen, elsz, "", true, true);
+            setName(ctx.emission_context, blen, "memoryref_bytelen");
+            Value *inbound = ctx.builder.CreateICmpULT(bidx0, blen);
+            setName(ctx.emission_context, inbound, "memoryref_isinbounds");
+            inbound = ctx.builder.CreateAnd(ctx.builder.CreateNot(ovflw), inbound);
+            setName(ctx.emission_context, inbound, "memoryref_isinbounds&notovflw");
+#else
+            Value *idx0; // (newdata - mptr) / elsz
+            idx0 = ctx.builder.CreateSub(
+                ctx.builder.CreatePtrToInt(newdata, ctx.types().T_size),
+                ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size));
+            idx0 = ctx.builder.CreateExactUDiv(idx0, elsz);
+            Value *inbound = ctx.builder.CreateICmpULT(idx0, mlen);
+#endif
+            ctx.builder.CreateCondBr(inbound, endBB, failBB);
+            failBB->insertInto(ctx.f);
+            ctx.builder.SetInsertPoint(failBB);
+            ctx.builder.CreateCall(prepare_call(jlboundserror_func),
+                { mark_callee_rooted(ctx, boxed(ctx, ref)), i });
+            ctx.builder.CreateUnreachable();
+            endBB->insertInto(ctx.f);
+            ctx.builder.SetInsertPoint(endBB);
+        }
+    }
+    return _emit_memoryref(ctx, mem, newdata, layout, ref.typ);
+}
+
+static jl_cgval_t emit_memoryref_offset(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
+{
+    Value *offset;
+    Value *V = emit_memoryref_FCA(ctx, ref, layout);
+    Value *data = CreateSimplifiedExtractValue(ctx, V, 0);
+    if (layout->flags.arrayelem_isunion || layout->size == 0) {
+        offset = data;
+    }
+    else {
+        Value *mem = CreateSimplifiedExtractValue(ctx, V, 1);
+        Value *mptr = emit_genericmemoryptr(ctx, mem, layout, 0);
+        // (data - mptr) / elsz
+        offset = ctx.builder.CreateSub(
+            ctx.builder.CreatePtrToInt(data, ctx.types().T_size),
+            ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size));
+        setName(ctx.emission_context, offset, "memoryref_offset");
+        Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false);
+        offset = ctx.builder.CreateExactUDiv(offset, elsz);
+        setName(ctx.emission_context, offset, "memoryref_offsetidx");
+    }
+    offset = ctx.builder.CreateAdd(offset, ConstantInt::get(ctx.types().T_size, 1));
+    return mark_julia_type(ctx, offset, false, jl_long_type);
+}
+
+static Value *emit_memoryref_mem(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
+{
+    Value *V = emit_memoryref_FCA(ctx, ref, layout);
+    V = CreateSimplifiedExtractValue(ctx, V, 1);
+    maybeSetName(ctx.emission_context, V, "memoryref_mem");
+    return V;
+}
+
+static Value *emit_memoryref_ptr(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
+{
+    assert(!layout->flags.arrayelem_isunion && layout->size != 0);
+    Value *newref = emit_memoryref_FCA(ctx, ref, layout);
+    Value *data = CreateSimplifiedExtractValue(ctx, newref, 0);
+    unsigned AS = AddressSpace::Loaded;
+    Value *mem = CreateSimplifiedExtractValue(ctx, newref, 1);
+    // rebuild GEP on data, so that we manually hoist this gc_loaded_func call over it, back to the original load
+    // we should add this to llvm-julia-licm too, so we can attempt hoisting over PhiNodes too (which aren't defined yet here)
+    IRBuilder<>::InsertPointGuard resetIP(ctx.builder);
+    SmallVector<GetElementPtrInst*,0> GEPlist;
+    data = data->stripPointerCastsSameRepresentation();
+    while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(data)) { // ignoring bitcast will not be required with opaque pointers
+        GEPlist.push_back(GEP);
+        data = GEP->getPointerOperand()->stripPointerCastsSameRepresentation();
+    }
+    data = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, data });
+    if (!GEPlist.empty()) {
+        for (auto &GEP : make_range(GEPlist.rbegin(), GEPlist.rend())) {
+            GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GEP->clone());
+            GEP2->mutateType(PointerType::get(GEP->getContext(), AS));
+            GEP2->setOperand(GetElementPtrInst::getPointerOperandIndex(), data);
+            GEP2->setIsInBounds(true);
+            ctx.builder.Insert(GEP2);
+            data = GEP2;
+        }
+    }
+    setName(ctx.emission_context, data, "memoryref_data");
+    return data;
+}
 
 // Reset us back to codegen debug type
 #undef DEBUG_TYPE
diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp
index 086d925802f63..09a034a9549d8 100644
--- a/src/clangsa/GCChecker.cpp
+++ b/src/clangsa/GCChecker.cpp
@@ -1,5 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#include "clang/AST/Type.h"
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/StaticAnalyzer/Checkers/SValExplainer.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
@@ -14,6 +15,7 @@
 #include "clang/Tooling/Tooling.h"
 #include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h"
 
+#include "llvm/Support/Debug.h"
 #include <iostream>
 #include <memory>
 
@@ -29,7 +31,7 @@ namespace {
 using namespace clang;
 using namespace ento;
 
-#define PDP std::shared_ptr<PathDiagnosticPiece>
+typedef std::shared_ptr<PathDiagnosticPiece> PDP;
 #define MakePDP make_unique<PathDiagnosticEventPiece>
 
 static const Stmt *getStmtForDiagnostics(const ExplodedNode *N)
@@ -199,7 +201,7 @@ class GCChecker
   static bool isGCTracked(const Expr *E);
   bool isGloballyRootedType(QualType Type) const;
   static void dumpState(const ProgramStateRef &State);
-  static bool declHasAnnotation(const clang::Decl *D, const char *which);
+  static const AnnotateAttr *declHasAnnotation(const clang::Decl *D, const char *which);
   static bool isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD, const SourceManager &SM);
   static const SourceManager &getSM(CheckerContext &C) { return C.getSourceManager(); }
   bool isSafepoint(const CallEvent &Call, CheckerContext &C) const;
@@ -251,6 +253,18 @@ class GCChecker
     PDP VisitNode(const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &BR) override;
   };
 
+  class SafepointBugVisitor : public BugReporterVisitor {
+  public:
+    SafepointBugVisitor() {}
+
+    void Profile(llvm::FoldingSetNodeID &ID) const override {
+      static int X = 0;
+      ID.AddPointer(&X);
+    }
+
+    PDP VisitNode(const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &BR) override;
+  };
+
   class GCValueBugVisitor : public BugReporterVisitor {
   protected:
     SymbolRef Sym;
@@ -364,6 +378,38 @@ PDP GCChecker::GCBugVisitor::VisitNode(const ExplodedNode *N,
   return nullptr;
 }
 
+PDP GCChecker::SafepointBugVisitor::VisitNode(const ExplodedNode *N,
+                                       BugReporterContext &BRC, PathSensitiveBugReport &BR) {
+  const ExplodedNode *PrevN = N->getFirstPred();
+  unsigned NewSafepointDisabled = N->getState()->get<SafepointDisabledAt>();
+  unsigned OldSafepointDisabled = PrevN->getState()->get<SafepointDisabledAt>();
+  if (NewSafepointDisabled != OldSafepointDisabled) {
+    const Decl *D = &N->getCodeDecl();
+    const AnnotateAttr *Ann = declHasAnnotation(D, "julia_not_safepoint");
+    PathDiagnosticLocation Pos;
+    if (OldSafepointDisabled == (unsigned)-1) {
+      if (Ann) {
+        Pos = PathDiagnosticLocation{Ann->getLoc(), BRC.getSourceManager()};
+        return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here.");
+      } else {
+        PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin(
+            N->getLocationContext(), BRC.getSourceManager());
+        if (Pos.isValid())
+          return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here.");
+        //N->getLocation().dump();
+      }
+    } else if (NewSafepointDisabled == (unsigned)-1) {
+      PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin(
+          N->getLocationContext(), BRC.getSourceManager());
+      if (Pos.isValid())
+        return MakePDP(Pos, "Safepoints re-enabled here");
+      //N->getLocation().dump();
+    }
+    // n.b. there may be no position here to report if they were disabled by julia_notsafepoint_enter/leave
+  }
+  return nullptr;
+}
+
 PDP GCChecker::GCValueBugVisitor::ExplainNoPropagationFromExpr(
     const clang::Expr *FromWhere, const ExplodedNode *N,
     PathDiagnosticLocation Pos, BugReporterContext &BRC, PathSensitiveBugReport &BR) {
@@ -712,12 +758,12 @@ void GCChecker::checkEndFunction(const clang::ReturnStmt *RS,
   }
 }
 
-bool GCChecker::declHasAnnotation(const clang::Decl *D, const char *which) {
+const AnnotateAttr *GCChecker::declHasAnnotation(const clang::Decl *D, const char *which) {
   for (const auto *Ann : D->specific_attrs<AnnotateAttr>()) {
     if (Ann->getAnnotation() == which)
-      return true;
+      return Ann;
   }
-  return false;
+  return nullptr;
 }
 
 bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD, const SourceManager &SM) {
@@ -726,82 +772,92 @@ bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD, const S
   SourceLocation Loc = FD->getLocation();
   StringRef Name = SM.getFilename(Loc);
   Name = llvm::sys::path::filename(Name);
-  if (Name.startswith("llvm-"))
+  if (Name.starts_with("llvm-"))
       return true;
   return false;
 }
 
 static bool isMutexLock(StringRef name) {
     return name == "uv_mutex_lock" ||
-           //name == "uv_mutex_trylock" ||
+           name == "uv_mutex_trylock" ||
            name == "pthread_mutex_lock" ||
-           //name == "pthread_mutex_trylock" ||
+           name == "pthread_mutex_trylock" ||
+           name == "__gthread_mutex_lock" ||
+           name == "__gthread_mutex_trylock" ||
+           name == "__gthread_recursive_mutex_lock" ||
+           name == "__gthread_recursive_mutex_trylock" ||
            name == "pthread_spin_lock" ||
-           //name == "pthread_spin_trylock" ||
+           name == "pthread_spin_trylock" ||
            name == "uv_rwlock_rdlock" ||
-           //name == "uv_rwlock_tryrdlock" ||
+           name == "uv_rwlock_tryrdlock" ||
            name == "uv_rwlock_wrlock" ||
-           //name == "uv_rwlock_trywrlock" ||
+           name == "uv_rwlock_trywrlock" ||
            false;
 }
 
 static bool isMutexUnlock(StringRef name) {
     return name == "uv_mutex_unlock" ||
            name == "pthread_mutex_unlock" ||
+           name == "__gthread_mutex_unlock" ||
+           name == "__gthread_recursive_mutex_unlock" ||
            name == "pthread_spin_unlock" ||
            name == "uv_rwlock_rdunlock" ||
            name == "uv_rwlock_wrunlock" ||
            false;
 }
 
-#if LLVM_VERSION_MAJOR >= 13
-#define endswith_lower endswith_insensitive
-#endif
 
 bool GCChecker::isGCTrackedType(QualType QT) {
   return isJuliaType(
              [](StringRef Name) {
-               if (Name.endswith_lower("jl_value_t") ||
-                   Name.endswith_lower("jl_svec_t") ||
-                   Name.endswith_lower("jl_sym_t") ||
-                   Name.endswith_lower("jl_expr_t") ||
-                   Name.endswith_lower("jl_code_info_t") ||
-                   Name.endswith_lower("jl_array_t") ||
-                   Name.endswith_lower("jl_method_t") ||
-                   Name.endswith_lower("jl_method_instance_t") ||
-                   Name.endswith_lower("jl_tupletype_t") ||
-                   Name.endswith_lower("jl_datatype_t") ||
-                   Name.endswith_lower("jl_typemap_entry_t") ||
-                   Name.endswith_lower("jl_typemap_level_t") ||
-                   Name.endswith_lower("jl_typename_t") ||
-                   Name.endswith_lower("jl_module_t") ||
-                   Name.endswith_lower("jl_tupletype_t") ||
-                   Name.endswith_lower("jl_gc_tracked_buffer_t") ||
-                   Name.endswith_lower("jl_binding_t") ||
-                   Name.endswith_lower("jl_ordereddict_t") ||
-                   Name.endswith_lower("jl_tvar_t") ||
-                   Name.endswith_lower("jl_typemap_t") ||
-                   Name.endswith_lower("jl_unionall_t") ||
-                   Name.endswith_lower("jl_methtable_t") ||
-                   Name.endswith_lower("jl_cgval_t") ||
-                   Name.endswith_lower("jl_codectx_t") ||
-                   Name.endswith_lower("jl_ast_context_t") ||
-                   Name.endswith_lower("jl_code_instance_t") ||
-                   Name.endswith_lower("jl_excstack_t") ||
-                   Name.endswith_lower("jl_task_t") ||
-                   Name.endswith_lower("jl_uniontype_t") ||
-                   Name.endswith_lower("jl_method_match_t") ||
-                   Name.endswith_lower("jl_vararg_t") ||
-                   Name.endswith_lower("jl_opaque_closure_t") ||
-                   Name.endswith_lower("jl_globalref_t") ||
-                   // Probably not technically true for these, but let's allow it
-                   Name.endswith_lower("typemap_intersection_env") ||
-                   Name.endswith_lower("interpreter_state") ||
-                   Name.endswith_lower("jl_typeenv_t") ||
-                   Name.endswith_lower("jl_stenv_t") ||
-                   Name.endswith_lower("jl_varbinding_t") ||
-                   Name.endswith_lower("set_world") ||
-                   Name.endswith_lower("jl_codectx_t")) {
+               if (Name.ends_with_insensitive("jl_value_t") ||
+                   Name.ends_with_insensitive("jl_svec_t") ||
+                   Name.ends_with_insensitive("jl_sym_t") ||
+                   Name.ends_with_insensitive("jl_expr_t") ||
+                   Name.ends_with_insensitive("jl_code_info_t") ||
+                   Name.ends_with_insensitive("jl_array_t") ||
+                   Name.ends_with_insensitive("jl_genericmemory_t") ||
+                   //Name.ends_with_insensitive("jl_genericmemoryref_t") ||
+                   Name.ends_with_insensitive("jl_method_t") ||
+                   Name.ends_with_insensitive("jl_method_instance_t") ||
+                   Name.ends_with_insensitive("jl_debuginfo_t") ||
+                   Name.ends_with_insensitive("jl_tupletype_t") ||
+                   Name.ends_with_insensitive("jl_datatype_t") ||
+                   Name.ends_with_insensitive("jl_typemap_entry_t") ||
+                   Name.ends_with_insensitive("jl_typemap_level_t") ||
+                   Name.ends_with_insensitive("jl_typename_t") ||
+                   Name.ends_with_insensitive("jl_module_t") ||
+                   Name.ends_with_insensitive("jl_tupletype_t") ||
+                   Name.ends_with_insensitive("jl_gc_tracked_buffer_t") ||
+                   Name.ends_with_insensitive("jl_binding_t") ||
+                   Name.ends_with_insensitive("jl_binding_partition_t") ||
+                   Name.ends_with_insensitive("jl_ordereddict_t") ||
+                   Name.ends_with_insensitive("jl_tvar_t") ||
+                   Name.ends_with_insensitive("jl_typemap_t") ||
+                   Name.ends_with_insensitive("jl_unionall_t") ||
+                   Name.ends_with_insensitive("jl_methtable_t") ||
+                   Name.ends_with_insensitive("jl_methcache_t") ||
+                   Name.ends_with_insensitive("jl_cgval_t") ||
+                   Name.ends_with_insensitive("jl_codectx_t") ||
+                   Name.ends_with_insensitive("jl_ast_context_t") ||
+                   Name.ends_with_insensitive("jl_code_instance_t") ||
+                   Name.ends_with_insensitive("jl_excstack_t") ||
+                   Name.ends_with_insensitive("jl_task_t") ||
+                   Name.ends_with_insensitive("jl_uniontype_t") ||
+                   Name.ends_with_insensitive("jl_method_match_t") ||
+                   Name.ends_with_insensitive("jl_vararg_t") ||
+                   Name.ends_with_insensitive("jl_opaque_closure_t") ||
+                   Name.ends_with_insensitive("jl_globalref_t") ||
+                   Name.ends_with_insensitive("jl_abi_override_t") ||
+                   // Probably not technically true for these, but let's allow it as a root
+                   Name.ends_with_insensitive("jl_ircode_state") ||
+                   Name.ends_with_insensitive("typemap_intersection_env") ||
+                   Name.ends_with_insensitive("interpreter_state") ||
+                   Name.ends_with_insensitive("jl_typeenv_t") ||
+                   Name.ends_with_insensitive("jl_stenv_t") ||
+                   Name.ends_with_insensitive("jl_varbinding_t") ||
+                   Name.ends_with_insensitive("set_world") ||
+                   Name.ends_with_insensitive("jl_codectx_t")) {
                  return true;
                }
                return false;
@@ -824,7 +880,7 @@ bool GCChecker::isGCTracked(const Expr *E) {
 
 bool GCChecker::isGloballyRootedType(QualType QT) const {
   return isJuliaType(
-      [](StringRef Name) { return Name.endswith("jl_sym_t"); }, QT);
+      [](StringRef Name) { return Name.ends_with("jl_sym_t"); }, QT);
 }
 
 bool GCChecker::isSafepoint(const CallEvent &Call, CheckerContext &C) const {
@@ -853,9 +909,11 @@ bool GCChecker::isSafepoint(const CallEvent &Call, CheckerContext &C) const {
     if (!Decl || !FD) {
       if (Callee == nullptr) {
         isCalleeSafepoint = true;
-      } else if (const TypedefType *TDT = dyn_cast<TypedefType>(Callee->getType())) {
-        isCalleeSafepoint =
-            !declHasAnnotation(TDT->getDecl(), "julia_not_safepoint");
+      } else if (const ElaboratedType *ET = dyn_cast<ElaboratedType>(Callee->getType())){
+        if (const TypedefType *TDT = dyn_cast<TypedefType>(ET->getNamedType())) {
+          isCalleeSafepoint =
+              !declHasAnnotation(TDT->getDecl(), "julia_not_safepoint");
+        }
       } else if (const CXXPseudoDestructorExpr *PDE =
                      dyn_cast<CXXPseudoDestructorExpr>(Callee)) {
         // A pseudo-destructor is an expression that looks like a member
@@ -868,9 +926,9 @@ bool GCChecker::isSafepoint(const CallEvent &Call, CheckerContext &C) const {
       if (FD->getBuiltinID() != 0 || FD->isTrivial())
         isCalleeSafepoint = false;
       else if (FD->getDeclName().isIdentifier() &&
-               (FD->getName().startswith("uv_") ||
-                FD->getName().startswith("unw_") ||
-                FD->getName().startswith("_U")) &&
+               (FD->getName().starts_with("uv_") ||
+                FD->getName().starts_with("unw_") ||
+                FD->getName().starts_with("_U")) &&
                FD->getName() != "uv_run")
         isCalleeSafepoint = false;
       else
@@ -900,7 +958,7 @@ bool GCChecker::processPotentialSafepoint(const CallEvent &Call,
             isGCTrackedType(ParmType->getPointeeType())) {
           // This is probably an out parameter. Find the value it refers to now.
           SVal Loaded =
-              State->getSVal(Call.getArgSVal(i).getAs<Loc>().getValue());
+              State->getSVal(*(Call.getArgSVal(i).getAs<Loc>()));
           SpeciallyRootedSymbol = Loaded.getAsSymbol();
           continue;
         }
@@ -1007,13 +1065,13 @@ bool GCChecker::processAllocationOfResult(const CallEvent &Call,
         // global roots.
         StringRef FDName =
             FD->getDeclName().isIdentifier() ? FD->getName() : "";
-        if (FDName.startswith("jl_box_") || FDName.startswith("ijl_box_")) {
+        if (FDName.starts_with("jl_box_") || FDName.starts_with("ijl_box_")) {
           SVal Arg = Call.getArgSVal(0);
           if (auto CI = Arg.getAs<nonloc::ConcreteInt>()) {
             const llvm::APSInt &Value = CI->getValue();
             bool GloballyRooted = false;
             const int64_t NBOX_C = 1024;
-            if (FDName.startswith("jl_box_u") || FDName.startswith("ijl_box_u")) {
+            if (FDName.starts_with("jl_box_u") || FDName.starts_with("ijl_box_u")) {
               if (Value < NBOX_C) {
                 GloballyRooted = true;
               }
@@ -1123,10 +1181,10 @@ void GCChecker::checkDerivingExpr(const Expr *Result, const Expr *Parent,
     // TODO: We may want to refine this. This is to track pointers through the
     // array list in jl_module_t.
     bool ParentIsModule = isJuliaType(
-        [](StringRef Name) { return Name.endswith("jl_module_t"); },
+        [](StringRef Name) { return Name.ends_with("jl_module_t"); },
         Parent->getType());
     bool ResultIsArrayList = isJuliaType(
-        [](StringRef Name) { return Name.endswith("arraylist_t"); },
+        [](StringRef Name) { return Name.ends_with("arraylist_t"); },
         Result->getType());
     if (!(ParentIsModule && ResultIsArrayList) && isGCTracked(Parent)) {
       ResultTracked = false;
@@ -1302,6 +1360,7 @@ void GCChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const {
               Report->addNote(
                   "Tried to call method defined here",
                   PathDiagnosticLocation::create(FD, C.getSourceManager()));
+            Report->addVisitor(make_unique<SafepointBugVisitor>());
           },
           C, ("Calling potential safepoint as " +
               Call.getKindAsString() + " from function annotated JL_NOTSAFEPOINT").str());
@@ -1398,7 +1457,8 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
   } else if (name == "JL_GC_PUSH1" || name == "JL_GC_PUSH2" ||
              name == "JL_GC_PUSH3" || name == "JL_GC_PUSH4" ||
              name == "JL_GC_PUSH5" || name == "JL_GC_PUSH6" ||
-             name == "JL_GC_PUSH7" || name == "JL_GC_PUSH8") {
+             name == "JL_GC_PUSH7" || name == "JL_GC_PUSH8" ||
+             name == "JL_GC_PUSH9") {
     ProgramStateRef State = C.getState();
     // Transform slots to roots, transform values to rooted
     unsigned NumArgs = CE->getNumArgs();
@@ -1478,7 +1538,7 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
       }
     }
     if (FD) {
-      Loc ItemsLoc = State->getLValue(FD, ArrayList).getAs<Loc>().getValue();
+      Loc ItemsLoc = *(State->getLValue(FD, ArrayList).getAs<Loc>());
       SVal Items = State->getSVal(ItemsLoc);
       if (Items.isUnknown()) {
         Items = C.getSValBuilder().conjureSymbolVal(
@@ -1646,7 +1706,7 @@ void GCChecker::checkLocation(SVal SLoc, bool IsLoad, const Stmt *S,
   // better than this.
   if (IsLoad && (RS = State->get<GCRootMap>(SLoc.getAsRegion()))) {
     SymbolRef LoadedSym =
-        State->getSVal(SLoc.getAs<Loc>().getValue()).getAsSymbol();
+        State->getSVal(*SLoc.getAs<Loc>()).getAsSymbol();
     if (LoadedSym) {
       const ValueState *ValS = State->get<GCValueMap>(LoadedSym);
       if (!ValS || !ValS->isRooted() || ValS->RootDepth > RS->RootedAtDepth) {
diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c
index de5f2a2770c04..dbb9ae2c3ee20 100644
--- a/src/codegen-stubs.c
+++ b/src/codegen-stubs.c
@@ -13,14 +13,15 @@
 JL_DLLEXPORT void jl_dump_native_fallback(void *native_code,
         const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname,
         ios_t *z, ios_t *s) UNAVAILABLE
-JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE
-JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE
+JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, size_t *num, void **gvs) UNAVAILABLE
+JL_DLLEXPORT void jl_get_llvm_gv_inits_fallback(void *native_code, size_t *num, void **inits) UNAVAILABLE
+JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, size_t *num, void **gvs) UNAVAILABLE
+JL_DLLEXPORT void jl_get_llvm_cis_fallback(void *native_code, size_t *num, void **CIs) UNAVAILABLE
 
-JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE
 JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world,
         char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
 JL_DLLEXPORT jl_value_t *jl_dump_function_ir_fallback(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo) UNAVAILABLE
-JL_DLLEXPORT void jl_get_llvmf_defn_fallback(jl_llvmf_dump_t *dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE
+JL_DLLEXPORT void jl_get_llvmf_defn_fallback(jl_llvmf_dump_t *dump, jl_method_instance_t *linfo, jl_code_info_t *src, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE
 
 JL_DLLEXPORT void *jl_LLVMCreateDisasm_fallback(const char *TripleName, void *DisInfo, int TagType, void *GetOpInfo, void *SymbolLookUp) UNAVAILABLE
 JL_DLLEXPORT size_t jl_LLVMDisasmInstruction_fallback(void *DC, uint8_t *Bytes, uint64_t BytesSize, uint64_t PC, char *OutString, size_t OutStringSize) UNAVAILABLE
@@ -38,26 +39,44 @@ JL_DLLEXPORT void jl_register_fptrs_fallback(uint64_t image_base, const struct _
     (void)image_base; (void)fptrs; (void)linfos; (void)n;
 }
 
-JL_DLLEXPORT jl_code_instance_t *jl_generate_fptr_fallback(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
+JL_DLLEXPORT void jl_generate_fptr_for_unspecialized_fallback(jl_code_instance_t *unspec)
 {
-    return NULL;
+    jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call);
 }
 
-JL_DLLEXPORT void jl_generate_fptr_for_unspecialized_fallback(jl_code_instance_t *unspec)
+JL_DLLEXPORT int jl_compile_codeinst_fallback(jl_code_instance_t *unspec)
 {
-    jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call);
+    // Do nothing. The caller will notice that we failed to provide an ->invoke and trigger
+    // appropriate fallbacks.
+    return 0;
 }
 
-JL_DLLEXPORT void jl_generate_fptr_for_oc_wrapper_fallback(jl_code_instance_t *unspec) UNAVAILABLE
+JL_DLLEXPORT void jl_emit_codeinst_to_jit_fallback(jl_code_instance_t *codeinst, jl_code_info_t *src)
+{
+    jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
+    if (jl_is_code_info(inferred))
+        return;
+    if (jl_is_svec(src->edges)) {
+        jl_atomic_store_release(&codeinst->inferred, (jl_value_t*)src->edges);
+        jl_gc_wb(codeinst, src->edges);
+    }
+    jl_atomic_store_release(&codeinst->debuginfo, src->debuginfo);
+    jl_gc_wb(codeinst, src->debuginfo);
+    jl_atomic_store_release(&codeinst->inferred, (jl_value_t*)src);
+    jl_gc_wb(codeinst, src);
+}
 
 JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_fallback(void)
 {
     return 0;
 }
 
-JL_DLLEXPORT int jl_compile_extern_c_fallback(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
+JL_DLLEXPORT int jl_compile_extern_c_fallback(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *name, jl_value_t *declrt, jl_value_t *sigt)
 {
-    return 0;
+    // Assume we were able to register the ccallable with the JIT. The
+    // fact that we didn't is not observable since we cannot compile
+    // anything else.
+    return 1;
 }
 
 JL_DLLEXPORT void jl_teardown_codegen_fallback(void) JL_NOTSAFEPOINT
@@ -69,7 +88,8 @@ JL_DLLEXPORT size_t jl_jit_total_bytes_fallback(void)
     return 0;
 }
 
-JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world) UNAVAILABLE
+JL_DLLEXPORT void *jl_create_native_fallback(LLVMOrcThreadSafeModuleRef llvmmod, int _trim, int _external_linkage, size_t _world, jl_array_t *_mod_array, jl_array_t *_worklist, int _all, jl_array_t *_module_init_order) UNAVAILABLE
+JL_DLLEXPORT void *jl_emit_native_fallback(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _external_linkage) UNAVAILABLE
 
 JL_DLLEXPORT void jl_dump_compiles_fallback(void *s)
 {
@@ -107,59 +127,8 @@ JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr)
     return 0;
 }
 
-JL_DLLEXPORT void jl_add_optimization_passes_fallback(void *PM, int opt_level, int lower_intrinsics) UNAVAILABLE
-
-JL_DLLEXPORT void jl_build_newpm_pipeline_fallback(void *MPM, void *PB, int Speedup, int Size,
-    int lower_intrinsics, int dump_native, int external_use, int llvm_only) UNAVAILABLE
-
 JL_DLLEXPORT void jl_register_passbuilder_callbacks_fallback(void *PB) { }
 
-JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddRemoveJuliaAddrspacesPass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraJuliaLICMPass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddAllocOptPass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass_fallback(void *PM, bool_t imaging_mode) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddRemoveNIPass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_fallback(void *PM, bool_t Strong) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_fallback(void *PM) UNAVAILABLE
-
-JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_fallback(void *PM) UNAVAILABLE
-
-#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \
-    JL_DLLEXPORT void LLVMExtraMPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE
-#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \
-    JL_DLLEXPORT void LLVMExtraCGPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE
-#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \
-    JL_DLLEXPORT void LLVMExtraFPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE
-#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \
-    JL_DLLEXPORT void LLVMExtraLPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE
-
-#include "llvm-julia-passes.inc"
-
-#undef MODULE_PASS
-#undef CGSCC_PASS
-#undef FUNCTION_PASS
-#undef LOOP_PASS
-
 //LLVM C api to the julia JIT
 JL_DLLEXPORT void* JLJITGetLLVMOrcExecutionSession_fallback(void* JIT) UNAVAILABLE
 
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 04f7564dd3e33..ed427f999e8e3 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -16,13 +16,14 @@
 #include <array>
 #include <vector>
 #include <set>
+#include <unordered_set>
 #include <functional>
 
 // target machine computation
 #include <llvm/CodeGen/TargetSubtargetInfo.h>
 #include <llvm/MC/TargetRegistry.h>
 #include <llvm/Target/TargetOptions.h>
-#include <llvm/Support/Host.h>
+#include <llvm/TargetParser/Host.h>
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Object/SymbolSize.h>
 
@@ -40,10 +41,10 @@
 #include <llvm/IR/Attributes.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/MDBuilder.h>
+#include <llvm/Analysis/InstructionSimplify.h>
 
 // support
 #include <llvm/ADT/SmallBitVector.h>
-#include <llvm/ADT/Optional.h>
 #include <llvm/ADT/Statistic.h>
 #include <llvm/Support/raw_ostream.h>
 #include <llvm/Support/FormattedStream.h>
@@ -75,22 +76,13 @@
 #include "llvm/Support/Path.h" // for llvm::sys::path
 #include <llvm/Bitcode/BitcodeReader.h>
 #include <llvm/Linker/Linker.h>
+#include <llvm/CodeGen/MachineModuleInfo.h>
 
-using namespace llvm;
-
-static bool jl_fpo_disabled(const Triple &TT) {
-#ifdef JL_DISABLE_FPO
-    return true;
-#endif
-#ifdef _COMPILER_MSAN_ENABLED_
-    // MSAN doesn't support FPO
-    return true;
+#ifdef USE_ITTAPI
+#include "ittapi/ittnotify.h"
 #endif
-    if (TT.isOSLinux() || TT.isOSWindows() || TT.isOSFreeBSD()) {
-        return true;
-    }
-    return false;
-}
+
+using namespace llvm;
 
 static bool jl_floattemp_var_needed(const Triple &TT) {
 #ifdef JL_NEED_FLOATTEMP_VAR
@@ -125,6 +117,9 @@ auto getFloatTy(LLVMContext &ctxt) {
 auto getDoubleTy(LLVMContext &ctxt) {
     return Type::getDoubleTy(ctxt);
 }
+auto getBFloatTy(LLVMContext &ctxt) {
+    return Type::getBFloatTy(ctxt);
+}
 auto getFP128Ty(LLVMContext &ctxt) {
     return Type::getFP128Ty(ctxt);
 }
@@ -134,23 +129,8 @@ auto getVoidTy(LLVMContext &ctxt) {
 auto getCharTy(LLVMContext &ctxt) {
     return getInt32Ty(ctxt);
 }
-auto getInt8PtrTy(LLVMContext &ctxt) {
-    return Type::getInt8PtrTy(ctxt);
-}
-auto getInt16PtrTy(LLVMContext &ctxt) {
-    return Type::getInt16PtrTy(ctxt);
-}
-auto getInt32PtrTy(LLVMContext &ctxt) {
-    return Type::getInt32PtrTy(ctxt);
-}
-auto getInt64PtrTy(LLVMContext &ctxt) {
-    return Type::getInt64PtrTy(ctxt);
-}
-auto getFloatPtrTy(LLVMContext &ctxt) {
-    return Type::getFloatPtrTy(ctxt);
-}
-auto getDoublePtrTy(LLVMContext &ctxt) {
-    return Type::getDoublePtrTy(ctxt);
+auto getPointerTy(LLVMContext &ctxt) {
+    return PointerType::get(ctxt, 0);
 }
 
 typedef Instruction TerminatorInst;
@@ -160,7 +140,6 @@ typedef Instruction TerminatorInst;
 #endif
 
 #include "jitlayers.h"
-#include "llvm-codegen-shared.h"
 #include "processor.h"
 #include "julia_assert.h"
 
@@ -171,9 +150,57 @@ void setName(jl_codegen_params_t &params, Value *V, const Twine &Name)
 {
     // we do the constant check again later, duplicating it here just makes sure the assertion
     // fires on debug builds even if debug info is not enabled
+    // note that if this assertion fires then the implication is that the caller of setName
+    // is not checking that setName is only called for non-folded instructions (e.g. folded bitcasts
+    // and 0-byte geps), which can result in information loss on the renamed instruction.
     assert((isa<Constant>(V) || isa<Instruction>(V)) && "Should only set names on instructions!");
-    if (params.debug_level && !isa<Constant>(V)) {
+    if (!isa<Constant>(V)) {
+        V->setName(Name);
+    }
+}
+
+void maybeSetName(jl_codegen_params_t &params, Value *V, const Twine &Name)
+{
+    // To be used when we may get an Instruction or something that is not an instruction i.e Constants/Arguments
+    if (isa<Instruction>(V))
         V->setName(Name);
+}
+
+void setName(jl_codegen_params_t &params, Value *V, std::function<std::string()> GetName)
+{
+    assert((isa<Constant>(V) || isa<Instruction>(V)) && "Should only set names on instructions!");
+    if (!params.getContext().shouldDiscardValueNames() && !isa<Constant>(V))
+        V->setName(Twine(GetName()));
+}
+
+void setNameWithField(jl_codegen_params_t &params, Value *V, std::function<StringRef()> GetObjName, jl_datatype_t *jt, unsigned idx, const Twine &suffix)
+{
+    assert((isa<Constant>(V) || isa<Instruction>(V)) && "Should only set names on instructions!");
+    if (!params.getContext().shouldDiscardValueNames() && !isa<Constant>(V)) {
+        if (jl_is_tuple_type(jt)){
+            V->setName(Twine(GetObjName()) + "[" + Twine(idx + 1) + "]"+ suffix);
+            return;
+        }
+
+        if (jl_is_namedtuple_type(jt)) {
+            auto names = jl_tparam0(jt);
+            assert(jl_is_tuple(names));
+            if (idx < jl_nfields(names)) {
+                auto name = jl_fieldref(names, idx);
+                assert(jl_is_symbol(name));
+                V->setName(Twine(GetObjName()) + "." + Twine(jl_symbol_name((jl_sym_t*)name)) + suffix);
+                return;
+            }
+        } else {
+            auto flds = jl_field_names(jt);
+            if (idx < jl_svec_len(flds)) {
+                auto name = jl_svecref(flds, idx);
+                assert(jl_is_symbol(name));
+                V->setName(Twine(GetObjName()) + "." + Twine(jl_symbol_name((jl_sym_t*)name)) + suffix);
+                return;
+            }
+        }
+        V->setName(Twine(GetObjName()) + "." + Twine("unknown field") + suffix);
     }
 }
 
@@ -191,7 +218,6 @@ STATISTIC(EmittedSpecfunCalls, "Number of specialized calls emitted");
 STATISTIC(EmittedInvokes, "Number of invokes emitted");
 STATISTIC(EmittedCalls, "Number of calls emitted");
 STATISTIC(EmittedUndefVarErrors, "Number of undef var errors emitted");
-STATISTIC(EmittedOpaqueClosureFunctions, "Number of opaque closures emitted");
 STATISTIC(EmittedToJLInvokes, "Number of tojlinvoke calls emitted");
 STATISTIC(EmittedCFuncInvalidates, "Number of C function invalidates emitted");
 STATISTIC(GeneratedCFuncWrappers, "Number of C function wrappers generated");
@@ -213,13 +239,13 @@ extern void __stack_chk_fail();
 
 #ifdef _OS_WINDOWS_
 #if defined(_CPU_X86_64_)
-#if defined(_COMPILER_GCC_)
+#if defined(__MINGW32__)
 extern void ___chkstk_ms(void);
 #else
 extern void __chkstk(void);
 #endif
 #else
-#if defined(_COMPILER_GCC_)
+#if defined(__MINGW32__)
 #undef _alloca
 extern void _alloca(void);
 #else
@@ -239,63 +265,62 @@ extern void _chkstk(void);
 
 // types
 struct jl_typecache_t {
+    PointerType *T_ptr;
     Type *T_size;
     Type *T_jlvalue;
-    Type *T_pjlvalue;
-    Type *T_prjlvalue;
-    Type *T_ppjlvalue;
-    Type *T_pprjlvalue;
+    PointerType *T_pjlvalue;
+    PointerType *T_prjlvalue;
+    PointerType *T_ppjlvalue;
+    PointerType *T_pprjlvalue;
+    StructType *T_jlgenericmemory;
     StructType *T_jlarray;
-    Type *T_pjlarray;
+    PointerType *T_pjlarray;
     FunctionType *T_jlfunc;
     FunctionType *T_jlfuncparams;
 
     IntegerType *T_sigatomic;
 
-    Type *T_ppint8;
     unsigned sizeof_ptr;
     Align alignof_ptr;
 
     bool initialized;
 
     jl_typecache_t() :
-        T_jlvalue(nullptr), T_pjlvalue(nullptr), T_prjlvalue(nullptr),
-        T_ppjlvalue(nullptr), T_pprjlvalue(nullptr), T_jlarray(nullptr),
-        T_pjlarray(nullptr), T_jlfunc(nullptr), T_jlfuncparams(nullptr),
-        T_sigatomic(nullptr), T_ppint8(nullptr), initialized(false) {}
+        T_ptr(nullptr), T_jlvalue(nullptr), T_pjlvalue(nullptr), T_prjlvalue(nullptr),
+        T_ppjlvalue(nullptr), T_pprjlvalue(nullptr),
+        T_jlgenericmemory(nullptr), T_jlarray(nullptr), T_pjlarray(nullptr),
+        T_jlfunc(nullptr), T_jlfuncparams(nullptr), T_sigatomic(nullptr),
+        initialized(false) {}
 
     void initialize(LLVMContext &context, const DataLayout &DL) {
         if (initialized) {
             return;
         }
         initialized = true;
-        T_ppint8 = PointerType::get(getInt8PtrTy(context), 0);
+        T_ptr = getPointerTy(context);
         T_sigatomic = Type::getIntNTy(context, sizeof(sig_atomic_t) * 8);
         T_size = DL.getIntPtrType(context);
         sizeof_ptr = DL.getPointerSize();
         // use pointer abi alignment for intptr_t
         alignof_ptr = DL.getPointerABIAlignment(0);
         T_jlvalue = JuliaType::get_jlvalue_ty(context);
-        T_pjlvalue = PointerType::get(T_jlvalue, 0);
-        T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
-        T_ppjlvalue = PointerType::get(T_pjlvalue, 0);
-        T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
+        T_pjlvalue = getPointerTy(context);
+        T_prjlvalue = PointerType::get(context, AddressSpace::Tracked);
+        T_ppjlvalue = getPointerTy(context);
+        T_pprjlvalue = getPointerTy(context);
 
         T_jlfunc = JuliaType::get_jlfunc_ty(context);
         assert(T_jlfunc != NULL);
         T_jlfuncparams = JuliaType::get_jlfuncparams_ty(context);
         assert(T_jlfuncparams != NULL);
 
-        Type *vaelts[] = {PointerType::get(getInt8Ty(context), AddressSpace::Loaded)
-                        , T_size
-                        , getInt16Ty(context)
-                        , getInt16Ty(context)
-                        , getInt32Ty(context)
+        T_jlgenericmemory = StructType::get(context, { T_size, T_pprjlvalue /* [, real-owner] */ });
+        Type *vaelts[] = { PointerType::get(context, AddressSpace::Loaded),
+                           PointerType::get(context, AddressSpace::Tracked),
+                           // dimsize[ndims]
         };
-        static_assert(sizeof(jl_array_flags_t) == sizeof(int16_t),
-                    "Size of jl_array_flags_t is not the same as int16_t");
-        T_jlarray = StructType::get(context, makeArrayRef(vaelts));
-        T_pjlarray = PointerType::get(T_jlarray, 0);
+        T_jlarray = StructType::get(context, ArrayRef<Type*>(vaelts));
+        T_pjlarray = getPointerTy(context);
     }
 };
 
@@ -309,19 +334,19 @@ struct jl_tbaacache_t {
     MDNode *tbaa_unionselbyte;   // a selector byte in isbits Union struct fields
     MDNode *tbaa_data;       // Any user data that `pointerset/ref` are allowed to alias
     MDNode *tbaa_binding;        // jl_binding_t::value
-    MDNode *tbaa_value;          // jl_value_t, that is not jl_array_t
+    MDNode *tbaa_value;          // jl_value_t, that is not jl_array_t or jl_genericmemory_t
     MDNode *tbaa_mutab;              // mutable type
     MDNode *tbaa_datatype;               // datatype
     MDNode *tbaa_immut;              // immutable type
     MDNode *tbaa_ptrarraybuf;    // Data in an array of boxed values
     MDNode *tbaa_arraybuf;       // Data in an array of POD
-    MDNode *tbaa_array;      // jl_array_t
-    MDNode *tbaa_arrayptr;       // The pointer inside a jl_array_t
+    MDNode *tbaa_array;      // jl_array_t or jl_genericmemory_t
+    MDNode *tbaa_arrayptr;       // The pointer inside a jl_array_t (to a memoryref)
     MDNode *tbaa_arraysize;      // A size in a jl_array_t
-    MDNode *tbaa_arraylen;       // The len in a jl_array_t
-    MDNode *tbaa_arrayflags;     // The flags in a jl_array_t
-    MDNode *tbaa_arrayoffset;     // The offset in a jl_array_t
-    MDNode *tbaa_arrayselbyte;   // a selector byte in a isbits Union jl_array_t
+    MDNode *tbaa_arrayselbyte;   // a selector byte in a isbits Union jl_genericmemory_t
+    MDNode *tbaa_memoryptr;      // The pointer inside a jl_genericmemory_t
+    MDNode *tbaa_memorylen;      // The length in a jl_genericmemory_t
+    MDNode *tbaa_memoryown;      // The owner in a foreign jl_genericmemory_t
     MDNode *tbaa_const;      // Memory that is immutable by the time LLVM can see it
     bool initialized;
 
@@ -330,8 +355,8 @@ struct jl_tbaacache_t {
                     tbaa_value(nullptr), tbaa_mutab(nullptr), tbaa_datatype(nullptr),
                     tbaa_immut(nullptr), tbaa_ptrarraybuf(nullptr), tbaa_arraybuf(nullptr),
                     tbaa_array(nullptr), tbaa_arrayptr(nullptr), tbaa_arraysize(nullptr),
-                    tbaa_arraylen(nullptr), tbaa_arrayflags(nullptr), tbaa_arrayoffset(nullptr),
-                    tbaa_arrayselbyte(nullptr), tbaa_const(nullptr), initialized(false) {}
+                    tbaa_arrayselbyte(nullptr), tbaa_memoryptr(nullptr), tbaa_memorylen(nullptr), tbaa_memoryown(nullptr),
+                    tbaa_const(nullptr), initialized(false) {}
 
     auto tbaa_make_child(MDBuilder &mbuilder, const char *name, MDNode *parent = nullptr, bool isConstant = false) {
         MDNode *scalar = mbuilder.createTBAAScalarTypeNode(name, parent ? parent : tbaa_root);
@@ -369,11 +394,11 @@ struct jl_tbaacache_t {
         std::tie(tbaa_array, tbaa_array_scalar) = tbaa_make_child(mbuilder, "jtbaa_array");
         tbaa_arrayptr = tbaa_make_child(mbuilder, "jtbaa_arrayptr", tbaa_array_scalar).first;
         tbaa_arraysize = tbaa_make_child(mbuilder, "jtbaa_arraysize", tbaa_array_scalar).first;
-        tbaa_arraylen = tbaa_make_child(mbuilder, "jtbaa_arraylen", tbaa_array_scalar).first;
-        tbaa_arrayflags = tbaa_make_child(mbuilder, "jtbaa_arrayflags", tbaa_array_scalar).first;
-        tbaa_arrayoffset = tbaa_make_child(mbuilder, "jtbaa_arrayoffset", tbaa_array_scalar).first;
-        tbaa_const = tbaa_make_child(mbuilder, "jtbaa_const", nullptr, true).first;
         tbaa_arrayselbyte = tbaa_make_child(mbuilder, "jtbaa_arrayselbyte", tbaa_array_scalar).first;
+        tbaa_memoryptr = tbaa_make_child(mbuilder, "jtbaa_memoryptr", tbaa_array_scalar).first;
+        tbaa_memorylen = tbaa_make_child(mbuilder, "jtbaa_memorylen", tbaa_array_scalar).first;
+        tbaa_memoryown = tbaa_make_child(mbuilder, "jtbaa_memoryown", tbaa_array_scalar).first;
+        tbaa_const = tbaa_make_child(mbuilder, "jtbaa_const", nullptr, true).first;
     }
 };
 
@@ -386,7 +411,7 @@ struct jl_noaliascache_t {
         MDNode *gcframe;        // GC frame
         MDNode *stack;          // Stack slot
         MDNode *data;           // Any user data that `pointerset/ref` are allowed to alias
-        MDNode *type_metadata;  // Non-user-accessible type metadata incl. size, union selectors, etc.
+        MDNode *type_metadata;  // Non-user-accessible type metadata incl. union selectors, etc.
         MDNode *constant;       // Memory that is immutable by the time LLVM can see it
 
         jl_regions_t(): gcframe(nullptr), stack(nullptr), data(nullptr), type_metadata(nullptr), constant(nullptr) {}
@@ -493,9 +518,12 @@ struct JuliaVariable {
         if (GlobalValue *V = m->getNamedValue(name))
             return cast<GlobalVariable>(V);
         auto T_size = m->getDataLayout().getIntPtrType(m->getContext());
-        return new GlobalVariable(*m, _type(T_size),
+        auto var = new GlobalVariable(*m, _type(T_size),
                 isconst, GlobalVariable::ExternalLinkage,
                 NULL, name);
+        if (Triple(m->getTargetTriple()).isOSWindows())
+            var->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLImportStorageClass); // Cross-library imports must be explicit for COFF (Windows)
+        return var;
     }
     GlobalVariable *realize(jl_codectx_t &ctx);
 };
@@ -527,7 +555,14 @@ FunctionType *invoke_type(TypeFnContextAndTriple f, Module &M)
 template<typename TypeFn_t = TypeFnContextOnly>
 struct JuliaFunction {
 public:
-    llvm::StringLiteral name;
+    template <size_t N>
+    constexpr JuliaFunction(const char (&cname)[N], TypeFn_t _type, llvm::AttributeList (*_attrs)(llvm::LLVMContext &C))
+        : name(StringRef(cname, N-1)), _type(_type), _attrs(_attrs) {}
+    JuliaFunction(StringRef cname, TypeFn_t _type, llvm::AttributeList (*_attrs)(llvm::LLVMContext &C))
+        : name(cname), _type(_type), _attrs(_attrs) {}
+    JuliaFunction(char *cname, TypeFn_t _type, llvm::AttributeList (*_attrs)(llvm::LLVMContext &C)) = delete;
+
+    llvm::StringRef name;
     TypeFn_t _type;
     llvm::AttributeList (*_attrs)(llvm::LLVMContext &C);
 
@@ -560,22 +595,21 @@ static inline void add_named_global(StringRef name, T *addr)
     add_named_global(name, (void*)(uintptr_t)addr);
 }
 
-AttributeSet Attributes(LLVMContext &C, std::initializer_list<Attribute::AttrKind> attrkinds)
+AttributeSet Attributes(LLVMContext &C, std::initializer_list<Attribute::AttrKind> attrkinds, std::initializer_list<Attribute> extra={})
 {
-    SmallVector<Attribute, 8> attrs(attrkinds.size());
+    SmallVector<Attribute, 8> attrs(attrkinds.size() + extra.size());
     for (size_t i = 0; i < attrkinds.size(); i++)
         attrs[i] = Attribute::get(C, attrkinds.begin()[i]);
-    return AttributeSet::get(C, makeArrayRef(attrs));
+    for (size_t i = 0; i < extra.size(); i++)
+        attrs[attrkinds.size() + i] = extra.begin()[i];
+    return AttributeSet::get(C, ArrayRef<Attribute>(attrs));
 }
 
 static Type *get_pjlvalue(LLVMContext &C) { return JuliaType::get_pjlvalue_ty(C); }
 
 static FunctionType *get_func_sig(LLVMContext &C) { return JuliaType::get_jlfunc_ty(C); }
 static FunctionType *get_func2_sig(LLVMContext &C) { return JuliaType::get_jlfunc2_ty(C); }
-
-static FunctionType *get_donotdelete_sig(LLVMContext &C) {
-    return FunctionType::get(getVoidTy(C), true);
-}
+static FunctionType *get_func3_sig(LLVMContext &C) { return JuliaType::get_jlfunc3_ty(C); }
 
 static AttributeList get_func_attrs(LLVMContext &C)
 {
@@ -586,15 +620,6 @@ static AttributeList get_func_attrs(LLVMContext &C)
              Attributes(C, {Attribute::NoAlias, Attribute::ReadOnly, Attribute::NoCapture, Attribute::NoUndef})});
 }
 
-static AttributeList get_donotdelete_func_attrs(LLVMContext &C)
-{
-    AttributeSet FnAttrs = Attributes(C, {Attribute::InaccessibleMemOnly, Attribute::WillReturn, Attribute::NoUnwind});
-    return AttributeList::get(C,
-            FnAttrs,
-            Attributes(C, {}),
-            None);
-}
-
 static AttributeList get_attrs_noreturn(LLVMContext &C)
 {
     return AttributeList::get(C,
@@ -611,20 +636,53 @@ static AttributeList get_attrs_basic(LLVMContext &C)
                 None);
 }
 
-static AttributeList get_attrs_sext(LLVMContext &C)
+static AttributeList get_attrs_box_float(LLVMContext &C, unsigned nbytes)
 {
+    auto FnAttrs = AttrBuilder(C);
+    FnAttrs.addAttribute(Attribute::WillReturn);
+    FnAttrs.addAttribute(Attribute::NoUnwind);
+    FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly() | MemoryEffects::readOnly());
+    auto RetAttrs = AttrBuilder(C);
+    RetAttrs.addAttribute(Attribute::NonNull);
+    RetAttrs.addDereferenceableAttr(nbytes);
+    RetAttrs.addAlignmentAttr(Align(alignof(void*)));
     return AttributeList::get(C,
-                AttributeSet(),
-                Attributes(C, {Attribute::NonNull}),
-                {Attributes(C, {Attribute::SExt})});
+                AttributeSet::get(C, FnAttrs),
+                AttributeSet::get(C, RetAttrs),
+                None);
 }
 
-static AttributeList get_attrs_zext(LLVMContext &C)
+static AttributeList get_attrs_box_sext(LLVMContext &C, unsigned nbytes)
 {
+    auto FnAttrs = AttrBuilder(C);
+    FnAttrs.addAttribute(Attribute::WillReturn);
+    FnAttrs.addAttribute(Attribute::NoUnwind);
+    FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly() | MemoryEffects::readOnly());
+    auto RetAttrs = AttrBuilder(C);
+    RetAttrs.addAttribute(Attribute::NonNull);
+    RetAttrs.addAttribute(Attribute::getWithDereferenceableBytes(C, nbytes));
+    RetAttrs.addDereferenceableAttr(nbytes);
+    RetAttrs.addAlignmentAttr(Align(alignof(void*)));
     return AttributeList::get(C,
-                AttributeSet(),
-                Attributes(C, {Attribute::NonNull}),
-                {Attributes(C, {Attribute::ZExt})});
+                AttributeSet::get(C, FnAttrs),
+                AttributeSet::get(C, RetAttrs),
+                AttributeSet::get(C, {Attribute::get(C, Attribute::SExt)}));
+}
+
+static AttributeList get_attrs_box_zext(LLVMContext &C, unsigned nbytes)
+{
+    auto FnAttrs = AttrBuilder(C);
+    FnAttrs.addAttribute(Attribute::WillReturn);
+    FnAttrs.addAttribute(Attribute::NoUnwind);
+    FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly() | MemoryEffects::readOnly());
+    auto RetAttrs = AttrBuilder(C);
+    RetAttrs.addAttribute(Attribute::NonNull);
+    RetAttrs.addDereferenceableAttr(nbytes);
+    RetAttrs.addAlignmentAttr(Align(alignof(void*)));
+    return AttributeList::get(C,
+                AttributeSet::get(C, FnAttrs),
+                AttributeSet::get(C, RetAttrs),
+                AttributeSet::get(C, {Attribute::get(C, Attribute::ZExt)}));
 }
 
 
@@ -632,25 +690,25 @@ static AttributeList get_attrs_zext(LLVMContext &C)
 static const auto jlRTLD_DEFAULT_var = new JuliaVariable{
     XSTR(jl_RTLD_DEFAULT_handle),
     true,
-    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
+    [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); },
 };
 static const auto jlexe_var = new JuliaVariable{
     XSTR(jl_exe_handle),
     true,
-    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
+    [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); },
 };
 static const auto jldll_var = new JuliaVariable{
     XSTR(jl_libjulia_handle),
     true,
-    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
+    [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); },
 };
 static const auto jldlli_var = new JuliaVariable{
     XSTR(jl_libjulia_internal_handle),
     true,
-    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
+    [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); },
 };
-static const auto jlsmall_typeof_var = new JuliaVariable{
-    XSTR(small_typeof),
+static const auto jl_small_typeof_var = new JuliaVariable{
+    XSTR(jl_small_typeof),
     true,
     [](Type *T_size) -> Type * { return getInt8Ty(T_size->getContext()); },
 };
@@ -681,7 +739,7 @@ static const auto jlboxed_uint8_cache = new JuliaVariable{
 
 static const auto jlpgcstack_func = new JuliaFunction<>{
     "julia.get_pgcstack",
-    [](LLVMContext &C) { return FunctionType::get(PointerType::get(JuliaType::get_ppjlvalue_ty(C), 0), false); },
+    [](LLVMContext &C) { return FunctionType::get(getPointerTy(C), false); },
     nullptr,
 };
 
@@ -712,50 +770,58 @@ static const auto jlsplatnew_func = new JuliaFunction<>{
 static const auto jlthrow_func = new JuliaFunction<>{
     XSTR(jl_throw),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
+            {PointerType::get(C, AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlerror_func = new JuliaFunction<>{
     XSTR(jl_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getInt8PtrTy(C)}, false); },
+            {getPointerTy(C)}, false); },
+    get_attrs_noreturn,
+};
+static const auto jlargumenterror_func = new JuliaFunction<>{
+    XSTR(jl_argument_error),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {getPointerTy(C)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlatomicerror_func = new JuliaFunction<>{
     XSTR(jl_atomic_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getInt8PtrTy(C)}, false); },
+            {getPointerTy(C)}, false); },
     get_attrs_noreturn,
 };
 static const auto jltypeerror_func = new JuliaFunction<>{
     XSTR(jl_type_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getInt8PtrTy(C), JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
+            {getPointerTy(C), JuliaType::get_prjlvalue_ty(C), PointerType::get(C, AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlundefvarerror_func = new JuliaFunction<>{
     XSTR(jl_undefined_var_error),
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
+    [](LLVMContext &C) {
+        Type *T = PointerType::get(C, AddressSpace::CalleeRooted);
+        return FunctionType::get(getVoidTy(C), {T, T}, false);
+    },
     get_attrs_noreturn,
 };
 static const auto jlhasnofield_func = new JuliaFunction<>{
     XSTR(jl_has_no_field_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted),
-             PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
+            {PointerType::get(C, AddressSpace::CalleeRooted),
+             PointerType::get(C, AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlboundserrorv_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_bounds_error_ints),
     [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C),
-            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), T_size->getPointerTo(), T_size}, false); },
+            {PointerType::get(C, AddressSpace::CalleeRooted), getPointerTy(C), T_size}, false); },
     get_attrs_noreturn,
 };
 static const auto jlboundserror_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_bounds_error_int),
     [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C),
-            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), T_size}, false); },
+            {PointerType::get(C, AddressSpace::CalleeRooted), T_size}, false); },
     get_attrs_noreturn,
 };
 static const auto jlvboundserror_func = new JuliaFunction<TypeFnContextAndSizeT>{
@@ -768,7 +834,7 @@ static const auto jluboundserror_func = new JuliaFunction<TypeFnContextAndSizeT>
     XSTR(jl_bounds_error_unboxed_int),
     [](LLVMContext &C, Type *T_size) {
         return FunctionType::get(getVoidTy(C),
-            {PointerType::get(getInt8Ty(C), AddressSpace::Derived), JuliaType::get_pjlvalue_ty(C), T_size}, false); },
+            {PointerType::get(C, AddressSpace::Derived), JuliaType::get_pjlvalue_ty(C), T_size}, false); },
     get_attrs_noreturn,
 };
 static const auto jlcheckassign_func = new JuliaFunction<>{
@@ -776,32 +842,78 @@ static const auto jlcheckassign_func = new JuliaFunction<>{
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
         return FunctionType::get(getVoidTy(C),
-            {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
+            {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(C, AddressSpace::CalleeRooted)}, false); },
     nullptr,
 };
-static const auto jldeclareconst_func = new JuliaFunction<>{
-    XSTR(jl_declare_constant),
+static const auto jlcheckreplace_func = new JuliaFunction<>{
+    XSTR(jl_checked_replace),
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+            {T_pjlvalue, T_pjlvalue, T_pjlvalue, T_prjlvalue, T_prjlvalue}, false); },
+    nullptr,
+};
+static const auto jlcheckmodify_func = new JuliaFunction<>{
+    XSTR(jl_checked_modify),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+            {T_pjlvalue, T_pjlvalue, T_pjlvalue, T_prjlvalue, T_prjlvalue}, false); },
+    nullptr,
+};
+static const auto jlcheckswap_func = new JuliaFunction<>{
+    XSTR(jl_checked_swap),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+            {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(C, AddressSpace::CalleeRooted)}, false); },
+    nullptr,
+};
+static const auto jlcheckassignonce_func = new JuliaFunction<>{
+    XSTR(jl_checked_assignonce),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+            {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(C, AddressSpace::CalleeRooted)}, false); },
+    nullptr,
+};
+static const auto jldeclareglobal_func = new JuliaFunction<>{
+    XSTR(jl_declare_global),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
         return FunctionType::get(getVoidTy(C),
-            {T_pjlvalue, T_pjlvalue, T_pjlvalue}, false); },
+            {T_pjlvalue, T_pjlvalue, T_prjlvalue, getInt32Ty(C)}, false); },
     nullptr,
 };
-static const auto jlgetbindingorerror_func = new JuliaFunction<>{
-    XSTR(jl_get_binding_or_error),
+static const auto jldepcheck_func = new JuliaFunction<>{
+    XSTR(jl_binding_deprecation_check),
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
-        return FunctionType::get(T_pjlvalue,
-                {T_pjlvalue, T_pjlvalue}, false);
+        return FunctionType::get(getVoidTy(C),
+            {T_pjlvalue}, false); },
+    nullptr,
+};
+static const auto jlcheckbpwritable_func = new JuliaFunction<>{
+    XSTR(jl_check_binding_currently_writable),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+                {T_pjlvalue, T_pjlvalue, T_pjlvalue}, false);
     },
     nullptr,
 };
-static const auto jlgetbindingwrorerror_func = new JuliaFunction<>{
-    XSTR(jl_get_binding_wr),
+static const auto jlgetbindingvalue_func = new JuliaFunction<>{
+    XSTR(jl_get_binding_value_seqcst),
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
-        return FunctionType::get(T_pjlvalue,
-                {T_pjlvalue, T_pjlvalue}, false);
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+                {T_pjlvalue}, false);
     },
     nullptr,
 };
@@ -810,7 +922,7 @@ static const auto jlboundp_func = new JuliaFunction<>{
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
         return FunctionType::get(getInt32Ty(C),
-                {T_pjlvalue, T_pjlvalue}, false);
+                {T_pjlvalue, T_pjlvalue, getInt32Ty(C)}, false);
     },
     nullptr,
 };
@@ -846,38 +958,41 @@ static const auto jlapplygeneric_func = new JuliaFunction<>{
 static const auto jlinvoke_func = new JuliaFunction<>{
     XSTR(jl_invoke),
     get_func2_sig,
-    [](LLVMContext &C) { return AttributeList::get(C,
-            AttributeSet(),
-            Attributes(C, {Attribute::NonNull}),
-            {AttributeSet(),
-             Attributes(C, {Attribute::ReadOnly, Attribute::NoCapture})}); },
+    get_func_attrs,
+};
+static const auto jlinvokeoc_func = new JuliaFunction<>{
+    XSTR(jl_invoke_oc),
+    get_func2_sig,
+    get_func_attrs,
+};
+static const auto jlopaque_closure_call_func = new JuliaFunction<>{
+    XSTR(jl_f_opaque_closure_call),
+    get_func_sig,
+    get_func_attrs,
 };
 static const auto jlmethod_func = new JuliaFunction<>{
     XSTR(jl_method_def),
     [](LLVMContext &C) {
-        auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
-        auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
-        auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
+        auto T_pjlvalue = getPointerTy(C);
+        auto T_prjlvalue = PointerType::get(C, AddressSpace::Tracked);
         return FunctionType::get(T_prjlvalue,
                 {T_prjlvalue, T_prjlvalue, T_prjlvalue, T_pjlvalue}, false);
     },
     nullptr,
 };
 static const auto jlgenericfunction_func = new JuliaFunction<>{
-    XSTR(jl_generic_function_def),
+    XSTR(jl_declare_const_gf),
     [](LLVMContext &C) {
-        auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
-        auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
-        auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
-        auto T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
-        return FunctionType::get(T_prjlvalue, {T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue}, false);
+        auto T_pjlvalue = getPointerTy(C);
+        auto T_prjlvalue = PointerType::get(C, AddressSpace::Tracked);
+        return FunctionType::get(T_prjlvalue, {T_pjlvalue, T_pjlvalue}, false);
     },
     nullptr,
 };
 static const auto jllockvalue_func = new JuliaFunction<>{
     XSTR(jl_lock_value),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
+            {PointerType::get(C, AddressSpace::CalleeRooted)}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             AttributeSet(),
@@ -886,7 +1001,25 @@ static const auto jllockvalue_func = new JuliaFunction<>{
 static const auto jlunlockvalue_func = new JuliaFunction<>{
     XSTR(jl_unlock_value),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
+            {PointerType::get(C, AddressSpace::CalleeRooted)}, false); },
+    [](LLVMContext &C) { return AttributeList::get(C,
+            AttributeSet(),
+            AttributeSet(),
+            {Attributes(C, {Attribute::NoCapture})}); },
+};
+static const auto jllockfield_func = new JuliaFunction<>{
+    XSTR(jl_lock_field),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(C, AddressSpace::Loaded)}, false); },
+    [](LLVMContext &C) { return AttributeList::get(C,
+            AttributeSet(),
+            AttributeSet(),
+            {Attributes(C, {Attribute::NoCapture})}); },
+};
+static const auto jlunlockfield_func = new JuliaFunction<>{
+    XSTR(jl_unlock_field),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(C, AddressSpace::Loaded)}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             AttributeSet(),
@@ -894,57 +1027,118 @@ static const auto jlunlockvalue_func = new JuliaFunction<>{
 };
 static const auto jlenter_func = new JuliaFunction<>{
     XSTR(jl_enter_handler),
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getInt8PtrTy(C)}, false); },
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+            {T_pjlvalue, getPointerTy(C)}, false); },
     nullptr,
 };
 static const auto jl_current_exception_func = new JuliaFunction<>{
     XSTR(jl_current_exception),
-    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), false); },
+    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), {JuliaType::get_pjlvalue_ty(C)}, false); },
     nullptr,
 };
 static const auto jlleave_func = new JuliaFunction<>{
     XSTR(jl_pop_handler),
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getInt32Ty(C)}, false); },
-    nullptr,
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+            {T_pjlvalue, getInt32Ty(C)}, false); },
+    [](LLVMContext &C) {
+            auto FnAttrs = AttrBuilder(C);
+            FnAttrs.addAttribute(Attribute::WillReturn);
+            FnAttrs.addAttribute(Attribute::NoUnwind);
+            auto RetAttrs = AttrBuilder(C);
+            return AttributeList::get(C,
+                AttributeSet::get(C, FnAttrs),
+                AttributeSet(),
+                None);
+        },
+};
+static const auto jlleave_noexcept_func = new JuliaFunction<>{
+    XSTR(jl_pop_handler_noexcept),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+            {T_pjlvalue, getInt32Ty(C)}, false); },
+    [](LLVMContext &C) {
+            auto FnAttrs = AttrBuilder(C);
+            FnAttrs.addAttribute(Attribute::WillReturn);
+            FnAttrs.addAttribute(Attribute::NoUnwind);
+            auto RetAttrs = AttrBuilder(C);
+            return AttributeList::get(C,
+                AttributeSet::get(C, FnAttrs),
+                AttributeSet(),
+                None);
+        },
 };
 static const auto jl_restore_excstack_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_restore_excstack),
-    [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C),
-            {T_size}, false); },
+    [](LLVMContext &C, Type *T_size) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+            {T_pjlvalue, T_size}, false); },
     nullptr,
 };
 static const auto jl_excstack_state_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_excstack_state),
-    [](LLVMContext &C, Type *T_size) { return FunctionType::get(T_size, false); },
+    [](LLVMContext &C, Type *T_size) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(T_size, {T_pjlvalue}, false); },
     nullptr,
 };
 static const auto jlegalx_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_egal__unboxed),
     [](LLVMContext &C, Type *T_size) {
-        Type *T = PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived);
+        Type *T = PointerType::get(C, AddressSpace::Derived);
         return FunctionType::get(getInt32Ty(C), {T, T, T_size}, false); },
-    [](LLVMContext &C) { return AttributeList::get(C,
-            Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}),
-            AttributeSet(),
-            None); },
+    [](LLVMContext &C) {
+        AttrBuilder FnAttrs(C);
+        FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly());
+        FnAttrs.addAttribute(Attribute::NoUnwind);
+        return AttributeList::get(C,
+                AttributeSet::get(C, FnAttrs),
+                AttributeSet(),
+                None); },
 };
 static const auto jl_alloc_obj_func = new JuliaFunction<TypeFnContextAndSizeT>{
     "julia.gc_alloc_obj",
     [](LLVMContext &C, Type *T_size) {
-        auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
-        auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
-        auto T_ppjlvalue = PointerType::get(PointerType::get(T_jlvalue, 0), 0);
+        auto T_pjlvalue = getPointerTy(C);
+        auto T_prjlvalue = PointerType::get(C, AddressSpace::Tracked);
         return FunctionType::get(T_prjlvalue,
-                {T_ppjlvalue, T_size, T_prjlvalue}, false);
+                {T_pjlvalue, T_size, T_prjlvalue}, false);
     },
     [](LLVMContext &C) {
         auto FnAttrs = AttrBuilder(C);
         FnAttrs.addAllocSizeAttr(1, None); // returns %1 bytes
-#if JL_LLVM_VERSION >= 150000
-        FnAttrs.addAllocKindAttr(AllocFnKind::Alloc | AllocFnKind::Uninitialized | AllocFnKind::Aligned);
-#endif
+        FnAttrs.addAllocKindAttr(AllocFnKind::Alloc);
+        FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly());
+        FnAttrs.addAttribute(Attribute::WillReturn);
+        FnAttrs.addAttribute(Attribute::NoUnwind);
+        auto RetAttrs = AttrBuilder(C);
+        RetAttrs.addAttribute(Attribute::NoAlias);
+        RetAttrs.addAttribute(Attribute::NonNull);
+        return AttributeList::get(C,
+            AttributeSet::get(C, FnAttrs),
+            AttributeSet::get(C, RetAttrs),
+            None);
+    },
+};
+static const auto jl_alloc_genericmemory_unchecked_func = new JuliaFunction<TypeFnContextAndSizeT>{
+    XSTR(jl_alloc_genericmemory_unchecked),
+    [](LLVMContext &C, Type *T_size) {
+        auto T_pjlvalue = getPointerTy(C);
+        auto T_prjlvalue = PointerType::get(C, AddressSpace::Tracked);
+        return FunctionType::get(T_prjlvalue,
+                {T_pjlvalue, T_size, T_pjlvalue}, false);
+    },
+    [](LLVMContext &C) {
+        auto FnAttrs = AttrBuilder(C);
+        FnAttrs.addAllocKindAttr(AllocFnKind::Alloc);
+        FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly());
+        FnAttrs.addAttribute(Attribute::WillReturn);
+        FnAttrs.addAttribute(Attribute::NoUnwind);
         auto RetAttrs = AttrBuilder(C);
         RetAttrs.addAttribute(Attribute::NoAlias);
         RetAttrs.addAttribute(Attribute::NonNull);
@@ -959,7 +1153,7 @@ static const auto jl_newbits_func = new JuliaFunction<>{
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
         return FunctionType::get(T_prjlvalue,
-                {T_prjlvalue, getInt8PtrTy(C)}, false);
+                {T_prjlvalue, getPointerTy(C)}, false);
     },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
@@ -976,27 +1170,31 @@ static const auto jl_typeof_func = new JuliaFunction<>{
         return FunctionType::get(T_prjlvalue,
                 {T_prjlvalue}, false);
     },
-    [](LLVMContext &C) { return AttributeList::get(C,
-            Attributes(C, {Attribute::ReadNone, Attribute::NoUnwind, Attribute::NoRecurse}),
+    [](LLVMContext &C) {
+        AttrBuilder FnAttrs(C);
+        FnAttrs.addMemoryAttr(MemoryEffects::none());
+        FnAttrs.addAttribute(Attribute::NoUnwind);
+        FnAttrs.addAttribute(Attribute::NoRecurse);
+        return AttributeList::get(C,
+            AttributeSet::get(C, FnAttrs),
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
-static const auto jl_loopinfo_marker_func = new JuliaFunction<>{
-    "julia.loopinfo_marker",
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); },
-    [](LLVMContext &C) { return AttributeList::get(C,
-            Attributes(C, {Attribute::ReadOnly, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}),
-            AttributeSet(),
-            None); },
-};
+
 static const auto jl_write_barrier_func = new JuliaFunction<>{
     "julia.write_barrier",
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {JuliaType::get_prjlvalue_ty(C)}, true); },
-    [](LLVMContext &C) { return AttributeList::get(C,
-            Attributes(C, {Attribute::NoUnwind, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}),
+    [](LLVMContext &C) {
+        AttrBuilder FnAttrs(C);
+        FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly());
+        FnAttrs.addAttribute(Attribute::NoUnwind);
+        FnAttrs.addAttribute(Attribute::NoRecurse);
+        return AttributeList::get(C,
+            AttributeSet::get(C, FnAttrs),
             AttributeSet(),
-            {Attributes(C, {Attribute::ReadOnly})}); },
+            {Attributes(C, {Attribute::ReadOnly})});
+    },
 };
 
 static const auto jlisa_func = new JuliaFunction<>{
@@ -1021,17 +1219,16 @@ static const auto jlsubtype_func = new JuliaFunction<>{
 static const auto jlapplytype_func = new JuliaFunction<>{
     XSTR(jl_instantiate_type_in_env),
     [](LLVMContext &C) {
-        auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
-        auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
-        auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
-        auto T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
-        return FunctionType::get(T_prjlvalue,
-            {T_pjlvalue, T_pjlvalue, T_pprjlvalue}, false);
+        auto T_ptr = PointerType::get(C, 0);
+        auto T_tracked = PointerType::get(C, AddressSpace::Tracked);
+        auto T_derived = PointerType::get(C, AddressSpace::Derived);
+        return FunctionType::get(T_tracked,
+            {T_ptr, T_ptr, T_derived}, false);
     },
     [](LLVMContext &C) {
         return AttributeList::get(C,
             AttributeSet(),
-            AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::NonNull),
+            AttributeSet::get(C, ArrayRef<Attribute>({Attribute::get(C, Attribute::NonNull),
                                                Attribute::getWithAlignment(C, Align(16))})),
             None);
     },
@@ -1039,7 +1236,7 @@ static const auto jlapplytype_func = new JuliaFunction<>{
 static const auto jl_object_id__func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_object_id_),
     [](LLVMContext &C, Type *T_size) { return FunctionType::get(T_size,
-            {JuliaType::get_prjlvalue_ty(C), PointerType::get(getInt8Ty(C), AddressSpace::Derived)}, false); },
+            {T_size, PointerType::get(C, AddressSpace::Derived)}, false); },
     nullptr,
 };
 static const auto setjmp_func = new JuliaFunction<TypeFnContextAndTriple>{
@@ -1047,9 +1244,9 @@ static const auto setjmp_func = new JuliaFunction<TypeFnContextAndTriple>{
     [](LLVMContext &C, const Triple &T) {
         if (T.isOSWindows())
             return FunctionType::get(getInt32Ty(C),
-                {getInt8PtrTy(C)}, false);
+                {getPointerTy(C)}, false);
         return FunctionType::get(getInt32Ty(C),
-            {getInt8PtrTy(C), getInt32Ty(C)}, false);
+            {getPointerTy(C), getInt32Ty(C)}, false);
     },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::ReturnsTwice}),
@@ -1059,23 +1256,27 @@ static const auto setjmp_func = new JuliaFunction<TypeFnContextAndTriple>{
 static const auto memcmp_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(memcmp),
     [](LLVMContext &C, Type *T_size) { return FunctionType::get(getInt32Ty(C),
-            {getInt8PtrTy(C), getInt8PtrTy(C), T_size}, false); },
-    [](LLVMContext &C) { return AttributeList::get(C,
-            Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}),
+            {getPointerTy(C), getPointerTy(C), T_size}, false); },
+    [](LLVMContext &C) {
+        AttrBuilder FnAttrs(C);
+        FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref));
+        FnAttrs.addAttribute(Attribute::NoUnwind);
+        return AttributeList::get(C,
+            AttributeSet::get(C, FnAttrs),
             AttributeSet(),
             None); },
     // TODO: inferLibFuncAttributes(*memcmp_func, TLI);
 };
 static const auto jldlsym_func = new JuliaFunction<>{
     XSTR(jl_load_and_lookup),
-    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C),
-            {getInt8PtrTy(C), getInt8PtrTy(C), PointerType::get(getInt8PtrTy(C), 0)}, false); },
+    [](LLVMContext &C) { return FunctionType::get(getPointerTy(C),
+            {getPointerTy(C), getPointerTy(C), getPointerTy(C)}, false); },
     nullptr,
 };
 static const auto jllazydlsym_func = new JuliaFunction<>{
     XSTR(jl_lazy_load_and_lookup),
-    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C),
-            {JuliaType::get_prjlvalue_ty(C), getInt8PtrTy(C)}, false); },
+    [](LLVMContext &C) { return FunctionType::get(getPointerTy(C),
+            {JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C)}, false); },
     nullptr,
 };
 static const auto jltypeassert_func = new JuliaFunction<>{
@@ -1099,6 +1300,23 @@ static const auto jlgetnthfieldchecked_func = new JuliaFunction<TypeFnContextAnd
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
+static const auto jlfieldindex_func = new JuliaFunction<>{
+    XSTR(jl_field_index),
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(getInt32Ty(C),
+            {T_prjlvalue, T_prjlvalue, getInt32Ty(C)}, false);
+    },
+    [](LLVMContext &C) {
+        AttrBuilder FnAttrs(C);
+        FnAttrs.addMemoryAttr(MemoryEffects::readOnly());
+        FnAttrs.addAttribute(Attribute::NoUnwind);
+        FnAttrs.addAttribute(Attribute::WillReturn);
+        return AttributeList::get(C,
+            AttributeSet::get(C, FnAttrs),
+            AttributeSet(),
+            None); }, // This function can error if the third argument is 1 so don't do that.
+};
 static const auto jlfieldisdefinedchecked_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_field_isdefined_checked),
     [](LLVMContext &C, Type *T_size) {
@@ -1114,20 +1332,18 @@ static const auto jlfieldisdefinedchecked_func = new JuliaFunction<TypeFnContext
 static const auto jlgetcfunctiontrampoline_func = new JuliaFunction<>{
     XSTR(jl_get_cfunction_trampoline),
     [](LLVMContext &C) {
-        auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
-        auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
-        auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
-        auto T_ppjlvalue = PointerType::get(T_pjlvalue, 0);
-        auto T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
+        auto T_pjlvalue = getPointerTy(C);
+        auto T_prjlvalue = PointerType::get(C, AddressSpace::Tracked);
+        auto T_derived = PointerType::get(C, AddressSpace::Derived);
         return FunctionType::get(T_prjlvalue,
             {
                 T_prjlvalue, // f (object)
                 T_pjlvalue, // result
-                getInt8PtrTy(C), // cache
+                getPointerTy(C), // cache
                 T_pjlvalue, // fill
-                FunctionType::get(getInt8PtrTy(C), { getInt8PtrTy(C), T_ppjlvalue }, false)->getPointerTo(), // trampoline
+                getPointerTy(C), // trampoline
                 T_pjlvalue, // env
-                T_pprjlvalue, // vals
+                T_derived, // vals
             }, false);
     },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -1135,6 +1351,14 @@ static const auto jlgetcfunctiontrampoline_func = new JuliaFunction<>{
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
+static const auto jlgetabiconverter_func = new JuliaFunction<TypeFnContextAndSizeT>{
+    XSTR(jl_get_abi_converter),
+    [](LLVMContext &C, Type *T_size) {
+        Type *T_ptr = getPointerTy(C);
+        return FunctionType::get(T_ptr, {T_ptr, T_ptr}, false);
+    },
+    nullptr,
+};
 static const auto diff_gc_total_bytes_func = new JuliaFunction<>{
     XSTR(jl_gc_diff_total_bytes),
     [](LLVMContext &C) { return FunctionType::get(getInt64Ty(C), false); },
@@ -1146,34 +1370,43 @@ static const auto sync_gc_total_bytes_func = new JuliaFunction<>{
             {getInt64Ty(C)}, false); },
     nullptr,
 };
-static const auto jlarray_data_owner_func = new JuliaFunction<>{
-    XSTR(jl_array_data_owner),
-    [](LLVMContext &C) {
+static const auto jl_allocgenericmemory = new JuliaFunction<TypeFnContextAndSizeT>{
+    XSTR(jl_alloc_genericmemory),
+    [](LLVMContext &C, Type *T_Size) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
-        return FunctionType::get(T_prjlvalue,
-            {T_prjlvalue}, false);
-    },
-    [](LLVMContext &C) { return AttributeList::get(C,
-            Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind}),
-            Attributes(C, {Attribute::NonNull}),
-            None); },
+        return FunctionType::get(T_prjlvalue, // new Memory
+                                {T_prjlvalue, // type
+                                T_Size        // nelements
+                                }, false); },
+        [](LLVMContext &C) {
+            AttrBuilder FnAttrs(C);
+            AttrBuilder RetAttrs(C);
+            FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly() | MemoryEffects::readOnly());
+            FnAttrs.addAttribute(Attribute::WillReturn);
+            RetAttrs.addAlignmentAttr(Align(16));
+            RetAttrs.addAttribute(Attribute::NonNull);
+            RetAttrs.addDereferenceableAttr(16);
+            return AttributeList::get(C,
+                AttributeSet::get(C, FnAttrs),
+                AttributeSet::get(C, RetAttrs),
+                None); },
 };
-#define BOX_FUNC(ct,at,attrs)                                                    \
+#define BOX_FUNC(ct,at,attrs,nbytes)                                                    \
 static const auto box_##ct##_func = new JuliaFunction<>{                           \
     XSTR(jl_box_##ct),                                                           \
     [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C),\
             {at}, false); },                                                     \
-    attrs,                                                                       \
-}
-BOX_FUNC(int16, getInt16Ty(C), get_attrs_sext);
-BOX_FUNC(uint16, getInt16Ty(C), get_attrs_zext);
-BOX_FUNC(int32, getInt32Ty(C), get_attrs_sext);
-BOX_FUNC(uint32, getInt32Ty(C), get_attrs_zext);
-BOX_FUNC(int64, getInt64Ty(C), get_attrs_sext);
-BOX_FUNC(uint64, getInt64Ty(C), get_attrs_zext);
-BOX_FUNC(char, getCharTy(C), get_attrs_zext);
-BOX_FUNC(float32, getFloatTy(C), get_attrs_basic);
-BOX_FUNC(float64, getDoubleTy(C), get_attrs_basic);
+    [](LLVMContext &C) { return attrs(C,nbytes); },                                                                \
+}
+BOX_FUNC(int16, getInt16Ty(C), get_attrs_box_sext, 2);
+BOX_FUNC(uint16, getInt16Ty(C), get_attrs_box_zext, 2);
+BOX_FUNC(int32, getInt32Ty(C), get_attrs_box_sext, 4);
+BOX_FUNC(uint32, getInt32Ty(C), get_attrs_box_zext, 4);
+BOX_FUNC(int64, getInt64Ty(C), get_attrs_box_sext, 8);
+BOX_FUNC(uint64, getInt64Ty(C), get_attrs_box_zext, 8);
+BOX_FUNC(char, getCharTy(C), get_attrs_box_zext, 1);
+BOX_FUNC(float32, getFloatTy(C), get_attrs_box_float, 4);
+BOX_FUNC(float64, getDoubleTy(C), get_attrs_box_float, 8);
 #undef BOX_FUNC
 
 static const auto box_ssavalue_func = new JuliaFunction<TypeFnContextAndSizeT>{
@@ -1185,7 +1418,22 @@ static const auto box_ssavalue_func = new JuliaFunction<TypeFnContextAndSizeT>{
     },
     get_attrs_basic,
 };
-
+static const auto jldnd_func = new JuliaFunction<>{
+    XSTR(jl_f_donotdelete),
+    [](LLVMContext &C) {
+        return FunctionType::get(getVoidTy(C), true);
+    },
+    [](LLVMContext &C) {
+        AttrBuilder FnAttrs(C);
+        FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly() | MemoryEffects::readOnly());
+        FnAttrs.addAttribute(Attribute::WillReturn);
+        FnAttrs.addAttribute(Attribute::NoUnwind);
+        return AttributeList::get(C,
+                AttributeSet::get(C, FnAttrs),
+                Attributes(C, {}),
+                None);
+    },
+};
 
 // placeholder functions
 static const auto gcroot_flush_func = new JuliaFunction<>{
@@ -1203,23 +1451,48 @@ static const auto gc_preserve_end_func = new JuliaFunction<> {
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {Type::getTokenTy(C)}, false); },
     nullptr,
 };
-static const auto except_enter_func = new JuliaFunction<>{
-    "julia.except_enter",
-    [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C), false); },
-    [](LLVMContext &C) { return AttributeList::get(C,
-            AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::ReturnsTwice)})),
-            AttributeSet(),
-            None); },
-};
 static const auto pointer_from_objref_func = new JuliaFunction<>{
     "julia.pointer_from_objref",
     [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pjlvalue_ty(C),
-            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived)}, false); },
-    [](LLVMContext &C) { return AttributeList::get(C,
-            AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::ReadNone), Attribute::get(C, Attribute::NoUnwind)})),
+            {PointerType::get(C, AddressSpace::Derived)}, false); },
+    [](LLVMContext &C) {
+        AttrBuilder FnAttrs(C);
+        FnAttrs.addMemoryAttr(MemoryEffects::none());
+        FnAttrs.addAttribute(Attribute::NoUnwind);
+        FnAttrs.addAttribute(Attribute::Speculatable);
+        FnAttrs.addAttribute(Attribute::WillReturn);
+        FnAttrs.addAttribute(Attribute::NoRecurse);
+        FnAttrs.addAttribute(Attribute::NoSync);
+        return AttributeList::get(C,
+            AttributeSet::get(C, FnAttrs),
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
+static const auto gc_loaded_func = new JuliaFunction<>{
+    "julia.gc_loaded",
+    // # memory(none) nosync nounwind speculatable willreturn norecurse
+    // declare nonnull noundef ptr(Loaded) @"julia.gc_loaded"(ptr(Tracked) nocapture nonnull noundef readnone, ptr nonnull noundef readnone)
+    //  top:
+    //   %metadata GC base pointer is ptr(Tracked)
+    //   ret addrspacecast ptr to ptr(Loaded)
+    [](LLVMContext &C) { return FunctionType::get(PointerType::get(C, AddressSpace::Loaded),
+            {JuliaType::get_prjlvalue_ty(C), getPointerTy(C)}, false); },
+    [](LLVMContext &C) {
+        AttrBuilder FnAttrs(C);
+        FnAttrs.addAttribute(Attribute::NoSync);
+        FnAttrs.addAttribute(Attribute::NoUnwind);
+        FnAttrs.addAttribute(Attribute::Speculatable);
+        FnAttrs.addAttribute(Attribute::WillReturn);
+        FnAttrs.addAttribute(Attribute::NoRecurse);
+        FnAttrs.addMemoryAttr(MemoryEffects::none());
+        AttrBuilder RetAttrs(C);
+        RetAttrs.addAttribute(Attribute::NonNull);
+        RetAttrs.addAttribute(Attribute::NoUndef);
+        return AttributeList::get(C, AttributeSet::get(C,FnAttrs), AttributeSet::get(C,RetAttrs),
+                { Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone, Attribute::NoCapture}),
+                  Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone}) });
+                  },
+};
 
 // julia.call represents a call with julia calling convention, it is used as
 //
@@ -1237,7 +1510,7 @@ static const auto julia_call = new JuliaFunction<>{
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
         return FunctionType::get(T_prjlvalue,
-            {get_func_sig(C)->getPointerTo(),
+            {getPointerTy(C),
              T_prjlvalue}, // %f
             true); }, // %args
     get_attrs_basic,
@@ -1250,73 +1523,49 @@ static const auto julia_call2 = new JuliaFunction<>{
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
         return FunctionType::get(T_prjlvalue,
-            {get_func2_sig(C)->getPointerTo(),
+            {getPointerTy(C),
              T_prjlvalue, // %arg1
              T_prjlvalue}, // %f
             true); }, // %args
     get_attrs_basic,
 };
 
+// julia.call3 is like julia.call, except that %fptr is derived rather than tracked
+static const auto julia_call3 = new JuliaFunction<>{
+    "julia.call3",
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        Type *T = PointerType::get(C, AddressSpace::Derived);
+        return FunctionType::get(T_prjlvalue,
+            {getPointerTy(C),
+             T}, // %f
+            true); }, // %args
+    get_attrs_basic,
+};
+
+
 static const auto jltuple_func = new JuliaFunction<>{XSTR(jl_f_tuple), get_func_sig, get_func_attrs};
-static const auto &builtin_func_map() {
-    static std::map<jl_fptr_args_t, JuliaFunction<>*> builtins = {
-          { jl_f_is_addr,                 new JuliaFunction<>{XSTR(jl_f_is), get_func_sig, get_func_attrs} },
-          { jl_f_typeof_addr,             new JuliaFunction<>{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} },
-          { jl_f_sizeof_addr,             new JuliaFunction<>{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} },
-          { jl_f_issubtype_addr,          new JuliaFunction<>{XSTR(jl_f_issubtype), get_func_sig, get_func_attrs} },
-          { jl_f_isa_addr,                new JuliaFunction<>{XSTR(jl_f_isa), get_func_sig, get_func_attrs} },
-          { jl_f_typeassert_addr,         new JuliaFunction<>{XSTR(jl_f_typeassert), get_func_sig, get_func_attrs} },
-          { jl_f_ifelse_addr,             new JuliaFunction<>{XSTR(jl_f_ifelse), get_func_sig, get_func_attrs} },
-          { jl_f__apply_iterate_addr,     new JuliaFunction<>{XSTR(jl_f__apply_iterate), get_func_sig, get_func_attrs} },
-          { jl_f__apply_pure_addr,        new JuliaFunction<>{XSTR(jl_f__apply_pure), get_func_sig, get_func_attrs} },
-          { jl_f__call_latest_addr,       new JuliaFunction<>{XSTR(jl_f__call_latest), get_func_sig, get_func_attrs} },
-          { jl_f__call_in_world_addr,     new JuliaFunction<>{XSTR(jl_f__call_in_world), get_func_sig, get_func_attrs} },
-          { jl_f__call_in_world_total_addr, new JuliaFunction<>{XSTR(jl_f__call_in_world_total), get_func_sig, get_func_attrs} },
-          { jl_f_throw_addr,              new JuliaFunction<>{XSTR(jl_f_throw), get_func_sig, get_func_attrs} },
-          { jl_f_tuple_addr,              jltuple_func },
-          { jl_f_svec_addr,               new JuliaFunction<>{XSTR(jl_f_svec), get_func_sig, get_func_attrs} },
-          { jl_f_applicable_addr,         new JuliaFunction<>{XSTR(jl_f_applicable), get_func_sig, get_func_attrs} },
-          { jl_f_invoke_addr,             new JuliaFunction<>{XSTR(jl_f_invoke), get_func_sig, get_func_attrs} },
-          { jl_f_isdefined_addr,          new JuliaFunction<>{XSTR(jl_f_isdefined), get_func_sig, get_func_attrs} },
-          { jl_f_getfield_addr,           new JuliaFunction<>{XSTR(jl_f_getfield), get_func_sig, get_func_attrs} },
-          { jl_f_setfield_addr,           new JuliaFunction<>{XSTR(jl_f_setfield), get_func_sig, get_func_attrs} },
-          { jl_f_swapfield_addr,          new JuliaFunction<>{XSTR(jl_f_swapfield), get_func_sig, get_func_attrs} },
-          { jl_f_modifyfield_addr,        new JuliaFunction<>{XSTR(jl_f_modifyfield), get_func_sig, get_func_attrs} },
-          { jl_f_fieldtype_addr,          new JuliaFunction<>{XSTR(jl_f_fieldtype), get_func_sig, get_func_attrs} },
-          { jl_f_nfields_addr,            new JuliaFunction<>{XSTR(jl_f_nfields), get_func_sig, get_func_attrs} },
-          { jl_f__expr_addr,              new JuliaFunction<>{XSTR(jl_f__expr), get_func_sig, get_func_attrs} },
-          { jl_f__typevar_addr,           new JuliaFunction<>{XSTR(jl_f__typevar), get_func_sig, get_func_attrs} },
-          { jl_f_arrayref_addr,           new JuliaFunction<>{XSTR(jl_f_arrayref), get_func_sig, get_func_attrs} },
-          { jl_f_const_arrayref_addr,     new JuliaFunction<>{XSTR(jl_f_const_arrayref), get_func_sig, get_func_attrs} },
-          { jl_f_arrayset_addr,           new JuliaFunction<>{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} },
-          { jl_f_arraysize_addr,          new JuliaFunction<>{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} },
-          { jl_f_apply_type_addr,         new JuliaFunction<>{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} },
-          { jl_f_donotdelete_addr,        new JuliaFunction<>{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} },
-          { jl_f_compilerbarrier_addr,    new JuliaFunction<>{XSTR(jl_f_compilerbarrier), get_func_sig, get_func_attrs} },
-          { jl_f_finalizer_addr,          new JuliaFunction<>{XSTR(jl_f_finalizer), get_func_sig, get_func_attrs} },
-          { jl_f__svec_ref_addr,          new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} }
-        };
-    return builtins;
+static const auto jlintrinsic_func = new JuliaFunction<>{XSTR(jl_f_intrinsic_call), get_func3_sig, get_func_attrs};
+static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs};
+
+static const auto mk_builtin_func_map() {
+    auto builtin_addrs = new DenseMap<jl_value_t*, JuliaFunction<>*>();
+    for (int i = 0; i < jl_n_builtins; i++) {
+        jl_value_t *builtin = jl_builtin_instances[i];
+        if (builtin) // a couple do not have instances (e.g. IntrinsicFunction)
+            (*builtin_addrs)[builtin] = new JuliaFunction<>{StringRef(jl_builtin_f_names[i]), get_func_sig, get_func_attrs};
+    }
+    return builtin_addrs;
 }
 
-static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs};
+static const auto &builtin_func_map() {
+    static auto builtins = mk_builtin_func_map();
+    return *builtins;
+}
 
 static _Atomic(uint64_t) globalUniqueGeneratedNames{1};
 
 // --- code generation ---
-extern "C" {
-    jl_cgparams_t jl_default_cgparams = {1, 1, 0,
-#ifdef _OS_WINDOWS_
-        0,
-#else
-        1,
-#endif
-        (int) DICompileUnit::DebugEmissionKind::FullDebug,
-        1,
-        1,
-        jl_rettype_inferred_addr, NULL };
-}
-
 
 static MDNode *best_tbaa(jl_tbaacache_t &tbaa_cache, jl_value_t *jt) {
     jt = jl_unwrap_unionall(jt);
@@ -1327,6 +1576,8 @@ static MDNode *best_tbaa(jl_tbaacache_t &tbaa_cache, jl_value_t *jt) {
         return tbaa_cache.tbaa_value;
     if (jl_is_abstracttype(jt))
         return tbaa_cache.tbaa_value;
+    if (jl_is_genericmemory_type(jt) || jl_is_array_type(jt))
+        return tbaa_cache.tbaa_array;
     // If we're here, we know all subtypes are (im)mutable, even if we
     // don't know what the exact type is
     return jl_is_mutable(jt) ? tbaa_cache.tbaa_mutab : tbaa_cache.tbaa_immut;
@@ -1336,7 +1587,7 @@ static MDNode *best_tbaa(jl_tbaacache_t &tbaa_cache, jl_value_t *jt) {
 // note that this includes jl_isbits, although codegen should work regardless
 static bool jl_is_concrete_immutable(jl_value_t* t)
 {
-    return jl_is_immutable_datatype(t) && ((jl_datatype_t*)t)->isconcretetype;
+    return jl_may_be_immutable_datatype(t) && ((jl_datatype_t*)t)->isconcretetype;
 }
 
 static bool jl_is_pointerfree(jl_value_t* t)
@@ -1436,21 +1687,32 @@ struct jl_aliasinfo_t {
 };
 
 // metadata tracking for a llvm Value* during codegen
+const uint8_t UNION_BOX_MARKER = 0x80;
 struct jl_cgval_t {
     Value *V; // may be of type T* or T, or set to NULL if ghost (or if the value has not been initialized yet, for a variable definition)
     // For unions, we may need to keep a reference to the boxed part individually.
     // If this is non-NULL, then, at runtime, we satisfy the invariant that (for the corresponding
-    // runtime values) if `(TIndex | 0x80) != 0`, then `Vboxed == V` (by value).
+    // runtime values) if `(TIndex | UNION_BOX_MARKER) != 0`, then `Vboxed == V` (by value).
     // For convenience, we also set this value of isboxed values, in which case
     // it is equal (at compile time) to V.
-    // If this is non-NULL, it is always of type `T_prjlvalue`
+
+    // If this is non-NULL (at compile time), it is always of type `T_prjlvalue`.
+    // N.B.: In general we expect this to always be a dereferenceable pointer at runtime.
+    //       However, there are situations where this value may be a runtime NULL
+    //       (PhiNodes with undef predecessors or PhiC with undef UpsilonNode).
+    //       The middle-end arranges appropriate error checks before any use
+    //       of this value that may read a non-dereferenceable Vboxed, with two
+    //       exceptions: PhiNode and UpsilonNode arguments which need special
+    //       handling to account for the possibility that this may be NULL.
     Value *Vboxed;
+
     Value *TIndex; // if `V` is an unboxed (tagged) Union described by `typ`, this gives the DataType index (1-based, small int) as an i8
+    SmallVector<Value*,0> inline_roots; // if present, `V` is a pointer, but not in canonical layout
     jl_value_t *constant; // constant value (rooted in linfo.def.roots)
-    jl_value_t *typ; // the original type of V, never NULL
+    jl_value_t *typ; // the original type of V, never nullptr
     bool isboxed; // whether this value is a jl_value_t* allocated on the heap with the right type tag
     bool isghost; // whether this value is "ghost"
-    MDNode *tbaa; // The related tbaa node. Non-NULL iff this holds an address.
+    MDNode *tbaa; // The related tbaa node. Non-nullptr iff this holds an address.
     // If non-null, this memory location may be promoted on use, by hoisting the
     // destination memory above the promotion point.
     Instruction *promotion_point;
@@ -1461,13 +1723,15 @@ struct jl_cgval_t {
     bool ispointer() const
     {
         // whether this value is compatible with `data_pointer`
+        assert(inline_roots.empty());
         return tbaa != nullptr;
     }
     jl_cgval_t(Value *Vval, jl_value_t *typ, Value *tindex) : // general value constructor
-        V(Vval), // V is allowed to be NULL in a jl_varinfo_t context, but not during codegen contexts
+        V(Vval), // V is allowed to be nullptr in a jl_varinfo_t context, but not during codegen contexts
         Vboxed(nullptr),
         TIndex(tindex),
-        constant(NULL),
+        inline_roots(),
+        constant(nullptr),
         typ(typ),
         isboxed(false),
         isghost(false),
@@ -1475,13 +1739,15 @@ struct jl_cgval_t {
         promotion_point(nullptr),
         promotion_ssa(-1)
     {
-        assert(TIndex == NULL || TIndex->getType() == getInt8Ty(TIndex->getContext()));
+        assert(TIndex == nullptr || TIndex->getType() == getInt8Ty(TIndex->getContext()));
     }
-    jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa) : // general pointer constructor
+    jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa, Value* inline_roots) = delete;
+    jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa, ArrayRef<Value*> inline_roots) : // general pointer constructor
         V(Vptr),
         Vboxed(isboxed ? Vptr : nullptr),
         TIndex(tindex),
-        constant(NULL),
+        inline_roots(inline_roots),
+        constant(nullptr),
         typ(typ),
         isboxed(isboxed),
         isghost(false),
@@ -1491,15 +1757,16 @@ struct jl_cgval_t {
     {
         if (Vboxed)
             assert(Vboxed->getType() == JuliaType::get_prjlvalue_ty(Vboxed->getContext()));
-        assert(tbaa != NULL);
-        assert(!(isboxed && TIndex != NULL));
-        assert(TIndex == NULL || TIndex->getType() == getInt8Ty(TIndex->getContext()));
+        assert(tbaa != nullptr);
+        assert(!(isboxed && TIndex != nullptr));
+        assert(TIndex == nullptr || TIndex->getType() == getInt8Ty(TIndex->getContext()));
     }
     explicit jl_cgval_t(jl_value_t *typ) : // ghost value constructor
-        // mark explicit to avoid being used implicitly for conversion from NULL (use jl_cgval_t() instead)
-        V(NULL),
-        Vboxed(NULL),
-        TIndex(NULL),
+        // mark explicit to avoid being used implicitly for conversion from nullptr (use jl_cgval_t() instead)
+        V(nullptr),
+        Vboxed(nullptr),
+        TIndex(nullptr),
+        inline_roots(),
         constant(((jl_datatype_t*)typ)->instance),
         typ(typ),
         isboxed(false),
@@ -1515,6 +1782,7 @@ struct jl_cgval_t {
         V(v.V),
         Vboxed(v.Vboxed),
         TIndex(tindex),
+        inline_roots(v.inline_roots),
         constant(v.constant),
         typ(typ),
         isboxed(v.isboxed),
@@ -1528,17 +1796,18 @@ struct jl_cgval_t {
         // this constructor expects we had a badly or equivalently typed version
         // make sure we aren't discarding the actual type information
         if (v.TIndex) {
-            assert((TIndex == NULL) == jl_is_concrete_type(typ));
+            assert((TIndex == nullptr) == jl_is_concrete_type(typ));
         }
         else {
             assert(isboxed || v.typ == typ || tindex);
         }
     }
     explicit jl_cgval_t() : // undef / unreachable constructor
-        V(NULL),
-        Vboxed(NULL),
-        TIndex(NULL),
-        constant(NULL),
+        V(nullptr),
+        Vboxed(nullptr),
+        TIndex(nullptr),
+        inline_roots(),
+        constant(nullptr),
         typ(jl_bottom_type),
         isboxed(false),
         isghost(true),
@@ -1554,6 +1823,7 @@ struct jl_varinfo_t {
     Instruction *boxroot; // an address, if the var might be in a jl_value_t** stack slot (marked ctx.tbaa().tbaa_const, if appropriate)
     jl_cgval_t value; // a stack slot or constant value
     Value *pTIndex; // i8* stack slot for the value.TIndex tag describing `value.V`
+    AllocaInst *inline_roots; // stack roots for the inline_roots array, if needed
     DILocalVariable *dinfo;
     // if the variable might be used undefined and is not boxed
     // this i1 flag is true when it is defined
@@ -1564,11 +1834,12 @@ struct jl_varinfo_t {
     bool usedUndef;
     bool used;
 
-    jl_varinfo_t(LLVMContext &ctxt) : boxroot(NULL),
+    jl_varinfo_t(LLVMContext &ctxt) : boxroot(nullptr),
                      value(jl_cgval_t()),
-                     pTIndex(NULL),
-                     dinfo(NULL),
-                     defFlag(NULL),
+                     pTIndex(nullptr),
+                     inline_roots(nullptr),
+                     dinfo(nullptr),
+                     defFlag(nullptr),
                      isSA(false),
                      isVolatile(false),
                      isArgument(false),
@@ -1585,17 +1856,17 @@ class jl_codectx_t {
     IRBuilder<> builder;
     jl_codegen_params_t &emission_context;
     llvm::MapVector<jl_code_instance_t*, jl_codegen_call_target_t> call_targets;
-    std::map<void*, GlobalVariable*> &global_targets;
-    std::map<std::tuple<jl_code_instance_t*, bool>, GlobalVariable*> &external_calls;
     Function *f = NULL;
+    MDNode* LoopID = NULL;
     // local var info. globals are not in here.
-    std::vector<jl_varinfo_t> slots;
+    SmallVector<jl_varinfo_t, 0> slots;
     std::map<int, jl_varinfo_t> phic_slots;
-    std::vector<jl_cgval_t> SAvalues;
-    std::vector<std::tuple<jl_cgval_t, BasicBlock *, AllocaInst *, PHINode *, jl_value_t *>> PhiNodes;
-    std::vector<bool> ssavalue_assigned;
-    std::vector<int> ssavalue_usecount;
-    std::vector<orc::ThreadSafeModule> oc_modules;
+    std::map<int, std::pair<Value*, Value*> > scope_restore;
+    std::map<jl_value_t*, AllocaInst*> eh_buffers;
+    SmallVector<jl_cgval_t, 0> SAvalues;
+    SmallVector<std::tuple<jl_cgval_t, BasicBlock *, AllocaInst *, PHINode *, SmallVector<PHINode*,0>, jl_value_t *>, 0> PhiNodes;
+    SmallVector<bool, 0> ssavalue_assigned;
+    SmallVector<int, 0> ssavalue_usecount;
     jl_module_t *module = NULL;
     jl_typecache_t type_cache;
     jl_tbaacache_t tbaa_cache;
@@ -1604,10 +1875,11 @@ class jl_codectx_t {
     jl_value_t *rettype = NULL;
     jl_code_info_t *source = NULL;
     jl_array_t *code = NULL;
-    size_t world = 0;
+    size_t min_world = 0;
+    size_t max_world = -1;
     const char *name = NULL;
     StringRef file{};
-    ssize_t *line = NULL;
+    int32_t line = -1;
     Value *spvals_ptr = NULL;
     Value *argArray = NULL;
     Value *argCount = NULL;
@@ -1620,23 +1892,27 @@ class jl_codectx_t {
 
     Value *pgcstack = NULL;
     Instruction *topalloca = NULL;
+    Value *world_age_at_entry = NULL;
 
     bool use_cache = false;
     bool external_linkage = false;
     const jl_cgparams_t *params = NULL;
 
-    std::vector<std::unique_ptr<Module>> llvmcall_modules;
+    SmallVector<std::unique_ptr<Module>, 0> llvmcall_modules;
 
-    jl_codectx_t(LLVMContext &llvmctx, jl_codegen_params_t &params)
+    jl_codectx_t(LLVMContext &llvmctx, jl_codegen_params_t &params, size_t min_world, size_t max_world)
       : builder(llvmctx),
         emission_context(params),
         call_targets(),
-        global_targets(params.globals),
-        external_calls(params.external_fns),
-        world(params.world),
+        min_world(min_world),
+        max_world(max_world),
         use_cache(params.cache),
         external_linkage(params.external_linkage),
-        params(params.params) { }
+        params(params.params) {
+    }
+
+    jl_codectx_t(LLVMContext &llvmctx, jl_codegen_params_t &params, jl_code_instance_t *ci) :
+        jl_codectx_t(llvmctx, params, jl_atomic_load_relaxed(&ci->min_world), jl_atomic_load_relaxed(&ci->max_world)) {}
 
     jl_typecache_t &types() {
         type_cache.initialize(builder.getContext(), emission_context.DL);
@@ -1736,28 +2012,30 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) {
 }
 
 static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL);
-static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg);
+static jl_returninfo_t get_specsig_function(jl_codegen_params_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure,
+        ArrayRef<const char*> ArgNames=None, unsigned nreq=0);
 static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1);
-static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
-                                     jl_binding_t **pbnd, bool assign);
-static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa);
+static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, jl_value_t *scope, bool isvol, MDNode *tbaa);
 static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i);
-static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg);
+static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const Twine &msg);
 static Value *get_current_task(jl_codectx_t &ctx);
 static Value *get_current_ptls(jl_codectx_t &ctx);
-static Value *get_last_age_field(jl_codectx_t &ctx);
+static Value *get_tls_world_age(jl_codectx_t &ctx);
+static Value *get_scope_field(jl_codectx_t &ctx);
+static Value *get_tls_world_age_field(jl_codectx_t &ctx);
 static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block = true);
-static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *theF,
-                             const jl_cgval_t *args, size_t nargs, JuliaFunction<> *trampoline);
+static CallInst *emit_jlcall(jl_codectx_t &ctx, Value *theFptr, Value *theF,
+                             ArrayRef<jl_cgval_t> args, size_t nargs, JuliaFunction<> *trampoline);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value *theF,
-                             const jl_cgval_t *args, size_t nargs, JuliaFunction<> *trampoline);
+                             ArrayRef<jl_cgval_t> args, size_t nargs, JuliaFunction<> *trampoline);
 static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                         Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
-static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool is_promotable=false);
-static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt);
+static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, ArrayRef<jl_cgval_t> argv, bool is_promotable=false);
+static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayRef<jl_cgval_t> argv, size_t nargs, jl_value_t *rt, bool always_inline);
 
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
-static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G);
+static unsigned julia_alignment(jl_value_t *jt);
+static void recombine_value(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dst, jl_aliasinfo_t const &dst_ai, Align alignment, bool isVolatile);
 
 static GlobalVariable *prepare_global_in(Module *M, JuliaVariable *G)
 {
@@ -1778,53 +2056,96 @@ static inline GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G)
     if (!local) {
         // Copy the GlobalVariable, but without the initializer, so it becomes a declaration
         GlobalVariable *proto = new GlobalVariable(*M, G->getValueType(),
-                G->isConstant(), GlobalVariable::ExternalLinkage,
+                G->isConstant(), G->getLinkage(),
                 nullptr, G->getName(), nullptr, G->getThreadLocalMode());
+        if (proto->hasLocalLinkage()) {
+            proto->setInitializer(G->getInitializer());
+        }
         proto->copyAttributesFrom(G);
-        // DLLImport only needs to be set for the shadow module
-        // it just gets annoying in the JIT
-        proto->setDLLStorageClass(GlobalValue::DefaultStorageClass);
         return proto;
     }
     return cast<GlobalVariable>(local);
 }
 
+static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, size_t byte_offset, const Twine &Name="")
+{
+    auto *gep = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), base, byte_offset);
+    setName(ctx.emission_context, gep, Name);
+    return gep;
+}
+
+static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, Value *byte_offset, const Twine &Name="")
+{
+    auto *gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), base, byte_offset, Name);
+    setName(ctx.emission_context, gep, Name);
+    return gep;
+}
+
 
 // --- convenience functions for tagging llvm values with julia types ---
 
-static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_context, Constant *val, StringRef name, Module &M)
+static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_context, Constant *val, Align align, const Twine &name, Module &M)
 {
     GlobalVariable *&gv = emission_context.mergedConstants[val];
-    StringRef localname;
-    std::string ssno;
-    if (gv == nullptr) {
-        raw_string_ostream(ssno) << name << emission_context.mergedConstants.size();
-        localname = StringRef(ssno);
-    }
-    else {
-        localname = gv->getName();
-        if (gv->getParent() != &M)
-            gv = cast_or_null<GlobalVariable>(M.getNamedValue(localname));
-    }
-    if (gv == nullptr) {
-        gv = new GlobalVariable(
+    auto get_gv = [&](const Twine &name) {
+        auto gv = new GlobalVariable(
                 M,
                 val->getType(),
                 true,
                 GlobalVariable::PrivateLinkage,
                 val,
-                localname);
+                name);
         gv->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+        gv->setAlignment(align);
+        return gv;
+    };
+    if (gv == nullptr) {
+        gv = get_gv(name + "#" + Twine(emission_context.mergedConstants.size()));
+    }
+    else if (gv->getParent() != &M) {
+        StringRef gvname = gv->getName();
+        gv = M.getNamedGlobal(gvname);
+        if (!gv) {
+            gv = get_gv(gvname);
+        }
     }
-    assert(localname == gv->getName());
+    assert(gv->getName().starts_with(name.str()));
     assert(val == gv->getInitializer());
     return gv;
 }
 
-static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty)
+static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty, Align align)
 {
     ++EmittedAllocas;
-    return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), "", /*InsertBefore=*/ctx.topalloca);
+    return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), nullptr, align, "",
+#if JL_LLVM_VERSION >= 200000
+                /*InsertBefore=*/ctx.topalloca->getIterator()
+#else
+                /*InsertBefore=*/ctx.topalloca
+#endif
+    );
+}
+
+static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, unsigned nb, Align align)
+{
+    // Stupid hack: SROA takes hints from the element type, and will happily split this allocation into lots of unaligned bits
+    // if it cannot find something better to do, which is terrible for performance.
+    // However, if we emit this with an element size equal to the alignment, it will instead split it into aligned chunks
+    // which is great for performance and vectorization.
+    if (alignTo(nb, align) == align.value()) // don't bother with making an array of length 1
+        return emit_static_alloca(ctx, ctx.builder.getIntNTy(align.value() * 8), align);
+    return emit_static_alloca(ctx, ArrayType::get(ctx.builder.getIntNTy(align.value() * 8), alignTo(nb, align) / align.value()), align);
+}
+
+static AllocaInst *emit_static_roots(jl_codectx_t &ctx, unsigned nroots)
+{
+    AllocaInst *staticroots = emit_static_alloca(ctx, ctx.types().T_prjlvalue, Align(sizeof(void*)));
+    staticroots->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nroots));
+    IRBuilder<> builder(ctx.topalloca);
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+    // make sure these are nullptr early from LLVM's perspective, in case it decides to SROA it
+    ai.decorateInst(builder.CreateMemSet(staticroots, builder.getInt8(0), nroots * sizeof(void*), staticroots->getAlign()))->moveAfter(ctx.topalloca);
+    return staticroots;
 }
 
 static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *sty, MDNode *tbaa)
@@ -1833,13 +2154,12 @@ static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *st
     size_t first_offset = sty->layout->nfields ? jl_field_offset(sty, 0) : 0;
     if (first_offset != 0)
         ctx.builder.CreateMemSet(ptr, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), first_offset, MaybeAlign(0));
-    size_t i, np = sty->layout->npointers;
-    if (np == 0)
+    if (sty->layout->first_ptr < 0)
         return;
+    size_t i, np = sty->layout->npointers;
     auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx.builder.getContext());
-    ptr = ctx.builder.CreateBitCast(ptr, T_prjlvalue->getPointerTo(ptr->getType()->getPointerAddressSpace()));
     for (i = 0; i < np; i++) {
-        Value *fld = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, jl_ptr_offset(sty, i));
+        Value *fld = emit_ptrgep(ctx, ptr, jl_ptr_offset(sty, i) * sizeof(jl_value_t*));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
         ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(T_prjlvalue), fld));
     }
@@ -1852,8 +2172,8 @@ static Value *emit_inttoptr(jl_codectx_t &ctx, Value *v, Type *ty)
     if (auto I = dyn_cast<PtrToIntInst>(v)) {
         auto ptr = I->getOperand(0);
         if (ty->getPointerAddressSpace() == ptr->getType()->getPointerAddressSpace())
-            return ctx.builder.CreateBitCast(ptr, ty);
-        else if (cast<PointerType>(ty)->hasSameElementTypeAs(cast<PointerType>(ptr->getType())))
+            return ptr;
+        else
             return ctx.builder.CreateAddrSpaceCast(ptr, ty);
     }
     ++EmittedIntToPtrs;
@@ -1871,8 +2191,10 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_value_t *typ)
     if (jl_is_type_type(typ)) {
         assert(is_uniquerep_Type(typ));
         // replace T::Type{T} with T, by assuming that T must be a leaftype of some sort
-        jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+        jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None);
         constant.constant = jl_tparam0(typ);
+        if (typ == (jl_value_t*)jl_typeofbottom_type->super)
+            constant.isghost = true;
         return constant;
     }
     return jl_cgval_t(typ);
@@ -1885,7 +2207,7 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_datatype_t *typ)
 static inline jl_cgval_t mark_julia_const(jl_codectx_t &ctx, jl_value_t *jv)
 {
     jl_value_t *typ;
-    if (jl_is_type(jv)) {
+    if (jl_is_type(jv) && jv != jl_bottom_type) {
         typ = (jl_value_t*)jl_wrap_Type(jv); // TODO: gc-root this?
     }
     else {
@@ -1893,16 +2215,16 @@ static inline jl_cgval_t mark_julia_const(jl_codectx_t &ctx, jl_value_t *jv)
         if (jl_is_datatype_singleton((jl_datatype_t*)typ))
             return ghostValue(ctx, typ);
     }
-    jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+    jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None);
     constant.constant = jv;
     return constant;
 }
 
 
-static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, Value *tindex, MDNode *tbaa)
+static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, Value *tindex, MDNode *tbaa, ArrayRef<Value*> inline_roots=None)
 {
     // this enables lazy-copying of immutable values and stack or argument slots
-    jl_cgval_t tagval(v, false, typ, tindex, tbaa);
+    jl_cgval_t tagval(v, false, typ, tindex, tbaa, inline_roots);
     return tagval;
 }
 
@@ -1920,20 +2242,43 @@ static bool valid_as_globalinit(const Value *v) {
     return isa<Constant>(v);
 }
 
+static Value *zext_struct(jl_codectx_t &ctx, Value *V);
+
+// TODO: in the future, assume all callers will handle the interior pointers separately, and have
+// have zext_struct strip them out, so we aren't saving those to the stack here causing shadow stores
+// to be necessary too
 static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_t *typ, Value *tindex)
 {
     Value *loc;
+    v = zext_struct(ctx, v);
+    Align align(julia_alignment(typ));
     if (valid_as_globalinit(v)) { // llvm can't handle all the things that could be inside a ConstantExpr
-        loc = get_pointer_to_constant(ctx.emission_context, cast<Constant>(v), "_j_const", *jl_Module);
+        assert(jl_is_concrete_type(typ)); // not legal to have an unboxed abstract type
+        loc = get_pointer_to_constant(ctx.emission_context, cast<Constant>(v), align, "_j_const", *jl_Module);
     }
     else {
-        loc = emit_static_alloca(ctx, v->getType());
-        ctx.builder.CreateStore(v, loc);
+        loc = emit_static_alloca(ctx, v->getType(), align);
+        ctx.builder.CreateAlignedStore(v, loc, align);
     }
     return mark_julia_slot(loc, typ, tindex, ctx.tbaa().tbaa_stack);
 }
 static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, const jl_cgval_t &v)
 {
+    if (!v.inline_roots.empty()) {
+        //if (v.V == nullptr) {
+        //    AllocaInst *loc = emit_static_roots(ctx, v.inline_roots.size());
+        //    for (size_t i = 0; i < v.inline_roots.counts(); i++)
+        //        ctx.builder.CreateAlignedStore(v.inline_roots[i], emit_ptrgep(ctx, loc, i * sizeof(void*)), Align(sizeof(void*)));
+        //    return mark_julia_slot(loc, v.typ, v.TIndex, ctx.tbaa().tbaa_gcframe);
+        //}
+        Align align(julia_alignment(v.typ));
+        Type *ty = julia_type_to_llvm(ctx, v.typ);
+        AllocaInst *loc = emit_static_alloca(ctx, ty, align);
+        auto tbaa = v.V == nullptr ? ctx.tbaa().tbaa_gcframe : ctx.tbaa().tbaa_stack;
+        auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
+        recombine_value(ctx, v, loc, stack_ai, align, false);
+        return mark_julia_slot(loc, v.typ, v.TIndex, tbaa);
+    }
     if (v.ispointer())
         return v;
     return value_to_pointer(ctx, v.V, v.typ, v.TIndex);
@@ -1946,7 +2291,8 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox
             // replace T::Type{T} with T
             return ghostValue(ctx, typ);
         }
-    } else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) {
+    }
+    else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) {
         // no need to explicitly load/store a constant/ghost value
         return ghostValue(ctx, typ);
     }
@@ -1954,13 +2300,14 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox
     if (type_is_ghost(T)) {
         return ghostValue(ctx, typ);
     }
-    if (v && !isboxed && v->getType()->isAggregateType() && CountTrackedPointers(v->getType()).count == 0) {
+    if (v && !isboxed && v->getType()->isAggregateType()) {
         // eagerly put this back onto the stack
         // llvm mem2reg pass will remove this if unneeded
-        return value_to_pointer(ctx, v, typ, NULL);
+        if (CountTrackedPointers(v->getType()).count == 0)
+            return value_to_pointer(ctx, v, typ, NULL);
     }
     if (isboxed)
-        return jl_cgval_t(v, isboxed, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+        return jl_cgval_t(v, isboxed, typ, NULL, best_tbaa(ctx.tbaa(), typ), None);
     return jl_cgval_t(v, typ, NULL);
 }
 
@@ -1995,7 +2342,7 @@ static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t &
             if (alwaysboxed) {
                 // discovered that this union-split type must actually be isboxed
                 if (v.Vboxed) {
-                    return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+                    return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), v.inline_roots);
                 }
                 else {
                     // type mismatch (there weren't any boxed values in the union)
@@ -2038,7 +2385,7 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi)
 {
     assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things");
     if (vi.usedUndef) {
-        vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext()));
+        vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext()), Align(1));
         setName(ctx.emission_context, vi.defFlag, "isdefined");
         store_def_flag(ctx, vi, false);
     }
@@ -2047,21 +2394,14 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi)
 
 // --- utilities ---
 
-static Constant *undef_value_for_type(Type *T) {
-    auto tracked = CountTrackedPointers(T);
-    Constant *undef;
-    if (tracked.count)
-        // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL
-        undef = Constant::getNullValue(T);
-    else
-        undef = UndefValue::get(T);
-    return undef;
-}
-
 static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block)
 {
     Function *f = irbuilder.GetInsertBlock()->getParent();
+#if JL_LLVM_VERSION >= 200000
+    Function *trap_func = Intrinsic::getOrInsertDeclaration(
+#else
     Function *trap_func = Intrinsic::getDeclaration(
+#endif
             f->getParent(),
             Intrinsic::trap);
     irbuilder.CreateCall(trap_func);
@@ -2084,7 +2424,11 @@ static void CreateConditionalAbort(IRBuilder<> &irbuilder, Value *test)
     BasicBlock *postBB = BasicBlock::Create(irbuilder.getContext(), "post_abort", f);
     irbuilder.CreateCondBr(test, abortBB, postBB);
     irbuilder.SetInsertPoint(abortBB);
+#if JL_LLVM_VERSION >= 200000
+    Function *trap_func = Intrinsic::getOrInsertDeclaration(
+#else
     Function *trap_func = Intrinsic::getDeclaration(
+#endif
             f->getParent(),
             Intrinsic::trap);
     irbuilder.CreateCall(trap_func);
@@ -2100,7 +2444,7 @@ static void CreateConditionalAbort(IRBuilder<> &irbuilder, Value *test)
 static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip)
 {
     // previous value was a split union, compute new index, or box
-    Value *new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80);
+    Value *new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER);
     SmallBitVector skip_box(1, true);
     Value *tindex = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
     if (jl_is_uniontype(typ)) {
@@ -2143,14 +2487,14 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
     // some of the values are still unboxed
     if (!isa<Constant>(new_tindex)) {
         Value *wasboxed = NULL;
-        // If the old value was boxed and unknown (type tag 0x80),
+        // If the old value was boxed and unknown (type tag UNION_BOX_MARKER),
         // it is possible that the tag was actually one of the types
         // that are now explicitly represented. To find out, we need
         // to compare typeof(v.Vboxed) (i.e. the type of the unknown
         // value) against all the types that are now explicitly
         // selected and select the appropriate one as our new tindex.
         if (v.Vboxed) {
-            wasboxed = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
+            wasboxed = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER));
             new_tindex = ctx.builder.CreateOr(wasboxed, new_tindex);
             wasboxed = ctx.builder.CreateICmpNE(wasboxed, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
             setName(ctx.emission_context, wasboxed, "wasboxed");
@@ -2172,10 +2516,10 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
             };
 
             // If we don't find a match. The type remains unknown
-            // (0x80). We could use `v.Tindex`, here, since we know
-            // it has to be 0x80, but it seems likely the backend
+            // (UNION_BOX_MARKER). We could use `v.Tindex`, here, since we know
+            // it has to be UNION_BOX_MARKER, but it seems likely the backend
             // will like the explicit constant better.
-            Value *union_box_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80);
+            Value *union_box_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER);
             unsigned counter = 0;
             for_each_uniontype_small(
                 // for each new union-split value
@@ -2185,7 +2529,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                         // didn't handle this item before, select its new union index
                         maybe_setup_union_isa();
                         Value *cmp = ctx.builder.CreateICmpEQ(emit_tagfrom(ctx, jt), union_box_dt);
-                        union_box_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80 | idx), union_box_tindex);
+                        union_box_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER | idx), union_box_tindex);
                     }
                 },
                 typ,
@@ -2195,7 +2539,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                 BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_union_isa", ctx.f);
                 ctx.builder.CreateBr(postBB);
                 ctx.builder.SetInsertPoint(currBB);
-                Value *wasunknown = ctx.builder.CreateICmpEQ(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
+                Value *wasunknown = ctx.builder.CreateICmpEQ(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER));
                 ctx.builder.CreateCondBr(wasunknown, union_isaBB, postBB);
                 ctx.builder.SetInsertPoint(postBB);
                 PHINode *tindex_phi = ctx.builder.CreatePHI(getInt8Ty(ctx.builder.getContext()), 2);
@@ -2207,14 +2551,14 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
         }
         if (!skip_box.all()) {
             // some values weren't unboxed in the new union
-            // box them now (tindex above already selected 0x80 = box for them)
+            // box them now (tindex above already selected UNION_BOX_MARKER = box for them)
             Value *boxv = box_union(ctx, v, skip_box);
             if (v.Vboxed) {
                 // If the value is boxed both before and after, we don't need
                 // to touch it at all. Otherwise we're either transitioning
                 // unboxed->boxed, or leaving an unboxed value in place.
                 Value *isboxed = ctx.builder.CreateICmpNE(
-                    ctx.builder.CreateAnd(new_tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                    ctx.builder.CreateAnd(new_tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                     ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
                 boxv = ctx.builder.CreateSelect(
                     ctx.builder.CreateAnd(wasboxed, isboxed), v.Vboxed, boxv);
@@ -2233,22 +2577,22 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                 tbaa = oldv.tbaa;
                 slotv = ctx.builder.CreateSelect(isboxv,
                             decay_derived(ctx, boxv),
-                            decay_derived(ctx, emit_bitcast(ctx, slotv, boxv->getType())));
+                            decay_derived(ctx, slotv));
             }
-            jl_cgval_t newv = jl_cgval_t(slotv, false, typ, new_tindex, tbaa);
+            jl_cgval_t newv = jl_cgval_t(slotv, false, typ, new_tindex, tbaa, v.inline_roots);
             assert(boxv->getType() == ctx.types().T_prjlvalue);
             newv.Vboxed = boxv;
             return newv;
         }
     }
     else {
-        return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+        return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None);
     }
     return jl_cgval_t(v, typ, new_tindex);
 }
 
 // given a value marked with type `v.typ`, compute the mapping and/or boxing to return a value of type `typ`
-// TODO: should this set TIndex when trivial (such as 0x80 or concrete types) ?
+// TODO: should this set TIndex when trivial (such as UNION_BOX_MARKER or concrete types) ?
 static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip)
 {
     if (typ == (jl_value_t*)jl_typeofbottom_type)
@@ -2260,29 +2604,28 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_
         return ghostValue(ctx, typ);
     Value *new_tindex = NULL;
     if (jl_is_concrete_type(typ)) {
-        if (v.TIndex && !jl_is_pointerfree(typ)) {
-            // discovered that this union-split type must actually be isboxed
-            if (v.Vboxed) {
-                return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
-            }
-            else {
-                // type mismatch: there weren't any boxed values in the union
-                if (skip)
-                    *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
-                else
-                    CreateTrap(ctx.builder);
-                return jl_cgval_t();
-            }
+        if (jl_is_concrete_type(v.typ)) {
+            // type mismatch: changing from one leaftype to another
+            if (skip)
+                *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
+            else
+                CreateTrap(ctx.builder);
+            return jl_cgval_t();
         }
-        if (jl_is_concrete_type(v.typ) && !jl_is_kind(v.typ)) {
-            if (jl_is_concrete_type(typ) && !jl_is_kind(typ)) {
-                // type mismatch: changing from one leaftype to another
-                if (skip)
-                    *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
-                else
-                    CreateTrap(ctx.builder);
-                return jl_cgval_t();
+        bool mustbox_union = v.TIndex && !jl_is_pointerfree(typ);
+        if (v.Vboxed && (v.isboxed || mustbox_union)) {
+            if (skip) {
+                *skip = ctx.builder.CreateNot(emit_exactly_isa(ctx, v, (jl_datatype_t*)typ, true));
             }
+            return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), v.inline_roots);
+        }
+        if (mustbox_union) {
+            // type mismatch: there weren't any boxed values in the union
+            if (skip)
+                *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
+            else
+                CreateTrap(ctx.builder);
+            return jl_cgval_t();
         }
     }
     else {
@@ -2296,9 +2639,9 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_
             unsigned new_idx = get_box_tindex((jl_datatype_t*)v.typ, typ);
             if (new_idx) {
                 new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), new_idx);
-                if (v.V && !v.ispointer()) {
+                if (v.V && v.inline_roots.empty() && !v.ispointer()) {
                     // TODO: remove this branch once all consumers of v.TIndex understand how to handle a non-ispointer value
-                    return value_to_pointer(ctx, v.V, typ, new_tindex);
+                    return jl_cgval_t(value_to_pointer(ctx, v), typ, new_tindex);
                 }
             }
             else if (jl_subtype(v.typ, typ)) {
@@ -2320,27 +2663,19 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_
         }
         if (makeboxed) {
             // convert to a simple isboxed value
-            return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+            return mark_julia_type(ctx, boxed(ctx, v), true, typ);
         }
     }
     return jl_cgval_t(v, typ, new_tindex);
 }
 
-std::unique_ptr<Module> jl_create_llvm_module(StringRef name, LLVMContext &context, bool imaging_mode, const DataLayout &DL, const Triple &triple)
+std::unique_ptr<Module> jl_create_llvm_module(StringRef name, LLVMContext &context, const DataLayout &DL, const Triple &triple) JL_NOTSAFEPOINT
 {
     ++ModulesCreated;
     auto m = std::make_unique<Module>(name, context);
-    // Some linkers (*cough* OS X) don't understand DWARF v4, so we use v2 in
-    // imaging mode. The structure of v4 is slightly nicer for debugging JIT
-    // code.
+    // According to clang darwin above 10.10 supports dwarfv4
     if (!m->getModuleFlag("Dwarf Version")) {
-        int dwarf_version = 4;
-    if (triple.isOSDarwin()) {
-        if (imaging_mode) {
-            dwarf_version = 2;
-        }
-    }
-    m->addModuleFlag(llvm::Module::Warning, "Dwarf Version", dwarf_version);
+        m->addModuleFlag(llvm::Module::Warning, "Dwarf Version", 4);
     }
     if (!m->getModuleFlag("Debug Info Version"))
         m->addModuleFlag(llvm::Module::Warning, "Debug Info Version",
@@ -2361,23 +2696,26 @@ std::unique_ptr<Module> jl_create_llvm_module(StringRef name, LLVMContext &conte
     return m;
 }
 
-static void jl_name_jlfunc_args(jl_codegen_params_t &params, Function *F) {
+static void jl_name_jlfunc_args(jl_codegen_params_t &params, Function *F) JL_NOTSAFEPOINT
+{
     assert(F->arg_size() == 3);
-    F->getArg(0)->setName("function");
-    F->getArg(1)->setName("args");
-    F->getArg(2)->setName("nargs");
+    F->getArg(0)->setName("function::Core.Function");
+    F->getArg(1)->setName("args::Any[]");
+    F->getArg(2)->setName("nargs::UInt32");
 }
 
-static void jl_name_jlfuncparams_args(jl_codegen_params_t &params, Function *F) {
+static void jl_name_jlfuncparams_args(jl_codegen_params_t &params, Function *F) JL_NOTSAFEPOINT
+{
     assert(F->arg_size() == 4);
-    F->getArg(0)->setName("function");
-    F->getArg(1)->setName("args");
-    F->getArg(2)->setName("nargs");
-    F->getArg(3)->setName("sparams");
+    F->getArg(0)->setName("function::Core.Function");
+    F->getArg(1)->setName("args::Any[]");
+    F->getArg(2)->setName("nargs::UInt32");
+    F->getArg(3)->setName("sparams::Any");
 }
 
-static void jl_init_function(Function *F, const Triple &TT)
+void jl_init_function(Function *F, const jl_codegen_params_t &params) JL_NOTSAFEPOINT
 {
+    auto &TT = params.TargetTriple;
     // set any attributes that *must* be set on all functions
     AttrBuilder attr(F->getContext());
     if (TT.isOSWindows() && TT.getArch() == Triple::x86) {
@@ -2387,34 +2725,25 @@ static void jl_init_function(Function *F, const Triple &TT)
         attr.addStackAlignmentAttr(16);
     }
     if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
-#if JL_LLVM_VERSION < 150000
-        attr.addAttribute(Attribute::UWTable); // force NeedsWinEH
-#else
         attr.addUWTableAttr(llvm::UWTableKind::Default); // force NeedsWinEH
-#endif
     }
-    if (jl_fpo_disabled(TT))
-        attr.addAttribute("frame-pointer", "all");
-    if (!TT.isOSWindows()) {
-#if !defined(_COMPILER_ASAN_ENABLED_)
+    attr.addAttribute("frame-pointer", "all");
+    if (!TT.isOSWindows() && !JL_FEAT_TEST(params, sanitize_address)) {
         // ASAN won't like us accessing undefined memory causing spurious issues,
         // and Windows has platform-specific handling which causes it to mishandle
         // this annotation. Other platforms should just ignore this if they don't
         // implement it.
         attr.addAttribute("probe-stack", "inline-asm");
         //attr.addAttribute("stack-probe-size", "4096"); // can use this to change the default
-#endif
     }
-#if defined(_COMPILER_ASAN_ENABLED_)
-    attr.addAttribute(Attribute::SanitizeAddress);
-#endif
-#if defined(_COMPILER_MSAN_ENABLED_)
-    attr.addAttribute(Attribute::SanitizeMemory);
-#endif
+    if (JL_FEAT_TEST(params, sanitize_address))
+        attr.addAttribute(Attribute::SanitizeAddress);
+    if (JL_FEAT_TEST(params, sanitize_memory))
+        attr.addAttribute(Attribute::SanitizeMemory);
     F->addFnAttrs(attr);
 }
 
-static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t *rettype, bool prefer_specsig)
+static bool uses_specsig(jl_value_t *sig, bool needsparams, jl_value_t *rettype, bool prefer_specsig)
 {
     if (needsparams)
         return false;
@@ -2424,11 +2753,10 @@ static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t
         return false;
     if (jl_nparams(sig) == 0)
         return false;
-    if (va) {
-        if (jl_is_vararg(jl_tparam(sig, jl_nparams(sig) - 1)))
-            return false;
-    }
+    if (jl_vararg_kind(jl_tparam(sig, jl_nparams(sig) - 1)) == JL_VARARG_UNBOUND)
+        return false;
     // not invalid, consider if specialized signature is worthwhile
+    // n.b. sig is sometimes wrong for OC (tparam0 might be the captures type of the specialization, even though what gets passed in that slot is an OC object), so prefer_specsig is always set (instead of recomputing tparam0 using get_oc_type)
     if (prefer_specsig)
         return true;
     if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype) && rettype != (jl_value_t*)jl_bool_type)
@@ -2445,6 +2773,7 @@ static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t
     bool allSingleton = true;
     for (size_t i = 0; i < jl_nparams(sig); i++) {
         jl_value_t *sigt = jl_tparam(sig, i);
+        // TODO: sigt = unwrap_va(sigt)
         bool issing = jl_is_datatype(sigt) && jl_is_datatype_singleton((jl_datatype_t*)sigt);
         allSingleton &= issing;
         if (!deserves_argbox(sigt) && !issing) {
@@ -2456,10 +2785,8 @@ static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t
     return false; // jlcall sig won't require any box allocations
 }
 
-static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t *rettype, bool prefer_specsig)
+static std::pair<bool, bool> uses_specsig(jl_value_t *abi, jl_method_instance_t *lam, jl_value_t *rettype, bool prefer_specsig)
 {
-    int va = lam->def.method->isva;
-    jl_value_t *sig = lam->specTypes;
     bool needsparams = false;
     if (jl_is_method(lam->def.method)) {
         if ((size_t)jl_subtype_env_size(lam->def.method->sig) != jl_svec_len(lam->sparam_vals))
@@ -2469,7 +2796,7 @@ static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t
                 needsparams = true;
         }
     }
-    return std::make_pair(uses_specsig(sig, needsparams, va, rettype, prefer_specsig), needsparams);
+    return std::make_pair(uses_specsig(abi, needsparams, rettype, prefer_specsig), needsparams);
 }
 
 
@@ -2483,18 +2810,17 @@ static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const cha
 {
     Value *pv = ConstantExpr::getIntToPtr(
         ConstantInt::get(ctx.types().T_size, (uintptr_t)ptr),
-        getInt64PtrTy(ctx.builder.getContext()));
-    Value *v = ctx.builder.CreateLoad(getInt64Ty(ctx.builder.getContext()), pv, true, name);
-    v = ctx.builder.CreateAdd(v, addend);
-    ctx.builder.CreateStore(v, pv, true); // volatile, not atomic, so this might be an underestimate,
-                                          // but it's faster this way
+        getPointerTy(ctx.builder.getContext()));
+    ctx.builder.CreateAtomicRMW(AtomicRMWInst::Add, pv,
+                                           addend, MaybeAlign(),
+                                           AtomicOrdering::Monotonic);
 }
 
 // Code coverage
 
 static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line)
 {
-    if (ctx.emission_context.imaging)
+    if (ctx.emission_context.imaging_mode)
         return; // TODO
     if (filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
         return;
@@ -2505,7 +2831,7 @@ static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line)
 
 static void mallocVisitLine(jl_codectx_t &ctx, StringRef filename, int line, Value *sync)
 {
-    if (ctx.emission_context.imaging)
+    if (ctx.emission_context.imaging_mode)
         return; // TODO
     if (filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
         return;
@@ -2517,30 +2843,16 @@ static void mallocVisitLine(jl_codectx_t &ctx, StringRef filename, int line, Val
 
 // --- constant determination ---
 
-static void show_source_loc(jl_codectx_t &ctx, JL_STREAM *out)
-{
-    jl_printf(out, "in %s at %s", ctx.name, ctx.file.str().c_str());
-}
-
-static void cg_bdw(jl_codectx_t &ctx, jl_sym_t *var, jl_binding_t *b)
-{
-    jl_binding_deprecation_warning(ctx.module, var, b);
-    if (b->deprecated == 1 && jl_options.depwarn) {
-        show_source_loc(ctx, JL_STDERR);
-        jl_printf(JL_STDERR, "\n");
-    }
-}
-
-static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args, size_t nargs)
+static jl_value_t *static_apply_type(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> args, size_t nargs)
 {
     assert(nargs > 1);
-    SmallVector<jl_value_t *> v(nargs);
+    SmallVector<jl_value_t *, 0> v(nargs);
     for (size_t i = 0; i < nargs; i++) {
         if (!args[i].constant)
             return NULL;
         v[i] = args[i].constant;
     }
-    assert(v[0] == jl_builtin_apply_type);
+    assert(v[0] == BUILTIN(apply_type));
     size_t last_age = jl_current_task->world_age;
     // call apply_type, but ignore errors. we know that will work in world 1.
     jl_current_task->world_age = 1;
@@ -2555,14 +2867,26 @@ static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args,
     return result;
 }
 
+static void emit_depwarn_check(jl_codectx_t &ctx, jl_binding_t *b)
+{
+    Value *bp = julia_binding_gv(ctx, b);
+    ctx.builder.CreateCall(prepare_call(jldepcheck_func), { bp });
+}
+
 // try to statically evaluate, NULL if not possible. note that this may allocate, and as
 // such the resulting value should not be embedded directly in the generated code.
 static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
 {
     if (jl_is_symbol(ex)) {
         jl_sym_t *sym = (jl_sym_t*)ex;
-        if (jl_is_const(ctx.module, sym))
-            return jl_get_global(ctx.module, sym);
+        jl_binding_t *bnd = jl_get_module_binding(ctx.module, sym, 1);
+        int possibly_deprecated = 0;
+        jl_value_t *cval = jl_get_binding_leaf_partitions_value_if_const(bnd, &possibly_deprecated, ctx.min_world, ctx.max_world);
+        if (cval) {
+            if (possibly_deprecated)
+                emit_depwarn_check(ctx, bnd);
+            return cval;
+        }
         return NULL;
     }
     if (jl_is_slotnumber(ex) || jl_is_argument(ex))
@@ -2570,8 +2894,8 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
     if (jl_is_ssavalue(ex)) {
         ssize_t idx = ((jl_ssavalue_t*)ex)->id - 1;
         assert(idx >= 0);
-        if (ctx.ssavalue_assigned.at(idx)) {
-            return ctx.SAvalues.at(idx).constant;
+        if (ctx.ssavalue_assigned[idx]) {
+            return ctx.SAvalues[idx].constant;
         }
         return NULL;
     }
@@ -2583,11 +2907,13 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
     jl_sym_t *s = NULL;
     if (jl_is_globalref(ex)) {
         s = jl_globalref_name(ex);
-        jl_binding_t *b = jl_get_binding(jl_globalref_mod(ex), s);
-        if (b && b->constp) {
-            if (b->deprecated)
-                cg_bdw(ctx, s, b);
-            return jl_atomic_load_relaxed(&b->value);
+        jl_binding_t *bnd = jl_get_module_binding(jl_globalref_mod(ex), s, 1);
+        int possibly_deprecated = 0;
+        jl_value_t *v = jl_get_binding_leaf_partitions_value_if_const(bnd, &possibly_deprecated, ctx.min_world, ctx.max_world);
+        if (v) {
+            if (possibly_deprecated)
+                emit_depwarn_check(ctx, bnd);
+            return v;
         }
         return NULL;
     }
@@ -2596,7 +2922,7 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
         if (e->head == jl_call_sym) {
             jl_value_t *f = static_eval(ctx, jl_exprarg(e, 0));
             if (f) {
-                if (jl_array_dim0(e->args) == 3 && (f == jl_builtin_getfield || f == jl_builtin_getglobal)) {
+                if (jl_array_dim0(e->args) == 3 && (f == BUILTIN(getfield) || f == BUILTIN(getglobal))) {
                     m = (jl_module_t*)static_eval(ctx, jl_exprarg(e, 1));
                     // Check the tag before evaluating `s` so that a value of random
                     // type won't be corrupted.
@@ -2605,18 +2931,21 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
                     // Assumes that the module is rooted somewhere.
                     s = (jl_sym_t*)static_eval(ctx, jl_exprarg(e, 2));
                     if (s && jl_is_symbol(s)) {
-                        jl_binding_t *b = jl_get_binding(m, s);
-                        if (b && b->constp) {
-                            if (b->deprecated)
-                                cg_bdw(ctx, s, b);
-                            return jl_atomic_load_relaxed(&b->value);
+                        jl_binding_t *bnd = jl_get_module_binding(m, s, 1);
+                        int possibly_deprecated = 0;
+                        jl_value_t *v = jl_get_binding_leaf_partitions_value_if_const(bnd, &possibly_deprecated, ctx.min_world, ctx.max_world);
+                        if (v) {
+                            if (possibly_deprecated)
+                                emit_depwarn_check(ctx, bnd);
+                            return v;
                         }
                     }
                 }
-                else if (f==jl_builtin_tuple || f==jl_builtin_apply_type) {
+                else if (f==BUILTIN(tuple) || f==BUILTIN(apply_type)) {
                     size_t i;
                     size_t n = jl_array_dim0(e->args)-1;
-                    if (n==0 && f==jl_builtin_tuple) return (jl_value_t*)jl_emptytuple;
+                    if (n==0 && f==BUILTIN(tuple))
+                        return (jl_value_t*)jl_emptytuple;
                     jl_value_t **v;
                     JL_GC_PUSHARGS(v, n+1);
                     v[0] = f;
@@ -2669,7 +2998,6 @@ static bool slot_eq(jl_value_t *e, int sl)
 // --- find volatile variables ---
 
 // assigned in a try block and used outside that try block
-
 static bool local_var_occurs(jl_value_t *e, int sl)
 {
     if (slot_eq(e, sl)) {
@@ -2694,45 +3022,47 @@ static bool local_var_occurs(jl_value_t *e, int sl)
     return false;
 }
 
-static std::set<int> assigned_in_try(jl_array_t *stmts, int s, long l)
+static bool have_try_block(jl_array_t *stmts)
 {
-    std::set<int> av;
-    for(int i=s; i <= l; i++) {
-        jl_value_t *st = jl_array_ptr_ref(stmts,i);
-        if (jl_is_expr(st)) {
-            if (((jl_expr_t*)st)->head == jl_assign_sym) {
-                jl_value_t *ar = jl_exprarg(st, 0);
-                if (jl_is_slotnumber(ar)) {
-                    av.insert(jl_slot_number(ar)-1);
-                }
-            }
+    size_t slength = jl_array_dim0(stmts);
+    for (int i = 0; i < (int)slength; i++) {
+        jl_value_t *st = jl_array_ptr_ref(stmts, i);
+        if (jl_is_enternode(st)) {
+            int last = jl_enternode_catch_dest(st);
+            if (last == 0)
+                continue;
+            return 1;
         }
     }
-    return av;
+    return 0;
 }
 
-static void mark_volatile_vars(jl_array_t *stmts, std::vector<jl_varinfo_t> &slots)
+// conservative marking of all variables potentially used after a catch block that were assigned after the try
+static void mark_volatile_vars(jl_array_t *stmts, SmallVectorImpl<jl_varinfo_t> &slots, const std::set<int> &bbstarts)
 {
+    if (!have_try_block(stmts))
+        return;
     size_t slength = jl_array_dim0(stmts);
-    for (int i = 0; i < (int)slength; i++) {
-        jl_value_t *st = jl_array_ptr_ref(stmts, i);
-        if (jl_is_expr(st)) {
-            if (((jl_expr_t*)st)->head == jl_enter_sym) {
-                int last = jl_unbox_long(jl_exprarg(st, 0));
-                std::set<int> as = assigned_in_try(stmts, i + 1, last);
-                for (int j = 0; j < (int)slength; j++) {
-                    if (j < i || j > last) {
-                        std::set<int>::iterator it = as.begin();
-                        for (; it != as.end(); it++) {
-                            if (local_var_occurs(jl_array_ptr_ref(stmts, j), *it)) {
-                                jl_varinfo_t &vi = slots[*it];
-                                vi.isVolatile = true;
-                            }
-                        }
-                    }
+    BitVector assigned_in_block(slots.size()); // since we don't have domtree access, conservatively only ignore slots assigned in the same basic block
+    for (int j = 0; j < (int)slength; j++) {
+        if (bbstarts.count(j + 1))
+            assigned_in_block.reset();
+        jl_value_t *stmt = jl_array_ptr_ref(stmts, j);
+        if (jl_is_expr(stmt)) {
+            jl_expr_t *e = (jl_expr_t*)stmt;
+            if (e->head == jl_assign_sym) {
+                jl_value_t *l = jl_exprarg(e, 0);
+                if (jl_is_slotnumber(l)) {
+                    assigned_in_block.set(jl_slot_number(l)-1);
                 }
             }
         }
+        for (int slot = 0; slot < (int)slots.size(); slot++) {
+            if (!assigned_in_block.test(slot) && local_var_occurs(stmt, slot)) {
+                jl_varinfo_t &vi = slots[slot];
+                vi.isVolatile = true;
+            }
+        }
     }
 }
 
@@ -2785,7 +3115,7 @@ static void general_use_analysis(jl_codectx_t &ctx, jl_value_t *expr, callback &
     }
     else if (jl_is_phicnode(expr)) {
         jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(expr, 0);
-        size_t i, elen = jl_array_len(values);
+        size_t i, elen = jl_array_nrows(values);
         for (i = 0; i < elen; i++) {
             jl_value_t *v = jl_array_ptr_ref(values, i);
             general_use_analysis(ctx, v, f);
@@ -2793,7 +3123,7 @@ static void general_use_analysis(jl_codectx_t &ctx, jl_value_t *expr, callback &
     }
     else if (jl_is_phinode(expr)) {
         jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(expr, 1);
-        size_t i, elen = jl_array_len(values);
+        size_t i, elen = jl_array_nrows(values);
         for (i = 0; i < elen; i++) {
             jl_value_t *v = jl_array_ptr_ref(values, i);
             if (v)
@@ -2819,77 +3149,146 @@ static void simple_use_analysis(jl_codectx_t &ctx, jl_value_t *expr)
 
 // ---- Get Element Pointer (GEP) instructions within the GC frame ----
 
-static jl_value_t *jl_ensure_rooted(jl_codectx_t &ctx, jl_value_t *val)
-{
-    if (jl_is_globally_rooted(val))
-        return val;
-    jl_method_t *m = ctx.linfo->def.method;
-    if (jl_is_method(m)) {
-        // the method might have a root for this already; use it if so
-        JL_LOCK(&m->writelock);
-        if (m->roots) {
-            size_t i, len = jl_array_dim0(m->roots);
-            for (i = 0; i < len; i++) {
-                jl_value_t *mval = jl_array_ptr_ref(m->roots, i);
-                if (mval == val || jl_egal(mval, val)) {
-                    JL_UNLOCK(&m->writelock);
-                    return mval;
-                }
-            }
-        }
-        JL_UNLOCK(&m->writelock);
+static void jl_temporary_root(jl_codegen_params_t &ctx, jl_value_t *val)
+{
+    if (!jl_is_globally_rooted(val)) {
+        jl_array_t *roots = ctx.temporary_roots;
+        if (ctx.temporary_roots_set.find(val) != ctx.temporary_roots_set.end())
+            return;
+        jl_array_ptr_1d_push(roots, val);
+        ctx.temporary_roots_set.insert(val);
     }
-    return jl_as_global_root(val);
+}
+static void jl_temporary_root(jl_codectx_t &ctx, jl_value_t *val)
+{
+    jl_temporary_root(ctx.emission_context, val);
 }
 
 // --- generating function calls ---
 
-static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *name, AtomicOrdering order)
+static jl_cgval_t emit_globalref_runtime(jl_codectx_t &ctx, jl_binding_t *bnd, jl_module_t *mod, jl_sym_t *name)
 {
-    jl_binding_t *bnd = NULL;
-    Value *bp = global_binding_pointer(ctx, mod, name, &bnd, false);
-    if (bp == NULL)
-        return jl_cgval_t();
-    bp = julia_binding_pvalue(ctx, bp);
-    if (bnd) {
-        jl_value_t *v = jl_atomic_load_acquire(&bnd->value); // acquire value for ty
-        if (v != NULL) {
-            if (bnd->constp)
-                return mark_julia_const(ctx, v);
-            LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)));
-            setName(ctx.emission_context, v, jl_symbol_name(name));
-            v->setOrdering(order);
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding);
-            ai.decorateInst(v);
-            jl_value_t *ty = jl_atomic_load_relaxed(&bnd->ty);
-            return mark_julia_type(ctx, v, true, ty);
-        }
-    }
-    // todo: use type info to avoid undef check
-    return emit_checked_var(ctx, bp, name, false, ctx.tbaa().tbaa_binding);
+    Value *bp = julia_binding_gv(ctx, bnd);
+    Value *v = ctx.builder.CreateCall(prepare_call(jlgetbindingvalue_func), { bp });
+    undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name, (jl_value_t*)mod);
+    return mark_julia_type(ctx, v, true, jl_any_type);
 }
 
-static bool emit_globalset(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *sym, const jl_cgval_t &rval_info, AtomicOrdering Order)
+static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *name, AtomicOrdering order)
 {
-    jl_binding_t *bnd = NULL;
-    Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true);
-    if (bp == NULL)
-        return false;
-    Value *rval = boxed(ctx, rval_info);
-    if (bnd && !bnd->constp) {
-        jl_value_t *ty = jl_atomic_load_relaxed(&bnd->ty);
-        if (ty && jl_subtype(rval_info.typ, ty)) { // TODO: use typeassert here instead
-            StoreInst *v = ctx.builder.CreateAlignedStore(rval, julia_binding_pvalue(ctx, bp), Align(sizeof(void*)));
-            v->setOrdering(Order);
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding);
-            ai.decorateInst(v);
-            emit_write_barrier(ctx, bp, rval);
-            return true;
-        }
+    jl_binding_t *bnd = jl_get_module_binding(mod, name, 1);
+    struct restriction_kind_pair rkp = { NULL, NULL, PARTITION_KIND_GUARD, 0 };
+    if (!jl_get_binding_leaf_partitions_restriction_kind(bnd, &rkp, ctx.min_world, ctx.max_world)) {
+        return emit_globalref_runtime(ctx, bnd, mod, name);
     }
-    ctx.builder.CreateCall(prepare_call(jlcheckassign_func),
-            { bp, literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), mark_callee_rooted(ctx, rval) });
-    return true;
+    if (jl_bkind_is_real_constant(rkp.kind) || rkp.kind == PARTITION_KIND_UNDEF_CONST) {
+        if (rkp.maybe_depwarn) {
+            Value *bp = julia_binding_gv(ctx, bnd);
+            ctx.builder.CreateCall(prepare_call(jldepcheck_func), { bp });
+        }
+        jl_value_t *constval = rkp.restriction;
+        if (!constval) {
+            undef_var_error_ifnot(ctx, ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), name, (jl_value_t*)mod);
+            return jl_cgval_t();
+        }
+        return mark_julia_const(ctx, constval);
+    }
+    if (rkp.kind != PARTITION_KIND_GLOBAL) {
+        return emit_globalref_runtime(ctx, bnd, mod, name);
+    }
+    Value *bp = julia_binding_gv(ctx, bnd);
+    if (rkp.maybe_depwarn) {
+        ctx.builder.CreateCall(prepare_call(jldepcheck_func), { bp });
+    }
+    if (bnd != rkp.binding_if_global)
+        bp = julia_binding_gv(ctx, rkp.binding_if_global);
+    jl_value_t *ty = rkp.restriction;
+    Value *bpval = julia_binding_pvalue(ctx, bp);
+    if (ty == nullptr)
+        ty = (jl_value_t*)jl_any_type;
+    return update_julia_type(ctx, emit_checked_var(ctx, bpval, name, (jl_value_t*)mod, false, ctx.tbaa().tbaa_binding), ty);
+}
+
+static jl_cgval_t emit_globalop(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *sym, jl_cgval_t rval, const jl_cgval_t &cmp,
+                                AtomicOrdering Order, AtomicOrdering FailOrder,
+                                bool issetglobal, bool isreplaceglobal, bool isswapglobal, bool ismodifyglobal, bool issetglobalonce,
+                                const jl_cgval_t *modifyop, bool alloc)
+{
+    jl_binding_t *bnd = jl_get_module_binding(mod, sym, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition_all(bnd, ctx.min_world, ctx.max_world);
+    Value *bp = julia_binding_gv(ctx, bnd);
+    if (bpart) {
+        if (jl_binding_kind(bpart) == PARTITION_KIND_GLOBAL) {
+            int possibly_deprecated = bpart->kind & PARTITION_FLAG_DEPWARN;
+            jl_value_t *ty = bpart->restriction;
+            if (ty != nullptr) {
+                const std::string fname = issetglobal ? "setglobal!" : isreplaceglobal ? "replaceglobal!" : isswapglobal ? "swapglobal!" : ismodifyglobal ? "modifyglobal!" : "setglobalonce!";
+                if (!ismodifyglobal) {
+                    emit_typecheck(ctx, rval, ty, fname.c_str());
+                    rval = update_julia_type(ctx, rval, ty);
+                    if (rval.typ == jl_bottom_type)
+                        return jl_cgval_t();
+                }
+                bool isboxed = true;
+                bool maybe_null = jl_atomic_load_relaxed(&bnd->value) == NULL;
+                if (possibly_deprecated) {
+                    ctx.builder.CreateCall(prepare_call(jldepcheck_func), { bp });
+                }
+                return typed_store(ctx,
+                                julia_binding_pvalue(ctx, bp),
+                                rval, cmp, ty,
+                                ctx.tbaa().tbaa_binding,
+                                nullptr,
+                                bp,
+                                isboxed,
+                                Order,
+                                FailOrder,
+                                0,
+                                nullptr,
+                                issetglobal,
+                                isreplaceglobal,
+                                isswapglobal,
+                                ismodifyglobal,
+                                issetglobalonce,
+                                maybe_null,
+                                modifyop,
+                                fname,
+                                mod,
+                                sym);
+
+            }
+        }
+    }
+    Value *m = literal_pointer_val(ctx, (jl_value_t*)mod);
+    Value *s = literal_pointer_val(ctx, (jl_value_t*)sym);
+    ctx.builder.CreateCall(prepare_call(jlcheckbpwritable_func),
+        { bp, m, s });
+    if (issetglobal) {
+        ctx.builder.CreateCall(prepare_call(jlcheckassign_func),
+                { bp, m, s, mark_callee_rooted(ctx, boxed(ctx, rval)) });
+        return rval;
+    }
+    else if (isreplaceglobal) {
+        Value *r = ctx.builder.CreateCall(prepare_call(jlcheckreplace_func),
+                { bp, m, s, boxed(ctx, cmp), boxed(ctx, rval) });
+        return mark_julia_type(ctx, r, true, jl_any_type);
+    }
+    else if (isswapglobal) {
+        Value *r = ctx.builder.CreateCall(prepare_call(jlcheckswap_func),
+                { bp, m, s, mark_callee_rooted(ctx, boxed(ctx, rval)) });
+        return mark_julia_type(ctx, r, true, jl_any_type);
+    }
+    else if (ismodifyglobal) {
+        Value *r = ctx.builder.CreateCall(prepare_call(jlcheckmodify_func),
+                { bp, m, s, boxed(ctx, cmp), boxed(ctx, rval) });
+        return mark_julia_type(ctx, r, true, jl_any_type);
+    }
+    else if (issetglobalonce) {
+        Value *r = ctx.builder.CreateCall(prepare_call(jlcheckassignonce_func),
+                { bp, m, s, mark_callee_rooted(ctx, boxed(ctx, rval)) });
+        return mark_julia_type(ctx, r, true, jl_bool_type);
+    }
+    abort(); // unreachable
 }
 
 static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
@@ -2958,7 +3357,11 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1,
         counter);
     assert(allunboxed); (void)allunboxed;
     ctx.builder.SetInsertPoint(defaultBB);
+#if JL_LLVM_VERSION >= 200000
+    Function *trap_func = Intrinsic::getOrInsertDeclaration(
+#else
     Function *trap_func = Intrinsic::getDeclaration(
+#endif
         ctx.f->getParent(),
         Intrinsic::trap);
     ctx.builder.CreateCall(trap_func);
@@ -2971,25 +3374,26 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1,
 static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2)
 {
     ++EmittedBitsCompares;
+    jl_value_t *argty = (arg1.constant ? jl_typeof(arg1.constant) : arg1.typ);
     bool isboxed;
     Type *at = julia_type_to_llvm(ctx, arg1.typ, &isboxed);
-    assert(jl_is_datatype(arg1.typ) && arg1.typ == arg2.typ && !isboxed);
+    assert(jl_is_datatype(arg1.typ) && arg1.typ == (arg2.constant ? jl_typeof(arg2.constant) : arg2.typ) && !isboxed);
 
     if (type_is_ghost(at))
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
 
     if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) {
         Type *at_int = INTT(at, ctx.emission_context.DL);
-        Value *varg1 = emit_unbox(ctx, at_int, arg1, arg1.typ);
-        Value *varg2 = emit_unbox(ctx, at_int, arg2, arg2.typ);
+        Value *varg1 = emit_unbox(ctx, at_int, arg1, argty);
+        Value *varg2 = emit_unbox(ctx, at_int, arg2, argty);
         return ctx.builder.CreateICmpEQ(varg1, varg2);
     }
 
     if (at->isVectorTy()) {
-        jl_svec_t *types = ((jl_datatype_t*)arg1.typ)->types;
+        jl_svec_t *types = ((jl_datatype_t*)argty)->types;
         Value *answer = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
-        Value *varg1 = emit_unbox(ctx, at, arg1, arg1.typ);
-        Value *varg2 = emit_unbox(ctx, at, arg2, arg2.typ);
+        Value *varg1 = emit_unbox(ctx, at, arg1, argty);
+        Value *varg2 = emit_unbox(ctx, at, arg2, argty);
         for (size_t i = 0, l = jl_svec_len(types); i < l; i++) {
             jl_value_t *fldty = jl_svecref(types, i);
             Value *subAns, *fld1, *fld2;
@@ -3004,27 +3408,27 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
     }
 
     if (at->isAggregateType()) { // Struct or Array
-        jl_datatype_t *sty = (jl_datatype_t*)arg1.typ;
+        jl_datatype_t *sty = (jl_datatype_t*)argty;
         size_t sz = jl_datatype_size(sty);
-        if (sz > 512 && !sty->layout->haspadding) {
-            Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) :
+        if (sz > 512 && !sty->layout->flags.haspadding && sty->layout->flags.isbitsegal) {
+            Value *varg1 = arg1.inline_roots.empty() && arg1.ispointer() ? data_pointer(ctx, arg1) :
                 value_to_pointer(ctx, arg1).V;
-            Value *varg2 = arg2.ispointer() ? data_pointer(ctx, arg2) :
+            Value *varg2 = arg2.inline_roots.empty() && arg2.ispointer() ? data_pointer(ctx, arg2) :
                 value_to_pointer(ctx, arg2).V;
             varg1 = emit_pointer_from_objref(ctx, varg1);
             varg2 = emit_pointer_from_objref(ctx, varg2);
-            Value *gc_uses[2];
-            int nroots = 0;
-            if ((gc_uses[nroots] = get_gc_root_for(arg1)))
-                nroots++;
-            if ((gc_uses[nroots] = get_gc_root_for(arg2)))
-                nroots++;
-            OperandBundleDef OpBundle("jl_roots", makeArrayRef(gc_uses, nroots));
+            SmallVector<Value*, 0> gc_uses;
+            // these roots may seem a bit overkill, but we want to make sure
+            // that a!=b implies (a,)!=(b,) even if a and b are unused and
+            // therefore could be freed and then the memory for a reused for b
+            gc_uses.append(get_gc_roots_for(ctx, arg1));
+            gc_uses.append(get_gc_roots_for(ctx, arg2));
+            OperandBundleDef OpBundle("jl_roots", gc_uses);
             auto answer = ctx.builder.CreateCall(prepare_call(memcmp_func), {
-                        ctx.builder.CreateBitCast(varg1, getInt8PtrTy(ctx.builder.getContext())),
-                        ctx.builder.CreateBitCast(varg2, getInt8PtrTy(ctx.builder.getContext())),
+                        varg1,
+                        varg2,
                         ConstantInt::get(ctx.types().T_size, sz) },
-                    ArrayRef<OperandBundleDef>(&OpBundle, nroots ? 1 : 0));
+                    ArrayRef<OperandBundleDef>(&OpBundle, gc_uses.empty() ? 0 : 1));
 
             if (arg1.tbaa || arg2.tbaa) {
                 jl_aliasinfo_t ai;
@@ -3086,8 +3490,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
     if (arg1.constant && arg2.constant)
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), jl_egal(arg1.constant, arg2.constant));
 
-    jl_value_t *rt1 = arg1.typ;
-    jl_value_t *rt2 = arg2.typ;
+    jl_value_t *rt1 = (arg1.constant ? jl_typeof(arg1.constant) : arg1.typ);
+    jl_value_t *rt2 = (arg2.constant ? jl_typeof(arg2.constant) : arg2.typ);
     if (jl_is_concrete_type(rt1) && jl_is_concrete_type(rt2) && !jl_is_kind(rt1) && !jl_is_kind(rt2) && rt1 != rt2) {
         // disjoint concrete leaf types are never equal (quick test)
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
@@ -3109,8 +3513,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
             // not TIndex && not boxed implies it is an unboxed value of a different type from this singleton
             // (which was probably caught above, but just to be safe, we repeat it here explicitly)
             return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
-        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, ctx.types().T_pjlvalue);
-        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, ctx.types().T_pjlvalue);
+        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : arg1.Vboxed;
+        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : arg2.Vboxed;
         // rooting these values isn't needed since we won't load this pointer
         // and we know at least one of them is a unique Singleton
         // which is already enough to ensure pointer uniqueness for this test
@@ -3122,16 +3526,18 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
     if (jl_type_intersection(rt1, rt2) == (jl_value_t*)jl_bottom_type) // types are disjoint (exhaustive test)
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
 
-    bool justbits1 = jl_is_concrete_immutable(rt1);
-    bool justbits2 = jl_is_concrete_immutable(rt2);
+    // can compare any concrete immutable by bits, except for UnionAll
+    // which has a special non-bits based egal
+    bool justbits1 = jl_is_concrete_immutable(rt1) && !jl_is_kind(rt1);
+    bool justbits2 = jl_is_concrete_immutable(rt2) && !jl_is_kind(rt2);
     if (justbits1 || justbits2) { // whether this type is unique'd by value
         return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] () -> Value* {
             jl_datatype_t *typ = (jl_datatype_t*)(justbits1 ? rt1 : rt2);
             if (typ == jl_bool_type) { // aka jl_pointer_egal
                 // some optimizations for bool, since pointer comparison may be better
                 if ((arg1.isboxed || arg1.constant) && (arg2.isboxed || arg2.constant)) { // aka have-fast-pointer
-                    Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, ctx.types().T_pjlvalue);
-                    Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, ctx.types().T_pjlvalue);
+                    Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : arg1.Vboxed;
+                    Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : arg2.Vboxed;
                     return ctx.builder.CreateICmpEQ(decay_derived(ctx, varg1), decay_derived(ctx, varg2));
                 }
             }
@@ -3167,37 +3573,70 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
 }
 
 static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
-                            const jl_cgval_t *argv, size_t nargs, const jl_cgval_t *modifyop)
+                            ArrayRef<jl_cgval_t> argv, size_t nargs, const jl_cgval_t *modifyop)
 {
+    bool issetglobal = f == BUILTIN(setglobal);
+    bool isreplaceglobal = f == BUILTIN(replaceglobal);
+    bool isswapglobal = f == BUILTIN(swapglobal);
+    bool ismodifyglobal = f == BUILTIN(modifyglobal);
+    bool issetglobalonce = f == BUILTIN(setglobalonce);
+    const jl_cgval_t undefval;
     const jl_cgval_t &mod = argv[1];
     const jl_cgval_t &sym = argv[2];
-    const jl_cgval_t &val = argv[3];
-    enum jl_memory_order order = jl_memory_order_unspecified;
-    assert(f == jl_builtin_setglobal && modifyop == nullptr && "unimplemented");
-
-    if (nargs == 4) {
-        const jl_cgval_t &arg4 = argv[4];
-        if (arg4.constant && jl_is_symbol(arg4.constant))
-            order = jl_get_atomic_order((jl_sym_t*)arg4.constant, false, true);
-        else
+    jl_cgval_t val = argv[isreplaceglobal || ismodifyglobal ? 4 : 3];
+    const jl_cgval_t &cmp = isreplaceglobal || ismodifyglobal ? argv[3] : undefval;
+    enum jl_memory_order order = jl_memory_order_release;
+    const std::string fname = issetglobal ? "setglobal!" : isreplaceglobal ? "replaceglobal!" : isswapglobal ? "swapglobal!" : ismodifyglobal ? "modifyglobal!" : "setglobalonce!";
+    if (nargs >= (isreplaceglobal || ismodifyglobal ? 5 : 4)) {
+        const jl_cgval_t &ord = argv[isreplaceglobal || ismodifyglobal ? 5 : 4];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant)
             return false;
+        order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetglobal, true);
+    }
+    enum jl_memory_order fail_order = order;
+    if ((isreplaceglobal || issetglobalonce) && nargs == (isreplaceglobal ? 6 : 5)) {
+        const jl_cgval_t &ord = argv[isreplaceglobal ? 6 : 5];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant)
+            return false;
+        fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+    }
+    if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) {
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        *ret = jl_cgval_t(); // unreachable
+        return true;
     }
-    else
-        order = jl_memory_order_release;
 
-    if (order == jl_memory_order_invalid || order == jl_memory_order_notatomic) {
-        emit_atomic_error(ctx, order == jl_memory_order_invalid ? "invalid atomic ordering" : "setglobal!: module binding cannot be written non-atomically");
+    if (order == jl_memory_order_notatomic) {
+        emit_atomic_error(ctx,
+                issetglobal ? "setglobal!: module binding cannot be written non-atomically" :
+                isreplaceglobal ? "replaceglobal!: module binding cannot be written non-atomically" :
+                isswapglobal ? "swapglobal!: module binding cannot be written non-atomically" :
+                ismodifyglobal ? "modifyglobal!: module binding cannot be written non-atomically" :
+                "setglobalonce!: module binding cannot be written non-atomically");
+        *ret = jl_cgval_t(); // unreachable
+        return true;
+    }
+    else if (fail_order == jl_memory_order_notatomic) {
+        emit_atomic_error(ctx,
+                isreplaceglobal ? "replaceglobal!: module binding cannot be accessed non-atomically" :
+                "setglobalonce!: module binding cannot be accessed non-atomically");
         *ret = jl_cgval_t(); // unreachable
         return true;
     }
 
     if (sym.constant && jl_is_symbol(sym.constant)) {
-        jl_sym_t *name = (jl_sym_t*)sym.constant;
         if (mod.constant && jl_is_module(mod.constant)) {
-            if (emit_globalset(ctx, (jl_module_t*)mod.constant, name, val, get_llvm_atomic_order(order)))
-                *ret = val;
-            else
-                *ret = jl_cgval_t(); // unreachable
+            *ret = emit_globalop(ctx, (jl_module_t*)mod.constant, (jl_sym_t*)sym.constant, val, cmp,
+                                 get_llvm_atomic_order(order), get_llvm_atomic_order(fail_order),
+                                 issetglobal,
+                                 isreplaceglobal,
+                                 isswapglobal,
+                                 ismodifyglobal,
+                                 issetglobalonce,
+                                 modifyop,
+                                 false);
             return true;
         }
     }
@@ -3206,20 +3645,21 @@ static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 }
 
 static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
-                           const jl_cgval_t *argv, size_t nargs, const jl_cgval_t *modifyop)
+                           ArrayRef<jl_cgval_t> argv, size_t nargs, const jl_cgval_t *modifyop)
 {
     ++EmittedOpfields;
-    bool issetfield = f == jl_builtin_setfield;
-    bool isreplacefield = f == jl_builtin_replacefield;
-    bool isswapfield = f == jl_builtin_swapfield;
-    bool ismodifyfield = f == jl_builtin_modifyfield;
+    bool issetfield = f == BUILTIN(setfield);
+    bool isreplacefield = f == BUILTIN(replacefield);
+    bool isswapfield = f == BUILTIN(swapfield);
+    bool ismodifyfield = f == BUILTIN(modifyfield);
+    bool issetfieldonce = f == BUILTIN(setfieldonce);
     const jl_cgval_t undefval;
     const jl_cgval_t &obj = argv[1];
     const jl_cgval_t &fld = argv[2];
     jl_cgval_t val = argv[isreplacefield || ismodifyfield ? 4 : 3];
     const jl_cgval_t &cmp = isreplacefield || ismodifyfield ? argv[3] : undefval;
     enum jl_memory_order order = jl_memory_order_notatomic;
-    const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : isswapfield ? "swapfield!" : "modifyfield!";
+    const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : isswapfield ? "swapfield!" : ismodifyfield ? "modifyfield!" : "setfieldonce!";
     if (nargs >= (isreplacefield || ismodifyfield ? 5 : 4)) {
         const jl_cgval_t &ord = argv[isreplacefield || ismodifyfield ? 5 : 4];
         emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
@@ -3228,8 +3668,8 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
     }
     enum jl_memory_order fail_order = order;
-    if (isreplacefield && nargs == 6) {
-        const jl_cgval_t &ord = argv[6];
+    if ((isreplacefield || issetfieldonce) && nargs == (isreplacefield ? 6 : 5)) {
+        const jl_cgval_t &ord = argv[isreplacefield ? 6 : 5];
         emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
         if (!ord.constant)
             return false;
@@ -3277,13 +3717,19 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                             isswapfield ?
                             (isatomic ? "swapfield!: atomic field cannot be written non-atomically"
                                       : "swapfield!: non-atomic field cannot be written atomically") :
+                            ismodifyfield ?
                             (isatomic ? "modifyfield!: atomic field cannot be written non-atomically"
-                                      : "modifyfield!: non-atomic field cannot be written atomically"));
+                                      : "modifyfield!: non-atomic field cannot be written atomically") :
+                            (isatomic ? "setfieldonce!: atomic field cannot be written non-atomically"
+                                      : "setfieldonce!: non-atomic field cannot be written atomically"));
                 }
                 else if (isatomic == (fail_order == jl_memory_order_notatomic)) {
                     emit_atomic_error(ctx,
+                            isreplacefield ?
                             (isatomic ? "replacefield!: atomic field cannot be accessed non-atomically"
-                                      : "replacefield!: non-atomic field cannot be accessed atomically"));
+                                      : "replacefield!: non-atomic field cannot be accessed atomically") :
+                            (isatomic ? "setfieldonce!: atomic field cannot be accessed non-atomically"
+                                      : "setfieldonce!: non-atomic field cannot be accessed atomically"));
                 }
                 else if (!uty->name->mutabl) {
                     std::string msg = fname + ": immutable struct of type "
@@ -3293,13 +3739,14 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 }
                 else if (jl_field_isconst(uty, idx)) {
                     std::string msg = fname + ": const field ."
-                        + std::string(jl_symbol_name((jl_sym_t*)jl_svec_ref(jl_field_names(uty), idx)))
+                        + std::string(jl_symbol_name((jl_sym_t*)jl_svecref(jl_field_names(uty), idx)))
                         + " of type "
                         + std::string(jl_symbol_name(uty->name->name))
                         + " cannot be changed";
                     emit_error(ctx, msg);
                 }
                 else {
+                    assert(obj.isboxed);
                     *ret = emit_setfield(ctx, uty, obj, idx, val, cmp, true,
                             (needlock || order <= jl_memory_order_notatomic)
                                 ? AtomicOrdering::NotAtomic
@@ -3307,7 +3754,8 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                             (needlock || fail_order <= jl_memory_order_notatomic)
                                 ? AtomicOrdering::NotAtomic
                                 : get_llvm_atomic_order(fail_order),
-                            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield,
+                            needlock ? boxed(ctx, obj) : nullptr,
+                            issetfield, isreplacefield, isswapfield, ismodifyfield, issetfieldonce,
                             modifyop, fname);
                 }
                 return true;
@@ -3317,30 +3765,239 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
     return false;
 }
 
+static jl_cgval_t emit_isdefinedglobal(jl_codectx_t &ctx, jl_module_t *modu, jl_sym_t *name, int allow_import, enum jl_memory_order order)
+{
+    jl_binding_t *bnd = allow_import ? jl_get_binding(modu, name) : jl_get_module_binding(modu, name, 0);
+    struct restriction_kind_pair rkp = { NULL, NULL, PARTITION_KIND_GUARD, 0 };
+    if (allow_import && jl_get_binding_leaf_partitions_restriction_kind(bnd, &rkp, ctx.min_world, ctx.max_world)) {
+        if (jl_bkind_is_real_constant(rkp.kind))
+            return mark_julia_const(ctx, jl_true);
+        if (rkp.kind == PARTITION_KIND_GLOBAL) {
+            Value *bp = julia_binding_gv(ctx, rkp.binding_if_global);
+            bp = julia_binding_pvalue(ctx, bp);
+            LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)));
+            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding);
+            ai.decorateInst(v);
+            v->setOrdering(get_llvm_atomic_order(order));
+            Value *isnull = ctx.builder.CreateICmpNE(v, Constant::getNullValue(ctx.types().T_prjlvalue));
+            return mark_julia_type(ctx, isnull, false, jl_bool_type);
+        }
+    }
+    Value *isdef = ctx.builder.CreateCall(prepare_call(jlboundp_func), {
+            literal_pointer_val(ctx, (jl_value_t*)modu),
+            literal_pointer_val(ctx, (jl_value_t*)name),
+            ConstantInt::get(getInt32Ty(ctx.builder.getContext()), allow_import)
+        });
+    isdef = ctx.builder.CreateTrunc(isdef, getInt1Ty(ctx.builder.getContext()));
+    return mark_julia_type(ctx, isdef, false, jl_bool_type);
+}
+
+static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
+                            ArrayRef<jl_cgval_t> argv, size_t nargs, const jl_cgval_t *modifyop)
+{
+    bool issetmemory = f == BUILTIN(memoryrefset);
+    bool isreplacememory = f == BUILTIN(memoryrefreplace);
+    bool isswapmemory = f == BUILTIN(memoryrefswap);
+    bool ismodifymemory = f == BUILTIN(memoryrefmodify);
+    bool issetmemoryonce = f == BUILTIN(memoryrefsetonce);
+
+    const jl_cgval_t undefval;
+    const jl_cgval_t &ref = argv[1];
+    jl_cgval_t val = argv[isreplacememory || ismodifymemory ? 3 : 2];
+    jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ);
+    if (!jl_is_genericmemoryref_type(mty_dt) || !jl_is_concrete_type(mty_dt))
+        return false;
+
+    jl_value_t *kind = jl_tparam0(mty_dt);
+    jl_value_t *ety = jl_tparam1(mty_dt);
+    jl_value_t *addrspace = jl_tparam2(mty_dt); (void)addrspace; // TODO
+    mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1);
+    if (kind != (jl_value_t*)jl_not_atomic_sym && kind != (jl_value_t*)jl_atomic_sym)
+        return false;
+
+    const jl_cgval_t &cmp = isreplacememory || ismodifymemory ? argv[2] : undefval;
+    enum jl_memory_order order = jl_memory_order_notatomic;
+    const std::string fname = issetmemory ? "memoryrefset!" : isreplacememory ? "memoryrefreplace!" : isswapmemory ? "memoryrefswap!" : ismodifymemory ? "memoryrefmodify!" : "memoryrefsetonce!";
+    {
+        const jl_cgval_t &ord = argv[isreplacememory || ismodifymemory ? 4 : 3];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant)
+            return false;
+        order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetmemory, true);
+    }
+    enum jl_memory_order fail_order = order;
+    if (isreplacememory || issetmemoryonce) {
+        const jl_cgval_t &ord = argv[isreplacememory ? 5 : 4];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant)
+            return false;
+        fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+    }
+    if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) {
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        *ret = jl_cgval_t(); // unreachable
+        return true;
+    }
+
+    jl_value_t *boundscheck = argv[nargs].constant;
+    emit_typecheck(ctx, argv[nargs], (jl_value_t*)jl_bool_type, fname);
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
+    bool isboxed = layout->flags.arrayelem_isboxed;
+    bool isunion = layout->flags.arrayelem_isunion;
+    bool isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
+    bool needlock = layout->flags.arrayelem_islocked;
+    size_t elsz = layout->size;
+    size_t al = layout->alignment;
+    if (al > JL_HEAP_ALIGNMENT)
+        al = JL_HEAP_ALIGNMENT;
+    if (isatomic == (order == jl_memory_order_notatomic)) {
+        emit_atomic_error(ctx,
+                issetmemory ?
+                (isatomic ? "memoryrefset!: atomic memory cannot be written non-atomically"
+                          : "memoryrefset!: non-atomic memory cannot be written atomically") :
+                isreplacememory ?
+                (isatomic ? "memoryrefreplace!: atomic memory cannot be written non-atomically"
+                          : "memoryrefreplace!: non-atomic memory cannot be written atomically") :
+                isswapmemory ?
+                (isatomic ? "memoryrefswap!: atomic memory cannot be written non-atomically"
+                          : "memoryrefswap!: non-atomic memory cannot be written atomically") :
+                ismodifymemory ?
+                (isatomic ? "memoryrefmodify!: atomic memory cannot be written non-atomically"
+                          : "memoryrefmodify!: non-atomic memory cannot be written atomically") :
+                (isatomic ? "memoryrefsetonce!: atomic memory cannot be written non-atomically"
+                          : "memoryrefsetonce!: non-atomic memory cannot be written atomically"));
+        *ret = jl_cgval_t();
+        return true;
+    }
+    else if (isatomic == (fail_order == jl_memory_order_notatomic)) {
+        emit_atomic_error(ctx,
+                isreplacememory ?
+                (isatomic ? "memoryrefreplace!: atomic memory cannot be accessed non-atomically"
+                          : "memoryrefreplace!: non-atomic memory cannot be accessed atomically") :
+                (isatomic ? "memoryrefsetonce!: atomic memory cannot be accessed non-atomically"
+                          : "memoryrefsetonce!: non-atomic memory cannot be accessed atomically"));
+        *ret = jl_cgval_t();
+        return true;
+    }
+    Value *mem = emit_memoryref_mem(ctx, ref, layout);
+    Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
+    if (bounds_check_enabled(ctx, boundscheck)) {
+        BasicBlock *failBB, *endBB;
+        failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
+        endBB = BasicBlock::Create(ctx.builder.getContext(), "load");
+        ctx.builder.CreateCondBr(ctx.builder.CreateIsNull(mlen), failBB, endBB);
+        failBB->insertInto(ctx.f);
+        ctx.builder.SetInsertPoint(failBB);
+        ctx.builder.CreateCall(prepare_call(jlboundserror_func), { mark_callee_rooted(ctx, mem), ConstantInt::get(ctx.types().T_size, 1) });
+        ctx.builder.CreateUnreachable();
+        endBB->insertInto(ctx.f);
+        ctx.builder.SetInsertPoint(endBB);
+    }
+    if (!ismodifymemory) {
+        emit_typecheck(ctx, val, ety, fname);
+        val = update_julia_type(ctx, val, ety);
+        if (val.typ == jl_bottom_type)
+            return true;
+    }
+    AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic)
+                            ? AtomicOrdering::NotAtomic
+                            : get_llvm_atomic_order(order);
+    AtomicOrdering FailOrder = (needlock || fail_order <= jl_memory_order_notatomic)
+                        ? AtomicOrdering::NotAtomic
+                        : get_llvm_atomic_order(fail_order);
+    if (isunion) {
+        assert(!isatomic && !needlock);
+        Value *V = emit_memoryref_FCA(ctx, ref, layout);
+        Value *idx0 = CreateSimplifiedExtractValue(ctx, V, 0);
+        Value *mem = CreateSimplifiedExtractValue(ctx, V, 1);
+        Value *data = emit_genericmemoryptr(ctx, mem, layout, AddressSpace::Loaded);
+        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al);
+        // compute tindex from val
+        Value *ptindex;
+        if (elsz == 0) {
+            ptindex = data;
+        }
+        else {
+            // isbits union selector bytes are stored after mem->length
+            ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen);
+            data = ctx.builder.CreateInBoundsGEP(AT, data, idx0);
+        }
+        ptindex = emit_ptrgep(ctx, ptindex, idx0);
+        *ret = union_store(ctx, data, ptindex, val, cmp, ety,
+            ctx.tbaa().tbaa_arraybuf, ctx.tbaa().tbaa_arrayselbyte,
+            Order, FailOrder,
+            nullptr, issetmemory, isreplacememory, isswapmemory, ismodifymemory, issetmemoryonce,
+            modifyop, fname);
+    }
+    else {
+        Value *ptr = (layout->size == 0 ? nullptr : emit_memoryref_ptr(ctx, ref, layout));
+        Value *lock = nullptr;
+        bool maybenull = true;
+        if (needlock) {
+            assert(ptr);
+            lock = ptr;
+            // ptr += sizeof(lock);
+            ptr = emit_ptrgep(ctx, ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
+        }
+        Value *data_owner = NULL; // owner object against which the write barrier must check
+        if (isboxed || layout->first_ptr >= 0) { // if elements are just bits, don't need a write barrier
+            data_owner = emit_memoryref_mem(ctx, ref, layout);
+        }
+        *ret = typed_store(ctx,
+                    ptr,
+                    val, cmp, ety,
+                    isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf,
+                    ctx.noalias().aliasscope.current,
+                    data_owner,
+                    isboxed,
+                    Order,
+                    FailOrder,
+                    al,
+                    lock,
+                    issetmemory,
+                    isreplacememory,
+                    isswapmemory,
+                    ismodifymemory,
+                    issetmemoryonce,
+                    maybenull,
+                    modifyop,
+                    fname,
+                    nullptr,
+                    nullptr);
+    }
+    return true;
+}
+
 static jl_llvm_functions_t
     emit_function(
         orc::ThreadSafeModule &TSM,
         jl_method_instance_t *lam,
         jl_code_info_t *src,
+        jl_value_t *abi,
         jl_value_t *jlrettype,
         jl_codegen_params_t &params);
 
-static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *type, jl_cgval_t name);
+static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_datatype_t *type, jl_cgval_t name);
 
 static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
-                              const jl_cgval_t *argv, size_t nargs, jl_value_t *rt,
+                              ArrayRef<jl_cgval_t> argv, size_t nargs, jl_value_t *rt,
                               jl_expr_t *ex, bool is_promotable)
 // returns true if the call has been handled
 {
     ++EmittedBuiltinCalls;
-    if (f == jl_builtin_is && nargs == 2) {
+    if (f == BUILTIN(is) && nargs == 2) {
         // emit comparison test
         Value *ans = emit_f_is(ctx, argv[1], argv[2]);
-        *ret = mark_julia_type(ctx, ctx.builder.CreateZExt(ans, getInt8Ty(ctx.builder.getContext())), false, jl_bool_type);
+        *ret = mark_julia_type(ctx, ans, false, jl_bool_type);
+        return true;
+    }
+
+    else if (f == BUILTIN(ifelse) && nargs == 3) {
+        *ret = emit_ifelse(ctx, argv[1], argv[2], argv[3], rt);
         return true;
     }
 
-    else if (f == jl_builtin_typeof && nargs == 1) {
+    else if (f == BUILTIN(typeof) && nargs == 1) {
         const jl_cgval_t &p = argv[1];
         if (p.constant)
             *ret = mark_julia_const(ctx, jl_typeof(p.constant));
@@ -3351,7 +4008,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         return true;
     }
 
-    else if (f == jl_builtin_typeassert && nargs == 2) {
+    else if (f == BUILTIN(typeassert) && nargs == 2) {
         const jl_cgval_t &arg = argv[1];
         const jl_cgval_t &ty = argv[2];
         if (jl_is_type_type(ty.typ) && !jl_has_free_typevars(ty.typ)) {
@@ -3369,20 +4026,18 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         }
     }
 
-    else if (f == jl_builtin_isa && nargs == 2) {
+    else if (f == BUILTIN(isa) && nargs == 2) {
         const jl_cgval_t &arg = argv[1];
         const jl_cgval_t &ty = argv[2];
         if (jl_is_type_type(ty.typ) && !jl_has_free_typevars(ty.typ)) {
             jl_value_t *tp0 = jl_tparam0(ty.typ);
-            Value *isa_result = emit_isa(ctx, arg, tp0, NULL).first;
-            if (isa_result->getType() == getInt1Ty(ctx.builder.getContext()))
-                isa_result = ctx.builder.CreateZExt(isa_result, getInt8Ty(ctx.builder.getContext()));
+            Value *isa_result = emit_isa(ctx, arg, tp0, Twine()).first;
             *ret = mark_julia_type(ctx, isa_result, false, jl_bool_type);
             return true;
         }
     }
 
-    else if (f == jl_builtin_issubtype && nargs == 2) {
+    else if (f == BUILTIN(issubtype) && nargs == 2) {
         const jl_cgval_t &ta = argv[1];
         const jl_cgval_t &tb = argv[2];
         if (jl_is_type_type(ta.typ) && !jl_has_free_typevars(ta.typ) &&
@@ -3393,306 +4048,373 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         }
     }
 
-    else if ((f == jl_builtin__apply_iterate && nargs == 3) && ctx.vaSlot > 0) {
+    else if (f == BUILTIN(_apply_iterate) && nargs == 3) {
         // turn Core._apply_iterate(iter, f, Tuple) ==> f(Tuple...) using the jlcall calling convention if Tuple is the va allocation
-        if (LoadInst *load = dyn_cast_or_null<LoadInst>(argv[3].V)) {
-            if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) {
-                Value *theF = boxed(ctx, argv[2]);
-                Value *nva = emit_n_varargs(ctx);
+        if (ctx.vaSlot > 0) {
+            if (LoadInst *load = dyn_cast_or_null<LoadInst>(argv[3].V)) {
+                if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) {
+                    Value *theF = boxed(ctx, argv[2]);
+                    Value *nva = emit_n_varargs(ctx);
 #ifdef _P64
-                nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext()));
+                    nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext()));
 #endif
-                Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs));
-                Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva });
-                *ret = mark_julia_type(ctx, r, true, jl_any_type);
-                return true;
+                    Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*));
+                    Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva });
+                    *ret = mark_julia_type(ctx, r, true, jl_any_type);
+                    return true;
+                }
             }
         }
+        // optimization for _apply_iterate when there is one argument and it is a SimpleVector
+        const jl_cgval_t &arg = argv[3];
+        if (arg.typ == (jl_value_t*)jl_simplevector_type) {
+            Value *theF = boxed(ctx, argv[2]);
+            Value *svec_val = boxed(ctx, arg);
+            Value *svec_len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, decay_derived(ctx, svec_val), Align(ctx.types().sizeof_ptr));
+#ifdef _P64
+            svec_len = ctx.builder.CreateTrunc(svec_len, getInt32Ty(ctx.builder.getContext()));
+#endif
+            Value *svec_data = emit_ptrgep(ctx, emit_pointer_from_objref(ctx, svec_val), ctx.types().sizeof_ptr);
+            OperandBundleDef OpBundle("jl_roots", svec_val);
+            Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, svec_data, svec_len }, OpBundle);
+            *ret = mark_julia_type(ctx, r, true, jl_any_type);
+            return true;
+        }
     }
 
-    else if (f == jl_builtin_tuple) {
+    else if (f == BUILTIN(tuple)) {
         if (nargs == 0) {
             *ret = ghostValue(ctx, jl_emptytuple_type);
             return true;
         }
         if (jl_is_tuple_type(rt) && jl_is_concrete_type(rt) && nargs == jl_datatype_nfields(rt)) {
-            *ret = emit_new_struct(ctx, rt, nargs, &argv[1], is_promotable);
+            *ret = emit_new_struct(ctx, rt, nargs, argv.drop_front(), is_promotable);
+            return true;
+        }
+    }
+
+    else if (f == BUILTIN(svec)) {
+        if (nargs == 0) {
+            *ret = mark_julia_const(ctx, (jl_value_t*)jl_emptysvec);
             return true;
         }
+        Value *svec = emit_allocobj(ctx, ctx.types().sizeof_ptr * (nargs + 1), ctx.builder.CreateIntToPtr(emit_tagfrom(ctx, jl_simplevector_type), ctx.types().T_pjlvalue), true, julia_alignment((jl_value_t*)jl_simplevector_type));
+        Value *svec_derived = decay_derived(ctx, svec);
+        ctx.builder.CreateAlignedStore(ConstantInt::get(ctx.types().T_size, nargs), svec_derived, Align(ctx.types().sizeof_ptr));
+        Value *svec_data = emit_ptrgep(ctx, svec_derived, ctx.types().sizeof_ptr);
+        ctx.builder.CreateMemSet(svec_data, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ctx.types().sizeof_ptr * nargs, Align(ctx.types().sizeof_ptr));
+        for (size_t i = 0; i < nargs; i++) {
+            Value *elem = boxed(ctx, argv[i + 1]);
+            Value *elem_ptr = emit_ptrgep(ctx, svec_derived, ctx.types().sizeof_ptr * (i + 1));
+            auto *store = ctx.builder.CreateAlignedStore(elem, elem_ptr, Align(ctx.types().sizeof_ptr));
+            store->setOrdering(AtomicOrdering::Release);
+            emit_write_barrier(ctx, svec, elem);
+        }
+        *ret = mark_julia_type(ctx, svec, true, jl_simplevector_type);
+        return true;
     }
 
-    else if (f == jl_builtin_throw && nargs == 1) {
+    else if (f == BUILTIN(throw) && nargs == 1) {
         Value *arg1 = boxed(ctx, argv[1]);
         raise_exception(ctx, arg1);
         *ret = jl_cgval_t();
         return true;
     }
 
-    else if (f == jl_builtin_arraysize && nargs == 2) {
-        const jl_cgval_t &ary = argv[1];
-        const jl_cgval_t &idx = argv[2];
-        jl_value_t *aty = jl_unwrap_unionall(ary.typ);
-        if (jl_is_array_type(aty) && idx.typ == (jl_value_t*)jl_long_type) {
-            jl_value_t *ndp = jl_tparam1(aty);
-            if (jl_is_long(ndp)) {
-                size_t ndims = jl_unbox_long(ndp);
-                if (idx.constant) {
-                    uint32_t idx_const = (uint32_t)jl_unbox_long(idx.constant);
-                    if (idx_const > 0 && idx_const <= ndims) {
-                        jl_value_t *ary_ex = jl_exprarg(ex, 1);
-                        *ret = mark_julia_type(ctx, emit_arraysize(ctx, ary, ary_ex, idx_const), false, jl_long_type);
-                        return true;
-                    }
-                    else if (idx_const > ndims) {
-                        *ret = mark_julia_type(ctx, ConstantInt::get(ctx.types().T_size, 1), false, jl_long_type);
-                        return true;
-                    }
-                }
-                else {
-                    Value *idx_dyn = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type);
-                    auto positive = ctx.builder.CreateICmpSGT(idx_dyn, Constant::getNullValue(ctx.types().T_size));
-                    setName(ctx.emission_context, positive, "ispositive");
-                    error_unless(ctx, positive, "arraysize: dimension out of range");
-                    BasicBlock *outBB = BasicBlock::Create(ctx.builder.getContext(), "outofrange", ctx.f);
-                    BasicBlock *inBB = BasicBlock::Create(ctx.builder.getContext(), "inrange");
-                    BasicBlock *ansBB = BasicBlock::Create(ctx.builder.getContext(), "arraysize");
-                    auto oor = ctx.builder.CreateICmpSLE(idx_dyn,
-                                ConstantInt::get(ctx.types().T_size, ndims));
-                    setName(ctx.emission_context, oor, "sizeddim");
-                    ctx.builder.CreateCondBr(oor, inBB, outBB);
-                    ctx.builder.SetInsertPoint(outBB);
-                    Value *v_one = ConstantInt::get(ctx.types().T_size, 1);
-                    ctx.builder.CreateBr(ansBB);
-                    ctx.f->getBasicBlockList().push_back(inBB);
-                    ctx.builder.SetInsertPoint(inBB);
-                    Value *v_sz = emit_arraysize(ctx, ary, idx_dyn);
-                    ctx.builder.CreateBr(ansBB);
-                    inBB = ctx.builder.GetInsertBlock(); // could have changed
-                    ctx.f->getBasicBlockList().push_back(ansBB);
-                    ctx.builder.SetInsertPoint(ansBB);
-                    PHINode *result = ctx.builder.CreatePHI(ctx.types().T_size, 2);
-                    result->addIncoming(v_one, outBB);
-                    result->addIncoming(v_sz, inBB);
-                    setName(ctx.emission_context, result, "arraysize");
-                    *ret = mark_julia_type(ctx, result, false, jl_long_type);
-                    return true;
-                }
-            }
+    else if (f == BUILTIN(memorynew) && (nargs == 2)) {
+        const jl_cgval_t &memty = argv[1];
+        if (!memty.constant)
+            return false;
+        jl_datatype_t *typ = (jl_datatype_t*) memty.constant;
+        if (!jl_is_concrete_type((jl_value_t*)typ) || !jl_is_genericmemory_type(typ))
+            return false;
+        jl_genericmemory_t *inst = (jl_genericmemory_t*)((jl_datatype_t*)typ)->instance;
+        if (inst == NULL)
+            return false;
+        if (argv[2].constant) {
+            if (!jl_is_long(argv[2].constant))
+                return false;
+            // Cast to a unsigned size and let `emit_const_len_memorynew`
+            // make sure that `nel` fits in a positive signed integer.
+            size_t nel = (size_t)jl_unbox_long(argv[2].constant);
+            *ret = emit_const_len_memorynew(ctx, typ, nel, inst);
+        }
+        else {
+            *ret = emit_memorynew(ctx, typ, argv[2], inst);
         }
+        return true;
     }
 
-    else if ((f == jl_builtin_arrayref || f == jl_builtin_const_arrayref) && nargs >= 3) {
-        const jl_cgval_t &ary = argv[2];
-        bool indices_ok = true;
-        for (size_t i = 3; i <= nargs; i++) {
-            if (argv[i].typ != (jl_value_t*)jl_long_type) {
-                indices_ok = false;
-                break;
-            }
-        }
-        jl_value_t *aty_dt = jl_unwrap_unionall(ary.typ);
-        if (jl_is_array_type(aty_dt) && indices_ok) {
-            jl_value_t *ety = jl_tparam0(aty_dt);
-            jl_value_t *ndp = jl_tparam1(aty_dt);
-            if (!jl_has_free_typevars(ety) && (jl_is_long(ndp) || nargs == 3)) {
-                jl_value_t *ary_ex = jl_exprarg(ex, 2);
-                size_t elsz = 0, al = 0;
-                int union_max = jl_islayout_inline(ety, &elsz, &al);
-                bool isboxed = (union_max == 0);
-                if (isboxed)
-                    ety = (jl_value_t*)jl_any_type;
-                ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
-                jl_value_t *boundscheck = argv[1].constant;
-                emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayref");
-                Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[3], nargs - 2, boundscheck);
-                if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
-                    assert(((jl_datatype_t*)ety)->instance != NULL);
-                    *ret = ghostValue(ctx, ety);
-                }
-                else if (!isboxed && jl_is_uniontype(ety)) {
-                    Value *data = emit_arrayptr(ctx, ary, ary_ex);
-                    Value *offset = emit_arrayoffset(ctx, ary, nd);
-                    Value *ptindex;
-                    if (elsz == 0) {
-                        ptindex = data;
-                    }
-                    else {
-                        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al);
-                        data = emit_bitcast(ctx, data, AT->getPointerTo());
-                        // isbits union selector bytes are stored after a->maxsize
-                        Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd));
-                        setName(ctx.emission_context, ndims, "ndims");
-                        Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1));
-                        setName(ctx.emission_context, is_vector, "isvec");
-                        Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, ctx.types().T_size));
-                        setName(ctx.emission_context, selidx_v, "selidx_v");
-                        Value *selidx_m = emit_arraylen(ctx, ary);
-                        Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
-                        setName(ctx.emission_context, selidx, "selidx");
-                        ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
-                        setName(ctx.emission_context, ptindex, "ptindex");
-                        data = ctx.builder.CreateInBoundsGEP(AT, data, idx);
-                        setName(ctx.emission_context, data, "data");
-                    }
-                    ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext()));
-                    ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset);
-                    ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx);
-                    *ret = emit_unionload(ctx, data, ptindex, ety, elsz, al, ctx.tbaa().tbaa_arraybuf, true, union_max, ctx.tbaa().tbaa_arrayselbyte);
-                    if (ret->V)
-                        setName(ctx.emission_context, ret->V, "arrayref");
-                }
-                else {
-                    MDNode *aliasscope = (f == jl_builtin_const_arrayref) ? ctx.noalias().aliasscope.current : nullptr;
-                    *ret = typed_load(ctx,
-                            emit_arrayptr(ctx, ary, ary_ex),
-                            idx, ety,
-                            isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf,
-                            aliasscope,
-                            isboxed,
-                            AtomicOrdering::NotAtomic);
-                    if (ret->V)
-                        setName(ctx.emission_context, ret->V, "arrayref");
-                }
-                return true;
-            }
+    else if (f == BUILTIN(memoryrefnew) && nargs == 1) {
+        const jl_cgval_t &mem = argv[1];
+        jl_datatype_t *mty_dt = (jl_datatype_t*)jl_unwrap_unionall(mem.typ);
+        if (jl_is_genericmemory_type(mty_dt) && jl_is_concrete_type((jl_value_t*)mty_dt)) {
+            jl_value_t *typ = jl_apply_type((jl_value_t*)jl_genericmemoryref_type, jl_svec_data(mty_dt->parameters), jl_svec_len(mty_dt->parameters));
+            const jl_datatype_layout_t *layout = mty_dt->layout;
+            *ret = _emit_memoryref(ctx, mem, layout, typ);
+            return true;
         }
     }
 
-    else if (f == jl_builtin_arrayset && nargs >= 4) {
-        const jl_cgval_t &ary = argv[2];
-        jl_cgval_t val = argv[3];
-        bool indices_ok = true;
-        for (size_t i = 4; i <= nargs; i++) {
-            if (argv[i].typ != (jl_value_t*)jl_long_type) {
-                indices_ok = false;
-                break;
-            }
+    else if (f == BUILTIN(memoryrefnew) && (nargs == 2 || nargs == 3)) {
+        const jl_cgval_t &ref = argv[1];
+        jl_datatype_t *mty_dt = (jl_datatype_t*)jl_unwrap_unionall(ref.typ);
+        if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type((jl_value_t*)mty_dt)) {
+            mty_dt = (jl_datatype_t*)jl_field_type_concrete(mty_dt, 1);
+            const jl_datatype_layout_t *layout = mty_dt->layout;
+            jl_value_t *boundscheck = nargs == 3 ? argv[3].constant : nullptr;
+            if (nargs == 3)
+                emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "memoryrefnew");
+            *ret = emit_memoryref(ctx, ref, argv[2], boundscheck, layout);
+            return true;
         }
-        jl_value_t *aty_dt = jl_unwrap_unionall(ary.typ);
-        if (jl_is_array_type(aty_dt) && indices_ok) {
-            jl_value_t *ety = jl_tparam0(aty_dt);
-            jl_value_t *ndp = jl_tparam1(aty_dt);
-            if (!jl_has_free_typevars(ety) && (jl_is_long(ndp) || nargs == 4)) {
-                emit_typecheck(ctx, val, ety, "arrayset");
-                val = update_julia_type(ctx, val, ety);
-                if (val.typ == jl_bottom_type)
-                    return true;
-                size_t elsz = 0, al = 0;
-                int union_max = jl_islayout_inline(ety, &elsz, &al);
-                bool isboxed = (union_max == 0);
-                if (isboxed)
-                    ety = (jl_value_t*)jl_any_type;
-                jl_value_t *ary_ex = jl_exprarg(ex, 2);
-                ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
-                jl_value_t *boundscheck = argv[1].constant;
-                emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayset");
-                Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck);
-                if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
-                    // no-op
+        if (jl_is_genericmemory_type(mty_dt) && jl_is_concrete_type((jl_value_t*)mty_dt)) {
+            const jl_datatype_layout_t *layout = mty_dt->layout;
+            jl_value_t *boundscheck = nargs == 3 ? argv[3].constant : nullptr;
+            if (nargs == 3)
+                emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "memoryrefnew");
+            jl_value_t *typ = jl_apply_type((jl_value_t*)jl_genericmemoryref_type, jl_svec_data(mty_dt->parameters), jl_svec_len(mty_dt->parameters));
+            *ret = emit_memoryref_direct(ctx, ref, argv[2], typ, boundscheck, layout);
+            return true;
+        }
+    }
+
+    else if (f == BUILTIN(memoryrefoffset) && nargs == 1) {
+        const jl_cgval_t &ref = argv[1];
+        jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ);
+        if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) {
+            mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1);
+            const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
+            *ret = emit_memoryref_offset(ctx, ref, layout);
+            return true;
+        }
+    }
+
+    else if (f == BUILTIN(memoryrefget) && nargs == 3) {
+        const jl_cgval_t &ref = argv[1];
+        jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ);
+        if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) {
+            jl_value_t *kind = jl_tparam0(mty_dt);
+            jl_value_t *ety = jl_tparam1(mty_dt);
+            jl_value_t *addrspace = jl_tparam2(mty_dt); (void)addrspace; // TODO
+            mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1);
+            if (kind != (jl_value_t*)jl_not_atomic_sym && kind != (jl_value_t*)jl_atomic_sym)
+                return false;
+            enum jl_memory_order order = jl_memory_order_unspecified;
+            const std::string fname = "memoryrefget";
+            {
+                const jl_cgval_t &ord = argv[2];
+                emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+                if (!ord.constant)
+                    return false;
+                order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+            }
+            if (order == jl_memory_order_invalid) {
+                emit_atomic_error(ctx, "invalid atomic ordering");
+                *ret = jl_cgval_t(); // unreachable
+                return true;
+            }
+            bool isatomic = kind == (jl_value_t*)jl_atomic_sym;
+            if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+                emit_atomic_error(ctx, "memoryrefget: non-atomic memory cannot be accessed atomically");
+                *ret = jl_cgval_t(); // unreachable
+                return true;
+            }
+            if (isatomic && order == jl_memory_order_notatomic) {
+                emit_atomic_error(ctx, "memoryrefget: atomic memory cannot be accessed non-atomically");
+                *ret = jl_cgval_t(); // unreachable
+                return true;
+            }
+            if (order == jl_memory_order_unspecified) {
+                order = isatomic ? jl_memory_order_unordered : jl_memory_order_notatomic;
+            }
+            jl_value_t *boundscheck = argv[3].constant;
+            emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "memoryrefget");
+            const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
+            Value *mem = emit_memoryref_mem(ctx, ref, layout);
+            Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
+            if (bounds_check_enabled(ctx, boundscheck)) {
+                BasicBlock *failBB, *endBB;
+                failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
+                endBB = BasicBlock::Create(ctx.builder.getContext(), "load");
+                ctx.builder.CreateCondBr(ctx.builder.CreateIsNull(mlen), failBB, endBB);
+                failBB->insertInto(ctx.f);
+                ctx.builder.SetInsertPoint(failBB);
+                ctx.builder.CreateCall(prepare_call(jlboundserror_func), { mark_callee_rooted(ctx, mem), ConstantInt::get(ctx.types().T_size, 1) });
+                ctx.builder.CreateUnreachable();
+                endBB->insertInto(ctx.f);
+                ctx.builder.SetInsertPoint(endBB);
+            }
+            bool isboxed = layout->flags.arrayelem_isboxed;
+            bool isunion = layout->flags.arrayelem_isunion;
+            size_t elsz = layout->size;
+            size_t al = layout->alignment;
+            if (al > JL_HEAP_ALIGNMENT)
+                al = JL_HEAP_ALIGNMENT;
+            bool needlock = layout->flags.arrayelem_islocked;
+            AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic)
+                                    ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic)
+                                    : get_llvm_atomic_order(order);
+            bool maybenull = true;
+            if (!isboxed && !isunion && elsz == 0) {
+                assert(jl_is_datatype(ety) && jl_is_datatype_singleton((jl_datatype_t*)ety));
+                *ret = ghostValue(ctx, ety);
+                if (isStrongerThanMonotonic(Order))
+                    ctx.builder.CreateFence(Order);
+            }
+            else if (isunion) {
+                assert(!isatomic && !needlock);
+                Value *V = emit_memoryref_FCA(ctx, ref, layout);
+                Value *idx0 = CreateSimplifiedExtractValue(ctx, V, 0);
+                Value *mem = CreateSimplifiedExtractValue(ctx, V, 1);
+                Value *data = emit_genericmemoryptr(ctx, mem, layout, AddressSpace::Loaded);
+                Value *ptindex;
+                if (elsz == 0) {
+                    ptindex = data;
                 }
                 else {
-                    PHINode *data_owner = NULL; // owner object against which the write barrier must check
-                    if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier
-                        Value *aryv = boxed(ctx, ary);
-                        Value *flags = emit_arrayflags(ctx, ary);
-                        // the owner of the data is ary itself except if ary->how == 3
-                        flags = ctx.builder.CreateAnd(flags, 3);
-                        Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 3));
-                        setName(ctx.emission_context, is_owned, "has_owner");
-                        BasicBlock *curBB = ctx.builder.GetInsertBlock();
-                        BasicBlock *ownedBB = BasicBlock::Create(ctx.builder.getContext(), "array_owned", ctx.f);
-                        BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge_own", ctx.f);
-                        ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB);
-                        ctx.builder.SetInsertPoint(ownedBB);
-                        // load owner pointer
-                        Instruction *own_ptr;
-                        if (jl_is_long(ndp)) {
-                            own_ptr = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
-                                    ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue,
-                                        emit_bitcast(ctx, decay_derived(ctx, aryv), ctx.types().T_pprjlvalue),
-                                        jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)),
-                                    Align(sizeof(void*)));
-                            setName(ctx.emission_context, own_ptr, "external_owner");
-                            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-                            ai.decorateInst(maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type));
-                        }
-                        else {
-                            own_ptr = ctx.builder.CreateCall(
-                                prepare_call(jlarray_data_owner_func),
-                                {aryv});
-                        }
-                        ctx.builder.CreateBr(mergeBB);
-                        ctx.builder.SetInsertPoint(mergeBB);
-                        data_owner = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2);
-                        data_owner->addIncoming(aryv, curBB);
-                        data_owner->addIncoming(own_ptr, ownedBB);
-                        setName(ctx.emission_context, data_owner, "data_owner");
-                    }
-                    if (!isboxed && jl_is_uniontype(ety)) {
-                        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al);
-                        Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
-                        Value *offset = emit_arrayoffset(ctx, ary, nd);
-                        // compute tindex from val
-                        jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety);
-                        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety);
-                        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1));
-                        Value *ptindex;
-                        if (elsz == 0) {
-                            ptindex = data;
-                        }
-                        else {
-                            Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd));
-                            Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1));
-                            setName(ctx.emission_context, is_vector, "is_vector");
-                            Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, ctx.types().T_size));
-                            setName(ctx.emission_context, selidx_v, "selidx_v");
-                            Value *selidx_m = emit_arraylen(ctx, ary);
-                            Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
-                            setName(ctx.emission_context, selidx, "selidx");
-                            ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
-                            setName(ctx.emission_context, ptindex, "ptindex");
-                            data = ctx.builder.CreateInBoundsGEP(AT, data, idx);
-                            setName(ctx.emission_context, data, "data");
-                        }
-                        ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext()));
-                        ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset);
-                        ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx);
-                        setName(ctx.emission_context, ptindex, "ptindex");
-                        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayselbyte);
-                        ai.decorateInst(ctx.builder.CreateStore(tindex, ptindex));
-                        if (elsz > 0 && (!jl_is_datatype(val.typ) || jl_datatype_size(val.typ) > 0)) {
-                            // copy data (if any)
-                            emit_unionmove(ctx, data, ctx.tbaa().tbaa_arraybuf, val, nullptr);
-                        }
-                    }
-                    else {
-                        typed_store(ctx,
-                                    emit_arrayptr(ctx, ary, ary_ex, isboxed),
-                                    idx, val, jl_cgval_t(), ety,
-                                    isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf,
-                                    ctx.noalias().aliasscope.current,
-                                    data_owner,
-                                    isboxed,
-                                    isboxed ? AtomicOrdering::Release : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
-                                    /*FailOrder*/AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
-                                    0,
-                                    false,
-                                    true,
-                                    false,
-                                    false,
-                                    false,
-                                    false,
-                                    nullptr,
-                                    "");
-                    }
+                    Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al);
+                    // isbits union selector bytes are stored after mem->length bytes
+                    ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen);
+                    data = ctx.builder.CreateInBoundsGEP(AT, data, idx0);
+                }
+                ptindex = emit_ptrgep(ctx, ptindex, idx0);
+                size_t elsz_c = 0, al_c = 0;
+                int union_max = jl_islayout_inline(ety, &elsz_c, &al_c);
+                assert(union_max && LLT_ALIGN(elsz_c, al_c) == elsz && al_c == al);
+                *ret = emit_unionload(ctx, data, ptindex, ety, elsz_c, al, ctx.tbaa().tbaa_arraybuf, true, union_max, ctx.tbaa().tbaa_arrayselbyte);
+            }
+            else {
+                Value *ptr = (layout->size == 0 ? nullptr : emit_memoryref_ptr(ctx, ref, layout));
+                Value *lock = nullptr;
+                if (needlock) {
+                    assert(ptr);
+                    lock = ptr;
+                    // ptr += sizeof(lock);
+                    ptr = emit_ptrgep(ctx, ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
+                    emit_lockstate_value(ctx, lock, true);
                 }
-                *ret = ary;
+                *ret = typed_load(ctx, ptr, nullptr, ety,
+                        isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf,
+                        ctx.noalias().aliasscope.current,
+                        isboxed, Order, maybenull, al);
+                if (needlock) {
+                    emit_lockstate_value(ctx, lock, false);
+                }
+            }
+            return true;
+        }
+    }
+
+    else if ((f == BUILTIN(memoryrefset) && nargs == 4) ||
+             (f == BUILTIN(memoryrefswap) && nargs == 4) ||
+             (f == BUILTIN(memoryrefreplace) && nargs == 6) ||
+             (f == BUILTIN(memoryrefmodify) && nargs == 5) ||
+             (f == BUILTIN(memoryrefsetonce) && nargs == 5)) {
+        return emit_f_opmemory(ctx, ret, f, argv, nargs, nullptr);
+    }
+
+
+    else if (f == BUILTIN(memoryref_isassigned) && nargs == 3) {
+        const jl_cgval_t &ref = argv[1];
+        jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ);
+        if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) {
+            jl_value_t *kind = jl_tparam0(mty_dt);
+            mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1);
+            if (kind != (jl_value_t*)jl_not_atomic_sym && kind != (jl_value_t*)jl_atomic_sym)
+                return false;
+            enum jl_memory_order order = jl_memory_order_unspecified;
+            const std::string fname = "memoryref_isassigned";
+            {
+                const jl_cgval_t &ord = argv[2];
+                emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+                if (!ord.constant)
+                    return false;
+                order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+            }
+            if (order == jl_memory_order_invalid) {
+                emit_atomic_error(ctx, "invalid atomic ordering");
+                *ret = jl_cgval_t(); // unreachable
+                return true;
+            }
+            const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
+            bool isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
+            if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+                emit_atomic_error(ctx, "memoryref_isassigned: non-atomic memory cannot be accessed atomically");
+                *ret = jl_cgval_t(); // unreachable
+                return true;
+            }
+            if (isatomic && order == jl_memory_order_notatomic) {
+                emit_atomic_error(ctx, "memoryref_isassigned: atomic memory cannot be accessed non-atomically");
+                *ret = jl_cgval_t(); // unreachable
                 return true;
             }
+            if (order == jl_memory_order_unspecified) {
+                order = isatomic ? jl_memory_order_unordered : jl_memory_order_notatomic;
+            }
+            jl_value_t *boundscheck = argv[3].constant;
+            emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, fname);
+            Value *mem = emit_memoryref_mem(ctx, ref, layout);
+            Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
+            Value *oob = bounds_check_enabled(ctx, boundscheck) ? ctx.builder.CreateIsNull(mlen) : nullptr;
+            bool isboxed = layout->flags.arrayelem_isboxed;
+            if (isboxed || layout->first_ptr >= 0) {
+                bool needlock = layout->flags.arrayelem_islocked;
+                AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic)
+                                        ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic)
+                                        : get_llvm_atomic_order(order);
+                PHINode *result = nullptr;
+                if (oob) {
+                    BasicBlock *passBB, *endBB, *fromBB;
+                    passBB = BasicBlock::Create(ctx.builder.getContext(), "load");
+                    endBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
+
+                    passBB->insertInto(ctx.f);
+                    endBB->insertInto(ctx.f);
+                    fromBB = ctx.builder.CreateCondBr(oob, endBB, passBB)->getParent();
+                    ctx.builder.SetInsertPoint(endBB);
+                    result = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2);
+                    result->addIncoming(ConstantInt::get(result->getType(), 0), fromBB);
+                    setName(ctx.emission_context, result, "arraysize");
+                    ctx.builder.SetInsertPoint(passBB);
+                }
+                Value *elem = emit_memoryref_ptr(ctx, ref, layout);
+                if (!isboxed)
+                    elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*));
+                else if (needlock)
+                    // n.b. no actual lock acquire needed, as the check itself only needs to load a single pointer and check for null
+                    // elem += sizeof(lock);
+                    elem = emit_ptrgep(ctx, elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
+                // emit this using the same type as BUILTIN(memoryrefget)
+                // so that LLVM may be able to load-load forward them and fold the result
+                auto tbaa = isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf;
+                jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
+                LoadInst *fldv = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, elem, ctx.types().alignof_ptr);
+                fldv->setOrdering(Order);
+                ai.decorateInst(fldv);
+                Value *isdef = ctx.builder.CreateIsNotNull(fldv);
+                setName(ctx.emission_context, isdef, fname);
+                if (oob) {
+                    assert(result);
+                    result->addIncoming(isdef, ctx.builder.CreateBr(result->getParent())->getParent());
+                    ctx.builder.SetInsertPoint(result->getParent());
+                    isdef = result;
+                }
+                *ret = mark_julia_type(ctx, isdef, false, jl_bool_type);
+            }
+            else if (oob) {
+                Value *isdef = ctx.builder.CreateNot(oob);
+                *ret = mark_julia_type(ctx, isdef, false, jl_bool_type);
+            }
+            else {
+                *ret = mark_julia_const(ctx, jl_true);
+            }
+            return true;
         }
     }
 
-    else if (f == jl_builtin_getfield && (nargs == 2 || nargs == 3 || nargs == 4)) {
+
+    else if (f == BUILTIN(getfield) && (nargs == 2 || nargs == 3 || nargs == 4)) {
         const jl_cgval_t &obj = argv[1];
         const jl_cgval_t &fld = argv[2];
         enum jl_memory_order order = jl_memory_order_unspecified;
@@ -3750,7 +4472,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) {
                         Value *valen = emit_n_varargs(ctx);
                         jl_cgval_t va_ary( // fake instantiation of a cgval, in order to call emit_bounds_check (it only checks the `.V` field)
-                                ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)),
+                                emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)),
                                 NULL, NULL);
                         Value *idx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type);
                         idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck);
@@ -3791,61 +4513,68 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     // For tuples, we can emit code even if we don't know the exact
                     // type (e.g. because we don't know the length). This is possible
                     // as long as we know that all elements are of the same (leaf) type.
-                    if (obj.ispointer()) {
-                        if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
-                            emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
-                            *ret = jl_cgval_t(); // unreachable
-                            return true;
-                        }
-                        // Determine which was the type that was homogeneous
-                        jl_value_t *jt = jl_tparam0(utt);
-                        if (jl_is_vararg(jt))
-                            jt = jl_unwrap_vararg(jt);
-                        assert(jl_is_datatype(jt));
-                        // This is not necessary for correctness, but allows to omit
-                        // the extra code for getting the length of the tuple
-                        if (!bounds_check_enabled(ctx, boundscheck)) {
-                            vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1));
-                        }
-                        else {
-                            vidx = emit_bounds_check(ctx, obj, (jl_value_t*)obj.typ, vidx,
-                                emit_datatype_nfields(ctx, emit_typeof(ctx, obj, false, false)),
-                                jl_true);
-                        }
-                        bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0);
-                        Value *ptr = data_pointer(ctx, obj);
-                        *ret = typed_load(ctx, ptr, vidx,
-                                isboxed ? (jl_value_t*)jl_any_type : jt,
-                                obj.tbaa, nullptr, isboxed, AtomicOrdering::NotAtomic, false);
+                    jl_cgval_t ptrobj = obj.isboxed ? obj : value_to_pointer(ctx, obj);
+                    if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+                        emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
+                        *ret = jl_cgval_t(); // unreachable
                         return true;
                     }
+                    // Determine which was the type that was homogeneous
+                    jl_value_t *jt = jl_tparam0(utt);
+                    if (jl_is_vararg(jt))
+                        jt = jl_unwrap_vararg(jt);
+                    assert(jl_is_datatype(jt));
+                    // This is not necessary for correctness, but allows to omit
+                    // the extra code for getting the length of the tuple
+                    if (!bounds_check_enabled(ctx, boundscheck)) {
+                        vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1));
+                    }
+                    else {
+                        vidx = emit_bounds_check(ctx, ptrobj, (jl_value_t*)ptrobj.typ, vidx,
+                            emit_datatype_nfields(ctx, emit_typeof(ctx, ptrobj, false, false)),
+                            jl_true);
+                    }
+                    bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0);
+                    Value *ptr = data_pointer(ctx, ptrobj);
+                    *ret = typed_load(ctx, ptr, vidx,
+                            isboxed ? (jl_value_t*)jl_any_type : jt,
+                            ptrobj.tbaa, nullptr, isboxed, AtomicOrdering::NotAtomic, false);
+                    return true;
                 }
 
                 // Unknown object, but field known to be integer
                 vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1));
-                Value *fld_val = ctx.builder.CreateCall(prepare_call(jlgetnthfieldchecked_func), { boxed(ctx, obj), vidx });
+                Value *fld_val = ctx.builder.CreateCall(prepare_call(jlgetnthfieldchecked_func), { boxed(ctx, obj), vidx }, "getfield");
                 *ret = mark_julia_type(ctx, fld_val, true, jl_any_type);
                 return true;
             }
         }
-        else if (fld.typ == (jl_value_t*)jl_symbol_type) {
-            if (jl_is_datatype(utt) && !jl_is_namedtuple_type(utt)) { // TODO: Look into this for NamedTuple
-                if (jl_struct_try_layout(utt) && (jl_datatype_nfields(utt) == 1)) {
+        else if (fld.typ == (jl_value_t*)jl_symbol_type) { // Known type but unknown symbol
+            if (jl_is_datatype(utt) && (utt != jl_module_type) && jl_struct_try_layout(utt)) {
+                if ((jl_datatype_nfields(utt) == 1 && !jl_is_namedtuple_type(utt) && !jl_is_tuple_type(utt))) {
                     jl_svec_t *fn = jl_field_names(utt);
                     assert(jl_svec_len(fn) == 1);
                     Value *typ_sym = literal_pointer_val(ctx, jl_svecref(fn, 0));
                     Value *cond = ctx.builder.CreateICmpEQ(mark_callee_rooted(ctx, typ_sym), mark_callee_rooted(ctx, boxed(ctx, fld)));
-                    emit_hasnofield_error_ifnot(ctx, cond, utt->name->name, fld);
+                    emit_hasnofield_error_ifnot(ctx, cond, utt, fld);
                     *ret = emit_getfield_knownidx(ctx, obj, 0, utt, order);
                     return true;
                 }
+                else {
+                    Value *index = ctx.builder.CreateCall(prepare_call(jlfieldindex_func),
+                            {emit_typeof(ctx, obj, false, false), boxed(ctx, fld), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)});
+                    Value *cond = ctx.builder.CreateICmpNE(index, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), -1));
+                    emit_hasnofield_error_ifnot(ctx, cond, utt, fld);
+                    Value *idx2 = ctx.builder.CreateAdd(ctx.builder.CreateIntCast(index, ctx.types().T_size, false), ConstantInt::get(ctx.types().T_size, 1)); // getfield_unknown is 1 based
+                    if (emit_getfield_unknownidx(ctx, ret, obj, idx2, utt, jl_false, order))
+                        return true;
+                }
             }
         }
-        // TODO: generic getfield func with more efficient calling convention
         return false;
     }
 
-    else if (f == jl_builtin_getglobal && (nargs == 2 || nargs == 3)) {
+    else if (f == BUILTIN(getglobal) && (nargs == 2 || nargs == 3)) {
         const jl_cgval_t &mod = argv[1];
         const jl_cgval_t &sym = argv[2];
         enum jl_memory_order order = jl_memory_order_unspecified;
@@ -3877,18 +4606,37 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         return false;
     }
 
-    else if (f == jl_builtin_setglobal && (nargs == 3 || nargs == 4)) {
+    else if ((f == BUILTIN(setglobal) && (nargs == 3 || nargs == 4)) ||
+             (f == BUILTIN(swapglobal) && (nargs == 3 || nargs == 4)) ||
+             (f == BUILTIN(replaceglobal) && (nargs == 4 || nargs == 5 || nargs == 6)) ||
+             (f == BUILTIN(modifyglobal) && (nargs == 4 || nargs == 5)) ||
+             (f == BUILTIN(setglobalonce) && (nargs == 3 || nargs == 4 || nargs == 5))) {
         return emit_f_opglobal(ctx, ret, f, argv, nargs, nullptr);
     }
 
-    else if ((f == jl_builtin_setfield && (nargs == 3 || nargs == 4)) ||
-             (f == jl_builtin_swapfield && (nargs == 3 || nargs == 4)) ||
-             (f == jl_builtin_replacefield && (nargs == 4 || nargs == 5 || nargs == 6)) ||
-             (f == jl_builtin_modifyfield && (nargs == 4 || nargs == 5))) {
+    else if ((f == BUILTIN(setfield) && (nargs == 3 || nargs == 4)) ||
+             (f == BUILTIN(swapfield) && (nargs == 3 || nargs == 4)) ||
+             (f == BUILTIN(replacefield) && (nargs == 4 || nargs == 5 || nargs == 6)) ||
+             (f == BUILTIN(modifyfield) && (nargs == 4 || nargs == 5)) ||
+             (f == BUILTIN(setfieldonce) && (nargs == 3 || nargs == 4 || nargs == 5))) {
         return emit_f_opfield(ctx, ret, f, argv, nargs, nullptr);
     }
 
-    else if (f == jl_builtin_nfields && nargs == 1) {
+    else if (f == BUILTIN(_svec_len) && nargs == 1) {
+        const jl_cgval_t &obj = argv[1];
+        Value *len;
+        if (obj.constant && jl_is_svec(obj.constant)) {
+            len = ConstantInt::get(ctx.types().T_size, jl_svec_len(obj.constant));
+        }
+        else {
+            Value *svec_val = decay_derived(ctx, boxed(ctx, obj));
+            len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, svec_val, Align(ctx.types().sizeof_ptr));
+        }
+        *ret = mark_julia_type(ctx, len, false, jl_long_type);
+        return true;
+    }
+
+    else if (f == BUILTIN(nfields) && nargs == 1) {
         const jl_cgval_t &obj = argv[1];
         if (ctx.vaSlot > 0) {
             // optimize VA tuple
@@ -3920,7 +4668,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         return true;
     }
 
-    else if (f == jl_builtin_fieldtype && (nargs == 2 || nargs == 3)) {
+    else if (f == BUILTIN(fieldtype) && (nargs == 2 || nargs == 3)) {
         const jl_cgval_t &typ = argv[1];
         const jl_cgval_t &fld = argv[2];
         if ((jl_is_type_type(typ.typ) && jl_is_concrete_type(jl_tparam0(typ.typ))) ||
@@ -3935,7 +4683,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 if (nargs == 3)
                     emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "fieldtype");
                 emit_bounds_check(ctx, typ, (jl_value_t*)jl_datatype_type, idx, types_len, boundscheck);
-                Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, emit_bitcast(ctx, types_svec, ctx.types().T_pprjlvalue)), idx);
+                Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, types_svec), idx);
                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
                 Value *fieldtyp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fieldtyp_p, Align(sizeof(void*))));
                 setName(ctx.emission_context, fieldtyp, "fieldtype");
@@ -3945,7 +4693,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         }
     }
 
-    else if (f == jl_builtin_sizeof && nargs == 1) {
+    else if (f == BUILTIN(sizeof) && nargs == 1) {
         const jl_cgval_t &obj = argv[1];
         jl_datatype_t *sty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
         assert(jl_string_type->name->mutabl);
@@ -3962,7 +4710,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 return true;
             }
             // String and SimpleVector's length fields have the same layout
-            auto ptr = emit_bitcast(ctx, boxed(ctx, obj), ctx.types().T_size->getPointerTo());
+            auto ptr = boxed(ctx, obj);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
             Value *len = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, ptr, ctx.types().alignof_ptr));
             MDBuilder MDB(ctx.builder.getContext());
@@ -3981,36 +4729,66 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             *ret = mark_julia_type(ctx, len, false, jl_long_type);
             return true;
         }
-        else if (jl_is_array_type(sty)) {
-            auto len = emit_arraylen(ctx, obj);
-            Value *elsize;
-            size_t elsz;
-            if (arraytype_constelsize(sty, &elsz)) {
-                elsize = ConstantInt::get(ctx.types().T_size, elsz);
-            }
-            else {
-                elsize = ctx.builder.CreateZExt(emit_arrayelsize(ctx, obj), ctx.types().T_size);
-            }
+        else if (jl_is_genericmemory_type(sty)) {
+            Value *v = boxed(ctx, obj);
+            auto len = emit_genericmemorylen(ctx, v, (jl_value_t*)sty);
+            auto elsize = emit_genericmemoryelsize(ctx, v, obj.typ, true);
             *ret = mark_julia_type(ctx, ctx.builder.CreateMul(len, elsize), false, jl_long_type);
-            if (ret->V)
-                setName(ctx.emission_context, ret->V, "sizeof");
             return true;
         }
     }
 
-    else if (f == jl_builtin_apply_type && nargs > 0) {
+    else if (f == BUILTIN(apply_type) && nargs > 0) {
         if (jl_is_method(ctx.linfo->def.method)) {
             // don't bother codegen constant-folding for toplevel.
             jl_value_t *ty = static_apply_type(ctx, argv, nargs + 1);
             if (ty != NULL) {
-                ty = jl_ensure_rooted(ctx, ty);
+                JL_GC_PUSH1(&ty);
+                jl_temporary_root(ctx, ty);
+                JL_GC_POP();
                 *ret = mark_julia_const(ctx, ty);
                 return true;
             }
         }
     }
 
-    else if (f == jl_builtin_isdefined && (nargs == 2 || nargs == 3)) {
+    else if (f == BUILTIN(isdefinedglobal) && (nargs == 2 || nargs == 3 || nargs == 4)) {
+        const jl_cgval_t &mod = argv[1];
+        const jl_cgval_t &sym = argv[2];
+        bool allow_import = true;
+        enum jl_memory_order order = jl_memory_order_unspecified;
+
+        if (nargs >= 3) {
+            const jl_cgval_t &arg3 = argv[3];
+            if (arg3.constant && jl_is_bool(arg3.constant))
+                allow_import = jl_unbox_bool(arg3.constant);
+            else
+                return false;
+        }
+
+        if (nargs == 4) {
+            const jl_cgval_t &arg4 = argv[4];
+            if (arg4.constant && jl_is_symbol(arg4.constant))
+                order = jl_get_atomic_order((jl_sym_t*)arg4.constant, true, false);
+            else
+                return false;
+        }
+        else
+            order = jl_memory_order_unordered;
+
+        if (order < jl_memory_order_unordered) {
+            return false;
+        }
+
+        if (!mod.constant || !sym.constant || !jl_is_symbol(sym.constant) || !jl_is_module(mod.constant)) {
+            return false;
+        }
+
+        *ret = emit_isdefinedglobal(ctx, (jl_module_t*)mod.constant, (jl_sym_t*)sym.constant, allow_import, order);
+        return true;
+    }
+
+    else if (f == BUILTIN(isdefined) && (nargs == 2 || nargs == 3)) {
         const jl_cgval_t &obj = argv[1];
         const jl_cgval_t &fld = argv[2];
         jl_datatype_t *stt = (jl_datatype_t*)obj.typ;
@@ -4082,20 +4860,23 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             *ret = jl_cgval_t(); // unreachable
             return true;
         }
-        else if (fieldidx < nf - stt->name->n_uninitialized) {
+        else if (!field_may_be_null(obj, stt, fieldidx)) {
             *ret = mark_julia_const(ctx, jl_true);
         }
         else if (jl_field_isptr(stt, fieldidx) || jl_type_hasptr(jl_field_type(stt, fieldidx))) {
             Value *fldv;
             size_t offs = jl_field_offset(stt, fieldidx) / sizeof(jl_value_t*);
-            auto tbaa = obj.tbaa;
-            if (tbaa == ctx.tbaa().tbaa_datatype && offs != offsetof(jl_datatype_t, types))
-                tbaa = ctx.tbaa().tbaa_const;
-            if (obj.ispointer()) {
+            if (!obj.inline_roots.empty()) {
+                auto offsets = split_value_field(stt, fieldidx);
+                assert(offsets.second >= 0);
+                fldv = obj.inline_roots[offsets.second];
+            }
+            else if (obj.ispointer()) {
+                auto tbaa = best_field_tbaa(ctx, obj, stt, fieldidx, offs);
                 if (!jl_field_isptr(stt, fieldidx))
                     offs += ((jl_datatype_t*)jl_field_type(stt, fieldidx))->layout->first_ptr;
-                Value *ptr = emit_bitcast(ctx, data_pointer(ctx, obj), ctx.types().T_pprjlvalue);
-                Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, ptr, offs);
+                Value *ptr = data_pointer(ctx, obj);
+                Value *addr = emit_ptrgep(ctx, ptr, offs * sizeof(jl_value_t*));
                 // emit this using the same type as emit_getfield_knownidx
                 // so that LLVM may be able to load-load forward them and fold the result
                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
@@ -4123,18 +4904,21 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         return true;
     }
 
-    else if (f == jl_builtin_donotdelete) {
+    else if (f == BUILTIN(current_scope) && (nargs == 0)) {
+        jl_aliasinfo_t scope_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+        Instruction *v = scope_ai.decorateInst(
+            ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, get_scope_field(ctx), ctx.types().alignof_ptr));
+        *ret = mark_julia_type(ctx, v, /*boxed*/ true, rt);
+        return true;
+    }
+
+    else if (f == BUILTIN(donotdelete)) {
         // For now we emit this as a vararg call to the builtin
         // (which doesn't look at the arguments). In the future,
         // this should be an LLVM builtin.
-        auto it = builtin_func_map().find(jl_f_donotdelete_addr);
-        if (it == builtin_func_map().end()) {
-            return false;
-        }
-
         *ret = mark_julia_const(ctx, jl_nothing);
         FunctionType *Fty = FunctionType::get(getVoidTy(ctx.builder.getContext()), true);
-        Function *dnd = prepare_call(it->second);
+        Function *dnd = prepare_call(jldnd_func);
         SmallVector<Value*, 1> call_args;
 
         for (size_t i = 1; i <= nargs; ++i) {
@@ -4153,22 +4937,36 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         return true;
     }
 
+    else if (f == BUILTIN(compilerbarrier) && (nargs == 2)) {
+        emit_typecheck(ctx, argv[1], (jl_value_t*)jl_symbol_type, "compilerbarrier");
+        *ret = argv[2];
+        return true;
+    }
+
     return false;
 }
 
 // Returns ctx.types().T_prjlvalue
-static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *theF,
-                             const jl_cgval_t *argv, size_t nargs, JuliaFunction<> *trampoline)
+static CallInst *emit_jlcall(jl_codectx_t &ctx, Value *theFptr, Value *theF,
+                             ArrayRef<jl_cgval_t> argv, size_t nargs, JuliaFunction<> *trampoline)
 {
     ++EmittedJLCalls;
     Function *TheTrampoline = prepare_call(trampoline);
     // emit arguments
     SmallVector<Value*, 4> theArgs;
-    theArgs.push_back(theFptr.getCallee());
+    theArgs.push_back(theFptr);
     if (theF)
         theArgs.push_back(theF);
     for (size_t i = 0; i < nargs; i++) {
-        Value *arg = boxed(ctx, argv[i]);
+        Value *arg;
+        if (i == 0 && trampoline == julia_call3) {
+            const jl_cgval_t &f = argv[i];
+            arg = f.inline_roots.empty() && f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V;
+            arg = decay_derived(ctx, arg);
+        }
+        else {
+            arg = boxed(ctx, argv[i]);
+        }
         theArgs.push_back(arg);
     }
     CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs);
@@ -4179,49 +4977,32 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t
 
 // Returns ctx.types().T_prjlvalue
 static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value *theF,
-                             const jl_cgval_t *argv, size_t nargs, JuliaFunction<> *trampoline)
+                             ArrayRef<jl_cgval_t> argv, size_t nargs, JuliaFunction<> *trampoline)
 {
     return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, trampoline);
 }
 
-static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
-                                          const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
+static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, jl_returninfo_t &returninfo, ArrayRef<jl_cgval_t> argv, size_t nargs)
 {
     ++EmittedSpecfunCalls;
     // emit specialized call site
     bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
-    jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg);
-    FunctionType *cft = returninfo.decl.getFunctionType();
-    *cc = returninfo.cc;
-    *return_roots = returninfo.return_roots;
-
-    size_t nfargs = cft->getNumParams();
-    SmallVector<Value *> argvals(nfargs);
+    size_t nfargs = returninfo.decl.getFunctionType()->getNumParams();
+    SmallVector<Value *, 0> argvals(nfargs);
     unsigned idx = 0;
     AllocaInst *result = nullptr;
-    switch (returninfo.cc) {
-    case jl_returninfo_t::Boxed:
-    case jl_returninfo_t::Register:
-    case jl_returninfo_t::Ghosts:
-        break;
-    case jl_returninfo_t::SRet:
-        result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType());
-        assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
-        argvals[idx] = result;
-        idx++;
-        break;
-    case jl_returninfo_t::Union:
-        result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes));
+
+    if (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union) {
+        result = emit_static_alloca(ctx, returninfo.union_bytes, Align(returninfo.union_align));
         setName(ctx.emission_context, result, "sret_box");
-        if (returninfo.union_align > 1)
-            result->setAlignment(Align(returninfo.union_align));
         argvals[idx] = result;
         idx++;
-        break;
     }
 
+    AllocaInst *return_roots = nullptr;
     if (returninfo.return_roots) {
-        AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots));
+        assert(returninfo.cc == jl_returninfo_t::SRet);
+        return_roots = emit_static_roots(ctx, returninfo.return_roots);
         argvals[idx] = return_roots;
         idx++;
     }
@@ -4230,43 +5011,68 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
         idx++;
     }
     for (size_t i = 0; i < nargs; i++) {
-        jl_value_t *jt = jl_nth_slot_type(specTypes, i);
         // n.b.: specTypes is required to be a datatype by construction for specsig
-        jl_cgval_t arg = argv[i];
         if (is_opaque_closure && i == 0) {
-            Type *at = cft->getParamType(idx);
-            // Special optimization for opaque closures: We know that specsig opaque
-            // closures don't look at their type tag (they are fairly quickly discarded
-            // for their environments). Therefore, we can just pass these as a pointer,
-            // rather than a boxed value.
-            arg = value_to_pointer(ctx, arg);
-            argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx, data_pointer(ctx, arg), at));
-        }
-        else if (is_uniquerep_Type(jt)) {
+            // Special implementation for opaque closures: their jt and thus
+            // julia_type_to_llvm values are likely wrong (based on captures instead of the OC), so override the
+            // behavior here to directly pass the expected pointer directly instead of
+            // computing it from the available information
+            // jl_value_t *oc_type = (jl_value_t*)jl_any_type; // more accurately: get_oc_type(specTypes, jlretty)
+            jl_cgval_t arg = argv[i];
+            if (arg.isghost) {
+                argvals[idx] = Constant::getNullValue(ctx.builder.getPtrTy(AddressSpace::Derived));
+            }
+            else {
+                if (!arg.isboxed)
+                    arg = value_to_pointer(ctx, arg);
+                argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg));
+            }
+            idx++;
             continue;
-        } else {
+        }
+        jl_value_t *jt = jl_nth_slot_type(specTypes, i);
+        jl_cgval_t arg = update_julia_type(ctx, argv[i], jt);
+        if (arg.typ == jl_bottom_type) {
+            emit_error(ctx, "(INTERNAL ERROR - IR Validity): Argument type mismatch in Expr(:invoke)");
+            return jl_cgval_t();
+        }
+        if (is_uniquerep_Type(jt)) {
+            continue;
+        }
+        else {
             bool isboxed = deserves_argbox(jt);
             Type *et = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
             if (type_is_ghost(et))
                 continue;
             assert(idx < nfargs);
-            Type *at = cft->getParamType(idx);
             if (isboxed) {
-                assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue);
                 argvals[idx] = boxed(ctx, arg);
             }
             else if (et->isAggregateType()) {
-                arg = value_to_pointer(ctx, arg);
-                // can lazy load on demand, no copy needed
-                assert(at == PointerType::get(et, AddressSpace::Derived));
-                argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx, data_pointer(ctx, arg), at));
+                auto tracked = CountTrackedPointers(et);
+                if (tracked.count && !tracked.all) {
+                    Value *val = arg.V;
+                    SmallVector<Value*,0> roots(arg.inline_roots);
+                    if (roots.empty())
+                        std::tie(val, roots) = split_value(ctx, arg, Align(julia_alignment(jt)));
+                    AllocaInst *proots = emit_static_roots(ctx, roots.size());
+                    for (size_t i = 0; i < roots.size(); i++)
+                        ctx.builder.CreateAlignedStore(roots[i], emit_ptrgep(ctx, proots, i * sizeof(void*)), Align(sizeof(void*)));
+                    assert(val);
+                    argvals[idx++] = decay_derived(ctx, val);
+                    argvals[idx] = proots;
+                }
+                else {
+                    if (!arg.isboxed)
+                        arg = value_to_pointer(ctx, arg);
+                    argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg));
+                }
             }
             else {
-                assert(at == et);
                 Value *val = emit_unbox(ctx, et, arg, jt);
                 if (!val) {
                     // There was a type mismatch of some sort - exit early
-                    CreateTrap(ctx.builder);
+                    emit_error(ctx, "(INTERNAL ERROR - IR Validity): Argument type mismatch in Expr(:invoke)");
                     return jl_cgval_t();
                 }
                 argvals[idx] = val;
@@ -4275,25 +5081,9 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
         idx++;
     }
     assert(idx == nfargs);
-    Value *TheCallee = returninfo.decl.getCallee();
-    if (fromexternal) {
-        std::string namep("p");
-        namep += cast<Function>(returninfo.decl.getCallee())->getName();
-        GlobalVariable *GV = cast_or_null<GlobalVariable>(jl_Module->getNamedValue(namep));
-        if (GV == nullptr) {
-            GV = new GlobalVariable(*jl_Module, TheCallee->getType(), false,
-                                    GlobalVariable::ExternalLinkage,
-                                    Constant::getNullValue(TheCallee->getType()),
-                                    namep);
-            ctx.external_calls[std::make_tuple(fromexternal, true)] = GV;
-        }
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-        TheCallee = ai.decorateInst(ctx.builder.CreateAlignedLoad(TheCallee->getType(), GV, Align(sizeof(void*))));
-        setName(ctx.emission_context, TheCallee, namep);
-    }
-    CallInst *call = ctx.builder.CreateCall(cft, TheCallee, argvals);
+    CallInst *call = ctx.builder.CreateCall(returninfo.decl, argvals);
     call->setAttributes(returninfo.attrs);
-    if (gcstack_arg)
+    if (gcstack_arg && ctx.emission_context.use_swiftcc)
         call->setCallingConv(CallingConv::Swift);
 
     jl_cgval_t retval;
@@ -4306,16 +5096,16 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
             break;
         case jl_returninfo_t::SRet:
             assert(result);
-            retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack);
+            retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_gcframe, load_gc_roots(ctx, return_roots, returninfo.return_roots, ctx.tbaa().tbaa_gcframe));
             break;
         case jl_returninfo_t::Union: {
             Value *box = ctx.builder.CreateExtractValue(call, 0);
             Value *tindex = ctx.builder.CreateExtractValue(call, 1);
             Value *derived = ctx.builder.CreateSelect(
                 ctx.builder.CreateICmpEQ(
-                        ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                        ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                         ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
-                decay_derived(ctx, ctx.builder.CreateBitCast(argvals[0], ctx.types().T_pjlvalue)),
+                decay_derived(ctx, result),
                 decay_derived(ctx, box)
             );
             retval = mark_julia_slot(derived,
@@ -4329,155 +5119,219 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
             retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa().tbaa_stack);
             break;
     }
+    return retval;
+}
+
+static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
+                                          ArrayRef<jl_cgval_t> argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *nreturn_roots, jl_value_t *inferred_retty)
+{
+    ++EmittedSpecfunCalls;
+    // emit specialized call site
+    jl_returninfo_t returninfo = get_specsig_function(ctx.emission_context, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure);
+    *cc = returninfo.cc;
+    *nreturn_roots = returninfo.return_roots;
+    if (fromexternal) {
+        std::string namep("p");
+        Value *TheCallee = returninfo.decl.getCallee();
+        namep += cast<Function>(TheCallee)->getName();
+        GlobalVariable *GV = cast_or_null<GlobalVariable>(jl_Module->getNamedValue(namep));
+        if (GV == nullptr) {
+            GV = new GlobalVariable(*jl_Module, TheCallee->getType(), false, GlobalVariable::ExternalLinkage, nullptr, namep);
+            ctx.emission_context.external_fns[std::make_tuple(fromexternal, true)] = GV;
+        }
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+        TheCallee = ai.decorateInst(ctx.builder.CreateAlignedLoad(TheCallee->getType(), GV, Align(sizeof(void*))));
+        setName(ctx.emission_context, TheCallee, namep);
+        returninfo.decl = FunctionCallee(returninfo.decl.getFunctionType(), TheCallee);
+    }
+    jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, specTypes, jlretty, returninfo, argv, nargs);
     // see if inference has a different / better type for the call than the lambda
     return update_julia_type(ctx, retval, inferred_retty);
 }
 
 static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
-                                          const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
+                                          ArrayRef<jl_cgval_t> argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
 {
     bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
     return emit_call_specfun_other(ctx, is_opaque_closure, mi->specTypes, jlretty, NULL,
         specFunctionObject, fromexternal, argv, nargs, cc, return_roots, inferred_retty);
 }
 
+static jl_value_t *get_ci_abi(jl_code_instance_t *ci)
+{
+    if (jl_typeof(ci->def) == (jl_value_t*)jl_abioverride_type)
+        return ((jl_abi_override_t*)ci->def)->abi;
+    return jl_get_ci_mi(ci)->specTypes;
+}
+
+static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_code_instance_t *ci, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
+    ArrayRef<jl_cgval_t> argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
+{
+    jl_method_instance_t *mi = jl_get_ci_mi(ci);
+    bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
+    return emit_call_specfun_other(ctx, is_opaque_closure, get_ci_abi(ci), ci->rettype, NULL,
+        specFunctionObject, fromexternal, argv, nargs, cc, return_roots, inferred_retty);
+}
+
 static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
-                                          const jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty)
+                                          ArrayRef<jl_cgval_t> argv, size_t nargs, jl_value_t *inferred_retty)
 {
     Value *theFptr;
     if (fromexternal) {
         std::string namep("p");
         namep += specFunctionObject;
         GlobalVariable *GV = cast_or_null<GlobalVariable>(jl_Module->getNamedValue(namep));
-        Type *pfunc = ctx.types().T_jlfunc->getPointerTo();
+        Type *pfunc = PointerType::getUnqual(ctx.builder.getContext());
         if (GV == nullptr) {
-            GV = new GlobalVariable(*jl_Module, pfunc, false,
-                                    GlobalVariable::ExternalLinkage,
-                                    Constant::getNullValue(pfunc),
-                                    namep);
-            ctx.external_calls[std::make_tuple(fromexternal, false)] = GV;
+            GV = new GlobalVariable(*jl_Module, pfunc, false, GlobalVariable::ExternalLinkage, nullptr, namep);
+            ctx.emission_context.external_fns[std::make_tuple(fromexternal, false)] = GV;
         }
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
         theFptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(pfunc, GV, Align(sizeof(void*))));
-        setName(ctx.emission_context, theFptr, namep);
+        setName(ctx.emission_context, theFptr, specFunctionObject);
     }
     else {
         theFptr = jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee();
         addRetAttr(cast<Function>(theFptr), Attribute::NonNull);
     }
-    Value *ret = emit_jlcall(ctx, FunctionCallee(ctx.types().T_jlfunc, theFptr), nullptr, argv, nargs, julia_call);
+    Value *ret = emit_jlcall(ctx, theFptr, nullptr, argv, nargs, julia_call);
     return update_julia_type(ctx, mark_julia_type(ctx, ret, true, jlretty), inferred_retty);
 }
 
 static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
 {
-    jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
+    jl_value_t **args = jl_array_data(ex->args, jl_value_t*);
     size_t arglen = jl_array_dim0(ex->args);
     size_t nargs = arglen - 1;
     assert(arglen >= 2);
 
     jl_cgval_t lival = emit_expr(ctx, args[0]);
-    SmallVector<jl_cgval_t> argv(nargs);
+    SmallVector<jl_cgval_t, 0> argv(nargs);
     for (size_t i = 0; i < nargs; ++i) {
         argv[i] = emit_expr(ctx, args[i + 1]);
         if (argv[i].typ == jl_bottom_type)
             return jl_cgval_t();
     }
-    return emit_invoke(ctx, lival, argv.data(), nargs, rt);
+    return emit_invoke(ctx, lival, argv, nargs, rt, false);
 }
 
-static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt)
+static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayRef<jl_cgval_t> argv, size_t nargs, jl_value_t *rt, bool always_inline)
 {
     ++EmittedInvokes;
     bool handled = false;
     jl_cgval_t result;
     if (lival.constant) {
-        jl_method_instance_t *mi = (jl_method_instance_t*)lival.constant;
+        jl_method_instance_t *mi;
+        jl_value_t *ci = nullptr;
+        if (jl_is_method_instance(lival.constant)) {
+            mi = (jl_method_instance_t*)lival.constant;
+        }
+        else if (jl_is_code_instance(lival.constant)) {
+            ci = lival.constant;
+            mi = jl_get_ci_mi((jl_code_instance_t*)ci);
+        } else {
+            emit_error(ctx, "(Internal ERROR - IR Validity): Invoke target is not a method instance or code instance");
+            return jl_cgval_t();
+        }
         assert(jl_is_method_instance(mi));
         if (mi == ctx.linfo) {
-            // handle self-recursion specially
-            jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed;
-            FunctionType *ft = ctx.f->getFunctionType();
-            StringRef protoname = ctx.f->getName();
+            // handle self-recursion specially (TODO: assuming ci is a valid invoke for mi?)
+            Function *f = ctx.f;
+            FunctionType *ft = f->getFunctionType();
             if (ft == ctx.types().T_jlfunc) {
-                result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, nullptr, argv, nargs, rt);
-                handled = true;
+                Value *ret = emit_jlcall(ctx, f, nullptr, argv, nargs, julia_call);
+                result = update_julia_type(ctx, mark_julia_type(ctx, ret, true, ctx.rettype), rt);
+            }
+            else if (ft == ctx.types().T_jlfuncparams) {
+                Value *ret = emit_jlcall(ctx, f, ctx.spvals_ptr, argv, nargs, julia_call2);
+                result = update_julia_type(ctx, mark_julia_type(ctx, ret, true, ctx.rettype), rt);
             }
-            else if (ft != ctx.types().T_jlfuncparams) {
+            else {
                 unsigned return_roots = 0;
+                jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed;
+                StringRef protoname = f->getName();
                 result = emit_call_specfun_other(ctx, mi, ctx.rettype, protoname, nullptr, argv, nargs, &cc, &return_roots, rt);
-                handled = true;
             }
+            handled = true;
         }
         else {
-            jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here
-            jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
-            if (ci != jl_nothing) {
+            if (ci) {
+                jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
                 auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
                  // check if we know how to handle this specptr
                 if (invoke == jl_fptr_const_return_addr) {
                     result = mark_julia_const(ctx, codeinst->rettype_const);
-                    handled = true;
                 }
-                else if (invoke != jl_fptr_sparam_addr) {
+                else {
                     bool specsig, needsparams;
-                    std::tie(specsig, needsparams) = uses_specsig(mi, codeinst->rettype, ctx.params->prefer_specsig);
-                    std::string name;
-                    StringRef protoname;
-                    bool need_to_emit = true;
-                    bool cache_valid = ctx.use_cache || ctx.external_linkage;
-                    bool external = false;
-
-                    // Check if we already queued this up
-                    auto it = ctx.call_targets.find(codeinst);
-                    if (need_to_emit && it != ctx.call_targets.end()) {
-                        protoname = std::get<2>(it->second)->getName();
-                        need_to_emit = cache_valid = false;
+                    std::tie(specsig, needsparams) = uses_specsig(get_ci_abi(codeinst), mi, codeinst->rettype, ctx.params->prefer_specsig);
+                    if (needsparams) {
+                        Value *r = emit_jlcall(ctx, jlinvoke_func, track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)mi)), argv, nargs, julia_call2);
+                        result = mark_julia_type(ctx, r, true, rt);
                     }
-
-                    // Check if it is already compiled (either JIT or externally)
-                    if (cache_valid) {
-                        // optimization: emit the correct name immediately, if we know it
-                        // TODO: use `emitted` map here too to try to consolidate names?
-                        // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
-                        auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-                        if (fptr) {
-                            while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
-                                jl_cpu_pause();
-                            }
-                            invoke = jl_atomic_load_relaxed(&codeinst->invoke);
-                            if (specsig ? jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1 : invoke == jl_fptr_args_addr) {
-                                protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
+                    else {
+                        std::string name;
+                        StringRef protoname;
+                        bool need_to_emit = true;
+                        bool cache_valid = (ctx.use_cache || ctx.external_linkage);
+                        bool external = false;
+
+                        // Check if we already queued this up
+                        auto it = ctx.call_targets.find(codeinst);
+                        if (it != ctx.call_targets.end()) {
+                            assert(it->second.specsig == specsig);
+                            protoname = it->second.decl->getName();
+                            if (always_inline)
+                                it->second.private_linkage = true;
+                            else
+                                it->second.external_linkage = true;
+                        }
+                        // Check if it is already compiled (either JIT or externally), and if so, re-use that name if possible
+                        // This is just an optimization to emit the correct name immediately, if we know it, since the JIT and AOT code will be able to do this later also
+                        if (cache_valid) {
+                            // TODO: use `emitted` map here too to try to consolidate names?
+                            uint8_t specsigflags;
+                            jl_callptr_t invoke;
+                            void *fptr;
+                            jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0);
+                            if (specsig ? specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED : invoke == jl_fptr_args_addr) {
                                 if (ctx.external_linkage) {
                                     // TODO: Add !specsig support to aotcompile.cpp
                                     // Check that the codeinst is containing native code
-                                    if (specsig && jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b100) {
-                                        external = true;
+                                    if (specsig && (specsigflags & JL_CI_FLAGS_FROM_IMAGE)) {
+                                        external = !always_inline;
                                         need_to_emit = false;
                                     }
                                 }
                                 else { // ctx.use_cache
                                     need_to_emit = false;
                                 }
+                                if (!need_to_emit && protoname.empty())
+                                    protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst);
                             }
                         }
-                    }
-                    if (need_to_emit) {
-                        raw_string_ostream(name) << (specsig ? "j_" : "j1_") << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
-                        protoname = StringRef(name);
-                    }
-                    jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed;
-                    unsigned return_roots = 0;
-                    if (specsig)
-                        result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, external ? codeinst : nullptr, argv, nargs, &cc, &return_roots, rt);
-                    else
-                        result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, external ? codeinst : nullptr, argv, nargs, rt);
-                    handled = true;
-                    if (need_to_emit) {
-                        Function *trampoline_decl = cast<Function>(jl_Module->getNamedValue(protoname));
-                        ctx.call_targets[codeinst] = std::make_tuple(cc, return_roots, trampoline_decl, specsig);
+                        if (it != ctx.call_targets.end())
+                            need_to_emit = false;
+                        else if (always_inline)
+                            need_to_emit = true;
+                        if (protoname.empty()) {
+                            raw_string_ostream(name) << (specsig ? "j_" : "j1_") << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
+                            protoname = StringRef(name);
+                        }
+
+                        jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed;
+                        unsigned return_roots = 0;
+                        if (specsig)
+                            result = emit_call_specfun_other(ctx, codeinst, protoname, external ? codeinst : nullptr, argv, nargs, &cc, &return_roots, rt);
+                        else
+                            result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, external ? codeinst : nullptr, argv, nargs, rt);
+                        if (need_to_emit) {
+                            Function *trampoline_decl = cast<Function>(jl_Module->getNamedValue(protoname));
+                            ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, nullptr, specsig, !always_inline, always_inline};
+                        }
                     }
                 }
+                handled = true;
             }
         }
     }
@@ -4485,47 +5339,68 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
         Value *r = emit_jlcall(ctx, jlinvoke_func, boxed(ctx, lival), argv, nargs, julia_call2);
         result = mark_julia_type(ctx, r, true, rt);
     }
-    if (result.typ == jl_bottom_type)
+    if (result.typ == jl_bottom_type) {
+#ifndef JL_NDEBUG
+        emit_error(ctx, "(INTERNAL ERROR - IR Validity): Returned from function we expected not to.");
+#endif
         CreateTrap(ctx.builder);
+    }
     return result;
 }
 
 static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
 {
     ++EmittedInvokes;
-    jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
+    jl_value_t **args = jl_array_data(ex->args, jl_value_t*);
     size_t arglen = jl_array_dim0(ex->args);
     size_t nargs = arglen - 1;
     assert(arglen >= 2);
     jl_cgval_t lival = emit_expr(ctx, args[0]);
-    SmallVector<jl_cgval_t> argv(nargs);
+    SmallVector<jl_cgval_t, 0> argv(nargs);
     for (size_t i = 0; i < nargs; ++i) {
         argv[i] = emit_expr(ctx, args[i + 1]);
         if (argv[i].typ == jl_bottom_type)
             return jl_cgval_t();
     }
     const jl_cgval_t &f = argv[0];
-    jl_cgval_t ret;
-    if (f.constant && f.constant == jl_builtin_modifyfield) {
-        if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv.data(), nargs - 1, &lival))
-            return ret;
-        auto it = builtin_func_map().find(jl_f_modifyfield_addr);
-        assert(it != builtin_func_map().end());
-        Value *oldnew = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), &argv[1], nargs - 1, julia_call);
-        return mark_julia_type(ctx, oldnew, true, rt);
-    }
-    if (f.constant && jl_typetagis(f.constant, jl_intrinsic_type)) {
-        JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant);
-        if (fi == JL_I::atomic_pointermodify && jl_intrinsic_nargs((int)fi) == nargs - 1)
-            return emit_atomic_pointerop(ctx, fi, argv.data(), nargs - 1, &lival);
-    }
+    if (f.constant) {
+        jl_cgval_t ret;
+        auto it = builtin_func_map().end();
+        if (f.constant == BUILTIN(modifyfield)) {
+            if (emit_f_opfield(ctx, &ret, BUILTIN(modifyfield), argv, nargs - 1, &lival))
+                return ret;
+            it = builtin_func_map().find(f.constant);
+            assert(it != builtin_func_map().end());
+        }
+        else if (f.constant == BUILTIN(modifyglobal)) {
+            if (emit_f_opglobal(ctx, &ret, BUILTIN(modifyglobal), argv, nargs - 1, &lival))
+                return ret;
+            it = builtin_func_map().find(f.constant);
+            assert(it != builtin_func_map().end());
+        }
+        else if (f.constant == BUILTIN(memoryrefmodify)) {
+            if (emit_f_opmemory(ctx, &ret, BUILTIN(memoryrefmodify), argv, nargs - 1, &lival))
+                return ret;
+            it = builtin_func_map().find(f.constant);
+            assert(it != builtin_func_map().end());
+        }
+        else if (jl_is_intrinsic(f.constant)) {
+            JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant);
+            if (fi == JL_I::atomic_pointermodify && jl_intrinsic_nargs((int)fi) == nargs - 1)
+                return emit_atomic_pointerop(ctx, fi, ArrayRef<jl_cgval_t>(argv).drop_front(), nargs - 1, &lival);
+        }
 
+        if (it != builtin_func_map().end()) {
+            Value *oldnew = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), ArrayRef<jl_cgval_t>(argv).drop_front(), nargs - 1, julia_call);
+            return mark_julia_type(ctx, oldnew, true, rt);
+        }
+    }
     // emit function and arguments
-    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv.data(), nargs, julia_call);
+    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, julia_call);
     return mark_julia_type(ctx, callval, true, rt);
 }
 
-static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, jl_value_t *sigtype, jl_cgval_t *argv, size_t nargs)
+static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, jl_value_t *sigtype, MutableArrayRef<jl_cgval_t> argv /*n.b. this mutation is unusual */, size_t nargs)
 {
     jl_datatype_t *oc_argt = (jl_datatype_t *)jl_tparam0(oc_type);
     jl_value_t *oc_rett = jl_tparam1(oc_type);
@@ -4537,6 +5412,8 @@ static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, j
             typ = jl_unwrap_vararg(typ);
         emit_typecheck(ctx, argv[i+1], typ, "typeassert");
         argv[i+1] = update_julia_type(ctx, argv[i+1], typ);
+        if (argv[i+1].typ == jl_bottom_type)
+            return jl_cgval_t();
     }
     jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed;
     unsigned return_roots = 0;
@@ -4545,6 +5422,7 @@ static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, j
     jl_cgval_t &theArg = argv[0];
     jl_cgval_t closure_specptr = emit_getfield_knownidx(ctx, theArg, 4, (jl_datatype_t*)oc_type, jl_memory_order_notatomic);
     Value *specptr = emit_unbox(ctx, ctx.types().T_size, closure_specptr, (jl_value_t*)jl_long_type);
+    specptr = emit_inttoptr(ctx, specptr, ctx.types().T_ptr);
     JL_GC_PUSH1(&sigtype);
     jl_cgval_t r = emit_call_specfun_other(ctx, true, sigtype, oc_rett, specptr, "", NULL, argv, nargs,
         &cc, &return_roots, oc_rett);
@@ -4555,25 +5433,23 @@ static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, j
 static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bool is_promotable)
 {
     ++EmittedCalls;
-    jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
+    jl_value_t **args = jl_array_data(ex->args, jl_value_t*);
     size_t nargs = jl_array_dim0(ex->args);
     assert(nargs >= 1);
     jl_cgval_t f = emit_expr(ctx, args[0]);
+    if (f.typ == jl_bottom_type) {
+        return jl_cgval_t();
+    }
 
-    if (f.constant && jl_typetagis(f.constant, jl_intrinsic_type)) {
+    // a couple intrinsics (really just llvmcall, though partly cglobal too)
+    // have non-standard (aka invalid) evaluation semantics, so we must handle these first
+    if (f.constant && jl_is_intrinsic(f.constant)) {
         JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant);
         return emit_intrinsic(ctx, fi, args, nargs - 1);
     }
 
-    jl_value_t *context = ctx.params->generic_context == jl_nothing ? nullptr : ctx.params->generic_context;
-    size_t n_generic_args = nargs + (context ? 1 : 0);
-
-    SmallVector<jl_cgval_t> generic_argv(n_generic_args);
-    jl_cgval_t *argv = generic_argv.data();
-    if (context) {
-        generic_argv[0] = mark_julia_const(ctx, context);
-        argv = &generic_argv[1];
-    }
+    size_t n_generic_args = nargs;
+    SmallVector<jl_cgval_t, 0> argv(n_generic_args);
     argv[0] = f;
     for (size_t i = 1; i < nargs; ++i) {
         argv[i] = emit_expr(ctx, args[i]);
@@ -4581,19 +5457,20 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
             return jl_cgval_t(); // anything past here is unreachable
     }
 
-    if (f.constant && jl_isa(f.constant, (jl_value_t*)jl_builtin_type)) {
-        if (f.constant == jl_builtin_ifelse && nargs == 4)
-            return emit_ifelse(ctx, argv[1], argv[2], argv[3], rt);
+    if (f.typ == (jl_value_t*)jl_intrinsic_type) {
+        Value *ret = emit_jlcall(ctx, prepare_call(jlintrinsic_func), nullptr, argv, nargs, julia_call3);
+        setName(ctx.emission_context, ret, "Builtin_ret");
+        return mark_julia_type(ctx, ret, true, rt);
+    }
+    else if (f.constant && jl_isa(f.constant, (jl_value_t*)jl_builtin_type)) {
         jl_cgval_t result;
         bool handled = emit_builtin_call(ctx, &result, f.constant, argv, nargs - 1, rt, ex, is_promotable);
-        if (handled) {
+        if (handled)
             return result;
-        }
-
-        // special case for known builtin not handled by emit_builtin_call
-        auto it = builtin_func_map().find(jl_get_builtin_fptr(f.constant));
+        auto it = builtin_func_map().find(f.constant);
         if (it != builtin_func_map().end()) {
-            Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), &argv[1], nargs - 1, julia_call);
+            Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), ArrayRef<jl_cgval_t>(argv).drop_front(), nargs - 1, julia_call);
+            setName(ctx.emission_context, ret, it->second->name + "_ret");
             return mark_julia_type(ctx, ret, true, rt);
         }
     }
@@ -4604,37 +5481,23 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
         jl_value_t *oc_rett = jl_tparam1(f.typ);
         if (jl_is_datatype(oc_argt) && jl_tupletype_length_compat(oc_argt, nargs-1)) {
             jl_value_t *sigtype = jl_argtype_with_function_type((jl_value_t*)f.typ, (jl_value_t*)oc_argt);
-            if (uses_specsig(sigtype, false, true, oc_rett, true)) {
+            if (uses_specsig(sigtype, false, oc_rett, true)) {
                 JL_GC_PUSH1(&sigtype);
                 jl_cgval_t r = emit_specsig_oc_call(ctx, f.typ, sigtype, argv, nargs);
                 JL_GC_POP();
                 return r;
             }
+            // TODO: else emit_oc_call
         }
     }
-
     // emit function and arguments
-    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, generic_argv.data(), n_generic_args, julia_call);
+    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, n_generic_args, julia_call);
     return mark_julia_type(ctx, callval, true, rt);
 }
 
 // --- accessing and assigning variables ---
 
-static void undef_var_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *name)
-{
-    ++EmittedUndefVarErrors;
-    BasicBlock *err = BasicBlock::Create(ctx.builder.getContext(), "err", ctx.f);
-    BasicBlock *ifok = BasicBlock::Create(ctx.builder.getContext(), "ok");
-    ctx.builder.CreateCondBr(ok, ifok, err);
-    ctx.builder.SetInsertPoint(err);
-    ctx.builder.CreateCall(prepare_call(jlundefvarerror_func),
-        mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)name)));
-    ctx.builder.CreateUnreachable();
-    ctx.f->getBasicBlockList().push_back(ifok);
-    ctx.builder.SetInsertPoint(ifok);
-}
-
-static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *type, jl_cgval_t name)
+static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_datatype_t *type, jl_cgval_t name)
 {
     ++EmittedUndefVarErrors;
     assert(name.typ == (jl_value_t*)jl_symbol_type);
@@ -4646,77 +5509,11 @@ static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *
                           {mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)type)),
                            mark_callee_rooted(ctx, boxed(ctx, name))});
     ctx.builder.CreateUnreachable();
-    ctx.f->getBasicBlockList().push_back(ifok);
+    ifok->insertInto(ctx.f);
     ctx.builder.SetInsertPoint(ifok);
 }
 
-// returns a jl_ppvalue_t location for the global variable m.s
-// if the reference currently bound or assign == true,
-//   pbnd will also be assigned with the binding address
-static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
-                                     jl_binding_t **pbnd, bool assign)
-{
-    jl_binding_t *b = jl_get_module_binding(m, s, 1);
-    if (assign) {
-        if (jl_atomic_load_relaxed(&b->owner) == NULL)
-            // not yet declared
-            b = NULL;
-    }
-    else {
-        b = jl_atomic_load_relaxed(&b->owner);
-        if (b == NULL)
-            // try to look this up now
-            b = jl_get_binding(m, s);
-    }
-    if (b == NULL) {
-        // var not found. switch to delayed lookup.
-        Constant *initnul = Constant::getNullValue(ctx.types().T_pjlvalue);
-        GlobalVariable *bindinggv = new GlobalVariable(*ctx.f->getParent(), ctx.types().T_pjlvalue,
-                false, GlobalVariable::PrivateLinkage, initnul);
-        LoadInst *cachedval = ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, bindinggv, Align(sizeof(void*)));
-        setName(ctx.emission_context, cachedval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".cached");
-        cachedval->setOrdering(AtomicOrdering::Unordered);
-        BasicBlock *have_val = BasicBlock::Create(ctx.builder.getContext(), "found");
-        BasicBlock *not_found = BasicBlock::Create(ctx.builder.getContext(), "notfound");
-        BasicBlock *currentbb = ctx.builder.GetInsertBlock();
-        auto iscached = ctx.builder.CreateICmpNE(cachedval, initnul);
-        setName(ctx.emission_context, iscached, "iscached");
-        ctx.builder.CreateCondBr(iscached, have_val, not_found);
-        ctx.f->getBasicBlockList().push_back(not_found);
-        ctx.builder.SetInsertPoint(not_found);
-        Value *bval = ctx.builder.CreateCall(prepare_call(assign ? jlgetbindingwrorerror_func : jlgetbindingorerror_func),
-                { literal_pointer_val(ctx, (jl_value_t*)m),
-                  literal_pointer_val(ctx, (jl_value_t*)s) });
-        setName(ctx.emission_context, bval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".found");
-        ctx.builder.CreateAlignedStore(bval, bindinggv, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
-        ctx.builder.CreateBr(have_val);
-        ctx.f->getBasicBlockList().push_back(have_val);
-        ctx.builder.SetInsertPoint(have_val);
-        PHINode *p = ctx.builder.CreatePHI(ctx.types().T_pjlvalue, 2);
-        p->addIncoming(cachedval, currentbb);
-        p->addIncoming(bval, not_found);
-        setName(ctx.emission_context, p, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s));
-        return p;
-    }
-    if (assign) {
-        if (jl_atomic_load_relaxed(&b->owner) != b) {
-            // this will fail at runtime, so defer to the runtime to create the error
-            ctx.builder.CreateCall(prepare_call(jlgetbindingwrorerror_func),
-                    { literal_pointer_val(ctx, (jl_value_t*)m),
-                      literal_pointer_val(ctx, (jl_value_t*)s) });
-            CreateTrap(ctx.builder);
-            return NULL;
-        }
-    }
-    else {
-        if (b->deprecated)
-            cg_bdw(ctx, s, b);
-    }
-    *pbnd = b;
-    return julia_binding_gv(ctx, b);
-}
-
-static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa)
+static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, jl_value_t *scope, bool isvol, MDNode *tbaa)
 {
     LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)));
     setName(ctx.emission_context, v, jl_symbol_name(name) + StringRef(".checked"));
@@ -4727,7 +5524,7 @@ static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name,
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
         ai.decorateInst(v);
     }
-    undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name);
+    undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name, scope);
     return mark_julia_type(ctx, v, true, jl_any_type);
 }
 
@@ -4739,11 +5536,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i)
             return mark_julia_const(ctx, e);
         }
     }
-    assert(ctx.spvals_ptr != NULL);
-    Value *bp = ctx.builder.CreateConstInBoundsGEP1_32(
-            ctx.types().T_prjlvalue,
-            ctx.spvals_ptr,
-            i + sizeof(jl_svec_t) / sizeof(jl_value_t*));
+    Value *bp = emit_ptrgep(ctx, maybe_decay_tracked(ctx, ctx.spvals_ptr), i * sizeof(jl_value_t*) + sizeof(jl_svec_t));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))));
     setName(ctx.emission_context, sp, "sparam");
@@ -4753,11 +5546,11 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i)
         sparam = (jl_unionall_t*)sparam->body;
         assert(jl_is_unionall(sparam));
     }
-    undef_var_error_ifnot(ctx, isnull, sparam->var->name);
+    undef_var_error_ifnot(ctx, isnull, sparam->var->name, (jl_value_t*)jl_static_parameter_sym);
     return mark_julia_type(ctx, sp, true, jl_any_type);
 }
 
-static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
+static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym, int allow_import)
 {
     Value *isnull = NULL;
     if (jl_is_slotnumber(sym) || jl_is_argument(sym)) {
@@ -4774,10 +5567,10 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
             Value *box_isnull = ctx.builder.CreateICmpNE(boxed, Constant::getNullValue(ctx.types().T_prjlvalue));
             if (vi.pTIndex) {
                 // value is either boxed in the stack slot, or unboxed in value
-                // as indicated by testing (pTIndex & 0x80)
+                // as indicated by testing (pTIndex & UNION_BOX_MARKER)
                 Value *tindex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(sizeof(void*)), vi.isVolatile);
                 Value *load_unbox = ctx.builder.CreateICmpEQ(
-                            ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                            ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                             ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
                 isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull);
             }
@@ -4795,87 +5588,65 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
                 return mark_julia_const(ctx, jl_true);
             }
         }
-        assert(ctx.spvals_ptr != NULL);
-        Value *bp = ctx.builder.CreateConstInBoundsGEP1_32(
-                ctx.types().T_prjlvalue,
-                ctx.spvals_ptr,
-                i + sizeof(jl_svec_t) / sizeof(jl_value_t*));
+        Value *bp = emit_ptrgep(ctx, maybe_decay_tracked(ctx, ctx.spvals_ptr), i * sizeof(jl_value_t*) + sizeof(jl_svec_t));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
         Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))));
         isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type));
     }
     else {
-        jl_module_t *modu;
-        jl_sym_t *name;
-        if (jl_is_globalref(sym)) {
-            modu = jl_globalref_mod(sym);
-            name = jl_globalref_name(sym);
-        }
-        else {
-            assert(jl_is_symbol(sym) && "malformed isdefined expression");
-            modu = ctx.module;
-            name = (jl_sym_t*)sym;
-        }
-        jl_binding_t *bnd = jl_get_binding(modu, name);
-        if (bnd) {
-            if (jl_atomic_load_relaxed(&bnd->value) != NULL)
-                return mark_julia_const(ctx, jl_true);
-            Value *bp = julia_binding_gv(ctx, bnd);
-            bp = julia_binding_pvalue(ctx, bp);
-            LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)));
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding);
-            ai.decorateInst(v);
-            v->setOrdering(AtomicOrdering::Unordered);
-            isnull = ctx.builder.CreateICmpNE(v, Constant::getNullValue(ctx.types().T_prjlvalue));
-        }
-        else {
-            Value *v = ctx.builder.CreateCall(prepare_call(jlboundp_func), {
-                    literal_pointer_val(ctx, (jl_value_t*)modu),
-                    literal_pointer_val(ctx, (jl_value_t*)name)
-                });
-            isnull = ctx.builder.CreateICmpNE(v, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
-        }
+        assert(false && "malformed expression");
     }
     return mark_julia_type(ctx, isnull, false, jl_bool_type);
 }
 
 static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *varname) {
-    jl_value_t *typ = vi.value.typ;
     jl_cgval_t v;
     Value *isnull = NULL;
     if (vi.boxroot == NULL || vi.pTIndex != NULL) {
-        if ((!vi.isVolatile && vi.isSA) || vi.isArgument || vi.value.constant || !vi.value.V) {
+        if ((!vi.isVolatile && vi.isSA) || vi.isArgument || vi.value.constant || !(vi.value.V || vi.inline_roots)) {
             v = vi.value;
             if (vi.pTIndex)
                 v.TIndex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(1));
         }
         else {
             // copy value to a non-mutable (non-volatile SSA) location
-            AllocaInst *varslot = cast<AllocaInst>(vi.value.V);
-            setName(ctx.emission_context, varslot, jl_symbol_name(varname));
-            Type *T = varslot->getAllocatedType();
-            assert(!varslot->isArrayAllocation() && "variables not expected to be VLA");
-            AllocaInst *ssaslot = cast<AllocaInst>(varslot->clone());
-            setName(ctx.emission_context, ssaslot, jl_symbol_name(varname) + StringRef(".ssa"));
-            ssaslot->insertAfter(varslot);
-            if (vi.isVolatile) {
-                Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot,
-                        varslot->getAlign(),
-                        true);
-                ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlign());
-            }
-            else {
-                const DataLayout &DL = jl_Module->getDataLayout();
-                uint64_t sz = DL.getTypeStoreSize(T);
-                emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign().value());
+            // since this might be a union slot, the most convenient approach to copying
+            // is to move the whole alloca chunk
+            AllocaInst *ssaslot = nullptr;
+            if (vi.value.V) {
+                auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+                AllocaInst *varslot = cast<AllocaInst>(vi.value.V);
+                Type *T = varslot->getAllocatedType();
+                assert(!varslot->isArrayAllocation() && "variables not expected to be VLA");
+                ssaslot = cast<AllocaInst>(varslot->clone());
+                setName(ctx.emission_context, ssaslot, varslot->getName() + StringRef(".ssa"));
+                ssaslot->insertAfter(varslot);
+                if (vi.isVolatile) {
+                    Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, varslot->getAlign(), true);
+                    stack_ai.decorateInst(ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlign()));
+                }
+                else {
+                    const DataLayout &DL = jl_Module->getDataLayout();
+                    uint64_t sz = DL.getTypeStoreSize(T);
+                    emit_memcpy(ctx, ssaslot, stack_ai, vi.value, sz, ssaslot->getAlign(), varslot->getAlign());
+                }
             }
             Value *tindex = NULL;
             if (vi.pTIndex)
                 tindex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(1), vi.isVolatile);
-            v = mark_julia_slot(ssaslot, vi.value.typ, tindex, ctx.tbaa().tbaa_stack);
+            v = mark_julia_slot(ssaslot, vi.value.typ, tindex, ctx.tbaa().tbaa_stack, None);
+        }
+        if (vi.inline_roots) {
+            AllocaInst *varslot = vi.inline_roots;
+            size_t nroots = cast<ConstantInt>(varslot->getArraySize())->getZExtValue();
+            auto T_prjlvalue = varslot->getAllocatedType();
+            if (auto AT = dyn_cast<ArrayType>(T_prjlvalue)) {
+                nroots *= AT->getNumElements();
+                T_prjlvalue = AT->getElementType();
+            }
+            assert(T_prjlvalue == ctx.types().T_prjlvalue);
+            v.inline_roots = load_gc_roots(ctx, varslot, nroots, ctx.tbaa().tbaa_gcframe, vi.isVolatile);
         }
-        if (vi.boxroot == NULL)
-            v = update_julia_type(ctx, v, typ);
         if (vi.usedUndef) {
             assert(vi.defFlag);
             isnull = ctx.builder.CreateAlignedLoad(getInt1Ty(ctx.builder.getContext()), vi.defFlag, Align(1), vi.isVolatile);
@@ -4886,32 +5657,30 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va
         Value *box_isnull = NULL;
         if (vi.usedUndef)
             box_isnull = ctx.builder.CreateICmpNE(boxed, Constant::getNullValue(ctx.types().T_prjlvalue));
-        maybe_mark_load_dereferenceable(boxed, vi.usedUndef || vi.pTIndex, typ);
+        maybe_mark_load_dereferenceable(boxed, vi.usedUndef || vi.pTIndex, vi.value.typ);
         if (vi.pTIndex) {
             // value is either boxed in the stack slot, or unboxed in value
-            // as indicated by testing (pTIndex & 0x80)
+            // as indicated by testing (pTIndex & UNION_BOX_MARKER)
             Value *load_unbox = ctx.builder.CreateICmpEQ(
-                        ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                        ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                         ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
             if (vi.usedUndef)
                 isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull);
-            if (v.V) { // v.V will be null if it is a union of all ghost values
-                v.V = ctx.builder.CreateSelect(load_unbox, emit_bitcast(ctx,
-                    decay_derived(ctx, v.V), boxed->getType()), decay_derived(ctx, boxed));
-            } else
+            if (v.V) // v.V will be null if it is a union of all ghost values
+                v.V = ctx.builder.CreateSelect(load_unbox, decay_derived(ctx, v.V), decay_derived(ctx, boxed));
+            else
                 v.V = boxed;
             v.Vboxed = boxed;
-            v = update_julia_type(ctx, v, typ);
         }
         else {
-            v = mark_julia_type(ctx, boxed, true, typ);
+            v = mark_julia_type(ctx, boxed, true, vi.value.typ);
             if (vi.usedUndef)
                 isnull = box_isnull;
         }
     }
     if (isnull) {
         setName(ctx.emission_context, isnull, jl_symbol_name(varname) + StringRef("_is_null"));
-        undef_var_error_ifnot(ctx, isnull, varname);
+        undef_var_error_ifnot(ctx, isnull, varname, (jl_value_t*)jl_local_sym);
     }
     return v;
 }
@@ -4921,6 +5690,12 @@ static jl_cgval_t emit_local(jl_codectx_t &ctx, jl_value_t *slotload)
     size_t sl = jl_slot_number(slotload) - 1;
     jl_varinfo_t &vi = ctx.slots[sl];
     jl_sym_t *sym = slot_symbol(ctx, sl);
+    if (sym == jl_unused_sym) {
+        // This shouldn't happen in well-formed input, but let's be robust,
+        // since we otherwise cause undefined behavior here.
+        emit_error(ctx, "(INTERNAL ERROR - IR Validity): Tried to use `#undef#` argument.");
+        return jl_cgval_t();
+    }
     return emit_varinfo(ctx, vi, sym);
 }
 
@@ -4930,51 +5705,27 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu
         store_def_flag(ctx, vi, true);
 
     if (!vi.value.constant) { // check that this is not a virtual store
-        assert(vi.value.ispointer() || (vi.pTIndex && vi.value.V == NULL));
+        assert(vi.inline_roots || vi.value.ispointer() || (vi.pTIndex && vi.value.V == NULL));
         // store value
-        if (vi.value.V == NULL) {
-            // all ghost values in destination - nothing to copy or store
-        }
-        else if (rval_info.constant || !rval_info.ispointer()) {
-            if (rval_info.isghost) {
-                // all ghost values in source - nothing to copy or store
-            }
-            else {
-                if (rval_info.typ != vi.value.typ && !vi.pTIndex && !rval_info.TIndex) {
-                    // isbits cast-on-assignment is invalid. this branch should be dead-code.
-                    CreateTrap(ctx.builder);
-                }
-                else {
-                    Value *dest = vi.value.V;
-                    if (vi.pTIndex)
-                        ctx.builder.CreateStore(UndefValue::get(cast<AllocaInst>(vi.value.V)->getAllocatedType()), vi.value.V);
-                    Type *store_ty = julia_type_to_llvm(ctx, rval_info.constant ? jl_typeof(rval_info.constant) : rval_info.typ);
-                    Type *dest_ty = store_ty->getPointerTo();
-                    if (dest_ty != dest->getType())
-                        dest = emit_bitcast(ctx, dest, dest_ty);
-                    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
-                    ai.decorateInst(ctx.builder.CreateStore(
-                                      emit_unbox(ctx, store_ty, rval_info, rval_info.typ),
-                                      dest,
-                                      vi.isVolatile));
-                }
-            }
-        }
-        else {
-            if (vi.pTIndex == NULL) {
-                assert(jl_is_concrete_type(vi.value.typ));
-                // Sometimes we can get into situations where the LHS and RHS
-                // are the same slot. We're not allowed to memcpy in that case
-                // due to LLVM bugs.
-                // This check should probably mostly catch the relevant situations.
-                if (vi.value.V != rval_info.V) {
-                    Value *copy_bytes = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(vi.value.typ));
-                    emit_memcpy(ctx, vi.value.V, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), rval_info, copy_bytes,
-                                julia_alignment(rval_info.typ), vi.isVolatile);
-                }
-            }
+        rval_info = update_julia_type(ctx, rval_info, vi.value.typ);
+        if (rval_info.typ == jl_bottom_type)
+            return;
+        if (vi.pTIndex && vi.value.V) // TODO: use lifetime-end here instead
+            ctx.builder.CreateStore(UndefValue::get(cast<AllocaInst>(vi.value.V)->getAllocatedType()), vi.value.V);
+        // Sometimes we can get into situations where the LHS and RHS
+        // are the same slot. We're not allowed to memcpy in that case
+        // due to LLVM bugs.
+        // This check should probably mostly catch the relevant situations.
+        if (vi.value.V != nullptr ? vi.value.V != rval_info.V : vi.inline_roots != nullptr) {
+            MDNode *tbaa = ctx.tbaa().tbaa_stack; // Use vi.value.tbaa ?
+            if (rval_info.TIndex)
+                emit_unionmove(ctx, vi.value.V, tbaa, rval_info, /*skip*/isboxed, vi.isVolatile);
             else {
-                emit_unionmove(ctx, vi.value.V, ctx.tbaa().tbaa_stack, rval_info, /*skip*/isboxed, vi.isVolatile);
+                Align align(julia_alignment(rval_info.typ));
+                if (vi.inline_roots)
+                    split_value_into(ctx, rval_info, align, vi.value.V, align, jl_aliasinfo_t::fromTBAA(ctx, tbaa), vi.inline_roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe), vi.isVolatile);
+                else
+                    emit_unbox_store(ctx, rval_info, vi.value.V, tbaa, align, align, vi.isVolatile);
             }
         }
     }
@@ -4989,7 +5740,8 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
     jl_value_t *phiType = NULL;
     if (jl_is_array(ssavalue_types)) {
         phiType = jl_array_ptr_ref(ssavalue_types, idx);
-    } else {
+    }
+    else {
         phiType = (jl_value_t*)jl_any_type;
     }
     jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0);
@@ -4999,6 +5751,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
         return;
     }
     AllocaInst *dest = nullptr;
+    SmallVector<PHINode*,0> roots;
     // N.B.: For any memory space, used as a phi,
     // we need to emit space twice here. The reason for this is that
     // phi nodes may be arguments of other phi nodes, so if we don't
@@ -5009,34 +5762,34 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
         size_t min_align, nbytes;
         dest = try_emit_union_alloca(ctx, ((jl_uniontype_t*)phiType), allunbox, min_align, nbytes);
         if (dest) {
-            Instruction *phi = dest->clone();
+            AllocaInst *phi = cast<AllocaInst>(dest->clone());
             phi->insertAfter(dest);
-            PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_len(edges), "tindex_phi");
-            BB->getInstList().insert(InsertPt, Tindex_phi);
-            PHINode *ptr_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_len(edges), "ptr_phi");
-            BB->getInstList().insert(InsertPt, ptr_phi);
+            PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi");
+            Tindex_phi->insertInto(BB, InsertPt);
+            PHINode *ptr_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_nrows(edges), "ptr_phi");
+            ptr_phi->insertInto(BB, InsertPt);
             Value *isboxed = ctx.builder.CreateICmpNE(
-                    ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                    ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                     ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
-            ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, MaybeAlign(0), nbytes, false);
+            ctx.builder.CreateMemCpy(phi, Align(min_align), dest, dest->getAlign(), nbytes, false);
             ctx.builder.CreateLifetimeEnd(dest);
             Value *ptr = ctx.builder.CreateSelect(isboxed,
-                maybe_bitcast(ctx, decay_derived(ctx, ptr_phi), getInt8PtrTy(ctx.builder.getContext())),
-                maybe_bitcast(ctx, decay_derived(ctx, phi), getInt8PtrTy(ctx.builder.getContext())));
-            jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, ctx.tbaa().tbaa_stack); // XXX: this TBAA is wrong for ptr_phi
+                decay_derived(ctx, ptr_phi),
+                decay_derived(ctx, phi));
+            jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, best_tbaa(ctx.tbaa(), phiType));
             val.Vboxed = ptr_phi;
-            ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, r));
-            ctx.SAvalues.at(idx) = val;
-            ctx.ssavalue_assigned.at(idx) = true;
+            ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, roots, r));
+            ctx.SAvalues[idx] = val;
+            ctx.ssavalue_assigned[idx] = true;
             return;
         }
         else if (allunbox) {
-            PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_len(edges), "tindex_phi");
-            BB->getInstList().insert(InsertPt, Tindex_phi);
+            PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi");
+            Tindex_phi->insertInto(BB, InsertPt);
             jl_cgval_t val = mark_julia_slot(NULL, phiType, Tindex_phi, ctx.tbaa().tbaa_stack);
-            ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)NULL, r));
-            ctx.SAvalues.at(idx) = val;
-            ctx.ssavalue_assigned.at(idx) = true;
+            ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)nullptr, roots, r));
+            ctx.SAvalues[idx] = val;
+            ctx.ssavalue_assigned[idx] = true;
             return;
         }
     }
@@ -5045,39 +5798,58 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
     // The frontend should really not emit this, but we allow it
     // for convenience.
     if (type_is_ghost(vtype)) {
-        assert(jl_is_datatype(phiType) && ((jl_datatype_t*)phiType)->instance);
+        assert(jl_is_datatype(phiType) && jl_is_datatype_singleton((jl_datatype_t*)phiType));
         // Skip adding it to the PhiNodes list, since we didn't create one.
-        ctx.SAvalues.at(idx) = mark_julia_const(ctx, ((jl_datatype_t*)phiType)->instance);
-        ctx.ssavalue_assigned.at(idx) = true;
+        ctx.SAvalues[idx] = mark_julia_const(ctx, ((jl_datatype_t*)phiType)->instance);
+        ctx.ssavalue_assigned[idx] = true;
         return;
     }
     jl_cgval_t slot;
     PHINode *value_phi = NULL;
-    if (vtype->isAggregateType() && CountTrackedPointers(vtype).count == 0) {
+    if (!isboxed && vtype->isAggregateType()) {
         // the value will be moved into dest in the predecessor critical block.
         // here it's moved into phi in the successor (from dest)
-        dest = emit_static_alloca(ctx, vtype);
-        Value *phi = emit_static_alloca(ctx, vtype);
-        ctx.builder.CreateMemCpy(phi, MaybeAlign(julia_alignment(phiType)),
-             dest, MaybeAlign(0),
-             jl_datatype_size(phiType), false);
-        ctx.builder.CreateLifetimeEnd(dest);
-        slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack);
+        auto tracked = CountTrackedPointers(vtype);
+        if (tracked.count) {
+            roots.resize(tracked.count);
+            assert(tracked.count == split_value_size((jl_datatype_t*)phiType).second);
+            for (size_t nr = 0; nr < tracked.count; nr++) {
+                auto root_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_nrows(edges), "root_phi");
+                root_phi->insertInto(BB, InsertPt);
+                roots[nr] = root_phi;
+            }
+        }
+        AllocaInst *phi = nullptr;
+        if (!tracked.all) {
+            Align align(julia_alignment(phiType));
+            unsigned nb = jl_datatype_size(phiType);
+            dest = emit_static_alloca(ctx, nb, align);
+            phi = cast<AllocaInst>(dest->clone());
+#if JL_LLVM_VERSION >= 200000
+            phi->insertBefore(dest->getIterator());
+#else
+            phi->insertBefore(dest);
+#endif
+            ctx.builder.CreateMemCpy(phi, align, dest, align, nb, false);
+            ctx.builder.CreateLifetimeEnd(dest);
+        }
+        slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack,
+                roots.empty() ? ArrayRef<Value*>() : ArrayRef((Value *const *)&roots.front(), roots.size()));
     }
     else {
-        value_phi = PHINode::Create(vtype, jl_array_len(edges), "value_phi");
-        BB->getInstList().insert(InsertPt, value_phi);
+        value_phi = PHINode::Create(vtype, jl_array_nrows(edges), "value_phi");
+        value_phi->insertInto(BB, InsertPt);
         slot = mark_julia_type(ctx, value_phi, isboxed, phiType);
     }
-    ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, r));
-    ctx.SAvalues.at(idx) = slot;
-    ctx.ssavalue_assigned.at(idx) = true;
+    ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, roots, r));
+    ctx.SAvalues[idx] = slot;
+    ctx.ssavalue_assigned[idx] = true;
     return;
 }
 
 static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t ssaidx_0based, jl_value_t *r)
 {
-    assert(!ctx.ssavalue_assigned.at(ssaidx_0based));
+    assert(!ctx.ssavalue_assigned[ssaidx_0based]);
     if (jl_is_phinode(r)) {
         return emit_phinode_assign(ctx, ssaidx_0based, r);
     }
@@ -5089,8 +5861,9 @@ static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t ssaidx_0based, jl_valu
             it = ctx.phic_slots.emplace(ssaidx_0based, jl_varinfo_t(ctx.builder.getContext())).first;
         }
         slot = emit_varinfo(ctx, it->second, jl_symbol("phic"));
-    } else {
-        slot = emit_expr(ctx, r, ssaidx_0based); // slot could be a jl_value_t (unboxed) or jl_value_t* (ispointer)
+    }
+    else {
+        slot = emit_expr(ctx, r, ssaidx_0based);
     }
     if (slot.isboxed || slot.TIndex) {
         // see if inference suggested a different type for the ssavalue than the expression
@@ -5103,18 +5876,27 @@ static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t ssaidx_0based, jl_valu
             }
         }
     }
-    ctx.SAvalues.at(ssaidx_0based) = slot; // now SAvalues[ssaidx_0based] contains the SAvalue
-    ctx.ssavalue_assigned.at(ssaidx_0based) = true;
+    ctx.SAvalues[ssaidx_0based] = slot; // now SAvalues[ssaidx_0based] contains the SAvalue
+    ctx.ssavalue_assigned[ssaidx_0based] = true;
 }
 
-static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t rval_info, jl_value_t *l=NULL)
+static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t rval_info, jl_value_t *l=NULL, bool allow_mismatch=false)
 {
     if (!vi.used || vi.value.typ == jl_bottom_type)
         return;
 
     // convert rval-type to lval-type
     jl_value_t *slot_type = vi.value.typ;
-    rval_info = convert_julia_type(ctx, rval_info, slot_type);
+    // If allow_mismatch is set, type mismatches will not result in traps.
+    // This is used for upsilon nodes, where the destination can have a narrower
+    // type than the store, if inference determines that the store is never read.
+    Value *skip = NULL;
+    rval_info = convert_julia_type(ctx, rval_info, slot_type, &skip);
+    if (!allow_mismatch && skip) {
+        CreateTrap(ctx.builder);
+        return;
+    }
+
     if (rval_info.typ == jl_bottom_type)
         return;
 
@@ -5124,13 +5906,13 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t
         if (rval_info.TIndex) {
             tindex = rval_info.TIndex;
             if (!vi.boxroot)
-                tindex = ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
+                tindex = ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), ~UNION_BOX_MARKER));
         }
         else {
             assert(rval_info.isboxed || rval_info.constant);
             tindex = compute_tindex_unboxed(ctx, rval_info, vi.value.typ);
             if (vi.boxroot)
-                tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
+                tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER));
             else
                 rval_info.TIndex = tindex;
         }
@@ -5144,7 +5926,7 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t
         if (vi.pTIndex && rval_info.TIndex) {
             ctx.builder.CreateStore(rval_info.TIndex, vi.pTIndex, vi.isVolatile);
             isboxed = ctx.builder.CreateICmpNE(
-                    ctx.builder.CreateAnd(rval_info.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                    ctx.builder.CreateAnd(rval_info.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                     ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
             rval = rval_info.Vboxed ? rval_info.Vboxed : Constant::getNullValue(ctx.types().T_prjlvalue);
             assert(rval->getType() == ctx.types().T_prjlvalue);
@@ -5159,8 +5941,13 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t
 
     // store unboxed variables
     if (!vi.boxroot || (vi.pTIndex && rval_info.TIndex)) {
-        emit_vi_assignment_unboxed(ctx, vi, isboxed, rval_info);
+        emit_guarded_test(ctx, skip ? ctx.builder.CreateNot(skip) : nullptr, nullptr, [&]{
+            emit_vi_assignment_unboxed(ctx, vi, isboxed, rval_info);
+            return nullptr;
+        });
     }
+
+    return;
 }
 
 static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssize_t ssaval)
@@ -5172,21 +5959,26 @@ static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssi
         int sl = jl_slot_number(l) - 1;
         // it's a local variable
         jl_varinfo_t &vi = ctx.slots[sl];
-        return emit_varinfo_assign(ctx, vi, rval_info, l);
+        emit_varinfo_assign(ctx, vi, rval_info, l);
+        return;
     }
 
     jl_module_t *mod;
     jl_sym_t *sym;
+    bool toplevel = jl_is_module(ctx.linfo->def.value);
+    bool alloc = toplevel;
     if (jl_is_symbol(l)) {
         mod = ctx.module;
         sym = (jl_sym_t*)l;
     }
     else {
         assert(jl_is_globalref(l));
+        alloc &= jl_globalref_mod(l) == ctx.module;
         mod = jl_globalref_mod(l);
         sym = jl_globalref_name(l);
     }
-    emit_globalset(ctx, mod, sym, rval_info, AtomicOrdering::Release);
+    emit_globalop(ctx, mod, sym, rval_info, jl_cgval_t(), AtomicOrdering::Release, AtomicOrdering::NotAtomic,
+                  true, false, false, false, false, nullptr, alloc);
     // Global variable. Does not need debug info because the debugger knows about
     // its memory location.
 }
@@ -5203,15 +5995,17 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val)
     // upsilon node is not dynamically observed.
     if (val) {
         jl_cgval_t rval_info = emit_expr(ctx, val);
-        if (rval_info.typ == jl_bottom_type)
+        if (rval_info.typ == jl_bottom_type) {
             // as a special case, PhiC nodes are allowed to use undefined
             // values, since they are just copy operations, so we need to
             // ignore the store (it will not by dynamically observed), while
             // normally, for any other operation result, we'd assume this store
             // was unreachable and dead
             val = NULL;
-        else
-            emit_varinfo_assign(ctx, vi, rval_info);
+        }
+        else {
+            emit_varinfo_assign(ctx, vi, rval_info, NULL, true);
+        }
     }
     if (!val) {
         if (vi.boxroot) {
@@ -5223,16 +6017,27 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val)
             // does need to satisfy the union invariants (i.e. inbounds
             // tindex).
             ctx.builder.CreateAlignedStore(
-                vi.boxroot ? ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80) :
+                vi.boxroot ? ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER) :
                              ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x01),
                 vi.pTIndex, Align(1), true);
         }
         else if (vi.value.V && !vi.value.constant && vi.value.typ != jl_bottom_type) {
-            assert(vi.value.ispointer());
-            Type *T = cast<AllocaInst>(vi.value.V)->getAllocatedType();
-            if (CountTrackedPointers(T).count) {
-                // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL
-                ctx.builder.CreateStore(Constant::getNullValue(T), vi.value.V, true);
+            assert(vi.inline_roots || vi.value.ispointer());
+            if (vi.inline_roots) {
+                // memory optimization: make gc pointers re-initialized to NULL
+                AllocaInst *ssaroots = vi.inline_roots;
+                size_t nroots = cast<ConstantInt>(ssaroots->getArraySize())->getZExtValue();
+                auto T_prjlvalue = ssaroots->getAllocatedType();
+                if (auto AT = dyn_cast<ArrayType>(T_prjlvalue)) {
+                    nroots *= AT->getNumElements();
+                    T_prjlvalue = AT->getElementType();
+                }
+                assert(T_prjlvalue == ctx.types().T_prjlvalue);
+                Value *nullval = Constant::getNullValue(T_prjlvalue);
+                auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+                for (size_t i = 0; i < nroots; i++) {
+                    stack_ai.decorateInst(ctx.builder.CreateAlignedStore(nullval, emit_ptrgep(ctx, ssaroots, i * sizeof(void*)), ssaroots->getAlign(), true));
+                }
             }
         }
     }
@@ -5242,7 +6047,7 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val)
 
 static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, const jl_cgval_t &fexpr, jl_value_t *rt, jl_svec_t *argt);
 
-static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg)
+static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const Twine &msg)
 {
     bool isbool = (condV.typ == (jl_value_t*)jl_bool_type);
     if (!isbool) {
@@ -5253,19 +6058,18 @@ static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const s
         emit_typecheck(ctx, condV, (jl_value_t*)jl_bool_type, msg);
     }
     if (isbool) {
-        Value *cond = emit_unbox(ctx, getInt8Ty(ctx.builder.getContext()), condV, (jl_value_t*)jl_bool_type);
-        assert(cond->getType() == getInt8Ty(ctx.builder.getContext()));
-        return ctx.builder.CreateXor(ctx.builder.CreateTrunc(cond, getInt1Ty(ctx.builder.getContext())), ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1));
+        Value *cond = emit_unbox(ctx, getInt1Ty(ctx.builder.getContext()), condV, (jl_value_t*)jl_bool_type);
+        return ctx.builder.CreateNot(cond);
     }
     if (condV.isboxed) {
         return ctx.builder.CreateICmpEQ(boxed(ctx, condV),
             track_pjlvalue(ctx, literal_pointer_val(ctx, jl_false)));
     }
-    // not a boolean
-    return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); // TODO: replace with Undef
+    // not a boolean (unreachable dead code)
+    return UndefValue::get(getInt1Ty(ctx.builder.getContext()));
 }
 
-static Value *emit_condition(jl_codectx_t &ctx, jl_value_t *cond, const std::string &msg)
+static Value *emit_condition(jl_codectx_t &ctx, jl_value_t *cond, const Twine &msg)
 {
     return emit_condition(ctx, emit_expr(ctx, cond), msg);
 }
@@ -5304,7 +6108,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
         return;
     }
     jl_expr_t *ex = (jl_expr_t*)expr;
-    jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
+    jl_value_t **args = jl_array_data(ex->args, jl_value_t*);
     jl_sym_t *head = ex->head;
     if (head == jl_meta_sym || head == jl_inbounds_sym || head == jl_coverageeffect_sym
             || head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) {
@@ -5313,14 +6117,55 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
         return;
     }
     else if (head == jl_leave_sym) {
-        assert(jl_is_long(args[0]));
-        ctx.builder.CreateCall(prepare_call(jlleave_func),
-                           ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_unbox_long(args[0])));
+        Value *scope_to_restore = nullptr, *token = nullptr;
+        SmallVector<AllocaInst*> handler_to_end;
+        for (size_t i = 0; i < jl_expr_nargs(ex); ++i) {
+            jl_value_t *arg = args[i];
+            if (arg == jl_nothing)
+                continue;
+            assert(jl_is_ssavalue(arg));
+            size_t enter_idx = ((jl_ssavalue_t*)arg)->id - 1;
+            jl_value_t *enter_stmt = jl_array_ptr_ref(ctx.code, enter_idx);
+            if (enter_stmt == jl_nothing)
+                continue;
+            if (ctx.scope_restore.count(enter_idx)) {
+                // TODO: The semantics of `gc_preserve` are not perfect here. An `Expr(:enter, ...)` block may
+                //       have multiple exits, but effects of `preserve_end` are only extended to the end of the
+                //       dominance of each `Expr(:leave, ...)`.
+                //
+                //       That means that a scope object can suddenly end up preserved again outside of an
+                //       `Expr(:enter, ...)` region where it ought to be dead. It'd be preferable if the effects
+                //       of gc_preserve_end propagated through a control-flow joins as long as all incoming
+                //       agree about the preserve state.
+                //
+                //       This is correct as-is anyway - it just means the scope lives longer than it needs to
+                //       if the `Expr(:enter, ...)` has multiple exits.
+                std::tie(token, scope_to_restore) = ctx.scope_restore[enter_idx];
+                ctx.builder.CreateCall(prepare_call(gc_preserve_end_func), {token});
+            }
+            if (jl_enternode_catch_dest(enter_stmt)) {
+                handler_to_end.push_back(ctx.eh_buffers[enter_stmt]);
+                // We're not actually setting up the exception frames for these, so
+                // we don't need to exit them.
+                scope_to_restore = nullptr; // restored by exception handler
+            }
+        }
+        ctx.builder.CreateCall(prepare_call(jlleave_noexcept_func), {get_current_task(ctx), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), handler_to_end.size())});
+        auto *handler_sz64 = ConstantInt::get(Type::getInt64Ty(ctx.builder.getContext()),
+                  sizeof(jl_handler_t));
+        for (AllocaInst *handler : handler_to_end) {
+            ctx.builder.CreateLifetimeEnd(handler, handler_sz64);
+        }
+        if (scope_to_restore) {
+            Value *scope_ptr = get_scope_field(ctx);
+            jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe).decorateInst(
+                ctx.builder.CreateAlignedStore(scope_to_restore, scope_ptr, ctx.types().alignof_ptr));
+        }
     }
     else if (head == jl_pop_exception_sym) {
         jl_cgval_t excstack_state = emit_expr(ctx, jl_exprarg(expr, 0));
         assert(excstack_state.V && excstack_state.V->getType() == ctx.types().T_size);
-        ctx.builder.CreateCall(prepare_call(jl_restore_excstack_func), excstack_state.V);
+        ctx.builder.CreateCall(prepare_call(jl_restore_excstack_func), {get_current_task(ctx), excstack_state.V});
         return;
     }
     else {
@@ -5333,8 +6178,7 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
 {
     jl_svec_t *sig_args = NULL;
     jl_value_t *sigtype = NULL;
-    jl_code_info_t *ir = NULL;
-    JL_GC_PUSH3(&sig_args, &sigtype, &ir);
+    JL_GC_PUSH2(&sig_args, &sigtype);
 
     size_t nsig = 1 + jl_svec_len(argt_typ->parameters);
     sig_args = jl_alloc_svec_uninit(nsig);
@@ -5344,64 +6188,101 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
     }
     sigtype = jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig);
 
-    jl_method_instance_t *mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec);
-    jl_code_instance_t *ci = (jl_code_instance_t*)jl_rettype_inferred_addr(mi, ctx.world, ctx.world);
+    jl_method_instance_t *mi;
+    jl_code_instance_t *ci;
 
-    if (ci == NULL || (jl_value_t*)ci == jl_nothing) {
-        JL_GC_POP();
-        return std::make_pair((Function*)NULL, (Function*)NULL);
+    if (closure_method->source) {
+        mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec);
+        ci = (jl_code_instance_t*)jl_rettype_inferred_addr(mi, ctx.min_world, ctx.max_world);
+    }
+    else {
+        mi = (jl_method_instance_t*)jl_atomic_load_relaxed(&closure_method->specializations);
+        assert(jl_is_method_instance(mi));
+        ci = jl_atomic_load_relaxed(&mi->cache);
     }
-    auto inferred = jl_atomic_load_relaxed(&ci->inferred);
-    if (!inferred || inferred == jl_nothing) {
+    if (ci == NULL || (jl_value_t*)ci == jl_nothing || ci->rettype != rettype || !jl_egal(sigtype, mi->specTypes)) { // TODO: correctly handle the ABI conversion if rettype != ci->rettype
         JL_GC_POP();
         return std::make_pair((Function*)NULL, (Function*)NULL);
     }
-    ++EmittedOpaqueClosureFunctions;
-
-    ir = jl_uncompress_ir(closure_method, ci, (jl_value_t*)inferred);
 
-    // TODO: Emit this inline and outline it late using LLVM's coroutine support.
-    orc::ThreadSafeModule closure_m = jl_create_ts_module(
-            name_from_method_instance(mi), ctx.emission_context.tsctx,
-            ctx.emission_context.imaging,
-            jl_Module->getDataLayout(), Triple(jl_Module->getTargetTriple()));
-    jl_llvm_functions_t closure_decls = emit_function(closure_m, mi, ir, rettype, ctx.emission_context);
-
-    assert(closure_decls.functionObject != "jl_fptr_sparam");
-    bool isspecsig = closure_decls.functionObject != "jl_fptr_args";
+    // method lookup code (similar to emit_invoke, and the inverse of emit_specsig_oc_call)
+    bool specsig = uses_specsig(sigtype, false, rettype, true);
+    std::string name;
+    std::string oc;
+    StringRef protoname;
+    StringRef proto_oc;
+
+    // Check if we already queued this up
+    auto it = ctx.call_targets.find(ci);
+    bool need_to_emit = it == ctx.call_targets.end();
+    if (!need_to_emit) {
+        assert(specsig == it->second.specsig);
+        if (specsig) {
+            protoname = it->second.decl->getName();
+            proto_oc = it->second.oc->getName();
+        }
+        else {
+            proto_oc = it->second.decl->getName();
+        }
+        need_to_emit = false;
+    }
+    else {
+        if (specsig) {
+            raw_string_ostream(name) << "j_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
+            protoname = StringRef(name);
+        }
+        raw_string_ostream(oc) << "j1_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
+        proto_oc = StringRef(oc);
+    }
 
-    Function *F = NULL;
-    std::string fname = isspecsig ?
-        closure_decls.functionObject :
-        closure_decls.specFunctionObject;
-    if (GlobalValue *V = jl_Module->getNamedValue(fname)) {
+    // Get the fptr1 OC
+    Function *F = nullptr;
+    if (GlobalValue *V = jl_Module->getNamedValue(proto_oc)) {
         F = cast<Function>(V);
-    } else {
+    }
+    else {
         F = Function::Create(get_func_sig(ctx.builder.getContext()),
                              Function::ExternalLinkage,
-                             fname, jl_Module);
-        jl_init_function(F, ctx.emission_context.TargetTriple);
+                             proto_oc, jl_Module);
+        jl_init_function(F, ctx.emission_context);
         jl_name_jlfunc_args(ctx.emission_context, F);
         F->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), F->getAttributes()}));
     }
-    Function *specF = NULL;
-    if (!isspecsig) {
-        specF = F;
-    } else {
-        //emission context holds context lock so can get module
-        specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject);
-        if (specF) {
-            jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL,
-                closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg));
-            specF = cast<Function>(returninfo.decl.getCallee());
-        }
+
+    // Get the specsig (if applicable)
+    Function *specF = nullptr;
+    jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed;
+    unsigned return_roots = 0;
+    bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
+    assert(is_opaque_closure);
+    if (specsig) {
+        jl_returninfo_t returninfo = get_specsig_function(ctx.emission_context, jl_Module, nullptr, protoname, mi->specTypes, rettype, is_opaque_closure);
+        cc = returninfo.cc;
+        return_roots = returninfo.return_roots;
+        specF = cast<Function>(returninfo.decl.getCallee());
+    }
+
+    if (need_to_emit) {
+        ctx.call_targets[ci] = {cc, return_roots, specsig ? specF : F, specsig ? F : nullptr, specsig, true, false};
     }
-    ctx.oc_modules.push_back(std::move(closure_m));
+
     JL_GC_POP();
     return std::make_pair(F, specF);
 }
 
-// `expr` is not clobbered in JL_TRY
+static void emit_latestworld(jl_codectx_t &ctx)
+{
+    auto world_age_field = get_tls_world_age_field(ctx);
+    LoadInst *world = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
+        prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr,
+        /*isVolatile*/false);
+    world->setOrdering(AtomicOrdering::Acquire);
+    StoreInst *store_world = ctx.builder.CreateAlignedStore(world, world_age_field,
+        ctx.types().alignof_ptr, /*isVolatile*/false);
+    (void)store_world;
+}
+
+// `expr` is not actually clobbered in JL_TRY
 JL_GCC_IGNORE_START("-Wclobbered")
 static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_0based)
 {
@@ -5415,12 +6296,12 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
     if (jl_is_ssavalue(expr)) {
         ssize_t idx = ((jl_ssavalue_t*)expr)->id - 1;
         assert(idx >= 0);
-        if (!ctx.ssavalue_assigned.at(idx)) {
-            ctx.ssavalue_assigned.at(idx) = true; // (assignment, not comparison test)
+        if (!ctx.ssavalue_assigned[idx]) {
+            ctx.ssavalue_assigned[idx] = true; // (assignment, not comparison test)
             return jl_cgval_t(); // dead code branch
         }
         else {
-            return ctx.SAvalues.at(idx); // at this point, SAvalues[idx] actually contains the SAvalue
+            return ctx.SAvalues[idx]; // at this point, SAvalues[idx] actually contains the SAvalue
         }
     }
     if (jl_is_globalref(expr)) {
@@ -5443,21 +6324,27 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         jl_value_t *val = expr;
         if (jl_is_quotenode(expr))
             val = jl_fieldref_noalloc(expr, 0);
-        if (jl_is_method(ctx.linfo->def.method)) // toplevel exprs are already rooted
-            val = jl_ensure_rooted(ctx, val);
+        // Toplevel exprs are rooted but because codegen assumes this is constant, it removes the write barriers for this code.
+        // This means we have to globally root the value here. (The other option would be to change how we optimize toplevel code)
+        jl_temporary_root(ctx, val);
         return mark_julia_const(ctx, val);
     }
 
     jl_expr_t *ex = (jl_expr_t*)expr;
-    jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
-    size_t nargs = jl_array_len(ex->args);
+    jl_value_t **args = jl_array_data(ex->args, jl_value_t*);
+    size_t nargs = jl_array_nrows(ex->args);
     jl_sym_t *head = ex->head;
     // this is object-disoriented.
     // however, this is a good way to do it because it should *not* be easy
     // to add new node types.
     if (head == jl_isdefined_sym) {
-        assert(nargs == 1);
-        return emit_isdefined(ctx, args[0]);
+        assert(nargs == 1 || nargs == 2);
+        int allow_import = 1;
+        if (nargs == 2) {
+            assert(jl_is_bool(args[1]));
+            allow_import = args[1] == jl_true;
+        }
+        return emit_isdefined(ctx, args[0], allow_import);
     }
     else if (head == jl_throw_undef_if_not_sym) {
         assert(nargs == 2);
@@ -5468,7 +6355,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
                 literal_pointer_val(ctx, jl_undefref_exception));
         }
         else {
-            undef_var_error_ifnot(ctx, cond, var);
+            undef_var_error_ifnot(ctx, cond, var, (jl_value_t*)jl_local_sym);
         }
         return ghostValue(ctx, jl_nothing_type);
     }
@@ -5492,7 +6379,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
             expr_t = (jl_value_t*)jl_any_type;
         else {
             expr_t = jl_is_long(ctx.source->ssavaluetypes) ? (jl_value_t*)jl_any_type : jl_array_ptr_ref(ctx.source->ssavaluetypes, ssaidx_0based);
-            is_promotable = ctx.ssavalue_usecount.at(ssaidx_0based) == 1;
+            is_promotable = ctx.ssavalue_usecount[ssaidx_0based] == 1;
         }
         jl_cgval_t res = emit_call(ctx, ex, expr_t, is_promotable);
         // some intrinsics (e.g. typeassert) can return a wider type
@@ -5511,8 +6398,8 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
     }
     else if (head == jl_cfunction_sym) {
         assert(nargs == 5);
-        jl_cgval_t fexpr_rt = emit_expr(ctx, args[1]);
-        return emit_cfunction(ctx, args[0], fexpr_rt, args[2], (jl_svec_t*)args[3]);
+        jl_cgval_t fexpr_val = emit_expr(ctx, args[1]);
+        return emit_cfunction(ctx, args[0], fexpr_val, args[2], (jl_svec_t*)args[3]);
     }
     else if (head == jl_assign_sym) {
         assert(nargs == 2);
@@ -5526,10 +6413,8 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
     else if (head == jl_method_sym) {
         if (nargs == 1) {
             jl_value_t *mn = args[0];
-            assert(jl_is_symbol(mn) || jl_is_slotnumber(mn));
+            assert(jl_is_symbol(mn) || jl_is_slotnumber(mn) || jl_is_globalref(mn));
 
-            Value *bp = NULL, *name;
-            jl_binding_t *bnd = NULL;
             bool issym = jl_is_symbol(mn);
             bool isglobalref = !issym && jl_is_globalref(mn);
             jl_module_t *mod = ctx.module;
@@ -5538,34 +6423,12 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
                     mod = jl_globalref_mod(mn);
                     mn = (jl_value_t*)jl_globalref_name(mn);
                 }
-                JL_TRY {
-                    if (jl_symbol_name((jl_sym_t*)mn)[0] == '@')
-                        jl_errorf("macro definition not allowed inside a local scope");
-                    name = literal_pointer_val(ctx, mn);
-                    bnd = jl_get_binding_for_method_def(mod, (jl_sym_t*)mn);
-                }
-                JL_CATCH {
-                    jl_value_t *e = jl_current_exception();
-                    // errors. boo. :(
-                    e = jl_as_global_root(e);
-                    raise_exception(ctx, literal_pointer_val(ctx, e));
-                    return ghostValue(ctx, jl_nothing_type);
-                }
-                bp = julia_binding_gv(ctx, bnd);
-                bp = julia_binding_pvalue(ctx, bp);
-            }
-            else if (jl_is_slotnumber(mn) || jl_is_argument(mn)) {
-                // XXX: eval_methoddef does not have this code branch
-                int sl = jl_slot_number(mn)-1;
-                jl_varinfo_t &vi = ctx.slots[sl];
-                bp = vi.boxroot;
-                name = literal_pointer_val(ctx, (jl_value_t*)slot_symbol(ctx, sl));
-            }
-            if (bp) {
-                Value *mdargs[] = { name, literal_pointer_val(ctx, (jl_value_t*)mod), bp, literal_pointer_val(ctx, bnd) };
                 jl_cgval_t gf = mark_julia_type(
                         ctx,
-                        ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), makeArrayRef(mdargs)),
+                        ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), {
+                            literal_pointer_val(ctx, (jl_value_t*)mod),
+                            literal_pointer_val(ctx, (jl_value_t*)mn)
+                        }),
                         true,
                         jl_function_type);
                 return gf;
@@ -5584,34 +6447,18 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         };
         jl_cgval_t meth = mark_julia_type(
             ctx,
-            ctx.builder.CreateCall(prepare_call(jlmethod_func), makeArrayRef(mdargs)),
+            ctx.builder.CreateCall(prepare_call(jlmethod_func), ArrayRef<Value*>(mdargs)),
             true,
             jl_method_type);
         return meth;
     }
-    else if (head == jl_const_sym) {
-        assert(nargs == 1);
-        jl_sym_t *sym = (jl_sym_t*)args[0];
-        jl_module_t *mod = ctx.module;
-        if (jl_is_globalref(sym)) {
-            mod = jl_globalref_mod(sym);
-            sym = jl_globalref_name(sym);
-        }
-        if (jl_is_symbol(sym)) {
-            jl_binding_t *bnd = NULL;
-            Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true);
-            if (bp)
-                ctx.builder.CreateCall(prepare_call(jldeclareconst_func),
-                        { bp, literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym) });
-        }
-    }
     else if (head == jl_new_sym) {
         bool is_promotable = false;
         if (ssaidx_0based >= 0) {
-            is_promotable = ctx.ssavalue_usecount.at(ssaidx_0based) == 1;
+            is_promotable = ctx.ssavalue_usecount[ssaidx_0based] == 1;
         }
         assert(nargs > 0);
-        SmallVector<jl_cgval_t> argv(nargs);
+        SmallVector<jl_cgval_t, 0> argv(nargs);
         for (size_t i = 0; i < nargs; ++i) {
             argv[i] = emit_expr(ctx, args[i]);
         }
@@ -5620,12 +6467,12 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
                 jl_is_datatype(jl_tparam0(ty)) &&
                 jl_is_concrete_type(jl_tparam0(ty))) {
             assert(nargs <= jl_datatype_nfields(jl_tparam0(ty)) + 1);
-            jl_cgval_t res = emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, argv.data() + 1, is_promotable);
+            jl_cgval_t res = emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, ArrayRef<jl_cgval_t>(argv).drop_front(), is_promotable);
             if (is_promotable && res.promotion_point && res.promotion_ssa==-1)
                 res.promotion_ssa = ssaidx_0based;
             return res;
         }
-        Value *val = emit_jlcall(ctx, jlnew_func, nullptr, argv.data(), nargs, julia_call);
+        Value *val = emit_jlcall(ctx, jlnew_func, nullptr, argv, nargs, julia_call);
         // temporarily mark as `Any`, expecting `emit_ssaval_assign` to update
         // it to the inferred type.
         return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type);
@@ -5643,19 +6490,20 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type);
     }
     else if (head == jl_new_opaque_closure_sym) {
-        assert(nargs >= 4 && "Not enough arguments in new_opaque_closure");
-        SmallVector<jl_cgval_t, 4> argv(nargs, jl_cgval_t());
+        assert(nargs >= 5 && "Not enough arguments in new_opaque_closure");
+        SmallVector<jl_cgval_t, 5> argv(nargs, jl_cgval_t());
         for (size_t i = 0; i < nargs; ++i) {
             argv[i] = emit_expr(ctx, args[i]);
         }
         const jl_cgval_t &argt = argv[0];
         const jl_cgval_t &lb = argv[1];
         const jl_cgval_t &ub = argv[2];
-        const jl_cgval_t &source = argv[3];
+        // argv[3] - constprop marker not used here
+        const jl_cgval_t &source = argv[4];
         if (source.constant == NULL) {
             // For now, we require non-constant source to be handled by using
             // eval. This should probably be a verifier error and an abort here.
-            emit_error(ctx, "(internal error) invalid IR: opaque closure source must be constant");
+            emit_error(ctx, "(INTERNAL ERROR - IR Validity): opaque closure source must be constant");
             return jl_cgval_t();
         }
         bool can_optimize = argt.constant != NULL && lb.constant != NULL && ub.constant != NULL &&
@@ -5664,35 +6512,40 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
             ((jl_method_t*)source.constant)->nargs > 0 &&
             jl_is_valid_oc_argtype((jl_tupletype_t*)argt.constant, (jl_method_t*)source.constant);
 
+
         if (can_optimize) {
             jl_value_t *closure_t = NULL;
             jl_value_t *env_t = NULL;
             JL_GC_PUSH2(&closure_t, &env_t);
 
-            SmallVector<jl_value_t *> env_component_ts(nargs-4);
-            for (size_t i = 0; i < nargs - 4; ++i) {
-                env_component_ts[i] = argv[4+i].typ;
+            size_t ncapture_args = nargs-5;
+            SmallVector<jl_value_t *, 0> env_component_ts(ncapture_args);
+            for (size_t i = 0; i < ncapture_args; ++i) {
+                jl_value_t *typ = argv[nargs-ncapture_args+i].typ;
+                if (typ == jl_bottom_type) {
+                    JL_GC_POP();
+                    return jl_cgval_t();
+                }
+                env_component_ts[i] = typ;
             }
 
-            env_t = jl_apply_tuple_type_v(env_component_ts.data(), nargs-4);
+            env_t = jl_apply_tuple_type_v(env_component_ts.data(), ncapture_args);
             // we need to know the full env type to look up the right specialization
             if (jl_is_concrete_type(env_t)) {
                 jl_tupletype_t *argt_typ = (jl_tupletype_t*)argt.constant;
                 Function *F, *specF;
-                std::tie(F, specF) = get_oc_function(ctx, (jl_method_t*)source.constant, (jl_datatype_t*)env_t, argt_typ, ub.constant);
+                std::tie(F, specF) = get_oc_function(ctx, (jl_method_t*)source.constant, (jl_tupletype_t*)env_t, argt_typ, ub.constant);
                 if (F) {
                     jl_cgval_t jlcall_ptr = mark_julia_type(ctx, F, false, jl_voidpointer_type);
-                    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
-                    Instruction *I = ctx.builder.CreateAlignedLoad(ctx.types().T_size, get_last_age_field(ctx), ctx.types().alignof_ptr);
-                    jl_cgval_t world_age = mark_julia_type(ctx, ai.decorateInst(I), false, jl_long_type);
+                    jl_cgval_t world_age = mark_julia_type(ctx, get_tls_world_age(ctx), false, jl_long_type);
                     jl_cgval_t fptr;
                     if (specF)
                         fptr = mark_julia_type(ctx, specF, false, jl_voidpointer_type);
                     else
-                        fptr = mark_julia_type(ctx, (llvm::Value*)Constant::getNullValue(ctx.types().T_size), false, jl_voidpointer_type);
+                        fptr = mark_julia_type(ctx, Constant::getNullValue(ctx.types().T_size), false, jl_voidpointer_type);
 
                     // TODO: Inline the env at the end of the opaque closure and generate a descriptor for GC
-                    jl_cgval_t env = emit_new_struct(ctx, env_t, nargs-4, &argv.data()[4]);
+                    jl_cgval_t env = emit_new_struct(ctx, env_t, ncapture_args, ArrayRef<jl_cgval_t>(argv).drop_front(nargs-ncapture_args));
 
                     jl_cgval_t closure_fields[5] = {
                         env,
@@ -5713,13 +6566,13 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         }
 
         return mark_julia_type(ctx,
-                emit_jlcall(ctx, jl_new_opaque_closure_jlcall_func, Constant::getNullValue(ctx.types().T_prjlvalue), argv.data(), nargs, julia_call),
+                emit_jlcall(ctx, jl_new_opaque_closure_jlcall_func, Constant::getNullValue(ctx.types().T_prjlvalue), argv, nargs, julia_call),
                 true, jl_any_type);
     }
     else if (head == jl_exc_sym) {
         assert(nargs == 0);
         return mark_julia_type(ctx,
-                ctx.builder.CreateCall(prepare_call(jl_current_exception_func)),
+                ctx.builder.CreateCall(prepare_call(jl_current_exception_func), {get_current_task(ctx)}),
                 true, jl_any_type);
     }
     else if (head == jl_copyast_sym) {
@@ -5735,43 +6588,41 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
     }
     else if (head == jl_loopinfo_sym) {
         // parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4))
+        // to LLVM LoopID
         SmallVector<Metadata *, 8> MDs;
+
+        // Reserve first location for self reference to the LoopID metadata node.
+        TempMDTuple TempNode = MDNode::getTemporary(ctx.builder.getContext(), None);
+        MDs.push_back(TempNode.get());
+
         for (int i = 0, ie = nargs; i < ie; ++i) {
             Metadata *MD = to_md_tree(args[i], ctx.builder.getContext());
             if (MD)
                 MDs.push_back(MD);
         }
 
-        MDNode* MD = MDNode::get(ctx.builder.getContext(), MDs);
-        CallInst *I = ctx.builder.CreateCall(prepare_call(jl_loopinfo_marker_func));
-        I->setMetadata("julia.loopinfo", MD);
+        ctx.LoopID = MDNode::getDistinct(ctx.builder.getContext(), MDs);
+        // Replace the temporary node with a self-reference.
+        ctx.LoopID->replaceOperandWith(0, ctx.LoopID);
         return jl_cgval_t();
     }
     else if (head == jl_leave_sym || head == jl_coverageeffect_sym
-            || head == jl_pop_exception_sym || head == jl_enter_sym || head == jl_inbounds_sym
+            || head == jl_pop_exception_sym || head == jl_inbounds_sym
             || head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) {
         jl_errorf("Expr(:%s) in value position", jl_symbol_name(head));
     }
     else if (head == jl_boundscheck_sym) {
-        return mark_julia_const(ctx, bounds_check_enabled(ctx, jl_true) ? jl_true : jl_false);
+        jl_value_t *def = (nargs == 0) ? jl_true : args[0];
+        return mark_julia_const(ctx, bounds_check_enabled(ctx, def) ? jl_true : jl_false);
     }
     else if (head == jl_gc_preserve_begin_sym) {
-        SmallVector<jl_cgval_t> argv(nargs);
+        SmallVector<jl_cgval_t, 0> argv(nargs);
         for (size_t i = 0; i < nargs; ++i) {
             argv[i] = emit_expr(ctx, args[i]);
         }
-        std::vector<Value*> vals;
+        SmallVector<Value*, 0> vals;
         for (size_t i = 0; i < nargs; ++i) {
-            const jl_cgval_t &ai = argv[i];
-            if (ai.constant || ai.typ == jl_bottom_type)
-                continue;
-            if (ai.isboxed) {
-                vals.push_back(ai.Vboxed);
-            }
-            else if (jl_is_concrete_immutable(ai.typ) && !jl_is_pointerfree(ai.typ)) {
-                Type *at = julia_type_to_llvm(ctx, ai.typ);
-                vals.push_back(emit_unbox(ctx, at, ai, ai.typ));
-            }
+            vals.append(get_gc_roots_for(ctx, argv[i]));
         }
         Value *token = vals.empty()
             ? (Value*)ConstantTokenNone::get(ctx.builder.getContext())
@@ -5792,6 +6643,10 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
             ctx.builder.CreateCall(prepare_call(gc_preserve_end_func), {token.V});
         return jl_cgval_t((jl_value_t*)jl_nothing_type);
     }
+    else if (head == jl_latestworld_sym && !jl_is_method(ctx.linfo->def.method)) {
+        emit_latestworld(ctx);
+        return jl_cgval_t((jl_value_t*)jl_nothing_type);
+    }
     else {
         if (jl_is_toplevel_only_expr(expr) &&
             !jl_is_method(ctx.linfo->def.method)) {
@@ -5814,116 +6669,252 @@ JL_GCC_IGNORE_STOP
 // --- generate function bodies ---
 
 // gc frame emission
-static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=false)
+static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=false) JL_NOTSAFEPOINT
 {
     // allocate a placeholder gc instruction
     // this will require the runtime, but it gets deleted later if unused
     ctx.topalloca = ctx.builder.CreateCall(prepare_call(or_new ? jladoptthread_func : jlpgcstack_func));
-    ctx.pgcstack = ctx.topalloca;
+    ctx.topalloca->setName("pgcstack");
+    if (ctx.pgcstack == nullptr)
+        ctx.pgcstack = ctx.topalloca;
 }
 
 static Value *get_current_task(jl_codectx_t &ctx)
 {
-    return get_current_task_from_pgcstack(ctx.builder, ctx.types().T_size, ctx.pgcstack);
+    return get_current_task_from_pgcstack(ctx.builder, ctx.pgcstack);
 }
 
 // Get PTLS through current task.
 static Value *get_current_ptls(jl_codectx_t &ctx)
 {
-    return get_current_ptls_from_task(ctx.builder, ctx.types().T_size, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
+    return get_current_ptls_from_task(ctx.builder, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
 }
 
 // Get the address of the world age of the current task
-static Value *get_last_age_field(jl_codectx_t &ctx)
+static Value *get_tls_world_age_field(jl_codectx_t &ctx)
 {
     Value *ct = get_current_task(ctx);
-    return ctx.builder.CreateInBoundsGEP(
-            ctx.types().T_size,
-            ctx.builder.CreateBitCast(ct, ctx.types().T_size->getPointerTo()),
-            ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, world_age) / ctx.types().sizeof_ptr),
-            "world_age");
+    return emit_ptrgep(ctx, ct, offsetof(jl_task_t, world_age), "world_age");
 }
 
-static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_codegen_params_t &params)
+// Get the value of the world age of the current task
+static Value *get_tls_world_age(jl_codectx_t &ctx)
+{
+    if (ctx.world_age_at_entry)
+        return ctx.world_age_at_entry;
+    IRBuilderBase::InsertPointGuard IP(ctx.builder);
+    bool toplevel = !jl_is_method(ctx.linfo->def.method);
+    if (!toplevel) {
+        ctx.builder.SetInsertPoint(ctx.topalloca->getParent(), ++ctx.topalloca->getIterator());
+        ctx.builder.SetCurrentDebugLocation(ctx.topalloca->getStableDebugLoc());
+    }
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+    auto *world = ctx.builder.CreateAlignedLoad(ctx.types().T_size, get_tls_world_age_field(ctx), ctx.types().alignof_ptr);
+    ai.decorateInst(world);
+    if (!toplevel)
+        ctx.world_age_at_entry = world;
+    return world;
+}
+
+static Value *get_scope_field(jl_codectx_t &ctx)
+{
+    Value *ct = get_current_task(ctx);
+    return emit_ptrgep(ctx, ct, offsetof(jl_task_t, scope), "scope");
+}
+
+static std::string get_function_name(bool specsig, bool needsparams, const char *unadorned_name, const Triple &TargetTriple)
+{
+    std::string _funcName;
+    raw_string_ostream funcName(_funcName);
+    // try to avoid conflicts in the global symbol table
+    if (specsig)
+        funcName << "julia_"; // api 5
+    else if (needsparams)
+        funcName << "japi3_";
+    else
+        funcName << "japi1_";
+    if (TargetTriple.isOSLinux()) {
+        if (unadorned_name[0] == '@')
+            unadorned_name++;
+    }
+    funcName << unadorned_name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
+    return funcName.str();
+}
+
+static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *abi, jl_value_t *jlretty, jl_value_t *declrt, jl_returninfo_t &f, unsigned nargs, int retarg, bool is_opaque_closure, StringRef funcName,
+        Module *M, jl_codegen_params_t &params);
+
+Function *get_or_emit_fptr1(StringRef preal_decl, Module *M)
+{
+    return cast<Function>(M->getOrInsertFunction(preal_decl, get_func_sig(M->getContext()), get_func_attrs(M->getContext())).getCallee());
+}
+
+static Function *emit_modifyhelper(jl_codectx_t &ctx2, const jl_cgval_t &op, const jl_cgval_t &modifyop, jl_value_t *jltype, Type *elty, jl_cgval_t rhs, const Twine &fname, bool gcstack_arg)
+{
+    Module *M = ctx2.f->getParent();
+    jl_codectx_t ctx(M->getContext(), ctx2.emission_context, ctx2.min_world, ctx2.max_world);
+    SmallVector<Type*> ArgTy;
+    ArgTy.push_back(elty);
+    if (rhs.V)
+        ArgTy.push_back(rhs.V->getType());
+    if (rhs.Vboxed)
+        ArgTy.push_back(rhs.Vboxed->getType());
+    if (rhs.TIndex)
+        ArgTy.push_back(rhs.TIndex->getType());
+    for (auto &root : rhs.inline_roots)
+        ArgTy.push_back(root->getType());
+    if (gcstack_arg)
+        ArgTy.push_back(ctx.builder.getPtrTy());
+    FunctionType *FT = FunctionType::get(elty, ArgTy, false);
+    Function *w = Function::Create(FT, GlobalVariable::PrivateLinkage, "", M);
+    jl_init_function(w, ctx.emission_context);
+    w->addFnAttr(Attribute::AlwaysInline);
+    w->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+    Function::arg_iterator AI = w->arg_begin();
+    Argument *A = &*AI++;
+    // rebuild a copy of rhs from the arguments
+    if (rhs.V)
+        rhs.V = &*AI++;
+    if (rhs.Vboxed)
+        rhs.Vboxed = &*AI++;
+    if (rhs.TIndex)
+        rhs.TIndex = &*AI++;
+    for (size_t i = 0; i < rhs.inline_roots.size(); i++)
+        rhs.inline_roots[i] = &*AI++;
+    rhs.promotion_point = nullptr;
+    rhs.promotion_ssa = -1;
+    if (gcstack_arg) {
+        AttrBuilder param(ctx.builder.getContext());
+        if (ctx.emission_context.use_swiftcc) {
+            w->setCallingConv(CallingConv::Swift);
+            param.addAttribute(Attribute::SwiftSelf);
+        }
+        param.addAttribute("gcstack");
+        param.addAttribute(Attribute::NonNull);
+        Argument *gcstackarg = &*AI++;
+        gcstackarg->addAttrs(param);
+        gcstackarg->setName("pgcstack_arg");
+        ctx.pgcstack = gcstackarg;
+    }
+    assert(AI == w->arg_end());
+    ctx.f = w;
+    ctx.rettype = jltype;
+    BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", w);
+    ctx.builder.SetInsertPoint(b0);
+    DebugLoc noDbg;
+    ctx.builder.SetCurrentDebugLocation(noDbg);
+    allocate_gc_frame(ctx, b0);
+    const jl_cgval_t argv[3] = { op, mark_julia_type(ctx, A, false, jltype), rhs };
+    jl_cgval_t ret = emit_invoke(ctx, modifyop, argv, 3, (jl_value_t*)jl_any_type, true);
+    emit_typecheck(ctx, ret, jltype, fname);
+    ret = update_julia_type(ctx, ret, jltype);
+    ctx.builder.CreateRet(emit_unbox(ctx, elty, ret, jltype));
+    if (ctx.topalloca->use_empty()) {
+      ctx.topalloca->eraseFromParent();
+      ctx.topalloca = nullptr;
+    }
+    return w;
+}
+
+
+Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Value *theFunc, Module *M, jl_codegen_params_t &params) JL_NOTSAFEPOINT
 {
     ++EmittedToJLInvokes;
-    jl_codectx_t ctx(M->getContext(), params);
+    jl_codectx_t ctx(M->getContext(), params, codeinst);
     std::string name;
-    raw_string_ostream(name) << "tojlinvoke" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
+    raw_string_ostream(name) << "tojlinvoke" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
     Function *f = Function::Create(ctx.types().T_jlfunc,
             GlobalVariable::InternalLinkage,
             name, M);
-    jl_init_function(f, params.TargetTriple);
+    jl_init_function(f, params);
     jl_name_jlfunc_args(params, f);
     //f->setAlwaysInline();
     ctx.f = f; // for jl_Module
-    BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", f);
+    BasicBlock *b0 = BasicBlock::Create(M->getContext(), "top", f);
     ctx.builder.SetInsertPoint(b0);
-    Function *theFunc;
     Value *theFarg;
-    auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
-    bool cache_valid = params.cache;
 
-    if (cache_valid && invoke != NULL) {
-        StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst);
-        theFunc = cast<Function>(
-            M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(ctx.builder.getContext())).getCallee());
+    if (theFunc) {
         theFarg = literal_pointer_val(ctx, (jl_value_t*)codeinst);
     }
     else {
-        theFunc = prepare_call(jlinvoke_func);
-        theFarg = literal_pointer_val(ctx, (jl_value_t*)codeinst->def);
+        jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+        bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
+        theFunc = prepare_call(is_opaque_closure ? jlinvokeoc_func : jlinvoke_func);
+        theFarg = literal_pointer_val(ctx, (jl_value_t*)mi);
     }
     theFarg = track_pjlvalue(ctx, theFarg);
     auto args = f->arg_begin();
-    CallInst *r = ctx.builder.CreateCall(theFunc, { &*args, &*++args, &*++args, theFarg });
-    r->setAttributes(theFunc->getAttributes());
+    CallInst *r = ctx.builder.CreateCall(FunctionCallee(jlinvoke_func->_type(M->getContext()), theFunc), { &*args, &*++args, &*++args, theFarg });
+    r->setAttributes(jlinvoke_func->_attrs(M->getContext()));
     ctx.builder.CreateRet(r);
     return f;
 }
 
-static Type *get_returnroots_type(jl_codectx_t &ctx, unsigned rootcount) {
-    return ArrayType::get(ctx.types().T_prjlvalue, rootcount);
+Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t &params) JL_NOTSAFEPOINT
+{
+    Value *theFunc = nullptr;
+    if (!theFptrName.empty())
+        theFunc = M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(M->getContext()), jlinvoke_func->_attrs(M->getContext())).getCallee();
+    return emit_tojlinvoke(codeinst, theFunc, M, params);
 }
 
-static Type *get_unionbytes_type(LLVMContext &C, unsigned unionbytes) {
-    return ArrayType::get(getInt8Ty(C), unionbytes);
+static jl_value_t *get_oc_type(jl_value_t *calltype, jl_value_t *rettype) JL_ALWAYS_LEAFTYPE
+{
+    jl_value_t *argtype = jl_argtype_without_function((jl_value_t*)calltype);
+    JL_GC_PUSH1(&argtype);
+    jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, argtype, rettype);
+    JL_GC_PROMISE_ROOTED(oc_type);
+    JL_GC_POP();
+    return oc_type;
 }
 
-static void emit_cfunc_invalidate(
+static void emit_specsig_to_specsig(
         Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots,
         jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure,
         size_t nargs,
         jl_codegen_params_t &params,
-        Function *target)
+        Value *target,
+        jl_value_t *targetsig,
+        jl_value_t *targetrt,
+        jl_returninfo_t *targetspec,
+        jl_value_t *rettype_const)
 {
     ++EmittedCFuncInvalidates;
-    jl_codectx_t ctx(gf_thunk->getParent()->getContext(), params);
+    jl_codectx_t ctx(gf_thunk->getParent()->getContext(), params, 0, 0);
     ctx.f = gf_thunk;
 
     BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", gf_thunk);
     ctx.builder.SetInsertPoint(b0);
     DebugLoc noDbg;
     ctx.builder.SetCurrentDebugLocation(noDbg);
-    allocate_gc_frame(ctx, b0);
     Function::arg_iterator AI = gf_thunk->arg_begin();
-    SmallVector<jl_cgval_t> myargs(nargs);
+    SmallVector<jl_cgval_t, 0> myargs(nargs);
     if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union)
         ++AI;
     if (return_roots)
         ++AI;
-    if (JL_FEAT_TEST(ctx,gcstack_arg)){
+    if (JL_FEAT_TEST(ctx,gcstack_arg)) {
+        ctx.pgcstack = AI;
         ++AI; // gcstack_arg
     }
+    allocate_gc_frame(ctx, b0);
     for (size_t i = 0; i < nargs; i++) {
+        if (i == 0 && is_for_opaque_closure) {
+            // `jt` would be wrong here (it is the captures type), so is not used used for
+            // the ABI decisions, but the argument actually will require boxing as its real type
+            // which can be exactly recomputed from the specialization, as that defined the ABI
+            jl_value_t *oc_type = get_oc_type(calltype, rettype);
+            Value *arg_v = &*AI;
+            ++AI;
+            myargs[i] = mark_julia_slot(arg_v, (jl_value_t*)oc_type, NULL, ctx.tbaa().tbaa_const);
+            continue;
+        }
+        // n.b. calltype is required to be a datatype by construction for specsig
         jl_value_t *jt = jl_nth_slot_type(calltype, i);
-        // n.b. specTypes is required to be a datatype by construction for specsig
         bool isboxed = false;
         Type *et;
-        if (i == 0 && is_for_opaque_closure) {
-            et = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
-        }
-        else if (deserves_argbox(jt)) {
+        if (deserves_argbox(jt)) {
             et = ctx.types().T_prjlvalue;
             isboxed = true;
         }
@@ -5934,33 +6925,45 @@ static void emit_cfunc_invalidate(
             myargs[i] = mark_julia_const(ctx, jl_tparam0(jt));
         }
         else if (type_is_ghost(et)) {
-            assert(jl_is_datatype(jt) && ((jl_datatype_t*)jt)->instance);
+            assert(jl_is_datatype(jt) && jl_is_datatype_singleton((jl_datatype_t*)jt));
             myargs[i] = mark_julia_const(ctx, ((jl_datatype_t*)jt)->instance);
         }
         else {
             Value *arg_v = &*AI;
             ++AI;
-            Type *at = arg_v->getType();
-            if ((i == 0 && is_for_opaque_closure) || (!isboxed && et->isAggregateType())) {
-                myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const);
+            if (!isboxed && et->isAggregateType()) {
+                auto tracked = CountTrackedPointers(et);
+                SmallVector<Value*,0> roots;
+                if (tracked.count && !tracked.all) {
+                    roots = load_gc_roots(ctx, &*AI, tracked.count, ctx.tbaa().tbaa_const);
+                    ++AI;
+                }
+                myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const, roots);
             }
             else {
-                assert(at == et);
+                assert(arg_v->getType() == et);
                 myargs[i] = mark_julia_type(ctx, arg_v, isboxed, jt);
             }
-            (void)at;
         }
     }
     assert(AI == gf_thunk->arg_end());
-    Value *gf_ret = emit_jlcall(ctx, target, nullptr, myargs.data(), nargs, julia_call);
-    jl_cgval_t gf_retbox = mark_julia_type(ctx, gf_ret, true, jl_any_type);
-    if (cc != jl_returninfo_t::Boxed) {
-        emit_typecheck(ctx, gf_retbox, rettype, "cfunction");
+    jl_cgval_t gf_retval;
+    if (target || targetspec) {
+        if (targetspec == nullptr)
+            gf_retval = mark_julia_type(ctx, emit_jlcall(ctx, target, nullptr, myargs, nargs, julia_call), true, targetrt);
+        else
+            gf_retval = emit_call_specfun_other(ctx, is_for_opaque_closure, targetsig, targetrt, *targetspec, myargs, nargs);
+    }
+    if (rettype_const)
+        gf_retval = mark_julia_const(ctx, rettype_const);
+    if (targetrt != rettype) {
+        emit_typecheck(ctx, gf_retval, rettype, "cfunction");
+        gf_retval = update_julia_type(ctx, gf_retval, rettype);
     }
 
     switch (cc) {
     case jl_returninfo_t::Boxed:
-        ctx.builder.CreateRet(gf_ret);
+        ctx.builder.CreateRet(boxed(ctx, gf_retval));
         break;
     case jl_returninfo_t::Register: {
         Type *gfrt = gf_thunk->getReturnType();
@@ -5968,111 +6971,289 @@ static void emit_cfunc_invalidate(
             ctx.builder.CreateRetVoid();
         }
         else {
-            gf_ret = emit_bitcast(ctx, gf_ret, gfrt->getPointerTo());
-            ctx.builder.CreateRet(ctx.builder.CreateAlignedLoad(gfrt, gf_ret, Align(julia_alignment(rettype))));
+            ctx.builder.CreateRet(emit_unbox(ctx, gfrt, gf_retval, rettype));
         }
         break;
     }
     case jl_returninfo_t::SRet: {
+        Value *sret = &*gf_thunk->arg_begin();
+        Align align(julia_alignment(rettype));
         if (return_roots) {
-            Value *root1 = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]*
-            assert(cast<PointerType>(root1->getType())->isOpaqueOrPointeeTypeMatches(get_returnroots_type(ctx, return_roots)));
-            root1 = ctx.builder.CreateConstInBoundsGEP2_32(get_returnroots_type(ctx, return_roots), root1, 0, 0);
-            ctx.builder.CreateStore(gf_ret, root1);
+            Value *roots = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]*
+            split_value_into(ctx, gf_retval, align, sret, align, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe));
+        }
+        else {
+            emit_unbox_store(ctx, gf_retval, sret, ctx.tbaa().tbaa_stack, align, align);
         }
-        emit_memcpy(ctx, &*gf_thunk->arg_begin(), jl_aliasinfo_t::fromTBAA(ctx, nullptr), gf_ret,
-                    jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype));
         ctx.builder.CreateRetVoid();
         break;
     }
-    case jl_returninfo_t::Union: {
-        Type *retty = gf_thunk->getReturnType();
-        Value *gf_retval = UndefValue::get(retty);
-        Value *tindex = compute_box_tindex(ctx, emit_typeof(ctx, gf_retbox, false, true), (jl_value_t*)jl_any_type, rettype);
-        tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
-        gf_retval = ctx.builder.CreateInsertValue(gf_retval, gf_ret, 0);
-        gf_retval = ctx.builder.CreateInsertValue(gf_retval, tindex, 1);
-        ctx.builder.CreateRet(gf_retval);
-        break;
+    case jl_returninfo_t::Union: {
+        Value *gf_ret = boxed(ctx, gf_retval); // TODO: this is not the most optimal way to emit this
+        Type *retty = gf_thunk->getReturnType();
+        Value *retval = UndefValue::get(retty);
+        Value *tindex = compute_box_tindex(ctx, emit_typeof(ctx, gf_retval, false, true), (jl_value_t*)jl_any_type, rettype);
+        tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER));
+        retval = ctx.builder.CreateInsertValue(retval, gf_ret, 0);
+        retval = ctx.builder.CreateInsertValue(retval, tindex, 1);
+        ctx.builder.CreateRet(retval);
+        break;
+    }
+    case jl_returninfo_t::Ghosts: {
+        Value *retval = compute_tindex_unboxed(ctx, gf_retval, rettype);
+        ctx.builder.CreateRet(retval);
+        break;
+    }
+    }
+    if (ctx.topalloca != ctx.pgcstack && ctx.topalloca->use_empty()) {
+       ctx.topalloca->eraseFromParent();
+       ctx.topalloca = nullptr;
+    }
+}
+
+void emit_specsig_to_fptr1(
+        Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots,
+        jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure,
+        size_t nargs,
+        jl_codegen_params_t &params,
+        Function *target)
+{
+    emit_specsig_to_specsig(gf_thunk, cc, return_roots, calltype, rettype, is_for_opaque_closure, nargs, params, target, calltype, rettype, nullptr, nullptr);
+}
+
+static void emit_fptr1_wrapper(Module *M, StringRef gf_thunk_name, Value *target, jl_value_t *rettype_const, jl_value_t *declrt, jl_value_t *jlrettype, jl_codegen_params_t &params)
+{
+    Function *w = Function::Create(get_func_sig(M->getContext()), GlobalVariable::ExternalLinkage, gf_thunk_name, M);
+    jl_init_function(w, params);
+    w->setAttributes(AttributeList::get(M->getContext(), {get_func_attrs(M->getContext()), w->getAttributes()}));
+    w->addFnAttr(Attribute::OptimizeNone);
+    w->addFnAttr(Attribute::NoInline);
+
+    jl_codectx_t ctx(M->getContext(), params, 0, 0);
+    ctx.f = w;
+    ctx.rettype = declrt;
+
+    BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", w);
+    ctx.builder.SetInsertPoint(b0);
+    DebugLoc noDbg;
+    ctx.builder.SetCurrentDebugLocation(noDbg);
+    allocate_gc_frame(ctx, b0);
+
+    jl_cgval_t gf_retval;
+    if (target) {
+        FunctionCallee theFunc(w->getFunctionType(), target);
+        auto args = w->arg_begin();
+        CallInst *r = ctx.builder.CreateCall(theFunc, { &*args, &*++args, &*++args }); // cf emit_tojlinvoke
+        assert(++args == w->arg_end());
+        r->setAttributes(w->getAttributes());
+        gf_retval = mark_julia_type(ctx, r, true, jlrettype);
+    }
+    if (rettype_const)
+        gf_retval = mark_julia_const(ctx, rettype_const);
+    if (jlrettype != declrt)
+        emit_typecheck(ctx, gf_retval, declrt, "cfunction");
+    ctx.builder.CreateRet(boxed(ctx, gf_retval));
+}
+
+static void emit_specsig_to_specsig(
+        Module *M, StringRef gf_thunk_name,
+        jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure,
+        size_t nargs,
+        jl_codegen_params_t &params,
+        Value *target,
+        jl_value_t *targetsig,
+        jl_value_t *targetrt,
+        jl_returninfo_t *targetspec,
+        jl_value_t *rettype_const)
+{
+    jl_returninfo_t returninfo = get_specsig_function(params, M, nullptr, gf_thunk_name, calltype, rettype, is_for_opaque_closure);
+    Function *gf_thunk = cast<Function>(returninfo.decl.getCallee());
+    jl_init_function(gf_thunk, params);
+    gf_thunk->setAttributes(AttributeList::get(gf_thunk->getContext(), {returninfo.attrs, gf_thunk->getAttributes()}));
+    emit_specsig_to_specsig(gf_thunk, returninfo.cc, returninfo.return_roots, calltype, rettype, is_for_opaque_closure, nargs, params, target, targetsig, targetrt, targetspec, rettype_const);
+}
+
+std::string emit_abi_converter(Module *M, jl_codegen_params_t &params, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *target, bool target_specsig)
+{
+    // this builds a method that calls a method with the same arguments but a different specsig
+    // build a specsig -> specsig converter thunk
+    // build a specsig -> arg1 converter thunk
+    // build a args1 -> specsig converter thunk (gen_invoke_wrapper)
+    // build a args1 -> args1 converter thunk (to add typeassert on result)
+    bool needsparams = false;
+    bool target_is_opaque_closure = false;
+    jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+    std::string gf_thunk_name = get_function_name(from_abi.specsig, needsparams, name_from_method_instance(mi), params.TargetTriple);
+    gf_thunk_name += "_gfthunk";
+    if (target_specsig) {
+        jl_value_t *abi = get_ci_abi(codeinst);
+        jl_returninfo_t targetspec = get_specsig_function(params, M, target, "", abi, codeinst->rettype, target_is_opaque_closure);
+        if (from_abi.specsig)
+            emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, params,
+                    target, mi->specTypes, codeinst->rettype, &targetspec, nullptr);
+        else
+            gen_invoke_wrapper(mi, abi, codeinst->rettype, from_abi.rt, targetspec, from_abi.nargs, -1, from_abi.is_opaque_closure, gf_thunk_name, M, params);
+    }
+    else {
+        if (from_abi.specsig)
+            emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, params,
+                    target, mi->specTypes, codeinst->rettype, nullptr, nullptr);
+        else
+            emit_fptr1_wrapper(M, gf_thunk_name, target, nullptr, from_abi.rt, codeinst->rettype, params);
     }
-    case jl_returninfo_t::Ghosts: {
-        Value *gf_retval = compute_tindex_unboxed(ctx, gf_retbox, rettype);
-        ctx.builder.CreateRet(gf_retval);
-        break;
+    return gf_thunk_name;
+}
+
+std::string emit_abi_dispatcher(Module *M, jl_codegen_params_t &params, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *invoke)
+{
+    // this builds a method that calls a method with the same arguments but a different specsig
+    // build a specsig -> args1 (apply_generic) or invoke (emit_tojlinvoke) call
+    // build a args1 -> args1 call (emit_fptr1_wrapper)
+    // build a args1 -> invoke call (emit_tojlinvoke)
+    Value *target;
+    if (!codeinst)
+        target = prepare_call_in(M, jlapplygeneric_func);
+    else
+        target = emit_tojlinvoke(codeinst, invoke, M, params); // TODO: inline this call?
+    std::string gf_thunk_name;
+    if (codeinst)
+        raw_string_ostream(gf_thunk_name) << "jfptr_" << name_from_method_instance(jl_get_ci_mi(codeinst)) << "_";
+    else
+        raw_string_ostream(gf_thunk_name) << "j_";
+    raw_string_ostream(gf_thunk_name) << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1) << "_gfthunk";
+    if (from_abi.specsig)
+        emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, params,
+                target, from_abi.sigt, codeinst ? codeinst->rettype : (jl_value_t*)jl_any_type, nullptr, nullptr);
+    else
+        emit_fptr1_wrapper(M, gf_thunk_name, target, nullptr, from_abi.rt, codeinst ? codeinst->rettype : (jl_value_t*)jl_any_type, params);
+    return gf_thunk_name;
+}
+
+std::string emit_abi_constreturn(Module *M, jl_codegen_params_t &params, jl_abi_t from_abi, jl_value_t *rettype_const)
+{
+    std::string gf_thunk_name;
+    raw_string_ostream(gf_thunk_name) << "jconst_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
+    if (from_abi.specsig) {
+        emit_specsig_to_specsig(M, gf_thunk_name, from_abi.sigt, from_abi.rt, from_abi.is_opaque_closure, from_abi.nargs, params,
+                nullptr, from_abi.sigt, jl_typeof(rettype_const), nullptr, rettype_const);
     }
+    else {
+        emit_fptr1_wrapper(M, gf_thunk_name, nullptr, rettype_const, from_abi.rt, jl_typeof(rettype_const), params);
     }
+    return gf_thunk_name;
 }
 
-static void emit_cfunc_invalidate(
-        Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots,
-        jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure,
-        size_t nargs,
-        jl_codegen_params_t &params)
+std::string emit_abi_constreturn(Module *M, jl_codegen_params_t &params, bool specsig, jl_code_instance_t *codeinst)
+{
+    jl_value_t *sigt = get_ci_abi(codeinst);
+    jl_value_t *rt = codeinst->rettype;
+
+    jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+    bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
+
+    size_t nargs = specsig ? jl_nparams(sigt) : 0;
+    jl_abi_t abi = {sigt, rt, nargs, specsig, is_opaque_closure};
+
+    return emit_abi_constreturn(M, params, abi, codeinst->rettype_const);
+}
+
+// release jl_world_counter
+// store theFptr
+// release last_world_v
+//
+// acquire last_world_v
+// read theFptr
+// acquire jl_world_counter
+// if (last_world_v != jl_world_counter)
+//   fptr = compute_new_fptr(&last_world_v)
+// return fptr()
+static jl_cgval_t emit_abi_call(jl_codectx_t &ctx, jl_value_t *declrt, jl_value_t *sigt, ArrayRef<jl_cgval_t> inputargs, size_t nargs, Value *world_age_field)
 {
-    emit_cfunc_invalidate(gf_thunk, cc, return_roots, calltype, rettype, is_for_opaque_closure, nargs, params,
-        prepare_call_in(gf_thunk->getParent(), jlapplygeneric_func));
+    jl_cgval_t retval;
+    if (sigt) {
+        jl_temporary_root(ctx, declrt);
+        jl_temporary_root(ctx, sigt);
+        assert(nargs == jl_nparams(sigt));
+        bool needsparams = false;
+        bool is_opaque_closure = false;
+        bool specsig = uses_specsig(sigt, needsparams, declrt, ctx.params->prefer_specsig);
+        PointerType *T_ptr = ctx.types().T_ptr;
+        Type *T_size = ctx.types().T_size;
+        Constant *Vnull = ConstantPointerNull::get(T_ptr);
+        Module *M = jl_Module;
+        ArrayType *T_cfuncdata = ArrayType::get(T_ptr, 8);
+        size_t flags = specsig;
+        GlobalVariable *cfuncdata = new GlobalVariable(*M, T_cfuncdata, false,
+                GlobalVariable::PrivateLinkage,
+                ConstantArray::get(T_cfuncdata, {
+                    Vnull,
+                    Vnull,
+                    Vnull,
+                    Vnull,
+                    Vnull,
+                    literal_pointer_val_slot(ctx.emission_context, M, declrt),
+                    literal_pointer_val_slot(ctx.emission_context, M, sigt),
+                    literal_static_pointer_val((void*)flags, T_ptr)}));
+        Value *last_world_p = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, cfuncdata, 1);
+        LoadInst *last_world_v = ctx.builder.CreateAlignedLoad(T_size, last_world_p, ctx.types().alignof_ptr);
+        last_world_v->setOrdering(AtomicOrdering::Acquire);
+        LoadInst *callee = ctx.builder.CreateAlignedLoad(T_ptr, cfuncdata, ctx.types().alignof_ptr);
+        callee->setOrdering(AtomicOrdering::Monotonic);
+        LoadInst *world_v = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
+            prepare_global_in(M, jlgetworld_global), ctx.types().alignof_ptr);
+        world_v->setOrdering(AtomicOrdering::Acquire);
+        ctx.builder.CreateStore(world_v, world_age_field);
+        Value *age_not_ok = ctx.builder.CreateICmpNE(last_world_v, world_v);
+        Value *target = emit_guarded_test(ctx, age_not_ok, callee, [&] {
+                Function *getcaller = prepare_call(jlgetabiconverter_func);
+                CallInst *cw = ctx.builder.CreateCall(getcaller, {get_current_task(ctx), cfuncdata});
+                cw->setAttributes(getcaller->getAttributes());
+                return cw;
+            });
+        jl_abi_t cfuncabi = {sigt, declrt, nargs, specsig, is_opaque_closure};
+        ctx.emission_context.cfuncs.push_back({cfuncabi, cfuncdata});
+        if (specsig) {
+            // TODO: could we force this to guarantee passing a box for `f` here (since we
+            // know we had it here) and on the receiver end (emit_abi_converter /
+            // emit_abi_dispatcher), force it to know that it can simply use this pointer
+            // instead of re-boxing it if it needs to the boxed copy of it. This comes up
+            // very rarely since usually the ABI calls are concrete and match exactly and
+            // aren't closures, but sometimes there are cases like that because of
+            // `::Function` de-specialization heuristics, such as for the `Returns` callable
+            // given that it is `@nospecialize`.
+            jl_returninfo_t targetspec = get_specsig_function(ctx.emission_context, M, target, "", sigt, declrt, is_opaque_closure);
+            retval = emit_call_specfun_other(ctx, is_opaque_closure, sigt, declrt, targetspec, inputargs, nargs);
+        }
+        else {
+            retval = mark_julia_type(ctx, emit_jlcall(ctx, target, nullptr, inputargs, nargs, julia_call), true, declrt);
+        }
+    }
+    else {
+        // emit a dispatch
+        Value *ret = emit_jlcall(ctx, jlapplygeneric_func, NULL, inputargs, nargs, julia_call);
+        retval = mark_julia_type(ctx, ret, true, jl_any_type);
+        // inline a call to typeassert here
+        emit_typecheck(ctx, retval, declrt, "cfunction");
+        retval = update_julia_type(ctx, retval, declrt);
+    }
+    return retval;
 }
 
-#include <iostream>
-static Function* gen_cfun_wrapper(
+static Function *gen_cfun_wrapper(
     Module *into, jl_codegen_params_t &params,
     const function_sig_t &sig, jl_value_t *ff, const char *aliasname,
-    jl_value_t *declrt, jl_method_instance_t *lam,
+    jl_value_t *declrt, jl_value_t *sigt,
     jl_unionall_t *unionall_env, jl_svec_t *sparam_vals, jl_array_t **closure_types)
 {
     ++GeneratedCFuncWrappers;
     // Generate a c-callable wrapper
     assert(into);
     size_t nargs = sig.nccallargs;
-    const char *name = "cfunction";
-    size_t world = jl_atomic_load_acquire(&jl_world_counter);
-    jl_code_instance_t *codeinst = NULL;
+    const char *name = aliasname ? aliasname : "cfunction";
     bool nest = (!ff || unionall_env);
-    jl_value_t *astrt = (jl_value_t*)jl_any_type;
-    void *callptr = NULL;
-    int calltype = 0;
-    if (aliasname)
-        name = aliasname;
-    else if (lam)
-        name = jl_symbol_name(lam->def.method->name);
-    if (lam && params.cache) {
-        // TODO: this isn't ideal to be unconditionally calling type inference (and compile) from here
-        codeinst = jl_compile_method_internal(lam, world);
-        auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
-        auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-        assert(invoke);
-        if (fptr) {
-            while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
-                jl_cpu_pause();
-            }
-            invoke = jl_atomic_load_relaxed(&codeinst->invoke);
-        }
-        // WARNING: this invoke load is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
-        if (invoke == jl_fptr_args_addr) {
-            callptr = fptr;
-            calltype = 1;
-        }
-        else if (invoke == jl_fptr_const_return_addr) {
-            // don't need the fptr
-            callptr = (void*)codeinst->rettype_const;
-            calltype = 2;
-        }
-        else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
-            callptr = fptr;
-            calltype = 3;
-        }
-        astrt = codeinst->rettype;
-        if (astrt != (jl_value_t*)jl_bottom_type &&
-            jl_type_intersection(astrt, declrt) == jl_bottom_type) {
-            // Do not warn if the function never returns since it is
-            // occasionally required by the C API (typically error callbacks)
-            // even though we're likely to encounter memory errors in that case
-            jl_printf(JL_STDERR, "WARNING: cfunction: return type of %s does not match\n", name);
-        }
-    }
 
     std::string funcName;
-    raw_string_ostream(funcName) << "jlcapi_" << name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
+    raw_string_ostream(funcName) << "jlcapi_" << name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
 
     Module *M = into; // Safe because ctx lock is held by params
     AttributeList attributes = sig.attributes;
@@ -6080,13 +7261,13 @@ static Function* gen_cfun_wrapper(
     if (nest) {
         // add nest parameter (pointer to jl_value_t* data array) after sret arg
         assert(closure_types);
-        std::vector<Type*> fargt_sig(sig.fargt_sig);
+        SmallVector<Type*, 0> fargt_sig(sig.fargt_sig.begin(), sig.fargt_sig.end());
 
         fargt_sig.insert(fargt_sig.begin() + sig.sret, JuliaType::get_pprjlvalue_ty(M->getContext()));
 
         // Shift LLVM attributes for parameters one to the right, as
         // we are adding the extra nest parameter after sret arg.
-        std::vector<std::pair<unsigned, AttributeSet>> newAttributes;
+        SmallVector<std::pair<unsigned, AttributeSet>, 0> newAttributes;
         newAttributes.reserve(attributes.getNumAttrSets() + 1);
         auto it = *attributes.indexes().begin();
         const auto it_end = *attributes.indexes().end();
@@ -6133,12 +7314,11 @@ static Function* gen_cfun_wrapper(
     Function *cw = Function::Create(functype,
             GlobalVariable::ExternalLinkage,
             funcName, M);
-    jl_init_function(cw, params.TargetTriple);
+    jl_init_function(cw, params);
     cw->setAttributes(AttributeList::get(M->getContext(), {attributes, cw->getAttributes()}));
 
-    jl_codectx_t ctx(M->getContext(), params);
+    jl_codectx_t ctx(M->getContext(), params, 0, 0);
     ctx.f = cw;
-    ctx.world = world;
     ctx.name = name;
     ctx.funcName = name;
 
@@ -6148,33 +7328,16 @@ static Function* gen_cfun_wrapper(
     ctx.builder.SetCurrentDebugLocation(noDbg);
     allocate_gc_frame(ctx, b0, true);
 
-    Value *world_age_field = get_last_age_field(ctx);
+    auto world_age_field = get_tls_world_age_field(ctx);
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
-    Value *last_age = ai.decorateInst(
+    ctx.world_age_at_entry = ai.decorateInst(
             ctx.builder.CreateAlignedLoad(ctx.types().T_size, world_age_field, ctx.types().alignof_ptr));
 
-    Value *world_v = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
-        prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr);
-    cast<LoadInst>(world_v)->setOrdering(AtomicOrdering::Acquire);
-
-    Value *age_ok = NULL;
-    if (calltype) {
-        LoadInst *lam_max = ctx.builder.CreateAlignedLoad(
-                ctx.types().T_size,
-                ctx.builder.CreateConstInBoundsGEP1_32(
-                    ctx.types().T_size,
-                    emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), ctx.types().T_size->getPointerTo()),
-                    offsetof(jl_code_instance_t, max_world) / ctx.types().sizeof_ptr),
-                ctx.types().alignof_ptr);
-        age_ok = ctx.builder.CreateICmpUGE(lam_max, world_v);
-    }
-    ctx.builder.CreateStore(world_v, world_age_field);
-
     // first emit code to record the arguments
     Function::arg_iterator AI = cw->arg_begin();
     Value *sretPtr = sig.sret ? &*AI++ : NULL;
     Value *nestPtr = nest ? &*AI++ : NULL;
-    SmallVector<jl_cgval_t> inputargs(nargs + 1);
+    SmallVector<jl_cgval_t, 0> inputargs(nargs + 1);
     if (ff) {
         // we need to pass the function object even if (even though) it is a singleton
         inputargs[0] = mark_julia_const(ctx, ff);
@@ -6191,7 +7354,7 @@ static Function* gen_cfun_wrapper(
     for (size_t i = 0; i < nargs; ++i, ++AI) {
         // figure out how to unpack this argument type
         Value *val = &*AI;
-        assert(sig.fargt_sig.at(i + sig.sret) == val->getType());
+        assert(sig.fargt_sig[i + sig.sret] == val->getType());
         jl_cgval_t &inputarg = inputargs[i + 1];
         jl_value_t *jargty = jl_svecref(sig.at, i);
         bool aref = jl_is_abstract_ref_type(jargty);
@@ -6217,7 +7380,7 @@ static Function* gen_cfun_wrapper(
         if (aref) {
             if (jargty == (jl_value_t*)jl_any_type) {
                 inputarg = mark_julia_type(ctx,
-                        ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, emit_bitcast(ctx, val, ctx.types().T_pprjlvalue), Align(sizeof(void*))),
+                        ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, val, Align(sizeof(void*))),
                         true, jl_any_type);
             }
             else if (static_at && jl_is_concrete_immutable(jargty)) { // anything that could be stored unboxed
@@ -6229,14 +7392,13 @@ static Function* gen_cfun_wrapper(
                     inputarg = ghostValue(ctx, jargty);
                 }
                 else {
-                    val = emit_bitcast(ctx, val, T->getPointerTo());
                     val = ctx.builder.CreateAlignedLoad(T, val, Align(1)); // make no alignment assumption about pointer from C
                     inputarg = mark_julia_type(ctx, val, false, jargty);
                 }
             }
-            else if (static_at || (!jl_is_typevar(jargty) && !jl_is_immutable_datatype(jargty))) {
-                // must be a jl_value_t* (because it's mutable or contains gc roots)
-                inputarg = mark_julia_type(ctx, maybe_decay_untracked(ctx, emit_bitcast(ctx, val, ctx.types().T_prjlvalue)), true, jargty_proper);
+            else if (static_at || (!jl_is_typevar(jargty) && (!jl_is_datatype(jargty) || jl_is_abstracttype(jargty) || jl_is_mutable_datatype(jargty)))) {
+                // must be a jl_value_t* (because it is mutable or abstract)
+                inputarg = mark_julia_type(ctx, maybe_decay_untracked(ctx, val), true, jargty_proper);
             }
             else {
                 // allocate val into a new box, if it might not be boxed
@@ -6246,41 +7408,45 @@ static Function* gen_cfun_wrapper(
                     *closure_types = jl_alloc_vec_any(0);
                 jl_array_ptr_1d_push(*closure_types, jargty);
                 Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
-                        ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_len(*closure_types)),
+                        emit_ptrgep(ctx, nestPtr, jl_array_nrows(*closure_types) * ctx.types().sizeof_ptr),
                         Align(sizeof(void*)));
                 BasicBlock *boxedBB = BasicBlock::Create(ctx.builder.getContext(), "isboxed", cw);
-                BasicBlock *loadBB = BasicBlock::Create(ctx.builder.getContext(), "need-load", cw);
+                BasicBlock *notanyBB = BasicBlock::Create(ctx.builder.getContext(), "not-any", cw);
                 BasicBlock *unboxedBB = BasicBlock::Create(ctx.builder.getContext(), "maybe-unboxed", cw);
                 BasicBlock *isanyBB = BasicBlock::Create(ctx.builder.getContext(), "any", cw);
                 BasicBlock *afterBB = BasicBlock::Create(ctx.builder.getContext(), "after", cw);
-                Value *isrtboxed = ctx.builder.CreateIsNull(val); // XXX: this is the wrong condition and should be inspecting runtime_dt intead
-                ctx.builder.CreateCondBr(isrtboxed, boxedBB, loadBB);
-                ctx.builder.SetInsertPoint(boxedBB);
-                Value *p1 = ctx.builder.CreateBitCast(val, ctx.types().T_pjlvalue);
-                p1 = track_pjlvalue(ctx, p1);
-                ctx.builder.CreateBr(afterBB);
-                ctx.builder.SetInsertPoint(loadBB);
                 Value *isrtany = ctx.builder.CreateICmpEQ(
-                        literal_pointer_val(ctx, (jl_value_t*)jl_any_type),
-                        ctx.builder.CreateBitCast(val, ctx.types().T_pjlvalue));
-                ctx.builder.CreateCondBr(isrtany, isanyBB, unboxedBB);
+                        track_pjlvalue(ctx,literal_pointer_val(ctx, (jl_value_t*)jl_any_type)), runtime_dt);
+                ctx.builder.CreateCondBr(isrtany, isanyBB, notanyBB);
                 ctx.builder.SetInsertPoint(isanyBB);
-                Value *p2 = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateBitCast(val, ctx.types().T_pprjlvalue), Align(sizeof(void*)));
+                Value *p1 = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, val, Align(sizeof(void*)));
+                ctx.builder.CreateBr(afterBB);
+                isanyBB = ctx.builder.GetInsertBlock(); // could have changed
+                ctx.builder.SetInsertPoint(notanyBB);
+                jl_cgval_t runtime_dt_val = mark_julia_type(ctx, runtime_dt, true, jl_any_type);
+                Value *isrtboxed = // (!jl_is_datatype(runtime_dt) || !jl_is_concrete_datatype(runtime_dt) || jl_is_mutable_datatype(runtime_dt))
+                    emit_guarded_test(ctx, emit_exactly_isa(ctx, runtime_dt_val, jl_datatype_type), true, [&] {
+                            return ctx.builder.CreateOr(ctx.builder.CreateNot(emit_isconcrete(ctx, runtime_dt)), emit_datatype_mutabl(ctx, runtime_dt));
+                    });
+                ctx.builder.CreateCondBr(isrtboxed, boxedBB, unboxedBB);
+                ctx.builder.SetInsertPoint(boxedBB);
+                Value *p2 = track_pjlvalue(ctx, val);
                 ctx.builder.CreateBr(afterBB);
+                boxedBB = ctx.builder.GetInsertBlock(); // could have changed
                 ctx.builder.SetInsertPoint(unboxedBB);
                 Value *p3 = emit_new_bits(ctx, runtime_dt, val);
                 unboxedBB = ctx.builder.GetInsertBlock(); // could have changed
                 ctx.builder.CreateBr(afterBB);
                 ctx.builder.SetInsertPoint(afterBB);
                 PHINode *p = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 3);
-                p->addIncoming(p1, boxedBB);
-                p->addIncoming(p2, isanyBB);
+                p->addIncoming(p1, isanyBB);
+                p->addIncoming(p2, boxedBB);
                 p->addIncoming(p3, unboxedBB);
                 inputarg = mark_julia_type(ctx, p, true, jargty_proper);
             }
         }
         else {
-            bool argboxed = sig.fargt_isboxed.at(i);
+            bool argboxed = sig.fargt_isboxed[i];
             if (argboxed) {
                 // a jl_value_t*, even when represented as a struct
                 inputarg = mark_julia_type(ctx, val, true, jargty_proper);
@@ -6289,7 +7455,7 @@ static Function* gen_cfun_wrapper(
                 // something of type T
                 // undo whatever we might have done to this poor argument
                 assert(jl_is_datatype(jargty));
-                if (sig.byRefList.at(i)) {
+                if (sig.byRefList[i]) {
                     val = ctx.builder.CreateAlignedLoad(sig.fargt[i], val, Align(1)); // unknown alignment from C
                 }
                 else {
@@ -6313,7 +7479,7 @@ static Function* gen_cfun_wrapper(
                         *closure_types = jl_alloc_vec_any(0);
                     jl_array_ptr_1d_push(*closure_types, jargty);
                     Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
-                            ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_len(*closure_types)),
+                            emit_ptrgep(ctx, nestPtr, jl_array_nrows(*closure_types) * ctx.types().sizeof_ptr),
                             Align(sizeof(void*)));
                     Value *strct = box_ccall_result(ctx, val, runtime_dt, jargty);
                     inputarg = mark_julia_type(ctx, strct, true, jargty_proper);
@@ -6325,187 +7491,8 @@ static Function* gen_cfun_wrapper(
     assert(AI == cw->arg_end());
 
     // Create the call
-    bool jlfunc_sret;
-    jl_cgval_t retval;
-    if (calltype == 2) {
-        nargs = 0; // arguments not needed -- TODO: not really true, should emit an age_ok test and jlcall
-        jlfunc_sret = false;
-        retval = mark_julia_const(ctx, (jl_value_t*)callptr);
-    }
-    else if (calltype == 0 || calltype == 1) {
-        // emit a jlcall
-        jlfunc_sret = false;
-        Function *theFptr = NULL;
-        if (calltype == 1) {
-            StringRef fname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst);
-            theFptr = cast_or_null<Function>(jl_Module->getNamedValue(fname));
-            if (!theFptr) {
-                theFptr = Function::Create(ctx.types().T_jlfunc, GlobalVariable::ExternalLinkage,
-                                           fname, jl_Module);
-                jl_init_function(theFptr, ctx.emission_context.TargetTriple);
-                jl_name_jlfunc_args(ctx.emission_context, theFptr);
-                addRetAttr(theFptr, Attribute::NonNull);
-            }
-            else {
-                assert(theFptr->getFunctionType() == ctx.types().T_jlfunc);
-            }
-        }
-        BasicBlock *b_generic, *b_jlcall, *b_after;
-        Value *ret_jlcall;
-        if (age_ok) {
-            assert(theFptr);
-            b_generic = BasicBlock::Create(ctx.builder.getContext(), "generic", cw);
-            b_jlcall = BasicBlock::Create(ctx.builder.getContext(), "apply", cw);
-            b_after = BasicBlock::Create(ctx.builder.getContext(), "after", cw);
-            ctx.builder.CreateCondBr(age_ok, b_jlcall, b_generic);
-            ctx.builder.SetInsertPoint(b_jlcall);
-            // for jlcall, we need to pass the function object even if it is a ghost.
-            Value *theF = boxed(ctx, inputargs[0]);
-            assert(theF);
-            ret_jlcall = emit_jlcall(ctx, theFptr, theF, &inputargs[1], nargs, julia_call);
-            ctx.builder.CreateBr(b_after);
-            ctx.builder.SetInsertPoint(b_generic);
-        }
-        Value *ret = emit_jlcall(ctx, jlapplygeneric_func, NULL, inputargs.data(), nargs + 1, julia_call);
-        if (age_ok) {
-            ctx.builder.CreateBr(b_after);
-            ctx.builder.SetInsertPoint(b_after);
-            PHINode *retphi = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2);
-            retphi->addIncoming(ret_jlcall, b_jlcall);
-            retphi->addIncoming(ret, b_generic);
-            ret = retphi;
-        }
-        retval = mark_julia_type(ctx, ret, true, astrt);
-    }
-    else {
-        bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
-        assert(calltype == 3);
-        // emit a specsig call
-        bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
-        StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst);
-        jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg);
-        FunctionType *cft = returninfo.decl.getFunctionType();
-        jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet);
-
-        // TODO: Can use use emit_call_specfun_other here?
-        std::vector<Value*> args;
-        Value *result;
-        if (jlfunc_sret || returninfo.cc == jl_returninfo_t::Union) {
-            // fuse the two sret together, or emit an alloca to hold it
-            if (sig.sret && jlfunc_sret) {
-                result = emit_bitcast(ctx, sretPtr, cft->getParamType(0));
-            }
-            else {
-                if (jlfunc_sret) {
-                    result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType());
-                    setName(ctx.emission_context, result, "sret");
-                    assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
-                } else {
-                    result = emit_static_alloca(ctx, get_unionbytes_type(ctx.builder.getContext(), returninfo.union_bytes));
-                    setName(ctx.emission_context, result, "result_union");
-                    assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
-                }
-            }
-            args.push_back(result);
-        }
-        if (returninfo.return_roots) {
-            AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots));
-            setName(ctx.emission_context, return_roots, "return_roots");
-            args.push_back(return_roots);
-        }
-        if (gcstack_arg)
-            args.push_back(ctx.pgcstack);
-        for (size_t i = 0; i < nargs + 1; i++) {
-            // figure out how to repack the arguments
-            jl_cgval_t &inputarg = inputargs[i];
-            Value *arg;
-            jl_value_t *spect = (i == 0 && is_opaque_closure) ? (jl_value_t*)jl_any_type :
-                jl_nth_slot_type(lam->specTypes, i);
-            // n.b. specTypes is required to be a datatype by construction for specsig
-            bool isboxed = deserves_argbox(spect);
-            Type *T = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, spect);
-            if (is_uniquerep_Type(spect)) {
-                continue;
-            }
-            else if (isboxed) {
-                arg = boxed(ctx, inputarg);
-            }
-            else if (type_is_ghost(T)) {
-                continue; // ghost types are skipped by the specsig method signature
-            }
-            else if (T->isAggregateType()) {
-                // aggregate types are passed by pointer
-                inputarg = value_to_pointer(ctx, inputarg);
-                arg = maybe_bitcast(ctx, decay_derived(ctx, data_pointer(ctx, inputarg)),
-                    T->getPointerTo());
-            }
-            else {
-                arg = emit_unbox(ctx, T, inputarg, spect);
-                assert(!isa<UndefValue>(arg));
-            }
-
-            // add to argument list
-            args.push_back(arg);
-        }
-        Value *theFptr = returninfo.decl.getCallee();
-        assert(theFptr);
-        if (age_ok) {
-            funcName += "_gfthunk";
-            Function *gf_thunk = Function::Create(returninfo.decl.getFunctionType(),
-                    GlobalVariable::InternalLinkage, funcName, M);
-            jl_init_function(gf_thunk, ctx.emission_context.TargetTriple);
-            gf_thunk->setAttributes(AttributeList::get(M->getContext(), {returninfo.attrs, gf_thunk->getAttributes()}));
-            // build a  specsig -> jl_apply_generic converter thunk
-            // this builds a method that calls jl_apply_generic (as a closure over a singleton function pointer),
-            // but which has the signature of a specsig
-            emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context);
-            theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk);
-        }
-
-        assert(cast<PointerType>(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType()));
-        CallInst *call = ctx.builder.CreateCall(
-            returninfo.decl.getFunctionType(),
-            theFptr, ArrayRef<Value*>(args));
-        call->setAttributes(returninfo.attrs);
-        if (gcstack_arg)
-            call->setCallingConv(CallingConv::Swift);
-
-        switch (returninfo.cc) {
-            case jl_returninfo_t::Boxed:
-                retval = mark_julia_type(ctx, call, true, astrt);
-                break;
-            case jl_returninfo_t::Register:
-                retval = mark_julia_type(ctx, call, false, astrt);
-                break;
-            case jl_returninfo_t::SRet:
-                retval = mark_julia_slot(result, astrt, NULL, ctx.tbaa().tbaa_stack);
-                break;
-            case jl_returninfo_t::Union: {
-                Value *box = ctx.builder.CreateExtractValue(call, 0);
-                Value *tindex = ctx.builder.CreateExtractValue(call, 1);
-                Value *derived = ctx.builder.CreateSelect(
-                    ctx.builder.CreateICmpEQ(
-                            ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
-                            ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
-                    decay_derived(ctx, ctx.builder.CreateBitCast(result, ctx.types().T_pjlvalue)),
-                    decay_derived(ctx, box));
-                retval = mark_julia_slot(derived,
-                                         astrt,
-                                         tindex,
-                                         ctx.tbaa().tbaa_stack);
-                assert(box->getType() == ctx.types().T_prjlvalue);
-                retval.Vboxed = box;
-                break;
-            }
-            case jl_returninfo_t::Ghosts:
-                retval = mark_julia_slot(NULL, astrt, call, ctx.tbaa().tbaa_stack);
-                break;
-        }
-    }
-
-    // inline a call to typeassert here, if required
-    emit_typecheck(ctx, retval, declrt, "cfunction");
-    retval = update_julia_type(ctx, retval, declrt);
+    jl_cgval_t retval = emit_abi_call(ctx, declrt, sigt, inputargs, nargs + 1, world_age_field);
+    bool jlfunc_sret = retval.V && isa<AllocaInst>(retval.V) && !retval.TIndex && retval.inline_roots.empty();
 
     // Prepare the return value
     Value *r;
@@ -6515,7 +7502,12 @@ static Function* gen_cfun_wrapper(
         r = boxed(ctx, retval);
     }
     else if (sig.sret && jlfunc_sret) {
-        // nothing to do
+        // fuse the two sret together
+        assert(retval.ispointer());
+        AllocaInst *result = cast<AllocaInst>(retval.V);
+        retval.V = sretPtr;
+        result->replaceAllUsesWith(sretPtr);
+        result->eraseFromParent();
         r = NULL;
     }
     else if (!type_is_ghost(sig.lrt)) {
@@ -6532,24 +7524,19 @@ static Function* gen_cfun_wrapper(
         r = NULL;
     }
 
-    ctx.builder.CreateStore(last_age, world_age_field);
+    ctx.builder.CreateStore(ctx.world_age_at_entry, world_age_field);
     ctx.builder.CreateRet(r);
 
     ctx.builder.SetCurrentDebugLocation(noDbg);
     ctx.builder.ClearInsertionPoint();
 
-    if (aliasname) {
-        GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
-                            GlobalValue::ExternalLinkage, aliasname, cw, M);
-    }
-
     if (nest) {
         funcName += "make";
         Function *cw_make = Function::Create(
-                FunctionType::get(getInt8PtrTy(ctx.builder.getContext()), { getInt8PtrTy(ctx.builder.getContext()), ctx.types().T_ppjlvalue }, false),
+                FunctionType::get(getPointerTy(ctx.builder.getContext()), { getPointerTy(ctx.builder.getContext()), ctx.types().T_ppjlvalue }, false),
                 GlobalVariable::ExternalLinkage,
                 funcName, M);
-        jl_init_function(cw_make, ctx.emission_context.TargetTriple);
+        jl_init_function(cw_make, ctx.emission_context);
         cw_make->getArg(0)->setName("wrapper");
         cw_make->getArg(1)->setName("newval");
         BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", cw_make);
@@ -6557,12 +7544,20 @@ static Function* gen_cfun_wrapper(
         Function::arg_iterator AI = cw_make->arg_begin();
         Argument *Tramp = &*AI; ++AI;
         Argument *NVal = &*AI; ++AI;
+#if JL_LLVM_VERSION >= 200000
+        Function *init_trampoline = Intrinsic::getOrInsertDeclaration(cw_make->getParent(), Intrinsic::init_trampoline);
+#else
         Function *init_trampoline = Intrinsic::getDeclaration(cw_make->getParent(), Intrinsic::init_trampoline);
+#endif
+#if JL_LLVM_VERSION >= 200000
+        Function *adjust_trampoline = Intrinsic::getOrInsertDeclaration(cw_make->getParent(), Intrinsic::adjust_trampoline);
+#else
         Function *adjust_trampoline = Intrinsic::getDeclaration(cw_make->getParent(), Intrinsic::adjust_trampoline);
+#endif
         cwbuilder.CreateCall(init_trampoline, {
                 Tramp,
-                cwbuilder.CreateBitCast(cw, getInt8PtrTy(ctx.builder.getContext())),
-                cwbuilder.CreateBitCast(NVal, getInt8PtrTy(ctx.builder.getContext()))
+                cw,
+                NVal,
             });
         cwbuilder.CreateRet(cwbuilder.CreateCall(adjust_trampoline, { Tramp }));
         cw = cw_make;
@@ -6571,10 +7566,25 @@ static Function* gen_cfun_wrapper(
     return cw;
 }
 
+static const char *derive_sigt_name(jl_value_t *jargty)
+{
+    jl_datatype_t *dt = (jl_datatype_t*)jl_argument_datatype(jargty);
+    if ((jl_value_t*)dt == jl_nothing)
+        return NULL;
+    jl_sym_t *name = dt->name->singletonname;
+    if (jl_is_type_type((jl_value_t*)dt)) {
+        dt = (jl_datatype_t*)jl_argument_datatype(jl_tparam0(dt));
+        if ((jl_value_t*)dt != jl_nothing) {
+            name = dt->name->singletonname;
+        }
+    }
+    return jl_symbol_name(name);
+}
+
 // Get the LLVM Function* for the C-callable entry point for a certain function
 // and argument types.
 // here argt does not include the leading function type argument
-static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, const jl_cgval_t &fexpr_rt, jl_value_t *declrt, jl_svec_t *argt)
+static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, const jl_cgval_t &fexpr_val, jl_value_t *declrt, jl_svec_t *argt)
 {
     jl_unionall_t *unionall_env = (jl_is_method(ctx.linfo->def.method) && jl_is_unionall(ctx.linfo->def.method->sig))
         ? (jl_unionall_t*)ctx.linfo->def.method->sig
@@ -6619,9 +7629,9 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
         return jl_cgval_t();
     }
     if (rt != declrt && rt != (jl_value_t*)jl_any_type)
-        rt = jl_ensure_rooted(ctx, rt);
+        jl_temporary_root(ctx, rt);
 
-    function_sig_t sig("cfunction", lrt, rt, retboxed, argt, unionall_env, false, CallingConv::C, false, &ctx.emission_context);
+    function_sig_t sig("cfunction", lrt, rt, retboxed, false, argt, unionall_env, false, CallingConv::C, false, &ctx.emission_context);
     assert(sig.fargt.size() + sig.sret == sig.fargt_sig.size());
     if (!sig.err_msg.empty()) {
         emit_error(ctx, sig.err_msg);
@@ -6632,8 +7642,8 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
     // compute+verify the dispatch signature, and see if it depends on the environment sparams
     bool approx = false;
     sigt = (jl_value_t*)jl_alloc_svec(nargt + 1);
-    jl_svecset(sigt, 0, fexpr_rt.typ);
-    if (!fexpr_rt.constant && (!jl_is_concrete_type(fexpr_rt.typ) || jl_is_kind(fexpr_rt.typ)))
+    jl_svecset(sigt, 0, fexpr_val.typ);
+    if (!fexpr_val.constant && (!jl_is_concrete_type(fexpr_val.typ) || jl_is_kind(fexpr_val.typ)))
         approx = true;
     for (size_t i = 0; i < nargt; i++) {
         jl_value_t *jargty = jl_svecref(argt, i);
@@ -6656,43 +7666,42 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
         sigt = NULL;
     }
     else {
-        sigt = jl_apply_tuple_type((jl_svec_t*)sigt);
+        sigt = jl_apply_tuple_type((jl_svec_t*)sigt, 1);
     }
     if (sigt && !(unionall_env && jl_has_typevar_from_unionall(rt, unionall_env))) {
         unionall_env = NULL;
     }
 
-    bool nest = (!fexpr_rt.constant || unionall_env);
+    bool nest = (!fexpr_val.constant || unionall_env);
     if (ctx.emission_context.TargetTriple.isAArch64() || ctx.emission_context.TargetTriple.isARM() || ctx.emission_context.TargetTriple.isPPC64()) {
         if (nest) {
             emit_error(ctx, "cfunction: closures are not supported on this platform");
+            JL_GC_POP();
             return jl_cgval_t();
         }
     }
-    size_t world = jl_atomic_load_acquire(&jl_world_counter);
-    size_t min_valid = 0;
-    size_t max_valid = ~(size_t)0;
-    // try to look up this function for direct invoking
-    jl_method_instance_t *lam = sigt ? jl_get_specialization1((jl_tupletype_t*)sigt, world, &min_valid, &max_valid, 0) : NULL;
+    const char *name = derive_sigt_name(fexpr_val.typ);
     Value *F = gen_cfun_wrapper(
             jl_Module, ctx.emission_context,
-            sig, fexpr_rt.constant, NULL,
-            declrt, lam,
+            sig, fexpr_val.constant, name,
+            declrt, sigt,
             unionall_env, sparam_vals, &closure_types);
     bool outboxed;
     if (nest) {
         // F is actually an init_trampoline function that returns the real address
         // Now fill in the nest parameters
-        Value *fobj = boxed(ctx, fexpr_rt);
+        Value *fobj = boxed(ctx, fexpr_val);
         jl_svec_t *fill = jl_emptysvec;
         if (closure_types) {
             assert(ctx.spvals_ptr);
-            size_t n = jl_array_len(closure_types);
-            jl_svec_t *fill = jl_alloc_svec_uninit(n);
+            size_t n = jl_array_nrows(closure_types);
+            fill = jl_alloc_svec_uninit(n);
             for (size_t i = 0; i < n; i++) {
                 jl_svecset(fill, i, jl_array_ptr_ref(closure_types, i));
             }
-            fill = (jl_svec_t*)jl_ensure_rooted(ctx, (jl_value_t*)fill);
+            JL_GC_PUSH1(&fill);
+            jl_temporary_root(ctx, (jl_value_t*)fill);
+            JL_GC_POP();
         }
         Type *T_htable = ArrayType::get(ctx.types().T_size, sizeof(htable_t) / sizeof(void*));
         Value *cache = new GlobalVariable(*jl_Module, T_htable, false,
@@ -6701,11 +7710,11 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
         F = ctx.builder.CreateCall(prepare_call(jlgetcfunctiontrampoline_func), {
                  fobj,
                  literal_pointer_val(ctx, output_type),
-                 ctx.builder.CreateBitCast(cache, getInt8PtrTy(ctx.builder.getContext())),
+                 cache,
                  literal_pointer_val(ctx, (jl_value_t*)fill),
                  F,
                  closure_types ? literal_pointer_val(ctx, (jl_value_t*)unionall_env) : Constant::getNullValue(ctx.types().T_pjlvalue),
-                 closure_types ? ctx.spvals_ptr : ConstantPointerNull::get(cast<PointerType>(ctx.types().T_pprjlvalue))
+                 closure_types ? decay_derived(ctx, ctx.spvals_ptr) : ConstantPointerNull::get(ctx.builder.getPtrTy(AddressSpace::Derived))
              });
         outboxed = true;
     }
@@ -6714,14 +7723,14 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
         outboxed = (output_type != (jl_value_t*)jl_voidpointer_type);
         if (outboxed) {
             assert(jl_datatype_size(output_type) == sizeof(void*) * 4);
-            Value *strct = emit_allocobj(ctx, (jl_datatype_t*)output_type);
+            Value *strct = emit_allocobj(ctx, (jl_datatype_t*)output_type, true);
             setName(ctx.emission_context, strct, "cfun_result");
-            Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), ctx.types().T_size->getPointerTo());
+            Value *derived_strct = decay_derived(ctx, strct);
             MDNode *tbaa = best_tbaa(ctx.tbaa(), output_type);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
             ai.decorateInst(ctx.builder.CreateStore(F, derived_strct));
             ai.decorateInst(ctx.builder.CreateStore(
-                ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, fexpr_rt.constant), ctx.types().T_size),
+                ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, fexpr_val.constant), ctx.types().T_size),
                 ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, derived_strct, 1)));
             ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(ctx.types().T_size),
                     ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, derived_strct, 2)));
@@ -6736,13 +7745,14 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
 
 // do codegen to create a C-callable alias/wrapper, or if sysimg_handle is set,
 // restore one from a loaded system image.
-const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params)
+const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params)
 {
     ++GeneratedCCallables;
     jl_datatype_t *ft = (jl_datatype_t*)jl_tparam0(sigt);
+    assert(jl_is_datatype(ft));
     jl_value_t *ff = ft->instance;
     assert(ff);
-    const char *name = jl_symbol_name(ft->name->mt->name);
+    const char *name = !jl_is_string(nameval) ? jl_symbol_name(ft->name->singletonname) : jl_string_data(nameval);
     jl_value_t *crt = declrt;
     if (jl_is_abstract_ref_type(declrt)) {
         declrt = jl_tparam0(declrt);
@@ -6761,23 +7771,15 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi
     }
     jl_value_t *err;
     { // scope block for sig
-        function_sig_t sig("cfunction", lcrt, crt, toboxed,
+        function_sig_t sig("cfunction", lcrt, crt, toboxed, false,
                            argtypes, NULL, false, CallingConv::C, false, &params);
         if (sig.err_msg.empty()) {
-            size_t world = jl_atomic_load_acquire(&jl_world_counter);
-            size_t min_valid = 0;
-            size_t max_valid = ~(size_t)0;
-            if (sysimg_handle) {
-                // restore a ccallable from the system image
-                void *addr;
-                int found = jl_dlsym(sysimg_handle, name, &addr, 0);
-                if (found)
-                    add_named_global(name, addr);
-            }
-            else {
-                jl_method_instance_t *lam = jl_get_specialization1((jl_tupletype_t*)sigt, world, &min_valid, &max_valid, 0);
-                //Safe b/c params holds context lock
-                gen_cfun_wrapper(unwrap(llvmmod)->getModuleUnlocked(), params, sig, ff, name, declrt, lam, NULL, NULL, NULL);
+            //Safe b/c params holds context lock
+            Function *cw = gen_cfun_wrapper(llvmmod, params, sig, ff, name, declrt, sigt, NULL, NULL, NULL);
+            auto alias = GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
+                                GlobalValue::ExternalLinkage, name, cw, llvmmod);
+            if (params.TargetTriple.isOSBinFormatCOFF()) {
+                alias->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass);
             }
             JL_GC_POP();
             return name;
@@ -6788,12 +7790,13 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi
 }
 
 // generate a julia-callable function that calls f (AKA lam)
-static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, const jl_returninfo_t &f, int retarg, StringRef funcName,
+// if is_opaque_closure, then generate the OC invoke, rather than a real invoke
+static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *abi, jl_value_t *jlretty, jl_value_t *declrt, jl_returninfo_t &f, unsigned nargs, int retarg, bool is_opaque_closure, StringRef funcName,
         Module *M, jl_codegen_params_t &params)
 {
     ++GeneratedInvokeWrappers;
     Function *w = Function::Create(get_func_sig(M->getContext()), GlobalVariable::ExternalLinkage, funcName, M);
-    jl_init_function(w, params.TargetTriple);
+    jl_init_function(w, params);
     jl_name_jlfunc_args(params, w);
     w->setAttributes(AttributeList::get(M->getContext(), {get_func_attrs(M->getContext()), w->getAttributes()}));
     w->addFnAttr(Attribute::OptimizeNone);
@@ -6805,11 +7808,10 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     //Value *mfunc = &*AI++; (void)mfunc; // unused
     assert(AI == w->arg_end());
 
-    jl_codectx_t ctx(M->getContext(), params);
+    jl_codectx_t ctx(M->getContext(), params, 0, 0);
     ctx.f = w;
     ctx.linfo = lam;
     ctx.rettype = jlretty;
-    ctx.world = 0;
 
     BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", w);
     ctx.builder.SetInsertPoint(b0);
@@ -6817,136 +7819,69 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     ctx.builder.SetCurrentDebugLocation(noDbg);
     allocate_gc_frame(ctx, b0);
 
-    // TODO: replace this with emit_call_specfun_other?
-    FunctionType *ftype = const_cast<llvm::FunctionCallee&>(f.decl).getFunctionType();
-    size_t nfargs = ftype->getNumParams();
-    SmallVector<Value *> args(nfargs);
-    unsigned idx = 0;
-    AllocaInst *result = NULL;
-    switch (f.cc) {
-    case jl_returninfo_t::Boxed:
-    case jl_returninfo_t::Register:
-    case jl_returninfo_t::Ghosts:
-        break;
-    case jl_returninfo_t::SRet:
-        assert(cast<PointerType>(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType()));
-        result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType());
-        setName(ctx.emission_context, result, "sret");
-        args[idx] = result;
-        idx++;
-        break;
-    case jl_returninfo_t::Union:
-        result = ctx.builder.CreateAlloca(ArrayType::get(getInt8Ty(ctx.builder.getContext()), f.union_bytes));
-        if (f.union_align > 1)
-            result->setAlignment(Align(f.union_align));
-        args[idx] = result;
-        idx++;
-        setName(ctx.emission_context, result, "result_union");
-        break;
-    }
-    if (f.return_roots) {
-        AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, f.return_roots));
-        setName(ctx.emission_context, return_roots, "return_roots");
-        args[idx] = return_roots;
-        idx++;
-    }
-    bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
-    if (gcstack_arg) {
-        args[idx] = ctx.pgcstack;
-        idx++;
-    }
-    bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
-    for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) {
-        jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type :
-            jl_nth_slot_type(lam->specTypes, i);
-        // n.b. specTypes is required to be a datatype by construction for specsig
-        bool isboxed = deserves_argbox(ty);
-        Type *lty = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, ty);
-        if (type_is_ghost(lty) || is_uniquerep_Type(ty))
+    SmallVector<jl_cgval_t, 0> argv(nargs);
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+    for (size_t i = 0; i < nargs; ++i) {
+        if (i == 0 && is_opaque_closure) {
+            jl_value_t *oc_type = (jl_value_t*)jl_any_type; // more accurately: get_oc_type(lam->specTypes, jlretty)
+            argv[i] = mark_julia_slot(funcArg, oc_type, NULL, ctx.tbaa().tbaa_const);
             continue;
+        }
+        jl_value_t *ty = jl_nth_slot_type(abi, i);
         Value *theArg;
         if (i == 0) {
-            // This function adapts from generic jlcall to OC specsig. Generic jlcall pointers
-            // come in as ::Tracked, but specsig expected ::Derived.
-            if (is_opaque_closure)
-                theArg = decay_derived(ctx, funcArg);
-            else
-                theArg = funcArg;
+            theArg = funcArg;
         }
         else {
-            Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1);
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+            Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr);
             theArg = ai.decorateInst(maybe_mark_load_dereferenceable(
                     ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))),
                     false,
                     ty));
         }
-        if (!isboxed) {
-            theArg = decay_derived(ctx, emit_bitcast(ctx, theArg, PointerType::get(lty, 0)));
-            if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers
-                theArg = ctx.builder.CreateAlignedLoad(lty, theArg, Align(julia_alignment(ty)));
-        }
-        assert(dyn_cast<UndefValue>(theArg) == NULL);
-        args[idx] = theArg;
-        idx++;
+        argv[i] = mark_julia_type(ctx, theArg, true, ty);
+    }
+    jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, abi, jlretty, f, argv, nargs);
+    if (declrt != jlretty) {
+        emit_typecheck(ctx, retval, declrt, "cfunction");
+        retval = update_julia_type(ctx, retval, declrt);
     }
-    CallInst *call = ctx.builder.CreateCall(f.decl, args);
-    call->setAttributes(f.attrs);
-    if (gcstack_arg)
-        call->setCallingConv(CallingConv::Swift);
-    jl_cgval_t retval;
     if (retarg != -1) {
         Value *theArg;
         if (retarg == 0)
             theArg = funcArg;
         else
             theArg = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
-                    ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, retarg - 1),
+                    emit_ptrgep(ctx, argArray, (retarg - 1) * ctx.types().sizeof_ptr),
                     Align(sizeof(void*)));
         retval = mark_julia_type(ctx, theArg, true, jl_any_type);
     }
-    else {
-        switch (f.cc) {
-        case jl_returninfo_t::Boxed:
-            retval = mark_julia_type(ctx, call, true, jlretty);
-            break;
-        case jl_returninfo_t::Register:
-            retval = mark_julia_type(ctx, call, false, jlretty);
-            break;
-        case jl_returninfo_t::SRet:
-            retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack);
-            break;
-        case jl_returninfo_t::Union:
-            // result is technically not right here, but `boxed` will only look at it
-            // for the unboxed values, so it's ok.
-            retval = mark_julia_slot(result,
-                                     jlretty,
-                                     ctx.builder.CreateExtractValue(call, 1),
-                                     ctx.tbaa().tbaa_stack);
-            retval.Vboxed = ctx.builder.CreateExtractValue(call, 0);
-            assert(retval.Vboxed->getType() == ctx.types().T_prjlvalue);
-            break;
-        case jl_returninfo_t::Ghosts:
-            retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa().tbaa_stack);
-            break;
-        }
+    if (retval.typ == jl_bottom_type)
+        CreateTrap(ctx.builder, false);
+    else
+        ctx.builder.CreateRet(boxed(ctx, retval));
+    if (ctx.topalloca != ctx.pgcstack && ctx.topalloca->use_empty()) {
+       ctx.topalloca->eraseFromParent();
+       ctx.topalloca = nullptr;
     }
-    ctx.builder.CreateRet(boxed(ctx, retval));
-    return w;
 }
 
-static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg)
+static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure,
+        ArrayRef<const char*> ArgNames, unsigned nreq)
 {
+    bool gcstack_arg = params.params->gcstack_arg;
     jl_returninfo_t props = {};
-    SmallVector<Type*, 8> fsig;
+    SmallVector<Type*,8> fsig;
+    SmallVector<std::string,4> argnames;
     Type *rt = NULL;
     Type *srt = NULL;
+    Type *T_prjlvalue = PointerType::get(M->getContext(), AddressSpace::Tracked);
     if (jlrettype == (jl_value_t*)jl_bottom_type) {
-        rt = getVoidTy(ctx.builder.getContext());
+        rt = getVoidTy(M->getContext());
         props.cc = jl_returninfo_t::Register;
     }
     else if (jl_is_structtype(jlrettype) && jl_is_datatype_singleton((jl_datatype_t*)jlrettype)) {
-        rt = getVoidTy(ctx.builder.getContext());
+        rt = getVoidTy(M->getContext());
         props.cc = jl_returninfo_t::Register;
     }
     else if (jl_is_uniontype(jlrettype)) {
@@ -6954,103 +7889,110 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
         union_alloca_type((jl_uniontype_t*)jlrettype, allunbox, props.union_bytes, props.union_align, props.union_minalign);
         if (props.union_bytes) {
             props.cc = jl_returninfo_t::Union;
-            Type *AT = ArrayType::get(getInt8Ty(ctx.builder.getContext()), props.union_bytes);
-            fsig.push_back(AT->getPointerTo());
-            Type *pair[] = { ctx.types().T_prjlvalue, getInt8Ty(ctx.builder.getContext()) };
-            rt = StructType::get(ctx.builder.getContext(), makeArrayRef(pair));
+            fsig.push_back(PointerType::getUnqual(M->getContext()));
+            argnames.push_back("union_bytes_return");
+            Type *pair[] = { T_prjlvalue, getInt8Ty(M->getContext()) };
+            rt = StructType::get(M->getContext(), ArrayRef<Type*>(pair));
         }
         else if (allunbox) {
             props.cc = jl_returninfo_t::Ghosts;
-            rt = getInt8Ty(ctx.builder.getContext());
+            rt = getInt8Ty(M->getContext());
         }
         else {
-            rt = ctx.types().T_prjlvalue;
+            rt = T_prjlvalue;
         }
     }
     else if (!deserves_retbox(jlrettype)) {
         bool retboxed;
-        rt = julia_type_to_llvm(ctx, jlrettype, &retboxed);
+        rt = _julia_type_to_llvm(&params, M->getContext(), jlrettype, &retboxed);
         assert(!retboxed);
-        if (rt != getVoidTy(ctx.builder.getContext()) && deserves_sret(jlrettype, rt)) {
-            auto tracked = CountTrackedPointers(rt);
+        if (rt != getVoidTy(M->getContext()) && deserves_sret(jlrettype, rt)) {
+            auto tracked = CountTrackedPointers(rt, true);
             assert(!tracked.derived);
-            if (tracked.count && !tracked.all)
+            if (tracked.count && !tracked.all) {
                 props.return_roots = tracked.count;
+                assert(props.return_roots == ((jl_datatype_t*)jlrettype)->layout->npointers);
+            }
             props.cc = jl_returninfo_t::SRet;
+            props.union_bytes = jl_datatype_size(jlrettype);
+            props.union_align = props.union_minalign = julia_alignment(jlrettype);
             // sret is always passed from alloca
             assert(M);
-            fsig.push_back(rt->getPointerTo(M->getDataLayout().getAllocaAddrSpace()));
+            fsig.push_back(PointerType::get(M->getContext(), M->getDataLayout().getAllocaAddrSpace()));
+            argnames.push_back("sret_return");
             srt = rt;
-            rt = getVoidTy(ctx.builder.getContext());
+            rt = getVoidTy(M->getContext());
         }
         else {
             props.cc = jl_returninfo_t::Register;
         }
     }
     else {
-        rt = ctx.types().T_prjlvalue;
+        rt = T_prjlvalue;
     }
 
     SmallVector<AttributeSet, 8> attrs; // function declaration attributes
     if (props.cc == jl_returninfo_t::SRet) {
         assert(srt);
-        AttrBuilder param(ctx.builder.getContext());
+        AttrBuilder param(M->getContext());
         param.addStructRetAttr(srt);
         param.addAttribute(Attribute::NoAlias);
         param.addAttribute(Attribute::NoCapture);
         param.addAttribute(Attribute::NoUndef);
-        attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
+        attrs.push_back(AttributeSet::get(M->getContext(), param));
         assert(fsig.size() == 1);
     }
     if (props.cc == jl_returninfo_t::Union) {
-        AttrBuilder param(ctx.builder.getContext());
+        AttrBuilder param(M->getContext());
         param.addAttribute(Attribute::NoAlias);
         param.addAttribute(Attribute::NoCapture);
         param.addAttribute(Attribute::NoUndef);
-        attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
+        attrs.push_back(AttributeSet::get(M->getContext(), param));
         assert(fsig.size() == 1);
     }
 
     if (props.return_roots) {
-        AttrBuilder param(ctx.builder.getContext());
+        AttrBuilder param(M->getContext());
         param.addAttribute(Attribute::NoAlias);
         param.addAttribute(Attribute::NoCapture);
         param.addAttribute(Attribute::NoUndef);
-        attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
-        fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0));
+        attrs.push_back(AttributeSet::get(M->getContext(), param));
+        fsig.push_back(getPointerTy(M->getContext()));
+        argnames.push_back("return_roots");
     }
 
-    if (gcstack_arg){
-        AttrBuilder param(ctx.builder.getContext());
-        param.addAttribute(Attribute::SwiftSelf);
+    if (gcstack_arg) {
+        AttrBuilder param(M->getContext());
+        if (params.use_swiftcc)
+            param.addAttribute(Attribute::SwiftSelf);
+        param.addAttribute("gcstack");
         param.addAttribute(Attribute::NonNull);
-        attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
-        fsig.push_back(PointerType::get(JuliaType::get_ppjlvalue_ty(ctx.builder.getContext()), 0));
+        attrs.push_back(AttributeSet::get(M->getContext(), param));
+        fsig.push_back(PointerType::get(M->getContext(), 0));
+        argnames.push_back("pgcstack_arg");
     }
 
-    for (size_t i = 0; i < jl_nparams(sig); i++) {
+    size_t nparams = jl_nparams(sig);
+    for (size_t i = 0; i < nparams; i++) {
         jl_value_t *jt = jl_tparam(sig, i);
         bool isboxed = false;
-        Type *ty = NULL;
-        if (i == 0 && is_opaque_closure) {
-            ty = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
-            isboxed = true; // true-ish anyway - we might not have the type tag
-        }
-        else {
+        Type *et = nullptr;
+        if (i != 0 || !is_opaque_closure) { // special token for OC argument
             if (is_uniquerep_Type(jt))
                 continue;
             isboxed = deserves_argbox(jt);
-            ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
+            et = isboxed ? T_prjlvalue : _julia_type_to_llvm(&params, M->getContext(), jt, nullptr);
+            if (type_is_ghost(et))
+                continue;
         }
-        if (type_is_ghost(ty))
-            continue;
-        AttrBuilder param(ctx.builder.getContext());
-        if (ty->isAggregateType()) { // aggregate types are passed by pointer
+        AttrBuilder param(M->getContext());
+        Type *ty = et;
+        if (et == nullptr || et->isAggregateType()) { // aggregate types are passed by pointer
             param.addAttribute(Attribute::NoCapture);
             param.addAttribute(Attribute::ReadOnly);
-            ty = PointerType::get(ty, AddressSpace::Derived);
+            ty = PointerType::get(M->getContext(), AddressSpace::Derived);
         }
-        else if (isboxed && jl_is_immutable_datatype(jt)) {
+        else if (isboxed && jl_may_be_immutable_datatype(jt) && !jl_is_abstracttype(jt)) {
             param.addAttribute(Attribute::ReadOnly);
         }
         else if (jl_is_primitivetype(jt) && ty->isIntegerTy()) {
@@ -7058,25 +8000,53 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
             Attribute::AttrKind attr = issigned ? Attribute::SExt : Attribute::ZExt;
             param.addAttribute(attr);
         }
-        attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
+        attrs.push_back(AttributeSet::get(M->getContext(), param));
         fsig.push_back(ty);
+        size_t argno = i < nreq ? i : nreq;
+        std::string genname;
+        if (!ArgNames.empty()) {
+            genname = ArgNames[argno];
+            if (genname.empty())
+                genname = (StringRef("#") + Twine(argno + 1)).str();
+            if (i >= nreq)
+                genname += (StringRef("[") + Twine(i - nreq + 1) + StringRef("]")).str();
+            const char *arg_typename = jl_is_datatype(jt) ? jl_symbol_name(((jl_datatype_t*)jt)->name->name) : "<unknown type>";
+            argnames.push_back((genname + StringRef("::") + arg_typename).str());
+        }
+        if (et && et->isAggregateType()) {
+            auto tracked = CountTrackedPointers(et);
+            if (tracked.count && !tracked.all) {
+                attrs.push_back(AttributeSet::get(M->getContext(), param));
+                fsig.push_back(PointerType::get(M->getContext(), M->getDataLayout().getAllocaAddrSpace()));
+                if (!genname.empty())
+                    argnames.push_back((Twine(".roots.") + genname).str());
+            }
+        }
     }
 
     AttributeSet FnAttrs;
     AttributeSet RetAttrs;
     if (jlrettype == (jl_value_t*)jl_bottom_type)
-        FnAttrs = FnAttrs.addAttribute(ctx.builder.getContext(), Attribute::NoReturn);
-    else if (rt == ctx.types().T_prjlvalue)
-        RetAttrs = RetAttrs.addAttribute(ctx.builder.getContext(), Attribute::NonNull);
-    AttributeList attributes = AttributeList::get(ctx.builder.getContext(), FnAttrs, RetAttrs, attrs);
+        FnAttrs = FnAttrs.addAttribute(M->getContext(), Attribute::NoReturn);
+    else if (rt == T_prjlvalue)
+        RetAttrs = RetAttrs.addAttribute(M->getContext(), Attribute::NonNull);
+    AttributeList attributes = AttributeList::get(M->getContext(), FnAttrs, RetAttrs, attrs);
 
     FunctionType *ftype = FunctionType::get(rt, fsig, false);
     if (fval == NULL) {
         Function *f = M ? cast_or_null<Function>(M->getNamedValue(name)) : NULL;
         if (f == NULL) {
             f = Function::Create(ftype, GlobalVariable::ExternalLinkage, name, M);
-            jl_init_function(f, ctx.emission_context.TargetTriple);
-            f->setAttributes(AttributeList::get(f->getContext(), {attributes, f->getAttributes()}));
+            jl_init_function(f, params);
+            if (params.params->debug_info_level >= 2) {
+                ios_t sigbuf;
+                ios_mem(&sigbuf, 0);
+                jl_static_show_func_sig((JL_STREAM*) &sigbuf, sig);
+                f->setAttributes(AttributeList::get(f->getContext(), {attributes.addFnAttribute(M->getContext(),"julia.fsig", StringRef(sigbuf.buf, sigbuf.size)), f->getAttributes()}));
+                ios_close(&sigbuf);
+            } else {
+                f->setAttributes(AttributeList::get(f->getContext(), {attributes, f->getAttributes()}));
+            }
         }
         else {
             assert(f->getFunctionType() == ftype);
@@ -7084,31 +8054,26 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
         fval = f;
     }
     else {
-        if (fval->getType()->isIntegerTy())
-            fval = emit_inttoptr(ctx, fval, ftype->getPointerTo());
-        else
-            fval = emit_bitcast(ctx, fval, ftype->getPointerTo());
+        assert(fval->getType()->isPointerTy());
+    }
+    if (auto F = dyn_cast<Function>(fval)) {
+        if (gcstack_arg && params.use_swiftcc)
+            F->setCallingConv(CallingConv::Swift);
+        assert(F->arg_size() >= argnames.size());
+        for (size_t i = 0; i < argnames.size(); i++) {
+            F->getArg(i)->setName(argnames[i]);
+        }
     }
-    if (gcstack_arg && isa<Function>(fval))
-        cast<Function>(fval)->setCallingConv(CallingConv::Swift);
     props.decl = FunctionCallee(ftype, fval);
     props.attrs = attributes;
     return props;
 }
 
-static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, Type *ShadowT, unsigned count)
-{
-    if (isptr && !cast<PointerType>(Src->getType())->isOpaqueOrPointeeTypeMatches(T))
-        Src = ctx.builder.CreateBitCast(Src, T->getPointerTo(Src->getType()->getPointerAddressSpace()));
-    unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ShadowT, ctx.builder); //This comes from Late-GC-Lowering??
-    assert(emitted == count); (void)emitted; (void)count;
-}
-
 static DISubroutineType *
 get_specsig_di(jl_codectx_t &ctx, jl_debugcache_t &debuginfo, jl_value_t *rt, jl_value_t *sig, DIBuilder &dbuilder)
 {
     size_t nargs = jl_nparams(sig); // TODO: if this is a Varargs function, our debug info for the `...` var may be misleading
-    std::vector<Metadata*> ditypes(nargs + 1);
+    SmallVector<Metadata*, 0> ditypes(nargs + 1);
     ditypes[0] = julia_type_to_di(ctx, debuginfo, rt, &dbuilder, false);
     for (size_t i = 0; i < nargs; i++) {
         jl_value_t *jt = jl_tparam(sig, i);
@@ -7118,13 +8083,13 @@ get_specsig_di(jl_codectx_t &ctx, jl_debugcache_t &debuginfo, jl_value_t *rt, jl
 }
 
 /* aka Core.Compiler.tuple_tfunc */
-static jl_datatype_t *compute_va_type(jl_method_instance_t *lam, size_t nreq)
+static jl_datatype_t *compute_va_type(jl_value_t *sig, size_t nreq)
 {
-    size_t nvargs = jl_nparams(lam->specTypes)-nreq;
+    size_t nvargs = jl_nparams(sig)-nreq;
     jl_svec_t *tupargs = jl_alloc_svec(nvargs);
     JL_GC_PUSH1(&tupargs);
-    for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) {
-        jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
+    for (size_t i = nreq; i < jl_nparams(sig); ++i) {
+        jl_value_t *argType = jl_nth_slot_type(sig, i);
         // n.b. specTypes is required to be a datatype by construction for specsig
         if (is_uniquerep_Type(argType))
             argType = jl_typeof(jl_tparam0(argType));
@@ -7134,66 +8099,44 @@ static jl_datatype_t *compute_va_type(jl_method_instance_t *lam, size_t nreq)
         }
         jl_svecset(tupargs, i-nreq, argType);
     }
-    jl_value_t *typ = jl_apply_tuple_type(tupargs);
+    jl_value_t *typ = jl_apply_tuple_type(tupargs, 1);
     JL_GC_POP();
     return (jl_datatype_t*)typ;
 }
 
-static std::string get_function_name(bool specsig, bool needsparams, const char *unadorned_name, const Triple &TargetTriple)
-{
-    std::string _funcName;
-    raw_string_ostream funcName(_funcName);
-    // try to avoid conflicts in the global symbol table
-    if (specsig)
-        funcName << "julia_"; // api 5
-    else if (needsparams)
-        funcName << "japi3_";
-    else
-        funcName << "japi1_";
-    if (TargetTriple.isOSLinux()) {
-        if (unadorned_name[0] == '@')
-            unadorned_name++;
-    }
-    funcName << unadorned_name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
-    return funcName.str();
-}
-
 // Compile to LLVM IR, using a specialized signature if applicable.
 static jl_llvm_functions_t
     emit_function(
         orc::ThreadSafeModule &TSM,
         jl_method_instance_t *lam,
         jl_code_info_t *src,
+        jl_value_t *abi,
         jl_value_t *jlrettype,
         jl_codegen_params_t &params)
 {
     ++EmittedFunctions;
     // step 1. unpack AST and allocate codegen context for this function
+    size_t min_world = src->min_world;
+    size_t max_world = src->max_world;
     jl_llvm_functions_t declarations;
-    jl_codectx_t ctx(*params.tsctx.getContext(), params);
+    jl_codectx_t ctx(*params.tsctx.getContext(), params, min_world, max_world);
     jl_datatype_t *vatyp = NULL;
     JL_GC_PUSH2(&ctx.code, &vatyp);
     ctx.code = src->code;
     ctx.source = src;
 
-    std::map<int, BasicBlock*> labels;
-    bool toplevel = false;
     ctx.module = jl_is_method(lam->def.method) ? lam->def.method->module : lam->def.module;
     ctx.linfo = lam;
-    ctx.name = TSM.getModuleUnlocked()->getModuleIdentifier().data();
-    size_t nreq = 0;
-    int va = 0;
-    if (jl_is_method(lam->def.method)) {
-        ctx.nargs = nreq = lam->def.method->nargs;
-        ctx.is_opaque_closure = lam->def.method->is_for_opaque_closure;
-        if ((nreq > 0 && jl_is_method(lam->def.value) && lam->def.method->isva)) {
-            assert(nreq > 0);
-            nreq--;
-            va = 1;
-        }
+    ctx.name = name_from_method_instance(lam);
+    size_t nreq = src->nargs;
+    int va = src->isva;
+    ctx.nargs = nreq;
+    if (va) {
+        assert(nreq > 0);
+        nreq--;
     }
-    else {
-        ctx.nargs = 0;
+    if (jl_is_method(lam->def.value)) {
+        ctx.is_opaque_closure = lam->def.method->is_for_opaque_closure;
     }
     ctx.nReqArgs = nreq;
     if (va) {
@@ -7201,7 +8144,6 @@ static jl_llvm_functions_t
         if (vn != jl_unused_sym)
             ctx.vaSlot = ctx.nargs - 1;
     }
-    toplevel = !jl_is_method(lam->def.method);
     ctx.rettype = jlrettype;
     ctx.funcName = ctx.name;
     ctx.spvals_ptr = NULL;
@@ -7221,26 +8163,75 @@ static jl_llvm_functions_t
     if (lam && jl_is_method(lam->def.method)) {
         toplineno = lam->def.method->line;
         ctx.file = jl_symbol_name(lam->def.method->file);
-    }
-    else if (jl_array_len(src->linetable) > 0) {
-        jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, 0);
-        ctx.file = jl_symbol_name((jl_sym_t*)jl_fieldref_noalloc(locinfo, 2));
-        toplineno = jl_unbox_int32(jl_fieldref(locinfo, 3));
+        ctx.line = lam->def.method->line;
+    }
+    else if ((jl_value_t*)src->debuginfo != jl_nothing) {
+        // look for the file and line info of the original start of this block, as reported by lowering
+        jl_debuginfo_t *debuginfo = src->debuginfo;
+        while ((jl_value_t*)debuginfo->linetable != jl_nothing)
+            debuginfo = debuginfo->linetable;
+        ctx.file = jl_debuginfo_file(debuginfo);
+        struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, 0);
+        ctx.line = lineidx.line;
+        toplineno = std::max((int32_t)0, lineidx.line);
     }
     if (ctx.file.empty())
         ctx.file = "<missing>";
     // jl_printf(JL_STDERR, "\n*** compiling %s at %s:%d\n\n",
     //           jl_symbol_name(ctx.name), ctx.file.str().c_str(), toplineno);
 
-    bool debug_enabled = ctx.emission_context.debug_level != 0;
+    bool debug_enabled = ctx.emission_context.params->debug_info_level != 0;
     if (dbgFuncName.empty()) // Should never happen anymore?
         debug_enabled = false;
 
+    // First go through and collect all branch targets, so we know where to
+    // split basic blocks.
+    std::set<int> branch_targets; // 1-indexed, sorted
+    for (size_t i = 0; i < stmtslen; ++i) {
+        jl_value_t *stmt = jl_array_ptr_ref(stmts, i);
+        if (jl_is_gotoifnot(stmt)) {
+            int dest = jl_gotoifnot_label(stmt);
+            branch_targets.insert(dest);
+            // The next 1-indexed statement
+            branch_targets.insert(i + 2);
+        }
+        else if (jl_is_returnnode(stmt)) {
+            // We don't do dead branch elimination before codegen
+            // so we need to make sure to start a BB after any
+            // return node, even if they aren't otherwise branch
+            // targets.
+            if (i + 2 <= stmtslen)
+                branch_targets.insert(i + 2);
+        }
+        else if (jl_is_enternode(stmt)) {
+            branch_targets.insert(i + 1);
+            if (i + 2 <= stmtslen)
+                branch_targets.insert(i + 2);
+            size_t catch_dest = jl_enternode_catch_dest(stmt);
+            if (catch_dest)
+                branch_targets.insert(catch_dest);
+        }
+        else if (jl_is_gotonode(stmt)) {
+            int dest = jl_gotonode_label(stmt);
+            branch_targets.insert(dest);
+            if (i + 2 <= stmtslen)
+                branch_targets.insert(i + 2);
+        }
+        else if (jl_is_phinode(stmt)) {
+            jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(stmt, 0);
+            for (size_t j = 0; j < jl_array_nrows(edges); ++j) {
+                size_t edge = jl_array_data(edges, int32_t)[j];
+                if (edge == i)
+                    branch_targets.insert(i + 1);
+            }
+        }
+    }
+
     // step 2. process var-info lists to see what vars need boxing
-    int n_ssavalues = jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_len(src->ssavaluetypes);
+    int n_ssavalues = jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_nrows(src->ssavaluetypes);
     size_t vinfoslen = jl_array_dim0(src->slotflags);
     ctx.slots.resize(vinfoslen, jl_varinfo_t(ctx.builder.getContext()));
-    assert(lam->specTypes); // the specTypes field should always be assigned
+    assert(abi); // the specTypes field should always be assigned
 
 
     // create SAvalue locations for SSAValue objects
@@ -7249,25 +8240,24 @@ static jl_llvm_functions_t
     ctx.ssavalue_usecount.assign(n_ssavalues, 0);
 
     bool specsig, needsparams;
-    std::tie(specsig, needsparams) = uses_specsig(lam, jlrettype, params.params->prefer_specsig);
-    if (!src->inferred)
-        specsig = false;
+    std::tie(specsig, needsparams) = uses_specsig(abi, lam, jlrettype, params.params->prefer_specsig);
 
     // step 3. some variable analysis
     size_t i;
-    for (i = 0; i < nreq; i++) {
+    for (i = 0; i < nreq && i < vinfoslen; i++) {
         jl_varinfo_t &varinfo = ctx.slots[i];
         varinfo.isArgument = true;
         jl_sym_t *argname = slot_symbol(ctx, i);
         if (argname == jl_unused_sym)
             continue;
-        jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i);
+        jl_value_t *ty = jl_nth_slot_type(abi, i);
         // TODO: jl_nth_slot_type should call jl_rewrap_unionall
         //  specTypes is required to be a datatype by construction for specsig, but maybe not otherwise
         // OpaqueClosure implicitly loads the env
         if (i == 0 && ctx.is_opaque_closure) {
+            // n.b. this is not really needed, because ty was already supposed to be correct
             if (jl_is_array(src->slottypes)) {
-                ty = jl_arrayref((jl_array_t*)src->slottypes, i);
+                ty = jl_array_ptr_ref((jl_array_t*)src->slottypes, i);
             }
             else {
                 ty = (jl_value_t*)jl_any_type;
@@ -7278,7 +8268,7 @@ static jl_llvm_functions_t
     if (va && ctx.vaSlot != -1) {
         jl_varinfo_t &varinfo = ctx.slots[ctx.vaSlot];
         varinfo.isArgument = true;
-        vatyp = specsig ? compute_va_type(lam, nreq) : (jl_tuple_type);
+        vatyp = specsig ? compute_va_type(abi, nreq) : (jl_tuple_type);
         varinfo.value = mark_julia_type(ctx, (Value*)NULL, false, vatyp);
     }
 
@@ -7286,7 +8276,7 @@ static jl_llvm_functions_t
         jl_varinfo_t &varinfo = ctx.slots[i];
         uint8_t flags = jl_array_uint8_ref(src->slotflags, i);
         varinfo.isSA = (jl_vinfo_sa(flags) != 0) || varinfo.isArgument;
-        varinfo.usedUndef = (jl_vinfo_usedundef(flags) != 0) || (!varinfo.isArgument && !src->inferred);
+        varinfo.usedUndef = (jl_vinfo_usedundef(flags) != 0) || !varinfo.isArgument;
         if (!varinfo.isArgument) {
             varinfo.value = mark_julia_type(ctx, (Value*)NULL, false, (jl_value_t*)jl_any_type);
         }
@@ -7297,7 +8287,7 @@ static jl_llvm_functions_t
         simple_use_analysis(ctx, jl_array_ptr_ref(stmts, i));
 
     // determine which vars need to be volatile
-    mark_volatile_vars(stmts, ctx.slots);
+    mark_volatile_vars(stmts, ctx.slots, branch_targets);
 
     // step 4. determine function signature
     if (!specsig)
@@ -7309,24 +8299,38 @@ static jl_llvm_functions_t
     // allocate Function declarations and wrapper objects
     //Safe because params holds ctx lock
     Module *M = TSM.getModuleUnlocked();
-    M->addModuleFlag(Module::Warning, "julia.debug_level", ctx.emission_context.debug_level);
-    jl_debugcache_t debuginfo;
-    debuginfo.initialize(M);
+    jl_debugcache_t debugcache;
+    debugcache.initialize(M);
     jl_returninfo_t returninfo = {};
     Function *f = NULL;
     bool has_sret = false;
     if (specsig) { // assumes !va and !needsparams
-        returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes,
-                                          jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg));
+        SmallVector<const char*,0> ArgNames(0);
+        if (!M->getContext().shouldDiscardValueNames()) {
+            ArgNames.resize(ctx.nargs, "");
+            for (int i = 0; i < ctx.nargs; i++) {
+                jl_sym_t *argname = slot_symbol(ctx, i);
+                if (argname == jl_unused_sym)
+                    continue;
+                const char *name = jl_symbol_name(argname);
+                if (name[0] == '\0' && ctx.vaSlot == i)
+                    ArgNames[i] = "...";
+                else
+                    ArgNames[i] = name;
+            }
+        }
+        returninfo = get_specsig_function(params, M, NULL, declarations.specFunctionObject, abi,
+                                          jlrettype, ctx.is_opaque_closure,
+                                          ArgNames, nreq);
         f = cast<Function>(returninfo.decl.getCallee());
         has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union);
-        jl_init_function(f, ctx.emission_context.TargetTriple);
+        jl_init_function(f, ctx.emission_context);
 
         // common pattern: see if all return statements are an argument in that
         // case the apply-generic call can re-use the original box for the return
         int retarg = [stmts, nreq]() {
             int retarg = -1;
-            for (size_t i = 0; i < jl_array_len(stmts); ++i) {
+            for (size_t i = 0; i < jl_array_nrows(stmts); ++i) {
                 jl_value_t *stmt = jl_array_ptr_ref(stmts, i);
                 if (jl_is_returnnode(stmt)) {
                     stmt = jl_returnnode_value(stmt);
@@ -7347,9 +8351,10 @@ static jl_llvm_functions_t
         }();
 
         std::string wrapName;
-        raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
+        raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
         declarations.functionObject = wrapName;
-        (void)gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, declarations.functionObject, M, ctx.emission_context);
+        size_t nparams = jl_nparams(abi);
+        gen_invoke_wrapper(lam, abi, jlrettype, jlrettype, returninfo, nparams, retarg, ctx.is_opaque_closure, declarations.functionObject, M, ctx.emission_context);
         // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType)
         // TODO: add attributes: dereferenceable<sizeof(void*) * nreq>
         // TODO: (if needsparams) add attributes: dereferenceable<sizeof(void*) * length(sp)>, readonly, nocapture
@@ -7358,17 +8363,24 @@ static jl_llvm_functions_t
         f = Function::Create(needsparams ? ctx.types().T_jlfuncparams : ctx.types().T_jlfunc,
                              GlobalVariable::ExternalLinkage,
                              declarations.specFunctionObject, M);
-        jl_init_function(f, ctx.emission_context.TargetTriple);
-        if (needsparams) {
+        jl_init_function(f, ctx.emission_context);
+        if (needsparams)
             jl_name_jlfuncparams_args(ctx.emission_context, f);
-        } else {
+        else
             jl_name_jlfunc_args(ctx.emission_context, f);
-        }
         f->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), f->getAttributes()}));
         returninfo.decl = f;
         declarations.functionObject = needsparams ? "jl_fptr_sparam" : "jl_fptr_args";
     }
 
+    if (!params.getContext().shouldDiscardValueNames() && ctx.emission_context.params->debug_info_level >= 2 && lam->def.method && jl_is_method(lam->def.method) && lam->specTypes != (jl_value_t*)jl_emptytuple_type) {
+        ios_t sigbuf;
+        ios_mem(&sigbuf, 0);
+        jl_static_show_func_sig((JL_STREAM*) &sigbuf, (jl_value_t*)abi);
+        f->addFnAttr("julia.fsig", StringRef(sigbuf.buf, sigbuf.size));
+        ios_close(&sigbuf);
+    }
+
     AttrBuilder FnAttrs(ctx.builder.getContext(), f->getAttributes().getFnAttrs());
     AttrBuilder RetAttrs(ctx.builder.getContext(), f->getAttributes().getRetAttrs());
 
@@ -7387,11 +8399,9 @@ static jl_llvm_functions_t
     FnAttrs.addAttribute(Attribute::StackProtectStrong);
 #endif
 
-#ifdef _COMPILER_TSAN_ENABLED_
-    // TODO: enable this only when a argument like `-race` is passed to Julia
-    //       add a macro for no_sanitize_thread
-    FnAttrs.addAttribute(llvm::Attribute::SanitizeThread);
-#endif
+    // TODO: add a macro for no_sanitize_thread
+    if (JL_FEAT_TEST(ctx, sanitize_thread))
+        FnAttrs.addAttribute(llvm::Attribute::SanitizeThread);
 
     // add the optimization level specified for this module, if any
     int optlevel = jl_get_module_optlevel(ctx.module);
@@ -7416,12 +8426,12 @@ static jl_llvm_functions_t
     if (debug_enabled) {
         topfile = dbuilder.createFile(ctx.file, ".");
         DISubroutineType *subrty;
-        if (ctx.emission_context.debug_level <= 1)
-            subrty = debuginfo.jl_di_func_null_sig;
+        if (ctx.emission_context.params->debug_info_level <= 1)
+            subrty = debugcache.jl_di_func_null_sig;
         else if (!specsig)
-            subrty = debuginfo.jl_di_func_sig;
+            subrty = debugcache.jl_di_func_sig;
         else
-            subrty = get_specsig_di(ctx, debuginfo, jlrettype, lam->specTypes, dbuilder);
+            subrty = get_specsig_di(ctx, debugcache, jlrettype, abi, dbuilder);
         SP = dbuilder.createFunction(nullptr
                                      ,dbgFuncName      // Name
                                      ,f->getName()     // LinkageName
@@ -7437,7 +8447,7 @@ static jl_llvm_functions_t
                                      );
         topdebugloc = DILocation::get(ctx.builder.getContext(), toplineno, 0, SP, NULL);
         f->setSubprogram(SP);
-        if (ctx.emission_context.debug_level >= 2) {
+        if (ctx.emission_context.params->debug_info_level >= 2) {
             const bool AlwaysPreserve = true;
             // Go over all arguments and local variables and initialize their debug information
             for (i = 0; i < nreq; i++) {
@@ -7452,7 +8462,7 @@ static jl_llvm_functions_t
                     topfile,                            // File
                     toplineno == -1 ? 0 : toplineno,    // Line
                     // Variable type
-                    julia_type_to_di(ctx, debuginfo, varinfo.value.typ, &dbuilder, false),
+                    julia_type_to_di(ctx, debugcache, varinfo.value.typ, &dbuilder, false),
                     AlwaysPreserve,                     // May be deleted if optimized out
                     DINode::FlagZero);                  // Flags (TODO: Do we need any)
             }
@@ -7463,7 +8473,7 @@ static jl_llvm_functions_t
                     has_sret + nreq + 1,                // Argument number (1-based)
                     topfile,                            // File
                     toplineno == -1 ? 0 : toplineno,    // Line (for now, use lineno of the function)
-                    julia_type_to_di(ctx, debuginfo, ctx.slots[ctx.vaSlot].value.typ, &dbuilder, false),
+                    julia_type_to_di(ctx, debugcache, ctx.slots[ctx.vaSlot].value.typ, &dbuilder, false),
                     AlwaysPreserve,                     // May be deleted if optimized out
                     DINode::FlagZero);                  // Flags (TODO: Do we need any)
             }
@@ -7478,7 +8488,7 @@ static jl_llvm_functions_t
                     jl_symbol_name(s),       // Variable name
                     topfile,                 // File
                     toplineno == -1 ? 0 : toplineno, // Line (for now, use lineno of the function)
-                    julia_type_to_di(ctx, debuginfo, varinfo.value.typ, &dbuilder, false), // Variable type
+                    julia_type_to_di(ctx, debugcache, varinfo.value.typ, &dbuilder, false), // Variable type
                     AlwaysPreserve,          // May be deleted if optimized out
                     DINode::FlagZero         // Flags (TODO: Do we need any)
                     );
@@ -7508,19 +8518,70 @@ static jl_llvm_functions_t
             ctx.spvals_ptr = &*AI++;
         }
     }
-    // step 6. set up GC frame
+    // step 6a. set up special arguments and attributes
+    Function::arg_iterator AI = f->arg_begin();
+    SmallVector<AttributeSet, 0> attrs(f->arg_size()); // function declaration attributes
+
+    if (has_sret) {
+        Argument *Arg = &*AI;
+        ++AI;
+        AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
+        if (returninfo.cc == jl_returninfo_t::Union) {
+            param.addAttribute(Attribute::NonNull);
+            // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
+            param.addDereferenceableAttr(returninfo.union_bytes);
+            param.addAlignmentAttr(returninfo.union_align);
+        }
+        else {
+            const DataLayout &DL = jl_Module->getDataLayout();
+            Type *RT = Arg->getParamStructRetType();
+            TypeSize sz = DL.getTypeAllocSize(RT);
+            Align al = DL.getPrefTypeAlign(RT);
+            if (al > MAX_ALIGN)
+                al = Align(MAX_ALIGN);
+            param.addAttribute(Attribute::NonNull);
+            // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
+            param.addDereferenceableAttr(sz);
+            param.addAlignmentAttr(al);
+        }
+        attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
+    }
+    if (returninfo.return_roots) {
+        Argument *Arg = &*AI;
+        ++AI;
+        AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
+        param.addAttribute(Attribute::NonNull);
+        // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
+        size_t size = returninfo.return_roots * sizeof(jl_value_t*);
+        param.addDereferenceableAttr(size);
+        param.addAlignmentAttr(Align(sizeof(jl_value_t*)));
+        attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
+    }
+    if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)) {
+        Argument *Arg = &*AI;
+        ctx.pgcstack = Arg;
+        ++AI;
+        AttrBuilder param(ctx.builder.getContext());
+        attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param);
+    }
+
+    // step 6b. Setup the GC frame and entry safepoint before any loads
     allocate_gc_frame(ctx, b0);
+    if (params.safepoint_on_entry && JL_FEAT_TEST(ctx, safepoint_on_entry))
+        emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const);
+
     Value *last_age = NULL;
-    Value *world_age_field = get_last_age_field(ctx);
-    if (toplevel || ctx.is_opaque_closure) {
+    Value *world_age_field = NULL;
+    if (ctx.is_opaque_closure) {
+        world_age_field = get_tls_world_age_field(ctx);
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
         last_age = ai.decorateInst(ctx.builder.CreateAlignedLoad(
-            ctx.types().T_size, world_age_field, ctx.types().alignof_ptr));
+                   ctx.types().T_size, world_age_field, ctx.types().alignof_ptr));
     }
 
     // step 7. allocate local variables slots
     // must be in the first basic block for the llvm mem2reg pass to work
-    auto allocate_local = [&](jl_varinfo_t &varinfo, jl_sym_t *s) {
+    auto allocate_local = [&ctx, &dbuilder, &debugcache, topdebugloc, va, debug_enabled](jl_varinfo_t &varinfo, jl_sym_t *s, int i) {
         jl_value_t *jt = varinfo.value.typ;
         assert(!varinfo.boxroot); // variables shouldn't have memory locs already
         if (varinfo.value.constant) {
@@ -7528,10 +8589,10 @@ static jl_llvm_functions_t
             alloc_def_flag(ctx, varinfo);
             return;
         }
-        else if (varinfo.isArgument && !(specsig && i == (size_t)ctx.vaSlot)) {
-            // if we can unbox it, just use the input pointer
-            if (i != (size_t)ctx.vaSlot && jl_is_concrete_immutable(jt))
-                return;
+        else if (varinfo.isArgument && (!va || ctx.vaSlot == -1 || i != ctx.vaSlot)) {
+            // just use the input pointer, if we have it
+            // (we will need to attach debuginfo later to it)
+            return;
         }
         else if (jl_is_uniontype(jt)) {
             bool allunbox;
@@ -7540,16 +8601,18 @@ static jl_llvm_functions_t
             if (lv) {
                 lv->setName(jl_symbol_name(s));
                 varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack);
-                varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()));
+                varinfo.pTIndex = emit_static_alloca(ctx, 1, Align(1));
                 setName(ctx.emission_context, varinfo.pTIndex, "tindex");
+                // TODO: attach debug metadata to this variable
             }
             else if (allunbox) {
                 // all ghost values just need a selector allocated
-                AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()));
+                AllocaInst *lv = emit_static_alloca(ctx, 1, Align(1));
                 lv->setName(jl_symbol_name(s));
                 varinfo.pTIndex = lv;
                 varinfo.value.tbaa = NULL;
                 varinfo.value.isboxed = false;
+                // TODO: attach debug metadata to this variable
             }
             if (lv || allunbox)
                 alloc_def_flag(ctx, varinfo);
@@ -7557,48 +8620,35 @@ static jl_llvm_functions_t
                 return;
         }
         else if (deserves_stack(jt)) {
-            bool isboxed;
-            Type *vtype = julia_type_to_llvm(ctx, jt, &isboxed);
-            assert(!isboxed);
-            assert(!type_is_ghost(vtype) && "constants should already be handled");
-            Value *lv = new AllocaInst(vtype, M->getDataLayout().getAllocaAddrSpace(), NULL, Align(jl_datatype_align(jt)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca);
-            if (CountTrackedPointers(vtype).count) {
-                StoreInst *SI = new StoreInst(Constant::getNullValue(vtype), lv, false, Align(sizeof(void*)));
-                SI->insertAfter(ctx.topalloca);
-            }
-            varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack);
+            auto sizes = split_value_size((jl_datatype_t*)jt);
+            AllocaInst *bits = sizes.first > 0 ? emit_static_alloca(ctx, sizes.first, Align(julia_alignment(jt))) : nullptr;
+            AllocaInst *roots = sizes.second > 0 ? emit_static_roots(ctx, sizes.second) : nullptr;
+            if (bits) bits->setName(jl_symbol_name(s));
+            if (roots) roots->setName(StringRef(".roots.") + jl_symbol_name(s));
+            varinfo.value = mark_julia_slot(bits, jt, NULL, ctx.tbaa().tbaa_stack, None);
+            varinfo.inline_roots = roots;
             alloc_def_flag(ctx, varinfo);
             if (debug_enabled && varinfo.dinfo) {
-                assert((Metadata*)varinfo.dinfo->getType() != debuginfo.jl_pvalue_dillvmt);
-                dbuilder.insertDeclare(lv, varinfo.dinfo, dbuilder.createExpression(),
+                assert((Metadata*)varinfo.dinfo->getType() != debugcache.jl_pvalue_dillvmt);
+                dbuilder.insertDeclare(bits ? bits : roots, varinfo.dinfo, dbuilder.createExpression(),
                                        topdebugloc,
                                        ctx.builder.GetInsertBlock());
             }
             return;
         }
-        if (!varinfo.isArgument || // always need a slot if the variable is assigned
-            specsig || // for arguments, give them stack slots if they aren't in `argArray` (otherwise, will use that pointer)
-            (va && (int)i == ctx.vaSlot) || // or it's the va arg tuple
-            i == 0) { // or it is the first argument (which isn't in `argArray`)
-            AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, M->getDataLayout().getAllocaAddrSpace(),
-                jl_symbol_name(s), /*InsertBefore*/ctx.topalloca);
-            StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*)));
-            SI->insertAfter(ctx.topalloca);
-            varinfo.boxroot = av;
-            if (debug_enabled && varinfo.dinfo) {
-                DIExpression *expr;
-                if ((Metadata*)varinfo.dinfo->getType() == debuginfo.jl_pvalue_dillvmt) {
-                    expr = dbuilder.createExpression();
-                }
-                else {
-                    SmallVector<uint64_t, 8> addr;
-                    addr.push_back(llvm::dwarf::DW_OP_deref);
-                    expr = dbuilder.createExpression(addr);
-                }
-                dbuilder.insertDeclare(av, varinfo.dinfo, expr,
-                                            topdebugloc,
-                                ctx.builder.GetInsertBlock());
-            }
+        // otherwise give it a boxroot in this function
+        AllocaInst *av = emit_static_roots(ctx, 1);
+        av->setName(jl_symbol_name(s));
+        varinfo.boxroot = av;
+        if (debug_enabled && varinfo.dinfo) {
+            SmallVector<uint64_t, 1> addr;
+            DIExpression *expr;
+            if ((Metadata*)varinfo.dinfo->getType() != debugcache.jl_pvalue_dillvmt)
+                addr.push_back(llvm::dwarf::DW_OP_deref);
+            expr = dbuilder.createExpression(addr);
+            dbuilder.insertDeclare(av, varinfo.dinfo, expr,
+                                        topdebugloc,
+                            ctx.builder.GetInsertBlock());
         }
     };
 
@@ -7612,7 +8662,7 @@ static jl_llvm_functions_t
             varinfo.usedUndef = false;
             continue;
         }
-        allocate_local(varinfo, s);
+        allocate_local(varinfo, s, (int)i);
     }
 
     std::map<int, int> upsilon_to_phic;
@@ -7621,12 +8671,21 @@ static jl_llvm_functions_t
     // yield to them.
     // Also count ssavalue uses.
     {
-        for (size_t i = 0; i < jl_array_len(stmts); ++i) {
+        for (size_t i = 0; i < jl_array_nrows(stmts); ++i) {
             jl_value_t *stmt = jl_array_ptr_ref(stmts, i);
 
             auto scan_ssavalue = [&](jl_value_t *val) {
                 if (jl_is_ssavalue(val)) {
-                    ctx.ssavalue_usecount[((jl_ssavalue_t*)val)->id-1] += 1;
+                    size_t ssa_idx = ((jl_ssavalue_t*)val)->id-1;
+                    /*
+                     * We technically allow out of bounds SSAValues in dead IR, so make
+                     * sure to bounds check this here. It's still not *good* to leave
+                     * dead code in the IR, because this will conservatively overcount
+                     * it, but let's at least make it not crash.
+                     */
+                    if (ssa_idx < ctx.ssavalue_usecount.size()) {
+                        ctx.ssavalue_usecount[ssa_idx] += 1;
+                    }
                     return true;
                 }
                 return false;
@@ -7635,7 +8694,7 @@ static jl_llvm_functions_t
 
             if (jl_is_phicnode(stmt)) {
                 jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(stmt, 0);
-                for (size_t j = 0; j < jl_array_len(values); ++j) {
+                for (size_t j = 0; j < jl_array_nrows(values); ++j) {
                     jl_value_t *val = jl_array_ptr_ref(values, j);
                     assert(jl_is_ssavalue(val));
                     upsilon_to_phic[((jl_ssavalue_t*)val)->id] = i;
@@ -7646,15 +8705,12 @@ static jl_llvm_functions_t
                 vi.used = true;
                 vi.isVolatile = true;
                 vi.value = mark_julia_type(ctx, (Value*)NULL, false, typ);
-                allocate_local(vi, jl_symbol("phic"));
+                allocate_local(vi, jl_symbol("phic"), -1);
             }
         }
     }
 
     // step 8. move args into local variables
-    Function::arg_iterator AI = f->arg_begin();
-    std::vector<AttributeSet> attrs(f->arg_size()); // function declaration attributes
-
     auto get_specsig_arg = [&](jl_value_t *argType, Type *llvmArgType, bool isboxed) {
         if (type_is_ghost(llvmArgType)) { // this argument is not actually passed
             return ghostValue(ctx, argType);
@@ -7666,112 +8722,80 @@ static jl_llvm_functions_t
         ++AI;
         AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
         jl_cgval_t theArg;
-        if (llvmArgType->isAggregateType()) {
+        if (!isboxed && llvmArgType->isAggregateType()) {
             maybe_mark_argument_dereferenceable(param, argType);
-            theArg = mark_julia_slot(Arg, argType, NULL, ctx.tbaa().tbaa_const); // this argument is by-pointer
+            SmallVector<Value*,0> roots;
+            auto tracked = CountTrackedPointers(llvmArgType);
+            if (tracked.count && !tracked.all) {
+                Argument *RootArg = &*AI;
+                roots = load_gc_roots(ctx, RootArg, tracked.count, ctx.tbaa().tbaa_const);
+                AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
+                param.addAttribute(Attribute::NonNull);
+                param.addAttribute(Attribute::NoUndef);
+                param.addDereferenceableAttr(tracked.count * sizeof(void*));
+                param.addAlignmentAttr(alignof(void*));
+                attrs[RootArg->getArgNo()] = AttributeSet::get(Arg->getContext(), param);
+                ++AI;
+            }
+            theArg = mark_julia_slot(Arg, argType, NULL, ctx.tbaa().tbaa_const, roots); // this argument is by-pointer
         }
         else {
-            if (isboxed) // e.g. is-pointer
+            if (isboxed)
                 maybe_mark_argument_dereferenceable(param, argType);
             theArg = mark_julia_type(ctx, Arg, isboxed, argType);
             if (theArg.tbaa == ctx.tbaa().tbaa_immut)
                 theArg.tbaa = ctx.tbaa().tbaa_const;
         }
-        attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
+        attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
         return theArg;
     };
 
-    if (has_sret) {
-        Argument *Arg = &*AI;
-        ++AI;
-        AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
-        if (returninfo.cc == jl_returninfo_t::Union) {
-            param.addAttribute(Attribute::NonNull);
-            // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
-            param.addDereferenceableAttr(returninfo.union_bytes);
-            param.addAlignmentAttr(returninfo.union_align);
-        }
-        else {
-            const DataLayout &DL = jl_Module->getDataLayout();
-            Type *RT = Arg->getParamStructRetType();
-            TypeSize sz = DL.getTypeAllocSize(RT);
-            Align al = DL.getPrefTypeAlign(RT);
-            param.addAttribute(Attribute::NonNull);
-            // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
-            param.addDereferenceableAttr(sz);
-            param.addAlignmentAttr(al);
-        }
-        attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
-    }
-    if (returninfo.return_roots) {
-        Argument *Arg = &*AI;
-        ++AI;
-        AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
-        param.addAttribute(Attribute::NonNull);
-        // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
-        size_t size = returninfo.return_roots * sizeof(jl_value_t*);
-        param.addDereferenceableAttr(size);
-        param.addAlignmentAttr(Align(sizeof(jl_value_t*)));
-        attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
-    }
-    if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)){
-        Argument *Arg = &*AI;
-        ++AI;
-        AttrBuilder param(ctx.builder.getContext());
-        attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param);
-    }
-    for (i = 0; i < nreq; i++) {
+    for (i = 0; i < nreq && i < vinfoslen; i++) {
         jl_sym_t *s = slot_symbol(ctx, i);
-        jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
-        // TODO: jl_nth_slot_type should call jl_rewrap_unionall?
-        //  specTypes is required to be a datatype by construction for specsig, but maybe not otherwise
-        bool isboxed = deserves_argbox(argType);
-        Type *llvmArgType = NULL;
-        if (i == 0 && ctx.is_opaque_closure) {
-            isboxed = true;
-            llvmArgType = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
-            argType = (jl_value_t*)jl_any_type;
-        }
-        else {
-            llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType);
-        }
-        if (s == jl_unused_sym) {
-            if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType))
-                ++AI;
-            continue;
-        }
         jl_varinfo_t &vi = ctx.slots[i];
         jl_cgval_t theArg;
-        if (s == jl_unused_sym || vi.value.constant) {
-            assert(vi.boxroot == NULL);
-            if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType))
-                ++AI;
+        if (i == 0 && ctx.is_opaque_closure) {
+            // If this is an opaque closure, implicitly load the env and switch
+            // the world age. The specTypes value is wrong for this field, so
+            // this needs to be handled first.
+            // jl_value_t *oc_type = get_oc_type(calltype, rettype);
+            Value *oc_this = decay_derived(ctx, &*AI);
+            ++AI; // both specsig (derived) and fptr1 (box) pass this argument as a distinct argument
+            // Load closure world
+            Value *worldaddr = emit_ptrgep(ctx, oc_this, offsetof(jl_opaque_closure_t, world));
+            Align alignof_ptr(ctx.types().alignof_ptr);
+            jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type,
+                nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, alignof_ptr.value());
+            assert(ctx.world_age_at_entry == nullptr);
+            ctx.world_age_at_entry = closure_world.V; // The tls world in a OC is the world of the closure
+            emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, alignof_ptr, alignof_ptr);
+
+            if (s == jl_unused_sym || vi.value.constant)
+                continue;
+
+            // Load closure env, which is always a boxed value (usually some Tuple) currently
+            Value *envaddr = emit_ptrgep(ctx, oc_this, offsetof(jl_opaque_closure_t, captures));
+            theArg = typed_load(ctx, envaddr, NULL, (jl_value_t*)vi.value.typ,
+                nullptr, nullptr, /*isboxed*/true, AtomicOrdering::NotAtomic, false, sizeof(void*));
         }
         else {
-            // If this is an opaque closure, implicitly load the env and switch
-            // the world age.
-            if (i == 0 && ctx.is_opaque_closure) {
-                // Load closure world
-                Value *oc_this = decay_derived(ctx, &*AI++);
-                Value *argaddr = emit_bitcast(ctx, oc_this, getInt8PtrTy(ctx.builder.getContext()));
-                Value *worldaddr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()), argaddr,
-                        ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, world)));
-
-                jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type,
-                    nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value());
-                emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr.value());
-
-                // Load closure env
-                Value *envaddr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()), argaddr,
-                        ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, captures)));
-
-                jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type,
-                    nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*));
-                theArg = update_julia_type(ctx, closure_env, vi.value.typ);
-            }
-            else if (specsig) {
+            jl_value_t *argType = jl_nth_slot_type(abi, i);
+            // TODO: jl_nth_slot_type should call jl_rewrap_unionall?
+            //  specTypes is required to be a datatype by construction for specsig, but maybe not otherwise
+            bool isboxed = deserves_argbox(argType);
+            Type *llvmArgType = NULL;
+            llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType);
+            if (s == jl_unused_sym || vi.value.constant) {
+                assert(vi.boxroot == NULL);
+                if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) {
+                    ++AI;
+                    auto tracked = CountTrackedPointers(llvmArgType);
+                    if (tracked.count && !tracked.all)
+                        ++AI;
+                }
+                continue;
+            }
+            if (specsig) {
                 theArg = get_specsig_arg(argType, llvmArgType, isboxed);
             }
             else {
@@ -7780,18 +8804,18 @@ static jl_llvm_functions_t
                     theArg = mark_julia_type(ctx, fArg, true, vi.value.typ);
                 }
                 else {
-                    Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1);
+                    Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr);
                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
                     Value *load = ai.decorateInst(maybe_mark_load_dereferenceable(
                             ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))),
                             false, vi.value.typ));
                     theArg = mark_julia_type(ctx, load, true, vi.value.typ);
-                    if (debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) {
+                    if (debug_enabled && vi.dinfo && !vi.boxroot) {
                         SmallVector<uint64_t, 8> addr;
                         addr.push_back(llvm::dwarf::DW_OP_deref);
                         addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
                         addr.push_back((i - 1) * sizeof(void*));
-                        if ((Metadata*)vi.dinfo->getType() != debuginfo.jl_pvalue_dillvmt)
+                        if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt)
                             addr.push_back(llvm::dwarf::DW_OP_deref);
                         dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr),
                                         topdebugloc,
@@ -7799,36 +8823,28 @@ static jl_llvm_functions_t
                     }
                 }
             }
+        }
 
-
-            if (vi.boxroot == NULL) {
-                assert(vi.value.V == NULL && "unexpected variable slot created for argument");
-                // keep track of original (possibly boxed) value to avoid re-boxing or moving
-                vi.value = theArg;
-                if (specsig && theArg.V && debug_enabled && vi.dinfo) {
-                    SmallVector<uint64_t, 8> addr;
-                    Value *parg;
-                    if (theArg.ispointer()) {
-                        parg = theArg.V;
-                        if ((Metadata*)vi.dinfo->getType() != debuginfo.jl_pvalue_dillvmt)
-                            addr.push_back(llvm::dwarf::DW_OP_deref);
-                    }
-                    else {
-                        parg = ctx.builder.CreateAlloca(theArg.V->getType(), NULL, jl_symbol_name(s));
-                        ctx.builder.CreateStore(theArg.V, parg);
-                    }
-                    dbuilder.insertDeclare(parg, vi.dinfo, dbuilder.createExpression(addr),
-                                                topdebugloc,
-                                                ctx.builder.GetInsertBlock());
+        if (vi.boxroot == nullptr) {
+            assert(vi.value.V == nullptr && vi.inline_roots == nullptr && "unexpected variable slot created for argument");
+            // keep track of original (possibly boxed) value to avoid re-boxing or moving
+            vi.value = theArg;
+            if (debug_enabled && vi.dinfo && theArg.V) {
+                if (!theArg.inline_roots.empty() || theArg.ispointer()) {
+                    dbuilder.insertDeclare(theArg.V, vi.dinfo, dbuilder.createExpression(),
+                                            topdebugloc, ctx.builder.GetInsertBlock());
+                }
+                else {
+                    dbuilder.insertDbgValueIntrinsic(theArg.V, vi.dinfo, dbuilder.createExpression(),
+                                                        topdebugloc, ctx.builder.GetInsertBlock());
                 }
             }
-            else {
-                Value *argp = boxed(ctx, theArg);
-                ctx.builder.CreateStore(argp, vi.boxroot);
-            }
+        }
+        else {
+            Value *argp = boxed(ctx, theArg);
+            ctx.builder.CreateStore(argp, vi.boxroot);
         }
     }
-
     // step 9. allocate rest argument
     CallInst *restTuple = NULL;
     if (va && ctx.vaSlot != -1) {
@@ -7837,22 +8853,22 @@ static jl_llvm_functions_t
             assert(vi.boxroot == NULL);
         }
         else if (specsig) {
-            ctx.nvargs = jl_nparams(lam->specTypes) - nreq;
-            SmallVector<jl_cgval_t> vargs(ctx.nvargs);
-            for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) {
-                jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
+            ctx.nvargs = jl_nparams(abi) - nreq;
+            SmallVector<jl_cgval_t, 0> vargs(ctx.nvargs);
+            for (size_t i = nreq; i < jl_nparams(abi); ++i) {
+                jl_value_t *argType = jl_nth_slot_type(abi, i);
                 // n.b. specTypes is required to be a datatype by construction for specsig
                 bool isboxed = deserves_argbox(argType);
                 Type *llvmArgType = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType);
                 vargs[i - nreq] = get_specsig_arg(argType, llvmArgType, isboxed);
             }
             if (jl_is_concrete_type(vi.value.typ)) {
-                jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs.data());
+                jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs);
                 emit_varinfo_assign(ctx, vi, tuple);
             }
             else {
                 restTuple = emit_jlcall(ctx, jltuple_func, Constant::getNullValue(ctx.types().T_prjlvalue),
-                    vargs.data(), ctx.nvargs, julia_call);
+                    vargs, ctx.nvargs, julia_call);
                 jl_cgval_t tuple = mark_julia_type(ctx, restTuple, true, vi.value.typ);
                 emit_varinfo_assign(ctx, vi, tuple);
             }
@@ -7863,10 +8879,8 @@ static jl_llvm_functions_t
             restTuple =
                 ctx.builder.CreateCall(F,
                         { Constant::getNullValue(ctx.types().T_prjlvalue),
-                          ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, argArray,
-                                  ConstantInt::get(ctx.types().T_size, nreq - 1)),
-                          ctx.builder.CreateSub(argCount,
-                                  ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nreq - 1)) });
+                          emit_ptrgep(ctx, argArray, (nreq - 1) * sizeof(jl_value_t*)),
+                          ctx.builder.CreateSub(argCount, ctx.builder.getInt32(nreq - 1)) });
             restTuple->setAttributes(F->getAttributes());
             ctx.builder.CreateStore(restTuple, vi.boxroot);
         }
@@ -7883,8 +8897,8 @@ static jl_llvm_functions_t
         return (!jl_is_submodule(mod, jl_base_module) &&
                 !jl_is_submodule(mod, jl_core_module));
     };
-    auto in_tracked_path = [] (StringRef file) {
-        return jl_options.tracked_path != NULL && file.startswith(jl_options.tracked_path);
+    auto in_tracked_path = [] (StringRef file) { // falls within an explicitly set file or directory
+        return jl_options.tracked_path != NULL && file.starts_with(jl_options.tracked_path);
     };
     bool mod_is_user_mod = in_user_mod(ctx.module);
     bool mod_is_tracked = in_tracked_path(ctx.file);
@@ -7892,89 +8906,115 @@ static jl_llvm_functions_t
         DebugLoc loc;
         StringRef file;
         ssize_t line;
+        ssize_t line0; // if this represents pc=1, then also cover the entry to the function (pc=0)
         bool is_user_code;
-        bool is_tracked; // falls within an explicitly set file or directory
-        unsigned inlined_at;
-        bool operator ==(const DebugLineTable &other) const {
-            return other.loc == loc && other.file == file && other.line == line && other.is_user_code == is_user_code && other.is_tracked == is_tracked && other.inlined_at == inlined_at;
-        }
+        int32_t edgeid;
+        bool sameframe(const DebugLineTable &other) const {
+            // detect if the line info for this frame is unchanged (equivalent to loc == other.loc ignoring the inlined_at field)
+            return other.edgeid == edgeid && other.line == line;
+        };
     };
-    std::vector<DebugLineTable> linetable;
-    { // populate the linetable data format
-        assert(jl_is_array(src->linetable));
-        size_t nlocs = jl_array_len(src->linetable);
-        std::map<std::tuple<StringRef, StringRef>, DISubprogram*> subprograms;
-        linetable.resize(nlocs + 1);
-        DebugLineTable &topinfo = linetable[0];
-        topinfo.file = ctx.file;
-        topinfo.line = toplineno;
-        topinfo.is_user_code = mod_is_user_mod;
-        topinfo.is_tracked = mod_is_tracked;
-        topinfo.inlined_at = 0;
-        topinfo.loc = topdebugloc;
-        for (size_t i = 0; i < nlocs; i++) {
-            // LineInfoNode(mod::Module, method::Any, file::Symbol, line::Int32, inlined_at::Int32)
-            jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, i);
-            DebugLineTable &info = linetable[i + 1];
-            assert(jl_typetagis(locinfo, jl_lineinfonode_type));
-            jl_module_t *module = (jl_module_t*)jl_fieldref_noalloc(locinfo, 0);
-            jl_value_t *method = jl_fieldref_noalloc(locinfo, 1);
-            jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 2);
-            info.line = jl_unbox_int32(jl_fieldref(locinfo, 3));
-            info.inlined_at = jl_unbox_int32(jl_fieldref(locinfo, 4));
-            assert(info.inlined_at <= i);
-            info.file = jl_symbol_name(filesym);
-            if (info.file.empty())
-                info.file = "<missing>";
-            if (module == ctx.module)
-                info.is_user_code = mod_is_user_mod;
-            else
-                info.is_user_code = in_user_mod(module);
-            info.is_tracked = in_tracked_path(info.file);
-            if (debug_enabled) {
-                StringRef fname;
-                if (jl_is_method_instance(method))
-                    method = ((jl_method_instance_t*)method)->def.value;
-                if (jl_is_method(method))
-                    method = (jl_value_t*)((jl_method_t*)method)->name;
-                if (jl_is_symbol(method))
-                    fname = jl_symbol_name((jl_sym_t*)method);
-                if (fname.empty())
-                    fname = "macro expansion";
-                if (info.inlined_at == 0 && info.file == ctx.file) { // if everything matches, emit a toplevel line number
-                    info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, SP, NULL);
+    DebugLineTable topinfo;
+    topinfo.file = ctx.file;
+    topinfo.line = toplineno;
+    topinfo.line0 = 0;
+    topinfo.is_user_code = mod_is_user_mod;
+    topinfo.loc = topdebugloc;
+    topinfo.edgeid = 0;
+    std::map<std::tuple<StringRef, StringRef>, DISubprogram*> subprograms;
+    SmallVector<DebugLineTable, 0> prev_lineinfo, new_lineinfo;
+    auto update_lineinfo = [&] (size_t pc) {
+        std::function<bool(jl_debuginfo_t*, jl_value_t*, size_t, size_t)> append_lineinfo =
+                [&] (jl_debuginfo_t *debuginfo, jl_value_t *func, size_t to, size_t pc) -> bool {
+            while (1) {
+                if (!jl_is_symbol(debuginfo->def)) // this is a path
+                    func = debuginfo->def; // this is inlined
+                struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, pc);
+                size_t i = lineidx.line;
+                if (i < 0) // pc out of range: broken debuginfo?
+                    return false;
+                if (i == 0 && lineidx.to == 0) // no update
+                    return false;
+                if (pc > 0 && (jl_value_t*)debuginfo->linetable != jl_nothing) {
+                    // indirection node
+                    if (!append_lineinfo(debuginfo->linetable, func, to, i))
+                        return false; // no update
                 }
-                else { // otherwise, describe this as an inlining frame
-                    DISubprogram *&inl_SP = subprograms[std::make_tuple(fname, info.file)];
-                    if (inl_SP == NULL) {
-                        DIFile *difile = dbuilder.createFile(info.file, ".");
-                        inl_SP = dbuilder.createFunction(difile
-                                                     ,std::string(fname) + ";" // Name
-                                                     ,fname            // LinkageName
-                                                     ,difile           // File
-                                                     ,0                // LineNo
-                                                     ,debuginfo.jl_di_func_null_sig // Ty
-                                                     ,0                // ScopeLine
-                                                     ,DINode::FlagZero // Flags
-                                                     ,DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized // SPFlags
-                                                     ,nullptr          // Template Parameters
-                                                     ,nullptr          // Template Declaration
-                                                     ,nullptr          // ThrownTypes
-                                                     );
+                else {
+                    // actual node
+                    DebugLineTable info;
+                    info.edgeid = to;
+                    jl_module_t *modu = func ? jl_debuginfo_module1(func) : NULL;
+                    if (modu == NULL)
+                        modu = ctx.module;
+                    info.file = jl_debuginfo_file1(debuginfo);
+                    info.line = i;
+                    info.line0 = 0;
+                    if (pc == 1) {
+                        struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, 0);
+                        assert(lineidx.to == 0 && lineidx.pc == 0);
+                        if (lineidx.line > 0 && info.line != lineidx.line)
+                            info.line0 = lineidx.line;
+                    }
+                    if (info.file.empty())
+                        info.file = "<missing>";
+                    if (modu == ctx.module)
+                        info.is_user_code = mod_is_user_mod;
+                    else
+                        info.is_user_code = in_user_mod(modu);
+                    if (debug_enabled) {
+                        StringRef fname = jl_debuginfo_name(func);
+                        if (new_lineinfo.empty() && info.file == ctx.file) { // if everything matches, emit a toplevel line number
+                            info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, SP, NULL);
+                        }
+                        else { // otherwise, describe this as an inlining frame
+                            DebugLoc inl_loc = new_lineinfo.empty() ? DebugLoc(DILocation::get(ctx.builder.getContext(), 0, 0, SP, NULL)) : new_lineinfo.back().loc;
+                            DISubprogram *&inl_SP = subprograms[std::make_tuple(fname, info.file)];
+                            if (inl_SP == NULL) {
+                                DIFile *difile = dbuilder.createFile(info.file, ".");
+                                inl_SP = dbuilder.createFunction(difile
+                                                             ,std::string(fname) + ";" // Name
+                                                             ,fname            // LinkageName
+                                                             ,difile           // File
+                                                             ,0                // LineNo
+                                                             ,debugcache.jl_di_func_null_sig // Ty
+                                                             ,0                // ScopeLine
+                                                             ,DINode::FlagZero // Flags
+                                                             ,DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized // SPFlags
+                                                             ,nullptr          // Template Parameters
+                                                             ,nullptr          // Template Declaration
+                                                             ,nullptr          // ThrownTypes
+                                                             );
+                            }
+                            info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, inl_SP, inl_loc);
+                        }
                     }
-                    DebugLoc inl_loc = (info.inlined_at == 0) ? DebugLoc(DILocation::get(ctx.builder.getContext(), 0, 0, SP, NULL)) : linetable.at(info.inlined_at).loc;
-                    info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, inl_SP, inl_loc);
+                    new_lineinfo.push_back(info);
                 }
+                to = lineidx.to;
+                if (to == 0)
+                    return true;
+                pc = lineidx.pc;
+                debuginfo = (jl_debuginfo_t*)jl_svecref(debuginfo->edges, to - 1);
+                func = NULL;
             }
-        }
-    }
+        };
+        prev_lineinfo.resize(0);
+        std::swap(prev_lineinfo, new_lineinfo);
+        bool updated = append_lineinfo(src->debuginfo, (jl_value_t*)lam, 0, pc + 1);
+        if (!updated)
+            std::swap(prev_lineinfo, new_lineinfo);
+        else
+            assert(new_lineinfo.size() > 0);
+        return updated;
+    };
 
-    std::vector<MDNode*> aliasscopes;
+    SmallVector<MDNode*, 0> aliasscopes;
     MDNode* current_aliasscope = nullptr;
-    std::vector<Metadata*> scope_stack;
-    std::vector<MDNode*> scope_list_stack;
+    SmallVector<Metadata*, 0> scope_stack;
+    SmallVector<MDNode*, 0> scope_list_stack;
     {
-        size_t nstmts = jl_array_len(stmts);
+        size_t nstmts = jl_array_nrows(stmts);
         aliasscopes.resize(nstmts + 1, nullptr);
         MDBuilder mbuilder(ctx.builder.getContext());
         MDNode *alias_domain = mbuilder.createAliasScopeDomain(ctx.name);
@@ -8004,23 +9044,12 @@ static jl_llvm_functions_t
 
     Instruction &prologue_end = ctx.builder.GetInsertBlock()->back();
 
-    // step 11a. For top-level code, load the world age
-    if (toplevel && !ctx.is_opaque_closure) {
-        LoadInst *world = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
-            prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr);
-        world->setOrdering(AtomicOrdering::Acquire);
-        ctx.builder.CreateAlignedStore(world, world_age_field, ctx.types().alignof_ptr);
-    }
-
-    // step 11b. Emit the entry safepoint
-    if (JL_FEAT_TEST(ctx, safepoint_on_entry))
-        emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const);
-
-    // step 11c. Do codegen in control flow order
-    std::vector<int> workstack;
-    std::map<int, BasicBlock*> BB;
-    std::map<size_t, BasicBlock*> come_from_bb;
+    // step 11. Do codegen in control flow order
+    SmallVector<int, 0> workstack;
+    DenseMap<size_t, BasicBlock*> BB;
+    DenseMap<size_t, BasicBlock*> come_from_bb;
     int cursor = 0;
+    int current_label = 0;
     auto find_next_stmt = [&] (int seq_next) {
         // new style ir is always in dominance order, but frontend IR might not be
         // `seq_next` is the next statement we want to emit
@@ -8037,6 +9066,7 @@ static jl_llvm_functions_t
             workstack.pop_back();
             auto nextbb = BB.find(item + 1);
             if (nextbb == BB.end()) {
+                // Not a BB
                 cursor = item;
                 return;
             }
@@ -8047,138 +9077,118 @@ static jl_llvm_functions_t
             seq_next = -1;
             // if this BB is non-empty, we've visited it before so skip it
             if (!nextbb->second->getTerminator()) {
+                // New BB
                 ctx.builder.SetInsertPoint(nextbb->second);
                 cursor = item;
+                current_label = item;
                 return;
             }
         }
         cursor = -1;
     };
 
+    // If a pkgimage or sysimage is being generated, disable tracking.
+    // This means sysimage build or pkgimage precompilation workloads aren't tracked.
     auto do_coverage = [&] (bool in_user_code, bool is_tracked) {
-        return (coverage_mode == JL_LOG_ALL ||
+        return (jl_generating_output() == 0 &&
+                (coverage_mode == JL_LOG_ALL ||
                 (in_user_code && coverage_mode == JL_LOG_USER) ||
-                (is_tracked && coverage_mode == JL_LOG_PATH));
+                (is_tracked && coverage_mode == JL_LOG_PATH)));
     };
     auto do_malloc_log = [&] (bool in_user_code, bool is_tracked) {
-        return (malloc_log_mode == JL_LOG_ALL ||
+        return (jl_generating_output() == 0 &&
+                (malloc_log_mode == JL_LOG_ALL ||
                 (in_user_code && malloc_log_mode == JL_LOG_USER) ||
-                (is_tracked && malloc_log_mode == JL_LOG_PATH));
+                (is_tracked && malloc_log_mode == JL_LOG_PATH)));
     };
-    std::vector<unsigned> current_lineinfo, new_lineinfo;
-    auto coverageVisitStmt = [&] (size_t dbg) {
-        if (dbg == 0 || dbg >= linetable.size())
-            return;
-        // Compute inlining stack for current line, inner frame first
-        while (dbg) {
-            new_lineinfo.push_back(dbg);
-            dbg = linetable.at(dbg).inlined_at;
-        }
+    auto coverageVisitStmt = [&] () {
         // Visit frames which differ from previous statement as tracked in
-        // current_lineinfo (tracked outer frame first).
-        current_lineinfo.resize(new_lineinfo.size(), 0);
+        // prev_lineinfo (tracked outer frame first).
+        size_t dbg;
         for (dbg = 0; dbg < new_lineinfo.size(); dbg++) {
-            unsigned newdbg = new_lineinfo[new_lineinfo.size() - dbg - 1];
-            if (newdbg != current_lineinfo[dbg]) {
-                current_lineinfo[dbg] = newdbg;
-                const auto &info = linetable.at(newdbg);
-                if (do_coverage(info.is_user_code, info.is_tracked))
-                    coverageVisitLine(ctx, info.file, info.line);
+            if (dbg >= prev_lineinfo.size() || !new_lineinfo[dbg].sameframe(prev_lineinfo[dbg]))
+                break;
+        }
+        for (; dbg < new_lineinfo.size(); dbg++) {
+            const auto &newdbg = new_lineinfo[dbg];
+            bool is_tracked = in_tracked_path(newdbg.file);
+            if (do_coverage(newdbg.is_user_code, is_tracked)) {
+                if (newdbg.line0 != 0 && (dbg >= prev_lineinfo.size() || newdbg.edgeid != prev_lineinfo[dbg].edgeid || newdbg.line0 != prev_lineinfo[dbg].line))
+                    coverageVisitLine(ctx, newdbg.file, newdbg.line0);
+                coverageVisitLine(ctx, newdbg.file, newdbg.line);
             }
         }
-        new_lineinfo.clear();
     };
-    auto mallocVisitStmt = [&] (unsigned dbg, Value *sync) {
-        if (!do_malloc_log(mod_is_user_mod, mod_is_tracked) || dbg == 0) {
+    auto mallocVisitStmt = [&] (Value *sync, bool have_dbg_update) {
+        if (!do_malloc_log(mod_is_user_mod, mod_is_tracked) || !have_dbg_update) {
+            // TODD: add || new_lineinfo[0].sameframe(prev_lineinfo[0])) above, but currently this breaks the test for it (by making an optimization better)
             if (do_malloc_log(true, mod_is_tracked) && sync)
                 ctx.builder.CreateCall(prepare_call(sync_gc_total_bytes_func), {sync});
             return;
         }
-        while (linetable.at(dbg).inlined_at)
-            dbg = linetable.at(dbg).inlined_at;
-        mallocVisitLine(ctx, ctx.file, linetable.at(dbg).line, sync);
+        mallocVisitLine(ctx, new_lineinfo[0].file, new_lineinfo[0].line, sync);
     };
     if (coverage_mode != JL_LOG_NONE) {
         // record all lines that could be covered
-        for (const auto &info : linetable)
-            if (do_coverage(info.is_user_code, info.is_tracked))
-                jl_coverage_alloc_line(info.file, info.line);
-    }
-
-    come_from_bb[0] = ctx.builder.GetInsertBlock();
-
-    // First go through and collect all branch targets, so we know where to
-    // split basic blocks.
-    std::set<int> branch_targets; // 1-indexed
-    {
-        for (size_t i = 0; i < stmtslen; ++i) {
-            jl_value_t *stmt = jl_array_ptr_ref(stmts, i);
-            if (jl_is_gotoifnot(stmt)) {
-                int dest = jl_gotoifnot_label(stmt);
-                branch_targets.insert(dest);
-                // The next 1-indexed statement
-                branch_targets.insert(i + 2);
-            } else if (jl_is_returnnode(stmt)) {
-                // We don't do dead branch elimination before codegen
-                // so we need to make sure to start a BB after any
-                // return node, even if they aren't otherwise branch
-                // targets.
-                if (i + 2 <= stmtslen)
-                    branch_targets.insert(i + 2);
-            } else if (jl_is_expr(stmt)) {
-                if (((jl_expr_t*)stmt)->head == jl_enter_sym) {
-                    branch_targets.insert(i + 1);
-                    if (i + 2 <= stmtslen)
-                        branch_targets.insert(i + 2);
-                    int dest = jl_unbox_long(jl_array_ptr_ref(((jl_expr_t*)stmt)->args, 0));
-                    branch_targets.insert(dest);
-                }
-            } else if (jl_is_gotonode(stmt)) {
-                int dest = jl_gotonode_label(stmt);
-                branch_targets.insert(dest);
-                if (i + 2 <= stmtslen)
-                    branch_targets.insert(i + 2);
-            } else if (jl_is_phinode(stmt)) {
-                jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(stmt, 0);
-                for (size_t j = 0; j < jl_array_len(edges); ++j) {
-                    size_t edge = ((int32_t*)jl_array_data(edges))[j];
-                    if (edge == i)
-                        branch_targets.insert(i + 1);
+        std::function<void(jl_debuginfo_t *debuginfo, jl_value_t *func)> record_line_exists = [&](jl_debuginfo_t *debuginfo, jl_value_t *func) {
+            if (!jl_is_symbol(debuginfo->def)) // this is a path
+                func = debuginfo->def; // this is inlined
+            for (size_t i = 0; i < jl_svec_len(debuginfo->edges); i++) {
+                jl_debuginfo_t *edge = (jl_debuginfo_t*)jl_svecref(debuginfo->edges, i);
+                record_line_exists(edge, NULL);
+            }
+            while ((jl_value_t*)debuginfo->linetable != jl_nothing)
+                debuginfo = debuginfo->linetable;
+            jl_module_t *modu = func ? jl_debuginfo_module1(func) : NULL;
+            if (modu == NULL)
+                modu = ctx.module;
+            StringRef file = jl_debuginfo_file1(debuginfo);
+            if (file.empty())
+                file = "<missing>";
+            bool is_user_code;
+            if (modu == ctx.module)
+                is_user_code = mod_is_user_mod;
+            else
+                is_user_code = in_user_mod(modu);
+            bool is_tracked = in_tracked_path(file);
+            if (do_coverage(is_user_code, is_tracked)) {
+                for (size_t pc = 0; 1; pc++) {
+                    struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, pc);
+                    if (lineidx.line == -1)
+                        break;
+                    if (lineidx.line > 0)
+                        jl_coverage_alloc_line(file, lineidx.line);
                 }
             }
-        }
+        };
+        record_line_exists(src->debuginfo, (jl_value_t*)lam);
     }
 
+    come_from_bb[0] = ctx.builder.GetInsertBlock();
+
     for (int label : branch_targets) {
         BasicBlock *bb = BasicBlock::Create(ctx.builder.getContext(),
             "L" + std::to_string(label), f);
         BB[label] = bb;
     }
 
+    new_lineinfo.push_back(topinfo);
     Value *sync_bytes = nullptr;
     if (do_malloc_log(true, mod_is_tracked))
         sync_bytes = ctx.builder.CreateCall(prepare_call(diff_gc_total_bytes_func), {});
-    { // coverage for the function definition line number
-        const auto &topinfo = linetable.at(0);
-        if (linetable.size() > 1) {
-            if (topinfo == linetable.at(1))
-                current_lineinfo.push_back(1);
-        }
-        if (do_coverage(topinfo.is_user_code, topinfo.is_tracked))
-            coverageVisitLine(ctx, topinfo.file, topinfo.line);
-    }
+    // coverage for the function definition line number (topinfo)
+    coverageVisitStmt();
 
     find_next_stmt(0);
     while (cursor != -1) {
-        int32_t debuginfoloc = ((int32_t*)jl_array_data(src->codelocs))[cursor];
-        if (debuginfoloc > 0) {
+        bool have_dbg_update = update_lineinfo(cursor);
+        if (have_dbg_update) {
             if (debug_enabled)
-                ctx.builder.SetCurrentDebugLocation(linetable.at(debuginfoloc).loc);
-            coverageVisitStmt(debuginfoloc);
+                ctx.builder.SetCurrentDebugLocation(new_lineinfo.back().loc);
+            coverageVisitStmt();
         }
         ctx.noalias().aliasscope.current = aliasscopes[cursor];
         jl_value_t *stmt = jl_array_ptr_ref(stmts, cursor);
-        jl_expr_t *expr = jl_is_expr(stmt) ? (jl_expr_t*)stmt : nullptr;
         if (jl_is_returnnode(stmt)) {
             jl_value_t *retexpr = jl_returnnode_value(stmt);
             if (retexpr == NULL) {
@@ -8231,7 +9241,7 @@ static jl_llvm_functions_t
                         // also need to account for the possibility the return object is boxed
                         // and avoid / skip copying it to the stack
                         isboxed_union = ctx.builder.CreateICmpNE(
-                            ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                            ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                             ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
                         data = ctx.builder.CreateSelect(isboxed_union, retvalinfo.Vboxed, data);
                     }
@@ -8240,7 +9250,7 @@ static jl_llvm_functions_t
                     // treat this as a simple boxed returninfo
                     //assert(retvalinfo.isboxed);
                     tindex = compute_tindex_unboxed(ctx, retvalinfo, jlrettype);
-                    tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
+                    tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER));
                     data = boxed(ctx, retvalinfo);
                     sret = NULL;
                 }
@@ -8254,37 +9264,39 @@ static jl_llvm_functions_t
                 break;
             }
             if (sret) {
-                if (retvalinfo.ispointer()) {
-                    if (returninfo.return_roots) {
-                        Type *store_ty = julia_type_to_llvm(ctx, retvalinfo.typ);
-                        emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots);
-                    }
+                Align align(returninfo.union_align);
+                if (!returninfo.return_roots && !retvalinfo.inline_roots.empty()) {
+                    assert(retvalinfo.V == nullptr);
+                    assert(returninfo.cc == jl_returninfo_t::SRet);
+                    split_value_into(ctx, retvalinfo, align, nullptr, align,
+                            jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), sret, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe));
+                }
+                else if (returninfo.return_roots) {
+                    assert(returninfo.cc == jl_returninfo_t::SRet);
+                    Value *return_roots = f->arg_begin() + 1;
+                    split_value_into(ctx, retvalinfo, align, sret, align,
+                            jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), return_roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe));
+                }
+                else if (retvalinfo.ispointer()) {
                     if (returninfo.cc == jl_returninfo_t::SRet) {
                         assert(jl_is_concrete_type(jlrettype));
                         emit_memcpy(ctx, sret, jl_aliasinfo_t::fromTBAA(ctx, nullptr), retvalinfo,
-                                    jl_datatype_size(jlrettype), julia_alignment(jlrettype));
+                                    jl_datatype_size(jlrettype), align, align);
                     }
                     else { // must be jl_returninfo_t::Union
                         emit_unionmove(ctx, sret, nullptr, retvalinfo, /*skip*/isboxed_union);
                     }
                 }
                 else {
-                    Type *store_ty = retvalinfo.V->getType();
-                    Type *dest_ty = store_ty->getPointerTo();
-                    Value *Val = retvalinfo.V;
-                    if (returninfo.return_roots) {
-                        assert(julia_type_to_llvm(ctx, retvalinfo.typ) == store_ty);
-                        emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots);
-                    }
-                    if (dest_ty != sret->getType())
-                        sret = emit_bitcast(ctx, sret, dest_ty);
-                    ctx.builder.CreateAlignedStore(Val, sret, Align(julia_alignment(retvalinfo.typ)));
+                    ctx.builder.CreateAlignedStore(retvalinfo.V, sret, align);
                     assert(retvalinfo.TIndex == NULL && "unreachable"); // unimplemented representation
                 }
             }
 
-            mallocVisitStmt(debuginfoloc, sync_bytes);
-            if (toplevel || ctx.is_opaque_closure)
+            mallocVisitStmt(sync_bytes, have_dbg_update);
+            // N.B.: For toplevel thunks, we expect world age restore to be handled
+            // by the interpreter which invokes us.
+            if (ctx.is_opaque_closure)
                 ctx.builder.CreateStore(last_age, world_age_field);
             assert(type_is_ghost(retty) || returninfo.cc == jl_returninfo_t::SRet ||
                 retval->getType() == ctx.f->getReturnType());
@@ -8295,7 +9307,12 @@ static jl_llvm_functions_t
         if (jl_is_gotonode(stmt)) {
             int lname = jl_gotonode_label(stmt);
             come_from_bb[cursor+1] = ctx.builder.GetInsertBlock();
-            ctx.builder.CreateBr(BB[lname]);
+            auto br = ctx.builder.CreateBr(BB[lname]);
+            // Check if backwards branch
+            if (ctx.LoopID && lname <= current_label) {
+                br->setMetadata(LLVMContext::MD_loop, ctx.LoopID);
+                ctx.LoopID = NULL;
+            }
             find_next_stmt(lname - 1);
             continue;
         }
@@ -8308,44 +9325,94 @@ static jl_llvm_functions_t
             jl_value_t *cond = jl_gotoifnot_cond(stmt);
             int lname = jl_gotoifnot_label(stmt);
             Value *isfalse = emit_condition(ctx, cond, "if");
-            mallocVisitStmt(debuginfoloc, nullptr);
+            mallocVisitStmt(nullptr, have_dbg_update);
             come_from_bb[cursor+1] = ctx.builder.GetInsertBlock();
             workstack.push_back(lname - 1);
             BasicBlock *ifnot = BB[lname];
             BasicBlock *ifso = BB[cursor+2];
+            Instruction *br;
             if (ifnot == ifso)
-                ctx.builder.CreateBr(ifnot);
+                br = ctx.builder.CreateBr(ifnot);
             else
-                ctx.builder.CreateCondBr(isfalse, ifnot, ifso);
+                br = ctx.builder.CreateCondBr(isfalse, ifnot, ifso);
+
+            // Check if backwards branch
+            if (ctx.LoopID && lname <= current_label) {
+                br->setMetadata(LLVMContext::MD_loop, ctx.LoopID);
+                ctx.LoopID = NULL;
+            }
             find_next_stmt(cursor + 1);
             continue;
         }
-        else if (expr && expr->head == jl_enter_sym) {
-            jl_value_t **args = (jl_value_t**)jl_array_data(expr->args);
-
-            assert(jl_is_long(args[0]));
-            int lname = jl_unbox_long(args[0]);
-            // Save exception stack depth at enter for use in pop_exception
-            Value *excstack_state =
-                ctx.builder.CreateCall(prepare_call(jl_excstack_state_func));
-            assert(!ctx.ssavalue_assigned.at(cursor));
-            ctx.SAvalues.at(cursor) = jl_cgval_t(excstack_state, (jl_value_t*)jl_ulong_type, NULL);
-            ctx.ssavalue_assigned.at(cursor) = true;
-            CallInst *sj = ctx.builder.CreateCall(prepare_call(except_enter_func));
-            // We need to mark this on the call site as well. See issue #6757
-            sj->setCanReturnTwice();
-            Value *isz = ctx.builder.CreateICmpEQ(sj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
-            BasicBlock *tryblk = BasicBlock::Create(ctx.builder.getContext(), "try", f);
-            BasicBlock *handlr = NULL;
-            handlr = BB[lname];
-            workstack.push_back(lname - 1);
-            come_from_bb[cursor + 1] = ctx.builder.GetInsertBlock();
-            ctx.builder.CreateCondBr(isz, tryblk, handlr);
-            ctx.builder.SetInsertPoint(tryblk);
+        else if (jl_is_enternode(stmt)) {
+            int lname = jl_enternode_catch_dest(stmt);
+            if (lname) {
+                // Save exception stack depth at enter for use in pop_exception
+                Value *excstack_state =
+                    ctx.builder.CreateCall(prepare_call(jl_excstack_state_func), {get_current_task(ctx)});
+                assert(!ctx.ssavalue_assigned[cursor]);
+                ctx.SAvalues[cursor] = jl_cgval_t(excstack_state, (jl_value_t*)jl_ulong_type, NULL);
+                ctx.ssavalue_assigned[cursor] = true;
+                // Actually enter the exception frame
+                auto ct = get_current_task(ctx);
+                auto *handler_sz64 = ConstantInt::get(Type::getInt64Ty(ctx.builder.getContext()),
+                  sizeof(jl_handler_t));
+                AllocaInst* ehbuff = emit_static_alloca(ctx, sizeof(jl_handler_t), Align(16));
+                ctx.eh_buffers[stmt] = ehbuff;
+                ctx.builder.CreateLifetimeStart(ehbuff, handler_sz64);
+                ctx.builder.CreateCall(prepare_call(jlenter_func), {ct, ehbuff});
+                CallInst *sj;
+                if (ctx.emission_context.TargetTriple.isOSWindows())
+                    sj = ctx.builder.CreateCall(prepare_call(setjmp_func), {ehbuff});
+                else
+                    sj = ctx.builder.CreateCall(prepare_call(setjmp_func), {ehbuff, ConstantInt::get(Type::getInt32Ty(ctx.builder.getContext()), 0)});
+                // We need to mark this on the call site as well. See issue #6757
+                sj->setCanReturnTwice();
+                Value *isz = ctx.builder.CreateICmpEQ(sj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
+                BasicBlock *tryblk = BasicBlock::Create(ctx.builder.getContext(), "try", f);
+                BasicBlock *catchpop = BasicBlock::Create(ctx.builder.getContext(), "catch_enter", f);
+                BasicBlock *handlr = NULL;
+                handlr = BB[lname];
+                workstack.push_back(lname - 1);
+                come_from_bb[cursor + 1] = ctx.builder.GetInsertBlock();
+                ctx.builder.CreateCondBr(isz, tryblk, catchpop);
+                ctx.builder.SetInsertPoint(catchpop);
+                {
+                    ctx.builder.CreateCall(prepare_call(jlleave_func), {get_current_task(ctx), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1)});
+                    ctx.builder.CreateLifetimeEnd(ehbuff, handler_sz64);
+                    ctx.builder.CreateBr(handlr);
+                }
+                ctx.builder.SetInsertPoint(tryblk);
+                auto ehptr = emit_ptrgep(ctx, ct, offsetof(jl_task_t, eh));
+                ctx.builder.CreateAlignedStore(ehbuff, ehptr, ctx.types().alignof_ptr);
+            }
+            // For the two-arg version of :enter, twiddle the scope
+            if (jl_enternode_scope(stmt)) {
+                jl_cgval_t scope = emit_expr(ctx, jl_enternode_scope(stmt));
+                if (scope.typ == jl_bottom_type) {
+                    // Probably dead code, but let's be loud about it in case it isn't, so we fail
+                    // at the point of the miscompile, rather than later when something attempts to
+                    // read the scope.
+                    emit_error(ctx, "(INTERNAL ERROR - IR Validity): Attempted to execute EnterNode with bad scope");
+                    find_next_stmt(-1);
+                    continue;
+                }
+                Value *scope_boxed = boxed(ctx, scope);
+                Value *scope_ptr = get_scope_field(ctx);
+                LoadInst *current_scope = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, scope_ptr, ctx.types().alignof_ptr);
+                StoreInst *scope_store = ctx.builder.CreateAlignedStore(scope_boxed, scope_ptr, ctx.types().alignof_ptr);
+                jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe).decorateInst(current_scope);
+                jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe).decorateInst(scope_store);
+                // GC preserve the scope, since it is not rooted in the `jl_handler_t *`
+                // and may be removed from jl_current_task by any nested block and then
+                // replaced later
+                Value *scope_token = ctx.builder.CreateCall(prepare_call(gc_preserve_begin_func), {scope_boxed});
+                ctx.scope_restore[cursor] = std::make_pair(scope_token, current_scope);
+            }
         }
         else {
             emit_stmtpos(ctx, stmt, cursor);
-            mallocVisitStmt(debuginfoloc, nullptr);
+            mallocVisitStmt(nullptr, have_dbg_update);
         }
         find_next_stmt(cursor + 1);
     }
@@ -8361,21 +9428,22 @@ static jl_llvm_functions_t
 
     // Codegen Phi nodes
     std::map<std::pair<BasicBlock*, BasicBlock*>, BasicBlock*> BB_rewrite_map;
-    std::vector<llvm::PHINode*> ToDelete;
+    SmallVector<llvm::PHINode*, 0> ToDelete;
     for (auto &tup : ctx.PhiNodes) {
         jl_cgval_t phi_result;
         PHINode *VN;
         jl_value_t *r;
         AllocaInst *dest;
+        SmallVector<PHINode*,0> roots;
         BasicBlock *PhiBB;
-        std::tie(phi_result, PhiBB, dest, VN, r) = tup;
+        std::tie(phi_result, PhiBB, dest, VN, roots, r) = tup;
         jl_value_t *phiType = phi_result.typ;
         jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0);
         jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(r, 1);
         PHINode *TindexN = cast_or_null<PHINode>(phi_result.TIndex);
         DenseSet<BasicBlock*> preds;
-        for (size_t i = 0; i < jl_array_len(edges); ++i) {
-            size_t edge = ((int32_t*)jl_array_data(edges))[i];
+        for (size_t i = 0; i < jl_array_nrows(edges); ++i) {
+            size_t edge = jl_array_data(edges, int32_t)[i];
             jl_value_t *value = jl_array_ptr_ref(values, i);
             // This edge value is undef, handle it the same as if the edge wasn't listed at all
             if (!value)
@@ -8394,7 +9462,7 @@ static jl_llvm_functions_t
                 // Only codegen this branch once for each PHI (the expression must be the same on all branches)
 #ifndef NDEBUG
                 for (size_t j = 0; j < i; ++j) {
-                    size_t j_edge = ((int32_t*)jl_array_data(edges))[j];
+                    size_t j_edge = jl_array_data(edges, int32_t)[j];
                     if (j_edge == edge) {
                         assert(jl_egal(value, jl_array_ptr_ref(values, j)));
                     }
@@ -8408,9 +9476,7 @@ static jl_llvm_functions_t
                 // Can't use `llvm::SplitCriticalEdge` here because
                 // we may have invalid phi nodes in the destination.
                 BasicBlock *NewBB = BasicBlock::Create(terminator->getContext(),
-                   FromBB->getName() + "." + PhiBB->getName() + "_crit_edge");
-                Function::iterator FBBI = FromBB->getIterator();
-                ctx.f->getBasicBlockList().insert(++FBBI, NewBB); // insert after existing block
+                   FromBB->getName() + "." + PhiBB->getName() + "_crit_edge", FromBB->getParent(), FromBB->getNextNode()); // insert after existing block
                 terminator->replaceSuccessorWith(PhiBB, NewBB);
                 DebugLoc Loc = terminator->getDebugLoc();
                 terminator = BranchInst::Create(PhiBB);
@@ -8428,6 +9494,7 @@ static jl_llvm_functions_t
                 val = mark_julia_const(ctx, val.constant); // be over-conservative at making sure `.typ` is set concretely, not tindex
             if (!jl_is_uniontype(phiType) || !TindexN) {
                 if (VN) {
+                    assert(roots.empty() && !dest);
                     Value *V;
                     if (val.typ == (jl_value_t*)jl_bottom_type) {
                         V = undef_value_for_type(VN->getType());
@@ -8448,14 +9515,34 @@ static jl_llvm_functions_t
                     VN->addIncoming(V, ctx.builder.GetInsertBlock());
                     assert(!TindexN);
                 }
-                else if (dest && val.typ != (jl_value_t*)jl_bottom_type) {
+                else if ((dest || !roots.empty()) && val.typ != (jl_value_t*)jl_bottom_type) {
                     // must be careful to emit undef here (rather than a bitcast or
                     // load of val) if the runtime type of val isn't phiType
+                    auto tracked = split_value_size((jl_datatype_t*)phiType).second;
                     Value *isvalid = emit_isa_and_defined(ctx, val, phiType);
-                    emit_guarded_test(ctx, isvalid, nullptr, [&] {
-                        emit_unbox_store(ctx, update_julia_type(ctx, val, phiType), dest, ctx.tbaa().tbaa_stack, julia_alignment(phiType));
-                        return nullptr;
+                    assert(roots.size() == tracked && isvalid != nullptr);
+                    SmallVector<Value*,0> incomingroots(0);
+                    if (tracked)
+                        incomingroots.resize(tracked, Constant::getNullValue(ctx.types().T_prjlvalue));
+                    emit_guarded_test(ctx, isvalid, incomingroots, [&] {
+                        jl_cgval_t typedval = update_julia_type(ctx, val, phiType);
+                        SmallVector<Value*,0> mayberoots(tracked, Constant::getNullValue(ctx.types().T_prjlvalue));
+                        if (typedval.typ != jl_bottom_type) {
+                            Align align(julia_alignment(phiType));
+                            if (tracked)
+                                split_value_into(ctx, typedval, align, dest, align, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), mayberoots);
+                            else
+                                emit_unbox_store(ctx, typedval, dest, ctx.tbaa().tbaa_stack, align, align);
+                        }
+                        return mayberoots;
                     });
+                    for (size_t nr = 0; nr < tracked; nr++)
+                        roots[nr]->addIncoming(incomingroots[nr], ctx.builder.GetInsertBlock());
+                }
+                else if (!roots.empty()) {
+                    Value *V = Constant::getNullValue(ctx.types().T_prjlvalue);
+                    for (size_t nr = 0; nr < roots.size(); nr++)
+                        roots[nr]->addIncoming(V, ctx.builder.GetInsertBlock());
                 }
             }
             else {
@@ -8464,23 +9551,26 @@ static jl_llvm_functions_t
                 // `V` is always initialized when it is used.
                 // Ref https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96629
                 Value *V = nullptr;
+                assert(roots.empty());
                 if (val.typ == (jl_value_t*)jl_bottom_type) {
                     if (VN)
                         V = undef_value_for_type(VN->getType());
                     RTindex = UndefValue::get(getInt8Ty(ctx.builder.getContext()));
                 }
                 else if (jl_is_concrete_type(val.typ) || val.constant) {
-                    size_t tindex = get_box_tindex((jl_datatype_t*)val.typ, phiType);
+                    size_t tindex = get_box_tindex((jl_datatype_t*)(val.constant ? jl_typeof(val.constant) : val.typ), phiType);
                     if (tindex == 0) {
                         if (VN)
                             V = boxed(ctx, val);
-                        RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80);
+                        RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER);
                     }
                     else {
                         if (VN)
                             V = Constant::getNullValue(ctx.types().T_prjlvalue);
-                        if (dest)
-                            emit_unbox_store(ctx, val, dest, ctx.tbaa().tbaa_stack, julia_alignment(val.typ));
+                        if (dest) {
+                            Align align(julia_alignment(val.typ));
+                            emit_unbox_store(ctx, val, dest, ctx.tbaa().tbaa_stack, align, align);
+                        }
                         RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex);
                     }
                 }
@@ -8496,7 +9586,7 @@ static jl_llvm_functions_t
                         if (dest) {
                             // If dest is not set, this is a ghost union, the recipient of which
                             // is often not prepared to handle a boxed representation of the ghost.
-                            RTindex = ctx.builder.CreateOr(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
+                            RTindex = ctx.builder.CreateOr(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER));
                         }
                         new_union.TIndex = RTindex;
                     }
@@ -8504,8 +9594,8 @@ static jl_llvm_functions_t
                         V = new_union.Vboxed ? new_union.Vboxed : Constant::getNullValue(ctx.types().T_prjlvalue);
                     if (dest) { // basically, if !ghost union
                         if (new_union.Vboxed != nullptr) {
-                            Value *isboxed = ctx.builder.CreateICmpNE( // if 0x80 is set, we won't select this slot anyways
-                                    ctx.builder.CreateAnd(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                            Value *isboxed = ctx.builder.CreateICmpNE( // if UNION_BOX_MARKER is set, we won't select this slot anyways
+                                    ctx.builder.CreateAnd(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                                     ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
                             skip = skip ? ctx.builder.CreateOr(isboxed, skip) : isboxed;
                         }
@@ -8550,20 +9640,20 @@ static jl_llvm_functions_t
                 Value *undef = undef_value_for_type(VN->getType());
                 VN->addIncoming(undef, FromBB);
                 if (TindexN) // let the runtime / optimizer know this is unknown / boxed / null, so that it won't try to union_move / copy it later
-                    RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80);
+                    RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER);
             }
             if (TindexN)
                 TindexN->addIncoming(RTindex, FromBB);
-            if (dest) {
+            if (dest)
                 ctx.builder.CreateLifetimeStart(dest);
-                if (CountTrackedPointers(dest->getAllocatedType()).count)
-                    ctx.builder.CreateStore(Constant::getNullValue(dest->getAllocatedType()), dest);
-            }
+            for (size_t nr = 0; nr < roots.size(); nr++)
+                roots[nr]->addIncoming(Constant::getNullValue(ctx.types().T_prjlvalue), FromBB);
             ctx.builder.ClearInsertionPoint();
         }
     }
 
     for (PHINode *PN : ToDelete) {
+        // This basic block is statically unreachable, thus so is this PHINode
         PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
         PN->eraseFromParent();
     }
@@ -8579,21 +9669,10 @@ static jl_llvm_functions_t
                     // make sure that anything we attempt to call has some inlining info, just in case optimization messed up
                     // (except if we know that it is an intrinsic used in our prologue, which should never have its own debug subprogram)
                     Function *F = call->getCalledFunction();
-                    if (!in_prologue || !F || !(F->isIntrinsic() || F->getName().startswith("julia.") || &I == restTuple)) {
+                    if (!in_prologue || !F || !(F->isIntrinsic() || F->getName().starts_with("julia.") || &I == restTuple)) {
                         I.setDebugLoc(topdebugloc);
                     }
                 }
-                if (toplevel && !ctx.is_opaque_closure && !in_prologue) {
-                    // we're at toplevel; insert an atomic barrier between every instruction
-                    // TODO: inference is invalid if this has any effect (which it often does)
-                    LoadInst *world = new LoadInst(ctx.types().T_size,
-                        prepare_global_in(jl_Module, jlgetworld_global), Twine(),
-                        /*isVolatile*/false, ctx.types().alignof_ptr, /*insertBefore*/&I);
-                    world->setOrdering(AtomicOrdering::Acquire);
-                    StoreInst *store_world = new StoreInst(world, world_age_field,
-                        /*isVolatile*/false, ctx.types().alignof_ptr, /*insertBefore*/&I);
-                    (void)store_world;
-                }
             }
             if (&I == &prologue_end)
                 in_prologue = false;
@@ -8604,15 +9683,18 @@ static jl_llvm_functions_t
 
     if (ctx.vaSlot > 0) {
         // remove VA allocation if we never referenced it
+        assert(ctx.slots[ctx.vaSlot].isSA && ctx.slots[ctx.vaSlot].isArgument);
         Instruction *root = cast_or_null<Instruction>(ctx.slots[ctx.vaSlot].boxroot);
         if (root) {
-            Instruction *store_value = NULL;
             bool have_real_use = false;
-            for (Use &U : root->uses()) {
-                User *RU = U.getUser();
+            for (User *RU : root->users()) {
                 if (StoreInst *SRU = dyn_cast<StoreInst>(RU)) {
-                    if (!store_value)
-                        store_value = dyn_cast<Instruction>(SRU->getValueOperand());
+                    assert(isa<ConstantPointerNull>(SRU->getValueOperand()) || SRU->getValueOperand() == restTuple);
+                    (void)SRU;
+                }
+                else if (MemSetInst *MSI = dyn_cast<MemSetInst>(RU)) {
+                    assert(MSI->getValue() == ctx.builder.getInt8(0));
+                    (void)MSI;
                 }
                 else if (isa<DbgInfoIntrinsic>(RU)) {
                 }
@@ -8624,22 +9706,21 @@ static jl_llvm_functions_t
                 }
             }
             if (!have_real_use) {
-                Instruction *use = NULL;
-                for (Use &U : root->uses()) {
-                    if (use) // erase after the iterator moves on
-                        use->eraseFromParent();
-                    User *RU = U.getUser();
-                    use = cast<Instruction>(RU);
+                for (User *RU : make_early_inc_range(root->users())) {
+                    // This is safe because it checked above that each User is known and has at most one Use of root
+                    cast<Instruction>(RU)->eraseFromParent();
                 }
-                if (use)
-                    use->eraseFromParent();
                 root->eraseFromParent();
-                assert(!store_value || store_value == restTuple);
                 restTuple->eraseFromParent();
             }
         }
     }
 
+    if (ctx.topalloca != ctx.pgcstack && ctx.topalloca->use_empty()) {
+        ctx.topalloca->eraseFromParent();
+        ctx.topalloca = nullptr;
+    }
+
     // link the dependent llvmcall modules, but switch their function's linkage to internal
     // so that they don't conflict when they show up in the execution engine.
     Linker L(*jl_Module);
@@ -8655,44 +9736,61 @@ static jl_llvm_functions_t
             jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage);
     }
 
-    // link in opaque closure modules
-    for (auto &TSMod : ctx.oc_modules) {
-        SmallVector<std::string, 1> Exports;
-        TSMod.withModuleDo([&](Module &Mod) {
-            for (const auto &F: Mod.functions())
-                if (!F.isDeclaration())
-                    Exports.push_back(F.getName().str());
-        });
-        jl_merge_module(TSM, std::move(TSMod));
-        for (auto FN: Exports)
-            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage);
-    }
-
     JL_GC_POP();
     return declarations;
 }
 
 // --- entry point ---
 
-void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL);
+jl_llvm_functions_t jl_emit_codedecls(
+        orc::ThreadSafeModule &M,
+        jl_code_instance_t *codeinst,
+        jl_codegen_params_t &params)
+{
+    jl_llvm_functions_t decls = {};
+    jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+    bool specsig, needsparams;
+    std::tie(specsig, needsparams) = uses_specsig(get_ci_abi(codeinst), mi, codeinst->rettype, params.params->prefer_specsig);
+    const char *name = name_from_method_instance(mi);
+    if (specsig)
+        raw_string_ostream(decls.functionObject) << "jfptr_" << name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
+    else if (needsparams)
+        decls.functionObject = "jl_fptr_sparam";
+    else
+        decls.functionObject = "jl_fptr_args";
+    raw_string_ostream(decls.specFunctionObject) << (specsig ? "j_" : "j1_") << name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
+    M.withModuleDo([&](Module &M) {
+            bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
+            if (specsig) {
+                get_specsig_function(params, &M, nullptr, decls.specFunctionObject, get_ci_abi(codeinst), codeinst->rettype, is_opaque_closure);
+            }
+            else {
+                Function *f = Function::Create(needsparams ? JuliaType::get_jlfuncparams_ty(M.getContext()) : JuliaType::get_jlfunc_ty(M.getContext()),
+                                     GlobalVariable::ExternalLinkage,
+                                     decls.specFunctionObject, M);
+                jl_init_function(f, params);
+                f->setAttributes(AttributeList::get(M.getContext(), {get_func_attrs(M.getContext()), f->getAttributes()}));
+            }
+        });
+    return decls;
+}
 
-JL_GCC_IGNORE_START("-Wclobbered")
 jl_llvm_functions_t jl_emit_code(
         orc::ThreadSafeModule &m,
         jl_method_instance_t *li,
         jl_code_info_t *src,
-        jl_value_t *jlrettype,
+        jl_value_t *abi_at,
+        jl_value_t *abi_rt,
         jl_codegen_params_t &params)
 {
     JL_TIMING(CODEGEN, CODEGEN_LLVM);
     jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_DEFAULT_BLOCK);
-    // caller must hold codegen_lock
     jl_llvm_functions_t decls = {};
     assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache ||
         compare_cgparams(params.params, &jl_default_cgparams)) &&
         "functions compiled with custom codegen params must not be cached");
     JL_TRY {
-        decls = emit_function(m, li, src, jlrettype, params);
+        decls = emit_function(m, li, src, abi_at, abi_rt, params);
         auto stream = *jl_ExecutionEngine->get_dump_emitted_mi_name_stream();
         if (stream) {
             jl_printf(stream, "%s\t", decls.specFunctionObject.c_str());
@@ -8713,9 +9811,9 @@ jl_llvm_functions_t jl_emit_code(
         decls.functionObject = "";
         decls.specFunctionObject = "";
         jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error during compilation of %s:\n", mname.c_str());
-        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(jl_current_task));
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-        jlbacktrace(); // written to STDERR_FILENO
+        jl_fprint_backtrace(ios_safe_stderr);
     }
 
     return decls;
@@ -8723,18 +9821,21 @@ jl_llvm_functions_t jl_emit_code(
 
 static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codegen_params_t &params, jl_method_instance_t *mi, jl_value_t *rettype)
 {
-    Module *M = m.getModuleUnlocked();
-    jl_codectx_t ctx(M->getContext(), params);
-    ctx.name = M->getModuleIdentifier().data();
-    std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple);
     jl_llvm_functions_t declarations;
     declarations.functionObject = "jl_f_opaque_closure_call";
-    if (uses_specsig(mi->specTypes, false, true, rettype, true)) {
-        jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg));
+    if (uses_specsig(mi->specTypes, false, rettype, true)) {
+        // context lock is held by params
+        Module *M = m.getModuleUnlocked();
+        jl_codectx_t ctx(M->getContext(), params, 0, 0);
+        ctx.name = M->getModuleIdentifier().data();
+        std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple);
+        jl_returninfo_t returninfo = get_specsig_function(params, M, NULL, funcName, mi->specTypes, rettype, true);
         Function *gf_thunk = cast<Function>(returninfo.decl.getCallee());
-        jl_init_function(gf_thunk, ctx.emission_context.TargetTriple);
+        jl_init_function(gf_thunk, ctx.emission_context);
         size_t nrealargs = jl_nparams(mi->specTypes);
-        emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, mi->specTypes, rettype, true, nrealargs, ctx.emission_context);
+        emit_specsig_to_fptr1(gf_thunk, returninfo.cc, returninfo.return_roots,
+                mi->specTypes, rettype, true, nrealargs, ctx.emission_context,
+                prepare_call_in(gf_thunk->getParent(), jlopaque_closure_call_func)); // TODO: this could call emit_oc_call directly
         declarations.specFunctionObject = funcName;
     }
     return declarations;
@@ -8747,286 +9848,122 @@ jl_llvm_functions_t jl_emit_codeinst(
         jl_codegen_params_t &params)
 {
     JL_TIMING(CODEGEN, CODEGEN_Codeinst);
-    jl_timing_show_method_instance(codeinst->def, JL_TIMING_DEFAULT_BLOCK);
-    JL_GC_PUSH1(&src);
+    jl_timing_show_method_instance(jl_get_ci_mi(codeinst), JL_TIMING_DEFAULT_BLOCK);
+    jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
     if (!src) {
-        src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
-        jl_method_t *def = codeinst->def->def.method;
-        // Check if this is the generic method for opaque closure wrappers -
-        // if so, generate the specsig -> invoke converter.
-        if (def == jl_opaque_closure_method) {
-            JL_GC_POP();
-            return jl_emit_oc_wrapper(m, params, codeinst->def, codeinst->rettype);
-        }
-        if (src && (jl_value_t*)src != jl_nothing && jl_is_method(def))
-            src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src);
-        if (!src || !jl_is_code_info(src)) {
-            JL_GC_POP();
-            m = orc::ThreadSafeModule();
-            return jl_llvm_functions_t(); // failed
-        }
-    }
-    jl_llvm_functions_t decls = jl_emit_code(m, codeinst->def, src, codeinst->rettype, params);
-
-    const std::string &specf = decls.specFunctionObject;
-    const std::string &f = decls.functionObject;
-    if (params.cache && !f.empty()) {
-        // Prepare debug info to receive this function
-        // record that this function name came from this linfo,
-        // so we can build a reverse mapping for debug-info.
-        bool toplevel = !jl_is_method(codeinst->def->def.method);
-        if (!toplevel) {
-            //Safe b/c params holds context lock
-            const DataLayout &DL = m.getModuleUnlocked()->getDataLayout();
-            // but don't remember toplevel thunks because
-            // they may not be rooted in the gc for the life of the program,
-            // and the runtime doesn't notify us when the code becomes unreachable :(
-            if (!specf.empty())
-                jl_add_code_in_flight(specf, codeinst, DL);
-            if (!f.empty() && f != "jl_fptr_args" && f != "jl_fptr_sparam")
-                jl_add_code_in_flight(f, codeinst, DL);
-        }
-
-        if (params.world) {// don't alter `inferred` when the code is not directly being used
-            jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
-            // don't change inferred state
-            if (inferred) {
-                jl_method_t *def = codeinst->def->def.method;
-                if (// keep code when keeping everything
-                    !(JL_DELETE_NON_INLINEABLE) ||
-                    // aggressively keep code when debugging level >= 2
-                    // note that this uses the global jl_options.debug_level, not the local emission_ctx.debug_level
-                    jl_options.debug_level > 1) {
-                    // update the stored code
-                    if (inferred != (jl_value_t*)src) {
-                        if (jl_is_method(def)) {
-                            src = (jl_code_info_t*)jl_compress_ir(def, src);
-                            assert(jl_is_string(src));
-                            codeinst->relocatability = jl_string_data(src)[jl_string_len(src)-1];
-                        }
-                        jl_atomic_store_release(&codeinst->inferred, (jl_value_t*)src);
-                        jl_gc_wb(codeinst, src);
-                    }
-                }
-                else if (jl_is_method(def)) {// don't delete toplevel code
-                    if (// and there is something to delete (test this before calling jl_ir_inlining_cost)
-                            inferred != jl_nothing &&
-                            // don't delete inlineable code, unless it is constant
-                            (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr ||
-                                (jl_ir_inlining_cost(inferred) == UINT16_MAX)) &&
-                            // don't delete code when generating a precompile file
-                            !(params.imaging || jl_options.incremental)) {
-                        // if not inlineable, code won't be needed again
-                        jl_atomic_store_release(&codeinst->inferred, jl_nothing);
-                    }
-                }
-            }
+        // Assert that this this is the generic method for opaque closure wrappers:
+        // this signals to instead compile specptr such that it holds the specptr -> invoke wrapper
+        // to satisfy the dispatching implementation requirements of jl_f_opaque_closure_call
+        if (mi->def.method == jl_opaque_closure_method) {
+            return jl_emit_oc_wrapper(m, params, mi, codeinst->rettype);
         }
+        m = orc::ThreadSafeModule();
+        return jl_llvm_functions_t(); // user error
     }
-    JL_GC_POP();
+    //assert(jl_egal((jl_value_t*)jl_atomic_load_relaxed(&codeinst->debuginfo), (jl_value_t*)src->debuginfo) && "trying to generate code for a codeinst for an incompatible src");
+    jl_llvm_functions_t decls = jl_emit_code(m, mi, src, get_ci_abi(codeinst), codeinst->rettype, params);
     return decls;
 }
 
+/// Stolen from IRMover.cpp, since it is needlessly private there
+void linkFunctionBody(Function &Dst, Function &Src)
+{
+    assert(Dst.isDeclaration() && !Src.isDeclaration());
 
-void jl_compile_workqueue(
-    jl_workqueue_t &emitted,
-    Module &original,
-    jl_codegen_params_t &params, CompilationPolicy policy)
-{
-    JL_TIMING(CODEGEN, CODEGEN_Workqueue);
-    jl_code_info_t *src = NULL;
-    JL_GC_PUSH1(&src);
-    while (!params.workqueue.empty()) {
-        jl_code_instance_t *codeinst;
-        Function *protodecl;
-        jl_returninfo_t::CallingConv proto_cc;
-        bool proto_specsig;
-        unsigned proto_return_roots;
-        auto it = params.workqueue.back();
-        codeinst = it.first;
-        std::tie(proto_cc, proto_return_roots, protodecl, proto_specsig) = it.second;
-        params.workqueue.pop_back();
-        // try to emit code for this item from the workqueue
-        assert(codeinst->min_world <= params.world && codeinst->max_world >= params.world &&
-            "invalid world for code-instance");
-        StringRef preal_decl = "";
-        bool preal_specsig = false;
-        auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
-        bool cache_valid = params.cache;
-        // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
-        if (cache_valid && invoke != NULL) {
-            auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-            if (fptr) {
-                while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
-                    jl_cpu_pause();
-                }
-                // in case we are racing with another thread that is emitting this function
-                invoke = jl_atomic_load_relaxed(&codeinst->invoke);
-            }
-            if (invoke == jl_fptr_args_addr) {
-                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
-            }
-            else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
-                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
-                preal_specsig = true;
-            }
+    // Link in the operands without remapping.
+    if (Src.hasPrefixData())
+        Dst.setPrefixData(Src.getPrefixData());
+    if (Src.hasPrologueData())
+        Dst.setPrologueData(Src.getPrologueData());
+    if (Src.hasPersonalityFn())
+        Dst.setPersonalityFn(Src.getPersonalityFn());
+    if (Src.hasPersonalityFn())
+        Dst.setPersonalityFn(Src.getPersonalityFn());
+    assert(Src.IsNewDbgInfoFormat == Dst.IsNewDbgInfoFormat);
+
+    // Copy over the metadata attachments without remapping.
+    Dst.copyMetadata(&Src, 0);
+
+    // Steal arguments and splice the body of Src into Dst.
+    Dst.stealArgumentListFrom(Src);
+    Dst.splice(Dst.end(), &Src);
+}
+
+void emit_always_inline(orc::ThreadSafeModule &result_m, jl_codegen_params_t &params) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
+{
+    while (true) {
+        SmallVector<jl_workqueue_t::value_type> always_inline;
+        for (auto &it : params.workqueue) {
+            if (it.second.private_linkage && it.second.decl->isDeclaration())
+                always_inline.push_back(it);
+            it.second.private_linkage = false;
         }
-        else {
-            auto &result = emitted[codeinst];
-            jl_llvm_functions_t *decls = NULL;
-            if (std::get<0>(result)) {
-                decls = &std::get<1>(result);
-            }
-            else {
-                // Reinfer the function. The JIT came along and removed the inferred
-                // method body. See #34993
-                if (policy != CompilationPolicy::Default &&
-                    jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) {
-                    src = jl_type_infer(codeinst->def, jl_atomic_load_acquire(&jl_world_counter), 0);
-                    if (src) {
-                        orc::ThreadSafeModule result_m =
-                        jl_create_ts_module(name_from_method_instance(codeinst->def),
-                            params.tsctx, params.imaging,
-                            original.getDataLayout(), Triple(original.getTargetTriple()));
-                        result.second = jl_emit_code(result_m, codeinst->def, src, src->rettype, params);
-                        result.first = std::move(result_m);
+        if (always_inline.empty())
+            return;
+        jl_task_t *ct = jl_current_task;
+        int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); // codegen may contain safepoints (such as jl_subtype calls)
+        jl_code_info_t *src = nullptr;
+        params.safepoint_on_entry = false;
+        params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
+        JL_GC_PUSH2(&params.temporary_roots, &src);
+        for (auto &it : always_inline) {
+            jl_code_instance_t *codeinst = it.first;
+            auto &proto = it.second;
+            Function *decl = proto.decl;
+            if (decl->isDeclaration()) {
+                src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
+                jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+                jl_method_t *def = mi->def.method;
+                if (src && jl_is_string((jl_value_t*)src) && jl_is_method(def) && jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX)
+                    src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src);
+                if (src && jl_is_code_info(src) && jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX) {
+                    jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints
+                    if (!result_m)
+                        break;
+                    // TODO: jl_optimize_roots(params, mi, *result_m.getModuleUnlocked()); // contains safepoints
+                    Module &M = *result_m.getModuleUnlocked();
+                    if (decls.functionObject != "jl_fptr_args" &&
+                        decls.functionObject != "jl_fptr_sparam" &&
+                        decls.functionObject != "jl_f_opaque_closure_call") {
+                        Function *F = M.getFunction(decls.functionObject);
+                        F->eraseFromParent();
+                    }
+                    if (!decls.specFunctionObject.empty()) {
+                        Function *specF = M.getFunction(decls.specFunctionObject);
+                        linkFunctionBody(*decl, *specF);
+                        decl->addFnAttr(Attribute::InlineHint);
+                        decl->setLinkage(proto.external_linkage ? GlobalValue::AvailableExternallyLinkage : GlobalValue::PrivateLinkage);
+                        specF->eraseFromParent();
                     }
                 }
-                else {
-                    orc::ThreadSafeModule result_m =
-                        jl_create_ts_module(name_from_method_instance(codeinst->def),
-                            params.tsctx, params.imaging,
-                            original.getDataLayout(), Triple(original.getTargetTriple()));
-                    result.second = jl_emit_codeinst(result_m, codeinst, NULL, params);
-                    result.first = std::move(result_m);
-                }
-                if (std::get<0>(result))
-                    decls = &std::get<1>(result);
-                else
-                    emitted.erase(codeinst); // undo the insert above
-            }
-            if (decls) {
-                if (decls->functionObject == "jl_fptr_args") {
-                    preal_decl = decls->specFunctionObject;
-                }
-                else if (decls->functionObject != "jl_fptr_sparam") {
-                    preal_decl = decls->specFunctionObject;
-                    preal_specsig = true;
-                }
-            }
-        }
-        // patch up the prototype we emitted earlier
-        Module *mod = protodecl->getParent();
-        assert(protodecl->isDeclaration());
-        if (proto_specsig) {
-            // expected specsig
-            if (!preal_specsig) {
-                // emit specsig-to-(jl)invoke conversion
-                Function *preal = emit_tojlinvoke(codeinst, mod, params);
-                protodecl->setLinkage(GlobalVariable::InternalLinkage);
-                //protodecl->setAlwaysInline();
-                jl_init_function(protodecl, params.TargetTriple);
-                size_t nrealargs = jl_nparams(codeinst->def->specTypes); // number of actual arguments being passed
-                // TODO: maybe this can be cached in codeinst->specfptr?
-                emit_cfunc_invalidate(protodecl, proto_cc, proto_return_roots, codeinst->def->specTypes, codeinst->rettype, false, nrealargs, params, preal);
-                preal_decl = ""; // no need to fixup the name
-            }
-            else {
-                assert(!preal_decl.empty());
-            }
-        }
-        else {
-            // expected non-specsig
-            if (preal_decl.empty() || preal_specsig) {
-                // emit jlcall1-to-(jl)invoke conversion
-                preal_decl = emit_tojlinvoke(codeinst, mod, params)->getName();
-            }
-        }
-        if (!preal_decl.empty()) {
-            // merge and/or rename this prototype to the real function
-            if (Value *specfun = mod->getNamedValue(preal_decl)) {
-                if (protodecl != specfun)
-                    protodecl->replaceAllUsesWith(specfun);
-            }
-            else {
-                protodecl->setName(preal_decl);
             }
         }
+        params.temporary_roots = nullptr;
+        JL_GC_POP();
+        jl_gc_unsafe_leave(ct->ptls, gc_state);
     }
-    JL_GC_POP();
 }
 
-
 // --- initialization ---
-std::vector<std::pair<jl_value_t**, JuliaVariable*>> gv_for_global;
+static auto gv_for_global = new SmallVector<std::pair<jl_value_t**, JuliaVariable*>, 0>();
 static void global_jlvalue_to_llvm(JuliaVariable *var, jl_value_t **addr)
 {
-    gv_for_global.push_back(std::make_pair(addr, var));
+    gv_for_global->push_back(std::make_pair(addr, var));
 }
 static JuliaVariable *julia_const_gv(jl_value_t *val)
 {
-    for (auto &kv : gv_for_global) {
+    for (auto &kv : *gv_for_global) {
         if (*kv.first == val)
             return kv.second;
     }
     return nullptr;
 }
 
-// Handle FLOAT16 ABI v2
-#if JULIA_FLOAT16_ABI == 2
-static void makeCastCall(Module &M, StringRef wrapperName, StringRef calledName, FunctionType *FTwrapper, FunctionType *FTcalled, bool external)
-{
-    Function *calledFun = M.getFunction(calledName);
-    if (!calledFun) {
-        calledFun = Function::Create(FTcalled, Function::ExternalLinkage, calledName, M);
-    }
-    auto linkage = external ? Function::ExternalLinkage : Function::InternalLinkage;
-    auto wrapperFun = Function::Create(FTwrapper, linkage, wrapperName, M);
-    wrapperFun->addFnAttr(Attribute::AlwaysInline);
-    llvm::IRBuilder<> builder(BasicBlock::Create(M.getContext(), "top", wrapperFun));
-    SmallVector<Value *, 4> CallArgs;
-    if (wrapperFun->arg_size() != calledFun->arg_size()){
-        llvm::errs() << "FATAL ERROR: Can't match wrapper to called function";
-        abort();
-    }
-    for (auto wrapperArg = wrapperFun->arg_begin(), calledArg = calledFun->arg_begin();
-            wrapperArg != wrapperFun->arg_end() && calledArg != calledFun->arg_end(); ++wrapperArg, ++calledArg)
-    {
-        CallArgs.push_back(builder.CreateBitCast(wrapperArg, calledArg->getType()));
-    }
-    auto val = builder.CreateCall(calledFun, CallArgs);
-    auto retval = builder.CreateBitCast(val,wrapperFun->getReturnType());
-    builder.CreateRet(retval);
-}
-
-void emitFloat16Wrappers(Module &M, bool external)
-{
-    auto &ctx = M.getContext();
-    makeCastCall(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee", FunctionType::get(Type::getFloatTy(ctx), { Type::getHalfTy(ctx) }, false),
-                FunctionType::get(Type::getFloatTy(ctx), { Type::getInt16Ty(ctx) }, false), external);
-    makeCastCall(M, "__extendhfsf2", "julia__gnu_h2f_ieee", FunctionType::get(Type::getFloatTy(ctx), { Type::getHalfTy(ctx) }, false),
-                FunctionType::get(Type::getFloatTy(ctx), { Type::getInt16Ty(ctx) }, false), external);
-    makeCastCall(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee", FunctionType::get(Type::getHalfTy(ctx), { Type::getFloatTy(ctx) }, false),
-                FunctionType::get(Type::getInt16Ty(ctx), { Type::getFloatTy(ctx) }, false), external);
-    makeCastCall(M, "__truncsfhf2", "julia__gnu_f2h_ieee", FunctionType::get(Type::getHalfTy(ctx), { Type::getFloatTy(ctx) }, false),
-                FunctionType::get(Type::getInt16Ty(ctx), { Type::getFloatTy(ctx) }, false), external);
-    makeCastCall(M, "__truncdfhf2", "julia__truncdfhf2", FunctionType::get(Type::getHalfTy(ctx), { Type::getDoubleTy(ctx) }, false),
-                FunctionType::get(Type::getInt16Ty(ctx), { Type::getDoubleTy(ctx) }, false), external);
-}
-
-static void init_f16_funcs(void)
-{
-    auto ctx = jl_ExecutionEngine->acquireContext();
-    auto TSM =  jl_create_ts_module("F16Wrappers", ctx, imaging_default());
-    auto aliasM = TSM.getModuleUnlocked();
-    emitFloat16Wrappers(*aliasM, true);
-    jl_ExecutionEngine->addModule(std::move(TSM));
-}
-#endif
-
 static void init_jit_functions(void)
 {
-    add_named_global(jlsmall_typeof_var, &small_typeof);
+    add_named_global("jl_fptr_args", jl_fptr_args_addr);
+    add_named_global("jl_fptr_sparam", jl_fptr_sparam_addr);
+    add_named_global(jl_small_typeof_var, &jl_small_typeof);
     add_named_global(jlstack_chk_guard_var, &__stack_chk_guard);
     add_named_global(jlRTLD_DEFAULT_var, &jl_RTLD_DEFAULT_handle);
     add_named_global(jlexe_var, &jl_exe_handle);
@@ -9060,21 +9997,20 @@ static void init_jit_functions(void)
     add_named_global(memcmp_func, &memcmp);
     add_named_global(jltypeerror_func, &jl_type_error);
     add_named_global(jlcheckassign_func, &jl_checked_assignment);
-    add_named_global(jldeclareconst_func, &jl_declare_constant);
-    add_named_global(jlgetbindingorerror_func, &jl_get_binding_or_error);
-    add_named_global(jlgetbindingwrorerror_func, &jl_get_binding_wr);
+    add_named_global(jlcheckbpwritable_func, &jl_check_binding_currently_writable);
     add_named_global(jlboundp_func, &jl_boundp);
-    for (auto it : builtin_func_map())
-        add_named_global(it.second, it.first);
+    for (int i = 0; i < jl_n_builtins; i++)
+        add_named_global(jl_builtin_f_names[i], jl_builtin_f_addrs[i]);
     add_named_global(jlapplygeneric_func, &jl_apply_generic);
     add_named_global(jlinvoke_func, &jl_invoke);
     add_named_global(jltopeval_func, &jl_toplevel_eval);
     add_named_global(jlcopyast_func, &jl_copy_ast);
     //add_named_global(jlnsvec_func, &jl_svec);
     add_named_global(jlmethod_func, &jl_method_def);
-    add_named_global(jlgenericfunction_func, &jl_generic_function_def);
+    add_named_global(jlgenericfunction_func, &jl_declare_const_gf);
     add_named_global(jlenter_func, &jl_enter_handler);
     add_named_global(jl_current_exception_func, &jl_current_exception);
+    add_named_global(jlleave_noexcept_func, &jl_pop_handler_noexcept);
     add_named_global(jlleave_func, &jl_pop_handler);
     add_named_global(jl_restore_excstack_func, &jl_restore_excstack);
     add_named_global(jl_excstack_state_func, &jl_excstack_state);
@@ -9084,35 +10020,41 @@ static void init_jit_functions(void)
     add_named_global(jltypeassert_func, &jl_typeassert);
     add_named_global(jlapplytype_func, &jl_instantiate_type_in_env);
     add_named_global(jl_object_id__func, &jl_object_id_);
+    add_named_global(jl_alloc_genericmemory_unchecked_func, &jl_alloc_genericmemory_unchecked);
     add_named_global(jl_alloc_obj_func, (void*)NULL);
     add_named_global(jl_newbits_func, (void*)jl_new_bits);
-    add_named_global(jl_loopinfo_marker_func, (void*)NULL);
     add_named_global(jl_typeof_func, (void*)NULL);
     add_named_global(jl_write_barrier_func, (void*)NULL);
     add_named_global(jldlsym_func, &jl_load_and_lookup);
     add_named_global("jl_adopt_thread", &jl_adopt_thread);
     add_named_global(jlgetcfunctiontrampoline_func, &jl_get_cfunction_trampoline);
     add_named_global(jlgetnthfieldchecked_func, &jl_get_nth_field_checked);
+    add_named_global(jlfieldindex_func, &jl_field_index);
     add_named_global(diff_gc_total_bytes_func, &jl_gc_diff_total_bytes);
     add_named_global(sync_gc_total_bytes_func, &jl_gc_sync_total_bytes);
-    add_named_global(jlarray_data_owner_func, &jl_array_data_owner);
+    add_named_global(jl_allocgenericmemory, &jl_alloc_genericmemory);
     add_named_global(gcroot_flush_func, (void*)NULL);
     add_named_global(gc_preserve_begin_func, (void*)NULL);
     add_named_global(gc_preserve_end_func, (void*)NULL);
     add_named_global(pointer_from_objref_func, (void*)NULL);
-    add_named_global(except_enter_func, (void*)NULL);
     add_named_global(julia_call, (void*)NULL);
     add_named_global(julia_call2, (void*)NULL);
+    add_named_global(jllockvalue_func, &jl_lock_value);
+    add_named_global(jlunlockvalue_func, &jl_unlock_value);
+    add_named_global(jllockfield_func, &jl_lock_field);
+    add_named_global(jlunlockfield_func, &jl_unlock_field);
+    add_named_global(jlgetabiconverter_func, &jl_get_abi_converter);
 
 #ifdef _OS_WINDOWS_
 #if defined(_CPU_X86_64_)
-#if defined(_COMPILER_GCC_)
+    add_named_global("__julia_personality", &__julia_personality);
+#if defined(__MINGW32__)
     add_named_global("___chkstk_ms", &___chkstk_ms);
 #else
     add_named_global("__chkstk", &__chkstk);
 #endif
 #else
-#if defined(_COMPILER_GCC_)
+#if defined(__MINGW32__)
     add_named_global("_alloca", &_alloca);
 #else
     add_named_global("_chkstk", &_chkstk);
@@ -9131,7 +10073,7 @@ static void init_jit_functions(void)
 }
 
 #ifdef JL_USE_INTEL_JITEVENTS
-char jl_using_intel_jitevents; // Non-zero if running under Intel VTune Amplifier
+char jl_using_intel_jitevents = 0; // Non-zero if running under Intel VTune Amplifier
 #endif
 
 #ifdef JL_USE_OPROFILE_JITEVENTS
@@ -9142,12 +10084,13 @@ char jl_using_oprofile_jitevents = 0; // Non-zero if running under OProfile
 char jl_using_perf_jitevents = 0;
 #endif
 
+int jl_is_timing_passes = 0;
+
 extern "C" void jl_init_llvm(void)
 {
     jl_page_size = jl_getpagesize();
-    jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug;
-    jl_default_cgparams.generic_context = jl_nothing;
-
+    jl_default_debug_info_kind = jl_default_cgparams.debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug;
+    jl_default_cgparams.debug_info_level = (int) jl_options.debug_level;
     InitializeNativeTarget();
     InitializeNativeTargetAsmPrinter();
     InitializeNativeTargetAsmParser();
@@ -9156,16 +10099,13 @@ extern "C" void jl_init_llvm(void)
     // Initialize passes
     PassRegistry &Registry = *PassRegistry::getPassRegistry();
     initializeCore(Registry);
-#if JL_LLVM_VERSION < 150000
-    initializeCoroutines(Registry);
-#endif
     initializeScalarOpts(Registry);
     initializeVectorization(Registry);
     initializeAnalysis(Registry);
     initializeTransformUtils(Registry);
     initializeInstCombine(Registry);
-    initializeAggressiveInstCombine(Registry);
-    initializeInstrumentation(Registry);
+    // TODO: initializeAggressiveInstCombine(Registry);
+    // TODO: initializeInstrumentation(Registry);
     initializeTarget(Registry);
 #ifdef USE_POLLY
     polly::initializePollyPasses(Registry);
@@ -9181,28 +10121,19 @@ extern "C" void jl_init_llvm(void)
     clopt = llvmopts.lookup("enable-tail-merge"); // NOO TOUCHIE; NO TOUCH! See #922
     if (clopt->getNumOccurrences() == 0)
         cl::ProvidePositionalOption(clopt, "0", 1);
-#ifdef JL_USE_NEW_PM
     // For parity with LoopUnswitch
     clopt = llvmopts.lookup("unswitch-threshold");
     if (clopt->getNumOccurrences() == 0)
         cl::ProvidePositionalOption(clopt, "100", 1);
-#endif
     // if the patch adding this option has been applied, lower its limit to provide
     // better DAGCombiner performance.
     clopt = llvmopts.lookup("combiner-store-merge-dependence-limit");
     if (clopt && clopt->getNumOccurrences() == 0)
         cl::ProvidePositionalOption(clopt, "4", 1);
 
-#if JL_LLVM_VERSION >= 150000
-    clopt = llvmopts.lookup("opaque-pointers");
-    if (clopt && clopt->getNumOccurrences() == 0) {
-#ifdef JL_LLVM_OPAQUE_POINTERS
-        cl::ProvidePositionalOption(clopt, "true", 1);
-#else
-        cl::ProvidePositionalOption(clopt, "false", 1);
-#endif
-    }
-#endif
+    clopt = llvmopts.lookup("time-passes");
+    if (clopt && clopt->getNumOccurrences() > 0)
+        jl_is_timing_passes = 1;
 
     jl_ExecutionEngine = new JuliaOJIT();
 
@@ -9221,14 +10152,19 @@ extern "C" void jl_init_llvm(void)
 #if defined(JL_USE_INTEL_JITEVENTS) || \
     defined(JL_USE_OPROFILE_JITEVENTS) || \
     defined(JL_USE_PERF_JITEVENTS)
-#ifdef JL_USE_JITLINK
-#pragma message("JIT profiling support (JL_USE_*_JITEVENTS) not yet available on platforms that use JITLink")
-#else
     const char *jit_profiling = getenv("ENABLE_JITPROFILING");
 
 #if defined(JL_USE_INTEL_JITEVENTS)
-    if (jit_profiling && atoi(jit_profiling)) {
-        jl_using_intel_jitevents = 1;
+    if (jit_profiling) {
+        if (atoi(jit_profiling)) {
+            jl_using_intel_jitevents = 1;
+        }
+    } else {
+#ifdef USE_ITTAPI
+        __itt_collection_state state = __itt_get_collection_state();
+        jl_using_intel_jitevents = state == __itt_collection_init_successful ||
+                                   state == __itt_collection_collector_exists;
+#endif
     }
 #endif
 
@@ -9240,24 +10176,23 @@ extern "C" void jl_init_llvm(void)
 
 #if defined(JL_USE_PERF_JITEVENTS)
     if (jit_profiling && atoi(jit_profiling)) {
-        jl_using_perf_jitevents= 1;
+        jl_using_perf_jitevents = 1;
     }
 #endif
 
 #ifdef JL_USE_INTEL_JITEVENTS
     if (jl_using_intel_jitevents)
-        jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createIntelJITEventListener());
+        jl_ExecutionEngine->enableIntelJITEventListener();
 #endif
 
 #ifdef JL_USE_OPROFILE_JITEVENTS
     if (jl_using_oprofile_jitevents)
-        jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createOProfileJITEventListener());
+        jl_ExecutionEngine->enableOProfileJITEventListener();
 #endif
 
 #ifdef JL_USE_PERF_JITEVENTS
     if (jl_using_perf_jitevents)
-        jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener());
-#endif
+        jl_ExecutionEngine->enablePerfJITEventListener();
 #endif
 #endif
 
@@ -9269,9 +10204,6 @@ extern "C" JL_DLLEXPORT_CODEGEN void jl_init_codegen_impl(void)
     jl_init_llvm();
     // Now that the execution engine exists, initialize all modules
     init_jit_functions();
-#if JULIA_FLOAT16_ABI == 2
-    init_f16_funcs();
-#endif
 }
 
 extern "C" JL_DLLEXPORT_CODEGEN void jl_teardown_codegen_impl() JL_NOTSAFEPOINT
@@ -9319,17 +10251,29 @@ namespace llvm {
     class MachineBasicBlock;
     class MachineFunction;
     raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB);
+#if JL_LLVM_VERSION >= 200000
+    void printMIR(raw_ostream &OS, const MachineModuleInfo &MMI,
+                const MachineFunction &MF);
+#else
     void printMIR(raw_ostream &OS, const MachineFunction &MF);
+#endif
 }
 extern "C" void jl_dump_llvm_mbb(void *v)
 {
     errs() << *(llvm::MachineBasicBlock*)v;
 }
+#if JL_LLVM_VERSION >= 200000
+extern "C" void jl_dump_llvm_mfunction(void *m, void *v)
+{
+    llvm::printMIR(errs(), *(llvm::MachineModuleInfo*)v,
+                *(llvm::MachineFunction*)v);
+}
+#else
 extern "C" void jl_dump_llvm_mfunction(void *v)
 {
     llvm::printMIR(errs(), *(llvm::MachineFunction*)v);
 }
-
+#endif
 
 extern void jl_write_bitcode_func(void *F, char *fname) {
     std::error_code EC;
diff --git a/src/common_symbols1.inc b/src/common_symbols1.inc
index 547d5d0eabede..3dfcf17a07b5c 100644
--- a/src/common_symbols1.inc
+++ b/src/common_symbols1.inc
@@ -1,98 +1,90 @@
 jl_symbol("="),
 jl_symbol("getproperty"),
-jl_symbol("apply_type"),
 jl_symbol("getfield"),
+jl_symbol("apply_type"),
+jl_symbol("==="),
 jl_symbol("getindex"),
 jl_symbol("convert"),
-jl_symbol("==="),
-jl_symbol("iterate"),
 jl_symbol("=="),
 jl_symbol("new"),
-jl_symbol("foreigncall"),
 jl_symbol("int.jl"),
-jl_symbol("throw"),
-jl_symbol("nothing"),
-jl_symbol("essentials.jl"),
 jl_symbol("+"),
-jl_symbol("unsafe_convert"),
+jl_symbol("boot.jl"),
+jl_symbol("essentials.jl"),
+jl_symbol("ccall"),
+jl_symbol("foreigncall"),
+jl_symbol("iterate"),
 jl_symbol("not_int"),
+jl_symbol("Base.jl"),
 jl_symbol("-"),
-jl_symbol("boot.jl"),
-jl_symbol("number.jl"),
+jl_symbol("throw"),
+jl_symbol("promotion.jl"),
 jl_symbol("length"),
 jl_symbol("<"),
-jl_symbol("cconvert"),
-jl_symbol("Base.jl"),
-jl_symbol("promotion.jl"),
-jl_symbol("tuple.jl"),
-jl_symbol("static_parameter"),
-jl_symbol("isempty"),
-jl_symbol("<="),
-jl_symbol("array.jl"),
+jl_symbol("isa"),
 jl_symbol("operators.jl"),
-jl_symbol("NamedTuple"),
+jl_symbol("number.jl"),
+jl_symbol("unsafe_convert"),
+jl_symbol("tuple.jl"),
+jl_symbol("nothing"),
 jl_symbol("bitcast"),
-jl_symbol("!"),
+jl_symbol("NamedTuple"),
 jl_symbol("indexed_iterate"),
-jl_symbol("sle_int"),
 jl_symbol("bool.jl"),
-jl_symbol("Ptr"),
-jl_symbol("size"),
+jl_symbol("!"),
+jl_symbol("isempty"),
+jl_symbol("<="),
+jl_symbol("cconvert"),
 jl_symbol("add_int"),
+jl_symbol("static_parameter"),
+jl_symbol("array.jl"),
 jl_symbol("slt_int"),
-jl_symbol("*"),
-jl_symbol("range.jl"),
-jl_symbol("abstractarray.jl"),
 jl_symbol("!="),
-jl_symbol("isa"),
-jl_symbol("setindex!"),
-jl_symbol("string"),
-jl_symbol("ifelse"),
-jl_symbol(":"),
-jl_symbol(">"),
-jl_symbol("_apply_iterate"),
 jl_symbol("UInt64"),
+jl_symbol("range.jl"),
+jl_symbol("sle_int"),
+jl_symbol("size"),
 jl_symbol("&"),
-jl_symbol("max"),
+jl_symbol("abstractarray.jl"),
 jl_symbol("rem"),
-jl_symbol("sub_int"),
-jl_symbol(">="),
-jl_symbol("UInt8"),
-jl_symbol("iterators.jl"),
+jl_symbol(">"),
 jl_symbol("Int64"),
-jl_symbol("pairs"),
+jl_symbol("sub_int"),
+jl_symbol("*"),
 jl_symbol("and_int"),
+jl_symbol("string"),
+jl_symbol(">="),
+jl_symbol("Ptr"),
+jl_symbol("toInt64"),
 jl_symbol("last"),
-jl_symbol("typeof"),
-jl_symbol("arrayref"),
 jl_symbol("pointer.jl"),
-jl_symbol("toInt64"),
-jl_symbol("arraylen"),
+jl_symbol("reinterpret"),
+jl_symbol("first"),
+jl_symbol("pairs"),
+jl_symbol("_apply_iterate"),
 jl_symbol("typeassert"),
-jl_symbol("map"),
+jl_symbol(":"),
+jl_symbol("UInt8"),
+jl_symbol("setindex!"),
+jl_symbol("isdefined"),
+jl_symbol("typeof"),
+jl_symbol("promote"),
 jl_symbol("kwcall"),
-jl_symbol("ArgumentError"),
+jl_symbol("unsigned"),
+jl_symbol("_promote"),
+jl_symbol("toUInt64"),
+jl_symbol("map"),
 jl_symbol("lshr_int"),
+jl_symbol("gc_preserve_begin"),
+jl_symbol("gc_preserve_end"),
+jl_symbol("trunc_int"),
+jl_symbol("ArgumentError"),
 jl_symbol("axes"),
-jl_symbol("reinterpret"),
+jl_symbol("ult_int"),
+jl_symbol("UInt"),
+jl_symbol("zext_int"),
+jl_symbol("strings/string.jl"),
+jl_symbol("ifelse"),
 jl_symbol("Array"),
-jl_symbol("first"),
-jl_symbol("trunc_int"),
-jl_symbol("OneTo"),
-jl_symbol("haskey"),
-jl_symbol("Int"),
-jl_symbol("oneto"),
 jl_symbol("eq_int"),
 jl_symbol("throw_inexacterror"),
-jl_symbol("toUInt64"),
-jl_symbol("arraysize"),
-jl_symbol("UInt"),
-jl_symbol("setproperty!"),
-jl_symbol("check_top_bit"),
-jl_symbol("promote"),
-jl_symbol("unsigned"),
-jl_symbol("is_top_bit_set"),
-jl_symbol("structdiff"),
-jl_symbol("undef"),
-jl_symbol("sizeof"),
-jl_symbol("String"),
diff --git a/src/common_symbols2.inc b/src/common_symbols2.inc
index b5a334172dd76..e9c070ee8da6a 100644
--- a/src/common_symbols2.inc
+++ b/src/common_symbols2.inc
@@ -1,254 +1,248 @@
-jl_symbol("namedtuple.jl"),
-jl_symbol("pop"),
-jl_symbol("inbounds"),
-jl_symbol("strings/string.jl"),
-jl_symbol("Ref"),
-jl_symbol("Vector"),
-jl_symbol("kwerr"),
-jl_symbol("_promote"),
+jl_symbol("|"),
+jl_symbol("setproperty!"),
 jl_symbol("sext_int"),
-jl_symbol("pointer"),
-jl_symbol("similar"),
-jl_symbol("arrayset"),
+jl_symbol("String"),
+jl_symbol("Int"),
+jl_symbol("iterators.jl"),
+jl_symbol("Colon"),
+jl_symbol("unchecked_oneto"),
+jl_symbol("structdiff"),
+jl_symbol("UnitRange"),
+jl_symbol("unitrange_last"),
+jl_symbol("sizeof"),
+jl_symbol("check_sign_bit"),
+jl_symbol("is_top_bit_set"),
+jl_symbol("data"),
+jl_symbol("kwerr"),
 jl_symbol("axes1"),
 jl_symbol("eachindex"),
-jl_symbol("|"),
-jl_symbol("ult_int"),
-jl_symbol("lastindex"),
-jl_symbol("setfield!"),
-jl_symbol("UnitRange"),
-jl_symbol("push!"),
+jl_symbol("or_int"),
 jl_symbol("Bool"),
-jl_symbol("Colon"),
+jl_symbol("setfield!"),
 jl_symbol("fieldtype"),
-jl_symbol("unitrange_last"),
-jl_symbol("bitarray.jl"),
-jl_symbol("<<"),
-jl_symbol("zext_int"),
-jl_symbol("Tuple"),
+jl_symbol("Ref"),
+jl_symbol("pointer"),
+jl_symbol("max"),
+jl_symbol("push!"),
+jl_symbol("lastindex"),
 jl_symbol("reflection.jl"),
-jl_symbol("TypeError"),
-jl_symbol("print"),
-jl_symbol("eltype"),
+jl_symbol("<<"),
+jl_symbol("similar"),
+jl_symbol("Vector"),
+jl_symbol("UInt32"),
 jl_symbol(">>"),
-jl_symbol("strings/basic.jl"),
-jl_symbol("gc_preserve_begin"),
-jl_symbol("require_one_based_indexing"),
-jl_symbol("gc_preserve_end"),
-jl_symbol("DimensionMismatch"),
-jl_symbol("indices.jl"),
-jl_symbol("Cvoid"),
-jl_symbol("oftype"),
-jl_symbol("zero"),
-jl_symbol("float.jl"),
-jl_symbol("Any"),
-jl_symbol("checkbounds"),
-jl_symbol("or_int"),
-jl_symbol("isdefined"),
 jl_symbol("dict.jl"),
+jl_symbol("checkbounds"),
+jl_symbol("undef"),
+jl_symbol("jl_string_ptr"),
+jl_symbol("error"),
 jl_symbol("strings/io.jl"),
-jl_symbol("shl_int"),
-jl_symbol("copy"),
-jl_symbol("macro expansion"),
-jl_symbol("abstractdict.jl"),
+jl_symbol("strings/substring.jl"),
+jl_symbol("bitarray.jl"),
+jl_symbol("strings/basic.jl"),
+jl_symbol("merge"),
+jl_symbol("TypeError"),
+jl_symbol("keyword argument"),
 jl_symbol("in"),
-jl_symbol("io.jl"),
-jl_symbol("BlasInt"),
-jl_symbol("Float64"),
+jl_symbol("print"),
+jl_symbol("macro expansion"),
 jl_symbol("mul_int"),
-jl_symbol("UInt32"),
+jl_symbol("shl_int"),
 jl_symbol("C_NULL"),
+jl_symbol("oftype"),
+jl_symbol("_growend!"),
+jl_symbol("Any"),
+jl_symbol("Tuple"),
+jl_symbol("float.jl"),
+jl_symbol("ncodeunits"),
 jl_symbol("Integer"),
+jl_symbol("io.jl"),
+jl_symbol("eltype"),
+jl_symbol("name"),
+jl_symbol("parent"),
 jl_symbol("!=="),
-jl_symbol("merge"),
-jl_symbol("BoundsError"),
-jl_symbol("broadcasted"),
-jl_symbol("Cint"),
-jl_symbol("min"),
-jl_symbol("libblastrampoline"),
 jl_symbol("iszero"),
+jl_symbol("min"),
+jl_symbol("DimensionMismatch"),
 jl_symbol("refvalue.jl"),
-jl_symbol("stride"),
-jl_symbol("error"),
-jl_symbol("ncodeunits"),
-jl_symbol("LinearIndices"),
-jl_symbol("Clong"),
-jl_symbol("pair.jl"),
-jl_symbol("_growend!"),
-jl_symbol("char.jl"),
-jl_symbol("copyto!"),
-jl_symbol("get"),
-jl_symbol("tail"),
-jl_symbol("real"),
 jl_symbol("Union"),
-jl_symbol("multidimensional.jl"),
-jl_symbol("enter"),
-jl_symbol("leave"),
+jl_symbol("BlasInt"),
+jl_symbol("unsafe_load"),
+jl_symbol("indices.jl"),
+jl_symbol("x"),
+jl_symbol("require_one_based_indexing"),
+jl_symbol("namedtuple.jl"),
+jl_symbol("tail"),
+jl_symbol("Float64"),
+jl_symbol("head"),
+jl_symbol("Cvoid"),
+jl_symbol("copy"),
+jl_symbol("libblastrampoline"),
+jl_symbol("get"),
+jl_symbol("neg_int"),
+jl_symbol("stop"),
+jl_symbol("zero"),
 jl_symbol("add_ptr"),
-jl_symbol("chkstride1"),
+jl_symbol("toUInt32"),
+jl_symbol("ptr"),
+jl_symbol("char.jl"),
+jl_symbol("trunc"),
+jl_symbol("not_atomic"),
+jl_symbol("enter"),
+jl_symbol("Pair"),
+jl_symbol("jl_value_ptr"),
 jl_symbol("Expr"),
-jl_symbol("write"),
-jl_symbol("broadcast.jl"),
+jl_symbol("broadcasted"),
+jl_symbol("pointerref"),
+jl_symbol("multidimensional.jl"),
+jl_symbol("Generator"),
+jl_symbol("leave"),
+jl_symbol("memoryrefnew"),
 jl_symbol("show.jl"),
+jl_symbol("pointer_from_objref"),
+jl_symbol("memoryrefget"),
+jl_symbol("reduce.jl"),
+jl_symbol("stride"),
+jl_symbol("pair.jl"),
+jl_symbol("_string"),
+jl_symbol("cmem.jl"),
+jl_symbol("generator.jl"),
+jl_symbol("broadcast.jl"),
 jl_symbol("none"),
-jl_symbol("Generator"),
+jl_symbol("copyto!"),
+jl_symbol("chkstride1"),
+jl_symbol("value"),
+jl_symbol("write"),
+jl_symbol("identity"),
+jl_symbol("real"),
+jl_symbol("start"),
+jl_symbol("Cint"),
+jl_symbol("fill!"),
+jl_symbol("checkindex"),
+jl_symbol("keys"),
+jl_symbol("BoundsError"),
+jl_symbol("vals"),
+jl_symbol("Symbol"),
+jl_symbol("strings/util.jl"),
 jl_symbol("Int32"),
-jl_symbol("materialize"),
+jl_symbol("ht_keyindex"),
+jl_symbol("io"),
+jl_symbol("~"),
+jl_symbol("AssertionError"),
+jl_symbol("abstractdict.jl"),
 jl_symbol("show"),
-jl_symbol("lock"),
-jl_symbol("unsafe_load"),
-jl_symbol("gmp.jl"),
 jl_symbol("mpfr.jl"),
-jl_symbol("Symbol"),
-jl_symbol("Pair"),
-jl_symbol("resize!"),
-jl_symbol("neg_int"),
-jl_symbol("strings/substring.jl"),
-jl_symbol("AssertionError"),
-jl_symbol("identity"),
-jl_symbol("one"),
-jl_symbol("reduce.jl"),
-jl_symbol("libcholmod"),
 jl_symbol("isless"),
+jl_symbol("args"),
+jl_symbol("lock"),
 jl_symbol("reducedim.jl"),
+jl_symbol("gmp.jl"),
+jl_symbol("offset"),
+jl_symbol("resize!"),
+jl_symbol("throw_boundserror"),
+jl_symbol("Clong"),
+jl_symbol("_call_latest"),
+jl_symbol("argtail"),
+jl_symbol("compiler/ssair/ir.jl"),
+jl_symbol("sub_ptr"),
+jl_symbol("materialize"),
 jl_symbol("checksquare"),
-jl_symbol("sort.jl"),
-jl_symbol("generator.jl"),
-jl_symbol("pointer_from_objref"),
-jl_symbol("Float32"),
-jl_symbol("chklapackerror"),
-jl_symbol("parent"),
-jl_symbol("task.jl"),
+jl_symbol("LinearIndices"),
+jl_symbol("ule_int"),
+jl_symbol("dict"),
 jl_symbol("div"),
-jl_symbol("cholmod_common"),
-jl_symbol("ht_keyindex"),
-jl_symbol("pop_exception"),
-jl_symbol("c.jl"),
-jl_symbol("firstindex"),
-jl_symbol("some.jl"),
-jl_symbol("iobuffer.jl"),
-jl_symbol("sub_ptr"),
-jl_symbol("vect"),
-jl_symbol("unsafe_string"),
-jl_symbol("llvmcall"),
-jl_symbol("checkindex"),
-jl_symbol("_call_latest"),
+jl_symbol("chklapackerror"),
+jl_symbol("count"),
+jl_symbol("Float32"),
+jl_symbol("genericmemory.jl"),
+jl_symbol("print_to_string"),
 jl_symbol("rethrow"),
-jl_symbol("pointerref"),
+jl_symbol("sort.jl"),
+jl_symbol("boundscheck"),
 jl_symbol("println"),
-jl_symbol("keys"),
-jl_symbol("RefValue"),
+jl_symbol("loading.jl"),
+jl_symbol("collect"),
+jl_symbol("ashr_int"),
 jl_symbol("_expr"),
-jl_symbol("toUInt32"),
-jl_symbol("ismissing"),
-jl_symbol("throw_boundserror"),
-jl_symbol("IteratorSize"),
-jl_symbol("iddict.jl"),
+jl_symbol("iobuffer.jl"),
+jl_symbol("DataType"),
+jl_symbol("Dict"),
+jl_symbol("unsafe_string"),
+jl_symbol("RefValue"),
+jl_symbol("step"),
 jl_symbol("to_shape"),
-jl_symbol("Csize_t"),
-jl_symbol("~"),
-jl_symbol("argtail"),
-jl_symbol("include"),
-jl_symbol("set.jl"),
-jl_symbol("isequal"),
+jl_symbol("pop_exception"),
+jl_symbol("Memory"),
+jl_symbol("KeyError"),
+jl_symbol("chunks"),
 jl_symbol("refpointer.jl"),
-jl_symbol("=>"),
-jl_symbol("Val"),
-jl_symbol("Base"),
+jl_symbol("llvmcall"),
+jl_symbol("c.jl"),
+jl_symbol("set.jl"),
+jl_symbol("abs"),
+jl_symbol("checked_trunc_uint"),
+jl_symbol("Type"),
 jl_symbol("%"),
-jl_symbol("collect"),
-jl_symbol("Type##kw"),
-jl_symbol("typemax"),
-jl_symbol("fill!"),
-jl_symbol("ule_int"),
-jl_symbol("atomics.jl"),
-jl_symbol("libgit2"),
+jl_symbol("len"),
 jl_symbol("BigFloat"),
-jl_symbol("ashr_int"),
-jl_symbol("boundscheck"),
-jl_symbol("abs"),
-jl_symbol("^"),
-jl_symbol("ensure_initialized"),
-jl_symbol("_array_for"),
-jl_symbol("strings/util.jl"),
-jl_symbol("Dict"),
+jl_symbol("isequal"),
+jl_symbol("vect"),
+jl_symbol("sprint"),
+jl_symbol("mode"),
+jl_symbol("expr.jl"),
 jl_symbol("Nothing"),
-jl_symbol("compiler/ssair/ir.jl"),
+jl_symbol("Val"),
+jl_symbol("IteratorSize"),
+jl_symbol("=>"),
+jl_symbol("haskey"),
+jl_symbol("iddict.jl"),
 jl_symbol("unsafe_write"),
-jl_symbol("util.jl"),
+jl_symbol("val"),
+jl_symbol("flags"),
+jl_symbol("task.jl"),
+jl_symbol("UnionAll"),
+jl_symbol("memset"),
+jl_symbol("xor"),
+jl_symbol("jl_alloc_genericmemory"),
+jl_symbol("uplo"),
 jl_symbol("toInt32"),
-jl_symbol("loading.jl"),
-jl_symbol("value"),
-jl_symbol("expr.jl"),
-jl_symbol("print_to_string"),
+jl_symbol("Base"),
+jl_symbol("atomics.jl"),
+jl_symbol("uuid"),
+jl_symbol("one"),
+jl_symbol("math.jl"),
+jl_symbol("position"),
+jl_symbol("typemax"),
+jl_symbol("all"),
+jl_symbol("error.jl"),
+jl_symbol("path.jl"),
+jl_symbol("^"),
+jl_symbol("nextind"),
+jl_symbol("include"),
 jl_symbol("the_exception"),
-jl_symbol("nonzeros"),
-jl_symbol("<:"),
-jl_symbol("KeyError"),
-jl_symbol("xor"),
-jl_symbol("logging.jl"),
+jl_symbol("ensure_initialized"),
+jl_symbol("Const"),
+jl_symbol("UInt128"),
+jl_symbol("codeunit"),
 jl_symbol("stat.jl"),
-jl_symbol("close"),
-jl_symbol("adjoint"),
-jl_symbol("meta"),
-jl_symbol("path.jl"),
-jl_symbol("round"),
-jl_symbol("Cstring"),
-jl_symbol("SizeUnknown"),
-jl_symbol("esc"),
-jl_symbol("missing.jl"),
+jl_symbol("gcutils.jl"),
+jl_symbol("UndefRefError"),
+jl_symbol("diag"),
 jl_symbol("throw_undef_if_not"),
-jl_symbol("error.jl"),
-jl_symbol("Type"),
-jl_symbol("mul!"),
-jl_symbol("math.jl"),
-jl_symbol("unsafe_trunc"),
 jl_symbol("missing"),
-jl_symbol("subarray.jl"),
-jl_symbol("noinline"),
 jl_symbol("isnan"),
-jl_symbol("ldiv!"),
-jl_symbol("DataType"),
-jl_symbol("codeunit"),
-jl_symbol("condition.jl"),
-jl_symbol("step"),
-jl_symbol("copyast"),
-jl_symbol("bitset.jl"),
-jl_symbol("float"),
-jl_symbol("fastmath.jl"),
-jl_symbol("_mod64"),
-jl_symbol("_div64"),
-jl_symbol("all"),
-jl_symbol("parse"),
-jl_symbol("joinpath"),
-jl_symbol("nextind"),
-jl_symbol("regex.jl"),
 jl_symbol("Enums.jl"),
-jl_symbol("promote_type"),
-jl_symbol("Cdouble"),
-jl_symbol("ComplexF32"),
-jl_symbol("read"),
-jl_symbol("intfuncs.jl"),
-jl_symbol("Complex"),
+jl_symbol("logging.jl"),
 jl_symbol("_deleteend!"),
-jl_symbol("stat"),
-jl_symbol("UnionAll"),
-jl_symbol("special/trig.jl"),
-jl_symbol("UInt128"),
-jl_symbol("_copyto_impl!"),
-jl_symbol("stream.jl"),
-jl_symbol("lmul!"),
-jl_symbol("repr"),
-jl_symbol("promote_rule"),
-jl_symbol("xor_int"),
-jl_symbol("complex.jl"),
-jl_symbol("transpose"),
-jl_symbol(">>>"),
-jl_symbol("cholmod_sparse"),
-jl_symbol("filemode"),
-jl_symbol("ComplexF64"),
-jl_symbol("SparseMatrixCSC"),
-jl_symbol("view"),
-jl_symbol("GitError"),
-jl_symbol("zeros"),
-jl_symbol("InexactError"),
+jl_symbol("indices"),
+jl_symbol("compiler/utilities.jl"),
+jl_symbol("Pairs"),
+jl_symbol("<:"),
+jl_symbol("compiler/tfuncs.jl"),
+jl_symbol("close"),
+jl_symbol("subarray.jl"),
+jl_symbol("fastmath.jl"),
+jl_symbol("invokelatest"),
+jl_symbol("jl_array_del_end"),
+jl_symbol("_mod64"),
+jl_symbol("parameters"),
diff --git a/src/coverage.cpp b/src/coverage.cpp
index 95924f326524b..ca711e0f9678a 100644
--- a/src/coverage.cpp
+++ b/src/coverage.cpp
@@ -1,9 +1,8 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#include <cstdint>
+#include <pthread.h>
 #include <string>
-#include <fstream>
-#include <map>
-#include <vector>
 
 #include "llvm-version.h"
 #include <llvm/ADT/StringRef.h>
@@ -15,7 +14,7 @@
 
 using namespace llvm;
 
-static int codegen_imaging_mode(void)
+static int codegen_imaging_mode(void) JL_NOTSAFEPOINT
 {
     return jl_options.image_codegen || (jl_generating_output() && jl_options.use_pkgimages);
 }
@@ -24,9 +23,11 @@ static int codegen_imaging_mode(void)
 
 const int logdata_blocksize = 32; // target getting nearby lines in the same general cache area and reducing calls to malloc by chunking
 typedef uint64_t logdata_block[logdata_blocksize];
-typedef StringMap< std::vector<logdata_block*> > logdata_t;
+typedef StringMap< SmallVector<logdata_block*, 0> > logdata_t;
 
-static uint64_t *allocLine(std::vector<logdata_block*> &vec, int line)
+pthread_mutex_t coverage_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static uint64_t *allocLine(SmallVector<logdata_block*, 0> &vec, int line) JL_NOTSAFEPOINT
 {
     unsigned block = line / logdata_blocksize;
     line = line % logdata_blocksize;
@@ -45,51 +46,60 @@ static uint64_t *allocLine(std::vector<logdata_block*> &vec, int line)
 
 static logdata_t coverageData;
 
-JL_DLLEXPORT void jl_coverage_alloc_line(StringRef filename, int line)
+JL_DLLEXPORT void jl_coverage_alloc_line(StringRef filename, int line) JL_NOTSAFEPOINT
 {
     assert(!codegen_imaging_mode());
     if (filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
         return;
+    pthread_mutex_lock(&coverage_lock);
     allocLine(coverageData[filename], line);
+    pthread_mutex_unlock(&coverage_lock);
 }
 
-JL_DLLEXPORT uint64_t *jl_coverage_data_pointer(StringRef filename, int line)
+JL_DLLEXPORT uint64_t *jl_coverage_data_pointer(StringRef filename, int line) JL_NOTSAFEPOINT
 {
-    return allocLine(coverageData[filename], line);
+    pthread_mutex_lock(&coverage_lock);
+    uint64_t* ret = allocLine(coverageData[filename], line);
+    pthread_mutex_unlock(&coverage_lock);
+    return ret;
 }
 
-extern "C" JL_DLLEXPORT void jl_coverage_visit_line(const char *filename_, size_t len_filename, int line)
+extern "C" JL_DLLEXPORT void jl_coverage_visit_line(const char *filename_, size_t len_filename, int line) JL_NOTSAFEPOINT
 {
     StringRef filename = StringRef(filename_, len_filename);
     if (codegen_imaging_mode() || filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
         return;
-    std::vector<logdata_block*> &vec = coverageData[filename];
+    pthread_mutex_lock(&coverage_lock);
+    SmallVector<logdata_block*, 0> &vec = coverageData[filename];
     uint64_t *ptr = allocLine(vec, line);
     (*ptr)++;
+    pthread_mutex_unlock(&coverage_lock);
 }
 
 // Memory allocation log (malloc_log)
 
 static logdata_t mallocData;
 
-JL_DLLEXPORT uint64_t *jl_malloc_data_pointer(StringRef filename, int line)
+JL_DLLEXPORT uint64_t *jl_malloc_data_pointer(StringRef filename, int line) JL_NOTSAFEPOINT
 {
-    return allocLine(mallocData[filename], line);
+    pthread_mutex_lock(&coverage_lock);
+    uint64_t* ret = allocLine(mallocData[filename], line);
+    pthread_mutex_unlock(&coverage_lock);
+    return ret;
 }
 
-// Resets the malloc counts.
-extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void)
+static void clear_log_data(logdata_t &logData, int resetValue) JL_NOTSAFEPOINT
 {
-    logdata_t::iterator it = mallocData.begin();
-    for (; it != mallocData.end(); it++) {
-        std::vector<logdata_block*> &bytes = (*it).second;
-        std::vector<logdata_block*>::iterator itb;
+    logdata_t::iterator it = logData.begin();
+    for (; it != logData.end(); it++) {
+        SmallVector<logdata_block*, 0> &bytes = (*it).second;
+        SmallVector<logdata_block*, 0>::iterator itb;
         for (itb = bytes.begin(); itb != bytes.end(); itb++) {
             if (*itb) {
                 logdata_block &data = **itb;
                 for (int i = 0; i < logdata_blocksize; i++) {
                     if (data[i] > 0)
-                        data[i] = 1;
+                        data[i] = resetValue;
                 }
             }
         }
@@ -97,37 +107,47 @@ extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void)
     jl_gc_sync_total_bytes(0);
 }
 
-static void write_log_data(logdata_t &logData, const char *extension)
+// Resets the malloc counts.
+extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void) JL_NOTSAFEPOINT
+{
+    pthread_mutex_lock(&coverage_lock);
+    clear_log_data(mallocData, 1);
+    pthread_mutex_unlock(&coverage_lock);
+}
+
+// Resets the code coverage
+extern "C" JL_DLLEXPORT void jl_clear_coverage_data(void) JL_NOTSAFEPOINT
+{
+    pthread_mutex_lock(&coverage_lock);
+    clear_log_data(coverageData, 0);
+    pthread_mutex_unlock(&coverage_lock);
+}
+
+static void write_log_data(logdata_t &logData, const char *extension) JL_NOTSAFEPOINT
 {
     std::string base = std::string(jl_options.julia_bindir);
     base = base + "/../share/julia/base/";
     logdata_t::iterator it = logData.begin();
     for (; it != logData.end(); it++) {
         std::string filename(it->first());
-        std::vector<logdata_block*> &values = it->second;
+        SmallVector<logdata_block*, 0> &values = it->second;
         if (!values.empty()) {
             if (!jl_isabspath(filename.c_str()))
                 filename = base + filename;
-            std::ifstream inf(filename.c_str());
-            if (!inf.is_open())
+            FILE *inf = fopen(filename.c_str(), "r");
+            if (!inf)
                 continue;
             std::string outfile = filename + extension;
-            std::ofstream outf(outfile.c_str(), std::ofstream::trunc | std::ofstream::out | std::ofstream::binary);
-            if (outf.is_open()) {
-                inf.exceptions(std::ifstream::badbit);
-                outf.exceptions(std::ifstream::failbit | std::ifstream::badbit);
+            FILE *outf = fopen(outfile.c_str(), "wb");
+            if (outf) {
                 char line[1024];
                 int l = 1;
                 unsigned block = 0;
-                while (!inf.eof()) {
-                    inf.getline(line, sizeof(line));
-                    if (inf.fail()) {
-                        if (inf.eof())
-                            break; // no content on trailing line
-                        // Read through lines longer than sizeof(line)
-                        inf.clear();
-                        inf.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
-                    }
+                int ret = 0;
+                while (ret != EOF && (ret = fscanf(inf, "%1023[^\n]", line)) != EOF) {
+                    // Skip n non-newline chars and a single trailing newline
+                    if ((ret = fscanf(inf, "%*[^\n]")) != EOF)
+                        ret = fscanf(inf, "%*1[\n]");
                     logdata_block *data = NULL;
                     if (block < values.size()) {
                         data = values[block];
@@ -137,32 +157,32 @@ static void write_log_data(logdata_t &logData, const char *extension)
                         l = 0;
                         block++;
                     }
-                    outf.width(9);
                     if (value == 0)
-                        outf << '-';
+                        fprintf(outf, "        -");
                     else
-                        outf << (value - 1);
-                    outf.width(0);
-                    outf << " " << line << '\n';
+                        fprintf(outf, "%9" PRIu64, value - 1);
+                    fprintf(outf, " %s\n", line);
+                    line[0] = 0;
                 }
-                outf.close();
+                fclose(outf);
             }
-            inf.close();
+            fclose(inf);
         }
     }
 }
 
-static void write_lcov_data(logdata_t &logData, const std::string &outfile)
+static void write_lcov_data(logdata_t &logData, const std::string &outfile) JL_NOTSAFEPOINT
 {
-    std::ofstream outf(outfile.c_str(), std::ofstream::ate | std::ofstream::out | std::ofstream::binary);
+    FILE *outf = fopen(outfile.c_str(), "ab");
+    if (!outf) return;
     //std::string base = std::string(jl_options.julia_bindir);
     //base = base + "/../share/julia/base/";
     logdata_t::iterator it = logData.begin();
     for (; it != logData.end(); it++) {
         StringRef filename = it->first();
-        const std::vector<logdata_block*> &values = it->second;
+        const SmallVector<logdata_block*, 0> &values = it->second;
         if (!values.empty()) {
-            outf << "SF:" << filename.str() << '\n';
+            fprintf(outf, "SF:%.*s\n", (int)filename.size(), filename.data());
             size_t n_covered = 0;
             size_t n_instrumented = 0;
             size_t lno = 0;
@@ -175,7 +195,7 @@ static void write_lcov_data(logdata_t &logData, const std::string &outfile)
                             n_instrumented++;
                             if (cov > 1)
                                 n_covered++;
-                            outf << "DA:" << lno << ',' << (cov - 1) << '\n';
+                            fprintf(outf, "DA:%zu,%" PRIu64 "\n", lno, cov - 1);
                         }
                         lno++;
                     }
@@ -184,19 +204,20 @@ static void write_lcov_data(logdata_t &logData, const std::string &outfile)
                     lno += logdata_blocksize;
                 }
             }
-            outf << "LH:" << n_covered << '\n';
-            outf << "LF:" << n_instrumented << '\n';
-            outf << "end_of_record\n";
+            fprintf(outf, "LH:%zu\n", n_covered);
+            fprintf(outf, "LF:%zu\n", n_instrumented);
+            fprintf(outf, "end_of_record\n");
         }
     }
-    outf.close();
+    fclose(outf);
 }
 
-extern "C" void jl_write_coverage_data(const char *output)
+extern "C" JL_DLLEXPORT void jl_write_coverage_data(const char *output) JL_NOTSAFEPOINT
 {
+    pthread_mutex_lock(&coverage_lock);
     if (output) {
         StringRef output_pattern(output);
-        if (output_pattern.endswith(".info"))
+        if (output_pattern.ends_with(".info"))
             write_lcov_data(coverageData, jl_format_filename(output_pattern.str().c_str()));
     }
     else {
@@ -204,11 +225,14 @@ extern "C" void jl_write_coverage_data(const char *output)
         raw_string_ostream(stm) << "." << uv_os_getpid() << ".cov";
         write_log_data(coverageData, stm.c_str());
     }
+    pthread_mutex_unlock(&coverage_lock);
 }
 
-extern "C" void jl_write_malloc_log(void)
+extern "C" void jl_write_malloc_log(void) JL_NOTSAFEPOINT
 {
+    pthread_mutex_lock(&coverage_lock);
     std::string stm;
     raw_string_ostream(stm) << "." << uv_os_getpid() << ".mem";
     write_log_data(mallocData, stm.c_str());
+    pthread_mutex_unlock(&coverage_lock);
 }
diff --git a/src/crc32c.c b/src/crc32c.c
index 4ca8db06459a1..50d2acc603359 100644
--- a/src/crc32c.c
+++ b/src/crc32c.c
@@ -1,15 +1,16 @@
 /* crc32c.c -- compute CRC-32C using software table or available hardware instructions
- * Copyright (C) 2013 Mark Adler
- * Version 1.1  1 Aug 2013  Mark Adler
+ * Copyright (C) 2013, 2021 Mark Adler
+ * Version 1.1  1 Aug 2013  Mark Adler, updates from Version 1.2 5 June 2021
  *
  * Code retrieved in August 2016 from August 2013 post by Mark Adler on
- *    http://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software
+ *    https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software
  * Modified for use in libjulia:
  *    - exported function renamed to jl_crc32c, DLL exports added.
  *    - removed main() function
  *    - architecture and compiler detection
  *    - precompute crc32c tables and store in a generated .c file
  *    - ARMv8 support
+ * Updated to incorporate upstream 2021 patch by Mark Adler to register constraints.
  */
 
 /*
@@ -39,6 +40,8 @@
 /* Version history:
    1.0  10 Feb 2013  First version
    1.1   1 Aug 2013  Correct comments on why three crc instructions in parallel
+   1.2   5 Jun 2021  Correct register constraints on assembly instructions
+                     (+ other changes that were superfluous for us)
 */
 
 #include "julia.h"
@@ -53,14 +56,9 @@
 #define POLY 0x82f63b78
 
 /* Block sizes for three-way parallel crc computation.  LONG and SHORT must
-   both be powers of two.  The associated string constants must be set
-   accordingly, for use in constructing the assembler instructions. */
+   both be powers of two. */
 #define LONG 8192
-#define LONGx1 "8192"
-#define LONGx2 "16384"
 #define SHORT 256
-#define SHORTx1 "256"
-#define SHORTx2 "512"
 
 #ifndef GEN_CRC32C_TABLES
 #include "crc32c-tables.c"
@@ -97,27 +95,27 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len)
     /* compute the crc for up to seven leading bytes to bring the data pointer
        to an eight-byte boundary */
     while (len && ((uintptr_t)buf & 7) != 0) {
-        __asm__("crc32b\t" "(%1), %0"
-                : "=r"(crc0)
-                : "r"(buf), "0"(crc0));
+        __asm__("crc32b\t" "%1, %0"
+                : "+r"(crc0)
+                : "m"(*buf));
         buf++;
         len--;
     }
 
-    /* compute the crc on sets of LONG*3 bytes, executing three independent crc
-       instructions, each on LONG bytes -- this is optimized for the Nehalem,
-       Westmere, Sandy Bridge, and Ivy Bridge architectures, which have a
-       throughput of one crc per cycle, but a latency of three cycles */
+    /* compute the crc on sets of LONG*3 bytes,
+       making use of three ALUs in parallel on a single core. */
     while (len >= LONG * 3) {
         uintptr_t crc1 = 0;
         uintptr_t crc2 = 0;
         const char *end = buf + LONG;
         do {
-            __asm__(CRC32_PTR "\t" "(%3), %0\n\t"
-                    CRC32_PTR "\t" LONGx1 "(%3), %1\n\t"
-                    CRC32_PTR "\t" LONGx2 "(%3), %2"
-                    : "=r"(crc0), "=r"(crc1), "=r"(crc2)
-                    : "r"(buf), "0"(crc0), "1"(crc1), "2"(crc2));
+            __asm__(CRC32_PTR "\t%3, %0\n\t"
+                    CRC32_PTR "\t%4, %1\n\t"
+                    CRC32_PTR "\t%5, %2"
+                    : "+r"(crc0), "+r"(crc1), "+r"(crc2)
+                    : "m"(* (const uintptr_t *) &buf[0]),
+                      "m"(* (const uintptr_t *) &buf[LONG]),
+                      "m"(* (const uintptr_t *) &buf[LONG*2]));
             buf += sizeof(void*);
         } while (buf < end);
         crc0 = crc32c_shift(crc32c_long, crc0) ^ crc1;
@@ -133,11 +131,13 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len)
         uintptr_t crc2 = 0;
         const char *end = buf + SHORT;
         do {
-            __asm__(CRC32_PTR "\t" "(%3), %0\n\t"
-                    CRC32_PTR "\t" SHORTx1 "(%3), %1\n\t"
-                    CRC32_PTR "\t" SHORTx2 "(%3), %2"
-                    : "=r"(crc0), "=r"(crc1), "=r"(crc2)
-                    : "r"(buf), "0"(crc0), "1"(crc1), "2"(crc2));
+            __asm__(CRC32_PTR "\t%3, %0\n\t"
+                    CRC32_PTR "\t%4, %1\n\t"
+                    CRC32_PTR "\t%5, %2"
+                    : "+r"(crc0), "+r"(crc1), "+r"(crc2)
+                    : "m"(* (const uintptr_t *) &buf[0]),
+                      "m"(* (const uintptr_t *) &buf[SHORT]),
+                      "m"(* (const uintptr_t *) &buf[SHORT*2]));
             buf += sizeof(void*);
         } while (buf < end);
         crc0 = crc32c_shift(crc32c_short, crc0) ^ crc1;
@@ -150,18 +150,18 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len)
        block */
     const char *end = buf + (len - (len & 7));
     while (buf < end) {
-        __asm__(CRC32_PTR "\t" "(%1), %0"
-                : "=r"(crc0)
-                : "r"(buf), "0"(crc0));
+        __asm__(CRC32_PTR "\t" "%1, %0"
+                : "+r"(crc0)
+                : "m"(* (const uintptr_t *) buf));
         buf += sizeof(void*);
     }
     len &= 7;
 
     /* compute the crc for up to seven trailing bytes */
     while (len) {
-        __asm__("crc32b\t" "(%1), %0"
-                : "=r"(crc0)
-                : "r"(buf), "0"(crc0));
+        __asm__("crc32b\t" "%1, %0"
+                : "+r"(crc0)
+                : "m"(*buf));
         buf++;
         len--;
     }
@@ -178,6 +178,9 @@ JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len)
     return crc32c_sse42(crc, buf, len);
 }
 #  else
+#if defined(JL_CRC32C_USE_IFUNC) && defined(_COMPILER_CLANG_)
+JL_UNUSED
+#endif
 static crc32c_func_t crc32c_dispatch(void)
 {
     // When used in ifunc, we cannot call external functions (i.e. jl_cpuid)
diff --git a/src/datatype.c b/src/datatype.c
index 905959fb80e0a..ee947d2512064 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
+#include <stdalign.h>
 #include "julia.h"
 #include "julia_internal.h"
 #include "julia_assert.h"
@@ -19,43 +20,50 @@ extern "C" {
 
 // allocating TypeNames -----------------------------------------------------------
 
-static int is10digit(char c) JL_NOTSAFEPOINT
-{
-    return (c >= '0' && c <= '9');
-}
-
 static jl_sym_t *jl_demangle_typename(jl_sym_t *s) JL_NOTSAFEPOINT
 {
     char *n = jl_symbol_name(s);
     if (n[0] != '#')
         return s;
-    char *end = strrchr(n, '#');
+    char *end = strchr(&n[1], '#');
+    // handle `#f...##...#...`
+    if (end != NULL && end[1] == '#')
+        end = strchr(&end[2], '#');
     int32_t len;
-    if (end == n || end == n+1)
+    if (end == NULL || end == n+1)
         len = strlen(n) - 1;
     else
         len = (end-n) - 1;  // extract `f` from `#f#...`
-    if (is10digit(n[1]))
+    if (isdigit(n[1]) || is_canonicalized_anonfn_typename(n))
         return _jl_symbol(n, len+1);
     return _jl_symbol(&n[1], len);
 }
 
+JL_DLLEXPORT jl_methcache_t *jl_new_method_cache(void)
+{
+    jl_task_t *ct = jl_current_task;
+    jl_methcache_t *mc =
+        (jl_methcache_t*)jl_gc_alloc(ct->ptls, sizeof(jl_methcache_t),
+                                     jl_methcache_type);
+    jl_atomic_store_relaxed(&mc->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any);
+    jl_atomic_store_relaxed(&mc->cache, jl_nothing);
+    JL_MUTEX_INIT(&mc->writelock, "methodtable->writelock");
+    return mc;
+}
+
 JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *module)
 {
+    jl_methcache_t *mc = jl_new_method_cache();
+    JL_GC_PUSH1(&mc);
     jl_task_t *ct = jl_current_task;
     jl_methtable_t *mt =
-        (jl_methtable_t*)jl_gc_alloc(ct->ptls, sizeof(jl_methtable_t),
-                                     jl_methtable_type);
-    mt->name = jl_demangle_typename(name);
-    mt->module = module;
+        (jl_methtable_t*)jl_gc_alloc(ct->ptls, sizeof(jl_methtable_t), jl_methtable_type);
     jl_atomic_store_relaxed(&mt->defs, jl_nothing);
-    jl_atomic_store_relaxed(&mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
-    jl_atomic_store_relaxed(&mt->cache, jl_nothing);
-    jl_atomic_store_relaxed(&mt->max_args, 0);
-    mt->backedges = NULL;
-    JL_MUTEX_INIT(&mt->writelock, "methodtable->writelock");
-    mt->offs = 0;
-    mt->frozen = 0;
+    mt->cache = mc;
+    mt->name = name;
+    mt->module = module;
+    mt->backedges = (jl_genericmemory_t*)jl_an_empty_memory_any;
+    JL_GC_POP();
     return mt;
 }
 
@@ -68,20 +76,23 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu
     tn->name = name;
     tn->module = module;
     tn->wrapper = NULL;
+    tn->singletonname = jl_demangle_typename(name);
     jl_atomic_store_relaxed(&tn->Typeofwrapper, NULL);
     jl_atomic_store_relaxed(&tn->cache, jl_emptysvec);
     jl_atomic_store_relaxed(&tn->linearcache, jl_emptysvec);
     tn->names = NULL;
     tn->hash = bitmix(bitmix(module ? module->build_id.lo : 0, name->hash), 0xa1ada1da);
-    tn->_reserved = 0;
+    tn->_unused = 0;
     tn->abstract = abstract;
     tn->mutabl = mutabl;
     tn->mayinlinealloc = 0;
-    tn->mt = NULL;
     tn->partial = NULL;
     tn->atomicfields = NULL;
     tn->constfields = NULL;
     tn->max_methods = 0;
+    jl_atomic_store_relaxed(&tn->max_args, 0);
+    jl_atomic_store_relaxed(&tn->cache_entry_count, 0);
+    tn->constprop_heustic = 0;
     return tn;
 }
 
@@ -134,10 +145,10 @@ static uint32_t _hash_layout_djb2(uintptr_t _layout, void *unused) JL_NOTSAFEPOI
     size_t own_size = sizeof(jl_datatype_layout_t);
     const char *fields = jl_dt_layout_fields(layout);
     assert(fields);
-    size_t fields_size = layout->nfields * jl_fielddesc_size(layout->fielddesc_type);
+    size_t fields_size = layout->nfields * jl_fielddesc_size(layout->flags.fielddesc_type);
     const char *pointers = jl_dt_layout_ptrs(layout);
     assert(pointers);
-    size_t pointers_size = (layout->npointers << layout->fielddesc_type);
+    size_t pointers_size = layout->first_ptr < 0 ? 0 : (layout->npointers << layout->flags.fielddesc_type);
 
     uint_t hash = 5381;
     hash = _hash_djb2(hash, (char *)layout, own_size);
@@ -155,12 +166,12 @@ static int layout_eq(void *_l1, void *_l2, void *unused) JL_NOTSAFEPOINT
         return 0;
     const char *f1 = jl_dt_layout_fields(l1);
     const char *f2 = jl_dt_layout_fields(l2);
-    size_t fields_size = l1->nfields * jl_fielddesc_size(l1->fielddesc_type);
+    size_t fields_size = l1->nfields * jl_fielddesc_size(l1->flags.fielddesc_type);
     if (memcmp(f1, f2, fields_size))
         return 0;
     const char *p1 = jl_dt_layout_ptrs(l1);
     const char *p2 = jl_dt_layout_ptrs(l2);
-    size_t pointers_size = (l1->npointers << l1->fielddesc_type);
+    size_t pointers_size = l1->first_ptr < 0 ? 0 : (l1->npointers << l1->flags.fielddesc_type);
     if (memcmp(p1, p2, pointers_size))
         return 0;
     return 1;
@@ -179,6 +190,8 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
                                            uint32_t npointers,
                                            uint32_t alignment,
                                            int haspadding,
+                                           int isbitsegal,
+                                           int arrayelem,
                                            jl_fielddesc32_t desc[],
                                            uint32_t pointers[]) JL_NOTSAFEPOINT
 {
@@ -186,32 +199,34 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
 
     // compute the smallest fielddesc type that can hold the layout description
     int fielddesc_type = 0;
+    uint32_t max_size = 0;
+    uint32_t max_offset = 0;
     if (nfields > 0) {
-        uint32_t max_size = 0;
-        uint32_t max_offset = desc[nfields - 1].offset;
-        if (npointers > 0 && pointers[npointers - 1] > max_offset)
-            max_offset = pointers[npointers - 1];
+        max_offset = desc[nfields - 1].offset;
         for (size_t i = 0; i < nfields; i++) {
             if (desc[i].size > max_size)
                 max_size = desc[i].size;
         }
-        jl_fielddesc8_t maxdesc8 = { 0, max_size, max_offset };
-        jl_fielddesc16_t maxdesc16 = { 0, max_size, max_offset };
-        jl_fielddesc32_t maxdesc32 = { 0, max_size, max_offset };
-        if (maxdesc8.size != max_size || maxdesc8.offset != max_offset) {
-            fielddesc_type = 1;
-            if (maxdesc16.size != max_size || maxdesc16.offset != max_offset) {
-                fielddesc_type = 2;
-                if (maxdesc32.size != max_size || maxdesc32.offset != max_offset) {
-                    assert(0); // should have been verified by caller
-                }
+    }
+    if (npointers > 0 && pointers[npointers - 1] > max_offset)
+        max_offset = pointers[npointers - 1];
+    jl_fielddesc8_t maxdesc8 = { 0, max_size, max_offset };
+    jl_fielddesc16_t maxdesc16 = { 0, max_size, max_offset };
+    jl_fielddesc32_t maxdesc32 = { 0, max_size, max_offset };
+    if (maxdesc8.size != max_size || maxdesc8.offset != max_offset) {
+        fielddesc_type = 1;
+        if (maxdesc16.size != max_size || maxdesc16.offset != max_offset) {
+            fielddesc_type = 2;
+            if (maxdesc32.size != max_size || maxdesc32.offset != max_offset) {
+                assert(0); // should have been verified by caller
             }
         }
     }
+    int32_t first_ptr = (npointers > 0 ? (int32_t)pointers[0] : -1);
 
     // allocate a new descriptor, on the stack if possible.
     size_t fields_size = nfields * jl_fielddesc_size(fielddesc_type);
-    size_t pointers_size = (npointers << fielddesc_type);
+    size_t pointers_size = first_ptr < 0 ? 0 : (npointers << fielddesc_type);
     size_t flddesc_sz = sizeof(jl_datatype_layout_t) + fields_size + pointers_size;
     int should_malloc = flddesc_sz >= jl_page_size;
     jl_datatype_layout_t *mallocmem = (jl_datatype_layout_t *)(should_malloc ? malloc(flddesc_sz) : NULL);
@@ -221,11 +236,16 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
     flddesc->size = sz;
     flddesc->nfields = nfields;
     flddesc->alignment = alignment;
-    flddesc->haspadding = haspadding;
-    flddesc->fielddesc_type = fielddesc_type;
-    flddesc->padding = 0;
+    flddesc->flags.haspadding = haspadding;
+    flddesc->flags.isbitsegal = isbitsegal;
+    flddesc->flags.fielddesc_type = fielddesc_type;
+    flddesc->flags.arrayelem_isboxed = (arrayelem & 1) != 0;
+    flddesc->flags.arrayelem_isunion = (arrayelem & 2) != 0;
+    flddesc->flags.arrayelem_isatomic = (arrayelem & 4) != 0;
+    flddesc->flags.arrayelem_islocked = (arrayelem & 8) != 0;
+    flddesc->flags.padding = 0;
     flddesc->npointers = npointers;
-    flddesc->first_ptr = (npointers > 0 ? pointers[0] : -1);
+    flddesc->first_ptr = first_ptr;
 
     // fill out the fields of the new descriptor
     jl_fielddesc8_t *desc8 = (jl_fielddesc8_t *)jl_dt_layout_fields(flddesc);
@@ -248,18 +268,20 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
             desc32[i].isptr = desc[i].isptr;
         }
     }
-    uint8_t *ptrs8 = (uint8_t *)jl_dt_layout_ptrs(flddesc);
-    uint16_t *ptrs16 = (uint16_t *)jl_dt_layout_ptrs(flddesc);
-    uint32_t *ptrs32 = (uint32_t *)jl_dt_layout_ptrs(flddesc);
-    for (size_t i = 0; i < npointers; i++) {
-        if (fielddesc_type == 0) {
-            ptrs8[i] = pointers[i];
-        }
-        else if (fielddesc_type == 1) {
-            ptrs16[i] = pointers[i];
-        }
-        else {
-            ptrs32[i] = pointers[i];
+    if (first_ptr >= 0) {
+        uint8_t *ptrs8 = (uint8_t *)jl_dt_layout_ptrs(flddesc);
+        uint16_t *ptrs16 = (uint16_t *)jl_dt_layout_ptrs(flddesc);
+        uint32_t *ptrs32 = (uint32_t *)jl_dt_layout_ptrs(flddesc);
+        for (size_t i = 0; i < npointers; i++) {
+            if (fielddesc_type == 0) {
+                ptrs8[i] = pointers[i];
+            }
+            else if (fielddesc_type == 1) {
+                ptrs16[i] = pointers[i];
+            }
+            else {
+                ptrs32[i] = pointers[i];
+            }
         }
     }
 
@@ -291,9 +313,10 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
 }
 
 // Determine if homogeneous tuple with fields of type t will have
-// a special alignment beyond normal Julia rules.
+// a special alignment and vector-ABI beyond normal rules for aggregates.
 // Return special alignment if one exists, 0 if normal alignment rules hold.
 // A non-zero result *must* match the LLVM rules for a vector type <nfields x t>.
+// Matching the compiler's `__attribute__ vector_size` behavior.
 // For sake of Ahead-Of-Time (AOT) compilation, this routine has to work
 // without LLVM being available.
 unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
@@ -308,8 +331,12 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
         // motivating use case comes up for Julia, we reject pointers.
         return 0;
     size_t elsz = jl_datatype_size(ty);
-    if (elsz != 1 && elsz != 2 && elsz != 4 && elsz != 8)
-        // Only handle power-of-two-sized elements (for now)
+    if (next_power_of_two(elsz) != elsz)
+        // Only handle power-of-two-sized elements (for now), since other
+        // lengths may be packed into very complicated arrangements (llvm pads
+        // extra bits on most platforms when computing alignment but not when
+        // computing type size, but adds no extra bytes for each element, so
+        // their effect on offsets are never what you may naturally expect).
         return 0;
     size_t size = nfields * elsz;
     // Use natural alignment for this vector: this matches LLVM and clang.
@@ -318,23 +345,24 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
 
 STATIC_INLINE int jl_is_datatype_make_singleton(jl_datatype_t *d) JL_NOTSAFEPOINT
 {
-    return (!d->name->abstract && jl_datatype_size(d) == 0 && d != jl_symbol_type && d->name != jl_array_typename &&
-            d->isconcretetype && !d->name->mutabl);
+    return d->isconcretetype && jl_datatype_size(d) == 0 && d->layout->npointers == 0 && !d->name->mutabl; // implies jl_is_layout_opaque
 }
 
 STATIC_INLINE void jl_maybe_allocate_singleton_instance(jl_datatype_t *st) JL_NOTSAFEPOINT
 {
+    // It's possible for st to already have an ->instance if it was redefined
+    if (st->instance)
+        return;
     if (jl_is_datatype_make_singleton(st)) {
-        // It's possible for st to already have an ->instance if it was redefined
-        if (!st->instance)
-            st->instance = jl_gc_permobj(0, st);
+        jl_task_t *ct = jl_current_task;
+        st->instance = jl_gc_permobj(ct->ptls, 0, st, 0);
     }
 }
 
 // return whether all concrete subtypes of this type have the same layout
 int jl_struct_try_layout(jl_datatype_t *dt)
 {
-    if (dt->layout)
+    if (dt->layout || jl_is_genericmemory_type(dt))
         return 1;
     else if (!jl_has_fixed_layout(dt))
         return 0;
@@ -346,13 +374,15 @@ int jl_struct_try_layout(jl_datatype_t *dt)
 
 int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree)
 {
+    if (jl_typeofbottom_type && ty == jl_typeofbottom_type->super)
+        ty = jl_typeofbottom_type;
     if (ty->name->mayinlinealloc && jl_struct_try_layout(ty)) {
         if (ty->layout->npointers > 0) {
             if (pointerfree)
                 return 0;
             if (ty->name->n_uninitialized != 0)
                 return 0;
-            if (ty->layout->fielddesc_type > 1) // GC only implements support for 8 and 16 (not array32)
+            if (ty->layout->flags.fielddesc_type > 1) // GC only implements support for 8 and 16 (not array32)
                 return 0;
         }
         return 1;
@@ -478,6 +508,126 @@ static int is_type_identityfree(jl_value_t *t)
     return 0;
 }
 
+// make a copy of the layout of st, but with nfields=0
+void jl_get_genericmemory_layout(jl_datatype_t *st)
+{
+    jl_value_t *kind = jl_tparam0(st);
+    jl_value_t *eltype = jl_tparam1(st);
+    jl_value_t *addrspace = jl_tparam2(st);
+    if (!st->isconcretetype) {
+        // Since parent dt has an opaque layout, we may end up here being asked to copy that layout to subtypes,
+        // but we don't actually want to do that unless this object is constructable (or at least has a layout).
+        // The real layout is stored only on the wrapper.
+        return;
+    }
+    if (!jl_is_type(eltype)) {
+        // this is expected to have a layout, but since it is not constructable, we don't care too much what it is
+        static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), {0}};
+        st->layout = &opaque_ptr_layout;
+        st->has_concrete_subtype = 0;
+        return;
+    }
+
+    size_t elsz = 0, al = 1;
+    int isunboxed = jl_islayout_inline(eltype, &elsz, &al) && (kind != (jl_value_t*)jl_atomic_sym || jl_is_datatype(eltype));
+    int isunion = isunboxed && jl_is_uniontype(eltype);
+    int haspadding = 1; // we may want to eventually actually compute this more precisely
+    int isbitsegal = 0;
+    int nfields = 0; // aka jl_is_layout_opaque
+    int npointers = 1;
+    int zi;
+    uint32_t first_ptr = -1;
+    uint32_t *pointers = &first_ptr;
+    int needlock = 0;
+
+    const jl_datatype_layout_t *el_layout = NULL;
+    if (isunboxed) {
+        elsz = LLT_ALIGN(elsz, al);
+        if (kind == (jl_value_t*)jl_atomic_sym) {
+            if (elsz > MAX_ATOMIC_SIZE)
+                needlock = 1;
+            else if (elsz > 0)
+                al = elsz = next_power_of_two(elsz);
+        }
+        if (isunion) {
+            zi = 1;
+        }
+        else {
+            assert(jl_is_datatype(eltype));
+            zi = ((jl_datatype_t*)eltype)->zeroinit;
+            el_layout = ((jl_datatype_t*)eltype)->layout;
+            if (el_layout->first_ptr >= 0) {
+                first_ptr = el_layout->first_ptr;
+                npointers = el_layout->npointers;
+                if (el_layout->flags.fielddesc_type == 2 && !needlock) {
+                    pointers = (uint32_t*)jl_dt_layout_ptrs(el_layout);
+                }
+                else {
+                    pointers = (uint32_t*)alloca(npointers * sizeof(uint32_t));
+                    for (int j = 0; j < npointers; j++) {
+                        pointers[j] = jl_ptr_offset((jl_datatype_t*)eltype, j);
+                    }
+                }
+            }
+        }
+        if (needlock) {
+            assert(al <= JL_SMALL_BYTE_ALIGNMENT);
+            size_t lock_offset = LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
+            elsz += lock_offset;
+            if (al < sizeof(void*)) {
+              al = sizeof(void*);
+              elsz = LLT_ALIGN(elsz, al);
+            }
+            haspadding = 1;
+            zi = 1;
+            // Adjust pointer offsets to account for the lock at the beginning
+            if (first_ptr != -1) {
+                uint32_t lock_offset_words = lock_offset / sizeof(void*);
+                first_ptr += lock_offset_words;
+                for (int j = 0; j < npointers; j++) {
+                    pointers[j] += lock_offset_words;
+                }
+            }
+        }
+    }
+    else {
+        elsz = sizeof(void*);
+        al = elsz;
+        zi = 1;
+    }
+
+    // arrayelem is a bitfield: 1=isboxed, 2=isunion, 4=isatomic, 8=islocked
+    int arrayelem = 0;
+    if (!isunboxed)
+        arrayelem |= 1;  // arrayelem_isboxed
+    if (isunion)
+        arrayelem |= 2;  // arrayelem_isunion
+    if (kind == (jl_value_t*)jl_atomic_sym) {
+        arrayelem |= 4;  // arrayelem_isatomic
+        if (needlock)
+            arrayelem |= 8;  // arrayelem_islocked
+    }
+    assert(!st->layout);
+    st->layout = jl_get_layout(elsz, nfields, npointers, al, haspadding, isbitsegal, arrayelem, NULL, pointers);
+    st->zeroinit = zi;
+    //st->has_concrete_subtype = 1;
+    //st->isbitstype = 0;
+    //st->ismutationfree = 0;
+    //st->isidentityfree = 0;
+
+    if (jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0) {
+        if (kind == (jl_value_t*)jl_not_atomic_sym || kind == (jl_value_t*)jl_atomic_sym) {
+            jl_task_t *ct = jl_current_task;
+            jl_genericmemory_t *zeroinst = (jl_genericmemory_t*)jl_gc_permobj(ct->ptls, LLT_ALIGN(sizeof(jl_genericmemory_t), JL_SMALL_BYTE_ALIGNMENT) + (elsz ? elsz : isunion), st, 0);
+            zeroinst->length = 0;
+            zeroinst->ptr = (char*)zeroinst + JL_SMALL_BYTE_ALIGNMENT;
+            memset(zeroinst->ptr, 0, elsz ? elsz : isunion);
+            assert(!st->instance);
+            st->instance = (jl_value_t*)zeroinst;
+        }
+    }
+}
+
 void jl_compute_field_offsets(jl_datatype_t *st)
 {
     const uint64_t max_offset = (((uint64_t)1) << 32) - 1;
@@ -492,6 +642,10 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         st->zeroinit = 0;
         st->has_concrete_subtype = 1;
     }
+    if (st->name == jl_genericmemory_typename) {
+        jl_get_genericmemory_layout(st);
+        return;
+    }
     int isbitstype = st->isconcretetype && st->name->mayinlinealloc;
     int ismutationfree = !w->layout || !jl_is_layout_opaque(w->layout);
     int isidentityfree = !st->name->mutabl;
@@ -501,7 +655,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         st->layout = w->layout;
         st->zeroinit = w->zeroinit;
         st->has_concrete_subtype = w->has_concrete_subtype;
-        if (!jl_is_layout_opaque(st->layout)) { // e.g. jl_array_typename
+        if (!jl_is_layout_opaque(st->layout)) { // e.g. jl_simplevector_type
             st->isbitstype = isbitstype && st->layout->npointers == 0;
             jl_maybe_allocate_singleton_instance(st);
         }
@@ -514,18 +668,17 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         // if we have no fields, we can trivially skip the rest
         if (st == jl_symbol_type || st == jl_string_type) {
             // opaque layout - heap-allocated blob
-            static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, 0, 0};
+            static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, { .isbitsegal=1 }};
             st->layout = &opaque_byte_layout;
             return;
         }
-        else if (st == jl_simplevector_type || st == jl_module_type || st->name == jl_array_typename) {
-            static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), 0, 0};
+        else if (st == jl_simplevector_type || st == jl_module_type) {
+            static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), { .isbitsegal=1 }};
             st->layout = &opaque_ptr_layout;
             return;
         }
         else {
-            // reuse the same layout for all singletons
-            static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, 0, 0};
+            static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, { .isbitsegal=1 }};
             st->layout = &singleton_layout;
         }
     }
@@ -566,6 +719,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         size_t alignm = 1;
         int zeroinit = 0;
         int haspadding = 0;
+        int isbitsegal = 1;
         int homogeneous = 1;
         int needlock = 0;
         uint32_t npointers = 0;
@@ -579,20 +733,37 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                     // Should never happen
                     throw_ovf(should_malloc, desc, st, fsz);
                 desc[i].isptr = 0;
+
                 if (jl_is_uniontype(fld)) {
-                    haspadding = 1;
                     fsz += 1; // selector byte
                     zeroinit = 1;
+                    // TODO: Some unions could be bits comparable.
+                    isbitsegal = 0;
                 }
                 else {
+                    if (fsz > jl_datatype_size(fld)) {
+                        // We have to pad the size to integer size class, but it means this has some padding
+                        isbitsegal = 0;
+                        haspadding = 1;
+                    }
                     uint32_t fld_npointers = ((jl_datatype_t*)fld)->layout->npointers;
-                    if (((jl_datatype_t*)fld)->layout->haspadding)
+                    if (((jl_datatype_t*)fld)->layout->flags.haspadding)
                         haspadding = 1;
+                    if (!((jl_datatype_t*)fld)->layout->flags.isbitsegal)
+                        isbitsegal = 0;
                     if (i >= nfields - st->name->n_uninitialized && fld_npointers &&
                         fld_npointers * sizeof(void*) != fsz) {
-                        // field may be undef (may be uninitialized and contains pointer),
-                        // and contains non-pointer fields of non-zero sizes.
-                        haspadding = 1;
+                        // For field types that contain pointers, we allow inlinealloc
+                        // as long as the field type itself is always fully initialized.
+                        // In such a case, we use the first pointer in the inlined field
+                        // as the #undef marker (if it is zero, we treat the whole inline
+                        // struct as #undef). However, we do not zero-initialize the whole
+                        // struct, so the non-pointer parts of the inline allocation may
+                        // be arbitrary, but still need to compare egal (because all #undef)
+                        // representations are egal. Because of this, we cannot bitscompare
+                        // them.
+                        // TODO: Consider zero-initializing the whole struct.
+                        isbitsegal = 0;
                     }
                     if (!zeroinit)
                         zeroinit = ((jl_datatype_t*)fld)->zeroinit;
@@ -601,15 +772,14 @@ void jl_compute_field_offsets(jl_datatype_t *st)
             }
             else {
                 fsz = sizeof(void*);
-                if (fsz > MAX_ALIGN)
-                    fsz = MAX_ALIGN;
                 al = fsz;
+                if (al > MAX_ALIGN)
+                    al = MAX_ALIGN;
                 desc[i].isptr = 1;
                 zeroinit = 1;
                 npointers++;
                 if (!jl_pointer_egal(fld)) {
-                    // this somewhat poorly named flag says whether some of the bits can be non-unique
-                    haspadding = 1;
+                    isbitsegal = 0;
                 }
             }
             if (isatomic && fsz > MAX_ATOMIC_SIZE)
@@ -670,7 +840,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
             }
         }
         assert(ptr_i == npointers);
-        st->layout = jl_get_layout(sz, nfields, npointers, alignm, haspadding, desc, pointers);
+        st->layout = jl_get_layout(sz, nfields, npointers, alignm, haspadding, isbitsegal, 0, desc, pointers);
         if (should_malloc) {
             free(desc);
             if (npointers)
@@ -687,14 +857,6 @@ void jl_compute_field_offsets(jl_datatype_t *st)
     return;
 }
 
-static int is_anonfn_typename(char *name)
-{
-    if (name[0] != '#' || name[1] == '#')
-        return 0;
-    char *other = strrchr(name, '#');
-    return other > &name[1] && is10digit(other[1]);
-}
-
 JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
         jl_sym_t *name,
         jl_module_t *module,
@@ -710,7 +872,7 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
     jl_typename_t *tn = NULL;
     JL_GC_PUSH2(&t, &tn);
 
-    assert(parameters);
+    assert(parameters && fnames);
 
     // init enough before possibly calling jl_new_typename_in
     t = jl_new_uninitialized_datatype();
@@ -730,18 +892,6 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
     }
     else {
         tn = jl_new_typename_in((jl_sym_t*)name, module, abstract, mutabl);
-        if (super == jl_function_type || super == jl_builtin_type || is_anonfn_typename(jl_symbol_name(name))) {
-            // Callable objects (including compiler-generated closures) get independent method tables
-            // as an optimization
-            tn->mt = jl_new_method_table(name, module);
-            jl_gc_wb(tn, tn->mt);
-            if (jl_svec_len(parameters) == 0 && !abstract)
-                tn->mt->offs = 1;
-        }
-        else {
-            // Everything else, gets to use the unified table
-            tn->mt = jl_nonfunction_mt;
-        }
     }
     t->name = tn;
     jl_gc_wb(t, t->name);
@@ -823,6 +973,18 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *
                                         jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     uint32_t nbytes = (nbits + 7) / 8;
     uint32_t alignm = next_power_of_two(nbytes);
+# if defined(_CPU_X86_) && !defined(_OS_WINDOWS_)
+    // datalayout strings are often weird: on 64-bit they usually follow fairly simple rules,
+    // but on x86 32 bit platforms, sometimes 5 to 8 byte types are
+    // 32-bit aligned even though the MAX_ALIGN (for types 9+ bytes) is 16
+    // (except for f80 which is align 4 on Mingw, Linux, and BSDs--but align 16 on MSVC and Darwin)
+    // https://llvm.org/doxygen/ARMTargetMachine_8cpp.html#adb29b487708f0dc2a940345b68649270
+    // https://llvm.org/doxygen/AArch64TargetMachine_8cpp.html#a003a58caf135efbf7273c5ed84e700d7
+    // https://llvm.org/doxygen/X86TargetMachine_8cpp.html#aefdbcd6131ef195da070cef7fdaf0532
+    // 32-bit alignment is weird
+    if (alignm == 8)
+        alignm = 4;
+# endif
     if (alignm > MAX_ALIGN)
         alignm = MAX_ALIGN;
     // memoize isprimitivetype, since it is much easier than checking
@@ -832,7 +994,7 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *
     bt->ismutationfree = 1;
     bt->isidentityfree = 1;
     bt->isbitstype = (parameters == jl_emptysvec);
-    bt->layout = jl_get_layout(nbytes, 0, 0, alignm, 0, NULL, NULL);
+    bt->layout = jl_get_layout(nbytes, 0, 0, alignm, 0, 1, 0, NULL, NULL);
     bt->instance = NULL;
     return bt;
 }
@@ -853,10 +1015,15 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
     layout->size = large ? GC_MAX_SZCLASS+1 : 0;
     layout->nfields = 0;
     layout->alignment = sizeof(void *);
-    layout->haspadding = 1;
     layout->npointers = haspointers;
-    layout->fielddesc_type = 3;
-    layout->padding = 0;
+    layout->flags.haspadding = 1;
+    layout->flags.isbitsegal = 0;
+    layout->flags.fielddesc_type = 3;
+    layout->flags.padding = 0;
+    layout->flags.arrayelem_isboxed = 0;
+    layout->flags.arrayelem_isunion = 0;
+    layout->flags.arrayelem_isatomic = 0;
+    layout->flags.arrayelem_islocked = 0;
     jl_fielddescdyn_t * desc =
       (jl_fielddescdyn_t *) ((char *)layout + sizeof(*layout));
     desc->markfunc = markfunc;
@@ -884,7 +1051,7 @@ JL_DLLEXPORT int jl_reinit_foreign_type(jl_datatype_t *dt,
 
 JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt)
 {
-    return jl_is_datatype(dt) && dt->layout && dt->layout->fielddesc_type == 3;
+    return jl_is_datatype(dt) && dt->layout && dt->layout->flags.fielddesc_type == 3;
 }
 
 // bits constructors ----------------------------------------------------------
@@ -904,7 +1071,7 @@ JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt)
 
 #if MAX_POINTERATOMIC_SIZE >= 16
 typedef struct _jl_uint128_t {
-    uint64_t a;
+    alignas(16) uint64_t a;
     uint64_t b;
 } jl_uint128_t;
 #endif
@@ -943,6 +1110,10 @@ static inline jl_uint128_t zext_read128(const jl_value_t *x, size_t nb) JL_NOTSA
         memcpy(&y, x, nb);
     return y;
 }
+static void assign_uint128(jl_value_t *v, jl_uint128_t x, size_t nb) JL_NOTSAFEPOINT
+{
+    memcpy(v, &x, nb);
+}
 #endif
 
 JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)
@@ -951,23 +1122,30 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)
     // but will always have the alignment required by the datatype
     assert(jl_is_datatype(dt));
     jl_datatype_t *bt = (jl_datatype_t*)dt;
-    size_t nb = jl_datatype_size(bt);
     // some types have special pools to minimize allocations
-    if (nb == 0)               return jl_new_struct_uninit(bt); // returns bt->instance
-    if (bt == jl_bool_type)    return (1 & *(int8_t*)data) ? jl_true : jl_false;
-    if (bt == jl_uint8_type)   return jl_box_uint8(*(uint8_t*)data);
-    if (bt == jl_int64_type)   return jl_box_int64(*(int64_t*)data);
-    if (bt == jl_int32_type)   return jl_box_int32(*(int32_t*)data);
-    if (bt == jl_int8_type)    return jl_box_int8(*(int8_t*)data);
-    if (bt == jl_int16_type)   return jl_box_int16(*(int16_t*)data);
-    if (bt == jl_uint64_type)  return jl_box_uint64(*(uint64_t*)data);
-    if (bt == jl_uint32_type)  return jl_box_uint32(*(uint32_t*)data);
-    if (bt == jl_uint16_type)  return jl_box_uint16(*(uint16_t*)data);
-    if (bt == jl_char_type)    return jl_box_char(*(uint32_t*)data);
-
-    assert(!bt->smalltag);
+    switch(bt->smalltag) {
+    case jl_bool_tag: return (1 & *(int8_t*)data) ? jl_true : jl_false;
+    case jl_uint8_tag: return jl_box_uint8(*(uint8_t*)data);
+    case jl_int64_tag: return jl_box_int64(*(int64_t*)data);
+    case jl_int32_tag: return jl_box_int32(*(int32_t*)data);
+    case jl_int8_tag: return jl_box_int8(*(int8_t*)data);
+    case jl_int16_tag: return jl_box_int16(*(int16_t*)data);
+    case jl_uint64_tag: return jl_box_uint64(*(uint64_t*)data);
+    case jl_uint32_tag: return jl_box_uint32(*(uint32_t*)data);
+    case jl_uint16_tag: return jl_box_uint16(*(uint16_t*)data);
+    case jl_char_tag: return jl_box_char(*(uint32_t*)data);
+    case jl_ssavalue_tag: return jl_box_ssavalue(*(size_t*)data);
+    case jl_slotnumber_tag: return jl_box_slotnumber(*(size_t*)data);
+   }
+
+    size_t nb = jl_datatype_size(bt);
+    if (nb == 0)
+        return jl_new_struct_uninit(bt); // returns bt->instance
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
+    if (bt->smalltag)
+        jl_set_typetagof(v, bt->smalltag, 0);
+    // TODO: make this a memmove_refs if relevant
     memcpy(jl_assume_aligned(v, sizeof(void*)), data, nb);
     return v;
 }
@@ -977,23 +1155,30 @@ JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *data)
     // data must have the required alignment for an atomic of the given size
     assert(jl_is_datatype(dt));
     jl_datatype_t *bt = (jl_datatype_t*)dt;
-    size_t nb = jl_datatype_size(bt);
     // some types have special pools to minimize allocations
-    if (nb == 0)               return jl_new_struct_uninit(bt); // returns bt->instance
-    if (bt == jl_bool_type)    return (1 & jl_atomic_load((_Atomic(int8_t)*)data)) ? jl_true : jl_false;
-    if (bt == jl_uint8_type)   return jl_box_uint8(jl_atomic_load((_Atomic(uint8_t)*)data));
-    if (bt == jl_int64_type)   return jl_box_int64(jl_atomic_load((_Atomic(int64_t)*)data));
-    if (bt == jl_int32_type)   return jl_box_int32(jl_atomic_load((_Atomic(int32_t)*)data));
-    if (bt == jl_int8_type)    return jl_box_int8(jl_atomic_load((_Atomic(int8_t)*)data));
-    if (bt == jl_int16_type)   return jl_box_int16(jl_atomic_load((_Atomic(int16_t)*)data));
-    if (bt == jl_uint64_type)  return jl_box_uint64(jl_atomic_load((_Atomic(uint64_t)*)data));
-    if (bt == jl_uint32_type)  return jl_box_uint32(jl_atomic_load((_Atomic(uint32_t)*)data));
-    if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_load((_Atomic(uint16_t)*)data));
-    if (bt == jl_char_type)    return jl_box_char(jl_atomic_load((_Atomic(uint32_t)*)data));
-
-    assert(!bt->smalltag);
+    switch(bt->smalltag) {
+    case 0: break;
+    case jl_bool_tag:           return (1 & jl_atomic_load((_Atomic(int8_t)*)data)) ? jl_true : jl_false;
+    case jl_uint8_tag:          return jl_box_uint8(jl_atomic_load((_Atomic(uint8_t)*)data));
+    case jl_int64_tag:          return jl_box_int64(jl_atomic_load((_Atomic(int64_t)*)data));
+    case jl_int32_tag:          return jl_box_int32(jl_atomic_load((_Atomic(int32_t)*)data));
+    case jl_int8_tag:           return jl_box_int8(jl_atomic_load((_Atomic(int8_t)*)data));
+    case jl_int16_tag:          return jl_box_int16(jl_atomic_load((_Atomic(int16_t)*)data));
+    case jl_uint64_tag:         return jl_box_uint64(jl_atomic_load((_Atomic(uint64_t)*)data));
+    case jl_uint32_tag:         return jl_box_uint32(jl_atomic_load((_Atomic(uint32_t)*)data));
+    case jl_uint16_tag:         return jl_box_uint16(jl_atomic_load((_Atomic(uint16_t)*)data));
+    case jl_char_tag:           return jl_box_char(jl_atomic_load((_Atomic(uint32_t)*)data));
+    case jl_ssavalue_tag:       return jl_box_ssavalue(jl_atomic_load((_Atomic(size_t)*)data));
+    case jl_slotnumber_tag:     return jl_box_slotnumber(jl_atomic_load((_Atomic(size_t)*)data));
+    }
+
+    size_t nb = jl_datatype_size(bt);
+    if (nb == 0)
+        return jl_new_struct_uninit(bt); // returns bt->instance
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
+    if (bt->smalltag)
+        jl_set_typetagof(v, bt->smalltag, 0);
     // data is aligned to the power of two,
     // we will write too much of v, but the padding should exist
     if (nb == 1)
@@ -1007,6 +1192,8 @@ JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *data)
         *(uint64_t*)v = jl_atomic_load((_Atomic(uint64_t)*)data);
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 12)
+        assign_uint128(v, jl_atomic_load((_Atomic(jl_uint128_t)*)data), 12);
     else if (nb <= 16)
         *(jl_uint128_t*)v = jl_atomic_load((_Atomic(jl_uint128_t)*)data);
 #endif
@@ -1046,22 +1233,28 @@ JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl
     // dst must have the required alignment for an atomic of the given size
     assert(jl_is_datatype(dt));
     jl_datatype_t *bt = (jl_datatype_t*)dt;
+    if (nb == 0)
+        return jl_new_struct_uninit(bt); // returns bt->instance
     // some types have special pools to minimize allocations
-    if (nb == 0)               return jl_new_struct_uninit(bt); // returns bt->instance
-    if (bt == jl_bool_type)    return (1 & jl_atomic_exchange((_Atomic(int8_t)*)dst, 1 & *(int8_t*)src)) ? jl_true : jl_false;
-    if (bt == jl_uint8_type)   return jl_box_uint8(jl_atomic_exchange((_Atomic(uint8_t)*)dst, *(int8_t*)src));
-    if (bt == jl_int64_type)   return jl_box_int64(jl_atomic_exchange((_Atomic(int64_t)*)dst, *(int64_t*)src));
-    if (bt == jl_int32_type)   return jl_box_int32(jl_atomic_exchange((_Atomic(int32_t)*)dst, *(int32_t*)src));
-    if (bt == jl_int8_type)    return jl_box_int8(jl_atomic_exchange((_Atomic(int8_t)*)dst, *(int8_t*)src));
-    if (bt == jl_int16_type)   return jl_box_int16(jl_atomic_exchange((_Atomic(int16_t)*)dst, *(int16_t*)src));
-    if (bt == jl_uint64_type)  return jl_box_uint64(jl_atomic_exchange((_Atomic(uint64_t)*)dst, *(uint64_t*)src));
-    if (bt == jl_uint32_type)  return jl_box_uint32(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src));
-    if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_exchange((_Atomic(uint16_t)*)dst, *(uint16_t*)src));
-    if (bt == jl_char_type)    return jl_box_char(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src));
-
-    assert(!bt->smalltag);
+    switch(bt->smalltag) {
+    case jl_bool_tag:           return (1 & jl_atomic_exchange((_Atomic(int8_t)*)dst, 1 & *(int8_t*)src)) ? jl_true : jl_false;
+    case jl_uint8_tag:          return jl_box_uint8(jl_atomic_exchange((_Atomic(uint8_t)*)dst, *(int8_t*)src));
+    case jl_int64_tag:          return jl_box_int64(jl_atomic_exchange((_Atomic(int64_t)*)dst, *(int64_t*)src));
+    case jl_int32_tag:          return jl_box_int32(jl_atomic_exchange((_Atomic(int32_t)*)dst, *(int32_t*)src));
+    case jl_int8_tag:           return jl_box_int8(jl_atomic_exchange((_Atomic(int8_t)*)dst, *(int8_t*)src));
+    case jl_int16_tag:          return jl_box_int16(jl_atomic_exchange((_Atomic(int16_t)*)dst, *(int16_t*)src));
+    case jl_uint64_tag:         return jl_box_uint64(jl_atomic_exchange((_Atomic(uint64_t)*)dst, *(uint64_t*)src));
+    case jl_uint32_tag:         return jl_box_uint32(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src));
+    case jl_uint16_tag:         return jl_box_uint16(jl_atomic_exchange((_Atomic(uint16_t)*)dst, *(uint16_t*)src));
+    case jl_char_tag:           return jl_box_char(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src));
+    case jl_ssavalue_tag:       return jl_box_ssavalue(jl_atomic_exchange((_Atomic(size_t)*)dst, *(size_t*)src));
+    case jl_slotnumber_tag:     return jl_box_slotnumber(jl_atomic_exchange((_Atomic(size_t)*)dst, *(size_t*)src));
+    }
+
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, jl_datatype_size(bt), bt);
+    if (bt->smalltag)
+        jl_set_typetagof(v, bt->smalltag, 0);
     if (nb == 1)
         *(uint8_t*)v = jl_atomic_exchange((_Atomic(uint8_t)*)dst, *(uint8_t*)src);
     else if (nb == 2)
@@ -1073,6 +1266,8 @@ JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl
         *(uint64_t*)v = jl_atomic_exchange((_Atomic(uint64_t)*)dst, zext_read64(src, nb));
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 12)
+        assign_uint128(v, jl_atomic_exchange((_Atomic(jl_uint128_t)*)dst, zext_read128(src, nb)), 12);
     else if (nb <= 16)
         *(jl_uint128_t*)v = jl_atomic_exchange((_Atomic(jl_uint128_t)*)dst, zext_read128(src, nb));
 #endif
@@ -1122,13 +1317,10 @@ JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expect
     return success;
 }
 
-JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettyp, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb)
+JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* NEW pre-allocated output */, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb)
 {
     // dst must have the required alignment for an atomic of the given size
     // n.b.: this does not spuriously fail if there are padding bits
-    jl_task_t *ct = jl_current_task;
-    int isptr = jl_field_isptr(rettyp, 0);
-    jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : jl_datatype_size(rettyp), isptr ? dt : rettyp);
     int success;
     jl_datatype_t *et = (jl_datatype_t*)jl_typeof(expected);
     if (nb == 0) {
@@ -1136,7 +1328,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t
     }
     else if (nb == 1) {
         uint8_t *y8 = (uint8_t*)y;
-        assert(!dt->layout->haspadding);
+        assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding);
         if (dt == et) {
             *y8 = *(uint8_t*)expected;
             uint8_t z8 = *(uint8_t*)src;
@@ -1149,7 +1341,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t
     }
     else if (nb == 2) {
         uint16_t *y16 = (uint16_t*)y;
-        assert(!dt->layout->haspadding);
+        assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding);
         if (dt == et) {
             *y16 = *(uint16_t*)expected;
             uint16_t z16 = *(uint16_t*)src;
@@ -1167,7 +1359,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t
             uint32_t z32 = zext_read32(src, nb);
             while (1) {
                 success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32);
-                if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
+                if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
@@ -1184,7 +1376,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t
             uint64_t z64 = zext_read64(src, nb);
             while (1) {
                 success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64);
-                if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
+                if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
@@ -1196,18 +1388,19 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t
 #endif
 #if MAX_POINTERATOMIC_SIZE >= 16
     else if (nb <= 16) {
-        jl_uint128_t *y128 = (jl_uint128_t*)y;
         if (dt == et) {
-            *y128 = zext_read128(expected, nb);
+            jl_uint128_t y128 = zext_read128(expected, nb);
             jl_uint128_t z128 = zext_read128(src, nb);
             while (1) {
-                success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128);
-                if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
+                success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, &y128, z128);
+                assign_uint128(y, y128, nb);
+                if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt)) {
                     break;
+                }
             }
         }
         else {
-            *y128 = jl_atomic_load((_Atomic(jl_uint128_t)*)dst);
+            assign_uint128(y, jl_atomic_load((_Atomic(jl_uint128_t)*)dst), nb);
             success = 0;
         }
     }
@@ -1215,37 +1408,65 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t
     else {
         abort();
     }
-    if (isptr) {
-        JL_GC_PUSH1(&y);
-        jl_value_t *z = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), rettyp);
-        *(jl_value_t**)z = y;
-        JL_GC_POP();
-        y = z;
-        nb = sizeof(jl_value_t*);
-    }
-    *((uint8_t*)y + nb) = success ? 1 : 0;
-    return y;
+    return success;
 }
 
-// used by boot.jl
-JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_datatype_t *bt)
+JL_DLLEXPORT int jl_atomic_storeonce_bits(jl_datatype_t *dt, char *dst, const jl_value_t *src, int nb)
 {
-    uint64_t data = 0xffffffffffffffffULL;
-    jl_task_t *ct = jl_current_task;
-    jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(size_t), bt);
-    if (bt->smalltag)
-        jl_set_typetagof(v, bt->smalltag, 0);
-    memcpy(v, &data, sizeof(size_t));
-    return v;
+    // dst must have the required alignment for an atomic of the given size
+    // n.b.: this does not spuriously fail
+    // n.b.: hasptr == 1 therefore nb >= sizeof(void*), because ((jl_datatype_t*)ty)->layout->has_ptr >= 0
+    int success;
+#ifdef _P64
+    if (nb <= 4) {
+        uint32_t y32 = 0;
+        uint32_t z32 = zext_read32(src, nb);
+        success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, &y32, z32);
+    }
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8) {
+        uint64_t y64 = 0;
+        uint64_t z64 = zext_read64(src, nb);
+        while (1) {
+            success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, &y64, z64);
+            if (success || undefref_check(dt, (jl_value_t*)&y64) != NULL)
+                break;
+        }
+    }
+#endif
+#else
+    if (nb <= 8) {
+        uint64_t y64 = 0;
+        uint64_t z64 = zext_read64(src, nb);
+        success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, &y64, z64);
+    }
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16) {
+        jl_uint128_t y128 = {0};
+        jl_uint128_t z128 = zext_read128(src, nb);
+        while (1) {
+            success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, &y128, z128);
+            if (success || undefref_check(dt, (jl_value_t*)&y128) != NULL)
+                break;
+        }
+    }
+#endif
+    else {
+        abort();
+    }
+    return success;
 }
 
-#define PERMBOXN_FUNC(nb)                                               \
-    jl_value_t *jl_permbox##nb(jl_datatype_t *t, uintptr_t tag, uint##nb##_t x) \
-    {   /* n.b. t must be a concrete isbits datatype of the right size */ \
-        jl_value_t *v = jl_gc_permobj(LLT_ALIGN(nb, sizeof(void*)), t); \
-        if (tag) jl_set_typetagof(v, tag, GC_OLD_MARKED);               \
-        *(uint##nb##_t*)jl_data_ptr(v) = x;                             \
-        return v;                                                       \
+#define PERMBOXN_FUNC(nb)                                                  \
+    jl_value_t *jl_permbox##nb(jl_datatype_t *t, uintptr_t tag, uint##nb##_t x) JL_NOTSAFEPOINT \
+    {   /* n.b. t must be a concrete isbits datatype of the right size */               \
+        jl_task_t *ct = jl_current_task;                                                \
+        jl_value_t *v = jl_gc_permobj(ct->ptls, LLT_ALIGN(nb, sizeof(void*)), t, 0);    \
+        assert(tag);                                                                    \
+        jl_set_typetagof(v, tag, GC_OLD_MARKED);                                        \
+        *(uint##nb##_t*)jl_data_ptr(v) = x;                                             \
+        return v;                                                                       \
     }
 PERMBOXN_FUNC(8)
 PERMBOXN_FUNC(16)
@@ -1253,7 +1474,7 @@ PERMBOXN_FUNC(32)
 PERMBOXN_FUNC(64)
 
 #define UNBOX_FUNC(j_type,c_type)                                       \
-    JL_DLLEXPORT c_type jl_unbox_##j_type(jl_value_t *v)                \
+    JL_DLLEXPORT c_type jl_unbox_##j_type(jl_value_t *v) JL_NOTSAFEPOINT\
     {                                                                   \
         assert(jl_is_primitivetype(jl_typeof(v)));                      \
         assert(jl_datatype_size(jl_typeof(v)) == sizeof(c_type));       \
@@ -1289,9 +1510,6 @@ BOX_FUNC(uint8pointer, uint8_t*,  jl_box)
 
 #define NBOX_C 1024
 
-// some shims to support UIBOX_FUNC definition
-#define jl_ssavalue_tag (((uintptr_t)jl_ssavalue_type) >> 4)
-#define jl_slotnumber_tag (((uintptr_t)jl_slotnumber_type) >> 4)
 
 #define SIBOX_FUNC(typ,c_type)                                          \
     static jl_value_t *boxed_##typ##_cache[NBOX_C];                     \
@@ -1353,40 +1571,31 @@ JL_DLLEXPORT jl_value_t *jl_box_uint8(uint8_t x)
     return jl_boxed_uint8_cache[x];
 }
 
-void jl_init_int32_int64_cache(void)
+void jl_init_box_caches(void)
 {
     int64_t i;
-    for(i=0; i < NBOX_C; i++) {
+    for (i = 0; i < NBOX_C; i++) {
         boxed_int32_cache[i]  = jl_permbox32(jl_int32_type, jl_int32_tag, i-NBOX_C/2);
         boxed_int64_cache[i]  = jl_permbox64(jl_int64_type, jl_int64_tag, i-NBOX_C/2);
+        boxed_uint16_cache[i] = jl_permbox16(jl_uint16_type, jl_uint16_tag, i);
+        boxed_uint64_cache[i] = jl_permbox64(jl_uint64_type, jl_uint64_tag, i);
+        boxed_uint32_cache[i] = jl_permbox32(jl_uint32_type, jl_uint32_tag, i);
+        boxed_int16_cache[i]  = jl_permbox16(jl_int16_type, jl_int16_tag, i-NBOX_C/2);
 #ifdef _P64
-        boxed_ssavalue_cache[i] = jl_permbox64(jl_ssavalue_type, 0, i);
-        boxed_slotnumber_cache[i] = jl_permbox64(jl_slotnumber_type, 0, i);
+        boxed_ssavalue_cache[i] = jl_permbox64(jl_ssavalue_type, jl_ssavalue_tag, i);
+        boxed_slotnumber_cache[i] = jl_permbox64(jl_slotnumber_type, jl_slotnumber_tag, i);
 #else
-        boxed_ssavalue_cache[i] = jl_permbox32(jl_ssavalue_type, 0, i);
-        boxed_slotnumber_cache[i] = jl_permbox32(jl_slotnumber_type, 0, i);
+        boxed_ssavalue_cache[i] = jl_permbox32(jl_ssavalue_type, jl_ssavalue_tag, i);
+        boxed_slotnumber_cache[i] = jl_permbox32(jl_slotnumber_type, jl_slotnumber_tag, i);
 #endif
     }
-    for(i=0; i < 256; i++) {
-        jl_boxed_uint8_cache[i] = jl_permbox8(jl_uint8_type, jl_uint8_tag, i);
-    }
-}
-
-void jl_init_box_caches(void)
-{
-    uint32_t i;
     for (i = 0; i < 128; i++) {
         boxed_char_cache[i] = jl_permbox32(jl_char_type, jl_char_tag, i << 24);
     }
     for (i = 0; i < 256; i++) {
+        jl_boxed_uint8_cache[i] = jl_permbox8(jl_uint8_type, jl_uint8_tag, i);
         jl_boxed_int8_cache[i] = jl_permbox8(jl_int8_type, jl_int8_tag, i);
     }
-    for (i = 0; i < NBOX_C; i++) {
-        boxed_int16_cache[i]  = jl_permbox16(jl_int16_type, jl_int16_tag, i-NBOX_C/2);
-        boxed_uint16_cache[i] = jl_permbox16(jl_uint16_type, jl_uint16_tag, i);
-        boxed_uint32_cache[i] = jl_permbox32(jl_uint32_type, jl_uint32_tag, i);
-        boxed_uint64_cache[i] = jl_permbox64(jl_uint64_type, jl_uint64_tag, i);
-    }
 }
 
 JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x)
@@ -1401,10 +1610,11 @@ JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x)
 JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...)
 {
     jl_task_t *ct = jl_current_task;
-    if (type->instance != NULL) return type->instance;
-    if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL) {
+    if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) {
         jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type);
     }
+    if (type->instance != NULL)
+        return type->instance;
     va_list args;
     size_t i, nf = jl_datatype_nfields(type);
     va_start(args, type);
@@ -1424,7 +1634,7 @@ JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...)
 JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na)
 {
     jl_task_t *ct = jl_current_task;
-    if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL) {
+    if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) {
         jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type);
     }
     size_t nf = jl_datatype_nfields(type);
@@ -1463,7 +1673,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
     jl_task_t *ct = jl_current_task;
     if (!jl_is_tuple(tup))
         jl_type_error("new", (jl_value_t*)jl_tuple_type, tup);
-    if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL)
+    if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout))
         jl_type_error("new", (jl_value_t *)jl_datatype_type, (jl_value_t *)type);
     size_t nargs = jl_nfields(tup);
     size_t nf = jl_datatype_nfields(type);
@@ -1510,10 +1720,13 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
 JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type)
 {
     jl_task_t *ct = jl_current_task;
-    if (type->instance != NULL) return type->instance;
-    if (!jl_is_datatype(type) || type->layout == NULL) {
+    if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) {
+        if (type == jl_typeofbottom_type->super)
+            return jl_bottom_type; // ::Type{Union{}} is an abstract type, but is also a singleton when used as a field type
         jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type);
     }
+    if (type->instance != NULL)
+        return type->instance;
     size_t size = jl_datatype_size(type);
     jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type);
     if (type->smalltag) // TODO: do we need this?
@@ -1525,14 +1738,51 @@ JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type)
 
 // field access ---------------------------------------------------------------
 
-JL_DLLEXPORT void jl_lock_value(jl_value_t *v) JL_NOTSAFEPOINT
+// TODO(jwn): these lock/unlock pairs must be full seq-cst fences
+JL_DLLEXPORT void jl_lock_value(jl_mutex_t *v) JL_NOTSAFEPOINT
 {
-    JL_LOCK_NOGC((jl_mutex_t*)v);
+    JL_LOCK_NOGC(v);
 }
 
-JL_DLLEXPORT void jl_unlock_value(jl_value_t *v) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_unlock_value(jl_mutex_t *v) JL_NOTSAFEPOINT
 {
-    JL_UNLOCK_NOGC((jl_mutex_t*)v);
+    JL_UNLOCK_NOGC(v);
+}
+
+JL_DLLEXPORT void jl_lock_field(jl_mutex_t *v) JL_NOTSAFEPOINT
+{
+    JL_LOCK_NOGC(v);
+}
+
+JL_DLLEXPORT void jl_unlock_field(jl_mutex_t *v) JL_NOTSAFEPOINT
+{
+    JL_UNLOCK_NOGC(v);
+}
+
+static inline char *lock(char *p, jl_value_t *parent, int needlock, enum atomic_kind isatomic) JL_NOTSAFEPOINT
+{
+    if (needlock) {
+        if (isatomic == isatomic_object) {
+            jl_lock_value((jl_mutex_t*)parent);
+        }
+        else {
+            jl_lock_field((jl_mutex_t*)p);
+            return p + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
+        }
+    }
+    return p;
+}
+
+static inline void unlock(char *p, jl_value_t *parent, int needlock, enum atomic_kind isatomic) JL_NOTSAFEPOINT
+{
+    if (needlock) {
+        if (isatomic == isatomic_object) {
+            jl_unlock_value((jl_mutex_t*)parent);
+        }
+        else {
+            jl_unlock_field((jl_mutex_t*)p);
+        }
+    }
 }
 
 JL_DLLEXPORT int jl_field_index(jl_datatype_t *t, jl_sym_t *fld, int err)
@@ -1558,7 +1808,7 @@ JL_DLLEXPORT int jl_field_index(jl_datatype_t *t, jl_sym_t *fld, int err)
         }
     }
     if (err)
-        jl_has_no_field_error(t->name->name, fld);
+        jl_has_no_field_error(t, fld);
     return -1;
 }
 
@@ -1590,11 +1840,12 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i)
     else if (needlock) {
         jl_task_t *ct = jl_current_task;
         r = jl_gc_alloc(ct->ptls, fsz, ty);
-        jl_lock_value(v);
+        jl_lock_value((jl_mutex_t*)v);
         memcpy((char*)r, (char*)v + offs, fsz);
-        jl_unlock_value(v);
+        jl_unlock_value((jl_mutex_t*)v);
     }
     else {
+        // TODO: a finalizer here could make the isunion case not quite right
         r = jl_new_bits(ty, (char*)v + offs);
     }
     return undefref_check((jl_datatype_t*)ty, r);
@@ -1617,30 +1868,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
     return r;
 }
 
-static inline void memassign_safe(int hasptr, jl_value_t *parent, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT
-{
-    if (hasptr) {
-        // assert that although dst might have some undefined bits, the src heap box should be okay with that
-        assert(LLT_ALIGN(nb, sizeof(void*)) == LLT_ALIGN(jl_datatype_size(jl_typeof(src)), sizeof(void*)));
-        size_t nptr = nb / sizeof(void*);
-        memmove_refs((void**)dst, (void**)src, nptr);
-        jl_gc_multi_wb(parent, src);
-        src = (jl_value_t*)((char*)src + nptr * sizeof(void*));
-        dst = dst + nptr * sizeof(void*);
-        nb -= nptr * sizeof(void*);
-    }
-    else {
-        // src must be a heap box.
-        assert(nb == jl_datatype_size(jl_typeof(src)));
-        if (nb >= 16) {
-            memcpy(dst, jl_assume_aligned(src, 16), nb);
-            return;
-        }
-    }
-    memcpy(dst, jl_assume_aligned(src, sizeof(void*)), nb);
-}
-
-void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT
+inline void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT
 {
     size_t offs = jl_field_offset(st, i);
     if (rhs == NULL) { // TODO: this should be invalid, but it happens frequently in ircode.c
@@ -1669,24 +1897,75 @@ void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs,
             hasptr = 0;
         }
         else {
-            hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
+            hasptr = ((jl_datatype_t*)ty)->layout->first_ptr >= 0;
         }
         size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+        assert(!isatomic || jl_typeis(rhs, ty));
         int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
         if (isatomic && !needlock) {
             jl_atomic_store_bits((char*)v + offs, rhs, fsz);
-            if (hasptr)
-                jl_gc_multi_wb(v, rhs); // rhs is immutable
         }
         else if (needlock) {
-            jl_lock_value(v);
+            jl_lock_value((jl_mutex_t*)v);
             memcpy((char*)v + offs, (char*)rhs, fsz);
-            jl_unlock_value(v);
+            jl_unlock_value((jl_mutex_t*)v);
+        }
+        else {
+            memassign_safe(hasptr, (char*)v + offs, rhs, fsz);
+        }
+        if (hasptr)
+            jl_gc_multi_wb(v, rhs); // rhs is immutable
+    }
+}
+
+inline jl_value_t *swap_bits(jl_value_t *ty, char *v, uint8_t *psel, jl_value_t *parent, jl_value_t *rhs, enum atomic_kind isatomic)
+{
+    jl_value_t *rty = jl_typeof(rhs);
+    int hasptr;
+    int isunion = psel != NULL;
+    if (isunion) {
+        assert(!isatomic);
+        hasptr = 0;
+    }
+    else {
+        hasptr = ((jl_datatype_t*)ty)->layout->first_ptr >= 0;
+    }
+    size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+    int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+    assert(!isatomic || jl_typeis(rhs, ty));
+    jl_value_t *r;
+    if (isatomic && !needlock) {
+        r = jl_atomic_swap_bits(rty, v, rhs, fsz);
+    }
+    else {
+        if (needlock) {
+            jl_task_t *ct = jl_current_task;
+            r = jl_gc_alloc(ct->ptls, fsz, ty);
+            char *px = lock(v, parent, needlock, isatomic);
+            memcpy((char*)r, px, fsz);
+            memcpy(px, (char*)rhs, fsz);
+            unlock(v, parent, needlock, isatomic);
         }
         else {
-            memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz);
+            r = jl_new_bits(isunion ? jl_nth_union_component(ty, *psel) : ty, v);
+            if (isunion) {
+                unsigned nth = 0;
+                if (!jl_find_union_component(ty, rty, &nth))
+                    assert(0 && "invalid field assignment to isbits union");
+                *psel = nth;
+                if (jl_is_datatype_singleton((jl_datatype_t*)rty))
+                    return r;
+            }
+            memassign_safe(hasptr, v, rhs, fsz);
         }
     }
+    if (!isunion)
+        r = undefref_check((jl_datatype_t*)ty, r);
+    if (hasptr)
+        jl_gc_multi_wb(parent, rhs); // rhs is immutable
+    if (__unlikely(r == NULL))
+        jl_throw(jl_undefref_exception);
+    return r;
 }
 
 jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic)
@@ -1696,70 +1975,31 @@ jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_
        jl_type_error("swapfield!", ty, rhs);
     size_t offs = jl_field_offset(st, i);
     jl_value_t *r;
+    char *p = (char*)v + offs;
     if (jl_field_isptr(st, i)) {
         if (isatomic)
-            r = jl_atomic_exchange((_Atomic(jl_value_t*)*)((char*)v + offs), rhs);
+            r = jl_atomic_exchange((_Atomic(jl_value_t*)*)p, rhs);
         else
-            r = jl_atomic_exchange_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs), rhs);
+            r = jl_atomic_exchange_release((_Atomic(jl_value_t*)*)p, rhs);
         jl_gc_wb(v, rhs);
+        if (__unlikely(r == NULL))
+            jl_throw(jl_undefref_exception);
+        return r;
     }
     else {
-        jl_value_t *rty = jl_typeof(rhs);
-        int hasptr;
-        int isunion = jl_is_uniontype(ty);
-        if (isunion) {
-            assert(!isatomic);
-            r = jl_get_nth_field(v, i);
-            size_t fsz = jl_field_size(st, i);
-            uint8_t *psel = &((uint8_t*)v)[offs + fsz - 1];
-            unsigned nth = 0;
-            if (!jl_find_union_component(ty, rty, &nth))
-                assert(0 && "invalid field assignment to isbits union");
-            *psel = nth;
-            if (jl_is_datatype_singleton((jl_datatype_t*)rty))
-                return r;
-            hasptr = 0;
-        }
-        else {
-            hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
-        }
-        size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
-        int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
-        if (isatomic && !needlock) {
-            r = jl_atomic_swap_bits(rty, (char*)v + offs, rhs, fsz);
-            if (hasptr)
-                jl_gc_multi_wb(v, rhs); // rhs is immutable
-        }
-        else {
-            if (needlock) {
-                jl_task_t *ct = jl_current_task;
-                r = jl_gc_alloc(ct->ptls, fsz, ty);
-                jl_lock_value(v);
-                memcpy((char*)r, (char*)v + offs, fsz);
-                memcpy((char*)v + offs, (char*)rhs, fsz);
-                jl_unlock_value(v);
-            }
-            else {
-                if (!isunion)
-                    r = jl_new_bits(ty, (char*)v + offs);
-                memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz);
-            }
-            if (needlock || !isunion)
-                r = undefref_check((jl_datatype_t*)ty, r);
-        }
+        uint8_t *psel = jl_is_uniontype(ty) ? (uint8_t*)&p[jl_field_size(st, i) - 1] : NULL;
+        return swap_bits(ty, p, psel, v, rhs, isatomic ? isatomic_object : isatomic_none);
     }
-    if (__unlikely(r == NULL))
-        jl_throw(jl_undefref_exception);
-    return r;
 }
 
-jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic)
+inline jl_value_t *modify_value(jl_value_t *ty, _Atomic(jl_value_t*) *p, jl_value_t *parent, jl_value_t *op, jl_value_t *rhs, int isatomic, jl_binding_t *b, jl_module_t *mod, jl_sym_t *name)
 {
-    size_t offs = jl_field_offset(st, i);
-    jl_value_t *ty = jl_field_type_concrete(st, i);
-    jl_value_t *r = jl_get_nth_field_checked(v, i);
-    if (isatomic && jl_field_isptr(st, i))
-        jl_fence(); // load was previously only relaxed
+    jl_value_t *r = isatomic ? jl_atomic_load(p) : jl_atomic_load_relaxed(p);
+    if (__unlikely(r == NULL)) {
+        if (b)
+            jl_undefined_var_error(name, (jl_value_t*)mod);
+        jl_throw(jl_undefref_exception);
+    }
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 2);
     args[0] = r;
@@ -1767,67 +2007,106 @@ jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_valu
         args[1] = rhs;
         jl_value_t *y = jl_apply_generic(op, args, 2);
         args[1] = y;
-        if (!jl_isa(y, ty))
-            jl_type_error("modifyfield!", ty, y);
-        if (jl_field_isptr(st, i)) {
-            _Atomic(jl_value_t*) *p = (_Atomic(jl_value_t*)*)((char*)v + offs);
-            if (isatomic ? jl_atomic_cmpswap(p, &r, y) : jl_atomic_cmpswap_relaxed(p, &r, y))
-                break;
+        if (b)
+            jl_check_binding_assign_value(b, mod, name, y, "modifyglobal!");
+        else if (!jl_isa(y, ty))
+            jl_type_error(jl_is_genericmemory(parent) ? "memoryrefmodify!" : "modifyfield!", ty, y);
+        if (isatomic ? jl_atomic_cmpswap(p, &r, y) : jl_atomic_cmpswap_release(p, &r, y)) {
+            jl_gc_wb(parent, y);
+            break;
+        }
+        args[0] = r;
+        jl_gc_safepoint();
+    }
+    // args[0] == r (old)
+    // args[1] == y (new)
+    jl_datatype_t *rettyp = jl_apply_modify_type(ty);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    args[0] = jl_new_struct(rettyp, args[0], args[1]);
+    JL_GC_POP();
+    return args[0];
+}
+
+inline jl_value_t *modify_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_t *parent, jl_value_t *op, jl_value_t *rhs, enum atomic_kind isatomic)
+{
+    int hasptr;
+    int isunion = psel != NULL;
+    if (isunion) {
+        assert(!isatomic);
+        hasptr = 0;
+    }
+    else {
+        hasptr = ((jl_datatype_t*)ty)->layout->first_ptr >= 0;
+    }
+    jl_value_t **args;
+    JL_GC_PUSHARGS(args, 2);
+    while (1) {
+        jl_value_t *r;
+        jl_value_t *rty = isunion ? jl_nth_union_component(ty, *psel) : ty;
+        size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the initial copy
+        int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+        if (isatomic && !needlock) {
+            r = jl_atomic_new_bits(rty, p);
+        }
+        else if (needlock) {
+            jl_task_t *ct = jl_current_task;
+            r = jl_gc_alloc(ct->ptls, fsz, rty);
+            char *px = lock(p, parent, needlock, isatomic);
+            memcpy((char*)r, px, fsz);
+            unlock(p, parent, needlock, isatomic);
         }
         else {
-            jl_value_t *yty = jl_typeof(y);
-            jl_value_t *rty = jl_typeof(r);
-            int hasptr;
-            int isunion = jl_is_uniontype(ty);
-            if (isunion) {
-                assert(!isatomic);
-                hasptr = 0;
-            }
-            else {
-                hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
-            }
-            size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
-            int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
-            if (isatomic && !needlock) {
-                if (jl_atomic_bool_cmpswap_bits((char*)v + offs, r, y, fsz)) {
-                    if (hasptr)
-                        jl_gc_multi_wb(v, y); // y is immutable
-                    break;
-                }
-                r = jl_atomic_new_bits(ty, (char*)v + offs);
+            r = jl_new_bits(rty, p);
+        }
+        r = undefref_check((jl_datatype_t*)rty, r);
+        if (__unlikely(r == NULL))
+            jl_throw(jl_undefref_exception);
+        args[0] = r;
+        args[1] = rhs;
+        jl_value_t *y = jl_apply_generic(op, args, 2);
+        args[1] = y;
+        if (!jl_isa(y, ty)) {
+            jl_type_error(jl_is_genericmemory(parent) ? "memoryrefmodify!" : "modifyfield!", ty, y);
+        }
+        jl_value_t *yty = jl_typeof(y);
+        if (isatomic && !needlock) {
+            assert(yty == rty);
+            if (jl_atomic_bool_cmpswap_bits(p, r, y, fsz)) {
+                if (hasptr)
+                    jl_gc_multi_wb(parent, y); // y is immutable
+                break;
             }
-            else {
-                if (needlock)
-                    jl_lock_value(v);
-                int success = memcmp((char*)v + offs, r, fsz) == 0;
-                if (success) {
-                    if (isunion) {
-                        size_t fsz = jl_field_size(st, i);
-                        uint8_t *psel = &((uint8_t*)v)[offs + fsz - 1];
-                        success = (jl_typeof(r) == jl_nth_union_component(ty, *psel));
-                        if (success) {
-                            unsigned nth = 0;
-                            if (!jl_find_union_component(ty, yty, &nth))
-                                assert(0 && "invalid field assignment to isbits union");
-                            *psel = nth;
-                            if (jl_is_datatype_singleton((jl_datatype_t*)yty))
-                                break;
-                        }
-                        fsz = jl_datatype_size((jl_datatype_t*)yty); // need to shrink-wrap the final copy
-                    }
-                    else {
-                        assert(yty == ty && rty == ty);
+        }
+        else {
+            char *px = lock(p, parent, needlock, isatomic);
+            int success = memcmp(px, (char*)r, fsz) == 0;
+            if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding))
+                success = jl_egal__bits((jl_value_t*)px, r, (jl_datatype_t*)rty);
+            if (success) {
+                if (isunion) {
+                    success = (rty == jl_nth_union_component(ty, *psel));
+                    if (success) {
+                        unsigned nth = 0;
+                        if (!jl_find_union_component(ty, yty, &nth))
+                            assert(0 && "invalid field assignment to isbits union");
+                        *psel = nth;
+                        if (jl_is_datatype_singleton((jl_datatype_t*)yty))
+                            break;
                     }
-                    memassign_safe(hasptr, v, (char*)v + offs, y, fsz);
+                    fsz = jl_datatype_size((jl_datatype_t*)yty); // need to shrink-wrap the final copy
                 }
-                if (needlock)
-                    jl_unlock_value(v);
-                if (success)
-                    break;
-                r = jl_get_nth_field(v, i);
+                else {
+                    assert(yty == ty && rty == ty);
+                }
+                memassign_safe(hasptr, px, y, fsz);
+            }
+            unlock(p, parent, needlock, isatomic);
+            if (success) {
+                if (hasptr)
+                    jl_gc_multi_wb(parent, y); // y is immutable
+                break;
             }
         }
-        args[0] = r;
         jl_gc_safepoint();
     }
     // args[0] == r (old)
@@ -1839,91 +2118,105 @@ jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_valu
     return args[0];
 }
 
-jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *expected, jl_value_t *rhs, int isatomic)
+jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic)
 {
-    jl_value_t *ty = jl_field_type_concrete(st, i);
-    if (!jl_isa(rhs, ty))
-        jl_type_error("replacefield!", ty, rhs);
     size_t offs = jl_field_offset(st, i);
-    jl_value_t *r = expected;
+    jl_value_t *ty = jl_field_type_concrete(st, i);
+    char *p = (char*)v + offs;
+    if (jl_field_isptr(st, i)) {
+        return modify_value(ty, (_Atomic(jl_value_t*)*)p, v, op, rhs, isatomic, NULL, NULL, NULL);
+    }
+    else {
+        uint8_t *psel = jl_is_uniontype(ty) ? (uint8_t*)&p[jl_field_size(st, i) - 1] : NULL;
+        return modify_bits(ty, p, psel, v, op, rhs, isatomic ? isatomic_object : isatomic_none);
+    }
+}
+
+inline jl_value_t *replace_value(jl_value_t *ty, _Atomic(jl_value_t*) *p, jl_value_t *parent, jl_value_t *expected, jl_value_t *rhs, int isatomic, jl_module_t *mod, jl_sym_t *name)
+{
     jl_datatype_t *rettyp = jl_apply_cmpswap_type(ty);
     JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
-    if (jl_field_isptr(st, i)) {
-        _Atomic(jl_value_t*) *p = (_Atomic(jl_value_t*)*)((char*)v + offs);
-        int success;
-        while (1) {
-            success = isatomic ? jl_atomic_cmpswap(p, &r, rhs) : jl_atomic_cmpswap_relaxed(p, &r, rhs);
-            if (success)
-                jl_gc_wb(v, rhs);
-            if (__unlikely(r == NULL))
-                jl_throw(jl_undefref_exception);
-            if (success || !jl_egal(r, expected))
-                break;
+    jl_value_t *r = expected;
+    int success;
+    while (1) {
+        success = isatomic ? jl_atomic_cmpswap(p, &r, rhs) : jl_atomic_cmpswap_release(p, &r, rhs);
+        if (success)
+            jl_gc_wb(parent, rhs);
+        if (__unlikely(r == NULL)) {
+            if (mod && name)
+                jl_undefined_var_error(name, (jl_value_t*)mod);
+            jl_throw(jl_undefref_exception);
         }
-        JL_GC_PUSH1(&r);
-        r = jl_new_struct(rettyp, r, success ? jl_true : jl_false);
-        JL_GC_POP();
+        if (success || !jl_egal(r, expected))
+            break;
+    }
+    JL_GC_PUSH1(&r);
+    r = jl_new_struct(rettyp, r, success ? jl_true : jl_false);
+    JL_GC_POP();
+    return r;
+}
+
+inline jl_value_t *replace_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_t *parent, jl_value_t *expected, jl_value_t *rhs, enum atomic_kind isatomic)
+{
+    jl_datatype_t *rettyp = jl_apply_cmpswap_type(ty);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    int hasptr;
+    int isunion = psel != NULL;
+    size_t fsz = jl_field_size(rettyp, 0);
+    int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+    assert(jl_field_offset(rettyp, 1) == fsz);
+    jl_value_t *rty = ty;
+    if (isunion) {
+        assert(!isatomic);
+        hasptr = 0;
+        isatomic = isatomic_none; // this makes GCC happy
     }
     else {
-        int hasptr;
-        int isunion = jl_is_uniontype(ty);
-        int needlock;
-        jl_value_t *rty = ty;
-        size_t fsz = jl_field_size(st, i);
-        if (isunion) {
-            assert(!isatomic);
-            hasptr = 0;
-            needlock = 0;
-            isatomic = 0; // this makes GCC happy
-        }
-        else {
-            hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
-            fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
-            needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
-        }
-        if (isatomic && !needlock) {
-            r = jl_atomic_cmpswap_bits((jl_datatype_t*)ty, rettyp, (char*)v + offs, r, rhs, fsz);
-            int success = *((uint8_t*)r + fsz);
-            if (success && hasptr)
-                jl_gc_multi_wb(v, rhs); // rhs is immutable
+        hasptr = ((jl_datatype_t*)ty)->layout->first_ptr >= 0;
+        assert(jl_typeis(rhs, ty));
+    }
+    int success;
+    jl_task_t *ct = jl_current_task;
+    assert(!jl_field_isptr(rettyp, 0));
+    jl_value_t *r = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), rettyp);
+    if (isatomic && !needlock) {
+        size_t rsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the compare
+        success = jl_atomic_cmpswap_bits((jl_datatype_t*)rty, r, p, expected, rhs, rsz);
+        *((uint8_t*)r + fsz) = success ? 1 : 0;
+    }
+    else {
+        char *px = lock(p, parent, needlock, isatomic);
+        if (isunion)
+            rty = jl_nth_union_component(rty, *psel);
+        size_t rsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the compare
+        memcpy((char*)r, px, rsz); // copy field // TODO: make this a memmove_refs if relevant
+        if (isunion)
+            *((uint8_t*)r + fsz - 1) = *psel; // copy union bits
+        success = (rty == jl_typeof(expected));
+        if (success) {
+            success = memcmp((char*)r, (char*)expected, rsz) == 0;
+            if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding))
+                success = jl_egal__bits(r, expected, (jl_datatype_t*)rty);
         }
-        else {
-            jl_task_t *ct = jl_current_task;
-            uint8_t *psel = NULL;
+        *((uint8_t*)r + fsz) = success ? 1 : 0;
+        if (success) {
+            jl_value_t *rty = jl_typeof(rhs);
             if (isunion) {
-                psel = &((uint8_t*)v)[offs + fsz - 1];
-                rty = jl_nth_union_component(rty, *psel);
-            }
-            assert(!jl_field_isptr(rettyp, 0));
-            r = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), (jl_value_t*)rettyp);
-            int success = (rty == jl_typeof(expected));
-            if (needlock)
-                jl_lock_value(v);
-            memcpy((char*)r, (char*)v + offs, fsz); // copy field, including union bits
-            if (success) {
-                size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
-                if (((jl_datatype_t*)rty)->layout->haspadding)
-                    success = jl_egal__bits(r, expected, (jl_datatype_t*)rty);
-                else
-                    success = memcmp((char*)r, (char*)expected, fsz) == 0;
-            }
-            *((uint8_t*)r + fsz) = success ? 1 : 0;
-            if (success) {
-                jl_value_t *rty = jl_typeof(rhs);
-                size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
-                if (isunion) {
-                    unsigned nth = 0;
-                    if (!jl_find_union_component(ty, rty, &nth))
-                        assert(0 && "invalid field assignment to isbits union");
-                    *psel = nth;
-                    if (jl_is_datatype_singleton((jl_datatype_t*)rty))
-                        return r;
-                }
-                memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz);
+                rsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+                unsigned nth = 0;
+                if (!jl_find_union_component(ty, rty, &nth))
+                    assert(0 && "invalid field assignment to isbits union");
+                *psel = nth;
+                if (jl_is_datatype_singleton((jl_datatype_t*)rty))
+                    return r;
             }
-            if (needlock)
-                jl_unlock_value(v);
+            memassign_safe(hasptr, px, rhs, rsz);
         }
+        unlock(p, parent, needlock, isatomic);
+    }
+    if (success && hasptr)
+        jl_gc_multi_wb(parent, rhs); // rhs is immutable
+    if (!isunion) {
         r = undefref_check((jl_datatype_t*)rty, r);
         if (__unlikely(r == NULL))
             jl_throw(jl_undefref_exception);
@@ -1931,6 +2224,74 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val
     return r;
 }
 
+jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *expected, jl_value_t *rhs, int isatomic)
+{
+    jl_value_t *ty = jl_field_type_concrete(st, i);
+    if (!jl_isa(rhs, ty))
+        jl_type_error("replacefield!", ty, rhs);
+    size_t offs = jl_field_offset(st, i);
+    char *p = (char*)v + offs;
+    if (jl_field_isptr(st, i)) {
+        return replace_value(ty, (_Atomic(jl_value_t*)*)p, v, expected, rhs, isatomic, NULL, NULL);
+    }
+    else {
+        size_t fsz = jl_field_size(st, i);
+        int isunion = jl_is_uniontype(ty);
+        uint8_t *psel = isunion ? (uint8_t*)&p[fsz - 1] : NULL;
+        return replace_bits(ty, p, psel, v, expected, rhs, isatomic ? isatomic_object : isatomic_none);
+    }
+}
+
+inline int setonce_bits(jl_datatype_t *rty, char *p, jl_value_t *parent, jl_value_t *rhs, enum atomic_kind isatomic)
+{
+    size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+    assert(rty->layout->first_ptr >= 0);
+    int hasptr = 1;
+    int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+    int success;
+    if (isatomic && !needlock) {
+        success = jl_atomic_storeonce_bits(rty, p, rhs, fsz);
+    }
+    else {
+        char *px = lock(p, parent, needlock, isatomic);
+        success = undefref_check(rty, (jl_value_t*)px) == NULL;
+        if (success)
+            memassign_safe(hasptr, px, rhs, fsz);
+        unlock(p, parent, needlock, isatomic);
+    }
+    if (success)
+        jl_gc_multi_wb(parent, rhs); // rhs is immutable
+    return success;
+}
+
+int set_nth_fieldonce(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic)
+{
+    jl_value_t *ty = jl_field_type_concrete(st, i);
+    if (!jl_isa(rhs, ty))
+        jl_type_error("setfieldonce!", ty, rhs);
+    size_t offs = jl_field_offset(st, i);
+    int success;
+    char *p = (char*)v + offs;
+    if (jl_field_isptr(st, i)) {
+        _Atomic(jl_value_t*) *px = (_Atomic(jl_value_t*)*)p;
+        jl_value_t *r = NULL;
+        success = isatomic ? jl_atomic_cmpswap(px, &r, rhs) : jl_atomic_cmpswap_release(px, &r, rhs);
+        if (success)
+            jl_gc_wb(v, rhs);
+    }
+    else {
+        int isunion = jl_is_uniontype(ty);
+        if (isunion)
+            return 0;
+        int hasptr = ((jl_datatype_t*)ty)->layout->first_ptr >= 0;
+        if (!hasptr)
+            return 0;
+        assert(ty == jl_typeof(rhs));
+        success = setonce_bits((jl_datatype_t*)ty, p, v, rhs, isatomic ? isatomic_object : isatomic_none);
+    }
+    return success;
+}
+
 JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT
 {
     jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
@@ -1963,6 +2324,39 @@ JL_DLLEXPORT size_t jl_get_field_offset(jl_datatype_t *ty, int field)
     return jl_field_offset(ty, field - 1);
 }
 
+jl_value_t *get_nth_pointer(jl_value_t *v, size_t i)
+{
+    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v);
+    const jl_datatype_layout_t *ly = dt->layout;
+    uint32_t npointers = ly->npointers;
+    if (i >= npointers)
+        jl_bounds_error_int(v, i);
+    const uint8_t *ptrs8 = (const uint8_t *)jl_dt_layout_ptrs(ly);
+    const uint16_t *ptrs16 = (const uint16_t *)jl_dt_layout_ptrs(ly);
+    const uint32_t *ptrs32 = (const uint32_t*)jl_dt_layout_ptrs(ly);
+    uint32_t fld;
+    if (ly->flags.fielddesc_type == 0)
+        fld = ptrs8[i];
+    else if (ly->flags.fielddesc_type == 1)
+        fld = ptrs16[i];
+    else
+        fld = ptrs32[i];
+    return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)(&((jl_value_t**)v)[fld]));
+}
+
+JL_DLLEXPORT jl_value_t *jl_get_nth_pointer(jl_value_t *v, size_t i)
+{
+    jl_value_t *ptrf = get_nth_pointer(v, i);
+    if (__unlikely(ptrf == NULL))
+        jl_throw(jl_undefref_exception);
+    return ptrf;
+}
+
+JL_DLLEXPORT int jl_nth_pointer_isdefined(jl_value_t *v, size_t i)
+{
+    return get_nth_pointer(v, i) != NULL;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/debug-registry.h b/src/debug-registry.h
index bad59f205acb3..46665578b997d 100644
--- a/src/debug-registry.h
+++ b/src/debug-registry.h
@@ -11,10 +11,11 @@
 typedef struct {
     const llvm::object::ObjectFile *obj;
     llvm::DIContext *ctx;
-    int64_t slide;
-} objfileentry_t;
+    uint64_t slide;
+    std::map<uintptr_t, StringRef, std::greater<size_t>> *symbolmap;
+} jl_object_file_entry_t;
 
-// Central registry for resolving function addresses to `jl_method_instance_t`s and
+// Central registry for resolving function addresses to `jl_code_instance_t`s and
 // originating `ObjectFile`s (for the DWARF debug info).
 //
 // A global singleton instance is notified by the JIT whenever a new object is emitted,
@@ -32,7 +33,7 @@ class JITDebugInfoRegistry
             std::unique_lock<std::mutex> lock;
             CResourceT &resource;
 
-            Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT : lock(mutex), resource(resource) {}
+            Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER : lock(mutex), resource(resource) {}
             Lock(Lock &&) JL_NOTSAFEPOINT = default;
             Lock &operator=(Lock &&) JL_NOTSAFEPOINT = default;
 
@@ -56,7 +57,7 @@ class JITDebugInfoRegistry
                 return resource;
             }
 
-            ~Lock() JL_NOTSAFEPOINT = default;
+            ~Lock() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default;
         };
     private:
 
@@ -68,28 +69,29 @@ class JITDebugInfoRegistry
 
         Locked(ResourceT resource = ResourceT()) JL_NOTSAFEPOINT : mutex(), resource(std::move(resource)) {}
 
-        LockT operator*() JL_NOTSAFEPOINT {
+        LockT operator*() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER {
             return LockT(mutex, resource);
         }
 
-        ConstLockT operator*() const JL_NOTSAFEPOINT {
+        ConstLockT operator*() const JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER {
             return ConstLockT(mutex, resource);
         }
 
-        ~Locked() JL_NOTSAFEPOINT = default;
+        ~Locked() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default;
     };
 
     struct image_info_t {
         uint64_t base;
         jl_image_fptrs_t fptrs;
-        jl_method_instance_t **fvars_linfo;
+        jl_code_instance_t **fvars_cinst;
         size_t fvars_n;
     };
 
     struct libc_frames_t {
 #if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
-        std::atomic<void(*)(void*)> libc_register_frame_{nullptr};
-        std::atomic<void(*)(void*)> libc_deregister_frame_{nullptr};
+        typedef void (*frame_register_func)(void *) JL_NOTSAFEPOINT;
+        std::atomic<frame_register_func> libc_register_frame_{nullptr};
+        std::atomic<frame_register_func> libc_deregister_frame_{nullptr};
 
         void libc_register_frame(const char *Entry) JL_NOTSAFEPOINT;
 
@@ -98,22 +100,32 @@ class JITDebugInfoRegistry
     };
 private:
 
-    struct ObjectInfo {
-        const llvm::object::ObjectFile *object = nullptr;
-        size_t SectionSize = 0;
-        ptrdiff_t slide = 0;
-        llvm::object::SectionRef Section{};
-        llvm::DIContext *context = nullptr;
+    struct LazyObjectInfo {
+        SmallVector<uint8_t, 0> data;
+        size_t uncompressedsize;
+        std::unique_ptr<const llvm::object::ObjectFile> object;
+        std::unique_ptr<llvm::DIContext> context;
+        LazyObjectInfo() = delete;
+        ~LazyObjectInfo() JL_NOTSAFEPOINT = default;
+    };
+
+    struct SectionInfo {
+        LazyObjectInfo *object;
+        size_t SectionSize;
+        uint64_t slide;
+        uint64_t SectionIndex;
+        SectionInfo() = delete;
+        ~SectionInfo() JL_NOTSAFEPOINT = default;
     };
 
     template<typename KeyT, typename ValT>
     using rev_map = std::map<KeyT, ValT, std::greater<KeyT>>;
 
-    typedef rev_map<size_t, ObjectInfo> objectmap_t;
-    typedef rev_map<uint64_t, objfileentry_t> objfilemap_t;
+    typedef rev_map<size_t, SectionInfo> objectmap_t;
+    typedef rev_map<uint64_t, jl_object_file_entry_t> objfilemap_t;
 
     objectmap_t objectmap{};
-    rev_map<size_t, std::pair<size_t, jl_method_instance_t *>> linfomap{};
+    rev_map<size_t, std::pair<size_t, jl_code_instance_t *>> cimap{};
 
     // Maintain a mapping of unrealized function names -> linfo objects
     // so that when we see it get emitted, we can add a link back to the linfo
@@ -134,12 +146,13 @@ class JITDebugInfoRegistry
     libc_frames_t libc_frames{};
 
     void add_code_in_flight(llvm::StringRef name, jl_code_instance_t *codeinst, const llvm::DataLayout &DL) JL_NOTSAFEPOINT;
-    jl_method_instance_t *lookupLinfo(size_t pointer) JL_NOTSAFEPOINT;
+    jl_code_instance_t *lookupCodeInstance(size_t pointer) JL_NOTSAFEPOINT;
     void registerJITObject(const llvm::object::ObjectFile &Object,
-                        std::function<uint64_t(const llvm::StringRef &)> getLoadAddress,
-                        std::function<void*(void*)> lookupWriteAddress) JL_NOTSAFEPOINT;
+                        std::function<uint64_t(const llvm::StringRef &)> getLoadAddress) JL_NOTSAFEPOINT;
     objectmap_t& getObjectMap() JL_NOTSAFEPOINT;
     void add_image_info(image_info_t info) JL_NOTSAFEPOINT;
     bool get_image_info(uint64_t base, image_info_t *info) const JL_NOTSAFEPOINT;
     Locked<objfilemap_t>::LockT get_objfile_map() JL_NOTSAFEPOINT;
+
+    std::shared_mutex symbol_mutex;
 };
diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp
index 35e41fe657045..f8af82fa72c3c 100644
--- a/src/debuginfo.cpp
+++ b/src/debuginfo.cpp
@@ -7,6 +7,7 @@
 #include <llvm/DebugInfo/DWARF/DWARFContext.h>
 #include <llvm/Object/SymbolSize.h>
 #include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/MemoryBufferRef.h>
 #include <llvm/IR/Function.h>
 #include <llvm/ADT/StringRef.h>
 #include <llvm/ADT/StringMap.h>
@@ -19,6 +20,8 @@
 #include <llvm/Object/MachO.h>
 #include <llvm/Object/COFF.h>
 #include <llvm/Object/ELFObjectFile.h>
+#include <llvm-Compression.h>
+#include <llvm/Support/Compression.h>
 
 #ifdef _OS_DARWIN_
 #include <CoreFoundation/CoreFoundation.h>
@@ -57,7 +60,7 @@ extern "C" void __register_frame(void*) JL_NOTSAFEPOINT;
 extern "C" void __deregister_frame(void*) JL_NOTSAFEPOINT;
 
 template <typename callback>
-static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f)
+static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f) JL_NOTSAFEPOINT
 {
     const char *P = EHFrameAddr;
     const char *End = P + EHFrameSize;
@@ -93,12 +96,12 @@ void JITDebugInfoRegistry::add_code_in_flight(StringRef name, jl_code_instance_t
     (**codeinst_in_flight)[mangle(name, DL)] = codeinst;
 }
 
-jl_method_instance_t *JITDebugInfoRegistry::lookupLinfo(size_t pointer)
+jl_code_instance_t *JITDebugInfoRegistry::lookupCodeInstance(size_t pointer)
 {
     jl_lock_profile();
-    auto region = linfomap.lower_bound(pointer);
-    jl_method_instance_t *linfo = NULL;
-    if (region != linfomap.end() && pointer < region->first + region->second.first)
+    auto region = cimap.lower_bound(pointer);
+    jl_code_instance_t *linfo = NULL;
+    if (region != cimap.end() && pointer < region->first + region->second.first)
         linfo = region->second.second;
     jl_unlock_profile();
     return linfo;
@@ -144,8 +147,8 @@ struct unw_table_entry
 template <typename T>
 static void jl_profile_atomic(T f) JL_NOTSAFEPOINT
 {
-    assert(0 == jl_lock_profile_rd_held());
-    jl_lock_profile_wr();
+    int havelock = jl_lock_profile_wr();
+    assert(havelock);
 #ifndef _OS_WINDOWS_
     sigset_t sset;
     sigset_t oset;
@@ -156,13 +159,24 @@ static void jl_profile_atomic(T f) JL_NOTSAFEPOINT
 #ifndef _OS_WINDOWS_
     pthread_sigmask(SIG_SETMASK, &oset, NULL);
 #endif
-    jl_unlock_profile_wr();
+    if (havelock)
+        jl_unlock_profile_wr();
 }
 
 
 // --- storing and accessing source location metadata ---
-void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL)
+void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
 {
+    // Non-opaque-closure MethodInstances are considered globally rooted
+    // through their methods, but for OC, we need to create a global root
+    // here.
+    jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+    if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) {
+        jl_task_t *ct = jl_current_task;
+        int8_t gc_state = jl_gc_unsafe_enter(ct->ptls);
+        jl_as_global_root((jl_value_t*)mi, 1);
+        jl_gc_unsafe_leave(ct->ptls, gc_state);
+    }
     getJITDebugRegistry().add_code_in_flight(name, codeinst, DL);
 }
 
@@ -222,11 +236,21 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam
 #endif
 
 void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
-                        std::function<uint64_t(const StringRef &)> getLoadAddress,
-                        std::function<void*(void*)> lookupWriteAddress)
+                        std::function<uint64_t(const StringRef &)> getLoadAddress)
 {
     object::section_iterator EndSection = Object.section_end();
 
+    bool anyfunctions = false;
+    for (const object::SymbolRef &sym_iter : Object.symbols()) {
+        object::SymbolRef::Type SymbolType = cantFail(sym_iter.getType());
+        if (SymbolType != object::SymbolRef::ST_Function)
+            continue;
+        anyfunctions = true;
+        break;
+    }
+    if (!anyfunctions)
+        return;
+
 #ifdef _CPU_ARM_
     // ARM does not have/use .eh_frame
     uint64_t arm_exidx_addr = 0;
@@ -280,14 +304,13 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
 #if defined(_OS_WINDOWS_)
     uint64_t SectionAddrCheck = 0;
     uint64_t SectionLoadCheck = 0; (void)SectionLoadCheck;
-    uint64_t SectionWriteCheck = 0; (void)SectionWriteCheck;
     uint8_t *UnwindData = NULL;
 #if defined(_CPU_X86_64_)
     uint8_t *catchjmp = NULL;
     for (const object::SymbolRef &sym_iter : Object.symbols()) {
         StringRef sName = cantFail(sym_iter.getName());
-        if (sName.equals("__UnwindData") || sName.equals("__catchjmp")) {
-            uint64_t Addr = cantFail(sym_iter.getAddress());
+        if (sName == "__UnwindData" || sName == "__catchjmp") {
+            uint64_t Addr = cantFail(sym_iter.getAddress()); // offset into object (including section offset)
             auto Section = cantFail(sym_iter.getSection());
             assert(Section != EndSection && Section->isText());
             uint64_t SectionAddr = Section->getAddress();
@@ -299,44 +322,37 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
                         SectionLoadCheck == SectionLoadAddr);
             SectionAddrCheck = SectionAddr;
             SectionLoadCheck = SectionLoadAddr;
-            SectionWriteCheck = SectionLoadAddr;
-            if (lookupWriteAddress)
-                SectionWriteCheck = (uintptr_t)lookupWriteAddress((void*)SectionLoadAddr);
-            Addr += SectionWriteCheck - SectionLoadCheck;
-            if (sName.equals("__UnwindData")) {
+            Addr += SectionLoadAddr - SectionAddr;
+            if (sName == "__UnwindData") {
                 UnwindData = (uint8_t*)Addr;
             }
-            else if (sName.equals("__catchjmp")) {
+            else if (sName == "__catchjmp") {
                 catchjmp = (uint8_t*)Addr;
             }
         }
     }
+    (void)catchjmp;
     assert(catchjmp);
     assert(UnwindData);
-    assert(SectionAddrCheck);
     assert(SectionLoadCheck);
-    assert(!memcmp(catchjmp, "\0\0\0\0\0\0\0\0\0\0\0\0", 12) &&
-            !memcmp(UnwindData, "\0\0\0\0\0\0\0\0\0\0\0\0", 12));
-    catchjmp[0] = 0x48;
-    catchjmp[1] = 0xb8; // mov RAX, QWORD PTR [&__julia_personality]
-    *(uint64_t*)(&catchjmp[2]) = (uint64_t)&__julia_personality;
-    catchjmp[10] = 0xff;
-    catchjmp[11] = 0xe0; // jmp RAX
-    UnwindData[0] = 0x09; // version info, UNW_FLAG_EHANDLER
-    UnwindData[1] = 4;    // size of prolog (bytes)
-    UnwindData[2] = 2;    // count of unwind codes (slots)
-    UnwindData[3] = 0x05; // frame register (rbp) = rsp
-    UnwindData[4] = 4;    // second instruction
-    UnwindData[5] = 0x03; // mov RBP, RSP
-    UnwindData[6] = 1;    // first instruction
-    UnwindData[7] = 0x50; // push RBP
-    *(DWORD*)&UnwindData[8] = (DWORD)(catchjmp - (uint8_t*)SectionWriteCheck); // relative location of catchjmp
-    UnwindData -= SectionWriteCheck - SectionLoadCheck;
 #endif // defined(_OS_X86_64_)
 #endif // defined(_OS_WINDOWS_)
 
+    SmallVector<uint8_t, 0> packed;
+    ArrayRef<uint8_t> unpacked = arrayRefFromStringRef(Object.getData());
+    std::optional<compression::Format> F;
+    if (compression::zstd::isAvailable())
+        F = compression::Format::Zstd;
+    else if (compression::zlib::isAvailable())
+        F = compression::Format::Zlib;
+    if (F)
+        compression::compress(*F, unpacked, packed);
+    // intentionally leak this so that we don't need to ref-count it
+    // intentionally copy the input so that we exact-size the allocation (since no shrink_to_fit function)
+    auto ObjectCopy = new LazyObjectInfo{SmallVector<uint8_t, 0>(F ? ArrayRef(packed) : unpacked), F ? Object.getData().size() : 0};
+    jl_jit_add_bytes(ObjectCopy->data.size());
     auto symbols = object::computeSymbolSizes(Object);
-    bool first = true;
+    bool hassection = false;
     for (const auto &sym_size : symbols) {
         const object::SymbolRef &sym_iter = sym_size.first;
         object::SymbolRef::Type SymbolType = cantFail(sym_iter.getType());
@@ -348,7 +364,7 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
         uint64_t SectionAddr = Section->getAddress();
         StringRef secName = cantFail(Section->getName());
         uint64_t SectionLoadAddr = getLoadAddress(secName);
-        Addr -= SectionAddr - SectionLoadAddr;
+        Addr += SectionLoadAddr - SectionAddr;
         StringRef sName = cantFail(sym_iter.getName());
         uint64_t SectionSize = Section->getSize();
         size_t Size = sym_size.second;
@@ -374,25 +390,24 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
         }
         jl_profile_atomic([&]() JL_NOTSAFEPOINT {
             if (codeinst)
-                linfomap[Addr] = std::make_pair(Size, codeinst->def);
-            if (first) {
-                objectmap[SectionLoadAddr] = {&Object,
-                    (size_t)SectionSize,
-                    (ptrdiff_t)(SectionAddr - SectionLoadAddr),
-                    *Section,
-                    nullptr,
-                    };
-                first = false;
-            }
+                cimap[Addr] = std::make_pair(Size, codeinst);
+            hassection = true;
+            objectmap.insert(std::pair{SectionLoadAddr, SectionInfo{
+                ObjectCopy,
+                (size_t)SectionSize,
+                SectionAddr - SectionLoadAddr,
+                Section->getIndex()
+                }});
         });
     }
+    if (!hassection) // clang-sa demands that we do this to fool cplusplus.NewDeleteLeaks
+        delete ObjectCopy;
 }
 
 void jl_register_jit_object(const object::ObjectFile &Object,
-                            std::function<uint64_t(const StringRef &)> getLoadAddress,
-                            std::function<void *(void *)> lookupWriteAddress) JL_NOTSAFEPOINT
+                            std::function<uint64_t(const StringRef &)> getLoadAddress)
 {
-    getJITDebugRegistry().registerJITObject(Object, getLoadAddress, lookupWriteAddress);
+    getJITDebugRegistry().registerJITObject(Object, getLoadAddress);
 }
 
 // TODO: convert the safe names from aotcomile.cpp:makeSafeName back into symbols
@@ -433,7 +448,7 @@ static std::pair<char *, bool> jl_demangle(const char *name) JL_NOTSAFEPOINT
 // func_name and file_name are either NULL or malloc'd pointers
 static int lookup_pointer(
         object::SectionRef Section, DIContext *context,
-        jl_frame_t **frames, size_t pointer, int64_t slide,
+        jl_frame_t **frames, size_t pointer, uint64_t slide,
         bool demangle, bool noInline) JL_NOTSAFEPOINT
 {
     // This function is not allowed to reference any TLS variables
@@ -462,8 +477,8 @@ static int lookup_pointer(
 
     // DWARFContext/DWARFUnit update some internal tables during these queries, so
     // a lock is needed.
-    assert(0 == jl_lock_profile_rd_held());
-    jl_lock_profile_wr();
+    if (!jl_lock_profile_wr())
+        return lookup_pointer(object::SectionRef(), NULL, frames, pointer, slide, demangle, noInline);
     auto inlineInfo = context->getInliningInfoForAddress(makeAddress(Section, pointer + slide), infoSpec);
     jl_unlock_profile_wr();
 
@@ -476,7 +491,7 @@ static int lookup_pointer(
     if (noInline)
         n_frames = 1;
     if (n_frames > 1) {
-        jl_frame_t *new_frames = (jl_frame_t*)calloc(sizeof(jl_frame_t), n_frames);
+        jl_frame_t *new_frames = (jl_frame_t*)calloc(n_frames, sizeof(jl_frame_t));
         memcpy(&new_frames[n_frames - 1], *frames, sizeof(jl_frame_t));
         free(*frames);
         *frames = new_frames;
@@ -488,7 +503,8 @@ static int lookup_pointer(
             info = inlineInfo.getFrame(i);
         }
         else {
-            jl_lock_profile_wr();
+            int havelock = jl_lock_profile_wr();
+            assert(havelock); (void)havelock;
             info = context->getLineInfoForAddress(makeAddress(Section, pointer + slide), infoSpec);
             jl_unlock_profile_wr();
         }
@@ -503,7 +519,7 @@ static int lookup_pointer(
                 std::size_t semi_pos = func_name.find(';');
                 if (semi_pos != std::string::npos) {
                     func_name = func_name.substr(0, semi_pos);
-                    frame->linfo = NULL; // Looked up on Julia side
+                    frame->ci = NULL; // Looked up on Julia side
                 }
             }
         }
@@ -540,7 +556,7 @@ static int lookup_pointer(
 #if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
 
 void JITDebugInfoRegistry::libc_frames_t::libc_register_frame(const char *Entry) {
-    auto libc_register_frame_ = jl_atomic_load_relaxed(&this->libc_register_frame_);
+    frame_register_func libc_register_frame_ = jl_atomic_load_relaxed(&this->libc_register_frame_);
     if (!libc_register_frame_) {
         libc_register_frame_ = (void(*)(void*))dlsym(RTLD_NEXT, "__register_frame");
         jl_atomic_store_release(&this->libc_register_frame_, libc_register_frame_);
@@ -553,7 +569,7 @@ void JITDebugInfoRegistry::libc_frames_t::libc_register_frame(const char *Entry)
 }
 
 void JITDebugInfoRegistry::libc_frames_t::libc_deregister_frame(const char *Entry) {
-    auto libc_deregister_frame_ = jl_atomic_load_relaxed(&this->libc_deregister_frame_);
+    frame_register_func libc_deregister_frame_  = jl_atomic_load_relaxed(&this->libc_deregister_frame_);
     if (!libc_deregister_frame_) {
         libc_deregister_frame_ = (void(*)(void*))dlsym(RTLD_NEXT, "__deregister_frame");
         jl_atomic_store_release(&this->libc_deregister_frame_, libc_deregister_frame_);
@@ -566,7 +582,7 @@ void JITDebugInfoRegistry::libc_frames_t::libc_deregister_frame(const char *Entr
 }
 #endif
 
-static bool getObjUUID(llvm::object::MachOObjectFile *obj, uint8_t uuid[16]) JL_NOTSAFEPOINT
+static bool getObjUUID(const object::MachOObjectFile *obj, uint8_t uuid[16]) JL_NOTSAFEPOINT
 {
     for (auto Load : obj->load_commands())
     {
@@ -689,9 +705,9 @@ openDebugInfo(StringRef debuginfopath, const debug_link_info &info) JL_NOTSAFEPO
 }
 extern "C" JL_DLLEXPORT_CODEGEN
 void jl_register_fptrs_impl(uint64_t image_base, const jl_image_fptrs_t *fptrs,
-    jl_method_instance_t **linfos, size_t n)
+    jl_code_instance_t **cinfos, size_t n)
 {
-    getJITDebugRegistry().add_image_info({(uintptr_t) image_base, *fptrs, linfos, n});
+    getJITDebugRegistry().add_image_info({(uintptr_t) image_base, *fptrs, cinfos, n});
 }
 
 template<typename T>
@@ -702,7 +718,8 @@ static inline void ignoreError(T &err) JL_NOTSAFEPOINT
 #endif
 }
 
-static void get_function_name_and_base(llvm::object::SectionRef Section, size_t pointer, int64_t slide, bool inimage,
+static void get_function_name_and_base(llvm::object::SectionRef Section, std::map<uintptr_t, StringRef, std::greater<size_t>> *symbolmap,
+                                       size_t pointer, uint64_t slide, bool inimage,
                                        void **saddr, char **name, bool untrusted_dladdr) JL_NOTSAFEPOINT
 {
     bool needs_saddr = saddr && (!*saddr || untrusted_dladdr);
@@ -728,59 +745,73 @@ static void get_function_name_and_base(llvm::object::SectionRef Section, size_t
 #endif
     }
     if (Section.getObject() && (needs_saddr || needs_name)) {
-        size_t distance = (size_t)-1;
-        object::SymbolRef sym_found;
-        for (auto sym : Section.getObject()->symbols()) {
-            if (!Section.containsSymbol(sym))
-                continue;
-            auto addr = sym.getAddress();
-            if (!addr)
-                continue;
-            size_t symptr = addr.get();
-            if (symptr > pointer + slide)
-                continue;
-            size_t new_dist = pointer + slide - symptr;
-            if (new_dist > distance)
-                continue;
-            distance = new_dist;
-            sym_found = sym;
-        }
-        if (distance != (size_t)-1) {
-            if (needs_saddr) {
-                uintptr_t addr = cantFail(sym_found.getAddress());
-                *saddr = (void*)(addr - slide);
-                needs_saddr = false;
+        uintptr_t addr = 0;
+        StringRef nameref{};
+        {
+            std::shared_lock<std::shared_mutex> read_lock(getJITDebugRegistry().symbol_mutex);
+            if (symbolmap->empty()) {
+                read_lock.unlock();
+                {
+                    // symbol map hasn't been generated yet, so fill it in now
+                    std::unique_lock<std::shared_mutex> write_lock(getJITDebugRegistry().symbol_mutex);
+                    if (symbolmap->empty()) {
+                        for (auto sym : Section.getObject()->symbols()) {
+                            if (!Section.containsSymbol(sym))
+                                continue;
+
+                            auto maybe_addr = sym.getAddress();
+                            if (!maybe_addr)
+                                continue;
+                            size_t addr = maybe_addr.get();
+
+                            auto maybe_nameref = sym.getName();
+                            StringRef nameref{};
+                            if (maybe_nameref)
+                                nameref = maybe_nameref.get();
+
+                            symbolmap->emplace(addr, nameref);
+                        }
+                    }
+                }
+                read_lock.lock();
+            }
+            auto fit = symbolmap->lower_bound(pointer + slide);
+            if (fit != symbolmap->end()) {
+                addr = fit->first;
+                nameref = fit->second;
             }
-            if (needs_name) {
-                if (auto name_or_err = sym_found.getName()) {
-                    auto nameref = name_or_err.get();
-                    const char globalPrefix = // == DataLayout::getGlobalPrefix
+        }
+        std::string namerefstr = nameref.str();
+        if (needs_saddr && addr != 0) {
+            *saddr = (void*)(addr - slide);
+            needs_saddr = false;
+        }
+        if (needs_name && !nameref.empty()) {
+            const char globalPrefix = // == DataLayout::getGlobalPrefix
 #if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-                        '_';
+                '_';
 #elif defined(_OS_DARWIN_)
-                        '_';
+                '_';
 #else
-                        '\0';
+                '\0';
 #endif
-                    if (globalPrefix) {
-                        if (nameref[0] == globalPrefix)
-                          nameref = nameref.drop_front();
+            if (globalPrefix) {
+                if (nameref[0] == globalPrefix)
+                  nameref = nameref.drop_front();
 #if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-                        else if (nameref[0] == '@') // X86_VectorCall
-                          nameref = nameref.drop_front();
+                else if (nameref[0] == '@') // X86_VectorCall
+                  nameref = nameref.drop_front();
 #endif
-                        // else VectorCall, Assembly, Internal, etc.
-                    }
+                // else VectorCall, Assembly, Internal, etc.
+            }
 #if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-                    nameref = nameref.split('@').first;
+            nameref = nameref.split('@').first;
 #endif
-                    size_t len = nameref.size();
-                    *name = (char*)realloc_s(*name, len + 1);
-                    memcpy(*name, nameref.data(), len);
-                    (*name)[len] = 0;
-                    needs_name = false;
-                }
-            }
+            size_t len = nameref.size();
+            *name = (char*)realloc_s(*name, len + 1);
+            memcpy(*name, nameref.data(), len);
+            (*name)[len] = 0;
+            needs_name = false;
         }
     }
 #ifdef _OS_WINDOWS_
@@ -804,7 +835,7 @@ static void get_function_name_and_base(llvm::object::SectionRef Section, size_t
 #endif
 }
 
-static objfileentry_t find_object_file(uint64_t fbase, StringRef fname) JL_NOTSAFEPOINT
+static jl_object_file_entry_t find_object_file(uint64_t fbase, StringRef fname) JL_NOTSAFEPOINT
 {
     int isdarwin = 0, islinux = 0, iswindows = 0;
 #if defined(_OS_DARWIN_)
@@ -817,7 +848,7 @@ static objfileentry_t find_object_file(uint64_t fbase, StringRef fname) JL_NOTSA
     (void)iswindows;
 
 // GOAL: Read debuginfo from file
-    objfileentry_t entry{nullptr, nullptr, 0};
+    jl_object_file_entry_t entry{nullptr, nullptr, 0, nullptr};
     auto success = getJITDebugRegistry().get_objfile_map()->emplace(fbase, entry);
     if (!success.second)
         // Return cached value
@@ -828,9 +859,6 @@ static objfileentry_t find_object_file(uint64_t fbase, StringRef fname) JL_NOTSA
     std::string debuginfopath;
     uint8_t uuid[16], uuid2[16];
     if (isdarwin) {
-        // Hide Darwin symbols (e.g. CoreFoundation) from non-Darwin systems.
-#ifdef _OS_DARWIN_
-
         size_t msize = (size_t)(((uint64_t)-1) - fbase);
         std::unique_ptr<MemoryBuffer> membuf = MemoryBuffer::getMemBuffer(
                 StringRef((const char *)fbase, msize), "", false);
@@ -841,14 +869,18 @@ static objfileentry_t find_object_file(uint64_t fbase, StringRef fname) JL_NOTSA
             return entry;
         }
 
-        llvm::object::MachOObjectFile *morigobj = (llvm::object::MachOObjectFile*)
-            origerrorobj.get().get();
+        const object::MachOObjectFile *morigobj = dyn_cast<const object::MachOObjectFile>(
+            origerrorobj.get().get());
 
         // First find the uuid of the object file (we'll use this to make sure we find the
         // correct debug symbol file).
-        if (!getObjUUID(morigobj, uuid))
+        if (!morigobj || !getObjUUID(morigobj, uuid))
             return entry;
 
+        // Hide Darwin symbols (e.g. CoreFoundation) from non-Darwin systems.
+#ifndef _OS_DARWIN_
+        return entry;
+#else
         // On macOS, debug symbols are not contained in the dynamic library.
         // Use DBGCopyFullDSYMURLForUUID from the private DebugSymbols framework
         // to make use of spotlight to find the dSYM file. If that fails, lookup
@@ -904,6 +936,7 @@ static objfileentry_t find_object_file(uint64_t fbase, StringRef fname) JL_NOTSA
         if (dsfmwkbundle) {
             CFRelease(dsfmwkbundle);
         }
+#endif
 
         if (objpath.empty()) {
             // Fall back to simple path relative to the dynamic library.
@@ -913,7 +946,6 @@ static objfileentry_t find_object_file(uint64_t fbase, StringRef fname) JL_NOTSA
             debuginfopath += fname.substr(sep + 1);
             objpath = debuginfopath;
         }
-#endif
     }
     else {
         // On Linux systems we need to mmap another copy because of the permissions on the mmap'ed shared library.
@@ -972,26 +1004,29 @@ static objfileentry_t find_object_file(uint64_t fbase, StringRef fname) JL_NOTSA
 
         if (isdarwin) {
             // verify the UUID matches
-            if (!getObjUUID((llvm::object::MachOObjectFile*)debugobj, uuid2) ||
-                    memcmp(uuid, uuid2, sizeof(uuid)) != 0) {
+            if (!isa<const object::MachOObjectFile>(debugobj) ||
+                !getObjUUID(cast<const object::MachOObjectFile>(debugobj), uuid2) ||
+                memcmp(uuid, uuid2, sizeof(uuid)) != 0) {
                 return entry;
             }
         }
 
-        int64_t slide = 0;
+        uint64_t slide = 0;
         if (auto *OF = dyn_cast<const object::COFFObjectFile>(debugobj)) {
-            assert(iswindows);
+            if (!iswindows) // the COFF parser accepts some garbage inputs (like empty files) that the other parsers correctly reject, so we can end up here even when we should not
+                return entry;
             slide = OF->getImageBase() - fbase;
         }
         else {
-            slide = -(int64_t)fbase;
+            slide = -fbase;
         }
 
         auto context = DWARFContext::create(*debugobj).release();
         auto binary = errorobj->takeBinary();
         binary.first.release();
         binary.second.release();
-        entry = {debugobj, context, slide};
+
+        entry = {debugobj, context, slide, new std::map<uintptr_t, StringRef, std::greater<size_t>>()};
         // update cache
         (*getJITDebugRegistry().get_objfile_map())[fbase] = entry;
     }
@@ -1016,7 +1051,7 @@ static object::SectionRef getModuleSectionForAddress(const object::ObjectFile *o
 }
 
 
-bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *slide, llvm::DIContext **context,
+bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, uint64_t *slide, llvm::DIContext **context,
     bool onlyImage, bool *isImage, uint64_t *_fbase, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT
 {
     *Section = object::SectionRef();
@@ -1041,7 +1076,6 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *
     IMAGEHLP_MODULE64 ModuleInfo;
     ModuleInfo.SizeOfStruct = sizeof(IMAGEHLP_MODULE64);
     uv_mutex_lock(&jl_in_stackwalk);
-    jl_refresh_dbg_module_list();
     bool isvalid = SymGetModuleInfo64(GetCurrentProcess(), (DWORD64)pointer, &ModuleInfo);
     uv_mutex_unlock(&jl_in_stackwalk);
     if (!isvalid)
@@ -1121,15 +1155,15 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *
         jl_copy_str(filename, dlinfo.dli_fname);
     fname = dlinfo.dli_fname;
 #endif // ifdef _OS_WINDOWS_
-    auto entry = find_object_file(fbase, fname);
+    jl_object_file_entry_t entry = find_object_file(fbase, fname);
     *slide = entry.slide;
     *context = entry.ctx;
     if (entry.obj)
         *Section = getModuleSectionForAddress(entry.obj, pointer + entry.slide);
     // Assume we only need base address for sysimg for now
-    if (!inimage || !image_info.fptrs.base)
+    if (!inimage || 0 == image_info.fptrs.nptrs)
         saddr = nullptr;
-    get_function_name_and_base(*Section, pointer, entry.slide, inimage, saddr, name, untrusted_dladdr);
+    get_function_name_and_base(*Section, entry.symbolmap, pointer, entry.slide, inimage, saddr, name, untrusted_dladdr);
     return true;
 }
 
@@ -1143,7 +1177,6 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip
     static IMAGEHLP_LINE64 frame_info_line;
     DWORD dwDisplacement = 0;
     uv_mutex_lock(&jl_in_stackwalk);
-    jl_refresh_dbg_module_list();
     DWORD64 dwAddress = pointer;
     frame_info_line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
     if (SymGetLineFromAddr64(GetCurrentProcess(), dwAddress, &dwDisplacement, &frame_info_line)) {
@@ -1157,7 +1190,7 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip
 #endif
     object::SectionRef Section;
     llvm::DIContext *context = NULL;
-    int64_t slide;
+    uint64_t slide;
     bool isImage;
     void *saddr;
     uint64_t fbase;
@@ -1170,18 +1203,17 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip
         JITDebugInfoRegistry::image_info_t image;
         bool inimage = getJITDebugRegistry().get_image_info(fbase, &image);
         if (isImage && saddr && inimage) {
-            intptr_t diff = (uintptr_t)saddr - (uintptr_t)image.fptrs.base;
             for (size_t i = 0; i < image.fptrs.nclones; i++) {
-                if (diff == image.fptrs.clone_offsets[i]) {
+                if (saddr == image.fptrs.clone_ptrs[i]) {
                     uint32_t idx = image.fptrs.clone_idxs[i] & jl_sysimg_val_mask;
                     if (idx < image.fvars_n) // items after this were cloned but not referenced directly by a method (such as our ccall PLT thunks)
-                        frame0->linfo = image.fvars_linfo[idx];
+                        frame0->ci = image.fvars_cinst[idx];
                     break;
                 }
             }
             for (size_t i = 0; i < image.fvars_n; i++) {
-                if (diff == image.fptrs.offsets[i]) {
-                    frame0->linfo = image.fvars_linfo[i];
+                if (saddr == image.fptrs.ptrs[i]) {
+                    frame0->ci = image.fvars_cinst[i];
                     break;
                 }
             }
@@ -1190,12 +1222,12 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip
     return lookup_pointer(Section, context, frames, pointer, slide, isImage, noInline);
 }
 
-int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
+int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, uint64_t *slide,
         object::SectionRef *Section, llvm::DIContext **context) JL_NOTSAFEPOINT
 {
     int found = 0;
-    assert(0 == jl_lock_profile_rd_held());
-    jl_lock_profile_wr();
+    if (!jl_lock_profile_wr())
+        return 0;
 
     if (symsize)
         *symsize = 0;
@@ -1204,11 +1236,34 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
     auto fit = objmap.lower_bound(fptr);
     if (fit != objmap.end() && fptr < fit->first + fit->second.SectionSize) {
         *slide = fit->second.slide;
-        *Section = fit->second.Section;
-        if (context) {
-            if (fit->second.context == nullptr)
-                fit->second.context = DWARFContext::create(*fit->second.object).release();
-            *context = fit->second.context;
+        auto lazyobject = fit->second.object;
+        if (!lazyobject->object && !lazyobject->data.empty()) {
+            if (lazyobject->uncompressedsize) {
+                SmallVector<uint8_t, 0> unpacked;
+                compression::Format F = compression::zstd::isAvailable() ? compression::Format::Zstd : compression::Format::Zlib;
+                Error E = compression::decompress(F, lazyobject->data, unpacked, lazyobject->uncompressedsize);
+                if (E)
+                    lazyobject->data.clear();
+                else
+                    lazyobject->data = std::move(unpacked);
+                jl_jit_add_bytes(lazyobject->data.size() - lazyobject->uncompressedsize);
+                lazyobject->uncompressedsize = 0;
+            }
+            if (!lazyobject->data.empty()) {
+                auto obj = object::ObjectFile::createObjectFile(MemoryBufferRef(StringRef((const char*)lazyobject->data.data(), lazyobject->data.size()), "jit.o"));
+                if (obj)
+                    lazyobject->object = std::move(*obj);
+                else
+                    lazyobject->data.clear();
+            }
+        }
+        if (lazyobject->object) {
+            *Section = *std::next(lazyobject->object->section_begin(), fit->second.SectionIndex);
+            if (context) {
+                if (lazyobject->context == nullptr)
+                    lazyobject->context = DWARFContext::create(*lazyobject->object);
+                *context = lazyobject->context.get();
+            }
         }
         found = 1;
     }
@@ -1222,25 +1277,25 @@ extern "C" JL_DLLEXPORT_CODEGEN int jl_getFunctionInfo_impl(jl_frame_t **frames_
     // This function is not allowed to reference any TLS variables if noInline
     // since it can be called from an unmanaged thread on OSX.
 
-    jl_frame_t *frames = (jl_frame_t*)calloc(sizeof(jl_frame_t), 1);
+    jl_frame_t *frames = (jl_frame_t*)calloc(1, sizeof(jl_frame_t));
     frames[0].line = -1;
     *frames_out = frames;
 
-    llvm::DIContext *context;
+    llvm::DIContext *context = nullptr;
     object::SectionRef Section;
-    int64_t slide;
+    uint64_t slide;
     uint64_t symsize;
     if (jl_DI_for_fptr(pointer, &symsize, &slide, &Section, &context)) {
-        frames[0].linfo = getJITDebugRegistry().lookupLinfo(pointer);
+        frames[0].ci = getJITDebugRegistry().lookupCodeInstance(pointer);
         int nf = lookup_pointer(Section, context, frames_out, pointer, slide, true, noInline);
         return nf;
     }
     return jl_getDylibFunctionInfo(frames_out, pointer, skipC, noInline);
 }
 
-extern "C" jl_method_instance_t *jl_gdblookuplinfo(void *p) JL_NOTSAFEPOINT
+extern "C" jl_code_instance_t *jl_gdblookupci(void *p) JL_NOTSAFEPOINT
 {
-    return getJITDebugRegistry().lookupLinfo((size_t)p);
+    return getJITDebugRegistry().lookupCodeInstance((size_t)p);
 }
 
 #if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
@@ -1258,14 +1313,14 @@ void register_eh_frames(uint8_t *Addr, size_t Size)
 {
   // On OS X OS X __register_frame takes a single FDE as an argument.
   // See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html
-  processFDEs((char*)Addr, Size, [](const char *Entry) {
+  processFDEs((char*)Addr, Size, [](const char *Entry) JL_NOTSAFEPOINT {
       getJITDebugRegistry().libc_frames.libc_register_frame(Entry);
     });
 }
 
 void deregister_eh_frames(uint8_t *Addr, size_t Size)
 {
-   processFDEs((char*)Addr, Size, [](const char *Entry) {
+   processFDEs((char*)Addr, Size, [](const char *Entry) JL_NOTSAFEPOINT {
       getJITDebugRegistry().libc_frames.libc_deregister_frame(Entry);
     });
 }
@@ -1277,7 +1332,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size)
 
 // Skip over an arbitrary long LEB128 encoding.
 // Return the pointer to the first unprocessed byte.
-static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End)
+static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) JL_NOTSAFEPOINT
 {
     const uint8_t *P = Addr;
     while ((*P >> 7) != 0 && P < End)
@@ -1289,7 +1344,7 @@ static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End)
 // bytes than what there are more bytes than what the type can store.
 // Adjust the pointer to the first unprocessed byte.
 template<typename T> static T parse_leb128(const uint8_t *&Addr,
-                                           const uint8_t *End)
+                                           const uint8_t *End) JL_NOTSAFEPOINT
 {
     typedef typename std::make_unsigned<T>::type uT;
     uT v = 0;
@@ -1312,7 +1367,7 @@ template<typename T> static T parse_leb128(const uint8_t *&Addr,
 }
 
 template <typename U, typename T>
-static U safe_trunc(T t)
+static U safe_trunc(T t) JL_NOTSAFEPOINT
 {
     assert((t >= static_cast<T>(std::numeric_limits<U>::min()))
            && (t <= static_cast<T>(std::numeric_limits<U>::max())));
@@ -1352,9 +1407,9 @@ enum DW_EH_PE : uint8_t {
 };
 
 // Parse the CIE and return the type of encoding used by FDE
-static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End)
+static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End) JL_NOTSAFEPOINT
 {
-    // http://www.airs.com/blog/archives/460
+    // https://www.airs.com/blog/archives/460
     // Length (4 bytes)
     uint32_t cie_size = *(const uint32_t*)Addr;
     const uint8_t *cie_addr = Addr + 4;
@@ -1458,7 +1513,7 @@ void register_eh_frames(uint8_t *Addr, size_t Size)
 
     // Now first count the number of FDEs
     size_t nentries = 0;
-    processFDEs((char*)Addr, Size, [&](const char*){ nentries++; });
+    processFDEs((char*)Addr, Size, [&](const char*) JL_NOTSAFEPOINT { nentries++; });
     if (nentries == 0)
         return;
 
@@ -1481,13 +1536,13 @@ void register_eh_frames(uint8_t *Addr, size_t Size)
     // While we're at it, also record the start_ip and size,
     // which we fill in the table
     unw_table_entry *table = new unw_table_entry[nentries];
-    std::vector<uintptr_t> start_ips(nentries);
+    SmallVector<uintptr_t, 0> start_ips(nentries);
     size_t cur_entry = 0;
     // Cache the previously parsed CIE entry so that we can support multiple
     // CIE's (may not happen) without parsing it every time.
     const uint8_t *cur_cie = nullptr;
     DW_EH_PE encoding = DW_EH_PE_omit;
-    processFDEs((char*)Addr, Size, [&](const char *Entry) {
+    processFDEs((char*)Addr, Size, [&](const char *Entry) JL_NOTSAFEPOINT {
             // Skip Length (4bytes) and CIE offset (4bytes)
             uint32_t fde_size = *(const uint32_t*)Entry;
             uint32_t cie_id = ((const uint32_t*)Entry)[1];
@@ -1608,7 +1663,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size)
 #endif
 
 extern "C" JL_DLLEXPORT_CODEGEN
-uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr)
+uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr) JL_NOTSAFEPOINT
 {
     // Might be called from unmanaged thread
     jl_lock_profile();
diff --git a/src/debuginfo.h b/src/debuginfo.h
index 5b5cdcb82d534..9f7d10b5e371c 100644
--- a/src/debuginfo.h
+++ b/src/debuginfo.h
@@ -1,11 +1,12 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 // Declarations for debuginfo.cpp
+void jl_jit_add_bytes(size_t bytes) JL_NOTSAFEPOINT;
 
-int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
+int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, uint64_t *slide,
         llvm::object::SectionRef *Section, llvm::DIContext **context) JL_NOTSAFEPOINT;
 
-bool jl_dylib_DI_for_fptr(size_t pointer, llvm::object::SectionRef *Section, int64_t *slide, llvm::DIContext **context,
+bool jl_dylib_DI_for_fptr(size_t pointer, llvm::object::SectionRef *Section, uint64_t *slide, llvm::DIContext **context,
     bool onlyImage, bool *isImage, uint64_t* fbase, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT;
 
 static object::SectionedAddress makeAddress(
diff --git a/src/disasm.cpp b/src/disasm.cpp
index 9414c0a2a065d..3f994143a6c8b 100644
--- a/src/disasm.cpp
+++ b/src/disasm.cpp
@@ -16,7 +16,7 @@
 //
 //    University of Illinois at Urbana-Champaign
 //
-//    http://llvm.org
+//    https://llvm.org
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of
 // this software and associated documentation files (the "Software"), to deal with
@@ -58,7 +58,7 @@
 #include "llvm-version.h"
 
 // for outputting disassembly
-#include <llvm/ADT/Triple.h>
+#include <llvm/TargetParser/Triple.h>
 #include <llvm/AsmParser/Parser.h>
 #include <llvm/Analysis/TargetTransformInfo.h>
 #include <llvm/BinaryFormat/COFF.h>
@@ -99,6 +99,9 @@
 // for outputting assembly
 #include <llvm/CodeGen/AsmPrinter.h>
 #include <llvm/CodeGen/AsmPrinterHandler.h>
+#if JL_LLVM_VERSION >= 200000
+#include <llvm/CodeGen/CodeGenTargetMachineImpl.h>
+#endif
 #include <llvm/CodeGen/MachineModuleInfo.h>
 #include <llvm/CodeGen/Passes.h>
 #include <llvm/CodeGen/TargetPassConfig.h>
@@ -117,7 +120,7 @@ using namespace llvm;
 // helper class for tracking inlining context while printing debug info
 class DILineInfoPrinter {
     // internal state:
-    std::vector<DILineInfo> context;
+    SmallVector<DILineInfo, 0> context;
     uint32_t inline_depth = 0;
     // configuration options:
     const char* LineStart = "; ";
@@ -147,7 +150,7 @@ class DILineInfoPrinter {
     }
 
     void emit_finish(raw_ostream &Out) JL_NOTSAFEPOINT;
-    void emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo> &DI) JL_NOTSAFEPOINT;
+    void emit_lineinfo(raw_ostream &Out, SmallVectorImpl<DILineInfo> &DI) JL_NOTSAFEPOINT;
 
     struct repeat {
         size_t times;
@@ -169,7 +172,7 @@ class DILineInfoPrinter {
 
     void emit_lineinfo(raw_ostream &Out, DILineInfo &DI) JL_NOTSAFEPOINT
     {
-        std::vector<DILineInfo> DIvec(1);
+        SmallVector<DILineInfo, 0> DIvec(1);
         DIvec[0] = DI;
         emit_lineinfo(Out, DIvec);
     }
@@ -177,7 +180,7 @@ class DILineInfoPrinter {
     void emit_lineinfo(raw_ostream &Out, DIInliningInfo &DI) JL_NOTSAFEPOINT
     {
         uint32_t nframes = DI.getNumberOfFrames();
-        std::vector<DILineInfo> DIvec(nframes);
+        SmallVector<DILineInfo, 0> DIvec(nframes);
         for (uint32_t i = 0; i < DI.getNumberOfFrames(); i++) {
             DIvec[i] = DI.getFrame(i);
         }
@@ -207,7 +210,7 @@ void DILineInfoPrinter::emit_finish(raw_ostream &Out)
     this->inline_depth = 0;
 }
 
-void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo> &DI)
+void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, SmallVectorImpl<DILineInfo> &DI)
 {
     if (verbosity == output_none)
         return;
@@ -217,8 +220,8 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
     // compute the size of the matching prefix in the inlining information stack
     uint32_t nctx;
     for (nctx = 0; nctx < context.size() && nctx < nframes; nctx++) {
-        const DILineInfo &CtxLine = context.at(nctx);
-        const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx);
+        const DILineInfo &CtxLine = context[nctx];
+        const DILineInfo &FrameLine = DI[nframes - 1 - nctx];
         if (CtxLine != FrameLine) {
             break;
         }
@@ -230,27 +233,27 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
             // if so, drop all existing calls to it from the top of the context
             // AND check if instead the context was previously printed that way
             // but now has removed the recursive frames
-            StringRef method = StringRef(context.at(nctx - 1).FunctionName).rtrim(';'); // last matching frame
-            if ((nctx < nframes && StringRef(DI.at(nframes - nctx - 1).FunctionName).rtrim(';') == method) ||
-                (nctx < context.size() && StringRef(context.at(nctx).FunctionName).rtrim(';') == method)) {
+            StringRef method = StringRef(context[nctx - 1].FunctionName).rtrim(';'); // last matching frame
+            if ((nctx < nframes && StringRef(DI[nframes - nctx - 1].FunctionName).rtrim(';') == method) ||
+                (nctx < context.size() && StringRef(context[nctx].FunctionName).rtrim(';') == method)) {
                 update_line_only = true;
                 // transform nctx to exclude the combined frames
-                while (nctx > 0 && StringRef(context.at(nctx - 1).FunctionName).rtrim(';') == method)
+                while (nctx > 0 && StringRef(context[nctx - 1].FunctionName).rtrim(';') == method)
                     nctx -= 1;
             }
         }
         if (!update_line_only && nctx < context.size() && nctx < nframes) {
             // look at the first non-matching element to see if we are only changing the line number
-            const DILineInfo &CtxLine = context.at(nctx);
-            const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx);
+            const DILineInfo &CtxLine = context[nctx];
+            const DILineInfo &FrameLine = DI[nframes - 1 - nctx];
             if (StringRef(CtxLine.FunctionName).rtrim(';') == StringRef(FrameLine.FunctionName).rtrim(';'))
                 update_line_only = true;
         }
     }
     else if (nctx < context.size() && nctx < nframes) {
         // look at the first non-matching element to see if we are only changing the line number
-        const DILineInfo &CtxLine = context.at(nctx);
-        const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx);
+        const DILineInfo &CtxLine = context[nctx];
+        const DILineInfo &FrameLine = DI[nframes - 1 - nctx];
         if (CtxLine.FileName == FrameLine.FileName &&
                 StringRef(CtxLine.FunctionName).rtrim(';') == StringRef(FrameLine.FunctionName).rtrim(';')) {
             update_line_only = true;
@@ -262,9 +265,9 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
         uint32_t npops;
         if (collapse_recursive) {
             npops = 1;
-            StringRef Prev = StringRef(context.at(nctx).FunctionName).rtrim(';');
+            StringRef Prev = StringRef(context[nctx].FunctionName).rtrim(';');
             for (uint32_t i = nctx + 1; i < context.size(); i++) {
-                StringRef Next = StringRef(context.at(i).FunctionName).rtrim(';');
+                StringRef Next = StringRef(context[i].FunctionName).rtrim(';');
                 if (Prev != Next)
                     npops += 1;
                 Prev = Next;
@@ -282,7 +285,7 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
     }
     // print the new frames
     while (nctx < nframes) {
-        const DILineInfo &frame = DI.at(nframes - 1 - nctx);
+        const DILineInfo &frame = DI[nframes - 1 - nctx];
         Out << LineStart << inlining_indent("│");
         nctx += 1;
         context.push_back(frame);
@@ -301,7 +304,7 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
         Out << " within `" << method << "`";
         if (collapse_recursive) {
             while (nctx < nframes) {
-                const DILineInfo &frame = DI.at(nframes - 1 - nctx);
+                const DILineInfo &frame = DI[nframes - 1 - nctx];
                 if (StringRef(frame.FunctionName).rtrim(';') != method)
                     break;
                 nctx += 1;
@@ -313,10 +316,10 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
         Out << "\n";
     }
 #ifndef JL_NDEBUG
-    StringRef Prev = StringRef(context.at(0).FunctionName).rtrim(';');
+    StringRef Prev = StringRef(context[0].FunctionName).rtrim(';');
     uint32_t depth2 = 1;
     for (uint32_t i = 1; i < nctx; i++) {
-        StringRef Next = StringRef(context.at(i).FunctionName).rtrim(';');
+        StringRef Next = StringRef(context[i].FunctionName).rtrim(';');
         if (!collapse_recursive || Prev != Next)
             depth2 += 1;
         Prev = Next;
@@ -363,6 +366,10 @@ class LineNumberAnnotatedWriter : public AssemblyAnnotationWriter {
 void LineNumberAnnotatedWriter::emitFunctionAnnot(
       const Function *F, formatted_raw_ostream &Out)
 {
+    if (F->hasFnAttribute("julia.fsig")) {
+        auto sig = F->getFnAttribute("julia.fsig").getValueAsString();
+        Out << "; Function Signature: " << sig << "\n";
+    }
     InstrLoc = nullptr;
     DISubprogram *FuncLoc = F->getSubprogram();
     if (!FuncLoc) {
@@ -371,7 +378,7 @@ void LineNumberAnnotatedWriter::emitFunctionAnnot(
             FuncLoc = SP->second;
     }
     if (FuncLoc) {
-        std::vector<DILineInfo> DIvec(1);
+        SmallVector<DILineInfo, 0> DIvec(1);
         DILineInfo &DI = DIvec.back();
         DI.FunctionName = FuncLoc->getName().str();
         DI.FileName = FuncLoc->getFilename().str();
@@ -398,7 +405,7 @@ void LineNumberAnnotatedWriter::emitInstructionAnnot(
 {
     if (NewInstrLoc && NewInstrLoc != InstrLoc) {
         InstrLoc = NewInstrLoc;
-        std::vector<DILineInfo> DIvec;
+        SmallVector<DILineInfo, 0> DIvec;
         do {
             DIvec.emplace_back();
             DILineInfo &DI = DIvec.back();
@@ -451,6 +458,9 @@ static void jl_strip_llvm_debug(Module *m, bool all_meta, LineNumberAnnotatedWri
                 if (AAW)
                     AAW->addDebugLoc(&inst, inst.getDebugLoc());
                 inst.setDebugLoc(DebugLoc());
+#if JL_LLVM_VERSION >= 190000
+                inst.dropDbgRecords();
+#endif
             }
             if (deletelast) {
                 deletelast->eraseFromParent();
@@ -491,12 +501,12 @@ jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metada
     std::string code;
     raw_string_ostream stream(code);
 
-    {
+    if (dump->F) {
         //RAII will release the module
         auto TSM = std::unique_ptr<orc::ThreadSafeModule>(unwrap(dump->TSM));
         //If TSM is not passed in, then the context MUST be locked externally.
         //RAII will release the lock
-        Optional<orc::ThreadSafeContext::Lock> lock;
+        std::optional<orc::ThreadSafeContext::Lock> lock;
         if (TSM) {
             lock.emplace(TSM->getContext().getLock());
         }
@@ -533,7 +543,7 @@ jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metada
 }
 
 static void jl_dump_asm_internal(
-        uintptr_t Fptr, size_t Fsize, int64_t slide,
+        uintptr_t Fptr, size_t Fsize, uint64_t slide,
         object::SectionRef Section,
         DIContext *di_ctx,
         raw_ostream &rstream,
@@ -583,7 +593,7 @@ jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char emit_mc, const char* asm_v
 
     // Find debug info (line numbers) to print alongside
     object::SectionRef Section;
-    int64_t slide = 0;
+    uint64_t slide = 0;
     uint64_t symsize = 0;
     llvm::DIContext *context = NULL;
     if (!jl_DI_for_fptr(fptr, &symsize, &slide, &Section, &context)) {
@@ -636,9 +646,9 @@ class SymbolTable {
     int Pass;
     const object::ObjectFile *object;
     uint64_t ip; // virtual instruction pointer of the current instruction
-    int64_t slide;
+    uint64_t slide;
 public:
-    SymbolTable(MCContext &Ctx, const object::ObjectFile *object, int64_t slide, const FuncMCView &MemObj) JL_NOTSAFEPOINT
+    SymbolTable(MCContext &Ctx, const object::ObjectFile *object, uint64_t slide, const FuncMCView &MemObj) JL_NOTSAFEPOINT
         : Ctx(Ctx), MemObj(MemObj), object(object), ip(0), slide(slide) {}
     ~SymbolTable() JL_NOTSAFEPOINT = default;
     const FuncMCView &getMemoryObject() const JL_NOTSAFEPOINT { return MemObj; }
@@ -793,19 +803,15 @@ static const char *SymbolLookup(void *DisInfo, uint64_t ReferenceValue, uint64_t
 
 static int OpInfoLookup(void *DisInfo, uint64_t PC,
                         uint64_t Offset,
-#if JL_LLVM_VERSION < 150000
-                        uint64_t Size,
-#else
                         uint64_t OpSize, uint64_t InstSize,
-#endif
                         int TagType, void *TagBuf)
 {
-    SymbolTable *SymTab = (SymbolTable*)DisInfo;
+    // SymbolTable *SymTab = (SymbolTable*)DisInfo;
     LLVMOpInfo1 *info = (LLVMOpInfo1*)TagBuf;
     memset(info, 0, sizeof(*info));
     if (TagType != 1)
         return 0;               // Unknown data format
-    PC += SymTab->getIP() - (uint64_t)(uintptr_t)SymTab->getMemoryObject().data(); // add offset from MemoryObject base
+    // PC += SymTab->getIP() - (uint64_t)(uintptr_t)SymTab->getMemoryObject().data(); // add offset from MemoryObject base
     // TODO: see if we knew of a relocation applied at PC
     // info->AddSymbol.Present = 1;
     // info->AddSymbol.Name = name;
@@ -845,7 +851,7 @@ std::string rawCodeComment(const llvm::ArrayRef<uint8_t>& Memory, const llvm::Tr
 }
 
 static void jl_dump_asm_internal(
-        uintptr_t Fptr, size_t Fsize, int64_t slide,
+        uintptr_t Fptr, size_t Fsize, uint64_t slide,
         object::SectionRef Section,
         DIContext *di_ctx,
         raw_ostream &rstream,
@@ -868,6 +874,8 @@ static void jl_dump_asm_internal(
     SourceMgr SrcMgr;
 
     MCTargetOptions Options;
+    Options.AsmVerbose = true;
+    Options.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory;
     std::unique_ptr<MCAsmInfo> MAI(
         TheTarget->createMCAsmInfo(*TheTarget->createMCRegInfo(TheTriple.str()), TheTriple.str(), Options));
     assert(MAI && "Unable to create target asm info!");
@@ -906,11 +914,7 @@ static void jl_dump_asm_internal(
     std::unique_ptr<MCCodeEmitter> CE;
     std::unique_ptr<MCAsmBackend> MAB;
     if (ShowEncoding) {
-#if JL_LLVM_VERSION >= 150000
         CE.reset(TheTarget->createMCCodeEmitter(*MCII, Ctx));
-#else
-        CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
-#endif
         MAB.reset(TheTarget->createMCAsmBackend(*STI, *MRI, Options));
     }
 
@@ -920,16 +924,18 @@ static void jl_dump_asm_internal(
     // LLVM will destroy the formatted stream, and we keep the raw stream.
     std::unique_ptr<formatted_raw_ostream> ustream(new formatted_raw_ostream(rstream));
     std::unique_ptr<MCStreamer> Streamer(
-            TheTarget->createAsmStreamer(Ctx, std::move(ustream), /*asmverbose*/true,
-                                         /*useDwarfDirectory*/ true,
-                                         IP.release(),
-                                         std::move(CE), std::move(MAB),
-                                         /*ShowInst*/ false));
-#if JL_LLVM_VERSION >= 140000
-    Streamer->initSections(true, *STI);
+#if JL_LLVM_VERSION >= 190000
+        TheTarget->createAsmStreamer(Ctx, std::move(ustream),
+
+                                     IP.release(), std::move(CE), std::move(MAB))
 #else
-    Streamer->InitSections(true);
+        TheTarget->createAsmStreamer(Ctx, std::move(ustream), /*asmverbose*/ true,
+                                     /*useDwarfDirectory*/ true, IP.release(),
+                                     std::move(CE), std::move(MAB),
+                                     /*ShowInst*/ false)
 #endif
+    );
+    Streamer->initSections(true, *STI);
 
     // Make the MemoryObject wrapper
     ArrayRef<uint8_t> memoryObject(const_cast<uint8_t*>((const uint8_t*)Fptr),Fsize);
@@ -1045,9 +1051,6 @@ static void jl_dump_asm_internal(
             MCInst Inst;
             MCDisassembler::DecodeStatus S;
             FuncMCView view = memoryObject.slice(Index);
-#if JL_LLVM_VERSION < 150000
-#define getCommentOS() GetCommentOS()
-#endif
             S = DisAsm->getInstruction(Inst, insSize, view, 0,
                                       /*CStream*/ pass != 0 ? Streamer->getCommentOS () : nulls());
             if (pass != 0 && Streamer->getCommentOS ().tell() > 0)
@@ -1058,6 +1061,8 @@ static void jl_dump_asm_internal(
                 if (insSize == 0) // skip illegible bytes
 #if defined(_CPU_PPC_) || defined(_CPU_PPC64_) || defined(_CPU_ARM_) || defined(_CPU_AARCH64_)
                     insSize = 4; // instructions are always 4 bytes
+#elif defined(_CPU_RISCV64_)
+                    insSize = 2; // instructions can be 2 bytes when compressed
 #else
                     insSize = 1; // attempt to slide 1 byte forward
 #endif
@@ -1105,7 +1110,7 @@ static void jl_dump_asm_internal(
                             const MCOperand &OpI = Inst.getOperand(Op);
                             if (OpI.isImm()) {
                                 int64_t imm = OpI.getImm();
-                                if (opinfo.OpInfo[Op].OperandType == MCOI::OPERAND_PCREL)
+                                if (opinfo.operands()[Op].OperandType == MCOI::OPERAND_PCREL)
                                     imm += Fptr + Index;
                                 const char *name = DisInfo.lookupSymbolName(imm);
                                 if (name)
@@ -1137,7 +1142,11 @@ static void jl_dump_asm_internal(
 
 /// addPassesToX helper drives creation and initialization of TargetPassConfig.
 static MCContext *
+#if JL_LLVM_VERSION >= 200000
+addPassesToGenerateCode(CodeGenTargetMachineImpl *TM, PassManagerBase &PM) {
+#else
 addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM) {
+#endif
     TargetPassConfig *PassConfig = TM->createPassConfig(PM);
     PassConfig->setDisableVerify(false);
     PM.add(PassConfig);
@@ -1207,7 +1216,7 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const
 {
     // precise printing via IR assembler
     SmallVector<char, 4096> ObjBufferSV;
-    { // scope block
+    if (dump->F) { // scope block also
         auto TSM = std::unique_ptr<orc::ThreadSafeModule>(unwrap(dump->TSM));
         llvm::raw_svector_ostream asmfile(ObjBufferSV);
         TSM->withModuleDo([&](Module &m) {
@@ -1222,12 +1231,25 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const
                 f->addFnAttr(Attribute::NoUnwind);
         });
         auto TMBase = jl_ExecutionEngine->cloneTargetMachine();
+#if JL_LLVM_VERSION >= 200000
+        CodeGenTargetMachineImpl *TM = static_cast<CodeGenTargetMachineImpl*>(TMBase.get());
+#else
         LLVMTargetMachine *TM = static_cast<LLVMTargetMachine*>(TMBase.get());
+#endif
+        MCTargetOptions &Options = TM->Options.MCOptions;
+        Options.AsmVerbose = true;
+        Options.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory;
+        if (binary)
+            Options.ShowMCEncoding = true;
         legacy::PassManager PM;
         addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis());
         if (emit_mc) {
             raw_svector_ostream obj_OS(ObjBufferSV);
+#if JL_LLVM_VERSION >= 180000
+            if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CodeGenFileType::ObjectFile, false, nullptr))
+#else
             if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr))
+#endif
                 return jl_an_empty_string;
             TSM->withModuleDo([&](Module &m) { PM.run(m); });
         }
@@ -1236,7 +1258,7 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const
             if (!Context)
                 return jl_an_empty_string;
             Context->setGenDwarfForAssembly(false);
-            // Duplicate LLVMTargetMachine::addAsmPrinter here so we can set the asm dialect and add the custom annotation printer
+            // Duplicate CodeGenTargetMachineImpl::addAsmPrinter here so we can set the asm dialect and add the custom annotation printer
             const MCSubtargetInfo &STI = *TM->getMCSubtargetInfo();
             const MCAsmInfo &MAI = *TM->getMCAsmInfo();
             const MCRegisterInfo &MRI = *TM->getMCRegisterInfo();
@@ -1248,30 +1270,33 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const
                 OutputAsmDialect = 1;
             MCInstPrinter *InstPrinter = TM->getTarget().createMCInstPrinter(
                 jl_ExecutionEngine->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI);
-             std::unique_ptr<MCAsmBackend> MAB(TM->getTarget().createMCAsmBackend(
-                STI, MRI, TM->Options.MCOptions));
+            std::unique_ptr<MCAsmBackend> MAB(TM->getTarget().createMCAsmBackend(
+                STI, MRI, Options));
             std::unique_ptr<MCCodeEmitter> MCE;
             if (binary) { // enable MCAsmStreamer::AddEncodingComment printing
-#if JL_LLVM_VERSION >= 150000
                 MCE.reset(TM->getTarget().createMCCodeEmitter(MII, *Context));
-#else
-                MCE.reset(TM->getTarget().createMCCodeEmitter(MII, MRI, *Context));
-#endif
             }
             auto FOut = std::make_unique<formatted_raw_ostream>(asmfile);
             std::unique_ptr<MCStreamer> S(TM->getTarget().createAsmStreamer(
-                *Context, std::move(FOut), true,
-                true, InstPrinter,
-                std::move(MCE), std::move(MAB),
-                false));
-            std::unique_ptr<AsmPrinter> Printer(
-                TM->getTarget().createAsmPrinter(*TM, std::move(S)));
+#if JL_LLVM_VERSION >= 190000
+                *Context, std::move(FOut), InstPrinter, std::move(MCE), std::move(MAB)
+#else
+                *Context, std::move(FOut), true, true, InstPrinter, std::move(MCE),
+                std::move(MAB), false
+#endif
+                    ));
+            AsmPrinter *Printer = TM->getTarget().createAsmPrinter(*TM, std::move(S));
+#if JL_LLVM_VERSION >= 190000
+            Printer->addAsmPrinterHandler(
+                        std::make_unique<LineNumberPrinterHandler>(*Printer, debuginfo));
+#else
             Printer->addAsmPrinterHandler(AsmPrinter::HandlerInfo(
                         std::unique_ptr<AsmPrinterHandler>(new LineNumberPrinterHandler(*Printer, debuginfo)),
                         "emit", "Debug Info Emission", "Julia", "Julia::LineNumberPrinterHandler Markup"));
+#endif
             if (!Printer)
                 return jl_an_empty_string;
-            PM.add(Printer.release());
+            PM.add(Printer);
             PM.add(createFreeMachineFunctionPass());
             TSM->withModuleDo([&](Module &m){ PM.run(m); });
         }
diff --git a/src/dlload.c b/src/dlload.c
index ffa9a053d5f1c..cc3bc4e73043a 100644
--- a/src/dlload.c
+++ b/src/dlload.c
@@ -68,8 +68,6 @@ const char *jl_crtdll_name = CRTDLL_BASENAME ".dll";
 #undef CRTDLL_BASENAME
 #endif
 
-#define PATHBUF 4096
-
 #ifdef _OS_WINDOWS_
 void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT
 {
@@ -164,8 +162,6 @@ JL_DLLEXPORT void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOI
     }
     else {
         lib = LoadLibraryExW(wfilename, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
-        if (lib)
-            needsSymRefreshModuleList = 1;
     }
     return lib;
 }
@@ -188,7 +184,7 @@ JL_DLLEXPORT JL_NO_SANITIZE void *jl_dlopen(const char *filename, unsigned flags
         dlopen = (dlopen_prototype*)dlsym(RTLD_NEXT, "dlopen");
         if (!dlopen)
             return NULL;
-        void *libdl_handle = dlopen("libdl.so", RTLD_NOW | RTLD_NOLOAD);
+        void *libdl_handle = dlopen("libdl.so.2", RTLD_NOW | RTLD_NOLOAD);
         assert(libdl_handle);
         dlopen = (dlopen_prototype*)dlsym(libdl_handle, "dlopen");
         dlclose(libdl_handle);
@@ -240,28 +236,47 @@ JL_DLLEXPORT int jl_dlclose(void *handle) JL_NOTSAFEPOINT
 #endif
 }
 
-void *jl_find_dynamic_library_by_addr(void *symbol) {
+void *jl_find_dynamic_library_by_addr(void *symbol, int throw_err, int close) JL_NOTSAFEPOINT
+{
     void *handle;
 #ifdef _OS_WINDOWS_
     if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
                             (LPCWSTR)symbol,
                             (HMODULE*)&handle)) {
-        jl_error("could not load base module");
+        if (throw_err)
+            jl_error("could not load base module");
+        return NULL;
     }
 #else
     Dl_info info;
     if (!dladdr(symbol, &info) || !info.dli_fname) {
-        jl_error("could not load base module");
+        if (throw_err)
+            jl_error("could not load base module");
+        return NULL;
     }
+    dlerror();
     handle = dlopen(info.dli_fname, RTLD_NOW | RTLD_NOLOAD | RTLD_LOCAL);
-    dlclose(handle); // Undo ref count increment from `dlopen`
+#if defined(_OS_FREEBSD_)
+    // FreeBSD will not give you a handle for the executable if you dlopen() it
+    // with RTLD_NOLOAD, so check jl_exe_handle.
+    if (handle == NULL && dlerror() == NULL) {
+        handle = jl_exe_handle;
+    }
+#elif !defined(__APPLE__)
+    if (handle == RTLD_DEFAULT && (RTLD_DEFAULT != NULL || dlerror() == NULL)) {
+        // We loaded the executable but got RTLD_DEFAULT back, ask for a real handle instead
+        handle = dlopen("", RTLD_NOW | RTLD_NOLOAD | RTLD_LOCAL);
+    }
+#endif
+    if (handle != NULL && close)
+        dlclose(handle); // Undo ref count increment from `dlopen`
 #endif
     return handle;
 }
 
 JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, int throw_err)
 {
-    char path[PATHBUF], relocated[PATHBUF];
+    ios_t path, relocated;
     int i;
 #ifdef _OS_WINDOWS_
     int err;
@@ -273,11 +288,10 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
     // number of extensions to try — if modname already ends with the
     // standard extension, then we don't try adding additional extensions
     int n_extensions = endswith_extension(modname) ? 1 : N_EXTENSIONS;
-    int ret;
 
     // modname == NULL is a sentinel value requesting the handle of libjulia-internal
     if (modname == NULL)
-        return jl_find_dynamic_library_by_addr(&jl_load_dynamic_library);
+        return jl_libjulia_internal_handle;
 
     abspath = jl_isabspath(modname);
     is_atpath = 0;
@@ -298,6 +312,9 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
     }
 #endif
 
+    ios_mem(&path, IOS_INLSIZE);
+    ios_mem(&relocated, IOS_INLSIZE);
+
     /*
       this branch permutes all base paths in DL_LOAD_PATH with all extensions
       note: skip when !jl_base_module to avoid UndefVarError(:DL_LOAD_PATH),
@@ -307,51 +324,52 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
       While these exist as OS concepts on Darwin, we want to use them on other platforms
       such as Windows, so we emulate them here.
     */
-    if (!abspath && !is_atpath && jl_base_module != NULL) {
+    if (!abspath && !is_atpath && jl_base_module != NULL && jl_typeinf_world != 1) {
         jl_binding_t *b = jl_get_module_binding(jl_base_module, jl_symbol("DL_LOAD_PATH"), 0);
-        jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_atomic_load_relaxed(&b->value) : NULL);
+        jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_get_binding_value_in_world(b, jl_typeinf_world) : NULL);
         if (DL_LOAD_PATH != NULL) {
             size_t j;
-            for (j = 0; j < jl_array_len(DL_LOAD_PATH); j++) {
+            for (j = 0; j < jl_array_nrows(DL_LOAD_PATH); j++) {
                 char *dl_path = jl_string_data(jl_array_ptr_data(DL_LOAD_PATH)[j]);
-                size_t len = strlen(dl_path);
-                if (len == 0)
+                if (*dl_path == 0)
                     continue;
 
+                ios_trunc(&relocated, 0);
+
                 // Is this entry supposed to be relative to the bindir?
-                if (len >= 16 && strncmp(dl_path, "@executable_path", 16) == 0) {
-                    snprintf(relocated, PATHBUF, "%s%s", jl_options.julia_bindir, dl_path + 16);
-                    len = len - 16 + strlen(jl_options.julia_bindir);
+                if (strncmp(dl_path, "@executable_path", 16) == 0) {
+                    ios_printf(&relocated, "%s%s", jl_options.julia_bindir, dl_path + 16);
                 } else {
-                    strncpy(relocated, dl_path, PATHBUF);
-                    relocated[PATHBUF-1] = '\0';
+                    ios_puts(dl_path, &relocated);
                 }
+                ios_putc(0, &relocated);
                 for (i = 0; i < n_extensions; i++) {
+                    ios_trunc(&path, 0);
                     const char *ext = extensions[i];
-                    path[0] = '\0';
-                    if (relocated[len-1] == PATHSEPSTRING[0])
-                        snprintf(path, PATHBUF, "%s%s%s", relocated, modname, ext);
-                    else {
-                        ret = snprintf(path, PATHBUF, "%s" PATHSEPSTRING "%s%s", relocated, modname, ext);
-                        if (ret < 0)
-                            jl_errorf("path is longer than %d\n", PATHBUF);
-                    }
+                    if (relocated.buf[relocated.bpos - 2] == PATHSEPSTRING[0])
+                        ios_printf(&path, "%s%s%s", relocated.buf, modname, ext);
+                    else
+                        ios_printf(&path, "%s" PATHSEPSTRING "%s%s", relocated.buf, modname, ext);
+                    ios_putc(0, &path);
 
 #ifdef _OS_WINDOWS_
                     if (i == 0) { // LoadLibrary already tested the extensions, we just need to check the `stat` result
 #endif
-                        handle = jl_dlopen(path, flags);
+                        handle = jl_dlopen(path.buf, flags);
                         if (handle && !(flags & JL_RTLD_NOLOAD))
                             jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, jl_pathname_for_handle(handle));
                         if (handle)
-                            return handle;
+                            goto success;
 #ifdef _OS_WINDOWS_
                         err = GetLastError();
                     }
 #endif
                     // bail out and show the error if file actually exists
-                    if (jl_stat(path, (char*)&stbuf) == 0)
-                        goto notfound;
+                    if (jl_stat(path.buf, (char *)&stbuf) == 0) {
+                        if (!S_ISDIR(stbuf.st_mode)) {
+                            goto notfound;
+                        }
+                    }
                 }
             }
         }
@@ -359,21 +377,25 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
 
     // now fall back and look in default library paths, for all extensions
     for (i = 0; i < n_extensions; i++) {
+        ios_trunc(&path, 0);
         const char *ext = extensions[i];
-        path[0] = '\0';
-        snprintf(path, PATHBUF, "%s%s", modname, ext);
-        handle = jl_dlopen(path, flags);
+        ios_printf(&path, "%s%s", modname, ext);
+        ios_putc(0, &path);
+        handle = jl_dlopen(path.buf, flags);
         if (handle && !(flags & JL_RTLD_NOLOAD))
             jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, jl_pathname_for_handle(handle));
         if (handle)
-            return handle;
+            goto success;
 #ifdef _OS_WINDOWS_
         err = GetLastError();
         break; // LoadLibrary already tested the rest
 #else
         // bail out and show the error if file actually exists
-        if (jl_stat(path, (char*)&stbuf) == 0)
-            break;
+        if (jl_stat(path.buf, (char *)&stbuf) == 0) {
+            if (!S_ISDIR(stbuf.st_mode)) {
+                break;
+            }
+        }
 #endif
     }
 
@@ -385,20 +407,38 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
 #else
         const char *reason = dlerror();
 #endif
+        ios_close(&relocated);
+        ios_close(&path);
         jl_errorf("could not load library \"%s\"\n%s", modname, reason);
     }
     handle = NULL;
 
+success:
+    ios_close(&relocated);
+    ios_close(&path);
     return handle;
 }
 
-JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int throw_err) JL_NOTSAFEPOINT
+/*
+ * When search_deps is 1, act like dlsym and search both the library for the
+ * handle and all its dependencies.  Use this option only when compatibility
+ * with dlsym(3) is required, thought this behaviour is not possible on Windows.
+ *
+ * At time of writing, only Base.dlsym() uses search_deps = 1.
+ */
+JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int throw_err, int search_deps) JL_NOTSAFEPOINT
 {
     int symbol_found = 0;
 
     /* First, get the symbol value */
-#ifdef _OS_WINDOWS_
+#if defined(_OS_WINDOWS_)
     *value = GetProcAddress((HMODULE) handle, symbol);
+#elif defined(_OS_DARWIN_)
+    /* When !search_deps and the handle isn't special, force RTLD_FIRST. */
+    if (!search_deps && handle != RTLD_NEXT && handle != RTLD_DEFAULT &&
+        handle != RTLD_SELF && handle != RTLD_MAIN_ONLY)
+        handle = (void *)((uintptr_t)handle | 1);
+    *value = dlsym(handle, symbol);
 #else
     *value = dlsym(handle, symbol);
 #endif
@@ -422,34 +462,56 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t
     }
 #endif
 
-    if (!symbol_found && throw_err) {
+#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
+    /*
+     * Unlike GetProcAddress, dlsym will search the dependencies of the given
+     * library, so we must check where the symbol came from.
+     */
+    if (symbol_found && !search_deps && handle != jl_RTLD_DEFAULT_handle) {
+        void *symbol_handle = jl_find_dynamic_library_by_addr(*value, 0, 1);
+        symbol_found = handle == symbol_handle;
+    }
+#endif
+
+    if (!symbol_found) {
+        if (throw_err) {
 #ifdef _OS_WINDOWS_
-        char err[256];
-        win32_formatmessage(GetLastError(), err, sizeof(err));
+            char err[256];
+            win32_formatmessage(GetLastError(), err, sizeof(err));
 #endif
-        jl_errorf("could not load symbol \"%s\":\n%s", symbol, err);
+            jl_errorf("could not load symbol \"%s\":\n%s", symbol, err);
+        }
+        return 0;
     }
-    return symbol_found;
+
+    return 1;
 }
 
 // Look for symbols in internal libraries
 JL_DLLEXPORT const char *jl_dlfind(const char *f_name)
 {
+#ifdef _OS_FREEBSD_
+    // This is a workaround for FreeBSD <= 13.2 which do not have
+    // https://cgit.freebsd.org/src/commit/?id=21a52f99440c9bec7679f3b0c5c9d888901c3694
+    // (See https://github.com/JuliaLang/julia/issues/50846)
+    if (strcmp(f_name, "dl_iterate_phdr") == 0)
+        return NULL;
+#endif
     void * dummy;
-    if (jl_dlsym(jl_libjulia_internal_handle, f_name, &dummy, 0))
+    if (jl_dlsym(jl_libjulia_internal_handle, f_name, &dummy, 0, 0))
         return JL_LIBJULIA_INTERNAL_DL_LIBNAME;
-    if (jl_dlsym(jl_libjulia_handle, f_name, &dummy, 0))
+    if (jl_dlsym(jl_libjulia_handle, f_name, &dummy, 0, 0))
         return JL_LIBJULIA_DL_LIBNAME;
-    if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0))
+    if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0, 0))
         return JL_EXE_LIBNAME;
 #ifdef _OS_WINDOWS_
-    if (jl_dlsym(jl_kernel32_handle, f_name, &dummy, 0))
+    if (jl_dlsym(jl_kernel32_handle, f_name, &dummy, 0, 0))
         return "kernel32";
-    if (jl_dlsym(jl_crtdll_handle, f_name, &dummy, 0)) // Prefer crtdll over ntdll
+    if (jl_dlsym(jl_crtdll_handle, f_name, &dummy, 0, 0)) // Prefer crtdll over ntdll
         return jl_crtdll_basename;
-    if (jl_dlsym(jl_ntdll_handle, f_name, &dummy, 0))
+    if (jl_dlsym(jl_ntdll_handle, f_name, &dummy, 0, 0))
         return "ntdll";
-    if (jl_dlsym(jl_winsock_handle, f_name, &dummy, 0))
+    if (jl_dlsym(jl_winsock_handle, f_name, &dummy, 0, 0))
         return "ws2_32";
 #endif
     // additional common libraries (libc?) could be added here, but in general,
diff --git a/src/engine.cpp b/src/engine.cpp
new file mode 100644
index 0000000000000..858f37b55e85e
--- /dev/null
+++ b/src/engine.cpp
@@ -0,0 +1,153 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include <mutex>
+#include <condition_variable>
+#include <llvm/ADT/DenseMap.h>
+#include <llvm/ADT/DenseSet.h>
+#include <llvm/ADT/SmallVector.h>
+#include "julia.h"
+#include "julia_internal.h"
+#include "julia_assert.h"
+
+using namespace llvm;
+
+struct ReservationInfo {
+    int16_t tid = 0;
+    jl_code_instance_t *ci = nullptr;
+};
+
+struct InferKey {
+    jl_method_instance_t *mi = nullptr;
+    jl_value_t *owner = nullptr;
+};
+
+template<> struct llvm::DenseMapInfo<InferKey> {
+  using FirstInfo = DenseMapInfo<jl_method_instance_t*>;
+  using SecondInfo = DenseMapInfo<jl_value_t*>;
+
+  static inline InferKey getEmptyKey() {
+    return InferKey{FirstInfo::getEmptyKey(),
+                    SecondInfo::getEmptyKey()};
+  }
+
+  static inline InferKey getTombstoneKey() {
+    return InferKey{FirstInfo::getTombstoneKey(),
+                    SecondInfo::getTombstoneKey()};
+  }
+
+  static unsigned getHashValue(const InferKey& PairVal) {
+    return detail::combineHashValue(FirstInfo::getHashValue(PairVal.mi),
+                                    SecondInfo::getHashValue(PairVal.owner));
+  }
+
+  static bool isEqual(const InferKey &LHS, const InferKey &RHS) {
+    return LHS.mi == RHS.mi && LHS.owner == RHS.owner;
+  }
+};
+
+static std::mutex engine_lock; // n.b. this lock is only ever held briefly
+static std::condition_variable engine_wait; // but it may be waiting a while in this state
+// map from MethodInstance to threadid that owns it currently for inference
+static DenseMap<InferKey, ReservationInfo> Reservations;
+// vector of which threads are blocked and which lease they need
+static SmallVector<InferKey, 0> Awaiting; // (this could be merged into ptls also)
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+jl_code_instance_t *jl_engine_reserve(jl_method_instance_t *m, jl_value_t *owner)
+{
+    jl_task_t *ct = jl_current_task;
+    ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph
+    jl_code_instance_t *ci = jl_new_codeinst_uninit(m, owner); // allocate a placeholder
+    JL_GC_PUSH1(&ci);
+    auto tid = jl_atomic_load_relaxed(&ct->tid);
+    if (([tid, m, owner, ci] () -> bool { // necessary scope block / lambda for unique_lock
+            jl_unique_gcsafe_lock lock(engine_lock);
+            InferKey key{m, owner};
+            if ((signed)Awaiting.size() < tid + 1)
+                Awaiting.resize(tid + 1);
+            while (1) {
+                auto record = Reservations.find(key);
+                if (record == Reservations.end()) {
+                    Reservations[key] = ReservationInfo{tid, ci};
+                    return false;
+                }
+                // before waiting, need to run deadlock/cycle detection
+                // there is a cycle if the thread holding our lease is blocked
+                // and waiting for (transitively) any lease that is held by this thread
+                auto wait_tid = record->second.tid;
+                while (1) {
+                    if (wait_tid == tid)
+                        return true;
+                    if ((signed)Awaiting.size() <= wait_tid)
+                        break; // no cycle, since it is running (and this should be unreachable)
+                    auto key2 = Awaiting[wait_tid];
+                    if (key2.mi == nullptr)
+                        break; // no cycle, since it is running
+                    auto record2 = Reservations.find(key2);
+                    if (record2 == Reservations.end())
+                        break; // no cycle, since it is about to resume
+                    assert(wait_tid != record2->second.tid);
+                    wait_tid = record2->second.tid;
+                }
+                Awaiting[tid] = key;
+                lock.wait(engine_wait);
+                Awaiting[tid] = InferKey{};
+            }
+        })())
+        ct->ptls->engine_nqueued--;
+    JL_GC_POP();
+    return ci;
+}
+
+int jl_engine_hasreserved(jl_method_instance_t *m, jl_value_t *owner)
+{
+    jl_task_t *ct = jl_current_task;
+    InferKey key = {m, owner};
+    std::unique_lock lock(engine_lock);
+    auto record = Reservations.find(key);
+    return record != Reservations.end() && record->second.tid == jl_atomic_load_relaxed(&ct->tid);
+}
+
+STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT
+{
+    return (bits & GC_MARKED) != 0;
+}
+
+void jl_engine_sweep(jl_ptls_t *gc_all_tls_states)
+{
+    std::unique_lock lock(engine_lock);
+    bool any = false;
+    for (auto I = Reservations.begin(); I != Reservations.end(); ++I) {
+        jl_code_instance_t *ci = I->second.ci;
+        if (!gc_marked(jl_astaggedvalue(ci)->bits.gc)) {
+            auto tid = I->second.tid;
+            Reservations.erase(I);
+            jl_ptls_t ptls2 = gc_all_tls_states[tid];
+            ptls2->engine_nqueued--;
+            any = true;
+        }
+    }
+    if (any)
+        engine_wait.notify_all();
+}
+
+void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src)
+{
+    jl_task_t *ct = jl_current_task;
+    std::unique_lock lock(engine_lock);
+    auto record = Reservations.find(InferKey{jl_get_ci_mi(ci), ci->owner});
+    if (record == Reservations.end() || record->second.ci != ci)
+        return;
+    assert(jl_atomic_load_relaxed(&ct->tid) == record->second.tid);
+    ct->ptls->engine_nqueued--; // re-enables finalizers, but doesn't immediately try to run them
+    Reservations.erase(record);
+    engine_wait.notify_all();
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/features_x86.h b/src/features_x86.h
index 08f979df546b7..b6e2b23985b4f 100644
--- a/src/features_x86.h
+++ b/src/features_x86.h
@@ -5,6 +5,13 @@
 #else
 #define JL_X86_64ONLY_VER(x) x
 #endif
+// The code is similar to what is here so the bits can be used as reference
+// https://github.com/llvm/llvm-project/blob/3f7905733820851bc4f65cb4af693c3101cbf20d/llvm/lib/TargetParser/Host.cpp#L1257
+
+// The way the bits here work is an index into the features array. This is a bit array
+// The index works as follows:
+// 32*i + j where i is the index into the array and j is the bit in the array.
+// There is a reference to what each index corresponds to in _get_host_cpu
 
 // X86 features definition
 // EAX=1: ECX
@@ -45,15 +52,15 @@ JL_FEATURE_DEF(avx512ifma, 32 * 2 + 21, 0)
 // JL_FEATURE_DEF(pcommit, 32 * 2 + 22, 0) // Deprecated
 JL_FEATURE_DEF(clflushopt, 32 * 2 + 23, 0)
 JL_FEATURE_DEF(clwb, 32 * 2 + 24, 0)
-JL_FEATURE_DEF(avx512pf, 32 * 2 + 26, 0)
-JL_FEATURE_DEF(avx512er, 32 * 2 + 27, 0)
+// JL_FEATURE_DEF(avx512pf, 32 * 2 + 26, 0) // Deprecated in LLVM 19
+// JL_FEATURE_DEF(avx512er, 32 * 2 + 27, 0) // Deprecated in LLVM 19
 JL_FEATURE_DEF(avx512cd, 32 * 2 + 28, 0)
 JL_FEATURE_DEF(sha, 32 * 2 + 29, 0)
 JL_FEATURE_DEF(avx512bw, 32 * 2 + 30, 0)
 JL_FEATURE_DEF(avx512vl, 32 * 2 + 31, 0)
 
 // EAX=7,ECX=0: ECX
-JL_FEATURE_DEF(prefetchwt1, 32 * 3 + 0, 0)
+// JL_FEATURE_DEF(prefetchwt1, 32 * 3 + 0, 0) // Deprecated in LLVM 19
 JL_FEATURE_DEF(avx512vbmi, 32 * 3 + 1, 0)
 JL_FEATURE_DEF(pku, 32 * 3 + 4, 0) // ospke
 JL_FEATURE_DEF(waitpkg, 32 * 3 + 5, 0)
@@ -79,6 +86,7 @@ JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 0)
 JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000)
 JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000)
 JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0)
+// JL_FEATURE_DEF(ibt, 32 * 4 + 20, 0)
 JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16")
 JL_FEATURE_DEF(avx512fp16, 32 * 4 + 23, 140000)
 JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile")
@@ -110,10 +118,28 @@ JL_FEATURE_DEF(clzero, 32 * 8 + 0, 0)
 JL_FEATURE_DEF(wbnoinvd, 32 * 8 + 9, 0)
 
 // EAX=7,ECX=1: EAX
+JL_FEATURE_DEF(sha512, 32 * 9 + 0, 170000)
+JL_FEATURE_DEF(sm3, 32 * 9 + 1, 170000)
+JL_FEATURE_DEF(sm4, 32 * 9 + 2, 170000)
+JL_FEATURE_DEF(raoint, 32 * 9 + 3, 170000)
 JL_FEATURE_DEF(avxvnni, 32 * 9 + 4, 120000)
 JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 0)
+JL_FEATURE_DEF(cmpccxadd, 32 * 9 + 7, 160000)
+JL_FEATURE_DEF_NAME(amx_fp16, 32 * 9 + 21, 160000, "amx-fp16")
+JL_FEATURE_DEF(hreset, 32 * 9 + 22, 160000)
+JL_FEATURE_DEF(avxifma, 32 * 9 + 23, 160000)
+
+// EAX=7,ECX=1: EBX
+JL_FEATURE_DEF(avxvnniint8, 32 * 10 + 4, 160000)
+JL_FEATURE_DEF(avxneconvert, 32 * 10 + 5, 160000)
+JL_FEATURE_DEF_NAME(amx_complex, 32 * 10 + 8, 170000, "amx-complex")
+JL_FEATURE_DEF(avxvnniint16, 32 * 10 + 10, 170000)
+JL_FEATURE_DEF(prefetchi, 32 * 10 + 14, 160000)
+JL_FEATURE_DEF(usermsr, 32 * 10 + 15, 170000)
+// JL_FEATURE_DEF(avx10, 32 * 10 + 19, 170000) // TODO: What to do about avx10 and it's mess?
+// JL_FEATURE_DEF(apxf, 32 * 10 + 21, 190000)
 
 // EAX=0x14,ECX=0: EBX
-JL_FEATURE_DEF(ptwrite, 32 * 10 + 4, 0)
+JL_FEATURE_DEF(ptwrite, 32 * 11 + 4, 0)
 
 #undef JL_X86_64ONLY_VER
diff --git a/src/flisp/Makefile b/src/flisp/Makefile
index 17292d301115b..a5449fd084a3e 100644
--- a/src/flisp/Makefile
+++ b/src/flisp/Makefile
@@ -3,10 +3,10 @@ JULIAHOME := $(abspath $(SRCDIR)/../..)
 BUILDDIR := .
 include $(JULIAHOME)/Make.inc
 
-JCFLAGS += $(CFLAGS)
-JCXXFLAGS += $(CXXFLAGS)
-JCPPFLAGS += $(CPPFLAGS)
-JLDFLAGS += $(LDFLAGS)
+JCFLAGS_COMMON += $(CFLAGS) $(JL_CFLAGS)
+JCXXFLAGS_COMMON += $(CXXFLAGS) $(JL_CXXFLAGS)
+JCPPFLAGS_COMMON += $(CPPFLAGS) $(JL_CPPFLAGS)
+JLDFLAGS += $(LDFLAGS) $(JL_LDFLAGS)
 
 NAME := flisp
 EXENAME := $(NAME)
@@ -47,31 +47,31 @@ LIBS += $(LIBUTF8PROC)
 endif
 
 
-FLAGS := -I$(LLTSRCDIR) $(JCFLAGS) $(HFILEDIRS:%=-I%) \
-        -I$(LIBUV_INC) -I$(UTF8PROC_INC) -I$(build_includedir) $(LIBDIRS:%=-L%) \
+FLAGS_COMMON := -I$(LLTSRCDIR) $(HFILEDIRS:%=-I%) \
+        -I$(LIBUV_INC) -I$(UTF8PROC_INC) -I$(build_includedir) \
         -DJL_LIBRARY_EXPORTS_INTERNAL -DUTF8PROC_EXPORTS
 ifneq ($(OS), emscripten)
-FLAGS += -DUSE_COMPUTED_GOTO
+FLAGS_COMMON += -DUSE_COMPUTED_GOTO
 endif
-FLAGS += -Wall -Wno-strict-aliasing -fvisibility=hidden -Wpointer-arith -Wundef
-FLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
+FLAGS_COMMON += -Wall -Wno-strict-aliasing -fvisibility=hidden -Wpointer-arith -Wundef
+FLAGS_COMMON += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
 
-DEBUGFLAGS += $(FLAGS)
-SHIPFLAGS += $(FLAGS)
+SHIPFLAGS_COMMON  += $(FLAGS_COMMON)
+DEBUGFLAGS_COMMON += $(FLAGS_COMMON)
 
 default: release
 
-release: $(BUILDDIR)/$(EXENAME)$(EXE)
+release: $(BUILDDIR)/$(EXENAME)$(EXE) regenerate-compile_commands
 
-debug: $(BUILDDIR)/$(EXENAME)-debug$(EXE)
+debug: $(BUILDDIR)/$(EXENAME)-debug$(EXE) regenerate-compile_commands
 
 $(BUILDDIR):
 	mkdir -p $(BUILDDIR)
 
 $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
-	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
+	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
-	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(DEBUGFLAGS) -c $< -o $@)
+	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@)
 
 FLISP_SRCS := $(addprefix $(SRCDIR)/,flisp.c cvalues.c types.c flisp.h print.c read.c equal.c)
 FLMAIN_SRCS := $(addprefix $(SRCDIR)/,flmain.c flisp.h)
@@ -111,13 +111,13 @@ $(BUILDDIR)/$(EXENAME)$(EXE): $(OBJS) $(LIBFILES_release) $(BUILDDIR)/$(LIBTARGE
 $(BUILDDIR)/host/Makefile:
 	mkdir -p $(BUILDDIR)/host
 	@# add Makefiles to the build directories for convenience (pointing back to the source location of each)
-	@echo '# -- This file is automatically generated in julia/src/flisp/Makefile -- #' > $@
-	@echo 'BUILDDIR=$(BUILDDIR)/host' >> $@
-	@echo 'BUILDING_HOST_TOOLS=1' >> $@
-	@echo 'include $(SRCDIR)/Makefile' >> $@
+	@printf "%s\n" '# -- This file is automatically generated in julia/src/flisp/Makefile -- #' > $@
+	@printf "%s\n" 'BUILDDIR=$(BUILDDIR)/host' >> $@
+	@printf "%s\n" 'BUILDING_HOST_TOOLS=1' >> $@
+	@printf "%s\n" 'include $(SRCDIR)/Makefile' >> $@
 
 $(BUILDDIR)/host/$(EXENAME): $(BUILDDIR)/host/Makefile | ${BUILDDIR}/host/flisp.boot
-	make -C $(BUILDDIR)/host $(EXENAME)
+	$(MAKE) -C $(BUILDDIR)/host $(EXENAME)
 
 
 $(BUILDDIR)/host/flisp.boot: $(SRCDIR)/flisp.boot | $(BUILDDIR)/host/Makefile
@@ -133,12 +133,50 @@ endif
 test:
 	$(call spawn,./$(EXENAME)$(EXE)) unittest.lsp
 
+# Common flag patterns for all clang tooling (clang-sa, clang-tidy, compile-database)
+CLANG_TOOLING_C_FLAGS = $(CLANGSA_FLAGS) $(DEBUGFLAGS_CLANG) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG)
+
+# Included files in flisp
+INCLUDED_FLISP_FILES := flisp.c:cvalues.c flisp.c:types.c flisp.c:print.c flisp.c:read.c flisp.c:equal.c
+
+# Compilation database generation
+.PHONY: regenerate-compile_commands
+regenerate-compile_commands:
+	TMPFILE=$$(mktemp $(abspath $(BUILDDIR)/compile_commands.json.XXXXXX)); \
+	{ \
+		CLANG_TOOLING_C_FLAGS="$$($(JULIAHOME)/contrib/escape_json.sh clang $(CLANG_TOOLING_C_FLAGS))"; \
+		echo "["; \
+		first=true; \
+		for src in $(SRCS) flmain.c; do \
+			[ "$$first" = "true" ] && first=false || echo ","; \
+			cmd="$${CLANG_TOOLING_C_FLAGS}, \"$$src\""; \
+			printf '{\n  "directory": "%s",\n  "file": "%s",\n  "arguments": [%s]\n}' "$(abspath $(SRCDIR))" "$$src" "$$cmd"; \
+		done; \
+		for included_pair in $(INCLUDED_FLISP_FILES); do \
+			[ "$$first" = "true" ] && first=false || echo ","; \
+			including_file=$${included_pair%%:*}; \
+			included_file=$${included_pair##*:}; \
+			cmd="$${CLANG_TOOLING_C_FLAGS}, \"$$including_file\""; \
+			printf '{\n  "directory": "%s",\n  "file": "%s",\n  "arguments": [%s]\n}' "$(abspath $(SRCDIR))" "$$included_file" "$$cmd"; \
+		done; \
+		echo "]"; \
+	} > $$TMPFILE; \
+	if ! cmp -s $$TMPFILE $(BUILDDIR)/compile_commands.json; then \
+		mv $$TMPFILE $(BUILDDIR)/compile_commands.json; \
+	else \
+		rm -f $$TMPFILE; \
+	fi
+
+compile-database: regenerate-compile_commands
+	@echo "Compilation database created for src/flisp"
+
 clean:
 	rm -f $(BUILDDIR)/*.o
 	rm -f $(BUILDDIR)/*.dbg.obj
 	rm -f $(BUILDDIR)/*.a
 	rm -f $(BUILDDIR)/$(EXENAME)$(EXE)
 	rm -f $(BUILDDIR)/$(EXENAME)-debug$(EXE)
+	rm -f $(BUILDDIR)/compile_commands.json*
 	rm -f $(BUILDDIR)/host/*
 
-.PHONY: flisp-deps
+.PHONY: flisp-deps compile-database
diff --git a/src/flisp/compiler.lsp b/src/flisp/compiler.lsp
index fdc516dce3ea8..e5a79e8fee6bb 100644
--- a/src/flisp/compiler.lsp
+++ b/src/flisp/compiler.lsp
@@ -864,7 +864,7 @@
 
 		 (else #f)))))))
 
-; From SRFI 89 by Marc Feeley (http://srfi.schemers.org/srfi-89/srfi-89.html)
+; From SRFI 89 by Marc Feeley (https://srfi.schemers.org/srfi-89/srfi-89.html)
 ; Copyright (C) Marc Feeley 2006. All Rights Reserved.
 ;
 ; "alist" is a list of pairs of the form "(keyword . value)"
diff --git a/src/flisp/cvalues.c b/src/flisp/cvalues.c
index a5635c238ba3c..749b8802dfe82 100644
--- a/src/flisp/cvalues.c
+++ b/src/flisp/cvalues.c
@@ -101,7 +101,7 @@ void cv_autorelease(fl_context_t *fl_ctx, cvalue_t *cv)
     autorelease(fl_ctx, cv);
 }
 
-static value_t cprim(fl_context_t *fl_ctx, fltype_t *type, size_t sz)
+value_t cprim(fl_context_t *fl_ctx, fltype_t *type, size_t sz)
 {
     cprim_t *pcp = (cprim_t*)alloc_words(fl_ctx, CPRIM_NWORDS-1+NWORDS(sz));
     pcp->type = type;
diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h
index b031e456cd3fe..f8dd1cfd81ed0 100644
--- a/src/flisp/flisp.h
+++ b/src/flisp/flisp.h
@@ -158,7 +158,7 @@ value_t fl_cons(fl_context_t *fl_ctx, value_t a, value_t b) JL_NOTSAFEPOINT;
 value_t fl_list2(fl_context_t *fl_ctx, value_t a, value_t b) JL_NOTSAFEPOINT;
 value_t fl_listn(fl_context_t *fl_ctx, size_t n, ...) JL_NOTSAFEPOINT;
 value_t symbol(fl_context_t *fl_ctx, const char *str) JL_NOTSAFEPOINT;
-char *symbol_name(fl_context_t *fl_ctx, value_t v);
+char *symbol_name(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
 int fl_is_keyword_name(const char *str, size_t len);
 value_t alloc_vector(fl_context_t *fl_ctx, size_t n, int init);
 size_t llength(value_t v);
@@ -328,6 +328,7 @@ typedef float    fl_float_t;
 typedef value_t (*builtin_t)(fl_context_t*, value_t*, uint32_t);
 
 value_t cvalue(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT;
+value_t cprim(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT;
 value_t cvalue_no_finalizer(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT;
 void add_finalizer(fl_context_t *fl_ctx, cvalue_t *cv);
 void cv_autorelease(fl_context_t *fl_ctx, cvalue_t *cv);
diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c
index f29e3972755c5..c39c2edfe0f37 100644
--- a/src/flisp/julia_extensions.c
+++ b/src/flisp/julia_extensions.c
@@ -76,7 +76,7 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat)
              wc != 0x233f &&  // notslash
              wc != 0x00a6) || // broken bar
 
-            // math symbol (category Sm) whitelist
+            // math symbol (category Sm) allowlist
             (wc >= 0x2140 && wc <= 0x2a1c &&
              ((wc >= 0x2140 && wc <= 0x2144) || // ⅀, ⅁, ⅂, ⅃, ⅄
               wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿
@@ -130,6 +130,9 @@ JL_DLLEXPORT int jl_id_start_char(uint32_t wc)
         return 1;
     if (wc < 0xA1 || wc > 0x10ffff)
         return 0;
+    // "Rightwards Arrow with Lower Hook"
+    if (wc == 0x1f8b2)
+    	return 1;
     return is_wc_cat_id_start(wc, utf8proc_category((utf8proc_int32_t) wc));
 }
 
@@ -147,7 +150,9 @@ JL_DLLEXPORT int jl_id_char(uint32_t wc)
         cat == UTF8PROC_CATEGORY_SK || cat == UTF8PROC_CATEGORY_ME ||
         cat == UTF8PROC_CATEGORY_NO ||
         // primes (single, double, triple, their reverses, and quadruple)
-        (wc >= 0x2032 && wc <= 0x2037) || (wc == 0x2057))
+        (wc >= 0x2032 && wc <= 0x2037) || (wc == 0x2057) ||
+        // "Rightwards Arrow with Lower Hook"
+        wc == 0x1f8b2)
         return 1;
     return 0;
 }
@@ -405,7 +410,7 @@ value_t fl_string_only_julia_char(fl_context_t *fl_ctx, value_t *args, uint32_t
     uint8_t *s = (uint8_t*)cvalue_data(args[0]);
     size_t len = cv_len((cvalue_t*)ptr(args[0]));
     uint32_t u = _string_only_julia_char(s, len);
-    if (u == (uint32_t)-1)
+    if (u == UINT32_MAX)
         return fl_ctx->F;
     return fl_list2(fl_ctx, fl_ctx->jl_char_sym, mk_uint32(fl_ctx, u));
 }
diff --git a/src/flisp/print.c b/src/flisp/print.c
index 2b20d0d98b225..a6f633c2e6701 100644
--- a/src/flisp/print.c
+++ b/src/flisp/print.c
@@ -518,7 +518,7 @@ static void print_string(fl_context_t *fl_ctx, ios_t *f, char *str, size_t sz)
     }
     else {
         while (i < sz) {
-            size_t n = u8_escape(buf, sizeof(buf), str, &i, sz, 1, 0);
+            size_t n = u8_escape(buf, sizeof(buf), str, &i, sz, "\"", 0);
             outsn(fl_ctx, buf, f, n-1);
         }
     }
diff --git a/src/flisp/read.c b/src/flisp/read.c
index 9a480e0536c7a..7a6039323a988 100644
--- a/src/flisp/read.c
+++ b/src/flisp/read.c
@@ -303,7 +303,7 @@ static uint32_t peek(fl_context_t *fl_ctx)
             fl_ctx->readtokval = fixnum(x);
         }
         else if (c == '!') {
-            // #! single line comment for shbang script support
+            // #! single line comment for shebang script support
             do {
                 ch = ios_getc(readF(fl_ctx));
             } while (ch != IOS_EOF && (char)ch != '\n');
diff --git a/src/flisp/table.c b/src/flisp/table.c
index 1d8aed358e88d..8836c93f81513 100644
--- a/src/flisp/table.c
+++ b/src/flisp/table.c
@@ -102,7 +102,7 @@ value_t fl_table(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
         else
             k = arg;
     }
-    if (h->table != &h->_space[0]) {
+    if (cnt <= HT_N_INLINE && h->table != &h->_space[0]) {
         // We expected to use the inline table, but we ended up outgrowing it.
         // Make sure to register the finalizer.
         add_finalizer(fl_ctx, (cvalue_t*)ptr(nt));
diff --git a/src/flisp/unittest.lsp b/src/flisp/unittest.lsp
index 584d5c81225e8..16774a97e3233 100644
--- a/src/flisp/unittest.lsp
+++ b/src/flisp/unittest.lsp
@@ -267,4 +267,23 @@
 (assert (equal? `(a `(b c)) '(a (quasiquote (b c)))))
 (assert (equal? ````x '```x))
 
+;; make many initialized tables large enough not to be stored in-line
+(for 1 100
+     (lambda (i)
+       (table eq?      2      eqv?     2
+              equal?   2      atom?    1
+              not      1      null?    1
+              boolean? 1      symbol?  1
+              number?  1      bound?   1
+              pair?    1      builtin? 1
+              vector?  1      fixnum?  1
+              cons     2      car      1
+              cdr      1      set-car! 2
+              set-cdr! 2      =        2
+              <        2      compare  2
+              aref     2      aset!    3
+              div0     2)))
+;; now allocate enough to trigger GC
+(for 1 8000000 (lambda (i) (cons 1 2)))
+
 #t
diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp
index 1bcbeb2189f5f..cbaa6f15012c7 100644
--- a/src/gc-alloc-profiler.cpp
+++ b/src/gc-alloc-profiler.cpp
@@ -3,13 +3,13 @@
 #include "gc-alloc-profiler.h"
 
 #include "julia_internal.h"
-#include "gc.h"
+
+#include "llvm/ADT/SmallVector.h"
 
 #include <string>
-#include <vector>
 
 using std::string;
-using std::vector;
+using llvm::SmallVector;
 
 struct jl_raw_backtrace_t {
     jl_bt_element_t *data;
@@ -27,17 +27,17 @@ struct jl_raw_alloc_t {
 // == These structs define the global singleton profile buffer that will be used by
 // callbacks to store profile results. ==
 struct jl_per_thread_alloc_profile_t {
-    vector<jl_raw_alloc_t> allocs;
+    SmallVector<jl_raw_alloc_t, 0> allocs;
 };
 
 struct jl_alloc_profile_t {
     double sample_rate;
 
-    vector<jl_per_thread_alloc_profile_t> per_thread_profiles;
+    SmallVector<jl_per_thread_alloc_profile_t, 0> per_thread_profiles;
 };
 
 struct jl_combined_results {
-    vector<jl_raw_alloc_t> combined_allocs;
+    SmallVector<jl_raw_alloc_t, 0> combined_allocs;
 };
 
 // == Global variables manipulated by callbacks ==
@@ -138,7 +138,8 @@ void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t
 
     auto& profile = global_profile.per_thread_profiles[thread_id];
 
-    auto sample_val = double(rand()) / double(RAND_MAX);
+    jl_ptls_t ptls = jl_current_task->ptls;
+    auto sample_val = double(cong(UINT64_MAX, &ptls->rngseed)) / double(UINT64_MAX);
     auto should_record = sample_val <= global_profile.sample_rate;
     if (!should_record) {
         return;
diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h
index 3fd8bf4388a0a..fcd8e45caa2d8 100644
--- a/src/gc-alloc-profiler.h
+++ b/src/gc-alloc-profiler.h
@@ -35,6 +35,7 @@ void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t
 
 extern int g_alloc_profile_enabled;
 
+// This should only be used from _deprecated_ code paths. We shouldn't see UNKNOWN anymore.
 #define jl_gc_unknown_type_tag ((jl_datatype_t*)0xdeadaa03)
 
 static inline void maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *typ) JL_NOTSAFEPOINT {
diff --git a/src/gc-common.c b/src/gc-common.c
new file mode 100644
index 0000000000000..811e441960eb1
--- /dev/null
+++ b/src/gc-common.c
@@ -0,0 +1,724 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "gc-common.h"
+#include "julia.h"
+#include "julia_atomics.h"
+#include "julia_gcext.h"
+#include "julia_assert.h"
+#include "threading.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =========================================================================== //
+// GC Metrics
+// =========================================================================== //
+
+jl_gc_num_t gc_num = {0};
+
+JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void)
+{
+    return gc_num.total_time;
+}
+
+// =========================================================================== //
+// GC Callbacks
+// =========================================================================== //
+
+jl_gc_callback_list_t *gc_cblist_root_scanner;
+jl_gc_callback_list_t *gc_cblist_task_scanner;
+jl_gc_callback_list_t *gc_cblist_pre_gc;
+jl_gc_callback_list_t *gc_cblist_post_gc;
+jl_gc_callback_list_t *gc_cblist_notify_external_alloc;
+jl_gc_callback_list_t *gc_cblist_notify_external_free;
+jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
+
+static void jl_gc_register_callback(jl_gc_callback_list_t **list,
+        jl_gc_cb_func_t func)
+{
+    while (*list != NULL) {
+        if ((*list)->func == func)
+            return;
+        list = &((*list)->next);
+    }
+    *list = (jl_gc_callback_list_t *)malloc_s(sizeof(jl_gc_callback_list_t));
+    (*list)->next = NULL;
+    (*list)->func = func;
+}
+
+static void jl_gc_deregister_callback(jl_gc_callback_list_t **list,
+        jl_gc_cb_func_t func)
+{
+    while (*list != NULL) {
+        if ((*list)->func == func) {
+            jl_gc_callback_list_t *tmp = *list;
+            (*list) = (*list)->next;
+            free(tmp);
+            return;
+        }
+        list = &((*list)->next);
+    }
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
+}
+
+// =========================================================================== //
+// malloc wrappers, aligned allocation
+// =========================================================================== //
+
+#if defined(_OS_WINDOWS_)
+// helper function based partly on wine msvcrt80+ heap.c
+// but with several fixes to improve the correctness of the computation and remove unnecessary parameters
+#define SAVED_PTR(x) ((void *)((DWORD_PTR)((char *)x - sizeof(void *)) & \
+                               ~(sizeof(void *) - 1)))
+static size_t _jl_aligned_msize(void *p)
+{
+    void *alloc_ptr = *(void**)SAVED_PTR(p);
+    return _msize(alloc_ptr) - ((char*)p - (char*)alloc_ptr);
+}
+#undef SAVED_PTR
+#endif
+
+size_t memory_block_usable_size(void *p, int isaligned) JL_NOTSAFEPOINT
+{
+#if defined(_OS_WINDOWS_)
+    if (isaligned)
+        return _jl_aligned_msize(p);
+    else
+        return _msize(p);
+#elif defined(_OS_DARWIN_)
+    return malloc_size(p);
+#else
+    return malloc_usable_size(p);
+#endif
+}
+
+// =========================================================================== //
+// Finalization
+// =========================================================================== //
+
+jl_mutex_t finalizers_lock;
+arraylist_t finalizer_list_marked;
+arraylist_t to_finalize;
+JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
+
+void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
+{
+    arraylist_push(&to_finalize, o);
+    arraylist_push(&to_finalize, f);
+    // doesn't need release, since we'll keep checking (on the reader) until we see the work and
+    // release our lock, and that will have a release barrier by then
+    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1);
+}
+
+void run_finalizer(jl_task_t *ct, void *o, void *ff)
+{
+    int ptr_finalizer = gc_ptr_tag(o, 1);
+    o = gc_ptr_clear_tag(o, 3);
+    if (ptr_finalizer) {
+        ((void (*)(void*))ff)((void*)o);
+        return;
+    }
+    JL_TRY {
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1);
+        ct->world_age = last_age;
+    }
+    JL_CATCH {
+        jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: ");
+        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
+        jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+        jlbacktrace(); // written to STDERR_FILENO
+    }
+}
+
+// if `need_sync` is true, the `list` is the `finalizers` list of another
+// thread and we need additional synchronizations
+static void finalize_object(arraylist_t *list, jl_value_t *o,
+                            arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
+{
+    // The acquire load makes sure that the first `len` objects are valid.
+    // If `need_sync` is true, all mutations of the content should be limited
+    // to the first `oldlen` elements and no mutation is allowed after the
+    // new length is published with the `cmpxchg` at the end of the function.
+    // This way, the mutation should not conflict with the owning thread,
+    // which only writes to locations later than `len`
+    // and will not resize the buffer without acquiring the lock.
+    size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len;
+    size_t oldlen = len;
+    void **items = list->items;
+    size_t j = 0;
+    for (size_t i = 0; i < len; i += 2) {
+        void *v = items[i];
+        int move = 0;
+        if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
+            void *f = items[i + 1];
+            move = 1;
+            arraylist_push(copied_list, v);
+            arraylist_push(copied_list, f);
+        }
+        if (move || __unlikely(!v)) {
+            // remove item
+        }
+        else {
+            if (j < i) {
+                items[j] = items[i];
+                items[j+1] = items[i+1];
+            }
+            j += 2;
+        }
+    }
+    len = j;
+    if (oldlen == len)
+        return;
+    if (need_sync) {
+        // The memset needs to be unconditional since the thread might have
+        // already read the length.
+        // The `memset` (like any other content mutation) has to be done
+        // **before** the `cmpxchg` which publishes the length.
+        memset(&items[len], 0, (oldlen - len) * sizeof(void*));
+        jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len);
+    }
+    else {
+        list->len = len;
+    }
+}
+
+// The first two entries are assumed to be empty and the rest are assumed to
+// be pointers to `jl_value_t` objects
+static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT
+{
+    void **items = list->items;
+    items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2);
+    items[1] = ct->gcstack;
+    ct->gcstack = (jl_gcframe_t*)items;
+}
+
+// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
+// to be hold for the current thread and will release the lock when the
+// function returns.
+static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE
+{
+    // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring
+    // of `finalizer`) in a finalizer:
+    uint8_t sticky = ct->sticky;
+    // empty out the first two entries for the GC frame
+    arraylist_push(list, list->items[0]);
+    arraylist_push(list, list->items[1]);
+    jl_gc_push_arraylist(ct, list);
+    void **items = list->items;
+    size_t len = list->len;
+    JL_UNLOCK_NOGC(&finalizers_lock);
+    // run finalizers in reverse order they were added, so lower-level finalizers run last
+    for (size_t i = len-4; i >= 2; i -= 2)
+        run_finalizer(ct, items[i], items[i + 1]);
+    // first entries were moved last to make room for GC frame metadata
+    run_finalizer(ct, items[len-2], items[len-1]);
+    // matches the jl_gc_push_arraylist above
+    JL_GC_POP();
+    ct->sticky = sticky;
+}
+
+static uint64_t finalizer_rngState[JL_RNG_SIZE];
+
+void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
+{
+    jl_rng_split(finalizer_rngState, jl_current_task->rngState);
+}
+
+void run_finalizers(jl_task_t *ct, int finalizers_thread)
+{
+    // Racy fast path:
+    // The race here should be OK since the race can only happen if
+    // another thread is writing to it with the lock held. In such case,
+    // we don't need to run pending finalizers since the writer thread
+    // will flush it.
+    if (to_finalize.len == 0)
+        return;
+    JL_LOCK_NOGC(&finalizers_lock);
+    if (to_finalize.len == 0) {
+        JL_UNLOCK_NOGC(&finalizers_lock);
+        return;
+    }
+    arraylist_t copied_list;
+    memcpy(&copied_list, &to_finalize, sizeof(copied_list));
+    if (to_finalize.items == to_finalize._space) {
+        copied_list.items = copied_list._space;
+    }
+    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0);
+    arraylist_new(&to_finalize, 0);
+
+    uint64_t save_rngState[JL_RNG_SIZE];
+    memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState));
+    jl_rng_split(ct->rngState, finalizer_rngState);
+
+    // This releases the finalizers lock.
+    int8_t was_in_finalizer = ct->ptls->in_finalizer;
+    ct->ptls->in_finalizer = !finalizers_thread;
+    jl_gc_run_finalizers_in_list(ct, &copied_list);
+    ct->ptls->in_finalizer = was_in_finalizer;
+    arraylist_free(&copied_list);
+
+    memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState));
+}
+
+JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
+{
+    if (ct == NULL)
+        ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0 && ptls->engine_nqueued == 0) {
+        run_finalizers(ct, 0);
+    }
+}
+
+JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
+{
+    if (ptls == NULL)
+        ptls = jl_current_task->ptls;
+    return ptls->finalizers_inhibited;
+}
+
+JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    ptls->finalizers_inhibited++;
+}
+
+JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void)
+{
+    jl_task_t *ct = jl_current_task;
+#ifdef NDEBUG
+    ct->ptls->finalizers_inhibited--;
+#else
+    jl_gc_enable_finalizers(ct, 1);
+#endif
+}
+
+JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
+{
+    if (ct == NULL)
+        ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    int old_val = ptls->finalizers_inhibited;
+    int new_val = old_val + (on ? -1 : 1);
+    if (new_val < 0) {
+        JL_TRY {
+            jl_error(""); // get a backtrace
+        }
+        JL_CATCH {
+            jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
+            // Only print the backtrace once, to avoid spamming the logs
+            static _Atomic(int) backtrace_printed = 0;
+            if (jl_atomic_load_relaxed(&backtrace_printed) == 0) {
+              if (jl_atomic_exchange_relaxed(&backtrace_printed, 1) == 0) {
+                  jlbacktrace(); // written to STDERR_FILENO
+              }
+            }
+        }
+        return;
+    }
+    ptls->finalizers_inhibited = new_val;
+    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
+        jl_gc_run_pending_finalizers(ct);
+    }
+}
+
+JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void)
+{
+    return jl_current_task->ptls->in_finalizer;
+}
+
+static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT
+{
+    void **items = flist->items;
+    size_t len = flist->len;
+    for(size_t i = 0; i < len; i+=2) {
+        void *v = items[i];
+        void *f = items[i + 1];
+        if (__unlikely(!v))
+            continue;
+        schedule_finalization(v, f);
+    }
+    flist->len = 0;
+}
+
+void jl_gc_run_all_finalizers(jl_task_t *ct)
+{
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    // this is called from `jl_atexit_hook`; threads could still be running
+    // so we have to guard the finalizers' lists
+    JL_LOCK_NOGC(&finalizers_lock);
+    schedule_all_finalizers(&finalizer_list_marked);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL)
+            schedule_all_finalizers(&ptls2->finalizers);
+    }
+    // unlock here because `run_finalizers` locks this
+    JL_UNLOCK_NOGC(&finalizers_lock);
+    run_finalizers(ct, 1);
+}
+
+void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
+{
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_STATE_UNSAFE);
+    arraylist_t *a = &ptls->finalizers;
+    // This acquire load and the release store at the end are used to
+    // synchronize with `finalize_object` on another thread. Apart from the GC,
+    // which is blocked by entering a unsafe region, there might be only
+    // one other thread accessing our list in `finalize_object`
+    // (only one thread since it needs to acquire the finalizer lock).
+    // Similar to `finalize_object`, all content mutation has to be done
+    // between the acquire and the release of the length.
+    size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len);
+    if (__unlikely(oldlen + 2 > a->max)) {
+        JL_LOCK_NOGC(&finalizers_lock);
+        // `a->len` might have been modified.
+        // Another possibility is to always grow the array to `oldlen + 2` but
+        // it's simpler this way and uses slightly less memory =)
+        oldlen = a->len;
+        arraylist_grow(a, 2);
+        a->len = oldlen;
+        JL_UNLOCK_NOGC(&finalizers_lock);
+    }
+    void **items = a->items;
+    items[oldlen] = v;
+    items[oldlen + 1] = f;
+    jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
+}
+
+JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
+{
+    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
+}
+
+// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads)
+JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT
+{
+    assert(!gc_ptr_tag(v, 3));
+    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f);
+}
+
+JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_value_t *f) JL_NOTSAFEPOINT
+{
+    if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) {
+        jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
+    }
+    else {
+        jl_gc_add_finalizer_(ptls, v, f);
+    }
+}
+
+JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_value_t *f)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_gc_add_finalizer_th(ptls, v, f);
+}
+
+JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
+{
+    JL_LOCK_NOGC(&finalizers_lock);
+    // Copy the finalizers into a temporary list so that code in the finalizer
+    // won't change the list as we loop through them.
+    // This list is also used as the GC frame when we are running the finalizers
+    arraylist_t copied_list;
+    arraylist_new(&copied_list, 0);
+    // No need to check the to_finalize list since the user is apparently
+    // still holding a reference to the object
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL)
+            finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
+    }
+    finalize_object(&finalizer_list_marked, o, &copied_list, 0);
+    if (copied_list.len > 0) {
+        // This releases the finalizers lock.
+        jl_gc_run_finalizers_in_list(ct, &copied_list);
+    }
+    else {
+        JL_UNLOCK_NOGC(&finalizers_lock);
+    }
+    arraylist_free(&copied_list);
+}
+
+JL_DLLEXPORT void jl_finalize(jl_value_t *o)
+{
+    jl_finalize_th(jl_current_task, o);
+}
+
+// =========================================================================== //
+// Threading
+// =========================================================================== //
+
+int gc_n_threads;
+jl_ptls_t* gc_all_tls_states;
+
+// =========================================================================== //
+// Allocation
+// =========================================================================== //
+
+JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty)
+{
+    return jl_gc_alloc(ptls, sz, ty);
+}
+
+JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_alloc(ptls, sz, NULL);
+}
+
+// allocator entry points
+
+JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)
+{
+    return jl_gc_alloc_(ptls, sz, ty);
+}
+
+JL_DLLEXPORT void *jl_malloc(size_t sz)
+{
+    return jl_gc_counted_malloc(sz);
+}
+
+//_unchecked_calloc does not check for potential overflow of nm*sz
+STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
+    size_t nmsz = nm*sz;
+    return jl_gc_counted_calloc(nmsz, 1);
+}
+
+JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
+{
+    if (nm > SSIZE_MAX/sz)
+        return NULL;
+    return _unchecked_calloc(nm, sz);
+}
+
+JL_DLLEXPORT void jl_free(void *p)
+{
+    if (p != NULL) {
+        size_t sz = memory_block_usable_size(p, 0);
+        return jl_gc_counted_free_with_size(p, sz);
+    }
+}
+
+JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
+{
+    size_t old = p ? memory_block_usable_size(p, 0) : 0;
+    return jl_gc_counted_realloc_with_old_size(p, old, sz);
+}
+
+// =========================================================================== //
+// Generic Memory
+// =========================================================================== //
+
+size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
+{
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout;
+    size_t sz = layout->size * m->length;
+    if (layout->flags.arrayelem_isunion)
+        // account for isbits Union array selector bytes
+        sz += m->length;
+    return sz;
+}
+
+// tracking Memorys with malloc'd storage
+void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
+    // This is **NOT** a GC safe point.
+    void *a = (void*)((uintptr_t)m | !!isaligned);
+    small_arraylist_push(&ptls->gc_tls_common.heap.mallocarrays, a);
+}
+
+// =========================================================================== //
+// GC Debug
+// =========================================================================== //
+
+int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
+{
+    int nf = (int)jl_datatype_nfields(vt);
+    for (int i = 1; i < nf; i++) {
+        if (slot < (void*)((char*)obj + jl_field_offset(vt, i)))
+            return i - 1;
+    }
+    return nf - 1;
+}
+
+int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
+{
+    char *slot = (char*)_slot;
+    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
+    char *start = NULL;
+    size_t len = 0;
+    size_t elsize = sizeof(void*);
+    if (vt == jl_module_type) {
+        jl_module_t *m = (jl_module_t*)obj;
+        start = (char*)m->usings.items;
+        len = module_usings_length(m);
+        elsize = sizeof(struct _jl_module_using);
+    }
+    else if (vt == jl_simplevector_type) {
+        start = (char*)jl_svec_data(obj);
+        len = jl_svec_len(obj);
+    }
+    if (slot < start || slot >= start + elsize * len)
+        return -1;
+    return (slot - start) / elsize;
+}
+
+// =========================================================================== //
+// GC Control
+// =========================================================================== //
+
+JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) {
+    return jl_atomic_load_acquire(&jl_gc_disable_counter);
+}
+
+JL_DLLEXPORT int jl_gc_is_enabled(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return !ptls->disable_gc;
+}
+
+int gc_logging_enabled = 0;
+
+JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
+    gc_logging_enabled = enable;
+}
+
+JL_DLLEXPORT int jl_is_gc_logging_enabled(void) {
+    return gc_logging_enabled;
+}
+
+
+// collector entry point and control
+_Atomic(uint32_t) jl_gc_disable_counter = 1;
+
+JL_DLLEXPORT int jl_gc_enable(int on)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    int prev = !ptls->disable_gc;
+    ptls->disable_gc = (on == 0);
+    if (on && !prev) {
+        // disable -> enable
+        if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) {
+            gc_num.allocd += gc_num.deferred_alloc;
+            gc_num.deferred_alloc = 0;
+        }
+    }
+    else if (prev && !on) {
+        // enable -> disable
+        jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
+        // check if the GC is running and wait for it to finish
+        jl_gc_safepoint_(ptls);
+    }
+    return prev;
+}
+
+// =========================================================================== //
+// MISC
+// =========================================================================== //
+
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_new_weakref_th(ptls, value);
+}
+
+const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00
+JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT
+{
+    return jl_buff_tag;
+}
+
+// callback for passing OOM errors from gmp
+JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
+{
+    jl_throw(jl_memory_exception);
+}
+
+// Sweeping mtarraylist_buffers:
+// These buffers are made unreachable via `mtarraylist_resizeto` from mtarraylist.c
+// and are freed at the end of GC via jl_gc_sweep_stack_pools_and_mtarraylist_buffers
+void sweep_mtarraylist_buffers(void) JL_NOTSAFEPOINT
+{
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls = gc_all_tls_states[i];
+        if (ptls == NULL) {
+            continue;
+        }
+        small_arraylist_t *buffers = &ptls->lazily_freed_mtarraylist_buffers;
+        void *buf;
+        while ((buf = small_arraylist_pop(buffers)) != NULL) {
+            free(buf);
+        }
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/gc-common.h b/src/gc-common.h
new file mode 100644
index 0000000000000..4a489d3e276bc
--- /dev/null
+++ b/src/gc-common.h
@@ -0,0 +1,226 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_GC_COMMON_H
+#define JL_GC_COMMON_H
+
+#include "julia.h"
+#include "julia_internal.h"
+#ifndef _OS_WINDOWS_
+#include <sys/mman.h>
+#if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#endif
+
+#include <stdlib.h>
+
+#if defined(_OS_DARWIN_)
+#include <malloc/malloc.h>
+#else
+#include <malloc.h> // for malloc_trim
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =========================================================================== //
+// GC Big objects
+// =========================================================================== //
+
+// layout for big (>2k) objects
+JL_EXTENSION typedef struct _bigval_t {
+    struct _bigval_t *next;
+    struct _bigval_t *prev;
+    size_t sz;
+#ifdef _P64 // Add padding so that the value is 64-byte aligned
+    // (8 pointers of 8 bytes each) - (4 other pointers in struct)
+    void *_padding[8 - 4];
+#else
+    // (16 pointers of 4 bytes each) - (4 other pointers in struct)
+    void *_padding[16 - 4];
+#endif
+    //struct jl_taggedvalue_t <>;
+    union {
+        uintptr_t header;
+        struct {
+            uintptr_t gc:2;
+        } bits;
+    };
+    // must be 64-byte aligned here, in 32 & 64 bit modes
+} bigval_t;
+
+// =========================================================================== //
+// GC Callbacks
+// =========================================================================== //
+
+typedef void (*jl_gc_cb_func_t)(void);
+
+typedef struct _jl_gc_callback_list_t {
+    struct _jl_gc_callback_list_t *next;
+    jl_gc_cb_func_t func;
+} jl_gc_callback_list_t;
+
+extern jl_gc_callback_list_t *gc_cblist_root_scanner;
+extern jl_gc_callback_list_t *gc_cblist_task_scanner;
+extern jl_gc_callback_list_t *gc_cblist_pre_gc;
+extern jl_gc_callback_list_t *gc_cblist_post_gc;
+extern jl_gc_callback_list_t *gc_cblist_notify_external_alloc;
+extern jl_gc_callback_list_t *gc_cblist_notify_external_free;
+extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
+
+#define gc_invoke_callbacks(ty, list, args) \
+    do { \
+        for (jl_gc_callback_list_t *cb = list; \
+                cb != NULL; \
+                cb = cb->next) \
+        { \
+            ((ty)(cb->func)) args; \
+        } \
+    } while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+// =========================================================================== //
+// malloc wrappers, aligned allocation
+// =========================================================================== //
+
+#if defined(_OS_WINDOWS_)
+STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
+{
+    return _aligned_malloc(sz ? sz : 1, align);
+}
+STATIC_INLINE void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz,
+                                       size_t align)
+{
+    (void)oldsz;
+    return _aligned_realloc(p, sz ? sz : 1, align);
+}
+STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
+{
+    _aligned_free(p);
+}
+#else
+STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
+{
+#if defined(_P64) || defined(__APPLE__)
+    if (align <= 16)
+        return malloc(sz);
+#endif
+    void *ptr;
+    if (posix_memalign(&ptr, align, sz))
+        return NULL;
+    return ptr;
+}
+STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz,
+                                       size_t align)
+{
+#if defined(_P64) || defined(__APPLE__)
+    if (align <= 16)
+        return realloc(d, sz);
+#endif
+    void *b = jl_malloc_aligned(sz, align);
+    if (b != NULL) {
+        memcpy(b, d, oldsz > sz ? sz : oldsz);
+        free(d);
+    }
+    return b;
+}
+STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
+{
+    free(p);
+}
+#endif
+#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)
+#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT)
+
+// =========================================================================== //
+// Pointer tagging
+// =========================================================================== //
+
+STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT
+{
+    return (bits & GC_MARKED) != 0;
+}
+
+STATIC_INLINE int gc_old(uintptr_t bits) JL_NOTSAFEPOINT
+{
+    return (bits & GC_OLD) != 0;
+}
+
+STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT
+{
+    return (tag & ~(uintptr_t)3) | bits;
+}
+
+STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
+{
+    return ((uintptr_t)v) & mask;
+}
+
+STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
+{
+    return (void*)(((uintptr_t)v) & ~mask);
+}
+
+// =========================================================================== //
+// GC Metrics
+// =========================================================================== //
+
+extern jl_gc_num_t gc_num;
+
+// =========================================================================== //
+// Stop-the-world for GC
+// =========================================================================== //
+void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads);
+
+// =========================================================================== //
+// Finalization
+// =========================================================================== //
+
+// Protect all access to `finalizer_list_marked` and `to_finalize`.
+// For accessing `ptls->finalizers`, the lock is needed if a thread
+// is going to realloc the buffer (of its own list) or accessing the
+// list of another thread
+extern jl_mutex_t finalizers_lock;
+// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
+// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
+// If an object pointer has the second lowest bit set, the current pointer is a c object pointer.
+//   It must be aligned at least 4, and it finalized immediately (at "quiescence").
+// `to_finalize` should not have tagged pointers.
+extern arraylist_t finalizer_list_marked;
+extern arraylist_t to_finalize;
+
+void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT;
+void run_finalizer(jl_task_t *ct, void *o, void *ff);
+void run_finalizers(jl_task_t *ct, int finalizers_thread);
+JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_value_t *f) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o);
+
+
+// =========================================================================== //
+// Threading
+// =========================================================================== //
+
+extern int gc_n_threads;
+extern jl_ptls_t* gc_all_tls_states;
+
+// =========================================================================== //
+// Logging
+// =========================================================================== //
+
+extern int gc_logging_enabled;
+
+// =========================================================================== //
+// MISC
+// =========================================================================== //
+
+// number of stacks to always keep available per pool
+#define MIN_STACK_MAPPINGS_PER_POOL 5
+
+void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT;
+void sweep_mtarraylist_buffers(void) JL_NOTSAFEPOINT;
+
+#endif // JL_GC_COMMON_H
diff --git a/src/gc-debug.c b/src/gc-debug.c
index 0e51d625da74a..9741d08504afc 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -1,7 +1,11 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "gc.h"
+#include "gc-common.h"
+#include "gc-stock.h"
+#include "julia.h"
 #include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
 
 // re-include assert.h without NDEBUG,
@@ -80,7 +84,6 @@ void add_lostval_parent(jl_value_t *parent)
  innocent looking functions which allocate (and thus trigger marking) only on special cases.
 
  If you can't find it, you can try the following :
- - Ensure that should_timeout() is deterministic instead of clock based.
  - Once you have a completely deterministic program which crashes on gc_verify, the addresses
    should stay constant between different runs (with same binary, same environment ...).
    Do not forget to turn off ASLR (linux: echo 0 > /proc/sys/kernel/randomize_va_space).
@@ -97,7 +100,7 @@ static arraylist_t bits_save[4];
 static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits)
 {
     jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n];
-    jl_gc_pool_t *pool = &ptls2->heap.norm_pools[pg->pool_n];
+    jl_gc_pool_t *pool = &ptls2->gc_tls.heap.norm_pools[pg->pool_n];
     jl_taggedvalue_t *pv = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET);
     char *lim = (char*)pv + GC_PAGE_SZ - GC_PAGE_OFFSET - pool->osize;
     while ((char*)pv <= lim) {
@@ -112,7 +115,7 @@ static void gc_clear_mark_outer(int bits)
 {
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd;
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
         while (pg != NULL) {
             gc_clear_mark_page(pg, bits);
             pg = pg->next;
@@ -132,7 +135,7 @@ static void clear_mark(int bits)
     }
     bigval_t *v;
     for (int i = 0; i < gc_n_threads; i++) {
-        v = gc_all_tls_states[i]->heap.big_objects;
+        v = gc_all_tls_states[i]->gc_tls.heap.young_generation_of_bigvals;
         while (v != NULL) {
             void *gcv = &v->header;
             if (!gc_verifying)
@@ -142,7 +145,7 @@ static void clear_mark(int bits)
         }
     }
 
-    v = big_objects_marked;
+    v = oldest_generation_of_bigvals;
     while (v != NULL) {
         void *gcv = &v->header;
         if (!gc_verifying)
@@ -170,7 +173,7 @@ static void gc_verify_track(jl_ptls_t ptls)
         return;
     do {
         jl_gc_markqueue_t mq;
-        jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+        jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
         ws_queue_t *cq = &mq.chunk_queue;
         ws_queue_t *q = &mq.ptr_queue;
         jl_atomic_store_relaxed(&cq->top, 0);
@@ -190,7 +193,7 @@ static void gc_verify_track(jl_ptls_t ptls)
             gc_mark_finlist(&mq, &ptls2->finalizers, 0);
         }
         gc_mark_finlist(&mq, &finalizer_list_marked, 0);
-        gc_mark_loop_serial_(ptls, &mq);
+        gc_collect_neighbors(ptls, &mq);
         if (lostval_parents.len == 0) {
             jl_safe_printf("Could not find the missing link. We missed a toplevel root. This is odd.\n");
             break;
@@ -230,7 +233,7 @@ void gc_verify(jl_ptls_t ptls)
         return;
     }
     jl_gc_markqueue_t mq;
-    jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
     ws_queue_t *cq = &mq.chunk_queue;
     ws_queue_t *q = &mq.ptr_queue;
     jl_atomic_store_relaxed(&cq->top, 0);
@@ -252,7 +255,7 @@ void gc_verify(jl_ptls_t ptls)
         gc_mark_finlist(&mq, &ptls2->finalizers, 0);
     }
     gc_mark_finlist(&mq, &finalizer_list_marked, 0);
-    gc_mark_loop_serial_(ptls, &mq);
+    gc_collect_neighbors(ptls, &mq);
     int clean_len = bits_save[GC_CLEAN].len;
     for(int i = 0; i < clean_len + bits_save[GC_OLD].len; i++) {
         jl_taggedvalue_t *v = (jl_taggedvalue_t*)bits_save[i >= clean_len ? GC_OLD : GC_CLEAN].items[i >= clean_len ? i - clean_len : i];
@@ -273,8 +276,8 @@ void gc_verify(jl_ptls_t ptls)
     }
     restore();
     gc_verify_track(ptls);
-    jl_gc_debug_print_status();
-    jl_gc_debug_critical_error();
+    jl_gc_debug_fprint_status(ios_safe_stderr);
+    jl_gc_debug_fprint_critical_error(ios_safe_stderr);
     abort();
 }
 #endif
@@ -289,7 +292,7 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
     int p_n = pg->pool_n;
     int t_n = pg->thread_n;
     jl_ptls_t ptls2 = gc_all_tls_states[t_n];
-    jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
+    jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[p_n];
     int osize = pg->osize;
     char *data = pg->data;
     char *page_begin = data + GC_PAGE_OFFSET;
@@ -347,44 +350,15 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
     }
 }
 
-static void gc_verify_tags_pagetable0(pagetable0_t *pagetable0)
+static void gc_verify_tags_pagestack(void)
 {
-    for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) {
-        uint32_t line = pagetable0->allocmap[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_verify_tags_page(pagetable0->meta[pg_i * 32 + j]);
-                }
-            }
-        }
-    }
-}
-
-static void gc_verify_tags_pagetable1(pagetable1_t *pagetable1)
-{
-    for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) {
-        uint32_t line = pagetable1->allocmap0[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_verify_tags_pagetable0(pagetable1->meta0[pg_i * 32 + j]);
-                }
-            }
-        }
-    }
-}
-
-static void gc_verify_tags_pagetable(void)
-{
-    for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) {
-        uint32_t line = memory_map.allocmap1[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_verify_tags_pagetable1(memory_map.meta1[pg_i * 32 + j]);
-                }
-            }
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd;
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom);
+        while (pg != NULL) {
+            gc_verify_tags_page(pg);
+            pg = pg->next;
         }
     }
 }
@@ -396,7 +370,7 @@ void gc_verify_tags(void)
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         for (int i = 0; i < JL_GC_N_POOLS; i++) {
             // for all pools, iterate its freelist
-            jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
+            jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
             jl_taggedvalue_t *next = p->freelist;
             jl_taggedvalue_t *last = NULL;
             char *allocating = gc_page_data(next);
@@ -421,7 +395,7 @@ void gc_verify_tags(void)
 
     // verify that all the objects on every page are either valid julia objects
     // or are part of the freelist or are on the allocated half of a page
-    gc_verify_tags_pagetable();
+    gc_verify_tags_pagestack();
 }
 #endif
 
@@ -491,21 +465,21 @@ int jl_gc_debug_check_other(void)
     return gc_debug_alloc_check(&jl_gc_debug_env.other);
 }
 
-void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT
+void jl_gc_debug_fprint_status(ios_t *s) JL_NOTSAFEPOINT
 {
     uint64_t pool_count = jl_gc_debug_env.pool.num;
     uint64_t other_count = jl_gc_debug_env.other.num;
-    jl_safe_printf("Allocations: %" PRIu64 " "
-                   "(Pool: %" PRIu64 "; Other: %" PRIu64 "); GC: %d\n",
-                   pool_count + other_count, pool_count, other_count, gc_num.pause);
+    jl_safe_fprintf(s, "Allocations: %" PRIu64 " "
+                    "(Pool: %" PRIu64 "; Other: %" PRIu64 "); GC: %d\n",
+                    pool_count + other_count, pool_count, other_count, gc_num.pause);
 }
 
-void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT
+void jl_gc_debug_fprint_critical_error(ios_t *s) JL_NOTSAFEPOINT
 {
-    jl_gc_debug_print_status();
+    jl_gc_debug_fprint_status(s);
     if (!jl_gc_debug_env.wait_for_debugger)
         return;
-    jl_safe_printf("Waiting for debugger to attach\n");
+    jl_safe_fprintf(s, "Waiting for debugger to attach\n");
     while (1) {
         sleep(1000);
     }
@@ -515,7 +489,7 @@ void jl_gc_debug_print(void)
 {
     if (!gc_debug_alloc_check(&jl_gc_debug_env.print))
         return;
-    jl_gc_debug_print_status();
+    jl_gc_debug_fprint_status(ios_safe_stderr);
 }
 
 // a list of tasks for conservative stack scan during gc_scrub
@@ -563,13 +537,13 @@ static void gc_scrub_task(jl_task_t *ta)
 
     char *low;
     char *high;
-    if (ta->copy_stack && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) {
+    if (ta->ctx.copy_stack && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) {
         low  = (char*)ptls2->stackbase - ptls2->stacksize;
         high = (char*)ptls2->stackbase;
     }
-    else if (ta->stkbuf) {
-        low  = (char*)ta->stkbuf;
-        high = (char*)ta->stkbuf + ta->bufsz;
+    else if (ta->ctx.stkbuf) {
+        low  = (char*)ta->ctx.stkbuf;
+        high = (char*)ta->ctx.stkbuf + ta->ctx.bufsz;
     }
     else
         return;
@@ -588,103 +562,18 @@ void gc_scrub(void)
     jl_gc_debug_tasks.len = 0;
 }
 #else
-void jl_gc_debug_critical_error(void)
+void jl_gc_debug_fprint_critical_error(ios_t *s)
 {
 }
 
-void jl_gc_debug_print_status(void)
+void jl_gc_debug_fprint_status(ios_t *s)
 {
     // May not be accurate but should be helpful enough
     uint64_t pool_count = gc_num.poolalloc;
     uint64_t big_count = gc_num.bigalloc;
-    jl_safe_printf("Allocations: %" PRIu64 " "
-                   "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n",
-                   pool_count + big_count, pool_count, big_count, gc_num.pause);
-}
-#endif
-
-#ifdef OBJPROFILE
-static htable_t obj_counts[3];
-static htable_t obj_sizes[3];
-void objprofile_count(void *ty, int old, int sz)
-{
-    if (gc_verifying) return;
-    if ((intptr_t)ty <= 0x10) {
-        ty = (void*)jl_buff_tag;
-    }
-    else if (ty != (void*)jl_buff_tag && ty != jl_malloc_tag &&
-             jl_typeof(ty) == (jl_value_t*)jl_datatype_type &&
-             ((jl_datatype_t*)ty)->instance) {
-        ty = jl_singleton_tag;
-    }
-    void **bp = ptrhash_bp(&obj_counts[old], ty);
-    if (*bp == HT_NOTFOUND)
-        *bp = (void*)2;
-    else
-        (*((intptr_t*)bp))++;
-    bp = ptrhash_bp(&obj_sizes[old], ty);
-    if (*bp == HT_NOTFOUND)
-        *bp = (void*)(intptr_t)(1 + sz);
-    else
-        *((intptr_t*)bp) += sz;
-}
-
-void objprofile_reset(void)
-{
-    for (int g = 0; g < 3; g++) {
-        htable_reset(&obj_counts[g], 0);
-        htable_reset(&obj_sizes[g], 0);
-    }
-}
-
-static void objprofile_print(htable_t nums, htable_t sizes)
-{
-    for(int i=0; i < nums.size; i+=2) {
-        if (nums.table[i+1] != HT_NOTFOUND) {
-            void *ty = nums.table[i];
-            int num = (intptr_t)nums.table[i + 1] - 1;
-            size_t sz = (uintptr_t)ptrhash_get(&sizes, ty) - 1;
-            static const int ptr_hex_width = 2 * sizeof(void*);
-            if (sz > 2e9) {
-                jl_safe_printf(" %6d : %*.1f GB of (%*p) ",
-                               num, 6, ((double)sz) / 1024 / 1024 / 1024,
-                               ptr_hex_width, ty);
-            }
-            else if (sz > 2e6) {
-                jl_safe_printf(" %6d : %*.1f MB of (%*p) ",
-                               num, 6, ((double)sz) / 1024 / 1024,
-                               ptr_hex_width, ty);
-            }
-            else if (sz > 2e3) {
-                jl_safe_printf(" %6d : %*.1f kB of (%*p) ",
-                               num, 6, ((double)sz) / 1024,
-                               ptr_hex_width, ty);
-            }
-            else {
-                jl_safe_printf(" %6d : %*d  B of (%*p) ",
-                          num, 6, (int)sz, ptr_hex_width, ty);
-            }
-            if (ty == (void*)jl_buff_tag)
-                jl_safe_printf("#<buffer>");
-            else if (ty == jl_malloc_tag)
-                jl_safe_printf("#<malloc>");
-            else if (ty == jl_singleton_tag)
-                jl_safe_printf("#<singletons>");
-            else
-                jl_static_show(JL_STDERR, (jl_value_t*)ty);
-            jl_safe_printf("\n");
-        }
-    }
-}
-
-void objprofile_printall(void)
-{
-    jl_safe_printf("Transient mark :\n");
-    objprofile_print(obj_counts[0], obj_sizes[0]);
-    jl_safe_printf("Perm mark :\n");
-    objprofile_print(obj_counts[1], obj_sizes[1]);
-    jl_safe_printf("Remset :\n");
-    objprofile_print(obj_counts[2], obj_sizes[2]);
+    jl_safe_fprintf(s, "Allocations: %" PRIu64 " "
+                    "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n",
+                    pool_count + big_count, pool_count, big_count, gc_num.pause);
 }
 #endif
 
@@ -848,11 +737,11 @@ void gc_time_pool_end(int sweep_full)
     double sweep_speed = sweep_gb / sweep_pool_sec;
     jl_safe_printf("GC sweep pools end %.2f ms at %.1f GB/s "
                    "(skipped %.2f %% of %" PRId64 ", swept %" PRId64 " pgs, "
-                   "%" PRId64 " freed with %" PRId64 " lazily) %s\n",
+                   "%" PRId64 " freed) %s\n",
                    sweep_pool_sec * 1000, sweep_speed,
                    (total_pages ? ((double)skipped_pages * 100) / total_pages : 0),
                    total_pages, total_pages - skipped_pages,
-                   freed_pages, lazy_freed_pages,
+                   freed_pages,
                    sweep_full ? "full" : "quick");
 }
 
@@ -891,29 +780,29 @@ void gc_time_big_end(void)
                    t_ms, big_freed, big_total, big_reset);
 }
 
-static int64_t mallocd_array_total;
-static int64_t mallocd_array_freed;
-static int64_t mallocd_array_sweep_start;
+static int64_t mallocd_memory_total;
+static int64_t mallocd_memory_freed;
+static int64_t mallocd_memory_sweep_start;
 
-void gc_time_mallocd_array_start(void)
+void gc_time_mallocd_memory_start(void)
 {
-    mallocd_array_total = 0;
-    mallocd_array_freed = 0;
-    mallocd_array_sweep_start = jl_hrtime();
+    mallocd_memory_total = 0;
+    mallocd_memory_freed = 0;
+    mallocd_memory_sweep_start = jl_hrtime();
 }
 
-void gc_time_count_mallocd_array(int bits)
+void gc_time_count_mallocd_memory(int bits)
 {
-    mallocd_array_total++;
-    mallocd_array_freed += !gc_marked(bits);
+    mallocd_memory_total++;
+    mallocd_memory_freed += !gc_marked(bits);
 }
 
-void gc_time_mallocd_array_end(void)
+void gc_time_mallocd_memory_end(void)
 {
-    double t_ms = jl_ns2ms(jl_hrtime() - mallocd_array_sweep_start);
+    double t_ms = jl_ns2ms(jl_hrtime() - mallocd_memory_sweep_start);
     jl_safe_printf("GC sweep arrays %.2f ms "
                    "(freed %" PRId64 " / %" PRId64 ")\n",
-                   t_ms, mallocd_array_freed, mallocd_array_total);
+                   t_ms, mallocd_memory_freed, mallocd_memory_total);
 }
 
 void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes,
@@ -923,8 +812,8 @@ void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes,
     int64_t remset_nptr = 0;
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        last_remset_len += ptls2->heap.last_remset->len;
-        remset_nptr = ptls2->heap.remset_nptr;
+        last_remset_len += ptls2->gc_tls.heap.last_remset->len;
+        remset_nptr = ptls2->gc_tls.heap.remset_nptr;
     }
     jl_safe_printf("GC mark pause %.2f ms | "
                    "scanned %" PRId64 " kB = %" PRId64 " + %" PRId64 " | "
@@ -944,12 +833,12 @@ void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd,
     jl_safe_printf("GC sweep pause %.2f ms live %" PRId64 " kB "
                    "(freed %" PRId64 " kB EST %" PRId64 " kB "
                    "[error %" PRId64 "] = %d%% of allocd b %" PRIu64 ") "
-                   "(%.2f ms in post_mark) %s | next in %" PRId64 " kB\n",
+                   "(%.2f ms in post_mark) %s\n",
                    jl_ns2ms(sweep_pause), live_bytes / 1024,
                    gc_num.freed / 1024, estimate_freed / 1024,
                    gc_num.freed - estimate_freed, pct, gc_num.allocd / 1024,
                    jl_ns2ms(gc_postmark_end - gc_premark_end),
-                   sweep_full ? "full" : "quick", -gc_num.allocd / 1024);
+                   sweep_full ? "full" : "quick");
 }
 
 void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
@@ -969,20 +858,41 @@ void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
         jl_safe_printf("TS: %" PRIu64 " Minor collection: estimate freed = %" PRIu64
                        " live = %" PRIu64 "m new interval = %" PRIu64 "m pause time = %"
                        PRIu64 "ms ttsp = %" PRIu64 "us mark time = %" PRIu64
-                       "ms sweep time = %" PRIu64 "ms \n",
+                       "ms sweep time = %" PRIu64 "ms\n",
                        end, freed, live/1024/1024,
                        interval/1024/1024, pause/1000000, ttsp,
                        mark/1000000,sweep/1000000);
 }
+
+void gc_heuristics_summary(
+        uint64_t old_alloc_diff, uint64_t alloc_mem,
+        uint64_t old_mut_time, uint64_t alloc_time,
+        uint64_t old_freed_diff, uint64_t gc_mem,
+        uint64_t old_pause_time, uint64_t gc_time,
+        int thrash_counter, const char *reason,
+        uint64_t current_heap, uint64_t target_heap)
+{
+    jl_safe_printf("Estimates: alloc_diff=%" PRIu64 "kB (%" PRIu64 ")"
+                            //"  nongc_time=%" PRIu64 "ns (%" PRIu64 ")"
+                            "  mut_time=%" PRIu64 "ns (%" PRIu64 ")"
+                            "  freed_diff=%" PRIu64 "kB (%" PRIu64 ")"
+                            "  pause_time=%" PRIu64 "ns (%" PRIu64 ")"
+                            "  thrash_counter=%d%s"
+                            "  current_heap=%" PRIu64 " MB"
+                            "  target_heap=%" PRIu64 " MB\n",
+                   old_alloc_diff/1024, alloc_mem/1024,
+                   old_mut_time/1000, alloc_time/1000,
+                   old_freed_diff/1024, gc_mem/1024,
+                   old_pause_time/1000, gc_time/1000,
+                   thrash_counter, reason,
+                   current_heap/1024/1024, target_heap/1024/1024);
+}
 #endif
 
 void jl_gc_debug_init(void)
 {
 #ifdef GC_DEBUG_ENV
-    char *env = getenv("JULIA_GC_NO_GENERATIONAL");
-    if (env && strcmp(env, "0") != 0)
-        jl_gc_debug_env.always_full = 1;
-    env = getenv("JULIA_GC_WAIT_FOR_DEBUGGER");
+    char *env = getenv("JULIA_GC_WAIT_FOR_DEBUGGER");
     jl_gc_debug_env.wait_for_debugger = env && strcmp(env, "0") != 0;
     gc_debug_alloc_init(&jl_gc_debug_env.pool, "POOL");
     gc_debug_alloc_init(&jl_gc_debug_env.other, "OTHER");
@@ -997,13 +907,6 @@ void jl_gc_debug_init(void)
     arraylist_new(&lostval_parents_done, 0);
 #endif
 
-#ifdef OBJPROFILE
-    for (int g = 0; g < 3; g++) {
-        htable_new(&obj_counts[g], 0);
-        htable_new(&obj_sizes[g], 0);
-    }
-#endif
-
 #ifdef GC_FINAL_STATS
     process_t0 = jl_hrtime();
 #endif
@@ -1012,113 +915,136 @@ void jl_gc_debug_init(void)
 // GC summary stats
 
 #ifdef MEMPROFILE
-// TODO repair this and possibly merge with `gc_count_pool`
-static size_t pool_stats(jl_gc_pool_t *p, size_t *pwaste, size_t *np,
-                         size_t *pnold)
+
+typedef struct _gc_memprofile_stat_t {
+    size_t nfree; // for pool only
+    size_t npgs;  // for pool only
+    size_t nused;
+    size_t nbytes_used;
+    size_t nused_old;
+    size_t nbytes_used_old;
+} gc_memprofile_stat_t;
+
+void gc_stats_all_pool(void)
 {
-    jl_taggedvalue_t *halfpages = p->newpages;
-    size_t osize = p->osize;
-    size_t nused=0, nfree=0, npgs=0, nold=0;
-
-    if (halfpages != NULL) {
-        npgs++;
-        char *v = gc_page_data(halfpages) + GC_PAGE_OFFSET;
-        char *lim = (char*)halfpages - 1;
-        int i = 0;
-        while (v <= lim) {
-            if (!gc_marked(((jl_taggedvalue_t*)v)->bits.gc)) {
-                nfree++;
+    gc_memprofile_stat_t stat[JL_GC_N_POOLS];
+    memset(stat, 0, sizeof(stat));
+    for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+        if (ptls2 == NULL) {
+            continue;
+        }
+        jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd;
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom);
+        while (pg != NULL) {
+            assert(gc_alloc_map_is_set(pg->data));
+            int pool_n = pg->pool_n;
+            jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[pool_n];
+            char *data = pg->data;
+            // compute the start of the data area in this page
+            jl_taggedvalue_t *v0 = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
+            // compute the limit of valid data in this page
+            char *lim = data + GC_PAGE_SZ - pg->osize;
+            char *lim_newpages = data + GC_PAGE_SZ;
+            if (gc_page_data((char*)p->newpages - 1) == data) {
+                lim_newpages = (char*)p->newpages;
             }
-            else {
-                nused++;
-                if (((jl_taggedvalue_t*)v)->bits.gc == GC_OLD_MARKED) {
-                    nold++;
+            char *v = (char*)v0;
+            gc_memprofile_stat_t *stat_n = &stat[pool_n];
+            while (v <= lim) {
+                uint8_t bits = ((jl_taggedvalue_t*)v)->bits.gc;
+                if (!gc_marked(bits) || (char*)v >= lim_newpages) {
+                    stat_n->nfree++;
+                }
+                else {
+                    if (gc_old(bits)) {
+                        assert(bits == GC_OLD_MARKED);
+                        stat_n->nused_old++;
+                        stat_n->nbytes_used_old += pg->osize;
+                    }
+                    else {
+                        stat_n->nused++;
+                        stat_n->nbytes_used += pg->osize;
+                    }
                 }
+                v = v + pg->osize;
             }
-            v = v + osize;
-            i++;
+            stat_n->npgs++;
+            pg = pg->next;
         }
-        // only the first page is allocated on
-    }
-    *pwaste = npgs * GC_PAGE_SZ - (nused * p->osize);
-    *np = npgs;
-    *pnold = nold;
-    if (npgs != 0) {
-        jl_safe_printf("%4d : %7lld/%7lld objects (%3lld%% old), %5lld pages, %5lld kB, %5lld kB waste\n",
-                       p->osize,
-                       (long long)nused,
-                       (long long)(nused + nfree),
-                       (long long)(nused ? (nold * 100) / nused : 0),
-                       (long long)npgs,
-                       (long long)((nused * p->osize) / 1024),
-                       (long long)(*pwaste / 1024));
     }
-    return nused*p->osize;
-}
-
-void gc_stats_all_pool(void)
-{
-    size_t nb=0, w, tw=0, no=0, tp=0, nold=0, noldbytes=0, np, nol;
     for (int i = 0; i < JL_GC_N_POOLS; i++) {
-        for (int t_i = 0; t_i < gc_n_threads; t_i++) {
-            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-            size_t b = pool_stats(&ptls2->heap.norm_pools[i], &w, &np, &nol);
-            nb += b;
-            no += (b / ptls2->heap.norm_pools[i].osize);
-            tw += w;
-            tp += np;
-            nold += nol;
-            noldbytes += nol * ptls2->heap.norm_pools[i].osize;
-        }
+        jl_ptls_t ptls = jl_current_task->ptls;
+        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[i];
+        gc_memprofile_stat_t *s = &stat[i];
+        jl_safe_printf("%4d : %7lld/%7lld objects (%3lld%% old), %5lld pages, %5lld kB, %5lld kB waste\n",
+            p->osize,
+            (long long)(s->nused + s->nused_old),
+            (long long)(s->nused + s->nused_old + s->nfree),
+            (long long)((s->nused + s->nused_old) ? (s->nused_old * 100) / (s->nused + s->nused_old) : 0),
+            (long long)s->npgs,
+            (long long)(((s->nused + s->nused_old) * p->osize) / 1024),
+            (long long)((GC_PAGE_SZ * s->npgs - s->nused * p->osize) / 1024));
     }
-    jl_safe_printf("%lld objects (%lld%% old), %lld kB (%lld%% old) total allocated, "
-                   "%lld total fragments (%lld%% overhead), in %lld pages\n",
-                   (long long)no,
-                   (long long)(no ? (nold * 100) / no : 0),
-                   (long long)(nb / 1024),
-                   (long long)(nb ? (noldbytes * 100) / nb : 0),
-                   (long long)tw,
-                   (long long)(nb ? (tw * 100) / nb : 0),
-                   (long long)tp);
 }
 
 void gc_stats_big_obj(void)
 {
-    size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0;
+    gc_memprofile_stat_t stat;
+    memset(&stat, 0, sizeof(stat));
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        bigval_t *v = ptls2->heap.big_objects;
+        if (ptls2 == NULL) {
+            continue;
+        }
+        bigval_t *v = ptls2->gc_tls.heap.young_generation_of_bigvals;
+        v = v->next; // skip the sentinel
         while (v != NULL) {
             if (gc_marked(v->bits.gc)) {
-                nused++;
-                nbytes += v->sz & ~3;
+                if (gc_old(v->bits.gc)) {
+                    assert(v->bits.gc == GC_OLD_MARKED);
+                    stat.nused_old++;
+                    stat.nbytes_used_old += v->sz;
+                }
+                else {
+                    stat.nused++;
+                    stat.nbytes_used += v->sz;
+                }
             }
             v = v->next;
         }
-        v = big_objects_marked;
+        v = oldest_generation_of_bigvals;
+        v = v->next; // skip the sentinel
         while (v != NULL) {
-            if (gc_marked(v->bits.gc)) {
-                nused_old++;
-                nbytes_old += v->sz & ~3;
-            }
+            assert(v->bits.gc == GC_OLD_MARKED);
+            stat.nused_old++;
+            stat.nbytes_used_old += v->sz;
             v = v->next;
         }
 
-        mallocarray_t *ma = ptls2->heap.mallocarrays;
-        while (ma != NULL) {
-            if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) {
-                nused++;
-                nbytes += jl_array_nbytes(ma->a);
+        void **lst = ptls2->gc_tls.heap.mallocarrays.items;
+        for (size_t i = 0, l = ptls2->gc_tls.heap.mallocarrays.len; i < l; i++) {
+            jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[i] & ~(uintptr_t)1);
+            uint8_t bits = jl_astaggedvalue(m)->bits.gc;
+            if (gc_marked(bits)) {
+                size_t sz = jl_genericmemory_nbytes(m);
+                if (gc_old(bits)) {
+                    assert(bits == GC_OLD_MARKED);
+                    stat.nused_old++;
+                    stat.nbytes_used_old += sz;
+                }
+                else {
+                    stat.nused++;
+                    stat.nbytes_used += sz;
+                }
             }
-            ma = ma->next;
         }
     }
-
     jl_safe_printf("%lld kB (%lld%% old) in %lld large objects (%lld%% old)\n",
-                   (long long)((nbytes + nbytes_old) / 1024),
-                   (long long)(nbytes + nbytes_old ? (nbytes_old * 100) / (nbytes + nbytes_old) : 0),
-                   (long long)(nused + nused_old),
-                   (long long)(nused + nused_old ? (nused_old * 100) / (nused + nused_old) : 0));
+                   (long long)((stat.nbytes_used + stat.nbytes_used_old) / 1024),
+                   (long long)(stat.nbytes_used + stat.nbytes_used_old ? (stat.nbytes_used_old * 100) / (stat.nbytes_used + stat.nbytes_used_old) : 0),
+                   (long long)(stat.nused + stat.nused_old),
+                   (long long)(stat.nused + stat.nused_old ? (stat.nused_old * 100) / (stat.nused + stat.nused_old) : 0));
 }
 #endif //MEMPROFILE
 
@@ -1150,7 +1076,7 @@ static void gc_count_pool_pagetable(void)
 {
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd;
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
         while (pg != NULL) {
             if (gc_alloc_map_is_set(pg->data)) {
                 gc_count_pool_page(pg);
@@ -1174,60 +1100,6 @@ void gc_count_pool(void)
     jl_safe_printf("************************\n");
 }
 
-int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
-{
-    int nf = (int)jl_datatype_nfields(vt);
-    for (int i = 1; i < nf; i++) {
-        if (slot < (void*)((char*)obj + jl_field_offset(vt, i)))
-            return i - 1;
-    }
-    return nf - 1;
-}
-
-int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
-{
-    char *slot = (char*)_slot;
-    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
-    char *start = NULL;
-    size_t len = 0;
-    size_t elsize = sizeof(void*);
-    if (vt == jl_module_type) {
-        jl_module_t *m = (jl_module_t*)obj;
-        start = (char*)m->usings.items;
-        len = m->usings.len;
-    }
-    else if (vt == jl_simplevector_type) {
-        start = (char*)jl_svec_data(obj);
-        len = jl_svec_len(obj);
-    }
-    else if (vt->name == jl_array_typename) {
-        jl_array_t *a = (jl_array_t*)obj;
-        start = (char*)a->data;
-        len = jl_array_len(a);
-        elsize = a->elsize;
-    }
-    if (slot < start || slot >= start + elsize * len)
-        return -1;
-    return (slot - start) / elsize;
-}
-
-static int gc_logging_enabled = 0;
-
-JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
-    gc_logging_enabled = enable;
-}
-
-void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT {
-    if (!gc_logging_enabled) {
-        return;
-    }
-    jl_safe_printf("GC: pause %.2fms. collected %fMB. %s %s\n",
-        pause/1e6, freed/1e6,
-        full ? "full" : "incr",
-        recollect ? "recollect" : ""
-    );
-}
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp
index b1401653d99ff..62379263b03e7 100644
--- a/src/gc-heap-snapshot.cpp
+++ b/src/gc-heap-snapshot.cpp
@@ -2,24 +2,23 @@
 
 #include "gc-heap-snapshot.h"
 
+#include "julia.h"
 #include "julia_internal.h"
-#include "gc.h"
+#include "julia_assert.h"
 
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/FormatVariadic.h"
 
-#include <vector>
-#include <string>
-#include <sstream>
-
-using std::vector;
-using std::string;
-using std::ostringstream;
-using std::pair;
 using std::make_pair;
+using llvm::SmallVector;
 using llvm::StringMap;
 using llvm::DenseMap;
 using llvm::StringRef;
+using llvm::SmallString;
+using llvm::formatv;
 
 // https://stackoverflow.com/a/33799784/751061
 void print_str_escape_json(ios_t *stream, StringRef s)
@@ -53,8 +52,9 @@ void print_str_escape_json(ios_t *stream, StringRef s)
 // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2598-L2601
 
 struct Edge {
-    size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index.
+    uint8_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index.
     size_t name_or_index; // name of the field (for objects/modules) or index of array
+    size_t from_node;  // This is a deviation from the .heapsnapshot format to support streaming.
     size_t to_node;
 };
 
@@ -63,29 +63,34 @@ struct Edge {
 //   [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ]
 // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2568-L2575
 
-const int k_node_number_of_fields = 7;
 struct Node {
-    size_t type; // index into snapshot->node_types
+    uint8_t type; // index into snapshot->node_types
     size_t name;
     size_t id; // This should be a globally-unique counter, but we use the memory address
     size_t self_size;
     size_t trace_node_id;  // This is ALWAYS 0 in Javascript heap-snapshots.
-    // whether the from_node is attached or dettached from the main application state
+    // whether the from_node is attached or detached from the main application state
     // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745
-    int detachedness;  // 0 - unknown, 1 - attached, 2 - detached
-    vector<Edge> edges;
+    uint8_t detachedness;  // 0 - unknown, 1 - attached, 2 - detached
 
     ~Node() JL_NOTSAFEPOINT = default;
 };
 
-struct StringTable {
+class StringTable {
+protected:
     StringMap<size_t> map;
-    vector<StringRef> strings;
+    SmallVector<StringRef, 0> strings;
+    size_t next_id;
+
+public:
+    StringTable() JL_NOTSAFEPOINT : map(), strings(), next_id(0) {};
 
     size_t find_or_create_string_id(StringRef key) JL_NOTSAFEPOINT {
-        auto val = map.insert(make_pair(key, map.size()));
-        if (val.second)
+        auto val = map.insert(make_pair(key, next_id));
+        if (val.second) {
             strings.push_back(val.first->first());
+            next_id++;
+        }
         return val.first->second;
     }
 
@@ -105,77 +110,205 @@ struct StringTable {
     }
 };
 
-struct HeapSnapshot {
-    vector<Node> nodes;
-    // edges are stored on each from_node
+// a string table with partial strings in memory and all strings serialized to a file
+class SerializedStringTable: public StringTable {
+    public:
+
+    // serialize the string only if it's not already in the table
+    size_t serialize_if_necessary(ios_t *stream, StringRef key) JL_NOTSAFEPOINT {
+        auto val = map.insert(make_pair(key, next_id));
+        if (val.second) {
+            strings.push_back(val.first->first());
+            // persist the string size first, then the string itself
+            // so that we could read it back in the same order
+            size_t s_size = key.size();
+            ios_write(stream, reinterpret_cast<const char*>(&s_size), sizeof(size_t));
+            ios_write(stream, key.data(), s_size);
+            next_id++;
+        }
+        return val.first->second;
+    }
+
+    // serialize the string without checking if it is in the table or not
+    // and return its index. This means that we might have duplicates in the
+    // output string file.
+    size_t serialize(ios_t *stream, StringRef key) JL_NOTSAFEPOINT {
+        size_t s_size = key.size();
+        ios_write(stream, reinterpret_cast<const char*>(&s_size), sizeof(size_t));
+        ios_write(stream, key.data(), s_size);
+        size_t current = next_id;
+        next_id++;
+        return current;
+    }
+};
 
-    StringTable names;
+struct HeapSnapshot {
+    // names could be very large, so we keep them in a separate binary file
+    // and use a StringTable to keep track of the indices of frequently used strings
+    // to reduce duplicates in the output file to some degree
+    SerializedStringTable names;
+    // node types and edge types are very small and keep them in memory
     StringTable node_types;
     StringTable edge_types;
     DenseMap<void *, size_t> node_ptr_to_index_map;
 
-    size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes.
+    size_t num_nodes = 0; // Since we stream out to files,
+    size_t num_edges = 0; // we need to track the counts here.
+
+    // Node internal_root;
+
+    // Used for streaming
+    // Since nodes and edges are just one giant array of integers, we stream them as
+    // *BINARY DATA*: a sequence of bytes, each of which is a 64-bit integer (big enough to
+    // fit the pointer ids).
+    ios_t *nodes;
+    ios_t *edges;
+    // strings are serialized to a file in binary format
+    ios_t *strings;
+    // the following file is written out as json data.
+    ios_t *json;
+
+    size_t internal_root_idx = 0; // node index of the internal root node
+    size_t _gc_root_idx = 1; // node index of the GC roots node
+    size_t _gc_finlist_root_idx = 2; // node index of the GC finlist roots node
 };
 
 // global heap snapshot, mutated by garbage collector
 // when snapshotting is on.
 int gc_heap_snapshot_enabled = 0;
+int gc_heap_snapshot_redact_data = 0;
 HeapSnapshot *g_snapshot = nullptr;
-extern jl_mutex_t heapsnapshot_lock;
+// mutex for gc-heap-snapshot.
+jl_mutex_t heapsnapshot_lock;
 
+void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one);
 void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one);
 static inline void _record_gc_edge(const char *edge_type,
                                    jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT;
-void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT;
-void _add_internal_root(HeapSnapshot *snapshot);
+void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT;
+void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT;
 
 
-JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one)
+JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
+    ios_t *strings, ios_t *json, char all_one, char redact_data)
 {
     HeapSnapshot snapshot;
-    _add_internal_root(&snapshot);
+    snapshot.nodes = nodes;
+    snapshot.edges = edges;
+    snapshot.strings = strings;
+    snapshot.json = json;
 
     jl_mutex_lock(&heapsnapshot_lock);
 
     // Enable snapshotting
     g_snapshot = &snapshot;
+    gc_heap_snapshot_redact_data = redact_data;
     gc_heap_snapshot_enabled = true;
 
+    _add_synthetic_root_entries(&snapshot);
+
     // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot`
     jl_gc_collect(JL_GC_FULL);
 
     // Disable snapshotting
     gc_heap_snapshot_enabled = false;
+    gc_heap_snapshot_redact_data = 0;
     g_snapshot = nullptr;
 
     jl_mutex_unlock(&heapsnapshot_lock);
 
     // When we return, the snapshot is full
     // Dump the snapshot
-    serialize_heap_snapshot((ios_t*)stream, snapshot, all_one);
+    final_serialize_heap_snapshot((ios_t*)json, (ios_t*)strings, snapshot, all_one);
 }
 
-// adds a node at id 0 which is the "uber root":
-// a synthetic node which points to all the GC roots.
-void _add_internal_root(HeapSnapshot *snapshot)
+void serialize_node(HeapSnapshot *snapshot, const Node &node) JL_NOTSAFEPOINT
 {
+    // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"]
+    ios_write(snapshot->nodes, (char*)&node.type, sizeof(node.type));
+    ios_write(snapshot->nodes, (char*)&node.name, sizeof(node.name));
+    ios_write(snapshot->nodes, (char*)&node.id, sizeof(node.id));
+    ios_write(snapshot->nodes, (char*)&node.self_size, sizeof(node.self_size));
+    // NOTE: We don't write edge_count, since it's always 0. It will be reconstructed in
+    // post-processing.
+    ios_write(snapshot->nodes, (char*)&node.trace_node_id, sizeof(node.trace_node_id));
+    ios_write(snapshot->nodes, (char*)&node.detachedness, sizeof(node.detachedness));
+
+    g_snapshot->num_nodes += 1;
+}
+
+void serialize_edge(HeapSnapshot *snapshot, const Edge &edge) JL_NOTSAFEPOINT
+{
+    // ["type","name_or_index","to_node"]
+    ios_write(snapshot->edges, (char*)&edge.type, sizeof(edge.type));
+    ios_write(snapshot->edges, (char*)&edge.name_or_index, sizeof(edge.name_or_index));
+    // NOTE: Row numbers for nodes (not adjusted for k_node_number_of_fields, which is 7)
+    ios_write(snapshot->edges, (char*)&edge.from_node, sizeof(edge.from_node));
+    ios_write(snapshot->edges, (char*)&edge.to_node, sizeof(edge.to_node));
+
+    g_snapshot->num_edges += 1;
+}
+
+// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L212
+// add synthetic nodes for the uber root, the GC roots, and the GC finalizer list roots
+void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT
+{
+    // adds a node at id 0 which is the "uber root":
+    // a synthetic node which points to all the GC roots.
     Node internal_root{
-        snapshot->node_types.find_or_create_string_id("synthetic"),
-        snapshot->names.find_or_create_string_id(""), // name
+        (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"),
+        snapshot->names.serialize_if_necessary(snapshot->strings, ""), // name
         0, // id
         0, // size
         0, // size_t trace_node_id (unused)
-        0, // int detachedness;  // 0 - unknown,  1 - attached;  2 - detached
-        vector<Edge>() // outgoing edges
+        0 // int detachedness;  // 0 - unknown,  1 - attached;  2 - detached
     };
-    snapshot->nodes.push_back(internal_root);
+    serialize_node(snapshot, internal_root);
+
+    // Add a node for the GC roots
+    snapshot->_gc_root_idx = snapshot->internal_root_idx + 1;
+    Node gc_roots{
+        (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"),
+        snapshot->names.serialize_if_necessary(snapshot->strings, "GC roots"), // name
+        snapshot->_gc_root_idx, // id
+        0, // size
+        0, // size_t trace_node_id (unused)
+        0 // int detachedness;  // 0 - unknown,  1 - attached;  2 - detached
+    };
+    serialize_node(snapshot, gc_roots);
+    Edge root_to_gc_roots{
+        (uint8_t)snapshot->edge_types.find_or_create_string_id("internal"),
+        snapshot->names.serialize_if_necessary(snapshot->strings, "GC roots"), // edge label
+        snapshot->internal_root_idx, // from
+        snapshot->_gc_root_idx // to
+    };
+    serialize_edge(snapshot, root_to_gc_roots);
+
+    // add a node for the gc finalizer list roots
+    snapshot->_gc_finlist_root_idx = snapshot->internal_root_idx + 2;
+    Node gc_finlist_roots{
+        (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"),
+        snapshot->names.serialize_if_necessary(snapshot->strings, "GC finalizer list roots"), // name
+        snapshot->_gc_finlist_root_idx, // id
+        0, // size
+        0, // size_t trace_node_id (unused)
+        0 // int detachedness;  // 0 - unknown,  1 - attached;  2 - detached
+    };
+    serialize_node(snapshot, gc_finlist_roots);
+    Edge root_to_gc_finlist_roots{
+        (uint8_t)snapshot->edge_types.find_or_create_string_id("internal"),
+        snapshot->names.serialize_if_necessary(snapshot->strings, "GC finalizer list roots"), // edge label
+        snapshot->internal_root_idx, // from
+        snapshot->_gc_finlist_root_idx // to
+    };
+    serialize_edge(snapshot, root_to_gc_finlist_roots);
 }
 
 // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597
 // returns the index of the new node
 size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT
 {
-    auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size()));
+    auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes));
     if (!val.second) {
         return val.first->second;
     }
@@ -192,7 +325,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT
 
     if (jl_is_string(a)) {
         node_type = "String";
-        name = jl_string_data(a);
+        name = gc_heap_snapshot_redact_data ? "<redacted>" : jl_string_data(a);
         self_size = jl_string_len(a);
     }
     else if (jl_is_symbol(a)) {
@@ -241,21 +374,21 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT
         ios_mem(&str_, 0);
         JL_STREAM* str = (JL_STREAM*)&str_;
         jl_static_show(str, (jl_value_t*)type);
-
+        node_type = StringRef((const char*)str_.buf, str_.size);
         name = StringRef((const char*)str_.buf, str_.size);
     }
 
-    g_snapshot->nodes.push_back(Node{
-        g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type;
-        g_snapshot->names.find_or_create_string_id(name), // size_t name;
+    auto node = Node{
+        (uint8_t)g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type;
+        g_snapshot->names.serialize(g_snapshot->strings, name), // size_t name;
         (size_t)a,     // size_t id;
         // We add 1 to self-size for the type tag that all heap-allocated objects have.
         // Also because the Chrome Snapshot viewer ignores size-0 leaves!
         sizeof(void*) + self_size, // size_t self_size;
         0,             // size_t trace_node_id (unused)
         0,             // int detachedness;  // 0 - unknown,  1 - attached;  2 - detached
-        vector<Edge>() // outgoing edges
-    });
+    };
+    serialize_node(g_snapshot, node);
 
     if (ios_need_close)
         ios_close(&str_);
@@ -265,36 +398,34 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT
 
 static size_t record_pointer_to_gc_snapshot(void *a, size_t bytes, StringRef name) JL_NOTSAFEPOINT
 {
-    auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size()));
+    auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes));
     if (!val.second) {
         return val.first->second;
     }
 
-    g_snapshot->nodes.push_back(Node{
-        g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type;
-        g_snapshot->names.find_or_create_string_id(name), // size_t name;
+    auto node = Node{
+        (uint8_t)g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type;
+        g_snapshot->names.serialize(g_snapshot->strings, name), // size_t name;
         (size_t)a,     // size_t id;
         bytes,         // size_t self_size;
         0,             // size_t trace_node_id (unused)
         0,             // int detachedness;  // 0 - unknown,  1 - attached;  2 - detached
-        vector<Edge>() // outgoing edges
-    });
+    };
+    serialize_node(g_snapshot, node);
 
     return val.first->second;
 }
 
-static string _fieldpath_for_slot(void *obj, void *slot) JL_NOTSAFEPOINT
+static SmallString<128> _fieldpath_for_slot(void *obj, void *slot) JL_NOTSAFEPOINT
 {
-    string res;
+    SmallString<128> res;
     jl_datatype_t *objtype = (jl_datatype_t*)jl_typeof(obj);
 
     while (1) {
         int i = gc_slot_to_fieldidx(obj, slot, objtype);
 
         if (jl_is_tuple_type(objtype) || jl_is_namedtuple_type(objtype)) {
-            ostringstream ss;
-            ss << "[" << i << "]";
-            res += ss.str();
+            res += formatv("[{0}]", i).sstr<8>();
         }
         else {
             jl_svec_t *field_names = jl_field_names(objtype);
@@ -314,16 +445,28 @@ static string _fieldpath_for_slot(void *obj, void *slot) JL_NOTSAFEPOINT
     }
 }
 
-
 void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT
 {
-    record_node_to_gc_snapshot(root);
+    size_t to_node_idx = record_node_to_gc_snapshot(root);
+    auto edge_label = g_snapshot->names.serialize(g_snapshot->strings, name);
 
-    auto &internal_root = g_snapshot->nodes.front();
-    auto to_node_idx = g_snapshot->node_ptr_to_index_map[root];
-    auto edge_label = g_snapshot->names.find_or_create_string_id(name);
+    _record_gc_just_edge("internal", g_snapshot->internal_root_idx, to_node_idx, edge_label);
+}
+
+void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT
+{
+    auto to_node_idx = record_node_to_gc_snapshot(root);
+    auto edge_label = g_snapshot->names.serialize(g_snapshot->strings, name);
 
-    _record_gc_just_edge("internal", internal_root, to_node_idx, edge_label);
+    _record_gc_just_edge("internal", g_snapshot->_gc_root_idx, to_node_idx, edge_label);
+}
+
+void _gc_heap_snapshot_record_finlist(jl_value_t *obj, size_t index) JL_NOTSAFEPOINT
+{
+    auto to_node_idx = record_node_to_gc_snapshot(obj);
+    SmallString<16> ss = formatv("finlist-{0}", index);
+    auto edge_label = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, ss);
+    _record_gc_just_edge("internal", g_snapshot->_gc_finlist_root_idx, to_node_idx, edge_label);
 }
 
 // Add a node to the heap snapshot representing a Julia stack frame.
@@ -332,20 +475,20 @@ void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT
 // Stack frame nodes point at the objects they have as local variables.
 size_t _record_stack_frame_node(HeapSnapshot *snapshot, void *frame) JL_NOTSAFEPOINT
 {
-    auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->nodes.size()));
+    auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->num_nodes));
     if (!val.second) {
         return val.first->second;
     }
 
-    snapshot->nodes.push_back(Node{
-        snapshot->node_types.find_or_create_string_id("synthetic"),
-        snapshot->names.find_or_create_string_id("(stack frame)"), // name
+    auto node = Node{
+        (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"),
+        snapshot->names.serialize_if_necessary(snapshot->strings, "(stack frame)"), // name
         (size_t)frame, // id
         1, // size
         0, // size_t trace_node_id (unused)
         0, // int detachedness;  // 0 - unknown,  1 - attached;  2 - detached
-        vector<Edge>() // outgoing edges
-    });
+    };
+    serialize_node(snapshot, node);
 
     return val.first->second;
 }
@@ -354,30 +497,27 @@ void _gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) J
 {
     auto from_node_idx = _record_stack_frame_node(g_snapshot, (jl_gcframe_t*)from);
     auto to_idx = record_node_to_gc_snapshot(to);
-    Node &from_node = g_snapshot->nodes[from_node_idx];
 
-    auto name_idx = g_snapshot->names.find_or_create_string_id("local var");
-    _record_gc_just_edge("internal", from_node, to_idx, name_idx);
+    auto name_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "local var");
+    _record_gc_just_edge("internal", from_node_idx, to_idx, name_idx);
 }
 
 void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT
 {
     auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)from);
     auto to_node_idx = _record_stack_frame_node(g_snapshot, to);
-    Node &from_node = g_snapshot->nodes[from_node_idx];
 
-    auto name_idx = g_snapshot->names.find_or_create_string_id("stack");
-    _record_gc_just_edge("internal", from_node, to_node_idx, name_idx);
+    auto name_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "stack");
+    _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx);
 }
 
 void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT
 {
     auto from_node_idx = _record_stack_frame_node(g_snapshot, from);
     auto to_node_idx = _record_stack_frame_node(g_snapshot, to);
-    Node &from_node = g_snapshot->nodes[from_node_idx];
 
-    auto name_idx = g_snapshot->names.find_or_create_string_id("next frame");
-    _record_gc_just_edge("internal", from_node, to_node_idx, name_idx);
+    auto name_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "next frame");
+    _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx);
 }
 
 void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT
@@ -387,64 +527,61 @@ void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_
 
 void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void *slot) JL_NOTSAFEPOINT
 {
-    string path = _fieldpath_for_slot(from, slot);
+    SmallString<128> path = _fieldpath_for_slot(from, slot);
     _record_gc_edge("property", from, to,
-                    g_snapshot->names.find_or_create_string_id(path));
+                    g_snapshot->names.serialize_if_necessary(g_snapshot->strings, path));
 }
 
-void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_binding_t *binding) JL_NOTSAFEPOINT
+void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_value_t *bindings, jl_value_t *bindingkeyset) JL_NOTSAFEPOINT
 {
-    jl_globalref_t *globalref = binding->globalref;
-    jl_sym_t *name = globalref->name;
     auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)module);
-    auto to_node_idx = record_pointer_to_gc_snapshot(binding, sizeof(jl_binding_t), jl_symbol_name(name));
+    auto to_bindings_idx = record_node_to_gc_snapshot(bindings);
+    auto to_bindingkeyset_idx = record_node_to_gc_snapshot(bindingkeyset);
 
-    jl_value_t *value = jl_atomic_load_relaxed(&binding->value);
-    auto value_idx = value ? record_node_to_gc_snapshot(value) : 0;
-    jl_value_t *ty = jl_atomic_load_relaxed(&binding->ty);
-    auto ty_idx = ty ? record_node_to_gc_snapshot(ty) : 0;
-    auto globalref_idx = record_node_to_gc_snapshot((jl_value_t*)globalref);
-
-    auto &from_node = g_snapshot->nodes[from_node_idx];
-    auto &to_node = g_snapshot->nodes[to_node_idx];
-
-    _record_gc_just_edge("property", from_node, to_node_idx, g_snapshot->names.find_or_create_string_id("<native>"));
-    if (value_idx)     _record_gc_just_edge("internal", to_node, value_idx, g_snapshot->names.find_or_create_string_id("value"));
-    if (ty_idx)        _record_gc_just_edge("internal", to_node, ty_idx, g_snapshot->names.find_or_create_string_id("ty"));
-    if (globalref_idx) _record_gc_just_edge("internal", to_node, globalref_idx, g_snapshot->names.find_or_create_string_id("globalref"));
-}
+    if (to_bindings_idx > 0) {
+        _record_gc_just_edge("internal", from_node_idx, to_bindings_idx, g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "bindings"));
+    }
+    if (to_bindingkeyset_idx > 0) {
+        _record_gc_just_edge("internal", from_node_idx, to_bindingkeyset_idx, g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "bindingkeyset"));
+    }
+ }
 
 void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT
 {
     _record_gc_edge("internal", from, to,
-                    g_snapshot->names.find_or_create_string_id("<internal>"));
+                    g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "<internal>"));
+}
+
+void _gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT
+{
+    _record_gc_edge("binding", from, to,
+                    g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "<binding>"));
 }
 
+
 void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT
 {
-    size_t name_or_idx = g_snapshot->names.find_or_create_string_id("<native>");
+    // valid alloc_type values are 0, 1, 2
+    assert(alloc_type <= 2);
+    size_t name_or_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "<native>");
 
     auto from_node_idx = record_node_to_gc_snapshot(from);
-    const char *alloc_kind;
+    const char *alloc_kind = NULL;
     switch (alloc_type)
     {
     case 0:
-        alloc_kind = "<malloc>";
+        alloc_kind = "<generic memory - malloc>";
         break;
     case 1:
-        alloc_kind = "<pooled>";
+        alloc_kind = "<generic memory - pool alloc>";
         break;
     case 2:
-        alloc_kind = "<inline>";
-        break;
-    default:
-        alloc_kind = "<undef>";
+        alloc_kind = "<generic memory - inline alloc>";
         break;
     }
     auto to_node_idx = record_pointer_to_gc_snapshot(to, bytes, alloc_kind);
-    auto &from_node = g_snapshot->nodes[from_node_idx];
 
-    _record_gc_just_edge("hidden", from_node, to_node_idx, name_or_idx);
+    _record_gc_just_edge("hidden", from_node_idx, to_node_idx, name_or_idx);
 }
 
 static inline void _record_gc_edge(const char *edge_type, jl_value_t *a,
@@ -453,84 +590,51 @@ static inline void _record_gc_edge(const char *edge_type, jl_value_t *a,
     auto from_node_idx = record_node_to_gc_snapshot(a);
     auto to_node_idx = record_node_to_gc_snapshot(b);
 
-    auto &from_node = g_snapshot->nodes[from_node_idx];
-
-    _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx);
+    _record_gc_just_edge(edge_type, from_node_idx, to_node_idx, name_or_idx);
 }
 
-void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT
+void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT
 {
-    from_node.edges.push_back(Edge{
-        g_snapshot->edge_types.find_or_create_string_id(edge_type),
+    auto edge = Edge{
+        (uint8_t)g_snapshot->edge_types.find_or_create_string_id(edge_type),
         name_or_idx, // edge label
+        from_idx, // from
         to_idx // to
-    });
+    };
 
-    g_snapshot->num_edges += 1;
+    serialize_edge(g_snapshot, edge);
 }
 
-void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one)
+void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one)
 {
     // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567
-    ios_printf(stream, "{\"snapshot\":{");
-    ios_printf(stream, "\"meta\":{");
-    ios_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],");
-    ios_printf(stream, "\"node_types\":[");
-    snapshot.node_types.print_json_array(stream, false);
-    ios_printf(stream, ",");
-    ios_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],");
-    ios_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],");
-    ios_printf(stream, "\"edge_types\":[");
-    snapshot.edge_types.print_json_array(stream, false);
-    ios_printf(stream, ",");
-    ios_printf(stream, "\"string_or_number\",\"from_node\"]");
-    ios_printf(stream, "},\n"); // end "meta"
-    ios_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size());
-    ios_printf(stream, "\"edge_count\":%zu", snapshot.num_edges);
-    ios_printf(stream, "},\n"); // end "snapshot"
-
-    ios_printf(stream, "\"nodes\":[");
-    bool first_node = true;
-    for (const auto &from_node : snapshot.nodes) {
-        if (first_node) {
-            first_node = false;
-        }
-        else {
-            ios_printf(stream, ",");
-        }
-        // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"]
-        ios_printf(stream, "%zu,%zu,%zu,%zu,%zu,%zu,%d\n",
-                            from_node.type,
-                            from_node.name,
-                            from_node.id,
-                            all_one ? (size_t)1 : from_node.self_size,
-                            from_node.edges.size(),
-                            from_node.trace_node_id,
-                            from_node.detachedness);
-    }
-    ios_printf(stream, "],\n");
-
-    ios_printf(stream, "\"edges\":[");
-    bool first_edge = true;
-    for (const auto &from_node : snapshot.nodes) {
-        for (const auto &edge : from_node.edges) {
-            if (first_edge) {
-                first_edge = false;
-            }
-            else {
-                ios_printf(stream, ",");
-            }
-            ios_printf(stream, "%zu,%zu,%zu\n",
-                                edge.type,
-                                edge.name_or_index,
-                                edge.to_node * k_node_number_of_fields);
-        }
-    }
-    ios_printf(stream, "],\n"); // end "edges"
-
-    ios_printf(stream, "\"strings\":");
-
-    snapshot.names.print_json_array(stream, true);
-
-    ios_printf(stream, "}");
+    // also https://github.com/microsoft/vscode-v8-heap-tools/blob/c5b34396392397925ecbb4ecb904a27a2754f2c1/v8-heap-parser/src/decoder.rs#L43-L51
+    ios_printf(json, "{\"snapshot\":{\n");
+
+    ios_printf(json, "  \"meta\":{\n");
+    ios_printf(json, "    \"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],\n");
+    ios_printf(json, "    \"node_types\":[");
+    snapshot.node_types.print_json_array(json, false);
+    ios_printf(json, ",");
+    ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],\n");
+    ios_printf(json, "    \"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],\n");
+    ios_printf(json, "    \"edge_types\":[");
+    snapshot.edge_types.print_json_array(json, false);
+    ios_printf(json, ",");
+    ios_printf(json, "\"string_or_number\",\"from_node\"],\n");
+    // not used. Required by microsoft/vscode-v8-heap-tools
+    ios_printf(json, "    \"trace_function_info_fields\":[\"function_id\",\"name\",\"script_name\",\"script_id\",\"line\",\"column\"],\n");
+    ios_printf(json, "    \"trace_node_fields\":[\"id\",\"function_info_index\",\"count\",\"size\",\"children\"],\n");
+    ios_printf(json, "    \"sample_fields\":[\"timestamp_us\",\"last_assigned_id\"],\n");
+    ios_printf(json, "    \"location_fields\":[\"object_index\",\"script_id\",\"line\",\"column\"]\n");
+    // end not used
+    ios_printf(json, "  },\n"); // end "meta"
+
+    ios_printf(json, "  \"node_count\":%zu,\n", snapshot.num_nodes);
+    ios_printf(json, "  \"edge_count\":%zu,\n", snapshot.num_edges);
+    ios_printf(json, "  \"trace_function_count\":0\n"); // not used. Required by microsoft/vscode-v8-heap-tools
+    ios_printf(json, "}\n"); // end "snapshot"
+
+    // this } is removed by the julia reassembler in Profile
+    ios_printf(json, "}");
 }
diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h
index 8c3af5b86bec7..dc5b22bb72eb1 100644
--- a/src/gc-heap-snapshot.h
+++ b/src/gc-heap-snapshot.h
@@ -20,7 +20,7 @@ void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_N
 void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT;
 void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT;
 void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT;
-void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT;
+void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_value_t *bindings, jl_value_t *bindingkeyset) JL_NOTSAFEPOINT;
 // Used for objects managed by GC, but which aren't exposed in the julia object, so have no
 // field or index.  i.e. they're not reachable from julia code, but we _will_ hit them in
 // the GC mark phase (so we can check their type tag to get the size).
@@ -28,10 +28,16 @@ void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *
 // Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the
 // size of the object, even though we're never going to mark that object.
 void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT;
-
+// Used for objects that are reachable from the GC roots
+void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT;
+// Used for objects that are reachable from the finalizer list
+void _gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t index) JL_NOTSAFEPOINT;
+// Used for objects reachable from the binding partition pointer union
+void _gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT;
 
 extern int gc_heap_snapshot_enabled;
 extern int prev_sweep_full;
+extern jl_mutex_t heapsnapshot_lock;
 
 int gc_slot_to_fieldidx(void *_obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT;
 int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT;
@@ -60,6 +66,12 @@ static inline void gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL
         _gc_heap_snapshot_record_root(root, name);
     }
 }
+static inline void gc_heap_snapshot_record_array_edge_index(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT
+{
+    if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full && from != NULL && to != NULL)) {
+        _gc_heap_snapshot_record_array_edge(from, to, index);
+    }
+}
 static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t **to) JL_NOTSAFEPOINT
 {
     if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
@@ -73,10 +85,10 @@ static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_valu
     }
 }
 
-static inline void gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT
+static inline void gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_value_t *bindings, jl_value_t *bindingkeyset) JL_NOTSAFEPOINT
 {
-    if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
-        _gc_heap_snapshot_record_module_to_binding(module, binding);
+    if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full) && bindings != NULL && bindingkeyset != NULL) {
+        _gc_heap_snapshot_record_module_to_binding(module, bindings, bindingkeyset);
     }
 }
 
@@ -87,6 +99,13 @@ static inline void gc_heap_snapshot_record_internal_array_edge(jl_value_t *from,
     }
 }
 
+static inline void gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT
+{
+    if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
+        _gc_heap_snapshot_record_binding_partition_edge(from, to);
+    }
+}
+
 static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT
 {
     if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
@@ -94,10 +113,25 @@ static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* t
     }
 }
 
+static inline void gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT
+{
+    if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full && root != NULL)) {
+        _gc_heap_snapshot_record_gc_roots(root, name);
+    }
+}
+
+static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t index) JL_NOTSAFEPOINT
+{
+    if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full && finlist != NULL)) {
+        _gc_heap_snapshot_record_finlist(finlist, index);
+    }
+}
+
 // ---------------------------------------------------------------------
 // Functions to call from Julia to take heap snapshot
 // ---------------------------------------------------------------------
-JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one);
+JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
+    ios_t *strings, ios_t *json, char all_one, char redact_data);
 
 
 #ifdef __cplusplus
diff --git a/src/gc-interface.h b/src/gc-interface.h
new file mode 100644
index 0000000000000..7905270b91795
--- /dev/null
+++ b/src/gc-interface.h
@@ -0,0 +1,314 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+/*
+  Garbage Collection interface that must be implemented by third-party GCs
+*/
+
+#ifndef JL_GC_INTERFACE_H
+#define JL_GC_INTERFACE_H
+
+#include "dtypes.h"
+#include "julia_atomics.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct _jl_tls_states_t;
+struct _jl_value_t;
+struct _jl_weakref_t;
+struct _jl_datatype_t;
+struct _jl_genericmemory_t;
+
+// ========================================================================= //
+// GC Metrics
+// ========================================================================= //
+
+// This struct must be kept in sync with the Julia type of the same name in base/timing.jl
+typedef struct {
+    // (GC Internal) Number of allocated bytes since the last collection. This field is reset
+    // after the end of every garbage collection cycle, so it will always be zero if observed
+    // during execution of Julia user code
+    int64_t allocd;
+    // (GC Internal) Number of allocated bytes within a `gc_disable/gc_enable` block. This field is
+    // reset after every garbage collection cycle and will always be zero in case of no use
+    // of `gc_disable/gc_enable` blocks
+    int64_t deferred_alloc;
+    // (GC Internal) Number of bytes freed bytes in the current collection cycle. This field is
+    // reset after every garbage collection cycle and will always be zero when observed
+    // during execution of Julia user code. It's incremented as memory is reclaimed during a collection,
+    // used to gather some statistics within the collection itself and reset at the end of a GC cycle.
+    int64_t freed;
+    // Number of `malloc/calloc` calls (never reset by the runtime)
+    uint64_t malloc;
+    // Number of `realloc` calls (never reset by the runtime)
+    uint64_t realloc;
+    // Number of pool allocation calls (never reset by the runtime)
+    // NOTE: Julia's stock GC uses an internal (pool) allocator for objects up to 2032 bytes.
+    // Larger objects are allocated through `malloc/calloc`.
+    uint64_t poolalloc;
+    // Number of allocations for "big objects" (non-array objects larger than 2032 bytes)
+    // (never reset by the runtime)
+    uint64_t bigalloc;
+    // Number of `free` calls (never reset by the runtime)
+    uint64_t freecall;
+    // Total time spent in garbage collection (never reset by the runtime)
+    uint64_t total_time;
+    // (GC internal) Total number of bytes allocated since the program started
+    uint64_t total_allocd;
+    // (GC internal) Per-thread allocation quota before triggering a GC
+    // NOTE: This field is no longer used by the heuristics in the stock GC
+    size_t interval;
+    // Duration of the last GC pause in nanoseconds
+    int pause;
+    // Number of full GC sweeps completed so far (never reset by the runtime)
+    int full_sweep;
+    // Maximum pause duration observed so far in nanoseconds
+    uint64_t max_pause;
+    // Maximum number of bytes allocated any point in time.
+    // NOTE: This is aggregated over objects, not pages
+    uint64_t max_memory;
+    // Time taken to reach a safepoint in the last GC cycle in nanoseconds
+    uint64_t time_to_safepoint;
+    // Maximum time taken to reach a safepoint across all GCs in nanoseconds
+    uint64_t max_time_to_safepoint;
+    // Total time taken to reach safepoints across all GCs in nanoseconds
+    uint64_t total_time_to_safepoint;
+    // Time spent in the last GC sweeping phase in nanoseconds
+    uint64_t sweep_time;
+    // Time spent in the last GC marking phase in nanoseconds
+    uint64_t mark_time;
+    // Time spent sweeping stack pools in the last GC in nanoseconds
+    uint64_t stack_pool_sweep_time;
+    // Total time spent in sweeping phase across all GCs in nanoseconds
+    uint64_t total_sweep_time;
+    // Total time spent walking pool allocated pages during sweeping phase across all GCs in nanoseconds
+    uint64_t total_sweep_page_walk_time;
+    // Total time spent in madvise calls during sweeping phase across all GCs in nanoseconds
+    uint64_t total_sweep_madvise_time;
+    // Total time spent in freeing malloc'd memory during sweeping phase across all GCs in nanoseconds
+    uint64_t total_sweep_free_mallocd_memory_time;
+    // Total time spent in marking phase across all GCs in nanoseconds
+    uint64_t total_mark_time;
+    // Total time spent sweeping stack pools across all GCs in nanoseconds
+    uint64_t total_stack_pool_sweep_time;
+    // Timestamp of the last full GC sweep in nanoseconds
+    uint64_t last_full_sweep;
+    // Timestamp of the last incremental GC sweep in nanoseconds
+    uint64_t last_incremental_sweep;
+} jl_gc_num_t;
+
+// ========================================================================= //
+// System-wide Initialization
+// ========================================================================= //
+
+// System-wide initialization function. Responsible for initializing global locks as well as
+// global memory parameters (e.g. target heap size) used by the collector.
+void jl_gc_init(void);
+// Spawns GC threads.
+void jl_start_gc_threads(void);
+
+// ========================================================================= //
+// Per-thread Initialization
+// ========================================================================= //
+
+// Initializes thread-local data structures such as thread-local object pools,
+// thread-local remembered sets and thread-local allocation counters.
+// Should be called exactly once per Julia thread.
+void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT;
+// Deallocates any memory previously used for thread-local GC data structures.
+// Mostly used to ensure that we perform this memory cleanup for foreign threads that are
+// about to leave Julia.
+void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls);
+
+// ========================================================================= //
+// Controls
+// ========================================================================= //
+
+typedef enum {
+    JL_GC_AUTO = 0, // use heuristics to determine the collection type
+    JL_GC_FULL = 1, // force a full collection
+    JL_GC_INCREMENTAL = 2, // force an incremental collection
+} jl_gc_collection_t;
+// Enables or disables (depending on the value of the argument) the collector. Returns
+// whether GC was previously enabled.
+JL_DLLEXPORT int jl_gc_enable(int on);
+// Returns whether the collector is enabled.
+JL_DLLEXPORT int jl_gc_is_enabled(void);
+// Sets a soft limit to Julia's heap.
+JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem);
+// Runs a GC cycle. This function's parameter determines whether we're running an
+// incremental, full, or automatic (i.e. heuristic driven) collection.
+JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection);
+// Returns whether the thread with `tid` is a collector thread
+JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT;
+// Returns which GC implementation is being used and possibly its version according to the list of supported GCs
+// NB: it should clearly identify the GC by including e.g. ‘stock’ or ‘mmtk’ as a substring.
+JL_DLLEXPORT const char* jl_gc_active_impl(void);
+// Sweep Julia's stack pools and mtarray buffers. Note that this function has been added to the interface as
+// each GC should implement it but it will most likely not be used by other code in the runtime.
+// It still needs to be annotated with JL_DLLEXPORT since it is called from Rust by MMTk.
+JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT;
+
+// ========================================================================= //
+// Metrics
+// ========================================================================= //
+
+// Retrieves Julia's `GC_Num` (structure that stores GC statistics).
+JL_DLLEXPORT jl_gc_num_t jl_gc_num(void);
+// Returns the difference between the current value of total live bytes now
+// (live bytes at the last collection plus number of bytes allocated since then),
+// compared to the value at the last time this function was called.
+JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT;
+// Returns the difference between the current value of total live bytes now
+// (live bytes at the last collection plus number of bytes allocated since then)
+// compared to the value at the last time this function was called. The offset parameter
+// is subtracted from this value in order to obtain the return value.
+JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT;
+// Returns the number of pool allocated bytes. This could always return 0 for GC
+// implementations that do not use pools.
+JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void);
+// Returns the number of live bytes at the end of the last collection cycle
+// (doesn't include the number of allocated bytes since then).
+JL_DLLEXPORT int64_t jl_gc_live_bytes(void);
+// Stores the number of live bytes at the end of the last collection cycle plus the number
+// of bytes we allocated since then into the 64-bit integer pointer passed as an argument.
+JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT;
+// Retrieves the value of Julia's soft heap limit.
+JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void);
+// High-resolution (nano-seconds) value of total time spent in GC.
+JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void);
+
+// ========================================================================= //
+// Allocation
+// ========================================================================= //
+
+// On GCC, this function is inlined when sz is constant (see julia_internal.h)
+// In general, this function should implement allocation and should use the specific GC's logic
+// to decide whether to allocate a small or a large object. Finally, note that this function
+// **must** also set the type of the returning object to be `ty`. The type `ty` may also be used to record
+// an allocation of that type in the allocation profiler.
+struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty);
+// Allocates small objects and increments Julia allocation counterst. Size of the object
+// header must be included in the object size. The (possibly unused in some implementations)
+// offset to the arena in which we're allocating is passed in the second parameter, and the
+// object size in the third parameter. If thread-local allocators are used, then this
+// function should allocate in the thread-local allocator of the thread referenced by the
+// jl_ptls_t argument. An additional (last) parameter containing information about the type
+// of the object being allocated may be used to record an allocation of that type in the
+// allocation profiler.
+JL_DLLEXPORT struct _jl_value_t *jl_gc_small_alloc(struct _jl_tls_states_t *ptls,
+                                                   int offset, int osize,
+                                                   struct _jl_value_t *type);
+// Description: Allocates large objects and increments Julia allocation counters. Size of
+// the object header must be included in the object size. If thread-local allocators are
+// used, then this function should allocate in the thread-local allocator of the thread
+// referenced by the jl_ptls_t argument. An additional (last) parameter containing
+// information about the type of the object being allocated may be used to record an
+// allocation of that type in the allocation profiler.
+JL_DLLEXPORT struct _jl_value_t *jl_gc_big_alloc(struct _jl_tls_states_t *ptls, size_t sz,
+                                                 struct _jl_value_t *type);
+// Wrapper around Libc malloc that updates Julia allocation counters.
+JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz);
+// Wrapper around Libc calloc that updates Julia allocation counters.
+JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz);
+// Wrapper around Libc free that updates Julia allocation counters.
+JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz);
+// Wrapper around Libc realloc that updates Julia allocation counters.
+JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz);
+// Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and
+// Strings. It increments Julia allocation counters and should check whether we're close to
+// the Julia heap target, and therefore, whether we should run a collection. Note that this
+// doesn't record the size of the allocation request in a side metadata (i.e. a few words in
+// front of the memory payload): this function is used for Julia object allocations, and we
+// assume that there is already a field in the Julia object being allocated that we may use
+// to store the size of the memory buffer.
+JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz);
+// Allocates a new weak-reference, assigns its value and increments Julia allocation
+// counters. If thread-local allocators are used, then this function should allocate in the
+// thread-local allocator of the thread referenced by the first jl_ptls_t argument.
+JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls,
+                                                        struct _jl_value_t *value);
+// Permanently allocates a memory slot of the size specified by the first parameter. This
+// block of memory is allocated in an immortal region that is never swept. The second
+// parameter specifies whether the memory should be filled with zeros. The third and fourth
+// parameters specify the alignment and an offset in bytes, respectively. Specifically, the
+// pointer obtained by advancing the result of this function by the number of bytes
+// specified in the fourth parameter will be aligned according to the value given by the
+// third parameter in bytes.
+JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align,
+                                    unsigned offset) JL_NOTSAFEPOINT;
+// Permanently allocates an object of the size specified by the first parameter. Size of the
+// object header must be included in the object size. This object is allocated in an
+// immortal region that is never swept. The second parameter specifies the type of the
+// object being allocated and will be used to set the object header.
+// If the value passed as alignment is 0, then the result will be aligned according to the object
+// size: if sz is 0 it will be aligned to pointer size, to 2x pointer size if sz < 2*sizeof(void*),
+// or to 16 otherwise.
+//
+// !!! warning: Because permanently allocated objects are not swept, the GC will not
+//              necessarily mark any objects that would have ordinarily been rooted by
+//              the allocated object. All objects stored in fields of this object
+//              must be either permanently allocated or have other roots.
+struct _jl_value_t *jl_gc_permobj(struct _jl_tls_states_t *ptls, size_t sz, void *ty, unsigned align) JL_NOTSAFEPOINT;
+// This function notifies the GC about memory addresses that are set when loading the boot image.
+// The GC may use that information to, for instance, determine that such objects should
+// be treated as marked and belonged to the old generation in nursery collections.
+void jl_gc_notify_image_load(const char* img_data, size_t len);
+
+// ========================================================================= //
+// Runtime Write-Barriers
+// ========================================================================= //
+
+// Write barrier slow-path. If a generational collector is used,
+// it may enqueue an old object into the remembered set of the calling thread.
+JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT;
+// In a generational collector is used, this function walks over the fields of the
+// object specified by the second parameter (as defined by the data type in the third
+// parameter). If a field points to a young object, the first parameter is enqueued into the
+// remembered set of the calling thread.
+JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored,
+                                        struct _jl_datatype_t *dt) JL_NOTSAFEPOINT;
+// If a generational collector is used, checks whether the function argument points to an
+// old object, and if so, calls the write barrier slow path above. In most cases, this
+// function is used when its caller has verified that there is a young reference in the
+// object that's being passed as an argument to this function.
+STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT;
+// Write barrier function that must be used after pointer writes to heap-allocated objects –
+// the value of the field being written must also point to a heap-allocated object.
+// If a generational collector is used, it may check whether the two function arguments are
+// in different GC generations (i.e. if the first argument points to an old object and the
+// second argument points to a young object), and if so, call the write barrier slow-path.
+STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT;
+// Freshly allocated objects are known to be in the young generation until the next safepoint,
+// so write barriers can be omitted until the next allocation. This function is a no-op that
+// can be used to annotate that a write barrier would be required were it not for this property
+// (as opposed to somebody just having forgotten to think about write barriers).
+STATIC_INLINE void jl_gc_wb_fresh(const void *parent JL_UNUSED, const void *ptr JL_UNUSED) JL_NOTSAFEPOINT {}
+// Used to annotate that a write barrier would be required, but may be omitted because `ptr`
+// is known to be an old object.
+STATIC_INLINE void jl_gc_wb_knownold(const void *parent JL_UNUSED, const void *ptr JL_UNUSED) JL_NOTSAFEPOINT {}
+// Write-barrier function that must be used after copying multiple fields of an object into
+// another. It should be semantically equivalent to triggering multiple write barriers – one
+// per field of the object being copied, but may be special-cased for performance reasons.
+STATIC_INLINE void jl_gc_multi_wb(const void *parent,
+                                  const struct _jl_value_t *ptr) JL_NOTSAFEPOINT;
+
+// Write-barrier function that must be used after copying fields of elements of genericmemory objects
+// into another. It should be semantically equivalent to triggering multiple write barriers – one
+// per field of the object being copied, but may be special-cased for performance reasons.
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const struct _jl_value_t *owner, struct _jl_genericmemory_t *src, char* src_p,
+                                          size_t n, struct _jl_datatype_t *dt) JL_NOTSAFEPOINT;
+
+// Similar to jl_gc_wb_genericmemory_copy but must be used when copying *boxed* elements of a genericmemory
+// object. Note that this barrier also performs the copying unlike jl_gc_wb_genericmemory_copy_ptr.
+// The parameters src_p, dest_p and n will be modified and will contain information about
+// the *uncopied* data after performing this barrier, and will be copied using memmove_refs.
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const struct _jl_value_t *owner, _Atomic(void*) * dest_p,
+                                          struct _jl_genericmemory_t *src, _Atomic(void*) * src_p,
+                                          size_t* n) JL_NOTSAFEPOINT;
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c
new file mode 100644
index 0000000000000..be773a8e625c5
--- /dev/null
+++ b/src/gc-mmtk.c
@@ -0,0 +1,1221 @@
+#include "gc-common.h"
+#include "gc-tls-mmtk.h"
+#include "mmtkMutator.h"
+#include "threading.h"
+
+#ifdef _COMPILER_TSAN_ENABLED_
+#include <sanitizer/tsan_interface.h>
+#endif
+
+// File exists in the binding
+#include "mmtk.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// ========================================================================= //
+// Julia specific
+// ========================================================================= //
+
+extern jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED;
+extern const unsigned pool_sizes[];
+extern jl_mutex_t finalizers_lock;
+
+// FIXME: Should the values below be shared between both GC's?
+// Note that MMTk uses a hard max heap limit, which is set by default
+// as 70% of the free available memory. The min heap is set as the
+// default_collect_interval variable below.
+
+// max_total_memory is a suggestion.  We try very hard to stay
+// under this limit, but we will go above it rather than halting.
+#ifdef _P64
+typedef uint64_t memsize_t;
+static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*);
+// We expose this to the user/ci as jl_gc_set_max_memory
+static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024;
+#else
+typedef uint32_t memsize_t;
+static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*);
+// Work really hard to stay within 2GB
+// Alternative is to risk running out of address space
+// on 32 bit architectures.
+#define MAX32HEAP 1536 * 1024 * 1024
+static memsize_t max_total_memory = (memsize_t) MAX32HEAP;
+#endif
+
+// ========================================================================= //
+// Defined by the binding
+// ========================================================================= //
+
+extern void mmtk_julia_copy_stack_check(int copy_stack);
+extern void mmtk_gc_init(uintptr_t min_heap_size, uintptr_t max_heap_size, uintptr_t n_gcthreads, uintptr_t header_size, uintptr_t tag);
+extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);
+extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr);
+extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator);
+extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator);
+extern void mmtk_store_obj_size_c(void* obj, size_t size);
+extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
+extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS;
+
+// ========================================================================= //
+// GC Initialization and Control
+// ========================================================================= //
+
+void jl_gc_init(void) {
+    // TODO: use jl_options.heap_size_hint to set MMTk's fixed heap size? (see issue: https://github.com/mmtk/mmtk-julia/issues/167)
+    JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
+
+    arraylist_new(&to_finalize, 0);
+    arraylist_new(&finalizer_list_marked, 0);
+    gc_num.interval = default_collect_interval;
+    gc_num.allocd = 0;
+    gc_num.max_pause = 0;
+    gc_num.max_memory = 0;
+
+    // Necessary if we want to use Julia heap resizing heuristics
+    uint64_t mem_reserve = 250*1024*1024; // LLVM + other libraries need some amount of memory
+    uint64_t min_heap_size_hint = mem_reserve + 1*1024*1024;
+    uint64_t hint = jl_options.heap_size_hint;
+
+    // check if heap size specified on command line
+    if (jl_options.heap_size_hint == 0) {
+        char *cp = getenv(HEAP_SIZE_HINT);
+        if (cp)
+            hint = parse_heap_size_option(cp, "JULIA_HEAP_SIZE_HINT=\"<size>[<unit>]\"", 1);
+    }
+#ifdef _P64
+    if (hint == 0) {
+        uint64_t constrained_mem = uv_get_constrained_memory();
+        if (constrained_mem > 0 && constrained_mem < uv_get_total_memory())
+            hint = constrained_mem;
+    }
+#endif
+    if (hint) {
+        if (hint < min_heap_size_hint)
+            hint = min_heap_size_hint;
+        jl_gc_set_max_memory(hint - mem_reserve);
+    }
+
+    // MMTK supports setting the heap size using the
+    // MMTK_MIN_HSIZE and MMTK_MAX_HSIZE environment variables
+    long long min_heap_size;
+    long long max_heap_size;
+    char* min_size_def = getenv("MMTK_MIN_HSIZE");
+    char* min_size_gb = getenv("MMTK_MIN_HSIZE_G");
+
+    char* max_size_def = getenv("MMTK_MAX_HSIZE");
+    char* max_size_gb = getenv("MMTK_MAX_HSIZE_G");
+
+    // If min and max values are not specified, set them to 0 here
+    // and use stock heuristics as defined in the binding
+    if (min_size_def != NULL) {
+        char *p;
+        double min_size = strtod(min_size_def, &p);
+        min_heap_size = (long) 1024 * 1024 * min_size;
+    } else if (min_size_gb != NULL) {
+        char *p;
+        double min_size = strtod(min_size_gb, &p);
+        min_heap_size = (long) 1024 * 1024 * 1024 * min_size;
+    } else {
+        min_heap_size = 0;
+    }
+
+    if (max_size_def != NULL) {
+        char *p;
+        double max_size = strtod(max_size_def, &p);
+        max_heap_size = (long) 1024 * 1024 * max_size;
+    } else if (max_size_gb != NULL) {
+        char *p;
+        double max_size = strtod(max_size_gb, &p);
+        max_heap_size = (long) 1024 * 1024 * 1024 * max_size;
+    } else {
+        max_heap_size = 0;
+    }
+
+    // Assert that the number of stock GC threads is 0; MMTK uses the number of threads in jl_options.ngcthreads
+    assert(jl_n_gcthreads == 0);
+
+    // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined
+    int copy_stacks;
+
+#ifdef COPY_STACKS
+    copy_stacks = 1;
+#else
+    copy_stacks = 0;
+#endif
+
+    mmtk_julia_copy_stack_check(copy_stacks);
+
+    // if only max size is specified initialize MMTk with a fixed size heap
+    // TODO: We just assume mark threads means GC threads, and ignore the number of concurrent sweep threads.
+    // If the two values are the same, we can use either. Otherwise, we need to be careful.
+    uintptr_t gcthreads = jl_options.nmarkthreads;
+    if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) {
+        mmtk_gc_init(0, max_heap_size, gcthreads, (sizeof(jl_taggedvalue_t)), jl_buff_tag);
+    } else {
+        mmtk_gc_init(min_heap_size, max_heap_size, gcthreads, (sizeof(jl_taggedvalue_t)), jl_buff_tag);
+    }
+}
+
+void jl_start_gc_threads(void) {
+    jl_ptls_t ptls = jl_current_task->ptls;
+    mmtk_initialize_collection((void *)ptls);
+}
+
+void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT {
+    jl_thread_heap_common_t *heap = &ptls->gc_tls_common.heap;
+    small_arraylist_new(&heap->weak_refs, 0);
+    small_arraylist_new(&heap->live_tasks, 0);
+    for (int i = 0; i < JL_N_STACK_POOLS; i++)
+        small_arraylist_new(&heap->free_stacks[i], 0);
+    small_arraylist_new(&heap->mallocarrays, 0);
+    arraylist_new(&ptls->finalizers, 0);
+    // Initialize `lazily_freed_mtarraylist_buffers`
+    small_arraylist_new(&ptls->lazily_freed_mtarraylist_buffers, 0);
+    // Clear the malloc sz count
+    jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0);
+    // Create mutator
+    MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid);
+    // Copy the mutator to the thread local storage
+    memcpy(&ptls->gc_tls.mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext));
+    // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed)
+    mmtk_post_bind_mutator(&ptls->gc_tls.mmtk_mutator, mmtk_mutator);
+    memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num));
+}
+
+void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls) {
+    mmtk_destroy_mutator(&ptls->gc_tls.mmtk_mutator);
+}
+
+JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) {
+#ifdef _P32
+    max_mem = max_mem < MAX32HEAP ? max_mem : MAX32HEAP;
+#endif
+    max_total_memory = max_mem;
+}
+
+JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void)
+{
+    // FIXME: We should return the max heap size set in MMTk
+    // when not using Julia's heap resizing heuristics
+    return max_total_memory;
+}
+
+STATIC_INLINE void maybe_collect(jl_ptls_t ptls)
+{
+    // Just do a safe point for general maybe_collect
+    jl_gc_safepoint_(ptls);
+}
+
+// This is only used for malloc. We need to know if we need to do GC. However, keeping checking with MMTk (mmtk_gc_poll),
+// is expensive. So we only check for every few allocations.
+static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz)
+{
+    // We do not need to carefully maintain malloc_sz_since_last_poll. We just need to
+    // avoid using mmtk_gc_poll too frequently, and try to be precise on our heap usage
+    // as much as we can.
+    if (ptls->gc_tls.malloc_sz_since_last_poll > 4096) {
+        jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0);
+        mmtk_gc_poll(ptls);
+    } else {
+        size_t curr = jl_atomic_load_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll);
+        jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, curr + sz);
+        jl_gc_safepoint_(ptls);
+    }
+}
+
+// This is called when the user calls for a GC with Gc.gc()
+JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) {
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
+        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval;
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
+        static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
+        jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
+        return;
+    }
+    mmtk_handle_user_collection_request(ptls, collection);
+}
+
+
+// Based on jl_gc_collect from gc-stock.c
+// called when stopping the thread in `mmtk_block_for_gc`
+JL_DLLEXPORT void jl_gc_prepare_to_collect(void)
+{
+    // FIXME: set to JL_GC_AUTO since we're calling it from mmtk
+    // maybe just remove this?
+    JL_PROBE_GC_BEGIN(JL_GC_AUTO);
+
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
+        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval;
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
+        static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
+        jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
+        return;
+    }
+
+    int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state);
+    jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
+    // `jl_safepoint_start_gc()` makes sure only one thread can run the GC.
+    uint64_t t0 = jl_hrtime();
+    if (!jl_safepoint_start_gc(ct)) {
+        jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
+        jl_safepoint_wait_thread_resume(ct); // block in thread-suspend now if requested, after clearing the gc_state
+        return;
+    }
+
+    JL_TIMING_SUSPEND_TASK(GC, ct);
+    JL_TIMING(GC, GC);
+
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    // Now we are ready to wait for other threads to hit the safepoint,
+    // we can do a few things that doesn't require synchronization.
+    //
+    // We must sync here with the tls_lock operations, so that we have a
+    // seq-cst order between these events now we know that either the new
+    // thread must run into our safepoint flag or we must observe the
+    // existence of the thread in the jl_n_threads count.
+    //
+    // TODO: concurrently queue objects
+    jl_fence();
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    jl_gc_wait_for_the_world(gc_all_tls_states, gc_n_threads);
+    JL_PROBE_GC_STOP_THE_WORLD();
+
+    uint64_t t1 = jl_hrtime();
+    uint64_t duration = t1 - t0;
+    if (duration > gc_num.max_time_to_safepoint)
+        gc_num.max_time_to_safepoint = duration;
+    gc_num.time_to_safepoint = duration;
+    gc_num.total_time_to_safepoint += duration;
+
+    if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) {
+        JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? otherwise, failing that, this seems like plausibly a deadlock
+#ifndef __clang_gcanalyzer__
+        mmtk_block_thread_for_gc();
+#endif
+        JL_UNLOCK_NOGC(&finalizers_lock);
+    }
+
+    gc_n_threads = 0;
+    gc_all_tls_states = NULL;
+    jl_safepoint_end_gc();
+    jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
+    JL_PROBE_GC_END();
+    jl_safepoint_wait_thread_resume(ct); // block in thread-suspend now if requested, after clearing the gc_state
+
+    // Only disable finalizers on current thread
+    // Doing this on all threads is racy (it's impossible to check
+    // or wait for finalizers on other threads without dead lock).
+    if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
+        JL_TIMING(GC, GC_Finalizers);
+        run_finalizers(ct, 0);
+    }
+    JL_PROBE_GC_FINALIZER();
+
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+}
+
+// ========================================================================= //
+// GC Statistics
+// ========================================================================= //
+
+JL_DLLEXPORT const char* jl_gc_active_impl(void) {
+    const char* mmtk_version = get_mmtk_version();
+    return mmtk_version;
+}
+
+int64_t last_gc_total_bytes = 0;
+int64_t last_live_bytes = 0; // live_bytes at last collection
+int64_t live_bytes = 0;
+
+// FIXME: The functions combine_thread_gc_counts and reset_thread_gc_counts
+// are currently nearly identical for mmtk and for stock. However, the stats
+// are likely different (e.g., MMTk doesn't track the bytes allocated in the fastpath,
+// but only when the slowpath is called). We might need to adapt these later so that
+// the statistics are the same or as close as possible for each GC.
+static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
+{
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls = gc_all_tls_states[i];
+        if (ptls) {
+            dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval);
+            dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc);
+            dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc);
+            dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc);
+            dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc);
+            dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc);
+            if (update_heap) {
+                jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0);
+                jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0);
+            }
+        }
+    }
+}
+
+void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
+{
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls = gc_all_tls_states[i];
+        if (ptls != NULL) {
+            // don't reset `pool_live_bytes` here
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0);
+        }
+    }
+}
+
+// Retrieves Julia's `GC_Num` (structure that stores GC statistics).
+JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) {
+    jl_gc_num_t num = gc_num;
+    combine_thread_gc_counts(&num, 0);
+    return num;
+}
+
+JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT {
+    int64_t oldtb = last_gc_total_bytes;
+    int64_t newtb;
+    jl_gc_get_total_bytes(&newtb);
+    last_gc_total_bytes = newtb;
+    return newtb - oldtb;
+}
+
+JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT
+{
+    int64_t oldtb = last_gc_total_bytes;
+    int64_t newtb;
+    jl_gc_get_total_bytes(&newtb);
+    last_gc_total_bytes = newtb - offset;
+    return newtb - oldtb;
+}
+
+JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) {
+    return 0;
+}
+
+void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz);
+}
+
+void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT
+{
+}
+
+int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT
+{
+    jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc);
+    return live_bytes += inc;
+}
+
+void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
+{
+    combine_thread_gc_counts(&gc_num, 0);
+    inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd);
+    gc_num.allocd = 0;
+    gc_num.deferred_alloc = 0;
+    reset_thread_gc_counts();
+}
+
+JL_DLLEXPORT int64_t jl_gc_live_bytes(void) {
+    return last_live_bytes;
+}
+
+JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT
+{
+    jl_gc_num_t num = gc_num;
+    combine_thread_gc_counts(&num, 0);
+    // Sync this logic with `base/util.jl:GC_Diff`
+    *bytes = (num.total_allocd + num.deferred_alloc + num.allocd);
+}
+
+// These are needed to collect MMTk statistics from a Julia program using ccall
+JL_DLLEXPORT void (jl_mmtk_harness_begin)(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    mmtk_harness_begin(ptls);
+}
+
+JL_DLLEXPORT void (jl_mmtk_harness_end)(void)
+{
+    mmtk_harness_end();
+}
+
+// ========================================================================= //
+// Root Processing, Object Scanning and Julia-specific sweeping
+// ========================================================================= //
+
+static void add_node_to_roots_buffer(RootsWorkClosure* closure, RootsWorkBuffer* buf, size_t* buf_len, void* root) {
+    if (root == NULL)
+        return;
+
+    buf->ptr[*buf_len] = root;
+    *buf_len += 1;
+    if (*buf_len >= buf->cap) {
+        RootsWorkBuffer new_buf = (closure->report_nodes_func)(buf->ptr, *buf_len, buf->cap, closure->data, true);
+        *buf = new_buf;
+        *buf_len = 0;
+    }
+}
+
+static void add_node_to_tpinned_roots_buffer(RootsWorkClosure* closure, RootsWorkBuffer* buf, size_t* buf_len, void* root) {
+    if (root == NULL)
+        return;
+
+    buf->ptr[*buf_len] = root;
+    *buf_len += 1;
+    if (*buf_len >= buf->cap) {
+        RootsWorkBuffer new_buf = (closure->report_tpinned_nodes_func)(buf->ptr, *buf_len, buf->cap, closure->data, true);
+        *buf = new_buf;
+        *buf_len = 0;
+    }
+}
+
+JL_DLLEXPORT void jl_gc_scan_vm_specific_roots(RootsWorkClosure* closure)
+{
+    // Create a new buf
+    RootsWorkBuffer buf = (closure->report_nodes_func)((void**)0, 0, 0, closure->data, true);
+    size_t len = 0;
+
+    // add module
+    add_node_to_roots_buffer(closure, &buf, &len, jl_main_module);
+
+    // add global_method_table
+    add_node_to_roots_buffer(closure, &buf, &len, jl_method_table);
+
+    // buildin values
+    add_node_to_roots_buffer(closure, &buf, &len, jl_an_empty_vec_any);
+    add_node_to_roots_buffer(closure, &buf, &len, jl_module_init_order);
+    for (size_t i = 0; i < jl_current_modules.size; i += 2) {
+        if (jl_current_modules.table[i + 1] != HT_NOTFOUND) {
+            add_node_to_roots_buffer(closure, &buf, &len, jl_current_modules.table[i]);
+        }
+    }
+    add_node_to_roots_buffer(closure, &buf, &len, jl_anytuple_type_type);
+    for (size_t i = 0; i < N_CALL_CACHE; i++) {
+         jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]);
+        add_node_to_roots_buffer(closure, &buf, &len, v);
+    }
+    add_node_to_roots_buffer(closure, &buf, &len, _jl_debug_method_invalidation);
+
+    // constants
+    add_node_to_roots_buffer(closure, &buf, &len, jl_emptytuple_type);
+    add_node_to_roots_buffer(closure, &buf, &len, cmpswap_names);
+
+    // jl_global_roots_table must be transitively pinned
+    RootsWorkBuffer tpinned_buf = (closure->report_tpinned_nodes_func)((void**)0, 0, 0, closure->data, true);
+    size_t tpinned_len = 0;
+    add_node_to_tpinned_roots_buffer(closure, &tpinned_buf, &tpinned_len, jl_global_roots_list);
+    add_node_to_tpinned_roots_buffer(closure, &tpinned_buf, &tpinned_len, jl_global_roots_keyset);
+
+    // FIXME: transivitely pinning for now, should be removed after we add moving Immix
+    add_node_to_tpinned_roots_buffer(closure, &tpinned_buf, &tpinned_len, precompile_field_replace);
+
+    // Push the result of the work.
+    (closure->report_nodes_func)(buf.ptr, len, buf.cap, closure->data, false);
+    (closure->report_tpinned_nodes_func)(tpinned_buf.ptr, tpinned_len, tpinned_buf.cap, closure->data, false);
+}
+
+JL_DLLEXPORT void jl_gc_scan_julia_exc_obj(void* obj_raw, void* closure, ProcessSlotFn process_slot) {
+    jl_task_t *ta = (jl_task_t*)obj_raw;
+
+    if (ta->excstack) { // inlining label `excstack` from mark_loop
+
+        // the excstack should always be a heap object
+        assert(mmtk_object_is_managed_by_mmtk(ta->excstack));
+
+        process_slot(closure, &ta->excstack);
+        jl_excstack_t *excstack = ta->excstack;
+        size_t itr = ta->excstack->top;
+        size_t bt_index = 0;
+        size_t jlval_index = 0;
+        while (itr > 0) {
+            size_t bt_size = jl_excstack_bt_size(excstack, itr);
+            jl_bt_element_t *bt_data = jl_excstack_bt_data(excstack, itr);
+            for (; bt_index < bt_size; bt_index += jl_bt_entry_size(bt_data + bt_index)) {
+                jl_bt_element_t *bt_entry = bt_data + bt_index;
+                if (jl_bt_is_native(bt_entry))
+                    continue;
+                // Found an extended backtrace entry: iterate over any
+                // GC-managed values inside.
+                size_t njlvals = jl_bt_num_jlvals(bt_entry);
+                while (jlval_index < njlvals) {
+                    jl_value_t** new_obj_slot = &bt_entry[2 + jlval_index].jlvalue;
+                    jlval_index += 1;
+                    process_slot(closure, new_obj_slot);
+                }
+                jlval_index = 0;
+            }
+
+            jl_bt_element_t *stack_raw = (jl_bt_element_t *)(excstack+1);
+            jl_value_t** stack_obj_slot = &stack_raw[itr-1].jlvalue;
+
+            itr = jl_excstack_next(excstack, itr);
+            bt_index = 0;
+            jlval_index = 0;
+            process_slot(closure, stack_obj_slot);
+        }
+    }
+}
+
+// This is used in mmtk_sweep_malloced_memory and it is slightly different
+// from jl_gc_free_memory from gc-stock.c as the stock GC updates the
+// information in the global variable gc_heap_stats (which is specific to the stock GC)
+static void jl_gc_free_memory(jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT
+{
+    assert(jl_is_genericmemory(m));
+    assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
+    char *d = (char*)m->ptr;
+    size_t freed_bytes = memory_block_usable_size(d, isaligned);
+    assert(freed_bytes != 0);
+    if (isaligned)
+        jl_free_aligned(d);
+    else
+        free(d);
+    gc_num.freed += freed_bytes;
+    gc_num.freecall++;
+}
+
+JL_DLLEXPORT void jl_gc_mmtk_sweep_malloced_memory(void) JL_NOTSAFEPOINT
+{
+    void* iter = mmtk_new_mutator_iterator();
+    jl_ptls_t ptls2 = (jl_ptls_t)mmtk_get_next_mutator_tls(iter);
+    while(ptls2 != NULL) {
+        size_t n = 0;
+        size_t l = ptls2->gc_tls_common.heap.mallocarrays.len;
+        void **lst = ptls2->gc_tls_common.heap.mallocarrays.items;
+        // filter without preserving order
+        while (n < l) {
+            jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1);
+            if (mmtk_is_live_object(m)) {
+                n++;
+            }
+            else {
+                int isaligned = (uintptr_t)lst[n] & 1;
+                jl_gc_free_memory(m, isaligned);
+                l--;
+                lst[n] = lst[l];
+            }
+        }
+        ptls2->gc_tls_common.heap.mallocarrays.len = l;
+        ptls2 = (jl_ptls_t)mmtk_get_next_mutator_tls(iter);
+    }
+    mmtk_close_mutator_iterator(iter);
+}
+
+#define jl_genericmemory_elsize(a) (((jl_datatype_t*)jl_typetagof(a))->layout->size)
+
+// if data is inlined inside the genericmemory object --- to->ptr needs to be updated when copying the array
+JL_DLLEXPORT void jl_gc_update_inlined_array(void* from, void* to) {
+    jl_value_t* jl_from = (jl_value_t*) from;
+    jl_value_t* jl_to = (jl_value_t*) to;
+
+    uintptr_t tag_to = (uintptr_t)jl_typeof(jl_to);
+    jl_datatype_t *vt = (jl_datatype_t*)tag_to;
+
+    if(vt->name == jl_genericmemory_typename) {
+        jl_genericmemory_t *a = (jl_genericmemory_t*)jl_from;
+        jl_genericmemory_t *b = (jl_genericmemory_t*)jl_to;
+        int how = jl_genericmemory_how(a);
+
+        if (how == 0 && mmtk_object_is_managed_by_mmtk(a->ptr)) { // a is inlined (a->ptr points into the mmtk object)
+            size_t offset_of_data = ((size_t)a->ptr - (size_t)a);
+            if (offset_of_data > 0) {
+                b->ptr = (void*)((size_t) b + offset_of_data);
+            }
+        }
+    }
+}
+
+// modified sweep_stack_pools from gc-stacks.c
+JL_DLLEXPORT void jl_gc_mmtk_sweep_stack_pools(void)
+{
+    // Stack sweeping algorithm:
+    //    // deallocate stacks if we have too many sitting around unused
+    //    for (stk in halfof(free_stacks))
+    //        free_stack(stk, pool_sz);
+    //    // then sweep the task stacks
+    //    for (t in live_tasks)
+    //        if (!gc-marked(t))
+    //            stkbuf = t->stkbuf
+    //            bufsz = t->bufsz
+    //            if (stkbuf)
+    //                push(free_stacks[sz], stkbuf)
+    assert(gc_n_threads);
+    for (int i = 0; i < jl_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 == NULL)
+            continue;
+
+        // free half of stacks that remain unused since last sweep
+        for (int p = 0; p < JL_N_STACK_POOLS; p++) {
+            small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p];
+            size_t n_to_free;
+            if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
+                n_to_free = al->len; // not alive yet or dead, so it does not need these anymore
+            }
+            else if (al->len > MIN_STACK_MAPPINGS_PER_POOL) {
+                n_to_free = al->len / 2;
+                if (n_to_free > (al->len - MIN_STACK_MAPPINGS_PER_POOL))
+                    n_to_free = al->len - MIN_STACK_MAPPINGS_PER_POOL;
+            }
+            else {
+                n_to_free = 0;
+            }
+            for (int n = 0; n < n_to_free; n++) {
+                void *stk = small_arraylist_pop(al);
+                free_stack(stk, pool_sizes[p]);
+            }
+            if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
+                small_arraylist_free(al);
+            }
+        }
+        if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
+            small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks);
+        }
+
+        small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
+        size_t n = 0;
+        size_t ndel = 0;
+        size_t l = live_tasks->len;
+        void **lst = live_tasks->items;
+        if (l == 0)
+            continue;
+        while (1) {
+            jl_task_t *t = (jl_task_t*)lst[n];
+            if (mmtk_is_live_object(t)) {
+                jl_task_t *maybe_forwarded = (jl_task_t*)mmtk_get_possibly_forwarded(t);
+                live_tasks->items[n] = maybe_forwarded;
+                t = maybe_forwarded;
+                assert(jl_is_task(t));
+                if (t->ctx.stkbuf == NULL)
+                    ndel++; // jl_release_task_stack called
+                else
+                    n++;
+            } else {
+                ndel++;
+                void *stkbuf = t->ctx.stkbuf;
+                size_t bufsz = t->ctx.bufsz;
+                if (stkbuf) {
+                    t->ctx.stkbuf = NULL;
+                    _jl_free_stack(ptls2, stkbuf, bufsz);
+                }
+#ifdef _COMPILER_TSAN_ENABLED_
+                if (t->ctx.tsan_state) {
+                    __tsan_destroy_fiber(t->ctx.tsan_state);
+                    t->ctx.tsan_state = NULL;
+                }
+#endif
+            }
+            if (n >= l - ndel)
+                break;
+            void *tmp = lst[n];
+            lst[n] = lst[n + ndel];
+            lst[n + ndel] = tmp;
+        }
+        live_tasks->len -= ndel;
+    }
+}
+
+JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    jl_gc_mmtk_sweep_stack_pools();
+    sweep_mtarraylist_buffers();
+}
+
+JL_DLLEXPORT void* jl_gc_get_stackbase(int16_t tid) {
+    assert(tid >= 0);
+    jl_ptls_t ptls2 = jl_all_tls_states[tid];
+    return ptls2->stackbase;
+}
+
+JL_DLLEXPORT void jl_gc_update_stats(uint64_t inc, size_t mmtk_live_bytes, bool is_nursery_gc) {
+    gc_num.total_time += inc;
+    gc_num.pause += 1;
+    gc_num.full_sweep += !(is_nursery_gc);
+    gc_num.total_allocd += gc_num.allocd;
+    gc_num.allocd = 0;
+    live_bytes = mmtk_live_bytes;
+}
+
+#define jl_genericmemory_data_owner_field_addr(a) ((jl_value_t**)((jl_genericmemory_t*)(a) + 1))
+
+JL_DLLEXPORT void* jl_gc_get_owner_address_to_mmtk(void* m) {
+    return (void*)jl_genericmemory_data_owner_field_addr(m);
+}
+
+// same as jl_genericmemory_how but with JL_DLLEXPORT
+// we should probably inline this in Rust
+JL_DLLEXPORT size_t jl_gc_genericmemory_how(void *arg) JL_NOTSAFEPOINT
+{
+    jl_genericmemory_t* m = (jl_genericmemory_t*)arg;
+    if (m->ptr == (void*)((char*)m + 16)) // JL_SMALL_BYTE_ALIGNMENT (from julia_internal.h)
+        return 0;
+    jl_value_t *owner = jl_genericmemory_data_owner_field(m);
+    if (owner == (jl_value_t*)m)
+        return 1;
+    if (owner == NULL)
+        return 2;
+    return 3;
+}
+
+// ========================================================================= //
+// Weak References and Finalizers
+// ========================================================================= //
+
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value)
+{
+    jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type);
+    wr->value = value;  // NOTE: wb not needed here
+    mmtk_add_weak_candidate(wr);
+    return wr;
+}
+
+JL_DLLEXPORT void* jl_gc_get_thread_finalizer_list(void* ptls_raw) {
+    jl_ptls_t ptls = (jl_ptls_t) ptls_raw;
+    return (void*)&ptls->finalizers;
+}
+
+JL_DLLEXPORT void* jl_gc_get_to_finalize_list(void) {
+    return (void*)&to_finalize;
+}
+
+JL_DLLEXPORT void* jl_gc_get_marked_finalizers_list(void) {
+    return (void*)&finalizer_list_marked;
+}
+
+JL_DLLEXPORT int* jl_gc_get_have_pending_finalizers(void) {
+    return (int*)&jl_gc_have_pending_finalizers;
+}
+
+// ========================================================================= //
+// Allocation
+// ========================================================================= //
+
+#define MMTK_DEFAULT_IMMIX_ALLOCATOR (0)
+#define MMTK_IMMORTAL_BUMP_ALLOCATOR (0)
+
+int jl_gc_classify_pools(size_t sz, int *osize)
+{
+    if (sz > GC_MAX_SZCLASS)
+        return -1; // call big alloc function
+    size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    *osize = LLT_ALIGN(allocsz, 16);
+    return 0; // use MMTk's fastpath logic
+}
+
+#define MMTK_MIN_ALIGNMENT 4
+// MMTk assumes allocation size is aligned to min alignment.
+STATIC_INLINE size_t mmtk_align_alloc_sz(size_t sz) JL_NOTSAFEPOINT
+{
+    return (sz + MMTK_MIN_ALIGNMENT - 1) & ~(MMTK_MIN_ALIGNMENT - 1);
+}
+
+STATIC_INLINE void* bump_alloc_fast(MMTkMutatorContext* mutator, uintptr_t* cursor, uintptr_t limit, size_t size, size_t align, size_t offset, int allocator) {
+    intptr_t delta = (-offset - *cursor) & (align - 1);
+    uintptr_t result = *cursor + (uintptr_t)delta;
+
+    if (__unlikely(result + size > limit)) {
+        return (void*) mmtk_alloc(mutator, size, align, offset, allocator);
+    } else{
+        *cursor = result + size;
+        return (void*)result;
+    }
+}
+
+STATIC_INLINE void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) {
+    ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR];
+    return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0);
+}
+
+inline void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, size_t size) {
+    mmtk_post_alloc(mutator, obj, size, 0);
+}
+
+STATIC_INLINE void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
+    // FIXME: for now, we do nothing
+    // but when supporting moving, this is where we set the valid object (VO) bit
+}
+
+STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) {
+    BumpAllocator* allocator = &mutator->allocators.bump_pointer[MMTK_IMMORTAL_BUMP_ALLOCATOR];
+    return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1);
+}
+
+STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
+    // FIXME: Similarly, for now, we do nothing
+    // but when supporting moving, this is where we set the valid object (VO) bit
+    // and log (old gen) bit
+}
+
+JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty)
+{
+    // safepoint
+    jl_gc_safepoint_(ptls);
+
+    jl_value_t *v;
+    if ((uintptr_t)ty != jl_buff_tag) {
+        // v needs to be 16 byte aligned, therefore v_tagged needs to be offset accordingly to consider the size of header
+        jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(&ptls->gc_tls.mmtk_mutator, LLT_ALIGN(osize, align), align, sizeof(jl_taggedvalue_t));
+        v = jl_valueof(v_tagged);
+        mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize, align));
+    } else {
+        // allocating an extra word to store the size of buffer objects
+        jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(&ptls->gc_tls.mmtk_mutator, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align), align, 0);
+        jl_value_t* v_tagged_aligned = ((jl_value_t*)((char*)(v_tagged) + sizeof(jl_taggedvalue_t)));
+        v = jl_valueof(v_tagged_aligned);
+        mmtk_store_obj_size_c(v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align));
+        mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align));
+    }
+
+    ptls->gc_tls_common.gc_num.allocd += osize;
+    ptls->gc_tls_common.gc_num.poolalloc++;
+
+    return v;
+}
+
+JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t sz)
+{
+    // safepoint
+    jl_gc_safepoint_(ptls);
+
+    size_t offs = offsetof(bigval_t, header);
+    assert(sz >= sizeof(jl_taggedvalue_t) && "sz must include tag");
+    static_assert(offsetof(bigval_t, header) >= sizeof(void*), "Empty bigval header?");
+    static_assert(sizeof(bigval_t) % JL_HEAP_ALIGNMENT == 0, "");
+    size_t allocsz = LLT_ALIGN(sz + offs, JL_CACHE_BYTE_ALIGNMENT);
+    if (allocsz < sz) { // overflow in adding offs, size was "negative"
+        assert(0 && "Error when allocating big object");
+        jl_throw(jl_memory_exception);
+    }
+
+    bigval_t *v = (bigval_t*)mmtk_alloc_large(&ptls->gc_tls.mmtk_mutator, allocsz, JL_CACHE_BYTE_ALIGNMENT, 0, 2);
+
+    if (v == NULL) {
+        assert(0 && "Allocation failed");
+        jl_throw(jl_memory_exception);
+    }
+    v->sz = allocsz;
+
+    ptls->gc_tls_common.gc_num.allocd += allocsz;
+    ptls->gc_tls_common.gc_num.bigalloc++;
+
+    jl_value_t *result = jl_valueof(&v->header);
+    mmtk_post_alloc(&ptls->gc_tls.mmtk_mutator, result, allocsz, 2);
+
+    return result;
+}
+
+// Instrumented version of jl_gc_small_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_small_alloc(jl_ptls_t ptls, int offset, int osize, jl_value_t* type)
+{
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
+
+    jl_value_t *val = jl_mmtk_gc_alloc_default(ptls, osize, 16, NULL);
+    maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type);
+    return val;
+}
+
+// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type)
+{
+    // TODO: assertion needed here?
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
+
+    jl_value_t *val = jl_mmtk_gc_alloc_big(ptls, sz);
+    maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type);
+    return val;
+}
+
+inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
+{
+    jl_value_t *v;
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    if (sz <= GC_MAX_SZCLASS) {
+        v = jl_mmtk_gc_alloc_default(ptls, allocsz, 16, ty);
+    }
+    else {
+        if (allocsz < sz) // overflow in adding offs, size was "negative"
+            jl_throw(jl_memory_exception);
+        v = jl_mmtk_gc_alloc_big(ptls, allocsz);
+    }
+    jl_set_typeof(v, ty);
+    maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
+    return v;
+}
+
+// allocation wrappers that track allocation and let collection run
+JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
+{
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    void *data = malloc(sz);
+    if (data != NULL && pgcstack != NULL && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
+        malloc_maybe_collect(ptls, sz);
+        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz);
+    }
+    return data;
+}
+
+JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
+{
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    void *data = calloc(nm, sz);
+    if (data != NULL && pgcstack != NULL && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
+        malloc_maybe_collect(ptls, nm * sz);
+        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz);
+    }
+    return data;
+}
+
+JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
+{
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    free(p);
+    if (pgcstack != NULL && ct->world_age) {
+        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -sz);
+    }
+}
+
+JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
+{
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    if (pgcstack && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
+        malloc_maybe_collect(ptls, sz);
+        if (sz < old)
+            jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, old - sz);
+        else
+            jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz - old);
+    }
+    return realloc(p, sz);
+}
+
+void *jl_gc_perm_alloc_nolock(jl_ptls_t ptls, size_t sz, int zero, unsigned align, unsigned offset)
+{
+    size_t allocsz = mmtk_align_alloc_sz(sz);
+    void* addr = mmtk_immortal_alloc_fast(&ptls->gc_tls.mmtk_mutator, allocsz, align, offset);
+    return addr;
+}
+
+void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_perm_alloc_nolock(ptls, sz, zero, align, offset);
+}
+
+jl_value_t *jl_gc_permobj(jl_ptls_t ptls, size_t sz, void *ty, unsigned align) JL_NOTSAFEPOINT
+{
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    if (align == 0) {
+        align = ((sz == 0) ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
+                                                 sizeof(void*) * 2 : 16));
+    }
+    jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc_nolock(ptls, allocsz, 0, align,
+                                                              sizeof(void*) % align);
+
+    mmtk_immortal_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, jl_valueof(o), allocsz);
+    o->header = (uintptr_t)ty;
+    return jl_valueof(o);
+}
+
+JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    maybe_collect(ptls);
+    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
+    if (allocsz < sz)  // overflow in adding offs, size was "negative"
+        jl_throw(jl_memory_exception);
+
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    void *b = malloc_cache_align(allocsz);
+    if (b == NULL)
+        jl_throw(jl_memory_exception);
+
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
+    // FIXME: Should these be part of mmtk's heap?
+    // malloc_maybe_collect(ptls, sz);
+    // jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, allocsz);
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+    // jl_gc_managed_malloc is currently always used for allocating array buffers.
+    maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag);
+    return b;
+}
+
+void jl_gc_notify_image_load(const char* img_data, size_t len)
+{
+    mmtk_set_vm_space((void*)img_data, len);
+}
+
+// ========================================================================= //
+// Code specific to stock that is not supported by MMTk
+// ========================================================================= //
+
+// mutex for page profile
+uv_mutex_t page_profile_lock;
+
+JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream)
+{
+    uv_mutex_lock(&page_profile_lock);
+    const char *str = "Page profiler in unsupported in MMTk.";
+    ios_write(stream, str, strlen(str));
+    uv_mutex_unlock(&page_profile_lock);
+}
+
+// this seems to be needed by the gc tests
+#define JL_GC_N_MAX_POOLS 51
+JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS];
+
+STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT
+{
+    // FIXME: MMTk would have to provide its own stats
+}
+
+#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants
+
+JL_DLLEXPORT uint64_t jl_get_pg_size(void)
+{
+    return MMTK_GC_PAGE_SZ;
+}
+
+// Not used by mmtk
+// Number of GC threads that may run parallel marking
+int jl_n_markthreads;
+// Number of GC threads that may run concurrent sweeping (0 or 1)
+int jl_n_sweepthreads;
+// `tid` of first GC thread
+int gc_first_tid;
+// Number of threads sweeping stacks
+_Atomic(int) gc_n_threads_sweeping_stacks;
+// counter for sharing work when sweeping stacks
+_Atomic(int) gc_ptls_sweep_idx;
+// counter for round robin of giving back stack pages to the OS
+_Atomic(int) gc_stack_free_idx = 0;
+
+JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT
+{
+    mmtk_unreachable();
+}
+
+JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored,
+                                        struct _jl_datatype_t *dt) JL_NOTSAFEPOINT
+{
+    mmtk_unreachable();
+}
+
+JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj)
+{
+    mmtk_unreachable();
+    return 0;
+}
+
+JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
+                                            jl_value_t **objs, size_t nobjs)
+{
+    mmtk_unreachable();
+}
+
+JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void)
+{
+    // TODO: meaningful for MMTk?
+    return GC_MAX_SZCLASS;
+}
+
+JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj)
+{
+    // FIXME: do we need to implement this?
+}
+
+// gc-debug functions
+JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p)
+{
+    return NULL;
+}
+
+void jl_gc_debug_fprint_critical_error(ios_t *s) JL_NOTSAFEPOINT
+{
+}
+
+int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT
+{
+    return 0;
+}
+
+void jl_gc_debug_fprint_status(ios_t *s) JL_NOTSAFEPOINT
+{
+    // May not be accurate but should be helpful enough
+    uint64_t pool_count = gc_num.poolalloc;
+    uint64_t big_count = gc_num.bigalloc;
+    jl_safe_fprintf(s, "Allocations: %" PRIu64 " "
+                    "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n",
+                    pool_count + big_count, pool_count, big_count, gc_num.pause);
+}
+
+JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void)
+{
+    return sizeof(bigval_t);
+}
+
+void jl_print_gc_stats(JL_STREAM *s)
+{
+}
+
+JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void)
+{
+    return 0;
+}
+
+JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void)
+{
+    return 0;
+}
+
+// TODO: if this is needed, it can be added in MMTk
+JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
+{
+    return NULL;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c
new file mode 100644
index 0000000000000..5e18be62ebfe1
--- /dev/null
+++ b/src/gc-page-profiler.c
@@ -0,0 +1,179 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "gc-page-profiler.h"
+#include "julia.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// whether page profiling is enabled
+int page_profile_enabled;
+// number of pages written
+size_t page_profile_pages_written;
+// stream to write page profile to
+ios_t *page_profile_stream;
+// mutex for page profile
+uv_mutex_t page_profile_lock;
+
+gc_page_profiler_serializer_t gc_page_serializer_create(void) JL_NOTSAFEPOINT
+{
+    gc_page_profiler_serializer_t serializer;
+    if (__unlikely(page_profile_enabled)) {
+        arraylist_new(&serializer.typestrs, GC_PAGE_SZ);
+        serializer.buffers = (char *)malloc_s(GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY);
+        serializer.cursor = 0;
+    }
+    else {
+        serializer.typestrs.len = 0;
+    }
+    return serializer;
+}
+
+void gc_page_serializer_init(gc_page_profiler_serializer_t *serializer,
+                             jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
+{
+    if (__unlikely(page_profile_enabled)) {
+        serializer->typestrs.len = 0;
+        serializer->data = (char *)pg->data;
+        serializer->osize = pg->osize;
+        serializer->cursor = 0;
+        serializer->capacity = GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY;
+    }
+}
+
+void gc_page_serializer_destroy(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT
+{
+    if (__unlikely(page_profile_enabled)) {
+        arraylist_free(&serializer->typestrs);
+        free(serializer->buffers);
+    }
+}
+
+void gc_page_serializer_write(gc_page_profiler_serializer_t *serializer,
+                              const char *str) JL_NOTSAFEPOINT
+{
+    if (__unlikely(page_profile_enabled)) {
+        arraylist_push(&serializer->typestrs, (void *)str);
+    }
+}
+
+void gc_enable_page_profile(void) JL_NOTSAFEPOINT
+{
+    page_profile_enabled = 1;
+}
+
+void gc_disable_page_profile(void) JL_NOTSAFEPOINT
+{
+    page_profile_enabled = 0;
+}
+
+int gc_page_profile_is_enabled(void) JL_NOTSAFEPOINT
+{
+    return page_profile_enabled;
+}
+
+void gc_page_profile_write_preamble(gc_page_profiler_serializer_t *serializer)
+    JL_NOTSAFEPOINT
+{
+    if (__unlikely(page_profile_enabled)) {
+        char str[4096];
+        snprintf(str, sizeof(str),
+                 "{\"address\": \"%p\",\"object_size\": %d,\"objects\": [",
+                 serializer->data, serializer->osize);
+        ios_write(page_profile_stream, str, strlen(str));
+    }
+}
+
+void gc_page_profile_write_epilogue(gc_page_profiler_serializer_t *serializer)
+    JL_NOTSAFEPOINT
+{
+    if (__unlikely(page_profile_enabled)) {
+        const char *str = "]}";
+        ios_write(page_profile_stream, str, strlen(str));
+    }
+}
+
+void gc_page_profile_write_comma(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT
+{
+    if (__unlikely(page_profile_enabled)) {
+        // write comma if not first page
+        if (page_profile_pages_written > 0) {
+            const char *str = ",";
+            ios_write(page_profile_stream, str, strlen(str));
+        }
+    }
+}
+
+void gc_page_profile_write_to_file(gc_page_profiler_serializer_t *serializer)
+    JL_NOTSAFEPOINT
+{
+    size_t large_enough_str_size = 4096;
+    if (__unlikely(page_profile_enabled)) {
+        // write to file
+        uv_mutex_lock(&page_profile_lock);
+        gc_page_profile_write_comma(serializer);
+        gc_page_profile_write_preamble(serializer);
+        char *str = (char *)malloc_s(large_enough_str_size);
+        for (size_t i = 0; i < serializer->typestrs.len; i++) {
+            const char *name = (const char *)serializer->typestrs.items[i];
+            if (name == GC_SERIALIZER_EMPTY) {
+                snprintf(str, large_enough_str_size, "\"empty\",");
+            }
+            else if (name == GC_SERIALIZER_GARBAGE) {
+                snprintf(str, large_enough_str_size, "\"garbage\",");
+            }
+            else {
+                while ((strlen(name) + 1) > large_enough_str_size) {
+                    large_enough_str_size *= 2;
+                    str = (char *)realloc_s(str, large_enough_str_size);
+                }
+                snprintf(str, large_enough_str_size, "\"%s\",", name);
+            }
+            // remove trailing comma for last element
+            if (i == serializer->typestrs.len - 1) {
+                str[strlen(str) - 1] = '\0';
+            }
+            ios_write(page_profile_stream, str, strlen(str));
+        }
+        free(str);
+        gc_page_profile_write_epilogue(serializer);
+        page_profile_pages_written++;
+        uv_mutex_unlock(&page_profile_lock);
+    }
+}
+
+void gc_page_profile_write_json_preamble(ios_t *stream) JL_NOTSAFEPOINT
+{
+    if (__unlikely(page_profile_enabled)) {
+        uv_mutex_lock(&page_profile_lock);
+        const char *str = "{\"pages\": [";
+        ios_write(stream, str, strlen(str));
+        uv_mutex_unlock(&page_profile_lock);
+    }
+}
+
+void gc_page_profile_write_json_epilogue(ios_t *stream) JL_NOTSAFEPOINT
+{
+    if (__unlikely(page_profile_enabled)) {
+        uv_mutex_lock(&page_profile_lock);
+        const char *str = "]}";
+        ios_write(stream, str, strlen(str));
+        uv_mutex_unlock(&page_profile_lock);
+    }
+}
+
+JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream)
+{
+    gc_enable_page_profile();
+    page_profile_pages_written = 0;
+    page_profile_stream = stream;
+    gc_page_profile_write_json_preamble(stream);
+    jl_gc_collect(JL_GC_FULL);
+    gc_page_profile_write_json_epilogue(stream);
+    gc_disable_page_profile();
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/gc-page-profiler.h b/src/gc-page-profiler.h
new file mode 100644
index 0000000000000..0dd72ad072fa9
--- /dev/null
+++ b/src/gc-page-profiler.h
@@ -0,0 +1,146 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef GC_PAGE_PROFILER_H
+#define GC_PAGE_PROFILER_H
+
+#include "gc-stock.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY (4096)
+
+typedef struct {
+    arraylist_t typestrs;
+    char *data;
+    int osize;
+    char *buffers;
+    size_t cursor;
+    size_t capacity;
+} gc_page_profiler_serializer_t;
+
+// mutex for page profile
+extern uv_mutex_t page_profile_lock;
+// whether page profiling is enabled
+extern int page_profile_enabled;
+
+// Serializer functions
+gc_page_profiler_serializer_t gc_page_serializer_create(void) JL_NOTSAFEPOINT;
+void gc_page_serializer_init(gc_page_profiler_serializer_t *serializer,
+                             jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT;
+void gc_page_serializer_destroy(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT;
+void gc_page_serializer_write(gc_page_profiler_serializer_t *serializer,
+                              const char *str) JL_NOTSAFEPOINT;
+// Page profile functions
+#define GC_SERIALIZER_EMPTY ((const char *)0x1)
+#define GC_SERIALIZER_GARBAGE ((const char *)0x2)
+STATIC_INLINE void gc_page_profile_write_empty_page(gc_page_profiler_serializer_t *serializer,
+                                 int enabled) JL_NOTSAFEPOINT
+{
+    if (__unlikely(enabled)) {
+        gc_page_serializer_write(serializer, GC_SERIALIZER_EMPTY);
+    }
+}
+STATIC_INLINE void gc_page_profile_write_garbage(gc_page_profiler_serializer_t *serializer,
+                                                 int enabled) JL_NOTSAFEPOINT
+{
+    if (__unlikely(enabled)) {
+        gc_page_serializer_write(serializer, GC_SERIALIZER_GARBAGE);
+    }
+}
+STATIC_INLINE char *gc_page_profile_request_buffer(gc_page_profiler_serializer_t *serializer, size_t size) JL_NOTSAFEPOINT
+{
+    while (serializer->cursor + size >= serializer->capacity) {
+        serializer->capacity *= 2;
+        serializer->buffers = (char *)realloc_s(serializer->buffers, serializer->capacity);
+    }
+    char *p = &serializer->buffers[serializer->cursor];
+    memset(p, 0, size);
+    serializer->cursor += size;
+    return p;
+}
+STATIC_INLINE void gc_page_profile_write_live_obj(gc_page_profiler_serializer_t *serializer,
+                                                  jl_taggedvalue_t *v,
+                                                  int enabled) JL_NOTSAFEPOINT
+{
+    if (__unlikely(enabled)) {
+        jl_value_t *a = jl_valueof(v);
+        jl_value_t *t = jl_typeof(a);
+        ios_t str_;
+        int ios_need_close = 0;
+        char *type_name = NULL;
+        char *type_name_in_serializer = NULL;
+        if (t == (jl_value_t *)jl_get_buff_tag()) {
+            type_name = "Buffer";
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_string(a)) {
+            type_name = "String";
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_symbol(a)) {
+            type_name = jl_symbol_name((jl_sym_t *)a);
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_simplevector(a)) {
+            type_name = "SimpleVector";
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_module(a)) {
+            type_name = jl_symbol_name_(((jl_module_t *)a)->name);
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_task(a)) {
+            type_name = "Task";
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, strlen(type_name) + 1);
+            strcpy(type_name_in_serializer, type_name);
+        }
+        else if (jl_is_datatype(a)) {
+            ios_need_close = 1;
+            ios_mem(&str_, 0);
+            JL_STREAM *str = (JL_STREAM *)&str_;
+            jl_static_show(str, a);
+            type_name = str_.buf;
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, str_.size + 1);
+            memcpy(type_name_in_serializer, type_name, str_.size);
+        }
+        else {
+            ios_need_close = 1;
+            ios_mem(&str_, 0);
+            JL_STREAM *str = (JL_STREAM *)&str_;
+            jl_static_show(str, t);
+            type_name = str_.buf;
+            type_name_in_serializer =
+                gc_page_profile_request_buffer(serializer, str_.size + 1);
+            memcpy(type_name_in_serializer, type_name, str_.size);
+        }
+        gc_page_serializer_write(serializer, type_name_in_serializer);
+        if (ios_need_close) {
+            ios_close(&str_);
+        }
+        jl_may_leak(type_name_in_serializer);
+    }
+}
+void gc_enable_page_profile(void) JL_NOTSAFEPOINT;
+void gc_disable_page_profile(void) JL_NOTSAFEPOINT;
+int gc_page_profile_is_enabled(void) JL_NOTSAFEPOINT;
+void gc_page_profile_write_to_file(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GC_PAGE_PROFILER_H
diff --git a/src/gc-pages.c b/src/gc-pages.c
index 682e76611f5d9..faee7a60fc406 100644
--- a/src/gc-pages.c
+++ b/src/gc-pages.c
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "gc.h"
+#include "gc-common.h"
+#include "gc-stock.h"
 #ifndef _OS_WINDOWS_
 #  include <sys/resource.h>
 #endif
@@ -9,6 +10,13 @@
 extern "C" {
 #endif
 
+uv_mutex_t gc_pages_lock;
+
+JL_DLLEXPORT uint64_t jl_get_pg_size(void)
+{
+    return GC_PAGE_SZ;
+}
+
 // Try to allocate memory in chunks to permit faster allocation
 // and improve memory locality of the pools
 #ifdef _P64
@@ -52,6 +60,8 @@ char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT
         // round data pointer up to the nearest gc_page_data-aligned
         // boundary if mmap didn't already do so.
         mem = (char*)gc_page_data(mem + GC_PAGE_SZ - 1);
+    jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mapped, pages_sz);
+    jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, pages_sz);
     return mem;
 }
 
@@ -60,7 +70,7 @@ char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT
 // more chunks (or other allocations). The final page count is recorded
 // and will be used as the starting count next time. If the page count is
 // smaller `MIN_BLOCK_PG_ALLOC` a `jl_memory_exception` is thrown.
-// Assumes `gc_perm_lock` is acquired, the lock is released before the
+// Assumes `gc_pages_lock` is acquired, the lock is released before the
 // exception is thrown.
 char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT
 {
@@ -80,7 +90,7 @@ char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT
             block_pg_cnt = pg_cnt = min_block_pg_alloc;
         }
         else {
-            uv_mutex_unlock(&gc_perm_lock);
+            uv_mutex_unlock(&gc_pages_lock);
             jl_throw(jl_memory_exception);
         }
     }
@@ -98,7 +108,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
     jl_gc_pagemeta_t *meta = NULL;
 
     // try to get page from `pool_lazily_freed`
-    meta = pop_lf_page_metadata_back(&global_page_pool_lazily_freed);
+    meta = pop_lf_back(&global_page_pool_lazily_freed);
     if (meta != NULL) {
         gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
         // page is already mapped
@@ -106,42 +116,45 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
     }
 
     // try to get page from `pool_clean`
-    meta = pop_lf_page_metadata_back(&global_page_pool_clean);
+    meta = pop_lf_back(&global_page_pool_clean);
     if (meta != NULL) {
         gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
         goto exit;
     }
 
     // try to get page from `pool_freed`
-    meta = pop_lf_page_metadata_back(&global_page_pool_freed);
+    meta = pop_lf_back(&global_page_pool_freed);
     if (meta != NULL) {
+        jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, GC_PAGE_SZ);
         gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
         goto exit;
     }
 
-    uv_mutex_lock(&gc_perm_lock);
+    uv_mutex_lock(&gc_pages_lock);
     // another thread may have allocated a large block while we were waiting...
-    meta = pop_lf_page_metadata_back(&global_page_pool_clean);
+    meta = pop_lf_back(&global_page_pool_clean);
     if (meta != NULL) {
-        uv_mutex_unlock(&gc_perm_lock);
-        gc_alloc_map_set(meta->data, 1);
+        uv_mutex_unlock(&gc_pages_lock);
+        gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
         goto exit;
     }
-    // must map a new set of pages
-    char *data = jl_gc_try_alloc_pages();
-    meta = (jl_gc_pagemeta_t*)malloc_s(block_pg_cnt * sizeof(jl_gc_pagemeta_t));
-    for (int i = 0; i < block_pg_cnt; i++) {
-        jl_gc_pagemeta_t *pg = &meta[i];
-        pg->data = data + GC_PAGE_SZ * i;
-        gc_alloc_map_maybe_create(pg->data);
-        if (i == 0) {
-            gc_alloc_map_set(pg->data, 1);
-        }
-        else {
-            push_lf_page_metadata_back(&global_page_pool_clean, pg);
+    {
+        // must map a new set of pages
+        char *data = jl_gc_try_alloc_pages();
+        meta = (jl_gc_pagemeta_t*)malloc_s(block_pg_cnt * sizeof(jl_gc_pagemeta_t));
+        for (int i = 0; i < block_pg_cnt; i++) {
+            jl_gc_pagemeta_t *pg = &meta[i];
+            pg->data = data + GC_PAGE_SZ * i;
+            gc_alloc_map_maybe_create(pg->data);
+            if (i == 0) {
+                gc_alloc_map_set(pg->data, GC_PAGE_ALLOCATED);
+            }
+            else {
+                push_lf_back(&global_page_pool_clean, pg);
+            }
         }
+        uv_mutex_unlock(&gc_pages_lock);
     }
-    uv_mutex_unlock(&gc_perm_lock);
 exit:
 #ifdef _OS_WINDOWS_
     VirtualAlloc(meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE);
@@ -152,7 +165,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
 }
 
 // return a page to the freemap allocator
-void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
+NOINLINE void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
 {
     void *p = pg->data;
     gc_alloc_map_set((char*)p, GC_PAGE_FREED);
@@ -188,6 +201,7 @@ void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
     madvise(p, decommit_size, MADV_DONTNEED);
 #endif
     msan_unpoison(p, decommit_size);
+    jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, -decommit_size);
 }
 
 #ifdef __cplusplus
diff --git a/src/gc-stacks.c b/src/gc-stacks.c
index b35c1722c82ff..9387c7fb065ec 100644
--- a/src/gc-stacks.c
+++ b/src/gc-stacks.c
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "gc.h"
+#include "gc-common.h"
+#include "threading.h"
 #ifndef _OS_WINDOWS_
 #  include <sys/resource.h>
 #endif
@@ -19,9 +20,6 @@
 # endif
 #endif
 
-// number of stacks to always keep available per pool
-#define MIN_STACK_MAPPINGS_PER_POOL 5
-
 const size_t jl_guard_size = (4096 * 8);
 static _Atomic(uint32_t) num_stack_mappings = 0;
 
@@ -32,47 +30,65 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT
     void *stk = VirtualAlloc(NULL, bufsz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
     if (stk == NULL)
         return MAP_FAILED;
+
+    // set up a guard page to detect stack overflow
     DWORD dwOldProtect;
     if (!VirtualProtect(stk, jl_guard_size, PAGE_READWRITE | PAGE_GUARD, &dwOldProtect)) {
         VirtualFree(stk, 0, MEM_RELEASE);
         return MAP_FAILED;
     }
-    jl_atomic_fetch_add(&num_stack_mappings, 1);
+
+    jl_atomic_fetch_add_relaxed(&num_stack_mappings, 1);
     return stk;
 }
 
 
-static void free_stack(void *stkbuf, size_t bufsz)
+void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
 {
     VirtualFree(stkbuf, 0, MEM_RELEASE);
-    jl_atomic_fetch_add(&num_stack_mappings, -1);
+    jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1);
 }
 
 #else
 
 static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT
 {
+# ifdef _OS_OPENBSD_
+    // we don't set up a guard page to detect stack overflow: on OpenBSD, any
+    // mmap-ed region has guard page managed by the kernel, so there is no
+    // need for it. Additionally, a memory region used as stack (memory
+    // allocated with MAP_STACK option) has strict permission, and you can't
+    // "create" a guard page on such memory by using `mprotect` on it
+    void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+    if (stk == MAP_FAILED)
+        return MAP_FAILED;
+# else
     void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     if (stk == MAP_FAILED)
         return MAP_FAILED;
-#if !defined(JL_HAVE_UCONTEXT) && !defined(JL_HAVE_SIGALTSTACK)
-    // setup a guard page to detect stack overflow
+
+    // set up a guard page to detect stack overflow
     if (mprotect(stk, jl_guard_size, PROT_NONE) == -1) {
         munmap(stk, bufsz);
         return MAP_FAILED;
     }
-#endif
-    jl_atomic_fetch_add(&num_stack_mappings, 1);
+# endif
+
+    jl_atomic_fetch_add_relaxed(&num_stack_mappings, 1);
     return stk;
 }
 
-static void free_stack(void *stkbuf, size_t bufsz)
+void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
 {
     munmap(stkbuf, bufsz);
-    jl_atomic_fetch_add(&num_stack_mappings, -1);
+    jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1);
 }
 #endif
 
+JL_DLLEXPORT uint32_t jl_get_num_stack_mappings(void) JL_NOTSAFEPOINT
+{
+    return jl_atomic_load_relaxed(&num_stack_mappings);
+}
 
 const unsigned pool_sizes[] = {
     128 * 1024,
@@ -104,7 +120,7 @@ static unsigned select_pool(size_t nb) JL_NOTSAFEPOINT
 }
 
 
-static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
+void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
 {
 #ifdef _COMPILER_ASAN_ENABLED_
     __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
@@ -112,7 +128,7 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
     if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(bufsz);
         if (pool_sizes[pool_id] == bufsz) {
-            arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
+            small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf);
             return;
         }
     }
@@ -130,18 +146,18 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
 void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
 {
     // avoid adding an original thread stack to the free list
-    if (task == ptls->root_task && !task->copy_stack)
+    if (task == ptls->root_task && !task->ctx.copy_stack)
         return;
-    void *stkbuf = task->stkbuf;
-    size_t bufsz = task->bufsz;
+    void *stkbuf = task->ctx.stkbuf;
+    size_t bufsz = task->ctx.bufsz;
     if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(bufsz);
         if (pool_sizes[pool_id] == bufsz) {
-            task->stkbuf = NULL;
+            task->ctx.stkbuf = NULL;
 #ifdef _COMPILER_ASAN_ENABLED_
             __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
 #endif
-            arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
+            small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf);
         }
     }
 }
@@ -156,9 +172,9 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
     if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(ssize);
         ssize = pool_sizes[pool_id];
-        arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
+        small_arraylist_t *pool = &ptls->gc_tls_common.heap.free_stacks[pool_id];
         if (pool->len > 0) {
-            stk = arraylist_pop(pool);
+            stk = small_arraylist_pop(pool);
         }
     }
     else {
@@ -177,108 +193,85 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
     }
     *bufsz = ssize;
     if (owner) {
-        arraylist_t *live_tasks = &ptls->heap.live_tasks;
-        arraylist_push(live_tasks, owner);
+        small_arraylist_t *live_tasks = &ptls->gc_tls_common.heap.live_tasks;
+        mtarraylist_push(live_tasks, owner);
     }
     return stk;
 }
 
-void sweep_stack_pools(void)
+// Builds a list of the live tasks. Racy: `live_tasks` can expand at any time.
+arraylist_t *jl_get_all_tasks_arraylist(void) JL_NOTSAFEPOINT
 {
-    // Stack sweeping algorithm:
-    //    // deallocate stacks if we have too many sitting around unused
-    //    for (stk in halfof(free_stacks))
-    //        free_stack(stk, pool_sz);
-    //    // then sweep the task stacks
-    //    for (t in live_tasks)
-    //        if (!gc-marked(t))
-    //            stkbuf = t->stkbuf
-    //            bufsz = t->bufsz
-    //            if (stkbuf)
-    //                push(free_stacks[sz], stkbuf)
-    assert(gc_n_threads);
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-
-        // free half of stacks that remain unused since last sweep
-        for (int p = 0; p < JL_N_STACK_POOLS; p++) {
-            arraylist_t *al = &ptls2->heap.free_stacks[p];
-            size_t n_to_free;
-            if (al->len > MIN_STACK_MAPPINGS_PER_POOL) {
-                n_to_free = al->len / 2;
-                if (n_to_free > (al->len - MIN_STACK_MAPPINGS_PER_POOL))
-                    n_to_free = al->len - MIN_STACK_MAPPINGS_PER_POOL;
-            }
-            else {
-                n_to_free = 0;
-            }
-            for (int n = 0; n < n_to_free; n++) {
-                void *stk = arraylist_pop(al);
-                free_stack(stk, pool_sizes[p]);
-            }
+    arraylist_t *tasks = (arraylist_t*)malloc_s(sizeof(arraylist_t));
+    arraylist_new(tasks, 0);
+    size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
+    jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (size_t i = 0; i < nthreads; i++) {
+        // skip GC threads...
+        if (gc_is_collector_thread(i)) {
+            continue;
         }
-
-        arraylist_t *live_tasks = &ptls2->heap.live_tasks;
-        size_t n = 0;
-        size_t ndel = 0;
-        size_t l = live_tasks->len;
-        void **lst = live_tasks->items;
-        if (l == 0)
+        jl_ptls_t ptls2 = allstates[i];
+        if (ptls2 == NULL) {
             continue;
-        while (1) {
-            jl_task_t *t = (jl_task_t*)lst[n];
-            assert(jl_is_task(t));
-            if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
-                if (t->stkbuf == NULL)
-                    ndel++; // jl_release_task_stack called
-                else
-                    n++;
-            }
-            else {
-                ndel++;
-                void *stkbuf = t->stkbuf;
-                size_t bufsz = t->bufsz;
-                if (stkbuf) {
-                    t->stkbuf = NULL;
-                    _jl_free_stack(ptls2, stkbuf, bufsz);
-                }
-#ifdef _COMPILER_TSAN_ENABLED_
-                if (t->ctx.tsan_state) {
-                    __tsan_destroy_fiber(t->ctx.tsan_state);
-                    t->ctx.tsan_state = NULL;
-                }
-#endif
+        }
+        jl_task_t *t = ptls2->root_task;
+        if (t->ctx.stkbuf != NULL) {
+            arraylist_push(tasks, t);
+        }
+        small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
+        size_t n = mtarraylist_length(live_tasks);
+        for (size_t i = 0; i < n; i++) {
+            jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
+            assert(t != NULL);
+            if (t->ctx.stkbuf != NULL) {
+                arraylist_push(tasks, t);
             }
-            if (n >= l - ndel)
-                break;
-            void *tmp = lst[n];
-            lst[n] = lst[n + ndel];
-            lst[n + ndel] = tmp;
         }
-        live_tasks->len -= ndel;
     }
+    return tasks;
 }
 
 JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
 {
-    jl_task_t *ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    arraylist_t *live_tasks = &ptls->heap.live_tasks;
-    size_t i, j, l;
-    jl_array_t *a;
-    do {
-        l = live_tasks->len;
-        a = jl_alloc_vec_any(l + 1); // may gc, changing the number of tasks
-    } while (l + 1 < live_tasks->len);
-    l = live_tasks->len;
-    void **lst = live_tasks->items;
-    j = 0;
-    ((void**)jl_array_data(a))[j++] = ptls->root_task;
-    for (i = 0; i < l; i++) {
-        if (((jl_task_t*)lst[i])->stkbuf != NULL)
-            ((void**)jl_array_data(a))[j++] = lst[i];
+    size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
+    jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
+    size_t l = 0; // l is not reset on restart, so we keep getting more aggressive at making a big enough list everything it fails
+restart:
+    for (size_t i = 0; i < nthreads; i++) {
+        jl_ptls_t ptls2 = allstates[i];
+        if (ptls2 == NULL)
+            continue;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
+        size_t n = mtarraylist_length(live_tasks);
+        l += n + (ptls2->root_task->ctx.stkbuf != NULL);
+    }
+    l += l / 20; // add 5% for margin of estimation error
+    jl_array_t *a = jl_alloc_vec_any(l); // may gc, changing the number of tasks and forcing us to reload everything
+    nthreads = jl_atomic_load_acquire(&jl_n_threads);
+    allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
+    size_t j = 0;
+    for (size_t i = 0; i < nthreads; i++) {
+        jl_ptls_t ptls2 = allstates[i];
+        if (ptls2 == NULL)
+            continue;
+        jl_task_t *t = ptls2->root_task;
+        if (t->ctx.stkbuf != NULL) {
+            if (j == l)
+                goto restart;
+            jl_array_data(a,void*)[j++] = t;
+        }
+        small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
+        size_t n = mtarraylist_length(live_tasks);
+        for (size_t i = 0; i < n; i++) {
+            jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
+            if (t->ctx.stkbuf != NULL) {
+                if (j == l)
+                    goto restart;
+                jl_array_data(a,void*)[j++] = t;
+            }
+        }
     }
-    l = jl_array_len(a);
     if (j < l) {
         JL_GC_PUSH1(&a);
         jl_array_del_end(a, l - j);
diff --git a/src/gc.c b/src/gc-stock.c
similarity index 52%
rename from src/gc.c
rename to src/gc-stock.c
index 1b14452603239..650cbd81c9dfa 100644
--- a/src/gc.c
+++ b/src/gc-stock.c
@@ -1,142 +1,78 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "gc.h"
-#include "julia_gcext.h"
+#include "gc-alloc-profiler.h"
+#include "gc-common.h"
+#include "gc-heap-snapshot.h"
+#include "gc-page-profiler.h"
+#include "gc-stock.h"
+#include "julia.h"
 #include "julia_assert.h"
-#ifdef __GLIBC__
-#include <malloc.h> // for malloc_trim
-#endif
+#include "julia_atomics.h"
+#include "julia_gcext.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+// System-wide heap statistics
+gc_heapstatus_t gc_heap_stats = {0};
+
+// Memory upper bound on 32-bit systems
+const uint64_t max_mem_32bit_systems = 1536 * 1024 * 1024; // 1.5 GiB
+// Julia's GC heuristics will try to keep the heap size below the `max_total_memory` soft limit,
+// but they are allowed to exceed it, instead of aborting the process.
+// This parameter can be changed via `jl_gc_set_max_memory()`.
+#ifdef _P64
+#define PETA_BYTE (1024ULL * 1024 * 1024 * 1024 * 1024)
+static uint64_t max_total_memory = 2 * PETA_BYTE;
+#else
+static uint64_t max_total_memory = max_mem_32bit_systems;
+#endif
+
+#ifdef _P64
+static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); // ~45 MiB
+#else
+static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); // ~12 MiB
+#endif
+
+// ID of first GC thread
+int gc_first_tid;
 // Number of GC threads that may run parallel marking
 int jl_n_markthreads;
-// Number of GC threads that may run concurrent sweeping (0 or 1)
-int jl_n_sweepthreads;
 // Number of threads currently running the GC mark-loop
 _Atomic(int) gc_n_threads_marking;
-// `tid` of mutator thread that triggered GC
-_Atomic(int) gc_master_tid;
-// `tid` of first GC thread
-int gc_first_tid;
+// ID of mutator thread that triggered GC
+_Atomic(int) gc_initiator_tid;
 // Mutex/cond used to synchronize wakeup of GC threads on parallel marking
 uv_mutex_t gc_threads_lock;
 uv_cond_t gc_threads_cond;
-// To indicate whether concurrent sweeping should run
-uv_sem_t gc_sweep_assists_needed;
+// Mutex used to coordinate entry of GC threads in the mark loop
+uv_mutex_t gc_queue_observer_lock;
 
-// Linked list of callback functions
-
-typedef void (*jl_gc_cb_func_t)(void);
-
-typedef struct jl_gc_callback_list_t {
-    struct jl_gc_callback_list_t *next;
-    jl_gc_cb_func_t func;
-} jl_gc_callback_list_t;
-
-static jl_gc_callback_list_t *gc_cblist_root_scanner;
-static jl_gc_callback_list_t *gc_cblist_task_scanner;
-static jl_gc_callback_list_t *gc_cblist_pre_gc;
-static jl_gc_callback_list_t *gc_cblist_post_gc;
-static jl_gc_callback_list_t *gc_cblist_notify_external_alloc;
-static jl_gc_callback_list_t *gc_cblist_notify_external_free;
-
-#define gc_invoke_callbacks(ty, list, args) \
-    do { \
-        for (jl_gc_callback_list_t *cb = list; \
-                cb != NULL; \
-                cb = cb->next) \
-        { \
-            ((ty)(cb->func)) args; \
-        } \
-    } while (0)
-
-static void jl_gc_register_callback(jl_gc_callback_list_t **list,
-        jl_gc_cb_func_t func)
-{
-    while (*list != NULL) {
-        if ((*list)->func == func)
-            return;
-        list = &((*list)->next);
-    }
-    *list = (jl_gc_callback_list_t *)malloc_s(sizeof(jl_gc_callback_list_t));
-    (*list)->next = NULL;
-    (*list)->func = func;
-}
+// Number of threads sweeping
+_Atomic(int) gc_n_threads_sweeping_pools;
+// Temporary for the `ptls->gc_tls.page_metadata_allocd` used during parallel sweeping (padded to avoid false sharing)
+_Atomic(jl_gc_padded_page_stack_t *) gc_allocd_scratch;
 
-static void jl_gc_deregister_callback(jl_gc_callback_list_t **list,
-        jl_gc_cb_func_t func)
-{
-    while (*list != NULL) {
-        if ((*list)->func == func) {
-            jl_gc_callback_list_t *tmp = *list;
-            (*list) = (*list)->next;
-            free(tmp);
-            return;
-        }
-        list = &((*list)->next);
-    }
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
-}
+// Number of GC threads that may run concurrent sweeping (0 or 1)
+int jl_n_sweepthreads;
+// To indicate whether concurrent sweeping should run
+uv_sem_t gc_sweep_assists_needed;
 
-JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
-}
+// Number of threads sweeping stacks
+_Atomic(int) gc_n_threads_sweeping_stacks;
+// counter for sharing work when sweeping stacks
+_Atomic(int) gc_ptls_sweep_idx;
+// counter for round robin of giving back stack pages to the OS
+_Atomic(int) gc_stack_free_idx;
 
-// Protect all access to `finalizer_list_marked` and `to_finalize`.
-// For accessing `ptls->finalizers`, the lock is needed if a thread
-// is going to realloc the buffer (of its own list) or accessing the
-// list of another thread
-static jl_mutex_t finalizers_lock;
-static uv_mutex_t gc_cache_lock;
+// Tag for sentinel nodes in bigval list
+uintptr_t gc_bigval_sentinel_tag;
+// List of big objects in oldest generation (`GC_OLD_MARKED`).  Not per-thread.  Accessed only by master thread.
+bigval_t *oldest_generation_of_bigvals = NULL;
 
-// mutex for gc-heap-snapshot.
-jl_mutex_t heapsnapshot_lock;
+// Table recording number of full GCs due to each reason
+JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];
 
 // Flag that tells us whether we need to support conservative marking
 // of objects.
@@ -174,462 +110,8 @@ static _Atomic(int) support_conservative_marking = 0;
  * finalizers in unmanaged (GC safe) mode.
  */
 
-jl_gc_num_t gc_num = {0};
-static size_t last_long_collect_interval;
-int gc_n_threads;
-jl_ptls_t* gc_all_tls_states;
-const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00
-JL_DLLEXPORT uintptr_t jl_get_buff_tag(void)
-{
-    return jl_buff_tag;
-}
-
-// List of marked big objects.  Not per-thread.  Accessed only by master thread.
-bigval_t *big_objects_marked = NULL;
-
-// -- Finalization --
-// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
-// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
-// If an object pointer has the second lowest bit set, the current pointer is a c object pointer.
-//   It must be aligned at least 4, and it finalized immediately (at "quiescence").
-// `to_finalize` should not have tagged pointers.
-arraylist_t finalizer_list_marked;
-arraylist_t to_finalize;
-JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
-
-
-NOINLINE uintptr_t gc_get_stack_ptr(void)
-{
-    return (uintptr_t)jl_get_frame_addr();
-}
-
-#define should_timeout() 0
-
-void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
-{
-    JL_TIMING(GC, GC_Stop);
-#ifdef USE_TRACY
-    TracyCZoneCtx ctx = JL_TIMING_DEFAULT_BLOCK->tracy_ctx;
-    TracyCZoneColor(ctx, 0x696969);
-#endif
-    assert(gc_n_threads);
-    if (gc_n_threads > 1)
-        jl_wake_libuv();
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL) {
-            // This acquire load pairs with the release stores
-            // in the signal handler of safepoint so we are sure that
-            // all the stores on those threads are visible.
-            // We're currently also using atomic store release in mutator threads
-            // (in jl_gc_state_set), but we may want to use signals to flush the
-            // memory operations on those threads lazily instead.
-            while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state))
-                jl_cpu_pause(); // yield?
-        }
-    }
-}
-
-// malloc wrappers, aligned allocation
-
-#if defined(_OS_WINDOWS_)
-STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
-{
-    return _aligned_malloc(sz ? sz : 1, align);
-}
-STATIC_INLINE void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz,
-                                       size_t align)
-{
-    (void)oldsz;
-    return _aligned_realloc(p, sz ? sz : 1, align);
-}
-STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
-{
-    _aligned_free(p);
-}
-#else
-STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
-{
-#if defined(_P64) || defined(__APPLE__)
-    if (align <= 16)
-        return malloc(sz);
-#endif
-    void *ptr;
-    if (posix_memalign(&ptr, align, sz))
-        return NULL;
-    return ptr;
-}
-STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz,
-                                       size_t align)
-{
-#if defined(_P64) || defined(__APPLE__)
-    if (align <= 16)
-        return realloc(d, sz);
-#endif
-    void *b = jl_malloc_aligned(sz, align);
-    if (b != NULL) {
-        memcpy(b, d, oldsz > sz ? sz : oldsz);
-        free(d);
-    }
-    return b;
-}
-STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
-{
-    free(p);
-}
-#endif
-#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)
-#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT)
-
-static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
-{
-    arraylist_push(&to_finalize, o);
-    arraylist_push(&to_finalize, f);
-    // doesn't need release, since we'll keep checking (on the reader) until we see the work and
-    // release our lock, and that will have a release barrier by then
-    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1);
-}
-
-static void run_finalizer(jl_task_t *ct, void *o, void *ff)
-{
-    int ptr_finalizer = gc_ptr_tag(o, 1);
-    o = gc_ptr_clear_tag(o, 3);
-    if (ptr_finalizer) {
-        ((void (*)(void*))ff)((void*)o);
-        return;
-    }
-    JL_TRY {
-        size_t last_age = ct->world_age;
-        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1);
-        ct->world_age = last_age;
-    }
-    JL_CATCH {
-        jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: ");
-        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
-        jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-        jlbacktrace(); // written to STDERR_FILENO
-    }
-}
-
-// if `need_sync` is true, the `list` is the `finalizers` list of another
-// thread and we need additional synchronizations
-static void finalize_object(arraylist_t *list, jl_value_t *o,
-                            arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
-{
-    // The acquire load makes sure that the first `len` objects are valid.
-    // If `need_sync` is true, all mutations of the content should be limited
-    // to the first `oldlen` elements and no mutation is allowed after the
-    // new length is published with the `cmpxchg` at the end of the function.
-    // This way, the mutation should not conflict with the owning thread,
-    // which only writes to locations later than `len`
-    // and will not resize the buffer without acquiring the lock.
-    size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len;
-    size_t oldlen = len;
-    void **items = list->items;
-    size_t j = 0;
-    for (size_t i = 0; i < len; i += 2) {
-        void *v = items[i];
-        int move = 0;
-        if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
-            void *f = items[i + 1];
-            move = 1;
-            arraylist_push(copied_list, v);
-            arraylist_push(copied_list, f);
-        }
-        if (move || __unlikely(!v)) {
-            // remove item
-        }
-        else {
-            if (j < i) {
-                items[j] = items[i];
-                items[j+1] = items[i+1];
-            }
-            j += 2;
-        }
-    }
-    len = j;
-    if (oldlen == len)
-        return;
-    if (need_sync) {
-        // The memset needs to be unconditional since the thread might have
-        // already read the length.
-        // The `memset` (like any other content mutation) has to be done
-        // **before** the `cmpxchg` which publishes the length.
-        memset(&items[len], 0, (oldlen - len) * sizeof(void*));
-        jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len);
-    }
-    else {
-        list->len = len;
-    }
-}
-
-// The first two entries are assumed to be empty and the rest are assumed to
-// be pointers to `jl_value_t` objects
-static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT
-{
-    void **items = list->items;
-    items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2);
-    items[1] = ct->gcstack;
-    ct->gcstack = (jl_gcframe_t*)items;
-}
-
-// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
-// to be hold for the current thread and will release the lock when the
-// function returns.
-static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE
-{
-    // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring
-    // of `finalizer`) in a finalizer:
-    uint8_t sticky = ct->sticky;
-    // empty out the first two entries for the GC frame
-    arraylist_push(list, list->items[0]);
-    arraylist_push(list, list->items[1]);
-    jl_gc_push_arraylist(ct, list);
-    void **items = list->items;
-    size_t len = list->len;
-    JL_UNLOCK_NOGC(&finalizers_lock);
-    // run finalizers in reverse order they were added, so lower-level finalizers run last
-    for (size_t i = len-4; i >= 2; i -= 2)
-        run_finalizer(ct, items[i], items[i + 1]);
-    // first entries were moved last to make room for GC frame metadata
-    run_finalizer(ct, items[len-2], items[len-1]);
-    // matches the jl_gc_push_arraylist above
-    JL_GC_POP();
-    ct->sticky = sticky;
-}
-
-static uint64_t finalizer_rngState[JL_RNG_SIZE];
-
-void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT;
-
-JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
-{
-    jl_rng_split(finalizer_rngState, jl_current_task->rngState);
-}
-
-static void run_finalizers(jl_task_t *ct)
-{
-    // Racy fast path:
-    // The race here should be OK since the race can only happen if
-    // another thread is writing to it with the lock held. In such case,
-    // we don't need to run pending finalizers since the writer thread
-    // will flush it.
-    if (to_finalize.len == 0)
-        return;
-    JL_LOCK_NOGC(&finalizers_lock);
-    if (to_finalize.len == 0) {
-        JL_UNLOCK_NOGC(&finalizers_lock);
-        return;
-    }
-    arraylist_t copied_list;
-    memcpy(&copied_list, &to_finalize, sizeof(copied_list));
-    if (to_finalize.items == to_finalize._space) {
-        copied_list.items = copied_list._space;
-    }
-    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0);
-    arraylist_new(&to_finalize, 0);
-
-    uint64_t save_rngState[JL_RNG_SIZE];
-    memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState));
-    jl_rng_split(ct->rngState, finalizer_rngState);
-
-    // This releases the finalizers lock.
-    int8_t was_in_finalizer = ct->ptls->in_finalizer;
-    ct->ptls->in_finalizer = 1;
-    jl_gc_run_finalizers_in_list(ct, &copied_list);
-    ct->ptls->in_finalizer = was_in_finalizer;
-    arraylist_free(&copied_list);
-
-    memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState));
-}
-
-JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
-{
-    if (ct == NULL)
-        ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) {
-        run_finalizers(ct);
-    }
-}
-
-JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
-{
-    if (ptls == NULL)
-        ptls = jl_current_task->ptls;
-    return ptls->finalizers_inhibited;
-}
-
-JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    ptls->finalizers_inhibited++;
-}
-
-JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void)
-{
-    jl_task_t *ct = jl_current_task;
-#ifdef NDEBUG
-    ct->ptls->finalizers_inhibited--;
-#else
-    jl_gc_enable_finalizers(ct, 1);
-#endif
-}
-
-JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
-{
-    if (ct == NULL)
-        ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    int old_val = ptls->finalizers_inhibited;
-    int new_val = old_val + (on ? -1 : 1);
-    if (new_val < 0) {
-        JL_TRY {
-            jl_error(""); // get a backtrace
-        }
-        JL_CATCH {
-            jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
-            // Only print the backtrace once, to avoid spamming the logs
-            static int backtrace_printed = 0;
-            if (backtrace_printed == 0) {
-                backtrace_printed = 1;
-                jlbacktrace(); // written to STDERR_FILENO
-            }
-        }
-        return;
-    }
-    ptls->finalizers_inhibited = new_val;
-    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
-        jl_gc_run_pending_finalizers(ct);
-    }
-}
-
-JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void)
-{
-    return jl_current_task->ptls->in_finalizer;
-}
-
-static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT
-{
-    void **items = flist->items;
-    size_t len = flist->len;
-    for(size_t i = 0; i < len; i+=2) {
-        void *v = items[i];
-        void *f = items[i + 1];
-        if (__unlikely(!v))
-            continue;
-        schedule_finalization(v, f);
-    }
-    flist->len = 0;
-}
-
-void jl_gc_run_all_finalizers(jl_task_t *ct)
-{
-    int gc_n_threads;
-    jl_ptls_t* gc_all_tls_states;
-    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
-    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
-    // this is called from `jl_atexit_hook`; threads could still be running
-    // so we have to guard the finalizers' lists
-    JL_LOCK_NOGC(&finalizers_lock);
-    schedule_all_finalizers(&finalizer_list_marked);
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL)
-            schedule_all_finalizers(&ptls2->finalizers);
-    }
-    // unlock here because `run_finalizers` locks this
-    JL_UNLOCK_NOGC(&finalizers_lock);
-    gc_n_threads = 0;
-    gc_all_tls_states = NULL;
-    run_finalizers(ct);
-}
-
-void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
-{
-    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
-    arraylist_t *a = &ptls->finalizers;
-    // This acquire load and the release store at the end are used to
-    // synchronize with `finalize_object` on another thread. Apart from the GC,
-    // which is blocked by entering a unsafe region, there might be only
-    // one other thread accessing our list in `finalize_object`
-    // (only one thread since it needs to acquire the finalizer lock).
-    // Similar to `finalize_object`, all content mutation has to be done
-    // between the acquire and the release of the length.
-    size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len);
-    if (__unlikely(oldlen + 2 > a->max)) {
-        JL_LOCK_NOGC(&finalizers_lock);
-        // `a->len` might have been modified.
-        // Another possibility is to always grow the array to `oldlen + 2` but
-        // it's simpler this way and uses slightly less memory =)
-        oldlen = a->len;
-        arraylist_grow(a, 2);
-        a->len = oldlen;
-        JL_UNLOCK_NOGC(&finalizers_lock);
-    }
-    void **items = a->items;
-    items[oldlen] = v;
-    items[oldlen + 1] = f;
-    jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
-}
-
-JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
-{
-    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
-}
-
-// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads)
-JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT
-{
-    assert(!gc_ptr_tag(v, 3));
-    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f);
-}
-
-JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
-{
-    if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) {
-        jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
-    }
-    else {
-        jl_gc_add_finalizer_(ptls, v, f);
-    }
-}
-
-JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
-{
-    JL_LOCK_NOGC(&finalizers_lock);
-    // Copy the finalizers into a temporary list so that code in the finalizer
-    // won't change the list as we loop through them.
-    // This list is also used as the GC frame when we are running the finalizers
-    arraylist_t copied_list;
-    arraylist_new(&copied_list, 0);
-    // No need to check the to_finalize list since the user is apparently
-    // still holding a reference to the object
-    int gc_n_threads;
-    jl_ptls_t* gc_all_tls_states;
-    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
-    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL)
-            finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
-    }
-    finalize_object(&finalizer_list_marked, o, &copied_list, 0);
-    gc_n_threads = 0;
-    gc_all_tls_states = NULL;
-    if (copied_list.len > 0) {
-        // This releases the finalizers lock.
-        jl_gc_run_finalizers_in_list(ct, &copied_list);
-    }
-    else {
-        JL_UNLOCK_NOGC(&finalizers_lock);
-    }
-    arraylist_free(&copied_list);
-}
-
 // explicitly scheduled objects for the sweepfunc callback
-static void gc_sweep_foreign_objs_in_list(arraylist_t *objs)
+static void gc_sweep_foreign_objs_in_list(arraylist_t *objs) JL_NOTSAFEPOINT
 {
     size_t p = 0;
     for (size_t i = 0; i < objs->len; i++) {
@@ -647,39 +129,31 @@ static void gc_sweep_foreign_objs_in_list(arraylist_t *objs)
     objs->len = p;
 }
 
-static void gc_sweep_foreign_objs(void)
+static void gc_sweep_foreign_objs(void) JL_NOTSAFEPOINT
 {
-    assert(gc_n_threads);
+    assert(gc_n_threads != 0);
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
         if (ptls2 != NULL)
-            gc_sweep_foreign_objs_in_list(&ptls2->sweep_objs);
+            gc_sweep_foreign_objs_in_list(&ptls2->gc_tls.sweep_objs);
     }
 }
 
 // GC knobs and self-measurement variables
 static int64_t last_gc_total_bytes = 0;
 
-// max_total_memory is a suggestion.  We try very hard to stay
-// under this limit, but we will go above it rather than halting.
-#ifdef _P64
-typedef uint64_t memsize_t;
-static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*);
-static const size_t max_collect_interval = 1250000000UL;
-static size_t total_mem;
-// We expose this to the user/ci as jl_gc_set_max_memory
-static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024;
-#else
-typedef uint32_t memsize_t;
-static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*);
-static const size_t max_collect_interval =  500000000UL;
-// Work really hard to stay within 2GB
-// Alternative is to risk running out of address space
-// on 32 bit architectures.
-static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024;
-#endif
-
+// heuristic stuff for https://dl.acm.org/doi/10.1145/3563323
+// start with values that are in the target ranges to reduce transient hiccups at startup
+static uint64_t old_pause_time = 1e7; // 10 ms
+static uint64_t old_mut_time = 1e9; // 1 second
+static uint64_t old_heap_size = 0;
+static uint64_t old_alloc_diff = default_collect_interval;
+static uint64_t old_freed_diff = default_collect_interval;
+static uint64_t gc_end_time = 0;
+static int thrash_counter = 0;
+static int thrashing = 0;
 // global variables for GC stats
+static uint64_t freed_in_runtime = 0;
 
 // Resetting the object to a young object, this is used when marking the
 // finalizer list to collect them the next time because the object is very
@@ -730,76 +204,43 @@ static int mark_reset_age = 0;
 static int64_t scanned_bytes; // young bytes scanned while marking
 static int64_t perm_scanned_bytes; // old bytes scanned while marking
 int prev_sweep_full = 1;
+int current_sweep_full = 0;
+int next_sweep_full = 0;
+int under_pressure = 0;
 
 // Full collection heuristics
 static int64_t live_bytes = 0;
 static int64_t promoted_bytes = 0;
 static int64_t last_live_bytes = 0; // live_bytes at last collection
-static int64_t t_start = 0; // Time GC starts;
 #ifdef __GLIBC__
 // maxrss at last malloc_trim
 static int64_t last_trim_maxrss = 0;
 #endif
 
-static void gc_sync_cache_nolock(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT
+static void gc_sync_cache(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT
 {
-    const int nbig = gc_cache->nbig_obj;
-    for (int i = 0; i < nbig; i++) {
-        void *ptr = gc_cache->big_obj[i];
-        bigval_t *hdr = (bigval_t*)gc_ptr_clear_tag(ptr, 1);
-        gc_big_object_unlink(hdr);
-        if (gc_ptr_tag(ptr, 1)) {
-            gc_big_object_link(hdr, &ptls->heap.big_objects);
-        }
-        else {
-            // Move hdr from `big_objects` list to `big_objects_marked list`
-            gc_big_object_link(hdr, &big_objects_marked);
-        }
-    }
-    gc_cache->nbig_obj = 0;
     perm_scanned_bytes += gc_cache->perm_scanned_bytes;
     scanned_bytes += gc_cache->scanned_bytes;
     gc_cache->perm_scanned_bytes = 0;
     gc_cache->scanned_bytes = 0;
 }
 
-static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT
-{
-    uv_mutex_lock(&gc_cache_lock);
-    gc_sync_cache_nolock(ptls, &ptls->gc_cache);
-    uv_mutex_unlock(&gc_cache_lock);
-}
-
 // No other threads can be running marking at the same time
-static void gc_sync_all_caches_nolock(jl_ptls_t ptls)
+static void gc_sync_all_caches(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
-    assert(gc_n_threads);
+    assert(gc_n_threads != 0);
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         if (ptls2 != NULL)
-            gc_sync_cache_nolock(ptls, &ptls2->gc_cache);
-    }
-}
-
-STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr,
-                                       int toyoung) JL_NOTSAFEPOINT
-{
-    const int nentry = sizeof(ptls->gc_cache.big_obj) / sizeof(void*);
-    size_t nobj = ptls->gc_cache.nbig_obj;
-    if (__unlikely(nobj >= nentry)) {
-        gc_sync_cache(ptls);
-        nobj = 0;
+            gc_sync_cache(ptls, &ptls2->gc_tls.gc_cache);
     }
-    uintptr_t v = (uintptr_t)hdr;
-    ptls->gc_cache.big_obj[nobj] = (void*)(toyoung ? (v | 1) : v);
-    ptls->gc_cache.nbig_obj = nobj + 1;
 }
 
 // Atomically set the mark bit for object and return whether it was previously unmarked
 FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT
 {
     assert(gc_marked(mark_mode));
-    uintptr_t tag = jl_atomic_load_relaxed((_Atomic(uintptr_t)*)&o->header);
+    uintptr_t tag = o->header;
     if (gc_marked(tag))
         return 0;
     if (mark_reset_age) {
@@ -813,9 +254,13 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N
         tag = tag | mark_mode;
         assert((tag & 0x3) == mark_mode);
     }
-    jl_atomic_store_relaxed((_Atomic(uintptr_t)*)&o->header, tag); //xchg here was slower than
-    verify_val(jl_valueof(o));                                     //potentially redoing work because of a stale tag.
-    return 1;
+    // XXX: note that marking not only sets the GC bits but also updates the
+    // page metadata for pool allocated objects.
+    // The second step is **not** idempotent, so we need a compare exchange here
+    // (instead of a pair of load&store) to avoid marking an object twice
+    tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag);
+    verify_val(jl_valueof(o));
+    return !gc_marked(tag);
 }
 
 // This function should be called exactly once during marking for each big
@@ -826,21 +271,17 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o,
     assert(!gc_alloc_map_is_set((char*)o));
     bigval_t *hdr = bigval_header(o);
     if (mark_mode == GC_OLD_MARKED) {
-        ptls->gc_cache.perm_scanned_bytes += hdr->sz & ~3;
-        gc_queue_big_marked(ptls, hdr, 0);
+        ptls->gc_tls.gc_cache.perm_scanned_bytes += hdr->sz;
     }
     else {
-        ptls->gc_cache.scanned_bytes += hdr->sz & ~3;
-        // We can't easily tell if the object is old or being promoted
-        // from the gc bits but if the `age` is `0` then the object
-        // must be already on a young list.
+        ptls->gc_tls.gc_cache.scanned_bytes += hdr->sz;
         if (mark_reset_age) {
+            assert(jl_atomic_load(&gc_n_threads_marking) == 0); // `mark_reset_age` is only used during single-threaded marking
             // Reset the object as if it was just allocated
-            gc_queue_big_marked(ptls, hdr, 1);
+            gc_big_object_unlink(hdr);
+            gc_big_object_link(ptls->gc_tls.heap.young_generation_of_bigvals, hdr);
         }
     }
-    objprofile_count(jl_typeof(jl_valueof(o)),
-                     mark_mode == GC_OLD_MARKED, hdr->sz & ~3);
 }
 
 // This function should be called exactly once during marking for each pool
@@ -852,18 +293,16 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
     gc_setmark_big(ptls, o, mark_mode);
 #else
     if (mark_mode == GC_OLD_MARKED) {
-        ptls->gc_cache.perm_scanned_bytes += page->osize;
+        ptls->gc_tls.gc_cache.perm_scanned_bytes += page->osize;
         static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), "");
-        page->nold++;
+        jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1);
     }
     else {
-        ptls->gc_cache.scanned_bytes += page->osize;
+        ptls->gc_tls.gc_cache.scanned_bytes += page->osize;
         if (mark_reset_age) {
             page->has_young = 1;
         }
     }
-    objprofile_count(jl_typeof(jl_valueof(o)),
-                     mark_mode == GC_OLD_MARKED, page->osize);
     page->has_marked = 1;
 #endif
 }
@@ -912,7 +351,7 @@ void gc_setmark_buf(jl_ptls_t ptls, void *o, uint8_t mark_mode, size_t minsz) JL
 
 STATIC_INLINE void maybe_collect(jl_ptls_t ptls)
 {
-    if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) {
+    if (jl_atomic_load_relaxed(&gc_heap_stats.heap_size) >= jl_atomic_load_relaxed(&gc_heap_stats.heap_target) || jl_gc_debug_check_other()) {
         jl_gc_collect(JL_GC_AUTO);
     }
     else {
@@ -922,24 +361,23 @@ STATIC_INLINE void maybe_collect(jl_ptls_t ptls)
 
 // weak references
 
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,
-                                                jl_value_t *value)
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value)
 {
     jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*),
                                                   jl_weakref_type);
     wr->value = value;  // NOTE: wb not needed here
-    arraylist_push(&ptls->heap.weak_refs, wr);
+    small_arraylist_push(&ptls->gc_tls_common.heap.weak_refs, wr);
     return wr;
 }
 
-static void clear_weak_refs(void)
+static void clear_weak_refs(void) JL_NOTSAFEPOINT
 {
-    assert(gc_n_threads);
+    assert(gc_n_threads != 0);
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
         if (ptls2 != NULL) {
-            size_t n, l = ptls2->heap.weak_refs.len;
-            void **lst = ptls2->heap.weak_refs.items;
+            size_t n, l = ptls2->gc_tls_common.heap.weak_refs.len;
+            void **lst = ptls2->gc_tls_common.heap.weak_refs.items;
             for (n = 0; n < l; n++) {
                 jl_weakref_t *wr = (jl_weakref_t*)lst[n];
                 if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc))
@@ -949,39 +387,50 @@ static void clear_weak_refs(void)
     }
 }
 
-static void sweep_weak_refs(void)
+static void sweep_weak_refs(void) JL_NOTSAFEPOINT
 {
-    assert(gc_n_threads);
+    assert(gc_n_threads != 0);
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL) {
-            size_t n = 0;
-            size_t ndel = 0;
-            size_t l = ptls2->heap.weak_refs.len;
-            void **lst = ptls2->heap.weak_refs.items;
-            if (l == 0)
-                continue;
-            while (1) {
-                jl_weakref_t *wr = (jl_weakref_t*)lst[n];
-                if (gc_marked(jl_astaggedvalue(wr)->bits.gc))
-                    n++;
-                else
-                    ndel++;
-                if (n >= l - ndel)
-                    break;
-                void *tmp = lst[n];
-                lst[n] = lst[n + ndel];
-                lst[n + ndel] = tmp;
+        if (ptls2 == NULL) {
+            continue;
+        }
+        size_t n = 0;
+        size_t i = 0;
+        size_t l = ptls2->gc_tls_common.heap.weak_refs.len;
+        void **lst = ptls2->gc_tls_common.heap.weak_refs.items;
+        // filter with preserving order
+        for (i = 0; i < l; i++) {
+            jl_weakref_t *wr = (jl_weakref_t*)lst[i];
+            if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) {
+                lst[n] = wr;
+                n++;
             }
-            ptls2->heap.weak_refs.len -= ndel;
         }
+        ptls2->gc_tls_common.heap.weak_refs.len = n;
     }
 }
 
 
+STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
+{
+    uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc) + sz;
+    if (alloc_acc < 16*1024)
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, alloc_acc);
+    else {
+        jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0);
+    }
+}
+
+STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
+{
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc) + sz);
+}
+
 // big value list
 
-// Size includes the tag and the tag is not cleared!!
+// Size includes the tag and the tag field is undefined on return (must be set before the next GC safepoint)
 STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
 {
     maybe_collect(ptls);
@@ -997,23 +446,28 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
         jl_throw(jl_memory_exception);
     gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t,
         gc_cblist_notify_external_alloc, (v, allocsz));
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
-    jl_atomic_store_relaxed(&ptls->gc_num.bigalloc,
-        jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc) + 1);
+    jl_batch_accum_heap_size(ptls, allocsz);
 #ifdef MEMDEBUG
     memset(v, 0xee, allocsz);
 #endif
     v->sz = allocsz;
-    gc_big_object_link(v, &ptls->heap.big_objects);
+#ifndef NDEBUG
+    v->header = 0; // must be initialized (and not gc_bigval_sentinel_tag) or gc_big_object_link assertions will get confused
+#endif
+    gc_big_object_link(ptls->gc_tls.heap.young_generation_of_bigvals, v);
     return jl_valueof(&v->header);
 }
 
+
 // Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code.
-JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
+JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type)
 {
     jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz);
-    maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag);
+    maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type);
     return val;
 }
 
@@ -1024,90 +478,101 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) {
     return jl_gc_big_alloc_inner(ptls, sz);
 }
 
-// Sweep list rooted at *pv, removing and freeing any unmarked objects.
-// Return pointer to last `next` field in the culled list.
-static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT
+FORCE_INLINE void sweep_unlink_and_free(bigval_t *v) JL_NOTSAFEPOINT
 {
-    bigval_t *v = *pv;
+    gc_big_object_unlink(v);
+    gc_num.freed += v->sz;
+    jl_atomic_store_relaxed(&gc_heap_stats.heap_size, jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - v->sz);
+#ifdef MEMDEBUG
+    memset(v, 0xbb, v->sz);
+#endif
+    gc_invoke_callbacks(jl_gc_cb_notify_external_free_t, gc_cblist_notify_external_free, (v));
+    jl_free_aligned(v);
+}
+
+static bigval_t *sweep_list_of_young_bigvals(bigval_t *young) JL_NOTSAFEPOINT
+{
+    bigval_t *last_node = young;
+    bigval_t *v = young->next; // skip the sentinel
+    bigval_t *old = oldest_generation_of_bigvals;
+    int sweep_full = current_sweep_full; // don't load the global in the hot loop
     while (v != NULL) {
         bigval_t *nxt = v->next;
         int bits = v->bits.gc;
         int old_bits = bits;
         if (gc_marked(bits)) {
-            pv = &v->next;
             if (sweep_full || bits == GC_MARKED) {
                 bits = GC_OLD;
+                last_node = v;
+            }
+            else { // `bits == GC_OLD_MARKED`
+                assert(bits == GC_OLD_MARKED);
+                // reached oldest generation, move from young list to old list
+                gc_big_object_unlink(v);
+                gc_big_object_link(old, v);
             }
             v->bits.gc = bits;
         }
         else {
-            // Remove v from list and free it
-            *pv = nxt;
-            if (nxt)
-                nxt->prev = pv;
-            gc_num.freed += v->sz&~3;
-#ifdef MEMDEBUG
-            memset(v, 0xbb, v->sz&~3);
-#endif
-            gc_invoke_callbacks(jl_gc_cb_notify_external_free_t,
-                gc_cblist_notify_external_free, (v));
-            jl_free_aligned(v);
+            sweep_unlink_and_free(v);
         }
         gc_time_count_big(old_bits, bits);
         v = nxt;
     }
-    return pv;
+    return last_node;
 }
 
-static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
+static void sweep_list_of_oldest_bigvals(bigval_t *young) JL_NOTSAFEPOINT
 {
-    gc_time_big_start();
-    assert(gc_n_threads);
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL)
-            sweep_big_list(sweep_full, &ptls2->heap.big_objects);
-    }
-    if (sweep_full) {
-        bigval_t **last_next = sweep_big_list(sweep_full, &big_objects_marked);
-        // Move all survivors from big_objects_marked list to the big_objects list of this thread.
-        if (ptls->heap.big_objects)
-            ptls->heap.big_objects->prev = last_next;
-        *last_next = ptls->heap.big_objects;
-        ptls->heap.big_objects = big_objects_marked;
-        if (ptls->heap.big_objects)
-            ptls->heap.big_objects->prev = &ptls->heap.big_objects;
-        big_objects_marked = NULL;
+    bigval_t *v = oldest_generation_of_bigvals->next; // skip the sentinel
+    while (v != NULL) {
+        bigval_t *nxt = v->next;
+        assert(v->bits.gc == GC_OLD_MARKED);
+        v->bits.gc = GC_OLD;
+        gc_time_count_big(GC_OLD_MARKED, GC_OLD);
+        v = nxt;
     }
-    gc_time_big_end();
 }
 
-// tracking Arrays with malloc'd storage
-
-void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT
+static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
-    // This is **NOT** a GC safe point.
-    mallocarray_t *ma;
-    if (ptls->heap.mafreelist == NULL) {
-        ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t));
+    gc_time_big_start();
+    assert(gc_n_threads != 0);
+    bigval_t *last_node_in_my_list = NULL;
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 == NULL) {
+            continue;
+        }
+        bigval_t *last_node = sweep_list_of_young_bigvals(ptls2->gc_tls.heap.young_generation_of_bigvals);
+        if (ptls == ptls2) {
+            last_node_in_my_list = last_node;
+        }
     }
-    else {
-        ma = ptls->heap.mafreelist;
-        ptls->heap.mafreelist = ma->next;
+    if (current_sweep_full) {
+        sweep_list_of_oldest_bigvals(ptls->gc_tls.heap.young_generation_of_bigvals);
+        // move all nodes in `oldest_generation_of_bigvals` to my list of bigvals
+        assert(last_node_in_my_list != NULL);
+        assert(last_node_in_my_list->next == NULL);
+        last_node_in_my_list->next = oldest_generation_of_bigvals->next; // skip the sentinel
+        if (oldest_generation_of_bigvals->next != NULL) {
+            oldest_generation_of_bigvals->next->prev = last_node_in_my_list;
+        }
+        oldest_generation_of_bigvals->next = NULL;
     }
-    ma->a = a;
-    ma->next = ptls->heap.mallocarrays;
-    ptls->heap.mallocarrays = ma;
+    gc_time_big_end();
 }
 
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
 {
     jl_ptls_t ptls = jl_current_task->ptls;
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz);
+    jl_batch_accum_heap_size(ptls, sz);
 }
 
-static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
+// Only safe to update the heap inside the GC
+static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
 {
     int gc_n_threads;
     jl_ptls_t* gc_all_tls_states;
@@ -1115,14 +580,21 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
     gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls = gc_all_tls_states[i];
-        if (ptls) {
-            dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval);
-            dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed);
-            dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc);
-            dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc);
-            dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc);
-            dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc);
-            dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall);
+        if (ptls == NULL) {
+            continue;
+        }
+        dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval);
+        dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc);
+        dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc);
+        dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc);
+        dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc);
+        dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc);
+        if (update_heap) {
+            uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc);
+            freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc);
+            jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0);
         }
     }
 }
@@ -1135,83 +607,76 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
     gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls = gc_all_tls_states[i];
-        if (ptls != NULL) {
-            memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
-            jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
+        if (ptls == NULL) {
+            continue;
         }
+        // don't reset `pool_live_bytes` here
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0);
     }
 }
 
-static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT
-{
-    jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc);
-    return live_bytes += inc;
-}
-
 void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
 {
-    combine_thread_gc_counts(&gc_num);
-    inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd);
+    combine_thread_gc_counts(&gc_num, 0);
+    int64_t alloc_increment = gc_num.deferred_alloc + gc_num.allocd;
+    jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, alloc_increment);
+    live_bytes += alloc_increment;
     gc_num.allocd = 0;
     gc_num.deferred_alloc = 0;
     reset_thread_gc_counts();
 }
 
-size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT
+static void jl_gc_free_memory(jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT
 {
-    size_t sz = 0;
-    int isbitsunion = jl_array_isbitsunion(a);
-    if (jl_array_ndims(a) == 1)
-        sz = a->elsize * a->maxsize + ((a->elsize == 1 && !isbitsunion) ? 1 : 0);
+    assert(jl_is_genericmemory(m));
+    assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
+    char *d = (char*)m->ptr;
+    size_t freed_bytes = memory_block_usable_size(d, isaligned);
+    assert(freed_bytes != 0);
+    if (isaligned)
+        jl_free_aligned(d);
     else
-        sz = a->elsize * jl_array_len(a);
-    if (isbitsunion)
-        // account for isbits Union array selector bytes
-        sz += jl_array_len(a);
-    return sz;
-}
-
-static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT
-{
-    if (a->flags.how == 2) {
-        char *d = (char*)a->data - a->offset*a->elsize;
-        if (a->flags.isaligned)
-            jl_free_aligned(d);
-        else
-            free(d);
-        gc_num.freed += jl_array_nbytes(a);
-        gc_num.freecall++;
-    }
+        free(d);
+    jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
+        jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_bytes);
+    gc_num.freed += freed_bytes;
+    gc_num.freecall++;
 }
 
-static void sweep_malloced_arrays(void) JL_NOTSAFEPOINT
+static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
 {
-    gc_time_mallocd_array_start();
-    assert(gc_n_threads);
+    gc_time_mallocd_memory_start();
+    assert(gc_n_threads != 0);
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        if (ptls2 != NULL) {
-            mallocarray_t *ma = ptls2->heap.mallocarrays;
-            mallocarray_t **pma = &ptls2->heap.mallocarrays;
-            while (ma != NULL) {
-                mallocarray_t *nxt = ma->next;
-                int bits = jl_astaggedvalue(ma->a)->bits.gc;
-                if (gc_marked(bits)) {
-                    pma = &ma->next;
-                }
-                else {
-                    *pma = nxt;
-                    assert(ma->a->flags.how == 2);
-                    jl_gc_free_array(ma->a);
-                    ma->next = ptls2->heap.mafreelist;
-                    ptls2->heap.mafreelist = ma;
-                }
-                gc_time_count_mallocd_array(bits);
-                ma = nxt;
+        if (ptls2 == NULL) {
+            continue;
+        }
+        size_t n = 0;
+        size_t l = ptls2->gc_tls_common.heap.mallocarrays.len;
+        void **lst = ptls2->gc_tls_common.heap.mallocarrays.items;
+        // filter without preserving order
+        while (n < l) {
+            jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1);
+            if (gc_marked(jl_astaggedvalue(m)->bits.gc)) {
+                n++;
+            }
+            else {
+                int isaligned = (uintptr_t)lst[n] & 1;
+                jl_gc_free_memory(m, isaligned);
+                l--;
+                lst[n] = lst[l];
             }
         }
+        ptls2->gc_tls_common.heap.mallocarrays.len = l;
     }
-    gc_time_mallocd_array_end();
+    gc_time_mallocd_memory_end();
 }
 
 // pool allocation
@@ -1219,7 +684,7 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_
 {
     assert(GC_PAGE_OFFSET >= sizeof(void*));
     pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize;
-    pg->pool_n = p - ptls2->heap.norm_pools;
+    pg->pool_n = p - ptls2->gc_tls.heap.norm_pools;
     jl_taggedvalue_t *beg = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET);
     pg->has_young = 0;
     pg->has_marked = 0;
@@ -1230,47 +695,47 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_
     return beg;
 }
 
-jl_gc_global_page_pool_t global_page_pool_lazily_freed;
-jl_gc_global_page_pool_t global_page_pool_clean;
-jl_gc_global_page_pool_t global_page_pool_freed;
+jl_gc_page_stack_t global_page_pool_lazily_freed;
+jl_gc_page_stack_t global_page_pool_clean;
+jl_gc_page_stack_t global_page_pool_freed;
 pagetable_t alloc_map;
 
 // Add a new page to the pool. Discards any pages in `p->newpages` before.
 static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
 {
     // Do not pass in `ptls` as argument. This slows down the fast path
-    // in pool_alloc significantly
+    // in small_alloc significantly
     jl_ptls_t ptls = jl_current_task->ptls;
-    jl_gc_pagemeta_t *pg = pop_page_metadata_back(&ptls->page_metadata_lazily_freed);
-    if (pg == NULL) {
-        pg = jl_gc_alloc_page();
-    }
+    jl_gc_pagemeta_t *pg = jl_gc_alloc_page();
     pg->osize = p->osize;
     pg->thread_n = ptls->tid;
     set_page_metadata(pg);
-    push_page_metadata_back(&ptls->page_metadata_allocd, pg);
+    push_lf_back(&ptls->gc_tls.page_metadata_allocd, pg);
     jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg);
+    jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, GC_PAGE_SZ);
     p->newpages = fl;
     return fl;
 }
 
 // Size includes the tag and the tag is not cleared!!
-STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset,
+STATIC_INLINE jl_value_t *jl_gc_small_alloc_inner(jl_ptls_t ptls, int offset,
                                           int osize)
 {
     // Use the pool offset instead of the pool address as the argument
     // to workaround a llvm bug.
     // Ref https://llvm.org/bugs/show_bug.cgi?id=27190
-    jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + pool_offset);
+    jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + offset);
     assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
 #ifdef MEMDEBUG
-    return jl_gc_big_alloc(ptls, osize);
+    return jl_gc_big_alloc(ptls, osize, NULL);
 #endif
     maybe_collect(ptls);
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize);
-    jl_atomic_store_relaxed(&ptls->gc_num.poolalloc,
-        jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + osize);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + osize);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc) + 1);
     // first try to use the freelist
     jl_taggedvalue_t *v = p->freelist;
     if (v != NULL) {
@@ -1310,20 +775,42 @@ STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset
     return jl_valueof(v);
 }
 
-// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code.
-JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
-                                          int osize)
+// Instrumented version of jl_gc_small_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_small_alloc(jl_ptls_t ptls, int offset, int osize, jl_value_t* type)
 {
-    jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
-    maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag);
+    jl_value_t *val = jl_gc_small_alloc_inner(ptls, offset, osize);
+    maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type);
     return val;
 }
 
-// This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into
-// its callers. We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner`
+// This wrapper exists only to prevent `jl_gc_small_alloc_inner` from being inlined into
+// its callers. We provide an external-facing interface for callers, and inline `jl_gc_small_alloc_inner`
 // into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
-jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize) {
-    return jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
+jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) {
+    return jl_gc_small_alloc_inner(ptls, offset, osize);
+}
+
+// Size does NOT include the type tag!!
+inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
+{
+    jl_value_t *v;
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    if (sz <= GC_MAX_SZCLASS) {
+        int pool_id = jl_gc_szclass(allocsz);
+        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id];
+        int osize = jl_gc_sizeclasses[pool_id];
+        // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in
+        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+        v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
+    }
+    else {
+        if (allocsz < sz) // overflow in adding offs, size was "negative"
+            jl_throw(jl_memory_exception);
+        v = jl_gc_big_alloc_noinline(ptls, allocsz);
+    }
+    jl_set_typeof(v, ty);
+    maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
+    return v;
 }
 
 int jl_gc_classify_pools(size_t sz, int *osize)
@@ -1333,19 +820,49 @@ int jl_gc_classify_pools(size_t sz, int *osize)
     size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     int klass = jl_gc_szclass(allocsz);
     *osize = jl_gc_sizeclasses[klass];
-    return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]);
+    return (int)(intptr_t)(&((jl_ptls_t)0)->gc_tls.heap.norm_pools[klass]);
 }
 
 // sweep phase
 
-int64_t lazy_freed_pages = 0;
+JL_DLLEXPORT gc_fragmentation_stat_t jl_gc_page_fragmentation_stats[JL_GC_N_POOLS];
+JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS];
+
+STATIC_INLINE void gc_update_fragmentation_data_for_size_class(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
+{
+    gc_fragmentation_stat_t *stats = &jl_gc_page_fragmentation_stats[pg->pool_n];
+    jl_atomic_fetch_add_relaxed(&stats->n_freed_objs, pg->nfree);
+    jl_atomic_fetch_add_relaxed(&stats->n_pages_allocd, 1);
+}
+
+STATIC_INLINE void gc_reset_fragmentation_data_for_size_classes(void) JL_NOTSAFEPOINT
+{
+    for (int i = 0; i < JL_GC_N_POOLS; i++) {
+        jl_atomic_store_relaxed(&jl_gc_page_fragmentation_stats[i].n_freed_objs, 0);
+        jl_atomic_store_relaxed(&jl_gc_page_fragmentation_stats[i].n_pages_allocd, 0);
+    }
+}
+
+STATIC_INLINE void gc_compute_utilization_data_for_size_classes(void) JL_NOTSAFEPOINT
+{
+    for (int i = 0; i < JL_GC_N_POOLS; i++) {
+        gc_fragmentation_stat_t *stats = &jl_gc_page_fragmentation_stats[i];
+        double utilization = 1.0;
+        size_t n_freed_objs = jl_atomic_load_relaxed(&stats->n_freed_objs);
+        size_t n_pages_allocd = jl_atomic_load_relaxed(&stats->n_pages_allocd);
+        if (n_pages_allocd != 0) {
+            utilization -= ((double)n_freed_objs * (double)jl_gc_sizeclasses[i]) / (double)n_pages_allocd / (double)GC_PAGE_SZ;
+        }
+        jl_gc_page_utilization_stats[i] = utilization;
+    }
+}
 
-// Returns pointer to terminal pointer of list rooted at *pfl.
-static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allocd,
-                                        jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT
+// Walks over a page, reconstruting the free lists if the page contains at least one live object. If not,
+// queues up the page for later decommit (i.e. through `madvise` on Unix).
+static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_pagemeta_t *pg, int osize) JL_NOTSAFEPOINT
 {
     char *data = pg->data;
-    jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
+    jl_taggedvalue_t *v0 = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
     char *lim = data + GC_PAGE_SZ - osize;
     char *lim_newpages = data + GC_PAGE_SZ;
     if (gc_page_data((char*)p->newpages - 1) == data) {
@@ -1353,58 +870,56 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
     }
     size_t old_nfree = pg->nfree;
     size_t nfree;
+    // avoid loading a global variable in the hot path
+    int page_profile_enabled = gc_page_profile_is_enabled();
+    gc_page_serializer_init(s, pg);
 
     int re_use_page = 1;
-    int freed_lazily = 0;
     int freedall = 1;
     int pg_skpd = 1;
     if (!pg->has_marked) {
         re_use_page = 0;
-    #ifdef _P64 // TODO: re-enable on `_P32`?
-        // lazy version: (empty) if the whole page was already unused, free it (return it to the pool)
-        // eager version: (freedall) free page as soon as possible
-        // the eager one uses less memory.
-        // FIXME - need to do accounting on a per-thread basis
-        // on quick sweeps, keep a few pages empty but allocated for performance
-        if (!sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ) {
-            lazy_freed_pages++;
-            freed_lazily = 1;
-        }
-    #endif
         nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize;
+        gc_page_profile_write_empty_page(s, page_profile_enabled);
         goto done;
     }
-    // note that `pg->nold` may not be accurate with multithreaded marking since
-    // two threads may race when trying to set the mark bit in `gc_try_setmark_tag`.
-    // We're basically losing a bit of precision in the sweep phase at the cost of
-    // making the mark phase considerably cheaper.
-    // See issue #50419
-    if (jl_n_markthreads == 0) {
-        // For quick sweep, we might be able to skip the page if the page doesn't
-        // have any young live cell before marking.
-        if (!sweep_full && !pg->has_young) {
-            assert(!prev_sweep_full || pg->prev_nold >= pg->nold);
-            if (!prev_sweep_full || pg->prev_nold == pg->nold) {
-                // the position of the freelist begin/end in this page
-                // is stored in its metadata
-                if (pg->fl_begin_offset != (uint16_t)-1) {
-                    *pfl = page_pfl_beg(pg);
-                    pfl = (jl_taggedvalue_t**)page_pfl_end(pg);
-                }
-                freedall = 0;
-                nfree = pg->nfree;
-                goto done;
-            }
+    // For quick sweep, we might be able to skip the page if the page doesn't
+    // have any young live cell before marking.
+    if (!current_sweep_full && !pg->has_young) {
+        assert(!prev_sweep_full || pg->prev_nold >= pg->nold);
+        if (!prev_sweep_full || pg->prev_nold == pg->nold) {
+            freedall = 0;
+            nfree = pg->nfree;
+            gc_page_profile_write_empty_page(s, page_profile_enabled);
+            goto done;
         }
     }
 
     pg_skpd = 0;
-    {  // scope to avoid clang goto errors
+    {   // scope to avoid clang goto errors
         int has_marked = 0;
         int has_young = 0;
         int16_t prev_nold = 0;
         int pg_nfree = 0;
+        jl_taggedvalue_t *fl = NULL;
+        jl_taggedvalue_t **pfl = &fl;
         jl_taggedvalue_t **pfl_begin = NULL;
+        // collect page profile
+        jl_taggedvalue_t *v = v0;
+        if (page_profile_enabled) {
+            while ((char*)v <= lim) {
+                int bits = v->bits.gc;
+                if (!gc_marked(bits) || (char*)v >= lim_newpages) {
+                    gc_page_profile_write_garbage(s, page_profile_enabled);
+                }
+                else {
+                    gc_page_profile_write_live_obj(s, v, page_profile_enabled);
+                }
+                v = (jl_taggedvalue_t*)((char*)v + osize);
+            }
+            v = v0;
+        }
+        // sweep the page
         while ((char*)v <= lim) {
             int bits = v->bits.gc;
             // if an object is past `lim_newpages` then we can guarantee it's garbage
@@ -1415,7 +930,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
                 pg_nfree++;
             }
             else { // marked young or old
-                if (sweep_full || bits == GC_MARKED) { // old enough
+                if (current_sweep_full || bits == GC_MARKED) { // old enough
                     bits = v->bits.gc = GC_OLD; // promote
                 }
                 prev_nold++;
@@ -1437,7 +952,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
         }
 
         pg->nfree = pg_nfree;
-        if (sweep_full) {
+        if (current_sweep_full) {
             pg->nold = 0;
             pg->prev_nold = prev_nold;
         }
@@ -1446,71 +961,399 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
 
 done:
     if (re_use_page) {
-        push_page_metadata_back(allocd, pg);
-    }
-    else if (freed_lazily) {
-        push_page_metadata_back(lazily_freed, pg);
+        gc_update_fragmentation_data_for_size_class(pg);
+        push_lf_back(allocd, pg);
     }
     else {
-    #ifdef _P64 // only enable concurrent sweeping on 64bit
-        if (jl_n_sweepthreads == 0) {
-            jl_gc_free_page(pg);
-            push_lf_page_metadata_back(&global_page_pool_freed, pg);
-        }
-        else {
-            gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED);
-            push_lf_page_metadata_back(&global_page_pool_lazily_freed, pg);
-        }
-    #else
-        jl_gc_free_page(pg);
-        push_lf_page_metadata_back(&global_page_pool_freed, pg);
-    #endif
+        jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ);
+        gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED);
+        push_lf_back(&global_page_pool_lazily_freed, pg);
     }
+    gc_page_profile_write_to_file(s);
     gc_time_count_page(freedall, pg_skpd);
-    gc_num.freed += (nfree - old_nfree) * osize;
-    return pfl;
+    jl_ptls_t ptls = jl_current_task->ptls;
+    // Note that we aggregate the `pool_live_bytes` over all threads before returning this
+    // value to the user. It doesn't matter how the `pool_live_bytes` are partitioned among
+    // the threads as long as the sum is correct. Let's add the `pool_live_bytes` to the current thread
+    // instead of adding it to the thread that originally allocated the page, so we can avoid
+    // an atomic-fetch-add here.
+    size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + delta);
+    jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize);
 }
 
 // the actual sweeping over all allocated pages in a memory pool
-STATIC_INLINE void gc_sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t **allocd,
-                                      jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT
+STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_page_stack_t *allocd, jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
 {
     int p_n = pg->pool_n;
     int t_n = pg->thread_n;
     jl_ptls_t ptls2 = gc_all_tls_states[t_n];
-    jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
+    jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[p_n];
     int osize = pg->osize;
-    pfl[t_n * JL_GC_N_POOLS + p_n] = gc_sweep_page(p, allocd, lazily_freed, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize);
+    gc_sweep_page(s, p, allocd, pg, osize);
 }
 
 // sweep over all memory that is being used and not in a pool
 static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
 {
-    sweep_malloced_arrays();
-    sweep_big(ptls, sweep_full);
+    uint64_t t_free_mallocd_memory_start = jl_hrtime();
+    gc_sweep_foreign_objs();
+    sweep_malloced_memory();
+    sweep_big(ptls);
+    uint64_t t_free_mallocd_memory_end = jl_hrtime();
+    gc_num.total_sweep_free_mallocd_memory_time += t_free_mallocd_memory_end - t_free_mallocd_memory_start;
+    jl_engine_sweep(gc_all_tls_states);
+}
+
+// wake up all threads to sweep the stacks
+void gc_sweep_wake_all_stacks(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    uv_mutex_lock(&gc_threads_lock);
+    int first = gc_first_parallel_collector_thread_id();
+    int last = gc_last_parallel_collector_thread_id();
+    for (int i = first; i <= last; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        gc_check_ptls_of_parallel_collector_thread(ptls2);
+        jl_atomic_fetch_add(&ptls2->gc_tls.gc_stack_sweep_requested, 1);
+    }
+    uv_cond_broadcast(&gc_threads_cond);
+    uv_mutex_unlock(&gc_threads_lock);
+    return;
+}
+
+void gc_sweep_wait_for_all_stacks(void) JL_NOTSAFEPOINT
+{
+    while ((jl_atomic_load_acquire(&gc_ptls_sweep_idx) >= 0 ) || jl_atomic_load_acquire(&gc_n_threads_sweeping_stacks) != 0) {
+        jl_cpu_pause();
+    }
+}
+
+extern const unsigned pool_sizes[];
+
+void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT
+{
+    // Stack sweeping algorithm:
+    //    // deallocate stacks if we have too many sitting around unused
+    //    for (stk in halfof(free_stacks))
+    //        free_stack(stk, pool_sz);
+    //    // then sweep the task stacks
+    //    for (t in live_tasks)
+    //        if (!gc-marked(t))
+    //            stkbuf = t->stkbuf
+    //            bufsz = t->bufsz
+    //            if (stkbuf)
+    //                push(free_stacks[sz], stkbuf)
+    jl_atomic_fetch_add(&gc_n_threads_sweeping_stacks, 1);
+    while (1) {
+        int i = jl_atomic_fetch_add_relaxed(&gc_ptls_sweep_idx, -1);
+        if (i < 0)
+            break;
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 == NULL)
+            continue;
+        assert(gc_n_threads != 0);
+        // free half of stacks that remain unused since last sweep
+        if (i == jl_atomic_load_relaxed(&gc_stack_free_idx)) {
+            for (int p = 0; p < JL_N_STACK_POOLS; p++) {
+                small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p];
+                size_t n_to_free;
+                if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
+                    n_to_free = al->len; // not alive yet or dead, so it does not need these anymore
+                }
+                else if (al->len > MIN_STACK_MAPPINGS_PER_POOL) {
+                    n_to_free = al->len / 2;
+                    if (n_to_free > (al->len - MIN_STACK_MAPPINGS_PER_POOL))
+                        n_to_free = al->len - MIN_STACK_MAPPINGS_PER_POOL;
+                }
+                else {
+                    n_to_free = 0;
+                }
+                for (int n = 0; n < n_to_free; n++) {
+                    void *stk = small_arraylist_pop(al);
+                    free_stack(stk, pool_sizes[p]);
+                }
+                if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
+                    small_arraylist_free(al);
+                }
+            }
+        }
+        if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
+            small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks);
+        }
+
+        small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
+        size_t n = 0;
+        size_t ndel = 0;
+        size_t l = live_tasks->len;
+        void **lst = live_tasks->items;
+        if (l == 0)
+            continue;
+        while (1) {
+            jl_task_t *t = (jl_task_t*)lst[n];
+            assert(jl_is_task(t));
+            if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
+                if (t->ctx.stkbuf == NULL)
+                    ndel++; // jl_release_task_stack called
+                else
+                    n++;
+            }
+            else {
+                ndel++;
+                void *stkbuf = t->ctx.stkbuf;
+                size_t bufsz = t->ctx.bufsz;
+                if (stkbuf) {
+                    t->ctx.stkbuf = NULL;
+                    _jl_free_stack(ptls2, stkbuf, bufsz);
+                }
+#ifdef _COMPILER_TSAN_ENABLED_
+                if (t->ctx.tsan_state) {
+                    __tsan_destroy_fiber(t->ctx.tsan_state);
+                    t->ctx.tsan_state = NULL;
+                }
+#endif
+            }
+            if (n >= l - ndel)
+                break;
+            void *tmp = lst[n];
+            lst[n] = lst[n + ndel];
+            lst[n + ndel] = tmp;
+        }
+        live_tasks->len -= ndel;
+    }
+    jl_atomic_fetch_add(&gc_n_threads_sweeping_stacks, -1);
+}
+
+JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    // initialize ptls index for parallel sweeping of stack pools
+    assert(gc_n_threads != 0);
+    int stack_free_idx = jl_atomic_load_relaxed(&gc_stack_free_idx);
+    if (stack_free_idx + 1 == gc_n_threads)
+        jl_atomic_store_relaxed(&gc_stack_free_idx, 0);
+    else
+        jl_atomic_store_relaxed(&gc_stack_free_idx, stack_free_idx + 1);
+    jl_atomic_store_release(&gc_ptls_sweep_idx, gc_n_threads - 1); // idx == gc_n_threads = release stacks to the OS so it's serial
+    uv_mutex_lock(&live_tasks_lock);
+    gc_sweep_wake_all_stacks(ptls);
+    sweep_stack_pool_loop();
+    gc_sweep_wait_for_all_stacks();
+    sweep_mtarraylist_buffers();
+    uv_mutex_unlock(&live_tasks_lock);
+}
+
+static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT
+{
+    assert(pg->fl_begin_offset != UINT16_MAX);
+    char *cur_pg = gc_page_data(last);
+    // Fast path for page that has no allocation
+    jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
+    if (last == fl_beg)
+        return;
+    int nfree = 0;
+    do {
+        nfree++;
+        last = last->next;
+    } while (gc_page_data(last) == cur_pg);
+    pg->nfree = nfree;
+}
+
+// pre-scan pages to check whether there are enough pages so that's worth parallelizing
+// also sweeps pages that don't need to be linearly scanned
+int gc_sweep_prescan(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_scratch) JL_NOTSAFEPOINT
+{
+    // 4MB worth of pages is worth parallelizing
+    const int n_pages_worth_parallel_sweep = (int)(4 * (1 << 20) / GC_PAGE_SZ);
+    int n_pages_to_scan = 0;
+    gc_page_profiler_serializer_t serializer = gc_page_serializer_create();
+    for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+        if (ptls2 == NULL) {
+            continue;
+        }
+        jl_gc_page_stack_t *dest = &new_gc_allocd_scratch[ptls2->tid].stack;
+        jl_gc_page_stack_t tmp;
+        jl_gc_pagemeta_t *tail = NULL;
+        memset(&tmp, 0, sizeof(tmp));
+        while (1) {
+            jl_gc_pagemeta_t *pg = pop_lf_back_nosync(&ptls2->gc_tls.page_metadata_allocd);
+            if (pg == NULL) {
+                break;
+            }
+            int should_scan = 1;
+            if (!pg->has_marked) {
+                should_scan = 0;
+            }
+            if (!current_sweep_full && !pg->has_young) {
+                assert(!prev_sweep_full || pg->prev_nold >= pg->nold);
+                if (!prev_sweep_full || pg->prev_nold == pg->nold) {
+                    should_scan = 0;
+                }
+            }
+            if (should_scan) {
+                if (tail == NULL) {
+                    tail = pg;
+                }
+                n_pages_to_scan++;
+                push_lf_back_nosync(&tmp, pg);
+            }
+            else {
+                gc_sweep_pool_page(&serializer, dest, pg);
+            }
+            if (n_pages_to_scan >= n_pages_worth_parallel_sweep) {
+                break;
+            }
+        }
+        if (tail != NULL) {
+            tail->next = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
+        }
+        ptls2->gc_tls.page_metadata_allocd = tmp;
+        if (n_pages_to_scan >= n_pages_worth_parallel_sweep) {
+            break;
+        }
+    }
+    gc_page_serializer_destroy(&serializer);
+    return n_pages_to_scan >= n_pages_worth_parallel_sweep;
+}
+
+// wake up all threads to sweep the pages
+void gc_sweep_wake_all_pages(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_scratch) JL_NOTSAFEPOINT
+{
+    int parallel_sweep_worthwhile = gc_sweep_prescan(ptls, new_gc_allocd_scratch);
+    if (parallel_sweep_worthwhile && !page_profile_enabled) {
+        jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch);
+        uv_mutex_lock(&gc_threads_lock);
+        int first = gc_first_parallel_collector_thread_id();
+        int last = gc_last_parallel_collector_thread_id();
+        for (int i = first; i <= last; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            jl_atomic_fetch_add(&ptls2->gc_tls.gc_sweeps_requested, 1);
+        }
+        uv_cond_broadcast(&gc_threads_cond);
+        uv_mutex_unlock(&gc_threads_lock);
+        return;
+    }
+    if (page_profile_enabled) {
+        // we need to ensure that no threads are running sweeping when
+        // collecting a page profile.
+        // wait for all to leave in order to ensure that a straggler doesn't
+        // try to enter sweeping after we set `gc_allocd_scratch` below.
+        int first = gc_first_parallel_collector_thread_id();
+        int last = gc_last_parallel_collector_thread_id();
+        for (int i = first; i <= last; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            while (jl_atomic_load_acquire(&ptls2->gc_tls.gc_sweeps_requested) != 0) {
+                jl_cpu_pause();
+            }
+        }
+    }
+    jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch);
+}
+
+// wait for all threads to finish sweeping
+void gc_sweep_wait_for_all_pages(void) JL_NOTSAFEPOINT
+{
+    jl_atomic_store(&gc_allocd_scratch, NULL);
+    while (jl_atomic_load_acquire(&gc_n_threads_sweeping_pools) != 0) {
+        jl_cpu_pause();
+    }
+}
+
+// sweep all pools
+void gc_sweep_pool_parallel(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    jl_atomic_fetch_add(&gc_n_threads_sweeping_pools, 1);
+    jl_gc_padded_page_stack_t *allocd_scratch = jl_atomic_load(&gc_allocd_scratch);
+    if (allocd_scratch != NULL) {
+        gc_page_profiler_serializer_t serializer = gc_page_serializer_create();
+        while (1) {
+            int found_pg = 0;
+            // sequentially walk the threads and sweep the pages
+            for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+                jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+                // skip foreign threads that already exited
+                if (ptls2 == NULL) {
+                    continue;
+                }
+                jl_gc_page_stack_t *dest = &allocd_scratch[ptls2->tid].stack;
+                jl_gc_pagemeta_t *pg = try_pop_lf_back(&ptls2->gc_tls.page_metadata_allocd);
+                // failed steal attempt
+                if (pg == NULL) {
+                    continue;
+                }
+                gc_sweep_pool_page(&serializer, dest, pg);
+                found_pg = 1;
+            }
+            if (!found_pg) {
+                // check for termination
+                int no_more_work = 1;
+                for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+                    jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+                    // skip foreign threads that already exited
+                    if (ptls2 == NULL) {
+                        continue;
+                    }
+                    jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
+                    if (pg != NULL) {
+                        no_more_work = 0;
+                        break;
+                    }
+                }
+                if (no_more_work) {
+                    break;
+                }
+            }
+            jl_cpu_pause();
+        }
+        gc_page_serializer_destroy(&serializer);
+    }
+    jl_atomic_fetch_add(&gc_n_threads_sweeping_pools, -1);
 }
 
-static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT
+// free all pages (i.e. through `madvise` on Linux) that were lazily freed
+void gc_free_pages(void) JL_NOTSAFEPOINT
 {
-    assert(pg->fl_begin_offset != UINT16_MAX);
-    char *cur_pg = gc_page_data(last);
-    // Fast path for page that has no allocation
-    jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
-    if (last == fl_beg)
-        return;
-    int nfree = 0;
-    do {
-        nfree++;
-        last = last->next;
-    } while (gc_page_data(last) == cur_pg);
-    pg->nfree = nfree;
+    size_t n_pages_seen = 0;
+    jl_gc_page_stack_t tmp;
+    memset(&tmp, 0, sizeof(tmp));
+    while (1) {
+        jl_gc_pagemeta_t *pg = pop_lf_back(&global_page_pool_lazily_freed);
+        if (pg == NULL) {
+            break;
+        }
+        n_pages_seen++;
+        // keep the last few pages around for a while
+        if (n_pages_seen * GC_PAGE_SZ <= default_collect_interval) {
+            push_lf_back(&tmp, pg);
+            continue;
+        }
+        jl_gc_free_page(pg);
+        push_lf_back(&global_page_pool_freed, pg);
+    }
+    // If concurrent page sweeping is disabled, then `gc_free_pages` will be called in the stop-the-world
+    // phase. We can guarantee, therefore, that there won't be any concurrent modifications to
+    // `global_page_pool_lazily_freed`, so it's safe to assign `tmp` back to `global_page_pool_lazily_freed`.
+    // Otherwise, we need to use the thread-safe push_lf_back/pop_lf_back functions.
+    if (jl_n_sweepthreads == 0) {
+        global_page_pool_lazily_freed = tmp;
+    }
+    else {
+        while (1) {
+            jl_gc_pagemeta_t *pg = pop_lf_back(&tmp);
+            if (pg == NULL) {
+                break;
+            }
+            push_lf_back(&global_page_pool_lazily_freed, pg);
+        }
+    }
 }
 
 // setup the data-structures for a sweep over all memory pools
-static void gc_sweep_pool(int sweep_full)
+static void gc_sweep_pool(void) JL_NOTSAFEPOINT
 {
     gc_time_pool_start();
-    lazy_freed_pages = 0;
 
     // For the benefit of the analyzer, which doesn't know that gc_n_threads
     // doesn't change over the course of this function
@@ -1518,7 +1361,7 @@ static void gc_sweep_pool(int sweep_full)
 
     // allocate enough space to hold the end of the free list chain
     // for every thread and pool size
-    jl_taggedvalue_t ***pfl = (jl_taggedvalue_t ***) alloca(n_threads * JL_GC_N_POOLS * sizeof(jl_taggedvalue_t**));
+    jl_taggedvalue_t ***pfl = (jl_taggedvalue_t ***) malloc_s(n_threads * JL_GC_N_POOLS * sizeof(jl_taggedvalue_t**));
 
     // update metadata of pages that were pointed to by freelist or newpages from a pool
     // i.e. pages being the current allocation target
@@ -1530,8 +1373,9 @@ static void gc_sweep_pool(int sweep_full)
             }
             continue;
         }
+        jl_atomic_store_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes, 0);
         for (int i = 0; i < JL_GC_N_POOLS; i++) {
-            jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
+            jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
             jl_taggedvalue_t *last = p->freelist;
             if (last != NULL) {
                 jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(last));
@@ -1550,54 +1394,86 @@ static void gc_sweep_pool(int sweep_full)
                 pg->has_young = 1;
             }
         }
-        jl_gc_pagemeta_t *pg = ptls2->page_metadata_lazily_freed;
-        while (pg != NULL) {
-            jl_gc_pagemeta_t *pg2 = pg->next;
-            lazy_freed_pages++;
-            pg = pg2;
-        }
     }
 
-    // the actual sweeping
-    for (int t_i = 0; t_i < n_threads; t_i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        if (ptls2 != NULL) {
-            jl_gc_pagemeta_t *allocd = NULL;
-            jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd;
+    uint64_t t_page_walk_start = jl_hrtime();
+    {
+        // the actual sweeping
+        jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
+        jl_ptls_t ptls = jl_current_task->ptls;
+        gc_reset_fragmentation_data_for_size_classes();
+        gc_sweep_wake_all_pages(ptls, new_gc_allocd_scratch);
+        gc_sweep_pool_parallel(ptls);
+        gc_sweep_wait_for_all_pages();
+
+        // reset half-pages pointers
+        for (int t_i = 0; t_i < n_threads; t_i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+            if (ptls2 != NULL) {
+                ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
+                for (int i = 0; i < JL_GC_N_POOLS; i++) {
+                    jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
+                    p->newpages = NULL;
+                }
+            }
+        }
+
+        // merge free lists
+        for (int t_i = 0; t_i < n_threads; t_i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+            if (ptls2 == NULL) {
+                continue;
+            }
+            jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
             while (pg != NULL) {
                 jl_gc_pagemeta_t *pg2 = pg->next;
-                gc_sweep_pool_page(pfl, &allocd, &ptls2->page_metadata_lazily_freed, pg, sweep_full);
+                if (pg->fl_begin_offset != UINT16_MAX) {
+                    char *cur_pg = pg->data;
+                    jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
+                    jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset);
+                    *pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg;
+                    pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next;
+                }
                 pg = pg2;
             }
-            ptls2->page_metadata_allocd = allocd;
-            for (int i = 0; i < JL_GC_N_POOLS; i++) {
-                jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
-                p->newpages = NULL;
-            }
         }
-    }
 
-    // null out terminal pointers of free lists
-    for (int t_i = 0; t_i < n_threads; t_i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        if (ptls2 != NULL) {
-            for (int i = 0; i < JL_GC_N_POOLS; i++) {
-                *pfl[t_i * JL_GC_N_POOLS + i] = NULL;
+        // null out terminal pointers of free lists
+        for (int t_i = 0; t_i < n_threads; t_i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+            if (ptls2 != NULL) {
+                for (int i = 0; i < JL_GC_N_POOLS; i++) {
+                    *pfl[t_i * JL_GC_N_POOLS + i] = NULL;
+                }
             }
         }
+
+        // cleanup
+        free(pfl);
+        free(new_gc_allocd_scratch);
     }
+    uint64_t t_page_walk_end = jl_hrtime();
+    gc_num.total_sweep_page_walk_time += t_page_walk_end - t_page_walk_start;
 
 #ifdef _P64 // only enable concurrent sweeping on 64bit
     // wake thread up to sweep concurrently
     if (jl_n_sweepthreads > 0) {
         uv_sem_post(&gc_sweep_assists_needed);
     }
+    else {
+        uint64_t t_madvise_start = jl_hrtime();
+        gc_free_pages();
+        uint64_t t_madvise_end = jl_hrtime();
+        gc_num.total_sweep_madvise_time += t_madvise_end - t_madvise_start;
+    }
+#else
+    gc_free_pages();
 #endif
-
-    gc_time_pool_end(sweep_full);
+    gc_compute_utilization_data_for_size_classes();
+    gc_time_pool_end(current_sweep_full);
 }
 
-static void gc_sweep_perm_alloc(void)
+static void gc_sweep_perm_alloc(void) JL_NOTSAFEPOINT
 {
     uint64_t t0 = jl_hrtime();
     gc_sweep_sysimg();
@@ -1610,20 +1486,19 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
 {
     jl_ptls_t ptls = jl_current_task->ptls;
     jl_taggedvalue_t *o = jl_astaggedvalue(ptr);
-    // The modification of the `gc_bits` is not atomic but it
-    // should be safe here since GC is not allowed to run here and we only
-    // write GC_OLD to the GC bits outside GC. This could cause
-    // duplicated objects in the remset but that shouldn't be a problem.
-    o->bits.gc = GC_MARKED;
-    arraylist_push(ptls->heap.remset, (jl_value_t*)ptr);
-    ptls->heap.remset_nptr++; // conservative
+    // The modification of the `gc_bits` needs to be atomic.
+    // We need to ensure that objects are in the remset at
+    // most once, since the mark phase may update page metadata,
+    // which is not idempotent. See comments in https://github.com/JuliaLang/julia/issues/50419
+    uintptr_t header = jl_atomic_fetch_and_relaxed((_Atomic(uintptr_t) *)&o->header, ~GC_OLD);
+    if (header & GC_OLD) { // write barrier has not been triggered in this object yet
+        arraylist_push(&ptls->gc_tls.heap.remset, (jl_value_t*)ptr);
+        ptls->gc_tls.heap.remset_nptr++; // conservative
+    }
 }
 
-void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
+void jl_gc_queue_multiroot(const jl_value_t *parent, const void *ptr, jl_datatype_t *dt) JL_NOTSAFEPOINT
 {
-    // first check if this is really necessary
-    // TODO: should we store this info in one of the extra gc bits?
-    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr);
     const jl_datatype_layout_t *ly = dt->layout;
     uint32_t npointers = ly->npointers;
     //if (npointers == 0) // this was checked by the caller
@@ -1639,14 +1514,14 @@ void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_N
     const uint32_t *ptrs32 = (const uint32_t*)jl_dt_layout_ptrs(ly);
     for (size_t i = 1; i < npointers; i++) {
         uint32_t fld;
-        if (ly->fielddesc_type == 0) {
+        if (ly->flags.fielddesc_type == 0) {
             fld = ptrs8[i];
         }
-        else if (ly->fielddesc_type == 1) {
+        else if (ly->flags.fielddesc_type == 1) {
             fld = ptrs16[i];
         }
         else {
-            assert(ly->fielddesc_type == 2);
+            assert(ly->flags.fielddesc_type == 2);
             fld = ptrs32[i];
         }
         jl_value_t *ptrf = ((jl_value_t**)ptr)[fld];
@@ -1683,7 +1558,7 @@ STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset,
 
 STATIC_INLINE void gc_assert_parent_validity(jl_value_t *parent, jl_value_t *child) JL_NOTSAFEPOINT
 {
-#ifdef GC_ASSERT_PARENT_VALIDITY
+#if defined(GC_VERIFY) || defined(GC_ASSERT_PARENT_VALIDITY)
     jl_taggedvalue_t *child_astagged = jl_astaggedvalue(child);
     jl_taggedvalue_t *child_vtag = (jl_taggedvalue_t *)(child_astagged->header & ~(uintptr_t)0xf);
     uintptr_t child_vt = (uintptr_t)child_vtag;
@@ -1700,15 +1575,16 @@ STATIC_INLINE void gc_assert_parent_validity(jl_value_t *parent, jl_value_t *chi
         return;
     }
     if (__unlikely(!jl_is_datatype((jl_datatype_t *)child_vt) || ((jl_datatype_t *)child_vt)->smalltag)) {
-        jl_safe_printf("GC error (probable corruption)\n");
-        jl_gc_debug_print_status();
-        jl_safe_printf("Parent %p\n", (void *)parent);
-        jl_safe_printf("of type:\n");
-        jl_(jl_typeof(parent));
-        jl_safe_printf("While marking child at %p\n", (void *)child);
-        jl_safe_printf("of type:\n");
-        jl_(child_vtag);
-        jl_gc_debug_critical_error();
+        ios_t *const s = ios_safe_stderr;
+        jl_safe_fprintf(s, "GC error (probable corruption)\n");
+        jl_gc_debug_fprint_status(s);
+        jl_safe_fprintf(s, "Parent %p\n", (void *)parent);
+        jl_safe_fprintf(s, "of type:\n");
+        jl_safe_static_show((JL_STREAM*)s, (jl_value_t *)jl_typeof(parent));
+        jl_safe_fprintf(s, "While marking child at %p\n", (void *)child);
+        jl_safe_fprintf(s, "of type:\n");
+        jl_safe_static_show(s, (jl_value_t *)child_vtag);
+        jl_gc_debug_fprint_critical_error(s);
         abort();
     }
 #endif
@@ -1720,8 +1596,8 @@ STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj,
                                        uintptr_t nptr) JL_NOTSAFEPOINT
 {
     if (__unlikely((nptr & 0x3) == 0x3)) {
-        ptls->heap.remset_nptr += nptr >> 2;
-        arraylist_t *remset = ptls->heap.remset;
+        ptls->gc_tls.heap.remset_nptr += nptr >> 2;
+        arraylist_t *remset = &ptls->gc_tls.heap.remset;
         size_t len = remset->len;
         if (__unlikely(len >= remset->max)) {
             arraylist_push(remset, obj);
@@ -1736,6 +1612,10 @@ STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj,
 // Push a work item to the queue
 STATIC_INLINE void gc_ptr_queue_push(jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT
 {
+#ifdef JL_DEBUG_BUILD
+    if (obj == gc_findval)
+        jl_raise_debugger();
+#endif
     ws_array_t *old_a = ws_queue_push(&mq->ptr_queue, &obj, sizeof(jl_value_t*));
     // Put `old_a` in `reclaim_set` to be freed after the mark phase
     if (__unlikely(old_a != NULL))
@@ -1778,21 +1658,22 @@ STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_pop(jl_gc_markqueue_t *mq) JL_NOTSAFEP
 // Dump mark queue on critical error
 JL_NORETURN NOINLINE void gc_dump_queue_and_abort(jl_ptls_t ptls, jl_datatype_t *vt) JL_NOTSAFEPOINT
 {
-    jl_safe_printf("GC error (probable corruption)\n");
-    jl_gc_debug_print_status();
-    jl_(vt);
-    jl_gc_debug_critical_error();
+    ios_t *const s = ios_safe_stderr;
+    jl_safe_fprintf(s, "GC error (probable corruption)\n");
+    jl_gc_debug_fprint_status(s);
+    jl_safe_static_show((JL_STREAM*)s, (jl_value_t *)vt);
+    jl_gc_debug_fprint_critical_error(s);
     if (jl_n_gcthreads == 0) {
-        jl_safe_printf("\n");
+        jl_safe_fprintf(s, "\n");
         jl_value_t *new_obj;
-        jl_gc_markqueue_t *mq = &ptls->mark_queue;
-        jl_safe_printf("thread %d ptr queue:\n", ptls->tid);
-        jl_safe_printf("~~~~~~~~~~ ptr queue top ~~~~~~~~~~\n");
+        jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
+        jl_safe_fprintf(s, "thread %d ptr queue:\n", ptls->tid);
+        jl_safe_fprintf(s, "~~~~~~~~~~ ptr queue top ~~~~~~~~~~\n");
         while ((new_obj = gc_ptr_queue_steal_from(mq)) != NULL) {
-            jl_(new_obj);
-            jl_safe_printf("==========\n");
+            jl_safe_static_show((JL_STREAM*)s, new_obj);
+            jl_safe_fprintf(s, "==========\n");
         }
-        jl_safe_printf("~~~~~~~~~~ ptr queue bottom ~~~~~~~~~~\n");
+        jl_safe_fprintf(s, "~~~~~~~~~~ ptr queue bottom ~~~~~~~~~~\n");
     }
     abort();
 }
@@ -1824,7 +1705,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj8(jl_ptls_t ptls, char *obj8_parent, uint8_
                          uint8_t *obj8_end, uintptr_t nptr) JL_NOTSAFEPOINT
 {
     (void)jl_assume(obj8_begin < obj8_end);
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t **slot = NULL;
     jl_value_t *new_obj = NULL;
     for (; obj8_begin < obj8_end; obj8_begin++) {
@@ -1856,7 +1737,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj16(jl_ptls_t ptls, char *obj16_parent, uint
                           uint16_t *obj16_end, uintptr_t nptr) JL_NOTSAFEPOINT
 {
     (void)jl_assume(obj16_begin < obj16_end);
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t **slot = NULL;
     jl_value_t *new_obj = NULL;
     for (; obj16_begin < obj16_end; obj16_begin++) {
@@ -1888,7 +1769,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint
                           uint32_t *obj32_end, uintptr_t nptr) JL_NOTSAFEPOINT
 {
     (void)jl_assume(obj32_begin < obj32_end);
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t **slot = NULL;
     jl_value_t *new_obj = NULL;
     for (; obj32_begin < obj32_end; obj32_begin++) {
@@ -1919,16 +1800,18 @@ STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint
 STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_value_t **obj_begin,
                       jl_value_t **obj_end, uint32_t step, uintptr_t nptr) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *new_obj;
     // Decide whether need to chunk objary
+    assert(step > 0);
     (void)jl_assume(step > 0);
     if ((nptr & 0x2) == 0x2) {
         // pre-scan this object: most of this object should be old, so look for
         // the first young object before starting this chunk
         // (this also would be valid for young objects, but probably less beneficial)
         for (; obj_begin < obj_end; obj_begin += step) {
-            new_obj = *obj_begin;
+            jl_value_t **slot = obj_begin;
+            new_obj = *slot;
             if (new_obj != NULL) {
                 verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)",
                                gc_slot_to_arrayidx(obj_parent, obj_begin));
@@ -1937,7 +1820,7 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v
                     nptr |= 1;
                 if (!gc_marked(o->header))
                     break;
-                gc_heap_snapshot_record_array_edge(obj_parent, &new_obj);
+                gc_heap_snapshot_record_array_edge(obj_parent, slot);
             }
         }
     }
@@ -1959,13 +1842,14 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v
         }
     }
     for (; obj_begin < scan_end; obj_begin += step) {
+        jl_value_t **slot = obj_begin;
         new_obj = *obj_begin;
         if (new_obj != NULL) {
             verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)",
                         gc_slot_to_arrayidx(obj_parent, obj_begin));
             gc_assert_parent_validity(obj_parent, new_obj);
             gc_try_claim_and_push(mq, new_obj, &nptr);
-            gc_heap_snapshot_record_array_edge(obj_parent, &new_obj);
+            gc_heap_snapshot_record_array_edge(obj_parent, slot);
         }
     }
     if (too_big) {
@@ -1980,14 +1864,14 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v
 }
 
 // Mark array with 8bit field descriptors
-STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_value_t **ary8_begin,
-                    jl_value_t **ary8_end, uint8_t *elem_begin, uint8_t *elem_end,
+STATIC_INLINE void gc_mark_memory8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_value_t **ary8_begin,
+                    jl_value_t **ary8_end, uint8_t *elem_begin, uint8_t *elem_end, uintptr_t elsize,
                     uintptr_t nptr) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *new_obj;
-    size_t elsize = ((jl_array_t *)ary8_parent)->elsize / sizeof(jl_value_t *);
     assert(elsize > 0);
+    (void)jl_assume(elsize > 0);
     // Decide whether need to chunk objary
     if ((nptr & 0x2) == 0x2) {
         // pre-scan this object: most of this object should be old, so look for
@@ -1996,7 +1880,8 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va
         for (; ary8_begin < ary8_end; ary8_begin += elsize) {
             int early_end = 0;
             for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) {
-                new_obj = ary8_begin[*pindex];
+                jl_value_t **slot = &ary8_begin[*pindex];
+                new_obj = *slot;
                 if (new_obj != NULL) {
                     verify_parent2("array", ary8_parent, &new_obj, "elem(%d)",
                                 gc_slot_to_arrayidx(ary8_parent, ary8_begin));
@@ -2007,7 +1892,7 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va
                         early_end = 1;
                         break;
                     }
-                    gc_heap_snapshot_record_array_edge(ary8_parent, &new_obj);
+                    gc_heap_snapshot_record_array_edge(ary8_parent, slot);
                 }
             }
             if (early_end)
@@ -2026,26 +1911,27 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va
         // case 2: lowest two bits of `nptr` are already set to 0x3, so won't change after
         // scanning the array elements
         if ((nptr & 0x2) != 0x2 || (nptr & 0x3) == 0x3) {
-            jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, 0, nptr};
+            jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, elsize, nptr};
             gc_chunkqueue_push(mq, &c);
             pushed_chunk = 1;
         }
     }
-    for (; ary8_begin < ary8_end; ary8_begin += elsize) {
+    for (; ary8_begin < scan_end; ary8_begin += elsize) {
         for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) {
-            new_obj = ary8_begin[*pindex];
+            jl_value_t **slot = &ary8_begin[*pindex];
+            new_obj = *slot;
             if (new_obj != NULL) {
                 verify_parent2("array", ary8_parent, &new_obj, "elem(%d)",
                                gc_slot_to_arrayidx(ary8_parent, ary8_begin));
                 gc_assert_parent_validity(ary8_parent, new_obj);
                 gc_try_claim_and_push(mq, new_obj, &nptr);
-                gc_heap_snapshot_record_array_edge(ary8_parent, &new_obj);
+                gc_heap_snapshot_record_array_edge(ary8_parent, slot);
             }
         }
     }
     if (too_big) {
         if (!pushed_chunk) {
-            jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, 0, nptr};
+            jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, elsize, nptr};
             gc_chunkqueue_push(mq, &c);
         }
     }
@@ -2055,14 +1941,14 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va
 }
 
 // Mark array with 16bit field descriptors
-STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_value_t **ary16_begin,
-                     jl_value_t **ary16_end, uint16_t *elem_begin, uint16_t *elem_end,
+STATIC_INLINE void gc_mark_memory16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_value_t **ary16_begin,
+                     jl_value_t **ary16_end, uint16_t *elem_begin, uint16_t *elem_end, size_t elsize,
                      uintptr_t nptr) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *new_obj;
-    size_t elsize = ((jl_array_t *)ary16_parent)->elsize / sizeof(jl_value_t *);
     assert(elsize > 0);
+    (void)jl_assume(elsize > 0);
     // Decide whether need to chunk objary
     if ((nptr & 0x2) == 0x2) {
         // pre-scan this object: most of this object should be old, so look for
@@ -2071,7 +1957,8 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_
         for (; ary16_begin < ary16_end; ary16_begin += elsize) {
             int early_end = 0;
             for (uint16_t *pindex = elem_begin; pindex < elem_end; pindex++) {
-                new_obj = ary16_begin[*pindex];
+                jl_value_t **slot = &ary16_begin[*pindex];
+                new_obj = *slot;
                 if (new_obj != NULL) {
                     verify_parent2("array", ary16_parent, &new_obj, "elem(%d)",
                                 gc_slot_to_arrayidx(ary16_parent, ary16_begin));
@@ -2082,7 +1969,7 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_
                         early_end = 1;
                         break;
                     }
-                    gc_heap_snapshot_record_array_edge(ary16_parent, &new_obj);
+                    gc_heap_snapshot_record_array_edge(ary16_parent, slot);
                 }
             }
             if (early_end)
@@ -2108,13 +1995,14 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_
     }
     for (; ary16_begin < scan_end; ary16_begin += elsize) {
         for (uint16_t *pindex = elem_begin; pindex < elem_end; pindex++) {
-            new_obj = ary16_begin[*pindex];
+            jl_value_t **slot = &ary16_begin[*pindex];
+            new_obj = *slot;
             if (new_obj != NULL) {
                 verify_parent2("array", ary16_parent, &new_obj, "elem(%d)",
                                gc_slot_to_arrayidx(ary16_parent, ary16_begin));
                 gc_assert_parent_validity(ary16_parent, new_obj);
                 gc_try_claim_and_push(mq, new_obj, &nptr);
-                gc_heap_snapshot_record_array_edge(ary16_parent, &new_obj);
+                gc_heap_snapshot_record_array_edge(ary16_parent, slot);
             }
         }
     }
@@ -2129,61 +2017,11 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_
     }
 }
 
-// Mark chunk of large array
-STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_chunk_t *c) JL_NOTSAFEPOINT
-{
-    switch (c->cid) {
-        case GC_objary_chunk: {
-            jl_value_t *obj_parent = c->parent;
-            jl_value_t **obj_begin = c->begin;
-            jl_value_t **obj_end = c->end;
-            uint32_t step = c->step;
-            uintptr_t nptr = c->nptr;
-            gc_mark_objarray(ptls, obj_parent, obj_begin, obj_end, step,
-                             nptr);
-            break;
-        }
-        case GC_ary8_chunk: {
-            jl_value_t *ary8_parent = c->parent;
-            jl_value_t **ary8_begin = c->begin;
-            jl_value_t **ary8_end = c->end;
-            uint8_t *elem_begin = (uint8_t *)c->elem_begin;
-            uint8_t *elem_end = (uint8_t *)c->elem_end;
-            uintptr_t nptr = c->nptr;
-            gc_mark_array8(ptls, ary8_parent, ary8_begin, ary8_end, elem_begin, elem_end,
-                           nptr);
-            break;
-        }
-        case GC_ary16_chunk: {
-            jl_value_t *ary16_parent = c->parent;
-            jl_value_t **ary16_begin = c->begin;
-            jl_value_t **ary16_end = c->end;
-            uint16_t *elem_begin = (uint16_t *)c->elem_begin;
-            uint16_t *elem_end = (uint16_t *)c->elem_end;
-            uintptr_t nptr = c->nptr;
-            gc_mark_array16(ptls, ary16_parent, ary16_begin, ary16_end, elem_begin, elem_end,
-                            nptr);
-            break;
-        }
-        case GC_finlist_chunk: {
-            jl_value_t **fl_begin = c->begin;
-            jl_value_t **fl_end = c->end;
-            gc_mark_finlist_(mq, fl_begin, fl_end);
-            break;
-        }
-        default: {
-            // `empty-chunk` should be checked by caller
-            jl_safe_printf("GC internal error: chunk mismatch cid=%d\n", c->cid);
-            abort();
-        }
-    }
-}
-
 // Mark gc frame
 STATIC_INLINE void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroots, uintptr_t offset,
                    uintptr_t lb, uintptr_t ub) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *new_obj;
     uint32_t nr = nroots >> 2;
     while (1) {
@@ -2228,7 +2066,7 @@ STATIC_INLINE void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroot
 // Mark exception stack
 STATIC_INLINE void gc_mark_excstack(jl_ptls_t ptls, jl_excstack_t *excstack, size_t itr) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *new_obj;
     while (itr > 0) {
         size_t bt_size = jl_excstack_bt_size(excstack, itr);
@@ -2256,44 +2094,44 @@ STATIC_INLINE void gc_mark_excstack(jl_ptls_t ptls, jl_excstack_t *excstack, siz
 }
 
 // Mark module binding
-STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent, jl_binding_t **mb_begin,
-                            jl_binding_t **mb_end, uintptr_t nptr,
+STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent, uintptr_t nptr,
                             uint8_t bits) JL_NOTSAFEPOINT
 {
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
-    for (; mb_begin < mb_end; mb_begin++) {
-        jl_binding_t *b = *mb_begin;
-        if (b == (jl_binding_t *)jl_nothing)
-            continue;
-        verify_parent1("module", mb_parent, mb_begin, "binding_buff");
-        gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)b);
-        gc_try_claim_and_push(mq, b, &nptr);
-    }
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     jl_value_t *bindings = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindings);
     gc_assert_parent_validity((jl_value_t *)mb_parent, bindings);
     gc_try_claim_and_push(mq, bindings, &nptr);
     jl_value_t *bindingkeyset = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindingkeyset);
     gc_assert_parent_validity((jl_value_t *)mb_parent, bindingkeyset);
     gc_try_claim_and_push(mq, bindingkeyset, &nptr);
+    gc_heap_snapshot_record_module_to_binding(mb_parent, bindings, bindingkeyset);
     gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)mb_parent->parent);
     gc_try_claim_and_push(mq, (jl_value_t *)mb_parent->parent, &nptr);
-    size_t nusings = mb_parent->usings.len;
+    gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)mb_parent->usings_backedges);
+    gc_try_claim_and_push(mq, (jl_value_t *)mb_parent->usings_backedges, &nptr);
+    gc_heap_snapshot_record_binding_partition_edge((jl_value_t*)mb_parent, mb_parent->usings_backedges);
+    gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)mb_parent->scanned_methods);
+    gc_try_claim_and_push(mq, (jl_value_t *)mb_parent->scanned_methods, &nptr);
+    gc_heap_snapshot_record_binding_partition_edge((jl_value_t*)mb_parent, mb_parent->scanned_methods);
+    size_t nusings = module_usings_length(mb_parent);
     if (nusings > 0) {
         // this is only necessary because bindings for "using" modules
         // are added only when accessed. therefore if a module is replaced
         // after "using" it but before accessing it, this array might
         // contain the only reference.
         jl_value_t *obj_parent = (jl_value_t *)mb_parent;
-        jl_value_t **objary_begin = (jl_value_t **)mb_parent->usings.items;
-        jl_value_t **objary_end = objary_begin + nusings;
-        gc_mark_objarray(ptls, obj_parent, objary_begin, objary_end, 1, nptr);
+        struct _jl_module_using *objary_begin = (struct _jl_module_using *)mb_parent->usings.items;
+        struct _jl_module_using *objary_end = objary_begin + nusings;
+        static_assert(sizeof(struct _jl_module_using) == 4*sizeof(void *), "Mismatch in _jl_module_using size");
+        static_assert(offsetof(struct _jl_module_using, mod) == 0, "Expected `mod` at the beginning of _jl_module_using");
+        gc_mark_objarray(ptls, obj_parent, (jl_value_t**)objary_begin, (jl_value_t**)objary_end, 4, nptr);
     }
     else {
         gc_mark_push_remset(ptls, (jl_value_t *)mb_parent, nptr);
     }
 }
 
-void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end)
+void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT
 {
     jl_value_t *new_obj;
     // Decide whether need to chunk finlist
@@ -2303,8 +2141,10 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t *
         gc_chunkqueue_push(mq, &c);
         fl_end = fl_begin + GC_CHUNK_BATCH_SIZE;
     }
+    size_t i = 0;
     for (; fl_begin < fl_end; fl_begin++) {
-        new_obj = *fl_begin;
+        jl_value_t **slot = fl_begin;
+        new_obj = *slot;
         if (__unlikely(new_obj == NULL))
             continue;
         if (gc_ptr_tag(new_obj, 1)) {
@@ -2315,6 +2155,13 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t *
         if (gc_ptr_tag(new_obj, 2))
             continue;
         gc_try_claim_and_push(mq, new_obj, NULL);
+        if (fl_parent != NULL) {
+            gc_heap_snapshot_record_array_edge(fl_parent, slot);
+        } else {
+            // This is a list of objects following the same format as a finlist
+            // if `fl_parent` is NULL
+            gc_heap_snapshot_record_finlist(new_obj, ++i);
+        }
     }
 }
 
@@ -2326,14 +2173,67 @@ void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start)
         return;
     jl_value_t **fl_begin = (jl_value_t **)list->items + start;
     jl_value_t **fl_end = (jl_value_t **)list->items + len;
-    gc_mark_finlist_(mq, fl_begin, fl_end);
+    gc_mark_finlist_(mq, NULL, fl_begin, fl_end);
+}
+
+// Mark chunk of large array
+STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_chunk_t *c) JL_NOTSAFEPOINT
+{
+    switch (c->cid) {
+        case GC_objary_chunk: {
+            jl_value_t *obj_parent = c->parent;
+            jl_value_t **obj_begin = c->begin;
+            jl_value_t **obj_end = c->end;
+            uint32_t step = c->step;
+            uintptr_t nptr = c->nptr;
+            gc_mark_objarray(ptls, obj_parent, obj_begin, obj_end,
+                             step, nptr);
+            break;
+        }
+        case GC_ary8_chunk: {
+            jl_value_t *ary8_parent = c->parent;
+            jl_value_t **ary8_begin = c->begin;
+            jl_value_t **ary8_end = c->end;
+            uint8_t *elem_begin = (uint8_t *)c->elem_begin;
+            uint8_t *elem_end = (uint8_t *)c->elem_end;
+            size_t elsize = c->step;
+            uintptr_t nptr = c->nptr;
+            gc_mark_memory8(ptls, ary8_parent, ary8_begin, ary8_end, elem_begin, elem_end,
+                           elsize, nptr);
+            break;
+        }
+        case GC_ary16_chunk: {
+            jl_value_t *ary16_parent = c->parent;
+            jl_value_t **ary16_begin = c->begin;
+            jl_value_t **ary16_end = c->end;
+            uint16_t *elem_begin = (uint16_t *)c->elem_begin;
+            uint16_t *elem_end = (uint16_t *)c->elem_end;
+            size_t elsize = c->step;
+            uintptr_t nptr = c->nptr;
+            gc_mark_memory16(ptls, ary16_parent, ary16_begin, ary16_end, elem_begin, elem_end,
+                            elsize, nptr);
+            break;
+        }
+        case GC_finlist_chunk: {
+            jl_value_t *fl_parent = c->parent;
+            jl_value_t **fl_begin = c->begin;
+            jl_value_t **fl_end = c->end;
+            gc_mark_finlist_(mq, fl_parent, fl_begin, fl_end);
+            break;
+        }
+        default: {
+            // `empty-chunk` should be checked by caller
+            jl_safe_printf("GC internal error: unknown chunk type\n");
+            abort();
+        }
+    }
 }
 
 JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj)
 {
     int may_claim = gc_try_setmark_tag(jl_astaggedvalue(obj), GC_MARKED);
     if (may_claim)
-        gc_ptr_queue_push(&ptls->mark_queue, obj);
+        gc_ptr_queue_push(&ptls->gc_tls.mark_queue, obj);
     return may_claim;
 }
 
@@ -2344,24 +2244,18 @@ JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
     gc_mark_objarray(ptls, parent, objs, objs + nobjs, 1, nptr);
 }
 
-// Enqueue and mark all outgoing references from `new_obj` which have not been marked
-// yet. `meta_updated` is mostly used to make sure we don't update metadata twice for
-// objects which have been enqueued into the `remset`
-FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj,
-                              int meta_updated)
+// Enqueue and mark all outgoing references from `new_obj` which have not been marked yet.
+// `_new_obj` has its lowest bit tagged if it's in the remset (in which case we shouldn't update page metadata)
+FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj) JL_NOTSAFEPOINT
 {
-    jl_value_t *new_obj = (jl_value_t *)_new_obj;
+    int meta_updated = (uintptr_t)_new_obj & GC_REMSET_PTR_TAG;
+    jl_value_t *new_obj = (jl_value_t *)((uintptr_t)_new_obj & ~(uintptr_t)GC_REMSET_PTR_TAG);
     mark_obj: {
-    #ifdef JL_DEBUG_BUILD
-        if (new_obj == gc_findval)
-            jl_raise_debugger();
-    #endif
         jl_taggedvalue_t *o = jl_astaggedvalue(new_obj);
         uintptr_t vtag = o->header & ~(uintptr_t)0xf;
         uint8_t bits = (gc_old(o->header) && !mark_reset_age) ? GC_OLD_MARKED : GC_MARKED;
         int update_meta = __likely(!meta_updated && !gc_verifying);
         int foreign_alloc = 0;
-        // directly point at eyt_obj_in_img to encourage inlining
         if (update_meta && o->bits.in_image) {
             foreign_alloc = 1;
             update_meta = 0;
@@ -2375,7 +2269,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
             vtag == (jl_vararg_tag << 4)) {
             // these objects have pointers in them, but no other special handling
             // so we want these to fall through to the end
-            vtag = (uintptr_t)small_typeof[vtag / sizeof(*small_typeof)];
+            vtag = (uintptr_t)ijl_small_typeof[vtag / sizeof(*ijl_small_typeof)];
         }
         else if (vtag < jl_max_tags << 4) {
             // these objects either have specialing handling
@@ -2385,8 +2279,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                 size_t dtsz = l * sizeof(void *) + sizeof(jl_svec_t);
                 if (update_meta)
                     gc_setmark(ptls, o, bits, dtsz);
-                else if (foreign_alloc)
-                    objprofile_count(jl_simplevector_type, bits == GC_OLD_MARKED, dtsz);
                 jl_value_t *objary_parent = new_obj;
                 jl_value_t **objary_begin = data;
                 jl_value_t **objary_end = data + l;
@@ -2397,22 +2289,13 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
             else if (vtag == jl_module_tag << 4) {
                 if (update_meta)
                     gc_setmark(ptls, o, bits, sizeof(jl_module_t));
-                else if (foreign_alloc)
-                    objprofile_count(jl_module_type, bits == GC_OLD_MARKED, sizeof(jl_module_t));
                 jl_module_t *mb_parent = (jl_module_t *)new_obj;
-                jl_svec_t *bindings = jl_atomic_load_relaxed(&mb_parent->bindings);
-                jl_binding_t **table = (jl_binding_t**)jl_svec_data(bindings);
-                size_t bsize = jl_svec_len(bindings);
-                uintptr_t nptr = ((bsize + mb_parent->usings.len + 1) << 2) | (bits & GC_OLD);
-                jl_binding_t **mb_begin = table + 1;
-                jl_binding_t **mb_end = table + bsize;
-                gc_mark_module_binding(ptls, mb_parent, mb_begin, mb_end, nptr, bits);
+                uintptr_t nptr = ((module_usings_length(mb_parent) + 1) << 2) | (bits & GC_OLD);
+                gc_mark_module_binding(ptls, mb_parent, nptr, bits);
             }
             else if (vtag == jl_task_tag << 4) {
                 if (update_meta)
                     gc_setmark(ptls, o, bits, sizeof(jl_task_t));
-                else if (foreign_alloc)
-                    objprofile_count(jl_task_type, bits == GC_OLD_MARKED, sizeof(jl_task_t));
                 jl_task_t *ta = (jl_task_t *)new_obj;
                 gc_scrub_record_task(ta);
                 if (gc_cblist_task_scanner) {
@@ -2421,9 +2304,9 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                                         (ta, tid != -1 && ta == gc_all_tls_states[tid]->root_task));
                 }
         #ifdef COPY_STACKS
-                void *stkbuf = ta->stkbuf;
-                if (stkbuf && ta->copy_stack) {
-                    gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
+                void *stkbuf = ta->ctx.stkbuf;
+                if (stkbuf && ta->ctx.copy_stack) {
+                    gc_setmark_buf_(ptls, stkbuf, bits, ta->ctx.bufsz);
                     // For gc_heap_snapshot_record:
                     // TODO: attribute size of stack
                     // TODO: edge to stack data
@@ -2436,12 +2319,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                 uintptr_t lb = 0;
                 uintptr_t ub = (uintptr_t)-1;
         #ifdef COPY_STACKS
-                if (stkbuf && ta->copy_stack && !ta->ptls) {
+                if (stkbuf && ta->ctx.copy_stack && !ta->ptls) {
                     int16_t tid = jl_atomic_load_relaxed(&ta->tid);
                     assert(tid >= 0);
                     jl_ptls_t ptls2 = gc_all_tls_states[tid];
                     ub = (uintptr_t)ptls2->stackbase;
-                    lb = ub - ta->copy_stack;
+                    lb = ub - ta->ctx.copy_stack;
                     offset = (uintptr_t)stkbuf - lb;
                 }
         #endif
@@ -2461,7 +2344,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                     gc_mark_excstack(ptls, excstack, itr);
                 }
                 const jl_datatype_layout_t *layout = jl_task_type->layout;
-                assert(layout->fielddesc_type == 0);
+                assert(layout->flags.fielddesc_type == 0);
                 assert(layout->nfields > 0);
                 uint32_t npointers = layout->npointers;
                 char *obj8_parent = (char *)ta;
@@ -2481,16 +2364,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                 size_t dtsz = jl_string_len(new_obj) + sizeof(size_t) + 1;
                 if (update_meta)
                     gc_setmark(ptls, o, bits, dtsz);
-                else if (foreign_alloc)
-                    objprofile_count(jl_string_type, bits == GC_OLD_MARKED, dtsz);
             }
             else {
-                jl_datatype_t *vt = small_typeof[vtag / sizeof(*small_typeof)];
+                jl_datatype_t *vt = ijl_small_typeof[vtag / sizeof(*ijl_small_typeof)];
                 size_t dtsz = jl_datatype_size(vt);
                 if (update_meta)
                     gc_setmark(ptls, o, bits, dtsz);
-                else if (foreign_alloc)
-                    objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
             }
             return;
         }
@@ -2500,72 +2379,59 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                 gc_dump_queue_and_abort(ptls, vt);
         }
         jl_datatype_t *vt = (jl_datatype_t *)vtag;
-        if (vt->name == jl_array_typename) {
-            jl_array_t *a = (jl_array_t *)new_obj;
-            jl_array_flags_t flags = a->flags;
+        if (vt->name == jl_genericmemory_typename) {
+            jl_genericmemory_t *m = (jl_genericmemory_t*)new_obj;
+            int pooled = 1; // The jl_genericmemory_t itself is always pooled-size, even with data attached to it
             if (update_meta) {
-                if (flags.pooled)
+                if (pooled)
                     gc_setmark_pool(ptls, o, bits);
                 else
                     gc_setmark_big(ptls, o, bits);
             }
-            else if (foreign_alloc) {
-                objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_array_t));
+            int how = jl_genericmemory_how(m);
+            if (how == 0 || how == 2) {
+                gc_heap_snapshot_record_hidden_edge(new_obj, m->ptr, jl_genericmemory_nbytes(m), how == 0 ? 2 : 0);
             }
-            if (flags.how == 0) {
-                void *data_ptr = (char*)a + sizeof(jl_array_t) +jl_array_ndimwords(a->flags.ndims) * sizeof(size_t);
-                gc_heap_snapshot_record_hidden_edge(new_obj, data_ptr, jl_array_nbytes(a), 2);
-            }
-            if (flags.how == 1) {
-                void *val_buf = jl_astaggedvalue((char*)a->data - a->offset * a->elsize);
-                verify_parent1("array", new_obj, &val_buf, "buffer ('loc' addr is meaningless)");
-                gc_heap_snapshot_record_hidden_edge(new_obj, jl_valueof(val_buf), jl_array_nbytes(a), flags.pooled);
-                (void)val_buf;
-                gc_setmark_buf_(ptls, (char*)a->data - a->offset * a->elsize,
-                                bits, jl_array_nbytes(a));
-            }
-            else if (flags.how == 2) {
+            else if (how == 1) {
                 if (update_meta || foreign_alloc) {
-                    objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED,
-                                     jl_array_nbytes(a));
-                    gc_heap_snapshot_record_hidden_edge(new_obj, a->data, jl_array_nbytes(a), flags.pooled);
+                    size_t nb = jl_genericmemory_nbytes(m);
+                    gc_heap_snapshot_record_hidden_edge(new_obj, m->ptr, nb, 0);
                     if (bits == GC_OLD_MARKED) {
-                        ptls->gc_cache.perm_scanned_bytes += jl_array_nbytes(a);
+                        ptls->gc_tls.gc_cache.perm_scanned_bytes += nb;
                     }
                     else {
-                        ptls->gc_cache.scanned_bytes += jl_array_nbytes(a);
+                        ptls->gc_tls.gc_cache.scanned_bytes += nb;
                     }
                 }
             }
-            else if (flags.how == 3) {
-                jl_value_t *owner = jl_array_data_owner(a);
+            else if (how == 3) {
+                jl_value_t *owner = jl_genericmemory_data_owner_field(m);
                 uintptr_t nptr = (1 << 2) | (bits & GC_OLD);
                 gc_try_claim_and_push(mq, owner, &nptr);
                 gc_heap_snapshot_record_internal_array_edge(new_obj, owner);
                 gc_mark_push_remset(ptls, new_obj, nptr);
                 return;
             }
-            if (!a->data || jl_array_len(a) == 0)
+            if (m->length == 0)
                 return;
-            if (flags.ptrarray) {
-                if ((jl_datatype_t *)jl_tparam0(vt) == jl_symbol_type)
+            const jl_datatype_layout_t *layout = vt->layout;
+            if (layout->flags.arrayelem_isboxed) {
+                if ((jl_datatype_t*)jl_tparam1(vt) == jl_symbol_type)
                     return;
-                size_t l = jl_array_len(a);
                 jl_value_t *objary_parent = new_obj;
-                jl_value_t **objary_begin = (jl_value_t **)a->data;
-                jl_value_t **objary_end = objary_begin + l;
+                jl_value_t **objary_begin = (jl_value_t **)m->ptr;
+                jl_value_t **objary_end = objary_begin + m->length;
                 uint32_t step = 1;
-                uintptr_t nptr = (l << 2) | (bits & GC_OLD);
+                uintptr_t nptr = (m->length << 2) | (bits & GC_OLD);
                 gc_mark_objarray(ptls, objary_parent, objary_begin, objary_end, step, nptr);
             }
-            else if (flags.hasptr) {
-                jl_datatype_t *et = (jl_datatype_t *)jl_tparam0(vt);
-                const jl_datatype_layout_t *layout = et->layout;
+            else if (layout->first_ptr >= 0) {
+                const jl_datatype_layout_t *layout = vt->layout;
                 unsigned npointers = layout->npointers;
-                unsigned elsize = a->elsize / sizeof(jl_value_t *);
-                size_t l = jl_array_len(a);
+                unsigned elsize = layout->size / sizeof(jl_value_t*);
+                size_t l = m->length;
                 jl_value_t *objary_parent = new_obj;
-                jl_value_t **objary_begin = (jl_value_t **)a->data;
+                jl_value_t **objary_begin = (jl_value_t**)m->ptr;
                 jl_value_t **objary_end = objary_begin + l * elsize;
                 uint32_t step = elsize;
                 uintptr_t nptr = ((l * npointers) << 2) | (bits & GC_OLD);
@@ -2573,17 +2439,17 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                     objary_begin += layout->first_ptr;
                     gc_mark_objarray(ptls, objary_parent, objary_begin, objary_end, step, nptr);
                 }
-                else if (layout->fielddesc_type == 0) {
-                    uint8_t *obj8_begin = (uint8_t *)jl_dt_layout_ptrs(layout);
+                else if (layout->flags.fielddesc_type == 0) {
+                    uint8_t *obj8_begin = (uint8_t*)jl_dt_layout_ptrs(layout);
                     uint8_t *obj8_end = obj8_begin + npointers;
-                    gc_mark_array8(ptls, objary_parent, objary_begin, objary_end, obj8_begin,
-                                   obj8_end, nptr);
+                    gc_mark_memory8(ptls, objary_parent, objary_begin, objary_end, obj8_begin, obj8_end,
+                                   elsize, nptr);
                 }
-                else if (layout->fielddesc_type == 1) {
-                    uint16_t *obj16_begin = (uint16_t *)jl_dt_layout_ptrs(layout);
+                else if (layout->flags.fielddesc_type == 1) {
+                    uint16_t *obj16_begin = (uint16_t*)jl_dt_layout_ptrs(layout);
                     uint16_t *obj16_end = obj16_begin + npointers;
-                    gc_mark_array16(ptls, objary_parent, objary_begin, objary_end, obj16_begin,
-                                    obj16_end, nptr);
+                    gc_mark_memory16(ptls, objary_parent, objary_begin, objary_end, obj16_begin, obj16_end,
+                                    elsize, nptr);
                 }
                 else {
                     assert(0 && "unimplemented");
@@ -2594,8 +2460,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
         size_t dtsz = jl_datatype_size(vt);
         if (update_meta)
             gc_setmark(ptls, o, bits, dtsz);
-        else if (foreign_alloc)
-            objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
         if (vt == jl_weakref_type)
             return;
         const jl_datatype_layout_t *layout = vt->layout;
@@ -2603,9 +2467,9 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
         if (npointers == 0)
             return;
         uintptr_t nptr = (npointers << 2 | (bits & GC_OLD));
-        assert((layout->nfields > 0 || layout->fielddesc_type == 3) &&
+        assert((layout->nfields > 0 || layout->flags.fielddesc_type == 3) &&
                "opaque types should have been handled specially");
-        if (layout->fielddesc_type == 0) {
+        if (layout->flags.fielddesc_type == 0) {
             char *obj8_parent = (char *)new_obj;
             uint8_t *obj8_begin = (uint8_t *)jl_dt_layout_ptrs(layout);
             uint8_t *obj8_end = obj8_begin + npointers;
@@ -2618,7 +2482,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                     gc_ptr_queue_push(mq, new_obj);
             }
         }
-        else if (layout->fielddesc_type == 1) {
+        else if (layout->flags.fielddesc_type == 1) {
             char *obj16_parent = (char *)new_obj;
             uint16_t *obj16_begin = (uint16_t *)jl_dt_layout_ptrs(layout);
             uint16_t *obj16_end = obj16_begin + npointers;
@@ -2631,7 +2495,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                     gc_ptr_queue_push(mq, new_obj);
             }
         }
-        else if (layout->fielddesc_type == 2) {
+        else if (layout->flags.fielddesc_type == 2) {
             // This is very uncommon
             // Do not do store to load forwarding to save some code size
             char *obj32_parent = (char *)new_obj;
@@ -2647,7 +2511,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
             }
         }
         else {
-            assert(layout->fielddesc_type == 3);
+            assert(layout->flags.fielddesc_type == 3);
             jl_fielddescdyn_t *desc = (jl_fielddescdyn_t *)jl_dt_layout_fields(layout);
             int old = jl_astaggedvalue(new_obj)->bits.gc & 2;
             uintptr_t young = desc->markfunc(ptls, new_obj);
@@ -2657,49 +2521,26 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
     }
 }
 
-// Used in gc-debug
-void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
+void gc_collect_neighbors(jl_ptls_t ptls, jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
 {
     while (1) {
-        void *new_obj = (void *)gc_ptr_queue_pop(&ptls->mark_queue);
+        void *new_obj = (void *)gc_ptr_queue_pop(&ptls->gc_tls.mark_queue);
         // No more objects to mark
         if (__unlikely(new_obj == NULL)) {
             return;
         }
-        gc_mark_outrefs(ptls, mq, new_obj, 0);
+        gc_mark_outrefs(ptls, mq, new_obj);
     }
 }
 
-// Drain items from worker's own chunkqueue
-void gc_drain_own_chunkqueue(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
-{
-    jl_gc_chunk_t c = {.cid = GC_empty_chunk};
-    do {
-        c = gc_chunkqueue_pop(mq);
-        if (c.cid != GC_empty_chunk) {
-            gc_mark_chunk(ptls, mq, &c);
-            gc_mark_loop_serial_(ptls, mq);
-        }
-    } while (c.cid != GC_empty_chunk);
-}
-
-// Main mark loop. Stack (allocated on the heap) of `jl_value_t *`
-// is used to keep track of processed items. Maintaning this stack (instead of
-// native one) avoids stack overflow when marking deep objects and
-// makes it easier to implement parallel marking via work-stealing
-JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls)
+void gc_mark_and_steal(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
-    gc_mark_loop_serial_(ptls, &ptls->mark_queue);
-    gc_drain_own_chunkqueue(ptls, &ptls->mark_queue);
-}
-
-void gc_mark_and_steal(jl_ptls_t ptls)
-{
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
-    jl_gc_markqueue_t *mq_master = NULL;
-    int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid != -1)
-        mq_master = &gc_all_tls_states[master_tid]->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
+    jl_gc_markqueue_t *mq_initiator = mq;
+    int initiator_tid = jl_atomic_load(&gc_initiator_tid);
+    if (initiator_tid != -1) {
+        mq_initiator = &gc_all_tls_states[initiator_tid]->gc_tls.mark_queue;
+    }
     void *new_obj;
     jl_gc_chunk_t c;
     pop : {
@@ -2715,18 +2556,22 @@ void gc_mark_and_steal(jl_ptls_t ptls)
         goto steal;
     }
     mark : {
-        gc_mark_outrefs(ptls, mq, new_obj, 0);
+        gc_mark_outrefs(ptls, mq, new_obj);
         goto pop;
     }
     // Note that for the stealing heuristics, we try to
-    // steal chunks much more agressively than pointers,
+    // steal chunks much more aggressively than pointers,
     // since we know chunks will likely expand into a lot
     // of work for the mark loop
     steal : {
+        int first = gc_first_parallel_collector_thread_id();
+        int last = gc_last_parallel_collector_thread_id();
         // Try to steal chunk from random GC thread
         for (int i = 0; i < 4 * jl_n_markthreads; i++) {
-            uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads;
-            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
+            int v = gc_random_parallel_collector_thread_id(ptls);
+            jl_ptls_t ptls2 = gc_all_tls_states[v];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue;
             c = gc_chunkqueue_steal_from(mq2);
             if (c.cid != GC_empty_chunk) {
                 gc_mark_chunk(ptls, mq, &c);
@@ -2734,122 +2579,178 @@ void gc_mark_and_steal(jl_ptls_t ptls)
             }
         }
         // Sequentially walk GC threads to try to steal chunk
-        for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
-            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue;
+        for (int i = first; i <= last; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue;
             c = gc_chunkqueue_steal_from(mq2);
             if (c.cid != GC_empty_chunk) {
                 gc_mark_chunk(ptls, mq, &c);
                 goto pop;
             }
         }
-        // Try to steal chunk from master thread
-        if (mq_master != NULL) {
-            c = gc_chunkqueue_steal_from(mq_master);
-            if (c.cid != GC_empty_chunk) {
-                gc_mark_chunk(ptls, mq, &c);
-                goto pop;
-            }
+        // Try to steal chunk from initiator thread
+        c = gc_chunkqueue_steal_from(mq_initiator);
+        if (c.cid != GC_empty_chunk) {
+            gc_mark_chunk(ptls, mq, &c);
+            goto pop;
         }
         // Try to steal pointer from random GC thread
         for (int i = 0; i < 4 * jl_n_markthreads; i++) {
-            uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads;
-            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
+            int v = gc_random_parallel_collector_thread_id(ptls);
+            jl_ptls_t ptls2 = gc_all_tls_states[v];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue;
             new_obj = gc_ptr_queue_steal_from(mq2);
             if (new_obj != NULL)
                 goto mark;
         }
         // Sequentially walk GC threads to try to steal pointer
-        for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
-            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue;
+        for (int i = first; i <= last; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue;
             new_obj = gc_ptr_queue_steal_from(mq2);
             if (new_obj != NULL)
                 goto mark;
         }
-        // Try to steal pointer from master thread
-        if (mq_master != NULL) {
-            new_obj = gc_ptr_queue_steal_from(mq_master);
-            if (new_obj != NULL)
-                goto mark;
+        // Try to steal pointer from initiator thread
+        new_obj = gc_ptr_queue_steal_from(mq_initiator);
+        if (new_obj != NULL)
+            goto mark;
+    }
+}
+
+size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    assert(ptls != NULL);
+    // assume each chunk is worth 256 units of work and each pointer
+    // is worth 1 unit of work
+    size_t work = 256 * (jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.chunk_queue.bottom) -
+        jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.chunk_queue.top));
+    work += (jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.ptr_queue.bottom) -
+        jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.ptr_queue.top));
+    return work;
+}
+
+/**
+ * Correctness argument for the mark-loop termination protocol.
+ *
+ * Safety properties:
+ * - No work items shall be in any thread's queues when `gc_should_mark` observes
+ * that `gc_n_threads_marking` is zero.
+ *
+ * - No work item shall be stolen from the initiator thread (i.e. mutator thread which started
+ * GC and which helped the `jl_n_markthreads` - 1 threads to mark) after
+ * `gc_should_mark` observes that `gc_n_threads_marking` is zero. This property is
+ * necessary because we call `gc_mark_loop_serial` after marking the finalizer list in
+ * `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there,
+ * and that no work is stolen from us at that point.
+ *
+ * Proof:
+ * - If a thread observes that `gc_n_threads_marking` is zero inside `gc_should_mark`, that
+ * means that no thread has work on their queue, this is guaranteed because a thread may only exit
+ * `gc_mark_and_steal` when its own queue is empty, this information is synchronized by the
+ * seq-cst fetch_add to a thread that is in `gc_should_mark`. `gc_queue_observer_lock`
+ * guarantees that once `gc_n_threads_marking` reaches zero, no thread will increment it again,
+ * because incrementing is only legal from inside the lock. Therefore, no thread will reenter
+ * the mark-loop after `gc_n_threads_marking` reaches zero.
+ */
+
+int gc_should_mark(void) JL_NOTSAFEPOINT
+{
+    int should_mark = 0;
+    uv_mutex_lock(&gc_queue_observer_lock);
+    while (1) {
+        int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
+        if (n_threads_marking == 0) {
+            break;
+        }
+        int tid = jl_atomic_load_relaxed(&gc_initiator_tid);
+        assert(tid != -1);
+        assert(gc_all_tls_states != NULL);
+        size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
+        int first = gc_first_parallel_collector_thread_id();
+        int last = gc_last_parallel_collector_thread_id();
+        for (int i = first; i <= last; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            gc_check_ptls_of_parallel_collector_thread(ptls2);
+            work += gc_count_work_in_queue(ptls2);
+        }
+        // if there is a lot of work left, enter the mark loop
+        if (work >= 16 * n_threads_marking) {
+            jl_atomic_fetch_add(&gc_n_threads_marking, 1); // A possibility would be to allow a thread that found lots
+                                                           // of work to increment this
+            should_mark = 1;
+            break;
         }
+        jl_cpu_pause();
     }
+    uv_mutex_unlock(&gc_queue_observer_lock);
+    return should_mark;
 }
 
-void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
+void gc_wake_all_for_marking(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
-    int backoff = GC_BACKOFF_MIN;
-    if (master) {
-        jl_atomic_store(&gc_master_tid, ptls->tid);
-        // Wake threads up and try to do some work
-        uv_mutex_lock(&gc_threads_lock);
+    uv_mutex_lock(&gc_threads_lock);
+    uv_cond_broadcast(&gc_threads_cond);
+    uv_mutex_unlock(&gc_threads_lock);
+}
+
+void gc_mark_loop(jl_ptls_t ptls, int mark_loop_initiator) JL_NOTSAFEPOINT
+{
+    if (mark_loop_initiator) {
+        jl_atomic_store(&gc_initiator_tid, ptls->tid);
         jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        uv_cond_broadcast(&gc_threads_cond);
-        uv_mutex_unlock(&gc_threads_lock);
+        gc_wake_all_for_marking(ptls);
         gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
     }
-    while (jl_atomic_load(&gc_n_threads_marking) > 0) {
-        // Try to become a thief while other threads are marking
-        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        if (jl_atomic_load(&gc_master_tid) != -1) {
-            gc_mark_and_steal(ptls);
+    while (1) {
+        int should_mark = gc_should_mark();
+        if (!should_mark) {
+            break;
         }
+        gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
-        // Failed to steal
-        gc_backoff(&backoff);
-    }
-}
-
-void gc_mark_loop(jl_ptls_t ptls)
-{
-    if (jl_n_markthreads == 0 || gc_heap_snapshot_enabled) {
-        gc_mark_loop_serial(ptls);
-    }
-    else {
-        gc_mark_loop_parallel(ptls, 1);
     }
 }
 
-void gc_mark_loop_barrier(void)
+void gc_mark_loop_barrier(void) JL_NOTSAFEPOINT
 {
-    jl_atomic_store(&gc_master_tid, -1);
-    while (jl_atomic_load(&gc_n_threads_marking) != 0) {
-        jl_cpu_pause();
-    }
+    assert(jl_atomic_load_relaxed(&gc_n_threads_marking) == 0);
+    jl_atomic_store_relaxed(&gc_initiator_tid, -1);
 }
 
-void gc_mark_clean_reclaim_sets(void)
+void gc_mark_clean_reclaim_sets(void) JL_NOTSAFEPOINT
 {
     // Clean up `reclaim-sets`
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        arraylist_t *reclaim_set2 = &ptls2->mark_queue.reclaim_set;
+        if (ptls2 == NULL) {
+            continue;
+        }
+        arraylist_t *reclaim_set2 = &ptls2->gc_tls.mark_queue.reclaim_set;
         ws_array_t *a = NULL;
         while ((a = (ws_array_t *)arraylist_pop(reclaim_set2)) != NULL) {
             free(a->buffer);
             free(a);
         }
     }
-}
-
-static void gc_premark(jl_ptls_t ptls2)
-{
-    arraylist_t *remset = ptls2->heap.remset;
-    ptls2->heap.remset = ptls2->heap.last_remset;
-    ptls2->heap.last_remset = remset;
-    ptls2->heap.remset->len = 0;
-    ptls2->heap.remset_nptr = 0;
-    // avoid counting remembered objects
-    // in `perm_scanned_bytes`
-    size_t len = remset->len;
-    void **items = remset->items;
-    for (size_t i = 0; i < len; i++) {
-        jl_value_t *item = (jl_value_t *)items[i];
-        objprofile_count(jl_typeof(item), 2, 0);
-        jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED;
+    // Reset queue indices
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 == NULL) {
+            continue;
+        }
+        jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.ptr_queue.bottom, 0);
+        jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.ptr_queue.top, 0);
+        jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.chunk_queue.bottom, 0);
+        jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.chunk_queue.top, 0);
     }
 }
 
-static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
+static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) JL_NOTSAFEPOINT
 {
     jl_task_t *task;
     task = ptls2->root_task;
@@ -2878,7 +2779,7 @@ static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
     }
 }
 
-static void gc_queue_bt_buf(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
+static void gc_queue_bt_buf(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) JL_NOTSAFEPOINT
 {
     jl_bt_element_t *bt_data = ptls2->bt_data;
     size_t bt_size = ptls2->bt_size;
@@ -2892,14 +2793,29 @@ static void gc_queue_bt_buf(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
     }
 }
 
-static void gc_queue_remset(jl_ptls_t ptls, jl_ptls_t ptls2)
+static void gc_queue_remset(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) JL_NOTSAFEPOINT
 {
-    size_t len = ptls2->heap.last_remset->len;
-    void **items = ptls2->heap.last_remset->items;
+    void **items = ptls2->gc_tls.heap.remset.items;
+    size_t len = ptls2->gc_tls.heap.remset.len;
     for (size_t i = 0; i < len; i++) {
-        // Objects in the `remset` are already marked,
-        // so a `gc_try_claim_and_push` wouldn't work here
-        gc_mark_outrefs(ptls, &ptls->mark_queue, (jl_value_t *)items[i], 1);
+        void *_v = items[i];
+        jl_astaggedvalue(_v)->bits.gc = GC_OLD_MARKED;
+        jl_value_t *v = (jl_value_t *)((uintptr_t)_v | GC_REMSET_PTR_TAG);
+        gc_ptr_queue_push(mq, v);
+    }
+    // Don't forget to clear the remset
+    ptls2->gc_tls.heap.remset.len = 0;
+    ptls2->gc_tls.heap.remset_nptr = 0;
+}
+
+static void gc_check_all_remsets_are_empty(void) JL_NOTSAFEPOINT
+{
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL) {
+            assert(ptls2->gc_tls.heap.remset.len == 0);
+            assert(ptls2->gc_tls.heap.remset_nptr == 0);
+        }
     }
 }
 
@@ -2907,36 +2823,49 @@ extern jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED;
 extern jl_task_t *wait_empty JL_GLOBALLY_ROOTED;
 
 // mark the initial root set
-static void gc_mark_roots(jl_gc_markqueue_t *mq)
+static void gc_mark_roots(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
 {
     // modules
     gc_try_claim_and_push(mq, jl_main_module, NULL);
-    gc_heap_snapshot_record_root((jl_value_t*)jl_main_module, "main_module");
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_main_module, "main_module");
     // invisible builtin values
+    gc_try_claim_and_push(mq, jl_method_table, NULL);
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_method_table, "global_method_table");
     gc_try_claim_and_push(mq, jl_an_empty_vec_any, NULL);
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_an_empty_vec_any, "an_empty_vec_any");
     gc_try_claim_and_push(mq, jl_module_init_order, NULL);
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_module_init_order, "module_init_order");
     for (size_t i = 0; i < jl_current_modules.size; i += 2) {
         if (jl_current_modules.table[i + 1] != HT_NOTFOUND) {
             gc_try_claim_and_push(mq, jl_current_modules.table[i], NULL);
-            gc_heap_snapshot_record_root((jl_value_t*)jl_current_modules.table[i], "top level module");
+            gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_current_modules.table[i], "top level module");
         }
     }
     gc_try_claim_and_push(mq, jl_anytuple_type_type, NULL);
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_anytuple_type_type, "anytuple_type_type");
     for (size_t i = 0; i < N_CALL_CACHE; i++) {
         jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]);
         gc_try_claim_and_push(mq, v, NULL);
+        gc_heap_snapshot_record_array_edge_index((jl_value_t*)jl_anytuple_type_type, (jl_value_t*)v, i);
     }
-    gc_try_claim_and_push(mq, jl_all_methods, NULL);
     gc_try_claim_and_push(mq, _jl_debug_method_invalidation, NULL);
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)_jl_debug_method_invalidation, "debug_method_invalidation");
     // constants
     gc_try_claim_and_push(mq, jl_emptytuple_type, NULL);
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_emptytuple_type, "emptytuple_type");
     gc_try_claim_and_push(mq, cmpswap_names, NULL);
-    gc_try_claim_and_push(mq, jl_global_roots_table, NULL);
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)cmpswap_names, "cmpswap_names");
+    gc_try_claim_and_push(mq, jl_global_roots_list, NULL);
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_global_roots_list, "global_roots_list");
+    gc_try_claim_and_push(mq, jl_global_roots_keyset, NULL);
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_global_roots_keyset, "global_roots_keyset");
+    gc_try_claim_and_push(mq, precompile_field_replace, NULL);
+    gc_heap_snapshot_record_gc_roots((jl_value_t*)precompile_field_replace, "precompile_field_replace");
 }
 
 // find unmarked objects that need to be finalized from the finalizer list "list".
 // this must happen last in the mark phase.
-static void sweep_finalizer_list(arraylist_t *list)
+static void sweep_finalizer_list(arraylist_t *list) JL_NOTSAFEPOINT
 {
     void **items = list->items;
     size_t len = list->len;
@@ -2985,63 +2914,25 @@ static void sweep_finalizer_list(arraylist_t *list)
     list->len = j;
 }
 
-// collector entry point and control
-_Atomic(uint32_t) jl_gc_disable_counter = 1;
-
-JL_DLLEXPORT int jl_gc_enable(int on)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    int prev = !ptls->disable_gc;
-    ptls->disable_gc = (on == 0);
-    if (on && !prev) {
-        // disable -> enable
-        if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) {
-            gc_num.allocd += gc_num.deferred_alloc;
-            gc_num.deferred_alloc = 0;
-        }
-    }
-    else if (prev && !on) {
-        // enable -> disable
-        jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
-        // check if the GC is running and wait for it to finish
-        jl_gc_safepoint_(ptls);
-    }
-    return prev;
-}
-
-JL_DLLEXPORT int jl_gc_is_enabled(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return !ptls->disable_gc;
+int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT {
+    return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid);
 }
 
 JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT
 {
     jl_gc_num_t num = gc_num;
-    combine_thread_gc_counts(&num);
+    combine_thread_gc_counts(&num, 0);
     // Sync this logic with `base/util.jl:GC_Diff`
     *bytes = (num.total_allocd + num.deferred_alloc + num.allocd);
 }
 
-JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void)
-{
-    return gc_num.total_time;
-}
-
 JL_DLLEXPORT jl_gc_num_t jl_gc_num(void)
 {
     jl_gc_num_t num = gc_num;
-    combine_thread_gc_counts(&num);
+    combine_thread_gc_counts(&num, 0);
     return num;
 }
 
-JL_DLLEXPORT void jl_gc_reset_stats(void)
-{
-    gc_num.max_pause = 0;
-    gc_num.max_memory = 0;
-    gc_num.max_time_to_safepoint = 0;
-}
-
 // TODO: these were supposed to be thread local
 JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT
 {
@@ -3061,77 +2952,146 @@ JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT
     return newtb - oldtb;
 }
 
+JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void)
+{
+    int n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    jl_ptls_t *all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    int64_t pool_live_bytes = 0;
+    for (int i = 0; i < n_threads; i++) {
+        jl_ptls_t ptls2 = all_tls_states[i];
+        if (ptls2 != NULL) {
+            pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes);
+        }
+    }
+    return pool_live_bytes;
+}
+
 JL_DLLEXPORT int64_t jl_gc_live_bytes(void)
 {
     return live_bytes;
 }
 
+uint64_t jl_gc_smooth(uint64_t old_val, uint64_t new_val, double factor) JL_NOTSAFEPOINT
+{
+    double est = factor * old_val + (1 - factor) * new_val;
+    if (est <= 1)
+        return 1; // avoid issues with <= 0
+    if (est > (uint64_t)2<<36)
+        return (uint64_t)2<<36; // avoid overflow
+    return est;
+}
+
+// an overallocation curve inspired by array allocations
+// grows very fast initially, then much slower at large heaps
+static uint64_t overallocation(uint64_t old_val, uint64_t val, uint64_t max_val) JL_NOTSAFEPOINT
+{
+    // compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8
+    // for small n, we grow much faster than O(n)
+    // for large n, we grow at O(n/8)
+    // and as we reach O(memory) for memory>>1MB,
+    // this means we end by adding about 10% of memory each time at most
+    int exp2 = sizeof(old_val) * 8 -
+#ifdef _P64
+        __builtin_clzll(old_val);
+#else
+        __builtin_clz(old_val);
+#endif
+    uint64_t inc = (uint64_t)((size_t)1 << (exp2 * 7 / 8)) * 4 + old_val / 8;
+    // once overallocation would exceed max_val, grow by no more than 5% of max_val
+    if (inc + val > max_val)
+        if (inc > max_val / 20)
+            return max_val / 20;
+    return inc;
+}
+
 size_t jl_maxrss(void);
 
+void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT {
+    if (!gc_logging_enabled) {
+        return;
+    }
+    jl_safe_printf("\nGC: pause %.2fms. collected %fMB. %s %s\n",
+        pause/1e6, freed/(double)(1<<20),
+        full ? "full" : "incr",
+        recollect ? "recollect" : ""
+    );
+
+    jl_safe_printf("Heap stats: bytes_mapped %.2f MB, bytes_resident %.2f MB,\nheap_size %.2f MB, heap_target %.2f MB, Fragmentation %.3f\n",
+        jl_atomic_load_relaxed(&gc_heap_stats.bytes_mapped)/(double)(1<<20),
+        jl_atomic_load_relaxed(&gc_heap_stats.bytes_resident)/(double)(1<<20),
+        // live_bytes/(double)(1<<20), live byes tracking is not accurate.
+        jl_atomic_load_relaxed(&gc_heap_stats.heap_size)/(double)(1<<20),
+        jl_atomic_load_relaxed(&gc_heap_stats.heap_target)/(double)(1<<20),
+        (double)live_bytes/(double)jl_atomic_load_relaxed(&gc_heap_stats.heap_size)
+    );
+    // Should fragmentation use bytes_resident instead of heap_size?
+}
+
 // Only one thread should be running in this function
-static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
+static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) JL_NOTSAFEPOINT
 {
-    combine_thread_gc_counts(&gc_num);
+    combine_thread_gc_counts(&gc_num, 1);
 
     // We separate the update of the graph from the update of live_bytes here
     // so that the sweep shows a downward trend in memory usage.
     jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, gc_num.allocd);
 
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
 
     uint64_t gc_start_time = jl_hrtime();
+    uint64_t mutator_time = gc_end_time == 0 ? old_mut_time : gc_start_time - gc_end_time;
+    uint64_t before_free_heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size);
     int64_t last_perm_scanned_bytes = perm_scanned_bytes;
     uint64_t start_mark_time = jl_hrtime();
     JL_PROBE_GC_MARK_BEGIN();
     {
         JL_TIMING(GC, GC_Mark);
-
-        // 1. fix GC bits of objects in the remset.
-        assert(gc_n_threads);
-        for (int t_i = 0; t_i < gc_n_threads; t_i++) {
-            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-            if (ptls2 != NULL)
-                gc_premark(ptls2);
-        }
-
-        assert(gc_n_threads);
+        assert(gc_n_threads != 0);
         int single_threaded_mark = (jl_n_markthreads == 0 || gc_heap_snapshot_enabled);
         for (int t_i = 0; t_i < gc_n_threads; t_i++) {
             jl_ptls_t ptls2 = gc_all_tls_states[t_i];
             jl_ptls_t ptls_dest = ptls;
             jl_gc_markqueue_t *mq_dest = mq;
             if (!single_threaded_mark) {
-                ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_markthreads];
-                mq_dest = &ptls_dest->mark_queue;
+                int dest_tid = gc_ith_parallel_collector_thread_id(t_i % jl_n_markthreads);
+                ptls_dest = gc_all_tls_states[dest_tid];
+                mq_dest = &ptls_dest->gc_tls.mark_queue;
             }
             if (ptls2 != NULL) {
-                // 2.1. mark every thread local root
+                // 1.1. mark every thread local root
                 gc_queue_thread_local(mq_dest, ptls2);
-                // 2.2. mark any managed objects in the backtrace buffer
+                // 1.2. mark any managed objects in the backtrace buffer
                 // TODO: treat these as roots for gc_heap_snapshot_record
                 gc_queue_bt_buf(mq_dest, ptls2);
-                // 2.3. mark every object in the `last_remsets` and `rem_binding`
-                gc_queue_remset(ptls_dest, ptls2);
+                // 1.3. mark every object in the remset
+                gc_queue_remset(mq_dest, ptls2);
             }
         }
+        gc_check_all_remsets_are_empty();
 
-        // 3. walk roots
+        // 2. walk roots
         gc_mark_roots(mq);
         if (gc_cblist_root_scanner) {
             gc_invoke_callbacks(jl_gc_cb_root_scanner_t,
                 gc_cblist_root_scanner, (collection));
         }
-        gc_mark_loop(ptls);
+
+        if (single_threaded_mark) {
+            gc_mark_and_steal(ptls);
+        }
+        else {
+            gc_mark_loop(ptls, 1);
+        }
         gc_mark_loop_barrier();
         gc_mark_clean_reclaim_sets();
 
-        // 4. check for objects to finalize
+        // 3. check for objects to finalize
         clear_weak_refs();
         // Record the length of the marked list since we need to
         // mark the object moved to the marked list from the
         // `finalizer_list` by `sweep_finalizer_list`
         size_t orig_marked_len = finalizer_list_marked.len;
-        assert(gc_n_threads);
+        assert(gc_n_threads != 0);
         for (int i = 0; i < gc_n_threads; i++) {
             jl_ptls_t ptls2 = gc_all_tls_states[i];
             if (ptls2 != NULL)
@@ -3141,7 +3101,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
             sweep_finalizer_list(&finalizer_list_marked);
             orig_marked_len = 0;
         }
-        assert(gc_n_threads);
+        assert(gc_n_threads != 0);
         for (int i = 0; i < gc_n_threads; i++) {
             jl_ptls_t ptls2 = gc_all_tls_states[i];
             if (ptls2 != NULL)
@@ -3150,7 +3110,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         gc_mark_finlist(mq, &finalizer_list_marked, orig_marked_len);
         // "Flush" the mark stack before flipping the reset_age bit
         // so that the objects are not incorrectly reset.
-        gc_mark_loop_serial(ptls);
+        gc_mark_and_steal(ptls);
         // Conservative marking relies on age to tell allocated objects
         // and freelist entries apart.
         mark_reset_age = !jl_gc_conservative_gc_support_enabled();
@@ -3159,7 +3119,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         // and should not be referenced by any old objects so this won't break
         // the GC invariant.
         gc_mark_finlist(mq, &to_finalize, 0);
-        gc_mark_loop_serial(ptls);
+        gc_mark_and_steal(ptls);
         mark_reset_age = 0;
     }
 
@@ -3170,86 +3130,52 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     uint64_t mark_time = end_mark_time - start_mark_time;
     gc_num.mark_time = mark_time;
     gc_num.total_mark_time += mark_time;
-    int64_t allocd = gc_num.allocd;
     gc_settime_postmark_end();
     // marking is over
 
     // Flush everything in mark cache
-    gc_sync_all_caches_nolock(ptls);
+    gc_sync_all_caches(ptls);
 
-    int64_t live_sz_ub = live_bytes + allocd;
-    int64_t live_sz_est = scanned_bytes + perm_scanned_bytes;
-    int64_t estimate_freed = live_sz_ub - live_sz_est;
 
     gc_verify(ptls);
-
     gc_stats_all_pool();
     gc_stats_big_obj();
-    objprofile_printall();
-    objprofile_reset();
     gc_num.total_allocd += gc_num.allocd;
     if (!prev_sweep_full)
         promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes;
-    // 5. next collection decision
-    int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(allocd/10));
-    int nptr = 0;
-    assert(gc_n_threads);
+    // 4. next collection decision
+    int remset_nptr = 0;
+    int sweep_full = next_sweep_full;
+    int recollect = 0;
+    assert(gc_n_threads != 0);
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
         if (ptls2 != NULL)
-            nptr += ptls2->heap.remset_nptr;
+            remset_nptr += ptls2->gc_tls.heap.remset_nptr;
     }
+    (void)remset_nptr; //Use this information for something?
 
-    // many pointers in the intergen frontier => "quick" mark is not quick
-    int large_frontier = nptr*sizeof(void*) >= default_collect_interval;
-    int sweep_full = 0;
-    int recollect = 0;
-
-    // update heuristics only if this GC was automatically triggered
-    if (collection == JL_GC_AUTO) {
-        if (large_frontier) {
-            sweep_full = 1;
-            gc_num.interval = last_long_collect_interval;
-        }
-        if (not_freed_enough || large_frontier) {
-            gc_num.interval = gc_num.interval * 2;
-        }
-
-        size_t maxmem = 0;
-#ifdef _P64
-        // on a big memory machine, increase max_collect_interval to totalmem / nthreads / 2
-        maxmem = total_mem / (gc_n_threads - jl_n_gcthreads) / 2;
-#endif
-        if (maxmem < max_collect_interval)
-            maxmem = max_collect_interval;
-        if (gc_num.interval > maxmem) {
-            sweep_full = 1;
-            gc_num.interval = maxmem;
-        }
-    }
 
     // If the live data outgrows the suggested max_total_memory
     // we keep going with minimum intervals and full gcs until
     // we either free some space or get an OOM error.
-    if (live_bytes > max_total_memory) {
-        sweep_full = 1;
-    }
-    if (gc_sweep_always_full) {
+    if (jl_options.gc_sweep_always_full) {
         sweep_full = 1;
+        gc_record_full_sweep_reason(FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL);
     }
     if (collection == JL_GC_FULL && !prev_sweep_full) {
         sweep_full = 1;
         recollect = 1;
+        gc_record_full_sweep_reason(FULL_SWEEP_REASON_FORCED_FULL_SWEEP);
     }
     if (sweep_full) {
         // these are the difference between the number of gc-perm bytes scanned
         // on the first collection after sweep_full, and the current scan
         perm_scanned_bytes = 0;
         promoted_bytes = 0;
-        last_long_collect_interval = gc_num.interval;
     }
     scanned_bytes = 0;
-    // 6. start sweeping
+    // 5. start sweeping
     uint64_t start_sweep_time = jl_hrtime();
     JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
     {
@@ -3261,19 +3187,24 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 #ifdef USE_TRACY
         TracyCZoneColor(full_timing_block.tracy_ctx, 0xFFA500);
 #endif
+        current_sweep_full = sweep_full;
         sweep_weak_refs();
-        sweep_stack_pools();
-        gc_sweep_foreign_objs();
+        uint64_t stack_pool_time = jl_hrtime();
+        jl_gc_sweep_stack_pools_and_mtarraylist_buffers(ptls);
+        stack_pool_time = jl_hrtime() - stack_pool_time;
+        gc_num.total_stack_pool_sweep_time += stack_pool_time;
+        gc_num.stack_pool_sweep_time = stack_pool_time;
         gc_sweep_other(ptls, sweep_full);
         gc_scrub();
         gc_verify_tags();
-        gc_sweep_pool(sweep_full);
+        gc_sweep_pool();
         if (sweep_full)
             gc_sweep_perm_alloc();
     }
+
     JL_PROBE_GC_SWEEP_END();
 
-    uint64_t gc_end_time = jl_hrtime();
+    gc_end_time = jl_hrtime();
     uint64_t pause = gc_end_time - gc_start_time;
     uint64_t sweep_time = gc_end_time - start_sweep_time;
     gc_num.total_sweep_time += sweep_time;
@@ -3285,22 +3216,164 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         gc_num.last_incremental_sweep = gc_end_time;
     }
 
+    size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_in_runtime;
+    jl_atomic_store_relaxed(&gc_heap_stats.heap_size, heap_size);
+    freed_in_runtime = 0;
+    uint64_t user_max = max_total_memory * 0.8;
+    uint64_t alloc_diff = before_free_heap_size - old_heap_size;
+    uint64_t freed_diff = before_free_heap_size - heap_size;
+    uint64_t target_heap;
+    const char *reason = ""; (void)reason; // for GC_TIME output stats
+    old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC
+    if (collection == JL_GC_AUTO && jl_options.hard_heap_limit == 0) {
+        // update any heuristics only when the user does not force the GC
+        // but still update the timings, since GC was run and reset, even if it was too early
+        uint64_t target_allocs = 0.0;
+        double alloc_smooth_factor = 0.95;
+        double collect_smooth_factor = 0.5;
+        double tuning_factor = 2e4;
+        uint64_t alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor);
+        uint64_t alloc_time = jl_gc_smooth(old_mut_time, mutator_time, alloc_smooth_factor); // TODO: subtract estimated finalizer time?
+        uint64_t gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor);
+        uint64_t gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor);
+        old_alloc_diff = alloc_mem;
+        old_mut_time = alloc_time;
+        old_freed_diff = gc_mem;
+        old_pause_time = gc_time;
+        // thrashing estimator: if GC time more than 50% of the runtime
+        if (pause > mutator_time && !(thrash_counter < 4))
+            thrash_counter += 1;
+        else if (thrash_counter > 0)
+            thrash_counter -= 1;
+        if (alloc_mem != 0 && alloc_time != 0 && gc_mem != 0 && gc_time != 0) {
+            double alloc_rate = (double)alloc_mem/alloc_time;
+            double gc_rate = (double)gc_mem/gc_time;
+            target_allocs = sqrt((double)heap_size * alloc_rate / gc_rate) * tuning_factor;
+        }
+
+        if (thrashing == 0 && thrash_counter >= 3) {
+            // require 3 consecutive thrashing cycles to force the default allocator rate
+            thrashing = 1;
+            // and require 4 default allocations to clear
+            thrash_counter = 6;
+        }
+        else if (thrashing == 1 && thrash_counter <= 2) {
+            thrashing = 0; // maybe we should report this to the user or error out?
+        }
+
+        target_heap = target_allocs + heap_size;
+        // optionally smooth this:
+        //   target_heap = jl_gc_smooth(jl_atomic_load_relaxed(&gc_heap_stats.heap_target), target_heap, alloc_smooth_factor);
+
+        // compute some guardrails values
+        uint64_t min_target_allocs = heap_size / 20; // minimum 5% of current heap
+        if (min_target_allocs < default_collect_interval / 8) // unless the heap is small
+            min_target_allocs = default_collect_interval / 8;
+        uint64_t max_target_allocs = overallocation(before_free_heap_size, heap_size, user_max);
+        if (max_target_allocs < min_target_allocs)
+            max_target_allocs = min_target_allocs;
+        // respect max_total_memory first
+        if (target_heap > user_max) {
+            target_allocs = heap_size < user_max ? user_max - heap_size : 1;
+            reason = " user limit";
+        }
+        // If we are thrashing use a default only (an average) for a couple collections
+        if (thrashing) {
+            uint64_t thrashing_allocs = sqrt((double)min_target_allocs * max_target_allocs);
+            if (target_allocs < thrashing_allocs) {
+                target_allocs = thrashing_allocs;
+                reason = " thrashing";
+            }
+        }
+        // then add the guardrails for transient issues
+        if (target_allocs > max_target_allocs) {
+            target_allocs = max_target_allocs;
+            reason = " rate limit max";
+        }
+        else if (target_allocs < min_target_allocs) {
+            target_allocs = min_target_allocs;
+            reason = " min limit";
+        }
+        // and set the heap detection threshold
+        target_heap = target_allocs + heap_size;
+        if (target_heap < default_collect_interval) {
+            target_heap = default_collect_interval;
+            reason = " min heap";
+        }
+        jl_atomic_store_relaxed(&gc_heap_stats.heap_target, target_heap);
+    }
+    else {
+        target_heap = jl_atomic_load_relaxed(&gc_heap_stats.heap_target);
+    }
+
+    // Kill the process if we are above the hard heap limit
+    if (jl_options.hard_heap_limit != 0) {
+        if (heap_size > jl_options.hard_heap_limit) {
+            // Can't use `jl_errorf` here, because it will try to allocate memory
+            // and we are already at the hard limit.
+            jl_safe_printf("Heap size exceeded hard limit of %" PRIu64 " bytes.\n",
+                           jl_options.hard_heap_limit);
+            abort();
+        }
+    }
+    // Ignore heap limit computation from MemBalancer-like heuristics
+    // if the heap target increment goes above the value specified through
+    // `--heap-target-increment`.
+    // Note that if we reach this code, we can guarantee that the heap size
+    // is less than the hard limit, so there will be some room to grow the heap
+    // until the next GC without hitting the hard limit.
+    if (jl_options.heap_target_increment != 0) {
+        target_heap = heap_size + jl_options.heap_target_increment;
+        jl_atomic_store_relaxed(&gc_heap_stats.heap_target, target_heap);
+    }
+
+    double old_ratio = (double)promoted_bytes/(double)heap_size;
+    if (heap_size > user_max) {
+        next_sweep_full = 1;
+        gc_record_full_sweep_reason(FULL_SWEEP_REASON_USER_MAX_EXCEEDED);
+    }
+    else if (old_ratio > 0.15) {
+        next_sweep_full = 1;
+        gc_record_full_sweep_reason(FULL_SWEEP_REASON_LARGE_PROMOTION_RATE);
+    }
+    else {
+        next_sweep_full = 0;
+    }
+    if (heap_size > user_max || thrashing)
+        under_pressure = 1;
     // sweeping is over
-    // 7. if it is a quick sweep, put back the remembered objects in queued state
+    // 6. if it is a quick sweep, put back the remembered objects in queued state
     // so that we don't trigger the barrier again on them.
-    assert(gc_n_threads);
+    assert(gc_n_threads != 0);
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         if (ptls2 == NULL)
             continue;
         if (!sweep_full) {
-            for (int i = 0; i < ptls2->heap.remset->len; i++) {
-                void *ptr = ptls2->heap.remset->items[i];
+            for (int i = 0; i < ptls2->gc_tls.heap.remset.len; i++) {
+                void *ptr = ptls2->gc_tls.heap.remset.items[i];
                 jl_astaggedvalue(ptr)->bits.gc = GC_MARKED;
             }
         }
         else {
-            ptls2->heap.remset->len = 0;
+            ptls2->gc_tls.heap.remset.len = 0;
+        }
+        // free empty GC state for threads that have exited
+        if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
+            // GC threads should never exit
+            assert(!gc_is_collector_thread(t_i));
+            jl_thread_heap_common_t *common_heap = &ptls2->gc_tls_common.heap;
+            jl_thread_heap_t *heap = &ptls2->gc_tls.heap;
+            if (common_heap->weak_refs.len == 0)
+                small_arraylist_free(&common_heap->weak_refs);
+            if (common_heap->live_tasks.len == 0)
+                small_arraylist_free(&common_heap->live_tasks);
+            if (heap->remset.len == 0)
+                arraylist_free(&heap->remset);
+            if (ptls2->finalizers.len == 0)
+                arraylist_free(&ptls2->finalizers);
+            if (ptls2->gc_tls.sweep_objs.len == 0)
+                arraylist_free(&ptls2->gc_tls.sweep_objs);
         }
     }
 
@@ -3316,59 +3389,32 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     }
 #endif
 
-    _report_gc_finished(pause, gc_num.freed, sweep_full, recollect);
-
-    gc_final_pause_end(gc_start_time, gc_end_time);
-    gc_time_sweep_pause(gc_end_time, allocd, live_bytes,
-                        estimate_freed, sweep_full);
-    gc_num.full_sweep += sweep_full;
+    _report_gc_finished(pause, gc_num.freed, sweep_full, recollect, live_bytes);
     uint64_t max_memory = last_live_bytes + gc_num.allocd;
     if (max_memory > gc_num.max_memory) {
         gc_num.max_memory = max_memory;
     }
-
+    gc_final_pause_end(gc_start_time, gc_end_time);
+    gc_time_sweep_pause(gc_end_time, gc_num.allocd, live_bytes,
+                        gc_num.freed, sweep_full);
+    gc_num.full_sweep += sweep_full;
     last_live_bytes = live_bytes;
-    // Can't call inc_live_bytes here because we already added allocd
-    // to the graph earlier
     live_bytes += -gc_num.freed + gc_num.allocd;
     jl_timing_counter_dec(JL_TIMING_COUNTER_HeapSize, gc_num.freed);
 
-    if (collection == JL_GC_AUTO) {
-        //If we aren't freeing enough or are seeing lots and lots of pointers let it increase faster
-        if (!not_freed_enough || large_frontier) {
-            int64_t tot = 2 * (live_bytes + gc_num.allocd) / 3;
-            if (gc_num.interval > tot) {
-                gc_num.interval = tot;
-                last_long_collect_interval = tot;
-            }
-        // If the current interval is larger than half the live data decrease the interval
-        }
-        else {
-            int64_t half = (live_bytes / 2);
-            if (gc_num.interval > half)
-                gc_num.interval = half;
-        }
-
-        // But never go below default
-        if (gc_num.interval < default_collect_interval) gc_num.interval = default_collect_interval;
-    }
-
-    if (gc_num.interval + live_bytes > max_total_memory) {
-        if (live_bytes < max_total_memory) {
-            gc_num.interval = max_total_memory - live_bytes;
-            last_long_collect_interval = max_total_memory - live_bytes;
-        }
-        else {
-            // We can't stay under our goal so let's go back to
-            // the minimum interval and hope things get better
-            gc_num.interval = default_collect_interval;
-        }
-    }
-
-    gc_time_summary(sweep_full, t_start, gc_end_time, gc_num.freed,
+    gc_time_summary(sweep_full, gc_start_time, gc_end_time, gc_num.freed,
                     live_bytes, gc_num.interval, pause,
                     gc_num.time_to_safepoint,
                     gc_num.mark_time, gc_num.sweep_time);
+    if (collection == JL_GC_AUTO) {
+        gc_heuristics_summary(
+            old_alloc_diff, alloc_diff,
+            old_mut_time, mutator_time,
+            old_freed_diff, freed_diff,
+            old_pause_time, pause - sweep_time,
+            thrash_counter, reason,
+            heap_size, target_heap);
+    }
 
     prev_sweep_full = sweep_full;
     gc_num.pause += !recollect;
@@ -3390,10 +3436,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     jl_task_t *ct = jl_current_task;
     jl_ptls_t ptls = ct->ptls;
     if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
-        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval;
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
+        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval;
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
         static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
-        jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
+        jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
         return;
     }
     jl_gc_debug_print();
@@ -3402,9 +3448,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
     // `jl_safepoint_start_gc()` makes sure only one thread can run the GC.
     uint64_t t0 = jl_hrtime();
-    if (!jl_safepoint_start_gc()) {
+    if (!jl_safepoint_start_gc(ct)) {
         // either another thread is running GC, or the GC got disabled just now.
         jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
+        jl_safepoint_wait_thread_resume(ct); // block in thread-suspend now if requested, after clearing the gc_state
         return;
     }
 
@@ -3458,18 +3505,23 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     jl_safepoint_end_gc();
     jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
     JL_PROBE_GC_END();
+    jl_safepoint_wait_thread_resume(ct); // block in thread-suspend now if requested, after clearing the gc_state
 
     // Only disable finalizers on current thread
     // Doing this on all threads is racy (it's impossible to check
     // or wait for finalizers on other threads without dead lock).
-    if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
+    if (!ptls->finalizers_inhibited && ptls->locks.len == 0 && ptls->engine_nqueued == 0) {
         JL_TIMING(GC, GC_Finalizers);
-        run_finalizers(ct);
+        run_finalizers(ct, 0);
     }
     JL_PROBE_GC_FINALIZER();
 
     gc_invoke_callbacks(jl_gc_cb_post_gc_t,
         gc_cblist_post_gc, (collection));
+    if (under_pressure)
+        gc_invoke_callbacks(jl_gc_cb_notify_gc_pressure_t,
+            gc_cblist_notify_gc_pressure, ());
+    under_pressure = 0;
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
 #endif
@@ -3478,7 +3530,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
 
 void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
 {
-    assert(gc_n_threads);
+    assert(gc_n_threads != 0);
     for (size_t i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
         if (ptls2 != NULL)
@@ -3487,42 +3539,35 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
     gc_mark_roots(mq);
 }
 
-// allocator entry points
-
-JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)
-{
-    return jl_gc_alloc_(ptls, sz, ty);
-}
-
 // Per-thread initialization
 void jl_init_thread_heap(jl_ptls_t ptls)
 {
-    jl_thread_heap_t *heap = &ptls->heap;
+    jl_thread_heap_common_t *common_heap = &ptls->gc_tls_common.heap;
+    jl_thread_heap_t *heap = &ptls->gc_tls.heap;
     jl_gc_pool_t *p = heap->norm_pools;
     for (int i = 0; i < JL_GC_N_POOLS; i++) {
         p[i].osize = jl_gc_sizeclasses[i];
         p[i].freelist = NULL;
         p[i].newpages = NULL;
     }
-    arraylist_new(&heap->weak_refs, 0);
-    arraylist_new(&heap->live_tasks, 0);
-    heap->mallocarrays = NULL;
-    heap->mafreelist = NULL;
-    heap->big_objects = NULL;
-    heap->remset = &heap->_remset[0];
-    heap->last_remset = &heap->_remset[1];
-    arraylist_new(heap->remset, 0);
-    arraylist_new(heap->last_remset, 0);
+    small_arraylist_new(&common_heap->weak_refs, 0);
+    small_arraylist_new(&common_heap->live_tasks, 0);
+    for (int i = 0; i < JL_N_STACK_POOLS; i++)
+        small_arraylist_new(&common_heap->free_stacks[i], 0);
+    small_arraylist_new(&common_heap->mallocarrays, 0);
+    heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
+    assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized
+    heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag;
+    arraylist_new(&heap->remset, 0);
     arraylist_new(&ptls->finalizers, 0);
-    arraylist_new(&ptls->sweep_objs, 0);
+    arraylist_new(&ptls->gc_tls.sweep_objs, 0);
 
-    jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache;
+    jl_gc_mark_cache_t *gc_cache = &ptls->gc_tls.gc_cache;
     gc_cache->perm_scanned_bytes = 0;
     gc_cache->scanned_bytes = 0;
-    gc_cache->nbig_obj = 0;
 
     // Initialize GC mark-queue
-    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
     ws_queue_t *cq = &mq->chunk_queue;
     ws_array_t *wsa = create_ws_array(GC_CHUNK_QUEUE_INIT_SIZE, sizeof(jl_gc_chunk_t));
     jl_atomic_store_relaxed(&cq->top, 0);
@@ -3534,9 +3579,127 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     jl_atomic_store_relaxed(&q->bottom, 0);
     jl_atomic_store_relaxed(&q->array, wsa2);
     arraylist_new(&mq->reclaim_set, 32);
+    // Initialize `lazily_freed_mtarraylist_buffers`
+    small_arraylist_new(&ptls->lazily_freed_mtarraylist_buffers, 0);
+
+    memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num));
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
+}
+
+void jl_free_thread_gc_state(jl_ptls_t ptls)
+{
+    jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
+    ws_queue_t *cq = &mq->chunk_queue;
+    free_ws_array(jl_atomic_load_relaxed(&cq->array));
+    jl_atomic_store_relaxed(&cq->array, NULL);
+    ws_queue_t *q = &mq->ptr_queue;
+    free_ws_array(jl_atomic_load_relaxed(&q->array));
+    jl_atomic_store_relaxed(&q->array, NULL);
+    arraylist_free(&mq->reclaim_set);
+}
+
+extern uv_barrier_t thread_init_done;
+void jl_start_gc_threads(void)
+{
+    int nthreads = jl_atomic_load_relaxed(&jl_n_threads);
+    int ngcthreads = jl_n_gcthreads;
+    int nmutator_threads = nthreads - ngcthreads;
+    uv_thread_t uvtid;
+    for (int i = nmutator_threads; i < nthreads; ++i) {
+        jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread
+        t->tid = i;
+        t->barrier = &thread_init_done;
+        if (i == nthreads - 1 && jl_n_sweepthreads == 1) {
+            uv_thread_create(&uvtid, jl_concurrent_gc_threadfun, t);
+        }
+        else {
+            uv_thread_create(&uvtid, jl_parallel_gc_threadfun, t);
+        }
+    }
+}
+
+STATIC_INLINE int may_mark(void) JL_NOTSAFEPOINT
+{
+    return (jl_atomic_load(&gc_n_threads_marking) > 0);
+}
+
+STATIC_INLINE int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    return (jl_atomic_load(&ptls->gc_tls.gc_sweeps_requested) > 0);
+}
+
+STATIC_INLINE int may_sweep_stack(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    return (jl_atomic_load(&ptls->gc_tls.gc_stack_sweep_requested) > 0);
+}
+
+extern _Atomic(int) n_threads_running;
+// parallel gc thread function
+void jl_parallel_gc_threadfun(void *arg)
+{
+    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
+
+    // initialize this thread (set tid and create heap)
+    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
+    void *stack_lo, *stack_hi;
+    jl_init_stack_limits(0, &stack_lo, &stack_hi);
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
+    (void)jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
+    // wait for all threads
+    jl_gc_state_set(ptls, JL_GC_PARALLEL_COLLECTOR_THREAD, JL_GC_STATE_UNSAFE);
+    uv_barrier_wait(targ->barrier);
+
+    // free the thread argument here
+    free(targ);
+
+    while (1) {
+        uv_mutex_lock(&gc_threads_lock);
+        while (!may_mark() && !may_sweep(ptls) && !may_sweep_stack(ptls)) {
+            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
+        }
+        uv_mutex_unlock(&gc_threads_lock);
+        assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
+        gc_mark_loop(ptls, 0);
+        if (may_sweep_stack(ptls)) {
+            assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
+            sweep_stack_pool_loop();
+            jl_atomic_fetch_add(&ptls->gc_tls.gc_stack_sweep_requested, -1);
+        }
+        if (may_sweep(ptls)) {
+            assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
+            gc_sweep_pool_parallel(ptls);
+            jl_atomic_fetch_add(&ptls->gc_tls.gc_sweeps_requested, -1);
+        }
+    }
+}
+
+// concurrent gc thread function
+void jl_concurrent_gc_threadfun(void *arg)
+{
+    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
+
+    // initialize this thread (set tid and create heap)
+    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
+    void *stack_lo, *stack_hi;
+    jl_init_stack_limits(0, &stack_lo, &stack_hi);
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
+    (void)jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
+    // wait for all threads
+    jl_gc_state_set(ptls, JL_GC_CONCURRENT_COLLECTOR_THREAD, JL_GC_STATE_UNSAFE);
+    uv_barrier_wait(targ->barrier);
 
-    memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
+    // free the thread argument here
+    free(targ);
+
+    while (1) {
+        assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_CONCURRENT_COLLECTOR_THREAD);
+        uv_sem_wait(&gc_sweep_assists_needed);
+        gc_free_pages();
+    }
 }
 
 // System-wide initializations
@@ -3544,48 +3707,64 @@ void jl_gc_init(void)
 {
     JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
     JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
-    uv_mutex_init(&gc_cache_lock);
+    uv_mutex_init(&page_profile_lock);
     uv_mutex_init(&gc_perm_lock);
+    uv_mutex_init(&gc_pages_lock);
     uv_mutex_init(&gc_threads_lock);
     uv_cond_init(&gc_threads_cond);
     uv_sem_init(&gc_sweep_assists_needed, 0);
+    uv_mutex_init(&gc_queue_observer_lock);
+    void *_addr = (void*)calloc_s(1); // dummy allocation to get the sentinel tag
+    uintptr_t addr = (uintptr_t)_addr;
+    gc_bigval_sentinel_tag = addr;
+    oldest_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
+    oldest_generation_of_bigvals->header = gc_bigval_sentinel_tag;
 
     jl_gc_init_page();
     jl_gc_debug_init();
 
     arraylist_new(&finalizer_list_marked, 0);
     arraylist_new(&to_finalize, 0);
-
+    jl_atomic_store_relaxed(&gc_heap_stats.heap_target, default_collect_interval);
+    if (jl_options.hard_heap_limit != 0) {
+        jl_atomic_store_relaxed(&gc_heap_stats.heap_target, jl_options.hard_heap_limit);
+    }
     gc_num.interval = default_collect_interval;
-    last_long_collect_interval = default_collect_interval;
     gc_num.allocd = 0;
     gc_num.max_pause = 0;
     gc_num.max_memory = 0;
 
+    uint64_t mem_reserve = 250*1024*1024; // LLVM + other libraries need some amount of memory
+    uint64_t min_heap_size_hint = mem_reserve + 1*1024*1024;
+    uint64_t hint = jl_options.heap_size_hint;
+
+    // check if heap size specified on command line
+    if (jl_options.heap_size_hint == 0) {
+        char *cp = getenv(HEAP_SIZE_HINT);
+        if (cp)
+            hint = parse_heap_size_option(cp, "JULIA_HEAP_SIZE_HINT=\"<size>[<unit>]\"", 1);
+    }
 #ifdef _P64
-    total_mem = uv_get_total_memory();
-    uint64_t constrained_mem = uv_get_constrained_memory();
-    if (constrained_mem > 0 && constrained_mem < total_mem)
-        total_mem = constrained_mem;
-    double percent;
-    if (total_mem < 128e9)
-        percent = total_mem * 2.34375e-12 + 0.6; // 60% at 0 gigs and 90% at 128 to not
-    else                                         // overcommit too much on memory contrained devices
-        percent = 0.9;
-    max_total_memory = total_mem * percent;
+    size_t total_mem = uv_get_total_memory();
+    if (hint == 0) {
+        uint64_t constrained_mem = uv_get_constrained_memory();
+        if (constrained_mem > 0 && constrained_mem < total_mem)
+            hint = constrained_mem;
+    }
 #endif
-    if (jl_options.heap_size_hint)
-        jl_gc_set_max_memory(jl_options.heap_size_hint);
-
-    t_start = jl_hrtime();
+    if (hint) {
+        if (hint < min_heap_size_hint)
+            hint = min_heap_size_hint;
+        jl_gc_set_max_memory(hint - mem_reserve);
+    }
 }
 
 JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem)
 {
-    if (max_mem > 0
-        && max_mem < (uint64_t)1 << (sizeof(memsize_t) * 8 - 1)) {
-        max_total_memory = max_mem;
-    }
+#ifdef _P32
+    max_mem = max_mem < max_mem_32bit_systems ? max_mem : max_mem_32bit_systems;
+#endif
+    max_total_memory = max_mem;
 }
 
 JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void)
@@ -3593,132 +3772,72 @@ JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void)
     return max_total_memory;
 }
 
-// callback for passing OOM errors from gmp
-JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
-{
-    jl_throw(jl_memory_exception);
-}
-
-// allocation wrappers that track allocation and let collection run
+// allocation wrappers that add to gc pressure
 
 JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    if (pgcstack != NULL && ct->world_age) {
+    void *data = malloc(sz);
+    jl_task_t *ct = jl_get_current_task();
+    if (data != NULL && ct != NULL) {
+        sz = memory_block_usable_size(data, 0);
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
+        jl_batch_accum_heap_size(ptls, sz);
     }
-    return malloc(sz);
+    return data;
 }
 
 JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    if (pgcstack != NULL && ct->world_age) {
+    void *data = calloc(nm, sz);
+    jl_task_t *ct = jl_get_current_task();
+    if (data != NULL && ct != NULL) {
+        sz = memory_block_usable_size(data, 0);
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
+        jl_batch_accum_heap_size(ptls, sz);
     }
-    return calloc(nm, sz);
+    return data;
 }
 
 JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
     free(p);
-    if (pgcstack != NULL && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        jl_atomic_store_relaxed(&ptls->gc_num.freed,
-            jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.freecall,
-            jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1);
-    }
+    jl_task_t *ct = jl_get_current_task();
+    if (ct != NULL)
+        jl_batch_accum_free_size(ct->ptls, sz);
 }
 
 JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    if (pgcstack != NULL && ct->world_age) {
+    void *data = realloc(p, sz);
+    jl_task_t *ct = jl_get_current_task();
+    if (data != NULL && ct != NULL) {
+        sz = memory_block_usable_size(data, 0);
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        if (sz < old)
-            jl_atomic_store_relaxed(&ptls->gc_num.freed,
-                jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz));
-        else
-            jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-                jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old));
-        jl_atomic_store_relaxed(&ptls->gc_num.realloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
-    }
-    return realloc(p, sz);
-}
-
-// allocation wrappers that save the size of allocations, to allow using
-// jl_gc_counted_* functions with a libc-compatible API.
-
-JL_DLLEXPORT void *jl_malloc(size_t sz)
-{
-    int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT);
-    if (p == NULL)
-        return NULL;
-    p[0] = sz;
-    return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
-}
-
-//_unchecked_calloc does not check for potential overflow of nm*sz
-STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
-    size_t nmsz = nm*sz;
-    int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
-    if (p == NULL)
-        return NULL;
-    p[0] = nmsz;
-    return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
-}
-
-JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
-{
-    if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT)
-        return NULL;
-    return _unchecked_calloc(nm, sz);
-}
-
-JL_DLLEXPORT void jl_free(void *p)
-{
-    if (p != NULL) {
-        int64_t *pp = (int64_t *)p - 2;
-        size_t sz = pp[0];
-        jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT);
-    }
-}
-
-JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
-{
-    int64_t *pp;
-    size_t szold;
-    if (p == NULL) {
-        pp = NULL;
-        szold = 0;
-    }
-    else {
-        pp = (int64_t *)p - 2;
-        szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT;
+        if (!(sz < old))
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+                jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old));
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc,
+            jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1);
+        int64_t diff = sz - old;
+        if (diff < 0) {
+            jl_batch_accum_free_size(ptls, -diff);
+        }
+        else {
+            jl_batch_accum_heap_size(ptls, diff);
+        }
     }
-    int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT);
-    if (pnew == NULL)
-        return NULL;
-    pnew[0] = sz;
-    return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+    return data;
 }
 
 // allocating blocks for Arrays and Strings
@@ -3730,10 +3849,7 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
     if (allocsz < sz)  // overflow in adding offs, size was "negative"
         jl_throw(jl_memory_exception);
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
-    jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-        jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
@@ -3741,6 +3857,14 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     void *b = malloc_cache_align(allocsz);
     if (b == NULL)
         jl_throw(jl_memory_exception);
+
+    size_t allocated_bytes = memory_block_usable_size(b, 1);
+    assert(allocated_bytes >= allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocated_bytes);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
+    jl_batch_accum_heap_size(ptls, allocated_bytes);
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
 #endif
@@ -3750,91 +3874,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     return b;
 }
 
-static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz,
-                                 int isaligned, jl_value_t *owner, int8_t can_collect)
-{
-    if (can_collect)
-        maybe_collect(ptls);
-
-    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
-    if (allocsz < sz)  // overflow in adding offs, size was "negative"
-        jl_throw(jl_memory_exception);
-
-    if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) {
-        ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz;
-        inc_live_bytes(allocsz - oldsz);
-    }
-    else if (allocsz < oldsz)
-        jl_atomic_store_relaxed(&ptls->gc_num.freed,
-            jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz));
-    else
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz));
-    jl_atomic_store_relaxed(&ptls->gc_num.realloc,
-        jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
-
-    int last_errno = errno;
-#ifdef _OS_WINDOWS_
-    DWORD last_error = GetLastError();
-#endif
-    void *b;
-    if (isaligned)
-        b = realloc_cache_align(d, allocsz, oldsz);
-    else
-        b = realloc(d, allocsz);
-    if (b == NULL)
-        jl_throw(jl_memory_exception);
-#ifdef _OS_WINDOWS_
-    SetLastError(last_error);
-#endif
-    errno = last_errno;
-    maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag);
-    return b;
-}
-
-JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
-                                         int isaligned, jl_value_t *owner)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1);
-}
-
-jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz)
-{
-    size_t len = jl_string_len(s);
-    if (sz <= len) return s;
-    jl_taggedvalue_t *v = jl_astaggedvalue(s);
-    size_t strsz = len + sizeof(size_t) + 1;
-    if (strsz <= GC_MAX_SZCLASS ||
-        // TODO: because of issue #17971 we can't resize old objects
-        gc_marked(v->bits.gc)) {
-        // pool allocated; can't be grown in place so allocate a new object.
-        jl_value_t *snew = jl_alloc_string(sz);
-        memcpy(jl_string_data(snew), jl_string_data(s), len);
-        return snew;
-    }
-    size_t newsz = sz + sizeof(size_t) + 1;
-    size_t offs = sizeof(bigval_t);
-    size_t oldsz = LLT_ALIGN(strsz + offs, JL_CACHE_BYTE_ALIGNMENT);
-    size_t allocsz = LLT_ALIGN(newsz + offs, JL_CACHE_BYTE_ALIGNMENT);
-    if (allocsz < sz)  // overflow in adding offs, size was "negative"
-        jl_throw(jl_memory_exception);
-    bigval_t *hdr = bigval_header(v);
-    jl_ptls_t ptls = jl_current_task->ptls;
-    maybe_collect(ptls); // don't want this to happen during jl_gc_managed_realloc
-    gc_big_object_unlink(hdr);
-    // TODO: this is not safe since it frees the old pointer. ideally we'd like
-    // the old pointer to be left alone if we can't grow in place.
-    // for now it's up to the caller to make sure there are no references to the
-    // old pointer.
-    bigval_t *newbig = (bigval_t*)gc_managed_realloc_(ptls, hdr, allocsz, oldsz, 1, s, 0);
-    newbig->sz = allocsz;
-    gc_big_object_link(newbig, &ptls->heap.big_objects);
-    jl_value_t *snew = jl_valueof(&newbig->header);
-    *(size_t*)snew = sz;
-    return snew;
-}
-
 // Perm gen allocator
 // 2M pool
 #define GC_PERM_POOL_SIZE (2 * 1024 * 1024)
@@ -3861,11 +3900,11 @@ static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned o
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
 #endif
+    jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size,sz);
     errno = last_errno;
     jl_may_leak(base);
     assert(align > 0);
-    unsigned diff = (offset - (uintptr_t)base) % align;
-    return (void*)((char*)base + diff);
+    return (void*)(LLT_ALIGN((uintptr_t)base + offset, (uintptr_t)align) - offset);
 }
 
 STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT
@@ -3879,7 +3918,7 @@ STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned o
 }
 
 // **NOT** a safepoint
-void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
+void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT
 {
     // The caller should have acquired `gc_perm_lock`
     assert(align < GC_PERM_POOL_LIMIT);
@@ -3924,51 +3963,18 @@ void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
     return p;
 }
 
-JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    jl_gc_add_finalizer_th(ptls, v, f);
-}
-
-JL_DLLEXPORT void jl_finalize(jl_value_t *o)
-{
-    jl_finalize_th(jl_current_task, o);
-}
-
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
+jl_value_t *jl_gc_permobj(jl_ptls_t _ptls, size_t sz, void *ty, unsigned align) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_new_weakref_th(ptls, value);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sz, NULL);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, 0, NULL);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sizeof(void*), NULL);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL);
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    if (align == 0) {
+        align = ((sz == 0) ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
+                                                 sizeof(void*) * 2 : 16));
+    }
+    jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align,
+                                                              sizeof(void*) % align);
+    jl_value_t* v = jl_valueof(o);
+    jl_set_typeof(v, (void*)(((uintptr_t)(ty) | GC_OLD_MARKED)));
+    return v;
 }
 
 JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void)
@@ -3990,7 +3996,7 @@ JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void)
     }
 }
 
-JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void)
+JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void) JL_NOTSAFEPOINT
 {
     return jl_atomic_load(&support_conservative_marking);
 }
@@ -4024,53 +4030,58 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
             // case 1: full page; `cell` must be an object
             goto valid_object;
         }
-        jl_gc_pool_t *pool =
-            gc_all_tls_states[meta->thread_n]->heap.norm_pools +
-            meta->pool_n;
-        if (meta->fl_begin_offset == (uint16_t) -1) {
-            // case 2: this is a page on the newpages list
-            jl_taggedvalue_t *newpages = pool->newpages;
-            // Check if the page is being allocated from via newpages
-            if (!newpages)
-                return NULL;
-            char *data = gc_page_data(newpages);
-            if (data != meta->data) {
-                // Pages on newpages form a linked list where only the
-                // first one is allocated from (see gc_reset_page()).
-                // All other pages are empty.
-                return NULL;
+        {
+            jl_gc_pool_t *pool =
+                gc_all_tls_states[meta->thread_n]->gc_tls.heap.norm_pools +
+                meta->pool_n;
+            if (meta->fl_begin_offset == UINT16_MAX) {
+                // case 2: this is a page on the newpages list
+                jl_taggedvalue_t *newpages = pool->newpages;
+                // Check if the page is being allocated from via newpages
+                if (!newpages)
+                    return NULL;
+                char *data = gc_page_data(newpages);
+                if (data != meta->data) {
+                    // Pages on newpages form a linked list where only the
+                    // first one is allocated from (see gc_reset_page()).
+                    // All other pages are empty.
+                    return NULL;
+                }
+                // This is the first page on the newpages list, where objects
+                // are allocated from.
+                if ((char *)cell >= (char *)newpages) // past allocation pointer
+                    return NULL;
+                goto valid_object;
             }
-            // This is the first page on the newpages list, where objects
-            // are allocated from.
-            if ((char *)cell >= (char *)newpages) // past allocation pointer
-                return NULL;
-            goto valid_object;
+            // case 3: this is a page with a freelist
+            // marked or old objects can't be on the freelist
+            if (cell->bits.gc)
+                goto valid_object;
+            // When allocating from a freelist, three subcases are possible:
+            // * The freelist of a page has been exhausted; this was handled
+            //   under case 1, as nfree == 0.
+            // * The freelist of the page has not been used, and the age bits
+            //   reflect whether a cell is on the freelist or an object.
+            // * The freelist is currently being allocated from. In this case,
+            //   pool->freelist will point to the current page; any cell with
+            //   a lower address will be an allocated object, and for cells
+            //   with the same or a higher address, the corresponding age
+            //   bit will reflect whether it's on the freelist.
+            // Age bits are set in sweep_page() and are 0 for freelist
+            // entries and 1 for live objects. The above subcases arise
+            // because allocating a cell will not update the age bit, so we
+            // need extra logic for pages that have been allocated from.
+            // We now distinguish between the second and third subcase.
+            // Freelist entries are consumed in ascending order. Anything
+            // before the freelist pointer was either live during the last
+            // sweep or has been allocated since.
+            if (gc_page_data(cell) == gc_page_data(pool->freelist)
+                && (char *)cell < (char *)pool->freelist)
+                goto valid_object;
+            // already skipped marked or old objects above, so here
+            // the age bits are 0, thus the object is on the freelist
+            return NULL;
         }
-        // case 3: this is a page with a freelist
-        // marked or old objects can't be on the freelist
-        if (cell->bits.gc)
-            goto valid_object;
-        // When allocating from a freelist, three subcases are possible:
-        // * The freelist of a page has been exhausted; this was handled
-        //   under case 1, as nfree == 0.
-        // * The freelist of the page has not been used, and the age bits
-        //   reflect whether a cell is on the freelist or an object.
-        // * The freelist is currently being allocated from. In this case,
-        //   pool->freelist will point to the current page; any cell with
-        //   a lower address will be an allocated object, and for cells
-        //   with the same or a higher address, the corresponding age
-        //   bit will reflect whether it's on the freelist.
-        // Age bits are set in sweep_page() and are 0 for freelist
-        // entries and 1 for live objects. The above subcases arise
-        // because allocating a cell will not update the age bit, so we
-        // need extra logic for pages that have been allocated from.
-        // We now distinguish between the second and third subcase.
-        // Freelist entries are consumed in ascending order. Anything
-        // before the freelist pointer was either live during the last
-        // sweep or has been allocated since.
-        if (gc_page_data(cell) == gc_page_data(pool->freelist)
-            && (char *)cell < (char *)pool->freelist)
-            goto valid_object;
         // Not a freelist entry, therefore a valid object.
     valid_object:
         // We have to treat objects with type `jl_buff_tag` differently,
@@ -4094,15 +4105,19 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void)
     return sizeof(bigval_t);
 }
 
+JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj)
+{
+    arraylist_push(&ptls->gc_tls.sweep_objs, obj);
+}
 
-JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty)
+void jl_gc_notify_image_load(const char* img_data, size_t len)
 {
-    return jl_gc_alloc(ptls, sz, ty);
+    // Do nothing
 }
 
-JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj)
+JL_DLLEXPORT const char* jl_gc_active_impl(void)
 {
-    arraylist_push(&ptls->sweep_objs, obj);
+    return "Built with stock GC";
 }
 
 #ifdef __cplusplus
diff --git a/src/gc-stock.h b/src/gc-stock.h
new file mode 100644
index 0000000000000..4067093b39de7
--- /dev/null
+++ b/src/gc-stock.h
@@ -0,0 +1,757 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+/*
+ * Julia implements a garbage collector (GC) to automate dynamic memory management.
+ * Key characteristics of Julia's stock GC:
+ *
+ * - Mark-sweep: The object graph is traced starting from a root set
+ *   (e.g., global variables and local variables on the stack) to determine live objects.
+ *
+ * - Non-moving: Objects are not relocated to a different memory address.
+ *
+ * - Parallel: Multiple threads can be used during the marking and sweeping phases.
+ *
+ * - Partially concurrent: The runtime can scavenge pool-allocated memory blocks
+ *   (e.g., via madvise on Linux) concurrently with Julia user code.
+ *
+ * - Generational: Objects are partitioned into generations based on how many collection
+ *   cycles they have survived. Younger generations are collected more often.
+ *
+ * - Mostly precise: Julia optionally supports conservative stack scanning for users
+ *   interoperating with foreign languages like C.
+ */
+
+#ifndef JL_GC_H
+#define JL_GC_H
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include "gc-common.h"
+#include "julia.h"
+#include "julia_assert.h"
+#include "julia_internal.h"
+#include "julia_threads.h"
+#include "threading.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef GC_SMALL_PAGE
+#define GC_PAGE_LG2 12 // log2(size of a page)
+#else
+#define GC_PAGE_LG2 14 // log2(size of a page)
+#endif
+#define GC_PAGE_SZ (1 << GC_PAGE_LG2)
+#define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT))
+
+// Used by GC_DEBUG_ENV
+typedef struct {
+    uint64_t num;
+    uint64_t next;
+    uint64_t min;
+    uint64_t interv;
+    uint64_t max;
+    unsigned short random[3];
+} jl_alloc_num_t;
+
+typedef struct {
+    int wait_for_debugger;
+    jl_alloc_num_t pool;
+    jl_alloc_num_t other;
+    jl_alloc_num_t print;
+} jl_gc_debug_env_t;
+
+// Array chunks (work items representing suffixes of
+// large arrays that have not been scanned yet)
+
+typedef enum {
+    GC_empty_chunk = 0, // sentine value representing no chunk
+    GC_objary_chunk,    // for chunk of object array
+    GC_ary8_chunk,      // for chunk of array with 8 bit field descriptors
+    GC_ary16_chunk,     // for chunk of array with 16 bit field descriptors
+    GC_finlist_chunk,   // for chunk of finalizer list
+} gc_chunk_id_t;
+
+typedef struct _jl_gc_chunk_t {
+    gc_chunk_id_t cid;          // chunk type identifier
+    struct _jl_value_t *parent; // array parent
+    struct _jl_value_t **begin; // pointer to first element that needs scanning
+    struct _jl_value_t **end;   // pointer to last element that needs scanning
+    void *elem_begin;           // used to scan pointers within objects when marking `ary8` or `ary16`
+    void *elem_end;             // used to scan pointers within objects when marking `ary8` or `ary16`
+    uint32_t step;              // step-size used when marking objarray
+    uintptr_t nptr;             // (`nptr` & 0x1) if array has young element and (`nptr` & 0x2) if array owner is old
+} jl_gc_chunk_t;
+
+#define GC_CHUNK_BATCH_SIZE (1 << 16)       // maximum number of references that can be processed
+                                            // without creating a chunk
+
+#define GC_PTR_QUEUE_INIT_SIZE (1 << 18)    // initial size of queue of `jl_value_t *`
+#define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14)  // initial size of chunk-queue
+
+#define GC_REMSET_PTR_TAG (0x1)             // lowest bit of `jl_value_t *` is tagged if it's in the remset
+
+// Metadata structure that is paired with each pool-allocated page
+typedef struct _jl_gc_pagemeta_t {
+    // Pointer to the next metadata structure in the linked list
+    struct _jl_gc_pagemeta_t *next;
+    // Index of the size class, in the pool allocator, that this metadata structure belongs to
+    uint8_t pool_n;
+    // Whether any cell in the page is marked
+    // This bit is set before sweeping iff there are live cells in the page.
+    // Note that before marking or after sweeping there can be live
+    // (and young) cells in the page for `!has_marked`
+    uint8_t has_marked;
+    // Whether any cell was live and young **before sweeping**.
+    // For a normal sweep (quick sweep that is NOT preceded by a
+    // full sweep) this bit is set iff there are young or newly dead
+    // objects in the page and the page needs to be swept
+    //
+    // For a full sweep, this bit should be ignored
+    //
+    // For a quick sweep preceded by a full sweep. If this bit is set,
+    // the page needs to be swept. If this bit is not set, there could
+    // still be old dead objects in the page and `nold` and `prev_nold`
+    // should be used to determine if the page needs to be swept
+    uint8_t has_young;
+    // Number of old objects in the page
+    uint16_t nold;
+    // Number of old objects in the page at the end of the previous full sweep
+    uint16_t prev_nold;
+    // Number of free objects in this page
+    // Invalid if pool that owns this page is allocating objects from this page
+    uint16_t nfree;
+    uint16_t osize;           // Size of each object in this page
+    uint16_t fl_begin_offset; // Offset of first free object in this page
+    uint16_t fl_end_offset;   // Offset of last free object in this page
+    uint16_t thread_n;        // Thread id of the heap that owns this page
+    char *data;               // Pointer to the start of the regions where objects are allocated
+} jl_gc_pagemeta_t;
+
+extern jl_gc_page_stack_t global_page_pool_lazily_freed;
+extern jl_gc_page_stack_t global_page_pool_clean;
+extern jl_gc_page_stack_t global_page_pool_freed;
+
+/*
+ * Simple lock-free stack implementation for `jl_gc_page_stack_t`.
+ *
+ * NOTE: This is not a general-purpose lock-free stack. It does not implement
+ * any ABA-prevention mechanism. For our specific use case, this is acceptable,
+ * because we avoid the pathological concurrent push/pop sequences on the same
+ * list node that could trigger the ABA problem.
+ *
+ * Safety invariants for this simple lock-free stack:
+ *
+ * 1. If a node is popped from the stack by a mutator thread, it will never
+ *    be pushed back onto the same stack within the same GC epoch
+ *    (i.e., the time window between two consecutive GCs).
+ *
+ * 2. If a node is popped by a GC thread, it will never be pushed back onto
+ *    the same stack.
+ *
+ * These invariants ensure safe usage of this simplified lock-free stack
+ * without requiring ABA prevention.
+ */
+
+STATIC_INLINE void push_lf_back_nosync(jl_gc_page_stack_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT
+{
+    jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom);
+    elt->next = old_back;
+    jl_atomic_store_relaxed(&pool->bottom, elt);
+}
+
+STATIC_INLINE void push_lf_back(jl_gc_page_stack_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT
+{
+    while (1) {
+        jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom);
+        elt->next = old_back;
+        if (jl_atomic_cmpswap(&pool->bottom, &old_back, elt)) {
+            break;
+        }
+        jl_cpu_pause();
+    }
+}
+
+STATIC_INLINE jl_gc_pagemeta_t *try_pop_lf_back(jl_gc_page_stack_t *pool) JL_NOTSAFEPOINT
+{
+    for (int i = 0; i < (1 << 10); i++) {
+        jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom);
+        if (old_back == NULL) {
+            return NULL;
+        }
+        if (jl_atomic_cmpswap(&pool->bottom, &old_back, old_back->next)) {
+            return old_back;
+        }
+        jl_cpu_pause();
+    }
+    return NULL;
+}
+
+STATIC_INLINE jl_gc_pagemeta_t *pop_lf_back_nosync(jl_gc_page_stack_t *pool) JL_NOTSAFEPOINT
+{
+    jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom);
+    if (old_back == NULL) {
+        return NULL;
+    }
+    jl_atomic_store_relaxed(&pool->bottom, old_back->next);
+    return old_back;
+}
+
+
+STATIC_INLINE jl_gc_pagemeta_t *pop_lf_back(jl_gc_page_stack_t *pool) JL_NOTSAFEPOINT
+{
+    while (1) {
+        jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom);
+        if (old_back == NULL) {
+            return NULL;
+        }
+        if (jl_atomic_cmpswap(&pool->bottom, &old_back, old_back->next)) {
+            return old_back;
+        }
+        jl_cpu_pause();
+    }
+}
+typedef struct {
+    jl_gc_page_stack_t stack;
+    /*
+    * Pad to 128 bytes to avoid false sharing.
+    * 128 bytes is large enough to ensure that two consecutively allocated
+    * `jl_gc_padded_page_stack_t` instances will not share the same cache line.
+    */
+#ifdef _P64
+    void *_pad[15];
+#else
+    void *_pad[31];
+#endif
+} jl_gc_padded_page_stack_t;
+static_assert(sizeof(jl_gc_padded_page_stack_t) == 128, "jl_gc_padded_page_stack_t is not 128 bytes");
+
+typedef struct {
+    _Atomic(size_t) n_freed_objs;
+    _Atomic(size_t) n_pages_allocd;
+} gc_fragmentation_stat_t;
+
+typedef struct {
+    _Atomic(size_t) bytes_mapped;
+    _Atomic(size_t) bytes_resident;
+    _Atomic(size_t) heap_size;
+    _Atomic(size_t) heap_target;
+} gc_heapstatus_t;
+
+extern gc_heapstatus_t gc_heap_stats;
+
+/*
+ * GC Multi-Level Page Table Structures
+ *
+ * Julia uses a hierarchical page table to track the allocation state of
+ * pool-allocated memory pages. This design enables sparse memory representation
+ * and fast lookup of page states.
+ *
+ * - Level 0: pagetable0_t
+ *   - Lowest level of the page table.
+ *   - Each entry in `meta` represents the state of a single GC page
+ *     (GC_PAGE_UNMAPPED, GC_PAGE_ALLOCATED, etc.).
+ *   - Size is determined by REGION0_PG_COUNT, which varies by page size and
+ *     architecture.
+ *
+ * - Level 1: pagetable1_t
+ *   - Middle level of the page table.
+ *   - `meta0` points to Level 0 tables, each covering a contiguous region of pages.
+ *   - Supports sparse allocation: entries can be NULL if no pages in that region
+ *     are used.
+ *
+ * - Level 2 / Root: pagetable_t
+ *   - Top-level root of the page table.
+ *   - `meta1` points to Level 1 tables.
+ *   - Provides the first lookup level for any heap pointer and supports large
+ *     address spaces by subdividing memory into regions.
+ */
+
+#ifdef GC_SMALL_PAGE
+#ifdef _P64
+#define REGION0_PG_COUNT (1 << 16)
+#define REGION1_PG_COUNT (1 << 18)
+#define REGION2_PG_COUNT (1 << 18)
+#define REGION0_INDEX(p) (((uintptr_t)(p) >> 12) & 0xFFFF) // shift by GC_PAGE_LG2
+#define REGION1_INDEX(p) (((uintptr_t)(p) >> 28) & 0x3FFFF)
+#define REGION_INDEX(p)  (((uintptr_t)(p) >> 46) & 0x3FFFF)
+#else
+#define REGION0_PG_COUNT (1 << 10)
+#define REGION1_PG_COUNT (1 << 10)
+#define REGION2_PG_COUNT (1 << 0)
+#define REGION0_INDEX(p) (((uintptr_t)(p) >> 12) & 0x3FF) // shift by GC_PAGE_LG2
+#define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF)
+#define REGION_INDEX(p)  (0)
+#endif
+#else
+#ifdef _P64
+#define REGION0_PG_COUNT (1 << 16)
+#define REGION1_PG_COUNT (1 << 16)
+#define REGION2_PG_COUNT (1 << 18)
+#define REGION0_INDEX(p) (((uintptr_t)(p) >> 14) & 0xFFFF) // shift by GC_PAGE_LG2
+#define REGION1_INDEX(p) (((uintptr_t)(p) >> 30) & 0xFFFF)
+#define REGION_INDEX(p)  (((uintptr_t)(p) >> 46) & 0x3FFFF)
+#else
+#define REGION0_PG_COUNT (1 << 8)
+#define REGION1_PG_COUNT (1 << 10)
+#define REGION2_PG_COUNT (1 << 0)
+#define REGION0_INDEX(p) (((uintptr_t)(p) >> 14) & 0xFF) // shift by GC_PAGE_LG2
+#define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF)
+#define REGION_INDEX(p)  (0)
+#endif
+#endif
+
+#define GC_PAGE_UNMAPPED        0
+#define GC_PAGE_ALLOCATED       1
+#define GC_PAGE_LAZILY_FREED    2
+#define GC_PAGE_FREED           3
+
+typedef struct {
+    uint8_t meta[REGION0_PG_COUNT];
+} pagetable0_t;
+
+typedef struct {
+    pagetable0_t *meta0[REGION1_PG_COUNT];
+} pagetable1_t;
+
+typedef struct {
+    pagetable1_t *meta1[REGION2_PG_COUNT];
+} pagetable_t;
+
+extern pagetable_t alloc_map;
+
+STATIC_INLINE uint8_t gc_alloc_map_is_set(char *_data) JL_NOTSAFEPOINT
+{
+    uintptr_t data = ((uintptr_t)_data);
+    unsigned i;
+    i = REGION_INDEX(data);
+    pagetable1_t *r1 = alloc_map.meta1[i];
+    if (r1 == NULL)
+        return 0;
+    i = REGION1_INDEX(data);
+    pagetable0_t *r0 = r1->meta0[i];
+    if (r0 == NULL)
+        return 0;
+    i = REGION0_INDEX(data);
+    return (r0->meta[i] == GC_PAGE_ALLOCATED);
+}
+
+STATIC_INLINE void gc_alloc_map_set(char *_data, uint8_t v) JL_NOTSAFEPOINT
+{
+    uintptr_t data = ((uintptr_t)_data);
+    unsigned i;
+    i = REGION_INDEX(data);
+    pagetable1_t *r1 = alloc_map.meta1[i];
+    assert(r1 != NULL);
+    i = REGION1_INDEX(data);
+    pagetable0_t *r0 = r1->meta0[i];
+    assert(r0 != NULL);
+    i = REGION0_INDEX(data);
+    r0->meta[i] = v;
+}
+
+STATIC_INLINE void gc_alloc_map_maybe_create(char *_data) JL_NOTSAFEPOINT
+{
+    uintptr_t data = ((uintptr_t)_data);
+    unsigned i;
+    i = REGION_INDEX(data);
+    pagetable1_t *r1 = alloc_map.meta1[i];
+    if (r1 == NULL) {
+        r1 = (pagetable1_t*)calloc_s(sizeof(pagetable1_t));
+        alloc_map.meta1[i] = r1;
+    }
+    i = REGION1_INDEX(data);
+    pagetable0_t *r0 = r1->meta0[i];
+    if (r0 == NULL) {
+        r0 = (pagetable0_t*)calloc_s(sizeof(pagetable0_t));
+        r1->meta0[i] = r0;
+    }
+}
+
+/*
+ * Page Layout
+ *
+ * Each pool-allocated page is divided into three main sections:
+ *
+ * - Metadata Pointer
+ *   - Size: sizeof(jl_gc_pagemeta_t*)
+ *   - Points to the page metadata structure.
+ *
+ * - Padding
+ *   - Size: GC_PAGE_OFFSET - sizeof(jl_gc_pagemeta_t*)
+ *   - Ensures proper alignment of the blocks.
+ *
+ * - Blocks
+ *   - Size per block: osize
+ *   - Each block consists of:
+ *     - Tag: sizeof(jl_taggedvalue_t)
+ *     - Data: up to (osize - sizeof(jl_taggedvalue_t))
+ *
+ * Example layout:
+ *
+ *   +----------------------+ <- page start
+ *   | Metadata Pointer     |  sizeof(jl_gc_pagemeta_t*)
+ *   +----------------------+
+ *   | Padding              |  GC_PAGE_OFFSET - sizeof(jl_gc_pagemeta_t*)
+ *   +----------------------+ <- GC_PAGE_OFFSET
+ *   | Block 0              |  osize
+ *   |   +----------------+|
+ *   |   | Tag            || sizeof(jl_taggedvalue_t)
+ *   |   +----------------+|
+ *   |   | Data           || <= osize - sizeof(jl_taggedvalue_t)
+ *   |   +----------------+|
+ *   | Block 1              |  osize
+ *   |   +----------------+|
+ *   |   | Tag            || sizeof(jl_taggedvalue_t)
+ *   |   +----------------+|
+ *   |   | Data           || <= osize - sizeof(jl_taggedvalue_t)
+ *   |   +----------------+|
+ *   | ...                  |
+ *   +----------------------+ <- page end
+ */
+
+STATIC_INLINE char *gc_page_data(void *x) JL_NOTSAFEPOINT
+{
+    return (char*)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2);
+}
+
+STATIC_INLINE jl_gc_pagemeta_t *page_metadata_unsafe(void *_data) JL_NOTSAFEPOINT
+{
+    return *(jl_gc_pagemeta_t**)(gc_page_data(_data));
+}
+
+STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT
+{
+    if (!gc_alloc_map_is_set((char*)_data)) {
+        return NULL;
+    }
+    return page_metadata_unsafe(_data);
+}
+
+STATIC_INLINE void set_page_metadata(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
+{
+    *(jl_gc_pagemeta_t**)(pg->data) = pg;
+}
+
+STATIC_INLINE void push_page_metadata_back(jl_gc_pagemeta_t **ppg, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT
+{
+    elt->next = *ppg;
+    *ppg = elt;
+}
+
+STATIC_INLINE jl_gc_pagemeta_t *pop_page_metadata_back(jl_gc_pagemeta_t **ppg) JL_NOTSAFEPOINT
+{
+    jl_gc_pagemeta_t *v = *ppg;
+    if (*ppg != NULL) {
+        *ppg = (*ppg)->next;
+    }
+    return v;
+}
+
+STATIC_INLINE jl_taggedvalue_t *page_pfl_beg(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT
+{
+    return (jl_taggedvalue_t*)(p->data + p->fl_begin_offset);
+}
+
+STATIC_INLINE jl_taggedvalue_t *page_pfl_end(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT
+{
+    return (jl_taggedvalue_t*)(p->data + p->fl_end_offset);
+}
+
+extern int gc_first_tid;
+
+STATIC_INLINE int gc_first_parallel_collector_thread_id(void) JL_NOTSAFEPOINT
+{
+    if (jl_n_markthreads == 0) {
+        return 0;
+    }
+    return gc_first_tid;
+}
+
+STATIC_INLINE int gc_last_parallel_collector_thread_id(void) JL_NOTSAFEPOINT
+{
+    if (jl_n_markthreads == 0) {
+        return -1;
+    }
+    return gc_first_tid + jl_n_markthreads - 1;
+}
+
+STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT
+{
+    assert(i >= 0 && i < jl_n_markthreads);
+    return gc_first_tid + i;
+}
+
+STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT
+{
+    return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id();
+}
+
+STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT
+{
+    if (jl_n_sweepthreads == 0) {
+        return 0;
+    }
+    int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id();
+    int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1;
+    return tid == concurrent_collector_thread_id;
+}
+
+STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    assert(jl_n_markthreads > 0);
+    int v = gc_first_tid + (int)cong(jl_n_markthreads, &ptls->rngseed); // cong is [0, n)
+    assert(v >= gc_first_tid && v <= gc_last_parallel_collector_thread_id());
+    return v;
+}
+
+STATIC_INLINE int gc_parallel_collector_threads_enabled(void) JL_NOTSAFEPOINT
+{
+    return jl_n_markthreads > 0;
+}
+
+STATIC_INLINE void gc_check_ptls_of_parallel_collector_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    (void)ptls;
+    assert(gc_parallel_collector_threads_enabled());
+    assert(ptls != NULL);
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
+}
+
+extern uintptr_t gc_bigval_sentinel_tag;
+extern bigval_t *oldest_generation_of_bigvals;
+
+STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT
+{
+    return container_of(o, bigval_t, header);
+}
+
+FORCE_INLINE void gc_big_object_unlink(const bigval_t *node) JL_NOTSAFEPOINT
+{
+    assert(node != oldest_generation_of_bigvals);
+    assert(node->header != gc_bigval_sentinel_tag);
+    assert(node->prev != NULL);
+    if (node->next != NULL) {
+        node->next->prev = node->prev;
+    }
+    node->prev->next = node->next;
+}
+
+FORCE_INLINE void gc_big_object_link(bigval_t *sentinel_node, bigval_t *node) JL_NOTSAFEPOINT
+{
+    assert(sentinel_node != NULL);
+    assert(sentinel_node->header == gc_bigval_sentinel_tag);
+    assert(sentinel_node->prev == NULL);
+    assert(node->header != gc_bigval_sentinel_tag);
+    // a new node gets linked in at the head of the list
+    node->next = sentinel_node->next;
+    node->prev = sentinel_node;
+    if (sentinel_node->next != NULL) {
+        sentinel_node->next->prev = node;
+    }
+    sentinel_node->next = node;
+}
+
+// Must be kept in sync with `base/timing.jl`
+#define FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL (0)
+#define FULL_SWEEP_REASON_FORCED_FULL_SWEEP (1)
+#define FULL_SWEEP_REASON_USER_MAX_EXCEEDED (2)
+#define FULL_SWEEP_REASON_LARGE_PROMOTION_RATE (3)
+#define FULL_SWEEP_NUM_REASONS (4)
+
+extern JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];
+STATIC_INLINE void gc_record_full_sweep_reason(int reason) JL_NOTSAFEPOINT
+{
+    assert(reason >= 0 && reason < FULL_SWEEP_NUM_REASONS);
+    jl_full_sweep_reasons[reason]++;
+}
+
+void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT;
+void gc_collect_neighbors(jl_ptls_t ptls, jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT;
+void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
+void jl_gc_debug_init(void);
+
+// GC permanent allocation
+extern uv_mutex_t gc_perm_lock;
+
+// GC pages
+extern uv_mutex_t gc_pages_lock;
+void jl_gc_init_page(void) JL_NOTSAFEPOINT;
+NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT;
+NOINLINE void jl_gc_free_page(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT;
+
+// GC debug
+#if defined(GC_TIME) || defined(GC_FINAL_STATS)
+void gc_settime_premark_end(void);
+void gc_settime_postmark_end(void);
+#else
+#define gc_settime_premark_end()
+#define gc_settime_postmark_end()
+#endif
+
+#ifdef GC_FINAL_STATS
+void gc_final_count_page(size_t pg_cnt);
+void gc_final_pause_end(int64_t t0, int64_t tend);
+#else
+#define gc_final_count_page(pg_cnt)
+#define gc_final_pause_end(t0, tend)
+#endif
+
+#ifdef GC_TIME
+void gc_time_pool_start(void) JL_NOTSAFEPOINT;
+void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT;
+void gc_time_pool_end(int sweep_full) JL_NOTSAFEPOINT;
+void gc_time_sysimg_end(uint64_t t0) JL_NOTSAFEPOINT;
+
+void gc_time_big_start(void) JL_NOTSAFEPOINT;
+void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT;
+void gc_time_big_end(void) JL_NOTSAFEPOINT;
+
+void gc_time_mallocd_memory_start(void) JL_NOTSAFEPOINT;
+void gc_time_count_mallocd_memory(int bits) JL_NOTSAFEPOINT;
+void gc_time_mallocd_memory_end(void) JL_NOTSAFEPOINT;
+
+void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes,
+                        int64_t perm_scanned_bytes);
+void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd,
+                         int64_t live_bytes, int64_t estimate_freed,
+                         int sweep_full);
+void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
+                     uint64_t freed, uint64_t live, uint64_t interval,
+                     uint64_t pause, uint64_t ttsp, uint64_t mark,
+                     uint64_t sweep);
+void gc_heuristics_summary(
+        uint64_t old_alloc_diff, uint64_t alloc_mem,
+        uint64_t old_mut_time, uint64_t alloc_time,
+        uint64_t old_freed_diff, uint64_t gc_mem,
+        uint64_t old_pause_time, uint64_t gc_time,
+        int thrash_counter, const char *reason,
+        uint64_t current_heap, uint64_t target_heap);
+#else
+#define gc_time_pool_start()
+STATIC_INLINE void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT
+{
+    (void)freedall;
+    (void)pg_skpd;
+}
+#define gc_time_pool_end(sweep_full) (void)(sweep_full)
+#define gc_time_sysimg_end(t0) (void)(t0)
+#define gc_time_big_start()
+STATIC_INLINE void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT
+{
+    (void)old_bits;
+    (void)bits;
+}
+#define gc_time_big_end()
+#define gc_time_mallocd_memory_start()
+STATIC_INLINE void gc_time_count_mallocd_memory(int bits) JL_NOTSAFEPOINT
+{
+    (void)bits;
+}
+#define gc_time_mallocd_memory_end()
+#define gc_time_mark_pause(t0, scanned_bytes, perm_scanned_bytes)
+#define gc_time_sweep_pause(gc_end_t, actual_allocd, live_bytes,        \
+                            estimate_freed, sweep_full)
+#define  gc_time_summary(sweep_full, start, end, freed, live,           \
+                         interval, pause, ttsp, mark, sweep)
+#define gc_heuristics_summary( \
+        old_alloc_diff, alloc_mem, \
+        old_mut_time, alloc_time, \
+        old_freed_diff, gc_mem, \
+        old_pause_time, gc_time, \
+        thrash_counter, reason, \
+        current_heap, target_heap)
+#endif
+
+#ifdef MEMFENCE
+void gc_verify_tags(void) JL_NOTSAFEPOINT;
+#else
+static inline void gc_verify_tags(void) JL_NOTSAFEPOINT
+{
+}
+#endif
+
+#ifdef GC_VERIFY
+extern jl_value_t *lostval;
+void gc_verify(jl_ptls_t ptls);
+void add_lostval_parent(jl_value_t *parent);
+#define verify_val(v) do {                                              \
+        if (lostval == (jl_value_t*)(v) && (v) != 0) {                  \
+            jl_printf(JL_STDOUT,                                        \
+                      "Found lostval %p at %s:%d oftype: ",             \
+                      (void*)(lostval), __FILE__, __LINE__);            \
+            jl_static_show(JL_STDOUT, jl_typeof(v));                    \
+            jl_printf(JL_STDOUT, "\n");                                 \
+        }                                                               \
+    } while(0);
+
+#define verify_parent(ty, obj, slot, args...) do {                      \
+        if (gc_ptr_clear_tag(*(void**)(slot), 3) == (void*)lostval &&   \
+            (jl_value_t*)(obj) != lostval) {                            \
+            jl_printf(JL_STDOUT, "Found parent %p %p at %s:%d\n",       \
+                      (void*)(ty), (void*)(obj), __FILE__, __LINE__);   \
+            jl_printf(JL_STDOUT, "\tloc %p : ", (void*)(slot));         \
+            jl_printf(JL_STDOUT, args);                                 \
+            jl_printf(JL_STDOUT, "\n");                                 \
+            jl_printf(JL_STDOUT, "\ttype: ");                           \
+            jl_static_show(JL_STDOUT, jl_typeof(obj));                  \
+            jl_printf(JL_STDOUT, "\n");                                 \
+            add_lostval_parent((jl_value_t*)(obj));                     \
+        }                                                               \
+    } while(0);
+
+#define verify_parent1(ty,obj,slot,arg1) verify_parent(ty,obj,slot,arg1)
+#define verify_parent2(ty,obj,slot,arg1,arg2) verify_parent(ty,obj,slot,arg1,arg2)
+extern int gc_verifying;
+#else
+#define gc_verify(ptls)
+#define verify_val(v)
+#define verify_parent1(ty,obj,slot,arg1) do {} while (0)
+#define verify_parent2(ty,obj,slot,arg1,arg2) do {} while (0)
+#define gc_verifying (0)
+#endif
+
+int gc_slot_to_fieldidx(void *_obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT;
+int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT;
+
+#ifdef GC_DEBUG_ENV
+JL_DLLEXPORT extern jl_gc_debug_env_t jl_gc_debug_env;
+int jl_gc_debug_check_other(void);
+void jl_gc_debug_print(void);
+void gc_scrub_record_task(jl_task_t *ta) JL_NOTSAFEPOINT;
+void gc_scrub(void);
+#else
+STATIC_INLINE int jl_gc_debug_check_other(void) JL_NOTSAFEPOINT
+{
+    return 0;
+}
+STATIC_INLINE void jl_gc_debug_print(void) JL_NOTSAFEPOINT
+{
+}
+STATIC_INLINE void gc_scrub_record_task(jl_task_t *ta) JL_NOTSAFEPOINT
+{
+    (void)ta;
+}
+STATIC_INLINE void gc_scrub(void) JL_NOTSAFEPOINT
+{
+}
+#endif
+
+#ifdef MEMPROFILE
+void gc_stats_all_pool(void);
+void gc_stats_big_obj(void);
+#else
+#define gc_stats_all_pool()
+#define gc_stats_big_obj()
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h
new file mode 100644
index 0000000000000..473668d648294
--- /dev/null
+++ b/src/gc-tls-common.h
@@ -0,0 +1,51 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// Meant to be included in "julia_threads.h"
+#ifndef JL_GC_TLS_COMMON_H
+#define JL_GC_TLS_COMMON_H
+
+#include "julia_atomics.h"
+
+// GC threading ------------------------------------------------------------------
+
+#include "arraylist.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    // variable for tracking weak references
+    small_arraylist_t weak_refs;
+    // live tasks started on this thread
+    // that are holding onto a stack from the pool
+    small_arraylist_t live_tasks;
+
+    // variable for tracking malloc'd arrays
+    small_arraylist_t mallocarrays;
+
+#define JL_N_STACK_POOLS 16
+    small_arraylist_t free_stacks[JL_N_STACK_POOLS];
+} jl_thread_heap_common_t;
+
+typedef struct {
+    _Atomic(int64_t) allocd;
+    _Atomic(int64_t) pool_live_bytes;
+    _Atomic(uint64_t) malloc;
+    _Atomic(uint64_t) realloc;
+    _Atomic(uint64_t) poolalloc;
+    _Atomic(uint64_t) bigalloc;
+    _Atomic(int64_t) free_acc;
+    _Atomic(uint64_t) alloc_acc;
+} jl_thread_gc_num_common_t;
+
+typedef struct {
+    jl_thread_heap_common_t heap;
+    jl_thread_gc_num_common_t gc_num;
+} jl_gc_tls_states_common_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // JL_GC_TLS_COMMON_H
diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h
new file mode 100644
index 0000000000000..5b69aef5d55fb
--- /dev/null
+++ b/src/gc-tls-mmtk.h
@@ -0,0 +1,23 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_GC_TLS_H
+#define JL_GC_TLS_H
+
+#include <assert.h>
+#include "mmtkMutator.h"
+#include "julia_atomics.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    MMTkMutatorContext mmtk_mutator;
+    _Atomic(size_t) malloc_sz_since_last_poll;
+} jl_gc_tls_states_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // JL_GC_TLS_H
diff --git a/src/gc-tls-stock.h b/src/gc-tls-stock.h
new file mode 100644
index 0000000000000..d82506383c501
--- /dev/null
+++ b/src/gc-tls-stock.h
@@ -0,0 +1,68 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// Meant to be included in "julia_threads.h"
+#ifndef JL_GC_TLS_H
+#define JL_GC_TLS_H
+
+#include "julia_atomics.h"
+#include "work-stealing-queue.h"
+// GC threading ------------------------------------------------------------------
+
+#include "arraylist.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    struct _jl_taggedvalue_t *freelist; // root of list of free objects
+    struct _jl_taggedvalue_t *newpages; // root of list of chunks of free objects
+    uint16_t osize; // size of objects in this pool
+} jl_gc_pool_t;
+
+typedef struct {
+    // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects
+    struct _bigval_t *young_generation_of_bigvals;
+
+    // lower bound of the number of pointers inside remembered values
+    int remset_nptr;
+    // remembered set
+    arraylist_t remset;
+
+    // variables for allocating objects from pools
+#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`
+    jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS];
+} jl_thread_heap_t;
+
+typedef struct {
+    ws_queue_t chunk_queue;
+    ws_queue_t ptr_queue;
+    arraylist_t reclaim_set;
+} jl_gc_markqueue_t;
+
+typedef struct {
+    // thread local increment of `perm_scanned_bytes`
+    size_t perm_scanned_bytes;
+    // thread local increment of `scanned_bytes`
+    size_t scanned_bytes;
+} jl_gc_mark_cache_t;
+
+typedef struct {
+    _Atomic(struct _jl_gc_pagemeta_t *) bottom;
+} jl_gc_page_stack_t;
+
+typedef struct {
+    jl_thread_heap_t heap;
+    jl_gc_page_stack_t page_metadata_allocd;
+    jl_gc_markqueue_t mark_queue;
+    jl_gc_mark_cache_t gc_cache;
+    _Atomic(size_t) gc_sweeps_requested;
+    _Atomic(size_t) gc_stack_sweep_requested;
+    arraylist_t sweep_objs;
+} jl_gc_tls_states_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // JL_GC_TLS_H
diff --git a/src/gc-wb-mmtk.h b/src/gc-wb-mmtk.h
new file mode 100644
index 0000000000000..c8c961544fabc
--- /dev/null
+++ b/src/gc-wb-mmtk.h
@@ -0,0 +1,84 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// ========================================================================= //
+// Runtime Write-Barriers
+// ========================================================================= //
+
+#ifndef JL_GC_WB_H
+#define JL_GC_WB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);
+extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr);
+extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
+
+#define MMTK_OBJECT_BARRIER (1)
+// Stickyimmix needs write barrier. Immix does not need write barrier.
+#ifdef MMTK_PLAN_IMMIX
+#define MMTK_NEEDS_WRITE_BARRIER (0)
+#endif
+#ifdef MMTK_PLAN_STICKYIMMIX
+#define MMTK_NEEDS_WRITE_BARRIER (1)
+#endif
+
+// Directly call into MMTk for write barrier (debugging only)
+STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT
+{
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    mmtk_object_reference_write_post(&ptls->gc_tls.mmtk_mutator, parent, ptr);
+}
+
+// Inlined fastpath
+STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSAFEPOINT
+{
+    if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
+        intptr_t addr = (intptr_t) (void*) parent;
+        uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
+        intptr_t shift = (addr >> 3) & 0b111;
+        uint8_t byte_val = *meta_addr;
+        if (((byte_val >> shift) & 1) == 1) {
+            jl_task_t *ct = jl_current_task;
+            jl_ptls_t ptls = ct->ptls;
+            mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr);
+        }
+    }
+}
+
+STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
+{
+    mmtk_gc_wb_fast(parent, ptr);
+}
+
+STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
+{
+    mmtk_gc_wb_fast(ptr, (void*)0);
+}
+
+STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
+{
+    mmtk_gc_wb_fast(parent, (void*)0);
+}
+
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const jl_value_t *dest_owner, _Atomic(void*) * dest_p,
+                                          jl_genericmemory_t *src, _Atomic(void*) * src_p,
+                                          size_t* n) JL_NOTSAFEPOINT
+{
+    mmtk_gc_wb_fast(dest_owner, (void*)0);
+}
+
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const jl_value_t *owner, jl_genericmemory_t *src, char* src_p,
+                                          size_t n, jl_datatype_t *dt) JL_NOTSAFEPOINT
+{
+    mmtk_gc_wb_fast(owner, (void*)0);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gc-wb-stock.h b/src/gc-wb-stock.h
new file mode 100644
index 0000000000000..4f27c74ef1110
--- /dev/null
+++ b/src/gc-wb-stock.h
@@ -0,0 +1,102 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// ========================================================================= //
+// Runtime Write-Barriers
+// ========================================================================= //
+
+#ifndef JL_GC_WB_H
+#define JL_GC_WB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
+{
+    // parent and ptr isa jl_value_t*
+    if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ && // parent is old and not in remset
+                   (jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0)) // ptr is young
+        jl_gc_queue_root((jl_value_t*)parent);
+}
+
+STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
+{
+    // if ptr is old
+    if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3 /* GC_OLD_MARKED */)) {
+        jl_gc_queue_root((jl_value_t*)ptr);
+    }
+}
+
+STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
+{
+    // 3 == GC_OLD_MARKED
+    // ptr is an immutable object
+    if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
+        return; // parent is young or in remset
+    if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3))
+        return; // ptr is old and not in remset (thus it does not point to young)
+    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr);
+    const jl_datatype_layout_t *ly = dt->layout;
+    if (ly->npointers)
+        jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt);
+}
+
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const jl_value_t *dest_owner, _Atomic(void*) * dest_p,
+                                          jl_genericmemory_t *src, _Atomic(void*) * src_p,
+                                          size_t* n) JL_NOTSAFEPOINT
+{
+    if (__unlikely(jl_astaggedvalue(dest_owner)->bits.gc == 3 /* GC_OLD_MARKED */ )) {
+        jl_value_t *src_owner = jl_genericmemory_owner(src);
+        size_t done = 0;
+        if (jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
+            if (dest_p < src_p || dest_p > src_p + (*n)) {
+                for (; done < (*n); done++) { // copy forwards
+                    void *val = jl_atomic_load_relaxed(src_p + done);
+                    jl_atomic_store_release(dest_p + done, val);
+                    // `val` is young or old-unmarked
+                    if (val && !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */)) {
+                        jl_gc_queue_root(dest_owner);
+                        break;
+                    }
+                }
+                src_p += done;
+                dest_p += done;
+            }
+            else {
+                for (; done < (*n); done++) { // copy backwards
+                    void *val = jl_atomic_load_relaxed(src_p + (*n) - done - 1);
+                    jl_atomic_store_release(dest_p + (*n) - done - 1, val);
+                    // `val` is young or old-unmarked
+                    if (val && !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */)) {
+                        jl_gc_queue_root(dest_owner);
+                        break;
+                    }
+                }
+            }
+            (*n) -= done;
+        }
+    }
+}
+
+STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const jl_value_t *owner, jl_genericmemory_t *src, char* src_p,
+                                          size_t n, jl_datatype_t *dt) JL_NOTSAFEPOINT
+{
+    if (__unlikely(jl_astaggedvalue(owner)->bits.gc == 3 /* GC_OLD_MARKED */)) {
+        jl_value_t *src_owner = jl_genericmemory_owner(src);
+        size_t elsz = dt->layout->size;
+        if (jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
+            dt = (jl_datatype_t*)jl_tparam1(dt);
+            for (size_t done = 0; done < n; done++) { // copy forwards
+                char* s = (char*)src_p+done*elsz;
+                if (*((jl_value_t**)s+dt->layout->first_ptr) != NULL)
+                    jl_gc_queue_multiroot(owner, s, dt);
+            }
+        }
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gc.h b/src/gc.h
deleted file mode 100644
index 83c803c3cb8aa..0000000000000
--- a/src/gc.h
+++ /dev/null
@@ -1,647 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-/*
-  allocation and garbage collection
-  . non-moving, precise mark and sweep collector
-  . pool-allocates small objects, keeps big objects on a simple list
-*/
-
-#ifndef JL_GC_H
-#define JL_GC_H
-
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
-#include <inttypes.h>
-#include "julia.h"
-#include "julia_threads.h"
-#include "julia_internal.h"
-#include "threading.h"
-#ifndef _OS_WINDOWS_
-#include <sys/mman.h>
-#if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-#endif
-#include "julia_assert.h"
-#include "gc-heap-snapshot.h"
-#include "gc-alloc-profiler.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define GC_PAGE_LG2 14 // log2(size of a page)
-#define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k
-#define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT))
-
-#define jl_malloc_tag ((void*)0xdeadaa01)
-#define jl_singleton_tag ((void*)0xdeadaa02)
-
-// Used by GC_DEBUG_ENV
-typedef struct {
-    uint64_t num;
-    uint64_t next;
-    uint64_t min;
-    uint64_t interv;
-    uint64_t max;
-    unsigned short random[3];
-} jl_alloc_num_t;
-
-typedef struct {
-    int always_full;
-    int wait_for_debugger;
-    jl_alloc_num_t pool;
-    jl_alloc_num_t other;
-    jl_alloc_num_t print;
-} jl_gc_debug_env_t;
-
-// This struct must be kept in sync with the Julia type of the same name in base/timing.jl
-typedef struct {
-    int64_t     allocd;
-    int64_t     deferred_alloc;
-    int64_t     freed;
-    uint64_t    malloc;
-    uint64_t    realloc;
-    uint64_t    poolalloc;
-    uint64_t    bigalloc;
-    uint64_t    freecall;
-    uint64_t    total_time;
-    uint64_t    total_allocd;
-    size_t      interval;
-    int         pause;
-    int         full_sweep;
-    uint64_t    max_pause;
-    uint64_t    max_memory;
-    uint64_t    time_to_safepoint;
-    uint64_t    max_time_to_safepoint;
-    uint64_t    total_time_to_safepoint;
-    uint64_t    sweep_time;
-    uint64_t    mark_time;
-    uint64_t    total_sweep_time;
-    uint64_t    total_mark_time;
-    uint64_t    last_full_sweep;
-    uint64_t    last_incremental_sweep;
-} jl_gc_num_t;
-
-// Array chunks (work items representing suffixes of
-// large arrays of pointers left to be marked)
-
-typedef enum {
-    GC_empty_chunk = 0, // for sentinel representing no items left in chunk queue
-    GC_objary_chunk,    // for chunk of object array
-    GC_ary8_chunk,      // for chunk of array with 8 bit field descriptors
-    GC_ary16_chunk,     // for chunk of array with 16 bit field descriptors
-    GC_finlist_chunk,   // for chunk of finalizer list
-} gc_chunk_id_t;
-
-typedef struct _jl_gc_chunk_t {
-    gc_chunk_id_t cid;
-    struct _jl_value_t *parent; // array owner
-    struct _jl_value_t **begin; // pointer to first element that needs scanning
-    struct _jl_value_t **end;   // pointer to last element that needs scanning
-    void *elem_begin;           // used to scan pointers within objects when marking `ary8` or `ary16`
-    void *elem_end;             // used to scan pointers within objects when marking `ary8` or `ary16`
-    uint32_t step;              // step-size used when marking objarray
-    uintptr_t nptr;             // (`nptr` & 0x1) if array has young element and (`nptr` & 0x2) if array owner is old
-} jl_gc_chunk_t;
-
-#define GC_CHUNK_BATCH_SIZE (1 << 16)       // maximum number of references that can be processed
-                                            // without creating a chunk
-
-#define GC_PTR_QUEUE_INIT_SIZE (1 << 18)    // initial size of queue of `jl_value_t *`
-#define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14)  // initial size of chunk-queue
-
-// layout for big (>2k) objects
-
-JL_EXTENSION typedef struct _bigval_t {
-    struct _bigval_t *next;
-    struct _bigval_t **prev; // pointer to the next field of the prev entry
-    size_t sz;
-#ifdef _P64 // Add padding so that the value is 64-byte aligned
-    // (8 pointers of 8 bytes each) - (4 other pointers in struct)
-    void *_padding[8 - 4];
-#else
-    // (16 pointers of 4 bytes each) - (4 other pointers in struct)
-    void *_padding[16 - 4];
-#endif
-    //struct jl_taggedvalue_t <>;
-    union {
-        uintptr_t header;
-        struct {
-            uintptr_t gc:2;
-        } bits;
-    };
-    // must be 64-byte aligned here, in 32 & 64 bit modes
-} bigval_t;
-
-// data structure for tracking malloc'd arrays.
-
-typedef struct _mallocarray_t {
-    jl_array_t *a;
-    struct _mallocarray_t *next;
-} mallocarray_t;
-
-// pool page metadata
-typedef struct _jl_gc_pagemeta_t {
-    struct _jl_gc_pagemeta_t *next;
-    // index of pool that owns this page
-    uint8_t pool_n;
-    // Whether any cell in the page is marked
-    // This bit is set before sweeping iff there are live cells in the page.
-    // Note that before marking or after sweeping there can be live
-    // (and young) cells in the page for `!has_marked`.
-    uint8_t has_marked;
-    // Whether any cell was live and young **before sweeping**.
-    // For a normal sweep (quick sweep that is NOT preceded by a
-    // full sweep) this bit is set iff there are young or newly dead
-    // objects in the page and the page needs to be swept.
-    //
-    // For a full sweep, this bit should be ignored.
-    //
-    // For a quick sweep preceded by a full sweep. If this bit is set,
-    // the page needs to be swept. If this bit is not set, there could
-    // still be old dead objects in the page and `nold` and `prev_nold`
-    // should be used to determine if the page needs to be swept.
-    uint8_t has_young;
-    // number of old objects in this page
-    uint16_t nold;
-    // number of old objects in this page during the previous full sweep
-    uint16_t prev_nold;
-    // number of free objects in this page.
-    // invalid if pool that owns this page is allocating objects from this page.
-    uint16_t nfree;
-    uint16_t osize;           // size of each object in this page
-    uint16_t fl_begin_offset; // offset of first free object in this page
-    uint16_t fl_end_offset;   // offset of last free object in this page
-    uint16_t thread_n;        // thread id of the heap that owns this page
-    char *data;
-} jl_gc_pagemeta_t;
-
-typedef struct {
-    _Atomic(jl_gc_pagemeta_t *) page_metadata_back;
-} jl_gc_global_page_pool_t;
-
-extern jl_gc_global_page_pool_t global_page_pool_lazily_freed;
-extern jl_gc_global_page_pool_t global_page_pool_clean;
-extern jl_gc_global_page_pool_t global_page_pool_freed;
-
-#define GC_BACKOFF_MIN 4
-#define GC_BACKOFF_MAX 12
-
-STATIC_INLINE void gc_backoff(int *i) JL_NOTSAFEPOINT
-{
-    if (*i < GC_BACKOFF_MAX) {
-        (*i)++;
-    }
-    for (int j = 0; j < (1 << *i); j++) {
-        jl_cpu_pause();
-    }
-}
-
-// Lock-free stack implementation taken
-// from Herlihy's "The Art of Multiprocessor Programming"
-
-STATIC_INLINE void push_lf_page_metadata_back(jl_gc_global_page_pool_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT
-{
-    while (1) {
-        jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->page_metadata_back);
-        elt->next = old_back;
-        if (jl_atomic_cmpswap(&pool->page_metadata_back, &old_back, elt)) {
-            break;
-        }
-        jl_cpu_pause();
-    }
-}
-
-STATIC_INLINE jl_gc_pagemeta_t *pop_lf_page_metadata_back(jl_gc_global_page_pool_t *pool) JL_NOTSAFEPOINT
-{
-    while (1) {
-        jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->page_metadata_back);
-        if (old_back == NULL) {
-            return NULL;
-        }
-        if (jl_atomic_cmpswap(&pool->page_metadata_back, &old_back, old_back->next)) {
-            return old_back;
-        }
-        jl_cpu_pause();
-    }
-}
-
-#ifdef _P64
-#define REGION0_PG_COUNT (1 << 16)
-#define REGION1_PG_COUNT (1 << 16)
-#define REGION2_PG_COUNT (1 << 18)
-#define REGION0_INDEX(p) (((uintptr_t)(p) >> 14) & 0xFFFF) // shift by GC_PAGE_LG2
-#define REGION1_INDEX(p) (((uintptr_t)(p) >> 30) & 0xFFFF)
-#define REGION_INDEX(p)  (((uintptr_t)(p) >> 46) & 0x3FFFF)
-#else
-#define REGION0_PG_COUNT (1 << 8)
-#define REGION1_PG_COUNT (1 << 10)
-#define REGION2_PG_COUNT (1 << 0)
-#define REGION0_INDEX(p) (((uintptr_t)(p) >> 14) & 0xFF) // shift by GC_PAGE_LG2
-#define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF)
-#define REGION_INDEX(p)  (0)
-#endif
-
-// define the representation of the levels of the page-table (0 to 2)
-typedef struct {
-    uint8_t meta[REGION0_PG_COUNT];
-} pagetable0_t;
-
-typedef struct {
-    pagetable0_t *meta0[REGION1_PG_COUNT];
-} pagetable1_t;
-
-typedef struct {
-    pagetable1_t *meta1[REGION2_PG_COUNT];
-} pagetable_t;
-
-#define GC_PAGE_UNMAPPED        0
-#define GC_PAGE_ALLOCATED       1
-#define GC_PAGE_LAZILY_FREED    2
-#define GC_PAGE_FREED           3
-
-extern pagetable_t alloc_map;
-
-STATIC_INLINE uint8_t gc_alloc_map_is_set(char *_data) JL_NOTSAFEPOINT
-{
-    uintptr_t data = ((uintptr_t)_data);
-    unsigned i;
-    i = REGION_INDEX(data);
-    pagetable1_t *r1 = alloc_map.meta1[i];
-    if (r1 == NULL)
-        return 0;
-    i = REGION1_INDEX(data);
-    pagetable0_t *r0 = r1->meta0[i];
-    if (r0 == NULL)
-        return 0;
-    i = REGION0_INDEX(data);
-    return (r0->meta[i] == GC_PAGE_ALLOCATED);
-}
-
-STATIC_INLINE void gc_alloc_map_set(char *_data, uint8_t v) JL_NOTSAFEPOINT
-{
-    uintptr_t data = ((uintptr_t)_data);
-    unsigned i;
-    i = REGION_INDEX(data);
-    pagetable1_t *r1 = alloc_map.meta1[i];
-    assert(r1 != NULL);
-    i = REGION1_INDEX(data);
-    pagetable0_t *r0 = r1->meta0[i];
-    assert(r0 != NULL);
-    i = REGION0_INDEX(data);
-    r0->meta[i] = v;
-}
-
-STATIC_INLINE void gc_alloc_map_maybe_create(char *_data) JL_NOTSAFEPOINT
-{
-    uintptr_t data = ((uintptr_t)_data);
-    unsigned i;
-    i = REGION_INDEX(data);
-    pagetable1_t *r1 = alloc_map.meta1[i];
-    if (r1 == NULL) {
-        r1 = (pagetable1_t*)calloc_s(sizeof(pagetable1_t));
-        alloc_map.meta1[i] = r1;
-    }
-    i = REGION1_INDEX(data);
-    pagetable0_t *r0 = r1->meta0[i];
-    if (r0 == NULL) {
-        r0 = (pagetable0_t*)calloc_s(sizeof(pagetable0_t));
-        r1->meta0[i] = r0;
-    }
-}
-
-// Page layout:
-//  Metadata pointer: sizeof(jl_gc_pagemeta_t*)
-//  Padding: GC_PAGE_OFFSET - sizeof(jl_gc_pagemeta_t*)
-//  Blocks: osize * n
-//    Tag: sizeof(jl_taggedvalue_t)
-//    Data: <= osize - sizeof(jl_taggedvalue_t)
-
-STATIC_INLINE char *gc_page_data(void *x) JL_NOTSAFEPOINT
-{
-    return (char*)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2);
-}
-
-STATIC_INLINE jl_gc_pagemeta_t *page_metadata_unsafe(void *_data) JL_NOTSAFEPOINT
-{
-    return *(jl_gc_pagemeta_t**)(gc_page_data(_data));
-}
-
-STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT
-{
-    if (!gc_alloc_map_is_set((char*)_data)) {
-        return NULL;
-    }
-    return page_metadata_unsafe(_data);
-}
-
-STATIC_INLINE void set_page_metadata(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
-{
-    *(jl_gc_pagemeta_t**)(pg->data) = pg;
-}
-
-STATIC_INLINE void push_page_metadata_back(jl_gc_pagemeta_t **ppg, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT
-{
-    elt->next = *ppg;
-    *ppg = elt;
-}
-
-STATIC_INLINE jl_gc_pagemeta_t *pop_page_metadata_back(jl_gc_pagemeta_t **ppg) JL_NOTSAFEPOINT
-{
-    jl_gc_pagemeta_t *v = *ppg;
-    if (*ppg != NULL) {
-        *ppg = (*ppg)->next;
-    }
-    return v;
-}
-
-#ifdef __clang_gcanalyzer__ /* clang may not have __builtin_ffs */
-unsigned ffs_u32(uint32_t bitvec) JL_NOTSAFEPOINT;
-#else
-STATIC_INLINE unsigned ffs_u32(uint32_t bitvec)
-{
-    return __builtin_ffs(bitvec) - 1;
-}
-#endif
-
-extern jl_gc_num_t gc_num;
-extern bigval_t *big_objects_marked;
-extern arraylist_t finalizer_list_marked;
-extern arraylist_t to_finalize;
-extern int64_t lazy_freed_pages;
-extern int gc_first_tid;
-extern int gc_n_threads;
-extern jl_ptls_t* gc_all_tls_states;
-
-STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT
-{
-    return container_of(o, bigval_t, header);
-}
-
-STATIC_INLINE jl_taggedvalue_t *page_pfl_beg(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT
-{
-    return (jl_taggedvalue_t*)(p->data + p->fl_begin_offset);
-}
-
-STATIC_INLINE jl_taggedvalue_t *page_pfl_end(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT
-{
-    return (jl_taggedvalue_t*)(p->data + p->fl_end_offset);
-}
-
-STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT
-{
-    return (bits & GC_MARKED) != 0;
-}
-
-STATIC_INLINE int gc_old(uintptr_t bits) JL_NOTSAFEPOINT
-{
-    return (bits & GC_OLD) != 0;
-}
-
-STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT
-{
-    return (tag & ~(uintptr_t)3) | bits;
-}
-
-STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
-{
-    return ((uintptr_t)v) & mask;
-}
-
-STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
-{
-    return (void*)(((uintptr_t)v) & ~mask);
-}
-
-NOINLINE uintptr_t gc_get_stack_ptr(void);
-
-STATIC_INLINE void gc_big_object_unlink(const bigval_t *hdr) JL_NOTSAFEPOINT
-{
-    *hdr->prev = hdr->next;
-    if (hdr->next) {
-        hdr->next->prev = hdr->prev;
-    }
-}
-
-STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFEPOINT
-{
-    hdr->next = *list;
-    hdr->prev = list;
-    if (*list)
-        (*list)->prev = &hdr->next;
-    *list = hdr;
-}
-
-extern uv_mutex_t gc_threads_lock;
-extern uv_cond_t gc_threads_cond;
-extern uv_sem_t gc_sweep_assists_needed;
-extern _Atomic(int) gc_n_threads_marking;
-void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
-void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;
-void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT;
-void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
-void gc_mark_loop_serial(jl_ptls_t ptls);
-void gc_mark_loop_parallel(jl_ptls_t ptls, int master);
-void sweep_stack_pools(void);
-void jl_gc_debug_init(void);
-
-// GC pages
-
-void jl_gc_init_page(void) JL_NOTSAFEPOINT;
-NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT;
-void jl_gc_free_page(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT;
-
-// GC debug
-
-#if defined(GC_TIME) || defined(GC_FINAL_STATS)
-void gc_settime_premark_end(void);
-void gc_settime_postmark_end(void);
-#else
-#define gc_settime_premark_end()
-#define gc_settime_postmark_end()
-#endif
-
-#ifdef GC_FINAL_STATS
-void gc_final_count_page(size_t pg_cnt);
-void gc_final_pause_end(int64_t t0, int64_t tend);
-#else
-#define gc_final_count_page(pg_cnt)
-#define gc_final_pause_end(t0, tend)
-#endif
-
-#ifdef GC_TIME
-void gc_time_pool_start(void) JL_NOTSAFEPOINT;
-void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT;
-void gc_time_pool_end(int sweep_full) JL_NOTSAFEPOINT;
-void gc_time_sysimg_end(uint64_t t0) JL_NOTSAFEPOINT;
-
-void gc_time_big_start(void) JL_NOTSAFEPOINT;
-void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT;
-void gc_time_big_end(void) JL_NOTSAFEPOINT;
-
-void gc_time_mallocd_array_start(void) JL_NOTSAFEPOINT;
-void gc_time_count_mallocd_array(int bits) JL_NOTSAFEPOINT;
-void gc_time_mallocd_array_end(void) JL_NOTSAFEPOINT;
-
-void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes,
-                        int64_t perm_scanned_bytes);
-void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd,
-                         int64_t live_bytes, int64_t estimate_freed,
-                         int sweep_full);
-void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
-                     uint64_t freed, uint64_t live, uint64_t interval,
-                     uint64_t pause, uint64_t ttsp, uint64_t mark,
-                     uint64_t sweep);
-#else
-#define gc_time_pool_start()
-STATIC_INLINE void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT
-{
-    (void)freedall;
-    (void)pg_skpd;
-}
-#define gc_time_pool_end(sweep_full) (void)(sweep_full)
-#define gc_time_sysimg_end(t0) (void)(t0)
-#define gc_time_big_start()
-STATIC_INLINE void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT
-{
-    (void)old_bits;
-    (void)bits;
-}
-#define gc_time_big_end()
-#define gc_time_mallocd_array_start()
-STATIC_INLINE void gc_time_count_mallocd_array(int bits) JL_NOTSAFEPOINT
-{
-    (void)bits;
-}
-#define gc_time_mallocd_array_end()
-#define gc_time_mark_pause(t0, scanned_bytes, perm_scanned_bytes)
-#define gc_time_sweep_pause(gc_end_t, actual_allocd, live_bytes,        \
-                            estimate_freed, sweep_full)
-#define  gc_time_summary(sweep_full, start, end, freed, live,           \
-                         interval, pause, ttsp, mark, sweep)
-#endif
-
-#ifdef MEMFENCE
-void gc_verify_tags(void);
-#else
-static inline void gc_verify_tags(void)
-{
-}
-#endif
-
-#ifdef GC_VERIFY
-extern jl_value_t *lostval;
-void gc_verify(jl_ptls_t ptls);
-void add_lostval_parent(jl_value_t *parent);
-#define verify_val(v) do {                                              \
-        if (lostval == (jl_value_t*)(v) && (v) != 0) {                  \
-            jl_printf(JL_STDOUT,                                        \
-                      "Found lostval %p at %s:%d oftype: ",             \
-                      (void*)(lostval), __FILE__, __LINE__);            \
-            jl_static_show(JL_STDOUT, jl_typeof(v));                    \
-            jl_printf(JL_STDOUT, "\n");                                 \
-        }                                                               \
-    } while(0);
-
-#define verify_parent(ty, obj, slot, args...) do {                      \
-        if (gc_ptr_clear_tag(*(void**)(slot), 3) == (void*)lostval &&   \
-            (jl_value_t*)(obj) != lostval) {                            \
-            jl_printf(JL_STDOUT, "Found parent %p %p at %s:%d\n",       \
-                      (void*)(ty), (void*)(obj), __FILE__, __LINE__);   \
-            jl_printf(JL_STDOUT, "\tloc %p : ", (void*)(slot));         \
-            jl_printf(JL_STDOUT, args);                                 \
-            jl_printf(JL_STDOUT, "\n");                                 \
-            jl_printf(JL_STDOUT, "\ttype: ");                           \
-            jl_static_show(JL_STDOUT, jl_typeof(obj));                  \
-            jl_printf(JL_STDOUT, "\n");                                 \
-            add_lostval_parent((jl_value_t*)(obj));                     \
-        }                                                               \
-    } while(0);
-
-#define verify_parent1(ty,obj,slot,arg1) verify_parent(ty,obj,slot,arg1)
-#define verify_parent2(ty,obj,slot,arg1,arg2) verify_parent(ty,obj,slot,arg1,arg2)
-extern int gc_verifying;
-#else
-#define gc_verify(ptls)
-#define verify_val(v)
-#define verify_parent1(ty,obj,slot,arg1) do {} while (0)
-#define verify_parent2(ty,obj,slot,arg1,arg2) do {} while (0)
-#define gc_verifying (0)
-#endif
-
-int gc_slot_to_fieldidx(void *_obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT;
-int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT;
-NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_markqueue_t *mq, int offset) JL_NOTSAFEPOINT;
-
-#ifdef GC_DEBUG_ENV
-JL_DLLEXPORT extern jl_gc_debug_env_t jl_gc_debug_env;
-#define gc_sweep_always_full jl_gc_debug_env.always_full
-int jl_gc_debug_check_other(void);
-int gc_debug_check_pool(void);
-void jl_gc_debug_print(void);
-void gc_scrub_record_task(jl_task_t *ta) JL_NOTSAFEPOINT;
-void gc_scrub(void);
-#else
-#define gc_sweep_always_full 0
-static inline int jl_gc_debug_check_other(void)
-{
-    return 0;
-}
-static inline int gc_debug_check_pool(void)
-{
-    return 0;
-}
-static inline void jl_gc_debug_print(void)
-{
-}
-static inline void gc_scrub_record_task(jl_task_t *ta) JL_NOTSAFEPOINT
-{
-    (void)ta;
-}
-static inline void gc_scrub(void)
-{
-}
-#endif
-
-#ifdef OBJPROFILE
-void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT;
-void objprofile_printall(void);
-void objprofile_reset(void);
-#else
-static inline void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT
-{
-}
-
-static inline void objprofile_printall(void)
-{
-}
-
-static inline void objprofile_reset(void)
-{
-}
-#endif
-
-#ifdef MEMPROFILE
-void gc_stats_all_pool(void);
-void gc_stats_big_obj(void);
-#else
-#define gc_stats_all_pool()
-#define gc_stats_big_obj()
-#endif
-
-// For debugging
-void gc_count_pool(void);
-
-size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT;
-
-JL_DLLEXPORT void jl_enable_gc_logging(int enable);
-void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/gen_sysimg_symtab.jl b/src/gen_sysimg_symtab.jl
index 8f03cc1560767..110d83ba9083d 100644
--- a/src/gen_sysimg_symtab.jl
+++ b/src/gen_sysimg_symtab.jl
@@ -11,16 +11,10 @@ import Base.Iterators: take, drop
 function _eachmethod(f, m::Module, visited, vmt)
     push!(visited, m)
     for nm in names(m, all=true)
-        if isdefined(m, nm)
-            x = getfield(m, nm)
+        if isdefinedglobal(m, nm)
+            x = getglobal(m, nm)
             if isa(x, Module) && !in(x, visited)
                 _eachmethod(f, x, visited, vmt)
-            elseif isa(x, Function)
-                mt = typeof(x).name.mt
-                if !in(mt, vmt)
-                    push!(vmt, mt)
-                    Base.visit(f, mt)
-                end
             elseif isa(x, Type)
                 x = Base.unwrap_unionall(x)
                 if isa(x, DataType) && isdefined(x.name, :mt)
@@ -69,5 +63,5 @@ function outputline(io, name)
     println(io, "jl_symbol(\"", name, "\"),")
 end
 
-open(f->foreach(l->outputline(f,l), take(syms, 100)), "common_symbols1.inc", "w")
-open(f->foreach(l->outputline(f,l), take(drop(syms, 100), 254)), "common_symbols2.inc", "w")
+open(f->foreach(l->outputline(f,l), take(syms, 94)), "common_symbols1.inc", "w")
+open(f->foreach(l->outputline(f,l), take(drop(syms, 94), 254)), "common_symbols2.inc", "w")
diff --git a/src/genericmemory.c b/src/genericmemory.c
new file mode 100644
index 0000000000000..ae45237433fcc
--- /dev/null
+++ b/src/genericmemory.c
@@ -0,0 +1,594 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+/*
+  GenericMemory{kind, T} constructors and primitives
+*/
+#include <stdlib.h>
+#include <string.h>
+#ifdef _OS_WINDOWS_
+#include <malloc.h>
+#endif
+#include "julia.h"
+#include "julia_internal.h"
+#include "julia_assert.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// genericmemory constructors ---------------------------------------------------------
+JL_DLLEXPORT char *jl_genericmemory_typetagdata(jl_genericmemory_t *m) JL_NOTSAFEPOINT
+{
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout;
+    assert(layout->flags.arrayelem_isunion);
+    return (char*)m->ptr + m->length * layout->size;
+}
+
+#define MAXINTVAL (((size_t)-1)>>1)
+
+// ONLY USE FROM CODEGEN. It only partially initializes the mem
+JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory_unchecked(jl_ptls_t ptls, size_t nbytes, jl_datatype_t *mtype)
+{
+    size_t tot = nbytes + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT);
+
+    int pooled = tot <= GC_MAX_SZCLASS;
+    char *data;
+    jl_genericmemory_t *m;
+    if (!pooled) {
+        data = (char*)jl_gc_managed_malloc(nbytes);
+        tot = sizeof(jl_genericmemory_t) + sizeof(void*);
+    }
+    m = (jl_genericmemory_t*)jl_gc_alloc(ptls, tot, mtype);
+    if (pooled) {
+        data = (char*)m + JL_SMALL_BYTE_ALIGNMENT;
+    }
+    else {
+        int isaligned = 1; // jl_gc_managed_malloc is always aligned
+        jl_gc_track_malloced_genericmemory(ptls, m, isaligned);
+        jl_genericmemory_data_owner_field(m) = (jl_value_t*)m;
+    }
+    // length set by codegen
+    m->ptr = data;
+    return m;
+}
+
+jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz)
+{
+    if (nel == 0) // zero-sized allocation optimization
+        return (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance;
+    size_t nbytes = 0; // initialized to workaround clang sa bug on v20: https://github.com/llvm/llvm-project/issues/136292
+    int overflow = __builtin_mul_overflow(nel, elsz, &nbytes);
+    if (isunion) {
+        // an extra byte for each isbits union memory element, stored at m->ptr + m->length
+        overflow |= __builtin_add_overflow(nel, nbytes, &nbytes);
+    }
+    if ((nel >= MAXINTVAL-1) || (nbytes >= MAXINTVAL-1) || overflow)
+        jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width");
+    jl_task_t *ct = jl_current_task;
+    jl_genericmemory_t *m = jl_alloc_genericmemory_unchecked((jl_ptls_t) ct->ptls, nbytes, (jl_datatype_t*)mtype);
+    m->length = nel;
+    if (zeroinit)
+        memset((char*)m->ptr, 0, nbytes);
+    return m;
+}
+
+JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory(jl_value_t *mtype, size_t nel)
+{
+    assert(jl_is_datatype(mtype));
+    jl_genericmemory_t *m = (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance;
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout;
+    if (m == NULL) {
+        jl_value_t *kind = jl_tparam0((jl_datatype_t*)mtype);
+        if (kind != (jl_value_t*)jl_not_atomic_sym && kind != (jl_value_t*)jl_atomic_sym)
+            jl_error("GenericMemory kind must be :not_atomic or :atomic");
+        jl_value_t *addrspace = jl_tparam2((jl_datatype_t*)mtype);
+        if (!jl_is_addrspacecore(addrspace) || jl_unbox_uint8(addrspace) != 0)
+            jl_error("GenericMemory addrspace must be Core.CPU");
+        if (!((jl_datatype_t*)mtype)->has_concrete_subtype || layout == NULL)
+            jl_type_error_rt("GenericMemory", "element type", (jl_value_t*)jl_type_type, jl_tparam1(mtype));
+        abort(); // this is checked already by jl_get_genericmemory_layout
+    }
+    assert(((jl_datatype_t*)mtype)->has_concrete_subtype && layout != NULL);
+    if (nel == 0) // zero-sized allocation optimization fast path
+        return m;
+
+    size_t elsz = layout->size;
+    int isboxed = layout->flags.arrayelem_isboxed;
+    int isunion = layout->flags.arrayelem_isunion;
+    int zi = ((jl_datatype_t*)mtype)->zeroinit;
+    if (isboxed)
+        elsz = sizeof(void*);
+    return _new_genericmemory_(mtype, nel, isunion, zi, elsz);
+}
+
+JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str)
+{
+    if (jl_string_len(str) == 0)
+        return (jl_genericmemory_t*)((jl_datatype_t*)jl_memory_uint8_type)->instance;
+    jl_task_t *ct = jl_current_task;
+    int tsz = sizeof(jl_genericmemory_t) + sizeof(void*);
+    jl_genericmemory_t *m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, jl_memory_uint8_type);
+    m->length = jl_string_len(str);
+    m->ptr = jl_string_data(str);
+    jl_genericmemory_data_owner_field(m) = str;
+    return m;
+}
+
+// own_buffer != 0 iff GC should call free() on this pointer eventually
+JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void *data,
+                                                         size_t nel, int own_buffer)
+{
+    jl_task_t *ct = jl_current_task;
+    assert(jl_is_datatype(mtype));
+    jl_genericmemory_t *m = (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance;
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout;
+    if (m == NULL) {
+        jl_value_t *kind = jl_tparam0((jl_datatype_t*)mtype);
+        if (kind != (jl_value_t*)jl_not_atomic_sym && kind != (jl_value_t*)jl_atomic_sym)
+            jl_error("GenericMemory kind must be :not_atomic or :atomic");
+        jl_value_t *addrspace = jl_tparam2((jl_datatype_t*)mtype);
+        if (!jl_is_addrspacecore(addrspace) || jl_unbox_uint8(addrspace) != 0)
+            jl_error("GenericMemory addrspace must be Core.CPU");
+        if (!((jl_datatype_t*)mtype)->has_concrete_subtype || layout == NULL)
+            jl_type_error_rt("GenericMemory", "element type", (jl_value_t*)jl_type_type, jl_tparam1(mtype));
+        abort();
+    }
+    assert(((jl_datatype_t*)mtype)->has_concrete_subtype && layout != NULL);
+    //if (nel == 0) {// zero-sized allocation optimization fast path
+    //    if (own_buffer)
+    //        free(data);
+    //    return m;
+    //}
+
+    size_t elsz = layout->size;
+    size_t align = layout->alignment;
+    int isboxed = layout->flags.arrayelem_isboxed;
+    int isunion = layout->flags.arrayelem_isunion;
+    if (isboxed)
+        elsz = sizeof(void*);
+    if (isunion)
+        jl_exceptionf(jl_argumenterror_type,
+                      "unsafe_wrap: unspecified layout for union element type");
+    if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1))
+        jl_exceptionf(jl_argumenterror_type,
+                      "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
+    size_t nbytes = 0; // initialized to workaround clang sa bug on v20: https://github.com/llvm/llvm-project/issues/136292
+    int overflow = __builtin_mul_overflow(nel, elsz, &nbytes);
+    if (isunion) {
+        // an extra byte for each isbits union memory element, stored at m->ptr + m->length
+        overflow |= __builtin_add_overflow(nel, nbytes, &nbytes);
+    }
+    if ((nel >= MAXINTVAL) || (nbytes >= MAXINTVAL) || overflow)
+        jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width");
+    int tsz = sizeof(jl_genericmemory_t) + sizeof(void*);
+    m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, mtype);
+    m->ptr = data;
+    m->length = nel;
+    jl_genericmemory_data_owner_field(m) = own_buffer ? (jl_value_t*)m : NULL;
+    if (own_buffer) {
+        int isaligned = 0;  // TODO: allow passing memalign'd buffers
+        jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned);
+        size_t allocated_bytes = memory_block_usable_size(data, isaligned);
+        jl_gc_count_allocd(allocated_bytes);
+    }
+    return m;
+}
+
+JL_DLLEXPORT jl_genericmemory_t *jl_new_genericmemory(jl_value_t *mtype, jl_value_t *nel)
+{
+    return jl_alloc_genericmemory(mtype, jl_unbox_long(nel));
+}
+
+JL_DLLEXPORT jl_genericmemory_t *jl_pchar_to_genericmemory(const char *str, size_t len)
+{
+    jl_genericmemory_t *m = jl_alloc_genericmemory(jl_memory_uint8_type, len);
+    memcpy(m->ptr, str, len);
+    return m;
+}
+
+JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_t len)
+{
+    assert(len <= m->length);
+    if (len == 0) {
+        // this may seem like purely an optimization (which it also is), but it
+        // also ensures that calling `String(m)` doesn't corrupt a previous
+        // string also created the same way, where `m = StringVector(_)`.
+        return jl_an_empty_string;
+    }
+    int how = jl_genericmemory_how(m);
+    size_t mlength = m->length;
+    if (how != 0) {
+        jl_value_t *o = jl_genericmemory_data_owner_field(m);
+        jl_genericmemory_data_owner_field(m) = NULL;
+        if (how == 3 && // implies jl_is_string(o)
+             ((mlength + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS))) {
+            if (jl_string_data(o)[len] != '\0')
+                jl_string_data(o)[len] = '\0';
+            if (*(size_t*)o != len)
+                *(size_t*)o = len;
+            return o;
+        }
+        JL_GC_PUSH1(&o);
+        jl_value_t *str = jl_pchar_to_string((const char*)m->ptr, len);
+        JL_GC_POP();
+        return str;
+    }
+    // n.b. how == 0 is always pool-allocated, so the freed bytes are computed from the pool not the object
+    return jl_pchar_to_string((const char*)m->ptr, len);
+}
+
+JL_DLLEXPORT jl_genericmemory_t *jl_alloc_memory_any(size_t n)
+{
+    return jl_alloc_genericmemory(jl_memory_any_type, n);
+}
+
+JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destdata,
+                                          jl_genericmemory_t *src, char* srcdata,
+                                          size_t n) JL_NOTSAFEPOINT
+{
+    jl_datatype_t *dt = (jl_datatype_t*)jl_typetagof(dest);
+    if (dt != (jl_datatype_t*)jl_typetagof(src))
+        jl_exceptionf(jl_argumenterror_type, "jl_genericmemory_copyto requires source and dest to have same type");
+    const jl_datatype_layout_t *layout = dt->layout;
+    if (layout->flags.arrayelem_isboxed) {
+        _Atomic(void*) * dest_p = (_Atomic(void*)*)destdata;
+        _Atomic(void*) * src_p = (_Atomic(void*)*)srcdata;
+        jl_value_t *owner = jl_genericmemory_owner(dest);
+        jl_gc_wb_genericmemory_copy_boxed(owner, dest_p, src, src_p, &n);
+        return memmove_refs(dest_p, src_p, n);
+    }
+    size_t elsz = layout->size;
+    char *src_p = srcdata;
+    int isbitsunion = layout->flags.arrayelem_isunion;
+    if (isbitsunion) {
+        char *sourcetypetagdata = jl_genericmemory_typetagdata(src);
+        char *desttypetagdata = jl_genericmemory_typetagdata(dest);
+        memmove(desttypetagdata+(size_t)destdata, sourcetypetagdata+(size_t)srcdata, n);
+        srcdata = (char*)src->ptr + elsz*(size_t)srcdata;
+        destdata = (char*)dest->ptr + elsz*(size_t)destdata;
+    }
+    if (layout->first_ptr != -1) {
+        memmove_refs((_Atomic(void*)*)destdata, (_Atomic(void*)*)srcdata, n * elsz / sizeof(void*));
+        jl_value_t *owner = jl_genericmemory_owner(dest);
+        jl_gc_wb_genericmemory_copy_ptr(owner, src, src_p, n, dt);
+    }
+    else {
+        memmove(destdata, srcdata, n * elsz);
+    }
+}
+
+
+// genericmemory primitives -----------------------------------------------------------
+
+JL_DLLEXPORT jl_value_t *jl_genericmemoryref(jl_genericmemory_t *mem, size_t i)
+{
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(mem))->layout;
+    int isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
+    jl_genericmemoryref_t m;
+    m.mem = mem;
+    m.ptr_or_offset = (layout->flags.arrayelem_isunion || layout->size == 0) ? (void*)i : (void*)((char*)mem->ptr + layout->size * i);
+    return jl_memoryrefget(m, isatomic);
+}
+
+JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy_slice(jl_genericmemory_t *mem, void *data, size_t len)
+{
+    jl_value_t *mtype = (jl_value_t*)jl_typetagof(mem);
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout;
+    size_t elsz = layout->size;
+    int isunion = layout->flags.arrayelem_isunion;
+    jl_genericmemory_t *new_mem = _new_genericmemory_(mtype, len, isunion, 0, elsz);
+    if (isunion) {
+        memcpy(new_mem->ptr, (char*)mem->ptr + (size_t)data * elsz, len * elsz);
+        memcpy(jl_genericmemory_typetagdata(new_mem), jl_genericmemory_typetagdata(mem) + (size_t)data, len);
+    }
+    else if (layout->first_ptr != -1) {
+        if (data == NULL) {
+            assert(len * elsz / sizeof(void*) == 0); // make static analyzer happy
+        }
+        memmove_refs((_Atomic(void*)*)new_mem->ptr, (_Atomic(void*)*)data, len * elsz / sizeof(void*));
+    }
+    else if (data != NULL) {
+        memcpy(new_mem->ptr, data, len * elsz);
+    }
+    return new_mem;
+}
+
+JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy(jl_genericmemory_t *mem)
+{
+    jl_value_t *mtype = (jl_value_t*)jl_typetagof(mem);
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout;
+    return jl_genericmemory_copy_slice(mem, layout->flags.arrayelem_isunion || layout->size == 0 ? (void*)0 : mem->ptr, mem->length);
+}
+
+JL_DLLEXPORT jl_value_t *(jl_genericmemory_data_owner)(jl_genericmemory_t *m) JL_NOTSAFEPOINT
+{
+    return jl_genericmemory_data_owner_field(m);
+}
+
+jl_genericmemoryref_t *jl_new_memoryref(jl_value_t *typ, jl_genericmemory_t *mem, void *data)
+{
+    jl_task_t *ct = jl_current_task;
+    jl_genericmemoryref_t *m = (jl_genericmemoryref_t*)jl_gc_alloc(ct->ptls, sizeof(jl_genericmemoryref_t), typ);
+    m->mem = mem;
+    m->ptr_or_offset = data;
+    return m;
+}
+
+// memoryref primitives
+JL_DLLEXPORT jl_genericmemoryref_t jl_memoryrefindex(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, size_t idx)
+{
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    if ((layout->flags.arrayelem_isboxed || !layout->flags.arrayelem_isunion) && layout->size != 0) {
+        m.ptr_or_offset = (void*)((char*)m.ptr_or_offset + idx * layout->size);
+        assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < layout->size * m.mem->length);
+    }
+    else {
+        m.ptr_or_offset = (void*)((size_t)m.ptr_or_offset + idx);
+        assert((size_t)m.ptr_or_offset < m.mem->length);
+    }
+    return m;
+}
+
+static jl_value_t *jl_ptrmemrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT, int isatomic) JL_NOTSAFEPOINT
+{
+    assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length);
+    assert(((jl_datatype_t*)jl_typetagof(m.mem))->layout->flags.arrayelem_isboxed);
+    _Atomic(jl_value_t*) *ptr = (_Atomic(jl_value_t*)*)m.ptr_or_offset;
+    jl_value_t *elt = isatomic ? jl_atomic_load(ptr) : jl_atomic_load_relaxed(ptr);
+    if (elt == NULL)
+        jl_throw(jl_undefref_exception);
+    return elt;
+}
+
+JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m, int isatomic)
+{
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    assert(isatomic == (layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked));
+    if (layout->flags.arrayelem_isboxed)
+        return jl_ptrmemrefget(m, isatomic);
+    jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
+    char *data = (char*)m.ptr_or_offset;
+    if (layout->flags.arrayelem_isunion) {
+        assert(!isatomic);
+        assert(jl_is_uniontype(eltype));
+        size_t i = (size_t)data;
+        assert(i < m.mem->length);
+        // isbits union selector bytes are always stored directly after the last memory element
+        uint8_t sel = jl_genericmemory_typetagdata(m.mem)[i];
+        eltype = jl_nth_union_component(eltype, sel);
+        data = (char*)m.mem->ptr + i * layout->size;
+    }
+    if (layout->size == 0) {
+        assert(jl_is_datatype_singleton((jl_datatype_t*)eltype));
+        return ((jl_datatype_t*)eltype)->instance;
+    }
+    assert(data - (char*)m.mem->ptr < layout->size * m.mem->length);
+    jl_value_t *r;
+    size_t fsz = jl_datatype_size(eltype);
+    int needlock = layout->flags.arrayelem_islocked;
+    if (isatomic && !needlock) {
+        r = jl_atomic_new_bits(eltype, data);
+    }
+    else if (needlock) {
+        jl_task_t *ct = jl_current_task;
+        r = jl_gc_alloc(ct->ptls, fsz, eltype);
+        jl_lock_field((jl_mutex_t*)data);
+        memcpy((char*)r, data + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT), fsz);
+        jl_unlock_field((jl_mutex_t*)data);
+    }
+    else {
+        // TODO: a finalizer here could make the isunion case not quite right
+        r = jl_new_bits(eltype, data);
+    }
+    r = undefref_check((jl_datatype_t*)eltype, r);
+    if (__unlikely(r == NULL))
+        jl_throw(jl_undefref_exception);
+    return r;
+}
+
+static int _jl_memoryref_isassigned(jl_genericmemoryref_t m, int isatomic)
+{
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    _Atomic(jl_value_t*) *elem = (_Atomic(jl_value_t*)*)m.ptr_or_offset;
+    if (layout->flags.arrayelem_isboxed) {
+    }
+    else if (layout->first_ptr >= 0) {
+        elem = &elem[layout->first_ptr];
+    }
+    else {
+        return 1;
+    }
+    return (isatomic ? jl_atomic_load(elem) : jl_atomic_load_relaxed(elem)) != NULL;
+}
+
+JL_DLLEXPORT jl_value_t *jl_memoryref_isassigned(jl_genericmemoryref_t m, int isatomic)
+{
+    return _jl_memoryref_isassigned(m, isatomic) ? jl_true : jl_false;
+}
+
+JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, int isatomic)
+{
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    assert(isatomic == (layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked));
+    jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
+    if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) {
+        JL_GC_PUSH1(&rhs);
+        if (!jl_isa(rhs, eltype))
+            jl_type_error("memoryrefset!", eltype, rhs);
+        JL_GC_POP();
+    }
+    if (layout->flags.arrayelem_isboxed) {
+        assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length);
+        if (isatomic)
+            jl_atomic_store((_Atomic(jl_value_t*)*)m.ptr_or_offset, rhs);
+        else
+            jl_atomic_store_release((_Atomic(jl_value_t*)*)m.ptr_or_offset, rhs);
+        jl_gc_wb(jl_genericmemory_owner(m.mem), rhs);
+        return;
+    }
+    int hasptr;
+    char *data = (char*)m.ptr_or_offset;
+    if (layout->flags.arrayelem_isunion) {
+        assert(!isatomic);
+        assert(jl_is_uniontype(eltype));
+        size_t i = (size_t)data;
+        assert(i < m.mem->length);
+        uint8_t *psel = (uint8_t*)jl_genericmemory_typetagdata(m.mem) + i;
+        unsigned nth = 0;
+        if (!jl_find_union_component(eltype, jl_typeof(rhs), &nth))
+            assert(0 && "invalid genericmemoryset to isbits union");
+        *psel = nth;
+        hasptr = 0;
+        data = (char*)m.mem->ptr + i * layout->size;
+    }
+    else {
+        hasptr = layout->first_ptr >= 0;
+    }
+    if (layout->size != 0) {
+        assert(data - (char*)m.mem->ptr < layout->size * m.mem->length);
+        int needlock = layout->flags.arrayelem_islocked;
+        size_t fsz = jl_datatype_size((jl_datatype_t*)jl_typeof(rhs)); // need to shrink-wrap the final copy
+        if (isatomic && !needlock) {
+            jl_atomic_store_bits(data, rhs, fsz);
+        }
+        else if (needlock) {
+            jl_lock_field((jl_mutex_t*)data);
+            memassign_safe(hasptr, data + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT), rhs, fsz);
+            jl_unlock_field((jl_mutex_t*)data);
+        }
+        else {
+            memassign_safe(hasptr, data, rhs, fsz);
+        }
+        if (hasptr)
+            jl_gc_multi_wb(jl_genericmemory_owner(m.mem), rhs); // rhs is immutable
+    }
+}
+
+JL_DLLEXPORT jl_value_t *jl_memoryrefswap(jl_genericmemoryref_t m, jl_value_t *rhs, int isatomic)
+{
+    jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
+    if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) {
+        if (!jl_isa(rhs, eltype))
+            jl_type_error("memoryrefswap!", eltype, rhs);
+    }
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    jl_value_t *owner = jl_genericmemory_owner(m.mem);
+    char *data = (char*)m.ptr_or_offset;
+    if (layout->flags.arrayelem_isboxed) {
+        assert(data - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length);
+        jl_value_t *r;
+        if (isatomic)
+            r = jl_atomic_exchange((_Atomic(jl_value_t*)*)data, rhs);
+        else
+            r = jl_atomic_exchange_release((_Atomic(jl_value_t*)*)data, rhs);
+        jl_gc_wb(owner, rhs);
+        if (__unlikely(r == NULL))
+            jl_throw(jl_undefref_exception);
+        return r;
+    }
+    uint8_t *psel = NULL;
+    if (layout->flags.arrayelem_isunion) {
+        assert(!isatomic);
+        assert(jl_is_uniontype(eltype));
+        size_t i = (size_t)data;
+        assert(i < m.mem->length);
+        psel = (uint8_t*)jl_genericmemory_typetagdata(m.mem) + i;
+        data = (char*)m.mem->ptr + i * layout->size;
+    }
+    return swap_bits(eltype, data, psel, owner, rhs, isatomic ? isatomic_field : isatomic_none);
+}
+
+JL_DLLEXPORT jl_value_t *jl_memoryrefmodify(jl_genericmemoryref_t m, jl_value_t *op, jl_value_t *rhs, int isatomic)
+{
+    jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    jl_value_t *owner = jl_genericmemory_owner(m.mem);
+    char *data = (char*)m.ptr_or_offset;
+    if (layout->flags.arrayelem_isboxed) {
+        assert(data - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length);
+        return modify_value(eltype, (_Atomic(jl_value_t*)*)data, owner, op, rhs, isatomic, NULL, NULL, NULL);
+    }
+    size_t fsz = layout->size;
+    uint8_t *psel = NULL;
+    if (layout->flags.arrayelem_isunion) {
+        assert(!isatomic);
+        assert(jl_is_uniontype(eltype));
+        size_t i = (size_t)data;
+        assert(i < m.mem->length);
+        psel = (uint8_t*)jl_genericmemory_typetagdata(m.mem) + i;
+        data = (char*)m.mem->ptr + i * fsz;
+    }
+    return modify_bits(eltype, data, psel, owner, op, rhs, isatomic ? isatomic_field : isatomic_none);
+}
+
+JL_DLLEXPORT jl_value_t *jl_memoryrefreplace(jl_genericmemoryref_t m, jl_value_t *expected, jl_value_t *rhs, int isatomic)
+{
+    jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
+    if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) {
+        if (!jl_isa(rhs, eltype))
+            jl_type_error("memoryrefreplace!", eltype, rhs);
+    }
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    jl_value_t *owner = jl_genericmemory_owner(m.mem);
+    char *data = (char*)m.ptr_or_offset;
+    if (layout->flags.arrayelem_isboxed) {
+        assert(data - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length);
+        return replace_value(eltype, (_Atomic(jl_value_t*)*)data, owner, expected, rhs, isatomic, NULL, NULL);
+    }
+    uint8_t *psel = NULL;
+    if (layout->flags.arrayelem_isunion) {
+        assert(!isatomic);
+        assert(jl_is_uniontype(eltype));
+        size_t i = (size_t)data;
+        assert(i < m.mem->length);
+        psel = (uint8_t*)jl_genericmemory_typetagdata(m.mem) + i;
+        data = (char*)m.mem->ptr + i * layout->size;
+    }
+    return replace_bits(eltype, data, psel, owner, expected, rhs, isatomic ? isatomic_field : isatomic_none);
+}
+
+JL_DLLEXPORT jl_value_t *jl_memoryrefsetonce(jl_genericmemoryref_t m, jl_value_t *rhs, int isatomic)
+{
+    jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
+    if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) {
+        if (!jl_isa(rhs, eltype))
+            jl_type_error("memoryrefsetonce!", eltype, rhs);
+    }
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    jl_value_t *owner = jl_genericmemory_owner(m.mem);
+    char *data = (char*)m.ptr_or_offset;
+    int success;
+    if (layout->flags.arrayelem_isboxed) {
+        assert(data - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length);
+        jl_value_t *r = NULL;
+        _Atomic(jl_value_t*) *px = (_Atomic(jl_value_t*)*)data;
+        success = isatomic ? jl_atomic_cmpswap(px, &r, rhs) : jl_atomic_cmpswap_release(px, &r, rhs);
+        if (success)
+            jl_gc_wb(owner, rhs);
+    }
+    else {
+        if (layout->flags.arrayelem_isunion) {
+            assert(!isatomic);
+            assert(jl_is_uniontype(eltype));
+            size_t i = (size_t)data;
+            assert(i < m.mem->length);
+            (void)i;
+            success = 0;
+        }
+        else if (layout->first_ptr < 0) {
+            success = 0;
+        }
+        else {
+            success = setonce_bits((jl_datatype_t*)eltype, data, owner, rhs, isatomic ? isatomic_field : isatomic_none);
+        }
+    }
+    return success ? jl_true : jl_false;
+}
+
+JL_DLLEXPORT jl_value_t *ijl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+{
+    return jl_genericmemory_owner(m);
+}
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/gf.c b/src/gf.c
index 294e1fccb8783..1d3a9636ddfa9 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -24,7 +24,10 @@
 extern "C" {
 #endif
 
+_Atomic(int) allow_new_worlds = 1;
 JL_DLLEXPORT _Atomic(size_t) jl_world_counter = 1; // uses atomic acquire/release
+jl_mutex_t world_counter_lock;
+
 JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT
 {
     jl_task_t *ct = jl_current_task;
@@ -39,31 +42,41 @@ JL_DLLEXPORT size_t jl_get_tls_world_age(void) JL_NOTSAFEPOINT
 }
 
 // Compute the maximum number of times to unroll Varargs{T}, based on
-// m->max_varargs (if specified) or a heuristic based on the maximum
-// number of non-varargs arguments in the provided method table.
+// m->max_varargs (if specified) or a heuristic based on the maximum number of
+// non-varargs arguments for the function type of the method signature.
 //
 // If provided, `may_increase` is set to 1 if the returned value is
 // heuristic-based and has a chance of increasing in the future.
 static size_t get_max_varargs(
         jl_method_t *m,
-        jl_methtable_t *kwmt,
-        jl_methtable_t *mt,
         uint8_t *may_increase) JL_NOTSAFEPOINT
 {
     size_t max_varargs = 1;
     if (may_increase != NULL)
         *may_increase = 0;
 
-    if (m->max_varargs != UINT8_MAX)
+    if (m->max_varargs != UINT8_MAX) {
         max_varargs = m->max_varargs;
-    else if (kwmt != NULL && kwmt != jl_type_type_mt && kwmt != jl_nonfunction_mt && kwmt != jl_kwcall_mt) {
-        if (may_increase != NULL)
-            *may_increase = 1; // `max_args` can increase as new methods are inserted
-
-        max_varargs = jl_atomic_load_relaxed(&kwmt->max_args) + 2;
-        if (mt == jl_kwcall_mt)
-            max_varargs += 2;
-        max_varargs -= m->nargs;
+    }
+    else {
+        jl_datatype_t *dt1 = jl_nth_argument_datatype(m->sig, 1);
+        jl_datatype_t *dt;
+        if (jl_kwcall_type && dt1 == jl_kwcall_type)
+            dt = jl_nth_argument_datatype(m->sig, 3);
+        else
+            dt = dt1;
+        if (dt != NULL && !jl_is_type_type((jl_value_t*)dt) && dt != jl_kwcall_type) {
+            if (may_increase != NULL)
+                *may_increase = 1; // `max_args` can increase as new methods are inserted
+
+            max_varargs = jl_atomic_load_relaxed(&dt->name->max_args) + 2;
+            if (jl_kwcall_type && dt1 == jl_kwcall_type)
+                max_varargs += 2;
+            if (max_varargs > m->nargs)
+                max_varargs -= m->nargs;
+            else
+                max_varargs = 0;
+        }
     }
     return max_varargs;
 }
@@ -94,33 +107,35 @@ void jl_call_tracer(tracer_cb callback, jl_value_t *tracee)
     JL_CATCH {
         ct->ptls->in_pure_callback = last_in;
         jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: tracer callback function threw an error:\n");
-        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-        jlbacktrace(); // written to STDERR_FILENO
+        jl_fprint_backtrace(ios_safe_stderr);
     }
 }
 
 /// ----- Definitions for various internal TypeMaps ----- ///
 
-static int8_t jl_cachearg_offset(jl_methtable_t *mt)
+static int8_t jl_cachearg_offset(void)
 {
-    return mt->offs;
+    return 0;
 }
 
 /// ----- Insertion logic for special entries ----- ///
 
 
-static uint_t speccache_hash(size_t idx, jl_svec_t *data)
+uint_t speccache_hash(size_t idx, jl_value_t *data)
 {
-    jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx);
+    jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx); // This must always happen inside the lock
     jl_value_t *sig = ml->specTypes;
     if (jl_is_unionall(sig))
         sig = jl_unwrap_unionall(sig);
     return ((jl_datatype_t*)sig)->hash;
 }
 
-static int speccache_eq(size_t idx, const void *ty, jl_svec_t *data, uint_t hv)
+static int speccache_eq(size_t idx, const void *ty, jl_value_t *data, uint_t hv)
 {
+    if (idx >= jl_svec_len(data))
+        return 0; // We got a OOB access, probably due to a data race
     jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx);
     jl_value_t *sig = ml->specTypes;
     if (ty == sig)
@@ -134,12 +149,12 @@ static int speccache_eq(size_t idx, const void *ty, jl_svec_t *data, uint_t hv)
 // get or create the MethodInstance for a specialization
 static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams, jl_method_instance_t *mi_insert)
 {
-    if (m->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&m->unspecialized) != NULL && m != jl_opaque_closure_method)
+    if (m->source == NULL && m->generator == NULL && jl_atomic_load_relaxed(&m->unspecialized) != NULL && m != jl_opaque_closure_method && !m->is_for_opaque_closure)
         return jl_atomic_load_relaxed(&m->unspecialized); // handle builtin methods
     jl_value_t *ut = jl_is_unionall(type) ? jl_unwrap_unionall(type) : type;
     JL_TYPECHK(specializations, datatype, ut);
     uint_t hv = ((jl_datatype_t*)ut)->hash;
-    jl_array_t *speckeyset = NULL;
+    jl_genericmemory_t *speckeyset = NULL;
     jl_value_t *specializations = NULL;
     size_t i = -1, cl = 0, lastcl;
     for (int locked = 0; locked < 2; locked++) {
@@ -164,7 +179,7 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO
         }
         cl = jl_svec_len(specializations);
         if (hv) {
-            ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, (jl_svec_t*)specializations, hv);
+            ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, specializations, hv, 0);
             if (idx != -1) {
                 jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, idx);
                 if (locked)
@@ -210,7 +225,7 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO
             jl_atomic_store_release(&m->specializations, specializations);
             jl_gc_wb(m, specializations);
             if (hv)
-                jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, 0, (jl_svec_t*)specializations);
+                jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, 0, specializations);
         }
         if (hv) {
             _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
@@ -242,7 +257,7 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO
         assert(jl_svecref(specializations, i) == jl_nothing);
         jl_svecset(specializations, i, mi);
         if (hv)
-            jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, i, (jl_svec_t*)specializations);
+            jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, i, specializations);
         JL_GC_POP();
     }
     JL_UNLOCK(&m->writelock); // may gc
@@ -270,13 +285,13 @@ JL_DLLEXPORT jl_value_t *jl_specializations_lookup(jl_method_t *m, jl_value_t *t
     return mi;
 }
 
-JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *type, size_t world)
+JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_value_t *type, size_t world)
 {
     // TODO: this is sort of an odd lookup strategy (and the only user of
     // jl_typemap_assoc_by_type with subtype=0), while normally jl_gf_invoke_lookup would be
     // expected to be used instead
-    struct jl_typemap_assoc search = {type, world, NULL, 0, ~(size_t)0};
-    jl_typemap_entry_t *sf = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, jl_cachearg_offset(mt), /*subtype*/0);
+    struct jl_typemap_assoc search = {type, world, NULL};
+    jl_typemap_entry_t *sf = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&jl_method_table->defs), &search, 0, /*subtype*/0);
     if (!sf)
         return jl_nothing;
     return sf->func.value;
@@ -284,68 +299,119 @@ JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *typ
 
 // ----- MethodInstance specialization instantiation ----- //
 
-JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst(
-        jl_method_instance_t *mi, jl_value_t *rettype,
-        jl_value_t *inferred_const, jl_value_t *inferred,
-        int32_t const_flags, size_t min_world, size_t max_world,
-        uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes,
-        uint8_t relocatability);
-
-jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_args_t fptr) JL_GC_DISABLED
+jl_method_t *jl_mk_builtin_func(jl_datatype_t *dt, jl_sym_t *sname, jl_fptr_args_t fptr) JL_GC_DISABLED
 {
-    jl_sym_t *sname = jl_symbol(name);
-    if (dt == NULL) {
-        jl_value_t *f = jl_new_generic_function_with_supertype(sname, jl_core_module, jl_builtin_type);
-        jl_set_const(jl_core_module, sname, f);
-        dt = (jl_datatype_t*)jl_typeof(f);
-    }
+    jl_value_t *params[2];
+    params[0] = dt->name->wrapper;
+    params[1] = jl_tparam0(jl_anytuple_type);
+    jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2);
+
+    jl_typemap_entry_t *newentry = NULL;
+    jl_method_t *m = NULL;
+    JL_GC_PUSH3(&m, &newentry, &tuptyp);
 
-    jl_method_t *m = jl_new_method_uninit(jl_core_module);
+    m = jl_new_method_uninit(jl_core_module);
     m->name = sname;
     m->module = jl_core_module;
     m->isva = 1;
     m->nargs = 2;
-    m->sig = (jl_value_t*)jl_anytuple_type;
+    jl_atomic_store_relaxed(&m->primary_world, 1);
+    jl_atomic_store_relaxed(&m->dispatch_status, METHOD_SIG_LATEST_ONLY | METHOD_SIG_LATEST_WHICH);
+    m->sig = (jl_value_t*)tuptyp;
     m->slot_syms = jl_an_empty_string;
     m->nospecialize = 0;
     m->nospecialize = ~m->nospecialize;
 
-    jl_methtable_t *mt = dt->name->mt;
-    jl_typemap_entry_t *newentry = NULL;
-    JL_GC_PUSH2(&m, &newentry);
-
-    newentry = jl_typemap_alloc(jl_anytuple_type, NULL, jl_emptysvec,
-            (jl_value_t*)m, 1, ~(size_t)0);
-    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, jl_cachearg_offset(mt));
-
-    jl_method_instance_t *mi = jl_get_specialized(m, (jl_value_t*)jl_anytuple_type, jl_emptysvec);
+    jl_method_instance_t *mi = jl_get_specialized(m, (jl_value_t*)tuptyp, jl_emptysvec);
     jl_atomic_store_relaxed(&m->unspecialized, mi);
     jl_gc_wb(m, mi);
 
-    jl_code_instance_t *codeinst = jl_new_codeinst(mi,
-        (jl_value_t*)jl_any_type, jl_nothing, jl_nothing,
-        0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-    jl_mi_cache_insert(mi, codeinst);
+    jl_debuginfo_t *di = NULL;
+    jl_svec_t *edges = jl_emptysvec;
+    jl_code_instance_t *codeinst = jl_new_codeinst(mi, jl_nothing,
+        (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, jl_nothing, jl_nothing,
+        0, 1, ~(size_t)0, 0, jl_nothing, di, edges);
     jl_atomic_store_relaxed(&codeinst->specptr.fptr1, fptr);
     jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_args);
+    jl_mi_cache_insert(mi, codeinst);
+
+    newentry = jl_typemap_alloc(tuptyp, NULL, jl_emptysvec,
+            (jl_value_t*)m, 1, ~(size_t)0);
+    jl_typemap_insert(&jl_method_table->defs, (jl_value_t*)jl_method_table, newentry, 0);
 
-    newentry = jl_typemap_alloc(jl_anytuple_type, NULL, jl_emptysvec,
+    newentry = jl_typemap_alloc(tuptyp, NULL, jl_emptysvec,
             (jl_value_t*)mi, 1, ~(size_t)0);
-    jl_typemap_insert(&mt->cache, (jl_value_t*)mt, newentry, 0);
+    jl_typemap_insert(&jl_method_table->cache->cache, (jl_value_t*)jl_method_table->cache, newentry, 0);
 
-    mt->frozen = 1;
     JL_GC_POP();
-    return dt;
+    return m;
+}
+
+// only relevant for bootstrapping. otherwise fairly broken.
+static int emit_codeinst_and_edges(jl_code_instance_t *codeinst)
+{
+    jl_value_t *code = jl_atomic_load_relaxed(&codeinst->inferred);
+    if (code) {
+        if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL)
+            return 1;
+        if (code != jl_nothing) {
+            JL_GC_PUSH1(&code);
+            jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+            jl_method_t *def = mi->def.method;
+            if (jl_is_method(def))
+                code = (jl_value_t*)jl_uncompress_ir(def, codeinst, (jl_value_t*)code);
+            if (jl_is_code_info(code)) {
+                jl_emit_codeinst_to_jit(codeinst, (jl_code_info_t*)code);
+                if (0) {
+                    // next emit all the invoke edges too (if this seems profitable)
+                    jl_array_t *src = ((jl_code_info_t*)code)->code;
+                    for (size_t i = 0; i < jl_array_dim0(src); i++) {
+                        jl_value_t *stmt = jl_array_ptr_ref(src, i);
+                        if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_assign_sym)
+                            stmt = jl_exprarg(stmt, 1);
+                        if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_invoke_sym) {
+                            jl_value_t *invoke = jl_exprarg(stmt, 0);
+                            if (jl_is_code_instance(invoke))
+                                emit_codeinst_and_edges((jl_code_instance_t*)invoke);
+                        }
+                    }
+                }
+                JL_GC_POP();
+                return 1;
+            }
+            JL_GC_POP();
+        }
+    }
+    return 0;
+}
+
+// Opportunistic SOURCE_MODE_ABI cache lookup, only for bootstrapping.
+static jl_code_instance_t *jl_method_inferred_with_abi(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
+{
+    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
+    for (; codeinst; codeinst = jl_atomic_load_relaxed(&codeinst->next)) {
+        if (codeinst->owner != jl_nothing)
+            continue;
+        if (jl_atomic_load_relaxed(&codeinst->min_world) <= world && world <= jl_atomic_load_relaxed(&codeinst->max_world)) {
+            if (emit_codeinst_and_edges(codeinst))
+                return codeinst;
+        }
+    }
+    return NULL;
 }
 
 // run type inference on lambda "mi" for given argument types.
 // returns the inferred source, and may cache the result in mi
 // if successful, also updates the mi argument to describe the validity of this src
 // if inference doesn't occur (or can't finish), returns NULL instead
-jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
+jl_code_instance_t *jl_type_infer(jl_method_instance_t *mi, size_t world, uint8_t source_mode, uint8_t trim_mode)
 {
-    if (jl_typeinf_func == NULL)
-        return NULL;
+    if (jl_typeinf_func == NULL) {
+        if (source_mode == SOURCE_MODE_ABI)
+            return jl_method_inferred_with_abi(mi, world);
+        else
+            return NULL;
+    }
     jl_task_t *ct = jl_current_task;
     if (ct->reentrant_timing & 0b1000) {
         // We must avoid attempting to re-enter inference here
@@ -356,16 +422,22 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
     if ((ct->reentrant_timing & 0b1111) >= 0b110)
         return NULL;
 
-    jl_code_info_t *src = NULL;
+    jl_code_instance_t *ci = NULL;
 #ifdef ENABLE_INFERENCE
-    if (mi->inInference && !force)
+    if (jl_engine_hasreserved(mi, jl_nothing)) // don't recur on a thread on the same MethodInstance--force it to interpret it until the inference has finished
         return NULL;
     JL_TIMING(INFERENCE, INFERENCE);
     jl_value_t **fargs;
-    JL_GC_PUSHARGS(fargs, 3);
+    JL_GC_PUSHARGS(fargs, 5);
     fargs[0] = (jl_value_t*)jl_typeinf_func;
     fargs[1] = (jl_value_t*)mi;
     fargs[2] = jl_box_ulong(world);
+    fargs[3] = jl_box_uint8(source_mode);
+    fargs[4] = jl_box_uint8(trim_mode);
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
 
     jl_timing_show_method_instance(mi, JL_TIMING_DEFAULT_BLOCK);
 #ifdef TRACE_INFERENCE
@@ -375,13 +447,10 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
         jl_printf(JL_STDERR, "\n");
     }
 #endif
-    int last_errno = errno;
-#ifdef _OS_WINDOWS_
-    DWORD last_error = GetLastError();
-#endif
+    int last_pure = ct->ptls->in_pure_callback;
+    ct->ptls->in_pure_callback = 0;
     size_t last_age = ct->world_age;
     ct->world_age = jl_typeinf_world;
-    mi->inInference = 1;
     // first bit is for reentrant timing,
     // so adding 1 to the bit above performs
     // inference reentrancy counter addition.
@@ -391,39 +460,90 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
     // allocate another bit for the counter.
     ct->reentrant_timing += 0b10;
     JL_TRY {
-        src = (jl_code_info_t*)jl_apply(fargs, 3);
+        ci = (jl_code_instance_t*)jl_apply(fargs, 5);
     }
     JL_CATCH {
-        jl_value_t *e = jl_current_exception();
+        jl_value_t *e = jl_current_exception(ct);
+        jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: during type inference of\n");
+        jl_static_show_func_sig((JL_STREAM*)STDERR_FILENO, (jl_value_t*)mi->specTypes);
+        jl_printf((JL_STREAM*)STDERR_FILENO, "\nEncountered ");
         if (e == jl_stackovf_exception) {
-            jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: stack overflow in type inference of ");
-            jl_static_show_func_sig((JL_STREAM*)STDERR_FILENO, (jl_value_t*)mi->specTypes);
-            jl_printf((JL_STREAM*)STDERR_FILENO, ".\n");
+            jl_printf((JL_STREAM*)STDERR_FILENO, "stack overflow.\n");
             jl_printf((JL_STREAM*)STDERR_FILENO, "This might be caused by recursion over very long tuples or argument lists.\n");
         }
         else {
-            jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error in runtime:\n");
+            jl_printf((JL_STREAM*)STDERR_FILENO, "unexpected error in runtime:\n");
             jl_static_show((JL_STREAM*)STDERR_FILENO, e);
             jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-            jlbacktrace(); // written to STDERR_FILENO
+            jl_fprint_backtrace(ios_safe_stderr);
         }
-        src = NULL;
+        ci = NULL;
+#ifndef JL_NDEBUG
+        abort();
+#endif
     }
     ct->world_age = last_age;
     ct->reentrant_timing -= 0b10;
-    mi->inInference = 0;
+    ct->ptls->in_pure_callback = last_pure;
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
 #endif
     errno = last_errno;
 
-    if (src && !jl_is_code_info(src)) {
-        src = NULL;
+    if (ci && !jl_is_code_instance(ci)) {
+        ci = NULL;
+    }
+
+    // Record inference entrance backtrace if enabled
+    if (ci) {
+        JL_GC_PUSH1(&ci);
+        jl_push_inference_entrance_backtraces((jl_value_t*)ci);
+        JL_GC_POP();
     }
+
     JL_GC_POP();
 #endif
 
-    return src;
+    return ci;
+}
+
+// Attempt to run `Core.Compiler.code_typed` on the lambda "mi"
+JL_DLLEXPORT jl_code_info_t *jl_gdbcodetyped1(jl_method_instance_t *mi, size_t world)
+{
+    jl_task_t *ct = jl_current_task;
+    jl_code_info_t *ci = NULL;
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    int last_pure = ct->ptls->in_pure_callback;
+    ct->ptls->in_pure_callback = 0;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_typeinf_world;
+    jl_value_t **fargs;
+    JL_GC_PUSHARGS(fargs, 4);
+    jl_module_t *CC = (jl_module_t*)jl_get_global_value(jl_core_module, jl_symbol("Compiler"), ct->world_age);
+    if (CC != NULL && jl_is_module(CC)) {
+        JL_GC_PROMISE_ROOTED(CC);
+        fargs[0] = jl_get_global_value(CC, jl_symbol("NativeInterpreter"), ct->world_age);
+        fargs[1] = jl_box_ulong(world);
+        fargs[1] = jl_apply(fargs, 2);
+        fargs[0] = jl_get_global_value(CC, jl_symbol("typeinf_code"), ct->world_age);
+        fargs[2] = (jl_value_t*)mi;
+        fargs[3] = jl_true;
+        ci = (jl_code_info_t*)jl_apply(fargs, 4);
+    }
+    ct->world_age = last_age;
+    ct->ptls->in_pure_callback = last_pure;
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+    if (ci && !jl_is_code_info(ci)) {
+        ci = NULL;
+    }
+    JL_GC_POP();
+    return ci;
 }
 
 JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs)
@@ -431,81 +551,198 @@ JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs)
     jl_task_t *ct = jl_current_task;
     size_t last_age = ct->world_age;
     ct->world_age = jl_typeinf_world;
+    int last_pure = ct->ptls->in_pure_callback;
+    ct->ptls->in_pure_callback = 0;
     jl_value_t *ret = jl_apply(args, nargs);
+    ct->ptls->in_pure_callback = last_pure;
     ct->world_age = last_age;
     return ret;
 }
 
-JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT
+JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
+        jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
+        size_t min_world, size_t max_world, jl_debuginfo_t *di, jl_svec_t *edges)
 {
+    jl_value_t *owner = jl_nothing; // TODO: owner should be arg
     jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
-    while (codeinst) {
-        if (codeinst->min_world <= min_world && max_world <= codeinst->max_world) {
-            jl_value_t *code = jl_atomic_load_relaxed(&codeinst->inferred);
-            if (code && (code == jl_nothing || jl_ir_flag_inferred(code)))
-                return (jl_value_t*)codeinst;
+    for (; codeinst; codeinst = jl_atomic_load_relaxed(&codeinst->next)) {
+        if (jl_atomic_load_relaxed(&codeinst->min_world) == min_world &&
+            jl_atomic_load_relaxed(&codeinst->max_world) == max_world &&
+            jl_egal(codeinst->owner, owner) &&
+            jl_egal(codeinst->rettype, rettype)) {
+            if (di == NULL)
+                return codeinst;
+            jl_debuginfo_t *debuginfo = jl_atomic_load_relaxed(&codeinst->debuginfo);
+            if (di != debuginfo) {
+                if (!(debuginfo == NULL && jl_atomic_cmpswap_relaxed(&codeinst->debuginfo, &debuginfo, di)))
+                    if (!(debuginfo && jl_egal((jl_value_t*)debuginfo, (jl_value_t*)di)))
+                        continue;
+            }
+            // TODO: this is implied by the matching worlds, since it is intrinsic, so do we really need to verify it?
+            jl_svec_t *e = jl_atomic_load_relaxed(&codeinst->edges);
+            if (e && jl_egal((jl_value_t*)e, (jl_value_t*)edges))
+                return codeinst;
         }
-        codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
-    return (jl_value_t*)jl_nothing;
+    codeinst = jl_new_codeinst(
+        mi, owner, rettype, (jl_value_t*)jl_any_type, NULL, NULL,
+        0, min_world, max_world, 0, jl_nothing, di, edges);
+    jl_mi_cache_insert(mi, codeinst);
+    return codeinst;
 }
-JL_DLLEXPORT jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT = jl_rettype_inferred;
 
+JL_DLLEXPORT int jl_mi_cache_has_ci(jl_method_instance_t *mi,
+                                    jl_code_instance_t *ci)
+{
+    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
+    while (codeinst) {
+        if (codeinst == ci)
+            return 1;
+        codeinst = jl_atomic_load_relaxed(&codeinst->next);
+    }
+    return 0;
+}
 
-JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
-        jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
-        size_t min_world, size_t max_world)
+// look for something with an egal ABI and properties that is already in the JIT for a whole edge (target_world=0) or can be added to the JIT with new source just for target_world.
+JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, size_t target_world) JL_NOTSAFEPOINT
 {
+    jl_value_t *def = ci->def;
+    jl_method_instance_t *mi = jl_get_ci_mi(ci);
+    jl_value_t *owner = ci->owner;
+    jl_value_t *rettype = ci->rettype;
+    size_t min_world = jl_atomic_load_relaxed(&ci->min_world);
+    size_t max_world = jl_atomic_load_relaxed(&ci->max_world);
     jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
     while (codeinst) {
-        if (codeinst->min_world == min_world &&
-            codeinst->max_world == max_world &&
+        if (codeinst != ci &&
+            jl_atomic_load_relaxed(&codeinst->inferred) != NULL &&
+            (target_world ? 1 : jl_atomic_load_relaxed(&codeinst->invoke) != NULL) &&
+            jl_atomic_load_relaxed(&codeinst->min_world) <= (target_world ? target_world : min_world) &&
+            jl_atomic_load_relaxed(&codeinst->max_world) >= (target_world ? target_world : max_world) &&
+            jl_egal(codeinst->def, def) &&
+            jl_egal(codeinst->owner, owner) &&
             jl_egal(codeinst->rettype, rettype)) {
             return codeinst;
         }
         codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
-    codeinst = jl_new_codeinst(
-        mi, rettype, NULL, NULL,
-        0, min_world, max_world, 0, 0, jl_nothing, 0);
-    jl_mi_cache_insert(mi, codeinst);
-    return codeinst;
+    return ci;
 }
 
+
 JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
-        jl_method_instance_t *mi, jl_value_t *rettype,
+        jl_method_instance_t *mi, jl_value_t *owner,
+        jl_value_t *rettype, jl_value_t *exctype,
         jl_value_t *inferred_const, jl_value_t *inferred,
         int32_t const_flags, size_t min_world, size_t max_world,
-        uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes,
-        uint8_t relocatability
-        /*, jl_array_t *edges, int absolute_max*/)
+        uint32_t effects, jl_value_t *analysis_results,
+        jl_debuginfo_t *di, jl_svec_t *edges /*, int absolute_max*/)
 {
-    jl_task_t *ct = jl_current_task;
     assert(min_world <= max_world && "attempting to set invalid world constraints");
+    //assert((!jl_is_method(mi->def.value) || max_world != ~(size_t)0 || min_world <= 1 || edges == NULL || jl_svec_len(edges) != 0) && "missing edges");
+    jl_task_t *ct = jl_current_task;
     jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_instance_t),
             jl_code_instance_type);
-    codeinst->def = mi;
-    codeinst->min_world = min_world;
-    codeinst->max_world = max_world;
+    codeinst->def = (jl_value_t*)mi;
+    codeinst->owner = owner;
+    jl_atomic_store_relaxed(&codeinst->edges, edges);
+    jl_atomic_store_relaxed(&codeinst->min_world, min_world);
+    jl_atomic_store_relaxed(&codeinst->max_world, max_world);
     codeinst->rettype = rettype;
+    codeinst->exctype = exctype;
     jl_atomic_store_release(&codeinst->inferred, inferred);
-    //codeinst->edges = NULL;
     if ((const_flags & 2) == 0)
         inferred_const = NULL;
     codeinst->rettype_const = inferred_const;
+    jl_atomic_store_relaxed(&codeinst->debuginfo, (jl_value_t*)di == jl_nothing ? NULL : di);
     jl_atomic_store_relaxed(&codeinst->specptr.fptr, NULL);
     jl_atomic_store_relaxed(&codeinst->invoke, NULL);
     if ((const_flags & 1) != 0) {
         assert(const_flags & 2);
         jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_const_return);
     }
-    jl_atomic_store_relaxed(&codeinst->specsigflags, 0);
+    codeinst->time_infer_total = 0;
+    codeinst->time_infer_self = 0;
+    jl_atomic_store_relaxed(&codeinst->time_compile, 0);
+    jl_atomic_store_relaxed(&codeinst->flags, 0);
     jl_atomic_store_relaxed(&codeinst->precompile, 0);
     jl_atomic_store_relaxed(&codeinst->next, NULL);
-    codeinst->ipo_purity_bits = ipo_effects;
-    jl_atomic_store_relaxed(&codeinst->purity_bits, effects);
-    codeinst->argescapes = argescapes;
-    codeinst->relocatability = relocatability;
+    jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects);
+    codeinst->analysis_results = analysis_results;
+    return codeinst;
+}
+
+JL_DLLEXPORT void jl_update_codeinst(
+        jl_code_instance_t *codeinst, jl_value_t *inferred,
+        int32_t const_flags, size_t min_world, size_t max_world,
+        uint32_t effects, jl_value_t *analysis_results,
+        double time_infer_total, double time_infer_cache_saved, double time_infer_self,
+        jl_debuginfo_t *di, jl_svec_t *edges /* , int absolute_max*/)
+{
+    assert(min_world <= max_world && "attempting to set invalid world constraints");
+    //assert((!jl_is_method(codeinst->def->def.value) || max_world != ~(size_t)0 || min_world <= 1 || jl_svec_len(edges) != 0) && "missing edges");
+    codeinst->analysis_results = analysis_results;
+    jl_gc_wb(codeinst, analysis_results);
+    codeinst->time_infer_total = julia_double_to_half(time_infer_total);
+    codeinst->time_infer_cache_saved = julia_double_to_half(time_infer_cache_saved);
+    codeinst->time_infer_self = julia_double_to_half(time_infer_self);
+    jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects);
+    jl_atomic_store_relaxed(&codeinst->debuginfo, di);
+    jl_gc_wb(codeinst, di);
+    jl_atomic_store_relaxed(&codeinst->edges, edges);
+    jl_gc_wb(codeinst, edges);
+    if ((const_flags & 1) != 0) {
+        assert(codeinst->rettype_const);
+        jl_atomic_store_release(&codeinst->invoke, jl_fptr_const_return);
+    }
+    jl_atomic_store_release(&codeinst->inferred, inferred);
+    jl_gc_wb(codeinst, inferred);
+    jl_atomic_store_relaxed(&codeinst->min_world, min_world); // XXX: these should be unchanged?
+    jl_atomic_store_relaxed(&codeinst->max_world, max_world); // since the edges shouldn't change after jl_fill_codeinst
+}
+
+JL_DLLEXPORT void jl_fill_codeinst(
+        jl_code_instance_t *codeinst,
+        jl_value_t *rettype, jl_value_t *exctype,
+        jl_value_t *inferred_const,
+        int32_t const_flags, size_t min_world, size_t max_world,
+        uint32_t effects, jl_value_t *analysis_results,
+        jl_debuginfo_t *di, jl_svec_t *edges /* , int absolute_max*/)
+{
+    assert(min_world <= max_world && "attempting to set invalid world constraints");
+    //assert((!jl_is_method(codeinst->def->def.value) || max_world != ~(size_t)0 || min_world <= 1 || jl_svec_len(edges) != 0) && "missing edges");
+    codeinst->rettype = rettype;
+    jl_gc_wb(codeinst, rettype);
+    codeinst->exctype = exctype;
+    jl_gc_wb(codeinst, exctype);
+    if ((const_flags & 2) != 0) {
+        codeinst->rettype_const = inferred_const;
+        jl_gc_wb(codeinst, inferred_const);
+    }
+    jl_atomic_store_relaxed(&codeinst->edges, edges);
+    jl_gc_wb(codeinst, edges);
+    if ((jl_value_t*)di != jl_nothing) {
+        jl_atomic_store_relaxed(&codeinst->debuginfo, di);
+        jl_gc_wb(codeinst, di);
+    }
+    if ((const_flags & 1) != 0) {
+        // TODO: may want to follow ordering restrictions here (see jitlayers.cpp)
+        assert(const_flags & 2);
+        jl_atomic_store_release(&codeinst->invoke, jl_fptr_const_return);
+    }
+    jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects);
+    codeinst->analysis_results = analysis_results;
+    assert(jl_atomic_load_relaxed(&codeinst->min_world) == 1);
+    assert(jl_atomic_load_relaxed(&codeinst->max_world) == 0);
+    jl_atomic_store_release(&codeinst->inferred, jl_nothing);
+    jl_atomic_store_release(&codeinst->min_world, min_world);
+    jl_atomic_store_release(&codeinst->max_world, max_world);
+}
+
+JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner)
+{
+    jl_code_instance_t *codeinst = jl_new_codeinst(mi, owner, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL);
+    jl_atomic_store_relaxed(&codeinst->min_world, 1); // make temporarily invalid before returning, so that jl_fill_codeinst is valid later
     return codeinst;
 }
 
@@ -515,162 +752,292 @@ JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMEN
     JL_GC_PUSH1(&ci);
     if (jl_is_method(mi->def.method))
         JL_LOCK(&mi->def.method->writelock);
-    jl_code_instance_t *oldci = jl_atomic_load_relaxed(&mi->cache);
-    jl_atomic_store_relaxed(&ci->next, oldci);
-    if (oldci)
-        jl_gc_wb(ci, oldci);
-    jl_atomic_store_release(&mi->cache, ci);
-    jl_gc_wb(mi, ci);
+    // Set native_cache_valid bit when inserting into cache
+    jl_atomic_fetch_or_relaxed(&ci->flags, JL_CI_FLAGS_NATIVE_CACHE_VALID);
+    // find the preferred location for insertion of ci now:
+    //   - invoke+inferred group
+    //   - inferred group
+    //   - others group
+    //   - unmoved
+    //   - after existing entries with same applicable range
+    jl_value_t *parent = (jl_value_t*)mi;
+    _Atomic(jl_code_instance_t*) *slot = &mi->cache;
+    jl_code_instance_t *oldci = jl_atomic_load_relaxed(slot);
+    int hasinvoke = jl_atomic_load_relaxed(&ci->invoke) != NULL;
+    int hasinferred = jl_atomic_load_relaxed(&ci->inferred) != NULL;
+    size_t max_world = jl_atomic_load_relaxed(&ci->max_world);
+    jl_code_instance_t *next = jl_atomic_load_relaxed(&ci->next);
+    while (oldci) {
+        if (oldci == ci)
+            break;
+        int old_hasinvoke = jl_atomic_load_relaxed(&oldci->invoke) != NULL;
+        int old_hasinferred = jl_atomic_load_relaxed(&oldci->inferred) != NULL;
+        size_t old_max_world = jl_atomic_load_relaxed(&oldci->max_world);
+        if (hasinvoke && !old_hasinvoke)
+            break;
+        if (hasinferred && !old_hasinferred)
+            break;
+        if (next == NULL && old_max_world < max_world)
+            break;
+        parent = (jl_value_t*)oldci;
+        slot = &oldci->next;
+        oldci = jl_atomic_load_relaxed(slot);
+    }
+    if (oldci != ci) {
+        jl_atomic_store_relaxed(&ci->next, oldci);
+        if (oldci)
+            jl_gc_wb(ci, oldci);
+        jl_atomic_store_release(slot, ci);
+        jl_gc_wb(parent, ci);
+        if (oldci != NULL) {
+            // list is now potentially circular, need to go find old pointer to ci starting from oldci and insert next there
+            do {
+                parent = (jl_value_t*)oldci;
+                slot = &oldci->next;
+                oldci = jl_atomic_load_relaxed(slot);
+            } while (oldci && oldci != ci);
+            if (oldci) {
+                jl_atomic_store_release(slot, next);
+                if (next)
+                    jl_gc_wb(parent, next);
+            }
+        }
+    }
     if (jl_is_method(mi->def.method))
         JL_UNLOCK(&mi->def.method->writelock);
     JL_GC_POP();
     return;
 }
 
-static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure)
+JL_DLLEXPORT int jl_mi_try_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
+                                   jl_code_instance_t *expected_ci,
+                                   jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED)
 {
-    size_t world = jl_atomic_load_acquire(&jl_world_counter);
-    jl_value_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations);
-    if (specializations == (jl_value_t*)jl_emptysvec)
-        return 1;
-    if (!jl_is_svec(specializations)) {
-        jl_method_instance_t *mi = (jl_method_instance_t*)specializations;
-        assert(jl_is_method_instance(mi));
-        if (jl_rettype_inferred(mi, world, world) == jl_nothing)
-            jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
-        return 1;
-    }
-    size_t i, l = jl_svec_len(specializations);
-    JL_GC_PUSH1(&specializations);
-    for (i = 0; i < l; i++) {
-        jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
-        if ((jl_value_t*)mi != jl_nothing) {
-            assert(jl_is_method_instance(mi));
-            if (jl_rettype_inferred(mi, world, world) == jl_nothing)
-                jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
-        }
+    JL_GC_PUSH1(&ci);
+    if (jl_is_method(mi->def.method))
+        JL_LOCK(&mi->def.method->writelock);
+    jl_code_instance_t *oldci = jl_atomic_load_relaxed(&mi->cache);
+    int ret = 0;
+    if (oldci == expected_ci) {
+        jl_atomic_store_relaxed(&ci->next, oldci);
+        if (oldci)
+            jl_gc_wb(ci, oldci);
+        jl_atomic_store_release(&mi->cache, ci);
+        jl_gc_wb(mi, ci);
+        ret = 1;
     }
+    if (jl_is_method(mi->def.method))
+        JL_UNLOCK(&mi->def.method->writelock);
     JL_GC_POP();
-    return 1;
+    return ret;
 }
 
-int foreach_mtable_in_module(
-        jl_module_t *m,
-        int (*visit)(jl_methtable_t *mt, void *env),
-        void *env)
+enum top_typename_facts {
+    EXACTLY_ANY = 1 << 0,
+    HAVE_TYPE = 1 << 1,
+    EXACTLY_TYPE = 1 << 2,
+    HAVE_FUNCTION = 1 << 3,
+    EXACTLY_FUNCTION = 1 << 4,
+    HAVE_KWCALL = 1 << 5,
+    EXACTLY_KWCALL = 1 << 6,
+    SHORT_TUPLE = 1 << 7,
+};
+
+static void foreach_top_nth_typename(void (*f)(jl_typename_t*, int, void*), jl_value_t *a JL_PROPAGATES_ROOT, int n, unsigned *facts, void *env)
 {
-    jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings);
-    for (size_t i = 0; i < jl_svec_len(table); i++) {
-        jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i);
-        if ((void*)b == jl_nothing)
-            break;
-        jl_sym_t *name = b->globalref->name;
-        if (jl_atomic_load_relaxed(&b->owner) == b && b->constp) {
-            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
-            if (v) {
-                jl_value_t *uw = jl_unwrap_unionall(v);
-                if (jl_is_datatype(uw)) {
-                    jl_typename_t *tn = ((jl_datatype_t*)uw)->name;
-                    if (tn->module == m && tn->name == name && tn->wrapper == v) {
-                        // this is the original/primary binding for the type (name/wrapper)
-                        jl_methtable_t *mt = tn->mt;
-                        if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) {
-                            assert(mt->module == m);
-                            if (!visit(mt, env))
-                                return 0;
-                        }
-                    }
+    arraylist_t workqueue;
+    arraylist_new(&workqueue, 0);
+
+    // Push initial work item as jl_value_t* then int (cast to void*)
+    arraylist_push(&workqueue, a);
+    arraylist_push(&workqueue, (void*)(uintptr_t)n);
+
+    while (workqueue.len > 0) {
+        // Pop int n then jl_value_t* a (reverse order)
+        int current_n = (int)(uintptr_t)arraylist_pop(&workqueue);
+        jl_value_t *current_a = (jl_value_t*)arraylist_pop(&workqueue);
+        JL_GC_PROMISE_ROOTED(current_a);
+
+        if (jl_is_datatype(current_a)) {
+            if (current_n <= 0) {
+                jl_datatype_t *dt = ((jl_datatype_t*)current_a);
+                if (dt->name == jl_type_typename) { // key Type{T} on T instead of Type
+                    *facts |= HAVE_TYPE;
+                    arraylist_push(&workqueue, jl_tparam0(current_a));
+                    arraylist_push(&workqueue, (void*)(uintptr_t)-1);
                 }
-                else if (jl_is_module(v)) {
-                    jl_module_t *child = (jl_module_t*)v;
-                    if (child != m && child->parent == m && child->name == name) {
-                        // this is the original/primary binding for the submodule
-                        if (!foreach_mtable_in_module(child, visit, env))
-                            return 0;
-                    }
+                else if (dt == jl_function_type) {
+                    if (current_n == -1) // key Type{>:Function} as Type instead of Function
+                        *facts |= EXACTLY_TYPE; // HAVE_TYPE is already set
+                    else
+                        *facts |= HAVE_FUNCTION | EXACTLY_FUNCTION;
                 }
-                else if (jl_is_mtable(v)) {
-                    jl_methtable_t *mt = (jl_methtable_t*)v;
-                    if (mt->module == m && mt->name == name) {
-                        // this is probably an external method table here, so let's
-                        // assume so as there is no way to precisely distinguish them
-                        if (!visit(mt, env))
-                            return 0;
+                else if (dt == jl_any_type) {
+                    if (current_n == -1) // key Type{>:Any} and kinds as Type instead of Any
+                        *facts |= EXACTLY_TYPE; // HAVE_TYPE is already set
+                    else
+                        *facts |= EXACTLY_ANY;
+                }
+                else if (dt == jl_kwcall_type) {
+                    if (current_n == -1) // key Type{>:typeof(kwcall)} as exactly kwcall
+                        *facts |= EXACTLY_KWCALL;
+                    else
+                        *facts |= HAVE_KWCALL;
+                }
+                else {
+                    while (1) {
+                        jl_datatype_t *super = dt->super;
+                        if (super == jl_function_type) {
+                            *facts |= HAVE_FUNCTION;
+                            break;
+                        }
+                        if (super == jl_any_type || super->super == dt)
+                            break;
+                        dt = super;
                     }
+                    f(dt->name, 1, env);
+                }
+            }
+            else if (jl_is_tuple_type(current_a)) {
+                if (jl_nparams(current_a) >= current_n) {
+                    arraylist_push(&workqueue, jl_tparam(current_a, current_n - 1));
+                    arraylist_push(&workqueue, (void*)(uintptr_t)0);
                 }
+                else
+                    *facts |= SHORT_TUPLE;
             }
         }
-        table = jl_atomic_load_relaxed(&m->bindings);
+        else if (jl_is_typevar(current_a)) {
+            arraylist_push(&workqueue, ((jl_tvar_t*)current_a)->ub);
+            arraylist_push(&workqueue, (void*)(uintptr_t)current_n);
+        }
+        else if (jl_is_unionall(current_a)) {
+            arraylist_push(&workqueue, ((jl_unionall_t*)current_a)->body);
+            arraylist_push(&workqueue, (void*)(uintptr_t)current_n);
+        }
+        else if (jl_is_uniontype(current_a)) {
+            jl_uniontype_t *u = (jl_uniontype_t*)current_a;
+            // Add both union branches to workqueue (push a second to visit first)
+            arraylist_push(&workqueue, u->b);
+            arraylist_push(&workqueue, (void*)(uintptr_t)current_n);
+            arraylist_push(&workqueue, u->a);
+            arraylist_push(&workqueue, (void*)(uintptr_t)current_n);
+        }
     }
-    return 1;
+
+    arraylist_free(&workqueue);
 }
 
-int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), void *env)
+// Inspect type `argtypes` for all backedge keys that might be relevant to it, splitting it
+// up on some commonly observed patterns to make a better distribution.
+// (It could do some of that balancing automatically, but for now just hard-codes kwcall.)
+// Along the way, record some facts about what was encountered, so that those additional
+// calls can be added later if needed for completeness.
+// The `int explct` argument instructs the caller if the callback is due to an exactly
+// encountered type or if it rather encountered a subtype.
+// This is not capable of walking to all top-typenames for an explicitly encountered
+// Function or Any, so the caller a fallback that can scan the entire  in that case.
+// We do not de-duplicate calls when encountering a Union.
+static int jl_foreach_top_typename_for(void (*f)(jl_typename_t*, int, void*), jl_value_t *argtypes JL_PROPAGATES_ROOT, int all_subtypes, void *env)
 {
-    if (!visit(jl_type_type_mt, env))
-        return 0;
-    if (!visit(jl_nonfunction_mt, env))
-        return 0;
-    jl_array_t *mod_array = jl_get_loaded_modules();
-    if (mod_array) {
-        JL_GC_PUSH1(&mod_array);
-        int i;
-        for (i = 0; i < jl_array_len(mod_array); i++) {
-            jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i);
-            assert(jl_is_module(m));
-            if (m->parent == m) // some toplevel modules (really just Base) aren't actually
-                if (!foreach_mtable_in_module(m, visit, env)) {
-                    JL_GC_POP();
-                    return 0;
-                }
-        }
-        JL_GC_POP();
-    }
-    else {
-        if (!foreach_mtable_in_module(jl_main_module, visit, env))
-            return 0;
-        if (!foreach_mtable_in_module(jl_core_module, visit, env))
-            return 0;
+    unsigned facts = 0;
+    foreach_top_nth_typename(f, argtypes, 1, &facts, env);
+    if (facts & HAVE_KWCALL) {
+        // split kwcall on the 3rd argument instead, using the same logic
+        unsigned kwfacts = 0;
+        foreach_top_nth_typename(f, argtypes, 3, &kwfacts, env);
+        // copy kwfacts to original facts
+        if (kwfacts & SHORT_TUPLE)
+            kwfacts |= (all_subtypes ? EXACTLY_ANY : EXACTLY_KWCALL);
+        facts |= kwfacts;
     }
+    if (all_subtypes && (facts & (EXACTLY_FUNCTION | EXACTLY_TYPE | EXACTLY_ANY)))
+        // flag that we have an explct match than is necessitating a full table scan
+        return 0;
+    // or inform caller of only which supertypes are applicable
+    if (facts & HAVE_FUNCTION)
+        f(jl_function_type->name, facts & EXACTLY_FUNCTION ? 1 : 0, env);
+    if (facts & HAVE_TYPE)
+        f(jl_type_typename, facts & EXACTLY_TYPE ? 1 : 0, env);
+    if (facts & (HAVE_KWCALL | EXACTLY_KWCALL))
+        f(jl_kwcall_type->name, facts & EXACTLY_KWCALL ? 1 : 0, env);
+    f(jl_any_type->name, facts & EXACTLY_ANY ? 1 : 0, env);
     return 1;
 }
 
-static int reset_mt_caches(jl_methtable_t *mt, void *env)
+
+int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), jl_array_t *mod_array, void *env)
 {
-    // removes all method caches
-    // this might not be entirely safe (GC or MT), thus we only do it very early in bootstrapping
-    if (!mt->frozen) { // make sure not to reset builtin functions
-        jl_atomic_store_release(&mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
-        jl_atomic_store_release(&mt->cache, jl_nothing);
+    if (!visit(jl_method_table, env))
+        return 0;
+
+    if (!mod_array)
+        return 1;
+
+    arraylist_t workqueue;
+    arraylist_new(&workqueue, 0);
+
+    // Add initial toplevel modules to workqueue
+    for (size_t i = 0; i < jl_array_nrows(mod_array); i++) {
+        jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i);
+        assert(jl_is_module(m));
+        if (m->parent == m) // some toplevel modules (really just Base) aren't actually
+            arraylist_push(&workqueue, m);
+    }
+
+    int result = 1;
+
+    while (workqueue.len > 0) {
+        jl_module_t *current_m = (jl_module_t*)arraylist_pop(&workqueue);
+        JL_GC_PROMISE_ROOTED(current_m);
+
+        jl_svec_t *table = jl_atomic_load_relaxed(&current_m->bindings);
+        for (size_t i = 0; i < jl_svec_len(table); i++) {
+            jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
+            if ((void*)b == jl_nothing)
+                break;
+            jl_sym_t *name = b->globalref->name;
+            jl_value_t *v = jl_get_latest_binding_value_if_const(b);
+            if (v) {
+                if (jl_is_module(v)) {
+                    jl_module_t *child = (jl_module_t*)v;
+                    if (child != current_m && child->parent == current_m && child->name == name) {
+                        // this is the original/primary binding for the submodule
+                        arraylist_push(&workqueue, child);
+                    }
+                }
+                else if (jl_is_mtable(v)) {
+                    jl_methtable_t *mt = (jl_methtable_t*)v;
+                    if (mt && mt != jl_method_table) {
+                        if (!visit(mt, env)) {
+                            result = 0;
+                            goto cleanup;
+                        }
+                    }
+                }
+            }
+            table = jl_atomic_load_relaxed(&current_m->bindings);
+        }
     }
-    jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), get_method_unspec_list, env);
-    return 1;
-}
 
+cleanup:
+    arraylist_free(&workqueue);
+    return result;
+}
 
-jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED = NULL;
+jl_value_t *jl_typeinf_func JL_GLOBALLY_ROOTED = NULL;
+jl_value_t *jl_compile_and_emit_func JL_GLOBALLY_ROOTED = NULL;
 JL_DLLEXPORT size_t jl_typeinf_world = 1;
 
 JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f)
 {
-    size_t newfunc = jl_typeinf_world == 1 && jl_typeinf_func == NULL;
-    jl_typeinf_func = (jl_function_t*)f;
+    jl_typeinf_func = (jl_value_t*)f;
     jl_typeinf_world = jl_get_tls_world_age();
-    int world = jl_atomic_fetch_add(&jl_world_counter, 1) + 1; // make type-inference the only thing in this world
-    if (newfunc) {
-        // give type inference a chance to see all of these
-        // TODO: also reinfer if max_world != ~(size_t)0
-        jl_array_t *unspec = jl_alloc_vec_any(0);
-        JL_GC_PUSH1(&unspec);
-        jl_foreach_reachable_mtable(reset_mt_caches, (void*)unspec);
-        size_t i, l;
-        for (i = 0, l = jl_array_len(unspec); i < l; i++) {
-            jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(unspec, i);
-            if (jl_rettype_inferred(mi, world, world) == jl_nothing)
-                jl_type_infer(mi, world, 1);
-        }
-        JL_GC_POP();
-    }
+}
+
+JL_DLLEXPORT void jl_set_compile_and_emit_func(jl_value_t *f)
+{
+    jl_compile_and_emit_func = (jl_value_t*)f;
 }
 
 static int very_general_type(jl_value_t *t)
@@ -735,7 +1102,7 @@ static jl_value_t *inst_varargp_in_env(jl_value_t *decl, jl_svec_t *sparams)
                 vm = T_has_tv ? jl_type_unionall(v, T) : T;
                 if (N_has_tv)
                     N = NULL;
-                vm = (jl_value_t*)jl_wrap_vararg(vm, N, 1); // this cannot throw for these inputs
+                vm = (jl_value_t*)jl_wrap_vararg(vm, N, 1, 0); // this cannot throw for these inputs
             }
             sp++;
             decl = ((jl_unionall_t*)decl)->body;
@@ -747,7 +1114,7 @@ static jl_value_t *inst_varargp_in_env(jl_value_t *decl, jl_svec_t *sparams)
     return vm;
 }
 
-static jl_value_t *ml_matches(jl_methtable_t *mt,
+static jl_value_t *ml_matches(jl_methtable_t *mt, jl_methcache_t *mc,
                               jl_tupletype_t *type, int lim, int include_ambiguous,
                               int intersections, size_t world, int cache_result,
                               size_t *min_valid, size_t *max_valid, int *ambig);
@@ -934,14 +1301,9 @@ static void jl_compilation_sig(
         int notcalled_func = (i_arg > 0 && i_arg <= 8 && !(definition->called & (1 << (i_arg - 1))) &&
                               !jl_has_free_typevars(decl_i) &&
                               jl_subtype(elt, (jl_value_t*)jl_function_type));
-        if (notcalled_func && (type_i == (jl_value_t*)jl_any_type ||
-                               type_i == (jl_value_t*)jl_function_type ||
-                               (jl_is_uniontype(type_i) && // Base.Callable
-                                ((((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_function_type &&
-                                  ((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_type_type) ||
-                                 (((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_function_type &&
-                                  ((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_type_type))))) {
-            // and attempt to despecialize types marked Function, Callable, or Any
+        if (notcalled_func && (jl_subtype((jl_value_t*)jl_function_type, type_i))) {
+            // and attempt to despecialize types marked as a supertype of Function (i.e.
+            // Function, Callable, Any, or a Union{Function, T})
             // when called with a subtype of Function but is not called
             if (!*newparams) *newparams = jl_svec_copy(tt->parameters);
             jl_svecset(*newparams, i, (jl_value_t*)jl_function_type);
@@ -954,9 +1316,10 @@ static void jl_compilation_sig(
     // and the types we find should be bigger.
     if (np >= nspec && jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND) {
         if (!*newparams) *newparams = tt->parameters;
-        if (max_varargs > 0) {
+        if (max_varargs > 0 && nspec >= 2) {
             type_i = jl_svecref(*newparams, nspec - 2);
-        } else {
+        }
+        else {
             // If max varargs is zero, always specialize to (Any...) since
             // there is no preceding parameter to use for `type_i`
             type_i = jl_bottom_type;
@@ -984,7 +1347,7 @@ static void jl_compilation_sig(
             // avoid Vararg{Type{Type{...}}}
             if (jl_is_type_type(type_i) && jl_is_type_type(jl_tparam0(type_i)))
                 type_i = (jl_value_t*)jl_type_type;
-            type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL, 1); // this cannot throw for these inputs
+            type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL, 1, 0); // this cannot throw for these inputs
         }
         else {
             type_i = inst_varargp_in_env(decl, sparams);
@@ -1031,15 +1394,11 @@ JL_DLLEXPORT int jl_isa_compileable_sig(
     if (definition->isva) {
         unsigned nspec_min = nargs + 1; // min number of arg values (including tail vararg)
         unsigned nspec_max = INT32_MAX; // max number of arg values (including tail vararg)
-        jl_methtable_t *mt = jl_method_table_for(decl);
-        jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(decl) : mt;
-        if ((jl_value_t*)mt != jl_nothing) {
-            // try to refine estimate of min and max
-            uint8_t heuristic_used = 0;
-            nspec_max = nspec_min = nargs + get_max_varargs(definition, kwmt, mt, &heuristic_used);
-            if (heuristic_used)
-                nspec_max = INT32_MAX; // new methods may be added, increasing nspec_min later
-        }
+        // try to refine estimate of min and max
+        uint8_t heuristic_used = 0;
+        nspec_max = nspec_min = nargs + get_max_varargs(definition, &heuristic_used);
+        if (heuristic_used)
+            nspec_max = INT32_MAX; // new methods may be added, increasing nspec_min later
         int isunbound = (jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND);
         if (jl_is_vararg(jl_tparam(type, np - 1))) {
             if (!isunbound || np < nspec_min || np > nspec_max)
@@ -1170,15 +1529,9 @@ JL_DLLEXPORT int jl_isa_compileable_sig(
         int notcalled_func = (i_arg > 0 && i_arg <= 8 && !(definition->called & (1 << (i_arg - 1))) &&
                               !jl_has_free_typevars(decl_i) &&
                               jl_subtype(elt, (jl_value_t*)jl_function_type));
-        if (notcalled_func && (type_i == (jl_value_t*)jl_any_type ||
-                               type_i == (jl_value_t*)jl_function_type ||
-                               (jl_is_uniontype(type_i) && // Base.Callable
-                                ((((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_function_type &&
-                                  ((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_type_type) ||
-                                 (((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_function_type &&
-                                  ((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_type_type))))) {
-            // and attempt to despecialize types marked Function, Callable, or Any
-            // when called with a subtype of Function but is not called
+        if (notcalled_func && jl_subtype((jl_value_t*)jl_function_type, type_i)) {
+            // and attempt to despecialize types marked as a supertype of Function (i.e.
+            // Function, Callable, Any, or a Union{Function, T})
             if (elt == (jl_value_t*)jl_function_type)
                 continue;
             JL_GC_POP();
@@ -1213,12 +1566,16 @@ static int concretesig_equal(jl_value_t *tt, jl_value_t *simplesig) JL_NOTSAFEPO
     return 1;
 }
 
-static inline jl_typemap_entry_t *lookup_leafcache(jl_array_t *leafcache JL_PROPAGATES_ROOT, jl_value_t *tt, size_t world) JL_NOTSAFEPOINT
+// if available, returns a TypeMapEntry in the "leafcache" that matches `tt` (by type-equality) and is valid during `world`
+static inline jl_typemap_entry_t *lookup_leafcache(jl_genericmemory_t *leafcache JL_PROPAGATES_ROOT, jl_value_t *tt, size_t world) JL_NOTSAFEPOINT
 {
     jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_eqtable_get(leafcache, (jl_value_t*)tt, NULL);
     if (entry) {
+        // search tail of the linked-list (including the returned entry) for an entry intersecting world
+        //
+        // n.b. this entire chain is type-equal to tt (by construction), so it is unnecessary to call `tt<:entry->sig`
         do {
-            if (entry->min_world <= world && world <= entry->max_world) {
+            if (jl_atomic_load_relaxed(&entry->min_world) <= world && world <= jl_atomic_load_relaxed(&entry->max_world)) {
                 if (entry->simplesig == (void*)jl_nothing || concretesig_equal(tt, (jl_value_t*)entry->simplesig))
                     return entry;
             }
@@ -1227,34 +1584,53 @@ static inline jl_typemap_entry_t *lookup_leafcache(jl_array_t *leafcache JL_PROP
     }
     return NULL;
 }
-
-static jl_method_instance_t *cache_method(
-        jl_methtable_t *mt, _Atomic(jl_typemap_t*) *cache, jl_value_t *parent JL_PROPAGATES_ROOT,
+jl_method_instance_t *cache_method(
+        jl_methtable_t *mt, jl_methcache_t *mc, _Atomic(jl_typemap_t*) *cache, jl_value_t *parent JL_PROPAGATES_ROOT,
         jl_tupletype_t *tt, // the original tupletype of the signature
         jl_method_t *definition,
         size_t world, size_t min_valid, size_t max_valid,
         jl_svec_t *sparams)
 {
-    // caller must hold the mt->writelock
+    // caller must hold the parent->writelock, which this releases
     // short-circuit (now that we hold the lock) if this entry is already present
-    int8_t offs = mt ? jl_cachearg_offset(mt) : 1;
+    int8_t offs = mc ? jl_cachearg_offset() : 1;
     { // scope block
-        if (mt) {
-            jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
+        if (mc) {
+            jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mc->leafcache);
             jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world);
-            if (entry)
+            if (entry) {
+                if (mc) JL_UNLOCK(&mc->writelock);
                 return entry->func.linfo;
+            }
         }
-        struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL, 0, ~(size_t)0};
-        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(cache), &search, offs, /*subtype*/1);
-        if (entry && entry->func.value)
+        struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL};
+        jl_typemap_t *cacheentry = jl_atomic_load_relaxed(cache);
+        assert(cacheentry != NULL);
+        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(cacheentry, &search, offs, /*subtype*/1);
+        if (entry && entry->func.value) {
+            if (mc) JL_UNLOCK(&mc->writelock);
             return entry->func.linfo;
+        }
+    }
+
+    jl_method_instance_t *newmeth = NULL;
+    if (definition->sig == (jl_value_t*)jl_anytuple_type && definition != jl_opaque_closure_method && !definition->is_for_opaque_closure) {
+        newmeth = jl_atomic_load_relaxed(&definition->unspecialized);
+        if (newmeth != NULL) { // handle builtin methods de-specialization (for invoke, or if the global cache entry somehow gets lost)
+            jl_tupletype_t *cachett = (jl_tupletype_t*)newmeth->specTypes;
+            assert(cachett != jl_anytuple_type);
+            jl_typemap_entry_t *newentry = jl_typemap_alloc(cachett, NULL, jl_emptysvec, (jl_value_t*)newmeth, min_valid, max_valid);
+            JL_GC_PUSH1(&newentry);
+            jl_typemap_insert(cache, parent, newentry, offs);
+            JL_GC_POP();
+            if (mc) JL_UNLOCK(&mc->writelock);
+            return newmeth;
+        }
     }
 
     jl_value_t *temp = NULL;
     jl_value_t *temp2 = NULL;
     jl_value_t *temp3 = NULL;
-    jl_method_instance_t *newmeth = NULL;
     jl_svec_t *newparams = NULL;
     JL_GC_PUSH5(&temp, &temp2, &temp3, &newmeth, &newparams);
 
@@ -1262,11 +1638,10 @@ static jl_method_instance_t *cache_method(
     // so that we can minimize the number of required cache entries.
     int cache_with_orig = 1;
     jl_tupletype_t *compilationsig = tt;
-    jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(definition->sig) : mt;
-    intptr_t max_varargs = get_max_varargs(definition, kwmt, mt, NULL);
+    intptr_t max_varargs = get_max_varargs(definition, NULL);
     jl_compilation_sig(tt, sparams, definition, max_varargs, &newparams);
     if (newparams) {
-        temp2 = jl_apply_tuple_type(newparams);
+        temp2 = jl_apply_tuple_type(newparams, 1);
         // Now there may be a problem: the widened signature is more general
         // than just the given arguments, so it might conflict with another
         // definition that does not have cache instances yet. To fix this, we
@@ -1293,20 +1668,23 @@ static jl_method_instance_t *cache_method(
     if (newmeth->cache_with_orig)
         cache_with_orig = 1;
 
+    // Capture world counter at start to detect races
+    size_t current_world = mc ? jl_atomic_load_acquire(&jl_world_counter) : ~(size_t)0;
+
     jl_tupletype_t *cachett = tt;
-    jl_svec_t* guardsigs = jl_emptysvec;
+    jl_svec_t *guardsigs = jl_emptysvec;
     if (!cache_with_orig && mt) {
         // now examine what will happen if we chose to use this sig in the cache
         size_t min_valid2 = 1;
         size_t max_valid2 = ~(size_t)0;
-        temp = ml_matches(mt, compilationsig, MAX_UNSPECIALIZED_CONFLICTS, 1, 1, world, 0, &min_valid2, &max_valid2, NULL);
+        temp = ml_matches(mt, mc, compilationsig, MAX_UNSPECIALIZED_CONFLICTS, 1, 1, world, 0, &min_valid2, &max_valid2, NULL);
         int guards = 0;
         if (temp == jl_nothing) {
             cache_with_orig = 1;
         }
         else {
             int unmatched_tvars = 0;
-            size_t i, l = jl_array_len(temp);
+            size_t i, l = jl_array_nrows(temp);
             for (i = 0; i < l; i++) {
                 jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(temp, i);
                 if (matc->method == definition)
@@ -1339,7 +1717,7 @@ static jl_method_instance_t *cache_method(
             guardsigs = jl_alloc_svec(guards);
             temp3 = (jl_value_t*)guardsigs;
             guards = 0;
-            for (i = 0, l = jl_array_len(temp); i < l; i++) {
+            for (i = 0, l = jl_array_nrows(temp); i < l; i++) {
                 jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(temp, i);
                 jl_method_t *other = matc->method;
                 if (other != definition) {
@@ -1347,7 +1725,7 @@ static jl_method_instance_t *cache_method(
                     guards++;
                     // alternative approach: insert sentinel entry
                     //jl_typemap_insert(cache, parent, (jl_tupletype_t*)matc->spec_types,
-                    //        NULL, jl_emptysvec, /*guard*/NULL, jl_cachearg_offset(mt), other->min_world, other->max_world);
+                    //        NULL, jl_emptysvec, /*guard*/NULL, jl_cachearg_offset(), other->min_world, other->max_world);
                 }
             }
             assert(guards == jl_svec_len(guardsigs));
@@ -1364,6 +1742,10 @@ static jl_method_instance_t *cache_method(
         }
     }
 
+    int unconstrained_max = max_valid == ~(size_t)0;
+    if (max_valid > current_world)
+        max_valid = current_world;
+
     // now scan `cachett` and ensure that `Type{T}` in the cache will be matched exactly by `typeof(T)`
     // and also reduce the complexity of rejecting this entry in the cache
     // by replacing non-simple types with jl_any_type to build a new `type`
@@ -1389,14 +1771,14 @@ static jl_method_instance_t *cache_method(
         }
     }
     if (newparams) {
-        simplett = (jl_datatype_t*)jl_apply_tuple_type(newparams);
+        simplett = (jl_datatype_t*)jl_apply_tuple_type(newparams, 1);
         temp2 = (jl_value_t*)simplett;
     }
 
     // short-circuit if an existing entry is already present
     // that satisfies our requirements
     if (cachett != tt) {
-        struct jl_typemap_assoc search = {(jl_value_t*)cachett, world, NULL, 0, ~(size_t)0};
+        struct jl_typemap_assoc search = {(jl_value_t*)cachett, world, NULL};
         jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(cache), &search, offs, /*subtype*/1);
         if (entry && jl_egal((jl_value_t*)entry->simplesig, simplett ? (jl_value_t*)simplett : jl_nothing) &&
                 jl_egal((jl_value_t*)guardsigs, (jl_value_t*)entry->guardsigs)) {
@@ -1407,7 +1789,7 @@ static jl_method_instance_t *cache_method(
 
     jl_typemap_entry_t *newentry = jl_typemap_alloc(cachett, simplett, guardsigs, (jl_value_t*)newmeth, min_valid, max_valid);
     temp = (jl_value_t*)newentry;
-    if (mt && cachett == tt && jl_svec_len(guardsigs) == 0 && tt->hash && !tt->hasfreetypevars) {
+    if (mc && cachett == tt && jl_svec_len(guardsigs) == 0 && tt->hash && !tt->hasfreetypevars) {
         // we check `tt->hash` exists, since otherwise the NamedTuple
         // constructor and `structdiff` method pollutes this lookup with a lot
         // of garbage in the linear table search
@@ -1420,57 +1802,161 @@ static jl_method_instance_t *cache_method(
                 jl_cache_type_(tt);
             JL_UNLOCK(&typecache_lock); // Might GC
         }
-        jl_array_t *oldcache = jl_atomic_load_relaxed(&mt->leafcache);
+        jl_genericmemory_t *oldcache = jl_atomic_load_relaxed(&mc->leafcache);
         jl_typemap_entry_t *old = (jl_typemap_entry_t*)jl_eqtable_get(oldcache, (jl_value_t*)tt, jl_nothing);
         jl_atomic_store_relaxed(&newentry->next, old);
         jl_gc_wb(newentry, old);
-        jl_array_t *newcache = (jl_array_t*)jl_eqtable_put(jl_atomic_load_relaxed(&mt->leafcache), (jl_value_t*)tt, (jl_value_t*)newentry, NULL);
+        jl_genericmemory_t *newcache = jl_eqtable_put(jl_atomic_load_relaxed(&mc->leafcache), (jl_value_t*)tt, (jl_value_t*)newentry, NULL);
         if (newcache != oldcache) {
-            jl_atomic_store_release(&mt->leafcache, newcache);
-            jl_gc_wb(mt, newcache);
+            jl_atomic_store_release(&mc->leafcache, newcache);
+            jl_gc_wb(mc, newcache);
         }
     }
     else {
          jl_typemap_insert(cache, parent, newentry, offs);
+         if (mt) {
+             jl_datatype_t *dt = jl_nth_argument_datatype((jl_value_t*)tt, 1);
+             if (dt) {
+                 jl_typename_t *tn = dt->name;
+                 int cache_entry_count = jl_atomic_load_relaxed(&tn->cache_entry_count);
+                 if (cache_entry_count < 31)
+                     jl_atomic_store_relaxed(&tn->cache_entry_count, cache_entry_count + 1);
+             }
+         }
+    }
+    if (mc) {
+        JL_UNLOCK(&mc->writelock);
+
+        // Only set METHOD_SIG_LATEST_ONLY on method instance if method does NOT have the bit, no guards required, and min_valid == primary_world
+        int should_set_dispatch_status = !(jl_atomic_load_relaxed(&definition->dispatch_status) & METHOD_SIG_LATEST_ONLY) &&
+            (!cache_with_orig && jl_svec_len(guardsigs) == 0) &&
+            min_valid == jl_atomic_load_relaxed(&definition->primary_world) &&
+            !(jl_atomic_load_relaxed(&newmeth->dispatch_status) & METHOD_SIG_LATEST_ONLY);
+
+        // Combined trylock for both dispatch_status setting and max_world restoration
+        if ((should_set_dispatch_status || unconstrained_max) &&
+            jl_atomic_load_relaxed(&jl_world_counter) == current_world) {
+            JL_LOCK(&world_counter_lock);
+            if (jl_atomic_load_relaxed(&jl_world_counter) == current_world) {
+                if (should_set_dispatch_status) {
+                    jl_atomic_store_relaxed(&newmeth->dispatch_status, METHOD_SIG_LATEST_ONLY);
+                }
+                if (unconstrained_max) {
+                    jl_atomic_store_relaxed(&newentry->max_world, ~(size_t)0);
+                }
+            }
+            JL_UNLOCK(&world_counter_lock);
+        }
     }
 
     JL_GC_POP();
     return newmeth;
 }
 
-static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_value_t *mt, size_t world, size_t *min_valid, size_t *max_valid);
+JL_DLLEXPORT void jl_promote_cis_to_current(jl_code_instance_t **cis, size_t n, size_t validated_world)
+{
+    size_t current_world = jl_atomic_load_relaxed(&jl_world_counter);
+    // No need to acquire the lock if we've been invalidated anyway
+    if (current_world > validated_world)
+        return;
+    JL_LOCK(&world_counter_lock);
+    current_world = jl_atomic_load_relaxed(&jl_world_counter);
+    if (current_world == validated_world) {
+        arraylist_t workqueue;
+        arraylist_new(&workqueue, 0);
+        for (size_t i = 0; i < n; i++)
+            arraylist_push(&workqueue, cis[i]);
+        while (workqueue.len > 0) {
+            jl_code_instance_t *current_ci = (jl_code_instance_t *)arraylist_pop(&workqueue);
+            if (jl_atomic_load_relaxed(&current_ci->max_world) != validated_world)
+                continue;
+            jl_atomic_store_relaxed(&current_ci->max_world, ~(size_t)0);
+            jl_svec_t *edges = jl_atomic_load_relaxed(&current_ci->edges);
+            for (size_t i = 0; i < jl_svec_len(edges); i++) {
+                jl_value_t *edge = jl_svecref(edges, i);
+                if (!jl_is_code_instance(edge))
+                    continue;
+                arraylist_push(&workqueue, edge);
+            }
+        }
+        arraylist_free(&workqueue);
+    }
+    JL_UNLOCK(&world_counter_lock);
+}
 
-static jl_method_instance_t *jl_mt_assoc_by_type(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_datatype_t *tt, size_t world)
+JL_DLLEXPORT void jl_promote_ci_to_current(jl_code_instance_t *ci, size_t validated_world)
 {
-    // caller must hold the mt->writelock
-    assert(tt->isdispatchtuple || tt->hasfreetypevars);
+    jl_promote_cis_to_current(&ci, 1, validated_world);
+}
+
+JL_DLLEXPORT void jl_promote_mi_to_current(jl_method_instance_t *mi, size_t min_world, size_t validated_world)
+{
+    size_t current_world = jl_atomic_load_relaxed(&jl_world_counter);
+    // No need to acquire the lock if we've been invalidated anyway
+    if (current_world > validated_world)
+        return;
+    // Only set METHOD_SIG_LATEST_ONLY on method instance if method does NOT have the bit and min_valid == primary_world
+    jl_method_t *definition = mi->def.method;
+    if ((jl_atomic_load_relaxed(&definition->dispatch_status) & METHOD_SIG_LATEST_ONLY) ||
+        min_world != jl_atomic_load_relaxed(&definition->primary_world) ||
+        (jl_atomic_load_relaxed(&mi->dispatch_status) & METHOD_SIG_LATEST_ONLY))
+        return;
+    JL_LOCK(&world_counter_lock);
+    current_world = jl_atomic_load_relaxed(&jl_world_counter);
+    if (current_world == validated_world) {
+        jl_atomic_store_relaxed(&mi->dispatch_status, METHOD_SIG_LATEST_ONLY);
+    }
+    JL_UNLOCK(&world_counter_lock);
+}
+
+static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_methtable_t *mt, size_t world, int cache, size_t *min_valid, size_t *max_valid);
+
+JL_DLLEXPORT jl_typemap_entry_t *jl_mt_find_cache_entry(jl_methcache_t *mc JL_PROPAGATES_ROOT, jl_datatype_t *tt JL_MAYBE_UNROOTED JL_ROOTS_TEMPORARILY, size_t world)
+{ // exported only for debugging purposes, not for casual use
     if (tt->isdispatchtuple) {
-        jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
+        jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mc->leafcache);
         jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world);
         if (entry)
-            return entry->func.linfo;
+            return entry;
     }
+    JL_GC_PUSH1(&tt);
+    struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL};
+    jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mc->cache), &search, jl_cachearg_offset(), /*subtype*/1);
+    JL_GC_POP();
+    return entry;
+}
 
-    struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL, 0, ~(size_t)0};
-    jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->cache), &search, jl_cachearg_offset(mt), /*subtype*/1);
+static jl_method_instance_t *jl_mt_assoc_by_type(jl_methcache_t *mc JL_PROPAGATES_ROOT, jl_datatype_t *tt JL_MAYBE_UNROOTED, size_t world)
+{
+    jl_typemap_entry_t *entry = jl_mt_find_cache_entry(mc, tt, world);
     if (entry)
         return entry->func.linfo;
-
-    size_t min_valid = 0;
-    size_t max_valid = ~(size_t)0;
-    jl_method_match_t *matc = _gf_invoke_lookup((jl_value_t*)tt, jl_nothing, world, &min_valid, &max_valid);
-    jl_method_instance_t *nf = NULL;
-    if (matc) {
-        JL_GC_PUSH1(&matc);
-        jl_method_t *m = matc->method;
-        jl_svec_t *env = matc->sparams;
-        nf = cache_method(mt, &mt->cache, (jl_value_t*)mt, tt, m, world, min_valid, max_valid, env);
-        JL_GC_POP();
+    assert(tt->isdispatchtuple || tt->hasfreetypevars);
+    JL_TIMING(METHOD_LOOKUP_SLOW, METHOD_LOOKUP_SLOW);
+    jl_method_match_t *matc = NULL;
+    JL_GC_PUSH2(&tt, &matc);
+    JL_LOCK(&mc->writelock);
+    jl_method_instance_t *mi = NULL;
+    entry = jl_mt_find_cache_entry(mc, tt, world);
+    if (entry)
+        mi = entry->func.linfo;
+    if (!mi) {
+        size_t min_valid = 0;
+        size_t max_valid = ~(size_t)0;
+        matc = _gf_invoke_lookup((jl_value_t*)tt, jl_method_table, world, 0, &min_valid, &max_valid);
+        if (matc) {
+            jl_method_t *m = matc->method;
+            jl_svec_t *env = matc->sparams;
+            mi = cache_method(jl_method_table, mc, &mc->cache, (jl_value_t*)mc, tt, m, world, min_valid, max_valid, env);
+            JL_GC_POP();
+            return mi;
+        }
     }
-    return nf;
+    JL_UNLOCK(&mc->writelock);
+    JL_GC_POP();
+    return mi;
 }
 
-
 struct matches_env {
     struct typemap_intersection_env match;
     jl_typemap_entry_t *newentry;
@@ -1481,23 +1967,38 @@ struct matches_env {
 static int get_intersect_visitor(jl_typemap_entry_t *oldentry, struct typemap_intersection_env *closure0)
 {
     struct matches_env *closure = container_of(closure0, struct matches_env, match);
+    jl_method_t *oldmethod = oldentry->func.method;
     assert(oldentry != closure->newentry && "entry already added");
-    assert(oldentry->min_world <= closure->newentry->min_world && "old method cannot be newer than new method");
-    assert(oldentry->max_world == ~(size_t)0 && "method cannot be added at the same time as method deleted");
+    assert(jl_atomic_load_relaxed(&oldentry->min_world) <= jl_atomic_load_relaxed(&closure->newentry->min_world) && "old method cannot be newer than new method");
+    //assert(jl_atomic_load_relaxed(&oldentry->max_world) != jl_atomic_load_relaxed(&closure->newentry->min_world) && "method cannot be added at the same time as method deleted");
+    assert((jl_atomic_load_relaxed(&oldentry->max_world) == ~(size_t)0));
     // don't need to consider other similar methods if this oldentry will always fully intersect with them and dominates all of them
-    typemap_slurp_search(oldentry, &closure->match);
-    jl_method_t *oldmethod = oldentry->func.method;
     if (closure->match.issubty // e.g. jl_subtype(closure->newentry.sig, oldentry->sig)
         && jl_subtype(oldmethod->sig, (jl_value_t*)closure->newentry->sig)) { // e.g. jl_type_equal(closure->newentry->sig, oldentry->sig)
-        closure->replaced = oldentry;
+        if (closure->replaced == NULL || jl_atomic_load_relaxed(&closure->replaced->min_world) < jl_atomic_load_relaxed(&oldentry->min_world))
+            closure->replaced = oldentry; // must pick the newest insertion (both are still valid)
     }
     if (closure->shadowed == NULL)
         closure->shadowed = (jl_value_t*)jl_alloc_vec_any(0);
+    // This should be rarely true (in fact, get_intersect_visitor should be
+    // rarely true), but might as well skip the rest of the scan fast anyways
+    // since we can.
+    if (closure->match.issubty) {
+        int only = jl_atomic_load_relaxed(&oldmethod->dispatch_status) & METHOD_SIG_LATEST_ONLY;
+        if (only) {
+            size_t len = jl_array_nrows(closure->shadowed);
+            if (len > 0)
+                jl_array_del_end((jl_array_t*)closure->shadowed, len);
+            jl_array_ptr_1d_push((jl_array_t*)closure->shadowed, (jl_value_t*)oldmethod);
+            return 0;
+        }
+    }
     jl_array_ptr_1d_push((jl_array_t*)closure->shadowed, (jl_value_t*)oldmethod);
+    typemap_slurp_search(oldentry, &closure->match);
     return 1;
 }
 
-static jl_value_t *get_intersect_matches(jl_typemap_t *defs, jl_typemap_entry_t *newentry, jl_typemap_entry_t **replaced, int8_t offs, size_t world)
+static jl_value_t *get_intersect_matches(jl_typemap_t *defs, jl_typemap_entry_t *newentry, jl_typemap_entry_t **replaced, size_t world)
 {
     jl_tupletype_t *type = newentry->sig;
     jl_tupletype_t *ttypes = (jl_tupletype_t*)jl_unwrap_unionall((jl_value_t*)type);
@@ -1516,7 +2017,7 @@ static jl_value_t *get_intersect_matches(jl_typemap_t *defs, jl_typemap_entry_t
             /* .ti = */ NULL, /* .env = */ jl_emptysvec, /* .issubty = */ 0},
         /* .newentry = */ newentry, /* .shadowed */ NULL, /* .replaced */ NULL};
     JL_GC_PUSH3(&env.match.env, &env.match.ti, &env.shadowed);
-    jl_typemap_intersection_visitor(defs, offs, &env.match);
+    jl_typemap_intersection_visitor(defs, 0, &env.match);
     env.match.env = NULL;
     env.match.ti = NULL;
     *replaced = env.replaced;
@@ -1533,14 +2034,6 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m)
     }
 }
 
-static int is_anonfn_typename(char *name)
-{
-    if (name[0] != '#' || name[1] == '#')
-        return 0;
-    char *other = strrchr(name, '#');
-    return other > &name[1] && other[1] > '0' && other[1] <= '9';
-}
-
 static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue)
 {
     // method overwritten
@@ -1548,7 +2041,7 @@ static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue
     jl_module_t *newmod = method->module;
     jl_module_t *oldmod = oldvalue->module;
     jl_datatype_t *dt = jl_nth_argument_datatype(oldvalue->sig, 1);
-    if (dt == (jl_datatype_t*)jl_typeof(jl_kwcall_func))
+    if (jl_kwcall_type && dt == jl_kwcall_type)
         dt = jl_nth_argument_datatype(oldvalue->sig, 3);
     int anon = dt && is_anonfn_typename(jl_symbol_name(dt->name->name));
     if ((jl_options.warn_overwrite == JL_OPTIONS_WARN_OVERWRITE_ON) ||
@@ -1568,22 +2061,26 @@ static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue
         jl_printf(s, ".\n");
         jl_uv_flush(s);
     }
-    if (jl_options.incremental && jl_generating_output())
-        jl_printf(JL_STDERR, "  ** incremental compilation may be fatally broken for this module **\n\n");
+    if (jl_generating_output() && jl_options.incremental) {
+        jl_printf(JL_STDERR, "ERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.\n");
+        jl_throw(jl_precompilable_error);
+    }
 }
 
-static void update_max_args(jl_methtable_t *mt, jl_value_t *type)
+static void update_max_args(jl_value_t *type)
 {
-    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt || mt == jl_kwcall_mt)
-        return;
     type = jl_unwrap_unionall(type);
+    jl_datatype_t *dt = jl_nth_argument_datatype(type, 1);
+    if (dt == NULL || dt == jl_kwcall_type || jl_is_type_type((jl_value_t*)dt))
+        return;
+    jl_typename_t *tn = dt->name;
     assert(jl_is_datatype(type));
     size_t na = jl_nparams(type);
     if (jl_va_tuple_kind((jl_datatype_t*)type) == JL_VARARG_UNBOUND)
         na--;
-    // update occurs inside mt->writelock
-    if (na > jl_atomic_load_relaxed(&mt->max_args))
-        jl_atomic_store_relaxed(&mt->max_args, na);
+    // update occurs inside global writelock
+    if (na > jl_atomic_load_relaxed(&tn->max_args))
+        jl_atomic_store_relaxed(&tn->max_args, na);
 }
 
 jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED = NULL;
@@ -1602,105 +2099,212 @@ JL_DLLEXPORT jl_value_t *jl_debug_method_invalidation(int state)
     return jl_nothing;
 }
 
-// call external callbacks registered with this method_instance
-static void invalidate_external(jl_method_instance_t *mi, size_t max_world) {
-    jl_array_t *callbacks = mi->callbacks;
-    if (callbacks) {
-        // AbstractInterpreter allows for MethodInstances to be present in non-local caches
-        // inform those caches about the invalidation.
-        JL_TRY {
-            size_t i, l = jl_array_len(callbacks);
-            jl_value_t **args;
-            JL_GC_PUSHARGS(args, 3);
-            // these arguments are constant per call
-            args[1] = (jl_value_t*)mi;
-            args[2] = jl_box_uint32(max_world);
-
-            jl_task_t *ct = jl_current_task;
-            size_t last_age = ct->world_age;
-            ct->world_age = jl_get_world_counter();
-
-            jl_value_t **cbs = (jl_value_t**)jl_array_ptr_data(callbacks);
-            for (i = 0; i < l; i++) {
-                args[0] = cbs[i];
-                jl_apply(args, 3);
-            }
-            ct->world_age = last_age;
-            JL_GC_POP();
-        }
-        JL_CATCH {
-            jl_printf((JL_STREAM*)STDERR_FILENO, "error in invalidation callback: ");
-            jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
-            jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-            jlbacktrace(); // written to STDERR_FILENO
-        }
-    }
-}
-
-static void do_nothing_with_codeinst(jl_code_instance_t *ci) {}
+static void _invalidate_backedges(jl_method_instance_t *replaced_mi, jl_code_instance_t *replaced_ci, size_t max_world, int depth);
 
 // recursively invalidate cached methods that had an edge to a replaced method
-static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced, size_t max_world, int depth)
+static void invalidate_code_instance(jl_code_instance_t *replaced, size_t max_world, int depth)
 {
     jl_timing_counter_inc(JL_TIMING_COUNTER_Invalidations, 1);
     if (_jl_debug_method_invalidation) {
         jl_value_t *boxeddepth = NULL;
         JL_GC_PUSH1(&boxeddepth);
-        jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced);
+        jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced->def);
         boxeddepth = jl_box_int32(depth);
         jl_array_ptr_1d_push(_jl_debug_method_invalidation, boxeddepth);
         JL_GC_POP();
     }
-    //jl_static_show(JL_STDERR, (jl_value_t*)replaced);
-    if (!jl_is_method(replaced->def.method))
+    //jl_static_show(JL_STDERR, (jl_value_t*)replaced->def);
+    jl_method_instance_t *replaced_mi = jl_get_ci_mi(replaced);
+    if (!jl_is_method(replaced_mi->def.method))
         return; // shouldn't happen, but better to be safe
-    JL_LOCK(&replaced->def.method->writelock);
-    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&replaced->cache);
-    while (codeinst) {
-        if (codeinst->max_world == ~(size_t)0) {
-            assert(codeinst->min_world - 1 <= max_world && "attempting to set illogical world constraints (probable race condition)");
-            codeinst->max_world = max_world;
+    JL_LOCK(&replaced_mi->def.method->writelock);
+    size_t replacedmaxworld = jl_atomic_load_relaxed(&replaced->max_world);
+    if (replacedmaxworld == ~(size_t)0) {
+        assert(jl_atomic_load_relaxed(&replaced->min_world) - 1 <= max_world && "attempting to set illogical world constraints (probable race condition)");
+        jl_atomic_store_release(&replaced->max_world, max_world);
+        // recurse to all backedges to update their valid range also
+        _invalidate_backedges(replaced_mi, replaced, max_world, depth + 1);
+        // TODO: should we visit all forward edges now and delete ourself from all of those lists too?
+    }
+    else {
+        assert(jl_atomic_load_relaxed(&replaced->max_world) <= max_world);
+    }
+    JL_UNLOCK(&replaced_mi->def.method->writelock);
+}
+
+JL_DLLEXPORT void jl_invalidate_code_instance(jl_code_instance_t *replaced, size_t max_world)
+{
+    invalidate_code_instance(replaced, max_world, 1);
+}
+
+JL_DLLEXPORT void jl_maybe_log_binding_invalidation(jl_value_t *replaced)
+{
+    if (_jl_debug_method_invalidation) {
+        if (replaced) {
+            jl_array_ptr_1d_push(_jl_debug_method_invalidation, replaced);
         }
-        assert(codeinst->max_world <= max_world);
-        JL_GC_PUSH1(&codeinst);
-        (*f)(codeinst);
+        jl_value_t *loctag = jl_cstr_to_string("jl_maybe_log_binding_invalidation");
+        JL_GC_PUSH1(&loctag);
+        jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
         JL_GC_POP();
-        codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
-    // recurse to all backedges to update their valid range also
-    jl_array_t *backedges = replaced->backedges;
-    if (backedges) {
-        JL_GC_PUSH1(&backedges);
-        replaced->backedges = NULL;
-        size_t i = 0, l = jl_array_len(backedges);
-        jl_method_instance_t *replaced;
-        while (i < l) {
-            i = get_next_edge(backedges, i, NULL, &replaced);
-            invalidate_method_instance(f, replaced, max_world, depth + 1);
+}
+
+static void _invalidate_backedges(jl_method_instance_t *replaced_mi, jl_code_instance_t *replaced_ci, size_t max_world, int depth) {
+    uint8_t recursion_flags = 0;
+    jl_array_t *backedges = jl_mi_get_backedges_mutate(replaced_mi, &recursion_flags);
+    if (!backedges)
+        return;
+    // invalidate callers (if any)
+    if (!replaced_ci) {
+        // We know all backedges are deleted - clear them eagerly
+        // Clears both array and flags
+        replaced_mi->backedges = NULL;
+        jl_atomic_fetch_and_relaxed(&replaced_mi->flags, ~MI_FLAG_BACKEDGES_ALL);
+    }
+    JL_GC_PUSH1(&backedges);
+    size_t i = 0, l = jl_array_nrows(backedges);
+    size_t ins = 0;
+    jl_code_instance_t *replaced;
+    while (i < l) {
+        jl_value_t *invokesig = NULL;
+        i = get_next_edge(backedges, i, &invokesig, &replaced);
+        if (!replaced) {
+            ins = i;
+            continue;
+        }
+        JL_GC_PROMISE_ROOTED(replaced); // propagated by get_next_edge from backedges
+        if (replaced_ci) {
+            // If we're invalidating a particular codeinstance, only invalidate
+            // this backedge it actually has an edge for our codeinstance.
+            jl_svec_t *edges = jl_atomic_load_relaxed(&replaced->edges);
+            for (size_t j = 0; j < jl_svec_len(edges); ++j) {
+                jl_value_t *edge = jl_svecref(edges, j);
+                if (edge == (jl_value_t*)replaced_mi || edge == (jl_value_t*)replaced_ci)
+                    goto found;
+            }
+            ins = set_next_edge(backedges, ins, invokesig, replaced);
+            continue;
+        found:;
+            ins = clear_next_edge(backedges, ins, invokesig, replaced);
+            jl_atomic_fetch_or(&replaced_mi->flags, MI_FLAG_BACKEDGES_DIRTY);
+            /* fallthrough */
+        }
+        invalidate_code_instance(replaced, max_world, depth);
+        if (replaced_ci && !replaced_mi->backedges) {
+            // Fast-path early out. If `invalidate_code_instance` invalidated
+            // the entire mi via a recursive edge, there's no point to keep
+            // iterating - they'll already have been invalidated.
+            break;
         }
-        JL_GC_POP();
     }
-    JL_UNLOCK(&replaced->def.method->writelock);
+    if (replaced_ci)
+        jl_mi_done_backedges(replaced_mi, recursion_flags);
+    JL_GC_POP();
 }
 
-// invalidate cached methods that overlap this definition
-static void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced_mi, size_t max_world, const char *why)
+static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **isect JL_REQUIRE_ROOTED_SLOT, jl_value_t **isect2 JL_REQUIRE_ROOTED_SLOT)
 {
-    JL_LOCK(&replaced_mi->def.method->writelock);
-    jl_array_t *backedges = replaced_mi->backedges;
-    //jl_static_show(JL_STDERR, (jl_value_t*)replaced_mi);
-    if (backedges) {
-        // invalidate callers (if any)
-        replaced_mi->backedges = NULL;
-        JL_GC_PUSH1(&backedges);
-        size_t i = 0, l = jl_array_len(backedges);
-        jl_method_instance_t *replaced;
-        while (i < l) {
-            i = get_next_edge(backedges, i, NULL, &replaced);
-            invalidate_method_instance(f, replaced, max_world, 1);
+    *isect2 = NULL;
+    int is_subty = 0;
+    *isect = jl_type_intersection_env_s(t1, t2, NULL, &is_subty);
+    if (*isect == jl_bottom_type)
+        return 0;
+    if (is_subty)
+        return 1;
+    // TODO: sometimes type intersection returns types with free variables
+    if (jl_has_free_typevars(t1) || jl_has_free_typevars(t2))
+        return 1;
+    // determine if type-intersection can be convinced to give a better, non-bad answer
+    // if the intersection was imprecise, see if we can do better by switching the types
+    *isect2 = jl_type_intersection(t2, t1);
+    if (*isect2 == jl_bottom_type) {
+        *isect = jl_bottom_type;
+        *isect2 = NULL;
+        return 0;
+    }
+    if (jl_types_egal(*isect2, *isect)) {
+        *isect2 = NULL;
+    }
+    return 1;
+}
+
+
+// check if `type` is replacing `m` with an ambiguity here, given other methods in `d` that already match it
+static int is_replacing(char ambig, jl_value_t *type, jl_method_t *m, jl_method_t *const *d, size_t n, jl_value_t *isect, jl_value_t *isect2, char *morespec)
+{
+    size_t k;
+    for (k = 0; k < n; k++) {
+        jl_method_t *m2 = d[k];
+        // see if m2 also fully covered this intersection
+        if (m == m2 || !(jl_subtype(isect, m2->sig) || (isect2 && jl_subtype(isect2, m2->sig))))
+            continue;
+        if (morespec[k])
+            // not actually shadowing this--m2 will still be better
+            return 0;
+        // if type is not more specific than m (thus now dominating it)
+        // then there is a new ambiguity here,
+        // since m2 was also a previous match over isect,
+        // see if m was previously dominant over all m2
+        // or if this was already ambiguous before
+        if (ambig && !jl_type_morespecific(m->sig, m2->sig)) {
+            // m and m2 were previously ambiguous over the full intersection of mi with type, and will still be ambiguous with addition of type
+            return 0;
+        }
+    }
+    return 1;
+}
+
+static int _invalidate_dispatch_backedges(jl_method_instance_t *mi, jl_value_t *type, jl_method_t *m,
+        jl_method_t *const *d, size_t n, int replaced_dispatch, int ambig,
+        size_t max_world, char *morespec)
+{
+    uint8_t backedge_recursion_flags = 0;
+    jl_array_t *backedges = jl_mi_get_backedges_mutate(mi, &backedge_recursion_flags);
+    if (!backedges)
+        return 0;
+    size_t ib = 0, insb = 0, nb = jl_array_nrows(backedges);
+    jl_value_t *invokeTypes;
+    jl_code_instance_t *caller;
+    int invalidated_any = 0;
+    while (mi->backedges && ib < nb) {
+        ib = get_next_edge(backedges, ib, &invokeTypes, &caller);
+        if (!caller) {
+            insb = ib;
+            continue;
+        }
+        JL_GC_PROMISE_ROOTED(caller); // propagated by get_next_edge from backedges
+        int replaced_edge;
+        if (invokeTypes) {
+            // n.b. normally we must have mi.specTypes <: invokeTypes <: m.sig (though it might not strictly hold), so we only need to check the other subtypes
+            if (jl_egal(invokeTypes, jl_get_ci_mi(caller)->def.method->sig))
+                replaced_edge = 0; // if invokeTypes == m.sig, then the only way to change this invoke is to replace the method itself
+            else
+                replaced_edge = jl_subtype(invokeTypes, type) && is_replacing(ambig, type, m, d, n, invokeTypes, NULL, morespec);
+        }
+        else {
+            replaced_edge = replaced_dispatch;
+        }
+        if (replaced_edge) {
+            invalidate_code_instance(caller, max_world, 1);
+            insb = clear_next_edge(backedges, insb, invokeTypes, caller);
+            jl_atomic_fetch_or(&mi->flags, MI_FLAG_BACKEDGES_DIRTY);
+            invalidated_any = 1;
+        }
+        else {
+            insb = set_next_edge(backedges, insb, invokeTypes, caller);
         }
-        JL_GC_POP();
     }
+    jl_mi_done_backedges(mi, backedge_recursion_flags);
+    return invalidated_any;
+}
+
+// invalidate cached methods that overlap this definition
+static void invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_world, const char *why)
+{
+    // Reset dispatch_status when method instance is replaced
+    JL_LOCK(&replaced_mi->def.method->writelock);
+    _invalidate_backedges(replaced_mi, NULL, max_world, 1);
     JL_UNLOCK(&replaced_mi->def.method->writelock);
     if (why && _jl_debug_method_invalidation) {
         jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced_mi);
@@ -1709,88 +2313,190 @@ static void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_insta
         jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
         JL_GC_POP();
     }
+    jl_atomic_store_relaxed(&replaced_mi->dispatch_status, 0);
 }
 
 // add a backedge from callee to caller
-JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_method_instance_t *caller)
+JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_code_instance_t *caller)
 {
-    JL_LOCK(&callee->def.method->writelock);
+    if (!jl_atomic_load_relaxed(&allow_new_worlds))
+        return;
     if (invokesig == jl_nothing)
         invokesig = NULL;      // julia uses `nothing` but C uses NULL (#undef)
-    int found = 0;
-    // TODO: use jl_cache_type_(invokesig) like cache_method does to save memory
-    if (!callee->backedges) {
-        // lazy-init the backedges array
-        callee->backedges = jl_alloc_vec_any(0);
-        jl_gc_wb(callee, callee->backedges);
+    assert(jl_is_method_instance(callee));
+    assert(jl_is_code_instance(caller));
+    assert(invokesig == NULL || jl_is_type(invokesig));
+    JL_LOCK(&callee->def.method->writelock);
+    if (jl_atomic_load_relaxed(&allow_new_worlds)) {
+        jl_array_t *backedges = jl_mi_get_backedges(callee);
+        // TODO: use jl_cache_type_(invokesig) like cache_method does to save memory
+        if (!backedges) {
+            // lazy-init the backedges array
+            backedges = jl_alloc_vec_any(0);
+            callee->backedges = backedges;
+            jl_gc_wb(callee, backedges);
+        }
+        push_edge(backedges, invokesig, caller);
     }
-    else {
-        size_t i = 0, l = jl_array_len(callee->backedges);
-        for (i = 0; i < l; i++) {
-            // optimized version of while (i < l) i = get_next_edge(callee->backedges, i, &invokeTypes, &mi);
-            jl_value_t *mi = jl_array_ptr_ref(callee->backedges, i);
-            if (mi != (jl_value_t*)caller)
-                continue;
-            jl_value_t *invokeTypes = i > 0 ? jl_array_ptr_ref(callee->backedges, i - 1) : NULL;
-            if (invokeTypes && jl_is_method_instance(invokeTypes))
-                invokeTypes = NULL;
-            if ((invokesig == NULL && invokeTypes == NULL) ||
-                (invokesig && invokeTypes && jl_types_equal(invokesig, invokeTypes))) {
-                found = 1;
+    JL_UNLOCK(&callee->def.method->writelock);
+}
+
+
+static int jl_foreach_top_typename_for(void (*f)(jl_typename_t*, int, void*), jl_value_t *argtypes JL_PROPAGATES_ROOT, int all_subtypes, void *env);
+
+struct _typename_add_backedge {
+    jl_value_t *typ;
+    jl_value_t *caller;
+};
+
+static void _typename_add_backedge(jl_typename_t *tn, int explct, void *env0)
+{
+    struct _typename_add_backedge *env = (struct _typename_add_backedge*)env0;
+    JL_GC_PROMISE_ROOTED(env->typ);
+    JL_GC_PROMISE_ROOTED(env->caller);
+    if (!explct)
+        return;
+    jl_genericmemory_t *allbackedges = jl_method_table->backedges;
+    jl_array_t *backedges = (jl_array_t*)jl_eqtable_get(allbackedges, (jl_value_t*)tn, NULL);
+    if (backedges == NULL) {
+        backedges = jl_alloc_vec_any(2);
+        JL_GC_PUSH1(&backedges);
+        jl_array_del_end(backedges, 2);
+        jl_genericmemory_t *newtable = jl_eqtable_put(allbackedges, (jl_value_t*)tn, (jl_value_t*)backedges, NULL);
+        JL_GC_POP();
+        if (newtable != allbackedges) {
+            jl_method_table->backedges = newtable;
+            jl_gc_wb(jl_method_table, newtable);
+        }
+    }
+    // check if the edge is already present and avoid adding a duplicate
+    size_t i, l = jl_array_nrows(backedges);
+    // reuse an already cached instance of this type, if possible
+    // TODO: use jl_cache_type_(tt) like cache_method does, instead of this linear scan?
+    // TODO: use as_global_root and de-dup edges array too
+    for (i = 1; i < l; i += 2) {
+        if (jl_array_ptr_ref(backedges, i) == env->caller) {
+            if (jl_types_equal(jl_array_ptr_ref(backedges, i - 1), env->typ)) {
+                env->typ = jl_array_ptr_ref(backedges, i - 1);
+                return; // this edge already recorded
+            }
+        }
+    }
+    for (i = 1; i < l; i += 2) {
+        if (jl_array_ptr_ref(backedges, i) != env->caller) {
+            if (jl_types_equal(jl_array_ptr_ref(backedges, i - 1), env->typ)) {
+                env->typ = jl_array_ptr_ref(backedges, i - 1);
                 break;
             }
         }
     }
-    if (!found)
-        push_edge(callee->backedges, invokesig, caller);
-    JL_UNLOCK(&callee->def.method->writelock);
+    jl_array_ptr_1d_push(backedges, env->typ);
+    jl_array_ptr_1d_push(backedges, env->caller);
 }
 
 // add a backedge from a non-existent signature to caller
-JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller)
+JL_DLLEXPORT void jl_method_table_add_backedge(jl_value_t *typ, jl_code_instance_t *caller)
 {
-    JL_LOCK(&mt->writelock);
-    if (!mt->backedges) {
-        // lazy-init the backedges array
-        mt->backedges = jl_alloc_vec_any(2);
-        jl_gc_wb(mt, mt->backedges);
-        jl_array_ptr_set(mt->backedges, 0, typ);
-        jl_array_ptr_set(mt->backedges, 1, caller);
+    assert(jl_is_code_instance(caller));
+    if (!jl_atomic_load_relaxed(&allow_new_worlds))
+        return;
+    // try to pick the best cache(s) for this typ edge
+    jl_methtable_t *mt = jl_method_table;
+    jl_methcache_t *mc = mt->cache;
+    JL_LOCK(&mc->writelock);
+    if (jl_atomic_load_relaxed(&allow_new_worlds)) {
+        struct _typename_add_backedge env = {typ, (jl_value_t*)caller};
+        jl_foreach_top_typename_for(_typename_add_backedge, typ, 0, &env);
     }
-    else {
-        // TODO: use jl_cache_type_(tt) like cache_method does, instead of a linear scan
-        size_t i, l = jl_array_len(mt->backedges);
-        for (i = 1; i < l; i += 2) {
-            if (jl_types_equal(jl_array_ptr_ref(mt->backedges, i - 1), typ)) {
-                if (jl_array_ptr_ref(mt->backedges, i) == caller) {
-                    JL_UNLOCK(&mt->writelock);
-                    return;
+    JL_UNLOCK(&mc->writelock);
+}
+
+struct _typename_invalidate_backedge {
+    jl_value_t *type;
+    jl_value_t **isect;
+    jl_value_t **isect2;
+    jl_method_t *const *d;
+    size_t n;
+    size_t max_world;
+    int invalidated;
+};
+
+static void _typename_invalidate_backedges(jl_typename_t *tn, int explct, void *env0)
+{
+    struct _typename_invalidate_backedge *env = (struct _typename_invalidate_backedge*)env0;
+    JL_GC_PROMISE_ROOTED(env->type);
+    JL_GC_PROMISE_ROOTED(env->isect); // isJuliaType considers jl_value_t** to be a julia object too
+    JL_GC_PROMISE_ROOTED(env->isect2); // isJuliaType considers jl_value_t** to be a julia object too
+    jl_array_t *backedges = (jl_array_t*)jl_eqtable_get(jl_method_table->backedges, (jl_value_t*)tn, NULL);
+    if (backedges == NULL)
+        return;
+    jl_value_t **d = jl_array_ptr_data(backedges);
+    size_t i, na = jl_array_nrows(backedges);
+    size_t ins = 0;
+    for (i = 1; i < na; i += 2) {
+        jl_value_t *backedgetyp = d[i - 1];
+        JL_GC_PROMISE_ROOTED(backedgetyp);
+        int missing = 0;
+        if (jl_type_intersection2(backedgetyp, (jl_value_t*)env->type, env->isect, env->isect2)) {
+            // See if the intersection was actually already fully
+            // covered, but that the new method is ambiguous.
+            //  -> no previous method: now there is one, need to update the missing edge
+            //  -> one+ previously matching method(s):
+            //    -> more specific then all of them: need to update the missing edge
+            //      -> some may have been ambiguous: now there is a replacement
+            //      -> some may have been called: now there is a replacement (also will be detected in the loop later)
+            //    -> less specific or ambiguous with any one of them: can ignore the missing edge (not missing)
+            //      -> some may have been ambiguous: still are
+            //      -> some may have been called: they may be partly replaced (will be detected in the loop later)
+            // c.f. `is_replacing`, which is a similar query, but with an existing method match to compare against
+            missing = 1;
+            for (size_t j = 0; j < env->n; j++) {
+                jl_method_t *m = env->d[j];
+                JL_GC_PROMISE_ROOTED(m);
+                if (jl_subtype(*env->isect, m->sig) || (*env->isect2 && jl_subtype(*env->isect2, m->sig))) {
+                    // We now know that there actually was a previous
+                    // method for this part of the type intersection.
+                    if (!jl_type_morespecific(env->type, m->sig)) {
+                        missing = 0;
+                        break;
+                    }
                 }
-                // reuse the already cached instance of this type
-                typ = jl_array_ptr_ref(mt->backedges, i - 1);
             }
         }
-        jl_array_ptr_1d_push(mt->backedges, typ);
-        jl_array_ptr_1d_push(mt->backedges, caller);
+        *env->isect = *env->isect2 = NULL;
+        if (missing) {
+            jl_code_instance_t *backedge = (jl_code_instance_t*)d[i];
+            JL_GC_PROMISE_ROOTED(backedge);
+            invalidate_code_instance(backedge, env->max_world, 0);
+            env->invalidated = 1;
+            if (_jl_debug_method_invalidation)
+                jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)backedgetyp);
+        }
+        else {
+            d[ins++] = d[i - 1];
+            d[ins++] = d[i - 0];
+        }
     }
-    JL_UNLOCK(&mt->writelock);
+    if (ins == 0)
+        jl_eqtable_pop(jl_method_table->backedges, (jl_value_t*)tn, NULL, NULL);
+    else if (na != ins)
+        jl_array_del_end(backedges, na - ins);
 }
 
 struct invalidate_mt_env {
     jl_typemap_entry_t *newentry;
     jl_array_t *shadowed;
     size_t max_world;
-    int invalidated;
 };
 static int invalidate_mt_cache(jl_typemap_entry_t *oldentry, void *closure0)
 {
     struct invalidate_mt_env *env = (struct invalidate_mt_env*)closure0;
     JL_GC_PROMISE_ROOTED(env->newentry);
-    if (oldentry->max_world == ~(size_t)0) {
+    if (jl_atomic_load_relaxed(&oldentry->max_world) == ~(size_t)0) {
         jl_method_instance_t *mi = oldentry->func.linfo;
         int intersects = 0;
         jl_method_instance_t **d = (jl_method_instance_t**)jl_array_ptr_data(env->shadowed);
-        size_t i, n = jl_array_len(env->shadowed);
+        size_t i, n = jl_array_nrows(env->shadowed);
         for (i = 0; i < n; i++) {
             if (mi == d[i]) {
                 intersects = 1;
@@ -1821,20 +2527,24 @@ static int invalidate_mt_cache(jl_typemap_entry_t *oldentry, void *closure0)
                 jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
                 JL_GC_POP();
             }
-            oldentry->max_world = env->max_world;
-            env->invalidated = 1;
+            jl_atomic_store_relaxed(&oldentry->max_world, env->max_world);
         }
     }
     return 1;
 }
+
+struct disable_mt_env {
+    jl_method_t *replaced;
+    size_t max_world;
+};
 static int disable_mt_cache(jl_typemap_entry_t *oldentry, void *closure0)
 {
-    struct invalidate_mt_env *env = (struct invalidate_mt_env*)closure0;
-    if (oldentry->max_world < ~(size_t)0)
+    struct disable_mt_env *env = (struct disable_mt_env*)closure0;
+    if (jl_atomic_load_relaxed(&oldentry->max_world) < ~(size_t)0)
         return 1;
     jl_method_t *m = oldentry->func.linfo->def.method;
-    if (m == env->newentry->func.method)
-        oldentry->max_world = env->max_world;
+    if (m == env->replaced)
+        jl_atomic_store_relaxed(&oldentry->max_world, env->max_world);
     return 1;
 }
 
@@ -1847,61 +2557,61 @@ static int typemap_search(jl_typemap_entry_t *entry, void *closure)
     return 1;
 }
 
-static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) JL_NOTSAFEPOINT;
-
-#ifndef __clang_gcanalyzer__ /* in general, jl_typemap_visitor could be a safepoint, but not for typemap_search */
-static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) JL_NOTSAFEPOINT {
+static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) {
     jl_value_t *closure = (jl_value_t*)(method);
     if (jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), typemap_search, &closure))
         jl_error("method not in method table");
     return (jl_typemap_entry_t *)closure;
 }
-#endif
 
-static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *methodentry, size_t max_world)
+static void _method_table_invalidate(jl_methcache_t *mc, void *env0)
 {
-    jl_method_t *method = methodentry->func.method;
-    assert(!method->is_for_opaque_closure);
-    method->deleted_world = methodentry->max_world = max_world;
-    // drop this method from mt->cache
-    struct invalidate_mt_env mt_cache_env;
-    mt_cache_env.max_world = max_world;
-    mt_cache_env.newentry = methodentry;
-    mt_cache_env.shadowed = NULL;
-    mt_cache_env.invalidated = 0;
-    jl_typemap_visitor(jl_atomic_load_relaxed(&mt->cache), disable_mt_cache, (void*)&mt_cache_env);
-    jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
-    size_t i, l = jl_array_len(leafcache);
+    // drop this method from mc->cache
+    jl_typemap_visitor(jl_atomic_load_relaxed(&mc->cache), disable_mt_cache, env0);
+    jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mc->leafcache);
+    size_t i, l = leafcache->length;
     for (i = 1; i < l; i += 2) {
-        jl_typemap_entry_t *oldentry = (jl_typemap_entry_t*)jl_array_ptr_ref(leafcache, i);
+        jl_typemap_entry_t *oldentry = (jl_typemap_entry_t*)jl_genericmemory_ptr_ref(leafcache, i);
         if (oldentry) {
             while ((jl_value_t*)oldentry != jl_nothing) {
-                if (oldentry->max_world == ~(size_t)0)
-                    oldentry->max_world = mt_cache_env.max_world;
+                disable_mt_cache(oldentry, env0);
                 oldentry = jl_atomic_load_relaxed(&oldentry->next);
             }
         }
     }
+}
+
+static void jl_method_table_invalidate(jl_method_t *replaced, size_t max_world)
+{
+    if (jl_options.incremental && jl_generating_output())
+        jl_error("Method deletion is not possible during Module precompile.");
+    assert(!replaced->is_for_opaque_closure);
+    assert(jl_atomic_load_relaxed(&jl_world_counter) == max_world);
     // Invalidate the backedges
     int invalidated = 0;
-    jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations);
+    jl_value_t *specializations = jl_atomic_load_relaxed(&replaced->specializations);
     JL_GC_PUSH1(&specializations);
     if (!jl_is_svec(specializations))
         specializations = (jl_value_t*)jl_svec1(specializations);
-    l = jl_svec_len(specializations);
+    size_t i, l = jl_svec_len(specializations);
     for (i = 0; i < l; i++) {
         jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
         if ((jl_value_t*)mi != jl_nothing) {
             invalidated = 1;
-            invalidate_external(mi, max_world);
-            invalidate_backedges(&do_nothing_with_codeinst, mi, max_world, "jl_method_table_disable");
+            invalidate_backedges(mi, max_world, "jl_method_table_disable");
         }
     }
+
+    jl_methtable_t *mt = jl_method_get_table(replaced);
+    struct disable_mt_env mt_cache_env;
+    mt_cache_env.max_world = max_world;
+    mt_cache_env.replaced = replaced;
+    _method_table_invalidate(mt->cache, &mt_cache_env);
     JL_GC_POP();
     // XXX: this might have resolved an ambiguity, for which we have not tracked the edge here,
     // and thus now introduce a mistake into inference
     if (invalidated && _jl_debug_method_invalidation) {
-        jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)method);
+        jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced);
         jl_value_t *loctag = jl_cstr_to_string("jl_method_table_disable");
         JL_GC_PUSH1(&loctag);
         jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
@@ -1909,185 +2619,403 @@ static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m
     }
 }
 
-JL_DLLEXPORT void jl_method_table_disable(jl_methtable_t *mt, jl_method_t *method)
+static int erase_method_backedges(jl_typemap_entry_t *def, void *closure)
 {
-    if (jl_options.incremental && jl_generating_output())
-        jl_printf(JL_STDERR, "WARNING: method deletion during Module precompile may lead to undefined behavior"
-                             "\n  ** incremental compilation may be fatally broken for this module **\n\n");
+    jl_method_t *method = def->func.method;
+    JL_LOCK(&method->writelock);
+    jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations);
+    if (jl_is_svec(specializations)) {
+        size_t i, l = jl_svec_len(specializations);
+        for (i = 0; i < l; i++) {
+            jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
+            if ((jl_value_t*)mi != jl_nothing) {
+                mi->backedges = 0;
+            }
+        }
+    }
+    else {
+        jl_method_instance_t *mi = (jl_method_instance_t*)specializations;
+        mi->backedges = 0;
+    }
+    JL_UNLOCK(&method->writelock);
+    return 1;
+}
+
+static int erase_all_backedges(jl_methtable_t *mt, void *env)
+{
+    return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), erase_method_backedges, env);
+}
+
+JL_DLLEXPORT void jl_disable_new_worlds(void)
+{
+    if (jl_generating_output())
+        jl_error("Disabling Method changes is not possible when generating output.");
+    JL_LOCK(&world_counter_lock);
+    jl_atomic_store_relaxed(&allow_new_worlds, 0);
+    JL_UNLOCK(&world_counter_lock);
+    jl_array_t *mod_array = jl_get_loaded_modules();
+    JL_GC_PUSH1(&mod_array);
+    jl_foreach_reachable_mtable(erase_all_backedges, mod_array, (void*)NULL);
+
+    JL_LOCK(&jl_method_table->cache->writelock);
+    jl_method_table->backedges = (jl_genericmemory_t*)jl_an_empty_memory_any;
+    JL_UNLOCK(&jl_method_table->cache->writelock);
+    JL_GC_POP();
+}
+
+JL_DLLEXPORT void jl_method_table_disable(jl_method_t *method)
+{
+    jl_methtable_t *mt = jl_method_get_table(method);
     jl_typemap_entry_t *methodentry = do_typemap_search(mt, method);
-    JL_LOCK(&mt->writelock);
-    // Narrow the world age on the method to make it uncallable
-    size_t world = jl_atomic_fetch_add(&jl_world_counter, 1);
-    jl_method_table_invalidate(mt, methodentry, world);
-    JL_UNLOCK(&mt->writelock);
+    JL_LOCK(&world_counter_lock);
+    if (!jl_atomic_load_relaxed(&allow_new_worlds))
+        jl_error("Method changes have been disabled via a call to disable_new_worlds.");
+    int enabled = jl_atomic_load_relaxed(&methodentry->max_world) == ~(size_t)0;
+    if (enabled) {
+        // Narrow the world age on the method to make it uncallable
+        size_t world = jl_atomic_load_relaxed(&jl_world_counter);
+        assert(method == methodentry->func.method);
+        jl_atomic_store_relaxed(&method->dispatch_status, 0);
+        assert(jl_atomic_load_relaxed(&methodentry->max_world) == ~(size_t)0);
+        jl_atomic_store_relaxed(&methodentry->max_world, world);
+        jl_method_table_invalidate(method, world);
+        jl_atomic_store_release(&jl_world_counter, world + 1);
+    }
+    JL_UNLOCK(&world_counter_lock);
+    if (!enabled)
+        jl_errorf("Method of %s already disabled", jl_symbol_name(method->name));
 }
 
-static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **isect JL_REQUIRE_ROOTED_SLOT, jl_value_t **isect2 JL_REQUIRE_ROOTED_SLOT)
+jl_typemap_entry_t *jl_method_table_add(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype)
 {
-    *isect2 = NULL;
-    int is_subty = 0;
-    *isect = jl_type_intersection_env_s(t1, t2, NULL, &is_subty);
-    if (*isect == jl_bottom_type)
-        return 0;
-    if (is_subty)
-        return 1;
-    // TODO: sometimes type intersection returns types with free variables
-    if (jl_has_free_typevars(t1) || jl_has_free_typevars(t2))
-        return 1;
-    // determine if type-intersection can be convinced to give a better, non-bad answer
-    // if the intersection was imprecise, see if we can do better by switching the types
-    *isect2 = jl_type_intersection(t2, t1);
-    if (*isect2 == jl_bottom_type) {
-        *isect = jl_bottom_type;
-        *isect2 = NULL;
-        return 0;
+    JL_TIMING(ADD_METHOD, ADD_METHOD);
+    assert(jl_is_method(method));
+    assert(jl_is_mtable(mt));
+    jl_timing_show_method(method, JL_TIMING_DEFAULT_BLOCK);
+    jl_typemap_entry_t *newentry = NULL;
+    JL_GC_PUSH1(&newentry);
+    // add our new entry
+    assert(jl_atomic_load_relaxed(&method->primary_world) == ~(size_t)0); // min-world
+    assert((jl_atomic_load_relaxed(&method->dispatch_status) & METHOD_SIG_LATEST_WHICH) == 0);
+    assert((jl_atomic_load_relaxed(&method->dispatch_status) & METHOD_SIG_LATEST_ONLY) == 0);
+    JL_LOCK(&mt->cache->writelock);
+    newentry = jl_typemap_alloc((jl_tupletype_t*)method->sig, simpletype, jl_emptysvec, (jl_value_t*)method, ~(size_t)0, 1);
+    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, 0);
+
+    if (mt == jl_method_table)
+        update_max_args(method->sig);
+    JL_UNLOCK(&mt->cache->writelock);
+    JL_GC_POP();
+    return newentry;
+}
+
+static int has_key(jl_genericmemory_t *keys, jl_value_t *key)
+{
+    for (size_t l = keys->length, i = 0; i < l; i++) {
+        jl_value_t *k = jl_genericmemory_ptr_ref(keys, i);
+        if (k == NULL)
+            return 0;
+        if (jl_genericmemory_ptr_ref(keys, i) == key)
+            return 1;
     }
-    if (jl_types_egal(*isect2, *isect)) {
-        *isect2 = NULL;
+    return 0;
+}
+
+// Check if m2 is in m1's interferences set, which means !morespecific(m1, m2)
+static int method_in_interferences(jl_method_t *m2, jl_method_t *m1)
+{
+    return has_key(jl_atomic_load_relaxed(&m1->interferences), (jl_value_t*)m2);
+}
+
+// Find the index of a method in the method match array
+static int find_method_in_matches(jl_array_t *t, jl_method_t *method)
+{
+    size_t len = jl_array_nrows(t);
+    for (size_t i = 0; i < len; i++) {
+        jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, i);
+        if (matc->method == method)
+            return i;
     }
-    return 1;
+    return -1;
 }
 
-enum morespec_options {
-    morespec_unknown,
-    morespec_isnot,
-    morespec_is
-};
+// Recursively check if any method in interferences covers the given type signature
+static int check_interferences_covers(jl_method_t *m, jl_value_t *ti, jl_array_t *t, arraylist_t *visited, arraylist_t *seen)
+{
+    arraylist_t workqueue;
+    arraylist_new(&workqueue, 0);
+    arraylist_push(&workqueue, m);
+    arraylist_push(seen, (void*)m);
+    int result = 0;
+    while (workqueue.len > 0) {
+        jl_method_t *current_m = (jl_method_t*)arraylist_pop(&workqueue);
+        JL_GC_PROMISE_ROOTED(current_m);
+        jl_genericmemory_t *interferences = jl_atomic_load_relaxed(&current_m->interferences);
+        for (size_t i = 0; i < interferences->length; i++) {
+            jl_method_t *m2 = (jl_method_t*)jl_genericmemory_ptr_ref(interferences, i);
+            if (m2 == NULL)
+                continue;
+            // Check if we already visited this method
+            int in_seen = 0;
+            for (size_t i = 0; i < seen->len; i++) {
+                if (seen->items[i] == (void*)m2) {
+                    in_seen = 1;
+                    break;
+                }
+            }
+            if (in_seen)
+                continue;
+            arraylist_push(seen, (void*)m2);
+            int idx = find_method_in_matches(t, m2);
+            if (idx < 0)
+                continue;
+            if (method_in_interferences(current_m, m2))
+                continue; // ambiguous
+            assert(visited->items[idx] != (void*)0);
+            if (visited->items[idx] != (void*)1)
+                continue; // part of the same SCC cycle (handled by ambiguity later)
+            if (jl_subtype(ti, m2->sig)) {
+                result = 1;
+                goto cleanup;
+            }
+            arraylist_push(&workqueue, m2);
+        }
+    }
+cleanup:
+    seen->len = 0;
+    arraylist_free(&workqueue);
+    return result;
+}
 
-// check if `type` is replacing `m` with an ambiguity here, given other methods in `d` that already match it
-static int is_replacing(char ambig, jl_value_t *type, jl_method_t *m, jl_method_t *const *d, size_t n, jl_value_t *isect, jl_value_t *isect2, char *morespec)
+static int check_fully_ambiguous(jl_method_t *m, jl_value_t *ti, jl_array_t *t, int include_ambiguous, int *has_ambiguity)
 {
-    size_t k;
-    for (k = 0; k < n; k++) {
-        jl_method_t *m2 = d[k];
-        // see if m2 also fully covered this intersection
-        if (m == m2 || !(jl_subtype(isect, m2->sig) || (isect2 && jl_subtype(isect2, m2->sig))))
+    jl_genericmemory_t *interferences = jl_atomic_load_relaxed(&m->interferences);
+    for (size_t i = 0; i < interferences->length; i++) {
+        jl_method_t *m2 = (jl_method_t*)jl_genericmemory_ptr_ref(interferences, i);
+        if (m2 == NULL)
             continue;
-        if (morespec[k] == (char)morespec_unknown)
-            morespec[k] = (char)(jl_type_morespecific(m2->sig, type) ? morespec_is : morespec_isnot);
-        if (morespec[k] == (char)morespec_is)
-            // not actually shadowing this--m2 will still be better
-            return 0;
-        // if type is not more specific than m (thus now dominating it)
-        // then there is a new ambiguity here,
-        // since m2 was also a previous match over isect,
-        // see if m was previously dominant over all m2
-        // or if this was already ambiguous before
-        if (ambig != morespec_is && !jl_type_morespecific(m->sig, m2->sig)) {
-            // m and m2 were previously ambiguous over the full intersection of mi with type, and will still be ambiguous with addition of type
-            return 0;
+        int idx = find_method_in_matches(t, m2);
+        if (idx < 0)
+            continue;
+        if (!method_in_interferences(m, m2))
+            continue;
+        *has_ambiguity = 1;
+        if (!include_ambiguous && jl_subtype(ti, m2->sig))
+            return 1;
+    }
+    return 0;
+}
+
+// Recursively check if target_method is in the interferences of (morespecific than) start_method, but not the reverse
+static int method_morespecific_via_interferences(jl_method_t *target_method, jl_method_t *start_method)
+{
+    if (target_method == start_method)
+        return 0;
+    // Check direct interferences first
+    if (method_in_interferences(start_method, target_method))
+        return 0;
+    if (method_in_interferences(target_method, start_method))
+        return 1;
+    arraylist_t seen;
+    arraylist_t workqueue;
+    arraylist_new(&seen, 0);
+    arraylist_push(&seen, (void*)start_method);
+    arraylist_new(&workqueue, 0);
+    arraylist_push(&workqueue, start_method);
+    int result = 0;
+    while (workqueue.len > 0) {
+        jl_method_t *current = (jl_method_t*)arraylist_pop(&workqueue);
+        JL_GC_PROMISE_ROOTED(current);
+        jl_genericmemory_t *interferences = jl_atomic_load_relaxed(&current->interferences);
+        for (size_t i = 0; i < interferences->length; i++) {
+            jl_method_t *interference_method = (jl_method_t*)jl_genericmemory_ptr_ref(interferences, i);
+            if (interference_method == NULL)
+                continue;
+            // Check if we're already visiting this interference method (cycle prevention)
+            int already_seen = 0;
+            for (size_t j = 0; j < seen.len; j++) {
+                if (seen.items[j] == (void*)interference_method) {
+                    already_seen = 1;
+                    break;
+                }
+            }
+            if (already_seen)
+                continue;
+            arraylist_push(&seen, interference_method);
+            if (method_in_interferences(current, interference_method))
+                continue; // only follow edges to morespecific methods in search of morespecific target (skip ambiguities)
+            // Check direct interferences for this interference method
+            if (method_in_interferences(interference_method, target_method))
+                continue; // return 0 for this path
+            if (method_in_interferences(target_method, interference_method)) {
+                result = 1;
+                goto cleanup;
+            }
+            arraylist_push(&workqueue, interference_method);
         }
     }
-    return 1;
+cleanup:
+    arraylist_free(&workqueue);
+    arraylist_free(&seen);
+    //assert(result == jl_method_morespecific(target_method, start_method) || jl_has_empty_intersection(target_method->sig, start_method->sig) || jl_has_empty_intersection(start_method->sig, target_method->sig));
+    return result;
 }
 
-JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype)
+
+void jl_method_table_activate(jl_typemap_entry_t *newentry)
 {
     JL_TIMING(ADD_METHOD, ADD_METHOD);
-    assert(jl_is_method(method));
+    jl_method_t *method = newentry->func.method;
+    jl_methtable_t *mt = jl_method_get_table(method);
     assert(jl_is_mtable(mt));
+    assert(jl_is_method(method));
     jl_timing_show_method(method, JL_TIMING_DEFAULT_BLOCK);
-    jl_value_t *type = method->sig;
+    jl_value_t *type = (jl_value_t*)newentry->sig;
     jl_value_t *oldvalue = NULL;
     jl_array_t *oldmi = NULL;
-    if (method->primary_world == 1)
-        method->primary_world = jl_atomic_fetch_add(&jl_world_counter, 1) + 1;
-    size_t max_world = method->primary_world - 1;
+    size_t world = jl_atomic_load_relaxed(&method->primary_world);
+    assert(world == jl_atomic_load_relaxed(&jl_world_counter) + 1); // min-world
+    assert((jl_atomic_load_relaxed(&method->dispatch_status) & METHOD_SIG_LATEST_WHICH) == 0);
+    assert((jl_atomic_load_relaxed(&method->dispatch_status) & METHOD_SIG_LATEST_ONLY) == 0);
+    assert(jl_atomic_load_relaxed(&newentry->min_world) == ~(size_t)0);
+    assert(jl_atomic_load_relaxed(&newentry->max_world) == 1);
+    jl_atomic_store_relaxed(&newentry->min_world, world);
+    jl_atomic_store_relaxed(&method->primary_world, world);
+    size_t max_world = world - 1;
     jl_value_t *loctag = NULL;  // debug info for invalidation
     jl_value_t *isect = NULL;
     jl_value_t *isect2 = NULL;
-    jl_value_t *isect3 = NULL;
-    jl_typemap_entry_t *newentry = NULL;
-    JL_GC_PUSH7(&oldvalue, &oldmi, &newentry, &loctag, &isect, &isect2, &isect3);
-    JL_LOCK(&mt->writelock);
-    // add our new entry
-    newentry = jl_typemap_alloc((jl_tupletype_t*)type, simpletype, jl_emptysvec,
-            (jl_value_t*)method, method->primary_world, method->deleted_world);
-    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, jl_cachearg_offset(mt));
+    jl_genericmemory_t *interferences = NULL;
+    JL_GC_PUSH6(&oldvalue, &oldmi, &loctag, &isect, &isect2, &interferences);
     jl_typemap_entry_t *replaced = NULL;
-    // then check what entries we replaced
-    oldvalue = get_intersect_matches(jl_atomic_load_relaxed(&mt->defs), newentry, &replaced, jl_cachearg_offset(mt), max_world);
-    int invalidated = 0;
-    if (replaced) {
-        oldvalue = (jl_value_t*)replaced;
-        invalidated = 1;
-        method_overwrite(newentry, replaced->func.method);
-        jl_method_table_invalidate(mt, replaced, max_world);
+    // Check what entries this intersects with in the prior world.
+    oldvalue = get_intersect_matches(jl_atomic_load_relaxed(&mt->defs), newentry, &replaced, max_world);
+    jl_method_t *const *d;
+    size_t j, n;
+    if (oldvalue == NULL) {
+        d = NULL;
+        n = 0;
     }
     else {
-        jl_method_t *const *d;
-        size_t j, n;
-        if (oldvalue == NULL) {
-            d = NULL;
-            n = 0;
-        }
-        else {
-            assert(jl_is_array(oldvalue));
-            d = (jl_method_t**)jl_array_ptr_data(oldvalue);
-            n = jl_array_len(oldvalue);
-        }
-        if (mt->backedges) {
-            jl_value_t **backedges = jl_array_ptr_data(mt->backedges);
-            size_t i, na = jl_array_len(mt->backedges);
-            size_t ins = 0;
-            for (i = 1; i < na; i += 2) {
-                jl_value_t *backedgetyp = backedges[i - 1];
-                int missing = 0;
-                if (jl_type_intersection2(backedgetyp, (jl_value_t*)type, &isect, &isect2)) {
-                    // See if the intersection was actually already fully
-                    // covered, but that the new method is ambiguous.
-                    //  -> no previous method: now there is one, need to update the missing edge
-                    //  -> one+ previously matching method(s):
-                    //    -> more specific then all of them: need to update the missing edge
-                    //      -> some may have been ambiguous: now there is a replacement
-                    //      -> some may have been called: now there is a replacement (also will be detected in the loop later)
-                    //    -> less specific or ambiguous with any one of them: can ignore the missing edge (not missing)
-                    //      -> some may have been ambiguous: still are
-                    //      -> some may have been called: they may be partly replaced (will be detected in the loop later)
-                    // c.f. `is_replacing`, which is a similar query, but with an existing method match to compare against
-                    missing = 1;
-                    size_t j;
-                    for (j = 0; j < n; j++) {
-                        jl_method_t *m = d[j];
-                        if (jl_subtype(isect, m->sig) || (isect2 && jl_subtype(isect2, m->sig))) {
-                            // We now know that there actually was a previous
-                            // method for this part of the type intersection.
-                            if (!jl_type_morespecific(type, m->sig)) {
-                                missing = 0;
-                                break;
-                            }
-                        }
+        assert(jl_is_array(oldvalue));
+        d = (jl_method_t**)jl_array_ptr_data(oldvalue);
+        n = jl_array_nrows(oldvalue);
+        oldmi = jl_alloc_vec_any(0);
+    }
+
+    // These get updated from their state stored in the caches files, since content in cache files gets added "all at once".
+    int invalidated = 0;
+    int dispatch_bits = METHOD_SIG_LATEST_WHICH; // Always set LATEST_WHICH
+    // Check precompiled dispatch status bits
+    int precompiled_status = jl_atomic_load_relaxed(&method->dispatch_status);
+    if (!(precompiled_status & METHOD_SIG_PRECOMPILE_MANY))
+        // This will store if this method will be currently the only result that would returned from `ml_matches` given `sig`.
+        dispatch_bits |= METHOD_SIG_LATEST_ONLY; // Tentatively set, will be cleared if not applicable
+    // Holds the set of all intersecting methods not more specific than this one.
+    // Note: this set may be incomplete (may exclude methods whose intersection
+    // is covered by another method that is morespecific than both, causing them
+    // to have no relevant type intersection for sorting).
+    interferences = (jl_genericmemory_t*)jl_atomic_load_relaxed(&method->interferences);
+    if (oldvalue) {
+        assert(n > 0);
+        if (replaced) {
+            oldvalue = (jl_value_t*)replaced;
+            jl_method_t *m = replaced->func.method;
+            invalidated = 1;
+            method_overwrite(newentry, m);
+            // This is an optimized version of below, given we know the type-intersection is exact
+            jl_method_table_invalidate(m, max_world);
+            int m_dispatch = jl_atomic_load_relaxed(&m->dispatch_status);
+            // Clear METHOD_SIG_LATEST_ONLY and METHOD_SIG_LATEST_WHICH bits
+            jl_atomic_store_relaxed(&m->dispatch_status, 0);
+            if (!(m_dispatch & METHOD_SIG_LATEST_ONLY))
+                dispatch_bits &= ~METHOD_SIG_LATEST_ONLY;
+            // Take over the interference list from the replaced method
+            jl_genericmemory_t *m_interferences = jl_atomic_load_relaxed(&m->interferences);
+            if (interferences->length == 0) {
+                interferences = jl_genericmemory_copy(m_interferences);
+            }
+            else {
+                for (size_t i = 0; i < m_interferences->length; i++) {
+                    jl_value_t *k = jl_genericmemory_ptr_ref(m_interferences, i);
+                    if (k && !has_key(interferences, (jl_value_t*)k)) {
+                        ssize_t idx;
+                        interferences = jl_idset_put_key(interferences, (jl_value_t*)k, &idx);
                     }
                 }
-                if (missing) {
-                    jl_method_instance_t *backedge = (jl_method_instance_t*)backedges[i];
-                    invalidate_external(backedge, max_world);
-                    invalidate_method_instance(&do_nothing_with_codeinst, backedge, max_world, 0);
-                    invalidated = 1;
-                    if (_jl_debug_method_invalidation)
-                        jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)backedgetyp);
-                }
-                else {
-                    backedges[ins++] = backedges[i - 1];
-                    backedges[ins++] = backedges[i - 0];
+            }
+            ssize_t idx;
+            m_interferences = jl_idset_put_key(m_interferences, (jl_value_t*)method, &idx);
+            jl_atomic_store_release(&m->interferences, m_interferences);
+            jl_gc_wb(m, m_interferences);
+            for (j = 0; j < n; j++) {
+                jl_method_t *m2 = d[j];
+                if (m2 && method_in_interferences(m, m2)) {
+                    jl_genericmemory_t *m2_interferences = jl_atomic_load_relaxed(&m2->interferences);
+                    ssize_t idx;
+                    m2_interferences = jl_idset_put_key(m2_interferences, (jl_value_t*)method, &idx);
+                    jl_atomic_store_release(&m2->interferences, m2_interferences);
+                    jl_gc_wb(m2, m2_interferences);
                 }
             }
-            if (ins == 0)
-                mt->backedges = NULL;
-            else
-                jl_array_del_end(mt->backedges, na - ins);
+            loctag = jl_atomic_load_relaxed(&m->specializations); // use loctag for a gcroot
+            _Atomic(jl_method_instance_t*) *data;
+            size_t l;
+            if (jl_is_svec(loctag)) {
+                data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(loctag);
+                l = jl_svec_len(loctag);
+            }
+            else {
+                data = (_Atomic(jl_method_instance_t*)*) &loctag;
+                l = 1;
+            }
+            for (size_t i = 0; i < l; i++) {
+                jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
+                if ((jl_value_t*)mi == jl_nothing)
+                    continue;
+                jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi);
+            }
+            d = NULL;
+            n = 0;
         }
-        if (oldvalue) {
-            oldmi = jl_alloc_vec_any(0);
+        else {
             char *morespec = (char*)alloca(n);
-            memset(morespec, morespec_unknown, n);
+            // Compute all morespec values upfront
+            for (j = 0; j < n; j++)
+                morespec[j] = (char)jl_type_morespecific(d[j]->sig, type);
             for (j = 0; j < n; j++) {
                 jl_method_t *m = d[j];
-                if (morespec[j] == (char)morespec_is)
+                // Compute ambig state: is there an ambiguity between new method and old m?
+                char ambig = !morespec[j] && !jl_type_morespecific(type, m->sig);
+                // Compute updates to the dispatch state bits
+                int m_dispatch = jl_atomic_load_relaxed(&m->dispatch_status);
+                if (morespec[j] || ambig) {
+                    // !morespecific(new, old)
+                    dispatch_bits &= ~METHOD_SIG_LATEST_ONLY;
+                    // Add the old method to this interference set
+                    ssize_t idx;
+                    if (!has_key(interferences, (jl_value_t*)m))
+                        interferences = jl_idset_put_key(interferences, (jl_value_t*)m, &idx);
+                }
+                if (!morespec[j]) {
+                    // !morespecific(old, new)
+                    m_dispatch &= ~METHOD_SIG_LATEST_ONLY;
+                    // Add the new method to its interference set
+                    jl_genericmemory_t *m_interferences = jl_atomic_load_relaxed(&m->interferences);
+                    ssize_t idx;
+                    m_interferences = jl_idset_put_key(m_interferences, (jl_value_t*)method, &idx);
+                    jl_atomic_store_release(&m->interferences, m_interferences);
+                    jl_gc_wb(m, m_interferences);
+                }
+                // Add methods that intersect but are not more specific to interference list
+                jl_atomic_store_relaxed(&m->dispatch_status, m_dispatch);
+                if (morespec[j])
                     continue;
+
+                // Now examine if this caused any invalidations.
                 loctag = jl_atomic_load_relaxed(&m->specializations); // use loctag for a gcroot
                 _Atomic(jl_method_instance_t*) *data;
-                size_t i, l;
+                size_t l;
                 if (jl_is_svec(loctag)) {
                     data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(loctag);
                     l = jl_svec_len(loctag);
@@ -2096,101 +3024,120 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                     data = (_Atomic(jl_method_instance_t*)*) &loctag;
                     l = 1;
                 }
-                enum morespec_options ambig = morespec_unknown;
-                for (i = 0; i < l; i++) {
+                for (size_t i = 0; i < l; i++) {
                     jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
                     if ((jl_value_t*)mi == jl_nothing)
                         continue;
-                    isect3 = jl_type_intersection(m->sig, (jl_value_t*)mi->specTypes);
-                    if (jl_type_intersection2(type, isect3, &isect, &isect2)) {
+                    if (jl_type_intersection2(type, mi->specTypes, &isect, &isect2)) {
+                        // Replacing a method--see if this really was the selected method previously
+                        // over the intersection (not ambiguous) and the new method will be selected now (morespec).
                         // TODO: this only checks pair-wise for ambiguities, but the ambiguities could arise from the interaction of multiple methods
-                        // and thus might miss a case where we introduce an ambiguity between two existing methods
+                        // and thus might miss a case where we introduce an ambiguity between`.u two existing methods
                         // We could instead work to sort this into 3 groups `morespecific .. ambiguous .. lesspecific`, with `type` in ambiguous,
                         // such that everything in `morespecific` dominates everything in `ambiguous`, and everything in `ambiguous` dominates everything in `lessspecific`
                         // And then compute where each isect falls, and whether it changed group--necessitating invalidation--or not.
-                        if (morespec[j] == (char)morespec_unknown)
-                            morespec[j] = (char)(jl_type_morespecific(m->sig, type) ? morespec_is : morespec_isnot);
-                        if (morespec[j] == (char)morespec_is)
-                            // not actually shadowing--the existing method is still better
-                            break;
-                        if (ambig == morespec_unknown)
-                            ambig = jl_type_morespecific(type, m->sig) ? morespec_is : morespec_isnot;
-                        // replacing a method--see if this really was the selected method previously
-                        // over the intersection (not ambiguous) and the new method will be selected now (morespec_is)
                         int replaced_dispatch = is_replacing(ambig, type, m, d, n, isect, isect2, morespec);
                         // found that this specialization dispatch got replaced by m
-                        // call invalidate_backedges(&do_nothing_with_codeinst, mi, max_world, "jl_method_table_insert");
+                        // call invalidate_backedges(mi, max_world, "jl_method_table_insert");
                         // but ignore invoke-type edges
-                        jl_array_t *backedges = mi->backedges;
-                        if (backedges) {
-                            size_t ib = 0, insb = 0, nb = jl_array_len(backedges);
-                            jl_value_t *invokeTypes;
-                            jl_method_instance_t *caller;
-                            while (ib < nb) {
-                                ib = get_next_edge(backedges, ib, &invokeTypes, &caller);
-                                int replaced_edge;
-                                if (invokeTypes) {
-                                    // n.b. normally we must have mi.specTypes <: invokeTypes <: m.sig (though it might not strictly hold), so we only need to check the other subtypes
-                                    replaced_edge = jl_subtype(invokeTypes, type) && is_replacing(ambig, type, m, d, n, invokeTypes, NULL, morespec);
-                                }
-                                else {
-                                    replaced_edge = replaced_dispatch;
-                                }
-                                if (replaced_edge) {
-                                    invalidate_method_instance(&do_nothing_with_codeinst, caller, max_world, 1);
-                                    invalidated = 1;
-                                }
-                                else {
-                                    insb = set_next_edge(backedges, insb, invokeTypes, caller);
-                                }
-                            }
-                            jl_array_del_end(backedges, nb - insb);
+                        int invalidatedmi = _invalidate_dispatch_backedges(mi, type, m, d, n, replaced_dispatch, ambig, max_world, morespec);
+                        if (replaced_dispatch) {
+                            jl_atomic_store_relaxed(&mi->dispatch_status, 0);
+                            jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi);
                         }
-                        jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi);
-                        invalidate_external(mi, max_world);
-                        if (_jl_debug_method_invalidation && invalidated) {
+                        if (_jl_debug_method_invalidation && invalidatedmi) {
                             jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi);
                             loctag = jl_cstr_to_string("jl_method_table_insert");
                             jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
                         }
+                        invalidated |= invalidatedmi;
                     }
+                    // TODO: do we have any interesting cases left where isect3 is useful
+                    //jl_value_t *isect3 = NULL;
+                    //jl_value_t *isect4 = NULL;
+                    //jl_value_t *isect5 = NULL;
+                    //JL_GC_PUSH3(&isec3, &isect4, &isect5);
+                    //isect3 = jl_type_intersection(m->sig, (jl_value_t*)mi->specTypes);
+                    //jl_type_intersection2(type, isect3, &isect4, &isect5);
+                    //if (!jl_types_equal(isect, isect4) && (!isect2 || !jl_types_equal(isect2, isect4)) &&
+                    //    (!isect5 || (!jl_types_equal(isect, isect5) && (!isect2 || !jl_types_equal(isect2, isect5))))) {
+                    //    jl_(type);
+                    //    jl_(mi->specTypes);
+                    //    jl_(m->sig);
+                    //}
+                    //JL_GC_POP();
+                    isect = NULL;
+                    isect2 = NULL;
                 }
             }
-            if (jl_array_len(oldmi)) {
-                // search mt->cache and leafcache and drop anything that might overlap with the new method
-                // this is very cheap, so we don't mind being fairly conservative at over-approximating this
-                struct invalidate_mt_env mt_cache_env;
-                mt_cache_env.max_world = max_world;
-                mt_cache_env.shadowed = oldmi;
-                mt_cache_env.newentry = newentry;
-                mt_cache_env.invalidated = 0;
-
-                jl_typemap_visitor(jl_atomic_load_relaxed(&mt->cache), invalidate_mt_cache, (void*)&mt_cache_env);
-                jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
-                size_t i, l = jl_array_len(leafcache);
-                for (i = 1; i < l; i += 2) {
-                    jl_value_t *entry = jl_array_ptr_ref(leafcache, i);
-                    if (entry) {
-                        while (entry != jl_nothing) {
-                            invalidate_mt_cache((jl_typemap_entry_t*)entry, (void*)&mt_cache_env);
-                            entry = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)entry)->next);
-                        }
-                    }
+        }
+    }
+
+    jl_methcache_t *mc = jl_method_table->cache;
+    JL_LOCK(&mc->writelock);
+    struct _typename_invalidate_backedge typename_env = {type, &isect, &isect2, d, n, max_world, invalidated};
+    if (!jl_foreach_top_typename_for(_typename_invalidate_backedges, type, 1, &typename_env)) {
+        // if the new method cannot be split into exact backedges, scan the whole table for anything that might be affected
+        jl_genericmemory_t *allbackedges = jl_method_table->backedges;
+        for (size_t i = 0, n = allbackedges->length; i < n; i += 2) {
+            jl_value_t *tn = jl_genericmemory_ptr_ref(allbackedges, i);
+            jl_value_t *backedges = jl_genericmemory_ptr_ref(allbackedges, i+1);
+            if (tn && tn != jl_nothing && backedges)
+                _typename_invalidate_backedges((jl_typename_t*)tn, 0, &typename_env);
+        }
+    }
+    invalidated |= typename_env.invalidated;
+    if (oldmi && jl_array_nrows(oldmi)) {
+        // drop leafcache and search mc->cache and drop anything that might overlap with the new method
+        // this is very cheap, so we don't mind being very conservative at over-approximating this
+        struct invalidate_mt_env mt_cache_env;
+        mt_cache_env.max_world = max_world;
+        mt_cache_env.shadowed = oldmi;
+        mt_cache_env.newentry = newentry;
+
+        jl_typemap_visitor(jl_atomic_load_relaxed(&mc->cache), invalidate_mt_cache, (void*)&mt_cache_env);
+        jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mc->leafcache);
+        size_t i, l = leafcache->length;
+        for (i = 1; i < l; i += 2) {
+            jl_value_t *entry = jl_genericmemory_ptr_ref(leafcache, i);
+            if (entry) {
+                while (entry != jl_nothing) {
+                    jl_atomic_store_relaxed(&((jl_typemap_entry_t*)entry)->max_world, max_world);
+                    entry = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)entry)->next);
                 }
             }
         }
+        jl_atomic_store_relaxed(&mc->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any);
     }
+    JL_UNLOCK(&mc->writelock);
     if (invalidated && _jl_debug_method_invalidation) {
         jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)method);
         loctag = jl_cstr_to_string("jl_method_table_insert");
         jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
     }
-    update_max_args(mt, type);
-    JL_UNLOCK(&mt->writelock);
+    jl_atomic_store_relaxed(&newentry->max_world, ~(size_t)0);
+    jl_atomic_store_relaxed(&method->dispatch_status, dispatch_bits); // TODO: this should be sequenced fully after the world counter store
+    jl_atomic_store_release(&method->interferences, interferences);
+    jl_gc_wb(method, interferences);
+    JL_GC_POP();
+}
+
+JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype)
+{
+    jl_typemap_entry_t *newentry = jl_method_table_add(mt, method, simpletype);
+    JL_GC_PUSH1(&newentry);
+    JL_LOCK(&world_counter_lock);
+    if (!jl_atomic_load_relaxed(&allow_new_worlds))
+        jl_error("Method changes have been disabled via a call to disable_new_worlds.");
+    size_t world = jl_atomic_load_relaxed(&jl_world_counter) + 1;
+    jl_atomic_store_relaxed(&method->primary_world, world);
+    jl_method_table_activate(newentry);
+    jl_atomic_store_release(&jl_world_counter, world);
+    JL_UNLOCK(&world_counter_lock);
     JL_GC_POP();
 }
 
-static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, size_t world)
+static void JL_NORETURN jl_method_error_bare(jl_value_t *f, jl_value_t *args, size_t world)
 {
     if (jl_methoderror_type) {
         jl_value_t *e = jl_new_struct_uninit(jl_methoderror_type);
@@ -2209,13 +3156,13 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args,
         jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n");
         jl_ptls_t ptls = jl_current_task->ptls;
         ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
-        jl_critical_error(0, 0, NULL, jl_current_task);
+        jl_fprint_critical_error(ios_safe_stderr, 0, 0, NULL, jl_current_task);
         abort();
     }
     // not reached
 }
 
-void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world)
+void JL_NORETURN jl_method_error(jl_value_t *f, jl_value_t **args, size_t na, size_t world)
 {
     jl_value_t *argtup = jl_f_tuple(NULL, args, na - 1);
     JL_GC_PUSH1(&argtup);
@@ -2233,25 +3180,33 @@ static jl_tupletype_t *lookup_arg_type_tuple(jl_value_t *arg1 JL_PROPAGATES_ROOT
     return jl_lookup_arg_tuple_type(arg1, args, nargs, 1);
 }
 
-jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world)
+JL_DLLEXPORT jl_value_t *jl_method_lookup_by_tt(jl_tupletype_t *tt, size_t world, jl_value_t *_mt)
+{
+    jl_methtable_t *mt = NULL;
+    if (_mt == jl_nothing) {
+        mt = jl_method_table;
+    }
+    else {
+        assert(jl_is_mtable(_mt));
+        mt = (jl_methtable_t*) _mt;
+    }
+    jl_methcache_t *mc = mt->cache;
+    jl_method_instance_t *mi = jl_mt_assoc_by_type(mc, tt, world);
+    if (!mi)
+        return jl_nothing;
+    return (jl_value_t*) mi;
+}
+
+JL_DLLEXPORT jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world)
 {
     assert(nargs > 0 && "expected caller to handle this case");
-    jl_methtable_t *mt = jl_gf_mtable(args[0]);
-    jl_typemap_t *cache = jl_atomic_load_relaxed(&mt->cache); // XXX: gc root for this?
-    jl_typemap_entry_t *entry = jl_typemap_assoc_exact(cache, args[0], &args[1], nargs, jl_cachearg_offset(mt), world);
+    jl_methcache_t *mc = jl_method_table->cache;
+    jl_typemap_t *cache = jl_atomic_load_relaxed(&mc->cache); // XXX: gc root for this?
+    jl_typemap_entry_t *entry = jl_typemap_assoc_exact(cache, args[0], &args[1], nargs, jl_cachearg_offset(), world);
     if (entry)
         return entry->func.linfo;
     jl_tupletype_t *tt = arg_type_tuple(args[0], &args[1], nargs);
-    jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
-    entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world);
-    if (entry)
-        return entry->func.linfo;
-    JL_GC_PUSH1(&tt);
-    JL_LOCK(&mt->writelock);
-    jl_method_instance_t *sf = jl_mt_assoc_by_type(mt, tt, world);
-    JL_UNLOCK(&mt->writelock);
-    JL_GC_POP();
-    return sf;
+    return jl_mt_assoc_by_type(mc, tt, world);
 }
 
 // return a Vector{Any} of svecs, each describing a method match:
@@ -2260,7 +3215,7 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w
 // spvals is any matched static parameter values, m is the Method,
 // full is a boolean indicating if that method fully covers the input
 //
-// lim is the max # of methods to return. if there are more, returns jl_false.
+// lim is the max # of methods to return. if there are more, returns jl_nothing.
 // Negative values stand for no limit.
 // Unless lim == -1, remove matches that are unambiguously covered by earlier ones
 JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous,
@@ -2274,28 +3229,17 @@ JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *
     if (unw == (jl_value_t*)jl_emptytuple_type || jl_tparam0(unw) == jl_bottom_type)
         return (jl_value_t*)jl_an_empty_vec_any;
     if (mt == jl_nothing)
-        mt = (jl_value_t*)jl_method_table_for(unw);
-    if (mt == jl_nothing)
-        mt = NULL;
-    return ml_matches((jl_methtable_t*)mt, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig);
-}
-
-jl_method_instance_t *jl_get_unspecialized_from_mi(jl_method_instance_t *method JL_PROPAGATES_ROOT)
-{
-    jl_method_t *def = method->def.method;
-    jl_method_instance_t *mi = jl_get_unspecialized(def);
-    if (mi == NULL) {
-        return method;
-    }
-    return mi;
+        mt = (jl_value_t*)jl_method_table;
+    jl_methcache_t *mc = ((jl_methtable_t*)mt)->cache;
+    return ml_matches((jl_methtable_t*)mt, mc, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig);
 }
 
-jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT)
+JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT)
 {
     // one unspecialized version of a function can be shared among all cached specializations
     if (!jl_is_method(def) || def->source == NULL) {
         // generated functions might instead randomly just never get inferred, sorry
-        return NULL;
+        return (jl_method_instance_t*)jl_nothing;
     }
     jl_method_instance_t *unspec = jl_atomic_load_relaxed(&def->unspecialized);
     if (unspec == NULL) {
@@ -2311,36 +3255,97 @@ jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT)
     return unspec;
 }
 
-
-jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi, size_t world)
+STATIC_INLINE jl_value_t *_jl_rettype_inferred(jl_value_t *owner, jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT
 {
     jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
     while (codeinst) {
-        if (codeinst->min_world <= world && world <= codeinst->max_world) {
-            if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL)
-                return codeinst;
+        if (jl_atomic_load_relaxed(&codeinst->min_world) <= min_world &&
+            max_world <= jl_atomic_load_relaxed(&codeinst->max_world) &&
+            jl_egal(codeinst->owner, owner)) {
+
+            jl_value_t *code = jl_atomic_load_relaxed(&codeinst->inferred);
+            if (code)
+                return (jl_value_t*)codeinst;
         }
         codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
+    return (jl_value_t*)jl_nothing;
+}
+
+JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_value_t *owner, jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT
+{
+    return (jl_value_t*)_jl_rettype_inferred(owner, mi, min_world, max_world);
+}
+
+JL_DLLEXPORT jl_value_t *jl_rettype_inferred_native(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT
+{
+    return (jl_value_t*)_jl_rettype_inferred(jl_nothing, mi, min_world, max_world);
+}
+
+JL_DLLEXPORT jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT = jl_rettype_inferred_native;
+
+STATIC_INLINE jl_callptr_t jl_method_compiled_callptr(jl_method_instance_t *mi, size_t world, jl_code_instance_t **codeinst_out) JL_NOTSAFEPOINT
+{
+    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
+    for (; codeinst; codeinst = jl_atomic_load_relaxed(&codeinst->next)) {
+        if (codeinst->owner != jl_nothing)
+            continue;
+        if (jl_atomic_load_relaxed(&codeinst->min_world) <= world && world <= jl_atomic_load_relaxed(&codeinst->max_world)) {
+            jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
+            if (!invoke)
+                continue;
+            *codeinst_out = codeinst;
+            return invoke;
+        }
+    }
     return NULL;
 }
 
+jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi, size_t world) JL_NOTSAFEPOINT
+{
+    jl_code_instance_t *codeinst = NULL;
+    jl_method_compiled_callptr(mi, world, &codeinst);
+    return codeinst;
+}
+
 jl_mutex_t precomp_statement_out_lock;
 
-static void record_precompile_statement(jl_method_instance_t *mi)
+_Atomic(uint8_t) jl_force_trace_compile_timing_enabled = 0;
+
+/**
+ * @brief Enable force trace compile to stderr with timing.
+ */
+JL_DLLEXPORT void jl_force_trace_compile_timing_enable(void)
+{
+    // Increment the flag to allow reentrant callers to `@trace_compile`.
+    jl_atomic_fetch_add(&jl_force_trace_compile_timing_enabled, 1);
+}
+/**
+ * @brief Disable force trace compile to stderr with timing.
+ */
+JL_DLLEXPORT void jl_force_trace_compile_timing_disable(void)
+{
+    // Increment the flag to allow reentrant callers to `@trace_compile`.
+    jl_atomic_fetch_add(&jl_force_trace_compile_timing_enabled, -1);
+}
+
+static void record_precompile_statement(jl_method_instance_t *mi, double compilation_time, int is_recompile)
 {
     static ios_t f_precompile;
     static JL_STREAM* s_precompile = NULL;
     jl_method_t *def = mi->def.method;
-    if (jl_options.trace_compile == NULL)
+    uint8_t force_trace_compile = jl_atomic_load_relaxed(&jl_force_trace_compile_timing_enabled);
+    if (force_trace_compile == 0 && jl_options.trace_compile == NULL)
         return;
     if (!jl_is_method(def))
         return;
+    if (def->is_for_opaque_closure)
+        return; // OpaqueClosure methods cannot be looked up by their types, so are incompatible with `precompile(...)`
 
     JL_LOCK(&precomp_statement_out_lock);
     if (s_precompile == NULL) {
         const char *t = jl_options.trace_compile;
-        if (!strncmp(t, "stderr", 6)) {
+        if (force_trace_compile || !strncmp(t, "stderr", 6)) {
             s_precompile = JL_STDERR;
         }
         else {
@@ -2350,17 +3355,145 @@ static void record_precompile_statement(jl_method_instance_t *mi)
         }
     }
     if (!jl_has_free_typevars(mi->specTypes)) {
+        if (is_recompile && s_precompile == JL_STDERR && jl_options.color != JL_OPTIONS_COLOR_OFF)
+            jl_printf(s_precompile, "\e[33m");
+        if (force_trace_compile || jl_options.trace_compile_timing)
+            jl_printf(s_precompile, "#= %6.1f ms =# ", compilation_time / 1e6);
         jl_printf(s_precompile, "precompile(");
         jl_static_show(s_precompile, mi->specTypes);
-        jl_printf(s_precompile, ")\n");
+        jl_printf(s_precompile, ")");
+        if (is_recompile) {
+            jl_printf(s_precompile, " # recompile");
+            if (s_precompile == JL_STDERR && jl_options.color != JL_OPTIONS_COLOR_OFF) {
+                jl_printf(s_precompile, "\e[0m");
+            }
+        }
+        jl_printf(s_precompile, "\n");
         if (s_precompile != JL_STDERR)
             ios_flush(&f_precompile);
     }
     JL_UNLOCK(&precomp_statement_out_lock);
 }
 
+jl_mutex_t dispatch_statement_out_lock;
+
+_Atomic(uint8_t) jl_force_trace_dispatch_enabled = 0;
+
+/**
+ * @brief Enable force trace dispatch to stderr.
+ */
+JL_DLLEXPORT void jl_force_trace_dispatch_enable(void)
+{
+    // Increment the flag to allow reentrant callers to `@trace_dispatch`.
+    jl_atomic_fetch_add(&jl_force_trace_dispatch_enabled, 1);
+}
+/**
+ * @brief Disable force trace dispatch to stderr.
+ */
+JL_DLLEXPORT void jl_force_trace_dispatch_disable(void)
+{
+    // Increment the flag to allow reentrant callers to `@trace_dispatch`.
+    jl_atomic_fetch_add(&jl_force_trace_dispatch_enabled, -1);
+}
+
+static void record_dispatch_statement(jl_method_instance_t *mi)
+{
+    static ios_t f_dispatch;
+    static JL_STREAM* s_dispatch = NULL;
+    jl_method_t *def = mi->def.method;
+    if (!jl_is_method(def))
+        return;
+
+    uint8_t force_trace_dispatch = jl_atomic_load_relaxed(&jl_force_trace_dispatch_enabled);
+    JL_LOCK(&dispatch_statement_out_lock);
+    if (s_dispatch == NULL) {
+        const char *t = jl_options.trace_dispatch;
+        if (force_trace_dispatch || !strncmp(t, "stderr", 6)) {
+            s_dispatch = JL_STDERR;
+        }
+        else {
+            if (ios_file(&f_dispatch, t, 1, 1, 1, 1) == NULL)
+                jl_errorf("cannot open dispatch statement file \"%s\" for writing", t);
+            s_dispatch = (JL_STREAM*) &f_dispatch;
+        }
+    }
+    // NOTE: For builtin functions, the specType is just `Tuple`, which is not useful to print.
+    if (!jl_has_free_typevars(mi->specTypes) && (jl_datatype_t*)mi->specTypes != jl_tuple_type) {
+        jl_printf(s_dispatch, "precompile(");
+        jl_static_show(s_dispatch, mi->specTypes);
+        jl_printf(s_dispatch, ")\n");
+        if (s_dispatch != JL_STDERR)
+            ios_flush(&f_dispatch);
+    }
+    JL_UNLOCK(&dispatch_statement_out_lock);
+}
+
+static void record_dispatch_statement_on_first_dispatch(jl_method_instance_t *mfunc) {
+    uint8_t force_trace_dispatch = jl_atomic_load_relaxed(&jl_force_trace_dispatch_enabled);
+    if (force_trace_dispatch || jl_options.trace_dispatch != NULL) {
+        uint8_t miflags = jl_atomic_load_relaxed(&mfunc->flags);
+        uint8_t was_dispatched = miflags & JL_MI_FLAGS_MASK_DISPATCHED;
+        if (!was_dispatched) {
+            miflags |= JL_MI_FLAGS_MASK_DISPATCHED;
+            jl_atomic_store_relaxed(&mfunc->flags, miflags);
+            record_dispatch_statement(mfunc);
+        }
+    }
+}
+
+// If waitcompile is 0, this will return NULL if compiling is on-going in the JIT. This is
+// useful for the JIT itself, since it just doesn't cause redundant work or missed updates,
+// but merely causes it to look into the current JIT worklist.
+void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile)
+{
+    uint8_t flags = jl_atomic_load_acquire(&ci->flags); // happens-before for subsequent read of fptr
+    while (1) {
+        jl_callptr_t initial_invoke = jl_atomic_load_acquire(&ci->invoke); // happens-before for subsequent read of fptr
+        if (initial_invoke == jl_fptr_wait_for_compiled_addr) {
+            if (!waitcompile) {
+                *invoke = NULL;
+                *specptr = NULL;
+                *specsigflags = 0b00;
+                return;
+            }
+            jl_compile_codeinst(ci);
+            initial_invoke = jl_atomic_load_acquire(&ci->invoke); // happens-before for subsequent read of fptr
+        }
+        void *fptr = jl_atomic_load_relaxed(&ci->specptr.fptr);
+        // TODO: if fptr is NULL, it may mean we read this too fast, and should have spun and waited for jl_compile_codeinst to finish
+        if (initial_invoke == NULL || fptr == NULL) {
+            *invoke = initial_invoke;
+            *specptr = NULL;
+            *specsigflags = 0b00;
+            return;
+        }
+        while (!(flags & JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR)) {
+            jl_cpu_pause();
+            flags = jl_atomic_load_acquire(&ci->flags);
+        }
+        jl_callptr_t final_invoke = jl_atomic_load_relaxed(&ci->invoke);
+        if (final_invoke == initial_invoke) {
+            *invoke = final_invoke;
+            *specptr = fptr;
+            *specsigflags = flags;
+            return;
+        }
+    }
+}
+
 jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT);
 
+JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src)
+{
+    assert(jl_is_code_info(src));
+    jl_emit_codeinst_to_jit(codeinst, src);
+}
+
+JL_DLLEXPORT int jl_method_is_macro(jl_method_t *m)
+{
+    return jl_symbol_name(m->name)[0] == '@';
+}
+
 jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world)
 {
     // quick check if we already have a compiled result
@@ -2375,33 +3508,34 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         jl_code_instance_t *codeinst2 = jl_compile_method_internal(mi2, world);
         jl_code_instance_t *codeinst = jl_get_method_inferred(
                 mi, codeinst2->rettype,
-                codeinst2->min_world, codeinst2->max_world);
+                jl_atomic_load_relaxed(&codeinst2->min_world),
+                jl_atomic_load_relaxed(&codeinst2->max_world),
+                jl_atomic_load_relaxed(&codeinst2->debuginfo),
+                jl_atomic_load_relaxed(&codeinst2->edges));
         if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) {
             codeinst->rettype_const = codeinst2->rettype_const;
-            uint8_t specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
-            jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst2->invoke);
-            void *fptr = jl_atomic_load_relaxed(&codeinst2->specptr.fptr);
+            jl_gc_wb(codeinst, codeinst->rettype_const);
+            uint8_t specsigflags;
+            jl_callptr_t invoke;
+            void *fptr;
+            jl_read_codeinst_invoke(codeinst2, &specsigflags, &invoke, &fptr, 1);
             if (fptr != NULL) {
-                while (!(specsigflags & 0b10)) {
-                    jl_cpu_pause();
-                    specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
-                }
-                invoke = jl_atomic_load_relaxed(&codeinst2->invoke);
                 void *prev_fptr = NULL;
                 // see jitlayers.cpp for the ordering restrictions here
                 if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) {
-                    jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & 0b1);
                     jl_atomic_store_release(&codeinst->invoke, invoke);
                     // unspec is probably not specsig, but might be using specptr
-                    jl_atomic_store_release(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag
-                } else {
+                    jl_atomic_fetch_or_relaxed(&codeinst->flags, JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR);
+                }
+                else {
                     // someone else already compiled it
-                    while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
+                    while (!(jl_atomic_load_acquire(&codeinst->flags) & JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR)) {
                         jl_cpu_pause();
                     }
                     // codeinst is now set up fully, safe to return
                 }
-            } else {
+            }
+            else {
                 jl_callptr_t prev = NULL;
                 jl_atomic_cmpswap_acqrel(&codeinst->invoke, &prev, invoke);
             }
@@ -2423,30 +3557,29 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
     // if compilation is disabled or source is unavailable, try calling unspecialized version
     if (compile_option == JL_OPTIONS_COMPILE_OFF ||
         compile_option == JL_OPTIONS_COMPILE_MIN ||
-        def->source == jl_nothing) {
+        (jl_is_method(def) && def->source == jl_nothing)) {
         // copy fptr from the template method definition
         if (jl_is_method(def)) {
             jl_method_instance_t *unspecmi = jl_atomic_load_relaxed(&def->unspecialized);
             if (unspecmi) {
                 jl_code_instance_t *unspec = jl_atomic_load_relaxed(&unspecmi->cache);
-                jl_callptr_t unspec_invoke = NULL;
-                if (unspec && (unspec_invoke = jl_atomic_load_acquire(&unspec->invoke))) {
-                    jl_code_instance_t *codeinst = jl_new_codeinst(mi,
-                        (jl_value_t*)jl_any_type, NULL, NULL,
-                        0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-                    void *unspec_fptr = jl_atomic_load_relaxed(&unspec->specptr.fptr);
-                    if (unspec_fptr) {
-                        // wait until invoke and specsigflags are properly set
-                        while (!(jl_atomic_load_acquire(&unspec->specsigflags) & 0b10)) {
-                            jl_cpu_pause();
-                        }
-                        unspec_invoke = jl_atomic_load_relaxed(&unspec->invoke);
-                    }
-                    jl_atomic_store_release(&codeinst->specptr.fptr, unspec_fptr);
+                if (unspec && jl_atomic_load_acquire(&unspec->invoke) != NULL) {
+                    uint8_t specsigflags;
+                    jl_callptr_t invoke;
+                    void *fptr;
+                    jl_read_codeinst_invoke(unspec, &specsigflags, &invoke, &fptr, 1);
+                    jl_debuginfo_t *di = NULL;
+                    jl_svec_t *edges = jl_emptysvec;
+                    jl_code_instance_t *codeinst = jl_new_codeinst(mi, jl_nothing,
+                        (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL,
+                        0, 1, ~(size_t)0, 0, jl_nothing, di, edges);
                     codeinst->rettype_const = unspec->rettype_const;
-                    jl_atomic_store_release(&codeinst->invoke, unspec_invoke);
+                    jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr);
+                    jl_atomic_store_relaxed(&codeinst->invoke, invoke);
+                    // unspec is probably not specsig, but might be using specptr
+                    jl_atomic_store_relaxed(&codeinst->flags, specsigflags & JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR);
                     jl_mi_cache_insert(mi, codeinst);
-                    record_precompile_statement(mi);
+                    record_precompile_statement(mi, 0, 0);
                     return codeinst;
                 }
             }
@@ -2458,34 +3591,83 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         compile_option == JL_OPTIONS_COMPILE_MIN) {
         jl_code_info_t *src = jl_code_for_interpreter(mi, world);
         if (!jl_code_requires_compiler(src, 0)) {
-            jl_code_instance_t *codeinst = jl_new_codeinst(mi,
-                (jl_value_t*)jl_any_type, NULL, NULL,
-                0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
+            jl_debuginfo_t *di = NULL;
+            jl_svec_t *edges = jl_emptysvec;
+            jl_code_instance_t *codeinst = jl_new_codeinst(mi, jl_nothing,
+                (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL,
+                0, 1, ~(size_t)0, 0, jl_nothing, di, edges);
             jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call);
             jl_mi_cache_insert(mi, codeinst);
-            record_precompile_statement(mi);
+            record_precompile_statement(mi, 0, 0);
             return codeinst;
         }
         if (compile_option == JL_OPTIONS_COMPILE_OFF) {
-            jl_printf(JL_STDERR, "code missing for ");
+            jl_printf(JL_STDERR, "No compiled code available for ");
             jl_static_show(JL_STDERR, (jl_value_t*)mi);
             jl_printf(JL_STDERR, " : sysimg may not have been built with --compile=all\n");
         }
     }
 
-    codeinst = jl_generate_fptr(mi, world);
+    // Ok, compilation is enabled. We'll need to try to compile something (probably).
+
+    // Everything from here on is considered (user facing) compile time
+    uint64_t compilation_start = jl_hrtime();
+    uint64_t inference_start = jl_typeinf_timing_begin(); // Special-handling for reentrancy
+
+    // Is a recompile if there is cached code, and it was compiled (not only inferred) before
+    int is_recompile = 0;
+    jl_code_instance_t *codeinst_old = jl_atomic_load_relaxed(&mi->cache);
+    while (codeinst_old != NULL) {
+        if (jl_atomic_load_relaxed(&codeinst_old->invoke) != NULL) {
+            is_recompile = 1;
+            break;
+        }
+        codeinst_old = jl_atomic_load_relaxed(&codeinst_old->next);
+    }
+
+    // jl_type_infer will internally do a cache lookup and jl_engine_reserve call
+    // to synchronize this across threads
+    if (!codeinst) {
+        // Don't bother inferring toplevel thunks or macros - the performance cost of inference is likely
+        // to significantly exceed the actual runtime.
+        int should_skip_inference = !jl_is_method(mi->def.method) || jl_method_is_macro(mi->def.method);
+
+        if (!should_skip_inference) {
+            codeinst = jl_type_infer(mi, world, SOURCE_MODE_ABI, jl_options.trim);
+        }
+    }
+
+    if (codeinst) {
+        if (jl_is_compiled_codeinst(codeinst)) {
+            jl_typeinf_timing_end(inference_start, is_recompile);
+            // Already compiled - e.g. constabi, or compiled by a different thread while we were waiting.
+            return codeinst;
+        }
+
+        JL_GC_PUSH1(&codeinst);
+        int did_compile = jl_compile_codeinst(codeinst);
+        double compile_time = jl_hrtime() - compilation_start;
+
+        if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) {
+            // Something went wrong. Bail to the fallback path.
+            codeinst = NULL;
+        }
+        else if (did_compile && codeinst->owner == jl_nothing) {
+            record_precompile_statement(mi, compile_time, is_recompile);
+        }
+        JL_GC_POP();
+    }
     if (!codeinst) {
-        jl_method_instance_t *unspec = jl_get_unspecialized_from_mi(mi);
-        jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
+        jl_method_instance_t *unspec = jl_get_unspecialized(def);
+        if ((jl_value_t*)unspec == jl_nothing)
+            unspec = mi;
+        jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0, NULL, NULL);
         // ask codegen to make the fptr for unspec
         jl_callptr_t ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
         if (ucache_invoke == NULL) {
-            if (def->source == jl_nothing && (jl_atomic_load_relaxed(&ucache->def->uninferred) == jl_nothing ||
-                                              jl_atomic_load_relaxed(&ucache->def->uninferred) == NULL)) {
-                jl_printf(JL_STDERR, "source not available for ");
-                jl_static_show(JL_STDERR, (jl_value_t*)mi);
-                jl_printf(JL_STDERR, "\n");
-                jl_error("source missing for method that needs to be compiled");
+            if ((!jl_is_method(def) || def->source == jl_nothing) &&
+                !jl_cached_uninferred(jl_atomic_load_relaxed(&jl_get_ci_mi(ucache)->cache), world)) {
+                jl_throw(jl_new_struct(jl_missingcodeerror_type, (jl_value_t*)mi));
             }
             jl_generate_fptr_for_unspecialized(ucache);
             ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
@@ -2494,29 +3676,27 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         if (ucache_invoke != jl_fptr_sparam &&
             ucache_invoke != jl_fptr_interpret_call) {
             // only these care about the exact specTypes, otherwise we can use it directly
+            jl_typeinf_timing_end(inference_start, is_recompile);
             return ucache;
         }
-        codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL,
-            0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-        void *unspec_fptr = jl_atomic_load_relaxed(&ucache->specptr.fptr);
-        if (unspec_fptr) {
-            // wait until invoke and specsigflags are properly set
-            while (!(jl_atomic_load_acquire(&ucache->specsigflags) & 0b10)) {
-                jl_cpu_pause();
-            }
-            ucache_invoke = jl_atomic_load_relaxed(&ucache->invoke);
-        }
-        // unspec is always not specsig, but might use specptr
-        jl_atomic_store_relaxed(&codeinst->specsigflags, jl_atomic_load_relaxed(&ucache->specsigflags) & 0b10);
-        jl_atomic_store_relaxed(&codeinst->specptr.fptr, unspec_fptr);
+        uint8_t specsigflags;
+        jl_callptr_t invoke;
+        void *fptr;
+        jl_read_codeinst_invoke(ucache, &specsigflags, &invoke, &fptr, 1);
+        jl_debuginfo_t *di = NULL;
+        jl_svec_t *edges = jl_emptysvec;
+        codeinst = jl_new_codeinst(mi, jl_nothing,
+            (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL,
+            0, 1, ~(size_t)0, 0, jl_nothing, di, edges);
         codeinst->rettype_const = ucache->rettype_const;
-        jl_atomic_store_release(&codeinst->invoke, ucache_invoke);
+        // unspec is always not specsig, but might use specptr
+        jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr);
+        jl_atomic_store_relaxed(&codeinst->invoke, invoke);
+        jl_atomic_store_relaxed(&codeinst->flags, specsigflags & JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR);
         jl_mi_cache_insert(mi, codeinst);
     }
-    else {
-        record_precompile_statement(mi);
-    }
     jl_atomic_store_relaxed(&codeinst->precompile, 1);
+    jl_typeinf_timing_end(inference_start, is_recompile);
     return codeinst;
 }
 
@@ -2534,21 +3714,54 @@ jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_co
 
 jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
-    jl_svec_t *sparams = m->def->sparam_vals;
+    jl_svec_t *sparams = jl_get_ci_mi(m)->sparam_vals;
     assert(sparams != jl_emptysvec);
     jl_fptr_sparam_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr3);
     assert(invoke && "Forgot to set specptr for jl_fptr_sparam!");
     return invoke(f, args, nargs, sparams);
 }
 
+jl_value_t *jl_fptr_wait_for_compiled(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
+{
+    jl_callptr_t invoke = jl_atomic_load_acquire(&m->invoke);
+    if (invoke == &jl_fptr_wait_for_compiled) {
+        int64_t last_alloc = jl_options.malloc_log ? jl_gc_diff_total_bytes() : 0;
+        int last_errno = errno;
+#ifdef _OS_WINDOWS_
+        DWORD last_error = GetLastError();
+#endif
+        jl_compile_codeinst(m);
+#ifdef _OS_WINDOWS_
+        SetLastError(last_error);
+#endif
+        errno = last_errno;
+        if (jl_options.malloc_log)
+            jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
+        invoke = jl_atomic_load_acquire(&m->invoke);
+    }
+    return invoke(f, args, nargs, m);
+}
+
+// test whether codeinst->invoke is usable already without further compilation needed
+JL_DLLEXPORT int jl_is_compiled_codeinst(jl_code_instance_t *codeinst)
+{
+    jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+    if (invoke == NULL || invoke == &jl_fptr_wait_for_compiled)
+        return 0;
+    return 1;
+}
+
 JL_DLLEXPORT const jl_callptr_t jl_fptr_args_addr = &jl_fptr_args;
 
 JL_DLLEXPORT const jl_callptr_t jl_fptr_const_return_addr = &jl_fptr_const_return;
 
 JL_DLLEXPORT const jl_callptr_t jl_fptr_sparam_addr = &jl_fptr_sparam;
 
+JL_CALLABLE(jl_f_opaque_closure_call);
 JL_DLLEXPORT const jl_callptr_t jl_f_opaque_closure_call_addr = (jl_callptr_t)&jl_f_opaque_closure_call;
 
+JL_DLLEXPORT const jl_callptr_t jl_fptr_wait_for_compiled_addr = &jl_fptr_wait_for_compiled;
+
 // Return the index of the invoke api, if known
 JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst)
 {
@@ -2566,18 +3779,17 @@ JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst)
     return -1;
 }
 
-JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_tupletype_t *ti, jl_svec_t *env, jl_method_t *m,
+JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_tupletype_t *ti, jl_svec_t *env, jl_method_t *m,
                                                         int return_if_compileable)
 {
     jl_tupletype_t *tt = NULL;
     jl_svec_t *newparams = NULL;
     JL_GC_PUSH2(&tt, &newparams);
-    jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(m->sig) : mt;
-    intptr_t max_varargs = get_max_varargs(m, kwmt, mt, NULL);
+    intptr_t max_varargs = get_max_varargs(m, NULL);
     jl_compilation_sig(ti, env, m, max_varargs, &newparams);
     int is_compileable = ((jl_datatype_t*)ti)->isdispatchtuple;
     if (newparams) {
-        tt = (jl_datatype_t*)jl_apply_tuple_type(newparams);
+        tt = (jl_datatype_t*)jl_apply_tuple_type(newparams, 1);
         if (!is_compileable) {
             // compute new env, if used below
             jl_value_t *ti = jl_type_intersection_env((jl_value_t*)tt, (jl_value_t*)m->sig, &newparams);
@@ -2597,12 +3809,9 @@ JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_t
 jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT)
 {
     jl_method_t *def = mi->def.method;
-    if (!jl_is_method(def) || !jl_is_datatype(mi->specTypes))
-        return mi;
-    jl_methtable_t *mt = jl_method_get_table(def);
-    if ((jl_value_t*)mt == jl_nothing)
+    if (!jl_is_method(def) || !jl_is_datatype(mi->specTypes) || def->is_for_opaque_closure)
         return mi;
-    jl_value_t *compilationsig = jl_normalize_to_compilable_sig(mt, (jl_datatype_t*)mi->specTypes, mi->sparam_vals, def, 1);
+    jl_value_t *compilationsig = jl_normalize_to_compilable_sig((jl_datatype_t*)mi->specTypes, mi->sparam_vals, def, 1);
     if (compilationsig == jl_nothing || jl_egal(compilationsig, mi->specTypes))
         return mi;
     jl_svec_t *env = NULL;
@@ -2614,37 +3823,36 @@ jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_
     return mi;
 }
 
-// return a MethodInstance for a compileable method_match
-jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache)
+// return a MethodInstance for a compileable method_match, if valid
+static jl_value_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache)
 {
     jl_method_t *m = match->method;
+    JL_GC_PROMISE_ROOTED(m);
     jl_svec_t *env = match->sparams;
     jl_tupletype_t *ti = match->spec_types;
-    jl_method_instance_t *mi = NULL;
+    jl_value_t *mi = jl_nothing;
     if (jl_is_datatype(ti)) {
-        jl_methtable_t *mt = jl_method_get_table(m);
-        if ((jl_value_t*)mt != jl_nothing) {
-            // get the specialization, possibly also caching it
-            if (mt_cache && ((jl_datatype_t*)ti)->isdispatchtuple) {
-                // Since we also use this presence in the cache
-                // to trigger compilation when producing `.ji` files,
-                // inject it there now if we think it will be
-                // used via dispatch later (e.g. because it was hinted via a call to `precompile`)
-                JL_LOCK(&mt->writelock);
-                mi = cache_method(mt, &mt->cache, (jl_value_t*)mt, ti, m, world, min_valid, max_valid, env);
-                JL_UNLOCK(&mt->writelock);
-            }
-            else {
-                jl_value_t *tt = jl_normalize_to_compilable_sig(mt, ti, env, m, 1);
-                if (tt != jl_nothing) {
-                    JL_GC_PUSH2(&tt, &env);
-                    if (!jl_egal(tt, (jl_value_t*)ti)) {
-                        jl_value_t *ti = jl_type_intersection_env((jl_value_t*)tt, (jl_value_t*)m->sig, &env);
-                        assert(ti != jl_bottom_type); (void)ti;
-                    }
-                    mi = jl_specializations_get_linfo(m, (jl_value_t*)tt, env);
-                    JL_GC_POP();
+        // get the specialization, possibly also caching it
+        if (mt_cache && ((jl_datatype_t*)ti)->isdispatchtuple) {
+            // Since we also use this presence in the cache
+            // to trigger compilation when producing `.ji` files,
+            // inject it there now if we think it will be
+            // used via dispatch later (e.g. because it was hinted via a call to `precompile`)
+            jl_methcache_t *mc = jl_method_table->cache;
+            assert(mc);
+            JL_LOCK(&mc->writelock);
+            mi = (jl_value_t*)cache_method(jl_method_get_table(m), mc, &mc->cache, (jl_value_t*)mc, ti, m, world, min_valid, max_valid, env);
+        }
+        else {
+            jl_value_t *tt = jl_normalize_to_compilable_sig(ti, env, m, 1);
+            if (tt != jl_nothing) {
+                JL_GC_PUSH2(&tt, &env);
+                if (!jl_egal(tt, (jl_value_t*)ti)) {
+                    jl_value_t *ti = jl_type_intersection_env((jl_value_t*)tt, (jl_value_t*)m->sig, &env);
+                    assert(ti != jl_bottom_type); (void)ti;
                 }
+                mi = (jl_value_t*)jl_specializations_get_linfo(m, (jl_value_t*)tt, env);
+                JL_GC_POP();
             }
         }
     }
@@ -2652,51 +3860,44 @@ jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t wor
 }
 
 // compile-time method lookup
-jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES_ROOT, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache)
+// intersect types with the MT, and return a single compileable specialization that covers the intersection.
+jl_value_t *jl_get_specialization1(jl_tupletype_t *types, size_t world, int mt_cache)
 {
     if (jl_has_free_typevars((jl_value_t*)types))
-        return NULL; // don't poison the cache due to a malformed query
+        return jl_nothing; // don't poison the cache due to a malformed query
     if (!jl_has_concrete_subtype((jl_value_t*)types))
-        return NULL;
+        return jl_nothing;
 
     // find if exactly 1 method matches (issue #7302)
     size_t min_valid2 = 1;
     size_t max_valid2 = ~(size_t)0;
     int ambig = 0;
     jl_value_t *matches = jl_matching_methods(types, jl_nothing, 1, 1, world, &min_valid2, &max_valid2, &ambig);
-    if (*min_valid < min_valid2)
-        *min_valid = min_valid2;
-    if (*max_valid > max_valid2)
-        *max_valid = max_valid2;
-    if (matches == jl_nothing || jl_array_len(matches) != 1 || ambig)
-        return NULL;
+    if (matches == jl_nothing || jl_array_nrows(matches) != 1 || ambig)
+        return jl_nothing;
     JL_GC_PUSH1(&matches);
     jl_method_match_t *match = (jl_method_match_t*)jl_array_ptr_ref(matches, 0);
-    jl_method_instance_t *mi = jl_method_match_to_mi(match, world, min_valid2, max_valid2, mt_cache);
+    jl_value_t *mi = jl_method_match_to_mi(match, world, min_valid2, max_valid2, mt_cache);
     JL_GC_POP();
     return mi;
 }
 
-// Get a MethodInstance for a precompile() call. This uses a special kind of lookup that
+// Try to get a MethodInstance for a precompile() call. This uses a special kind of lookup that
 // tries to find a method for which the requested signature is compileable.
-static jl_method_instance_t *jl_get_compile_hint_specialization(jl_tupletype_t *types JL_PROPAGATES_ROOT, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache)
+JL_DLLEXPORT jl_value_t *jl_get_compile_hint_specialization(jl_tupletype_t *types JL_PROPAGATES_ROOT, size_t world, int mt_cache)
 {
     if (jl_has_free_typevars((jl_value_t*)types))
-        return NULL; // don't poison the cache due to a malformed query
+        return jl_nothing; // don't poison the cache due to a malformed query
     if (!jl_has_concrete_subtype((jl_value_t*)types))
-        return NULL;
+        return jl_nothing;
 
     size_t min_valid2 = 1;
     size_t max_valid2 = ~(size_t)0;
     int ambig = 0;
     jl_value_t *matches = jl_matching_methods(types, jl_nothing, -1, 0, world, &min_valid2, &max_valid2, &ambig);
-    if (*min_valid < min_valid2)
-        *min_valid = min_valid2;
-    if (*max_valid > max_valid2)
-        *max_valid = max_valid2;
-    size_t i, n = jl_array_len(matches);
+    size_t i, n = jl_array_nrows(matches);
     if (n == 0)
-        return NULL;
+        return jl_nothing;
     JL_GC_PUSH1(&matches);
     jl_method_match_t *match = NULL;
     if (n == 1) {
@@ -2722,7 +3923,7 @@ static jl_method_instance_t *jl_get_compile_hint_specialization(jl_tupletype_t *
             exclude = 0;
             for (size_t j = n-1; j > i; j--) {  // more general methods maybe more likely to be at end
                 jl_method_match_t *match2 = (jl_method_match_t*)jl_array_ptr_ref(matches, j);
-                if (jl_type_morespecific(match1->method->sig, match2->method->sig)) {
+                if (jl_method_morespecific(match1->method, match2->method)) {
                     exclude = 1;
                     break;
                 }
@@ -2737,7 +3938,7 @@ static jl_method_instance_t *jl_get_compile_hint_specialization(jl_tupletype_t *
         if (count == 1)
             match = (jl_method_match_t*)jl_array_ptr_ref(matches, 0);
     }
-    jl_method_instance_t *mi = NULL;
+    jl_value_t *mi = jl_nothing;
     if (match != NULL)
         mi = jl_method_match_to_mi(match, world, min_valid2, max_valid2, mt_cache);
     JL_GC_POP();
@@ -2746,10 +3947,10 @@ static jl_method_instance_t *jl_get_compile_hint_specialization(jl_tupletype_t *
 
 static void _generate_from_hint(jl_method_instance_t *mi, size_t world)
 {
-    jl_value_t *codeinst = jl_rettype_inferred(mi, world, world);
+    jl_value_t *codeinst = jl_rettype_inferred_native(mi, world, world);
     if (codeinst == jl_nothing) {
-        (void)jl_type_infer(mi, world, 1);
-        codeinst = jl_rettype_inferred(mi, world, world);
+        (void)jl_type_infer(mi, world, SOURCE_MODE_NOT_REQUIRED, jl_options.trim);
+        codeinst = jl_rettype_inferred_native(mi, world, world);
     }
     if (codeinst != jl_nothing) {
         if (jl_atomic_load_relaxed(&((jl_code_instance_t*)codeinst)->invoke) == jl_fptr_const_return)
@@ -2763,7 +3964,7 @@ static void jl_compile_now(jl_method_instance_t *mi)
     size_t world = jl_atomic_load_acquire(&jl_world_counter);
     size_t tworld = jl_typeinf_world;
     _generate_from_hint(mi, world);
-    if (jl_typeinf_func && mi->def.method->primary_world <= tworld) {
+    if (jl_typeinf_func && jl_atomic_load_relaxed(&mi->def.method->primary_world) <= tworld) {
         // if it's part of the compiler, also attempt to compile for the compiler world too
         _generate_from_hint(mi, tworld);
     }
@@ -2772,7 +3973,8 @@ static void jl_compile_now(jl_method_instance_t *mi)
 JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world)
 {
     size_t tworld = jl_typeinf_world;
-    jl_atomic_store_relaxed(&mi->precompiled, 1);
+    uint8_t miflags = jl_atomic_load_relaxed(&mi->flags) | JL_MI_FLAGS_MASK_PRECOMPILED;
+    jl_atomic_store_relaxed(&mi->flags, miflags);
     if (jl_generating_output()) {
         jl_compile_now(mi);
         // In addition to full compilation of the compilation-signature, if `types` is more specific (e.g. due to nospecialize),
@@ -2787,12 +3989,13 @@ JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tuplet
             types2 = jl_type_intersection_env((jl_value_t*)types, (jl_value_t*)mi->def.method->sig, &tpenv2);
             jl_method_instance_t *mi2 = jl_specializations_get_linfo(mi->def.method, (jl_value_t*)types2, tpenv2);
             JL_GC_POP();
-            jl_atomic_store_relaxed(&mi2->precompiled, 1);
-            if (jl_rettype_inferred(mi2, world, world) == jl_nothing)
-                (void)jl_type_infer(mi2, world, 1);
-            if (jl_typeinf_func && mi->def.method->primary_world <= tworld) {
-                if (jl_rettype_inferred(mi2, tworld, tworld) == jl_nothing)
-                    (void)jl_type_infer(mi2, tworld, 1);
+            miflags = jl_atomic_load_relaxed(&mi2->flags) | JL_MI_FLAGS_MASK_PRECOMPILED;
+            jl_atomic_store_relaxed(&mi2->flags, miflags);
+            if (jl_rettype_inferred_native(mi2, world, world) == jl_nothing)
+                (void)jl_type_infer(mi2, world, SOURCE_MODE_NOT_REQUIRED, jl_options.trim);
+            if (jl_typeinf_func && jl_atomic_load_relaxed(&mi->def.method->primary_world) <= tworld) {
+                if (jl_rettype_inferred_native(mi2, tworld, tworld) == jl_nothing)
+                    (void)jl_type_infer(mi2, tworld, SOURCE_MODE_NOT_REQUIRED, jl_options.trim);
             }
         }
     }
@@ -2803,19 +4006,31 @@ JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tuplet
     }
 }
 
+JL_DLLEXPORT void jl_compile_method_sig(jl_method_t *m, jl_value_t *types, jl_svec_t *env, size_t world)
+{
+    jl_method_instance_t *mi = jl_specializations_get_linfo(m, types, env);
+    jl_compile_method_instance(mi, NULL, world);
+}
+
+JL_DLLEXPORT int jl_is_compilable(jl_tupletype_t *types)
+{
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
+    jl_value_t *mi = jl_get_compile_hint_specialization(types, world, 1);
+    return mi == jl_nothing ? 0 : 1;
+}
+
 JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types)
 {
     size_t world = jl_atomic_load_acquire(&jl_world_counter);
-    size_t min_valid = 0;
-    size_t max_valid = ~(size_t)0;
-    jl_method_instance_t *mi = jl_get_compile_hint_specialization(types, world, &min_valid, &max_valid, 1);
-    if (mi == NULL)
+    jl_value_t *mi = jl_get_compile_hint_specialization(types, world, 1);
+    if (mi == jl_nothing)
         return 0;
     JL_GC_PROMISE_ROOTED(mi);
-    jl_compile_method_instance(mi, types, world);
+    jl_compile_method_instance((jl_method_instance_t*)mi, types, world);
     return 1;
 }
 
+
 // add type of `f` to front of argument tuple type
 jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0)
 {
@@ -2832,12 +4047,29 @@ jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_v
     jl_svecset(tt, 0, ft);
     for (size_t i = 0; i < l; i++)
         jl_svecset(tt, i+1, jl_tparam(types,i));
-    tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt);
+    tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt, 1);
     tt = jl_rewrap_unionall_(tt, types0);
     JL_GC_POP();
     return tt;
 }
 
+// undo jl_argtype_with_function transform
+jl_value_t *jl_argtype_without_function(jl_value_t *ftypes)
+{
+    jl_value_t *types = jl_unwrap_unionall(ftypes);
+    size_t l = jl_nparams(types);
+    if (l == 1 && jl_is_vararg(jl_tparam0(types)))
+        return ftypes;
+    jl_value_t *tt = (jl_value_t*)jl_alloc_svec(l - 1);
+    JL_GC_PUSH1(&tt);
+    for (size_t i = 1; i < l; i++)
+        jl_svecset(tt, i - 1, jl_tparam(types, i));
+    tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt, 0);
+    tt = jl_rewrap_unionall_(tt, types);
+    JL_GC_POP();
+    return tt;
+}
+
 #ifdef JL_TRACE
 static int trace_en = 0;
 static int error_en = 1;
@@ -2862,17 +4094,11 @@ STATIC_INLINE jl_value_t *verify_type(jl_value_t *v) JL_NOTSAFEPOINT
 
 STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_method_instance_t *mfunc, size_t world)
 {
-    // manually inlined copy of jl_method_compiled
-    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mfunc->cache);
-    while (codeinst) {
-        if (codeinst->min_world <= world && world <= codeinst->max_world) {
-            jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
-            if (invoke != NULL) {
-                jl_value_t *res = invoke(F, args, nargs, codeinst);
-                return verify_type(res);
-            }
-        }
-        codeinst = jl_atomic_load_relaxed(&codeinst->next);
+    jl_code_instance_t *codeinst = NULL;
+    jl_callptr_t invoke = jl_method_compiled_callptr(mfunc, world, &codeinst);
+    if (invoke) {
+        jl_value_t *res = invoke(F, args, nargs, codeinst);
+        return verify_type(res);
     }
     int64_t last_alloc = jl_options.malloc_log ? jl_gc_diff_total_bytes() : 0;
     int last_errno = errno;
@@ -2886,7 +4112,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
     errno = last_errno;
     if (jl_options.malloc_log)
         jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
-    jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
+    invoke = jl_atomic_load_acquire(&codeinst->invoke);
     jl_value_t *res = invoke(F, args, nargs, codeinst);
     return verify_type(res);
 }
@@ -2897,6 +4123,18 @@ JL_DLLEXPORT jl_value_t *jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t na
     return _jl_invoke(F, args, nargs, mfunc, world);
 }
 
+JL_DLLEXPORT jl_value_t *jl_invoke_oc(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_method_instance_t *mfunc)
+{
+    jl_opaque_closure_t *oc = (jl_opaque_closure_t*)F;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    size_t world = oc->world;
+    ct->world_age = world;
+    jl_value_t *ret = _jl_invoke(F, args, nargs, mfunc, world);
+    ct->world_age = last_age;
+    return ret;
+}
+
 STATIC_INLINE int sig_match_fast(jl_value_t *arg1t, jl_value_t **args, jl_value_t **sig, size_t n)
 {
     // NOTE: This function is a huge performance hot spot!!
@@ -2970,8 +4208,9 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
         (callsite >> 16) & (N_CALL_CACHE - 1),
         (callsite >> 24 | callsite << 8) & (N_CALL_CACHE - 1)};
     jl_typemap_entry_t *entry = NULL;
-    jl_methtable_t *mt = NULL;
     int i;
+    jl_tupletype_t *tt = NULL;
+    int64_t last_alloc = 0;
     // check each cache entry to see if it matches
     //#pragma unroll
     //for (i = 0; i < 4; i++) {
@@ -2982,7 +4221,7 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
             entry = jl_atomic_load_relaxed(&call_cache[cache_idx[i]]); \
             if (entry && nargs == jl_svec_len(entry->sig->parameters) && \
                 sig_match_fast(FT, args, jl_svec_data(entry->sig->parameters), nargs) && \
-                world >= entry->min_world && world <= entry->max_world) { \
+                world >= jl_atomic_load_relaxed(&entry->min_world) && world <= jl_atomic_load_relaxed(&entry->max_world)) { \
                 goto have_entry; \
             } \
         } while (0);
@@ -2992,24 +4231,22 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
     LOOP_BODY(3);
 #undef LOOP_BODY
     i = 4;
-    jl_tupletype_t *tt = NULL;
-    int64_t last_alloc = 0;
     if (i == 4) {
         // if no method was found in the associative cache, check the full cache
         JL_TIMING(METHOD_LOOKUP_FAST, METHOD_LOOKUP_FAST);
-        mt = jl_gf_mtable(F);
-        jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
+        jl_methcache_t *mc = jl_method_table->cache;
+        jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mc->leafcache);
         entry = NULL;
-        if (leafcache != (jl_array_t*)jl_an_empty_vec_any &&
-                jl_typetagis(jl_atomic_load_relaxed(&mt->cache), jl_typemap_level_type)) {
-            // hashing args is expensive, but looking at mt->cache is probably even more expensive
+        int cache_entry_count = jl_atomic_load_relaxed(&((jl_datatype_t*)FT)->name->cache_entry_count);
+        if (leafcache != (jl_genericmemory_t*)jl_an_empty_memory_any && (cache_entry_count == 0 || cache_entry_count >= 8)) {
+            // hashing args is expensive, but so do that only if looking at mc->cache is probably even more expensive
             tt = lookup_arg_type_tuple(F, args, nargs);
             if (tt != NULL)
                 entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world);
         }
         if (entry == NULL) {
-            jl_typemap_t *cache = jl_atomic_load_relaxed(&mt->cache); // XXX: gc root required?
-            entry = jl_typemap_assoc_exact(cache, F, args, nargs, jl_cachearg_offset(mt), world);
+            jl_typemap_t *cache = jl_atomic_load_relaxed(&mc->cache); // XXX: gc root required?
+            entry = jl_typemap_assoc_exact(cache, F, args, nargs, jl_cachearg_offset(), world);
             if (entry == NULL) {
                 last_alloc = jl_options.malloc_log ? jl_gc_diff_total_bytes() : 0;
                 if (tt == NULL) {
@@ -3027,6 +4264,11 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
             jl_atomic_store_relaxed(&pick_which[cache_idx[0]], which);
             jl_atomic_store_release(&call_cache[cache_idx[which & 3]], entry);
         }
+        if (entry) {
+            // mfunc was found in slow path, so log --trace-dispatch
+            jl_method_instance_t *mfunc = entry->func.linfo;
+            record_dispatch_statement_on_first_dispatch(mfunc);
+        }
     }
 
     jl_method_instance_t *mfunc;
@@ -3035,14 +4277,10 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
         mfunc = entry->func.linfo;
     }
     else {
-        JL_GC_PUSH1(&tt);
         assert(tt);
-        JL_LOCK(&mt->writelock);
         // cache miss case
-        JL_TIMING(METHOD_LOOKUP_SLOW, METHOD_LOOKUP_SLOW);
-        mfunc = jl_mt_assoc_by_type(mt, tt, world);
-        JL_UNLOCK(&mt->writelock);
-        JL_GC_POP();
+        jl_methcache_t *mc = jl_method_table->cache;
+        mfunc = jl_mt_assoc_by_type(mc, tt, world);
         if (jl_options.malloc_log)
             jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
         if (mfunc == NULL) {
@@ -3053,12 +4291,15 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
             jl_method_error(F, args, nargs, world);
             // unreachable
         }
+        // mfunc was found in slow path, so log --trace-dispatch
+        record_dispatch_statement_on_first_dispatch(mfunc);
     }
 
 #ifdef JL_TRACE
     if (traceen)
         jl_printf(JL_STDOUT, " at %s:%d\n", jl_symbol_name(mfunc->def.method->file), mfunc->def.method->line);
 #endif
+
     return mfunc;
 }
 
@@ -3072,19 +4313,16 @@ JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint
     return _jl_invoke(F, args, nargs, mfunc, world);
 }
 
-static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_value_t *mt, size_t world, size_t *min_valid, size_t *max_valid)
+static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_methtable_t *mt, size_t world, int cache_result, size_t *min_valid, size_t *max_valid)
 {
     jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types);
     if (!jl_is_tuple_type(unw))
         return NULL;
     if (jl_tparam0(unw) == jl_bottom_type)
         return NULL;
-    if (mt == jl_nothing)
-        mt = (jl_value_t*)jl_method_table_for(unw);
-    if (mt == jl_nothing)
-        mt = NULL;
-    jl_value_t *matches = ml_matches((jl_methtable_t*)mt, (jl_tupletype_t*)types, 1, 0, 0, world, 1, min_valid, max_valid, NULL);
-    if (matches == jl_nothing || jl_array_len(matches) != 1)
+    jl_methcache_t *mc = ((jl_methtable_t*)mt)->cache;
+    jl_value_t *matches = ml_matches((jl_methtable_t*)mt, mc, (jl_tupletype_t*)types, 1, 0, 0, world, cache_result, min_valid, max_valid, NULL);
+    if (matches == jl_nothing || jl_array_nrows(matches) != 1)
         return NULL;
     jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(matches, 0);
     return matc;
@@ -3095,7 +4333,9 @@ JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup(jl_value_t *types, jl_value_t *mt,
     // Deprecated: Use jl_gf_invoke_lookup_worlds for future development
     size_t min_valid = 0;
     size_t max_valid = ~(size_t)0;
-    jl_method_match_t *matc = _gf_invoke_lookup(types, mt, world, &min_valid, &max_valid);
+    if (mt == jl_nothing)
+        mt = (jl_value_t*)jl_method_table;
+    jl_method_match_t *matc = _gf_invoke_lookup(types, (jl_methtable_t*)mt, world, 1, &min_valid, &max_valid);
     if (matc == NULL)
         return jl_nothing;
     return (jl_value_t*)matc->method;
@@ -3104,7 +4344,9 @@ JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup(jl_value_t *types, jl_value_t *mt,
 
 JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup_worlds(jl_value_t *types, jl_value_t *mt, size_t world, size_t *min_world, size_t *max_world)
 {
-    jl_method_match_t *matc = _gf_invoke_lookup(types, mt, world, min_world, max_world);
+    if (mt == jl_nothing)
+        mt = (jl_value_t*)jl_method_table;
+    jl_method_match_t *matc = _gf_invoke_lookup(types, (jl_methtable_t*)mt, world, 1, min_world, max_world);
     if (matc == NULL)
         return jl_nothing;
     return (jl_value_t*)matc;
@@ -3166,8 +4408,7 @@ jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value
                 int sub = jl_subtype_matching((jl_value_t*)tt, (jl_value_t*)method->sig, &tpenv);
                 assert(sub); (void)sub;
             }
-
-            mfunc = cache_method(NULL, &method->invokes, (jl_value_t*)method, tt, method, 1, 1, ~(size_t)0, tpenv);
+            mfunc = cache_method(NULL, NULL, &method->invokes, (jl_value_t*)method, tt, method, 1, 1, ~(size_t)0, tpenv);
         }
         JL_UNLOCK(&method->writelock);
         JL_GC_POP();
@@ -3175,14 +4416,22 @@ jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value
             jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
     }
     JL_GC_PROMISE_ROOTED(mfunc);
+    uint8_t force_trace_dispatch = jl_atomic_load_relaxed(&jl_force_trace_dispatch_enabled);
+    if (force_trace_dispatch || jl_options.trace_dispatch != NULL) {
+        uint8_t miflags = jl_atomic_load_relaxed(&mfunc->flags);
+        uint8_t was_dispatched = miflags & JL_MI_FLAGS_MASK_DISPATCHED;
+        if (!was_dispatched) {
+            miflags |= JL_MI_FLAGS_MASK_DISPATCHED;
+            jl_atomic_store_relaxed(&mfunc->flags, miflags);
+            record_dispatch_statement(mfunc);
+        }
+    }
     size_t world = jl_current_task->world_age;
     return _jl_invoke(gf, args, nargs - 1, mfunc, world);
 }
 
-// Return value is rooted globally
-jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st)
+jl_sym_t *jl_gf_supertype_name(jl_sym_t *name)
 {
-    // type name is function name prefixed with #
     size_t l = strlen(jl_symbol_name(name));
     char *prefixed;
     prefixed = (char*)malloc_s(l+2);
@@ -3190,23 +4439,32 @@ jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_
     strcpy(&prefixed[1], jl_symbol_name(name));
     jl_sym_t *tname = jl_symbol(prefixed);
     free(prefixed);
+    return tname;
+}
+
+// Return value is rooted globally
+jl_value_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st, size_t new_world)
+{
+    // type name is function name prefixed with #
+    jl_sym_t *tname = jl_gf_supertype_name(name);
     jl_datatype_t *ftype = (jl_datatype_t*)jl_new_datatype(
             tname, module, st, jl_emptysvec, jl_emptysvec, jl_emptysvec, jl_emptysvec,
             0, 0, 0);
     assert(jl_is_datatype(ftype));
     JL_GC_PUSH1(&ftype);
-    ftype->name->mt->name = name;
-    jl_gc_wb(ftype->name->mt, name);
-    jl_set_const(module, tname, (jl_value_t*)ftype);
+    ftype->name->singletonname = name;
+    jl_gc_wb(ftype->name, name);
+    jl_declare_constant_val3(NULL, module, tname, (jl_value_t*)ftype, PARTITION_KIND_CONST, new_world);
     jl_value_t *f = jl_new_struct(ftype);
-    ftype->instance = f; jl_gc_wb(ftype, f);
+    ftype->instance = f;
+    jl_gc_wb(ftype, f);
     JL_GC_POP();
-    return (jl_function_t*)f;
+    return (jl_value_t*)f;
 }
 
-jl_function_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module)
+jl_value_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module, size_t new_world)
 {
-    return jl_new_generic_function_with_supertype(name, module, jl_function_type);
+    return jl_new_generic_function_with_supertype(name, module, jl_function_type, new_world);
 }
 
 struct ml_matches_env {
@@ -3238,43 +4496,58 @@ static jl_method_match_t *make_method_match(jl_tupletype_t *spec_types, jl_svec_
     return match;
 }
 
+// callback for typemap_visitor
+//
+// This will exit the search early (by returning 0 / false) if the match limit is proven to be
+// exceeded early. This is only best-effort, since specificity means that many matched methods
+// may be sorted and removed in the output processing for ml_matches and therefore we can only
+// conservatively under-approximate the matches during the search.
 static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersection_env *closure0)
 {
     struct ml_matches_env *closure = container_of(closure0, struct ml_matches_env, match);
     if (closure->intersections == 0 && !closure0->issubty)
         return 1;
-    if (closure->world < ml->min_world) {
-        // ignore method table entries that are part of a later world
-        if (closure->match.max_valid >= ml->min_world)
-            closure->match.max_valid = ml->min_world - 1;
+
+    // First, check the world range of the typemap entry to ensure that it intersects
+    // the query world. If it does not, narrow the result world range to guarantee
+    // excluding it from the results is valid for the full span.
+    size_t min_world = jl_atomic_load_relaxed(&ml->min_world);
+    size_t max_world = jl_atomic_load_relaxed(&ml->max_world);
+    if (closure->world < min_world) {
+        // exclude method table entries that are part of a later world
+        if (closure->match.max_valid >= min_world)
+            closure->match.max_valid = min_world - 1;
         return 1;
     }
-    else if (closure->world > ml->max_world) {
-        // ignore method table entries that have been replaced in the current world
-        if (closure->match.min_valid <= ml->max_world)
-            closure->match.min_valid = ml->max_world + 1;
+    else if (closure->world > max_world) {
+        // exclude method table entries that have been replaced in the current world
+        if (closure->match.min_valid <= max_world)
+            closure->match.min_valid = max_world + 1;
         return 1;
     }
-    else {
-        // intersect the env valid range with method's inclusive valid range
-        if (closure->match.min_valid < ml->min_world)
-            closure->match.min_valid = ml->min_world;
-        if (closure->match.max_valid > ml->max_world)
-            closure->match.max_valid = ml->max_world;
-    }
+    if (closure->match.max_valid > max_world)
+        closure->match.max_valid = max_world;
     jl_method_t *meth = ml->func.method;
-    if (closure->lim >= 0 && jl_is_dispatch_tupletype(meth->sig)) {
-        if (closure->lim == 0)
+    int only = jl_atomic_load_relaxed(&meth->dispatch_status) & METHOD_SIG_LATEST_ONLY;
+    if (closure->lim >= 0 && only) {
+        if (closure->lim == 0) {
+            closure->t = jl_an_empty_vec_any;
             return 0;
+        }
         closure->lim--;
     }
-    // don't need to consider other similar methods if this ml will always fully intersect with them and dominates all of them
-    if (!closure->include_ambiguous || closure->lim != -1)
-        typemap_slurp_search(ml, &closure->match);
     closure->matc = make_method_match((jl_tupletype_t*)closure->match.ti,
         closure->match.env, meth,
         closure->match.issubty ? FULLY_COVERS : NOT_FULLY_COVERS);
-    size_t len = jl_array_len(closure->t);
+    size_t len = jl_array_nrows(closure->t);
+    if (closure->match.issubty && only) {
+        if (len == 0)
+            closure->t = (jl_value_t*)jl_alloc_vec_any(1);
+        else if (len > 1)
+            jl_array_del_end((jl_array_t*)closure->t, len - 1);
+        jl_array_ptr_set(closure->t, 0, (jl_value_t*)closure->matc);
+        return 0;
+    }
     if (len == 0) {
         closure->t = (jl_value_t*)jl_alloc_vec_any(1);
         jl_array_ptr_set(closure->t, 0, (jl_value_t*)closure->matc);
@@ -3282,20 +4555,15 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio
     else {
         jl_array_ptr_1d_push((jl_array_t*)closure->t, (jl_value_t*)closure->matc);
     }
+    // don't need to consider other similar methods if this ml will always fully intersect with them and dominates all of them
+    if (!closure->include_ambiguous || closure->lim != -1)
+        typemap_slurp_search(ml, &closure->match);
     return 1;
 }
 
-static int ml_mtable_visitor(jl_methtable_t *mt, void *closure0)
-{
-    struct typemap_intersection_env* env = (struct typemap_intersection_env*)closure0;
-    return jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), jl_cachearg_offset(mt), env);
-}
-
-
 // Visit the candidate methods, starting from t[idx], to determine a possible valid sort ordering,
 // where every morespecific method appears before any method which it has a common
-// intersection with but is not partly ambiguous with (ambiguity is transitive, particularly
-// if lim==-1, although morespecific is not transitive).
+// intersection with but is not partly ambiguous with (ambiguity is not transitive, since morespecific is not transitive).
 // Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
 // Inputs:
 //  * `t`: the array of vertexes (method matches)
@@ -3303,265 +4571,263 @@ static int ml_mtable_visitor(jl_methtable_t *mt, void *closure0)
 //  * `visited`: the state of the algorithm for each vertex in `t`: either 1 if we visited it already or 1+depth if we are visiting it now
 //  * `stack`: the state of the algorithm for the current vertex (up to length equal to `t`): the list of all vertexes currently in the depth-first path or in the current SCC
 //  * `result`: the output of the algorithm, a sorted list of vertexes (up to length `lim`)
-//  * `allambig`: a list of all vertexes with an ambiguity (up to length equal to `t`), discovered while running the rest of the algorithm
+//  * `recursion_stack`: an array for temporary use
 //  * `lim`: either -1 for unlimited matches, or the maximum length for `result` before returning failure (return -1).
-//           If specified as -1, this will return extra matches that would have been elided from the list because they were already covered by an earlier match.
-//           This gives a sort of maximal set of matching methods (up to the first minmax method).
-//           If specified as -1, the sorting will also include all "weak" edges (every ambiguous pair) which will create much larger ambiguity cycles,
-//           resulting in a less accurate sort order and much less accurate `*has_ambiguity` result.
 //  * `include_ambiguous`: whether to filter out fully ambiguous matches from `result`
 //  * `*has_ambiguity`: whether the algorithm does not need to compute if there is an unresolved ambiguity
 //  * `*found_minmax`: whether there is a minmax method already found, so future fully_covers matches should be ignored
 // Outputs:
-//  * `*has_ambiguity`: whether the caller should check if there remains an unresolved ambiguity (in `allambig`)
+//  * `*has_ambiguity`: whether there are any ambiguities that mean the sort order is not exact
+// Stack frame for iterative sort_mlmatches implementation
+enum sort_state {
+    STATE_VISITING,            // Initial visit and setup
+    STATE_PROCESSING_INTERFERENCES, // Processing interference loop
+    STATE_CHECK_COVERS,        // Check coverage conditions
+    STATE_FINALIZE_SCC         // SCC processing and cleanup
+};
+
+typedef struct {
+    size_t idx;                    // Current method match index
+    size_t interference_index;     // Current position in interferences loop
+    size_t interference_count;     // Total interferences count
+    size_t depth;                  // Stack depth when frame created
+    size_t cycle;                  // Cycle depth tracking
+    jl_method_match_t *matc;       // Current method match
+    jl_method_t *m;                // Current method
+    jl_value_t *ti;                // Type intersection
+    int subt;                      // Subtype flag
+    jl_genericmemory_t *interferences; // Method interferences
+    int child_result;              // Result from child recursive call
+    enum sort_state state;
+} sort_stack_frame_t;
+
 // Returns:
 //  * -1: too many matches for lim, other outputs are undefined
 //  *  0: the child(ren) have been added to the output
 //  * 1+: the children are part of this SCC (up to this depth)
-// TODO: convert this function into an iterative call, rather than recursive
-static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, arraylist_t *stack, arraylist_t *result, arraylist_t *allambig, int lim, int include_ambiguous, int *has_ambiguity, int *found_minmax)
-{
-    size_t cycle = (size_t)visited->items[idx];
-    if (cycle != 0)
-        return cycle - 1; // depth remaining
-    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, idx);
-    jl_method_t *m = matc->method;
-    jl_value_t *ti = (jl_value_t*)matc->spec_types;
-    int subt = matc->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
-    // first check if this new method is actually already fully covered by an
-    // existing match and we can just ignore this entry quickly
-    size_t result_len = 0;
-    if (subt) {
-        if (*found_minmax == 2)
-            visited->items[idx] = (void*)1;
-    }
-    else if (lim != -1) {
-        for (; result_len < result->len; result_len++) {
-            size_t idx2 = (size_t)result->items[result_len];
-            jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2);
-            jl_method_t *m2 = matc2->method;
-            if (jl_subtype(ti, m2->sig)) {
-                if (include_ambiguous) {
-                    if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
-                        continue;
+static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, arraylist_t *stack, arraylist_t *result, arraylist_t *recursion_stack, int lim, int include_ambiguous, int *has_ambiguity, int *found_minmax)
+{
+    // Use arraylist_t for explicit stack of processing frames
+    arraylist_t frame_stack;
+    arraylist_new(&frame_stack, 0);
+
+    // Push initial frame
+    sort_stack_frame_t initial_frame = {
+        .idx = idx,
+        .interference_index = 0,
+        .interference_count = 0,
+        .depth = 0,
+        .cycle = 0,
+        .matc = NULL,
+        .m = NULL,
+        .ti = NULL,
+        .subt = 0,
+        .interferences = NULL,
+        .child_result = 0,
+        .state = STATE_VISITING
+    };
+    arraylist_push(&frame_stack, memcpy(malloc(sizeof(sort_stack_frame_t)), &initial_frame, sizeof(sort_stack_frame_t)));
+
+    int final_result = 0;
+
+    while (1) {
+        sort_stack_frame_t *current = (sort_stack_frame_t*)frame_stack.items[frame_stack.len - 1];
+        JL_GC_PROMISE_ROOTED(current->m);
+        JL_GC_PROMISE_ROOTED(current->interferences);
+        JL_GC_PROMISE_ROOTED(current->ti);
+
+        switch (current->state) {
+            case STATE_VISITING: {
+                size_t cycle = (size_t)visited->items[current->idx];
+                if (cycle != 0) {
+                    final_result = cycle - 1;
+                    goto propagate_to_parent;
                 }
-                visited->items[idx] = (void*)1;
+
+                arraylist_push(stack, (void*)current->idx);
+                current->depth = stack->len;
+                visited->items[current->idx] = (void*)(1 + current->depth);
+                current->matc = (jl_method_match_t*)jl_array_ptr_ref(t, current->idx);
+                current->m = current->matc->method;
+                current->ti = (jl_value_t*)current->matc->spec_types;
+                current->subt = current->matc->fully_covers != NOT_FULLY_COVERS;
+                current->interferences = jl_atomic_load_relaxed(&current->m->interferences);
+                current->cycle = current->depth;
+                current->interference_count = current->interferences->length;
+                current->interference_index = 0;
+                current->state = STATE_PROCESSING_INTERFERENCES;
                 break;
             }
-        }
-    }
-    if ((size_t)visited->items[idx] == 1)
-        return 0;
-    arraylist_push(stack, (void*)idx);
-    size_t depth = stack->len;
-    visited->items[idx] = (void*)(1 + depth);
-    cycle = depth;
-    int addambig = 0;
-    int mayexclude = 0;
-    // First visit all "strong" edges where the child is definitely better.
-    // This likely won't hit any cycles, but might (because morespecific is not transitive).
-    // Along the way, record if we hit any ambiguities-we may need to track those later.
-    for (size_t childidx = 0; childidx < jl_array_len(t); childidx++) {
-        if (childidx == idx)
-            continue;
-        int child_cycle = (size_t)visited->items[childidx];
-        if (child_cycle == 1)
-            continue; // already handled
-        if (child_cycle != 0 && child_cycle - 1 >= cycle)
-            continue; // already part of this cycle
-        jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, childidx);
-        jl_method_t *m2 = matc2->method;
-        int subt2 = matc2->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
-        // TODO: we could change this to jl_has_empty_intersection(ti, (jl_value_t*)matc2->spec_types);
-        // since we only care about sorting of the intersections the user asked us about
-        if (!subt2 && jl_has_empty_intersection(m2->sig, m->sig))
-            continue;
-        int msp = jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig);
-        int msp2 = !msp && jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig);
-        if (!msp) {
-            if (subt || !include_ambiguous || (lim != -1 && msp2)) {
-                if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) {
-                    // this may be filtered out as fully intersected, if applicable later
-                    mayexclude = 1;
+
+            case STATE_PROCESSING_INTERFERENCES: {
+                // If we have a child result to process, handle it first
+                if (current->child_result != 0) {
+                    if (current->child_result == -1) {
+                        final_result = -1;
+                        goto propagate_to_parent;
+                    }
+                    // record the cycle will resolve at depth "cycle"
+                    if (current->child_result && current->child_result < current->cycle)
+                        current->cycle = current->child_result;
+                    current->child_result = 0; // Clear after processing
                 }
-            }
-            if (!msp2) {
-                addambig = 1; // record there is a least one previously-undetected ambiguity that may need to be investigated later (between m and m2)
-            }
-        }
-        if (lim == -1 ? msp : !msp2) // include only strong or also weak edges, depending on whether the result size is limited
-            continue;
-        // m2 is (lim!=-1 ? better : not-worse), so attempt to visit it first
-        // if limited, then we want to visit only better edges, because that results in finding k best matches quickest
-        // if not limited, then we want to visit all edges, since that results in finding the largest SCC cycles, which requires doing the fewest intersections
-        child_cycle = sort_mlmatches(t, childidx, visited, stack, result, allambig, lim, include_ambiguous, has_ambiguity, found_minmax);
-        if (child_cycle == -1)
-            return -1;
-        if (child_cycle && child_cycle < cycle) {
-            // record the cycle will resolve at depth "cycle"
-            cycle = child_cycle;
-        }
-        if (stack->len == depth) {
-            // if this child resolved without hitting a cycle, then there is
-            // some probability that this method is already fully covered now
-            // (same check as before), and we can delete this vertex now without
-            // anyone noticing (too much)
-            if (subt) {
-                if (*found_minmax == 2)
-                    visited->items[idx] = (void*)1;
-            }
-            else if (lim != -1) {
-                for (; result_len < result->len; result_len++) {
-                    size_t idx2 = (size_t)result->items[result_len];
-                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2);
-                    jl_method_t *m2 = matc2->method;
-                    if (jl_subtype(ti, m2->sig)) {
-                        if (include_ambiguous) {
-                            if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
-                                continue;
+
+                // Process interferences iteratively
+                while (current->interference_index < current->interference_count) {
+                    jl_method_t *m2 = (jl_method_t*)jl_genericmemory_ptr_ref(current->interferences, current->interference_index);
+                    current->interference_index++;
+
+                    if (m2 == NULL)
+                        continue;
+
+                    int childidx = find_method_in_matches(t, m2);
+                    if (childidx < 0 || (size_t)childidx == current->idx)
+                        continue;
+
+                    int child_cycle = (size_t)visited->items[childidx];
+                    if (child_cycle == 1)
+                        continue; // already handled
+                    if (child_cycle != 0 && child_cycle - 1 >= current->cycle)
+                        continue; // already part of this cycle
+                    if (method_in_interferences(current->m, m2))
+                        continue;
+
+                    // m2 is morespecific, so attempt to visit it first
+                    if (child_cycle != 0) {
+                        // Child already being processed, use cached result
+                        int child_result = child_cycle - 1;
+                        if (child_result == -1) {
+                            final_result = -1;
+                            goto propagate_to_parent;
                         }
-                        visited->items[idx] = (void*)1;
-                        break;
+                        if (child_result && child_result < current->cycle)
+                            current->cycle = child_result;
                     }
-                }
-            }
-            if ((size_t)visited->items[idx] == 1) {
-                assert(cycle == depth);
-                size_t childidx = (size_t)arraylist_pop(stack);
-                assert(childidx == idx); (void)childidx;
-                assert(!subt || *found_minmax == 2);
-                return 0;
-            }
-        }
-    }
-    if (matc->fully_covers == NOT_FULLY_COVERS && addambig)
-        arraylist_push(allambig, (void*)idx);
-    if (cycle != depth)
-        return cycle;
-    result_len = result->len;
-    if (stack->len == depth) {
-        // Found one "best" method to add right now. But we might exclude it if
-        // we determined earlier that we had that option.
-        if (mayexclude) {
-            if (!subt || *found_minmax == 2)
-                visited->items[idx] = (void*)1;
-        }
-    }
-    else {
-        // We have a set of ambiguous methods. Record that.
-        // This is greatly over-approximated for lim==-1
-        *has_ambiguity = 1;
-        // If we followed weak edges above, then this also fully closed the ambiguity cycle
-        if (lim == -1)
-            addambig = 0;
-        // If we're only returning possible matches, now filter out this method
-        // if its intersection is fully ambiguous in this SCC group.
-        // This is a repeat of the "first check", now that we have completed the cycle analysis
-        for (size_t i = depth - 1; i < stack->len; i++) {
-            size_t childidx = (size_t)stack->items[i];
-            jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx);
-            jl_value_t *ti = (jl_value_t*)matc->spec_types;
-            int subt = matc->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
-            if ((size_t)visited->items[childidx] == 1) {
-                assert(subt);
-                continue;
-            }
-            assert(visited->items[childidx] == (void*)(2 + i));
-            // if we only followed strong edges before above
-            // check also if this set has an unresolved ambiguity missing from it
-            if (lim != -1 && !addambig) {
-                for (size_t j = 0; j < allambig->len; j++) {
-                    if ((size_t)allambig->items[j] == childidx) {
-                        addambig = 1;
-                        break;
+                    else {
+                        // Need to process child - push new frame and pause current processing
+                        sort_stack_frame_t child_frame = {
+                            .idx = childidx,
+                            .interference_index = 0,
+                            .interference_count = 0,
+                            .depth = 0,
+                            .cycle = 0,
+                            .matc = NULL,
+                            .m = NULL,
+                            .ti = NULL,
+                            .subt = 0,
+                            .interferences = NULL,
+                            .child_result = 0,
+                            .state = STATE_VISITING
+                        };
+                        arraylist_push(&frame_stack, memcpy(malloc(sizeof(sort_stack_frame_t)), &child_frame, sizeof(sort_stack_frame_t)));
+                        goto continue_main_loop; // Resume processing after child completes
                     }
                 }
+
+                current->state = STATE_CHECK_COVERS;
+                break;
             }
-            // always remove fully_covers matches after the first minmax ambiguity group is handled
-            if (subt) {
-                if (*found_minmax)
-                    visited->items[childidx] = (void*)1;
-                continue;
+
+            case STATE_CHECK_COVERS: {
+                // There is some probability that this method is already fully covered
+                // now, and we can delete this vertex now without anyone noticing.
+                if (current->subt && *found_minmax) {
+                    if (*found_minmax == 2)
+                        visited->items[current->idx] = (void*)1;
+                }
+                else if (check_interferences_covers(current->m, current->ti, t, visited, recursion_stack)) {
+                    visited->items[current->idx] = (void*)1;
+                }
+                else if (check_fully_ambiguous(current->m, current->ti, t, include_ambiguous, has_ambiguity)) {
+                    visited->items[current->idx] = (void*)1;
+                }
+
+                // If there were no cycles hit either, then we can potentially delete all of its edges too.
+                if ((size_t)visited->items[current->idx] == 1 && stack->len == current->depth) {
+                    // n.b. cycle might be < depth, if we had a cycle with a child
+                    // idx, but since we are on the top of the stack, nobody
+                    // observed that and so we are content to ignore this
+                    size_t childidx = (size_t)arraylist_pop(stack);
+                    assert(childidx == current->idx); (void)childidx;
+                    final_result = 0;
+                    goto propagate_to_parent;
+                }
+
+                if (current->cycle != current->depth) {
+                    final_result = current->cycle;
+                    goto propagate_to_parent;
+                }
+
+                current->state = STATE_FINALIZE_SCC;
+                break;
             }
-            else if (lim != -1) {
-                // when limited, don't include this match if it was covered by an earlier one
-                for (size_t result_len = 0; result_len < result->len; result_len++) {
-                    size_t idx2 = (size_t)result->items[result_len];
-                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2);
-                    jl_method_t *m2 = matc2->method;
-                    if (jl_subtype(ti, m2->sig)) {
-                        if (include_ambiguous) {
-                            if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
-                                continue;
-                        }
-                        visited->items[childidx] = (void*)1;
-                        break;
+
+            case STATE_FINALIZE_SCC: {
+                // If this is in an SCC group, do some additional checks before returning or setting has_ambiguity
+                if (current->depth != stack->len) {
+                    int scc_count = 0;
+                    for (size_t i = current->depth - 1; i < stack->len; i++) {
+                        size_t childidx = (size_t)stack->items[i];
+                        if (visited->items[childidx] == (void*)1)
+                            continue;
+                        scc_count++;
                     }
+                    if (scc_count > 1)
+                        *has_ambiguity = 1;
                 }
-            }
-        }
-        if (!include_ambiguous && lim == -1) {
-            for (size_t i = depth - 1; i < stack->len; i++) {
-                size_t childidx = (size_t)stack->items[i];
-                if ((size_t)visited->items[childidx] == 1)
-                    continue;
-                jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx);
-                jl_method_t *m = matc->method;
-                jl_value_t *ti = (jl_value_t*)matc->spec_types;
-                for (size_t j = depth - 1; j < stack->len; j++) {
-                    if (i == j)
+
+                // copy this cycle into the results
+                for (size_t i = current->depth - 1; i < stack->len; i++) {
+                    size_t childidx = (size_t)stack->items[i];
+                    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx);
+                    int subt = matc->fully_covers != NOT_FULLY_COVERS;
+                    if (subt && *found_minmax)
+                        visited->items[childidx] = (void*)1;
+                    if ((size_t)visited->items[childidx] == 1)
                         continue;
-                    size_t idx2 = (size_t)stack->items[j];
-                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2);
-                    jl_method_t *m2 = matc2->method;
-                    int subt2 = matc2->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
-                    // if their intersection contributes to the ambiguity cycle
-                    // and the contribution of m is fully ambiguous with the portion of the cycle from m2
-                    if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) {
-                        // but they aren't themselves simply ordered (here
-                        // we don't consider that a third method might be
-                        // disrupting that ordering and just consider them
-                        // pairwise to keep this simple).
-                        if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) &&
-                            !jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) {
-                            visited->items[childidx] = (void*)-1;
-                            break;
-                        }
+                    assert(visited->items[childidx] == (void*)(2 + i));
+                    visited->items[childidx] = (void*)1;
+                    if (lim == -1 || result->len < lim)
+                        arraylist_push(result, (void*)childidx);
+                    else {
+                        final_result = -1;
+                        goto propagate_to_parent;
                     }
                 }
+
+                // now finally cleanup the stack
+                while (stack->len >= current->depth) {
+                    size_t childidx = (size_t)arraylist_pop(stack);
+                    // always remove fully_covers matches after the first minmax ambiguity group is handled
+                    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx);
+                    int subt = matc->fully_covers == FULLY_COVERS;
+                    if (subt && *found_minmax == 1)
+                        *found_minmax = 2;
+                    assert(visited->items[childidx] == (void*)1);
+                }
+
+                final_result = 0;
+                goto propagate_to_parent;
             }
         }
-    }
-    // copy this cycle into the results
-    for (size_t i = depth - 1; i < stack->len; i++) {
-        size_t childidx = (size_t)stack->items[i];
-        if ((size_t)visited->items[childidx] == 1)
-            continue;
-        if ((size_t)visited->items[childidx] != -1) {
-            assert(visited->items[childidx] == (void*)(2 + i));
-            visited->items[childidx] = (void*)-1;
-            if (lim == -1 || result->len < lim)
-                arraylist_push(result, (void*)childidx);
-            else
-                return -1;
-        }
-    }
-    // now finally cleanup the stack
-    while (stack->len >= depth) {
-        size_t childidx = (size_t)arraylist_pop(stack);
-        // always remove fully_covers matches after the first minmax ambiguity group is handled
-        //jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx);
-        if (matc->fully_covers != NOT_FULLY_COVERS && !addambig)
-            *found_minmax = 2;
-        if (visited->items[childidx] != (void*)-1)
+
+        continue_main_loop:
             continue;
-        visited->items[childidx] = (void*)1;
+
+        propagate_to_parent:
+            // Propagate result to parent if exists
+            free(arraylist_pop(&frame_stack));
+            if (frame_stack.len == 0)
+                break;
+            sort_stack_frame_t *parent = (sort_stack_frame_t*)frame_stack.items[frame_stack.len - 1];
+            parent->child_result = final_result;
     }
-    return 0;
+    assert(frame_stack.len == 0);
+    arraylist_free(&frame_stack);
+    return final_result;
 }
 
 
-
 // This is the collect form of calling jl_typemap_intersection_visitor
 // with optimizations to skip fully shadowed methods.
 //
@@ -3572,7 +4838,7 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array
 // fully-covers is a Bool indicating subtyping, though temporarily it may be
 // tri-values, with `nothing` indicating a match that is not a subtype, but
 // which is dominated by one which is (and thus should be excluded unless ambiguous)
-static jl_value_t *ml_matches(jl_methtable_t *mt,
+static jl_value_t *ml_matches(jl_methtable_t *mt, jl_methcache_t *mc,
                               jl_tupletype_t *type, int lim, int include_ambiguous,
                               int intersections, size_t world, int cache_result,
                               size_t *min_valid, size_t *max_valid, int *ambig)
@@ -3592,21 +4858,47 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
         else
             va = NULL;
     }
-    struct ml_matches_env env = {{ml_matches_visitor, (jl_value_t*)type, va, /* .search_slurp = */ 0,
-            /* .min_valid = */ *min_valid, /* .max_valid = */ *max_valid,
-            /* .ti = */ NULL, /* .env = */ jl_emptysvec, /* .issubty = */ 0},
-        intersections, world, lim, include_ambiguous, /* .t = */ jl_an_empty_vec_any,
-        /* .matc = */ NULL};
-    struct jl_typemap_assoc search = {(jl_value_t*)type, world, jl_emptysvec, 1, ~(size_t)0};
+    struct ml_matches_env env = {
+        /* match */ {
+            /* inputs */
+            /* fptr / callback */ ml_matches_visitor,
+            /* sig */ (jl_value_t*)type,
+            /* vararg type / tparam0 */ va,
+
+            /* temporaries */
+            /* .search_slurp = */ 0,
+
+            /* outputs */
+            /* .min_valid = */ *min_valid,
+            /* .max_valid = */ *max_valid,
+            /* .ti = */ NULL,
+            /* .env = */ jl_emptysvec,
+            /* .issubty = */ 0
+        },
+        /* inputs */
+        intersections,
+        world,
+        lim,
+        include_ambiguous,
+
+        /* outputs */
+        /* .t = */ jl_an_empty_vec_any,
+
+        /* temporaries */
+        /* .matc = */ NULL
+    };
+    struct jl_typemap_assoc search = {(jl_value_t*)type, world, jl_emptysvec};
     jl_value_t *isect2 = NULL;
     JL_GC_PUSH6(&env.t, &env.matc, &env.match.env, &search.env, &env.match.ti, &isect2);
 
-    if (mt) {
-        // check the leaf cache if this type can be in there
+    if (mc) {
+        // first check the leaf cache if the type might have been put in there
         if (((jl_datatype_t*)unw)->isdispatchtuple) {
-            jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
+            jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mc->leafcache);
             jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)type, world);
             if (entry) {
+                // leafcache found a match, construct the MethodMatch by computing the effective
+                // types + sparams and the world bounds
                 jl_method_instance_t *mi = entry->func.linfo;
                 jl_method_t *meth = mi->def.method;
                 if (!jl_is_unionall(meth->sig)) {
@@ -3625,100 +4917,104 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
                     env.match.env, meth, FULLY_COVERS);
                 env.t = (jl_value_t*)jl_alloc_vec_any(1);
                 jl_array_ptr_set(env.t, 0, env.matc);
-                if (*min_valid < entry->min_world)
-                    *min_valid = entry->min_world;
-                if (*max_valid > entry->max_world)
-                    *max_valid = entry->max_world;
+                size_t min_world = jl_atomic_load_relaxed(&entry->min_world);
+                size_t max_world = jl_atomic_load_relaxed(&entry->max_world);
+                if (*min_valid < min_world)
+                    *min_valid = min_world;
+                if (*max_valid > max_world)
+                    *max_valid = max_world;
                 JL_GC_POP();
                 return env.t;
             }
         }
+
         // then check the full cache if it seems profitable
         if (((jl_datatype_t*)unw)->isdispatchtuple) {
-            jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->cache), &search, jl_cachearg_offset(mt), /*subtype*/1);
+            jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mc->cache), &search, jl_cachearg_offset(), /*subtype*/1);
             if (entry && (((jl_datatype_t*)unw)->isdispatchtuple || entry->guardsigs == jl_emptysvec)) {
+                // full cache found a match, construct the MethodMatch by computing the effective
+                // types + sparams and the world bounds
                 jl_method_instance_t *mi = entry->func.linfo;
                 jl_method_t *meth = mi->def.method;
-                if (!jl_is_unionall(meth->sig) && ((jl_datatype_t*)unw)->isdispatchtuple) {
-                    env.match.env = jl_emptysvec;
-                    env.match.ti = unw;
-                }
-                else {
-                    // this just calls jl_subtype_env (since we know that `type <: meth->sig` by transitivity)
-                    env.match.ti = jl_type_intersection_env((jl_value_t*)type, (jl_value_t*)meth->sig, &env.match.env);
+                size_t min_world = jl_atomic_load_relaxed(&entry->min_world);
+                // only return this if it appears min_would is fully computed, otherwise do the full lookup to compute min_world exactly
+                if (min_world == jl_atomic_load_relaxed(&meth->primary_world)) {
+                    size_t max_world = jl_atomic_load_relaxed(&entry->max_world);
+                    if (!jl_is_unionall(meth->sig) && ((jl_datatype_t*)unw)->isdispatchtuple) {
+                        env.match.env = jl_emptysvec;
+                        env.match.ti = unw;
+                    }
+                    else {
+                        // this just calls jl_subtype_env (since we know that `type <: meth->sig` by transitivity)
+                        env.match.ti = jl_type_intersection_env((jl_value_t*)type, (jl_value_t*)meth->sig, &env.match.env);
+                    }
+                    env.matc = make_method_match((jl_tupletype_t*)env.match.ti,
+                        env.match.env, meth, FULLY_COVERS);
+                    env.t = (jl_value_t*)jl_alloc_vec_any(1);
+                    jl_array_ptr_set(env.t, 0, env.matc);
+                    if (*min_valid < min_world)
+                        *min_valid = min_world;
+                    if (*max_valid > max_world)
+                        *max_valid = max_world;
+                    JL_GC_POP();
+                    return env.t;
                 }
-                env.matc = make_method_match((jl_tupletype_t*)env.match.ti,
-                    env.match.env, meth, FULLY_COVERS);
-                env.t = (jl_value_t*)jl_alloc_vec_any(1);
-                jl_array_ptr_set(env.t, 0, env.matc);
-                if (*min_valid < entry->min_world)
-                    *min_valid = entry->min_world;
-                if (*max_valid > entry->max_world)
-                    *max_valid = entry->max_world;
-                JL_GC_POP();
-                return env.t;
             }
         }
-        if (!ml_mtable_visitor(mt, &env.match)) {
-            JL_GC_POP();
-            return jl_nothing;
-        }
     }
-    else {
-        // else: scan everything
-        if (!jl_foreach_reachable_mtable(ml_mtable_visitor, &env.match)) {
-            JL_GC_POP();
-            return jl_nothing;
-        }
+    // then scan everything
+    if (!jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), 0, &env.match) && env.t == jl_an_empty_vec_any) {
+        JL_GC_POP();
+        // if we return early without returning methods, lim was proven to be exceeded
+        // during the search set only the min/max valid collected from matching
+        *min_valid = env.match.min_valid;
+        *max_valid = env.match.max_valid;
+        return jl_nothing;
     }
+    // if we return early, set only the min/max valid collected from matching
     *min_valid = env.match.min_valid;
     *max_valid = env.match.max_valid;
     // done with many of these values now
     env.match.ti = NULL; env.matc = NULL; env.match.env = NULL; search.env = NULL;
-    size_t i, j, len = jl_array_len(env.t);
+
+    // all intersecting methods have been collected now. the remaining work is to sort
+    // these and apply specificity to determine a list of dispatch-possible call targets
+    size_t i, j, len = jl_array_nrows(env.t);
+
+    // the 'minmax' method is a method that (1) fully-covers the queried type, and (2) is
+    // more-specific than any other fully-covering method (but if !all_subtypes, there are
+    // non-fully-covering methods to which it is _likely_ not more specific)
     jl_method_match_t *minmax = NULL;
-    int minmax_ambig = 0;
-    int all_subtypes = 1;
+    int any_subtypes = 0;
     if (len > 1) {
-        // first try to pre-process the results to find the most specific
-        // result that fully covers the input, since we can do this in linear
-        // time, and the rest is O(n^2)
+        // first try to pre-process the results to find the most specific option
+        // among the fully-covering methods, since we can do this in O(n^2)
+        // time, and the rest is O(n^3)
         //   - first find a candidate for the best of these method results
         for (i = 0; i < len; i++) {
             jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
             if (matc->fully_covers == FULLY_COVERS) {
+                any_subtypes = 1;
                 jl_method_t *m = matc->method;
-                if (minmax != NULL) {
-                    jl_method_t *minmaxm = minmax->method;
-                    if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig))
+                for (j = 0; j < len; j++) {
+                    if (i == j)
                         continue;
+                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j);
+                    if (matc2->fully_covers == FULLY_COVERS) {
+                        jl_method_t *m2 = matc2->method;
+                        if (!method_morespecific_via_interferences(m, m2))
+                            break;
+                    }
                 }
-                minmax = matc;
-            }
-            else {
-                all_subtypes = 0;
-            }
-        }
-        //   - then see if it dominated all of the other choices
-        if (minmax != NULL) {
-            for (i = 0; i < len; i++) {
-                jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                if (matc == minmax)
+                if (j == len) {
+                    // Found the minmax method
+                    minmax = matc;
                     break;
-                if (matc->fully_covers == FULLY_COVERS) {
-                    jl_method_t *m = matc->method;
-                    jl_method_t *minmaxm = minmax->method;
-                    if (!jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig)) {
-                        minmax_ambig = 1;
-                        minmax = NULL;
-                        has_ambiguity = 1;
-                        break;
-                    }
                 }
             }
         }
-        //   - it may even dominate some choices that are not subtypes!
-        //     move those into the subtype group, where we're filter them out shortly after
+        //   - it may even dominate (be more specific than) some choices that are not fully-covering!
+        //     move those into the subtype group, where we'll filter them out shortly after
         //     (potentially avoiding reporting these as an ambiguity, and
         //     potentially allowing us to hit the next fast path)
         //   - we could always check here if *any* FULLY_COVERS method is
@@ -3726,35 +5022,48 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
         //     cost much extra and is less likely to help us hit a fast path
         //     (we will look for this later, when we compute ambig_groupid, for
         //     correctness)
-        if (!all_subtypes && minmax != NULL) {
-            jl_method_t *minmaxm = minmax->method;
-            all_subtypes = 1;
+        int all_subtypes = any_subtypes;
+        if (any_subtypes) {
+            jl_method_t *minmaxm = NULL;
+            if (minmax != NULL)
+                minmaxm = minmax->method;
+            // scan through all the non-fully-matching methods and count them as "fully-covering" (ish)
+            // (i.e. in the 'subtype' group) if `minmax` is more-specific
             for (i = 0; i < len; i++) {
                 jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
                 if (matc->fully_covers != FULLY_COVERS) {
                     jl_method_t *m = matc->method;
-                    if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig))
-                        matc->fully_covers = SENTINEL; // put a sentinel value here for sorting
-                    else
-                        all_subtypes = 0;
+                    if (minmaxm) {
+                        if (method_morespecific_via_interferences(minmaxm, m)) {
+                            matc->fully_covers = SENTINEL; // put a sentinel value here for sorting
+                            continue;
+                        }
+                        if (method_in_interferences(minmaxm, m)) // !morespecific(m, minmaxm)
+                            has_ambiguity = 1;
+                    }
+                    all_subtypes = 0;
                 }
             }
         }
         //    - now we might have a fast-return here, if we see that
         //      we've already processed all of the possible outputs
         if (all_subtypes) {
-            if (minmax_ambig) {
+            if (minmax == NULL) {
+                // all intersecting methods are fully-covering, but there is no unique most-specific method
                 if (!include_ambiguous) {
+                    // there no unambiguous choice of method
                     len = 0;
                     env.t = jl_an_empty_vec_any;
                 }
                 else if (lim == 1) {
+                    // we'd have to return >1 method due to the ambiguity, so bail early
                     JL_GC_POP();
                     return jl_nothing;
                 }
             }
             else {
-                assert(minmax != NULL);
+                // `minmax` is more-specific than all other matches and is fully-covering
+                // we can return it as our only result
                 jl_array_ptr_set(env.t, 0, minmax);
                 jl_array_del_end((jl_array_t*)env.t, len - 1);
                 len = 1;
@@ -3767,20 +5076,23 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
         }
     }
     if (len > 1) {
-        arraylist_t stack, visited, result, allambig;
+        arraylist_t stack, visited, result, recursion_stack;
         arraylist_new(&result, lim != -1 && lim < len ? lim : len);
         arraylist_new(&stack, 0);
         arraylist_new(&visited, len);
-        arraylist_new(&allambig, len);
+        arraylist_new(&recursion_stack, len);
         arraylist_grow(&visited, len);
         memset(visited.items, 0, len * sizeof(size_t));
         // if we had a minmax method (any subtypes), now may now be able to
         // quickly cleanup some of methods
         int found_minmax = 0;
-        if (minmax != NULL)
+        if (has_ambiguity)
+            found_minmax = 1;
+        else if (minmax != NULL)
             found_minmax = 2;
-        else if (minmax_ambig && !include_ambiguous)
+        else if (any_subtypes && !include_ambiguous)
             found_minmax = 1;
+        has_ambiguity = 0;
         if (ambig == NULL) // if we don't care about the result, set it now so we won't bother attempting to compute it accurately later
             has_ambiguity = 1;
         for (i = 0; i < len; i++) {
@@ -3791,9 +5103,9 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
                 // by visiting it and it might be a bit costly
                 continue;
             }
-            int child_cycle = sort_mlmatches((jl_array_t*)env.t, i, &visited, &stack, &result, &allambig, lim == -1 || minmax == NULL ? lim : lim - 1, include_ambiguous, &has_ambiguity, &found_minmax);
+            int child_cycle = sort_mlmatches((jl_array_t*)env.t, i, &visited, &stack, &result, &recursion_stack, lim == -1 || minmax == NULL ? lim : lim - 1, include_ambiguous, &has_ambiguity, &found_minmax);
             if (child_cycle == -1) {
-                arraylist_free(&allambig);
+                arraylist_free(&recursion_stack);
                 arraylist_free(&visited);
                 arraylist_free(&stack);
                 arraylist_free(&result);
@@ -3804,91 +5116,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
             assert(stack.len == 0);
             assert(visited.items[i] == (void*)1);
         }
-        // now compute whether there were ambiguities left in this cycle
-        if (has_ambiguity == 0 && allambig.len > 0) {
-            if (lim == -1) {
-                // lim is over-approximated, so has_ambiguities is too
-                has_ambiguity = 1;
-            }
-            else {
-                // go back and find the additional ambiguous methods and temporary add them to the stack
-                // (potentially duplicating them from lower on the stack to here)
-                jl_value_t *ti = NULL;
-                jl_value_t *isect2 = NULL;
-                JL_GC_PUSH2(&ti, &isect2);
-                for (size_t i = 0; i < allambig.len; i++) {
-                    size_t idx = (size_t)allambig.items[i];
-                    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx);
-                    jl_method_t *m = matc->method;
-                    int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
-                    for (size_t idx2 = 0; idx2 < jl_array_len(env.t); idx2++) {
-                        if (idx2 == idx)
-                            continue;
-                        // laborious test, checking for existence and coverage of another method (m3)
-                        // outside of the ambiguity group that dominates any ambiguous methods,
-                        // and means we can ignore this for has_ambiguity
-                        // (has_ambiguity is overestimated for lim==-1, since we don't compute skipped matches either)
-                        // n.b. even if we skipped them earlier, they still might
-                        // contribute to the ambiguities (due to lock of transitivity of
-                        // morespecific over subtyping)
-                        // TODO: we could improve this result by checking if the removal of some
-                        // edge earlier means that this subgraph is now well-ordered and then be
-                        // allowed to ignore these vertexes entirely here
-                        jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx2);
-                        jl_method_t *m2 = matc2->method;
-                        int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
-                        if (subt) {
-                            ti = (jl_value_t*)matc2->spec_types;
-                            isect2 = NULL;
-                        }
-                        else if (subt2) {
-                            ti = (jl_value_t*)matc->spec_types;
-                            isect2 = NULL;
-                        }
-                        else {
-                            jl_type_intersection2((jl_value_t*)matc->spec_types, (jl_value_t*)matc2->spec_types, &ti, &isect2);
-                        }
-                        // if their intersection contributes to the ambiguity cycle
-                        if (ti == jl_bottom_type)
-                            continue;
-                        // and they aren't themselves simply ordered
-                        if (jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) ||
-                            jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
-                            continue;
-                        // now look for a third method m3 that dominated these and that fully covered this intersection already
-                        size_t k;
-                        for (k = 0; k < result.len; k++) {
-                            size_t idx3 = (size_t)result.items[k];
-                            if (idx3 == idx || idx3 == idx2) {
-                                has_ambiguity = 1;
-                                break;
-                            }
-                            jl_method_match_t *matc3 = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx3);
-                            jl_method_t *m3 = matc3->method;
-                            if ((jl_subtype(ti, m3->sig) || (isect2 && jl_subtype(isect2, m3->sig)))
-                                    && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m->sig)
-                                    && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m2->sig)) {
-                                //if (jl_subtype(matc->spec_types, ti) || jl_subtype(matc->spec_types, matc3->m3->sig))
-                                //    // check if it covered not only this intersection, but all intersections with matc
-                                //    // if so, we do not need to check all of them separately
-                                //    j = len;
-                                break;
-                            }
-                        }
-                        if (k == result.len)
-                            has_ambiguity = 1;
-                        isect2 = NULL;
-                        ti = NULL;
-                        if (has_ambiguity)
-                            break;
-                    }
-                    if (has_ambiguity)
-                        break;
-                }
-                JL_GC_POP();
-            }
-        }
-        arraylist_free(&allambig);
+        arraylist_free(&recursion_stack);
         arraylist_free(&visited);
         arraylist_free(&stack);
         for (j = 0; j < result.len; j++) {
@@ -3903,22 +5131,32 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
             arraylist_push(&result, minmax);
             j++;
         }
-        memcpy(jl_array_data(env.t), result.items, j * sizeof(jl_method_match_t*));
+        memcpy(jl_array_data(env.t, jl_method_match_t*), result.items, j * sizeof(jl_method_match_t*));
         arraylist_free(&result);
         if (j != len)
             jl_array_del_end((jl_array_t*)env.t, len - j);
         len = j;
     }
-    if (mt && cache_result && ((jl_datatype_t*)unw)->isdispatchtuple) { // cache_result parameter keeps this from being recursive
+    for (j = 0; j < len; j++) {
+        jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, j);
+        jl_method_t *m = matc->method;
+        // method applicability is the same as typemapentry applicability
+        size_t min_world = jl_atomic_load_relaxed(&m->primary_world);
+        // intersect the env valid range with method lookup's inclusive valid range
+        if (env.match.min_valid < min_world)
+            env.match.min_valid = min_world;
+    }
+    if (mc && cache_result && ((jl_datatype_t*)unw)->isdispatchtuple) { // cache_result parameter keeps this from being recursive
         if (len == 1 && !has_ambiguity) {
             env.matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, 0);
             jl_method_t *meth = env.matc->method;
             jl_svec_t *tpenv = env.matc->sparams;
-            JL_LOCK(&mt->writelock);
-            cache_method(mt, &mt->cache, (jl_value_t*)mt, (jl_tupletype_t*)unw, meth, world, env.match.min_valid, env.match.max_valid, tpenv);
-            JL_UNLOCK(&mt->writelock);
+            JL_LOCK(&mc->writelock);
+            cache_method(mt, mc, &mc->cache, (jl_value_t*)mc, (jl_tupletype_t*)unw, meth, world, env.match.min_valid, env.match.max_valid, tpenv);
         }
     }
+    *min_valid = env.match.min_valid;
+    *max_valid = env.match.max_valid;
     if (ambig != NULL)
         *ambig = has_ambiguity;
     JL_GC_POP();
@@ -3930,7 +5168,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
 // see if it might be possible to construct an instance of `typ`
 // if n_uninitialized == 0, but a fieldtype is Union{},
 // that type will not be constructable, for example, tested recursively
-int jl_has_concrete_subtype(jl_value_t *typ)
+JL_DLLEXPORT int jl_has_concrete_subtype(jl_value_t *typ)
 {
     if (typ == jl_bottom_type)
         return 0;
@@ -3951,7 +5189,7 @@ JL_DLLEXPORT uint64_t jl_typeinf_timing_begin(void)
     return jl_hrtime();
 }
 
-JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start)
+JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start, int is_recompile)
 {
     if (!start)
         return;
@@ -3960,19 +5198,110 @@ JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start)
     if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
         uint64_t inftime = jl_hrtime() - start;
         jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, inftime);
+        if (is_recompile) {
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, inftime);
+        }
+    }
+}
+
+// declare a C-callable entry point; called during code loading from the toplevel
+JL_DLLEXPORT void jl_extern_c(jl_value_t *name, jl_value_t *declrt, jl_tupletype_t *sigt)
+{
+    // validate arguments. try to do as many checks as possible here to avoid
+    // throwing errors later during codegen.
+    JL_TYPECHK(@ccallable, type, declrt);
+    if (!jl_is_tuple_type(sigt))
+        jl_type_error("@ccallable", (jl_value_t*)jl_anytuple_type_type, (jl_value_t*)sigt);
+    // check that f is a guaranteed singleton type
+    jl_datatype_t *ft = (jl_datatype_t*)jl_tparam0(sigt);
+    if (!jl_is_datatype(ft) || !jl_is_datatype_singleton(ft))
+        jl_error("@ccallable: function object must be a singleton");
+
+    // compute / validate return type
+    if (!jl_type_mappable_to_c(declrt))
+        jl_error("@ccallable: return type doesn't correspond to a C type");
+
+    // validate method signature
+    size_t i, nargs = jl_nparams(sigt);
+    for (i = 1; i < nargs; i++) {
+        jl_value_t *ati = jl_tparam(sigt, i);
+        if (!jl_is_concrete_type(ati) || jl_is_kind(ati) || !jl_type_mappable_to_c(ati))
+            jl_error("@ccallable: argument types must be concrete");
+    }
+
+    // save a record of this so that the alias is generated when we write an object file
+    jl_method_t *meth = (jl_method_t*)jl_methtable_lookup((jl_value_t*)sigt, jl_atomic_load_acquire(&jl_world_counter));
+    if (!jl_is_method(meth))
+        jl_error("@ccallable: could not find requested method");
+    JL_GC_PUSH1(&meth);
+    if (name == jl_nothing)
+        meth->ccallable = jl_svec2(declrt, (jl_value_t*)sigt);
+    else
+        meth->ccallable = jl_svec3(declrt, (jl_value_t*)sigt, name);
+    jl_gc_wb(meth, meth->ccallable);
+    JL_GC_POP();
+}
+
+// Drop all method caches and increment world age as if adding a method that intersects everything
+static void invalidate_method_instance_caches(jl_method_instance_t *mi, size_t world)
+{
+    if ((jl_value_t*)mi == jl_nothing)
+        return;
+
+    // Walk through all code instances for this method instance
+    jl_code_instance_t *ci = jl_atomic_load_relaxed(&mi->cache);
+    while (ci != NULL) {
+        // Invalidate this code instance by setting max_world to current world
+        if (jl_atomic_load_relaxed(&ci->max_world) == ~(size_t)0) {
+            jl_atomic_store_release(&ci->max_world, world);
+        }
+        ci = jl_atomic_load_relaxed(&ci->next);
+    }
+}
+
+static int invalidate_all_specializations(jl_typemap_entry_t *def, void *closure)
+{
+    size_t world = *(size_t*)closure;
+    jl_method_t *method = def->func.method;
+    JL_LOCK(&method->writelock);
+    jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations);
+    if (jl_is_svec(specializations)) {
+        size_t i, l = jl_svec_len(specializations);
+        for (i = 0; i < l; i++) {
+            jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
+            invalidate_method_instance_caches(mi, world);
+        }
     }
+    else if (specializations != NULL) {
+        jl_method_instance_t *mi = (jl_method_instance_t*)specializations;
+        invalidate_method_instance_caches(mi, world);
+    }
+    JL_UNLOCK(&method->writelock);
+    return 1;
 }
 
-JL_DLLEXPORT void jl_typeinf_lock_begin(void)
+static int invalidate_all_caches_visitor(jl_methtable_t *mt, void *env)
 {
-    JL_LOCK(&jl_codegen_lock);
+    return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), invalidate_all_specializations, env);
 }
 
-JL_DLLEXPORT void jl_typeinf_lock_end(void)
+JL_DLLEXPORT void jl_drop_all_caches(void)
 {
-    JL_UNLOCK(&jl_codegen_lock);
+    JL_LOCK(&world_counter_lock);
+
+    // Get current world age - we'll invalidate everything at this world
+    size_t current_world = jl_atomic_load_relaxed(&jl_world_counter);
+
+    invalidate_all_caches_visitor(jl_method_table, &current_world);
+
+    // Increment world age - this forces all subsequent compilation to happen in the new world
+    size_t new_world = current_world + 1;
+    jl_atomic_store_release(&jl_world_counter, new_world);
+
+    JL_UNLOCK(&world_counter_lock);
 }
 
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/iddict.c b/src/iddict.c
index 1fa8a67d1ae96..0a0895d048c32 100644
--- a/src/iddict.c
+++ b/src/iddict.c
@@ -1,49 +1,48 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#define hash_size(h) (jl_array_len(h) / 2)
+#define hash_size(h) (h->length / 2)
 
 // compute empirical max-probe for a given size
 #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6)
 
-#define keyhash(k) jl_object_id_(jl_typeof(k), k)
+#define keyhash(k) jl_object_id_(jl_typetagof(k), k)
 #define h2index(hv, sz) (size_t)(((hv) & ((sz)-1)) * 2)
 
-static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val);
+static inline int jl_table_assign_bp(jl_genericmemory_t **pa, jl_value_t *key, jl_value_t *val);
 
-JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
+JL_DLLEXPORT jl_genericmemory_t *jl_idtable_rehash(jl_genericmemory_t *a, size_t newsz)
 {
-    size_t sz = jl_array_len(a);
+    size_t sz = a->length;
     size_t i;
-    jl_value_t **ol = (jl_value_t **)a->data;
-    jl_array_t *newa = jl_alloc_vec_any(newsz);
-    // keep the original array in the original slot since we need `ol`
+    jl_value_t **ol = (jl_value_t **) a->ptr;
+    jl_genericmemory_t *newa = NULL;
+    // keep the original memory in the original slot since we need `ol`
     // to be valid in the loop below.
     JL_GC_PUSH2(&newa, &a);
+    newa = jl_alloc_memory_any(newsz);
     for (i = 0; i < sz; i += 2) {
         if (ol[i + 1] != NULL) {
             jl_table_assign_bp(&newa, ol[i], ol[i + 1]);
-            // it is however necessary here because allocation
-            // can (and will) occur in a recursive call inside table_lookup_bp
         }
     }
     JL_GC_POP();
     return newa;
 }
 
-static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val)
+static inline int jl_table_assign_bp(jl_genericmemory_t **pa, jl_value_t *key, jl_value_t *val)
 {
     // pa points to a **un**rooted address
     uint_t hv;
-    jl_array_t *a = *pa;
+    jl_genericmemory_t *a = *pa;
     size_t orig, index, iter, empty_slot;
     size_t newsz, sz = hash_size(a);
     if (sz == 0) {
-        a = jl_alloc_vec_any(HT_N_INLINE);
+        a = jl_alloc_memory_any(HT_N_INLINE);
         sz = hash_size(a);
         *pa = a;
     }
     size_t maxprobe = max_probe(sz);
-    _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)a->data;
+    _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*) a->ptr;
 
     hv = keyhash(key);
     while (1) {
@@ -92,7 +91,7 @@ static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_
         /* quadruple size, rehash, retry the insert */
         /* it's important to grow the table really fast; otherwise we waste */
         /* lots of time rehashing all the keys over and over. */
-        sz = jl_array_len(a);
+        sz = a -> length;
         if (sz < HT_N_INLINE)
             newsz = HT_N_INLINE;
         else if (sz >= (1 << 19) || (sz <= (1 << 8)))
@@ -102,20 +101,20 @@ static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_
         *pa = jl_idtable_rehash(*pa, newsz);
 
         a = *pa;
-        tab = (_Atomic(jl_value_t*)*)a->data;
+        tab =  (_Atomic(jl_value_t*)*) a->ptr;
         sz = hash_size(a);
         maxprobe = max_probe(sz);
     }
 }
 
 /* returns bp if key is in hash, otherwise NULL */
-inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT
+inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_genericmemory_t *a, jl_value_t *key) JL_NOTSAFEPOINT
 {
     size_t sz = hash_size(a);
     if (sz == 0)
         return NULL;
     size_t maxprobe = max_probe(sz);
-    _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)a->data;
+    _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*) a->ptr;
     uint_t hv = keyhash(key);
     size_t index = h2index(hv, sz);
     sz *= 2;
@@ -142,7 +141,7 @@ inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL
 }
 
 JL_DLLEXPORT
-jl_array_t *jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int *p_inserted)
+jl_genericmemory_t *jl_eqtable_put(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *val, int *p_inserted)
 {
     int inserted = jl_table_assign_bp(&h, key, val);
     if (p_inserted)
@@ -153,20 +152,20 @@ jl_array_t *jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int
 // Note: lookup in the IdDict is permitted concurrently, if you avoid deletions,
 // and assuming you do use an external lock around all insertions
 JL_DLLEXPORT
-jl_value_t *jl_eqtable_get(jl_array_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT
+jl_value_t *jl_eqtable_get(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT
 {
     _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key);
     return (bp == NULL) ? deflt : jl_atomic_load_relaxed(bp);
 }
 
-jl_value_t *jl_eqtable_getkey(jl_array_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT
+jl_value_t *jl_eqtable_getkey(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT
 {
     _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key);
     return (bp == NULL) ? deflt : jl_atomic_load_relaxed(bp - 1);
 }
 
 JL_DLLEXPORT
-jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, int *found)
+jl_value_t *jl_eqtable_pop(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt, int *found)
 {
     _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key);
     if (found)
@@ -180,12 +179,12 @@ jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, in
 }
 
 JL_DLLEXPORT
-size_t jl_eqtable_nextind(jl_array_t *t, size_t i)
+size_t jl_eqtable_nextind(jl_genericmemory_t *t, size_t i)
 {
     if (i & 1)
         i++;
-    size_t alen = jl_array_dim0(t);
-    while (i < alen && ((void **)t->data)[i + 1] == NULL)
+    size_t alen = t->length;
+    while (i < alen && ((void**) t->ptr)[i + 1] == NULL)
         i += 2;
     if (i >= alen)
         return (size_t)-1;
@@ -194,3 +193,4 @@ size_t jl_eqtable_nextind(jl_array_t *t, size_t i)
 
 #undef hash_size
 #undef max_probe
+#undef h2index
diff --git a/src/idset.c b/src/idset.c
new file mode 100644
index 0000000000000..b9711ee17f021
--- /dev/null
+++ b/src/idset.c
@@ -0,0 +1,118 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+
+static uint_t idset_hash(size_t idx, jl_value_t *data)
+{
+    jl_value_t *x = jl_genericmemory_ptr_ref(data, idx);
+    // x should not be NULL, unless there was concurrent corruption
+    return x == NULL ? 0 : jl_object_id(x);
+}
+
+static int idset_eq(size_t idx, const void *y, jl_value_t *data, uint_t hv)
+{
+    jl_value_t *x = jl_genericmemory_ptr_ref(data, idx);
+    // x should not be NULL, unless there was concurrent corruption
+    return x == NULL ? 0 : jl_egal(x, (jl_value_t*)y);
+}
+
+jl_genericmemory_t *jl_idset_rehash(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, size_t newsz)
+{
+    if (newsz == 0)
+        return idxs;
+    newsz = next_power_of_two(newsz);
+    //if (idxs->length == newsz)
+    //    jl_idset_put_idx(keys, idxs, -newsz+1);
+    //else
+    return smallintset_rehash(idxs, idset_hash, (jl_value_t*)keys, newsz, 0);
+}
+
+// Return idx if key is in hash, otherwise -1
+// Note: lookup in the IdSet is permitted concurrently, if you avoid deletions,
+// and assuming you do use an external lock around all insertions
+ssize_t jl_idset_peek_bp(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT
+{
+    uintptr_t hv = jl_object_id(key);
+    return jl_smallintset_lookup(idxs, idset_eq, key, (jl_value_t*)keys, hv, 0);
+}
+
+jl_value_t *jl_idset_get(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT
+{
+    ssize_t idx = jl_idset_peek_bp(keys, idxs, key);
+    if (idx == -1)
+        return NULL;
+    return jl_genericmemory_ptr_ref(keys, idx);
+}
+
+
+static ssize_t idset_compact(jl_genericmemory_t *keys)
+{
+    // compact keys before rehashing idxs
+    ssize_t i, j;
+    ssize_t rehash = 0;
+    for (i = j = 0; i < keys->length; i++) {
+        jl_value_t *k = jl_genericmemory_ptr_ref(keys, i);
+        if (k != NULL) {
+            if (i != j) {
+                rehash = 1;
+                jl_genericmemory_ptr_set(keys, j, k);
+                jl_genericmemory_ptr_set(keys, i, NULL);
+            }
+            j++;
+        }
+    }
+    return rehash ? -j : j;
+}
+
+jl_genericmemory_t *jl_idset_put_key(jl_genericmemory_t *keys, jl_value_t *key, ssize_t *newidx)
+{
+    ssize_t l = keys->length;
+    ssize_t i = l;
+    while (i > 0 && jl_genericmemory_ptr_ref(keys, i - 1) == NULL)
+        i--;
+    // i points to the place to insert
+    *newidx = i;
+    if (i == l) {
+        i = idset_compact(keys);
+        if (i < 0) {
+            *newidx = i - 1;
+            i = -i;
+        }
+        if (i >= l / 3 * 2) {
+            size_t nl = l < 4 ? 4 : (l * 3) >> 1; // grow space by 50% if less than 33% free after compacting
+            jl_genericmemory_t *nk = jl_alloc_genericmemory(jl_memory_any_type, nl);
+            if (i > 0)
+                memcpy(nk->ptr, keys->ptr, sizeof(void*) * i);
+            keys = nk;
+        }
+    }
+    assert(jl_genericmemory_ptr_ref(keys, i) == NULL);
+    jl_genericmemory_ptr_set(keys, i, key);
+    return keys;
+}
+
+jl_genericmemory_t *jl_idset_put_idx(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, ssize_t idx)
+{
+    _Atomic(jl_genericmemory_t*) newidxs = idxs;
+    JL_GC_PUSH1(&newidxs);
+    if (idx < 0) { // full rehash
+        smallintset_empty(idxs);
+        for (ssize_t i = 0; i < -idx; i++)
+            if (jl_genericmemory_ptr_ref(keys, i) != NULL)
+                jl_smallintset_insert(&newidxs, NULL, idset_hash, i, (jl_value_t*)keys);
+    }
+    else {
+        jl_smallintset_insert(&newidxs, NULL, idset_hash, idx, (jl_value_t*)keys);
+    }
+    JL_GC_POP();
+    return jl_atomic_load_relaxed(&newidxs);
+}
+
+/* returns idx if key is in hash, otherwise -1 */
+ssize_t jl_idset_pop(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT
+{
+    uintptr_t hv = jl_object_id(key);
+    ssize_t idx = jl_smallintset_lookup(idxs, idset_eq, key, (jl_value_t*)keys, hv, 1);
+    if (idx != -1)
+        jl_genericmemory_ptr_set(keys, idx, NULL);
+    return idx;
+}
diff --git a/src/init.c b/src/init.c
index 4a152ed04b13d..5c2b9ce66678b 100644
--- a/src/init.c
+++ b/src/init.c
@@ -21,11 +21,13 @@
 #include <pthread_np.h>
 #endif
 
+#if !defined(_OS_WINDOWS_)
+#include <dlfcn.h>
+#endif
+
 #include "julia.h"
 #include "julia_internal.h"
-#define DEFINE_BUILTIN_GLOBALS
 #include "builtin_proto.h"
-#undef DEFINE_BUILTIN_GLOBALS
 #include "threading.h"
 #include "julia_assert.h"
 #include "processor.h"
@@ -35,8 +37,8 @@ extern "C" {
 #endif
 
 #ifdef _OS_WINDOWS_
-extern int needsSymRefreshModuleList;
-extern BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
+extern void jl_init_stackwalk(void);
+extern void jl_fin_stackwalk(void);
 #else
 #include <sys/resource.h>
 #include <unistd.h>
@@ -64,39 +66,29 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi)
     // threads since it seems to return bogus values for master thread on Linux
     // and possibly OSX.
     if (!ismaster) {
-#  if defined(_OS_LINUX_)
+#  if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
         pthread_attr_t attr;
+#if defined(_OS_FREEBSD_)
+        pthread_attr_init(&attr);
+        pthread_attr_get_np(pthread_self(), &attr);
+#else
         pthread_getattr_np(pthread_self(), &attr);
+#endif
         void *stackaddr;
         size_t stacksize;
         pthread_attr_getstack(&attr, &stackaddr, &stacksize);
         pthread_attr_destroy(&attr);
-        *stack_lo = (void*)stackaddr;
-#pragma GCC diagnostic push
-#if defined(_COMPILER_GCC_) && __GNUC__ >= 12
-#pragma GCC diagnostic ignored "-Wdangling-pointer"
-#endif
-        *stack_hi = (void*)__builtin_frame_address(0);
-#pragma GCC diagnostic pop
+        *stack_lo = stackaddr;
+        *stack_hi = (char*)stackaddr + stacksize;
         return;
 #  elif defined(_OS_DARWIN_)
         extern void *pthread_get_stackaddr_np(pthread_t thread);
         extern size_t pthread_get_stacksize_np(pthread_t thread);
         pthread_t thread = pthread_self();
         void *stackaddr = pthread_get_stackaddr_np(thread);
-        *stack_lo = (void*)stackaddr;
-        *stack_hi = (void*)__builtin_frame_address(0);
-        return;
-#  elif defined(_OS_FREEBSD_)
-        pthread_attr_t attr;
-        pthread_attr_init(&attr);
-        pthread_attr_get_np(pthread_self(), &attr);
-        void *stackaddr;
-        size_t stacksize;
-        pthread_attr_getstack(&attr, &stackaddr, &stacksize);
-        pthread_attr_destroy(&attr);
-        *stack_lo = (void*)stackaddr;
-        *stack_hi = (void*)__builtin_frame_address(0);
+        size_t stacksize = pthread_get_stacksize_np(thread);
+        *stack_lo = (char*)stackaddr - stacksize;
+        *stack_hi = stackaddr;
         return;
 #  else
 #      warning "Getting precise stack size for thread is not supported."
@@ -246,28 +238,21 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
 
     jl_task_t *ct = jl_get_current_task();
 
-    if (ct) {
-        if (exitcode == 0)
-            jl_write_compiler_output();
+    if (ct == NULL && jl_base_module) {
+        ct = container_of(jl_adopt_thread(), jl_task_t, gcstack);
+    }
+    else if (ct != NULL) {
         // we are about to start tearing everything down, so lets try not to get
         // upset by the local mess of things when we run the user's _atexit hooks
         // this also forces us into a GC-unsafe region without a safepoint
         jl_task_frame_noreturn(ct);
-    }
-
-    if (ct == NULL && jl_base_module)
-        ct = container_of(jl_adopt_thread(), jl_task_t, gcstack);
-    else if (ct != NULL)
         jl_gc_safepoint_(ct->ptls);
-
-    jl_print_gc_stats(JL_STDERR);
-    if (jl_options.code_coverage)
-        jl_write_coverage_data(jl_options.output_code_coverage);
-    if (jl_options.malloc_log)
-        jl_write_malloc_log();
+    }
 
     if (jl_base_module) {
-        jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("_atexit"));
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
+        jl_value_t *f = jl_get_global_value(jl_base_module, jl_symbol("_atexit"), ct->world_age);
         if (f != NULL) {
             jl_value_t **fargs;
             JL_GC_PUSHARGS(fargs, 2);
@@ -275,21 +260,32 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
             fargs[1] = jl_box_int32(exitcode);
             JL_TRY {
                 assert(ct);
-                size_t last_age = ct->world_age;
-                ct->world_age = jl_get_world_counter();
                 jl_apply(fargs, 2);
-                ct->world_age = last_age;
             }
             JL_CATCH {
                 jl_printf((JL_STREAM*)STDERR_FILENO, "\natexit hook threw an error: ");
-                jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+                jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
                 jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-                jlbacktrace(); // written to STDERR_FILENO
+                jl_fprint_backtrace(ios_safe_stderr);
             }
             JL_GC_POP();
         }
+        ct->world_age = last_age;
+    }
+
+    if (ct && exitcode == 0) {
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
+        jl_write_compiler_output();
+        ct->world_age = last_age;
     }
 
+    jl_print_gc_stats(JL_STDERR);
+    if (jl_options.code_coverage)
+        jl_write_coverage_data(jl_options.output_code_coverage);
+    if (jl_options.malloc_log)
+        jl_write_malloc_log();
+
     // replace standard output streams with something that we can still print to
     // after the finalizers from base/stream.jl close the TTY
     JL_STDOUT = (uv_stream_t*) STDOUT_FILENO;
@@ -317,9 +313,9 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
                     assert(item);
                     uv_unref(item->h);
                     jl_printf((JL_STREAM*)STDERR_FILENO, "error during exit cleanup: close: ");
-                    jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+                    jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
                     jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-                    jlbacktrace(); // written to STDERR_FILENO
+                    jl_fprint_backtrace(ios_safe_stderr);
                     item = next_shutdown_queue_item(item);
                 }
             }
@@ -338,21 +334,24 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
                          // we would like to guarantee this, but cannot currently, so there is still a small race window
                          // that needs to be fixed in libuv
     }
-    if (ct)
-        (void)jl_gc_safe_enter(ct->ptls); // park in gc-safe
     if (loop != NULL) {
         // TODO: consider uv_loop_close(loop) here, before shutdown?
         uv_library_shutdown();
         // no JL_UV_UNLOCK(), since it is now torn down
     }
-
-    // TODO: Destroy threads?
+    if (ct)
+        jl_safepoint_suspend_all_threads(ct); // Destroy other threads, so that they don't segfault
+    if (ct)
+        (void)jl_gc_safe_enter(ct->ptls); // park in gc-safe
 
     jl_destroy_timing(); // cleans up the current timing_stack for noreturn
 #ifdef USE_TIMING_COUNTS
     jl_print_timings();
 #endif
     jl_teardown_codegen(); // prints stats
+#ifdef _OS_WINDOWS_
+    jl_fin_stackwalk();
+#endif
 }
 
 JL_DLLEXPORT void jl_postoutput_hook(void)
@@ -362,21 +361,23 @@ JL_DLLEXPORT void jl_postoutput_hook(void)
 
     if (jl_base_module) {
         jl_task_t *ct = jl_get_current_task();
-        jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("_postoutput"));
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
+        jl_value_t *f = jl_get_global_value(jl_base_module, jl_symbol("_postoutput"), ct->world_age);
         if (f != NULL) {
             JL_TRY {
-                size_t last_age = ct->world_age;
-                ct->world_age = jl_get_world_counter();
+                JL_GC_PUSH1(&f);
                 jl_apply(&f, 1);
-                ct->world_age = last_age;
+                JL_GC_POP();
             }
             JL_CATCH {
                 jl_printf((JL_STREAM*)STDERR_FILENO, "\npostoutput hook threw an error: ");
-                jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+                jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
                 jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-                jlbacktrace(); // written to STDERR_FILENO
+                jl_fprint_backtrace(ios_safe_stderr);
             }
         }
+        ct->world_age = last_age;
     }
     return;
 }
@@ -448,6 +449,7 @@ static void *init_stdio_handle(const char *stdio, uv_os_fd_t fd, int readable)
     // This also helps limit the impact other libraries can cause on our file handle.
     if ((err = uv_dup(fd, &fd)))
         jl_errorf("error initializing %s in uv_dup: %s (%s %d)", stdio, uv_strerror(err), uv_err_name(err), err);
+    assert(fd != -1); // This avoids a bug in clang's static analyzer, if an error did not occur, fd != -1
     switch(uv_guess_handle(fd)) {
     case UV_TTY:
         handle = malloc_s(sizeof(uv_tty_t));
@@ -538,159 +540,6 @@ int jl_isabspath(const char *in) JL_NOTSAFEPOINT
     return 0; // relative path
 }
 
-static char *abspath(const char *in, int nprefix)
-{ // compute an absolute realpath location, so that chdir doesn't change the file reference
-  // ignores (copies directly over) nprefix characters at the start of abspath
-#ifndef _OS_WINDOWS_
-    char *out = realpath(in + nprefix, NULL);
-    if (out) {
-        if (nprefix > 0) {
-            size_t sz = strlen(out) + 1;
-            char *cpy = (char*)malloc_s(sz + nprefix);
-            memcpy(cpy, in, nprefix);
-            memcpy(cpy + nprefix, out, sz);
-            free(out);
-            out = cpy;
-        }
-    }
-    else {
-        size_t sz = strlen(in + nprefix) + 1;
-        if (in[nprefix] == PATHSEPSTRING[0]) {
-            out = (char*)malloc_s(sz + nprefix);
-            memcpy(out, in, sz + nprefix);
-        }
-        else {
-            size_t path_size = JL_PATH_MAX;
-            char *path = (char*)malloc_s(JL_PATH_MAX);
-            if (uv_cwd(path, &path_size)) {
-                jl_error("fatal error: unexpected error while retrieving current working directory");
-            }
-            out = (char*)malloc_s(path_size + 1 + sz + nprefix);
-            memcpy(out, in, nprefix);
-            memcpy(out + nprefix, path, path_size);
-            out[nprefix + path_size] = PATHSEPSTRING[0];
-            memcpy(out + nprefix + path_size + 1, in + nprefix, sz);
-            free(path);
-        }
-    }
-#else
-    DWORD n = GetFullPathName(in + nprefix, 0, NULL, NULL);
-    if (n <= 0) {
-        jl_error("fatal error: jl_options.image_file path too long or GetFullPathName failed");
-    }
-    char *out = (char*)malloc_s(n + nprefix);
-    DWORD m = GetFullPathName(in + nprefix, n, out + nprefix, NULL);
-    if (n != m + 1) {
-        jl_error("fatal error: jl_options.image_file path too long or GetFullPathName failed");
-    }
-    memcpy(out, in, nprefix);
-#endif
-    return out;
-}
-
-// create an absolute-path copy of the input path format string
-// formed as `joinpath(replace(pwd(), "%" => "%%"), in)`
-// unless `in` starts with `%`
-static const char *absformat(const char *in)
-{
-    if (in[0] == '%' || jl_isabspath(in))
-        return in;
-    // get an escaped copy of cwd
-    size_t path_size = JL_PATH_MAX;
-    char path[JL_PATH_MAX];
-    if (uv_cwd(path, &path_size)) {
-        jl_error("fatal error: unexpected error while retrieving current working directory");
-    }
-    size_t sz = strlen(in) + 1;
-    size_t i, fmt_size = 0;
-    for (i = 0; i < path_size; i++)
-        fmt_size += (path[i] == '%' ? 2 : 1);
-    char *out = (char*)malloc_s(fmt_size + 1 + sz);
-    fmt_size = 0;
-    for (i = 0; i < path_size; i++) { // copy-replace pwd portion
-        char c = path[i];
-        out[fmt_size++] = c;
-        if (c == '%')
-            out[fmt_size++] = '%';
-    }
-    out[fmt_size++] = PATHSEPSTRING[0]; // path sep
-    memcpy(out + fmt_size, in, sz); // copy over format, including nul
-    return out;
-}
-
-static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel)
-{   // this function resolves the paths in jl_options to absolute file locations as needed
-    // and it replaces the pointers to `julia_bindir`, `julia_bin`, `image_file`, and output file paths
-    // it may fail, print an error, and exit(1) if any of these paths are longer than JL_PATH_MAX
-    //
-    // note: if you care about lost memory, you should call the appropriate `free()` function
-    // on the original pointer for each `char*` you've inserted into `jl_options`, after
-    // calling `julia_init()`
-    char *free_path = (char*)malloc_s(JL_PATH_MAX);
-    size_t path_size = JL_PATH_MAX;
-    if (uv_exepath(free_path, &path_size)) {
-        jl_error("fatal error: unexpected error while retrieving exepath");
-    }
-    if (path_size >= JL_PATH_MAX) {
-        jl_error("fatal error: jl_options.julia_bin path too long");
-    }
-    jl_options.julia_bin = (char*)malloc_s(path_size + 1);
-    memcpy((char*)jl_options.julia_bin, free_path, path_size);
-    ((char*)jl_options.julia_bin)[path_size] = '\0';
-    if (!jl_options.julia_bindir) {
-        jl_options.julia_bindir = getenv("JULIA_BINDIR");
-        if (!jl_options.julia_bindir) {
-            jl_options.julia_bindir = dirname(free_path);
-        }
-    }
-    if (jl_options.julia_bindir)
-        jl_options.julia_bindir = abspath(jl_options.julia_bindir, 0);
-    free(free_path);
-    free_path = NULL;
-    if (jl_options.image_file) {
-        if (rel == JL_IMAGE_JULIA_HOME && !jl_isabspath(jl_options.image_file)) {
-            // build time path, relative to JULIA_BINDIR
-            free_path = (char*)malloc_s(JL_PATH_MAX);
-            int n = snprintf(free_path, JL_PATH_MAX, "%s" PATHSEPSTRING "%s",
-                             jl_options.julia_bindir, jl_options.image_file);
-            if (n >= JL_PATH_MAX || n < 0) {
-                jl_error("fatal error: jl_options.image_file path too long");
-            }
-            jl_options.image_file = free_path;
-        }
-        if (jl_options.image_file)
-            jl_options.image_file = abspath(jl_options.image_file, 0);
-        if (free_path) {
-            free(free_path);
-            free_path = NULL;
-        }
-    }
-    if (jl_options.outputo)
-        jl_options.outputo = abspath(jl_options.outputo, 0);
-    if (jl_options.outputji)
-        jl_options.outputji = abspath(jl_options.outputji, 0);
-    if (jl_options.outputbc)
-        jl_options.outputbc = abspath(jl_options.outputbc, 0);
-    if (jl_options.outputasm)
-        jl_options.outputasm = abspath(jl_options.outputasm, 0);
-    if (jl_options.machine_file)
-        jl_options.machine_file = abspath(jl_options.machine_file, 0);
-    if (jl_options.output_code_coverage)
-        jl_options.output_code_coverage = absformat(jl_options.output_code_coverage);
-    if (jl_options.tracked_path)
-        jl_options.tracked_path = absformat(jl_options.tracked_path);
-
-    const char **cmdp = jl_options.cmds;
-    if (cmdp) {
-        for (; *cmdp; cmdp++) {
-            const char *cmd = *cmdp;
-            if (cmd[0] == 'L') {
-                *cmdp = abspath(cmd, 1);
-            }
-        }
-    }
-}
-
 JL_DLLEXPORT int jl_is_file_tracked(jl_sym_t *path)
 {
     const char* path_ = jl_symbol_name(path);
@@ -708,45 +557,154 @@ extern jl_mutex_t jl_modules_mutex;
 extern jl_mutex_t precomp_statement_out_lock;
 extern jl_mutex_t newly_inferred_mutex;
 extern jl_mutex_t global_roots_lock;
+extern jl_mutex_t profile_show_peek_cond_lock;
 
 static void restore_fp_env(void)
 {
     if (jl_set_zero_subnormals(0) || jl_set_default_nans(0)) {
         jl_error("Failed to configure floating point environment");
     }
+    if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_OFF && jl_atomic_load_relaxed(&jl_n_threads) > 1) {
+        jl_error("Cannot use `--handle-signals=no` with multiple threads (JULIA_NUM_THREADS > 1).\n"
+        "This will cause segmentation faults due to GC safepoint failures.\n"
+        "Remove `--handle-signals=no` or set JULIA_NUM_THREADS=1.\n"
+        "See: https://github.com/JuliaLang/julia/issues/50278");
+    }
+}
+static NOINLINE void _finish_jl_init_(jl_image_buf_t sysimage, jl_ptls_t ptls, jl_task_t *ct)
+{
+    JL_TIMING(JULIA_INIT, JULIA_INIT);
+
+    if (sysimage.kind == JL_IMAGE_KIND_SO)
+        jl_gc_notify_image_load(sysimage.data, sysimage.size);
+
+    if (jl_options.cpu_target == NULL)
+        jl_options.cpu_target = "native";
+
+    // Parse image, perform relocations, and init JIT targets, etc.
+    jl_image_t parsed_image = jl_init_processor_sysimg(sysimage, jl_options.cpu_target);
+
+    jl_init_codegen();
+
+    if (sysimage.kind != JL_IMAGE_KIND_NONE) {
+        // Load the .ji or .so sysimage
+        jl_restore_system_image(&parsed_image, sysimage);
+    }
+    else {
+        // No sysimage provided, init a minimal environment
+        jl_init_types();
+        jl_global_roots_list = (jl_genericmemory_t*)jl_an_empty_memory_any;
+        jl_global_roots_keyset = (jl_genericmemory_t*)jl_an_empty_memory_any;
+    }
+
+    jl_init_flisp();
+    jl_init_serializer();
+
+    if (sysimage.kind == JL_IMAGE_KIND_NONE) {
+        jl_top_module = jl_core_module;
+        jl_init_intrinsic_functions();
+        jl_init_primitives();
+        jl_init_main_module();
+        jl_load(jl_core_module, "boot.jl");
+        post_boot_hooks();
+    }
+
+    if (jl_base_module == NULL) {
+        // nthreads > 1 requires code in Base
+        jl_atomic_store_relaxed(&jl_n_threads, 1);
+        jl_n_markthreads = 0;
+        jl_n_sweepthreads = 0;
+        jl_n_gcthreads = 0;
+        jl_n_threads_per_pool[JL_THREADPOOL_ID_INTERACTIVE] = 0;
+        jl_n_threads_per_pool[JL_THREADPOOL_ID_DEFAULT] = 1;
+    }
+    jl_start_threads();
+    jl_start_gc_threads();
+    uv_barrier_wait(&thread_init_done);
+
+    jl_gc_enable(1);
+
+    if ((sysimage.kind != JL_IMAGE_KIND_NONE) &&
+            (!jl_generating_output() || jl_options.incremental) && jl_module_init_order) {
+        jl_array_t *init_order = jl_module_init_order;
+        JL_GC_PUSH1(&init_order);
+        jl_module_init_order = NULL;
+        int i, l = jl_array_nrows(init_order);
+        for (i = 0; i < l; i++) {
+            jl_value_t *mod = jl_array_ptr_ref(init_order, i);
+            jl_module_run_initializer((jl_module_t*)mod);
+        }
+        JL_GC_POP();
+    }
+
+
+    if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON)
+        jl_install_sigint_handler();
 }
 
-static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct);
 
 JL_DLLEXPORT int jl_default_debug_info_kind;
+JL_DLLEXPORT jl_cgparams_t jl_default_cgparams = {
+        /* track_allocations */ 1,
+        /* code_coverage */ 1,
+        /* prefer_specsig */ 0,
+#ifdef _OS_WINDOWS_
+        /* gnu_pubnames */ 0,
+#else
+        /* gnu_pubnames */ 1,
+#endif
+        /* debug_info_kind */ 0, // later DICompileUnit::DebugEmissionKind::FullDebug,
+        /* debug_info_level */ 0, // later jl_options.debug_level,
+        /* safepoint_on_entry */ 1,
+        /* gcstack_arg */ 1,
+        /* use_jlplt*/ 1 ,
+        /*force_emit_all=*/ 0,
+#ifdef _COMPILER_MSAN_ENABLED_
+        /* sanitize_memory */ 1,
+#else
+        /* sanitize_memory */ 0,
+#endif
+#ifdef _COMPILER_TSAN_ENABLED_
+        /* sanitize_thread */ 1,
+#else
+        /* sanitize_thread */ 0,
+#endif
+#ifdef _COMPILER_ASAN_ENABLED_
+        /* sanitize_address */ 1,
+#else
+        /* sanitize_address */ 0,
+#endif
+};
 
 static void init_global_mutexes(void) {
     JL_MUTEX_INIT(&jl_modules_mutex, "jl_modules_mutex");
     JL_MUTEX_INIT(&precomp_statement_out_lock, "precomp_statement_out_lock");
     JL_MUTEX_INIT(&newly_inferred_mutex, "newly_inferred_mutex");
     JL_MUTEX_INIT(&global_roots_lock, "global_roots_lock");
-    JL_MUTEX_INIT(&jl_codegen_lock, "jl_codegen_lock");
     JL_MUTEX_INIT(&typecache_lock, "typecache_lock");
+    JL_MUTEX_INIT(&profile_show_peek_cond_lock, "profile_show_peek_cond_lock");
 }
 
-JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
+JL_DLLEXPORT void jl_init_(jl_image_buf_t sysimage)
 {
     // initialize many things, in no particular order
     // but generally running from simple platform things to optional
     // configuration features
-    jl_init_timing();
+
     // Make sure we finalize the tls callback before starting any threads.
     (void)jl_get_pgcstack();
 
+    // initialize symbol-table lock
+    uv_mutex_init(&symtab_lock);
+    // initialize the live tasks lock
+    uv_mutex_init(&live_tasks_lock);
+    // initialize the profiler buffer lock
+    uv_mutex_init(&bt_data_prof_lock);
+
     // initialize backtraces
     jl_init_profile_lock();
 #ifdef _OS_WINDOWS_
-    uv_mutex_init(&jl_in_stackwalk);
-    SymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES | SYMOPT_IGNORE_CVREC);
-    if (!SymInitialize(GetCurrentProcess(), "", 1)) {
-        jl_safe_printf("WARNING: failed to initialize stack walk info\n");
-    }
-    needsSymRefreshModuleList = 0;
+    jl_init_stackwalk();
 #else
     // nongnu libunwind initialization is only threadsafe on architecture where the
     // author could access TSAN, per https://github.com/libunwind/libunwind/pull/109
@@ -779,33 +737,27 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
     void *stack_lo, *stack_hi;
     jl_init_stack_limits(1, &stack_lo, &stack_hi);
 
-    jl_libjulia_internal_handle = jl_find_dynamic_library_by_addr(&jl_load_dynamic_library);
-    jl_libjulia_handle = jl_find_dynamic_library_by_addr(&jl_any_type);
+    // Note that if we ever want to be able to unload Julia entirely, we will
+    // have to dlclose() these handles.
+    jl_libjulia_internal_handle = jl_find_dynamic_library_by_addr(&jl_load_dynamic_library, /* throw_err */ 1, 0);
+    jl_libjulia_handle = jl_find_dynamic_library_by_addr(&jl_options, /* throw_err */ 1, 0);
 #ifdef _OS_WINDOWS_
+    /* If this parameter is NULL, GetModuleHandle returns a handle to the file
+       used to create the calling process (.exe file). */
     jl_exe_handle = GetModuleHandleA(NULL);
-    jl_RTLD_DEFAULT_handle = jl_libjulia_internal_handle;
+    jl_RTLD_DEFAULT_handle = NULL;
     jl_ntdll_handle = jl_dlopen("ntdll.dll", JL_RTLD_NOLOAD); // bypass julia's pathchecking for system dlls
     jl_kernel32_handle = jl_dlopen("kernel32.dll", JL_RTLD_NOLOAD);
     jl_crtdll_handle = jl_dlopen(jl_crtdll_name, JL_RTLD_NOLOAD);
     jl_winsock_handle = jl_dlopen("ws2_32.dll", JL_RTLD_NOLOAD);
-    HMODULE jl_dbghelp = (HMODULE) jl_dlopen("dbghelp.dll", JL_RTLD_NOLOAD);
-    needsSymRefreshModuleList = 0;
-    if (jl_dbghelp)
-        jl_dlsym(jl_dbghelp, "SymRefreshModuleList", (void **)&hSymRefreshModuleList, 1);
 #else
-    jl_exe_handle = jl_dlopen(NULL, JL_RTLD_NOW);
-#ifdef RTLD_DEFAULT
+    /* macOS dlopen(3): If path is NULL and the option RTLD_FIRST is used, the
+       handle returned will only search the main executable. */
+    jl_exe_handle = jl_dlopen(NULL, JL_RTLD_NOW | JL_RTLD_NOLOAD | JL_RTLD_LOCAL | JL_RTLD_FIRST);
+    // RTLD_DEFAULT is mandatory on POSIX
     jl_RTLD_DEFAULT_handle = RTLD_DEFAULT;
-#else
-    jl_RTLD_DEFAULT_handle = jl_exe_handle;
-#endif
 #endif
 
-    if ((jl_options.outputo || jl_options.outputbc || jl_options.outputasm) &&
-        (jl_options.code_coverage || jl_options.malloc_log)) {
-        jl_error("cannot generate code-coverage or track allocation information while generating a .o, .bc, or .s output file");
-    }
-
     jl_init_rand();
     jl_init_runtime_ccall();
     jl_init_tasks();
@@ -818,6 +770,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
 
     arraylist_new(&jl_linkage_blobs, 0);
     arraylist_new(&jl_image_relocs, 0);
+    arraylist_new(&jl_top_mods, 0);
     arraylist_new(&eytzinger_image_tree, 0);
     arraylist_new(&eytzinger_idxs, 0);
     arraylist_push(&eytzinger_idxs, (void*)0);
@@ -828,74 +781,20 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
 #if defined(_COMPILER_GCC_) && __GNUC__ >= 12
 #pragma GCC diagnostic ignored "-Wdangling-pointer"
 #endif
+    if (jl_options.task_metrics == JL_OPTIONS_TASK_METRICS_ON) {
+        // enable before creating the root task so it gets timings too.
+        jl_atomic_fetch_add(&jl_task_metrics_enabled, 1);
+    }
+    // Initialize constant objects
+    jl_nothing = jl_gc_permobj(ptls, 0, jl_nothing_type, 0);
+    jl_set_typetagof(jl_nothing, jl_nothing_tag, GC_OLD_MARKED);
     // warning: this changes `jl_current_task`, so be careful not to call that from this function
     jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    jl_init_box_caches();
+    jl_init_common_symbols();
 #pragma GCC diagnostic pop
     JL_GC_PROMISE_ROOTED(ct);
-    _finish_julia_init(rel, ptls, ct);
-}
-
-static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct)
-{
-    JL_TIMING(JULIA_INIT, JULIA_INIT);
-    jl_resolve_sysimg_location(rel);
-    // loads sysimg if available, and conditionally sets jl_options.cpu_target
-    if (jl_options.image_file)
-        jl_preload_sysimg_so(jl_options.image_file);
-    if (jl_options.cpu_target == NULL)
-        jl_options.cpu_target = "native";
-    jl_init_codegen();
-
-    if (jl_options.image_file) {
-        jl_restore_system_image(jl_options.image_file);
-    } else {
-        jl_init_types();
-        jl_global_roots_table = jl_alloc_vec_any(0);
-    }
-
-    jl_init_common_symbols();
-    jl_init_flisp();
-    jl_init_serializer();
-
-    if (!jl_options.image_file) {
-        jl_core_module = jl_new_module(jl_symbol("Core"), NULL);
-        jl_core_module->parent = jl_core_module;
-        jl_type_typename->mt->module = jl_core_module;
-        jl_top_module = jl_core_module;
-        jl_init_intrinsic_functions();
-        jl_init_primitives();
-        jl_init_main_module();
-        jl_load(jl_core_module, "boot.jl");
-        post_boot_hooks();
-    }
-
-    if (jl_base_module == NULL) {
-        // nthreads > 1 requires code in Base
-        jl_atomic_store_relaxed(&jl_n_threads, 1);
-        jl_n_markthreads = 0;
-        jl_n_sweepthreads = 0;
-        jl_n_gcthreads = 0;
-        jl_n_threads_per_pool[0] = 1;
-        jl_n_threads_per_pool[1] = 0;
-    }
-    jl_start_threads();
-
-    jl_gc_enable(1);
-
-    if (jl_options.image_file && (!jl_generating_output() || jl_options.incremental) && jl_module_init_order) {
-        jl_array_t *init_order = jl_module_init_order;
-        JL_GC_PUSH1(&init_order);
-        jl_module_init_order = NULL;
-        int i, l = jl_array_len(init_order);
-        for (i = 0; i < l; i++) {
-            jl_value_t *mod = jl_array_ptr_ref(init_order, i);
-            jl_module_run_initializer((jl_module_t*)mod);
-        }
-        JL_GC_POP();
-    }
-
-    if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON)
-        jl_install_sigint_handler();
+    _finish_jl_init_(sysimage, ptls, ct);
 }
 
 #ifdef __cplusplus
diff --git a/src/interpreter.c b/src/interpreter.c
index 2ad56e76b2549..6c0ea1bc16ab1 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -17,6 +17,7 @@ extern "C" {
 typedef struct {
     jl_code_info_t *src; // contains the names and number of slots
     jl_method_instance_t *mi; // MethodInstance we're executing, or NULL if toplevel
+    jl_code_instance_t *ci; // CodeInstance we're executing (for generated functions)
     jl_module_t *module; // context for globals
     jl_value_t **locals; // slots for holding local slots and ssavalues
     jl_svec_t *sparam_vals; // method static parameters, if eval-ing a method body
@@ -32,9 +33,9 @@ typedef struct {
   var = (decltype(var))alloca((n))
 #else
 #define JL_CPPALLOCA(var,n)                                                         \
-  JL_GCC_IGNORE_START("-Wc++-compat")                                               \
+  JL_CC_IGNORE_START("-Wc++-compat")                                               \
   var = alloca((n));                                                                \
-  JL_GCC_IGNORE_STOP
+  JL_CC_IGNORE_STOP
 #endif
 
 #ifdef __clang_gcanalyzer__
@@ -65,7 +66,8 @@ extern void JL_GC_ENABLEFRAME(interpreter_state*) JL_NOTSAFEPOINT;
 // we define this separately so that we can populate the frame before we add it to the backtrace
 // it's recommended to mark the containing function with NOINLINE, though not essential
 #define JL_GC_ENABLEFRAME(frame) \
-  ((void**)&frame[1])[0] = __builtin_frame_address(0);
+    jl_signal_fence(); \
+    ((void**)&frame[1])[0] = __builtin_frame_address(0);
 
 #endif
 
@@ -91,10 +93,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s)
         if (!jl_is_symbol(fname)) {
             jl_error("method: invalid declaration");
         }
-        jl_binding_t *b = jl_get_binding_for_method_def(modu, fname);
-        _Atomic(jl_value_t*) *bp = &b->value;
-        jl_value_t *gf = jl_generic_function_def(fname, modu, bp, b);
-        return gf;
+        return jl_declare_const_gf(modu, fname);
     }
 
     jl_value_t *atypes = NULL, *meth = NULL, *fname = NULL;
@@ -102,14 +101,13 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s)
 
     fname = eval_value(args[0], s);
     jl_methtable_t *mt = NULL;
-    if (jl_typetagis(fname, jl_methtable_type)) {
+    if (jl_is_mtable(fname))
         mt = (jl_methtable_t*)fname;
-    }
     atypes = eval_value(args[1], s);
     meth = eval_value(args[2], s);
-    jl_method_def((jl_svec_t*)atypes, mt, (jl_code_info_t*)meth, s->module);
+    jl_method_t *ret = jl_method_def((jl_svec_t*)atypes, mt, (jl_code_info_t*)meth, s->module);
     JL_GC_POP();
-    return jl_nothing;
+    return (jl_value_t *)ret;
 }
 
 // expression evaluator
@@ -134,38 +132,61 @@ static jl_value_t *do_invoke(jl_value_t **args, size_t nargs, interpreter_state
     JL_GC_PUSHARGS(argv, nargs - 1);
     size_t i;
     for (i = 1; i < nargs; i++)
-        argv[i] = eval_value(args[i], s);
-    jl_method_instance_t *meth = (jl_method_instance_t*)args[0];
-    assert(jl_is_method_instance(meth));
-    jl_value_t *result = jl_invoke(argv[1], &argv[2], nargs - 2, meth);
+        argv[i-1] = eval_value(args[i], s);
+    jl_value_t *c = args[0];
+    assert(jl_is_code_instance(c) || jl_is_method_instance(c));
+    jl_value_t *result = NULL;
+    if (jl_is_code_instance(c)) {
+        jl_code_instance_t *codeinst = (jl_code_instance_t*)c;
+        assert(jl_atomic_load_relaxed(&codeinst->min_world) <= jl_current_task->world_age &&
+               jl_current_task->world_age <= jl_atomic_load_relaxed(&codeinst->max_world));
+        jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
+        if (!invoke) {
+            jl_compile_codeinst(codeinst);
+            invoke = jl_atomic_load_acquire(&codeinst->invoke);
+        }
+        if (invoke) {
+            result = invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, codeinst);
+
+        } else {
+            if (codeinst->owner != jl_nothing) {
+                jl_error("Failed to invoke or compile external codeinst");
+            }
+            result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, jl_get_ci_mi(codeinst));
+        }
+    } else {
+        result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, (jl_method_instance_t*)c);
+    }
     JL_GC_POP();
     return result;
 }
 
-jl_value_t *jl_eval_global_var(jl_module_t *m, jl_sym_t *e)
+// get the global (throwing if null) in the current world
+jl_value_t *jl_eval_global_var(jl_module_t *m, jl_sym_t *e, size_t world)
 {
-    jl_value_t *v = jl_get_global(m, e);
+    jl_value_t *v = jl_get_global_value(m, e, world);
     if (v == NULL)
-        jl_undefined_var_error(e);
+        jl_undefined_var_error(e, (jl_value_t*)m);
     return v;
 }
 
-jl_value_t *jl_eval_globalref(jl_globalref_t *g)
+// get the global (throwing if null) in the current world, optimized
+jl_value_t *jl_eval_globalref(jl_globalref_t *g, size_t world)
 {
-    jl_value_t *v = jl_get_globalref_value(g);
+    jl_value_t *v = jl_get_globalref_value(g, world);
     if (v == NULL)
-        jl_undefined_var_error(g->name);
+        jl_undefined_var_error(g->name, (jl_value_t*)g->mod);
     return v;
 }
 
 static int jl_source_nslots(jl_code_info_t *src) JL_NOTSAFEPOINT
 {
-    return jl_array_len(src->slotflags);
+    return jl_array_nrows(src->slotflags);
 }
 
 static int jl_source_nssavalues(jl_code_info_t *src) JL_NOTSAFEPOINT
 {
-    return jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_len(src->ssavaluetypes);
+    return jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_nrows(src->ssavaluetypes);
 }
 
 static void eval_stmt_value(jl_value_t *stmt, interpreter_state *s)
@@ -190,17 +211,17 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
             jl_error("access to invalid slot number");
         jl_value_t *v = s->locals[n - 1];
         if (v == NULL)
-            jl_undefined_var_error((jl_sym_t*)jl_array_ptr_ref(src->slotnames, n - 1));
+            jl_undefined_var_error((jl_sym_t*)jl_array_ptr_ref(src->slotnames, n - 1), (jl_value_t*)jl_local_sym);
         return v;
     }
     if (jl_is_quotenode(e)) {
         return jl_quotenode_value(e);
     }
     if (jl_is_globalref(e)) {
-        return jl_eval_globalref((jl_globalref_t*)e);
+        return jl_eval_globalref((jl_globalref_t*)e, jl_current_task->world_age);
     }
     if (jl_is_symbol(e)) {  // bare symbols appear in toplevel exprs not wrapped in `thunk`
-        return jl_eval_global_var(s->module, (jl_sym_t*)e);
+        return jl_eval_global_var(s->module, (jl_sym_t*)e, jl_current_task->world_age);
     }
     if (jl_is_pinode(e)) {
         jl_value_t *val = eval_value(jl_fieldref_noalloc(e, 0), s);
@@ -216,7 +237,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         return e;
     jl_expr_t *ex = (jl_expr_t*)e;
     jl_value_t **args = jl_array_ptr_data(ex->args);
-    size_t nargs = jl_array_len(ex->args);
+    size_t nargs = jl_array_nrows(ex->args);
     jl_sym_t *head = ex->head;
     if (head == jl_call_sym) {
         return do_call(args, nargs, s);
@@ -230,17 +251,15 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
     else if (head == jl_isdefined_sym) {
         jl_value_t *sym = args[0];
         int defined = 0;
+        assert(nargs == 1 && "malformed IR");
         if (jl_is_slotnumber(sym) || jl_is_argument(sym)) {
             ssize_t n = jl_slot_number(sym);
             if (src == NULL || n > jl_source_nslots(src) || n < 1 || s->locals == NULL)
                 jl_error("access to invalid slot number");
             defined = s->locals[n - 1] != NULL;
         }
-        else if (jl_is_globalref(sym)) {
-            defined = jl_boundp(jl_globalref_mod(sym), jl_globalref_name(sym));
-        }
-        else if (jl_is_symbol(sym)) {
-            defined = jl_boundp(s->module, (jl_sym_t*)sym);
+        else if (jl_is_globalref(sym) || jl_is_symbol(sym)) {
+            jl_error("[Internal Error]: :isdefined on globalref should use `isdefinedglobal`");
         }
         else if (jl_is_expr(sym) && ((jl_expr_t*)sym)->head == jl_static_parameter_sym) {
             ssize_t n = jl_unbox_long(jl_exprarg(sym, 0));
@@ -267,7 +286,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
             if (var == jl_getfield_undefref_sym)
                 jl_throw(jl_undefref_exception);
             else
-                jl_undefined_var_error(var);
+                jl_undefined_var_error(var, (jl_value_t*)jl_local_sym);
         }
         return jl_nothing;
     }
@@ -296,7 +315,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
             argv[i] = eval_value(args[i], s);
         JL_NARGSV(new_opaque_closure, 4);
         jl_value_t *ret = (jl_value_t*)jl_new_opaque_closure((jl_tupletype_t*)argv[0], argv[1], argv[2],
-            argv[3], argv+4, nargs-4, 1);
+            argv[4], argv+5, nargs-5, 1);
         JL_GC_POP();
         return ret;
     }
@@ -306,7 +325,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         if (s->sparam_vals && n <= jl_svec_len(s->sparam_vals)) {
             jl_value_t *sp = jl_svecref(s->sparam_vals, n - 1);
             if (jl_is_typevar(sp) && !s->preevaluation)
-                jl_undefined_var_error(((jl_tvar_t*)sp)->name);
+                jl_undefined_var_error(((jl_tvar_t*)sp)->name, (jl_value_t*)jl_static_parameter_sym);
             return sp;
         }
         // static parameter val unknown needs to be an error for ccall
@@ -316,7 +335,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         return jl_copy_ast(eval_value(args[0], s));
     }
     else if (head == jl_exc_sym) {
-        return jl_current_exception();
+        return jl_current_exception(jl_current_task);
     }
     else if (head == jl_boundscheck_sym) {
         return jl_true;
@@ -350,6 +369,7 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_
     size_t from = s->ip;
     size_t ip = to;
     unsigned nphiblockstmts = 0;
+    unsigned last_phi = 0;
     for (ip = to; ip < ns; ip++) {
         jl_value_t *e = jl_array_ptr_ref(stmts, ip);
         if (!jl_is_phinode(e)) {
@@ -360,9 +380,16 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_
             }
             // Everything else is allowed in the phi-block for implementation
             // convenience - fall through.
+        } else {
+            last_phi = nphiblockstmts + 1;
         }
         nphiblockstmts += 1;
     }
+    // Cut off the phi block at the last phi node. For global refs that are not
+    // actually in the phi block, we want to evaluate them in the regular interpreter
+    // loop instead to make sure exception state is set up properly in case they throw.
+    nphiblockstmts = last_phi;
+    ip = to + last_phi;
     if (nphiblockstmts) {
         jl_value_t **dest = &s->locals[jl_source_nslots(s->src) + to];
         jl_value_t **phis; // = (jl_value_t**)alloca(sizeof(jl_value_t*) * nphiblockstmts);
@@ -385,8 +412,8 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_
             //   %2 = phi ...
             //   %3 = phi (1)[1 => %a], (2)[2 => %b]
             // from = 1, to = closest = 2, i = 1 --> edge = 2, edge_from = 2, from = 2
-            for (unsigned j = 0; j < jl_array_len(edges); ++j) {
-                size_t edge_from = ((int32_t*)jl_array_data(edges))[j]; // 1-indexed
+            for (unsigned j = 0; j < jl_array_nrows(edges); ++j) {
+                size_t edge_from = jl_array_data(edges, int32_t)[j]; // 1-indexed
                 if (edge_from == from + 1) {
                     if (edge == -1)
                         edge = j;
@@ -443,15 +470,13 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_
 static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, int toplevel)
 {
     jl_handler_t __eh;
-    size_t ns = jl_array_len(stmts);
+    size_t ns = jl_array_nrows(stmts);
     jl_task_t *ct = jl_current_task;
 
     while (1) {
         s->ip = ip;
         if (ip >= ns)
             jl_error("`body` expression must terminate in `return`. Use `block` instead.");
-        if (toplevel)
-            ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         jl_value_t *stmt = jl_array_ptr_ref(stmts, ip);
         assert(!jl_is_phinode(stmt));
         size_t next_ip = ip + 1;
@@ -480,54 +505,27 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
             ssize_t id = ((jl_ssavalue_t*)phic)->id - 1;
             s->locals[jl_source_nslots(s->src) + id] = val;
         }
-        else if (jl_is_expr(stmt)) {
-            // Most exprs are allowed to end a BB by fall through
-            jl_sym_t *head = ((jl_expr_t*)stmt)->head;
-            if (head == jl_assign_sym) {
-                jl_value_t *lhs = jl_exprarg(stmt, 0);
-                jl_value_t *rhs = eval_value(jl_exprarg(stmt, 1), s);
-                if (jl_is_slotnumber(lhs)) {
-                    ssize_t n = jl_slot_number(lhs);
-                    assert(n <= jl_source_nslots(s->src) && n > 0);
-                    s->locals[n - 1] = rhs;
-                }
-                else {
-                    jl_module_t *modu;
-                    jl_sym_t *sym;
-                    if (jl_is_globalref(lhs)) {
-                        modu = jl_globalref_mod(lhs);
-                        sym = jl_globalref_name(lhs);
-                    }
-                    else {
-                        assert(jl_is_symbol(lhs));
-                        modu = s->module;
-                        sym = (jl_sym_t*)lhs;
-                    }
-                    JL_GC_PUSH1(&rhs);
-                    jl_binding_t *b = jl_get_binding_wr(modu, sym);
-                    jl_checked_assignment(b, modu, sym, rhs);
-                    JL_GC_POP();
-                }
-            }
-            else if (head == jl_enter_sym) {
-                jl_enter_handler(&__eh);
-                // This is a bit tricky, but supports the implementation of PhiC nodes.
-                // They are conceptually slots, but the slot to store to doesn't get explicitly
-                // mentioned in the store (aka the "UpsilonNode") (this makes them integrate more
-                // nicely with the rest of the SSA representation). In a compiler, we would figure
-                // out which slot to store to at compile time when we encounter the statement. We
-                // can't quite do that here, but we do something similar: We scan the catch entry
-                // block (the only place where PhiC nodes may occur) to find all the Upsilons we
-                // can possibly encounter. Then, we remember which slot they store to (we abuse the
-                // SSA value result array for this purpose). TODO: We could do this only the first
-                // time we encounter a given enter.
-                size_t catch_ip = jl_unbox_long(jl_exprarg(stmt, 0)) - 1;
+        else if (jl_is_enternode(stmt)) {
+            jl_enter_handler(ct, &__eh);
+            // This is a bit tricky, but supports the implementation of PhiC nodes.
+            // They are conceptually slots, but the slot to store to doesn't get explicitly
+            // mentioned in the store (aka the "UpsilonNode") (this makes them integrate more
+            // nicely with the rest of the SSA representation). In a compiler, we would figure
+            // out which slot to store to at compile time when we encounter the statement. We
+            // can't quite do that here, but we do something similar: We scan the catch entry
+            // block (the only place where PhiC nodes may occur) to find all the Upsilons we
+            // can possibly encounter. Then, we remember which slot they store to (we abuse the
+            // SSA value result array for this purpose). TODO: We could do this only the first
+            // time we encounter a given enter.
+            size_t catch_ip = jl_enternode_catch_dest(stmt);
+            if (catch_ip) {
+                catch_ip -= 1;
                 while (catch_ip < ns) {
                     jl_value_t *phicnode = jl_array_ptr_ref(stmts, catch_ip);
                     if (!jl_is_phicnode(phicnode))
                         break;
                     jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(phicnode, 0);
-                    for (size_t i = 0; i < jl_array_len(values); ++i) {
+                    for (size_t i = 0; i < jl_array_nrows(values); ++i) {
                         jl_value_t *val = jl_array_ptr_ref(values, i);
                         assert(jl_is_ssavalue(val));
                         size_t upsilon = ((jl_ssavalue_t*)val)->id - 1;
@@ -538,46 +536,107 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                     catch_ip += 1;
                 }
                 // store current top of exception stack for restore in pop_exception.
-                s->locals[jl_source_nslots(s->src) + ip] = jl_box_ulong(jl_excstack_state());
-                if (!jl_setjmp(__eh.eh_ctx, 1)) {
-                    return eval_body(stmts, s, next_ip, toplevel);
+            }
+            s->locals[jl_source_nslots(s->src) + ip] = jl_box_ulong(jl_excstack_state(ct));
+            if (jl_enternode_scope(stmt)) {
+                jl_value_t *scope = eval_value(jl_enternode_scope(stmt), s);
+                // GC preserve the scope, since it is not rooted in the `jl_handler_t *`
+                // and may be removed from jl_current_task by any nested block and then
+                // replaced later
+                JL_GC_PUSH1(&scope);
+                ct->scope = scope;
+                if (!jl_setjmp(__eh.eh_ctx, 0)) {
+                    ct->eh = &__eh;
+                    eval_body(stmts, s, next_ip, toplevel);
+                    jl_unreachable();
                 }
-                else if (s->continue_at) { // means we reached a :leave expression
-                    ip = s->continue_at;
-                    s->continue_at = 0;
-                    continue;
+                JL_GC_POP();
+            }
+            else {
+                if (!jl_setjmp(__eh.eh_ctx, 0)) {
+                    ct->eh = &__eh;
+                    eval_body(stmts, s, next_ip, toplevel);
+                    jl_unreachable();
                 }
-                else { // a real exception
-                    ip = catch_ip;
-                    continue;
+            }
+
+            if (s->continue_at) { // means we reached a :leave expression
+                jl_eh_restore_state_noexcept(ct, &__eh);
+                ip = s->continue_at;
+                s->continue_at = 0;
+                continue;
+            }
+            else { // a real exception
+                jl_eh_restore_state(ct, &__eh);
+                ip = catch_ip;
+                assert(jl_enternode_catch_dest(stmt) != 0);
+                continue;
+            }
+        }
+        else if (jl_is_expr(stmt)) {
+            // Most exprs are allowed to end a BB by fall through
+            jl_sym_t *head = ((jl_expr_t*)stmt)->head;
+            if (head == jl_assign_sym) {
+                jl_value_t *lhs = jl_exprarg(stmt, 0);
+                jl_value_t *rhs = eval_value(jl_exprarg(stmt, 1), s);
+                if (jl_is_slotnumber(lhs)) {
+                    ssize_t n = jl_slot_number(lhs);
+                    assert(n <= jl_source_nslots(s->src) && n > 0);
+                    s->locals[n - 1] = rhs;
+                }
+                else {
+                    // This is an unmodeled error. Our frontend only generates
+                    // legal `=` expressions, but since GlobalRef used to be legal
+                    // here, give a loud error in case any package is modifying
+                    // internals.
+                    jl_error("Invalid IR: Assignment LHS not a Slot");
                 }
             }
             else if (head == jl_leave_sym) {
-                int hand_n_leave = jl_unbox_long(jl_exprarg(stmt, 0));
-                assert(hand_n_leave > 0);
-                // equivalent to jl_pop_handler(hand_n_leave), but retaining eh for longjmp:
-                jl_handler_t *eh = ct->eh;
-                while (--hand_n_leave > 0)
-                    eh = eh->prev;
-                jl_eh_restore_state(eh);
-                // leave happens during normal control flow, but we must
-                // longjmp to pop the eval_body call for each enter.
-                s->continue_at = next_ip;
-                asan_unpoison_task_stack(ct, &eh->eh_ctx);
-                jl_longjmp(eh->eh_ctx, 1);
+                int hand_n_leave = 0;
+                for (int i = 0; i < jl_expr_nargs(stmt); ++i) {
+                    jl_value_t *arg = jl_exprarg(stmt, i);
+                    if (arg == jl_nothing)
+                        continue;
+                    assert(jl_is_ssavalue(arg));
+                    jl_value_t *enter_stmt = jl_array_ptr_ref(stmts, ((jl_ssavalue_t*)arg)->id - 1);
+                    if (enter_stmt == jl_nothing)
+                        continue;
+                    hand_n_leave += 1;
+                }
+                if (hand_n_leave > 0) {
+                    assert(hand_n_leave > 0);
+                    // equivalent to jl_pop_handler(hand_n_leave), longjmping
+                    // to the :enter code above instead, which handles cleanup
+                    jl_handler_t *eh = ct->eh;
+                    while (--hand_n_leave > 0) {
+                        // pop GC frames for any skipped handlers
+                        ct->gcstack = eh->gcstack;
+                        eh = eh->prev;
+                    }
+                    // leave happens during normal control flow, but we must
+                    // longjmp to pop the eval_body call for each enter.
+                    s->continue_at = next_ip;
+                    asan_unpoison_task_stack(ct, &eh->eh_ctx);
+                    jl_longjmp(eh->eh_ctx, 1);
+                }
             }
             else if (head == jl_pop_exception_sym) {
                 size_t prev_state = jl_unbox_ulong(eval_value(jl_exprarg(stmt, 0), s));
-                jl_restore_excstack(prev_state);
+                jl_restore_excstack(ct, prev_state);
             }
             else if (toplevel) {
                 if (head == jl_method_sym && jl_expr_nargs(stmt) > 1) {
-                    eval_methoddef((jl_expr_t*)stmt, s);
+                    jl_value_t *res = eval_methoddef((jl_expr_t*)stmt, s);
+                    s->locals[jl_source_nslots(s->src) + s->ip] = res;
                 }
                 else if (head == jl_toplevel_sym) {
                     jl_value_t *res = jl_toplevel_eval(s->module, stmt);
                     s->locals[jl_source_nslots(s->src) + s->ip] = res;
                 }
+                else if (head == jl_latestworld_sym) {
+                    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+                }
                 else if (jl_is_toplevel_only_expr(stmt)) {
                     jl_toplevel_eval(s->module, stmt);
                 }
@@ -628,7 +687,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
             s->locals[n - 1] = NULL;
         }
         else if (toplevel && jl_is_linenode(stmt)) {
-            jl_lineno = jl_linenode_line(stmt);
+            jl_atomic_store_relaxed(&jl_lineno, jl_linenode_line(stmt));
         }
         else {
             eval_stmt_value(stmt, s);
@@ -640,31 +699,55 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
 
 // preparing method IR for interpreter
 
-jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi, size_t world)
+jl_value_t *jl_code_or_ci_for_interpreter(jl_method_instance_t *mi, size_t world)
 {
-    jl_code_info_t *src = (jl_code_info_t*)jl_atomic_load_relaxed(&mi->uninferred);
+    jl_value_t *ret = NULL;
+    jl_code_info_t *src = NULL;
     if (jl_is_method(mi->def.value)) {
-        if (!src || (jl_value_t*)src == jl_nothing) {
-            if (mi->def.method->source) {
-                src = (jl_code_info_t*)mi->def.method->source;
+        if (mi->def.method->source) {
+            jl_method_t *m = mi->def.method;
+            src = (jl_code_info_t*)m->source;
+            if (!jl_is_code_info(src)) {
+                src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src);
+                // Replace the method source by the uncompressed version,
+                // under the assumption that the interpreter may need to
+                // access it frequently. TODO: Have some sort of usage-based
+                // cache here.
+                m->source = (jl_value_t*)src;
+                jl_gc_wb(m, src);
             }
-            else {
+            ret = (jl_value_t*)src;
+        }
+        else {
+            jl_code_instance_t *cache = jl_atomic_load_relaxed(&mi->cache);
+            jl_code_instance_t *uninferred = jl_cached_uninferred(cache, world);
+            if (!uninferred) {
                 assert(mi->def.method->generator);
-                src = jl_code_for_staged(mi, world);
+                src = jl_code_for_staged(mi, world, &uninferred);
             }
+            ret = (jl_value_t*)uninferred;
+            src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred);
         }
-        if (src && (jl_value_t*)src != jl_nothing) {
-            JL_GC_PUSH1(&src);
-            src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src);
-            jl_atomic_store_release(&mi->uninferred, (jl_value_t*)src);
-            jl_gc_wb(mi, src);
-            JL_GC_POP();
+    }
+    else {
+        jl_code_instance_t *uninferred = jl_cached_uninferred(jl_atomic_load_relaxed(&mi->cache), world);
+        ret = (jl_value_t*)uninferred;
+        if (ret) {
+            src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred);
         }
     }
     if (!src || !jl_is_code_info(src)) {
-        jl_error("source missing for method called in interpreter");
+        jl_throw(jl_new_struct(jl_missingcodeerror_type, (jl_value_t*)mi));
     }
-    return src;
+    return ret;
+}
+
+jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi, size_t world)
+{
+    jl_value_t *code_or_ci = jl_code_or_ci_for_interpreter(mi, world);
+    if (jl_is_code_instance(code_or_ci))
+        return (jl_code_info_t*)jl_atomic_load_relaxed(&((jl_code_instance_t*)code_or_ci)->inferred);
+    return (jl_code_info_t*)code_or_ci;
 }
 
 // interpreter entry points
@@ -672,10 +755,18 @@ jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi, size_t world)
 jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *codeinst)
 {
     interpreter_state *s;
-    jl_method_instance_t *mi = codeinst->def;
+    jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
     jl_task_t *ct = jl_current_task;
     size_t world = ct->world_age;
-    jl_code_info_t *src = jl_code_for_interpreter(mi, world);
+    jl_code_info_t *src = NULL;
+    jl_value_t *code = jl_code_or_ci_for_interpreter(mi, world);
+    jl_code_instance_t *ci = NULL;
+    if (jl_is_code_instance(code)) {
+        ci = (jl_code_instance_t*)code;
+        src = (jl_code_info_t*)jl_atomic_load_relaxed(&ci->inferred);
+    } else {
+        src = (jl_code_info_t*)code;
+    }
     jl_array_t *stmts = src->code;
     assert(jl_typetagis(stmts, jl_array_any_type));
     unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src) + 2;
@@ -690,8 +781,8 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui
     }
     else {
         s->module = mi->def.method->module;
-        size_t defargs = mi->def.method->nargs;
-        int isva = mi->def.method->isva ? 1 : 0;
+        size_t defargs = src->nargs;
+        int isva = src->isva;
         size_t i;
         s->locals[0] = f;
         assert(isva ? nargs + 2 >= defargs : nargs + 1 == defargs);
@@ -706,6 +797,7 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui
     s->preevaluation = 0;
     s->continue_at = 0;
     s->mi = mi;
+    s->ci = ci;
     JL_GC_ENABLEFRAME(s);
     jl_value_t *r = eval_body(stmts, s, 0, 0);
     JL_GC_POP();
@@ -717,7 +809,25 @@ JL_DLLEXPORT const jl_callptr_t jl_fptr_interpret_call_addr = &jl_fptr_interpret
 jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **args, size_t nargs)
 {
     jl_method_t *source = oc->source;
-    jl_code_info_t *code = jl_uncompress_ir(source, NULL, (jl_value_t*)source->source);
+    jl_code_info_t *code = NULL;
+    if (source->source) {
+        code = jl_uncompress_ir(source, NULL, (jl_value_t*)source->source);
+    }
+    else {
+        // OC constructed from optimized IR. It'll have a single specialization with optimized code
+        // in it that we'll try to interpret.
+        jl_svec_t *specializations = (jl_svec_t*)jl_atomic_load_relaxed(&source->specializations);
+        assert(jl_is_method_instance(specializations));
+        jl_method_instance_t *mi = (jl_method_instance_t *)specializations;
+        jl_code_instance_t *ci = jl_atomic_load_relaxed(&mi->cache);
+        jl_value_t *src = jl_atomic_load_relaxed(&ci->inferred);
+        if (!src) {
+            // This can happen if somebody did :new_opaque_closure with broken IR. This is definitely bad
+            // and UB, but let's try to be slightly nicer than segfaulting here for people debugging.
+            jl_error("Internal Error: Opaque closure with no source at all");
+        }
+        code = jl_uncompress_ir(source, ci, src);
+    }
     interpreter_state *s;
     unsigned nroots = jl_source_nslots(code) + jl_source_nssavalues(code) + 2;
     jl_task_t *ct = jl_current_task;
@@ -737,6 +847,7 @@ jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **ar
     s->preevaluation = 0;
     s->continue_at = 0;
     s->mi = NULL;
+    s->ci = NULL;
     size_t defargs = source->nargs;
     int isva = source->isva;
     assert(isva ? nargs + 2 >= defargs : nargs + 1 == defargs);
@@ -768,11 +879,9 @@ jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t
     s->sparam_vals = jl_emptysvec;
     s->continue_at = 0;
     s->mi = NULL;
+    s->ci = NULL;
     JL_GC_ENABLEFRAME(s);
-    jl_task_t *ct = jl_current_task;
-    size_t last_age = ct->world_age;
     jl_value_t *r = eval_body(stmts, s, 0, 1);
-    ct->world_age = last_age;
     JL_GC_POP();
     return r;
 }
@@ -792,6 +901,7 @@ jl_value_t *NOINLINE jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e
     s->preevaluation = (sparam_vals != NULL);
     s->continue_at = 0;
     s->mi = NULL;
+    s->ci = NULL;
     JL_GC_ENABLEFRAME(s);
     jl_value_t *v = eval_value(e, s);
     assert(v);
@@ -811,7 +921,8 @@ JL_DLLEXPORT size_t jl_capture_interp_frame(jl_bt_element_t *bt_entry,
     uintptr_t entry_tags = jl_bt_entry_descriptor(njlvalues, 0, JL_BT_INTERP_FRAME_TAG, s->ip);
     bt_entry[0].uintptr = JL_BT_NON_PTR_ENTRY;
     bt_entry[1].uintptr = entry_tags;
-    bt_entry[2].jlvalue = s->mi  ? (jl_value_t*)s->mi  :
+    bt_entry[2].jlvalue = s->ci  ? (jl_value_t*)s->ci  :
+                          s->mi  ? (jl_value_t*)s->mi  :
                           s->src ? (jl_value_t*)s->src : (jl_value_t*)jl_nothing;
     if (need_module) {
         // If we only have a CodeInfo (s->src), we are in a top level thunk and
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 810982370de19..ae25c3cc83ca5 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -4,6 +4,10 @@ namespace JL_I {
 #include "intrinsics.h"
 }
 
+#include <array>
+#include <bitset>
+#include <string>
+
 #include "ccall.cpp"
 
 //Mark our stats as being from intrinsics irgen
@@ -15,9 +19,9 @@ STATISTIC(EmittedCoercedUnboxes, "Number of unbox coercions emitted");
 STATISTIC(EmittedUnboxes, "Number of unboxes emitted");
 STATISTIC(EmittedRuntimeCalls, "Number of runtime intrinsic calls emitted");
 STATISTIC(EmittedIntrinsics, "Number of intrinsic calls emitted");
-STATISTIC(Emitted_arraylen, "Number of arraylen calls emitted");
 STATISTIC(Emitted_pointerref, "Number of pointerref calls emitted");
 STATISTIC(Emitted_pointerset, "Number of pointerset calls emitted");
+STATISTIC(Emitted_pointerarith, "Number of pointer arithmetic calls emitted");
 STATISTIC(Emitted_atomic_fence, "Number of atomic_fence calls emitted");
 STATISTIC(Emitted_atomic_pointerref, "Number of atomic_pointerref calls emitted");
 STATISTIC(Emitted_atomic_pointerop, "Number of atomic_pointerop calls emitted");
@@ -79,10 +83,14 @@ const auto &float_func() {
             float_func[sub_float] = true;
             float_func[mul_float] = true;
             float_func[div_float] = true;
+            float_func[min_float] = true;
+            float_func[max_float] = true;
             float_func[add_float_fast] = true;
             float_func[sub_float_fast] = true;
             float_func[mul_float_fast] = true;
             float_func[div_float_fast] = true;
+            float_func[min_float_fast] = true;
+            float_func[max_float_fast] = true;
             float_func[fma_float] = true;
             float_func[muladd_float] = true;
             float_func[eq_float] = true;
@@ -165,7 +173,7 @@ static Type *INTT(Type *t, const DataLayout &DL)
         return getInt64Ty(ctxt);
     if (t == getFloatTy(ctxt))
         return getInt32Ty(ctxt);
-    if (t == getHalfTy(ctxt))
+    if (t == getHalfTy(ctxt) || t == getBFloatTy(ctxt))
         return getInt16Ty(ctxt);
     unsigned nb = t->getPrimitiveSizeInBits();
     assert(t != getVoidTy(ctxt) && nb > 0);
@@ -174,12 +182,7 @@ static Type *INTT(Type *t, const DataLayout &DL)
 
 static Value *uint_cnvt(jl_codectx_t &ctx, Type *to, Value *x)
 {
-    Type *t = x->getType();
-    if (t == to)
-        return x;
-    if (to->getPrimitiveSizeInBits() < x->getType()->getPrimitiveSizeInBits())
-        return ctx.builder.CreateTrunc(x, to);
-    return ctx.builder.CreateZExt(x, to);
+    return ctx.builder.CreateZExtOrTrunc(x, to);
 }
 
 static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_datatype_t *bt)
@@ -229,7 +232,7 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data
     }
 
     size_t nf = jl_datatype_nfields(bt);
-    std::vector<Constant*> fields(0);
+    SmallVector<Constant*, 0> fields(0);
     for (size_t i = 0; i < nf; i++) {
         size_t offs = jl_field_offset(bt, i);
         jl_value_t *ft = jl_field_type(bt, i);
@@ -245,8 +248,8 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data
         if (jl_is_uniontype(ft)) {
             // compute the same type layout as julia_struct_to_llvm
             size_t fsz = 0, al = 0;
-            (void)jl_islayout_inline(ft, &fsz, &al);
-            fsz = jl_field_size(bt, i);
+            (void)jl_islayout_inline(ft, &fsz, &al); // compute al
+            fsz = jl_field_size(bt, i); // get LLT_ALIGN(fsz+1,al)
             uint8_t sel = ((const uint8_t*)ptr)[offs + fsz - 1];
             jl_value_t *active_ty = jl_nth_union_component(ft, sel);
             size_t active_sz = jl_datatype_size(active_ty);
@@ -318,47 +321,110 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, jl_value_t *e)
     return julia_const_to_llvm(ctx, e, (jl_datatype_t*)bt);
 }
 
+static Constant *undef_value_for_type(Type *T) {
+    auto tracked = CountTrackedPointers(T);
+    Constant *undef;
+    if (tracked.count)
+        // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL
+        undef = Constant::getNullValue(T);
+    else
+        undef = UndefValue::get(T);
+    return undef;
+}
+
+// rebuild a struct type with any i1 Bool (e.g. the llvmcall type) widened to i8 (the native size for memcpy)
+static Type *zext_struct_type(Type *T)
+{
+    if (auto *AT = dyn_cast<ArrayType>(T)) {
+        return ArrayType::get(AT->getElementType(), AT->getNumElements());
+    }
+    else if (auto *ST = dyn_cast<StructType>(T)) {
+        SmallVector<Type*> Elements(ST->element_begin(), ST->element_end());
+        for (size_t i = 0; i < Elements.size(); i++) {
+            Elements[i] = zext_struct_type(Elements[i]);
+        }
+        return StructType::get(ST->getContext(), Elements, ST->isPacked());
+    }
+    else if (auto *VT = dyn_cast<VectorType>(T)) {
+        return VectorType::get(zext_struct_type(VT->getElementType()), VT);
+    }
+    else if (auto *IT = dyn_cast<IntegerType>(T)) {
+        unsigned BitWidth = IT->getBitWidth();
+        if (alignTo(BitWidth, 8) != BitWidth)
+            return IntegerType::get(IT->getContext(), alignTo(BitWidth, 8));
+    }
+    return T;
+}
+
+// rebuild a struct with any i1 Bool (e.g. the llvmcall type) widened to i8 (the native size for memcpy)
+static Value *zext_struct_helper(jl_codectx_t &ctx, Value *V, Type *T2)
+{
+    Type *T = V->getType();
+    if (T == T2)
+        return V;
+    if (auto *AT = dyn_cast<ArrayType>(T2)) {
+        Value *V2 = undef_value_for_type(AT);
+        for (size_t i = 0; i < AT->getNumElements(); i++) {
+            Value *E = zext_struct_helper(ctx, ctx.builder.CreateExtractValue(V, i), AT->getElementType());
+            V2 = ctx.builder.CreateInsertValue(V2, E, i);
+        }
+        return V2;
+    }
+    else if (auto *ST = dyn_cast<StructType>(T2)) {
+        Value *V2 = undef_value_for_type(ST);
+        for (size_t i = 0; i < ST->getNumElements(); i++) {
+            Value *E = zext_struct_helper(ctx, ctx.builder.CreateExtractValue(V, i), ST->getElementType(i));
+            V2 = ctx.builder.CreateInsertValue(V2, E, i);
+        }
+        return V2;
+    }
+    else if (T2->isIntegerTy() || T2->isVectorTy()) {
+        return ctx.builder.CreateZExt(V, T2);
+    }
+    return V;
+}
+
+static Value *zext_struct(jl_codectx_t &ctx, Value *V)
+{
+    return zext_struct_helper(ctx, V, zext_struct_type(V->getType()));
+}
+
 static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
 {
+    if (unboxed->getType() == to)
+        return unboxed;
+    if (CastInst::castIsValid(Instruction::Trunc, unboxed, to))
+        return ctx.builder.CreateTrunc(unboxed, to);
+    unboxed = zext_struct(ctx, unboxed);
     Type *ty = unboxed->getType();
     if (ty == to)
         return unboxed;
     bool frompointer = ty->isPointerTy();
     bool topointer = to->isPointerTy();
     const DataLayout &DL = jl_Module->getDataLayout();
-    if (ty->isIntegerTy(1) && to->isIntegerTy(8)) {
-        // bools may be stored internally as int8
-        unboxed = ctx.builder.CreateZExt(unboxed, to);
-    }
-    else if (ty->isIntegerTy(8) && to->isIntegerTy(1)) {
-        // bools may be stored internally as int8
-        unboxed = ctx.builder.CreateTrunc(unboxed, to);
-    }
-    else if (ty->isVoidTy() || DL.getTypeSizeInBits(ty) != DL.getTypeSizeInBits(to)) {
+    if (ty->isVoidTy() || DL.getTypeSizeInBits(ty) != DL.getTypeSizeInBits(to)) {
         // this can happen in dead code
-        //emit_unreachable(ctx);
+        CreateTrap(ctx.builder);
         return UndefValue::get(to);
     }
-    if (frompointer && topointer) {
-        unboxed = emit_bitcast(ctx, unboxed, to);
-    }
     else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) {
         assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to));
-        AllocaInst *cast = ctx.builder.CreateAlloca(ty);
+        Align align = std::max(DL.getPrefTypeAlign(ty), DL.getPrefTypeAlign(to));
+        AllocaInst *cast = emit_static_alloca(ctx, ty, align);
         setName(ctx.emission_context, cast, "coercion");
-        ctx.builder.CreateStore(unboxed, cast);
-        unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo()));
+        ctx.builder.CreateAlignedStore(unboxed, cast, align);
+        unboxed = ctx.builder.CreateAlignedLoad(to, cast, align);
     }
     else if (frompointer) {
         Type *INTT_to = INTT(to, DL);
         unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to);
         setName(ctx.emission_context, unboxed, "coercion");
-        if (INTT_to != to)
+        if (INTT_to != to) //TODO when is this true?
             unboxed = ctx.builder.CreateBitCast(unboxed, to);
     }
     else if (topointer) {
         Type *INTT_to = INTT(to, DL);
-        if (to != INTT_to)
+        if (to != INTT_to) //TODO when is this true?
             unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to);
         unboxed = emit_inttoptr(ctx, unboxed, to);
         setName(ctx.emission_context, unboxed, "coercion");
@@ -379,15 +445,16 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
         // up being dead code, and type inference knows that the other
         // branch's type is the only one that matters.
         if (type_is_ghost(to)) {
-            return NULL;
+            return nullptr;
         }
-        //emit_unreachable(ctx);
+        CreateTrap(ctx.builder);
         return UndefValue::get(to); // type mismatch error
     }
 
-    Constant *c = x.constant ? julia_const_to_llvm(ctx, x.constant) : NULL;
-    if (!x.ispointer() || c) { // already unboxed, but sometimes need conversion
+    Constant *c = x.constant ? julia_const_to_llvm(ctx, x.constant) : nullptr;
+    if ((x.inline_roots.empty() && !x.ispointer()) || c != nullptr) { // already unboxed, but sometimes need conversion
         Value *unboxed = c ? c : x.V;
+        assert(unboxed); // clang-sa doesn't know that !x.ispointer() implies x.V does have a value
         return emit_unboxed_coercion(ctx, to, unboxed);
     }
 
@@ -395,9 +462,10 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
     Value *p = x.constant ? literal_pointer_val(ctx, x.constant) : x.V;
 
     if (jt == (jl_value_t*)jl_bool_type || to->isIntegerTy(1)) {
+        assert(p && x.inline_roots.empty()); // clang-sa doesn't know that x.ispointer() implied these are true
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
-        Instruction *unbox_load = ai.decorateInst(ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), maybe_bitcast(ctx, p, getInt8PtrTy(ctx.builder.getContext()))));
-        setName(ctx.emission_context, unbox_load, "unbox");
+        Instruction *unbox_load = ai.decorateInst(ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), p));
+        setName(ctx.emission_context, unbox_load, p->getName() + ".unbox");
         if (jt == (jl_value_t*)jl_bool_type)
             unbox_load->setMetadata(LLVMContext::MD_range, MDNode::get(ctx.builder.getContext(), {
                 ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
@@ -411,34 +479,23 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
     }
 
     unsigned alignment = julia_alignment(jt);
-    Type *ptype = to->getPointerTo();
-    if (p->getType() != ptype && isa<AllocaInst>(p)) {
-        // LLVM's mem2reg can't handle coercion if the load/store type does
-        // not match the type of the alloca. As such, it is better to
-        // perform the load using the alloca's type and then perform the
-        // appropriate coercion manually.
-        AllocaInst *AI = cast<AllocaInst>(p);
-        Type *AllocType = AI->getAllocatedType();
-        const DataLayout &DL = jl_Module->getDataLayout();
-        if (!AI->isArrayAllocation() &&
-                (AllocType->isFloatingPointTy() || AllocType->isIntegerTy() || AllocType->isPointerTy()) &&
-                (to->isFloatingPointTy() || to->isIntegerTy() || to->isPointerTy()) &&
-                DL.getTypeSizeInBits(AllocType) == DL.getTypeSizeInBits(to)) {
-            Instruction *load = ctx.builder.CreateAlignedLoad(AllocType, p, Align(alignment));
-            setName(ctx.emission_context, load, "unbox");
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
-            return emit_unboxed_coercion(ctx, to, ai.decorateInst(load));
-        }
-    }
-    p = maybe_bitcast(ctx, p, ptype);
-    Instruction *load = ctx.builder.CreateAlignedLoad(to, p, Align(alignment));
-    setName(ctx.emission_context, load, "unbox");
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    if (!x.inline_roots.empty()) {
+        assert(x.typ == jt);
+        AllocaInst *combined = emit_static_alloca(ctx, to, Align(alignment));
+        auto combined_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+        recombine_value(ctx, x, combined, combined_ai, Align(alignment), false);
+        p = combined;
+        ai = combined_ai;
+    }
+    assert(p); // clang-sa doesn't know that x.ispointer() implied this is true
+    Instruction *load = ctx.builder.CreateAlignedLoad(to, p, Align(alignment));
+    setName(ctx.emission_context, load, p->getName() + ".unbox");
     return ai.decorateInst(load);
 }
 
 // emit code to store a raw value into a destination
-static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest, MDNode *tbaa_dest, unsigned alignment, bool isVolatile)
+static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest, MDNode *tbaa_dest, MaybeAlign align_src, Align align_dst, bool isVolatile)
 {
     if (x.isghost) {
         // this can happen when a branch yielding a different type ends
@@ -447,29 +504,25 @@ static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest
         return;
     }
 
-    Value *unboxed = nullptr;
-    if (!x.ispointer()) { // already unboxed, but sometimes need conversion
-        unboxed = x.V;
-        assert(unboxed);
-    }
+    auto dest_ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest);
 
-    // bools stored as int8, but can be narrowed to int1 often
-    if (x.typ == (jl_value_t*)jl_bool_type)
-        unboxed = emit_unbox(ctx, getInt8Ty(ctx.builder.getContext()), x, (jl_value_t*)jl_bool_type);
+    if (!x.inline_roots.empty()) {
+        recombine_value(ctx, x, dest, dest_ai, align_dst, isVolatile);
+        return;
+    }
 
-    if (unboxed) {
-        Type *dest_ty = unboxed->getType()->getPointerTo();
-        if (dest->getType() != dest_ty)
-            dest = emit_bitcast(ctx, dest, dest_ty);
-        StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, Align(alignment));
+    if (!x.ispointer()) { // already unboxed, but sometimes need conversion (e.g. f32 -> i32)
+        assert(x.V);
+        Value *unboxed = zext_struct(ctx, x.V);
+        StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, align_dst);
         store->setVolatile(isVolatile);
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest);
-        ai.decorateInst(store);
+        dest_ai.decorateInst(store);
         return;
     }
 
     Value *src = data_pointer(ctx, x);
-    emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, isVolatile);
+    auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    emit_memcpy(ctx, dest, dest_ai, src, src_ai, jl_datatype_size(x.typ), Align(align_dst), align_src ? *align_src : Align(julia_alignment(x.typ)), isVolatile);
 }
 
 static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ)
@@ -484,10 +537,10 @@ static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ)
     return NULL;
 }
 
-static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, const jl_cgval_t *argv, size_t nargs)
+static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, ArrayRef<jl_cgval_t> argv, size_t nargs)
 {
     Function *func = prepare_call(runtime_func()[f]);
-    SmallVector<Value *> argvalues(nargs);
+    SmallVector<Value *, 0> argvalues(nargs);
     for (size_t i = 0; i < nargs; ++i) {
         argvalues[i] = boxed(ctx, argv[i]);
     }
@@ -496,7 +549,7 @@ static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, const
 }
 
 // put a bits type tag on some value (despite the name, this doesn't necessarily actually change anything about the value however)
-static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
+static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 {
     // Give the arguments names //
     const jl_cgval_t &bt_value = argv[0];
@@ -537,7 +590,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
         }
         else {
             Value *size = emit_datatype_size(ctx, typ);
-            auto sizecheck = ctx.builder.CreateICmpEQ(size, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nb));
+            auto sizecheck = ctx.builder.CreateICmpEQ(size, ConstantInt::get(size->getType(), nb));
             setName(ctx.emission_context, sizecheck, "sizecheck");
             error_unless(ctx,
                     sizecheck,
@@ -561,8 +614,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, v.tbaa);
         vx = ai.decorateInst(ctx.builder.CreateLoad(
             storage_type,
-            emit_bitcast(ctx, data_pointer(ctx, v),
-                storage_type->getPointerTo())));
+            data_pointer(ctx, v)));
         setName(ctx.emission_context, vx, "bitcast");
     }
 
@@ -574,13 +626,25 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
             vx = ctx.builder.CreateZExt(vx, llvmt);
         } else if (vxt->isPointerTy() && !llvmt->isPointerTy()) {
             vx = ctx.builder.CreatePtrToInt(vx, llvmt);
-            setName(ctx.emission_context, vx, "bitcast_coercion");
+            if (isa<Instruction>(vx) && !vx->hasName())
+                // CreatePtrToInt may undo an IntToPtr
+                setName(ctx.emission_context, vx, "bitcast_coercion");
         } else if (!vxt->isPointerTy() && llvmt->isPointerTy()) {
             vx = emit_inttoptr(ctx, vx, llvmt);
-            setName(ctx.emission_context, vx, "bitcast_coercion");
+            if (isa<Instruction>(vx) && !vx->hasName())
+                // emit_inttoptr may undo an PtrToInt
+                setName(ctx.emission_context, vx, "bitcast_coercion");
+        } else if (vxt->isPointerTy() && llvmt->isPointerTy()) {
+            // emit_bitcast preserves the origin address space, which we can't have here
+            vx = ctx.builder.CreateAddrSpaceCast(vx, llvmt);
+            if (isa<Instruction>(vx) && !vx->hasName())
+                // cast may have been folded
+                setName(ctx.emission_context, vx, "bitcast_coercion");
         } else {
             vx = emit_bitcast(ctx, vx, llvmt);
-            setName(ctx.emission_context, vx, "bitcast_coercion");
+            if (isa<Instruction>(vx) && !vx->hasName())
+                // emit_bitcast may undo another bitcast
+                setName(ctx.emission_context, vx, "bitcast_coercion");
         }
     }
 
@@ -588,7 +652,8 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
         return mark_julia_type(ctx, vx, false, bt);
     }
     else {
-        Value *box = emit_allocobj(ctx, nb, bt_value_rt);
+        unsigned align = sizeof(void*); // Allocations are at least pointer aligned
+        Value *box = emit_allocobj(ctx, nb, bt_value_rt, true, align);
         setName(ctx.emission_context, box, "bitcast_box");
         init_bits_value(ctx, box, vx, ctx.tbaa().tbaa_immut);
         return mark_julia_type(ctx, box, true, bt->name->wrapper);
@@ -598,7 +663,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
 static jl_cgval_t generic_cast(
         jl_codectx_t &ctx,
         intrinsic f, Instruction::CastOps Op,
-        const jl_cgval_t *argv, bool toint, bool fromint)
+        ArrayRef<jl_cgval_t> argv, bool toint, bool fromint)
 {
     auto &TT = ctx.emission_context.TargetTriple;
     auto &DL = ctx.emission_context.DL;
@@ -610,16 +675,29 @@ static jl_cgval_t generic_cast(
     uint32_t nb = jl_datatype_size(jlto);
     Type *to = bitstype_to_llvm((jl_value_t*)jlto, ctx.builder.getContext(), true);
     Type *vt = bitstype_to_llvm(v.typ, ctx.builder.getContext(), true);
-    if (toint)
-        to = INTT(to, DL);
-    else
-        to = FLOATT(to);
-    if (fromint)
-        vt = INTT(vt, DL);
-    else
-        vt = FLOATT(vt);
+
+    // fptrunc and fpext depend on the specific floating point
+    // format to work correctly, and so do not pun their argument types.
+    if (!(f == fpext || f == fptrunc)) {
+        // uitofp/sitofp require a specific float type argument
+        if (!(f == uitofp || f == sitofp)){
+            if (toint)
+                to = INTT(to, DL);
+            else
+                to = FLOATT(to);
+        }
+        // fptoui/fptosi require a specific float value argument
+        if (!(f == fptoui || f == fptosi)) {
+            if (fromint)
+                vt = INTT(vt, DL);
+            else
+                vt = FLOATT(vt);
+        }
+    }
+
     if (!to || !vt)
         return emit_runtime_call(ctx, f, argv, 2);
+
     Value *from = emit_unbox(ctx, vt, v, v.typ);
     if (!CastInst::castIsValid(Op, from, to))
         return emit_runtime_call(ctx, f, argv, 2);
@@ -631,10 +709,11 @@ static jl_cgval_t generic_cast(
             // understood that everything is implicitly rounded to 23 bits,
             // but if we start looking at more bits we need to actually do the
             // rounding first instead of carrying around incorrect low bits.
-            Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType());
+            Align align(julia_alignment((jl_value_t*)jlto));
+            Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType(), align);
             setName(ctx.emission_context, jlfloattemp_var, "rounding_slot");
-            ctx.builder.CreateStore(from, jlfloattemp_var);
-            from  = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true);
+            ctx.builder.CreateAlignedStore(from, jlfloattemp_var, align);
+            from = ctx.builder.CreateAlignedLoad(from->getType(), jlfloattemp_var, align, /*force this to load from the stack*/true);
             setName(ctx.emission_context, from, "rounded");
         }
     }
@@ -647,19 +726,20 @@ static jl_cgval_t generic_cast(
     else {
         Value *targ_rt = boxed(ctx, targ);
         emit_concretecheck(ctx, targ_rt, std::string(jl_intrinsic_name(f)) + ": target type not a leaf primitive type");
-        Value *box = emit_allocobj(ctx, nb, targ_rt);
+        unsigned align = sizeof(void*); // Allocations are at least pointer aligned
+        Value *box = emit_allocobj(ctx, nb, targ_rt, true, align);
         setName(ctx.emission_context, box, "cast_box");
         init_bits_value(ctx, box, ans, ctx.tbaa().tbaa_immut);
         return mark_julia_type(ctx, box, true, jlto->name->wrapper);
     }
 }
 
-static jl_cgval_t emit_runtime_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
+static jl_cgval_t emit_runtime_pointerref(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 {
     return emit_runtime_call(ctx, pointerref, argv, 3);
 }
 
-static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
+static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 {
     const jl_cgval_t &e = argv[0];
     const jl_cgval_t &i = argv[1];
@@ -688,7 +768,8 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
 
     if (ety == (jl_value_t*)jl_any_type) {
         Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ);
-        setName(ctx.emission_context, thePtr, "unbox_any_ptr");
+        if (isa<Instruction>(thePtr) && !thePtr->hasName())
+            setName(ctx.emission_context, thePtr, "unbox_any_ptr");
         LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, thePtr, im1), Align(align_nb));
         setName(ctx.emission_context, load, "any_unbox");
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data);
@@ -698,16 +779,16 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
     else if (!deserves_stack(ety)) {
         assert(jl_is_datatype(ety));
         uint64_t size = jl_datatype_size(ety);
-        Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety);
+        Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety, true);
         setName(ctx.emission_context, strct, "pointerref_box");
         im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size,
                     LLT_ALIGN(size, jl_datatype_align(ety))));
         setName(ctx.emission_context, im1, "pointerref_offset");
-        Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
-        thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1);
+        Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
+        thePtr = emit_ptrgep(ctx, thePtr, im1);
         setName(ctx.emission_context, thePtr, "pointerref_src");
         MDNode *tbaa = best_tbaa(ctx.tbaa(), ety);
-        emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, 1);
+        emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, Align(sizeof(jl_value_t*)), Align(align_nb));
         return mark_julia_type(ctx, strct, true, ety);
     }
     else {
@@ -715,8 +796,9 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
         Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed);
         assert(!isboxed);
         if (!type_is_ghost(ptrty)) {
-            Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
-            auto load = typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, false, align_nb);
+            Value *thePtr = emit_unbox(ctx, PointerType::getUnqual(ptrty->getContext()), e, e.typ);
+            thePtr = ctx.builder.CreateInBoundsGEP(ptrty, thePtr, im1);
+            auto load = typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, false, align_nb);
             setName(ctx.emission_context, load.V, "pointerref");
             return load;
         }
@@ -726,16 +808,16 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
     }
 }
 
-static jl_cgval_t emit_runtime_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
+static jl_cgval_t emit_runtime_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 {
     return emit_runtime_call(ctx, pointerset, argv, 4);
 }
 
 // e[i] = x
-static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
+static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 {
     const jl_cgval_t &e = argv[0];
-    const jl_cgval_t &x = argv[1];
+    jl_cgval_t x = argv[1];
     const jl_cgval_t &i = argv[2];
     const jl_cgval_t &align = argv[3];
 
@@ -758,15 +840,17 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         return jl_cgval_t();
     }
     emit_typecheck(ctx, x, ety, "pointerset");
+    x = update_julia_type(ctx, x, ety);
+    if (x.typ == jl_bottom_type)
+        return jl_cgval_t();
 
     Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type);
     Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1));
     setName(ctx.emission_context, im1, "pointerset_idx");
 
-    Value *thePtr;
+    Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
     if (ety == (jl_value_t*)jl_any_type) {
         // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots.
-        thePtr = emit_unbox(ctx, ctx.types().T_size->getPointerTo(), e, e.typ);
         auto gep = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, thePtr, im1);
         setName(ctx.emission_context, gep, "pointerset_ptr");
         auto val = ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), ctx.types().T_size);
@@ -775,30 +859,60 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data);
         ai.decorateInst(store);
     }
+    else if (!x.inline_roots.empty()) {
+        recombine_value(ctx, e, thePtr, jl_aliasinfo_t(), Align(align_nb), false);
+    }
     else if (x.ispointer()) {
-        thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
         uint64_t size = jl_datatype_size(ety);
         im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size,
                     LLT_ALIGN(size, jl_datatype_align(ety))));
         setName(ctx.emission_context, im1, "pointerset_offset");
-        auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1);
+        auto gep = emit_ptrgep(ctx, thePtr, im1);
         setName(ctx.emission_context, gep, "pointerset_ptr");
-        emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb);
+        emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, Align(align_nb), Align(julia_alignment(ety)));
     }
     else {
         bool isboxed;
         Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed);
         assert(!isboxed);
         if (!type_is_ghost(ptrty)) {
-            thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
-            typed_store(ctx, thePtr, im1, x, jl_cgval_t(), ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
-                        AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false, false, false, nullptr, "");
+            thePtr = ctx.builder.CreateInBoundsGEP(ptrty, thePtr, im1);
+            typed_store(ctx, thePtr, x, jl_cgval_t(), ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
+                        AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, nullptr, true, false, false, false, false, false, nullptr, "atomic_pointerset", nullptr, nullptr);
         }
     }
     return e;
 }
 
-static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, jl_cgval_t *argv)
+// ptr + offset
+// ptr - offset
+static jl_cgval_t emit_pointerarith(jl_codectx_t &ctx, intrinsic f,
+                                    ArrayRef<jl_cgval_t> argv)
+{
+    jl_value_t *ptrtyp = argv[0].typ;
+    jl_value_t *offtyp = argv[1].typ;
+    if (!jl_is_cpointer_type(ptrtyp) || offtyp != (jl_value_t *)jl_ulong_type)
+        return emit_runtime_call(ctx, f, argv, argv.size());
+    assert(f == add_ptr || f == sub_ptr);
+
+    Value *ptr = emit_unbox(ctx, ctx.types().T_ptr, argv[0], ptrtyp);
+    Value *off = emit_unbox(ctx, ctx.types().T_size, argv[1], offtyp);
+    if (f == sub_ptr)
+        off = ctx.builder.CreateNeg(off);
+    Value *ans = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), ptr, off);
+
+    if (jl_is_concrete_type(ptrtyp)) {
+        return mark_julia_type(ctx, ans, false, ptrtyp);
+    }
+    else {
+        Value *box = emit_allocobj(ctx, (jl_datatype_t *)ptrtyp, true);
+        setName(ctx.emission_context, box, "ptr_box");
+        init_bits_value(ctx, box, ans, ctx.tbaa().tbaa_immut);
+        return mark_julia_type(ctx, box, true, (jl_datatype_t *)ptrtyp);
+    }
+}
+
+static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 {
     const jl_cgval_t &ord = argv[0];
     if (ord.constant && jl_is_symbol(ord.constant)) {
@@ -814,7 +928,7 @@ static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, jl_cgval_t *argv)
     return emit_runtime_call(ctx, atomic_fence, argv, 1);
 }
 
-static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
+static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 {
     const jl_cgval_t &e = argv[0];
     const jl_cgval_t &ord = argv[1];
@@ -854,18 +968,17 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
 
     if (!deserves_stack(ety)) {
         assert(jl_is_datatype(ety));
-        Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety);
+        Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety, true);
         setName(ctx.emission_context, strct, "atomic_pointerref_box");
-        Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
+        Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
         Type *loadT = Type::getIntNTy(ctx.builder.getContext(), nb * 8);
-        thePtr = emit_bitcast(ctx, thePtr, loadT->getPointerTo());
         MDNode *tbaa = best_tbaa(ctx.tbaa(), ety);
         LoadInst *load = ctx.builder.CreateAlignedLoad(loadT, thePtr, Align(nb));
         setName(ctx.emission_context, load, "atomic_pointerref");
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
         ai.decorateInst(load);
         load->setOrdering(llvm_order);
-        thePtr = emit_bitcast(ctx, strct, thePtr->getType());
+        thePtr = strct;
         StoreInst *store = ctx.builder.CreateAlignedStore(load, thePtr, Align(julia_alignment(ety)));
         ai.decorateInst(store);
         return mark_julia_type(ctx, strct, true, ety);
@@ -875,7 +988,7 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
         Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed);
         assert(!isboxed);
         if (!type_is_ghost(ptrty)) {
-            Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
+            Value *thePtr = emit_unbox(ctx, PointerType::getUnqual(ptrty->getContext()), e, e.typ);
             auto load = typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, llvm_order, false, nb);
             setName(ctx.emission_context, load.V, "atomic_pointerref");
             return load;
@@ -892,7 +1005,7 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
 // e[i] <= x (swap)
 // e[i] y => x (replace)
 // x(e[i], y) (modify)
-static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs, const jl_cgval_t *modifyop)
+static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, ArrayRef<jl_cgval_t> argv, int nargs, const jl_cgval_t *modifyop)
 {
     bool issetfield = f == atomic_pointerset;
     bool isreplacefield = f == atomic_pointerreplace;
@@ -900,7 +1013,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl
     bool ismodifyfield = f == atomic_pointermodify;
     const jl_cgval_t undefval;
     const jl_cgval_t &e = argv[0];
-    const jl_cgval_t &x = isreplacefield || ismodifyfield ? argv[2] : argv[1];
+    jl_cgval_t x = isreplacefield || ismodifyfield ? argv[2] : argv[1];
     const jl_cgval_t &y = isreplacefield || ismodifyfield ? argv[1] : undefval;
     const jl_cgval_t &ord = isreplacefield || ismodifyfield ? argv[3] : argv[2];
     const jl_cgval_t &failord = isreplacefield ? argv[4] : undefval;
@@ -929,8 +1042,8 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl
         // n.b.: the expected value (y) must be rooted, but not the others
         Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ);
         bool isboxed = true;
-        jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
-                    llvm_order, llvm_failorder, sizeof(jl_value_t*), false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify");
+        jl_cgval_t ret = typed_store(ctx, thePtr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
+                    llvm_order, llvm_failorder, sizeof(jl_value_t*), nullptr, issetfield, isreplacefield, isswapfield, ismodifyfield, false, false, modifyop, "atomic_pointermodify", nullptr, nullptr);
         if (issetfield)
             ret = e;
         return ret;
@@ -942,8 +1055,12 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl
         emit_error(ctx, msg);
         return jl_cgval_t();
     }
-    if (!ismodifyfield)
+    if (!ismodifyfield) {
         emit_typecheck(ctx, x, ety, std::string(jl_intrinsic_name((int)f)));
+        x = update_julia_type(ctx, x, ety);
+        if (x.typ == jl_bottom_type)
+            return jl_cgval_t();
+    }
 
     size_t nb = jl_datatype_size(ety);
     if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) {
@@ -955,7 +1072,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl
 
     if (!jl_isbits(ety)) {
         //if (!deserves_stack(ety))
-        //Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
+        //Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
         //uint64_t size = jl_datatype_size(ety);
         return emit_runtime_call(ctx, f, argv, nargs); // TODO: optimizations
     }
@@ -965,11 +1082,11 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl
         assert(!isboxed);
         Value *thePtr;
         if (!type_is_ghost(ptrty))
-            thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
+            thePtr = emit_unbox(ctx, PointerType::getUnqual(ptrty->getContext()), e, e.typ);
         else
             thePtr = nullptr; // could use any value here, since typed_store will not use it
-        jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
-                    llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify");
+        jl_cgval_t ret = typed_store(ctx, thePtr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
+                    llvm_order, llvm_failorder, nb, nullptr, issetfield, isreplacefield, isswapfield, ismodifyfield, false, false, modifyop, "atomic_pointermodify", nullptr, nullptr);
         if (issetfield)
             ret = e;
         return ret;
@@ -1029,7 +1146,7 @@ struct math_builder {
     }
 };
 
-static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **argvalues, size_t nargs,
+static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, ArrayRef<Value*> argvalues, size_t nargs,
                                      jl_datatype_t **newtyp, jl_value_t *xtyp);
 
 
@@ -1106,8 +1223,6 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_
             else {
                 x_ptr = decay_derived(ctx, x_ptr);
                 y_ptr = decay_derived(ctx, y_ptr);
-                if (x_ptr->getType() != y_ptr->getType())
-                    y_ptr = ctx.builder.CreateBitCast(y_ptr, x_ptr->getType());
                 ifelse_result = ctx.builder.CreateSelect(isfalse, y_ptr, x_ptr);
                 setName(ctx.emission_context, ifelse_result, "ifelse_result");
                 ifelse_tbaa = MDNode::getMostGenericTBAA(x.tbaa, y.tbaa);
@@ -1193,11 +1308,11 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
     if (f == cglobal_auto || f == cglobal)
         return emit_cglobal(ctx, args, nargs);
 
-    SmallVector<jl_cgval_t> argv(nargs);
+    SmallVector<jl_cgval_t, 0> argv(nargs);
     for (size_t i = 0; i < nargs; ++i) {
         jl_cgval_t arg = emit_expr(ctx, args[i + 1]);
         if (arg.typ == jl_bottom_type) {
-            // intrinsics generally don't handle buttom values, so bail out early
+            // intrinsics generally don't handle bottom values, so bail out early
             return jl_cgval_t();
         }
         argv[i] = arg;
@@ -1207,84 +1322,82 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
     // return emit_runtime_call(ctx, f, argv, nargs);
 
     switch (f) {
-    case arraylen: {
-        ++Emitted_arraylen;
-        assert(nargs == 1);
-        const jl_cgval_t &x = argv[0];
-        jl_value_t *typ = jl_unwrap_unionall(x.typ);
-        if (!jl_is_datatype(typ) || ((jl_datatype_t*)typ)->name != jl_array_typename)
-            return emit_runtime_call(ctx, f, argv.data(), nargs);
-        return mark_julia_type(ctx, emit_arraylen(ctx, x), false, jl_long_type);
-    }
     case pointerref:
         ++Emitted_pointerref;
         assert(nargs == 3);
-        return emit_pointerref(ctx, argv.data());
+        return emit_pointerref(ctx, argv);
     case pointerset:
         ++Emitted_pointerset;
         assert(nargs == 4);
-        return emit_pointerset(ctx, argv.data());
+        return emit_pointerset(ctx, argv);
+
+    case add_ptr:
+    case sub_ptr:
+        ++Emitted_pointerarith;
+        assert(nargs == 2);
+        return emit_pointerarith(ctx, f, argv);
+
     case atomic_fence:
         ++Emitted_atomic_fence;
         assert(nargs == 1);
-        return emit_atomicfence(ctx, argv.data());
+        return emit_atomicfence(ctx, argv);
     case atomic_pointerref:
         ++Emitted_atomic_pointerref;
         assert(nargs == 2);
-        return emit_atomic_pointerref(ctx, argv.data());
+        return emit_atomic_pointerref(ctx, argv);
     case atomic_pointerset:
     case atomic_pointerswap:
     case atomic_pointermodify:
     case atomic_pointerreplace:
         ++Emitted_atomic_pointerop;
-        return emit_atomic_pointerop(ctx, f, argv.data(), nargs, nullptr);
+        return emit_atomic_pointerop(ctx, f, argv, nargs, nullptr);
     case bitcast:
         ++Emitted_bitcast;
         assert(nargs == 2);
-        return generic_bitcast(ctx, argv.data());
+        return generic_bitcast(ctx, argv);
     case trunc_int:
         ++Emitted_trunc_int;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::Trunc, argv.data(), true, true);
+        return generic_cast(ctx, f, Instruction::Trunc, argv, true, true);
     case sext_int:
         ++Emitted_sext_int;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::SExt, argv.data(), true, true);
+        return generic_cast(ctx, f, Instruction::SExt, argv, true, true);
     case zext_int:
         ++Emitted_zext_int;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::ZExt, argv.data(), true, true);
+        return generic_cast(ctx, f, Instruction::ZExt, argv, true, true);
     case uitofp:
         ++Emitted_uitofp;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::UIToFP, argv.data(), false, true);
+        return generic_cast(ctx, f, Instruction::UIToFP, argv, false, true);
     case sitofp:
         ++Emitted_sitofp;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::SIToFP, argv.data(), false, true);
+        return generic_cast(ctx, f, Instruction::SIToFP, argv, false, true);
     case fptoui:
         ++Emitted_fptoui;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::FPToUI, argv.data(), true, false);
+        return generic_cast(ctx, f, Instruction::FPToUI, argv, true, false);
     case fptosi:
         ++Emitted_fptosi;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::FPToSI, argv.data(), true, false);
+        return generic_cast(ctx, f, Instruction::FPToSI, argv, true, false);
     case fptrunc:
         ++Emitted_fptrunc;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::FPTrunc, argv.data(), false, false);
+        return generic_cast(ctx, f, Instruction::FPTrunc, argv, false, false);
     case fpext:
         ++Emitted_fpext;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::FPExt, argv.data(), false, false);
+        return generic_cast(ctx, f, Instruction::FPExt, argv, false, false);
 
     case not_int: {
         ++Emitted_not_int;
         assert(nargs == 1);
         const jl_cgval_t &x = argv[0];
         if (!jl_is_primitivetype(x.typ))
-            return emit_runtime_call(ctx, f, argv.data(), nargs);
+            return emit_runtime_call(ctx, f, argv, nargs);
         Type *xt = INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext(), true), DL);
         Value *from = emit_unbox(ctx, xt, x, x.typ);
         Value *ans = ctx.builder.CreateNot(from);
@@ -1296,7 +1409,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         assert(nargs == 1);
         const jl_cgval_t &x = argv[0];
         if (!x.constant || !jl_is_datatype(x.constant))
-            return emit_runtime_call(ctx, f, argv.data(), nargs);
+            return emit_runtime_call(ctx, f, argv, nargs);
         jl_datatype_t *dt = (jl_datatype_t*) x.constant;
 
         // select the appropriated overloaded intrinsic
@@ -1306,7 +1419,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         else if (dt == jl_float64_type)
             intr_name += "f64";
         else
-            return emit_runtime_call(ctx, f, argv.data(), nargs);
+            return emit_runtime_call(ctx, f, argv, nargs);
 
         FunctionCallee intr = jl_Module->getOrInsertFunction(intr_name, getInt1Ty(ctx.builder.getContext()));
         auto ret = ctx.builder.CreateCall(intr);
@@ -1319,14 +1432,17 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
 
         // verify argument types
         if (!jl_is_primitivetype(xinfo.typ))
-            return emit_runtime_call(ctx, f, argv.data(), nargs);
+            return emit_runtime_call(ctx, f, argv, nargs);
         Type *xtyp = bitstype_to_llvm(xinfo.typ, ctx.builder.getContext(), true);
-        if (float_func()[f])
-            xtyp = FLOATT(xtyp);
-        else
+        if (float_func()[f]) {
+            if (!xtyp->isFloatingPointTy())
+                return emit_runtime_call(ctx, f, argv, nargs);
+        }
+        else {
             xtyp = INTT(xtyp, DL);
+        }
         if (!xtyp)
-            return emit_runtime_call(ctx, f, argv.data(), nargs);
+            return emit_runtime_call(ctx, f, argv, nargs);
         ////Bool are required to be in the range [0,1]
         ////so while they are represented as i8,
         ////the operations need to be done in mod 1
@@ -1337,31 +1453,31 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         //if (xtyp == (jl_value_t*)jl_bool_type)
         //    r = getInt1Ty(ctx.builder.getContext());
 
-        SmallVector<Type *> argt(nargs);
+        SmallVector<Type *, 0> argt(nargs);
         argt[0] = xtyp;
 
         if (f == shl_int || f == lshr_int || f == ashr_int) {
             if (!jl_is_primitivetype(argv[1].typ))
-                return emit_runtime_call(ctx, f, argv.data(), nargs);
+                return emit_runtime_call(ctx, f, argv, nargs);
             argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext(), true), DL);
         }
         else {
             for (size_t i = 1; i < nargs; ++i) {
                 if (xinfo.typ != argv[i].typ)
-                    return emit_runtime_call(ctx, f, argv.data(), nargs);
+                    return emit_runtime_call(ctx, f, argv, nargs);
                 argt[i] = xtyp;
             }
         }
 
         // unbox the arguments
-        SmallVector<Value *> argvalues(nargs);
+        SmallVector<Value *, 0> argvalues(nargs);
         for (size_t i = 0; i < nargs; ++i) {
             argvalues[i] = emit_unbox(ctx, argt[i], argv[i], argv[i].typ);
         }
 
         // call the intrinsic
         jl_value_t *newtyp = xinfo.typ;
-        Value *r = emit_untyped_intrinsic(ctx, f, argvalues.data(), nargs, (jl_datatype_t**)&newtyp, xinfo.typ);
+        Value *r = emit_untyped_intrinsic(ctx, f, argvalues, nargs, (jl_datatype_t**)&newtyp, xinfo.typ);
         // Turn Bool operations into mod 1 now, if needed
         if (newtyp == (jl_value_t*)jl_bool_type && !r->getType()->isIntegerTy(1))
             r = ctx.builder.CreateTrunc(r, getInt1Ty(ctx.builder.getContext()));
@@ -1371,7 +1487,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
     assert(0 && "unreachable");
 }
 
-static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **argvalues, size_t nargs,
+static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, ArrayRef<Value*> argvalues, size_t nargs,
                                      jl_datatype_t **newtyp, jl_value_t *xtyp)
 {
     ++EmittedUntypedIntrinsics;
@@ -1391,32 +1507,56 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
     case srem_int: return ctx.builder.CreateSRem(x, y);
     case urem_int: return ctx.builder.CreateURem(x, y);
 
-    // LLVM will not fold ptrtoint+arithmetic+inttoptr to GEP. The reason for this
-    // has to do with alias analysis. When adding two integers, either one of them
-    // could be the pointer base. With getelementptr, it is clear which of the
-    // operands is the pointer base. We also have this information at the julia
-    // level. Thus, to not lose information, we need to have a separate intrinsic
-    // for pointer arithmetic which lowers to getelementptr.
-    case add_ptr: {
-        return ctx.builder.CreatePtrToInt(
-            ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()),
-                emit_inttoptr(ctx, x, getInt8PtrTy(ctx.builder.getContext())), y), t);
-
-    }
-
-    case sub_ptr: {
-        return ctx.builder.CreatePtrToInt(
-            ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()),
-                emit_inttoptr(ctx, x, getInt8PtrTy(ctx.builder.getContext())), ctx.builder.CreateNeg(y)), t);
-
-    }
-
     case neg_float: return math_builder(ctx)().CreateFNeg(x);
     case neg_float_fast: return math_builder(ctx, true)().CreateFNeg(x);
     case add_float: return math_builder(ctx)().CreateFAdd(x, y);
     case sub_float: return math_builder(ctx)().CreateFSub(x, y);
     case mul_float: return math_builder(ctx)().CreateFMul(x, y);
     case div_float: return math_builder(ctx)().CreateFDiv(x, y);
+    case min_float: {
+        assert(x->getType() == y->getType());
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee minintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::minimum, ArrayRef<Type*>(t));
+#else
+        FunctionCallee minintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::minimum, ArrayRef<Type*>(t));
+#endif
+        return ctx.builder.CreateCall(minintr, {x, y});
+    }
+    case max_float: {
+        assert(x->getType() == y->getType());
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee maxintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::maximum, ArrayRef<Type*>(t));
+#else
+        FunctionCallee maxintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::maximum, ArrayRef<Type*>(t));
+#endif
+        return ctx.builder.CreateCall(maxintr, {x, y});
+    }
+    case min_float_fast: {
+        assert(x->getType() == y->getType());
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee minintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::minimum, ArrayRef<Type*>(t));
+#else
+        FunctionCallee minintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::minimum, ArrayRef<Type*>(t));
+#endif
+        auto call = ctx.builder.CreateCall(minintr, {x, y});
+        auto fmf = call->getFastMathFlags();
+        fmf.setFast();
+        call->copyFastMathFlags(fmf);
+        return call;
+    }
+    case max_float_fast: {
+        assert(x->getType() == y->getType());
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee maxintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::maximum, ArrayRef<Type*>(t));
+#else
+        FunctionCallee maxintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::maximum, ArrayRef<Type*>(t));
+#endif
+        auto call = ctx.builder.CreateCall(maxintr, {x, y});
+        auto fmf = call->getFastMathFlags();
+        fmf.setFast();
+        call->copyFastMathFlags(fmf);
+        return call;
+    }
     case add_float_fast: return math_builder(ctx, true)().CreateFAdd(x, y);
     case sub_float_fast: return math_builder(ctx, true)().CreateFSub(x, y);
     case mul_float_fast: return math_builder(ctx, true)().CreateFMul(x, y);
@@ -1424,7 +1564,11 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
     case fma_float: {
         assert(y->getType() == x->getType());
         assert(z->getType() == y->getType());
-        FunctionCallee fmaintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fma, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee fmaintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::fma, ArrayRef<Type*>(t));
+#else
+        FunctionCallee fmaintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fma, ArrayRef<Type*>(t));
+#endif
         return ctx.builder.CreateCall(fmaintr, {x, y, z});
     }
     case muladd_float: {
@@ -1454,13 +1598,12 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
                 (f == checked_smul_int ?
                  Intrinsic::smul_with_overflow :
                  Intrinsic::umul_with_overflow)))));
-        FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, intr_id, makeArrayRef(t));
-        Value *res = ctx.builder.CreateCall(intr, {x, y});
-        Value *val = ctx.builder.CreateExtractValue(res, ArrayRef<unsigned>(0));
-        setName(ctx.emission_context, val, "checked");
-        Value *obit = ctx.builder.CreateExtractValue(res, ArrayRef<unsigned>(1));
-        setName(ctx.emission_context, obit, "overflow");
-        Value *obyte = ctx.builder.CreateZExt(obit, getInt8Ty(ctx.builder.getContext()));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee intr = Intrinsic::getOrInsertDeclaration(jl_Module, intr_id, ArrayRef<Type*>(t));
+#else
+        FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, intr_id, ArrayRef<Type*>(t));
+#endif
+        Value *tupval = ctx.builder.CreateCall(intr, {x, y});
 
         jl_value_t *params[2];
         params[0] = xtyp;
@@ -1468,10 +1611,6 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
         jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2);
         *newtyp = tuptyp;
 
-        Value *tupval;
-        tupval = UndefValue::get(julia_type_to_llvm(ctx, (jl_value_t*)tuptyp));
-        tupval = ctx.builder.CreateInsertValue(tupval, val, ArrayRef<unsigned>(0));
-        tupval = ctx.builder.CreateInsertValue(tupval, obyte, ArrayRef<unsigned>(1));
         return tupval;
     }
 
@@ -1574,30 +1713,54 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
         }
     }
     case bswap_int: {
-        FunctionCallee bswapintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap, makeArrayRef(t));
-        return ctx.builder.CreateCall(bswapintr, x);
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee bswapintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::bswap, ArrayRef<Type*>(t)); //TODO: Move to deduction guides
+#else
+        FunctionCallee bswapintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap, ArrayRef<Type*>(t)); //TODO: Move to deduction guides
+#endif
+        return ctx.builder.CreateCall(bswapintr, x);                                                           // when we drop LLVM 15
     }
     case ctpop_int: {
-        FunctionCallee ctpopintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctpop, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee ctpopintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::ctpop, ArrayRef<Type*>(t));
+#else
+        FunctionCallee ctpopintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctpop, ArrayRef<Type*>(t));
+#endif
         return ctx.builder.CreateCall(ctpopintr, x);
     }
     case ctlz_int: {
-        FunctionCallee ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee ctlz = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::ctlz, ArrayRef<Type*>(t));
+#else
+        FunctionCallee ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz, ArrayRef<Type*>(t));
+#endif
         y = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
         return ctx.builder.CreateCall(ctlz, {x, y});
     }
     case cttz_int: {
-        FunctionCallee cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee cttz = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::cttz, ArrayRef<Type*>(t));
+#else
+        FunctionCallee cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, ArrayRef<Type*>(t));
+#endif
         y = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
         return ctx.builder.CreateCall(cttz, {x, y});
     }
 
     case abs_float: {
-        FunctionCallee absintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fabs, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee absintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::fabs, ArrayRef<Type*>(t));
+#else
+        FunctionCallee absintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fabs, ArrayRef<Type*>(t));
+#endif
         return ctx.builder.CreateCall(absintr, x);
     }
     case copysign_float: {
-        FunctionCallee copyintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::copysign, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee copyintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::copysign, ArrayRef<Type*>(t));
+#else
+        FunctionCallee copyintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::copysign, ArrayRef<Type*>(t));
+#endif
         return ctx.builder.CreateCall(copyintr, {x, y});
     }
     case flipsign_int: {
@@ -1616,27 +1779,51 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
         return ctx.builder.CreateXor(ctx.builder.CreateAdd(x, tmp), tmp);
     }
     case ceil_llvm: {
-        FunctionCallee ceilintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ceil, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee ceilintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::ceil, ArrayRef<Type*>(t));
+#else
+        FunctionCallee ceilintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ceil, ArrayRef<Type*>(t));
+#endif
         return ctx.builder.CreateCall(ceilintr, x);
     }
     case floor_llvm: {
-        FunctionCallee floorintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::floor, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee floorintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::floor, ArrayRef<Type*>(t));
+#else
+        FunctionCallee floorintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::floor, ArrayRef<Type*>(t));
+#endif
         return ctx.builder.CreateCall(floorintr, x);
     }
     case trunc_llvm: {
-        FunctionCallee truncintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::trunc, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee truncintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::trunc, ArrayRef<Type*>(t));
+#else
+        FunctionCallee truncintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::trunc, ArrayRef<Type*>(t));
+#endif
         return ctx.builder.CreateCall(truncintr, x);
     }
     case rint_llvm: {
-        FunctionCallee rintintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::rint, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee rintintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::rint, ArrayRef<Type*>(t));
+#else
+        FunctionCallee rintintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::rint, ArrayRef<Type*>(t));
+#endif
         return ctx.builder.CreateCall(rintintr, x);
     }
     case sqrt_llvm: {
-        FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee sqrtintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::sqrt, ArrayRef<Type*>(t));
+#else
+        FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, ArrayRef<Type*>(t));
+#endif
         return ctx.builder.CreateCall(sqrtintr, x);
     }
     case sqrt_llvm_fast: {
-        FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t));
+#if JL_LLVM_VERSION >= 200000
+        FunctionCallee sqrtintr = Intrinsic::getOrInsertDeclaration(jl_Module, Intrinsic::sqrt, ArrayRef<Type*>(t));
+#else
+        FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, ArrayRef<Type*>(t));
+#endif
         return math_builder(ctx, true)().CreateCall(sqrtintr, x);
     }
 
diff --git a/src/intrinsics.h b/src/intrinsics.h
index 93747faa74160..5765e3e671bc6 100644
--- a/src/intrinsics.h
+++ b/src/intrinsics.h
@@ -12,13 +12,13 @@
     ADD_I(udiv_int, 2) \
     ADD_I(srem_int, 2) \
     ADD_I(urem_int, 2) \
-    ADD_I(add_ptr, 2) \
-    ADD_I(sub_ptr, 2) \
     ADD_I(neg_float, 1) \
     ADD_I(add_float, 2) \
     ADD_I(sub_float, 2) \
     ADD_I(mul_float, 2) \
     ADD_I(div_float, 2) \
+    ADD_I(min_float, 2) \
+    ADD_I(max_float, 2) \
     ADD_I(fma_float, 3) \
     ADD_I(muladd_float, 3) \
     /*  fast arithmetic */ \
@@ -27,6 +27,8 @@
     ALIAS(sub_float_fast, sub_float) \
     ALIAS(mul_float_fast, mul_float) \
     ALIAS(div_float_fast, div_float) \
+    ALIAS(min_float_fast, min_float) \
+    ALIAS(max_float_fast, max_float) \
     /*  same-type comparisons */ \
     ADD_I(eq_int, 2) \
     ADD_I(ne_int, 2) \
@@ -86,6 +88,9 @@
     ADD_I(rint_llvm, 1) \
     ADD_I(sqrt_llvm, 1) \
     ADD_I(sqrt_llvm_fast, 1) \
+    /*  pointer arithmetic */ \
+    ADD_I(add_ptr, 2) \
+    ADD_I(sub_ptr, 2) \
     /*  pointer access */ \
     ADD_I(pointerref, 3) \
     ADD_I(pointerset, 4) \
@@ -99,8 +104,6 @@
     /*  c interface */ \
     ADD_I(cglobal, 2) \
     ALIAS(llvmcall, llvmcall) \
-    /*  object access */ \
-    ADD_I(arraylen, 1) \
     /*  cpu feature tests */ \
     ADD_I(have_fma, 1) \
     /*  hidden intrinsics */ \
diff --git a/src/ircode.c b/src/ircode.c
index bc5cc61e7f892..e99bd26aa304a 100644
--- a/src/ircode.c
+++ b/src/ircode.c
@@ -10,21 +10,82 @@
 #include "julia_internal.h"
 #include "serialize.h"
 
-#ifndef _OS_WINDOWS_
-#include <dlfcn.h>
-#endif
-
-#include "valgrind.h"
 #include "julia_assert.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#define TAG_SYMBOL              2
+#define TAG_SSAVALUE            3
+#define TAG_DATATYPE            4
+#define TAG_SLOTNUMBER          5
+#define TAG_SVEC                6
+#define TAG_NEARBYSSAVALUE      7
+#define TAG_NULL                8
+#define TAG_EXPR                9
+#define TAG_PHINODE            10
+#define TAG_PHICNODE           11
+#define TAG_LONG_SYMBOL        12
+#define TAG_LONG_SVEC          13
+#define TAG_LONG_EXPR          14
+#define TAG_LONG_PHINODE       15
+#define TAG_LONG_PHICNODE      16
+#define TAG_METHODROOT         17
+#define TAG_EDGE               18
+#define TAG_STRING             19
+#define TAG_SHORT_INT64        20
+//#define TAG_UNUSED           21
+#define TAG_CNULL              22
+#define TAG_ARRAY1D            23
+#define TAG_SINGLETON          24
+#define TAG_MODULE             25
+#define TAG_TVAR               26
+#define TAG_METHOD_INSTANCE    27
+#define TAG_METHOD             28
+#define TAG_CODE_INSTANCE      29
+#define TAG_COMMONSYM          30
+#define TAG_NEARBYGLOBAL       31
+#define TAG_GLOBALREF          32
+#define TAG_CORE               33
+#define TAG_BASE               34
+#define TAG_BITYPENAME         35
+#define TAG_NEARBYMODULE       36
+#define TAG_INT32              37
+#define TAG_INT64              38
+#define TAG_UINT8              39
+#define TAG_VECTORTY           40
+#define TAG_PTRTY              41
+#define TAG_LONG_SSAVALUE      42
+#define TAG_LONG_METHODROOT    43
+#define TAG_LONG_EDGE          44
+#define TAG_SHORTER_INT64      45
+#define TAG_SHORT_INT32        46
+#define TAG_CALL1              47
+#define TAG_CALL2              48
+#define TAG_SHORT_BACKREF      49
+#define TAG_BACKREF            50
+#define TAG_UNIONALL           51
+#define TAG_GOTONODE           52
+#define TAG_QUOTENODE          53
+#define TAG_GENERAL            54
+#define TAG_GOTOIFNOT          55
+#define TAG_RETURNNODE         56
+#define TAG_ARGUMENT           57
+#define TAG_RELOC_METHODROOT   58
+#define TAG_BINDING            59
+#define TAG_MEMORYT            60
+#define TAG_ENTERNODE          61
+
+#define LAST_TAG 61
+
+
 typedef struct {
     ios_t *s;
+    size_t ssaid;
     // method we're compressing for
     jl_method_t *method;
+    jl_svec_t *edges;
     jl_ptls_t ptls;
     uint8_t relocatability;
 } jl_ircode_state;
@@ -37,28 +98,29 @@ static jl_value_t *deser_tag[256];
 static htable_t common_symbol_tag;
 static jl_value_t *deser_symbols[256];
 
-void *jl_lookup_ser_tag(jl_value_t *v)
+static void *jl_lookup_ser_tag(jl_value_t *v)
 {
     return ptrhash_get(&ser_tag, v);
 }
 
-void *jl_lookup_common_symbol(jl_value_t *v)
+static void *jl_lookup_common_symbol(jl_value_t *v)
 {
     return ptrhash_get(&common_symbol_tag, v);
 }
 
-jl_value_t *jl_deser_tag(uint8_t tag)
+static jl_value_t *jl_deser_tag(uint8_t tag)
 {
     return deser_tag[tag];
 }
 
-jl_value_t *jl_deser_symbol(uint8_t tag)
+static jl_value_t *jl_deser_symbol(uint8_t tag)
 {
     return deser_symbols[tag];
 }
 
 // --- encoding ---
 
+static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal);
 #define jl_encode_value(s, v) jl_encode_value_((s), (jl_value_t*)(v), 0)
 
 static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i)
@@ -67,11 +129,11 @@ static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i)
         s->relocatability = 0;
 }
 
-static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) JL_GC_DISABLED
+static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v)
 {
     jl_array_t *rs = s->method->roots;
-    int i, l = jl_array_len(rs);
-    if (jl_is_symbol(v) || jl_is_concrete_type(v)) {
+    int i, l = jl_array_nrows(rs);
+    if (jl_is_symbol(v) || jl_is_concrete_type(v)) { // TODO: or more generally, any ptr-egal value
         for (i = 0; i < l; i++) {
             if (jl_array_ptr_ref(rs, i) == v)
                 return tagged_root(rr, s, i);
@@ -83,8 +145,14 @@ static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v)
                 return tagged_root(rr, s, i);
         }
     }
+    for (size_t i = 0; i < jl_svec_len(s->edges); i++) {
+        if (jl_svecref(s->edges, i) == v) {
+            rr->index = i;
+            return;
+        }
+    }
     jl_add_method_root(s->method, jl_precompile_toplevel_module, v);
-    return tagged_root(rr, s, jl_array_len(rs) - 1);
+    return tagged_root(rr, s, jl_array_nrows(rs) - 1);
 }
 
 static void jl_encode_int32(jl_ircode_state *s, int32_t x)
@@ -101,11 +169,24 @@ static void jl_encode_int32(jl_ircode_state *s, int32_t x)
 
 static void jl_encode_as_indexed_root(jl_ircode_state *s, jl_value_t *v)
 {
-    rle_reference rr;
+    rle_reference rr = {.key = -1, .index = -1};
 
+    if (jl_is_string(v))
+        v = jl_as_global_root(v, 1);
     literal_val_id(&rr, s, v);
     int id = rr.index;
     assert(id >= 0);
+    if (rr.key == -1) {
+        if (id <= UINT8_MAX) {
+            write_uint8(s->s, TAG_EDGE);
+            write_uint8(s->s, id);
+        }
+        else {
+            write_uint8(s->s, TAG_LONG_EDGE);
+            write_uint32(s->s, id);
+        }
+        return;
+    }
     if (rr.key) {
         write_uint8(s->s, TAG_RELOC_METHODROOT);
         write_uint64(s->s, rr.key);
@@ -121,7 +202,45 @@ static void jl_encode_as_indexed_root(jl_ircode_state *s, jl_value_t *v)
     }
 }
 
-static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED
+static void jl_encode_memory_slice(jl_ircode_state *s, jl_genericmemory_t *mem, size_t offset, size_t len)
+{
+    jl_datatype_t *t = (jl_datatype_t*)jl_typetagof(mem);
+    size_t i;
+    const jl_datatype_layout_t *layout = t->layout;
+    if (layout->flags.arrayelem_isboxed) {
+        for (i = 0; i < len; i++) {
+            jl_value_t *e = jl_genericmemory_ptr_ref(mem, offset + i);
+            jl_encode_value(s, e);
+        }
+    }
+    else if (layout->first_ptr >= 0) {
+        uint16_t elsz = layout->size;
+        size_t j, np = layout->npointers;
+        const char *data = (const char*)mem->ptr + offset * elsz;
+        for (i = 0; i < len; i++) {
+            const char *start = data;
+            for (j = 0; j < np; j++) {
+                uint32_t ptr = jl_ptr_offset(t, j);
+                const jl_value_t *const *fld = &((const jl_value_t *const *)data)[ptr];
+                if ((const char*)fld != start)
+                    ios_write(s->s, start, (const char*)fld - start);
+                JL_GC_PROMISE_ROOTED(*fld);
+                jl_encode_value(s, *fld);
+                start = (const char*)&fld[1];
+            }
+            data += elsz;
+            if (data != start)
+                ios_write(s->s, start, data - start);
+        }
+    }
+    else {
+        ios_write(s->s, (char*)mem->ptr + offset * layout->size, len * layout->size);
+        if (layout->flags.arrayelem_isunion)
+            ios_write(s->s, jl_genericmemory_typetagdata(mem) + offset, len);
+    }
+}
+
+static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
 {
     size_t i;
 
@@ -189,6 +308,10 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
             jl_encode_value(s, jl_globalref_name(v));
         }
     }
+    else if (jl_is_ssavalue(v) && s->ssaid - ((jl_ssavalue_t*)v)->id < 256) {
+        write_uint8(s->s, TAG_NEARBYSSAVALUE);
+        write_uint8(s->s, s->ssaid - ((jl_ssavalue_t*)v)->id);
+    }
     else if (jl_is_ssavalue(v) && ((jl_ssavalue_t*)v)->id < 256 && ((jl_ssavalue_t*)v)->id >= 0) {
         write_uint8(s->s, TAG_SSAVALUE);
         write_uint8(s->s, ((jl_ssavalue_t*)v)->id);
@@ -203,7 +326,7 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
     }
     else if (jl_is_expr(v)) {
         jl_expr_t *e = (jl_expr_t*)v;
-        size_t l = jl_array_len(e->args);
+        size_t l = jl_array_nrows(e->args);
         if (e->head == jl_call_sym) {
             if (l == 2) {
                 write_uint8(s->s, TAG_CALL1);
@@ -235,31 +358,34 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
     else if (jl_is_phinode(v)) {
         jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(v, 0);
         jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(v, 1);
-        size_t l = jl_array_len(edges);
-        if (l <= 255 && jl_array_len(values) == l) {
+        size_t l = jl_array_nrows(edges);
+        if (l <= 255 && jl_array_nrows(values) == l) {
             write_uint8(s->s, TAG_PHINODE);
             write_uint8(s->s, (uint8_t)l);
         }
         else {
             write_uint8(s->s, TAG_LONG_PHINODE);
             write_int32(s->s, l);
-            write_int32(s->s, jl_array_len(values));
+            write_int32(s->s, jl_array_nrows(values));
         }
         for (i = 0; i < l; i++) {
-            int32_t e = ((int32_t*)jl_array_data(edges))[i];
-            if (e <= 20)
-                jl_encode_value(s, jl_box_int32(e));
+            int32_t e = jl_array_data(edges, int32_t)[i];
+            if (e <= 0 && e <= 20) { // 1-byte encodings
+                jl_value_t *ebox = jl_box_int32(e);
+                JL_GC_PROMISE_ROOTED(ebox);
+                jl_encode_value(s, ebox);
+            }
             else
                 jl_encode_int32(s, e);
         }
-        l = jl_array_len(values);
+        l = jl_array_nrows(values);
         for (i = 0; i < l; i++) {
             jl_encode_value(s, jl_array_ptr_ref(values, i));
         }
     }
     else if (jl_is_phicnode(v)) {
         jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(v, 0);
-        size_t l = jl_array_len(values);
+        size_t l = jl_array_nrows(values);
         if (l <= 255) {
             write_uint8(s->s, TAG_PHICNODE);
             write_uint8(s->s, (uint8_t)l);
@@ -274,20 +400,39 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
     }
     else if (jl_is_gotonode(v)) {
         write_uint8(s->s, TAG_GOTONODE);
-        jl_encode_value(s, jl_get_nth_field(v, 0));
+        jl_value_t *f  = jl_get_nth_field(v, 0);
+        JL_GC_PUSH1(&f);
+        jl_encode_value(s, f);
+        JL_GC_POP();
     }
     else if (jl_is_gotoifnot(v)) {
         write_uint8(s->s, TAG_GOTOIFNOT);
-        jl_encode_value(s, jl_get_nth_field(v, 0));
-        jl_encode_value(s, jl_get_nth_field(v, 1));
+        jl_value_t *f  = jl_get_nth_field_noalloc(v, 0);
+        JL_GC_PUSH1(&f);
+        jl_encode_value(s, f);
+        f  = jl_get_nth_field(v, 1);
+        jl_encode_value(s, f);
+        JL_GC_POP();
+    }
+    else if (jl_is_enternode(v)) {
+        write_uint8(s->s, TAG_ENTERNODE);
+        jl_value_t *f  = jl_get_nth_field(v, 0);
+        JL_GC_PUSH1(&f);
+        jl_encode_value(s, f);
+        f  = jl_get_nth_field_noalloc(v, 1);
+        jl_encode_value(s, f);
+        JL_GC_POP();
     }
     else if (jl_is_argument(v)) {
         write_uint8(s->s, TAG_ARGUMENT);
-        jl_encode_value(s, jl_get_nth_field(v, 0));
+        jl_value_t *f  = jl_get_nth_field(v, 0);
+        JL_GC_PUSH1(&f);
+        jl_encode_value(s, f);
+        JL_GC_POP();
     }
     else if (jl_is_returnnode(v)) {
         write_uint8(s->s, TAG_RETURNNODE);
-        jl_encode_value(s, jl_get_nth_field(v, 0));
+        jl_encode_value(s, jl_returnnode_value(v));
     }
     else if (jl_is_quotenode(v)) {
         write_uint8(s->s, TAG_QUOTENODE);
@@ -321,11 +466,6 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
         write_uint8(s->s, TAG_UINT8);
         write_int8(s->s, *(int8_t*)jl_data_ptr(v));
     }
-    else if (jl_typetagis(v, jl_lineinfonode_type)) {
-        write_uint8(s->s, TAG_LINEINFO);
-        for (i = 0; i < jl_datatype_nfields(jl_lineinfonode_type); i++)
-            jl_encode_value(s, jl_get_nth_field(v, i));
-    }
     else if (((jl_datatype_t*)jl_typeof(v))->instance == v) {
         write_uint8(s->s, TAG_SINGLETON);
         jl_encode_value(s, jl_typeof(v));
@@ -335,68 +475,41 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
         write_int32(s->s, jl_string_len(v));
         ios_write(s->s, jl_string_data(v), jl_string_len(v));
     }
-    else if (as_literal && jl_is_array(v)) {
+    else if (as_literal && jl_is_array(v) && jl_array_ndims(v)) {
         jl_array_t *ar = (jl_array_t*)v;
-        jl_value_t *et = jl_tparam0(jl_typeof(ar));
-        int isunion = jl_is_uniontype(et);
-        if (ar->flags.ndims == 1 && ar->elsize <= 0x1f) {
-            write_uint8(s->s, TAG_ARRAY1D);
-            write_uint8(s->s, (ar->flags.ptrarray << 7) | (ar->flags.hasptr << 6) | (isunion << 5) | (ar->elsize & 0x1f));
-        }
-        else {
-            write_uint8(s->s, TAG_ARRAY);
-            write_uint16(s->s, ar->flags.ndims);
-            write_uint16(s->s, (ar->flags.ptrarray << 15) | (ar->flags.hasptr << 14) | (isunion << 13) | (ar->elsize & 0x1fff));
-        }
-        for (i = 0; i < ar->flags.ndims; i++)
-            jl_encode_value(s, jl_box_long(jl_array_dim(ar,i)));
+        write_uint8(s->s, TAG_ARRAY1D);
+        size_t l = jl_array_dim0(ar);
+        jl_value_t *lbox = jl_box_long(l);
+        JL_GC_PUSH1(&lbox);
+        jl_encode_value(s, lbox);
+        JL_GC_POP();
         jl_encode_value(s, jl_typeof(ar));
-        size_t l = jl_array_len(ar);
-        if (ar->flags.ptrarray) {
-            for (i = 0; i < l; i++) {
-                jl_value_t *e = jl_array_ptr_ref(v, i);
-                jl_encode_value(s, e);
-            }
-        }
-        else if (ar->flags.hasptr) {
-            const char *data = (const char*)jl_array_data(ar);
-            uint16_t elsz = ar->elsize;
-            size_t j, np = ((jl_datatype_t*)et)->layout->npointers;
-            for (i = 0; i < l; i++) {
-                const char *start = data;
-                for (j = 0; j < np; j++) {
-                    uint32_t ptr = jl_ptr_offset((jl_datatype_t*)et, j);
-                    const jl_value_t *const *fld = &((const jl_value_t *const *)data)[ptr];
-                    if ((const char*)fld != start)
-                        ios_write(s->s, start, (const char*)fld - start);
-                    JL_GC_PROMISE_ROOTED(*fld);
-                    jl_encode_value(s, *fld);
-                    start = (const char*)&fld[1];
-                }
-                data += elsz;
-                if (data != start)
-                    ios_write(s->s, start, data - start);
-            }
-        }
-        else {
-            ios_write(s->s, (char*)jl_array_data(ar), l * ar->elsize);
-            if (jl_array_isbitsunion(ar))
-                ios_write(s->s, jl_array_typetagdata(ar), l);
-        }
+        const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(ar->ref.mem))->layout;
+        size_t offset;
+        if (layout->flags.arrayelem_isunion || layout->size == 0)
+            offset = (uintptr_t)ar->ref.ptr_or_offset;
+        else
+            offset = ((char*)ar->ref.ptr_or_offset - (char*)ar->ref.mem->ptr) / layout->size;
+        jl_encode_memory_slice(s, ar->ref.mem, offset, l);
+    }
+    else if (as_literal && jl_is_genericmemory(v)) {
+        jl_genericmemory_t* m = (jl_genericmemory_t*)v;
+        write_uint8(s->s, TAG_MEMORYT);
+        jl_encode_value(s, (jl_datatype_t*)jl_typetagof(v));
+        jl_value_t *lbox = jl_box_long(m->length);
+        JL_GC_PUSH1(&lbox);
+        jl_encode_value(s, lbox);
+        JL_GC_POP();
+        jl_encode_memory_slice(s, m, 0, m->length);
+    }
+    else if (as_literal && jl_is_layout_opaque(((jl_datatype_t*)jl_typeof(v))->layout)) {
+        assert(0 && "not legal to store this as literal");
     }
     else if (as_literal || jl_is_uniontype(v) || jl_is_newvarnode(v) || jl_is_linenode(v) ||
              jl_is_upsilonnode(v) || jl_is_pinode(v) || jl_is_slotnumber(v) || jl_is_ssavalue(v) ||
              (jl_isbits(jl_typeof(v)) && jl_datatype_size(jl_typeof(v)) <= 64)) {
+        write_uint8(s->s, TAG_GENERAL);
         jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
-        size_t tsz = jl_datatype_size(t);
-        if (tsz <= 255) {
-            write_uint8(s->s, TAG_SHORT_GENERAL);
-            write_uint8(s->s, tsz);
-        }
-        else {
-            write_uint8(s->s, TAG_GENERAL);
-            write_int32(s->s, tsz);
-        }
         jl_encode_value(s, t);
 
         char *data = (char*)jl_data_ptr(v);
@@ -434,105 +547,115 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
     }
 }
 
-static jl_code_info_flags_t code_info_flags(uint8_t inferred, uint8_t propagate_inbounds, uint8_t has_fcall,
-                                            uint8_t nospecializeinfer, uint8_t inlining, uint8_t constprop)
+static jl_code_info_flags_t code_info_flags(uint8_t propagate_inbounds, uint8_t has_fcall, uint8_t has_image_globalref,
+                                            uint8_t nospecializeinfer, uint8_t isva,
+                                            uint8_t inlining, uint8_t constprop, uint8_t nargsmatchesmethod,
+                                            jl_array_t *ssaflags)
 {
     jl_code_info_flags_t flags;
-    flags.bits.inferred = inferred;
     flags.bits.propagate_inbounds = propagate_inbounds;
     flags.bits.has_fcall = has_fcall;
+    flags.bits.has_image_globalref = has_image_globalref;
     flags.bits.nospecializeinfer = nospecializeinfer;
+    flags.bits.isva = isva;
     flags.bits.inlining = inlining;
     flags.bits.constprop = constprop;
+    flags.bits.nargsmatchesmethod = nargsmatchesmethod;
+    flags.bits.has_ssaflags = 0;
+    const uint32_t *ssaflag_data = jl_array_data(ssaflags, uint32_t);
+    for (size_t i = 0, l = jl_array_dim0(ssaflags); i < l; i++)
+        if (ssaflag_data[i])
+            flags.bits.has_ssaflags = 1;
     return flags;
 }
 
 // --- decoding ---
 
-static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED;
+static jl_value_t *jl_decode_value(jl_ircode_state *s);
 
-static jl_value_t *jl_decode_value_svec(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED
+static jl_value_t *jl_decode_value_svec(jl_ircode_state *s, uint8_t tag)
 {
     size_t i, len;
     if (tag == TAG_SVEC)
         len = read_uint8(s->s);
     else
         len = read_int32(s->s);
-    jl_svec_t *sv = jl_alloc_svec_uninit(len);
-    jl_value_t **data = jl_svec_data(sv);
-    for (i = 0; i < len; i++) {
-        data[i] = jl_decode_value(s);
-    }
+    jl_svec_t *sv = jl_alloc_svec(len);
+    JL_GC_PUSH1(&sv);
+    for (i = 0; i < len; i++)
+        jl_svecset(sv, i, jl_decode_value(s));
+    JL_GC_POP();
     return (jl_value_t*)sv;
 }
 
-static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED
+static jl_genericmemory_t *jl_decode_value_memory(jl_ircode_state *s, jl_value_t *mty, size_t nel)
 {
-    int16_t i, ndims;
-    int isptr, isunion, hasptr, elsize;
-    if (tag == TAG_ARRAY1D) {
-        ndims = 1;
-        elsize = read_uint8(s->s);
-        isptr = (elsize >> 7) & 1;
-        hasptr = (elsize >> 6) & 1;
-        isunion = (elsize >> 5) & 1;
-        elsize = elsize & 0x1f;
-    }
-    else {
-        ndims = read_uint16(s->s);
-        elsize = read_uint16(s->s);
-        isptr = (elsize >> 15) & 1;
-        hasptr = (elsize >> 14) & 1;
-        isunion = (elsize >> 13) & 1;
-        elsize = elsize & 0x1fff;
-    }
-    size_t *dims = (size_t*)alloca(ndims * sizeof(size_t));
-    for (i = 0; i < ndims; i++) {
-        dims[i] = jl_unbox_long(jl_decode_value(s));
-    }
-    jl_array_t *a = jl_new_array_for_deserialization(
-            (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize);
-    jl_value_t *aty = jl_decode_value(s);
-    jl_set_typeof(a, aty);
-    if (a->flags.ptrarray) {
-        jl_value_t **data = (jl_value_t**)jl_array_data(a);
-        size_t i, numel = jl_array_len(a);
+    jl_genericmemory_t *m = jl_alloc_genericmemory(mty, nel);
+    JL_GC_PUSH1(&m);
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty)->layout;
+    if (layout->flags.arrayelem_isboxed) {
+        jl_value_t **data = (jl_value_t**)m->ptr;
+        size_t i, numel = m->length;
         for (i = 0; i < numel; i++) {
             data[i] = jl_decode_value(s);
+            jl_gc_wb(m, data[i]);
         }
-        assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled
-    }
-    else if (a->flags.hasptr) {
-        size_t i, numel = jl_array_len(a);
-        char *data = (char*)jl_array_data(a);
-        uint16_t elsz = a->elsize;
-        jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(a));
-        size_t j, np = et->layout->npointers;
+    }
+    else if (layout->first_ptr >= 0) {
+        size_t i, numel = m->length;
+        char *data = (char*)m->ptr;
+        uint16_t elsz = layout->size;
+        size_t j, np = layout->npointers;
         for (i = 0; i < numel; i++) {
             char *start = data;
             for (j = 0; j < np; j++) {
-                uint32_t ptr = jl_ptr_offset(et, j);
+                uint32_t ptr = jl_ptr_offset((jl_datatype_t*)mty, j);
                 jl_value_t **fld = &((jl_value_t**)data)[ptr];
                 if ((char*)fld != start)
                     ios_readall(s->s, start, (const char*)fld - start);
                 *fld = jl_decode_value(s);
+                jl_gc_wb(m, fld);
                 start = (char*)&fld[1];
             }
             data += elsz;
             if (data != start)
                 ios_readall(s->s, start, data - start);
         }
-        assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled
     }
     else {
-        size_t extra = jl_array_isbitsunion(a) ? jl_array_len(a) : 0;
-        size_t tot = jl_array_len(a) * a->elsize + extra;
-        ios_readall(s->s, (char*)jl_array_data(a), tot);
+        size_t extra = jl_genericmemory_isbitsunion(m) ? m->length : 0;
+        size_t tot = m->length * layout->size + extra;
+        ios_readall(s->s, (char*)m->ptr, tot);
     }
+    JL_GC_POP();
+    return m;
+}
+
+JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims);
+
+static jl_value_t *jl_decode_value_array1d(jl_ircode_state *s, uint8_t tag)
+{
+    int16_t ndims = 1;
+    size_t dim0 = jl_unbox_long(jl_decode_value(s));
+    size_t len = dim0;
+    jl_value_t *aty = jl_decode_value(s);
+    JL_GC_PROMISE_ROOTED(aty); // (JL_ALWAYS_LEAFTYPE)
+    jl_genericmemory_t *mem = jl_decode_value_memory(s, jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)aty, 0), 1), len);
+    JL_GC_PUSH1(&mem);
+    int tsz = sizeof(jl_array_t) + ndims*sizeof(size_t);
+    jl_array_t *a = (jl_array_t*)jl_gc_alloc(s->ptls, tsz, aty);
+    a->ref.mem = mem;
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout;
+    if (layout->flags.arrayelem_isunion || layout->size == 0)
+        a->ref.ptr_or_offset = (void*)0;
+    else
+        a->ref.ptr_or_offset = a->ref.mem->ptr;
+    a->dimsize[0] = dim0;
+    JL_GC_POP();
     return (jl_value_t*)a;
 }
 
-static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED
+static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag)
 {
     size_t i, len;
     jl_sym_t *head = NULL;
@@ -553,14 +676,18 @@ static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_D
     if (head == NULL)
         head = (jl_sym_t*)jl_decode_value(s);
     jl_expr_t *e = jl_exprn(head, len);
-    jl_value_t **data = (jl_value_t**)(e->args->data);
+    JL_GC_PUSH1(&e);
+    jl_value_t **data = jl_array_ptr_data(e->args);
+    jl_value_t *owner = jl_array_owner(e->args);
     for (i = 0; i < len; i++) {
         data[i] = jl_decode_value(s);
+        jl_gc_wb(owner, data[i]);
     }
+    JL_GC_POP();
     return (jl_value_t*)e;
 }
 
-static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED
+static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag)
 {
     size_t i, len_e, len_v;
     if (tag == TAG_PHINODE) {
@@ -570,21 +697,27 @@ static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DI
         len_e = read_int32(s->s);
         len_v = read_int32(s->s);
     }
-    jl_array_t *e = jl_alloc_array_1d(jl_array_int32_type, len_e);
-    jl_array_t *v = jl_alloc_vec_any(len_v);
-    jl_value_t *phi = jl_new_struct(jl_phinode_type, e, v);
-    int32_t *data_e = (int32_t*)(e->data);
+    jl_array_t *e = NULL;
+    jl_array_t *v = NULL;
+    jl_value_t *phi = NULL;
+    JL_GC_PUSH3(&e, &v, &phi);
+    e = jl_alloc_array_1d(jl_array_int32_type, len_e);
+    v = jl_alloc_vec_any(len_v);
+    phi = jl_new_struct(jl_phinode_type, e, v);
+    int32_t *data_e = jl_array_data(e, int32_t);
     for (i = 0; i < len_e; i++) {
         data_e[i] = jl_unbox_int32(jl_decode_value(s));
     }
-    jl_value_t **data_v = (jl_value_t**)(v->data);
+    jl_value_t **data_v = jl_array_ptr_data(v);
     for (i = 0; i < len_v; i++) {
         data_v[i] = jl_decode_value(s);
+        jl_gc_wb(jl_array_owner(v), data_v[i]);
     }
+    JL_GC_POP();
     return phi;
 }
 
-static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED
+static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag)
 {
     size_t i, len;
     if (tag == TAG_PHICNODE)
@@ -592,41 +725,53 @@ static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) JL_GC_D
     else
         len = read_int32(s->s);
     jl_array_t *v = jl_alloc_vec_any(len);
-    jl_value_t *phic = jl_new_struct(jl_phicnode_type, v);
-    jl_value_t **data = (jl_value_t**)(v->data);
+    jl_value_t *phic = (jl_value_t*)v;
+    JL_GC_PUSH1(&phic);
+    phic = jl_new_struct(jl_phicnode_type, v);
+    jl_value_t **data = jl_array_ptr_data(v);
     for (i = 0; i < len; i++) {
         data[i] = jl_decode_value(s);
+        jl_gc_wb(jl_array_owner(v), data[i]);
     }
+    JL_GC_POP();
     return phic;
 }
 
-static jl_value_t *jl_decode_value_globalref(jl_ircode_state *s) JL_GC_DISABLED
+static jl_value_t *jl_decode_value_globalref(jl_ircode_state *s)
 {
-    jl_value_t *mod = jl_decode_value(s);
-    jl_value_t *var = jl_decode_value(s);
-    return jl_module_globalref((jl_module_t*)mod, (jl_sym_t*)var);
+    jl_module_t *mod = (jl_module_t*)jl_decode_value(s);
+    JL_GC_PROMISE_ROOTED(mod);
+    jl_sym_t *var = (jl_sym_t*)jl_decode_value(s);
+    JL_GC_PROMISE_ROOTED(var);
+    return jl_module_globalref(mod, var);
 }
 
-static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED
+static jl_value_t *jl_decode_value_any(jl_ircode_state *s)
 {
-    int32_t sz = (tag == TAG_SHORT_GENERAL ? read_uint8(s->s) : read_int32(s->s));
-    jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL);
-    jl_set_typeof(v, (void*)(intptr_t)0xf50);
     jl_datatype_t *dt = (jl_datatype_t*)jl_decode_value(s);
-    if (dt->smalltag)
+    JL_GC_PROMISE_ROOTED(dt); // (JL_ALWAYS_LEAFTYPE)
+    // jl_new_struct_uninit
+    size_t sz = jl_datatype_size(dt);
+    jl_value_t *v = jl_gc_alloc(s->ptls, sz, dt);
+    if (dt->smalltag) // TODO: do we need this?
         jl_set_typetagof(v, dt->smalltag, 0);
-    else
-        jl_set_typeof(v, dt);
     char *data = (char*)jl_data_ptr(v);
     size_t i, np = dt->layout->npointers;
     char *start = data;
-    for (i = 0; i < np; i++) {
-        uint32_t ptr = jl_ptr_offset(dt, i);
-        jl_value_t **fld = &((jl_value_t**)data)[ptr];
-        if ((char*)fld != start)
-            ios_readall(s->s, start, (const char*)fld - start);
-        *fld = jl_decode_value(s);
-        start = (char*)&fld[1];
+    if (np) {
+        if (sz > 0)
+            memset(v, 0, sz);
+        JL_GC_PUSH1(&v);
+        for (i = 0; i < np; i++) {
+            uint32_t ptr = jl_ptr_offset(dt, i);
+            jl_value_t **fld = &((jl_value_t**)data)[ptr];
+            if ((char*)fld != start)
+                ios_readall(s->s, start, (const char*)fld - start);
+            *fld = jl_decode_value(s);
+            jl_gc_wb(v, *fld);
+            start = (char*)&fld[1];
+        }
+        JL_GC_POP();
     }
     data += jl_datatype_size(dt);
     if (data != start)
@@ -634,11 +779,11 @@ static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DI
     return v;
 }
 
-static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
+static jl_value_t *jl_decode_value(jl_ircode_state *s)
 {
     assert(!ios_eof(s->s));
     jl_value_t *v;
-    size_t i, n;
+    size_t n;
     uint64_t key;
     uint8_t tag = read_uint8(s->s);
     if (tag > LAST_TAG)
@@ -649,6 +794,7 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
         tag = read_uint8(s->s);
         return jl_deser_tag(tag);
     case TAG_RELOC_METHODROOT:
+    {
         key = read_uint64(s->s);
         tag = read_uint8(s->s);
         assert(tag == TAG_METHODROOT || tag == TAG_LONG_METHODROOT);
@@ -659,10 +805,15 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
             index = read_uint32(s->s);
         assert(index >= 0);
         return lookup_root(s->method, key, index);
+    }
     case TAG_METHODROOT:
         return lookup_root(s->method, 0, read_uint8(s->s));
     case TAG_LONG_METHODROOT:
         return lookup_root(s->method, 0, read_uint32(s->s));
+    case TAG_EDGE:
+        return jl_svecref(s->edges, read_uint8(s->s));
+    case TAG_LONG_EDGE:
+        return jl_svecref(s->edges, read_uint32(s->s));
     case TAG_SVEC: JL_FALLTHROUGH; case TAG_LONG_SVEC:
         return jl_decode_value_svec(s, tag);
     case TAG_COMMONSYM:
@@ -670,14 +821,21 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
     case TAG_SSAVALUE:
         v = jl_box_ssavalue(read_uint8(s->s));
         return v;
+    case TAG_NEARBYSSAVALUE:
+        v = jl_box_ssavalue(s->ssaid - read_uint8(s->s));
+        return v;
     case TAG_LONG_SSAVALUE:
         v = jl_box_ssavalue(read_uint16(s->s));
         return v;
     case TAG_SLOTNUMBER:
         v = jl_box_slotnumber(read_uint16(s->s));
         return v;
-    case TAG_ARRAY: JL_FALLTHROUGH; case TAG_ARRAY1D:
-        return jl_decode_value_array(s, tag);
+    case TAG_ARRAY1D:
+        return jl_decode_value_array1d(s, tag);
+    case TAG_MEMORYT:
+        v = jl_decode_value(s);
+        JL_GC_PROMISE_ROOTED(v); // (JL_ALWAYS_LEAFTYPE)
+        return (jl_value_t*)jl_decode_value_memory(s, v, jl_unbox_long(jl_decode_value(s)));
     case TAG_EXPR:      JL_FALLTHROUGH;
     case TAG_LONG_EXPR: JL_FALLTHROUGH;
     case TAG_CALL1:     JL_FALLTHROUGH;
@@ -688,22 +846,47 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
     case TAG_PHICNODE: JL_FALLTHROUGH; case TAG_LONG_PHICNODE:
         return jl_decode_value_phic(s, tag);
     case TAG_GOTONODE: JL_FALLTHROUGH; case TAG_QUOTENODE:
+    {
         v = jl_new_struct_uninit(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type);
+        JL_GC_PUSH1(&v);
         set_nth_field(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type, v, 0, jl_decode_value(s), 0);
+        JL_GC_POP();
         return v;
+    }
     case TAG_GOTOIFNOT:
+    {
         v = jl_new_struct_uninit(jl_gotoifnot_type);
+        JL_GC_PUSH1(&v);
         set_nth_field(jl_gotoifnot_type, v, 0, jl_decode_value(s), 0);
         set_nth_field(jl_gotoifnot_type, v, 1, jl_decode_value(s), 0);
+        JL_GC_POP();
         return v;
+    }
+    case TAG_ENTERNODE:
+    {
+        v = jl_new_struct_uninit(jl_enternode_type);
+        JL_GC_PUSH1(&v);
+        set_nth_field(jl_enternode_type, v, 0, jl_decode_value(s), 0);
+        set_nth_field(jl_enternode_type, v, 1, jl_decode_value(s), 0);
+        JL_GC_POP();
+        return v;
+    }
     case TAG_ARGUMENT:
+    {
         v = jl_new_struct_uninit(jl_argument_type);
+        JL_GC_PUSH1(&v);
         set_nth_field(jl_argument_type, v, 0, jl_decode_value(s), 0);
+        JL_GC_POP();
         return v;
+    }
     case TAG_RETURNNODE:
+    {
         v = jl_new_struct_uninit(jl_returnnode_type);
+        JL_GC_PUSH1(&v);
         set_nth_field(jl_returnnode_type, v, 0, jl_decode_value(s), 0);
+        JL_GC_POP();
         return v;
+    }
     case TAG_SHORTER_INT64:
         v = jl_box_int64((int16_t)read_uint16(s->s));
         return v;
@@ -722,9 +905,14 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
     case TAG_UINT8:
         return jl_box_uint8(read_uint8(s->s));
     case TAG_NEARBYGLOBAL:
-        assert(s->method != NULL);
+    {
+        jl_method_t *m = s->method;
+        assert(m != NULL);
+        JL_GC_PROMISE_ROOTED(m);
         v = jl_decode_value(s);
-        return jl_module_globalref(s->method->module, (jl_sym_t*)v);
+        JL_GC_PROMISE_ROOTED(v); // symbol
+        return jl_module_globalref(m->module, (jl_sym_t*)v);
+    }
     case TAG_NEARBYMODULE:
         assert(s->method != NULL);
         return (jl_value_t*)s->method->module;
@@ -737,26 +925,29 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
     case TAG_BASE:
         return (jl_value_t*)jl_base_module;
     case TAG_VECTORTY:
+    {
         v = jl_decode_value(s);
-        return jl_apply_type2((jl_value_t*)jl_array_type, v, jl_box_long(1));
+        JL_GC_PUSH1(&v);
+        v = jl_apply_type2((jl_value_t*)jl_array_type, v, jl_box_long(1));
+        JL_GC_POP();
+        return v;
+    }
     case TAG_PTRTY:
+    {
         v = jl_decode_value(s);
-        return jl_apply_type1((jl_value_t*)jl_pointer_type, v);
+        JL_GC_PUSH1(&v);
+        v = jl_apply_type1((jl_value_t*)jl_pointer_type, v);
+        JL_GC_POP();
+        return v;
+    }
     case TAG_STRING:
         n = read_int32(s->s);
         v = jl_alloc_string(n);
         ios_readall(s->s, jl_string_data(v), n);
         return v;
-    case TAG_LINEINFO:
-        v = jl_new_struct_uninit(jl_lineinfonode_type);
-        for (i = 0; i < jl_datatype_nfields(jl_lineinfonode_type); i++) {
-            //size_t offs = jl_field_offset(jl_lineinfonode_type, i);
-            set_nth_field(jl_lineinfonode_type, v, i, jl_decode_value(s), 0);
-        }
-        return v;
     default:
-        assert(tag == TAG_GENERAL || tag == TAG_SHORT_GENERAL);
-        return jl_decode_value_any(s, tag);
+        assert(tag == TAG_GENERAL);
+        return jl_decode_value_any(s);
     }
 }
 
@@ -764,93 +955,152 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
 
 typedef jl_value_t jl_string_t; // for local expressibility
 
+static size_t codelocs_parseheader(jl_string_t *cl, int *line_offset, int *line_bytes, int *to_bytes) JL_NOTSAFEPOINT
+{
+    if (jl_string_len(cl) == 0) {
+        *line_offset = *line_bytes = *to_bytes = 0;
+        return 0;
+    }
+    int32_t header[3];
+    memcpy(&header, (char*)jl_string_data(cl), sizeof(header));
+    *line_offset = header[0];
+    if (header[1] < 255)
+        *line_bytes = 1;
+    else if (header[1] < 65535)
+        *line_bytes = 2;
+    else
+        *line_bytes = 4;
+    if (header[2] == 0)
+        *to_bytes = 0;
+    else if (header[2] < 255)
+        *to_bytes = 1;
+    else if (header[2] < 65535)
+        *to_bytes = 2;
+    else
+        *to_bytes = 4;
+    assert(jl_string_len(cl) >= sizeof(header) + *line_bytes);
+    return (jl_string_len(cl) - sizeof(header) - *line_bytes) / (*line_bytes + *to_bytes * 2); // compute nstmts
+}
+#ifndef NDEBUG
+static int codelocs_nstmts(jl_string_t *cl) JL_NOTSAFEPOINT
+{
+    int line_offset, line_bytes, to_bytes;
+    return codelocs_parseheader(cl, &line_offset, &line_bytes, &to_bytes);
+}
+#endif
+
+#define IR_DATASIZE_FLAGS         sizeof(uint16_t)
+#define IR_DATASIZE_PURITY        sizeof(uint16_t)
+#define IR_DATASIZE_INLINING_COST sizeof(uint8_t)
+#define IR_DATASIZE_NSLOTS        sizeof(int32_t)
+typedef enum {
+    ir_offset_flags         = 0,
+    ir_offset_purity        = 0 + IR_DATASIZE_FLAGS,
+    ir_offset_inlining_cost = 0 + IR_DATASIZE_FLAGS + IR_DATASIZE_PURITY,
+    ir_offset_nslots        = 0 + IR_DATASIZE_FLAGS + IR_DATASIZE_PURITY + IR_DATASIZE_INLINING_COST,
+    ir_offset_slotflags     = 0 + IR_DATASIZE_FLAGS + IR_DATASIZE_PURITY + IR_DATASIZE_INLINING_COST + IR_DATASIZE_NSLOTS
+} ir_offset;
+
+// static_assert is technically a declaration, so shenanigans are required to
+// open an inline declaration context. `sizeof` is the traditional way to do this,
+// but this pattern is illegal in C++, which some compilers warn about, so use
+// `offsetof` instead.
+#define declaration_context(what) (void)offsetof(struct{what; int dummy_;}, dummy_)
+
+// Checks (at compile time) that sizeof(data) == macro_size
+#define checked_size(data, macro_size) \
+    (declaration_context(static_assert(sizeof(data) == macro_size, #macro_size " does not match written size")), data)
+
 JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
 {
     JL_TIMING(AST_COMPRESS, AST_COMPRESS);
     JL_LOCK(&m->writelock); // protect the roots array (Might GC)
+    int isdef = code == NULL;
+    if (isdef)
+        code = (jl_code_info_t*)m->source;
     assert(jl_is_method(m));
     assert(jl_is_code_info(code));
+    assert(jl_array_nrows(code->code) == codelocs_nstmts(code->debuginfo->codelocs) || jl_string_len(code->debuginfo->codelocs) == 0);
     ios_t dest;
     ios_mem(&dest, 0);
-    int en = jl_gc_enable(0); // Might GC
-    size_t i;
 
     if (m->roots == NULL) {
         m->roots = jl_alloc_vec_any(0);
         jl_gc_wb(m, m->roots);
     }
+    jl_value_t *edges = code->edges;
     jl_ircode_state s = {
         &dest,
+        0,
         m,
+        (!isdef && jl_is_svec(edges)) ? (jl_svec_t*)edges : jl_emptysvec,
         jl_current_task->ptls,
         1
     };
 
-    jl_code_info_flags_t flags = code_info_flags(code->inferred, code->propagate_inbounds, code->has_fcall,
-                                                 code->nospecializeinfer, code->inlining, code->constprop);
-    write_uint8(s.s, flags.packed);
-    write_uint8(s.s, code->purity.bits);
-    write_uint16(s.s, code->inlining_cost);
-
-    size_t nslots = jl_array_len(code->slotflags);
+    uint8_t nargsmatchesmethod = code->nargs == m->nargs;
+    jl_code_info_flags_t flags = code_info_flags(code->propagate_inbounds, code->has_fcall, code->has_image_globalref,
+                                                 code->nospecializeinfer, code->isva,
+                                                 code->inlining, code->constprop,
+                                                 nargsmatchesmethod,
+                                                 code->ssaflags);
+    write_uint16(s.s, checked_size(flags.packed, IR_DATASIZE_FLAGS));
+    write_uint16(s.s, checked_size(code->purity.bits, IR_DATASIZE_PURITY));
+    write_uint8(s.s, checked_size(jl_encode_inlining_cost(code->inlining_cost), IR_DATASIZE_INLINING_COST));
+
+    size_t nslots = jl_array_nrows(code->slotflags);
     assert(nslots >= m->nargs && nslots < INT32_MAX); // required by generated functions
-    write_int32(s.s, nslots);
-    ios_write(s.s, (char*)jl_array_data(code->slotflags), nslots);
+    write_int32(s.s, checked_size((int32_t)nslots, IR_DATASIZE_NSLOTS));
+    ios_write(s.s, jl_array_data(code->slotflags, const char), nslots);
 
     // N.B.: The layout of everything before this point is explicitly referenced
     // by the various jl_ir_ accessors. Make sure to adjust those if you change
     // the data layout.
-
-    for (i = 0; i < 6; i++) {
-        int copy = 1;
-        if (i == 1) { // skip codelocs
-            assert(jl_field_offset(jl_code_info_type, i) == offsetof(jl_code_info_t, codelocs));
-            continue;
-        }
-        if (i == 4) { // don't copy contents of method_for_inference_limit_heuristics field
-            assert(jl_field_offset(jl_code_info_type, i) == offsetof(jl_code_info_t, method_for_inference_limit_heuristics));
-            copy = 0;
-        }
-        jl_encode_value_(&s, jl_get_nth_field((jl_value_t*)code, i), copy);
-    }
+    if (!nargsmatchesmethod) {
+        size_t nargs = code->nargs;
+        assert(nargs < INT32_MAX);
+        write_int32(s.s, (int32_t)nargs);
+    }
+
+    size_t i, l = jl_array_dim0(code->code);
+    write_uint64(s.s, l);
+    for (i = 0; i < l; i++) {
+        s.ssaid = i;
+        jl_encode_value(&s, jl_array_ptr_ref(code->code, i));
+    }
+    s.ssaid = 0;
+    jl_encode_value_(&s, (jl_value_t*)code->ssavaluetypes, 1);
+    assert(jl_typetagis(code->ssaflags, jl_array_uint32_type));
+    assert(jl_array_dim0(code->ssaflags) == l);
+    const uint32_t *ssaflags_data = jl_array_data(code->ssaflags, uint32_t);
+    if (flags.bits.has_ssaflags)
+        ios_write(s.s, (const char*)ssaflags_data, l * sizeof(*ssaflags_data));
 
     // For opaque closure, also save the slottypes. We technically only need the first slot type,
     // but this is simpler for now. We may want to refactor where this gets stored in the future.
     if (m->is_for_opaque_closure)
         jl_encode_value_(&s, code->slottypes, 1);
 
-    if (m->generator)
+    jl_string_t *v = NULL;
+    JL_GC_PUSH1(&v);
+    // Slotnames. For regular methods, we require that m->slot_syms matches the
+    // CodeInfo's slotnames, so we do not need to save it here.
+    if (m->generator) {
         // can't optimize generated functions
-        jl_encode_value_(&s, (jl_value_t*)jl_compress_argnames(code->slotnames), 1);
-    else
-        jl_encode_value(&s, jl_nothing);
-
-    size_t nstmt = jl_array_len(code->code);
-    assert(nstmt == jl_array_len(code->codelocs));
-    if (jl_array_len(code->linetable) < 256) {
-        for (i = 0; i < nstmt; i++) {
-            write_uint8(s.s, ((int32_t*)jl_array_data(code->codelocs))[i]);
-        }
-    }
-    else if (jl_array_len(code->linetable) < 65536) {
-        for (i = 0; i < nstmt; i++) {
-            write_uint16(s.s, ((int32_t*)jl_array_data(code->codelocs))[i]);
-        }
+        v = jl_compress_argnames(code->slotnames);
+        jl_encode_value_(&s, (jl_value_t*)v, 1);
     }
     else {
-        ios_write(s.s, (char*)jl_array_data(code->codelocs), nstmt * sizeof(int32_t));
+        jl_encode_value(&s, jl_nothing);
     }
 
     write_uint8(s.s, s.relocatability);
 
     ios_flush(s.s);
-    jl_string_t *v = jl_pchar_to_string(s.s->buf, s.s->size);
+    v = jl_pchar_to_string(s.s->buf, s.s->size);
     ios_close(s.s);
-    if (jl_array_len(m->roots) == 0) {
+    if (jl_array_nrows(m->roots) == 0)
         m->roots = NULL;
-    }
-    JL_GC_PUSH1(&v);
-    jl_gc_enable(en);
     JL_UNLOCK(&m->writelock); // Might GC
     JL_GC_POP();
 
@@ -861,105 +1111,116 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
 {
     if (jl_is_code_info(data))
         return (jl_code_info_t*)data;
+    if (!jl_is_string(data))
+        return (jl_code_info_t*)jl_nothing;
     JL_TIMING(AST_UNCOMPRESS, AST_UNCOMPRESS);
     JL_LOCK(&m->writelock); // protect the roots array (Might GC)
     assert(jl_is_method(m));
     assert(jl_is_string(data));
-    size_t i;
     ios_t src;
     ios_mem(&src, 0);
     ios_setbuf(&src, (char*)jl_string_data(data), jl_string_len(data), 0);
     src.size = jl_string_len(data);
-    int en = jl_gc_enable(0); // Might GC
     jl_ircode_state s = {
         &src,
+        0,
         m,
+        metadata == NULL ? NULL : jl_atomic_load_relaxed(&metadata->edges),
         jl_current_task->ptls,
         1
     };
-
     jl_code_info_t *code = jl_new_code_info_uninit();
+    jl_value_t *slotnames = NULL;
+    JL_GC_PUSH2(&code, &slotnames);
+
     jl_code_info_flags_t flags;
-    flags.packed = read_uint8(s.s);
+    flags.packed = read_uint16(s.s);
     code->inlining = flags.bits.inlining;
     code->constprop = flags.bits.constprop;
-    code->inferred = flags.bits.inferred;
     code->propagate_inbounds = flags.bits.propagate_inbounds;
     code->has_fcall = flags.bits.has_fcall;
+    code->has_image_globalref = flags.bits.has_image_globalref;
     code->nospecializeinfer = flags.bits.nospecializeinfer;
-    code->purity.bits = read_uint8(s.s);
-    code->inlining_cost = read_uint16(s.s);
+    code->isva = flags.bits.isva;
+    code->purity.bits = read_uint16(s.s);
+    code->inlining_cost = jl_decode_inlining_cost(read_uint8(s.s));
 
-    size_t nslots = read_int32(&src);
+    size_t nslots = read_int32(s.s);
     code->slotflags = jl_alloc_array_1d(jl_array_uint8_type, nslots);
-    ios_readall(s.s, (char*)jl_array_data(code->slotflags), nslots);
+    jl_gc_wb(code, code->slotflags);
+    ios_readall(s.s, jl_array_data(code->slotflags, char), nslots);
+
+    if (flags.bits.nargsmatchesmethod) {
+        code->nargs = m->nargs;
+    } else {
+        code->nargs = read_int32(s.s);
+    }
+
+    size_t i, l = read_uint64(s.s);
+    code->code = jl_alloc_array_1d(jl_array_any_type, l);
+    jl_gc_wb(code, code->code);
+    for (i = 0; i < l; i++) {
+        s.ssaid = i;
+        jl_array_ptr_set(code->code, i, jl_decode_value(&s));
+    }
+    s.ssaid = 0;
+    code->ssavaluetypes = jl_decode_value(&s);
+    jl_gc_wb(code, code->ssavaluetypes);
+    code->ssaflags = jl_alloc_array_1d(jl_array_uint32_type, l);
+    jl_gc_wb(code, code->ssaflags);
+    uint32_t *ssaflags_data = jl_array_data(code->ssaflags, uint32_t);
+    if (flags.bits.has_ssaflags)
+        ios_readall(s.s, (char*)ssaflags_data, l * sizeof(*ssaflags_data));
+    else
+        memset(ssaflags_data, 0, l * sizeof(*ssaflags_data));
 
-    for (i = 0; i < 6; i++) {
-        if (i == 1)  // skip codelocs
-            continue;
-        assert(jl_field_isptr(jl_code_info_type, i));
-        jl_value_t **fld = (jl_value_t**)((char*)jl_data_ptr(code) + jl_field_offset(jl_code_info_type, i));
-        *fld = jl_decode_value(&s);
-    }
-    if (m->is_for_opaque_closure)
+    if (m->is_for_opaque_closure) {
         code->slottypes = jl_decode_value(&s);
+        jl_gc_wb(code, code->slottypes);
+    }
 
-    jl_value_t *slotnames = jl_decode_value(&s);
+    slotnames = jl_decode_value(&s);
     if (!jl_is_string(slotnames))
         slotnames = m->slot_syms;
     code->slotnames = jl_uncompress_argnames(slotnames);
+    jl_gc_wb(code, code->slotnames);
 
-    size_t nstmt = jl_array_len(code->code);
-    code->codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nstmt);
-    if (jl_array_len(code->linetable) < 256) {
-        for (i = 0; i < nstmt; i++) {
-            ((int32_t*)jl_array_data(code->codelocs))[i] = read_uint8(s.s);
-        }
-    }
-    else if (jl_array_len(code->linetable) < 65536) {
-        for (i = 0; i < nstmt; i++) {
-            ((int32_t*)jl_array_data(code->codelocs))[i] = read_uint16(s.s);
-        }
-    }
-    else {
-        ios_readall(s.s, (char*)jl_array_data(code->codelocs), nstmt * sizeof(int32_t));
-    }
+    if (metadata)
+        code->debuginfo = jl_atomic_load_relaxed(&metadata->debuginfo);
+    else
+        code->debuginfo = m->debuginfo;
+    jl_gc_wb(code, code->debuginfo);
+    assert(code->debuginfo);
+    assert(jl_array_nrows(code->code) == codelocs_nstmts(code->debuginfo->codelocs) || jl_string_len(code->debuginfo->codelocs) == 0);
 
     (void) read_uint8(s.s);   // relocatability
-
+    assert(!ios_eof(s.s));
     assert(ios_getc(s.s) == -1);
+
     ios_close(s.s);
-    JL_GC_PUSH1(&code);
-    jl_gc_enable(en);
     JL_UNLOCK(&m->writelock); // Might GC
-    JL_GC_POP();
     if (metadata) {
-        code->min_world = metadata->min_world;
-        code->max_world = metadata->max_world;
+        code->parent = jl_get_ci_mi(metadata);
+        jl_gc_wb(code, code->parent);
         code->rettype = metadata->rettype;
-        code->parent = metadata->def;
+        jl_gc_wb(code, code->rettype);
+        code->min_world = jl_atomic_load_relaxed(&metadata->min_world);
+        code->max_world = jl_atomic_load_relaxed(&metadata->max_world);
+        code->edges = (jl_value_t*)s.edges;
+        jl_gc_wb(code, s.edges);
     }
+    JL_GC_POP();
 
     return code;
 }
 
-JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_string_t *data)
-{
-    if (jl_is_code_info(data))
-        return ((jl_code_info_t*)data)->inferred;
-    assert(jl_is_string(data));
-    jl_code_info_flags_t flags;
-    flags.packed = jl_string_data(data)[0];
-    return flags.bits.inferred;
-}
-
 JL_DLLEXPORT uint8_t jl_ir_flag_inlining(jl_string_t *data)
 {
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->inlining;
     assert(jl_is_string(data));
     jl_code_info_flags_t flags;
-    flags.packed = jl_string_data(data)[0];
+    flags.packed = jl_string_data(data)[ir_offset_flags];
     return flags.bits.inlining;
 }
 
@@ -969,22 +1230,66 @@ JL_DLLEXPORT uint8_t jl_ir_flag_has_fcall(jl_string_t *data)
         return ((jl_code_info_t*)data)->has_fcall;
     assert(jl_is_string(data));
     jl_code_info_flags_t flags;
-    flags.packed = jl_string_data(data)[0];
+    flags.packed = jl_string_data(data)[ir_offset_flags];
     return flags.bits.has_fcall;
 }
 
-JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_string_t *data)
+JL_DLLEXPORT uint8_t jl_ir_flag_has_image_globalref(jl_string_t *data)
 {
+    if (jl_is_code_info(data))
+        return ((jl_code_info_t*)data)->has_image_globalref;
+    assert(jl_is_string(data));
+    jl_code_info_flags_t flags;
+    flags.packed = jl_string_data(data)[ir_offset_flags];
+    return flags.bits.has_image_globalref;
+}
+
+// create a compressed u16 value with range 0..3968, 3 bits exponent, 5 bits mantissa, implicit first digit, rounding up, full accuracy over 0..63
+JL_DLLEXPORT uint8_t jl_encode_inlining_cost(uint16_t inlining_cost)
+{
+    unsigned shift = 0;
+    unsigned mantissa;
+    if (inlining_cost <= 0x1f) {
+        mantissa = inlining_cost;
+    }
+    else {
+        while (inlining_cost >> 5 >> shift != 0)
+            shift++;
+        assert(1 <= shift && shift <= 11);
+        mantissa = (inlining_cost >> (shift - 1)) & 0x1f;
+        mantissa += (inlining_cost & ((1 << (shift - 1)) - 1)) != 0; // round up if trailing bits non-zero, overflowing into exp
+    }
+    unsigned r = (shift << 5) + mantissa;
+    if (r > 0xff)
+        r = 0xff;
+    return r;
+}
+
+JL_DLLEXPORT uint16_t jl_decode_inlining_cost(uint8_t inlining_cost)
+{
+    unsigned shift = inlining_cost >> 5;
+    if (inlining_cost == 0xff)
+        return 0xffff;
+    else if (shift == 0)
+        return inlining_cost;
+    else
+        return (0x20 | (inlining_cost & 0x1f)) << (shift - 1);
+}
+
+JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_value_t *data)
+{
+    if (jl_is_uint8(data))
+        return jl_decode_inlining_cost(*(uint8_t*)data);
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->inlining_cost;
     assert(jl_is_string(data));
-    uint16_t res = jl_load_unaligned_i16(jl_string_data(data) + 2);
+    uint16_t res = jl_decode_inlining_cost(*(uint8_t*)(jl_string_data(data) + ir_offset_inlining_cost));
     return res;
 }
 
 JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms)
 {
-    size_t nsyms = jl_array_len(syms);
+    size_t nsyms = jl_array_nrows(syms);
     size_t i, len = 0;
     for (i = 0; i < nsyms; i++) {
         jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(syms, i);
@@ -1012,11 +1317,11 @@ JL_DLLEXPORT ssize_t jl_ir_nslots(jl_value_t *data)
 {
     if (jl_is_code_info(data)) {
         jl_code_info_t *func = (jl_code_info_t*)data;
-        return jl_array_len(func->slotnames);
+        return jl_array_nrows(func->slotnames);
     }
     else {
         assert(jl_is_string(data));
-        int nslots = jl_load_unaligned_i32(jl_string_data(data) + 2 + sizeof(uint16_t));
+        int nslots = jl_load_unaligned_i32(jl_string_data(data) + ir_offset_nslots);
         return nslots;
     }
 }
@@ -1024,10 +1329,12 @@ JL_DLLEXPORT ssize_t jl_ir_nslots(jl_value_t *data)
 JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_string_t *data, size_t i)
 {
     assert(i < jl_ir_nslots(data));
-    if (jl_is_code_info(data))
-        return ((uint8_t*)((jl_code_info_t*)data)->slotflags->data)[i];
+    if (jl_is_code_info(data)) {
+        jl_array_t *slotflags = ((jl_code_info_t*)data)->slotflags;
+        return jl_array_data(slotflags, uint8_t)[i];
+    }
     assert(jl_is_string(data));
-    return jl_string_data(data)[2 + sizeof(uint16_t) + sizeof(int32_t) + i];
+    return jl_string_data(data)[ir_offset_slotflags + i];
 }
 
 JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms)
@@ -1073,6 +1380,244 @@ JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i)
     return jl_nothing;
 }
 
+// codelocs are compressed as follows:
+// The input vector is a NTuple{3,UInt32} (struct jl_codeloc_t)
+// The vector is scanned for min and max of the values for each element
+// The output is then allocated to hold (min-line, max-line, max-at) first, then line - min (in the smallest space), then the remainder (in the smallest space)
+static inline struct jl_codeloc_t unpack_codeloc(jl_string_t *cl, size_t pc, int line_offset, int line_bytes, int to_bytes) JL_NOTSAFEPOINT
+{
+    const char *ptr = jl_string_data(cl) + sizeof(int32_t[3]);
+    if (pc == 0)
+        to_bytes = 0;
+    else
+        ptr += line_bytes + (pc - 1) * (line_bytes + to_bytes * 2);
+    uint8_t int8;
+    uint16_t int16;
+    uint32_t int32;
+    struct jl_codeloc_t codeloc;
+    switch (line_bytes) {
+    case 0:
+        codeloc.line = 0;
+        break;
+    case 1:
+        memcpy(&int8, ptr, 1);
+        codeloc.line = int8;
+        break;
+    case 2:
+        memcpy(&int16, ptr, 2);
+        codeloc.line = int16;
+        break;
+    case 4:
+        memcpy(&int32, ptr, 4);
+        codeloc.line = int32;
+        break;
+    }
+    if (codeloc.line > 0)
+        codeloc.line += line_offset - 1;
+    ptr += line_bytes;
+    switch (to_bytes) {
+    case 0:
+        codeloc.to = 0;
+        break;
+    case 1:
+        memcpy(&int8, ptr, 1);
+        codeloc.to = int8;
+        break;
+    case 2:
+        memcpy(&int16, ptr, 2);
+        codeloc.to = int16;
+        break;
+    case 4:
+        memcpy(&int32, ptr, 4);
+        codeloc.to = int32;
+        break;
+    }
+    ptr += to_bytes;
+    switch (to_bytes) {
+    case 0:
+        codeloc.pc = 0;
+        break;
+    case 1:
+        memcpy(&int8, ptr, 1);
+        codeloc.pc = int8;
+        break;
+    case 2:
+        memcpy(&int16, ptr, 2);
+        codeloc.pc = int16;
+        break;
+    case 3:
+        memcpy(&int32, ptr, 4);
+        codeloc.pc = int32;
+        break;
+    }
+    ptr += to_bytes;
+    return codeloc;
+}
+
+
+static const struct jl_codeloc_t badloc = {-1, 0, 0};
+
+JL_DLLEXPORT struct jl_codeloc_t jl_uncompress1_codeloc(jl_string_t *cl, size_t pc) JL_NOTSAFEPOINT
+{
+    assert(jl_is_string(cl));
+    int line_offset, line_bytes, to_bytes;
+    size_t nstmts = codelocs_parseheader(cl, &line_offset, &line_bytes, &to_bytes);
+    if (pc > nstmts)
+        return badloc;
+    return unpack_codeloc(cl, pc, line_offset, line_bytes, to_bytes);
+}
+
+static int allzero(jl_value_t *codelocs) JL_NOTSAFEPOINT
+{
+    int32_t *p = jl_array_data(codelocs,int32_t);
+    int32_t *pend = p + jl_array_nrows(codelocs);
+    do {
+        if (*p)
+            return 0;
+    } while (++p < pend);
+    return 1;
+}
+
+JL_DLLEXPORT jl_string_t *jl_compress_codelocs(int32_t firstline, jl_value_t *codelocs, size_t nstmts) // firstline+Vector{Int32} => Memory{UInt8}
+{
+    assert(jl_typeis(codelocs, jl_array_int32_type));
+    if (jl_array_nrows(codelocs) == 0)
+        nstmts = 0;
+    assert(nstmts * 3 == jl_array_nrows(codelocs));
+    if (allzero(codelocs))
+        return jl_an_empty_string;
+    struct jl_codeloc_t codeloc, min, max;
+    size_t i;
+    min.line = min.to = min.pc = firstline <= 0 ? INT32_MAX : firstline;
+    max.line = max.to = max.pc = 0;
+    for (i = 0; i < nstmts; i++) {
+        memcpy(&codeloc, jl_array_data(codelocs,int32_t) + 3 * i, sizeof(codeloc));
+#define SETMIN(x) if (codeloc.x < min.x) min.x = codeloc.x
+#define SETMAX(x) if (codeloc.x > max.x) max.x = codeloc.x
+        if (codeloc.line > 0)
+            SETMIN(line);
+        SETMAX(line);
+        SETMIN(to);
+        SETMAX(to);
+        SETMIN(pc);
+        SETMAX(pc);
+#undef SETMIN
+#undef SETMAX
+    }
+    int32_t header[3];
+    header[0] = min.line > max.line ? 0 : min.line;
+    header[1] = min.line > max.line ? 0 : max.line - min.line;
+    header[2] = max.to > max.pc ? max.to : max.pc;
+    size_t line_bytes;
+    if (header[1] < 255)
+        line_bytes = 1;
+    else if (header[1] < 65535)
+        line_bytes = 2;
+    else
+        line_bytes = 4;
+    size_t to_bytes;
+    if (header[2] == 0)
+        to_bytes = 0;
+    else if (header[2] < 255)
+        to_bytes = 1;
+    else if (header[2] < 65535)
+        to_bytes = 2;
+    else
+        to_bytes = 4;
+    jl_string_t *cl = jl_alloc_string(sizeof(header) + line_bytes + nstmts * (line_bytes + to_bytes * 2));
+    // store header structure
+    memcpy(jl_string_data(cl), &header, sizeof(header));
+    // pack bytes
+    char *ptr = jl_string_data(cl) + sizeof(header);
+    uint8_t int8;
+    uint16_t int16;
+    uint32_t int32;
+    { // store firstline value
+        int8 = int16 = int32 = firstline > 0 ? firstline - header[0] + 1 : 0;
+        switch (line_bytes) {
+        case 0:
+            break;
+        case 1:
+            memcpy(ptr, &int8, 1);
+            break;
+        case 2:
+            memcpy(ptr, &int16, 2);
+            break;
+        case 4:
+            memcpy(ptr, &int32, 4);
+            break;
+        }
+        ptr += line_bytes;
+    }
+    for (i = 0; i < nstmts; i++) {
+        memcpy(&codeloc, jl_array_data(codelocs,int32_t) + 3 * i, sizeof(codeloc));
+        int8 = int16 = int32 = codeloc.line > 0 ? codeloc.line - header[0] + 1 : 0;
+        switch (line_bytes) {
+        case 0:
+            break;
+        case 1:
+            memcpy(ptr, &int8, 1);
+            break;
+        case 2:
+            memcpy(ptr, &int16, 2);
+            break;
+        case 4:
+            memcpy(ptr, &int32, 4);
+            break;
+        }
+        ptr += line_bytes;
+        int8 = int16 = int32 = codeloc.to;
+        switch (to_bytes) {
+        case 0:
+            break;
+        case 1:
+            memcpy(ptr, &int8, 1);
+            break;
+        case 2:
+            memcpy(ptr, &int16, 2);
+            break;
+        case 4:
+            memcpy(ptr, &int32, 4);
+            break;
+        }
+        ptr += to_bytes;
+        int8 = int16 = int32 = codeloc.pc;
+        switch (to_bytes) {
+        case 0:
+            break;
+        case 1:
+            memcpy(ptr, &int8, 1);
+            break;
+        case 2:
+            memcpy(ptr, &int16, 2);
+            break;
+        case 4:
+            memcpy(ptr, &int32, 4);
+            break;
+        }
+        ptr += to_bytes;
+    }
+    return cl;
+}
+
+JL_DLLEXPORT jl_value_t *jl_uncompress_codelocs(jl_string_t *cl, size_t nstmts) // Memory{UInt8} => Vector{Int32}
+{
+    assert(jl_is_string(cl));
+    int line_offset, line_bytes, to_bytes;
+    size_t nlocs = codelocs_parseheader(cl, &line_offset, &line_bytes, &to_bytes);
+    assert(nlocs == 0 || nlocs == nstmts);
+    jl_value_t *codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nstmts * 3);
+    size_t i;
+    for (i = 0; i < nlocs; i++) {
+        struct jl_codeloc_t codeloc = unpack_codeloc(cl, i + 1, line_offset, line_bytes, to_bytes);;
+        memcpy(jl_array_data(codelocs,int32_t) + i * 3, &codeloc, sizeof(codeloc));
+    }
+    if (nlocs == 0) {
+        memset(jl_array_data(codelocs,int32_t), 0, nstmts * sizeof(struct jl_codeloc_t));
+    }
+    return codelocs;
+}
+
 void jl_init_serializer(void)
 {
     jl_task_t *ct = jl_current_task;
@@ -1109,15 +1654,15 @@ void jl_init_serializer(void)
                      jl_densearray_type, jl_function_type, jl_typename_type,
                      jl_builtin_type, jl_task_type, jl_uniontype_type,
                      jl_array_any_type, jl_intrinsic_type,
-                     jl_methtable_type, jl_typemap_level_type,
                      jl_voidpointer_type, jl_newvarnode_type, jl_abstractstring_type,
                      jl_array_symbol_type, jl_anytuple_type, jl_tparam0(jl_anytuple_type),
-                     jl_emptytuple_type, jl_array_uint8_type, jl_code_info_type,
+                     jl_emptytuple_type, jl_array_uint8_type, jl_array_uint32_type, jl_code_info_type,
                      jl_typeofbottom_type, jl_typeofbottom_type->super,
                      jl_namedtuple_type, jl_array_int32_type,
                      jl_uint32_type, jl_uint64_type,
-                     jl_type_type_mt, jl_nonfunction_mt,
                      jl_opaque_closure_type,
+                     jl_memory_any_type,
+                     jl_memory_uint8_type,
 
                      ct->ptls->root_task,
 
@@ -1134,7 +1679,8 @@ void jl_init_serializer(void)
     deser_tag[TAG_DATATYPE] = (jl_value_t*)jl_datatype_type;
     deser_tag[TAG_SLOTNUMBER] = (jl_value_t*)jl_slotnumber_type;
     deser_tag[TAG_SVEC] = (jl_value_t*)jl_simplevector_type;
-    deser_tag[TAG_ARRAY] = (jl_value_t*)jl_array_type;
+    deser_tag[TAG_ARRAY1D] = (jl_value_t*)jl_array_type;
+    deser_tag[TAG_MEMORYT] = (jl_value_t*)jl_genericmemory_type;
     deser_tag[TAG_EXPR] = (jl_value_t*)jl_expr_type;
     deser_tag[TAG_PHINODE] = (jl_value_t*)jl_phinode_type;
     deser_tag[TAG_PHICNODE] = (jl_value_t*)jl_phicnode_type;
@@ -1148,7 +1694,6 @@ void jl_init_serializer(void)
     deser_tag[TAG_INT32] = (jl_value_t*)jl_int32_type;
     deser_tag[TAG_INT64] = (jl_value_t*)jl_int64_type;
     deser_tag[TAG_UINT8] = (jl_value_t*)jl_uint8_type;
-    deser_tag[TAG_LINEINFO] = (jl_value_t*)jl_lineinfonode_type;
     deser_tag[TAG_UNIONALL] = (jl_value_t*)jl_unionall_type;
     deser_tag[TAG_GOTONODE] = (jl_value_t*)jl_gotonode_type;
     deser_tag[TAG_QUOTENODE] = (jl_value_t*)jl_quotenode_type;
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index ae4a2ed02fb7e..90091cc1f38db 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -3,9 +3,10 @@
 #include "llvm-version.h"
 #include "platform.h"
 #include <stdint.h>
-#include <sstream>
+#include <string>
 
 #include "llvm/IR/Mangler.h"
+#include <llvm/ADT/BitmaskEnum.h>
 #include <llvm/ADT/Statistic.h>
 #include <llvm/ADT/StringMap.h>
 #include <llvm/Analysis/TargetLibraryInfo.h>
@@ -14,6 +15,19 @@
 #include <llvm/ExecutionEngine/Orc/ExecutionUtils.h>
 #include <llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h>
 #include <llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h>
+#if JL_LLVM_VERSION >= 200000
+#include <llvm/ExecutionEngine/Orc/AbsoluteSymbols.h>
+#include <llvm/ExecutionEngine/Orc/EHFrameRegistrationPlugin.h>
+#endif
+#if JL_LLVM_VERSION >= 180000
+#include <llvm/ExecutionEngine/Orc/Debugging/DebugInfoSupport.h>
+#include <llvm/ExecutionEngine/Orc/Debugging/PerfSupportPlugin.h>
+#include <llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h>
+#endif
+#if JL_LLVM_VERSION >= 190000
+#include <llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h>
+#include <llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h>
+#endif
 #include <llvm/ExecutionEngine/Orc/ExecutorProcessControl.h>
 #include <llvm/IR/Verifier.h>
 #include <llvm/Support/DynamicLibrary.h>
@@ -28,31 +42,30 @@
 #include <llvm/CodeGen/TargetSubtargetInfo.h>
 #include <llvm/MC/TargetRegistry.h>
 #include <llvm/Target/TargetOptions.h>
-#include <llvm/Support/Host.h>
+#include <llvm/TargetParser/Host.h>
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Object/SymbolSize.h>
 
 using namespace llvm;
 
-#include "llvm-codegen-shared.h"
 #include "jitlayers.h"
 #include "julia_assert.h"
 #include "processor.h"
+#include "llvm-julia-task-dispatcher.h"
 
+#if JL_LLVM_VERSION >= 180000
+# include <llvm/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.h>
+#else
 # include <llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h>
+#endif
 # include <llvm/ExecutionEngine/JITLink/EHFrameSupport.h>
 # include <llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h>
-# if JL_LLVM_VERSION >= 150000
 # include <llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h>
-# endif
 # include <llvm/ExecutionEngine/SectionMemoryManager.h>
 
 #define DEBUG_TYPE "julia_jitlayers"
 
 STATISTIC(LinkedGlobals, "Number of globals linked");
-STATISTIC(CompiledCodeinsts, "Number of codeinsts compiled directly");
-STATISTIC(MaxWorkqueueSize, "Maximum number of elements in the workqueue");
-STATISTIC(IndirectCodeinsts, "Number of dependent codeinsts compiled");
 STATISTIC(SpecFPtrCount, "Number of specialized function pointers compiled");
 STATISTIC(UnspecFPtrCount, "Number of specialized function pointers compiled");
 STATISTIC(ModulesAdded, "Number of modules added to the JIT");
@@ -61,7 +74,6 @@ STATISTIC(OptO0, "Number of modules optimized at level -O0");
 STATISTIC(OptO1, "Number of modules optimized at level -O1");
 STATISTIC(OptO2, "Number of modules optimized at level -O2");
 STATISTIC(OptO3, "Number of modules optimized at level -O3");
-STATISTIC(ModulesMerged, "Number of modules merged");
 STATISTIC(InternedGlobals, "Number of global constants interned in the string pool");
 
 #ifdef _COMPILER_MSAN_ENABLED_
@@ -116,6 +128,15 @@ static void *getTLSAddress(void *control)
 }
 #endif
 
+#ifdef _OS_OPENBSD_
+extern "C" {
+    __int128 __divti3(__int128, __int128);
+    __int128 __modti3(__int128, __int128);
+    unsigned __int128 __udivti3(unsigned __int128, unsigned __int128);
+    unsigned __int128 __umodti3(unsigned __int128, unsigned __int128);
+}
+#endif
+
 // Snooping on which functions are being compiled, and how long it takes
 extern "C" JL_DLLEXPORT_CODEGEN
 void jl_dump_compiles_impl(void *s)
@@ -128,24 +149,19 @@ void jl_dump_llvm_opt_impl(void *s)
     **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (ios_t*)s;
 }
 
-static int jl_add_to_ee(
-        orc::ThreadSafeModule &M,
-        const StringMap<orc::ThreadSafeModule*> &NewExports,
-        DenseMap<orc::ThreadSafeModule*, int> &Queued,
-        std::vector<orc::ThreadSafeModule*> &Stack) JL_NOTSAFEPOINT;
 static void jl_decorate_module(Module &M) JL_NOTSAFEPOINT;
-static uint64_t getAddressForFunction(StringRef fname) JL_NOTSAFEPOINT;
 
 void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT
 {
     ++LinkedGlobals;
     Constant *P = literal_static_pointer_val(addr, GV->getValueType());
     GV->setInitializer(P);
+    GV->setDSOLocal(true);
     if (jl_options.image_codegen) {
         // If we are forcing imaging mode codegen for debugging,
         // emit external non-const symbol to avoid LLVM optimizing the code
         // similar to non-imaging mode.
-        GV->setLinkage(GlobalValue::ExternalLinkage);
+        assert(GV->hasExternalLinkage());
     }
     else {
         GV->setConstant(true);
@@ -155,375 +171,713 @@ void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT
     }
 }
 
-void jl_jit_globals(std::map<void *, GlobalVariable*> &globals) JL_NOTSAFEPOINT
+// convert local roots into global roots, if they are needed
+static void jl_optimize_roots(jl_codegen_params_t &params, jl_method_instance_t *mi, Module &M)
 {
-    for (auto &global : globals) {
-        jl_link_global(global.second, global.first);
+    JL_GC_PROMISE_ROOTED(params.temporary_roots); // rooted by caller
+    if (jl_array_dim0(params.temporary_roots) == 0)
+        return;
+    jl_method_t *m = mi->def.method;
+    if (jl_is_method(m))
+        // the method might have a root for this already; use it if so
+        JL_LOCK(&m->writelock);
+    for (size_t i = 0; i < jl_array_dim0(params.temporary_roots); i++) {
+        jl_value_t *val = jl_array_ptr_ref(params.temporary_roots, i);
+        auto ref = params.global_targets.find((void*)val);
+        if (ref == params.global_targets.end())
+            continue;
+        auto get_global_root = [val, m]() {
+            if (jl_is_globally_rooted(val))
+                return val;
+            if (jl_is_method(m) && m->roots) {
+                size_t j, len = jl_array_dim0(m->roots);
+                for (j = 0; j < len; j++) {
+                    jl_value_t *mval = jl_array_ptr_ref(m->roots, j);
+                    if (jl_egal(mval, val)) {
+                        return mval;
+                    }
+                }
+            }
+            return jl_as_global_root(val, 1);
+        };
+        jl_value_t *mval = get_global_root();
+        if (mval != val) {
+            GlobalVariable *GV = ref->second;
+            params.global_targets.erase(ref);
+            auto mref = params.global_targets.find((void*)mval);
+            if (mref != params.global_targets.end()) {
+                GV->replaceAllUsesWith(mref->second);
+                GV->eraseFromParent();
+            }
+            else {
+                params.global_targets[(void*)mval] = GV;
+            }
+        }
     }
+    if (jl_is_method(m))
+        JL_UNLOCK(&m->writelock);
 }
 
-// this generates llvm code for the lambda info
-// and adds the result to the jitlayers
-// (and the shadow module),
-// and generates code for it
-static jl_callptr_t _jl_compile_codeinst(
-        jl_code_instance_t *codeinst,
-        jl_code_info_t *src,
-        size_t world,
-        orc::ThreadSafeContext context,
-        bool is_recompile)
+static void finish_params(Module *M, jl_codegen_params_t &params, SmallVector<orc::ThreadSafeModule,0> &sharedmodules) JL_NOTSAFEPOINT
 {
-    // caller must hold codegen_lock
-    // and have disabled finalizers
-    uint64_t start_time = 0;
-    bool timed = !!*jl_ExecutionEngine->get_dump_compiles_stream();
-    if (timed)
-        start_time = jl_hrtime();
+    if (params._shared_module) {
+        sharedmodules.push_back(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx));
+    }
 
-    assert(jl_is_code_instance(codeinst));
-    assert(codeinst->min_world <= world && (codeinst->max_world >= world || codeinst->max_world == 0) &&
-        "invalid world for method-instance");
+    // In imaging mode, we can't inline global variable initializers in order to preserve
+    // the fiction that we don't know what loads from the global will return. Thus, we
+    // need to emit a separate module for the globals before any functions are compiled,
+    // to ensure that the globals are defined when they are compiled.
+    if (jl_options.image_codegen) {
+        if (!params.global_targets.empty()) {
+            void **globalslots = new void*[params.global_targets.size()];
+            void **slot = globalslots;
+            for (auto &global : params.global_targets) {
+                auto GV = global.second;
+                *slot = global.first;
+                jl_ExecutionEngine->addGlobalMapping(GV->getName(), (uintptr_t)slot);
+                slot++;
+            }
+#ifdef __clang_analyzer__
+            static void **leaker = globalslots; // for the purpose of the analyzer, we need to expressly leak this variable or it thinks we forgot to free it
+#endif
+        }
+    }
+    else {
+        StringMap<void*> NewGlobals;
+        for (auto &global : params.global_targets) {
+            NewGlobals[global.second->getName()] = global.first;
+        }
+        for (auto &GV : M->globals()) {
+            auto InitValue = NewGlobals.find(GV.getName());
+            if (InitValue != NewGlobals.end()) {
+                jl_link_global(&GV, InitValue->second);
+            }
+        }
+    }
+}
 
-    JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE);
-#ifdef USE_TRACY
-    if (is_recompile) {
-        TracyCZoneColor(JL_TIMING_DEFAULT_BLOCK->tracy_ctx, 0xFFA500);
+// Return a specptr that is ABI-compatible with `from_abi` which invokes `codeinst`.
+//
+// If `codeinst` is NULL, the returned specptr instead performs a standard `apply_generic`
+// call via a dynamic dispatch.
+extern "C" JL_DLLEXPORT_CODEGEN
+void *jl_jit_abi_converter_impl(jl_task_t *ct, jl_abi_t from_abi,
+                                jl_code_instance_t *codeinst)
+{
+    void *target = nullptr;
+    bool target_specsig = false;
+    jl_callptr_t invoke = nullptr;
+    if (codeinst != nullptr) {
+        uint8_t specsigflags;
+        jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+        void *specptr = nullptr;
+        jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &specptr, /* waitcompile */ 1);
+        if (invoke != nullptr) {
+            if (invoke == jl_fptr_const_return_addr) {
+                target = nullptr;
+                target_specsig = false;
+            }
+            else if (invoke == jl_fptr_args_addr) {
+                assert(specptr != nullptr);
+                if (!from_abi.specsig && jl_subtype(codeinst->rettype, from_abi.rt))
+                    return specptr; // no adapter required
+
+                target = specptr;
+                target_specsig = false;
+            }
+            else if (specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED) {
+                assert(specptr != nullptr);
+                if (from_abi.specsig && jl_egal(mi->specTypes, from_abi.sigt) && jl_egal(codeinst->rettype, from_abi.rt))
+                    return specptr; // no adapter required
+
+                target = specptr;
+                target_specsig = true;
+            }
+        }
     }
-#endif
-    jl_callptr_t fptr = NULL;
-    // emit the code in LLVM IR form
-    jl_codegen_params_t params(std::move(context), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context
-    params.cache = true;
-    params.world = world;
-    params.imaging = imaging_default();
-    params.debug_level = jl_options.debug_level;
-    jl_workqueue_t emitted;
+
+    orc::ThreadSafeModule result_m;
+    std::string gf_thunk_name;
     {
-        orc::ThreadSafeModule result_m =
-            jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.imaging, params.DL, params.TargetTriple);
-        jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params);
-        if (result_m)
-            emitted[codeinst] = {std::move(result_m), std::move(decls)};
-        {
-            auto temp_module = jl_create_llvm_module(name_from_method_instance(codeinst->def), params.getContext(), params.imaging);
-            jl_compile_workqueue(emitted, *temp_module, params, CompilationPolicy::Default);
+        jl_codegen_params_t params(std::make_unique<LLVMContext>(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context
+        params.getContext().setDiscardValueNames(true);
+        params.cache = true;
+        params.imaging_mode = 0;
+        result_m = jl_create_ts_module("gfthunk", params.tsctx, params.DL, params.TargetTriple);
+        Module *M = result_m.getModuleUnlocked();
+        if (target) {
+            Value *llvmtarget = literal_static_pointer_val((void*)target, PointerType::get(M->getContext(), 0));
+            gf_thunk_name = emit_abi_converter(M, params, from_abi, codeinst, llvmtarget, target_specsig);
+        }
+        else if (invoke == jl_fptr_const_return_addr) {
+            gf_thunk_name = emit_abi_constreturn(M, params, from_abi, codeinst->rettype_const);
         }
+        else {
+            Value *llvminvoke = invoke ? literal_static_pointer_val((void*)invoke, PointerType::get(M->getContext(), 0)) : nullptr;
+            gf_thunk_name = emit_abi_dispatcher(M, params, from_abi, codeinst, llvminvoke);
+        }
+        SmallVector<orc::ThreadSafeModule,0> sharedmodules;
+        finish_params(M, params, sharedmodules);
+        assert(sharedmodules.empty());
+    }
+    int8_t gc_state = jl_gc_safe_enter(ct->ptls);
+    jl_ExecutionEngine->addModule(std::move(result_m));
+    uintptr_t Addr = jl_ExecutionEngine->getFunctionAddress(gf_thunk_name);
+    jl_gc_safe_leave(ct->ptls, gc_state);
+    assert(Addr);
+    return (void*)Addr;
+}
 
-        if (params._shared_module)
-            jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx));
-        if (!params.imaging) {
-            StringMap<orc::ThreadSafeModule*> NewExports;
-            StringMap<void*> NewGlobals;
-            for (auto &global : params.globals) {
-                NewGlobals[global.second->getName()] = global.first;
+
+  // lock for places where only single threaded behavior is implemented, so we need GC support
+static jl_mutex_t jitlock;
+  // locks and barriers for this state
+static std::mutex engine_lock;
+static std::condition_variable engine_wait;
+static int threads_in_compiler_phase;
+  // the TSM for each codeinst
+static SmallVector<orc::ThreadSafeModule,0> sharedmodules;
+static DenseMap<jl_code_instance_t*, orc::ThreadSafeModule> emittedmodules;
+  // the invoke and specsig function names in the JIT
+static DenseMap<jl_code_instance_t*, jl_llvm_functions_t> invokenames;
+  // everything that any thread wants to compile right now
+static DenseSet<jl_code_instance_t*> compileready;
+  // everything that any thread has compiled recently
+static DenseSet<jl_code_instance_t*> linkready;
+  // a map from a codeinst to the outgoing edges needed before linking it
+static DenseMap<jl_code_instance_t*, SmallVector<jl_code_instance_t*,0>> complete_graph;
+  // the state for each codeinst and the number of unresolved edges (we don't
+  // really need this once JITLink is available everywhere, since every module
+  // is automatically complete, and we can emit any required fixups later as a
+  // separate module)
+static DenseMap<jl_code_instance_t*, std::tuple<jl_codegen_params_t, int>> incompletemodules;
+  // the set of incoming unresolved edges resolved by a codeinstance
+static DenseMap<jl_code_instance_t*, SmallVector<jl_code_instance_t*,0>> incomplete_rgraph;
+
+// Lock hierarchy here:
+//   jitlock is outermost, can contain others and allows GC
+//   engine_lock is next
+//   ThreadSafeContext locks are next, they should not be nested (unless engine_lock is also held, but this may make TSAN sad anyways)
+//   jl_ExecutionEngine internal locks are exclusive to this list, since OrcJIT promises to never hold a lock over a materialization unit:
+//        construct a query object from a query set and query handler
+//        lock the session
+//        lodge query against requested symbols, collect required materializers (if any)
+//        unlock the session
+//        dispatch materializers (if any)
+//     However, this guarantee relies on Julia releasing all TSC locks before causing any materialization units to be dispatched
+//     as materialization may need to acquire TSC locks.
+
+
+static int jl_analyze_workqueue(jl_code_instance_t *callee, jl_codegen_params_t &params, bool forceall=false) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
+{
+    jl_task_t *ct = jl_current_task;
+    jl_workqueue_t edges;
+    std::swap(params.workqueue, edges);
+    for (auto &it : edges) {
+        jl_code_instance_t *codeinst = it.first;
+        JL_GC_PROMISE_ROOTED(codeinst);
+        auto &proto = it.second;
+        if (proto.external_linkage || proto.decl->isDeclaration()) { // if it is not expected externally and has a definition locally, there is no need to patch this edge up
+            // try to emit code for this item from the workqueue
+            StringRef invokeName = "";
+            StringRef preal_decl = "";
+            bool preal_specsig = false;
+            jl_callptr_t invoke = nullptr;
+            bool isedge = false;
+            assert(params.cache);
+            // Checking the cache here is merely an optimization and not strictly required
+            // But it must be consistent with the following invokenames lookup, which is protected by the engine_lock
+            uint8_t specsigflags;
+            void *fptr;
+            void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile) JL_NOTSAFEPOINT; // declare it is not a safepoint (or deadlock) in this file due to 0 parameter
+            jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0);
+            //if (specsig ? specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED : invoke == jl_fptr_args_addr)
+            if (invoke == jl_fptr_args_addr) {
+                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst);
             }
-            for (auto &def : emitted) {
-                orc::ThreadSafeModule &TSM = std::get<0>(def.second);
-                //The underlying context object is still locked because params is not destroyed yet
-                auto M = TSM.getModuleUnlocked();
-                for (auto &F : M->global_objects()) {
-                    if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
-                        NewExports[F.getName()] = &TSM;
+            else if (specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED) {
+                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst);
+                preal_specsig = true;
+            }
+            bool force = forceall || invoke != nullptr;
+            if (preal_decl.empty()) {
+                auto it = invokenames.find(codeinst);
+                if (it != invokenames.end()) {
+                    auto &decls = it->second;
+                    invokeName = decls.functionObject;
+                    if (decls.functionObject == "jl_fptr_args") {
+                        preal_decl = decls.specFunctionObject;
+                        isedge = true;
                     }
-                }
-                // Let's link all globals here also (for now)
-                for (auto &GV : M->globals()) {
-                    auto InitValue = NewGlobals.find(GV.getName());
-                    if (InitValue != NewGlobals.end()) {
-                        jl_link_global(&GV, InitValue->second);
+                    else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") {
+                        preal_decl = decls.specFunctionObject;
+                        preal_specsig = true;
+                        isedge = true;
                     }
+                    force = true;
                 }
             }
-            DenseMap<orc::ThreadSafeModule*, int> Queued;
-            std::vector<orc::ThreadSafeModule*> Stack;
-            for (auto &def : emitted) {
-                // Add the results to the execution engine now
-                orc::ThreadSafeModule &M = std::get<0>(def.second);
-                jl_add_to_ee(M, NewExports, Queued, Stack);
-                assert(Queued.empty() && Stack.empty() && !M);
+            if (preal_decl.empty()) {
+                // there may be an equivalent method already compiled (or at least registered with the JIT to compile), in which case we should be using that instead
+                jl_code_instance_t *compiled_ci = jl_get_ci_equiv(codeinst, 0);
+                if (compiled_ci != codeinst) {
+                    codeinst = compiled_ci;
+                    uint8_t specsigflags;
+                    void *fptr;
+                    jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0);
+                    //if (specsig ? specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED : invoke == jl_fptr_args_addr)
+                    if (invoke == jl_fptr_args_addr) {
+                        preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst);
+                    }
+                    else if (specsigflags & JL_CI_FLAGS_SPECPTR_SPECIALIZED) {
+                        preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst);
+                        preal_specsig = true;
+                    }
+                    if (preal_decl.empty()) {
+                        auto it = invokenames.find(codeinst);
+                        if (it != invokenames.end()) {
+                            auto &decls = it->second;
+                            invokeName = decls.functionObject;
+                            if (decls.functionObject == "jl_fptr_args") {
+                                preal_decl = decls.specFunctionObject;
+                                isedge = true;
+                            }
+                            else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") {
+                                preal_decl = decls.specFunctionObject;
+                                preal_specsig = true;
+                                isedge = true;
+                            }
+                        }
+                    }
+                }
             }
-        } else {
-            jl_jit_globals(params.globals);
-            auto main = std::move(emitted[codeinst].first);
-            for (auto &def : emitted) {
-                if (def.first != codeinst) {
-                    jl_merge_module(main, std::move(def.second.first));
+            if (!preal_decl.empty() || force) {
+                // if we have a prototype emitted, compare it to what we emitted earlier
+                Module *mod = proto.decl->getParent();
+                Function *pinvoke = nullptr;
+                if (proto.decl->isDeclaration()) {
+                    if (preal_decl.empty()) {
+                        if (invoke != nullptr && invokeName.empty()) {
+                            assert(invoke != jl_fptr_args_addr);
+                            if (invoke == jl_fptr_sparam_addr)
+                                invokeName = "jl_fptr_sparam";
+                            else if (invoke == jl_f_opaque_closure_call_addr)
+                                invokeName = "jl_f_opaque_closure_call";
+                            else
+                                invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst);
+                        }
+                        pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params);
+                        if (!proto.specsig) {
+                            proto.decl->replaceAllUsesWith(pinvoke);
+                            proto.decl->eraseFromParent();
+                            proto.decl = pinvoke;
+                        }
+                        isedge = false;
+                    }
+                    if (proto.specsig && !preal_specsig) {
+                        // get or build an fptr1 that can invoke codeinst
+                        if (pinvoke == nullptr)
+                            pinvoke = get_or_emit_fptr1(preal_decl, mod);
+                        // emit specsig-to-(jl)invoke conversion
+                        proto.decl->setLinkage(GlobalVariable::InternalLinkage);
+                        //protodecl->setAlwaysInline();
+                        jl_init_function(proto.decl, params);
+                        // TODO: maybe this can be cached in codeinst->specfptr?
+                        int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); // codegen may contain safepoints (such as jl_subtype calls)
+                        jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+                        size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed
+                        bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
+                        emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke);
+                        jl_gc_unsafe_leave(ct->ptls, gc_state);
+                        preal_decl = ""; // no need to fixup the name
+                    }
+                }
+                else if (proto.specsig && !preal_specsig) {
+                    // privatize our definition, since for some reason we couldn't use the external one but have an internal one
+                    proto.decl->setLinkage(GlobalValue::PrivateLinkage);
+                    preal_decl = ""; // no need to fixup the name
+                }
+                if (!preal_decl.empty()) {
+                    // merge and/or rename this prototype to the real function
+                    if (Function *specfun = cast_or_null<Function>(mod->getNamedValue(preal_decl))) {
+                        if (proto.decl != specfun) {
+                            proto.decl->replaceAllUsesWith(specfun);
+                            if (!proto.decl->isDeclaration() && specfun->isDeclaration())
+                                linkFunctionBody(*specfun, *proto.decl);
+                            proto.decl->eraseFromParent();
+                            proto.decl = specfun;
+                        }
+                    }
+                    else {
+                        proto.decl->setName(preal_decl);
+                    }
+                }
+                if (proto.oc) { // additionally, if we are dealing with an OC constructor, then we might also need to fix up the fptr1 reference too
+                    assert(proto.specsig);
+                    StringRef ocinvokeDecl = invokeName;
+                    if (invoke != nullptr && ocinvokeDecl.empty()) {
+                        // check for some special tokens used by opaque_closure.c and convert those to their real functions
+                        assert(invoke != jl_fptr_args_addr);
+                        assert(invoke != jl_fptr_sparam_addr);
+                        if (invoke == jl_fptr_interpret_call_addr)
+                            ocinvokeDecl = "jl_fptr_interpret_call";
+                        else if (invoke == jl_fptr_const_return_addr)
+                            ocinvokeDecl = "jl_fptr_const_return";
+                        else if (invoke == jl_f_opaque_closure_call_addr)
+                            ocinvokeDecl = "jl_f_opaque_closure_call";
+                        //else if (invoke == jl_interpret_opaque_closure_addr)
+                        else
+                            ocinvokeDecl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst);
+                    }
+                    // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too
+                    // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure
+                    if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") {
+                        if (pinvoke == nullptr)
+                            ocinvokeDecl = get_or_emit_fptr1(preal_decl, mod)->getName();
+                        else
+                            ocinvokeDecl = pinvoke->getName();
+                    }
+                    assert(!ocinvokeDecl.empty());
+                    assert(ocinvokeDecl != "jl_fptr_args");
+                    assert(ocinvokeDecl != "jl_fptr_sparam");
+                    // merge and/or rename this prototype to the real function
+                    if (Function *specfun = cast_or_null<Function>(mod->getNamedValue(ocinvokeDecl))) {
+                        if (proto.oc != specfun) {
+                            proto.oc->replaceAllUsesWith(specfun);
+                            proto.oc->eraseFromParent();
+                            proto.oc = specfun;
+                        }
+                    }
+                    else {
+                        proto.oc->setName(ocinvokeDecl);
+                    }
                 }
             }
-            jl_ExecutionEngine->addModule(std::move(main));
-        }
-        ++CompiledCodeinsts;
-        MaxWorkqueueSize.updateMax(emitted.size());
-        IndirectCodeinsts += emitted.size() - 1;
-    }
-
-    size_t i = 0;
-    for (auto &def : emitted) {
-        jl_code_instance_t *this_code = def.first;
-        if (i < jl_timing_print_limit)
-            jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_DEFAULT_BLOCK);
-
-        jl_llvm_functions_t decls = std::get<1>(def.second);
-        jl_callptr_t addr;
-        bool isspecsig = false;
-        if (decls.functionObject == "jl_fptr_args") {
-            addr = jl_fptr_args_addr;
-        }
-        else if (decls.functionObject == "jl_fptr_sparam") {
-            addr = jl_fptr_sparam_addr;
-        }
-        else if (decls.functionObject == "jl_f_opaque_closure_call") {
-            addr = jl_f_opaque_closure_call_addr;
+            else {
+                isedge = true;
+                params.workqueue.push_back(it);
+                incomplete_rgraph[codeinst].push_back(callee);
+            }
+            if (isedge)
+                complete_graph[callee].push_back(codeinst);
         }
-        else {
-            addr = (jl_callptr_t)getAddressForFunction(decls.functionObject);
-            isspecsig = true;
+    }
+    return params.workqueue.size();
+}
+
+// move codeinst (and deps) from incompletemodules to emitted modules
+// and populate compileready from complete_graph
+static void prepare_compile(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
+{
+    SmallVector<jl_code_instance_t*> workqueue;
+    workqueue.push_back(codeinst);
+    while (!workqueue.empty()) {
+        codeinst = workqueue.pop_back_val();
+        if (!invokenames.count(codeinst)) {
+            // this means it should be compiled already while the callee was in stasis
+            assert(jl_is_compiled_codeinst(codeinst));
+            continue;
         }
-        if (!decls.specFunctionObject.empty()) {
-            void *prev_specptr = NULL;
-            auto spec = (void*)getAddressForFunction(decls.specFunctionObject);
-            if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) {
-                // only set specsig and invoke if we were the first to set specptr
-                jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig);
-                // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr
-                // either assumes that specptr was null, doesn't care about specptr,
-                // or will wait until specsigflags has 0b10 set before reloading invoke
-                jl_atomic_store_release(&this_code->invoke, addr);
-                jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig));
-            } else {
-                //someone else beat us, don't commit any results
-                while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) {
-                    jl_cpu_pause();
+        // if this was incomplete, force completion now of it
+        auto it = incompletemodules.find(codeinst);
+        if (it != incompletemodules.end()) {
+            int waiting = 0;
+            auto &edges = complete_graph[codeinst];
+            auto edges_end = std::remove_if(edges.begin(), edges.end(), [&waiting, codeinst] (jl_code_instance_t *edge) JL_NOTSAFEPOINT -> bool {
+                auto &redges = incomplete_rgraph[edge];
+                // waiting += std::erase(redges, codeinst);
+                auto redges_end = std::remove(redges.begin(), redges.end(), codeinst);
+                if (redges_end != redges.end()) {
+                    waiting += redges.end() - redges_end;
+                    redges.erase(redges_end, redges.end());
+                    assert(!invokenames.count(edge));
                 }
-                addr = jl_atomic_load_relaxed(&this_code->invoke);
-            }
-        } else {
-            jl_callptr_t prev_invoke = NULL;
-            if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) {
-                addr = prev_invoke;
-                //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other
-                //known lesser function)
+                return !invokenames.count(edge);
+            });
+            edges.erase(edges_end, edges.end());
+            assert(waiting == std::get<1>(it->second));
+            std::get<1>(it->second) = 0;
+            auto &params = std::get<0>(it->second);
+            params.tsctx_lock = params.tsctx.getLock();
+            waiting = jl_analyze_workqueue(codeinst, params, true); // may safepoint
+            assert(!waiting); (void)waiting;
+            Module *M = emittedmodules[codeinst].getModuleUnlocked();
+            finish_params(M, params, sharedmodules);
+            incompletemodules.erase(it);
+        }
+        // and then indicate this should be compiled now
+        if (!linkready.count(codeinst) && compileready.insert(codeinst).second) {
+            auto edges = complete_graph.find(codeinst);
+            if (edges != complete_graph.end()) {
+                workqueue.append(edges->second);
             }
         }
-        if (this_code == codeinst)
-            fptr = addr;
-        i++;
     }
-    if (i > jl_timing_print_limit)
-        jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "... <%d methods truncated>", i - 10);
+}
 
-    uint64_t end_time = 0;
-    if (timed)
-        end_time = jl_hrtime();
-
-    // If logging of the compilation stream is enabled,
-    // then dump the method-instance specialization type to the stream
-    jl_method_instance_t *mi = codeinst->def;
-    if (jl_is_method(mi->def.method)) {
-        auto stream = *jl_ExecutionEngine->get_dump_compiles_stream();
-        if (stream) {
-            ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time);
-            jl_static_show((JL_STREAM*)stream, mi->specTypes);
-            ios_printf(stream, "\"\n");
+// notify any other pending work that this edge now has code defined
+static void complete_emit(jl_code_instance_t *edge) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
+{
+    auto notify = incomplete_rgraph.find(edge);
+    if (notify == incomplete_rgraph.end())
+        return;
+    auto redges = std::move(notify->second);
+    incomplete_rgraph.erase(notify);
+    for (size_t i = 0; i < redges.size(); i++) {
+        jl_code_instance_t *callee = redges[i];
+        auto it = incompletemodules.find(callee);
+        assert(it != incompletemodules.end());
+        if (--std::get<1>(it->second) == 0) {
+            auto &params = std::get<0>(it->second);
+            params.tsctx_lock = params.tsctx.getLock();
+            assert(callee == it->first);
+            orc::ThreadSafeModule &M = emittedmodules[callee];
+            emit_always_inline(M, params); // may safepoint
+            int waiting = jl_analyze_workqueue(callee, params); // may safepoint
+            assert(!waiting); (void)waiting;
+            finish_params(M.getModuleUnlocked(), params, sharedmodules);
+            incompletemodules.erase(it);
         }
     }
-    return fptr;
 }
 
-const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params);
 
-// compile a C-callable alias
-extern "C" JL_DLLEXPORT_CODEGEN
-int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
+// set the invoke field for codeinst (and all deps, and assist with other pending work from other threads) now
+static void jl_compile_codeinst_now(jl_code_instance_t *codeinst)
 {
-    auto ct = jl_current_task;
-    bool timed = (ct->reentrant_timing & 1) == 0;
-    if (timed)
-        ct->reentrant_timing |= 1;
-    uint64_t compiler_start_time = 0;
-    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
-    if (measure_compile_time_enabled)
-        compiler_start_time = jl_hrtime();
-    orc::ThreadSafeContext ctx;
-    auto into = unwrap(llvmmod);
-    jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
-    orc::ThreadSafeModule backing;
-    if (into == NULL) {
-        if (!pparams) {
-            ctx = jl_ExecutionEngine->acquireContext();
+    jl_unique_gcsafe_lock lock(engine_lock);
+    if (!invokenames.count(codeinst))
+        return;
+    threads_in_compiler_phase++;
+    prepare_compile(codeinst); // may safepoint
+    while (1) {
+        // TODO: split up this work by ThreadSafeContext, so two threads don't need to get the same locks and stall
+        if (!sharedmodules.empty()) {
+            auto TSM = sharedmodules.pop_back_val();
+            lock.native.unlock();
+            {
+                auto Lock = TSM.getContext().getLock();
+                jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint
+            }
+            jl_ExecutionEngine->addModule(std::move(TSM));
+            lock.native.lock();
         }
-        backing = jl_create_ts_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->imaging : imaging_default());
-        into = &backing;
-    }
-    JL_LOCK(&jl_codegen_lock);
-    auto target_info = into->withModuleDo([&](Module &M) {
-        return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
-    });
-    jl_codegen_params_t params(into->getContext(), std::move(target_info.first), std::move(target_info.second));
-    params.imaging = imaging_default();
-    params.debug_level = jl_options.debug_level;
-    if (pparams == NULL)
-        pparams = &params;
-    assert(pparams->tsctx.getContext() == into->getContext().getContext());
-    const char *name = jl_generate_ccallable(wrap(into), sysimg, declrt, sigt, *pparams);
-    bool success = true;
-    if (!sysimg) {
-        if (jl_ExecutionEngine->getGlobalValueAddress(name)) {
-            success = false;
+        else if (!compileready.empty()) {
+            // move a function from compileready to linkready then compile it
+            auto compilenext = compileready.begin();
+            codeinst = *compilenext;
+            compileready.erase(compilenext);
+            auto TSMref = emittedmodules.find(codeinst);
+            assert(TSMref != emittedmodules.end());
+            auto TSM = std::move(TSMref->second);
+            linkready.insert(codeinst);
+            emittedmodules.erase(TSMref);
+            lock.native.unlock();
+            uint64_t start_time = jl_hrtime();
+            {
+                auto Lock = TSM.getContext().getLock();
+                jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint
+            }
+            jl_ExecutionEngine->addModule(std::move(TSM)); // may safepoint
+            // If logging of the compilation stream is enabled,
+            // then dump the method-instance specialization type to the stream
+            jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+            uint64_t end_time = jl_hrtime();
+            if (jl_is_method(mi->def.method)) {
+                auto stream = *jl_ExecutionEngine->get_dump_compiles_stream();
+                if (stream) {
+                    ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time);
+                    jl_static_show((JL_STREAM*)stream, mi->specTypes);
+                    ios_printf(stream, "\"\n");
+                }
+            }
+            jl_atomic_store_relaxed(&codeinst->time_compile,
+                julia_double_to_half(julia_half_to_float(jl_atomic_load_relaxed(&codeinst->time_compile))
+                    + (end_time - start_time) * 1e-9));
+            lock.native.lock();
         }
-        if (success && p == NULL) {
-            jl_jit_globals(params.globals);
-            assert(params.workqueue.empty());
-            if (params._shared_module)
-                jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx));
+        else {
+            break;
         }
-        if (success && llvmmod == NULL)
-            jl_ExecutionEngine->addModule(std::move(*into));
     }
-    JL_UNLOCK(&jl_codegen_lock);
-    if (timed) {
-        if (measure_compile_time_enabled) {
-            auto end = jl_hrtime();
-            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+    codeinst = nullptr;
+    // barrier until all threads have finished calling addModule
+    if (--threads_in_compiler_phase == 0) {
+        // the last thread out will finish linking everything
+        // then release all of the other threads
+        // move the function pointers out from invokenames to the codeinst
+
+        // batch compile job for all new functions
+        SmallVector<StringRef> NewDefs;
+        for (auto &this_code : linkready) {
+            auto it = invokenames.find(this_code);
+            assert(it != invokenames.end());
+            jl_llvm_functions_t &decls = it->second;
+            assert(!decls.functionObject.empty());
+            if (decls.functionObject != "jl_fptr_args" &&
+                decls.functionObject != "jl_fptr_sparam" &&
+                decls.functionObject != "jl_f_opaque_closure_call")
+                NewDefs.push_back(decls.functionObject);
+            if (!decls.specFunctionObject.empty())
+                NewDefs.push_back(decls.specFunctionObject);
         }
-        ct->reentrant_timing &= ~1ull;
+        auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs);
+
+        size_t nextaddr = 0;
+        for (auto &this_code : linkready) {
+            auto it = invokenames.find(this_code);
+            assert(it != invokenames.end());
+            jl_llvm_functions_t &decls = it->second;
+            jl_callptr_t addr;
+            bool isspecsig = false;
+            if (decls.functionObject == "jl_fptr_args") {
+                addr = jl_fptr_args_addr;
+            }
+            else if (decls.functionObject == "jl_fptr_sparam") {
+                addr = jl_fptr_sparam_addr;
+            }
+            else if (decls.functionObject == "jl_f_opaque_closure_call") {
+                addr = jl_f_opaque_closure_call_addr;
+            }
+            else {
+                assert(NewDefs[nextaddr] == decls.functionObject);
+                addr = (jl_callptr_t)Addrs[nextaddr++];
+                assert(addr);
+                isspecsig = true;
+            }
+            if (!decls.specFunctionObject.empty()) {
+                void *prev_specptr = nullptr;
+                assert(NewDefs[nextaddr] == decls.specFunctionObject);
+                void *spec = (void*)Addrs[nextaddr++];
+                assert(spec);
+                if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) {
+                    // only set specsig and invoke if we were the first to set specptr
+                    // Clear compilation state bits, then set SPECPTR_SPECIALIZED if needed
+                    if (isspecsig)
+                        jl_atomic_fetch_or_relaxed(&this_code->flags, JL_CI_FLAGS_SPECPTR_SPECIALIZED);
+                    // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr
+                    // either assumes that specptr was null, doesn't care about specptr,
+                    // or will wait until flags has 0b10 set before reloading invoke
+                    jl_atomic_store_release(&this_code->invoke, addr);
+                    // Set INVOKE_MATCHES_SPECPTR to signal completion
+                    jl_atomic_fetch_or_relaxed(&this_code->flags, JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR);
+                }
+                else {
+                    //someone else beat us, don't commit any results
+                    while (!(jl_atomic_load_acquire(&this_code->flags) & JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR)) {
+                        jl_cpu_pause();
+                    }
+                    addr = jl_atomic_load_relaxed(&this_code->invoke);
+                }
+            }
+            else {
+                jl_callptr_t prev_invoke = nullptr;
+                // Allow replacing addr if it is either nullptr or our special waiting placeholder.
+                if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) {
+                    if (prev_invoke == jl_fptr_wait_for_compiled_addr && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) {
+                        addr = prev_invoke;
+                        //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other
+                        //known lesser function)
+                    }
+                }
+            }
+            invokenames.erase(it);
+            complete_graph.erase(this_code);
+        }
+        linkready.clear();
+        engine_wait.notify_all();
     }
-    if (ctx.getContext()) {
-        jl_ExecutionEngine->releaseContext(std::move(ctx));
+    else while (threads_in_compiler_phase) {
+        lock.wait(engine_wait);
     }
-    return success;
 }
 
-// declare a C-callable entry point; called during code loading from the toplevel
-extern "C" JL_DLLEXPORT_CODEGEN
-void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt)
-{
-    // validate arguments. try to do as many checks as possible here to avoid
-    // throwing errors later during codegen.
-    JL_TYPECHK(@ccallable, type, declrt);
-    if (!jl_is_tuple_type(sigt))
-        jl_type_error("@ccallable", (jl_value_t*)jl_anytuple_type_type, (jl_value_t*)sigt);
-    // check that f is a guaranteed singleton type
-    jl_datatype_t *ft = (jl_datatype_t*)jl_tparam0(sigt);
-    if (!jl_is_datatype(ft) || ft->instance == NULL)
-        jl_error("@ccallable: function object must be a singleton");
-
-    // compute / validate return type
-    if (!jl_is_concrete_type(declrt) || jl_is_kind(declrt))
-        jl_error("@ccallable: return type must be concrete and correspond to a C type");
-    if (!jl_type_mappable_to_c(declrt))
-        jl_error("@ccallable: return type doesn't correspond to a C type");
-
-    // validate method signature
-    size_t i, nargs = jl_nparams(sigt);
-    for (i = 1; i < nargs; i++) {
-        jl_value_t *ati = jl_tparam(sigt, i);
-        if (!jl_is_concrete_type(ati) || jl_is_kind(ati) || !jl_type_mappable_to_c(ati))
-            jl_error("@ccallable: argument types must be concrete");
-    }
-
-    // save a record of this so that the alias is generated when we write an object file
-    jl_method_t *meth = (jl_method_t*)jl_methtable_lookup(ft->name->mt, (jl_value_t*)sigt, jl_atomic_load_acquire(&jl_world_counter));
-    if (!jl_is_method(meth))
-        jl_error("@ccallable: could not find requested method");
-    JL_GC_PUSH1(&meth);
-    meth->ccallable = jl_svec2(declrt, (jl_value_t*)sigt);
-    jl_gc_wb(meth, meth->ccallable);
-    JL_GC_POP();
-
-    // create the alias in the current runtime environment
-    int success = jl_compile_extern_c(NULL, NULL, NULL, declrt, (jl_value_t*)sigt);
-    if (!success)
-        jl_error("@ccallable was already defined for this method name");
-}
+void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER;
 
-// this compiles li and emits fptr
 extern "C" JL_DLLEXPORT_CODEGEN
-jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
+void jl_emit_codeinst_to_jit_impl(
+        jl_code_instance_t *codeinst,
+        jl_code_info_t *src)
 {
-    auto ct = jl_current_task;
-    bool timed = (ct->reentrant_timing & 1) == 0;
-    if (timed)
-        ct->reentrant_timing |= 1;
-    uint64_t compiler_start_time = 0;
-    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
-    bool is_recompile = false;
-    if (measure_compile_time_enabled)
-        compiler_start_time = jl_hrtime();
-    // if we don't have any decls already, try to generate it now
-    jl_code_info_t *src = NULL;
-    jl_code_instance_t *codeinst = NULL;
-    JL_GC_PUSH2(&src, &codeinst);
-    JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
-    jl_value_t *ci = jl_rettype_inferred_addr(mi, world, world);
-    if (ci != jl_nothing)
-        codeinst = (jl_code_instance_t*)ci;
-    if (codeinst) {
-        src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
-        if ((jl_value_t*)src == jl_nothing)
-            src = NULL;
-        else if (jl_is_method(mi->def.method))
-            src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src);
+    if (jl_is_compiled_codeinst(codeinst))
+        return;
+    { // lock scope
+        jl_unique_gcsafe_lock lock(engine_lock);
+        if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst))
+            return;
     }
-    else {
-        // identify whether this is an invalidated method that is being recompiled
-        is_recompile = jl_atomic_load_relaxed(&mi->cache) != NULL;
-    }
-    if (src == NULL && jl_is_method(mi->def.method) &&
-             jl_symbol_name(mi->def.method->name)[0] != '@') {
-        if (mi->def.method->source != jl_nothing) {
-            // If the caller didn't provide the source and IR is available,
-            // see if it is inferred, or try to infer it for ourself.
-            // (but don't bother with typeinf on macros or toplevel thunks)
-            src = jl_type_infer(mi, world, 0);
-        }
+    JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE);
+    // emit the code in LLVM IR form to the new context
+    jl_codegen_params_t params(std::make_unique<LLVMContext>(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context
+    params.getContext().setDiscardValueNames(true);
+    params.cache = true;
+    params.imaging_mode = 0;
+    orc::ThreadSafeModule result_m =
+        jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), params.tsctx, params.DL, params.TargetTriple);
+    params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
+    JL_GC_PUSH1(&params.temporary_roots);
+    jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints
+    if (!result_m) {
+        JL_GC_POP();
+        return;
     }
-    jl_code_instance_t *compiled = jl_method_compiled(mi, world);
-    if (compiled) {
-        codeinst = compiled;
+    jl_optimize_roots(params, jl_get_ci_mi(codeinst), *result_m.getModuleUnlocked()); // contains safepoints
+    params.temporary_roots = nullptr;
+    params.temporary_roots_set.clear();
+    JL_GC_POP();
+    { // drop lock before acquiring engine_lock
+        auto release = std::move(params.tsctx_lock);
     }
-    else if (src && jl_is_code_info(src)) {
-        if (!codeinst) {
-            codeinst = jl_get_method_inferred(mi, src->rettype, src->min_world, src->max_world);
-            if (src->inferred) {
-                jl_value_t *null = nullptr;
-                jl_atomic_cmpswap_relaxed(&codeinst->inferred, &null, jl_nothing);
-            }
-        }
-        ++SpecFPtrCount;
-        _jl_compile_codeinst(codeinst, src, world, *jl_ExecutionEngine->getContext(), is_recompile);
-        if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL)
-            codeinst = NULL;
+    jl_unique_gcsafe_lock lock(engine_lock);
+    if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst))
+        return; // destroy everything
+    const std::string &specf = decls.specFunctionObject;
+    const std::string &f = decls.functionObject;
+    assert(!f.empty());
+    // Prepare debug info to receive this function
+    // record that this function name came from this linfo,
+    // so we can build a reverse mapping for debug-info.
+    bool toplevel = !jl_is_method(jl_get_ci_mi(codeinst)->def.method);
+    if (!toplevel) {
+        // don't remember toplevel thunks because
+        // they may not be rooted in the gc for the life of the program,
+        // and the runtime doesn't notify us when the code becomes unreachable :(
+        if (!specf.empty())
+            jl_add_code_in_flight(specf, codeinst, params.DL);
+        if (f != "jl_fptr_args" && f != "jl_fptr_sparam")
+            jl_add_code_in_flight(f, codeinst, params.DL);
     }
-    else {
-        codeinst = NULL;
+    jl_callptr_t expected = NULL;
+    jl_atomic_cmpswap_relaxed(&codeinst->invoke, &expected, jl_fptr_wait_for_compiled_addr);
+    invokenames[codeinst] = std::move(decls);
+    complete_emit(codeinst);
+    params.tsctx_lock = params.tsctx.getLock(); // re-acquire lock
+    emit_always_inline(result_m, params);
+    int waiting = jl_analyze_workqueue(codeinst, params);
+    if (waiting) {
+        auto release = std::move(params.tsctx_lock); // unlock again before moving from it
+        incompletemodules.try_emplace(codeinst, std::move(params), waiting);
     }
-    JL_UNLOCK(&jl_codegen_lock);
-    if (timed) {
-        if (measure_compile_time_enabled) {
-            uint64_t t_comp = jl_hrtime() - compiler_start_time;
-            if (is_recompile) {
-                jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
-            }
-            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
-        }
-        ct->reentrant_timing &= ~1ull;
+    else {
+        finish_params(result_m.getModuleUnlocked(), params, sharedmodules);
     }
-    JL_GC_POP();
-    return codeinst;
+    emittedmodules[codeinst] = std::move(result_m);
 }
 
+
 extern "C" JL_DLLEXPORT_CODEGEN
-void jl_generate_fptr_for_oc_wrapper_impl(jl_code_instance_t *oc_wrap)
+int jl_compile_codeinst_impl(jl_code_instance_t *ci)
 {
-    if (jl_atomic_load_relaxed(&oc_wrap->invoke) != NULL) {
-        return;
-    }
-    JL_LOCK(&jl_codegen_lock);
-    if (jl_atomic_load_relaxed(&oc_wrap->invoke) == NULL) {
-        _jl_compile_codeinst(oc_wrap, NULL, 1, *jl_ExecutionEngine->getContext(), 0);
+    int newly_compiled = 0;
+    if (!jl_is_compiled_codeinst(ci)) {
+        ++SpecFPtrCount;
+        uint64_t start = jl_typeinf_timing_begin();
+        jl_compile_codeinst_now(ci);
+        jl_typeinf_timing_end(start, 0);
+        newly_compiled = 1;
     }
-    JL_UNLOCK(&jl_codegen_lock); // Might GC
+    return newly_compiled;
 }
 
 extern "C" JL_DLLEXPORT_CODEGEN
@@ -540,31 +894,44 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
     uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
     if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
-    JL_LOCK(&jl_codegen_lock);
-    if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
-        jl_code_info_t *src = NULL;
-        JL_GC_PUSH1(&src);
-        jl_method_t *def = unspec->def->def.method;
-        if (jl_is_method(def)) {
-            src = (jl_code_info_t*)def->source;
-            if (src && (jl_value_t*)src != jl_nothing)
-                src = jl_uncompress_ir(def, NULL, (jl_value_t*)src);
-        }
-        else {
-            src = (jl_code_info_t*)jl_atomic_load_relaxed(&unspec->def->uninferred);
-            assert(src);
-        }
-        if (src) {
+    jl_code_info_t *src = NULL;
+    JL_GC_PUSH1(&src);
+    jl_method_t *def = jl_get_ci_mi(unspec)->def.method;
+    if (jl_is_method(def)) {
+        src = (jl_code_info_t*)def->source;
+        if (src && (jl_value_t*)src != jl_nothing)
+            src = jl_uncompress_ir(def, NULL, (jl_value_t*)src);
+    }
+    else {
+        jl_method_instance_t *mi = jl_get_ci_mi(unspec);
+        jl_code_instance_t *uninferred = jl_cached_uninferred(jl_atomic_load_relaxed(&mi->cache), 1);
+        assert(uninferred);
+        src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred);
+        assert(src);
+    }
+    if (src) {
+        // TODO: first prepare recursive_compile_graph(unspec, src) before taking this lock to avoid recursion?
+        JL_LOCK(&jitlock); // TODO: use a better lock
+        if (!jl_is_compiled_codeinst(unspec)) {
             assert(jl_is_code_info(src));
             ++UnspecFPtrCount;
-            _jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext(), 0);
+            jl_svec_t *edges = (jl_svec_t*)src->edges;
+            if (jl_is_svec(edges)) {
+                jl_atomic_store_release(&unspec->edges, edges); // n.b. this assumes the field was always empty svec(), which is not entirely true
+                jl_gc_wb(unspec, edges);
+            }
+            jl_debuginfo_t *debuginfo = src->debuginfo;
+            jl_atomic_store_release(&unspec->debuginfo, debuginfo); // n.b. this assumes the field was previously NULL, which is not entirely true
+            jl_gc_wb(unspec, debuginfo);
+            jl_emit_codeinst_to_jit(unspec, src);
+            jl_compile_codeinst_now(unspec);
         }
-        jl_callptr_t null = nullptr;
-        // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
-        jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr);
-        JL_GC_POP();
+        JL_UNLOCK(&jitlock); // Might GC
     }
-    JL_UNLOCK(&jl_codegen_lock); // Might GC
+    JL_GC_POP();
+    jl_callptr_t null = nullptr;
+    // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
+    jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr);
     if (timed) {
         if (measure_compile_time_enabled) {
             auto end = jl_hrtime();
@@ -581,116 +948,86 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
         char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary)
 {
     // printing via disassembly
-    jl_code_instance_t *codeinst = jl_generate_fptr(mi, world);
+    jl_code_instance_t *codeinst = jl_compile_method_internal(mi, world);
     if (codeinst) {
         uintptr_t fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
-        if (getwrapper)
-            return jl_dump_fptr_asm(fptr, emit_mc, asm_variant, debuginfo, binary);
         uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-        if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {
-            // normally we prevent native code from being generated for these functions,
-            // (using sentinel value `1` instead)
-            // so create an exception here so we can print pretty our lies
-            auto ct = jl_current_task;
-            bool timed = (ct->reentrant_timing & 1) == 0;
-            if (timed)
-                ct->reentrant_timing |= 1;
-            uint64_t compiler_start_time = 0;
-            uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
-            if (measure_compile_time_enabled)
-                compiler_start_time = jl_hrtime();
-            JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
-            specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-            if (specfptr == 0) {
-                jl_code_info_t *src = jl_type_infer(mi, world, 0);
-                JL_GC_PUSH1(&src);
-                jl_method_t *def = mi->def.method;
-                if (jl_is_method(def)) {
-                    if (!src) {
-                        // TODO: jl_code_for_staged can throw
-                        src = def->generator ? jl_code_for_staged(mi, world) : (jl_code_info_t*)def->source;
-                    }
-                    if (src && (jl_value_t*)src != jl_nothing)
-                        src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src);
-                }
-                fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
-                specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-                if (src && jl_is_code_info(src)) {
-                    if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {
-                        fptr = (uintptr_t)_jl_compile_codeinst(codeinst, src, world, *jl_ExecutionEngine->getContext(), 0);
-                        specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
-                    }
-                }
-                JL_GC_POP();
-            }
-            JL_UNLOCK(&jl_codegen_lock);
-            if (timed) {
-                if (measure_compile_time_enabled) {
-                    auto end = jl_hrtime();
-                    jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
-                }
-                ct->reentrant_timing &= ~1ull;
-            }
-        }
+        if (getwrapper || specfptr == 0)
+            specfptr = fptr;
         if (specfptr != 0)
             return jl_dump_fptr_asm(specfptr, emit_mc, asm_variant, debuginfo, binary);
     }
-
-    // whatever, that didn't work - use the assembler output instead
-    jl_llvmf_dump_t llvmf_dump;
-    jl_get_llvmf_defn(&llvmf_dump, mi, world, getwrapper, true, jl_default_cgparams);
-    if (!llvmf_dump.F)
-        return jl_an_empty_string;
-    return jl_dump_function_asm(&llvmf_dump, emit_mc, asm_variant, debuginfo, binary, false);
+    return jl_an_empty_string;
 }
 
+#if JL_LLVM_VERSION >= 180000
+CodeGenOptLevel CodeGenOptLevelFor(int optlevel)
+{
+#ifdef DISABLE_OPT
+    return CodeGenOptLevel::None;
+#else
+    return optlevel == 0 ? CodeGenOptLevel::None :
+        optlevel == 1 ? CodeGenOptLevel::Less :
+        optlevel == 2 ? CodeGenOptLevel::Default :
+        CodeGenOptLevel::Aggressive;
+#endif
+}
+#else
 CodeGenOpt::Level CodeGenOptLevelFor(int optlevel)
 {
 #ifdef DISABLE_OPT
     return CodeGenOpt::None;
 #else
-    return optlevel < 2 ? CodeGenOpt::None :
+    return optlevel == 0 ? CodeGenOpt::None :
+        optlevel == 1 ? CodeGenOpt::Less :
         optlevel == 2 ? CodeGenOpt::Default :
         CodeGenOpt::Aggressive;
 #endif
 }
+#endif
 
 static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT
 {
     return std::distance(F.begin(), F.end());
 }
 
-void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) {
-    ++ModulesOptimized;
-    size_t optlevel = SIZE_MAX;
-    TSM.withModuleDo([&](Module &M) {
-        if (jl_generating_output()) {
-            optlevel = 0;
-        }
-        else {
-            optlevel = std::max(static_cast<int>(jl_options.opt_level), 0);
-            size_t optlevel_min = std::max(static_cast<int>(jl_options.opt_level_min), 0);
-            for (auto &F : M.functions()) {
-                if (!F.getBasicBlockList().empty()) {
+static constexpr size_t N_optlevels = 4;
+
+static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT {
+    TSM.withModuleDo([](Module &M) JL_NOTSAFEPOINT {
+        size_t opt_level = std::max(static_cast<int>(jl_options.opt_level), 0);
+        do {
+            if (jl_generating_output()) {
+                opt_level = 0;
+                break;
+            }
+            size_t opt_level_min = std::max(static_cast<int>(jl_options.opt_level_min), 0);
+            for (auto &F : M) {
+                if (!F.isDeclaration()) {
                     Attribute attr = F.getFnAttribute("julia-optimization-level");
                     StringRef val = attr.getValueAsString();
                     if (val != "") {
                         size_t ol = (size_t)val[0] - '0';
-                        if (ol < optlevel)
-                            optlevel = ol;
+                        if (ol < opt_level)
+                            opt_level = ol;
                     }
                 }
             }
-            optlevel = std::min(std::max(optlevel, optlevel_min), this->count);
-        }
+            if (opt_level < opt_level_min)
+                opt_level = opt_level_min;
+        } while (0);
+        // currently -O3 is max
+        opt_level = std::min(opt_level, N_optlevels - 1);
+        M.addModuleFlag(Module::Warning, "julia.optlevel", opt_level);
     });
-    assert(optlevel != SIZE_MAX && "Failed to select a valid optimization level!");
-    this->optimizers[optlevel]->OptimizeLayer.emit(std::move(R), std::move(TSM));
+    return TSM;
+}
+static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT {
+    return selectOptLevel(std::move(TSM));
 }
 
 void jl_register_jit_object(const object::ObjectFile &debugObj,
-                            std::function<uint64_t(const StringRef &)> getLoadAddress,
-                            std::function<void *(void *)> lookupWriteAddress) JL_NOTSAFEPOINT;
+                            std::function<uint64_t(const StringRef &)> getLoadAddress);
 
 namespace {
 
@@ -705,30 +1042,27 @@ struct JITObjectInfo {
 class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
     std::mutex PluginMutex;
     std::map<MaterializationResponsibility *, std::unique_ptr<JITObjectInfo>> PendingObjs;
-    // Resources from distinct MaterializationResponsibilitys can get merged
-    // after emission, so we can have multiple debug objects per resource key.
-    std::map<ResourceKey, std::vector<std::unique_ptr<JITObjectInfo>>> RegisteredObjs;
 
 public:
     void notifyMaterializing(MaterializationResponsibility &MR, jitlink::LinkGraph &G,
                              jitlink::JITLinkContext &Ctx,
                              MemoryBufferRef InputObject) override
     {
-        // Keeping around a full copy of the input object file (and re-parsing it) is
-        // wasteful, but for now, this lets us reuse the existing debuginfo.cpp code.
-        // Should look into just directly pulling out all the information required in
-        // a JITLink pass and just keeping the required tables/DWARF sections around
-        // (perhaps using the LLVM DebuggerSupportPlugin as a reference).
         auto NewBuffer =
             MemoryBuffer::getMemBufferCopy(InputObject.getBuffer(), G.getName());
+        // Re-parsing the InputObject is wasteful, but for now, this lets us
+        // reuse the existing debuginfo.cpp code. Should look into just
+        // directly pulling out all the information required in a JITLink pass
+        // and just keeping the required tables/DWARF sections around (perhaps
+        // using the LLVM DebuggerSupportPlugin as a reference).
         auto NewObj =
             cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef()));
 
         {
             std::lock_guard<std::mutex> lock(PluginMutex);
             assert(PendingObjs.count(&MR) == 0);
-            PendingObjs[&MR] = std::unique_ptr<JITObjectInfo>(
-                new JITObjectInfo{std::move(NewBuffer), std::move(NewObj), {}});
+            PendingObjs[&MR] = std::unique_ptr<JITObjectInfo>(new JITObjectInfo{
+                std::move(NewBuffer), std::move(NewObj), {}});
         }
     }
 
@@ -753,14 +1087,9 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
                 return result->second;
             };
 
-            jl_register_jit_object(*NewInfo->Object, getLoadAddress, nullptr);
-        }
-
-        cantFail(MR.withResourceKeyDo([&](ResourceKey K) {
-            std::lock_guard<std::mutex> lock(PluginMutex);
-            RegisteredObjs[K].push_back(std::move(PendingObjs[&MR]));
+            jl_register_jit_object(*NewInfo->Object, getLoadAddress);
             PendingObjs.erase(&MR);
-        }));
+        }
 
         return Error::success();
     }
@@ -772,24 +1101,13 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
         return Error::success();
     }
 
-    Error notifyRemovingResources(ResourceKey K) override
+    Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override
     {
-        std::lock_guard<std::mutex> lock(PluginMutex);
-        RegisteredObjs.erase(K);
-        // TODO: If we ever unload code, need to notify debuginfo registry.
         return Error::success();
     }
 
-    void notifyTransferringResources(ResourceKey DstKey, ResourceKey SrcKey) override
-    {
-        std::lock_guard<std::mutex> lock(PluginMutex);
-        auto SrcIt = RegisteredObjs.find(SrcKey);
-        if (SrcIt != RegisteredObjs.end()) {
-            for (std::unique_ptr<JITObjectInfo> &Info : SrcIt->second)
-                RegisteredObjs[DstKey].push_back(std::move(Info));
-            RegisteredObjs.erase(SrcIt);
-        }
-    }
+    void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey,
+                                     orc::ResourceKey SrcKey) override {}
 
     void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &,
                           jitlink::PassConfiguration &PassConfig) override
@@ -829,20 +1147,21 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
 
 class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
 private:
-    std::atomic<size_t> &total_size;
+    _Atomic(size_t)* jit_bytes_size;
 
 public:
 
-    JLMemoryUsagePlugin(std::atomic<size_t> &total_size)
-        : total_size(total_size) {}
+    JLMemoryUsagePlugin(_Atomic(size_t)* jit_bytes_size)
+        : jit_bytes_size(jit_bytes_size) {}
 
     Error notifyFailed(orc::MaterializationResponsibility &MR) override {
         return Error::success();
     }
-    Error notifyRemovingResources(orc::ResourceKey K) override {
+    Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override
+    {
         return Error::success();
     }
-    void notifyTransferringResources(orc::ResourceKey DstKey,
+    void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey,
                                      orc::ResourceKey SrcKey) override {}
 
     void modifyPassConfig(orc::MaterializationResponsibility &,
@@ -860,7 +1179,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
                 for (auto block : section.blocks()) {
                     secsize += block->getSize();
                 }
-                if ((section.getMemProt() & jitlink::MemProt::Exec) == jitlink::MemProt::None) {
+                if ((section.getMemProt() & orc::MemProt::Exec) == orc::MemProt::None) {
                     data_size += secsize;
                 } else {
                     code_size += secsize;
@@ -869,7 +1188,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
             }
             (void) code_size;
             (void) data_size;
-            this->total_size.fetch_add(graph_size, std::memory_order_relaxed);
+            jl_atomic_fetch_add_relaxed(this->jit_bytes_size, graph_size);
             jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, graph_size);
             jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, code_size);
             jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, data_size);
@@ -889,16 +1208,6 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
 #pragma clang diagnostic ignored "-Wunused-function"
 #endif
 
-// TODO: Port our memory management optimisations to JITLink instead of using the
-// default InProcessMemoryManager.
-std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager() {
-#if JL_LLVM_VERSION < 150000
-    return cantFail(jitlink::InProcessMemoryManager::Create());
-#else
-    return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper<orc::InProcessMemoryMapper>());
-#endif
-}
-
 #ifdef _COMPILER_CLANG_
 #pragma clang diagnostic pop
 #endif
@@ -921,7 +1230,8 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar {
     }
 };
 
-RTDyldMemoryManager* createRTDyldMemoryManager(void);
+RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT;
+std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager() JL_NOTSAFEPOINT;
 
 // A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr
 class ForwardingMemoryManager : public RuntimeDyld::MemoryManager {
@@ -930,7 +1240,10 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager {
 
 public:
     ForwardingMemoryManager(std::shared_ptr<RuntimeDyld::MemoryManager> MemMgr) : MemMgr(MemMgr) {}
-    virtual ~ForwardingMemoryManager() = default;
+    ForwardingMemoryManager(ForwardingMemoryManager &) = delete;
+    virtual ~ForwardingMemoryManager() {
+        assert(!MemMgr);
+    }
     virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                                      unsigned SectionID,
                                      StringRef SectionName) override {
@@ -942,11 +1255,9 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager {
                                      bool IsReadOnly) override {
         return MemMgr->allocateDataSection(Size, Alignment, SectionID, SectionName, IsReadOnly);
     }
-    virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
-                                        uintptr_t RODataSize,
-                                        uint32_t RODataAlign,
-                                        uintptr_t RWDataSize,
-                                        uint32_t RWDataAlign) override {
+    virtual void reserveAllocationSpace(uintptr_t CodeSize, Align CodeAlign,
+                                        uintptr_t RODataSize, Align RODataAlign,
+                                        uintptr_t RWDataSize, Align RWDataAlign) override {
         return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign);
     }
     virtual bool needsToReserveAllocationSpace() override {
@@ -956,11 +1267,13 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager {
                                   size_t Size) override {
         return MemMgr->registerEHFrames(Addr, LoadAddr, Size);
     }
-    virtual void deregisterEHFrames() override {
-        return MemMgr->deregisterEHFrames();
-    }
+    virtual void deregisterEHFrames() override { /* not actually supported or allowed with this */ }
     virtual bool finalizeMemory(std::string *ErrMsg = nullptr) override {
-        return MemMgr->finalizeMemory(ErrMsg);
+        bool b = false;
+        if (MemMgr.use_count() == 2)
+            b = MemMgr->finalizeMemory(ErrMsg);
+        MemMgr.reset();
+        return b;
     }
     virtual void notifyObjectLoaded(RuntimeDyld &RTDyld,
                                     const object::ObjectFile &Obj) override {
@@ -968,33 +1281,12 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager {
     }
 };
 
-
-#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-void *lookupWriteAddressFor(RTDyldMemoryManager *MemMgr, void *rt_addr);
-#endif
-
-void registerRTDyldJITObject(const object::ObjectFile &Object,
-                             const RuntimeDyld::LoadedObjectInfo &L,
-                             const std::shared_ptr<RTDyldMemoryManager> &MemMgr)
+#ifndef JL_USE_JITLINK
+static void registerRTDyldJITObject(orc::MaterializationResponsibility &MR,
+                                    const object::ObjectFile &Object,
+                                    const RuntimeDyld::LoadedObjectInfo &L)
 {
-    auto SavedObject = L.getObjectForDebug(Object).takeBinary();
-    // If the debug object is unavailable, save (a copy of) the original object
-    // for our backtraces.
-    // This copy seems unfortunate, but there doesn't seem to be a way to take
-    // ownership of the original buffer.
-    if (!SavedObject.first) {
-        auto NewBuffer =
-            MemoryBuffer::getMemBufferCopy(Object.getData(), Object.getFileName());
-        auto NewObj =
-            cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef()));
-        SavedObject = std::make_pair(std::move(NewObj), std::move(NewBuffer));
-    }
-    const object::ObjectFile *DebugObj = SavedObject.first.release();
-    SavedObject.second.release();
-
     StringMap<object::SectionRef> loadedSections;
-    // Use the original Object, not the DebugObject, as this is used for the
-    // RuntimeDyld::LoadedObjectInfo lookup.
     for (const object::SectionRef &lSection : Object.sections()) {
         auto sName = lSection.getName();
         if (sName) {
@@ -1011,14 +1303,11 @@ void registerRTDyldJITObject(const object::ObjectFile &Object,
         return L.getSectionLoadAddress(search->second);
     };
 
-    jl_register_jit_object(*DebugObj, getLoadAddress,
-#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-        [MemMgr](void *p) { return lookupWriteAddressFor(MemMgr.get(), p); }
-#else
-        nullptr
-#endif
-    );
+    auto DebugObject = L.getObjectForDebug(Object); // ELF requires us to make a copy to mutate the header with the section load addresses. On other platforms this is a no-op.
+    jl_register_jit_object(DebugObject.getBinary() ? *DebugObject.getBinary() : Object, getLoadAddress);
 }
+#endif
+
 namespace {
     static std::unique_ptr<TargetMachine> createTargetMachine() JL_NOTSAFEPOINT {
         TargetOptions options = TargetOptions();
@@ -1037,9 +1326,19 @@ namespace {
 #if defined(MSAN_EMUTLS_WORKAROUND)
         options.EmulatedTLS = true;
         options.ExplicitEmulatedTLS = true;
+#endif
+#if defined(_CPU_RISCV64_)
+        // we set these manually to avoid LLVM defaulting to soft-float
+#if defined(__riscv_float_abi_double)
+        options.MCOptions.ABIName = "lp64d";
+#elif defined(__riscv_float_abi_single)
+        options.MCOptions.ABIName = "lp64f";
+#else
+        options.MCOptions.ABIName = "lp64";
+#endif
 #endif
         uint32_t target_flags = 0;
-        auto target = jl_get_llvm_target(imaging_default(), target_flags);
+        auto target = jl_get_llvm_target(jl_options.cpu_target, jl_generating_output(), target_flags);
         auto &TheCPU = target.first;
         SmallVector<std::string, 10> targetFeatures(target.second.begin(), target.second.end());
         std::string errorstr;
@@ -1071,7 +1370,7 @@ namespace {
             FeaturesStr = Features.getString();
         }
         // Allocate a target...
-        Optional<CodeModel::Model> codemodel =
+        std::optional<CodeModel::Model> codemodel =
 #ifdef _P64
             // Make sure we are using the large code model on 64bit
             // Let LLVM pick a default suitable for jitting on 32bit
@@ -1079,40 +1378,45 @@ namespace {
 #else
             None;
 #endif
+        if (TheTriple.isAArch64())
+            codemodel = CodeModel::Small;
+#if JL_LLVM_VERSION < 200000
+        else if (TheTriple.isRISCV()) {
+            // RISC-V only supports large code model from LLVM 20
+            // https://github.com/llvm/llvm-project/pull/70308
+            codemodel = CodeModel::Medium;
+        }
+#endif
+        // Generate simpler code for JIT
+        Reloc::Model relocmodel = Reloc::Static;
+        if (TheTriple.isRISCV()) {
+            // until large code model is supported, use PIC for RISC-V
+            // https://github.com/llvm/llvm-project/issues/106203
+            relocmodel = Reloc::PIC_;
+        }
         auto optlevel = CodeGenOptLevelFor(jl_options.opt_level);
         auto TM = TheTarget->createTargetMachine(
                 TheTriple.getTriple(), TheCPU, FeaturesStr,
                 options,
-                Reloc::Static, // Generate simpler code for JIT
+                relocmodel,
                 codemodel,
                 optlevel,
                 true // JIT
                 );
         assert(TM && "Failed to select target machine -"
                      " Is the LLVM backend for this CPU enabled?");
-        if (!TheTriple.isARM() && !TheTriple.isPPC64()) {
-            // FastISel seems to be buggy for ARM. Ref #13321
-            if (jl_options.opt_level < 2)
-                TM->setFastISel(true);
-        }
+        fixupTM(*TM);
         return std::unique_ptr<TargetMachine>(TM);
     }
-} // namespace
-
-namespace {
 
-#ifndef JL_USE_NEW_PM
-    typedef legacy::PassManager PassManager;
-#else
     typedef NewPM PassManager;
-#endif
 
     orc::JITTargetMachineBuilder createJTMBFromTM(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT {
         return orc::JITTargetMachineBuilder(TM.getTargetTriple())
             .setCPU(TM.getTargetCPU().str())
             .setFeatures(TM.getTargetFeatureString())
             .setOptions(TM.Options)
-            .setRelocationModel(Reloc::Static)
+            .setRelocationModel(TM.getRelocationModel())
             .setCodeModel(TM.getCodeModel())
             .setCodeGenOptLevel(CodeGenOptLevelFor(optlevel));
     }
@@ -1124,212 +1428,517 @@ namespace {
             : JTMB(createJTMBFromTM(TM, optlevel)) {}
 
         std::unique_ptr<TargetMachine> operator()() JL_NOTSAFEPOINT {
-            return cantFail(JTMB.createTargetMachine());
+            auto TM = cantFail(JTMB.createTargetMachine());
+            fixupTM(*TM);
+            return TM;
         }
     };
 
-#ifndef JL_USE_NEW_PM
-    struct PMCreator {
-        std::unique_ptr<TargetMachine> TM;
-        int optlevel;
-        PMCreator(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT
-            : TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {}
-        // overload for newpm compatibility
-        PMCreator(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &) JL_NOTSAFEPOINT
-            : PMCreator(TM, optlevel) {}
-        PMCreator(const PMCreator &other) JL_NOTSAFEPOINT
-            : PMCreator(*other.TM, other.optlevel) {}
-        PMCreator(PMCreator &&other) JL_NOTSAFEPOINT
-            : TM(std::move(other.TM)), optlevel(other.optlevel) {}
-        friend void swap(PMCreator &self, PMCreator &other) JL_NOTSAFEPOINT {
-            using std::swap;
-            swap(self.TM, other.TM);
-            swap(self.optlevel, other.optlevel);
-        }
-        PMCreator &operator=(PMCreator other) JL_NOTSAFEPOINT {
-            swap(*this, other);
-            return *this;
-        }
-        auto operator()() JL_NOTSAFEPOINT {
-            auto PM = std::make_unique<legacy::PassManager>();
-            addTargetPasses(PM.get(), TM->getTargetTriple(), TM->getTargetIRAnalysis());
-            addOptimizationPasses(PM.get(), optlevel);
-            addMachinePasses(PM.get(), optlevel);
-            return PM;
-        }
-    };
-#else
     struct PMCreator {
         orc::JITTargetMachineBuilder JTMB;
         OptimizationLevel O;
-        std::vector<std::function<void()>> &printers;
-        PMCreator(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &printers) JL_NOTSAFEPOINT
-            : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers) {}
+        SmallVector<std::function<void()>, 0> &printers;
+        std::mutex &llvm_printing_mutex;
+        PMCreator(TargetMachine &TM, int optlevel, SmallVector<std::function<void()>, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT
+            : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers), llvm_printing_mutex(llvm_printing_mutex) {}
 
         auto operator()() JL_NOTSAFEPOINT {
-            auto NPM = std::make_unique<NewPM>(cantFail(JTMB.createTargetMachine()), O);
-            printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT {
-                NPM->printTimers();
-            });
+            auto TM = cantFail(JTMB.createTargetMachine());
+            fixupTM(*TM);
+            auto NPM = std::make_unique<NewPM>(std::move(TM), O, OptimizationOptions::defaults());
+            // TODO this needs to be locked, as different resource pools may add to the printer vector at the same time
+            {
+                std::lock_guard<std::mutex> lock(llvm_printing_mutex);
+                printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT {
+                    NPM->printTimers();
+                });
+            }
             return NPM;
         }
     };
-#endif
 
-    struct OptimizerT {
-        OptimizerT(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &printers) JL_NOTSAFEPOINT
-            : optlevel(optlevel), PMs(PMCreator(TM, optlevel, printers)) {}
-        OptimizerT(OptimizerT&) JL_NOTSAFEPOINT = delete;
-        OptimizerT(OptimizerT&&) JL_NOTSAFEPOINT = default;
+    template<size_t N>
+    struct sizedOptimizerT {
+        sizedOptimizerT(TargetMachine &TM, SmallVector<std::function<void()>, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT {
+            for (size_t i = 0; i < N; i++) {
+                PMs[i] = std::make_unique<JuliaOJIT::ResourcePool<std::unique_ptr<PassManager>>>(PMCreator(TM, i, printers, llvm_printing_mutex));
+            }
+        }
 
-        OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT {
+        orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT {
             TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT {
+                auto PoolIdx = cast<ConstantInt>(cast<ConstantAsMetadata>(M.getModuleFlag("julia.optlevel"))->getValue())->getZExtValue();
+                assert(PoolIdx < N && "Invalid optimization pool index");
+
                 uint64_t start_time = 0;
-                std::stringstream before_stats_ss;
-                bool should_dump_opt_stats = false;
+
+                struct Stat {
+                    std::string name;
+                    uint64_t insts;
+                    uint64_t bbs;
+
+                    void dump(ios_t *stream) JL_NOTSAFEPOINT {
+                        ios_printf(stream, "    \"%s\":\n", name.c_str());
+                        ios_printf(stream, "        instructions: %u\n", insts);
+                        ios_printf(stream, "        basicblocks: %zd\n", bbs);
+                    }
+
+                    Stat(Function &F) JL_NOTSAFEPOINT : name(F.getName().str()), insts(F.getInstructionCount()), bbs(countBasicBlocks(F)) {}
+
+                    ~Stat() JL_NOTSAFEPOINT = default;
+                };
+                SmallVector<Stat, 8> before_stats;
                 {
-                    auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream();
-                    if (stream) {
-                        // Ensures that we don't _just_ write the second part of the YAML object
-                        should_dump_opt_stats = true;
-                        // We use a stringstream to later atomically write a YAML object
-                        // without the need to hold the stream lock over the optimization
-                        // Print LLVM function statistics _before_ optimization
-                        // Print all the information about this invocation as a YAML object
-                        before_stats_ss << "- \n";
-                        // We print the name and some statistics for each function in the module, both
-                        // before optimization and again afterwards.
-                        before_stats_ss << "  before: \n";
+                    if (*jl_ExecutionEngine->get_dump_llvm_opt_stream()) {
                         for (auto &F : M.functions()) {
-                            if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
+                            if (F.isDeclaration() || F.getName().starts_with("jfptr_")) {
                                 continue;
                             }
                             // Each function is printed as a YAML object with several attributes
-                            before_stats_ss << "    \"" << F.getName().str().c_str() << "\":\n";
-                            before_stats_ss << "        instructions: " << F.getInstructionCount() << "\n";
-                            before_stats_ss << "        basicblocks: " << countBasicBlocks(F) << "\n";
+                            before_stats.emplace_back(F);
                         }
 
                         start_time = jl_hrtime();
                     }
                 }
 
-                JL_TIMING(LLVM_OPT, LLVM_OPT);
-
-                //Run the optimization
-                assert(!verifyModule(M, &errs()));
-                (***PMs).run(M);
-                assert(!verifyModule(M, &errs()));
+                {
+                    JL_TIMING(LLVM_JIT, JIT_Opt);
+                    //Run the optimization
+                    (****PMs[PoolIdx]).run(M);
+                    assert(!verifyLLVMIR(M));
+                }
 
-                uint64_t end_time = 0;
                 {
-                    auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream();
-                    if (stream && should_dump_opt_stats) {
-                        ios_printf(stream, "%s", before_stats_ss.str().c_str());
-                        end_time = jl_hrtime();
+                    // Print optimization statistics as a YAML object
+                    // Looks like:
+                    // -
+                    //   before:
+                    //     "foo":
+                    //       instructions: uint64
+                    //       basicblocks: uint64
+                    //    "bar":
+                    //       instructions: uint64
+                    //       basicblocks: uint64
+                    //   time_ns: uint64
+                    //   optlevel: int
+                    //   after:
+                    //     "foo":
+                    //       instructions: uint64
+                    //       basicblocks: uint64
+                    //    "bar":
+                    //       instructions: uint64
+                    //       basicblocks: uint64
+                    if (auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream()) {
+                        uint64_t end_time = jl_hrtime();
+                        ios_printf(stream, "- \n");
+
+                        // Print LLVM function statistic _before_ optimization
+                        ios_printf(stream, "  before: \n");
+                        for (auto &s : before_stats) {
+                            s.dump(stream);
+                        }
                         ios_printf(stream, "  time_ns: %" PRIu64 "\n", end_time - start_time);
-                        ios_printf(stream, "  optlevel: %d\n", optlevel);
+                        ios_printf(stream, "  optlevel: %d\n", PoolIdx);
 
                         // Print LLVM function statistics _after_ optimization
                         ios_printf(stream, "  after: \n");
                         for (auto &F : M.functions()) {
-                            if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
+                            if (F.isDeclaration() || F.getName().starts_with("jfptr_")) {
                                 continue;
                             }
-                            ios_printf(stream, "    \"%s\":\n", F.getName().str().c_str());
-                            ios_printf(stream, "        instructions: %u\n", F.getInstructionCount());
-                            ios_printf(stream, "        basicblocks: %zd\n", countBasicBlocks(F));
+                            Stat(F).dump(stream);
                         }
                     }
                 }
+                ++ModulesOptimized;
+                switch (PoolIdx) {
+                    case 0:
+                        ++OptO0;
+                        break;
+                    case 1:
+                        ++OptO1;
+                        break;
+                    case 2:
+                        ++OptO2;
+                        break;
+                    case 3:
+                        ++OptO3;
+                        break;
+                    default:
+                        // Change this if we ever gain other optlevels
+                        llvm_unreachable("optlevel is between 0 and 3!");
+                }
             });
-            switch (optlevel) {
-                case 0:
-                    ++OptO0;
-                    break;
-                case 1:
-                    ++OptO1;
-                    break;
-                case 2:
-                    ++OptO2;
-                    break;
-                case 3:
-                    ++OptO3;
-                    break;
-                default:
-                    llvm_unreachable("optlevel is between 0 and 3!");
-            }
-            return Expected<orc::ThreadSafeModule>{std::move(TSM)};
+            return TSM;
         }
     private:
-        int optlevel;
-        JuliaOJIT::ResourcePool<std::unique_ptr<PassManager>> PMs;
+        std::array<std::unique_ptr<JuliaOJIT::ResourcePool<std::unique_ptr<PassManager>>>, N> PMs;
     };
 
+    // shim for converting a unique_ptr to a TransformFunction to a TransformFunction
+    template <typename T>
+    struct IRTransformRef {
+        IRTransformRef(T &transform) : transform(transform) {}
+        OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT {
+            return transform(std::move(TSM), R);
+        }
+    private:
+        T &transform;
+    };
+
+    template<size_t N>
     struct CompilerT : orc::IRCompileLayer::IRCompiler {
 
-        CompilerT(orc::IRSymbolMapper::ManglingOptions MO, TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT
-            : orc::IRCompileLayer::IRCompiler(MO), TMs(TMCreator(TM, optlevel)) {}
+        CompilerT(orc::IRSymbolMapper::ManglingOptions MO, TargetMachine &TM) JL_NOTSAFEPOINT
+            : orc::IRCompileLayer::IRCompiler(MO) {
+            for (size_t i = 0; i < N; ++i) {
+                TMs[i] = std::make_unique<JuliaOJIT::ResourcePool<std::unique_ptr<TargetMachine>>>(TMCreator(TM, i));
+            }
+        }
 
         Expected<std::unique_ptr<MemoryBuffer>> operator()(Module &M) override {
-            return orc::SimpleCompiler(***TMs)(M);
+            JL_TIMING(LLVM_JIT, JIT_Compile);
+            size_t PoolIdx;
+            if (auto opt_level = M.getModuleFlag("julia.optlevel")) {
+                PoolIdx = cast<ConstantInt>(cast<ConstantAsMetadata>(opt_level)->getValue())->getZExtValue();
+            }
+            else {
+                PoolIdx = jl_options.opt_level;
+            }
+            assert(PoolIdx < N && "Invalid optimization level for compiler!");
+
+            auto TM = **TMs[PoolIdx];
+            if (M.getDataLayout().isDefault())
+                M.setDataLayout((*TM)->createDataLayout());
+
+            SmallVector<char, 0> ObjBufferSV;
+            {
+                raw_svector_ostream ObjStream(ObjBufferSV);
+                legacy::PassManager PM;
+                MCContext *Ctx;
+                if ((*TM)->addPassesToEmitMC(PM, Ctx, ObjStream))
+                    return make_error<StringError>("Target does not support MC emission",
+                                                   inconvertibleErrorCode());
+                PM.run(M);
+            }
+
+            // OrcJIT requires that all modules / files have unique names:
+            // https://llvm.org/doxygen/namespacellvm_1_1orc.html#a1f5a1bc60c220cdccbab0f26b2a425e1
+            auto name = (M.getModuleIdentifier() + "-jitted-" +
+                         Twine(jl_atomic_fetch_add_relaxed(&bufcounter, 1)))
+                            .str();
+            return std::make_unique<SmallVectorMemoryBuffer>(std::move(ObjBufferSV), name,
+                                                             false);
         }
 
-        JuliaOJIT::ResourcePool<std::unique_ptr<TargetMachine>> TMs;
+        std::array<std::unique_ptr<JuliaOJIT::ResourcePool<std::unique_ptr<TargetMachine>>>, N> TMs;
+        _Atomic(size_t) bufcounter{0};
     };
 }
 
+struct JuliaOJIT::OptimizerT {
+    OptimizerT(TargetMachine &TM, SmallVector<std::function<void()>, 0> &printers, std::mutex &llvm_printing_mutex)
+        : opt(TM, printers, llvm_printing_mutex) {}
+    orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT {
+        return opt(std::move(TSM));
+    }
+    OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT {
+        return opt(std::move(TSM));
+    }
+private:
+    struct sizedOptimizerT<N_optlevels> opt;
+};
+
+struct JuliaOJIT::JITPointersT {
+    JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT
+        : SharedBytes(SharedBytes), Lock(Lock) {}
+
+    orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT {
+        TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT {
+            std::lock_guard<std::mutex> locked(Lock);
+            for (auto &GV : make_early_inc_range(M.globals())) {
+                if (auto *Shared = getSharedBytes(GV)) {
+                    ++InternedGlobals;
+                    GV.replaceAllUsesWith(Shared);
+                    GV.eraseFromParent();
+                }
+            }
+
+            // Windows needs some inline asm to help
+            // build unwind tables, if they have any functions to decorate
+            if (!M.functions().empty())
+                jl_decorate_module(M);
+        });
+        return TSM;
+    }
+    Expected<orc::ThreadSafeModule> operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT {
+        return operator()(std::move(TSM));
+    }
+
+private:
+    // optimize memory by turning long strings into memoized copies, instead of
+    // making a copy per object file of output.
+    // we memoize them using a StringSet with a custom-alignment allocator
+    // to ensure they are properly aligned
+    Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT {
+        // We could probably technically get away with
+        // interning even external linkage globals,
+        // as long as they have global unnamedaddr,
+        // but currently we shouldn't be emitting those
+        // except in imaging mode, and we don't want to
+        // do this optimization there.
+        if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) {
+            return nullptr;
+        }
+        if (!GV.hasInitializer()) {
+            return nullptr;
+        }
+        if (!GV.isConstant()) {
+            return nullptr;
+        }
+        auto CDS = dyn_cast<ConstantDataSequential>(GV.getInitializer());
+        if (!CDS) {
+            return nullptr;
+        }
+        StringRef Data = CDS->getRawDataValues();
+        if (Data.size() < 16) {
+            // Cutoff, since we don't want to intern small strings
+            return nullptr;
+        }
+        Align Required = GV.getAlign().valueOrOne();
+        Align Preferred = MaxAlignedAlloc::alignment(Data.size());
+        if (Required > Preferred)
+            return nullptr;
+        StringRef Interned = SharedBytes.insert(Data).first->getKey();
+        assert(llvm::isAddrAligned(Preferred, Interned.data()));
+        return literal_static_pointer_val(Interned.data(), GV.getType());
+    }
+
+    SharedBytesT &SharedBytes;
+    std::mutex &Lock;
+};
+
+
+struct JuliaOJIT::DLSymOptimizer {
+    DLSymOptimizer(bool named) JL_NOTSAFEPOINT {
+        this->named = named;
+#define INIT_RUNTIME_LIBRARY(libname, handle) \
+        do { \
+            auto libidx = (uintptr_t) libname; \
+            if (libidx >= runtime_symbols.size()) { \
+                runtime_symbols.resize(libidx + 1); \
+            } \
+            runtime_symbols[libidx].first = handle; \
+        } while (0)
+
+        INIT_RUNTIME_LIBRARY(NULL, jl_RTLD_DEFAULT_handle);
+        INIT_RUNTIME_LIBRARY(JL_EXE_LIBNAME, jl_exe_handle);
+        INIT_RUNTIME_LIBRARY(JL_LIBJULIA_INTERNAL_DL_LIBNAME, jl_libjulia_internal_handle);
+        INIT_RUNTIME_LIBRARY(JL_LIBJULIA_DL_LIBNAME, jl_libjulia_handle);
+
+#undef INIT_RUNTIME_LIBRARY
+    }
+    ~DLSymOptimizer() JL_NOTSAFEPOINT = default;
+
+    void *lookup_symbol(void *libhandle, const char *fname) JL_NOTSAFEPOINT {
+        void *addr;
+        jl_dlsym(libhandle, fname, &addr, 0, 1);
+        return addr;
+    }
+
+    void *lookup(const char *libname, const char *fname) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER {
+        StringRef lib(libname);
+        StringRef f(fname);
+        std::lock_guard<std::mutex> lock(symbols_mutex);
+        auto uit = user_symbols.find(lib);
+        if (uit == user_symbols.end()) {
+            jl_task_t *ct = jl_current_task;
+            int8_t gc_state = jl_gc_unsafe_enter(ct->ptls);
+            void *handle = jl_get_library_(libname, 0);
+            jl_gc_unsafe_leave(ct->ptls, gc_state);
+            if (!handle)
+                return nullptr;
+            uit = user_symbols.insert(std::make_pair(lib, std::make_pair(handle, StringMap<void*>()))).first;
+        }
+        auto &symmap = uit->second.second;
+        auto it = symmap.find(f);
+        if (it != symmap.end()) {
+            return it->second;
+        }
+        void *handle = lookup_symbol(uit->second.first, fname);
+        symmap[f] = handle;
+        return handle;
+    }
+
+    void *lookup(uintptr_t libidx, const char *fname) JL_NOTSAFEPOINT {
+        std::lock_guard<std::mutex> lock(symbols_mutex);
+        runtime_symbols.resize(std::max(runtime_symbols.size(), libidx + 1));
+        auto it = runtime_symbols[libidx].second.find(fname);
+        if (it != runtime_symbols[libidx].second.end()) {
+            return it->second;
+        }
+        auto handle = lookup_symbol(runtime_symbols[libidx].first, fname);
+        runtime_symbols[libidx].second[fname] = handle;
+        return handle;
+    }
+
+    void operator()(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER {
+        for (auto &GV : M.globals()) {
+            auto Name = GV.getName();
+            if (Name.starts_with("jlplt") && Name.ends_with("got")) {
+                auto fname = GV.getAttribute("julia.fname").getValueAsString().str();
+                void *addr;
+                if (GV.hasAttribute("julia.libname")) {
+                    auto libname = GV.getAttribute("julia.libname").getValueAsString().str();
+                    addr = lookup(libname.data(), fname.data());
+                } else {
+                    assert(GV.hasAttribute("julia.libidx") && "PLT entry should have either libname or libidx attribute!");
+                    auto libidx = (uintptr_t)std::stoull(GV.getAttribute("julia.libidx").getValueAsString().str());
+                    addr = lookup(libidx, fname.data());
+                }
+                if (addr) {
+                    Function *Thunk = nullptr;
+                    if (!GV.isDeclaration()) {
+                        Thunk = cast<Function>(GV.getInitializer()->stripPointerCasts());
+                        assert(++Thunk->uses().begin() == Thunk->uses().end() && "Thunk should only have one use in PLT initializer!");
+                        assert(Thunk->hasLocalLinkage() && "Thunk should not have non-local linkage!");
+                    }
+                    else {
+                        GV.setLinkage(GlobalValue::PrivateLinkage);
+                    }
+                    auto init = ConstantExpr::getIntToPtr(ConstantInt::get(M.getDataLayout().getIntPtrType(M.getContext()), (uintptr_t)addr), GV.getValueType());
+                    if (named) {
+                        auto T = GV.getValueType();
+                        assert(T->isPointerTy());
+                        init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, GV.getName() + ".jit", init, &M);
+                    }
+                    GV.setInitializer(init);
+                    GV.setConstant(true);
+                    GV.setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+                    if (Thunk) {
+                        Thunk->eraseFromParent();
+                    }
+                }
+            }
+        }
+
+        for (auto &F : M) {
+            for (auto &BB : F) {
+                SmallVector<Instruction *, 0> to_delete;
+                for (auto &I : make_early_inc_range(BB)) {
+                    auto CI = dyn_cast<CallInst>(&I);
+                    if (!CI)
+                        continue;
+                    auto Callee = CI->getCalledFunction();
+                    if (!Callee || Callee->getName() != XSTR(jl_load_and_lookup))
+                        continue;
+                    // Long-winded way of extracting fname without needing a second copy in an attribute
+                    auto fname = cast<ConstantDataArray>(cast<GlobalVariable>(CI->getArgOperand(1)->stripPointerCasts())->getInitializer())->getAsCString();
+                    auto libarg = CI->getArgOperand(0)->stripPointerCasts();
+                    // Should only use in store and phi node
+                    // Note that this uses the raw output of codegen,
+                    // which is why we can assume this
+                    assert(++++CI->use_begin() == CI->use_end());
+                    void *addr;
+                    if (auto GV = dyn_cast<GlobalVariable>(libarg)) {
+                        // Can happen if the library is the empty string, just give up when that happens
+                        if (isa<ConstantAggregateZero>(GV->getInitializer()))
+                            continue;
+                        auto libname = cast<ConstantDataArray>(GV->getInitializer())->getAsCString();
+                        addr = lookup(libname.data(), fname.data());
+                    } else {
+                        // Can happen if we fail the compile time dlfind i.e when we try a symbol that doesn't exist in libc
+                        if (dyn_cast<ConstantPointerNull>(libarg))
+                            continue;
+                        assert(cast<ConstantExpr>(libarg)->getOpcode() == Instruction::IntToPtr && "libarg should be either a global variable or a integer index!");
+                        libarg = cast<ConstantExpr>(libarg)->getOperand(0);
+                        auto libidx = cast<ConstantInt>(libarg)->getZExtValue();
+                        addr = lookup(libidx, fname.data());
+                    }
+                    if (addr) {
+                        auto init = ConstantExpr::getIntToPtr(ConstantInt::get(M.getDataLayout().getIntPtrType(M.getContext()), (uintptr_t)addr), CI->getType());
+                        if (named) {
+                            auto T = CI->getType();
+                            assert(T->isPointerTy());
+                            init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, CI->getName() + ".jit", init, &M);
+                        }
+                        // DCE and SimplifyCFG will kill the branching structure around
+                        // the call, so we don't need to worry about removing everything
+                        for (auto user : make_early_inc_range(CI->users())) {
+                            if (auto SI = dyn_cast<StoreInst>(user)) {
+                                to_delete.push_back(SI);
+                            } else {
+                                auto PHI = cast<PHINode>(user);
+                                PHI->replaceAllUsesWith(init);
+                                to_delete.push_back(PHI);
+                            }
+                        }
+                        to_delete.push_back(CI);
+                    }
+                }
+                for (auto I : to_delete) {
+                    I->eraseFromParent();
+                }
+            }
+        }
+    }
+
+    std::mutex symbols_mutex;
+    StringMap<std::pair<void *, StringMap<void *>>> user_symbols;
+    SmallVector<std::pair<void *, StringMap<void *>>, 0> runtime_symbols;
+    bool named;
+};
+
+void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER {
+    JuliaOJIT::DLSymOptimizer(true)(M);
+}
+
+void fixupTM(TargetMachine &TM) {
+    auto TheTriple = TM.getTargetTriple();
+    if (jl_options.opt_level < 2) {
+        if (!TheTriple.isARM() && !TheTriple.isPPC64() && !TheTriple.isAArch64())
+            TM.setFastISel(true);
+        else    // FastISel seems to be buggy Ref #13321
+            TM.setFastISel(false);
+    }
+}
+
 llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
     // Mark our address spaces as non-integral
     auto jl_data_layout = TM.createDataLayout();
-    jl_data_layout.reset(jl_data_layout.getStringRepresentation() + "-ni:10:11:12:13");
+    jl_data_layout = DataLayout(jl_data_layout.getStringRepresentation() + "-ni:10:11:12:13");
     return jl_data_layout;
 }
 
-JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &PrintLLVMTimers)
-  : CompileLayer(BaseLayer.getExecutionSession(), BaseLayer,
-      std::make_unique<CompilerT>(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)),
-    OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer,
-            llvm::orc::IRTransformLayer::TransformFunction(OptimizerT(TM, optlevel, PrintLLVMTimers))) {}
-
-#ifdef _COMPILER_ASAN_ENABLED_
-int64_t ___asan_globals_registered;
-#endif
-
 JuliaOJIT::JuliaOJIT()
   : TM(createTargetMachine()),
     DL(jl_create_datalayout(*TM)),
-    ES(cantFail(orc::SelfExecutorProcessControl::Create())),
+    ES(cantFail(orc::SelfExecutorProcessControl::Create(nullptr, std::make_unique<::JuliaTaskDispatcher>()))),
     GlobalJD(ES.createBareJITDylib("JuliaGlobals")),
     JD(ES.createBareJITDylib("JuliaOJIT")),
     ExternalJD(ES.createBareJITDylib("JuliaExternal")),
-    ContextPool([](){
-        auto ctx = std::make_unique<LLVMContext>();
-        return orc::ThreadSafeContext(std::move(ctx));
-    }),
+    DLSymOpt(std::make_unique<DLSymOptimizer>(false)),
 #ifdef JL_USE_JITLINK
     MemMgr(createJITLinkMemoryManager()),
     ObjectLayer(ES, *MemMgr),
 #else
     MemMgr(createRTDyldMemoryManager()),
-    ObjectLayer(
+    UnlockedObjectLayer(
             ES,
             [this]() {
                 std::unique_ptr<RuntimeDyld::MemoryManager> result(new ForwardingMemoryManager(MemMgr));
                 return result;
             }
         ),
+    ObjectLayer(UnlockedObjectLayer),
 #endif
-    LockLayer(ObjectLayer),
-    Pipelines{
-        std::make_unique<PipelineT>(LockLayer, *TM, 0, PrintLLVMTimers),
-        std::make_unique<PipelineT>(LockLayer, *TM, 1, PrintLLVMTimers),
-        std::make_unique<PipelineT>(LockLayer, *TM, 2, PrintLLVMTimers),
-        std::make_unique<PipelineT>(LockLayer, *TM, 3, PrintLLVMTimers),
-    },
-    OptSelLayer(Pipelines),
-    ExternalCompileLayer(ES, LockLayer,
-        std::make_unique<CompilerT>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM, 2))
+    CompileLayer(ES, ObjectLayer, std::make_unique<CompilerT<N_optlevels>>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)),
+    JITPointers(std::make_unique<JITPointersT>(SharedBytes, RLST_mutex)),
+    JITPointersLayer(ES, CompileLayer, IRTransformRef(*JITPointers)),
+    Optimizers(std::make_unique<OptimizerT>(*TM, PrintLLVMTimers, llvm_printing_mutex)),
+    OptimizeLayer(ES, JITPointersLayer, IRTransformRef(*Optimizers)),
+    OptSelLayer(ES, OptimizeLayer, static_cast<orc::ThreadSafeModule (*)(orc::ThreadSafeModule, orc::MaterializationResponsibility&)>(selectOptLevel))
 {
 #ifdef JL_USE_JITLINK
 # if defined(LLVM_SHLIB)
@@ -1343,14 +1952,9 @@ JuliaOJIT::JuliaOJIT()
         ES, std::move(ehRegistrar)));
 
     ObjectLayer.addPlugin(std::make_unique<JLDebuginfoPlugin>());
-    ObjectLayer.addPlugin(std::make_unique<JLMemoryUsagePlugin>(total_size));
+    ObjectLayer.addPlugin(std::make_unique<JLMemoryUsagePlugin>(&jit_bytes_size));
 #else
-    ObjectLayer.setNotifyLoaded(
-        [this](orc::MaterializationResponsibility &MR,
-               const object::ObjectFile &Object,
-               const RuntimeDyld::LoadedObjectInfo &LO) {
-            registerRTDyldJITObject(Object, LO, MemMgr);
-        });
+    UnlockedObjectLayer.setNotifyLoaded(registerRTDyldJITObject);
 #endif
 
     std::string ErrorStr;
@@ -1395,7 +1999,7 @@ JuliaOJIT::JuliaOJIT()
                   DL.getGlobalPrefix(),
                   [&](const orc::SymbolStringPtr &S) {
                         const char *const atomic_prefix = "__atomic_";
-                        return (*S).startswith(atomic_prefix);
+                        return (*S).starts_with(atomic_prefix);
                   })));
         }
     }
@@ -1405,47 +2009,79 @@ JuliaOJIT::JuliaOJIT()
     ExternalJD.addToLinkOrder(GlobalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly);
     ExternalJD.addToLinkOrder(JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly);
 
-#if JULIA_FLOAT16_ABI == 1
     orc::SymbolAliasMap jl_crt = {
-        { mangle("__gnu_h2f_ieee"), { mangle("julia__gnu_h2f_ieee"), JITSymbolFlags::Exported } },
-        { mangle("__extendhfsf2"),  { mangle("julia__gnu_h2f_ieee"), JITSymbolFlags::Exported } },
-        { mangle("__gnu_f2h_ieee"), { mangle("julia__gnu_f2h_ieee"), JITSymbolFlags::Exported } },
-        { mangle("__truncsfhf2"),   { mangle("julia__gnu_f2h_ieee"), JITSymbolFlags::Exported } },
-        { mangle("__truncdfhf2"),   { mangle("julia__truncdfhf2"),   JITSymbolFlags::Exported } }
+        // Float16 conversion routines
+#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_)
+        // LLVM 16 reverted to soft-float ABI for passing half on x86_64 Darwin
+        // https://github.com/llvm/llvm-project/commit/2bcf51c7f82ca7752d1bba390a2e0cb5fdd05ca9
+        { mangle("__gnu_h2f_ieee"), { mangle("julia_half_to_float"),  JITSymbolFlags::Exported } },
+        { mangle("__extendhfsf2"),  { mangle("julia_half_to_float"),  JITSymbolFlags::Exported } },
+        { mangle("__gnu_f2h_ieee"), { mangle("julia_float_to_half"),  JITSymbolFlags::Exported } },
+        { mangle("__truncsfhf2"),   { mangle("julia_float_to_half"),  JITSymbolFlags::Exported } },
+        { mangle("__truncdfhf2"),   { mangle("julia_double_to_half"), JITSymbolFlags::Exported } },
+#else
+        { mangle("__gnu_h2f_ieee"), { mangle("julia__gnu_h2f_ieee"),  JITSymbolFlags::Exported } },
+        { mangle("__extendhfsf2"),  { mangle("julia__gnu_h2f_ieee"),  JITSymbolFlags::Exported } },
+        { mangle("__gnu_f2h_ieee"), { mangle("julia__gnu_f2h_ieee"),  JITSymbolFlags::Exported } },
+        { mangle("__truncsfhf2"),   { mangle("julia__gnu_f2h_ieee"),  JITSymbolFlags::Exported } },
+        { mangle("__truncdfhf2"),   { mangle("julia__truncdfhf2"),    JITSymbolFlags::Exported } },
+#endif
+        // BFloat16 conversion routines
+        { mangle("__truncsfbf2"),   { mangle("julia__truncsfbf2"),    JITSymbolFlags::Exported } },
+        { mangle("__truncdfbf2"),   { mangle("julia__truncdfbf2"),    JITSymbolFlags::Exported } },
     };
     cantFail(GlobalJD.define(orc::symbolAliases(jl_crt)));
+
+#ifdef _OS_OPENBSD_
+    orc::SymbolMap i128_crt;
+
+    i128_crt[mangle("__divti3")] = JITEvaluatedSymbol::fromPointer(&__divti3, JITSymbolFlags::Exported);
+    i128_crt[mangle("__modti3")] = JITEvaluatedSymbol::fromPointer(&__modti3, JITSymbolFlags::Exported);
+    i128_crt[mangle("__udivti3")] = JITEvaluatedSymbol::fromPointer(&__udivti3, JITSymbolFlags::Exported);
+    i128_crt[mangle("__umodti3")] = JITEvaluatedSymbol::fromPointer(&__umodti3, JITSymbolFlags::Exported);
+
+    cantFail(GlobalJD.define(orc::absoluteSymbols(i128_crt)));
 #endif
 
 #ifdef MSAN_EMUTLS_WORKAROUND
     orc::SymbolMap msan_crt;
-    msan_crt[mangle("__emutls_get_address")] = JITEvaluatedSymbol::fromPointer(msan_workaround::getTLSAddress, JITSymbolFlags::Exported);
-    msan_crt[mangle("__emutls_v.__msan_param_tls")] = JITEvaluatedSymbol::fromPointer(
-        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::param)), JITSymbolFlags::Exported);
-    msan_crt[mangle("__emutls_v.__msan_param_origin_tls")] = JITEvaluatedSymbol::fromPointer(
-        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::param_origin)), JITSymbolFlags::Exported);
-    msan_crt[mangle("__emutls_v.__msan_retval_tls")] = JITEvaluatedSymbol::fromPointer(
-        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::retval)), JITSymbolFlags::Exported);
-    msan_crt[mangle("__emutls_v.__msan_retval_origin_tls")] = JITEvaluatedSymbol::fromPointer(
-        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::retval_origin)), JITSymbolFlags::Exported);
-    msan_crt[mangle("__emutls_v.__msan_va_arg_tls")] = JITEvaluatedSymbol::fromPointer(
-        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::va_arg)), JITSymbolFlags::Exported);
-    msan_crt[mangle("__emutls_v.__msan_va_arg_origin_tls")] = JITEvaluatedSymbol::fromPointer(
-        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::va_arg_origin)), JITSymbolFlags::Exported);
-    msan_crt[mangle("__emutls_v.__msan_va_arg_overflow_size_tls")] = JITEvaluatedSymbol::fromPointer(
-        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::va_arg_overflow_size)), JITSymbolFlags::Exported);
-    msan_crt[mangle("__emutls_v.__msan_origin_tls")] = JITEvaluatedSymbol::fromPointer(
-        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::origin)), JITSymbolFlags::Exported);
+    msan_crt[mangle("__emutls_get_address")] = {ExecutorAddr::fromPtr(msan_workaround::getTLSAddress), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_param_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::param))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_param_origin_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::param_origin))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_retval_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::retval))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_retval_origin_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::retval_origin))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_va_arg_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::va_arg))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_va_arg_origin_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::va_arg_origin))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_va_arg_overflow_size_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::va_arg_overflow_size))), JITSymbolFlags::Exported};
+    msan_crt[mangle("__emutls_v.__msan_origin_tls")] = {ExecutorAddr::fromPtr(
+        reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::origin))), JITSymbolFlags::Exported};
     cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt)));
 #endif
 #ifdef _COMPILER_ASAN_ENABLED_
+    // this is a hack to work around a bad assertion:
+    //   /workspace/srcdir/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp:3028: llvm::Error llvm::orc::ExecutionSession::OL_notifyResolved(llvm::orc::MaterializationResponsibility&, const SymbolMap&): Assertion `(KV.second.getFlags() & ~JITSymbolFlags::Common) == (I->second & ~JITSymbolFlags::Common) && "Resolving symbol with incorrect flags"' failed.
+    static int64_t jl___asan_globals_registered;
     orc::SymbolMap asan_crt;
-    asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported);
+    asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&jl___asan_globals_registered), JITSymbolFlags::Common | JITSymbolFlags::Exported};
     cantFail(JD.define(orc::absoluteSymbols(asan_crt)));
 #endif
 }
 
 JuliaOJIT::~JuliaOJIT() = default;
 
+ThreadSafeContext JuliaOJIT::makeContext()
+{
+    auto ctx = std::make_unique<LLVMContext>();
+    return orc::ThreadSafeContext(std::move(ctx));
+}
+
 orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name)
 {
     std::string MangleName = getMangledName(Name);
@@ -1454,59 +2090,50 @@ orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name)
 
 void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr)
 {
-    cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), JITEvaluatedSymbol::fromPointer((void*)Addr)}})));
+    cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), {ExecutorAddr::fromPtr((void*)Addr), JITSymbolFlags::Exported}}})));
 }
 
 void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
 {
-    JL_TIMING(LLVM_ORC, LLVM_ORC);
+    JL_TIMING(LLVM_JIT, JIT_Total);
     ++ModulesAdded;
-    orc::SymbolLookupSet NewExports;
-    TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT {
-        jl_decorate_module(M);
-        shareStrings(M);
-        for (auto &F : M.global_values()) {
-            if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
-                auto Name = ES.intern(getMangledName(F.getName()));
-                NewExports.add(std::move(Name));
-            }
-        }
-#if !defined(JL_NDEBUG) && !defined(JL_USE_JITLINK)
-        // validate the relocations for M (not implemented for the JITLink memory manager yet)
-        for (Module::global_object_iterator I = M.global_objects().begin(), E = M.global_objects().end(); I != E; ) {
-            GlobalObject *F = &*I;
-            ++I;
-            if (F->isDeclaration()) {
-                if (F->use_empty())
-                    F->eraseFromParent();
-                else if (!((isa<Function>(F) && isIntrinsicFunction(cast<Function>(F))) ||
-                        findUnmangledSymbol(F->getName()) ||
-                        SectionMemoryManager::getSymbolAddressInProcess(
-                            getMangledName(F->getName())))) {
-                    llvm::errs() << "FATAL ERROR: "
-                                << "Symbol \"" << F->getName().str() << "\""
-                                << "not found";
-                    abort();
-                }
-            }
+    TSM = selectOptLevel(std::move(TSM));
+    TSM = (*Optimizers)(std::move(TSM));
+    TSM = (*JITPointers)(std::move(TSM));
+    auto Lock = TSM.getContext().getLock();
+    Module &M = *TSM.getModuleUnlocked();
+
+    for (auto &f : M) {
+        if (!f.isDeclaration()){
+            jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, f.getName().str().c_str());
         }
-#endif
-    });
+    }
 
-    // TODO: what is the performance characteristics of this?
-    cantFail(OptSelLayer.add(JD, std::move(TSM)));
-    // force eager compilation (for now), due to memory management specifics
-    // (can't handle compilation recursion)
-    for (auto &sym : cantFail(ES.lookup({{&JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly}}, NewExports))) {
-        assert(sym.second);
-        (void) sym;
+    // Treat this as if one of the passes might contain a safepoint
+    // even though that shouldn't be the case and might be unwise
+    Expected<std::unique_ptr<MemoryBuffer>> Obj = CompileLayer.getCompiler()(M);
+    if (!Obj) {
+#ifndef __clang_analyzer__ // reportError calls an arbitrary function, which the static analyzer thinks might be a safepoint
+        ES.reportError(Obj.takeError());
+#endif
+        errs() << "Failed to add module to JIT!\n";
+        errs() << "Dumping failing module\n" << M << "\n";
+        return;
+    }
+    { auto release = std::move(Lock); }
+    auto Err = JuliaOJIT::addObjectFile(JD, std::move(*Obj));
+    if (Err) {
+#ifndef __clang_analyzer__ // reportError calls an arbitrary function, which the static analyzer thinks might be a safepoint
+        ES.reportError(std::move(Err));
+#endif
+        errs() << "Failed to add objectfile to JIT!\n";
+        abort();
     }
 }
 
 Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize)
 {
-    if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error
-            {
+    if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error {
             if (M.getDataLayout().isDefault())
                 M.setDataLayout(DL);
             if (M.getDataLayout() != DL)
@@ -1515,38 +2142,57 @@ Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM,
                     M.getDataLayout().getStringRepresentation() + " (module) vs " +
                     DL.getStringRepresentation() + " (jit)",
                 inconvertibleErrorCode());
-
+            // OrcJIT requires that all modules / files have unique names:
+            M.setModuleIdentifier((M.getModuleIdentifier() + Twine("-") + Twine(jl_atomic_fetch_add_relaxed(&jitcounter, 1))).str());
             return Error::success();
-            }))
+        }))
         return Err;
-    return ExternalCompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM));
+    //if (ShouldOptimize)
+    //    return OptimizeLayer.add(JD, std::move(TSM));
+    return CompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM));
 }
 
 Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {
     assert(Obj && "Can not add null object");
-    return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj));
+    return ObjectLayer.add(JD.getDefaultResourceTracker(), std::move(Obj));
 }
 
-JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly)
+SmallVector<uint64_t> JuliaOJIT::findSymbols(ArrayRef<StringRef> Names)
+{
+    // assert(MemMgr.use_count() == 1); (true single-threaded, but slightly race-y to assert it with concurrent threads)
+    DenseMap<orc::NonOwningSymbolStringPtr, size_t> Unmangled;
+    orc::SymbolLookupSet Exports;
+    for (StringRef Name : Names) {
+        auto Mangled = ES.intern(getMangledName(Name));
+        Unmangled[NonOwningSymbolStringPtr(Mangled)] = Unmangled.size();
+        Exports.add(std::move(Mangled));
+    }
+    SymbolMap Syms = cantFail(::safelookup(ES, orc::makeJITDylibSearchOrder(ArrayRef(&JD)), std::move(Exports)));
+    SmallVector<uint64_t> Addrs(Names.size());
+    for (auto it : Syms) {
+        Addrs[Unmangled.at(orc::NonOwningSymbolStringPtr(it.first))] = it.second.getAddress().getValue();
+    }
+    return Addrs;
+}
+
+Expected<ExecutorSymbolDef> JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly)
 {
     orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD};
-    ArrayRef<orc::JITDylib*> SearchOrder = makeArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 3 : 1);
-    auto Sym = ES.lookup(SearchOrder, Name);
-    if (Sym)
-        return *Sym;
-    return Sym.takeError();
+    ArrayRef<orc::JITDylib*> SearchOrder = ArrayRef<orc::JITDylib*>(&SearchOrders[0], ExportedSymbolsOnly ? 3 : 1);
+    auto Sym = ::safelookup(ES, SearchOrder, Name);
+    return Sym;
 }
 
-JL_JITSymbol JuliaOJIT::findUnmangledSymbol(StringRef Name)
+Expected<ExecutorSymbolDef> JuliaOJIT::findUnmangledSymbol(StringRef Name)
 {
     return findSymbol(getMangledName(Name), true);
 }
 
-Expected<JITEvaluatedSymbol> JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly)
+Expected<ExecutorSymbolDef> JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly)
 {
     orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD};
-    ArrayRef<orc::JITDylib*> SearchOrder = makeArrayRef(&SearchOrders[0], ExternalJDOnly ? 1 : 3);
-    auto Sym = ES.lookup(SearchOrder, getMangledName(Name));
+    ArrayRef<orc::JITDylib*> SearchOrder = ArrayRef<orc::JITDylib*>(&SearchOrders[0], ExternalJDOnly ? 1 : 3);
+    auto Sym = ::safelookup(ES, SearchOrder, getMangledName(Name));
     return Sym;
 }
 
@@ -1557,7 +2203,7 @@ uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name)
         consumeError(addr.takeError());
         return 0;
     }
-    return cantFail(addr.getAddress());
+    return addr->getAddress().getValue();
 }
 
 uint64_t JuliaOJIT::getFunctionAddress(StringRef Name)
@@ -1567,18 +2213,18 @@ uint64_t JuliaOJIT::getFunctionAddress(StringRef Name)
         consumeError(addr.takeError());
         return 0;
     }
-    return cantFail(addr.getAddress());
+    return addr->getAddress().getValue();
 }
 
-StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst)
+StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst)
 {
     std::lock_guard<std::mutex> lock(RLST_mutex);
+    assert(Addr != (uint64_t)jl_fptr_wait_for_compiled_addr);
     std::string *fname = &ReverseLocalSymbolTable[(void*)(uintptr_t)Addr];
     if (fname->empty()) {
         std::string string_fname;
         raw_string_ostream stream_fname(string_fname);
         // try to pick an appropriate name that describes it
-        jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
         if (Addr == (uintptr_t)invoke) {
             stream_fname << "jsysw_";
         }
@@ -1591,7 +2237,7 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod
         else {
             stream_fname << "jlsys_";
         }
-        const char* unadorned_name = jl_symbol_name(codeinst->def->def.method->name);
+        const char* unadorned_name = jl_symbol_name(jl_get_ci_mi(codeinst)->def.method->name);
         stream_fname << unadorned_name << "_" << RLST_inc++;
         *fname = std::move(stream_fname.str()); // store to ReverseLocalSymbolTable
         addGlobalMapping(*fname, Addr);
@@ -1599,34 +2245,88 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod
     return *fname;
 }
 
-
 #ifdef JL_USE_JITLINK
-extern "C" orc::shared::CWrapperFunctionResult
-llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size);
+#define addAbsoluteToMap(map,name) \
+    (map[mangle(#name)] = {ExecutorAddr::fromPtr(&name), JITSymbolFlags::Exported | JITSymbolFlags::Callable}, orc::ExecutorAddr::fromPtr(&name))
 
 void JuliaOJIT::enableJITDebuggingSupport()
 {
     orc::SymbolMap GDBFunctions;
-    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBAllocAction, JITSymbolFlags::Exported | JITSymbolFlags::Callable);
-    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBWrapper")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBWrapper, JITSymbolFlags::Exported | JITSymbolFlags::Callable);
+    addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBAllocAction);
+    auto registerJITLoaderGDBWrapper = addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBWrapper);
     cantFail(JD.define(orc::absoluteSymbols(GDBFunctions)));
+    (void)registerJITLoaderGDBWrapper;
     if (TM->getTargetTriple().isOSBinFormatMachO())
         ObjectLayer.addPlugin(cantFail(orc::GDBJITDebugInfoRegistrationPlugin::Create(ES, JD, TM->getTargetTriple())));
+#ifndef _COMPILER_ASAN_ENABLED_ // TODO: Fix duplicated sections spam #51794
     else if (TM->getTargetTriple().isOSBinFormatELF())
         //EPCDebugObjectRegistrar doesn't take a JITDylib, so we have to directly provide the call address
-        ObjectLayer.addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(ES, std::make_unique<orc::EPCDebugObjectRegistrar>(ES, orc::ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBWrapper))));
+        ObjectLayer.addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(ES, std::make_unique<orc::EPCDebugObjectRegistrar>(ES, registerJITLoaderGDBWrapper)));
+#endif
+}
+
+void JuliaOJIT::enableIntelJITEventListener()
+{
+#if JL_LLVM_VERSION >= 190000
+    if (TM->getTargetTriple().isOSBinFormatELF()) {
+        orc::SymbolMap VTuneFunctions;
+        auto RegisterImplAddr = addAbsoluteToMap(VTuneFunctions,llvm_orc_registerVTuneImpl);
+        auto UnregisterImplAddr = addAbsoluteToMap(VTuneFunctions,llvm_orc_unregisterVTuneImpl);
+        ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create()));
+        //ObjectLayer.addPlugin(cantFail(VTuneSupportPlugin::Create(ES.getExecutorProcessControl(),
+        //                           JD, /*EmitDebugInfo=*/true,
+        //                           /*TestMode=*/false)));
+        bool EmitDebugInfo = true;
+        ObjectLayer.addPlugin(std::make_unique<VTuneSupportPlugin>(
+            ES.getExecutorProcessControl(), RegisterImplAddr, UnregisterImplAddr, EmitDebugInfo));
+    }
+#endif
+}
+
+void JuliaOJIT::enableOProfileJITEventListener()
+{
+    // implement when available in LLVM
+}
+
+void JuliaOJIT::enablePerfJITEventListener()
+{
+#if JL_LLVM_VERSION >= 180000
+    if (TM->getTargetTriple().isOSBinFormatELF()) {
+        orc::SymbolMap PerfFunctions;
+        auto StartAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfStart);
+        auto EndAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfEnd);
+        auto ImplAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfImpl);
+        cantFail(JD.define(orc::absoluteSymbols(PerfFunctions)));
+        ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create()));
+        //ObjectLayer.addPlugin(cantFail(PerfSupportPlugin::Create(
+        //    ES.getExecutorProcessControl(), *JD, true, true)));
+        bool EmitDebugInfo = true, EmitUnwindInfo = true;
+        ObjectLayer.addPlugin(std::make_unique<PerfSupportPlugin>(
+            ES.getExecutorProcessControl(), StartAddr, EndAddr, ImplAddr, EmitDebugInfo, EmitUnwindInfo));
+    }
+#endif
 }
 #else
+void JuliaOJIT::RegisterJITEventListener(JITEventListener *L)
+{
+    if (L)
+        UnlockedObjectLayer.registerJITEventListener(*L);
+}
 void JuliaOJIT::enableJITDebuggingSupport()
 {
     RegisterJITEventListener(JITEventListener::createGDBRegistrationListener());
 }
-
-void JuliaOJIT::RegisterJITEventListener(JITEventListener *L)
+void JuliaOJIT::enableIntelJITEventListener()
 {
-    if (!L)
-        return;
-    this->ObjectLayer.registerJITEventListener(*L);
+    RegisterJITEventListener(JITEventListener::createIntelJITEventListener());
+}
+void JuliaOJIT::enableOProfileJITEventListener()
+{
+    RegisterJITEventListener(JITEventListener::createOProfileJITEventListener());
+}
+void JuliaOJIT::enablePerfJITEventListener()
+{
+    RegisterJITEventListener(JITEventListener::createPerfJITEventListener());
 }
 #endif
 
@@ -1647,180 +2347,40 @@ std::string JuliaOJIT::getMangledName(const GlobalValue *GV)
     return getMangledName(GV->getName());
 }
 
-#ifdef JL_USE_JITLINK
 size_t JuliaOJIT::getTotalBytes() const
 {
-    return total_size.load(std::memory_order_relaxed);
+    auto bytes = jl_atomic_load_relaxed(&jit_bytes_size);
+#ifndef JL_USE_JITLINK
+    size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT;
+    bytes += getRTDyldMemoryManagerTotalBytes(MemMgr.get());
+#endif
+    return bytes;
 }
-#else
-size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT;
 
-size_t JuliaOJIT::getTotalBytes() const
+void JuliaOJIT::addBytes(size_t bytes)
 {
-    return getRTDyldMemoryManagerTotalBytes(MemMgr.get());
+    jl_atomic_fetch_add_relaxed(&jit_bytes_size, bytes);
 }
-#endif
 
 void JuliaOJIT::printTimers()
 {
-#ifdef JL_USE_NEW_PM
     for (auto &printer : PrintLLVMTimers) {
         printer();
     }
-#endif
     reportAndResetTimings();
 }
 
-JuliaOJIT *jl_ExecutionEngine;
-
-// destructively move the contents of src into dest
-// this assumes that the targets of the two modules are the same
-// including the DataLayout and ModuleFlags (for example)
-// and that there is no module-level assembly
-// Comdat is also removed, since the JIT doesn't need it
-void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTSM)
-{
-    ++ModulesMerged;
-    destTSM.withModuleDo([&](Module &dest) JL_NOTSAFEPOINT {
-        srcTSM.withModuleDo([&](Module &src) JL_NOTSAFEPOINT {
-            assert(&dest != &src && "Cannot merge module with itself!");
-            assert(&dest.getContext() == &src.getContext() && "Cannot merge modules with different contexts!");
-            assert(dest.getDataLayout() == src.getDataLayout() && "Cannot merge modules with different data layouts!");
-            assert(dest.getTargetTriple() == src.getTargetTriple() && "Cannot merge modules with different target triples!");
-
-            for (Module::global_iterator I = src.global_begin(), E = src.global_end(); I != E;) {
-                GlobalVariable *sG = &*I;
-                GlobalVariable *dG = cast_or_null<GlobalVariable>(dest.getNamedValue(sG->getName()));
-                ++I;
-                // Replace a declaration with the definition:
-                if (dG) {
-                    if (sG->isDeclaration()) {
-                        sG->replaceAllUsesWith(dG);
-                        sG->eraseFromParent();
-                        continue;
-                    }
-                    //// If we start using llvm.used, we need to enable and test this
-                    //else if (!dG->isDeclaration() && dG->hasAppendingLinkage() && sG->hasAppendingLinkage()) {
-                    //    auto *dCA = cast<ConstantArray>(dG->getInitializer());
-                    //    auto *sCA = cast<ConstantArray>(sG->getInitializer());
-                    //    SmallVector<Constant *, 16> Init;
-                    //    for (auto &Op : dCA->operands())
-                    //        Init.push_back(cast_or_null<Constant>(Op));
-                    //    for (auto &Op : sCA->operands())
-                    //        Init.push_back(cast_or_null<Constant>(Op));
-                    //    Type *Int8PtrTy = Type::getInt8PtrTy(dest.getContext());
-                    //    ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size());
-                    //    GlobalVariable *GV = new GlobalVariable(dest, ATy, dG->isConstant(),
-                    //            GlobalValue::AppendingLinkage, ConstantArray::get(ATy, Init), "",
-                    //            dG->getThreadLocalMode(), dG->getType()->getAddressSpace());
-                    //    GV->copyAttributesFrom(dG);
-                    //    sG->replaceAllUsesWith(GV);
-                    //    dG->replaceAllUsesWith(GV);
-                    //    GV->takeName(sG);
-                    //    sG->eraseFromParent();
-                    //    dG->eraseFromParent();
-                    //    continue;
-                    //}
-                    else {
-                        assert(dG->isDeclaration() || dG->getInitializer() == sG->getInitializer());
-                        dG->replaceAllUsesWith(sG);
-                        dG->eraseFromParent();
-                    }
-                }
-                // Reparent the global variable:
-                sG->removeFromParent();
-                dest.getGlobalList().push_back(sG);
-                // Comdat is owned by the Module
-                sG->setComdat(nullptr);
-            }
-
-            for (Module::iterator I = src.begin(), E = src.end(); I != E;) {
-                Function *sG = &*I;
-                Function *dG = cast_or_null<Function>(dest.getNamedValue(sG->getName()));
-                ++I;
-                // Replace a declaration with the definition:
-                if (dG) {
-                    if (sG->isDeclaration()) {
-                        sG->replaceAllUsesWith(dG);
-                        sG->eraseFromParent();
-                        continue;
-                    }
-                    else {
-                        assert(dG->isDeclaration());
-                        dG->replaceAllUsesWith(sG);
-                        dG->eraseFromParent();
-                    }
-                }
-                // Reparent the global variable:
-                sG->removeFromParent();
-                dest.getFunctionList().push_back(sG);
-                // Comdat is owned by the Module
-                sG->setComdat(nullptr);
-            }
-
-            for (Module::alias_iterator I = src.alias_begin(), E = src.alias_end(); I != E;) {
-                GlobalAlias *sG = &*I;
-                GlobalAlias *dG = cast_or_null<GlobalAlias>(dest.getNamedValue(sG->getName()));
-                ++I;
-                if (dG) {
-                    if (!dG->isDeclaration()) { // aliases are always definitions, so this test is reversed from the above two
-                        sG->replaceAllUsesWith(dG);
-                        sG->eraseFromParent();
-                        continue;
-                    }
-                    else {
-                        dG->replaceAllUsesWith(sG);
-                        dG->eraseFromParent();
-                    }
-                }
-                sG->removeFromParent();
-                dest.getAliasList().push_back(sG);
-            }
-
-            // metadata nodes need to be explicitly merged not just copied
-            // so there are special passes here for each known type of metadata
-            NamedMDNode *sNMD = src.getNamedMetadata("llvm.dbg.cu");
-            if (sNMD) {
-                NamedMDNode *dNMD = dest.getOrInsertNamedMetadata("llvm.dbg.cu");
-                for (MDNode *I : sNMD->operands()) {
-                    dNMD->addOperand(I);
-                }
-            }
-        });
-    });
+void JuliaOJIT::optimizeDLSyms(Module &M) {
+    (*DLSymOpt)(M);
 }
 
-// optimize memory by turning long strings into memoized copies, instead of
-// making a copy per object file of output.
-void JuliaOJIT::shareStrings(Module &M)
-{
-    ++InternedGlobals;
-    std::vector<GlobalVariable*> erase;
-    for (auto &GV : M.globals()) {
-        if (!GV.hasInitializer() || !GV.isConstant())
-            continue;
-        ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(GV.getInitializer());
-        if (CDS == nullptr)
-            continue;
-        StringRef data = CDS->getRawDataValues();
-        if (data.size() > 16) { // only for long strings: keep short ones as values
-            Type *T_size = Type::getIntNTy(GV.getContext(), sizeof(void*) * 8);
-            Constant *v = ConstantExpr::getIntToPtr(
-                ConstantInt::get(T_size, (uintptr_t)(*ES.intern(data)).data()),
-                GV.getType());
-            GV.replaceAllUsesWith(v);
-            erase.push_back(&GV);
-        }
-    }
-    for (auto GV : erase)
-        GV->eraseFromParent();
-}
+JuliaOJIT *jl_ExecutionEngine;
 
 //TargetMachine pass-through methods
 
 std::unique_ptr<TargetMachine> JuliaOJIT::cloneTargetMachine() const
 {
-    return std::unique_ptr<TargetMachine>(getTarget()
+    auto NewTM = std::unique_ptr<TargetMachine>(getTarget()
         .createTargetMachine(
             getTargetTriple().str(),
             getTargetCPU(),
@@ -1829,6 +2389,8 @@ std::unique_ptr<TargetMachine> JuliaOJIT::cloneTargetMachine() const
             TM->getRelocationModel(),
             TM->getCodeModel(),
             TM->getOptLevel()));
+    fixupTM(*NewTM);
+    return NewTM;
 }
 
 const Triple& JuliaOJIT::getTargetTriple() const {
@@ -1855,94 +2417,64 @@ static void jl_decorate_module(Module &M) {
     if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
         // Add special values used by debuginfo to build the UnwindData table registration for Win64
         // This used to be GV, but with https://reviews.llvm.org/D100944 we no longer can emit GV into `.text`
-        // TODO: The data is set in debuginfo.cpp but it should be okay to actually emit it here.
-        M.appendModuleInlineAsm("\
-    .section .text                  \n\
-    .type   __UnwindData,@object    \n\
-    .p2align        2, 0x90         \n\
-    __UnwindData:                   \n\
-        .zero   12                  \n\
-        .size   __UnwindData, 12    \n\
-                                    \n\
-        .type   __catchjmp,@object  \n\
-        .p2align        2, 0x90     \n\
-    __catchjmp:                     \n\
-        .zero   12                  \n\
-        .size   __catchjmp, 12");
-    }
-}
-
-// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
-static int jl_add_to_ee(
-        orc::ThreadSafeModule &M,
-        const StringMap<orc::ThreadSafeModule*> &NewExports,
-        DenseMap<orc::ThreadSafeModule*, int> &Queued,
-        std::vector<orc::ThreadSafeModule*> &Stack)
-{
-    // First check if the TSM is empty (already compiled)
-    if (!M)
-        return 0;
-    // Next check and record if it is on the stack somewhere
-    {
-        auto &Id = Queued[&M];
-        if (Id)
-            return Id;
-        Stack.push_back(&M);
-        Id = Stack.size();
-    }
-    // Finally work out the SCC
-    int depth = Stack.size();
-    int MergeUp = depth;
-    std::vector<orc::ThreadSafeModule*> Children;
-    M.withModuleDo([&](Module &m) JL_NOTSAFEPOINT {
-        for (auto &F : m.global_objects()) {
-            if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
-                auto Callee = NewExports.find(F.getName());
-                if (Callee != NewExports.end()) {
-                    auto *CM = Callee->second;
-                    if (*CM && CM != &M) {
-                        auto Down = Queued.find(CM);
-                        if (Down != Queued.end())
-                            MergeUp = std::min(MergeUp, Down->second);
-                        else
-                            Children.push_back(CM);
-                    }
-                }
-            }
-        }
-    });
-    assert(MergeUp > 0);
-    for (auto *CM : Children) {
-        int Down = jl_add_to_ee(*CM, NewExports, Queued, Stack);
-        assert(Down <= (int)Stack.size());
-        if (Down)
-            MergeUp = std::min(MergeUp, Down);
-    }
-    if (MergeUp < depth)
-        return MergeUp;
-    while (1) {
-        // Not in a cycle (or at the top of it)
-        // remove SCC state and merge every CM from the cycle into M
-        orc::ThreadSafeModule *CM = Stack.back();
-        auto it = Queued.find(CM);
-        assert(it->second == (int)Stack.size());
-        Queued.erase(it);
-        Stack.pop_back();
-        if ((int)Stack.size() < depth) {
-            assert(&M == CM);
-            break;
-        }
-        jl_merge_module(M, std::move(*CM));
+        // and with JITLink it became difficult to change the content afterwards, but we
+        // would prefer that this simple content wasn't recompiled in every single module,
+        // so we emit the necessary PLT trampoline as inline assembly.
+        // This is somewhat duplicated with the .pdata section, but we haven't been able to
+        // use that yet due to relocation issues.
+#define ASM_USES_ELF // use ELF or COFF syntax based on FORCE_ELF
+        StringRef inline_asm(
+    ".section"
+#if JL_LLVM_VERSION >= 180000
+        " .ltext,\"ax\",@progbits\n"
+#else
+        " .text\n"
+#endif
+    ".globl __julia_personality\n"
+    "\n"
+#ifdef ASM_USES_ELF
+    ".type __UnwindData,@object\n"
+#else
+    ".def __UnwindData\n"
+    ".scl 2\n"
+    ".type 0\n"
+    ".endef\n"
+#endif
+    ".p2align        2, 0x90\n"
+    "__UnwindData:\n"
+    "  .byte 0x09;\n" // version info, UNW_FLAG_EHANDLER
+    "  .byte 4;\n"    // size of prolog (bytes)
+    "  .byte 2;\n"    // count of unwind codes (slots)
+    "  .byte 0x05;\n" // frame register (rbp) = rsp
+    "  .byte 4;\n"    // second instruction
+    "  .byte 0x03;\n" // mov RBP, RSP
+    "  .byte 1;\n"    // first instruction
+    "  .byte 0x50;\n" // push RBP
+    "  .int __catchjmp - "
+#if JL_LLVM_VERSION >= 180000
+    ".ltext;\n" // Section-relative offset (if using COFF and JITLink, this can be relative to __ImageBase instead, though then we could possibly use pdata/xdata directly then)
+#else
+    ".text;\n"
+#endif
+    ".size __UnwindData, 12\n"
+    "\n"
+#ifdef ASM_USES_ELF
+    ".type __catchjmp,@function\n"
+#else
+    ".def __catchjmp\n"
+    ".scl 2\n"
+    ".type 32\n"
+    ".endef\n"
+#endif
+    ".p2align        2, 0x90\n"
+    "__catchjmp:\n"
+    "  movabsq $__julia_personality, %rax\n"
+    "  jmpq *%rax\n"
+    ".size __catchjmp, . - __catchjmp\n"
+    "\n");
+        M.appendModuleInlineAsm(inline_asm);
     }
-    jl_ExecutionEngine->addModule(std::move(M));
-    return 0;
-}
-
-static uint64_t getAddressForFunction(StringRef fname)
-{
-    auto addr = jl_ExecutionEngine->getFunctionAddress(fname);
-    assert(addr);
-    return addr;
+#undef ASM_USES_ELF
 }
 
 // helper function for adding a DLLImport (dlsym) address to the execution engine
@@ -1956,3 +2488,9 @@ size_t jl_jit_total_bytes_impl(void)
 {
     return jl_ExecutionEngine->getTotalBytes();
 }
+
+// API for adding bytes to record being owned by the JIT
+void jl_jit_add_bytes(size_t bytes)
+{
+    jl_ExecutionEngine->addBytes(bytes);
+}
diff --git a/src/jitlayers.h b/src/jitlayers.h
index 3aa3998d3ac23..5c1fb10a84205 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -1,6 +1,9 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#include "llvm/ADT/SmallSet.h"
 #include <llvm/ADT/MapVector.h>
+#include <llvm/ADT/StringSet.h>
+#include <llvm/Support/AllocatorBase.h>
 
 #include <llvm/IR/LLVMContext.h>
 #include <llvm/IR/Constants.h>
@@ -8,6 +11,7 @@
 #include <llvm/IR/Value.h>
 #include <llvm/IR/PassManager.h>
 #include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/PassTimingInfo.h>
 
 #include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
 #include <llvm/ExecutionEngine/Orc/IRTransformLayer.h>
@@ -22,9 +26,11 @@
 #include "julia.h"
 #include "julia_internal.h"
 #include "platform.h"
-
+#include "llvm-codegen-shared.h"
+#include "llvm-version.h"
 #include <stack>
 #include <queue>
+#include <tuple>
 
 // As of LLVM 13, there are two runtime JIT linker implementations, the older
 // RuntimeDyld (used via orc::RTDyldObjectLinkingLayer) and the newer JITLink
@@ -42,23 +48,19 @@
 // and feature support (e.g. Windows, JITEventListeners for various profilers,
 // etc.). Thus, we currently only use JITLink where absolutely required, that is,
 // for Mac/aarch64 and Linux/aarch64.
-// #define JL_FORCE_JITLINK
+//#define JL_FORCE_JITLINK
 
 #if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_)
 # define HAS_SANITIZER
 #endif
 // The sanitizers don't play well with our memory manager
 
-#if defined(JL_FORCE_JITLINK) || JL_LLVM_VERSION >= 150000 && defined(HAS_SANITIZER)
+#if defined(JL_FORCE_JITLINK) || defined(_CPU_AARCH64_) || defined(HAS_SANITIZER)
+# define JL_USE_JITLINK
+#endif
+
+#if defined(_CPU_RISCV64_)
 # define JL_USE_JITLINK
-#else
-# if defined(_CPU_AARCH64_)
-#  if defined(_OS_LINUX_) && JL_LLVM_VERSION < 150000
-#   pragma message("On aarch64-gnu-linux, LLVM version >= 15 is required for JITLink; fallback suffers from occasional segfaults")
-#  else
-#   define JL_USE_JITLINK
-#  endif
-# endif
 #endif
 
 # include <llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h>
@@ -67,51 +69,75 @@
 
 using namespace llvm;
 
-extern "C" jl_cgparams_t jl_default_cgparams;
-
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeContext, LLVMOrcThreadSafeContextRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeModule, LLVMOrcThreadSafeModuleRef)
 
 void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) JL_NOTSAFEPOINT;
-void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false, bool external_use=false) JL_NOTSAFEPOINT;
-void addMachinePasses(legacy::PassManagerBase *PM, int optlevel) JL_NOTSAFEPOINT;
-void jl_merge_module(orc::ThreadSafeModule &dest, orc::ThreadSafeModule src) JL_NOTSAFEPOINT;
 GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M) JL_NOTSAFEPOINT;
 DataLayout jl_create_datalayout(TargetMachine &TM) JL_NOTSAFEPOINT;
 
-static inline bool imaging_default() JL_NOTSAFEPOINT {
-    return jl_options.image_codegen || (jl_generating_output() && (!jl_options.incremental || jl_options.use_pkgimages));
-}
-
 struct OptimizationOptions {
     bool lower_intrinsics;
     bool dump_native;
     bool external_use;
     bool llvm_only;
+    bool always_inline;
+    bool enable_early_simplifications;
+    bool enable_early_optimizations;
+    bool enable_scalar_optimizations;
+    bool enable_loop_optimizations;
+    bool enable_vector_pipeline;
+    bool remove_ni;
+    bool cleanup;
+    bool warn_missed_transformations;
+    bool sanitize_memory;
+    bool sanitize_thread;
+    bool sanitize_address;
 
     static constexpr OptimizationOptions defaults(
         bool lower_intrinsics=true,
         bool dump_native=false,
         bool external_use=false,
-        bool llvm_only=false) {
-        return {lower_intrinsics, dump_native, external_use, llvm_only};
+        bool llvm_only=false,
+        bool always_inline=true,
+        bool enable_early_simplifications=true,
+        bool enable_early_optimizations=true,
+        bool enable_scalar_optimizations=true,
+        bool enable_loop_optimizations=true,
+        bool enable_vector_pipeline=true,
+        bool remove_ni=true,
+        bool cleanup=true,
+        bool warn_missed_transformations=false,
+#ifdef _COMPILER_MSAN_ENABLED_
+        bool sanitize_memory=true,
+#else
+        bool sanitize_memory=false,
+#endif
+#ifdef _COMPILER_TSAN_ENABLED_
+        bool sanitize_thread=true,
+#else
+        bool sanitize_thread=false,
+#endif
+#ifdef _COMPILER_ASAN_ENABLED_
+        bool sanitize_address=true
+#else
+        bool sanitize_address=false
+#endif
+) JL_NOTSAFEPOINT {
+        return {lower_intrinsics, dump_native, external_use, llvm_only,
+                always_inline, enable_early_simplifications,
+                enable_early_optimizations, enable_scalar_optimizations,
+                enable_loop_optimizations, enable_vector_pipeline,
+                remove_ni, cleanup, warn_missed_transformations,
+                sanitize_memory, sanitize_thread, sanitize_address};
     }
 };
 
-// LLVM's new pass manager is scheduled to replace the legacy pass manager
-// for middle-end IR optimizations.
-#if JL_LLVM_VERSION >= 150000
-#define JL_USE_NEW_PM
-#endif
-
 struct NewPM {
     std::unique_ptr<TargetMachine> TM;
-    StandardInstrumentations SI;
-    std::unique_ptr<PassInstrumentationCallbacks> PIC;
-    PassBuilder PB;
-    ModulePassManager MPM;
     OptimizationLevel O;
-
+    OptimizationOptions options;
+    TimePassesHandler TimePasses;
     NewPM(std::unique_ptr<TargetMachine> TM, OptimizationLevel O, OptimizationOptions options = OptimizationOptions::defaults()) JL_NOTSAFEPOINT;
     ~NewPM() JL_NOTSAFEPOINT;
 
@@ -141,11 +167,11 @@ struct jl_locked_stream {
         std::unique_lock<std::mutex> lck;
         ios_t *&stream;
 
-        lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT
+        lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER
             : lck(mutex), stream(stream) {}
         lock(lock&) = delete;
         lock(lock&&) JL_NOTSAFEPOINT = default;
-        ~lock() JL_NOTSAFEPOINT = default;
+        ~lock() JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT = default;
 
         ios_t *&operator*() JL_NOTSAFEPOINT {
             return stream;
@@ -164,18 +190,23 @@ struct jl_locked_stream {
         }
     };
 
-    jl_locked_stream() JL_NOTSAFEPOINT = default;
-    ~jl_locked_stream() JL_NOTSAFEPOINT = default;
+    jl_locked_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER = default;
+    ~jl_locked_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default;
 
     lock operator*() JL_NOTSAFEPOINT {
         return lock(mutex, stream);
     }
 };
 
-typedef struct _jl_llvm_functions_t {
+struct jl_llvm_functions_t {
     std::string functionObject;     // jlcall llvm Function name
     std::string specFunctionObject; // specialized llvm Function name
-} jl_llvm_functions_t;
+    jl_llvm_functions_t() JL_NOTSAFEPOINT = default;
+    jl_llvm_functions_t &operator=(const jl_llvm_functions_t&) JL_NOTSAFEPOINT = default;
+    jl_llvm_functions_t(const jl_llvm_functions_t &) JL_NOTSAFEPOINT = default;
+    jl_llvm_functions_t(jl_llvm_functions_t &&) JL_NOTSAFEPOINT = default;
+    ~jl_llvm_functions_t() JL_NOTSAFEPOINT = default;
+};
 
 struct jl_returninfo_t {
     llvm::FunctionCallee decl;
@@ -193,21 +224,45 @@ struct jl_returninfo_t {
     unsigned return_roots;
 };
 
-typedef std::tuple<jl_returninfo_t::CallingConv, unsigned, llvm::Function*, bool> jl_codegen_call_target_t;
+struct jl_codegen_call_target_t {
+    jl_returninfo_t::CallingConv cc;
+    unsigned return_roots;
+    llvm::Function *decl;
+    llvm::Function *oc;
+    bool specsig;
+    bool external_linkage; // whether codegen would like this edge to be externally-available
+    bool private_linkage; // whether codegen would like this edge to be internally-available
+    // external = ExternalLinkage (similar to "extern")
+    // private = InternalLinkage (similar to "static")
+    // external+private = AvailableExternallyLinkage+ExternalLinkage or ExternalLinkage (similar to "static inline")
+    // neither = unused
+};
+
+// reification of a call to jl_jit_abi_convert, so that it isn't necessary to parse the Modules to recover this info
+struct cfunc_decl_t {
+    jl_abi_t abi;
+    llvm::GlobalVariable *cfuncdata;
+};
 
-typedef struct _jl_codegen_params_t {
+typedef SmallVector<std::pair<jl_code_instance_t*, jl_codegen_call_target_t>, 0> jl_workqueue_t;
+
+typedef std::list<std::tuple<std::string, std::string, unsigned int>> CallFrames;
+struct jl_codegen_params_t {
     orc::ThreadSafeContext tsctx;
     orc::ThreadSafeContext::Lock tsctx_lock;
     DataLayout DL;
     Triple TargetTriple;
 
-    inline LLVMContext &getContext() {
+    inline LLVMContext &getContext() JL_NOTSAFEPOINT {
         return *tsctx.getContext();
     }
     typedef StringMap<GlobalVariable*> SymMapGV;
     // outputs
-    std::vector<std::pair<jl_code_instance_t*, jl_codegen_call_target_t>> workqueue;
-    std::map<void*, GlobalVariable*> globals;
+    jl_workqueue_t workqueue;
+    SmallVector<cfunc_decl_t,0> cfuncs;
+    std::map<void*, GlobalVariable*> global_targets;
+    jl_array_t *temporary_roots = nullptr;
+    SmallSet<jl_value_t *, 8> temporary_roots_set;
     std::map<std::tuple<jl_code_instance_t*,bool>, GlobalVariable*> external_fns;
     std::map<jl_datatype_t*, DIType*> ditypes;
     std::map<jl_datatype_t*, Type*> llvmtypes;
@@ -232,22 +287,35 @@ typedef struct _jl_codegen_params_t {
     std::unique_ptr<Module> _shared_module;
     inline Module &shared_module();
     // inputs
-    size_t world = 0;
     const jl_cgparams_t *params = &jl_default_cgparams;
     bool cache = false;
     bool external_linkage = false;
-    bool imaging;
-    int debug_level;
-    _jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple)
-        : tsctx(std::move(ctx)), tsctx_lock(tsctx.getLock()),
-            DL(std::move(DL)), TargetTriple(std::move(triple)), imaging(imaging_default()) {}
-} jl_codegen_params_t;
+    bool imaging_mode;
+    bool safepoint_on_entry = true;
+    bool use_swiftcc = true;
+    jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple) JL_NOTSAFEPOINT  JL_NOTSAFEPOINT_ENTER
+      : tsctx(std::move(ctx)),
+        tsctx_lock(tsctx.getLock()),
+        DL(std::move(DL)),
+        TargetTriple(std::move(triple)),
+        imaging_mode(1)
+    {
+        // LLVM's RISC-V back-end currently does not support the Swift calling convention
+        if (TargetTriple.isRISCV())
+            use_swiftcc = false;
+    }
+    jl_codegen_params_t(jl_codegen_params_t &&) JL_NOTSAFEPOINT = default;
+    ~jl_codegen_params_t() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default;
+};
+
+const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params);
 
 jl_llvm_functions_t jl_emit_code(
         orc::ThreadSafeModule &M,
         jl_method_instance_t *mi,
         jl_code_info_t *src,
-        jl_value_t *jlrettype,
+        jl_value_t *abi_at,
+        jl_value_t *abi_rt,
         jl_codegen_params_t &params);
 
 jl_llvm_functions_t jl_emit_codeinst(
@@ -256,24 +324,43 @@ jl_llvm_functions_t jl_emit_codeinst(
         jl_code_info_t *src,
         jl_codegen_params_t &params);
 
+jl_llvm_functions_t jl_emit_codedecls(
+        orc::ThreadSafeModule &M,
+        jl_code_instance_t *codeinst,
+        jl_codegen_params_t &params);
+
+void linkFunctionBody(Function &Dst, Function &Src) JL_NOTSAFEPOINT;
+void emit_always_inline(orc::ThreadSafeModule &result_m, jl_codegen_params_t &params) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER;
+
 enum CompilationPolicy {
     Default = 0,
     Extern = 1,
 };
 
-typedef std::map<jl_code_instance_t*, std::pair<orc::ThreadSafeModule, jl_llvm_functions_t>> jl_workqueue_t;
-
-void jl_compile_workqueue(
-    jl_workqueue_t &emitted,
-    Module &original,
-    jl_codegen_params_t &params,
-    CompilationPolicy policy);
-
-Function *jl_cfunction_object(jl_function_t *f, jl_value_t *rt, jl_tupletype_t *argt,
+Function *jl_cfunction_object(jl_value_t *f, jl_value_t *rt, jl_tupletype_t *argt,
     jl_codegen_params_t &params);
 
+extern "C" JL_DLLEXPORT_CODEGEN
+void *jl_jit_abi_convert(jl_task_t *ct, jl_abi_t from_abi, _Atomic(void*) *fptr, _Atomic(size_t) *last_world, void *data);
+std::string emit_abi_dispatcher(Module *M, jl_codegen_params_t &params, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *invoke);
+std::string emit_abi_converter(Module *M, jl_codegen_params_t &params, jl_abi_t from_abi, jl_code_instance_t *codeinst, Value *target, bool target_specsig);
+std::string emit_abi_constreturn(Module *M, jl_codegen_params_t &params, jl_abi_t from_abi, jl_value_t *rettype_const);
+std::string emit_abi_constreturn(Module *M, jl_codegen_params_t &params, bool specsig, jl_code_instance_t *codeinst);
+
+Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t &params) JL_NOTSAFEPOINT;
+void emit_specsig_to_fptr1(
+        Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots,
+        jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure,
+        size_t nargs,
+        jl_codegen_params_t &params,
+        Function *target) JL_NOTSAFEPOINT;
+Function *get_or_emit_fptr1(StringRef Name, Module *M) JL_NOTSAFEPOINT;
+void jl_init_function(Function *F, const jl_codegen_params_t &params) JL_NOTSAFEPOINT;
+
 void add_named_global(StringRef name, void *addr) JL_NOTSAFEPOINT;
 
+Constant *literal_pointer_val_slot(jl_codegen_params_t &params, Module *M, jl_value_t *p);
+
 static inline Constant *literal_static_pointer_val(const void *p, Type *T) JL_NOTSAFEPOINT
 {
     // this function will emit a static pointer into the generated code
@@ -291,22 +378,61 @@ static const inline char *name_from_method_instance(jl_method_instance_t *li) JL
     return jl_is_method(li->def.method) ? jl_symbol_name(li->def.method->name) : "top-level scope";
 }
 
-typedef JITSymbol JL_JITSymbol;
-// The type that is similar to SymbolInfo on LLVM 4.0 is actually
-// `JITEvaluatedSymbol`. However, we only use this type when a JITSymbol
-// is expected.
-typedef JITSymbol JL_SymbolInfo;
+template <size_t offset = 0>
+class MaxAlignedAllocImpl
+    : public AllocatorBase<MaxAlignedAllocImpl<offset>> {
+
+public:
+    MaxAlignedAllocImpl() JL_NOTSAFEPOINT = default;
+
+    static Align alignment(size_t Size) JL_NOTSAFEPOINT {
+        // Define the maximum alignment we expect to require, from offset bytes off
+        // the returned pointer, this is >= alignof(std::max_align_t), which is too
+        // small often to actually use.
+        const size_t MaxAlignment = JL_CACHE_BYTE_ALIGNMENT;
+        if (Size <= offset)
+            return Align(1);
+        return Align(std::min((size_t)llvm::PowerOf2Ceil(Size - offset), MaxAlignment));
+    }
+
+    LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) {
+        Align MaxAlign = alignment(Size);
+        assert(Alignment < MaxAlign); (void)Alignment;
+        return jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset);
+    }
+
+    inline LLVM_ATTRIBUTE_RETURNS_NONNULL
+    void * Allocate(size_t Size, size_t Alignment) {
+        return Allocate(Size, Align(Alignment));
+    }
+
+    // Pull in base class overloads.
+    using AllocatorBase<MaxAlignedAllocImpl>::Allocate;
+
+    void Deallocate(const void *Ptr, size_t Size, size_t /*Alignment*/) { abort(); }
+
+    // Pull in base class overloads.
+    using AllocatorBase<MaxAlignedAllocImpl>::Deallocate;
+
+private:
+};
+using MaxAlignedAlloc = MaxAlignedAllocImpl<>;
 
 using CompilerResultT = Expected<std::unique_ptr<llvm::MemoryBuffer>>;
 using OptimizerResultT = Expected<orc::ThreadSafeModule>;
+using SharedBytesT = StringSet<MaxAlignedAllocImpl<sizeof(StringSet<>::MapEntryTy)>>;
 
 class JuliaOJIT {
+private:
+    // any verification the user wants to do when adding an OwningResource to the pool
+    template <typename AnyT>
+    static void verifyResource(AnyT &resource) JL_NOTSAFEPOINT { }
+    static void verifyResource(orc::ThreadSafeContext &context) JL_NOTSAFEPOINT { assert(context.getContext()); }
 public:
 #ifdef JL_USE_JITLINK
     typedef orc::ObjectLinkingLayer ObjLayerT;
 #else
     typedef orc::RTDyldObjectLinkingLayer ObjLayerT;
-#endif
     struct LockLayerT : public orc::ObjectLayer {
 
         LockLayerT(orc::ObjectLayer &BaseLayer) JL_NOTSAFEPOINT : orc::ObjectLayer(BaseLayer.getExecutionSession()), BaseLayer(BaseLayer) {}
@@ -314,23 +440,27 @@ class JuliaOJIT {
 
         void emit(std::unique_ptr<orc::MaterializationResponsibility> R,
                             std::unique_ptr<MemoryBuffer> O) override {
+            JL_TIMING(LLVM_JIT, JIT_Link);
 #ifndef JL_USE_JITLINK
-            std::lock_guard<std::mutex> lock(EmissionMutex);
+            std::lock_guard<std::recursive_mutex> lock(EmissionMutex);
 #endif
             BaseLayer.emit(std::move(R), std::move(O));
         }
     private:
         orc::ObjectLayer &BaseLayer;
-        std::mutex EmissionMutex;
+        std::recursive_mutex EmissionMutex;
     };
+#endif
     typedef orc::IRCompileLayer CompileLayerT;
+    typedef orc::IRTransformLayer JITPointersLayerT;
     typedef orc::IRTransformLayer OptimizeLayerT;
+    typedef orc::IRTransformLayer OptSelLayerT;
     typedef object::OwningBinary<object::ObjectFile> OwningObj;
     template
     <typename ResourceT, size_t max = 0,
         typename BackingT = std::stack<ResourceT,
             std::conditional_t<max == 0,
-                SmallVector<ResourceT>,
+                SmallVector<ResourceT, 0>,
                 SmallVector<ResourceT, max>
             >
         >
@@ -347,11 +477,16 @@ class JuliaOJIT {
                 : pool(pool), resource(std::move(resource)) {}
             OwningResource(const OwningResource &) = delete;
             OwningResource &operator=(const OwningResource &) = delete;
-            OwningResource(OwningResource &&) JL_NOTSAFEPOINT = default;
+            OwningResource(OwningResource &&other) JL_NOTSAFEPOINT
+                : pool(other.pool), resource(std::move(other.resource)) {
+                    other.resource.reset();
+                }
             OwningResource &operator=(OwningResource &&) JL_NOTSAFEPOINT = default;
             ~OwningResource() JL_NOTSAFEPOINT { // _LEAVE
-                if (resource)
+                if (resource) {
+                    verifyResource(*resource);
                     pool.release(std::move(*resource));
+                }
             }
             ResourceT release() JL_NOTSAFEPOINT {
                 ResourceT res(std::move(*resource));
@@ -384,7 +519,7 @@ class JuliaOJIT {
             }
             private:
             ResourcePool &pool;
-            llvm::Optional<ResourceT> resource;
+            std::optional<ResourceT> resource;
         };
 
         OwningResource operator*() JL_NOTSAFEPOINT {
@@ -436,35 +571,16 @@ class JuliaOJIT {
 
         std::unique_ptr<WNMutex> mutex;
     };
-    struct PipelineT {
-        PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &PrintLLVMTimers);
-        CompileLayerT CompileLayer;
-        OptimizeLayerT OptimizeLayer;
-    };
 
-    struct OptSelLayerT : orc::IRLayer {
-
-        template<size_t N>
-        OptSelLayerT(const std::array<std::unique_ptr<PipelineT>, N> &optimizers) JL_NOTSAFEPOINT
-            : orc::IRLayer(optimizers[0]->OptimizeLayer.getExecutionSession(),
-                optimizers[0]->OptimizeLayer.getManglingOptions()),
-            optimizers(optimizers.data()),
-            count(N) {
-            static_assert(N > 0, "Expected array with at least one optimizer!");
-        }
-        ~OptSelLayerT() JL_NOTSAFEPOINT = default;
+    typedef ResourcePool<orc::ThreadSafeContext, 0, std::queue<orc::ThreadSafeContext>> ContextPoolT;
 
-        void emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) override;
+    struct DLSymOptimizer;
+    struct OptimizerT;
+    struct JITPointersT;
 
-        private:
-        const std::unique_ptr<PipelineT> * const optimizers;
-        size_t count;
-    };
-
-private:
-    // Custom object emission notification handler for the JuliaOJIT
-    template <typename ObjT, typename LoadResult>
-    void registerObject(const ObjT &Obj, const LoadResult &LO);
+#ifndef JL_USE_JITLINK
+    void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT;
+#endif
 
 public:
 
@@ -472,39 +588,31 @@ class JuliaOJIT {
     ~JuliaOJIT() JL_NOTSAFEPOINT;
 
     void enableJITDebuggingSupport() JL_NOTSAFEPOINT;
-#ifndef JL_USE_JITLINK
-    // JITLink doesn't support old JITEventListeners (yet).
-    void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT;
-#endif
+    void enableIntelJITEventListener() JL_NOTSAFEPOINT;
+    void enableOProfileJITEventListener() JL_NOTSAFEPOINT;
+    void enablePerfJITEventListener() JL_NOTSAFEPOINT;
 
     orc::SymbolStringPtr mangle(StringRef Name) JL_NOTSAFEPOINT;
     void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT;
-    void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT;
+    void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER;
 
     //Methods for the C API
     Error addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM,
                             bool ShouldOptimize = false) JL_NOTSAFEPOINT;
     Error addObjectFile(orc::JITDylib &JD,
                         std::unique_ptr<MemoryBuffer> Obj) JL_NOTSAFEPOINT;
-    Expected<JITEvaluatedSymbol> findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT;
-    orc::IRCompileLayer &getIRCompileLayer() JL_NOTSAFEPOINT { return ExternalCompileLayer; };
+    orc::IRCompileLayer &getIRCompileLayer() JL_NOTSAFEPOINT { return CompileLayer; };
     orc::ExecutionSession &getExecutionSession() JL_NOTSAFEPOINT { return ES; }
     orc::JITDylib &getExternalJITDylib() JL_NOTSAFEPOINT { return ExternalJD; }
 
-    JL_JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT;
-    JL_JITSymbol findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT;
+    Expected<llvm::orc::ExecutorSymbolDef> findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT;
+    Expected<llvm::orc::ExecutorSymbolDef> findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT;
+    Expected<llvm::orc::ExecutorSymbolDef> findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT;
+    SmallVector<uint64_t> findSymbols(ArrayRef<StringRef> Names) JL_NOTSAFEPOINT;
     uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT;
     uint64_t getFunctionAddress(StringRef Name) JL_NOTSAFEPOINT;
-    StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT;
-    auto getContext() JL_NOTSAFEPOINT {
-        return *ContextPool;
-    }
-    orc::ThreadSafeContext acquireContext() { // JL_NOTSAFEPOINT_ENTER?
-        return ContextPool.acquire();
-    }
-    void releaseContext(orc::ThreadSafeContext &&ctx) { // JL_NOTSAFEPOINT_LEAVE?
-        ContextPool.release(std::move(ctx));
-    }
+    StringRef getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT;
+    orc::ThreadSafeContext makeContext() JL_NOTSAFEPOINT;
     const DataLayout& getDataLayout() const JL_NOTSAFEPOINT;
 
     // TargetMachine pass-through methods
@@ -517,21 +625,26 @@ class JuliaOJIT {
     TargetIRAnalysis getTargetIRAnalysis() const JL_NOTSAFEPOINT;
 
     size_t getTotalBytes() const JL_NOTSAFEPOINT;
+    void addBytes(size_t bytes) JL_NOTSAFEPOINT;
     void printTimers() JL_NOTSAFEPOINT;
 
-    jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT {
+    jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER {
         return dump_emitted_mi_name_stream;
     }
-    jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT {
+    jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER {
         return dump_compiles_stream;
     }
-    jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT {
+    jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER {
         return dump_llvm_opt_stream;
     }
-private:
     std::string getMangledName(StringRef Name) JL_NOTSAFEPOINT;
     std::string getMangledName(const GlobalValue *GV) JL_NOTSAFEPOINT;
-    void shareStrings(Module &M) JL_NOTSAFEPOINT;
+
+    // Note that this is a potential safepoint due to jl_get_library_ and jl_dlsym calls
+    // but may be called from inside safe-regions due to jit compilation locks
+    void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER;
+
+private:
 
     const std::unique_ptr<TargetMachine> TM;
     const DataLayout DL;
@@ -544,66 +657,57 @@ class JuliaOJIT {
     std::mutex RLST_mutex{};
     int RLST_inc = 0;
     DenseMap<void*, std::string> ReverseLocalSymbolTable;
+    SharedBytesT SharedBytes;
+
+    std::unique_ptr<DLSymOptimizer> DLSymOpt;
 
     //Compilation streams
     jl_locked_stream dump_emitted_mi_name_stream;
     jl_locked_stream dump_compiles_stream;
     jl_locked_stream dump_llvm_opt_stream;
 
-    std::vector<std::function<void()>> PrintLLVMTimers;
-
-    ResourcePool<orc::ThreadSafeContext, 0, std::queue<orc::ThreadSafeContext>> ContextPool;
+    std::mutex llvm_printing_mutex{};
+    SmallVector<std::function<void()>, 0> PrintLLVMTimers;
 
-#ifndef JL_USE_JITLINK
-    const std::shared_ptr<RTDyldMemoryManager> MemMgr;
-#else
-    std::atomic<size_t> total_size{0};
+    _Atomic(size_t) jit_bytes_size{0};
+    _Atomic(size_t) jitcounter{0};
+#ifdef JL_USE_JITLINK
     const std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr;
-#endif
     ObjLayerT ObjectLayer;
-    LockLayerT LockLayer;
-    const std::array<std::unique_ptr<PipelineT>, 4> Pipelines;
+#else
+    const std::shared_ptr<RTDyldMemoryManager> MemMgr; // shared_ptr protected by LockLayerT.EmissionMutex
+    ObjLayerT UnlockedObjectLayer;
+    LockLayerT ObjectLayer;
+#endif
+    CompileLayerT CompileLayer;
+    std::unique_ptr<JITPointersT> JITPointers;
+    JITPointersLayerT JITPointersLayer;
+    std::unique_ptr<OptimizerT> Optimizers;
+    OptimizeLayerT OptimizeLayer;
     OptSelLayerT OptSelLayer;
-    CompileLayerT ExternalCompileLayer;
-
 };
 extern JuliaOJIT *jl_ExecutionEngine;
-std::unique_ptr<Module> jl_create_llvm_module(StringRef name, LLVMContext &ctx, bool imaging_mode, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT;
-inline orc::ThreadSafeModule jl_create_ts_module(StringRef name, orc::ThreadSafeContext ctx, bool imaging_mode, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT {
+std::unique_ptr<Module> jl_create_llvm_module(StringRef name, LLVMContext &ctx, const DataLayout &DL, const Triple &triple) JL_NOTSAFEPOINT;
+inline orc::ThreadSafeModule jl_create_ts_module(StringRef name, orc::ThreadSafeContext ctx, const DataLayout &DL, const Triple &triple) JL_NOTSAFEPOINT {
     auto lock = ctx.getLock();
-    return orc::ThreadSafeModule(jl_create_llvm_module(name, *ctx.getContext(), imaging_mode, DL, triple), ctx);
+    return orc::ThreadSafeModule(jl_create_llvm_module(name, *ctx.getContext(), DL, triple), ctx);
 }
 
 Module &jl_codegen_params_t::shared_module() JL_NOTSAFEPOINT {
     if (!_shared_module) {
-        _shared_module = jl_create_llvm_module("globals", getContext(), imaging, DL, TargetTriple);
+        _shared_module = jl_create_llvm_module("globals", getContext(), DL, TargetTriple);
     }
     return *_shared_module;
 }
+void fixupTM(TargetMachine &TM) JL_NOTSAFEPOINT;
 
-Pass *createLowerPTLSPass(bool imaging_mode) JL_NOTSAFEPOINT;
-Pass *createCombineMulAddPass() JL_NOTSAFEPOINT;
-Pass *createFinalLowerGCPass() JL_NOTSAFEPOINT;
-Pass *createLateLowerGCFramePass() JL_NOTSAFEPOINT;
-Pass *createLowerExcHandlersPass() JL_NOTSAFEPOINT;
-Pass *createGCInvariantVerifierPass(bool Strong) JL_NOTSAFEPOINT;
-Pass *createPropagateJuliaAddrspaces() JL_NOTSAFEPOINT;
-Pass *createRemoveJuliaAddrspacesPass() JL_NOTSAFEPOINT;
-Pass *createRemoveNIPass() JL_NOTSAFEPOINT;
-Pass *createJuliaLICMPass() JL_NOTSAFEPOINT;
-Pass *createMultiVersioningPass(bool external_use) JL_NOTSAFEPOINT;
-Pass *createAllocOptPass() JL_NOTSAFEPOINT;
-Pass *createDemoteFloat16Pass() JL_NOTSAFEPOINT;
-Pass *createCPUFeaturesPass() JL_NOTSAFEPOINT;
-Pass *createLowerSimdLoopPass() JL_NOTSAFEPOINT;
+void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER;
 
 // NewPM
 #include "passes.h"
 
-// Whether the Function is an llvm or julia intrinsic.
-static inline bool isIntrinsicFunction(Function *F) JL_NOTSAFEPOINT
-{
-    return F->isIntrinsic() || F->getName().startswith("julia.");
-}
-
+#if JL_LLVM_VERSION >= 180000
+CodeGenOptLevel CodeGenOptLevelFor(int optlevel) JL_NOTSAFEPOINT;
+#else
 CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) JL_NOTSAFEPOINT;
+#endif
diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc
index 092a48be81930..dc3073f42ddeb 100644
--- a/src/jl_exported_data.inc
+++ b/src/jl_exported_data.inc
@@ -2,138 +2,165 @@
 
 // Pointers that are exposed through the public libjulia
 #define JL_EXPORTED_DATA_POINTERS(XX) \
-    XX(jl_abstractarray_type) \
-    XX(jl_abstractstring_type) \
-    XX(jl_an_empty_string) \
-    XX(jl_an_empty_vec_any) \
-    XX(jl_anytuple_type) \
-    XX(jl_anytuple_type_type) \
-    XX(jl_any_type) \
-    XX(jl_argumenterror_type) \
-    XX(jl_argument_type) \
-    XX(jl_array_any_type) \
-    XX(jl_array_int32_type) \
-    XX(jl_array_symbol_type) \
-    XX(jl_array_type) \
-    XX(jl_array_typename) \
-    XX(jl_array_uint8_type) \
-    XX(jl_array_uint64_type) \
-    XX(jl_atomicerror_type) \
-    XX(jl_base_module) \
-    XX(jl_bool_type) \
-    XX(jl_bottom_type) \
-    XX(jl_boundserror_type) \
-    XX(jl_builtin_type) \
-    XX(jl_char_type) \
-    XX(jl_code_info_type) \
-    XX(jl_code_instance_type) \
-    XX(jl_const_type) \
-    XX(jl_core_module) \
-    XX(jl_datatype_type) \
-    XX(jl_densearray_type) \
-    XX(jl_diverror_exception) \
-    XX(jl_emptysvec) \
-    XX(jl_emptytuple) \
-    XX(jl_emptytuple_type) \
-    XX(jl_errorexception_type) \
-    XX(jl_expr_type) \
-    XX(jl_false) \
-    XX(jl_float16_type) \
-    XX(jl_float32_type) \
-    XX(jl_float64_type) \
-    XX(jl_floatingpoint_type) \
-    XX(jl_function_type) \
-    XX(jl_binding_type) \
-    XX(jl_globalref_type) \
-    XX(jl_gotoifnot_type) \
-    XX(jl_gotonode_type) \
-    XX(jl_initerror_type) \
-    XX(jl_int16_type) \
-    XX(jl_int32_type) \
-    XX(jl_int64_type) \
-    XX(jl_int8_type) \
-    XX(jl_interconditional_type) \
-    XX(jl_interrupt_exception) \
-    XX(jl_intrinsic_type) \
-    XX(jl_kwcall_func) \
-    XX(jl_lineinfonode_type) \
-    XX(jl_linenumbernode_type) \
-    XX(jl_llvmpointer_type) \
-    XX(jl_llvmpointer_typename) \
-    XX(jl_loaderror_type) \
-    XX(jl_main_module) \
-    XX(jl_memory_exception) \
-    XX(jl_methoderror_type) \
-    XX(jl_method_instance_type) \
-    XX(jl_method_match_type) \
-    XX(jl_method_type) \
-    XX(jl_methtable_type) \
-    XX(jl_module_type) \
-    XX(jl_n_threads_per_pool) \
-    XX(jl_namedtuple_type) \
-    XX(jl_namedtuple_typename) \
-    XX(jl_newvarnode_type) \
-    XX(jl_nonfunction_mt) \
-    XX(jl_nothing) \
-    XX(jl_nothing_type) \
-    XX(jl_number_type) \
-    XX(jl_opaque_closure_type) \
-    XX(jl_opaque_closure_typename) \
-    XX(jl_pair_type) \
-    XX(jl_partial_opaque_type) \
-    XX(jl_partial_struct_type) \
-    XX(jl_phicnode_type) \
-    XX(jl_phinode_type) \
-    XX(jl_pinode_type) \
-    XX(jl_pointer_type) \
-    XX(jl_pointer_typename) \
-    XX(jl_quotenode_type) \
-    XX(jl_readonlymemory_exception) \
-    XX(jl_ref_type) \
-    XX(jl_returnnode_type) \
-    XX(jl_signed_type) \
-    XX(jl_simplevector_type) \
-    XX(jl_slotnumber_type) \
-    XX(jl_ssavalue_type) \
-    XX(jl_stackovf_exception) \
-    XX(jl_string_type) \
-    XX(jl_symbol_type) \
-    XX(jl_task_type) \
-    XX(jl_top_module) \
-    XX(jl_true) \
-    XX(jl_tuple_typename) \
-    XX(jl_tvar_type) \
-    XX(jl_typeerror_type) \
-    XX(jl_typemap_entry_type) \
-    XX(jl_typemap_level_type) \
-    XX(jl_typename_type) \
-    XX(jl_typeofbottom_type) \
-    XX(jl_type_type) \
-    XX(jl_type_type_mt) \
-    XX(jl_type_typename) \
-    XX(jl_uint16_type) \
-    XX(jl_uint32_type) \
-    XX(jl_uint64_type) \
-    XX(jl_uint8pointer_type) \
-    XX(jl_uint8_type) \
-    XX(jl_undefref_exception) \
-    XX(jl_undefvarerror_type) \
-    XX(jl_unionall_type) \
-    XX(jl_uniontype_type) \
-    XX(jl_upsilonnode_type) \
-    XX(jl_vararg_type) \
-    XX(jl_vecelement_typename) \
-    XX(jl_voidpointer_type) \
-    XX(jl_void_type) \
-    XX(jl_weakref_type) \
+    XX(abioverride_type, jl_datatype_t*) \
+    XX(abstractarray_type, jl_unionall_t*) \
+    XX(abstractstring_type, jl_datatype_t*) \
+    XX(addrspace_type, jl_unionall_t*) \
+    XX(addrspace_typename, jl_typename_t*) \
+    XX(addrspacecore_type, jl_datatype_t*) \
+    XX(an_empty_string, jl_value_t*) \
+    XX(an_empty_vec_any, jl_value_t*) \
+    XX(an_empty_memory_any, jl_value_t*) \
+    XX(anytuple_type, jl_datatype_t*) \
+    XX(anytuple_type_type, jl_unionall_t*) \
+    XX(any_type, jl_datatype_t*) \
+    XX(argumenterror_type, jl_datatype_t*) \
+    XX(argument_type, jl_datatype_t*) \
+    XX(array_any_type, jl_value_t*) \
+    XX(array_int32_type, jl_value_t*) \
+    XX(array_symbol_type, jl_value_t*) \
+    XX(array_type, jl_unionall_t*) \
+    XX(array_typename, jl_typename_t*) \
+    XX(array_uint8_type, jl_value_t*) \
+    XX(array_uint32_type, jl_value_t*) \
+    XX(array_uint64_type, jl_value_t*) \
+    XX(atomicerror_type, jl_datatype_t*) \
+    XX(base_module, jl_module_t*) \
+    XX(bool_type, jl_datatype_t*) \
+    XX(bottom_type, jl_value_t*) \
+    XX(boundserror_type, jl_datatype_t*) \
+    XX(builtin_type, jl_datatype_t*) \
+    XX(char_type, jl_datatype_t*) \
+    XX(code_info_type, jl_datatype_t*) \
+    XX(code_instance_type, jl_datatype_t*) \
+    XX(const_type, jl_datatype_t*) \
+    XX(core_module, jl_module_t*) \
+    XX(datatype_type, jl_datatype_t*) \
+    XX(debuginfo_type, jl_datatype_t*) \
+    XX(densearray_type, jl_unionall_t*) \
+    XX(diverror_exception, jl_value_t*) \
+    XX(emptysvec, jl_svec_t*) \
+    XX(emptytuple, jl_value_t*) \
+    XX(emptytuple_type, jl_datatype_t*) \
+    XX(errorexception_type, jl_datatype_t*) \
+    XX(expr_type, jl_datatype_t*) \
+    XX(float16_type, jl_datatype_t*) \
+    XX(float32_type, jl_datatype_t*) \
+    XX(float64_type, jl_datatype_t*) \
+    XX(bfloat16_type, jl_datatype_t*) \
+    XX(floatingpoint_type, jl_datatype_t*) \
+    XX(function_type, jl_datatype_t*) \
+    XX(binding_type, jl_datatype_t*) \
+    XX(binding_partition_type, jl_datatype_t*) \
+    XX(globalref_type, jl_datatype_t*) \
+    XX(gotoifnot_type, jl_datatype_t*) \
+    XX(enternode_type, jl_datatype_t*) \
+    XX(gotonode_type, jl_datatype_t*) \
+    XX(initerror_type, jl_datatype_t*) \
+    XX(int16_type, jl_datatype_t*) \
+    XX(int32_type, jl_datatype_t*) \
+    XX(int64_type, jl_datatype_t*) \
+    XX(int8_type, jl_datatype_t*) \
+    XX(interconditional_type, jl_datatype_t*) \
+    XX(interrupt_exception, jl_value_t*) \
+    XX(intrinsic_type, jl_datatype_t*) \
+    XX(kwcall_type, jl_datatype_t*) \
+    XX(lineinfonode_type, jl_datatype_t*) \
+    XX(linenumbernode_type, jl_datatype_t*) \
+    XX(llvmpointer_type, jl_unionall_t*) \
+    XX(llvmpointer_typename, jl_typename_t*) \
+    XX(loaderror_type, jl_datatype_t*) \
+    XX(main_module, jl_module_t*) \
+    XX(memory_any_type, jl_value_t*) \
+    XX(memory_exception, jl_value_t*) \
+    XX(genericmemory_type, jl_unionall_t*) \
+    XX(genericmemory_typename, jl_typename_t*) \
+    XX(memory_uint8_type, jl_value_t*) \
+    XX(memory_uint16_type, jl_value_t*) \
+    XX(memory_uint32_type, jl_value_t*) \
+    XX(memory_uint64_type, jl_value_t*) \
+    XX(memoryref_any_type, jl_value_t*) \
+    XX(genericmemoryref_type, jl_unionall_t*) \
+    XX(genericmemoryref_typename, jl_typename_t*) \
+    XX(memoryref_uint8_type, jl_value_t*) \
+    XX(methoderror_type, jl_datatype_t*) \
+    XX(method_instance_type, jl_datatype_t*) \
+    XX(method_match_type, jl_datatype_t*) \
+    XX(method_type, jl_datatype_t*) \
+    XX(methtable_type, jl_datatype_t*) \
+    XX(methcache_type, jl_datatype_t*) \
+    XX(missingcodeerror_type, jl_datatype_t*) \
+    XX(module_type, jl_datatype_t*) \
+    XX(namedtuple_type, jl_unionall_t*) \
+    XX(namedtuple_typename, jl_typename_t*) \
+    XX(newvarnode_type, jl_datatype_t*) \
+    XX(nothing_type, jl_datatype_t*) \
+    XX(number_type, jl_datatype_t*) \
+    XX(opaque_closure_method, jl_method_t*) \
+    XX(opaque_closure_type, jl_unionall_t*) \
+    XX(opaque_closure_typename, jl_typename_t*) \
+    XX(pair_type, jl_value_t*) \
+    XX(partial_opaque_type, jl_datatype_t*) \
+    XX(partial_struct_type, jl_datatype_t*) \
+    XX(phicnode_type, jl_datatype_t*) \
+    XX(phinode_type, jl_datatype_t*) \
+    XX(pinode_type, jl_datatype_t*) \
+    XX(pointer_type, jl_unionall_t*) \
+    XX(pointer_typename, jl_typename_t*) \
+    XX(precompilable_error, jl_value_t*) \
+    XX(quotenode_type, jl_datatype_t*) \
+    XX(readonlymemory_exception, jl_value_t*) \
+    XX(ref_type, jl_unionall_t*) \
+    XX(returnnode_type, jl_datatype_t*) \
+    XX(signed_type, jl_datatype_t*) \
+    XX(simplevector_type, jl_datatype_t*) \
+    XX(slotnumber_type, jl_datatype_t*) \
+    XX(ssavalue_type, jl_datatype_t*) \
+    XX(stackovf_exception, jl_value_t*) \
+    XX(string_type, jl_datatype_t*) \
+    XX(symbol_type, jl_datatype_t*) \
+    XX(task_type, jl_datatype_t*) \
+    XX(top_module, jl_module_t*) \
+    XX(trimfailure_type, jl_datatype_t*) \
+    XX(tuple_typename, jl_typename_t*) \
+    XX(tvar_type, jl_datatype_t*) \
+    XX(typeerror_type, jl_datatype_t*) \
+    XX(typemap_entry_type, jl_datatype_t*) \
+    XX(typemap_level_type, jl_datatype_t*) \
+    XX(typename_type, jl_datatype_t*) \
+    XX(typeofbottom_type, jl_datatype_t*) \
+    XX(type_type, jl_unionall_t*) \
+    XX(type_typename, jl_typename_t*) \
+    XX(uint16_type, jl_datatype_t*) \
+    XX(uint32_type, jl_datatype_t*) \
+    XX(uint64_type, jl_datatype_t*) \
+    XX(uint8pointer_type, jl_datatype_t*) \
+    XX(uint8_type, jl_datatype_t*) \
+    XX(undefref_exception, jl_value_t*) \
+    XX(undefvarerror_type, jl_datatype_t*) \
+    XX(fielderror_type, jl_datatype_t*) \
+    XX(unionall_type, jl_datatype_t*) \
+    XX(uniontype_type, jl_datatype_t*) \
+    XX(upsilonnode_type, jl_datatype_t*) \
+    XX(vararg_type, jl_datatype_t*) \
+    XX(vecelement_typename, jl_typename_t*) \
+    XX(voidpointer_type, jl_datatype_t*) \
+    XX(weakref_type, jl_datatype_t*) \
+
+// Global constant values (jl_true, jl_false, jl_nothing)
+#define JL_CONST_GLOBAL_VARS(YY) \
+    YY(nothing, jl_value_t*) \
+    YY(false, jl_value_t*) \
+    YY(true, jl_value_t*) \
 
 // Data symbols that are defined inside the public libjulia
-#define JL_EXPORTED_DATA_SYMBOLS(XX) \
-    XX(jl_n_threadpools, int) \
-    XX(jl_n_threads, _Atomic(int)) \
-    XX(jl_n_gcthreads, int) \
-    XX(jl_options, jl_options_t) \
-    XX(jl_task_gcstack_offset, int) \
-    XX(jl_task_ptls_offset, int) \
+#define JL_EXPORTED_DATA_SYMBOLS(ZZ) \
+    ZZ(jl_n_threads_per_pool, int*) \
+    ZZ(jl_n_gcthreads, int) \
+    ZZ(jl_n_threadpools, int) \
+    ZZ(jl_n_threads, _Atomic(int)) \
+    ZZ(jl_options, jl_options_t) \
+    ZZ(jl_task_gcstack_offset, int) \
+    ZZ(jl_task_ptls_offset, int) \
 
 // end of file
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index c2b2a1578fd76..c090d3fe253fe 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -2,12 +2,12 @@
 
 #define JL_RUNTIME_EXPORTED_FUNCS(XX) \
     XX(jl_active_task_stack) \
-    XX(jl_add_standard_imports) \
     XX(jl_adopt_thread) \
     XX(jl_alignment) \
     XX(jl_alloc_array_1d) \
     XX(jl_alloc_array_2d) \
     XX(jl_alloc_array_3d) \
+    XX(jl_alloc_array_nd) \
     XX(jl_alloc_string) \
     XX(jl_alloc_svec) \
     XX(jl_alloc_svec_uninit) \
@@ -21,40 +21,27 @@
     XX(jl_apply_type1) \
     XX(jl_apply_type2) \
     XX(jl_argument_datatype) \
-    XX(jl_arraylen) \
-    XX(jl_arrayref) \
-    XX(jl_arrayset) \
-    XX(jl_arrayunset) \
-    XX(jl_array_cconvert_cstring) \
-    XX(jl_array_copy) \
-    XX(jl_array_del_at) \
-    XX(jl_array_del_beg) \
     XX(jl_array_del_end) \
     XX(jl_array_eltype) \
-    XX(jl_array_grow_at) \
-    XX(jl_array_grow_beg) \
     XX(jl_array_grow_end) \
-    XX(jl_array_isassigned) \
     XX(jl_array_ptr) \
     XX(jl_array_ptr_1d_append) \
     XX(jl_array_ptr_1d_push) \
-    XX(jl_array_ptr_copy) \
+    XX(jl_genericmemory_owner) \
+    XX(jl_genericmemoryref) \
     XX(jl_array_rank) \
-    XX(jl_array_size) \
-    XX(jl_array_sizehint) \
     XX(jl_array_to_string) \
-    XX(jl_array_typetagdata) \
-    XX(jl_array_validate_dims) \
     XX(jl_atexit_hook) \
     XX(jl_atomic_bool_cmpswap_bits) \
     XX(jl_atomic_cmpswap_bits) \
     XX(jl_atomic_error) \
     XX(jl_atomic_new_bits) \
     XX(jl_atomic_store_bits) \
+    XX(jl_atomic_storeonce_bits) \
     XX(jl_atomic_swap_bits) \
+    XX(jl_autoinit_and_adopt_thread) \
     XX(jl_backtrace_from_here) \
     XX(jl_base_relative_to) \
-    XX(jl_binding_resolved_p) \
     XX(jl_bitcast) \
     XX(jl_boundp) \
     XX(jl_bounds_error) \
@@ -84,6 +71,7 @@
     XX(jl_call1) \
     XX(jl_call2) \
     XX(jl_call3) \
+    XX(jl_call4) \
     XX(jl_calloc) \
     XX(jl_call_in_typeinf_world) \
     XX(jl_capture_interp_frame) \
@@ -109,8 +97,6 @@
     XX(jl_cstr_to_string) \
     XX(jl_current_exception) \
     XX(jl_debug_method_invalidation) \
-    XX(jl_declare_constant) \
-    XX(jl_defines_or_exports_p) \
     XX(jl_deprecate_binding) \
     XX(jl_dlclose) \
     XX(jl_dlopen) \
@@ -121,10 +107,10 @@
     XX(jl_egal__bits) \
     XX(jl_egal__bitstag) \
     XX(jl_eh_restore_state) \
+    XX(jl_eh_restore_state_noexcept) \
     XX(jl_enter_handler) \
     XX(jl_enter_threaded_region) \
     XX(jl_environ) \
-    XX(jl_eof_error) \
     XX(jl_eqtable_get) \
     XX(jl_eqtable_pop) \
     XX(jl_eqtable_put) \
@@ -139,12 +125,6 @@
     XX(jl_exit) \
     XX(jl_exit_on_sigint) \
     XX(jl_exit_threaded_region) \
-    XX(jl_expand) \
-    XX(jl_expand_and_resolve) \
-    XX(jl_expand_stmt) \
-    XX(jl_expand_stmt_with_loc) \
-    XX(jl_expand_with_loc) \
-    XX(jl_expand_with_loc_warn) \
     XX(jl_field_index) \
     XX(jl_field_isdefined) \
     XX(jl_gc_add_finalizer) \
@@ -152,10 +132,6 @@
     XX(jl_gc_add_ptr_finalizer) \
     XX(jl_gc_add_quiescent) \
     XX(jl_gc_allocobj) \
-    XX(jl_gc_alloc_0w) \
-    XX(jl_gc_alloc_1w) \
-    XX(jl_gc_alloc_2w) \
-    XX(jl_gc_alloc_3w) \
     XX(jl_gc_alloc_typed) \
     XX(jl_gc_big_alloc) \
     XX(jl_gc_collect) \
@@ -175,22 +151,23 @@
     XX(jl_gc_internal_obj_base_ptr) \
     XX(jl_gc_is_enabled) \
     XX(jl_gc_is_in_finalizer) \
+    XX(jl_gc_pool_live_bytes) \
     XX(jl_gc_live_bytes) \
     XX(jl_gc_managed_malloc) \
-    XX(jl_gc_managed_realloc) \
     XX(jl_gc_mark_queue_obj) \
     XX(jl_gc_mark_queue_objarray) \
     XX(jl_gc_max_internal_obj_size) \
     XX(jl_gc_new_weakref) \
     XX(jl_gc_new_weakref_th) \
     XX(jl_gc_num) \
-    XX(jl_gc_pool_alloc) \
+    XX(jl_gc_small_alloc) \
     XX(jl_gc_queue_multiroot) \
     XX(jl_gc_queue_root) \
     XX(jl_gc_safepoint) \
     XX(jl_gc_schedule_foreign_sweepfunc) \
     XX(jl_gc_set_cb_notify_external_alloc) \
     XX(jl_gc_set_cb_notify_external_free) \
+    XX(jl_gc_set_cb_notify_gc_pressure) \
     XX(jl_gc_set_cb_post_gc) \
     XX(jl_gc_set_cb_pre_gc) \
     XX(jl_gc_set_cb_root_scanner) \
@@ -200,19 +177,22 @@
     XX(jl_gc_total_hrtime) \
     XX(jl_gdblookup) \
     XX(jl_generating_output) \
-    XX(jl_generic_function_def) \
+    XX(jl_declare_const_gf) \
+    XX(jl_declare_constant_val) \
     XX(jl_gensym) \
+    XX(jl_getaffinity) \
     XX(jl_getallocationgranularity) \
     XX(jl_getnameinfo) \
     XX(jl_getpagesize) \
     XX(jl_get_ARCH) \
     XX(jl_get_backtrace) \
     XX(jl_get_binding) \
-    XX(jl_get_binding_for_method_def) \
-    XX(jl_get_binding_or_error) \
     XX(jl_get_binding_wr) \
+    XX(jl_check_binding_currently_writable) \
     XX(jl_get_cpu_name) \
     XX(jl_get_cpu_features) \
+    XX(jl_get_sysimage_cpu_target) \
+    XX(jl_cpu_has_fma) \
     XX(jl_get_current_task) \
     XX(jl_get_default_sysimg_path) \
     XX(jl_get_excstack) \
@@ -223,11 +203,12 @@
     XX(jl_get_JIT) \
     XX(jl_get_julia_bin) \
     XX(jl_get_julia_bindir) \
-    XX(jl_get_method_inferred) \
     XX(jl_get_module_compile) \
     XX(jl_get_module_infer) \
     XX(jl_get_module_of_binding) \
     XX(jl_get_module_optlevel) \
+    XX(jl_get_module_usings_backedges) \
+    XX(jl_get_module_binding_or_nothing) \
     XX(jl_get_next_task) \
     XX(jl_get_nth_field) \
     XX(jl_get_nth_field_checked) \
@@ -244,9 +225,9 @@
     XX(jl_get_world_counter) \
     XX(jl_get_zero_subnormals) \
     XX(jl_gf_invoke_lookup) \
+    XX(jl_method_lookup_by_tt) \
+    XX(jl_method_lookup) \
     XX(jl_gf_invoke_lookup_worlds) \
-    XX(jl_git_branch) \
-    XX(jl_git_commit) \
     XX(jl_global_event_loop) \
     XX(jl_has_empty_intersection) \
     XX(jl_has_free_typevars) \
@@ -255,13 +236,13 @@
     XX(jl_has_typevar_from_unionall) \
     XX(jl_hrtime) \
     XX(jl_idtable_rehash) \
-    XX(jl_infer_thunk) \
     XX(jl_init) \
+    XX(jl_init_) \
     XX(jl_init_options) \
     XX(jl_init_restored_module) \
     XX(jl_init_with_image) \
-    XX(jl_init_with_image__threading) \
-    XX(jl_init__threading) \
+    XX(jl_init_with_image_file) \
+    XX(jl_init_with_image_handle) \
     XX(jl_install_sigint_handler) \
     XX(jl_instantiate_type_in_env) \
     XX(jl_instantiate_unionall) \
@@ -275,7 +256,6 @@
     XX(jl_ios_buffer_n) \
     XX(jl_ios_fd) \
     XX(jl_ios_get_nbyte_int) \
-    XX(jl_ir_flag_inferred) \
     XX(jl_ir_flag_has_fcall) \
     XX(jl_ir_flag_inlining) \
     XX(jl_ir_inlining_cost) \
@@ -287,7 +267,9 @@
     XX(jl_istopmod) \
     XX(jl_is_binding_deprecated) \
     XX(jl_is_char_signed) \
+    XX(jl_is_compilable) \
     XX(jl_is_const) \
+    XX(jl_is_assertsbuild) \
     XX(jl_is_debugbuild) \
     XX(jl_is_foreign_type) \
     XX(jl_is_identifier) \
@@ -308,10 +290,10 @@
     XX(jl_load_dynamic_library) \
     XX(jl_load_file_string) \
     XX(jl_lookup_code_address) \
+    XX(jl_lower) \
     XX(jl_lseek) \
     XX(jl_lstat) \
     XX(jl_macroexpand) \
-    XX(jl_macroexpand1) \
     XX(jl_malloc) \
     XX(jl_malloc_stack) \
     XX(jl_matching_methods) \
@@ -325,20 +307,19 @@
     XX(jl_methtable_lookup) \
     XX(jl_mi_cache_insert) \
     XX(jl_module_build_id) \
-    XX(jl_module_export) \
     XX(jl_module_exports_p) \
     XX(jl_module_globalref) \
     XX(jl_module_import) \
     XX(jl_module_name) \
     XX(jl_module_names) \
     XX(jl_module_parent) \
-    XX(jl_module_use) \
+    XX(jl_module_getloc) \
+    XX(jl_module_public_p) \
     XX(jl_module_using) \
     XX(jl_module_usings) \
     XX(jl_module_uuid) \
     XX(jl_native_alignment) \
     XX(jl_nb_available) \
-    XX(jl_new_array) \
     XX(jl_new_bits) \
     XX(jl_new_codeinst) \
     XX(jl_new_code_info_uninit) \
@@ -349,6 +330,10 @@
     XX(jl_new_method_table) \
     XX(jl_new_method_uninit) \
     XX(jl_new_module) \
+    XX(jl_begin_new_module) \
+    XX(jl_end_new_module) \
+    XX(jl_new_opaque_closure_from_code_info) \
+    XX(jl_new_opaque_closure_from_code_info_in_world) \
     XX(jl_new_primitivetype) \
     XX(jl_new_struct) \
     XX(jl_new_structt) \
@@ -375,7 +360,8 @@
     XX(jl_pointerref) \
     XX(jl_pointerset) \
     XX(jl_pop_handler) \
-    XX(jl_preload_sysimg_so) \
+    XX(jl_pop_handler_noexcept) \
+    XX(jl_preload_sysimg) \
     XX(jl_prepend_cwd) \
     XX(jl_printf) \
     XX(jl_print_backtrace) \
@@ -389,29 +375,30 @@
     XX(jl_profile_maxlen_data) \
     XX(jl_profile_start_timer) \
     XX(jl_profile_stop_timer) \
-    XX(jl_ptrarrayref) \
     XX(jl_ptr_to_array) \
     XX(jl_ptr_to_array_1d) \
     XX(jl_queue_work) \
     XX(jl_raise_debugger) \
     XX(jl_readuntil) \
     XX(jl_cache_flags) \
+    XX(jl_match_cache_flags_current) \
     XX(jl_match_cache_flags) \
     XX(jl_read_verify_header) \
     XX(jl_realloc) \
     XX(jl_register_newmeth_tracer) \
-    XX(jl_reshape_array) \
-    XX(jl_resolve_globals_in_ir) \
+    XX(jl_resolve_definition_effects_in_ir) \
     XX(jl_restore_excstack) \
     XX(jl_restore_incremental) \
     XX(jl_restore_package_image_from_file) \
     XX(jl_restore_system_image) \
-    XX(jl_restore_system_image_data) \
     XX(jl_rethrow) \
     XX(jl_rethrow_other) \
     XX(jl_running_on_valgrind) \
     XX(jl_safe_printf) \
+    XX(jl_safepoint_suspend_thread) \
+    XX(jl_safepoint_resume_thread) \
     XX(jl_SC_CLK_TCK) \
+    XX(jl_setaffinity) \
     XX(jl_set_ARGS) \
     XX(jl_set_const) \
     XX(jl_set_errno) \
@@ -432,20 +419,18 @@
     XX(jl_set_zero_subnormals) \
     XX(jl_sigatomic_begin) \
     XX(jl_sigatomic_end) \
-    XX(jl_sig_throw) \
     XX(jl_spawn) \
     XX(jl_specializations_get_linfo) \
     XX(jl_specializations_lookup) \
     XX(jl_static_show) \
+    XX(jl_safe_static_show) \
     XX(jl_static_show_func_sig) \
     XX(jl_stderr_obj) \
     XX(jl_stderr_stream) \
     XX(jl_stdin_stream) \
-    XX(jl_stdout_obj) \
     XX(jl_stdout_stream) \
     XX(jl_stored_inline) \
     XX(jl_string_ptr) \
-    XX(jl_string_to_array) \
     XX(jl_subtype) \
     XX(jl_subtype_env) \
     XX(jl_subtype_env_size) \
@@ -454,7 +439,6 @@
     XX(jl_svec2) \
     XX(jl_svec_copy) \
     XX(jl_svec_fill) \
-    XX(jl_svec_ref) \
     XX(jl_switch) \
     XX(jl_switchto) \
     XX(jl_symbol) \
@@ -463,11 +447,12 @@
     XX(jl_tagged_gensym) \
     XX(jl_take_buffer) \
     XX(jl_task_get_next) \
-    XX(jl_task_stack_buffer) \
     XX(jl_termios_size) \
     XX(jl_test_cpu_feature) \
     XX(jl_threadid) \
     XX(jl_threadpoolid) \
+    XX(jl_get_ptls_rng) \
+    XX(jl_set_ptls_rng) \
     XX(jl_throw) \
     XX(jl_throw_out_of_memory_error) \
     XX(jl_too_few_args) \
@@ -478,8 +463,6 @@
     XX(jl_try_substrtof) \
     XX(jl_tty_set_mode) \
     XX(jl_typeassert) \
-    XX(jl_typeinf_lock_begin) \
-    XX(jl_typeinf_lock_end) \
     XX(jl_typeinf_timing_begin) \
     XX(jl_typeinf_timing_end) \
     XX(jl_typename_str) \
@@ -490,7 +473,7 @@
     XX(jl_type_intersection) \
     XX(jl_type_intersection_with_env) \
     XX(jl_type_morespecific) \
-    XX(jl_type_morespecific_no_subtype) \
+    XX(jl_method_morespecific) \
     XX(jl_type_union) \
     XX(jl_type_unionall) \
     XX(jl_unbox_bool) \
@@ -510,6 +493,7 @@
     XX(jl_uncompress_argname_n) \
     XX(jl_uncompress_ir) \
     XX(jl_undefined_var_error) \
+    XX(jl_unwrap_unionall) \
     XX(jl_has_no_field_error) \
     XX(jl_value_ptr) \
     XX(jl_ver_is_release) \
@@ -521,7 +505,6 @@
     XX(jl_vprintf) \
     XX(jl_wakeup_thread) \
     XX(jl_write_compiler_output) \
-    XX(jl_yield) \
 
 #define JL_RUNTIME_EXPORTED_FUNCS_WIN(XX) \
     XX(jl_setjmp) \
@@ -530,24 +513,24 @@
 #define JL_CODEGEN_EXPORTED_FUNCS(YY) \
     YY(jl_dump_function_ir) \
     YY(jl_dump_method_asm) \
-    YY(jl_extern_c) \
+    YY(jl_emit_codeinst_to_jit) \
     YY(jl_get_llvmf_defn) \
     YY(jl_get_llvm_function) \
     YY(jl_get_llvm_module) \
     YY(jl_get_LLVM_VERSION) \
     YY(jl_dump_native) \
     YY(jl_get_llvm_gvs) \
+    YY(jl_get_llvm_gv_inits) \
     YY(jl_get_llvm_external_fns) \
+    YY(jl_get_llvm_cis) \
     YY(jl_dump_function_asm) \
     YY(jl_LLVMCreateDisasm) \
     YY(jl_LLVMDisasmInstruction) \
     YY(jl_init_codegen) \
     YY(jl_getFunctionInfo) \
     YY(jl_register_fptrs) \
-    YY(jl_generate_fptr) \
     YY(jl_generate_fptr_for_unspecialized) \
-    YY(jl_generate_fptr_for_oc_wrapper) \
-    YY(jl_compile_extern_c) \
+    YY(jl_compile_codeinst) \
     YY(jl_teardown_codegen) \
     YY(jl_jit_total_bytes) \
     YY(jl_create_native) \
@@ -555,44 +538,13 @@
     YY(jl_dump_emitted_mi_name) \
     YY(jl_dump_llvm_opt) \
     YY(jl_dump_fptr_asm) \
+    YY(jl_emit_native) \
     YY(jl_get_function_id) \
     YY(jl_type_to_llvm) \
     YY(jl_getUnwindInfo) \
     YY(jl_get_libllvm) \
-    YY(jl_add_optimization_passes) \
-    YY(jl_build_newpm_pipeline) \
     YY(jl_register_passbuilder_callbacks) \
-    YY(LLVMExtraAddLowerSimdLoopPass) \
-    YY(LLVMExtraAddFinalLowerGCPass) \
-    YY(LLVMExtraAddPropagateJuliaAddrspaces) \
-    YY(LLVMExtraAddRemoveJuliaAddrspacesPass) \
-    YY(LLVMExtraAddCombineMulAddPass) \
-    YY(LLVMExtraAddMultiVersioningPass) \
-    YY(LLVMExtraAddLowerExcHandlersPass) \
-    YY(LLVMExtraAddLateLowerGCFramePass) \
-    YY(LLVMExtraJuliaLICMPass) \
-    YY(LLVMExtraAddAllocOptPass) \
-    YY(LLVMExtraAddLowerPTLSPass) \
-    YY(LLVMExtraAddRemoveNIPass) \
-    YY(LLVMExtraAddGCInvariantVerifierPass) \
-    YY(LLVMExtraAddDemoteFloat16Pass) \
-    YY(LLVMExtraAddCPUFeaturesPass) \
-    YY(LLVMExtraMPMAddCPUFeaturesPass) \
-    YY(LLVMExtraMPMAddRemoveNIPass) \
-    YY(LLVMExtraMPMAddLowerSIMDLoopPass) \
-    YY(LLVMExtraMPMAddFinalLowerGCPass) \
-    YY(LLVMExtraMPMAddMultiVersioningPass) \
-    YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \
-    YY(LLVMExtraMPMAddRemoveAddrspacesPass) \
-    YY(LLVMExtraMPMAddLowerPTLSPass) \
-    YY(LLVMExtraFPMAddDemoteFloat16Pass) \
-    YY(LLVMExtraFPMAddCombineMulAddPass) \
-    YY(LLVMExtraFPMAddLateLowerGCPass) \
-    YY(LLVMExtraFPMAddAllocOptPass) \
-    YY(LLVMExtraFPMAddPropagateJuliaAddrspacesPass) \
-    YY(LLVMExtraFPMAddLowerExcHandlersPass) \
-    YY(LLVMExtraFPMAddGCInvariantVerifierPass) \
-    YY(LLVMExtraLPMAddJuliaLICMPass) \
+    YY(jl_jit_abi_converter) \
     YY(JLJITGetLLVMOrcExecutionSession) \
     YY(JLJITGetJuliaOJIT) \
     YY(JLJITGetExternalJITDylib) \
diff --git a/src/jl_internal_data.inc b/src/jl_internal_data.inc
new file mode 100644
index 0000000000000..0a475cc7336f3
--- /dev/null
+++ b/src/jl_internal_data.inc
@@ -0,0 +1,7 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// Internal data pointers that are not part of the public API
+#define JL_INTERNAL_DATA(XX) \
+    XX(method_table, jl_methtable_t*) \
+
+// end of file
diff --git a/src/jl_uv.c b/src/jl_uv.c
index 281dd798dbb36..e41b896320693 100644
--- a/src/jl_uv.c
+++ b/src/jl_uv.c
@@ -8,6 +8,9 @@
 #include <stdlib.h>
 #include <string.h>
 
+// Needs to come before windows platform headers
+#include "support/dtypes.h"
+
 #ifdef _OS_WINDOWS_
 #include <ws2tcpip.h>
 #include <malloc.h>
@@ -39,21 +42,24 @@ static void walk_print_cb(uv_handle_t *h, void *arg)
     const char *type = uv_handle_type_name(h->type);
     if (!type)
         type = "<unknown>";
+    size_t resource_id; // fits an int or pid_t on Unix, HANDLE or PID on Windows
     uv_os_fd_t fd;
     if (h->type == UV_PROCESS)
-        fd = uv_process_get_pid((uv_process_t*)h);
-    else if (uv_fileno(h, &fd))
-        fd = (uv_os_fd_t)-1;
+        resource_id = (size_t)uv_process_get_pid((uv_process_t*)h);
+    else if (uv_fileno(h, &fd) == 0)
+        resource_id = (size_t)fd;
+    else
+        resource_id = -1;
     const char *pad = "                "; // 16 spaces
-    int npad = fd == -1 ? 0 : snprintf(NULL, 0, "%zd", (size_t)fd);
+    int npad = resource_id == -1 ? 0 : snprintf(NULL, 0, "%zd", resource_id);
     if (npad < 0)
         npad = 0;
     npad += strlen(type);
     pad += npad < strlen(pad) ? npad : strlen(pad);
-    if (fd == -1)
-        jl_safe_printf(" %s   %s@%p->%p\n", type,             pad, (void*)h, (void*)h->data);
+    if (resource_id == -1)
+        jl_safe_printf(" %s   %s%p->%p\n", type,             pad, (void*)h, (void*)h->data);
     else
-        jl_safe_printf(" %s[%zd] %s@%p->%p\n", type, (size_t)fd, pad, (void*)h, (void*)h->data);
+        jl_safe_printf(" %s[%zd] %s%p->%p\n", type, resource_id, pad, (void*)h, (void*)h->data);
 }
 
 static void wait_empty_func(uv_timer_t *t)
@@ -62,33 +68,39 @@ static void wait_empty_func(uv_timer_t *t)
     uv_unref((uv_handle_t*)&signal_async);
     if (!uv_loop_alive(t->loop))
         return;
-    jl_safe_printf("\n[pid %zd] waiting for IO to finish:\n"
-                   " TYPE[FD/PID]       @UV_HANDLE_T->DATA\n",
+    jl_safe_printf("\n[pid %zd] Waiting for background task / IO / timer to finish:\n"
+                   " Handle type        uv_handle_t->data\n",
                    (size_t)uv_os_getpid());
     uv_walk(jl_io_loop, walk_print_cb, NULL);
+    if (jl_generating_output() && jl_options.incremental) {
+        jl_safe_printf("This means that a package has started a background task or event source that has not finished running. For precompilation to complete successfully, the event source needs to be closed explicitly. See the developer documentation on fixing precompilation hangs for more help.\n");
+    }
     jl_gc_collect(JL_GC_FULL);
 }
 
 void jl_wait_empty_begin(void)
 {
     JL_UV_LOCK();
-    if (wait_empty_worker.type != UV_TIMER && jl_io_loop) {
-        // try to purge anything that is just waiting for cleanup
-        jl_io_loop->stop_flag = 0;
-        uv_run(jl_io_loop, UV_RUN_NOWAIT);
-        uv_timer_init(jl_io_loop, &wait_empty_worker);
+    if (jl_io_loop) {
+        if (wait_empty_worker.type != UV_TIMER) {
+            // try to purge anything that is just waiting for cleanup
+            jl_io_loop->stop_flag = 0;
+            uv_run(jl_io_loop, UV_RUN_NOWAIT);
+            uv_timer_init(jl_io_loop, &wait_empty_worker);
+            uv_unref((uv_handle_t*)&wait_empty_worker);
+        }
+        // make sure this is running
         uv_update_time(jl_io_loop);
         uv_timer_start(&wait_empty_worker, wait_empty_func, 10, 15000);
-        uv_unref((uv_handle_t*)&wait_empty_worker);
     }
     JL_UV_UNLOCK();
 }
-
 void jl_wait_empty_end(void)
 {
-    JL_UV_LOCK();
-    uv_close((uv_handle_t*)&wait_empty_worker, NULL);
-    JL_UV_UNLOCK();
+    // n.b. caller must be holding jl_uv_mutex
+    if (wait_empty_worker.type == UV_TIMER)
+        // make sure this timer is stopped, but not destroyed in case the user calls jl_wait_empty_begin again
+        uv_timer_stop(&wait_empty_worker);
 }
 
 
@@ -130,11 +142,17 @@ void JL_UV_LOCK(void)
     }
 }
 
+/**
+ * @brief Begin an IO lock.
+ */
 JL_DLLEXPORT void jl_iolock_begin(void)
 {
     JL_UV_LOCK();
 }
 
+/**
+ * @brief End an IO lock.
+ */
 JL_DLLEXPORT void jl_iolock_end(void)
 {
     JL_UV_UNLOCK();
@@ -145,10 +163,11 @@ static void jl_uv_call_close_callback(jl_value_t *val)
 {
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 2); // val is "rooted" in the finalizer list only right now
-    args[0] = jl_get_global(jl_base_relative_to(((jl_datatype_t*)jl_typeof(val))->name->module),
-            jl_symbol("_uv_hook_close")); // topmod(typeof(val))._uv_hook_close
+    args[0] = jl_eval_global_var(
+            jl_base_relative_to(((jl_datatype_t*)jl_typeof(val))->name->module),
+            jl_symbol("_uv_hook_close"),
+            jl_current_task->world_age); // topmod(typeof(val))._uv_hook_close
     args[1] = val;
-    assert(args[0]);
     jl_apply(args, 2); // TODO: wrap in try-catch?
     JL_GC_POP();
 }
@@ -173,9 +192,12 @@ static void jl_uv_closeHandle(uv_handle_t *handle)
         ct->world_age = last_age;
         return;
     }
-    if (handle == (uv_handle_t*)&signal_async || handle == (uv_handle_t*)&wait_empty_worker)
+    if (handle == (uv_handle_t*)&wait_empty_worker)
+        handle->type = UV_UNKNOWN_HANDLE;
+    else if (handle == (uv_handle_t*)&signal_async)
         return;
-    free(handle);
+    else
+        free(handle);
 }
 
 static void jl_uv_flush_close_callback(uv_write_t *req, int status)
@@ -218,9 +240,16 @@ static void uv_flush_callback(uv_write_t *req, int status)
     free(req);
 }
 
-// Turn a normal write into a blocking write (primarily for use from C and gdb).
-// Warning: This calls uv_run, so it can have unbounded side-effects.
-// Be care where you call it from! - the libuv loop is also not reentrant.
+/**
+ * @brief Flush a UV stream.
+ *
+ * Primarily used from C and gdb to convert a normal write operation on a UV stream
+ * into a blocking write. It calls uv_run, which can have unbounded side-effects.
+ * Caution is advised as the location from where this function is called is critical
+ * due to the non-reentrancy of the libuv loop.
+ *
+ * @param stream A pointer to `uv_stream_t` representing the stream to flush.
+ */
 JL_DLLEXPORT void jl_uv_flush(uv_stream_t *stream)
 {
     if (stream == (void*)STDIN_FILENO ||
@@ -252,27 +281,115 @@ JL_DLLEXPORT void jl_uv_flush(uv_stream_t *stream)
 
 // getters and setters
 // TODO: check if whoever calls these is thread-safe
+/**
+ * @brief Get the process ID of a UV process.
+ *
+ * @param p A pointer to `uv_process_t` representing the UV process.
+ * @return The process ID.
+ */
 JL_DLLEXPORT int jl_uv_process_pid(uv_process_t *p) { return p->pid; }
+
+/**
+ * @brief Get the data associated with a UV process.
+ *
+ * @param p A pointer to `uv_process_t` representing the UV process.
+ * @return A pointer to the process data.
+ */
 JL_DLLEXPORT void *jl_uv_process_data(uv_process_t *p) { return p->data; }
+
+/**
+ * @brief Get the base pointer of a UV buffer.
+ *
+ * @param buf A constant pointer to `uv_buf_t` representing the UV buffer.
+ * @return A pointer to the base of the buffer.
+ */
 JL_DLLEXPORT void *jl_uv_buf_base(const uv_buf_t *buf) { return buf->base; }
+
+/**
+ * @brief Get the length of a UV buffer.
+ *
+ * @param buf A constant pointer to `uv_buf_t` representing the UV buffer.
+ * @return The length of the buffer as `size_t`.
+ */
 JL_DLLEXPORT size_t jl_uv_buf_len(const uv_buf_t *buf) { return buf->len; }
+
+/**
+ * @brief Set the base pointer of a UV buffer.
+ *
+ * @param buf A pointer to `uv_buf_t` representing the UV buffer.
+ * @param b A pointer to `char` representing the new base of the buffer.
+ */
 JL_DLLEXPORT void jl_uv_buf_set_base(uv_buf_t *buf, char *b) { buf->base = b; }
+
+/**
+ * @brief Set the length of a UV buffer.
+ *
+ * @param buf A pointer to `uv_buf_t` representing the UV buffer.
+ * @param n The new length of the buffer as `size_t`.
+ */
 JL_DLLEXPORT void jl_uv_buf_set_len(uv_buf_t *buf, size_t n) { buf->len = n; }
+
+/**
+ * @brief Get the handle associated with a UV connect request.
+ *
+ * @param connect A pointer to `uv_connect_t` representing the connect request.
+ * @return A pointer to the associated handle.
+ */
 JL_DLLEXPORT void *jl_uv_connect_handle(uv_connect_t *connect) { return connect->handle; }
+
+/**
+ * @brief Get the file descriptor from a UV file structure.
+ *
+ * @param f A pointer to `jl_uv_file_t` representing the UV file.
+ * @return The file descriptor as `uv_os_fd_t`.
+ */
 JL_DLLEXPORT uv_os_fd_t jl_uv_file_handle(jl_uv_file_t *f) { return f->file; }
+
+/**
+ * @brief Get the data field from a UV request.
+ *
+ * @param req A pointer to `uv_req_t` representing the request.
+ * @return A pointer to the data associated with the request.
+ */
 JL_DLLEXPORT void *jl_uv_req_data(uv_req_t *req) { return req->data; }
+
+/**
+ * @brief Set the data field of a UV request.
+ *
+ * @param req A pointer to `uv_req_t` representing the request.
+ * @param data A pointer to the data to be associated with the request.
+ */
 JL_DLLEXPORT void jl_uv_req_set_data(uv_req_t *req, void *data) { req->data = data; }
+
+/**
+ * @brief Get the data field from a UV handle.
+ *
+ * @param handle A pointer to `uv_handle_t` representing the handle.
+ * @return A pointer to the data associated with the handle.
+ */
 JL_DLLEXPORT void *jl_uv_handle_data(uv_handle_t *handle) { return handle->data; }
-JL_DLLEXPORT void *jl_uv_write_handle(uv_write_t *req) { return req->handle; }
 
-extern _Atomic(unsigned) _threadedregion;
+/**
+ * @brief Get the handle associated with a UV write request.
+ *
+ * @param req A pointer to `uv_write_t` representing the write request.
+ * @return A pointer to the handle associated with the write request.
+ */
+JL_DLLEXPORT void *jl_uv_write_handle(uv_write_t *req) { return req->handle; }
 
+/**
+ * @brief Process pending UV events.
+ *
+ * See also `uv_run` in the libuv documentation for status code enumeration.
+ *
+ * @return An integer indicating the status of the event processing.
+ */
 JL_DLLEXPORT int jl_process_events(void)
 {
     jl_task_t *ct = jl_current_task;
     uv_loop_t *loop = jl_io_loop;
     jl_gc_safepoint_(ct->ptls);
-    if (loop && (jl_atomic_load_relaxed(&_threadedregion) || jl_atomic_load_relaxed(&ct->tid) == 0)) {
+    if (loop && (jl_atomic_load_relaxed(&_threadedregion) || jl_atomic_load_relaxed(&ct->tid) == jl_atomic_load_relaxed(&io_loop_tid))) {
         if (jl_atomic_load_relaxed(&jl_uv_n_waiters) == 0 && jl_mutex_trylock(&jl_uv_mutex)) {
             JL_PROBE_RT_START_PROCESS_EVENTS(ct);
             loop->stop_flag = 0;
@@ -293,6 +410,11 @@ static void jl_proc_exit_cleanup_cb(uv_process_t *process, int64_t exit_status,
     uv_close((uv_handle_t*)process, (uv_close_cb)&free);
 }
 
+/**
+ * @brief Close a UV handle.
+ *
+ * @param handle A pointer to `uv_handle_t` that needs to be closed.
+ */
 JL_DLLEXPORT void jl_close_uv(uv_handle_t *handle)
 {
     JL_UV_LOCK();
@@ -326,6 +448,11 @@ JL_DLLEXPORT void jl_close_uv(uv_handle_t *handle)
     JL_UV_UNLOCK();
 }
 
+/**
+ * @brief Forcefully close a UV handle.
+ *
+ * @param handle A pointer to `uv_handle_t` to be forcefully closed.
+ */
 JL_DLLEXPORT void jl_forceclose_uv(uv_handle_t *handle)
 {
     if (!uv_is_closing(handle)) { // avoid double-closing the stream
@@ -337,12 +464,23 @@ JL_DLLEXPORT void jl_forceclose_uv(uv_handle_t *handle)
     }
 }
 
+/**
+ * @brief Associate a Julia structure with a UV handle.
+ *
+ * @param handle A pointer to `uv_handle_t` to be associated with a Julia structure.
+ * @param data Additional parameters representing the Julia structure to be associated.
+ */
 JL_DLLEXPORT void jl_uv_associate_julia_struct(uv_handle_t *handle,
                                                jl_value_t *data)
 {
     handle->data = data;
 }
 
+/**
+ * @brief Disassociate a Julia structure from a UV handle.
+ *
+ * @param handle A pointer to `uv_handle_t` from which the Julia structure will be disassociated.
+ */
 JL_DLLEXPORT void jl_uv_disassociate_julia_struct(uv_handle_t *handle)
 {
     handle->data = NULL;
@@ -350,20 +488,44 @@ JL_DLLEXPORT void jl_uv_disassociate_julia_struct(uv_handle_t *handle)
 
 #define UV_HANDLE_CLOSED 0x02
 
+/**
+ * @brief Spawn a new process.
+ *
+ * Spawns a new process to execute external programs or scripts within the context of the Julia application.
+ *
+ * @param name A C string representing the name or path of the executable to spawn.
+ * @param argv An array of C strings representing the arguments for the process. The array should be null-terminated.
+ * @param loop A pointer to `uv_loop_t` representing the event loop where the process is registered.
+ * @param proc A pointer to `uv_process_t` where the details of the spawned process are stored.
+ * @param stdio An array of `uv_stdio_container_t` representing the file descriptors for standard input, output, and error.
+ * @param nstdio An integer representing the number of elements in the stdio array.
+ * @param flags A uint32_t representing process creation flags.
+          See also `enum uv_process_flags` in the libuv documentation.
+ * @param env An array of C strings for setting environment variables. The array should be null-terminated.
+ * @param cwd A C string representing the current working directory for the process.
+ * @param cpumask A C string representing the CPU affinity mask for the process.
+          See also the `cpumask` field of the `uv_process_options_t` structure in the libuv documentation.
+ * @param cpumask_size The size of the cpumask.
+ * @param uid The user ID for the process (only used if UV_PROCESS_SETUID flag is set).
+ * @param gid The group ID for the process (only used if UV_PROCESS_SETGID flag is set).
+ * @param cb A function pointer to `uv_exit_cb` which is the callback function to be called upon process exit.
+ *
+ * @return An integer indicating the success or failure of the spawn operation. A return value of 0 indicates success,
+ *         while a non-zero value indicates an error.
+ */
 JL_DLLEXPORT int jl_spawn(char *name, char **argv,
                           uv_loop_t *loop, uv_process_t *proc,
                           uv_stdio_container_t *stdio, int nstdio,
                           uint32_t flags, char **env, char *cwd, char* cpumask,
-                          size_t cpumask_size, uv_exit_cb cb)
+                          size_t cpumask_size, uint32_t uid, uint32_t gid, uv_exit_cb cb)
 {
     uv_process_options_t opts = {0};
     opts.stdio = stdio;
     opts.file = name;
     opts.env = env;
     opts.flags = flags;
-    // unused fields:
-    //opts.uid = 0;
-    //opts.gid = 0;
+    opts.uid = (uv_uid_t)uid;
+    opts.gid = (uv_gid_t)gid;
     opts.cpumask = cpumask;
     opts.cpumask_size = cpumask_size;
     opts.cwd = cwd;
@@ -478,7 +640,7 @@ JL_DLLEXPORT int jl_fs_write(uv_os_fd_t handle, const char *data, size_t len,
 {
     jl_task_t *ct = jl_get_current_task();
     // TODO: fix this cheating
-    if (jl_get_safe_restore() || ct == NULL || jl_atomic_load_relaxed(&ct->tid) != 0)
+    if (jl_get_safe_restore() || ct == NULL || jl_atomic_load_relaxed(&ct->tid) != jl_atomic_load_relaxed(&io_loop_tid))
 #ifdef _OS_WINDOWS_
         return WriteFile(handle, data, len, NULL, NULL);
 #else
@@ -506,25 +668,6 @@ JL_DLLEXPORT int jl_fs_read(uv_os_fd_t handle, char *data, size_t len)
     return ret;
 }
 
-JL_DLLEXPORT int jl_fs_read_byte(uv_os_fd_t handle)
-{
-    uv_fs_t req;
-    unsigned char c;
-    uv_buf_t buf[1];
-    buf[0].base = (char*)&c;
-    buf[0].len = 1;
-    int ret = uv_fs_read(unused_uv_loop_arg, &req, handle, buf, 1, -1, NULL);
-    uv_fs_req_cleanup(&req);
-    switch (ret) {
-    case -1: return ret;
-    case  0: jl_eof_error();
-    case  1: return (int)c;
-    default:
-        assert(0 && "jl_fs_read_byte: Invalid return value from uv_fs_read");
-        return -1;
-    }
-}
-
 JL_DLLEXPORT int jl_fs_close(uv_os_fd_t handle)
 {
     uv_fs_t req;
@@ -578,7 +721,7 @@ JL_DLLEXPORT void jl_uv_puts(uv_stream_t *stream, const char *str, size_t n)
 
     // TODO: Hack to make CoreIO thread-safer
     jl_task_t *ct = jl_get_current_task();
-    if (ct == NULL || jl_atomic_load_relaxed(&ct->tid) != 0) {
+    if (ct == NULL || jl_atomic_load_relaxed(&ct->tid) != jl_atomic_load_relaxed(&io_loop_tid)) {
         if (stream == JL_STDOUT) {
             fd = UV_STDOUT_FD;
         }
@@ -674,23 +817,19 @@ JL_DLLEXPORT int jl_printf(uv_stream_t *s, const char *format, ...)
     return c;
 }
 
-JL_DLLEXPORT void jl_safe_printf(const char *fmt, ...)
+static void jl_safe_vfprintf(ios_t *s, const char *fmt, va_list args) JL_NOTSAFEPOINT
 {
-    static char buf[1000];
+    char buf[1000];
     buf[0] = '\0';
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
 #endif
-
-    va_list args;
-    va_start(args, fmt);
     // Not async signal safe on some platforms?
     vsnprintf(buf, sizeof(buf), fmt, args);
-    va_end(args);
 
     buf[999] = '\0';
-    if (write(STDERR_FILENO, buf, strlen(buf)) < 0) {
+    if (!ios_write(s, buf, strlen(buf))) {
         // nothing we can do; ignore the failure
     }
 #ifdef _OS_WINDOWS_
@@ -699,6 +838,22 @@ JL_DLLEXPORT void jl_safe_printf(const char *fmt, ...)
     errno = last_errno;
 }
 
+JL_DLLEXPORT void jl_safe_printf(const char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    jl_safe_vfprintf(ios_safe_stderr, fmt, args);
+    va_end(args);
+}
+
+JL_DLLEXPORT void jl_safe_fprintf(ios_t *s, const char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    jl_safe_vfprintf(s, fmt, args);
+    va_end(args);
+}
+
 typedef union {
     struct sockaddr in;
     struct sockaddr_in v4;
@@ -973,31 +1128,39 @@ static inline int ishexchar(char c)
 
 JL_DLLEXPORT int jl_ispty(uv_pipe_t *pipe)
 {
-    if (pipe->type != UV_NAMED_PIPE) return 0;
+    char namebuf[0];
     size_t len = 0;
-    if (uv_pipe_getpeername(pipe, NULL, &len) != UV_ENOBUFS) return 0;
+    if (pipe->type != UV_NAMED_PIPE)
+        return 0;
+    if (uv_pipe_getpeername(pipe, namebuf, &len) != UV_ENOBUFS)
+        return 0;
     char *name = (char*)alloca(len + 1);
-    if (uv_pipe_getpeername(pipe, name, &len)) return 0;
+    if (uv_pipe_getpeername(pipe, name, &len))
+        return 0;
     name[len] = '\0';
     // return true if name matches regex:
     // ^\\\\?\\pipe\\(msys|cygwin)-[0-9a-z]{16}-[pt]ty[1-9][0-9]*-
     //jl_printf(JL_STDERR,"pipe_name: %s\n", name);
     int n = 0;
-    if (!strncmp(name,"\\\\?\\pipe\\msys-",14))
+    if (!strncmp(name, "\\\\?\\pipe\\msys-", 14))
         n = 14;
-    else if (!strncmp(name,"\\\\?\\pipe\\cygwin-",16))
+    else if (!strncmp(name, "\\\\?\\pipe\\cygwin-", 16))
         n = 16;
     else
         return 0;
     //jl_printf(JL_STDERR,"prefix pass\n");
     name += n;
     for (int n = 0; n < 16; n++)
-        if (!ishexchar(*name++)) return 0;
+        if (!ishexchar(*name++))
+            return 0;
     //jl_printf(JL_STDERR,"hex pass\n");
-    if ((*name++)!='-') return 0;
-    if (*name != 'p' && *name != 't') return 0;
+    if ((*name++)!='-')
+        return 0;
+    if (*name != 'p' && *name != 't')
+        return 0;
     name++;
-    if (*name++ != 't' || *name++ != 'y') return 0;
+    if (*name++ != 't' || *name++ != 'y')
+        return 0;
     //jl_printf(JL_STDERR,"tty pass\n");
     return 1;
 }
@@ -1014,10 +1177,12 @@ JL_DLLEXPORT uv_handle_type jl_uv_handle_type(uv_handle_t *handle)
 
 JL_DLLEXPORT int jl_tty_set_mode(uv_tty_t *handle, int mode)
 {
+    if (!handle)
+        return UV__EOF;
     if (handle->type != UV_TTY) return 0;
     uv_tty_mode_t mode_enum = UV_TTY_MODE_NORMAL;
     if (mode)
-        mode_enum = UV_TTY_MODE_RAW;
+        mode_enum = UV_TTY_MODE_RAW_VT;
     // TODO: do we need lock?
     return uv_tty_set_mode(handle, mode_enum);
 }
diff --git a/src/jlapi.c b/src/jlapi.c
index 0dffaac627288..c296cbeba1fad 100644
--- a/src/jlapi.c
+++ b/src/jlapi.c
@@ -26,78 +26,116 @@ extern "C" {
 #include <fenv.h>
 #endif
 
+static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel, const char* julia_bindir);
+
+/**
+ * @brief Check if Julia is already initialized.
+ *
+ * Determine if Julia has been previously initialized via `jl_init` or
+ * `jl_init_with_image_file` or `jl_init_with_image_handle`.
+ *
+ * @return Returns 1 if Julia is initialized, 0 otherwise.
+ */
 JL_DLLEXPORT int jl_is_initialized(void)
 {
     return jl_main_module != NULL;
 }
 
-JL_DLLEXPORT void jl_set_ARGS(int argc, char **argv)
+/**
+ * @brief Set Julia command line arguments.
+ *
+ * Allows setting the command line arguments for Julia,
+ * similar to arguments passed in the main function of a C program.
+ *
+ * @param argc The number of command line arguments.
+ * @param argv Array of command line arguments.
+ */
+JL_DLLEXPORT jl_value_t *jl_set_ARGS(int argc, char **argv)
 {
-    if (jl_core_module != NULL) {
-        jl_array_t *args = (jl_array_t*)jl_get_global(jl_core_module, jl_symbol("ARGS"));
-        if (args == NULL) {
-            args = jl_alloc_vec_any(0);
-            JL_GC_PUSH1(&args);
+    jl_array_t *args = NULL;
+    jl_value_t *vecstr = NULL;
+    JL_GC_PUSH2(&args, &vecstr);
+    if (jl_core_module != NULL)
+        args = (jl_array_t*)jl_get_global(jl_core_module, jl_symbol("ARGS"));
+    if (args == NULL) {
+        vecstr = jl_apply_array_type((jl_value_t*)jl_string_type, 1);
+        args = jl_alloc_array_1d(vecstr, 0);
+        if (jl_core_module != NULL)
             jl_set_const(jl_core_module, jl_symbol("ARGS"), (jl_value_t*)args);
-            JL_GC_POP();
-        }
-        assert(jl_array_len(args) == 0);
-        jl_array_grow_end(args, argc);
-        int i;
-        for (i = 0; i < argc; i++) {
-            jl_value_t *s = (jl_value_t*)jl_cstr_to_string(argv[i]);
-            jl_arrayset(args, s, i);
-        }
     }
+    assert(jl_array_nrows(args) == 0);
+    jl_array_grow_end(args, argc);
+    int i;
+    for (i = 0; i < argc; i++) {
+        jl_value_t *s = (jl_value_t*)jl_cstr_to_string(argv[i]);
+        jl_array_ptr_set(args, i, s);
+    }
+    JL_GC_POP();
+    return (jl_value_t*)args;
 }
 
-// First argument is the usr/bin directory where the julia binary is, or NULL to guess.
-// Second argument is the path of a system image file (*.so).
-// A non-absolute path is interpreted as relative to the first argument path, or
-// relative to the default julia home dir.
-// The default is something like ../lib/julia/sys.so
-JL_DLLEXPORT void jl_init_with_image(const char *julia_bindir,
-                                     const char *image_path)
+JL_DLLEXPORT void jl_init_with_image_handle(void *handle) {
+    if (jl_is_initialized())
+        return;
+
+    const char *image_path = jl_pathname_for_handle(handle);
+    jl_options.image_file = image_path;
+
+    jl_resolve_sysimg_location(JL_IMAGE_JULIA_HOME, NULL);
+    jl_image_buf_t sysimage = jl_set_sysimg_so(handle);
+
+    jl_init_(sysimage);
+
+    jl_exception_clear();
+}
+/**
+ * @brief Initialize Julia with a specified system image file.
+ *
+ * Initializes Julia by specifying the usr/bin directory where the Julia binary is
+ * and the path of a system image file (*.so). If the julia_bindir is NULL, the function
+ * attempts to guess the directory. The image_path is interpreted as a path to the system image
+ * file. A non-absolute path for the system image is considered relative to julia_bindir, or
+ * relative to the default Julia home directory. The default system image is typically
+ * something like ../lib/julia/sys.so.
+ *
+ * @param julia_bindir The usr/bin directory where the Julia binary is located, or NULL to guess.
+ * @param image_path The path of a system image file (*.so). Interpreted as relative to julia_bindir
+ *                   or the default Julia home directory if not an absolute path.
+ */
+JL_DLLEXPORT void jl_init_with_image_file(const char *julia_bindir,
+                                          const char *image_path)
 {
     if (jl_is_initialized())
         return;
-    libsupport_init();
-    jl_options.julia_bindir = julia_bindir;
     if (image_path != NULL)
         jl_options.image_file = image_path;
     else
         jl_options.image_file = jl_get_default_sysimg_path();
-    julia_init(JL_IMAGE_JULIA_HOME);
-    jl_exception_clear();
-}
 
-JL_DLLEXPORT void jl_init(void)
-{
-    char *libbindir = NULL;
-#ifdef _OS_WINDOWS_
-    libbindir = strdup(jl_get_libdir());
-#else
-    (void)asprintf(&libbindir, "%s" PATHSEPSTRING ".." PATHSEPSTRING "%s", jl_get_libdir(), "bin");
-#endif
-    if (!libbindir) {
-        printf("jl_init unable to find libjulia!\n");
-        abort();
-    }
-    jl_init_with_image(libbindir, jl_get_default_sysimg_path());
-    free(libbindir);
+    jl_resolve_sysimg_location(JL_IMAGE_JULIA_HOME, julia_bindir);
+    jl_image_buf_t sysimage = jl_preload_sysimg(jl_options.image_file);
+
+    jl_init_(sysimage);
+
+    jl_exception_clear();
 }
 
-// HACK: remove this for Julia 1.8 (see <https://github.com/JuliaLang/julia/issues/40730>)
-JL_DLLEXPORT void jl_init__threading(void)
+// Deprecated function, kept for backward compatibility
+JL_DLLEXPORT void jl_init_with_image(const char *julia_bindir,
+                                    const char *image_path)
 {
-    jl_init();
+    jl_init_with_image_file(julia_bindir, image_path);
 }
 
-// HACK: remove this for Julia 1.8 (see <https://github.com/JuliaLang/julia/issues/40730>)
-JL_DLLEXPORT void jl_init_with_image__threading(const char *julia_bindir,
-                                     const char *image_relative_path)
+/**
+ * @brief Initialize the Julia runtime.
+ *
+ * Initializes the Julia runtime without any specific system image.
+ * It must be called before any other Julia API functions.
+ */
+JL_DLLEXPORT void jl_init(void)
 {
-    jl_init_with_image(julia_bindir, image_relative_path);
+    jl_init_with_image_file(NULL, jl_get_default_sysimg_path());
 }
 
 static void _jl_exception_clear(jl_task_t *ct) JL_NOTSAFEPOINT
@@ -105,6 +143,13 @@ static void _jl_exception_clear(jl_task_t *ct) JL_NOTSAFEPOINT
     ct->ptls->previous_exception = NULL;
 }
 
+/**
+ * @brief Evaluate a Julia expression from a string.
+ *
+ * @param str A C string containing the Julia expression to be evaluated.
+ * @return A pointer to `jl_value_t` representing the result of the evaluation.
+ *         Returns `NULL` if an error occurs during parsing or evaluation.
+ */
 JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
 {
     jl_value_t *r;
@@ -119,29 +164,50 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         r = NULL;
     }
     return r;
 }
 
-JL_DLLEXPORT jl_value_t *jl_current_exception(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
+/**
+ * @brief Get the current exception in the Julia context.
+ *
+ * @return A pointer to `jl_value_t` representing the current exception.
+ *         Returns `NULL` if no exception is currently thrown.
+ */
+JL_DLLEXPORT jl_value_t *jl_current_exception(jl_task_t *ct) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
 {
-    jl_excstack_t *s = jl_current_task->excstack;
+    jl_excstack_t *s = ct->excstack;
     return s && s->top != 0 ? jl_excstack_exception(s, s->top) : jl_nothing;
 }
 
+/**
+ * @brief Check if an exception has occurred in the Julia context.
+ *
+ * @return A pointer to `jl_value_t` representing the exception that occurred.
+ *         Returns `NULL` if no exception has occurred.
+ */
 JL_DLLEXPORT jl_value_t *jl_exception_occurred(void)
 {
     return jl_current_task->ptls->previous_exception;
 }
 
+/**
+ * @brief Clear the current exception in the Julia context.
+ *
+ */
 JL_DLLEXPORT void jl_exception_clear(void)
 {
     _jl_exception_clear(jl_current_task);
 }
 
-// get the name of a type as a string
+/**
+ * @brief Get the type name of a Julia value.
+ *
+ * @param v A pointer to `jl_value_t` representing the Julia value.
+ * @return A C string containing the name of the type.
+ */
 JL_DLLEXPORT const char *jl_typename_str(jl_value_t *v)
 {
     if (!jl_is_datatype(v))
@@ -149,33 +215,79 @@ JL_DLLEXPORT const char *jl_typename_str(jl_value_t *v)
     return jl_symbol_name(((jl_datatype_t*)v)->name->name);
 }
 
-// get the name of typeof(v) as a string
+/**
+ * @brief Get the string representation of a Julia value's type.
+ *
+ * @param v A pointer to `jl_value_t` representing the Julia value.
+ * @return A C string describing the type of the value.
+ */
 JL_DLLEXPORT const char *jl_typeof_str(jl_value_t *v)
 {
     return jl_typename_str((jl_value_t*)jl_typeof(v));
 }
 
+/**
+ * @brief Get the element type of a Julia array.
+ *
+ * @param a A pointer to `jl_value_t` representing the Julia array.
+ * @return A pointer to the type of the array elements.
+ */
 JL_DLLEXPORT void *jl_array_eltype(jl_value_t *a)
 {
     return jl_tparam0(jl_typeof(a));
 }
 
+/**
+ * @brief Get the number of dimensions of a Julia array.
+ *
+ * Returns the rank (number of dimensions) of a Julia array.
+ *
+ * @param a A pointer to `jl_value_t` representing the Julia array.
+ * @return An integer representing the number of dimensions of the array.
+ */
 JL_DLLEXPORT int jl_array_rank(jl_value_t *a)
 {
     return jl_array_ndims(a);
 }
 
-JL_DLLEXPORT size_t jl_array_size(jl_value_t *a, int d)
+/**
+ * @brief Get the size of a specific dimension of a Julia array.
+ *
+ * Returns the size (number of elements) of a specific dimension
+ * of a Julia array.
+ *
+ * @param a A pointer to `jl_array_t` representing the Julia array.
+ * @param d The dimension for which the size is requested.
+ * @return The size of the specified dimension of the array.
+ */
+JL_DLLEXPORT size_t jl_array_size(jl_array_t *a, int d)
 {
+    // n.b this functions only use was to violate the vector abstraction, so we have to continue to emulate that
+    if (d >= jl_array_ndims(a))
+        return a->ref.mem->length;
     return jl_array_dim(a, d);
 }
 
+/**
+ * @brief Get the C string pointer from a Julia string.
+ *
+ * @param s A pointer to `jl_value_t` representing the Julia string.
+ * @return A C string pointer containing the contents of the Julia string.
+ */
 JL_DLLEXPORT const char *jl_string_ptr(jl_value_t *s)
 {
     return jl_string_data(s);
 }
 
-JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, uint32_t nargs)
+/**
+ * @brief Call a Julia function with a specified number of arguments.
+ *
+ * @param f A pointer to `jl_value_t` representing the Julia function to call.
+ * @param args An array of pointers to `jl_value_t` representing the arguments.
+ * @param nargs The number of arguments in the array.
+ * @return A pointer to `jl_value_t` representing the result of the function call.
+ */
+JL_DLLEXPORT jl_value_t *jl_call(jl_value_t *f, jl_value_t **args, uint32_t nargs)
 {
     jl_value_t *v;
     jl_task_t *ct = jl_current_task;
@@ -194,13 +306,21 @@ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, uint32_t n
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
 }
 
-JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f)
+/**
+ * @brief Call a Julia function with no arguments.
+ *
+ * A specialized case of `jl_call` for simpler scenarios.
+ *
+ * @param f A pointer to `jl_value_t` representing the Julia function to call.
+ * @return A pointer to `jl_value_t` representing the result of the function call.
+ */
+JL_DLLEXPORT jl_value_t *jl_call0(jl_value_t *f)
 {
     jl_value_t *v;
     jl_task_t *ct = jl_current_task;
@@ -214,13 +334,22 @@ JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f)
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
 }
 
-JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a)
+/**
+ * @brief Call a Julia function with one argument.
+ *
+ * A specialized case of `jl_call` for simpler scenarios.
+ *
+ * @param f A pointer to `jl_value_t` representing the Julia function to call.
+ * @param a A pointer to `jl_value_t` representing the argument to the function.
+ * @return A pointer to `jl_value_t` representing the result of the function call.
+ */
+JL_DLLEXPORT jl_value_t *jl_call1(jl_value_t *f, jl_value_t *a)
 {
     jl_value_t *v;
     jl_task_t *ct = jl_current_task;
@@ -237,13 +366,23 @@ JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a)
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
 }
 
-JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b)
+/**
+ * @brief Call a Julia function with two arguments.
+ *
+ * A specialized case of `jl_call` for simpler scenarios.
+ *
+ * @param f A pointer to `jl_value_t` representing the Julia function to call.
+ * @param a A pointer to `jl_value_t` representing the first argument.
+ * @param b A pointer to `jl_value_t` representing the second argument.
+ * @return A pointer to `jl_value_t` representing the result of the function call.
+ */
+JL_DLLEXPORT jl_value_t *jl_call2(jl_value_t *f, jl_value_t *a, jl_value_t *b)
 {
     jl_value_t *v;
     jl_task_t *ct = jl_current_task;
@@ -261,13 +400,24 @@ JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
 }
 
-JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a,
+/**
+ * @brief Call a Julia function with three arguments.
+ *
+ * A specialized case of `jl_call` for simpler scenarios.
+ *
+ * @param f A pointer to `jl_value_t` representing the Julia function to call.
+ * @param a A pointer to `jl_value_t` representing the first argument.
+ * @param b A pointer to `jl_value_t` representing the second argument.
+ * @param c A pointer to `jl_value_t` representing the third argument.
+ * @return A pointer to `jl_value_t` representing the result of the function call.
+ */
+JL_DLLEXPORT jl_value_t *jl_call3(jl_value_t *f, jl_value_t *a,
                                   jl_value_t *b, jl_value_t *c)
 {
     jl_value_t *v;
@@ -287,24 +437,63 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a,
         _jl_exception_clear(ct);
     }
     JL_CATCH {
-        ct->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
 }
 
-JL_DLLEXPORT void jl_yield(void)
+/**
+ * @brief Call a Julia function with three arguments.
+ *
+ * A specialized case of `jl_call` for simpler scenarios.
+ *
+ * @param f A pointer to `jl_value_t` representing the Julia function to call.
+ * @param a A pointer to `jl_value_t` representing the first argument.
+ * @param b A pointer to `jl_value_t` representing the second argument.
+ * @param c A pointer to `jl_value_t` representing the third argument.
+ * @param d A pointer to `jl_value_t` representing the fourth argument.
+ * @return A pointer to `jl_value_t` representing the result of the function call.
+ */
+JL_DLLEXPORT jl_value_t *jl_call4(jl_value_t *f, jl_value_t *a,
+                                  jl_value_t *b, jl_value_t *c,
+                                  jl_value_t *d)
 {
-    static jl_function_t *yieldfunc = NULL;
-    if (yieldfunc == NULL)
-        yieldfunc = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("yield"));
-    if (yieldfunc != NULL)
-        jl_call0(yieldfunc);
+    jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
+    JL_TRY {
+        jl_value_t **argv;
+        JL_GC_PUSHARGS(argv, 5);
+        argv[0] = f;
+        argv[1] = a;
+        argv[2] = b;
+        argv[3] = c;
+        argv[4] = d;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
+        v = jl_apply(argv, 5);
+        ct->world_age = last_age;
+        JL_GC_POP();
+        _jl_exception_clear(ct);
+    }
+    JL_CATCH {
+        ct->ptls->previous_exception = jl_current_exception(ct);
+        v = NULL;
+    }
+    return v;
 }
 
+/**
+ * @brief Get a field from a Julia object.
+ *
+ * @param o A pointer to `jl_value_t` representing the Julia object.
+ * @param fld A C string representing the name of the field to retrieve.
+ * @return A pointer to `jl_value_t` representing the value of the field.
+ */
 JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         jl_value_t *s = (jl_value_t*)jl_symbol(fld);
         int i = jl_field_index((jl_datatype_t*)jl_typeof(o), (jl_sym_t*)s, 1);
@@ -312,17 +501,29 @@ JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld)
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_current_task->ptls->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception(ct);
         v = NULL;
     }
     return v;
 }
 
+/**
+ * @brief Begin an atomic signal-protected region.
+ *
+ * Marks the start of a region of code that should be protected
+ * from interruption by asynchronous signals.
+ */
 JL_DLLEXPORT void jl_sigatomic_begin(void)
 {
     JL_SIGATOMIC_BEGIN();
 }
 
+/**
+ * @brief End an atomic signal-protected region.
+ *
+ * Marks the end of a region of code protected from asynchronous signals.
+ * It should be used in conjunction with `jl_sigatomic_begin` to define signal-protected regions.
+ */
 JL_DLLEXPORT void jl_sigatomic_end(void)
 {
     jl_task_t *ct = jl_current_task;
@@ -331,6 +532,11 @@ JL_DLLEXPORT void jl_sigatomic_end(void)
     JL_SIGATOMIC_END();
 }
 
+/**
+ * @brief Check if Julia is running in debug build mode.
+ *
+ * @return Returns 1 if Julia is in debug build mode, 0 otherwise.
+ */
 JL_DLLEXPORT int jl_is_debugbuild(void) JL_NOTSAFEPOINT
 {
 #ifdef JL_DEBUG_BUILD
@@ -340,6 +546,24 @@ JL_DLLEXPORT int jl_is_debugbuild(void) JL_NOTSAFEPOINT
 #endif
 }
 
+/**
+ * @brief Check if Julia has been build with assertions enabled.
+ *
+ * @return Returns 1 if assertions are enabled, 0 otherwise.
+ */
+JL_DLLEXPORT int8_t jl_is_assertsbuild(void) JL_NOTSAFEPOINT {
+#ifndef JL_NDEBUG
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+/**
+ * @brief Check if Julia's memory debugging is enabled.
+ *
+ * @return Returns 1 if memory debugging is enabled, 0 otherwise.
+ */
 JL_DLLEXPORT int8_t jl_is_memdebug(void) JL_NOTSAFEPOINT {
 #ifdef MEMDEBUG
     return 1;
@@ -348,92 +572,143 @@ JL_DLLEXPORT int8_t jl_is_memdebug(void) JL_NOTSAFEPOINT {
 #endif
 }
 
+/**
+ * @brief Get the directory path of the Julia binary.
+ *
+ * @return A pointer to `jl_value_t` representing the directory path as a Julia string.
+ */
 JL_DLLEXPORT jl_value_t *jl_get_julia_bindir(void)
 {
     return jl_cstr_to_string(jl_options.julia_bindir);
 }
 
+/**
+ * @brief Get the path to the Julia binary.
+ *
+ * @return A pointer to `jl_value_t` representing the full path as a Julia string.
+ */
 JL_DLLEXPORT jl_value_t *jl_get_julia_bin(void)
 {
     return jl_cstr_to_string(jl_options.julia_bin);
 }
 
+/**
+ * @brief Get the path to the Julia system image file.
+ *
+ * @return A pointer to `jl_value_t` representing the system image file path as a Julia string.
+ */
 JL_DLLEXPORT jl_value_t *jl_get_image_file(void)
 {
     return jl_cstr_to_string(jl_options.image_file);
 }
 
+/**
+ * @brief Get the major version number of Julia.
+ *
+ * @return The major version number as an integer.
+ */
 JL_DLLEXPORT int jl_ver_major(void)
 {
     return JULIA_VERSION_MAJOR;
 }
 
+/**
+ * @brief Get the minor version number of Julia.
+ *
+ * @return The minor version number as an integer.
+ */
 JL_DLLEXPORT int jl_ver_minor(void)
 {
     return JULIA_VERSION_MINOR;
 }
 
+/**
+ * @brief Get the patch version number of Julia.
+ *
+ * @return The patch version number as an integer.
+ */
 JL_DLLEXPORT int jl_ver_patch(void)
 {
     return JULIA_VERSION_PATCH;
 }
 
+/**
+ * @brief Check if the current Julia version is a release version.
+ *
+ * @return Returns 1 if it is a release version, 0 otherwise.
+ */
 JL_DLLEXPORT int jl_ver_is_release(void)
 {
     return JULIA_VERSION_IS_RELEASE;
 }
 
+/**
+ * @brief Get the Julia version as a string.
+ *
+ * @return A C string containing the version information.
+ */
 JL_DLLEXPORT const char *jl_ver_string(void)
 {
    return JULIA_VERSION_STRING;
 }
 
-// return char* from String field in Base.GIT_VERSION_INFO
-static const char *git_info_string(const char *fld)
-{
-    static jl_value_t *GIT_VERSION_INFO = NULL;
-    if (!GIT_VERSION_INFO)
-        GIT_VERSION_INFO = jl_get_global(jl_base_module, jl_symbol("GIT_VERSION_INFO"));
-    jl_value_t *f = jl_get_field(GIT_VERSION_INFO, fld);
-    assert(jl_is_string(f));
-    return jl_string_data(f);
-}
-
-JL_DLLEXPORT const char *jl_git_branch(void)
-{
-    static const char *branch = NULL;
-    if (!branch) branch = git_info_string("branch");
-    return branch;
-}
-
-JL_DLLEXPORT const char *jl_git_commit(void)
-{
-    static const char *commit = NULL;
-    if (!commit) commit = git_info_string("commit");
-    return commit;
-}
-
-// Create function versions of some useful macros for GDB or FFI use
+/**
+ * @brief Convert a Julia value to a tagged value.
+ *
+ * Converts a Julia value into its corresponding tagged value representation.
+ * Tagged values include additional metadata used internally by the Julia runtime.
+ *
+ * @param v A pointer to `jl_value_t` representing the Julia value.
+ * @return A pointer to `jl_taggedvalue_t` representing the tagged value.
+ */
 JL_DLLEXPORT jl_taggedvalue_t *(jl_astaggedvalue)(jl_value_t *v)
 {
     return jl_astaggedvalue(v);
 }
 
+/**
+ * @brief Convert a tagged value back to a Julia value.
+ *
+ * Converts a tagged value back into its original Julia value.
+ * It's the inverse operation of `jl_astaggedvalue`.
+ *
+ * @param v A pointer to `jl_taggedvalue_t` representing the tagged value.
+ * @return A pointer to `jl_value_t` representing the original Julia value.
+ */
 JL_DLLEXPORT jl_value_t *(jl_valueof)(jl_taggedvalue_t *v)
 {
     return jl_valueof(v);
 }
 
+/**
+ * @brief Get the type of a Julia value.
+ *
+ * @param v A pointer to `jl_value_t` representing the Julia value.
+ * @return A pointer to `jl_value_t` representing the type of the value.
+ */
 JL_DLLEXPORT jl_value_t *(jl_typeof)(jl_value_t *v)
 {
     return jl_typeof(v);
 }
 
+/**
+ * @brief Get the field types of a Julia value.
+ *
+ * @param v A pointer to `jl_value_t` representing the Julia value.
+ * @return A pointer to `jl_value_t` representing the field types.
+ */
 JL_DLLEXPORT jl_value_t *(jl_get_fieldtypes)(jl_value_t *v)
 {
     return (jl_value_t*)jl_get_fieldtypes((jl_datatype_t*)v);
 }
 
+/**
+ * @brief Check equality of two Julia values.
+ *
+ * @param a A pointer to `jl_value_t` representing the first Julia value.
+ * @param b A pointer to `jl_value_t` representing the second Julia value.
+ * @return Returns 1 if the values are equal, 0 otherwise.
+ */
 JL_DLLEXPORT int ijl_egal(jl_value_t *a, jl_value_t *b)
 {
     return jl_egal(a, b);
@@ -441,24 +716,56 @@ JL_DLLEXPORT int ijl_egal(jl_value_t *a, jl_value_t *b)
 
 
 #ifndef __clang_gcanalyzer__
+/**
+ * @brief Enter a state where concurrent garbage collection (GC) is considered unsafe.
+ *
+ * Marks the beginning of a code region where garbage collection operations are unsafe.
+ * Used to make it legal to access GC-managed state (almost anything)
+ *
+ * @return An `int8_t` state value representing the previous GC state.
+ */
 JL_DLLEXPORT int8_t (jl_gc_unsafe_enter)(void)
 {
     jl_task_t *ct = jl_current_task;
     return jl_gc_unsafe_enter(ct->ptls);
 }
 
+/**
+ * @brief Leave the state where garbage collection is considered unsafe.
+ *
+ * Ends a code region where garbage collection was marked as unsafe.
+ * It restores the previous GC state using the state value returned by `jl_gc_unsafe_enter`.
+ *
+ * @param state The state value returned by `jl_gc_unsafe_enter` to restore the previous GC state.
+ */
 JL_DLLEXPORT void (jl_gc_unsafe_leave)(int8_t state)
 {
     jl_task_t *ct = jl_current_task;
     jl_gc_unsafe_leave(ct->ptls, state);
 }
 
+/**
+ * @brief Enter a state where garbage collection (GC) is considered safe.
+ *
+ * Marks the beginning of a code region where garbage collection operations are safe.
+ * Used to enable GC in sections of code where it was previously marked as unsafe.
+ *
+ * @return An `int8_t` state value representing the previous GC state.
+ */
 JL_DLLEXPORT int8_t (jl_gc_safe_enter)(void)
 {
     jl_task_t *ct = jl_current_task;
     return jl_gc_safe_enter(ct->ptls);
 }
 
+/**
+ * @brief Leave the state where garbage collection is considered safe.
+ *
+ * Ends a code region where garbage collection was marked as safe.
+ * It restores the previous GC state using the state value returned by `jl_gc_safe_enter`.
+ *
+ * @param state The state value returned by `jl_gc_safe_enter` to restore the previous GC state.
+ */
 JL_DLLEXPORT void (jl_gc_safe_leave)(int8_t state)
 {
     jl_task_t *ct = jl_current_task;
@@ -466,49 +773,118 @@ JL_DLLEXPORT void (jl_gc_safe_leave)(int8_t state)
 }
 #endif
 
+/**
+ * @brief Trigger a garbage collection safepoint in a GC-unsafe region.
+ *
+ * Triggers a safepoint for garbage collection. Used to
+ * ensure that the garbage collector can run at specific points in the code,
+ * particularly in long-running operations or loops.
+ */
 JL_DLLEXPORT void jl_gc_safepoint(void)
 {
     jl_task_t *ct = jl_current_task;
     jl_gc_safepoint_(ct->ptls);
 }
 
+/**
+ * @brief Pause CPU execution for a brief moment.
+ *
+ * Used to pause the CPU briefly, typically to reduce power consumption
+ * or manage CPU resources more effectively in a tight loop or busy wait scenario.
+ */
 JL_DLLEXPORT void (jl_cpu_pause)(void)
 {
     jl_cpu_pause();
 }
 
+/**
+ * @brief Suspend CPU execution.
+ *
+ * Suspends CPU execution until a specific condition or event occurs.
+ */
 JL_DLLEXPORT void (jl_cpu_suspend)(void)
 {
     jl_cpu_suspend();
 }
 
+/**
+ * @brief Wake the CPU from a suspended state.
+ *
+ * Used to resume CPU execution after it has been suspended using `jl_cpu_suspend`.
+ */
 JL_DLLEXPORT void (jl_cpu_wake)(void)
 {
     jl_cpu_wake();
 }
 
+/**
+ * @brief Enable cumulative compile timing.
+ */
 JL_DLLEXPORT void jl_cumulative_compile_timing_enable(void)
 {
     // Increment the flag to allow reentrant callers to `@time`.
     jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1);
 }
 
+/**
+ * @brief Disable cumulative compile timing.
+ */
 JL_DLLEXPORT void jl_cumulative_compile_timing_disable(void)
 {
     // Decrement the flag when done measuring, allowing other callers to continue measuring.
     jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1);
 }
 
+/**
+ * @brief Get the cumulative compilation time in nanoseconds.
+ *
+ * @return The cumulative compilation time in nanoseconds.
+ */
 JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns(void)
 {
     return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
 }
 
+/**
+ * @brief Get the cumulative recompilation time in nanoseconds.
+ *
+ * @return The cumulative recompilation time in nanoseconds.
+ */
 JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void)
 {
     return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
 }
 
+/**
+ * @brief Enable per-task timing.
+ */
+JL_DLLEXPORT void jl_task_metrics_enable(void)
+{
+    // Increment the flag to allow reentrant callers.
+    jl_atomic_fetch_add(&jl_task_metrics_enabled, 1);
+}
+
+/**
+ * @brief Disable per-task timing.
+ */
+JL_DLLEXPORT void jl_task_metrics_disable(void)
+{
+    // Prevent decrementing the counter below zero
+    uint8_t enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled);
+    while (enabled > 0) {
+        if (jl_atomic_cmpswap(&jl_task_metrics_enabled, &enabled, enabled-1))
+            break;
+    }
+}
+
+/**
+ * @brief Retrieve floating-point environment constants.
+ *
+ * Populates an array with constants related to the floating-point environment,
+ * such as rounding modes and exception flags.
+ *
+ * @param ret An array of integers to be populated with floating-point environment constants.
+ */
 JL_DLLEXPORT void jl_get_fenv_consts(int *ret)
 {
     ret[0] = FE_INEXACT;
@@ -530,6 +906,14 @@ JL_DLLEXPORT int jl_get_fenv_rounding(void)
 {
     return fegetround();
 }
+
+/**
+ * @brief Set the floating-point rounding mode.
+ *
+ * @param i An integer representing the desired floating-point rounding mode.
+          See also "floating-point rounding" macros in `<fenv.h>`.
+ * @return An integer indicating the success or failure of setting the rounding mode.
+ */
 JL_DLLEXPORT int jl_set_fenv_rounding(int i)
 {
     return fesetround(i);
@@ -537,15 +921,16 @@ JL_DLLEXPORT int jl_set_fenv_rounding(int i)
 
 static int exec_program(char *program)
 {
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         jl_load(jl_main_module, program);
     }
     JL_CATCH {
-        // TODO: It is possible for this output to be mangled due to `jl_print_backtrace`
+        // TODO: It is possible for this output to be mangled due to `jl_fprint_backtrace`
         //       printing directly to STDERR_FILENO.
         int shown_err = 0;
         jl_printf(JL_STDERR, "error during bootstrap:\n");
-        jl_value_t *exc = jl_current_exception();
+        jl_value_t *exc = jl_current_exception(ct);
         jl_value_t *showf = jl_base_module ? jl_get_function(jl_base_module, "show") : NULL;
         if (showf) {
             jl_value_t *errs = jl_stderr_obj();
@@ -560,7 +945,7 @@ static int exec_program(char *program)
             jl_static_show((JL_STREAM*)STDERR_FILENO, exc);
             jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
         }
-        jl_print_backtrace(); // written to STDERR_FILENO
+        jl_fprint_backtrace(ios_safe_stderr);
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
         return 1;
     }
@@ -571,22 +956,31 @@ static NOINLINE int true_main(int argc, char *argv[])
 {
     jl_set_ARGS(argc, argv);
 
-    jl_function_t *start_client = jl_base_module ?
-        (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("_start")) : NULL;
+
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_get_world_counter();
+
+    jl_value_t *start_client = jl_base_module ?
+        (jl_value_t*)jl_get_global_value(jl_base_module, jl_symbol("_start"), ct->world_age) : NULL;
 
     if (start_client) {
-        jl_task_t *ct = jl_current_task;
+        int ret = 1;
         JL_TRY {
-            size_t last_age = ct->world_age;
-            ct->world_age = jl_get_world_counter();
-            jl_apply(&start_client, 1);
-            ct->world_age = last_age;
+            JL_GC_PUSH1(&start_client);
+            jl_value_t *r = jl_apply(&start_client, 1);
+            JL_GC_POP();
+            if (jl_typeof(r) != (jl_value_t*)jl_int32_type)
+                jl_type_error("typeassert", (jl_value_t*)jl_int32_type, r);
+            ret = jl_unbox_int32(r);
         }
         JL_CATCH {
-            jl_no_exc_handler(jl_current_exception(), ct);
+            jl_no_exc_handler(jl_current_exception(ct), ct);
         }
-        return 0;
+        ct->world_age = last_age;
+        return ret;
     }
+    ct->world_age = last_age;
 
     // run program if specified, otherwise enter REPL
     if (argc > 0) {
@@ -602,8 +996,8 @@ static NOINLINE int true_main(int argc, char *argv[])
     while (!ios_eof(ios_stdin)) {
         char *volatile line = NULL;
         JL_TRY {
-            ios_puts("\njulia> ", ios_stdout);
-            ios_flush(ios_stdout);
+            jl_printf(JL_STDOUT, "\njulia> ");
+            jl_uv_flush(JL_STDOUT);
             line = ios_readline(ios_stdin);
             jl_value_t *val = (jl_value_t*)jl_eval_string(line);
             JL_GC_PUSH1(&val);
@@ -619,7 +1013,6 @@ static NOINLINE int true_main(int argc, char *argv[])
             jl_printf(JL_STDOUT, "\n");
             free(line);
             line = NULL;
-            jl_process_events();
         }
         JL_CATCH {
             if (line) {
@@ -627,9 +1020,9 @@ static NOINLINE int true_main(int argc, char *argv[])
                 line = NULL;
             }
             jl_printf((JL_STREAM*)STDERR_FILENO, "\nparser error:\n");
-            jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+            jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct));
             jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-            jl_print_backtrace(); // written to STDERR_FILENO
+            jl_fprint_backtrace(ios_safe_stderr);
         }
     }
     return 0;
@@ -687,6 +1080,13 @@ static void rr_detach_teleport(void) {
 }
 #endif
 
+/**
+ * @brief Entry point for the Julia REPL (Read-Eval-Print Loop).
+ *
+ * @param argc The number of command-line arguments.
+ * @param argv Array of command-line arguments.
+ * @return An integer indicating the exit status of the REPL session.
+ */
 JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
 {
 #ifdef USE_TRACY
@@ -722,7 +1122,14 @@ JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
         jl_error("Failed to self-execute");
     }
 
-    julia_init(jl_options.image_file_specified ? JL_IMAGE_CWD : JL_IMAGE_JULIA_HOME);
+    JL_IMAGE_SEARCH rel = jl_options.image_file_specified ? JL_IMAGE_CWD : JL_IMAGE_JULIA_HOME;
+    jl_resolve_sysimg_location(rel, NULL);
+    jl_image_buf_t sysimage = { JL_IMAGE_KIND_NONE };
+    if (jl_options.image_file)
+        sysimage = jl_preload_sysimg(jl_options.image_file);
+
+    jl_init_(sysimage);
+
     if (lisp_prompt) {
         jl_current_task->world_age = jl_get_world_counter();
         jl_lisp_prompt();
@@ -733,6 +1140,158 @@ JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
     return ret;
 }
 
+// create an absolute-path copy of the input path format string
+// formed as `joinpath(replace(pwd(), "%" => "%%"), in)`
+// unless `in` starts with `%`
+static const char *absformat(const char *in)
+{
+    if (in[0] == '%' || jl_isabspath(in))
+        return in;
+    // get an escaped copy of cwd
+    size_t path_size = JL_PATH_MAX;
+    char path[JL_PATH_MAX];
+    if (uv_cwd(path, &path_size)) {
+        jl_error("fatal error: unexpected error while retrieving current working directory");
+    }
+    size_t sz = strlen(in) + 1;
+    size_t i, fmt_size = 0;
+    for (i = 0; i < path_size; i++)
+        fmt_size += (path[i] == '%' ? 2 : 1);
+    char *out = (char*)malloc_s(fmt_size + 1 + sz);
+    fmt_size = 0;
+    for (i = 0; i < path_size; i++) { // copy-replace pwd portion
+        char c = path[i];
+        out[fmt_size++] = c;
+        if (c == '%')
+            out[fmt_size++] = '%';
+    }
+    out[fmt_size++] = PATHSEPSTRING[0]; // path sep
+    memcpy(out + fmt_size, in, sz); // copy over format, including nul
+    return out;
+}
+
+static char *absrealpath(const char *in, int nprefix)
+{ // compute an absolute realpath location, so that chdir doesn't change the file reference
+  // ignores (copies directly over) nprefix characters at the start of abspath
+    char *out;
+    uv_fs_t req;
+    int realpath_ret = uv_fs_realpath(NULL, &req, in + nprefix, NULL);
+    if (realpath_ret >= 0) {
+        size_t sz = strlen((char*)(req.ptr)) + 1;
+        out = (char*)malloc_s(sz + nprefix);
+        memcpy(out, in, nprefix);
+        memcpy(out + nprefix, req.ptr, sz);
+        uv_fs_req_cleanup(&req);
+    }
+    else {
+        uv_fs_req_cleanup(&req);
+        size_t sz = strlen(in + nprefix) + 1;
+        if (jl_isabspath(in + nprefix)) {
+            out = (char*)malloc_s(sz + nprefix);
+            memcpy(out, in, sz + nprefix);
+        }
+        else {
+            size_t path_size = JL_PATH_MAX;
+            char *path = (char*)malloc_s(JL_PATH_MAX);
+            if (uv_cwd(path, &path_size)) {
+                jl_error("fatal error: unexpected error while retrieving current working directory");
+            }
+            out = (char*)malloc_s(path_size + 1 + sz + nprefix);
+            memcpy(out, in, nprefix);
+            memcpy(out + nprefix, path, path_size);
+            out[nprefix + path_size] = PATHSEPSTRING[0];
+            memcpy(out + nprefix + path_size + 1, in + nprefix, sz);
+            free(path);
+        }
+    }
+    return out;
+}
+
+static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel, const char* julia_bindir)
+{
+    libsupport_init();
+    jl_init_timing();
+
+    // this function resolves the paths in jl_options to absolute file locations as needed
+    // and it replaces the pointers to `julia_bindir`, `julia_bin`, `image_file`, and output file paths
+    // it may fail, print an error, and exit(1) if any of these paths are longer than JL_PATH_MAX
+    //
+    // note: if you care about lost memory, you should call the appropriate `free()` function
+    // on the original pointer for each `char*` you've inserted into `jl_options`, after
+    // calling `jl_init_()`
+    char *free_path = (char*)malloc_s(JL_PATH_MAX);
+    size_t path_size = JL_PATH_MAX;
+    if (uv_exepath(free_path, &path_size)) {
+        jl_error("fatal error: unexpected error while retrieving exepath");
+    }
+    if (path_size >= JL_PATH_MAX) {
+        jl_error("fatal error: jl_options.julia_bin path too long");
+    }
+    jl_options.julia_bin = (char*)malloc_s(path_size + 1);
+    memcpy((char*)jl_options.julia_bin, free_path, path_size);
+    ((char*)jl_options.julia_bin)[path_size] = '\0';
+    if (julia_bindir == NULL) {
+        jl_options.julia_bindir = getenv("JULIA_BINDIR");
+        if (!jl_options.julia_bindir) {
+#ifdef _OS_WINDOWS_
+            jl_options.julia_bindir = strdup(jl_get_libdir());
+#else
+            int written = asprintf((char**)&jl_options.julia_bindir, "%s" PATHSEPSTRING ".." PATHSEPSTRING "%s", jl_get_libdir(), "bin");
+            if (written < 0)
+                abort(); // unexpected: memory allocation failed
+#endif
+        }
+    } else {
+        jl_options.julia_bindir = julia_bindir;
+    }
+    if (jl_options.julia_bindir)
+        jl_options.julia_bindir = absrealpath(jl_options.julia_bindir, 0);
+    free(free_path);
+    free_path = NULL;
+    if (jl_options.image_file) {
+        if (rel == JL_IMAGE_JULIA_HOME && !jl_isabspath(jl_options.image_file)) {
+            // build time path, relative to JULIA_BINDIR
+            free_path = (char*)malloc_s(JL_PATH_MAX);
+            int n = snprintf(free_path, JL_PATH_MAX, "%s" PATHSEPSTRING "%s",
+                             jl_options.julia_bindir, jl_options.image_file);
+            if (n >= JL_PATH_MAX || n < 0) {
+                jl_error("fatal error: jl_options.image_file path too long");
+            }
+            jl_options.image_file = free_path;
+        }
+        if (jl_options.image_file)
+            jl_options.image_file = absrealpath(jl_options.image_file, 0);
+        if (free_path) {
+            free(free_path);
+            free_path = NULL;
+        }
+    }
+    if (jl_options.outputo)
+        jl_options.outputo = absrealpath(jl_options.outputo, 0);
+    if (jl_options.outputji)
+        jl_options.outputji = absrealpath(jl_options.outputji, 0);
+    if (jl_options.outputbc)
+        jl_options.outputbc = absrealpath(jl_options.outputbc, 0);
+    if (jl_options.outputasm)
+        jl_options.outputasm = absrealpath(jl_options.outputasm, 0);
+    if (jl_options.machine_file)
+        jl_options.machine_file = absrealpath(jl_options.machine_file, 0);
+    if (jl_options.output_code_coverage)
+        jl_options.output_code_coverage = absformat(jl_options.output_code_coverage);
+    if (jl_options.tracked_path)
+        jl_options.tracked_path = absrealpath(jl_options.tracked_path, 0);
+
+    const char **cmdp = jl_options.cmds;
+    if (cmdp) {
+        for (; *cmdp; cmdp++) {
+            const char *cmd = *cmdp;
+            if (cmd[0] == 'L') {
+                *cmdp = absrealpath(cmd, 1);
+            }
+        }
+    }
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm
index d376bc27085ab..01ed5b7017a41 100644
--- a/src/jlfrontend.scm
+++ b/src/jlfrontend.scm
@@ -31,8 +31,6 @@
 
 ;; this is overwritten when we run in actual julia
 (define (defined-julia-global v) #f)
-(define (julia-current-file) 'none)
-(define (julia-current-line) 0)
 
 ;; parser entry points
 
@@ -111,7 +109,7 @@
 ;; return a lambda expression representing a thunk for a top-level expression
 ;; note: expansion of stuff inside module is delayed, so the contents obey
 ;; toplevel expansion order (don't expand until stuff before is evaluated).
-(define (expand-toplevel-expr-- e file line)
+(define (lower-toplevel-expr-- e file line)
   (let ((lno (first-lineno e))
         (ex0 (julia-expand-macroscope e)))
     (if (and lno (or (not (length= lno 3)) (not (atom? (caddr lno))))) (set! lno #f))
@@ -120,8 +118,8 @@
             ex0
             (if lno `(toplevel ,lno ,ex0) ex0))
         (let* ((linenode (if (and lno (or (= line 0) (eq? file 'none))) lno `(line ,line ,file)))
-               (ex (julia-expand0 ex0 linenode))
-               (th (julia-expand1
+               (ex (julia-lower0 ex0 linenode))
+               (th (julia-lower1
                     `(lambda () ()
                              (scope-block
                               ,(blockify ex lno)))
@@ -140,24 +138,23 @@
 
 (define (toplevel-only-expr? e)
   (and (pair? e)
-       (or (memq (car e) '(toplevel line module import using export
-                                    error incomplete))
-           (and (memq (car e) '(global const)) (every symbol? (cdr e))))))
+       (or (memq (car e) '(toplevel line module export public
+                                    error incomplete)))))
 
-(define *in-expand* #f)
+(define *in-lowering* #f)
 
-(define (expand-toplevel-expr e file line)
+(define (lower-toplevel-expr e file line)
   (cond ((or (atom? e) (toplevel-only-expr? e))
          (if (underscore-symbol? e)
-             (error "all-underscore identifier used as rvalue"))
+             (error "all-underscore identifiers are write-only and their values cannot be used in expressions"))
          e)
         (else
-         (let ((last *in-expand*))
+         (let ((last *in-lowering*))
            (if (not last)
                (begin (reset-gensyms)
-                      (set! *in-expand* #t)))
-           (begin0 (expand-toplevel-expr-- e file line)
-                   (set! *in-expand* last))))))
+                      (set! *in-lowering* #t)))
+           (begin0 (lower-toplevel-expr-- e file line)
+                   (set! *in-lowering* last))))))
 
 ;; used to collect warnings during lowering, which are usually discarded
 ;; unless logging is requested
@@ -165,63 +162,40 @@
 
 ;; expand a piece of raw surface syntax to an executable thunk
 
-(define (expand-to-thunk- expr file line)
+(define (lower-to-thunk- expr file line)
   (error-wrap (lambda ()
-                (expand-toplevel-expr expr file line))))
+                (lower-toplevel-expr expr file line))))
 
-(define (expand-to-thunk-stmt- expr file line)
-  (expand-to-thunk- (if (toplevel-only-expr? expr)
-                        expr
-                        `(block ,expr (null)))
-                    file line))
-
-(define (jl-expand-to-thunk-warn expr file line stmt)
+;; Returns a list `(,lowered-code ,warnings) where
+;; - warnings (currently only ambiguous soft scope assignments) may be ignored,
+;;   e.g. when running interactively
+;; - more items may be added to the list later
+(define (jl-lower-to-thunk expr file line)
   (let ((warnings '()))
     (with-bindings
      ;; Abuse scm_to_julia here to convert arguments to warn. This is meant for
      ;; `Expr`s but should be good enough provided we're only passing simple
      ;; numbers, symbols and strings.
-     ((lowering-warning (lambda lst (set! warnings (cons (cons 'warn lst) warnings)))))
-     (let ((thunk (if stmt
-                      (expand-to-thunk-stmt- expr file line)
-                      (expand-to-thunk- expr file line))))
-       (if (pair? warnings) `(warn ,@(reverse warnings) ,thunk) thunk)))))
-
-(define (jl-expand-to-thunk expr file line)
-  (expand-to-thunk- expr file line))
-
-(define (jl-expand-to-thunk-stmt expr file line)
-  (expand-to-thunk-stmt- expr file line))
+     ((lowering-warning (lambda (level group warn_file warn_line . lst)
+        (let ((line (if (= warn_line 0) line warn_line))
+              (file (if (eq? warn_file 'none) file warn_file)))
+          (set! warnings (cons (list* 'warn level group (symbol (string file line)) file line lst) warnings))))))
+     `(,(lower-to-thunk- expr file line)
+       ,(reverse warnings)))))
 
 (define (jl-expand-macroscope expr)
   (error-wrap (lambda ()
                 (julia-expand-macroscope expr))))
 
-;; construct default definitions of `eval` for non-bare modules
-;; called by jl_eval_module_expr
-(define (module-default-defs name file line)
-  (jl-expand-to-thunk
-   (let* ((loc  (if (and (eq? file 'none) (eq? line 0)) '() `((line ,line ,file))))
-          (x    (if (eq? name 'x) 'y 'x))
-          (mex  (if (eq? name 'mapexpr) 'map_expr 'mapexpr)))
-     `(block
-       (= (call eval ,x)
-          (block
-           ,@loc
-           (call (core eval) ,name ,x)))
-       (= (call include ,x)
-          (block
-           ,@loc
-           (call (core _call_latest) (top include) ,name ,x)))
-       (= (call include (:: ,mex (top Function)) ,x)
-          (block
-           ,@loc
-           (call (core _call_latest) (top include) ,mex ,name ,x)))))
-   file line))
+(define (jl-default-inner-ctor-body field-kinds file line)
+  (lower-to-thunk- (default-inner-ctor-body (cdr field-kinds) file line) file line))
+
+(define (jl-default-outer-ctor-body args file line)
+  (lower-to-thunk- (default-outer-ctor-body (cadr args) (caddr args) (cadddr args) file line) file line))
 
 ; run whole frontend on a string. useful for testing.
 (define (fe str)
-  (expand-toplevel-expr (julia-parse str) 'none 0))
+  (lower-toplevel-expr (julia-parse str) 'none 0))
 
 (define (profile-e s)
   (with-exception-catcher
diff --git a/src/jloptions.c b/src/jloptions.c
index 129ba9df2510e..50ef415bc0bac 100644
--- a/src/jloptions.c
+++ b/src/jloptions.c
@@ -8,6 +8,7 @@
 
 #include <unistd.h>
 #include <getopt.h>
+
 #include "julia_assert.h"
 
 #ifdef _OS_WINDOWS_
@@ -18,12 +19,71 @@ char *shlib_ext = ".dylib";
 char *shlib_ext = ".so";
 #endif
 
+/* This simple hand-crafted tolower exists to avoid locale-dependent effects in
+ * behaviors (and utf8proc_tolower wasn't linking properly on all platforms) */
+static char ascii_tolower(char c)
+{
+    if ('A' <= c && c <= 'Z')
+        return c - 'A' + 'a';
+    return c;
+}
+
 static const char system_image_path[256] = "\0" JL_SYSTEM_IMAGE_PATH;
 JL_DLLEXPORT const char *jl_get_default_sysimg_path(void)
 {
     return &system_image_path[1];
 }
 
+/* This function is also used by gc-stock.c to parse the
+ * JULIA_HEAP_SIZE_HINT environment variable. */
+uint64_t parse_heap_size_option(const char *optarg, const char *option_name, int allow_pct)
+{
+    long double value = 0.0;
+    char unit[4] = {0};
+    int nparsed = sscanf(optarg, "%Lf%3s", &value, unit);
+    if (nparsed == 0 || strlen(unit) > 2 || (strlen(unit) == 2 && ascii_tolower(unit[1]) != 'b')) {
+        jl_errorf("julia: invalid argument to %s (%s)", option_name, optarg);
+    }
+    uint64_t multiplier = 1ull;
+    switch (ascii_tolower(unit[0])) {
+        case '\0':
+        case 'b':
+            break;
+        case 'k':
+            multiplier <<= 10;
+            break;
+        case 'm':
+            multiplier <<= 20;
+            break;
+        case 'g':
+            multiplier <<= 30;
+            break;
+        case 't':
+            multiplier <<= 40;
+            break;
+        case '%':
+            if (allow_pct) {
+                if (value > 100)
+                    jl_errorf("julia: invalid percentage specified in %s", option_name);
+                uint64_t mem = uv_get_total_memory();
+                uint64_t cmem = uv_get_constrained_memory();
+                if (cmem > 0 && cmem < mem)
+                    mem = cmem;
+                multiplier = mem/100;
+                break;
+            }
+        default:
+            jl_errorf("julia: invalid argument to %s (%s)", option_name, optarg);
+            break;
+    }
+    long double sz = value * multiplier;
+    if (isnan(sz) || sz < 0) {
+        jl_errorf("julia: invalid argument to %s (%s)", option_name, optarg);
+    }
+    const long double limit = ldexpl(1.0, 64); // UINT64_MAX + 1
+    return sz < limit ? (uint64_t)sz : UINT64_MAX;
+}
+
 static int jl_options_initialized = 0;
 
 JL_DLLEXPORT void jl_init_options(void)
@@ -46,6 +106,7 @@ JL_DLLEXPORT void jl_init_options(void)
                         0,    // nprocs
                         NULL, // machine_file
                         NULL, // project
+                        NULL, // program_file
                         0,    // isinteractive
                         0,    // color
                         JL_OPTIONS_HISTORYFILE_ON, // history file
@@ -67,10 +128,12 @@ JL_DLLEXPORT void jl_init_options(void)
                         1,    // can_inline
                         JL_OPTIONS_POLLY_ON, // polly
                         NULL, // trace_compile
+                        NULL, // trace_dispatch
                         JL_OPTIONS_FAST_MATH_DEFAULT,
                         0,    // worker
                         NULL, // cookie
                         JL_OPTIONS_HANDLE_SIGNALS_ON,
+                        JL_OPTIONS_USE_EXPERIMENTAL_FEATURES_NO,
                         JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES,
                         JL_OPTIONS_USE_COMPILED_MODULES_YES,
                         JL_OPTIONS_USE_PKGIMAGES_YES,
@@ -90,129 +153,222 @@ JL_DLLEXPORT void jl_init_options(void)
                         0, // strip-ir
                         0, // permalloc_pkgimg
                         0, // heap-size-hint
+                        0, // hard-heap-limit
+                        0, // heap-target-increment
+                        0, // trace_compile_timing
+                        JL_TRIM_NO, // trim
+                        0, // trace-eval
+                        0, // task_metrics
+                        -1, // timeout_for_safepoint_straggler_s
+                        0, // gc_sweep_always_full
+                        0, // compress_sysimage
+                        0, // alert_on_critical_error
+                        0, // target_sanitize_memory
+                        0, // target_sanitize_thread
+                        0, // target_sanitize_address
     };
     jl_options_initialized = 1;
 }
 
 static const char usage[] = "\n    julia [switches] -- [programfile] [args...]\n\n";
 static const char opts[]  =
-    "Switches (a '*' marks the default value, if applicable; settings marked '($)' may trigger package precompilation):\n\n"
-    " -v, --version              Display version information\n"
-    " -h, --help                 Print this message (--help-hidden for more)\n"
-    " --help-hidden              Uncommon options not shown by `-h`\n\n"
+    "Switches (a '*' marks the default value, if applicable; settings marked '($)' may trigger package\n"
+    "precompilation):\n\n"
+    " Option                                        Description\n"
+    " ---------------------------------------------------------------------------------------------------\n"
+    " -v, --version                                 Display version information\n"
+    " -h, --help                                    Print command-line options (this message)\n"
+    " --help-hidden                                 Print uncommon options not shown by `-h`\n\n"
 
     // startup options
-    " --project[={<dir>|@.}]     Set <dir> as the home project/environment\n"
-    " -J, --sysimage <file>      Start up with the given system image file\n"
-    " -H, --home <dir>           Set location of `julia` executable\n"
-    " --startup-file={yes*|no}   Load `JULIA_DEPOT_PATH/config/startup.jl`; if `JULIA_DEPOT_PATH`\n"
-    "                            environment variable is unset, load `~/.julia/config/startup.jl`\n"
-    " --handle-signals={yes*|no} Enable or disable Julia's default signal handlers\n"
-    " --sysimage-native-code={yes*|no}\n"
-    "                            Use native code from system image if available\n"
-    " --compiled-modules={yes*|no}\n"
-    "                            Enable or disable incremental precompilation of modules\n"
-    " --pkgimages={yes*|no}\n"
-    "                            Enable or disable usage of native code caching in the form of pkgimages ($)\n\n"
+    " --project[={<dir>|@temp|@.|@script[<rel>]}]   Set <dir> as the active project/environment.\n"
+    "                                               Or, create a temporary environment with `@temp`\n"
+    "                                               The default @. option will search through parent\n"
+    "                                               directories until a Project.toml or JuliaProject.toml\n"
+    "                                               file is found. @script is similar, but searches up\n"
+    "                                               from the programfile or a path relative to\n"
+    "                                               programfile.\n"
+    " -J, --sysimage <file>                         Start up with the given system image file\n"
+    " -H, --home <dir>                              Set location of `julia` executable\n"
+    " --startup-file={yes*|no}                      Load `JULIA_DEPOT_PATH/config/startup.jl`; \n"
+    "                                               if `JULIA_DEPOT_PATH` environment variable is unset,\n"
+    "                                               load `~/.julia/config/startup.jl`\n"
+    " --handle-signals={yes*|no}                    Enable or disable Julia's default signal handlers\n"
+    " --sysimage-native-code={yes*|no}              Use native code from system image if available\n"
+    " --compiled-modules={yes*|no|existing|strict}  Enable or disable incremental precompilation of\n"
+    "                                               modules. The `existing` option allows use of existing\n"
+    "                                               compiled modules that were previously precompiled,\n"
+    "                                               but disallows creation of new precompile files.\n"
+    "                                               The `strict` option is similar, but will error if no\n"
+    "                                               precompile file is found.\n"
+    " --pkgimages={yes*|no|existing}                Enable or disable usage of native code caching in the\n"
+    "                                               form of pkgimages. The `existing` option allows use\n"
+    "                                               of existing pkgimages but disallows creation of new\n"
+    "                                               ones ($)\n\n"
 
     // actions
-    " -e, --eval <expr>          Evaluate <expr>\n"
-    " -E, --print <expr>         Evaluate <expr> and display the result\n"
-    " -L, --load <file>          Load <file> immediately on all processors\n\n"
+    " -e, --eval <expr>                             Evaluate <expr>\n"
+    " -E, --print <expr>                            Evaluate <expr> and display the result\n"
+    " -m, --module <Package> [args]                 Run entry point of `Package` (`@main` function) with\n"
+    "                                               `args'.\n"
+    " -L, --load <file>                             Load <file> immediately on all processors\n\n"
 
     // parallel options
-    " -t, --threads {auto|N[,auto|M]}\n"
-    "                           Enable N[+M] threads; N threads are assigned to the `default`\n"
-    "                           threadpool, and if M is specified, M threads are assigned to the\n"
-    "                           `interactive` threadpool; \"auto\" tries to infer a useful\n"
-    "                           default number of threads to use but the exact behavior might change\n"
-    "                           in the future. Currently sets N to the number of CPUs assigned to\n"
-    "                           this Julia process based on the OS-specific affinity assignment\n"
-    "                           interface if supported (Linux and Windows) or to the number of CPU\n"
-    "                           threads if not supported (MacOS) or if process affinity is not\n"
-    "                           configured, and sets M to 1.\n"
-    " --gcthreads=M[,N]         Use M threads for the mark phase of GC and N (0 or 1) threads for the concurrent sweeping phase of GC.\n"
-    "                           M is set to half of the number of compute threads and N is set to 0 if unspecified.\n"
-    " -p, --procs {N|auto}      Integer value N launches N additional local worker processes\n"
-    "                           \"auto\" launches as many workers as the number of local CPU threads (logical cores)\n"
-    " --machine-file <file>     Run processes on hosts listed in <file>\n\n"
+    " -t, --threads {auto|N[,auto|M]}               Enable N[+M] threads; N threads are assigned to the\n"
+    "                                               `default` threadpool, and if M is specified, M\n"
+    "                                               threads are assigned to the `interactive`\n"
+    "                                               threadpool; `auto` tries to infer a useful\n"
+    "                                               default number of threads to use but the exact\n"
+    "                                               behavior might change in the future. Currently sets\n"
+    "                                               N to the number of CPUs assigned to this Julia\n"
+    "                                               process based on the OS-specific affinity assignment\n"
+    "                                               interface if supported (Linux and Windows) or to the\n"
+    "                                               number of CPU threads if not supported (MacOS) or if\n"
+    "                                               process affinity is not configured, and sets M to 1.\n"
+    " --gcthreads=N[,M]                             Use N threads for the mark phase of GC and M (0 or 1)\n"
+    "                                               threads for the concurrent sweeping phase of GC.\n"
+    "                                               N is set to the number of compute threads and\n"
+    "                                               M is set to 0 if unspecified.\n"
+    " -p, --procs {N|auto}                          Integer value N launches N additional local worker\n"
+    "                                               processes `auto` launches as many workers as the\n"
+    "                                               number of local CPU threads (logical cores).\n"
+    " --machine-file <file>                         Run processes on hosts listed in <file>\n\n"
 
     // interactive options
-    " -i, --interactive          Interactive mode; REPL runs and `isinteractive()` is true\n"
-    " -q, --quiet                Quiet startup: no banner, suppress REPL warnings\n"
-    " --banner={yes|no|auto*}    Enable or disable startup banner\n"
-    " --color={yes|no|auto*}     Enable or disable color text\n"
-    " --history-file={yes*|no}   Load or save history\n\n"
+    " -i, --interactive                             Interactive mode; REPL runs and\n"
+    "                                               `isinteractive()` is true.\n"
+    " -q, --quiet                                   Quiet startup: no banner, suppress REPL warnings\n"
+    " --banner={yes|no|short|auto*}                 Enable or disable startup banner\n"
+    " --color={yes|no|auto*}                        Enable or disable color text\n"
+    " --history-file={yes*|no}                      Load or save history\n\n"
 
     // error and warning options
-    " --depwarn={yes|no*|error}  Enable or disable syntax and method deprecation warnings (`error` turns warnings into errors)\n"
-    " --warn-overwrite={yes|no*} Enable or disable method overwrite warnings\n"
-    " --warn-scope={yes*|no}     Enable or disable warning for ambiguous top-level scope\n\n"
+    " --depwarn={yes|no*|error}                     Enable or disable syntax and method deprecation\n"
+    "                                               warnings (`error` turns warnings into errors)\n"
+    " --warn-overwrite={yes|no*}                    Enable or disable method overwrite warnings\n"
+    " --warn-scope={yes*|no}                        Enable or disable warning for ambiguous top-level\n"
+    "                                               scope\n\n"
 
     // code generation options
-    " -C, --cpu-target <target>  Limit usage of CPU features up to <target>; set to `help` to see the available options\n"
-    " -O, --optimize={0,1,2*,3}  Set the optimization level (level 3 if `-O` is used without a level) ($)\n"
-    " --min-optlevel={0*,1,2,3}  Set a lower bound on the optimization level\n"
+    " -C, --cpu-target <target>                     Limit usage of CPU features up to <target>; set to\n"
+    "                                               `help` to see the available options\n"
+    " -O, --optimize={0|1|2*|3}                     Set the optimization level (level 3 if `-O` is used\n"
+    "                                               without a level) ($)\n"
+    " --min-optlevel={0*|1|2|3}                     Set a lower bound on the optimization level\n"
 #ifdef JL_DEBUG_BUILD
-        " -g, --debug-info=[{0,1,2*}] Set the level of debug info generation in the julia-debug build ($)\n"
+    " -g, --debug-info=[{0|1|2*}]                   Set the level of debug info generation in the\n"
+    "                                               julia-debug build ($)\n"
 #else
-        " -g, --debug-info=[{0,1*,2}] Set the level of debug info generation (level 2 if `-g` is used without a level) ($)\n"
+    " -g, --debug-info=[{0|1*|2}]                   Set the level of debug info generation (level 2 if\n"
+    "                                               `-g` is used without a level) ($)\n"
 #endif
-    " --inline={yes*|no}         Control whether inlining is permitted, including overriding @inline declarations\n"
-    " --check-bounds={yes|no|auto*}\n"
-    "                            Emit bounds checks always, never, or respect @inbounds declarations ($)\n"
+    " --inline={yes*|no}                            Control whether inlining is permitted, including\n"
+    "                                               overriding @inline declarations\n"
+    " --check-bounds={yes|no|auto*}                 Emit bounds checks always, never, or respect\n"
+    "                                               @inbounds declarations ($)\n"
+    " --math-mode={ieee|user*}                      Always follow `ieee` floating point semantics or\n"
+    "                                               respect `@fastmath` declarations\n\n"
 #ifdef USE_POLLY
-    " --polly={yes*|no}          Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)\n"
+    " --polly={yes*|no}                             Enable or disable the polyhedral optimizer Polly\n"
+    "                                               (overrides @polly declaration)\n"
 #endif
 
     // instrumentation options
-    " --code-coverage[={none*|user|all}]\n"
-    "                            Count executions of source lines (omitting setting is equivalent to `user`)\n"
-    " --code-coverage=@<path>\n"
-    "                            Count executions but only in files that fall under the given file path/directory.\n"
-    "                            The `@` prefix is required to select this option. A `@` with no path will track the\n"
-    "                            current directory.\n"
+    " --code-coverage[={none*|user|all}]            Count executions of source lines (omitting setting is\n"
+    "                                               equivalent to `user`)\n"
+    " --code-coverage=@<path>                       Count executions but only in files that fall under\n"
+    "                                               the given file path/directory. The `@` prefix is\n"
+    "                                               required to select this option. A `@` with no path\n"
+    "                                               will track the current directory.\n"
 
-    " --code-coverage=tracefile.info\n"
-    "                            Append coverage information to the LCOV tracefile (filename supports format tokens)\n"
+    " --code-coverage=tracefile.info                Append coverage information to the LCOV tracefile\n"
+    "                                               (filename supports format tokens)\n"
 // TODO: These TOKENS are defined in `runtime_ccall.cpp`. A more verbose `--help` should include that list here.
-    " --track-allocation[={none*|user|all}]\n"
-    "                            Count bytes allocated by each source line (omitting setting is equivalent to `user`)\n"
-    " --track-allocation=@<path>\n"
-    "                            Count bytes but only in files that fall under the given file path/directory.\n"
-    "                            The `@` prefix is required to select this option. A `@` with no path will track the\n"
-    "                            current directory.\n"
-    " --bug-report=KIND          Launch a bug report session. It can be used to start a REPL, run a script, or evaluate\n"
-    "                            expressions. It first tries to use BugReporting.jl installed in current environment and\n"
-    "                            fallbacks to the latest compatible BugReporting.jl if not. For more information, see\n"
-    "                            --bug-report=help.\n\n"
-
-    " --heap-size-hint=<size>    Forces garbage collection if memory usage is higher than that value.\n"
-    "                            The memory hint might be specified in megabytes(500M) or gigabytes(1G)\n\n"
+    " --track-allocation[={none*|user|all}]         Count bytes allocated by each source line (omitting\n"
+    "                                               setting is equivalent to `user`)\n"
+    " --track-allocation=@<path>                    Count bytes but only in files that fall under the\n"
+    "                                               given file path/directory. The `@` prefix is required\n"
+    "                                               to select this option. A `@` with no path will track\n"
+    "                                               the current directory.\n"
+    " --bug-report=KIND                             Launch a bug report session. It can be used to start\n"
+    "                                               a REPL, run a script, or evaluate expressions. It\n"
+    "                                               first tries to use BugReporting.jl installed in\n"
+    "                                               current environment and fallbacks to the latest\n"
+    "                                               compatible BugReporting.jl if not. For more\n"
+    "                                               information, see --bug-report=help.\n\n"
+    " --heap-size-hint=<size>[<unit>]               Forces garbage collection if memory usage is higher\n"
+    "                                               than the given value. The value may be specified as a\n"
+    "                                               number of bytes, optionally in units of: B,\n"
+    "                                               K (kibibytes), M (mebibytes), G (gibibytes),\n"
+    "                                               T (tebibytes), or % (percentage of physical memory).\n\n"
 ;
 
-static const char opts_hidden[]  =
+static const char opts_hidden[] =
     "Switches (a '*' marks the default value, if applicable):\n\n"
+    " Option                                        Description\n"
+    " ---------------------------------------------------------------------------------------------------\n"
     // code generation options
-    " --compile={yes*|no|all|min}\n"
-    "                          Enable or disable JIT compiler, or request exhaustive or minimal compilation\n\n"
+    " --compile={yes*|no|all|min}                   Enable or disable JIT compiler, or request exhaustive\n"
+    "                                               or minimal compilation\n\n"
 
     // compiler output options
-    " --output-o <name>        Generate an object file (including system image data)\n"
-    " --output-ji <name>       Generate a system image data file (.ji)\n"
-    " --strip-metadata         Remove docstrings and source location info from system image\n"
-    " --strip-ir               Remove IR (intermediate representation) of compiled functions\n\n"
+    " --output-o <name>                             Generate an object file (including system image data)\n"
+    " --output-ji <name>                            Generate a system image data file (.ji)\n"
+    " --strip-metadata                              Remove docstrings and source location info from\n"
+    "                                               system image\n"
+    " --strip-ir                                    Remove IR (intermediate representation) of compiled\n"
+    "                                               functions\n"
+    " --compress-sysimage={yes|no*}                 Compress the sys/pkgimage heap at the expense of\n"
+    "                                               slightly increased load time.\n"
+    "\n"
 
-    // compiler debugging (see the devdocs for tips on using these options)
-    " --output-unopt-bc <name> Generate unoptimized LLVM bitcode (.bc)\n"
-    " --output-bc <name>       Generate LLVM bitcode (.bc)\n"
-    " --output-asm <name>      Generate an assembly file (.s)\n"
-    " --output-incremental={yes|no*}\n"
-    "                          Generate an incremental output file (rather than complete)\n"
-    " --trace-compile={stderr,name}\n"
-    "                          Print precompile statements for methods compiled during execution or save to a path\n"
-    " --image-codegen          Force generate code in imaging mode\n"
-    " --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n"
+    // compiler debugging and experimental (see the devdocs for tips on using these options)
+    " --experimental                                Enable the use of experimental (alpha) features\n"
+    " --output-unopt-bc <name>                      Generate unoptimized LLVM bitcode (.bc)\n"
+    " --output-bc <name>                            Generate LLVM bitcode (.bc)\n"
+    " --output-asm <name>                           Generate an assembly file (.s)\n"
+    " --output-incremental={yes|no*}                Generate an incremental output file (rather than\n"
+    "                                               complete)\n"
+    " --timeout-for-safepoint-straggler <seconds>   If this value is set, then we will dump the backtrace\n"
+    "                                               for a thread that fails to reach a safepoint within\n"
+    "                                               the specified time\n"
+    " --trace-compile={stderr|name}                 Print precompile statements for methods compiled\n"
+    "                                               during execution or save to stderr or a path. Methods\n"
+    "                                               that were recompiled are printed in yellow or with\n"
+    "                                               a trailing comment if color is not supported\n"
+    " --trace-compile-timing                        If --trace-compile is enabled show how long each took\n"
+    "                                               to compile in ms\n"
+    " --task-metrics={yes|no*}                      Enable collection of per-task timing data.\n"
+    " --image-codegen                               Force generate code in imaging mode\n"
+    " --permalloc-pkgimg={yes|no*}                  Copy the data section of package images into memory\n\n"
+
+    " --trim={no*|safe|unsafe|unsafe-warn}          Build a sysimage including only code provably\n"
+    "                                               reachable from methods marked by calling\n"
+    "                                               `entrypoint`. In unsafe mode, the resulting binary\n"
+    "                                               might be missing needed code and can throw errors.\n"
+    "                                               With unsafe-warn warnings will be printed for\n"
+    "                                               dynamic call sites that might lead to such errors.\n"
+    "                                               In safe mode compile-time errors are given instead.\n"
+    " --trace-eval={loc|full|no*}                   Show the expression being evaluated before eval.\n"
+    " --hard-heap-limit=<size>[<unit>]              Set a hard limit on the heap size: if we ever\n"
+    "                                               go above this limit, we will abort. The value\n"
+    "                                               may be specified as a number of bytes,\n"
+    "                                               optionally in units of: B, K (kibibytes),\n"
+    "                                               M (mebibytes), G (gibibytes) or T (tebibytes).\n"
+    " --heap-target-increment=<size>[<unit>]        Set an upper bound on how much the heap\n"
+    "                                               target can increase between consecutive\n"
+    "                                               collections. The value may be specified as\n"
+    "                                               a number of bytes, optionally in units of:\n"
+    "                                               B, K (kibibytes), M (mebibytes), G (gibibytes)\n"
+    "                                               or T (tebibytes).\n"
+    " --gc-sweep-always-full                        Makes the GC always do a full sweep of the heap\n"
+    " --target-sanitize=memory                      Instrument generated code for MemorySanitizer.\n"
+    " --target-sanitize=thread                      Instrument generated code for ThreadSanitizer.\n"
+    " --target-sanitize=address                     Instrument generated code for AddressSanitizer.\n"
+    "                                               The above options control the instrumentation of\n"
+    "                                               code generated by --output-* only. JITed code is\n"
+    "                                               instrumented if Julia itself is built with\n"
+    "                                               sanitizers.\n"
 ;
 
 JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
@@ -232,7 +388,11 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_warn_scope,
            opt_inline,
            opt_polly,
+           opt_timeout_for_safepoint_straggler,
            opt_trace_compile,
+           opt_trace_compile_timing,
+           opt_trace_dispatch,
+           opt_task_metrics,
            opt_math_mode,
            opt_worker,
            opt_bind_to,
@@ -257,10 +417,18 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_strip_metadata,
            opt_strip_ir,
            opt_heap_size_hint,
+           opt_hard_heap_limit,
+           opt_heap_target_increment,
+           opt_gc_sweep_always_full,
            opt_gc_threads,
-           opt_permalloc_pkgimg
+           opt_permalloc_pkgimg,
+           opt_trim,
+           opt_trace_eval,
+           opt_experimental_features,
+           opt_compress_sysimage,
+           opt_target_sanitize,
     };
-    static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:";
+    static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:m:";
     static const struct option longopts[] = {
         // exposed command line options
         // NOTE: This set of required arguments need to be kept in sync
@@ -273,6 +441,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "banner",          required_argument, 0, opt_banner },
         { "home",            required_argument, 0, 'H' },
         { "eval",            required_argument, 0, 'e' },
+        { "module",          required_argument, 0, 'm' },
         { "print",           required_argument, 0, 'E' },
         { "load",            required_argument, 0, 'L' },
         { "bug-report",      required_argument, 0, opt_bug_report },
@@ -307,10 +476,15 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "warn-scope",      required_argument, 0, opt_warn_scope },
         { "inline",          required_argument, 0, opt_inline },
         { "polly",           required_argument, 0, opt_polly },
+        { "timeout-for-safepoint-straggler", required_argument, 0, opt_timeout_for_safepoint_straggler },
         { "trace-compile",   required_argument, 0, opt_trace_compile },
+        { "trace-compile-timing",  no_argument, 0, opt_trace_compile_timing },
+        { "trace-dispatch",  required_argument, 0, opt_trace_dispatch },
+        { "task-metrics",    required_argument, 0, opt_task_metrics },
         { "math-mode",       required_argument, 0, opt_math_mode },
         { "handle-signals",  required_argument, 0, opt_handle_signals },
         // hidden command line options
+        { "experimental",    no_argument,       0, opt_experimental_features },
         { "worker",          optional_argument, 0, opt_worker },
         { "bind-to",         required_argument, 0, opt_bind_to },
         { "lisp",            no_argument,       0, 1 },
@@ -320,6 +494,13 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "strip-ir",        no_argument,       0, opt_strip_ir },
         { "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg },
         { "heap-size-hint",  required_argument, 0, opt_heap_size_hint },
+        { "hard-heap-limit", required_argument, 0, opt_hard_heap_limit },
+        { "heap-target-increment", required_argument, 0, opt_heap_target_increment },
+        { "gc-sweep-always-full", no_argument, 0, opt_gc_sweep_always_full },
+        { "trim",  optional_argument, 0, opt_trim },
+        { "compress-sysimage", required_argument, 0, opt_compress_sysimage },
+        { "trace-eval",       optional_argument, 0, opt_trace_eval },
+        { "target-sanitize", required_argument, 0, opt_target_sanitize },
         { 0, 0, 0, 0 }
     };
 
@@ -332,7 +513,6 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
     const char **cmds = NULL;
     int codecov = JL_LOG_NONE;
     int malloclog = JL_LOG_NONE;
-    int pkgimage_explicit = 0;
     int argc = *argcp;
     char **argv = *argvp;
     char *endptr;
@@ -411,6 +591,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         case 'e': // eval
         case 'E': // print
         case 'L': // load
+        case 'm': // module
         case opt_bug_report: // bug
         {
             size_t sz = strlen(optarg) + 1;
@@ -424,6 +605,10 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
             ncmds++;
             cmds[ncmds] = 0;
             jl_options.cmds = cmds;
+            if (c == 'm') {
+                optind -= 1;
+                goto parsing_args_done;
+            }
             break;
         }
         case 'J': // sysimage
@@ -444,33 +629,50 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 jl_options.banner = 0;
             else if (!strcmp(optarg, "auto"))
                 jl_options.banner = -1;
+            else if (!strcmp(optarg, "short"))
+                jl_options.banner = 2;
             else
-                jl_errorf("julia: invalid argument to --banner={yes|no|auto} (%s)", optarg);
+                jl_errorf("julia: invalid argument to --banner={yes|no|auto|short} (%s)", optarg);
+            break;
+        case opt_experimental_features:
+            jl_options.use_experimental_features = JL_OPTIONS_USE_EXPERIMENTAL_FEATURES_YES;
             break;
         case opt_sysimage_native_code:
-            if (!strcmp(optarg,"yes"))
+            if (!strcmp(optarg,"yes")) {
                 jl_options.use_sysimage_native_code = JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES;
-            else if (!strcmp(optarg,"no"))
+            }
+            else if (!strcmp(optarg,"no")) {
                 jl_options.use_sysimage_native_code = JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_NO;
-            else
+                if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR)
+                    jl_errorf("julia: --sysimage-native-code=no is deprecated");
+                else if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ON)
+                    jl_printf(JL_STDERR, "WARNING: --sysimage-native-code=no is deprecated\n");
+            }
+            else {
                 jl_errorf("julia: invalid argument to --sysimage-native-code={yes|no} (%s)", optarg);
+            }
             break;
         case opt_compiled_modules:
             if (!strcmp(optarg,"yes"))
                 jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_YES;
             else if (!strcmp(optarg,"no"))
                 jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_NO;
+            else if (!strcmp(optarg,"existing"))
+                jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_EXISTING;
+            else if (!strcmp(optarg,"strict"))
+                jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_STRICT;
             else
-                jl_errorf("julia: invalid argument to --compiled-modules={yes|no} (%s)", optarg);
+                jl_errorf("julia: invalid argument to --compiled-modules={yes|no|existing|strict} (%s)", optarg);
             break;
         case opt_pkgimages:
-            pkgimage_explicit = 1;
             if (!strcmp(optarg,"yes"))
                 jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_YES;
             else if (!strcmp(optarg,"no"))
                 jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_NO;
+            else if (!strcmp(optarg,"existing"))
+                jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_EXISTING;
             else
-                jl_errorf("julia: invalid argument to --pkgimage={yes|no} (%s)", optarg);
+                jl_errorf("julia: invalid argument to --pkgimages={yes|no} (%s)", optarg);
             break;
         case 'C': // cpu-target
             jl_options.cpu_target = strdup(optarg);
@@ -478,9 +680,15 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 jl_error("julia: failed to allocate memory");
             break;
         case 't': // threads
+        {
             errno = 0;
-            jl_options.nthreadpools = 1;
-            long nthreads = -1, nthreadsi = 0;
+            jl_options.nthreadpools = 2;
+            // By default:
+            // default threads = -1 (== "auto")
+            long nthreads = -1;
+            // interactive threads = 1, or 0 if generating output
+            long nthreadsi = jl_generating_output() ? 0 : 1;
+
             if (!strncmp(optarg, "auto", 4)) {
                 jl_options.nthreads = -1;
                 if (optarg[4] == ',') {
@@ -489,10 +697,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                     else {
                         errno = 0;
                         nthreadsi = strtol(&optarg[5], &endptr, 10);
-                        if (errno != 0 || endptr == &optarg[5] || *endptr != 0 || nthreadsi < 1 || nthreadsi >= INT16_MAX)
-                            jl_errorf("julia: -t,--threads=auto,<m>; m must be an integer >= 1");
+                        if (errno != 0 || endptr == &optarg[5] || *endptr != 0 || nthreadsi < 0 || nthreadsi >= INT16_MAX)
+                            jl_errorf("julia: -t,--threads=auto,<m>; m must be an integer >= 0");
                     }
-                    jl_options.nthreadpools++;
                 }
             }
             else {
@@ -506,19 +713,24 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                         errno = 0;
                         char *endptri;
                         nthreadsi = strtol(&endptr[1], &endptri, 10);
-                        if (errno != 0 || endptri == &endptr[1] || *endptri != 0 || nthreadsi < 1 || nthreadsi >= INT16_MAX)
-                            jl_errorf("julia: -t,--threads=<n>,<m>; n and m must be integers >= 1");
+                        // Allow 0 for interactive
+                        if (errno != 0 || endptri == &endptr[1] || *endptri != 0 || nthreadsi < 0 || nthreadsi >= INT16_MAX)
+                            jl_errorf("julia: -t,--threads=<n>,<m>; m must be an integer >= 0");
+                        if (nthreadsi == 0)
+                            jl_options.nthreadpools = 1;
                     }
-                    jl_options.nthreadpools++;
+                } else if (nthreads == 1) { // User asked for 1 thread so don't add an interactive one
+                    jl_options.nthreadpools = 1;
+                    nthreadsi = 0;
                 }
                 jl_options.nthreads = nthreads + nthreadsi;
             }
             int16_t *ntpp = (int16_t *)malloc_s(jl_options.nthreadpools * sizeof(int16_t));
             ntpp[0] = (int16_t)nthreads;
-            if (jl_options.nthreadpools == 2)
-                ntpp[1] = (int16_t)nthreadsi;
+            ntpp[1] = (int16_t)nthreadsi;
             jl_options.nthreads_per_pool = ntpp;
             break;
+        }
         case 'p': // procs
             errno = 0;
             if (!strcmp(optarg,"auto")) {
@@ -735,7 +947,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 jl_errorf("julia: invalid argument to --inline (%s)", optarg);
             }
             break;
-       case opt_polly:
+        case opt_polly:
             if (!strcmp(optarg,"yes"))
                 jl_options.polly = JL_OPTIONS_POLLY_ON;
             else if (!strcmp(optarg,"no"))
@@ -744,11 +956,19 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 jl_errorf("julia: invalid argument to --polly (%s)", optarg);
             }
             break;
-         case opt_trace_compile:
+        case opt_trace_compile:
             jl_options.trace_compile = strdup(optarg);
             if (!jl_options.trace_compile)
                 jl_errorf("fatal error: failed to allocate memory: %s", strerror(errno));
             break;
+        case opt_trace_compile_timing:
+            jl_options.trace_compile_timing = 1;
+            break;
+         case opt_trace_dispatch:
+            jl_options.trace_dispatch = strdup(optarg);
+            if (!jl_options.trace_dispatch)
+                jl_errorf("fatal error: failed to allocate memory: %s", strerror(errno));
+            break;
         case opt_math_mode:
             if (!strcmp(optarg,"ieee"))
                 jl_options.fast_math = JL_OPTIONS_FAST_MATH_OFF;
@@ -757,7 +977,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
             else if (!strcmp(optarg,"user"))
                 jl_options.fast_math = JL_OPTIONS_FAST_MATH_DEFAULT;
             else
-                jl_errorf("julia: invalid argument to --math-mode (%s)", optarg);
+                jl_errorf("julia: invalid argument to --math-mode={ieee|user} (%s)", optarg);
             break;
         case opt_worker:
             jl_options.worker = 1;
@@ -793,40 +1013,26 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
             jl_options.strip_ir = 1;
             break;
         case opt_heap_size_hint:
-            if (optarg != NULL) {
-                size_t endof = strlen(optarg);
-                long double value = 0.0;
-                if (sscanf(optarg, "%Lf", &value) == 1 && value > 1e-7) {
-                    char unit = optarg[endof - 1];
-                    uint64_t multiplier = 1ull;
-                    switch (unit) {
-                        case 'k':
-                        case 'K':
-                            multiplier <<= 10;
-                            break;
-                        case 'm':
-                        case 'M':
-                            multiplier <<= 20;
-                            break;
-                        case 'g':
-                        case 'G':
-                            multiplier <<= 30;
-                            break;
-                        case 't':
-                        case 'T':
-                            multiplier <<= 40;
-                            break;
-                        default:
-                            break;
-                    }
-                    jl_options.heap_size_hint = (uint64_t)(value * multiplier);
-                }
-            }
+            if (optarg != NULL)
+                jl_options.heap_size_hint = parse_heap_size_option(optarg, "--heap-size-hint=<size>[<unit>]", 1);
             if (jl_options.heap_size_hint == 0)
-                jl_errorf("julia: invalid argument to --heap-size-hint without memory size specified");
+                jl_errorf("julia: invalid memory size specified in --heap-size-hint=<size>[<unit>]");
 
             break;
+        case opt_hard_heap_limit:
+            if (optarg != NULL)
+                jl_options.hard_heap_limit = parse_heap_size_option(optarg, "--hard-heap-limit=<size>[<unit>]", 0);
+            if (jl_options.hard_heap_limit == 0)
+                jl_errorf("julia: invalid memory size specified in --hard-heap-limit=<size>[<unit>]");
+            break;
+        case opt_heap_target_increment:
+            if (optarg != NULL)
+                jl_options.heap_target_increment = parse_heap_size_option(optarg, "--heap-target-increment=<size>[<unit>]", 0);
+            if (jl_options.heap_target_increment == 0)
+                jl_errorf("julia: invalid memory size specified in --heap-target-increment=<size>[<unit>]");
+            break;
         case opt_gc_threads:
+        {
             errno = 0;
             long nmarkthreads = strtol(optarg, &endptr, 10);
             if (errno != 0 || optarg == endptr || nmarkthreads < 1 || nmarkthreads >= INT16_MAX) {
@@ -838,9 +1044,10 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 char *endptri;
                 long nsweepthreads = strtol(&endptr[1], &endptri, 10);
                 if (errno != 0 || endptri == &endptr[1] || *endptri != 0 || nsweepthreads < 0 || nsweepthreads > 1)
-                    jl_errorf("julia: --gcthreads=<n>,<m>; n must be 0 or 1");
+                    jl_errorf("julia: --gcthreads=<n>,<m>; m must be 0 or 1");
                 jl_options.nsweepthreads = (int8_t)nsweepthreads;
             }
+        }
             break;
         case opt_permalloc_pkgimg:
             if (!strcmp(optarg,"yes"))
@@ -850,17 +1057,74 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
             else
                 jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg);
             break;
+        case opt_timeout_for_safepoint_straggler:
+        {
+            errno = 0;
+            long timeout = strtol(optarg, &endptr, 10);
+            if (errno != 0 || optarg == endptr || timeout < 1 || timeout > INT16_MAX)
+                jl_errorf("julia: --timeout-for-safepoint-straggler=<seconds>; seconds must be an integer between 1 and %d", INT16_MAX);
+            jl_options.timeout_for_safepoint_straggler_s = (int16_t)timeout;
+            break;
+        }
+        case opt_gc_sweep_always_full:
+            jl_options.gc_sweep_always_full = 1;
+            break;
+        case opt_trim:
+            if (optarg == NULL || !strcmp(optarg,"safe"))
+                jl_options.trim = JL_TRIM_SAFE;
+            else if (!strcmp(optarg,"no"))
+                jl_options.trim = JL_TRIM_NO;
+            else if (!strcmp(optarg,"unsafe"))
+                jl_options.trim = JL_TRIM_UNSAFE;
+            else if (!strcmp(optarg,"unsafe-warn"))
+                jl_options.trim = JL_TRIM_UNSAFE_WARN;
+            else
+                jl_errorf("julia: invalid argument to --trim={safe|no|unsafe|unsafe-warn} (%s)", optarg);
+            break;
+        case opt_trace_eval:
+            if (optarg == NULL || !strcmp(optarg,"loc"))
+                jl_options.trace_eval = 1;
+            else if (!strcmp(optarg,"full"))
+                jl_options.trace_eval = 2;
+            else if (!strcmp(optarg,"no"))
+                jl_options.trace_eval = 0;
+            else
+                jl_errorf("julia: invalid argument to --trace-eval={yes|no} (%s)", optarg);
+            break;
+        case opt_task_metrics:
+            if (!strcmp(optarg, "no"))
+                jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_OFF;
+            else if (!strcmp(optarg, "yes"))
+                jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_ON;
+            else
+                jl_errorf("julia: invalid argument to --task-metrics={yes|no} (%s)", optarg);
+            break;
+        case opt_compress_sysimage:
+            if (!strcmp(optarg,"yes"))
+                jl_options.compress_sysimage = 1;
+            else if (!strcmp(optarg,"no"))
+                jl_options.compress_sysimage = 0;
+            break;
+        case opt_target_sanitize:
+            if (!strcmp(optarg, "memory"))
+                jl_options.target_sanitize_memory = 1;
+            else if (!strcmp(optarg, "thread"))
+                jl_options.target_sanitize_thread = 1;
+            else if (!strcmp(optarg, "address"))
+                jl_options.target_sanitize_address = 1;
+            else
+                jl_errorf("julia: invalid argument to --target-sanitize={memory|thread|address} (%s)", optarg);
+            break;
         default:
             jl_errorf("julia: unhandled option -- %c\n"
                       "This is a bug, please report it.", c);
         }
     }
-    if (codecov || malloclog) {
-        if (pkgimage_explicit && jl_options.use_pkgimages) {
-            jl_errorf("julia: Can't use --pkgimages=yes together "
-                      "with --track-allocation or --code-coverage.");
-        }
-        jl_options.use_pkgimages = 0;
+    jl_options.program_file = optind < argc ? strdup(argv[optind]) : "";
+    parsing_args_done:
+    if (!jl_options.use_experimental_features) {
+        if (jl_options.trim != JL_TRIM_NO)
+            jl_errorf("julia: --trim is an experimental feature, you must enable it with --experimental");
     }
     jl_options.code_coverage = codecov;
     jl_options.malloc_log = malloclog;
diff --git a/src/jloptions.h b/src/jloptions.h
index 8649c405112d7..14a5dfe83b43b 100644
--- a/src/jloptions.h
+++ b/src/jloptions.h
@@ -21,6 +21,7 @@ typedef struct {
     int32_t nprocs;
     const char *machine_file;
     const char *project;
+    const char *program_file;
     int8_t isinteractive;
     int8_t color;
     int8_t historyfile;
@@ -38,10 +39,12 @@ typedef struct {
     int8_t can_inline;
     int8_t polly;
     const char *trace_compile;
+    const char *trace_dispatch;
     int8_t fast_math;
     int8_t worker;
     const char *cookie;
     int8_t handle_signals;
+    int8_t use_experimental_features;
     int8_t use_sysimage_native_code;
     int8_t use_compiled_modules;
     int8_t use_pkgimages;
@@ -61,6 +64,19 @@ typedef struct {
     int8_t strip_ir;
     int8_t permalloc_pkgimg;
     uint64_t heap_size_hint;
+    uint64_t hard_heap_limit;
+    uint64_t heap_target_increment;
+    int8_t trace_compile_timing;
+    int8_t trim;
+    int8_t trace_eval;
+    int8_t task_metrics;
+    int16_t timeout_for_safepoint_straggler_s;
+    int8_t gc_sweep_always_full;
+    int8_t compress_sysimage;
+    int8_t alert_on_critical_error;
+    int8_t target_sanitize_memory;
+    int8_t target_sanitize_thread;
+    int8_t target_sanitize_address;
 } jl_options_t;
 
 #endif
diff --git a/src/jltypes.c b/src/jltypes.c
index 90e6b251256df..db75be1c9db0a 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -20,7 +20,14 @@ extern "C" {
 #endif
 
 _Atomic(jl_value_t*) cmpswap_names JL_GLOBALLY_ROOTED;
-jl_datatype_t *small_typeof[(jl_max_tags << 4) / sizeof(*small_typeof)]; // 16-bit aligned, like the GC
+jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(*ijl_small_typeof)]; // 16-bit aligned, like the GC
+
+// Global data structures for accessing symbols and other globals
+#if !defined(__clang_analyzer__)
+struct jl_sysimg_global sysimg_global;
+struct jl_const_globals const_globals;
+struct jl_internal_global internal_global;
+#endif
 
 // compute empirical max-probe for a given size
 #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6)
@@ -65,7 +72,9 @@ static int layout_uses_free_typevars(jl_value_t *v, jl_typeenv_t *env)
         }
         if (jl_is_datatype(v)) {
             jl_datatype_t *dt = (jl_datatype_t*)v;
-            if (dt->layout || dt->isconcretetype || !dt->name->mayinlinealloc)
+            if (dt->isconcretetype)
+                return 0;
+            if (dt->layout || !dt->name->mayinlinealloc)
                 return 0;
             if (dt->name == jl_namedtuple_typename)
                 return layout_uses_free_typevars(jl_tparam0(dt), env) || layout_uses_free_typevars(jl_tparam1(dt), env);
@@ -215,7 +224,7 @@ JL_DLLEXPORT jl_array_t *jl_find_free_typevars(jl_value_t *v)
 }
 
 // test whether a type has vars bound by the given environment
-static int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT
+int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT
 {
     while (1) {
         if (jl_is_typevar(v)) {
@@ -290,7 +299,13 @@ JL_DLLEXPORT int jl_has_typevar_from_unionall(jl_value_t *t, jl_unionall_t *ua)
 
 int jl_has_fixed_layout(jl_datatype_t *dt)
 {
-    if (dt->layout || dt->isconcretetype)
+    if (dt->isconcretetype)
+        return 1;
+    if (jl_is_genericmemory_type(dt)) { // GenericMemory{kind,addrspace,T} uses T for final layout, which is a parameter not a field however
+        // optionally: return !layout_uses_free_typevars(jl_tparam1(dt), env);
+        return 0;
+    }
+    if (dt->layout)
         return 1;
     if (dt->name->abstract)
         return 0;
@@ -313,15 +328,15 @@ int jl_has_fixed_layout(jl_datatype_t *dt)
 int jl_type_mappable_to_c(jl_value_t *ty)
 {
     assert(!jl_is_typevar(ty) && jl_is_type(ty));
+    if (jl_is_array_type(ty) || jl_is_genericmemory_type(ty) ||
+        (jl_is_datatype(ty) && ((jl_datatype_t*)ty)->layout != NULL &&
+            jl_is_layout_opaque(((jl_datatype_t*)ty)->layout)))
+        return 1; // as boxed
     if (jl_is_structtype(ty))
         return jl_has_fixed_layout((jl_datatype_t*)ty) && ((jl_datatype_t*)ty)->name->atomicfields == NULL;
     if (jl_is_primitivetype(ty))
-        return 1;
-    if (ty == (jl_value_t*)jl_any_type || ty == (jl_value_t*)jl_bottom_type)
-        return 1; // as boxed
-    if (jl_is_abstract_ref_type(ty) || jl_is_array_type(ty) ||
-        (jl_is_datatype(ty) && ((jl_datatype_t*)ty)->layout != NULL &&
-            jl_is_layout_opaque(((jl_datatype_t*)ty)->layout)))
+        return 1; // as isbits
+    if (ty == (jl_value_t*)jl_any_type || ty == (jl_value_t*)jl_bottom_type || jl_is_abstract_ref_type(ty))
         return 1; // as boxed
     return 0; // refuse to map Union and UnionAll to C
 }
@@ -333,7 +348,7 @@ JL_DLLEXPORT int jl_get_size(jl_value_t *val, size_t *pnt)
     if (jl_is_long(val)) {
         ssize_t slen = jl_unbox_long(val);
         if (slen < 0)
-            jl_errorf("size or dimension is negative: %d", slen);
+            jl_errorf("size or dimension is negative: %zd", slen);
         *pnt = slen;
         return 1;
     }
@@ -548,6 +563,90 @@ static void isort_union(jl_value_t **a, size_t len) JL_NOTSAFEPOINT
     }
 }
 
+static int simple_subtype(jl_value_t *a, jl_value_t *b, int hasfree, int isUnion)
+{
+    assert(hasfree == (jl_has_free_typevars(a) | (jl_has_free_typevars(b) << 1)));
+    if (a == jl_bottom_type || b == (jl_value_t*)jl_any_type)
+        return 1;
+    if (jl_egal(a, b))
+        return 1;
+    if (hasfree == 0) {
+        int mergeable = isUnion;
+        if (!mergeable) // issue #24521: don't merge Type{T} where typeof(T) varies
+            mergeable = !(jl_is_type_type(a) && jl_is_type_type(b) &&
+             jl_typeof(jl_tparam0(a)) != jl_typeof(jl_tparam0(b)));
+        return mergeable && jl_subtype(a, b);
+    }
+    if (jl_is_typevar(a)) {
+        jl_value_t *na = ((jl_tvar_t*)a)->ub;
+        hasfree &= (jl_has_free_typevars(na) | 2);
+        return simple_subtype(na, b, hasfree, isUnion);
+    }
+    if (jl_is_typevar(b)) {
+        jl_value_t *nb = ((jl_tvar_t*)b)->lb;
+        // This branch is not valid if `b` obeys diagonal rule,
+        // as it might normalize `Union` into a single `TypeVar`, e.g.
+        // Tuple{Union{Int,T},T} where {T>:Int} != Tuple{T,T} where {T>:Int}
+        if (is_leaf_bound(nb))
+            return 0;
+        hasfree &= ((jl_has_free_typevars(nb) << 1) | 1);
+        return simple_subtype(a, nb, hasfree, isUnion);
+    }
+    if (b==(jl_value_t*)jl_datatype_type || b==(jl_value_t*)jl_typeofbottom_type) {
+        // This branch is not valid for `Union`/`UnionAll`, e.g.
+        // (Type{Union{Int,T2} where {T2<:T1}} where {T1}){Int} == Type{Int64}
+        // (Type{Union{Int,T1}} where {T1}){Int} == Type{Int64}
+        return jl_is_type_type(a) && jl_typeof(jl_tparam0(a)) == b;
+    }
+    return 0;
+}
+
+// merge Union{Tuple{}, Tuple{T}, Tuple{T, T, Vararg{T}}} into Tuple{Vararg{T}}
+// assumes temp is already sorted by number of type parameters
+STATIC_INLINE void merge_vararg_unions(jl_value_t **temp, size_t nt)
+{
+    for (size_t i = nt-1; i > 0; i--) {
+        // match types of form Tuple{T, ..., Vararg{T}}
+        jl_value_t *tt = temp[i];
+        if (!(tt && jl_is_tuple_type(tt))) continue;
+        size_t nfields = jl_nparams(tt);
+        if (nfields <= 1) continue;
+        jl_value_t *va = jl_tparam(tt, nfields-1);
+        if (jl_vararg_kind(va) != JL_VARARG_UNBOUND) continue;
+        jl_value_t *t = jl_unwrap_vararg(va);
+        size_t min_elements = nfields-1;
+        for (size_t j = 0; j < nfields-1; j++)
+            if (!jl_egal(jl_tparam(tt, j), t)) goto outer_loop;
+
+        // look for Tuple{T, T, ...} then Tuple{T, ...}, etc
+        for (long j = i-1; j >= 0; j--) {
+            jl_value_t *ttj = temp[j];
+            if (!(ttj && jl_is_tuple_type(ttj))) break;
+            size_t nfieldsj = jl_nparams(ttj);
+            if (nfieldsj >= min_elements) continue;
+            if (nfieldsj != min_elements-1) break;
+            for (size_t k = 0; k < nfieldsj; k++)
+                if (!jl_egal(jl_tparam(ttj, k), t)) goto inner_loop;
+
+            temp[j] = NULL;
+            min_elements--;
+ inner_loop:
+            continue;
+        }
+
+        if (min_elements == nfields-1) continue;
+        jl_value_t** params;
+        JL_GC_PUSHARGS(params, min_elements+1);
+        for (size_t j = 0; j < min_elements; j++)
+            params[j] = t;
+        params[min_elements] = va;
+        temp[i] = jl_apply_type((jl_value_t*)jl_tuple_type, params, min_elements+1);
+        JL_GC_POP();
+ outer_loop:
+        continue;
+    }
+}
+
 JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
 {
     if (n == 0)
@@ -572,17 +671,14 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
         int has_free = temp[i] != NULL && jl_has_free_typevars(temp[i]);
         for (j = 0; j < nt; j++) {
             if (j != i && temp[i] && temp[j]) {
-                if (temp[i] == jl_bottom_type ||
-                    temp[j] == (jl_value_t*)jl_any_type ||
-                    jl_egal(temp[i], temp[j]) ||
-                    (!has_free && !jl_has_free_typevars(temp[j]) &&
-                     jl_subtype(temp[i], temp[j]))) {
+                int has_free2 = has_free | (jl_has_free_typevars(temp[j]) << 1);
+                if (simple_subtype(temp[i], temp[j], has_free2, 1))
                     temp[i] = NULL;
-                }
             }
         }
     }
     isort_union(temp, nt);
+    merge_vararg_unions(temp, nt);
     jl_value_t **ptu = &temp[nt];
     *ptu = jl_bottom_type;
     int k;
@@ -600,18 +696,9 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
     return tu;
 }
 
-// note: this is turned off as `Union` doesn't do such normalization.
-// static int simple_subtype(jl_value_t *a, jl_value_t *b)
-// {
-//     if (jl_is_kind(b) && jl_is_type_type(a) && jl_typeof(jl_tparam0(a)) == b)
-//         return 1;
-//     if (jl_is_typevar(b) && obviously_egal(a, ((jl_tvar_t*)b)->lb))
-//         return 1;
-//     return 0;
-// }
-
-static int simple_subtype2(jl_value_t *a, jl_value_t *b, int hasfree)
+static int simple_subtype2(jl_value_t *a, jl_value_t *b, int hasfree, int isUnion)
 {
+    assert(hasfree == (jl_has_free_typevars(a) | (jl_has_free_typevars(b) << 1)));
     int subab = 0, subba = 0;
     if (jl_egal(a, b)) {
         subab = subba = 1;
@@ -622,9 +709,9 @@ static int simple_subtype2(jl_value_t *a, jl_value_t *b, int hasfree)
     else if (b == jl_bottom_type || a == (jl_value_t*)jl_any_type) {
         subba = 1;
     }
-    else if (hasfree) {
-        // subab = simple_subtype(a, b);
-        // subba = simple_subtype(b, a);
+    else if (hasfree != 0) {
+        subab = simple_subtype(a, b, hasfree, isUnion);
+        subba = simple_subtype(b, a, ((hasfree & 2) >> 1) | ((hasfree & 1) << 1), isUnion);
     }
     else if (jl_is_type_type(a) && jl_is_type_type(b) &&
              jl_typeof(jl_tparam0(a)) != jl_typeof(jl_tparam0(b))) {
@@ -656,10 +743,11 @@ jl_value_t *simple_union(jl_value_t *a, jl_value_t *b)
     // first remove cross-redundancy and check if `a >: b` or `a <: b`.
     for (i = 0; i < nta; i++) {
         if (temp[i] == NULL) continue;
-        int hasfree = jl_has_free_typevars(temp[i]);
+        int has_free = jl_has_free_typevars(temp[i]);
         for (j = nta; j < nt; j++) {
             if (temp[j] == NULL) continue;
-            int subs = simple_subtype2(temp[i], temp[j], hasfree || jl_has_free_typevars(temp[j]));
+            int has_free2 = has_free | (jl_has_free_typevars(temp[j]) << 1);
+            int subs = simple_subtype2(temp[i], temp[j], has_free2, 0);
             int subab = subs & 1, subba = subs >> 1;
             if (subab) {
                 temp[i] = NULL;
@@ -689,19 +777,14 @@ jl_value_t *simple_union(jl_value_t *a, jl_value_t *b)
         size_t jmax = i < nta ? nta : nt;
         for (j = jmin; j < jmax; j++) {
             if (j != i && temp[i] && temp[j]) {
-                if (temp[i] == jl_bottom_type ||
-                    temp[j] == (jl_value_t*)jl_any_type ||
-                    jl_egal(temp[i], temp[j]) ||
-                    (!has_free && !jl_has_free_typevars(temp[j]) &&
-                     // issue #24521: don't merge Type{T} where typeof(T) varies
-                     !(jl_is_type_type(temp[i]) && jl_is_type_type(temp[j]) && jl_typeof(jl_tparam0(temp[i])) != jl_typeof(jl_tparam0(temp[j]))) &&
-                     jl_subtype(temp[i], temp[j]))) {
+                int has_free2 = has_free | (jl_has_free_typevars(temp[j]) << 1);
+                if (simple_subtype(temp[i], temp[j], has_free2, 0))
                     temp[i] = NULL;
-                }
             }
         }
     }
     isort_union(temp, nt);
+    merge_vararg_unions(temp, nt);
     temp[nt] = jl_bottom_type;
     size_t k;
     for (k = nt; k-- > 0; ) {
@@ -758,10 +841,11 @@ jl_value_t *simple_intersect(jl_value_t *a, jl_value_t *b, int overesi)
     for (i = 0; i < nta; i++) {
         if (temp[i] == NULL) continue;
         all_disjoint = 0;
-        int hasfree = jl_has_free_typevars(temp[i]);
+        int has_free = jl_has_free_typevars(temp[i]);
         for (j = nta; j < nt; j++) {
             if (temp[j] == NULL) continue;
-            int subs = simple_subtype2(temp[i], temp[j], hasfree || jl_has_free_typevars(temp[j]));
+            int has_free2 = has_free | (jl_has_free_typevars(temp[j]) << 1);
+            int subs = simple_subtype2(temp[i], temp[j], has_free2, 0);
             int subab = subs & 1, subba = subs >> 1;
             if (subba && !subab) {
                 stemp[i] = -1;
@@ -833,7 +917,7 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body)
         if (jl_options.depwarn) {
             if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR)
                 jl_error("Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\nYou may need to write `f(x::Vararg{T})` rather than `f(x::Vararg{<:T})` or `f(x::Vararg{T}) where T` instead of `f(x::Vararg{T} where T)`.");
-            jl_printf(JL_STDERR, "WARNING: Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\nYou may need to write `f(x::Vararg{T})` rather than `f(x::Vararg{<:T})` or `f(x::Vararg{T}) where T` instead of `f(x::Vararg{T} where T)`.\n");
+            jl_printf(JL_STDERR, "WARNING: Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\nYou may need to write `f(x::Vararg{T})` rather than `f(x::Vararg{<:T})` or `f(x::Vararg{T}) where T` instead of `f(x::Vararg{T} where T)`.\nTo make this warning an error, and hence obtain a stack trace, use `julia --depwarn=error`.\n");
         }
         jl_vararg_t *vm = (jl_vararg_t*)body;
         int T_has_tv = vm->T && jl_has_typevar(vm->T, v);
@@ -847,14 +931,14 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body)
         if (T_has_tv) {
             jl_value_t *wrapped = jl_type_unionall(v, vm->T);
             JL_GC_PUSH1(&wrapped);
-            wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N, 1);
+            wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N, 1, 0);
             JL_GC_POP();
             return wrapped;
         }
         else {
             assert(N_has_tv);
             assert(vm->N == (jl_value_t*)v);
-            return (jl_value_t*)jl_wrap_vararg(vm->T, NULL, 1);
+            return (jl_value_t*)jl_wrap_vararg(vm->T, NULL, 1, 0);
         }
     }
     if (!jl_is_type(body) && !jl_is_typevar(body))
@@ -1125,6 +1209,7 @@ static void cache_insert_type_set(jl_datatype_t *val, uint_t hv)
 
 jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz)
 {
+    newsz = newsz ? next_power_of_two(newsz) : 0;
     jl_value_t **ol = jl_svec_data(a);
     size_t sz = jl_svec_len(a);
     while (1) {
@@ -1158,7 +1243,6 @@ static void cache_insert_type_linear(jl_datatype_t *type, ssize_t insert_at)
         jl_atomic_store_release(&type->name->linearcache, nc);
         jl_gc_wb(type->name, nc);
         cache = nc;
-        n = jl_svec_len(nc);
     }
     assert(jl_svecref(cache, insert_at) == jl_nothing);
     jl_svecset(cache, insert_at, (jl_value_t*)type); // todo: make this an atomic-store
@@ -1317,7 +1401,7 @@ struct _jl_typestack_t;
 typedef struct _jl_typestack_t jl_typestack_t;
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       jl_typestack_t *stack, jl_typeenv_t *env, int check);
+                                       jl_typestack_t *stack, jl_typeenv_t *env, int check, int nothrow);
 
 // Build an environment mapping a TypeName's parameters to parameter values.
 // This is the environment needed for instantiating a type's supertype and field types.
@@ -1325,7 +1409,7 @@ static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t **
                                      jl_typestack_t *stack, jl_typeenv_t *env, int c)
 {
     if (jl_is_datatype(dt))
-        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env, 1);
+        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env, 1, 0);
     assert(jl_is_unionall(dt));
     jl_unionall_t *ua = (jl_unionall_t*)dt;
     jl_typeenv_t e = { ua->var, iparams[c], env };
@@ -1351,8 +1435,12 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
     JL_GC_PUSH1(&tc);
     jl_value_t *tc0 = tc;
     for (i=0; i < n; i++) {
-        if (!jl_is_unionall(tc0))
-            jl_error("too many parameters for type");
+        if (!jl_is_unionall(tc0)){
+            char *typ = "";
+            if (jl_is_datatype(tc0))
+                typ = jl_symbol_name_(((jl_datatype_t*)tc0)->name->name);
+            jl_errorf("too many parameters for type %s", typ);
+        }
         jl_value_t *pi = params[i];
 
         tc0 = ((jl_unionall_t*)tc0)->body;
@@ -1404,6 +1492,15 @@ JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value
     return jl_apply_type(tc, args, 2);
 }
 
+JL_DLLEXPORT jl_value_t *jl_apply_type3(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2, jl_value_t *p3)
+{
+    jl_value_t *args[3];
+    args[0] = p1;
+    args[1] = p2;
+    args[2] = p3;
+    return jl_apply_type(tc, args, 3);
+}
+
 jl_datatype_t *jl_apply_modify_type(jl_value_t *dt)
 {
     jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2(jl_pair_type, dt, dt);
@@ -1431,29 +1528,18 @@ jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *ty)
     return rettyp;
 }
 
-// used to expand an NTuple to a flat representation
-static jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *v)
-{
-    jl_value_t *p = NULL;
-    JL_GC_PUSH1(&p);
-    p = (jl_value_t*)jl_svec_fill(n, v);
-    p = jl_apply_tuple_type((jl_svec_t*)p);
-    JL_GC_POP();
-    return p;
-}
-
 JL_EXTENSION struct _jl_typestack_t {
     jl_datatype_t *tt;
     struct _jl_typestack_t *prev;
 };
 
-static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check);
-static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack);
+static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check, int nothrow);
+static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack, int cacheable);
 
 JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p)
 {
     jl_typeenv_t env = { u->var, p, NULL };
-    return inst_type_w_(u->body, &env, NULL, 1);
+    return inst_type_w_(u->body, &env, NULL, 1, 0);
 }
 
 jl_unionall_t *jl_rename_unionall(jl_unionall_t *u)
@@ -1462,18 +1548,27 @@ jl_unionall_t *jl_rename_unionall(jl_unionall_t *u)
     jl_value_t *t = NULL;
     JL_GC_PUSH2(&v, &t);
     jl_typeenv_t env = { u->var, (jl_value_t *)v, NULL };
-    t = inst_type_w_(u->body, &env, NULL, 0);
+    t = inst_type_w_(u->body, &env, NULL, 0, 0);
     t = jl_new_struct(jl_unionall_type, v, t);
     JL_GC_POP();
     return (jl_unionall_t*)t;
 }
 
+jl_value_t *jl_substitute_var_nothrow(jl_value_t *t, jl_tvar_t *var, jl_value_t *val, int nothrow)
+{
+    if (val == (jl_value_t*)var)
+        return t;
+    nothrow = jl_is_typevar(val) ? 0 : nothrow;
+    jl_typeenv_t env = { var, val, NULL };
+    return inst_type_w_(t, &env, NULL, 1, nothrow);
+}
+
 jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val)
 {
     if (val == (jl_value_t*)var)
         return t;
     jl_typeenv_t env = { var, val, NULL };
-    return inst_type_w_(t, &env, NULL, 1);
+    return inst_type_w_(t, &env, NULL, 1, 0);
 }
 
 jl_value_t *jl_unwrap_unionall(jl_value_t *v)
@@ -1519,6 +1614,118 @@ jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u)
     return t;
 }
 
+// Create a copy of type expression t where any occurrence of data type x is replaced by y.
+// If x does not occur in t, return t without any copy.
+// For example, jl_substitute_datatype(Foo{Bar}, Foo{T}, Qux{S}) is Qux{Bar}, with T and S
+// free type variables.
+// To substitute type variables, use jl_substitute_var instead.
+jl_value_t *jl_substitute_datatype(jl_value_t *t, jl_datatype_t * x, jl_datatype_t * y)
+{
+    if jl_is_datatype(t) {
+        jl_datatype_t *typ = (jl_datatype_t*)t;
+        // For datatypes call itself recursively on the parameters to form new parameters.
+        // Then, if typename(t) == typename(x), rewrap the wrapper of y around the new
+        // parameters. Otherwise, do the same around the wrapper of t.
+        // This ensures that the types and supertype are properly set.
+        // Start by check whether there is a parameter that needs replacing.
+        long i_firstnewparam = -1;
+        size_t nparams = jl_svec_len(typ->parameters);
+        jl_value_t *firstnewparam = NULL;
+        JL_GC_PUSH1(&firstnewparam);
+        for (size_t i = 0; i < nparams; i++) {
+            jl_value_t *param = NULL;
+            JL_GC_PUSH1(&param);
+            param = jl_svecref(typ->parameters, i);
+            firstnewparam = jl_substitute_datatype(param, x, y);
+            if (param != firstnewparam) {
+                i_firstnewparam = i;
+                JL_GC_POP();
+                break;
+            }
+            JL_GC_POP();
+        }
+        // If one of the parameters needs to be updated, or if the type name is that to
+        // substitute, create a new datataype
+        if (i_firstnewparam != -1 || typ->name == x->name) {
+            jl_datatype_t *uw = typ->name == x->name ? y : typ; // substitution occurs here
+            jl_value_t *wrapper = uw->name->wrapper;
+            jl_datatype_t *w = (jl_datatype_t*)jl_unwrap_unionall(wrapper);
+            jl_svec_t *sv = jl_alloc_svec_uninit(jl_svec_len(uw->parameters));
+            JL_GC_PUSH1(&sv);
+            jl_value_t **vals = jl_svec_data(sv);
+            // no JL_GC_PUSHARGS(vals, ...) since GC is already aware of sv
+            for (long i = 0; i < i_firstnewparam; i++) { // copy the identical parameters
+                vals[i] = jl_svecref(typ->parameters, i); // value
+            }
+            if (i_firstnewparam != -1) { // insert the first non-identical parameter
+                vals[i_firstnewparam] = firstnewparam;
+            }
+            for (size_t i = i_firstnewparam+1; i < nparams; i++) { // insert the remaining parameters
+                vals[i] = jl_substitute_datatype(jl_svecref(typ->parameters, i), x, y);
+            }
+            if (jl_is_tuple_type(wrapper)) {
+                // special case for tuples, since the wrapper (Tuple) does not have as
+                // many parameters as t (it only has a Vararg instead).
+                t = jl_apply_tuple_type(sv, 0);
+            } else {
+                t = jl_instantiate_type_in_env((jl_value_t*)w, (jl_unionall_t*)wrapper, vals);
+            }
+            JL_GC_POP();
+        }
+        JL_GC_POP();
+    }
+    else if jl_is_unionall(t) { // recursively call itself on body and var bounds
+        jl_unionall_t* ut = (jl_unionall_t*)t;
+        jl_value_t *lb = NULL;
+        jl_value_t *ub = NULL;
+        jl_value_t *body = NULL;
+        JL_GC_PUSH3(&lb, &ub, &body);
+        lb = jl_substitute_datatype(ut->var->lb, x, y);
+        ub = jl_substitute_datatype(ut->var->ub, x, y);
+        body = jl_substitute_datatype(ut->body, x, y);
+        if (lb != ut->var->lb || ub != ut->var->ub) {
+            jl_tvar_t *newtvar = jl_new_typevar(ut->var->name, lb, ub);
+            JL_GC_PUSH1(&newtvar);
+            body = jl_substitute_var(body, ut->var, (jl_value_t*)newtvar);
+            t = jl_new_struct(jl_unionall_type, newtvar, body);
+            JL_GC_POP();
+        }
+        else if (body != ut->body) {
+            t = jl_new_struct(jl_unionall_type, ut->var, body);
+        }
+        JL_GC_POP();
+    }
+    else if jl_is_uniontype(t) { // recursively call itself on a and b
+        jl_uniontype_t *u = (jl_uniontype_t*)t;
+        jl_value_t *a = NULL;
+        jl_value_t *b = NULL;
+        JL_GC_PUSH2(&a, &b);
+        a = jl_substitute_datatype(u->a, x, y);
+        b = jl_substitute_datatype(u->b, x, y);
+        if (a != u->a || b != u->b) {
+            t = jl_new_struct(jl_uniontype_type, a, b);
+        }
+        JL_GC_POP();
+    }
+    else if jl_is_vararg(t) { // recursively call itself on T
+        jl_vararg_t *vt = (jl_vararg_t*)t;
+        if (vt->T) { // vt->T could be NULL
+            jl_value_t *rT = NULL;
+            JL_GC_PUSH1(&rT);
+            rT = jl_substitute_datatype(vt->T, x, y);
+            if (rT != vt->T) {
+                jl_task_t *ct = jl_current_task;
+                t = jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type);
+                jl_set_typetagof((jl_vararg_t *)t, jl_vararg_tag, 0);
+                ((jl_vararg_t *)t)->T = rT;
+                ((jl_vararg_t *)t)->N = vt->N;
+            }
+            JL_GC_POP();
+        }
+    }
+    return t;
+}
+
 static jl_value_t *lookup_type_stack(jl_typestack_t *stack, jl_datatype_t *tt, size_t ntp,
                                      jl_value_t **iparams)
 {
@@ -1593,19 +1800,20 @@ static unsigned typekey_hash(jl_typename_t *tn, jl_value_t **key, size_t n, int
     int failed = nofail;
     for (j = 0; j < n; j++) {
         jl_value_t *p = key[j];
+        size_t repeats = 1;
         if (jl_is_vararg(p)) {
             jl_vararg_t *vm = (jl_vararg_t*)p;
-            if (!nofail && vm->N)
-                return 0;
-            // 0x064eeaab is just a randomly chosen constant
-            hash = bitmix(vm->N ? type_hash(vm->N, &failed) : 0x064eeaab, hash);
-            if (failed && !nofail)
-                return 0;
+            if (vm->N && jl_is_long(vm->N))
+                repeats = jl_unbox_long(vm->N);
+            else
+                hash = bitmix(0x064eeaab, hash); // 0x064eeaab is just a randomly chosen constant
             p = vm->T ? vm->T : (jl_value_t*)jl_any_type;
         }
-        hash = bitmix(type_hash(p, &failed), hash);
+        unsigned hashp = type_hash(p, &failed);
         if (failed && !nofail)
             return 0;
+        while (repeats--)
+            hash = bitmix(hashp, hash);
     }
     hash = bitmix(~tn->hash, hash);
     return hash ? hash : 1;
@@ -1684,7 +1892,7 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable)
     dt->hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), l, cacheable);
 }
 
-static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, size_t np)
+static int check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, size_t np, int nothrow)
 {
     jl_value_t *wrapper = tn->wrapper;
     jl_value_t **bounds;
@@ -1702,6 +1910,10 @@ static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, si
         assert(jl_is_unionall(wrapper));
         jl_tvar_t *tv = ((jl_unionall_t*)wrapper)->var;
         if (!within_typevar(params[i], bounds[2*i], bounds[2*i+1])) {
+            if (nothrow) {
+                JL_GC_POP();
+                return 1;
+            }
             if (tv->lb != bounds[2*i] || tv->ub != bounds[2*i+1])
                 // pass a new version of `tv` containing the instantiated bounds
                 tv = jl_new_typevar(tv->name, bounds[2*i], bounds[2*i+1]);
@@ -1711,15 +1923,29 @@ static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, si
         int j;
         for (j = 2*i + 2; j < 2*np; j++) {
             jl_value_t *bj = bounds[j];
-            if (bj != (jl_value_t*)jl_any_type && bj != jl_bottom_type)
-                bounds[j] = jl_substitute_var(bj, tv, params[i]);
+            if (bj != (jl_value_t*)jl_any_type && bj != jl_bottom_type) {
+                int isub = j & 1;
+                // use different nothrow level for lb and ub substitution.
+                // TODO: This assuming the top instantiation could only start with
+                // `nothrow == 2` or `nothrow == 0`. If `nothrow` is initially set to 1
+                // then we might miss some inner error, perhaps the normal path should
+                // also follow this rule？
+                jl_value_t *nb = jl_substitute_var_nothrow(bj, tv, params[i], nothrow ? (isub ? 2 : 1) : 0 );
+                if (nb == NULL) {
+                    assert(nothrow);
+                    JL_GC_POP();
+                    return 1;
+                }
+                bounds[j] = nb;
+            }
         }
         wrapper = ((jl_unionall_t*)wrapper)->body;
     }
     JL_GC_POP();
+    return 0;
 }
 
-jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY_ROOTED
+static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT JL_GLOBALLY_ROOTED
 {
     t = jl_unwrap_unionall(t);
     if (jl_is_datatype(t))
@@ -1734,7 +1960,7 @@ jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY_ROOTED
     return NULL;
 }
 
-int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_count) JL_NOTSAFEPOINT
+static int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_count) JL_NOTSAFEPOINT
 {
     while (1) {
         if (v == (jl_value_t*)var) {
@@ -1791,13 +2017,13 @@ int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_c
 //  * `var` does not appear in invariant position
 //  * `var` appears at most once (in covariant position) and not in a `Vararg`
 //    unless the upper bound is concrete (diagonal rule)
-int may_substitute_ub(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT
+static int may_substitute_ub(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT
 {
     int cov_count = 0;
     return _may_substitute_ub(v, var, 0, &cov_count);
 }
 
-jl_value_t *normalize_unionalls(jl_value_t *t)
+static jl_value_t *normalize_unionalls(jl_value_t *t)
 {
     if (jl_is_uniontype(t)) {
         jl_uniontype_t *u = (jl_uniontype_t*)t;
@@ -1814,7 +2040,7 @@ jl_value_t *normalize_unionalls(jl_value_t *t)
     else if (jl_is_unionall(t)) {
         jl_unionall_t *u = (jl_unionall_t*)t;
         jl_value_t *body = normalize_unionalls(u->body);
-        JL_GC_PUSH1(&body);
+        JL_GC_PUSH2(&body, &t);
         if (body != u->body) {
             t = jl_new_struct(jl_unionall_type, u->var, body);
             u = (jl_unionall_t*)t;
@@ -1835,10 +2061,40 @@ jl_value_t *normalize_unionalls(jl_value_t *t)
     return t;
 }
 
+// used to expand an NTuple to a flat representation
+static jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *t, int check, int nothrow)
+{
+    jl_value_t *p = NULL;
+    JL_GC_PUSH1(&p);
+    if (check) {
+        // Since we are skipping making the Vararg and skipping checks later,
+        // we inline the checks from jl_wrap_vararg here now
+        if (!jl_valid_type_param(t)) {
+            if (nothrow) {
+                JL_GC_POP();
+                return NULL;
+            }
+            jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t);
+        }
+        // jl_wrap_vararg sometimes simplifies the type, so we only do this 1 time, instead of for each n later
+        t = normalize_unionalls(t);
+        p = t;
+        jl_value_t *tw = extract_wrapper(t);
+        if (tw && t != tw && !jl_has_free_typevars(t) && jl_types_equal(t, tw))
+            t = tw;
+        p = t;
+        check = 0; // remember that checks are already done now
+    }
+    p = (jl_value_t*)jl_svec_fill(n, t);
+    p = jl_apply_tuple_type((jl_svec_t*)p, check);
+    JL_GC_POP();
+    return p;
+}
+
 static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals, jl_typeenv_t *prev, jl_typestack_t *stack);
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       jl_typestack_t *stack, jl_typeenv_t *env, int check)
+                                       jl_typestack_t *stack, jl_typeenv_t *env, int check, int nothrow)
 {
     jl_typestack_t top;
     jl_typename_t *tn = dt->name;
@@ -1869,8 +2125,11 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
                     break;
                 }
             }
-            if (pi == jl_bottom_type)
+            if (pi == jl_bottom_type) {
+                if (nothrow)
+                    return NULL;
                 jl_errorf("Tuple field type cannot be Union{}");
+            }
             if (cacheable && !jl_is_concrete_type(pi))
                 cacheable = 0;
         }
@@ -1905,7 +2164,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
             // normalize types equal to wrappers (prepare for Typeofwrapper)
             jl_value_t *tw = extract_wrapper(pi);
             if (tw && tw != pi && (tn != jl_type_typename || jl_typeof(pi) == jl_typeof(tw)) &&
-                    jl_types_equal(pi, tw)) {
+                    !jl_has_free_typevars(pi) && jl_types_equal(pi, tw)) {
                 iparams[i] = tw;
                 if (p) jl_gc_wb(p, tw);
             }
@@ -1930,7 +2189,8 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     // for whether this is even valid
     if (check && !istuple) {
         assert(ntp > 0);
-        check_datatype_parameters(tn, iparams, ntp);
+        if (check_datatype_parameters(tn, iparams, ntp, nothrow))
+            return NULL;
     }
     else if (ntp == 0 && jl_emptytuple_type != NULL) {
         // empty tuple type case
@@ -1957,7 +2217,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
             if (nt == 0 || !jl_has_free_typevars(va0)) {
                 if (ntp == 1) {
                     JL_GC_POP();
-                    return jl_tupletype_fill(nt, va0);
+                    return jl_tupletype_fill(nt, va0, 0, 0);
                 }
                 size_t i, l;
                 p = jl_alloc_svec(ntp - 1 + nt);
@@ -1966,31 +2226,27 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
                 l = ntp - 1 + nt;
                 for (; i < l; i++)
                     jl_svecset(p, i, va0);
-                jl_value_t *ndt = jl_apply_tuple_type(p);
+                size_t np = jl_svec_len(p);
+                jl_value_t **pp = jl_svec_data(p);
+                jl_value_t *ndt = inst_datatype_inner(jl_anytuple_type, p, pp, np, NULL, NULL, check, nothrow);
                 JL_GC_POP();
                 return ndt;
             }
         }
     }
 
-    // move array of instantiated parameters to heap; we need to keep it
-    if (p == NULL) {
-        p = jl_alloc_svec_uninit(ntp);
-        for (size_t i = 0; i < ntp; i++)
-            jl_svecset(p, i, iparams[i]);
-    }
-
     // try to simplify some type parameters
     if (check && tn != jl_type_typename) {
-        size_t i;
         int changed = 0;
         if (istuple) // normalization might change Tuple's, but not other types's, cacheable status
             cacheable = 1;
+        size_t i;
         for (i = 0; i < ntp; i++) {
-            jl_value_t *newp = normalize_unionalls(iparams[i]);
-            if (newp != iparams[i]) {
+            jl_value_t *pi = iparams[i];
+            jl_value_t *newp = normalize_unionalls(pi);
+            if (newp != pi) {
                 iparams[i] = newp;
-                jl_svecset(p, i, newp);
+                if (p) jl_gc_wb(p, newp);
                 changed = 1;
             }
             if (istuple && cacheable && !jl_is_concrete_type(newp))
@@ -2016,12 +2272,39 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
         }
     }
 
+    // try to reduce duplication in objects (if the caller didn't already check) by
+    // comparing them against a list of objects already known to be globally rooted and
+    // swapping them as possible
+    if (check && jl_global_roots_list != NULL) {
+        for (size_t i = 0; i < ntp; i++) {
+            jl_value_t *pi = iparams[i];
+            if (cacheable || !jl_has_free_typevars(pi)) {
+                pi = jl_as_global_root(pi, cacheable);
+                if (pi != NULL) {
+                    iparams[i] = pi;
+                    if (p) jl_gc_wb(p, pi);
+                }
+            }
+        }
+    }
+
+    // move array of instantiated parameters to heap; we need to keep it
+    if (p == NULL) {
+        p = jl_alloc_svec_uninit(ntp);
+        for (size_t i = 0; i < ntp; i++) {
+            jl_svecset(p, i, iparams[i]);
+        }
+    }
+
+    ndt = jl_new_uninitialized_datatype();
+
+    // now that most allocations are done
     // acquire the write lock now that we know we need a new object
     // since we're going to immediately leak it globally via the instantiation stack
     if (cacheable) {
         JL_LOCK(&typecache_lock); // Might GC
         jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp);
-        if (lkup != NULL) {
+        if (lkup) {
             JL_UNLOCK(&typecache_lock); // Might GC
             JL_GC_POP();
             return lkup;
@@ -2029,7 +2312,6 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     }
 
     // create and initialize new type
-    ndt = jl_new_uninitialized_datatype();
     ndt->isprimitivetype = dt->isprimitivetype;
     // Usually dt won't have ismutationfree set at this point, but it is
     // overridden for `Type`, which we handle here.
@@ -2045,6 +2327,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     ndt->parameters = p;
     jl_gc_wb(ndt, ndt->parameters);
     ndt->types = NULL; // to be filled in below
+    int invalid = 0;
     if (istuple) {
         ndt->types = p; // TODO: this may need to filter out certain types
     }
@@ -2052,30 +2335,70 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
         jl_value_t *names_tup = jl_svecref(p, 0);
         jl_value_t *values_tt = jl_svecref(p, 1);
         if (!jl_has_free_typevars(names_tup) && !jl_has_free_typevars(values_tt)) {
-            if (!jl_is_tuple(names_tup))
-                jl_type_error_rt("NamedTuple", "names", (jl_value_t*)jl_anytuple_type, names_tup);
+            if (!jl_is_tuple(names_tup)) {
+                if (!nothrow)
+                    jl_type_error_rt("NamedTuple", "names", (jl_value_t*)jl_anytuple_type, names_tup);
+                invalid = 1;
+            }
             size_t nf = jl_nfields(names_tup);
             for (size_t i = 0; i < nf; i++) {
                 jl_value_t *ni = jl_fieldref(names_tup, i);
-                if (!jl_is_symbol(ni))
-                    jl_type_error_rt("NamedTuple", "name", (jl_value_t*)jl_symbol_type, ni);
+                if (!jl_is_symbol(ni)) {
+                    if (!nothrow)
+                        jl_type_error_rt("NamedTuple", "name", (jl_value_t*)jl_symbol_type, ni);
+                    invalid = 1; break;
+                }
                 for (size_t j = 0; j < i; j++) {
-                    if (ni == jl_fieldref_noalloc(names_tup, j))
-                        jl_errorf("duplicate field name in NamedTuple: \"%s\" is not unique", jl_symbol_name((jl_sym_t*)ni));
+                    if (ni == jl_fieldref_noalloc(names_tup, j)) {
+                        if (!nothrow)
+                            jl_errorf("duplicate field name in NamedTuple: \"%s\" is not unique", jl_symbol_name((jl_sym_t*)ni));
+                        invalid = 1; break;
+                    }
+                }
+                if (invalid) break;
+            }
+            if (values_tt == jl_bottom_type && nf > 0) {
+                ndt->types = jl_svec_fill(nf, jl_bottom_type);
+            }
+            else {
+                if (!jl_is_datatype(values_tt)) {
+                    // should have been checked within `check_datatype_parameters`.
+                    jl_error("NamedTuple field type must be a tuple datatype");
+                }
+                if (jl_is_va_tuple((jl_datatype_t*)values_tt) || jl_nparams(values_tt) != nf) {
+                    if (!nothrow)
+                        jl_error("NamedTuple names and field types must have matching lengths");
+                    invalid = 1;
                 }
+                ndt->types = ((jl_datatype_t*)values_tt)->parameters;
             }
-            if (!jl_is_datatype(values_tt))
-                jl_error("NamedTuple field type must be a tuple type");
-            if (jl_is_va_tuple((jl_datatype_t*)values_tt) || jl_nparams(values_tt) != nf)
-                jl_error("NamedTuple names and field types must have matching lengths");
-            ndt->types = ((jl_datatype_t*)values_tt)->parameters;
             jl_gc_wb(ndt, ndt->types);
         }
         else {
-            ndt->types = jl_emptysvec; // XXX: this is essentially always false
+            ndt->types = jl_emptysvec; // XXX: this is essentially always incorrect
+        }
+    }
+    else if (tn == jl_genericmemoryref_typename || tn == jl_genericmemory_typename) {
+        jl_value_t *isatomic = jl_svecref(p, 0);
+        if (!jl_is_typevar(isatomic) && !jl_is_symbol(isatomic)) {
+            if (!nothrow)
+                jl_type_error_rt("GenericMemory", "isatomic parameter", (jl_value_t*)jl_symbol_type, isatomic);
+            invalid = 1;
+        }
+        jl_value_t *addrspace = jl_svecref(p, 2);
+        if (!jl_is_typevar(addrspace) && !jl_is_addrspace(addrspace)) {
+            if (!nothrow)
+                jl_type_error_rt("GenericMemory", "addrspace parameter", (jl_value_t*)jl_addrspace_type, addrspace);
+            invalid = 1;
         }
     }
 
+    if (nothrow && invalid) {
+        if (cacheable)
+            JL_UNLOCK(&typecache_lock);
+        JL_GC_POP();
+        return NULL;
+    }
     jl_datatype_t *primarydt = ((jl_datatype_t*)jl_unwrap_unionall(tn->wrapper));
     jl_precompute_memoized_dt(ndt, cacheable);
     if (primarydt->layout)
@@ -2085,7 +2408,14 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
         ndt->super = jl_any_type;
     }
     else if (dt->super) {
-        ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)dt->super, env, stack, check);
+        jl_value_t *super = inst_type_w_((jl_value_t*)dt->super, env, stack, check, nothrow);
+        if (nothrow && super == NULL) {
+            if (cacheable)
+                JL_UNLOCK(&typecache_lock);
+            JL_GC_POP();
+            return NULL;
+        }
+        ndt->super = (jl_datatype_t *)super;
         jl_gc_wb(ndt, ndt->super);
     }
     jl_svec_t *ftypes = dt->types;
@@ -2109,9 +2439,9 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
         else if (cacheable) {
             // recursively instantiate the types of the fields
             if (dt->types == NULL)
-                ndt->types = jl_compute_fieldtypes(ndt, stack);
+                ndt->types = jl_compute_fieldtypes(ndt, stack, cacheable);
             else
-                ndt->types = inst_ftypes(ftypes, env, stack);
+                ndt->types = inst_ftypes(ftypes, env, stack, cacheable);
             jl_gc_wb(ndt, ndt->types);
         }
     }
@@ -2131,19 +2461,19 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     return (jl_value_t*)ndt;
 }
 
-static jl_value_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params)
+static jl_value_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params, int check)
 {
-    return inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL, 1);
+    return inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL, check, 0);
 }
 
-JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params)
+JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params, int check)
 {
-    return jl_apply_tuple_type_v_(jl_svec_data(params), jl_svec_len(params), params);
+    return jl_apply_tuple_type_v_(jl_svec_data(params), jl_svec_len(params), params, check);
 }
 
 JL_DLLEXPORT jl_value_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np)
 {
-    return jl_apply_tuple_type_v_(p, np, NULL);
+    return jl_apply_tuple_type_v_(p, np, NULL, 1);
 }
 
 jl_tupletype_t *jl_lookup_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size_t nargs, int leaf)
@@ -2172,22 +2502,23 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
             }
             jl_svecset(params, i, ai);
         }
-        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL, 1);
+        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL, 1, 0);
         JL_GC_POP();
     }
     return tt;
 }
 
-static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack)
+static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack, int cacheable)
 {
     size_t i;
     size_t lp = jl_svec_len(p);
     jl_svec_t *np = jl_alloc_svec(lp);
-    JL_GC_PUSH1(&np);
+    jl_value_t *pi = NULL;
+    JL_GC_PUSH2(&np, &pi);
     for (i = 0; i < lp; i++) {
-        jl_value_t *pi = jl_svecref(p, i);
+        pi = jl_svecref(p, i);
         JL_TRY {
-            pi = inst_type_w_(pi, env, stack, 1);
+            pi = inst_type_w_(pi, env, stack, 1, 0);
             if (!jl_is_type(pi) && !jl_is_typevar(pi)) {
                 pi = jl_bottom_type;
             }
@@ -2195,24 +2526,27 @@ static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *s
         JL_CATCH {
             pi = jl_bottom_type;
         }
-        jl_svecset(np, i, pi);
+        jl_value_t *globalpi = jl_as_global_root(pi, cacheable);
+        jl_svecset(np, i, globalpi ? globalpi : pi);
     }
     JL_GC_POP();
     return np;
 }
 
-static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check)
+static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check, int nothrow)
 {
     jl_datatype_t *tt = (jl_datatype_t*)t;
     jl_svec_t *tp = tt->parameters;
     size_t ntp = jl_svec_len(tp);
-    // Instantiate NTuple{3,Int}
+    // Instantiate Tuple{Vararg{T,N}} where T is fixed and N is known, such as Dims{3}
+    // And avoiding allocating the intermediate steps
     // Note this does not instantiate Tuple{Vararg{Int,3}}; that's done in inst_datatype_inner
+    // Note this does not instantiate NTuple{N,T}, since it is unnecessary and inefficient to expand that now
     if (jl_is_va_tuple(tt) && ntp == 1) {
-        // If this is a Tuple{Vararg{T,N}} with known N, expand it to
+        // If this is a Tuple{Vararg{T,N}} with known N and T, expand it to
         // a fixed-length tuple
         jl_value_t *T=NULL, *N=NULL;
-        jl_value_t *va = jl_unwrap_unionall(jl_tparam0(tt));
+        jl_value_t *va = jl_tparam0(tt);
         jl_value_t *ttT = jl_unwrap_vararg(va);
         jl_value_t *ttN = jl_unwrap_vararg_num(va);
         jl_typeenv_t *e = env;
@@ -2223,11 +2557,14 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
                 N = e->val;
             e = e->prev;
         }
-        if (T != NULL && N != NULL && jl_is_long(N)) {
+        if (T != NULL && N != NULL && jl_is_long(N)) { // TODO: && !jl_has_free_typevars(T) to match inst_datatype_inner, or even && jl_is_concrete_type(T)
+            // Since this is skipping jl_wrap_vararg, we inline the checks from it here
             ssize_t nt = jl_unbox_long(N);
-            if (nt < 0)
-                jl_errorf("size or dimension is negative: %zd", nt);
-            return jl_tupletype_fill(nt, T);
+            if (nt >= 0)
+                return jl_tupletype_fill(nt, T, check, nothrow);
+            if (nothrow)
+                return NULL;
+            jl_errorf("Vararg length is negative: %zd", nt);
         }
     }
     jl_value_t **iparams;
@@ -2239,23 +2576,36 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         iparams[0] = (jl_value_t*)ip_heap;
         iparams = jl_svec_data(ip_heap);
     }
-    int bound = 0;
-    int i;
+    int i, bound = 0;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
-        jl_value_t *pi = inst_type_w_(elt, env, stack, check);
+        jl_value_t *pi = inst_type_w_(elt, env, stack, check, nothrow);
+        if (pi == NULL) {
+            assert(nothrow);
+            if (nothrow == 1 || (i == ntp-1 && jl_is_vararg(elt))) {
+                t = NULL;
+                break;
+            }
+            else {
+                pi = jl_bottom_type;
+            }
+        }
         iparams[i] = pi;
         if (ip_heap)
             jl_gc_wb(ip_heap, pi);
         bound |= (pi != elt);
     }
-    if (bound)
-        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env, check);
+    if (t != NULL && bound)
+        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env, check, nothrow);
     JL_GC_POP();
     return t;
 }
 
-static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check)
+// `nothrow` means that when type checking fails, the type instantiation should
+// return `NULL` instead of immediately throwing an error. If `nothrow` == 2 then
+// we further assume that the imprecise instantiation for non invariant parameters
+// is acceptable, and inner error (`NULL`) would be ignored.
+static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check, int nothrow)
 {
     size_t i;
     if (jl_is_typevar(t)) {
@@ -2275,42 +2625,73 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
         jl_value_t *var = NULL;
         jl_value_t *newbody = NULL;
         JL_GC_PUSH3(&lb, &var, &newbody);
-        lb = inst_type_w_(ua->var->lb, env, stack, check);
-        var = inst_type_w_(ua->var->ub, env, stack, check);
-        if (lb != ua->var->lb || var != ua->var->ub) {
-            var = (jl_value_t*)jl_new_typevar(ua->var->name, lb, var);
-        }
-        else {
-            var = (jl_value_t*)ua->var;
-        }
-        jl_typeenv_t newenv = { ua->var, var, env };
-        newbody = inst_type_w_(ua->body, &newenv, stack, check);
-        if (newbody == (jl_value_t*)jl_emptytuple_type) {
-            // NTuple{0} => Tuple{} can make a typevar disappear
-            t = (jl_value_t*)jl_emptytuple_type;
+        // set nothrow <= 1 to ensure lb's accuracy.
+        lb = inst_type_w_(ua->var->lb, env, stack, check, nothrow ? 1 : 0);
+        if (lb == NULL) {
+            assert(nothrow);
+            t = NULL;
+        }
+        if (t != NULL) {
+            var = inst_type_w_(ua->var->ub, env, stack, check, nothrow);
+            if (var == NULL) {
+                if (lb == jl_bottom_type)
+                    var = jl_bottom_type;
+                else
+                    t = NULL;
+            }
+            else if (lb != ua->var->lb || var != ua->var->ub) {
+                var = (jl_value_t*)jl_new_typevar(ua->var->name, lb, var);
+            }
+            else {
+                var = (jl_value_t*)ua->var;
+            }
         }
-        else if (newbody != ua->body || var != (jl_value_t*)ua->var) {
-            // if t's parameters are not bound in the environment, return it uncopied (#9378)
-            t = jl_new_struct(jl_unionall_type, var, newbody);
+        if (t != NULL) {
+            jl_typeenv_t newenv = { ua->var, var, env };
+            newbody = inst_type_w_(ua->body, &newenv, stack, check, nothrow);
+            if (newbody == NULL) {
+                t = NULL;
+            }
+            else if (!jl_has_typevar(newbody, (jl_tvar_t *)var)) {
+                // inner instantiation might make a typevar disappear, e.g.
+                // NTuple{0,T} => Tuple{}
+                t = newbody;
+            }
+            else if (newbody != ua->body || var != (jl_value_t*)ua->var) {
+                // if t's parameters are not bound in the environment, return it uncopied (#9378)
+                t = jl_new_struct(jl_unionall_type, var, newbody);
+            }
         }
         JL_GC_POP();
         return t;
     }
     if (jl_is_uniontype(t)) {
         jl_uniontype_t *u = (jl_uniontype_t*)t;
-        jl_value_t *a = inst_type_w_(u->a, env, stack, check);
+        jl_value_t *a = inst_type_w_(u->a, env, stack, check, nothrow);
         jl_value_t *b = NULL;
         JL_GC_PUSH2(&a, &b);
-        b = inst_type_w_(u->b, env, stack, check);
+        b = inst_type_w_(u->b, env, stack, check, nothrow);
+        if (nothrow) {
+            // ensure jl_type_union nothrow.
+            if (a && !(jl_is_typevar(a) || jl_is_type(a)))
+                a = NULL;
+            if (b && !(jl_is_typevar(b) || jl_is_type(b)))
+                b = NULL;
+        }
         if (a != u->a || b != u->b) {
-            if (check) {
-                jl_value_t *uargs[2] = {a, b};
-                t = jl_type_union(uargs, 2);
-            }
-            else {
+            if (!check) {
                 // fast path for `jl_rename_unionall`.
                 t = jl_new_struct(jl_uniontype_type, a, b);
             }
+            else if (a == NULL || b == NULL) {
+                assert(nothrow);
+                t = nothrow == 1 ? NULL : a == NULL ? b : a;
+            }
+            else {
+                assert(a != NULL && b != NULL);
+                jl_value_t *uargs[2] = {a, b};
+                t = jl_type_union(uargs, 2);
+            }
         }
         JL_GC_POP();
         return t;
@@ -2321,13 +2702,22 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
         jl_value_t *N = NULL;
         JL_GC_PUSH2(&T, &N);
         if (v->T) {
-            T = inst_type_w_(v->T, env, stack, check);
-            if (v->N)
-                N = inst_type_w_(v->N, env, stack, check);
-        }
-        if (T != v->T || N != v->N) {
-            t = (jl_value_t*)jl_wrap_vararg(T, N, check);
+            T = inst_type_w_(v->T, env, stack, check, nothrow);
+            if (T == NULL) {
+                if (nothrow == 2)
+                    T = jl_bottom_type;
+                else
+                    t = NULL;
+            }
+            if (t && v->N) {
+                // set nothrow <= 1 to ensure invariant parameter's accuracy.
+                N = inst_type_w_(v->N, env, stack, check, nothrow ? 1 : 0);
+                if (N == NULL)
+                    t = NULL;
+            }
         }
+        if (t && (T != v->T || N != v->N))
+            t = (jl_value_t*)jl_wrap_vararg(T, N, check, nothrow);
         JL_GC_POP();
         return t;
     }
@@ -2339,20 +2729,26 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
         return t;
     jl_typename_t *tn = tt->name;
     if (tn == jl_tuple_typename)
-        return inst_tuple_w_(t, env, stack, check);
+        return inst_tuple_w_(t, env, stack, check, nothrow);
     size_t ntp = jl_svec_len(tp);
     jl_value_t **iparams;
     JL_GC_PUSHARGS(iparams, ntp);
     int bound = 0;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
-        jl_value_t *pi = inst_type_w_(elt, env, stack, check);
+        // set nothrow <= 1 to ensure invariant parameter's accuracy.
+        jl_value_t *pi = inst_type_w_(elt, env, stack, check, nothrow ? 1 : 0);
+        if (pi == NULL) {
+            assert(nothrow);
+            t = NULL;
+            break;
+        }
         iparams[i] = pi;
         bound |= (pi != elt);
     }
     // if t's parameters are not bound in the environment, return it uncopied (#9378)
-    if (bound)
-        t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env, check);
+    if (t != NULL && bound)
+        t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env, check, nothrow);
     JL_GC_POP();
     return t;
 }
@@ -2363,7 +2759,7 @@ static jl_value_t *instantiate_with(jl_value_t *t, jl_value_t **env, size_t n, j
         jl_typeenv_t en = { (jl_tvar_t*)env[0], env[1], te };
         return instantiate_with(t, &env[2], n-1, &en );
     }
-    return inst_type_w_(t, te, NULL, 1);
+    return inst_type_w_(t, te, NULL, 1, 0);
 }
 
 jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n)
@@ -2377,7 +2773,7 @@ static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *en
     if (jl_is_unionall(env->body))
         return _jl_instantiate_type_in_env(ty, (jl_unionall_t*)env->body, vals + 1, &en, stack);
     else
-        return inst_type_w_(ty, &en, stack, 1);
+        return inst_type_w_(ty, &en, stack, 1, 0);
 }
 
 JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals)
@@ -2399,8 +2795,10 @@ jl_datatype_t *jl_wrap_Type(jl_value_t *t)
     return (jl_datatype_t*)jl_instantiate_unionall(jl_type_type, t);
 }
 
-jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check)
+jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check, int nothrow)
 {
+    int valid = 1;
+    jl_vararg_t *vm = NULL;
     jl_task_t *ct = jl_current_task;
     JL_GC_PUSH1(&t);
     if (check) {
@@ -2411,36 +2809,49 @@ jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check)
                 // values and not the bounds of variables.
                 /*
                 jl_tvar_t *N = (jl_tvar_t*)n;
-                if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type))
-                    jl_error("TypeVar in Vararg length must have bounds Union{} and Any");
+                if (valid && !(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type)) {
+                    if (!nothrow)
+                        jl_error("TypeVar in Vararg length must have bounds Union{} and Any");
+                    invalid = 1;
+                }
                 */
             }
-            else if (!jl_is_long(n)) {
-                jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n);
+            else if (valid && !jl_is_long(n)) {
+                if (!nothrow)
+                    jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n);
+                valid = 0;
             }
-            else if (jl_unbox_long(n) < 0) {
-                jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n));
+            else if (valid && jl_unbox_long(n) < 0) {
+                if (!nothrow)
+                    jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n));
+                valid = 0;
             }
         }
         if (t) {
-            if (!jl_valid_type_param(t)) {
-                jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t);
+            if (valid && !jl_valid_type_param(t)) {
+                if (!nothrow)
+                    jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t);
+                valid = 0;
+            }
+            if (valid) {
+                t = normalize_unionalls(t);
+                jl_value_t *tw = extract_wrapper(t);
+                if (tw && t != tw && !jl_has_free_typevars(t) && jl_types_equal(t, tw))
+                    t = tw;
             }
-            t = normalize_unionalls(t);
-            jl_value_t *tw = extract_wrapper(t);
-            if (tw && t != tw && jl_types_equal(t, tw))
-                t = tw;
         }
     }
-    jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type);
-    jl_set_typetagof(vm, jl_vararg_tag, 0);
-    vm->T = t;
-    vm->N = n;
+    if (valid) {
+        vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type);
+        jl_set_typetagof(vm, jl_vararg_tag, 0);
+        vm->T = t;
+        vm->N = n;
+    }
     JL_GC_POP();
     return vm;
 }
 
-JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack)
+JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack, int cacheable)
 {
     assert(st->name != jl_namedtuple_typename && st->name != jl_tuple_typename);
     jl_datatype_t *wt = (jl_datatype_t*)jl_unwrap_unionall(st->name->wrapper);
@@ -2460,7 +2871,7 @@ JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_RO
     jl_typestack_t top;
     top.tt = st;
     top.prev = (jl_typestack_t*)stack;
-    st->types = inst_ftypes(wt->types, &env[n - 1], &top);
+    st->types = inst_ftypes(wt->types, &env[n - 1], &top, cacheable);
     jl_gc_wb(st, st->types);
     return st->types;
 }
@@ -2477,7 +2888,7 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) // can throw!
     if (partial == NULL)
         return;
     if (n == 0) {
-        assert(jl_array_len(partial) == 0);
+        assert(jl_array_nrows(partial) == 0);
         return;
     }
 
@@ -2488,28 +2899,34 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) // can throw!
         env[i].prev = i == 0 ? NULL : &env[i - 1];
     }
 
-    for (j = 0; j < jl_array_len(partial); j++) {
+    for (j = 0; j < jl_array_nrows(partial); j++) {
         jl_datatype_t *ndt = (jl_datatype_t*)jl_array_ptr_ref(partial, j);
+        if (ndt == NULL)
+            continue;
         assert(jl_unwrap_unionall(ndt->name->wrapper) == (jl_value_t*)t);
         for (i = 0; i < n; i++)
             env[i].val = jl_svecref(ndt->parameters, i);
 
-        ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)t->super, &env[n - 1], &top, 1);
+        ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)t->super, &env[n - 1], &top, 1, 0);
         jl_gc_wb(ndt, ndt->super);
     }
 
     if (t->types != jl_emptysvec) {
-        for (j = 0; j < jl_array_len(partial); j++) {
+        for (j = 0; j < jl_array_nrows(partial); j++) {
             jl_datatype_t *ndt = (jl_datatype_t*)jl_array_ptr_ref(partial, j);
+            if (ndt == NULL)
+                continue;
             for (i = 0; i < n; i++)
                 env[i].val = jl_svecref(ndt->parameters, i);
             assert(ndt->types == NULL);
-            ndt->types = inst_ftypes(t->types, &env[n - 1], &top);
+            ndt->types = inst_ftypes(t->types, &env[n - 1], &top, 1);
             jl_gc_wb(ndt, ndt->types);
             if (ndt->isconcretetype) { // cacheable
                 jl_compute_field_offsets(ndt);
             }
+            jl_array_ptr_set(partial, j, NULL);
         }
+        t->name->partial = NULL;
     }
     else {
         assert(jl_field_names(t) == jl_emptysvec);
@@ -2524,23 +2941,38 @@ static jl_tvar_t *tvar(const char *name)
                           (jl_value_t*)jl_any_type);
 }
 
-void export_small_typeof(void)
+void export_jl_small_typeof(void)
 {
-    void *copy;
-#ifdef _OS_WINDOWS_
-    jl_dlsym(jl_libjulia_handle, "small_typeof", &copy, 1);
-#else
-    jl_dlsym(jl_libjulia_internal_handle, "small_typeof", &copy, 1);
-#endif
-    memcpy(copy, &small_typeof, sizeof(small_typeof));
+    memcpy(&jl_small_typeof, &ijl_small_typeof, sizeof(jl_small_typeof));
+}
+
+void export_jl_sysimg_globals(void)
+{
+    // Use jl_dlsym to reference "jl_"#name from the jl_libjulia_handle instead
+    // of directly making a symbol from it which will have problems with cpp
+#define XX(name, type) do { \
+        type *ptr; \
+        jl_dlsym(jl_libjulia_handle, "jl_" #name, (void**)&ptr, 1, 0); \
+        *ptr = jl_##name; \
+    } while (0);
+    JL_EXPORTED_DATA_POINTERS(XX)
+#undef XX
+#define YY(name, type) do { \
+        type *ptr; \
+        jl_dlsym(jl_libjulia_handle, "jl_" #name, (void**)&ptr, 1, 0); \
+        *ptr = jl_##name; \
+    } while (0);
+    JL_CONST_GLOBAL_VARS(YY)
+#undef YY
 }
 
 #define XX(name) \
-    small_typeof[(jl_##name##_tag << 4) / sizeof(*small_typeof)] = jl_##name##_type; \
+    ijl_small_typeof[(jl_##name##_tag << 4) / sizeof(*ijl_small_typeof)] = jl_##name##_type; \
     jl_##name##_type->smalltag = jl_##name##_tag;
 void jl_init_types(void) JL_GC_DISABLED
 {
     jl_module_t *core = NULL; // will need to be assigned later
+    jl_task_t *ct = jl_current_task;
 
     // create base objects
     jl_datatype_type = jl_new_uninitialized_datatype();
@@ -2550,22 +2982,20 @@ void jl_init_types(void) JL_GC_DISABLED
     XX(symbol);
     jl_simplevector_type = jl_new_uninitialized_datatype();
     XX(simplevector);
+    jl_methcache_type = jl_new_uninitialized_datatype();
     jl_methtable_type = jl_new_uninitialized_datatype();
+    jl_method_table = jl_new_method_table(jl_symbol("methodtable"), core);
 
-    jl_emptysvec = (jl_svec_t*)jl_gc_permobj(sizeof(void*), jl_simplevector_type);
+    jl_emptysvec = (jl_svec_t*)jl_gc_permobj(ct->ptls, sizeof(void*), jl_simplevector_type, 0);
     jl_set_typetagof(jl_emptysvec, jl_simplevector_tag, GC_OLD_MARKED);
     jl_svec_set_len_unsafe(jl_emptysvec, 0);
 
     jl_any_type = (jl_datatype_t*)jl_new_abstracttype((jl_value_t*)jl_symbol("Any"), core, NULL, jl_emptysvec);
     jl_any_type->super = jl_any_type;
-    jl_nonfunction_mt = jl_any_type->name->mt;
-    jl_any_type->name->mt = NULL;
 
     jl_datatype_t *type_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Type"), core, jl_any_type, jl_emptysvec);
     jl_type_type = (jl_unionall_t*)type_type;
     jl_type_typename = type_type->name;
-    jl_type_type_mt = jl_new_method_table(jl_type_typename->name, core);
-    jl_type_typename->mt = jl_type_type_mt;
 
     // initialize them. lots of cycles.
     // NOTE: types are not actually mutable, but we want to ensure they are heap-allocated with stable addresses
@@ -2600,54 +3030,59 @@ void jl_init_types(void) JL_GC_DISABLED
 
     jl_typename_type->name = jl_new_typename_in(jl_symbol("TypeName"), core, 0, 1);
     jl_typename_type->name->wrapper = (jl_value_t*)jl_typename_type;
-    jl_typename_type->name->mt = jl_nonfunction_mt;
     jl_typename_type->super = jl_any_type;
     jl_typename_type->parameters = jl_emptysvec;
-    jl_typename_type->name->n_uninitialized = 15 - 2;
-    jl_typename_type->name->names = jl_perm_symsvec(15, "name", "module",
+    jl_typename_type->name->n_uninitialized = 18 - 2;
+    jl_typename_type->name->names = jl_perm_symsvec(18, "name", "module", "singletonname",
                                                     "names", "atomicfields", "constfields",
                                                     "wrapper", "Typeofwrapper", "cache", "linearcache",
-                                                    "mt", "partial",
-                                                    "hash", "n_uninitialized",
+                                                    "partial", "hash", "max_args", "n_uninitialized",
                                                     "flags", // "abstract", "mutable", "mayinlinealloc",
-                                                    "max_methods");
-    const static uint32_t typename_constfields[1] = { 0x00003a3f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<9)|(1<<11)|(1<<12)|(1<<13)
-    const static uint32_t typename_atomicfields[1] = { 0x00000180 }; // (1<<7)|(1<<8)
+                                                    "cache_entry_count", "max_methods", "constprop_heuristic");
+    const static uint32_t typename_constfields[1]  = { 0b000110100001001011 }; // TODO: put back atomicfields and constfields in this list
+    const static uint32_t typename_atomicfields[1] = { 0b001001001110000000 };
     jl_typename_type->name->constfields = typename_constfields;
     jl_typename_type->name->atomicfields = typename_atomicfields;
     jl_precompute_memoized_dt(jl_typename_type, 1);
-    jl_typename_type->types = jl_svec(15, jl_symbol_type, jl_any_type /*jl_module_type*/,
-                                      jl_simplevector_type, jl_any_type/*jl_voidpointer_type*/, jl_any_type/*jl_voidpointer_type*/,
-                                      jl_type_type, jl_type_type, jl_simplevector_type, jl_simplevector_type,
-                                      jl_methtable_type, jl_any_type,
-                                      jl_any_type /*jl_long_type*/, jl_any_type /*jl_int32_type*/,
+    jl_typename_type->types = jl_svec(18, jl_symbol_type, jl_any_type /*jl_module_type*/, jl_symbol_type,
+                                      jl_simplevector_type,
+                                      jl_any_type/*jl_voidpointer_type*/, jl_any_type/*jl_voidpointer_type*/,
+                                      jl_type_type, jl_simplevector_type, jl_simplevector_type,
+                                      jl_methcache_type, jl_any_type,
+                                      jl_any_type /*jl_long_type*/,
+                                      jl_any_type /*jl_int32_type*/,
+                                      jl_any_type /*jl_int32_type*/,
+                                      jl_any_type /*jl_uint8_type*/,
+                                      jl_any_type /*jl_uint8_type*/,
                                       jl_any_type /*jl_uint8_type*/,
                                       jl_any_type /*jl_uint8_type*/);
 
+    jl_methcache_type->name = jl_new_typename_in(jl_symbol("MethodCache"), core, 0, 1);
+    jl_methcache_type->name->wrapper = (jl_value_t*)jl_methcache_type;
+    jl_methcache_type->super = jl_any_type;
+    jl_methcache_type->parameters = jl_emptysvec;
+    jl_methcache_type->name->n_uninitialized = 4 - 2;
+    jl_methcache_type->name->names = jl_perm_symsvec(4, "leafcache", "cache", "", "");
+    const static uint32_t methcache_atomicfields[1] = { 0b1111 };
+    jl_methcache_type->name->atomicfields = methcache_atomicfields;
+    jl_precompute_memoized_dt(jl_methcache_type, 1);
+    jl_methcache_type->types = jl_svec(4, jl_any_type, jl_any_type, jl_any_type/*voidpointer*/, jl_any_type/*int32*/);
+
     jl_methtable_type->name = jl_new_typename_in(jl_symbol("MethodTable"), core, 0, 1);
     jl_methtable_type->name->wrapper = (jl_value_t*)jl_methtable_type;
-    jl_methtable_type->name->mt = jl_nonfunction_mt;
     jl_methtable_type->super = jl_any_type;
     jl_methtable_type->parameters = jl_emptysvec;
-    jl_methtable_type->name->n_uninitialized = 11 - 6;
-    jl_methtable_type->name->names = jl_perm_symsvec(11, "name", "defs",
-                                                     "leafcache", "cache", "max_args",
-                                                     "module", "backedges",
-                                                     "", "", "offs", "");
-    const static uint32_t methtable_constfields[1] = { 0x00000020 }; // (1<<5);
-    const static uint32_t methtable_atomicfields[1] = { 0x0000001e }; // (1<<1)|(1<<2)|(1<<3)|(1<<4);
+    jl_methtable_type->name->n_uninitialized = 0;
+    jl_methtable_type->name->names = jl_perm_symsvec(5, "defs", "cache", "name", "module", "backedges");
+    const static uint32_t methtable_constfields[1] = { 0b01110 };
+    const static uint32_t methtable_atomicfields[1] = { 0b00001 };
     jl_methtable_type->name->constfields = methtable_constfields;
     jl_methtable_type->name->atomicfields = methtable_atomicfields;
     jl_precompute_memoized_dt(jl_methtable_type, 1);
-    jl_methtable_type->types = jl_svec(11, jl_symbol_type, jl_any_type, jl_any_type,
-                                       jl_any_type, jl_any_type/*jl_long*/,
-                                       jl_any_type/*module*/, jl_any_type/*any vector*/,
-                                       jl_any_type/*voidpointer*/, jl_any_type/*int32*/,
-                                       jl_any_type/*uint8*/, jl_any_type/*uint8*/);
+    jl_methtable_type->types = jl_svec(5, jl_any_type, jl_methcache_type, jl_symbol_type, jl_any_type /*jl_module_type*/, jl_any_type);
 
     jl_symbol_type->name = jl_new_typename_in(jl_symbol("Symbol"), core, 0, 1);
     jl_symbol_type->name->wrapper = (jl_value_t*)jl_symbol_type;
-    jl_symbol_type->name->mt = jl_nonfunction_mt;
     jl_symbol_type->super = jl_any_type;
     jl_symbol_type->parameters = jl_emptysvec;
     jl_symbol_type->name->n_uninitialized = 0;
@@ -2657,7 +3092,6 @@ void jl_init_types(void) JL_GC_DISABLED
 
     jl_simplevector_type->name = jl_new_typename_in(jl_symbol("SimpleVector"), core, 0, 1);
     jl_simplevector_type->name->wrapper = (jl_value_t*)jl_simplevector_type;
-    jl_simplevector_type->name->mt = jl_nonfunction_mt;
     jl_simplevector_type->super = jl_any_type;
     jl_simplevector_type->parameters = jl_emptysvec;
     jl_simplevector_type->name->n_uninitialized = 0;
@@ -2668,8 +3102,7 @@ void jl_init_types(void) JL_GC_DISABLED
     // now they can be used to create the remaining base kinds and types
     jl_nothing_type = jl_new_datatype(jl_symbol("Nothing"), core, jl_any_type, jl_emptysvec,
                                       jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
-    jl_void_type = jl_nothing_type; // deprecated alias
-    jl_astaggedvalue(jl_nothing)->header = ((uintptr_t)jl_nothing_type) | GC_OLD_MARKED;
+    XX(nothing);
     jl_nothing_type->instance = jl_nothing;
 
     jl_tvar_type = jl_new_datatype(jl_symbol("TypeVar"), core, jl_any_type, jl_emptysvec,
@@ -2683,7 +3116,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_typeofbottom_type = jl_new_datatype(jl_symbol("TypeofBottom"), core, type_type, jl_emptysvec,
                                            jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     XX(typeofbottom);
-    jl_bottom_type = jl_gc_permobj(0, jl_typeofbottom_type);
+    jl_bottom_type = jl_gc_permobj(ct->ptls, 0, jl_typeofbottom_type, 0);
     jl_set_typetagof(jl_bottom_type, jl_typeofbottom_tag, GC_OLD_MARKED);
     jl_typeofbottom_type->instance = jl_bottom_type;
 
@@ -2718,8 +3151,9 @@ void jl_init_types(void) JL_GC_DISABLED
     XX(vararg);
     // It seems like we probably usually end up needing the box for kinds (often used in an Any context), so force it to exist
     jl_vararg_type->name->mayinlinealloc = 0;
+    jl_vararg_type->ismutationfree = 1;
 
-    jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL, 0));
+    jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL, 0, 0));
     jl_anytuple_type = jl_new_datatype(jl_symbol("Tuple"), core, jl_any_type, anytuple_params,
                                        jl_emptysvec, anytuple_params, jl_emptysvec, 0, 0, 0);
     jl_tuple_typename = jl_anytuple_type->name;
@@ -2730,8 +3164,9 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_anytuple_type->layout = NULL;
 
     jl_typeofbottom_type->super = jl_wrap_Type(jl_bottom_type);
-    jl_emptytuple_type = (jl_datatype_t*)jl_apply_tuple_type(jl_emptysvec);
-    jl_emptytuple = jl_gc_permobj(0, jl_emptytuple_type);
+    jl_typeofbottom_type->super->layout = jl_typeofbottom_type->layout; // the only abstract type with a layout
+    jl_emptytuple_type = (jl_datatype_t*)jl_apply_tuple_type(jl_emptysvec, 0);
+    jl_emptytuple = jl_gc_permobj(ct->ptls, 0, jl_emptytuple_type, 0);
     jl_emptytuple_type->instance = jl_emptytuple;
 
     // non-primitive definitions follow
@@ -2758,19 +3193,19 @@ void jl_init_types(void) JL_GC_DISABLED
                                        jl_perm_symsvec(1, "id"),
                                        jl_svec1(jl_long_type),
                                        jl_emptysvec, 0, 0, 1);
+    XX(ssavalue);
 
     jl_slotnumber_type = jl_new_datatype(jl_symbol("SlotNumber"), core, jl_any_type, jl_emptysvec,
                                          jl_perm_symsvec(1, "id"),
                                          jl_svec1(jl_long_type),
                                          jl_emptysvec, 0, 0, 1);
+    XX(slotnumber);
 
     jl_argument_type = jl_new_datatype(jl_symbol("Argument"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(1, "n"),
                                        jl_svec1(jl_long_type),
                                        jl_emptysvec, 0, 0, 1);
 
-    jl_init_int32_int64_cache();
-
     jl_bool_type = NULL;
     jl_bool_type = jl_new_primitivetype((jl_value_t*)jl_symbol("Bool"), core,
                                         jl_any_type, jl_emptysvec, 8);
@@ -2834,17 +3269,77 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type),
                         jl_emptysvec,
                         0, 1, 4);
-    const static uint32_t typemap_entry_constfields[1] = { 0x000003fe }; // (1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)
-    const static uint32_t typemap_entry_atomicfields[1] = { 0x00000001 }; // (1<<0)
+    const static uint32_t typemap_entry_constfields[1] = { 0x000003ce }; // (1<<1)|(1<<2)|(1<<3)|(1<<6)|(1<<7)|(1<<8)|(1<<9)
+    const static uint32_t typemap_entry_atomicfields[1] = { 0x00000031 }; // (1<<0)|(1<<4)|(1<<5)
     jl_typemap_entry_type->name->constfields = typemap_entry_constfields;
     jl_typemap_entry_type->name->atomicfields = typemap_entry_atomicfields;
 
     jl_function_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Function"), core, jl_any_type, jl_emptysvec);
     jl_builtin_type  = jl_new_abstracttype((jl_value_t*)jl_symbol("Builtin"), core, jl_function_type, jl_emptysvec);
-    jl_function_type->name->mt = NULL; // subtypes of Function have independent method tables
-    jl_builtin_type->name->mt = NULL;  // so they don't share the Any type table
 
-    jl_svec_t *tv = jl_svec2(tvar("T"), tvar("N"));
+    jl_svec_t *tv;
+
+    jl_module_type =
+        jl_new_datatype(jl_symbol("Module"), core, jl_any_type, jl_emptysvec,
+                        jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
+    XX(module);
+    assert(jl_module_type->instance == NULL);
+    jl_compute_field_offsets(jl_module_type);
+
+    jl_binding_partition_type =
+        jl_new_datatype(jl_symbol("BindingPartition"), core, jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(5, "restriction", "min_world", "max_world", "next", "kind"),
+                        jl_svec(5, jl_any_type,
+                        jl_ulong_type, jl_ulong_type, jl_any_type/*jl_binding_partition_type*/, jl_ulong_type),
+                        jl_emptysvec, 0, 1, 0);
+    const static uint32_t binding_partition_atomicfields[] = { 0b01110 }; // Set fields 2, 3, 4 as atomic
+    jl_binding_partition_type->name->atomicfields = binding_partition_atomicfields;
+    const static uint32_t binding_partition_constfields[]  = { 0b10001 }; // Set fields 1, 5 as constant
+    jl_binding_partition_type->name->constfields = binding_partition_constfields;
+
+    jl_binding_type =
+        jl_new_datatype(jl_symbol("Binding"), core, jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(5, "globalref", "value", "partitions", "backedges", "flags"),
+                        jl_svec(5, jl_any_type/*jl_globalref_type*/, jl_any_type, jl_binding_partition_type,
+                                   jl_any_type, jl_uint8_type),
+                        jl_emptysvec, 0, 1, 0);
+    const static uint32_t binding_atomicfields[] = { 0x0016 }; // Set fields 2, 3, 5 as atomic
+    jl_binding_type->name->atomicfields = binding_atomicfields;
+    const static uint32_t binding_constfields[] = { 0x0001 }; // Set fields 1 as constant
+    jl_binding_type->name->constfields = binding_constfields;
+
+    jl_globalref_type =
+        jl_new_datatype(jl_symbol("GlobalRef"), core, jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(3, "mod", "name", "binding"),
+                        jl_svec(3, jl_module_type, jl_symbol_type, jl_binding_type),
+                        jl_emptysvec, 0, 0, 3);
+    jl_globalref_type->name->mayinlinealloc = 0; // not at all worthwhile, since the only constructor returns a boxed object
+
+    jl_core_module = jl_new_module(jl_symbol("Core"), NULL);
+
+    tv = jl_svec1(tvar("Backend"));
+    jl_addrspace_typename =
+        jl_new_primitivetype((jl_value_t*)jl_symbol("AddrSpace"), core, jl_any_type, tv, 8)->name;
+    jl_addrspace_type = (jl_unionall_t*)jl_addrspace_typename->wrapper;
+    jl_addrspacecore_type = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_addrspace_type, (jl_value_t*)jl_core_module);
+    XX(addrspacecore);
+    jl_value_t *cpumem = jl_permbox8(jl_addrspacecore_type, jl_addrspacecore_tag, 0);
+
+    tv = jl_svec1(tvar("T"));
+    jl_ref_type = (jl_unionall_t*)
+        jl_new_abstracttype((jl_value_t*)jl_symbol("Ref"), core, jl_any_type, tv)->name->wrapper;
+
+    tv = jl_svec1(tvar("T"));
+    jl_pointer_typename =
+        jl_new_primitivetype((jl_value_t*)jl_symbol("Ptr"), core,
+                             (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_ref_type, jl_svec_data(tv), 1),
+                             tv,
+                             sizeof(void*) * 8)->name;
+    jl_pointer_type = (jl_unionall_t*)jl_pointer_typename->wrapper;
+    jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type);
+    jl_voidpointer_type = (jl_datatype_t*)pointer_void;
+
+    tv = jl_svec2(tvar("T"), tvar("N"));
     jl_abstractarray_type = (jl_unionall_t*)
         jl_new_abstracttype((jl_value_t*)jl_symbol("AbstractArray"), core,
                             jl_any_type, tv)->name->wrapper;
@@ -2855,22 +3350,67 @@ void jl_init_types(void) JL_GC_DISABLED
                             (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_abstractarray_type, jl_svec_data(tv), 2),
                             tv)->name->wrapper;
 
+    tv = jl_svec(3, tvar("isatomic"), tvar("T"), tvar("addrspace"));
+    jl_datatype_t *jl_memory_supertype = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_densearray_type, jl_svecref(tv, 1), jl_box_long(1));
+    jl_datatype_t *memory_datatype =
+        jl_new_datatype(jl_symbol("GenericMemory"), core, jl_memory_supertype, tv,
+                        jl_perm_symsvec(2, "length", "ptr"),
+                        jl_svec(2, jl_long_type, pointer_void),
+                        jl_emptysvec, 0, 1, 2);
+    jl_genericmemory_typename = memory_datatype->name;
+    jl_genericmemory_type = (jl_unionall_t*)jl_genericmemory_typename->wrapper;
+    const static uint32_t memory_constfields[1] = { 0x00000003 }; // (1<<1)|(1<<0)
+    memory_datatype->name->constfields = memory_constfields;
+    memory_datatype->ismutationfree = 0;
+
+    jl_datatype_t *jl_memoryref_supertype = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_ref_type, jl_svecref(tv, 1));
+    jl_datatype_t *memoryref_datatype =
+        jl_new_datatype(jl_symbol("GenericMemoryRef"), core, jl_memoryref_supertype, tv,
+                        jl_perm_symsvec(2, "ptr_or_offset", "mem"),
+                        jl_svec(2, pointer_void, memory_datatype),
+                        jl_emptysvec, 0, 0, 2);
+    jl_genericmemoryref_typename = memoryref_datatype->name;
+    jl_genericmemoryref_type = (jl_unionall_t*)jl_genericmemoryref_typename->wrapper;
+    memoryref_datatype->ismutationfree = 0;
+
+    jl_memory_any_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_any_type, cpumem);
+    jl_memory_uint8_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint8_type, cpumem);
+    jl_memory_uint16_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint16_type, cpumem);
+    jl_memory_uint32_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint32_type, cpumem);
+    jl_memory_uint64_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint64_type, cpumem);
+    jl_memoryref_any_type = jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_any_type, cpumem);
+    jl_memoryref_uint8_type = jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint8_type, cpumem);
+
     tv = jl_svec2(tvar("T"), tvar("N"));
-    jl_array_type = (jl_unionall_t*)
-        jl_new_datatype(jl_symbol("Array"), core,
+    jl_array_typename = jl_new_datatype(jl_symbol("Array"), core,
                         (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_densearray_type, jl_svec_data(tv), 2),
-                        tv, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0)->name->wrapper;
-    jl_array_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->name;
-    jl_compute_field_offsets((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type));
+                        tv,
+                        jl_perm_symsvec(2, "ref", "size"),
+                        jl_svec(2,
+                            jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, jl_svecref(tv, 0), cpumem),
+                            jl_apply_type1((jl_value_t*)jl_tuple_type, (jl_value_t*)jl_wrap_vararg((jl_value_t*)jl_long_type, jl_svecref(tv, 1), 0, 0))),
+                            jl_emptysvec, 0, 1, 2)->name;
+    jl_array_type = (jl_unionall_t*)jl_array_typename->wrapper;
 
     jl_array_any_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_any_type, jl_box_long(1));
     jl_array_symbol_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_symbol_type, jl_box_long(1));
     jl_array_uint8_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint8_type, jl_box_long(1));
+    jl_array_uint32_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint32_type, jl_box_long(1));
     jl_array_int32_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_int32_type, jl_box_long(1));
     jl_array_uint64_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint64_type, jl_box_long(1));
     jl_an_empty_vec_any = (jl_value_t*)jl_alloc_vec_any(0); // used internally
-    jl_atomic_store_relaxed(&jl_nonfunction_mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
-    jl_atomic_store_relaxed(&jl_type_type_mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
+    jl_an_empty_memory_any = (jl_value_t*)jl_alloc_memory_any(0); // used internally
+
+    // finish initializing module Core
+    core = jl_core_module;
+    jl_method_table->module = core;
+    jl_atomic_store_relaxed(&jl_method_table->cache->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any);
+    jl_method_table->backedges = (jl_genericmemory_t*)jl_an_empty_memory_any;
+    jl_atomic_store_relaxed(&core->bindingkeyset, (jl_genericmemory_t*)jl_an_empty_memory_any);
+    // export own name, so "using Foo" makes "Foo" itself visible
+    jl_set_initial_const(core, core->name, (jl_value_t*)core, 1);
+    jl_set_initial_const(core, jl_symbol("CPU"), (jl_value_t*)cpumem, 0);
+    core = NULL;
 
     jl_expr_type =
         jl_new_datatype(jl_symbol("Expr"), core,
@@ -2879,14 +3419,7 @@ void jl_init_types(void) JL_GC_DISABLED
                         jl_svec(2, jl_symbol_type, jl_array_any_type),
                         jl_emptysvec, 0, 1, 2);
 
-    jl_module_type =
-        jl_new_datatype(jl_symbol("Module"), core, jl_any_type, jl_emptysvec,
-                        jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
-    XX(module);
-    jl_module_type->instance = NULL;
-    jl_compute_field_offsets(jl_module_type);
-
-    jl_value_t *symornothing[2] = { (jl_value_t*)jl_symbol_type, (jl_value_t*)jl_void_type };
+    jl_value_t *symornothing[2] = { (jl_value_t*)jl_symbol_type, (jl_value_t*)jl_nothing_type };
     jl_linenumbernode_type =
         jl_new_datatype(jl_symbol("LineNumberNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "line", "file"),
@@ -2894,10 +3427,10 @@ void jl_init_types(void) JL_GC_DISABLED
                         jl_emptysvec, 0, 0, 2);
 
     jl_lineinfonode_type =
-        jl_new_datatype(jl_symbol("LineInfoNode"), core, jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(5, "module", "method", "file", "line", "inlined_at"),
-                        jl_svec(5, jl_module_type, jl_any_type, jl_symbol_type, jl_int32_type, jl_int32_type),
-                        jl_emptysvec, 0, 0, 5);
+        jl_new_datatype(jl_symbol("LegacyLineInfoNode"), core, jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(3, "file", "line", "inlined_at"),
+                        jl_svec(3, jl_symbol_type, jl_int32_type, jl_int32_type),
+                        jl_emptysvec, 0, 0, 3);
 
     jl_gotonode_type =
         jl_new_datatype(jl_symbol("GotoNode"), core, jl_any_type, jl_emptysvec,
@@ -2911,6 +3444,12 @@ void jl_init_types(void) JL_GC_DISABLED
                         jl_svec(2, jl_any_type, jl_long_type),
                         jl_emptysvec, 0, 0, 2);
 
+    jl_enternode_type =
+        jl_new_datatype(jl_symbol("EnterNode"), core, jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(2, "catch_dest", "scope"),
+                        jl_svec(2, jl_long_type, jl_any_type),
+                        jl_emptysvec, 0, 0, 1);
+
     jl_returnnode_type =
         jl_new_datatype(jl_symbol("ReturnNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "val"),
@@ -2953,16 +3492,30 @@ void jl_init_types(void) JL_GC_DISABLED
                         jl_svec(1, jl_slotnumber_type),
                         jl_emptysvec, 0, 0, 1);
 
+    jl_debuginfo_type =
+        jl_new_datatype(jl_symbol("DebugInfo"), core,
+                        jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(4,
+                            "def",
+                            "linetable",
+                            "edges",
+                            "codelocs"),
+                        jl_svec(4,
+                            jl_any_type, // union(jl_method_instance_type, jl_method_type, jl_symbol_type),
+                            jl_any_type, // union(jl_nothing, jl_debuginfo_type)
+                            jl_simplevector_type, // memory{debuginfo}
+                            jl_string_type),
+                        jl_emptysvec, 0, 0, 4);
+    jl_debuginfo_type->name->mayinlinealloc = 0;
+
     jl_code_info_type =
         jl_new_datatype(jl_symbol("CodeInfo"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(22,
+                        jl_perm_symsvec(23,
                             "code",
-                            "codelocs",
+                            "debuginfo",
                             "ssavaluetypes",
                             "ssaflags",
-                            "method_for_inference_limit_heuristics",
-                            "linetable",
                             "slotnames",
                             "slotflags",
                             "slottypes",
@@ -2971,36 +3524,40 @@ void jl_init_types(void) JL_GC_DISABLED
                             "edges",
                             "min_world",
                             "max_world",
-                            "inferred",
+                            "method_for_inference_limit_heuristics",
+                            "nargs",
                             "propagate_inbounds",
                             "has_fcall",
+                            "has_image_globalref",
                             "nospecializeinfer",
+                            "isva",
                             "inlining",
                             "constprop",
                             "purity",
                             "inlining_cost"),
-                        jl_svec(22,
+                        jl_svec(23,
                             jl_array_any_type,
-                            jl_array_int32_type,
-                            jl_any_type,
-                            jl_array_uint8_type,
-                            jl_any_type,
+                            jl_debuginfo_type,
                             jl_any_type,
+                            jl_array_uint32_type,
                             jl_array_symbol_type,
                             jl_array_uint8_type,
                             jl_any_type,
                             jl_any_type,
                             jl_any_type,
-                            jl_any_type,
+                            jl_any_type, // prefers svec, but tolerates Vector{Any}
                             jl_ulong_type,
                             jl_ulong_type,
+                            jl_any_type,
+                            jl_ulong_type,
+                            jl_bool_type,
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type,
                             jl_uint8_type,
                             jl_uint8_type,
-                            jl_uint8_type,
+                            jl_uint16_type,
                             jl_uint16_type),
                         jl_emptysvec,
                         0, 1, 22);
@@ -3008,19 +3565,21 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_method_type =
         jl_new_datatype(jl_symbol("Method"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(30,
+                        jl_perm_symsvec(33,
                             "name",
                             "module",
                             "file",
                             "line",
-                            "primary_world",
-                            "deleted_world", // !const
+                            "dispatch_status", // atomic
+                            "interferences", // atomic
+                            "primary_world", // atomic
                             "sig",
                             "specializations", // !const
                             "speckeyset", // !const
                             "slot_syms",
                             "external_mt",
                             "source", // !const
+                            "debuginfo", // !const
                             "unspecialized", // !const
                             "generator", // !const
                             "roots", // !const
@@ -3036,22 +3595,25 @@ void jl_init_types(void) JL_GC_DISABLED
                             "isva",
                             "is_for_opaque_closure",
                             "nospecializeinfer",
+                            "did_scan_source",
                             "constprop",
                             "max_varargs",
                             "purity"),
-                        jl_svec(30,
+                        jl_svec(33,
                             jl_symbol_type,
                             jl_module_type,
                             jl_symbol_type,
                             jl_int32_type,
-                            jl_ulong_type,
+                            jl_uint8_type,
+                            jl_memory_any_type,
                             jl_ulong_type,
                             jl_type_type,
                             jl_any_type, // union(jl_simplevector_type, jl_method_instance_type),
-                            jl_array_type,
+                            jl_genericmemory_type, // union(jl_memory_uint8_type, jl_memory_uint16_type, jl_memory_uint32_type, jl_memory_uint64_type, jl_memory_any_type)
                             jl_string_type,
                             jl_any_type,
                             jl_any_type,
+                            jl_debuginfo_type,
                             jl_any_type, // jl_method_instance_type
                             jl_any_type,
                             jl_array_any_type,
@@ -3069,126 +3631,110 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type,
                             jl_uint8_type,
                             jl_uint8_type,
-                            jl_uint8_type),
+                            jl_uint8_type,
+                            jl_uint16_type),
                         jl_emptysvec,
                         0, 1, 10);
-    //const static uint32_t method_constfields[1] = { 0x03fc065f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<6)|(1<<9)|(1<<10)|(1<<18)|(1<<19)|(1<<20)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25);
+    //const static uint32_t method_constfields[] = { 0b0, 0b0 }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<6)|(1<<9)|(1<<10)|(1<<17)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25)|(1<<26)|(1<<27)|(1<<28)|(1<<29)|(1<<30);
     //jl_method_type->name->constfields = method_constfields;
+    const static uint32_t method_atomicfields[] = { 0x20000070, 0x0 }; // (1<<4)|(1<<5)|(1<<6)|(1<<29)
+    jl_method_type->name->atomicfields = method_atomicfields;
 
     jl_method_instance_type =
         jl_new_datatype(jl_symbol("MethodInstance"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(10,
+                        jl_perm_symsvec(8,
                             "def",
                             "specTypes",
                             "sparam_vals",
-                            "uninferred",
                             "backedges",
-                            "callbacks",
                             "cache",
-                            "inInference",
                             "cache_with_orig",
-                            "precompiled"),
-                        jl_svec(10,
+                            "flags",
+                            "dispatch_status"),
+                        jl_svec(8,
                             jl_new_struct(jl_uniontype_type, jl_method_type, jl_module_type),
                             jl_any_type,
                             jl_simplevector_type,
-                            jl_any_type,
                             jl_array_any_type,
-                            jl_any_type,
-                            jl_any_type,
+                            jl_any_type/*jl_code_instance_type*/,
                             jl_bool_type,
                             jl_bool_type,
-                            jl_bool_type),
+                            jl_uint8_type),
                         jl_emptysvec,
                         0, 1, 3);
     // These fields should be constant, but Serialization wants to mutate them in initialization
-    //const static uint32_t method_instance_constfields[1] = { 0x00000007 }; // (1<<0)|(1<<1)|(1<<2);
-    const static uint32_t method_instance_atomicfields[1] = { 0x00000248 }; // (1<<3)|(1<<6)|(1<<9);
-    //Fields 4 and 5 must be protected by method->write_lock, and thus all operations on jl_method_instance_t are threadsafe. TODO: except inInference
+    //const static uint32_t method_instance_constfields[1] = { 0b00000111 }; // fields 1, 2, 3
+    const static uint32_t method_instance_atomicfields[1]  = { 0b11010000 }; // fields 5, 7, 8
+    //Fields 4 and 5 must be protected by method->write_lock, and thus all operations on jl_method_instance_t are threadsafe.
     //jl_method_instance_type->name->constfields = method_instance_constfields;
     jl_method_instance_type->name->atomicfields = method_instance_atomicfields;
 
     jl_code_instance_type =
         jl_new_datatype(jl_symbol("CodeInstance"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(15,
+                        jl_perm_symsvec(21,
                             "def",
+                            "owner",
                             "next",
                             "min_world",
                             "max_world",
                             "rettype",
+                            "exctype",
                             "rettype_const",
                             "inferred",
-                            //"edges",
+                            "debuginfo",
+                            "edges",
+                            "analysis_results",
+                            "ipo_purity_bits",
+                            "time_infer_total",
+                            "time_infer_cache_saved",
+                            "time_infer_self",
+                            "time_compile",
                             //"absolute_max",
-                            "ipo_purity_bits", "purity_bits",
-                            "argescapes",
-                            "isspecsig", "precompile", "relocatability",
+                            "flags", "precompile",
                             "invoke", "specptr"), // function object decls
-                        jl_svec(15,
-                            jl_method_instance_type,
+                        jl_svec(21,
+                            jl_any_type,
+                            jl_any_type,
                             jl_any_type,
                             jl_ulong_type,
                             jl_ulong_type,
                             jl_any_type,
                             jl_any_type,
                             jl_any_type,
-                            //jl_any_type,
-                            //jl_bool_type,
-                            jl_uint32_type, jl_uint32_type,
                             jl_any_type,
-                            jl_bool_type,
-                            jl_bool_type,
+                            jl_debuginfo_type,
+                            jl_simplevector_type,
+                            jl_any_type,
+                            jl_uint32_type,
+                            jl_uint16_type,
+                            jl_uint16_type,
+                            jl_uint16_type,
+                            jl_uint16_type,
+                            //jl_bool_type,
                             jl_uint8_type,
+                            jl_bool_type,
                             jl_any_type, jl_any_type), // fptrs
                         jl_emptysvec,
                         0, 1, 1);
-    jl_svecset(jl_code_instance_type->types, 1, jl_code_instance_type);
-    const static uint32_t code_instance_constfields[1]  = { 0b000001010110001 }; // Set fields 1, 5-6, 8, 10 as const
-    const static uint32_t code_instance_atomicfields[1] = { 0b110100101000010 }; // Set fields 2, 7, 9, 12, 14-15 as atomic
-    //Fields 3-4 are only operated on by construction and deserialization, so are const at runtime
-    //Fields 11 and 15 must be protected by locks, and thus all operations on jl_code_instance_t are threadsafe
+    jl_svecset(jl_code_instance_type->types, 2, jl_code_instance_type);
+    const static uint32_t code_instance_constfields[1]  = { 0b000001110000011100011 }; // Set fields 1, 2, 6-8, 14-16 as const
+    const static uint32_t code_instance_atomicfields[1] = { 0b111110001011100011100 }; // Set fields 3-5, 9-12, 13, 17-21 as atomic
+    // Fields 4-5 are only operated on by construction and deserialization, so are effectively const at runtime
+    // Fields ipo_purity_bits and analysis_results are not currently threadsafe or reliable, as they get mutated after optimization, but are not declared atomic
+    // and there is no way to tell (during inference) if their value is finalized yet (to wait for them to be narrowed if applicable)
     jl_code_instance_type->name->constfields = code_instance_constfields;
     jl_code_instance_type->name->atomicfields = code_instance_atomicfields;
 
-    jl_const_type = jl_new_datatype(jl_symbol("Const"), core, jl_any_type, jl_emptysvec,
-                                       jl_perm_symsvec(1, "val"),
-                                       jl_svec1(jl_any_type),
-                                       jl_emptysvec, 0, 0, 1);
-
-    jl_partial_struct_type = jl_new_datatype(jl_symbol("PartialStruct"), core, jl_any_type, jl_emptysvec,
-                                       jl_perm_symsvec(2, "typ", "fields"),
-                                       jl_svec2(jl_any_type, jl_array_any_type),
-                                       jl_emptysvec, 0, 0, 2);
-
-    jl_interconditional_type = jl_new_datatype(jl_symbol("InterConditional"), core, jl_any_type, jl_emptysvec,
-                                          jl_perm_symsvec(3, "slot", "thentype", "elsetype"),
-                                          jl_svec(3, jl_long_type, jl_any_type, jl_any_type),
-                                          jl_emptysvec, 0, 0, 3);
-
     jl_method_match_type = jl_new_datatype(jl_symbol("MethodMatch"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(4, "spec_types", "sparams", "method", "fully_covers"),
                                        jl_svec(4, jl_type_type, jl_simplevector_type, jl_method_type, jl_bool_type),
                                        jl_emptysvec, 0, 0, 4);
 
-    // all Kinds share the Type method table (not the nonfunction one)
-    jl_unionall_type->name->mt = jl_uniontype_type->name->mt = jl_datatype_type->name->mt =
-        jl_type_type_mt;
-
     jl_intrinsic_type = jl_new_primitivetype((jl_value_t*)jl_symbol("IntrinsicFunction"), core,
                                              jl_builtin_type, jl_emptysvec, 32);
-
-    tv = jl_svec1(tvar("T"));
-    jl_ref_type = (jl_unionall_t*)
-        jl_new_abstracttype((jl_value_t*)jl_symbol("Ref"), core, jl_any_type, tv)->name->wrapper;
-
-    tv = jl_svec1(tvar("T"));
-    jl_pointer_type = (jl_unionall_t*)
-        jl_new_primitivetype((jl_value_t*)jl_symbol("Ptr"), core,
-                             (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_ref_type, jl_svec_data(tv), 1), tv,
-                             sizeof(void*)*8)->name->wrapper;
-    jl_pointer_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_pointer_type))->name;
+    XX(intrinsic);
 
     // LLVMPtr{T, AS} where {T, AS}
     jl_tvar_t *elvar = tvar("T");
@@ -3221,24 +3767,35 @@ void jl_init_types(void) JL_GC_DISABLED
                         NULL,
                         jl_any_type,
                         jl_emptysvec,
-                        jl_perm_symsvec(16,
+                        jl_perm_symsvec(27,
                                         "next",
                                         "queue",
                                         "storage",
                                         "donenotify",
                                         "result",
-                                        "logstate",
+                                        "scope",
                                         "code",
+                                        "_state",
+                                        "sticky",
+                                        "priority",
+                                        "_isexception",
+                                        "pad00",
+                                        "pad01",
+                                        "pad02",
                                         "rngState0",
                                         "rngState1",
                                         "rngState2",
                                         "rngState3",
                                         "rngState4",
-                                        "_state",
-                                        "sticky",
-                                        "_isexception",
-                                        "priority"),
-                        jl_svec(16,
+                                        "metrics_enabled",
+                                        "pad10",
+                                        "pad11",
+                                        "pad12",
+                                        "first_enqueued_at",
+                                        "last_started_running_at",
+                                        "running_time_ns",
+                                        "finished_at"),
+                        jl_svec(27,
                                 jl_any_type,
                                 jl_any_type,
                                 jl_any_type,
@@ -3246,39 +3803,38 @@ void jl_init_types(void) JL_GC_DISABLED
                                 jl_any_type,
                                 jl_any_type,
                                 jl_any_type,
+                                jl_uint8_type,
+                                jl_bool_type,
+                                jl_uint16_type,
+                                jl_bool_type,
+                                jl_uint8_type,
+                                jl_uint8_type,
+                                jl_uint8_type,
                                 jl_uint64_type,
                                 jl_uint64_type,
                                 jl_uint64_type,
                                 jl_uint64_type,
                                 jl_uint64_type,
-                                jl_uint8_type,
-                                jl_bool_type,
                                 jl_bool_type,
-                                jl_uint16_type),
+                                jl_uint8_type,
+                                jl_uint8_type,
+                                jl_uint8_type,
+                                jl_uint64_type,
+                                jl_uint64_type,
+                                jl_uint64_type,
+                                jl_uint64_type),
                         jl_emptysvec,
                         0, 1, 6);
     XX(task);
     jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type);
     jl_svecset(jl_task_type->types, 0, listt);
+    // Set field 20 (metrics_enabled) as const
+    // Set fields 8 (_state) and 24-27 (metric counters) as atomic
+    const static uint32_t task_constfields[1]  = { 0b00000000000010000000000000000000 };
+    const static uint32_t task_atomicfields[1] = { 0b00000111100000000000000010000000 };
+    jl_task_type->name->constfields = task_constfields;
+    jl_task_type->name->atomicfields = task_atomicfields;
 
-    jl_binding_type =
-        jl_new_datatype(jl_symbol("Binding"), core, jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(5, "value", "globalref", "owner", "ty", "flags"),
-                        jl_svec(5, jl_any_type, jl_any_type/*jl_globalref_type*/, jl_any_type/*jl_binding_type*/, jl_type_type, jl_uint8_type),
-                        jl_emptysvec, 0, 1, 0);
-    const static uint32_t binding_atomicfields[] = { 0x0015 }; // Set fields 1, 3, 4 as atomic
-    jl_binding_type->name->atomicfields = binding_atomicfields;
-    const static uint32_t binding_constfields[] = { 0x0002 }; // Set fields 2 as constant
-    jl_binding_type->name->constfields = binding_constfields;
-
-    jl_globalref_type =
-        jl_new_datatype(jl_symbol("GlobalRef"), core, jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(3, "mod", "name", "binding"),
-                        jl_svec(3, jl_module_type, jl_symbol_type, jl_binding_type),
-                        jl_emptysvec, 0, 0, 3);
-
-    jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type);
-    jl_voidpointer_type = (jl_datatype_t*)pointer_void;
     tv = jl_svec2(tvar("A"), tvar("R"));
     jl_opaque_closure_type = (jl_unionall_t*)jl_new_datatype(jl_symbol("OpaqueClosure"), core, jl_function_type, tv,
         // N.B.: OpaqueClosure call code relies on specptr being field 5.
@@ -3289,56 +3845,58 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_opaque_closure_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type))->name;
     jl_compute_field_offsets((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type));
 
-    jl_partial_opaque_type = jl_new_datatype(jl_symbol("PartialOpaque"), core, jl_any_type, jl_emptysvec,
-        jl_perm_symsvec(4, "typ", "env", "parent", "source"),
-        jl_svec(4, jl_type_type, jl_any_type, jl_method_instance_type, jl_any_type),
-        jl_emptysvec, 0, 0, 4);
-
     // complete builtin type metadata
     jl_uint8pointer_type = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_uint8_type);
     jl_svecset(jl_datatype_type->types, 5, jl_voidpointer_type);
     jl_svecset(jl_datatype_type->types, 6, jl_int32_type);
     jl_svecset(jl_datatype_type->types, 7, jl_uint16_type);
     jl_svecset(jl_typename_type->types, 1, jl_module_type);
-    jl_svecset(jl_typename_type->types, 3, jl_voidpointer_type);
     jl_svecset(jl_typename_type->types, 4, jl_voidpointer_type);
-    jl_svecset(jl_typename_type->types, 5, jl_type_type);
+    jl_svecset(jl_typename_type->types, 5, jl_voidpointer_type);
     jl_svecset(jl_typename_type->types, 6, jl_type_type);
+    jl_svecset(jl_typename_type->types, 7, jl_type_type);
     jl_svecset(jl_typename_type->types, 11, jl_long_type);
     jl_svecset(jl_typename_type->types, 12, jl_int32_type);
-    jl_svecset(jl_typename_type->types, 13, jl_uint8_type);
+    jl_svecset(jl_typename_type->types, 13, jl_int32_type);
     jl_svecset(jl_typename_type->types, 14, jl_uint8_type);
-    jl_svecset(jl_methtable_type->types, 4, jl_long_type);
-    jl_svecset(jl_methtable_type->types, 5, jl_module_type);
-    jl_svecset(jl_methtable_type->types, 6, jl_array_any_type);
-    jl_svecset(jl_methtable_type->types, 7, jl_long_type); // voidpointer
-    jl_svecset(jl_methtable_type->types, 8, jl_long_type); // uint32_t plus alignment
-    jl_svecset(jl_methtable_type->types, 9, jl_uint8_type);
-    jl_svecset(jl_methtable_type->types, 10, jl_uint8_type);
-    jl_svecset(jl_method_type->types, 12, jl_method_instance_type);
-    jl_svecset(jl_method_instance_type->types, 6, jl_code_instance_type);
-    jl_svecset(jl_code_instance_type->types, 13, jl_voidpointer_type);
-    jl_svecset(jl_code_instance_type->types, 14, jl_voidpointer_type);
-    jl_svecset(jl_binding_type->types, 1, jl_globalref_type);
-    jl_svecset(jl_binding_type->types, 2, jl_binding_type);
+    jl_svecset(jl_typename_type->types, 15, jl_uint8_type);
+    jl_svecset(jl_typename_type->types, 16, jl_uint8_type);
+    jl_svecset(jl_typename_type->types, 17, jl_uint8_type);
+    jl_svecset(jl_methcache_type->types, 2, jl_long_type); // voidpointer
+    jl_svecset(jl_methcache_type->types, 3, jl_long_type); // uint32_t plus alignment
+    jl_svecset(jl_methtable_type->types, 3, jl_module_type);
+    jl_svecset(jl_method_type->types, 14, jl_method_instance_type);
+    //jl_svecset(jl_debuginfo_type->types, 0, jl_method_instance_type); // union(jl_method_instance_type, jl_method_type, jl_symbol_type)
+    jl_svecset(jl_method_instance_type->types, 4, jl_code_instance_type);
+    jl_svecset(jl_code_instance_type->types, 19, jl_voidpointer_type);
+    jl_svecset(jl_code_instance_type->types, 20, jl_voidpointer_type);
+    jl_svecset(jl_binding_type->types, 0, jl_globalref_type);
+    jl_svecset(jl_binding_type->types, 3, jl_array_any_type);
+    jl_svecset(jl_binding_partition_type->types, 3, jl_binding_partition_type);
 
     jl_compute_field_offsets(jl_datatype_type);
     jl_compute_field_offsets(jl_typename_type);
     jl_compute_field_offsets(jl_uniontype_type);
     jl_compute_field_offsets(jl_tvar_type);
     jl_compute_field_offsets(jl_methtable_type);
-    jl_compute_field_offsets(jl_module_type);
+    jl_compute_field_offsets(jl_methcache_type);
     jl_compute_field_offsets(jl_method_instance_type);
     jl_compute_field_offsets(jl_code_instance_type);
     jl_compute_field_offsets(jl_unionall_type);
     jl_compute_field_offsets(jl_simplevector_type);
     jl_compute_field_offsets(jl_symbol_type);
+    jl_compute_field_offsets(jl_binding_partition_type);
 
     // override ismutationfree for builtin types that are mutable for identity
     jl_string_type->ismutationfree = jl_string_type->isidentityfree = 1;
     jl_symbol_type->ismutationfree = jl_symbol_type->isidentityfree = 1;
-    jl_simplevector_type->ismutationfree = jl_simplevector_type->isidentityfree = 1;
+    jl_simplevector_type->isidentityfree = 1;
+    jl_typename_type->ismutationfree = 1;
     jl_datatype_type->ismutationfree = 1;
+    jl_uniontype_type->ismutationfree = 1;
+    jl_unionall_type->ismutationfree = 1;
+    assert(((jl_datatype_t*)jl_array_any_type)->ismutationfree == 0);
+    assert(((jl_datatype_t*)jl_array_uint8_type)->ismutationfree == 0);
 
     // Technically not ismutationfree, but there's a separate system to deal
     // with mutations for global state.
@@ -3346,17 +3904,8 @@ void jl_init_types(void) JL_GC_DISABLED
     // Module object identity is determined by its name and parent name.
     jl_module_type->isidentityfree = 1;
 
-    // Array's mutable data is hidden, so we need to override it
-    ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->ismutationfree = 0;
-    ((jl_datatype_t*)jl_array_any_type)->ismutationfree = 0;
-    ((jl_datatype_t*)jl_array_symbol_type)->ismutationfree = 0;
-    ((jl_datatype_t*)jl_array_uint8_type)->ismutationfree = 0;
-    ((jl_datatype_t*)jl_array_int32_type)->ismutationfree = 0;
-    ((jl_datatype_t*)jl_array_uint64_type)->ismutationfree = 0;
-
-    // override the preferred layout for a couple types
-    jl_lineinfonode_type->name->mayinlinealloc = 0; // FIXME: assumed to be a pointer by codegen
-    export_small_typeof();
+    export_jl_small_typeof();
+    export_jl_sysimg_globals();
 }
 
 static jl_value_t *core(const char *name)
@@ -3364,6 +3913,7 @@ static jl_value_t *core(const char *name)
     return jl_get_global(jl_core_module, jl_symbol(name));
 }
 
+
 // fetch references to things defined in boot.jl
 void post_boot_hooks(void)
 {
@@ -3379,6 +3929,8 @@ void post_boot_hooks(void)
     //XX(float32);
     jl_float64_type = (jl_datatype_t*)core("Float64");
     //XX(float64);
+    jl_bfloat16_type = (jl_datatype_t*)core("BFloat16");
+    //XX(bfloat16);
     jl_floatingpoint_type = (jl_datatype_t*)core("AbstractFloat");
     jl_number_type  = (jl_datatype_t*)core("Number");
     jl_signed_type  = (jl_datatype_t*)core("Signed");
@@ -3393,52 +3945,44 @@ void post_boot_hooks(void)
     jl_int32_type->super = jl_signed_type;
     jl_int64_type->super = jl_signed_type;
 
-    jl_errorexception_type = (jl_datatype_t*)core("ErrorException");
-    jl_stackovf_exception  = jl_new_struct_uninit((jl_datatype_t*)core("StackOverflowError"));
-    jl_diverror_exception  = jl_new_struct_uninit((jl_datatype_t*)core("DivideError"));
-    jl_undefref_exception  = jl_new_struct_uninit((jl_datatype_t*)core("UndefRefError"));
-    jl_undefvarerror_type  = (jl_datatype_t*)core("UndefVarError");
-    jl_atomicerror_type    = (jl_datatype_t*)core("ConcurrencyViolationError");
-    jl_interrupt_exception = jl_new_struct_uninit((jl_datatype_t*)core("InterruptException"));
-    jl_boundserror_type    = (jl_datatype_t*)core("BoundsError");
-    jl_memory_exception    = jl_new_struct_uninit((jl_datatype_t*)core("OutOfMemoryError"));
+    jl_stackovf_exception       = jl_new_struct_uninit((jl_datatype_t*)core("StackOverflowError"));
+    jl_diverror_exception       = jl_new_struct_uninit((jl_datatype_t*)core("DivideError"));
+    jl_undefref_exception       = jl_new_struct_uninit((jl_datatype_t*)core("UndefRefError"));
+    jl_interrupt_exception      = jl_new_struct_uninit((jl_datatype_t*)core("InterruptException"));
+    jl_memory_exception         = jl_new_struct_uninit((jl_datatype_t*)core("OutOfMemoryError"));
     jl_readonlymemory_exception = jl_new_struct_uninit((jl_datatype_t*)core("ReadOnlyMemoryError"));
-    jl_typeerror_type      = (jl_datatype_t*)core("TypeError");
-    jl_argumenterror_type  = (jl_datatype_t*)core("ArgumentError");
-    jl_methoderror_type    = (jl_datatype_t*)core("MethodError");
-    jl_loaderror_type      = (jl_datatype_t*)core("LoadError");
-    jl_initerror_type      = (jl_datatype_t*)core("InitError");
-    jl_pair_type           = core("Pair");
-    jl_kwcall_func         = core("kwcall");
-    jl_kwcall_mt           = ((jl_datatype_t*)jl_typeof(jl_kwcall_func))->name->mt;
-    jl_atomic_store_relaxed(&jl_kwcall_mt->max_args, 0);
+    jl_precompilable_error      = jl_new_struct_uninit((jl_datatype_t*)core("PrecompilableError"));
+
+    jl_errorexception_type   = (jl_datatype_t*)core("ErrorException");
+    jl_undefvarerror_type    = (jl_datatype_t*)core("UndefVarError");
+    jl_fielderror_type       = (jl_datatype_t*)core("FieldError");
+    jl_atomicerror_type      = (jl_datatype_t*)core("ConcurrencyViolationError");
+    jl_boundserror_type      = (jl_datatype_t*)core("BoundsError");
+    jl_typeerror_type        = (jl_datatype_t*)core("TypeError");
+    jl_argumenterror_type    = (jl_datatype_t*)core("ArgumentError");
+    jl_methoderror_type      = (jl_datatype_t*)core("MethodError");
+    jl_loaderror_type        = (jl_datatype_t*)core("LoadError");
+    jl_initerror_type        = (jl_datatype_t*)core("InitError");
+    jl_missingcodeerror_type = (jl_datatype_t*)core("MissingCodeError");
+
+    jl_pair_type             = core("Pair");
+    jl_value_t *kwcall_func  = core("kwcall");
+    jl_kwcall_type = (jl_datatype_t*)jl_typeof(kwcall_func);
+    jl_atomic_store_relaxed(&jl_kwcall_type->name->max_args, 0);
 
     jl_weakref_type = (jl_datatype_t*)core("WeakRef");
     jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name;
+    jl_abioverride_type = (jl_datatype_t*)core("ABIOverride");
 
-    jl_init_box_caches();
-
-    // set module field of primitive types
-    jl_svec_t *bindings = jl_atomic_load_relaxed(&jl_core_module->bindings);
-    jl_value_t **table = jl_svec_data(bindings);
-    for (size_t i = 0; i < jl_svec_len(bindings); i++) {
-        if (table[i] != jl_nothing) {
-            jl_binding_t *b = (jl_binding_t*)table[i];
-            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
-            if (v) {
-                if (jl_is_unionall(v))
-                    v = jl_unwrap_unionall(v);
-                if (jl_is_datatype(v)) {
-                    jl_datatype_t *tt = (jl_datatype_t*)v;
-                    tt->name->module = jl_core_module;
-                    if (tt->name->mt)
-                        tt->name->mt->module = jl_core_module;
-                }
-            }
-        }
-    }
-    export_small_typeof();
+    jl_const_type = (jl_datatype_t*)core("Const");
+    jl_partial_struct_type = (jl_datatype_t*)core("PartialStruct");
+    jl_interconditional_type = (jl_datatype_t*)core("InterConditional");
+    jl_partial_opaque_type = (jl_datatype_t*)core("PartialOpaque");
+
+    export_jl_small_typeof();
+    export_jl_sysimg_globals();
 }
+
 #undef XX
 
 #ifdef __cplusplus
diff --git a/src/jsvm-emscripten/asyncify_setup.js b/src/jsvm-emscripten/asyncify_setup.js
deleted file mode 100644
index 6783206602fd0..0000000000000
--- a/src/jsvm-emscripten/asyncify_setup.js
+++ /dev/null
@@ -1,144 +0,0 @@
-Module.preRun.push(function() {
-    if (typeof Asyncify !== "undefined") {
-        Asyncify.instrumentWasmExports = function (exports) { return exports; };
-        Asyncify.handleSleep = function (startAsync) {
-            if (ABORT) return;
-            Module['noExitRuntime'] = true;
-            if (Asyncify.state === Asyncify.State.Normal) {
-                // Prepare to sleep. Call startAsync, and see what happens:
-                // if the code decided to call our callback synchronously,
-                // then no async operation was in fact begun, and we don't
-                // need to do anything.
-                var reachedCallback = false;
-                var reachedAfterCallback = false;
-                var task = get_current_task();
-                startAsync(function(returnValue) {
-                assert(!returnValue || typeof returnValue === 'number'); // old emterpretify API supported other stuff
-                if (ABORT) return;
-                Asyncify.returnValue = returnValue || 0;
-                reachedCallback = true;
-                if (!reachedAfterCallback) {
-                    // We are happening synchronously, so no need for async.
-                    return;
-                }
-                schedule_and_wait(task);
-                });
-                reachedAfterCallback = true;
-                if (!reachedCallback) {
-                    Module['_jl_task_wait']();
-                }
-            } else if (Asyncify.state === Asyncify.State.Rewinding) {
-                // Stop a resume.
-                finish_schedule_task();
-            } else {
-                abort('invalid state: ' + Asyncify.state);
-            }
-            return Asyncify.returnValue;
-        };
-    }
-});
-
-function get_current_task() {
-    return Module['_jl_get_current_task']();
-}
-
-function get_root_task() {
-    return Module['_jl_get_root_task']();
-}
-
-function task_ctx_ptr(task) {
-    return Module["_task_ctx_ptr"](task);
-}
-
-function ctx_save(ctx) {
-    var stackPtr = stackSave();
-
-    // Save the bottom of the C stack in the task context. It simultaneously
-    // serves as the top of the asyncify stack.
-    HEAP32[ctx + 4 >> 2] = stackPtr;
-
-    Asyncify.state = Asyncify.State.Unwinding;
-    Module['_asyncify_start_unwind'](ctx);
-    if (Browser.mainLoop.func) {
-        Browser.mainLoop.pause();
-    }
-}
-
-function do_start_task(old_stack)
-{
-    try {
-        // start_task is always the entry point for any task
-        Module['_start_task']();
-    } catch(e) {
-        stackRestore(old_stack)
-        if (e !== e+0 && e !== 'killed') throw e;
-        maybe_schedule_next();
-        return;
-    }
-    // Either unwind or normal exit. In either case, we're back at the main task
-    if (Asyncify.state === Asyncify.State.Unwinding) {
-        // We just finished unwinding for a sleep.
-        Asyncify.state = Asyncify.State.Normal;
-        Module['_asyncify_stop_unwind']();
-    }
-    stackRestore(old_stack);
-    maybe_schedule_next();
-}
-
-function schedule_and_wait(task) {
-    Module['_jl_schedule_task'](task);
-    Module['_jl_task_wait']();
-}
-
-function finish_schedule_task() {
-    Asyncify.state = Asyncify.State.Normal;
-    Module['_asyncify_stop_rewind']();
-}
-
-next_ctx = 0;
-next_need_start = true;
-function set_next_ctx(ctx, needs_start) {
-    next_ctx = ctx;
-    next_need_start = needs_start;
-}
-
-function root_ctx() {
-    return task_ctx_ptr(get_root_task())
-}
-
-function ctx_switch(lastt_ctx) {
-    if (lastt_ctx == root_ctx()) {
-        // If we're in the root context, switch to
-        // the new ctx now, else we'll get there after
-        // unwinding.
-        return schedule_next()
-    } else if (lastt_ctx == 0) {
-        throw 'killed';
-    } else {
-        return ctx_save(lastt_ctx);
-    }
-}
-
-function schedule_next()
-{
-    old_stack = stackSave();
-    var next_task_stack = HEAP32[next_ctx + 4 >> 2];
-    if (!next_need_start) {
-        Asyncify.state = Asyncify.State.Rewinding;
-        Module['_asyncify_start_rewind'](next_ctx);
-        if (Browser.mainLoop.func) {
-            Browser.mainLoop.resume();
-        }
-    }
-    next_ctx = -1;
-    stackRestore(next_task_stack);
-    do_start_task(old_stack)
-}
-
-function maybe_schedule_next() {
-    assert(next_ctx != -1);
-    if (next_ctx == root_ctx() || next_ctx == 0) {
-        return;
-    }
-    schedule_next()
-}
diff --git a/src/jsvm-emscripten/task.js b/src/jsvm-emscripten/task.js
deleted file mode 100644
index ba695a5a40052..0000000000000
--- a/src/jsvm-emscripten/task.js
+++ /dev/null
@@ -1,15 +0,0 @@
-mergeInto(LibraryManager.library, {
-  jl_set_fiber: function(ctx) {
-    set_next_ctx(ctx, false);
-    return ctx_switch(0)
-  },
-  jl_swap_fiber: function(lastt_ctx, ctx) {
-    set_next_ctx(ctx, false);
-    return ctx_switch(lastt_ctx)
-  },
-  jl_start_fiber: function(lastt_ctx, ctx) {
-    set_next_ctx(ctx, true);
-    return ctx_switch(lastt_ctx)
-  }
-});
-
diff --git a/src/julia-parser.scm b/src/julia-parser.scm
index 210ba8f0ae07b..1a11494b5c8e3 100644
--- a/src/julia-parser.scm
+++ b/src/julia-parser.scm
@@ -10,7 +10,7 @@
 ;; comma - higher than assignment outside parentheses, lower when inside
 (define prec-pair (add-dots '(=>)))
 (define prec-conditional '(?))
-(define prec-arrow       (add-dots '(← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⥷ ⭄ ⥺ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ￩ ￫ ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <-- <-->)))
+(define prec-arrow       (add-dots '(← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⥷ ⭄ ⥺ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ￩ ￫ ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <-- <--> 🢲)))
 (define prec-lazy-or     (add-dots '(|\|\||)))
 (define prec-lazy-and    (add-dots '(&&)))
 (define prec-comparison
@@ -716,7 +716,7 @@
 
 ;; ";" at the top level produces a sequence of top level expressions
 (define (parse-stmts s)
-  (let ((ex (parse-Nary s (lambda (s) (parse-docstring s parse-eq))
+  (let ((ex (parse-Nary s (lambda (s) (parse-public s parse-eq))
                         '(#\;) 'toplevel (lambda (x) (eqv? x #\newline)) #f)))
     ;; check for unparsed junk after an expression
     (let ((t (peek-token s)))
@@ -1329,13 +1329,13 @@
 
 (define (valid-func-sig? paren sig)
   (and (pair? sig)
-       (or (eq? (car sig) 'call)
-           (eq? (car sig) 'tuple)
+       (or (memq (car sig) '(call tuple))
+           (and (not paren) (eq? (car sig) 'macrocall))
            (and paren (eq? (car sig) 'block))
            (and paren (eq? (car sig) '...))
            (and (eq? (car sig) '|::|)
                 (pair? (cadr sig))
-                (eq? (car (cadr sig)) 'call))
+                (memq (car (cadr sig)) '(call macrocall)))
            (and (eq? (car sig) 'where)
                 (valid-func-sig? paren (cadr sig))))))
 
@@ -1608,18 +1608,18 @@
        ((module baremodule)
         (let* ((name (parse-unary-prefix s))
                (loc  (line-number-node s))
-               (body (parse-block s (lambda (s) (parse-docstring s parse-eq)))))
+               (body (parse-block s (lambda (s) (parse-public s parse-eq)))))
           (if (reserved-word? name)
               (error (string "invalid module name \"" name "\"")))
           (expect-end s word)
           (list 'module (if (eq? word 'module) '(true) '(false)) name
                 `(block ,loc ,@(cdr body)))))
-       ((export)
+       ((export public)
         (let ((es (map macrocall-to-atsym
                        (parse-comma-separated s parse-unary-prefix))))
           (if (not (every symbol-or-interpolate? es))
-              (error "invalid \"export\" statement"))
-          `(export ,@es)))
+              (error (string "invalid \"" word "\" statement")))
+          `(,word ,@es)))
        ((import using)
         (parse-imports s word))
        ((do)
@@ -2610,15 +2610,23 @@
 
 (define (valid-modref? e)
   (and (length= e 3) (eq? (car e) '|.|) (pair? (caddr e))
-       (eq? (car (caddr e)) 'quote) (symbol? (cadr (caddr e)))
+       (or (eq? (car (caddr e)) 'quote)
+           (eq? (car (caddr e)) 'inert))
+       (symbol? (cadr (caddr e)))
        (or (symbol? (cadr e))
            (valid-modref? (cadr e)))))
 
 (define (macroify-name e . suffixes)
   (cond ((symbol? e) (symbol (apply string #\@ e suffixes)))
+        ((and (pair? e) (eq? (car e) 'quote))
+         `(quote ,(apply macroify-name (cadr e) suffixes)))
+        ((and (pair? e) (eq? (car e) 'inert))
+         `(inert ,(apply macroify-name (cadr e) suffixes)))
+        ((globalref? e)
+         `(globalref ,(cadr e) ,(apply macroify-name (caddr e) suffixes)))
         ((valid-modref? e)
          `(|.| ,(cadr e)
-               (quote ,(apply macroify-name (cadr (caddr e)) suffixes))))
+               ,(apply macroify-name (caddr e) suffixes)))
         (else (error (string "invalid macro usage \"@(" (deparse e) ")\"" )))))
 
 (define (macroify-call s call startloc)
@@ -2664,6 +2672,17 @@
            ;; string interpolation
            (eq? (car e) 'string))))
 
+(define (parse-public s production)
+  (if (eq? (peek-token s) 'public)
+      (let ((spc (ts:space? s)))
+        (take-token s)
+        (if (memv (peek-token s) '(#\( = #\[))
+            (begin ;; TODO: deprecation warning here
+                   (ts:put-back! s 'public spc)
+                   (parse-docstring s production))
+            (parse-resword s 'public)))
+      (parse-docstring s production)))
+
 (define (parse-docstring s production)
   (let ((startloc (line-number-node s)) ; be sure to use the line number from the head of the docstring
         (ex       (production s)))
diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm
index cd11f46b1eb38..c9b1fd551e359 100644
--- a/src/julia-syntax.scm
+++ b/src/julia-syntax.scm
@@ -3,11 +3,16 @@
 
 ;; pass 1: syntax desugaring
 
-;; allow (:: T) => (:: #gensym T) in formal argument lists
+;; unnamed or all-underscore arguments may still be read from internally, so
+;; convert (:: T) => (:: #gensym T) and _ => #gensym in formal argument lists
 (define (fill-missing-argname a unused)
-  (if (and (pair? a) (eq? (car a) '|::|) (null? (cddr a)))
-      `(|::| ,(if unused UNUSED (gensy)) ,(cadr a))
-      a))
+  (define (replace-if-underscore u)
+    (if (underscore-symbol? u) (if unused UNUSED (gensy)) u))
+  (if (and (pair? a) (eq? (car a) '|::|))
+      (cond ((null? (cddr a))  `(|::| ,(if unused UNUSED (gensy)) ,(cadr a)))
+            ((null? (cdddr a)) `(|::| ,(replace-if-underscore (cadr a)) ,(caddr a)))
+            (else a))
+      (replace-if-underscore a)))
 (define (fix-arglist l (unused #t))
   (if (any vararg? (butlast l))
       (error "invalid \"...\" on non-final argument"))
@@ -165,10 +170,7 @@
 ;; GF method does not need to keep decl expressions on lambda args
 ;; except for rest arg
 (define (method-lambda-expr argl body rett)
-  (let ((argl (map (lambda (x)
-                     (let ((n (arg-name x)))
-                       (if (underscore-symbol? n) UNUSED n)))
-                   argl))
+  (let ((argl (map arg-name argl))
         (body (blockify body)))
     `(lambda ,argl ()
              (scope-block
@@ -183,6 +185,7 @@
                        (meta ret-type ,R)
                        ,@(list-tail body (+ 1 (length meta))))))))))
 
+
 ;; convert x<:T<:y etc. exprs into (name lower-bound upper-bound)
 ;; a bound is #f if not specified
 (define (analyze-typevar e)
@@ -193,10 +196,14 @@
   (cond ((atom? e) (list (check-sym e) #f #f))
         ((eq? (car e) 'var-bounds)  (cdr e))
         ((and (eq? (car e) 'comparison) (length= e 6))
-         (cons (check-sym (cadddr e))
-               (cond ((and (eq? (caddr e) '|<:|) (eq? (caddr (cddr e)) '|<:|))
-                      (list (cadr e) (last e)))
-                     (else (error "invalid bounds in \"where\"")))))
+         (let* ((lhs (list-ref e 1))
+                (rel (list-ref e 2))
+                (t (check-sym (list-ref e 3)))
+                (rel-same (eq? rel (list-ref e 4)))
+                (rhs (list-ref e 5)))
+           (cond ((and rel-same (eq? rel '|<:|)) (list t lhs rhs))
+                 ((and rel-same (eq? rel '|>:|)) (list t rhs lhs))
+                 (else (error "invalid bounds in \"where\"")))))
         ((eq? (car e) '|<:|)
          (list (check-sym (cadr e)) #f (caddr e)))
         ((eq? (car e) '|>:|)
@@ -225,13 +232,23 @@
                  (if lb (list lb ub) (list ub))
                  (if lb (list lb '(core Any)) '())))))
 
+(define (is-method? x)
+  (if (and (pair? x) (eq? (car x) 'method))
+      (let ((name (cadr x)))
+        (if (and (pair? name) (eq? (car name) 'globalref))
+            (let ((name (caddr name)))
+              (if (symbol? name)
+                  #t
+                  #f))
+            (if (symbol? name)
+                #t
+                #f)))
+      #f))
+
 (define (method-expr-name m)
   (let ((name (cadr m)))
-     (let ((name (if (or (length= m 2) (not (pair? name)) (not (quoted? name))) name (cadr name))))
-       (cond ((not (pair? name)) name)
-             ((eq? (car name) 'outerref) (cadr name))
-             ;((eq? (car name) 'globalref) (caddr name))
-             (else name)))))
+      (cond ((globalref? name) (caddr name))
+            (else name))))
 
 ;; extract static parameter names from a (method ...) expression
 (define (method-expr-static-parameters m)
@@ -248,8 +265,7 @@
 
 (define (nodot-sym-ref? e)
   (or (symbol? e)
-      (and (length= e 3) (eq? (car e) 'globalref))
-      (and (length= e 2) (eq? (car e) 'outerref))))
+      (and (length= e 3) (eq? (car e) 'globalref))))
 
 ;; expressions of the form a.b.c... where everything is a symbol
 (define (sym-ref? e)
@@ -296,7 +312,8 @@
                                   (if (eq? n '|#self#|) (gensy) n))
                                 arg-names))))
     (let ((body (insert-after-meta body  ;; don't specialize on generator arguments
-                                   `((meta nospecialize ,@arg-names)))))
+                                   ;; arg-names slots start at 2 (after name)
+                                   `((meta nospecialize ,@(map (lambda (idx) `(slot ,(+ idx 2))) (iota (length arg-names))))))))
       `(block
         (global ,name)
         (function (call ,name ,@arg-names) ,body)))))
@@ -359,7 +376,7 @@
                                    (append req opt vararg) rett)))))
    ;; no optional positional args
    (let* ((names (map car sparams))
-          (anames (map (lambda (x) (if (underscore-symbol? x) UNUSED x)) (llist-vars argl)))
+          (anames (llist-vars argl))
           (unused_anames (filter (lambda (x) (not (eq? x UNUSED))) anames))
           (ename (if (nodot-sym-ref? name) name
                     (if (overlay? name) (cadr name) `(null)))))
@@ -375,7 +392,7 @@
             (generator (if (expr-contains-p if-generated? body (lambda (x) (not (function-def? x))))
                            (let* ((gen    (generated-version body))
                                   (nongen (non-generated-version body))
-                                  (gname  (symbol (string (gensy) "#" (current-julia-module-counter))))
+                                  (gname  (symbol (string (gensy) "#" (current-julia-module-counter '()))))
                                   (gf     (make-generator-function gname names anames gen)))
                              (set! body (insert-after-meta
                                          nongen
@@ -423,10 +440,12 @@
                                  (inert ,loc)))
                           ,body))))
        (if (or (symbol? name) (globalref? name))
-           `(block ,@generator (method ,name) ,mdef (unnecessary ,name))  ;; return the function
-           (if (not (null? generator))
-               `(block ,@generator ,mdef)
-               mdef))))))
+           `(block ,@generator (method ,name) (latestworld-if-toplevel) ,mdef (unnecessary ,name))  ;; return the function
+           (if (overlay? name)
+             (if (not (null? generator))
+                `(block ,@generator ,mdef)
+                mdef)
+            `(block ,@generator ,mdef (null))))))))
 
 ;; wrap expr in nested scopes assigning names to vals
 (define (scopenest names vals expr)
@@ -508,31 +527,31 @@
                   sparams))
          (kw      (gensy))
          (kwdecl  `(|::| ,kw (core NamedTuple)))
-         (rkw     (if (null? restkw) (make-ssavalue) (symbol (string (car restkw) "..."))))
+         (rkw     (if (null? restkw) '() (symbol (string (car restkw) "..."))))
          (restkw  (map (lambda (v) `(|::| ,v (call (top pairs) (core NamedTuple)))) restkw))
          (mangled (let ((und (and name (undot-name name))))
                     (symbol (string (if (and name (= (string.char (string name) 0) #\#))
                                         ""
                                         "#")
                                     (or und '_) "#"
-                                    (string (current-julia-module-counter)))))))
+                                    (string (current-julia-module-counter '())))))))
       ;; this is a hack: nest these statements inside a call so they get closure
       ;; converted together, allowing all needed types to be defined before any methods.
       `(call (core ifelse) (false) (false) (block
         ;; forward-declare function so its type can occur in the signature of the inner method below
         ,@(if (or (symbol? name) (globalref? name)) `((method ,name)) '())
+        (latestworld-if-toplevel)
 
         ;; call with keyword args pre-sorted - original method code goes here
         ,(method-def-expr-
           mangled sparams
-          `((|::| ,mangled (call (core typeof) ,mangled)) ,@vars ,@restkw
-            ;; strip type off function self argument if not needed for a static param.
-            ;; then it is ok for cl-convert to move this definition above the original def.
-            ,@not-optional ,@vararg)
+          `((|::| ,mangled (call (core typeof) ,mangled)) ,@vars ,@restkw ,@not-optional ,@vararg)
           (insert-after-meta `(block
                                ,@stmts)
                              (cons `(meta nkw ,(+ (length vars) (length restkw)))
-                                   annotations))
+                                   (if (has-thisfunction? `(block ,@stmts))
+                                       (cons `(meta thisfunction-original ,(arg-name (car not-optional))) annotations)
+                                       annotations)))
           rett)
 
         ;; call with no keyword args
@@ -555,7 +574,7 @@
           name positional-sparams
           `((|::|
              ;; if there are optional positional args, we need to be able to reference the function name
-             ,(if (any kwarg? pargl) (gensy) UNUSED)
+             ,(if (any kwarg? `(,@pargl ,@vararg)) (gensy) UNUSED)
              (call (core kwftype) ,ftype)) ,kwdecl ,@pargl ,@vararg)
           `(block
             ;; propagate method metadata to keyword sorter
@@ -607,16 +626,18 @@
                                ,tempslot)))
                    vars vals)
               `(block
-                (= ,rkw (call (top pairs)
-                              ,(if (null? keynames)
-                                   kw
-                                   `(call (top structdiff) ,kw (curly (core NamedTuple)
-                                                                      (tuple ,@(map quotify keynames)))))))
-                ,@(if (null? restkw)
-                      `((if (call (top isempty) ,rkw)
+                ,(if (null? restkw)
+                      `(if (call (top isempty)
+                                 (call (top diff_names)
+                                       (call (top keys) ,kw)
+                                       (tuple ,@(map quotify keynames))))
                             (null)
-                            (call (top kwerr) ,kw ,@(map arg-name pargl) ,@splatted-vararg)))
-                      '())
+                            (call (top kwerr) ,kw ,@(map arg-name pargl) ,@splatted-vararg))
+                      `(= ,rkw (call (top pairs)
+                                     ,(if (null? keynames)
+                                          kw
+                                          `(call (top structdiff) ,kw (curly (core NamedTuple)
+                                                                             (tuple ,@(map quotify keynames))))))))
                 (return (call ,mangled  ;; finally, call the core function
                               ,@keyvars
                               ,@(if (null? restkw) '() (list rkw))
@@ -662,26 +683,28 @@
                       (vals   (list-tail dfl n))
                       (absent (list-tail opt n)) ;; absent arguments
                       (body
-                       (if (any (lambda (defaultv)
-                                  ;; does any default val expression...
-                                  (contains (lambda (e)
-                                              ;; contain "e" such that...
-                                              (any (lambda (a)
-                                                     ;; "e" is in an absent arg
-                                                     (contains (lambda (u)
-                                                                 (eq? u e))
-                                                               a))
-                                                   absent))
-                                            defaultv))
-                                vals)
-                           ;; then add only one next argument
-                           `(block
-                             ,@prologue
-                             (call ,(arg-name (car req)) ,@(map arg-name (cdr passed)) ,(car vals)))
-                           ;; otherwise add all
-                           `(block
-                             ,@prologue
-                             (call ,(arg-name (car req)) ,@(map arg-name (cdr passed)) ,@vals)))))
+                       (if (any vararg? (butlast vals))
+                           ;; Forbid splat in all but the final default value
+                           (error "invalid \"...\" in non-final positional argument default value")
+                           (if (any (lambda (defaultv)
+                                      ;; does any default val expression...
+                                      (expr-contains-p
+                                       (lambda (e)
+                                         ;; contain "e" such that...
+                                         (any (lambda (a)
+                                                ;; "e" is in an absent arg
+                                                (expr-contains-eq e a))
+                                              absent))
+                                       defaultv))
+                                    vals)
+                               ;; then add only one next argument
+                               `(block
+                                 ,@prologue
+                                 (call ,(arg-name (car req)) ,@(map arg-name (cdr passed)) ,(car vals)))
+                               ;; otherwise add all
+                               `(block
+                                 ,@prologue
+                                 (call ,(arg-name (car req)) ,@(map arg-name (cdr passed)) ,@vals))))))
                  (method-def-expr- name sp passed body)))
              (iota (length opt)))
       ,(method-def-expr- name sparams overall-argl body rett))))
@@ -737,64 +760,34 @@
    (params bounds) (sparam-name-bounds params)
    (struct-def-expr- name params bounds super (flatten-blocks fields) mut)))
 
-;; replace field names with gensyms if they conflict with field-types
-(define (safe-field-names field-names field-types)
-  (if (any (lambda (v) (contains (lambda (e) (eq? e v)) field-types))
-           field-names)
-      (map (lambda (x) (gensy)) field-names)
-      ;; use a different name for a field called `_`
-      (map (lambda (x) (if (eq? x '_) (gensy) x)) field-names)))
-
-(define (with-wheres call wheres)
-  (if (pair? wheres)
-      `(where ,call ,@wheres)
-      call))
-
-(define (default-inner-ctors name field-names field-types params bounds locs)
-  (let* ((field-names (safe-field-names field-names field-types))
-         (all-ctor (if (null? params)
-          ;; definition with exact types for all arguments
-          `(function (call ,name
-                          ,@(map make-decl field-names field-types))
-                    (block
-                     ,@locs
-                     (new (outerref ,name) ,@field-names)))
-          #f))
-         (any-ctor (if (or (not all-ctor) (any (lambda (t) (not (equal? t '(core Any))))
-                                 field-types))
-          ;; definition with Any for all arguments
-          ;; only if any field type is not Any, checked at runtime
-          `(function (call (|::| |#ctor-self#|
-                            ,(with-wheres
-                              `(curly (core Type) ,(if (pair? params)
-                                                       `(curly ,name ,@params)
-                                                       name))
-                              (map (lambda (b) (cons 'var-bounds b)) bounds)))
-                           ,@field-names)
-                     (block
-                      ,@locs
-                      (call new ,@field-names))) ; this will add convert calls later
-          #f)))
-    (if all-ctor
-        (if any-ctor
-            (list all-ctor
-                  `(if ,(foldl (lambda (t u)
-                           `(&& ,u (call (core ===) (core Any) ,t)))
-                         `(call (core ===) (core Any) ,(car field-types))
-                         (cdr field-types))
-                       '(block)
-                       ,any-ctor))
-            (list all-ctor))
-        (list any-ctor))))
-
-(define (default-outer-ctor name field-names field-types params bounds locs)
-  (let ((field-names (safe-field-names field-names field-types)))
-    `(function ,(with-wheres
-                 `(call ,name ,@(map make-decl field-names field-types))
-                 (map (lambda (b) (cons 'var-bounds b)) bounds))
-               (block
-                ,@locs
-                (new (curly ,name ,@params) ,@field-names)))))
+;; definition with Any for all arguments (except type, which is exact)
+;; field-kinds:
+;;   -1 no convert (e.g. because it is Any)
+;;    0 normal convert to fieldtype
+;;   1+ static_parameter N
+(define (default-inner-ctor-body field-kinds file line)
+  (let* ((name '|#ctor-self#|)
+         (field-names (map (lambda (idx) (symbol (string "_" (+ idx 1)))) (iota (length field-kinds))))
+         (field-convert (lambda (fld fty val)
+              (cond ((eq? fty -1) val)
+                    ((> fty 0) (convert-for-type-decl val `(static_parameter ,fty) #f #f))
+                    (else (convert-for-type-decl val `(call (core fieldtype) ,name ,(+ fld 1)) #f #f)))))
+         (field-vals (map field-convert (iota (length field-names)) field-kinds field-names))
+         (body `(block
+                 (line ,line ,file)
+                 (return (new ,name ,@field-vals)))))
+    `(lambda ,(cons name field-names) () (scope-block ,body))))
+
+;; definition with exact types for all arguments (except type, which is not parameterized)
+(define (default-outer-ctor-body thistype field-count sparam-count file line)
+  (let* ((name '|#ctor-self#|)
+         (field-names (map (lambda (idx) (symbol (string "_" (+ idx 1)))) (iota field-count)))
+         (sparams (map (lambda (idx) `(static_parameter ,(+ idx 1))) (iota sparam-count)))
+         (type (if (null? sparams) name `(curly ,thistype ,@sparams)))
+         (body `(block
+                 (line ,line ,file)
+                 (return (new ,type ,@field-names)))))
+    `(lambda ,(cons name field-names) () (scope-block ,body))))
 
 (define (num-non-varargs args)
   (count (lambda (a) (not (vararg? a))) args))
@@ -810,10 +803,10 @@
     (if (> nnv (length params))
         (error "too many type parameters specified in \"new{...}\"")))
   (let* ((Texpr (if (null? type-params)
-                    `(outerref ,Tname)
+                    `(globalref (thismodule) ,Tname)
                     (if selftype?
                         '|#ctor-self#|
-                        `(curly (outerref ,Tname)
+                        `(curly (globalref (thismodule) ,Tname)
                                 ,@type-params))))
          (tn (if (symbol? Texpr) Texpr (make-ssavalue)))
          (field-convert (lambda (fld fty val)
@@ -948,6 +941,19 @@
         (ctors-min-initialized (car expr))
         (ctors-min-initialized (cdr expr)))))
 
+(define (insert-struct-shim field-types name)
+  (map (lambda (x)
+      (expr-replace (lambda (y)
+                      (and (length= y 3) (eq? (car y) '|.|)
+                            (or (equal? (caddr y) `(quote ,name))
+                                (equal? (caddr y) `(inert ,name)))))
+                    x
+                    (lambda (y)
+                      `(call (core struct_name_shim)
+                              ,(cadr y) ,(caddr y)
+                              (thismodule) ,name))))
+        field-types))
+
 (define (struct-def-expr- name params bounds super fields0 mut)
   (receive
    (fields defs) (separate eventually-decl? fields0)
@@ -964,16 +970,15 @@
                          fields)))
           (attrs (reverse attrs))
           (defs        (filter (lambda (x) (not (or (effect-free? x) (eq? (car x) 'string)))) defs))
-          (locs        (if (and (pair? fields0) (linenum? (car fields0)))
-                           (list (car fields0))
-                           '()))
+          (loc         (if (and (pair? fields0) (linenum? (car fields0)))
+                           (car fields0)
+                           '(line 0 ||)))
           (field-names (map decl-var fields))
           (field-types (map decl-type fields))
-          (defs2 (if (null? defs)
-                     (default-inner-ctors name field-names field-types params bounds locs)
-                     defs))
           (min-initialized (min (ctors-min-initialized defs) (length fields)))
-          (prev (make-ssavalue)))
+          (hasprev (make-ssavalue))
+          (prev (make-ssavalue))
+          (newdef (make-ssavalue)))
      (let ((dups (has-dups field-names)))
        (if dups (error (string "duplicate field name: \"" (car dups) "\" is not unique"))))
      (for-each (lambda (v)
@@ -981,73 +986,58 @@
                      (error (string "field name \"" (deparse v) "\" is not a symbol"))))
                field-names)
      `(block
-       (global ,name) (const ,name)
+       (global ,name)
        (scope-block
         (block
          (hardscope)
          (local-def ,name)
          ,@(map (lambda (v) `(local ,v)) params)
          ,@(map (lambda (n v) (make-assignment n (bounds-to-TypeVar v #t))) params bounds)
-         (toplevel-only struct (outerref ,name))
+         (toplevel-only struct (globalref (thismodule) ,name))
          (= ,name (call (core _structtype) (thismodule) (inert ,name) (call (core svec) ,@params)
                         (call (core svec) ,@(map quotify field-names))
                         (call (core svec) ,@attrs)
                         ,mut ,min-initialized))
          (call (core _setsuper!) ,name ,super)
-         (if (isdefined (outerref ,name))
-             (block
-              (= ,prev (outerref ,name))
-              (if (call (core _equiv_typedef) ,prev ,name)
-                  ;; if this is compatible with an old definition, use the existing type object
-                  ;; and its parameters
-                  (block (= ,name ,prev)
-                         ,@(if (pair? params)
-                               `((= (tuple ,@params) (|.|
-                                                      ,(foldl (lambda (_ x) `(|.| ,x (quote body)))
-                                                              prev
-                                                              params)
-                                                      (quote parameters))))
-                               '()))
-                  ;; otherwise do an assignment to trigger an error
-                  (= (outerref ,name) ,name)))
-             (= (outerref ,name) ,name))
-         (call (core _typebody!) ,name (call (core svec) ,@field-types))
+         (= ,hasprev (&& (call (core isdefinedglobal) (thismodule) (inert ,name) (false)) (call (core _equiv_typedef) (globalref (thismodule) ,name) ,name)))
+         (= ,prev (if ,hasprev (globalref (thismodule) ,name) (false)))
+         (if ,hasprev
+            ;; if this is compatible with an old definition, use the old parameters, but the
+            ;; new object. This will fail to capture recursive cases, but the call to typebody!
+            ;; below is permitted to choose either type definition to put into the binding table
+            (block ,@(if (pair? params)
+                          `((= (tuple ,@params) (|.|
+                                                ,(foldl (lambda (_ x) `(|.| ,x (quote body)))
+                                                        prev
+                                                        params)
+                                                (quote parameters))))
+                          '())))
+         (= ,newdef (call (core _typebody!) ,prev ,name (call (core svec) ,@(insert-struct-shim field-types name))))
+         (const (globalref (thismodule) ,name) ,newdef)
+         (latestworld)
          (null)))
-       ;; "inner" constructors
-       (scope-block
-        (block
-         (hardscope)
-         (global ,name)
-         ,@(map (lambda (c)
-                  (rewrite-ctor c name params field-names field-types))
-                defs2)))
-       ;; "outer" constructors
-       ,@(if (and (null? defs)
-                  (not (null? params))
-                  ;; To generate an outer constructor, each parameter must occur in a field
-                  ;; type, or in the bounds of a subsequent parameter.
-                  ;; Otherwise the constructor would not work, since the parameter values
-                  ;; would never be specified.
-                  (let loop ((root-types field-types)
-                             (sp         (reverse bounds)))
-                    (or (null? sp)
-                        (let ((p (car sp)))
-                          (and (expr-contains-eq (car p) (cons 'list root-types))
-                               (loop (append (cdr p) root-types)
-                                     (cdr sp)))))))
-             `((scope-block
-                (block
-                 (global ,name)
-                 ,(default-outer-ctor name field-names field-types
-                    params bounds locs))))
-             '())
+       ;; Always define ctors even if we didn't change the definition.
+       ;; If newdef===prev, then this is a bit suspect, since we don't know what might be
+       ;; changing about the old ctor definitions (we don't even track whether we're
+       ;; replacing defaultctors with identical ones). But it seems better to have the ctors
+       ;; added alongside (replacing) the old ones, than to not have them and need them.
+       ;; Commonly Revise.jl should be used to figure out actually which methods should
+       ;; actually be deleted or added anew.
+       ,(if (null? defs)
+          `(call (core _defaultctors) ,newdef (inert ,loc))
+          `(scope-block
+            (block
+             (hardscope)
+             (global ,name)
+             ,@(map (lambda (c) (rewrite-ctor c name params field-names field-types)) defs))))
+       (latestworld)
        (null)))))
 
 (define (abstract-type-def-expr name params super)
   (receive
    (params bounds) (sparam-name-bounds params)
    `(block
-     (global ,name) (const ,name)
+     (global ,name)
      (scope-block
       (block
        (local-def ,name)
@@ -1056,18 +1046,19 @@
        (toplevel-only abstract_type)
        (= ,name (call (core _abstracttype) (thismodule) (inert ,name) (call (core svec) ,@params)))
        (call (core _setsuper!) ,name ,super)
-       (call (core _typebody!) ,name)
-       (if (&& (isdefined (outerref ,name))
-               (call (core _equiv_typedef) (outerref ,name) ,name))
+       (call (core _typebody!) (false) ,name)
+       (if (&& (call (core isdefinedglobal) (thismodule) (inert ,name) (false))
+               (call (core _equiv_typedef) (globalref (thismodule) ,name) ,name))
            (null)
-           (= (outerref ,name) ,name))
+           (const (globalref (thismodule) ,name) ,name))
+       (latestworld)
        (null))))))
 
 (define (primitive-type-def-expr n name params super)
   (receive
    (params bounds) (sparam-name-bounds params)
    `(block
-     (global ,name) (const ,name)
+     (global ,name)
      (scope-block
       (block
        (local-def ,name)
@@ -1076,11 +1067,12 @@
        (toplevel-only primitive_type)
        (= ,name (call (core _primitivetype) (thismodule) (inert ,name) (call (core svec) ,@params) ,n))
        (call (core _setsuper!) ,name ,super)
-       (call (core _typebody!) ,name)
-       (if (&& (isdefined (outerref ,name))
-               (call (core _equiv_typedef) (outerref ,name) ,name))
+       (call (core _typebody!) (false) ,name)
+       (if (&& (call (core isdefinedglobal) (thismodule) (inert ,name) (false))
+               (call (core _equiv_typedef) (globalref (thismodule) ,name) ,name))
            (null)
-           (= (outerref ,name) ,name))
+           (const (globalref (thismodule) ,name) ,name))
+       (latestworld)
        (null))))))
 
 ;; take apart a type signature, e.g. T{X} <: S{Y}
@@ -1095,6 +1087,15 @@
                        (values name params super)) ex)
       (error "invalid type signature")))
 
+;; normalize ccall first argument to tuple form with basic error checking
+(define (normalize-ccall-name raw-name)
+  (cond
+   ;; Already a tuple - keep as-is, validation will happen in C
+   ((tuple-syntax? raw-name)
+    raw-name)
+   ;; Otherwise it is an atom or pointer expression, which will be validated later in C
+   (else (expand-forms raw-name))))
+
 ;; insert calls to convert() in ccall, and pull out expressions that might
 ;; need to be rooted before conversion.
 (define (lower-ccall name RT atypes args cconv nreq)
@@ -1114,7 +1115,7 @@
       (if (null? A)
           `(block
             ,.(reverse! stmts)
-            (foreigncall ,(expand-forms name) ,(expand-forms RT) (call (core svec) ,@(reverse! T))
+            (foreigncall ,(normalize-ccall-name name) ,(expand-forms RT) (call (core svec) ,@(reverse! T))
                          ;; 0 or number of arguments before ... in definition
                          ,(or nreq
                               (if isseq (- (length atypes) 1) 0))
@@ -1158,7 +1159,7 @@
         (error (string "invalid argument destructuring syntax \"" (deparse a) "\""))
         a))
   (define (transform-arg a)
-    (cond ((and (pair? a) (eq? (car a) 'tuple))
+    (cond ((tuple-syntax? a)
            (let ((a2 (gensy)))
              (cons a2 `(local (= ,(check-lhs a) ,a2)))))
           ((or (and (decl? a) (length= a 3)) (kwarg? a))
@@ -1195,7 +1196,9 @@
     (cond ((and (length= e 2) (or (symbol? name) (globalref? name)))
            (if (not (valid-name? name))
                (error (string "invalid function name \"" name "\"")))
-           `(method ,name))
+           (if (globalref? name)
+             `(block (global ,name) (method ,name))
+             `(block (global-if-global ,name) (method ,name))))
           ((not (pair? name))  e)
           ((eq? (car name) 'call)
            (let* ((raw-typevars (or where '()))
@@ -1249,7 +1252,7 @@
                    (list a)))
          ;; TODO: always use a specific special name like #anon# or _, then ignore
          ;; this as a local variable name.
-         (name (symbol (string "#" (current-julia-module-counter)))))
+         (name (symbol (string "#" (current-julia-module-counter '())))))
     (expand-forms
      `(block (local ,name)
              (function
@@ -1327,8 +1330,7 @@
                                           (= ,vname ,tmp)
                                           ,blk)))))))
                ;; (a, b, c, ...) = rhs
-               ((and (pair? (cadar binds))
-                     (eq? (caadar binds) 'tuple))
+               ((tuple-syntax? (cadar binds))
                 (let ((vars (lhs-vars (cadar binds))))
                   (loop (cdr binds)
                         (let ((tmp (make-ssavalue)))
@@ -1341,15 +1343,18 @@
                (else (error "invalid let syntax"))))
              (else (error "invalid let syntax")))))))))
 
+(define (valid-macro-def-name? e)
+  (or (symbol? e) (valid-modref? e) (globalref? e)))
+
 (define (expand-macro-def e)
   (cond ((and (pair? (cadr e))
               (eq? (car (cadr e)) 'call)
-              (symbol? (cadr (cadr e))))
+              (valid-macro-def-name? (cadr (cadr e))))
          (let ((anames (remove-empty-parameters (cddr (cadr e)))))
            (if (has-parameters? anames)
                (error "macros cannot accept keyword arguments"))
            (expand-forms
-            `(function (call ,(symbol (string #\@ (cadr (cadr e))))
+            `(function (call ,(macroify-name (cadr (cadr e)))
                              (|::| __source__ (core LineNumberNode))
                              (|::| __module__ (core Module))
                              ,@(map (lambda (v)
@@ -1358,8 +1363,8 @@
                                           v))
                                     anames))
                        ,@(cddr e)))))
-        ((and (length= e 2) (symbol? (cadr e)))
-         (expand-forms `(function ,(symbol (string #\@ (cadr e))))))
+        ((and (length= e 2) (valid-macro-def-name? (cadr e)))
+         (expand-forms `(function ,(macroify-name (cadr e)))))
         (else
          (error "invalid macro definition"))))
 
@@ -1420,7 +1425,7 @@
                 (scope-block ,finalb)))))
           ((length> e 3)
            (and (length> e 6) (error "invalid \"try\" form"))
-           (let ((elseb (if (length= e 6) (cdddddr e) '())))
+           (let ((elseb (if (length= e 6) `((scope-block ,@(cdddddr e))) '())))
              (expand-forms
                `(,(if (null? elseb) 'trycatch 'trycatchelse)
                  (scope-block ,tryb)
@@ -1434,42 +1439,59 @@
           (else
            (error "invalid \"try\" form")))))
 
-(define (expand-unionall-def name type-ex)
+(define (expand-unionall-def name type-ex (const? #t))
   (if (and (pair? name)
            (eq? (car name) 'curly))
       (let ((name   (cadr name))
-            (params (cddr name)))
+            (params (cddr name))
+            (rr     (make-ssavalue)))
         (if (null? params)
             (error (string "empty type parameter list in \"" (deparse `(= (curly ,name) ,type-ex)) "\"")))
-        `(block
-          (const-if-global ,name)
-          ,(expand-forms
-            `(= ,name (where ,type-ex ,@params)))))
+          (expand-forms
+            `(block
+              (= ,rr (where ,type-ex ,@params))
+              (,(if const? 'const 'assign-const-if-global) ,name ,rr)
+              (latestworld-if-toplevel)
+              ,rr)))
       (expand-forms
        `(const (= ,name ,type-ex)))))
 
-;; take apart e.g. `const a::Int = 0` into `const a; a::Int = 0`
+(define (filter-not-underscore syms)
+  (filter (lambda (x) (not (underscore-symbol? x))) syms))
+
+;; Expand `[global] const a::T = val`
 (define (expand-const-decl e)
-  (let ((arg (cadr e)))
-    (if (atom? arg)
-        e
-        (case (car arg)
-          ((global local local-def)
-           (for-each (lambda (b) (if (not (assignment? b))
-                                     (error "expected assignment after \"const\"")))
-                     (cdr arg))
-           (expand-forms (expand-decls (car arg) (cdr arg) #t)))
-          ((= |::|)
-           (expand-forms (expand-decls 'const (cdr e) #f)))
-          (else (error "expected assignment after \"const\""))))))
+  (define (check-assignment asgn)
+    (unless (and (pair? asgn) (eq? (car asgn) '=))
+      ;; (const (global x)) is possible due to a parser quirk
+      (error "expected assignment after \"const\"")))
+  (if (length= e 3)
+      `(const ,(cadr e) ,(expand-forms (caddr e)))
+      (let ((arg (cadr e)))
+        (cond
+         ((symbol? arg)
+          ;; Undefined constant: Expr(:const, :a) (not available in surface syntax)
+          `(block ,e (latestworld)))
+         ((eq? (car arg) 'global)
+          (let ((asgn (cadr arg)))
+            (check-assignment asgn)
+            `(block
+              ,.(map (lambda (v) `(global ,v))
+                     (lhs-bound-names (cadr asgn)))
+              ,(expand-assignment asgn #t))))
+         ((eq? (car arg) '=)
+          (check-assignment arg)
+          (expand-assignment arg #t))
+         (else
+          (error "expected assignment after \"const\""))))))
 
 (define (expand-atomic-decl e)
   (error "unimplemented or unsupported atomic declaration"))
 
 (define (expand-local-or-global-decl e)
-  (if (and (symbol? (cadr e)) (length= e 2))
+  (if (and (or (symbol? (cadr e)) (globalref? (cadr e))) (length= e 2))
       e
-      (expand-forms (expand-decls (car e) (cdr e) #f))))
+      (expand-forms (expand-decls (car e) (cdr e)))))
 
 ;; given a complex assignment LHS, return the symbol that will ultimately be assigned to
 (define (assigned-name e)
@@ -1479,37 +1501,185 @@
          (assigned-name (cadr e)))
         (else e)))
 
-;; local x, y=2, z => local x;local y;local z;y = 2
-(define (expand-decls what binds const?)
+;; local x, (y=2), z => local x;local y;local z;y = 2
+(define (expand-decls what binds)
   (if (not (list? binds))
       (error (string "invalid \"" what "\" declaration")))
   (let loop ((b       binds)
-             (vars    '())
+             (decls   '())
              (assigns '()))
     (if (null? b)
         `(block
-          ,.(if const?
-                (map (lambda (x) `(const ,x)) vars)
-                '())
-          ,.(map (lambda (x) `(,what ,x)) vars)
-          ,.(reverse assigns))
+          ,.(reverse decls)
+          ,.(reverse assigns)
+          ,.(if (null? assigns) `((null)) '()))
         (let ((x (car b)))
           (cond ((or (assignment-like? x) (function-def? x))
-                 (loop (cdr b)
-                       (append (lhs-decls (assigned-name (cadr x))) vars)
+                 (let ((new-vars (lhs-decls (assigned-name (cadr x)))))
+                  (loop (cdr b)
+                       (append (map (lambda (x) `(,what ,x)) new-vars) decls)
                        (cons `(,(car x) ,(all-decl-vars (cadr x)) ,(caddr x))
-                             assigns)))
+                             assigns))))
                 ((and (pair? x) (eq? (car x) '|::|))
                  (loop (cdr b)
-                       (cons (decl-var x) vars)
-                       (cons `(decl ,@(cdr x)) assigns)))
-                ((symbol? x)
-                 (loop (cdr b) (cons x vars) assigns))
+                       (cons `(decl ,@(cdr x)) (cons `(,what ,(decl-var x)) decls))
+                       assigns))
+                ((or (symbol? x) (globalref? x))
+                  ;; TODO: consider removing support for Expr(:global,
+                  ;; GlobalRef(...))  and other Exprs that cannot be produced by
+                  ;; the parser (tested by test/precompile.jl #50538).
+                 (loop (cdr b) (cons `(,what, x) decls) assigns))
                 (else
                  (error (string "invalid syntax in \"" what "\" declaration"))))))))
 
+(define (expand-assignment e (const? #f))
+  (define lhs (cadr e))
+  (define (function-lhs? lhs)
+    (and (pair? lhs)
+         (or (eq? (car lhs) 'call)
+             (eq? (car lhs) 'where)
+             (and (eq? (car lhs) '|::|)
+                  (pair? (cadr lhs))
+                  (eq? (car (cadr lhs)) 'call)))))
+  (define (assignment-to-function lhs e)  ;; convert '= expr to 'function expr
+    (cons 'function (cdr e)))
+  (define (maybe-wrap-const x)
+    (if const? `(const ,x) x))
+  (cond
+   ((function-lhs? lhs)
+    ;; `const f() = ...` - The `const` here is inoperative, but the syntax
+    ;; happened to work in earlier versions, so simply strip `const`.
+    (expand-forms (assignment-to-function lhs e)))
+   ((and (pair? lhs)
+         (eq? (car lhs) 'curly))
+    (expand-unionall-def (cadr e) (caddr e) const?))
+   ((assignment? (caddr e))
+    ;; chain of assignments - convert a=b=c to `b=c; a=c`
+    (let loop ((lhss (list lhs))
+               (rhs  (caddr e)))
+      (if (and (assignment? rhs) (not (function-lhs? (cadr rhs))))
+          (loop (cons (cadr rhs) lhss) (caddr rhs))
+          (let* ((rr (if (symbol-like? rhs) rhs (make-ssavalue)))
+                 (lhss (reverse lhss))
+                 (lhs0 (car lhss))
+                 (lhss (cdr lhss))
+                 (lhss (reverse lhss)))
+            (expand-forms
+             `(block ,.(if (eq? rr rhs) '() `((= ,rr ,(if (assignment? rhs)
+                                                          (assignment-to-function (cadr rhs) rhs)
+                                                          rhs))))
+                     ,@(map (lambda (l) `(= ,l ,rr)) lhss)
+                     ;; In const x = y = z, only x becomes const
+                     ,(maybe-wrap-const `(= ,lhs0 ,rr))
+                     (unnecessary ,rr)))))))
+   ((or (and (symbol-like? lhs) (valid-name? lhs))
+        (globalref? lhs))
+    ;; TODO: We currently call (latestworld) after every (const _ _), but this
+    ;; may need to be moved elsewhere if we want to avoid making one const
+    ;; visible before side effects have been performed (#57484)
+    (if const?
+        (let ((rr (make-ssavalue)))
+          `(block
+            ,(sink-assignment rr (expand-forms (caddr e)))
+            (const ,lhs ,rr)
+            (latestworld)
+            (unnecessary ,rr)))
+        (sink-assignment lhs (expand-forms (caddr e)))))
+   ((atom? lhs)
+    (error (string "invalid assignment location \"" (deparse lhs) "\"")))
+   (else
+    (case (car lhs)
+      ((|.|)
+       ;; a.b =
+       (when const?
+         (error (string "cannot declare \"" (deparse lhs) "\" `const`")))
+       (let* ((a   (cadr lhs))
+              (b  (caddr lhs))
+              (rhs (caddr e)))
+         (if (and (length= b 2) (eq? (car b) 'tuple))
+             (error (string "invalid syntax \""
+                            (string (deparse a) ".(" (deparse (cadr b)) ") = ...") "\"")))
+         (let ((aa (if (symbol-like? a) a (make-ssavalue)))
+               (bb (if (or (atom? b) (symbol-like? b) (and (pair? b) (quoted? b)))
+                       b (make-ssavalue)))
+               (rr (if (or (symbol-like? rhs) (atom? rhs)) rhs (make-ssavalue))))
+           `(block
+             ,.(if (eq? aa a)   '() (list (sink-assignment aa (expand-forms a))))
+             ,.(if (eq? bb b)   '() (list (sink-assignment bb (expand-forms b))))
+             ,.(if (eq? rr rhs) '() (list (sink-assignment rr (expand-forms rhs))))
+             (call (top setproperty!) ,aa ,bb ,rr)
+             (unnecessary ,rr)))))
+      ((tuple)
+       (let ((lhss (cdr lhs))
+             (x    (caddr e)))
+         (if (has-parameters? lhss)
+             ;; property destructuring
+             (expand-property-destruct lhss x maybe-wrap-const)
+             ;; multiple assignment
+             (expand-tuple-destruct lhss x maybe-wrap-const))))
+      ((typed_hcat)
+       (error "invalid spacing in left side of indexed assignment"))
+      ((typed_vcat typed_ncat)
+       (error "unexpected \";\" in left side of indexed assignment"))
+      ((ref)
+       ;; (= (ref a . idxs) rhs)
+       (when const?
+         (error (string "cannot declare \"" (deparse lhs) "\" `const`")))
+       (let ((a    (cadr lhs))
+             (idxs (cddr lhs))
+             (rhs  (caddr e)))
+         (let* ((reuse (and (pair? a)
+                            (contains (lambda (x) (eq? x 'end))
+                                      idxs)))
+                (arr   (if reuse (make-ssavalue) a))
+                (stmts (if reuse `((= ,arr ,(expand-forms a))) '()))
+                (rrhs (and (pair? rhs) (not (ssavalue? rhs)) (not (quoted? rhs))))
+                (r    (if rrhs (make-ssavalue) rhs))
+                (rini (if rrhs (list (sink-assignment r (expand-forms rhs))) '())))
+           (receive
+               (new-idxs stuff) (process-indices arr idxs)
+             `(block
+               ,@stmts
+               ,.(map expand-forms stuff)
+               ,@rini
+               ,(expand-forms
+                 `(call (top setindex!) ,arr ,r ,@new-idxs))
+               (unnecessary ,r))))))
+      ((|::|)
+       ;; (= (|::| T) rhs) is an error
+       (if (null? (cddr lhs))
+           (error (string "invalid assignment location \"" (deparse lhs) "\"")))
+       ;; (= (|::| x T) rhs)
+       (let ((x (cadr lhs))
+             (T (caddr lhs))
+             (rhs (caddr e)))
+         (let ((e (remove-argument-side-effects x)))
+           (if const?
+               ;; This could go through convert-assignment in the closure
+               ;; conversion pass, but since constants don't have declared types
+               ;; the way other variables do, we insert convert() here.
+               (expand-forms
+                ;; TODO: This behaviour (`const _:T = ...` does not call convert,
+                ;; but still evaluates RHS) should be documented.
+                `(const (= ,(car e) ,(if (underscore-symbol? (car e))
+                                         rhs
+                                         (convert-for-type-decl rhs T #t #f)))))
+               (expand-forms
+                `(block ,@(cdr e)
+                        ;; TODO: When x is a complex expression, this acts as a
+                        ;; typeassert rather than a declaration.
+                        ,.(if (underscore-symbol? (car e))
+                              '() ; Assignment to _ will ultimately be discarded---don't declare anything
+                              `((decl ,(car e) ,T)))
+                        ,(maybe-wrap-const `(= ,(car e) ,rhs))))))))
+      ((vcat ncat)
+       ;; (= (vcat . args) rhs)
+       (error "use \"(a, b) = ...\" to assign multiple values"))
+      (else
+       (error (string "invalid assignment location \"" (deparse lhs) "\"")))))))
+
 ;; convert (lhss...) = (tuple ...) to assignments, eliminating the tuple
-(define (tuple-to-assignments lhss0 x)
+(define (tuple-to-assignments lhss0 x wrap)
   (let loop ((lhss lhss0)
              (assigned lhss0)
              (rhss (cdr x))
@@ -1531,7 +1701,7 @@
                  (loop (cdr lhss)
                        (cons L assigned)
                        (cdr rhss)
-                       (cons (make-assignment L R) stmts)
+                       (cons (wrap (make-assignment L R)) stmts)
                        after
                        (cons R elts)))
                 ((vararg? L)
@@ -1542,7 +1712,7 @@
                        `(block ,@(reverse stmts)
                                (= ,temp (tuple ,@rhss))
                                ,@(reverse after)
-                               (= ,(cadr L) ,temp)
+                               ,(wrap `(= ,(cadr L) ,temp))
                                (unnecessary (tuple ,@(reverse elts) (... ,temp)))))
                      (let ((lhss- (reverse lhss))
                            (rhss- (reverse rhss))
@@ -1574,13 +1744,13 @@
                               (assigns (if (and (length= lhss- 1) (vararg? (car lhss-)))
                                            (begin
                                              (set-car! end
-                                                       (cons `(= ,(cadar lhss-) ,temp) (car end)))
+                                                       (cons (wrap `(= ,(cadar lhss-) ,temp)) (car end)))
                                              assigns)
                                            (append (if (> n 0)
                                                        `(,@assigns (local ,st))
                                                        assigns)
                                                    (destructure- 1 (reverse lhss-) temp
-                                                                 n st end)))))
+                                                                 n st end wrap)))))
                          (loop lhs-tail
                                (append (map (lambda (x) (if (vararg? x) (cadr x) x)) lhss-) assigned)
                                rhs-tail
@@ -1593,7 +1763,7 @@
                    `(block ,@(reverse stmts)
                            ,(make-assignment temp (cadr R))
                            ,@(reverse after)
-                           (= (tuple ,@lhss) ,temp)
+                           ,(wrap `(= (tuple ,@lhss) ,temp))
                            (unnecessary (tuple ,@(reverse elts) (... ,temp))))))
                 (else
                  (let ((temp (if (eventually-call? L) (gensy) (make-ssavalue))))
@@ -1603,11 +1773,11 @@
                          (if (symbol? temp)
                              (list* (make-assignment temp R) `(local-def ,temp) stmts)
                              (cons  (make-assignment temp R) stmts))
-                         (cons (make-assignment L temp) after)
+                         (cons (wrap (make-assignment L temp)) after)
                          (cons temp elts)))))))))
 
 ;; convert (lhss...) = x to tuple indexing
-(define (lower-tuple-assignment lhss x)
+(define (lower-tuple-assignment lhss x (wrap (lambda (x i) x)))
   (let ((t (make-ssavalue)))
     `(block
       (= ,t ,x)
@@ -1622,9 +1792,10 @@
                           `(block
                             (local-def ,temp)
                             (= ,temp (call (core getfield) ,t ,i))
-                            (= ,(car lhs) ,temp)))
-                        `(= ,(car lhs)
-                            (call (core getfield) ,t ,i)))
+                            ,(wrap `(= ,(car lhs) ,temp) i)))
+                        (wrap
+                          `(= ,(car lhs)
+                            (call (core getfield) ,t ,i)) i))
                     (loop (cdr lhs)
                           (+ i 1)))))
       ,t)))
@@ -1638,17 +1809,21 @@
       (cons e '())
       (let ((a '()))
         (define (arg-to-temp x)
-          (cond ((effect-free? x)  x)
-                ((or (eq? (car x) '...) (eq? (car x) '&))
-                 `(,(car x) ,(arg-to-temp (cadr x))))
+          (cond ((effect-free? x) x)
+                ((eq? (car x) '...)
+                 `(... ,(arg-to-temp (cadr x))))
                 ((eq? (car x) 'kw)
-                 `(,(car x) ,(cadr x) ,(arg-to-temp (caddr x))))
+                 `(kw ,(cadr x) ,(arg-to-temp (caddr x))))
+                ((eq? (car x) 'parameters)
+                 `(parameters ,@(map arg-to-temp (cdr x))))
                 (else
                  (let ((g (make-ssavalue)))
                    (begin (set! a (cons `(= ,g ,x) a))
                           g)))))
-        (cons (cons (car e) (map arg-to-temp (cdr e)))
-              (reverse a)))))
+        (if (eq? (car e) 'let)
+          (cons (arg-to-temp e) (reverse a))
+          (cons (cons (car e) (map arg-to-temp (cdr e)))
+                (reverse a))))))
 
 (define (lower-kw-call f args)
   (let* ((para (if (has-parameters? args) (cdar args) '()))
@@ -1696,7 +1871,7 @@
                      (cons temp (append (cdr e) (list `(= ,temp ,newlhs))))
                      e))
          (newlhs (or temp newlhs)))
-    (if (and (pair? lhs) (eq? (car lhs) 'tuple))
+    (if (tuple-syntax? lhs)
         (let loop ((a (cdr newlhs))
                    (b (cdr lhs)))
           (if (pair? a)
@@ -1741,7 +1916,7 @@
                    ,(expand-update-operator op op= (car e) rhs T))))
         (else
          (if (and (pair? lhs) (eq? op= '=)
-                  (not (memq (car lhs) '(|.| tuple vcat ncat typed_hcat typed_vcat typed_ncat))))
+                  (not (memq (car lhs) '(|.| globalref tuple vcat ncat typed_hcat typed_vcat typed_ncat))))
              (error (string "invalid assignment location \"" (deparse lhs) "\"")))
          (expand-update-operator- op op= lhs rhs declT))))
 
@@ -1785,7 +1960,7 @@
   (let ((copied-vars  ;; variables not declared `outer` are copied in the innermost loop
          ;; TODO: maybe filter these to remove vars not assigned in the loop
          (delete-duplicates
-          (filter (lambda (x) (not (underscore-symbol? x)))
+          (filter-not-underscore
                   (apply append
                          (map lhs-vars
                               (filter (lambda (x) (not (outer? x))) (butlast lhss))))))))
@@ -1859,8 +2034,7 @@
                         ((and flat (pair? expr) (eq? (car expr) 'flatten))
                          (expand-generator (cadr expr) #t (delete-duplicates (append outervars myvars))))
                         ((pair? outervars)
-                         `(let (block ,@(map (lambda (v) `(= ,v ,v)) (filter (lambda (x) (not (underscore-symbol? x)))
-                                                                             outervars)))
+                         `(let (block ,@(map (lambda (v) `(= ,v ,v)) (filter-not-underscore outervars)))
                             ,expr))
                         (else expr))))
         `(-> ,argname (block ,@splat ,expr)))))))
@@ -2241,7 +2415,7 @@
          (gensy))
         (else (make-ssavalue))))
 
-(define (expand-property-destruct lhs x)
+(define (expand-property-destruct lhs x (wrap identity))
   (if (not (length= lhs 1))
       (error (string "invalid assignment location \"" (deparse `(tuple ,lhs)) "\"")))
   (let* ((lhss (cdar lhs))
@@ -2256,7 +2430,7 @@
                                 (cadr field))
                                (else
                                 (error (string "invalid assignment location \"" (deparse `(tuple ,lhs)) "\""))))))
-               (expand-forms `(= ,field (call (top getproperty) ,xx (quote ,prop))))))
+               (expand-forms (wrap `(= ,field (call (top getproperty) ,xx (quote ,prop)))))))
            lhss)
        (unnecessary ,xx))))
 
@@ -2270,7 +2444,10 @@
 ;; `end`:  car collects statements to be executed afterwards.
 ;;         In general, actual assignments should only happen after
 ;;         the whole iterator is desctructured (https://github.com/JuliaLang/julia/issues/40574)
-(define (destructure- i lhss xx n st end)
+;;
+;; The `wrap` argument is a callback that will be called on all assignments to
+;; symbols `lhss`, e.g. to insert a `const` declaration.
+(define (destructure- i lhss xx n st end wrap)
   (if (null? lhss)
       '()
       (let* ((lhs  (car lhss))
@@ -2285,35 +2462,38 @@
                                              (make-ssavalue))))))
                          ;; can't use ssavalues if it's a function definition
                          ((eventually-call? lhs) (gensy))
-                         (else (make-ssavalue)))))
+                         (else (make-ssavalue))))
+             ;; If we use an intermediary lhs, don't wrap `const`.
+             (wrap-subassign (if (eq? lhs lhs-) wrap identity))
+             (wrapfirst (lambda (x i) (if (= i 1) (wrap-subassign x) x))))
         (if (and (vararg? lhs) (any vararg? (cdr lhss)))
             (error "multiple \"...\" on lhs of assignment"))
         (if (not (eq? lhs lhs-))
             (if (vararg? lhs)
-                (set-car! end (cons (expand-forms `(= ,(cadr lhs) ,(cadr lhs-))) (car end)))
-                (set-car! end (cons (expand-forms `(= ,lhs ,lhs-)) (car end)))))
+                (set-car! end (cons (expand-forms (wrap `(= ,(cadr lhs) ,(cadr lhs-)))) (car end)))
+                (set-car! end (cons (expand-forms (wrap `(= ,lhs ,lhs-))) (car end)))))
         (if (vararg? lhs-)
             (if (= i n)
                 (if (underscore-symbol? (cadr lhs-))
                     '()
                     (list (expand-forms
-                            `(= ,(cadr lhs-) (call (top rest) ,xx ,@(if (eq? i 1) '() `(,st)))))))
+                            (wrap-subassign `(= ,(cadr lhs-) (call (top rest) ,xx ,@(if (eq? i 1) '() `(,st))))))))
                 (let ((tail (if (eventually-call? lhs) (gensy) (make-ssavalue))))
                   (cons (expand-forms
                           (lower-tuple-assignment
                             (list (cadr lhs-) tail)
-                            `(call (top split_rest) ,xx ,(- n i) ,@(if (eq? i 1) '() `(,st)))))
-                        (destructure- 1 (cdr lhss) tail (- n i) st end))))
+                            `(call (top split_rest) ,xx ,(- n i) ,@(if (eq? i 1) '() `(,st))) wrapfirst))
+                        (destructure- 1 (cdr lhss) tail (- n i) st end wrap))))
             (cons (expand-forms
                     (lower-tuple-assignment
                       (if (= i n)
                           (list lhs-)
                           (list lhs- st))
                       `(call (top indexed_iterate)
-                             ,xx ,i ,@(if (eq? i 1) '() `(,st)))))
-                  (destructure- (+ i 1) (cdr lhss) xx n st end))))))
+                             ,xx ,i ,@(if (eq? i 1) '() `(,st))) wrapfirst))
+                  (destructure- (+ i 1) (cdr lhss) xx n st end wrap))))))
 
-(define (expand-tuple-destruct lhss x)
+(define (expand-tuple-destruct lhss x (wrap identity))
   (define (sides-match? l r)
     ;; l and r either have equal lengths, or r has a trailing ...
     (cond ((null? l)          (null? r))
@@ -2321,12 +2501,12 @@
           ((null? r)          #f)
           ((vararg? (car r))  (null? (cdr r)))
           (else               (sides-match? (cdr l) (cdr r)))))
-  (if (and (pair? x) (pair? lhss) (eq? (car x) 'tuple) (not (any assignment? (cdr x)))
+  (if (and (tuple-syntax? x) (pair? lhss) (not (any assignment? (cdr x)))
            (not (has-parameters? (cdr x)))
            (sides-match? lhss (cdr x)))
       ;; (a, b, ...) = (x, y, ...)
       (expand-forms
-       (tuple-to-assignments lhss x))
+       (tuple-to-assignments lhss x wrap))
       ;; (a, b, ...) = other
       (begin
         ;; like memq, but if lhs is (... sym), check against sym instead
@@ -2347,7 +2527,7 @@
           `(block
             ,@(if (> n 0) `((local ,st)) '())
             ,@ini
-            ,@(destructure- 1 lhss xx n st end)
+            ,@(destructure- 1 lhss xx n st end wrap)
             ,@(reverse (car end))
             (unnecessary ,xx))))))
 
@@ -2360,7 +2540,7 @@
       `(= ,lhs ,rhs)))
 
 (define (expand-forms e)
-  (if (or (atom? e) (memq (car e) '(quote inert top core globalref outerref module toplevel ssavalue null true false meta using import export thismodule toplevel-only)))
+  (if (or (atom? e) (memq (car e) '(quote inert top core globalref module toplevel ssavalue null true false meta export public thismodule toplevel-only)))
       e
       (let ((ex (get expand-table (car e) #f)))
         (if ex
@@ -2368,6 +2548,32 @@
             (cons (car e)
                   (map expand-forms (cdr e)))))))
 
+(define (find pred e)
+  (let loop ((xs e))
+    (if (null? xs)
+        #f
+        (let ((elt (car xs)))
+          (if (pred elt)
+              elt
+              (loop (cdr xs)))))))
+
+(define (something e)
+  (find (lambda (x) (not (equal? x '(null)))) e))
+
+(define (check-import-paths what e)
+  (define (check-dot-path e)
+    (and (list? e) (eq? (car e) '|.|) (every symbol? (cdr e))))
+  (define (check-path e)
+    (and (pair? e)
+         (or (check-dot-path e)
+             (and (eq? (car e) 'as)
+                  (check-dot-path (cadr e)) (symbol? (caddr e))))))
+  (unless (and (list? e)
+               (or (every check-path e)
+                   (and (list? (car e)) (eq? (caar e) ':)
+                        (every check-path (cdar e)))))
+    (error (string "malformed \"" what "\" statement"))))
+
 ;; table mapping expression head to a function expanding that form
 (define expand-table
   (table
@@ -2387,18 +2593,21 @@
 
    'opaque_closure
    (lambda (e)
-     (let* ((ty   (and (length> e 2) (expand-forms (cadr e))))
-            (F    (if (length> e 2) (caddr e) (cadr e)))
+     (let* ((argt  (something (list (expand-forms (cadr e)) #f)))
+            (rt_lb (something (list (expand-forms (caddr e)) #f)))
+            (rt_ub (something (list (expand-forms (cadddr e)) #f)))
+            (allow-partial (caddddr e))
+            (F             (cadddddr e))
             (isva (let* ((arglist (function-arglist F))
                          (lastarg (and (pair? arglist) (last arglist))))
-                    (if (and ty (any (lambda (arg)
+                    (if (and argt (any (lambda (arg)
                                        (let ((arg (if (vararg? arg) (cadr arg) arg)))
-                                         (not (equal? (arg-type arg) '(core Any)))))
+                                         (not (symbol? arg))))
                                      arglist))
                         (error "Opaque closure argument type may not be specified both in the method signature and separately"))
                     (if (or (varargexpr? lastarg) (vararg? lastarg))
                         '(true) '(false))))
-            (meth (caddr (caddr (expand-forms F)))) ;; `method` expr
+            (meth  (cadddr (caddr (expand-forms F)))) ;; `method` expr
             (lam       (cadddr meth))
             (sig-block (caddr meth))
             (sig-block (if (and (pair? sig-block) (eq? (car sig-block) 'block))
@@ -2409,11 +2618,13 @@
             (typ-svec  (caddr sig-svec))
             (tvars     (cddr (cadddr sig-svec)))
             (argtypes  (cdddr typ-svec))
-            (functionloc (cadr (caddddr sig-svec))))
-       (let* ((argtype   (foldl (lambda (var ex) `(call (core UnionAll) ,var ,ex))
-                                (expand-forms `(curly (core Tuple) ,@argtypes))
-                                (reverse tvars))))
-         `(_opaque_closure ,(or ty argtype) ,isva ,(length argtypes) ,functionloc ,lam))))
+            (functionloc (cadr (caddddr sig-svec)))
+            (argtype   (foldl (lambda (var ex) `(call (core UnionAll) ,var ,ex))
+                              (expand-forms `(curly (core Tuple) ,@argtypes))
+                              (reverse tvars)))
+            (argtype (or argt argtype))
+            (argtype (if (null? stmts) argtype `(block ,@stmts ,argtype))))
+       `(_opaque_closure ,argtype ,rt_lb ,rt_ub ,isva ,(length argtypes) ,allow-partial ,functionloc ,lam)))
 
    'block
    (lambda (e)
@@ -2458,115 +2669,7 @@
    'global expand-local-or-global-decl
    'local-def expand-local-or-global-decl
 
-   '=
-   (lambda (e)
-     (define lhs (cadr e))
-     (define (function-lhs? lhs)
-       (and (pair? lhs)
-            (or (eq? (car lhs) 'call)
-                (eq? (car lhs) 'where)
-                (and (eq? (car lhs) '|::|)
-                     (pair? (cadr lhs))
-                     (eq? (car (cadr lhs)) 'call)))))
-     (define (assignment-to-function lhs e)  ;; convert '= expr to 'function expr
-       (cons 'function (cdr e)))
-     (cond
-      ((function-lhs? lhs)
-       (expand-forms (assignment-to-function lhs e)))
-      ((and (pair? lhs)
-            (eq? (car lhs) 'curly))
-       (expand-unionall-def (cadr e) (caddr e)))
-      ((assignment? (caddr e))
-       ;; chain of assignments - convert a=b=c to `b=c; a=c`
-       (let loop ((lhss (list lhs))
-                  (rhs  (caddr e)))
-         (if (and (assignment? rhs) (not (function-lhs? (cadr rhs))))
-             (loop (cons (cadr rhs) lhss) (caddr rhs))
-             (let ((rr (if (symbol-like? rhs) rhs (make-ssavalue))))
-               (expand-forms
-                `(block ,.(if (eq? rr rhs) '() `((= ,rr ,(if (assignment? rhs)
-                                                             (assignment-to-function (cadr rhs) rhs)
-                                                             rhs))))
-                        ,@(map (lambda (l) `(= ,l ,rr))
-                               lhss)
-                        (unnecessary ,rr)))))))
-      ((or (and (symbol-like? lhs) (valid-name? lhs))
-           (globalref? lhs) (outerref? lhs))
-       (sink-assignment lhs (expand-forms (caddr e))))
-      ((atom? lhs)
-       (error (string "invalid assignment location \"" (deparse lhs) "\"")))
-      (else
-       (case (car lhs)
-         ((|.|)
-          ;; a.b =
-          (let* ((a   (cadr lhs))
-                 (b  (caddr lhs))
-                 (rhs (caddr e)))
-            (if (and (length= b 2) (eq? (car b) 'tuple))
-                (error (string "invalid syntax \""
-                               (string (deparse a) ".(" (deparse (cadr b)) ") = ...") "\"")))
-            (let ((aa (if (symbol-like? a) a (make-ssavalue)))
-                  (bb (if (or (atom? b) (symbol-like? b) (and (pair? b) (quoted? b)))
-                          b (make-ssavalue)))
-                  (rr (if (or (symbol-like? rhs) (atom? rhs)) rhs (make-ssavalue))))
-              `(block
-                ,.(if (eq? aa a)   '() (list (sink-assignment aa (expand-forms a))))
-                ,.(if (eq? bb b)   '() (list (sink-assignment bb (expand-forms b))))
-                ,.(if (eq? rr rhs) '() (list (sink-assignment rr (expand-forms rhs))))
-                (call (top setproperty!) ,aa ,bb ,rr)
-                (unnecessary ,rr)))))
-         ((tuple)
-          (let ((lhss (cdr lhs))
-                (x    (caddr e)))
-            (if (has-parameters? lhss)
-                ;; property destructuring
-                (expand-property-destruct lhss x)
-                ;; multiple assignment
-                (expand-tuple-destruct lhss x))))
-         ((typed_hcat)
-          (error "invalid spacing in left side of indexed assignment"))
-         ((typed_vcat typed_ncat)
-          (error "unexpected \";\" in left side of indexed assignment"))
-         ((ref)
-          ;; (= (ref a . idxs) rhs)
-          (let ((a    (cadr lhs))
-                (idxs (cddr lhs))
-                (rhs  (caddr e)))
-            (let* ((reuse (and (pair? a)
-                               (contains (lambda (x) (eq? x 'end))
-                                         idxs)))
-                   (arr   (if reuse (make-ssavalue) a))
-                   (stmts (if reuse `((= ,arr ,(expand-forms a))) '()))
-                   (rrhs (and (pair? rhs) (not (ssavalue? rhs)) (not (quoted? rhs))))
-                   (r    (if rrhs (make-ssavalue) rhs))
-                   (rini (if rrhs (list (sink-assignment r (expand-forms rhs))) '())))
-              (receive
-               (new-idxs stuff) (process-indices arr idxs)
-               `(block
-                 ,@stmts
-                 ,.(map expand-forms stuff)
-                 ,@rini
-                 ,(expand-forms
-                   `(call (top setindex!) ,arr ,r ,@new-idxs))
-                 (unnecessary ,r))))))
-         ((|::|)
-          ;; (= (|::| T) rhs) is an error
-          (if (null? (cddr lhs))
-              (error (string "invalid assignment location \"" (deparse lhs) "\"")))
-          ;; (= (|::| x T) rhs)
-          (let ((x (cadr lhs))
-                (T (caddr lhs))
-                (rhs (caddr e)))
-            (let ((e (remove-argument-side-effects x)))
-              (expand-forms
-               `(block ,@(cdr e)
-                       (decl ,(car e) ,T)
-                       (= ,(car e) ,rhs))))))
-         ((vcat ncat)
-          ;; (= (vcat . args) rhs)
-          (error "use \"(a, b) = ...\" to assign multiple values"))
-         (else
-          (error (string "invalid assignment location \"" (deparse lhs) "\"")))))))
+   '= expand-assignment
 
    'abstract
    (lambda (e)
@@ -2627,13 +2730,12 @@
                          (argtypes (cadr after-cconv))
                          (args (cddr after-cconv)))
                         (begin
-                          (if (not (and (pair? argtypes)
-                                        (eq? (car argtypes) 'tuple)))
-                              (if (and (pair? RT)
-                                       (eq? (car RT) 'tuple))
+                          (if (not (tuple-syntax? argtypes))
+                              (if (tuple-syntax? RT)
                                   (error "ccall argument types must be a tuple; try \"(T,)\" and check if you specified a correct return type")
                                   (error "ccall argument types must be a tuple; try \"(T,)\"")))
-                          (lower-ccall name RT (cdr argtypes) args
+                          (lower-ccall name
+                                       RT (cdr argtypes) args
                                        (if have-cconv
                                            (if have-cconv-expr
                                                (cadr cconv)
@@ -2657,8 +2759,7 @@
                           (if (null? run) '()
                               (list `(call (core tuple) ,.(reverse run))))
                           (let ((x (car a)))
-                            (if (and (length= x 2)
-                                     (eq? (car x) '...))
+                            (if (vararg? x)
                                 (if (null? run)
                                     (list* (cadr x)
                                            (tuple-wrap (cdr a) '()))
@@ -2672,10 +2773,27 @@
                  ((and (eq? (identifier-name f) '^) (length= e 4) (integer? (cadddr e)))
                   (expand-forms
                    `(call (top literal_pow) ,f ,(caddr e) (call (call (core apply_type) (top Val) ,(cadddr e))))))
+                 ((eq? f 'include)
+                  (let ((r (make-ssavalue)))
+                    `(block (= ,r ,(map expand-forms e)) (latestworld-if-toplevel) ,r)))
                  (else
                   (map expand-forms e))))
          (map expand-forms e)))
 
+   'foreigncall
+   (lambda (e)
+     (if (not (length> e 5)) (error "too few arguments to foreigncall"))
+     (let* ((name (car (list-tail e 1)))
+            (RT (car (list-tail e 2)))
+            (atypes (car (list-tail e 3)))
+            (nreq (car (list-tail e 4)))
+            (cconv (car (list-tail e 5)))
+            (args-and-roots (list-tail e 6)))
+       (begin
+         ;; Return expanded foreigncall
+         `(foreigncall ,(normalize-ccall-name name) ,(expand-forms RT) ,(expand-forms atypes)
+                       ,nreq ,cconv ,@(map expand-forms args-and-roots)))))
+
    'do
    (lambda (e)
      (let* ((call (cadr e))
@@ -2777,7 +2895,10 @@
    '.>>>=   lower-update-op
 
    '|...|
-   (lambda (e) (error "\"...\" expression outside call"))
+   (lambda (e)
+     (if (not (length= e 2))
+         (error "wrong number of expressions following \"...\""))
+     (error "\"...\" expression outside call"))
 
    '$
    (lambda (e) (error "\"$\" expression outside quote"))
@@ -2823,6 +2944,7 @@
    'generator
    (lambda (e)
      (check-no-return e)
+     (check-no-thisfunction e)
      (expand-generator e #f '()))
 
    'flatten
@@ -2866,6 +2988,38 @@
     (lambda (e)
       (set! *current-desugar-loc* e)
       e)
+
+    ;; We insert (latestworld) after every call to _eval_import or _eval_using
+    ;; to avoid having to do it in eval_import_path (#57316)
+    'import
+    (lambda (e)
+      (check-import-paths "import" (cdr e))
+      `(block
+        (toplevel-only import)
+        ,.(if (eq? (caadr e) ':)
+              `((call (top _eval_import) (true) (thismodule)
+                      ,.(map (lambda (x) `(inert ,x)) (cdadr e)))
+                (latestworld))
+              (map (lambda (x)
+                     `(block
+                       (call (top _eval_import) (true) (thismodule) (null) (inert ,x))
+                       (latestworld)))
+                   (cdr e)))))
+
+    'using
+    (lambda (e)
+      (check-import-paths "using" (cdr e))
+      `(block
+        (toplevel-only using)
+        ,.(if (eq? (caadr e) ':)
+              `((call (top _eval_import) (false) (thismodule)
+                      ,.(map (lambda (x) `(inert ,x)) (cdadr e)))
+                (latestworld))
+              (map (lambda (x)
+                     `(block
+                       (call (top _eval_using) (thismodule) (inert ,x))
+                       (latestworld)))
+                   (cdr e)))))
     ))
 
 (define (has-return? e)
@@ -2875,6 +3029,13 @@
   (if (has-return? e)
       (error "\"return\" not allowed inside comprehension or generator")))
 
+(define (has-thisfunction? e)
+  (expr-contains-p thisfunction? e (lambda (x) (not (function-def? x)))))
+
+(define (check-no-thisfunction e)
+  (if (has-thisfunction? e)
+      (error "\"@__FUNCTION__\" not allowed inside comprehension or generator")))
+
 (define (has-break-or-continue? e)
   (expr-contains-p (lambda (x) (and (pair? x) (memq (car x) '(break continue))))
                    e
@@ -2883,6 +3044,7 @@
 
 (define (lower-comprehension ty expr itrs)
   (check-no-return expr)
+  (check-no-thisfunction expr)
   (if (has-break-or-continue? expr)
       (error "break or continue outside loop"))
   (let ((result    (make-ssavalue))
@@ -2932,6 +3094,16 @@
 (define (lhs-vars e)
   (map decl-var (lhs-decls e)))
 
+;; Return all the names that will be bound by the assignment LHS, including
+;; curlies and calls.
+(define (lhs-bound-names e)
+  (cond ((underscore-symbol? e) '())
+        ((atom? e) (list e))
+        ((and (pair? e) (memq (car e) '(call curly where |::|)))
+         (lhs-bound-names (cadr e)))
+        ((and (pair? e) (memq (car e) '(tuple parameters)))
+         (apply append (map lhs-bound-names (cdr e))))))
+
 (define (all-decl-vars e)  ;; map decl-var over every level of an assignment LHS
   (cond ((eventually-call? e) e)
         ((decl? e)   (decl-var e))
@@ -2954,10 +3126,15 @@
                  (set! vars (cons v vars)))
              (if (not (length= e 2))
                  (find-assigned-vars- (caddr e)))))
-          ((=)
+          ((assign-const-if-global)
+            ;; like v = val, except that if `v` turns out global(either
+            ;; implicitly or by explicit `global`), it gains an implicit `const`
+            (set! vars (cons (cadr e) vars)))
+          ((= const)
            (let ((v (decl-var (cadr e))))
-             (find-assigned-vars- (caddr e))
-             (if (or (ssavalue? v) (globalref? v) (outerref? v) (underscore-symbol? v))
+             (unless (and (eq? (car e) 'const) (null? (cddr e)))
+               (find-assigned-vars- (caddr e)))
+             (if (or (ssavalue? v) (globalref? v) (underscore-symbol? v))
                  '()
                  (set! vars (cons v vars)))))
           (else
@@ -3005,7 +3182,7 @@
     (for-each (lambda (v) (push-var! tab v v)) sp)
     (for-each (lambda (v) (push-var! tab v v)) locals)
     (for-each (lambda (pair) (push-var! tab (car pair) (cdr pair))) renames)
-    (for-each (lambda (v) (push-var! tab v `(outerref ,v))) globals)
+    (for-each (lambda (v) (push-var! tab v `(globalref (thismodule) ,v))) globals)
     (for-each (lambda (v) (push-var! tab v v)) args)
     (vector lam args locals globals sp renames prev soft? hard? implicit-globals warn-vars tab)))
 
@@ -3035,13 +3212,13 @@
       (or (and (memq var (scope:args scope))    'argument)
           (and (memq var (scope:locals scope))  'local)
           (and (memq var (scope:globals scope))
-               (if (and exclude-top-level-globals
+              (if (and exclude-top-level-globals
                         (null? (lam:args (scope:lam scope)))
                         ;; don't inherit global decls from the outermost scope block
                         ;; in a top-level expression.
                         (or (not (scope:prev scope))
                             (not (scope:prev (scope:prev scope)))))
-                   'none 'global))
+                  'none 'global))
           (and (memq var (scope:sp scope))      'static-parameter)
           (var-kind var (scope:prev scope) exclude-top-level-globals))
       'none))
@@ -3069,16 +3246,36 @@
          (let ((val (and scope (get (scope:table scope) e #f))))
            (cond (val (car val))
                  ((underscore-symbol? e) e)
-                 (else `(outerref ,e)))))
+                 (else `(globalref (thismodule) ,e)))))
         ((or (not (pair? e)) (quoted? e) (memq (car e) '(toplevel symbolicgoto symboliclabel toplevel-only)))
          e)
+        ((eq? (car e) 'isglobal)
+         (let ((val (and scope (get (scope:table scope) (cadr e) #f))))
+           (cond (val `(false))
+                 ((underscore-symbol? (cadr e)) `(false))
+                 (else `(true)))))
         ((eq? (car e) 'global)
          (check-valid-name (cadr e))
          e)
+
+        ((eq? (car e) 'assign-const-if-global)
+           (if (eq? (var-kind (cadr e) scope) 'local)
+               (if (length= e 2)
+                   (null)
+                   (resolve-scopes- `(= ,@(cdr e)) scope sp loc))
+               (resolve-scopes- `(const ,@(cdr e)) scope sp loc)))
+        ((eq? (car e) 'global-if-global)
+           (if (eq? (var-kind (cadr e) scope) 'local)
+               '(null)
+               `(global ,@(cdr e))))
+
         ((memq (car e) '(local local-def))
          (check-valid-name (cadr e))
          ;; remove local decls
          '(null))
+        ((memq (car e) '(export public))
+          ;; no scope resolution - identifiers remain raw symbols
+          e)
         ((eq? (car e) 'require-existing-local)
          (if (not (in-scope? (cadr e) scope))
              (error "no outer local variable declaration exists for \"for outer\""))
@@ -3167,7 +3364,6 @@
                                         vars)
                               t)
                             #f)))))
-
            (for-each (lambda (v)
                        (if (or (memq v locals-def) (memq v local-decls))
                            (error (string "variable \"" v "\" declared both local and global")))
@@ -3225,16 +3421,14 @@
            ,(resolve-scopes- (caddr  e) scope)
            ,(resolve-scopes- (cadddr e) scope (method-expr-static-parameters e))))
         (else
-         (if (and (eq? (car e) '=) (symbol? (cadr e))
+         (if (and (memq (car e) '(= const)) (symbol? (cadr e))
                   scope (null? (lam:args (scope:lam scope)))
                   (warn-var?! (cadr e) scope)
                   (= *scopewarn-opt* 1))
              (let* ((v    (cadr e))
-                    (loc  (extract-line-file loc))
-                    (line (if (= (car loc) 0) (julia-current-line) (car loc)))
-                    (file (if (eq? (cadr loc) 'none) (julia-current-file) (cadr loc))))
+                    (loc  (extract-line-file loc)))
                (lowering-warning
-                1000 'warn (symbol (string file line)) file line
+                1000 'warn (cadr loc) (car loc)
                 (string "Assignment to `" v "` in soft scope is ambiguous "
                         "because a global variable by the same name exists: "
                         "`" v "` will be treated as a new local. "
@@ -3255,11 +3449,11 @@
 (define (lambda-all-vars e)
   (append (lam:argnames e) (caddr e)))
 
-;; compute set of variables referenced in a lambda but not bound by it
+;; compute set of non-global variables referenced in a lambda but not bound by it
 (define (free-vars- e tab)
   (cond ((or (eq? e UNUSED) (underscore-symbol? e)) tab)
         ((symbol? e) (put! tab e #t))
-        ((and (pair? e) (eq? (car e) 'outerref)) tab)
+        ((and (pair? e) (memq (car e) '(global globalref))) tab)
         ((and (pair? e) (eq? (car e) 'break-block)) (free-vars- (caddr e) tab))
         ((and (pair? e) (eq? (car e) 'with-static-parameters)) (free-vars- (cadr e) tab))
         ((or (atom? e) (quoted? e)) tab)
@@ -3283,9 +3477,15 @@
               vi)
     tab))
 
+;; env:      list of vinfo (should not include globals)
+;; captvars: list of vinfo
+;; sp:       list of symbol
+;; new-sp:   list of symbol (static params declared here)
+;; methsig:  `(call (core svec) ...)
+;; tab:      table of (name . var-info)
 (define (analyze-vars-lambda e env captvars sp new-sp methsig tab)
   (let* ((args (lam:args e))
-         (locl (caddr e))
+         (locl (lam:vinfo e))
          (allv (nconc (map arg-name args) locl))
          (fv   (let* ((fv (diff (free-vars (lam:body e)) allv))
                       ;; add variables referenced in declared types for free vars
@@ -3295,27 +3495,23 @@
                                             fv))))
                  (append (diff dv fv) fv)))
          (sig-fv (if methsig (free-vars methsig) '()))
-         (glo  (find-global-decls (lam:body e)))
          ;; make var-info records for vars introduced by this lambda
          (vi   (nconc
                 (map (lambda (decl) (make-var-info (decl-var decl)))
                      args)
                 (map make-var-info locl)))
-         (capt-sp (filter (lambda (v) (or (and (memq v fv) (not (memq v glo)) (not (memq v new-sp)))
+         (capt-sp (filter (lambda (v) (or (and (memq v fv) (not (memq v new-sp)))
                                           (memq v sig-fv)))
                           sp))
          ;; captured vars: vars from the environment that occur
          ;; in our set of free variables (fv).
          (cv    (append (filter (lambda (v) (and (memq (vinfo:name v) fv)
-                                                 (not (memq (vinfo:name v) new-sp))
-                                                 (not (memq (vinfo:name v) glo))))
+                                                 (not (memq (vinfo:name v) new-sp))))
                                 env)
                         (map make-var-info capt-sp)))
          (new-env (append vi
                           ;; new environment: add our vars
-                          (filter (lambda (v)
-                                    (and (not (memq (vinfo:name v) allv))
-                                         (not (memq (vinfo:name v) glo))))
+                          (filter (lambda (v) (not (memq (vinfo:name v) allv)))
                                   env))))
     (analyze-vars (lam:body e)
                   new-env
@@ -3338,27 +3534,38 @@
 (define (analyze-vars e env captvars sp tab)
   (if (or (atom? e) (quoted? e))
       (begin
-        (if (symbol? e)
-            (let ((vi (get tab e #f)))
-              (if vi
-                  (vinfo:set-read! vi #t))))
+        (cond
+         ((symbol? e)
+          (let ((vi (get tab e #f)))
+            (if vi
+                (vinfo:set-read! vi #t))))
+         ((nospecialize-meta? e)
+          (let ((vi (get tab (caddr e) #f)))
+            (if vi
+                (vinfo:set-nospecialize! vi #t)))))
         e)
       (case (car e)
         ((local-def) ;; a local that we know has an assignment that dominates all usages
          (let ((vi (get tab (cadr e) #f)))
               (vinfo:set-never-undef! vi #t)))
-        ((=)
+        ((= const)
          (let ((vi (and (symbol? (cadr e)) (get tab (cadr e) #f))))
            (if vi ; if local or captured
                (begin (if (vinfo:asgn vi)
                           (vinfo:set-sa! vi #f)
                           (vinfo:set-sa! vi #t))
                       (vinfo:set-asgn! vi #t))))
-         (analyze-vars (caddr e) env captvars sp tab))
+         (unless (null? (cddr e))
+           (analyze-vars (caddr e) env captvars sp tab)))
         ((call)
          (let ((vi (get tab (cadr e) #f)))
            (if vi
                (vinfo:set-called! vi #t))
+           ;; calls f(x...) go through `_apply_iterate`
+           (if (and (length> e 3) (equal? (cadr e) '(core _apply_iterate)))
+               (let ((vi2 (get tab (cadddr e) #f)))
+                 (if vi2
+                     (vinfo:set-called! vi2 #t))))
            ;; calls to functions with keyword args have head of `kwcall` first
            (if (and (length> e 3) (equal? (cadr e) '(core kwcall)))
                (let ((vi2 (get tab (cadddr e) #f)))
@@ -3422,34 +3629,17 @@ f(x) = yt(x)
 (define (type-for-closure-parameterized name P names fields types super)
   (let ((n (length P))
         (s (make-ssavalue)))
-    `((thunk
-       (lambda ()
+    `((thunk ,(linearize `(lambda ()
          (() () 0 ())
-         (block (global ,name) (const ,name)
-                ,@(map (lambda (p n) `(= ,p (call (core TypeVar) ',n (core Any)))) P names)
+         (block ,@(map (lambda (p n) `(= ,p (call (core TypeVar) ',n (core Any)))) P names)
                 (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec) ,@P)
                             (call (core svec) ,@(map quotify fields))
                             (call (core svec))
                             (false) ,(length fields)))
                 (call (core _setsuper!) ,s ,super)
-                (= (outerref ,name) ,s)
-                (call (core _typebody!) ,s (call (core svec) ,@types))
-                (return (null))))))))
-
-(define (type-for-closure name fields super)
-  (let ((s (make-ssavalue)))
-    `((thunk (lambda ()
-               (() () 0 ())
-               (block (global ,name) (const ,name)
-                      (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec))
-                                  (call (core svec) ,@(map quotify fields))
-                                  (call (core svec))
-                                  (false) ,(length fields)))
-                      (call (core _setsuper!) ,s ,super)
-                      (= (outerref ,name) ,s)
-                      (call (core _typebody!) ,s
-                            (call (core svec) ,@(map (lambda (v) '(core Box)) fields)))
-                      (return (null))))))))
+                (const (globalref (thismodule) ,name) ,s)
+                (call (core _typebody!) (false) ,s (call (core svec) ,@types))
+                (return (null)))))))))
 
 ;; better versions of above, but they get handled wrong in many places
 ;; need to fix that in order to handle #265 fully (and use the definitions)
@@ -3477,14 +3667,14 @@ f(x) = yt(x)
 
 
 (define (vinfo:not-capt vi)
-  (list (car vi) (cadr vi) (logand (caddr vi) (lognot 5))))
+  (list (car vi) (cadr vi) (logand (caddr vi) (lognot 1))))
 
 (define (clear-capture-bits vinfos)
   (map vinfo:not-capt vinfos))
 
-(define (convert-lambda lam fname interp capt-sp opaq)
+(define (convert-lambda lam fname interp capt-sp opaq toplevel-pure parsed-method-stack)
   (let ((body (add-box-inits-to-body
-               lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq (table) (vinfo-to-table (car (lam:vinfo lam)))))))
+               lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq toplevel-pure parsed-method-stack (table) (vinfo-to-table (car (lam:vinfo lam)))))))
     `(lambda ,(lam:args lam)
        (,(clear-capture-bits (car (lam:vinfo lam)))
         ()
@@ -3537,7 +3727,7 @@ f(x) = yt(x)
       `(call (core getfield) ,fname ,(get opaq var))
       `(call (core getfield) ,fname (inert ,var))))
 
-(define (convert-global-assignment var rhs0 globals lam)
+(define (convert-global-assignment var rhs0 globals lam toplevel-pure)
   (let* ((rhs1 (if (or (simple-atom? rhs0)
                        (equal? rhs0 '(the_exception)))
                    rhs0
@@ -3548,18 +3738,24 @@ f(x) = yt(x)
                    (convert-for-type-decl rhs1 ty #f lam)
                    rhs1))
          (ex   `(= ,var ,rhs)))
-    (if (eq? rhs1 rhs0)
-        `(block ,ex ,rhs0)
-        `(block (= ,rhs1 ,rhs0)
-                ,ex
-                ,rhs1))))
+    `(toplevel-butfirst
+      ,(if (eq? rhs1 rhs0)
+           `(block ,ex ,rhs0)
+           `(block (= ,rhs1 ,rhs0) ,ex ,rhs1))
+      ;; If this assignment is associated with a type declaration, we will have
+      ;; inserted it into the `globals` table before reaching this point.  If it
+      ;; isn't there, we must generate a declare_global call now.
+      ,.(if (or toplevel-pure (get globals ref #f))
+            '()
+            `((call (core declare_global) ,(cadr ref) (inert ,(caddr ref)) (true))
+              (latestworld))))))
 
 ;; convert assignment to a closed variable to a setfield! call.
 ;; while we're at it, generate `convert` calls for variables with
 ;; declared types.
 ;; when doing this, the original value needs to be preserved, to
 ;; ensure the expression `a=b` always returns exactly `b`.
-(define (convert-assignment var rhs0 fname lam interp opaq globals locals)
+(define (convert-assignment var rhs0 fname lam interp opaq toplevel-pure parsed-method-stack globals locals)
   (cond
     ((symbol? var)
      (let* ((vi (get locals var #f))
@@ -3572,12 +3768,12 @@ f(x) = yt(x)
        (if (and (not closed) (not capt) (equal? vt '(core Any)))
            (if (or (local-in? var lam) (underscore-symbol? var))
                `(= ,var ,rhs0)
-               (convert-global-assignment var rhs0 globals lam))
+               (convert-global-assignment var rhs0 globals lam toplevel-pure))
            (let* ((rhs1 (if (or (simple-atom? rhs0)
                                 (equal? rhs0 '(the_exception)))
                             rhs0
                             (make-ssavalue)))
-                  (rhs  (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq (table) locals) #t lam))
+                  (rhs  (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq toplevel-pure parsed-method-stack (table) locals) #t lam))
                   (ex (cond (closed `(call (core setfield!)
                                            ,(if interp
                                                 `($ ,var)
@@ -3591,18 +3787,24 @@ f(x) = yt(x)
                  `(block (= ,rhs1 ,rhs0)
                          ,ex
                          ,rhs1))))))
-     ((or (outerref? var) (globalref? var))
-      (convert-global-assignment var rhs0 globals lam))
+     ((globalref? var)
+      (convert-global-assignment var rhs0 globals lam toplevel-pure))
      ((ssavalue? var)
       `(= ,var ,rhs0))
      (else
        (error (string "invalid assignment location \"" (deparse var) "\"")))))
 
+(define (sig-type-expr namemap name expr)
+  (let ((newname (get namemap name expr)))
+    (if (symbol? newname)
+      `(globalref (thismodule) ,newname)
+      newname)))
+
 (define (rename-sig-types ex namemap)
   (pattern-replace
    (pattern-set
     (pattern-lambda (call (core (-/ Typeof)) name)
-                    (get namemap name __)))
+                    (sig-type-expr namemap name __)))
    ex))
 
 ;; replace leading (function) argument type with `typ`
@@ -3702,9 +3904,9 @@ f(x) = yt(x)
   (Set '(quote top core lineinfo line inert local-def unnecessary copyast
          meta inbounds boundscheck loopinfo decl aliasscope popaliasscope
          thunk with-static-parameters toplevel-only
-         global globalref outerref const-if-global thismodule
+         global globalref global-if-global assign-const-if-global isglobal thismodule thisfunction
          const atomic null true false ssavalue isdefined toplevel module lambda
-         error gc_preserve_begin gc_preserve_end import using export inline noinline)))
+         error gc_preserve_begin gc_preserve_end export public inline noinline purity)))
 
 (define (local-in? s lam (tab #f))
   (or (and tab (has? tab s))
@@ -3808,8 +4010,6 @@ f(x) = yt(x)
              (let ((prev  (table.clone live))
                    (decl- (table.clone decl)))
                (let ((result (eager-any visit (cdr e))))
-                 (if (eq? (car e) '_while)
-                     (kill))  ;; body might not have run
                  (leave-loop! decl-)
                  (if result
                      #t
@@ -3860,20 +4060,22 @@ f(x) = yt(x)
       (let ((cv (assq v (cadr (lam:vinfo lam)))))
         (and cv (vinfo:asgn cv) (vinfo:capt cv)))))
 
+(define (is-var-nospecialize? v lam)
+  (let ((vi (assq v (car (lam:vinfo lam)))))
+    (and vi (vinfo:nospecialize vi))))
+
 (define (toplevel-preserving? e)
-  (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally trycatchelse))))
+  (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally trycatchelse = const))))
 
-(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table)))
+(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack (globals (table)) (locals (table)))
   (if toplevel
       (map (lambda (x)
-             (let ((tl (lift-toplevel (cl-convert x fname lam namemap defined
-                                                  (and toplevel (toplevel-preserving? x))
-                                                  interp opaq globals locals))))
+             (let ((tl (lift-toplevel (cl-convert x fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals))))
                (if (null? (cdr tl))
                    (car tl)
                    `(block ,@(cdr tl) ,(car tl)))))
            exprs)
-      (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq globals locals)) exprs)))
+      (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq toplevel-pure parsed-method-stack globals locals)) exprs)))
 
 (define (prepare-lambda! lam)
   ;; mark all non-arguments as assigned, since locals that are never assigned
@@ -3882,11 +4084,17 @@ f(x) = yt(x)
             (list-tail (car (lam:vinfo lam)) (length (lam:args lam))))
   (lambda-optimize-vars! lam))
 
-(define (cl-convert e fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table)))
+;; must start with a hash and second character must be numeric
+(define (anon-function-name? str)
+  (and (>= (string-length str) 2)
+       (char=? (string.char str 0) #\#)
+       (char-numeric? (string.char str 1))))
+
+(define (cl-convert- e fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack (globals (table)) (locals (table)))
   (if (and (not lam)
            (not (and (pair? e) (memq (car e) '(lambda method macro opaque_closure)))))
       (if (atom? e) e
-          (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals)))
+          (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals)))
       (cond
        ((symbol? e)
         (define (new-undef-var name)
@@ -3905,7 +4113,10 @@ f(x) = yt(x)
                  (val (if (equal? typ '(core Any))
                           val
                           `(call (core typeassert) ,val
-                                 ,(cl-convert typ fname lam namemap defined toplevel interp opaq globals locals)))))
+                                 ,(let ((convt (cl-convert typ fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals)))
+                                    (if (or (symbol-like? convt) (quoted? convt))
+                                        convt
+                                        (renumber-assigned-ssavalues convt)))))))
             `(block
                ,@(if (eq? box access) '() `((= ,access ,box)))
                ,undefcheck
@@ -3929,7 +4140,11 @@ f(x) = yt(x)
        ((atom? e) e)
        (else
         (case (car e)
-          ((quote top core globalref outerref thismodule lineinfo line break inert module toplevel null true false meta) e)
+          ((quote top core globalref thismodule thisfunction lineinfo line break inert module toplevel null true false meta) e)
+          ((toplevel_pure)
+           ;; Used to wrap the Expr returned from generation functions: do not
+           ;; generate top-level side effects for this Expr (declare_global).
+           (cl-convert- (cadr e) fname lam namemap defined toplevel interp opaq #t parsed-method-stack globals locals))
           ((toplevel-only)
            ;; hack to avoid generating a (method x) expr for struct types
            (if (eq? (cadr e) 'struct)
@@ -3937,8 +4152,8 @@ f(x) = yt(x)
            e)
           ((=)
            (let ((var (cadr e))
-                 (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq globals locals)))
-             (convert-assignment var rhs fname lam interp opaq globals locals)))
+                 (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals)))
+             (convert-assignment var rhs fname lam interp opaq toplevel-pure parsed-method-stack globals locals)))
           ((local-def) ;; make new Box for local declaration of defined variable
            (let ((vi (get locals (cadr e) #f)))
              (if (and vi (vinfo:asgn vi) (vinfo:capt vi))
@@ -3951,14 +4166,21 @@ f(x) = yt(x)
                  (if (vinfo:never-undef vi)
                      '(null)
                      `(newvar ,(cadr e))))))
+          ((global)
+           `(toplevel-butfirst
+             (unused-only global)
+             ;; Leftover `global` forms become weak globals.
+             ,.(if toplevel-pure
+                   '()
+                   `(,(if (globalref? (cadr e))
+                          `(call (core declare_global) ,(cadr (cadr e)) (inert ,(caddr (cadr e))) (false))
+                          `(call (core declare_global) (thismodule) (inert ,(cadr e)) (false)))
+                     (latestworld)))))
           ((const)
-           (put! globals (binding-to-globalref (cadr e)) #f)
+           (when (globalref? (cadr e))
+             (put! globals (cadr e) #f))
            e)
           ((atomic) e)
-          ((const-if-global)
-           (if (local-in? (cadr e) lam locals)
-               '(null)
-               `(const ,(cadr e))))
           ((isdefined) ;; convert isdefined expr to function for closure converted variables
            (let* ((sym (cadr e))
                   (vi (and (symbol? sym) (get locals sym #f)))
@@ -3976,11 +4198,14 @@ f(x) = yt(x)
                     (if (and (vinfo:asgn vi) (vinfo:capt vi))
                         `(call (core isdefined) ,sym (inert contents))
                         e))
-                   (else e))))
+                   (else (if (globalref? sym)
+                      `(call (core isdefinedglobal) ,(cadr sym) (inert ,(caddr sym)))
+                      e)))))
           ((_opaque_closure)
-           (let* ((isva  (caddr e))
-                  (nargs (cadddr e))
-                  (functionloc (caddddr e))
+           (let* ((isva  (car (cddddr e)))
+                  (nargs (cadr (cddddr e)))
+                  (allow-partial (caddr (cddddr e)))
+                  (functionloc   (cadddr (cddddr e)))
                   (lam2  (last e))
                   (vis   (lam:vinfo lam2))
                   (cvs   (map car (cadr vis))))
@@ -3991,9 +4216,10 @@ f(x) = yt(x)
                                            (capt-var-access v fname opaq)
                                            v)))
                                    cvs)))
+               (set-car! (cdddr (lam:vinfo lam2)) '()) ;; must capture static_parameters as values inside opaque_closure
                `(new_opaque_closure
-                 ,(cadr e) (call (core apply_type) (core Union)) (core Any)
-                 (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs)))
+                 ,(cadr e) ,(or (caddr e) '(call (core apply_type) (core Union))) ,(or (cadddr e) '(core Any)) ,allow-partial
+                 (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs) toplevel-pure parsed-method-stack))
                  ,@var-exprs))))
           ((method)
            (let* ((name  (method-expr-name e))
@@ -4002,12 +4228,13 @@ f(x) = yt(x)
                   (vis   (if short '(() () ()) (lam:vinfo lam2)))
                   (cvs   (map car (cadr vis)))
                   (local? (lambda (s) (and lam (symbol? s) (local-in? s lam locals))))
-                  (local (and (not (outerref? (cadr e))) (local? name)))
+                  (local (and (not (globalref? (cadr e))) (local? name)))
                   (sig      (and (not short) (caddr e)))
                   (sp-inits (if (or short (not (eq? (car sig) 'block)))
                                 '()
                                 (map-cl-convert (butlast (cdr sig))
-                                                fname lam namemap defined toplevel interp opaq globals locals)))
+                                                fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals)))
+                  (r        (make-ssavalue))
                   (sig      (and sig (if (eq? (car sig) 'block)
                                          (last sig)
                                          sig))))
@@ -4025,32 +4252,37 @@ f(x) = yt(x)
                                     `(toplevel-butfirst
                                       ;; wrap in toplevel-butfirst so it gets moved higher along with
                                       ;; closure type definitions
+                                      (unnecessary ,(cadr e))
                                       ,e
-                                      (thunk (lambda () (() () 0 ()) (block (return ,e))))))))
+                                      (latestworld)))))
                        ((null? cvs)
                         `(block
                           ,@sp-inits
-                          (method ,(cadr e) ,(cl-convert
+                          (= ,r (method ,(cadr e) ,(cl-convert
                                           ;; anonymous functions with keyword args generate global
                                           ;; functions that refer to the type of a local function
                                           (rename-sig-types sig namemap)
-                                          fname lam namemap defined toplevel interp opaq globals locals)
+                                          fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals)
                                   ,(let ((body (add-box-inits-to-body
                                                 lam2
-                                                (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq (table)
+                                                (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq toplevel-pure parsed-method-stack (table)
                                                             (vinfo-to-table (car (lam:vinfo lam2)))))))
                                      `(lambda ,(cadr lam2)
                                         (,(clear-capture-bits (car vis))
                                          ,@(cdr vis))
-                                        ,body)))))
+                                        ,body))))
+                          (latestworld)
+                          ,r))
                        (else
-                        (let* ((exprs     (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f)))
+                        (let* ((exprs     (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f toplevel-pure parsed-method-stack)))
                                (top-stmts (cdr exprs))
                                (newlam    (compact-and-renumber (linearize (car exprs)) 'none 0)))
                           `(toplevel-butfirst
                             (block ,@sp-inits
-                                   (method ,name ,(cl-convert sig fname lam namemap defined toplevel interp opaq globals locals)
+                                   (= ,r (method ,(cadr e) ,(cl-convert sig fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals)
                                            ,(julia-bq-macro newlam)))
+                                   (latestworld)
+                                   ,r)
                             ,@top-stmts))))
 
                  ;; local case - lift to a new type at top level
@@ -4058,9 +4290,11 @@ f(x) = yt(x)
                         (type-name  (or (get namemap name #f)
                                         (and name
                                              (symbol (string (if (= (string.char (string name) 0) #\#)
-                                                                 ""
-                                                                 "#")
-                                                             name "#" (current-julia-module-counter))))))
+                                                                  (if (anon-function-name? (string name))
+                                                                    (string "#" (current-julia-module-counter parsed-method-stack))
+                                                                    name)
+                                                                  (string "#" name))
+                                                              "#" (current-julia-module-counter parsed-method-stack))))))
                         (alldefs (expr-find-all
                                   (lambda (ex) (and (length> ex 2) (eq? (car ex) 'method)
                                                     (not (eq? ex e))
@@ -4131,33 +4365,31 @@ f(x) = yt(x)
                         (closure-param-syms (map (lambda (s) (make-ssavalue)) closure-param-names))
                         (typedef  ;; expression to define the type
                          (let* ((fieldtypes (map (lambda (v)
-                                                   (if (is-var-boxed? v lam)
-                                                       '(core Box)
-                                                       (make-ssavalue)))
+                                                   (cond ((is-var-boxed? v lam) '(core Box))
+                                                         ((is-var-nospecialize? v lam) (vinfo:type (assq v (car (lam:vinfo lam)))))
+                                                         (else (make-ssavalue))))
                                                  capt-vars))
                                 (para (append closure-param-syms
                                               (filter ssavalue? fieldtypes)))
                                 (fieldnames (append closure-param-names (filter (lambda (v) (not (is-var-boxed? v lam))) capt-vars))))
-                           (if (null? para)
-                               (type-for-closure type-name capt-vars '(core Function))
-                               (type-for-closure-parameterized type-name para fieldnames capt-vars fieldtypes '(core Function)))))
+                           (type-for-closure-parameterized type-name para fieldnames capt-vars fieldtypes '(core Function))))
                         (mk-method ;; expression to make the method
                          (if short '()
                              (let* ((iskw ;; TODO jb/functions need more robust version of this
                                      (contains (lambda (x) (eq? x 'kwftype)) sig))
                                     (renamemap (map cons closure-param-names closure-param-syms))
                                     (arg-defs (replace-vars
-                                               (fix-function-arg-type sig type-name iskw namemap closure-param-syms)
+                                               (fix-function-arg-type sig `(globalref (thismodule) ,type-name) iskw namemap closure-param-syms)
                                                renamemap)))
                                (append (map (lambda (gs tvar)
                                               (make-assignment gs `(call (core TypeVar) ',tvar (core Any))))
                                             closure-param-syms closure-param-names)
-                                       `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq globals locals)
+                                       `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals)
                                                  ,(convert-lambda lam2
                                                                   (if iskw
                                                                       (caddr (lam:args lam2))
                                                                       (car (lam:args lam2)))
-                                                                  #f closure-param-names #f)))))))
+                                                                  #f closure-param-names #f toplevel-pure parsed-method-stack)))))))
                         (mk-closure  ;; expression to make the closure
                          (let* ((var-exprs (map (lambda (v)
                                                   (let ((cv (assq v (cadr (lam:vinfo lam)))))
@@ -4170,13 +4402,13 @@ f(x) = yt(x)
                                 (P (append
                                     closure-param-names
                                     (filter identity (map (lambda (v ve)
-                                                            (if (is-var-boxed? v lam)
+                                                            (if (or (is-var-boxed? v lam) (is-var-nospecialize? v lam))
                                                                 #f
-                                                                `(call (core typeof) ,ve)))
+                                                                `(call (core _typeof_captured_variable) ,ve)))
                                                           capt-vars var-exprs)))))
                            `(new ,(if (null? P)
-                                      type-name
-                                      `(call (core apply_type) ,type-name ,@P))
+                                      `(globalref (thismodule) ,type-name)
+                                      `(call (core apply_type) (globalref (thismodule) ,type-name) ,@P))
                                  ,@var-exprs))))
                    (if (pair? moved-vars)
                        (set-car! (lam:vinfo lam)
@@ -4186,16 +4418,20 @@ f(x) = yt(x)
                    (if (or exists (and short (pair? alldefs)))
                        `(toplevel-butfirst
                          (null)
+                         ,@(map (lambda (v) `(moved-local ,v)) moved-vars)
                          ,@sp-inits
-                         ,@mk-method)
+                         ,@mk-method
+                         (latestworld))
                        (begin
                          (put! defined name #t)
                          `(toplevel-butfirst
-                           ,(convert-assignment name mk-closure fname lam interp opaq globals locals)
+                           ,(convert-assignment name mk-closure fname lam interp opaq toplevel-pure parsed-method-stack globals locals)
                            ,@typedef
+                           (latestworld)
                            ,@(map (lambda (v) `(moved-local ,v)) moved-vars)
                            ,@sp-inits
-                           ,@mk-method))))))))
+                           ,@mk-method
+                           (latestworld)))))))))
           ((lambda)  ;; happens inside (thunk ...) and generated function bodies
            (for-each (lambda (vi) (vinfo:set-asgn! vi #t))
                      (list-tail (car (lam:vinfo e)) (length (lam:args e))))
@@ -4205,14 +4441,14 @@ f(x) = yt(x)
                                        (table)
                                        (table)
                                        (null? (cadr e)) ;; only toplevel thunks have 0 args
-                                       interp opaq globals (vinfo-to-table (car (lam:vinfo e))))))
+                                       interp opaq toplevel-pure parsed-method-stack globals (vinfo-to-table (car (lam:vinfo e))))))
              `(lambda ,(cadr e)
                 (,(clear-capture-bits (car (lam:vinfo e)))
                  () ,@(cddr (lam:vinfo e)))
                 (block ,@body))))
           ;; remaining `::` expressions are type assertions
           ((|::|)
-           (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq globals locals))
+           (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals))
           ;; remaining `decl` expressions are only type assertions if the
           ;; argument is global or a non-symbol.
           ((decl)
@@ -4226,46 +4462,68 @@ f(x) = yt(x)
                          (begin
                            (put! globals ref #t)
                            `(block
-                             (toplevel-only set_binding_type! ,(cadr e))
-                             (call (core set_binding_type!) ,(cadr ref) (inert ,(caddr ref)) ,(caddr e))))
+                             (toplevel-butfirst
+                              (toplevel-only decl ,ref)
+                              (call (core declare_global) ,(cadr ref) (inert ,(caddr ref)) (true) ,(caddr e))
+                              (latestworld))))
                          `(call (core typeassert) ,@(cdr e))))
-                   fname lam namemap defined toplevel interp opaq globals locals))))
+                   fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals))))
           ;; `with-static-parameters` expressions can be removed now; used only by analyze-vars
           ((with-static-parameters)
-           (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq globals locals))
+           (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals))
           (else
            (cons (car e)
-                 (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals))))))))
+                 (map-cl-convert (cdr e) fname lam namemap defined (and toplevel (toplevel-preserving? e)) interp opaq toplevel-pure parsed-method-stack globals locals))))))))
+
+;; wrapper for `cl-convert-`
+(define (cl-convert e fname lam namemap defined toplevel interp opaq toplevel-pure (parsed-method-stack '()) (globals (table)) (locals (table)))
+  (if (is-method? e)
+      (let ((name (method-expr-name e)))
+        (cl-convert- e fname lam namemap defined toplevel interp opaq toplevel-pure (cons name parsed-method-stack) globals locals))
+      (cl-convert- e fname lam namemap defined toplevel interp opaq toplevel-pure parsed-method-stack globals locals)))
 
-(define (closure-convert e) (cl-convert e #f #f (table) (table) #f #f #f))
+(define (closure-convert e) (cl-convert e #f #f (table) (table) #f #f #f #f))
 
 ;; pass 5: convert to linear IR
 
 (define (linearize e)
   (cond ((or (not (pair? e)) (quoted? e)) e)
         ((eq? (car e) 'lambda)
-         (set-car! (cdddr e) (compile-body (cadddr e) (append (car (caddr e))
-                                                              (cadr (caddr e)))
-                                           e)))
-        (else (for-each linearize (cdr e))))
-  e)
+         (list-set e 3 (compile-body (cadddr e)
+                                     (append (car (caddr e))
+                                             (cadr (caddr e))) e)))
+        (else (cons (car e) (map linearize (cdr e))))))
 
 (define (valid-ir-argument? e)
-  (or (simple-atom? e) (symbol? e)
+  (or (simple-atom? e)
       (and (pair? e)
-           (memq (car e) '(quote inert top core globalref outerref
-                                 slot static_parameter boundscheck)))))
+           (memq (car e) '(quote inert top core
+                                 slot static_parameter)))))
 
 (define (valid-ir-rvalue? lhs e)
   (or (ssavalue? lhs)
       (valid-ir-argument? e)
       (and (symbol? lhs) (pair? e)
-           (memq (car e) '(new splatnew the_exception isdefined call invoke foreigncall cfunction gc_preserve_begin copyast new_opaque_closure)))))
+           (memq (car e) '(new splatnew the_exception isdefined call invoke foreigncall cfunction gc_preserve_begin copyast new_opaque_closure globalref)))))
 
 (define (valid-ir-return? e)
   ;; returning lambda directly is needed for @generated
   (or (valid-ir-argument? e) (and (pair? e) (memq (car e) '(lambda)))))
 
+(define (code-trivially-effect-free? e)
+  ;; determine whether the execution of this code can be observed.
+  ;; If not it may be deleted. In general, the only thing we can detect here
+  ;; is empty blocks that only have metadata in them.
+  (if (pair? e)
+    (case (car e)
+      ((block) (every code-trivially-effect-free? (cdr e)))
+      ((line null) #t)
+      (else #f))
+    #t))
+
+(define (tuple-syntax? fptr)
+  (and (pair? fptr) (eq? (car fptr) 'tuple)))
+
 ;; this pass behaves like an interpreter on the given code.
 ;; to perform stateful operations, it calls `emit` to record that something
 ;; needs to be done. in value position, it returns an expression computing
@@ -4278,7 +4536,6 @@ f(x) = yt(x)
         (first-line #t)
         (current-loc #f)
         (rett #f)
-        (global-const-error #f)
         (vinfo-table (vinfo-to-table (car (lam:vinfo lam))))
         (arg-map #f)          ;; map arguments to new names if they are assigned
         (label-counter 0)     ;; counter for generating label addresses
@@ -4289,7 +4546,7 @@ f(x) = yt(x)
                               ;; be emitted at the exit of the block. Code
                               ;; should enter the finally block via `enter-finally-block`.
         (handler-goto-fixups '())  ;; `goto`s that might need `leave` exprs added
-        (handler-level 0)     ;; exception handler nesting depth
+        (handler-token-stack '())  ;; tokens identifying handler stack while active
         (catch-token-stack '())) ;; tokens identifying handler enter for current catch blocks
     (define (emit c)
       (or c (raise "missing value in IR"))
@@ -4320,12 +4577,13 @@ f(x) = yt(x)
         (emit `(= ,(car finally-handler) ,tag))
         (if need-goto
             (let ((label (cadr finally-handler))
-                  (dest-handler-level (cadddr finally-handler))
-                  (dest-tokens        (caddddr finally-handler)))
+                  (dest-handler-tokens (cadddr finally-handler))
+                  (dest-catch-tokens   (caddddr finally-handler)))
               ;; Leave current exception handling scope and jump to finally block
-              (let ((pexc (pop-exc-expr catch-token-stack dest-tokens)))
+              (let ((pexc (pop-exc-expr catch-token-stack dest-catch-tokens)))
                 (if pexc (emit pexc)))
-              (emit `(leave ,(+ 1 (- handler-level dest-handler-level))))
+              (let ((plist (pop-handler-list handler-token-stack (cdr dest-handler-tokens) '())))
+                (emit `(leave ,@plist)))
               (emit `(goto ,label))))
         tag))
     (define (pop-exc-expr src-tokens dest-tokens)
@@ -4338,7 +4596,20 @@ f(x) = yt(x)
                                      (car s)
                                      (loop (cdr s))))))
             `(pop_exception ,restore-token))))
-    (define (emit-return x)
+    (define (pop-handler-list src-tokens dest-tokens lab)
+      (if (eq? src-tokens dest-tokens)
+          #f
+          (reverse
+            (let loop ((s src-tokens)
+                       (l '()))
+              (if (not (pair? s))
+                  (if (null? lab)
+                    (error "Attempt to jump into catch block")
+                    (error (string "cannot goto label \"" lab "\" inside try/catch block"))))
+              (if (eq? (cdr s) dest-tokens)
+                  (cons (car s) l)
+                  (loop (cdr s) (cons (car s) l)))))))
+    (define (emit-return tail x)
       (define (emit- x)
         (let* ((tmp (if ((if (null? catch-token-stack) valid-ir-return? simple-atom?) x)
                         #f
@@ -4347,35 +4618,39 @@ f(x) = yt(x)
               (begin (emit `(= ,tmp ,x)) tmp)
               x)))
       (define (actually-return x)
-        (let* ((x (if rett
-                      (compile (convert-for-type-decl (emit- x) rett #t lam) '() #t #f)
+        (let* ((x (begin0 (emit- x)
+                          ;; if we are adding an implicit return then mark it as having no location
+                          (if (not (eq? tail 'explicit))
+                              (emit '(line #f)))))
+               (x (if rett
+                      (compile (convert-for-type-decl x rett #t lam) '() #t #f)
                       x))
                (x (emit- x)))
           (let ((pexc (pop-exc-expr catch-token-stack '())))
             (if pexc (emit pexc)))
           (emit `(return ,x))))
       (if x
-          (if (> handler-level 0)
+          (if (null? handler-token-stack)
+              (actually-return x)
               (let ((tmp (cond ((and (simple-atom? x) (or (not (ssavalue? x)) (not finally-handler))) #f)
                                (finally-handler  (new-mutable-var))
                                (else             (make-ssavalue)))))
                 (if tmp (emit `(= ,tmp ,x)))
                 (if finally-handler
                     (enter-finally-block `(return ,(or tmp x)))
-                    (begin (emit `(leave ,handler-level))
+                    (begin (emit `(leave ,@handler-token-stack))
                            (actually-return (or tmp x))))
-                (or tmp x))
-              (actually-return x))))
+                (or tmp x)))))
     (define (emit-break labl)
-      (let ((lvl (caddr labl))
+      (let ((dest-handler-tokens (caddr labl))
             (dest-tokens (cadddr labl)))
-        (if (and finally-handler (> (cadddr finally-handler) lvl))
+        (if (and finally-handler (> (length (cadddr finally-handler)) (length dest-handler-tokens)))
             (enter-finally-block `(break ,labl))
             (begin
               (let ((pexc (pop-exc-expr catch-token-stack dest-tokens)))
                 (if pexc (emit pexc)))
-              (if (> handler-level lvl)
-                  (emit `(leave ,(- handler-level lvl))))
+              (let ((plist (pop-handler-list handler-token-stack dest-handler-tokens '())))
+                (if plist (emit `(leave ,@plist))))
               (emit `(goto ,(cadr labl)))))))
     (define (new-mutable-var . name)
       (let ((g (if (null? name) (gensy) (named-gensy (car name)))))
@@ -4389,52 +4664,64 @@ f(x) = yt(x)
           ((primitive_type)    "\"primitive type\" expression")
           ((struct_type)       "\"struct\" expression")
           ((method)            "method definition")
-          ((set_binding_type!) (string "type declaration for global \"" (deparse (cadr e)) "\""))
+          ((latestworld)       "World age increment")
+          ((decl)              (string "type declaration for global \"" (deparse (cadr e)) "\""))
           (else                (string "\"" h "\" expression"))))
       (if (not (null? (cadr lam)))
           (error (string (head-to-text (car e)) " not at top level"))))
+    (define (valid-body-ir-argument? aval)
+      (or (valid-ir-argument? aval)
+          (and (symbol? aval) ; Arguments are always defined slots
+               (or (memq aval (lam:args lam))
+                   (let ((vi (get vinfo-table aval #f)))
+                     (and vi (vinfo:never-undef vi)))))))
+    (define (single-assign-var? aval)
+      (and (symbol? aval) ; Arguments are always sa
+           (or (memq aval (lam:args lam))
+               (let ((vi (get vinfo-table aval #f)))
+                 (and vi (vinfo:sa vi))))))
+    ;; TODO: We could also allow const globals here
+    (define (const-read-arg? x)
+      ;; Even if we have side effects, we know that singly-assigned
+      ;; locals cannot be affected them, so we can inline them anyway.
+      (or (simple-atom? x) (single-assign-var? x)
+        (and (pair? x)
+          (memq (car x) '(quote inert top core)))))
     ;; evaluate the arguments of a call, creating temporary locations as needed
     (define (compile-args lst break-labels)
       (if (null? lst) '()
-          (let ((simple? (every (lambda (x) (or (simple-atom? x) (symbol? x)
-                                                (and (pair? x)
-                                                     (memq (car x) '(quote inert top core globalref outerref boundscheck)))))
-                                lst)))
-            (let loop ((lst  lst)
-                       (vals '()))
-              (if (null? lst)
-                  (reverse! vals)
-                  (let* ((arg (car lst))
-                         (aval (or (compile arg break-labels #t #f)
-                                   ;; TODO: argument exprs that don't yield a value?
-                                   '(null))))
-                    (loop (cdr lst)
-                          (cons (if (and (not simple?)
-                                         (not (simple-atom? arg))
-                                         (not (simple-atom? aval))
-                                         (not (and (pair? arg)
-                                                   (memq (car arg) '(quote inert top core boundscheck))))
-                                         (not (and (symbol? aval) ;; function args are immutable and always assigned
-                                                   (memq aval (lam:args lam))))
-                                         (not (and (or (symbol? arg)
-                                                       (and (pair? arg)
-                                                            (memq (car arg) '(globalref outerref))))
-                                                   (or (null? (cdr lst))
-                                                       (null? vals)))))
-                                    (let ((tmp (make-ssavalue)))
-                                      (emit `(= ,tmp ,aval))
-                                      tmp)
-                                    aval)
-                                vals))))))))
+        ;; First check if all the arguments as simple (and therefore side-effect free).
+        ;; Otherwise, we need to use ssa values for all arguments to ensure proper
+        ;; left-to-right evaluation semantics.
+        (let ((simple? (every (lambda (x) (or (simple-atom? x) (symbol? x)
+                                              (and (pair? x)
+                                                   (memq (car x) '(quote inert top core globalref)))))
+                              lst)))
+          (let loop ((lst  lst)
+                     (vals '()))
+            (if (null? lst)
+                (reverse! vals)
+                (let* ((arg (car lst))
+                       (aval (or (compile arg break-labels #t #f)
+                                 ;; TODO: argument exprs that don't yield a value?
+                                 '(null))))
+                  (loop (cdr lst)
+                        (cons (if (and
+                                   (or simple? (const-read-arg? aval))
+                                   (valid-body-ir-argument? aval))
+                                  aval
+                                  (let ((tmp (make-ssavalue)))
+                                    (emit `(= ,tmp ,aval))
+                                    tmp))
+                              vals))))))))
     (define (compile-cond ex break-labels)
       (let ((cnd (or (compile ex break-labels #t #f)
                      ;; TODO: condition exprs that don't yield a value?
                      '(null))))
-        (if (not (valid-ir-argument? cnd))
+        (if (valid-body-ir-argument? cnd) cnd
             (let ((tmp (make-ssavalue)))
               (emit `(= ,tmp ,cnd))
-              tmp)
-            cnd)))
+              tmp))))
     (define (emit-cond cnd break-labels endl)
       (let* ((cnd (if (and (pair? cnd) (eq? (car cnd) 'block))
                       (flatten-ex 'block cnd)
@@ -4461,14 +4748,24 @@ f(x) = yt(x)
                                  (cdr cnd)
                                  (list cnd))))))
           tests))
-    (define (emit-assignment lhs rhs)
+    (define (emit-assignment-or-setglobal lhs rhs (op '=))
+      ;; (= (globalref _ _) _)     => setglobal!
+      ;; (const (globalref _ _) _) => declare_const
+      (cond ((and (globalref? lhs) (eq? op '=))
+             (emit `(call (core setglobal!) ,(cadr lhs) (inert ,(caddr lhs)) ,rhs)))
+            ((and (globalref? lhs) (eq? op 'const))
+             (emit `(call (core declare_const) ,(cadr lhs) (inert ,(caddr lhs)) ,rhs)))
+            (else
+             (assert (eq? op '=))
+             (emit `(= ,lhs ,rhs)))))
+    (define (emit-assignment lhs rhs (op '=))
       (if rhs
           (if (valid-ir-rvalue? lhs rhs)
-              (emit `(= ,lhs ,rhs))
+              (emit-assignment-or-setglobal lhs rhs op)
               (let ((rr (make-ssavalue)))
                 (emit `(= ,rr ,rhs))
-                (emit `(= ,lhs ,rr))))
-          (emit `(= ,lhs (null)))) ; in unreachable code (such as after return), still emit the assignment so that the structure of those uses is preserved
+                (emit-assignment-or-setglobal lhs rr op)))
+          (emit-assignment-or-setglobal lhs `(null) op)) ; in unreachable code (such as after return), still emit the assignment so that the structure of those uses is preserved
       #f)
     ;; the interpreter loop. `break-labels` keeps track of the labels to jump to
     ;; for all currently closing break-blocks.
@@ -4478,20 +4775,18 @@ f(x) = yt(x)
     ;; from the current function.
     (define (compile e break-labels value tail)
       (if (or (not (pair? e)) (memq (car e) '(null true false ssavalue quote inert top core copyast the_exception $
-                                                   globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall)))
+                                                   globalref thismodule cdecl stdcall fastcall thiscall llvmcall static_parameter)))
           (let ((e1 (if (and arg-map (symbol? e))
                         (get arg-map e e)
                         e)))
-            (if (and value (or (underscore-symbol? e)
-                               (and (pair? e) (or (eq? (car e) 'outerref)
-                                                  (eq? (car e) 'globalref))
-                                    (underscore-symbol? (cadr e)))))
-                (error (string "all-underscore identifier used as rvalue" (format-loc current-loc))))
-            (cond (tail  (emit-return e1))
+            (if (or (underscore-symbol? e)
+                    (and (pair? e) (eq? (car e) 'globalref)
+                         (underscore-symbol? (cadr e))))
+                (error (string "all-underscore identifiers are write-only and their values cannot be used in expressions" (format-loc current-loc))))
+            (cond (tail  (emit-return tail e1))
                   (value e1)
                   ((symbol? e1) (emit e1) #f)  ;; keep symbols for undefined-var checking
-                  ((and (pair? e1) (eq? (car e1) 'outerref)) (emit e1) #f)  ;; keep globals for undefined-var checking
-                  ((and (pair? e1) (eq? (car e1) 'globalref)) (emit e1) #f) ;; keep globals for undefined-var checking
+                  ((and (pair? e1) (memq (car e1) '(globalref static_parameter))) (emit e1) #f) ;; keep for undefined-var checking
                   (else #f)))
           (case (car e)
             ((call new splatnew foreigncall cfunction new_opaque_closure)
@@ -4501,10 +4796,10 @@ f(x) = yt(x)
              (let* ((args
                      (cond ((eq? (car e) 'foreigncall)
                             ;; NOTE: 2nd to 5th arguments of ccall must be left in place
-                            ;;       the 1st should be compiled if an atom.
-                            (append (if (atom-or-not-tuple-call? (cadr e))
-                                        (compile-args (list (cadr e)) break-labels)
-                                        (list (cadr e)))
+                            ;;       the 1st should be compiled unless it is a syntactic tuple from earlier
+                            (append (if (tuple-syntax? (cadr e))
+                                        (list (cadr e))
+                                        (compile-args (list (cadr e)) break-labels))
                                     (list-head (cddr e) 4)
                                     (compile-args (list-tail e 6) break-labels)))
                            ;; NOTE: arguments of cfunction must be left in place
@@ -4514,17 +4809,17 @@ f(x) = yt(x)
                               (cons (cadr e) (cons fptr (cdddr e)))))
                            ;; Leave a literal lambda in place for later global expansion
                            ((eq? (car e) 'new_opaque_closure)
-                            (let* ((oc_method (car (list-tail (cdr e) 3))) ;; opaque_closure_method
+                            (let* ((oc_method (car (list-tail (cdr e) 4))) ;; opaque_closure_method
                                    (lambda (list-ref oc_method 5))
                                    (lambda (linearize lambda)))
                               (append
-                               (compile-args (list-head (cdr e) 3) break-labels)
+                               (compile-args (list-head (cdr e) 4) break-labels)
                                (list (append (butlast oc_method) (list lambda)))
-                               (compile-args (list-tail (cdr e) 4) break-labels))))
+                               (compile-args (list-tail (cdr e) 5) break-labels))))
                            ;; NOTE: 1st argument to cglobal treated same as for ccall
                            ((and (length> e 2)
                                  (or (eq? (cadr e) 'cglobal)
-                                     (equal? (cadr e) '(outerref cglobal))))
+                                     (equal? (cadr e) '(globalref (thismodule) cglobal))))
                             (append (list (cadr e))
                                     (if (atom-or-not-tuple-call? (caddr e))
                                         (compile-args (list (caddr e)) break-labels)
@@ -4533,26 +4828,36 @@ f(x) = yt(x)
                            (else
                             (compile-args (cdr e) break-labels))))
                     (callex (cons (car e) args)))
-               (cond (tail (emit-return callex))
+               (cond (tail (emit-return tail callex))
                      (value callex)
                      (else (emit callex)))))
-            ((=)
+            ((= const)
+             (when (eq? (car e) 'const)
+               (when (local-in? (cadr e) lam)
+                 (error (string "unsupported `const` declaration on local variable" (format-loc current-loc))))
+               (when (pair? (cadr lam))
+                 (error (string "`global const` declaration not allowed inside function" (format-loc current-loc)))))
              (let ((lhs (cadr e)))
-               (if (and (symbol? lhs) (underscore-symbol? lhs))
-                   (compile (caddr e) break-labels value tail)
-                   (let* ((rhs (compile (caddr e) break-labels #t #f))
-                          (lhs (if (and arg-map (symbol? lhs))
-                                   (get arg-map lhs lhs)
-                                   lhs)))
-                     (if (and value rhs)
-                         (let ((rr (if (or (atom? rhs) (ssavalue? rhs) (eq? (car rhs) 'null))
-                                       rhs (make-ssavalue))))
-                           (if (not (eq? rr rhs))
-                               (emit `(= ,rr ,rhs)))
-                           (emit `(= ,lhs ,rr))
-                           (if tail (emit-return rr))
-                           rr)
-                         (emit-assignment lhs rhs))))))
+               (cond ((and (symbol? lhs) (underscore-symbol? lhs))
+                      (compile (caddr e) break-labels value tail))
+                     ((and (eq? (car e) 'const) (null? (cddr e)) (globalref? (cadr e)))
+                      ;; No RHS - make undefined constant
+                      (let ((lhs (cadr e)))
+                        (emit `(call (core declare_const) ,(cadr lhs) (inert ,(caddr lhs))))))
+                     (else
+                      (let* ((rhs (compile (caddr e) break-labels #t #f))
+                             (lhs (if (and arg-map (symbol? lhs))
+                                      (get arg-map lhs lhs)
+                                      lhs)))
+                        (if (and value rhs)
+                            (let ((rr (if (or (atom? rhs) (ssavalue? rhs) (eq? (car rhs) 'null))
+                                          rhs (make-ssavalue))))
+                              (if (not (eq? rr rhs))
+                                  (emit `(= ,rr ,rhs)))
+                              (emit-assignment-or-setglobal lhs rr (car e))
+                              (if tail (emit-return tail rr))
+                              rr)
+                            (emit-assignment lhs rhs (car e))))))))
             ((block)
              (let* ((last-fname filename)
                     (fnm        (first-non-meta e))
@@ -4603,7 +4908,7 @@ f(x) = yt(x)
                   (if file-diff (set! filename last-fname))
                   v)))
             ((return)
-             (compile (cadr e) break-labels #t #t)
+             (compile (cadr e) break-labels #t 'explicit)
              #f)
             ((unnecessary)
              ;; `unnecessary` marks expressions generated by lowering that
@@ -4618,7 +4923,8 @@ f(x) = yt(x)
                (let ((v1 (compile (caddr e) break-labels value tail)))
                  (if val (emit-assignment val v1))
                  (if (and (not tail) (or (length> e 3) val))
-                     (emit end-jump))
+                     (begin (emit `(line #f))
+                            (emit end-jump)))
                  (let ((elselabel (make&mark-label)))
                    (for-each (lambda (test)
                                (set-car! (cddr test) elselabel))
@@ -4630,7 +4936,7 @@ f(x) = yt(x)
                    (if (not tail)
                        (set-car! (cdr end-jump) (make&mark-label))
                        (if (length= e 3)
-                           (emit-return v2)))
+                           (emit-return tail v2)))
                    val))))
             ((_while)
              (let* ((endl (make-label))
@@ -4652,7 +4958,7 @@ f(x) = yt(x)
             ((break-block)
              (let ((endl (make-label)))
                (compile (caddr e)
-                        (cons (list (cadr e) endl handler-level catch-token-stack)
+                        (cons (list (cadr e) endl handler-token-stack catch-token-stack)
                               break-labels)
                         #f #f)
                (mark-label endl))
@@ -4666,13 +4972,13 @@ f(x) = yt(x)
              (if (eq? (car e) 'symboliclabel)
                  (if (has? label-nesting (cadr e))
                      (error (string "label \"" (cadr e) "\" defined multiple times"))
-                     (put! label-nesting (cadr e) (list handler-level catch-token-stack))))
+                     (put! label-nesting (cadr e) (list handler-token-stack catch-token-stack))))
              (let ((m (get label-map (cadr e) #f)))
                (if m
                    (emit `(label ,m))
                    (put! label-map (cadr e) (make&mark-label)))
                (if tail
-                   (emit-return '(null))
+                   (emit-return tail '(null))
                    (if value (error "misplaced label")))))
             ((symbolicgoto)
              (let* ((m (get label-map (cadr e) #f))
@@ -4682,28 +4988,34 @@ f(x) = yt(x)
                (emit `(null))  ;; save space for `leave` that might be needed
                (emit `(goto ,m))
                (set! handler-goto-fixups
-                     (cons (list code handler-level catch-token-stack (cadr e)) handler-goto-fixups))
+                     (cons (list code handler-token-stack catch-token-stack (cadr e)) handler-goto-fixups))
                #f))
 
             ;; exception handlers are lowered using
-            ;; (= tok (enter L)) - push handler with catch block at label L, yielding token
+            ;; (= tok (enter L scope))
+            ;;      push handler with catch block at label L and scope `scope`, yielding token
+            ;;      `scope` is only recognized for tryfinally and may be omitted in the lowering
             ;; (leave n) - pop N exception handlers
             ;; (pop_exception tok) - pop exception stack back to state of associated enter
             ((trycatch tryfinally trycatchelse)
              (let ((handler-token (make-ssavalue))
                    (catch (make-label))
+                   (catchcode (if (eq? (car e) 'tryfinally) '(call (top rethrow)) (caddr e)))
                    (els   (and (eq? (car e) 'trycatchelse) (make-label)))
                    (endl  (make-label))
                    (last-finally-handler finally-handler)
-                   (finally           (if (eq? (car e) 'tryfinally) (new-mutable-var) #f))
+                   ;; Special case optimization: If the finally block is trivially empty, don't perform finally
+                   ;; lowering, just lower this as a try/catch block with rethrow and scope hnadling.
+                   (finally           (if (and (eq? (car e) 'tryfinally) (not (code-trivially-effect-free? (caddr e)))) (new-mutable-var) #f))
+                   (scope             (if (eq? (car e) 'tryfinally) (cdddr e) '()))
                    (my-finally-handler #f))
                ;; handler block entry
-               (emit `(= ,handler-token (enter ,catch)))
-               (set! handler-level (+ handler-level 1))
-               (if finally (begin (set! my-finally-handler (list finally endl '() handler-level catch-token-stack))
+               (emit `(= ,handler-token (enter ,catch ,@(compile-args scope break-labels))))
+               (set! handler-token-stack (cons handler-token handler-token-stack))
+               (if finally (begin (set! my-finally-handler (list finally endl '() handler-token-stack catch-token-stack))
                                   (set! finally-handler my-finally-handler)
                                   (emit `(= ,finally -1))))
-               (let* ((v1  (compile (cadr e) break-labels value #f)) ;; emit try block code
+               (let* ((v1 (compile (cadr e) break-labels value #f)) ;; emit try block code
                       (val (if (and value (not tail))
                                (new-mutable-var) #f)))
                  ;; handler block postfix
@@ -4711,26 +5023,30 @@ f(x) = yt(x)
                  (if tail
                      (begin (if els
                                 (begin (if (and (not val) v1) (emit v1))
-                                       (emit '(leave 1)))
-                                (if v1 (emit-return v1)))
+                                       (emit `(leave ,handler-token)))
+                                (if v1 (emit-return tail v1)))
                             (if (not finally) (set! endl #f)))
-                     (begin (emit '(leave 1))
+                     (begin (emit `(leave ,handler-token))
                             (emit `(goto ,(or els endl)))))
-                 (set! handler-level (- handler-level 1))
+                 (set! handler-token-stack (cdr handler-token-stack))
                  ;; emit else block
                  (if els
                      (begin (mark-label els)
                             (let ((v3 (compile (cadddr e) break-labels value tail))) ;; emit else block code
                               (if val (emit-assignment val v3)))
                             (if endl (emit `(goto ,endl)))))
-                 ;; emit either catch or finally block
+                 ;; emit either catch or finally block. A combined try/catch/finally block was split into
+                 ;; separate trycatch and tryfinally blocks earlier.
                  (mark-label catch)
-                 (emit `(leave 1))
                  (if finally
-                     (begin (enter-finally-block '(call (top rethrow)) #f) ;; enter block via exception
+                     (begin (set! finally-handler last-finally-handler)
+                            (set! catch-token-stack (cons handler-token catch-token-stack))
+                            (compile (caddr e) break-labels #f #f) ;; enter block via exception
+                            (emit '(call (top rethrow)))
+                            (emit-return tail '(null)) ; unreachable
+                            (set! catch-token-stack (cdr catch-token-stack))
                             (mark-label endl) ;; non-exceptional control flow enters here
-                            (set! finally-handler last-finally-handler)
-                            (compile (caddr e) break-labels #f #f)
+                            (compile (renumber-assigned-ssavalues (caddr e)) break-labels #f #f)
                             ;; emit actions to be taken at exit of finally
                             ;; block, depending on the tag variable `finally`
                             (let loop ((actions (caddr my-finally-handler)))
@@ -4744,14 +5060,14 @@ f(x) = yt(x)
                                           (emit `(= ,tmp (call (core ===) ,finally ,(caar actions))))
                                           (emit `(gotoifnot ,tmp ,skip))))
                                     (let ((ac (cdar actions)))
-                                      (cond ((eq? (car ac) 'return) (emit-return (cadr ac)))
+                                      (cond ((eq? (car ac) 'return) (emit-return tail (cadr ac)))
                                             ((eq? (car ac) 'break)  (emit-break (cadr ac)))
                                             (else ;; assumed to be a rethrow
                                              (emit ac))))
                                     (if skip (mark-label skip))
                                     (loop (cdr actions))))))
                      (begin (set! catch-token-stack (cons handler-token catch-token-stack))
-                            (let ((v2 (compile (caddr e) break-labels value tail)))
+                            (let ((v2 (compile catchcode break-labels value tail)))
                               (if val (emit-assignment val v2))
                               (if (not tail) (emit `(pop_exception ,handler-token)))
                                              ;; else done in emit-return from compile
@@ -4766,25 +5082,14 @@ f(x) = yt(x)
                       (has? vinfo-table (cadr e)))
                  (emit e)
                  #f))
-            ((global) ; keep global declarations as statements
-             (if value (error "misplaced \"global\" declaration"))
-             (emit e))
             ((local-def) #f)
             ((local) #f)
             ((moved-local)
              (set-car! (lam:vinfo lam) (append (car (lam:vinfo lam)) `((,(cadr e) Any 2))))
              #f)
-            ((const)
-             (if (local-in? (cadr e) lam)
-                 (error (string "unsupported `const` declaration on local variable" (format-loc current-loc)))
-                 (if (pair? (cadr lam))
-                     ;; delay this error to allow "misplaced struct" errors to happen first
-                     (if (not global-const-error)
-                         (set! global-const-error current-loc))
-                     (emit e))))
             ((atomic) (error "misplaced atomic declaration"))
-            ((isdefined) (if tail (emit-return e) e))
-            ((boundscheck) (if tail (emit-return e) e))
+            ((isdefined throw_undef_if_not) (if tail (emit-return tail e) e))
+            ((boundscheck) (if tail (emit-return tail e) e))
 
             ((method)
              (if (not (null? (cadr lam)))
@@ -4803,22 +5108,29 @@ f(x) = yt(x)
                                  (let ((l  (make-ssavalue)))
                                    (emit `(= ,l ,(compile lam break-labels #t #f)))
                                    l))))
-                   (emit `(method ,(or (cadr e) '(false)) ,sig ,lam))
-                   (if value (compile '(null) break-labels value tail)))
-                 (cond (tail  (emit-return e))
+                   (let ((val (make-ssavalue)))
+                    (emit `(= ,val (method ,(or (cadr e) '(false)) ,sig ,lam)))
+                    (if tail (emit-return tail val))
+                    val))
+                 (cond (tail  (emit-return tail e))
                        (value e)
                        (else  (emit e)))))
             ((lambda)
              (let ((temp (linearize e)))
-               (cond (tail  (emit-return temp))
+               (cond (tail  (emit-return tail temp))
                      (value temp)
                      (else  (emit temp)))))
 
             ;; top level expressions
-            ((thunk module)
+            ((thunk)
              (check-top-level e)
              (emit e)
-             (if tail (emit-return '(null)))
+             (if tail (emit-return tail '(null)))
+             '(null))
+            ((module)
+             (check-top-level e)
+             (emit e)
+             (if tail (emit-return tail '(null)))
              '(null))
             ((toplevel-only)
              (check-top-level (cdr e))
@@ -4828,16 +5140,24 @@ f(x) = yt(x)
              (check-top-level e)
              (let ((val (make-ssavalue)))
                (emit `(= ,val ,e))
-               (if tail (emit-return val))
+               (emit `(latestworld))
+               (if tail (emit-return tail val))
                val))
 
+            ((latestworld-if-toplevel)
+             (if (null? (cadr lam))
+               (emit `(latestworld)))
+             '(null))
+
             ;; other top level expressions
-            ((import using export)
+            ((export public latestworld)
              (check-top-level e)
-             (emit e)
+             (if (not (eq? (car e) 'latestworld))
+              (emit e))
+             (emit `(latestworld))
              (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return))))
                (if (and tail (not have-ret?))
-                   (emit-return '(null))))
+                   (emit-return tail '(null))))
              '(null))
 
             ((gc_preserve_begin)
@@ -4845,7 +5165,7 @@ f(x) = yt(x)
                (cons (car e) args)))
 
             ;; metadata expressions
-            ((lineinfo line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline)
+            ((lineinfo line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline purity)
              (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return))))
                (cond ((eq? (car e) 'line)
                       (set! current-loc e)
@@ -4861,15 +5181,51 @@ f(x) = yt(x)
                      (else
                       (emit e)))
                (if (and tail (not have-ret?))
-                   (emit-return '(null)))
+                   (emit-return tail '(null)))
                '(null)))
 
             ;; unsupported assignment operators
             ((≔ ⩴ ≕ :=)
              (error (string "unsupported assignment operator \"" (deparse (car e)) "\"")))
 
+            ;; Returned from statements that should error if the result is used
+            ((unused-only)
+             (assert (eq? (cadr e) 'global))
+             (when (and value (not tail))
+               (error (string "misplaced \"global\" declaration")))
+             (when tail
+               (emit-return tail '(null))))
+
+            ;; bare :escape
+            ((escape)
+             (error (string "\"esc(...)\" used outside of macro expansion")))
+
             ((error)
              (error (cadr e)))
+
+            ;; thisfunction replaced with first argument name
+            ((thisfunction)
+             (let ((first-arg (and (pair? (lam:args lam)) (car (lam:args lam)))))
+               (if first-arg
+                   (let* ((arg-name (arg-name first-arg))
+                          ;; Check for thisfunction-original metadata in keyword wrapper functions
+                          (original-name (let ((body (lam:body lam)))
+                                          (and (pair? body) (pair? (cdr body))
+                                               (let loop ((stmts (cdr body)))
+                                                 (if (pair? stmts)
+                                                     (let ((stmt (car stmts)))
+                                                       (if (and (pair? stmt) (eq? (car stmt) 'meta)
+                                                                (pair? (cdr stmt)) (eq? (cadr stmt) 'thisfunction-original)
+                                                                (pair? (cddr stmt)))
+                                                           (caddr stmt)
+                                                           (loop (cdr stmts))))
+                                                     #f)))))
+                          (final-name (or original-name arg-name)))
+                     (cond (tail  (emit-return tail final-name))
+                           (value final-name)
+                           (else (emit final-name) #f)))
+                    (error "\"@__FUNCTION__\" can only be used inside a function"))))
+
             (else
              (error (string "invalid syntax " (deparse e)))))))
     ;; introduce new slots for assigned arguments
@@ -4883,24 +5239,19 @@ f(x) = yt(x)
     (compile e '() #t #t)
     (for-each (lambda (x)
                 (let ((point (car x))
-                      (hl    (cadr x))
-                      (src-tokens (caddr x))
+                      (src-handler-tokens (cadr x))
+                      (src-catch-tokens (caddr x))
                       (lab   (cadddr x)))
                   (let ((target-nesting (get label-nesting lab #f)))
                     (if (not target-nesting)
                         (error (string "label \"" lab "\" referenced but not defined")))
-                    (let ((target-level (car target-nesting)))
-                      (cond ((> target-level hl)
-                            (error (string "cannot goto label \"" lab "\" inside try/catch block")))
-                            ((= target-level hl)
-                             (set-cdr! point (cddr point))) ;; remove empty slot
-                            (else
-                             (set-car! (cdr point) `(leave ,(- hl target-level))))))
-                    (let ((pexc (pop-exc-expr src-tokens (cadr target-nesting))))
-                      (if pexc (set-cdr! point (cons pexc (cdr point))))))))
+                    (let ((target-handler-tokens (car target-nesting))
+                          (target-catch-tokens (cadr target-nesting)))
+                      (let ((plist (pop-handler-list src-handler-tokens target-handler-tokens lab)))
+                        (if plist (set-car! (cdr point) `(leave ,@plist))))
+                      (let ((pexc (pop-exc-expr src-catch-tokens target-catch-tokens)))
+                        (if pexc (set-cdr! point (cons pexc (cdr point)))))))))
               handler-goto-fixups)
-    (if global-const-error
-        (error (string "`global const` declaration not allowed inside function" (format-loc global-const-error))))
     (let* ((stmts (reverse! code))
            (di    (definitely-initialized-vars stmts vi))
            (body  (cons 'block (filter (lambda (e)
@@ -4966,19 +5317,29 @@ f(x) = yt(x)
              (list ,@(cadr vi)) ,(caddr vi) (list ,@(cadddr vi)))
        ,@(cdddr lam))))
 
-(define (make-lineinfo name file line (inlined-at #f))
-  `(lineinfo (thismodule) ,(if inlined-at '|macro expansion| name) ,file ,line ,(or inlined-at 0)))
+(define (make-lineinfo file line (inlined-at #f))
+  `(lineinfo ,file ,line ,(or inlined-at 0)))
 
 (define (set-lineno! lineinfo num)
-  (set-car! (cddddr lineinfo) num))
-
-(define (compact-ir body name file line)
+  (set-car! (cddr lineinfo) num))
+
+;; note that the 'list and 'block atoms make all lists 1-indexed.
+;; returns a 5-element vector containing:
+;;   code:           `(block ,@(n expressions))
+;;   locs:           list of line-table index, where code[i] has lineinfo line-table[locs[i]]
+;;   line-table:     list of `(lineinfo file.jl 123 0)'
+;;   ssavalue-table: table of (ssa-num . code-index)
+;;                   where ssavalue references in `code` need this remapping
+;;   label-table:    table of (label . code-index)
+(define (compact-ir body file line)
   (let ((code         '(block))
         (locs         '(list))
         (linetable    '(list))
+        (linetablelen 0)
         (labltable    (table))
         (ssavtable    (table))
         (current-loc  0)
+        (nowhere      #f)
         (current-file file)
         (current-line line)
         (locstack     '())
@@ -4987,35 +5348,45 @@ f(x) = yt(x)
       (or e (raise "missing value in IR"))
       (if (and (null? (cdr linetable))
                (not (and (pair? e) (eq? (car e) 'meta))))
-          (begin (set! linetable (cons (make-lineinfo name file line) linetable))
+          (begin (set! linetable (cons (make-lineinfo file line) linetable))
+                 (set! linetablelen (+ linetablelen 1))
                  (set! current-loc 1)))
       (set! code (cons e code))
       (set! i (+ i 1))
-      (set! locs (cons current-loc locs)))
+      (set! locs (cons (if nowhere 0 current-loc) locs))
+      (set! nowhere #f))
     (let loop ((stmts (cdr body)))
       (if (pair? stmts)
           (let ((e (car stmts)))
             (cond ((atom? e) (emit e))
                   ((eq? (car e) 'line)
-                   (if (and (= current-line 0) (length= e 2) (pair? linetable))
-                       ;; (line n) after push_loc just updates the line for the new file
-                       (begin (set-lineno! (car linetable) (cadr e))
-                              (set! current-line (cadr e)))
-                       (begin
-                         (set! current-line (cadr e))
-                         (if (pair? (cddr e))
-                             (set! current-file (caddr e)))
-                         (set! linetable (cons (if (null? locstack)
-                                                   (make-lineinfo name current-file current-line)
-                                                   (make-lineinfo name current-file current-line (caar locstack)))
-                                               linetable))
-                         (set! current-loc (- (length linetable) 1)))))
+                   (cond ((and (length= e 2) (not (cadr e)))
+                          ;; (line #f) marks that we are entering a generated statement
+                          ;; that should not be counted as belonging to the previous marked location,
+                          ;; for example `return` after a not-executed `if` arm in tail position.
+                          (set! nowhere #t))
+                         ((and (= current-line 0) (length= e 2) (pair? linetable))
+                          ;; (line n) after push_loc just updates the line for the new file
+                          (begin (set-lineno! (car linetable) (cadr e))
+                                 (set! current-line (cadr e))))
+                         (else
+                          (begin
+                            (set! current-line (cadr e))
+                            (if (pair? (cddr e))
+                                (set! current-file (caddr e)))
+                            (set! linetable (cons (if (null? locstack)
+                                                      (make-lineinfo current-file current-line)
+                                                      (make-lineinfo current-file current-line (caar locstack)))
+                                                  linetable))
+                            (set! linetablelen (+ linetablelen 1))
+                            (set! current-loc linetablelen)))))
                   ((and (length> e 2) (eq? (car e) 'meta) (eq? (cadr e) 'push_loc))
                    (set! locstack (cons (list current-loc current-line current-file) locstack))
                    (set! current-file (caddr e))
                    (set! current-line 0)
-                   (set! linetable (cons (make-lineinfo name current-file current-line current-loc) linetable))
-                   (set! current-loc (- (length linetable) 1)))
+                   (set! linetable (cons (make-lineinfo current-file current-line current-loc) linetable))
+                   (set! linetablelen (+ linetablelen 1))
+                   (set! current-loc linetablelen))
                   ((and (length= e 2) (eq? (car e) 'meta) (eq? (cadr e) 'pop_loc))
                    (let ((l (car locstack)))
                      (set! locstack (cdr locstack))
@@ -5039,7 +5410,6 @@ f(x) = yt(x)
 
 (define (renumber-lambda lam file line)
   (let* ((stuff (compact-ir (lam:body lam)
-                            (if (null? (cadr lam)) '|top-level scope| 'none)
                             file line))
          (code (aref stuff 0))
          (locs (aref stuff 1))
@@ -5051,7 +5421,8 @@ f(x) = yt(x)
     (define slot-table (symbol-to-idx-map (map car (car (lam:vinfo lam)))))
     (define sp-table (symbol-to-idx-map (lam:sp lam)))
     (define (renumber-stuff e)
-      (cond ((symbol? e)
+      (cond ((eq? e UNUSED) (error "Attempted to use slot marked unused"))
+            ((symbol? e)
              (let ((idx (get slot-table e #f)))
                (if idx
                    `(slot ,idx)
@@ -5059,19 +5430,19 @@ f(x) = yt(x)
                      (if idx
                          `(static_parameter ,idx)
                          e)))))
-            ((and (pair? e) (eq? (car e) 'outerref))
-             (cadr e))
             ((nospecialize-meta? e)
              ;; convert nospecialize vars to slot numbers
              `(meta ,(cadr e) ,@(map renumber-stuff (cddr e))))
-            ((or (atom? e) (quoted? e) (eq? (car e) 'global))
+            ((or (atom? e) (quoted? e) (memq (car e) '(export public global toplevel)))
              e)
             ((ssavalue? e)
              (let ((idx (get ssavalue-table (cadr e) #f)))
-               (if (not idx) (begin (prn e) (prn lam) (error "ssavalue with no def")))
+               (if (not idx) (error "internal bug: ssavalue with no def"))
                `(ssavalue ,idx)))
-            ((memq (car e) '(goto enter))
-             (list* (car e) (get label-table (cadr e)) (cddr e)))
+            ((eq? (car e) 'goto)
+             `(goto ,(get label-table (cadr e))))
+            ((eq? (car e) 'enter)
+             `(enter ,(get label-table (cadr e)) ,@(map renumber-stuff (cddr e))))
             ((eq? (car e) 'gotoifnot)
              `(gotoifnot ,(renumber-stuff (cadr e)) ,(get label-table (caddr e))))
             ((eq? (car e) 'lambda)
@@ -5080,8 +5451,8 @@ f(x) = yt(x)
              (let ((e (cons (car e)
                             (map renumber-stuff (cdr e)))))
                (if (and (eq? (car e) 'foreigncall)
-                        (tuple-call? (cadr e))
-                        (expr-contains-p (lambda (x) (or (ssavalue? x) (slot? x))) (cadr e)))
+                        (tuple-syntax? (cadr e))
+                        (expr-contains-p (lambda (x) (or (ssavalue? x) (slot? x))) (cadr e))) ;; TODO: use allow-list here
                    (error "ccall function name and library expression cannot reference local variables"))
                e))))
     (let ((body (renumber-stuff (lam:body lam)))
@@ -5105,7 +5476,7 @@ f(x) = yt(x)
 
 ;; expander entry point
 
-(define (julia-expand1 ex file line)
+(define (julia-lower1 ex file line)
   (compact-and-renumber
    (linearize
     (closure-convert
@@ -5114,7 +5485,7 @@ f(x) = yt(x)
 
 (define *current-desugar-loc* #f)
 
-(define (julia-expand0 ex lno)
+(define (julia-lower0 ex lno)
   (with-bindings ((*current-desugar-loc* lno))
    (trycatch (expand-forms ex)
              (lambda (e)
@@ -5127,7 +5498,7 @@ f(x) = yt(x)
                    (error (string (cadr e) (format-loc *current-desugar-loc*))))
                    (raise e)))))
 
-(define (julia-expand ex (file 'none) (line 0))
-  (julia-expand1
-   (julia-expand0
+(define (julia-lower ex (file 'none) (line 0))
+  (julia-lower1
+   (julia-lower0
     (julia-expand-macroscope ex) `(line ,line ,file)) file line))
diff --git a/src/julia.expmap.in b/src/julia.expmap.in
index 484c83a4b16b2..5a3fbce0d1a82 100644
--- a/src/julia.expmap.in
+++ b/src/julia.expmap.in
@@ -1,43 +1,41 @@
 @JULIA_SHLIB_SYMBOL_VERSION@ {
   global:
     pthread*;
-    __stack_chk_guard;
-    asprintf;
+    __stack_chk_*;
+    asprintf*;
     bitvector_*;
     ios_*;
-    arraylist_grow;
-    small_arraylist_grow;
-    small_typeof;
+    arraylist_*;
+    small_arraylist_*;
     jl_*;
     ijl_*;
     _jl_mutex_*;
-    rec_backtrace;
+    rec_backtrace*;
     julia_*;
-    libsupport_init;
-    localtime_r;
-    memhash;
-    memhash32;
-    memhash32_seed;
-    memhash_seed;
-    restore_signals;
+    libsupport_init*;
+    localtime_r*;
+    memhash*;
+    memhash32*;
+    memhash32_seed*;
+    memhash_seed*;
+    restore_signals*;
     u8_*;
     uv_*;
-    add_library_mapping;
+    add_library_mapping*;
     utf8proc_*;
-    jlbacktrace;
-    jlbacktracet;
-    _IO_stdin_used;
-    _Z24jl_coverage_data_pointerN4llvm9StringRefEi;
-    _Z22jl_coverage_alloc_lineN4llvm9StringRefEi;
-    _Z22jl_malloc_data_pointerN4llvm9StringRefEi;
+    jlbacktrace*;
+    jlbacktracet*;
+    _IO_stdin_used*; /* glibc expects this to be exported to detect which version of glibc is being used, see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=634261#109 for further details */
+    _Z24jl_coverage_data_pointerN4llvm9StringRefEi*;
+    _Z22jl_coverage_alloc_lineN4llvm9StringRefEi*;
+    _Z22jl_malloc_data_pointerN4llvm9StringRefEi*;
     _jl_timing_*;
-    LLVMExtra*;
     JLJIT*;
-    llvmGetPassPluginInfo;
+    llvmGetPassPluginInfo*;
 
     /* freebsd */
-    environ;
-    __progname;
+    environ*;
+    __progname*;
 
   local:
     *;
diff --git a/src/julia.h b/src/julia.h
index 5af8a5bc1a170..2937ec44eb2f5 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -13,7 +13,9 @@
 #undef jl_setjmp
 #undef jl_longjmp
 #undef jl_egal
+#undef jl_genericmemory_owner
 #endif
+#include "jl_exported_data.inc"
 
 #include "julia_fasttls.h"
 #include "libsupport.h"
@@ -23,33 +25,27 @@
 #include "htable.h"
 #include "arraylist.h"
 #include "analyzer_annotations.h"
+#include "jloptions.h"
 
 #include <setjmp.h>
 #ifndef _OS_WINDOWS_
-#  define jl_jmp_buf sigjmp_buf
-#  if defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_WASM_)
-#    define MAX_ALIGN 8
-#  elif defined(_CPU_AARCH64_)
-// int128 is 16 bytes aligned on aarch64
-#    define MAX_ALIGN 16
-#  elif defined(_P64)
-// Generically we assume MAX_ALIGN is sizeof(void*)
-#    define MAX_ALIGN 8
-#  else
-#    define MAX_ALIGN 4
-#  endif
+    #define jl_jmp_buf sigjmp_buf
 #else
-#  include "win32_ucontext.h"
-#  define jl_jmp_buf jmp_buf
-#  define MAX_ALIGN 8
+    #include "win32_ucontext.h"
+    #define jl_jmp_buf jmp_buf
 #endif
 
 // Define the largest size (bytes) of a properly aligned object that the
-// processor family and compiler typically supports without a lock
-// (assumed to be at least a pointer size). Since C is bad at handling 16-byte
-// types, we currently use 8 here as the default.
+// processor family (MAX_ATOMIC_SIZE) and compiler (MAX_POINTERATOMIC_SIZE)
+// typically supports without a lock (assumed to be at least a pointer size)
+// with MAX_POINTERATOMIC_SIZE >= MAX_ATOMIC_SIZE.
+#ifdef _P64
+#define MAX_ATOMIC_SIZE 16
+#define MAX_POINTERATOMIC_SIZE 16
+#else
 #define MAX_ATOMIC_SIZE 8
 #define MAX_POINTERATOMIC_SIZE 8
+#endif
 
 #ifdef _P64
 #define NWORDS(sz) (((sz)+7)>>3)
@@ -76,22 +72,22 @@ typedef struct _jl_tls_states_t *jl_ptls_t;
 #ifdef JL_LIBRARY_EXPORTS
 #include "uv.h"
 #endif
+#include "gc-interface.h"
 #include "julia_atomics.h"
-#include "julia_threads.h"
 #include "julia_assert.h"
 
+// the common fields are hidden before the pointer, but the following macro is
+// used to indicate which types below are subtypes of jl_value_t
+#define JL_DATA_TYPE
+typedef struct _jl_value_t jl_value_t;
+#include "julia_threads.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 // core data types ------------------------------------------------------------
 
-// the common fields are hidden before the pointer, but the following macro is
-// used to indicate which types below are subtypes of jl_value_t
-#define JL_DATA_TYPE
-
-typedef struct _jl_value_t jl_value_t;
-
 struct _jl_taggedvalue_bits {
     uintptr_t gc:2;
     uintptr_t in_image:1;
@@ -120,7 +116,8 @@ JL_DLLEXPORT jl_taggedvalue_t *_jl_astaggedvalue(jl_value_t *v JL_PROPAGATES_ROO
 jl_value_t *_jl_valueof(jl_taggedvalue_t *tv JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 #define jl_valueof(v) _jl_valueof((jl_taggedvalue_t*)(v))
 JL_DLLEXPORT jl_value_t *_jl_typeof(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
-#define jl_typeof(v) _jl_typeof((jl_value_t*)(v))
+#define jl_typeof(v) (_jl_typeof((jl_value_t*)(v)))
+#define jl_typetagof(v) ((uintptr_t)_jl_typeof((jl_value_t*)(v)))
 #else
 #define jl_astaggedvalue(v)                                             \
     ((jl_taggedvalue_t*)((char*)(v) - sizeof(jl_taggedvalue_t)))
@@ -128,6 +125,8 @@ JL_DLLEXPORT jl_value_t *_jl_typeof(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFE
     ((jl_value_t*)((char*)(v) + sizeof(jl_taggedvalue_t)))
 #define jl_typeof(v)                                                    \
     jl_to_typeof(jl_typetagof(v))
+#define jl_typetagof(v)                                                 \
+    ((jl_astaggedvalue(v)->header) & ~(uintptr_t)15)
 #endif
 static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT
 {
@@ -135,8 +134,6 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT
     jl_taggedvalue_t *tag = jl_astaggedvalue(v);
     jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)&tag->type, (jl_value_t*)t);
 }
-#define jl_typetagof(v)                                                 \
-    ((jl_astaggedvalue(v)->header) & ~(uintptr_t)15)
 #define jl_typeis(v,t) (jl_typeof(v)==(jl_value_t*)(t))
 #define jl_typetagis(v,t) (jl_typetagof(v)==(uintptr_t)(t))
 #define jl_set_typetagof(v,t,gc) (jl_set_typeof((v), (void*)(((uintptr_t)(t) << 4) | (gc))))
@@ -167,52 +164,42 @@ typedef struct {
     // jl_value_t *data[];
 } jl_svec_t;
 
-typedef struct {
-    /*
-      how - allocation style
-      0 = data is inlined, or a foreign pointer we don't manage
-      1 = julia-allocated buffer that needs to be marked
-      2 = malloc-allocated pointer this array object manages
-      3 = has a pointer to the object that owns the data
-    */
-    uint16_t how:2;
-    uint16_t ndims:9;
-    uint16_t pooled:1;
-    uint16_t ptrarray:1; // representation is pointer array
-    uint16_t hasptr:1; // representation has embedded pointers
-    uint16_t isshared:1; // data is shared by multiple Arrays
-    uint16_t isaligned:1; // data allocated with memalign
-} jl_array_flags_t;
+JL_EXTENSION typedef struct _jl_genericmemory_t {
+    JL_DATA_TYPE
+    size_t length;
+    void *ptr;
+    // followed by padding and inline data, or owner pointer
+#ifdef _P64
+    // union {
+    //     jl_value_t *owner;
+    //     T inl[];
+    // };
+#else
+    //
+    // jl_value_t *owner;
+    // size_t padding[1];
+    // T inl[];
+#endif
+} jl_genericmemory_t;
 
 JL_EXTENSION typedef struct {
     JL_DATA_TYPE
-    void *data;
-    size_t length;
-    jl_array_flags_t flags;
-    uint16_t elsize;  // element size including alignment (dim 1 memory stride)
-    uint32_t offset;  // for 1-d only. does not need to get big.
-    size_t nrows;
-    union {
-        // 1d
-        size_t maxsize;
-        // Nd
-        size_t ncols;
-    };
-    // other dim sizes go here for ndims > 2
+    void *ptr_or_offset;
+    jl_genericmemory_t *mem;
+} jl_genericmemoryref_t;
 
-    // followed by alignment padding and inline data, or owner pointer
+JL_EXTENSION typedef struct {
+    JL_DATA_TYPE
+    jl_genericmemoryref_t ref;
+    size_t dimsize[]; // length for 1-D, otherwise length is mem->length
 } jl_array_t;
 
-// compute # of extra words needed to store dimensions
-STATIC_INLINE int jl_array_ndimwords(uint32_t ndims) JL_NOTSAFEPOINT
-{
-    return (ndims < 3 ? 0 : ndims-2);
-}
 
 typedef struct _jl_datatype_t jl_tupletype_t;
 struct _jl_code_instance_t;
 typedef struct _jl_method_instance_t jl_method_instance_t;
 typedef struct _jl_globalref_t jl_globalref_t;
+typedef struct _jl_typemap_entry_t jl_typemap_entry_t;
 
 
 // TypeMap is an implicitly defined type
@@ -246,63 +233,82 @@ JL_DLLEXPORT extern const jl_callptr_t jl_fptr_interpret_call_addr;
 
 JL_DLLEXPORT extern const jl_callptr_t jl_f_opaque_closure_call_addr;
 
-typedef struct _jl_line_info_node_t {
-    struct _jl_module_t *module;
-    jl_value_t *method; // may contain a jl_symbol, jl_method_t, or jl_method_instance_t
-    jl_sym_t *file;
+JL_DLLEXPORT extern const jl_callptr_t jl_fptr_wait_for_compiled_addr;
+
+struct jl_codeloc_t {
     int32_t line;
-    int32_t inlined_at;
-} jl_line_info_node_t;
+    int32_t to;
+    int32_t pc;
+};
+
+typedef struct _jl_debuginfo_t {
+    jl_value_t *def;
+    struct _jl_debuginfo_t *linetable; // or nothing
+    jl_svec_t *edges; // Memory{DebugInfo}
+    jl_value_t *codelocs; // String // Memory{UInt8} // compressed info
+} jl_debuginfo_t;
 
-// the following mirrors `struct EffectsOverride` in `base/compiler/effects.jl`
+// the following mirrors `struct EffectsOverride` in `base/expr.jl`
 typedef union __jl_purity_overrides_t {
     struct {
-        uint8_t ipo_consistent          : 1;
-        uint8_t ipo_effect_free         : 1;
-        uint8_t ipo_nothrow             : 1;
-        uint8_t ipo_terminates_globally : 1;
+        uint16_t ipo_consistent          : 1;
+        uint16_t ipo_effect_free         : 1;
+        uint16_t ipo_nothrow             : 1;
+        uint16_t ipo_terminates_globally : 1;
         // Weaker form of `terminates` that asserts
         // that any control flow syntactically in the method
         // is guaranteed to terminate, but does not make
         // assertions about any called functions.
-        uint8_t ipo_terminates_locally  : 1;
-        uint8_t ipo_notaskstate         : 1;
-        uint8_t ipo_inaccessiblememonly : 1;
+        uint16_t ipo_terminates_locally  : 1;
+        uint16_t ipo_notaskstate         : 1;
+        uint16_t ipo_inaccessiblememonly : 1;
+        uint16_t ipo_noub                : 1;
+        uint16_t ipo_noub_if_noinbounds  : 1;
+        uint16_t ipo_consistent_overlay  : 1;
+        uint16_t ipo_nortcall            : 1;
     } overrides;
-    uint8_t bits;
+    uint16_t bits;
 } _jl_purity_overrides_t;
 
+#define NUM_EFFECTS_OVERRIDES 11
+#define NUM_IR_FLAGS 3
+
 // This type describes a single function body
 typedef struct _jl_code_info_t {
+    JL_DATA_TYPE
     // ssavalue-indexed arrays of properties:
     jl_array_t *code;  // Any array of statements
-    jl_value_t *codelocs; // Int32 array of indices into the line table
+    jl_debuginfo_t *debuginfo; // Table of edge data for each statement
     jl_value_t *ssavaluetypes; // types of ssa values (or count of them)
-    jl_array_t *ssaflags; // flags associated with each statement:
-        // 0 = inbounds
-        // 1 = inline
-        // 2 = noinline
-        // 3 = <reserved> strict-ieee (strictfp)
-        // 4 = effect-free (may be deleted if unused)
-        // 5-6 = <unused>
-        // 7 = has out-of-band info
+    jl_array_t *ssaflags; // 32 bits flags associated with each statement:
+        // 1 << 0 = inbounds region
+        // 1 << 1 = callsite inline region
+        // 1 << 2 = callsite noinline region
+        // 1 << 3-14 = purity
+        // 1 << 16+ = reserved for inference
     // miscellaneous data:
-    jl_value_t *method_for_inference_limit_heuristics; // optional method used during inference
-    jl_value_t *linetable; // Table of locations [TODO: make this volatile like slotnames]
     jl_array_t *slotnames; // names of local variables
     jl_array_t *slotflags;  // local var bit flags
-    // the following are optional transient properties (not preserved by compression--as they typically get stored elsewhere):
+    // the following is a deprecated property (not preserved by compression)
     jl_value_t *slottypes; // inferred types of slots
-    jl_value_t *rettype;
-    jl_method_instance_t *parent; // context (optionally, if available, otherwise nothing)
-    jl_value_t *edges; // forward edges to method instances that must be invalidated
+    // more inferred data:
+    jl_value_t *rettype; // return type relevant for fptr
+    jl_method_instance_t *parent; // context (after inference, otherwise nothing)
+    // the following are required to cache the method correctly
+    jl_value_t *edges; // forward edge info (svec preferred, but tolerates Array{Any} and nothing token)
     size_t min_world;
     size_t max_world;
+
+    // These may be used by generated functions to further constrain the resulting inputs.
+    jl_value_t *method_for_inference_limit_heuristics; // optional method used during inference
+    size_t nargs;
+
     // various boolean properties:
-    uint8_t inferred;
     uint8_t propagate_inbounds;
     uint8_t has_fcall;
+    uint8_t has_image_globalref;
     uint8_t nospecializeinfer;
+    uint8_t isva;
     // uint8 settings
     uint8_t inlining; // 0 = default; 1 = @inline; 2 = @noinline
     uint8_t constprop; // 0 = use heuristic; 1 = aggressive; 2 = none
@@ -313,25 +319,32 @@ typedef struct _jl_code_info_t {
 
 // This type describes a single method definition, and stores data
 // shared by the specializations of a function.
+//
+// Reading or writing requires `writelock` or exclusive ownership:
+//   roots, root_blocks, nroots_sysimg, ccallable
+// No lock is required to read these fields, set once on construction:
+//   all other fields
 typedef struct _jl_method_t {
     JL_DATA_TYPE
     jl_sym_t *name;  // for error reporting
     struct _jl_module_t *module;
     jl_sym_t *file;
     int32_t line;
-    size_t primary_world;
-    size_t deleted_world;
+    _Atomic(uint8_t) dispatch_status; // bits defined in staticdata.jl
+    _Atomic(jl_genericmemory_t*) interferences; // set of intersecting methods not more specific
+    _Atomic(size_t) primary_world;
 
     // method's type signature. redundant with TypeMapEntry->specTypes
     jl_value_t *sig;
 
     // table of all jl_method_instance_t specializations we have
     _Atomic(jl_value_t*) specializations; // allocated as [hashable, ..., NULL, linear, ....], or a single item
-    _Atomic(jl_array_t*) speckeyset; // index lookup by hash into specializations
+    _Atomic(jl_genericmemory_t*) speckeyset; // index lookup by hash into specializations
 
     jl_value_t *slot_syms; // compacted list of slot names (String)
     jl_value_t *external_mt; // reference to the method table this method is part of, null if part of the internal table
     jl_value_t *source;  // original code template (jl_code_info_t, but may be compressed), null for builtins
+    jl_debuginfo_t *debuginfo;  // fixed linetable from the source argument, null if not available
     _Atomic(jl_method_instance_t*) unspecialized;  // unspecialized executable method instance, or null
     jl_value_t *generator;  // executable code-generating function if available
     jl_array_t *roots;  // pointers in generated code (shared to reduce memory), or null
@@ -361,6 +374,11 @@ typedef struct _jl_method_t {
     uint8_t isva;
     uint8_t is_for_opaque_closure;
     uint8_t nospecializeinfer;
+    // bit flags, 0x01 = scanned
+    // 0x02 = added to module scanned list (either from scanning or inference edge)
+    // 0x04 = Source was invalidated since jl_require_world
+    _Atomic(uint8_t) did_scan_source;
+
     // uint8 settings
     uint8_t constprop;      // 0x00 = use heuristic; 0x01 = aggressive; 0x02 = none
     uint8_t max_varargs;    // 0xFF = use heuristic; otherwise, max # of args to expand
@@ -371,13 +389,19 @@ typedef struct _jl_method_t {
     _jl_purity_overrides_t purity;
 
 // hidden fields:
-    // lock for modifications to the method
     jl_mutex_t writelock;
 } jl_method_t;
 
 // This type is a placeholder to cache data for a specType signature specialization of a Method
 // can can be used as a unique dictionary key representation of a call to a particular Method
 // with a particular set of argument types
+//
+// Reading or writing requires `def.method->writelock` or exclusive ownership:
+//   backedges
+// Reading or writing requires the associated jl_methcache_t's `writelock`:
+//   cache_with_orig
+// No lock is required to read these fields, set once on construction:
+//   def, specTypes, sparam_vals
 struct _jl_method_instance_t {
     JL_DATA_TYPE
     union {
@@ -386,15 +410,22 @@ struct _jl_method_instance_t {
         jl_method_t *method; // method this is specialized from
     } def; // pointer back to the context for this code
     jl_value_t *specTypes;  // argument types this was specialized for
-    jl_svec_t *sparam_vals; // static parameter values, indexed by def.method->sparam_syms
-    _Atomic(jl_value_t*) uninferred; // cached uncompressed code, for generated functions, top-level thunks, or the interpreter
-    jl_array_t *backedges; // list of method-instances which call this method-instance; `invoke` records (invokesig, caller) pairs
-    jl_array_t *callbacks; // list of callback functions to inform external caches about invalidations
+    jl_svec_t *sparam_vals; // static parameter values, indexed by def.method->sig
+    // list of code-instances which call this method-instance; `invoke` records (invokesig, caller) pairs
+    jl_array_t *backedges;
     _Atomic(struct _jl_code_instance_t*) cache;
-    uint8_t inInference; // flags to tell if inference is running on this object
     uint8_t cache_with_orig; // !cache_with_specTypes
-    _Atomic(uint8_t) precompiled; // true if this instance was generated by an explicit `precompile(...)` call
+
+    // flags for this method instance
+    //   bit 0: generated by an explicit `precompile(...)`
+    //   bit 1: dispatched
+    //   bit 2: The ->backedges field is currently being walked higher up the stack - entries may be deleted, but not moved
+    //   bit 3: The ->backedges field was modified and should be compacted when clearing bit 2
+    _Atomic(uint8_t) flags;
+    _Atomic(uint8_t) dispatch_status; // bits defined in staticdata.jl
 };
+#define JL_MI_FLAGS_MASK_PRECOMPILED    0x01
+#define JL_MI_FLAGS_MASK_DISPATCHED     0x02
 
 // OpaqueClosure
 typedef struct _jl_opaque_closure_t {
@@ -402,56 +433,75 @@ typedef struct _jl_opaque_closure_t {
     jl_value_t *captures;
     size_t world;
     jl_method_t *source;
-    jl_fptr_args_t invoke;
-    void *specptr;
+    jl_fptr_args_t invoke; // n.b. despite the similar name, this is not an invoke ABI (jl_call_t / julia.call2), but rather the fptr1 (jl_fptr_args_t / julia.call) ABI
+    void *specptr; // n.b. despite the similarity in field name, this is not arbitrary private data for jlcall, but rather the codegen ABI for specsig, and is mandatory if specsig is valid
 } jl_opaque_closure_t;
 
 // This type represents an executable operation
+//
+// No lock is required to read these fields, which are set while we have
+// exclusive ownership of the CodeInstance:
+//   def, owner, rettype, exctype, rettype_const, analysis_results,
+//   time_infer_total, time_infer_self
+
+// flags bits for CodeInstance
+#define JL_CI_FLAGS_SPECPTR_SPECIALIZED      0b0001
+#define JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR   0b0010
+#define JL_CI_FLAGS_FROM_IMAGE               0b0100
+#define JL_CI_FLAGS_NATIVE_CACHE_VALID       0b1000
+
 typedef struct _jl_code_instance_t {
     JL_DATA_TYPE
-    jl_method_instance_t *def; // method this is specialized from
+    jl_value_t *def; // MethodInstance or ABIOverride
+    jl_value_t *owner; // Compiler token this belongs to, `jl_nothing` is reserved for native
     _Atomic(struct _jl_code_instance_t*) next; // pointer to the next cache entry
 
     // world range for which this object is valid to use
-    size_t min_world;
-    size_t max_world;
+    _Atomic(size_t) min_world;
+    _Atomic(size_t) max_world;
 
     // inference state cache
     jl_value_t *rettype; // return type for fptr
+    jl_value_t *exctype; // thrown type for fptr
     jl_value_t *rettype_const; // inferred constant return value, or null
-    _Atomic(jl_value_t *) inferred; // inferred jl_code_info_t (may be compressed), or jl_nothing, or null
-    //TODO: jl_array_t *edges; // stored information about edges from this object
-    //TODO: uint8_t absolute_max; // whether true max world is unknown
+
+    // Inferred result. When part of the runtime cache, either
+    // - A jl_code_info_t (may be compressed as a String) containing the inferred IR
+    // - jl_nothing, indicating that inference was completed, but the result was
+    //               deleted to save space.
+    // - UInt8, indicating that inference recorded the estimated inlining cost, but deleted the result to save space
+    // - NULL, indicating that inference was not yet completed or did not succeed
+    _Atomic(jl_value_t *) inferred;
+    _Atomic(jl_debuginfo_t *) debuginfo; // stored information about edges from this object (set once, with a happens-before both source and invoke)
+    _Atomic(jl_svec_t *) edges; // forward edge info
 
     // purity results
+    jl_value_t *analysis_results; // Analysis results about this code (IPO-safe)
     // see also encode_effects() and decode_effects() in `base/compiler/effects.jl`,
-    uint32_t ipo_purity_bits;
-    // ipo_purity_flags:
-    //     uint8_t ipo_consistent          : 2;
-    //     uint8_t ipo_effect_free         : 2;
-    //     uint8_t ipo_nothrow             : 2;
-    //     uint8_t ipo_terminates          : 2;
-    //     uint8_t ipo_nonoverlayed        : 1;
-    //     uint8_t ipo_notaskstate         : 2;
-    //     uint8_t ipo_inaccessiblememonly : 2;
-    _Atomic(uint32_t) purity_bits;
+    _Atomic(uint32_t) ipo_purity_bits;
     // purity_flags:
-    //     uint8_t consistent          : 2;
+    //     uint8_t consistent          : 3;
     //     uint8_t effect_free         : 2;
-    //     uint8_t nothrow             : 2;
-    //     uint8_t terminates          : 2;
-    //     uint8_t nonoverlayed        : 1;
-    //     uint8_t notaskstate         : 2;
+    //     uint8_t nothrow             : 1;
+    //     uint8_t terminates          : 1;
+    //     uint8_t notaskstate         : 1;
     //     uint8_t inaccessiblememonly : 2;
-    jl_value_t *argescapes; // escape information of call arguments
+    //     uint8_t noub                : 2;
+    //     uint8_t nonoverlayed        : 2;
 
     // compilation state cache
-    _Atomic(uint8_t) specsigflags; // & 0b001 == specptr is a specialized function signature for specTypes->rettype
-                                   // & 0b010 == invokeptr matches specptr
-                                   // & 0b100 == From image
+    // these time fields have units of seconds (60 ns minimum resolution and 18 hour maximum saturates to Infinity) and are stored in Float16 format
+    uint16_t time_infer_total; // total cost of computing `inferred` originally
+    uint16_t time_infer_cache_saved; // adjustment to total cost, reflecting how much time was saved by having caches, to give a stable real cost without caches for comparisons
+    uint16_t time_infer_self; // self cost of julia inference for `inferred` (included in time_infer_total)
+    _Atomic(uint16_t) time_compile; // self cost of llvm compilation (e.g. of computing `invoke`)
+    //TODO: uint8_t absolute_max; // whether true max world is unknown
+    _Atomic(uint8_t) flags; // & 0b001 == specptr is a specialized function signature for specTypes->rettype
+                            // & 0b010 == invokeptr matches specptr
+                            // & 0b100 == From image
+                            // & 0b1000 == native_cache_valid
     _Atomic(uint8_t) precompile;  // if set, this will be added to the output system image
-    uint8_t relocatability;  // nonzero if all roots are built into sysimg or tagged by module key
-    _Atomic(jl_callptr_t) invoke; // jlcall entry point
+    _Atomic(jl_callptr_t) invoke; // jlcall entry point usually, but if this codeinst belongs to an OC Method, then this is an jl_fptr_args_t fptr1 instead, unless it is not, because it is a special token object instead
     union _jl_generic_specptr_t {
         _Atomic(void*) fptr;
         _Atomic(jl_fptr_args_t) fptr1;
@@ -461,22 +511,26 @@ typedef struct _jl_code_instance_t {
     } specptr; // private data for `jlcall entry point
 } jl_code_instance_t;
 
-// all values are callable as Functions
-typedef jl_value_t jl_function_t;
+// May be used as the ->def field of a CodeInstance to override the ABI
+typedef struct _jl_abi_override_t {
+    JL_DATA_TYPE
+    jl_value_t *abi;
+    jl_method_instance_t *def;
+} jl_abi_override_t;
 
 typedef struct {
     JL_DATA_TYPE
-    jl_sym_t *name;
-    jl_value_t *lb;   // lower bound
-    jl_value_t *ub;   // upper bound
+    jl_sym_t *JL_NONNULL name;
+    jl_value_t *JL_NONNULL lb;   // lower bound
+    jl_value_t *JL_NONNULL ub;   // upper bound
 } jl_tvar_t;
 
 // UnionAll type (iterated union over all values of a variable in certain bounds)
 // written `body where lb<:var<:ub`
 typedef struct {
     JL_DATA_TYPE
-    jl_tvar_t *var;
-    jl_value_t *body;
+    jl_tvar_t *JL_NONNULL var;
+    jl_value_t *JL_NONNULL body;
 } jl_unionall_t;
 
 // represents the "name" part of a DataType, describing the syntactic structure
@@ -486,6 +540,7 @@ typedef struct {
     JL_DATA_TYPE
     jl_sym_t *name;
     struct _jl_module_t *module;
+    jl_sym_t *singletonname; // sometimes used for debug printing
     jl_svec_t *names;  // field names
     const uint32_t *atomicfields; // if any fields are atomic, we record them here
     const uint32_t *constfields; // if any fields are const, we record them here
@@ -495,22 +550,24 @@ typedef struct {
     _Atomic(jl_value_t*) Typeofwrapper;  // cache for Type{wrapper}
     _Atomic(jl_svec_t*) cache;        // sorted array
     _Atomic(jl_svec_t*) linearcache;  // unsorted array
-    struct _jl_methtable_t *mt;
     jl_array_t *partial;     // incomplete instantiations of this type
     intptr_t hash;
+    _Atomic(int32_t) max_args;  // max # of non-vararg arguments in a signature with this type as the function
     int32_t n_uninitialized;
     // type properties
     uint8_t abstract:1;
     uint8_t mutabl:1;
     uint8_t mayinlinealloc:1;
-    uint8_t _reserved:5;
+    uint8_t _unused:5;
+    _Atomic(uint8_t) cache_entry_count; // (approximate counter of TypeMapEntry for heuristics)
     uint8_t max_methods; // override for inference's max_methods setting (0 = no additional limit or relaxation)
+    uint8_t constprop_heustic; // override for inference's constprop heuristic
 } jl_typename_t;
 
 typedef struct {
     JL_DATA_TYPE
-    jl_value_t *a;
-    jl_value_t *b;
+    jl_value_t *JL_NONNULL a;
+    jl_value_t *JL_NONNULL b;
 } jl_uniontype_t;
 
 // in little-endian, isptr is always the first bit, avoiding the need for a branch in computing isptr
@@ -538,9 +595,19 @@ typedef struct {
     uint32_t npointers; // number of pointers embedded inside
     int32_t first_ptr; // index of the first pointer (or -1)
     uint16_t alignment; // strictest alignment over all fields
-    uint16_t haspadding : 1; // has internal undefined bytes
-    uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32, 3 -> foreign type
-    uint16_t padding : 13;
+    struct { // combine these fields into a struct so that we can take addressof them
+        uint16_t haspadding : 1; // has internal undefined bytes
+        uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32, 3 -> foreign type
+        // metadata bit only for GenericMemory eltype layout
+        uint16_t arrayelem_isboxed : 1;
+        uint16_t arrayelem_isunion : 1;
+        uint16_t arrayelem_isatomic : 1;
+        uint16_t arrayelem_islocked : 1;
+        // If set, this type's egality can be determined entirely by comparing
+        // the non-padding bits of this datatype.
+        uint16_t isbitsegal : 1;
+        uint16_t padding : 8;
+    } flags;
     // union {
     //     jl_fielddesc8_t field8[nfields];
     //     jl_fielddesc16_t field16[nfields];
@@ -587,18 +654,169 @@ typedef struct _jl_weakref_t {
     jl_value_t *value;
 } jl_weakref_t;
 
+// N.B: Needs to be synced with runtime_internals.jl
+// We track essentially three levels of binding strength:
+//
+// 1. Implicit Bindings (Weakest)
+//   These binding kinds depend solely on the set of using'd packages and are not explicitly
+//   declared:
+//
+//      PARTITION_KIND_IMPLICIT_CONST
+//      PARTITION_KIND_IMPLICIT_GLOBAL
+//      PARTITION_KIND_GUARD
+//      PARTITION_KIND_FAILED
+//
+// 2. Weakly Declared Bindings (Weak)
+//    The binding was declared using `global`. It is treated as a mutable, `Any` type global
+//    for almost all purposes, except that it receives slightly worse optimizations, since it
+//    may be replaced.
+//
+//      PARTITION_KIND_DECLARED
+//
+// 3. Strong Declared Bindings (Weak)
+//    All other bindings are explicitly declared using a keyword or global assignment.
+//   These are considered strongest:
+//
+//      PARTITION_KIND_CONST
+//      PARTITION_KIND_CONST_IMPORT
+//      PARTITION_KIND_EXPLICIT
+//      PARTITION_KIND_IMPORTED
+//      PARTITION_KIND_GLOBAL
+//      PARTITION_KIND_UNDEF_CONST
+//
+// The runtime supports syntactic invalidation (by raising the world age and changing the partition type
+// in the new world age) from any partition kind to any other.
+//
+// However, not all transitions are allowed syntactically. We have the following rules for SYNTACTIC invalidation:
+// 1. It is always syntactically permissable to replace a weaker binding by a stronger binding
+// 2. Implicit bindings can be syntactically changed to other implicit bindings by changing the `using` set.
+// 3. Finally, we syntactically permit replacing one PARTITION_KIND_CONST(_IMPORT) by another of a different value.
+//
+// We may make this list more permissive in the future.
+//
+// Finally, PARTITION_KIND_BACKDATED_CONST is a special case, and the only case where we may replace an
+// existing partition by a different partition kind in the same world age. As such, it needs special
+// support in inference. Any partition kind that may be replaced by a PARTITION_KIND_BACKDATED_CONST
+// must be inferred accordingly. PARTITION_KIND_BACKDATED_CONST is intended as a temporary compatibility
+// measure. The following kinds may be replaced by PARTITION_KIND_BACKDATED_CONST:
+//  - PARTITION_KIND_GUARD
+//  - PARTITION_KIND_FAILED
+//  - PARTITION_KIND_DECLARED
+enum jl_partition_kind {
+    // Constant: This binding partition is a constant declared using `const _ = ...`
+    //  ->restriction holds the constant value
+    PARTITION_KIND_CONST        = 0x0,
+    // Import Constant: This binding partition is a constant declared using `import A`
+    //  ->restriction holds the constant value
+    PARTITION_KIND_CONST_IMPORT = 0x1,
+    // Global: This binding partition is a global variable. It was declared either using
+    // `global x::T` to implicitly through a syntactic global assignment.
+    //  -> restriction holds the type restriction
+    PARTITION_KIND_GLOBAL       = 0x2,
+    // Implicit: The binding was a global, implicitly imported from a `using`'d module.
+    //  ->restriction holds the ultimately imported global binding
+    PARTITION_KIND_IMPLICIT_GLOBAL     = 0x3,
+    // Implicit: The binding was a constant, implicitly imported from a `using`'d module.
+    //  ->restriction holds the ultimately imported constant value
+    PARTITION_KIND_IMPLICIT_CONST     = 0x4,
+    // Explicit: The binding was explicitly `using`'d by name
+    //  ->restriction holds the imported binding
+    PARTITION_KIND_EXPLICIT     = 0x5,
+    // Imported: The binding was explicitly `import`'d by name
+    //  ->restriction holds the imported binding
+    PARTITION_KIND_IMPORTED     = 0x6,
+    // Failed: We attempted to import the binding, but the import was ambiguous
+    //  ->restriction is NULL.
+    PARTITION_KIND_FAILED       = 0x7,
+    // Declared: The binding was declared using `global` or similar. This acts in most ways like
+    // PARTITION_KIND_GLOBAL with an `Any` restriction, except that it may be redefined to a stronger
+    // binding like `const` or an explicit import.
+    //  ->restriction is NULL.
+    PARTITION_KIND_DECLARED     = 0x8,
+    // Guard: The binding was looked at, but no global or import was resolved at the time
+    //  ->restriction is NULL.
+    PARTITION_KIND_GUARD        = 0x9,
+    // Undef Constant: This binding partition is a constant declared using `const`, but
+    // without a value.
+    //  ->restriction is NULL
+    PARTITION_KIND_UNDEF_CONST  = 0xa,
+    // Backated constant. A constant that was backdated for compatibility. In all other
+    // ways equivalent to PARTITION_KIND_CONST, but prints a warning on access
+    PARTITION_KIND_BACKDATED_CONST = 0xb,
+
+    // This is not a real binding kind, but can be used to ask for a re-resolution
+    // of the implicit binding kind
+    PARTITION_FAKE_KIND_IMPLICIT_RECOMPUTE = 0xc,
+    PARTITION_FAKE_KIND_CYCLE = 0xd
+};
+
+static const uint8_t PARTITION_MASK_KIND = 0x0f;
+static const uint8_t PARTITION_MASK_FLAG = 0xf0;
+
+//// These are flags that get anded into the above
+//
+// _EXPORTED: This binding partition is exported. In the world ranges covered by this partitions,
+// other modules that `using` this module, may implicit import this binding.
+static const uint8_t PARTITION_FLAG_EXPORTED       = 0x10;
+// _DEPRECATED: This binding partition is deprecated. It is considered weak for the purposes of
+// implicit import resolution.
+static const uint8_t PARTITION_FLAG_DEPRECATED     = 0x20;
+// _DEPWARN: This binding partition will print a deprecation warning on access. Note that _DEPWARN
+// implies _DEPRECATED. However, the reverse is not true. Such bindings are usually used for functions,
+// where calling the function itself will provide a (better) deprecation warning/error.
+static const uint8_t PARTITION_FLAG_DEPWARN        = 0x40;
+// _IMPLICITLY_EXPORTED: This binding partition is implicitly exported via @reexport. Unlike _EXPORTED,
+// this flag is set during implicit resolution and can be removed if the resolution changes.
+static const uint8_t PARTITION_FLAG_IMPLICITLY_EXPORTED = 0x80;
+
+#if defined(_COMPILER_MICROSOFT_)
+#define JL_ALIGNED_ATTR(alignment) \
+    __declspec(align(alignment))
+#else
+#define JL_ALIGNED_ATTR(alignment) \
+    __attribute__((aligned(alignment)))
+#endif
+
+typedef struct JL_ALIGNED_ATTR(8) _jl_binding_partition_t {
+    JL_DATA_TYPE
+    /* union {
+     *   // For ->kind == PARTITION_KIND_GLOBAL
+     *   jl_value_t *type_restriction;
+     *   // For ->kind in (PARTITION_KIND_CONST(_IMPORT), PARTITION_KIND_IMPLICIT_CONST)
+     *   jl_value_t *constval;
+     *   // For ->kind in (PARTITION_KIND_IMPLICIT_GLOBAL, PARTITION_KIND_EXPLICIT, PARTITION_KIND_IMPORT)
+     *   jl_binding_t *imported;
+     * } restriction;
+     */
+    jl_value_t *restriction;
+    _Atomic(size_t) min_world;
+    _Atomic(size_t) max_world;
+    _Atomic(struct _jl_binding_partition_t *) next;
+    size_t kind;
+} jl_binding_partition_t;
+
+STATIC_INLINE enum jl_partition_kind jl_binding_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT
+{
+    return (enum jl_partition_kind)(bpart->kind & 0xf);
+}
+
+enum jl_binding_flags {
+    BINDING_FLAG_DID_PRINT_BACKDATE_ADMONITION        = 0x1,
+    BINDING_FLAG_DID_PRINT_IMPLICIT_IMPORT_ADMONITION = 0x2,
+    // `export` is tracked in partitions, but sets this as well
+    BINDING_FLAG_PUBLICP                              = 0x4,
+    // Set if any methods defined in this module implicitly reference
+    // this binding. If not, invalidation is optimized.
+    BINDING_FLAG_ANY_IMPLICIT_EDGES                   = 0x8
+};
+
 typedef struct _jl_binding_t {
     JL_DATA_TYPE
-    _Atomic(jl_value_t*) value;
     jl_globalref_t *globalref;  // cached GlobalRef for this binding
-    _Atomic(struct _jl_binding_t*) owner;  // for individual imported bindings (NULL until 'resolved')
-    _Atomic(jl_value_t*) ty;  // binding type
-    uint8_t constp:1;
-    uint8_t exportp:1;
-    uint8_t imported:1;
-    uint8_t usingfailed:1;
-    uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package
-    uint8_t padding:2;
+    _Atomic(jl_value_t*) value;
+    _Atomic(jl_binding_partition_t*) partitions;
+    jl_array_t *backedges;
+    _Atomic(uint8_t) flags;
 } jl_binding_t;
 
 typedef struct {
@@ -606,17 +824,29 @@ typedef struct {
     uint64_t lo;
 } jl_uuid_t;
 
+// Reading or writing requires `lock`:
+//   scanned_methods, usings
+// Reading or writing requires `Base.require_lock`:
+//   uuid
+// Reading or writing requires `world_counter_lock`:
+//   usings_backedges (TODO)
+// No lock is required to read these fields, set once on construction:
+//   name, parent, file, line, build_id, uuid, nospecialize, optlevel, compile,
+//   infer, iistopmod, max_methods
 typedef struct _jl_module_t {
     JL_DATA_TYPE
     jl_sym_t *name;
     struct _jl_module_t *parent;
     _Atomic(jl_svec_t*) bindings;
-    _Atomic(jl_array_t*) bindingkeyset; // index lookup by name into bindings
+    _Atomic(jl_genericmemory_t*) bindingkeyset; // index lookup by name into bindings
+    jl_sym_t *file;
+    int32_t line;
+    jl_value_t *usings_backedges;
+    jl_value_t *scanned_methods;
     // hidden fields:
-    arraylist_t usings;  // modules with all bindings potentially imported
+    arraylist_t usings; /* arraylist of struct jl_module_using */  // modules with all bindings potentially imported
     jl_uuid_t build_id;
     jl_uuid_t uuid;
-    size_t primary_world;
     _Atomic(uint32_t) counter;
     int32_t nospecialize;  // global bit flags: initialization for new methods
     int8_t optlevel;
@@ -624,25 +854,40 @@ typedef struct _jl_module_t {
     int8_t infer;
     uint8_t istopmod;
     int8_t max_methods;
+    // If cleared no binding partition in this module has PARTITION_FLAG_EXPORTED and min_world > jl_require_world.
+    _Atomic(int8_t) export_set_changed_since_require_world;
+    // Set if this module has any reexport usings (used to bypass fast-path in implicit resolution)
+    _Atomic(int8_t) has_reexports;
     jl_mutex_t lock;
     intptr_t hash;
 } jl_module_t;
 
-typedef struct _jl_globalref_t {
+struct _jl_module_using {
+    jl_module_t *mod;
+    size_t min_world;
+    size_t max_world;
+    size_t flags;
+};
+
+// Flags for _jl_module_using.flags
+static const uint8_t JL_MODULE_USING_REEXPORT = 0x1;
+
+struct _jl_globalref_t {
+    JL_DATA_TYPE
     jl_module_t *mod;
     jl_sym_t *name;
     jl_binding_t *binding;
-} jl_globalref_t;
+};
 
 // one Type-to-Value entry
-typedef struct _jl_typemap_entry_t {
+struct _jl_typemap_entry_t {
     JL_DATA_TYPE
     _Atomic(struct _jl_typemap_entry_t*) next; // invasive linked list
     jl_tupletype_t *sig; // the type signature for this entry
     jl_tupletype_t *simplesig; // a simple signature for fast rejection
     jl_svec_t *guardsigs;
-    size_t min_world;
-    size_t max_world;
+    _Atomic(size_t) min_world;
+    _Atomic(size_t) max_world;
     union {
         jl_value_t *value; // generic accessor
         jl_method_instance_t *linfo; // [nullable] for guard entries
@@ -652,7 +897,7 @@ typedef struct _jl_typemap_entry_t {
     int8_t isleafsig; // isleaftype(sig) & !any(isType, sig) : unsorted and very fast
     int8_t issimplesig; // all(isleaftype | isAny | isType | isVararg, sig) : sorted and fast
     int8_t va; // isVararg(sig)
-} jl_typemap_entry_t;
+};
 
 // one level in a TypeMap tree (each level splits on a type at a given offset)
 typedef struct _jl_typemap_level_t {
@@ -662,29 +907,37 @@ typedef struct _jl_typemap_level_t {
     // next split may be on Type{T} as LeafTypes then TypeName's parents up to Any
     // next split may be on LeafType
     // next split may be on TypeName
-    _Atomic(jl_array_t*) arg1; // contains LeafType (in a map of non-abstract TypeName)
-    _Atomic(jl_array_t*) targ; // contains Type{LeafType} (in a map of non-abstract TypeName)
-    _Atomic(jl_array_t*) name1; // a map for a map for TypeName, for parents up to (excluding) Any
-    _Atomic(jl_array_t*) tname; // a map for Type{TypeName}, for parents up to (including) Any
+    _Atomic(jl_genericmemory_t*) arg1; // contains LeafType (in a map of non-abstract TypeName)
+    _Atomic(jl_genericmemory_t*) targ; // contains Type{LeafType} (in a map of non-abstract TypeName)
+    _Atomic(jl_genericmemory_t*) name1; // a map for a map for TypeName, for parents up to (excluding) Any
+    _Atomic(jl_genericmemory_t*) tname; // a map for Type{TypeName}, for parents up to (including) Any
     // next a linear list of things too complicated at this level for analysis (no more levels)
     _Atomic(jl_typemap_entry_t*) linear;
     // finally, start a new level if the type at offs is Any
     _Atomic(jl_typemap_t*) any;
 } jl_typemap_level_t;
 
-// contains the TypeMap for one Type
+typedef struct _jl_methcache_t {
+    JL_DATA_TYPE
+    // hash map from dispatchtuple type to a linked-list of TypeMapEntry
+    // entry.sig == type for all entries in the linked-list
+    _Atomic(jl_genericmemory_t*) leafcache;
+
+    // cache for querying everything else (anything that didn't seem profitable to put into leafcache)
+    _Atomic(jl_typemap_t*) cache;
+
+    jl_mutex_t writelock;
+} jl_methcache_t;
+
+// contains global MethodTable
 typedef struct _jl_methtable_t {
     JL_DATA_TYPE
-    jl_sym_t *name; // sometimes used for debug printing
+    // full set of entries
     _Atomic(jl_typemap_t*) defs;
-    _Atomic(jl_array_t*) leafcache;
-    _Atomic(jl_typemap_t*) cache;
-    _Atomic(intptr_t) max_args;  // max # of non-vararg arguments in a signature
+    jl_methcache_t *cache;
+    jl_sym_t *name; // sometimes used for debug printing
     jl_module_t *module; // sometimes used for debug printing
-    jl_array_t *backedges; // (sig, caller::MethodInstance) pairs
-    jl_mutex_t writelock;
-    uint8_t offs;  // 0, or 1 to skip splitting typemap on first (function) argument
-    uint8_t frozen; // whether this accepts adding new methods
+    jl_genericmemory_t *backedges; // IdDict{top typenames, Vector{uncovered (sig => caller::CodeInstance)}}
 } jl_methtable_t;
 
 typedef struct {
@@ -722,10 +975,12 @@ typedef struct {
     XX(task) \
     /* bits types with special allocators */ \
     XX(bool) \
+    XX(nothing) \
     XX(char) \
     /*XX(float16)*/ \
     /*XX(float32)*/ \
     /*XX(float64)*/ \
+    /*XX(bfloat16)*/ \
     XX(int16) \
     XX(int32) \
     XX(int64) \
@@ -734,13 +989,15 @@ typedef struct {
     XX(uint32) \
     XX(uint64) \
     XX(uint8) \
+    XX(addrspacecore) \
+    XX(intrinsic) \
     /* AST objects */ \
     /* XX(argument) */ \
     /* XX(newvarnode) */ \
-    /* XX(slotnumber) */ \
-    /* XX(ssavalue) */ \
+    XX(slotnumber) \
+    XX(ssavalue) \
     /* end of JL_SMALL_TYPEOF */
-enum jlsmall_typeof_tags {
+enum jl_small_typeof_tags {
     jl_null_tag = 0,
 #define XX(name) jl_##name##_tag,
     JL_SMALL_TYPEOF(XX)
@@ -749,139 +1006,58 @@ enum jlsmall_typeof_tags {
     jl_bitstags_first = jl_char_tag, // n.b. bool is not considered a bitstype, since it can be compared by pointer
     jl_max_tags = 64
 };
-extern jl_datatype_t *small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)];
+extern JL_DLLIMPORT jl_datatype_t *jl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)];
+#ifndef JL_LIBRARY_EXPORTS_INTERNAL
+static inline jl_value_t *jl_to_typeof(uintptr_t t)
+{
+    if (t < (jl_max_tags << 4))
+        return (jl_value_t*)jl_small_typeof[t / sizeof(*jl_small_typeof)];
+    return (jl_value_t*)t;
+}
+#else
+extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)];
 static inline jl_value_t *jl_to_typeof(uintptr_t t)
 {
     if (t < (jl_max_tags << 4))
-        return (jl_value_t*)small_typeof[t / sizeof(*small_typeof)];
+        return (jl_value_t*)ijl_small_typeof[t / sizeof(*ijl_small_typeof)];
     return (jl_value_t*)t;
 }
+#endif
 
 
-// kinds
-extern JL_DLLIMPORT jl_datatype_t *jl_typeofbottom_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_datatype_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_uniontype_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_unionall_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_tvar_type JL_GLOBALLY_ROOTED;
-
-extern JL_DLLIMPORT jl_datatype_t *jl_any_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_unionall_t *jl_type_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_typename_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_typename_t *jl_type_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_symbol_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_ssavalue_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_slotnumber_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_argument_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_const_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_partial_struct_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_partial_opaque_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_interconditional_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_method_match_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_simplevector_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_typename_t *jl_tuple_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_typename_t *jl_vecelement_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_anytuple_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_emptytuple_type JL_GLOBALLY_ROOTED;
 #define jl_tuple_type jl_anytuple_type
-extern JL_DLLIMPORT jl_unionall_t *jl_anytuple_type_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_vararg_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_function_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_builtin_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_unionall_t *jl_opaque_closure_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_typename_t *jl_opaque_closure_typename JL_GLOBALLY_ROOTED;
-
-extern JL_DLLIMPORT jl_value_t *jl_bottom_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_method_instance_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_code_instance_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_code_info_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_method_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_module_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_unionall_t *jl_abstractarray_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_unionall_t *jl_densearray_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_unionall_t *jl_array_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_typename_t *jl_array_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_weakref_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_abstractstring_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_string_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_errorexception_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_argumenterror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_loaderror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_initerror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_typeerror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_methoderror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_undefvarerror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_atomicerror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_lineinfonode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_stackovf_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_memory_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_readonlymemory_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_diverror_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_undefref_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_interrupt_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_boundserror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_an_empty_vec_any JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_an_empty_string JL_GLOBALLY_ROOTED;
-
-extern JL_DLLIMPORT jl_datatype_t *jl_bool_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_char_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_int8_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_uint8_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_int16_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_uint16_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_int32_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_uint32_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_int64_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_uint64_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_float16_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_float32_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_float64_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_floatingpoint_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_number_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_void_type JL_GLOBALLY_ROOTED;  // deprecated
-extern JL_DLLIMPORT jl_datatype_t *jl_nothing_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_signed_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_voidpointer_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_uint8pointer_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_unionall_t *jl_pointer_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_unionall_t *jl_llvmpointer_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_unionall_t *jl_ref_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_typename_t *jl_pointer_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_typename_t *jl_llvmpointer_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_typename_t *jl_namedtuple_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_unionall_t *jl_namedtuple_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_task_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_pair_type JL_GLOBALLY_ROOTED;
-
-extern JL_DLLIMPORT jl_value_t *jl_array_uint8_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_array_any_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_array_symbol_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_array_int32_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_array_uint64_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_expr_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_binding_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_globalref_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_linenumbernode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_gotonode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_gotoifnot_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_returnnode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_phinode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_pinode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_phicnode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_upsilonnode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_quotenode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_newvarnode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_intrinsic_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_methtable_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_typemap_level_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_typemap_entry_type JL_GLOBALLY_ROOTED;
-
-extern JL_DLLIMPORT jl_svec_t *jl_emptysvec JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_emptytuple JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_true JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_false JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_nothing JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_value_t *jl_kwcall_func JL_GLOBALLY_ROOTED;
+
+#if !defined(JL_LIBRARY_EXPORTS_INTERNAL) || defined(__clang_analyzer__)
+#define XX(name, type) extern JL_DLLIMPORT type jl_##name JL_GLOBALLY_ROOTED;
+JL_EXPORTED_DATA_POINTERS(XX)
+#undef XX
+
+#define XX(name, type) extern JL_DLLIMPORT type jl_##name JL_GLOBALLY_ROOTED;
+JL_CONST_GLOBAL_VARS(XX)
+#undef XX
+
+#else
+// Struct definitions for global data access (internal hidden copies)
+struct jl_sysimg_global {
+#define XX(name, type) type name JL_GLOBALLY_ROOTED;
+    JL_EXPORTED_DATA_POINTERS(XX)
+#undef XX
+};
+
+struct jl_const_globals {
+#define XX(name, type) type jl##name JL_GLOBALLY_ROOTED;
+    JL_CONST_GLOBAL_VARS(XX)
+#undef XX
+};
+
+extern JL_HIDDEN struct jl_sysimg_global sysimg_global;
+extern JL_HIDDEN struct jl_const_globals const_globals;
+#include <jl_data_globals_defs.inc>
+#endif
+
+#define XX(name, type) extern JL_DLLIMPORT type name;
+JL_EXPORTED_DATA_SYMBOLS(XX)
+#undef XX
 
 // gc -------------------------------------------------------------------------
 
@@ -904,6 +1080,7 @@ struct _jl_gcframe_t {
 
 #define JL_GC_ENCODE_PUSHARGS(n)   (((size_t)(n))<<2)
 #define JL_GC_ENCODE_PUSH(n)       ((((size_t)(n))<<2)|1)
+#define JL_GC_DECODE_NROOTS(n)     (n >> 2)
 
 #ifdef __clang_gcanalyzer__
 
@@ -914,8 +1091,10 @@ extern void JL_GC_PUSH2(void *, void *) JL_NOTSAFEPOINT;
 extern void JL_GC_PUSH3(void *, void *, void *)  JL_NOTSAFEPOINT;
 extern void JL_GC_PUSH4(void *, void *, void *, void *)  JL_NOTSAFEPOINT;
 extern void JL_GC_PUSH5(void *, void *, void *, void *, void *)  JL_NOTSAFEPOINT;
+extern void JL_GC_PUSH6(void *, void *, void *, void *, void *, void *)  JL_NOTSAFEPOINT;
 extern void JL_GC_PUSH7(void *, void *, void *, void *, void *, void *, void *)  JL_NOTSAFEPOINT;
 extern void JL_GC_PUSH8(void *, void *, void *, void *, void *, void *, void *, void *)  JL_NOTSAFEPOINT;
+extern void JL_GC_PUSH9(void *, void *, void *, void *, void *, void *, void *, void *, void *)  JL_NOTSAFEPOINT;
 extern void _JL_GC_PUSHARGS(jl_value_t **, size_t) JL_NOTSAFEPOINT;
 // This is necessary, because otherwise the analyzer considers this undefined
 // behavior and terminates the exploration
@@ -955,10 +1134,15 @@ extern void JL_GC_POP() JL_NOTSAFEPOINT;
 #define JL_GC_PUSH7(arg1, arg2, arg3, arg4, arg5, arg6, arg7)                                           \
   void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(7), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7}; \
   jl_pgcstack = (jl_gcframe_t*)__gc_stkf;
+
 #define JL_GC_PUSH8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8)                                     \
   void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(8), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8}; \
   jl_pgcstack = (jl_gcframe_t*)__gc_stkf;
 
+#define JL_GC_PUSH9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9)                               \
+  void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(9), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9}; \
+  jl_pgcstack = (jl_gcframe_t*)__gc_stkf;
+
 
 #define JL_GC_PUSHARGS(rts_var,n)                                                                       \
   rts_var = ((jl_value_t**)alloca(((n)+2)*sizeof(jl_value_t*)))+2;                                      \
@@ -971,73 +1155,28 @@ extern void JL_GC_POP() JL_NOTSAFEPOINT;
 
 #endif
 
-JL_DLLEXPORT int jl_gc_enable(int on);
-JL_DLLEXPORT int jl_gc_is_enabled(void);
-
-typedef enum {
-    JL_GC_AUTO = 0,         // use heuristics to determine the collection type
-    JL_GC_FULL = 1,         // force a full collection
-    JL_GC_INCREMENTAL = 2,  // force an incremental collection
-} jl_gc_collection_t;
-
-JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t);
-
-JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_value_t *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_finalize(jl_value_t *o);
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value);
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void);
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void);
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void);
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void);
-JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz);
 JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, struct _jl_task_t *owner) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz);
-JL_DLLEXPORT void jl_gc_use(jl_value_t *a);
-// Set GC memory trigger in bytes for greedy memory collecting
-JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem);
-JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void);
-
-JL_DLLEXPORT void jl_clear_malloc_data(void);
-
-// GC write barriers
-JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *root) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const jl_value_t *stored) JL_NOTSAFEPOINT;
-
-STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
-{
-    // parent and ptr isa jl_value_t*
-    if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 && // parent is old and not in remset
-                   (jl_astaggedvalue(ptr)->bits.gc & 1) == 0)) // ptr is young
-        jl_gc_queue_root((jl_value_t*)parent);
-}
-
-STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
-{
-    // if ptr is old
-    if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3)) {
-        jl_gc_queue_root((jl_value_t*)ptr);
-    }
-}
 
-STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
-{
-    // ptr is an immutable object
-    if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
-        return; // parent is young or in remset
-    if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3))
-        return; // ptr is old and not in remset (thus it does not point to young)
-    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr);
-    const jl_datatype_layout_t *ly = dt->layout;
-    if (ly->npointers)
-        jl_gc_queue_multiroot((jl_value_t*)parent, ptr);
-}
+// Allocates a new weak-reference, assigns its value and increments Julia allocation
+// counters. If thread-local allocators are used, then this function should allocate in the
+// thread-local allocator of the current thread.
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value);
 
-JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz);
-JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
-                                         int isaligned, jl_value_t *owner);
 JL_DLLEXPORT void jl_gc_safepoint(void);
+JL_DLLEXPORT int jl_safepoint_suspend_thread(int tid, int waitstate);
+JL_DLLEXPORT void jl_safepoint_suspend_all_threads(struct _jl_task_t *ct);
+JL_DLLEXPORT void jl_safepoint_resume_all_threads(struct _jl_task_t *ct);
+JL_DLLEXPORT int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT;
+
+void *mtarraylist_get(small_arraylist_t *_a, size_t idx) JL_NOTSAFEPOINT;
+size_t mtarraylist_length(small_arraylist_t *_a) JL_NOTSAFEPOINT;
+void mtarraylist_add(small_arraylist_t *_a, void *elt, size_t idx) JL_NOTSAFEPOINT;
+void mtarraylist_push(small_arraylist_t *_a, void *elt) JL_NOTSAFEPOINT;
 
 // object accessors -----------------------------------------------------------
 
@@ -1074,16 +1213,115 @@ STATIC_INLINE jl_value_t *jl_svecset(
 }
 #endif
 
-#define jl_array_len(a)   (((jl_array_t*)(a))->length)
-#define jl_array_data(a)  ((void*)((jl_array_t*)(a))->data)
-#define jl_array_dim(a,i) ((&((jl_array_t*)(a))->nrows)[i])
-#define jl_array_dim0(a)  (((jl_array_t*)(a))->nrows)
-#define jl_array_nrows(a) (((jl_array_t*)(a))->nrows)
-#define jl_array_ndims(a) ((int32_t)(((jl_array_t*)a)->flags.ndims))
-#define jl_array_data_owner_offset(ndims) (offsetof(jl_array_t,ncols) + sizeof(size_t)*(1+jl_array_ndimwords(ndims))) // in bytes
-#define jl_array_data_owner(a) (*((jl_value_t**)((char*)a + jl_array_data_owner_offset(jl_array_ndims(a)))))
+#define jl_genericmemory_data_owner_field(a) (*(jl_value_t**)((jl_genericmemory_t*)(a) + 1))
+
+#define jl_nparams(t)  jl_svec_len(((jl_datatype_t*)(t))->parameters)
+#define jl_tparam0(t)  jl_svecref(((jl_datatype_t*)(t))->parameters, 0)
+#define jl_tparam1(t)  jl_svecref(((jl_datatype_t*)(t))->parameters, 1)
+#define jl_tparam2(t)  jl_svecref(((jl_datatype_t*)(t))->parameters, 2)
+#define jl_tparam(t,i) jl_svecref(((jl_datatype_t*)(t))->parameters, i)
+#define jl_array_data(a,t) ((t*)((jl_array_t*)(a))->ref.ptr_or_offset)
+#define jl_array_data_(a) ((void*)((jl_array_t*)(a))->ref.ptr_or_offset)
+#define jl_array_dim(a,i) (((jl_array_t*)(a))->dimsize[i])
+#define jl_array_dim0(a)  (((jl_array_t*)(a))->dimsize[0])
+#define jl_array_nrows(a) (((jl_array_t*)(a))->dimsize[0])
+#define jl_array_ndims(a) (*(size_t*)jl_tparam1(jl_typetagof(a)))
+#define jl_array_maxsize(a) (((jl_array_t*)(a))->ref.mem->length)
+#define jl_array_len(a)   (jl_array_ndims(a) == 1 ? jl_array_nrows(a) : jl_array_maxsize(a))
+
+JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
+#define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack))
+
+STATIC_INLINE jl_value_t *jl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+
+// write barriers
+
+#ifndef WITH_THIRD_PARTY_HEAP
+#include "gc-wb-stock.h"
+#else
+// Pick the appropriate third-party implementation
+#ifdef WITH_THIRD_PARTY_HEAP
+#if WITH_THIRD_PARTY_HEAP == 1 // MMTk
+#include "gc-wb-mmtk.h"
+#endif
+#endif
+#endif
+
+/*
+  how - allocation style
+  0 = data is inlined
+  1 = owns the gc-managed data, exclusively (will free it)
+  2 = malloc-allocated pointer (does not own it)
+  3 = has a pointer to the String object that owns the data pointer (m must be isbits)
+*/
+STATIC_INLINE int jl_genericmemory_how(jl_genericmemory_t *m) JL_NOTSAFEPOINT
+{
+    if (m->ptr == (void*)((char*)m + 16)) // JL_SMALL_BYTE_ALIGNMENT (from julia_internal.h)
+        return 0;
+    jl_value_t *owner = jl_genericmemory_data_owner_field(m);
+    if (owner == (jl_value_t*)m)
+        return 1;
+    if (owner == NULL)
+        return 2;
+    return 3;
+}
+
+STATIC_INLINE jl_value_t *jl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+{
+    return (jl_value_t*)m;
+}
+
+JL_DLLEXPORT char *jl_genericmemory_typetagdata(jl_genericmemory_t *m) JL_NOTSAFEPOINT;
+
+#ifdef __clang_gcanalyzer__
+jl_value_t **jl_genericmemory_ptr_data(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+STATIC_INLINE jl_value_t *jl_genericmemory_ptr_ref(void *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
+STATIC_INLINE jl_value_t *jl_genericmemory_ptr_set(
+    void *m JL_ROOTING_ARGUMENT, size_t i,
+    void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT;
+#else
+#define jl_genericmemory_ptr_data(a)  ((jl_value_t**)((jl_genericmemory_t*)(a))->ptr)
+STATIC_INLINE jl_value_t *jl_genericmemory_ptr_ref(void *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT
+{
+    jl_genericmemory_t *m_ = (jl_genericmemory_t*)m;
+    assert(((jl_datatype_t*)jl_typetagof(m_))->layout->flags.arrayelem_isboxed);
+    assert(i < m_->length);
+    return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)(m_->ptr)) + i);
+}
+STATIC_INLINE jl_value_t *jl_genericmemory_ptr_set(
+    void *m JL_ROOTING_ARGUMENT, size_t i,
+    void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT
+{
+    jl_genericmemory_t *m_ = (jl_genericmemory_t*)m;
+    assert(((jl_datatype_t*)jl_typetagof(m_))->layout->flags.arrayelem_isboxed);
+    assert(i < m_->length);
+    jl_atomic_store_release(((_Atomic(jl_value_t*)*)(m_->ptr)) + i, (jl_value_t*)x);
+    if (x) {
+        jl_gc_wb(m, x);
+    }
+    return (jl_value_t*)x;
+}
+#endif
+
+STATIC_INLINE uint8_t jl_memory_uint8_ref(void *m, size_t i) JL_NOTSAFEPOINT
+{
+    jl_genericmemory_t *m_ = (jl_genericmemory_t*)m;
+    assert(jl_typetagis(m_, jl_memory_uint8_type));
+    assert(i < m_->length);
+    return ((uint8_t*)m_->ptr)[i];
+}
+STATIC_INLINE void jl_memory_uint8_set(void *m, size_t i, uint8_t x) JL_NOTSAFEPOINT
+{
+    jl_genericmemory_t *m_ = (jl_genericmemory_t*)m;
+    assert(jl_typetagis(m_, jl_memory_uint8_type));
+    assert(i < m_->length);
+    ((uint8_t*)m_->ptr)[i] = x;
+}
 
-JL_DLLEXPORT char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT;
+STATIC_INLINE jl_value_t *jl_array_owner(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+{
+    return jl_genericmemory_owner(a->ref.mem);
+}
 
 #ifdef __clang_gcanalyzer__
 jl_value_t **jl_array_ptr_data(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
@@ -1092,25 +1330,22 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set(
     void *a JL_ROOTING_ARGUMENT, size_t i,
     void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT;
 #else
-#define jl_array_ptr_data(a)  ((jl_value_t**)((jl_array_t*)(a))->data)
+#define jl_array_ptr_data(a) (jl_array_data(a, jl_value_t*))
 STATIC_INLINE jl_value_t *jl_array_ptr_ref(void *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT
 {
-    assert(((jl_array_t*)a)->flags.ptrarray);
+    assert(((jl_datatype_t*)jl_typetagof(((jl_array_t*)a)->ref.mem))->layout->flags.arrayelem_isboxed);
     assert(i < jl_array_len(a));
-    return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i);
+    return jl_atomic_load_relaxed(jl_array_data(a, _Atomic(jl_value_t*)) + i);
 }
 STATIC_INLINE jl_value_t *jl_array_ptr_set(
     void *a JL_ROOTING_ARGUMENT, size_t i,
     void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT
 {
-    assert(((jl_array_t*)a)->flags.ptrarray);
+    assert(((jl_datatype_t*)jl_typetagof(((jl_array_t*)a)->ref.mem))->layout->flags.arrayelem_isboxed);
     assert(i < jl_array_len(a));
-    jl_atomic_store_release(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i, (jl_value_t*)x);
+    jl_atomic_store_release(jl_array_data(a, _Atomic(jl_value_t*)) + i, (jl_value_t*)x);
     if (x) {
-        if (((jl_array_t*)a)->flags.how == 3) {
-            a = jl_array_data_owner(a);
-        }
-        jl_gc_wb(a, x);
+        jl_gc_wb(jl_array_owner((jl_array_t*)a), x);
     }
     return (jl_value_t*)x;
 }
@@ -1118,20 +1353,26 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set(
 
 STATIC_INLINE uint8_t jl_array_uint8_ref(void *a, size_t i) JL_NOTSAFEPOINT
 {
-    assert(i < jl_array_len(a));
     assert(jl_typetagis(a, jl_array_uint8_type));
-    return ((uint8_t*)(jl_array_data(a)))[i];
+    assert(i < jl_array_len(a));
+    return jl_array_data(a, uint8_t)[i];
 }
 STATIC_INLINE void jl_array_uint8_set(void *a, size_t i, uint8_t x) JL_NOTSAFEPOINT
 {
-    assert(i < jl_array_len(a));
     assert(jl_typetagis(a, jl_array_uint8_type));
-    ((uint8_t*)(jl_array_data(a)))[i] = x;
+    assert(i < jl_array_len(a));
+    jl_array_data(a, uint8_t)[i] = x;
+}
+STATIC_INLINE void jl_array_uint32_set(void *a, size_t i, uint32_t x) JL_NOTSAFEPOINT
+{
+    assert(i < jl_array_len(a));
+    assert(jl_typetagis(a, jl_array_uint32_type) || jl_typetagis(a, jl_array_int32_type));
+    jl_array_data(a, uint32_t)[i] = x;
 }
 
 #define jl_exprarg(e,n) jl_array_ptr_ref(((jl_expr_t*)(e))->args, n)
 #define jl_exprargset(e, n, v) jl_array_ptr_set(((jl_expr_t*)(e))->args, n, v)
-#define jl_expr_nargs(e) jl_array_len(((jl_expr_t*)(e))->args)
+#define jl_expr_nargs(e) jl_array_nrows(((jl_expr_t*)(e))->args)
 
 #define jl_fieldref(s,i) jl_get_nth_field(((jl_value_t*)(s)),i)
 #define jl_fieldref_noalloc(s,i) jl_get_nth_field_noalloc(((jl_value_t*)(s)),i)
@@ -1145,28 +1386,24 @@ STATIC_INLINE void jl_array_uint8_set(void *a, size_t i, uint8_t x) JL_NOTSAFEPO
 #define jl_gotonode_label(x) (((intptr_t*)(x))[0])
 #define jl_gotoifnot_cond(x) (((jl_value_t**)(x))[0])
 #define jl_gotoifnot_label(x) (((intptr_t*)(x))[1])
+#define jl_enternode_catch_dest(x) (((intptr_t*)(x))[0])
+#define jl_enternode_scope(x) (((jl_value_t**)(x))[1])
 #define jl_globalref_mod(s) (*(jl_module_t**)(s))
 #define jl_globalref_name(s) (((jl_sym_t**)(s))[1])
 #define jl_quotenode_value(x) (((jl_value_t**)x)[0])
 #define jl_returnnode_value(x) (((jl_value_t**)x)[0])
 
-#define jl_nparams(t)  jl_svec_len(((jl_datatype_t*)(t))->parameters)
-#define jl_tparam0(t)  jl_svecref(((jl_datatype_t*)(t))->parameters, 0)
-#define jl_tparam1(t)  jl_svecref(((jl_datatype_t*)(t))->parameters, 1)
-#define jl_tparam(t,i) jl_svecref(((jl_datatype_t*)(t))->parameters, i)
-
 // get a pointer to the data in a datatype
 #define jl_data_ptr(v)  ((jl_value_t**)v)
 
 #define jl_string_data(s) ((char*)s + sizeof(void*))
 #define jl_string_len(s)  (*(size_t*)s)
 
-#define jl_gf_mtable(f) (((jl_datatype_t*)jl_typeof(f))->name->mt)
-#define jl_gf_name(f)   (jl_gf_mtable(f)->name)
+#define jl_gf_name(f) (((jl_datatype_t*)jl_typeof(f))->name->singletonname)
 
 // struct type info
-JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack);
-#define jl_get_fieldtypes(st) ((st)->types ? (st)->types : jl_compute_fieldtypes((st), NULL))
+JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack, int cacheable);
+#define jl_get_fieldtypes(st) ((st)->types ? (st)->types : jl_compute_fieldtypes((st), NULL, 0))
 STATIC_INLINE jl_svec_t *jl_field_names(jl_datatype_t *st) JL_NOTSAFEPOINT
 {
     return st->name->names;
@@ -1181,10 +1418,24 @@ STATIC_INLINE jl_value_t *jl_field_type_concrete(jl_datatype_t *st JL_PROPAGATES
     return jl_svecref(st->types, i);
 }
 
-#define jl_datatype_size(t)    (((jl_datatype_t*)t)->layout->size)
-#define jl_datatype_align(t)   (((jl_datatype_t*)t)->layout->alignment)
-#define jl_datatype_nbits(t)   ((((jl_datatype_t*)t)->layout->size)*8)
-#define jl_datatype_nfields(t) (((jl_datatype_t*)(t))->layout->nfields)
+STATIC_INLINE int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT
+{
+    return l->nfields == 0 && l->npointers > 0;
+}
+
+JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+
+#define jl_inlinedatatype_layout(t) (((jl_datatype_t*)t)->layout)
+STATIC_INLINE const jl_datatype_layout_t *jl_datatype_layout(jl_datatype_t *t) JL_NOTSAFEPOINT
+{
+    if (t->layout == NULL || jl_is_layout_opaque(t->layout)) // e.g. GenericMemory
+        t = (jl_datatype_t*)jl_unwrap_unionall(t->name->wrapper);
+    return t->layout;
+}
+#define jl_datatype_size(t)    (jl_datatype_layout((jl_datatype_t*)(t))->size)
+#define jl_datatype_align(t)   (jl_datatype_layout((jl_datatype_t*)(t))->alignment)
+#define jl_datatype_nbits(t)   ((jl_datatype_layout((jl_datatype_t*)(t))->size)*8)
+#define jl_datatype_nfields(t) (jl_datatype_layout((jl_datatype_t*)(t))->nfields)
 
 JL_DLLEXPORT void *jl_symbol_name(jl_sym_t *s);
 // inline version with strong type check to detect typos in a `->name` chain
@@ -1194,6 +1445,13 @@ STATIC_INLINE char *jl_symbol_name_(jl_sym_t *s) JL_NOTSAFEPOINT
 }
 #define jl_symbol_name(s) jl_symbol_name_(s)
 
+STATIC_INLINE const char *jl_module_debug_name(jl_module_t *mod) JL_NOTSAFEPOINT
+{
+    if (!mod)
+        return "<null>";
+    return jl_symbol_name(mod->name);
+}
+
 static inline uint32_t jl_fielddesc_size(int8_t fielddesc_type) JL_NOTSAFEPOINT
 {
     assert(fielddesc_type >= 0 && fielddesc_type <= 2);
@@ -1212,23 +1470,23 @@ static inline uint32_t jl_fielddesc_size(int8_t fielddesc_type) JL_NOTSAFEPOINT
 #define jl_dt_layout_fields(d) ((const char*)(d) + sizeof(jl_datatype_layout_t))
 static inline const char *jl_dt_layout_ptrs(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT
 {
-    return jl_dt_layout_fields(l) + jl_fielddesc_size(l->fielddesc_type) * l->nfields;
+    return jl_dt_layout_fields(l) + jl_fielddesc_size(l->flags.fielddesc_type) * l->nfields;
 }
 
 #define DEFINE_FIELD_ACCESSORS(f)                                             \
     static inline uint32_t jl_field_##f(jl_datatype_t *st,                    \
                                         int i) JL_NOTSAFEPOINT                \
     {                                                                         \
-        const jl_datatype_layout_t *ly = st->layout;                          \
+        const jl_datatype_layout_t *ly = jl_datatype_layout(st);              \
         assert(i >= 0 && (size_t)i < ly->nfields);                            \
-        if (ly->fielddesc_type == 0) {                                        \
+        if (ly->flags.fielddesc_type == 0) {                                  \
             return ((const jl_fielddesc8_t*)jl_dt_layout_fields(ly))[i].f;    \
         }                                                                     \
-        else if (ly->fielddesc_type == 1) {                                   \
+        else if (ly->flags.fielddesc_type == 1) {                             \
             return ((const jl_fielddesc16_t*)jl_dt_layout_fields(ly))[i].f;   \
         }                                                                     \
         else {                                                                \
-            assert(ly->fielddesc_type == 2);                                  \
+            assert(ly->flags.fielddesc_type == 2);                            \
             return ((const jl_fielddesc32_t*)jl_dt_layout_fields(ly))[i].f;   \
         }                                                                     \
     }                                                                         \
@@ -1239,24 +1497,24 @@ DEFINE_FIELD_ACCESSORS(size)
 
 static inline int jl_field_isptr(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
 {
-    const jl_datatype_layout_t *ly = st->layout;
+    const jl_datatype_layout_t *ly = jl_datatype_layout(st);
     assert(i >= 0 && (size_t)i < ly->nfields);
-    return ((const jl_fielddesc8_t*)(jl_dt_layout_fields(ly) + jl_fielddesc_size(ly->fielddesc_type) * i))->isptr;
+    return ((const jl_fielddesc8_t*)(jl_dt_layout_fields(ly) + jl_fielddesc_size(ly->flags.fielddesc_type) * i))->isptr;
 }
 
 static inline uint32_t jl_ptr_offset(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
 {
-    const jl_datatype_layout_t *ly = st->layout;
+    const jl_datatype_layout_t *ly = st->layout; // NOT jl_datatype_layout(st)
     assert(i >= 0 && (size_t)i < ly->npointers);
     const void *ptrs = jl_dt_layout_ptrs(ly);
-    if (ly->fielddesc_type == 0) {
+    if (ly->flags.fielddesc_type == 0) {
         return ((const uint8_t*)ptrs)[i];
     }
-    else if (ly->fielddesc_type == 1) {
+    else if (ly->flags.fielddesc_type == 1) {
         return ((const uint16_t*)ptrs)[i];
     }
     else {
-        assert(ly->fielddesc_type == 2);
+        assert(ly->flags.fielddesc_type == 2);
         return ((const uint32_t*)ptrs)[i];
     }
 }
@@ -1285,11 +1543,6 @@ static inline int jl_field_isconst(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
 }
 
 
-static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT
-{
-    return l->nfields == 0 && l->npointers > 0;
-}
-
 // basic predicates -----------------------------------------------------------
 #define jl_is_nothing(v)     (((jl_value_t*)(v)) == ((jl_value_t*)jl_nothing))
 #define jl_is_tuple(v)       (((jl_datatype_t*)jl_typeof(v))->name == jl_tuple_typename)
@@ -1300,7 +1553,7 @@ static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEP
 #define jl_is_mutable(t)     (((jl_datatype_t*)t)->name->mutabl)
 #define jl_is_mutable_datatype(t) (jl_is_datatype(t) && (((jl_datatype_t*)t)->name->mutabl))
 #define jl_is_immutable(t)   (!((jl_datatype_t*)t)->name->mutabl)
-#define jl_is_immutable_datatype(t) (jl_is_datatype(t) && (!((jl_datatype_t*)t)->name->mutabl))
+#define jl_may_be_immutable_datatype(t) (jl_is_datatype(t) && (!((jl_datatype_t*)t)->name->mutabl))
 #define jl_is_uniontype(v)   jl_typetagis(v,jl_uniontype_tag<<4)
 #define jl_is_typevar(v)     jl_typetagis(v,jl_tvar_tag<<4)
 #define jl_is_unionall(v)    jl_typetagis(v,jl_unionall_tag<<4)
@@ -1316,14 +1569,16 @@ static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEP
 #define jl_is_uint64(v)      jl_typetagis(v,jl_uint64_tag<<4)
 #define jl_is_bool(v)        jl_typetagis(v,jl_bool_tag<<4)
 #define jl_is_symbol(v)      jl_typetagis(v,jl_symbol_tag<<4)
-#define jl_is_ssavalue(v)    jl_typetagis(v,jl_ssavalue_type)
-#define jl_is_slotnumber(v)  jl_typetagis(v,jl_slotnumber_type)
+#define jl_is_ssavalue(v)    jl_typetagis(v,jl_ssavalue_tag<<4)
+#define jl_is_slotnumber(v)  jl_typetagis(v,jl_slotnumber_tag<<4)
 #define jl_is_expr(v)        jl_typetagis(v,jl_expr_type)
 #define jl_is_binding(v)     jl_typetagis(v,jl_binding_type)
+#define jl_is_binding_partition(v) jl_typetagis(v,jl_binding_partition_type)
 #define jl_is_globalref(v)   jl_typetagis(v,jl_globalref_type)
 #define jl_is_gotonode(v)    jl_typetagis(v,jl_gotonode_type)
 #define jl_is_gotoifnot(v)   jl_typetagis(v,jl_gotoifnot_type)
 #define jl_is_returnnode(v)  jl_typetagis(v,jl_returnnode_type)
+#define jl_is_enternode(v)   jl_typetagis(v,jl_enternode_type)
 #define jl_is_argument(v)    jl_typetagis(v,jl_argument_type)
 #define jl_is_pinode(v)      jl_typetagis(v,jl_pinode_type)
 #define jl_is_phinode(v)     jl_typetagis(v,jl_phinode_type)
@@ -1332,23 +1587,32 @@ static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEP
 #define jl_is_quotenode(v)   jl_typetagis(v,jl_quotenode_type)
 #define jl_is_newvarnode(v)  jl_typetagis(v,jl_newvarnode_type)
 #define jl_is_linenode(v)    jl_typetagis(v,jl_linenumbernode_type)
+#define jl_is_linenumbernode(v) jl_typetagis(v,jl_linenumbernode_type)
 #define jl_is_method_instance(v) jl_typetagis(v,jl_method_instance_type)
 #define jl_is_code_instance(v) jl_typetagis(v,jl_code_instance_type)
 #define jl_is_code_info(v)   jl_typetagis(v,jl_code_info_type)
 #define jl_is_method(v)      jl_typetagis(v,jl_method_type)
 #define jl_is_module(v)      jl_typetagis(v,jl_module_tag<<4)
 #define jl_is_mtable(v)      jl_typetagis(v,jl_methtable_type)
+#define jl_is_mcache(v)      jl_typetagis(v,jl_methcache_type)
 #define jl_is_task(v)        jl_typetagis(v,jl_task_tag<<4)
 #define jl_is_string(v)      jl_typetagis(v,jl_string_tag<<4)
 #define jl_is_cpointer(v)    jl_is_cpointer_type(jl_typeof(v))
 #define jl_is_pointer(v)     jl_is_cpointer_type(jl_typeof(v))
 #define jl_is_uint8pointer(v)jl_typetagis(v,jl_uint8pointer_type)
 #define jl_is_llvmpointer(v) (((jl_datatype_t*)jl_typeof(v))->name == jl_llvmpointer_typename)
-#define jl_is_intrinsic(v)   jl_typetagis(v,jl_intrinsic_type)
-#define jl_array_isbitsunion(a) (!(((jl_array_t*)(a))->flags.ptrarray) && jl_is_uniontype(jl_tparam0(jl_typeof(a))))
+#define jl_is_intrinsic(v)   jl_typetagis(v,jl_intrinsic_tag<<4)
+#define jl_is_addrspacecore(v) jl_typetagis(v,jl_addrspacecore_tag<<4)
+#define jl_is_abioverride(v) jl_typetagis(v,jl_abioverride_type)
+#define jl_genericmemory_isbitsunion(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_isunion)
+#define jl_genericmemory_isatomic(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_isatomic)
+#define jl_genericmemory_islocked(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_islocked)
+#define jl_is_array_any(v)    jl_typetagis(v,jl_array_any_type)
 
 JL_DLLEXPORT int jl_subtype(jl_value_t *a, jl_value_t *b);
 
+int is_leaf_bound(jl_value_t *v) JL_NOTSAFEPOINT;
+
 STATIC_INLINE int jl_is_kind(jl_value_t *v) JL_NOTSAFEPOINT
 {
     return (v==(jl_value_t*)jl_uniontype_type || v==(jl_value_t*)jl_datatype_type ||
@@ -1381,23 +1645,23 @@ STATIC_INLINE int jl_is_structtype(void *v) JL_NOTSAFEPOINT
 
 STATIC_INLINE int jl_isbits(void *t) JL_NOTSAFEPOINT // corresponding to isbitstype() in julia
 {
-    return (jl_is_datatype(t) && ((jl_datatype_t*)t)->isbitstype);
+    return jl_is_datatype(t) && ((jl_datatype_t*)t)->isbitstype;
 }
 
 STATIC_INLINE int jl_is_datatype_singleton(jl_datatype_t *d) JL_NOTSAFEPOINT
 {
-    return (d->instance != NULL);
+    return d->instance != NULL && d->layout->size == 0 && d->layout->npointers == 0;
 }
 
 STATIC_INLINE int jl_is_abstracttype(void *v) JL_NOTSAFEPOINT
 {
-    return (jl_is_datatype(v) && ((jl_datatype_t*)(v))->name->abstract);
+    return jl_is_datatype(v) && ((jl_datatype_t*)(v))->name->abstract;
 }
 
 STATIC_INLINE int jl_is_array_type(void *t) JL_NOTSAFEPOINT
 {
-    return (jl_is_datatype(t) &&
-            ((jl_datatype_t*)(t))->name == jl_array_typename);
+    return jl_is_datatype(t) &&
+           ((jl_datatype_t*)(t))->name == jl_array_typename;
 }
 
 STATIC_INLINE int jl_is_array(void *v) JL_NOTSAFEPOINT
@@ -1406,6 +1670,42 @@ STATIC_INLINE int jl_is_array(void *v) JL_NOTSAFEPOINT
     return jl_is_array_type(t);
 }
 
+STATIC_INLINE int jl_is_genericmemory_type(void *t) JL_NOTSAFEPOINT
+{
+    return (jl_is_datatype(t) &&
+            ((jl_datatype_t*)(t))->name == jl_genericmemory_typename);
+}
+
+STATIC_INLINE int jl_is_genericmemory(void *v) JL_NOTSAFEPOINT
+{
+    jl_value_t *t = jl_typeof(v);
+    return jl_is_genericmemory_type(t);
+}
+
+STATIC_INLINE int jl_is_genericmemoryref_type(void *t) JL_NOTSAFEPOINT
+{
+    return (jl_is_datatype(t) &&
+            ((jl_datatype_t*)(t))->name == jl_genericmemoryref_typename);
+}
+
+STATIC_INLINE int jl_is_genericmemoryref(void *v) JL_NOTSAFEPOINT
+{
+    jl_value_t *t = jl_typeof(v);
+    return jl_is_genericmemoryref_type(t);
+}
+
+STATIC_INLINE int jl_is_addrspace_type(void *t) JL_NOTSAFEPOINT
+{
+    return (jl_is_datatype(t) &&
+            ((jl_datatype_t*)(t))->name == jl_addrspace_typename);
+}
+
+STATIC_INLINE int jl_is_addrspace(void *v) JL_NOTSAFEPOINT
+{
+    jl_value_t *t = jl_typeof(v);
+    return jl_is_addrspace_type(t);
+}
+
 
 STATIC_INLINE int jl_is_opaque_closure_type(void *t) JL_NOTSAFEPOINT
 {
@@ -1461,12 +1761,9 @@ STATIC_INLINE int jl_is_type_type(jl_value_t *v) JL_NOTSAFEPOINT
             ((jl_datatype_t*)(v))->name == ((jl_datatype_t*)jl_type_type->body)->name);
 }
 
-STATIC_INLINE int jl_is_array_zeroinit(jl_array_t *a) JL_NOTSAFEPOINT
+STATIC_INLINE int jl_is_genericmemory_zeroinit(jl_genericmemory_t *m) JL_NOTSAFEPOINT
 {
-    if (a->flags.ptrarray || a->flags.hasptr)
-        return 1;
-    jl_value_t *elty = jl_tparam0(jl_typeof(a));
-    return jl_is_datatype(elty) && ((jl_datatype_t*)elty)->zeroinit;
+    return ((jl_datatype_t*)jl_typeof(m))->zeroinit;
 }
 
 // object identity
@@ -1480,7 +1777,7 @@ JL_DLLEXPORT uintptr_t jl_type_hash(jl_value_t *v) JL_NOTSAFEPOINT;
 STATIC_INLINE int jl_egal__unboxed_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT
 {
     if (dtag < jl_max_tags << 4) {
-        if (dtag == jl_symbol_tag << 4 || dtag == jl_bool_tag << 4)
+        if (dtag == jl_symbol_tag << 4 || dtag == jl_bool_tag << 4 || dtag == jl_nothing_tag << 4)
             return 0;
     }
     else if (((jl_datatype_t*)dtag)->name->mutabl)
@@ -1515,6 +1812,7 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body);
 JL_DLLEXPORT const char *jl_typename_str(jl_value_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT const char *jl_typeof_str(jl_value_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_type_morespecific(jl_value_t *a, jl_value_t *b);
+JL_DLLEXPORT int jl_method_morespecific(jl_method_t *ma, jl_method_t *mb);
 
 STATIC_INLINE int jl_is_dispatch_tupletype(jl_value_t *v) JL_NOTSAFEPOINT
 {
@@ -1535,9 +1833,10 @@ JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p
 JL_DLLEXPORT jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n);
 JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1);
 JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2);
+JL_DLLEXPORT jl_value_t *jl_apply_type3(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2, jl_value_t *p3);
 JL_DLLEXPORT jl_datatype_t *jl_apply_modify_type(jl_value_t *dt);
 JL_DLLEXPORT jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt);
-JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params);
+JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params, int check); // if uncertain, set check=1
 JL_DLLEXPORT jl_value_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np);
 JL_DLLEXPORT jl_datatype_t *jl_new_datatype(jl_sym_t *name,
                                             jl_module_t *module,
@@ -1556,10 +1855,11 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name,
 // constructors
 JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *bt, const void *src);
 JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *src);
-JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb);
+JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl_value_t *src, int nb);
-JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
-JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettype, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
+JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expected, const jl_value_t *src, int nb) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_atomic_storeonce_bits(jl_datatype_t *dt, char *dst, const jl_value_t *src, int nb) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...);
 JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na);
 JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup);
@@ -1568,6 +1868,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void);
 JL_DLLEXPORT jl_svec_t *jl_svec(size_t n, ...) JL_MAYBE_UNROOTED;
 JL_DLLEXPORT jl_svec_t *jl_svec1(void *a);
 JL_DLLEXPORT jl_svec_t *jl_svec2(void *a, void *b);
+JL_DLLEXPORT jl_svec_t *jl_svec3(void *a, void *b, void *c);
 JL_DLLEXPORT jl_svec_t *jl_alloc_svec(size_t n);
 JL_DLLEXPORT jl_svec_t *jl_alloc_svec_uninit(size_t n);
 JL_DLLEXPORT jl_svec_t *jl_svec_copy(jl_svec_t *a);
@@ -1578,14 +1879,18 @@ JL_DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, size_t len) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_gensym(void);
 JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len);
 JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void);
-JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
-                                                 jl_module_t *module,
-                                                 _Atomic(jl_value_t*) *bp,
-                                                 jl_binding_t *bnd);
+JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b JL_PROPAGATES_ROOT);
+JL_DLLEXPORT jl_value_t *jl_get_binding_value_in_world(jl_binding_t *b JL_PROPAGATES_ROOT, size_t world);
+JL_DLLEXPORT jl_value_t *jl_get_latest_binding_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT);
+JL_DLLEXPORT jl_value_t *jl_get_latest_binding_value_if_resolved_debug_only(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_get_latest_binding_value_if_resolved_and_const_debug_only(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_module_t *mod, jl_sym_t *name);
 JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module);
-JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world);
+JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world, jl_code_instance_t **cache);
 JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src);
 JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT size_t jl_get_tls_world_age(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_drop_all_caches(void);
 JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_box_int8(int8_t x) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_box_uint8(uint8_t x) JL_NOTSAFEPOINT;
@@ -1653,53 +1958,57 @@ int jl_uniontype_size(jl_value_t *ty, size_t *sz);
 JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al);
 
 // arrays
-JL_DLLEXPORT jl_array_t *jl_new_array(jl_value_t *atype, jl_value_t *dims);
-JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
-                                          jl_value_t *dims);
 JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
                                             size_t nel, int own_buffer);
 JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
                                          jl_value_t *dims, int own_buffer);
 
 JL_DLLEXPORT jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr);
-JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr,
-                                           size_t nc);
-JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr,
-                                           size_t nc, size_t z);
+JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, size_t nc);
+JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, size_t nc, size_t z);
+JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims);
 JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len);
 JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len);
 JL_DLLEXPORT jl_value_t *jl_cstr_to_string(const char *str);
 JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len);
 JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a);
 JL_DLLEXPORT jl_array_t *jl_alloc_vec_any(size_t n);
-JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i);  // 0-indexed
-JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;  // 0-indexed
-JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *v JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, size_t i);  // 0-indexed
-JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i);  // 0-indexed
-JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i);  // 0-indexed
 JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc);
 JL_DLLEXPORT void jl_array_del_end(jl_array_t *a, size_t dec);
-JL_DLLEXPORT void jl_array_grow_beg(jl_array_t *a, size_t inc);
-JL_DLLEXPORT void jl_array_del_beg(jl_array_t *a, size_t dec);
-JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz);
 JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item);
 JL_DLLEXPORT void jl_array_ptr_1d_append(jl_array_t *a, jl_array_t *a2);
 JL_DLLEXPORT jl_value_t *jl_apply_array_type(jl_value_t *type, size_t dim);
-JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, size_t *tot, uint32_t ndims, size_t *dims, size_t elsz);
 // property access
 JL_DLLEXPORT void *jl_array_ptr(jl_array_t *a);
 JL_DLLEXPORT void *jl_array_eltype(jl_value_t *a);
 JL_DLLEXPORT int jl_array_rank(jl_value_t *a);
-JL_DLLEXPORT size_t jl_array_size(jl_value_t *a, int d);
+
+// genericmemory
+JL_DLLEXPORT jl_genericmemory_t *jl_new_genericmemory(jl_value_t *mtype, jl_value_t *dim);
+JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void *data,
+                                           size_t nel, int own_buffer);
+JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory(jl_value_t *mtype, size_t nel);
+JL_DLLEXPORT jl_genericmemory_t *jl_pchar_to_memory(const char *str, size_t len);
+JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory_unchecked(jl_ptls_t ptls, size_t nbytes, jl_datatype_t *mtype);
+JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_t len);
+JL_DLLEXPORT jl_genericmemory_t *jl_alloc_memory_any(size_t n);
+JL_DLLEXPORT jl_value_t *jl_genericmemoryref(jl_genericmemory_t *m, size_t i);  // 0-indexed
+
+JL_DLLEXPORT jl_genericmemoryref_t *jl_new_memoryref(jl_value_t *typ, jl_genericmemory_t *mem, void *data);
+JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT, int isatomic);
+JL_DLLEXPORT jl_value_t *jl_ptrmemoryrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_memoryref_isassigned(jl_genericmemoryref_t m, int isatomic) JL_GLOBALLY_ROOTED;
+JL_DLLEXPORT jl_genericmemoryref_t jl_memoryrefindex(jl_genericmemoryref_t m JL_PROPAGATES_ROOT, size_t idx) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, jl_value_t *v JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, int isatomic);
+JL_DLLEXPORT jl_value_t *jl_memoryrefswap(jl_genericmemoryref_t m, jl_value_t *v, int isatomic);
+JL_DLLEXPORT jl_value_t *jl_memoryrefmodify(jl_genericmemoryref_t m, jl_value_t *op, jl_value_t *v, int isatomic);
+JL_DLLEXPORT jl_value_t *jl_memoryrefreplace(jl_genericmemoryref_t m, jl_value_t *expected, jl_value_t *v, int isatomic);
+JL_DLLEXPORT jl_value_t *jl_memoryrefsetonce(jl_genericmemoryref_t m, jl_value_t *v, int isatomic);
 
 // strings
 JL_DLLEXPORT const char *jl_string_ptr(jl_value_t *s);
 
 // modules and global variables
-extern JL_DLLIMPORT jl_module_t *jl_main_module JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_module_t *jl_core_module JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_module_t *jl_base_module JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_module_t *jl_top_module JL_GLOBALLY_ROOTED;
 JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name, jl_module_t *parent);
 JL_DLLEXPORT void jl_set_module_nospecialize(jl_module_t *self, int on);
 JL_DLLEXPORT void jl_set_module_optlevel(jl_module_t *self, int lvl);
@@ -1710,46 +2019,44 @@ JL_DLLEXPORT void jl_set_module_infer(jl_module_t *self, int value);
 JL_DLLEXPORT int jl_get_module_infer(jl_module_t *m);
 JL_DLLEXPORT void jl_set_module_max_methods(jl_module_t *self, int value);
 JL_DLLEXPORT int jl_get_module_max_methods(jl_module_t *m);
+JL_DLLEXPORT jl_value_t *jl_get_module_usings_backedges(jl_module_t *m);
+JL_DLLEXPORT jl_value_t *jl_get_module_scanned_methods(jl_module_t *m);
+JL_DLLEXPORT jl_value_t *jl_get_module_binding_or_nothing(jl_module_t *m, jl_sym_t *s);
+
 // get binding for reading
 JL_DLLEXPORT jl_binding_t *jl_get_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
-JL_DLLEXPORT jl_binding_t *jl_get_binding_or_error(jl_module_t *m, jl_sym_t *var);
-JL_DLLEXPORT jl_binding_t *jl_get_binding_if_bound(jl_module_t *m, jl_sym_t *var);
-JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var);
+JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
 JL_DLLEXPORT jl_value_t *jl_get_binding_type(jl_module_t *m, jl_sym_t *var);
 // get binding for assignment
+JL_DLLEXPORT void jl_check_binding_currently_writable(jl_binding_t *b, jl_module_t *m, jl_sym_t *s);
 JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
-JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
-JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var);
-JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var);
-JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var);
+JL_DLLEXPORT jl_value_t *jl_get_existing_strong_gf(jl_binding_t *b JL_PROPAGATES_ROOT, size_t new_world);
+JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var, int allow_import);
 JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT int jl_globalref_is_const(jl_globalref_t *gr);
-JL_DLLEXPORT int jl_globalref_boundp(jl_globalref_t *gr);
-JL_DLLEXPORT jl_value_t *jl_get_globalref_value(jl_globalref_t *gr);
 JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
 JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT);
 JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT);
+void jl_set_initial_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT, int exported);
 JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED);
-JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var);
-JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from);
-JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s);
-JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname);
-JL_DLLEXPORT void jl_module_import(jl_module_t *to, jl_module_t *from, jl_sym_t *s);
-JL_DLLEXPORT void jl_module_import_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname);
-JL_DLLEXPORT void jl_module_export(jl_module_t *from, jl_sym_t *s);
+JL_DLLEXPORT jl_value_t *jl_checked_swap(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_value_t *jl_checked_replace(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *expected, jl_value_t *rhs);
+JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *op, jl_value_t *rhs);
+JL_DLLEXPORT jl_value_t *jl_checked_assignonce(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, enum jl_partition_kind);
+JL_DLLEXPORT void jl_module_import(jl_task_t *ct, jl_module_t *to, jl_module_t *from, jl_sym_t *asname, jl_sym_t *s, int explici);
+JL_DLLEXPORT void jl_import_module(jl_task_t *ct, jl_module_t *m, jl_module_t *import, jl_sym_t *asname);
+JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from, size_t flags);
+int jl_module_public_(jl_module_t *from, jl_sym_t *s, int exported, size_t new_world);
 JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *s);
 JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var);
-JL_DLLEXPORT void jl_add_standard_imports(jl_module_t *m);
-STATIC_INLINE jl_function_t *jl_get_function(jl_module_t *m, const char *name)
-{
-    return (jl_function_t*)jl_get_global(m, jl_symbol(name));
-}
 
 // eq hash tables
-JL_DLLEXPORT jl_array_t *jl_eqtable_put(jl_array_t *h JL_ROOTING_ARGUMENT, jl_value_t *key, jl_value_t *val JL_ROOTED_ARGUMENT, int *inserted);
-JL_DLLEXPORT jl_value_t *jl_eqtable_get(jl_array_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT;
-JL_DLLEXPORT jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, int *found);
-jl_value_t *jl_eqtable_getkey(jl_array_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_genericmemory_t *jl_eqtable_put(jl_genericmemory_t *h JL_ROOTING_ARGUMENT, jl_value_t *key, jl_value_t *val JL_ROOTED_ARGUMENT, int *inserted);
+JL_DLLEXPORT jl_value_t *jl_eqtable_get(jl_genericmemory_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_eqtable_pop(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt, int *found);
+jl_value_t *jl_eqtable_getkey(jl_genericmemory_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT;
 
 // system information
 JL_DLLEXPORT int jl_errno(void) JL_NOTSAFEPOINT;
@@ -1763,12 +2070,11 @@ JL_DLLEXPORT int jl_is_debugbuild(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_get_UNAME(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_get_ARCH(void) JL_NOTSAFEPOINT;
 JL_DLLIMPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT;
-extern JL_DLLIMPORT int jl_n_threadpools;
-extern JL_DLLIMPORT _Atomic(int) jl_n_threads;
-extern JL_DLLIMPORT int jl_n_gcthreads;
 extern int jl_n_markthreads;
 extern int jl_n_sweepthreads;
-extern JL_DLLIMPORT int *jl_n_threads_per_pool;
+
+#define JL_THREADPOOL_ID_INTERACTIVE 0
+#define JL_THREADPOOL_ID_DEFAULT 1
 
 // environment entries
 JL_DLLEXPORT jl_value_t *jl_environ(int i);
@@ -1789,8 +2095,13 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error_rt(const char *fname,
                                                const char *context,
                                                jl_value_t *ty JL_MAYBE_UNROOTED,
                                                jl_value_t *got JL_MAYBE_UNROOTED);
-JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var);
-JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_sym_t *type_name, jl_sym_t *var);
+JL_DLLEXPORT void JL_NORETURN jl_type_error_global(const char *fname,
+                                               jl_module_t *mod, jl_sym_t *sym,
+                                               jl_value_t *ty JL_MAYBE_UNROOTED,
+                                               jl_value_t *got JL_MAYBE_UNROOTED);
+JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var, jl_value_t *scope JL_MAYBE_UNROOTED);
+JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_datatype_t *t, jl_sym_t *var);
+JL_DLLEXPORT void JL_NORETURN jl_argument_error(char *str);
 JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str);
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error(jl_value_t *v JL_MAYBE_UNROOTED,
                                               jl_value_t *t JL_MAYBE_UNROOTED);
@@ -1803,18 +2114,6 @@ JL_DLLEXPORT void JL_NORETURN jl_bounds_error_tuple_int(jl_value_t **v,
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error_unboxed_int(void *v, jl_value_t *vt, size_t i);
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error_ints(jl_value_t *v JL_MAYBE_UNROOTED,
                                                    size_t *idxs, size_t nidxs);
-JL_DLLEXPORT void JL_NORETURN jl_eof_error(void);
-
-// Return the exception currently being handled, or `jl_nothing`.
-//
-// The catch scope is determined dynamically so this works in functions called
-// from a catch block.  The returned value is gc rooted until we exit the
-// enclosing JL_CATCH.
-// FIXME: Teach the static analyzer about this rather than using
-// JL_GLOBALLY_ROOTED which is far too optimistic.
-JL_DLLEXPORT jl_value_t *jl_current_exception(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
-JL_DLLEXPORT jl_value_t *jl_exception_occurred(void);
-JL_DLLEXPORT void jl_exception_clear(void) JL_NOTSAFEPOINT;
 
 #define JL_NARGS(fname, min, max)                               \
     if (nargs < min) jl_too_few_args(#fname, min);              \
@@ -1836,14 +2135,32 @@ JL_DLLEXPORT void jl_exception_clear(void) JL_NOTSAFEPOINT;
 typedef enum {
     JL_IMAGE_CWD = 0,
     JL_IMAGE_JULIA_HOME = 1,
-    //JL_IMAGE_LIBJULIA = 2,
+    JL_IMAGE_IN_MEMORY = 2
 } JL_IMAGE_SEARCH;
 
+typedef enum {
+    JL_IMAGE_KIND_NONE = 0,
+    JL_IMAGE_KIND_JI,
+    JL_IMAGE_KIND_SO,
+} jl_image_kind_t;
+
+// A loaded, but unparsed .ji or .so image file
+typedef struct {
+    jl_image_kind_t kind;
+    const void *pointers; // jl_image_pointers_t *
+    const char *data;
+    size_t size;
+    uint64_t base;
+} jl_image_buf_t;
+
+struct _jl_image_t;
+typedef struct _jl_image_t jl_image_t;
+
 JL_DLLIMPORT const char *jl_get_libdir(void);
-JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel);
 JL_DLLEXPORT void jl_init(void);
-JL_DLLEXPORT void jl_init_with_image(const char *julia_bindir,
-                                     const char *image_path);
+JL_DLLEXPORT void jl_init_with_image_file(const char *julia_bindir,
+                                          const char *image_path);
+JL_DLLEXPORT void jl_init_with_image_handle(void *handle);
 JL_DLLEXPORT const char *jl_get_default_sysimg_path(void);
 JL_DLLEXPORT int jl_is_initialized(void);
 JL_DLLEXPORT void jl_atexit_hook(int status);
@@ -1855,15 +2172,18 @@ JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle);
 JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void);
 
 JL_DLLEXPORT int jl_deserialize_verify_header(ios_t *s);
-JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname);
-JL_DLLEXPORT void jl_set_sysimg_so(void *handle);
-JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t emit_split, ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos);
-JL_DLLEXPORT void jl_restore_system_image(const char *fname);
-JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len);
+JL_DLLEXPORT jl_image_buf_t jl_preload_sysimg(const char *fname);
+JL_DLLEXPORT jl_image_buf_t jl_set_sysimg_so(void *handle);
+JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t emit_split, ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos, jl_array_t *module_init_order);
+JL_DLLEXPORT void jl_restore_system_image(jl_image_t *image, jl_image_buf_t buf);
 JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete, const char *pkgimage);
+JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred);
+JL_DLLEXPORT jl_array_t* jl_compute_new_ext_cis(void);
 JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *ci);
+JL_DLLEXPORT void jl_set_inference_entrance_backtraces(jl_value_t *inference_entrance_backtraces);
+JL_DLLEXPORT void jl_push_inference_entrance_backtraces(jl_value_t *ci);
 JL_DLLEXPORT void jl_write_compiler_output(void);
 
 // parsing
@@ -1872,16 +2192,9 @@ JL_DLLEXPORT jl_value_t *jl_parse_all(const char *text, size_t text_len,
 JL_DLLEXPORT jl_value_t *jl_parse_string(const char *text, size_t text_len,
                                          int offset, int greedy);
 // lowering
-JL_DLLEXPORT jl_value_t *jl_expand(jl_value_t *expr, jl_module_t *inmodule);
-JL_DLLEXPORT jl_value_t *jl_expand_with_loc(jl_value_t *expr, jl_module_t *inmodule,
-                                            const char *file, int line);
-JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *inmodule,
-                                                 const char *file, int line);
-JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmodule,
-                                            const char *file, int line, size_t world);
-JL_DLLEXPORT jl_value_t *jl_expand_stmt(jl_value_t *expr, jl_module_t *inmodule);
-JL_DLLEXPORT jl_value_t *jl_expand_stmt_with_loc(jl_value_t *expr, jl_module_t *inmodule,
-                                                 const char *file, int line);
+JL_DLLEXPORT jl_value_t *jl_lower(jl_value_t *expr, jl_module_t *inmodule,
+                                  const char *file, int line, size_t world,
+                                  bool_t warn);
 // deprecated; use jl_parse_all
 JL_DLLEXPORT jl_value_t *jl_parse_input_line(const char *text, size_t text_len,
                                              const char *filename, size_t filename_len);
@@ -1906,7 +2219,7 @@ typedef void *jl_libhandle; // compatible with dlopen (void*) / LoadLibrary (HMO
 JL_DLLEXPORT jl_libhandle jl_load_dynamic_library(const char *fname, unsigned flags, int throw_err);
 JL_DLLEXPORT jl_libhandle jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_dlclose(jl_libhandle handle) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_dlsym(jl_libhandle handle, const char *symbol, void ** value, int throw_err) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_dlsym(jl_libhandle handle, const char *symbol, void ** value, int throw_err, int search_deps) JL_NOTSAFEPOINT;
 
 // evaluation
 JL_DLLEXPORT jl_value_t *jl_toplevel_eval(jl_module_t *m, jl_value_t *v);
@@ -1928,22 +2241,26 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr JL_MAYBE_UNROOTED);
 // IR representation
 JL_DLLEXPORT jl_value_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code);
 JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t *metadata, jl_value_t *data);
-JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_value_t *data) JL_NOTSAFEPOINT;
 JL_DLLEXPORT uint8_t jl_ir_flag_inlining(jl_value_t *data) JL_NOTSAFEPOINT;
 JL_DLLEXPORT uint8_t jl_ir_flag_has_fcall(jl_value_t *data) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint8_t jl_ir_flag_has_image_globalref(jl_value_t *data) JL_NOTSAFEPOINT;
 JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_value_t *data) JL_NOTSAFEPOINT;
 JL_DLLEXPORT ssize_t jl_ir_nslots(jl_value_t *data) JL_NOTSAFEPOINT;
 JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_value_t *data, size_t i) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms);
 JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms);
 JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i);
-
-
-JL_DLLEXPORT int jl_is_operator(char *sym);
-JL_DLLEXPORT int jl_is_unary_operator(char *sym);
-JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym);
-JL_DLLEXPORT int jl_is_syntactic_operator(char *sym);
-JL_DLLEXPORT int jl_operator_precedence(char *sym);
+JL_DLLEXPORT struct jl_codeloc_t jl_uncompress1_codeloc(jl_value_t *cl, size_t pc) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_compress_codelocs(int32_t firstline, jl_value_t *codelocs, size_t nstmts);
+JL_DLLEXPORT jl_value_t *jl_uncompress_codelocs(jl_value_t *cl, size_t nstmts);
+JL_DLLEXPORT uint8_t jl_encode_inlining_cost(uint16_t inlining_cost) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint16_t jl_decode_inlining_cost(uint8_t inlining_cost) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT int jl_is_operator(const char *sym);
+JL_DLLEXPORT int jl_is_unary_operator(const char *sym);
+JL_DLLEXPORT int jl_is_unary_and_binary_operator(const char *sym);
+JL_DLLEXPORT int jl_is_syntactic_operator(const char *sym);
+JL_DLLEXPORT int jl_operator_precedence(const char *sym);
 
 STATIC_INLINE int jl_vinfo_sa(uint8_t vi)
 {
@@ -1959,6 +2276,7 @@ STATIC_INLINE int jl_vinfo_usedundef(uint8_t vi)
 
 JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint32_t nargs);
 JL_DLLEXPORT jl_value_t *jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_method_instance_t *meth);
+JL_DLLEXPORT jl_value_t *jl_invoke_oc(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_method_instance_t *meth);
 JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *linfo);
 
 STATIC_INLINE jl_value_t *jl_apply(jl_value_t **args, uint32_t nargs)
@@ -1966,15 +2284,15 @@ STATIC_INLINE jl_value_t *jl_apply(jl_value_t **args, uint32_t nargs)
     return jl_apply_generic(args[0], &args[1], nargs - 1);
 }
 
-JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f JL_MAYBE_UNROOTED, jl_value_t **args, uint32_t nargs);
-JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f JL_MAYBE_UNROOTED);
-JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED);
-JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED);
-JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED,
+JL_DLLEXPORT jl_value_t *jl_call(jl_value_t *f JL_MAYBE_UNROOTED, jl_value_t **args, uint32_t nargs);
+JL_DLLEXPORT jl_value_t *jl_call0(jl_value_t *f JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_value_t *jl_call1(jl_value_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_value_t *jl_call2(jl_value_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_value_t *jl_call3(jl_value_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED,
                                   jl_value_t *b JL_MAYBE_UNROOTED, jl_value_t *c JL_MAYBE_UNROOTED);
-
-// interfacing with Task runtime
-JL_DLLEXPORT void jl_yield(void);
+JL_DLLEXPORT jl_value_t *jl_call4(jl_value_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED,
+                                  jl_value_t *b JL_MAYBE_UNROOTED, jl_value_t *c JL_MAYBE_UNROOTED,
+                                  jl_value_t *d JL_MAYBE_UNROOTED);
 
 // async signal handling ------------------------------------------------------
 
@@ -1984,104 +2302,59 @@ JL_DLLEXPORT void jl_sigatomic_end(void);
 
 // tasks and exceptions -------------------------------------------------------
 
-typedef struct _jl_timing_block_t jl_timing_block_t;
-typedef struct _jl_timing_event_t jl_timing_event_t;
-typedef struct _jl_excstack_t jl_excstack_t;
-
 // info describing an exception handler
-typedef struct _jl_handler_t {
+struct _jl_handler_t {
     jl_jmp_buf eh_ctx;
     jl_gcframe_t *gcstack;
+    jl_value_t *scope;
     struct _jl_handler_t *prev;
     int8_t gc_state;
     size_t locks_len;
     sig_atomic_t defer_signal;
     jl_timing_block_t *timing_stack;
     size_t world_age;
-} jl_handler_t;
-
-#define JL_RNG_SIZE 5 // xoshiro 4 + splitmix 1
-
-typedef struct _jl_task_t {
-    JL_DATA_TYPE
-    jl_value_t *next; // invasive linked list for scheduler
-    jl_value_t *queue; // invasive linked list for scheduler
-    jl_value_t *tls;
-    jl_value_t *donenotify;
-    jl_value_t *result;
-    jl_value_t *logstate;
-    jl_function_t *start;
-    // 4 byte padding on 32-bit systems
-    // uint32_t padding0;
-    uint64_t rngState[JL_RNG_SIZE];
-    _Atomic(uint8_t) _state;
-    uint8_t sticky; // record whether this Task can be migrated to a new thread
-    _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with
-    // 1 byte padding
-    // uint8_t padding1;
-    // multiqueue priority
-    uint16_t priority;
-
-// hidden state:
-
-#ifdef USE_TRACY
-    const char *name;
-#endif
-    // id of owning thread - does not need to be defined until the task runs
-    _Atomic(int16_t) tid;
-    // threadpool id
-    int8_t threadpoolid;
-    // Reentrancy bits
-    // Bit 0: 1 if we are currently running inference/codegen
-    // Bit 1-2: 0-3 counter of how many times we've reentered inference
-    // Bit 3: 1 if we are writing the image and inference is illegal
-    uint8_t reentrant_timing;
-    // 2 bytes of padding on 32-bit, 6 bytes on 64-bit
-    // uint16_t padding2_32;
-    // uint48_t padding2_64;
-    // saved gc stack top for context switches
-    jl_gcframe_t *gcstack;
-    size_t world_age;
-    // quick lookup for current ptls
-    jl_ptls_t ptls; // == jl_all_tls_states[tid]
-    // saved exception stack
-    jl_excstack_t *excstack;
-    // current exception handler
-    jl_handler_t *eh;
-    // saved thread state
-    jl_ucontext_t ctx;
-    void *stkbuf; // malloc'd memory (either copybuf or stack)
-    size_t bufsz; // actual sizeof stkbuf
-    unsigned int copy_stack:31; // sizeof stack for copybuf
-    unsigned int started:1;
-} jl_task_t;
+};
 
 #define JL_TASK_STATE_RUNNABLE 0
 #define JL_TASK_STATE_DONE     1
 #define JL_TASK_STATE_FAILED   2
 
-JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t*, jl_value_t*, size_t);
+JL_DLLEXPORT jl_task_t *jl_new_task(jl_value_t*, jl_value_t*, size_t);
 JL_DLLEXPORT void jl_switchto(jl_task_t **pt);
 JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_rethrow(void);
-JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void);
 JL_DLLEXPORT void JL_NORETURN jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e, jl_task_t *ct);
-JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
-#define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack))
 
-extern JL_DLLIMPORT int jl_task_gcstack_offset;
-extern JL_DLLIMPORT int jl_task_ptls_offset;
 
+#ifdef __cplusplus
+}
+#endif
 #include "julia_locks.h"   // requires jl_task_t definition
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Return the exception currently being handled, or `jl_nothing`.
+//
+// The catch scope is determined dynamically so this works in functions called
+// from a catch block.  The returned value is gc rooted until we exit the
+// enclosing JL_CATCH.
+// FIXME: Teach the static analyzer about this rather than using
+// JL_GLOBALLY_ROOTED which is far too optimistic.
+JL_DLLEXPORT jl_value_t *jl_current_exception(jl_task_t *ct) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_exception_occurred(void);
+JL_DLLEXPORT void jl_exception_clear(void) JL_NOTSAFEPOINT;
 
-JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh);
-JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh);
-JL_DLLEXPORT void jl_pop_handler(int n);
-JL_DLLEXPORT size_t jl_excstack_state(void) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_enter_handler(jl_task_t *ct, jl_handler_t *eh) JL_NOTSAFEPOINT ;
+JL_DLLEXPORT void jl_eh_restore_state(jl_task_t *ct, jl_handler_t *eh);
+JL_DLLEXPORT void jl_eh_restore_state_noexcept(jl_task_t *ct, jl_handler_t *eh) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_pop_handler(jl_task_t *ct, int n) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_pop_handler_noexcept(jl_task_t *ct, int n) JL_NOTSAFEPOINT;
+JL_DLLEXPORT size_t jl_excstack_state(jl_task_t *ct) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_restore_excstack(jl_task_t *ct, size_t state) JL_NOTSAFEPOINT;
 
 #if defined(_OS_WINDOWS_)
 #if defined(_COMPILER_GCC_)
@@ -2121,10 +2394,9 @@ void (ijl_longjmp)(jmp_buf _Buf, int _Value);
 #define jl_setjmp_name "sigsetjmp"
 #endif
 #define jl_setjmp(a,b) sigsetjmp(a,b)
-#if defined(_COMPILER_ASAN_ENABLED_) && __GLIBC__
-// Bypass the ASAN longjmp wrapper - we're unpoisoning the stack ourselves.
-JL_DLLIMPORT int __attribute__ ((nothrow)) (__libc_siglongjmp)(jl_jmp_buf buf, int val);
-#define jl_longjmp(a,b) __libc_siglongjmp(a,b)
+#if defined(_COMPILER_ASAN_ENABLED_) && defined(__GLIBC__)
+extern void (*real_siglongjmp)(jmp_buf _Buf, int _Value);
+#define jl_longjmp(a,b) real_siglongjmp(a,b)
 #else
 #define jl_longjmp(a,b) siglongjmp(a,b)
 #endif
@@ -2133,24 +2405,37 @@ JL_DLLIMPORT int __attribute__ ((nothrow)) (__libc_siglongjmp)(jl_jmp_buf buf, i
 
 #ifdef __clang_gcanalyzer__
 
-// This is hard. Ideally we'd teach the static analyzer about the extra control
-// flow edges. But for now, just hide this as best we can
 extern int had_exception;
-#define JL_TRY if (1)
-#define JL_CATCH if (had_exception)
+
+// The analyzer assumes that the TRY block always executes to completion.
+// This can lead to both false positives and false negatives, since it doesn't model the fact that throwing always leaves the try block early.
+#define JL_TRY                                                      \
+    int i__try, i__catch; jl_handler_t __eh; jl_task_t *__eh_ct;    \
+    __eh_ct = jl_current_task;                                      \
+    size_t __excstack_state = jl_excstack_state(__eh_ct);           \
+    jl_enter_handler(__eh_ct, &__eh);                               \
+    __eh_ct->eh = &__eh;                                            \
+    for (i__try=1; i__try; i__try=0)
+
+#define JL_CATCH                                                    \
+    if (!had_exception)                                             \
+        jl_eh_restore_state_noexcept(__eh_ct, &__eh);               \
+    else                                                            \
+        for (i__catch=1, jl_eh_restore_state(__eh_ct, &__eh); i__catch; i__catch=0, /* CATCH BLOCK; */ jl_restore_excstack(__eh_ct, __excstack_state))
 
 #else
 
-#define JL_TRY                                                    \
-    int i__tr, i__ca; jl_handler_t __eh;                          \
-    size_t __excstack_state = jl_excstack_state();                \
-    jl_enter_handler(&__eh);                                      \
-    if (!jl_setjmp(__eh.eh_ctx,0))                                \
-        for (i__tr=1; i__tr; i__tr=0, jl_eh_restore_state(&__eh))
+#define JL_TRY                                                      \
+    int i__try, i__catch; jl_handler_t __eh; jl_task_t *__eh_ct;    \
+    __eh_ct = jl_current_task;                                      \
+    size_t __excstack_state = jl_excstack_state(__eh_ct);           \
+    jl_enter_handler(__eh_ct, &__eh);                               \
+    if (!jl_setjmp(__eh.eh_ctx, 0))                                 \
+        for (i__try=1, __eh_ct->eh = &__eh; i__try; i__try=0, /* TRY BLOCK; */ jl_eh_restore_state_noexcept(__eh_ct, &__eh))
 
-#define JL_CATCH                                                \
-    else                                                        \
-        for (i__ca=1, jl_eh_restore_state(&__eh); i__ca; i__ca=0, jl_restore_excstack(__excstack_state))
+#define JL_CATCH                                                    \
+    else                                                            \
+        for (i__catch=1, jl_eh_restore_state(__eh_ct, &__eh); i__catch; i__catch=0, /* CATCH BLOCK; */ jl_restore_excstack(__eh_ct, __excstack_state))
 
 #endif
 
@@ -2185,19 +2470,26 @@ typedef struct {
 } jl_uv_file_t;
 
 #ifdef __GNUC__
-#define _JL_FORMAT_ATTR(type, str, arg) \
-    __attribute__((format(type, str, arg)))
+#  ifdef __MINGW32__
+#define _JL_FORMAT_ATTR(str, arg) \
+    __attribute__((format(__MINGW_PRINTF_FORMAT, str, arg)))
+#  else
+#define _JL_FORMAT_ATTR(str, arg) \
+    __attribute__((format(printf, str, arg)))
+#  endif
 #else
-#define _JL_FORMAT_ATTR(type, str, arg)
+#define _JL_FORMAT_ATTR(str, arg)
 #endif
 
 JL_DLLEXPORT void jl_uv_puts(struct uv_stream_s *stream, const char *str, size_t n);
 JL_DLLEXPORT int jl_printf(struct uv_stream_s *s, const char *format, ...)
-    _JL_FORMAT_ATTR(printf, 2, 3);
+    _JL_FORMAT_ATTR(2, 3);
 JL_DLLEXPORT int jl_vprintf(struct uv_stream_s *s, const char *format, va_list args)
-    _JL_FORMAT_ATTR(printf, 2, 0);
+    _JL_FORMAT_ATTR(2, 0);
 JL_DLLEXPORT void jl_safe_printf(const char *str, ...) JL_NOTSAFEPOINT
-    _JL_FORMAT_ATTR(printf, 1, 2);
+    _JL_FORMAT_ATTR(1, 2);
+JL_DLLEXPORT void jl_safe_fprintf(ios_t *s, const char *str, ...) JL_NOTSAFEPOINT
+    _JL_FORMAT_ATTR(2, 3);
 
 extern JL_DLLEXPORT JL_STREAM *JL_STDIN;
 extern JL_DLLEXPORT JL_STREAM *JL_STDOUT;
@@ -2210,31 +2502,30 @@ JL_DLLEXPORT int jl_termios_size(void);
 
 // showing and std streams
 JL_DLLEXPORT void jl_flush_cstdio(void) JL_NOTSAFEPOINT;
-JL_DLLEXPORT jl_value_t *jl_stdout_obj(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) JL_NOTSAFEPOINT;
+JL_DLLEXPORT size_t jl_safe_static_show(JL_STREAM *out, jl_value_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_fprint_backtrace(ios_t *s) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT; // deprecated
 // Mainly for debugging, use `void*` so that no type cast is needed in C++.
 JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT;
 
 // julia options -----------------------------------------------------------
 
-#include "jloptions.h"
-
-extern JL_DLLIMPORT jl_options_t jl_options;
-
 JL_DLLEXPORT ssize_t jl_sizeof_jl_options(void);
 
 // Parse an argc/argv pair to extract general julia options, passing back out
 // any arguments that should be passed on to the script.
 JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp);
-JL_DLLEXPORT char *jl_format_filename(const char *output_pattern);
+JL_DLLEXPORT char *jl_format_filename(const char *output_pattern) JL_NOTSAFEPOINT;
+
+uint64_t parse_heap_size_option(const char *optarg, const char *option_name, int allow_pct);
 
 // Set julia-level ARGS array according to the arguments provided in
 // argc/argv
-JL_DLLEXPORT void jl_set_ARGS(int argc, char **argv);
+JL_DLLEXPORT jl_value_t *jl_set_ARGS(int argc, char **argv);
 
 JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT;
 
@@ -2292,15 +2583,29 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT;
 #define JL_OPTIONS_HANDLE_SIGNALS_ON 1
 #define JL_OPTIONS_HANDLE_SIGNALS_OFF 0
 
+#define JL_OPTIONS_USE_EXPERIMENTAL_FEATURES_YES 1
+#define JL_OPTIONS_USE_EXPERIMENTAL_FEATURES_NO 0
+
 #define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES 1
 #define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_NO 0
 
+#define JL_OPTIONS_USE_COMPILED_MODULES_STRICT 3
+#define JL_OPTIONS_USE_COMPILED_MODULES_EXISTING 2
 #define JL_OPTIONS_USE_COMPILED_MODULES_YES 1
 #define JL_OPTIONS_USE_COMPILED_MODULES_NO 0
 
+#define JL_OPTIONS_USE_PKGIMAGES_EXISTING 2
 #define JL_OPTIONS_USE_PKGIMAGES_YES 1
 #define JL_OPTIONS_USE_PKGIMAGES_NO 0
 
+#define JL_TRIM_NO 0
+#define JL_TRIM_SAFE 1
+#define JL_TRIM_UNSAFE 2
+#define JL_TRIM_UNSAFE_WARN 3
+
+#define JL_OPTIONS_TASK_METRICS_OFF 0
+#define JL_OPTIONS_TASK_METRICS_ON 1
+
 // Version information
 #include <julia_version.h> // Generated file
 
@@ -2309,8 +2614,6 @@ JL_DLLEXPORT extern int jl_ver_minor(void);
 JL_DLLEXPORT extern int jl_ver_patch(void);
 JL_DLLEXPORT extern int jl_ver_is_release(void);
 JL_DLLEXPORT extern const char *jl_ver_string(void);
-JL_DLLEXPORT const char *jl_git_branch(void);
-JL_DLLEXPORT const char *jl_git_commit(void);
 
 // nullable struct representations
 typedef struct {
@@ -2324,9 +2627,13 @@ typedef struct {
 } jl_nullable_float32_t;
 
 #define jl_root_task (jl_current_task->ptls->root_task)
-
 JL_DLLEXPORT jl_task_t *jl_get_current_task(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
 
+STATIC_INLINE jl_value_t *jl_get_function(jl_module_t *m, const char *name)
+{
+    return (jl_value_t*)jl_get_global(m, jl_symbol(name));
+}
+
 // TODO: we need to pin the task while using this (set pure bit)
 JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *) JL_NOTSAFEPOINT;
@@ -2334,8 +2641,7 @@ JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *) JL_NOTSAFEPOINT;
 // codegen interface ----------------------------------------------------------
 // The root propagation here doesn't have to be literal, but callers should
 // ensure that the return value outlives the MethodInstance
-typedef jl_value_t *(*jl_codeinstance_lookup_t)(jl_method_instance_t *mi JL_PROPAGATES_ROOT,
-    size_t min_world, size_t max_world);
+// Must be kept in sync with `base/reflection.jl` (CodegenParams)
 typedef struct {
     int track_allocations;  // can we track allocations?
     int code_coverage;      // can we measure coverage?
@@ -2345,18 +2651,24 @@ typedef struct {
     int gnu_pubnames;       // can we emit the gnu pubnames debuginfo
     int debug_info_kind;    // Enum for line-table-only, line-directives-only,
                             // limited, standalone
-
+    int debug_info_level;   // equivalent to the -g level from the cli
     int safepoint_on_entry; // Emit a safepoint on entry to each function
     int gcstack_arg; // Pass the ptls value as an argument with swiftself
 
-    // Cache access. Default: jl_rettype_inferred.
-    jl_codeinstance_lookup_t lookup;
+    int use_jlplt; // Whether to use the Julia PLT mechanism or emit symbols directly
+    int force_emit_all; // Force emission of code for const return functions
 
-    // If not `nothing`, rewrite all generic calls to call
-    // generic_context(f, args...) instead of f(args...).
-    jl_value_t *generic_context;
+    // These options control the sanitizer passes and are used to AOT compile instrumented sysimages
+    int sanitize_memory;
+    int sanitize_thread;
+    int sanitize_address;
 } jl_cgparams_t;
 extern JL_DLLEXPORT int jl_default_debug_info_kind;
+extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams;
+
+typedef struct {
+    int emit_metadata;
+} jl_emission_params_t;
 
 #ifdef __cplusplus
 }
diff --git a/src/julia_assert.h b/src/julia_assert.h
index 4b120fd9e845b..13cbdbcd36f5b 100644
--- a/src/julia_assert.h
+++ b/src/julia_assert.h
@@ -10,6 +10,7 @@
 // Files that need `assert` should include this file after all other includes.
 // All files should also check `JL_NDEBUG` instead of `NDEBUG`.
 
+#pragma GCC visibility push(default)
 #ifdef NDEBUG
 #  ifndef JL_NDEBUG
 #    undef NDEBUG
@@ -28,3 +29,4 @@
 #    include <assert.h>
 #  endif
 #endif
+#pragma GCC visibility pop
diff --git a/src/julia_atomics.h b/src/julia_atomics.h
index c4488f774c987..1d8fba3b44e33 100644
--- a/src/julia_atomics.h
+++ b/src/julia_atomics.h
@@ -56,6 +56,15 @@ enum jl_memory_order {
     jl_memory_order_seq_cst
 };
 
+/**
+ * Cache line size
+*/
+#if (defined(_CPU_AARCH64_) && defined(_OS_DARWIN_)) || defined(_CPU_PPC64_)  // Apple silicon and PPC7+ have 128 byte cache lines
+#define JL_CACHE_BYTE_ALIGNMENT 128
+#else
+#define JL_CACHE_BYTE_ALIGNMENT 64
+#endif
+
 /**
  * Thread synchronization primitives:
  *
@@ -94,12 +103,12 @@ enum jl_memory_order {
 // this wrong thus we include the correct definitions here (with implicit
 // conversion), instead of using the macro version
 template<class T>
-T jl_atomic_load(std::atomic<T> *ptr)
+T jl_atomic_load(const std::atomic<T> *ptr)
 {
      return std::atomic_load<T>(ptr);
 }
 template<class T>
-T jl_atomic_load_explicit(std::atomic<T> *ptr, std::memory_order order)
+T jl_atomic_load_explicit(const std::atomic<T> *ptr, std::memory_order order)
 {
      return std::atomic_load_explicit<T>(ptr, order);
 }
@@ -165,6 +174,11 @@ bool jl_atomic_cmpswap_acqrel(std::atomic<T> *ptr, T *expected, S val)
 {
      return std::atomic_compare_exchange_strong_explicit<T>(ptr, expected, val, memory_order_acq_rel, memory_order_acquire);
 }
+template<class T, class S>
+bool jl_atomic_cmpswap_release(std::atomic<T> *ptr, T *expected, S val)
+{
+     return std::atomic_compare_exchange_strong_explicit<T>(ptr, expected, val, memory_order_release, memory_order_relaxed);
+}
 #define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed)
 template<class T, class S>
 T jl_atomic_exchange(std::atomic<T> *ptr, S desired)
@@ -176,6 +190,7 @@ T jl_atomic_exchange_explicit(std::atomic<T> *ptr, S desired, std::memory_order
 {
      return std::atomic_exchange_explicit<T>(ptr, desired, order);
 }
+#define jl_atomic_exchange_release(ptr, val) jl_atomic_exchange_explicit(ptr, val, memory_order_release)
 #define jl_atomic_exchange_relaxed(ptr, val) jl_atomic_exchange_explicit(ptr, val, memory_order_relaxed)
 extern "C" {
 #else
@@ -196,11 +211,15 @@ extern "C" {
     atomic_compare_exchange_strong(obj, expected, desired)
 #  define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
     atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed)
-#define jl_atomic_cmpswap_acqrel(obj, expected, desired) \
+#  define jl_atomic_cmpswap_release(obj, expected, desired) \
+    atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_release, memory_order_relaxed)
+#  define jl_atomic_cmpswap_acqrel(obj, expected, desired) \
     atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_acq_rel, memory_order_acquire)
 // TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
 #  define jl_atomic_exchange(obj, desired)       \
     atomic_exchange(obj, desired)
+#  define jl_atomic_exchange_release(obj, desired)      \
+    atomic_exchange_explicit(obj, desired, memory_order_release)
 #  define jl_atomic_exchange_relaxed(obj, desired)      \
     atomic_exchange_explicit(obj, desired, memory_order_relaxed)
 #  define jl_atomic_store(obj, val)                     \
@@ -247,6 +266,7 @@ extern "C" {
 #define _Atomic(T) T
 
 #undef jl_atomic_exchange
+#undef jl_atomic_exchange_release
 #undef jl_atomic_exchange_relaxed
 #define jl_atomic_exchange(obj, desired) \
     (__extension__({ \
@@ -255,10 +275,12 @@ extern "C" {
             *p__analyzer__ = (desired); \
             temp__analyzer__; \
         }))
+#define jl_atomic_exchange_release jl_atomic_exchange
 #define jl_atomic_exchange_relaxed jl_atomic_exchange
 
 #undef jl_atomic_cmpswap
 #undef jl_atomic_cmpswap_acqrel
+#undef jl_atomic_cmpswap_release
 #undef jl_atomic_cmpswap_relaxed
 #define jl_atomic_cmpswap(obj, expected, desired) \
     (__extension__({ \
@@ -273,6 +295,7 @@ extern "C" {
             eq__analyzer__; \
         }))
 #define jl_atomic_cmpswap_acqrel jl_atomic_cmpswap
+#define jl_atomic_cmpswap_release jl_atomic_cmpswap
 #define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap
 
 #undef jl_atomic_store
diff --git a/src/julia_fasttls.h b/src/julia_fasttls.h
index 1c0929717b293..1f35d3693fefd 100644
--- a/src/julia_fasttls.h
+++ b/src/julia_fasttls.h
@@ -22,14 +22,9 @@ extern "C" {
 
 typedef struct _jl_gcframe_t jl_gcframe_t;
 
-#if defined(_OS_DARWIN_)
-#include <pthread.h>
-typedef void *(jl_get_pgcstack_func)(pthread_key_t); // aka typeof(pthread_getspecific)
-#else
 typedef jl_gcframe_t **(jl_get_pgcstack_func)(void);
-#endif
 
-#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
+#if !defined(_OS_WINDOWS_)
 #define JULIA_DEFINE_FAST_TLS                                                                   \
 static __attribute__((tls_model("local-exec"))) __thread jl_gcframe_t **jl_pgcstack_localexec;  \
 JL_DLLEXPORT _Atomic(char) jl_pgcstack_static_semaphore;                                        \
diff --git a/src/julia_gcext.h b/src/julia_gcext.h
index 27f0a6b5ec11c..651200017a9a2 100644
--- a/src/julia_gcext.h
+++ b/src/julia_gcext.h
@@ -34,9 +34,13 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_al
 JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb,
         int enable);
 
+// Memory pressure callback
+typedef void (*jl_gc_cb_notify_gc_pressure_t)(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable);
+
 // Types for custom mark and sweep functions.
-typedef uintptr_t (*jl_markfunc_t)(jl_ptls_t, jl_value_t *obj);
-typedef void (*jl_sweepfunc_t)(jl_value_t *obj);
+typedef uintptr_t (*jl_markfunc_t)(jl_ptls_t, jl_value_t *obj) JL_NOTSAFEPOINT;
+typedef void (*jl_sweepfunc_t)(jl_value_t *obj) JL_NOTSAFEPOINT;
 
 // Function to create a new foreign type with custom
 // mark and sweep functions.
@@ -56,10 +60,10 @@ JL_DLLEXPORT int jl_reinit_foreign_type(
         jl_markfunc_t markfunc,
         jl_sweepfunc_t sweepfunc);
 
-JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt);
+JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt) JL_NOTSAFEPOINT;
 
-JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void);
-JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void);
+JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) JL_NOTSAFEPOINT;
 
 // Field layout descriptor for custom types that do
 // not fit Julia layout conventions. This is associated with
@@ -76,9 +80,9 @@ JL_DLLEXPORT void *jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty);
 // Queue an object or array of objects for scanning by the garbage collector.
 // These functions must only be called from within a root scanner callback
 // or from within a custom mark function.
-JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj);
+JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
-    jl_value_t **objs, size_t nobjs);
+    jl_value_t **objs, size_t nobjs) JL_NOTSAFEPOINT;
 
 // Sweep functions will not automatically be called for objects of
 // foreign types, as that may not always be desired. Only calling
@@ -115,7 +119,7 @@ JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void);
 // This function returns whether support for conservative scanning has
 // been enabled. The return values are the same as for
 // jl_gc_enable_conservative_gc_support().
-JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void);
+JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void) JL_NOTSAFEPOINT;
 
 // Returns the base address of a memory block, assuming it is stored in
 // a julia memory pool. Return NULL otherwise. Conservative support
@@ -129,16 +133,7 @@ JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void);
 // jl_typeof(obj) is an actual type object.
 //
 // NOTE: Only valid to call from within a GC context.
-JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p);
-
-// Return a non-null pointer to the start of the stack area if the task
-// has an associated stack buffer. In that case, *size will also contain
-// the size of that stack buffer upon return. Also, if task is a thread's
-// current task, that thread's id will be stored in *tid; otherwise,
-// *tid will be set to -1.
-//
-// DEPRECATED: use jl_active_task_stack() instead.
-JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid);
+JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) JL_NOTSAFEPOINT;
 
 // Query the active and total stack range for the given task, and set
 // *active_start and *active_end respectively *total_start and *total_end
@@ -146,7 +141,7 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid)
 // and may not be tight.
 JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
                                        char **active_start, char **active_end,
-                                       char **total_start, char **total_end);
+                                       char **total_start, char **total_end) JL_NOTSAFEPOINT;
 
 #ifdef __cplusplus
 }
diff --git a/src/julia_internal.h b/src/julia_internal.h
index cf65521770681..487486ce29964 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -3,6 +3,7 @@
 #ifndef JL_INTERNAL_H
 #define JL_INTERNAL_H
 
+#include "dtypes.h"
 #include "options.h"
 #include "julia_assert.h"
 #include "julia_locks.h"
@@ -13,9 +14,16 @@
 #include "support/strtod.h"
 #include "gc-alloc-profiler.h"
 #include "support/rle.h"
+#include <ctype.h>
+#include <stdint.h>
 #include <uv.h>
 #include <llvm-c/Types.h>
 #include <llvm-c/Orc.h>
+#include <llvm-version.h>
+
+#define STR(x) #x
+#define XSTR(x) STR(x)
+
 #if !defined(_WIN32)
 #include <unistd.h>
 #else
@@ -62,7 +70,8 @@ static inline void asan_unpoison_task_stack(jl_task_t *ct, jl_jmp_buf *buf)
        that we're resetting to. The idea is to remove the poison from the frames
        that we're skipping over, since they won't be unwound. */
     uintptr_t top = jmpbuf_sp(buf);
-    uintptr_t bottom = (uintptr_t)ct->stkbuf;
+    uintptr_t bottom = (uintptr_t)(ct->ctx.copy_stack ? (char*)ct->ptls->stackbase  - ct->ptls->stacksize : (char*)ct->ctx.stkbuf);
+    //uintptr_t bottom = (uintptr_t)&top;
     __asan_unpoison_stack_memory(bottom, top - bottom);
 }
 static inline void asan_unpoison_stack_memory(uintptr_t addr, size_t size) {
@@ -90,28 +99,25 @@ static inline void msan_unpoison(const volatile void *a, size_t size) JL_NOTSAFE
 static inline void msan_allocated_memory(const volatile void *a, size_t size) JL_NOTSAFEPOINT {}
 static inline void msan_unpoison_string(const volatile char *a) JL_NOTSAFEPOINT {}
 #endif
-#ifdef _COMPILER_TSAN_ENABLED_
-JL_DLLIMPORT void *__tsan_create_fiber(unsigned flags);
-JL_DLLIMPORT void *__tsan_get_current_fiber(void);
-JL_DLLIMPORT void __tsan_destroy_fiber(void *fiber);
-JL_DLLIMPORT void __tsan_switch_to_fiber(void *fiber, unsigned flags);
-#endif
-#ifdef __cplusplus
-}
-#endif
 
-// Remove when C11 is required for C code.
-#ifndef static_assert
-#  ifndef __cplusplus
-// C11 should already have `static_assert` from `<assert.h>` so there's no need
-// to check C version.
-#    ifdef __GNUC__
-#      define static_assert _Static_assert
-#    else
-#      define static_assert(...)
-#    endif
-#  endif
-// For C++, C++11 or MSVC is required. Both provide `static_assert`.
+#ifndef _OS_WINDOWS_
+    #if defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_WASM_)
+        #define MAX_ALIGN 8
+    #elif defined(_CPU_AARCH64_) || defined(_CPU_RISCV64_) || (JL_LLVM_VERSION >= 180000 && (defined(_CPU_X86_64_) || defined(_CPU_X86_)) || (JL_LLVM_VERSION >= 200000 && defined(_CPU_PPC64_)))
+    // int128 is 16 bytes aligned on aarch64 and riscv, and on x86 with LLVM >= 18 and on ppc64 with LLVM >= 20
+        #define MAX_ALIGN 16
+    #elif defined(_P64)
+    // Generically we assume MAX_ALIGN is sizeof(void*)
+        #define MAX_ALIGN 8
+    #else
+        #define MAX_ALIGN 4
+    #endif
+#else
+    #if JL_LLVM_VERSION >= 180000
+        #define MAX_ALIGN 16
+    #else
+        #define MAX_ALIGN 8
+    #endif
 #endif
 
 #ifndef alignof
@@ -170,7 +176,16 @@ JL_DLLIMPORT void __tsan_switch_to_fiber(void *fiber, unsigned flags);
 #endif
 #endif
 
+#if defined(HAVE_SSP) && defined(_OS_DARWIN_)
+// On Darwin, this is provided by libSystem and imported
+extern JL_DLLIMPORT uintptr_t __stack_chk_guard;
+#elif defined(HAVE_SSP)
+// Added by compiler runtime in final link - not DLLIMPORT
+extern uintptr_t __stack_chk_guard;
+#else
+// The system doesn't have it - we define our own
 extern JL_DLLEXPORT uintptr_t __stack_chk_guard;
+#endif
 
 // If this is detected in a backtrace of segfault, it means the functions
 // that use this value must be reworked into their async form with cb arg
@@ -179,13 +194,37 @@ static uv_loop_t *const unused_uv_loop_arg = (uv_loop_t *)0xBAD10;
 
 extern jl_mutex_t jl_uv_mutex;
 extern _Atomic(int) jl_uv_n_waiters;
-void JL_UV_LOCK(void);
-#define JL_UV_UNLOCK() JL_UNLOCK(&jl_uv_mutex)
 
-#ifdef __cplusplus
-extern "C" {
+// Global data structures for accessing symbols and other globals
+#include "jl_internal_data.inc"
+
+#if defined(__clang_analyzer__)
+#define XX(name, type) extern JL_HIDDEN type jl_##name JL_GLOBALLY_ROOTED;
+JL_INTERNAL_DATA(XX)
+#undef XX
+#else
+// Struct definition for internal data access
+struct jl_internal_global {
+#define XX(name, type) type name JL_GLOBALLY_ROOTED;
+JL_INTERNAL_DATA(XX)
+#undef XX
+};
+
+extern JL_HIDDEN struct jl_internal_global internal_global;
+
+// Define accessor macros for internal data
+#define jl_method_table (internal_global.method_table)
 #endif
 
+// Generated macros to access globals
+void JL_UV_LOCK(void);
+#define JL_UV_UNLOCK() JL_UNLOCK(&jl_uv_mutex)
+extern _Atomic(unsigned) _threadedregion;
+extern _Atomic(uint16_t) io_loop_tid;
+
+JL_DLLEXPORT void jl_init_(jl_image_buf_t sysimage);
+JL_DLLEXPORT void jl_enter_threaded_region(void);
+JL_DLLEXPORT void jl_exit_threaded_region(void);
 int jl_running_under_rr(int recheck) JL_NOTSAFEPOINT;
 
 //--------------------------------------------------
@@ -193,23 +232,64 @@ int jl_running_under_rr(int recheck) JL_NOTSAFEPOINT;
 // Returns time in nanosec
 JL_DLLEXPORT uint64_t jl_hrtime(void) JL_NOTSAFEPOINT;
 
-JL_DLLEXPORT void jl_set_peek_cond(uintptr_t);
 JL_DLLEXPORT double jl_get_profile_peek_duration(void);
 JL_DLLEXPORT void jl_set_profile_peek_duration(double);
 
 JL_DLLEXPORT void jl_init_profile_lock(void);
-JL_DLLEXPORT uintptr_t jl_lock_profile_rd_held(void) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jl_lock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
+JL_DLLEXPORT int jl_lock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
 JL_DLLEXPORT void jl_unlock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;
-JL_DLLEXPORT void jl_lock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
+JL_DLLEXPORT int jl_lock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
 JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE;
 
+arraylist_t *jl_get_all_tasks_arraylist(void) JL_NOTSAFEPOINT;
+typedef struct {
+    size_t bt_size;
+    int tid;
+} jl_record_backtrace_result_t;
+JL_DLLEXPORT JL_DLLEXPORT size_t jl_try_record_thread_backtrace(jl_ptls_t ptls2, struct _jl_bt_element_t *bt_data,
+                                                                size_t max_bt_size) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_record_backtrace_result_t jl_record_backtrace(jl_task_t *t, struct _jl_bt_element_t *bt_data,
+                                                              size_t max_bt_size, int all_tasks_profiler) JL_NOTSAFEPOINT;
+extern volatile struct _jl_bt_element_t *profile_bt_data_prof;
+extern volatile size_t profile_bt_size_max;
+extern volatile size_t profile_bt_size_cur;
+extern volatile int profile_running;
+extern volatile int profile_all_tasks;
+// Ensures that we can safely read the `live_tasks`field of every TLS when profiling.
+// We want to avoid the case that a GC gets interleaved with `jl_profile_task` and shrinks
+// the `live_tasks` array while we are reading it or frees tasks that are being profiled.
+// Because of that, this lock must be held in `jl_profile_task` and `jl_gc_sweep_stack_pools_and_mtarraylist_buffers`.
+extern uv_mutex_t live_tasks_lock;
+// Ensures that we can safely write to `profile_bt_data_prof` and `profile_bt_size_cur`.
+// We want to avoid the case that:
+// - We start to profile a task very close to the profiling time window end.
+// - The profiling time window ends and we start to read the profile data in a compute thread.
+// - We write to the profile in a profiler thread while the compute thread is reading it.
+// Locking discipline: `bt_data_prof_lock` must be held inside the scope of `live_tasks_lock`.
+extern uv_mutex_t bt_data_prof_lock;
+#define PROFILE_STATE_THREAD_NOT_SLEEPING (1)
+#define PROFILE_STATE_THREAD_SLEEPING (2)
+#define PROFILE_STATE_WALL_TIME_PROFILING (3)
+void jl_profile_task(void);
+#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
+JL_DLLEXPORT void jl_set_profile_abort_ptr(_Atomic(int) *abort_ptr) JL_NOTSAFEPOINT;
+#endif
+
 // number of cycles since power-on
 static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
 {
 #if defined(_CPU_X86_64_)
+    // This is nopl 0(%rax, %rax, 1), but assembler are inconsistent about whether
+    // they emit that as a 4 or 5 byte sequence and we need to be guaranteed to use
+    // the 5 byte one.
+#define NOP5_OVERRIDE_NOP ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"
     uint64_t low, high;
-    __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
+    // This instruction sequence is promised by rr to be patchable. rr can usually
+    // also patch `rdtsc` in regular code, but without the preceding nop, there could
+    // be an interfering branch into the middle of rr's patch region. Using this
+    // sequence prevents a massive rr-induced slowdown if the compiler happens to emit
+    // an unlucky pattern. See https://github.com/rr-debugger/rr/pull/3580.
+    __asm__ volatile(NOP5_OVERRIDE_NOP "rdtsc" : "=a"(low), "=d"(high));
     return (high << 32) | low;
 #elif defined(_CPU_X86_)
     int64_t ret;
@@ -243,6 +323,11 @@ static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
     struct timeval tv;
     gettimeofday(&tv, NULL);
     return (int64_t)(tv.tv_sec) * 1000000 + tv.tv_usec;
+#elif defined(_CPU_RISCV64_)
+    // taken from https://github.com/google/benchmark/blob/3b3de69400164013199ea448f051d94d7fc7d81f/src/cycleclock.h#L190
+    uint64_t ret;
+    __asm__ volatile("rdcycle %0" : "=r"(ret));
+    return ret;
 #elif defined(_CPU_PPC64_)
     // This returns a time-base, which is not always precisely a cycle-count.
     // https://reviews.llvm.org/D78084
@@ -258,11 +343,17 @@ static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
 
 #include "timing.h"
 
+JL_DLLEXPORT uint64_t jl_typeinf_timing_begin(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start, int is_recompile) JL_NOTSAFEPOINT;
+
 // Global *atomic* integers controlling *process-wide* measurement of compilation time.
 extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled;
 extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time;
 extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time;
 
+// Global *atomic* integer controlling *process-wide* task timing.
+extern JL_DLLEXPORT _Atomic(uint8_t) jl_task_metrics_enabled;
+
 #define jl_return_address() ((uintptr_t)__builtin_return_address(0))
 
 STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a)
@@ -282,24 +373,41 @@ STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a)
 // without risk of creating pointers out of thin air
 // TODO: replace with LLVM's llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32
 //       aka `__llvm_memmove_element_unordered_atomic_8` (for 64 bit)
-static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT
+static inline void memmove_refs(_Atomic(void*) *dstp, _Atomic(void*) *srcp, size_t n) JL_NOTSAFEPOINT
 {
     size_t i;
-    _Atomic(void*) *srcpa = (_Atomic(void*)*)srcp;
-    _Atomic(void*) *dstpa = (_Atomic(void*)*)dstp;
     if (dstp < srcp || dstp > srcp + n) {
         for (i = 0; i < n; i++) {
-            jl_atomic_store_release(dstpa + i, jl_atomic_load_relaxed(srcpa + i));
+            jl_atomic_store_release(dstp + i, jl_atomic_load_relaxed(srcp + i));
         }
     }
     else {
         for (i = 0; i < n; i++) {
-            jl_atomic_store_release(dstpa + n - i - 1, jl_atomic_load_relaxed(srcpa + n - i - 1));
+            jl_atomic_store_release(dstp + n - i - 1, jl_atomic_load_relaxed(srcp + n - i - 1));
         }
     }
 }
 
-// -- gc.c -- //
+static inline void memassign_safe(int hasptr, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT
+{
+    assert(nb == jl_datatype_size(jl_typeof(src)));
+    if (hasptr) {
+        size_t nptr = nb / sizeof(void*);
+        memmove_refs((_Atomic(void*)*)dst, (_Atomic(void*)*)src, nptr);
+        nb -= nptr * sizeof(void*);
+        if (__likely(nb == 0))
+            return;
+        src = (jl_value_t*)((char*)src + nptr * sizeof(void*));
+        dst = dst + nptr * sizeof(void*);
+    }
+    else if (nb >= 16) {
+        memcpy(dst, jl_assume_aligned(src, 16), nb);
+        return;
+    }
+    memcpy(dst, jl_assume_aligned(src, sizeof(void*)), nb);
+}
+
+// -- GC -- //
 
 #define GC_CLEAN  0 // freshly allocated
 #define GC_MARKED 1 // reachable and young
@@ -307,11 +415,17 @@ static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOT
 #define GC_OLD_MARKED (GC_OLD | GC_MARKED) // reachable and old
 #define GC_IN_IMAGE 4
 
+// data structures for runtime codegen
+typedef struct _jl_abi_t {
+    jl_value_t *sigt;
+    jl_value_t *rt;
+    size_t nargs;
+    int specsig; // bool
+    // OpaqueClosure Methods override the first argument of their signature
+    int is_opaque_closure;
+} jl_abi_t;
+
 // useful constants
-extern JL_DLLIMPORT jl_methtable_t *jl_type_type_mt JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_methtable_t *jl_nonfunction_mt JL_GLOBALLY_ROOTED;
-extern jl_methtable_t *jl_kwcall_mt JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_method_t *jl_opaque_closure_method JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT _Atomic(size_t) jl_world_counter;
 
 typedef void (*tracer_cb)(jl_value_t *tracee);
@@ -321,28 +435,27 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m);
 extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;
 JL_DLLEXPORT extern arraylist_t jl_linkage_blobs; // external linkage: sysimg/pkgimages
 JL_DLLEXPORT extern arraylist_t jl_image_relocs;  // external linkage: sysimg/pkgimages
+JL_DLLEXPORT extern arraylist_t jl_top_mods;  // external linkage: sysimg/pkgimages
 extern arraylist_t eytzinger_image_tree;
 extern arraylist_t eytzinger_idxs;
 
 extern JL_DLLEXPORT size_t jl_page_size;
-extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED;
+extern JL_DLLEXPORT jl_value_t *jl_typeinf_func JL_GLOBALLY_ROOTED;
+extern JL_DLLEXPORT jl_value_t *jl_compile_and_emit_func JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT size_t jl_typeinf_world;
+extern JL_DLLEXPORT jl_value_t *jl_libdl_dlopen_func JL_GLOBALLY_ROOTED;
 extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
-extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
 
-JL_DLLEXPORT extern int jl_lineno;
-JL_DLLEXPORT extern const char *jl_filename;
+void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT;
 
-jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset,
+JL_DLLEXPORT extern _Atomic(int) jl_lineno;
+JL_DLLEXPORT extern _Atomic(const char *) jl_filename;
+
+jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset,
                                    int osize);
 jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
 JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
-extern uv_mutex_t gc_perm_lock;
-void *jl_gc_perm_alloc_nolock(size_t sz, int zero,
-    unsigned align, unsigned offset) JL_NOTSAFEPOINT;
-void *jl_gc_perm_alloc(size_t sz, int zero,
-    unsigned align, unsigned offset) JL_NOTSAFEPOINT;
-void gc_sweep_sysimg(void);
+void gc_sweep_sysimg(void) JL_NOTSAFEPOINT;
 
 
 // pools are 16376 bytes large (GC_POOL_SZ - GC_PAGE_OFFSET)
@@ -364,24 +477,48 @@ static const int jl_gc_sizeclasses[] = {
     144, 160, 176, 192, 208, 224, 240, 256,
 
     // the following tables are computed for maximum packing efficiency via the formula:
-    // pg = 2^14
+    // pg = GC_SMALL_PAGE ? 2^12 : 2^14
     // sz = (div.(pg-8, rng).÷16)*16; hcat(sz, (pg-8).÷sz, pg .- (pg-8).÷sz.*sz)'
 
+#ifdef GC_SMALL_PAGE
+    // rng = 15:-1:2 (14 pools)
+    272, 288, 304, 336, 368, 400, 448, 496, 576, 672, 816, 1008, 1360, 2032
+//  15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, /pool
+//  16, 64, 144, 64, 48, 96, 64, 128, 64, 64, 16, 64, 16, 32, bytes lost
+#else
     // rng = 60:-4:32 (8 pools)
     272, 288, 304, 336, 368, 400, 448, 496,
-//   60,  56,  53,  48,  44,  40,  36,  33, /pool
-//   64, 256, 272, 256, 192, 384, 256,  16, bytes lost
+//  60, 56, 53, 48, 44, 40, 36, 33, /pool
+//  64, 256, 272, 256, 192, 384, 256,  16, bytes lost
 
     // rng = 30:-2:16 (8 pools)
     544, 576, 624, 672, 736, 816, 896, 1008,
-//   30,  28,  26,  24,  22,  20,  18,  16, /pool
-//   64, 256, 160, 256, 192,  64, 256, 256, bytes lost
+//  30, 28, 26, 24, 22, 20, 18, 16, /pool
+//  64, 256, 160, 256, 192,  64, 256, 256, bytes lost
 
     // rng = 15:-1:8 (8 pools)
     1088, 1168, 1248, 1360, 1488, 1632, 1808, 2032
-//    15,   14,   13,   12,   11,   10,    9,    8, /pool
-//    64,   32,  160,   64,   16,   64,  112,  128, bytes lost
+//   15, 14, 13, 12, 11, 10, 9, 8, /pool
+//   64, 32, 160, 64, 16, 64, 112,  128, bytes lost
+#endif
 };
+#ifdef GC_SMALL_PAGE
+#ifdef _P64
+#  define JL_GC_N_POOLS 39
+#elif MAX_ALIGN > 4
+#  define JL_GC_N_POOLS 40
+#else
+#  define JL_GC_N_POOLS 41
+#endif
+#else
+#ifdef _P64
+#  define JL_GC_N_POOLS 49
+#elif MAX_ALIGN > 4
+#  define JL_GC_N_POOLS 50
+#else
+#  define JL_GC_N_POOLS 51
+#endif
+#endif
 static_assert(sizeof(jl_gc_sizeclasses) / sizeof(jl_gc_sizeclasses[0]) == JL_GC_N_POOLS, "");
 
 STATIC_INLINE int jl_gc_alignment(size_t sz) JL_NOTSAFEPOINT
@@ -391,7 +528,7 @@ STATIC_INLINE int jl_gc_alignment(size_t sz) JL_NOTSAFEPOINT
 #ifdef _P64
     (void)sz;
     return 16;
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
     return sz <= 4 ? 8 : 16;
 #else
     // szclass 8
@@ -408,7 +545,12 @@ JL_DLLEXPORT int jl_alignment(size_t sz) JL_NOTSAFEPOINT;
 
 // the following table is computed as:
 // [searchsortedfirst(jl_gc_sizeclasses, i) - 1 for i = 0:16:jl_gc_sizeclasses[end]]
-static const uint8_t szclass_table[] = {0, 1, 3, 5, 7, 9, 11, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48};
+static const uint8_t szclass_table[] =
+#ifdef GC_SMALL_PAGE
+    {0,1,3,5,7,9,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,28,29,29,30,30,31,31,31,32,32,32,33,33,33,33,33,34,34,34,34,34,34,35,35,35,35,35,35,35,35,35,36,36,36,36,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38};
+#else
+    {0,1,3,5,7,9,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,28,29,29,30,30,31,31,31,32,32,32,33,33,33,34,34,35,35,35,36,36,36,37,37,37,37,38,38,38,38,38,39,39,39,39,39,40,40,40,40,40,40,40,41,41,41,41,41,42,42,42,42,42,43,43,43,43,43,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45,46,46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,47,47,47,48,48,48,48,48,48,48,48,48,48,48,48,48,48};
+#endif
 static_assert(sizeof(szclass_table) == 128, "");
 
 STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) JL_NOTSAFEPOINT
@@ -418,7 +560,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) JL_NOTSAFEPOINT
     if (sz <= 8)
         return 0;
     const int N = 0;
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
     if (sz <= 8)
         return (sz >= 4 ? 1 : 0);
     const int N = 1;
@@ -436,7 +578,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE
     if (sz >= 16 && sz <= 152) {
 #ifdef _P64
         const int N = 0;
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
         const int N = 1;
 #else
         const int N = 2;
@@ -447,34 +589,11 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE
 }
 
 #define JL_SMALL_BYTE_ALIGNMENT 16
-#define JL_CACHE_BYTE_ALIGNMENT 64
 // JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide
 #define JL_HEAP_ALIGNMENT JL_SMALL_BYTE_ALIGNMENT
 #define GC_MAX_SZCLASS (2032-sizeof(void*))
 static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, "");
 
-STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
-{
-    jl_value_t *v;
-    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    if (sz <= GC_MAX_SZCLASS) {
-        int pool_id = jl_gc_szclass(allocsz);
-        jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
-        int osize = jl_gc_sizeclasses[pool_id];
-        // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
-        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
-        v = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
-    }
-    else {
-        if (allocsz < sz) // overflow in adding offs, size was "negative"
-            jl_throw(jl_memory_exception);
-        v = jl_gc_big_alloc_noinline(ptls, allocsz);
-    }
-    jl_set_typeof(v, ty);
-    maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
-    return v;
-}
-
 /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a
  * gc frame, until it has been fully initialized. An uninitialized value in a
  * gc frame can crash upon encountering the first safepoint. By delaying use of
@@ -496,7 +615,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty);
 // defined as uint64_t[3] so that we can get the right alignment of this and a "type tag" on it
 const extern uint64_t _jl_buff_tag[3];
 #define jl_buff_tag ((uintptr_t)LLT_ALIGN((uintptr_t)&_jl_buff_tag[1],16))
-JL_DLLEXPORT uintptr_t jl_get_buff_tag(void);
+JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT;
 
 typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer
 STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz)
@@ -504,19 +623,8 @@ STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz)
     return jl_gc_alloc(ptls, sz, (void*)jl_buff_tag);
 }
 
-STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
-{
-    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
-                                                 sizeof(void*) * 2 : 16));
-    jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align,
-                                                              sizeof(void*) % align);
-    uintptr_t tag = (uintptr_t)ty;
-    o->header = tag | GC_OLD_MARKED;
-    return jl_valueof(o);
-}
-jl_value_t *jl_permbox8(jl_datatype_t *t, uintptr_t tag, uint8_t x);
-jl_value_t *jl_permbox32(jl_datatype_t *t, uintptr_t tag, uint32_t x);
+jl_value_t *jl_permbox8(jl_datatype_t *t, uintptr_t tag, uint8_t x) JL_NOTSAFEPOINT;
+jl_value_t *jl_permbox32(jl_datatype_t *t, uintptr_t tag, uint32_t x) JL_NOTSAFEPOINT;
 jl_svec_t *jl_perm_symsvec(size_t n, ...);
 
 // this sizeof(__VA_ARGS__) trick can't be computed until C11, but that only matters to Clang in some situations
@@ -550,38 +658,16 @@ jl_svec_t *jl_perm_symsvec(size_t n, ...);
 #endif
 #endif
 
-jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz);
-JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz);
-
-JL_DLLEXPORT void JL_NORETURN jl_throw_out_of_memory_error(void);
-
-
-JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT;
-void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT;
+void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT;
+size_t jl_genericmemory_nbytes(jl_genericmemory_t *a) JL_NOTSAFEPOINT;
+size_t memory_block_usable_size(void *mem, int isaligned) JL_NOTSAFEPOINT;
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
 void jl_gc_run_all_finalizers(jl_task_t *ct);
 void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task);
 void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT;
 
-void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT;
-
-STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t*
-{
-    jl_gc_wb(bnd, val);
-}
-
-STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t*
-{
-    // if parent is marked and buf is not
-    if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) {
-        jl_task_t *ct = jl_current_task;
-        gc_setmark_buf(ct->ptls, bufptr, 3, minsz);
-    }
-}
-
-void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT;
+void jl_gc_debug_fprint_status(ios_t *s) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_debug_fprint_critical_error(ios_t *s) JL_NOTSAFEPOINT;
 void jl_print_gc_stats(JL_STREAM *s);
 void jl_gc_reset_alloc_count(void);
 uint32_t jl_get_gs_ctr(void);
@@ -603,41 +689,107 @@ STATIC_INLINE jl_value_t *undefref_check(jl_datatype_t *dt, jl_value_t *v) JL_NO
 // -- helper types -- //
 
 typedef struct {
-    uint8_t inferred:1;
-    uint8_t propagate_inbounds:1;
-    uint8_t has_fcall:1;
-    uint8_t nospecializeinfer:1;
-    uint8_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none
-    uint8_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none
+    uint16_t propagate_inbounds:1;
+    uint16_t has_fcall:1;
+    uint16_t has_image_globalref:1;
+    uint16_t nospecializeinfer:1;
+    uint16_t isva:1;
+    uint16_t nargsmatchesmethod:1;
+    uint16_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none
+    uint16_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none
+    uint16_t has_ssaflags:1;
 } jl_code_info_flags_bitfield_t;
 
 typedef union {
     jl_code_info_flags_bitfield_t bits;
-    uint8_t packed;
+    uint16_t packed;
 } jl_code_info_flags_t;
 
 // -- functions -- //
 
-JL_DLLEXPORT jl_code_info_t *jl_type_infer(jl_method_instance_t *li, size_t world, int force);
+// Also defined in typeinfer.jl - See documentation there.
+#define SOURCE_MODE_NOT_REQUIRED            0x0
+#define SOURCE_MODE_ABI                     0x1
+
+#define METHOD_SIG_LATEST_WHICH             0b0001
+#define METHOD_SIG_LATEST_ONLY              0b0010
+#define METHOD_SIG_PRECOMPILE_MANY          0b0100
+
+JL_DLLEXPORT jl_code_instance_t *jl_engine_reserve(jl_method_instance_t *m, jl_value_t *owner);
+JL_DLLEXPORT void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src);
+void jl_engine_sweep(jl_ptls_t *gc_all_tls_states) JL_NOTSAFEPOINT;
+int jl_engine_hasreserved(jl_method_instance_t *m, jl_value_t *owner) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT jl_code_instance_t *jl_type_infer(jl_method_instance_t *li JL_PROPAGATES_ROOT, size_t world, uint8_t source_mode, uint8_t trim_mode);
+JL_DLLEXPORT jl_code_info_t *jl_gdbcodetyped1(jl_method_instance_t *mi, size_t world);
 JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *meth JL_PROPAGATES_ROOT, size_t world);
 JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
         jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
-        size_t min_world, size_t max_world);
-jl_method_instance_t *jl_get_unspecialized_from_mi(jl_method_instance_t *method JL_PROPAGATES_ROOT);
-jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT);
+        size_t min_world, size_t max_world, jl_debuginfo_t *di, jl_svec_t *edges);
+JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile);
+JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src);
+
+JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner);
+JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
+        jl_method_instance_t *mi, jl_value_t *owner,
+        jl_value_t *rettype, jl_value_t *exctype,
+        jl_value_t *inferred_const, jl_value_t *inferred,
+        int32_t const_flags, size_t min_world, size_t max_world,
+        uint32_t effects, jl_value_t *analysis_results,
+        jl_debuginfo_t *di, jl_svec_t *edges /* , int absolute_max*/);
+JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, size_t target_world) JL_NOTSAFEPOINT;
+
+STATIC_INLINE jl_method_instance_t *jl_get_ci_mi(jl_code_instance_t *ci JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+{
+    jl_value_t *def = ci->def;
+    if (jl_is_abioverride(def))
+        return ((jl_abi_override_t*)def)->def;
+    assert(jl_is_method_instance(def));
+    return (jl_method_instance_t*)def;
+}
 
+JL_DLLEXPORT const char *jl_debuginfo_file(jl_debuginfo_t *debuginfo) JL_NOTSAFEPOINT;
+JL_DLLEXPORT const char *jl_debuginfo_file1(jl_debuginfo_t *debuginfo) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_module_t *jl_debuginfo_module1(jl_value_t *debuginfo_def) JL_NOTSAFEPOINT;
+JL_DLLEXPORT const char *jl_debuginfo_name(jl_value_t *func) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT int jl_is_compiled_codeinst(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world);
+JL_DLLEXPORT void jl_compile_method_sig(jl_method_t *m, jl_value_t *types, jl_svec_t *sparams, size_t world);
 JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types);
 jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world);
+jl_value_t *jl_code_or_ci_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world);
 int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile);
 jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ast);
 JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void);
-JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
+JL_DLLEXPORT void jl_resolve_definition_effects_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals, jl_value_t *binding_edge,
                                            int binding_effects);
+JL_DLLEXPORT int jl_maybe_add_binding_backedge(jl_binding_t *b, jl_value_t *edge, jl_method_t *in_method);
+JL_DLLEXPORT void jl_add_binding_backedge(jl_binding_t *b, jl_value_t *edge);
+
+static const uint8_t MI_FLAG_BACKEDGES_INUSE = 0b0100;
+static const uint8_t MI_FLAG_BACKEDGES_DIRTY = 0b1000;
+static const uint8_t MI_FLAG_BACKEDGES_ALL = 0b1100;
+
+STATIC_INLINE jl_array_t *jl_mi_get_backedges_mutate(jl_method_instance_t *mi JL_PROPAGATES_ROOT, uint8_t *flags) {
+    *flags = jl_atomic_load_relaxed(&mi->flags) & (MI_FLAG_BACKEDGES_ALL);
+    jl_array_t *ret = mi->backedges;
+    if (ret)
+        jl_atomic_fetch_or_relaxed(&mi->flags, MI_FLAG_BACKEDGES_INUSE);
+    return ret;
+}
 
-int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_method_instance_t **caller) JL_NOTSAFEPOINT;
-int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_method_instance_t *caller);
-void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *caller);
+STATIC_INLINE jl_array_t *jl_mi_get_backedges(jl_method_instance_t *mi JL_PROPAGATES_ROOT) {
+    assert(!(jl_atomic_load_relaxed(&mi->flags) & MI_FLAG_BACKEDGES_ALL));
+    jl_array_t *ret = mi->backedges;
+    return ret;
+}
+
+int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_code_instance_t **caller) JL_NOTSAFEPOINT;
+int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_code_instance_t *caller);
+int clear_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_code_instance_t *caller);
+void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_code_instance_t *caller);
+void jl_mi_done_backedges(jl_method_instance_t *mi JL_PROPAGATES_ROOT, uint8_t old_flags);
 
 JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_t* root);
 void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots);
@@ -649,7 +801,7 @@ int jl_valid_type_param(jl_value_t *v);
 
 JL_DLLEXPORT jl_value_t *jl_apply_2va(jl_value_t *f, jl_value_t **args, uint32_t nargs);
 
-void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world);
+void JL_NORETURN jl_method_error(jl_value_t *F, jl_value_t **args, size_t na, size_t world);
 JL_DLLEXPORT jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...);
 
 JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t);
@@ -658,14 +810,10 @@ JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t);
     JL_DLLEXPORT jl_value_t *name(jl_value_t *F, jl_value_t **args, uint32_t nargs)
 
 JL_CALLABLE(jl_f_tuple);
-JL_CALLABLE(jl_f_intrinsic_call);
-JL_CALLABLE(jl_f_opaque_closure_call);
 void jl_install_default_signal_handlers(void);
 void restore_signals(void);
 void jl_install_thread_signal_handler(jl_ptls_t ptls);
 
-JL_DLLEXPORT jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b);
-
 extern uv_loop_t *jl_io_loop;
 JL_DLLEXPORT void jl_uv_flush(uv_stream_t *stream);
 
@@ -678,14 +826,23 @@ typedef struct jl_typeenv_t {
 int jl_tuple_isa(jl_value_t **child, size_t cl, jl_datatype_t *pdt);
 int jl_tuple1_isa(jl_value_t *child1, jl_value_t **child, size_t cl, jl_datatype_t *pdt);
 
+enum atomic_kind {
+    isatomic_none = 0,
+    isatomic_object = 1,
+    isatomic_field = 2
+};
+
 JL_DLLEXPORT int jl_has_intersect_type_not_kind(jl_value_t *t);
 int jl_subtype_invariant(jl_value_t *a, jl_value_t *b, int ta);
-int jl_has_concrete_subtype(jl_value_t *typ);
+JL_DLLEXPORT int jl_has_concrete_subtype(jl_value_t *typ);
 jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size_t nargs, int leaf);
 jl_tupletype_t *jl_lookup_arg_tuple_type(jl_value_t *arg1 JL_PROPAGATES_ROOT, jl_value_t **args, size_t nargs, int leaf);
 JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype);
-jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_args_t fptr) JL_GC_DISABLED;
+void jl_method_table_activate(jl_typemap_entry_t *newentry);
+jl_typemap_entry_t *jl_method_table_add(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype);
+jl_method_t *jl_mk_builtin_func(jl_datatype_t *dt, jl_sym_t *name, jl_fptr_args_t fptr) JL_GC_DISABLED;
 int jl_obviously_unequal(jl_value_t *a, jl_value_t *b);
+int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_array_t *jl_find_free_typevars(jl_value_t *v);
 int jl_has_fixed_layout(jl_datatype_t *t);
 JL_DLLEXPORT int jl_struct_try_layout(jl_datatype_t *dt);
@@ -694,16 +851,18 @@ jl_svec_t *jl_outer_unionall_vars(jl_value_t *u);
 jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, int *issubty);
 jl_value_t *jl_type_intersection_env(jl_value_t *a, jl_value_t *b, jl_svec_t **penv);
 int jl_subtype_matching(jl_value_t *a, jl_value_t *b, jl_svec_t **penv);
-JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b);
+JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b) JL_NOTSAFEPOINT;
 // specificity comparison assuming !(a <: b) and !(b <: a)
 JL_DLLEXPORT int jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b);
-jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n);
+JL_DLLEXPORT jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n);
 JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals);
 jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val);
+jl_value_t *jl_substitute_var_nothrow(jl_value_t *t, jl_tvar_t *var, jl_value_t *val, int nothrow);
 jl_unionall_t *jl_rename_unionall(jl_unionall_t *u);
 JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u);
 JL_DLLEXPORT jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u);
+jl_value_t* jl_substitute_datatype(jl_value_t *t, jl_datatype_t * x, jl_datatype_t * y);
 int jl_count_union_components(jl_value_t *v);
 JL_DLLEXPORT jl_value_t *jl_nth_union_component(jl_value_t *v JL_PROPAGATES_ROOT, int i) JL_NOTSAFEPOINT;
 int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *nth) JL_NOTSAFEPOINT;
@@ -712,7 +871,7 @@ jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module,
 jl_datatype_t *jl_new_uninitialized_datatype(void);
 void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable);
 JL_DLLEXPORT jl_datatype_t *jl_wrap_Type(jl_value_t *t);  // x -> Type{x}
-jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check);
+jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check, int nothrow);
 void jl_reinstantiate_inner_types(jl_datatype_t *t);
 jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type);
 void jl_cache_type_(jl_datatype_t *type);
@@ -721,21 +880,64 @@ void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs,
 jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic);
 jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic);
 jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *expected, jl_value_t *rhs, int isatomic);
+int set_nth_fieldonce(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic);
+jl_value_t *swap_bits(jl_value_t *ty, char *v, uint8_t *psel, jl_value_t *parent, jl_value_t *rhs, enum atomic_kind isatomic);
+jl_value_t *replace_value(jl_value_t *ty, _Atomic(jl_value_t*) *p, jl_value_t *parent, jl_value_t *expected, jl_value_t *rhs, int isatomic, jl_module_t *mod, jl_sym_t *name);
+jl_value_t *replace_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_t *parent, jl_value_t *expected, jl_value_t *rhs, enum atomic_kind isatomic);
+jl_value_t *modify_value(jl_value_t *ty, _Atomic(jl_value_t*) *p, jl_value_t *parent, jl_value_t *op, jl_value_t *rhs, int isatomic, jl_binding_t *b, jl_module_t *mod, jl_sym_t *name);
+jl_value_t *modify_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_t *parent, jl_value_t *op, jl_value_t *rhs, enum atomic_kind isatomic);
+int setonce_bits(jl_datatype_t *rty, char *p, jl_value_t *owner, jl_value_t *rhs, enum atomic_kind isatomic);
 jl_expr_t *jl_exprn(jl_sym_t *head, size_t n);
-jl_function_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module);
-jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st);
-int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), void *env);
-int foreach_mtable_in_module(jl_module_t *m, int (*visit)(jl_methtable_t *mt, void *env), void *env);
+jl_value_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module, size_t new_world);
+jl_value_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st, size_t new_world);
+int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), jl_array_t *mod_array, void *env);
 void jl_init_main_module(void);
 JL_DLLEXPORT int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT;
 jl_array_t *jl_get_loaded_modules(void);
 JL_DLLEXPORT int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree);
 int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT;
 
-void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type);
-jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded);
+jl_value_t *jl_check_binding_assign_value(jl_binding_t *b JL_PROPAGATES_ROOT, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED, const char *msg);
+void jl_binding_set_type(jl_binding_t *b, jl_module_t *mod, jl_sym_t *sym, jl_value_t *ty);
+JL_DLLEXPORT void jl_declare_global(jl_module_t *m, jl_value_t *arg, jl_value_t *set_type, int strong);
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val3(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, enum jl_partition_kind, size_t new_world) JL_GLOBALLY_ROOTED;
+JL_DLLEXPORT jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded, const char **toplevel_filename, int *toplevel_lineno);
+
+void jl_module_initial_using(jl_module_t *to, jl_module_t *from);
+STATIC_INLINE struct _jl_module_using *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
+STATIC_INLINE jl_module_t *module_usings_getmod(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
+void jl_add_usings_backedge(jl_module_t *from, jl_module_t *to);
+typedef struct _modstack_t {
+    jl_binding_t *b;
+    struct _modstack_t *prev;
+} modstack_t;
+
+#ifndef __clang_gcanalyzer__
+// The analyzer doesn't like looking through the arraylist, so just model the
+// access for it using this function
+STATIC_INLINE struct _jl_module_using *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT {
+    return (struct _jl_module_using *)&(m->usings.items[4*i]);
+}
+STATIC_INLINE jl_module_t *module_usings_getmod(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT {
+    return module_usings_getidx(m, i)->mod;
+}
+#endif
+
+STATIC_INLINE size_t module_usings_length(jl_module_t *m) JL_NOTSAFEPOINT {
+    return m->usings.len/4;
+}
+
+STATIC_INLINE size_t module_usings_max(jl_module_t *m) JL_NOTSAFEPOINT {
+    return m->usings.max/4;
+}
 
-jl_value_t *jl_eval_global_var(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *e);
+JL_DLLEXPORT jl_sym_t *jl_module_name(jl_module_t *m) JL_NOTSAFEPOINT;
+jl_module_t *jl_module_root(jl_module_t *m);
+void jl_add_scanned_method(jl_module_t *m, jl_method_t *meth);
+jl_value_t *jl_eval_global_var(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *e, size_t world);
+JL_DLLEXPORT jl_value_t *jl_eval_globalref(jl_globalref_t *g, size_t world);
+jl_value_t *jl_get_globalref_value(jl_globalref_t *gr, size_t world);
+jl_value_t *jl_get_global_value(jl_module_t *m, jl_sym_t *var, size_t world);
 jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *clos, jl_value_t **args, size_t nargs);
 jl_value_t *jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t *src);
 jl_value_t *jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e,
@@ -744,8 +946,10 @@ jl_value_t *jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e,
 JL_DLLEXPORT int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT;
 jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr,
                                        jl_module_t *inmodule, const char *file, int line);
+int jl_isa_ast_node(jl_value_t *e) JL_NOTSAFEPOINT;
 
-jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world);
+JL_DLLEXPORT jl_value_t *jl_method_lookup_by_tt(jl_tupletype_t *tt, size_t world, jl_value_t *_mt);
+JL_DLLEXPORT jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world);
 
 jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value_t **args, size_t nargs);
 jl_value_t *jl_gf_invoke(jl_value_t *types, jl_value_t *f, jl_value_t **args, size_t nargs);
@@ -759,30 +963,202 @@ jl_datatype_t *jl_nth_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT,
 JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_methtable_t *jl_method_table_for(
     jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_methcache_t *jl_method_cache_for(
+    jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 jl_methtable_t *jl_kwmethod_table_for(
     jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+jl_methcache_t *jl_kwmethod_cache_for(
+    jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_methtable_t *jl_method_get_table(
     jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_methcache_t *jl_method_get_cache(
+    jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT int jl_pointer_egal(jl_value_t *t);
 JL_DLLEXPORT jl_value_t *jl_nth_slot_type(jl_value_t *sig JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 void jl_compute_field_offsets(jl_datatype_t *st);
-jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims,
-                                             int isunboxed, int hasptr, int isunion, int elsz);
 void jl_module_run_initializer(jl_module_t *m);
 JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc);
-JL_DLLEXPORT void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *sym, jl_binding_t *b);
+JL_DLLEXPORT void jl_binding_deprecation_warning(jl_binding_t *b);
+JL_DLLEXPORT jl_binding_partition_t *jl_replace_binding_locked(jl_binding_t *b JL_PROPAGATES_ROOT,
+    jl_binding_partition_t *old_bpart, jl_value_t *restriction_val, enum jl_partition_kind kind, size_t new_world) JL_GLOBALLY_ROOTED;
+JL_DLLEXPORT jl_binding_partition_t *jl_replace_binding_locked2(jl_binding_t *b JL_PROPAGATES_ROOT,
+    jl_binding_partition_t *old_bpart, jl_value_t *restriction_val, size_t kind, size_t new_world) JL_GLOBALLY_ROOTED;
+JL_DLLEXPORT void jl_update_loaded_bpart(jl_binding_t *b, jl_binding_partition_t *bpart);
 extern jl_array_t *jl_module_init_order JL_GLOBALLY_ROOTED;
 extern htable_t jl_current_modules JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_module_t *jl_precompile_toplevel_module JL_GLOBALLY_ROOTED;
-extern jl_array_t *jl_global_roots_table JL_GLOBALLY_ROOTED;
+extern jl_module_t *jl_precompile_toplevel_module JL_GLOBALLY_ROOTED;
+extern jl_genericmemory_t *jl_global_roots_list JL_GLOBALLY_ROOTED;
+extern jl_genericmemory_t *jl_global_roots_keyset JL_GLOBALLY_ROOTED;
+JL_DLLEXPORT extern size_t jl_require_world;
 JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
-JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) JL_GLOBALLY_ROOTED;
+extern jl_svec_t *precompile_field_replace JL_GLOBALLY_ROOTED;
+JL_DLLEXPORT void jl_set_precompile_field_replace(jl_value_t *val, jl_value_t *field, jl_value_t *newval) JL_GLOBALLY_ROOTED;
 
 jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
     jl_value_t *source,  jl_value_t **env, size_t nenv, int do_compile);
+jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
+    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva, int isinferred);
 JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *source);
 
+STATIC_INLINE int jl_bkind_is_some_import(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == PARTITION_KIND_IMPLICIT_CONST || kind == PARTITION_KIND_IMPLICIT_GLOBAL || kind == PARTITION_KIND_EXPLICIT || kind == PARTITION_KIND_IMPORTED;
+}
+
+STATIC_INLINE int jl_bkind_is_some_explicit_import(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == PARTITION_KIND_EXPLICIT || kind == PARTITION_KIND_IMPORTED;
+}
+
+STATIC_INLINE int jl_bkind_is_some_guard(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == PARTITION_KIND_FAILED || kind == PARTITION_KIND_GUARD;
+}
+
+STATIC_INLINE int jl_bkind_is_some_implicit(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == PARTITION_KIND_IMPLICIT_CONST || kind == PARTITION_KIND_IMPLICIT_GLOBAL || jl_bkind_is_some_guard(kind);
+}
+
+STATIC_INLINE int jl_bkind_is_some_constant(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == PARTITION_KIND_IMPLICIT_CONST || kind == PARTITION_KIND_CONST || kind == PARTITION_KIND_CONST_IMPORT || kind == PARTITION_KIND_UNDEF_CONST || kind == PARTITION_KIND_BACKDATED_CONST;
+}
+
+STATIC_INLINE int jl_bkind_is_defined_constant(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == PARTITION_KIND_IMPLICIT_CONST || kind == PARTITION_KIND_CONST || kind == PARTITION_KIND_CONST_IMPORT || kind == PARTITION_KIND_BACKDATED_CONST;
+}
+
+STATIC_INLINE int jl_bkind_is_real_constant(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == PARTITION_KIND_IMPLICIT_CONST || kind == PARTITION_KIND_CONST || kind == PARTITION_KIND_CONST_IMPORT;
+}
+
+STATIC_INLINE int jl_bpart_is_exported(uint8_t flags) JL_NOTSAFEPOINT {
+    return flags & (PARTITION_FLAG_EXPORTED | PARTITION_FLAG_IMPLICITLY_EXPORTED);
+}
+
+JL_DLLEXPORT jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b JL_PROPAGATES_ROOT, size_t world) JL_GLOBALLY_ROOTED;
+JL_DLLEXPORT jl_binding_partition_t *jl_get_binding_partition_with_hint(jl_binding_t *b JL_PROPAGATES_ROOT, jl_binding_partition_t *previous_part, size_t world) JL_GLOBALLY_ROOTED;
+JL_DLLEXPORT jl_binding_partition_t *jl_get_binding_partition_all(jl_binding_t *b JL_PROPAGATES_ROOT, size_t min_world, size_t max_world) JL_GLOBALLY_ROOTED;
+
+struct restriction_kind_pair {
+    jl_binding_t *binding_if_global;
+    jl_value_t *restriction;
+    enum jl_partition_kind kind;
+    int maybe_depwarn;
+};
+JL_DLLEXPORT int jl_get_binding_leaf_partitions_restriction_kind(jl_binding_t *b JL_PROPAGATES_ROOT, struct restriction_kind_pair *rkp, size_t min_world, size_t max_world) JL_GLOBALLY_ROOTED;
+JL_DLLEXPORT jl_value_t *jl_get_binding_leaf_partitions_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT, int *maybe_depwarn, size_t min_world, size_t max_world);
+
+EXTERN_INLINE_DECLARE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT {
+    return (uint8_t)(bpart->kind & 0xf);
+}
+
+STATIC_INLINE void jl_walk_binding_inplace(jl_binding_t **bnd, jl_binding_partition_t **bpart JL_PROPAGATES_ROOT, size_t world) JL_NOTSAFEPOINT;
+STATIC_INLINE void jl_walk_binding_inplace_depwarn(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t world, int *depwarn) JL_NOTSAFEPOINT;
+STATIC_INLINE void jl_walk_binding_inplace_all(jl_binding_t **bnd, jl_binding_partition_t **bpart JL_PROPAGATES_ROOT, int *depwarn, size_t min_world, size_t max_world) JL_NOTSAFEPOINT;
+STATIC_INLINE void jl_walk_binding_inplace_worlds(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t *min_world, size_t *max_world, int *depwarn, size_t world) JL_NOTSAFEPOINT;
+
+#ifndef __clang_analyzer__
+STATIC_INLINE void jl_walk_binding_inplace(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t world) JL_NOTSAFEPOINT
+{
+    while (1) {
+        enum jl_partition_kind kind = jl_binding_kind(*bpart);
+        if (!jl_bkind_is_some_explicit_import(kind) && kind != PARTITION_KIND_IMPLICIT_GLOBAL)
+            return;
+        *bnd = (jl_binding_t*)(*bpart)->restriction;
+        *bpart = jl_get_binding_partition(*bnd, world);
+    }
+}
+
+STATIC_INLINE void jl_walk_binding_inplace_depwarn(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t world, int *depwarn) JL_NOTSAFEPOINT
+{
+    int passed_explicit = 0;
+    while (1) {
+        enum jl_partition_kind kind = jl_binding_kind(*bpart);
+        if (!jl_bkind_is_some_explicit_import(kind) && kind != PARTITION_KIND_IMPLICIT_GLOBAL) {
+            if (!passed_explicit && depwarn)
+                *depwarn |= (*bpart)->kind & PARTITION_FLAG_DEPWARN;
+            return;
+        }
+        if (!passed_explicit && depwarn)
+            *depwarn |= (*bpart)->kind & PARTITION_FLAG_DEPWARN;
+        if (kind != PARTITION_KIND_IMPLICIT_GLOBAL)
+            passed_explicit = 1;
+        *bnd = (jl_binding_t*)(*bpart)->restriction;
+        *bpart = jl_get_binding_partition(*bnd, world);
+    }
+}
+
+
+STATIC_INLINE void jl_walk_binding_inplace_all(jl_binding_t **bnd, jl_binding_partition_t **bpart, int *depwarn, size_t min_world, size_t max_world) JL_NOTSAFEPOINT
+{
+    int passed_explicit = 0;
+    while (*bpart) {
+        enum jl_partition_kind kind = jl_binding_kind(*bpart);
+        if (!jl_bkind_is_some_explicit_import(kind) && kind != PARTITION_KIND_IMPLICIT_GLOBAL) {
+            if (!passed_explicit && depwarn)
+                *depwarn |= (*bpart)->kind & PARTITION_FLAG_DEPWARN;
+            return;
+        }
+        if (!passed_explicit && depwarn)
+            *depwarn |= (*bpart)->kind & PARTITION_FLAG_DEPWARN;
+        if (kind != PARTITION_KIND_IMPLICIT_GLOBAL)
+            passed_explicit = 1;
+        *bnd = (jl_binding_t*)(*bpart)->restriction;
+        *bpart = jl_get_binding_partition_all(*bnd, min_world, max_world);
+    }
+}
+
+STATIC_INLINE void jl_walk_binding_inplace_worlds(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t *min_world, size_t *max_world, int *depwarn, size_t world) JL_NOTSAFEPOINT
+{
+    int passed_explicit = 0;
+    while (*bpart) {
+        if (*min_world < (*bpart)->min_world)
+            *min_world = (*bpart)->min_world;
+        size_t bpart_max_world = jl_atomic_load_relaxed(&(*bpart)->max_world);
+        if (*max_world > bpart_max_world)
+            *max_world = bpart_max_world;
+        enum jl_partition_kind kind = jl_binding_kind(*bpart);
+        if (!jl_bkind_is_some_explicit_import(kind) && kind != PARTITION_KIND_IMPLICIT_GLOBAL) {
+            if (!passed_explicit && depwarn)
+                *depwarn |= (*bpart)->kind & PARTITION_FLAG_DEPWARN;
+            return;
+        }
+        if (!passed_explicit && depwarn)
+            *depwarn |= (*bpart)->kind & PARTITION_FLAG_DEPWARN;
+        if (kind != PARTITION_KIND_IMPLICIT_GLOBAL)
+            passed_explicit = 1;
+        *bnd = (jl_binding_t*)(*bpart)->restriction;
+        *bpart = jl_get_binding_partition(*bnd, world);
+    }
+}
+#endif
+
+STATIC_INLINE int is10digit(char c) JL_NOTSAFEPOINT
+{
+    return (c >= '0' && c <= '9');
+}
+
+STATIC_INLINE int is_anonfn_typename(char *name)
+{
+    if (name[0] != '#' || name[1] == '#')
+        return 0;
+    char *other = strrchr(name, '#');
+    return other > &name[1] && is10digit(other[1]);
+}
+
+// Returns true for typenames of anounymous functions that have been canonicalized (i.e.
+// we mangled the name of the outermost enclosing function in their name).
+STATIC_INLINE int is_canonicalized_anonfn_typename(char *name) JL_NOTSAFEPOINT
+{
+    char *delim = strchr(&name[1], '#');
+    if (delim == NULL)
+        return 0;
+    if (delim[1] != '#')
+        return 0;
+    if (!is10digit(delim[2]))
+        return 0;
+    return 1;
+}
+
 // Each tuple can exist in one of 4 Vararg states:
 //   NONE: no vararg                            Tuple{Int,Float32}
 //   INT: vararg with integer length            Tuple{Int,Vararg{Float32,2}}
@@ -850,9 +1226,8 @@ STATIC_INLINE jl_vararg_kind_t jl_va_tuple_kind(jl_datatype_t *t) JL_NOTSAFEPOIN
 // -- init.c -- //
 
 void jl_init_types(void) JL_GC_DISABLED;
-void jl_init_box_caches(void);
 void jl_init_flisp(void);
-void jl_init_common_symbols(void);
+void jl_init_common_symbols(void) JL_NOTSAFEPOINT;
 void jl_init_primitives(void) JL_GC_DISABLED;
 void jl_init_llvm(void);
 void jl_init_runtime_ccall(void);
@@ -862,10 +1237,8 @@ void jl_init_tasks(void) JL_GC_DISABLED;
 void jl_init_stack_limits(int ismaster, void **stack_hi, void **stack_lo) JL_NOTSAFEPOINT;
 jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi);
 void jl_init_serializer(void);
-void jl_gc_init(void);
 void jl_init_uv(void);
-void jl_init_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT;
-void jl_init_int32_int64_cache(void);
+void jl_init_box_caches(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_init_options(void);
 
 void jl_set_base_ctx(char *__stk);
@@ -876,11 +1249,12 @@ void jl_init_threading(void);
 void jl_start_threads(void);
 
 // Whether the GC is running
+extern uv_mutex_t safepoint_lock;
 extern char *jl_safepoint_pages;
 STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr)
 {
     uintptr_t safepoint_addr = (uintptr_t)jl_safepoint_pages;
-    return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 3;
+    return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4;
 }
 extern _Atomic(uint32_t) jl_gc_running;
 extern _Atomic(uint32_t) jl_gc_disable_counter;
@@ -896,7 +1270,7 @@ void jl_safepoint_init(void);
 // before calling this function. If the calling thread is to run the GC,
 // it should also wait for the mutator threads to hit a safepoint **AFTER**
 // this function returns
-int jl_safepoint_start_gc(void);
+int jl_safepoint_start_gc(jl_task_t *ct);
 // Can only be called by the thread that have got a `1` return value from
 // `jl_safepoint_start_gc()`. This disables the safepoint (for GC,
 // the `mprotect` may not be removed if there's pending SIGINT) and wake
@@ -906,8 +1280,9 @@ void jl_safepoint_end_gc(void);
 // Wait for the GC to finish
 // This function does **NOT** modify the `gc_state` to inform the GC thread
 // The caller should set it **BEFORE** calling this function.
-void jl_safepoint_wait_gc(void);
-
+void jl_safepoint_wait_gc(jl_task_t *ct) JL_NOTSAFEPOINT;
+void jl_safepoint_wait_thread_resume(jl_task_t *ct) JL_NOTSAFEPOINT;
+int8_t jl_safepoint_take_sleep_lock(jl_ptls_t ptls) JL_NOTSAFEPOINT_ENTER;
 // Set pending sigint and enable the mechanisms to deliver the sigint.
 void jl_safepoint_enable_sigint(void);
 // If the safepoint is enabled to deliver sigint, disable it
@@ -921,9 +1296,7 @@ int jl_safepoint_consume_sigint(void);
 void jl_wake_libuv(void) JL_NOTSAFEPOINT;
 
 void jl_set_pgcstack(jl_gcframe_t **) JL_NOTSAFEPOINT;
-#if defined(_OS_DARWIN_)
-typedef pthread_key_t jl_pgcstack_key_t;
-#elif defined(_OS_WINDOWS_)
+#if defined(_OS_WINDOWS_)
 typedef DWORD jl_pgcstack_key_t;
 #else
 typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT;
@@ -934,18 +1307,7 @@ JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t
 extern pthread_mutex_t in_signal_lock;
 #endif
 
-#if !defined(__clang_gcanalyzer__) && !defined(_OS_DARWIN_)
-static inline void jl_set_gc_and_wait(void)
-{
-    jl_task_t *ct = jl_current_task;
-    // reading own gc state doesn't need atomic ops since no one else
-    // should store to it.
-    int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
-    jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
-    jl_safepoint_wait_gc();
-    jl_atomic_store_release(&ct->ptls->gc_state, state);
-}
-#endif
+void jl_set_gc_and_wait(jl_task_t *ct); // n.b. not used on _OS_DARWIN_
 
 // Query if a Julia object is if a permalloc region (due to part of a sys- pkg-image)
 STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
@@ -955,37 +1317,60 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
 
 size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;
 
-uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
+// Query if this object is perm-allocated in an image.
+JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
 
 // the first argument to jl_idtable_rehash is used to return a value
 // make sure it is rooted if it is used after the function returns
-JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz);
-_Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_genericmemory_t *jl_idtable_rehash(jl_genericmemory_t *a, size_t newsz);
+_Atomic(jl_value_t*) *jl_table_peek_bp(jl_genericmemory_t *a, jl_value_t *key) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t*);
 
+jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, uint8_t default_using_core, uint8_t self_name);
+jl_module_t *jl_add_standard_imports(jl_module_t *m);
 JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *module);
-JL_DLLEXPORT jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache);
-jl_method_instance_t *jl_get_specialized(jl_method_t *m, jl_value_t *types, jl_svec_t *sp);
-JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *li JL_PROPAGATES_ROOT, size_t min_world, size_t max_world);
+JL_DLLEXPORT jl_methcache_t *jl_new_method_cache(void);
+JL_DLLEXPORT jl_value_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES_ROOT, size_t world, int mt_cache);
+jl_method_instance_t *jl_get_specialized(jl_method_t *m, jl_value_t *types, jl_svec_t *sp) JL_PROPAGATES_ROOT;
+JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_value_t *owner, jl_method_instance_t *li JL_PROPAGATES_ROOT, size_t min_world, size_t max_world);
+JL_DLLEXPORT jl_value_t *jl_rettype_inferred_native(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) JL_NOTSAFEPOINT;
-JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_value_t *type, size_t world);
+JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_value_t *type, size_t world) JL_GLOBALLY_ROOTED;
 JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(
     jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams);
-jl_method_instance_t *jl_specializations_get_or_insert(jl_method_instance_t *mi_ins);
-JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_method_instance_t *caller);
-JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller);
+jl_method_instance_t *jl_specializations_get_or_insert(jl_method_instance_t *mi_ins JL_PROPAGATES_ROOT);
+JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_code_instance_t *caller);
+JL_DLLEXPORT void jl_method_table_add_backedge(jl_value_t *typ, jl_code_instance_t *caller);
 JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
                                      jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
+JL_DLLEXPORT int jl_mi_try_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
+                                   jl_code_instance_t *expected_ci,
+                                   jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_code_instance_t *jl_cached_uninferred(jl_code_instance_t *codeinst, size_t world);
+JL_DLLEXPORT jl_code_instance_t *jl_cache_uninferred(jl_method_instance_t *mi, jl_code_instance_t *checked, size_t world, jl_code_instance_t *newci JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_for_uninferred(jl_method_instance_t *mi, jl_code_info_t *src);
 JL_DLLEXPORT extern jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t min_world, size_t max_world) JL_NOTSAFEPOINT;
 
+JL_DLLEXPORT void jl_force_trace_compile_timing_enable(void);
+JL_DLLEXPORT void jl_force_trace_compile_timing_disable(void);
+
+JL_DLLEXPORT void jl_force_trace_dispatch_enable(void);
+JL_DLLEXPORT void jl_force_trace_dispatch_disable(void);
+
+JL_DLLEXPORT void jl_tag_newly_inferred_enable(void);
+JL_DLLEXPORT void jl_tag_newly_inferred_disable(void);
+
 uint32_t jl_module_next_counter(jl_module_t *m) JL_NOTSAFEPOINT;
 jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs);
 
 JL_DLLEXPORT int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
-                                  size_t lineno, size_t offset, jl_value_t *options);
+                                  size_t lineno, size_t offset, jl_value_t *options, jl_module_t *inmodule);
+jl_code_info_t *jl_inner_ctor_body(jl_array_t *fieldkinds, jl_module_t *inmodule, const char *file, int line);
+jl_code_info_t *jl_outer_ctor_body(jl_value_t *thistype, size_t nfields, size_t nsparams, jl_module_t *inmodule, const char *file, int line);
+void jl_ctor_def(jl_value_t *ty, jl_value_t *functionloc);
 
 //--------------------------------------------------
 // Backtraces
@@ -1097,7 +1482,7 @@ typedef struct {
     char *func_name;
     char *file_name;
     int line;
-    jl_method_instance_t *linfo;
+    jl_code_instance_t *ci;
     int fromC;
     int inlined;
 } jl_frame_t;
@@ -1145,14 +1530,16 @@ size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t
 size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT;
 #endif
 JL_DLLEXPORT jl_value_t *jl_get_backtrace(void);
-void jl_critical_error(int sig, int si_code, bt_context_t *context, jl_task_t *ct);
+JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip);
+void jl_fprint_critical_error(ios_t *t, int sig, int si_code, bt_context_t *context, jl_task_t *ct);
 JL_DLLEXPORT void jl_raise_debugger(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gdblookup(void* ip) JL_NOTSAFEPOINT;
-void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT;
-void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_data) JL_NOTSAFEPOINT;
-#ifdef _OS_WINDOWS_
-JL_DLLEXPORT void jl_refresh_dbg_module_list(void);
-#endif
+JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT;
+void jl_fprint_native_codeloc(ios_t *s, uintptr_t ip) JL_NOTSAFEPOINT;
+void jl_fprint_bt_entry_codeloc(ios_t *s, jl_bt_element_t *bt_data) JL_NOTSAFEPOINT;
+void jl_thread_resume(int tid) JL_NOTSAFEPOINT;
+int jl_thread_suspend(int16_t tid, bt_context_t *ctx) JL_NOTSAFEPOINT;
+
 // *to is NULL or malloc'd pointer, from is allowed to be NULL
 STATIC_INLINE char *jl_copy_str(char **to, const char *from) JL_NOTSAFEPOINT
 {
@@ -1209,23 +1596,47 @@ STATIC_INLINE size_t jl_excstack_next(jl_excstack_t *stack, size_t itr) JL_NOTSA
     return itr-2 - jl_excstack_bt_size(stack, itr);
 }
 // Exception stack manipulation
-void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT,
+void jl_push_excstack(jl_task_t *ct, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT,
                       jl_value_t *exception JL_ROOTED_ARGUMENT,
                       jl_bt_element_t *bt_data, size_t bt_size);
 
+// System util to get maximum RSS
+JL_DLLEXPORT size_t jl_maxrss(void);
+
 //--------------------------------------------------
 // congruential random number generator
 // for a small amount of thread-local randomness
-STATIC_INLINE void unbias_cong(uint64_t max, uint64_t *unbias) JL_NOTSAFEPOINT
-{
-    *unbias = UINT64_MAX - ((UINT64_MAX % max) + 1);
-}
-STATIC_INLINE uint64_t cong(uint64_t max, uint64_t unbias, uint64_t *seed) JL_NOTSAFEPOINT
+
+//TODO: utilize https://github.com/openssl/openssl/blob/master/crypto/rand/rand_uniform.c#L13-L99
+// for better performance, it does however require making users expect a 32bit random number.
+
+STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT // Open interval [0, max)
 {
-    while ((*seed = 69069 * (*seed) + 362437) > unbias)
-        ;
-    return *seed % max;
+    if (max < 2)
+        return 0;
+    uint64_t mask = ~(uint64_t)0;
+    int zeros = __builtin_clzll(max);
+    int bits = CHAR_BIT * sizeof(uint64_t) - zeros;
+    mask = mask >> zeros;
+    do {
+        uint64_t value = 69069 * (*seed) + 362437;
+        *seed = value;
+        uint64_t x = value & mask;
+        if (x < max) {
+            return x;
+        }
+        int bits_left = zeros;
+        while (bits_left >= bits) {
+            value >>= bits;
+            x = value & mask;
+            if (x < max) {
+                return x;
+            }
+            bits_left -= bits;
+        }
+    } while (1);
 }
+
 JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_init_rand(void);
@@ -1245,14 +1656,16 @@ void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT;
 #endif
 
 JL_DLLEXPORT void *jl_get_library_(const char *f_lib, int throw_err);
-void *jl_find_dynamic_library_by_addr(void *symbol);
+void *jl_find_dynamic_library_by_addr(void *symbol, int throw_err, int close) JL_NOTSAFEPOINT;
 #define jl_get_library(f_lib) jl_get_library_(f_lib, 1)
 JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *hnd);
-JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name);
+JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, jl_value_t *f_name);
 JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline(
     jl_value_t *fobj, jl_datatype_t *result, htable_t *cache, jl_svec_t *fill,
     void *(*init_trampoline)(void *tramp, void **nval),
     jl_unionall_t *env, jl_value_t **vals);
+JL_DLLEXPORT void *jl_get_abi_converter(jl_task_t *ct, void *data);
+JL_DLLIMPORT void *jl_jit_abi_converter(jl_task_t *ct, jl_abi_t from_abi, jl_code_instance_t *codeinst);
 
 
 // Special filenames used to refer to internal julia libraries
@@ -1270,7 +1683,7 @@ JL_DLLEXPORT unsigned jl_intrinsic_nargs(int f) JL_NOTSAFEPOINT;
 
 STATIC_INLINE int is_valid_intrinsic_elptr(jl_value_t *ety)
 {
-    return ety == (jl_value_t*)jl_any_type || (jl_is_concrete_type(ety) && !jl_is_layout_opaque(((jl_datatype_t*)ety)->layout));
+    return ety == (jl_value_t*)jl_any_type || (jl_is_concrete_type(ety) && !jl_is_layout_opaque(((jl_datatype_t*)ety)->layout) && !jl_is_array_type(ety));
 }
 JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v);
 JL_DLLEXPORT jl_value_t *jl_pointerref(jl_value_t *p, jl_value_t *i, jl_value_t *align);
@@ -1301,6 +1714,8 @@ JL_DLLEXPORT jl_value_t *jl_add_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_sub_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_mul_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_div_float(jl_value_t *a, jl_value_t *b);
+JL_DLLEXPORT jl_value_t *jl_min_float(jl_value_t *a, jl_value_t *b);
+JL_DLLEXPORT jl_value_t *jl_max_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_fma_float(jl_value_t *a, jl_value_t *b, jl_value_t *c);
 JL_DLLEXPORT jl_value_t *jl_muladd_float(jl_value_t *a, jl_value_t *b, jl_value_t *c);
 
@@ -1361,31 +1776,44 @@ JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);
 
-JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *a);
 JL_DLLEXPORT int jl_stored_inline(jl_value_t *el_type);
 JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a);
-JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i);
 JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary);
+JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy(jl_genericmemory_t *mem);
 
-JL_DLLEXPORT uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uintptr_t jl_object_id_(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT;
 
+JL_DLLEXPORT uint16_t julia_double_to_half(double param) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint16_t julia_float_to_half(float param) JL_NOTSAFEPOINT;
+JL_DLLEXPORT float julia_half_to_float(uint16_t param) JL_NOTSAFEPOINT;
+
 // -- synchronization utilities -- //
 
 extern jl_mutex_t typecache_lock;
-extern JL_DLLEXPORT jl_mutex_t jl_codegen_lock;
+extern jl_mutex_t world_counter_lock;
 
 #if defined(__APPLE__)
-void jl_mach_gc_end(void);
+void jl_mach_gc_end(void) JL_NOTSAFEPOINT;
+void jl_safepoint_resume_thread_mach(jl_ptls_t ptls2, int16_t tid2) JL_NOTSAFEPOINT;
 #endif
 
 // -- smallintset.c -- //
 
-typedef uint_t (*smallintset_hash)(size_t val, jl_svec_t *data);
-typedef int (*smallintset_eq)(size_t val, const void *key, jl_svec_t *data, uint_t hv);
-ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void *key, jl_svec_t *data, uint_t hv);
-void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data);
+typedef uint_t (*smallintset_hash)(size_t val, jl_value_t *data);
+typedef int (*smallintset_eq)(size_t val, const void *key, jl_value_t *data, uint_t hv);
+ssize_t jl_smallintset_lookup(jl_genericmemory_t *cache, smallintset_eq eq, const void *key, jl_value_t *data, uint_t hv, int pop);
+void jl_smallintset_insert(_Atomic(jl_genericmemory_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_value_t *data);
+jl_genericmemory_t* smallintset_rehash(jl_genericmemory_t* a, smallintset_hash hash, jl_value_t *data, size_t newsz, size_t np);
+void smallintset_empty(const jl_genericmemory_t *a) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT jl_genericmemory_t *jl_idset_rehash(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, size_t newsz);
+JL_DLLEXPORT ssize_t jl_idset_peek_bp(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_idset_get(jl_genericmemory_t *keys JL_PROPAGATES_ROOT, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_genericmemory_t *jl_idset_put_key(jl_genericmemory_t *keys, jl_value_t *key, ssize_t *newidx);
+JL_DLLEXPORT jl_genericmemory_t *jl_idset_put_idx(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, ssize_t idx);
+JL_DLLEXPORT ssize_t jl_idset_pop(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT;
 
 // -- typemap.c -- //
 
@@ -1401,8 +1829,6 @@ struct jl_typemap_assoc {
     size_t const world;
     // outputs
     jl_svec_t *env; // subtype env (initialize to null to perform intersection without an environment)
-    size_t min_valid;
-    size_t max_valid;
 };
 
 jl_typemap_entry_t *jl_typemap_assoc_by_type(
@@ -1450,23 +1876,20 @@ void typemap_slurp_search(jl_typemap_entry_t *ml, struct typemap_intersection_en
 
 // -- simplevector.c -- //
 
-// For codegen only.
-JL_DLLEXPORT size_t (jl_svec_len)(jl_svec_t *t) JL_NOTSAFEPOINT;
-JL_DLLEXPORT jl_value_t *jl_svec_ref(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i);
-
 // check whether the specified number of arguments is compatible with the
 // specified number of parameters of the tuple type
 JL_DLLEXPORT int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0);
 JL_DLLEXPORT jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_value_t *types0);
+JL_DLLEXPORT jl_value_t *jl_argtype_without_function(jl_value_t *ftypes);
 
 JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);
 
-void register_eh_frames(uint8_t *Addr, size_t Size);
-void deregister_eh_frames(uint8_t *Addr, size_t Size);
+void register_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT;
+void deregister_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT;
 
-STATIC_INLINE void *jl_get_frame_addr(void)
+STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT
 {
 #ifdef __GNUC__
     return __builtin_frame_address(0);
@@ -1478,8 +1901,6 @@ STATIC_INLINE void *jl_get_frame_addr(void)
 #endif
 }
 
-JL_DLLEXPORT jl_array_t *jl_array_cconvert_cstring(jl_array_t *a);
-
 // Log `msg` to the current logger by calling CoreLogging.logmsg_shim() on the
 // julia side. If any of module, group, id, file or line are NULL, these will
 // be passed to the julia side as `nothing`.  If `kwargs` is NULL an empty set
@@ -1490,106 +1911,115 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
 
 JL_DLLEXPORT int jl_isabspath(const char *in) JL_NOTSAFEPOINT;
 
-extern JL_DLLEXPORT jl_sym_t *jl_call_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_invoke_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_invoke_modify_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_empty_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_top_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_module_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_slot_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_export_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_import_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_toplevel_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_quote_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_line_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_incomplete_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_goto_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_goto_ifnot_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_return_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_lineinfo_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_lambda_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_assign_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_binding_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_globalref_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_do_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_method_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_core_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_enter_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_leave_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_pop_exception_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_exc_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_error_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_new_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_using_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_splatnew_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_block_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_new_opaque_closure_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_opaque_closure_method_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_const_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_thunk_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_as_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_global_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_list_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_dot_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_newvar_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_boundscheck_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_inbounds_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_copyast_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_cfunction_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_loopinfo_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_meta_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_inert_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_polly_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_unused_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_static_parameter_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_inline_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_noinline_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_generated_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_generated_only_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_isdefined_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_propagate_inbounds_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_specialize_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_purity_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_nospecializeinfer_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_macrocall_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_colon_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_throw_undef_if_not_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_getfield_undefref_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_gc_preserve_begin_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_gc_preserve_end_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_coverageeffect_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_escape_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_aliasscope_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_popaliasscope_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_optlevel_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_thismodule_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_atom_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_statement_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_all_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_compile_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_force_compile_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_infer_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_max_methods_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_atomic_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_not_atomic_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_unordered_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_monotonic_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_release_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
+// Commonly used symbols (jl_sym_t* values)
+#define JL_COMMON_SYMBOLS(XX) \
+    XX(acquire_release_sym) \
+    XX(acquire_sym) \
+    XX(aggressive_constprop_sym) \
+    XX(aliasscope_sym) \
+    XX(all_sym) \
+    XX(as_sym) \
+    XX(assign_sym) \
+    XX(atom_sym) \
+    XX(atomic_sym) \
+    XX(block_sym) \
+    XX(boundscheck_sym) \
+    XX(call_sym) \
+    XX(cfunction_sym) \
+    XX(colon_sym) \
+    XX(compile_sym) \
+    XX(const_sym) \
+    XX(copyast_sym) \
+    XX(core_sym) \
+    XX(coverageeffect_sym) \
+    XX(do_sym) \
+    XX(dot_sym) \
+    XX(empty_sym) \
+    XX(enter_sym) \
+    XX(error_sym) \
+    XX(escape_sym) \
+    XX(eval_sym) \
+    XX(exc_sym) \
+    XX(export_sym) \
+    XX(force_compile_sym) \
+    XX(foreigncall_sym) \
+    XX(gc_preserve_begin_sym) \
+    XX(gc_preserve_end_sym) \
+    XX(generated_only_sym) \
+    XX(generated_sym) \
+    XX(getfield_undefref_sym) \
+    XX(global_sym) \
+    XX(globalref_sym) \
+    XX(goto_ifnot_sym) \
+    XX(goto_sym) \
+    XX(hygienicscope_sym) \
+    XX(inbounds_sym) \
+    XX(include_sym) \
+    XX(incomplete_sym) \
+    XX(inert_sym) \
+    XX(infer_sym) \
+    XX(inline_sym) \
+    XX(invoke_modify_sym) \
+    XX(invoke_sym) \
+    XX(isdefined_sym) \
+    XX(lambda_sym) \
+    XX(latestworld_sym) \
+    XX(leave_sym) \
+    XX(line_sym) \
+    XX(lineinfo_sym) \
+    XX(list_sym) \
+    XX(local_sym) \
+    XX(loopinfo_sym) \
+    XX(macrocall_sym) \
+    XX(max_methods_sym) \
+    XX(meta_sym) \
+    XX(method_sym) \
+    XX(module_sym) \
+    XX(monotonic_sym) \
+    XX(new_opaque_closure_sym) \
+    XX(new_sym) \
+    XX(newvar_sym) \
+    XX(no_constprop_sym) \
+    XX(noinline_sym) \
+    XX(nospecialize_sym) \
+    XX(nospecializeinfer_sym) \
+    XX(not_atomic_sym) \
+    XX(opaque_closure_method_sym) \
+    XX(optlevel_sym) \
+    XX(polly_sym) \
+    XX(pop_exception_sym) \
+    XX(popaliasscope_sym) \
+    XX(propagate_inbounds_sym) \
+    XX(public_sym) \
+    XX(purity_sym) \
+    XX(quote_sym) \
+    XX(release_sym) \
+    XX(return_sym) \
+    XX(sequentially_consistent_sym) \
+    XX(slot_sym) \
+    XX(specialize_sym) \
+    XX(splatnew_sym) \
+    XX(statement_sym) \
+    XX(static_parameter_sym) \
+    XX(thismodule_sym) \
+    XX(throw_undef_if_not_sym) \
+    XX(thunk_sym) \
+    XX(top_sym) \
+    XX(toplevel_sym) \
+    XX(uninferred_sym) \
+    XX(unordered_sym) \
+    XX(unused_sym) \
+
+#define XX(name) extern JL_DLLEXPORT jl_sym_t *jl_##name;
+JL_COMMON_SYMBOLS(XX)
+#undef XX
 
 JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing);
 JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing);
 
 struct _jl_image_fptrs_t;
 
-void jl_write_coverage_data(const char*);
+JL_DLLEXPORT void jl_write_coverage_data(const char*);
 void jl_write_malloc_log(void);
 
 #if jl_has_builtin(__builtin_unreachable) || defined(_COMPILER_GCC_) || defined(_COMPILER_INTEL_)
@@ -1598,6 +2028,7 @@ void jl_write_malloc_log(void);
 #  define jl_unreachable() ((void)jl_assume(0))
 #endif
 
+extern uv_mutex_t symtab_lock;
 jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
 
 // Tools for locally disabling spurious compiler warnings
@@ -1608,8 +2039,8 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
 //   are used inside a JL_TRY as being "clobbered" if JL_CATCH is entered. This
 //   warning is spurious if the variable is not modified inside the JL_TRY.
 //   See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65041
-#ifdef _COMPILER_GCC_
 #define JL_DO_PRAGMA(s) _Pragma(#s)
+#ifdef _COMPILER_GCC_
 #define JL_GCC_IGNORE_START(warning) \
     JL_DO_PRAGMA(GCC diagnostic push) \
     JL_DO_PRAGMA(GCC diagnostic ignored warning)
@@ -1620,6 +2051,24 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
 #define JL_GCC_IGNORE_STOP
 #endif // _COMPILER_GCC_
 
+#ifdef _COMPILER_CLANG_
+#define JL_CLANG_IGNORE_START(warning) \
+    JL_DO_PRAGMA(clang diagnostic push) \
+    JL_DO_PRAGMA(clang diagnostic ignored warning)
+#define JL_CLANG_IGNORE_STOP \
+    JL_DO_PRAGMA(clang diagnostic pop)
+#else
+#define JL_CLANG_IGNORE_START(w)
+#define JL_CLANG_IGNORE_STOP
+#endif // _COMPILER_CLANG_
+
+#define JL_CC_IGNORE_START(warning) \
+    JL_GCC_IGNORE_START(warning) \
+    JL_CLANG_IGNORE_START(warning)
+#define JL_CC_IGNORE_STOP \
+    JL_GCC_IGNORE_STOP \
+    JL_CLANG_IGNORE_STOP
+
 #ifdef __clang_gcanalyzer__
   // Not a safepoint (so it doesn't free other values), but an artificial use.
   // Usually this is unnecessary because the analyzer can see all real uses,
@@ -1630,27 +2079,15 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
   #define JL_GC_ASSERT_LIVE(x) (void)(x)
 #endif
 
-JL_DLLEXPORT float julia__gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint16_t julia__gnu_f2h_ieee(float param) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint16_t julia__truncdfhf2(double param) JL_NOTSAFEPOINT;
-//JL_DLLEXPORT double julia__extendhfdf2(uint16_t n) JL_NOTSAFEPOINT;
-//JL_DLLEXPORT int32_t julia__fixhfsi(uint16_t n) JL_NOTSAFEPOINT;
-//JL_DLLEXPORT int64_t julia__fixhfdi(uint16_t n) JL_NOTSAFEPOINT;
-//JL_DLLEXPORT uint32_t julia__fixunshfsi(uint16_t n) JL_NOTSAFEPOINT;
-//JL_DLLEXPORT uint64_t julia__fixunshfdi(uint16_t n) JL_NOTSAFEPOINT;
-//JL_DLLEXPORT uint16_t julia__floatsihf(int32_t n) JL_NOTSAFEPOINT;
-//JL_DLLEXPORT uint16_t julia__floatdihf(int64_t n) JL_NOTSAFEPOINT;
-//JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) JL_NOTSAFEPOINT;
-//JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) JL_NOTSAFEPOINT;
-
 JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len);
 
 // -- exports from codegen -- //
 
-JL_DLLIMPORT jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world);
+#define IR_FLAG_INBOUNDS 0x01
+
 JL_DLLIMPORT void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec);
-JL_DLLIMPORT void jl_generate_fptr_for_oc_wrapper(jl_code_instance_t *unspec);
-JL_DLLIMPORT int jl_compile_extern_c(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt);
+JL_DLLIMPORT int jl_compile_codeinst(jl_code_instance_t *unspec);
+JL_DLLIMPORT void jl_emit_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src);
 
 typedef struct {
     LLVMOrcThreadSafeModuleRef TSM;
@@ -1659,22 +2096,27 @@ typedef struct {
 
 JL_DLLIMPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world,
         char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary);
-JL_DLLIMPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params);
+JL_DLLIMPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, jl_code_info_t *src, char getwrapper, char optimize, const jl_cgparams_t params);
 JL_DLLIMPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary);
 JL_DLLIMPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo);
 JL_DLLIMPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw);
 
-JL_DLLIMPORT void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode, int cache, size_t world);
+typedef jl_value_t *(*jl_codeinstance_lookup_t)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t min_world, size_t max_world);
+JL_DLLIMPORT void *jl_create_native(LLVMOrcThreadSafeModuleRef llvmmod, int trim, int cache, size_t world, jl_array_t *mod_array JL_MAYBE_UNROOTED, jl_array_t *worklist JL_MAYBE_UNROOTED, int all, jl_array_t *module_init_order JL_MAYBE_UNROOTED);
+JL_DLLIMPORT void *jl_emit_native(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _external_linkage);
 JL_DLLIMPORT void jl_dump_native(void *native_code,
         const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname,
-        ios_t *z, ios_t *s);
-JL_DLLIMPORT void jl_get_llvm_gvs(void *native_code, arraylist_t *gvs);
-JL_DLLIMPORT void jl_get_llvm_external_fns(void *native_code, arraylist_t *gvs);
+        ios_t *z, ios_t *s, jl_emission_params_t *params);
+JL_DLLIMPORT void jl_get_llvm_gvs(void *native_code, size_t *num_els, void **gvs);
+JL_DLLIMPORT void jl_get_llvm_gv_inits(void *native_code, size_t *num_els, void **inits);
+JL_DLLIMPORT void jl_get_llvm_external_fns(void *native_code, size_t *num_els,
+                                           jl_code_instance_t *fns);
 JL_DLLIMPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode,
         int32_t *func_idx, int32_t *specfunc_idx);
 JL_DLLIMPORT void jl_register_fptrs(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs,
                                     jl_method_instance_t **linfos, size_t n);
-
+JL_DLLIMPORT void jl_get_llvm_cis(void *native_code, size_t *num_els,
+                                  jl_code_instance_t **CIs);
 JL_DLLIMPORT void jl_init_codegen(void);
 JL_DLLIMPORT void jl_teardown_codegen(void) JL_NOTSAFEPOINT;
 JL_DLLIMPORT int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT;
diff --git a/src/julia_locks.h b/src/julia_locks.h
index 47e258f69aab2..a4b5fd96b8fb4 100644
--- a/src/julia_locks.h
+++ b/src/julia_locks.h
@@ -3,6 +3,10 @@
 #ifndef JL_LOCKS_H
 #define JL_LOCKS_H
 
+#ifdef _COMPILER_TSAN_ENABLED_
+#include <sanitizer/tsan_interface.h>
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -34,7 +38,13 @@ static inline void jl_mutex_lock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT JL_NOTSA
     // Hide this body from the analyzer, otherwise it complains that we're calling
     // a non-safepoint from this function. The 0 arguments guarantees that we do
     // not reach the safepoint, but the analyzer can't figure that out
+#ifdef _COMPILER_TSAN_ENABLED_
+    __tsan_mutex_pre_lock(lock, __tsan_mutex_write_reentrant);
+#endif
     jl_mutex_wait(lock, 0);
+#ifdef _COMPILER_TSAN_ENABLED_
+    __tsan_mutex_post_lock(lock, __tsan_mutex_write_reentrant, 1);
+#endif
 #endif
 }
 
@@ -96,8 +106,41 @@ static inline void jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEP
 #define JL_LOCK_NOGC(m) jl_mutex_lock_nogc(m)
 #define JL_UNLOCK_NOGC(m) jl_mutex_unlock_nogc(m)
 
+JL_DLLEXPORT void jl_lock_value(jl_mutex_t *v) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_unlock_value(jl_mutex_t *v) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_lock_field(jl_mutex_t *v) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_unlock_field(jl_mutex_t *v) JL_NOTSAFEPOINT;
+
 #ifdef __cplusplus
 }
+
+#include <mutex>
+#include <condition_variable>
+// simple C++ shim around a std::unique_lock + gc-safe + disabled finalizers region
+// since we nearly always want that combination together
+class jl_unique_gcsafe_lock {
+public:
+    int8_t gc_state;
+    std::unique_lock<std::mutex> native;
+    explicit jl_unique_gcsafe_lock(std::mutex &native) JL_NOTSAFEPOINT_ENTER
+    {
+        jl_task_t *ct = jl_current_task;
+        gc_state = jl_gc_safe_enter(ct->ptls); // contains jl_gc_safepoint after enter
+        this->native = std::unique_lock<std::mutex>(native);
+        ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph
+    }
+    jl_unique_gcsafe_lock(jl_unique_gcsafe_lock &&native) = delete;
+    jl_unique_gcsafe_lock(jl_unique_gcsafe_lock &native) = delete;
+    ~jl_unique_gcsafe_lock() JL_NOTSAFEPOINT_LEAVE {
+        jl_task_t *ct = jl_current_task;
+        native.unlock();
+        jl_gc_safe_leave(ct->ptls, gc_state); // contains jl_gc_safepoint after leave
+        ct->ptls->engine_nqueued--; // enable finalizers (but don't run them until the next gc)
+    }
+    void wait(std::condition_variable& cond) JL_NOTSAFEPOINT {
+        cond.wait(native);
+    }
+};
 #endif
 
 #endif
diff --git a/src/julia_threads.h b/src/julia_threads.h
index f4c235243e684..364931e43d2e9 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -4,7 +4,17 @@
 #ifndef JL_THREADS_H
 #define JL_THREADS_H
 
-#include "work-stealing-queue.h"
+#ifndef WITH_THIRD_PARTY_HEAP
+#include "gc-tls-stock.h"
+#else
+// Pick the appropriate third-party implementation
+#ifdef WITH_THIRD_PARTY_HEAP
+#if WITH_THIRD_PARTY_HEAP == 1 // MMTk
+#include "gc-tls-mmtk.h"
+#endif
+#endif
+#endif
+#include "gc-tls-common.h"
 #include "julia_atomics.h"
 #ifndef _OS_WINDOWS_
 #include "pthread.h"
@@ -18,6 +28,8 @@ extern "C" {
 
 JL_DLLEXPORT int16_t jl_threadid(void);
 JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT;
 
 // JULIA_ENABLE_THREADING may be controlled by altering JULIA_THREADS in Make.user
 
@@ -27,77 +39,64 @@ JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT;
 #define JULIA_DEBUG_SLEEPWAKE(x)
 
 //  Options for task switching algorithm (in order of preference):
-// JL_HAVE_ASM -- mostly setjmp
-// JL_HAVE_ASM && JL_HAVE_UNW_CONTEXT -- libunwind-based
-// JL_HAVE_UNW_CONTEXT -- libunwind-based
-// JL_HAVE_ASYNCIFY -- task switching based on the binary asyncify transform
-// JL_HAVE_UCONTEXT -- posix standard API, requires syscall for resume
-// JL_HAVE_SIGALTSTACK -- requires several syscall for start, setjmp for resume
+// JL_TASK_SWITCH_ASM -- mostly setjmp
+// JL_TASK_SWITCH_ASM && JL_TASK_SWITCH_LIBUNWIND -- libunwind-based
+// JL_TASK_SWITCH_LIBUNWIND -- libunwind-based
+// JL_TASK_SWITCH_WINDOWS -- implementation for Windows
 
 #ifdef _OS_WINDOWS_
-#define JL_HAVE_UCONTEXT
+#define JL_TASK_SWITCH_WINDOWS
 typedef win32_ucontext_t jl_stack_context_t;
 typedef jl_stack_context_t _jl_ucontext_t;
+
 #else
+
+#if defined(_OS_OPENBSD_)
+#define JL_TASK_SWITCH_LIBUNWIND
+#endif
+
 typedef struct {
     jl_jmp_buf uc_mcontext;
 } jl_stack_context_t;
-#if !defined(JL_HAVE_UCONTEXT) && \
-    !defined(JL_HAVE_ASM) && \
-    !defined(JL_HAVE_UNW_CONTEXT) && \
-    !defined(JL_HAVE_SIGALTSTACK) && \
-    !defined(JL_HAVE_ASYNCIFY)
+
+#if !defined(JL_TASK_SWITCH_ASM) && \
+    !defined(JL_TASK_SWITCH_LIBUNWIND)
 #if (defined(_CPU_X86_64_) || defined(_CPU_X86_) || defined(_CPU_AARCH64_) ||  \
-     defined(_CPU_ARM_) || defined(_CPU_PPC64_))
-#define JL_HAVE_ASM
+     defined(_CPU_ARM_) || defined(_CPU_PPC64_) || defined(_CPU_RISCV64_))
+#define JL_TASK_SWITCH_ASM
 #endif
 #if 0
 // very slow, but more debugging
 //#elif defined(_OS_DARWIN_)
-//#define JL_HAVE_UNW_CONTEXT
+//#define JL_TASK_SWITCH_LIBUNWIND
 //#elif defined(_OS_LINUX_)
-//#define JL_HAVE_UNW_CONTEXT
-#elif defined(_OS_EMSCRIPTEN_)
-#define JL_HAVE_ASYNCIFY
-#elif !defined(JL_HAVE_ASM)
-#define JL_HAVE_UNW_CONTEXT // optimistically?
+//#define JL_TASK_SWITCH_LIBUNWIND
+#elif !defined(JL_TASK_SWITCH_ASM)
+#define JL_TASK_SWITCH_LIBUNWIND // optimistically?
 #endif
 #endif
 
-#if (!defined(JL_HAVE_UNW_CONTEXT) && defined(JL_HAVE_ASM)) || defined(JL_HAVE_SIGALTSTACK)
-typedef jl_stack_context_t _jl_ucontext_t;
-#endif
-#if defined(JL_HAVE_ASYNCIFY)
-#if defined(_COMPILER_TSAN_ENABLED_)
-#error TSAN not currently supported with asyncify
-#endif
-typedef struct {
-    // This is the extent of the asyncify stack, but because the top of the
-    // asyncify stack (stacktop) is also the bottom of the C stack, we can
-    // reuse stacktop for both. N.B.: This matches the layout of the
-    // __asyncify_data struct.
-    void *stackbottom;
-    void *stacktop;
-} _jl_ucontext_t;
-#endif
+#if defined(JL_TASK_SWITCH_LIBUNWIND)
 #pragma GCC visibility push(default)
-#if defined(JL_HAVE_UNW_CONTEXT)
 #define UNW_LOCAL_ONLY
 #include <libunwind.h>
 typedef unw_context_t _jl_ucontext_t;
-#endif
-#if defined(JL_HAVE_UCONTEXT)
-#include <ucontext.h>
-typedef ucontext_t _jl_ucontext_t;
-#endif
 #pragma GCC visibility pop
+#elif defined(JL_TASK_SWITCH_ASM)
+typedef jl_stack_context_t _jl_ucontext_t;
+#endif
+
 #endif
 
 typedef struct {
     union {
-        _jl_ucontext_t ctx;
-        jl_stack_context_t copy_ctx;
+        _jl_ucontext_t *ctx;
+        jl_stack_context_t *copy_ctx;
     };
+    void *stkbuf; // malloc'd memory (either copybuf or stack)
+    size_t bufsz; // actual sizeof stkbuf
+    unsigned int copy_stack:31; // sizeof stack for copybuf
+    unsigned int started:1;
 #if defined(_COMPILER_TSAN_ENABLED_)
     void *tsan_state;
 #endif
@@ -109,7 +108,7 @@ typedef struct {
 
 // handle to reference an OS thread
 #ifdef _OS_WINDOWS_
-typedef DWORD jl_thread_t;
+typedef HANDLE jl_thread_t;
 #else
 typedef pthread_t jl_thread_t;
 #endif
@@ -122,83 +121,7 @@ typedef struct {
     uint32_t count;
 } jl_mutex_t;
 
-typedef struct {
-    jl_taggedvalue_t *freelist;   // root of list of free objects
-    jl_taggedvalue_t *newpages;   // root of list of chunks of free objects
-    uint16_t osize;      // size of objects in this pool
-} jl_gc_pool_t;
-
-typedef struct {
-    _Atomic(int64_t) allocd;
-    _Atomic(int64_t) freed;
-    _Atomic(uint64_t) malloc;
-    _Atomic(uint64_t) realloc;
-    _Atomic(uint64_t) poolalloc;
-    _Atomic(uint64_t) bigalloc;
-    _Atomic(uint64_t) freecall;
-} jl_thread_gc_num_t;
-
-typedef struct {
-    // variable for tracking weak references
-    arraylist_t weak_refs;
-    // live tasks started on this thread
-    // that are holding onto a stack from the pool
-    arraylist_t live_tasks;
-
-    // variables for tracking malloc'd arrays
-    struct _mallocarray_t *mallocarrays;
-    struct _mallocarray_t *mafreelist;
-
-    // variables for tracking big objects
-    struct _bigval_t *big_objects;
-
-    // variables for tracking "remembered set"
-    arraylist_t _remset[2]; // contains jl_value_t*
-    // lower bound of the number of pointers inside remembered values
-    int remset_nptr;
-    arraylist_t *remset;
-    arraylist_t *last_remset;
-
-    // variables for allocating objects from pools
-#ifdef _P64
-#  define JL_GC_N_POOLS 49
-#elif MAX_ALIGN == 8
-#  define JL_GC_N_POOLS 50
-#else
-#  define JL_GC_N_POOLS 51
-#endif
-    jl_gc_pool_t norm_pools[JL_GC_N_POOLS];
-
-#define JL_N_STACK_POOLS 16
-    arraylist_t free_stacks[JL_N_STACK_POOLS];
-} jl_thread_heap_t;
-
-typedef struct {
-    ws_queue_t chunk_queue;
-    ws_queue_t ptr_queue;
-    arraylist_t reclaim_set;
-} jl_gc_markqueue_t;
-
-typedef struct {
-    // thread local increment of `perm_scanned_bytes`
-    size_t perm_scanned_bytes;
-    // thread local increment of `scanned_bytes`
-    size_t scanned_bytes;
-    // Number of queued big objects (<= 1024)
-    size_t nbig_obj;
-    // Array of queued big objects to be moved between the young list
-    // and the old list.
-    // A set low bit means that the object should be moved from the old list
-    // to the young list (`mark_reset_age`).
-    // Objects can only be put into this list when the mark bit is flipped to
-    // `1` (atomically). Combining with the sync after marking,
-    // this makes sure that a single objects can only appear once in
-    // the lists (the mark bit cannot be flipped to `0` without sweeping)
-    void *big_obj[1024];
-} jl_gc_mark_cache_t;
-
 struct _jl_bt_element_t;
-struct _jl_gc_pagemeta_t;
 
 // This includes all the thread local states we care about for a thread.
 // Changes to TLS field types must be reflected in codegen.
@@ -207,40 +130,44 @@ typedef struct _jl_tls_states_t {
     int16_t tid;
     int8_t threadpoolid;
     uint64_t rngseed;
-    volatile size_t *safepoint;
+    _Atomic(volatile size_t *) safepoint; // may be changed to the suspend page by any thread
     _Atomic(int8_t) sleep_check_state; // read/write from foreign threads
     // Whether it is safe to execute GC at the same time.
+#define JL_GC_STATE_UNSAFE 0
+    // gc_state = 0 means the thread is running Julia code and is not
+    //              safe to run concurrently to the GC
 #define JL_GC_STATE_WAITING 1
     // gc_state = 1 means the thread is doing GC or is waiting for the GC to
     //              finish.
 #define JL_GC_STATE_SAFE 2
     // gc_state = 2 means the thread is running unmanaged code that can be
     //              execute at the same time with the GC.
+#define JL_GC_PARALLEL_COLLECTOR_THREAD 3
+    // gc_state = 3 means the thread is a parallel collector thread (i.e. never runs Julia code)
+#define JL_GC_CONCURRENT_COLLECTOR_THREAD 4
+    // gc_state = 4 means the thread is a concurrent collector thread (background sweeper thread that never runs Julia code)
     _Atomic(int8_t) gc_state; // read from foreign threads
     // execution of certain certain impure
     // statements is prohibited from certain
     // callbacks (such as generated functions)
     // as it may make compilation undecidable
-    int8_t in_pure_callback;
-    int8_t in_finalizer;
-    int8_t disable_gc;
+    int16_t in_pure_callback;
+    int16_t in_finalizer;
+    int16_t disable_gc;
     // Counter to disable finalizer **on the current thread**
     int finalizers_inhibited;
-    jl_thread_heap_t heap; // this is very large, and the offset is baked into codegen
-    jl_thread_gc_num_t gc_num;
+    jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen
+    jl_gc_tls_states_common_t gc_tls_common; // common tls for both GCs
+    small_arraylist_t lazily_freed_mtarraylist_buffers;
     volatile sig_atomic_t defer_signal;
     _Atomic(struct _jl_task_t*) current_task;
     struct _jl_task_t *next_task;
     struct _jl_task_t *previous_task;
     struct _jl_task_t *root_task;
     struct _jl_timing_block_t *timing_stack;
+    // This is the location of our copy_stack
     void *stackbase;
     size_t stacksize;
-    union {
-        _jl_ucontext_t base_ctx; // base context of stack
-        // This hack is needed to support always_copy_stacks:
-        jl_stack_context_t copy_stack_ctx;
-    };
     // Temp storage for exception thrown in signal handler. Not rooted.
     struct _jl_value_t *sig_exception;
     // Temporary backtrace buffer. Scanned for gc roots when bt_size > 0.
@@ -258,20 +185,21 @@ typedef struct _jl_tls_states_t {
     int needs_resetstkoflw;
 #else
     void *signal_stack;
+    size_t signal_stack_size;
 #endif
     jl_thread_t system_id;
+    _Atomic(int16_t) suspend_count;
     arraylist_t finalizers;
-    struct _jl_gc_pagemeta_t *page_metadata_allocd;
-    struct _jl_gc_pagemeta_t *page_metadata_lazily_freed;
-    jl_gc_markqueue_t mark_queue;
-    jl_gc_mark_cache_t gc_cache;
-    arraylist_t sweep_objs;
     // Saved exception for previous *external* API call or NULL if cleared.
     // Access via jl_exception_occurred().
     struct _jl_value_t *previous_exception;
+#ifdef _OS_DARWIN_
+    jl_jmp_buf *volatile safe_restore;
+#endif
 
     // currently-held locks, to be released when an exception is thrown
     small_arraylist_t locks;
+    size_t engine_nqueued;
 
     JULIA_DEBUG_SLEEPWAKE(
         uint64_t uv_run_enter;
@@ -287,10 +215,74 @@ typedef struct _jl_tls_states_t {
 #endif
 } jl_tls_states_t;
 
-#ifndef JL_LIBRARY_EXPORTS
-// deprecated (only for external consumers)
-JL_DLLEXPORT void *jl_get_ptls_states(void);
+#define JL_RNG_SIZE 5 // xoshiro 4 + splitmix 1
+
+typedef struct _jl_timing_block_t jl_timing_block_t;
+typedef struct _jl_timing_event_t jl_timing_event_t;
+typedef struct _jl_excstack_t jl_excstack_t;
+
+typedef struct _jl_handler_t jl_handler_t;
+
+typedef struct _jl_task_t {
+    JL_DATA_TYPE
+    jl_value_t *next; // invasive linked list for scheduler
+    jl_value_t *queue; // invasive linked list for scheduler
+    jl_value_t *tls;
+    jl_value_t *donenotify;
+    jl_value_t *result;
+    jl_value_t *scope;
+    jl_value_t *start;
+    _Atomic(uint8_t) _state;
+    uint8_t sticky; // record whether this Task can be migrated to a new thread
+    uint16_t priority;
+    _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with
+    uint8_t pad0[3];
+    // === 64 bytes (cache line)
+    uint64_t rngState[JL_RNG_SIZE];
+    // flag indicating whether or not to record timing metrics for this task
+    uint8_t metrics_enabled;
+    uint8_t pad1[3];
+    // timestamp this task first entered the run queue
+    _Atomic(uint64_t) first_enqueued_at;
+    // timestamp this task was most recently scheduled to run
+    _Atomic(uint64_t) last_started_running_at;
+    // time this task has spent running; updated when it yields or finishes.
+    _Atomic(uint64_t) running_time_ns;
+    // === 64 bytes (cache line)
+    // timestamp this task finished (i.e. entered state DONE or FAILED).
+    _Atomic(uint64_t) finished_at;
+
+// hidden state:
+
+    // id of owning thread - does not need to be defined until the task runs
+    _Atomic(int16_t) tid;
+    // threadpool id
+    int8_t threadpoolid;
+    // Reentrancy bits
+    // Bit 0: 1 if we are currently running inference/codegen
+    // Bit 1-2: 0-3 counter of how many times we've reentered inference
+    // Bit 3: 1 if we are writing the image and inference is illegal
+    uint8_t reentrant_timing;
+    // 2 bytes of padding on 32-bit, 6 bytes on 64-bit
+    // uint16_t padding2_32;
+    // uint48_t padding2_64;
+    // saved gc stack top for context switches
+    jl_gcframe_t *gcstack;
+    size_t world_age;
+    // quick lookup for current ptls
+    jl_ptls_t ptls; // == jl_all_tls_states[tid]
+#ifdef USE_TRACY
+    const char *name;
 #endif
+    // saved exception stack
+    jl_excstack_t *excstack;
+    // current exception handler
+    jl_handler_t *eh;
+    // saved thread state
+    jl_ucontext_t ctx; // pointer into stkbuf, if suspended
+} jl_task_t;
+
+JL_DLLEXPORT void *jl_get_ptls_states(void);
 
 // Update codegen version in `ccall.cpp` after changing either `pause` or `wake`
 #ifdef __MIC__
@@ -328,26 +320,26 @@ void jl_sigint_safepoint(jl_ptls_t tls);
 // This triggers a SegFault when we are in GC
 // Assign it to a variable to make sure the compiler emit the load
 // and to avoid Clang warning for -Wunused-volatile-lvalue
-#define jl_gc_safepoint_(ptls) do {                     \
-        jl_signal_fence();                              \
-        size_t safepoint_load = *ptls->safepoint;       \
-        jl_signal_fence();                              \
-        (void)safepoint_load;                           \
+#define jl_gc_safepoint_(ptls) do {                                            \
+        jl_signal_fence();                                                     \
+        size_t safepoint_load = jl_atomic_load_relaxed(&ptls->safepoint)[0];   \
+        jl_signal_fence();                                                     \
+        (void)safepoint_load;                                                  \
     } while (0)
-#define jl_sigint_safepoint(ptls) do {                  \
-        jl_signal_fence();                              \
-        size_t safepoint_load = ptls->safepoint[-1];    \
-        jl_signal_fence();                              \
-        (void)safepoint_load;                           \
+#define jl_sigint_safepoint(ptls) do {                                         \
+        jl_signal_fence();                                                     \
+        size_t safepoint_load = jl_atomic_load_relaxed(&ptls->safepoint)[-1];  \
+        jl_signal_fence();                                                     \
+        (void)safepoint_load;                                                  \
     } while (0)
 #endif
 STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state,
                                      int8_t old_state)
 {
+    assert(old_state != JL_GC_PARALLEL_COLLECTOR_THREAD);
+    assert(old_state != JL_GC_CONCURRENT_COLLECTOR_THREAD);
     jl_atomic_store_release(&ptls->gc_state, state);
-    // A safe point is required if we transition from GC-safe region to
-    // non GC-safe region.
-    if (old_state && !state)
+    if (state == JL_GC_STATE_UNSAFE || old_state == JL_GC_STATE_UNSAFE)
         jl_gc_safepoint_(ptls);
     return old_state;
 }
@@ -357,26 +349,31 @@ STATIC_INLINE int8_t jl_gc_state_save_and_set(jl_ptls_t ptls,
     return jl_gc_state_set(ptls, state, jl_atomic_load_relaxed(&ptls->gc_state));
 }
 #ifdef __clang_gcanalyzer__
-int8_t jl_gc_unsafe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; // this could be a safepoint, but we will assume it is not
-void jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
-int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
-void jl_gc_safe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_LEAVE; // this might not be a safepoint, but we have to assume it could be (statically)
+// these might not be a safepoint (if they are no-op safe=>safe transitions), but we have to assume it could be (statically)
+// however mark a delineated region in which safepoints would be not permissible
+int8_t jl_gc_unsafe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT_LEAVE;
+void jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_ENTER;
+int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT_ENTER;
+void jl_gc_safe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_LEAVE;
 #else
-#define jl_gc_unsafe_enter(ptls) jl_gc_state_save_and_set(ptls, 0)
-#define jl_gc_unsafe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), 0))
+#define jl_gc_unsafe_enter(ptls) jl_gc_state_save_and_set(ptls, JL_GC_STATE_UNSAFE)
+#define jl_gc_unsafe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), JL_GC_STATE_UNSAFE))
 #define jl_gc_safe_enter(ptls) jl_gc_state_save_and_set(ptls, JL_GC_STATE_SAFE)
 #define jl_gc_safe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), JL_GC_STATE_SAFE))
 #endif
 
 JL_DLLEXPORT void jl_gc_enable_finalizers(struct _jl_task_t *ct, int on);
-JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void);
+JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void);
 JL_DLLEXPORT void jl_gc_run_pending_finalizers(struct _jl_task_t *ct);
 extern JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers;
-JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void);
+JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT void jl_wakeup_thread(int16_t tid);
 
+JL_DLLEXPORT int jl_getaffinity(int16_t tid, char *mask, int cpumasksize);
+JL_DLLEXPORT int jl_setaffinity(int16_t tid, char *mask, int cpumasksize);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/llvm-Compression.cpp b/src/llvm-Compression.cpp
new file mode 100644
index 0000000000000..c83f626747c27
--- /dev/null
+++ b/src/llvm-Compression.cpp
@@ -0,0 +1,245 @@
+//===--- Compression.cpp - Compression implementation ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements compression functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-Compression.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#if LLVM_ENABLE_ZLIB
+#include <zlib.h>
+#endif
+#if LLVM_ENABLE_ZSTD
+#include <zstd.h>
+#endif
+
+using namespace llvm;
+using namespace llvm::compression;
+
+const char *compression::getReasonIfUnsupported(compression::Format F) {
+  switch (F) {
+  case compression::Format::Zlib:
+    if (zlib::isAvailable())
+      return nullptr;
+    return "LLVM was not built with LLVM_ENABLE_ZLIB or did not find zlib at "
+           "build time";
+  case compression::Format::Zstd:
+    if (zstd::isAvailable())
+      return nullptr;
+    return "LLVM was not built with LLVM_ENABLE_ZSTD or did not find zstd at "
+           "build time";
+  }
+  llvm_unreachable("");
+}
+
+void compression::compress(Params P, ArrayRef<uint8_t> Input,
+                           SmallVectorImpl<uint8_t> &Output) {
+  switch (P.format) {
+  case compression::Format::Zlib:
+    zlib::compress(Input, Output, P.level);
+    break;
+  case compression::Format::Zstd:
+    zstd::compress(Input, Output, P.level, P.zstdEnableLdm);
+    break;
+  }
+}
+
+Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
+                              uint8_t *Output, size_t UncompressedSize) {
+  switch (formatFor(T)) {
+  case compression::Format::Zlib:
+    return zlib::decompress(Input, Output, UncompressedSize);
+  case compression::Format::Zstd:
+    return zstd::decompress(Input, Output, UncompressedSize);
+  }
+  llvm_unreachable("");
+}
+
+Error compression::decompress(compression::Format F, ArrayRef<uint8_t> Input,
+                              SmallVectorImpl<uint8_t> &Output,
+                              size_t UncompressedSize) {
+  switch (F) {
+  case compression::Format::Zlib:
+    return zlib::decompress(Input, Output, UncompressedSize);
+  case compression::Format::Zstd:
+    return zstd::decompress(Input, Output, UncompressedSize);
+  }
+  llvm_unreachable("");
+}
+
+Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
+                              SmallVectorImpl<uint8_t> &Output,
+                              size_t UncompressedSize) {
+  return decompress(formatFor(T), Input, Output, UncompressedSize);
+}
+
+#if LLVM_ENABLE_ZLIB
+
+static StringRef convertZlibCodeToString(int Code) {
+  switch (Code) {
+  case Z_MEM_ERROR:
+    return "zlib error: Z_MEM_ERROR";
+  case Z_BUF_ERROR:
+    return "zlib error: Z_BUF_ERROR";
+  case Z_STREAM_ERROR:
+    return "zlib error: Z_STREAM_ERROR";
+  case Z_DATA_ERROR:
+    return "zlib error: Z_DATA_ERROR";
+  case Z_OK:
+  default:
+    llvm_unreachable("unknown or unexpected zlib status code");
+  }
+}
+
+bool zlib::isAvailable() { return true; }
+
+void zlib::compress(ArrayRef<uint8_t> Input,
+                    SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
+  unsigned long CompressedSize = ::compressBound(Input.size());
+  CompressedBuffer.resize_for_overwrite(CompressedSize);
+  int Res = ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
+                        (const Bytef *)Input.data(), Input.size(), Level);
+  if (Res == Z_MEM_ERROR)
+    report_bad_alloc_error("Allocation failed");
+  assert(Res == Z_OK);
+  // Tell MemorySanitizer that zlib output buffer is fully initialized.
+  // This avoids a false report when running LLVM with uninstrumented ZLib.
+  __msan_unpoison(CompressedBuffer.data(), CompressedSize);
+  if (CompressedSize < CompressedBuffer.size())
+    CompressedBuffer.truncate(CompressedSize);
+}
+
+Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
+                       size_t &UncompressedSize) {
+  int Res = ::uncompress((Bytef *)Output, (uLongf *)&UncompressedSize,
+                         (const Bytef *)Input.data(), Input.size());
+  // Tell MemorySanitizer that zlib output buffer is fully initialized.
+  // This avoids a false report when running LLVM with uninstrumented ZLib.
+  __msan_unpoison(Output, UncompressedSize);
+  return Res ? make_error<StringError>(convertZlibCodeToString(Res),
+                                       inconvertibleErrorCode())
+             : Error::success();
+}
+
+Error zlib::decompress(ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &Output,
+                       size_t UncompressedSize) {
+  Output.resize_for_overwrite(UncompressedSize);
+  Error E = zlib::decompress(Input, Output.data(), UncompressedSize);
+  if (UncompressedSize < Output.size())
+    Output.truncate(UncompressedSize);
+  return E;
+}
+
+#else
+bool zlib::isAvailable() { return false; }
+void zlib::compress(ArrayRef<uint8_t> Input,
+                    SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
+  llvm_unreachable("zlib::compress is unavailable");
+}
+Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
+                       size_t &UncompressedSize) {
+  llvm_unreachable("zlib::decompress is unavailable");
+}
+Error zlib::decompress(ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &UncompressedBuffer,
+                       size_t UncompressedSize) {
+  llvm_unreachable("zlib::decompress is unavailable");
+}
+#endif
+
+#if LLVM_ENABLE_ZSTD
+
+bool zstd::isAvailable() { return true; }
+
+#include <zstd.h> // Ensure ZSTD library is included
+
+void zstd::compress(ArrayRef<uint8_t> Input,
+                    SmallVectorImpl<uint8_t> &CompressedBuffer, int Level,
+                    bool EnableLdm) {
+  ZSTD_CCtx *Cctx = ZSTD_createCCtx();
+  if (!Cctx)
+    report_bad_alloc_error("Failed to create ZSTD_CCtx");
+
+  if (ZSTD_isError(ZSTD_CCtx_setParameter(
+          Cctx, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
+    ZSTD_freeCCtx(Cctx);
+    report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
+  }
+
+  if (ZSTD_isError(
+          ZSTD_CCtx_setParameter(Cctx, ZSTD_c_compressionLevel, Level))) {
+    ZSTD_freeCCtx(Cctx);
+    report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
+  }
+
+  unsigned long CompressedBufferSize = ZSTD_compressBound(Input.size());
+  CompressedBuffer.resize_for_overwrite(CompressedBufferSize);
+
+  size_t const CompressedSize =
+      ZSTD_compress2(Cctx, CompressedBuffer.data(), CompressedBufferSize,
+                     Input.data(), Input.size());
+
+  ZSTD_freeCCtx(Cctx);
+
+  if (ZSTD_isError(CompressedSize))
+    report_bad_alloc_error("Compression failed");
+
+  __msan_unpoison(CompressedBuffer.data(), CompressedSize);
+  if (CompressedSize < CompressedBuffer.size())
+    CompressedBuffer.truncate(CompressedSize);
+}
+
+Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
+                       size_t &UncompressedSize) {
+  const size_t Res = ::ZSTD_decompress(
+      Output, UncompressedSize, (const uint8_t *)Input.data(), Input.size());
+  UncompressedSize = Res;
+  if (ZSTD_isError(Res))
+    return make_error<StringError>(ZSTD_getErrorName(Res),
+                                   inconvertibleErrorCode());
+  // Tell MemorySanitizer that zstd output buffer is fully initialized.
+  // This avoids a false report when running LLVM with uninstrumented ZLib.
+  __msan_unpoison(Output, UncompressedSize);
+  return Error::success();
+}
+
+Error zstd::decompress(ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &Output,
+                       size_t UncompressedSize) {
+  Output.resize_for_overwrite(UncompressedSize);
+  Error E = zstd::decompress(Input, Output.data(), UncompressedSize);
+  if (UncompressedSize < Output.size())
+    Output.truncate(UncompressedSize);
+  return E;
+}
+
+#else
+bool zstd::isAvailable() { return false; }
+void zstd::compress(ArrayRef<uint8_t> Input,
+                    SmallVectorImpl<uint8_t> &CompressedBuffer, int Level,
+                    bool EnableLdm) {
+  llvm_unreachable("zstd::compress is unavailable");
+}
+Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
+                       size_t &UncompressedSize) {
+  llvm_unreachable("zstd::decompress is unavailable");
+}
+Error zstd::decompress(ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &Output,
+                       size_t UncompressedSize) {
+  llvm_unreachable("zstd::decompress is unavailable");
+}
+#endif
diff --git a/src/llvm-Compression.h b/src/llvm-Compression.h
new file mode 100644
index 0000000000000..246ccbd6f6dcf
--- /dev/null
+++ b/src/llvm-Compression.h
@@ -0,0 +1,136 @@
+//===-- llvm/Support/Compression.h ---Compression----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains basic functions for compression/decompression.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_COMPRESSION_H
+#define LLVM_SUPPORT_COMPRESSION_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+template <typename T> class SmallVectorImpl;
+class Error;
+
+// None indicates no compression. The other members are a subset of
+// compression::Format, which is used for compressed debug sections in some
+// object file formats (e.g. ELF). This is a separate class as we may add new
+// compression::Format members for non-debugging purposes.
+enum class DebugCompressionType {
+  None, ///< No compression
+  Zlib, ///< zlib
+  Zstd, ///< Zstandard
+};
+
+namespace compression {
+namespace zlib {
+
+constexpr int NoCompression = 0;
+constexpr int BestSpeedCompression = 1;
+constexpr int DefaultCompression = 6;
+constexpr int BestSizeCompression = 9;
+
+LLVM_ABI bool isAvailable();
+
+LLVM_ABI void compress(ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &CompressedBuffer,
+                       int Level = DefaultCompression);
+
+LLVM_ABI Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
+                          size_t &UncompressedSize);
+
+LLVM_ABI Error decompress(ArrayRef<uint8_t> Input,
+                          SmallVectorImpl<uint8_t> &Output,
+                          size_t UncompressedSize);
+
+} // End of namespace zlib
+
+namespace zstd {
+
+constexpr int NoCompression = -5;
+constexpr int BestSpeedCompression = 1;
+constexpr int DefaultCompression = 5;
+constexpr int BestSizeCompression = 12;
+
+LLVM_ABI bool isAvailable();
+
+LLVM_ABI void compress(ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &CompressedBuffer,
+                       int Level = DefaultCompression, bool EnableLdm = false);
+
+LLVM_ABI Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
+                          size_t &UncompressedSize);
+
+LLVM_ABI Error decompress(ArrayRef<uint8_t> Input,
+                          SmallVectorImpl<uint8_t> &Output,
+                          size_t UncompressedSize);
+
+} // End of namespace zstd
+
+enum class Format {
+  Zlib,
+  Zstd,
+};
+
+inline Format formatFor(DebugCompressionType Type) {
+  switch (Type) {
+  case DebugCompressionType::None:
+    llvm_unreachable("not a compression type");
+  case DebugCompressionType::Zlib:
+    return Format::Zlib;
+  case DebugCompressionType::Zstd:
+    return Format::Zstd;
+  }
+  llvm_unreachable("");
+}
+
+struct Params {
+  constexpr Params(Format F)
+      : format(F), level(F == Format::Zlib ? zlib::DefaultCompression
+                                           : zstd::DefaultCompression) {}
+  constexpr Params(Format F, int L, bool Ldm = false)
+      : format(F), level(L), zstdEnableLdm(Ldm) {}
+  Params(DebugCompressionType Type) : Params(formatFor(Type)) {}
+
+  Format format;
+  int level;
+  bool zstdEnableLdm = false; // Enable zstd long distance matching
+  // This may support multi-threading for zstd in the future. Note that
+  // different threads may produce different output, so be careful if certain
+  // output determinism is desired.
+};
+
+// Return nullptr if LLVM was built with support (LLVM_ENABLE_ZLIB,
+// LLVM_ENABLE_ZSTD) for the specified compression format; otherwise
+// return a string literal describing the reason.
+LLVM_ABI const char *getReasonIfUnsupported(Format F);
+
+// Compress Input with the specified format P.Format. If Level is -1, use
+// *::DefaultCompression for the format.
+LLVM_ABI void compress(Params P, ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &Output);
+
+// Decompress Input. The uncompressed size must be available.
+LLVM_ABI Error decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
+                          uint8_t *Output, size_t UncompressedSize);
+LLVM_ABI Error decompress(Format F, ArrayRef<uint8_t> Input,
+                          SmallVectorImpl<uint8_t> &Output,
+                          size_t UncompressedSize);
+LLVM_ABI Error decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
+                          SmallVectorImpl<uint8_t> &Output,
+                          size_t UncompressedSize);
+
+} // End of namespace compression
+
+} // End of namespace llvm
+
+#endif
diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp
index d24c08b4b4930..a1ed66a190190 100644
--- a/src/llvm-alloc-helpers.cpp
+++ b/src/llvm-alloc-helpers.cpp
@@ -88,6 +88,8 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset,
     memop.isaggr = isa<StructType>(elty) || isa<ArrayType>(elty) || isa<VectorType>(elty);
     memop.isobjref = hasObjref(elty);
     auto &field = getField(offset, size, elty);
+    field.second.hasunboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
+
     if (field.second.hasobjref != memop.isobjref)
         field.second.multiloc = true; // can't split this field, since it contains a mix of references and bits
     if (!isstore)
@@ -125,13 +127,23 @@ JL_USED_FUNC void AllocUseInfo::dump(llvm::raw_ostream &OS)
     OS << "hastypeof: " << hastypeof << '\n';
     OS << "refload: " << refload << '\n';
     OS << "refstore: " << refstore << '\n';
+    OS << "allockind:";
+    if ((allockind & AllocFnKind::Uninitialized) != AllocFnKind::Unknown)
+      OS << " uninitialized";
+    if ((allockind & AllocFnKind::Zeroed) != AllocFnKind::Unknown)
+      OS << " zeroed";
+    OS << '\n';
     OS << "Uses: " << uses.size() << '\n';
-    for (auto inst: uses)
+    for (auto inst: uses) {
         inst->print(OS);
+        OS << '\n';
+    }
     if (!preserves.empty()) {
         OS << "Preserves: " << preserves.size() << '\n';
-        for (auto inst: preserves)
+        for (auto inst: preserves) {
             inst->print(OS);
+            OS << '\n';
+        }
     }
     OS << "MemOps: " << memops.size() << '\n';
     for (auto &field: memops) {
@@ -164,8 +176,11 @@ JL_USED_FUNC void AllocUseInfo::dump()
 #define REMARK(remark)
 #endif
 
-void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options) {
+void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options) {
     required.use_info.reset();
+    Attribute allockind = I->getFnAttr(Attribute::AllocKind);
+    if (allockind.isValid())
+        required.use_info.allockind = allockind.getAllocKind();
     if (I->use_empty())
         return;
     CheckInst::Frame cur{I, 0, I->use_begin(), I->use_end()};
@@ -189,6 +204,7 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
                 auto elty = inst->getType();
                 required.use_info.has_unknown_objref |= hasObjref(elty);
                 required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             } else if (!required.use_info.addMemOp(inst, 0, cur.offset,
                                                                inst->getType(),
@@ -198,6 +214,7 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
         }
         if (auto call = dyn_cast<CallInst>(inst)) {
             // TODO handle `memcmp`
+            // TODO handle `memcpy` which is used a lot more often since opaque pointers
             // None of the intrinsics should care if the memory is stack or heap allocated.
             auto callee = call->getCalledOperand();
             if (auto II = dyn_cast<IntrinsicInst>(call)) {
@@ -233,6 +250,11 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
                 required.use_info.addrescaped = true;
                 return true;
             }
+            if (required.pass.gc_loaded_func == callee) {
+                // TODO add manual load->store forwarding
+                push_inst(inst);
+                return true;
+            }
             if (required.pass.typeof_func == callee) {
                 required.use_info.hastypeof = true;
                 assert(use->get() == I);
@@ -251,9 +273,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
                 }
                 LLVM_DEBUG(dbgs() << "Unknown call, marking escape\n");
                 REMARK([&]() {
+                    std::string str;
+                    llvm::raw_string_ostream rso(str);
+                    inst->print(rso);
                     return OptimizationRemarkMissed(DEBUG_TYPE, "UnknownCall",
                                                     inst)
-                           << "Unknown call, marking escape (" << ore::NV("Call", inst) << ")";
+                           << "Unknown call, marking escape (" << ore::NV("Call", StringRef(str)) << ")";
                 });
                 required.use_info.escaped = true;
                 return false;
@@ -267,9 +292,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
             if (use->getOperandNo() != StoreInst::getPointerOperandIndex()) {
                 LLVM_DEBUG(dbgs() << "Object address is stored somewhere, marking escape\n");
                 REMARK([&]() {
+                    std::string str;
+                    llvm::raw_string_ostream rso(str);
+                    inst->print(rso);
                     return OptimizationRemarkMissed(DEBUG_TYPE, "StoreObjAddr",
                                                     inst)
-                           << "Object address is stored somewhere, marking escape (" << ore::NV("Store", inst) << ")";
+                           << "Object address is stored somewhere, marking escape (" << ore::NV("Store", StringRef(str)) << ")";
                 });
                 required.use_info.escaped = true;
                 return false;
@@ -280,6 +308,7 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
                 auto elty = storev->getType();
                 required.use_info.has_unknown_objref |= hasObjref(elty);
                 required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             } else if (!required.use_info.addMemOp(inst, use->getOperandNo(),
                                                                cur.offset, storev->getType(),
@@ -292,19 +321,26 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
             if (use->getOperandNo() != isa<AtomicCmpXchgInst>(inst) ? AtomicCmpXchgInst::getPointerOperandIndex() : AtomicRMWInst::getPointerOperandIndex()) {
                 LLVM_DEBUG(dbgs() << "Object address is cmpxchg/rmw-ed somewhere, marking escape\n");
                 REMARK([&]() {
+                    std::string str;
+                    llvm::raw_string_ostream rso(str);
+                    inst->print(rso);
                     return OptimizationRemarkMissed(DEBUG_TYPE, "StoreObjAddr",
                                                     inst)
-                           << "Object address is cmpxchg/rmw-ed somewhere, marking escape (" << ore::NV("Store", inst) << ")";
+                           << "Object address is cmpxchg/rmw-ed somewhere, marking escape (" << ore::NV("Store", StringRef(str)) << ")";
                 });
                 required.use_info.escaped = true;
                 return false;
             }
             required.use_info.hasload = true;
             auto storev = isa<AtomicCmpXchgInst>(inst) ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
+            Type *elty = storev->getType();
             if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
-                                                               cur.offset, storev->getType(),
+                                                               cur.offset, elty,
                                                                true, required.DL)) {
                 LLVM_DEBUG(dbgs() << "Atomic inst has unknown offset\n");
+                required.use_info.has_unknown_objref |= hasObjref(elty);
+                required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             }
             required.use_info.refload = true;
@@ -325,7 +361,7 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
                 else {
                     next_offset = apoffset.getLimitedValue();
                     if (next_offset > UINT32_MAX) {
-                        LLVM_DEBUG(dbgs() << "GEP inst exceeeds 32-bit offset\n");
+                        LLVM_DEBUG(dbgs() << "GEP inst exceeds 32-bit offset\n");
                         next_offset = UINT32_MAX;
                     }
                 }
@@ -341,9 +377,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
         }
         LLVM_DEBUG(dbgs() << "Unknown instruction, marking escape\n");
         REMARK([&]() {
+            std::string str;
+            llvm::raw_string_ostream rso(str);
+            inst->print(rso);
             return OptimizationRemarkMissed(DEBUG_TYPE, "UnknownInst",
                                             inst)
-                   << "Unknown instruction, marking escape (" << ore::NV("Inst", inst) << ")";
+                   << "Unknown instruction, marking escape (" << ore::NV("Inst", StringRef(str)) << ")";
         });
         required.use_info.escaped = true;
         return false;
diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h
index 3bd80704a0888..20e9132d10b4c 100644
--- a/src/llvm-alloc-helpers.h
+++ b/src/llvm-alloc-helpers.h
@@ -46,6 +46,8 @@ namespace jl_alloc {
         bool hasaggr:1;
         bool multiloc:1;
         bool hasload:1;
+        // The alloc has a unboxed object at this offset.
+        bool hasunboxed:1;
         llvm::Type *elty;
         llvm::SmallVector<MemOp,4> accesses;
         Field(uint32_t size, llvm::Type *elty)
@@ -54,6 +56,7 @@ namespace jl_alloc {
               hasaggr(false),
               multiloc(false),
               hasload(false),
+              hasunboxed(false),
               elty(elty)
         {
         }
@@ -87,12 +90,17 @@ namespace jl_alloc {
         bool returned:1;
         // The object is used in an error function
         bool haserror:1;
+        // For checking attributes of "uninitialized" or "zeroed" or unknown
+        llvm::AllocFnKind allockind;
 
         // The alloc has a Julia object reference not in an explicit field.
         bool has_unknown_objref:1;
         // The alloc has an aggregate Julia object reference not in an explicit field.
         bool has_unknown_objrefaggr:1;
 
+        // The alloc has an unboxed object at an unknown offset.
+        bool has_unknown_unboxed:1;
+
         void reset()
         {
             escaped = false;
@@ -105,8 +113,10 @@ namespace jl_alloc {
             hasunknownmem = false;
             returned = false;
             haserror = false;
+            allockind = llvm::AllocFnKind::Unknown;
             has_unknown_objref = false;
             has_unknown_objrefaggr = false;
+            has_unknown_unboxed = false;
             uses.clear();
             preserves.clear();
             memops.clear();
@@ -153,7 +163,7 @@ namespace jl_alloc {
         }
     };
 
-    void runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options=EscapeAnalysisOptionalArgs());
+    void runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options=EscapeAnalysisOptionalArgs());
 }
 
 
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index b87a5a6799b0b..56bb1ab7c706b 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -13,7 +13,6 @@
 #include <llvm/Analysis/OptimizationRemarkEmitter.h>
 #include <llvm/IR/Value.h>
 #include <llvm/IR/CFG.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Dominators.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
@@ -80,6 +79,7 @@ static void removeGCPreserve(CallInst *call, Instruction *val)
  *
  * * load
  * * `pointer_from_objref`
+ * * `gc_loaded`
  * * Any real llvm intrinsics
  * * gc preserve intrinsics
  * * `ccall` gcroot array (`jl_roots` operand bundle)
@@ -95,7 +95,6 @@ static void removeGCPreserve(CallInst *call, Instruction *val)
  * TODO:
  * * Return twice
  * * Handle phi node.
- * * Look through `pointer_from_objref`.
  * * Handle jl_box*
  */
 
@@ -136,12 +135,13 @@ struct Optimizer {
     // insert llvm.lifetime.* calls for `ptr` with size `sz` based on the use of `orig`.
     void insertLifetime(Value *ptr, Constant *sz, Instruction *orig);
 
-    void checkInst(Instruction *I);
+    void checkInst(CallInst *I);
 
     void replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID,
                                  Instruction *orig_i, Instruction *new_i);
     void removeAlloc(CallInst *orig_inst);
-    void moveToStack(CallInst *orig_inst, size_t sz, bool has_ref);
+    void moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocFnKind allockind);
+    void initializeAlloca(IRBuilder<> &prolog_builder, AllocaInst *buff, AllocFnKind allockind);
     void splitOnStack(CallInst *orig_inst);
     void optimizeTag(CallInst *orig_inst);
 
@@ -224,8 +224,11 @@ void Optimizer::optimizeAll()
         checkInst(orig);
         if (use_info.escaped) {
             REMARK([&]() {
+                std::string str;
+                llvm::raw_string_ostream rso(str);
+                orig->print(rso);
                 return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
-                    << "GC allocation escaped " << ore::NV("GC Allocation", orig);
+                    << "GC allocation escaped " << ore::NV("GC Allocation", StringRef(str));
             });
             if (use_info.hastypeof)
                 optimizeTag(orig);
@@ -233,8 +236,11 @@ void Optimizer::optimizeAll()
         }
         if (use_info.haserror || use_info.returned) {
             REMARK([&]() {
+                std::string str;
+                llvm::raw_string_ostream rso(str);
+                orig->print(rso);
                 return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
-                    << "GC allocation has error or was returned " << ore::NV("GC Allocation", orig);
+                    << "GC allocation has error or was returned " << ore::NV("GC Allocation", StringRef(str));
             });
             if (use_info.hastypeof)
                 optimizeTag(orig);
@@ -243,8 +249,11 @@ void Optimizer::optimizeAll()
         if (!use_info.addrescaped && !use_info.hasload && (!use_info.haspreserve ||
                                                            !use_info.refstore)) {
             REMARK([&]() {
+                std::string str;
+                llvm::raw_string_ostream rso(str);
+                orig->print(rso);
                 return OptimizationRemark(DEBUG_TYPE, "Dead Allocation", orig)
-                    << "GC allocation removed " << ore::NV("GC Allocation", orig);
+                    << "GC allocation removed " << ore::NV("GC Allocation", StringRef(str));
             });
             // No one took the address, no one reads anything and there's no meaningful
             // preserve of fields (either no preserve/ccall or no object reference fields)
@@ -252,10 +261,12 @@ void Optimizer::optimizeAll()
             removeAlloc(orig);
             continue;
         }
+        bool has_unboxed = use_info.has_unknown_unboxed;
         bool has_ref = use_info.has_unknown_objref;
         bool has_refaggr = use_info.has_unknown_objrefaggr;
         for (auto memop: use_info.memops) {
             auto &field = memop.second;
+            has_unboxed |= field.hasunboxed;
             if (field.hasobjref) {
                 has_ref = true;
                 // This can be relaxed a little based on hasload
@@ -268,8 +279,11 @@ void Optimizer::optimizeAll()
         }
         if (has_refaggr) {
             REMARK([&]() {
+                std::string str;
+                llvm::raw_string_ostream rso(str);
+                orig->print(rso);
                 return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
-                    << "GC allocation has unusual object reference, unable to move to stack " << ore::NV("GC Allocation", orig);
+                    << "GC allocation has unusual object reference, unable to move to stack " << ore::NV("GC Allocation", StringRef(str));
             });
             if (use_info.hastypeof)
                 optimizeTag(orig);
@@ -277,19 +291,41 @@ void Optimizer::optimizeAll()
         }
         if (!use_info.hasunknownmem && !use_info.addrescaped) {
             REMARK([&](){
+                std::string str;
+                llvm::raw_string_ostream rso(str);
+                orig->print(rso);
                 return OptimizationRemark(DEBUG_TYPE, "Stack Split Allocation", orig)
-                    << "GC allocation split on stack " << ore::NV("GC Allocation", orig);
+                    << "GC allocation split on stack " << ore::NV("GC Allocation", StringRef(str));
             });
             // No one actually care about the memory layout of this object, split it.
             splitOnStack(orig);
             continue;
         }
+        // The move to stack code below, if has_ref is set, changes the allocation to an array of jlvalue_t's. This is fine
+        // if all objects are jlvalue_t's. However, if part of the allocation is an unboxed value (e.g. it is a { float, jlvaluet }),
+        // then moveToStack will create a [2 x jlvaluet] bitcast to { float, jlvaluet }.
+        // This later causes the GC rooting pass, to miss-characterize the float as a pointer to a GC value
+        if (has_unboxed && has_ref) {
+            REMARK([&]() {
+                std::string str;
+                llvm::raw_string_ostream rso(str);
+                orig->print(rso);
+                return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
+                    << "GC allocation could not be split since it contains both boxed and unboxed values, unable to move to stack " << ore::NV("GC Allocation", StringRef(str));
+            });
+            if (use_info.hastypeof)
+                optimizeTag(orig);
+            continue;
+        }
         REMARK([&](){
+            std::string str;
+            llvm::raw_string_ostream rso(str);
+            orig->print(rso);
             return OptimizationRemark(DEBUG_TYPE, "Stack Move Allocation", orig)
-                << "GC allocation moved to stack " << ore::NV("GC Allocation", orig);
+                << "GC allocation moved to stack " << ore::NV("GC Allocation", StringRef(str));
         });
         // The object has no fields with mix reference access
-        moveToStack(orig, sz, has_ref);
+        moveToStack(orig, sz, has_ref, use_info.allockind);
     }
 }
 
@@ -311,7 +347,9 @@ bool Optimizer::isSafepoint(Instruction *inst)
         return false;
     if (auto callee = call->getCalledFunction()) {
         // Known functions emitted in codegen that are not safepoints
-        if (callee == pass.pointer_from_objref_func || callee->getName() == "memcmp") {
+        if (callee == pass.pointer_from_objref_func
+            || callee == pass.gc_loaded_func
+            || callee->getName() == "memcmp") {
             return false;
         }
     }
@@ -354,7 +392,7 @@ ssize_t Optimizer::getGCAllocSize(Instruction *I)
     return -1;
 }
 
-void Optimizer::checkInst(Instruction *I)
+void Optimizer::checkInst(CallInst *I)
 {
     LLVM_DEBUG(dbgs() << "Running escape analysis on " << *I << "\n");
     jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, pass, *pass.DL};
@@ -363,7 +401,10 @@ void Optimizer::checkInst(Instruction *I)
         std::string suse_info;
         llvm::raw_string_ostream osuse_info(suse_info);
         use_info.dump(osuse_info);
-        return OptimizationRemarkAnalysis(DEBUG_TYPE, "EscapeAnalysis", I) << "escape analysis for " << ore::NV("GC Allocation", I) << "\n" << ore::NV("UseInfo", osuse_info.str());
+        std::string str;
+        llvm::raw_string_ostream rso(str);
+        I->print(rso);
+        return OptimizationRemarkAnalysis(DEBUG_TYPE, "EscapeAnalysis", I) << "escape analysis for " << ore::NV("GC Allocation", StringRef(str)) << "\n" << ore::NV("UseInfo", osuse_info.str());
     });
 }
 
@@ -386,12 +427,20 @@ void Optimizer::insertLifetimeEnd(Value *ptr, Constant *sz, Instruction *insert)
         }
         break;
     }
+#if JL_LLVM_VERSION >= 200000
+    CallInst::Create(pass.lifetime_end, {sz, ptr}, "", insert->getIterator());
+#else
     CallInst::Create(pass.lifetime_end, {sz, ptr}, "", insert);
+#endif
 }
 
 void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig)
 {
+#if JL_LLVM_VERSION >= 200000
+    CallInst::Create(pass.lifetime_start, {sz, ptr}, "", orig->getIterator());
+#else
     CallInst::Create(pass.lifetime_start, {sz, ptr}, "", orig);
+#endif
     BasicBlock *def_bb = orig->getParent();
     std::set<BasicBlock*> bbs{def_bb};
     auto &DT = getDomTree();
@@ -400,6 +449,8 @@ void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig)
         auto bb = use->getParent();
         if (!bbs.insert(bb).second)
             continue;
+        if (pred_empty(bb))
+            continue; // No predecessors so the block is dead
         assert(lifetime_stack.empty());
         Lifetime::Frame cur{bb};
         while (true) {
@@ -476,7 +527,7 @@ void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig)
     // within the BB.
     // If some successors are live and others are dead, it's the first instruction in
     // the successors that are dead.
-    std::vector<Instruction*> first_dead;
+    SmallVector<Instruction*, 0> first_dead;
     for (auto bb: bbs) {
         bool has_use = false;
         for (auto succ: successors(bb)) {
@@ -567,7 +618,7 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID,
     auto oldfType = call->getFunctionType();
     auto newfType = FunctionType::get(
             oldfType->getReturnType(),
-            makeArrayRef(argTys).slice(0, oldfType->getNumParams()),
+            ArrayRef<Type*>(argTys).slice(0, oldfType->getNumParams()),
             oldfType->isVarArg());
 
     // Accumulate an array of overloaded types for the given intrinsic
@@ -584,10 +635,18 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID,
         assert(matchvararg);
         (void)matchvararg;
     }
+#if JL_LLVM_VERSION >= 200000
+    auto newF = Intrinsic::getOrInsertDeclaration(call->getModule(), ID, overloadTys);
+#else
     auto newF = Intrinsic::getDeclaration(call->getModule(), ID, overloadTys);
+#endif
     assert(newF->getFunctionType() == newfType);
     newF->setCallingConv(call->getCallingConv());
+#if JL_LLVM_VERSION >= 200000
+    auto newCall = CallInst::Create(newF, args, "", call->getIterator());
+#else
     auto newCall = CallInst::Create(newF, args, "", call);
+#endif
     newCall->setTailCallKind(call->getTailCallKind());
     auto old_attrs = call->getAttributes();
     newCall->setAttributes(AttributeList::get(pass.getLLVMContext(), getFnAttrs(old_attrs),
@@ -597,9 +656,20 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID,
     call->eraseFromParent();
 }
 
+void Optimizer::initializeAlloca(IRBuilder<> &prolog_builder, AllocaInst *buff, AllocFnKind allockind)
+{
+    if ((allockind & AllocFnKind::Uninitialized) != AllocFnKind::Unknown)
+        return;
+    assert(!buff->isArrayAllocation());
+    Type *T = buff->getAllocatedType();
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    prolog_builder.CreateMemSet(buff, ConstantInt::get(Type::getInt8Ty(prolog_builder.getContext()), 0), DL.getTypeAllocSize(T), buff->getAlign());
+
+}
+
 // This function should not erase any safepoint so that the lifetime marker can find and cache
 // all the original safepoints.
-void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
+void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocFnKind allockind)
 {
     ++RemovedAllocs;
     ++StackAllocs;
@@ -629,7 +699,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
         auto asize = ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz / DL.getTypeAllocSize(pass.T_prjlvalue));
         buff = prolog_builder.CreateAlloca(pass.T_prjlvalue, asize);
         buff->setAlignment(Align(align));
-        ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext())));
+        ptr = cast<Instruction>(buff);
     }
     else {
         Type *buffty;
@@ -639,12 +709,14 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
             buffty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), sz);
         buff = prolog_builder.CreateAlloca(buffty);
         buff->setAlignment(Align(align));
-        ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext(), buff->getType()->getPointerAddressSpace())));
+        ptr = cast<Instruction>(buff);
     }
     insertLifetime(ptr, ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz), orig_inst);
-    Instruction *new_inst = cast<Instruction>(prolog_builder.CreateBitCast(ptr, JuliaType::get_pjlvalue_ty(prolog_builder.getContext(), buff->getType()->getPointerAddressSpace())));
-    if (orig_inst->getModule()->getDataLayout().getAllocaAddrSpace() != 0)
-        new_inst = cast<Instruction>(prolog_builder.CreateAddrSpaceCast(new_inst, JuliaType::get_pjlvalue_ty(prolog_builder.getContext(), orig_inst->getType()->getPointerAddressSpace())));
+    if (sz != 0 && !has_ref) { // TODO: fix has_ref case too
+        IRBuilder<> builder(orig_inst);
+        initializeAlloca(builder, buff, allockind);
+    }
+    Instruction *new_inst = cast<Instruction>(ptr);
     new_inst->takeName(orig_inst);
 
     auto simple_replace = [&] (Instruction *orig_i, Instruction *new_i) {
@@ -686,16 +758,23 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
     auto replace_inst = [&] (Instruction *user) {
         Instruction *orig_i = cur.orig_i;
         Instruction *new_i = cur.new_i;
-        if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
+        if (isa<LoadInst>(user) || isa<StoreInst>(user) ||
+            isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
+            // TODO: these atomics are likely removable if the user is the first argument
             user->replaceUsesOfWith(orig_i, new_i);
         }
         else if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
             if (pass.pointer_from_objref_func == callee) {
-                call->replaceAllUsesWith(new_i);
+                call->replaceAllUsesWith(prolog_builder.CreateAddrSpaceCast(new_i, call->getCalledFunction()->getReturnType()));
                 call->eraseFromParent();
                 return;
             }
+            if (pass.gc_loaded_func == callee) {
+                // TODO: handle data pointer forwarding, length forwarding, and fence removal
+                user->replaceUsesOfWith(orig_i, Constant::getNullValue(orig_i->getType()));
+                return;
+            }
             if (pass.typeof_func == callee) {
                 ++RemovedTypeofs;
                 call->replaceAllUsesWith(tag);
@@ -728,23 +807,17 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
             user->replaceUsesOfWith(orig_i, replace);
         }
         else if (isa<AddrSpaceCastInst>(user) || isa<BitCastInst>(user)) {
-            auto cast_t = PointerType::getWithSamePointeeType(cast<PointerType>(user->getType()), new_i->getType()->getPointerAddressSpace());
-            auto replace_i = new_i;
-            Type *new_t = new_i->getType();
-            if (cast_t != new_t) {
-                // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                assert(cast_t->getContext().supportsTypedPointers());
-                replace_i = new BitCastInst(replace_i, cast_t, "", user);
-                replace_i->setDebugLoc(user->getDebugLoc());
-                replace_i->takeName(user);
-            }
-            push_frame(user, replace_i);
+            push_frame(user, new_i);
         }
         else if (auto gep = dyn_cast<GetElementPtrInst>(user)) {
             SmallVector<Value *, 4> IdxOperands(gep->idx_begin(), gep->idx_end());
             auto new_gep = GetElementPtrInst::Create(gep->getSourceElementType(),
                                                      new_i, IdxOperands,
+#if JL_LLVM_VERSION >= 200000
+                                                     gep->getName(), gep->getIterator());
+#else
                                                      gep->getName(), gep);
+#endif
             new_gep->setIsInBounds(gep->isInBounds());
             new_gep->takeName(gep);
             new_gep->copyMetadata(*gep);
@@ -873,8 +946,11 @@ void Optimizer::optimizeTag(CallInst *orig_inst)
             if (pass.typeof_func == callee) {
                 ++RemovedTypeofs;
                 REMARK([&](){
+                    std::string str;
+                    llvm::raw_string_ostream rso(str);
+                    orig_inst->print(rso);
                     return OptimizationRemark(DEBUG_TYPE, "typeof", call)
-                        << "removed typeof call for GC allocation " << ore::NV("Alloc", orig_inst);
+                        << "removed typeof call for GC allocation " << ore::NV("Alloc", StringRef(str));
                 });
                 call->replaceAllUsesWith(tag);
                 // Push to the removed instructions to trigger `finalize` to
@@ -923,8 +999,9 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
             allocty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), field.size);
         }
         slot.slot = prolog_builder.CreateAlloca(allocty);
-        insertLifetime(prolog_builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(prolog_builder.getContext())),
-                       ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), field.size), orig_inst);
+        IRBuilder<> builder(orig_inst);
+        insertLifetime(slot.slot, ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), field.size), orig_inst);
+        initializeAlloca(builder, slot.slot, use_info.allockind);
         slots.push_back(std::move(slot));
     }
     const auto nslots = slots.size();
@@ -976,15 +1053,14 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
         auto size = pass.DL->getTypeAllocSize(elty);
         Value *addr;
         if (offset % size == 0) {
-            addr = builder.CreateBitCast(slot.slot, elty->getPointerTo());
+            addr = slot.slot;
             if (offset != 0) {
                 addr = builder.CreateConstInBoundsGEP1_32(elty, addr, offset / size);
             }
         }
         else {
-            addr = builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(builder.getContext()));
+            addr = slot.slot;
             addr = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), addr, offset);
-            addr = builder.CreateBitCast(addr, elty->getPointerTo());
         }
         return addr;
     };
@@ -1004,7 +1080,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                 assert(slot.offset == offset);
                 newload = builder.CreateLoad(pass.T_prjlvalue, slot.slot);
                 // Assume the addrspace is correct.
-                val = builder.CreateBitCast(newload, load_ty);
+                val = newload;
             }
             else {
                 newload = builder.CreateLoad(load_ty, slot_gep(slot, offset, load_ty, builder));
@@ -1040,10 +1116,9 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                     store_ty = T_pjlvalue;
                 }
                 else {
-                    store_ty = PointerType::getWithSamePointeeType(T_pjlvalue, cast<PointerType>(store_ty)->getAddressSpace());
-                    store_val = builder.CreateBitCast(store_val, store_ty);
+                    store_ty = PointerType::get(T_pjlvalue->getContext(), store_ty->getPointerAddressSpace());
                 }
-                if (cast<PointerType>(store_ty)->getAddressSpace() != AddressSpace::Tracked)
+                if (store_ty->getPointerAddressSpace() != AddressSpace::Tracked)
                     store_val = builder.CreateAddrSpaceCast(store_val, pass.T_prjlvalue);
                 newstore = builder.CreateStore(store_val, slot.slot);
             }
@@ -1058,6 +1133,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
             return;
         }
         else if (isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
+            // TODO: Downgrade atomics here potentially
             auto slot_idx = find_slot(offset);
             auto &slot = slots[slot_idx];
             assert(slot.offset <= offset && slot.offset + slot.size >= offset);
@@ -1106,14 +1182,14 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                                 store->setOrdering(AtomicOrdering::NotAtomic);
                                 continue;
                             }
-                            auto ptr8 = builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(builder.getContext()));
+                            Value *ptr_slot = slot.slot;
                             if (offset > slot.offset)
-                                ptr8 = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), ptr8,
+                                ptr_slot = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), slot.slot,
                                                                           offset - slot.offset);
                             auto sub_size = std::min(slot.offset + slot.size, offset + size) -
                                 std::max(offset, slot.offset);
                             // TODO: alignment computation
-                            builder.CreateMemSet(ptr8, val_arg, sub_size, MaybeAlign(0));
+                            builder.CreateMemSet(ptr_slot, val_arg, sub_size, MaybeAlign(0));
                         }
                         call->eraseFromParent();
                         return;
@@ -1167,7 +1243,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
             for (auto &bundle: bundles) {
                 if (bundle.getTag() != "jl_roots")
                     continue;
-                std::vector<Value*> operands;
+                SmallVector<Value*, 0> operands;
                 for (auto op: bundle.inputs()) {
                     if (op == orig_i || isa<Constant>(op))
                         continue;
@@ -1185,7 +1261,11 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                 bundle = OperandBundleDef("jl_roots", std::move(operands));
                 break;
             }
+#if JL_LLVM_VERSION >= 200000
+            auto new_call = CallInst::Create(call, bundles, call->getIterator());
+#else
             auto new_call = CallInst::Create(call, bundles, call);
+#endif
             new_call->takeName(call);
             call->replaceAllUsesWith(new_call);
             call->eraseFromParent();
@@ -1230,8 +1310,16 @@ bool AllocOpt::doInitialization(Module &M)
 
     DL = &M.getDataLayout();
 
-    lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { Type::getInt8PtrTy(M.getContext(), DL->getAllocaAddrSpace()) });
-    lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { Type::getInt8PtrTy(M.getContext(), DL->getAllocaAddrSpace()) });
+#if JL_LLVM_VERSION >= 200000
+    lifetime_start = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::lifetime_start, { PointerType::get(M.getContext(), DL->getAllocaAddrSpace()) });
+#else
+    lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { PointerType::get(M.getContext(), DL->getAllocaAddrSpace()) });
+#endif
+#if JL_LLVM_VERSION >= 200000
+    lifetime_end = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::lifetime_end, { PointerType::get(M.getContext(), DL->getAllocaAddrSpace()) });
+#else
+    lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { PointerType::get(M.getContext(), DL->getAllocaAddrSpace()) });
+#endif
 
     return true;
 }
@@ -1247,44 +1335,13 @@ bool AllocOpt::runOnFunction(Function &F, function_ref<DominatorTree&()> GetDT)
     optimizer.optimizeAll();
     bool modified = optimizer.finalize();
 #ifdef JL_VERIFY_PASSES
-    assert(!verifyFunction(F, &errs()));
+    assert(!verifyLLVMIR(F));
 #endif
     return modified;
 }
 
-struct AllocOptLegacy : public FunctionPass {
-    static char ID;
-    AllocOpt opt;
-    AllocOptLegacy() : FunctionPass(ID) {
-        llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
-    }
-    bool doInitialization(Module &m) override {
-        return opt.doInitialization(m);
-    }
-    bool runOnFunction(Function &F) override {
-        return opt.runOnFunction(F, [this]() -> DominatorTree & {return getAnalysis<DominatorTreeWrapperPass>().getDomTree();});
-    }
-    void getAnalysisUsage(AnalysisUsage &AU) const override
-    {
-        FunctionPass::getAnalysisUsage(AU);
-        AU.addRequired<DominatorTreeWrapperPass>();
-        AU.addPreserved<DominatorTreeWrapperPass>();
-        AU.setPreservesCFG();
-    }
-};
-
-char AllocOptLegacy::ID = 0;
-static RegisterPass<AllocOptLegacy> X("AllocOpt", "Promote heap allocation to stack",
-                                false /* Only looks at CFG */,
-                                false /* Analysis Pass */);
-
-}
-
-Pass *createAllocOptPass()
-{
-    return new AllocOptLegacy();
-}
 
+} // anonymous namespace
 PreservedAnalyses AllocOptPass::run(Function &F, FunctionAnalysisManager &AM) {
     AllocOpt opt;
     bool modified = opt.doInitialization(*F.getParent());
@@ -1299,9 +1356,3 @@ PreservedAnalyses AllocOptPass::run(Function &F, FunctionAnalysisManager &AM) {
         return PreservedAnalyses::all();
     }
 }
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddAllocOptPass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createAllocOptPass());
-}
diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h
index 0ab140b42b8b7..d5d7ae3d50113 100644
--- a/src/llvm-codegen-shared.h
+++ b/src/llvm-codegen-shared.h
@@ -1,16 +1,19 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#include <optional>
 #include <utility>
 #include <llvm/ADT/ArrayRef.h>
+#include <llvm/ADT/SmallVector.h>
 #include <llvm/Support/Debug.h>
 #include <llvm/IR/Attributes.h>
 #include <llvm/IR/DebugLoc.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/MDBuilder.h>
+#include <llvm/Support/ModRef.h>
+
 #include "julia.h"
 
-#define STR(csym)           #csym
-#define XSTR(csym)          STR(csym)
+static constexpr std::nullopt_t None = std::nullopt;
 
 enum AddressSpace {
     Generic = 0,
@@ -28,26 +31,26 @@ namespace JuliaType {
     }
 
     static inline llvm::PointerType* get_pjlvalue_ty(llvm::LLVMContext &C, unsigned addressSpace=0) {
-        return llvm::PointerType::get(get_jlvalue_ty(C), addressSpace);
+        return llvm::PointerType::get(C, addressSpace);
     }
 
     static inline llvm::PointerType* get_prjlvalue_ty(llvm::LLVMContext &C) {
-        return llvm::PointerType::get(get_jlvalue_ty(C), AddressSpace::Tracked);
+        return llvm::PointerType::get(C, AddressSpace::Tracked);
     }
 
     static inline llvm::PointerType* get_ppjlvalue_ty(llvm::LLVMContext &C) {
-        return llvm::PointerType::get(get_pjlvalue_ty(C), 0);
+        return llvm::PointerType::get(C, 0);
     }
 
     static inline llvm::PointerType* get_pprjlvalue_ty(llvm::LLVMContext &C) {
-        return llvm::PointerType::get(get_prjlvalue_ty(C), 0);
+        return llvm::PointerType::get(C, 0);
     }
 
     static inline auto get_jlfunc_ty(llvm::LLVMContext &C) {
         auto T_prjlvalue = get_prjlvalue_ty(C);
-        auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0);
+        auto T_pprjlvalue = llvm::PointerType::get(C, 0);
         return llvm::FunctionType::get(T_prjlvalue, {
-                T_prjlvalue,  // function
+                T_prjlvalue, // function
                 T_pprjlvalue, // args[]
                 llvm::Type::getInt32Ty(C)}, // nargs
             false);
@@ -55,35 +58,40 @@ namespace JuliaType {
 
     static inline auto get_jlfunc2_ty(llvm::LLVMContext &C) {
         auto T_prjlvalue = get_prjlvalue_ty(C);
-        auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0);
+        auto T_pprjlvalue = llvm::PointerType::get(C, 0);
+        return llvm::FunctionType::get(T_prjlvalue, {
+                T_prjlvalue, // function
+                T_pprjlvalue, // args[]
+                llvm::Type::getInt32Ty(C), // nargs
+                T_prjlvalue}, // linfo
+            false);
+    }
+
+    static inline auto get_jlfunc3_ty(llvm::LLVMContext &C) {
+        auto T_prjlvalue = get_prjlvalue_ty(C);
+        auto T_pprjlvalue = llvm::PointerType::get(C, 0);
+        auto T = get_pjlvalue_ty(C, Derived);
         return llvm::FunctionType::get(T_prjlvalue, {
-                T_prjlvalue,  // function
+                T, // function
                 T_pprjlvalue, // args[]
-                llvm::Type::getInt32Ty(C),
-                T_prjlvalue,  // linfo
-                }, // nargs
+                llvm::Type::getInt32Ty(C)}, // nargs
             false);
     }
 
     static inline auto get_jlfuncparams_ty(llvm::LLVMContext &C) {
         auto T_prjlvalue = get_prjlvalue_ty(C);
-        auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0);
+        auto T_pprjlvalue = llvm::PointerType::get(C, 0);
         return llvm::FunctionType::get(T_prjlvalue, {
-                T_prjlvalue,  // function
+                T_prjlvalue, // function
                 T_pprjlvalue, // args[]
-                llvm::Type::getInt32Ty(C),
-                T_pprjlvalue,  // linfo->sparam_vals
-                }, // nargs
+                llvm::Type::getInt32Ty(C), // nargs
+                T_prjlvalue}, // linfo->sparam_vals
             false);
     }
 
     static inline auto get_voidfunc_ty(llvm::LLVMContext &C) {
         return llvm::FunctionType::get(llvm::Type::getVoidTy(C), /*isVarArg*/false);
     }
-
-    static inline auto get_pvoidfunc_ty(llvm::LLVMContext &C) {
-        return get_voidfunc_ty(C)->getPointerTo();
-    }
 }
 
 // return how many Tracked pointers are in T (count > 0),
@@ -92,11 +100,10 @@ struct CountTrackedPointers {
     unsigned count = 0;
     bool all = true;
     bool derived = false;
-    CountTrackedPointers(llvm::Type *T);
+    CountTrackedPointers(llvm::Type *T, bool ignore_loaded=false);
 };
 
-unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::Type *DTy, llvm::IRBuilder<> &irbuilder);
-std::vector<llvm::Value*> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});
+llvm::SmallVector<llvm::Value*, 0> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});
 
 static inline void llvm_dump(llvm::Value *v)
 {
@@ -147,72 +154,50 @@ static inline llvm::MDNode *get_tbaa_const(llvm::LLVMContext &ctxt) {
 }
 
 static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instruction *inst)
-{
-    inst->setMetadata(llvm::LLVMContext::MD_tbaa, md);
-    if (llvm::isa<llvm::LoadInst>(inst) && md && md == get_tbaa_const(md->getContext()))
-        inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), llvm::None));
-    return inst;
-}
-
-// bitcast a value, but preserve its address space when dealing with pointer types
-static inline llvm::Value *emit_bitcast_with_builder(llvm::IRBuilder<> &builder, llvm::Value *v, llvm::Type *jl_value)
 {
     using namespace llvm;
-    if (isa<PointerType>(jl_value) &&
-        v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) {
-        // Cast to the proper address space
-        Type *jl_value_addr = PointerType::getWithSamePointeeType(cast<PointerType>(jl_value), v->getType()->getPointerAddressSpace());
-        return builder.CreateBitCast(v, jl_value_addr);
-    }
-    else {
-        return builder.CreateBitCast(v, jl_value);
+    inst->setMetadata(llvm::LLVMContext::MD_tbaa, md);
+    if (llvm::isa<llvm::LoadInst>(inst) && md && md == get_tbaa_const(md->getContext())) {
+        inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), std::nullopt));
     }
+    return inst;
 }
 
 // Get PTLS through current task.
-static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *pgcstack)
+static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Value *pgcstack)
 {
     using namespace llvm;
-    auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext());
-    auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
+    auto i8 = builder.getInt8Ty();
     const int pgcstack_offset = offsetof(jl_task_t, gcstack);
-    return builder.CreateInBoundsGEP(
-            T_pjlvalue, emit_bitcast_with_builder(builder, pgcstack, T_ppjlvalue),
-            ConstantInt::get(T_size, -(pgcstack_offset / sizeof(void *))),
-            "current_task");
+    return builder.CreateConstInBoundsGEP1_32(i8, pgcstack, -pgcstack_offset, "current_task");
 }
 
 // Get PTLS through current task.
-static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *current_task, llvm::MDNode *tbaa)
+static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task, llvm::MDNode *tbaa)
 {
     using namespace llvm;
-    auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext());
-    auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
+    auto i8 = builder.getInt8Ty();
+    auto T_ptr = builder.getPtrTy();
     const int ptls_offset = offsetof(jl_task_t, ptls);
-    llvm::Value *pptls = builder.CreateInBoundsGEP(
-            T_pjlvalue, current_task,
-            ConstantInt::get(T_size, ptls_offset / sizeof(void *)),
-            "ptls_field");
-    LoadInst *ptls_load = builder.CreateAlignedLoad(T_pjlvalue,
-            emit_bitcast_with_builder(builder, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load");
+    llvm::Value *pptls = builder.CreateConstInBoundsGEP1_32(i8, current_task, ptls_offset, "ptls_field");
+    LoadInst *ptls_load = builder.CreateAlignedLoad(T_ptr, pptls, Align(sizeof(void *)), "ptls_load");
     // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c.
     tbaa_decorate(tbaa, ptls_load);
-    return builder.CreateBitCast(ptls_load, T_ppjlvalue, "ptls");
+    return ptls_load;
 }
 
 // Get signal page through current task.
-static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa)
+static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa)
 {
     using namespace llvm;
     // return builder.CreateCall(prepare_call(reuse_signal_page_func));
-    auto T_psize = T_size->getPointerTo();
-    auto T_ppsize = T_psize->getPointerTo();
-    int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *);
-    ptls = emit_bitcast_with_builder(builder, ptls, T_ppsize);
-    llvm::Value *psafepoint = builder.CreateInBoundsGEP(
-            T_psize, ptls, ConstantInt::get(T_size, nthfield));
+    auto T_ptr = builder.getPtrTy();
+    auto i8 = builder.getInt8Ty();
+    int nthfield = offsetof(jl_tls_states_t, safepoint);
+    llvm::Value *psafepoint = builder.CreateConstInBoundsGEP1_32(i8, ptls, nthfield);
     LoadInst *ptls_load = builder.CreateAlignedLoad(
-            T_psize, psafepoint, Align(sizeof(void *)), "safepoint");
+            T_ptr, psafepoint, Align(sizeof(void *)), "safepoint");
+    ptls_load->setOrdering(AtomicOrdering::Monotonic);
     tbaa_decorate(tbaa, ptls_load);
     return ptls_load;
 }
@@ -226,7 +211,7 @@ static inline void emit_signal_fence(llvm::IRBuilder<> &builder)
 static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false)
 {
     using namespace llvm;
-    llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, T_size, ptls, tbaa);
+    llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, ptls, tbaa);
     emit_signal_fence(builder);
     Module *M = builder.GetInsertBlock()->getModule();
     LLVMContext &C = builder.getContext();
@@ -237,10 +222,9 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s
     else {
         Function *F = M->getFunction("julia.safepoint");
         if (!F) {
-            auto T_psize = T_size->getPointerTo();
-            FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false);
+            FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {PointerType::getUnqual(T_size->getContext())}, false);
             F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M);
-            F->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+            F->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly());
         }
         builder.CreateCall(F, {signal_page});
     }
@@ -251,25 +235,20 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T
 {
     using namespace llvm;
     Type *T_int8 = state->getType();
-    llvm::Value *ptls_i8 = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy());
-    Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state));
-    Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls_i8, ArrayRef<Value*>(offset), "gc_state");
+    unsigned offset = offsetof(jl_tls_states_t, gc_state);
+    Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state");
     if (old_state == nullptr) {
-        old_state = builder.CreateLoad(T_int8, gc_state);
+        old_state = builder.CreateLoad(T_int8, gc_state, "old_state");
         cast<LoadInst>(old_state)->setOrdering(AtomicOrdering::Monotonic);
     }
     builder.CreateAlignedStore(state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
     if (auto *C = dyn_cast<ConstantInt>(old_state))
-        if (C->isZero())
-            return old_state;
-    if (auto *C = dyn_cast<ConstantInt>(state))
-        if (!C->isZero())
-            return old_state;
+        if (auto *C2 = dyn_cast<ConstantInt>(state))
+            if (C->getZExtValue() == C2->getZExtValue())
+                return old_state;
     BasicBlock *passBB = BasicBlock::Create(builder.getContext(), "safepoint", builder.GetInsertBlock()->getParent());
     BasicBlock *exitBB = BasicBlock::Create(builder.getContext(), "after_safepoint", builder.GetInsertBlock()->getParent());
-    Constant *zero8 = ConstantInt::get(T_int8, 0);
-    builder.CreateCondBr(builder.CreateAnd(builder.CreateICmpNE(old_state, zero8), // if (old_state && !state)
-                                           builder.CreateICmpEQ(state, zero8)),
+    builder.CreateCondBr(builder.CreateICmpEQ(old_state, state, "is_new_state"), // Safepoint whenever we change the GC state
                          passBB, exitBB);
     builder.SetInsertPoint(passBB);
     MDNode *tbaa = get_tbaa_const(builder.getContext());
@@ -289,7 +268,7 @@ static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm
 static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final)
 {
     using namespace llvm;
-    Value *old_state = builder.getInt8(0);
+    Value *old_state = builder.getInt8(JL_GC_STATE_UNSAFE);
     return emit_gc_state_set(builder, T_size, ptls, state, old_state, final);
 }
 
diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp
index 77f1baf6237c4..a6e963664b0f3 100644
--- a/src/llvm-cpufeatures.cpp
+++ b/src/llvm-cpufeatures.cpp
@@ -21,7 +21,6 @@
 #include <llvm/IR/Constants.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/PassManager.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Verifier.h>
 #include <llvm/Target/TargetMachine.h>
 #include <llvm/Support/Debug.h>
@@ -38,20 +37,20 @@ STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false");
 extern JuliaOJIT *jl_ExecutionEngine;
 
 // whether this platform unconditionally (i.e. without needing multiversioning) supports FMA
-Optional<bool> always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT {
+std::optional<bool> always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT {
     if (TT.isAArch64()) {
         auto intr_name = intr.getName();
         auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
         return typ == "f32" || typ == "f64";
     } else {
-        return {};
+        return None;
     }
 }
 
 static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTSAFEPOINT {
     auto unconditional = always_have_fma(intr, TT);
-    if (unconditional.hasValue())
-        return unconditional.getValue();
+    if (unconditional)
+        return *unconditional;
 
     auto intr_name = intr.getName();
     auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
@@ -60,7 +59,7 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS
     StringRef FS =
         FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString();
 
-    SmallVector<StringRef, 6> Features;
+    SmallVector<StringRef, 128> Features;
     FS.split(Features, ',');
     for (StringRef Feature : Features)
     if (TT.isARM()) {
@@ -68,7 +67,7 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS
         return typ == "f32" || typ == "f64";
       else if (Feature == "+vfp4sp")
         return typ == "f32";
-    } else {
+    } else if (TT.isX86()) {
       if (Feature == "+fma" || Feature == "+fma4")
         return typ == "f32" || typ == "f64";
     }
@@ -95,7 +94,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT
     for (auto &F: M.functions()) {
         auto FN = F.getName();
 
-        if (FN.startswith("julia.cpu.have_fma.")) {
+        if (FN.starts_with("julia.cpu.have_fma.")) {
             for (Use &U: F.uses()) {
                 User *RU = U.getUser();
                 CallInst *I = cast<CallInst>(RU);
@@ -110,7 +109,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT
             I->eraseFromParent();
         }
 #ifdef JL_VERIFY_PASSES
-        assert(!verifyModule(M, &errs()));
+        assert(!verifyLLVMIR(M));
 #endif
         return true;
     } else {
@@ -125,33 +124,3 @@ PreservedAnalyses CPUFeaturesPass::run(Module &M, ModuleAnalysisManager &AM)
     }
     return PreservedAnalyses::all();
 }
-
-namespace {
-struct CPUFeaturesLegacy : public ModulePass {
-    static char ID;
-    CPUFeaturesLegacy() JL_NOTSAFEPOINT : ModulePass(ID) {};
-
-    bool runOnModule(Module &M)
-    {
-        return lowerCPUFeatures(M);
-    }
-};
-
-char CPUFeaturesLegacy::ID = 0;
-static RegisterPass<CPUFeaturesLegacy>
-        Y("CPUFeatures",
-          "Lower calls to CPU feature testing intrinsics.",
-          false,
-          false);
-}
-
-Pass *createCPUFeaturesPass()
-{
-    return new CPUFeaturesLegacy();
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddCPUFeaturesPass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createCPUFeaturesPass());
-}
diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp
index b2428860c2882..7f1b076897fc8 100644
--- a/src/llvm-demote-float16.cpp
+++ b/src/llvm-demote-float16.cpp
@@ -1,8 +1,9 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// This pass finds floating-point operations on 16-bit (half precision) values, and replaces
-// them by equivalent operations on 32-bit (single precision) values surrounded by a fpext
-// and fptrunc. This ensures that the exact semantics of IEEE floating-point are preserved.
+// This pass finds floating-point operations on 16-bit values (half precision and bfloat),
+// and replaces them by equivalent operations on 32-bit (single precision) values surrounded
+// by a fpext and fptrunc. This ensures that the exact semantics of IEEE floating-point are
+// preserved.
 //
 // Without this pass, back-ends that do not natively support half-precision (e.g. x86_64)
 // similarly pattern-match half-precision operations with single-precision equivalents, but
@@ -20,7 +21,6 @@
 #include <llvm/Pass.h>
 #include <llvm/ADT/Statistic.h>
 #include <llvm/IR/IRBuilder.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/PassManager.h>
 #include <llvm/IR/Module.h>
 #include <llvm/IR/Verifier.h>
@@ -49,33 +49,36 @@ extern JuliaOJIT *jl_ExecutionEngine;
 
 namespace {
 
-static bool have_fp16(Function &caller, const Triple &TT) {
-    Attribute FSAttr = caller.getFnAttribute("target-features");
-    StringRef FS = "";
-    if (FSAttr.isValid())
-        FS = FSAttr.getValueAsString();
-    else if (jl_ExecutionEngine)
-        FS = jl_ExecutionEngine->getTargetFeatureString();
-    // else probably called from opt, just do nothing
-    if (TT.isAArch64()) {
-        if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){
-            return true;
-        }
-    } else if (TT.getArch() == Triple::x86_64) {
-        if (FS.find("+avx512fp16") != llvm::StringRef::npos){
-            return true;
-        }
-    }
-    if (caller.hasFnAttribute("julia.hasfp16")) {
-        return true;
-    }
-    return false;
+static bool have_fp16(Function &F, const Triple &TT) {
+    // for testing purposes
+    Attribute Attr = F.getFnAttribute("julia.hasfp16");
+    if (Attr.isValid())
+        return Attr.getValueAsBool();
+
+    // llvm/llvm-project#97975: on some platforms, `half` uses excessive precision
+    if (TT.isPPC())
+        return false;
+
+    return true;
+}
+
+static bool have_bf16(Function &F, const Triple &TT) {
+    // for testing purposes
+    Attribute Attr = F.getFnAttribute("julia.hasbf16");
+    if (Attr.isValid())
+        return Attr.getValueAsBool();
+
+    // https://github.com/llvm/llvm-project/issues/97975#issuecomment-2218770199:
+    // on current versions of LLVM, bf16 always uses TypeSoftPromoteHalf
+    return true;
 }
 
 static bool demoteFloat16(Function &F)
 {
     auto TT = Triple(F.getParent()->getTargetTriple());
-    if (have_fp16(F, TT))
+    auto has_fp16 = have_fp16(F, TT);
+    auto has_bf16 = have_bf16(F, TT);
+    if (has_fp16 && has_bf16)
         return false;
 
     auto &ctx = F.getContext();
@@ -83,14 +86,17 @@ static bool demoteFloat16(Function &F)
     SmallVector<Instruction *, 0> erase;
     for (auto &BB : F) {
         for (auto &I : BB) {
-            // extend Float16 operands to Float32
+            // check whether there's any 16-bit floating point operands to extend
             bool Float16 = I.getType()->getScalarType()->isHalfTy();
-            for (size_t i = 0; !Float16 && i < I.getNumOperands(); i++) {
+            bool BFloat16 = I.getType()->getScalarType()->isBFloatTy();
+            for (size_t i = 0; !BFloat16 && !Float16 && i < I.getNumOperands(); i++) {
                 Value *Op = I.getOperand(i);
-                if (Op->getType()->getScalarType()->isHalfTy())
+                if (!has_fp16 && Op->getType()->getScalarType()->isHalfTy())
                     Float16 = true;
+                else if (!has_bf16 && Op->getType()->getScalarType()->isBFloatTy())
+                    BFloat16 = true;
             }
-            if (!Float16)
+            if (!Float16 && !BFloat16)
                 continue;
 
             switch (I.getOpcode()) {
@@ -114,11 +120,16 @@ static bool demoteFloat16(Function &F)
 
             IRBuilder<> builder(&I);
 
-            // extend Float16 operands to Float32
+            // extend 16-bit floating point operands
             SmallVector<Value *, 2> Operands(I.getNumOperands());
             for (size_t i = 0; i < I.getNumOperands(); i++) {
                 Value *Op = I.getOperand(i);
-                if (Op->getType()->getScalarType()->isHalfTy()) {
+                if (!has_fp16 && Op->getType()->getScalarType()->isHalfTy()) {
+                    // extend Float16 to Float32
+                    ++TotalExt;
+                    Op = builder.CreateFPExt(Op, Op->getType()->getWithNewType(T_float32));
+                } else if (!has_bf16 && Op->getType()->getScalarType()->isBFloatTy()) {
+                    // extend BFloat16 to Float32
                     ++TotalExt;
                     Op = builder.CreateFPExt(Op, Op->getType()->getWithNewType(T_float32));
                 }
@@ -126,7 +137,7 @@ static bool demoteFloat16(Function &F)
             }
 
             // recreate the instruction if any operands changed,
-            // truncating the result back to Float16
+            // truncating the result back to the original type
             Value *NewI;
             ++TotalChanged;
             switch (I.getOpcode()) {
@@ -184,7 +195,7 @@ static bool demoteFloat16(Function &F)
         for (auto V : erase)
             V->eraseFromParent();
 #ifdef JL_VERIFY_PASSES
-        assert(!verifyFunction(F, &errs()));
+        assert(!verifyLLVMIR(F));
 #endif
         return true;
     }
@@ -201,34 +212,3 @@ PreservedAnalyses DemoteFloat16Pass::run(Function &F, FunctionAnalysisManager &A
     }
     return PreservedAnalyses::all();
 }
-
-namespace {
-
-struct DemoteFloat16Legacy : public FunctionPass {
-    static char ID;
-    DemoteFloat16Legacy() : FunctionPass(ID){};
-
-private:
-    bool runOnFunction(Function &F) override {
-        return demoteFloat16(F);
-    }
-};
-
-char DemoteFloat16Legacy::ID = 0;
-static RegisterPass<DemoteFloat16Legacy>
-        Y("DemoteFloat16",
-          "Demote Float16 operations to Float32 equivalents.",
-          false,
-          false);
-} // end anonymous namespac
-
-Pass *createDemoteFloat16Pass()
-{
-    return new DemoteFloat16Legacy();
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddDemoteFloat16Pass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createDemoteFloat16Pass());
-}
diff --git a/src/llvm-expand-atomic-modify.cpp b/src/llvm-expand-atomic-modify.cpp
new file mode 100644
index 0000000000000..e4152bb45fe42
--- /dev/null
+++ b/src/llvm-expand-atomic-modify.cpp
@@ -0,0 +1,489 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// TODO: move this feature into AtomicExpandImpl
+
+#include "llvm-version.h"
+#include "passes.h"
+
+#include <variant>
+
+#include <llvm-c/Core.h>
+#include <llvm-c/Types.h>
+
+#include <llvm/Analysis/InstSimplifyFolder.h>
+#include <llvm/CodeGen/AtomicExpandUtils.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/InstIterator.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/IntrinsicInst.h>
+#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Operator.h>
+#include <llvm/IR/PassManager.h>
+#include <llvm/IR/Value.h>
+#include <llvm/IR/Verifier.h>
+#include <llvm/Pass.h>
+#include <llvm/Support/Debug.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+#include <llvm/Transforms/Utils/LowerAtomic.h>
+
+#include "julia.h"
+#include "julia_assert.h"
+
+#define DEBUG_TYPE "expand-atomic-modify"
+#undef DEBUG
+
+using namespace llvm;
+
+// This pass takes fake call instructions that look like this which were emitted by the front end:
+//   (oldval, newval) = call atomicmodify.iN(ptr %op, ptr align(N) %ptr, i8 immarg %SSID, i8 immarg %Ordering, ...) !rmwattributes
+//   where op is a function with a prototype of `iN (iN arg, ...)`
+// Then rewrite that to
+//   oldval = atomicrmw op ptr, val ordering syncscope
+//   newval = op oldval, val
+// Or to an equivalent RMWCmpXchgLoop if `op` isn't valid for atomicrmw
+
+
+// from AtomicExpandImpl, with modification of failure order and added Attributes
+using CreateWeakCmpXchgInstFun =
+   std::function<void(IRBuilderBase &, Value *, Value *, Value *, Align,
+                     AtomicOrdering, SyncScope::ID, Instruction &Attributes,
+                     Value *&, Value *&)>;
+
+static void createWeakCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
+                                 Value *Loaded, Value *NewVal, Align AddrAlign,
+                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID, Instruction &Attributes,
+                                 Value *&Success, Value *&NewLoaded) {
+  Type *OrigTy = NewVal->getType();
+
+  // This code can go away when cmpxchg supports FP types.
+  assert(!OrigTy->isPointerTy());
+  bool NeedBitcast = OrigTy->isFloatingPointTy();
+  if (NeedBitcast) {
+    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
+    NewVal = Builder.CreateBitCast(NewVal, IntTy);
+    Loaded = Builder.CreateBitCast(Loaded, IntTy);
+  }
+
+  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
+      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
+      AtomicOrdering::Monotonic, // why does LLVM use AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder) here
+      SSID);
+  Pair->copyMetadata(Attributes);
+  Success = Builder.CreateExtractValue(Pair, 1, "success");
+  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+
+  if (NeedBitcast)
+    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
+}
+
+// from AtomicExpandImpl, with modification of values returned
+std::pair<Value *, Value *> insertRMWCmpXchgLoop(
+    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
+    AtomicOrdering MemOpOrder, SyncScope::ID SSID, Instruction &Attributes,
+    const std::function<Value *(IRBuilderBase &, Value *)> &PerformOp,
+    const CreateWeakCmpXchgInstFun &CreateWeakCmpXchg) {
+  LLVMContext &Ctx = Builder.getContext();
+  BasicBlock *BB = Builder.GetInsertBlock();
+  Function *F = BB->getParent();
+
+  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+  //
+  // The standard expansion we produce is:
+  //     [...]
+  //     %init_loaded = load atomic iN* %addr
+  //     br label %loop
+  // loop:
+  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+  //     %new = some_op iN %loaded, %incr
+  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
+  //     %success = extractvalue { iN, i1 } %pair, 1
+  //     br i1 %success, label %atomicrmw.end, label %loop
+  // atomicrmw.end:
+  //     [...]
+  BasicBlock *ExitBB =
+      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
+  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+  // The split call above "helpfully" added a branch at the end of BB (to the
+  // wrong place), but we want a load. It's easiest to just remove
+  // the branch entirely.
+  std::prev(BB->end())->eraseFromParent();
+  Builder.SetInsertPoint(BB);
+  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
+  InitLoaded->setOrdering(AtomicOrdering::Unordered); // n.b. the original LLVM pass is missing this call so is actually mildly UB
+  Builder.CreateBr(LoopBB);
+
+  // Start the main loop block now that we've taken care of the preliminaries.
+  Builder.SetInsertPoint(LoopBB);
+  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
+  Loaded->addIncoming(InitLoaded, BB);
+
+  Value *NewVal = PerformOp(Builder, Loaded);
+
+  Value *NewLoaded = nullptr;
+  Value *Success = nullptr;
+
+  CreateWeakCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
+                MemOpOrder == AtomicOrdering::Unordered
+                    ? AtomicOrdering::Monotonic
+                    : MemOpOrder,
+                SSID, Attributes, Success, NewLoaded);
+  assert(Success && NewLoaded);
+
+  Loaded->addIncoming(NewLoaded, LoopBB);
+
+  Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+  return {NewLoaded, NewVal};
+}
+
+// from AtomicExpandImpl
+// IRBuilder to be used for replacement atomic instructions.
+struct ReplacementIRBuilder
+    : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
+  MDNode *MMRAMD = nullptr;
+
+  // Preserves the DebugLoc from I, and preserves still valid metadata.
+  // Enable StrictFP builder mode when appropriate.
+  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
+      : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
+                  IRBuilderCallbackInserter(
+                      [this](Instruction *I) { addMMRAMD(I); })) {
+    SetInsertPoint(I);
+    this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
+    if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
+      this->setIsFPConstrained(true);
+
+    MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
+  }
+
+  void addMMRAMD(Instruction *I) {
+    if (canInstructionHaveMMRAs(*I))
+      I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
+  }
+};
+
+// Must check that either Target cannot observe or mutate global state
+// or that no trailing instructions does so either.
+// Depending on the choice, it can also decide whether it is better to move Target after RMW
+// or to move RMW before Target (or meet somewhere in the middle).
+// Currently conservatively implemented as there being no instruction in the
+// function which writes memory (which includes any atomics).
+// Excluding the Target itself, unless some other instruction might read memory to observe it.
+static bool canReorderWithRMW(Instruction &Target, bool verifyop)
+{
+  if (!verifyop)
+    return true;
+  Function &Op = *Target.getFunction();
+  // quick check: if Op is nosync and Target doesn't access any memory, then reordering is trivially valid
+  bool nosync = Op.hasNoSync();
+  if (nosync && !Target.mayReadOrWriteMemory())
+    return true;
+  // otherwise, scan the whole function to see if any function accesses memory
+  // in a way that would conflict with reordering the atomic read and write
+  bool mayRead = false;
+  for (auto &BB : Op) {
+    for (auto &I : BB) {
+      if (&I == &Target)
+        continue;
+      if (I.mayWriteToMemory())
+        return false;
+      if (!mayRead) {
+        mayRead = I.mayReadFromMemory();
+        if (!nosync && mayRead)
+          return false;
+      }
+    }
+  }
+  // if any other instruction read memory, then the ordering of any writes by the target instruction might be observed
+  return !(mayRead && Target.mayWriteToMemory());
+}
+
+static std::variant<AtomicRMWInst::BinOp,bool> patternMatchAtomicRMWOp(Value *Old, Use **ValOp, Value *RetVal)
+{
+  bool verifyop = RetVal == nullptr;
+  assert(verifyop ? isa<Argument>(Old) : isa<AtomicRMWInst>(Old));
+  Function *Op = verifyop ? cast<Argument>(Old)->getParent() : nullptr;
+  if (verifyop && (Op->isDeclaration() || Op->isInterposable() || Op->isIntrinsic()))
+    return false;
+   // TODO: peek forward from Old through any trivial casts which don't affect the instruction (e.g. i64 to f64 and back)
+  if (RetVal == nullptr) {
+    if (Old->use_empty()) {
+      if (ValOp) *ValOp = nullptr;
+      return AtomicRMWInst::Xchg;
+    }
+    if (!Old->hasOneUse())
+      return false;
+    ReturnInst *Ret = nullptr;
+    for (auto &BB : *Op) {
+      if (isa<ReturnInst>(BB.getTerminator())) {
+        if (Ret != nullptr)
+          return false;
+        Ret = cast<ReturnInst>(BB.getTerminator());
+      }
+    }
+    if (Ret == nullptr)
+      return false;
+    // Now examine the instruction list
+    RetVal = Ret->getReturnValue();
+    if (!RetVal->hasOneUse())
+      return false;
+  }
+  if (RetVal == Old) {
+    // special token indicating to convert to an atomic fence
+    if (ValOp) *ValOp = nullptr;
+    return AtomicRMWInst::Or;
+  }
+  if (Old->use_empty()) {
+    if (ValOp) *ValOp = nullptr;
+    return AtomicRMWInst::Xchg;
+  }
+  if (auto BinOp = dyn_cast<BinaryOperator>(RetVal)) {
+    if ((BinOp->getOperand(0) == Old || (BinOp->isCommutative() && BinOp->getOperand(1) == Old)) && canReorderWithRMW(*BinOp, verifyop)) {
+      if (ValOp) *ValOp = &BinOp->getOperandUse(BinOp->getOperand(0) == Old ? 1 : 0);
+      switch (BinOp->getOpcode()) {
+        case Instruction::Add:
+          return AtomicRMWInst::Add;
+        case Instruction::Sub:
+          return AtomicRMWInst::Sub;
+        case Instruction::And:
+          return AtomicRMWInst::And;
+        case Instruction::Or:
+          return AtomicRMWInst::Or;
+        case Instruction::Xor:
+          return AtomicRMWInst::Xor;
+        case Instruction::FAdd:
+          return AtomicRMWInst::FAdd;
+        case Instruction::FSub:
+          return AtomicRMWInst::FSub;
+        default:
+          break;
+      }
+    }
+    if (BinOp->getOpcode() == Instruction::Xor) {
+      if (auto CI = dyn_cast<ConstantInt>(BinOp->getOperand(1))) {
+        if (CI->isAllOnesValue()) {
+          BinOp = dyn_cast<BinaryOperator>(BinOp->getOperand(0));
+          if (BinOp && BinOp->hasOneUse() && BinOp->getOpcode() == Instruction::And) {
+            if ((BinOp->getOperand(0) == Old || (BinOp->isCommutative() && BinOp->getOperand(1) == Old)) && canReorderWithRMW(*BinOp, verifyop)) {
+              if (ValOp) *ValOp = &BinOp->getOperandUse(BinOp->getOperand(0) == Old ? 1 : 0);
+              return AtomicRMWInst::Nand;
+            }
+          }
+        }
+      }
+    }
+    return false;
+  } else if (auto Intr = dyn_cast<IntrinsicInst>(RetVal)) {
+    if (Intr->arg_size() == 2) {
+      if ((Intr->getOperand(0) == Old || (Intr->isCommutative() && Intr->getOperand(1) == Old)) && canReorderWithRMW(*Intr, verifyop)) {
+        if (ValOp) *ValOp = &Intr->getOperandUse(Intr->getOperand(0) == Old ? 1 : 0);
+        switch (Intr->getIntrinsicID()) {
+          case Intrinsic::minnum:
+            return AtomicRMWInst::FMin;
+          case Intrinsic::maxnum:
+            return AtomicRMWInst::FMax;
+          case Intrinsic::smax:
+            return AtomicRMWInst::Max;
+          case Intrinsic::umax:
+            return AtomicRMWInst::UMax;
+          case Intrinsic::smin:
+            return AtomicRMWInst::Min;
+          case Intrinsic::umin:
+            return AtomicRMWInst::UMin;
+#if JL_LLVM_VERSION >= 200000
+          case Intrinsic::usub_sat:
+           return AtomicRMWInst::USubSat;
+#endif
+        }
+      }
+    }
+    return false;
+  }
+  else if (auto Intr = dyn_cast<CallInst>(RetVal)) {
+    // TODO: decide inlining cost of Op, or check alwaysinline/inlinehint, before this?
+    for (auto &Arg : Intr->args()) {
+      if (Arg == Old) {
+        if (canReorderWithRMW(*Intr, verifyop)) {
+          if (ValOp) *ValOp = &Arg;
+          return true;
+        }
+        return false;
+      }
+    }
+  }
+  // TODO: does this need to deal with F->hasFnAttribute(Attribute::StrictFP)?
+  // TODO: does Fneg and Neg have expansions?
+  // TODO: be able to ignore some simple bitcasts (particularly f64 to i64)
+  // TODO: handle longer sequences (UIncWrap, UDecWrap, USubCond, and target-specific ones for CUDA)
+  return false;
+}
+
+void expandAtomicModifyToCmpXchg(CallInst &Modify,
+                                 const CreateWeakCmpXchgInstFun &CreateWeakCmpXchg) {
+  Value *Ptr = Modify.getOperand(0);
+  Function *Op = dyn_cast<Function>(Modify.getOperand(1));
+  if (!Op) {
+      Modify.getParent()->getParent()->print(errs());
+      llvm_unreachable("expected immarg for function argument");
+  }
+  AtomicOrdering Ordering = (AtomicOrdering)cast<ConstantInt>(Modify.getOperand(2))->getZExtValue();
+  SyncScope::ID SSID = (SyncScope::ID)cast<ConstantInt>(Modify.getOperand(3))->getZExtValue();
+  MaybeAlign Alignment = Modify.getParamAlign(0);
+  unsigned user_arg_start = Modify.getFunctionType()->getNumParams();
+  Type *Ty = Modify.getFunctionType()->getReturnType()->getStructElementType(0);
+
+  ReplacementIRBuilder Builder(&Modify, Modify.getModule()->getDataLayout());
+
+  CallInst *ModifyOp;
+  {
+    SmallVector<Value*> Args(1 + Modify.arg_size() - user_arg_start);
+    Args[0] = UndefValue::get(Ty); // Undef used as placeholder for Loaded / RMW;
+    for (size_t argi = 0; argi < Modify.arg_size() - user_arg_start; ++argi) {
+      Args[argi + 1] = Modify.getArgOperand(argi + user_arg_start);
+    }
+    SmallVector<OperandBundleDef> Defs;
+    Modify.getOperandBundlesAsDefs(Defs);
+    ModifyOp = Builder.CreateCall(Op, Args, Defs);
+    ModifyOp->setCallingConv(Op->getCallingConv());
+  }
+  Use *LoadedOp = &ModifyOp->getOperandUse(0);
+
+  Value *OldVal = nullptr;
+  Value *NewVal = nullptr;
+  auto BinOp = patternMatchAtomicRMWOp(Op->getArg(0), nullptr, nullptr);
+  if (BinOp != decltype(BinOp)(false)) {
+    Builder.SetInsertPoint(ModifyOp);
+    AtomicRMWInst *RMW = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Ptr, UndefValue::get(Ty), Alignment, Ordering, SSID); // Undef used as placeholder
+    RMW->copyMetadata(Modify);
+    Builder.SetInsertPoint(&Modify);
+    LoadedOp->set(RMW);
+    for (int attempts = 0; ; ) {
+      FreezeInst *TrackReturn = Builder.Insert(new FreezeInst(ModifyOp)); // Create a temporary TrackingVH so we can recover the NewVal after inlining
+      InlineFunctionInfo IFI;
+      if (!InlineFunction(*ModifyOp, IFI).isSuccess()) {
+        // Undo the attempt, since inlining failed
+        BinOp = false;
+        TrackReturn->eraseFromParent();
+        break;
+      }
+      ModifyOp = nullptr;
+      NewVal = TrackReturn->getOperand(0);
+      TrackReturn->eraseFromParent();
+      // NewVal might have been folded away by inlining so redo patternMatchAtomicRMWOp here
+      // tracing from RMW to NewVal, in case instsimplify folded something
+      Use *ValOp;
+      BinOp = patternMatchAtomicRMWOp(RMW, &ValOp, NewVal);
+      if (BinOp == decltype(BinOp)(true)) {
+        ModifyOp = cast<CallInst>(ValOp->getUser());
+        LoadedOp = ValOp;
+        assert(LoadedOp->get() == RMW);
+        RMW->moveBeforePreserving(ModifyOp->getIterator()); // NewValInst is a user of RMW, and RMW has no other dependants (per patternMatchAtomicRMWOp)
+        BinOp = false;
+        if (++attempts > 3)
+          break;
+        if (auto FOp = ModifyOp->getCalledFunction())
+          BinOp = patternMatchAtomicRMWOp(FOp->getArg(LoadedOp->getOperandNo()), nullptr, nullptr);
+        else
+          break;
+        if (BinOp == decltype(BinOp)(false))
+          break;
+      } else {
+        assert(BinOp != decltype(BinOp)(true));
+        auto RMWOp = std::get<AtomicRMWInst::BinOp>(BinOp);
+        assert(RMWOp != AtomicRMWInst::BAD_BINOP);
+        assert(isa<UndefValue>(RMW->getOperand(1))); // RMW was previously being used as the placeholder for Val
+        Value *Val;
+        if (ValOp != nullptr) {
+          RMW->moveBeforePreserving(cast<Instruction>(ValOp->getUser())->getIterator()); // ValOp is a user of RMW, and RMW has no other dependants (per patternMatchAtomicRMWOp)
+          Val = ValOp->get();
+        } else if (RMWOp == AtomicRMWInst::Xchg) {
+          Val = NewVal;
+        } else {
+          // convert to an atomic fence of the form: atomicrmw or %ptr, 0
+          assert(RMWOp == AtomicRMWInst::Or);
+          Val = ConstantInt::getNullValue(Ty);
+        }
+        RMW->setOperation(RMWOp);
+        RMW->setOperand(1, Val);
+        OldVal = RMW;
+        break;
+      }
+    }
+    if (BinOp == decltype(BinOp)(false)) {
+      LoadedOp->set(UndefValue::get(Ty));
+      RMW->eraseFromParent();
+    }
+  }
+
+  if (BinOp == decltype(BinOp)(false)) {
+    // FIXME: If FP exceptions are observable, we should force them off for the
+    // loop for the FP atomics.
+    std::tie(OldVal, NewVal) = insertRMWCmpXchgLoop(
+      Builder, Ty,  Ptr, *Alignment, Ordering, SSID, Modify,
+      [&](IRBuilderBase &Builder, Value *Loaded) JL_NOTSAFEPOINT {
+        LoadedOp->set(Loaded);
+        ModifyOp->moveBeforePreserving(*Builder.GetInsertBlock(), Builder.GetInsertPoint());
+        return ModifyOp;
+      },
+      CreateWeakCmpXchg);
+  }
+
+  for (auto user : make_early_inc_range(Modify.users())) {
+    if (auto EV = dyn_cast<ExtractValueInst>(user)) {
+      if (EV->getNumIndices() == 1) {
+        if (EV->use_empty()) {
+          EV->eraseFromParent();
+          continue;
+        }
+        else if (EV->getIndices()[0] == 0) {
+          EV->replaceAllUsesWith(OldVal);
+          EV->eraseFromParent();
+          continue;
+        } else if (EV->getIndices()[0] == 1) {
+          EV->replaceAllUsesWith(NewVal);
+          EV->eraseFromParent();
+          continue;
+        }
+      }
+    }
+  }
+  if (!Modify.use_empty()) {
+    auto OldNewVal = Builder.CreateInsertValue(UndefValue::get(Modify.getType()), OldVal, 0);
+    OldNewVal = Builder.CreateInsertValue(OldNewVal, NewVal, 1);
+    Modify.replaceAllUsesWith(OldNewVal);
+  }
+  Modify.eraseFromParent();
+}
+
+static bool expandAtomicModify(Function &F) {
+  SmallVector<CallInst*> AtomicInsts;
+
+  // Changing control-flow while iterating through it is a bad idea, so gather a
+  // list of all atomic instructions before we start.
+  for (Instruction &I : instructions(F))
+    if (auto CI = dyn_cast<CallInst>(&I)) {
+      auto callee = dyn_cast_or_null<Function>(CI->getCalledOperand());
+      if (callee && callee->getName().starts_with("julia.atomicmodify.")) {
+        assert(CI->getFunctionType() == callee->getFunctionType());
+        AtomicInsts.push_back(CI);
+      }
+    }
+
+  bool MadeChange = !AtomicInsts.empty();
+  for (auto *I : AtomicInsts)
+    expandAtomicModifyToCmpXchg(*I, createWeakCmpXchgInstFun);
+  return MadeChange;
+}
+
+PreservedAnalyses ExpandAtomicModifyPass::run(Function &F, FunctionAnalysisManager &AM)
+{
+    if (expandAtomicModify(F)) {
+        return PreservedAnalyses::none();
+    }
+    return PreservedAnalyses::all();
+}
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index e31bcb21199f5..48705e0c6934a 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -1,23 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "llvm-version.h"
-#include "passes.h"
-
-#include <llvm/ADT/Statistic.h>
-#include <llvm/IR/LegacyPassManager.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/IR/Verifier.h>
-#include <llvm/Pass.h>
-#include <llvm/Support/Debug.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
-
-#include "llvm-codegen-shared.h"
-#include "julia.h"
-#include "julia_internal.h"
-#include "llvm-pass-helpers.h"
+#include "llvm-gc-interface-passes.h"
 
 #define DEBUG_TYPE "final_gc_lowering"
 STATISTIC(NewGCFrameCount, "Number of lowered newGCFrameFunc intrinsics");
@@ -28,71 +11,26 @@ STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics");
 STATISTIC(QueueGCRootCount, "Number of lowered queueGCRootFunc intrinsics");
 STATISTIC(SafepointCount, "Number of lowered safepoint intrinsics");
 
-using namespace llvm;
-
-// The final GC lowering pass. This pass lowers platform-agnostic GC
-// intrinsics to platform-dependent instruction sequences. The
-// intrinsics it targets are those produced by the late GC frame
-// lowering pass.
-//
-// This pass targets typical back-ends for which the standard Julia
-// runtime library is available. Atypical back-ends should supply
-// their own lowering pass.
-
-struct FinalLowerGC: private JuliaPassContext {
-    bool runOnFunction(Function &F);
-    bool doInitialization(Module &M);
-    bool doFinalization(Module &M);
-
-private:
-    Function *queueRootFunc;
-    Function *poolAllocFunc;
-    Function *bigAllocFunc;
-    Function *allocTypedFunc;
-    Instruction *pgcstack;
-    Type *T_size;
-
-    // Lowers a `julia.new_gc_frame` intrinsic.
-    Value *lowerNewGCFrame(CallInst *target, Function &F);
-
-    // Lowers a `julia.push_gc_frame` intrinsic.
-    void lowerPushGCFrame(CallInst *target, Function &F);
-
-    // Lowers a `julia.pop_gc_frame` intrinsic.
-    void lowerPopGCFrame(CallInst *target, Function &F);
-
-    // Lowers a `julia.get_gc_frame_slot` intrinsic.
-    Value *lowerGetGCFrameSlot(CallInst *target, Function &F);
-
-    // Lowers a `julia.gc_alloc_bytes` intrinsic.
-    Value *lowerGCAllocBytes(CallInst *target, Function &F);
-
-    // Lowers a `julia.queue_gc_root` intrinsic.
-    Value *lowerQueueGCRoot(CallInst *target, Function &F);
-
-    // Lowers a `julia.safepoint` intrinsic.
-    Value *lowerSafepoint(CallInst *target, Function &F);
-};
-
-Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
+void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
 {
     ++NewGCFrameCount;
     assert(target->arg_size() == 1);
     unsigned nRoots = cast<ConstantInt>(target->getArgOperand(0))->getLimitedValue(INT_MAX);
 
     // Create the GC frame.
-    IRBuilder<> builder(target->getNextNode());
+    IRBuilder<> builder(target);
     auto gcframe_alloca = builder.CreateAlloca(T_prjlvalue, ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2));
     gcframe_alloca->setAlignment(Align(16));
     // addrspacecast as needed for non-0 alloca addrspace
-    auto gcframe = cast<Instruction>(builder.CreateAddrSpaceCast(gcframe_alloca, T_prjlvalue->getPointerTo(0)));
+    auto gcframe = cast<Instruction>(builder.CreateAddrSpaceCast(gcframe_alloca, PointerType::getUnqual(T_prjlvalue->getContext())));
     gcframe->takeName(target);
 
     // Zero out the GC frame.
     auto ptrsize = F.getParent()->getDataLayout().getPointerSize();
     builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe);
 
-    return gcframe;
+    target->replaceAllUsesWith(gcframe);
+    target->eraseFromParent();
 }
 
 void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
@@ -102,27 +40,25 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
     auto gcframe = target->getArgOperand(0);
     unsigned nRoots = cast<ConstantInt>(target->getArgOperand(1))->getLimitedValue(INT_MAX);
 
-    IRBuilder<> builder(target->getContext());
-    builder.SetInsertPoint(&*(++BasicBlock::iterator(target)));
+    IRBuilder<> builder(target);
     StoreInst *inst = builder.CreateAlignedStore(
                 ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)),
-                builder.CreateBitCast(
-                        builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0),
-                        T_size->getPointerTo()),
+                builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0, "frame.nroots"),// GEP of 0 becomes a noop and eats the name
                 Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(F.getContext());
     inst = builder.CreateAlignedStore(
-            builder.CreateAlignedLoad(T_ppjlvalue, pgcstack, Align(sizeof(void*))),
+            builder.CreateAlignedLoad(T_ppjlvalue, pgcstack, Align(sizeof(void*)), "task.gcstack"),
             builder.CreatePointerCast(
-                    builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1),
-                    PointerType::get(T_ppjlvalue, 0)),
+                    builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1, "frame.prev"),
+                    PointerType::get(T_ppjlvalue->getContext(), 0)),
             Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
-    inst = builder.CreateAlignedStore(
+    builder.CreateAlignedStore(
             gcframe,
-            builder.CreateBitCast(pgcstack, PointerType::get(PointerType::get(T_prjlvalue, 0), 0)),
+            pgcstack,
             Align(sizeof(void*)));
+    target->eraseFromParent();
 }
 
 void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
@@ -131,21 +67,20 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
     assert(target->arg_size() == 1);
     auto gcframe = target->getArgOperand(0);
 
-    IRBuilder<> builder(target->getContext());
-    builder.SetInsertPoint(target);
+    IRBuilder<> builder(target);
     Instruction *gcpop =
         cast<Instruction>(builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1));
-    Instruction *inst = builder.CreateAlignedLoad(T_prjlvalue, gcpop, Align(sizeof(void*)));
+    Instruction *inst = builder.CreateAlignedLoad(T_prjlvalue, gcpop, Align(sizeof(void*)), "frame.prev");
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     inst = builder.CreateAlignedStore(
         inst,
-        builder.CreateBitCast(pgcstack,
-            PointerType::get(T_prjlvalue, 0)),
+        pgcstack,
         Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
+    target->eraseFromParent();
 }
 
-Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
+void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
 {
     ++GetGCFrameSlotCount;
     assert(target->arg_size() == 2);
@@ -153,8 +88,7 @@ Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
     auto index = target->getArgOperand(1);
 
     // Initialize an IR builder.
-    IRBuilder<> builder(target->getContext());
-    builder.SetInsertPoint(target);
+    IRBuilder<> builder(target);
 
     // The first two slots are reserved, so we'll add two to the index.
     index = builder.CreateAdd(index, ConstantInt::get(Type::getInt32Ty(F.getContext()), 2));
@@ -162,39 +96,37 @@ Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
     // Lower the intrinsic as a GEP.
     auto gep = builder.CreateInBoundsGEP(T_prjlvalue, gcframe, index);
     gep->takeName(target);
-    return gep;
+    target->replaceAllUsesWith(gep);
+    target->eraseFromParent();
 }
 
-Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
+void FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
 {
     ++QueueGCRootCount;
     assert(target->arg_size() == 1);
     target->setCalledFunction(queueRootFunc);
-    return target;
 }
 
-Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
+void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
 {
     ++SafepointCount;
     assert(target->arg_size() == 1);
-    IRBuilder<> builder(target->getContext());
-    builder.SetInsertPoint(target);
+    IRBuilder<> builder(target);
     Value* signal_page = target->getOperand(0);
-    Value* load = builder.CreateLoad(T_size, signal_page, true);
-    return load;
+    builder.CreateLoad(T_size, signal_page, true);
+    target->eraseFromParent();
 }
 
-Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
+void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
 {
     ++GCAllocBytesCount;
-    assert(target->arg_size() == 2);
+    assert(target->arg_size() == 3);
     CallInst *newI;
 
     IRBuilder<> builder(target);
-    builder.SetCurrentDebugLocation(target->getDebugLoc());
     auto ptls = target->getArgOperand(0);
-    Attribute derefAttr;
-
+    auto type = target->getArgOperand(2);
+    uint64_t derefBytes = 0;
     if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) {
         size_t sz = (size_t)CI->getZExtValue();
         // This is strongly architecture and OS dependent
@@ -203,235 +135,135 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
         if (offset < 0) {
             newI = builder.CreateCall(
                 bigAllocFunc,
-                { ptls, ConstantInt::get(T_size, sz + sizeof(void*)) });
-            derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*));
+                { ptls, ConstantInt::get(T_size, sz + sizeof(void*)), type });
+            if (sz > 0)
+                derefBytes = sz;
         }
         else {
             auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
             auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
-            newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize });
-            derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize);
+            newI = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize, type });
+            if (sz > 0)
+                derefBytes = sz;
         }
     } else {
         auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size);
-        size = builder.CreateAdd(size, ConstantInt::get(T_size, sizeof(void*)));
-        newI = builder.CreateCall(allocTypedFunc, { ptls, size, ConstantPointerNull::get(Type::getInt8PtrTy(F.getContext())) });
-        derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sizeof(void*));
+        // allocTypedFunc does not include the type tag in the allocation size!
+        newI = builder.CreateCall(allocTypedFunc, { ptls, size, type });
+        derefBytes = sizeof(void*);
     }
     newI->setAttributes(newI->getCalledFunction()->getAttributes());
-    newI->addRetAttr(derefAttr);
+    unsigned align = std::max((unsigned)target->getRetAlign().valueOrOne().value(), (unsigned)sizeof(void*));
+    newI->addRetAttr(Attribute::getWithAlignment(F.getContext(), Align(align)));
+    if (derefBytes > 0)
+        newI->addDereferenceableRetAttr(derefBytes);
     newI->takeName(target);
-    return newI;
-}
-
-bool FinalLowerGC::doInitialization(Module &M) {
-    // Initialize platform-agnostic references.
-    initAll(M);
-
-    // Initialize platform-specific references.
-    queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
-    poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
-    bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
-    allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
-    T_size = M.getDataLayout().getIntPtrType(M.getContext());
-
-    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
-    unsigned j = 0;
-    for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) {
-        if (!functionList[i])
-            continue;
-        if (i != j)
-            functionList[j] = functionList[i];
-        j++;
-    }
-    if (j != 0)
-        appendToCompilerUsed(M, ArrayRef<GlobalValue*>(functionList, j));
-    return true;
+    target->replaceAllUsesWith(newI);
+    target->eraseFromParent();
 }
 
-bool FinalLowerGC::doFinalization(Module &M)
+static bool hasUse(const JuliaPassContext &ctx, const jl_intrinsics::IntrinsicDescription &v)
 {
-    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
-    queueRootFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr;
-    auto used = M.getGlobalVariable("llvm.compiler.used");
-    if (!used)
-        return false;
-    SmallPtrSet<Constant*, 16> InitAsSet(
-        functionList,
-        functionList + sizeof(functionList) / sizeof(void*));
-    bool changed = false;
-    SmallVector<Constant*, 16> init;
-    ConstantArray *CA = cast<ConstantArray>(used->getInitializer());
-    for (auto &Op : CA->operands()) {
-        Constant *C = cast_or_null<Constant>(Op);
-        if (InitAsSet.count(C->stripPointerCasts())) {
-            changed = true;
-            continue;
-        }
-        init.push_back(C);
-    }
-    if (!changed)
-        return false;
-    used->eraseFromParent();
-    if (init.empty())
-        return true;
-    ArrayType *ATy = ArrayType::get(Type::getInt8PtrTy(M.getContext()), init.size());
-    used = new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
-                                    ConstantArray::get(ATy, init), "llvm.compiler.used");
-    used->setSection("llvm.metadata");
-    return true;
+    auto Intr = ctx.getOrNull(v);
+    return Intr && !Intr->use_empty();
 }
 
-template<typename TIterator>
-static void replaceInstruction(
-    Instruction *oldInstruction,
-    Value *newInstruction,
-    TIterator &it)
+bool FinalLowerGC::shouldRunFinalGC()
 {
-    if (newInstruction != oldInstruction) {
-        oldInstruction->replaceAllUsesWith(newInstruction);
-        it = oldInstruction->eraseFromParent();
-    }
-    else {
-        ++it;
-    }
+    bool should_run = 0;
+    should_run |= hasUse(*this, jl_intrinsics::newGCFrame);
+    should_run |= hasUse(*this, jl_intrinsics::getGCFrameSlot);
+    should_run |= hasUse(*this, jl_intrinsics::pushGCFrame);
+    should_run |= hasUse(*this, jl_intrinsics::popGCFrame);
+    should_run |= hasUse(*this, jl_intrinsics::GCAllocBytes);
+    should_run |= hasUse(*this, jl_intrinsics::queueGCRoot);
+    should_run |= hasUse(*this, jl_intrinsics::safepoint);
+    return should_run;
 }
 
 bool FinalLowerGC::runOnFunction(Function &F)
 {
-    // Check availability of functions again since they might have been deleted.
-    initFunctions(*F.getParent());
-    if (!pgcstack_getter && !adoptthread_func) {
-        LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n");
-        return false;
-    }
-
-    // Look for a call to 'julia.get_pgcstack'.
+    initAll(*F.getParent());
     pgcstack = getPGCstack(F);
-    if (!pgcstack) {
-        LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n");
-        return false;
-    }
-    LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
+    if (!pgcstack || !shouldRunFinalGC())
+        goto verify_skip;
 
-    // Acquire intrinsic functions.
-    auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame);
-    auto pushGCFrameFunc = getOrNull(jl_intrinsics::pushGCFrame);
-    auto popGCFrameFunc = getOrNull(jl_intrinsics::popGCFrame);
-    auto getGCFrameSlotFunc = getOrNull(jl_intrinsics::getGCFrameSlot);
-    auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes);
-    auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot);
-    auto safepointFunc = getOrNull(jl_intrinsics::safepoint);
+    LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
+    queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
+    smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc);
+    bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
+    allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
+    T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
 
     // Lower all calls to supported intrinsics.
-    for (BasicBlock &BB : F) {
-        for (auto it = BB.begin(); it != BB.end();) {
-            auto *CI = dyn_cast<CallInst>(&*it);
-            if (!CI) {
-                ++it;
+    for (auto &BB : F) {
+        for (auto &I : make_early_inc_range(BB)) {
+            auto *CI = dyn_cast<CallInst>(&I);
+            if (!CI)
                 continue;
-            }
 
             Value *callee = CI->getCalledOperand();
             assert(callee);
 
-            if (callee == newGCFrameFunc) {
-                replaceInstruction(CI, lowerNewGCFrame(CI, F), it);
-            }
-            else if (callee == pushGCFrameFunc) {
-                lowerPushGCFrame(CI, F);
-                it = CI->eraseFromParent();
-            }
-            else if (callee == popGCFrameFunc) {
-                lowerPopGCFrame(CI, F);
-                it = CI->eraseFromParent();
-            }
-            else if (callee == getGCFrameSlotFunc) {
-                replaceInstruction(CI, lowerGetGCFrameSlot(CI, F), it);
-            }
-            else if (callee == GCAllocBytesFunc) {
-                replaceInstruction(CI, lowerGCAllocBytes(CI, F), it);
-            }
-            else if (callee == queueGCRootFunc) {
-                replaceInstruction(CI, lowerQueueGCRoot(CI, F), it);
-            }
-            else if (callee == safepointFunc) {
-                lowerSafepoint(CI, F);
-                it = CI->eraseFromParent();
-            }
-            else {
-                ++it;
-            }
+#define LOWER_INTRINSIC(INTRINSIC, LOWER_INTRINSIC_FUNC) \
+            do { \
+                auto intrinsic = getOrNull(jl_intrinsics::INTRINSIC); \
+                if (intrinsic == callee) { \
+                    LOWER_INTRINSIC_FUNC(CI, F); \
+                } \
+            } while (0)
+
+            LOWER_INTRINSIC(newGCFrame, lowerNewGCFrame);
+            LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot);
+            LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame);
+            LOWER_INTRINSIC(popGCFrame, lowerPopGCFrame);
+            LOWER_INTRINSIC(GCAllocBytes, lowerGCAllocBytes);
+            LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot);
+            LOWER_INTRINSIC(safepoint, lowerSafepoint);
+
+#undef LOWER_INTRINSIC
         }
     }
-
     return true;
-}
-
-struct FinalLowerGCLegacy: public FunctionPass {
-    static char ID;
-    FinalLowerGCLegacy() : FunctionPass(ID), finalLowerGC(FinalLowerGC()) {}
-
-protected:
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-        FunctionPass::getAnalysisUsage(AU);
-    }
-
-private:
-    bool runOnFunction(Function &F) override;
-    bool doInitialization(Module &M) override;
-    bool doFinalization(Module &M) override;
-
-    FinalLowerGC finalLowerGC;
-};
+    // Verify that skipping was in fact correct
+    verify_skip:
+    #ifdef JL_VERIFY_PASSES
+        for (auto &BB : F) {
+            for (auto &I : make_early_inc_range(BB)) {
+                auto *CI = dyn_cast<CallInst>(&I);
+                if (!CI)
+                    continue;
 
-bool FinalLowerGCLegacy::runOnFunction(Function &F) {
-    return finalLowerGC.runOnFunction(F);
-}
-
-bool FinalLowerGCLegacy::doInitialization(Module &M) {
-    return finalLowerGC.doInitialization(M);
-}
-
-bool FinalLowerGCLegacy::doFinalization(Module &M) {
-    auto ret = finalLowerGC.doFinalization(M);
-#ifdef JL_VERIFY_PASSES
-    assert(!verifyModule(M, &errs()));
-#endif
-    return ret;
+            Value *callee = CI->getCalledOperand();
+            assert(callee);
+            auto IS_INTRINSIC = [&](auto intrinsic) {
+                auto intrinsic2 = getOrNull(intrinsic);
+                if (intrinsic2 == callee) {
+                    errs() << "Final-GC-lowering didn't eliminate all intrinsics from '" << F.getName() << "', dumping entire module!\n\n";
+                    errs() << *F.getParent() << "\n";
+                    abort();
+                }
+            };
+            IS_INTRINSIC(jl_intrinsics::newGCFrame);
+            IS_INTRINSIC(jl_intrinsics::pushGCFrame);
+            IS_INTRINSIC(jl_intrinsics::popGCFrame);
+            IS_INTRINSIC(jl_intrinsics::getGCFrameSlot);
+            IS_INTRINSIC(jl_intrinsics::GCAllocBytes);
+            IS_INTRINSIC(jl_intrinsics::queueGCRoot);
+            IS_INTRINSIC(jl_intrinsics::safepoint);
+            }
+        }
+    #endif
+    return false;
 }
 
-
-PreservedAnalyses FinalLowerGCPass::run(Module &M, ModuleAnalysisManager &AM)
+PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM)
 {
-    auto finalLowerGC = FinalLowerGC();
-    bool modified = false;
-    modified |= finalLowerGC.doInitialization(M);
-    for (auto &F : M.functions()) {
-        if (F.isDeclaration())
-            continue;
-        modified |= finalLowerGC.runOnFunction(F);
-    }
-    modified |= finalLowerGC.doFinalization(M);
+    if (FinalLowerGC().runOnFunction(F)) {
 #ifdef JL_VERIFY_PASSES
-    assert(!verifyModule(M, &errs()));
+        assert(!verifyLLVMIR(F));
 #endif
-    if (modified) {
         return PreservedAnalyses::allInSet<CFGAnalyses>();
     }
     return PreservedAnalyses::all();
 }
-
-char FinalLowerGCLegacy::ID = 0;
-static RegisterPass<FinalLowerGCLegacy> X("FinalLowerGC", "Final GC intrinsic lowering pass", false, false);
-
-Pass *createFinalLowerGCPass()
-{
-    return new FinalLowerGCLegacy();
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddFinalLowerGCPass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createFinalLowerGCPass());
-}
diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h
new file mode 100644
index 0000000000000..0d21ea0a66cd8
--- /dev/null
+++ b/src/llvm-gc-interface-passes.h
@@ -0,0 +1,419 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+/*
+  LLVM passes that may be partially modified by a third-party GC implementation.
+*/
+
+#include "llvm-version.h"
+#include "passes.h"
+
+#include "llvm/IR/DerivedTypes.h"
+#include <llvm-c/Core.h>
+#include <llvm-c/Types.h>
+
+#include <llvm/ADT/Statistic.h>
+#include <llvm/ADT/BitVector.h>
+#include <llvm/ADT/SparseBitVector.h>
+#include <llvm/ADT/PostOrderIterator.h>
+#include <llvm/ADT/SetVector.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/SmallSet.h>
+#include <llvm/Analysis/CFG.h>
+#include <llvm/Analysis/DomTreeUpdater.h>
+#include <llvm/Analysis/InstSimplifyFolder.h>
+#include <llvm/IR/Value.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/Dominators.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/IntrinsicInst.h>
+#include <llvm/IR/MDBuilder.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/ModuleSlotTracker.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/Verifier.h>
+#include <llvm/Pass.h>
+#include <llvm/Support/Debug.h>
+#include <llvm/Transforms/Utils/BasicBlockUtils.h>
+#include <llvm/Transforms/Utils/ModuleUtils.h>
+
+#include <llvm/InitializePasses.h>
+
+#include "llvm-codegen-shared.h"
+#include "julia.h"
+#include "julia_internal.h"
+#include "julia_assert.h"
+#include "llvm-pass-helpers.h"
+#include <map>
+#include <string>
+#include <optional>
+
+#ifndef LLVM_GC_PASSES_H
+#define LLVM_GC_PASSES_H
+
+using namespace llvm;
+
+/* Julia GC Root Placement pass. For a general overview of the design of GC
+   root lowering, see the devdocs. This file is the actual implementation.
+
+   The actual algorithm is fairly straightforward. First recall the goal of this
+   pass:
+
+   Minimize the number of needed gc roots/stores to them subject to the constraint
+   that at every safepoint, any live gc-tracked pointer (i.e. for which there is
+   a path after this point that contains a use of this pointer) is in some gc slot.
+
+   In particular, in order to understand this algorithm, it is important to
+   realize that the only places where rootedness matters is at safepoints.
+
+   Now, the primary phases of the algorithm are:
+
+   1. Local Scan
+
+      During this step, each Basic Block is inspected and analyzed for local
+      properties. In particular, we want to determine the ordering of any of
+      the following activities:
+
+        - Any Def of a gc-tracked pointer. In general Defs are the results of
+          calls or loads from appropriate memory locations. Phi nodes and
+          selects do complicate this story slightly as described below.
+        - Any use of a gc-tracked or derived pointer. As described in the
+          devdocs, a use is in general one of
+              a) a load from a tracked/derived value
+              b) a store to a tracked/derived value
+              c) a store OF a tracked/derived value
+              d) a use of a value as a call operand (including operand bundles)
+        - Any safepoint
+
+      Crucially, we also perform pointer numbering during the local scan,
+      assigning every Def a unique integer and caching the integer for each
+      derived pointer. This allows us to operate only on the set of Defs (
+      represented by these integers) for the rest of the algorithm. We also
+      maintain some local utility information that is needed by later passes
+      (see the BBState struct for details).
+
+    2. Dataflow Computation
+
+      This computation operates entirely over the function's control flow graph
+      and does not look into a basic block. The algorithm is essentially
+      textbook iterative data flow for liveness computation. However, the
+      data flow equations are slightly more complicated because we also
+      forward propagate rootedness information in addition to backpropagating
+      liveness.
+
+    3. Live Set Computation
+
+      With the liveness information from the previous step, we can now compute,
+      for every safepoint, the set of values live at that particular safepoint.
+      There are three pieces of information being combined here:
+           i. Values that needed to be live due to local analysis (e.g. there
+              was a def, then a safepoint, then a use). This was computed during
+              local analysis.
+          ii. Values that are live across the basic block (i.e. they are live
+              at every safepoint within the basic block). This relies entirely
+              on the liveness information.
+         iii. Values that are now live-out from the basic block (i.e. they are
+              live at every safepoint following their def). During local
+              analysis, we keep, for every safepoint, those values that would
+              be live if they were live out. Here we can check if they are
+              actually live-out and make the appropriate additions to the live
+              set.
+
+       Lastly, we also explicitly compute, for each value, the list of values
+       that are simultaneously live at some safepoint. This is known as an
+       "interference graph" and is the input to the next step.
+
+    4. GC Root coloring
+
+      Two values which are not simultaneously live at a safepoint can share the
+      same slot. This is an important optimization, because otherwise long
+      functions would have exceptionally large GC slots, reducing performance
+      and bloating the size of the stack. Assigning values to these slots is
+      equivalent to doing graph coloring on the interference graph - the graph
+      where nodes are values and two values have an edge if they are
+      simultaneously live at a safepoint - which we computed in the previous
+      step. Now graph coloring in general is a hard problem. However, for SSA
+      form programs, (and most programs in general, by virtue of their
+      structure), the resulting interference graphs are chordal and can be
+      colored optimally in linear time by performing greedy coloring in a
+      perfect elimination order. Now, our interference graphs are likely not
+      entirely chordal due to some non-SSA corner cases. However, using the same
+      algorithm should still give a very good coloring while having sufficiently
+      low runtime.
+
+    5. JLCall frame optimizations
+
+      Unlike earlier iterations of the gc root placement logic, jlcall frames
+      are no longer treated as a special case and need not necessarily be sunk
+      into the gc frame. Additionally, we now emit lifetime
+      intrinsics, so regular stack slot coloring will merge any jlcall frames
+      not sunk into the gc frame. Nevertheless performing such sinking can still
+      be profitable. Since all arguments to a jlcall are guaranteed to be live
+      at that call in some gc slot, we can attempt to rearrange the slots within
+      the gc-frame, or reuse slots not assigned at that particular location
+      for the gcframe. However, even without this optimization, stack frames
+      are at most two times larger than optimal (because regular stack coloring
+      can merge the jlcall allocas).
+
+      N.B.: This step is not yet implemented.
+
+    6. Root placement
+
+      This performs the actual insertion of the GCFrame pushes/pops, zeros out
+      the gc frame and creates the stores to the gc frame according to the
+      stack slot assignment computed in the previous step. GC frames stores
+      are generally sunk right before the first safe point that use them
+      (this is beneficial for code where the primary path does not have
+      safepoints, but some other path - e.g. the error path does). However,
+      if the first safepoint is not dominated by the definition (this can
+      happen due to the non-ssa corner cases), the store is inserted right after
+      the definition.
+
+    7. Cleanup
+
+      This step performs necessary cleanup before passing the IR to codegen. In
+      particular, it removes any calls to julia_from_objref intrinsics and
+      removes the extra operand bundles from ccalls. In the future it could
+      also strip the addrspace information from all values as this
+      information is no longer needed.
+
+
+  There are a couple important special cases that deserve special attention:
+
+    A. PHIs and Selects
+
+      In general PHIs and selects are treated as separate defs for the purposes
+      of the algorithm and their operands as uses of those values. It is
+      important to consider however WHERE the uses of PHI's operands are
+      located. It is neither at the start of the basic block, because the values
+      do not dominate the block (so can't really consider them live-in), nor
+      at the end of the predecessor (because they are actually live out).
+      Instead it is best to think of those uses as living on the edge between
+      the appropriate predecessor and the block containing the PHI.
+
+      Another concern is PHIs of derived values. Since we cannot simply root
+      these values by storing them to a GC slot, we need to insert a new,
+      artificial PHI that tracks the base pointers for the derived values. E.g.
+      in:
+
+      A:
+        %Abase = load addrspace(10) *...
+        %Aderived = addrspacecast %Abase to addrspace(11)
+      B:
+        %Bbase = load addrspace(10) *...
+        %Bderived = addrspacecast %Bbase to addrspace(11)
+      C:
+        %phi = phi [%Aderived, %A
+                    %Bderived, %B]
+
+      we will insert another phi in C to track the relevant base pointers:
+
+        %philift = phi [%Abase, %A
+                        %Bbase, %B]
+
+      We then pretend, for the purposes of numbering that %phi was derived from
+      %philift. Note that in order to be able to do this, we need to be able to
+      perform this lifting either during numbering or instruction scanning.
+
+    B. Vectors of pointers/Union representations
+
+      Since this pass runs very late in the pass pipeline, it runs after the
+      various vectorization passes. As a result, we have to potentially deal
+      with vectors of gc-tracked pointers. For the purposes of most of the
+      algorithm, we simply assign every element of the vector a separate number
+      and no changes are needed. However, those parts of the algorithm that
+      look at IR need to be aware of the possibility of encountering vectors of
+      pointers.
+
+      Similarly, unions (e.g. in call returns) are represented as a struct of
+      a gc-tracked value and an argument selector. We simply assign a single
+      number to this struct and proceed as if it was a single pointer. However,
+      this again requires care at the IR level.
+
+    C. Non mem2reg'd allocas
+
+      Under some circumstances, allocas will still be present in the IR when
+      we get to this pass. We don't try very hard to handle this case, and
+      simply sink the alloca into the GCFrame.
+*/
+
+// 4096 bits == 64 words (64 bit words). Larger bit numbers are faster and doing something
+// substantially smaller here doesn't actually save much memory because of malloc overhead.
+// Too large is bad also though - 4096 was found to be a reasonable middle ground.
+using LargeSparseBitVector = SparseBitVector<4096>;
+
+struct BBState {
+    // Uses in this BB
+    // These do not get updated after local analysis
+    LargeSparseBitVector Defs;
+    LargeSparseBitVector PhiOuts;
+    LargeSparseBitVector UpExposedUses;
+    // These get updated during dataflow
+    LargeSparseBitVector LiveIn;
+    LargeSparseBitVector LiveOut;
+    // auto Safepoints = std::range(LastSafepoint, FirstSafepoint);
+    bool HasSafepoint = false;
+    // This lets us refine alloca tracking to avoid creating GC frames in
+    // some simple functions that only have the initial safepoint.
+    int FirstSafepoint = -1;
+    int LastSafepoint = -1;
+    int FirstSafepointAfterFirstDef = -1;
+};
+
+struct State {
+    Function *const F;
+    DominatorTree *DT;
+
+    // The maximum assigned value number
+    int MaxPtrNumber;
+    // The maximum assigned safepoint number
+    int MaxSafepointNumber;
+    // Cache of numbers assigned to IR values. This includes caching of numbers
+    // for derived values
+    std::map<Value *, int> AllPtrNumbering;
+    std::map<Value *, SmallVector<int, 0>> AllCompositeNumbering;
+    // The reverse of the previous maps
+    std::map<int, Value *> ReversePtrNumbering;
+    // Neighbors in the coloring interference graph. I.e. for each value, the
+    // indices of other values that are used simultaneously at some safe point.
+    SmallVector<LargeSparseBitVector, 0> Neighbors;
+    // The result of the local analysis
+    std::map<const BasicBlock *, BBState> BBStates;
+
+    // Refinement map. If all of the values are rooted
+    // (-1 means an externally rooted value and -2 means a globally/permanently rooted value),
+    // the key is already rooted (but not the other way around).
+    // A value that can be refined to -2 never need any rooting or write barrier.
+    // A value that can be refined to -1 don't need local root but still need write barrier.
+    // At the end of `LocalScan` this map has a few properties
+    // 1. Values are either < 0 or dominates the key
+    // 2. Therefore this is a DAG
+    std::map<int, SmallVector<int, 1>> Refinements;
+
+    // GC preserves map. All safepoints dominated by the map key, but not any
+    // of its uses need to preserve the values listed in the map value.
+    std::map<Instruction *, SmallVector<int, 0>> GCPreserves;
+
+    // The assignment of numbers to safepoints. These have the same ordering as
+    // LiveSets, LiveIfLiveOut, and CalleeRoots.
+    SmallVector<Instruction*, 0> SafepointNumbering;
+
+    // Safepoint number of instructions that can return twice. For now, all
+    // values live at these instructions will get their own, dedicated GC frame
+    // slots, because they have unobservable control flow, so we can't be sure
+    // where they're actually live.
+    SmallVector<int, 0> ReturnsTwice;
+
+    // The set of values live at a particular safepoint
+    SmallVector<LargeSparseBitVector, 0> LiveSets;
+    // Those values that - if live out from our parent basic block - are live
+    // at this safepoint.
+    SmallVector<SmallVector<int, 0>> LiveIfLiveOut;
+    // The set of values that are kept alive by the callee.
+    SmallVector<SmallVector<int, 0>> CalleeRoots;
+    // We don't bother doing liveness on Allocas that were not mem2reg'ed.
+    // they just get directly sunk into the root array.
+    DenseMap<AllocaInst *, unsigned> ArrayAllocas;
+    DenseMap<AllocaInst *, AllocaInst *> ShadowAllocas;
+    SmallVector<std::pair<StoreInst *, unsigned>, 0> TrackedStores;
+    State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
+};
+
+
+struct LateLowerGCFrame:  private JuliaPassContext {
+    function_ref<DominatorTree &()> GetDT;
+    LateLowerGCFrame(function_ref<DominatorTree &()> GetDT) : GetDT(GetDT) {}
+
+public:
+    bool runOnFunction(Function &F, bool *CFGModified = nullptr);
+
+private:
+    Value *pgcstack;
+    Function *smallAllocFunc;
+
+    bool MaybeNoteDef(State &S, BBState &BBS, Value *Def,
+                      SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
+    void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses, Function &F);
+    void NoteUse(State &S, BBState &BBS, Value *V, Function &F) {
+        NoteUse(S, BBS, V, BBS.UpExposedUses, F);
+    }
+
+    void LiftPhi(State &S, PHINode *Phi);
+    void LiftSelect(State &S, SelectInst *SI);
+    Value *MaybeExtractScalar(State &S, std::pair<Value*,int> ValExpr, Instruction *InsertBefore);
+    SmallVector<Value*, 0> MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore);
+    Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore);
+
+    int Number(State &S, Value *V);
+    int NumberBase(State &S, Value *Base);
+    SmallVector<int, 0> NumberAll(State &S, Value *V);
+    SmallVector<int, 0> NumberAllBase(State &S, Value *Base);
+
+    void NoteOperandUses(State &S, BBState &BBS, Instruction &UI);
+    void MaybeTrackDst(State &S, MemTransferInst *MI);
+    void MaybeTrackStore(State &S, StoreInst *I);
+    State LocalScan(Function &F);
+    void ComputeLiveness(State &S);
+    void ComputeLiveSets(State &S);
+    std::pair<SmallVector<int, 0>, int> ColorRoots(const State &S);
+    void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame, Instruction *InsertBefore);
+    void PlaceGCFrameStores(State &S, unsigned MinColorRoot, ArrayRef<int> Colors, int PreAssignedColors, Value *GCFrame);
+    void PlaceGCFrameReset(State &S, unsigned R, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame, Instruction *InsertBefore);
+    void PlaceRootsAndUpdateCalls(ArrayRef<int> Colors, int PreAssignedColors, State &S, std::map<Value *, std::pair<int, int>>);
+    void CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified);
+    bool CleanupIR(Function &F, State *S, bool *CFGModified);
+    void NoteUseChain(State &S, BBState &BBS, User *TheUser);
+    SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
+    void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
+    void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef<int> CalleeRoots);
+    Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V);
+    Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V);
+    Value* lowerGCAllocBytesLate(CallInst *target, Function &F);
+};
+
+// The final GC lowering pass. This pass lowers platform-agnostic GC
+// intrinsics to platform-dependent instruction sequences. The
+// intrinsics it targets are those produced by the late GC frame
+// lowering pass.
+//
+// This pass targets typical back-ends for which the standard Julia
+// runtime library is available. Atypical back-ends should supply
+// their own lowering pass.
+
+struct FinalLowerGC: private JuliaPassContext {
+    bool runOnFunction(Function &F);
+
+private:
+    Function *queueRootFunc;
+    Function *smallAllocFunc;
+    Function *bigAllocFunc;
+    Function *allocTypedFunc;
+    Value *pgcstack;
+    Type *T_size;
+
+    // Lowers a `julia.new_gc_frame` intrinsic.
+    void lowerNewGCFrame(CallInst *target, Function &F);
+
+    // Lowers a `julia.push_gc_frame` intrinsic.
+    void lowerPushGCFrame(CallInst *target, Function &F);
+
+    // Lowers a `julia.pop_gc_frame` intrinsic.
+    void lowerPopGCFrame(CallInst *target, Function &F);
+
+    // Lowers a `julia.get_gc_frame_slot` intrinsic.
+    void lowerGetGCFrameSlot(CallInst *target, Function &F);
+
+    // Lowers a `julia.gc_alloc_bytes` intrinsic.
+    void lowerGCAllocBytes(CallInst *target, Function &F);
+
+    // Lowers a `julia.queue_gc_root` intrinsic.
+    void lowerQueueGCRoot(CallInst *target, Function &F);
+
+    // Lowers a `julia.safepoint` intrinsic.
+    void lowerSafepoint(CallInst *target, Function &F);
+
+    // Check if the pass should be run
+    bool shouldRunFinalGC();
+};
+
+#endif // LLVM_GC_PASSES_H
diff --git a/src/llvm-gc-invariant-verifier.cpp b/src/llvm-gc-invariant-verifier.cpp
index 26288dc09379d..5badbca807569 100644
--- a/src/llvm-gc-invariant-verifier.cpp
+++ b/src/llvm-gc-invariant-verifier.cpp
@@ -14,7 +14,6 @@
 #include <llvm/Analysis/CFG.h>
 #include <llvm/IR/Value.h>
 #include <llvm/IR/Constants.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Dominators.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
@@ -63,8 +62,8 @@ struct GCInvariantVerifier : public InstVisitor<GCInvariantVerifier> {
 };
 
 void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
-    unsigned FromAS = cast<PointerType>(I.getSrcTy())->getAddressSpace();
-    unsigned ToAS = cast<PointerType>(I.getDestTy())->getAddressSpace();
+    unsigned FromAS = I.getSrcTy()->getPointerAddressSpace();
+    unsigned ToAS = I.getDestTy()->getPointerAddressSpace();
     if (FromAS == 0)
         return;
     Check(ToAS != AddressSpace::Loaded && FromAS != AddressSpace::Loaded,
@@ -79,10 +78,10 @@ void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
 }
 
 void GCInvariantVerifier::checkStoreInst(Type *VTy, unsigned AS, Value &SI) {
-    if (VTy->isPointerTy()) {
+    if (VTy->isPtrOrPtrVectorTy()) {
         /* We currently don't obey this for arguments. That's ok - they're
            externally rooted. */
-        unsigned AS = cast<PointerType>(VTy)->getAddressSpace();
+        unsigned AS = VTy->getPointerAddressSpace();
         Check(AS != AddressSpace::CalleeRooted &&
               AS != AddressSpace::Derived,
               "Illegal store of decayed value", &SI);
@@ -108,15 +107,15 @@ void GCInvariantVerifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI) {
 
 void GCInvariantVerifier::visitLoadInst(LoadInst &LI) {
     Type *Ty = LI.getType();
-    if (Ty->isPointerTy()) {
-        unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
+    if (Ty->isPtrOrPtrVectorTy()) {
+        unsigned AS = Ty->getPointerAddressSpace();
         Check(AS != AddressSpace::CalleeRooted &&
               AS != AddressSpace::Derived,
               "Illegal load of gc relevant value", &LI);
     }
     Ty = LI.getPointerOperand()->getType();
-    if (Ty->isPointerTy()) {
-        unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
+    if (Ty->isPtrOrPtrVectorTy()) {
+        unsigned AS = Ty->getPointerAddressSpace();
         Check(AS != AddressSpace::CalleeRooted,
               "Illegal load of callee rooted value", &LI);
     }
@@ -130,18 +129,18 @@ void GCInvariantVerifier::visitReturnInst(ReturnInst &RI) {
     if (!RI.getReturnValue())
         return;
     Type *RTy = RI.getReturnValue()->getType();
-    if (!RTy->isPointerTy())
+    if (!RTy->isPtrOrPtrVectorTy())
         return;
-    unsigned AS = cast<PointerType>(RTy)->getAddressSpace();
+    unsigned AS = RTy->getPointerAddressSpace();
     Check(!isSpecialAS(AS) || AS == AddressSpace::Tracked,
           "Only gc tracked values may be directly returned", &RI);
 }
 
 void GCInvariantVerifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     Type *Ty = GEP.getType();
-    if (!Ty->isPointerTy())
+    if (!Ty->isPtrOrPtrVectorTy())
         return;
-    unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
+    unsigned AS = Ty->getPointerAddressSpace();
     if (!isSpecialAS(AS))
         return;
     /* We're actually ok with GEPs here, as long as they don't feed into any
@@ -162,13 +161,18 @@ void GCInvariantVerifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 void GCInvariantVerifier::visitCallInst(CallInst &CI) {
     Function *Callee = CI.getCalledFunction();
     if (Callee && (Callee->getName() == "julia.call" ||
-                   Callee->getName() == "julia.call2")) {
-        bool First = true;
+                   Callee->getName() == "julia.call2" ||
+                   Callee->getName() == "julia.call3")) {
+        unsigned Fixed = CI.getFunctionType()->getNumParams();
         for (Value *Arg : CI.args()) {
+            if (Fixed) {
+                Fixed--;
+                continue;
+            }
             Type *Ty = Arg->getType();
-            Check(Ty->isPointerTy() && cast<PointerType>(Ty)->getAddressSpace() == (First ? 0 : AddressSpace::Tracked),
-                "Invalid derived pointer in jlcall", &CI);
-            First = false;
+            Check(Ty->isPtrOrPtrVectorTy() &&
+                      Ty->getPointerAddressSpace() == AddressSpace::Tracked,
+                  "Invalid derived pointer in jlcall", &CI);
         }
     }
 }
@@ -193,37 +197,3 @@ PreservedAnalyses GCInvariantVerifierPass::run(Function &F, FunctionAnalysisMana
     }
     return PreservedAnalyses::all();
 }
-
-struct GCInvariantVerifierLegacy : public FunctionPass {
-    static char ID;
-    bool Strong;
-    GCInvariantVerifierLegacy(bool Strong=false) : FunctionPass(ID), Strong(Strong) {}
-
-public:
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-        FunctionPass::getAnalysisUsage(AU);
-        AU.setPreservesAll();
-    }
-
-    bool runOnFunction(Function &F) override {
-        GCInvariantVerifier GIV(Strong);
-        GIV.visit(F);
-        if (GIV.Broken) {
-            abort();
-        }
-        return false;
-    }
-};
-
-char GCInvariantVerifierLegacy::ID = 0;
-static RegisterPass<GCInvariantVerifierLegacy> X("GCInvariantVerifier", "GC Invariant Verification Pass", false, false);
-
-Pass *createGCInvariantVerifierPass(bool Strong) {
-    return new GCInvariantVerifierLegacy(Strong);
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddGCInvariantVerifierPass_impl(LLVMPassManagerRef PM, LLVMBool Strong)
-{
-    unwrap(PM)->add(createGCInvariantVerifierPass(Strong));
-}
diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp
index 8e03fe434a79c..2fb17f26eb694 100644
--- a/src/llvm-julia-licm.cpp
+++ b/src/llvm-julia-licm.cpp
@@ -13,7 +13,6 @@
 #include <llvm/Analysis/ScalarEvolution.h>
 #include <llvm/ADT/Statistic.h>
 #include <llvm/IR/Dominators.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Verifier.h>
 #include <llvm/Transforms/Utils/LoopUtils.h>
 
@@ -60,7 +59,11 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest,
                                   MemorySSAUpdater &MSSAU,
                                   ScalarEvolution *SE,
                                   MemorySSA::InsertionPlace Place = MemorySSA::BeforeTerminator) {
+#if JL_LLVM_VERSION >= 200000
+  I.moveBefore(Dest.getIterator());
+#else
   I.moveBefore(&Dest);
+#endif
   if (MSSAU.getMemorySSA())
     if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
             MSSAU.getMemorySSA()->getMemoryAccess(&I)))
@@ -123,17 +126,6 @@ static bool makeLoopInvariant(Loop *L, Value *V, bool &Changed, Instruction *Ins
   return true; // All non-instructions are loop-invariant.
 }
 
-struct JuliaLICMPassLegacy : public LoopPass {
-    static char ID;
-    JuliaLICMPassLegacy() : LoopPass(ID) {};
-
-    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-
-    protected:
-        void getAnalysisUsage(AnalysisUsage &AU) const override {
-            getLoopAnalysisUsage(AU);
-        }
-};
 struct JuliaLICM : public JuliaPassContext {
     function_ref<DominatorTree &()> GetDT;
     function_ref<LoopInfo &()> GetLI;
@@ -250,7 +242,11 @@ struct JuliaLICM : public JuliaPassContext {
                     });
                     for (unsigned i = 1; i < exit_pts.size(); i++) {
                         // Clone exit
+#if JL_LLVM_VERSION >= 200000
+                        auto CI = CallInst::Create(call, {}, exit_pts[i]->getIterator());
+#else
                         auto CI = CallInst::Create(call, {}, exit_pts[i]);
+#endif
                         exit_pts[i] = CI;
                         createNewInstruction(CI, call, MSSAU);
                         LLVM_DEBUG(dbgs() << "Cloned and sunk gc_preserve_end: " << *CI << "\n");
@@ -342,10 +338,9 @@ struct JuliaLICM : public JuliaPassContext {
                     moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE);
                     IRBuilder<> builder(preheader->getTerminator());
                     builder.SetCurrentDebugLocation(call->getDebugLoc());
-                    auto obj_i8 = builder.CreateBitCast(call, Type::getInt8PtrTy(call->getContext(), call->getType()->getPointerAddressSpace()));
                     // Note that this alignment is assuming the GC allocates at least pointer-aligned memory
                     auto align = Align(DL.getPointerSize(0));
-                    auto clear_obj = builder.CreateMemSet(obj_i8, ConstantInt::get(Type::getInt8Ty(call->getContext()), 0), call->getArgOperand(1), align);
+                    auto clear_obj = builder.CreateMemSet(call, ConstantInt::get(Type::getInt8Ty(call->getContext()), 0), call->getArgOperand(1), align);
                     if (MSSAU.getMemorySSA()) {
                         auto clear_mdef = MSSAU.createMemoryAccessInBB(clear_obj, nullptr, clear_obj->getParent(), MemorySSA::BeforeTerminator);
                         MSSAU.insertDef(cast<MemoryDef>(clear_mdef), true);
@@ -355,37 +350,15 @@ struct JuliaLICM : public JuliaPassContext {
             }
         }
         if (changed && SE) {
-            SE->forgetLoopDispositions(L);
+            SE->forgetLoopDispositions();
         }
 #ifdef JL_VERIFY_PASSES
-        assert(!verifyFunction(*L->getHeader()->getParent(), &errs()));
+        assert(!verifyLLVMIR(*L));
 #endif
         return changed;
     }
 };
 
-bool JuliaLICMPassLegacy::runOnLoop(Loop *L, LPPassManager &LPM) {
-    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
-    auto GetDT = [this]() -> DominatorTree & {
-        return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-    };
-    auto GetLI = [this]() -> LoopInfo & {
-        return getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-    };
-    auto GetMSSA = []() {
-        return nullptr;
-    };
-    auto GetSE = []() {
-        return nullptr;
-    };
-    auto juliaLICM = JuliaLICM(GetDT, GetLI, GetMSSA, GetSE);
-    return juliaLICM.runOnLoop(L, ORE);
-}
-
-char JuliaLICMPassLegacy::ID = 0;
-static RegisterPass<JuliaLICMPassLegacy>
-        Y("JuliaLICM", "LICM for julia specific intrinsics.",
-          false, false);
 } //namespace
 
 PreservedAnalyses JuliaLICMPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -417,14 +390,3 @@ PreservedAnalyses JuliaLICMPass::run(Loop &L, LoopAnalysisManager &AM,
     }
     return PreservedAnalyses::all();
 }
-
-Pass *createJuliaLICMPass()
-{
-    return new JuliaLICMPassLegacy();
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraJuliaLICMPass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createJuliaLICMPass());
-}
diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc
index 39030d60a44fc..bd223499f37af 100644
--- a/src/llvm-julia-passes.inc
+++ b/src/llvm-julia-passes.inc
@@ -1,27 +1,26 @@
 //Module passes
 #ifdef MODULE_PASS
-MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass())
-MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass())
-MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass())
-MODULE_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass())
-MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass())
-MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass())
-MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass())
-MODULE_PASS("LowerPTLSPass", LowerPTLSPass, LowerPTLSPass())
+MODULE_PASS("CPUFeatures", CPUFeaturesPass())
+MODULE_PASS("RemoveNI", RemoveNIPass())
+MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass())
+MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass())
+MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass())
+MODULE_PASS("LowerPTLSPass", LowerPTLSPass())
 #endif
 
 //Function passes
 #ifdef FUNCTION_PASS
-FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass, DemoteFloat16Pass())
-FUNCTION_PASS("CombineMulAdd", CombineMulAddPass, CombineMulAddPass())
-FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass, LateLowerGCPass())
-FUNCTION_PASS("AllocOpt", AllocOptPass, AllocOptPass())
-FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass, PropagateJuliaAddrspacesPass())
-FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass, LowerExcHandlersPass())
-FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass, GCInvariantVerifierPass())
+FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass())
+FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass())
+FUNCTION_PASS("AllocOpt", AllocOptPass())
+FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass())
+FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass())
+FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass())
+FUNCTION_PASS("ExpandAtomicModify", ExpandAtomicModifyPass())
 #endif
 
 //Loop passes
 #ifdef LOOP_PASS
-LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass())
+LOOP_PASS("JuliaLICM", JuliaLICMPass())
+LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass())
 #endif
diff --git a/src/llvm-julia-task-dispatcher.h b/src/llvm-julia-task-dispatcher.h
new file mode 100644
index 0000000000000..dd4037378b6b6
--- /dev/null
+++ b/src/llvm-julia-task-dispatcher.h
@@ -0,0 +1,465 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+namespace {
+
+using namespace llvm::orc;
+
+template <typename U> struct future_value_storage {
+  // Union disables default construction/destruction semantics, allowing us to
+  // use placement new/delete for precise control over value lifetime
+  union {
+    U value_;
+  };
+
+  future_value_storage() {}
+  ~future_value_storage() {}
+};
+
+template <> struct future_value_storage<void> {
+  // No value_ member for void
+};
+
+struct JuliaTaskDispatcher : public TaskDispatcher {
+    /// Forward declarations
+    class future_base;
+    void dispatch(std::unique_ptr<Task> T) override;
+    void shutdown() override;
+    void work_until(future_base &F);
+private:
+  /// C++ does not support non-static thread_local variables, so this needs to
+  /// store both the task and the associated dispatcher queue so that shutdown
+  /// can wait for the correct tasks to finish.
+  thread_local static SmallVector<std::pair<std::unique_ptr<Task>, JuliaTaskDispatcher*>> TaskQueue;
+  std::mutex DispatchMutex;
+  std::condition_variable WorkFinishedCV;
+  SmallVector<future_base *> WaitingFutures;
+
+public:
+
+/// @name ORC Promise/Future Classes
+///
+/// ORC-aware promise/future implementation that integrates with the
+/// TaskDispatcher system to allow efficient cooperative multitasking while
+/// waiting for results (with certain limitations on what can be awaited).
+/// Together they provide building blocks for a full async/await-like runtime
+/// for llvm that supports multiple threads.
+///
+/// Unlike std::promise/std::future alone, these classes can help dispatch other
+/// tasks while waiting, preventing deadlocks and improving overall system
+/// throughput. They have a similar API, though with some important differences
+/// and some features simply not currently implemented.
+///
+/// @{
+
+template <typename T> class promise;
+template <typename T> class future;
+
+/// Status for future/promise state
+enum class FutureStatus : uint8_t { NotReady = 0, Ready = 1 };
+
+/// @}
+
+/// Type-erased base class for futures, generally for scheduler use to avoid
+/// needing virtual dispatches
+class future_base {
+public:
+  /// Check if the future is now ready with a value (precondition: get_promise()
+  /// must have been called)
+  bool ready() const {
+    if (!valid())
+      report_fatal_error("ready() called before get_promise()");
+    return state_->status_.load(std::memory_order_acquire) == FutureStatus::Ready;
+  }
+
+  /// Check if the future is in a valid state (not moved-from and get_promise() called)
+  bool valid() const { return state_ != nullptr; }
+
+  /// Wait for the future to be ready, helping with task dispatch
+  void wait(JuliaTaskDispatcher &D) {
+    // Keep helping with task dispatch until our future is ready
+    if (!ready()) {
+      D.work_until(*this);
+      if (state_->status_.load(std::memory_order_relaxed) != FutureStatus::Ready)
+        report_fatal_error(
+            "work_until() returned without this future being ready");
+    }
+  }
+
+protected:
+  struct state_base {
+    std::atomic<FutureStatus> status_{FutureStatus::NotReady};
+  };
+
+  future_base(state_base *state) : state_(state) {}
+  future_base() = default;
+
+  /// Only allow deleting the future once it is invalid
+  ~future_base() {
+    if (state_)
+      report_fatal_error("get() must be called before future destruction (ensuring promise::set_value memory is valid)");
+  }
+
+  // Move constructor and assignment
+  future_base(future_base &&other) noexcept : state_(other.state_) {
+    other.state_ = nullptr;
+  }
+
+  future_base &operator=(future_base &&other) noexcept {
+    if (this != &other) {
+      this->~future_base();
+      state_ = other.state_;
+      other.state_ = nullptr;
+    }
+    return *this;
+  }
+
+  state_base *state_;
+};
+
+/// TaskDispatcher-aware future class for cooperative await.
+///
+/// @tparam T The type of value this future will provide. Use void for futures
+/// that
+///           signal completion without providing a value.
+///
+/// This future implementation is similar to `std::future`, so most code can
+/// transition to it easily. However, it differs from `std::future` in a few
+/// key ways to be aware of:
+/// - No exception support (or the overhead for it).
+/// - The future is created before the promise, then the promise is created
+///   from the future.
+/// - The future is in an invalid state until get_promise() has been called.
+/// - Waiting operations (get(&D), wait(&D)) help dispatch other tasks while
+///   blocked, requiring an additional argument of which TaskDispatcher object
+///   of where all associated work will be scheduled.
+/// - While `wait` may be called multiple times and on multiple threads, all of
+///   them must have returned before calling `get` on exactly one thread.
+/// - Must call get() exactly once before destruction (enforced with
+///   `report_fatal_error`) after each call to `get_promise`. Internal state is
+///   freed when `get` returns, and allocated when `get_promise` is called.
+///
+/// Other notable features, in common with `std::future`:
+/// - Supports both value types and void specialization through the same
+/// interface.
+/// - Thread-safe through atomic operations.
+/// - Provides acquire-release ordering with `std::promise::set_value()`.
+/// - Concurrent access to any method (including to `ready`) on multiple threads
+///   is not allowed.
+/// - Holding any locks while calling `get()` is likely to lead to deadlock.
+///
+/// @warning Users should avoid borrowing references to futures. References may
+/// go out of scope and break the uniqueness contract, which may break the
+/// soundness of the types. Always use move semantics or pass by value.
+
+template <typename T> class future : public future_base {
+public:
+  future() : future_base(nullptr) {}
+  future(const future &) = delete;
+  future &operator=(const future &) = delete;
+  future(future &&) = default;
+  future &operator=(future &&) = default;
+
+  /// Get the value, helping with task dispatch while waiting.
+  /// This will destroy the underlying value, so this must be called exactly
+  /// once, which returns the future to the initial state.
+  T get(JuliaTaskDispatcher &D) {
+    if (!valid())
+      report_fatal_error("get() must only be called once, after get_promise()");
+    wait(D);
+    auto state_ = static_cast<state*>(this->state_);
+    this->state_ = nullptr;
+    return take_value(state_);
+  }
+
+  /// Get the associated promise (must only be called once)
+  promise<T> get_promise() {
+    if (valid())
+      report_fatal_error("get_promise() can only be called once");
+    auto state_ = new state();
+    this->state_ = state_;
+    return promise<T>(state_);
+  }
+
+private:
+  friend class promise<T>;
+
+  // Template the state struct with EBCO so that future<void> has no wasted
+  // overhead for the value. The declaration of future_value_storage is far
+  // above here since GCC doesn't implement it properly when nested.
+  struct state : future_base::state_base, future_value_storage<T> {};
+
+  template <typename U = T>
+  typename std::enable_if<!std::is_void<U>::value, U>::type take_value(state *state_) {
+    T result = std::move(state_->value_);
+    state_->value_.~T();
+    delete state_;
+    return result;
+  }
+
+  template <typename U = T>
+  typename std::enable_if<std::is_void<U>::value, U>::type take_value(state *state_) {
+    delete state_;
+  }
+};
+
+/// TaskDispatcher-aware promise class that provides values to associated
+/// futures.
+///
+/// @tparam T The type of value this promise will provide. Use void for promises
+/// that
+///           signal completion without providing a value.
+///
+/// This promise implementation provides the value-setting side of the
+/// promise/future pair and integrates with the ORC TaskDispatcher system. Key
+/// characteristics:
+/// - Created from a future via get_promise() rather than creating the future from the promise.
+/// - Must call get_future() on the thread that created it (it can be passed to another thread, but do not borrow a reference and use that to mutate it from another thread).
+/// - Must call set_value() exactly once per `get_promise()` call to provide the result.
+/// - Thread-safe from set_value to get.
+/// - Move-only semantics to prevent accidental copying.
+///
+/// The `promise` can usually be passed to another thread in one of two ways:
+/// - With move semantics:
+///     * `[P = F.get_promise()] () { P.set_value(); }`
+///     * `[P = std::move(P)] () { P.set_value(); }`
+///     * Advantages: clearer where `P` is owned, automatic deadlock detection
+///     on destruction,
+///       easier memory management if the future is returned from the function.
+/// - By reference:
+///     * `[&P] () { P.set_value(); }`
+///     * Advantages: simpler memory management if the future is consumed in the
+///     same function.
+///     * Disadvantages: more difficult memory management if the future is
+///     returned from the function, no deadlock detection.
+///
+/// @warning Users should avoid borrowing references to promises. References may
+/// go out of scope and break the uniqueness contract, which may break the
+/// soundness of the types. Always use move semantics or pass by value.
+///
+/// @par Error Handling:
+/// The promise/future system uses report_fatal_error() for misuse:
+/// - Calling set_value() more than once.
+/// - Destroying a future without calling get().
+/// - Calling get() more than once on a future.
+///
+/// @par Thread Safety:
+/// - Each promise/future must only be accessed by one thread, as concurrent
+///   calls to the API functions may result in crashes.
+/// - Multiple threads can safely access different promise/future pairs.
+/// - set_value() and get() operations are atomic and thread-safe.
+/// - Move operations should only be performed by a single thread.
+template <typename T> class promise {
+  friend class future<T>;
+
+public:
+  promise() : state_(nullptr) {}
+
+  ~promise() {
+    // Assert proper promise lifecycle: ensure set_value was called if promise was valid.
+    // This can catch deadlocks where a promise is created but set_value() is
+    // never called, though only if the promise is moved from instead of
+    // borrowed from the frame with the future.
+    // Empty promises (state_ == nullptr) are allowed to be destroyed without calling set_value.
+  }
+
+  promise(const promise &) = delete;
+  promise &operator=(const promise &) = delete;
+
+  promise(promise &&other) noexcept
+      : state_(other.state_) {
+    other.state_ = nullptr;
+  }
+
+  promise &operator=(promise &&other) noexcept {
+    if (this != &other) {
+      this->~promise();
+      state_ = other.state_;
+      other.state_ = nullptr;
+    }
+    return *this;
+  }
+
+
+  /// Set the value (must only be called once)
+  // In C++20, this std::conditional weirdness can probably be replaced just
+  // with requires. It ensures that we don't try to define a method for `void&`,
+  // but that if the user calls set_value(v) for any value v that they get a
+  // member function error, instead of no member named 'value_'.
+  template <typename U = T>
+  void
+  set_value(const typename std::conditional<std::is_void<T>::value,
+                                            std::nullopt_t, T>::type &value) const {
+    assert(state_ && "set_value() can only be called once");
+    new (&state_->value_) T(value);
+    state_->status_.store(FutureStatus::Ready, std::memory_order_release);
+    state_ = nullptr;
+  }
+
+  template <typename U = T>
+  void set_value(typename std::conditional<std::is_void<T>::value,
+                                           std::nullopt_t, T>::type &&value) const {
+    assert(state_ && "set_value() can only be called once");
+    new (&state_->value_) T(std::move(value));
+    state_->status_.store(FutureStatus::Ready, std::memory_order_release);
+    state_ = nullptr;
+  }
+
+  template <typename U = T>
+  typename std::enable_if<std::is_void<U>::value, void>::type
+  set_value(const std::nullopt_t &value) = delete;
+
+  template <typename U = T>
+  typename std::enable_if<std::is_void<U>::value, void>::type
+  set_value(std::nullopt_t &&value) = delete;
+
+  template <typename U = T>
+  typename std::enable_if<std::is_void<U>::value, void>::type set_value() const {
+    assert(state_ && "set_value() can only be called once");
+    state_->status_.store(FutureStatus::Ready, std::memory_order_release);
+    state_ = nullptr;
+  }
+
+  /// Swap with another promise
+  void swap(promise &other) noexcept {
+    using std::swap;
+    swap(state_, other.state_);
+  }
+
+private:
+  explicit promise(typename future<T>::state *state)
+      : state_(state) {}
+
+  mutable typename future<T>::state *state_;
+};
+
+}; // class JuliaTaskDispatcher
+
+thread_local SmallVector<std::pair<std::unique_ptr<Task>, JuliaTaskDispatcher *>> JuliaTaskDispatcher::TaskQueue;
+
+void JuliaTaskDispatcher::dispatch(std::unique_ptr<Task> T) {
+  TaskQueue.push_back(std::pair(std::move(T), this));
+}
+
+void JuliaTaskDispatcher::shutdown() {
+  // Keep processing until no tasks belonging to this dispatcher remain
+  while (true) {
+    // Check if any task belongs to this dispatcher
+    auto it = std::find_if(
+        TaskQueue.begin(), TaskQueue.end(),
+        [this](const auto &TaskPair) { return TaskPair.second == this; });
+
+    // If no tasks belonging to this dispatcher, we're done
+    if (it == TaskQueue.end())
+      return;
+
+    // Create a future/promise pair to wait for completion of this task
+    future<void> taskFuture;
+    // Replace the task with a GenericNamedTask that wraps the original task
+    // with a notification of completion that this thread can work_until.
+    auto originalTask = std::move(it->first);
+    it->first = makeGenericNamedTask(
+        [originalTask = std::move(originalTask),
+         taskPromise = taskFuture.get_promise()]() {
+          originalTask->run();
+          taskPromise.set_value();
+        },
+        "Shutdown task marker");
+
+    // Wait for the task to complete
+    taskFuture.get(*this);
+  }
+}
+
+void JuliaTaskDispatcher::work_until(future_base &F) {
+  while (!F.ready()) {
+    // First, process any tasks in our local queue
+    // Process in LIFO order (most recently added first) to avoid deadlocks
+    // when tasks have dependencies on each other
+    while (!TaskQueue.empty()) {
+      {
+        auto TaskPair = std::move(TaskQueue.back());
+        TaskQueue.pop_back();
+        TaskPair.first->run();
+      }
+
+      // Notify any threads that might be waiting for work to complete
+      {
+        std::lock_guard<std::mutex> Lock(DispatchMutex);
+        bool ShouldNotify = llvm::any_of(
+            WaitingFutures, [](future_base *F) { return F->ready(); });
+        if (ShouldNotify) {
+          WaitingFutures.clear();
+          WorkFinishedCV.notify_all();
+        }
+      }
+
+      // Check if our future is now ready
+      if (F.ready())
+        return;
+    }
+
+    // If we get here, our queue is empty but the future isn't ready
+    // We need to wait for other threads to finish work that should complete our
+    // future
+    {
+      std::unique_lock<std::mutex> Lock(DispatchMutex);
+      WaitingFutures.push_back(&F);
+      WorkFinishedCV.wait(Lock, [&F]() { return F.ready(); });
+    }
+  }
+}
+
+} // End namespace
+
+namespace std {
+template <typename T>
+void swap(::JuliaTaskDispatcher::promise<T> &lhs, ::JuliaTaskDispatcher::promise<T> &rhs) noexcept {
+  lhs.swap(rhs);
+}
+} // End namespace std
+
+// n.b. this actually is sometimes a safepoint
+Expected<SymbolMap>
+safelookup(ExecutionSession &ES,
+           const JITDylibSearchOrder &SearchOrder,
+           SymbolLookupSet Symbols, LookupKind K = LookupKind::Static,
+           SymbolState RequiredState = SymbolState::Ready,
+           RegisterDependenciesFunction RegisterDependencies = NoDependenciesToRegister) JL_NOTSAFEPOINT {
+  JuliaTaskDispatcher::future<MSVCPExpected<SymbolMap>> PromisedFuture;
+  auto NotifyComplete = [PromisedResult = PromisedFuture.get_promise()](Expected<SymbolMap> R) {
+    PromisedResult.set_value(std::move(R));
+  };
+  ES.lookup(K, SearchOrder, std::move(Symbols), RequiredState,
+        std::move(NotifyComplete), RegisterDependencies);
+  return PromisedFuture.get(static_cast<JuliaTaskDispatcher&>(ES.getExecutorProcessControl().getDispatcher()));
+}
+
+Expected<ExecutorSymbolDef>
+safelookup(ExecutionSession &ES,
+           const JITDylibSearchOrder &SearchOrder,
+           SymbolStringPtr Name,
+           SymbolState RequiredState = SymbolState::Ready) JL_NOTSAFEPOINT {
+  SymbolLookupSet Names({Name});
+
+  if (auto ResultMap = safelookup(ES, SearchOrder, std::move(Names), LookupKind::Static,
+                                  RequiredState, NoDependenciesToRegister)) {
+    assert(ResultMap->size() == 1 && "Unexpected number of results");
+    assert(ResultMap->count(Name) && "Missing result for symbol");
+    return std::move(ResultMap->begin()->second);
+  } else
+    return ResultMap.takeError();
+}
+
+Expected<ExecutorSymbolDef>
+safelookup(ExecutionSession &ES,
+           ArrayRef<JITDylib *> SearchOrder, SymbolStringPtr Name,
+           SymbolState RequiredState = SymbolState::Ready) JL_NOTSAFEPOINT {
+  return safelookup(ES, makeJITDylibSearchOrder(SearchOrder), Name, RequiredState);
+}
+
+Expected<ExecutorSymbolDef>
+safelookup(ExecutionSession &ES,
+           ArrayRef<JITDylib *> SearchOrder, StringRef Name,
+           SymbolState RequiredState = SymbolState::Ready) JL_NOTSAFEPOINT {
+  return safelookup(ES, SearchOrder, ES.intern(Name), RequiredState);
+}
diff --git a/src/llvm-late-gc-lowering-mmtk.cpp b/src/llvm-late-gc-lowering-mmtk.cpp
new file mode 100644
index 0000000000000..5539c8dbcf153
--- /dev/null
+++ b/src/llvm-late-gc-lowering-mmtk.cpp
@@ -0,0 +1,96 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "llvm-gc-interface-passes.h"
+
+Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
+{
+    assert(target->arg_size() == 3);
+
+    IRBuilder<> builder(target);
+    auto ptls = target->getArgOperand(0);
+    auto type = target->getArgOperand(2);
+    if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) {
+        size_t sz = (size_t)CI->getZExtValue();
+        // This is strongly architecture and OS dependent
+        int osize;
+        int offset = jl_gc_classify_pools(sz, &osize);
+        if (offset >= 0) {
+            // In this case instead of lowering julia.gc_alloc_bytes to jl_gc_small_alloc
+            // We do a slowpath/fastpath check and lower it only on the slowpath, returning
+            // the cursor and updating it in the fastpath.
+            auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
+            auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize);
+
+            // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk.
+            // Setting this to false will increase allocation overhead a lot, and should only be used for debugging.
+            const bool INLINE_FASTPATH_ALLOCATION = true;
+
+            if (INLINE_FASTPATH_ALLOCATION) {
+                // Assuming we use the first immix allocator.
+                // FIXME: We should get the allocator index and type from MMTk.
+                auto allocator_offset = offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix);
+
+                auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor));
+                auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()),  allocator_offset + offsetof(ImmixAllocator, limit));
+
+                auto cursor_ptr = builder.CreateInBoundsGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos);
+                auto cursor = builder.CreateAlignedLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, Align(sizeof(void *)), "cursor");
+
+                // offset = 8
+                auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8));
+                auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor);
+                auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor);
+                // alignment 16 (15 = 16 - 1)
+                auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta");
+                auto result = builder.CreateNSWAdd(cursor, delta, "result");
+
+                auto new_cursor = builder.CreateNSWAdd(result, pool_osize);
+
+                auto limit_ptr = builder.CreateInBoundsGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos);
+                auto limit = builder.CreateAlignedLoad(Type::getInt64Ty(target->getContext()), limit_ptr, Align(sizeof(void *)), "limit");
+
+                auto gt_limit = builder.CreateICmpSGT(new_cursor, limit);
+
+                auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction());
+                auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction());
+
+                auto next_instr = target->getNextNode();
+                SmallVector<uint32_t, 2> Weights{1, 9};
+
+                MDBuilder MDB(F.getContext());
+                SplitBlockAndInsertIfThenElse(gt_limit, next_instr, &slowpath, &fastpath, false, false, MDB.createBranchWeights(Weights));
+
+                builder.SetInsertPoint(next_instr);
+                auto phiNode = builder.CreatePHI(target->getCalledFunction()->getReturnType(), 2, "phi_fast_slow");
+
+                // slowpath
+                builder.SetInsertPoint(slowpath);
+                auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
+                auto new_call = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
+                new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
+                builder.CreateBr(next_instr->getParent());
+
+                // fastpath
+                builder.SetInsertPoint(fastpath);
+                builder.CreateStore(new_cursor, cursor_ptr);
+
+                // ptls->gc_tls.gc_num.allocd += osize;
+                auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls_common) + offsetof(jl_gc_tls_states_common_t, gc_num));
+                auto pool_alloc_tls = builder.CreateInBoundsGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
+                auto pool_allocd = builder.CreateAlignedLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls, Align(sizeof(void *)));
+                auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
+                builder.CreateStore(pool_allocd_total, pool_alloc_tls);
+
+                auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
+                auto v_as_ptr = builder.CreateIntToPtr(v_raw, smallAllocFunc->getReturnType());
+                builder.CreateBr(next_instr->getParent());
+
+                phiNode->addIncoming(new_call, slowpath);
+                phiNode->addIncoming(v_as_ptr, fastpath);
+                phiNode->takeName(target);
+                return phiNode;
+            }
+        }
+    }
+    return target;
+}
diff --git a/src/llvm-late-gc-lowering-stock.cpp b/src/llvm-late-gc-lowering-stock.cpp
new file mode 100644
index 0000000000000..2a11487773396
--- /dev/null
+++ b/src/llvm-late-gc-lowering-stock.cpp
@@ -0,0 +1,9 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "llvm-gc-interface-passes.h"
+
+Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
+{
+    // Do nothing for the stock GC
+    return target;
+}
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index 6d87abd68d7c2..ae1351ae41ca1 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -1,380 +1,11 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "llvm-version.h"
-#include "passes.h"
-
-#include <llvm-c/Core.h>
-#include <llvm-c/Types.h>
-
-#include <llvm/ADT/BitVector.h>
-#include <llvm/ADT/SparseBitVector.h>
-#include <llvm/ADT/PostOrderIterator.h>
-#include <llvm/ADT/SetVector.h>
-#include <llvm/ADT/SmallVector.h>
-#include <llvm/ADT/SmallSet.h>
-#include "llvm/Analysis/CFG.h"
-#include <llvm/IR/Value.h>
-#include <llvm/IR/Constants.h>
-#include <llvm/IR/Dominators.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/Instructions.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/LegacyPassManager.h>
-#include <llvm/IR/MDBuilder.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/ModuleSlotTracker.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/IR/Verifier.h>
-#include <llvm/Pass.h>
-#include <llvm/Support/Debug.h>
-#include <llvm/Transforms/Utils/BasicBlockUtils.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
-
-#include <llvm/InitializePasses.h>
-
-#include "llvm-codegen-shared.h"
-#include "julia.h"
-#include "julia_internal.h"
-#include "julia_assert.h"
-#include "llvm-pass-helpers.h"
-#include <map>
+#include "llvm-gc-interface-passes.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Casting.h"
 
 #define DEBUG_TYPE "late_lower_gcroot"
 
-using namespace llvm;
-
-/* Julia GC Root Placement pass. For a general overview of the design of GC
-   root lowering, see the devdocs. This file is the actual implementation.
-
-   The actual algorithm is fairly straightforward. First recall the goal of this
-   pass:
-
-   Minimize the number of needed gc roots/stores to them subject to the constraint
-   that at every safepoint, any live gc-tracked pointer (i.e. for which there is
-   a path after this point that contains a use of this pointer) is in some gc slot.
-
-   In particular, in order to understand this algorithm, it is important to
-   realize that the only places where rootedness matters is at safepoints.
-
-   Now, the primary phases of the algorithm are:
-
-   1. Local Scan
-
-      During this step, each Basic Block is inspected and analyzed for local
-      properties. In particular, we want to determine the ordering of any of
-      the following activities:
-
-        - Any Def of a gc-tracked pointer. In general Defs are the results of
-          calls or loads from appropriate memory locations. Phi nodes and
-          selects do complicate this story slightly as described below.
-        - Any use of a gc-tracked or derived pointer. As described in the
-          devdocs, a use is in general one of
-              a) a load from a tracked/derived value
-              b) a store to a tracked/derived value
-              c) a store OF a tracked/derived value
-              d) a use of a value as a call operand (including operand bundles)
-        - Any safepoint
-
-      Crucially, we also perform pointer numbering during the local scan,
-      assigning every Def a unique integer and caching the integer for each
-      derived pointer. This allows us to operate only on the set of Defs (
-      represented by these integers) for the rest of the algorithm. We also
-      maintain some local utility information that is needed by later passes
-      (see the BBState struct for details).
-
-    2. Dataflow Computation
-
-      This computation operates entirely over the function's control flow graph
-      and does not look into a basic block. The algorithm is essentially
-      textbook iterative data flow for liveness computation. However, the
-      data flow equations are slightly more complicated because we also
-      forward propagate rootedness information in addition to backpropagating
-      liveness.
-
-    3. Live Set Computation
-
-      With the liveness information from the previous step, we can now compute,
-      for every safepoint, the set of values live at that particular safepoint.
-      There are three pieces of information being combined here:
-           i. Values that needed to be live due to local analysis (e.g. there
-              was a def, then a safepoint, then a use). This was computed during
-              local analysis.
-          ii. Values that are live across the basic block (i.e. they are live
-              at every safepoint within the basic block). This relies entirely
-              on the liveness information.
-         iii. Values that are now live-out from the basic block (i.e. they are
-              live at every safepoint following their def). During local
-              analysis, we keep, for every safepoint, those values that would
-              be live if they were live out. Here we can check if they are
-              actually live-out and make the appropriate additions to the live
-              set.
-
-       Lastly, we also explicitly compute, for each value, the list of values
-       that are simultaneously live at some safepoint. This is known as an
-       "interference graph" and is the input to the next step.
-
-    4. GC Root coloring
-
-      Two values which are not simultaneously live at a safepoint can share the
-      same slot. This is an important optimization, because otherwise long
-      functions would have exceptionally large GC slots, reducing performance
-      and bloating the size of the stack. Assigning values to these slots is
-      equivalent to doing graph coloring on the interference graph - the graph
-      where nodes are values and two values have an edge if they are
-      simultaneously live at a safepoint - which we computed in the previous
-      step. Now graph coloring in general is a hard problem. However, for SSA
-      form programs, (and most programs in general, by virtue of their
-      structure), the resulting interference graphs are chordal and can be
-      colored optimally in linear time by performing greedy coloring in a
-      perfect elimination order. Now, our interference graphs are likely not
-      entirely chordal due to some non-SSA corner cases. However, using the same
-      algorithm should still give a very good coloring while having sufficiently
-      low runtime.
-
-    5. JLCall frame optimizations
-
-      Unlike earlier iterations of the gc root placement logic, jlcall frames
-      are no longer treated as a special case and need not necessarily be sunk
-      into the gc frame. Additionally, we now emit lifetime
-      intrinsics, so regular stack slot coloring will merge any jlcall frames
-      not sunk into the gc frame. Nevertheless performing such sinking can still
-      be profitable. Since all arguments to a jlcall are guaranteed to be live
-      at that call in some gc slot, we can attempt to rearrange the slots within
-      the gc-frame, or re-use slots not assigned at that particular location
-      for the gcframe. However, even without this optimization, stack frames
-      are at most two times larger than optimal (because regular stack coloring
-      can merge the jlcall allocas).
-
-      N.B.: This step is not yet implemented.
-
-    6. Root placement
-
-      This performs the actual insertion of the GCFrame pushes/pops, zeros out
-      the gc frame and creates the stores to the gc frame according to the
-      stack slot assignment computed in the previous step. GC frames stores
-      are generally sunk right before the first safe point that use them
-      (this is beneficial for code where the primary path does not have
-      safepoints, but some other path - e.g. the error path does). However,
-      if the first safepoint is not dominated by the definition (this can
-      happen due to the non-ssa corner cases), the store is inserted right after
-      the definition.
-
-    7. Cleanup
-
-      This step performs necessary cleanup before passing the IR to codegen. In
-      particular, it removes any calls to julia_from_objref intrinsics and
-      removes the extra operand bundles from ccalls. In the future it could
-      also strip the addrspace information from all values as this
-      information is no longer needed.
-
-
-  There are a couple important special cases that deserve special attention:
-
-    A. PHIs and Selects
-
-      In general PHIs and selects are treated as separate defs for the purposes
-      of the algorithm and their operands as uses of those values. It is
-      important to consider however WHERE the uses of PHI's operands are
-      located. It is neither at the start of the basic block, because the values
-      do not dominate the block (so can't really consider them live-in), nor
-      at the end of the predecessor (because they are actually live out).
-      Instead it is best to think of those uses as living on the edge between
-      the appropriate predecessor and the block containing the PHI.
-
-      Another concern is PHIs of derived values. Since we cannot simply root
-      these values by storing them to a GC slot, we need to insert a new,
-      artificial PHI that tracks the base pointers for the derived values. E.g.
-      in:
-
-      A:
-        %Abase = load addrspace(10) *...
-        %Aderived = addrspacecast %Abase to addrspace(11)
-      B:
-        %Bbase = load addrspace(10) *...
-        %Bderived = addrspacecast %Bbase to addrspace(11)
-      C:
-        %phi = phi [%Aderived, %A
-                    %Bderived, %B]
-
-      we will insert another phi in C to track the relevant base pointers:
-
-        %philift = phi [%Abase, %A
-                        %Bbase, %B]
-
-      We then pretend, for the purposes of numbering that %phi was derived from
-      %philift. Note that in order to be able to do this, we need to be able to
-      perform this lifting either during numbering or instruction scanning.
-
-    B. Vectors of pointers/Union representations
-
-      Since this pass runs very late in the pass pipeline, it runs after the
-      various vectorization passes. As a result, we have to potentially deal
-      with vectors of gc-tracked pointers. For the purposes of most of the
-      algorithm, we simply assign every element of the vector a separate number
-      and no changes are needed. However, those parts of the algorithm that
-      look at IR need to be aware of the possibility of encountering vectors of
-      pointers.
-
-      Similarly, unions (e.g. in call returns) are represented as a struct of
-      a gc-tracked value and an argument selector. We simply assign a single
-      number to this struct and proceed as if it was a single pointer. However,
-      this again requires care at the IR level.
-
-    C. Non mem2reg'd allocas
-
-      Under some circumstances, allocas will still be present in the IR when
-      we get to this pass. We don't try very hard to handle this case, and
-      simply sink the alloca into the GCFrame.
-*/
-
-// 4096 bits == 64 words (64 bit words). Larger bit numbers are faster and doing something
-// substantially smaller here doesn't actually save much memory because of malloc overhead.
-// Too large is bad also though - 4096 was found to be a reasonable middle ground.
-using LargeSparseBitVector = SparseBitVector<4096>;
-
-struct BBState {
-    // Uses in this BB
-    // These do not get updated after local analysis
-    LargeSparseBitVector Defs;
-    LargeSparseBitVector PhiOuts;
-    LargeSparseBitVector UpExposedUses;
-    // These get updated during dataflow
-    LargeSparseBitVector LiveIn;
-    LargeSparseBitVector LiveOut;
-    std::vector<int> Safepoints;
-    int TopmostSafepoint = -1;
-    bool HasSafepoint = false;
-    // Have we gone through this basic block in our local scan yet?
-    bool Done = false;
-};
-
-struct State {
-    Function *const F;
-    DominatorTree *DT;
-
-    // The maximum assigned value number
-    int MaxPtrNumber;
-    // The maximum assigned safepoint number
-    int MaxSafepointNumber;
-    // Cache of numbers assigned to IR values. This includes caching of numbers
-    // for derived values
-    std::map<Value *, int> AllPtrNumbering;
-    std::map<Value *, std::vector<int>> AllCompositeNumbering;
-    // The reverse of the previous maps
-    std::map<int, Value *> ReversePtrNumbering;
-    // Neighbors in the coloring interference graph. I.e. for each value, the
-    // indices of other values that are used simultaneously at some safe point.
-    std::vector<LargeSparseBitVector> Neighbors;
-    // The result of the local analysis
-    std::map<const BasicBlock *, BBState> BBStates;
-
-    // Refinement map. If all of the values are rooted
-    // (-1 means an externally rooted value and -2 means a globally/permanently rooted value),
-    // the key is already rooted (but not the other way around).
-    // A value that can be refined to -2 never need any rooting or write barrier.
-    // A value that can be refined to -1 don't need local root but still need write barrier.
-    // At the end of `LocalScan` this map has a few properties
-    // 1. Values are either < 0 or dominates the key
-    // 2. Therefore this is a DAG
-    std::map<int, SmallVector<int, 1>> Refinements;
-
-    // GC preserves map. All safepoints dominated by the map key, but not any
-    // of its uses need to preserve the values listed in the map value.
-    std::map<Instruction *, std::vector<int>> GCPreserves;
-
-    // The assignment of numbers to safepoints. The indices in the map
-    // are indices into the next three maps which store safepoint properties
-    std::map<Instruction *, int> SafepointNumbering;
-
-    // Reverse mapping index -> safepoint
-    std::vector<Instruction *> ReverseSafepointNumbering;
-
-    // Instructions that can return twice. For now, all values live at these
-    // instructions will get their own, dedicated GC frame slots, because they
-    // have unobservable control flow, so we can't be sure where they're
-    // actually live. All of these are also considered safepoints.
-    std::vector<Instruction *> ReturnsTwice;
-
-    // The set of values live at a particular safepoint
-    std::vector< LargeSparseBitVector > LiveSets;
-    // Those values that - if live out from our parent basic block - are live
-    // at this safepoint.
-    std::vector<std::vector<int>> LiveIfLiveOut;
-    // The set of values that are kept alive by the callee.
-    std::vector<std::vector<int>> CalleeRoots;
-    // We don't bother doing liveness on Allocas that were not mem2reg'ed.
-    // they just get directly sunk into the root array.
-    std::vector<AllocaInst *> Allocas;
-    DenseMap<AllocaInst *, unsigned> ArrayAllocas;
-    DenseMap<AllocaInst *, AllocaInst *> ShadowAllocas;
-    std::vector<std::pair<StoreInst *, unsigned>> TrackedStores;
-    State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
-};
-
-
-
-struct LateLowerGCFrameLegacy: public FunctionPass {
-    static char ID;
-    LateLowerGCFrameLegacy() : FunctionPass(ID) {}
-
-protected:
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-        FunctionPass::getAnalysisUsage(AU);
-        AU.addRequired<DominatorTreeWrapperPass>();
-        AU.addPreserved<DominatorTreeWrapperPass>();
-        AU.setPreservesCFG();
-    }
-
-private:
-    bool runOnFunction(Function &F) override;
-};
-
-struct LateLowerGCFrame:  private JuliaPassContext {
-    function_ref<DominatorTree &()> GetDT;
-    LateLowerGCFrame(function_ref<DominatorTree &()> GetDT) : GetDT(GetDT) {}
-
-public:
-    bool runOnFunction(Function &F, bool *CFGModified = nullptr);
-
-private:
-    CallInst *pgcstack;
-
-    void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector<int> &SafepointsSoFar, SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
-    void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses);
-    void NoteUse(State &S, BBState &BBS, Value *V) {
-        NoteUse(S, BBS, V, BBS.UpExposedUses);
-    }
-
-    void LiftPhi(State &S, PHINode *Phi);
-    void LiftSelect(State &S, SelectInst *SI);
-    Value *MaybeExtractScalar(State &S, std::pair<Value*,int> ValExpr, Instruction *InsertBefore);
-    std::vector<Value*> MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore);
-    Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore);
-
-    int Number(State &S, Value *V);
-    int NumberBase(State &S, Value *Base);
-    std::vector<int> NumberAll(State &S, Value *V);
-    std::vector<int> NumberAllBase(State &S, Value *Base);
-
-    void NoteOperandUses(State &S, BBState &BBS, User &UI);
-    void MaybeTrackDst(State &S, MemTransferInst *MI);
-    void MaybeTrackStore(State &S, StoreInst *I);
-    State LocalScan(Function &F);
-    void ComputeLiveness(State &S);
-    void ComputeLiveSets(State &S);
-    std::vector<int> ColorRoots(const State &S);
-    void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame, Instruction *InsertBefore);
-    void PlaceGCFrameStores(State &S, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame);
-    void PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>);
-    bool CleanupIR(Function &F, State *S, bool *CFGModified);
-    void NoteUseChain(State &S, BBState &BBS, User *TheUser);
-    SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
-    void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
-    void RefineLiveSet(LargeSparseBitVector &LS, State &S, const std::vector<int> &CalleeRoots);
-    Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V);
-    Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V);
-};
-
 static unsigned getValueAddrSpace(Value *V) {
     return V->getType()->getPointerAddressSpace();
 }
@@ -394,16 +25,18 @@ static bool isSpecialPtr(Type *Ty) {
 
 // return how many Special pointers are in T (count > 0),
 // and if there is anything else in T (all == false)
-CountTrackedPointers::CountTrackedPointers(Type *T) {
+CountTrackedPointers::CountTrackedPointers(Type *T, bool ignore_loaded) {
     if (isa<PointerType>(T)) {
         if (isSpecialPtr(T)) {
+            if (ignore_loaded && T->getPointerAddressSpace() == AddressSpace::Loaded)
+                return;
             count++;
             if (T->getPointerAddressSpace() != AddressSpace::Tracked)
                 derived = true;
         }
     } else if (isa<StructType>(T) || isa<ArrayType>(T) || isa<VectorType>(T)) {
         for (Type *ElT : T->subtypes()) {
-            auto sub = CountTrackedPointers(ElT);
+            auto sub = CountTrackedPointers(ElT, ignore_loaded);
             count += sub.count;
             all &= sub.all;
             derived |= sub.derived;
@@ -419,6 +52,20 @@ CountTrackedPointers::CountTrackedPointers(Type *T) {
         all = false;
 }
 
+bool hasLoadedTy(Type *T) {
+    if (isa<PointerType>(T)) {
+        if (T->getPointerAddressSpace() == AddressSpace::Loaded)
+            return true;
+    } else if (isa<StructType>(T) || isa<ArrayType>(T) || isa<VectorType>(T)) {
+        for (Type *ElT : T->subtypes()) {
+            if (hasLoadedTy(ElT))
+                return true;
+        }
+    }
+    return false;
+}
+
+
 unsigned getCompositeNumElements(Type *T) {
     if (auto *ST = dyn_cast<StructType>(T))
         return ST->getNumElements();
@@ -431,7 +78,7 @@ unsigned getCompositeNumElements(Type *T) {
 }
 
 // Walk through a Type, and record the element path to every tracked value inside
-void TrackCompositeType(Type *T, std::vector<unsigned> &Idxs, std::vector<std::vector<unsigned>> &Numberings) {
+void TrackCompositeType(Type *T, SmallVector<unsigned, 0> &Idxs, SmallVector<SmallVector<unsigned, 0>, 0> &Numberings) {
     if (isa<PointerType>(T)) {
         if (isSpecialPtr(T))
             Numberings.push_back(Idxs);
@@ -447,15 +94,14 @@ void TrackCompositeType(Type *T, std::vector<unsigned> &Idxs, std::vector<std::v
     }
 }
 
-std::vector<std::vector<unsigned>> TrackCompositeType(Type *T) {
-    std::vector<unsigned> Idxs;
-    std::vector<std::vector<unsigned>> Numberings;
+SmallVector<SmallVector<unsigned, 0>, 0> TrackCompositeType(Type *T) {
+    SmallVector<unsigned, 0> Idxs;
+    SmallVector<SmallVector<unsigned, 0>, 0> Numberings;
     TrackCompositeType(T, Idxs, Numberings);
     return Numberings;
 }
 
 
-
 // Walk through simple expressions to until we hit something that requires root numbering
 // If the input value is a scalar (pointer), we may return a composite value as base
 // in which case the second member of the pair is the index of the value in the vector.
@@ -500,18 +146,19 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
             CurrentV = EEI->getVectorOperand();
         }
         else if (auto LI = dyn_cast<LoadInst>(CurrentV)) {
-            if (auto PtrT = dyn_cast<PointerType>(LI->getType()->getScalarType())) {
-                if (PtrT->getAddressSpace() == AddressSpace::Loaded) {
-                    CurrentV = LI->getPointerOperand();
-                    fld_idx = -1;
-                    if (!isSpecialPtr(CurrentV->getType())) {
-                        // This could really be anything, but it's not loaded
-                        // from a tracked pointer, so it doesn't matter what
-                        // it is--just pick something simple.
-                        CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
-                    }
-                    continue;
+            if (hasLoadedTy(LI->getType())) {
+                // This is the old (now deprecated) implementation for loaded.
+                // New code should use the gc_loaded intrinsic to ensure that
+                // the load is paired with the correct Tracked value.
+                CurrentV = LI->getPointerOperand();
+                fld_idx = -1;
+                if (!isSpecialPtr(CurrentV->getType())) {
+                    // This could really be anything, but it's not loaded
+                    // from a tracked pointer, so it doesn't matter what
+                    // it is--just pick something simple.
+                    CurrentV = ConstantPointerNull::get(PointerType::get(V->getContext(), 0));
                 }
+                continue;
             }
             // In general a load terminates a walk
             break;
@@ -526,44 +173,70 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
             (void)LI;
             break;
         }
-        else if (auto II = dyn_cast<IntrinsicInst>(CurrentV)) {
-            // Some intrinsics behave like LoadInst followed by a SelectInst
-            // This should never happen in a derived addrspace (since those cannot be stored to memory)
-            // so we don't need to lift these operations, but we do need to check if it's loaded and continue walking the base pointer
+        else if (auto *II = dyn_cast<IntrinsicInst>(CurrentV)) {
             if (II->getIntrinsicID() == Intrinsic::masked_load ||
                 II->getIntrinsicID() == Intrinsic::masked_gather) {
+                // Some intrinsics behave like LoadInst followed by a SelectInst
+                // This should never happen in a derived addrspace (since those cannot be stored to memory)
+                // so we don't need to lift these operations, but we do need to check if it's loaded and continue walking the base pointer
                 if (auto VTy = dyn_cast<VectorType>(II->getType())) {
-                    if (auto PtrT = dyn_cast<PointerType>(VTy->getElementType())) {
-                        if (PtrT->getAddressSpace() == AddressSpace::Loaded) {
-                            Value *Mask = II->getOperand(2);
-                            Value *Passthrough = II->getOperand(3);
-                            if (!isa<Constant>(Mask) || !cast<Constant>(Mask)->isAllOnesValue()) {
-                                assert(isa<UndefValue>(Passthrough) && "unimplemented");
-                                (void)Passthrough;
+                    if (hasLoadedTy(VTy->getElementType())) {
+                        Value *Mask = II->getOperand(2);
+                        Value *Passthrough = II->getOperand(3);
+                        if (!isa<Constant>(Mask) || !cast<Constant>(Mask)->isAllOnesValue()) {
+                            assert(isa<UndefValue>(Passthrough) && "unimplemented");
+                            (void)Passthrough;
+                        }
+                        CurrentV = II->getOperand(0);
+                        if (II->getIntrinsicID() == Intrinsic::masked_load) {
+                            fld_idx = -1;
+                            if (!isSpecialPtr(CurrentV->getType())) {
+                                CurrentV = ConstantPointerNull::get(PointerType::get(V->getContext(), 0));
                             }
-                            CurrentV = II->getOperand(0);
-                            if (II->getIntrinsicID() == Intrinsic::masked_load) {
-                                fld_idx = -1;
-                                if (!isSpecialPtr(CurrentV->getType())) {
-                                    CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
-                                }
-                            } else {
-                                if (auto VTy2 = dyn_cast<VectorType>(CurrentV->getType())) {
-                                    if (!isSpecialPtr(VTy2->getElementType())) {
-                                        CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext()));
-                                        fld_idx = -1;
-                                    }
+                        } else {
+                            if (auto VTy2 = dyn_cast<VectorType>(CurrentV->getType())) {
+                                if (!isSpecialPtr(VTy2->getElementType())) {
+                                    CurrentV = ConstantPointerNull::get(PointerType::get(V->getContext(), 0));
+                                    fld_idx = -1;
                                 }
                             }
-                            continue;
                         }
+                        continue;
                     }
                 }
                 // In general a load terminates a walk
                 break;
             }
+            else if (II->getIntrinsicID() == Intrinsic::vector_extract) {
+                if (auto VTy = dyn_cast<VectorType>(II->getType())) {
+                    if (hasLoadedTy(VTy->getElementType())) {
+                        Value *Idx = II->getOperand(1);
+                        if (!isa<ConstantInt>(Idx)) {
+                            assert(isa<UndefValue>(Idx) && "unimplemented");
+                            (void)Idx;
+                        }
+                        CurrentV = II->getOperand(0);
+                        fld_idx = -1;
+                        continue;
+                    }
+                }
+                break;
+            } else {
+                // Unknown Intrinsic
+                break;
+            }
+        }
+        else if (auto CI = dyn_cast<CallInst>(CurrentV)) {
+            auto callee = CI->getCalledFunction();
+            if (callee && callee->getName() == "julia.gc_loaded") {
+                CurrentV = CI->getArgOperand(0);
+                continue;
+            }
+            // Unknown Call
+            break;
         }
         else {
+            // Unknown Instruction
             break;
         }
     }
@@ -593,7 +266,7 @@ Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair<Value*,int> ValE
     }
     else if (ValExpr.second != -1) {
         auto Tracked = TrackCompositeType(V->getType());
-        auto Idxs = makeArrayRef(Tracked.at(ValExpr.second));
+        auto Idxs = ArrayRef<unsigned>(Tracked[ValExpr.second]);
         auto IdxsNotVec = Idxs.slice(0, Idxs.size() - 1);
         Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec);
         bool IsVector = isa<VectorType>(FinalT);
@@ -602,26 +275,26 @@ Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair<Value*,int> ValE
         if (T->getAddressSpace() != AddressSpace::Tracked) {
             // if V isn't tracked, get the shadow def
             auto Numbers = NumberAllBase(S, V);
-            int BaseNumber = Numbers.at(ValExpr.second);
+            int BaseNumber = Numbers[ValExpr.second];
             if (BaseNumber >= 0)
                 V = GetPtrForNumber(S, BaseNumber, InsertBefore);
             else
                 V = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
             return V;
         }
+        IRBuilder<InstSimplifyFolder> foldbuilder(InsertBefore->getContext(), InstSimplifyFolder(InsertBefore->getModule()->getDataLayout()));
+        foldbuilder.SetInsertPoint(InsertBefore);
         if (Idxs.size() > IsVector)
-            V = ExtractValueInst::Create(V, IsVector ? IdxsNotVec : Idxs, "", InsertBefore);
+            V = foldbuilder.CreateExtractValue(V, IsVector ? IdxsNotVec : Idxs);
         if (IsVector)
-            V = ExtractElementInst::Create(V,
-                    ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back()),
-                    "", InsertBefore);
+            V = foldbuilder.CreateExtractElement(V, ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back()));
     }
     return V;
 }
 
-std::vector<Value*> LateLowerGCFrame::MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore) {
+SmallVector<Value*, 0> LateLowerGCFrame::MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore) {
     auto Numbers = NumberAllBase(S, BaseVec);
-    std::vector<Value*> V{Numbers.size()};
+    SmallVector<Value*, 0> V{Numbers.size()};
     Value *V_rnull = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
     for (unsigned i = 0; i < V.size(); ++i) {
         if (Numbers[i] >= 0) // ignores undef and poison values
@@ -637,7 +310,7 @@ Value *LateLowerGCFrame::GetPtrForNumber(State &S, unsigned Num, Instruction *In
     Value *Val = S.ReversePtrNumbering[Num];
     unsigned Idx = -1;
     if (!isa<PointerType>(Val->getType())) {
-        const std::vector<int> &AllNums = S.AllCompositeNumbering[Val];
+        const SmallVector<int, 0> &AllNums = S.AllCompositeNumbering[Val];
         for (Idx = 0; Idx < AllNums.size(); ++Idx) {
             if ((unsigned)AllNums[Idx] == Num)
                 break;
@@ -654,20 +327,17 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
         // already visited here--nothing to do
         return;
     }
-    std::vector<int> Numbers;
-    unsigned NumRoots = 1;
-    if (auto VTy = dyn_cast<VectorType>(SI->getType())) {
-        ElementCount EC = VTy->getElementCount();
-        Numbers.resize(EC.getKnownMinValue(), -1);
-    }
-    else
-        assert(isa<PointerType>(SI->getType()) && "unimplemented");
     assert(!isTrackedValue(SI));
+    SmallVector<int, 0> Numbers;
+    unsigned NumRoots = 1;
+    Type *STy = SI->getType();
+    if (!isa<PointerType>(STy))
+        Numbers.resize(CountTrackedPointers(STy).count, -1);
     // find the base root for the arguments
     Value *TrueBase = MaybeExtractScalar(S, FindBaseValue(S, SI->getTrueValue(), false), SI);
     Value *FalseBase = MaybeExtractScalar(S, FindBaseValue(S, SI->getFalseValue(), false), SI);
-    std::vector<Value*> TrueBases;
-    std::vector<Value*> FalseBases;
+    SmallVector<Value*, 0> TrueBases;
+    SmallVector<Value*, 0> FalseBases;
     if (!isa<PointerType>(TrueBase->getType())) {
         TrueBases = MaybeExtractVector(S, TrueBase, SI);
         assert(TrueBases.size() == Numbers.size());
@@ -700,14 +370,18 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
         if (isa<VectorType>(Cond->getType())) {
             Cond = ExtractElementInst::Create(Cond,
                     ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i),
+#if JL_LLVM_VERSION >= 200000
+                    "", SI->getIterator());
+#else
                     "", SI);
+#endif
         }
-        if (FalseElem->getType() != TrueElem->getType()) {
-            // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-            assert(FalseElem->getContext().supportsTypedPointers());
-            FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI);
-        }
+        assert(FalseElem->getType() == TrueElem->getType());
+#if JL_LLVM_VERSION >= 200000
+        SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI->getIterator());
+#else
         SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI);
+#endif
         int Number = ++S.MaxPtrNumber;
         S.AllPtrNumbering[SelectBase] = Number;
         S.ReversePtrNumbering[Number] = SelectBase;
@@ -737,22 +411,23 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
         return;
     // need to handle each element (may just be one scalar)
     SmallVector<PHINode *, 2> lifted;
-    std::vector<int> Numbers;
+    SmallVector<int, 0> Numbers;
     unsigned NumRoots = 1;
-    if (auto VTy = dyn_cast<FixedVectorType>(Phi->getType())) {
-        NumRoots = VTy->getNumElements();
+    Type *PTy = Phi->getType();
+    if (!isa<PointerType>(PTy)) {
+        NumRoots = CountTrackedPointers(PTy).count;
         Numbers.resize(NumRoots);
     }
-    else {
-        // TODO: SVE
-        assert(isa<PointerType>(Phi->getType()) && "unimplemented");
-    }
     for (unsigned i = 0; i < NumRoots; ++i) {
+#if JL_LLVM_VERSION >= 200000
+        PHINode *lift = PHINode::Create(T_prjlvalue, Phi->getNumIncomingValues(), "gclift", Phi->getIterator());
+#else
         PHINode *lift = PHINode::Create(T_prjlvalue, Phi->getNumIncomingValues(), "gclift", Phi);
+#endif
         int Number = ++S.MaxPtrNumber;
         S.AllPtrNumbering[lift] = Number;
         S.ReversePtrNumbering[Number] = lift;
-        if (!isa<VectorType>(Phi->getType()))
+        if (isa<PointerType>(PTy))
             S.AllPtrNumbering[Phi] = Number;
         else
             Numbers[i] = Number;
@@ -766,7 +441,7 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
         BasicBlock *IncomingBB = Phi->getIncomingBlock(i);
         Instruction *Terminator = IncomingBB->getTerminator();
         Value *Base = MaybeExtractScalar(S, FindBaseValue(S, Incoming, false), Terminator);
-        std::vector<Value*> IncomingBases;
+        SmallVector<Value*, 0> IncomingBases;
         if (!isa<PointerType>(Base->getType())) {
             IncomingBases = MaybeExtractVector(S, Base, Terminator);
             assert(IncomingBases.size() == NumRoots);
@@ -778,29 +453,7 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
                 BaseElem = Base;
             else
                 BaseElem = IncomingBases[i];
-            if (BaseElem->getType() != T_prjlvalue) {
-                // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                assert(BaseElem->getContext().supportsTypedPointers());
-                auto &remap = CastedRoots[i][BaseElem];
-                if (!remap) {
-                    if (auto constant = dyn_cast<Constant>(BaseElem)) {
-                        remap = ConstantExpr::getBitCast(constant, T_prjlvalue, "");
-                    } else {
-                        Instruction *InsertBefore;
-                        if (auto arg = dyn_cast<Argument>(BaseElem)) {
-                            InsertBefore = &*arg->getParent()->getEntryBlock().getFirstInsertionPt();
-                        } else {
-                            assert(isa<Instruction>(BaseElem) && "Unknown value type detected!");
-                            InsertBefore = cast<Instruction>(BaseElem)->getNextNonDebugInstruction();
-                        }
-                        while (isa<PHINode>(InsertBefore)) {
-                            InsertBefore = InsertBefore->getNextNonDebugInstruction();
-                        }
-                        remap = new BitCastInst(BaseElem, T_prjlvalue, "", InsertBefore);
-                    }
-                }
-                BaseElem = remap;
-            }
+            assert(BaseElem->getType() == T_prjlvalue);
             lift->addIncoming(BaseElem, IncomingBB);
         }
     }
@@ -826,11 +479,11 @@ int LateLowerGCFrame::NumberBase(State &S, Value *CurrentV)
         Number = -1;
     } else if (isa<SelectInst>(CurrentV) && !isTrackedValue(CurrentV)) {
         LiftSelect(S, cast<SelectInst>(CurrentV));
-        Number = S.AllPtrNumbering.at(CurrentV);
+        Number = S.AllPtrNumbering[CurrentV];
         return Number;
     } else if (isa<PHINode>(CurrentV) && !isTrackedValue(CurrentV)) {
         LiftPhi(S, cast<PHINode>(CurrentV));
-        Number = S.AllPtrNumbering.at(CurrentV);
+        Number = S.AllPtrNumbering[CurrentV];
         return Number;
     } else if (isa<ExtractValueInst>(CurrentV)) {
         auto Numbers = NumberAllBase(S, CurrentV);
@@ -853,7 +506,7 @@ int LateLowerGCFrame::Number(State &S, Value *V) {
         Number = NumberBase(S, CurrentV.first);
     } else {
         auto Numbers = NumberAllBase(S, CurrentV.first);
-        Number = Numbers.at(CurrentV.second);
+        Number = Numbers[CurrentV.second];
     }
     if (V != CurrentV.first)
         S.AllPtrNumbering[V] = Number;
@@ -861,18 +514,18 @@ int LateLowerGCFrame::Number(State &S, Value *V) {
 }
 
 // assign pointer numbers to a def instruction
-std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
+SmallVector<int, 0> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
     if (isa<PointerType>(CurrentV->getType())) {
         auto it = S.AllPtrNumbering.find(CurrentV);
         if (it != S.AllPtrNumbering.end())
-            return std::vector<int>({it->second});
+            return SmallVector<int, 0>({it->second});
     } else {
         auto it = S.AllCompositeNumbering.find(CurrentV);
         if (it != S.AllCompositeNumbering.end())
             return it->second;
     }
 
-    std::vector<int> Numbers;
+    SmallVector<int, 0> Numbers;
     auto tracked = CountTrackedPointers(CurrentV->getType());
     if (tracked.count == 0)
         return Numbers;
@@ -881,16 +534,16 @@ std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
         Numbers.resize(tracked.count, -1);
     }
     else if (auto *SVI = dyn_cast<ShuffleVectorInst>(CurrentV)) {
-        std::vector<int> Numbers1 = NumberAll(S, SVI->getOperand(0));
-        std::vector<int> Numbers2 = NumberAll(S, SVI->getOperand(1));
+        SmallVector<int, 0> Numbers1 = NumberAll(S, SVI->getOperand(0));
+        SmallVector<int, 0> Numbers2 = NumberAll(S, SVI->getOperand(1));
         auto Mask = SVI->getShuffleMask();
         for (auto idx : Mask) {
             if (idx == -1) {
                 Numbers.push_back(-1);
             } else if ((unsigned)idx < Numbers1.size()) {
-                Numbers.push_back(Numbers1.at(idx));
+                Numbers.push_back(Numbers1[idx]);
             } else {
-                Numbers.push_back(Numbers2.at(idx - Numbers1.size()));
+                Numbers.push_back(Numbers2[idx - Numbers1.size()]);
             }
         }
     } else if (auto *IEI = dyn_cast<InsertElementInst>(CurrentV)) {
@@ -899,15 +552,31 @@ std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
         Numbers = NumberAll(S, IEI->getOperand(0));
         int ElNumber = Number(S, IEI->getOperand(1));
         Numbers[idx] = ElNumber;
+    // C++17
+    // } else if (auto *II = dyn_cast<IntrinsicInst>(CurrentV); II && II->getIntrinsicID() == Intrinsic::vector_insert) {
+    } else if (isa<IntrinsicInst>(CurrentV) && cast<IntrinsicInst>(CurrentV)->getIntrinsicID() == Intrinsic::vector_insert) {
+        auto *II = dyn_cast<IntrinsicInst>(CurrentV);
+        // Vector insert is a bit like a shuffle so use the same approach
+        SmallVector<int, 0> Numbers1 = NumberAll(S, II->getOperand(0));
+        SmallVector<int, 0> Numbers2 = NumberAll(S, II->getOperand(1));
+        unsigned first_idx = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
+        for (unsigned i = 0; i < Numbers1.size(); ++i) {
+            if (i < first_idx)
+                Numbers.push_back(Numbers1[i]);
+            else if (i - first_idx < Numbers2.size())
+                Numbers.push_back(Numbers2[i - first_idx]);
+            else
+                Numbers.push_back(Numbers1[i]);
+        }
     } else if (auto *IVI = dyn_cast<InsertValueInst>(CurrentV)) {
         Numbers = NumberAll(S, IVI->getAggregateOperand());
         auto Tracked = TrackCompositeType(IVI->getType());
         assert(Tracked.size() == Numbers.size());
-        std::vector<int> InsertNumbers = NumberAll(S, IVI->getInsertedValueOperand());
+        SmallVector<int, 0> InsertNumbers = NumberAll(S, IVI->getInsertedValueOperand());
         auto Idxs = IVI->getIndices();
         unsigned j = 0;
         for (unsigned i = 0; i < Tracked.size(); ++i) {
-            auto Elem = makeArrayRef(Tracked[i]);
+            auto Elem = ArrayRef<unsigned>(Tracked[i]);
             if (Elem.size() < Idxs.size())
                 continue;
             if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs)
@@ -920,7 +589,7 @@ std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
         assert(Tracked.size() == BaseNumbers.size());
         auto Idxs = EVI->getIndices();
         for (unsigned i = 0; i < Tracked.size(); ++i) {
-            auto Elem = makeArrayRef(Tracked[i]);
+            auto Elem = ArrayRef<unsigned>(Tracked[i]);
             if (Elem.size() < Idxs.size())
                 continue;
             if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs)
@@ -938,10 +607,10 @@ std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
             llvm_unreachable("Unexpected generating operation for derived values");
         }
         if (isa<PointerType>(CurrentV->getType())) {
-            auto Number = S.AllPtrNumbering.at(CurrentV);
+            auto Number = S.AllPtrNumbering[CurrentV];
             Numbers.resize(1, Number);
         } else {
-            Numbers = S.AllCompositeNumbering.at(CurrentV);
+            Numbers = S.AllCompositeNumbering[CurrentV];
         }
     } else {
         assert((isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) || isa<PHINode>(CurrentV) || isa<SelectInst>(CurrentV) ||
@@ -964,17 +633,17 @@ std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
 }
 
 // gets the pointer number for every gc tracked value inside V
-std::vector<int> LateLowerGCFrame::NumberAll(State &S, Value *V) {
+SmallVector<int, 0> LateLowerGCFrame::NumberAll(State &S, Value *V) {
     if (isa<PointerType>(V->getType())) {
         auto it = S.AllPtrNumbering.find(V);
         if (it != S.AllPtrNumbering.end())
-            return std::vector<int>({it->second});
+            return SmallVector<int, 0>({it->second});
     } else {
         auto it = S.AllCompositeNumbering.find(V);
         if (it != S.AllCompositeNumbering.end())
             return it->second;
     }
-    std::vector<int> Numbers;
+    SmallVector<int, 0> Numbers;
     auto tracked = CountTrackedPointers(V->getType());
     if (tracked.count == 0)
         return Numbers;
@@ -1007,16 +676,6 @@ std::vector<int> LateLowerGCFrame::NumberAll(State &S, Value *V) {
 }
 
 
-static void MaybeResize(BBState &BBS, unsigned Idx) {
-    /*
-    if (BBS.Defs.size() <= Idx) {
-        BBS.Defs.resize(Idx + 1);
-        BBS.UpExposedUses.resize(Idx + 1);
-        BBS.PhiOuts.resize(Idx + 1);
-    }
-    */
-}
-
 static bool HasBitSet(const LargeSparseBitVector &BV, unsigned Bit) {
     return BV.test(Bit);
 }
@@ -1025,57 +684,70 @@ static bool HasBitSet(const BitVector &BV, unsigned Bit) {
     return Bit < BV.size() && BV[Bit];
 }
 
-static void NoteDef(State &S, BBState &BBS, int Num, const std::vector<int> &SafepointsSoFar) {
+static void NoteDef(State &S, BBState &BBS, int Num) {
     assert(Num >= 0);
-    MaybeResize(BBS, Num);
     assert(!BBS.Defs.test(Num) && "SSA Violation or misnumbering?");
     BBS.Defs.set(Num);
     BBS.UpExposedUses.reset(Num);
     // This value could potentially be live at any following safe point
     // if it ends up live out, so add it to the LiveIfLiveOut lists for all
     // following safepoints.
-    for (int Safepoint : SafepointsSoFar) {
-        S.LiveIfLiveOut[Safepoint].push_back(Num);
-    }
+    if (BBS.HasSafepoint)
+        for (int Safepoint = BBS.FirstSafepoint; Safepoint >= BBS.LastSafepoint; --Safepoint)
+            S.LiveIfLiveOut[Safepoint].push_back(Num);
 }
 
-void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector<int> &SafepointsSoFar, SmallVector<int, 1> &&RefinedPtr) {
+bool LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def,
+                                    SmallVector<int, 1> &&RefinedPtr) {
     Type *RT = Def->getType();
     if (isa<PointerType>(RT)) {
         if (!isSpecialPtr(RT))
-            return;
+            return false;
         assert(isTrackedValue(Def) && "Returned value of GC interest, but not tracked?");
         int Num = Number(S, Def);
-        NoteDef(S, BBS, Num, SafepointsSoFar);
+        NoteDef(S, BBS, Num);
         if (!RefinedPtr.empty())
             S.Refinements[Num] = std::move(RefinedPtr);
+        return true;
     }
     else {
-        std::vector<int> Nums = NumberAll(S, Def);
+        SmallVector<int, 0> Nums = NumberAll(S, Def);
         for (int Num : Nums) {
-            NoteDef(S, BBS, Num, SafepointsSoFar);
+            NoteDef(S, BBS, Num);
             if (!RefinedPtr.empty())
                 S.Refinements[Num] = RefinedPtr;
         }
+        return !Nums.empty();
     }
 }
 
-static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, std::vector<int> CalleeRoots) {
+static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, SmallVectorImpl<int> &CalleeRoots) {
+    assert(BBS.FirstSafepoint == -1 || BBS.FirstSafepoint == S.MaxSafepointNumber);
     int Number = ++S.MaxSafepointNumber;
-    S.SafepointNumbering[CI] = Number;
-    S.ReverseSafepointNumbering.push_back(CI);
+    S.SafepointNumbering.push_back(CI);
     // Note which pointers are upward exposed live here. They need to be
     // considered live at this safepoint even when they have a def earlier
     // in this BB (i.e. even when they don't participate in the dataflow
     // computation)
     S.LiveSets.push_back(BBS.UpExposedUses);
-    S.LiveIfLiveOut.push_back(std::vector<int>{});
+    S.LiveIfLiveOut.push_back(SmallVector<int, 0>{});
     S.CalleeRoots.push_back(std::move(CalleeRoots));
+    BBS.HasSafepoint = true;
+    if (BBS.LastSafepoint == -1)
+        BBS.LastSafepoint = Number;
+    BBS.FirstSafepoint = Number;
     return Number;
 }
 
-void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses) {
+void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses, Function &F) {
     // Short circuit to avoid having to deal with vectors of constants, etc.
+//#ifndef NDEBUG
+//    if (isa<PointerType>(V->getType())) {
+//        if (isSpecialPtr(V->getType()))
+//            if (isa<UndefValue>(V) && !isa<PoisonValue>(V))
+//                F.dump();
+//    }
+//#endif
     if (isa<Constant>(V))
         return;
     if (isa<PointerType>(V->getType())) {
@@ -1083,23 +755,21 @@ void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitV
             int Num = Number(S, V);
             if (Num < 0)
                 return;
-            MaybeResize(BBS, Num);
             Uses.set(Num);
         }
     } else {
-        std::vector<int> Nums = NumberAll(S, V);
+        SmallVector<int, 0> Nums = NumberAll(S, V);
         for (int Num : Nums) {
             if (Num < 0)
                 continue;
-            MaybeResize(BBS, Num);
             Uses.set(Num);
         }
     }
 }
 
-void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, User &UI) {
+void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, Instruction &UI) {
     for (Use &U : UI.operands()) {
-        NoteUse(S, BBS, U);
+        NoteUse(S, BBS, U, *UI.getFunction());
     }
 }
 
@@ -1112,7 +782,7 @@ void RecursivelyVisit(callback f, Value *V) {
         if (isa<CallInst>(TheUser) || isa<LoadInst>(TheUser) ||
             isa<SelectInst>(TheUser) || isa<PHINode>(TheUser) || // TODO: should these be removed from this list?
             isa<StoreInst>(TheUser) || isa<PtrToIntInst>(TheUser) ||
-            isa<ICmpInst>(TheUser) || // ICmpEQ/ICmpNE can be used with ptr types
+            isa<ICmpInst>(TheUser) || isa<InsertElementInst>(TheUser)|| // ICmpEQ/ICmpNE can be used with ptr types
             isa<AtomicCmpXchgInst>(TheUser) || isa<AtomicRMWInst>(TheUser))
             continue;
         if (isa<GetElementPtrInst>(TheUser) || isa<BitCastInst>(TheUser) || isa<AddrSpaceCastInst>(TheUser)) {
@@ -1121,7 +791,8 @@ void RecursivelyVisit(callback f, Value *V) {
         }
         llvm_dump(V);
         llvm_dump(TheUser);
-        assert(false && "Unexpected instruction");
+        errs() << "Unexpected instruction\n";
+        abort();
     }
 }
 
@@ -1190,7 +861,7 @@ static bool isLoadFromImmut(LoadInst *LI)
     if (LI->getMetadata(LLVMContext::MD_invariant_load))
         return true;
     MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa);
-    if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"}))
+    if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype", "jtbaa_memoryptr", "jtbaa_memorylen", "jtbaa_memoryown"}))
         return true;
     return false;
 }
@@ -1245,6 +916,10 @@ static bool isLoadFromConstGV(Value *v, bool &task_local, PhiSet *seen = nullptr
             task_local = true;
             return true;
         }
+        if (callee && callee->getName() == "julia.gc_loaded") {
+            return isLoadFromConstGV(call->getArgOperand(0), task_local, seen) &&
+                   isLoadFromConstGV(call->getArgOperand(1), task_local, seen);
+        }
     }
     if (isa<Argument>(v)) {
         task_local = true;
@@ -1269,8 +944,7 @@ static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen)
     auto load_base = LI->getPointerOperand()->stripInBoundsOffsets();
     assert(load_base); // Static analyzer
     auto gv = dyn_cast<GlobalVariable>(load_base);
-    if (isTBAA(LI->getMetadata(LLVMContext::MD_tbaa),
-               {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) {
+    if (isLoadFromImmut(LI)) {
         if (gv)
             return true;
         return isLoadFromConstGV(load_base, task_local, seen);
@@ -1288,15 +962,6 @@ static uint64_t getLoadValueAlign(LoadInst *LI)
     return mdconst::extract<ConstantInt>(md->getOperand(0))->getLimitedValue();
 }
 
-static bool LooksLikeFrameRef(Value *V) {
-    if (isSpecialPtr(V->getType()))
-        return false;
-    V = V->stripInBoundsOffsets();
-    if (isSpecialPtr(V->getType()))
-        return false;
-    return isa<Argument>(V);
-}
-
 SmallVector<int, 1> LateLowerGCFrame::GetPHIRefinements(PHINode *Phi, State &S)
 {
     // The returned vector can violate the domination property of the Refinements map.
@@ -1483,11 +1148,55 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef<int> PHINumbers, State &S)
     }
 }
 
+// Look through instructions to find all possible allocas that might become the sret argument
+static std::optional<SmallSetVector<AllocaInst *, 8>> FindSretAllocas(Value* SRetArg) {
+    SmallSetVector<AllocaInst *, 8> allocas;
+    if (AllocaInst *OneSRet = dyn_cast<AllocaInst>(SRetArg)) {
+        allocas.insert(OneSRet); // Found it directly
+    } else {
+        SmallSetVector<Value *, 8> worklist;
+        worklist.insert(SRetArg);
+        while (!worklist.empty()) {
+            Value *V = worklist.pop_back_val();
+            if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) {
+                allocas.insert(Alloca); // Found a candidate
+            } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
+                for (Value *Incoming : Phi->incoming_values()) {
+                    worklist.insert(Incoming);
+                }
+            } else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+                auto TrueBranch = SI->getTrueValue();
+                auto FalseBranch = SI->getFalseValue();
+                if (TrueBranch && FalseBranch) {
+                    worklist.insert(TrueBranch);
+                    worklist.insert(FalseBranch);
+                } else {
+                    llvm_dump(SI);
+                    dbgs() << "Malformed Select\n";
+                    return {};
+                }
+            } else {
+                llvm_dump(V);
+                dbgs() << "Unexpected SRet argument\n";
+                return {};
+            }
+        }
+    }
+    assert(allocas.size() > 0);
+    assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst* SRetAlloca) JL_NOTSAFEPOINT {
+            return (SRetAlloca->getArraySize() == allocas[0]->getArraySize() &&
+            SRetAlloca->getAllocatedType() == allocas[0]->getAllocatedType());
+        }
+    ));
+    return allocas;
+}
+
 State LateLowerGCFrame::LocalScan(Function &F) {
     State S(F);
     SmallVector<int, 8> PHINumbers;
     for (BasicBlock &BB : F) {
         BBState &BBS = S.BBStates[&BB];
+        // Avoid tracking safepoints until we reach the first instruction the defines a value.
         for (auto it = BB.rbegin(); it != BB.rend(); ++it) {
             Instruction &I = *it;
             if (CallInst *CI = dyn_cast<CallInst>(&I)) {
@@ -1505,38 +1214,52 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     if (II->getIntrinsicID() == Intrinsic::masked_load ||
                         II->getIntrinsicID() == Intrinsic::masked_gather) {
                         if (auto VTy = dyn_cast<VectorType>(II->getType())) {
-                            if (auto PtrT = dyn_cast<PointerType>(VTy->getElementType())) {
-                                if (isSpecialPtr(PtrT)) {
-                                    // LLVM sometimes tries to materialize these operations with undefined pointers in our non-integral address space.
-                                    // Hopefully LLVM didn't already propagate that information and poison our users. Set those to NULL now.
-                                    Value *passthru = II->getArgOperand(3);
-                                    if (isa<UndefValue>(passthru)) {
-                                        II->setArgOperand(3, Constant::getNullValue(passthru->getType()));
-                                    }
-                                    if (PtrT->getAddressSpace() == AddressSpace::Loaded) {
-                                        // These are not real defs
-                                        continue;
-                                    }
+                            if (CountTrackedPointers(VTy->getElementType()).count) {
+                                // LLVM sometimes tries to materialize these operations with undefined pointers in our non-integral address space.
+                                // Hopefully LLVM didn't already propagate that information and poison our users. Set those to NULL now.
+                                Value *passthru = II->getArgOperand(3);
+                                if (isa<UndefValue>(passthru)) {
+                                    II->setArgOperand(3, Constant::getNullValue(passthru->getType()));
                                 }
                             }
+                            if (hasLoadedTy(VTy->getElementType())) {
+                                // These are not real defs
+                                continue;
+                            }
                         }
                     }
+                    if (II->getIntrinsicID() == Intrinsic::vector_extract || II->getIntrinsicID() == Intrinsic::vector_insert) {
+                        // These are not real defs
+                        continue;
+                    }
                 }
                 auto callee = CI->getCalledFunction();
                 if (callee && callee == typeof_func) {
-                    MaybeNoteDef(S, BBS, CI, BBS.Safepoints, SmallVector<int, 1>{-2});
+                    MaybeNoteDef(S, BBS, CI, SmallVector<int, 1>{-2});
+                }
+                else if (callee && callee->getName() == "julia.gc_loaded") {
+                    continue;
                 }
                 else {
-                    MaybeNoteDef(S, BBS, CI, BBS.Safepoints);
+                    if (MaybeNoteDef(S, BBS, CI))
+                        BBS.FirstSafepointAfterFirstDef = BBS.FirstSafepoint;
                 }
+                bool HasDefBefore = false;
                 if (CI->hasStructRetAttr()) {
                     Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType();
-                    assert(cast<PointerType>(CI->getArgOperand(0)->getType())->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType()));
-                    auto tracked = CountTrackedPointers(ElT);
+                    auto tracked = CountTrackedPointers(ElT, true);
                     if (tracked.count) {
-                        AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin()[0])->stripInBoundsOffsets());
-                        assert(SRet);
-                        {
+                        HasDefBefore = true;
+                        auto allocas_opt = FindSretAllocas((CI->arg_begin()[0])->stripInBoundsOffsets());
+                        // We know that with the right optimizations we can forward a sret directly from an argument
+                        // This hasn't been seen without adding IPO effects to julia functions but it's possible we need to handle that too
+                        // If they are tracked.all we can just pass through but if they have a roots bundle it's possible we need to emit some copies ¯\_(ツ)_/¯
+                        if (!allocas_opt.has_value()) {
+                            llvm_dump(&F);
+                            abort();
+                        }
+                        auto allocas = allocas_opt.value();
+                        for (AllocaInst *SRet : allocas) {
                             if (!(SRet->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
                                 assert(!tracked.derived);
                                 if (tracked.all) {
@@ -1544,35 +1267,24 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                                 }
                                 else {
                                     Value *arg1 = (CI->arg_begin()[1])->stripInBoundsOffsets();
-                                    AllocaInst *SRet_gc = nullptr;
-                                    if (PHINode *Phi = dyn_cast<PHINode>(arg1)) {
-                                        for (Value *V : Phi->incoming_values()) {
-                                            if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) {
-                                                if (SRet_gc == nullptr) {
-                                                    SRet_gc = Alloca;
-                                                } else if (SRet_gc == Alloca) {
-                                                    continue;
-                                                } else {
-                                                    llvm_dump(Alloca);
-                                                    llvm_dump(SRet_gc);
-                                                    assert(false && "Allocas in Phi node should match");
-                                                }
-                                            } else {
-                                                llvm_dump(V->stripInBoundsOffsets());
-                                                assert(false && "Expected alloca");
-                                            }
-                                        }
-                                    } else {
-                                        SRet_gc = dyn_cast<AllocaInst>(arg1);
+                                    auto gc_allocas_opt = FindSretAllocas(arg1);
+                                    if (!gc_allocas_opt.has_value()) {
+                                        llvm_dump(&F);
+                                        abort();
                                     }
-                                    if (!SRet_gc) {
+                                    auto gc_allocas = gc_allocas_opt.value();
+                                    if (gc_allocas.size() == 0) {
                                         llvm_dump(CI);
-                                        llvm_dump(arg1);
-                                        assert(false && "Expected alloca");
+                                        errs() << "Expected one Alloca at least\n";
+                                        abort();
                                     }
-                                    Type *ElT = SRet_gc->getAllocatedType();
-                                    if (!(SRet_gc->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
-                                        S.ArrayAllocas[SRet_gc] = tracked.count * cast<ConstantInt>(SRet_gc->getArraySize())->getZExtValue();
+                                    else {
+                                        for (AllocaInst* SRet_gc : gc_allocas) {
+                                            Type *ElT = SRet_gc->getAllocatedType();
+                                            if (!(SRet_gc->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
+                                                S.ArrayAllocas[SRet_gc] = tracked.count * cast<ConstantInt>(SRet_gc->getArraySize())->getZExtValue();
+                                            }
+                                        }
                                     }
                                 }
                             }
@@ -1580,58 +1292,56 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     }
                 }
                 NoteOperandUses(S, BBS, I);
-                if (CI->canReturnTwice()) {
-                    S.ReturnsTwice.push_back(CI);
-                }
-                if (callee) {
-                    if (callee == gc_preserve_begin_func) {
-                        std::vector<int> args;
-                        for (Use &U : CI->args()) {
-                            Value *V = U;
-                            if (isa<Constant>(V))
-                                continue;
-                            if (isa<PointerType>(V->getType())) {
-                                if (isSpecialPtr(V->getType())) {
-                                    int Num = Number(S, V);
-                                    if (Num >= 0)
+                if (!CI->canReturnTwice()) {
+                    if (callee) {
+                        if (callee == gc_preserve_begin_func) {
+                            SmallVector<int, 0> args;
+                            for (Use &U : CI->args()) {
+                                Value *V = U;
+                                if (isa<Constant>(V))
+                                    continue;
+                                if (isa<PointerType>(V->getType())) {
+                                    if (isSpecialPtr(V->getType())) {
+                                        int Num = Number(S, V);
+                                        if (Num >= 0)
+                                            args.push_back(Num);
+                                    }
+                                } else {
+                                    SmallVector<int, 0> Nums = NumberAll(S, V);
+                                    for (int Num : Nums) {
+                                        if (Num < 0)
+                                            continue;
                                         args.push_back(Num);
-                                }
-                            } else {
-                                std::vector<int> Nums = NumberAll(S, V);
-                                for (int Num : Nums) {
-                                    if (Num < 0)
-                                        continue;
-                                    args.push_back(Num);
+                                    }
                                 }
                             }
+                            S.GCPreserves[CI] = args;
+                            continue;
+                        }
+                        // Known functions emitted in codegen that are not safepoints
+                        if (callee == pointer_from_objref_func || callee == gc_preserve_begin_func ||
+                            callee == gc_preserve_end_func || callee == typeof_func ||
+                            callee == pgcstack_getter || callee->getName() == XSTR(jl_egal__unboxed) ||
+                            callee->getName() == XSTR(jl_lock_value) || callee->getName() == XSTR(jl_unlock_value) ||
+                            callee->getName() == XSTR(jl_lock_field) || callee->getName() == XSTR(jl_unlock_field) ||
+                            callee == write_barrier_func || callee == gc_loaded_func || callee == pop_handler_noexcept_func ||
+                            callee->getName() == "memcmp") {
+                            continue;
+                        }
+                        if (callee->getMemoryEffects().onlyReadsMemory() ||
+                            callee->getMemoryEffects().onlyAccessesArgPointees()) {
+                            continue;
                         }
-                        S.GCPreserves[CI] = args;
-                        continue;
                     }
-                    // Known functions emitted in codegen that are not safepoints
-                    if (callee == pointer_from_objref_func || callee == gc_preserve_begin_func ||
-                        callee == gc_preserve_end_func || callee == typeof_func ||
-                        callee == pgcstack_getter || callee->getName() == XSTR(jl_egal__unboxed) ||
-                        callee->getName() == XSTR(jl_lock_value) || callee->getName() == XSTR(jl_unlock_value) ||
-                        callee == write_barrier_func ||
-                        callee->getName() == "memcmp") {
+                    if (isa<IntrinsicInst>(CI))
+                        // Intrinsics are never safepoints.
                         continue;
-                    }
-                    if (callee->hasFnAttribute(Attribute::ReadNone) ||
-                        callee->hasFnAttribute(Attribute::ReadOnly) ||
-                        callee->hasFnAttribute(Attribute::ArgMemOnly)) {
+                    auto effects = CI->getMemoryEffects();
+                    if (effects.onlyAccessesArgPointees() || effects.onlyReadsMemory())
+                        // Readonly functions and functions that cannot change GC state (which is inaccessiblemem) are not safepoints
                         continue;
-                    }
-                    if (MemTransferInst *MI = dyn_cast<MemTransferInst>(CI)) {
-                        MaybeTrackDst(S, MI);
-                    }
-                }
-                if (isa<IntrinsicInst>(CI) || CI->hasFnAttr(Attribute::ArgMemOnly) ||
-                    CI->hasFnAttr(Attribute::ReadNone) || CI->hasFnAttr(Attribute::ReadOnly)) {
-                    // Intrinsics are never safepoints.
-                    continue;
                 }
-                std::vector<int> CalleeRoots;
+                SmallVector<int, 0> CalleeRoots;
                 for (Use &U : CI->args()) {
                     // Find all callee rooted arguments.
                     // Record them instead of simply remove them from live values here
@@ -1649,11 +1359,16 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                         continue;
                     CalleeRoots.push_back(Num);
                 }
-                int SafepointNumber = NoteSafepoint(S, BBS, CI, std::move(CalleeRoots));
-                BBS.HasSafepoint = true;
-                BBS.TopmostSafepoint = SafepointNumber;
-                BBS.Safepoints.push_back(SafepointNumber);
-            } else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+                int SafepointNumber = NoteSafepoint(S, BBS, CI, CalleeRoots);
+                if (CI->canReturnTwice()) {
+                    S.ReturnsTwice.push_back(SafepointNumber);
+                    HasDefBefore = true;
+                }
+                if (HasDefBefore) // With sret, the Def happens before the instruction instead of after
+                    BBS.FirstSafepointAfterFirstDef = SafepointNumber;
+                continue;
+            }
+            if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
                 // If this is a load from an immutable, we know that
                 // this object will always be rooted as long as the
                 // object we're loading from is, so we can refine uses
@@ -1661,14 +1376,10 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                 // from.
                 SmallVector<int, 1> RefinedPtr{};
                 Type *Ty = LI->getType()->getScalarType();
+                bool refined_globally = false;
                 bool task_local = false;
                 if (isLoadFromImmut(LI) && isSpecialPtr(LI->getPointerOperand()->getType())) {
                     RefinedPtr.push_back(Number(S, LI->getPointerOperand()));
-                } else if (LI->getType()->isPointerTy() &&
-                        isSpecialPtr(Ty) &&
-                        LooksLikeFrameRef(LI->getPointerOperand())) {
-                    // Loads from a jlcall argument array
-                    RefinedPtr.push_back(-1);
                 }
                 else if (isLoadFromConstGV(LI, task_local)) {
                     // If this is a const load from a global,
@@ -1676,22 +1387,26 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     // If this is a task local constant, we don't need to root it within the
                     // task but we do need to issue write barriers for when the current task dies.
                     RefinedPtr.push_back(task_local ? -1 : -2);
+                    refined_globally = true;
                 }
-                if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
-                    MaybeNoteDef(S, BBS, LI, BBS.Safepoints, std::move(RefinedPtr));
-                }
+                if (!hasLoadedTy(Ty))
+                    if (MaybeNoteDef(S, BBS, LI, std::move(RefinedPtr)))
+                        if (!refined_globally)
+                            BBS.FirstSafepointAfterFirstDef = BBS.FirstSafepoint;
                 NoteOperandUses(S, BBS, I);
             } else if (auto *LI = dyn_cast<AtomicCmpXchgInst>(&I)) {
                 Type *Ty = LI->getNewValOperand()->getType()->getScalarType();
                 if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
-                    MaybeNoteDef(S, BBS, LI, BBS.Safepoints);
+                    if (MaybeNoteDef(S, BBS, LI))
+                        BBS.FirstSafepointAfterFirstDef = BBS.FirstSafepoint;
                 }
                 NoteOperandUses(S, BBS, I);
                 // TODO: do we need MaybeTrackStore(S, LI);
             } else if (auto *LI = dyn_cast<AtomicRMWInst>(&I)) {
                 Type *Ty = LI->getType()->getScalarType();
                 if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
-                    MaybeNoteDef(S, BBS, LI, BBS.Safepoints);
+                    if (MaybeNoteDef(S, BBS, LI))
+                        BBS.FirstSafepointAfterFirstDef = BBS.FirstSafepoint;
                 }
                 NoteOperandUses(S, BBS, I);
                 // TODO: do we need MaybeTrackStore(S, LI);
@@ -1707,7 +1422,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                             Number(S, SI->getFalseValue())
                         };
                     }
-                    MaybeNoteDef(S, BBS, SI, BBS.Safepoints, std::move(RefinedPtr));
+                    MaybeNoteDef(S, BBS, SI, std::move(RefinedPtr));
                     NoteOperandUses(S, BBS, I);
                 } else if (tracked.count) {
                     // We need to insert extra selects for the GC roots
@@ -1721,18 +1436,18 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     if (isa<PointerType>(Phi->getType()))
                         // TODO: Vector refinements
                         PHIRefinements = GetPHIRefinements(Phi, S);
-                    MaybeNoteDef(S, BBS, Phi, BBS.Safepoints, std::move(PHIRefinements));
+                    MaybeNoteDef(S, BBS, Phi, std::move(PHIRefinements));
                     if (isa<PointerType>(Phi->getType())) {
                         PHINumbers.push_back(Number(S, Phi));
                     } else {
-                        std::vector<int> Nums = NumberAll(S, Phi);
+                        SmallVector<int, 0> Nums = NumberAll(S, Phi);
                         for (int Num : Nums)
                             PHINumbers.push_back(Num);
                     }
                     unsigned nIncoming = Phi->getNumIncomingValues();
                     for (unsigned i = 0; i < nIncoming; ++i) {
                         BBState &IncomingBBS = S.BBStates[Phi->getIncomingBlock(i)];
-                        NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts);
+                        NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts, F);
                     }
                 } else if (tracked.count) {
                     // We need to insert extra phis for the GC roots
@@ -1753,27 +1468,28 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                             RefinedPtr.push_back(task_local ? -1 : -2);
                         }
                     }
-                    MaybeNoteDef(S, BBS, ASCI, BBS.Safepoints, std::move(RefinedPtr));
+                    MaybeNoteDef(S, BBS, ASCI, std::move(RefinedPtr));
                 }
             } else if (auto *AI = dyn_cast<AllocaInst>(&I)) {
                 Type *ElT = AI->getAllocatedType();
                 if (AI->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked) {
-                    S.Allocas.push_back(AI);
+                    S.ArrayAllocas[AI] = cast<ConstantInt>(AI->getArraySize())->getZExtValue();
                 }
             }
         }
         // Pre-seed the dataflow variables;
         BBS.LiveIn = BBS.UpExposedUses;
-        BBS.Done = true;
     }
     FixUpRefinements(PHINumbers, S);
     return S;
 }
 
+
+
 static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> &irbuilder) {
     Type *T_int32 = Type::getInt32Ty(V->getContext());
     if (isptr) {
-        std::vector<Value*> IdxList{Idxs.size() + 1};
+        SmallVector<Value*, 0> IdxList{Idxs.size() + 1};
         IdxList[0] = ConstantInt::get(T_int32, 0);
         for (unsigned j = 0; j < Idxs.size(); ++j) {
             IdxList[j + 1] = ConstantInt::get(T_int32, Idxs[j]);
@@ -1792,11 +1508,13 @@ static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned>
         auto IdxsNotVec = Idxs.slice(0, Idxs.size() - 1);
         Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec);
         bool IsVector = isa<VectorType>(FinalT);
+        IRBuilder<InstSimplifyFolder> foldbuilder(irbuilder.getContext(), InstSimplifyFolder(irbuilder.GetInsertBlock()->getModule()->getDataLayout()));
+        foldbuilder.restoreIP(irbuilder.saveIP());
+        foldbuilder.SetCurrentDebugLocation(irbuilder.getCurrentDebugLocation());
         if (Idxs.size() > IsVector)
-            V = irbuilder.Insert(ExtractValueInst::Create(V, IsVector ? IdxsNotVec : Idxs));
+            V = foldbuilder.CreateExtractValue(V, IsVector ? IdxsNotVec : Idxs);
         if (IsVector)
-            V = irbuilder.Insert(ExtractElementInst::Create(V,
-                    ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back())));
+            V = foldbuilder.CreateExtractElement(V, ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back()));
     }
     return V;
 }
@@ -1813,9 +1531,9 @@ static unsigned getFieldOffset(const DataLayout &DL, Type *STy, ArrayRef<unsigne
     return (unsigned)offset;
 }
 
-std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef<unsigned> perm_offsets) {
+SmallVector<Value*, 0> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef<unsigned> perm_offsets) {
     auto Tracked = TrackCompositeType(STy);
-    std::vector<Value*> Ptrs;
+    SmallVector<Value*, 0> Ptrs;
     unsigned perm_idx = 0;
     auto ignore_field = [&] (ArrayRef<unsigned> Idxs) {
         if (perm_idx >= perm_offsets.size())
@@ -1837,57 +1555,27 @@ std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBu
         return false;
     };
     for (unsigned i = 0; i < Tracked.size(); ++i) {
-        auto Idxs = makeArrayRef(Tracked[i]);
+        auto Idxs = ArrayRef<unsigned>(Tracked[i]);
         if (ignore_field(Idxs))
             continue;
         Value *Elem = ExtractScalar(Src, STy, isptr, Idxs, irbuilder);
-        Ptrs.push_back(Elem);
+        if (isTrackedValue(Elem)) // ignore addrspace Loaded when it appears
+            Ptrs.push_back(Elem);
     }
     return Ptrs;
 }
 
-unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, Type *DTy, IRBuilder<> &irbuilder) {
-    auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
-    for (unsigned i = 0; i < Ptrs.size(); ++i) {
-        Value *Elem = Ptrs[i];// Dst has type `[n x {}*]*`
-        Value *Slot = irbuilder.CreateConstInBoundsGEP2_32(DTy, Dst, 0, i);
-        assert(cast<PointerType>(Dst->getType())->isOpaqueOrPointeeTypeMatches(DTy));
-        StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
-        shadowStore->setOrdering(AtomicOrdering::NotAtomic);
-        // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
-    }
-    return Ptrs.size();
-}
-
-
-// turn a memcpy into a set of loads
-void LateLowerGCFrame::MaybeTrackDst(State &S, MemTransferInst *MI) {
-    //Value *Dst = MI->getRawDest()->stripInBoundsOffsets();
-    //if (AllocaInst *AI = dyn_cast<AllocaInst>(Dst)) {
-    //    Type *STy = AI->getAllocatedType();
-    //    if (!AI->isStaticAlloca() || (isa<PointerType>(STy) && STy->getPointerAddressSpace() == AddressSpace::Tracked) || S.ArrayAllocas.count(AI))
-    //        return; // already numbered this
-    //    auto tracked = CountTrackedPointers(STy);
-    //    unsigned nroots = tracked.count * cast<ConstantInt>(AI->getArraySize())->getZExtValue();
-    //    if (nroots) {
-    //        assert(!tracked.derived);
-    //        if (!tracked.all) {
-    //            // materialize shadow LoadInst and StoreInst ops to make a copy of just the tracked values inside
-    //            //assert(MI->getLength() == DL.getTypeAllocSize(AI->getAllocatedType()) && !AI->isArrayAllocation()); // XXX: handle partial copy
-    //            Value *Src = MI->getSource();
-    //            Src = new BitCastInst(Src, STy->getPointerTo(MI->getSourceAddressSpace()), "", MI);
-    //            auto &Shadow = S.ShadowAllocas[AI];
-    //            if (!Shadow)
-    //                Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, nroots), 0, "", MI);
-    //            AI = Shadow;
-    //            unsigned count = TrackWithShadow(Src, STy, true, AI, IRBuilder<>(MI));
-    //            assert(count == tracked.count); (void)count;
-    //        }
-    //        S.ArrayAllocas[AI] = nroots;
-    //    }
-    //}
-    //// TODO: else???
-}
+//static unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> &irbuilder) {
+//    auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
+//    for (unsigned i = 0; i < Ptrs.size(); ++i) {
+//        Value *Elem = Ptrs[i];
+//        Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(irbuilder.getInt8Ty(), Dst, i * sizeof(void*));
+//        StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
+//        shadowStore->setOrdering(AtomicOrdering::NotAtomic);
+//        // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
+//    }
+//    return Ptrs.size();
+//}
 
 void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) {
     Value *PtrBase = I->getPointerOperand()->stripInBoundsOffsets();
@@ -1962,10 +1650,11 @@ void LateLowerGCFrame::ComputeLiveness(State &S) {
 
 // For debugging
 JL_USED_FUNC static void dumpSafepointsForBBName(Function &F, State &S, const char *BBName) {
-    for (auto it : S.SafepointNumbering) {
-        if (it.first->getParent()->getName() == BBName) {
-            dbgs() << "Live at " << *it.first << "\n";
-            LargeSparseBitVector &LS = S.LiveSets[it.second];
+    for (Instruction *&it : S.SafepointNumbering) {
+        if (it->getParent()->getName() == BBName) {
+            int idx = &it - S.SafepointNumbering.begin();
+            dbgs() << "Live at " << idx << "\n";
+            LargeSparseBitVector &LS = S.LiveSets[idx];
             for (auto Idx : LS) {
                 dbgs() << "\t";
                 S.ReversePtrNumbering[Idx]->printAsOperand(dbgs());
@@ -2004,7 +1693,7 @@ static bool IsIndirectlyRooted(const State &S, LargeSparseBitVector &Visited, La
     return rooted;
 }
 
-void LateLowerGCFrame::RefineLiveSet(LargeSparseBitVector &LS, State &S, const std::vector<int> &CalleeRoots)
+void LateLowerGCFrame::RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef<int> CalleeRoots)
 {
     // It is possible that a value is not directly rooted by the refinements in the live set, but rather
     // indirectly by following the edges of the refinement graph to all the values that root it.
@@ -2044,9 +1733,8 @@ void LateLowerGCFrame::RefineLiveSet(LargeSparseBitVector &LS, State &S, const s
 void LateLowerGCFrame::ComputeLiveSets(State &S) {
     // Iterate over all safe points. Add to live sets all those variables that
     // are now live across their parent block.
-    for (auto it : S.SafepointNumbering) {
-        int idx = it.second;
-        Instruction *Safepoint = it.first;
+    for (Instruction *&Safepoint : S.SafepointNumbering) {
+        int idx = &Safepoint - S.SafepointNumbering.begin();
         BasicBlock *BB = Safepoint->getParent();
         BBState &BBS = S.BBStates[BB];
         LargeSparseBitVector LiveAcross = BBS.LiveIn;
@@ -2086,8 +1774,9 @@ void LateLowerGCFrame::ComputeLiveSets(State &S) {
     }
     // Compute the interference graph
     S.Neighbors.resize(S.MaxPtrNumber+1);
-    for (auto it : S.SafepointNumbering) {
-        const LargeSparseBitVector &LS = S.LiveSets[it.second];
+    for (Instruction *&Safepoint : S.SafepointNumbering) {
+        int idx = &Safepoint - S.SafepointNumbering.begin();
+        const LargeSparseBitVector &LS = S.LiveSets[idx];
         for (int idx : LS) {
             S.Neighbors[idx] |= LS;
         }
@@ -2104,12 +1793,12 @@ struct PEOIterator {
         unsigned weight;
         unsigned pos;
     };
-    std::vector<Element> Elements;
-    std::vector<std::vector<int>> Levels;
-    const std::vector<LargeSparseBitVector> &Neighbors;
-    PEOIterator(const std::vector<LargeSparseBitVector> &Neighbors) : Neighbors(Neighbors) {
+    SmallVector<Element, 0> Elements;
+    SmallVector<SmallVector<int, 0>> Levels;
+    const SmallVector<LargeSparseBitVector, 0> &Neighbors;
+    PEOIterator(const SmallVector<LargeSparseBitVector, 0> &Neighbors) : Neighbors(Neighbors) {
         // Initialize State
-        std::vector<int> FirstLevel;
+        SmallVector<int, 0> FirstLevel;
         for (unsigned i = 0; i < Neighbors.size(); ++i) {
             FirstLevel.push_back(i);
             Element E{0, i};
@@ -2121,7 +1810,7 @@ struct PEOIterator {
         // Find the element in the highest bucket
         int NextElement = -1;
         while (NextElement == -1 && !Levels.empty()) {
-            std::vector<int> &LastLevel = Levels.back();
+            SmallVector<int, 0> &LastLevel = Levels.back();
             while (NextElement == -1 && !LastLevel.empty()) {
                 NextElement = LastLevel.back();
                 LastLevel.pop_back();
@@ -2131,7 +1820,7 @@ struct PEOIterator {
         }
         if (NextElement == -1)
             return NextElement;
-        // Make sure not to try to re-use this later.
+        // Make sure not to try to reuse this later.
         Elements[NextElement].weight = (unsigned)-1;
         // Raise neighbors
         for (int Neighbor : Neighbors[NextElement]) {
@@ -2146,7 +1835,7 @@ struct PEOIterator {
             // Raise the neighbor to the next level.
             NElement.weight += 1;
             if (NElement.weight >= Levels.size())
-                Levels.push_back(std::vector<int>{});
+                Levels.push_back(SmallVector<int, 0>{});
             Levels[NElement.weight].push_back(Neighbor);
             NElement.pos = Levels[NElement.weight].size()-1;
         }
@@ -2156,7 +1845,7 @@ struct PEOIterator {
     }
 };
 
-JL_USED_FUNC static void dumpColorAssignments(const State &S, std::vector<int> &Colors)
+JL_USED_FUNC static void dumpColorAssignments(const State &S, const ArrayRef<int> &Colors)
 {
     for (unsigned i = 0; i < Colors.size(); ++i) {
         if (Colors[i] == -1)
@@ -2167,15 +1856,14 @@ JL_USED_FUNC static void dumpColorAssignments(const State &S, std::vector<int> &
     }
 }
 
-std::vector<int> LateLowerGCFrame::ColorRoots(const State &S) {
-    std::vector<int> Colors;
+std::pair<SmallVector<int, 0>, int> LateLowerGCFrame::ColorRoots(const State &S) {
+    SmallVector<int, 0> Colors;
     Colors.resize(S.MaxPtrNumber + 1, -1);
     PEOIterator Ordering(S.Neighbors);
     int PreAssignedColors = 0;
     /* First assign permanent slots to things that need them due
        to returns_twice */
-    for (auto it : S.ReturnsTwice) {
-        int Num = S.SafepointNumbering.at(it);
+    for (int Num : S.ReturnsTwice) {
         const LargeSparseBitVector &LS = S.LiveSets[Num];
         for (int Idx : LS) {
             if (Colors[Idx] == -1)
@@ -2209,24 +1897,25 @@ std::vector<int> LateLowerGCFrame::ColorRoots(const State &S) {
         NewColor += PreAssignedColors;
         Colors[ActiveElement] = NewColor;
     }
-    return Colors;
+    return {Colors, PreAssignedColors};
 }
 
 // Size of T is assumed to be `sizeof(void*)`
 Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V)
 {
     assert(T == T_size || isa<PointerType>(T));
-    auto TV = cast<PointerType>(V->getType());
-    auto cast = builder.CreateBitCast(V, T->getPointerTo(TV->getAddressSpace()));
-    return builder.CreateInBoundsGEP(T, cast, ConstantInt::get(T_size, -1));
+    return builder.CreateInBoundsGEP(T, V, ConstantInt::get(T_size, -1), V->getName() + ".tag_addr");
 }
 
 Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V)
 {
     auto addr = EmitTagPtr(builder, T_size, T_size, V);
     auto &M = *builder.GetInsertBlock()->getModule();
-    LoadInst *load = builder.CreateAlignedLoad(T_size, addr, M.getDataLayout().getPointerABIAlignment(0));
+    LoadInst *load = builder.CreateAlignedLoad(T_size, addr, M.getDataLayout().getPointerABIAlignment(0), V->getName() + ".tag");
     load->setOrdering(AtomicOrdering::Unordered);
+    // Mark as volatile to prevent optimizers from treating GC tag loads as constants
+    // since GC mark bits can change during runtime (issue #59547)
+    load->setVolatile(true);
     load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
     MDBuilder MDB(load->getContext());
     auto *NullInt = ConstantInt::get(T_size, 0);
@@ -2238,11 +1927,6 @@ Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *
     return load;
 }
 
-// Enable this optimization only on LLVM 4.0+ since this cause LLVM to optimize
-// constant store loop to produce a `memset_pattern16` with a global variable
-// that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
-// This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled
-// for NI pointers.
 static SmallVector<int, 1> *FindRefinements(Value *V, State *S)
 {
     if (!S)
@@ -2281,6 +1965,50 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) {
     return MDBuilder(Tag->getContext()).createMutableTBAAAccessTag(Tag);
 }
 
+void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
+    auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
+    for (auto CI : WriteBarriers) {
+        auto parent = CI->getArgOperand(0);
+        if (std::all_of(CI->op_begin() + 1, CI->op_end(),
+                    [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
+            CI->eraseFromParent();
+            continue;
+        }
+        if (CFGModified) {
+            *CFGModified = true;
+        }
+
+        IRBuilder<> builder(CI);
+        builder.SetCurrentDebugLocation(CI->getDebugLoc());
+        auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits");
+        auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
+        auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
+        builder.SetInsertPoint(mayTrigTerm);
+        mayTrigTerm->getParent()->setName("may_trigger_wb");
+        Value *anyChldNotMarked = NULL;
+        for (unsigned i = 1; i < CI->arg_size(); i++) {
+            Value *child = CI->getArgOperand(i);
+            Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit");
+            Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked");
+            anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
+        }
+        assert(anyChldNotMarked); // handled by all_of test above
+        MDBuilder MDB(parent->getContext());
+        SmallVector<uint32_t, 2> Weights{1, 9};
+        auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
+                                                  MDB.createBranchWeights(Weights));
+        trigTerm->getParent()->setName("trigger_wb");
+        builder.SetInsertPoint(trigTerm);
+        if (CI->getCalledOperand() == write_barrier_func) {
+            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
+        }
+        else {
+            assert(false);
+        }
+        CI->eraseFromParent();
+    }
+}
+
 bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
     auto T_int32 = Type::getInt32Ty(F.getContext());
     auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
@@ -2294,21 +2022,28 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
     AllocaInst *Frame = nullptr;
     unsigned allocaAddressSpace = F.getParent()->getDataLayout().getAllocaAddrSpace();
     if (T_prjlvalue) {
-        T_pprjlvalue = T_prjlvalue->getPointerTo();
-        Frame = new AllocaInst(T_prjlvalue, allocaAddressSpace,
-            ConstantInt::get(T_int32, maxframeargs), "", StartOff);
+        T_pprjlvalue = PointerType::getUnqual(T_prjlvalue->getContext());
+        Frame = new AllocaInst(T_prjlvalue, allocaAddressSpace,ConstantInt::get(T_int32, maxframeargs), "jlcallframe",
+#if JL_LLVM_VERSION >= 200000
+            StartOff->getIterator()
+#else
+            StartOff
+#endif
+        );
     }
-    std::vector<CallInst*> write_barriers;
+    SmallVector<CallInst*, 0> write_barriers;
     for (BasicBlock &BB : F) {
         for (auto it = BB.begin(); it != BB.end();) {
             Instruction *I = &*it;
             // strip all constant alias information, as it might depend on the gc having
             // preserved a gc root, which stops being true after this pass (#32215)
             // similar to RewriteStatepointsForGC::stripNonValidData, but less aggressive
-            if (I->getMetadata(LLVMContext::MD_invariant_load))
-                I->setMetadata(LLVMContext::MD_invariant_load, NULL);
+            if (auto *LI = dyn_cast<LoadInst>(I)){
+                if (isSpecialPtr(LI->getPointerOperand()->getType()) && LI->getMetadata(LLVMContext::MD_invariant_load))
+                    LI->setMetadata(LLVMContext::MD_invariant_load, NULL);
+            }
             if (MDNode *TBAA = I->getMetadata(LLVMContext::MD_tbaa)) {
-                if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const"})) {
+                if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const", "jtbaa_memoryptr", "jtbaa_memorylen", "tbaa_memoryown"})) {
                     MDNode *MutableTBAA = createMutableTBAAAccessTag(TBAA);
                     if (MutableTBAA != TBAA)
                         I->setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
@@ -2337,7 +2072,21 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                 /* No replacement */
             } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) {
                 auto *obj = CI->getOperand(0);
-                auto *ASCI = new AddrSpaceCastInst(obj, JuliaType::get_pjlvalue_ty(obj->getContext()), "", CI);
+#if JL_LLVM_VERSION >= 200000
+                auto *ASCI = new AddrSpaceCastInst(obj, CI->getType(), "", CI->getIterator());
+#else
+                auto *ASCI = new AddrSpaceCastInst(obj, CI->getType(), "", CI);
+#endif
+                ASCI->takeName(CI);
+                CI->replaceAllUsesWith(ASCI);
+                UpdatePtrNumbering(CI, ASCI, S);
+            } else if (gc_loaded_func != nullptr && callee == gc_loaded_func) {
+                auto *obj = CI->getOperand(1);
+#if JL_LLVM_VERSION >= 200000
+                auto *ASCI = new AddrSpaceCastInst(obj, CI->getType(), "", CI->getIterator());
+#else
+                auto *ASCI = new AddrSpaceCastInst(obj, CI->getType(), "", CI);
+#endif
                 ASCI->takeName(CI);
                 CI->replaceAllUsesWith(ASCI);
                 UpdatePtrNumbering(CI, ASCI, S);
@@ -2348,22 +2097,6 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                 IRBuilder<> builder(CI);
                 builder.SetCurrentDebugLocation(CI->getDebugLoc());
 
-                // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like
-                // `julia.gc_alloc_obj` except it doesn't set the tag.
-                auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
-                auto ptlsLoad = get_current_ptls_from_task(builder, T_size, CI->getArgOperand(0), tbaa_gcframe);
-                auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext()));
-                auto newI = builder.CreateCall(
-                    allocBytesIntrinsic,
-                    {
-                        ptls,
-                        builder.CreateIntCast(
-                            CI->getArgOperand(1),
-                            allocBytesIntrinsic->getFunctionType()->getParamType(1),
-                            false)
-                    });
-                newI->takeName(CI);
-
                 // LLVM alignment/bit check is not happy about addrspacecast and refuse
                 // to remove write barrier because of it.
                 // We pretty much only load using `T_size` so try our best to strip
@@ -2385,8 +2118,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                     if (isLoadFromConstGV(LI, task_local) && getLoadValueAlign(LI) < 16) {
                         Type *T_int64 = Type::getInt64Ty(LI->getContext());
                         auto op = ConstantAsMetadata::get(ConstantInt::get(T_int64, 16));
-                        LI->setMetadata(LLVMContext::MD_align,
-                                        MDNode::get(LI->getContext(), { op }));
+                        LI->setMetadata(LLVMContext::MD_align, MDNode::get(LI->getContext(), { op }));
                     }
                 }
                 // As a last resort, if we didn't manage to strip down the tag
@@ -2402,7 +2134,35 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                         builder.CreateAlignmentAssumption(DL, tag, 16);
                     }
                 }
-                // Set the tag.
+
+                // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like
+                // `julia.gc_alloc_obj` except it specializes the call based on the constant
+                // size of the object to allocate, to save one indirection, and doesn't set
+                // the type tag. (Note that if the size is not a constant, it will call
+                // gc_alloc_obj, and will redundantly set the tag.)
+                auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
+                auto ptls = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe);
+                auto newI = builder.CreateCall(
+                    allocBytesIntrinsic,
+                    {
+                        ptls,
+                        builder.CreateIntCast(
+                            CI->getArgOperand(1),
+                            allocBytesIntrinsic->getFunctionType()->getParamType(1),
+                            false),
+                        builder.CreatePtrToInt(tag, T_size),
+                    });
+                newI->setAttributes(allocBytesIntrinsic->getAttributes());
+                newI->addDereferenceableRetAttr(CI->getRetDereferenceableBytes());
+                newI->takeName(CI);
+                // Now, finally, set the tag. We do this in IR instead of in the C alloc
+                // function, to provide possible optimization opportunities. (I think? TBH
+                // the most recent editor of this code is not entirely clear on why we
+                // prefer to set the tag in the generated code. Providing optimization
+                // opportunities is the most likely reason; the tradeoff is slightly
+                // larger code size and increased compilation time, compiling this
+                // instruction at every allocation site, rather than once in the C alloc
+                // function.)
                 auto &M = *builder.GetInsertBlock()->getModule();
                 StoreInst *store = builder.CreateAlignedStore(
                     tag, EmitTagPtr(builder, tag_type, T_size, newI), M.getDataLayout().getPointerABIAlignment(0));
@@ -2435,14 +2195,15 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                 ++it;
                 continue;
             } else if ((call_func && callee == call_func) ||
-                       (call2_func && callee == call2_func)) {
+                       (call2_func && callee == call2_func) ||
+                       (call3_func && callee == call3_func)) {
                 assert(T_prjlvalue);
                 size_t nargs = CI->arg_size();
                 size_t nframeargs = nargs-1;
-                if (callee == call_func)
-                    nframeargs -= 1;
-                else if (callee == call2_func)
+                if (callee == call2_func)
                     nframeargs -= 2;
+                else
+                    nframeargs -= 1;
                 SmallVector<Value*, 4> ReplacementArgs;
                 auto arg_it = CI->arg_begin();
                 assert(arg_it != CI->arg_end());
@@ -2461,13 +2222,13 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                     // the julia.call signature is varargs, the optimizer is allowed
                     // to rewrite pointee types. It'll go away with opaque pointer
                     // types anyway.
-                    Builder.CreateAlignedStore(Builder.CreateBitCast(*arg_it, T_prjlvalue),
+                    Builder.CreateAlignedStore(*arg_it,
                             Builder.CreateInBoundsGEP(T_prjlvalue, Frame, ConstantInt::get(T_int32, slot++)),
                             Align(sizeof(void*)));
                 }
                 ReplacementArgs.push_back(nframeargs == 0 ?
                     (llvm::Value*)ConstantPointerNull::get(T_pprjlvalue) :
-                    (allocaAddressSpace ? Builder.CreateAddrSpaceCast(Frame, T_prjlvalue->getPointerTo(0)) : Frame));
+                    Builder.CreateAddrSpaceCast(Frame, PointerType::getUnqual(T_prjlvalue->getContext())));
                 ReplacementArgs.push_back(ConstantInt::get(T_int32, nframeargs));
                 if (callee == call2_func) {
                     // move trailing arg to the end now
@@ -2475,8 +2236,14 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                     ReplacementArgs.erase(ReplacementArgs.begin());
                     ReplacementArgs.push_back(front);
                 }
-                FunctionType *FTy = callee == call2_func ? JuliaType::get_jlfunc2_ty(CI->getContext()) : JuliaType::get_jlfunc_ty(CI->getContext());
+                FunctionType *FTy = callee == call3_func ? JuliaType::get_jlfunc3_ty(CI->getContext()) :
+                                    callee == call2_func ? JuliaType::get_jlfunc2_ty(CI->getContext()) :
+                                                           JuliaType::get_jlfunc_ty(CI->getContext());
+#if JL_LLVM_VERSION >= 200000
+                CallInst *NewCall = CallInst::Create(FTy, new_callee, ReplacementArgs, "", CI->getIterator());
+#else
                 CallInst *NewCall = CallInst::Create(FTy, new_callee, ReplacementArgs, "", CI);
+#endif
                 NewCall->setTailCallKind(CI->getTailCallKind());
                 auto callattrs = CI->getAttributes();
                 callattrs = AttributeList::get(CI->getContext(), getFnAttrs(callattrs), getRetAttrs(callattrs), {});
@@ -2487,16 +2254,52 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                 NewCall->copyMetadata(*CI);
                 CI->replaceAllUsesWith(NewCall);
                 UpdatePtrNumbering(CI, NewCall, S);
-            } else if (CI->arg_size() == CI->getNumOperands()) {
-                /* No operand bundle to lower */
-                ++it;
-                continue;
             } else {
-                CallInst *NewCall = CallInst::Create(CI, None, CI);
-                NewCall->takeName(CI);
-                NewCall->copyMetadata(*CI);
-                CI->replaceAllUsesWith(NewCall);
-                UpdatePtrNumbering(CI, NewCall, S);
+                SmallVector<OperandBundleDef,2> bundles;
+                CI->getOperandBundlesAsDefs(bundles);
+                bool gc_transition = false;
+                Value *ptls = nullptr;
+                for (auto &bundle: bundles)
+                    if (bundle.getTag() == "gc-transition") {
+                        gc_transition = true;
+                        ptls = bundle.inputs()[0];
+                    }
+
+                // In theory LLVM wants us to lower this using RewriteStatepointsForGC
+                if (gc_transition) {
+                    // Insert the operations to switch to gc_safe if necessary.
+                    IRBuilder<> builder(CI);
+                    assert(ptls);
+                    // We dont use emit_state_set here because safepoints are unconditional for any code that reaches this
+                    // We are basically guaranteed to go from gc_unsafe to gc_safe and back, and both transitions need a safepoint
+                    // We also can't add any BBs here, so just avoiding the branches is good
+                    unsigned offset = offsetof(jl_tls_states_t, gc_state);
+                    Value *gc_state = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), ptls, offset, "gc_state");
+                    LoadInst *last_gc_state = builder.CreateAlignedLoad(Type::getInt8Ty(builder.getContext()), gc_state, Align(sizeof(void*)));
+                    last_gc_state->setOrdering(AtomicOrdering::Monotonic);
+                    builder.CreateAlignedStore(builder.getInt8(JL_GC_STATE_SAFE), gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
+                    MDNode *tbaa = get_tbaa_const(builder.getContext());
+                    emit_gc_safepoint(builder, T_size, ptls, tbaa, false);
+                    builder.SetInsertPoint(CI->getNextNode());
+                    builder.CreateAlignedStore(last_gc_state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
+                    emit_gc_safepoint(builder, T_size, ptls, tbaa, false);
+                }
+                if (CI->arg_size() == CI->getNumOperands()) {
+                    /* No operand bundle to lower */
+                    ++it;
+                    continue;
+                } else {
+                    // remove all operand bundles
+#if JL_LLVM_VERSION >= 200000
+                    CallInst *NewCall = CallInst::Create(CI, None, CI->getIterator());
+#else
+                    CallInst *NewCall = CallInst::Create(CI, None, CI);
+#endif
+                    NewCall->takeName(CI);
+                    NewCall->copyMetadata(*CI);
+                    CI->replaceAllUsesWith(NewCall);
+                    UpdatePtrNumbering(CI, NewCall, S);
+                }
             }
             if (!CI->use_empty()) {
                 CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
@@ -2506,43 +2309,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
             ChangesMade = true;
         }
     }
-    for (auto CI : write_barriers) {
-        auto parent = CI->getArgOperand(0);
-        if (std::all_of(CI->op_begin() + 1, CI->op_end(),
-                    [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
-            CI->eraseFromParent();
-            continue;
-        }
-        if (CFGModified) {
-            *CFGModified = true;
-        }
-        IRBuilder<> builder(CI);
-        builder.SetCurrentDebugLocation(CI->getDebugLoc());
-        auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), 3);
-        auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3));
-        auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
-        builder.SetInsertPoint(mayTrigTerm);
-        Value *anyChldNotMarked = NULL;
-        for (unsigned i = 1; i < CI->arg_size(); i++) {
-            Value *child = CI->getArgOperand(i);
-            Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), 1);
-            Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0));
-            anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
-        }
-        assert(anyChldNotMarked); // handled by all_of test above
-        MDBuilder MDB(parent->getContext());
-        SmallVector<uint32_t, 2> Weights{1, 9};
-        auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
-                                                  MDB.createBranchWeights(Weights));
-        builder.SetInsertPoint(trigTerm);
-        if (CI->getCalledOperand() == write_barrier_func) {
-            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
-        }
-        else {
-            assert(false);
-        }
-        CI->eraseFromParent();
-    }
+    CleanupWriteBarriers(F, S, write_barriers, CFGModified);
     if (maxframeargs == 0 && Frame) {
         Frame->eraseFromParent();
     }
@@ -2552,11 +2319,13 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
     return ChangesMade;
 }
 
-static void AddInPredLiveOuts(BasicBlock *BB, LargeSparseBitVector &LiveIn, State &S)
+// Compute the set of all objects that are live in from all predecessors
+// TODO: reset any slots that contain values which are only live from some predecessors
+static void AddInPredecessorLiveOuts(BasicBlock *BB, LargeSparseBitVector &LiveIn, State &S)
 {
     bool First = true;
     std::set<BasicBlock *> Visited;
-    std::vector<BasicBlock *> WorkList;
+    SmallVector<BasicBlock *, 0> WorkList;
     WorkList.push_back(BB);
     while (!WorkList.empty()) {
         BB = &*WorkList.back();
@@ -2573,7 +2342,7 @@ static void AddInPredLiveOuts(BasicBlock *BB, LargeSparseBitVector &LiveIn, Stat
                 WorkList.push_back(Pred);
                 continue;
             } else {
-                int LastSP = S.BBStates[Pred].Safepoints.front();
+                int LastSP = S.BBStates[Pred].LastSafepoint;
                 if (First) {
                     LiveIn |= S.LiveSets[LastSP];
                     First = false;
@@ -2588,44 +2357,76 @@ static void AddInPredLiveOuts(BasicBlock *BB, LargeSparseBitVector &LiveIn, Stat
 }
 
 void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot,
-                                         const std::vector<int> &Colors, Value *GCFrame,
+                                         ArrayRef<int> Colors, Value *GCFrame,
                                          Instruction *InsertBefore) {
     // Get the slot address.
     auto slotAddress = CallInst::Create(
         getOrDeclare(jl_intrinsics::getGCFrameSlot),
         {GCFrame, ConstantInt::get(Type::getInt32Ty(InsertBefore->getContext()), Colors[R] + MinColorRoot)},
-        "", InsertBefore);
+#if JL_LLVM_VERSION >= 200000
+        "gc_slot_addr_" + StringRef(std::to_string(Colors[R] + MinColorRoot)), InsertBefore->getIterator());
+#else
+        "gc_slot_addr_" + StringRef(std::to_string(Colors[R] + MinColorRoot)), InsertBefore);
+#endif
 
     Value *Val = GetPtrForNumber(S, R, InsertBefore);
     // Pointee types don't have semantics, so the optimizer is
     // free to rewrite them if convenient. We need to change
     // it back here for the store.
-    if (Val->getType() != T_prjlvalue) {
-        // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-        assert(Val->getContext().supportsTypedPointers());
-        Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore);
-    }
+    assert(Val->getType() == T_prjlvalue);
+#if JL_LLVM_VERSION >= 200000
+    new StoreInst(Val, slotAddress, InsertBefore->getIterator());
+#else
+    new StoreInst(Val, slotAddress, InsertBefore);
+#endif
+}
+
+void LateLowerGCFrame::PlaceGCFrameReset(State &S, unsigned R, unsigned MinColorRoot,
+                                         ArrayRef<int> Colors, Value *GCFrame,
+                                         Instruction *InsertBefore) {
+    // Get the slot address.
+    auto slotAddress = CallInst::Create(
+        getOrDeclare(jl_intrinsics::getGCFrameSlot),
+        {GCFrame, ConstantInt::get(Type::getInt32Ty(InsertBefore->getContext()), Colors[R] + MinColorRoot)},
+#if JL_LLVM_VERSION >= 200000
+        "gc_slot_addr_" + StringRef(std::to_string(Colors[R] + MinColorRoot)), InsertBefore->getIterator());
+#else
+        "gc_slot_addr_" + StringRef(std::to_string(Colors[R] + MinColorRoot)), InsertBefore);
+#endif
+    // Reset the slot to NULL.
+    Value *Val = ConstantPointerNull::get(T_prjlvalue);
+#if JL_LLVM_VERSION >= 200000
+    new StoreInst(Val, slotAddress, InsertBefore->getIterator());
+#else
     new StoreInst(Val, slotAddress, InsertBefore);
+#endif
 }
 
 void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot,
-                                          const std::vector<int> &Colors, Value *GCFrame)
+                                          ArrayRef<int> Colors, int PreAssignedColors, Value *GCFrame)
 {
     for (auto &BB : *S.F) {
         const BBState &BBS = S.BBStates[&BB];
-        if (!BBS.HasSafepoint) {
+        if (!BBS.HasSafepoint)
             continue;
-        }
         LargeSparseBitVector LiveIn;
-        AddInPredLiveOuts(&BB, LiveIn, S);
+        AddInPredecessorLiveOuts(&BB, LiveIn, S);
         const LargeSparseBitVector *LastLive = &LiveIn;
-        for(auto rit = BBS.Safepoints.rbegin();
-              rit != BBS.Safepoints.rend(); ++rit ) {
-            const LargeSparseBitVector &NowLive = S.LiveSets[*rit];
+        for (int Safepoint = BBS.FirstSafepoint; Safepoint >= BBS.LastSafepoint; --Safepoint) {
+            const LargeSparseBitVector &NowLive = S.LiveSets[Safepoint];
+            // reset slots which are no longer alive
+            for (int Idx : *LastLive) {
+                if (Colors[Idx] >= PreAssignedColors && !HasBitSet(NowLive, Idx)) {
+                    PlaceGCFrameReset(S, Idx, MinColorRoot, Colors, GCFrame,
+                        S.SafepointNumbering[Safepoint]);
+                }
+            }
+            // store values which are alive in this safepoint but
+            // haven't been stored in the GC frame before
             for (int Idx : NowLive) {
                 if (!HasBitSet(*LastLive, Idx)) {
                     PlaceGCFrameStore(S, Idx, MinColorRoot, Colors, GCFrame,
-                      S.ReverseSafepointNumbering[*rit]);
+                      S.SafepointNumbering[Safepoint]);
                 }
             }
             LastLive = &NowLive;
@@ -2633,7 +2434,8 @@ void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot,
     }
 }
 
-void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>) {
+void LateLowerGCFrame::PlaceRootsAndUpdateCalls(ArrayRef<int> Colors, int PreAssignedColors, State &S,
+                                                std::map<Value *, std::pair<int, int>>) {
     auto F = S.F;
     auto T_int32 = Type::getInt32Ty(F->getContext());
     int MaxColor = -1;
@@ -2642,18 +2444,58 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
             MaxColor = C;
 
     // Insert instructions for the actual gc frame
-    if (MaxColor != -1 || !S.Allocas.empty() || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) {
+    if (MaxColor != -1 || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) {
         // Create and push a GC frame.
         auto gcframe = CallInst::Create(
             getOrDeclare(jl_intrinsics::newGCFrame),
             {ConstantInt::get(T_int32, 0)},
             "gcframe");
-        gcframe->insertBefore(&*F->getEntryBlock().begin());
+        gcframe->insertBefore(F->getEntryBlock().begin());
 
         auto pushGcframe = CallInst::Create(
             getOrDeclare(jl_intrinsics::pushGCFrame),
             {gcframe, ConstantInt::get(T_int32, 0)});
-        pushGcframe->insertAfter(pgcstack);
+        if (isa<Argument>(pgcstack))
+             pushGcframe->insertAfter(gcframe);
+         else
+             pushGcframe->insertAfter(cast<Instruction>(pgcstack));
+
+        // we don't run memsetopt after this, so run a basic approximation of it
+        // that removes any redundant memset calls in the prologue since getGCFrameSlot already includes the null store
+        Instruction *toerase = nullptr;
+        for (auto &I : F->getEntryBlock()) {
+            if (toerase)
+                toerase->eraseFromParent();
+            toerase = nullptr;
+            Value *ptr;
+            Value *value;
+            bool isvolatile;
+            if (auto *SI = dyn_cast<StoreInst>(&I)) {
+                ptr = SI->getPointerOperand();
+                value = SI->getValueOperand();
+                isvolatile = SI->isVolatile();
+            }
+            else if (auto *MSI = dyn_cast<MemSetInst>(&I)) {
+                ptr = MSI->getDest();
+                value = MSI->getValue();
+                isvolatile = MSI->isVolatile();
+            }
+            else {
+                continue;
+            }
+            ptr = ptr->stripInBoundsOffsets();
+            AllocaInst *AI = dyn_cast<AllocaInst>(ptr);
+            if (isa<GetElementPtrInst>(ptr))
+                break;
+            if (!S.ArrayAllocas.count(AI))
+                continue;
+            if (isvolatile || !isa<Constant>(value) || !cast<Constant>(value)->isNullValue())
+                break; // stop once we reach a pointer operation that couldn't be analyzed or isn't a null store
+            toerase = &I;
+        }
+        if (toerase)
+            toerase->eraseFromParent();
+        toerase = nullptr;
 
         // Replace Allocas
         unsigned AllocaSlot = 2; // first two words are metadata
@@ -2666,13 +2508,13 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
                 AllocaSlot = LLT_ALIGN(AllocaSlot, align);
             Instruction *slotAddress = CallInst::Create(
                 getOrDeclare(jl_intrinsics::getGCFrameSlot),
-                {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)});
+                {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}, "gc_slot_addr" + StringRef(std::to_string(AllocaSlot - 2)));
             slotAddress->insertAfter(gcframe);
             slotAddress->takeName(AI);
 
             // Check for lifetime intrinsics on this alloca, we can't keep them
             // because we're changing the semantics
-            std::vector<CallInst*> ToDelete;
+            SmallVector<CallInst*, 0> ToDelete;
             RecursivelyVisit<IntrinsicInst>([&](Use &VU) {
                 IntrinsicInst *II = cast<IntrinsicInst>(VU.getUser());
                 if ((II->getIntrinsicID() != Intrinsic::lifetime_start &&
@@ -2683,23 +2525,11 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
             for (CallInst *II : ToDelete) {
                 II->eraseFromParent();
             }
-            if (slotAddress->getType() != AI->getType()) {
-                // If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up
-                // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                assert(slotAddress->getContext().supportsTypedPointers());
-                auto BCI  = new BitCastInst(slotAddress, AI->getType());
-                BCI->insertAfter(slotAddress);
-                slotAddress = BCI;
-            }
+            assert(slotAddress->getType() == AI->getType());
             AI->replaceAllUsesWith(slotAddress);
             AI->eraseFromParent();
             AI = NULL;
         };
-        for (AllocaInst *AI : S.Allocas) {
-            auto ns = cast<ConstantInt>(AI->getArraySize())->getZExtValue();
-            replace_alloca(AI);
-            AllocaSlot += ns;
-        }
         for (auto AI : S.ArrayAllocas) {
             replace_alloca(AI.first);
             AllocaSlot += AI.second;
@@ -2711,18 +2541,18 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
             for (unsigned i = 0; i < Store.second; ++i) {
                 auto slotAddress = CallInst::Create(
                     getOrDeclare(jl_intrinsics::getGCFrameSlot),
-                    {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)});
+                    {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}, "gc_slot_addr" + StringRef(std::to_string(AllocaSlot - 2)));
                 slotAddress->insertAfter(gcframe);
                 auto ValExpr = std::make_pair(Base, isa<PointerType>(Base->getType()) ? -1 : i);
                 auto Elem = MaybeExtractScalar(S, ValExpr, SI);
-                if (Elem->getType() != T_prjlvalue) {
-                    // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                    assert(Elem->getContext().supportsTypedPointers());
-                    Elem = new BitCastInst(Elem, T_prjlvalue, "", SI);
-                }
-                //auto Idxs = makeArrayRef(Tracked[i]);
+                assert(Elem->getType() == T_prjlvalue);
+                //auto Idxs = ArrayRef<unsigned>(Tracked[i]);
                 //Value *Elem = ExtractScalar(Base, true, Idxs, SI);
+#if JL_LLVM_VERSION >= 200000
+                Value *shadowStore = new StoreInst(Elem, slotAddress, SI->getIterator());
+#else
                 Value *shadowStore = new StoreInst(Elem, slotAddress, SI);
+#endif
                 (void)shadowStore;
                 // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
                 AllocaSlot++;
@@ -2733,14 +2563,18 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
         pushGcframe->setArgOperand(1, NRoots);
 
         // Insert GC frame stores
-        PlaceGCFrameStores(S, AllocaSlot - 2, Colors, gcframe);
+        PlaceGCFrameStores(S, AllocaSlot - 2, Colors, PreAssignedColors, gcframe);
         // Insert GCFrame pops
         for (auto &BB : *F) {
             if (isa<ReturnInst>(BB.getTerminator())) {
                 auto popGcframe = CallInst::Create(
                     getOrDeclare(jl_intrinsics::popGCFrame),
                     {gcframe});
+#if JL_LLVM_VERSION >= 200000
+                popGcframe->insertBefore(BB.getTerminator()->getIterator());
+#else
                 popGcframe->insertBefore(BB.getTerminator());
+#endif
             }
         }
     }
@@ -2748,33 +2582,54 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
 
 bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
     initAll(*F.getParent());
+    smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc);
     LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n");
-    if (!pgcstack_getter && !adoptthread_func)
-        return CleanupIR(F, nullptr, CFGModified);
 
     pgcstack = getPGCstack(F);
-    if (!pgcstack)
-        return CleanupIR(F, nullptr, CFGModified);
-
-    State S = LocalScan(F);
-    ComputeLiveness(S);
-    std::vector<int> Colors = ColorRoots(S);
-    std::map<Value *, std::pair<int, int>> CallFrames; // = OptimizeCallFrames(S, Ordering);
-    PlaceRootsAndUpdateCalls(Colors, S, CallFrames);
-    CleanupIR(F, &S, CFGModified);
-    return true;
-}
+    if (pgcstack) {
+      State S = LocalScan(F);
+      // If there is no safepoint after the first reachable def, then we don't need any roots (even those for allocas)
+      if (std::any_of(S.BBStates.begin(), S.BBStates.end(),
+                  [&F](auto BBS) {
+                      if (BBS.first == &F.getEntryBlock())
+                          return BBS.second.FirstSafepointAfterFirstDef != -1;
+                      return BBS.second.HasSafepoint;
+                  })) {
+        ComputeLiveness(S);
+        auto Colors = ColorRoots(S);
+        std::map<Value *, std::pair<int, int>> CallFrames; // = OptimizeCallFrames(S, Ordering);
+        PlaceRootsAndUpdateCalls(Colors.first, Colors.second, S, CallFrames);
+      }
+      CleanupIR(F, &S, CFGModified);
+    }
+    else {
+      CleanupIR(F, nullptr, CFGModified);
+    }
 
-bool LateLowerGCFrameLegacy::runOnFunction(Function &F) {
-    auto GetDT = [this]() -> DominatorTree & {
-        return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-    };
-    auto lateLowerGCFrame = LateLowerGCFrame(GetDT);
-    bool modified = lateLowerGCFrame.runOnFunction(F);
-#ifdef JL_VERIFY_PASSES
-    assert(!verifyFunction(F, &errs()));
-#endif
-    return modified;
+    // We lower the julia.gc_alloc_bytes intrinsic in this pass to insert slowpath/fastpath blocks for MMTk
+    // For now, we do nothing for the Stock GC
+    auto GCAllocBytes = getOrNull(jl_intrinsics::GCAllocBytes);
+
+    if (GCAllocBytes) {
+        for (auto it = GCAllocBytes->user_begin(); it != GCAllocBytes->user_end(); ) {
+            if (auto *CI = dyn_cast<CallInst>(*it)) {
+                *CFGModified = true;
+
+                assert(CI->getCalledOperand() == GCAllocBytes);
+
+                auto newI = lowerGCAllocBytesLate(CI, F);
+                if (newI != CI) {
+                    ++it;
+                    CI->replaceAllUsesWith(newI);
+                    CI->eraseFromParent();
+                    continue;
+                }
+            }
+            ++it;
+        }
+    }
+
+    return true;
 }
 
 PreservedAnalyses LateLowerGCPass::run(Function &F, FunctionAnalysisManager &AM)
@@ -2786,7 +2641,7 @@ PreservedAnalyses LateLowerGCPass::run(Function &F, FunctionAnalysisManager &AM)
     bool CFGModified = false;
     bool modified = lateLowerGCFrame.runOnFunction(F, &CFGModified);
 #ifdef JL_VERIFY_PASSES
-    assert(!verifyFunction(F, &errs()));
+    assert(!verifyLLVMIR(F));
 #endif
     if (modified) {
         if (CFGModified) {
@@ -2797,17 +2652,3 @@ PreservedAnalyses LateLowerGCPass::run(Function &F, FunctionAnalysisManager &AM)
     }
     return PreservedAnalyses::all();
 }
-
-
-char LateLowerGCFrameLegacy::ID = 0;
-static RegisterPass<LateLowerGCFrameLegacy> X("LateLowerGCFrame", "Late Lower GCFrame Pass", false, false);
-
-Pass *createLateLowerGCFramePass() {
-    return new LateLowerGCFrameLegacy();
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddLateLowerGCFramePass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createLateLowerGCFramePass());
-}
diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp
deleted file mode 100644
index 146c0fe701e9b..0000000000000
--- a/src/llvm-lower-handlers.cpp
+++ /dev/null
@@ -1,279 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#include "llvm-version.h"
-#include "passes.h"
-
-#include <llvm-c/Core.h>
-#include <llvm-c/Types.h>
-
-#include <llvm/ADT/DepthFirstIterator.h>
-#include <llvm/ADT/Statistic.h>
-#include <llvm/ADT/Triple.h>
-#include <llvm/Analysis/CFG.h>
-#include <llvm/IR/BasicBlock.h>
-#include <llvm/IR/Constants.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/Instructions.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/Value.h>
-#include <llvm/IR/LegacyPassManager.h>
-#include <llvm/IR/Verifier.h>
-#include <llvm/Pass.h>
-#include <llvm/Support/Debug.h>
-#include <llvm/Transforms/Utils/BasicBlockUtils.h>
-
-#include "julia.h"
-#include "julia_assert.h"
-#include "llvm-codegen-shared.h"
-#include <map>
-
-#define DEBUG_TYPE "lower_handlers"
-#undef DEBUG
-STATISTIC(MaxExceptionHandlerDepth, "Maximum nesting of exception handlers");
-STATISTIC(ExceptionHandlerBuffers, "Number of exception handler buffers inserted");
-
-using namespace llvm;
-
-/* Lowers Julia Exception Handlers and colors EH frames.
- *
- *  Our task is to lower:
- * call void @julia.except_enter()
- * <...>
- * call void jl_pop_handler(1)
- *
- * to
- *
- * call void @jl_enter_handler(jl_handler *%buff)
- * <...>
- * call void jl_pop_handler(1)
- *
- * Where buff is an appropriate stack slot handler.
- *
- * We make the following assumptions:
- *  - All EH frames are completely nested.
- *  - The exception nestedness of a BB is not dynamic. I.e. we don't allow
- *    the following:
- *                br i1 %cond, %left, %right
- *                  /                \
- *           except.enter         br mid
- *           br mid                  |
- *                 \                 /
- *              br i1 %cond, %left2, %right2
- *                 /                 \
- *           jl_pop_hander          ret
- *           ret
- *
- *    The frontend doesn't emit structures like this. However, the optimizer
- *    could easily introduce them, so this pass should run early after IRGen.
- *
- * Because of these assumptions, the algorithm is very simple. We simply label
- * the handler depth at every basic block using a DFS search. For each enter
- * we encounter, we record the current depth and then allocate an exception
- * handler frame for every level.
- *
- * As an additional optimization, we also insert lifetime intrinsics for the
- * handler structures to tell LLVM that it is free to re-use the stack slot
- * while the handler is not being used.
- */
-
-namespace {
-/*
- * If the module doesn't have declarations for the jl_enter_handler and setjmp
- * functions, insert them.
- */
-static void ensure_enter_function(Module &M, const Triple &TT)
-{
-    auto T_int8  = Type::getInt8Ty(M.getContext());
-    auto T_pint8 = PointerType::get(T_int8, 0);
-    auto T_void = Type::getVoidTy(M.getContext());
-    auto T_int32 = Type::getInt32Ty(M.getContext());
-    if (!M.getNamedValue(XSTR(jl_enter_handler))) {
-        std::vector<Type*> ehargs(0);
-        ehargs.push_back(T_pint8);
-        Function::Create(FunctionType::get(T_void, ehargs, false),
-                         Function::ExternalLinkage, XSTR(jl_enter_handler), &M);
-    }
-    if (!M.getNamedValue(jl_setjmp_name)) {
-        std::vector<Type*> args2(0);
-        args2.push_back(T_pint8);
-        if (!TT.isOSWindows()) {
-            args2.push_back(T_int32);
-        }
-        Function::Create(FunctionType::get(T_int32, args2, false),
-                         Function::ExternalLinkage, jl_setjmp_name, &M)
-            ->addFnAttr(Attribute::ReturnsTwice);
-    }
-}
-
-static bool lowerExcHandlers(Function &F) {
-    Module &M = *F.getParent();
-    Triple TT(M.getTargetTriple());
-    Function *except_enter_func = M.getFunction("julia.except_enter");
-    if (!except_enter_func)
-        return false; // No EH frames in this module
-    ensure_enter_function(M, TT);
-    Function *leave_func = M.getFunction(XSTR(jl_pop_handler));
-    Function *jlenter_func = M.getFunction(XSTR(jl_enter_handler));
-    Function *setjmp_func = M.getFunction(jl_setjmp_name);
-
-    auto T_pint8 = Type::getInt8PtrTy(M.getContext(), 0);
-    Function *lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_pint8 });
-    Function *lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_pint8 });
-
-    /* Step 1: EH Depth Numbering */
-    std::map<llvm::CallInst *, int> EnterDepth;
-    std::map<llvm::CallInst *, int> LeaveDepth;
-    std::map<BasicBlock *, int> ExitDepth;
-    int MaxDepth = 0;
-    // Compute EH Depth at each basic block using a DFS traversal.
-    for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
-            E = df_end(&F.getEntryBlock()); I != E; ++I) {
-        auto *BB = *I;
-        int Depth = 0;
-        /* Here we use the assumption that all incoming edges have the same
-         * EH depth.
-         */
-        for (auto *Pred : predecessors(BB)) {
-            auto it = ExitDepth.find(Pred);
-            if (it != ExitDepth.end()) {
-                Depth = it->second;
-                break;
-            }
-        }
-        /* Compute the depth within the basic block */
-        for (auto &I : *BB) {
-            auto *CI = dyn_cast<CallInst>(&I);
-            if (!CI)
-                continue;
-            Function *Callee = CI->getCalledFunction();
-            if (!Callee)
-                continue;
-            if (Callee == except_enter_func)
-                EnterDepth[CI] = Depth++;
-            else if (Callee == leave_func) {
-                LeaveDepth[CI] = Depth;
-                Depth -= cast<ConstantInt>(CI->getArgOperand(0))->getLimitedValue();
-            }
-            assert(Depth >= 0);
-            if (Depth > MaxDepth)
-                MaxDepth = Depth;
-        }
-        /* Remember the depth at the BB boundary */
-        ExitDepth[BB] = Depth;
-    }
-    MaxExceptionHandlerDepth.updateMax(MaxDepth);
-    ExceptionHandlerBuffers += MaxDepth;
-
-    /* Step 2: EH Frame lowering */
-    // Allocate stack space for each handler. We allocate these as separate
-    // allocas so the optimizer can later merge and rearrange them if it wants
-    // to.
-    Value *handler_sz = ConstantInt::get(Type::getInt32Ty(F.getContext()),
-                                         sizeof(jl_handler_t));
-    Value *handler_sz64 = ConstantInt::get(Type::getInt64Ty(F.getContext()),
-                                           sizeof(jl_handler_t));
-    Instruction *firstInst = &F.getEntryBlock().front();
-    std::vector<Instruction *> buffs;
-    unsigned allocaAddressSpace = F.getParent()->getDataLayout().getAllocaAddrSpace();
-    for (int i = 0; i < MaxDepth; ++i) {
-        auto *buff = new AllocaInst(Type::getInt8Ty(F.getContext()), allocaAddressSpace,
-                handler_sz, Align(16), "", firstInst);
-        if (allocaAddressSpace) {
-            AddrSpaceCastInst *buff_casted = new AddrSpaceCastInst(buff, Type::getInt8PtrTy(F.getContext(), AddressSpace::Generic));
-            buff_casted->insertAfter(buff);
-            buffs.push_back(buff_casted);
-        } else {
-            buffs.push_back(buff);
-        }
-    }
-
-    // Lower enter funcs
-    for (auto it : EnterDepth) {
-        assert(it.second >= 0);
-        Instruction *buff = buffs[it.second];
-        CallInst *enter = it.first;
-        auto new_enter = CallInst::Create(jlenter_func, buff, "", enter);
-        Value *lifetime_args[] = {
-            handler_sz64,
-            buff
-        };
-        CallInst::Create(lifetime_start, lifetime_args, "", new_enter);
-        CallInst *sj;
-        if (!TT.isOSWindows()) {
-            // For LLVM 3.3 compatibility
-            Value *args[] = {buff,
-                            ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)};
-            sj = CallInst::Create(setjmp_func, args, "", enter);
-        } else {
-            sj = CallInst::Create(setjmp_func, buff, "", enter);
-        }
-        // We need to mark this on the call site as well. See issue #6757
-        sj->setCanReturnTwice();
-        if (auto dbg = enter->getMetadata(LLVMContext::MD_dbg)) {
-            new_enter->setMetadata(LLVMContext::MD_dbg, dbg);
-            sj->setMetadata(LLVMContext::MD_dbg, dbg);
-        }
-        enter->replaceAllUsesWith(sj);
-        enter->eraseFromParent();
-    }
-    // Insert lifetime end intrinsics after every leave.
-    for (auto it : LeaveDepth) {
-        int StartDepth = it.second - 1;
-        int npops = cast<ConstantInt>(it.first->getArgOperand(0))->getLimitedValue();
-        for (int i = 0; i < npops; ++i) {
-            assert(StartDepth-i >= 0);
-            Value *lifetime_args[] = {
-                handler_sz64,
-                buffs[StartDepth-i]
-            };
-            auto LifetimeEnd = CallInst::Create(lifetime_end, lifetime_args);
-            LifetimeEnd->insertAfter(it.first);
-        }
-    }
-    return true;
-}
-
-} // anonymous namespace
-
-PreservedAnalyses LowerExcHandlersPass::run(Function &F, FunctionAnalysisManager &AM)
-{
-    bool modified = lowerExcHandlers(F);
-#ifdef JL_VERIFY_PASSES
-    assert(!verifyFunction(F, &errs()));
-#endif
-    if (modified) {
-        return PreservedAnalyses::allInSet<CFGAnalyses>();
-    }
-    return PreservedAnalyses::all();
-}
-
-
-struct LowerExcHandlersLegacy : public FunctionPass {
-    static char ID;
-    LowerExcHandlersLegacy() : FunctionPass(ID)
-    {}
-    bool runOnFunction(Function &F) {
-        bool modified = lowerExcHandlers(F);
-#ifdef JL_VERIFY_PASSES
-        assert(!verifyFunction(F, &errs()));
-#endif
-        return modified;
-    }
-};
-
-char LowerExcHandlersLegacy::ID = 0;
-static RegisterPass<LowerExcHandlersLegacy> X("LowerExcHandlers", "Lower Julia Exception Handlers",
-                                         false /* Only looks at CFG */,
-                                         false /* Analysis Pass */);
-
-Pass *createLowerExcHandlersPass()
-{
-    return new LowerExcHandlersLegacy();
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddLowerExcHandlersPass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createLowerExcHandlersPass());
-}
diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp
deleted file mode 100644
index 29c0f7e2b10d6..0000000000000
--- a/src/llvm-muladd.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#include "llvm-version.h"
-#include "passes.h"
-
-#include <llvm-c/Core.h>
-#include <llvm-c/Types.h>
-
-#include <llvm/ADT/Statistic.h>
-#include <llvm/Analysis/OptimizationRemarkEmitter.h>
-#include <llvm/IR/Value.h>
-#include <llvm/IR/LegacyPassManager.h>
-#include <llvm/IR/PassManager.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/Instructions.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/Operator.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/IR/Verifier.h>
-#include <llvm/Pass.h>
-#include <llvm/Support/Debug.h>
-
-#include "julia.h"
-#include "julia_assert.h"
-
-#define DEBUG_TYPE "combine-muladd"
-#undef DEBUG
-
-using namespace llvm;
-STATISTIC(TotalContracted, "Total number of multiplies marked for FMA");
-
-#ifndef __clang_gcanalyzer__
-#define REMARK(remark) ORE.emit(remark)
-#else
-#define REMARK(remark) (void) 0;
-#endif
-
-/**
- * Combine
- * ```
- * %v0 = fmul ... %a, %b
- * %v = fadd contract ... %v0, %c
- * ```
- * to
- * `%v = call contract @llvm.fmuladd.<...>(... %a, ... %b, ... %c)`
- * when `%v0` has no other use
- */
-
-// Return true if we changed the mulOp
-static bool checkCombine(Value *maybeMul, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT
-{
-    auto mulOp = dyn_cast<Instruction>(maybeMul);
-    if (!mulOp || mulOp->getOpcode() != Instruction::FMul)
-        return false;
-    if (!mulOp->hasOneUse()) {
-        LLVM_DEBUG(dbgs() << "mulOp has multiple uses: " << *maybeMul << "\n");
-        REMARK([&](){
-            return OptimizationRemarkMissed(DEBUG_TYPE, "Multiuse FMul", mulOp)
-                << "fmul had multiple uses " << ore::NV("fmul", mulOp);
-        });
-        return false;
-    }
-    // On 5.0+ we only need to mark the mulOp as contract and the backend will do the work for us.
-    auto fmf = mulOp->getFastMathFlags();
-    if (!fmf.allowContract()) {
-        LLVM_DEBUG(dbgs() << "Marking mulOp for FMA: " << *maybeMul << "\n");
-        REMARK([&](){
-            return OptimizationRemark(DEBUG_TYPE, "Marked for FMA", mulOp)
-                << "marked for fma " << ore::NV("fmul", mulOp);
-        });
-        ++TotalContracted;
-        fmf.setAllowContract(true);
-        mulOp->copyFastMathFlags(fmf);
-        return true;
-    }
-    return false;
-}
-
-static bool combineMulAdd(Function &F) JL_NOTSAFEPOINT
-{
-    OptimizationRemarkEmitter ORE(&F);
-    bool modified = false;
-    for (auto &BB: F) {
-        for (auto it = BB.begin(); it != BB.end();) {
-            auto &I = *it;
-            it++;
-            switch (I.getOpcode()) {
-            case Instruction::FAdd: {
-                if (!I.hasAllowContract())
-                    continue;
-                modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE);
-                break;
-            }
-            case Instruction::FSub: {
-                if (!I.hasAllowContract())
-                    continue;
-                modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE);
-                break;
-            }
-            default:
-                break;
-            }
-        }
-    }
-#ifdef JL_VERIFY_PASSES
-    assert(!verifyFunction(F, &errs()));
-#endif
-    return modified;
-}
-
-PreservedAnalyses CombineMulAddPass::run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT
-{
-    if (combineMulAdd(F)) {
-        return PreservedAnalyses::allInSet<CFGAnalyses>();
-    }
-    return PreservedAnalyses::all();
-}
-
-
-struct CombineMulAddLegacy : public FunctionPass {
-    static char ID;
-    CombineMulAddLegacy() : FunctionPass(ID)
-    {}
-
-private:
-    bool runOnFunction(Function &F) override {
-        return combineMulAdd(F);
-    }
-};
-
-char CombineMulAddLegacy::ID = 0;
-static RegisterPass<CombineMulAddLegacy> X("CombineMulAdd", "Combine mul and add to muladd",
-                                     false /* Only looks at CFG */,
-                                     false /* Analysis Pass */);
-
-Pass *createCombineMulAddPass()
-{
-    return new CombineMulAddLegacy();
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddCombineMulAddPass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createCombineMulAddPass());
-}
diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp
index e4ebbe9d3838a..5387ae1ed93ba 100644
--- a/src/llvm-multiversioning.cpp
+++ b/src/llvm-multiversioning.cpp
@@ -12,11 +12,11 @@
 #include <llvm-c/Types.h>
 
 #include <llvm/Pass.h>
+#include <llvm/ADT/SmallString.h>
 #include <llvm/ADT/BitVector.h>
 #include <llvm/ADT/Statistic.h>
-#include <llvm/ADT/Triple.h>
+#include <llvm/TargetParser/Triple.h>
 #include <llvm/IR/Module.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/Constants.h>
@@ -24,7 +24,6 @@
 #include <llvm/IR/LLVMContext.h>
 #include <llvm/Analysis/LoopInfo.h>
 #include <llvm/Analysis/CallGraph.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/DebugInfoMetadata.h>
 #include <llvm/IR/Verifier.h>
@@ -48,11 +47,11 @@
 
 using namespace llvm;
 
-extern Optional<bool> always_have_fma(Function&, const Triple &TT);
+extern std::optional<bool> always_have_fma(Function&, const Triple &TT);
 
 namespace {
 constexpr uint32_t clone_mask =
-    JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16;
+    JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16 | JL_TARGET_CLONE_BFLOAT16;
 
 // Treat identical mapping as missing and return `def` in that case.
 // We mainly need this to identify cloned function using value map after LLVM cloning
@@ -98,14 +97,15 @@ static uint32_t collect_func_info(Function &F, const Triple &TT, bool &has_vecca
                 }
                 if (auto callee = call->getCalledFunction()) {
                     auto name = callee->getName();
-                    if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) {
+                    if (name.starts_with("llvm.muladd.") || name.starts_with("llvm.fma.")) {
                         flag |= JL_TARGET_CLONE_MATH;
                     }
-                    else if (name.startswith("julia.cpu.")) {
-                        if (name.startswith("julia.cpu.have_fma.")) {
+                    else if (name.starts_with("julia.cpu.")) {
+                        if (name.starts_with("julia.cpu.have_fma.")) {
                             // for some platforms we know they always do (or don't) support
                             // FMA. in those cases we don't need to clone the function.
-                            if (!always_have_fma(*callee, TT).hasValue())
+                            // always_have_fma returns an optional<bool>
+                            if (!always_have_fma(*callee, TT))
                                 flag |= JL_TARGET_CLONE_CPU;
                         } else {
                             flag |= JL_TARGET_CLONE_CPU;
@@ -128,12 +128,14 @@ static uint32_t collect_func_info(Function &F, const Triple &TT, bool &has_vecca
             }
 
             for (size_t i = 0; i < I.getNumOperands(); i++) {
-                if(I.getOperand(i)->getType()->isHalfTy()){
+                if(I.getOperand(i)->getType()->isHalfTy()) {
                     flag |= JL_TARGET_CLONE_FLOAT16;
                 }
-                // Check for BFloat16 when they are added to julia can be done here
+                if(I.getOperand(i)->getType()->isBFloatTy()) {
+                    flag |= JL_TARGET_CLONE_BFLOAT16;
+                }
             }
-            uint32_t veccall_flags = JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16;
+            uint32_t veccall_flags = JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16 | JL_TARGET_CLONE_BFLOAT16;
             if (has_veccall && (flag & veccall_flags) == veccall_flags) {
                 return flag;
             }
@@ -179,12 +181,12 @@ struct TargetSpec {
     }
 };
 
-static Optional<std::vector<TargetSpec>> get_target_specs(Module &M) {
+static std::optional<SmallVector<TargetSpec, 0>> get_target_specs(Module &M) {
     auto md = M.getModuleFlag("julia.mv.specs");
     if (!md)
         return None;
     auto tup = cast<MDTuple>(md);
-    std::vector<TargetSpec> out(tup->getNumOperands());
+    SmallVector<TargetSpec, 0> out(tup->getNumOperands());
     for (unsigned i = 0; i < tup->getNumOperands(); i++) {
         out[i] = TargetSpec::fromMD(cast<MDTuple>(tup->getOperand(i).get()));
     }
@@ -192,7 +194,7 @@ static Optional<std::vector<TargetSpec>> get_target_specs(Module &M) {
 }
 
 static void set_target_specs(Module &M, ArrayRef<TargetSpec> specs) {
-    std::vector<Metadata *> md;
+    SmallVector<Metadata *, 0> md;
     md.reserve(specs.size());
     for (auto &spec: specs) {
         md.push_back(spec.toMD(M.getContext()));
@@ -203,28 +205,28 @@ static void set_target_specs(Module &M, ArrayRef<TargetSpec> specs) {
 static void annotate_module_clones(Module &M) {
     auto TT = Triple(M.getTargetTriple());
     CallGraph CG(M);
-    std::vector<Function *> orig_funcs;
+    SmallVector<Function *, 0> orig_funcs;
     for (auto &F: M) {
         if (F.isDeclaration())
             continue;
         orig_funcs.push_back(&F);
     }
     bool has_veccall = false;
-    std::vector<TargetSpec> specs;
+    SmallVector<TargetSpec, 0> specs;
     if (auto maybe_specs = get_target_specs(M)) {
         specs = std::move(*maybe_specs);
     } else {
-        auto full_specs = jl_get_llvm_clone_targets();
+        auto full_specs = jl_get_llvm_clone_targets(jl_options.cpu_target);
         specs.reserve(full_specs.size());
         for (auto &spec: full_specs) {
             specs.push_back(TargetSpec::fromSpec(spec));
         }
         set_target_specs(M, specs);
     }
-    std::vector<APInt> clones(orig_funcs.size(), APInt(specs.size(), 0));
+    SmallVector<APInt, 0> clones(orig_funcs.size(), APInt(specs.size(), 0));
     BitVector subtarget_cloned(orig_funcs.size());
 
-    std::vector<unsigned> func_infos(orig_funcs.size());
+    SmallVector<unsigned, 0> func_infos(orig_funcs.size());
     for (unsigned i = 0; i < orig_funcs.size(); i++) {
         func_infos[i] = collect_func_info(*orig_funcs[i], TT, has_veccall);
     }
@@ -340,7 +342,7 @@ struct CloneCtx {
         }
     };
     struct Group : Target {
-        std::vector<Target> clones;
+        SmallVector<Target, 0> clones;
         explicit Group(int base) :
             Target(base),
             clones{}
@@ -376,23 +378,24 @@ struct CloneCtx {
     void clone_partial(Group &grp, Target &tgt);
     uint32_t get_func_id(Function *F) const;
     std::pair<uint32_t,GlobalVariable*> get_reloc_slot(Function *F) const;
+
+    Function *create_trampoline(Function *F, GlobalVariable *slot, bool autoinit=false);
     void rewrite_alias(GlobalAlias *alias, Function* F);
 
     MDNode *tbaa_const;
-    std::vector<TargetSpec> specs;
-    std::vector<Group> groups{};
-    std::vector<Target *> linearized;
-    std::vector<Function*> fvars;
-    std::vector<Constant*> gvars;
+    SmallVector<TargetSpec, 0> specs;
+    SmallVector<Group, 0> groups{};
+    SmallVector<Target *, 0> linearized;
+    SmallVector<Function*, 0> fvars;
     Module &M;
     Type *T_size;
     Triple TT;
 
     // Map from original function to one based index in `fvars`
     std::map<const Function*,uint32_t> func_ids{};
-    std::vector<Function*> orig_funcs{};
+    SmallVector<Function*, 0> orig_funcs{};
     // GV addresses and their corresponding function id (i.e. 0-based index in `fvars`)
-    std::vector<std::pair<Constant*,uint32_t>> gv_relocs{};
+    SmallVector<std::pair<Constant*,uint32_t>, 0> gv_relocs{};
     // Mapping from function id (i.e. 0-based index in `fvars`) to GVs to be initialized.
     std::map<uint32_t,GlobalVariable*> const_relocs;
     std::map<Function *, GlobalVariable*> extern_relocs;
@@ -400,7 +403,7 @@ struct CloneCtx {
 };
 
 template<typename T>
-static inline std::vector<T*> consume_gv(Module &M, const char *name, bool allow_bad_fvars)
+static inline SmallVector<T*, 0> consume_gv(Module &M, const char *name, bool allow_bad_fvars)
 {
     // Get information about sysimg export functions from the two global variables.
     // Strip them from the Module so that it's easier to handle the uses.
@@ -408,7 +411,7 @@ static inline std::vector<T*> consume_gv(Module &M, const char *name, bool allow
     assert(gv && gv->hasInitializer());
     ArrayType *Ty = cast<ArrayType>(gv->getInitializer()->getType());
     unsigned nele = Ty->getArrayNumElements();
-    std::vector<T*> res(nele);
+    SmallVector<T*, 0> res(nele);
     ConstantArray *ary = nullptr;
     if (gv->getInitializer()->isNullValue()) {
         for (unsigned i = 0; i < nele; ++i)
@@ -440,7 +443,6 @@ CloneCtx::CloneCtx(Module &M, bool allow_bad_fvars)
     : tbaa_const(tbaa_make_child_with_context(M.getContext(), "jtbaa_const", nullptr, true).first),
       specs(*get_target_specs(M)),
       fvars(consume_gv<Function>(M, "jl_fvars", allow_bad_fvars)),
-      gvars(consume_gv<Constant>(M, "jl_gvars", false)),
       M(M),
       T_size(M.getDataLayout().getIntPtrType(M.getContext())),
       TT(M.getTargetTriple()),
@@ -449,7 +451,7 @@ CloneCtx::CloneCtx(Module &M, bool allow_bad_fvars)
     groups.emplace_back(0);
     linearized.resize(specs.size());
     linearized[0] = &groups[0];
-    std::vector<unsigned> group_ids(specs.size(), 0);
+    SmallVector<unsigned, 0> group_ids(specs.size(), 0);
     uint32_t ntargets = specs.size();
     for (uint32_t i = 1; i < ntargets; i++) {
         auto &spec = specs[i];
@@ -487,9 +489,57 @@ void CloneCtx::prepare_vmap(ValueToValueMapTy &vmap)
     // The `DISubprogram` cloning on LLVM 5.0 handles this
     // but it doesn't hurt to enforce the identity either.
     auto &MD = vmap.MD();
-    for (auto cu: M.debug_compile_units()) {
-        MD[cu].reset(cu);
+    if (M.getNamedMetadata("llvm.dbg.cu"))
+        for (auto cu: M.getNamedMetadata("llvm.dbg.cu")->operands()) {
+            MD[cu].reset(cu);
+    }
+}
+
+Function *CloneCtx::create_trampoline(Function *F, GlobalVariable *slot, bool autoinit)
+{
+    Function *trampoline =
+        Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, "", &M);
+
+    trampoline->copyAttributesFrom(F);
+    trampoline->setVisibility(GlobalValue::HiddenVisibility);
+    trampoline->setDSOLocal(true);
+
+    // drop multiversioning attributes
+    trampoline->removeFnAttr("julia.mv.reloc");
+    trampoline->removeFnAttr("julia.mv.clones");
+
+    auto BB = BasicBlock::Create(F->getContext(), "top", trampoline);
+    IRBuilder<> irbuilder(BB);
+
+    if (autoinit) {
+        irbuilder.CreateCall(F->getParent()->getOrInsertFunction(
+            XSTR(jl_autoinit_and_adopt_thread),
+            PointerType::get(F->getContext(), 0)
+        ));
+    }
+
+    auto ptr = irbuilder.CreateLoad(F->getType(), slot);
+    ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
+    ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(F->getContext(), None));
+
+    SmallVector<Value *, 0> Args;
+    for (auto &arg : trampoline->args())
+        Args.push_back(&arg);
+    auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, ArrayRef<Value *>(Args));
+    if (F->isVarArg()) {
+        assert(!TT.isARM() && !TT.isPPC() && "musttail not supported on ARM/PPC!");
+        call->setTailCallKind(CallInst::TCK_MustTail);
+    } else {
+        call->setTailCallKind(CallInst::TCK_Tail);
+
     }
+
+    if (F->getReturnType() == Type::getVoidTy(F->getContext()))
+        irbuilder.CreateRetVoid();
+    else
+        irbuilder.CreateRet(call);
+
+    return trampoline;
 }
 
 void CloneCtx::prepare_slots()
@@ -506,7 +556,12 @@ void CloneCtx::prepare_slots()
             else {
                 auto id = get_func_id(F);
                 const_relocs[id] = GV;
-                GV->setInitializer(Constant::getNullValue(F->getType()));
+
+                // Initialize with a single-use trampoline that calls `jl_autoinit_and_adopt_thread`,
+                // so that auto-initialization works with multi-versioned entrypoints.
+                Function *trampoline = create_trampoline(F, GV, /* autoinit */ true);
+                trampoline->setName(F->getName() + ".autoinit_trampoline");
+                GV->setInitializer(trampoline);
             }
         }
     }
@@ -514,7 +569,7 @@ void CloneCtx::prepare_slots()
 
 void CloneCtx::clone_decls()
 {
-    std::vector<std::string> suffixes(specs.size());
+    SmallVector<std::string, 0> suffixes(specs.size());
     for (unsigned i = 1; i < specs.size(); i++) {
         suffixes[i] = "." + std::to_string(i);
     }
@@ -531,7 +586,7 @@ void CloneCtx::clone_decls()
             new_F->setVisibility(F->getVisibility());
             new_F->setDSOLocal(true);
             auto base_func = F;
-            if (specs[i].flags & JL_TARGET_CLONE_ALL)
+            if (!(specs[i].flags & JL_TARGET_CLONE_ALL))
                 base_func = static_cast<Group*>(linearized[specs[i].base])->base_func(F);
             (*linearized[i]->vmap)[base_func] = new_F;
         }
@@ -586,7 +641,7 @@ void CloneCtx::clone_bodies()
             }
             for (auto &target : groups[i].clones) {
                 prepare_vmap(*target.vmap);
-                auto target_F = cast_or_null<Function>(map_get(*target.vmap, F));
+                auto target_F = cast_or_null<Function>(map_get(*target.vmap, group_F));
                 if (target_F) {
                     if (!F->isDeclaration()) {
                         clone_function(group_F, target_F, *target.vmap);
@@ -664,44 +719,21 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
 {
     assert(!is_vector(F->getFunctionType()));
 
-    Function *trampoline =
-        Function::Create(F->getFunctionType(), alias->getLinkage(), "", &M);
-    trampoline->copyAttributesFrom(F);
-    trampoline->takeName(alias);
-    trampoline->setVisibility(alias->getVisibility());
-    trampoline->setDSOLocal(alias->isDSOLocal());
-    // drop multiversioning attributes, add alias attribute for testing purposes
-    trampoline->removeFnAttr("julia.mv.reloc");
-    trampoline->removeFnAttr("julia.mv.clones");
-    trampoline->addFnAttr("julia.mv.alias");
-    alias->eraseFromParent();
-
     uint32_t id;
     GlobalVariable *slot;
     std::tie(id, slot) = get_reloc_slot(F);
+    assert(slot);
 
-    auto BB = BasicBlock::Create(F->getContext(), "top", trampoline);
-    IRBuilder<> irbuilder(BB);
-
-    auto ptr = irbuilder.CreateLoad(F->getType(), slot);
-    ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
-    ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(F->getContext(), None));
+    Function *trampoline = create_trampoline(F, slot, /* autoinit */ false);
+    trampoline->addFnAttr("julia.mv.alias"); // add alias attribute for testing purposes
 
-    std::vector<Value *> Args;
-    for (auto &arg : trampoline->args())
-        Args.push_back(&arg);
-    auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, makeArrayRef(Args));
-    if (F->isVarArg()) {
-        assert(!TT.isARM() && !TT.isPPC() && "musttail not supported on ARM/PPC!");
-        call->setTailCallKind(CallInst::TCK_MustTail);
-    } else {
-        call->setTailCallKind(CallInst::TCK_Tail);
-    }
+    trampoline->takeName(alias);
+    trampoline->setLinkage(alias->getLinkage());
+    trampoline->setVisibility(alias->getVisibility());
+    trampoline->setDSOLocal(alias->isDSOLocal());
+    trampoline->setDLLStorageClass(alias->getDLLStorageClass());
 
-    if (F->getReturnType() == Type::getVoidTy(F->getContext()))
-        irbuilder.CreateRetVoid();
-    else
-        irbuilder.CreateRet(call);
+    alias->eraseFromParent();
 }
 
 void CloneCtx::fix_gv_uses()
@@ -755,7 +787,7 @@ std::pair<uint32_t,GlobalVariable*> CloneCtx::get_reloc_slot(Function *F) const
     if (F->isDeclaration()) {
         auto extern_decl = extern_relocs.find(F);
         assert(extern_decl != extern_relocs.end() && "Missing extern relocation slot!");
-        return {(uint32_t)-1, extern_decl->second};
+        return {UINT32_MAX, extern_decl->second};
     }
     else {
         auto id = get_func_id(F);
@@ -766,7 +798,11 @@ std::pair<uint32_t,GlobalVariable*> CloneCtx::get_reloc_slot(Function *F) const
 }
 
 template<typename Stack>
+#if JL_LLVM_VERSION >= 200000
+static Value *rewrite_inst_use(const Stack& stack, Type *T_size, Value *replace, InsertPosition& insert_before)
+#else
 static Value *rewrite_inst_use(const Stack& stack, Type *T_size, Value *replace, Instruction *insert_before)
+#endif
 {
     SmallVector<Constant*, 8> args;
     uint32_t nlevel = stack.size();
@@ -827,9 +863,17 @@ static void replaceUsesWithLoad(Function &F, Type *T_size, I2GV should_replace,
             GlobalVariable *slot = should_replace(*use_i);
             if (!slot)
                 continue;
+#if JL_LLVM_VERSION >= 200000
+            InsertPosition insert_before = use_i->getIterator();
+#else
             Instruction *insert_before = use_i;
+#endif
             if (auto phi = dyn_cast<PHINode>(use_i))
+#if JL_LLVM_VERSION >= 200000
+                insert_before = phi->getIncomingBlock(*info.use)->getTerminator()->getIterator();
+#else
                 insert_before = phi->getIncomingBlock(*info.use)->getTerminator();
+#endif
             Instruction *ptr = new LoadInst(F.getType(), slot, "", false, insert_before);
             ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
             ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ptr->getContext(), None));
@@ -875,46 +919,28 @@ static Constant *get_ptrdiff32(Type *T_size, Constant *ptr, Constant *base)
     if (ptr->getType()->isPointerTy())
         ptr = ConstantExpr::getPtrToInt(ptr, T_size);
     auto ptrdiff = ConstantExpr::getSub(ptr, base);
-    return sizeof(void*) == 8 ? ConstantExpr::getTrunc(ptrdiff, Type::getInt32Ty(ptr->getContext())) : ptrdiff;
+    return T_size->getPrimitiveSizeInBits() > 32 ? ConstantExpr::getTrunc(ptrdiff, Type::getInt32Ty(ptr->getContext())) : ptrdiff;
 }
 
-template<typename T>
-static Constant *emit_offset_table(Module &M, Type *T_size, const std::vector<T*> &vars, StringRef name, StringRef suffix)
+static void emit_table(Module &M, Type *T_size, ArrayRef<Constant*> vars, StringRef name, StringRef suffix)
 {
-    auto T_int32 = Type::getInt32Ty(M.getContext());
     uint32_t nvars = vars.size();
-    Constant *base = nullptr;
-    if (nvars > 0) {
-        base = ConstantExpr::getBitCast(vars[0], T_size->getPointerTo());
-        auto ga = GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage,
-                                       name + "_base" + suffix,
-                                       base, &M);
-        ga->setVisibility(GlobalValue::HiddenVisibility);
-        ga->setDSOLocal(true);
-    } else {
-        auto gv = new GlobalVariable(M, T_size, true, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), name + "_base" + suffix);
-        gv->setVisibility(GlobalValue::HiddenVisibility);
-        gv->setDSOLocal(true);
-        base = gv;
-    }
-    auto vbase = ConstantExpr::getPtrToInt(base, T_size);
-    std::vector<Constant*> offsets(nvars + 1);
-    offsets[0] = ConstantInt::get(T_int32, nvars);
-    if (nvars > 0) {
-        offsets[1] = ConstantInt::get(T_int32, 0);
-        for (uint32_t i = 1; i < nvars; i++)
-            offsets[i + 1] = get_ptrdiff32(T_size, vars[i], vbase);
-    }
-    ArrayType *vars_type = ArrayType::get(T_int32, nvars + 1);
-    auto gv = new GlobalVariable(M, vars_type, true,
-                                  GlobalVariable::ExternalLinkage,
-                                  ConstantArray::get(vars_type, offsets),
-                                  name + "_offsets" + suffix);
+    SmallVector<Constant*,0> castvars(nvars);
+    for (size_t i = 0; i < nvars; i++)
+        castvars[i] = ConstantExpr::getBitCast(vars[i], PointerType::getUnqual(T_size->getContext()));
+    auto gv = new GlobalVariable(M, T_size, true, GlobalValue::ExternalLinkage, ConstantInt::get(T_size, nvars), name + "_count" + suffix);
+    gv->setVisibility(GlobalValue::HiddenVisibility);
+    gv->setDSOLocal(true);
+    ArrayType *vars_type = ArrayType::get(PointerType::getUnqual(T_size->getContext()), nvars);
+    gv = new GlobalVariable(M, vars_type, false,
+                            GlobalVariable::ExternalLinkage,
+                            ConstantArray::get(vars_type, castvars),
+                            name + "_ptrs" + suffix);
     gv->setVisibility(GlobalValue::HiddenVisibility);
     gv->setDSOLocal(true);
-    return vbase;
 }
 
+
 void CloneCtx::emit_metadata()
 {
     uint32_t nfvars = fvars.size();
@@ -929,11 +955,8 @@ void CloneCtx::emit_metadata()
     }
 
     // Store back the information about exported functions.
-    auto fbase = emit_offset_table(M, T_size, fvars, "jl_fvar", suffix);
-    auto gbase = emit_offset_table(M, T_size, gvars, "jl_gvar", suffix);
-
+    emit_table(M, T_size, ArrayRef<Constant*>((Constant* const*)fvars.data(), fvars.size()), "jl_fvar", suffix);
     M.getGlobalVariable("jl_fvar_idxs")->setName("jl_fvar_idxs" + suffix);
-    M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs" + suffix);
 
     uint32_t ntargets = specs.size();
 
@@ -942,11 +965,11 @@ void CloneCtx::emit_metadata()
     {
         auto T_int32 = Type::getInt32Ty(M.getContext());
         std::sort(gv_relocs.begin(), gv_relocs.end(),
-                         [] (const std::pair<Constant*,uint32_t> &lhs,
-                             const std::pair<Constant*,uint32_t> &rhs) {
+                         [] (const std::pair<Constant*, uint32_t> &lhs,
+                             const std::pair<Constant*, uint32_t> &rhs) {
                              return lhs.second < rhs.second;
                          });
-        std::vector<Constant*> values{nullptr};
+        SmallVector<Constant*, 0> values{nullptr};
         uint32_t gv_reloc_idx = 0;
         uint32_t ngv_relocs = gv_relocs.size();
         for (uint32_t id = 0; id < nfvars; id++) {
@@ -958,28 +981,31 @@ void CloneCtx::emit_metadata()
                  gv_reloc_idx++) {
                 shared_relocs.insert(id);
                 values.push_back(id_v);
-                values.push_back(get_ptrdiff32(T_size, gv_relocs[gv_reloc_idx].first, gbase));
+                values.push_back(gv_relocs[gv_reloc_idx].first);
             }
             auto it = const_relocs.find(id);
             if (it != const_relocs.end()) {
                 shared_relocs.insert(id);
                 values.push_back(id_v);
-                values.push_back(get_ptrdiff32(T_size, it->second, gbase));
+                values.push_back(it->second);
             }
         }
         values[0] = ConstantInt::get(T_int32, values.size() / 2);
         ArrayType *vars_type = ArrayType::get(T_int32, values.size());
-        auto gv = new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage,
-                                      ConstantArray::get(vars_type, values),
-                                      "jl_clone_slots" + suffix);
+        auto gv = new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage, nullptr, "jl_clone_slots" + suffix);
+        auto gbase = ConstantExpr::getPtrToInt(gv, T_size);
+        for (size_t i = 2; i < values.size(); i += 2)
+            values[i] = get_ptrdiff32(T_size, values[i], gbase);
+        gv->setInitializer(ConstantArray::get(vars_type, values));
         gv->setVisibility(GlobalValue::HiddenVisibility);
         gv->setDSOLocal(true);
     }
 
-    // Generate `jl_dispatch_fvars_idxs` and `jl_dispatch_fvars_offsets`
+    // Generate `jl_dispatch_fvars_idxs` and `jl_dispatch_fvars`
     {
-        std::vector<uint32_t> idxs;
-        std::vector<Constant*> offsets;
+        SmallVector<uint32_t, 0> idxs;
+        SmallVector<Constant*, 0> fptrs;
+        Type *Tfptr = PointerType::getUnqual(T_size->getContext());
         for (uint32_t i = 0; i < ntargets; i++) {
             auto tgt = linearized[i];
             auto &spec = specs[i];
@@ -995,7 +1021,7 @@ void CloneCtx::emit_metadata()
                         idxs.push_back(j);
                     }
                     if (i != 0) {
-                        offsets.push_back(get_ptrdiff32(T_size, grp->base_func(fvars[j]), fbase));
+                        fptrs.push_back(grp->base_func(fvars[j]));
                     }
                 }
             }
@@ -1009,12 +1035,12 @@ void CloneCtx::emit_metadata()
                         count++;
                         idxs.push_back(jl_sysimg_tag_mask | j);
                         auto f = map_get(*tgt->vmap, base_f, base_f);
-                        offsets.push_back(get_ptrdiff32(T_size, cast<Function>(f), fbase));
+                        fptrs.push_back(cast<Function>(f));
                     }
                     else if (auto f = map_get(*tgt->vmap, base_f)) {
                         count++;
                         idxs.push_back(j);
-                        offsets.push_back(get_ptrdiff32(T_size, cast<Function>(f), fbase));
+                        fptrs.push_back(cast<Function>(f));
                     }
                 }
             }
@@ -1026,11 +1052,13 @@ void CloneCtx::emit_metadata()
                                       idxval, "jl_clone_idxs" + suffix);
         gv1->setVisibility(GlobalValue::HiddenVisibility);
         gv1->setDSOLocal(true);
-        ArrayType *offsets_type = ArrayType::get(Type::getInt32Ty(M.getContext()), offsets.size());
+        for (size_t i = 0; i < fptrs.size(); i++)
+            fptrs[i] = ConstantExpr::getBitCast(fptrs[i], Tfptr);
+        ArrayType *offsets_type = ArrayType::get(Tfptr, fptrs.size());
         auto gv2 = new GlobalVariable(M, offsets_type, true,
                                       GlobalVariable::ExternalLinkage,
-                                      ConstantArray::get(offsets_type, offsets),
-                                      "jl_clone_offsets" + suffix);
+                                      ConstantArray::get(offsets_type, fptrs),
+                                      "jl_clone_ptrs" + suffix);
         gv2->setVisibility(GlobalValue::HiddenVisibility);
         gv2->setDSOLocal(true);
     }
@@ -1063,9 +1091,7 @@ static bool runMultiVersioning(Module &M, bool allow_bad_fvars)
     }
 
     GlobalVariable *fvars = M.getGlobalVariable("jl_fvars");
-    GlobalVariable *gvars = M.getGlobalVariable("jl_gvars");
-    if (allow_bad_fvars && (!fvars || !fvars->hasInitializer() || !isa<ConstantArray>(fvars->getInitializer()) ||
-                            !gvars || !gvars->hasInitializer() || !isa<ConstantArray>(gvars->getInitializer())))
+    if (allow_bad_fvars && (!fvars || !fvars->hasInitializer() || !isa<ConstantArray>(fvars->getInitializer())))
         return false;
 
     CloneCtx clone(M, allow_bad_fvars);
@@ -1104,34 +1130,12 @@ static bool runMultiVersioning(Module &M, bool allow_bad_fvars)
     // and collected all the shared/target-specific relocations.
     clone.emit_metadata();
 #ifdef JL_VERIFY_PASSES
-    assert(!verifyModule(M, &errs()));
+    assert(!verifyLLVMIR(M));
 #endif
 
     return true;
 }
 
-struct MultiVersioningLegacy: public ModulePass {
-    static char ID;
-    MultiVersioningLegacy(bool allow_bad_fvars=false)
-        : ModulePass(ID), allow_bad_fvars(allow_bad_fvars)
-    {}
-
-private:
-    bool runOnModule(Module &M) override;
-    bool allow_bad_fvars;
-};
-
-bool MultiVersioningLegacy::runOnModule(Module &M)
-{
-    return runMultiVersioning(M, allow_bad_fvars);
-}
-
-
-char MultiVersioningLegacy::ID = 0;
-static RegisterPass<MultiVersioningLegacy> X("JuliaMultiVersioning", "JuliaMultiVersioning Pass",
-                                       false /* Only looks at CFG */,
-                                       false /* Analysis Pass */);
-
 } // anonymous namespace
 
 void multiversioning_preannotate(Module &M)
@@ -1149,14 +1153,3 @@ PreservedAnalyses MultiVersioningPass::run(Module &M, ModuleAnalysisManager &AM)
     }
     return PreservedAnalyses::all();
 }
-
-Pass *createMultiVersioningPass(bool allow_bad_fvars)
-{
-    return new MultiVersioningLegacy(allow_bad_fvars);
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddMultiVersioningPass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createMultiVersioningPass(false));
-}
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index b006f191937f5..a6a16a7f4956c 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -7,6 +7,8 @@
 
 #include "llvm-version.h"
 
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DerivedTypes.h"
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Metadata.h>
 #include <llvm/IR/Module.h>
@@ -16,6 +18,9 @@
 #include "julia_assert.h"
 #include "llvm-pass-helpers.h"
 
+#define STR(csym)           #csym
+#define XSTR(csym)          STR(csym)
+
 using namespace llvm;
 
 JuliaPassContext::JuliaPassContext()
@@ -25,9 +30,9 @@ JuliaPassContext::JuliaPassContext()
 
         pgcstack_getter(nullptr), adoptthread_func(nullptr), gc_flush_func(nullptr),
         gc_preserve_begin_func(nullptr), gc_preserve_end_func(nullptr),
-        pointer_from_objref_func(nullptr), alloc_obj_func(nullptr),
-        typeof_func(nullptr), write_barrier_func(nullptr),
-        call_func(nullptr), call2_func(nullptr), module(nullptr)
+        pointer_from_objref_func(nullptr), gc_loaded_func(nullptr), alloc_obj_func(nullptr),
+        typeof_func(nullptr), write_barrier_func(nullptr), pop_handler_noexcept_func(nullptr),
+        call_func(nullptr), call2_func(nullptr), call3_func(nullptr), module(nullptr)
 {
 }
 
@@ -48,11 +53,14 @@ void JuliaPassContext::initFunctions(Module &M)
     gc_preserve_begin_func = M.getFunction("llvm.julia.gc_preserve_begin");
     gc_preserve_end_func = M.getFunction("llvm.julia.gc_preserve_end");
     pointer_from_objref_func = M.getFunction("julia.pointer_from_objref");
+    gc_loaded_func = M.getFunction("julia.gc_loaded");
     typeof_func = M.getFunction("julia.typeof");
     write_barrier_func = M.getFunction("julia.write_barrier");
     alloc_obj_func = M.getFunction("julia.gc_alloc_obj");
+    pop_handler_noexcept_func = M.getFunction(XSTR(jl_pop_handler_noexcept));
     call_func = M.getFunction("julia.call");
     call2_func = M.getFunction("julia.call2");
+    call3_func = M.getFunction("julia.call3");
 }
 
 void JuliaPassContext::initAll(Module &M)
@@ -67,19 +75,26 @@ void JuliaPassContext::initAll(Module &M)
     T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
 }
 
-llvm::CallInst *JuliaPassContext::getPGCstack(llvm::Function &F) const
+llvm::Value *JuliaPassContext::getPGCstack(llvm::Function &F) const
 {
-    if (!pgcstack_getter && !adoptthread_func)
-        return nullptr;
-    for (auto &I : F.getEntryBlock()) {
-        if (CallInst *callInst = dyn_cast<CallInst>(&I)) {
-            Value *callee = callInst->getCalledOperand();
-            if ((pgcstack_getter && callee == pgcstack_getter) ||
-                (adoptthread_func && callee == adoptthread_func)) {
-                return callInst;
+    if (pgcstack_getter || adoptthread_func) {
+        for (auto &I : F.getEntryBlock()) {
+            if (CallInst *callInst = dyn_cast<CallInst>(&I)) {
+                Value *callee = callInst->getCalledOperand();
+                if ((pgcstack_getter && callee == pgcstack_getter) ||
+                    (adoptthread_func && callee == adoptthread_func)) {
+                    return callInst;
+                }
             }
         }
     }
+    for (auto &arg : F.args()) {
+        // Check for the "gcstack" attribute
+        AttributeSet attrs = F.getAttributes().getParamAttrs(arg.getArgNo());
+        if (attrs.hasAttribute("gcstack")) {
+            return &arg;
+        }
+    }
     return nullptr;
 }
 
@@ -121,9 +136,14 @@ namespace jl_intrinsics {
 
     // Annotates a function with attributes suitable for GC allocation
     // functions. Specifically, the return value is marked noalias and nonnull.
-    // The allocation size is set to the first argument.
     static Function *addGCAllocAttributes(Function *target)
     {
+        auto FnAttrs = AttrBuilder(target->getContext());
+        FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef));
+        FnAttrs.addAllocKindAttr(AllocFnKind::Alloc);
+        FnAttrs.addAttribute(Attribute::WillReturn);
+        FnAttrs.addAttribute(Attribute::NoUnwind);
+        target->addFnAttrs(FnAttrs);
         addRetAttr(target, Attribute::NoAlias);
         addRetAttr(target, Attribute::NonNull);
         return target;
@@ -151,7 +171,9 @@ namespace jl_intrinsics {
             auto intrinsic = Function::Create(
                 FunctionType::get(
                     T_prjlvalue,
-                    { Type::getInt8PtrTy(ctx), T_size },
+                    { PointerType::get(ctx, 0),
+                        T_size,
+                        T_size }, // type
                     false),
                 Function::ExternalLinkage,
                 GC_ALLOC_BYTES_NAME);
@@ -214,7 +236,7 @@ namespace jl_intrinsics {
                     false),
                 Function::ExternalLinkage,
                 QUEUE_GC_ROOT_NAME);
-            intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+            intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly());
             return intrinsic;
         });
 
@@ -222,7 +244,7 @@ namespace jl_intrinsics {
         SAFEPOINT_NAME,
         [](Type *T_size) {
             auto &ctx = T_size->getContext();
-            auto T_psize = T_size->getPointerTo();
+            auto T_psize = PointerType::getUnqual(ctx);
             auto intrinsic = Function::Create(
                 FunctionType::get(
                     Type::getVoidTy(ctx),
@@ -230,14 +252,14 @@ namespace jl_intrinsics {
                     false),
                 Function::ExternalLinkage,
                 SAFEPOINT_NAME);
-            intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+            intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly());
             return intrinsic;
         });
 }
 
 namespace jl_well_known {
     static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
-    static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
+    static const char *GC_SMALL_ALLOC_NAME = XSTR(jl_gc_small_alloc);
     static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
     static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed);
 
@@ -251,7 +273,7 @@ namespace jl_well_known {
             auto bigAllocFunc = Function::Create(
                 FunctionType::get(
                     T_prjlvalue,
-                    { Type::getInt8PtrTy(ctx), T_size },
+                    { PointerType::get(ctx, 0), T_size , T_size},
                     false),
                 Function::ExternalLinkage,
                 GC_BIG_ALLOC_NAME);
@@ -259,20 +281,20 @@ namespace jl_well_known {
             return addGCAllocAttributes(bigAllocFunc);
         });
 
-    const WellKnownFunctionDescription GCPoolAlloc(
-        GC_POOL_ALLOC_NAME,
+    const WellKnownFunctionDescription GCSmallAlloc(
+        GC_SMALL_ALLOC_NAME,
         [](Type *T_size) {
             auto &ctx = T_size->getContext();
             auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
-            auto poolAllocFunc = Function::Create(
+            auto smallAllocFunc = Function::Create(
                 FunctionType::get(
                     T_prjlvalue,
-                    { Type::getInt8PtrTy(ctx), Type::getInt32Ty(ctx), Type::getInt32Ty(ctx) },
+                    { PointerType::get(ctx, 0), Type::getInt32Ty(ctx), Type::getInt32Ty(ctx), T_size },
                     false),
                 Function::ExternalLinkage,
-                GC_POOL_ALLOC_NAME);
-            poolAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None));
-            return addGCAllocAttributes(poolAllocFunc);
+                GC_SMALL_ALLOC_NAME);
+            smallAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None));
+            return addGCAllocAttributes(smallAllocFunc);
         });
 
     const WellKnownFunctionDescription GCQueueRoot(
@@ -287,7 +309,7 @@ namespace jl_well_known {
                     false),
                 Function::ExternalLinkage,
                 GC_QUEUE_ROOT_NAME);
-            func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+            func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly());
             return func;
         });
 
@@ -299,9 +321,9 @@ namespace jl_well_known {
             auto allocTypedFunc = Function::Create(
                 FunctionType::get(
                     T_prjlvalue,
-                    { Type::getInt8PtrTy(ctx),
+                    { PointerType::get(ctx, 0),
                         T_size,
-                        Type::getInt8PtrTy(ctx) },
+                        T_size }, // type
                     false),
                 Function::ExternalLinkage,
                 GC_ALLOC_TYPED_NAME);
diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h
index 727f463dc50ef..d79470818c287 100644
--- a/src/llvm-pass-helpers.h
+++ b/src/llvm-pass-helpers.h
@@ -56,11 +56,14 @@ struct JuliaPassContext {
     llvm::Function *gc_preserve_begin_func;
     llvm::Function *gc_preserve_end_func;
     llvm::Function *pointer_from_objref_func;
+    llvm::Function *gc_loaded_func;
     llvm::Function *alloc_obj_func;
     llvm::Function *typeof_func;
     llvm::Function *write_barrier_func;
+    llvm::Function *pop_handler_noexcept_func;
     llvm::Function *call_func;
     llvm::Function *call2_func;
+    llvm::Function *call3_func;
 
     // Creates a pass context. Type and function pointers
     // are set to `nullptr`. Metadata nodes are initialized.
@@ -82,8 +85,9 @@ struct JuliaPassContext {
 
     // Gets a call to the `julia.get_pgcstack' intrinsic in the entry
     // point of the given function, if there exists such a call.
+    // Otherwise, gets a swiftself argument, if there exists such an argument.
     // Otherwise, `nullptr` is returned.
-    llvm::CallInst *getPGCstack(llvm::Function &F) const;
+    llvm::Value *getPGCstack(llvm::Function &F) const;
 
     // Gets the intrinsic or well-known function that conforms to
     // the given description if it exists in the module. If not,
@@ -144,8 +148,8 @@ namespace jl_well_known {
     // `jl_gc_big_alloc`: allocates bytes.
     extern const WellKnownFunctionDescription GCBigAlloc;
 
-    // `jl_gc_pool_alloc`: allocates bytes.
-    extern const WellKnownFunctionDescription GCPoolAlloc;
+    // `jl_gc_small_alloc`: allocates bytes.
+    extern const WellKnownFunctionDescription GCSmallAlloc;
 
     // `jl_gc_queue_root`: queues a GC root.
     extern const WellKnownFunctionDescription GCQueueRoot;
@@ -154,4 +158,6 @@ namespace jl_well_known {
     extern const WellKnownFunctionDescription GCAllocTyped;
 }
 
+void setName(llvm::Value *V, const llvm::Twine &Name, int debug_info);
+
 #endif
diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp
index 9f6cfa1beb38e..9763837122cdc 100644
--- a/src/llvm-propagate-addrspaces.cpp
+++ b/src/llvm-propagate-addrspaces.cpp
@@ -11,7 +11,6 @@
 #include <llvm/IR/ValueMap.h>
 #include <llvm/IR/Constants.h>
 #include <llvm/IR/Dominators.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
@@ -43,8 +42,8 @@ using namespace llvm;
 struct PropagateJuliaAddrspacesVisitor : public InstVisitor<PropagateJuliaAddrspacesVisitor> {
     DenseMap<Value *, Value *> LiftingMap;
     SmallPtrSet<Value *, 4> Visited;
-    std::vector<Instruction *> ToDelete;
-    std::vector<std::pair<Instruction *, Instruction *>> ToInsert;
+    SmallVector<Instruction *, 0> ToDelete;
+    SmallVector<std::pair<Instruction *, Instruction *>, 0> ToInsert;
 
 public:
     Value *LiftPointer(Module *M, Value *V, Instruction *InsertPt=nullptr);
@@ -57,18 +56,18 @@ struct PropagateJuliaAddrspacesVisitor : public InstVisitor<PropagateJuliaAddrsp
     void visitMemTransferInst(MemTransferInst &MTI);
 
 private:
-    void PoisonValues(std::vector<Value *> &Worklist);
+    void PoisonValues(SmallVectorImpl<Value *> &Worklist);
 };
 
 static unsigned getValueAddrSpace(Value *V) {
-    return cast<PointerType>(V->getType())->getAddressSpace();
+    return V->getType()->getPointerAddressSpace();
 }
 
 static bool isSpecialAS(unsigned AS) {
     return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial;
 }
 
-void PropagateJuliaAddrspacesVisitor::PoisonValues(std::vector<Value *> &Worklist) {
+void PropagateJuliaAddrspacesVisitor::PoisonValues(SmallVectorImpl<Value *> &Worklist) {
     while (!Worklist.empty()) {
         Value *CurrentV = Worklist.back();
         Worklist.pop_back();
@@ -83,7 +82,7 @@ void PropagateJuliaAddrspacesVisitor::PoisonValues(std::vector<Value *> &Worklis
 
 Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruction *InsertPt) {
     SmallVector<Value *, 4> Stack;
-    std::vector<Value *> Worklist;
+    SmallVector<Value *, 0> Worklist;
     std::set<Value *> LocalVisited;
     unsigned allocaAddressSpace = M->getDataLayout().getAllocaAddrSpace();
     Worklist.push_back(V);
@@ -106,7 +105,6 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc
             }
             else if (auto *GEP = dyn_cast<GetElementPtrInst>(CurrentV)) {
                 if (LiftingMap.count(GEP)) {
-                    CurrentV = LiftingMap[GEP];
                     break;
                 } else if (Visited.count(GEP)) {
                     return nullptr;
@@ -141,7 +139,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc
                 break;
             } else {
                 // Ok, we've reached a leaf - check if it is eligible for lifting
-                if (!CurrentV->getType()->isPointerTy() ||
+                if (!CurrentV->getType()->isPtrOrPtrVectorTy() ||
                     isSpecialAS(getValueAddrSpace(CurrentV))) {
                     // If not, poison all (recursive) users of this value, to prevent
                     // looking at them again in future iterations.
@@ -157,7 +155,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc
     }
 
     // Go through and insert lifted versions of all instructions on the list.
-    std::vector<Value *> ToRevisit;
+    SmallVector<Value *, 0> ToRevisit;
     for (Value *V : Stack) {
         if (LiftingMap.count(V))
             continue;
@@ -165,14 +163,14 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc
             Instruction *InstV = cast<Instruction>(V);
             Instruction *NewV = InstV->clone();
             ToInsert.push_back(std::make_pair(NewV, InstV));
-            Type *NewRetTy = PointerType::getWithSamePointeeType(cast<PointerType>(InstV->getType()), allocaAddressSpace);
+            Type *NewRetTy = PointerType::get(InstV->getContext(), allocaAddressSpace);
             NewV->mutateType(NewRetTy);
             LiftingMap[InstV] = NewV;
             ToRevisit.push_back(NewV);
         }
     }
     auto CollapseCastsAndLift = [&](Value *CurrentV, Instruction *InsertPt) -> Value * {
-        PointerType *TargetType = PointerType::getWithSamePointeeType(cast<PointerType>(CurrentV->getType()), allocaAddressSpace);
+        PointerType *TargetType = PointerType::get(CurrentV->getContext(), allocaAddressSpace);
         while (!LiftingMap.count(CurrentV)) {
             if (isa<BitCastInst>(CurrentV))
                 CurrentV = cast<BitCastInst>(CurrentV)->getOperand(0);
@@ -186,13 +184,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc
         }
         if (LiftingMap.count(CurrentV))
             CurrentV = LiftingMap[CurrentV];
-        if (CurrentV->getType() != TargetType) {
-            // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-            assert(CurrentV->getContext().supportsTypedPointers());
-            auto *BCI = new BitCastInst(CurrentV, TargetType);
-            ToInsert.push_back(std::make_pair(BCI, InsertPt));
-            CurrentV = BCI;
-        }
+        assert(CurrentV->getType() == TargetType);
         return CurrentV;
     };
 
@@ -252,7 +244,11 @@ void PropagateJuliaAddrspacesVisitor::visitMemSetInst(MemSetInst &MI) {
     Value *Replacement = LiftPointer(MI.getModule(), MI.getRawDest());
     if (!Replacement)
         return;
+#if JL_LLVM_VERSION >= 200000
+    Function *TheFn = Intrinsic::getOrInsertDeclaration(MI.getModule(), Intrinsic::memset,
+#else
     Function *TheFn = Intrinsic::getDeclaration(MI.getModule(), Intrinsic::memset,
+#endif
         {Replacement->getType(), MI.getOperand(1)->getType()});
     MI.setCalledFunction(TheFn);
     MI.setArgOperand(0, Replacement);
@@ -277,7 +273,11 @@ void PropagateJuliaAddrspacesVisitor::visitMemTransferInst(MemTransferInst &MTI)
     }
     if (Dest == MTI.getRawDest() && Src == MTI.getRawSource())
         return;
+#if JL_LLVM_VERSION >= 200000
+    Function *TheFn = Intrinsic::getOrInsertDeclaration(MTI.getModule(), MTI.getIntrinsicID(),
+#else
     Function *TheFn = Intrinsic::getDeclaration(MTI.getModule(), MTI.getIntrinsicID(),
+#endif
         {Dest->getType(), Src->getType(),
          MTI.getOperand(2)->getType()});
     MTI.setCalledFunction(TheFn);
@@ -289,7 +289,11 @@ bool propagateJuliaAddrspaces(Function &F) {
     PropagateJuliaAddrspacesVisitor visitor;
     visitor.visit(F);
     for (auto it : visitor.ToInsert)
+#if JL_LLVM_VERSION >= 200000
+        it.first->insertBefore(it.second->getIterator());
+#else
         it.first->insertBefore(it.second);
+#endif
     for (Instruction *I : visitor.ToDelete)
         I->eraseFromParent();
     visitor.ToInsert.clear();
@@ -298,32 +302,11 @@ bool propagateJuliaAddrspaces(Function &F) {
     visitor.Visited.clear();
     return true;
 }
-
-struct PropagateJuliaAddrspacesLegacy : FunctionPass {
-    static char ID;
-
-    PropagateJuliaAddrspacesLegacy() : FunctionPass(ID) {}
-    bool runOnFunction(Function &F) override {
-        bool modified = propagateJuliaAddrspaces(F);
-#ifdef JL_VERIFY_PASSES
-        assert(!verifyFunction(F, &errs()));
-#endif
-        return modified;
-    }
-};
-
-char PropagateJuliaAddrspacesLegacy::ID = 0;
-static RegisterPass<PropagateJuliaAddrspacesLegacy> X("PropagateJuliaAddrspaces", "Propagate (non-)rootedness information", false, false);
-
-Pass *createPropagateJuliaAddrspaces() {
-    return new PropagateJuliaAddrspacesLegacy();
-}
-
 PreservedAnalyses PropagateJuliaAddrspacesPass::run(Function &F, FunctionAnalysisManager &AM) {
     bool modified = propagateJuliaAddrspaces(F);
 
 #ifdef JL_VERIFY_PASSES
-    assert(!verifyFunction(F, &errs()));
+    assert(!verifyLLVMIR(F));
 #endif
     if (modified) {
         return PreservedAnalyses::allInSet<CFGAnalyses>();
@@ -331,9 +314,3 @@ PreservedAnalyses PropagateJuliaAddrspacesPass::run(Function &F, FunctionAnalysi
         return PreservedAnalyses::all();
     }
 }
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddPropagateJuliaAddrspaces_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createPropagateJuliaAddrspaces());
-}
diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp
index 840efaebee032..a7bc79afd3eb4 100644
--- a/src/llvm-ptls.cpp
+++ b/src/llvm-ptls.cpp
@@ -9,9 +9,8 @@
 #include <llvm-c/Types.h>
 
 #include <llvm/Pass.h>
-#include <llvm/ADT/Triple.h>
+#include <llvm/TargetParser/Triple.h>
 #include <llvm/IR/Module.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/Constants.h>
@@ -64,7 +63,7 @@ struct LowerPTLS {
 
 void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const
 {
-    addFnAttr(pgcstack, Attribute::ReadNone);
+    pgcstack->addFnAttr(Attribute::getWithMemoryEffects(pgcstack->getContext(), MemoryEffects::none()));
     addFnAttr(pgcstack, Attribute::NoUnwind);
 }
 
@@ -86,14 +85,14 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor
 
         // The add instruction clobbers flags
         if (offset) {
-            std::vector<Type*> args(0);
+            SmallVector<Type*, 0> args(0);
             args.push_back(offset->getType());
-            auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), args, false),
+            auto tp = InlineAsm::get(FunctionType::get(PointerType::get(builder.getContext(), 0), args, false),
                                      dyn_asm_str, "=&r,r,~{dirflag},~{fpsr},~{flags}", false);
             tls = builder.CreateCall(tp, {offset}, "pgcstack");
         }
         else {
-            auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false),
+            auto tp = InlineAsm::get(FunctionType::get(PointerType::get(builder.getContext(), 0), false),
                                      const_asm_str.c_str(), "=r,~{dirflag},~{fpsr},~{flags}",
                                      false);
             tls = builder.CreateCall(tp, {}, "tls_pgcstack");
@@ -110,6 +109,8 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor
             asm_str = "mrs $0, tpidr_el0";
         } else if (TargetTriple.isARM()) {
             asm_str = "mrc p15, 0, $0, c13, c0, 3";
+        } else if (TargetTriple.isRISCV()) {
+            asm_str = "mv $0, tp";
         } else if (TargetTriple.getArch() == Triple::x86_64) {
             asm_str = "movq %fs:0, $0";
         } else if (TargetTriple.getArch() == Triple::x86) {
@@ -119,11 +120,10 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor
         }
         if (!offset)
             offset = ConstantInt::getSigned(T_size, jl_tls_offset);
-        auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), false), asm_str, "=r", false);
+        auto tp = InlineAsm::get(FunctionType::get(PointerType::get(builder.getContext(), 0), false), asm_str, "=r", false);
         tls = builder.CreateCall(tp, {}, "thread_ptr");
-        tls = builder.CreateGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack");
+        tls = builder.CreateInBoundsGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack");
     }
-    tls = builder.CreateBitCast(tls, T_pppjlvalue->getPointerTo());
     return builder.CreateLoad(T_pppjlvalue, tls, "tls_pgcstack");
 }
 
@@ -170,34 +170,47 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
             *CFGModified = true;
         // emit slow branch code
         CallInst *adopt = cast<CallInst>(pgcstack->clone());
-        Function *adoptFunc = M->getFunction(XSTR(jl_adopt_thread));
+        Function *adoptFunc = M->getFunction(XSTR(jl_autoinit_and_adopt_thread));
         if (adoptFunc == NULL) {
             adoptFunc = Function::Create(pgcstack_getter->getFunctionType(),
                 pgcstack_getter->getLinkage(), pgcstack_getter->getAddressSpace(),
-                XSTR(jl_adopt_thread), M);
+                XSTR(jl_autoinit_and_adopt_thread), M);
             adoptFunc->copyAttributesFrom(pgcstack_getter);
             adoptFunc->copyMetadata(pgcstack_getter, 0);
         }
         adopt->setCalledFunction(adoptFunc);
+#if JL_LLVM_VERSION >= 200000
+        adopt->insertBefore(slowTerm->getIterator());
+#else
         adopt->insertBefore(slowTerm);
+#endif
         phi->addIncoming(adopt, slowTerm->getParent());
         // emit fast branch code
         builder.SetInsertPoint(fastTerm->getParent());
         fastTerm->removeFromParent();
         MDNode *tbaa = tbaa_gcframe;
-        Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true);
+        Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true);
         builder.Insert(fastTerm);
         phi->addIncoming(pgcstack, fastTerm->getParent());
         // emit pre-return cleanup
         if (CountTrackedPointers(pgcstack->getParent()->getParent()->getReturnType()).count == 0) {
+#if JL_LLVM_VERSION >= 200000
+            auto last_gc_state = PHINode::Create(Type::getInt8Ty(pgcstack->getContext()), 2, "", phi->getIterator());
+#else
             auto last_gc_state = PHINode::Create(Type::getInt8Ty(pgcstack->getContext()), 2, "", phi);
+#endif
             // if we called jl_adopt_thread, we must end this cfunction back in the safe-state
             last_gc_state->addIncoming(ConstantInt::get(Type::getInt8Ty(M->getContext()), JL_GC_STATE_SAFE), slowTerm->getParent());
             last_gc_state->addIncoming(prior, fastTerm->getParent());
             for (auto &BB : *pgcstack->getParent()->getParent()) {
                 if (isa<ReturnInst>(BB.getTerminator())) {
+                    // Don't use emit_gc_safe_leave here, as that introduces a new BB while iterating BBs
                     builder.SetInsertPoint(BB.getTerminator());
-                    emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true);
+                    Value *ptls = get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa_gcframe);
+                    unsigned offset = offsetof(jl_tls_states_t, gc_state);
+                    Value *gc_state = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), ptls, offset, "gc_state");
+                    builder.CreateAlignedStore(last_gc_state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
+                    emit_gc_safepoint(builder, T_size, ptls, tbaa, true);
                 }
             }
         }
@@ -228,7 +241,11 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
             builder.SetInsertPoint(pgcstack);
             auto phi = builder.CreatePHI(T_pppjlvalue, 2, "pgcstack");
             pgcstack->replaceAllUsesWith(phi);
+#if JL_LLVM_VERSION >= 200000
+            pgcstack->moveBefore(slowTerm->getIterator());
+#else
             pgcstack->moveBefore(slowTerm);
+#endif
             // refresh the basic block in the builder
             builder.SetInsertPoint(pgcstack);
             auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot);
@@ -277,7 +294,11 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
         if (TargetTriple.isOSDarwin()) {
             assert(sizeof(k) == sizeof(uintptr_t));
             Constant *key = ConstantInt::get(T_size, (uintptr_t)k);
+#if JL_LLVM_VERSION >= 200000
+            auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, val, {key}, "", pgcstack->getIterator());
+#else
             auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, val, {key}, "", pgcstack);
+#endif
             new_pgcstack->takeName(pgcstack);
             pgcstack->replaceAllUsesWith(new_pgcstack);
             pgcstack->eraseFromParent();
@@ -307,7 +328,7 @@ bool LowerPTLS::run(bool *CFGModified)
                 assert(sizeof(jl_pgcstack_key_t) == sizeof(uintptr_t));
                 FT_pgcstack_getter = FunctionType::get(FT_pgcstack_getter->getReturnType(), {T_size}, false);
             }
-            T_pgcstack_getter = FT_pgcstack_getter->getPointerTo();
+            T_pgcstack_getter = PointerType::getUnqual(FT_pgcstack_getter->getContext());
             T_pppjlvalue = cast<PointerType>(FT_pgcstack_getter->getReturnType());
             if (imaging_mode) {
                 pgcstack_func_slot = create_hidden_global(T_pgcstack_getter, "jl_pgcstack_func_slot");
@@ -317,18 +338,20 @@ bool LowerPTLS::run(bool *CFGModified)
             need_init = false;
         }
 
-        for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) {
+        for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end(); ) {
             auto call = cast<CallInst>(*it);
             ++it;
             auto f = call->getCaller();
             Value *pgcstack = NULL;
-            for (Function::arg_iterator arg = f->arg_begin(); arg != f->arg_end();++arg) {
-                if (arg->hasSwiftSelfAttr()){
+            for (Function::arg_iterator arg = f->arg_begin(); arg != f->arg_end(); ++arg) {
+                AttributeSet attrs = f->getAttributes().getParamAttrs(arg->getArgNo());
+                if (attrs.hasAttribute("gcstack")) {
                     pgcstack = &*arg;
                     break;
                 }
             }
             if (pgcstack) {
+                pgcstack->takeName(call);
                 call->replaceAllUsesWith(pgcstack);
                 call->eraseFromParent();
                 continue;
@@ -342,31 +365,6 @@ bool LowerPTLS::run(bool *CFGModified)
     };
     return runOnGetter(false) + runOnGetter(true);
 }
-
-struct LowerPTLSLegacy: public ModulePass {
-    static char ID;
-    LowerPTLSLegacy(bool imaging_mode=false)
-        : ModulePass(ID),
-          imaging_mode(imaging_mode)
-    {}
-
-    bool imaging_mode;
-    bool runOnModule(Module &M) override {
-        LowerPTLS lower(M, imaging_mode);
-        bool modified = lower.run(nullptr);
-#ifdef JL_VERIFY_PASSES
-        assert(!verifyModule(M, &errs()));
-#endif
-        return modified;
-    }
-};
-
-char LowerPTLSLegacy::ID = 0;
-
-static RegisterPass<LowerPTLSLegacy> X("LowerPTLS", "LowerPTLS Pass",
-                                 false /* Only looks at CFG */,
-                                 false /* Analysis Pass */);
-
 } // anonymous namespace
 
 PreservedAnalyses LowerPTLSPass::run(Module &M, ModuleAnalysisManager &AM) {
@@ -374,7 +372,7 @@ PreservedAnalyses LowerPTLSPass::run(Module &M, ModuleAnalysisManager &AM) {
     bool CFGModified = false;
     bool modified = lower.run(&CFGModified);
 #ifdef JL_VERIFY_PASSES
-    assert(!verifyModule(M, &errs()));
+    assert(!verifyLLVMIR(M));
 #endif
     if (modified) {
         if (CFGModified) {
@@ -385,14 +383,3 @@ PreservedAnalyses LowerPTLSPass::run(Module &M, ModuleAnalysisManager &AM) {
     }
     return PreservedAnalyses::all();
 }
-
-Pass *createLowerPTLSPass(bool imaging_mode)
-{
-    return new LowerPTLSLegacy(imaging_mode);
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddLowerPTLSPass_impl(LLVMPassManagerRef PM, LLVMBool imaging_mode)
-{
-    unwrap(PM)->add(createLowerPTLSPass(imaging_mode));
-}
diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp
index b964c20e3353e..5be48688be1f3 100644
--- a/src/llvm-remove-addrspaces.cpp
+++ b/src/llvm-remove-addrspaces.cpp
@@ -7,7 +7,6 @@
 #include <llvm/IR/Constants.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/InstIterator.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Verifier.h>
 #include <llvm/Support/Debug.h>
 #include <llvm/Transforms/Utils/Cloning.h>
@@ -45,15 +44,7 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper {
 
         DstTy = SrcTy;
         if (auto Ty = dyn_cast<PointerType>(SrcTy)) {
-            if (Ty->isOpaque()) {
-                DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace()));
-            }
-            else {
-                //Remove once opaque pointer transition is complete
-                DstTy = PointerType::get(
-                        remapType(Ty->getNonOpaquePointerElementType()),
-                        ASRemapper(Ty->getAddressSpace()));
-            }
+            DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace()));
         }
         else if (auto Ty = dyn_cast<FunctionType>(SrcTy)) {
             SmallVector<Type *, 4> Params;
@@ -154,18 +145,7 @@ class AddrspaceRemoveValueMaterializer : public ValueMaterializer {
                     Ops.push_back(NewOp ? cast<Constant>(NewOp) : Op);
                 }
 
-                if (CE->getOpcode() == Instruction::GetElementPtr) {
-                    // GEP const exprs need to know the type of the source.
-                    // asserts remapType(typeof arg0) == typeof mapValue(arg0).
-                    Constant *Src = CE->getOperand(0);
-                    auto ptrty = cast<PointerType>(Src->getType()->getScalarType());
-                    //Remove once opaque pointer transition is complete
-                    if (!ptrty->isOpaque()) {
-                        Type *SrcTy = remapType(ptrty->getNonOpaquePointerElementType());
-                        DstV = CE->getWithOperands(Ops, Ty, false, SrcTy);
-                    }
-                }
-                else
+                if (CE->getOpcode() != Instruction::GetElementPtr)
                     DstV = CE->getWithOperands(Ops, Ty);
             }
         }
@@ -209,7 +189,12 @@ bool RemoveNoopAddrSpaceCasts(Function *F)
                 LLVM_DEBUG(
                         dbgs() << "Removing noop address space cast:\n"
                                << I << "\n");
-                ASC->replaceAllUsesWith(ASC->getOperand(0));
+                if (ASC->getType() == ASC->getOperand(0)->getType()) {
+                    ASC->replaceAllUsesWith(ASC->getOperand(0));
+                } else {
+                    // uncanonicalized addrspacecast; just use the value
+                    ASC->replaceAllUsesWith(ASC->getOperand(0));
+                }
                 NoopCasts.push_back(ASC);
             }
         }
@@ -271,7 +256,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
                 Name,
                 (GlobalVariable *)nullptr,
                 GV->getThreadLocalMode(),
-                GV->getType()->getAddressSpace());
+                cast<PointerType>(TypeRemapper.remapType(GV->getType()))->getAddressSpace());
         NGV->copyAttributesFrom(GV);
         VMap[GV] = NGV;
     }
@@ -291,7 +276,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
 
         auto *NGA = GlobalAlias::create(
                 TypeRemapper.remapType(GA->getValueType()),
-                GA->getType()->getPointerAddressSpace(),
+                cast<PointerType>(TypeRemapper.remapType(GA->getType()))->getAddressSpace(),
                 GA->getLinkage(),
                 Name,
                 &M);
@@ -336,7 +321,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
 
         GlobalVariable *NGV = cast<GlobalVariable>(VMap[GV]);
         if (GV->hasInitializer())
-            NGV->setInitializer(MapValue(GV->getInitializer(), VMap));
+            NGV->setInitializer(MapValue(GV->getInitializer(), VMap, RF_None, &TypeRemapper, &Materializer));
 
         SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
         GV->getAllMetadata(MDs);
@@ -349,7 +334,12 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
 
         GV->setInitializer(nullptr);
     }
-
+    // Same workaround as in CloneCtx::prepare_vmap to avoid LLVM bug when cloning
+    auto &MD = VMap.MD();
+    if (M.getNamedMetadata("llvm.dbg.cu"))
+        for (auto cu: M.getNamedMetadata("llvm.dbg.cu")->operands()) {
+            MD[cu].reset(cu);
+        }
     // Similarly, copy over and rewrite function bodies
     for (Function *F : Functions) {
         Function *NF = cast<Function>(VMap[F]);
@@ -401,7 +391,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
     for (GlobalAlias *GA : Aliases) {
         GlobalAlias *NGA = cast<GlobalAlias>(VMap[GA]);
         if (const Constant *C = GA->getAliasee())
-            NGA->setAliasee(MapValue(C, VMap));
+            NGA->setAliasee(MapValue(C, VMap, RF_None, &TypeRemapper, &Materializer));
 
         GA->setAliasee(nullptr);
     }
@@ -424,51 +414,22 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
     for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE;) {
         Function *F = &*FI++;
         if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) {
-            F->replaceAllUsesWith(Remangled.getValue());
+            F->replaceAllUsesWith(*Remangled);
             F->eraseFromParent();
         }
     }
 
+
     return true;
 }
 
 
-struct RemoveAddrspacesPassLegacy : public ModulePass {
-    static char ID;
-    AddrspaceRemapFunction ASRemapper;
-    RemoveAddrspacesPassLegacy(
-            AddrspaceRemapFunction ASRemapper = removeAllAddrspaces)
-        : ModulePass(ID), ASRemapper(ASRemapper){};
-
-public:
-    bool runOnModule(Module &M) override {
-        bool modified = removeAddrspaces(M, ASRemapper);
-#ifdef JL_VERIFY_PASSES
-        assert(!verifyModule(M, &errs()));
-#endif
-        return modified;
-    }
-};
-
-char RemoveAddrspacesPassLegacy::ID = 0;
-static RegisterPass<RemoveAddrspacesPassLegacy>
-        X("RemoveAddrspaces",
-          "Remove IR address space information.",
-          false,
-          false);
-
-Pass *createRemoveAddrspacesPass(
-        AddrspaceRemapFunction ASRemapper = removeAllAddrspaces)
-{
-    return new RemoveAddrspacesPassLegacy(ASRemapper);
-}
-
 RemoveAddrspacesPass::RemoveAddrspacesPass() : RemoveAddrspacesPass(removeAllAddrspaces) {}
 
 PreservedAnalyses RemoveAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) {
     bool modified = removeAddrspaces(M, ASRemapper);
 #ifdef JL_VERIFY_PASSES
-    assert(!verifyModule(M, &errs()));
+    assert(!verifyLLVMIR(M));
 #endif
     if (modified) {
         return PreservedAnalyses::allInSet<CFGAnalyses>();
@@ -490,32 +451,7 @@ unsigned removeJuliaAddrspaces(unsigned AS)
         return AS;
 }
 
-struct RemoveJuliaAddrspacesPassLegacy : public ModulePass {
-    static char ID;
-    RemoveAddrspacesPassLegacy Pass;
-    RemoveJuliaAddrspacesPassLegacy() : ModulePass(ID), Pass(removeJuliaAddrspaces){};
-
-    bool runOnModule(Module &M) override { return Pass.runOnModule(M); }
-};
-
-char RemoveJuliaAddrspacesPassLegacy::ID = 0;
-static RegisterPass<RemoveJuliaAddrspacesPassLegacy>
-        Y("RemoveJuliaAddrspaces",
-          "Remove IR address space information.",
-          false,
-          false);
-
-Pass *createRemoveJuliaAddrspacesPass()
-{
-    return new RemoveJuliaAddrspacesPassLegacy();
-}
 
 PreservedAnalyses RemoveJuliaAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) {
     return RemoveAddrspacesPass(removeJuliaAddrspaces).run(M, AM);
 }
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddRemoveJuliaAddrspacesPass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createRemoveJuliaAddrspacesPass());
-}
diff --git a/src/llvm-remove-ni.cpp b/src/llvm-remove-ni.cpp
index 5e8f54b98e417..85275dddb101e 100644
--- a/src/llvm-remove-ni.cpp
+++ b/src/llvm-remove-ni.cpp
@@ -6,7 +6,6 @@
 #include <llvm/Pass.h>
 #include <llvm/IR/Module.h>
 #include <llvm/IR/PassManager.h>
-#include <llvm/IR/LegacyPassManager.h>
 #include <llvm/Support/Debug.h>
 
 #include "julia.h"
@@ -43,33 +42,3 @@ PreservedAnalyses RemoveNIPass::run(Module &M, ModuleAnalysisManager &AM)
     }
     return PreservedAnalyses::all();
 }
-
-namespace {
-struct RemoveNILegacy : public ModulePass {
-    static char ID;
-    RemoveNILegacy() : ModulePass(ID) {};
-
-    bool runOnModule(Module &M)
-    {
-        return removeNI(M);
-    }
-};
-
-char RemoveNILegacy::ID = 0;
-static RegisterPass<RemoveNILegacy>
-        Y("RemoveNI",
-          "Remove non-integral address space.",
-          false,
-          false);
-}
-
-Pass *createRemoveNIPass()
-{
-    return new RemoveNILegacy();
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddRemoveNIPass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createRemoveNIPass());
-}
diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp
index 21e2ec574d650..3faa9d9728e67 100644
--- a/src/llvm-simdloop.cpp
+++ b/src/llvm-simdloop.cpp
@@ -11,7 +11,6 @@
 //    as independent of each other.
 //
 // The pass hinges on a call to a marker function that has metadata attached to it.
-// To construct the pass call `createLowerSimdLoopPass`.
 
 #include "support/dtypes.h"
 
@@ -21,7 +20,7 @@
 #include <llvm/ADT/Statistic.h>
 #include <llvm/Analysis/LoopPass.h>
 #include <llvm/Analysis/OptimizationRemarkEmitter.h>
-#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/Analysis/MemorySSA.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/Metadata.h>
 #include <llvm/IR/Verifier.h>
@@ -42,6 +41,7 @@ STATISTIC(ReductionChainLength, "Total sum of instructions folded from reduction
 STATISTIC(MaxChainLength, "Max length of reduction chain");
 STATISTIC(AddChains, "Addition reduction chains");
 STATISTIC(MulChains, "Multiply reduction chains");
+STATISTIC(TotalContracted, "Total number of multiplies marked for FMA");
 
 #ifndef __clang_gcanalyzer__
 #define REMARK(remark) ORE.emit(remark)
@@ -50,6 +50,49 @@ STATISTIC(MulChains, "Multiply reduction chains");
 #endif
 namespace {
 
+/**
+ * Combine
+ * ```
+ * %v0 = fmul ... %a, %b
+ * %v = fadd contract ... %v0, %c
+ * ```
+ * to
+ * %v0 = fmul contract ... %a, %b
+ * %v = fadd contract ... %v0, %c
+ * when `%v0` has no other use
+ */
+
+static bool checkCombine(Value *maybeMul, Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT
+{
+    auto mulOp = dyn_cast<Instruction>(maybeMul);
+    if (!mulOp || mulOp->getOpcode() != Instruction::FMul)
+        return false;
+    if (!L.contains(mulOp))
+        return false;
+    if (!mulOp->hasOneUse()) {
+        LLVM_DEBUG(dbgs() << "mulOp has multiple uses: " << *maybeMul << "\n");
+        REMARK([&](){
+            return OptimizationRemarkMissed(DEBUG_TYPE, "Multiuse FMul", mulOp)
+                << "fmul had multiple uses " << ore::NV("fmul", mulOp);
+        });
+        return false;
+    }
+    // On 5.0+ we only need to mark the mulOp as contract and the backend will do the work for us.
+    auto fmf = mulOp->getFastMathFlags();
+    if (!fmf.allowContract()) {
+        LLVM_DEBUG(dbgs() << "Marking mulOp for FMA: " << *maybeMul << "\n");
+        REMARK([&](){
+            return OptimizationRemark(DEBUG_TYPE, "Marked for FMA", mulOp)
+                << "marked for fma " << ore::NV("fmul", mulOp);
+        });
+        ++TotalContracted;
+        fmf.setAllowContract(true);
+        mulOp->copyFastMathFlags(fmf);
+        return true;
+    }
+    return false;
+}
+
 static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFEPOINT
 {
     switch (J->getOpcode()) {
@@ -73,7 +116,7 @@ static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFE
 /// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv,
 /// mark the ops as permitting reassociation/commuting.
 /// As of LLVM 4.0, FDiv is not handled by the loop vectorizer
-static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT
+static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT
 {
     typedef SmallVector<Instruction*, 8> chainVector;
     chainVector chain;
@@ -84,7 +127,7 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe
         // Find the user of instruction I that is within loop L.
         for (User *UI : I->users()) { /*}*/
             Instruction *U = cast<Instruction>(UI);
-            if (L->contains(U)) {
+            if (L.contains(U)) {
                 if (J) {
                     LLVM_DEBUG(dbgs() << "LSL: not a reduction var because op has two internal uses: " << *I << "\n");
                     REMARK([&]() {
@@ -151,128 +194,116 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe
         });
         (*K)->setHasAllowReassoc(true);
         (*K)->setHasAllowContract(true);
+        switch ((*K)->getOpcode()) {
+            case Instruction::FAdd: {
+                if (!(*K)->hasAllowContract())
+                    continue;
+                // (*K)->getOperand(0)->print(dbgs());
+                // (*K)->getOperand(1)->print(dbgs());
+                checkCombine((*K)->getOperand(0), L, ORE);
+                checkCombine((*K)->getOperand(1), L, ORE);
+                break;
+            }
+            case Instruction::FSub: {
+                if (!(*K)->hasAllowContract())
+                    continue;
+                // (*K)->getOperand(0)->print(dbgs());
+                // (*K)->getOperand(1)->print(dbgs());
+                checkCombine((*K)->getOperand(0), L, ORE);
+                checkCombine((*K)->getOperand(1), L, ORE);
+                break;
+            }
+            default:
+                break;
+            }
+        if (SE)
+            SE->forgetValue(*K);
         ++length;
     }
     ReductionChainLength += length;
     MaxChainLength.updateMax(length);
 }
 
-static bool markLoopInfo(Module &M, Function *marker, function_ref<LoopInfo &(Function &)> GetLI) JL_NOTSAFEPOINT
+static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT
 {
-    bool Changed = false;
-    std::vector<Instruction*> ToDelete;
-    for (User *U : marker->users()) {
-        ++TotalMarkedLoops;
-        Instruction *I = cast<Instruction>(U);
-        ToDelete.push_back(I);
-
-        BasicBlock *B = I->getParent();
-        OptimizationRemarkEmitter ORE(B->getParent());
-        LoopInfo &LI = GetLI(*B->getParent());
-        Loop *L = LI.getLoopFor(B);
-        if (!L) {
-            I->removeFromParent();
-            continue;
-        }
-
-        LLVM_DEBUG(dbgs() << "LSL: loopinfo marker found\n");
-        bool simd = false;
-        bool ivdep = false;
-        SmallVector<Metadata *, 8> MDs;
-
-        BasicBlock *Lh = L->getHeader();
-        LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n");
-
-        // Reserve first location for self reference to the LoopID metadata node.
-        TempMDTuple TempNode = MDNode::getTemporary(Lh->getContext(), None);
-        MDs.push_back(TempNode.get());
-
-        // Walk `julia.loopinfo` metadata and filter out `julia.simdloop` and `julia.ivdep`
-        if (I->hasMetadataOtherThanDebugLoc()) {
-            MDNode *JLMD= I->getMetadata("julia.loopinfo");
-            if (JLMD) {
-                LLVM_DEBUG(dbgs() << "LSL: has julia.loopinfo metadata with " << JLMD->getNumOperands() <<" operands\n");
-                for (unsigned i = 0, ie = JLMD->getNumOperands(); i < ie; ++i) {
-                    Metadata *Op = JLMD->getOperand(i);
-                    const MDString *S = dyn_cast<MDString>(Op);
-                    if (S) {
-                        LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n");
-                        if (S->getString().startswith("julia")) {
-                            if (S->getString().equals("julia.simdloop"))
-                                simd = true;
-                            if (S->getString().equals("julia.ivdep"))
-                                ivdep = true;
-                            continue;
-                        }
-                    }
-                    MDs.push_back(Op);
-                }
-            }
-        }
-
-        LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n");
-
-        REMARK([=]() {
-            return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", I->getDebugLoc(), B)
-                << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }";
-        });
-
-        MDNode *n = L->getLoopID();
-        if (n) {
-            // Loop already has a LoopID so copy over Metadata
-            // original loop id is operand 0
-            for (unsigned i = 1, ie = n->getNumOperands(); i < ie; ++i) {
-                Metadata *Op = n->getOperand(i);
-                MDs.push_back(Op);
+    MDNode *LoopID = L.getLoopID();
+    if (!LoopID)
+        return false;
+    bool simd = false;
+    bool ivdep = false;
+
+    BasicBlock *Lh = L.getHeader();
+    LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n");
+
+    SmallVector<Metadata*, 4> MDs(1);
+    // First Operand is self-reference
+    // Drop `julia.` prefixes
+    for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+        Metadata *Op = LoopID->getOperand(i);
+        const MDString *S = dyn_cast<MDString>(Op);
+        if (S) {
+            LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n");
+            if (S->getString().starts_with("julia")) {
+                if (S->getString() == "julia.simdloop")
+                    simd = true;
+                if (S->getString() == "julia.ivdep")
+                    ivdep = true;
+                continue;
             }
         }
-        MDNode *LoopID = MDNode::getDistinct(Lh->getContext(), MDs);
-        // Replace the temporary node with a self-reference.
-        LoopID->replaceOperandWith(0, LoopID);
-        L->setLoopID(LoopID);
-        assert(L->getLoopID());
+        MDs.push_back(Op);
+    }
 
+    LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n");
+    if (!simd && !ivdep)
+        return false;
+    ++TotalMarkedLoops;
+    LLVMContext &Context = L.getHeader()->getContext();
+    LoopID = MDNode::get(Context, MDs);
+    // Set operand 0 to refer to the loop id itself
+    LoopID->replaceOperandWith(0, LoopID);
+    L.setLoopID(LoopID);
+
+    REMARK([&]() {
+        return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", L.getStartLoc(), L.getHeader())
+            << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }";
+    });
+
+    // If ivdep is true we assume that there is no memory dependency between loop iterations
+    // This is a fairly strong assumption and does often not hold true for generic code.
+    if (ivdep) {
+        ++IVDepLoops;
         MDNode *m = MDNode::get(Lh->getContext(), ArrayRef<Metadata *>(LoopID));
-
-        // If ivdep is true we assume that there is no memory dependency between loop iterations
-        // This is a fairly strong assumption and does often not hold true for generic code.
-        if (ivdep) {
-            ++IVDepLoops;
-            // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop.
-            for (BasicBlock *BB : L->blocks()) {
-               for (Instruction &I : *BB) {
-                   if (I.mayReadOrWriteMemory()) {
-                       ++IVDepInstructions;
-                       I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m);
-                   }
-               }
+        // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop.
+        for (BasicBlock *BB : L.blocks()) {
+            for (Instruction &I : *BB) {
+                if (I.mayReadOrWriteMemory()) {
+                    ++IVDepInstructions;
+                    I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m);
+                }
             }
-            assert(L->isAnnotatedParallel());
         }
+        assert(L.isAnnotatedParallel());
+    }
 
-        if (simd) {
-            ++SimdLoops;
-            // Mark floating-point reductions as okay to reassociate/commute.
-            for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) {
-                if (PHINode *Phi = dyn_cast<PHINode>(I))
-                    enableUnsafeAlgebraIfReduction(Phi, L, ORE);
-                else
-                    break;
-            }
+    if (simd) {
+        ++SimdLoops;
+        // Mark floating-point reductions as okay to reassociate/commute.
+        for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) {
+            if (PHINode *Phi = dyn_cast<PHINode>(I))
+                enableUnsafeAlgebraIfReduction(Phi, L, ORE, SE);
+            else
+                break;
         }
 
-        I->removeFromParent();
-
-        Changed = true;
+        if (SE)
+            SE->forgetLoopDispositions();
     }
 
-    for (Instruction *I : ToDelete)
-        I->deleteValue();
-    marker->eraseFromParent();
 #ifdef JL_VERIFY_PASSES
-    assert(!verifyModule(M, &errs()));
+    assert(!verifyLLVMIR(L));
 #endif
-    return Changed;
+    return true;
 }
 
 } // end anonymous namespace
@@ -283,78 +314,21 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref<LoopInfo &(Fu
 /// prevent SIMDization.
 
 
-PreservedAnalyses LowerSIMDLoopPass::run(Module &M, ModuleAnalysisManager &AM)
-{
-    Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker");
-
-    if (!loopinfo_marker)
-        return PreservedAnalyses::all();
-
-    FunctionAnalysisManager &FAM =
-      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+PreservedAnalyses LowerSIMDLoopPass::run(Loop &L, LoopAnalysisManager &AM,
+                          LoopStandardAnalysisResults &AR, LPMUpdater &U)
 
-    auto GetLI = [&FAM](Function &F) -> LoopInfo & {
-        return FAM.getResult<LoopAnalysis>(F);
-    };
-
-    if (markLoopInfo(M, loopinfo_marker, GetLI)) {
-        auto preserved = PreservedAnalyses::allInSet<CFGAnalyses>();
-        preserved.preserve<LoopAnalysis>();
+{
+    OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
+    if (processLoop(L, ORE, &AR.SE)) {
+#ifdef JL_DEBUG_BUILD
+        if (AR.MSSA)
+            AR.MSSA->verifyMemorySSA();
+#endif
+        auto preserved = getLoopPassPreservedAnalyses();
+        preserved.preserveSet<CFGAnalyses>();
+        preserved.preserve<MemorySSAAnalysis>();
         return preserved;
     }
 
     return PreservedAnalyses::all();
 }
-
-namespace {
-class LowerSIMDLoopLegacy : public ModulePass {
-    //LowerSIMDLoop Impl;
-
-public:
-  static char ID;
-
-  LowerSIMDLoopLegacy() : ModulePass(ID) {
-  }
-
-  bool runOnModule(Module &M) override {
-    bool Changed = false;
-
-    Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker");
-
-    auto GetLI = [this](Function &F) JL_NOTSAFEPOINT -> LoopInfo & {
-        return getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo();
-    };
-
-    if (loopinfo_marker)
-        Changed |= markLoopInfo(M, loopinfo_marker, GetLI);
-
-    return Changed;
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override
-  {
-      ModulePass::getAnalysisUsage(AU);
-      AU.addRequired<LoopInfoWrapperPass>();
-      AU.addPreserved<LoopInfoWrapperPass>();
-      AU.setPreservesCFG();
-  }
-};
-
-} // end anonymous namespace
-
-char LowerSIMDLoopLegacy::ID = 0;
-
-static RegisterPass<LowerSIMDLoopLegacy> X("LowerSIMDLoop", "LowerSIMDLoop Pass",
-                                     false /* Only looks at CFG */,
-                                     false /* Analysis Pass */);
-
-Pass *createLowerSimdLoopPass()
-{
-    return new LowerSIMDLoopLegacy();
-}
-
-extern "C" JL_DLLEXPORT_CODEGEN
-void LLVMExtraAddLowerSimdLoopPass_impl(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createLowerSimdLoopPass());
-}
diff --git a/src/llvm-version.h b/src/llvm-version.h
index 01638b8d44a6e..061d80deb02f9 100644
--- a/src/llvm-version.h
+++ b/src/llvm-version.h
@@ -10,21 +10,12 @@
 #define JL_LLVM_VERSION (LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100 \
                         + LLVM_VERSION_PATCH)
 
-#if JL_LLVM_VERSION < 140000
-    #error Only LLVM versions >= 14.0.0 are supported by Julia
+#if JL_LLVM_VERSION < 170000
+    #error Only LLVM versions >= 17.0.0 are supported by Julia
 #endif
 
-#if JL_LLVM_VERSION >= 160000
-#define JL_LLVM_OPAQUE_POINTERS 1
-#endif
-
-// Pre GCC 12 libgcc defined the ABI for Float16->Float32
-// to take an i16. GCC 12 silently changed the ABI to now pass
-// Float16 in Float32 registers.
-#if JL_LLVM_VERSION < 150000 || defined(_CPU_PPC64_) || defined(_CPU_PPC_)
-#define JULIA_FLOAT16_ABI 1
-#else
-#define JULIA_FLOAT16_ABI 2
+#if JL_LLVM_VERSION < 19000 && defined(_CPU_RISCV64_)
+    #error Only LLVM versions >= 19.0.0 are supported by Julia on RISC-V
 #endif
 
 #ifdef __cplusplus
diff --git a/src/llvm_api.cpp b/src/llvm_api.cpp
index d56fb3a0497fa..a4ce0e3f01c85 100644
--- a/src/llvm_api.cpp
+++ b/src/llvm_api.cpp
@@ -10,7 +10,6 @@
 #endif
 
 #include "jitlayers.h"
-#include "passes.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/Error.h>
@@ -21,6 +20,7 @@
 #include <llvm/Support/CBindingWrapping.h>
 #include <llvm/Support/MemoryBuffer.h>
 
+#if JL_LLVM_VERSION < 180000
 namespace llvm {
 namespace orc {
 class OrcV2CAPIHelper {
@@ -38,7 +38,7 @@ class OrcV2CAPIHelper {
 };
 } // namespace orc
 } // namespace llvm
-
+#endif
 
 typedef struct JLOpaqueJuliaOJIT *JuliaOJITRef;
 typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef;
@@ -46,20 +46,17 @@ typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef;
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JuliaOJIT, JuliaOJITRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::JITDylib, LLVMOrcJITDylibRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ExecutionSession, LLVMOrcExecutionSessionRef)
+#if JL_LLVM_VERSION >= 180000
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::SymbolStringPoolEntryUnsafe::PoolEntry,
+                                   LLVMOrcSymbolStringPoolEntryRef)
+#else
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::OrcV2CAPIHelper::PoolEntry,
                                    LLVMOrcSymbolStringPoolEntryRef)
+#endif
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::IRCompileLayer, LLVMOrcIRCompileLayerRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::MaterializationResponsibility,
                                    LLVMOrcMaterializationResponsibilityRef)
 
-typedef struct LLVMOpaqueModulePassManager *LLVMModulePassManagerRef;
-typedef struct LLVMOpaqueFunctionPassManager *LLVMFunctionPassManagerRef;
-typedef struct LLVMOpaqueLoopPassManager *LLVMLoopPassManagerRef;
-
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::ModulePassManager, LLVMModulePassManagerRef)
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::FunctionPassManager, LLVMFunctionPassManagerRef)
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::LoopPassManager, LLVMLoopPassManagerRef)
-
 extern "C" {
 
 JL_DLLEXPORT_CODEGEN JuliaOJITRef JLJITGetJuliaOJIT_impl(void)
@@ -113,7 +110,11 @@ JL_DLLEXPORT_CODEGEN LLVMOrcSymbolStringPoolEntryRef
 JLJITMangleAndIntern_impl(JuliaOJITRef JIT,
                                             const char *Name)
 {
+#if JL_LLVM_VERSION >= 180000
+    return wrap(orc::SymbolStringPoolEntryUnsafe::take(unwrap(JIT)->mangle(Name)).rawPtr());
+#else
     return wrap(orc::OrcV2CAPIHelper::moveFromSymbolStringPtr(unwrap(JIT)->mangle(Name)));
+#endif
 }
 
 JL_DLLEXPORT_CODEGEN const char *
@@ -122,7 +123,7 @@ JLJITGetTripleString_impl(JuliaOJITRef JIT)
     return unwrap(JIT)->getTargetTriple().str().c_str();
 }
 
-JL_DLLEXPORT_CODEGEN const char
+JL_DLLEXPORT_CODEGEN char
 JLJITGetGlobalPrefix_impl(JuliaOJITRef JIT)
 {
     return unwrap(JIT)->getDataLayout().getGlobalPrefix();
@@ -140,27 +141,4 @@ JLJITGetIRCompileLayer_impl(JuliaOJITRef JIT)
     return wrap(&unwrap(JIT)->getIRCompileLayer());
 }
 
-#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \
-    JL_DLLEXPORT_CODEGEN void LLVMExtraMPMAdd##CLASS##_impl(LLVMModulePassManagerRef PM) \
-    { \
-        unwrap(PM)->addPass(CREATE_PASS); \
-    }
-#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \
-    JL_DLLEXPORT_CODEGEN void LLVMExtraFPMAdd##CLASS##_impl(LLVMFunctionPassManagerRef PM) \
-    { \
-        unwrap(PM)->addPass(CREATE_PASS); \
-    }
-#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \
-    JL_DLLEXPORT_CODEGEN void LLVMExtraLPMAdd##CLASS##_impl(LLVMLoopPassManagerRef PM) \
-    { \
-        unwrap(PM)->addPass(CREATE_PASS); \
-    }
-
-#include "llvm-julia-passes.inc"
-
-#undef MODULE_PASS
-#undef CGSCC_PASS
-#undef FUNCTION_PASS
-#undef LOOP_PASS
-
 } // extern "C"
diff --git a/src/llvmcalltest.cpp b/src/llvmcalltest.cpp
index 93c442445d79a..da63f73ea87f3 100644
--- a/src/llvmcalltest.cpp
+++ b/src/llvmcalltest.cpp
@@ -6,6 +6,7 @@
 
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
 #include <llvm/Support/raw_ostream.h>
 
 #include "julia.h"
@@ -25,7 +26,7 @@ extern "C" {
 DLLEXPORT const char *MakeIdentityFunction(jl_value_t* jl_AnyTy) {
     LLVMContext Ctx;
     // FIXME: get TrackedTy via jl_type_to_llvm(Ctx, jl_AnyTy)
-    Type *TrackedTy = PointerType::get(StructType::get(Ctx), AddressSpace::Tracked);
+    Type *TrackedTy = PointerType::get(Ctx, AddressSpace::Tracked);
     Module *M = new llvm::Module("shadow", Ctx);
     Function *F = Function::Create(
         FunctionType::get(
diff --git a/src/mach_excServer.c b/src/mach_excServer.c
index 7e99331fa8554..669fd0e2313aa 100644
--- a/src/mach_excServer.c
+++ b/src/mach_excServer.c
@@ -20,7 +20,7 @@
  * terms of an Apple operating system software license agreement.
  *
  * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * https://www.opensource.apple.com/apsl/ and read it before using this file.
  *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
diff --git a/src/macroexpand.scm b/src/macroexpand.scm
index e0e809eee08f1..2990d98eefe6e 100644
--- a/src/macroexpand.scm
+++ b/src/macroexpand.scm
@@ -126,6 +126,16 @@
                                  (else '())))
                                (else '()))))))
 
+   ;; for/generator
+   (pattern-lambda (for assgn body)
+                   (if (eq? (car assgn) 'block)
+                    `(varlist ,@(map cadr (cdr assgn)))
+                     (cons 'varlist (cadr assgn))))
+   (pattern-lambda (generator body (filter filt . assgn))
+                    (cons 'varlist (map (lambda (x) (cadr x)) assgn)))
+   (pattern-lambda (generator body . assgn)
+                    (cons 'varlist (map (lambda (x) (cadr x)) assgn)))
+
    ;; macro definition
    (pattern-lambda (macro (call name . argl) body)
                    `(-> (tuple ,@argl) ,body))
@@ -184,18 +194,18 @@
       (unescape (cadr e))
       e))
 
-(define (unescape-global-lhs e env m parent-scope inarg)
+(define (unescape-global-lhs e env m lno parent-scope inarg)
   (cond ((not (pair? e)) e)
-        ((eq? (car e) 'escape) (unescape-global-lhs (cadr e) env m parent-scope inarg))
+        ((eq? (car e) 'escape) (unescape-global-lhs (cadr e) env m lno parent-scope inarg))
         ((memq (car e) '(parameters tuple))
          (list* (car e) (map (lambda (e)
-                          (unescape-global-lhs e env m parent-scope inarg))
+                          (unescape-global-lhs e env m lno parent-scope inarg))
                         (cdr e))))
         ((and (memq (car e) '(|::| kw)) (length= e 3))
-         (list (car e) (unescape-global-lhs (cadr e) env m parent-scope inarg)
-                       (resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)))
+         (list (car e) (unescape-global-lhs (cadr e) env m lno parent-scope inarg)
+                       (resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg)))
         (else
-         (resolve-expansion-vars-with-new-env e env m parent-scope inarg))))
+         (resolve-expansion-vars-with-new-env e env m lno parent-scope inarg))))
 
 (define (typedef-expr-name e)
   (cond ((atom? e) e)
@@ -219,30 +229,26 @@
               lst)))
 
 ;; get the name from a function formal argument expression, allowing `(escape x)`
-(define (try-arg-name v)
-  (cond ((symbol? v) (list v))
+(define (try-arg-name v (escaped #f))
+  (cond ((symbol? v) (if escaped '() (list v)))
         ((atom? v) '())
         (else
          (case (car v)
-           ((|::|) (if (length= v 2) '() (try-arg-name (cadr v))))
-           ((... kw =) (try-arg-name (cadr v)))
-           ((escape) (list v))
-           ((hygienic-scope) (try-arg-name (cadr v)))
+           ((|::|) (if (length= v 2) '() (try-arg-name (cadr v) escaped)))
+           ((... kw =) (try-arg-name (cadr v) escaped))
+           ((escape) (if escaped (list (cadr v)) '()))
+           ((hygienic-scope) (try-arg-name (cadr v) escaped))
+           ((tuple) (apply nconc (map (lambda (e) (try-arg-name e escaped)) (cdr v))))
            ((meta)  ;; allow certain per-argument annotations
             (if (nospecialize-meta? v #t)
-                (try-arg-name (caddr v))
+                (try-arg-name (caddr v) escaped)
                 '()))
            (else '())))))
 
 ;; get names from a formal argument list, specifying whether to include escaped ones
 (define (safe-arg-names lst (escaped #f))
   (apply nconc
-         (map (lambda (v)
-                (let ((vv (try-arg-name v)))
-                  (if (eq? escaped (and (pair? vv) (pair? (car vv)) (eq? (caar vv) 'escape)))
-                      (if escaped (list (cadar vv)) vv)
-                      '())))
-              lst)))
+         (map (lambda (v) (try-arg-name v escaped)) lst)))
 
 ;; arg names, looking only at positional args
 (define (safe-llist-positional-args lst (escaped #f))
@@ -280,18 +286,18 @@
 ;; resolve-expansion-vars-with-new-env, but turn on `inarg` if we get inside
 ;; a formal argument list. `e` in general might be e.g. `(f{T}(x)::T) where T`,
 ;; and we want `inarg` to be true for the `(x)` part.
-(define (resolve-in-lhs e env m parent-scope inarg)
-  (define (recur x) (resolve-in-lhs x env m parent-scope inarg))
-  (define (other x) (resolve-expansion-vars-with-new-env x env m parent-scope inarg))
+(define (resolve-in-lhs e env m lno parent-scope inarg)
+  (define (recur x) (resolve-in-lhs x env m lno parent-scope inarg))
+  (define (other x) (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg))
   (case (and (pair? e) (car e))
     ((where) `(where ,(recur (cadr e)) ,@(map other (cddr e))))
     ((|::|)  `(|::| ,(recur (cadr e)) ,(other (caddr e))))
     ((call)  `(call ,(other (cadr e))
                     ,@(map (lambda (x)
-                             (resolve-expansion-vars-with-new-env x env m parent-scope #t))
+                             (resolve-expansion-vars-with-new-env x env m lno parent-scope #t))
                            (cddr e))))
     ((tuple) `(tuple ,@(map (lambda (x)
-                              (resolve-expansion-vars-with-new-env x env m parent-scope #t))
+                              (resolve-expansion-vars-with-new-env x env m lno parent-scope #t))
                             (cdr e))))
     (else (other e))))
 
@@ -328,7 +334,7 @@
                    (keywords-introduced-by x))
               env)))))))
 
-(define (resolve-expansion-vars-with-new-env x env m parent-scope inarg (outermost #f))
+(define (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg (outermost #f))
   (resolve-expansion-vars-
    x
    (if (and (pair? x) (eq? (car x) 'let))
@@ -336,14 +342,50 @@
        ;; the same expression
        env
        (new-expansion-env-for x env outermost))
-   m parent-scope inarg))
+   m lno parent-scope inarg))
 
 (define (reescape ux x)
   (if (and (pair? x) (eq? (car x) 'escape))
-    (reescape '(escape ,ux) (cadr x)))
-    ux)
-
-(define (resolve-expansion-vars- e env m parent-scope inarg)
+    (reescape `(escape ,ux) (cadr x))
+    ux))
+
+;; type has special behavior: identifiers inside are
+;; field names, not expressions.
+(define (resolve-struct-field-expansion x env m lno parent-scope inarg)
+  (let ((ux (unescape x)))
+    (cond
+        ((atom? ux) ux)
+        ((and (pair? ux) (eq? (car ux) '|::|))
+         `(|::| ,(unescape (cadr ux))
+           ,(resolve-expansion-vars- (reescape (caddr ux) x) env m lno parent-scope inarg)))
+        ((and (pair? ux) (memq (car ux) '(const atomic)))
+         `(,(car ux) ,(resolve-struct-field-expansion (reescape (cadr ux) x) env m lno parent-scope inarg)))
+        (else
+         (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg)))))
+
+(define (resolve-letlike-assign bind env newenv m lno parent-scope inarg)
+  (if (assignment? bind)
+    (make-assignment
+      ;; expand binds in newenv with dummy RHS
+      (cadr (resolve-expansion-vars- (make-assignment (cadr bind) 0)
+                                    newenv m lno parent-scope inarg))
+      ;; expand initial values in old env
+      (resolve-expansion-vars- (caddr bind) env m lno parent-scope inarg))
+    ;; Just expand everything else that's not an assignment. N.B.: This includes
+    ;; assignments inside escapes, which probably need special handling (TODO).
+    (resolve-expansion-vars- bind newenv m lno parent-scope inarg)))
+
+(define (for-ranges-list ranges)
+  (if (eq? (car ranges) 'escape)
+    (map (lambda (range) `(escape ,range)) (for-ranges-list (cadr ranges)))
+    (if (eq? (car ranges) 'block)
+      (cdr ranges)
+      (list ranges))))
+
+(define (just-line? ex)
+  (and (pair? ex) (eq? (car ex) 'line) (atom? (cadr ex)) (or (atom? (caddr ex)) (nothing? (caddr ex)))))
+
+(define (resolve-expansion-vars- e env m lno parent-scope inarg)
   (cond ((or (eq? e 'begin) (eq? e 'end) (eq? e 'ccall) (eq? e 'cglobal) (underscore-symbol? e))
          e)
         ((symbol? e)
@@ -362,31 +404,35 @@
                      (env (car scope))
                      (m (cadr scope))
                      (parent-scope (cdr parent-scope)))
-                (resolve-expansion-vars-with-new-env (cadr e) env m parent-scope inarg))))
+                (resolve-expansion-vars-with-new-env (cadr e) env m lno parent-scope inarg))))
            ((global)
             `(global
                ,@(map (lambda (arg)
                        (if (assignment? arg)
-                           `(= ,(unescape-global-lhs (cadr arg) env m parent-scope inarg)
-                               ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg))
-                           (unescape-global-lhs arg env m parent-scope inarg)))
+                           `(= ,(unescape-global-lhs (cadr arg) env m lno parent-scope inarg)
+                               ,(resolve-expansion-vars-with-new-env (caddr arg) env m lno parent-scope inarg))
+                           (unescape-global-lhs arg env m lno parent-scope inarg)))
                       (cdr e))))
-           ((using import export meta line inbounds boundscheck loopinfo inline noinline) (map unescape e))
+           ((toplevel) ; re-wrap Expr(:toplevel) in the current hygienic-scope(s)
+            `(toplevel
+               ,@(map (lambda (arg)
+                       ;; Minor optimization: A lot of toplevel exprs have just bare line numbers in them.
+                       ;; don't bother with the full rewrapping in that case (even though
+                       ;; this would be semantically legal) - lowering won't touch them anyways.
+                       (if (just-line? arg) arg
+                        (let loop ((parent-scope parent-scope) (m m) (lno lno) (arg arg))
+                          (let ((wrapped `(hygienic-scope ,arg ,m ,@lno)))
+                            (if (null? parent-scope) wrapped
+                              (loop (cdr parent-scope) (cadar parent-scope) (caddar parent-scope) wrapped))))))
+                        (cdr e))))
+           ((using import export meta line inbounds boundscheck loopinfo inline noinline purity) (map unescape e))
            ((macrocall) e) ; invalid syntax anyways, so just act like it's quoted.
            ((symboliclabel) e)
            ((symbolicgoto) e)
            ((struct)
-            `(struct ,(cadr e) ,(resolve-expansion-vars- (caddr e) env m parent-scope inarg)
-                     ;; type has special behavior: identifiers inside are
-                     ;; field names, not expressions.
+            `(struct ,(cadr e) ,(resolve-expansion-vars- (caddr e) env m lno parent-scope inarg)
                      ,(map (lambda (x)
-                             (let ((ux (unescape x)))
-                                  (cond ((atom? ux) ux)
-                                        ((and (pair? ux) (eq? (car ux) '|::|))
-                                         `(|::| ,(unescape (cadr ux))
-                                           ,(resolve-expansion-vars- (reescape (caddr ux) x) env m parent-scope inarg)))
-                                        (else
-                                         (resolve-expansion-vars-with-new-env x env m parent-scope inarg)))))
+                            (resolve-struct-field-expansion x env m lno parent-scope inarg))
                            (cadddr e))))
 
            ((parameters)
@@ -397,17 +443,17 @@
                                 (x (if (and (not inarg) (symbol? ux))
                                        `(kw ,ux ,x)
                                        x)))
-                           (resolve-expansion-vars- x env m parent-scope #f)))
+                           (resolve-expansion-vars- x env m lno parent-scope #f)))
                        (cdr e))))
 
            ((->)
-            `(-> ,(resolve-in-lhs (tuple-wrap-arrow-sig (cadr e)) env m parent-scope inarg)
-                 ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)))
+            `(-> ,(resolve-in-lhs (tuple-wrap-arrow-sig (cadr e)) env m lno parent-scope inarg)
+                 ,(resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg)))
 
            ((= function)
-             `(,(car e) ,(resolve-in-lhs (cadr e) env m parent-scope inarg)
+             `(,(car e) ,(resolve-in-lhs (cadr e) env m lno parent-scope inarg)
                         ,@(map (lambda (x)
-                                   (resolve-expansion-vars-with-new-env x env m parent-scope inarg))
+                                   (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg))
                                  (cddr e))))
 
            ((kw)
@@ -421,55 +467,67 @@
                 `(kw (|::|
                       ,@(if argname
                             (list (if inarg
-                                      (resolve-expansion-vars- argname env m parent-scope inarg)
+                                      (resolve-expansion-vars- argname env m lno parent-scope inarg)
                                       ;; in keyword arg A=B, don't transform "A"
                                       (unescape argname)))
                             '())
-                      ,(resolve-expansion-vars- type env m parent-scope inarg))
-                     ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))))
+                      ,(resolve-expansion-vars- type env m lno parent-scope inarg))
+                     ,(resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg))))
              (else
               `(kw ,(if inarg
-                        (resolve-expansion-vars- (cadr e) env m parent-scope inarg)
+                        (resolve-expansion-vars- (cadr e) env m lno parent-scope inarg)
                         (unescape (cadr e)))
-                   ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)))))
+                   ,(resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg)))))
 
            ((let)
             (let* ((newenv (new-expansion-env-for e env))
-                   (body   (resolve-expansion-vars- (caddr e) newenv m parent-scope inarg))
+                   (body   (resolve-expansion-vars- (caddr e) newenv m lno parent-scope inarg))
                    (binds  (let-binds e)))
               `(let (block
                      ,@(map
                         (lambda (bind)
-                          (if (assignment? bind)
-                              (make-assignment
-                               ;; expand binds in old env with dummy RHS
-                               (cadr (resolve-expansion-vars- (make-assignment (cadr bind) 0)
-                                                              newenv m parent-scope inarg))
-                               ;; expand initial values in old env
-                               (resolve-expansion-vars- (caddr bind) env m parent-scope inarg))
-                              (resolve-expansion-vars- bind newenv m parent-scope inarg)))
+                          (resolve-letlike-assign bind env newenv m lno parent-scope inarg))
                         binds))
                  ,body)))
+           ((for)
+            (let* ((newenv (new-expansion-env-for e env))
+                   (body   (resolve-expansion-vars- (caddr e) newenv m lno parent-scope inarg))
+                   (expanded-ranges (map (lambda (range)
+                      (resolve-letlike-assign range env newenv m lno parent-scope inarg)) (for-ranges-list (cadr e)))))
+              (if (length= expanded-ranges 1)
+                `(for ,@expanded-ranges ,body))
+                `(for (block ,@expanded-ranges) ,body)))
+           ((generator)
+            (let* ((newenv (new-expansion-env-for e env))
+                   (body   (resolve-expansion-vars- (cadr e) newenv m lno parent-scope inarg))
+                   (filt? (eq? (car (caddr e)) 'filter))
+                   (range-exprs (if filt? (cddr (caddr e)) (cddr e)))
+                   (filt (if filt? (resolve-expansion-vars- (cadr (caddr e)) newenv m lno parent-scope inarg)))
+                   (expanded-ranges (map (lambda (range)
+                      (resolve-letlike-assign range env newenv m lno parent-scope inarg)) range-exprs)))
+              (if filt?
+                `(generator ,body (filter ,filt ,@expanded-ranges))
+                `(generator ,body ,@expanded-ranges))))
            ((hygienic-scope) ; TODO: move this lowering to resolve-scopes, instead of reimplementing it here badly
-             (let ((parent-scope (cons (list env m) parent-scope))
+             (let ((parent-scope (cons (list env m lno) parent-scope))
                    (body (cadr e))
                    (m (caddr e))
                    (lno  (cdddr e)))
-              (resolve-expansion-vars-with-new-env body env m parent-scope inarg #t)))
+              (resolve-expansion-vars-with-new-env body '() m lno parent-scope inarg #t)))
            ((tuple)
             (cons (car e)
                   (map (lambda (x)
                          (if (assignment? x)
                              `(= ,(unescape (cadr x))
-                                 ,(resolve-expansion-vars-with-new-env (caddr x) env m parent-scope inarg))
-                             (resolve-expansion-vars-with-new-env x env m parent-scope inarg)))
+                                 ,(resolve-expansion-vars-with-new-env (caddr x) env m lno parent-scope inarg))
+                             (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg)))
                        (cdr e))))
 
            ;; todo: trycatch
            (else
             (cons (car e)
                   (map (lambda (x)
-                         (resolve-expansion-vars-with-new-env x env m parent-scope inarg))
+                         (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg))
                        (cdr e))))))))
 
 ;; decl-var that also identifies f in f()=...
@@ -570,11 +628,11 @@
         (cdr v)
         '())))
 
-(define (resolve-expansion-vars e m)
+(define (resolve-expansion-vars e m lno)
   ;; expand binding form patterns
   ;; keep track of environment, rename locals to gensyms
   ;; and wrap globals in (globalref module var) for macro's home module
-  (resolve-expansion-vars-with-new-env e '() m '() #f #t))
+  (resolve-expansion-vars-with-new-env e '() m lno '() #f #t))
 
 (define (julia-expand-quotes e)
   (cond ((not (pair? e)) e)
@@ -590,11 +648,12 @@
   (cond ((not (pair? e)) e)
         ((eq? (car e) 'inert) e)
         ((eq? (car e) 'module) e)
+        ((eq? (car e) 'toplevel) e)
         ((eq? (car e) 'hygienic-scope)
          (let ((form (cadr e)) ;; form is the expression returned from expand-macros
                (modu (caddr e)) ;; m is the macro's def module
                (lno  (cdddr e))) ;; lno is (optionally) the line number node
-           (resolve-expansion-vars form modu)))
+           (resolve-expansion-vars form modu lno)))
         (else
          (map julia-expand-macroscopes- e))))
 
diff --git a/src/method.c b/src/method.c
index 06a05361a927d..cee941ae77ddb 100644
--- a/src/method.c
+++ b/src/method.c
@@ -10,19 +10,12 @@
 #include "julia.h"
 #include "julia_internal.h"
 #include "julia_assert.h"
+#include "builtin_proto.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-extern jl_value_t *jl_builtin_getfield;
-extern jl_value_t *jl_builtin_tuple;
-jl_methtable_t *jl_kwcall_mt;
-jl_method_t *jl_opaque_closure_method;
-
-jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
-    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva);
-
 static void check_c_types(const char *where, jl_value_t *rt, jl_value_t *at)
 {
     if (jl_is_svec(rt))
@@ -42,234 +35,290 @@ static void check_c_types(const char *where, jl_value_t *rt, jl_value_t *at)
     }
 }
 
+void jl_add_scanned_method(jl_module_t *m, jl_method_t *meth)
+{
+    JL_LOCK(&m->lock);
+    if (m->scanned_methods == jl_nothing) {
+        m->scanned_methods = (jl_value_t*)jl_alloc_vec_any(0);
+        jl_gc_wb(m, m->scanned_methods);
+    }
+    jl_array_ptr_1d_push((jl_array_t*)m->scanned_methods, (jl_value_t*)meth);
+    JL_UNLOCK(&m->lock);
+}
+
+JL_DLLEXPORT void jl_scan_method_source_now(jl_method_t *m, jl_value_t *src)
+{
+    if (!jl_atomic_fetch_or(&m->did_scan_source, 1)) {
+        jl_code_info_t *code = NULL;
+        JL_GC_PUSH1(&code);
+        if (!jl_is_code_info(src))
+            code = jl_uncompress_ir(m, NULL, src);
+        else
+            code = (jl_code_info_t*)src;
+        jl_array_t *stmts = code->code;
+        size_t i, l = jl_array_nrows(stmts);
+        int any_implicit = 0;
+        for (i = 0; i < l; i++) {
+            jl_value_t *stmt = jl_array_ptr_ref(stmts, i);
+            if (jl_is_globalref(stmt)) {
+                jl_globalref_t *gr = (jl_globalref_t*)stmt;
+                jl_binding_t *b = gr->binding;
+                if (!b)
+                    b = jl_get_module_binding(gr->mod, gr->name, 1);
+                any_implicit |= jl_maybe_add_binding_backedge(b, (jl_value_t*)m, m);
+            }
+        }
+        if (any_implicit && !(jl_atomic_fetch_or(&m->did_scan_source, 0x2) & 0x2))
+            jl_add_scanned_method(m->module, m);
+        JL_GC_POP();
+    }
+}
+
 // Resolve references to non-locally-defined variables to become references to global
 // variables in `module` (unless the rvalue is one of the type parameters in `sparam_vals`).
-static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_svec_t *sparam_vals,
-                                   int binding_effects, int eager_resolve)
+static jl_value_t *resolve_definition_effects(jl_value_t *expr, jl_module_t *module, jl_svec_t *sparam_vals, jl_value_t *binding_edge,
+                                              int binding_effects)
 {
     if (jl_is_symbol(expr)) {
-        if (module == NULL)
-            return expr;
-        return jl_module_globalref(module, (jl_sym_t*)expr);
+        jl_errorf("Found raw symbol %s in code returned from lowering. Expected all symbols to have been resolved to GlobalRef or slots.",
+                  jl_symbol_name((jl_sym_t*)expr));
     }
-    else if (jl_is_returnnode(expr)) {
-        jl_value_t *retval = jl_returnnode_value(expr);
-        if (retval) {
-            jl_value_t *val = resolve_globals(retval, module, sparam_vals, binding_effects, eager_resolve);
-            if (val != retval) {
-                JL_GC_PUSH1(&val);
-                expr = jl_new_struct(jl_returnnode_type, val);
-                JL_GC_POP();
-            }
-        }
+
+    if (!jl_is_expr(expr)) {
         return expr;
     }
-    else if (jl_is_gotoifnot(expr)) {
-        jl_value_t *cond = resolve_globals(jl_gotoifnot_cond(expr), module, sparam_vals, binding_effects, eager_resolve);
-        if (cond != jl_gotoifnot_cond(expr)) {
-            intptr_t label = jl_gotoifnot_label(expr);
-            JL_GC_PUSH1(&cond);
-            expr = jl_new_struct_uninit(jl_gotoifnot_type);
-            set_nth_field(jl_gotoifnot_type, expr, 0, cond, 0);
-            jl_gotoifnot_label(expr) = label;
-            JL_GC_POP();
-        }
+
+    jl_expr_t *e = (jl_expr_t*)expr;
+    // These exprs are not fully linearized
+    if (e->head == jl_assign_sym) {
+        jl_exprargset(e, 1, resolve_definition_effects(jl_exprarg(e, 1), module, sparam_vals, binding_edge, binding_effects));
+        return expr;
+    } else if (e->head == jl_new_opaque_closure_sym) {
+        jl_exprargset(e, 4, resolve_definition_effects(jl_exprarg(e, 4), module, sparam_vals, binding_edge, binding_effects));
         return expr;
     }
-    else if (jl_is_expr(expr)) {
-        jl_expr_t *e = (jl_expr_t*)expr;
-        if (e->head == jl_global_sym && binding_effects) {
-            // execute the side-effects of "global x" decl immediately:
-            // creates uninitialized mutable binding in module for each global
-            jl_eval_global_expr(module, e, 1);
-            expr = jl_nothing;
+    size_t nargs = jl_array_nrows(e->args);
+    if (e->head == jl_opaque_closure_method_sym) {
+        if (nargs != 5) {
+            jl_error("opaque_closure_method: invalid syntax");
         }
-        if (jl_is_toplevel_only_expr(expr) || e->head == jl_const_sym ||
-            e->head == jl_coverageeffect_sym || e->head == jl_copyast_sym ||
-            e->head == jl_quote_sym || e->head == jl_inert_sym ||
-            e->head == jl_meta_sym || e->head == jl_inbounds_sym ||
-            e->head == jl_boundscheck_sym || e->head == jl_loopinfo_sym ||
-            e->head == jl_aliasscope_sym || e->head == jl_popaliasscope_sym ||
-            e->head == jl_inline_sym || e->head == jl_noinline_sym) {
-            // ignore these
+        jl_value_t *name = jl_exprarg(e, 0);
+        jl_value_t *oc_nargs = jl_exprarg(e, 1);
+        int isva = jl_exprarg(e, 2) == jl_true;
+        jl_value_t *functionloc = jl_exprarg(e, 3);
+        jl_value_t *ci = jl_exprarg(e, 4);
+        if (!jl_is_code_info(ci)) {
+            jl_error("opaque_closure_method: lambda should be a CodeInfo");
+        } else if (!jl_is_long(oc_nargs)) {
+            jl_type_error("opaque_closure_method", (jl_value_t*)jl_long_type, oc_nargs);
         }
-        else {
-            size_t i = 0, nargs = jl_array_len(e->args);
-            if (e->head == jl_opaque_closure_method_sym) {
-                if (nargs != 5) {
-                    jl_error("opaque_closure_method: invalid syntax");
-                }
-                jl_value_t *name = jl_exprarg(e, 0);
-                jl_value_t *oc_nargs = jl_exprarg(e, 1);
-                int isva = jl_exprarg(e, 2) == jl_true;
-                jl_value_t *functionloc = jl_exprarg(e, 3);
-                jl_value_t *ci = jl_exprarg(e, 4);
-                if (!jl_is_code_info(ci)) {
-                    jl_error("opaque_closure_method: lambda should be a CodeInfo");
-                } else if (!jl_is_long(oc_nargs)) {
-                    jl_type_error("opaque_closure_method", (jl_value_t*)jl_long_type, oc_nargs);
-                }
-                jl_method_t *m = jl_make_opaque_closure_method(module, name, jl_unbox_long(oc_nargs), functionloc, (jl_code_info_t*)ci, isva);
-                return (jl_value_t*)m;
+        jl_method_t *m = jl_make_opaque_closure_method(module, name,
+            jl_unbox_long(oc_nargs), functionloc, (jl_code_info_t*)ci, isva, /*isinferred*/0);
+        return (jl_value_t*)m;
+    }
+    if (e->head == jl_cfunction_sym) {
+        JL_NARGS(cfunction method definition, 5, 5); // (type, func, rt, at, cc)
+        jl_task_t *ct = jl_current_task;
+        jl_value_t *typ = jl_exprarg(e, 0);
+        if (!jl_is_type(typ))
+            jl_error("first parameter to :cfunction must be a type");
+        if (typ == (jl_value_t*)jl_voidpointer_type) {
+            jl_value_t *a = jl_exprarg(e, 1);
+            JL_TYPECHK(cfunction method definition, quotenode, a);
+            *(jl_value_t**)a = jl_toplevel_eval(module, *(jl_value_t**)a);
+            jl_gc_wb(a, *(jl_value_t**)a);
+        }
+        jl_value_t *rt = jl_exprarg(e, 2);
+        jl_value_t *at = jl_exprarg(e, 3);
+        if (!jl_is_type(rt)) {
+            JL_TRY {
+                rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals);
             }
-            if (e->head == jl_cfunction_sym) {
-                JL_NARGS(cfunction method definition, 5, 5); // (type, func, rt, at, cc)
-                jl_value_t *typ = jl_exprarg(e, 0);
-                if (!jl_is_type(typ))
-                    jl_error("first parameter to :cfunction must be a type");
-                if (typ == (jl_value_t*)jl_voidpointer_type) {
-                    jl_value_t *a = jl_exprarg(e, 1);
-                    JL_TYPECHK(cfunction method definition, quotenode, a);
-                    *(jl_value_t**)a = jl_toplevel_eval(module, *(jl_value_t**)a);
-                    jl_gc_wb(a, *(jl_value_t**)a);
-                }
-                jl_value_t *rt = jl_exprarg(e, 2);
-                jl_value_t *at = jl_exprarg(e, 3);
-                if (!jl_is_type(rt)) {
-                    JL_TRY {
-                        rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals);
-                    }
-                    JL_CATCH {
-                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
-                            jl_error("could not evaluate cfunction return type (it might depend on a local variable)");
-                        else
-                            jl_rethrow();
-                    }
-                    jl_exprargset(e, 2, rt);
-                }
-                if (!jl_is_svec(at)) {
-                    JL_TRY {
-                        at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals);
-                    }
-                    JL_CATCH {
-                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
-                            jl_error("could not evaluate cfunction argument type (it might depend on a local variable)");
-                        else
-                            jl_rethrow();
-                    }
-                    jl_exprargset(e, 3, at);
-                }
-                check_c_types("cfunction method definition", rt, at);
-                JL_TYPECHK(cfunction method definition, quotenode, jl_exprarg(e, 4));
-                JL_TYPECHK(cfunction method definition, symbol, *(jl_value_t**)jl_exprarg(e, 4));
-                return expr;
+            JL_CATCH {
+                if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type))
+                    jl_error("could not evaluate cfunction return type (it might depend on a local variable)");
+                else
+                    jl_rethrow();
+            }
+            jl_exprargset(e, 2, rt);
+        }
+        if (!jl_is_svec(at)) {
+            JL_TRY {
+                at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals);
+            }
+            JL_CATCH {
+                if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type))
+                    jl_error("could not evaluate cfunction argument type (it might depend on a local variable)");
+                else
+                    jl_rethrow();
             }
-            if (e->head == jl_foreigncall_sym) {
-                JL_NARGSV(ccall method definition, 5); // (fptr, rt, at, nreq, (cc, effects))
-                jl_value_t *rt = jl_exprarg(e, 1);
-                jl_value_t *at = jl_exprarg(e, 2);
-                if (!jl_is_type(rt)) {
+            jl_exprargset(e, 3, at);
+        }
+        check_c_types("cfunction method definition", rt, at);
+        JL_TYPECHK(cfunction method definition, quotenode, jl_exprarg(e, 4));
+        JL_TYPECHK(cfunction method definition, symbol, *(jl_value_t**)jl_exprarg(e, 4));
+        return expr;
+    }
+    if (e->head == jl_foreigncall_sym) {
+        JL_NARGSV(ccall method definition, 5); // (fptr, rt, at, nreq, (cc, effects, gc_safe))
+        jl_task_t *ct = jl_current_task;
+        jl_value_t *fptr = jl_exprarg(e, 0);
+        // Handle dot expressions in tuple arguments for ccall by converting to GlobalRef eagerly
+        jl_sym_t *tuple_sym = jl_symbol("tuple");
+        if (jl_is_quotenode(fptr)) {
+            if (jl_is_string(jl_quotenode_value(fptr)) || jl_is_tuple(jl_quotenode_value(fptr)))
+                fptr = jl_quotenode_value(fptr);
+        }
+        if (jl_is_tuple(fptr)) {
+            // convert literal Tuple to Expr tuple
+            jl_expr_t *tupex = jl_exprn(tuple_sym, jl_nfields(fptr));
+            jl_value_t *v = NULL;
+            JL_GC_PUSH2(&tupex, &v);
+            for (long i = 0; i < jl_nfields(fptr); i++) {
+                v = jl_fieldref(fptr, i);
+                if (!jl_is_string(v))
+                    v = jl_new_struct(jl_quotenode_type, v);
+                jl_exprargset(tupex, i, v);
+            }
+            jl_exprargset(e, 0, tupex);
+            fptr = (jl_value_t*)tupex;
+            JL_GC_POP();
+        }
+        if (jl_is_expr(fptr) && ((jl_expr_t*)fptr)->head == tuple_sym) {
+            // verify Expr tuple can be interpreted and handle
+            jl_expr_t *tuple_expr = (jl_expr_t*)fptr;
+            size_t nargs_tuple = jl_expr_nargs(tuple_expr);
+            if (nargs_tuple == 0)
+                jl_error("ccall function name cannot be empty tuple");
+            if (nargs_tuple > 2)
+                jl_error("ccall function name tuple can have at most 2 elements");
+            // Validate tuple elements are not more complicated than inference/codegen can safely handle
+            for (size_t i = 0; i < nargs_tuple; i++) {
+                jl_value_t *arg = jl_exprarg(tuple_expr, i);
+                // Handle dot expressions by converting to a GlobalRef
+                if (jl_is_expr(arg) && ((jl_expr_t*)arg)->head == jl_dot_sym) {
+                    jl_expr_t *dot_expr = (jl_expr_t*)arg;
+                    if (jl_expr_nargs(dot_expr) != 2)
+                        jl_error("ccall function name: invalid dot expression");
+                    jl_value_t *mod_expr = jl_exprarg(dot_expr, 0);
+                    jl_value_t *sym_expr = jl_exprarg(dot_expr, 1);
+                    if (!(jl_is_quotenode(sym_expr) && jl_is_symbol(jl_quotenode_value(sym_expr))))
+                        jl_type_error("ccall name dot expression", (jl_value_t*)jl_symbol_type, sym_expr);
                     JL_TRY {
-                        rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals);
+                        // Evaluate the module expression
+                        jl_value_t *mod_val = jl_toplevel_eval(module, mod_expr);
+                        JL_TYPECHK(ccall name dot expression, module, mod_val);
+                        JL_GC_PROMISE_ROOTED(mod_val);
+                        // Create GlobalRef from evaluated module and quoted symbol
+                        jl_sym_t *sym = (jl_sym_t*)jl_quotenode_value(sym_expr);
+                        jl_value_t *globalref = jl_module_globalref((jl_module_t*)mod_val, sym);
+                        jl_exprargset(tuple_expr, i, globalref);
                     }
                     JL_CATCH {
-                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
-                            jl_error("could not evaluate ccall return type (it might depend on a local variable)");
+                        if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type))
+                            jl_error("could not evaluate ccall function/library name (it might depend on a local variable)");
                         else
                             jl_rethrow();
                     }
-                    jl_exprargset(e, 1, rt);
                 }
-                if (!jl_is_svec(at)) {
-                    JL_TRY {
-                        at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals);
+                else if (jl_is_quotenode(arg)) {
+                    if (i == 0) {
+                        // function name must be a symbol or string, library can be anything
+                        jl_value_t *quoted_val = jl_quotenode_value(arg);
+                        if (!jl_is_symbol(quoted_val) && !jl_is_string(quoted_val))
+                            jl_type_error("ccall function name", (jl_value_t*)jl_symbol_type, jl_quotenode_value(arg));
                     }
-                    JL_CATCH {
-                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
-                            jl_error("could not evaluate ccall argument type (it might depend on a local variable)");
-                        else
-                            jl_rethrow();
-                    }
-                    jl_exprargset(e, 2, at);
                 }
-                check_c_types("ccall method definition", rt, at);
-                JL_TYPECHK(ccall method definition, long, jl_exprarg(e, 3));
-                JL_TYPECHK(ccall method definition, quotenode, jl_exprarg(e, 4));
-                jl_value_t *cc = jl_quotenode_value(jl_exprarg(e, 4));
-                if (!jl_is_symbol(cc)) {
-                    JL_TYPECHK(ccall method definition, tuple, cc);
-                    if (jl_nfields(cc) != 2) {
-                        jl_error("In ccall calling convention, expected two argument tuple or symbol.");
-                    }
-                    JL_TYPECHK(ccall method definition, symbol, jl_get_nth_field(cc, 0));
-                    JL_TYPECHK(ccall method definition, uint8, jl_get_nth_field(cc, 1));
+                else if (!jl_is_globalref(arg) && jl_isa_ast_node(arg)) {
+                    jl_type_error(i == 0 ? "ccall function name" : "ccall library name", (jl_value_t*)jl_symbol_type, arg);
                 }
-                jl_exprargset(e, 0, resolve_globals(jl_exprarg(e, 0), module, sparam_vals, binding_effects, 1));
-                i++;
             }
-            if (e->head == jl_method_sym || e->head == jl_module_sym) {
-                i++;
+        }
+        else if (jl_is_string(fptr) || (jl_is_quotenode(fptr) && jl_is_symbol(jl_quotenode_value(fptr)))) {
+            // convert String to Expr (String,)
+            // convert QuoteNode(Symbol) to Expr (QuoteNode(Symbol),)
+            jl_expr_t *tupex = jl_exprn(tuple_sym, 1);
+            jl_exprargset(tupex, 0, fptr);
+            jl_exprargset(e, 0, tupex);
+            fptr = (jl_value_t*)tupex;
+        }
+        jl_value_t *rt = jl_exprarg(e, 1);
+        jl_value_t *at = jl_exprarg(e, 2);
+        if (!jl_is_type(rt)) {
+            JL_TRY {
+                rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals);
+            }
+            JL_CATCH {
+                if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type))
+                    jl_error("could not evaluate ccall return type (it might depend on a local variable)");
+                else
+                    jl_rethrow();
             }
-            for (; i < nargs; i++) {
-                // TODO: this should be making a copy, not mutating the source
-                jl_exprargset(e, i, resolve_globals(jl_exprarg(e, i), module, sparam_vals, binding_effects, eager_resolve));
+            jl_exprargset(e, 1, rt);
+        }
+        if (!jl_is_svec(at)) {
+            JL_TRY {
+                at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals);
             }
-            if (e->head == jl_call_sym && jl_expr_nargs(e) == 3 &&
-                    jl_is_globalref(jl_exprarg(e, 0)) &&
-                    jl_is_globalref(jl_exprarg(e, 1)) &&
-                    jl_is_quotenode(jl_exprarg(e, 2))) {
-                // replace module_expr.sym with GlobalRef(module, sym)
-                // for expressions pattern-matching to `getproperty(module_expr, :sym)` in a top-module
-                // (this is expected to help inference performance)
-                // TODO: this was broken by linear-IR
-                jl_value_t *s = jl_fieldref(jl_exprarg(e, 2), 0);
-                jl_value_t *me = jl_exprarg(e, 1);
-                jl_value_t *fe = jl_exprarg(e, 0);
-                jl_module_t *fe_mod = jl_globalref_mod(fe);
-                jl_sym_t *fe_sym = jl_globalref_name(fe);
-                jl_module_t *me_mod = jl_globalref_mod(me);
-                jl_sym_t *me_sym = jl_globalref_name(me);
-                if (fe_mod->istopmod && !strcmp(jl_symbol_name(fe_sym), "getproperty") && jl_is_symbol(s)) {
-                    if (eager_resolve || jl_binding_resolved_p(me_mod, me_sym)) {
-                        jl_binding_t *b = jl_get_binding(me_mod, me_sym);
-                        if (b && b->constp) {
-                            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
-                            if (v && jl_is_module(v))
-                                return jl_module_globalref((jl_module_t*)v, (jl_sym_t*)s);
-                        }
-                    }
-                }
+            JL_CATCH {
+                if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type))
+                    jl_error("could not evaluate ccall argument type (it might depend on a local variable)");
+                else
+                    jl_rethrow();
             }
-            if (e->head == jl_call_sym && nargs > 0 &&
-                    jl_is_globalref(jl_exprarg(e, 0))) {
-                // TODO: this hack should be deleted once llvmcall is fixed
-                jl_value_t *fe = jl_exprarg(e, 0);
-                jl_module_t *fe_mod = jl_globalref_mod(fe);
-                jl_sym_t *fe_sym = jl_globalref_name(fe);
-                if (jl_binding_resolved_p(fe_mod, fe_sym)) {
-                    // look at some known called functions
-                    jl_binding_t *b = jl_get_binding(fe_mod, fe_sym);
-                    if (b && b->constp && jl_atomic_load_relaxed(&b->value) == jl_builtin_tuple) {
-                        size_t j;
-                        for (j = 1; j < nargs; j++) {
-                            if (!jl_is_quotenode(jl_exprarg(e, j)))
-                                break;
-                        }
-                        if (j == nargs) {
-                            jl_value_t *val = NULL;
-                            JL_TRY {
-                                val = jl_interpret_toplevel_expr_in(module, (jl_value_t*)e, NULL, sparam_vals);
-                            }
-                            JL_CATCH {
-                            }
-                            if (val)
-                                return val;
-                        }
-                    }
+            jl_exprargset(e, 2, at);
+        }
+        check_c_types("ccall method definition", rt, at);
+        JL_TYPECHK(ccall method definition, long, jl_exprarg(e, 3));
+        JL_TYPECHK(ccall method definition, quotenode, jl_exprarg(e, 4));
+        jl_value_t *cc = jl_quotenode_value(jl_exprarg(e, 4));
+        if (!jl_is_symbol(cc)) {
+            JL_TYPECHK(ccall method definition, tuple, cc);
+            if (jl_nfields(cc) != 3) {
+                jl_error("In ccall calling convention, expected two argument tuple or symbol.");
+            }
+            JL_TYPECHK(ccall method definition, symbol, jl_get_nth_field(cc, 0));
+            JL_TYPECHK(ccall method definition, uint16, jl_get_nth_field(cc, 1));
+            JL_TYPECHK(ccall method definition, bool, jl_get_nth_field(cc, 2));
+        }
+    }
+    if (e->head == jl_call_sym && nargs > 0 &&
+            jl_is_globalref(jl_exprarg(e, 0))) {
+        // TODO: this hack should be deleted once llvmcall is fixed
+        jl_value_t *fe = jl_exprarg(e, 0);
+        jl_module_t *fe_mod = jl_globalref_mod(fe);
+        jl_sym_t *fe_sym = jl_globalref_name(fe);
+        // look at some known called functions
+        jl_binding_t *b = jl_get_binding(fe_mod, fe_sym);
+        if (jl_get_latest_binding_value_if_const(b) == BUILTIN(tuple)) {
+            size_t j;
+            for (j = 1; j < nargs; j++) {
+                if (!jl_is_quotenode(jl_exprarg(e, j)))
+                    break;
+            }
+            if (j == nargs) {
+                jl_value_t *val = NULL;
+                JL_TRY {
+                    val = jl_interpret_toplevel_expr_in(module, (jl_value_t*)e, NULL, sparam_vals);
                 }
+                JL_CATCH {
+                    val = NULL; // To make the analyzer happy see #define JL_TRY
+                }
+                if (val)
+                    return val;
             }
         }
     }
     return expr;
 }
 
-JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
+JL_DLLEXPORT void jl_resolve_definition_effects_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals, jl_value_t *binding_edge,
                               int binding_effects)
 {
-    size_t i, l = jl_array_len(stmts);
+    size_t i, l = jl_array_nrows(stmts);
     for (i = 0; i < l; i++) {
         jl_value_t *stmt = jl_array_ptr_ref(stmts, i);
-        jl_array_ptr_set(stmts, i, resolve_globals(stmt, m, sparam_vals, binding_effects, 0));
+        jl_array_ptr_set(stmts, i, resolve_definition_effects(stmt, m, sparam_vals, binding_edge, binding_effects));
     }
 }
 
@@ -278,34 +327,159 @@ jl_value_t *expr_arg1(jl_value_t *expr) {
     return jl_array_ptr_ref(args, 0);
 }
 
+static jl_value_t *alloc_edges(arraylist_t *edges_list)
+{
+    jl_value_t *jledges = (jl_value_t*)jl_alloc_svec(edges_list->len);
+    jl_value_t *jledges2 = NULL;
+    jl_value_t *codelocs = NULL;
+    JL_GC_PUSH3(&jledges, &jledges2, &codelocs);
+    size_t i;
+    for (i = 0; i < edges_list->len; i++) {
+        arraylist_t *edge = (arraylist_t*)edges_list->items[i];
+        jl_value_t *file = (jl_value_t*)edge->items[0];
+        int32_t line = 0; // not preserved by lowering (and probably lost even before that)
+        arraylist_t *edges_list2 = (arraylist_t*)edge->items[1];
+        size_t j, nlocs = (edge->len - 2) / 3;
+        codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nlocs * 3);
+        for (j = 0; j < nlocs; j++) {
+            jl_array_data(codelocs,int32_t)[3 * j + 0] = (intptr_t)edge->items[3 * j + 0 + 2];
+            jl_array_data(codelocs,int32_t)[3 * j + 1] = (intptr_t)edge->items[3 * j + 1 + 2];
+            jl_array_data(codelocs,int32_t)[3 * j + 2] = (intptr_t)edge->items[3 * j + 2 + 2];
+        }
+        codelocs = (jl_value_t*)jl_compress_codelocs(line, codelocs, nlocs);
+        jledges2 = alloc_edges(edges_list2);
+        jl_value_t *debuginfo = jl_new_struct(jl_debuginfo_type, file, jl_nothing, jledges2, codelocs);
+        jledges2 = NULL;
+        jl_svecset(jledges, i, debuginfo);
+        free(edges_list2);
+        free(edge);
+    }
+    JL_GC_POP();
+    return jledges;
+}
+
+static void add_edge(arraylist_t *edges_list, arraylist_t *inlinestack, int32_t *p_to, int32_t *p_pc)
+{
+    jl_value_t *locinfo = (jl_value_t*)arraylist_pop(inlinestack);
+    jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 0);
+    int32_t line = jl_unbox_int32(jl_fieldref(locinfo, 1));
+    size_t i;
+    arraylist_t *edge = NULL;
+    for (i = 0; i < edges_list->len; i++) {
+        edge = (arraylist_t*)edges_list->items[i];
+        if (edge->items[0] == filesym)
+            break;
+    }
+    if (i == edges_list->len) {
+        edge = (arraylist_t*)malloc(sizeof(arraylist_t));
+        arraylist_t *edge_list2 = (arraylist_t*)malloc(sizeof(arraylist_t));
+        arraylist_new(edge, 0);
+        arraylist_new(edge_list2, 0);
+        arraylist_push(edge, (void*)filesym);
+        arraylist_push(edge, (void*)edge_list2);
+        arraylist_push(edges_list, (void*)edge);
+    }
+    *p_to = i + 1;
+    int32_t to = 0, pc = 0;
+    if (inlinestack->len) {
+        arraylist_t *edge_list2 = (arraylist_t*)edge->items[1];
+        add_edge(edge_list2, inlinestack, &to, &pc);
+    }
+    for (i = 2; i < edge->len; i += 3) {
+        if ((intptr_t)edge->items[i + 0] == line &&
+            (intptr_t)edge->items[i + 1] == to &&
+            (intptr_t)edge->items[i + 2] == pc) {
+            break;
+        }
+    }
+    if (i == edge->len) {
+        arraylist_push(edge, (void*)(intptr_t)line);
+        arraylist_push(edge, (void*)(intptr_t)to);
+        arraylist_push(edge, (void*)(intptr_t)pc);
+    }
+    *p_pc = (i - 2) / 3 + 1;
+}
+
+jl_debuginfo_t *jl_linetable_to_debuginfo(jl_array_t *codelocs_any, jl_array_t *linetable)
+{
+    size_t nlocs = jl_array_nrows(codelocs_any);
+    jl_value_t *toplocinfo = jl_array_ptr_ref(linetable, 0);
+    jl_sym_t *topfile = (jl_sym_t*)jl_fieldref_noalloc(toplocinfo, 0);
+    int32_t topline = jl_unbox_int32(jl_fieldref(toplocinfo, 1));
+    arraylist_t inlinestack;
+    arraylist_new(&inlinestack, 0);
+    arraylist_t edges_list;
+    arraylist_new(&edges_list, 0);
+    jl_value_t *jledges = NULL;
+    jl_value_t *codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nlocs * 3);
+    jl_debuginfo_t *debuginfo = NULL;
+    JL_GC_PUSH3(&jledges, &codelocs, &debuginfo);
+    int32_t *codelocs32 = jl_array_data(codelocs,int32_t);
+    size_t j;
+    for (j = 0; j < nlocs; j++) {
+        size_t lineidx = jl_unbox_long(jl_array_ptr_ref((jl_array_t*)codelocs_any, j)); // 1 indexed!
+        while (lineidx != 0) {
+            jl_value_t *locinfo = jl_array_ptr_ref(linetable, lineidx - 1);
+            lineidx = jl_unbox_int32(jl_fieldref(locinfo, 2));
+            arraylist_push(&inlinestack, locinfo);
+        }
+        int32_t line = 0, to = 0, pc = 0;
+        if (inlinestack.len) {
+            jl_value_t *locinfo = (jl_value_t*)arraylist_pop(&inlinestack);
+            jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 0);
+            if (filesym == topfile)
+                line = jl_unbox_int32(jl_fieldref(locinfo, 1));
+            else
+                arraylist_push(&inlinestack, locinfo);
+            if (inlinestack.len) {
+                add_edge(&edges_list, &inlinestack, &to, &pc);
+            }
+        }
+        codelocs32[j * 3 + 0] = line;
+        codelocs32[j * 3 + 1] = to;
+        codelocs32[j * 3 + 2] = pc;
+    }
+    codelocs = (jl_value_t*)jl_compress_codelocs(topline, codelocs, nlocs);
+    jledges = alloc_edges(&edges_list);
+    debuginfo = (jl_debuginfo_t*)jl_new_struct(jl_debuginfo_type, topfile, jl_nothing, jledges, codelocs);
+    JL_GC_POP();
+    return debuginfo;
+}
+
 // copy a :lambda Expr into its CodeInfo representation,
 // including popping of known meta nodes
-static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
+jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ir)
 {
+    jl_code_info_t *li = NULL;
+    JL_GC_PUSH1(&li);
+    li = jl_new_code_info_uninit();
+
+    jl_expr_t *arglist = (jl_expr_t*)jl_exprarg(ir, 0);
+    li->nargs = jl_array_len(arglist);
+
     assert(jl_is_expr(ir));
     jl_expr_t *bodyex = (jl_expr_t*)jl_exprarg(ir, 2);
-    jl_value_t *codelocs = jl_exprarg(ir, 3);
-    li->linetable = jl_exprarg(ir, 4);
-    size_t nlocs = jl_array_len(codelocs);
-    li->codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nlocs);
-    size_t j;
-    for (j = 0; j < nlocs; j++) {
-        jl_arrayset((jl_array_t*)li->codelocs, jl_box_int32(jl_unbox_long(jl_arrayref((jl_array_t*)codelocs, j))),
-                    j);
-    }
+
+    jl_array_t *codelocs_any = (jl_array_t*)jl_exprarg(ir, 3);
+    jl_array_t *linetable = (jl_array_t*)jl_exprarg(ir, 4);
+    li->debuginfo = jl_linetable_to_debuginfo(codelocs_any, linetable);
+    jl_gc_wb(li, li->debuginfo);
+
     assert(jl_is_expr(bodyex));
     jl_array_t *body = bodyex->args;
     li->code = body;
     jl_gc_wb(li, li->code);
-    size_t n = jl_array_len(body);
+    size_t n = jl_array_nrows(body);
     jl_value_t **bd = (jl_value_t**)jl_array_ptr_data((jl_array_t*)li->code);
-    li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, n);
+    li->ssaflags = jl_alloc_array_1d(jl_array_uint32_type, n);
     jl_gc_wb(li, li->ssaflags);
     int inbounds_depth = 0; // number of stacked inbounds
-    // isempty(inline_flags): no user annotation
-    // last(inline_flags) == 1: inline region
-    // last(inline_flags) == 0: noinline region
+    // isempty(inline_flags): no user callsite inline annotation
+    // last(inline_flags) == 1: callsite inline region
+    // last(inline_flags) == 0: callsite noinline region
     arraylist_t *inline_flags = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0);
+    arraylist_t *purity_exprs = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0);
+    size_t j;
     for (j = 0; j < n; j++) {
         jl_value_t *st = bd[j];
         int is_flag_stmt = 0;
@@ -328,14 +502,32 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
                 else if (ma == (jl_value_t*)jl_no_constprop_sym)
                     li->constprop = 2;
                 else if (jl_is_expr(ma) && ((jl_expr_t*)ma)->head == jl_purity_sym) {
-                    if (jl_expr_nargs(ma) == 7) {
-                        li->purity.overrides.ipo_consistent = jl_unbox_bool(jl_exprarg(ma, 0));
-                        li->purity.overrides.ipo_effect_free = jl_unbox_bool(jl_exprarg(ma, 1));
-                        li->purity.overrides.ipo_nothrow = jl_unbox_bool(jl_exprarg(ma, 2));
-                        li->purity.overrides.ipo_terminates_globally = jl_unbox_bool(jl_exprarg(ma, 3));
-                        li->purity.overrides.ipo_terminates_locally = jl_unbox_bool(jl_exprarg(ma, 4));
-                        li->purity.overrides.ipo_notaskstate = jl_unbox_bool(jl_exprarg(ma, 5));
-                        li->purity.overrides.ipo_inaccessiblememonly = jl_unbox_bool(jl_exprarg(ma, 6));
+                    if (jl_expr_nargs(ma) == NUM_EFFECTS_OVERRIDES) {
+                        // N.B. this code allows multiple :purity expressions to be present in a single `:meta` node
+                        int8_t consistent = jl_unbox_bool(jl_exprarg(ma, 0));
+                        if (consistent) li->purity.overrides.ipo_consistent = consistent;
+                        int8_t effect_free = jl_unbox_bool(jl_exprarg(ma, 1));
+                        if (effect_free) li->purity.overrides.ipo_effect_free = effect_free;
+                        int8_t nothrow = jl_unbox_bool(jl_exprarg(ma, 2));
+                        if (nothrow) li->purity.overrides.ipo_nothrow = nothrow;
+                        int8_t terminates_globally = jl_unbox_bool(jl_exprarg(ma, 3));
+                        if (terminates_globally) li->purity.overrides.ipo_terminates_globally = terminates_globally;
+                        int8_t terminates_locally = jl_unbox_bool(jl_exprarg(ma, 4));
+                        if (terminates_locally) li->purity.overrides.ipo_terminates_locally = terminates_locally;
+                        int8_t notaskstate = jl_unbox_bool(jl_exprarg(ma, 5));
+                        if (notaskstate) li->purity.overrides.ipo_notaskstate = notaskstate;
+                        int8_t inaccessiblememonly = jl_unbox_bool(jl_exprarg(ma, 6));
+                        if (inaccessiblememonly) li->purity.overrides.ipo_inaccessiblememonly = inaccessiblememonly;
+                        int8_t noub = jl_unbox_bool(jl_exprarg(ma, 7));
+                        if (noub) li->purity.overrides.ipo_noub = noub;
+                        int8_t noub_if_noinbounds = jl_unbox_bool(jl_exprarg(ma, 8));
+                        if (noub_if_noinbounds) li->purity.overrides.ipo_noub_if_noinbounds = noub_if_noinbounds;
+                        int8_t consistent_overlay = jl_unbox_bool(jl_exprarg(ma, 9));
+                        if (consistent_overlay) li->purity.overrides.ipo_consistent_overlay = consistent_overlay;
+                        int8_t nortcall = jl_unbox_bool(jl_exprarg(ma, 10));
+                        if (nortcall) li->purity.overrides.ipo_nortcall = nortcall;
+                    } else {
+                        assert(jl_expr_nargs(ma) == 0);
                     }
                 }
                 else
@@ -380,31 +572,62 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
             }
             bd[j] = jl_nothing;
         }
-        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_return_sym) {
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_purity_sym) {
+            is_flag_stmt = 1;
+            size_t na = jl_expr_nargs(st);
+            if (na == NUM_EFFECTS_OVERRIDES)
+                arraylist_push(purity_exprs, (void*)st);
+            else {
+                assert(na == 0);
+                arraylist_pop(purity_exprs);
+            }
+            bd[j] = jl_nothing;
+        }
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_boundscheck_sym)
+            // Don't set IR_FLAG_INBOUNDS on boundscheck at the same level
+            is_flag_stmt = 1;
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_return_sym)
             jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0)));
+        else if (jl_is_globalref(st)) {
+            jl_globalref_t *gr = (jl_globalref_t*)st;
+            if (jl_object_in_image((jl_value_t*)gr->mod))
+                li->has_image_globalref = 1;
         }
-        else if (jl_is_expr(st) && (((jl_expr_t*)st)->head == jl_foreigncall_sym || ((jl_expr_t*)st)->head == jl_cfunction_sym)) {
-            li->has_fcall = 1;
+        else {
+            if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_assign_sym)
+                st = jl_exprarg(st, 1);
+            if (jl_is_expr(st) && (((jl_expr_t*)st)->head == jl_foreigncall_sym || ((jl_expr_t*)st)->head == jl_cfunction_sym))
+                li->has_fcall = 1;
         }
         if (is_flag_stmt)
-            jl_array_uint8_set(li->ssaflags, j, 0);
+            jl_array_uint32_set(li->ssaflags, j, 0);
         else {
-            uint8_t flag = 0;
+            uint32_t flag = 0;
             if (inbounds_depth > 0)
-                flag |= 1 << 0;
+                flag |= IR_FLAG_INBOUNDS;
             if (inline_flags->len > 0) {
-                void* inline_flag = inline_flags->items[inline_flags->len - 1];
+                void* inline_flag = inline_flags->items[inline_flags->len-1];
                 flag |= 1 << (inline_flag ? 1 : 2);
             }
-            jl_array_uint8_set(li->ssaflags, j, flag);
+            int n_purity_exprs = purity_exprs->len;
+            if (n_purity_exprs > 0) {
+                // apply all purity overrides
+                for (int i = 0; i < n_purity_exprs; i++) {
+                    void* purity_expr = purity_exprs->items[i];
+                    for (int j = 0; j < NUM_EFFECTS_OVERRIDES; j++) {
+                        flag |= jl_unbox_bool(jl_exprarg((jl_value_t*)purity_expr, j)) ? (1 << (NUM_IR_FLAGS+j)) : 0;
+                    }
+                }
+            }
+            jl_array_uint32_set(li->ssaflags, j, flag);
         }
     }
-    assert(inline_flags->len == 0); // malformed otherwise
-    arraylist_free(inline_flags);
-    free(inline_flags);
+    assert(inline_flags->len == 0 && purity_exprs->len == 0); // malformed otherwise
+    arraylist_free(inline_flags); arraylist_free(purity_exprs);
+    free(inline_flags); free(purity_exprs);
     jl_array_t *vinfo = (jl_array_t*)jl_exprarg(ir, 1);
     jl_array_t *vis = (jl_array_t*)jl_array_ptr_ref(vinfo, 0);
-    size_t nslots = jl_array_len(vis);
+    size_t nslots = jl_array_nrows(vis);
     jl_value_t *ssavalue_types = jl_array_ptr_ref(vinfo, 2);
     assert(jl_is_long(ssavalue_types));
     size_t nssavalue = jl_unbox_long(ssavalue_types);
@@ -436,6 +659,8 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
         jl_array_ptr_set(li->slotnames, i, name);
         jl_array_uint8_set(li->slotflags, i, vinfo_mask & jl_unbox_long(jl_array_ptr_ref(vi, 2)));
     }
+    JL_GC_POP();
+    return li;
 }
 
 JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
@@ -447,13 +672,11 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
     mi->def.value = NULL;
     mi->specTypes = NULL;
     mi->sparam_vals = jl_emptysvec;
-    jl_atomic_store_relaxed(&mi->uninferred, NULL);
     mi->backedges = NULL;
-    mi->callbacks = NULL;
     jl_atomic_store_relaxed(&mi->cache, NULL);
-    mi->inInference = 0;
     mi->cache_with_orig = 0;
-    jl_atomic_store_relaxed(&mi->precompiled, 0);
+    jl_atomic_store_relaxed(&mi->flags, 0);
+    jl_atomic_store_relaxed(&mi->dispatch_status, 0);
     return mi;
 }
 
@@ -462,66 +685,33 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void)
     jl_task_t *ct = jl_current_task;
     jl_code_info_t *src =
         (jl_code_info_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_info_t),
-                                       jl_code_info_type);
+                                     jl_code_info_type);
     src->code = NULL;
-    src->codelocs = NULL;
+    src->debuginfo = NULL;
     src->ssavaluetypes = NULL;
     src->ssaflags = NULL;
     src->method_for_inference_limit_heuristics = jl_nothing;
-    src->linetable = jl_nothing;
     src->slotflags = NULL;
     src->slotnames = NULL;
     src->slottypes = jl_nothing;
-    src->parent = (jl_method_instance_t*)jl_nothing;
     src->rettype = (jl_value_t*)jl_any_type;
+    src->edges = (jl_value_t*)jl_emptysvec;
+    src->parent = (jl_method_instance_t*)jl_nothing;
     src->min_world = 1;
     src->max_world = ~(size_t)0;
-    src->inferred = 0;
     src->propagate_inbounds = 0;
     src->has_fcall = 0;
+    src->has_image_globalref = 0;
     src->nospecializeinfer = 0;
-    src->edges = jl_nothing;
     src->constprop = 0;
     src->inlining = 0;
     src->purity.bits = 0;
+    src->nargs = 0;
+    src->isva = 0;
     src->inlining_cost = UINT16_MAX;
     return src;
 }
 
-jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ir)
-{
-    jl_code_info_t *src = NULL;
-    JL_GC_PUSH1(&src);
-    src = jl_new_code_info_uninit();
-    jl_code_info_set_ir(src, ir);
-    JL_GC_POP();
-    return src;
-}
-
-void jl_add_function_to_lineinfo(jl_code_info_t *ci, jl_value_t *func)
-{
-    // func may contain jl_symbol (function name), jl_method_t, or jl_method_instance_t
-    jl_array_t *li = (jl_array_t*)ci->linetable;
-    size_t i, n = jl_array_len(li);
-    jl_value_t *rt = NULL, *lno = NULL, *inl = NULL;
-    JL_GC_PUSH3(&rt, &lno, &inl);
-    for (i = 0; i < n; i++) {
-        jl_value_t *ln = jl_array_ptr_ref(li, i);
-        assert(jl_typetagis(ln, jl_lineinfonode_type));
-        jl_value_t *mod = jl_fieldref_noalloc(ln, 0);
-        jl_value_t *file = jl_fieldref_noalloc(ln, 2);
-        lno = jl_fieldref(ln, 3);
-        inl = jl_fieldref(ln, 4);
-        // respect a given linetable if available
-        jl_value_t *ln_func = jl_fieldref_noalloc(ln, 1);
-        if (jl_is_symbol(ln_func) && (jl_sym_t*)ln_func == jl_symbol("none") && jl_is_int32(inl) && jl_unbox_int32(inl) == 0)
-            ln_func = func;
-        rt = jl_new_struct(jl_lineinfonode_type, mod, ln_func, file, lno, inl);
-        jl_array_ptr_set(li, i, rt);
-    }
-    JL_GC_POP();
-}
-
 // invoke (compiling if necessary) the jlcall function pointer for a method template
 static jl_value_t *jl_call_staged(jl_method_t *def, jl_value_t *generator,
         size_t world, jl_svec_t *sparam_vals, jl_value_t **args, uint32_t nargs)
@@ -531,8 +721,7 @@ static jl_value_t *jl_call_staged(jl_method_t *def, jl_value_t *generator,
     size_t totargs = 2 + n_sparams + def->nargs;
     JL_GC_PUSHARGS(gargs, totargs);
     gargs[0] = jl_box_ulong(world);
-    gargs[1] = jl_box_long(def->line);
-    gargs[1] = jl_new_struct(jl_linenumbernode_type, gargs[1], def->file);
+    gargs[1] = (jl_value_t*)def;
     memcpy(&gargs[2], jl_svec_data(sparam_vals), n_sparams * sizeof(void*));
     memcpy(&gargs[2 + n_sparams], args, (def->nargs - def->isva) * sizeof(void*));
     if (def->isva)
@@ -542,102 +731,168 @@ static jl_value_t *jl_call_staged(jl_method_t *def, jl_value_t *generator,
     return code;
 }
 
-// Lower `ex` into Julia IR, and (if it expands into a CodeInfo) resolve global-variable
-// references in light of the provided type parameters.
-// Like `jl_expand`, if there is an error expanding the provided expression, the return value
-// will be an error expression (an `Expr` with `error_sym` as its head), which should be eval'd
-// in the caller's context.
-JL_DLLEXPORT jl_code_info_t *jl_expand_and_resolve(jl_value_t *ex, jl_module_t *module,
-                                                   jl_svec_t *sparam_vals) {
-    jl_code_info_t *func = (jl_code_info_t*)jl_expand((jl_value_t*)ex, module);
-    JL_GC_PUSH1(&func);
-    if (jl_is_code_info(func)) {
-        jl_array_t *stmts = (jl_array_t*)func->code;
-        jl_resolve_globals_in_ir(stmts, module, sparam_vals, 1);
+JL_DLLEXPORT jl_code_instance_t *jl_cached_uninferred(jl_code_instance_t *codeinst, size_t world)
+{
+    for (; codeinst; codeinst = jl_atomic_load_relaxed(&codeinst->next)) {
+        if (codeinst->owner != (void*)jl_uninferred_sym)
+            continue;
+        if (jl_atomic_load_relaxed(&codeinst->min_world) <= world && world <= jl_atomic_load_relaxed(&codeinst->max_world)) {
+            return codeinst;
+        }
     }
-    JL_GC_POP();
-    return func;
+    return NULL;
+}
+
+JL_DLLEXPORT jl_code_instance_t *jl_cache_uninferred(jl_method_instance_t *mi, jl_code_instance_t *checked, size_t world, jl_code_instance_t *newci)
+{
+    while (!jl_mi_try_insert(mi, checked, newci)) {
+        jl_code_instance_t *new_checked = jl_atomic_load_relaxed(&mi->cache);
+        // Check if another thread inserted a CodeInstance that covers this world
+        jl_code_instance_t *other = jl_cached_uninferred(new_checked, world);
+        if (other)
+            return other;
+        checked = new_checked;
+    }
+    // Successfully inserted
+    return newci;
 }
 
 // Return a newly allocated CodeInfo for the function signature
 // effectively described by the tuple (specTypes, env, Method) inside linfo
-JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world)
+JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *mi, size_t world, jl_code_instance_t **cache)
 {
-    jl_value_t *uninferred = jl_atomic_load_relaxed(&linfo->uninferred);
-    if (uninferred) {
-        assert(jl_is_code_info(uninferred)); // make sure this did not get `nothing` put here
-        return (jl_code_info_t*)jl_copy_ast((jl_value_t*)uninferred);
+    jl_code_instance_t *cache_ci = jl_atomic_load_relaxed(&mi->cache);
+    jl_code_instance_t *uninferred_ci = jl_cached_uninferred(cache_ci, world);
+    if (uninferred_ci) {
+        // The uninferred code is in `inferred`, but that is a bit of a misnomer here.
+        // This is the cached output the generated function (or top-level thunk).
+        // This cache has a non-standard owner (indicated by `->owner === :uninferred`),
+        // so it doesn't get confused for inference results.
+        jl_code_info_t *src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred_ci->inferred);
+        assert(jl_is_code_info(src)); // make sure this did not get `nothing` put here
+        return (jl_code_info_t*)jl_copy_ast((jl_value_t*)src);
     }
 
     JL_TIMING(STAGED_FUNCTION, STAGED_FUNCTION);
-    jl_value_t *tt = linfo->specTypes;
-    jl_method_t *def = linfo->def.method;
-    jl_timing_show_method_instance(linfo, JL_TIMING_DEFAULT_BLOCK);
+    jl_value_t *tt = mi->specTypes;
+    jl_method_t *def = mi->def.method;
+    jl_timing_show_method_instance(mi, JL_TIMING_DEFAULT_BLOCK);
     jl_value_t *generator = def->generator;
     assert(generator != NULL);
     assert(jl_is_method(def));
     jl_code_info_t *func = NULL;
     jl_value_t *ex = NULL;
-    JL_GC_PUSH2(&ex, &func);
+    jl_value_t *kind = NULL;
+    jl_code_info_t *uninferred = NULL;
+    jl_code_instance_t *ci = NULL;
+    JL_GC_PUSH5(&ex, &func, &uninferred, &ci, &kind);
     jl_task_t *ct = jl_current_task;
-    int last_lineno = jl_lineno;
+    int last_lineno = jl_atomic_load_relaxed(&jl_lineno);
     int last_in = ct->ptls->in_pure_callback;
     size_t last_age = ct->world_age;
 
     JL_TRY {
         ct->ptls->in_pure_callback = 1;
-        ct->world_age = def->primary_world;
+        ct->world_age = jl_atomic_load_relaxed(&def->primary_world);
+        if (ct->world_age > jl_atomic_load_acquire(&jl_world_counter))
+            jl_error("The generator method cannot run until it is added to a method table.");
 
         // invoke code generator
         jl_tupletype_t *ttdt = (jl_tupletype_t*)jl_unwrap_unionall(tt);
-        ex = jl_call_staged(def, generator, world, linfo->sparam_vals, jl_svec_data(ttdt->parameters), jl_nparams(ttdt));
+        ex = jl_call_staged(def, generator, world, mi->sparam_vals, jl_svec_data(ttdt->parameters), jl_nparams(ttdt));
 
         // do some post-processing
-        if (jl_is_code_info(ex)) {
-            func = (jl_code_info_t*)ex;
-            jl_array_t *stmts = (jl_array_t*)func->code;
-            jl_resolve_globals_in_ir(stmts, def->module, linfo->sparam_vals, 1);
+        if (!jl_is_code_info(ex)) {
+            jl_error("As of Julia 1.12, generated functions must return `CodeInfo`. See `Base.generated_body_to_codeinfo`.");
         }
-        else {
-            // Lower the user's expression and resolve references to the type parameters
-            func = jl_expand_and_resolve(ex, def->module, linfo->sparam_vals);
-            if (!jl_is_code_info(func)) {
-                if (jl_is_expr(func) && ((jl_expr_t*)func)->head == jl_error_sym) {
-                    ct->ptls->in_pure_callback = 0;
-                    jl_toplevel_eval(def->module, (jl_value_t*)func);
-                }
-                jl_error("The function body AST defined by this @generated function is not pure. This likely means it contains a closure, a comprehension or a generator.");
-            }
-        }
-        jl_add_function_to_lineinfo(func, (jl_value_t*)def->name);
+        func = (jl_code_info_t*)ex;
+        jl_array_t *stmts = (jl_array_t*)func->code;
+        jl_resolve_definition_effects_in_ir(stmts, def->module, mi->sparam_vals, NULL, 1);
+        ex = NULL;
 
         // If this generated function has an opaque closure, cache it for
-        // correctness of method identity
-        for (int i = 0; i < jl_array_len(func->code); ++i) {
+        // correctness of method identity. In particular, other methods that call
+        // this method may end up referencing it in a PartialOpaque lattice element
+        // type. If the method identity were to change (for the same world age)
+        // in between invocations of this method, that return type inference would
+        // no longer be correct.
+        int needs_cache_for_correctness = 0;
+        for (int i = 0; i < jl_array_nrows(func->code); ++i) {
             jl_value_t *stmt = jl_array_ptr_ref(func->code, i);
             if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_new_opaque_closure_sym) {
+                if (jl_expr_nargs(stmt) >= 4 && jl_is_bool(jl_exprarg(stmt, 3)) && !jl_unbox_bool(jl_exprarg(stmt, 3))) {
+                    // If this new_opaque_closure is prohibited from sourcing PartialOpaque,
+                    // there is no problem
+                    continue;
+                }
                 if (jl_options.incremental && jl_generating_output())
                     jl_error("Impossible to correctly handle OpaqueClosure inside @generated returned during precompile process.");
-                jl_value_t *uninferred = jl_copy_ast((jl_value_t*)func);
-                jl_value_t *old = NULL;
-                if (jl_atomic_cmpswap(&linfo->uninferred, &old, uninferred)) {
-                    jl_gc_wb(linfo, uninferred);
+                needs_cache_for_correctness = 1;
+                break;
+            }
+        }
+
+        if ((func->edges == jl_nothing || func->edges == (jl_value_t*)jl_emptysvec) && func->max_world == ~(size_t)0) {
+            if (func->min_world != 1) {
+                jl_error("Generated function result with `edges == nothing` and `max_world == typemax(UInt)` must have `min_world == 1`");
+            }
+        }
+
+        if (cache || needs_cache_for_correctness) {
+            // TODO: this should poison the runtime, so that attempts to call save in staticdata afterwards will abort,
+            // since enabling `needs_cache_for_correctness` is unsound in the presence of cache files
+            uninferred = (jl_code_info_t*)jl_copy_ast((jl_value_t*)func);
+            ci = jl_new_codeinst_for_uninferred(mi, uninferred);
+            jl_code_instance_t *cached_ci = jl_cache_uninferred(mi, cache_ci, world, ci);
+            if (cached_ci != ci) {
+                func = (jl_code_info_t*)jl_copy_ast(jl_atomic_load_relaxed(&cached_ci->inferred));
+                assert(jl_is_code_info(func));
+            }
+            else if (uninferred->edges != jl_nothing) {
+                // N.B.: This needs to match `store_backedges` on the julia side
+                jl_value_t *edges = uninferred->edges;
+                size_t l;
+                jl_value_t **data;
+                if (jl_is_svec(edges)) {
+                    l = jl_svec_len(edges);
+                    data = jl_svec_data(edges);
                 }
                 else {
-                    assert(jl_is_code_info(old));
-                    func = (jl_code_info_t*)old;
+                    l = jl_array_dim0(edges);
+                    data = jl_array_data(edges, jl_value_t*);
+                }
+                for (size_t i = 0; i < l; ) {
+                    kind = data[i++];
+                    if (jl_is_method_instance(kind)) {
+                        jl_method_instance_add_backedge((jl_method_instance_t*)kind, jl_nothing, ci);
+                    }
+                    else if (jl_is_binding(kind)) {
+                        jl_add_binding_backedge((jl_binding_t*)kind, (jl_value_t*)ci);
+                    }
+                    else if (jl_is_mtable(kind)) {
+                        assert(i < l);
+                        ex = data[i++];
+                        if ((jl_methtable_t*)kind == jl_method_table)
+                            jl_method_table_add_backedge(ex, ci);
+                    }
+                    else {
+                        assert(i < l);
+                        ex = data[i++];
+                        jl_method_instance_add_backedge((jl_method_instance_t*)ex, kind, ci);
+                    }
                 }
-                break;
             }
+            if (cache)
+                *cache = cached_ci;
         }
 
         ct->ptls->in_pure_callback = last_in;
-        jl_lineno = last_lineno;
+        jl_atomic_store_relaxed(&jl_lineno, last_lineno);
         ct->world_age = last_age;
     }
     JL_CATCH {
         ct->ptls->in_pure_callback = last_in;
-        jl_lineno = last_lineno;
+        jl_atomic_store_relaxed(&jl_lineno, last_lineno);
         jl_rethrow();
     }
     JL_GC_POP();
@@ -688,14 +943,13 @@ JL_DLLEXPORT void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
     m->nospecializeinfer = src->nospecializeinfer;
     m->constprop = src->constprop;
     m->purity.bits = src->purity.bits;
-    jl_add_function_to_lineinfo(src, (jl_value_t*)m->name);
 
     jl_array_t *copy = NULL;
     jl_svec_t *sparam_vars = jl_outer_unionall_vars(m->sig);
     JL_GC_PUSH3(&copy, &sparam_vars, &src);
     assert(jl_typetagis(src->code, jl_array_any_type));
     jl_array_t *stmts = (jl_array_t*)src->code;
-    size_t i, n = jl_array_len(stmts);
+    size_t i, n = jl_array_nrows(stmts);
     copy = jl_alloc_vec_any(n);
     for (i = 0; i < n; i++) {
         jl_value_t *st = jl_array_ptr_ref(stmts, i);
@@ -767,20 +1021,31 @@ JL_DLLEXPORT void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
             }
         }
         else {
-            st = resolve_globals(st, m->module, sparam_vars, 1, 0);
+            st = resolve_definition_effects(st, m->module, sparam_vars, (jl_value_t*)m, 1);
         }
         jl_array_ptr_set(copy, i, st);
     }
     src = jl_copy_code_info(src);
+    src->isva = m->isva; // TODO: It would be nice to reverse this
+    // If nargs hasn't been set yet, do it now. This can happen if an old CodeInfo is deserialized.
+    if (src->nargs == 0)
+        src->nargs = m->nargs;
+    assert(m->nargs == src->nargs);
     src->code = copy;
     jl_gc_wb(src, copy);
     m->slot_syms = jl_compress_argnames(src->slotnames);
     jl_gc_wb(m, m->slot_syms);
-    if (gen_only)
+    if (gen_only) {
         m->source = NULL;
-    else
-        m->source = (jl_value_t*)jl_compress_ir(m, src);
-    jl_gc_wb(m, m->source);
+    }
+    else {
+        m->debuginfo = src->debuginfo;
+        jl_gc_wb(m, m->debuginfo);
+        m->source = (jl_value_t*)src;
+        jl_gc_wb(m, m->source);
+        m->source = (jl_value_t*)jl_compress_ir(m, NULL);
+        jl_gc_wb(m, m->source);
+    }
     JL_GC_POP();
 }
 
@@ -790,7 +1055,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
     jl_method_t *m =
         (jl_method_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_t), jl_method_type);
     jl_atomic_store_relaxed(&m->specializations, (jl_value_t*)jl_emptysvec);
-    jl_atomic_store_relaxed(&m->speckeyset, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&m->speckeyset, (jl_genericmemory_t*)jl_an_empty_memory_any);
     m->sig = NULL;
     m->slot_syms = NULL;
     m->roots = NULL;
@@ -800,6 +1065,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
     m->module = module;
     m->external_mt = NULL;
     m->source = NULL;
+    m->debuginfo = NULL;
     jl_atomic_store_relaxed(&m->unspecialized, NULL);
     m->generator = NULL;
     m->name = NULL;
@@ -812,10 +1078,12 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
     m->recursion_relation = NULL;
     m->isva = 0;
     m->nargs = 0;
-    m->primary_world = 1;
-    m->deleted_world = ~(size_t)0;
+    jl_atomic_store_relaxed(&m->primary_world, ~(size_t)0);
+    jl_atomic_store_relaxed(&m->dispatch_status, 0);
+    jl_atomic_store_relaxed(&m->interferences, (jl_genericmemory_t*)jl_an_empty_memory_any);
     m->is_for_opaque_closure = 0;
     m->nospecializeinfer = 0;
+    jl_atomic_store_relaxed(&m->did_scan_source, 0);
     m->constprop = 0;
     m->purity.bits = 0;
     m->max_varargs = UINT8_MAX;
@@ -830,27 +1098,27 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
 // it will be the signature supplied in an `invoke` call.
 // If you don't need `invokesig`, you can set it to NULL on input.
 // Initialize iteration with `i = 0`. Returns `i` for the next backedge to be extracted.
-int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_method_instance_t **caller) JL_NOTSAFEPOINT
+int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_code_instance_t **caller) JL_NOTSAFEPOINT
 {
     jl_value_t *item = jl_array_ptr_ref(list, i);
-    if (jl_is_method_instance(item)) {
-        // Not an `invoke` call, it's just the MethodInstance
+    if (!item || jl_is_code_instance(item)) {
+        // Not an `invoke` call, it's just the CodeInstance
         if (invokesig != NULL)
             *invokesig = NULL;
-        *caller = (jl_method_instance_t*)item;
+        *caller = (jl_code_instance_t*)item;
         return i + 1;
     }
     assert(jl_is_type(item));
     // An `invoke` call, it's a (sig, MethodInstance) pair
     if (invokesig != NULL)
         *invokesig = item;
-    *caller = (jl_method_instance_t*)jl_array_ptr_ref(list, i + 1);
+    *caller = (jl_code_instance_t*)jl_array_ptr_ref(list, i + 1);
     if (*caller)
-        assert(jl_is_method_instance(*caller));
+        assert(jl_is_code_instance(*caller));
     return i + 2;
 }
 
-int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_method_instance_t *caller)
+int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_code_instance_t *caller)
 {
     if (invokesig)
         jl_array_ptr_set(list, i++, invokesig);
@@ -858,7 +1126,15 @@ int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_method_inst
     return i;
 }
 
-void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *caller)
+int clear_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_code_instance_t *caller)
+{
+    if (invokesig)
+        jl_array_ptr_set(list, i++, NULL);
+    jl_array_ptr_set(list, i++, NULL);
+    return i;
+}
+
+void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_code_instance_t *caller)
 {
     if (invokesig)
         jl_array_ptr_1d_push(list, invokesig);
@@ -866,10 +1142,37 @@ void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *ca
     return;
 }
 
+void jl_mi_done_backedges(jl_method_instance_t *mi JL_PROPAGATES_ROOT, uint8_t old_flags) {
+    uint8_t flags_now = 0;
+    jl_array_t *backedges = jl_mi_get_backedges_mutate(mi, &flags_now);
+    if (backedges && !old_flags) {
+        if (flags_now & MI_FLAG_BACKEDGES_DIRTY) {
+            size_t n = jl_array_nrows(backedges);
+            size_t i = 0;
+            size_t insb = 0;
+            while (i < n) {
+                jl_value_t *invokesig;
+                jl_code_instance_t *caller;
+                i = get_next_edge(backedges, i, &invokesig, &caller);
+                if (!caller)
+                    continue;
+                insb = set_next_edge(backedges, insb, invokesig, caller);
+            }
+            if (insb == n) {
+                // All were deleted
+                mi->backedges = NULL;
+            } else {
+                jl_array_del_end(backedges, n - insb);
+            }
+        }
+        jl_atomic_fetch_and_relaxed(&mi->flags, ~MI_FLAG_BACKEDGES_ALL);
+    }
+}
+
 // method definition ----------------------------------------------------------
 
 jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
-    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva)
+    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva, int isinferred)
 {
     jl_method_t *m = jl_new_method_uninit(module);
     JL_GC_PUSH1(&m);
@@ -888,88 +1191,71 @@ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name
     jl_value_t *file = jl_linenode_file(functionloc);
     m->file = jl_is_symbol(file) ? (jl_sym_t*)file : jl_empty_sym;
     m->line = jl_linenode_line(functionloc);
-    jl_method_set_source(m, ci);
+    if (isinferred) {
+        m->slot_syms = jl_compress_argnames(ci->slotnames);
+        jl_gc_wb(m, m->slot_syms);
+    } else {
+        jl_method_set_source(m, ci);
+    }
     JL_GC_POP();
     return m;
 }
 
-// empty generic function def
-JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
-                                                 jl_module_t *module,
-                                                 _Atomic(jl_value_t*) *bp,
-                                                 jl_binding_t *bnd)
+JL_DLLEXPORT void jl_check_gf(jl_value_t *gf, jl_sym_t *name)
 {
-    jl_value_t *gf = NULL;
-
-    assert(name && bp);
-    if (bnd && jl_atomic_load_relaxed(&bnd->value) != NULL && !bnd->constp)
+    if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf))
         jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name));
-    gf = jl_atomic_load_relaxed(bp);
-    if (gf != NULL) {
-        if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf))
-            jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name));
-    }
-    if (bnd)
-        bnd->constp = 1; // XXX: use jl_declare_constant and jl_checked_assignment
-    if (gf == NULL) {
-        gf = (jl_value_t*)jl_new_generic_function(name, module);
-        jl_atomic_store(bp, gf); // TODO: fix constp assignment data race
-        if (bnd) jl_gc_wb(bnd, gf);
-    }
-    return gf;
 }
 
-static jl_methtable_t *nth_methtable(jl_value_t *a JL_PROPAGATES_ROOT, int n) JL_NOTSAFEPOINT
+JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_module_t *mod, jl_sym_t *name)
 {
-    if (jl_is_datatype(a)) {
-        if (n == 0) {
-            jl_methtable_t *mt = ((jl_datatype_t*)a)->name->mt;
-            if (mt != NULL)
-                return mt;
-        }
-        else if (jl_is_tuple_type(a)) {
-            if (jl_nparams(a) >= n)
-                return nth_methtable(jl_tparam(a, n - 1), 0);
-        }
-    }
-    else if (jl_is_typevar(a)) {
-        return nth_methtable(((jl_tvar_t*)a)->ub, n);
-    }
-    else if (jl_is_unionall(a)) {
-        return nth_methtable(((jl_unionall_t*)a)->body, n);
+    JL_LOCK(&world_counter_lock);
+    size_t new_world = jl_atomic_load_relaxed(&jl_world_counter) + 1;
+    jl_binding_t *b = jl_get_module_binding(mod, name, 1);
+    jl_value_t *gf = jl_get_existing_strong_gf(b, new_world);
+    if (gf) {
+        jl_check_gf(gf, name);
+        JL_UNLOCK(&world_counter_lock);
+        return gf;
     }
-    else if (jl_is_uniontype(a)) {
-        jl_uniontype_t *u = (jl_uniontype_t*)a;
-        jl_methtable_t *m1 = nth_methtable(u->a, n);
-        if ((jl_value_t*)m1 != jl_nothing) {
-            jl_methtable_t *m2 = nth_methtable(u->b, n);
-            if (m1 == m2)
-                return m1;
-        }
-    }
-    return (jl_methtable_t*)jl_nothing;
+    gf = (jl_value_t*)jl_new_generic_function(name, mod, new_world);
+    // From this point on (if we didn't error), we're committed to raising the world age,
+    // because we've used it to declare the type name.
+    jl_declare_constant_val3(b, mod, name, gf, PARTITION_KIND_CONST, new_world);
+    jl_atomic_store_release(&jl_world_counter, new_world);
+    JL_GC_PROMISE_ROOTED(gf);
+    JL_UNLOCK(&world_counter_lock);
+    return gf;
 }
 
+
 // get the MethodTable for dispatch, or `nothing` if cannot be determined
 JL_DLLEXPORT jl_methtable_t *jl_method_table_for(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
 {
-    return nth_methtable(argtypes, 1);
+    return jl_method_table;
 }
 
-jl_methtable_t *jl_kwmethod_table_for(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+// get a MethodCache for dispatch
+JL_DLLEXPORT jl_methcache_t *jl_method_cache_for(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
 {
-    jl_methtable_t *kwmt = nth_methtable(argtypes, 3);
-    if ((jl_value_t*)kwmt == jl_nothing)
-        return NULL;
-    return kwmt;
+    return jl_method_table->cache;
+}
+
+jl_methcache_t *jl_kwmethod_cache_for(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+{
+    return jl_method_table->cache;
 }
 
 JL_DLLEXPORT jl_methtable_t *jl_method_get_table(jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
 {
-    return method->external_mt ? (jl_methtable_t*)method->external_mt : jl_method_table_for(method->sig);
+    return method->external_mt ? (jl_methtable_t*)method->external_mt : jl_method_table;
 }
 
-jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
+// get an arbitrary MethodCache for dispatch optimizations of method
+JL_DLLEXPORT jl_methcache_t *jl_method_get_cache(jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+{
+    return jl_method_get_table(method)->cache;
+}
 
 JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
                                         jl_methtable_t *mt,
@@ -985,25 +1271,29 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
     size_t nargs = jl_svec_len(atypes);
     assert(nargs > 0);
     int isva = jl_is_vararg(jl_svecref(atypes, nargs - 1));
-    if (!jl_is_type(jl_svecref(atypes, 0)) || (isva && nargs == 1))
+    jl_value_t *ft = jl_svecref(atypes, 0);
+    if (!jl_is_type(ft) || (isva && nargs == 1))
         jl_error("function type in method definition is not a type");
     jl_sym_t *name;
     jl_method_t *m = NULL;
     jl_value_t *argtype = NULL;
-    JL_GC_PUSH3(&f, &m, &argtype);
+    JL_GC_PUSH4(&ft, &f, &m, &argtype);
     size_t i, na = jl_svec_len(atypes);
 
-    argtype = jl_apply_tuple_type(atypes);
+    argtype = jl_apply_tuple_type(atypes, 1);
     if (!jl_is_datatype(argtype))
         jl_error("invalid type in method definition (Union{})");
 
-    jl_methtable_t *external_mt = mt;
     if (!mt)
-        mt = jl_method_table_for(argtype);
-    if ((jl_value_t*)mt == jl_nothing)
-        jl_error("Method dispatch is unimplemented currently for this method signature");
-    if (mt->frozen)
-        jl_error("cannot add methods to a builtin function");
+        mt = jl_method_table;
+    jl_methtable_t *external_mt = mt == jl_method_table ? NULL : mt;
+
+    //if (!external_mt) {
+    //    jl_value_t **ttypes = { jl_builtin_type, jl_tparam0(jl_anytuple_type) };
+    //    jl_value_t *invalidt = jl_apply_tuple_type_v(ttypes, 2); // Tuple{Union{Builtin,OpaqueClosure}, Vararg}
+    //    if (!jl_has_empty_intersection(argtype, invalidt))
+    //        jl_error("cannot add methods to builtin function");
+    //}
 
     assert(jl_is_linenode(functionloc));
     jl_sym_t *file = (jl_sym_t*)jl_linenode_file(functionloc);
@@ -1012,21 +1302,13 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
     int32_t line = jl_linenode_line(functionloc);
 
     // TODO: derive our debug name from the syntax instead of the type
-    jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(argtype) : mt;
     // if we have a kwcall, try to derive the name from the callee argument method table
-    name = (kwmt ? kwmt : mt)->name;
-    if (kwmt == jl_type_type_mt || kwmt == jl_nonfunction_mt || external_mt) {
-        // our value for `name` is bad, try to guess what the syntax might have had,
-        // like `jl_static_show_func_sig` might have come up with
-        jl_datatype_t *dt = jl_nth_argument_datatype(argtype, mt == jl_kwcall_mt ? 3 : 1);
-        if (dt != NULL) {
-            name = dt->name->name;
-            if (jl_is_type_type((jl_value_t*)dt)) {
-                dt = (jl_datatype_t*)jl_argument_datatype(jl_tparam0(dt));
-                if ((jl_value_t*)dt != jl_nothing) {
-                    name = dt->name->name;
-                }
-            }
+    jl_datatype_t *dtname = (jl_datatype_t*)jl_argument_datatype(jl_kwcall_type && ft == (jl_value_t*)jl_kwcall_type && nargs >= 3 ? jl_svecref(atypes, 2) : ft);
+    name = (jl_value_t*)dtname != jl_nothing ? dtname->name->singletonname : jl_any_type->name->singletonname;
+    if (jl_is_type_type((jl_value_t*)dtname)) {
+        dtname = (jl_datatype_t*)jl_argument_datatype(jl_tparam0(dtname));
+        if ((jl_value_t*)dtname != jl_nothing) {
+            name = dtname->name->singletonname;
         }
     }
 
@@ -1039,9 +1321,16 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
 
     for (i = 0; i < na; i++) {
         jl_value_t *elt = jl_svecref(atypes, i);
-        int isvalid = jl_is_type(elt) || jl_is_typevar(elt) || jl_is_vararg(elt);
-        if (elt == jl_bottom_type || (jl_is_vararg(elt) && jl_unwrap_vararg(elt) == jl_bottom_type))
-            isvalid = 0;
+        if (jl_is_vararg(elt)) {
+            if (i < na-1)
+                jl_exceptionf(jl_argumenterror_type,
+                              "Vararg on non-final argument in method definition for %s at %s:%d",
+                              jl_symbol_name(name),
+                              jl_symbol_name(file),
+                              line);
+            elt = jl_unwrap_vararg(elt);
+        }
+        int isvalid = (jl_is_type(elt) || jl_is_typevar(elt) || jl_is_vararg(elt)) && elt != jl_bottom_type;
         if (!isvalid) {
             jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(f->slotnames, i);
             if (argname == jl_unused_sym)
@@ -1059,12 +1348,6 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
                               jl_symbol_name(file),
                               line);
         }
-        if (jl_is_vararg(elt) && i < na-1)
-            jl_exceptionf(jl_argumenterror_type,
-                          "Vararg on non-final argument in method definition for %s at %s:%d",
-                          jl_symbol_name(name),
-                          jl_symbol_name(file),
-                          line);
     }
     for (i = jl_svec_len(tvars); i > 0; i--) {
         jl_value_t *tv = jl_svecref(tvars, i - 1);
@@ -1086,6 +1369,9 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
                       jl_symbol_name(file),
                       line);
     }
+    ft = jl_rewrap_unionall(ft, argtype);
+    if (!external_mt && !jl_has_empty_intersection(ft, (jl_value_t*)jl_builtin_type)) // disallow adding methods to Any, Function, Builtin, and subtypes, or Unions of those
+        jl_errorf("cannot add methods to builtin function `%s`", jl_symbol_name(name));
 
     m = jl_new_method_uninit(module);
     m->external_mt = (jl_value_t*)external_mt;
@@ -1099,16 +1385,6 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
     m->line = line;
     jl_method_set_source(m, f);
 
-#ifdef RECORD_METHOD_ORDER
-    if (jl_all_methods == NULL)
-        jl_all_methods = jl_alloc_vec_any(0);
-#endif
-    if (jl_all_methods != NULL) {
-        while (jl_array_len(jl_all_methods) < m->primary_world)
-            jl_array_ptr_1d_push(jl_all_methods, NULL);
-        jl_array_ptr_1d_push(jl_all_methods, (jl_value_t*)m);
-    }
-
     jl_method_table_insert(mt, m, NULL);
     if (jl_newmeth_tracer)
         jl_call_tracer(jl_newmeth_tracer, (jl_value_t*)m);
@@ -1117,6 +1393,135 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
     return m;
 }
 
+void jl_ctor_def(jl_value_t *ty, jl_value_t *functionloc)
+{
+    jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(ty);
+    JL_TYPECHK(ctor_def, datatype, (jl_value_t*)dt);
+    JL_TYPECHK(ctor_def, linenumbernode, functionloc);
+    jl_svec_t *fieldtypes = jl_get_fieldtypes(dt);
+    size_t nfields = jl_svec_len(fieldtypes);
+    size_t nparams = jl_subtype_env_size(ty);
+    jl_module_t *inmodule = dt->name->module;
+    jl_sym_t *file = (jl_sym_t*)jl_linenode_file(functionloc);
+    if (!jl_is_symbol(file))
+        file = jl_empty_sym;
+    int32_t line = jl_linenode_line(functionloc);
+
+    // argdata is svec(svec(types...), svec(typevars...), functionloc)
+    jl_svec_t *argdata = jl_alloc_svec(3);
+    jl_array_t *fieldkinds = NULL;
+    jl_code_info_t *body = NULL;
+    JL_GC_PUSH3(&argdata, &fieldkinds, &body);
+    jl_svecset(argdata, 2, functionloc);
+    jl_svec_t *tvars = jl_alloc_svec(nparams);
+    jl_svecset(argdata, 1, tvars);
+    jl_unionall_t *ua = (jl_unionall_t*)ty;
+    for (size_t i = 0; i < nparams; i++) {
+        assert(jl_is_unionall(ua));
+        jl_svecset(tvars, i, ua->var);
+        ua = (jl_unionall_t*)ua->body;
+    }
+    jl_svec_t *names = dt->name->names;
+
+    // define outer constructor (if all typevars are present (thus not definitely unconstrained) by the fields or other typevars which themselves are constrained)
+    int constrains_all_tvars = 1;
+    for (size_t i = nparams; i > 0; i--) {
+        jl_tvar_t *tv = (jl_tvar_t*)jl_svecref(tvars, i - 1);
+        int constrains_tvar = 0;
+        for (size_t i = 0; i < nfields; i++) {
+            jl_value_t *ft = jl_svecref(fieldtypes, i);
+            if (jl_has_typevar(ft, tv)) {
+                constrains_tvar = 1;
+                break;
+            }
+        }
+        for (size_t j = i; j < nparams; j++) {
+            jl_tvar_t *tv2 = (jl_tvar_t*)jl_svecref(tvars, j);
+            if (jl_has_typevar(tv2->ub, tv)) { // lb doesn't constrain, but jl_has_typevar doesn't have a way to specify that we care about may-constrain and not merely containment
+                constrains_tvar = 1;
+                break;
+            }
+            if (tv2 == tv) {
+                constrains_tvar = 0;
+                break;
+            }
+        }
+        if (!constrains_tvar) {
+            constrains_all_tvars = 0;
+            break;
+        }
+    }
+    if (constrains_all_tvars) {
+        jl_svec_t *atypes = jl_alloc_svec(nfields + 1);
+        jl_svecset(argdata, 0, atypes);
+        jl_svecset(atypes, 0, jl_wrap_Type(ty));
+        for (size_t i = 0; i < nfields; i++) {
+            jl_value_t *ft = jl_svecref(fieldtypes, i);
+            jl_svecset(atypes, i + 1, ft);
+        }
+        body = jl_outer_ctor_body(ty, nfields, nparams, inmodule, jl_symbol_name(file), line);
+        if (names) {
+            jl_array_t *slotnames = body->slotnames;
+            for (size_t i = 0; i < nfields; i++) {
+                jl_array_ptr_set(slotnames, i + 1, jl_svecref(names, i));
+            }
+        }
+        jl_method_def(argdata, NULL, body, inmodule);
+        if (nparams == 0) {
+            int all_Any = 1; // check if all fields are Any and the type is not parameterized, since inner constructor would be the same signature and code
+            for (size_t i = 0; i < nfields; i++) {
+                jl_value_t *ft = jl_svecref(fieldtypes, i);
+                if (ft != (jl_value_t*)jl_any_type) {
+                    all_Any = 0;
+                    break;
+                }
+            }
+            if (all_Any) {
+                JL_GC_POP();
+                return;
+            }
+        }
+    }
+
+    // define inner constructor
+    jl_svec_t *atypes = jl_svec_fill(nfields + 1, (jl_value_t*)jl_any_type);
+    jl_svecset(argdata, 0, atypes);
+    jl_value_t *typedt = (jl_value_t*)jl_wrap_Type((jl_value_t*)dt);
+    jl_svecset(atypes, 0, typedt);
+    fieldkinds = jl_alloc_vec_any(nfields);
+    for (size_t i = 0; i < nfields; i++) {
+        jl_value_t *ft = jl_svecref(fieldtypes, i);
+        int kind = ft == (jl_value_t*)jl_any_type ? -1 : 0;
+        // TODO: if more efficient to do so, we could reference the sparam instead of fieldtype
+        //if (jl_is_typevar(ft)) {
+        //    for (size_t i = 0; i < nparams; i++) {
+        //        if (jl_svecref(tvars, i) == ft) {
+        //            kind = i + 1;
+        //            break; // if repeated, must consider only the innermost
+        //        }
+        //    }
+        //}
+        jl_array_ptr_set(fieldkinds, i, jl_box_long(kind));
+    }
+    // rewrap_unionall(Type{dt}, ty)
+    for (size_t i = nparams; i > 0; i--) {
+        jl_value_t *tv = jl_svecref(tvars, i - 1);
+        typedt = jl_new_struct(jl_unionall_type, tv, typedt);
+        jl_svecset(atypes, 0, typedt);
+    }
+    tvars = jl_emptysvec;
+    jl_svecset(argdata, 1, tvars);
+    body = jl_inner_ctor_body(fieldkinds, inmodule, jl_symbol_name(file), line);
+    if (names) {
+        jl_array_t *slotnames = body->slotnames;
+        for (size_t i = 0; i < nfields; i++) {
+            jl_array_ptr_set(slotnames, i + 1, jl_svecref(names, i));
+        }
+    }
+    jl_method_def(argdata, NULL, body, inmodule);
+    JL_GC_POP();
+}
+
 // root blocks
 
 // This section handles method roots. Roots are GC-preserved items needed to
@@ -1153,10 +1558,6 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
 //   at the time of writing the system image (such occur first in the list of
 //   roots). These are the cases with `key = 0` that do not prevent
 //   serialization.
-// - CodeInstances have a `relocatability` field which when 1 indicates that
-//   every root is "safe," meaning it was either added at sysimg creation or is
-//   tagged with a non-zero `key`. Even a single unsafe root will cause this to
-//   have value 0.
 
 // Get the key of the current (final) block of roots
 static uint64_t current_root_id(jl_array_t *root_blocks)
@@ -1164,10 +1565,10 @@ static uint64_t current_root_id(jl_array_t *root_blocks)
     if (!root_blocks)
         return 0;
     assert(jl_is_array(root_blocks));
-    size_t nx2 = jl_array_len(root_blocks);
+    size_t nx2 = jl_array_nrows(root_blocks);
     if (nx2 == 0)
         return 0;
-    uint64_t *blocks = (uint64_t*)jl_array_data(root_blocks);
+    uint64_t *blocks = jl_array_data(root_blocks, uint64_t);
     return blocks[nx2-2];
 }
 
@@ -1176,8 +1577,8 @@ static void add_root_block(jl_array_t *root_blocks, uint64_t modid, size_t len)
 {
     assert(jl_is_array(root_blocks));
     jl_array_grow_end(root_blocks, 2);
-    uint64_t *blocks = (uint64_t*)jl_array_data(root_blocks);
-    int nx2 = jl_array_len(root_blocks);
+    uint64_t *blocks = jl_array_data(root_blocks, uint64_t);
+    int nx2 = jl_array_nrows(root_blocks);
     blocks[nx2-2] = modid;
     blocks[nx2-1] = len;
 }
@@ -1207,7 +1608,7 @@ JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_
     assert(jl_is_method(m));
     prepare_method_for_roots(m, modid);
     if (current_root_id(m->root_blocks) != modid)
-        add_root_block(m->root_blocks, modid, jl_array_len(m->roots));
+        add_root_block(m->root_blocks, modid, jl_array_nrows(m->roots));
     jl_array_ptr_1d_push(m->roots, root);
     JL_GC_POP();
 }
@@ -1219,7 +1620,7 @@ void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots)
     assert(jl_is_method(m));
     assert(jl_is_array(roots));
     prepare_method_for_roots(m, modid);
-    add_root_block(m->root_blocks, modid, jl_array_len(m->roots));
+    add_root_block(m->root_blocks, modid, jl_array_nrows(m->roots));
     jl_array_ptr_1d_append(m->roots, roots);
     JL_GC_POP();
 }
@@ -1233,7 +1634,7 @@ int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i)
         rr->index = i;
         return i < m->nroots_sysimg;
     }
-    rle_index_to_reference(rr, i, (uint64_t*)jl_array_data(m->root_blocks), jl_array_len(m->root_blocks), 0);
+    rle_index_to_reference(rr, i, jl_array_data(m->root_blocks, uint64_t), jl_array_nrows(m->root_blocks), 0);
     if (rr->key)
         return 1;
     return i < m->nroots_sysimg;
@@ -1248,7 +1649,7 @@ jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index)
         return jl_array_ptr_ref(m->roots, index);
     }
     rle_reference rr = {key, index};
-    size_t i = rle_reference_to_index(&rr, (uint64_t*)jl_array_data(m->root_blocks), jl_array_len(m->root_blocks), 0);
+    size_t i = rle_reference_to_index(&rr, jl_array_data(m->root_blocks, uint64_t), jl_array_nrows(m->root_blocks), 0);
     return jl_array_ptr_ref(m->roots, i);
 }
 
@@ -1257,11 +1658,11 @@ int nroots_with_key(jl_method_t *m, uint64_t key)
 {
     size_t nroots = 0;
     if (m->roots)
-        nroots = jl_array_len(m->roots);
+        nroots = jl_array_nrows(m->roots);
     if (!m->root_blocks)
         return key == 0 ? nroots : 0;
-    uint64_t *rletable = (uint64_t*)jl_array_data(m->root_blocks);
-    size_t j, nblocks2 = jl_array_len(m->root_blocks);
+    uint64_t *rletable = jl_array_data(m->root_blocks, uint64_t);
+    size_t j, nblocks2 = jl_array_nrows(m->root_blocks);
     int nwithkey = 0;
     for (j = 0; j < nblocks2; j+=2) {
         if (rletable[j] == key)
diff --git a/src/module.c b/src/module.c
index 89c4c6cdb674e..3cfee6ad0a8c8 100644
--- a/src/module.c
+++ b/src/module.c
@@ -1,4 +1,5 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
+//
 
 /*
   modules and top-level bindings
@@ -11,7 +12,507 @@
 extern "C" {
 #endif
 
-JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, uint8_t default_names)
+// In this translation unit and this translation unit only emit this symbol `extern` for use by julia
+EXTERN_INLINE_DEFINE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT;
+
+static jl_binding_partition_t *new_binding_partition(void)
+{
+    jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_gc_alloc(jl_current_task->ptls, sizeof(jl_binding_partition_t), jl_binding_partition_type);
+    bpart->restriction = NULL;
+    bpart->kind = (size_t)PARTITION_KIND_GUARD;
+    jl_atomic_store_relaxed(&bpart->min_world, 0);
+    jl_atomic_store_relaxed(&bpart->max_world, (size_t)-1);
+    jl_atomic_store_relaxed(&bpart->next, NULL);
+    return bpart;
+}
+
+struct implicit_search_gap {
+    _Atomic(jl_binding_partition_t *) *insert;
+    jl_binding_partition_t *replace;
+    jl_value_t *parent;
+
+    size_t min_world;
+    size_t max_world;
+    size_t inherited_flags;
+};
+
+STATIC_INLINE jl_binding_partition_t *jl_get_binding_partition__(jl_binding_t *b JL_PROPAGATES_ROOT, size_t world, struct implicit_search_gap *gap) JL_GLOBALLY_ROOTED
+{
+    // Iterate through the list of binding partitions, keeping track of where to insert a new one for an implicit
+    // resolution if necessary.
+    while (gap->replace) {
+        size_t replace_min_world = jl_atomic_load_relaxed(&gap->replace->min_world);
+        if (world >= replace_min_world)
+            break;
+        gap->insert = &gap->replace->next;
+        gap->max_world = replace_min_world - 1;
+        gap->parent = (jl_value_t*)gap->replace;
+        gap->replace = jl_atomic_load_relaxed(gap->insert);
+    }
+    if (gap->replace && world <= jl_atomic_load_relaxed(&gap->replace->max_world)) {
+        return gap->replace;
+    }
+    gap->min_world = gap->replace ? jl_atomic_load_relaxed(&gap->replace->max_world) + 1 : 0;
+    if (gap->replace)
+        gap->inherited_flags = gap->replace->kind & PARTITION_MASK_FLAG;
+    else
+        gap->inherited_flags = 0;
+    return NULL;
+}
+
+STATIC_INLINE jl_binding_partition_t *jl_get_binding_partition_if_present(jl_binding_t *b JL_PROPAGATES_ROOT, size_t world, struct implicit_search_gap *gap)
+{
+    gap->parent = (jl_value_t*)b;
+    gap->insert = &b->partitions;
+    gap->replace = jl_atomic_load_relaxed(gap->insert);
+    gap->min_world = 0;
+    gap->max_world = ~(size_t)0;
+    gap->inherited_flags = 0;
+    return jl_get_binding_partition__(b, world, gap);
+}
+
+struct implicit_search_resolution {
+    enum jl_partition_kind ultimate_kind;
+    jl_value_t *binding_or_const;
+    size_t min_world;
+    size_t max_world;
+    int saw_cycle;
+    int should_be_reexported;  // Set if resolved through a using with JL_MODULE_USING_REEXPORT
+    //// Not semantic, but used for reflection.
+    // If non-null, the unique module from which this binding was imported
+    jl_module_t *debug_only_import_from;
+    // If non-null, the unique binding imported. For PARTITION_KIND_IMPLICIT_GLOBAL, always matches binding_or_const.
+    // Must have trust_cache = 0.
+    jl_binding_t *debug_only_ultimate_binding;
+};
+
+static size_t WORLDMAX(size_t a, size_t b) { return a > b ? a : b; }
+static size_t WORLDMIN(size_t a, size_t b) { return a > b ? b : a; }
+
+static void update_implicit_resolution(struct implicit_search_resolution *to_update, struct implicit_search_resolution resolution)
+{
+    to_update->min_world = WORLDMAX(to_update->min_world, resolution.min_world);
+    to_update->max_world = WORLDMIN(to_update->max_world, resolution.max_world);
+    to_update->saw_cycle |= resolution.saw_cycle;
+    to_update->should_be_reexported |= resolution.should_be_reexported;
+    if (resolution.ultimate_kind == PARTITION_FAKE_KIND_CYCLE) {
+        // Cycles get ignored. This causes the resolution resolution to only be partial, so we can't
+        // cache it. This gets tracked in saw_cycle;
+        to_update->saw_cycle = 1;
+        return;
+    }
+    if (resolution.ultimate_kind == PARTITION_KIND_GUARD) {
+        // Ignore guard imports
+        return;
+    }
+    if (to_update->ultimate_kind == PARTITION_KIND_GUARD) {
+        assert(resolution.binding_or_const);
+        to_update->ultimate_kind = resolution.ultimate_kind;
+        to_update->binding_or_const = resolution.binding_or_const;
+        to_update->debug_only_import_from = resolution.debug_only_import_from;
+        to_update->debug_only_ultimate_binding = resolution.debug_only_ultimate_binding;
+        return;
+    }
+    if (resolution.ultimate_kind == to_update->ultimate_kind &&
+        resolution.binding_or_const == to_update->binding_or_const) {
+        if (resolution.debug_only_import_from != to_update->debug_only_import_from) {
+            to_update->debug_only_import_from = NULL;
+        }
+        if (resolution.debug_only_ultimate_binding != to_update->debug_only_ultimate_binding) {
+            to_update->debug_only_ultimate_binding = NULL;
+        }
+        return;
+    }
+    to_update->ultimate_kind = PARTITION_KIND_FAILED;
+    to_update->binding_or_const = NULL;
+    to_update->debug_only_import_from = NULL;
+    to_update->debug_only_ultimate_binding = NULL;
+}
+
+static jl_binding_partition_t *jl_implicit_import_resolved(jl_binding_t *b, struct implicit_search_gap gap, struct implicit_search_resolution resolution)
+{
+    size_t new_kind = resolution.ultimate_kind | gap.inherited_flags;
+    // If the resolution indicates this should be reexported, add the implicit export flag
+    if (resolution.should_be_reexported) {
+        new_kind |= PARTITION_FLAG_IMPLICITLY_EXPORTED;
+    }
+    size_t new_max_world = gap.max_world < resolution.max_world ? gap.max_world : resolution.max_world;
+    size_t new_min_world = gap.min_world > resolution.min_world ? gap.min_world : resolution.min_world;
+    jl_binding_partition_t *next = gap.replace;
+    if (jl_is_binding_partition(gap.parent)) {
+        // Check if we can merge this into the previous binding partition
+        jl_binding_partition_t *prev = (jl_binding_partition_t *)gap.parent;
+        assert(new_max_world != ~(size_t)0); // It is inconsistent to have a gap with `gap.parent` set, but max_world == ~(size_t)0
+        size_t expected_prev_min_world = new_max_world + 1;
+        if (prev->restriction == resolution.binding_or_const && prev->kind == new_kind) {
+retry:
+            if (!jl_atomic_cmpswap(&prev->min_world, &expected_prev_min_world, new_min_world)) {
+                if (expected_prev_min_world <= new_min_world) {
+                    return prev;
+                }
+                else if (expected_prev_min_world <= new_max_world) {
+                    // Concurrent modification of the partition. However, our lookup is still valid,
+                    // so we should still be able to extend the partition.
+                    goto retry;
+                }
+                // There remains a gap - proceed
+            } else {
+                if (next) {
+                    size_t next_min_world = jl_atomic_load_relaxed(&next->min_world);
+                    expected_prev_min_world = new_min_world;
+                    for (;;) {
+                        // We've updated the previous partition - check if we've closed a gap
+                        size_t next_max_world = jl_atomic_load_relaxed(&next->max_world);
+                        if (next_max_world >= expected_prev_min_world-1 && next->kind == new_kind && next->restriction == resolution.binding_or_const) {
+                            if (jl_atomic_cmpswap(&prev->min_world, &expected_prev_min_world, next_min_world)) {
+                                jl_binding_partition_t *nextnext = jl_atomic_load_relaxed(&next->next);
+                                if (!jl_atomic_cmpswap(&prev->next, &next, nextnext)) {
+                                    // `next` may have been merged into its subsequent partition - we need to retry
+                                    assert(next);
+                                    continue;
+                                }
+                                // N.B.: This can lose modifications to next->{min_world, next}.
+                                // However, those modifications could only have been for another implicit
+                                // partition, so we are ok to lose them and recompute them later if necessary.
+                            }
+                            assert(expected_prev_min_world <= new_min_world);
+                        }
+                        break;
+                    }
+                }
+                return prev;
+            }
+        }
+    }
+    jl_binding_partition_t *new_bpart = new_binding_partition();
+    jl_atomic_store_relaxed(&new_bpart->max_world, new_max_world);
+    new_bpart->kind = new_kind;
+    new_bpart->restriction = resolution.binding_or_const;
+    jl_gc_wb_fresh(new_bpart, new_bpart->restriction);
+
+    if (next) {
+        // See if we can merge the next partition into this one
+        size_t next_max_world = jl_atomic_load_relaxed(&next->max_world);
+        if (next_max_world == new_min_world - 1 && next->kind == new_kind && next->restriction == resolution.binding_or_const) {
+            // See above for potentially losing modifications to next.
+            new_min_world = jl_atomic_load_acquire(&next->min_world);
+            next = jl_atomic_load_relaxed(&next->next);
+        }
+    }
+
+    jl_atomic_store_relaxed(&new_bpart->min_world, new_min_world);
+    jl_atomic_store_relaxed(&new_bpart->next, next);
+    if (!jl_atomic_cmpswap(gap.insert, &gap.replace, new_bpart))
+        return NULL;
+    jl_gc_wb(gap.parent, new_bpart);
+    return new_bpart;
+}
+
+// find a binding from a module's `usings` list
+struct implicit_search_resolution jl_resolve_implicit_import(jl_binding_t *b, modstack_t *st, size_t world, int trust_cache)
+{
+    // First check if we've hit a cycle in this resolution
+    {
+        modstack_t *tmp = st;
+        for (; tmp != NULL; tmp = tmp->prev) {
+            if (tmp->b == b) {
+                return (struct implicit_search_resolution){ PARTITION_FAKE_KIND_CYCLE, NULL, 0, ~(size_t)0, 1, 0, NULL, NULL };
+            }
+        }
+    }
+
+    jl_module_t *m = b->globalref->mod;
+    jl_sym_t *var = b->globalref->name;
+
+    modstack_t top = { b, st };
+    struct implicit_search_resolution impstate;
+    struct implicit_search_resolution depimpstate;
+    size_t min_world = 0;
+    size_t max_world = ~(size_t)0;
+    impstate = depimpstate = (struct implicit_search_resolution){ PARTITION_KIND_GUARD, NULL, min_world, max_world, 0, 0, NULL, NULL };
+
+    JL_LOCK(&m->lock);
+    int i = (int)module_usings_length(m) - 1;
+    JL_UNLOCK(&m->lock);
+    for (; i >= 0 && impstate.ultimate_kind != PARTITION_KIND_FAILED; --i) {
+        JL_LOCK(&m->lock);
+        struct _jl_module_using data = *module_usings_getidx(m, i);
+        JL_UNLOCK(&m->lock);
+        if (data.min_world > world) {
+            max_world = WORLDMIN(max_world, data.min_world - 1);
+            continue;
+        }
+        if (data.max_world < world) {
+            min_world = WORLDMAX(min_world, data.max_world + 1);
+            continue;
+        }
+
+        min_world = WORLDMAX(min_world, data.min_world);
+        max_world = WORLDMIN(max_world, data.max_world);
+
+        jl_module_t *imp = data.mod;
+        uint8_t has_reexports = jl_atomic_load_relaxed(&imp->has_reexports);
+        JL_GC_PROMISE_ROOTED(imp);
+        jl_binding_t *tempb = jl_get_module_binding(imp, var, 0);
+        if (!tempb) {
+            // If the binding has never been allocated, it could not have been marked exported, so
+            // it is irrelevant for our resolution. We can move on.
+            // Exception: if this module has reexports, the binding might be reexported from another module,
+            // so we need to create the binding to trigger implicit resolution
+            if (!has_reexports)
+                continue;
+            tempb = jl_get_module_binding(imp, var, 1);  // Create the binding
+        }
+
+        struct implicit_search_gap gap;
+        jl_binding_partition_t *tempbpart;
+        tempbpart = jl_get_binding_partition_if_present(tempb, world, &gap);
+        size_t tempbpart_flags = tempbpart ? (tempbpart->kind & PARTITION_MASK_FLAG) : gap.inherited_flags;
+
+        while (tempbpart && jl_bkind_is_some_explicit_import(jl_binding_kind(tempbpart))) {
+            max_world = WORLDMIN(max_world, jl_atomic_load_relaxed(&tempbpart->max_world));
+            min_world = WORLDMAX(min_world, jl_atomic_load_relaxed(&tempbpart->min_world));
+
+            tempb = (jl_binding_t*)tempbpart->restriction;
+            tempbpart = jl_get_binding_partition_if_present(tempb, world, &gap);
+        }
+
+        int tempbpart_valid = tempbpart && (trust_cache || !jl_bkind_is_some_implicit(jl_binding_kind(tempbpart)));
+        size_t tembppart_max_world = tempbpart_valid ? jl_atomic_load_relaxed(&tempbpart->max_world) : gap.max_world;
+        size_t tembppart_min_world = tempbpart ? WORLDMAX(jl_atomic_load_relaxed(&tempbpart->min_world), gap.min_world) : gap.min_world;
+
+        max_world = WORLDMIN(max_world, tembppart_max_world);
+        min_world = WORLDMAX(min_world, tembppart_min_world);
+
+        uint8_t is_any_exported = jl_bpart_is_exported(tempbpart_flags);
+        if (!is_any_exported && (tempbpart_valid || !has_reexports)) {
+            // Partition not exported - skip.
+            continue;
+        }
+
+        struct implicit_search_resolution *comparison = &impstate;
+        if (impstate.ultimate_kind != PARTITION_KIND_GUARD) {
+            if (tempbpart_flags & PARTITION_FLAG_DEPRECATED) {
+                // Deprecated, but we already have a non-deprecated binding for this - skip.
+                continue;
+            }
+        } else if (tempbpart_flags & PARTITION_FLAG_DEPRECATED) {
+            if (depimpstate.ultimate_kind == PARTITION_KIND_FAILED) {
+                // We've already decided that the deprecated bindings are ambiguous, so skip this, but
+                // keep going to look for non-deprecated bindings.
+                continue;
+            }
+            comparison = &depimpstate;
+        }
+
+        struct implicit_search_resolution imp_resolution = { PARTITION_KIND_GUARD, NULL, min_world, max_world, 0, 0, NULL, NULL };
+        if (!tempbpart_valid) {
+            imp_resolution = jl_resolve_implicit_import(tempb, &top, world, trust_cache);
+            // imp_resolution is the resolution for import into tempb (which may have been cached for tempb
+            // if we're not in a cycle). `imp_resolution.should_be_reexported` indicates whether the binding
+            // should be reexported from `imp`, not `m`, so check here.
+            if (!(tempbpart_flags & PARTITION_FLAG_EXPORTED) && !imp_resolution.should_be_reexported)
+                continue;
+            imp_resolution.should_be_reexported = 0;
+        } else {
+            enum jl_partition_kind kind = jl_binding_kind(tempbpart);
+            if (kind == PARTITION_KIND_IMPLICIT_GLOBAL) {
+                imp_resolution.binding_or_const = tempbpart->restriction;
+                imp_resolution.debug_only_ultimate_binding = (jl_binding_t*)tempbpart->restriction;
+                imp_resolution.ultimate_kind = PARTITION_KIND_IMPLICIT_GLOBAL;
+            } else if (kind == PARTITION_KIND_GLOBAL || kind == PARTITION_KIND_DECLARED || kind == PARTITION_KIND_BACKDATED_CONST) {
+                imp_resolution.binding_or_const = (jl_value_t *)tempb;
+                imp_resolution.debug_only_ultimate_binding = tempb;
+                imp_resolution.ultimate_kind = PARTITION_KIND_IMPLICIT_GLOBAL;
+            } else if (jl_bkind_is_defined_constant(kind)) {
+                assert(tempbpart->restriction);
+                imp_resolution.binding_or_const = tempbpart->restriction;
+                imp_resolution.debug_only_ultimate_binding = tempb;
+                imp_resolution.ultimate_kind = PARTITION_KIND_IMPLICIT_CONST;
+            }
+        }
+        // If this using has the reexport flag, mark that the binding should be reexported
+        if (data.flags & JL_MODULE_USING_REEXPORT) {
+            imp_resolution.should_be_reexported = 1;
+        }
+        imp_resolution.debug_only_import_from = imp;
+        update_implicit_resolution(comparison, imp_resolution);
+
+        if (!tempbpart && !imp_resolution.saw_cycle) {
+            // Independent of whether or not we trust the cache, we have independently computed the implicit resolution
+            // for this import, so we can put it in the cache.
+            jl_implicit_import_resolved(tempb, gap, imp_resolution);
+        }
+    }
+
+    if (impstate.ultimate_kind == PARTITION_KIND_GUARD && depimpstate.ultimate_kind != PARTITION_KIND_GUARD) {
+        depimpstate.min_world = WORLDMAX(depimpstate.min_world, min_world);
+        depimpstate.max_world = WORLDMIN(depimpstate.max_world, max_world);
+        return depimpstate;
+    }
+    impstate.min_world = WORLDMAX(impstate.min_world, min_world);
+    impstate.max_world = WORLDMIN(impstate.max_world, max_world);
+    return impstate;
+}
+
+JL_DLLEXPORT jl_binding_partition_t *jl_maybe_reresolve_implicit(jl_binding_t *b, size_t new_max_world)
+{
+    struct implicit_search_gap gap;
+    while (1) {
+        jl_binding_partition_t *bpart = jl_get_binding_partition_if_present(b, new_max_world+1, &gap);
+        assert(bpart == jl_atomic_load_relaxed(&b->partitions));
+        assert(bpart);
+        struct implicit_search_resolution resolution = jl_resolve_implicit_import(b, NULL, new_max_world+1, 0);
+        int resolution_unchanged = bpart->restriction == resolution.binding_or_const && jl_binding_kind(bpart) == resolution.ultimate_kind;
+        size_t bpart_min_world = jl_atomic_load_relaxed(&bpart->min_world);
+        if (resolution.min_world == bpart_min_world) {
+            // The resolution has the same world bounds - it must be unchanged
+            assert(resolution_unchanged);
+            return bpart;
+        } else if (resolution_unchanged) {
+            // If the resolution is unchanged, we can still keep the bpart
+            assert(resolution.min_world > bpart_min_world);
+            return bpart;
+        }
+        assert(resolution.min_world == new_max_world+1 && "Missed an invalidation or bad resolution bounds");
+        size_t expected_max_world = ~(size_t)0;
+        if (jl_atomic_cmpswap(&bpart->max_world, &expected_max_world, new_max_world))
+        {
+            gap.min_world = new_max_world+1;
+            gap.inherited_flags = bpart->kind & PARTITION_MASK_FLAG;
+            jl_binding_partition_t *new_bpart = jl_implicit_import_resolved(b, gap, resolution);
+            if (new_bpart)
+                return new_bpart;
+        }
+    }
+}
+
+JL_DLLEXPORT void jl_update_loaded_bpart(jl_binding_t *b, jl_binding_partition_t *bpart)
+{
+    struct implicit_search_resolution resolution = jl_resolve_implicit_import(b, NULL, jl_atomic_load_acquire(&jl_world_counter), 0);
+    jl_atomic_store_relaxed(&bpart->min_world, resolution.min_world);
+    jl_atomic_store_relaxed(&bpart->max_world, resolution.max_world);
+    bpart->restriction = resolution.binding_or_const;
+    bpart->kind = resolution.ultimate_kind;
+}
+
+STATIC_INLINE jl_binding_partition_t *jl_get_binding_partition_(jl_binding_t *b JL_PROPAGATES_ROOT, jl_value_t *parent, _Atomic(jl_binding_partition_t *)*insert, size_t world, size_t max_world, modstack_t *st) JL_GLOBALLY_ROOTED
+{
+    assert(jl_is_binding(b));
+    struct implicit_search_gap gap;
+    gap.parent = parent;
+    gap.insert = insert;
+    gap.inherited_flags = 0;
+    gap.min_world = 0;
+    gap.max_world = max_world;
+    while (1) {
+        gap.replace = jl_atomic_load_relaxed(gap.insert);
+        jl_binding_partition_t *bpart = jl_get_binding_partition__(b, world, &gap);
+        if (bpart)
+            return bpart;
+        struct implicit_search_resolution resolution = jl_resolve_implicit_import(b, NULL, world, 1);
+        jl_binding_partition_t *new_bpart = jl_implicit_import_resolved(b, gap, resolution);
+        if (new_bpart)
+            return new_bpart;
+    }
+}
+
+jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) {
+    if (!b)
+        return NULL;
+    // Duplicate the code for the entry frame for branch prediction
+    return jl_get_binding_partition_(b, (jl_value_t*)b, &b->partitions, world, ~(size_t)0, NULL);
+}
+
+jl_binding_partition_t *jl_get_binding_partition_with_hint(jl_binding_t *b, jl_binding_partition_t *prev, size_t world) JL_GLOBALLY_ROOTED {
+    // Helper for getting a binding partition for an older world after we've already looked up the partition for a newer world
+    assert(b);
+    size_t prev_min_world = jl_atomic_load_relaxed(&prev->min_world);
+    return jl_get_binding_partition_(b, (jl_value_t*)prev, &prev->next, world, prev_min_world-1, NULL);
+}
+
+jl_binding_partition_t *jl_get_binding_partition_all(jl_binding_t *b, size_t min_world, size_t max_world) {
+    if (!b)
+        return NULL;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, min_world);
+    if (!bpart)
+        return NULL;
+    if (jl_atomic_load_relaxed(&bpart->max_world) < max_world)
+        return NULL;
+    return bpart;
+}
+
+JL_DLLEXPORT int jl_get_binding_leaf_partitions_restriction_kind(jl_binding_t *b JL_PROPAGATES_ROOT, struct restriction_kind_pair *rkp, size_t min_world, size_t max_world) {
+    if (!b)
+        return 0;
+
+    int first = 1;
+    size_t validated_min_world = max_world == ~(size_t)0 ? ~(size_t)0 : max_world + 1;
+    jl_binding_partition_t *bpart = NULL;
+    int maybe_depwarn = 0;
+    while (validated_min_world > min_world) {
+        bpart = bpart ? jl_get_binding_partition_with_hint(b, bpart, validated_min_world - 1) :
+                        jl_get_binding_partition(b, validated_min_world - 1);
+        size_t bpart_min_world = jl_atomic_load_relaxed(&bpart->min_world);
+        while (validated_min_world > min_world && validated_min_world > bpart_min_world) {
+            jl_binding_t *curb = b;
+            jl_binding_partition_t *curbpart = bpart;
+            size_t cur_min_world = bpart_min_world;
+            size_t cur_max_world = validated_min_world - 1;
+            jl_walk_binding_inplace_worlds(&curb, &curbpart, &cur_min_world, &cur_max_world, &maybe_depwarn, cur_max_world);
+            enum jl_partition_kind kind = jl_binding_kind(curbpart);
+            if (kind == PARTITION_KIND_IMPLICIT_CONST)
+                kind = PARTITION_KIND_CONST;
+            if (first == 1) {
+                rkp->kind = kind;
+                rkp->restriction = curbpart->restriction;
+                if (rkp->kind == PARTITION_KIND_GLOBAL || rkp->kind == PARTITION_KIND_DECLARED)
+                    rkp->binding_if_global = curb;
+                first = 0;
+            } else {
+                if (kind != rkp->kind || curbpart->restriction != rkp->restriction)
+                    return 0;
+                if ((rkp->kind == PARTITION_KIND_GLOBAL || rkp->kind == PARTITION_KIND_DECLARED) && rkp->binding_if_global != curb)
+                    return 0;
+            }
+            validated_min_world = cur_min_world;
+        }
+    }
+    rkp->maybe_depwarn = maybe_depwarn;
+    return 1;
+}
+
+JL_DLLEXPORT jl_value_t *jl_get_binding_leaf_partitions_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT, int *maybe_depwarn, size_t min_world, size_t max_world) {
+    struct restriction_kind_pair rkp = { NULL, NULL, PARTITION_KIND_GUARD, 0 };
+    if (!jl_get_binding_leaf_partitions_restriction_kind(b, &rkp, min_world, max_world))
+        return NULL;
+    if (jl_bkind_is_real_constant(rkp.kind)) {
+        *maybe_depwarn = rkp.maybe_depwarn;
+        return rkp.restriction;
+    }
+    return NULL;
+}
+
+JL_DLLEXPORT size_t jl_binding_backedges_length(jl_binding_t *b)
+{
+    JL_LOCK(&b->globalref->mod->lock);
+    size_t len = 0;
+    if (b->backedges)
+        len = jl_array_len(b->backedges);
+    JL_UNLOCK(&b->globalref->mod->lock);
+    return len;
+}
+
+JL_DLLEXPORT jl_value_t *jl_binding_backedges_getindex(jl_binding_t *b, size_t i)
+{
+    JL_LOCK(&b->globalref->mod->lock);
+    assert(b->backedges);
+    jl_value_t *ret = jl_array_ptr_ref(b->backedges, i-1);
+    JL_UNLOCK(&b->globalref->mod->lock);
+    return ret;
+}
+
+static jl_module_t *jl_new_module__(jl_sym_t *name, jl_module_t *parent)
 {
     jl_task_t *ct = jl_current_task;
     const jl_uuid_t uuid_zero = {0, 0};
@@ -20,55 +521,168 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui
     jl_set_typetagof(m, jl_module_tag, 0);
     assert(jl_is_symbol(name));
     m->name = name;
-    m->parent = parent;
+    m->parent = parent ? parent : m;
     m->istopmod = 0;
     m->uuid = uuid_zero;
-    static unsigned int mcounter; // simple counter backup, in case hrtime is not incrementing
-    m->build_id.lo = jl_hrtime() + (++mcounter);
+    static _Atomic(unsigned int) mcounter; // simple counter backup, in case hrtime is not incrementing
+    unsigned int count = jl_atomic_fetch_add_relaxed(&mcounter, 1);
+    // TODO: this is used for ir decompression and is liable to hash collisions so use more of the bits
+    m->build_id.lo = bitmix(jl_hrtime() + count, jl_rand());
     if (!m->build_id.lo)
         m->build_id.lo++; // build id 0 is invalid
     m->build_id.hi = ~(uint64_t)0;
-    m->primary_world = 0;
     jl_atomic_store_relaxed(&m->counter, 1);
+    m->usings_backedges = jl_nothing;
+    m->scanned_methods = jl_nothing;
     m->nospecialize = 0;
     m->optlevel = -1;
     m->compile = -1;
     m->infer = -1;
     m->max_methods = -1;
+    jl_atomic_store_relaxed(&m->has_reexports, 0);
+    m->file = jl_empty_sym;
+    m->line = 0;
     m->hash = parent == NULL ? bitmix(name->hash, jl_module_type->hash) :
         bitmix(name->hash, parent->hash);
     JL_MUTEX_INIT(&m->lock, "module->lock");
     jl_atomic_store_relaxed(&m->bindings, jl_emptysvec);
-    jl_atomic_store_relaxed(&m->bindingkeyset, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&m->bindingkeyset, (jl_genericmemory_t*)jl_an_empty_memory_any);
     arraylist_new(&m->usings, 0);
+    return m;
+}
+
+static void jl_add_default_names(jl_module_t *m, uint8_t default_using_core, uint8_t self_name)
+{
+    if (jl_core_module) {
+        // Bootstrap: Before jl_core_module is defined, we don't have enough infrastructure
+        // for bindings, so Core itself gets special handling in jltypes.c
+        if (default_using_core) {
+            jl_module_initial_using(m, jl_core_module);
+        }
+        if (self_name) {
+            // export own name, so "using Foo" makes "Foo" itself visible
+            jl_set_initial_const(m, m->name, (jl_value_t*)m, 1);
+        }
+    }
+}
+
+jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, uint8_t default_using_core, uint8_t self_name)
+{
+    jl_module_t *m = jl_new_module__(name, parent);
     JL_GC_PUSH1(&m);
-    if (jl_core_module && default_names) {
-        jl_module_using(m, jl_core_module);
+    JL_LOCK(&world_counter_lock);
+    jl_add_default_names(m, default_using_core, self_name);
+    JL_UNLOCK(&world_counter_lock);
+    JL_GC_POP();
+    return m;
+}
+
+
+// Precondition: world_counter_lock is held
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val3(
+    jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *val,
+    enum jl_partition_kind constant_kind, size_t new_world)
+{
+    jl_binding_partition_t *new_prev_bpart = NULL;
+    JL_GC_PUSH2(&val, &new_prev_bpart);
+    if (!b) {
+        b = jl_get_module_binding(mod, var, 1);
     }
-    // export own name, so "using Foo" makes "Foo" itself visible
-    if (default_names) {
-        jl_set_const(m, name, (jl_value_t*)m);
+    jl_binding_partition_t *new_bpart = NULL;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, new_world);
+    while (!new_bpart) {
+        enum jl_partition_kind kind = jl_binding_kind(bpart);
+        if (jl_bkind_is_some_constant(kind) && !jl_bkind_is_some_implicit(kind)) {
+            if (!val) {
+                new_bpart = bpart;
+                break;
+            }
+            jl_value_t *old = bpart->restriction;
+            JL_GC_PROMISE_ROOTED(old);
+            if (val == old || (val && old && jl_egal(val, old))) {
+                new_bpart = bpart;
+                break;
+            }
+        } else if (jl_bkind_is_some_explicit_import(kind)) {
+            jl_errorf("cannot declare %s.%s constant; it was already declared as an import",
+                      jl_symbol_name(mod->name), jl_symbol_name(var));
+        } else if (kind == PARTITION_KIND_GLOBAL) {
+            jl_errorf("cannot declare %s.%s constant; it was already declared global",
+                      jl_symbol_name(mod->name), jl_symbol_name(var));
+        }
+        if (jl_atomic_load_relaxed(&bpart->min_world) == new_world) {
+            bpart->kind = constant_kind | (bpart->kind & PARTITION_MASK_FLAG);
+            bpart->restriction = val;
+            if (val)
+                jl_gc_wb(bpart, val);
+            new_bpart = bpart;
+        } else {
+            new_bpart = jl_replace_binding_locked(b, bpart, val, constant_kind, new_world);
+        }
+        int need_backdate = new_world && val;
+        if (need_backdate) {
+            // We will backdate as long as this partition was never explicitly
+            // declared const, global, or imported.
+            jl_binding_partition_t *prev_bpart = bpart;
+            for (;;) {
+                enum jl_partition_kind prev_kind = jl_binding_kind(prev_bpart);
+                if (jl_bkind_is_some_constant(prev_kind) || prev_kind == PARTITION_KIND_GLOBAL ||
+                    jl_bkind_is_some_import(prev_kind)) {
+                    need_backdate = 0;
+                    break;
+                }
+                size_t prev_bpart_min_world = jl_atomic_load_relaxed(&prev_bpart->min_world);
+                if (prev_bpart_min_world == 0)
+                    break;
+                prev_bpart = jl_get_binding_partition(b, prev_bpart_min_world - 1);
+            }
+        }
+        // If backdate is required, replace each existing partition by a new one.
+        // We can't use one binding to cover the entire range, because we need to
+        // keep the flags partitioned.
+        if (need_backdate) {
+            jl_binding_partition_t *prev_bpart = bpart;
+            jl_binding_partition_t *backdate_bpart = new_binding_partition();
+            new_prev_bpart = backdate_bpart;
+            while (1) {
+                backdate_bpart->kind = (size_t)PARTITION_KIND_BACKDATED_CONST | (prev_bpart->kind & 0xf0);
+                backdate_bpart->restriction = val;
+                jl_atomic_store_relaxed(&backdate_bpart->min_world,
+                    jl_atomic_load_relaxed(&prev_bpart->min_world));
+                jl_gc_wb_fresh(backdate_bpart, val);
+                jl_atomic_store_relaxed(&backdate_bpart->max_world,
+                    jl_atomic_load_relaxed(&prev_bpart->max_world));
+                prev_bpart = jl_atomic_load_relaxed(&prev_bpart->next);
+                if (!prev_bpart)
+                    break;
+                jl_binding_partition_t *next_prev_bpart = new_binding_partition();
+                jl_atomic_store_relaxed(&backdate_bpart->next, next_prev_bpart);
+                jl_gc_wb(backdate_bpart, next_prev_bpart);
+                backdate_bpart = next_prev_bpart;
+            }
+            jl_atomic_store_release(&new_bpart->next, new_prev_bpart);
+            jl_gc_wb(new_bpart, new_prev_bpart);
+        }
     }
-    jl_module_export(m, name);
     JL_GC_POP();
-    return m;
+    return new_bpart;
 }
 
 JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name, jl_module_t *parent)
 {
-    return jl_new_module_(name, parent, 1);
+    return jl_new_module_(name, parent, 1, 1);
 }
 
 uint32_t jl_module_next_counter(jl_module_t *m)
 {
-    return jl_atomic_fetch_add(&m->counter, 1);
+    return jl_atomic_fetch_add_relaxed(&m->counter, 1);
 }
 
 JL_DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name, uint8_t std_imports, uint8_t default_names)
 {
     // TODO: should we prohibit this during incremental compilation?
     // TODO: the parent module is a lie
-    jl_module_t *m = jl_new_module_(name, jl_main_module, default_names);
+    jl_module_t *m = jl_new_module_(name, jl_main_module, default_names, default_names);
     JL_GC_PUSH1(&m);
     if (std_imports)
         jl_add_standard_imports(m);
@@ -163,9 +777,11 @@ static jl_globalref_t *jl_new_globalref(jl_module_t *mod, jl_sym_t *name, jl_bin
     jl_task_t *ct = jl_current_task;
     jl_globalref_t *g = (jl_globalref_t*)jl_gc_alloc(ct->ptls, sizeof(jl_globalref_t), jl_globalref_type);
     g->mod = mod;
-    jl_gc_wb(g, g->mod);
+    jl_gc_wb_fresh(g, g->mod);
     g->name = name;
+    jl_gc_wb_fresh(g, g->name);
     g->binding = b;
+    jl_gc_wb_fresh(g, g->binding);
     return g;
 }
 
@@ -175,275 +791,366 @@ static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t *name)
     assert(jl_is_module(mod) && jl_is_symbol(name));
     jl_binding_t *b = (jl_binding_t*)jl_gc_alloc(ct->ptls, sizeof(jl_binding_t), jl_binding_type);
     jl_atomic_store_relaxed(&b->value, NULL);
-    jl_atomic_store_relaxed(&b->owner, NULL);
-    jl_atomic_store_relaxed(&b->ty, NULL);
+    jl_atomic_store_relaxed(&b->partitions, NULL);
     b->globalref = NULL;
-    b->constp = 0;
-    b->exportp = 0;
-    b->imported = 0;
-    b->deprecated = 0;
-    b->usingfailed = 0;
-    b->padding = 0;
+    b->backedges = NULL;
+    jl_atomic_store_relaxed(&b->flags, 0);
     JL_GC_PUSH1(&b);
     b->globalref = jl_new_globalref(mod, name, b);
+    jl_gc_wb(b, b->globalref);
     JL_GC_POP();
     return b;
 }
 
-static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var) JL_GLOBALLY_ROOTED;
+extern jl_mutex_t jl_modules_mutex;
 
-// get binding for assignment
-JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var)
+static int is_module_open(jl_module_t *m)
 {
-    jl_binding_t *b = jl_get_module_binding(m, var, 1);
+    JL_LOCK(&jl_modules_mutex);
+    int open = ptrhash_has(&jl_current_modules, (void*)m);
+    if (!open && jl_module_init_order != NULL) {
+        size_t i, l = jl_array_len(jl_module_init_order);
+        for (i = 0; i < l; i++) {
+            if (m == (jl_module_t*)jl_array_ptr_ref(jl_module_init_order, i)) {
+                open = 1;
+                break;
+            }
+        }
+    }
+    JL_UNLOCK(&jl_modules_mutex);
+    return open;
+}
+
+extern void check_safe_newbinding(jl_module_t *m, jl_sym_t *var)
+{
+    if (jl_current_task->ptls->in_pure_callback)
+        jl_errorf("new strong globals cannot be created in a generated function. Declare them outside using `global x::Any`.");
+    if (jl_options.incremental && jl_generating_output() && !is_module_open(m)) {
+        jl_errorf("Creating a new global in closed module `%s` (`%s`) breaks incremental compilation "
+                    "because the side effects will not be permanent.",
+                    jl_symbol_name(m->name), jl_symbol_name(var));
+    }
+}
+
+static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b) JL_GLOBALLY_ROOTED;
 
-    if (b) {
-        jl_binding_t *b2 = NULL;
-        if (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b) {
-            jl_module_t *from = jl_binding_dbgmodule(b, m, var);
-            if (from == m)
+// Checks that the binding in general is currently writable, but does not perform any checks on the
+// value to be written into the binding.
+JL_DLLEXPORT void jl_check_binding_currently_writable(jl_binding_t *b, jl_module_t *m, jl_sym_t *s)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (jl_options.depwarn && (bpart->kind & PARTITION_FLAG_DEPWARN)) {
+        jl_binding_deprecation_warning(b);
+    }
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (kind != PARTITION_KIND_GLOBAL && kind != PARTITION_KIND_DECLARED) {
+        if (jl_bkind_is_some_guard(kind)) {
+            jl_errorf("Global %s.%s does not exist and cannot be assigned.\n"
+                        "Note: Julia 1.9 and 1.10 inadvertently omitted this error check (#56933).\n"
+                        "Hint: Declare it using `global %s` inside `%s` before attempting assignment.",
+                        jl_symbol_name(m->name), jl_symbol_name(s),
+                        jl_symbol_name(s), jl_symbol_name(m->name));
+        }
+        else if (jl_bkind_is_some_constant(kind) && kind != PARTITION_KIND_IMPLICIT_CONST) {
+            jl_errorf("invalid assignment to constant %s.%s. This redefinition may be permitted using the `const` keyword.",
+                        jl_symbol_name(m->name), jl_symbol_name(s));
+        }
+        else {
+            jl_module_t *from = jl_binding_dbgmodule(b);
+            if (from == m || !from)
                 jl_errorf("cannot assign a value to imported variable %s.%s",
-                          jl_symbol_name(from->name), jl_symbol_name(var));
+                          jl_symbol_name(m->name), jl_symbol_name(s));
             else
                 jl_errorf("cannot assign a value to imported variable %s.%s from module %s",
-                          jl_symbol_name(from->name), jl_symbol_name(var), jl_symbol_name(m->name));
+                          jl_symbol_name(from->name), jl_symbol_name(s), jl_symbol_name(m->name));
         }
     }
+}
 
+JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var)
+{
+    jl_binding_t *b = jl_get_module_binding(m, var, 1);
+    jl_check_binding_currently_writable(b, m, var);
     return b;
 }
 
 // return module of binding
 JL_DLLEXPORT jl_module_t *jl_get_module_of_binding(jl_module_t *m, jl_sym_t *var)
 {
-    jl_binding_t *b = jl_get_binding(m, var);
-    if (b == NULL)
-        return NULL;
-    return b->globalref->mod; // TODO: deprecate this?
+    size_t world = jl_current_task->world_age;
+    jl_binding_t *b = jl_get_module_binding(m, var, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, world);
+    jl_walk_binding_inplace(&b, &bpart, world);
+    if (jl_binding_kind(bpart) == PARTITION_KIND_IMPLICIT_CONST) {
+        struct implicit_search_resolution resolution = jl_resolve_implicit_import(b, NULL, world, 0);
+        if (!resolution.debug_only_ultimate_binding)
+            jl_error("Constant binding was imported from multiple modules");
+        b = resolution.debug_only_ultimate_binding;
+    }
+    return b ? b->globalref->mod : m;
 }
 
-// get binding for adding a method
-// like jl_get_binding_wr, but has different error paths
-JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_t *var)
+static NOINLINE void print_backdate_admonition(jl_binding_t *b) JL_NOTSAFEPOINT
 {
-    jl_binding_t *b = jl_get_module_binding(m, var, 1);
+    jl_safe_printf(
+        "WARNING: Detected access to binding `%s.%s` in a world prior to its definition world.\n"
+        "  Julia 1.12 has introduced more strict world age semantics for global bindings.\n"
+        "  !!! This code may malfunction under Revise.\n"
+        "  !!! This code will error in future versions of Julia.\n"
+        "Hint: Add an appropriate `invokelatest` around the access to this binding.\n"
+        "To make this warning an error, and hence obtain a stack trace, use `julia --depwarn=error`.\n",
+        jl_symbol_name(b->globalref->mod->name), jl_symbol_name(b->globalref->name));
+}
 
-    jl_binding_t *b2 = NULL;
-    if (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b) {
-        jl_value_t *f = jl_atomic_load_relaxed(&b2->value);
-        jl_module_t *from = jl_binding_dbgmodule(b, m, var);
-        if (f == NULL) {
-            // we must have implicitly imported this with using, so call jl_binding_dbgmodule to try to get the name of the module we got this from
-            jl_errorf("invalid method definition in %s: exported function %s.%s does not exist",
-                      jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var));
-        }
-        // TODO: we might want to require explicitly importing types to add constructors
-        //       or we might want to drop this error entirely
-        if (!b->imported && !(b2->constp && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) {
-            jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended",
-                      jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var));
-        }
-        return b2;
+static inline void check_backdated_binding(jl_binding_t *b, enum jl_partition_kind kind) JL_NOTSAFEPOINT
+{
+    if (__unlikely(kind == PARTITION_KIND_BACKDATED_CONST)) {
+        // We don't want functions that inference executes speculatively to print this warning, so turn those into
+        // an error for inference purposes.
+        if (jl_current_task->ptls->in_pure_callback || jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR)
+            jl_undefined_var_error(b->globalref->name, (jl_value_t*)b->globalref->mod);
+        if (!(jl_atomic_fetch_or_relaxed(&b->flags, BINDING_FLAG_DID_PRINT_BACKDATE_ADMONITION) & BINDING_FLAG_DID_PRINT_BACKDATE_ADMONITION))
+            print_backdate_admonition(b);
     }
+}
 
-    return b;
+JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b)
+{
+    return jl_get_binding_value_in_world(b, jl_current_task->world_age);
 }
 
-typedef struct _modstack_t {
-    jl_module_t *m;
-    jl_sym_t *var;
-    struct _modstack_t *prev;
-} modstack_t;
+JL_DLLEXPORT jl_value_t *jl_get_binding_value_in_world(jl_binding_t *b, size_t world)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, world);
+    jl_walk_binding_inplace(&b, &bpart, world);
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (jl_bkind_is_some_guard(kind))
+        return NULL;
+    if (jl_bkind_is_some_constant(kind)) {
+        check_backdated_binding(b, kind);
+        return bpart->restriction;
+    }
+    assert(!jl_bkind_is_some_import(kind));
+    return jl_atomic_load_relaxed(&b->value);
+}
 
-static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st);
+static jl_value_t *jl_get_binding_value_depwarn(jl_binding_t *b, size_t world)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, world);
+    if (jl_options.depwarn) {
+        int needs_depwarn = 0;
+        jl_walk_binding_inplace_depwarn(&b, &bpart, world, &needs_depwarn);
+        if (needs_depwarn)
+            jl_binding_deprecation_warning(b);
+    }
+    else {
+        jl_walk_binding_inplace(&b, &bpart, world);
+    }
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (jl_bkind_is_some_guard(kind))
+        return NULL;
+    if (jl_bkind_is_some_constant(kind)) {
+        check_backdated_binding(b, kind);
+        return bpart->restriction;
+    }
+    assert(!jl_bkind_is_some_import(kind));
+    return jl_atomic_load_relaxed(&b->value);
+}
 
-static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_get_binding_value_seqcst(jl_binding_t *b)
+{
+    size_t world = jl_current_task->world_age;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, world);
+    jl_walk_binding_inplace(&b, &bpart, world);
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (jl_bkind_is_some_guard(kind))
+        return NULL;
+    if (jl_bkind_is_some_constant(kind)) {
+        check_backdated_binding(b, kind);
+        return bpart->restriction;
+    }
+    assert(!jl_bkind_is_some_import(kind));
+    return jl_atomic_load(&b->value);
+}
 
-#ifndef __clang_gcanalyzer__
-// The analyzer doesn't like looking through the arraylist, so just model the
-// access for it using this function
-static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT {
-    return (jl_module_t*)m->usings.items[i];
+JL_DLLEXPORT jl_value_t *jl_get_latest_binding_value_if_const(jl_binding_t *b)
+{
+    // See note below. Note that this is for some deprecated uses, and should not be added to new code.
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, world);
+    jl_walk_binding_inplace(&b, &bpart, world);
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (jl_bkind_is_some_guard(kind))
+        return NULL;
+    if (!jl_bkind_is_real_constant(kind))
+        return NULL;
+    return bpart->restriction;
 }
-#endif
 
-static int eq_bindings(jl_binding_t *owner, jl_binding_t *alias)
+JL_DLLEXPORT jl_value_t *jl_get_latest_binding_value_if_resolved_and_const_debug_only(jl_binding_t *b)
 {
-    assert(owner == jl_atomic_load_relaxed(&owner->owner));
-    if (owner == alias)
-        return 1;
-    alias = jl_atomic_load_relaxed(&alias->owner);
-    if (owner == alias)
-        return 1;
-    if (owner->constp && alias->constp && jl_atomic_load_relaxed(&owner->value) && jl_atomic_load_relaxed(&alias->value) == jl_atomic_load_relaxed(&owner->value))
-        return 1;
-    return 0;
+    // Unlike jl_get_latest_binding_value_if_const this doesn't try to allocate new binding partitions if they
+    // don't already exist, making this JL_NOTSAFEPOINT. However, as a result, this may fail to return
+    // a value - even if one does exist. It should only be used for reflection/debugging when the integrity
+    // of the runtime is not guaranteed.
+    if (!b)
+        return NULL;
+    jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions);
+    if (!bpart)
+        return NULL;
+    size_t max_world = jl_atomic_load_relaxed(&bpart->max_world);
+    if (max_world != ~(size_t)0)
+        return NULL;
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (jl_bkind_is_some_guard(kind))
+        return NULL;
+    if (!jl_bkind_is_real_constant(kind))
+        return NULL;
+    return bpart->restriction;
 }
 
-// find a binding from a module's `usings` list
-static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, jl_module_t **from, modstack_t *st, int warn)
+JL_DLLEXPORT jl_value_t *jl_get_latest_binding_value_if_resolved_debug_only(jl_binding_t *b)
 {
-    jl_binding_t *b = NULL;
-    jl_module_t *owner = NULL;
-    JL_LOCK(&m->lock);
-    int i = (int)m->usings.len - 1;
-    JL_UNLOCK(&m->lock);
-    for (; i >= 0; --i) {
-        JL_LOCK(&m->lock);
-        jl_module_t *imp = module_usings_getidx(m, i);
-        JL_UNLOCK(&m->lock);
-        jl_binding_t *tempb = jl_get_module_binding(imp, var, 0);
-        if (tempb != NULL && tempb->exportp) {
-            tempb = jl_resolve_owner(NULL, imp, var, st); // find the owner for tempb
-            if (tempb == NULL)
-                // couldn't resolve; try next using (see issue #6105)
-                continue;
-            assert(jl_atomic_load_relaxed(&tempb->owner) == tempb);
-            if (b != NULL && !tempb->deprecated && !b->deprecated && !eq_bindings(tempb, b)) {
-                if (warn) {
-                    // set usingfailed=1 to avoid repeating this warning
-                    // the owner will still be NULL, so it can be later imported or defined
-                    tempb = jl_get_module_binding(m, var, 1);
-                    tempb->usingfailed = 1;
-                    jl_printf(JL_STDERR,
-                              "WARNING: both %s and %s export \"%s\"; uses of it in module %s must be qualified\n",
-                              jl_symbol_name(owner->name),
-                              jl_symbol_name(imp->name), jl_symbol_name(var),
-                              jl_symbol_name(m->name));
-                }
-                return NULL;
-            }
-            if (owner == NULL || !tempb->deprecated) {
-                owner = imp;
-                b = tempb;
-            }
-        }
+    // See note above. Use for debug/reflection purposes only.
+    if (!b)
+        return NULL;
+    jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions);
+    if (!bpart)
+        return NULL;
+    size_t max_world = jl_atomic_load_relaxed(&bpart->max_world);
+    if (max_world != ~(size_t)0)
+        return NULL;
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (jl_bkind_is_some_guard(kind))
+        return NULL;
+    if (jl_bkind_is_some_import(kind))
+        return NULL;
+    if (jl_bkind_is_some_constant(kind)) {
+        return bpart->restriction;
     }
-    *from = owner;
-    return b;
+    return jl_atomic_load_relaxed(&b->value);
+}
+
+JL_DLLEXPORT jl_value_t *jl_bpart_get_restriction_value(jl_binding_partition_t *bpart)
+{
+    jl_value_t *v = bpart->restriction;
+    if (!v)
+        jl_throw(jl_undefref_exception);
+    return v;
 }
 
 // for error message printing: look up the module that exported a binding to m as var
 // this might not be the same as the owner of the binding, since the binding itself may itself have been imported from elsewhere
-static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var)
+static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b)
 {
-    jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-    if (b2 != b && !b->imported) {
-        // for implicitly imported globals, try to re-resolve it to find the module we got it from most directly
-        jl_module_t *from = NULL;
-        b = using_resolve_binding(m, var, &from, NULL, 0);
-        if (b) {
-            if (b2 == NULL || jl_atomic_load_relaxed(&b->owner) == jl_atomic_load_relaxed(&b2->owner))
-                return from;
-            // if we did not find it (or accidentally found a different one), ignore this
-        }
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (jl_bkind_is_some_explicit_import(kind) || kind == PARTITION_KIND_IMPLICIT_GLOBAL) {
+        return ((jl_binding_t*)bpart->restriction)->globalref->mod;
     }
-    return m;
+    if (kind == PARTITION_KIND_IMPLICIT_CONST) {
+        struct implicit_search_resolution resolution = jl_resolve_implicit_import(b, NULL, jl_current_task->world_age, 1);
+        return resolution.debug_only_import_from;
+    }
+    return b->globalref->mod;
 }
 
-static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t *b);
-
-// get binding for reading. might return NULL for unbound.
-static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *m, jl_sym_t *var, modstack_t *st)
+// Look at the given binding and decide whether to add a new method to an existing generic function
+// or ask for the creation of a new generic function (NULL return), checking various error conditions
+// along the way.
+JL_DLLEXPORT jl_value_t *jl_get_existing_strong_gf(jl_binding_t *b, size_t new_world)
 {
-    if (b == NULL)
-        b = jl_get_module_binding(m, var, 1);
-    jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-    if (b2 == NULL) {
-        if (b->usingfailed)
+    assert(new_world > jl_atomic_load_relaxed(&jl_world_counter));
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, new_world);
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (jl_bkind_is_some_constant(kind) && kind != PARTITION_KIND_IMPLICIT_CONST)
+        return bpart->restriction;
+    if (jl_bkind_is_some_guard(kind) || kind == PARTITION_KIND_DECLARED) {
+        check_safe_newbinding(b->globalref->mod, b->globalref->name);
+        return NULL;
+    }
+    if (!jl_bkind_is_some_import(kind)) {
+        jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(b->globalref->name));
+    }
+    jl_binding_t *ownerb = b;
+    jl_walk_binding_inplace(&ownerb, &bpart, new_world);
+    jl_value_t *f = NULL;
+    if (jl_bkind_is_some_constant(jl_binding_kind(bpart)))
+        f = bpart->restriction;
+    if (f == NULL) {
+        if (jl_bkind_is_some_implicit(kind)) {
+            check_safe_newbinding(b->globalref->mod, b->globalref->name);
             return NULL;
-        modstack_t top = { m, var, st };
-        modstack_t *tmp = st;
-        for (; tmp != NULL; tmp = tmp->prev) {
-            if (tmp->m == m && tmp->var == var) {
-                // import cycle without finding actual location
-                return NULL;
-            }
         }
-        jl_module_t *from = NULL; // for error message printing
-        b2 = using_resolve_binding(m, var, &from, &top, 1);
-        if (b2 == NULL)
+        jl_module_t *from = jl_binding_dbgmodule(b);
+        assert(from); // Can only be NULL if implicit, which we excluded above
+        jl_errorf("invalid method definition in %s: exported function %s.%s does not exist",
+                    jl_module_debug_name(b->globalref->mod), jl_module_debug_name(from), jl_symbol_name(b->globalref->name));
+    }
+    int istype = f && jl_is_type(f);
+    if (!istype) {
+        if (jl_bkind_is_some_implicit(kind)) {
+            check_safe_newbinding(b->globalref->mod, b->globalref->name);
             return NULL;
-        assert(from);
-        JL_GC_PROMISE_ROOTED(from); // gc-analysis does not understand output parameters
-        if (b2->deprecated) {
-            if (jl_atomic_load_relaxed(&b2->value) == jl_nothing) {
-                // silently skip importing deprecated values assigned to nothing (to allow later mutation)
-                return NULL;
-            }
         }
-        // do a full import to prevent the result of this lookup from
-        // changing, for example if this var is assigned to later.
-        jl_binding_t *owner = NULL;
-        if (!jl_atomic_cmpswap(&b->owner, &owner, b2)) {
-            // concurrent import
-            return owner;
-        }
-        if (b2->deprecated) {
-            b->deprecated = 1; // we will warn about this below, but we might want to warn at the use sites too
-            if (m != jl_main_module && m != jl_base_module &&
-                jl_options.depwarn != JL_OPTIONS_DEPWARN_OFF) {
-                /* with #22763, external packages wanting to replace
-                   deprecated Base bindings should simply export the new
-                   binding */
-                jl_printf(JL_STDERR,
-                          "WARNING: using deprecated binding %s.%s in %s.\n",
-                          jl_symbol_name(from->name), jl_symbol_name(var),
-                          jl_symbol_name(m->name));
-                jl_binding_dep_message(from, var, b2);
-            }
+        else if (kind != PARTITION_KIND_IMPORTED) {
+            // TODO: we might want to require explicitly importing types to add constructors
+            //       or we might want to drop this error entirely
+            jl_module_t *from = jl_binding_dbgmodule(b);
+            assert(from); // Can only be NULL if implicit, which we excluded above
+            jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended",
+                        jl_module_debug_name(b->globalref->mod), jl_module_debug_name(from), jl_symbol_name(b->globalref->name));
         }
     }
-    assert(jl_atomic_load_relaxed(&b2->owner) == b2);
-    return b2;
-}
-
-JL_DLLEXPORT jl_binding_t *jl_get_binding_if_bound(jl_module_t *m, jl_sym_t *var)
-{
-    jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b == NULL ? NULL : jl_atomic_load_relaxed(&b->owner);
+    else if (kind != PARTITION_KIND_IMPORTED) {
+        int should_error = strcmp(jl_symbol_name(b->globalref->name), "=>") == 0;
+        jl_module_t *from = jl_binding_dbgmodule(b);
+        if (should_error) {
+            jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended",
+                        jl_module_debug_name(b->globalref->mod), from ? jl_module_debug_name(from) : "<multiple modules>", jl_symbol_name(b->globalref->name));
+        }
+        else if (!(jl_atomic_fetch_or_relaxed(&b->flags, BINDING_FLAG_DID_PRINT_IMPLICIT_IMPORT_ADMONITION) &
+                                              BINDING_FLAG_DID_PRINT_IMPLICIT_IMPORT_ADMONITION)) {
+            jl_printf(JL_STDERR, "WARNING: Constructor for type \"%s\" was extended in `%s` without explicit qualification or import.\n"
+                                 "  NOTE: Assumed \"%s\" refers to `%s.%s`. This behavior is deprecated and may differ in future versions.\n"
+                                 "  NOTE: This behavior may have differed in Julia versions prior to 1.12.\n"
+                                 "  Hint: If you intended to create a new generic function of the same name, use `function %s end`.\n"
+                                 "  Hint: To silence the warning, qualify `%s` as `%s.%s` in the method signature or explicitly `import %s: %s`.\n",
+                jl_symbol_name(b->globalref->name), jl_module_debug_name(b->globalref->mod),
+                jl_symbol_name(b->globalref->name), jl_module_debug_name(from), jl_symbol_name(b->globalref->name),
+                jl_symbol_name(b->globalref->name), jl_symbol_name(b->globalref->name), jl_module_debug_name(from), jl_symbol_name(b->globalref->name),
+                jl_module_debug_name(from), jl_symbol_name(b->globalref->name));
+        }
+    }
+    return f;
 }
 
-
-// get the current likely owner of binding when accessing m.var, without resolving the binding (it may change later)
-JL_DLLEXPORT jl_binding_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var)
-{
-    jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    jl_module_t *from = m;
-    if (b == NULL || (!b->usingfailed && jl_atomic_load_relaxed(&b->owner) == NULL))
-        b = using_resolve_binding(m, var, &from, NULL, 0);
-    else
-        b = jl_atomic_load_relaxed(&b->owner);
-    return b;
-}
+static void jl_binding_dep_message(jl_binding_t *b);
 
 // get type of binding m.var, without resolving the binding
 JL_DLLEXPORT jl_value_t *jl_get_binding_type(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
     if (b == NULL)
         return jl_nothing;
-    b = jl_atomic_load_relaxed(&b->owner);
-    if (b == NULL)
+    jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (jl_bkind_is_some_guard(kind) || kind == PARTITION_KIND_DECLARED)
         return jl_nothing;
-    jl_value_t *ty = jl_atomic_load_relaxed(&b->ty);
-    return ty ? ty : jl_nothing;
+    if (jl_bkind_is_some_constant(kind)) {
+        // TODO: We would like to return the type of the constant, but
+        // currently code relies on this returning any to bypass conversion
+        // before an attempted assignment to a constant.
+        // return bpart->restriction;
+        return (jl_value_t*)jl_any_type;
+    }
+    return bpart->restriction;
 }
 
 JL_DLLEXPORT jl_binding_t *jl_get_binding(jl_module_t *m, jl_sym_t *var)
 {
-    return jl_resolve_owner(NULL, m, var, NULL);
-}
-
-JL_DLLEXPORT jl_binding_t *jl_get_binding_or_error(jl_module_t *m, jl_sym_t *var)
-{
-    jl_binding_t *b = jl_get_binding(m, var);
-    if (b == NULL)
-        jl_undefined_var_error(var);
-    // XXX: this only considers if the original is deprecated, not the binding in m
-    if (b->deprecated)
-        jl_binding_deprecation_warning(m, var, b);
-    return b;
+    return jl_get_module_binding(m, var, 1);
 }
 
 JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var)
@@ -458,16 +1165,19 @@ JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var)
 JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b && b->imported;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return b && jl_binding_kind(bpart) == PARTITION_KIND_IMPORTED;
 }
 
-extern const char *jl_filename;
-extern int jl_lineno;
+extern _Atomic(const char *) jl_filename;
+extern _Atomic(int) jl_lineno;
 
 static char const dep_message_prefix[] = "_dep_message_";
 
-static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t *b)
+static void jl_binding_dep_message(jl_binding_t *b)
 {
+    jl_module_t *m = b->globalref->mod;
+    jl_sym_t *name = b->globalref->name;
     size_t prefix_len = strlen(dep_message_prefix);
     size_t name_len = strlen(jl_symbol_name(name));
     char *dep_binding_name = (char*)alloca(prefix_len+name_len+1);
@@ -477,7 +1187,7 @@ static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t
     jl_binding_t *dep_message_binding = jl_get_binding(m, jl_symbol(dep_binding_name));
     jl_value_t *dep_message = NULL;
     if (dep_message_binding != NULL)
-        dep_message = jl_atomic_load_relaxed(&dep_message_binding->value);
+        dep_message = jl_get_binding_value(dep_message_binding);
     JL_GC_PUSH1(&dep_message);
     if (dep_message != NULL) {
         if (jl_is_string(dep_message)) {
@@ -488,7 +1198,7 @@ static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t
         }
     }
     else {
-        jl_value_t *v = jl_atomic_load_relaxed(&b->value);
+        jl_value_t *v = jl_get_binding_value(b);
         dep_message = v; // use as gc-root
         if (v) {
             if (jl_is_type(v) || jl_is_module(v)) {
@@ -497,14 +1207,14 @@ static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t
                 jl_printf(JL_STDERR, " instead.");
             }
             else {
-                jl_methtable_t *mt = jl_gf_mtable(v);
-                if (mt != NULL) {
+                jl_typename_t *tn = ((jl_datatype_t*)jl_typeof(v))->name;
+                if (tn != NULL) {
                     jl_printf(JL_STDERR, ", use ");
-                    if (mt->module != jl_core_module) {
-                        jl_static_show(JL_STDERR, (jl_value_t*)mt->module);
+                    if (tn->module != jl_core_module) {
+                        jl_static_show(JL_STDERR, (jl_value_t*)tn->module);
                         jl_printf(JL_STDERR, ".");
                     }
-                    jl_printf(JL_STDERR, "%s", jl_symbol_name(mt->name));
+                    jl_printf(JL_STDERR, "%s", jl_symbol_name(tn->singletonname));
                     jl_printf(JL_STDERR, " instead.");
                 }
             }
@@ -514,172 +1224,343 @@ static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t
     JL_GC_POP();
 }
 
+JL_DLLEXPORT void check_safe_import_from(jl_module_t *m)
+{
+    if (jl_options.incremental && jl_generating_output() && m == jl_main_module) {
+        jl_errorf("Any `import` or `using` from `Main` is prohibited during incremental compilation.");
+    }
+}
+
+static int eq_bindings(jl_binding_partition_t *owner, jl_binding_t *alias, size_t world)
+{
+    jl_binding_t *ownerb = NULL;
+    jl_binding_partition_t *alias_bpart = jl_get_binding_partition(alias, world);
+    if (owner == alias_bpart)
+        return 1;
+    jl_walk_binding_inplace(&ownerb, &owner, world);
+    jl_walk_binding_inplace(&alias, &alias_bpart, world);
+    if (jl_bkind_is_some_constant(jl_binding_kind(owner)) &&
+        jl_bkind_is_some_constant(jl_binding_kind(alias_bpart)) &&
+        owner->restriction &&
+        alias_bpart->restriction == owner->restriction)
+        return 1;
+    return owner == alias_bpart;
+}
+
 // NOTE: we use explici since explicit is a C++ keyword
-static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *asname, jl_sym_t *s, int explici)
+JL_DLLEXPORT void jl_module_import(jl_task_t *ct, jl_module_t *to, jl_module_t *from, jl_sym_t *asname, jl_sym_t *s, int explici)
 {
+    check_safe_import_from(from);
     jl_binding_t *b = jl_get_binding(from, s);
-    if (b == NULL) {
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (bpart->kind & PARTITION_FLAG_DEPRECATED) {
+        if (jl_get_binding_value(b) == jl_nothing) {
+            // silently skip importing deprecated values assigned to nothing (to allow later mutation)
+            return;
+        }
+        else if (to != jl_main_module && to != jl_base_module &&
+                    jl_options.depwarn != JL_OPTIONS_DEPWARN_OFF) {
+            /* with #22763, external packages wanting to replace
+                deprecated Base bindings should simply export the new
+                binding */
+            jl_printf(JL_STDERR,
+                        "WARNING: importing deprecated binding %s.%s into %s%s%s.\n",
+                        jl_symbol_name(from->name), jl_symbol_name(s),
+                        jl_symbol_name(to->name),
+                        asname == s ? "" : " as ",
+                        asname == s ? "" : jl_symbol_name(asname));
+            jl_binding_dep_message(b);
+        }
+    }
+
+    jl_binding_t *ownerb = b;
+    jl_binding_partition_t *ownerbpart = bpart;
+    jl_walk_binding_inplace(&ownerb, &ownerbpart, ct->world_age);
+
+    if (jl_bkind_is_some_guard(jl_binding_kind(ownerbpart))) {
         jl_printf(JL_STDERR,
-                  "WARNING: could not import %s.%s into %s\n",
+                  "WARNING: Imported binding %s.%s was undeclared at import time during import to %s.\n",
                   jl_symbol_name(from->name), jl_symbol_name(s),
                   jl_symbol_name(to->name));
     }
+
+    jl_binding_t *bto = jl_get_module_binding(to, asname, 1);
+    if (bto == b) {
+        // importing a binding on top of itself. harmless.
+        return;
+    }
+    JL_LOCK(&world_counter_lock);
+    size_t new_world = jl_atomic_load_acquire(&jl_world_counter)+1;
+    jl_binding_partition_t *btopart = jl_get_binding_partition(bto, new_world);
+    enum jl_partition_kind btokind = jl_binding_kind(btopart);
+    if (jl_bkind_is_some_implicit(btokind)) {
+        jl_binding_partition_t *new_bpart = jl_replace_binding_locked(bto, btopart, (jl_value_t*)b, (explici != 0) ? PARTITION_KIND_IMPORTED : PARTITION_KIND_EXPLICIT, new_world);
+        if (jl_atomic_load_relaxed(&new_bpart->max_world) == ~(size_t)0)
+            jl_add_binding_backedge(b, (jl_value_t*)bto);
+        jl_atomic_store_release(&jl_world_counter, new_world);
+    }
     else {
-        assert(jl_atomic_load_relaxed(&b->owner) == b);
-        if (b->deprecated) {
-            if (jl_atomic_load_relaxed(&b->value) == jl_nothing) {
-                // silently skip importing deprecated values assigned to nothing (to allow later mutation)
-                return;
+        if (eq_bindings(bpart, bto, new_world)) {
+            // already imported - potentially upgrade _EXPLICIT to _IMPORTED
+            if (btokind == PARTITION_KIND_EXPLICIT && explici != 0) {
+                jl_replace_binding_locked(bto, btopart, (jl_value_t*)b, PARTITION_KIND_IMPORTED, new_world);
+                jl_atomic_store_release(&jl_world_counter, new_world);
             }
-            else if (to != jl_main_module && to != jl_base_module &&
-                     jl_options.depwarn != JL_OPTIONS_DEPWARN_OFF) {
-                /* with #22763, external packages wanting to replace
-                   deprecated Base bindings should simply export the new
-                   binding */
-                jl_printf(JL_STDERR,
-                          "WARNING: importing deprecated binding %s.%s into %s%s%s.\n",
-                          jl_symbol_name(from->name), jl_symbol_name(s),
-                          jl_symbol_name(to->name),
-                          asname == s ? "" : " as ",
-                          asname == s ? "" : jl_symbol_name(asname));
-                jl_binding_dep_message(from, s, b);
-            }
-        }
-
-        jl_binding_t *bto = jl_get_module_binding(to, asname, 1);
-        if (bto == b) {
-            // importing a binding on top of itself. harmless.
-            return;
         }
-        jl_binding_t *ownerto = NULL;
-        if (jl_atomic_cmpswap(&bto->owner, &ownerto, b)) {
-            bto->imported |= (explici != 0);
-            bto->deprecated |= b->deprecated; // we already warned about this above, but we might want to warn at the use sites too
+        else if (jl_bkind_is_some_import(btokind)) {
+            // already imported from somewhere else
+            jl_printf(JL_STDERR,
+                        "WARNING: ignoring conflicting import of %s.%s into %s\n",
+                        jl_symbol_name(from->name), jl_symbol_name(s),
+                        jl_symbol_name(to->name));
         }
         else {
-            if (eq_bindings(b, bto)) {
-                // already imported
-                bto->imported |= (explici != 0);
-            }
-            else if (ownerto != bto) {
-                // already imported from somewhere else
-                jl_printf(JL_STDERR,
-                          "WARNING: ignoring conflicting import of %s.%s into %s\n",
-                          jl_symbol_name(from->name), jl_symbol_name(s),
-                          jl_symbol_name(to->name));
-            }
-            else {
-                // conflict with name owned by destination module
-                jl_printf(JL_STDERR,
-                          "WARNING: import of %s.%s into %s conflicts with an existing identifier; ignored.\n",
-                          jl_symbol_name(from->name), jl_symbol_name(s),
-                          jl_symbol_name(to->name));
-            }
+            // conflict with name owned by destination module
+            jl_printf(JL_STDERR,
+                        "WARNING: import of %s.%s into %s conflicts with an existing identifier; ignored.\n",
+                        jl_symbol_name(from->name), jl_symbol_name(s),
+                        jl_symbol_name(to->name));
         }
     }
+    JL_UNLOCK(&world_counter_lock);
+}
+
+JL_DLLEXPORT void jl_import_module(jl_task_t *ct, jl_module_t *JL_NONNULL m, jl_module_t *import, jl_sym_t *asname)
+{
+    assert(m);
+    jl_sym_t *name = asname ? asname : import->name;
+    // TODO: this is a bit race-y with what error message we might print
+    jl_binding_t *b = jl_get_module_binding(m, name, 1);
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, world);
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (!jl_bkind_is_some_implicit(kind) && kind != PARTITION_KIND_DECLARED) {
+        // Unlike regular constant declaration, we allow this as long as we eventually end up at a constant.
+        jl_walk_binding_inplace(&b, &bpart, world);
+        if (jl_bkind_is_some_constant(jl_binding_kind(bpart))) {
+            // Already declared (e.g. on another thread) or imported.
+            if (bpart->restriction == (jl_value_t*)import)
+                return;
+        }
+        jl_errorf("importing %s into %s conflicts with an existing global",
+                    jl_symbol_name(name), jl_symbol_name(m->name));
+    }
+    jl_declare_constant_val2(b, m, name, (jl_value_t*)import, PARTITION_KIND_CONST_IMPORT);
 }
 
-JL_DLLEXPORT void jl_module_import(jl_module_t *to, jl_module_t *from, jl_sym_t *s)
-{
-    module_import_(to, from, s, s, 1);
-}
-
-JL_DLLEXPORT void jl_module_import_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname)
-{
-    module_import_(to, from, asname, s, 1);
-}
-
-JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s)
+void jl_add_usings_backedge(jl_module_t *from, jl_module_t *to)
 {
-    module_import_(to, from, s, s, 0);
+    JL_LOCK(&from->lock);
+    if (from->usings_backedges == jl_nothing) {
+        from->usings_backedges = (jl_value_t*)jl_alloc_vec_any(0);
+        jl_gc_wb(from, from->usings_backedges);
+    }
+    jl_array_ptr_1d_push((jl_array_t*)from->usings_backedges, (jl_value_t*)to);
+    JL_UNLOCK(&from->lock);
 }
 
-JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname)
+void jl_module_initial_using(jl_module_t *to, jl_module_t *from)
 {
-    module_import_(to, from, asname, s, 0);
+    struct _jl_module_using new_item = {
+        .mod = from,
+        .min_world = 0,
+        .max_world = ~(size_t)0,
+        .flags = 0
+    };
+    arraylist_grow(&to->usings, sizeof(struct _jl_module_using)/sizeof(void*));
+    memcpy(&to->usings.items[to->usings.len-4], &new_item, sizeof(struct _jl_module_using));
+    jl_gc_wb(to, from);
+    jl_add_usings_backedge(from, to);
 }
 
-
-JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from)
+JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from, size_t flags)
 {
     if (to == from)
         return;
+    check_safe_import_from(from);
+    JL_LOCK(&world_counter_lock);
     JL_LOCK(&to->lock);
-    for (size_t i = 0; i < to->usings.len; i++) {
-        if (from == to->usings.items[i]) {
+
+    // Check if this module is already in the usings list
+    size_t existing_idx = (size_t)-1;
+    for (size_t i = 0; i < module_usings_length(to); i++) {
+        if (from == module_usings_getmod(to, i)) {
+            existing_idx = i;
+            break;
+        }
+    }
+
+    size_t new_world = jl_atomic_load_acquire(&jl_world_counter)+1;
+
+    if (existing_idx == (size_t)-1) {
+        // Add new using entry
+        struct _jl_module_using new_item = {
+            .mod = from,
+            .min_world = new_world,
+            .max_world = ~(size_t)0,
+            .flags = flags
+        };
+        arraylist_grow(&to->usings, sizeof(struct _jl_module_using)/sizeof(void*));
+        memcpy(&to->usings.items[to->usings.len-4], &new_item, sizeof(struct _jl_module_using));
+        jl_gc_wb(to, from);
+    } else {
+        // Update existing entry to add new flags
+        struct _jl_module_using *existing = module_usings_getidx(to, existing_idx);
+        // Early out if reexport is already set (strongest form), or if all requested flags are already set
+        if ((existing->flags & JL_MODULE_USING_REEXPORT) || (existing->flags & flags) == flags) {
             JL_UNLOCK(&to->lock);
+            JL_UNLOCK(&world_counter_lock);
             return;
         }
+        existing->flags |= flags;
     }
-    arraylist_push(&to->usings, from);
-    jl_gc_wb(to, from);
+
+    // Set has_reexports flag if this is a reexport using
+    if (flags & JL_MODULE_USING_REEXPORT) {
+        jl_atomic_store_relaxed(&to->has_reexports, 1);
+    }
+
     JL_UNLOCK(&to->lock);
 
-    // print a warning if something visible via this "using" conflicts with
-    // an existing identifier. note that an identifier added later may still
-    // silently override a "using" name. see issue #2054.
+    // Go through all exported bindings. If we have a binding for this in the
+    // importing module and it is some import or guard, we need to recompute
+    // it.
     jl_svec_t *table = jl_atomic_load_relaxed(&from->bindings);
     for (size_t i = 0; i < jl_svec_len(table); i++) {
-        jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i);
+        jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
         if ((void*)b == jl_nothing)
             break;
-        if (b->exportp && (jl_atomic_load_relaxed(&b->owner) == b || b->imported)) {
+        jl_binding_partition_t *frombpart = jl_get_binding_partition(b, new_world);
+        if (jl_bpart_is_exported(frombpart->kind)) {
             jl_sym_t *var = b->globalref->name;
             jl_binding_t *tob = jl_get_module_binding(to, var, 0);
-            if (tob && jl_atomic_load_relaxed(&tob->owner) != NULL &&
-                // don't warn for conflicts with the module name itself.
-                // see issue #4715
-                var != to->name &&
-                !eq_bindings(jl_atomic_load_relaxed(&tob->owner), b)) {
-                jl_printf(JL_STDERR,
-                          "WARNING: using %s.%s in module %s conflicts with an existing identifier.\n",
-                          jl_symbol_name(from->name), jl_symbol_name(var),
-                          jl_symbol_name(to->name));
+            if (tob) {
+                jl_binding_partition_t *tobpart = jl_atomic_load_relaxed(&tob->partitions);
+                if (tobpart) {
+                    enum jl_partition_kind kind = jl_binding_kind(tobpart);
+                    if (jl_bkind_is_some_implicit(kind)) {
+                        jl_replace_binding_locked(tob, tobpart, NULL, PARTITION_FAKE_KIND_IMPLICIT_RECOMPUTE, new_world);
+                    }
+                }
             }
         }
         table = jl_atomic_load_relaxed(&from->bindings);
     }
+
+    jl_add_usings_backedge(from, to);
+
+    jl_atomic_store_release(&jl_world_counter, new_world);
+    JL_UNLOCK(&world_counter_lock);
 }
 
-JL_DLLEXPORT void jl_module_export(jl_module_t *from, jl_sym_t *s)
+JL_DLLEXPORT jl_value_t *jl_get_module_usings_backedges(jl_module_t *m)
 {
-    jl_binding_t *b = jl_get_module_binding(from, s, 1);
-    b->exportp = 1;
+    // We assume the caller holds the world_counter_lock, which is the only place we set this
+    // TODO: We may want to make this more precise with the module lock
+    return m->usings_backedges;
 }
 
-JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var)
+JL_DLLEXPORT size_t jl_module_scanned_methods_length(jl_module_t *m)
 {
-    jl_binding_t *b = jl_get_binding(m, var);
-    return b && (jl_atomic_load_relaxed(&b->value) != NULL);
+    JL_LOCK(&m->lock);
+    size_t len = 0;
+    if (m->scanned_methods != jl_nothing)
+        len = jl_array_len(m->scanned_methods);
+    JL_UNLOCK(&m->lock);
+    return len;
 }
 
-JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var)
+JL_DLLEXPORT jl_value_t *jl_module_scanned_methods_getindex(jl_module_t *m, size_t i)
 {
-    jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b && (b->exportp || jl_atomic_load_relaxed(&b->owner) == b);
+    JL_LOCK(&m->lock);
+    assert(m->scanned_methods != jl_nothing);
+    jl_value_t *ret = jl_array_ptr_ref(m->scanned_methods, i-1);
+    JL_UNLOCK(&m->lock);
+    return ret;
+}
+
+JL_DLLEXPORT jl_value_t *jl_get_module_binding_or_nothing(jl_module_t *m, jl_sym_t *s)
+{
+    jl_binding_t *b = jl_get_module_binding(m, s, 0);
+    if (!b)
+        return jl_nothing;
+    return (jl_value_t*)b;
+}
+
+int jl_module_public_(jl_module_t *from, jl_sym_t *s, int exported, size_t new_world)
+{
+    // caller must hold world_counter_lock
+    jl_binding_t *b = jl_get_module_binding(from, s, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, new_world);
+    int was_exported = (bpart->kind & PARTITION_FLAG_EXPORTED) != 0;
+    if (jl_atomic_load_relaxed(&b->flags) & BINDING_FLAG_PUBLICP) {
+        // check for conflicting declarations
+        if (was_exported && !exported)
+            jl_errorf("cannot declare %s.%s public; it is already declared exported",
+                      jl_symbol_name(from->name), jl_symbol_name(s));
+        if (!was_exported && exported)
+            jl_errorf("cannot declare %s.%s exported; it is already declared public",
+                      jl_symbol_name(from->name), jl_symbol_name(s));
+    }
+    jl_atomic_fetch_or_relaxed(&b->flags, BINDING_FLAG_PUBLICP);
+    if (was_exported != exported) {
+        jl_replace_binding_locked2(b, bpart, bpart->restriction, bpart->kind | PARTITION_FLAG_EXPORTED, new_world);
+        return 1;
+    }
+    return 0;
+}
+
+JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var, int allow_import) // unlike most queries here, this is currently seq_cst
+{
+    jl_binding_t *b = jl_get_module_binding(m, var, allow_import);
+    if (!b)
+        return 0;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (!bpart)
+        return 0;
+    if (!allow_import) {
+        if (!bpart || jl_bkind_is_some_import(jl_binding_kind(bpart)))
+            return 0;
+    } else {
+        jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    }
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    if (jl_bkind_is_some_guard(kind))
+        return 0;
+    if (jl_bkind_is_defined_constant(kind)) {
+        if (__unlikely(kind == PARTITION_KIND_BACKDATED_CONST)) {
+            return !(jl_current_task->ptls->in_pure_callback || jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR);
+        }
+        // N.B.: No backdated admonition for isdefined
+        return 1;
+    }
+    return jl_atomic_load(&b->value) != NULL;
 }
 
 JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b && b->exportp;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return b && jl_bpart_is_exported(bpart->kind);
 }
 
-JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var)
+JL_DLLEXPORT int jl_module_public_p(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b && jl_atomic_load_relaxed(&b->owner) != NULL;
+    return b && (jl_atomic_load_relaxed(&b->flags) & BINDING_FLAG_PUBLICP);
 }
 
-static uint_t bindingkey_hash(size_t idx, jl_svec_t *data)
+uint_t bindingkey_hash(size_t idx, jl_value_t *data)
 {
-    jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx);
+    jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx); // This must always happen inside the lock
     jl_sym_t *var = b->globalref->name;
     return var->hash;
 }
 
-static int bindingkey_eq(size_t idx, const void *var, jl_svec_t *data, uint_t hv)
+static int bindingkey_eq(size_t idx, const void *var, jl_value_t *data, uint_t hv)
 {
+    if (idx >= jl_svec_len(data))
+        return 0; // We got a OOB access, probably due to a data race
     jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx);
     jl_sym_t *name = b->globalref->name;
     return var == name;
@@ -689,11 +1570,12 @@ JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m, jl_sym_t *var,
 {
     uint_t hv = var->hash;
     for (int locked = 0; ; locked++) {
-        jl_array_t *bindingkeyset = jl_atomic_load_acquire(&m->bindingkeyset);
+        jl_genericmemory_t *bindingkeyset = jl_atomic_load_acquire(&m->bindingkeyset);
         jl_svec_t *bindings = jl_atomic_load_relaxed(&m->bindings);
-        ssize_t idx = jl_smallintset_lookup(bindingkeyset, bindingkey_eq, var, bindings, hv); // acquire
+        ssize_t idx = jl_smallintset_lookup(bindingkeyset, bindingkey_eq, var, (jl_value_t*)bindings, hv, 0); // acquire
         if (idx != -1) {
             jl_binding_t *b = (jl_binding_t*)jl_svecref(bindings, idx); // relaxed
+            JL_GC_PROMISE_ROOTED(b);
             if (locked)
                 JL_UNLOCK(&m->lock);
             return b;
@@ -725,7 +1607,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m, jl_sym_t *var,
             jl_binding_t *b = new_binding(m, var);
             assert(jl_svecref(bindings, i) == jl_nothing);
             jl_svecset(bindings, i, b); // relaxed
-            jl_smallintset_insert(&m->bindingkeyset, (jl_value_t*)m, bindingkey_hash, i, bindings); // release
+            jl_smallintset_insert(&m->bindingkeyset, (jl_value_t*)m, bindingkey_hash, i, (jl_value_t*)bindings); // release
             JL_UNLOCK(&m->lock);
             return b;
         }
@@ -733,23 +1615,27 @@ JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m, jl_sym_t *var,
 }
 
 
-JL_DLLEXPORT jl_value_t *jl_get_globalref_value(jl_globalref_t *gr)
+// get the value (or null) in the world
+jl_value_t *jl_get_globalref_value(jl_globalref_t *gr, size_t world)
 {
     jl_binding_t *b = gr->binding;
-    b = jl_resolve_owner(b, gr->mod, gr->name, NULL);
-    // ignores b->deprecated
-    return b == NULL ? NULL : jl_atomic_load_relaxed(&b->value);
+    if (!b)
+        b = jl_get_module_binding(gr->mod, gr->name, 1);
+    return jl_get_binding_value_depwarn(b, world);
+}
+
+// get the value (or null) in the world
+jl_value_t *jl_get_global_value(jl_module_t *m, jl_sym_t *var, size_t world)
+{
+    jl_binding_t *b = jl_get_module_binding(m, var, 1);
+    return jl_get_binding_value_depwarn(b, world);
 }
 
+// get the global (or null) in the latest world
 JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var)
 {
-    jl_binding_t *b = jl_get_binding(m, var);
-    if (b == NULL)
-        return NULL;
-    // XXX: this only considers if the original is deprecated, not the binding in m
-    if (b->deprecated)
-        jl_binding_deprecation_warning(m, var, b);
-    return jl_atomic_load_relaxed(&b->value);
+    jl_binding_t *b = jl_get_module_binding(m, var, 1);
+    return jl_get_binding_value_depwarn(b, jl_atomic_load_acquire(&jl_world_counter));
 }
 
 JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT)
@@ -758,143 +1644,369 @@ JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *va
     jl_checked_assignment(bp, m, var, val);
 }
 
+void jl_set_initial_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT, int exported)
+{
+    // this function is only valid during initialization, so there is no risk of data races her are not too important to use
+    int kind = PARTITION_KIND_CONST | (exported ? PARTITION_FLAG_EXPORTED : 0);
+    // jl_declare_constant_val3(NULL, m, var, (jl_value_t*)jl_any_type, kind, 0);
+    jl_binding_t *bp = jl_get_module_binding(m, var, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(bp, 0);
+    assert(jl_atomic_load_relaxed(&bpart->min_world) == 0);
+    jl_atomic_store_relaxed(&bpart->max_world, ~(size_t)0); // jl_check_new_binding_implicit likely incorrectly truncated it
+    if (exported)
+        jl_atomic_fetch_or_relaxed(&bp->flags, BINDING_FLAG_PUBLICP);
+    bpart->kind = kind | (bpart->kind & PARTITION_MASK_FLAG);
+    bpart->restriction = val;
+    jl_gc_wb(bpart, val);
+}
+
 JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT)
 {
-    // this function is mostly only used during initialization, so the data races here are not too important to us
-    jl_binding_t *bp = jl_get_binding_wr(m, var);
-    if (jl_atomic_load_relaxed(&bp->value) == NULL) {
-        jl_value_t *old_ty = NULL;
-        jl_atomic_cmpswap_relaxed(&bp->ty, &old_ty, (jl_value_t*)jl_any_type);
-        uint8_t constp = 0;
-        // if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) {
-        if (constp = bp->constp, bp->constp = 1, constp == 0) {
-            jl_value_t *old = NULL;
-            if (jl_atomic_cmpswap(&bp->value, &old, val)) {
-                jl_gc_wb_binding(bp, val);
-                return;
-            }
+    // this function is dangerous and unsound. do not use.
+    jl_binding_t *bp = jl_get_module_binding(m, var, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(bp, jl_current_task->world_age);
+    jl_atomic_store_relaxed(&bpart->min_world, 0);
+    jl_atomic_store_release(&bpart->max_world, ~(size_t)0);
+    bpart->kind = PARTITION_KIND_CONST | (bpart->kind & PARTITION_MASK_FLAG);
+    bpart->restriction = val;
+    jl_gc_wb(bpart, val);
+}
+
+void jl_invalidate_binding_refs(jl_globalref_t *ref, jl_binding_partition_t *invalidated_bpart, jl_binding_partition_t *new_bpart, size_t new_world)
+{
+    jl_value_t *invalidate_code_for_globalref = NULL;
+    if (jl_base_module != NULL)
+        invalidate_code_for_globalref = jl_get_global(jl_base_module, jl_symbol("invalidate_code_for_globalref!"));
+    if (!invalidate_code_for_globalref)
+        jl_error("Binding invalidation is not permitted during bootstrap.");
+    jl_value_t **fargs;
+    JL_GC_PUSHARGS(fargs, 5);
+    fargs[0] = (jl_value_t*)invalidate_code_for_globalref;
+    fargs[1] = (jl_value_t*)ref;
+    fargs[2] = (jl_value_t*)invalidated_bpart;
+    fargs[3] = (jl_value_t*)new_bpart;
+    fargs[4] = jl_box_ulong(new_world);
+    jl_apply(fargs, 5);
+    JL_GC_POP();
+}
+
+JL_DLLEXPORT void jl_add_binding_backedge(jl_binding_t *b, jl_value_t *edge)
+{
+    JL_LOCK(&b->globalref->mod->lock);
+    if (!b->backedges) {
+        b->backedges = jl_alloc_vec_any(0);
+        jl_gc_wb(b, b->backedges);
+    } else if (jl_array_len(b->backedges) > 0 &&
+               jl_array_ptr_ref(b->backedges, jl_array_len(b->backedges)-1) == edge) {
+        // Optimization: Deduplicate repeated insertion of the same edge (e.g. during
+        // definition of a method that contains many references to the same global)
+        JL_UNLOCK(&b->globalref->mod->lock);
+        return;
+    }
+    jl_array_ptr_1d_push(b->backedges, edge);
+    JL_UNLOCK(&b->globalref->mod->lock);
+}
+
+// Called for all GlobalRefs found in lowered code. Adds backedges for cross-module
+// GlobalRefs.
+JL_DLLEXPORT int jl_maybe_add_binding_backedge(jl_binding_t *b, jl_value_t *edge, jl_method_t *for_method)
+{
+    if (!edge)
+        return 0;
+    jl_module_t *defining_module = for_method->module;
+    // N.B.: The logic for evaluating whether a backedge is required must
+    // match the invalidation logic.
+    if (b->globalref->mod == defining_module) {
+        // No backedge required - invalidation will forward scan
+        jl_atomic_fetch_or(&b->flags, BINDING_FLAG_ANY_IMPLICIT_EDGES);
+        if (!(jl_atomic_fetch_or(&for_method->did_scan_source, 0x2) & 0x2))
+            jl_add_scanned_method(for_method->module, for_method);
+        return 1;
+    }
+    jl_add_binding_backedge(b, (jl_value_t*)edge);
+    return 0;
+}
+
+JL_DLLEXPORT jl_binding_partition_t *jl_replace_binding_locked(jl_binding_t *b,
+    jl_binding_partition_t *old_bpart, jl_value_t *restriction_val, enum jl_partition_kind kind, size_t new_world)
+{
+    // Copy flags from old bpart
+    return jl_replace_binding_locked2(b, old_bpart, restriction_val, (size_t)kind | (size_t)(old_bpart->kind & PARTITION_MASK_FLAG),
+        new_world);
+}
+
+extern JL_DLLEXPORT _Atomic(size_t) jl_first_image_replacement_world;
+JL_DLLEXPORT jl_binding_partition_t *jl_replace_binding_locked2(jl_binding_t *b,
+    jl_binding_partition_t *old_bpart, jl_value_t *restriction_val, size_t kind, size_t new_world)
+{
+    check_safe_newbinding(b->globalref->mod, b->globalref->name);
+
+    // Check if this is a replacing a binding in the system or a package image.
+    // Until the first such replacement, we can fast-path validation.
+    // For these purposes, we consider the `Main` module to be a non-sysimg module.
+    // This is legal, because we special case the `Main` in check_safe_import_from.
+    if (jl_object_in_image((jl_value_t*)b) && b->globalref->mod != jl_main_module && jl_atomic_load_relaxed(&jl_first_image_replacement_world) == ~(size_t)0) {
+        // During incremental compilation replacement of image bindings is forbidden;
+        // We use this to avoid inserting backedges while loading pkgimages.
+        // `check_safe_newbinding` checks an equivalent condition on `b->globalref->mod`,
+        // but doesn't quite query `jl_object_in_image`, so assert here to be extra sure.
+        assert(!(jl_options.incremental && jl_generating_output()));
+        jl_atomic_store_relaxed(&jl_first_image_replacement_world, new_world);
+    }
+
+    assert(jl_atomic_load_relaxed(&b->partitions) == old_bpart);
+    jl_binding_partition_t *new_bpart = new_binding_partition();
+    JL_GC_PUSH1(&new_bpart);
+    jl_atomic_store_relaxed(&new_bpart->min_world, new_world);
+    if ((kind & PARTITION_MASK_KIND) == PARTITION_FAKE_KIND_IMPLICIT_RECOMPUTE) {
+        assert(!restriction_val);
+        struct implicit_search_resolution resolution = jl_resolve_implicit_import(b, NULL, new_world, 0);
+        new_bpart->kind = resolution.ultimate_kind | (kind & PARTITION_MASK_FLAG);
+        // If the resolution indicates this should be reexported, add the implicit export flag
+        if (resolution.should_be_reexported) {
+            new_bpart->kind |= PARTITION_FLAG_IMPLICITLY_EXPORTED;
         }
+        new_bpart->restriction = resolution.binding_or_const;
+        assert(resolution.min_world <= new_world && resolution.max_world == ~(size_t)0);
+        if (new_bpart->kind == old_bpart->kind && new_bpart->restriction == old_bpart->restriction) {
+            JL_GC_POP();
+            return old_bpart;
+        }
+    }
+    else {
+        new_bpart->kind = kind;
+        new_bpart->restriction = restriction_val;
+        jl_gc_wb_fresh(new_bpart, restriction_val);
+    }
+    jl_atomic_store_release(&old_bpart->max_world, new_world-1);
+    jl_atomic_store_relaxed(&new_bpart->next, old_bpart);
+    jl_gc_wb_fresh(new_bpart, old_bpart);
+
+    if ((jl_bpart_is_exported(old_bpart->kind) || jl_bpart_is_exported(kind)) && jl_require_world != ~(size_t)0) {
+        jl_atomic_store_release(&b->globalref->mod->export_set_changed_since_require_world, 1);
+    }
+
+    jl_atomic_store_release(&b->partitions, new_bpart);
+    jl_gc_wb(b, new_bpart);
+    JL_GC_POP();
+
+    if (jl_typeinf_world != 1) {
+        jl_task_t *ct = jl_current_task;
+        size_t last_world = ct->world_age;
+        ct->world_age = jl_typeinf_world;
+        jl_invalidate_binding_refs(b->globalref, old_bpart, new_bpart, new_world-1);
+        ct->world_age = last_world;
+    }
+
+    return new_bpart;
+}
+
+JL_DLLEXPORT jl_binding_partition_t *jl_replace_binding(jl_binding_t *b,
+        jl_binding_partition_t *old_bpart, jl_value_t *restriction_val, enum jl_partition_kind kind) {
+
+    JL_LOCK(&world_counter_lock);
+
+    if (jl_atomic_load_relaxed(&b->partitions) != old_bpart) {
+        JL_UNLOCK(&world_counter_lock);
+        return NULL;
     }
-    jl_errorf("invalid redefinition of constant %s", jl_symbol_name(var));
+
+    size_t new_world = jl_atomic_load_acquire(&jl_world_counter)+1;
+    jl_binding_partition_t *bpart = jl_replace_binding_locked(b, old_bpart, restriction_val, kind, new_world);
+    if (bpart && jl_atomic_load_relaxed(&bpart->min_world) == new_world)
+        jl_atomic_store_release(&jl_world_counter, new_world);
+
+    JL_UNLOCK(&world_counter_lock);
+    return bpart;
 }
 
 JL_DLLEXPORT int jl_globalref_is_const(jl_globalref_t *gr)
 {
     jl_binding_t *b = gr->binding;
-    b = jl_resolve_owner(b, gr->mod, gr->name, NULL);
-    return b && b->constp;
+    if (!b)
+        b = jl_get_module_binding(gr->mod, gr->name, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    return jl_bkind_is_real_constant(jl_binding_kind(bpart));
 }
 
-JL_DLLEXPORT int jl_globalref_boundp(jl_globalref_t *gr)
+JL_DLLEXPORT void jl_disable_binding(jl_globalref_t *gr)
 {
     jl_binding_t *b = gr->binding;
-    b = jl_resolve_owner(b, gr->mod, gr->name, NULL);
-    return b && jl_atomic_load_relaxed(&b->value) != NULL;
+    if (!b)
+        b = jl_get_module_binding(gr->mod, gr->name, 1);
+
+    for (;;) {
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_atomic_load_acquire(&jl_world_counter));
+
+        if (jl_binding_kind(bpart) == PARTITION_KIND_GUARD) {
+            // Already guard
+            return;
+        }
+
+        if (!jl_replace_binding(b, bpart, NULL, PARTITION_KIND_GUARD))
+            continue;
+
+        return;
+    }
 }
 
 JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_binding(m, var);
-    return b && b->constp;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    return b && jl_bkind_is_real_constant(jl_binding_kind(bpart));
 }
 
 // set the deprecated flag for a binding:
 //   0=not deprecated, 1=renamed, 2=moved to another package
+static const size_t DEPWARN_FLAGS = PARTITION_FLAG_DEPRECATED | PARTITION_FLAG_DEPWARN;
 JL_DLLEXPORT void jl_deprecate_binding(jl_module_t *m, jl_sym_t *var, int flag)
 {
-    // XXX: this deprecates the original value, which might be imported from elsewhere
     jl_binding_t *b = jl_get_binding(m, var);
-    if (b) b->deprecated = flag;
+    size_t new_flags = flag == 1 ? PARTITION_FLAG_DEPRECATED | PARTITION_FLAG_DEPWARN :
+                       flag == 2 ? PARTITION_FLAG_DEPRECATED :
+                                   0;
+    JL_LOCK(&world_counter_lock);
+    size_t new_world = jl_atomic_load_acquire(&jl_world_counter)+1;
+    jl_binding_partition_t *old_bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if ((old_bpart->kind & DEPWARN_FLAGS) == new_flags) {
+        JL_UNLOCK(&world_counter_lock);
+        return;
+    }
+    jl_replace_binding_locked2(b, old_bpart, old_bpart->restriction,
+        (old_bpart->kind & ~DEPWARN_FLAGS) | new_flags, new_world);
+    jl_atomic_store_release(&jl_world_counter, new_world);
+    JL_UNLOCK(&world_counter_lock);
+}
+
+static int should_depwarn(jl_binding_t *b, uint8_t flag)
+{
+    // We consider bindings deprecated, if:
+    //
+    // 1. The binding itself is deprecated, or
+    // 2. We implicitly import any deprecated binding.
+    //
+    // However, we do not consider the binding deprecated if the import was an explicit
+    // (`using` or `import`). The logic here is that the thing that needs to be adjusted
+    // is not the use itself, but rather the `using` or `import` (which already prints
+    // an appropriate warning).
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (bpart->kind & flag)
+        return 1;
+    return 0;
+}
+
+JL_DLLEXPORT void jl_binding_deprecation_check(jl_binding_t *b)
+{
+    if (jl_options.depwarn && should_depwarn(b, PARTITION_FLAG_DEPWARN))
+        jl_binding_deprecation_warning(b);
 }
 
 JL_DLLEXPORT int jl_is_binding_deprecated(jl_module_t *m, jl_sym_t *var)
 {
-    if (jl_binding_resolved_p(m, var)) {
-        // XXX: this only considers if the original is deprecated, not this precise binding
-        jl_binding_t *b = jl_get_binding(m, var);
-        return b && b->deprecated;
-    }
-    return 0;
+    jl_binding_t *b = jl_get_module_binding(m, var, 0);
+    if (!b)
+        return 0;
+    return should_depwarn(b, PARTITION_FLAG_DEPRECATED);
 }
 
-void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *s, jl_binding_t *b)
+void jl_binding_deprecation_warning(jl_binding_t *b)
 {
-    // Only print a warning for deprecated == 1 (renamed).
-    // For deprecated == 2 (moved to a package) the binding is to a function
-    // that throws an error, so we don't want to print a warning too.
-    if (b->deprecated == 1 && jl_options.depwarn) {
-        if (jl_options.depwarn != JL_OPTIONS_DEPWARN_ERROR)
-            jl_printf(JL_STDERR, "WARNING: ");
-        assert(jl_atomic_load_relaxed(&b->owner) == b);
-        jl_printf(JL_STDERR, "%s.%s is deprecated",
-                  jl_symbol_name(m->name), jl_symbol_name(s));
-        jl_binding_dep_message(m, s, b);
+    if (jl_options.depwarn != JL_OPTIONS_DEPWARN_ERROR)
+        jl_printf(JL_STDERR, "WARNING: ");
+    jl_printf(JL_STDERR, "Use of ");
 
-        if (jl_options.depwarn != JL_OPTIONS_DEPWARN_ERROR) {
-            if (jl_lineno != 0) {
-                jl_printf(JL_STDERR, "  likely near %s:%d\n", jl_filename, jl_lineno);
-            }
-        }
+    jl_printf(JL_STDERR, "%s.%s is deprecated",
+                jl_symbol_name(b->globalref->mod->name), jl_symbol_name(b->globalref->name));
+    jl_binding_dep_message(b);
 
-        if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR) {
-            jl_errorf("use of deprecated variable: %s.%s",
-                      jl_symbol_name(m->name),
-                      jl_symbol_name(s));
+    if (jl_options.depwarn != JL_OPTIONS_DEPWARN_ERROR) {
+        if (jl_atomic_load_relaxed(&jl_lineno) != 0) {
+            jl_printf(JL_STDERR, "  likely near %s:%d\n", jl_atomic_load_relaxed(&jl_filename), jl_atomic_load_relaxed(&jl_lineno));
         }
     }
+
+    if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR) {
+        jl_errorf("use of deprecated variable: %s.%s",
+                    jl_symbol_name(b->globalref->mod->name),
+                    jl_symbol_name(b->globalref->name));
+    }
 }
 
-JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs)
+// For a generally writable binding (checked using jl_check_binding_currently_writable in this world age), check whether
+// we can actually write the value `rhs` to it.
+jl_value_t *jl_check_binding_assign_value(jl_binding_t *b JL_PROPAGATES_ROOT, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED, const char *msg)
 {
-    jl_value_t *old_ty = NULL;
-    if (!jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type)) {
-        if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) {
-            JL_GC_PUSH1(&rhs); // callee-rooted
-            if (!jl_isa(rhs, old_ty))
-                jl_errorf("cannot assign an incompatible value to the global %s.%s.",
-                          jl_symbol_name(mod->name), jl_symbol_name(var));
-            JL_GC_POP();
-        }
+    JL_GC_PUSH1(&rhs); // callee-rooted
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    assert(kind == PARTITION_KIND_DECLARED || kind == PARTITION_KIND_GLOBAL);
+    jl_value_t *old_ty = kind == PARTITION_KIND_DECLARED ? (jl_value_t*)jl_any_type : bpart->restriction;
+    JL_GC_PROMISE_ROOTED(old_ty);
+    if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty && !jl_isa(rhs, old_ty)) {
+        jl_type_error_global(msg, mod, var, old_ty, rhs);
     }
-    if (b->constp) {
-        jl_value_t *old = NULL;
-        if (jl_atomic_cmpswap(&b->value, &old, rhs)) {
-            jl_gc_wb_binding(b, rhs);
-            return;
-        }
-        if (jl_egal(rhs, old))
-            return;
-        if (jl_typeof(rhs) != jl_typeof(old) || jl_is_type(rhs) || jl_is_module(rhs)) {
-            jl_errorf("invalid redefinition of constant %s.%s",
-                      jl_symbol_name(mod->name), jl_symbol_name(var));
+    JL_GC_POP();
+    return old_ty;
+}
 
-        }
-        jl_safe_printf("WARNING: redefinition of constant %s.%s. This may fail, cause incorrect answers, or produce other errors.\n",
-                       jl_symbol_name(mod->name), jl_symbol_name(var));
+JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs)
+{
+    if (jl_check_binding_assign_value(b, mod, var, rhs, "setglobal!") != NULL) {
+        jl_atomic_store_release(&b->value, rhs);
+        jl_gc_wb(b, rhs);
     }
-    jl_atomic_store_release(&b->value, rhs);
-    jl_gc_wb_binding(b, rhs);
 }
 
-JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var)
+JL_DLLEXPORT jl_value_t *jl_checked_swap(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs)
+{
+    jl_check_binding_assign_value(b, mod, var, rhs, "swapglobal!");
+    jl_value_t *old = jl_atomic_exchange(&b->value, rhs);
+    jl_gc_wb(b, rhs);
+    if (__unlikely(old == NULL))
+        jl_undefined_var_error(var, (jl_value_t*)mod);
+    return old;
+}
+
+JL_DLLEXPORT jl_value_t *jl_checked_replace(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *expected, jl_value_t *rhs)
 {
-    // n.b. jl_get_binding_wr should have ensured b->owner == b as mod.var
-    if (jl_atomic_load_relaxed(&b->owner) != b || (jl_atomic_load_relaxed(&b->value) != NULL && !b->constp)) {
-        jl_errorf("cannot declare %s.%s constant; it already has a value",
+    jl_value_t *ty = jl_check_binding_assign_value(b, mod, var, rhs, "replaceglobal!");
+    return replace_value(ty, &b->value, (jl_value_t*)b, expected, rhs, 1, mod, var);
+}
+
+JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *op, jl_value_t *rhs)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    enum jl_partition_kind kind = jl_binding_kind(bpart);
+    assert(!jl_bkind_is_some_guard(kind) && !jl_bkind_is_some_import(kind));
+    if (jl_bkind_is_some_constant(kind))
+        jl_errorf("invalid assignment to constant %s.%s",
                   jl_symbol_name(mod->name), jl_symbol_name(var));
-    }
-    b->constp = 1;
+    jl_value_t *ty = bpart->restriction;
+    JL_GC_PROMISE_ROOTED(ty);
+    return modify_value(ty, &b->value, (jl_value_t*)b, op, rhs, 1, b, mod, var);
+}
+
+JL_DLLEXPORT jl_value_t *jl_checked_assignonce(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs )
+{
+    jl_check_binding_assign_value(b, mod, var, rhs, "setglobalonce!");
+    jl_value_t *old = NULL;
+    if (jl_atomic_cmpswap(&b->value, &old, rhs))
+        jl_gc_wb(b, rhs);
+    return old;
 }
 
 JL_DLLEXPORT jl_value_t *jl_module_usings(jl_module_t *m)
 {
     JL_LOCK(&m->lock);
-    int j = m->usings.len;
+    int j = module_usings_length(m);
     jl_array_t *a = jl_alloc_array_1d(jl_array_any_type, j);
     JL_GC_PUSH1(&a);
     for (int i = 0; j > 0; i++) {
         j--;
-        jl_module_t *imp = (jl_module_t*)m->usings.items[i];
+        jl_module_t *imp = module_usings_getmod(m, i);
         jl_array_ptr_set(a, j, (jl_value_t*)imp);
     }
     JL_UNLOCK(&m->lock); // may gc
@@ -902,26 +2014,118 @@ JL_DLLEXPORT jl_value_t *jl_module_usings(jl_module_t *m)
     return (jl_value_t*)a;
 }
 
-JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported)
+void _append_symbol_to_bindings_array(jl_array_t* a, jl_sym_t *name) {
+    jl_array_grow_end(a, 1);
+    //XXX: change to jl_arrayset if array storage allocation for Array{Symbols,1} changes:
+    jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)name);
+}
+
+static void _materialize_reexported_bindings(jl_module_t *m, size_t world, jl_array_t *visited_modules)
 {
-    jl_array_t *a = jl_alloc_array_1d(jl_array_symbol_type, 0);
-    JL_GC_PUSH1(&a);
+    size_t len = jl_array_len(visited_modules);
+    for (size_t i = 0; i < len; i++) {
+        if (jl_array_ptr_ref(visited_modules, i) == (jl_value_t*)m)
+            return;
+    }
+    jl_array_ptr_1d_push(visited_modules, (jl_value_t*)m);
+
+    JL_LOCK(&m->lock);
+    size_t usings_len = module_usings_length(m);
+    JL_UNLOCK(&m->lock);
+
+    for (size_t i = 0; i < usings_len; i++) {
+        JL_LOCK(&m->lock);
+        struct _jl_module_using data = *module_usings_getidx(m, i);
+        JL_UNLOCK(&m->lock);
+
+        if (data.min_world > world || data.max_world < world)
+            continue;
+
+        if (data.flags & JL_MODULE_USING_REEXPORT) {
+            jl_module_t *from = data.mod;
+            JL_GC_PROMISE_ROOTED(from);
+
+            _materialize_reexported_bindings(from, world, visited_modules);
+
+            jl_svec_t *table = jl_atomic_load_relaxed(&from->bindings);
+            for (size_t j = 0; j < jl_svec_len(table); j++) {
+                jl_binding_t *b = (jl_binding_t*)jl_svecref(table, j);
+                if ((void*)b == jl_nothing)
+                    break;
+
+                jl_binding_partition_t *frombpart = jl_get_binding_partition(b, world);
+                if (jl_bpart_is_exported(frombpart->kind)) {
+                    jl_sym_t *var = b->globalref->name;
+                    jl_binding_t *tob = jl_get_module_binding(m, var, 1);
+                    jl_get_binding_partition(tob, world);  // Force implicit resolution
+                }
+                table = jl_atomic_load_relaxed(&from->bindings);
+            }
+        }
+    }
+}
+
+void append_module_names(jl_array_t* a, jl_module_t *m, int all, int imported, int usings)
+{
+    // Materialize reexported bindings first
+    size_t world = jl_current_task->world_age;
+    jl_array_t *visited_modules = jl_alloc_vec_any(0);
+    JL_GC_PUSH1(&visited_modules);
+    _materialize_reexported_bindings(m, world, visited_modules);
+    JL_GC_POP();
+
     jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings);
     for (size_t i = 0; i < jl_svec_len(table); i++) {
-        jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i);
+        jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
         if ((void*)b == jl_nothing)
             break;
         jl_sym_t *asname = b->globalref->name;
         int hidden = jl_symbol_name(asname)[0]=='#';
-        if ((b->exportp ||
-             (imported && b->imported) ||
-             (jl_atomic_load_relaxed(&b->owner) == b && !b->imported && (all || m == jl_main_module))) &&
-            (all || (!b->deprecated && !hidden))) {
-            jl_array_grow_end(a, 1);
-            // n.b. change to jl_arrayset if array storage allocation for Array{Symbols,1} changes:
-            jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)asname);
-        }
-        table = jl_atomic_load_relaxed(&m->bindings);
+        int main_public = (m == jl_main_module && !(asname == jl_eval_sym || asname == jl_include_sym));
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        enum jl_partition_kind kind = jl_binding_kind(bpart);
+        if (((jl_atomic_load_relaxed(&b->flags) & BINDING_FLAG_PUBLICP) ||
+             jl_bpart_is_exported(bpart->kind) ||
+             (imported && (kind == PARTITION_KIND_CONST_IMPORT || kind == PARTITION_KIND_IMPORTED)) ||
+             (usings && kind == PARTITION_KIND_EXPLICIT) ||
+             ((kind == PARTITION_KIND_GLOBAL || kind == PARTITION_KIND_CONST || kind == PARTITION_KIND_DECLARED) && (all || main_public))) &&
+            (all || (!(bpart->kind & PARTITION_FLAG_DEPRECATED) && !hidden)))
+            _append_symbol_to_bindings_array(a, asname);
+    }
+}
+
+void append_exported_names(jl_array_t* a, jl_module_t *m, int all)
+{
+    size_t world = jl_current_task->world_age;
+
+    // First, materialize all reexported bindings
+    jl_array_t *visited_modules = jl_alloc_vec_any(0);
+    JL_GC_PUSH1(&visited_modules);
+    _materialize_reexported_bindings(m, world, visited_modules);
+    JL_GC_POP();
+
+    // Now collect all exported bindings
+    jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings);
+    for (size_t i = 0; i < jl_svec_len(table); i++) {
+        jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
+        if ((void*)b == jl_nothing)
+            break;
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, world);
+        if (jl_bpart_is_exported(bpart->kind) && (all || !(bpart->kind & PARTITION_FLAG_DEPRECATED)))
+            _append_symbol_to_bindings_array(a, b->globalref->name);
+    }
+}
+
+JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported, int usings)
+{
+    jl_array_t *a = jl_alloc_array_1d(jl_array_symbol_type, 0);
+    JL_GC_PUSH1(&a);
+    append_module_names(a, m, all, imported, usings);
+    if (usings) {
+        // If `usings` is specified, traverse the list of `using`-ed modules and incorporate
+        // the names exported by those modules into the list.
+        for (int i = module_usings_length(m)-1; i >= 0; i--)
+            append_exported_names(a, module_usings_getmod(m, i), all);
     }
     JL_GC_POP();
     return (jl_value_t*)a;
@@ -938,11 +2142,20 @@ jl_module_t *jl_module_root(jl_module_t *m)
     }
 }
 
+JL_DLLEXPORT jl_sym_t *jl_module_getloc(jl_module_t *m, int32_t *line)
+{
+    if (line) {
+        *line = m->line;
+    }
+    return m->file;
+}
+
 JL_DLLEXPORT jl_uuid_t jl_module_build_id(jl_module_t *m) { return m->build_id; }
 JL_DLLEXPORT jl_uuid_t jl_module_uuid(jl_module_t* m) { return m->uuid; }
 
 // TODO: make this part of the module constructor and read-only?
 JL_DLLEXPORT void jl_set_module_uuid(jl_module_t *m, jl_uuid_t uuid) { m->uuid = uuid; }
+JL_DLLEXPORT void jl_set_module_parent(jl_module_t *m, jl_module_t *parent) { m->parent = parent; }
 
 int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT
 {
@@ -964,24 +2177,36 @@ JL_DLLEXPORT void jl_clear_implicit_imports(jl_module_t *m)
     JL_LOCK(&m->lock);
     jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings);
     for (size_t i = 0; i < jl_svec_len(table); i++) {
-        jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i);
+        jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
         if ((void*)b == jl_nothing)
             break;
-        if (jl_atomic_load_relaxed(&b->owner) && jl_atomic_load_relaxed(&b->owner) != b && !b->imported)
-            jl_atomic_store_relaxed(&b->owner, NULL);
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        if (jl_bkind_is_some_implicit(jl_binding_kind(bpart))) {
+            jl_atomic_store_relaxed(&b->partitions, NULL);
+        }
     }
     JL_UNLOCK(&m->lock);
 }
 
+JL_DLLEXPORT void jl_add_to_module_init_list(jl_value_t *mod)
+{
+    if (jl_module_init_order == NULL)
+        jl_module_init_order = jl_alloc_vec_any(0);
+    jl_array_ptr_1d_push(jl_module_init_order, mod);
+}
+
+JL_DLLEXPORT jl_svec_t *jl_module_get_bindings(jl_module_t *m)
+{
+    return jl_atomic_load_relaxed(&m->bindings);
+}
+
 JL_DLLEXPORT void jl_init_restored_module(jl_value_t *mod)
 {
     if (!jl_generating_output() || jl_options.incremental) {
         jl_module_run_initializer((jl_module_t*)mod);
     }
     else {
-        if (jl_module_init_order == NULL)
-            jl_module_init_order = jl_alloc_vec_any(0);
-        jl_array_ptr_1d_push(jl_module_init_order, mod);
+        jl_add_to_module_init_list(mod);
     }
 }
 
diff --git a/src/mtarraylist.c b/src/mtarraylist.c
new file mode 100644
index 0000000000000..0a0f3fe867e39
--- /dev/null
+++ b/src/mtarraylist.c
@@ -0,0 +1,81 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "julia.h"
+#include "julia_internal.h"
+#include "julia_assert.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// this file provides some alternate API functions for small_arraylist (push and add)
+// which can be safely observed from other threads concurrently
+// there is only permitted to be a single writer thread (or a mutex)
+// but there can be any number of observers
+
+typedef struct {
+    _Atomic(size_t) len;
+    size_t max;
+    _Atomic(_Atomic(void*)*) items;
+    _Atomic(void*) _space[SMALL_AL_N_INLINE];
+} small_mtarraylist_t;
+
+// change capacity to at least newlen
+static void mtarraylist_resizeto(small_mtarraylist_t *a, size_t len, size_t newlen) JL_NOTSAFEPOINT
+{
+    size_t max = a->max;
+    if (newlen > max) {
+        size_t nm = max * 2;
+        if (nm == 0)
+            nm = 1;
+        while (newlen > nm)
+            nm *= 2;
+        void *olditems = (void*)jl_atomic_load_relaxed(&a->items);
+        void *p = calloc_s(nm * sizeof(void*));
+        memcpy(p, olditems, len * sizeof(void*));
+        jl_atomic_store_release(&a->items, (_Atomic(void*)*)p);
+        a->max = nm;
+        if (olditems != (void*)&a->_space[0]) {
+            jl_task_t *ct = jl_current_task;
+            small_arraylist_push(&ct->ptls->lazily_freed_mtarraylist_buffers, olditems);
+        }
+    }
+}
+
+// single-threaded
+void mtarraylist_push(small_arraylist_t *_a, void *elt)
+{
+    small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
+    size_t len = jl_atomic_load_relaxed(&a->len);
+    mtarraylist_resizeto(a, len, len + 1);
+    jl_atomic_store_release(&jl_atomic_load_relaxed(&a->items)[len], elt);
+    jl_atomic_store_release(&a->len, len + 1);
+}
+
+// single-threaded
+void mtarraylist_add(small_arraylist_t *_a, void *elt, size_t idx)
+{
+    small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
+    size_t len = jl_atomic_load_relaxed(&a->len);
+    mtarraylist_resizeto(a, len, idx + 1);
+    jl_atomic_store_release(&jl_atomic_load_relaxed(&a->items)[idx], elt);
+    if (jl_atomic_load_relaxed(&a->len) < idx + 1)
+        jl_atomic_store_release(&a->len, idx + 1);
+}
+
+// concurrent-safe
+size_t mtarraylist_length(small_arraylist_t *_a)
+{
+    small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
+    return jl_atomic_load_relaxed(&a->len);
+}
+
+// concurrent-safe
+void *mtarraylist_get(small_arraylist_t *_a, size_t idx)
+{
+    small_mtarraylist_t *a = (small_mtarraylist_t*)_a;
+    size_t len = jl_atomic_load_acquire(&a->len);
+    if (idx >= len)
+        return NULL;
+    return jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&a->items)[idx]);
+}
diff --git a/src/null_sysimage.c b/src/null_sysimage.c
new file mode 100644
index 0000000000000..da50640d11b18
--- /dev/null
+++ b/src/null_sysimage.c
@@ -0,0 +1,13 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include <stddef.h>
+#include "processor.h"
+
+/**
+ * These symbols support statically linking the sysimage with libjulia-internal.
+ *
+ * Here we provide dummy definitions that are used when these are not linked
+ * together (the default build configuration). The 0 value of jl_image_unpack
+ * is used as a sentinel to indicate that the sysimage should be loaded externally.
+ **/
+jl_image_unpack_func_t *jl_image_unpack = NULL;
diff --git a/src/opaque_closure.c b/src/opaque_closure.c
index d73beff0f8587..8561449216d00 100644
--- a/src/opaque_closure.c
+++ b/src/opaque_closure.c
@@ -28,7 +28,7 @@ JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *sourc
 }
 
 static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
-    jl_value_t *source_, jl_value_t *captures, int do_compile)
+    jl_value_t *source_, jl_value_t *captures, int do_compile, size_t world)
 {
     if (!jl_is_tuple_type((jl_value_t*)argt)) {
         jl_error("OpaqueClosure argument tuple must be a tuple type");
@@ -50,20 +50,30 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
     JL_GC_PUSH2(&sigtype, &selected_rt);
     sigtype = jl_argtype_with_function(captures, (jl_value_t*)argt);
 
-    jl_method_instance_t *mi = jl_specializations_get_linfo(source, sigtype, jl_emptysvec);
+    jl_method_instance_t *mi = NULL;
+    if (source->source) {
+        mi = jl_specializations_get_linfo(source, sigtype, jl_emptysvec);
+    }
+    else {
+        mi = (jl_method_instance_t *)jl_atomic_load_relaxed(&source->specializations);
+        if (!jl_subtype(sigtype, mi->specTypes)) {
+            jl_error("sigtype mismatch in optimized opaque closure");
+        }
+    }
     jl_task_t *ct = jl_current_task;
-    size_t world = ct->world_age;
     jl_code_instance_t *ci = NULL;
     if (do_compile) {
         ci = jl_compile_method_internal(mi, world);
     }
 
-    jl_fptr_args_t invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+    jl_fptr_args_t callptr = (jl_fptr_args_t)jl_interpret_opaque_closure;
     void *specptr = NULL;
 
     if (ci) {
-        invoke = (jl_fptr_args_t)jl_atomic_load_relaxed(&ci->invoke);
-        specptr = jl_atomic_load_relaxed(&ci->specptr.fptr);
+        uint8_t specsigflags;
+        jl_callptr_t invoke;
+        jl_read_codeinst_invoke(ci, &specsigflags, &invoke, &specptr, 1);
+        callptr = (jl_fptr_args_t)invoke; // codegen puts the object (or a jl_fptr_interpret_call token )here for us, even though it was the wrong type to put here
 
         selected_rt = ci->rettype;
         // If we're not allowed to generate a specsig with this, rt, fall
@@ -72,25 +82,27 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
         if (!jl_subtype(rt_lb, selected_rt)) {
             // TODO: It would be better to try to get a specialization with the
             // correct rt check here (or we could codegen a wrapper).
-            specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+            specptr = NULL; // this will force codegen of the unspecialized version
+            callptr = (jl_fptr_args_t)jl_interpret_opaque_closure;
             jl_value_t *ts[2] = {rt_lb, (jl_value_t*)ci->rettype};
             selected_rt = jl_type_union(ts, 2);
         }
         if (!jl_subtype(ci->rettype, rt_ub)) {
             // TODO: It would be better to try to get a specialization with the
             // correct rt check here (or we could codegen a wrapper).
-            specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+            specptr = NULL; // this will force codegen of the unspecialized version
+            callptr = (jl_fptr_args_t)jl_interpret_opaque_closure;
             selected_rt = jl_type_intersection(rt_ub, selected_rt);
         }
 
-        if (invoke == (jl_fptr_args_t) jl_fptr_interpret_call) {
-            invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+        if (callptr == (jl_fptr_args_t)jl_fptr_interpret_call) {
+            callptr = (jl_fptr_args_t)jl_interpret_opaque_closure;
         }
-        else if (invoke == (jl_fptr_args_t)jl_fptr_args && specptr) {
-            invoke = (jl_fptr_args_t)specptr;
+        else if (callptr == (jl_fptr_args_t)jl_fptr_args && specptr != NULL) {
+            callptr = (jl_fptr_args_t)specptr;
         }
-        else if (invoke == (jl_fptr_args_t)jl_fptr_const_return) {
-            invoke = jl_isa(ci->rettype_const, selected_rt) ?
+        else if (callptr == (jl_fptr_args_t)jl_fptr_const_return) {
+            callptr = jl_isa(ci->rettype_const, selected_rt) ?
                 (jl_fptr_args_t)jl_fptr_const_opaque_closure :
                 (jl_fptr_args_t)jl_fptr_const_opaque_closure_typeerror;
             captures = ci->rettype_const;
@@ -100,21 +112,22 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
     jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, selected_rt);
     JL_GC_PROMISE_ROOTED(oc_type);
 
-    if (!specptr) {
-        sigtype = jl_argtype_with_function_type((jl_value_t*)oc_type, (jl_value_t*)argt);
+    if (specptr == NULL) {
         jl_method_instance_t *mi_generic = jl_specializations_get_linfo(jl_opaque_closure_method, sigtype, jl_emptysvec);
 
-        // OC wrapper methods are not world dependent
-        ci = jl_get_method_inferred(mi_generic, selected_rt, 1, ~(size_t)0);
-        if (!jl_atomic_load_acquire(&ci->invoke))
-            jl_generate_fptr_for_oc_wrapper(ci);
+        // OC wrapper methods are not world dependent and have no edges or other info
+        ci = jl_get_method_inferred(mi_generic, selected_rt, 1, ~(size_t)0, NULL, NULL);
+        if (!jl_atomic_load_acquire(&ci->invoke)) {
+            jl_emit_codeinst_to_jit(ci, NULL); // confusing this actually calls jl_emit_oc_wrapper and never actually compiles ci (which would be impossible since it cannot have source)
+            jl_compile_codeinst(ci);
+        }
         specptr = jl_atomic_load_relaxed(&ci->specptr.fptr);
     }
     jl_opaque_closure_t *oc = (jl_opaque_closure_t*)jl_gc_alloc(ct->ptls, sizeof(jl_opaque_closure_t), oc_type);
     oc->source = source;
     oc->captures = captures;
     oc->world = world;
-    oc->invoke = invoke;
+    oc->invoke = callptr;
     oc->specptr = specptr;
 
     JL_GC_POP();
@@ -126,52 +139,54 @@ jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_
 {
     jl_value_t *captures = jl_f_tuple(NULL, env, nenv);
     JL_GC_PUSH1(&captures);
-    jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, source_, captures, do_compile);
+    jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, source_, captures, do_compile, jl_current_task->world_age);
     JL_GC_POP();
     return oc;
 }
 
-jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
-    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva);
-
-JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst(
-        jl_method_instance_t *mi, jl_value_t *rettype,
-        jl_value_t *inferred_const, jl_value_t *inferred,
-        int32_t const_flags, size_t min_world, size_t max_world,
-        uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes,
-        uint8_t relocatability);
-
-JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
-    jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile)
+JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info_in_world(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
+    jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile, int isinferred, size_t world)
 {
-    if (!ci->inferred)
-        jl_error("CodeInfo must already be inferred");
     jl_value_t *root = NULL, *sigtype = NULL;
     jl_code_instance_t *inst = NULL;
-    JL_GC_PUSH3(&root, &sigtype, &inst);
+    jl_svec_t *edges = NULL;
+    JL_GC_PUSH4(&root, &sigtype, &inst, &edges);
     root = jl_box_long(lineno);
     root = jl_new_struct(jl_linenumbernode_type, root, file);
-    jl_method_t *meth = jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva);
+    jl_method_t *meth = jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva, isinferred);
     root = (jl_value_t*)meth;
-    meth->primary_world = jl_current_task->world_age;
-
-    sigtype = jl_argtype_with_function(env, (jl_value_t*)argt);
-    jl_method_instance_t *mi = jl_specializations_get_linfo((jl_method_t*)root, sigtype, jl_emptysvec);
-    inst = jl_new_codeinst(mi, rt_ub, NULL, (jl_value_t*)ci,
-        0, meth->primary_world, -1, 0, 0, jl_nothing, 0);
-    jl_mi_cache_insert(mi, inst);
+    // these are only legal in the current world since they are not in any tables
+    jl_atomic_store_release(&meth->primary_world, world);
+
+    if (isinferred) {
+        jl_value_t *argslotty = jl_array_ptr_ref(ci->slottypes, 0);
+        sigtype = jl_argtype_with_function_type(argslotty, (jl_value_t*)argt);
+        jl_method_instance_t *mi = jl_specializations_get_linfo((jl_method_t*)root, sigtype, jl_emptysvec);
+        edges = (jl_svec_t*)ci->edges;
+        if (!jl_is_svec(edges))
+            edges = jl_emptysvec; // OC doesn't really have edges, so just drop them for now
+        inst = jl_new_codeinst(mi, jl_nothing, rt_ub, (jl_value_t*)jl_any_type, NULL, (jl_value_t*)ci,
+            0, world, world, 0, jl_nothing, ci->debuginfo, edges);
+        jl_mi_cache_insert(mi, inst);
+    }
 
-    jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env, do_compile);
+    jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env, do_compile, world);
     JL_GC_POP();
     return oc;
 }
 
+JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
+    jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile, int isinferred)
+{
+    return jl_new_opaque_closure_from_code_info_in_world(argt, rt_lb, rt_ub, mod, ci, lineno, file, nargs, isva, env, do_compile, isinferred, jl_current_task->world_age);
+}
+
 JL_CALLABLE(jl_new_opaque_closure_jlcall)
 {
-    if (nargs < 4)
+    if (nargs < 5)
         jl_error("new_opaque_closure: Not enough arguments");
     return (jl_value_t*)jl_new_opaque_closure((jl_tupletype_t*)args[0],
-        args[1], args[2], args[3], &args[4], nargs-4, 1);
+        args[1], args[2], args[4], &args[5], nargs-5, 1);
 }
 
 // check whether the specified number of arguments is compatible with the
@@ -195,7 +210,7 @@ int jl_tupletype_length_compat(jl_value_t *v, size_t nargs)
 
 JL_CALLABLE(jl_f_opaque_closure_call)
 {
-    jl_opaque_closure_t* oc = (jl_opaque_closure_t*)F;
+    jl_opaque_closure_t *oc = (jl_opaque_closure_t*)F;
     jl_value_t *argt = jl_tparam0(jl_typeof(oc));
     if (!jl_tupletype_length_compat(argt, nargs))
         jl_method_error(F, args, nargs + 1, oc->world);
diff --git a/src/options.h b/src/options.h
index b535d5ad4566f..0715069faab32 100644
--- a/src/options.h
+++ b/src/options.h
@@ -33,11 +33,6 @@
 // delete julia IR for non-inlineable functions after they're codegen'd
 #define JL_DELETE_NON_INLINEABLE 1
 
-// fill in the jl_all_methods in world-counter order
-// so that it is possible to map (in a debugger) from
-// an inferred world validity range back to the offending definition
-// #define RECORD_METHOD_ORDER
-
 // GC options -----------------------------------------------------------------
 
 // debugging options
@@ -64,19 +59,25 @@
 #endif
 #endif
 
+// GC_ASSERT_PARENT_VALIDITY will check whether an object is valid when **pushing**
+// it to the mark queue
+// #define GC_ASSERT_PARENT_VALIDITY
+
 // profiling options
 
 // GC_FINAL_STATS prints total GC stats at exit
 // #define GC_FINAL_STATS
 
-// MEMPROFILE prints pool summary statistics after every GC
-//#define MEMPROFILE
+// MEMPROFILE prints pool and large objects summary statistics after every GC
+// #define MEMPROFILE
 
 // GC_TIME prints time taken by each phase of GC
 // #define GC_TIME
 
-// OBJPROFILE counts objects by type
-// #define OBJPROFILE
+// pool allocator configuration options
+
+// GC_SMALL_PAGE allocates objects in 4k pages
+// #define GC_SMALL_PAGE
 
 
 // method dispatch profiling --------------------------------------------------
@@ -105,13 +106,15 @@
 
 // When not using COPY_STACKS the task-system is less memory efficient so
 // you probably want to choose a smaller default stack size (factor of 8-10)
+#if !defined(JL_STACK_SIZE)
 #if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_)
 #define JL_STACK_SIZE (64*1024*1024)
 #elif defined(_P64)
-#define JL_STACK_SIZE (4*1024*1024)
+#define JL_STACK_SIZE (8*1024*1024)
 #else
 #define JL_STACK_SIZE (2*1024*1024)
 #endif
+#endif
 
 // allow a suspended Task to restart on a different thread
 #define MIGRATE_TASKS
@@ -134,30 +137,13 @@
 // GC threads
 #define NUM_GC_THREADS_NAME             "JULIA_NUM_GC_THREADS"
 
+// heap size hint
+#define HEAP_SIZE_HINT                  "JULIA_HEAP_SIZE_HINT"
+
 // affinitization behavior
 #define MACHINE_EXCLUSIVE_NAME          "JULIA_EXCLUSIVE"
 #define DEFAULT_MACHINE_EXCLUSIVE       0
 
-// partr -- parallel tasks runtime options ------------------------------------
-
-// multiq
-    // number of heaps = MULTIQ_HEAP_C * nthreads
-#define MULTIQ_HEAP_C                   4
-    // how many in each heap
-#define MULTIQ_TASKS_PER_HEAP           129
-
-// parfor
-    // tasks = niters / (GRAIN_K * nthreads)
-#define GRAIN_K                         4
-
-// synchronization
-    // narrivers = ((GRAIN_K * nthreads) ^ ARRIVERS_P) + 1
-    // limit for number of recursive parfors
-#define ARRIVERS_P                      2
-    // nreducers = narrivers * REDUCERS_FRAC
-#define REDUCERS_FRAC                   1
-
-
 // sanitizer defaults ---------------------------------------------------------
 
 // Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers
diff --git a/src/partr.c b/src/partr.c
deleted file mode 100644
index 428389db7f218..0000000000000
--- a/src/partr.c
+++ /dev/null
@@ -1,531 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-
-#include "julia.h"
-#include "julia_internal.h"
-#include "gc.h"
-#include "threading.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-// thread sleep state
-
-// default to DEFAULT_THREAD_SLEEP_THRESHOLD; set via $JULIA_THREAD_SLEEP_THRESHOLD
-uint64_t sleep_threshold;
-
-// thread should not be sleeping--it might need to do work.
-static const int16_t not_sleeping = 0;
-
-// it is acceptable for the thread to be sleeping.
-static const int16_t sleeping = 1;
-
-// this thread is dead.
-static const int16_t sleeping_like_the_dead JL_UNUSED = 2;
-
-// invariant: No thread is ever asleep unless sleep_check_state is sleeping (or we have a wakeup signal pending).
-// invariant: Any particular thread is not asleep unless that thread's sleep_check_state is sleeping.
-// invariant: The transition of a thread state to sleeping must be followed by a check that there wasn't work pending for it.
-// information: Observing thread not-sleeping is sufficient to ensure the target thread will subsequently inspect its local queue.
-// information: Observing thread is-sleeping says it may be necessary to notify it at least once to wakeup. It may already be awake however for a variety of reasons.
-// information: These observations require sequentially-consistent fences to be inserted between each of those operational phases.
-// [^store_buffering_1]: These fences are used to avoid the cycle 2b -> 1a -> 1b -> 2a -> 2b where
-// * Dequeuer:
-//   * 1: `jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping)`
-// * Enqueuer:
-//   * 2: `jl_atomic_load_relaxed(&ptls->sleep_check_state)` in `jl_wakeup_thread` returns `not_sleeping`
-// i.e., the dequeuer misses the enqueue and enqueuer misses the sleep state transition.
-// [^store_buffering_2]: and also
-// * Enqueuer:
-//   * 1a: `jl_atomic_store_relaxed(jl_uv_n_waiters, 1)` in `JL_UV_LOCK`
-//   * 1b: "cheap read" of `handle->pending` in `uv_async_send` (via `JL_UV_LOCK`) loads `0`
-// * Dequeuer:
-//   * 2a: store `2` to `handle->pending` in `uv_async_send` (via `JL_UV_LOCK` in `jl_task_get_next`)
-//   * 2b: `jl_atomic_load_relaxed(jl_uv_n_waiters)` in `jl_task_get_next` returns `0`
-// i.e., the dequeuer misses the `n_waiters` is set and enqueuer misses the `uv_stop` flag (in `signal_async`) transition to cleared
-
-JULIA_DEBUG_SLEEPWAKE(
-uint64_t wakeup_enter;
-uint64_t wakeup_leave;
-uint64_t io_wakeup_enter;
-uint64_t io_wakeup_leave;
-);
-
-JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT
-{
-    // Try to acquire the lock on this task.
-    int16_t was = jl_atomic_load_relaxed(&task->tid);
-    if (was == tid)
-        return 1;
-    if (was == -1)
-        return jl_atomic_cmpswap(&task->tid, &was, tid);
-    return 0;
-}
-
-JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT
-{
-    if (tpid < 0 || tpid >= jl_n_threadpools)
-        return 0;
-    task->threadpoolid = tpid;
-    return 1;
-}
-
-// GC functions used
-extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache,
-                                         jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT;
-
-// parallel task runtime
-// ---
-
-JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max, uint32_t unbias)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    // one-extend unbias back to 64-bits
-    return cong(max, -(uint64_t)-unbias, &ptls->rngseed);
-}
-
-// initialize the threading infrastructure
-// (called only by the main thread)
-void jl_init_threadinginfra(void)
-{
-    /* initialize the synchronization trees pool */
-    sleep_threshold = DEFAULT_THREAD_SLEEP_THRESHOLD;
-    char *cp = getenv(THREAD_SLEEP_THRESHOLD_NAME);
-    if (cp) {
-        if (!strncasecmp(cp, "infinite", 8))
-            sleep_threshold = UINT64_MAX;
-        else
-            sleep_threshold = (uint64_t)strtol(cp, NULL, 10);
-    }
-}
-
-
-void JL_NORETURN jl_finish_task(jl_task_t *t);
-
-
-static inline int may_mark(void) JL_NOTSAFEPOINT
-{
-    return (jl_atomic_load(&gc_n_threads_marking) > 0);
-}
-
-// gc thread mark function
-void jl_gc_mark_threadfun(void *arg)
-{
-    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
-
-    // initialize this thread (set tid and create heap)
-    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
-
-    // wait for all threads
-    jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0);
-    uv_barrier_wait(targ->barrier);
-
-    // free the thread argument here
-    free(targ);
-
-    while (1) {
-        uv_mutex_lock(&gc_threads_lock);
-        while (!may_mark()) {
-            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
-        }
-        uv_mutex_unlock(&gc_threads_lock);
-        gc_mark_loop_parallel(ptls, 0);
-    }
-}
-
-// gc thread sweep function
-void jl_gc_sweep_threadfun(void *arg)
-{
-    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
-
-    // initialize this thread (set tid and create heap)
-    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
-
-    // wait for all threads
-    jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0);
-    uv_barrier_wait(targ->barrier);
-
-    // free the thread argument here
-    free(targ);
-
-    while (1) {
-        uv_sem_wait(&gc_sweep_assists_needed);
-        while (1) {
-            jl_gc_pagemeta_t *pg = pop_lf_page_metadata_back(&global_page_pool_lazily_freed);
-            if (pg == NULL) {
-                break;
-            }
-            jl_gc_free_page(pg);
-            push_lf_page_metadata_back(&global_page_pool_freed, pg);
-        }
-    }
-}
-
-// thread function: used by all mutator threads except the main thread
-void jl_threadfun(void *arg)
-{
-    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
-
-    // initialize this thread (set tid, create heap, set up root task)
-    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
-    void *stack_lo, *stack_hi;
-    jl_init_stack_limits(0, &stack_lo, &stack_hi);
-    // warning: this changes `jl_current_task`, so be careful not to call that from this function
-    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
-    JL_GC_PROMISE_ROOTED(ct);
-
-    // wait for all threads
-    jl_gc_state_set(ptls, JL_GC_STATE_SAFE, 0);
-    uv_barrier_wait(targ->barrier);
-
-    // free the thread argument here
-    free(targ);
-
-    (void)jl_gc_unsafe_enter(ptls);
-    jl_finish_task(ct); // noreturn
-}
-
-
-int jl_running_under_rr(int recheck)
-{
-#ifdef _OS_LINUX_
-#define RR_CALL_BASE 1000
-#define SYS_rrcall_check_presence (RR_CALL_BASE + 8)
-    static _Atomic(int) is_running_under_rr = 0;
-    int rr = jl_atomic_load_relaxed(&is_running_under_rr);
-    if (rr == 0 || recheck) {
-        int ret = syscall(SYS_rrcall_check_presence, 0, 0, 0, 0, 0, 0);
-        if (ret == -1)
-            // Should always be ENOSYS, but who knows what people do for
-            // unknown syscalls with their seccomp filters, so just say
-            // that we don't have rr.
-            rr = 2;
-        else
-            rr = 1;
-        jl_atomic_store_relaxed(&is_running_under_rr, rr);
-    }
-    return rr == 1;
-#else
-    return 0;
-#endif
-}
-
-
-//  sleep_check_after_threshold() -- if sleep_threshold ns have passed, return 1
-static int sleep_check_after_threshold(uint64_t *start_cycles)
-{
-    JULIA_DEBUG_SLEEPWAKE( return 1 ); // hammer on the sleep/wake logic much harder
-    /**
-     * This wait loop is a bit of a worst case for rr - it needs timer access,
-     * which are slow and it busy loops in user space, which prevents the
-     * scheduling logic from switching to other threads. Just don't bother
-     * trying to wait here
-     */
-    if (jl_running_under_rr(0))
-        return 1;
-    if (!(*start_cycles)) {
-        *start_cycles = jl_hrtime();
-        return 0;
-    }
-    uint64_t elapsed_cycles = jl_hrtime() - (*start_cycles);
-    if (elapsed_cycles >= sleep_threshold) {
-        *start_cycles = 0;
-        return 1;
-    }
-    return 0;
-}
-
-
-static int wake_thread(int16_t tid) JL_NOTSAFEPOINT
-{
-    jl_ptls_t other = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
-    int8_t state = sleeping;
-
-    if (jl_atomic_load_relaxed(&other->sleep_check_state) == sleeping) {
-        if (jl_atomic_cmpswap_relaxed(&other->sleep_check_state, &state, not_sleeping)) {
-            JL_PROBE_RT_SLEEP_CHECK_WAKE(other, state);
-            uv_mutex_lock(&other->sleep_lock);
-            uv_cond_signal(&other->wake_signal);
-            uv_mutex_unlock(&other->sleep_lock);
-            return 1;
-        }
-    }
-    return 0;
-}
-
-
-static void wake_libuv(void) JL_NOTSAFEPOINT
-{
-    JULIA_DEBUG_SLEEPWAKE( io_wakeup_enter = cycleclock() );
-    jl_wake_libuv();
-    JULIA_DEBUG_SLEEPWAKE( io_wakeup_leave = cycleclock() );
-}
-
-/* ensure thread tid is awake if necessary */
-JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
-{
-    jl_task_t *ct = jl_current_task;
-    int16_t self = jl_atomic_load_relaxed(&ct->tid);
-    if (tid != self)
-        jl_fence(); // [^store_buffering_1]
-    jl_task_t *uvlock = jl_atomic_load_relaxed(&jl_uv_mutex.owner);
-    JULIA_DEBUG_SLEEPWAKE( wakeup_enter = cycleclock() );
-    if (tid == self || tid == -1) {
-        // we're already awake, but make sure we'll exit uv_run
-        jl_ptls_t ptls = ct->ptls;
-        if (jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping) {
-            jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping);
-            JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls);
-        }
-        if (uvlock == ct)
-            uv_stop(jl_global_event_loop());
-    }
-    else {
-        // something added to the sticky-queue: notify that thread
-        if (wake_thread(tid) && uvlock != ct) {
-            // check if we need to notify uv_run too
-            jl_fence();
-            jl_ptls_t other = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
-            jl_task_t *tid_task = jl_atomic_load_relaxed(&other->current_task);
-            // now that we have changed the thread to not-sleeping, ensure that
-            // either it has not yet acquired the libuv lock, or that it will
-            // observe the change of state to not_sleeping
-            if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == tid_task)
-                wake_libuv();
-        }
-    }
-    // check if the other threads might be sleeping
-    if (tid == -1) {
-        // something added to the multi-queue: notify all threads
-        // in the future, we might want to instead wake some fraction of threads,
-        // and let each of those wake additional threads if they find work
-        int anysleep = 0;
-        int nthreads = jl_atomic_load_acquire(&jl_n_threads);
-        for (tid = 0; tid < nthreads; tid++) {
-            if (tid != self)
-                anysleep |= wake_thread(tid);
-        }
-        // check if we need to notify uv_run too
-        if (uvlock != ct && anysleep) {
-            jl_fence();
-            if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) != NULL)
-                wake_libuv();
-        }
-    }
-    JULIA_DEBUG_SLEEPWAKE( wakeup_leave = cycleclock() );
-}
-
-
-// get the next runnable task
-static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q)
-{
-    jl_gc_safepoint();
-    jl_task_t *task = (jl_task_t*)jl_apply_generic(trypoptask, &q, 1);
-    if (jl_is_task(task)) {
-        int self = jl_atomic_load_relaxed(&jl_current_task->tid);
-        jl_set_task_tid(task, self);
-        return task;
-    }
-    return NULL;
-}
-
-static int check_empty(jl_value_t *checkempty)
-{
-    return jl_apply_generic(checkempty, NULL, 0) == jl_true;
-}
-
-jl_task_t *wait_empty JL_GLOBALLY_ROOTED;
-void jl_wait_empty_begin(void);
-void jl_wait_empty_end(void);
-
-void jl_task_wait_empty(void)
-{
-    jl_task_t *ct = jl_current_task;
-    if (jl_atomic_load_relaxed(&ct->tid) == 0 && jl_base_module) {
-        jl_wait_empty_begin();
-        jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("wait"));
-        wait_empty = ct;
-        size_t lastage = ct->world_age;
-        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        if (f)
-            jl_apply_generic(f, NULL, 0);
-        ct->world_age = lastage;
-        wait_empty = NULL;
-        jl_wait_empty_end();
-    }
-}
-
-static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT
-{
-    // sleep_check_state is only transitioned from not_sleeping to sleeping
-    // by the thread itself. As a result, if this returns false, it will
-    // continue returning false. If it returns true, we know the total
-    // modification order of the fences.
-    jl_fence(); // [^store_buffering_1] [^store_buffering_2]
-    return jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping;
-}
-
-extern _Atomic(unsigned) _threadedregion;
-
-JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, jl_value_t *checkempty)
-{
-    jl_task_t *ct = jl_current_task;
-    uint64_t start_cycles = 0;
-
-    while (1) {
-        jl_task_t *task = get_next_task(trypoptask, q);
-        if (task)
-            return task;
-
-        // quick, race-y check to see if there seems to be any stuff in there
-        jl_cpu_pause();
-        if (!check_empty(checkempty)) {
-            start_cycles = 0;
-            continue;
-        }
-
-        jl_cpu_pause();
-        jl_ptls_t ptls = ct->ptls;
-        if (sleep_check_after_threshold(&start_cycles) || (ptls->tid == 0 && (!jl_atomic_load_relaxed(&_threadedregion) || wait_empty))) {
-            // acquire sleep-check lock
-            jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping);
-            jl_fence(); // [^store_buffering_1]
-            JL_PROBE_RT_SLEEP_CHECK_SLEEP(ptls);
-            if (!check_empty(checkempty)) { // uses relaxed loads
-                if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
-                    jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
-                    JL_PROBE_RT_SLEEP_CHECK_TASKQ_WAKE(ptls);
-                }
-                continue;
-            }
-            task = get_next_task(trypoptask, q); // note: this should not yield
-            if (ptls != ct->ptls) {
-                // sigh, a yield was detected, so let's go ahead and handle it anyway by starting over
-                ptls = ct->ptls;
-                if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
-                    jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
-                    JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
-                }
-                if (task)
-                    return task;
-                continue;
-            }
-            if (task) {
-                if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
-                    jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
-                    JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
-                }
-                return task;
-            }
-
-
-            // IO is always permitted, but outside a threaded region, only
-            // thread 0 will process messages.
-            // Inside a threaded region, any thread can listen for IO messages,
-            // and one thread should win this race and watch the event loop,
-            // but we bias away from idle threads getting parked here.
-            //
-            // The reason this works is somewhat convoluted, and closely tied to [^store_buffering_1]:
-            //  - After decrementing _threadedregion, the thread is required to
-            //    call jl_wakeup_thread(0), that will kick out any thread who is
-            //    already there, and then eventually thread 0 will get here.
-            //  - Inside a _threadedregion, there must exist at least one
-            //    thread that has a happens-before relationship on the libuv lock
-            //    before reaching this decision point in the code who will see
-            //    the lock as unlocked and thus must win this race here.
-            int uvlock = 0;
-            if (jl_atomic_load_relaxed(&_threadedregion)) {
-                uvlock = jl_mutex_trylock(&jl_uv_mutex);
-            }
-            else if (ptls->tid == 0) {
-                uvlock = 1;
-                JL_UV_LOCK();
-            }
-            else {
-                // Since we might have started some IO work, we might need
-                // to ensure tid = 0 will go watch that new event source.
-                // If trylock would have succeeded, that may have been our
-                // responsibility, so need to make sure thread 0 will take care
-                // of us.
-                if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == NULL) // aka trylock
-                    jl_wakeup_thread(0);
-            }
-            if (uvlock) {
-                int enter_eventloop = may_sleep(ptls);
-                int active = 0;
-                if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0)
-                    // if we won the race against someone who actually needs
-                    // the lock to do real work, we need to let them have it instead
-                    enter_eventloop = 0;
-                if (enter_eventloop) {
-                    uv_loop_t *loop = jl_global_event_loop();
-                    loop->stop_flag = 0;
-                    JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_enter = cycleclock() );
-                    active = uv_run(loop, UV_RUN_ONCE);
-                    JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_leave = cycleclock() );
-                    jl_gc_safepoint();
-                }
-                JL_UV_UNLOCK();
-                // optimization: check again first if we may have work to do.
-                // Otherwise we got a spurious wakeup since some other thread
-                // that just wanted to steal libuv from us. We will just go
-                // right back to sleep on the individual wake signal to let
-                // them take it from us without conflict.
-                if (active || !may_sleep(ptls)) {
-                    start_cycles = 0;
-                    continue;
-                }
-                if (!enter_eventloop && !jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == 0) {
-                    // thread 0 is the only thread permitted to run the event loop
-                    // so it needs to stay alive, just spin-looping if necessary
-                    if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
-                        jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
-                        JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls);
-                    }
-                    start_cycles = 0;
-                    continue;
-                }
-            }
-
-            // the other threads will just wait for an individual wake signal to resume
-            JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() );
-            int8_t gc_state = jl_gc_safe_enter(ptls);
-            uv_mutex_lock(&ptls->sleep_lock);
-            while (may_sleep(ptls)) {
-                if (ptls->tid == 0 && wait_empty) {
-                    task = wait_empty;
-                    if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
-                        jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
-                        JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
-                    }
-                    break;
-                }
-                uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock);
-            }
-            assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
-            uv_mutex_unlock(&ptls->sleep_lock);
-            JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() );
-            jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint
-            start_cycles = 0;
-            if (task) {
-                assert(task == wait_empty);
-                wait_empty = NULL;
-                return task;
-            }
-        }
-        else {
-            // maybe check the kernel for new messages too
-            jl_process_events();
-        }
-    }
-}
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/src/passes.h b/src/passes.h
index 2bb33d6eec60d..0c5a124ade952 100644
--- a/src/passes.h
+++ b/src/passes.h
@@ -15,10 +15,6 @@ struct DemoteFloat16Pass : PassInfoMixin<DemoteFloat16Pass> {
     static bool isRequired() { return true; }
 };
 
-struct CombineMulAddPass : PassInfoMixin<CombineMulAddPass> {
-    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT;
-};
-
 struct LateLowerGCPass : PassInfoMixin<LateLowerGCPass> {
     PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT;
     static bool isRequired() { return true; }
@@ -33,10 +29,6 @@ struct PropagateJuliaAddrspacesPass : PassInfoMixin<PropagateJuliaAddrspacesPass
     static bool isRequired() { return true; }
 };
 
-struct LowerExcHandlersPass : PassInfoMixin<LowerExcHandlersPass> {
-    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT;
-    static bool isRequired() { return true; }
-};
 
 struct GCInvariantVerifierPass : PassInfoMixin<GCInvariantVerifierPass> {
     bool Strong;
@@ -46,30 +38,30 @@ struct GCInvariantVerifierPass : PassInfoMixin<GCInvariantVerifierPass> {
     static bool isRequired() { return true; }
 };
 
-// Module Passes
-struct CPUFeaturesPass : PassInfoMixin<CPUFeaturesPass> {
-    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT;
+struct FinalLowerGCPass : PassInfoMixin<FinalLowerGCPass> {
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT;
     static bool isRequired() { return true; }
 };
 
-struct RemoveNIPass : PassInfoMixin<RemoveNIPass> {
-    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT;
-    static bool isRequired() { return true; }
+struct ExpandAtomicModifyPass : PassInfoMixin<ExpandAtomicModifyPass> {
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT;
 };
 
-struct LowerSIMDLoopPass : PassInfoMixin<LowerSIMDLoopPass> {
+
+// Module Passes
+struct CPUFeaturesPass : PassInfoMixin<CPUFeaturesPass> {
     PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT;
     static bool isRequired() { return true; }
 };
 
-struct FinalLowerGCPass : PassInfoMixin<FinalLowerGCPass> {
+struct RemoveNIPass : PassInfoMixin<RemoveNIPass> {
     PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT;
     static bool isRequired() { return true; }
 };
 
 struct MultiVersioningPass : PassInfoMixin<MultiVersioningPass> {
     bool external_use;
-    MultiVersioningPass(bool external_use = false) : external_use(external_use) {}
+    MultiVersioningPass(bool external_use = false) JL_NOTSAFEPOINT : external_use(external_use) {}
     PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT;
     static bool isRequired() { return true; }
 };
@@ -103,6 +95,11 @@ struct JuliaLICMPass : PassInfoMixin<JuliaLICMPass> {
                           LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT;
 };
 
+struct LowerSIMDLoopPass : PassInfoMixin<LowerSIMDLoopPass> {
+    PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                          LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT;
+};
+
 #define MODULE_MARKER_PASS(NAME) \
     struct NAME##MarkerPass : PassInfoMixin<NAME##MarkerPass> { \
         PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT { return PreservedAnalyses::all(); } \
@@ -146,4 +143,8 @@ MODULE_MARKER_PASS(BeforeCleanup)
 MODULE_MARKER_PASS(AfterCleanup)
 MODULE_MARKER_PASS(AfterOptimization)
 
+bool verifyLLVMIR(const Module &M) JL_NOTSAFEPOINT;
+bool verifyLLVMIR(const Function &F) JL_NOTSAFEPOINT;
+bool verifyLLVMIR(const Loop &L) JL_NOTSAFEPOINT;
+
 #endif
diff --git a/src/pipeline.cpp b/src/pipeline.cpp
index ca5992b6f3135..0481e04b8d19e 100644
--- a/src/pipeline.cpp
+++ b/src/pipeline.cpp
@@ -12,37 +12,34 @@
 // analysis passes
 #include <llvm/Analysis/Passes.h>
 #include <llvm/Analysis/BasicAliasAnalysis.h>
+#include <llvm/Analysis/GlobalsModRef.h>
 #include <llvm/Analysis/TargetTransformInfo.h>
 #include <llvm/Analysis/TypeBasedAliasAnalysis.h>
 #include <llvm/Analysis/ScopedNoAliasAA.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/PassManager.h>
 #include <llvm/IR/Verifier.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/Scalar.h>
-#include <llvm/Transforms/Vectorize.h>
-#include <llvm/Transforms/Instrumentation/AddressSanitizer.h>
-#include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
-#include <llvm/Transforms/Scalar/GVN.h>
-#include <llvm/Transforms/IPO/AlwaysInliner.h>
-#include <llvm/Transforms/InstCombine/InstCombine.h>
-#include <llvm/Transforms/Scalar/InstSimplifyPass.h>
-#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
+#include <llvm/Transforms/IPO/InferFunctionAttrs.h>
 #include <llvm/Passes/PassBuilder.h>
 #include <llvm/Passes/PassPlugin.h>
 
 // NewPM needs to manually include all the pass headers
+#include <llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h>
 #include <llvm/Transforms/IPO/AlwaysInliner.h>
 #include <llvm/Transforms/IPO/Annotation2Metadata.h>
 #include <llvm/Transforms/IPO/ConstantMerge.h>
 #include <llvm/Transforms/IPO/ForceFunctionAttrs.h>
+#include <llvm/Transforms/IPO/GlobalDCE.h>
+#include <llvm/Transforms/IPO/GlobalOpt.h>
+#include <llvm/Transforms/IPO/StripDeadPrototypes.h>
 #include <llvm/Transforms/InstCombine/InstCombine.h>
 #include <llvm/Transforms/Instrumentation/AddressSanitizer.h>
 #include <llvm/Transforms/Instrumentation/MemorySanitizer.h>
 #include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
 #include <llvm/Transforms/Scalar/ADCE.h>
 #include <llvm/Transforms/Scalar/AnnotationRemarks.h>
+#include <llvm/Transforms/Scalar/BDCE.h>
+#include "llvm/Transforms/Scalar/ConstraintElimination.h"
 #include <llvm/Transforms/Scalar/CorrelatedValuePropagation.h>
 #include <llvm/Transforms/Scalar/DCE.h>
 #include <llvm/Transforms/Scalar/DeadStoreElimination.h>
@@ -66,17 +63,22 @@
 #include <llvm/Transforms/Scalar/LowerConstantIntrinsics.h>
 #include <llvm/Transforms/Scalar/LowerExpectIntrinsic.h>
 #include <llvm/Transforms/Scalar/MemCpyOptimizer.h>
+#include <llvm/Transforms/Scalar/MergedLoadStoreMotion.h>
 #include <llvm/Transforms/Scalar/Reassociate.h>
 #include <llvm/Transforms/Scalar/SCCP.h>
 #include <llvm/Transforms/Scalar/SROA.h>
 #include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
 #include <llvm/Transforms/Scalar/SimplifyCFG.h>
 #include <llvm/Transforms/Scalar/WarnMissedTransforms.h>
+#include <llvm/Transforms/Utils/LibCallsShrinkWrap.h>
 #include <llvm/Transforms/Utils/InjectTLIMappings.h>
+#include <llvm/Transforms/Utils/Mem2Reg.h>
+#include <llvm/Transforms/Utils/RelLookupTableConverter.h>
+#include <llvm/Transforms/Utils/ModuleUtils.h>
+#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
 #include <llvm/Transforms/Vectorize/LoopVectorize.h>
 #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
 #include <llvm/Transforms/Vectorize/VectorCombine.h>
-
 #ifdef _COMPILER_GCC_
 #pragma GCC diagnostic pop
 #endif
@@ -89,13 +91,12 @@
 #include "julia_assert.h"
 #include "passes.h"
 
-
 using namespace llvm;
 
 namespace {
     //Shamelessly stolen from Clang's approach to sanitizers
     //TODO do we want to enable other sanitizers?
-    static void addSanitizerPasses(ModulePassManager &MPM, OptimizationLevel O) JL_NOTSAFEPOINT {
+    static void addSanitizerPasses(ModulePassManager &MPM, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
         // Coverage sanitizer
         // if (CodeGenOpts.hasSanitizeCoverage()) {
         //   auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
@@ -104,67 +105,63 @@ namespace {
         //       CodeGenOpts.SanitizeCoverageIgnorelistFiles));
         // }
 
-    #ifdef _COMPILER_MSAN_ENABLED_
-        auto MSanPass = [&](/*SanitizerMask Mask, */bool CompileKernel) JL_NOTSAFEPOINT {
-        // if (LangOpts.Sanitize.has(Mask)) {
-            // int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins;
-            // bool Recover = CodeGenOpts.SanitizeRecover.has(Mask);
-
-            // MemorySanitizerOptions options(TrackOrigins, Recover, CompileKernel,{
-            //                             CodeGenOpts.SanitizeMemoryParamRetval);
-            MemorySanitizerOptions options;
-            MPM.addPass(ModuleMemorySanitizerPass(options));
-            FunctionPassManager FPM;
-            FPM.addPass(MemorySanitizerPass(options));
-            if (O != OptimizationLevel::O0) {
-            // MemorySanitizer inserts complex instrumentation that mostly
-            // follows the logic of the original code, but operates on
-            // "shadow" values. It can benefit from re-running some
-            // general purpose optimization passes.
-            FPM.addPass(EarlyCSEPass());
-            // TODO: Consider add more passes like in
-            // addGeneralOptsForMemorySanitizer. EarlyCSEPass makes visible
-            // difference on size. It's not clear if the rest is still
-            // useful. InstCombinePass breaks
-            // compiler-rt/test/msan/select_origin.cpp.
-            }
-            MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-        // }
-        };
-        MSanPass(/*SanitizerKind::Memory, */false);
-        // MSanPass(SanitizerKind::KernelMemory, true);
-    #endif
-
-    #ifdef _COMPILER_TSAN_ENABLED_
-        // if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
-        MPM.addPass(ModuleThreadSanitizerPass());
-        MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
-        // }
-    #endif
+        if (options.sanitize_memory) {
+            auto MSanPass = [&](/*SanitizerMask Mask, */bool CompileKernel) JL_NOTSAFEPOINT {
+                // if (LangOpts.Sanitize.has(Mask)) {
+                // int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins;
+                // bool Recover = CodeGenOpts.SanitizeRecover.has(Mask);
+
+                // MemorySanitizerOptions options(TrackOrigins, Recover, CompileKernel,{
+                //                             CodeGenOpts.SanitizeMemoryParamRetval);
+                MemorySanitizerOptions options;
+                MPM.addPass(MemorySanitizerPass(options));
+                FunctionPassManager FPM;
+                if (O != OptimizationLevel::O0) {
+                    // MemorySanitizer inserts complex instrumentation that mostly
+                    // follows the logic of the original code, but operates on
+                    // "shadow" values. It can benefit from re-running some
+                    // general purpose optimization passes.
+                    FPM.addPass(EarlyCSEPass());
+                    // TODO: Consider add more passes like in
+                    // addGeneralOptsForMemorySanitizer. EarlyCSEPass makes visible
+                    // difference on size. It's not clear if the rest is still
+                    // useful. InstCombinePass breaks
+                    // compiler-rt/test/msan/select_origin.cpp.
+                }
+                MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+                // }
+            };
+            MSanPass(/*SanitizerKind::Memory, */false);
+            // MSanPass(SanitizerKind::KernelMemory, true);
+        }
 
+        if (options.sanitize_thread) {
+            MPM.addPass(ModuleThreadSanitizerPass());
+            MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
+        }
 
-    #ifdef _COMPILER_ASAN_ENABLED_
-        auto ASanPass = [&](/*SanitizerMask Mask, */bool CompileKernel) JL_NOTSAFEPOINT {
-        //   if (LangOpts.Sanitize.has(Mask)) {
-            // bool UseGlobalGC = asanUseGlobalsGC(TargetTriple, CodeGenOpts);
-            // bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator;
-            // llvm::AsanDtorKind DestructorKind =
-            //     CodeGenOpts.getSanitizeAddressDtor();
-            // AddressSanitizerOptions Opts;
-            // Opts.CompileKernel = CompileKernel;
-            // Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask);
-            // Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
-            // Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn();
-            // MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
-            // MPM.addPass(ModuleAddressSanitizerPass(
-            //     Opts, UseGlobalGC, UseOdrIndicator, DestructorKind));
-            //Let's assume the defaults are actually fine for our purposes
-            MPM.addPass(ModuleAddressSanitizerPass(AddressSanitizerOptions()));
-        //   }
-        };
-        ASanPass(/*SanitizerKind::Address, */false);
-        // ASanPass(SanitizerKind::KernelAddress, true);
-    #endif
+        if (options.sanitize_address) {
+            auto ASanPass = [&](/*SanitizerMask Mask, */bool CompileKernel) JL_NOTSAFEPOINT {
+                //   if (LangOpts.Sanitize.has(Mask)) {
+                // bool UseGlobalGC = asanUseGlobalsGC(TargetTriple, CodeGenOpts);
+                // bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator;
+                // llvm::AsanDtorKind DestructorKind =
+                //     CodeGenOpts.getSanitizeAddressDtor();
+                // AddressSanitizerOptions Opts;
+                // Opts.CompileKernel = CompileKernel;
+                // Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask);
+                // Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
+                // Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn();
+                // MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
+                //Let's assume the defaults are actually fine for our purposes
+                // MPM.addPass(AddressSanitizerPass(
+                //     Opts, UseGlobalGC, UseOdrIndicator, DestructorKind));
+                MPM.addPass(AddressSanitizerPass(AddressSanitizerOptions(), true, false));
+                //   }
+            };
+            ASanPass(/*SanitizerKind::Address, */false);
+            // ASanPass(SanitizerKind::KernelAddress, true);
+        }
 
         // auto HWASanPass = [&](SanitizerMask Mask, bool CompileKernel) {
         //   if (LangOpts.Sanitize.has(Mask)) {
@@ -182,10 +179,11 @@ namespace {
         // }
     }
 
-#ifdef JL_DEBUG_BUILD
+#ifdef JL_VERIFY_PASSES
     static inline void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) JL_NOTSAFEPOINT {
-        if (!llvm_only)
-            MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass()));
+        if (!llvm_only){
+            MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass(true)));
+        }
         MPM.addPass(VerifierPass());
     }
 #endif
@@ -202,15 +200,11 @@ namespace {
             .convertSwitchRangeToICmp(true)
             .convertSwitchToLookupTable(true)
             .forwardSwitchCondToPhi(true)
-            //These mess with loop rotation, so only do them after that
+            .needCanonicalLoops(false)
             .hoistCommonInsts(true)
-            // Causes an SRET assertion error in late-gc-lowering
-            // .sinkCommonInsts(true)
+            .sinkCommonInsts(true)
             ;
     }
-#if JL_LLVM_VERSION < 150000
-#define LICMOptions()
-#endif
 
 // At any given time exactly one of each pair of overloads is strictly unused
 #ifdef _COMPILER_GCC_
@@ -247,7 +241,11 @@ namespace {
     std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeEarlySimplificationCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT {
         static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!");
         if (!PB) return;
+#if JL_LLVM_VERSION >= 200000
+        PB->invokePipelineEarlySimplificationEPCallbacks(MPM, O, ThinOrFullLTOPhase::None);
+#else
         PB->invokePipelineEarlySimplificationEPCallbacks(MPM, O);
+#endif
     }
     template<typename PB_t>
     std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeCGSCCCallbacks(CGSCCPassManager &CGPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT {
@@ -259,7 +257,11 @@ namespace {
     std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeOptimizerEarlyCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT {
         static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!");
         if (!PB) return;
+#if JL_LLVM_VERSION >= 200000
+        PB->invokeOptimizerEarlyEPCallbacks(MPM, O, ThinOrFullLTOPhase::None);
+#else
         PB->invokeOptimizerEarlyEPCallbacks(MPM, O);
+#endif
     }
     template<typename PB_t>
     std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeLateLoopOptimizationCallbacks(LoopPassManager &LPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT {
@@ -289,7 +291,11 @@ namespace {
     std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeOptimizerLastCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT {
         static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!");
         if (!PB) return;
+#if JL_LLVM_VERSION >= 200000
+        PB->invokeOptimizerLastEPCallbacks(MPM, O, ThinOrFullLTOPhase::None);
+#else
         PB->invokeOptimizerLastEPCallbacks(MPM, O);
+#endif
     }
 
     // Fallbacks
@@ -328,168 +334,222 @@ namespace {
 
 static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
     MPM.addPass(BeforeEarlySimplificationMarkerPass());
-#ifdef JL_DEBUG_BUILD
+#ifdef JL_VERIFY_PASSES
     addVerificationPasses(MPM, options.llvm_only);
 #endif
-    // Place after verification in case we want to force it anyways
-    MPM.addPass(ForceFunctionAttrsPass());
-    invokePipelineStartCallbacks(MPM, PB, O);
-    MPM.addPass(Annotation2MetadataPass());
-    MPM.addPass(ConstantMergePass());
-    {
-        FunctionPassManager FPM;
-        FPM.addPass(LowerExpectIntrinsicPass());
-        if (O.getSpeedupLevel() >= 2) {
-            JULIA_PASS(FPM.addPass(PropagateJuliaAddrspacesPass()));
+    if (options.enable_early_simplifications) {
+      // Place after verification in case we want to force it anyways
+      MPM.addPass(ForceFunctionAttrsPass());
+      invokePipelineStartCallbacks(MPM, PB, O);
+      MPM.addPass(Annotation2MetadataPass());
+      MPM.addPass(InferFunctionAttrsPass());
+      MPM.addPass(ConstantMergePass());
+      {
+          FunctionPassManager FPM;
+          FPM.addPass(LowerExpectIntrinsicPass());
+          if (O.getSpeedupLevel() >= 2) {
+              JULIA_PASS(FPM.addPass(PropagateJuliaAddrspacesPass()));
+          }
+          // DCE must come before simplifycfg
+          // codegen can generate unused statements when generating builtin calls,
+          // and those dead statements can alter how simplifycfg optimizes the CFG
+          FPM.addPass(DCEPass());
+          FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions()));
+          if (O.getSpeedupLevel() >= 1) {
+              FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
+              FPM.addPass(EarlyCSEPass());
+          }
+          MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+          if (O.getSpeedupLevel() >= 1) {
+            FunctionPassManager GlobalFPM;
+            MPM.addPass(GlobalOptPass());
+            GlobalFPM.addPass(PromotePass());
+            GlobalFPM.addPass(InstCombinePass());
         }
-        FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions()));
-        if (O.getSpeedupLevel() >= 1) {
-            FPM.addPass(DCEPass());
-            FPM.addPass(SROAPass());
-        }
-        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+      }
+      invokeEarlySimplificationCallbacks(MPM, PB, O);
     }
-    invokeEarlySimplificationCallbacks(MPM, PB, O);
     MPM.addPass(AfterEarlySimplificationMarkerPass());
 }
 
 static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
     MPM.addPass(BeforeEarlyOptimizationMarkerPass());
-    invokeOptimizerEarlyCallbacks(MPM, PB, O);
-    {
-        CGSCCPassManager CGPM;
-        invokeCGSCCCallbacks(CGPM, PB, O);
-        if (O.getSpeedupLevel() >= 2) {
-            FunctionPassManager FPM;
-            JULIA_PASS(FPM.addPass(AllocOptPass()));
-            FPM.addPass(Float2IntPass());
-            FPM.addPass(LowerConstantIntrinsicsPass());
-            CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
-        }
-        MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
-    }
-    if (options.dump_native) {
-        JULIA_PASS(MPM.addPass(MultiVersioningPass(options.external_use)));
-    }
-    JULIA_PASS(MPM.addPass(CPUFeaturesPass()));
-    if (O.getSpeedupLevel() >= 1) {
-        FunctionPassManager FPM;
-        if (O.getSpeedupLevel() >= 2) {
-            FPM.addPass(SROAPass());
-            // SROA can duplicate PHI nodes which can block LowerSIMD
+    if (options.enable_early_optimizations) {
+      invokeOptimizerEarlyCallbacks(MPM, PB, O);
+      {
+          CGSCCPassManager CGPM;
+          invokeCGSCCCallbacks(CGPM, PB, O);
+          if (O.getSpeedupLevel() >= 2) {
+              FunctionPassManager FPM;
+              JULIA_PASS(FPM.addPass(AllocOptPass()));
+              FPM.addPass(Float2IntPass());
+              FPM.addPass(LowerConstantIntrinsicsPass());
+              CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+          }
+          MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+      }
+      if (O.getSpeedupLevel() >= 2) {
+          MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+      }
+      // MPM.addPass(createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+      if (options.dump_native) {
+          MPM.addPass(StripDeadPrototypesPass());
+          JULIA_PASS(MPM.addPass(MultiVersioningPass(options.external_use)));
+      }
+      JULIA_PASS(MPM.addPass(CPUFeaturesPass()));
+      if (O.getSpeedupLevel() >= 1) {
+          FunctionPassManager FPM;
+          if (O.getSpeedupLevel() >= 2) {
+            FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
+            FPM.addPass(EarlyCSEPass(true));
             FPM.addPass(InstCombinePass());
+            FPM.addPass(AggressiveInstCombinePass());
             FPM.addPass(JumpThreadingPass());
             FPM.addPass(CorrelatedValuePropagationPass());
+            FPM.addPass(LibCallsShrinkWrapPass());
             FPM.addPass(ReassociatePass());
-            FPM.addPass(EarlyCSEPass());
+            FPM.addPass(ConstraintEliminationPass());
             JULIA_PASS(FPM.addPass(AllocOptPass()));
         } else { // if (O.getSpeedupLevel() >= 1) (exactly)
-            FPM.addPass(InstCombinePass());
             FPM.addPass(EarlyCSEPass());
+            FPM.addPass(InstCombinePass());
         }
         invokePeepholeEPCallbacks(FPM, PB, O);
-        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), /*UseMemorySSA = */true));
+      }
+      MPM.addPass(GlobalOptPass());
+      MPM.addPass(GlobalDCEPass());
     }
     MPM.addPass(AfterEarlyOptimizationMarkerPass());
 }
 
 static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
     FPM.addPass(BeforeLoopOptimizationMarkerPass());
-    {
-        LoopPassManager LPM;
-        if (O.getSpeedupLevel() >= 2) {
-            LPM.addPass(LoopRotatePass());
+    if (options.enable_loop_optimizations) {
+        {
+            LoopPassManager LPM;
+            LPM.addPass(LowerSIMDLoopPass());
+            if (O.getSpeedupLevel() >= 2) {
+                LPM.addPass(LoopInstSimplifyPass());
+                LPM.addPass(LoopSimplifyCFGPass());
+                LPM.addPass(BeforeLICMMarkerPass());
+                auto opts = LICMOptions();
+                opts.AllowSpeculation = false;
+                LPM.addPass(LICMPass(opts));
+                LPM.addPass(JuliaLICMPass());
+                LPM.addPass(LoopRotatePass(true, false));
+                LPM.addPass(LICMPass(LICMOptions()));
+                LPM.addPass(JuliaLICMPass());
+                LPM.addPass(AfterLICMMarkerPass());
+                LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true));
+            }
+            invokeLateLoopOptimizationCallbacks(LPM, PB, O);
+            //We don't know if the loop callbacks support MSSA
+            FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true));
         }
-        invokeLateLoopOptimizationCallbacks(LPM, PB, O);
-        //We don't know if the loop callbacks support MSSA
-        FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false));
-    }
-    if (O.getSpeedupLevel() >= 2) {
-        LoopPassManager LPM;
-        LPM.addPass(BeforeLICMMarkerPass());
-        LPM.addPass(LICMPass(LICMOptions()));
-        LPM.addPass(JuliaLICMPass());
-        LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true));
-        LPM.addPass(LICMPass(LICMOptions()));
-        LPM.addPass(JuliaLICMPass());
-        LPM.addPass(AfterLICMMarkerPass());
-        //LICM needs MemorySSA now, so we must use it
-        FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true));
-    }
-    if (O.getSpeedupLevel() >= 2) {
-        FPM.addPass(IRCEPass());
-    }
-    {
-        LoopPassManager LPM;
-        LPM.addPass(BeforeLoopSimplificationMarkerPass());
-        if (O.getSpeedupLevel() >= 2) {
-            LPM.addPass(LoopInstSimplifyPass());
-            LPM.addPass(LoopIdiomRecognizePass());
-            LPM.addPass(IndVarSimplifyPass());
-            LPM.addPass(LoopDeletionPass());
-            // This unroll will only unroll loops when the trip count is known and small,
-            // so that no loop remains
-            LPM.addPass(LoopFullUnrollPass());
+        if (O.getSpeedupLevel() >= 2)
+            FPM.addPass(IRCEPass());
+        {
+            LoopPassManager LPM;
+            LPM.addPass(BeforeLoopSimplificationMarkerPass());
+            if (O.getSpeedupLevel() >= 2) {
+                LPM.addPass(LoopIdiomRecognizePass());
+                LPM.addPass(IndVarSimplifyPass());
+                LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true));
+                LPM.addPass(LoopDeletionPass());
+                // This unroll will only unroll loops when the trip count is known and small,
+                // so that no loop remains
+                LPM.addPass(LoopFullUnrollPass());
+            }
+            invokeLoopOptimizerEndCallbacks(LPM, PB, O);
+            LPM.addPass(AfterLoopSimplificationMarkerPass());
+            FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions()));
+            FPM.addPass(InstCombinePass());
+            //We don't know if the loop end callbacks support MSSA
+            FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false));
         }
-        invokeLoopOptimizerEndCallbacks(LPM, PB, O);
-        LPM.addPass(AfterLoopSimplificationMarkerPass());
-        //We don't know if the loop end callbacks support MSSA
-        FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false));
     }
     FPM.addPass(AfterLoopOptimizationMarkerPass());
 }
 
 static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
     FPM.addPass(BeforeScalarOptimizationMarkerPass());
-    if (O.getSpeedupLevel() >= 2) {
-        JULIA_PASS(FPM.addPass(AllocOptPass()));
-        FPM.addPass(SROAPass());
-        FPM.addPass(InstSimplifyPass());
-        FPM.addPass(GVNPass());
-        FPM.addPass(MemCpyOptPass());
-        FPM.addPass(SCCPPass());
-        FPM.addPass(CorrelatedValuePropagationPass());
-        FPM.addPass(DCEPass());
-        FPM.addPass(IRCEPass());
-        FPM.addPass(InstCombinePass());
-        FPM.addPass(JumpThreadingPass());
-    }
-    if (O.getSpeedupLevel() >= 3) {
-        FPM.addPass(GVNPass());
-    }
-    if (O.getSpeedupLevel() >= 2) {
-        FPM.addPass(DSEPass());
-        invokePeepholeEPCallbacks(FPM, PB, O);
-        FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
-        JULIA_PASS(FPM.addPass(AllocOptPass()));
-        {
-            LoopPassManager LPM;
-            LPM.addPass(LoopDeletionPass());
-            LPM.addPass(LoopInstSimplifyPass());
-            FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
+    if (options.enable_scalar_optimizations) {
+        if (O.getSpeedupLevel() >= 2) {
+            JULIA_PASS(FPM.addPass(AllocOptPass()));
+            FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
+            FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
+            FPM.addPass(MergedLoadStoreMotionPass());
+            FPM.addPass(GVNPass());
+            FPM.addPass(SCCPPass());
+            FPM.addPass(BDCEPass());
+            FPM.addPass(InstCombinePass());
+            FPM.addPass(CorrelatedValuePropagationPass());
+            FPM.addPass(ADCEPass());
+            FPM.addPass(MemCpyOptPass());
+            FPM.addPass(DSEPass());
+            FPM.addPass(IRCEPass());
+            FPM.addPass(JumpThreadingPass());
+            FPM.addPass(ConstraintEliminationPass());
+        } else if (O.getSpeedupLevel() >= 1) {
+            JULIA_PASS(FPM.addPass(AllocOptPass()));
+            FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
+            FPM.addPass(MemCpyOptPass());
+            FPM.addPass(SCCPPass());
+            FPM.addPass(BDCEPass());
+            FPM.addPass(InstCombinePass());
+            FPM.addPass(ADCEPass());
         }
-        FPM.addPass(LoopDistributePass());
+        if (O.getSpeedupLevel() >= 3) {
+            FPM.addPass(GVNPass());
+        }
+        if (O.getSpeedupLevel() >= 2) {
+            FPM.addPass(DSEPass());
+            invokePeepholeEPCallbacks(FPM, PB, O);
+            FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
+            JULIA_PASS(FPM.addPass(AllocOptPass()));
+            {
+                LoopPassManager LPM;
+                LPM.addPass(LICMPass(LICMOptions()));
+                LPM.addPass(JuliaLICMPass());
+                FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true));
+            }
+            FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
+            FPM.addPass(InstCombinePass());
+        } else if (O.getSpeedupLevel() >= 1)
+            FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
+
+        invokeScalarOptimizerCallbacks(FPM, PB, O);
     }
-    invokeScalarOptimizerCallbacks(FPM, PB, O);
     FPM.addPass(AfterScalarOptimizationMarkerPass());
 }
 
 static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
     FPM.addPass(BeforeVectorizationMarkerPass());
-    //TODO look into loop vectorize options
-    FPM.addPass(InjectTLIMappings());
-    FPM.addPass(LoopVectorizePass());
-    FPM.addPass(LoopLoadEliminationPass());
-    FPM.addPass(InstCombinePass());
-    FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
-    FPM.addPass(SLPVectorizerPass());
-    invokeVectorizerCallbacks(FPM, PB, O);
-    FPM.addPass(VectorCombinePass());
-    FPM.addPass(ADCEPass());
-    //TODO add BDCEPass here?
-    // This unroll will unroll vectorized loops
-    // as well as loops that we tried but failed to vectorize
-    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false)));
+    if (options.enable_vector_pipeline) {
+        //TODO look into loop vectorize options
+        // Rerotate loops that might have been unrotated in the simplification
+        LoopPassManager LPM;
+        LPM.addPass(LoopRotatePass());
+        LPM.addPass(LoopDeletionPass());
+        FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
+        FPM.addPass(LoopDistributePass());
+        FPM.addPass(InjectTLIMappings());
+        FPM.addPass(LoopVectorizePass());
+        FPM.addPass(LoopLoadEliminationPass());
+        FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
+        FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(LICMOptions()), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+        FPM.addPass(EarlyCSEPass());
+        FPM.addPass(CorrelatedValuePropagationPass());
+        FPM.addPass(InstCombinePass());
+        FPM.addPass(SLPVectorizerPass());
+        FPM.addPass(VectorCombinePass());
+        invokeVectorizerCallbacks(FPM, PB, O);
+        FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false)));
+        FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
+        FPM.addPass(InstSimplifyPass());
+        FPM.addPass(AfterVectorizationMarkerPass());
+    }
     FPM.addPass(AfterVectorizationMarkerPass());
 }
 
@@ -499,30 +559,36 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *
         //TODO barrier pass?
         {
             FunctionPassManager FPM;
-            JULIA_PASS(FPM.addPass(LowerExcHandlersPass()));
             JULIA_PASS(FPM.addPass(GCInvariantVerifierPass(false)));
             MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
         }
         // Needed **before** LateLowerGCFrame on LLVM < 12
         // due to bug in `CreateAlignmentAssumption`.
+        assert(options.remove_ni);
         JULIA_PASS(MPM.addPass(RemoveNIPass()));
-        JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGCPass())));
-        JULIA_PASS(MPM.addPass(FinalLowerGCPass()));
-        if (O.getSpeedupLevel() >= 2) {
+        {
             FunctionPassManager FPM;
-            FPM.addPass(GVNPass());
-            FPM.addPass(SCCPPass());
-            FPM.addPass(DCEPass());
+            JULIA_PASS(FPM.addPass(LateLowerGCPass()));
+            JULIA_PASS(FPM.addPass(FinalLowerGCPass()));
+            JULIA_PASS(FPM.addPass(ExpandAtomicModifyPass())); // after LateLowerGCPass so that all IPO is valid
             MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
         }
         JULIA_PASS(MPM.addPass(LowerPTLSPass(options.dump_native)));
+        MPM.addPass(RemoveJuliaAddrspacesPass()); //TODO: Make this conditional on arches (GlobalISel doesn't like our addrsspaces)
         if (O.getSpeedupLevel() >= 1) {
             FunctionPassManager FPM;
+            if (O.getSpeedupLevel() >= 2) {
+                FPM.addPass(DSEPass());
+                FPM.addPass(GVNPass());
+                FPM.addPass(SCCPPass());
+                FPM.addPass(DCEPass());
+            }
             FPM.addPass(InstCombinePass());
             FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
             MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
         }
-    } else {
+    }
+    else if (!options.remove_ni) {
         JULIA_PASS(MPM.addPass(RemoveNIPass()));
     }
     MPM.addPass(AfterIntrinsicLoweringMarkerPass());
@@ -530,22 +596,23 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *
 
 static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
     MPM.addPass(BeforeCleanupMarkerPass());
-    if (O.getSpeedupLevel() >= 2) {
-        FunctionPassManager FPM;
-        JULIA_PASS(FPM.addPass(CombineMulAddPass()));
-        FPM.addPass(DivRemPairsPass());
-        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-    }
-    invokeOptimizerLastCallbacks(MPM, PB, O);
-    MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
-    addSanitizerPasses(MPM, O);
-    {
-        FunctionPassManager FPM;
-        JULIA_PASS(FPM.addPass(DemoteFloat16Pass()));
+    if (options.cleanup) {
         if (O.getSpeedupLevel() >= 2) {
-            FPM.addPass(GVNPass());
+            FunctionPassManager FPM;
+            FPM.addPass(DivRemPairsPass());
+            MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+        }
+        invokeOptimizerLastCallbacks(MPM, PB, O);
+        MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
+        addSanitizerPasses(MPM, O, options);
+        {
+            FunctionPassManager FPM;
+            JULIA_PASS(FPM.addPass(DemoteFloat16Pass()));
+            if (O.getSpeedupLevel() >= 2) {
+                FPM.addPass(GVNPass());
+            }
+            MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
         }
-        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
     }
     MPM.addPass(AfterCleanupMarkerPass());
 }
@@ -553,9 +620,9 @@ static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimi
 static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
     MPM.addPass(BeforeOptimizationMarkerPass());
     buildEarlySimplificationPipeline(MPM, PB, O, options);
-    MPM.addPass(AlwaysInlinerPass());
+    if (options.always_inline)
+        MPM.addPass(AlwaysInlinerPass());
     buildEarlyOptimizerPipeline(MPM, PB, O, options);
-    MPM.addPass(LowerSIMDLoopPass());
     {
         FunctionPassManager FPM;
         buildLoopOptimizerPipeline(FPM, PB, O, options);
@@ -563,7 +630,8 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL
         if (O.getSpeedupLevel() >= 2) {
             buildVectorPipeline(FPM, PB, O, options);
         }
-        FPM.addPass(WarnMissedTransformationsPass());
+        if (options.warn_missed_transformations)
+            FPM.addPass(WarnMissedTransformationsPass());
         MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
     }
     buildIntrinsicLoweringPipeline(MPM, PB, O, options);
@@ -571,36 +639,6 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL
     MPM.addPass(AfterOptimizationMarkerPass());
 }
 
-extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, int Speedup, int Size,
-    int lower_intrinsics, int dump_native, int external_use, int llvm_only) JL_NOTSAFEPOINT
-{
-    OptimizationLevel O;
-    switch (Size) {
-        case 1:
-            O = OptimizationLevel::Os;
-            break;
-        default:
-            O = OptimizationLevel::Oz;
-            break;
-        case 0:
-            switch (Speedup) {
-                case 0:
-                    O = OptimizationLevel::O0;
-                    break;
-                case 1:
-                    O = OptimizationLevel::O1;
-                    break;
-                case 2:
-                    O = OptimizationLevel::O2;
-                    break;
-                default:
-                    O = OptimizationLevel::O3;
-                    break;
-            }
-    }
-    buildPipeline(*reinterpret_cast<ModulePassManager*>(MPM), reinterpret_cast<PassBuilder*>(PB), O,
-                    OptimizationOptions{!!lower_intrinsics, !!dump_native, !!external_use, !!llvm_only});
-}
 
 #undef JULIA_PASS
 
@@ -608,29 +646,29 @@ namespace {
 
     void adjustPIC(PassInstrumentationCallbacks &PIC) JL_NOTSAFEPOINT {
 //Borrowed from LLVM PassBuilder.cpp:386
-#define MODULE_PASS(NAME, CLASS, CREATE_PASS)                                         \
+#define MODULE_PASS(NAME, CREATE_PASS)                                         \
 PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
-#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)      \
+#define MODULE_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS)      \
 PIC.addClassToPassName(CLASS, NAME);
 #define MODULE_ANALYSIS(NAME, CREATE_PASS)                                     \
 PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
-#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS)                                       \
+#define FUNCTION_PASS(NAME, CREATE_PASS)                                       \
 PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
-#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)    \
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS)    \
 PIC.addClassToPassName(CLASS, NAME);
 #define FUNCTION_ANALYSIS(NAME, CREATE_PASS)                                   \
 PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
 #define LOOPNEST_PASS(NAME, CREATE_PASS)                                       \
 PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
-#define LOOP_PASS(NAME, CLASS, CREATE_PASS)                                           \
+#define LOOP_PASS(NAME, CREATE_PASS)                                           \
 PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
-#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)        \
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS)        \
 PIC.addClassToPassName(CLASS, NAME);
 #define LOOP_ANALYSIS(NAME, CREATE_PASS)                                       \
 PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
-#define CGSCC_PASS(NAME, CLASS, CREATE_PASS)                                          \
+#define CGSCC_PASS(NAME, CREATE_PASS)                                          \
 PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
-#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)       \
+#define CGSCC_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS)       \
 PIC.addClassToPassName(CLASS, NAME);
 #define CGSCC_ANALYSIS(NAME, CREATE_PASS)                                      \
 PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
@@ -673,13 +711,6 @@ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
         PIC.addClassToPassName("AfterOptimizationMarkerPass", "AfterOptimization");
     }
 
-    auto createPIC(StandardInstrumentations &SI) JL_NOTSAFEPOINT {
-        auto PIC = std::make_unique<PassInstrumentationCallbacks>();
-        adjustPIC(*PIC);
-        SI.registerCallbacks(*PIC);
-        return PIC;
-    }
-
     FunctionAnalysisManager createFAM(OptimizationLevel O, TargetMachine &TM) JL_NOTSAFEPOINT {
 
         FunctionAnalysisManager FAM;
@@ -708,9 +739,8 @@ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
 }
 
 NewPM::NewPM(std::unique_ptr<TargetMachine> TM, OptimizationLevel O, OptimizationOptions options) :
-    TM(std::move(TM)), SI(false), PIC(createPIC(SI)),
-    PB(this->TM.get(), PipelineTuningOptions(), None, PIC.get()),
-    MPM(createMPM(PB, O, options)), O(O) {}
+    TM(std::move(TM)), O(O), options(options), TimePasses() {}
+
 
 NewPM::~NewPM() = default;
 
@@ -736,14 +766,30 @@ void NewPM::run(Module &M) {
     //We must recreate the analysis managers every time
     //so that analyses from previous runs of the pass manager
     //do not hang around for the next run
-    AnalysisManagers AM{*TM, PB, O};
+    StandardInstrumentations SI(M.getContext(),false);
+    PassInstrumentationCallbacks PIC;
+    adjustPIC(PIC);
+    TimePasses.registerCallbacks(PIC);
+    FunctionAnalysisManager FAM(createFAM(O, *TM.get()));
+    LoopAnalysisManager LAM;
+    CGSCCAnalysisManager CGAM;
+    ModuleAnalysisManager MAM;
+    SI.registerCallbacks(PIC, &MAM);
+    SI.getTimePasses().setOutStream(nulls()); //TODO: figure out a better way of doing this
+    PassBuilder PB(TM.get(), PipelineTuningOptions(), None, &PIC);
+    PB.registerLoopAnalyses(LAM);
+    PB.registerFunctionAnalyses(FAM);
+    PB.registerCGSCCAnalyses(CGAM);
+    PB.registerModuleAnalyses(MAM);
+    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+    ModulePassManager MPM = createMPM(PB, O, options);
 #ifndef __clang_gcanalyzer__ /* the analyzer cannot prove we have not added instrumentation callbacks with safepoints */
-    MPM.run(M, AM.MAM);
+    MPM.run(M, MAM);
 #endif
 }
 
 void NewPM::printTimers() {
-    SI.getTimePasses().print();
+    TimePasses.print();
 }
 
 OptimizationLevel getOptLevel(int optlevel) {
@@ -761,7 +807,7 @@ OptimizationLevel getOptLevel(int optlevel) {
 }
 
 //This part is also basically stolen from LLVM's PassBuilder.cpp file
-static llvm::Optional<std::pair<OptimizationLevel, OptimizationOptions>> parseJuliaPipelineOptions(StringRef name) {
+static std::optional<std::pair<OptimizationLevel, OptimizationOptions>> parseJuliaPipelineOptions(StringRef name) {
     if (name.consume_front("julia")) {
         auto O = OptimizationLevel::O2;
         auto options = OptimizationOptions::defaults();
@@ -773,7 +819,19 @@ static llvm::Optional<std::pair<OptimizationLevel, OptimizationOptions>> parseJu
             OPTION(lower_intrinsics),
             OPTION(dump_native),
             OPTION(external_use),
-            OPTION(llvm_only)
+            OPTION(llvm_only),
+            OPTION(always_inline),
+            OPTION(enable_early_simplifications),
+            OPTION(enable_early_optimizations),
+            OPTION(enable_scalar_optimizations),
+            OPTION(enable_loop_optimizations),
+            OPTION(enable_vector_pipeline),
+            OPTION(remove_ni),
+            OPTION(cleanup),
+            OPTION(warn_missed_transformations),
+            OPTION(sanitize_memory),
+            OPTION(sanitize_thread),
+            OPTION(sanitize_address),
 #undef OPTION
         };
         while (!name.empty()) {
@@ -811,7 +869,37 @@ static llvm::Optional<std::pair<OptimizationLevel, OptimizationOptions>> parseJu
         }
         return {{O, options}};
     }
-    return {};
+    return None;
+}
+
+bool verifyLLVMIR(const Module &M) JL_NOTSAFEPOINT {
+    JL_TIMING(VERIFY_IR, VERIFY_Module);
+    if (verifyModule(M, &errs())) {
+        errs() << "Failed to verify module '" << M.getModuleIdentifier() << "', dumping entire module!\n\n";
+        errs() << M << "\n";
+        return true;
+    }
+    return false;
+}
+
+bool verifyLLVMIR(const Function &F) JL_NOTSAFEPOINT {
+    JL_TIMING(VERIFY_IR, VERIFY_Function);
+    if (verifyFunction(F, &errs())) {
+        errs() << "Failed to verify function '" << F.getName() << "', dumping entire module!\n\n";
+        errs() << *F.getParent() << "\n";
+        return true;
+    }
+    return false;
+}
+
+bool verifyLLVMIR(const Loop &L) JL_NOTSAFEPOINT {
+    JL_TIMING(VERIFY_IR, VERIFY_Loop);
+    if (verifyFunction(*L.getHeader()->getParent(), &errs())) {
+        errs() << "Failed to verify loop '" << L << "', dumping entire module!\n\n";
+        errs() << *L.getHeader()->getModule() << "\n";
+        return true;
+    }
+    return false;
 }
 
 // new pass manager plugin
@@ -827,7 +915,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT {
     PB.registerPipelineParsingCallback(
         [](StringRef Name, FunctionPassManager &PM,
            ArrayRef<PassBuilder::PipelineElement> InnerPipeline) {
-#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; }
+#define FUNCTION_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; }
 #include "llvm-julia-passes.inc"
 #undef FUNCTION_PASS
             if (Name.consume_front("GCInvariantVerifier")) {
@@ -849,7 +937,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT {
     PB.registerPipelineParsingCallback(
         [](StringRef Name, ModulePassManager &PM,
            ArrayRef<PassBuilder::PipelineElement> InnerPipeline) {
-#define MODULE_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; }
+#define MODULE_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; }
 #include "llvm-julia-passes.inc"
 #undef MODULE_PASS
             if (Name.consume_front("LowerPTLSPass")) {
@@ -892,7 +980,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT {
     PB.registerPipelineParsingCallback(
         [](StringRef Name, LoopPassManager &PM,
            ArrayRef<PassBuilder::PipelineElement> InnerPipeline) {
-#define LOOP_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; }
+#define LOOP_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; }
 #include "llvm-julia-passes.inc"
 #undef LOOP_PASS
             return false;
@@ -908,3 +996,9 @@ extern "C" JL_DLLEXPORT_CODEGEN
 ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() JL_NOTSAFEPOINT {
       return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks};
 }
+
+void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
+{
+    PM->add(new TargetLibraryInfoWrapperPass(triple));
+    PM->add(createTargetTransformInfoWrapperPass(std::move(analysis)));
+}
diff --git a/src/precompile.c b/src/precompile.c
index a7174492cf0e1..16cff13c60316 100644
--- a/src/precompile.c
+++ b/src/precompile.c
@@ -35,27 +35,50 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) {
         //   uint64: length of src text
         //   char*: src text
         // At the end we write int32(0) as a terminal sentinel.
-        size_t len = jl_array_len(udeps);
+        size_t len = jl_array_nrows(udeps);
         ios_t srctext;
+        jl_value_t *replace_depot_func = NULL;
+        jl_value_t *normalize_depots_func = NULL;
+        jl_value_t *deptuple = NULL;
+        jl_value_t *depots = NULL;
+        jl_task_t *ct = jl_current_task;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        JL_GC_PUSH4(&deptuple, &depots, &replace_depot_func, &normalize_depots_func);
+        replace_depot_func = jl_eval_global_var(jl_base_module, jl_symbol("replace_depot_path"), jl_current_task->world_age);
+        normalize_depots_func = jl_eval_global_var(jl_base_module, jl_symbol("normalize_depots_for_relocation"), jl_current_task->world_age);
+        depots = jl_apply(&normalize_depots_func, 1);
+        jl_datatype_t *deptuple_p[5] = {jl_module_type, jl_string_type, jl_uint64_type, jl_uint32_type, jl_float64_type};
+        jl_value_t *jl_deptuple_type = jl_apply_tuple_type_v((jl_value_t**)deptuple_p, 5);
+        JL_GC_PROMISE_ROOTED(jl_deptuple_type);
+#define jl_is_deptuple(v) (jl_typeis((v), jl_deptuple_type))
         for (size_t i = 0; i < len; i++) {
-            jl_value_t *deptuple = jl_array_ptr_ref(udeps, i);
-            jl_value_t *depmod = jl_fieldref(deptuple, 0);  // module
+            deptuple = jl_array_ptr_ref(udeps, i);
+            jl_value_t *depmod = jl_fieldref_noalloc(deptuple, 0);  // module
             // Dependencies declared with `include_dependency` are excluded
             // because these may not be Julia code (and could be huge)
+            JL_TYPECHK(write_srctext, deptuple, deptuple);
             if (depmod != (jl_value_t*)jl_main_module) {
-                jl_value_t *dep = jl_fieldref(deptuple, 1);  // file abspath
-                const char *depstr = jl_string_data(dep);
-                if (!depstr[0])
+                jl_value_t *abspath = jl_fieldref_noalloc(deptuple, 1);  // file abspath
+                const char *abspathstr = jl_string_data(abspath);
+                if (!abspathstr[0])
                     continue;
-                ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0);
+                ios_t *srctp = ios_file(&srctext, abspathstr, 1, 0, 0, 0);
                 if (!srctp) {
                     jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n",
-                            jl_string_data(dep));
+                              abspathstr);
                     continue;
                 }
-                size_t slen = jl_string_len(dep);
+
+                jl_value_t *replace_depot_args[3];
+                replace_depot_args[0] = replace_depot_func;
+                replace_depot_args[1] = abspath;
+                replace_depot_args[2] = depots;
+                jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 3);
+
+                size_t slen = jl_string_len(depalias);
                 write_int32(f, slen);
-                ios_write(f, depstr, slen);
+                ios_write(f, jl_string_data(depalias), slen);
                 posfile = ios_pos(f);
                 write_uint64(f, 0);   // placeholder for length of this file in bytes
                 uint64_t filelen = (uint64_t) ios_copyall(f, &srctext);
@@ -65,6 +88,9 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) {
                 ios_seek_end(f);
             }
         }
+        ct->world_age = last_age;
+#undef jl_is_deptuple
+        JL_GC_POP();
     }
     write_int32(f, 0); // mark the end of the source text
 }
@@ -75,38 +101,31 @@ JL_DLLEXPORT void jl_write_compiler_output(void)
         return;
     }
 
-    jl_task_wait_empty();
+    jl_task_wait_empty(); // wait for most work to finish (except possibly finalizers)
+    jl_gc_collect(JL_GC_FULL);
+    jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers
+    jl_task_t *ct = jl_current_task;
+    jl_gc_enable_finalizers(ct, 0); // now disable finalizers, as they could schedule more work or make other unexpected changes to reachability
+    jl_task_wait_empty(); // then make sure we are the only thread alive that could be running user code past here
 
-    if (!jl_module_init_order) {
+    jl_array_t *worklist = jl_module_init_order;
+    if (!worklist) {
         jl_printf(JL_STDERR, "WARNING: --output requested, but no modules defined during run\n");
         return;
     }
 
-    jl_array_t *worklist = jl_module_init_order;
     jl_array_t *udeps = NULL;
     JL_GC_PUSH2(&worklist, &udeps);
     jl_module_init_order = jl_alloc_vec_any(0);
-    int i, l = jl_array_len(worklist);
+    int i, l = jl_array_nrows(worklist);
     for (i = 0; i < l; i++) {
-        jl_value_t *m = jl_ptrarrayref(worklist, i);
+        jl_value_t *m = jl_array_ptr_ref(worklist, i);
         jl_value_t *f = jl_get_global((jl_module_t*)m, jl_symbol("__init__"));
         if (f) {
             jl_array_ptr_1d_push(jl_module_init_order, m);
-            int setting = jl_get_module_compile((jl_module_t*)m);
-            if (setting != JL_OPTIONS_COMPILE_OFF &&
-                setting != JL_OPTIONS_COMPILE_MIN) {
-                // TODO: this would be better handled if moved entirely to jl_precompile
-                // since it's a slightly duplication of effort
-                jl_value_t *tt = jl_is_type(f) ? (jl_value_t*)jl_wrap_Type(f) : jl_typeof(f);
-                JL_GC_PUSH1(&tt);
-                tt = jl_apply_tuple_type_v(&tt, 1);
-                jl_compile_hint((jl_tupletype_t*)tt);
-                JL_GC_POP();
-            }
         }
     }
 
-    assert(jl_precompile_toplevel_module == NULL);
     void *native_code = NULL;
 
     bool_t emit_native = jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm;
@@ -120,7 +139,7 @@ JL_DLLEXPORT void jl_write_compiler_output(void)
     int64_t srctextpos = 0 ;
     jl_create_system_image(emit_native ? &native_code : NULL,
                            jl_options.incremental ? worklist : NULL,
-                           emit_split, &s, &z, &udeps, &srctextpos);
+                           emit_split, &s, &z, &udeps, &srctextpos, jl_module_init_order);
 
     if (!emit_split)
         z = s;
@@ -147,7 +166,7 @@ JL_DLLEXPORT void jl_write_compiler_output(void)
                         jl_options.outputunoptbc,
                         jl_options.outputo,
                         jl_options.outputasm,
-                        z, targets);
+                        z, targets, NULL);
         jl_postoutput_hook();
     }
 
@@ -165,7 +184,12 @@ JL_DLLEXPORT void jl_write_compiler_output(void)
             jl_printf(JL_STDERR, "\n  ** incremental compilation may be broken for this module **\n\n");
         }
     }
+    if (jl_options.trim) {
+        exit(0); // Some finalizers need to run and we've blown up the bindings table
+        // TODO: Is this still needed
+    }
     JL_GC_POP();
+    jl_gc_enable_finalizers(ct, 1);
 }
 
 #ifdef __cplusplus
diff --git a/src/precompile_utils.c b/src/precompile_utils.c
index 055ec4b3330f1..d75f8e5dd88e7 100644
--- a/src/precompile_utils.c
+++ b/src/precompile_utils.c
@@ -1,321 +1,45 @@
-// f{<:Union{...}}(...) is a common pattern
-// and expanding the Union may give a leaf function
-static void _compile_all_tvar_union(jl_value_t *methsig)
-{
-    int tvarslen = jl_subtype_env_size(methsig);
-    jl_value_t *sigbody = methsig;
-    jl_value_t **roots;
-    JL_GC_PUSHARGS(roots, 1 + 2 * tvarslen);
-    jl_value_t **env = roots + 1;
-    int *idx = (int*)alloca(sizeof(int) * tvarslen);
-    int i;
-    for (i = 0; i < tvarslen; i++) {
-        assert(jl_is_unionall(sigbody));
-        idx[i] = 0;
-        env[2 * i] = (jl_value_t*)((jl_unionall_t*)sigbody)->var;
-        env[2 * i + 1] = jl_bottom_type; // initialize the list with Union{}, since T<:Union{} is always a valid option
-        sigbody = ((jl_unionall_t*)sigbody)->body;
-    }
-
-    for (i = 0; i < tvarslen; /* incremented by inner loop */) {
-        jl_value_t **sig = &roots[0];
-        JL_TRY {
-            // TODO: wrap in UnionAll for each tvar in env[2*i + 1] ?
-            // currently doesn't matter much, since jl_compile_hint doesn't work on abstract types
-            *sig = (jl_value_t*)jl_instantiate_type_with(sigbody, env, tvarslen);
-        }
-        JL_CATCH {
-            goto getnext; // sigh, we found an invalid type signature. should we warn the user?
-        }
-        if (!jl_has_concrete_subtype(*sig))
-            goto getnext; // signature wouldn't be callable / is invalid -- skip it
-        if (jl_is_concrete_type(*sig)) {
-            if (jl_compile_hint((jl_tupletype_t *)*sig))
-                goto getnext; // success
-        }
-
-    getnext:
-        for (i = 0; i < tvarslen; i++) {
-            jl_tvar_t *tv = (jl_tvar_t*)env[2 * i];
-            if (jl_is_uniontype(tv->ub)) {
-                size_t l = jl_count_union_components(tv->ub);
-                size_t j = idx[i];
-                if (j == l) {
-                    env[2 * i + 1] = jl_bottom_type;
-                    idx[i] = 0;
-                }
-                else {
-                    jl_value_t *ty = jl_nth_union_component(tv->ub, j);
-                    if (!jl_is_concrete_type(ty))
-                        ty = (jl_value_t*)jl_new_typevar(tv->name, tv->lb, ty);
-                    env[2 * i + 1] = ty;
-                    idx[i] = j + 1;
-                    break;
-                }
-            }
-            else {
-                env[2 * i + 1] = (jl_value_t*)tv;
-            }
-        }
-    }
-    JL_GC_POP();
-}
-
-// f(::Union{...}, ...) is a common pattern
-// and expanding the Union may give a leaf function
-static void _compile_all_union(jl_value_t *sig)
-{
-    jl_tupletype_t *sigbody = (jl_tupletype_t*)jl_unwrap_unionall(sig);
-    size_t count_unions = 0;
-    size_t i, l = jl_svec_len(sigbody->parameters);
-    jl_svec_t *p = NULL;
-    jl_value_t *methsig = NULL;
-
-    for (i = 0; i < l; i++) {
-        jl_value_t *ty = jl_svecref(sigbody->parameters, i);
-        if (jl_is_uniontype(ty))
-            ++count_unions;
-        else if (ty == jl_bottom_type)
-            return; // why does this method exist?
-        else if (jl_is_datatype(ty) && !jl_has_free_typevars(ty) &&
-                 ((!jl_is_kind(ty) && ((jl_datatype_t*)ty)->isconcretetype) ||
-                  ((jl_datatype_t*)ty)->name == jl_type_typename))
-            return; // no amount of union splitting will make this a leaftype signature
-    }
-
-    if (count_unions == 0 || count_unions >= 6) {
-        _compile_all_tvar_union(sig);
-        return;
-    }
-
-    int *idx = (int*)alloca(sizeof(int) * count_unions);
-    for (i = 0; i < count_unions; i++) {
-        idx[i] = 0;
-    }
-
-    JL_GC_PUSH2(&p, &methsig);
-    int idx_ctr = 0, incr = 0;
-    while (!incr) {
-        p = jl_alloc_svec_uninit(l);
-        for (i = 0, idx_ctr = 0, incr = 1; i < l; i++) {
-            jl_value_t *ty = jl_svecref(sigbody->parameters, i);
-            if (jl_is_uniontype(ty)) {
-                assert(idx_ctr < count_unions);
-                size_t l = jl_count_union_components(ty);
-                size_t j = idx[idx_ctr];
-                jl_svecset(p, i, jl_nth_union_component(ty, j));
-                ++j;
-                if (incr) {
-                    if (j == l) {
-                        idx[idx_ctr] = 0;
-                    }
-                    else {
-                        idx[idx_ctr] = j;
-                        incr = 0;
-                    }
-                }
-                ++idx_ctr;
-            }
-            else {
-                jl_svecset(p, i, ty);
-            }
-        }
-        methsig = jl_apply_tuple_type(p);
-        methsig = jl_rewrap_unionall(methsig, sig);
-        _compile_all_tvar_union(methsig);
-    }
-
-    JL_GC_POP();
-}
-
-static int compile_all_collect__(jl_typemap_entry_t *ml, void *env)
-{
-    jl_array_t *allmeths = (jl_array_t*)env;
-    jl_method_t *m = ml->func.method;
-    if (m->external_mt)
-        return 1;
-    if (m->source) {
-        // method has a non-generated definition; can be compiled generically
-        jl_array_ptr_1d_push(allmeths, (jl_value_t*)m);
-    }
-    return 1;
-}
-
-static int compile_all_collect_(jl_methtable_t *mt, void *env)
-{
-    jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), compile_all_collect__, env);
-    return 1;
-}
-
-static void jl_compile_all_defs(jl_array_t *mis)
-{
-    jl_array_t *allmeths = jl_alloc_vec_any(0);
-    JL_GC_PUSH1(&allmeths);
-
-    jl_foreach_reachable_mtable(compile_all_collect_, allmeths);
-
-    size_t i, l = jl_array_len(allmeths);
-    for (i = 0; i < l; i++) {
-        jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(allmeths, i);
-        if (jl_is_datatype(m->sig) && jl_isa_compileable_sig((jl_tupletype_t*)m->sig, jl_emptysvec, m)) {
-            // method has a single compilable specialization, e.g. its definition
-            // signature is concrete. in this case we can just hint it.
-            jl_compile_hint((jl_tupletype_t*)m->sig);
-        }
-        else {
-            // first try to create leaf signatures from the signature declaration and compile those
-            _compile_all_union(m->sig);
-
-            // finally, compile a fully generic fallback that can work for all arguments
-            jl_method_instance_t *unspec = jl_get_unspecialized(m);
-            if (unspec)
-                jl_array_ptr_1d_push(mis, (jl_value_t*)unspec);
-        }
-    }
-
-    JL_GC_POP();
-}
-
-static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closure)
-{
-    assert(jl_is_method_instance(mi));
-    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
-    while (codeinst) {
-        int do_compile = 0;
-        if (jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return) {
-            jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
-            if (inferred &&
-                inferred != jl_nothing &&
-                jl_ir_flag_inferred(inferred) &&
-                (jl_ir_inlining_cost(inferred) == UINT16_MAX)) {
-                do_compile = 1;
-            }
-            else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) {
-                do_compile = 1;
-            }
-        }
-        if (do_compile) {
-            jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
-            return 1;
-        }
-        codeinst = jl_atomic_load_relaxed(&codeinst->next);
-    }
-    return 1;
-}
-
-static int precompile_enq_all_specializations__(jl_typemap_entry_t *def, void *closure)
-{
-    jl_method_t *m = def->func.method;
-    if (m->external_mt)
-        return 1;
-    if ((m->name == jl_symbol("__init__") || m->ccallable) && jl_is_dispatch_tupletype(m->sig)) {
-        // ensure `__init__()` and @ccallables get strongly-hinted, specialized, and compiled
-        jl_method_instance_t *mi = jl_specializations_get_linfo(m, m->sig, jl_emptysvec);
-        jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
-    }
-    else {
-        jl_value_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations);
-        if (!jl_is_svec(specializations)) {
-            precompile_enq_specialization_((jl_method_instance_t*)specializations, closure);
-        }
-        else {
-            size_t i, l = jl_svec_len(specializations);
-            for (i = 0; i < l; i++) {
-                jl_value_t *mi = jl_svecref(specializations, i);
-                if (mi != jl_nothing)
-                    precompile_enq_specialization_((jl_method_instance_t*)mi, closure);
-            }
-        }
-    }
-    if (m->ccallable)
-        jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)m->ccallable);
-    return 1;
-}
-
-static int precompile_enq_all_specializations_(jl_methtable_t *mt, void *env)
-{
-    return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), precompile_enq_all_specializations__, env);
-}
-
-static void *jl_precompile_(jl_array_t *m, int external_linkage)
-{
-    jl_array_t *m2 = NULL;
-    jl_method_instance_t *mi = NULL;
-    JL_GC_PUSH2(&m2, &mi);
-    m2 = jl_alloc_vec_any(0);
-    for (size_t i = 0; i < jl_array_len(m); i++) {
-        jl_value_t *item = jl_array_ptr_ref(m, i);
-        if (jl_is_method_instance(item)) {
-            mi = (jl_method_instance_t*)item;
-            size_t min_world = 0;
-            size_t max_world = ~(size_t)0;
-            if (mi != jl_atomic_load_relaxed(&mi->def.method->unspecialized) && !jl_isa_compileable_sig((jl_tupletype_t*)mi->specTypes, mi->sparam_vals, mi->def.method))
-                mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_atomic_load_acquire(&jl_world_counter), &min_world, &max_world, 0);
-            if (mi)
-                jl_array_ptr_1d_push(m2, (jl_value_t*)mi);
-        }
-        else {
-            assert(jl_is_simplevector(item));
-            assert(jl_svec_len(item) == 2);
-            jl_array_ptr_1d_push(m2, item);
-        }
-    }
-    void *native_code = jl_create_native(m2, NULL, NULL, 0, 1, external_linkage,
-                                         jl_atomic_load_acquire(&jl_world_counter));
-    JL_GC_POP();
-    return native_code;
-}
-
-static void *jl_precompile(int all)
-{
-    // array of MethodInstances and ccallable aliases to include in the output
-    jl_array_t *m = jl_alloc_vec_any(0);
-    JL_GC_PUSH1(&m);
-    if (all)
-        jl_compile_all_defs(m);
-    jl_foreach_reachable_mtable(precompile_enq_all_specializations_, m);
-    void *native_code = jl_precompile_(m, 0);
-    JL_GC_POP();
-    return native_code;
-}
-
-static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_methods, jl_array_t *new_specializations)
-{
-    if (!worklist)
-        return NULL;
-    // this "found" array will contain function
-    // type signatures that were inferred but haven't been compiled
-    jl_array_t *m = jl_alloc_vec_any(0);
-    JL_GC_PUSH1(&m);
-    size_t i, n = jl_array_len(worklist);
-    for (i = 0; i < n; i++) {
-        jl_module_t *mod = (jl_module_t*)jl_array_ptr_ref(worklist, i);
-        assert(jl_is_module(mod));
-        foreach_mtable_in_module(mod, precompile_enq_all_specializations_, m);
-    }
-    n = jl_array_len(extext_methods);
-    for (i = 0; i < n; i++) {
-        jl_method_t *method = (jl_method_t*)jl_array_ptr_ref(extext_methods, i);
-        assert(jl_is_method(method));
-        jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations);
-        if (!jl_is_svec(specializations)) {
-            precompile_enq_specialization_((jl_method_instance_t*)specializations, m);
-        }
-        else {
-            size_t j, l = jl_svec_len(specializations);
-            for (j = 0; j < l; j++) {
-                jl_value_t *mi = jl_svecref(specializations, j);
-                if (mi != jl_nothing)
-                    precompile_enq_specialization_((jl_method_instance_t*)mi, m);
-            }
-        }
-    }
-    n = jl_array_len(new_specializations);
-    for (i = 0; i < n; i++) {
-        jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_specializations, i);
-        precompile_enq_specialization_(ci->def, m);
-    }
-    void *native_code = jl_precompile_(m, 1);
-    JL_GC_POP();
-    return native_code;
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+static int suppress_precompile = 0;
+JL_DLLEXPORT void jl_suppress_precompile(int suppress)
+{
+    suppress_precompile = suppress;
+}
+
+
+static void jl_rebuild_methtables(arraylist_t *MIs, htable_t *mtables) JL_GC_DISABLED
+{
+    // Rebuild MethodTable to contain only those methods for which we compiled code.
+    // This can have significant soundness problems if there previously existed
+    // any ambiguous methods, but it would probably be pretty hard to do this
+    // fully correctly (with the necessary inserted guard entries).
+    htable_t ms;
+    htable_new(&ms, 0);
+    for (size_t i = 0; i < MIs->len; i++) {
+        jl_method_instance_t *mi = (jl_method_instance_t*)MIs->items[i];
+        jl_method_t *m = mi->def.method;
+        // Check if the method is already in the new table, if not then insert it there
+        void **inserted = ptrhash_bp(&ms, m);
+        if (*inserted != HT_NOTFOUND)
+            continue;
+        *inserted = (void*)m;
+        jl_methtable_t *old_mt = jl_method_get_table(m);
+        if ((jl_value_t *)old_mt == jl_nothing)
+            continue;
+        if (!ptrhash_has(mtables, old_mt))
+            ptrhash_put(mtables, old_mt, jl_new_method_table(old_mt->name, old_mt->module));
+        jl_methtable_t *mt = (jl_methtable_t*)ptrhash_get(mtables, old_mt);
+        //TODO: should this be a function like unsafe_insert_method, since all that is wanted is the jl_typemap_insert on a copy of the existing entry
+        size_t min_world = jl_atomic_load_relaxed(&m->primary_world);
+        size_t max_world = ~(size_t)0;
+        int dispatch_status = jl_atomic_load_relaxed(&m->dispatch_status);
+        jl_atomic_store_relaxed(&m->primary_world, ~(size_t)0);
+        jl_atomic_store_relaxed(&m->dispatch_status, 0);
+        jl_typemap_entry_t *newentry = jl_method_table_add(mt, m, NULL);
+        jl_atomic_store_relaxed(&m->primary_world, min_world);
+        jl_atomic_store_relaxed(&m->dispatch_status, dispatch_status);
+        jl_atomic_store_relaxed(&newentry->min_world, min_world);
+        jl_atomic_store_relaxed(&newentry->max_world, max_world); // short-circuit jl_method_table_insert
+    }
+    htable_free(&ms);
 }
diff --git a/src/processor.cpp b/src/processor.cpp
index 24a434af91ad3..1a25171082d82 100644
--- a/src/processor.cpp
+++ b/src/processor.cpp
@@ -4,6 +4,10 @@
 
 #include "llvm-version.h"
 #include <llvm/ADT/StringRef.h>
+#include <llvm/ADT/ArrayRef.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/StringMap.h>
+#include <llvm/TargetParser/Host.h>
 #include <llvm/Support/MathExtras.h>
 #include <llvm/Support/raw_ostream.h>
 
@@ -12,7 +16,6 @@
 #include "julia.h"
 #include "julia_internal.h"
 
-#include <map>
 #include <algorithm>
 
 #include "julia_assert.h"
@@ -21,8 +24,6 @@
 #include <dlfcn.h>
 #endif
 
-#include <iostream>
-
 // CPU target string is a list of strings separated by `;` each string starts with a CPU
 // or architecture name and followed by an optional list of features separated by `,`.
 // A "generic" or empty CPU name means the basic required feature set of the target ISA
@@ -107,13 +108,13 @@ static inline bool test_nbit(const T1 &bits, T2 _bitidx)
 }
 
 template<typename T>
-static inline void unset_bits(T &bits)
+static inline void unset_bits(T &bits) JL_NOTSAFEPOINT
 {
     (void)bits;
 }
 
 template<typename T, typename T1, typename... Rest>
-static inline void unset_bits(T &bits, T1 _bitidx, Rest... rest)
+static inline void unset_bits(T &bits, T1 _bitidx, Rest... rest) JL_NOTSAFEPOINT
 {
     auto bitidx = static_cast<uint32_t>(_bitidx);
     auto u32idx = bitidx / 32;
@@ -142,7 +143,7 @@ static inline void set_bit(T &bits, T1 _bitidx, bool val)
 template<size_t n>
 struct FeatureList {
     uint32_t eles[n];
-    uint32_t &operator[](size_t pos)
+    uint32_t &operator[](size_t pos) JL_NOTSAFEPOINT
     {
         return eles[pos];
     }
@@ -154,7 +155,7 @@ struct FeatureList {
     {
         int cnt = 0;
         for (size_t i = 0; i < n; i++)
-            cnt += llvm::countPopulation(eles[i]);
+            cnt += llvm::popcount(eles[i]);
         return cnt;
     }
     inline bool empty() const
@@ -255,7 +256,7 @@ static inline void mask_features(const FeatureList<n> masks, uint32_t *features)
 }
 
 // Turn feature list to a string the LLVM accept
-static inline std::string join_feature_strs(const std::vector<std::string> &strs)
+static inline std::string join_feature_strs(const llvm::ArrayRef<std::string> &strs)
 {
     size_t nstr = strs.size();
     if (!nstr)
@@ -275,7 +276,7 @@ static inline void append_ext_features(std::string &features, const std::string
     features.append(ext_features);
 }
 
-static inline void append_ext_features(std::vector<std::string> &features,
+static inline void append_ext_features(llvm::SmallVectorImpl<std::string> &features,
                                        const std::string &ext_features)
 {
     if (ext_features.empty())
@@ -297,12 +298,6 @@ static inline void append_ext_features(std::vector<std::string> &features,
  * Target specific type/constant definitions, always enable.
  */
 
-struct FeatureName {
-    const char *name;
-    uint32_t bit; // bit index into a `uint32_t` array;
-    uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support
-};
-
 template<typename CPU, size_t n>
 struct CPUSpec {
     const char *name;
@@ -391,7 +386,7 @@ JL_UNUSED static uint32_t find_feature_bit(const FeatureName *features, size_t n
             return feature.bit;
         }
     }
-    return (uint32_t)-1;
+    return UINT32_MAX;
 }
 
 // This is how we save the target identification.
@@ -399,13 +394,11 @@ JL_UNUSED static uint32_t find_feature_bit(const FeatureName *features, size_t n
 // 1. CPU ID is less stable (they are not bound to hardware/OS API)
 // 2. We need to support CPU names that are not recognized by us and therefore doesn't have an ID
 // 3. CPU name is trivial to parse
-static inline std::vector<uint8_t> serialize_target_data(llvm::StringRef name,
-                                                         uint32_t nfeature,
-                                                         const uint32_t *features_en,
-                                                         const uint32_t *features_dis,
-                                                         llvm::StringRef ext_features)
+static inline llvm::SmallVector<uint8_t, 0>
+serialize_target_data(llvm::StringRef name, uint32_t nfeature, const uint32_t *features_en,
+                      const uint32_t *features_dis, llvm::StringRef ext_features)
 {
-    std::vector<uint8_t> res;
+    llvm::SmallVector<uint8_t, 0> res;
     auto add_data = [&] (const void *data, size_t sz) {
         if (sz == 0)
             return;
@@ -426,10 +419,9 @@ static inline std::vector<uint8_t> serialize_target_data(llvm::StringRef name,
 }
 
 template<size_t n>
-static inline std::vector<uint8_t> serialize_target_data(llvm::StringRef name,
-                                                         const FeatureList<n> &features_en,
-                                                         const FeatureList<n> &features_dis,
-                                                         llvm::StringRef ext_features)
+static inline llvm::SmallVector<uint8_t, 0>
+serialize_target_data(llvm::StringRef name, const FeatureList<n> &features_en,
+                      const FeatureList<n> &features_dis, llvm::StringRef ext_features)
 {
     return serialize_target_data(name, n, &features_en[0], &features_dis[0], ext_features);
 }
@@ -448,7 +440,7 @@ struct TargetData {
 // In addition to the serialized data, the first `uint32_t` gives the number of targets saved
 // and each target has a `uint32_t` flag before the serialized target data.
 template<size_t n>
-static inline std::vector<TargetData<n>> deserialize_target_data(const uint8_t *data)
+static inline llvm::SmallVector<TargetData<n>, 0> deserialize_target_data(const uint8_t *data)
 {
     auto load_data = [&] (void *dest, size_t sz) {
         memcpy(dest, data, sz);
@@ -463,7 +455,7 @@ static inline std::vector<TargetData<n>> deserialize_target_data(const uint8_t *
     };
     uint32_t ntarget;
     load_data(&ntarget, 4);
-    std::vector<TargetData<n>> res(ntarget);
+    llvm::SmallVector<TargetData<n>, 0> res(ntarget);
     for (uint32_t i = 0; i < ntarget; i++) {
         auto &target = res[i];
         load_data(&target.en.flags, 4);
@@ -505,12 +497,27 @@ static inline int get_clone_base(const char *start, const char *end)
 // Parse cmdline string. This handles `clone_all` and `base` special features.
 // Other feature names will be passed to `feature_cb` for target dependent parsing.
 template<size_t n, typename F>
-static inline std::vector<TargetData<n>>
+static inline llvm::SmallVector<TargetData<n>, 0>
 parse_cmdline(const char *option, F &&feature_cb)
 {
     if (!option)
-        option = "native";
-    std::vector<TargetData<n>> res;
+        abort();
+
+    // Preprocess the option string to expand "sysimage" keyword
+    std::string processed_option;
+    if (strncmp(option, "sysimage", 8) == 0 && (option[8] == '\0' || option[8] == ';')) {
+        // Replace "sysimage" with the actual sysimage CPU target
+        jl_value_t *target_str = jl_get_sysimage_cpu_target();
+        if (target_str != nullptr) {
+            processed_option = std::string(jl_string_data(target_str), jl_string_len(target_str));
+            if (option[8] == ';') {
+                processed_option += option + 8;  // append the rest after "sysimage"
+            }
+            option = processed_option.c_str();
+        }
+    }
+
+    llvm::SmallVector<TargetData<n>, 0> res;
     TargetData<n> arg{};
     auto reset_arg = [&] {
         res.push_back(arg);
@@ -617,73 +624,80 @@ parse_cmdline(const char *option, F &&feature_cb)
 
 // Cached version of command line parsing
 template<size_t n, typename F>
-static inline std::vector<TargetData<n>> &get_cmdline_targets(F &&feature_cb)
+static inline llvm::SmallVector<TargetData<n>, 0> &get_cmdline_targets(const char *cpu_target, F &&feature_cb)
 {
-    static std::vector<TargetData<n>> targets =
-        parse_cmdline<n>(jl_options.cpu_target, std::forward<F>(feature_cb));
+    static llvm::SmallVector<TargetData<n>, 0> targets =
+        parse_cmdline<n>(cpu_target, std::forward<F>(feature_cb));
     return targets;
 }
 
 // Load sysimg, use the `callback` for dispatch and perform all relocations
 // for the selected target.
 template<typename F>
-static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
+static inline jl_image_t parse_sysimg(jl_image_buf_t image, F &&callback, void *ctx)
 {
     JL_TIMING(LOAD_IMAGE, LOAD_Processor);
     jl_image_t res{};
 
-    const jl_image_pointers_t *pointers;
-    jl_dlsym(hdl, "jl_image_pointers", (void**)&pointers, 1);
+    if (image.kind != JL_IMAGE_KIND_SO)
+        return res;
 
+    const jl_image_pointers_t *pointers = (const jl_image_pointers_t *)image.pointers;
     const void *ids = pointers->target_data;
-    uint32_t target_idx = callback(ids);
+
+    // Set the sysimage CPU target from the stored string
+    if (pointers->cpu_target_string) {
+        jl_set_sysimage_cpu_target(pointers->cpu_target_string);
+    }
+
+    jl_value_t* rejection_reason = nullptr;
+    JL_GC_PUSH1(&rejection_reason);
+    uint32_t target_idx = callback(ctx, ids, &rejection_reason);
+    if (target_idx == UINT32_MAX) {
+        jl_error(jl_string_ptr(rejection_reason));
+    }
+    JL_GC_POP();
 
     if (pointers->header->version != 1) {
         jl_error("Image file is not compatible with this version of Julia");
     }
 
-    std::vector<const char *> fvars(pointers->header->nfvars);
-    std::vector<const char *> gvars(pointers->header->ngvars);
+    llvm::SmallVector<void*, 0> fvars(pointers->header->nfvars);
+    llvm::SmallVector<const char*, 0> gvars(pointers->header->ngvars);
 
-    std::vector<std::pair<uint32_t, const char *>> clones;
+    llvm::SmallVector<std::pair<uint32_t, void*>, 0> clones;
 
     for (unsigned i = 0; i < pointers->header->nshards; i++) {
         auto shard = pointers->shards[i];
 
-        // .data base
-        char *data_base = (char *)shard.gvar_base;
-
-        // .text base
-        const char *text_base = shard.fvar_base;
-
-        const int32_t *offsets = shard.fvar_offsets;
-        uint32_t nfunc = offsets[0];
+        void **fvar_shard = shard.fvar_ptrs;
+        uintptr_t nfunc = *shard.fvar_count;
         assert(nfunc <= pointers->header->nfvars);
-        offsets++;
         const int32_t *reloc_slots = shard.clone_slots;
         const uint32_t nreloc = reloc_slots[0];
-        reloc_slots += 1;
+        reloc_slots++;
         const uint32_t *clone_idxs = shard.clone_idxs;
-        const int32_t *clone_offsets = shard.clone_offsets;
+        void **clone_ptrs = shard.clone_ptrs;
         uint32_t tag_len = clone_idxs[0];
-        clone_idxs += 1;
+        clone_idxs++;
 
         assert(tag_len & jl_sysimg_tag_mask);
-        std::vector<const int32_t*> base_offsets = {offsets};
+        llvm::SmallVector<void**, 0> base_ptrs(0);
+        base_ptrs.push_back(fvar_shard);
         // Find target
-        for (uint32_t i = 0;i < target_idx;i++) {
+        for (uint32_t i = 0; i < target_idx; i++) {
             uint32_t len = jl_sysimg_val_mask & tag_len;
             if (jl_sysimg_tag_mask & tag_len) {
-                if (i != 0)
-                    clone_offsets += nfunc;
                 clone_idxs += len + 1;
+                if (i != 0)
+                    clone_ptrs += nfunc;
             }
             else {
-                clone_offsets += len;
+                clone_ptrs += len;
                 clone_idxs += len + 2;
             }
             tag_len = clone_idxs[-1];
-            base_offsets.push_back(tag_len & jl_sysimg_tag_mask ? clone_offsets : nullptr);
+            base_ptrs.push_back(tag_len & jl_sysimg_tag_mask ? clone_ptrs : nullptr);
         }
 
         bool clone_all = (tag_len & jl_sysimg_tag_mask) != 0;
@@ -691,22 +705,22 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
         if (clone_all) {
             // clone_all
             if (target_idx != 0) {
-                offsets = clone_offsets;
+                fvar_shard = clone_ptrs;
             }
         }
         else {
             uint32_t base_idx = clone_idxs[0];
             assert(base_idx < target_idx);
             if (target_idx != 0) {
-                offsets = base_offsets[base_idx];
-                assert(offsets);
+                fvar_shard = base_ptrs[base_idx];
+                assert(fvar_shard);
             }
             clone_idxs++;
             unsigned start = clones.size();
             clones.resize(start + tag_len);
             auto idxs = shard.fvar_idxs;
             for (unsigned i = 0; i < tag_len; i++) {
-                clones[start + i] = {(clone_idxs[i] & ~jl_sysimg_val_mask) | idxs[clone_idxs[i] & jl_sysimg_val_mask], clone_offsets[i] + text_base};
+                clones[start + i] = {(clone_idxs[i] & ~jl_sysimg_val_mask) | idxs[clone_idxs[i] & jl_sysimg_val_mask], clone_ptrs[i]};
             }
         }
         // Do relocation
@@ -714,13 +728,13 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
         uint32_t len = jl_sysimg_val_mask & tag_len;
         for (uint32_t i = 0; i < len; i++) {
             uint32_t idx = clone_idxs[i];
-            int32_t offset;
+            void *fptr;
             if (clone_all) {
-                offset = offsets[idx];
+                fptr = fvar_shard[idx];
             }
             else if (idx & jl_sysimg_tag_mask) {
                 idx = idx & jl_sysimg_val_mask;
-                offset = clone_offsets[i];
+                fptr = clone_ptrs[i];
             }
             else {
                 continue;
@@ -730,9 +744,10 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
                 auto reloc_idx = ((const uint32_t*)reloc_slots)[reloc_i * 2];
                 if (reloc_idx == idx) {
                     found = true;
+                    const char *data_base = (const char*)shard.clone_slots;
                     auto slot = (const void**)(data_base + reloc_slots[reloc_i * 2 + 1]);
                     assert(slot);
-                    *slot = offset + text_base;
+                    *slot = fptr;
                 }
                 else if (reloc_idx > idx) {
                     break;
@@ -744,34 +759,35 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
 
         auto fidxs = shard.fvar_idxs;
         for (uint32_t i = 0; i < nfunc; i++) {
-            fvars[fidxs[i]] = text_base + offsets[i];
+            fvars[fidxs[i]] = fvar_shard[i];
         }
 
+        // .data base
         auto gidxs = shard.gvar_idxs;
         unsigned ngvars = shard.gvar_offsets[0];
         assert(ngvars <= pointers->header->ngvars);
+        char *data_base = (char*)shard.gvar_offsets;
         for (uint32_t i = 0; i < ngvars; i++) {
             gvars[gidxs[i]] = data_base + shard.gvar_offsets[i+1];
         }
     }
 
     if (!fvars.empty()) {
-        auto offsets = (int32_t *) malloc(sizeof(int32_t) * fvars.size());
-        res.fptrs.base = fvars[0];
+        auto ptrs = (void**) malloc(sizeof(void*) * fvars.size());
         for (size_t i = 0; i < fvars.size(); i++) {
             assert(fvars[i] && "Missing function pointer!");
-            offsets[i] = fvars[i] - res.fptrs.base;
+            ptrs[i] = fvars[i];
         }
-        res.fptrs.offsets = offsets;
-        res.fptrs.noffsets = fvars.size();
+        res.fptrs.ptrs = ptrs;
+        res.fptrs.nptrs = fvars.size();
     }
 
     if (!gvars.empty()) {
-        auto offsets = (int32_t *) malloc(sizeof(int32_t) * gvars.size());
-        res.gvars_base = (uintptr_t *)gvars[0];
+        auto offsets = (int32_t*)malloc(sizeof(int32_t) * gvars.size());
+        res.gvars_base = (const char*)pointers->header;
         for (size_t i = 0; i < gvars.size(); i++) {
             assert(gvars[i] && "Missing global variable pointer!");
-            offsets[i] = gvars[i] - (const char *)res.gvars_base;
+            offsets[i] = gvars[i] - res.gvars_base;
         }
         res.gvars_offsets = offsets;
         res.ngvars = gvars.size();
@@ -779,29 +795,22 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
 
     if (!clones.empty()) {
         assert(!fvars.empty());
-        std::sort(clones.begin(), clones.end());
-        auto clone_offsets = (int32_t *) malloc(sizeof(int32_t) * clones.size());
+        std::sort(clones.begin(), clones.end(),
+            [](const std::pair<uint32_t, const void*> &a, const std::pair<uint32_t, const void*> &b) {
+                return (a.first & jl_sysimg_val_mask) < (b.first & jl_sysimg_val_mask);
+        });
+        auto clone_ptrs = (void**) malloc(sizeof(void*) * clones.size());
         auto clone_idxs = (uint32_t *) malloc(sizeof(uint32_t) * clones.size());
         for (size_t i = 0; i < clones.size(); i++) {
             clone_idxs[i] = clones[i].first;
-            clone_offsets[i] = clones[i].second - res.fptrs.base;
+            clone_ptrs[i] = clones[i].second;
         }
         res.fptrs.clone_idxs = clone_idxs;
-        res.fptrs.clone_offsets = clone_offsets;
+        res.fptrs.clone_ptrs = clone_ptrs;
         res.fptrs.nclones = clones.size();
     }
 
-#ifdef _OS_WINDOWS_
-    res.base = (intptr_t)hdl;
-#else
-    Dl_info dlinfo;
-    if (dladdr((void*)pointers, &dlinfo) != 0) {
-        res.base = (intptr_t)dlinfo.dli_fbase;
-    }
-    else {
-        res.base = 0;
-    }
-#endif
+    res.base = image.base;
 
     {
         void *pgcstack_func_slot = pointers->ptls->pgcstack_func_slot;
@@ -812,7 +821,7 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
         *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset);
     }
 
-    res.small_typeof = pointers->small_typeof;
+    res.jl_small_typeof = pointers->jl_small_typeof;
 
     return res;
 }
@@ -848,24 +857,27 @@ static inline void check_cmdline(T &&cmdline, bool imaging)
 }
 
 struct SysimgMatch {
-    uint32_t best_idx{(uint32_t)-1};
+    uint32_t best_idx{UINT32_MAX};
     int vreg_size{0};
 };
 
 // Find the best match in the sysimg.
 // Select the best one based on the largest vector register and largest compatible feature set.
 template<typename S, typename T, typename F>
-static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size)
+static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size, jl_value_t **rejection_reason)
 {
     SysimgMatch match;
     bool match_name = false;
     int feature_size = 0;
+    llvm::SmallVector<const char *, 0> rejection_reasons;
+    rejection_reasons.reserve(sysimg.size());
     for (uint32_t i = 0; i < sysimg.size(); i++) {
         auto &imgt = sysimg[i];
         if (!(imgt.en.features & target.dis.features).empty()) {
             // Check sysimg enabled features against runtime disabled features
             // This is valid (and all what we can do)
             // even if one or both of the targets are unknown.
+            rejection_reasons.push_back("Rejecting this target due to use of runtime-disabled features\n");
             continue;
         }
         if (imgt.name == target.name) {
@@ -876,25 +888,44 @@ static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_v
             }
         }
         else if (match_name) {
+            rejection_reasons.push_back("Rejecting this target since another target has a cpu name match\n");
             continue;
         }
         int new_vsz = max_vector_size(imgt.en.features);
-        if (match.vreg_size > new_vsz)
+        if (match.vreg_size > new_vsz) {
+            rejection_reasons.push_back("Rejecting this target since another target has a larger vector register size\n");
             continue;
+        }
         int new_feature_size = imgt.en.features.nbits();
         if (match.vreg_size < new_vsz) {
             match.best_idx = i;
             match.vreg_size = new_vsz;
             feature_size = new_feature_size;
+            rejection_reasons.push_back("Updating best match to this target due to larger vector register size\n");
             continue;
         }
-        if (new_feature_size < feature_size)
+        if (new_feature_size < feature_size) {
+            rejection_reasons.push_back("Rejecting this target since another target has a larger feature set\n");
             continue;
+        }
         match.best_idx = i;
         feature_size = new_feature_size;
+        rejection_reasons.push_back("Updating best match to this target\n");
+    }
+    if (match.best_idx == UINT32_MAX) {
+        // Construct a nice error message for debugging purposes
+        std::string error_msg = "Unable to find compatible target in cached code image.\n";
+        for (size_t i = 0; i < rejection_reasons.size(); i++) {
+            error_msg += "Target ";
+            error_msg += std::to_string(i);
+            error_msg += " (";
+            error_msg += sysimg[i].name;
+            error_msg += "): ";
+            error_msg += rejection_reasons[i];
+        }
+        if (rejection_reason)
+            *rejection_reason = jl_pchar_to_string(error_msg.data(), error_msg.size());
     }
-    if (match.best_idx == (uint32_t)-1)
-        jl_error("Unable to find compatible target in system image.");
     return match;
 }
 
@@ -933,6 +964,34 @@ static inline void dump_cpu_spec(uint32_t cpu, const FeatureList<n> &features,
 
 }
 
+static std::string jl_get_cpu_name_llvm(void)
+{
+    return llvm::sys::getHostCPUName().str();
+}
+
+static std::string jl_get_cpu_features_llvm(void)
+{
+#if JL_LLVM_VERSION >= 190000
+    auto HostFeatures = llvm::sys::getHostCPUFeatures();
+#else
+    llvm::StringMap<bool> HostFeatures;
+    llvm::sys::getHostCPUFeatures(HostFeatures);
+#endif
+    std::string attr;
+    for (auto &ele: HostFeatures) {
+        if (ele.getValue()) {
+            if (!attr.empty()) {
+                attr.append(",+");
+            }
+            else {
+                attr.append("+");
+            }
+            attr.append(ele.getKey().str());
+        }
+    }
+    return attr;
+}
+
 #if defined(_CPU_X86_) || defined(_CPU_X86_64_)
 
 #include "processor_x86.cpp"
@@ -946,3 +1005,57 @@ static inline void dump_cpu_spec(uint32_t cpu, const FeatureList<n> &features,
 #include "processor_fallback.cpp"
 
 #endif
+
+// Global variable to store the CPU target string used for the sysimage
+static std::string sysimage_cpu_target;
+
+JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void)
+{
+    return jl_cstr_to_string(host_cpu_name().c_str());
+}
+
+JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
+{
+    return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
+}
+
+extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets() {
+    auto specs = jl_get_llvm_clone_targets(jl_options.cpu_target);
+    const uint32_t base_flags = 0;
+    llvm::SmallVector<uint8_t, 0> data;
+    auto push_i32 = [&] (uint32_t v) {
+        uint8_t buff[4];
+        memcpy(buff, &v, 4);
+        data.insert(data.end(), buff, buff + 4);
+    };
+    push_i32(specs.size());
+    for (uint32_t i = 0; i < specs.size(); i++) {
+        push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME));
+        auto &specdata = specs[i].data;
+        data.insert(data.end(), specdata.begin(), specdata.end());
+    }
+
+    jl_value_t *arr = (jl_value_t*)jl_alloc_array_1d(jl_array_uint8_type, data.size());
+    uint8_t *out = jl_array_data(arr, uint8_t);
+    memcpy(out, data.data(), data.size());
+    return arr;
+}
+
+extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **fnames, size_t *nf) {
+    *fnames = feature_names;
+    *nf = nfeature_names;
+}
+
+extern "C" JL_DLLEXPORT jl_value_t *jl_get_sysimage_cpu_target(void) {
+    if (sysimage_cpu_target.empty()) {
+        return jl_cstr_to_string("native");
+    }
+    return jl_cstr_to_string(sysimage_cpu_target.c_str());
+}
+
+// Function to set the sysimage CPU target (called during initialization)
+void jl_set_sysimage_cpu_target(const char *cpu_target) {
+    if (cpu_target) {
+        sysimage_cpu_target = cpu_target;
+    }
+}
diff --git a/src/processor.h b/src/processor.h
index 2255cf4c10daa..091defadd4951 100644
--- a/src/processor.h
+++ b/src/processor.h
@@ -41,6 +41,8 @@ enum {
     JL_TARGET_CLONE_CPU = 1 << 8,
     // Clone when the function uses fp16
     JL_TARGET_CLONE_FLOAT16 = 1 << 9,
+    // Clone when the function uses bf16
+    JL_TARGET_CLONE_BFLOAT16 = 1 << 10,
 };
 
 #define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) JL_FEATURE_DEF(name, bit, llvmver)
@@ -62,34 +64,33 @@ JL_DLLEXPORT int jl_test_cpu_feature(jl_cpu_feature_t feature);
 static const uint32_t jl_sysimg_tag_mask = 0x80000000u;
 static const uint32_t jl_sysimg_val_mask = ~((uint32_t)0x80000000u);
 
+// A parsed image file
 typedef struct _jl_image_fptrs_t {
-    // base function pointer
-    const char *base;
     // number of functions
-    uint32_t noffsets;
-    // function pointer offsets
-    const int32_t *offsets;
+    uint32_t nptrs;
+    // function pointers
+    void **ptrs;
 
     // Following fields contains the information about the selected target.
     // All of these fields are 0 if the selected targets have all the functions cloned.
-    // Instead the offsets are stored in `noffsets` and `offsets`.
+    // Instead the offsets are stored in `nptrs` and `ptrs`.
 
     // number of cloned functions
     uint32_t nclones;
-    // function pointer offsets of cloned functions
-    const int32_t *clone_offsets;
+    // function pointer of cloned functions
+    void **clone_ptrs;
     // sorted indices of the cloned functions (including the tag bit)
     const uint32_t *clone_idxs;
 } jl_image_fptrs_t;
 
-typedef struct {
+struct _jl_image_t {
     uint64_t base;
-    uintptr_t *gvars_base;
+    const char *gvars_base;
     const int32_t *gvars_offsets;
     uint32_t ngvars;
     jl_image_fptrs_t fptrs;
-    void **small_typeof;
-} jl_image_t;
+    void **jl_small_typeof;
+};
 
 // The header for each image
 // Details important counts about the image
@@ -107,31 +108,25 @@ typedef struct {
 
 // Per-shard data for image shards. Each image contains header->nshards of these.
 typedef struct {
-
-    // This is the base function pointer
-    // (all other function pointers are stored as offsets to this address)
-    const char *fvar_base;
-    // The array of function pointer offsets (`int32_t`) from the base pointer.
+    // The array of function pointers (`void*`).
     // This includes all julia functions in sysimg as well as all other functions that are cloned.
     // The default function pointer is used if the function is cloned.
-    // The first element is the size of the array, which should **NOT** be used as the number
+    // The first element is the size of the array, which should **NOT** be used is the number
     // of julia functions in the sysimg.
     // Each entry in this array uniquely identifies a function we are interested in
     // (the function may have multiple function pointers corresponding to different versions).
-    // In other sysimg info, all references to functions are stored as their `uint32_t` index
-    // in this array.
-    const int32_t *fvar_offsets;
+    const uintptr_t *fvar_count;
+    void **fvar_ptrs;
     // This is the mapping of shard function index -> global function index
     // staticdata.c relies on the same order of functions in the global function array being
     // the same as what it saw when serializing the global function array. However, partitioning
     // into multiple shards will cause functions to be reordered. This array is used to map
     // back to the original function array for loading.
     const uint32_t *fvar_idxs;
-    // This is the base data pointer
-    // (all other data pointers in this shard are stored as offsets to this address)
-    uintptr_t *gvar_base;
     // This array of global variable offsets (`int32_t`) from the base pointer.
     // Similar to fvar_offsets, but for gvars
+    // This is also the base data pointer
+    // (all data pointers in this shard are stored as offsets to this address)
     const int32_t *gvar_offsets;
     // This is the mapping of shard global variable index -> global global variable index
     // Similar to fvar_idxs, but for gvars
@@ -159,14 +154,12 @@ typedef struct {
     //  this array as the original/base function offsets.
     //  For other targets, this variable contains an offset array with the length defined in
     //  `jl_dispatch_fvars_idxs`. Tagged indices need relocations.
-    const int32_t *clone_offsets;
+    void **clone_ptrs;
     //  Target-specific function indices.
     //  For each target, this includes a tagged `uint32_t` length, an optional `uint32_t` index
     //  of the base target followed by an array of tagged function indices.
     //  The base target index is required to be smaller than the index of the current target
     //  and must be the default (`0`) or a `clone_all` target.
-    //  If it's not `0`, the function pointer array for the `clone_all` target will be used as
-    //  the base function pointer offsets instead.
     //  The tag bits for both the length and the indices are the top bit.
     //  A tagged length indicates that all of the functions are cloned and the indices follows
     //  are the ones that requires relocation. The base target index is omitted in this case.
@@ -175,10 +168,8 @@ typedef struct {
     //  all other cloned functions that requires relocation.
     //  A tagged index means that the function pointer should be filled into the GOT slots
     //  identified by `jl_dispatch_reloc_slots`. There could be more than one slot per function.
-    //  (Note that a tagged index could corresponds to a functions pointer that's the same as
+    //  (Note that a tagged index could corresponds to a function's pointer that's the same as
     //  the base one since this is the only way we currently represent relocations.)
-    //  A tagged length implicitly tags all the indices and the indices will not have the tag bit
-    //  set. The lengths in this variable is needed to decode `jl_dispatch_fvars_offsets`.
     const uint32_t *clone_idxs;
 } jl_image_shard_t;
 
@@ -197,13 +188,15 @@ typedef struct {
     const jl_image_shard_t *shards; // points to header->nshards length array
     // The TLS data pointer
     const jl_image_ptls_t *ptls;
-    // A copy of small_typeof[]
-    void **small_typeof;
+    // A copy of jl_small_typeof[]
+    void **jl_small_typeof;
 
     //  serialized target data
     //  This contains the number of targets
     //  in addition to the name and feature set of each target.
     const void *target_data;
+    // Original CPU target string used to build this sysimage
+    const char *cpu_target_string;
 } jl_image_pointers_t;
 
 /**
@@ -216,22 +209,40 @@ typedef struct {
  *
  * Return the data about the function pointers selected.
  */
-jl_image_t jl_init_processor_sysimg(void *hdl);
-jl_image_t jl_init_processor_pkgimg(void *hdl);
+jl_image_t jl_init_processor_sysimg(jl_image_buf_t image, const char *cpu_target);
+jl_image_t jl_init_processor_pkgimg(jl_image_buf_t image);
+
+// Internal function to set the sysimage CPU target during initialization
+void jl_set_sysimage_cpu_target(const char *cpu_target);
 
 // Return the name of the host CPU as a julia string.
 JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void);
 // Return the features of the host CPU as a julia string.
 JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void);
+// Return the CPU target string used to build the current sysimage
+JL_DLLEXPORT jl_value_t *jl_get_sysimage_cpu_target(void);
 // Dump the name and feature set of the host CPU
+JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits);
+// Check if the CPU has native FMA instructions;
 // For debugging only
 JL_DLLEXPORT void jl_dump_host_cpu(void);
-JL_DLLEXPORT void jl_check_pkgimage_clones(char* data);
+JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data);
 
 JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero);
 JL_DLLEXPORT int32_t jl_get_zero_subnormals(void);
 JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault);
 JL_DLLEXPORT int32_t jl_get_default_nans(void);
+
+/**
+ * System image contents.
+ *
+ * These symbols are typically dummy values, unless statically linking
+ * libjulia-* and the sysimage together (see null_sysimage.c), in which
+ * case they allow accessing the local copy of the sysimage.
+ **/
+typedef void jl_image_unpack_func_t(void *handle, jl_image_buf_t *image);
+extern jl_image_unpack_func_t *jl_image_unpack;
+
 #ifdef __cplusplus
 }
 
@@ -240,14 +251,14 @@ JL_DLLEXPORT int32_t jl_get_default_nans(void);
 #include <vector>
 
 extern JL_DLLEXPORT bool jl_processor_print_help;
-
+// NOLINTBEGIN(clang-diagnostic-return-type-c-linkage)
 /**
  * Returns the CPU name and feature string to be used by LLVM JIT.
  *
  * If the detected/specified CPU name is not available on the LLVM version specified,
  * a fallback CPU name will be used. Unsupported features will be ignored.
  */
-extern "C" JL_DLLEXPORT std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags) JL_NOTSAFEPOINT;
+extern "C" JL_DLLEXPORT std::pair<std::string,llvm::SmallVector<std::string, 0>> jl_get_llvm_target(const char *cpu_target, bool imaging, uint32_t &flags) JL_NOTSAFEPOINT;
 
 /**
  * Returns the CPU name and feature string to be used by LLVM disassembler.
@@ -262,7 +273,7 @@ struct jl_target_spec_t {
     // LLVM feature string
     std::string cpu_features;
     // serialized identification data
-    std::vector<uint8_t> data;
+    llvm::SmallVector<uint8_t, 0> data;
     // Clone condition.
     uint32_t flags;
     // Base target index.
@@ -271,9 +282,16 @@ struct jl_target_spec_t {
 /**
  * Return the list of targets to clone
  */
-extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void) JL_NOTSAFEPOINT;
-std::string jl_get_cpu_name_llvm(void) JL_NOTSAFEPOINT;
-std::string jl_get_cpu_features_llvm(void) JL_NOTSAFEPOINT;
+extern "C" JL_DLLEXPORT llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(const char *cpu_target) JL_NOTSAFEPOINT;
+// NOLINTEND(clang-diagnostic-return-type-c-linkage)
+struct FeatureName {
+    const char *name;
+    uint32_t bit; // bit index into a `uint32_t` array;
+    uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support
+};
+
+extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets();
+extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **feature_names, size_t *nfeatures);
 #endif
 
 #endif
diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp
index 0a8090a8a6d9c..0fba135c0b17e 100644
--- a/src/processor_arm.cpp
+++ b/src/processor_arm.cpp
@@ -11,7 +11,7 @@
 
 // This nesting is required to allow compilation on musl
 #define USE_DYN_GETAUXVAL
-#if defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
+#if (defined(_OS_LINUX_) || defined(_OS_FREEBSD_)) && defined(_CPU_AARCH64_)
 #  undef USE_DYN_GETAUXVAL
 #  include <sys/auxv.h>
 #elif defined(__GLIBC_PREREQ)
@@ -164,7 +164,13 @@ enum class CPU : uint32_t {
     apple_a12,
     apple_a13,
     apple_a14,
+    apple_a15,
+    apple_a16,
+    apple_a17,
     apple_m1,
+    apple_m2,
+    apple_m3,
+    apple_m4,
     apple_s4,
     apple_s5,
 
@@ -203,7 +209,7 @@ static constexpr auto feature_masks = get_feature_masks(
 #undef JL_FEATURE_DEF
     -1);
 static const auto real_feature_masks =
-    feature_masks & FeatureList<feature_sz>{{(uint32_t)-1, (uint32_t)-1, 0}};
+    feature_masks & FeatureList<feature_sz>{{UINT32_MAX, UINT32_MAX, 0}};
 
 namespace Feature {
 enum : uint32_t {
@@ -349,7 +355,13 @@ constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16);
 constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16);
 constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3);
 constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
+constexpr auto apple_a15 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3, i8mm, bf16);
+constexpr auto apple_a16 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3, i8mm, bf16);
+constexpr auto apple_a17 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3, i8mm, bf16);
 constexpr auto apple_m1 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
+constexpr auto apple_m2 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3, i8mm, bf16);
+constexpr auto apple_m3 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3, i8mm, bf16);
+constexpr auto apple_m4 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3, i8mm, bf16);
 // Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def
 // and sysctl -a hw.optional
 constexpr auto apple_s4 = apple_a12;
@@ -431,7 +443,13 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
     {"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12},
     {"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13},
     {"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14},
+    {"apple-a15", CPU::apple_a15, CPU::apple_a14, 160000, Feature::apple_a15},
+    {"apple-a16", CPU::apple_a16, CPU::apple_a14, 160000, Feature::apple_a16},
+    {"apple-a17", CPU::apple_a17, CPU::apple_a16, 190000, Feature::apple_a17},
     {"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1},
+    {"apple-m2", CPU::apple_m2, CPU::apple_m1, 160000, Feature::apple_m2},
+    {"apple-m3", CPU::apple_m3, CPU::apple_m2, 180000, Feature::apple_m3},
+    {"apple-m4", CPU::apple_m4, CPU::apple_m3, 190000, Feature::apple_m4},
     {"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4},
     {"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5},
     {"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000,
@@ -461,7 +479,7 @@ static constexpr auto feature_masks = get_feature_masks(
 #undef JL_FEATURE_DEF
     -1);
 static const auto real_feature_masks =
-    feature_masks & FeatureList<feature_sz>{{(uint32_t)-1, (uint32_t)-1, 0}};
+    feature_masks & FeatureList<feature_sz>{{UINT32_MAX, UINT32_MAX, 0}};
 
 namespace Feature {
 enum : uint32_t {
@@ -699,16 +717,19 @@ static inline const char *find_cpu_name(uint32_t cpu)
 
 static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
 {
+    using namespace llvm;
     char buffer[128];
     size_t bufferlen = 128;
     sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0);
-
-    if(strcmp(buffer,"Apple M1") == 0)
-        return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
-    else if(strcmp(buffer,"Apple M1 Max") == 0)
-        return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
-    else if(strcmp(buffer,"Apple M1 Pro") == 0)
+    StringRef cpu_name(buffer);
+    if (cpu_name.find("M1") != StringRef ::npos)
         return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
+    else if (cpu_name.find("M2") != StringRef ::npos)
+        return std::make_pair((uint32_t)CPU::apple_m2, Feature::apple_m2);
+    else if (cpu_name.find("M3") != StringRef ::npos)
+        return std::make_pair((uint32_t)CPU::apple_m3, Feature::apple_m3);
+    else if (cpu_name.find("M4") != StringRef ::npos)
+        return std::make_pair((uint32_t)CPU::apple_m4, Feature::apple_m4);
     else
         return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
 }
@@ -724,7 +745,16 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
 #  define AT_HWCAP2 26
 #endif
 
-#if defined(USE_DYN_GETAUXVAL)
+#if defined(_OS_FREEBSD_)
+static inline unsigned long jl_getauxval(unsigned long type)
+{
+    unsigned long val;
+    if (elf_aux_info((int)type, &val, sizeof(val)) != 0) {
+        return 0;
+    }
+    return val;
+}
+#elif defined(USE_DYN_GETAUXVAL)
 static unsigned long getauxval_procfs(unsigned long type)
 {
     int fd = open("/proc/self/auxv", O_RDONLY);
@@ -749,7 +779,7 @@ static inline unsigned long jl_getauxval(unsigned long type)
     // First, try resolving getauxval in libc
     auto libc = jl_dlopen(nullptr, JL_RTLD_LOCAL);
     static unsigned long (*getauxval_p)(unsigned long) = NULL;
-    if (getauxval_p == NULL && jl_dlsym(libc, "getauxval", (void **)&getauxval_p, 0)) {
+    if (getauxval_p == NULL && jl_dlsym(libc, "getauxval", (void **)&getauxval_p, 0, 0)) {
         return getauxval_p(type);
     }
 
@@ -817,7 +847,7 @@ template<typename T, typename F>
 static inline bool try_read_procfs_line(llvm::StringRef line, const char *prefix, T &out,
                                         bool &flag, F &&reset)
 {
-    if (!line.startswith(prefix))
+    if (!line.starts_with(prefix))
         return false;
     if (flag)
         reset();
@@ -1020,7 +1050,10 @@ static CPU get_cpu_name(CPUID cpuid)
         default: return CPU::generic;
         }
     case 0x61: // 'a': Apple
-        // https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html
+        // Data here is partially based on these sources:
+        // https://github.com/apple-oss-distributions/xnu/blob/main/osfmk/arm/cpuid.h
+        // https://asahilinux.org/docs/hw/soc/soc-codenames/#socs
+        // https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64Processors.td
         switch (cpuid.part) {
         case 0x0: // Swift
             return CPU::apple_swift;
@@ -1045,15 +1078,57 @@ static CPU get_cpu_name(CPUID cpuid)
             return CPU::apple_a12;
         case 0xF: // Tempest M9
             return CPU::apple_s4;
-        case 0x12: // Lightning
-        case 0x13: // Thunder
+        case 0x12: // H12 Cebu p-Core "Lightning"
+        case 0x13: // H12 Cebu e-Core "Thunder"
             return CPU::apple_a13;
-        case 0x20: // Icestorm
-        case 0x21: // Firestorm
+        case 0x20: // H13 Sicily e-Core "Icestorm"
+        case 0x21: // H13 Sicily p-Core "Firestorm"
             return CPU::apple_a14;
-        case 0x22: // Icestorm m1
-        case 0x23: // Firestorm m1
+        case 0x22: // H13G Tonga e-Core "Icestorm" used in Apple M1
+        case 0x23: // H13G Tonga p-Core "Firestorm" used in Apple M1
+        case 0x24: // H13J Jade Chop e-Core "Icestorm" used in Apple M1 Pro
+        case 0x25: // H13J Jade Chop p-Core "Firestorm" used in Apple M1 Pro
+        case 0x28: // H13J Jade Die e-Core "Icestorm" used in Apple M1 Max / Ultra
+        case 0x29: // H13J Jade Die p-Core "Firestorm" used in Apple M1 Max / Ultra
             return CPU::apple_m1;
+        case 0x30: // H14 Ellis e-Core "Blizzard" used in Apple A15
+        case 0x31: // H14 Ellis p-Core "Avalanche" used in Apple A15
+            return CPU::apple_a15;
+        case 0x32: // H14G Staten e-Core "Blizzard" used in Apple M2
+        case 0x33: // H14G Staten p-Core "Avalanche" used in Apple M2
+        case 0x34: // H14S Rhodes Chop e-Core "Blizzard" used in Apple M2 Pro
+        case 0x35: // H14S Rhodes Chop p-Core "Avalanche" used in Apple M2 Pro
+        case 0x38: // H14C Rhodes Die e-Core "Blizzard" used in Apple M2 Max / Ultra
+        case 0x39: // H14C Rhodes Die p-Core "Avalanche" used in Apple M2 Max / Ultra
+            return CPU::apple_m2;
+        case 0x40: // H15 Crete e-Core "Sawtooth" used in Apple A16
+        case 0x41: // H15 Crete p-Core "Everest" used in Apple A16
+            return CPU::apple_a16;
+        case 0x42: // H15 Ibiza e-Core "Sawtooth" used in Apple M3
+        case 0x43: // H15 Ibiza p-Core "Everest" used in Apple M3
+        case 0x44: // H15 Lobos e-Core "Sawtooth" used in Apple M3 Pro
+        case 0x45: // H15 Lobos p-Core "Everest" used in Apple M3 Pro
+        case 0x49: // H15 Palma e-Core "Sawtooth" used in Apple M3 Max
+        case 0x48: // H15 Palma p-Core "Everest" used in Apple M3 Max
+            return CPU::apple_m3;
+        //case 0x46: // M11 e-Core "Sawtooth" used in Apple S9
+        //case 0x47:  does not exist
+            //return CPU::apple_s9;
+        case 0x50: // H15 Coll e-Core "Sawtooth" used in Apple A17 Pro
+        case 0x51: // H15 Coll p-Core "Everest" used in Apple A17 Pro
+            return CPU::apple_a17;
+        case 0x52: // H16G Donan e-Core used in Apple M4
+        case 0x53: // H16H Donan p-Core used in Apple M4
+        case 0x54: // H16S Brava S e-Core used in Apple M4 Pro
+        case 0x55: // H16S Brava S p-Core used in Apple M4 Pro
+        case 0x58: // H16C Brava C e-Core used in Apple M4 Max
+        case 0x59: // H16C Brava C p-Core used in Apple M4 Max
+            return CPU::apple_m4;
+        //case 0x60: // H17P Tahiti e-Core used in Apple A18 Pro
+        //case 0x61: // H17P Tahiti p-Core used in Apple A18 Pro
+        //case 0x6a: // H17A Tupai e-Core used in Apple A18
+        //case 0x6b: // H17A Tupai p-Core used in Apple A18
+            //return CPU::apple_a18;
         default: return CPU::generic;
         }
     case 0x68: // 'h': Huaxintong Semiconductor
@@ -1195,7 +1270,7 @@ static bool check_cpu_arch_ver(uint32_t cpu, arm_arch arch)
     return true;
 }
 
-static void shrink_big_little(std::vector<std::pair<uint32_t,CPUID>> &list,
+static void shrink_big_little(llvm::SmallVectorImpl<std::pair<uint32_t,CPUID>> &list,
                               const CPU *cpus, uint32_t ncpu)
 {
     auto find = [&] (uint32_t name) {
@@ -1260,7 +1335,7 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
 #endif
 
     std::set<uint32_t> cpus;
-    std::vector<std::pair<uint32_t,CPUID>> list;
+    llvm::SmallVector<std::pair<uint32_t,CPUID>, 0> list;
     // Ideally the feature detection above should be enough.
     // However depending on the kernel version not all features are available
     // and it's also impossible to detect the ISA version which contains
@@ -1481,7 +1556,7 @@ static inline void disable_depends(FeatureList<n> &features)
     ::disable_depends(features, Feature::deps, sizeof(Feature::deps) / sizeof(FeatureDep));
 }
 
-static const std::vector<TargetData<feature_sz>> &get_cmdline_targets(void)
+static const llvm::SmallVector<TargetData<feature_sz>, 0> &get_cmdline_targets(const char *cpu_target)
 {
     auto feature_cb = [] (const char *str, size_t len, FeatureList<feature_sz> &list) {
 #ifdef _CPU_AARCH64_
@@ -1493,12 +1568,12 @@ static const std::vector<TargetData<feature_sz>> &get_cmdline_targets(void)
         }
 #endif
         auto fbit = find_feature_bit(feature_names, nfeature_names, str, len);
-        if (fbit == (uint32_t)-1)
+        if (fbit == UINT32_MAX)
             return false;
         set_bit(list, fbit, true);
         return true;
     };
-    auto &targets = ::get_cmdline_targets<feature_sz>(feature_cb);
+    auto &targets = ::get_cmdline_targets<feature_sz>(cpu_target, feature_cb);
     for (auto &t: targets) {
         if (auto nname = normalize_cpu_name(t.name)) {
             t.name = nname;
@@ -1507,7 +1582,7 @@ static const std::vector<TargetData<feature_sz>> &get_cmdline_targets(void)
     return targets;
 }
 
-static std::vector<TargetData<feature_sz>> jit_targets;
+static llvm::SmallVector<TargetData<feature_sz>, 0> jit_targets;
 
 static TargetData<feature_sz> arg_target_data(const TargetData<feature_sz> &arg, bool require_host)
 {
@@ -1561,10 +1636,11 @@ static int max_vector_size(const FeatureList<feature_sz> &features)
 #endif
 }
 
-static uint32_t sysimg_init_cb(const void *id)
+static uint32_t sysimg_init_cb(void *ctx, const void *id, jl_value_t **rejection_reason)
 {
     // First see what target is requested for the JIT.
-    auto &cmdline = get_cmdline_targets();
+    const char *cpu_target = (const char *)ctx;
+    auto &cmdline = get_cmdline_targets(cpu_target);
     TargetData<feature_sz> target = arg_target_data(cmdline[0], true);
     // Then find the best match in the sysimg
     auto sysimg = deserialize_target_data<feature_sz>((const uint8_t*)id);
@@ -1573,7 +1649,9 @@ static uint32_t sysimg_init_cb(const void *id)
             t.name = nname;
         }
     }
-    auto match = match_sysimg_targets(sysimg, target, max_vector_size);
+    auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason);
+    if (match.best_idx == UINT32_MAX)
+        return match.best_idx;
     // Now we've decided on which sysimg version to use.
     // Make sure the JIT target is compatible with it and save the JIT target.
     if (match.vreg_size != max_vector_size(target.en.features) &&
@@ -1586,7 +1664,7 @@ static uint32_t sysimg_init_cb(const void *id)
     return match.best_idx;
 }
 
-static uint32_t pkgimg_init_cb(const void *id)
+static uint32_t pkgimg_init_cb(void *ctx, const void *id, jl_value_t **rejection_reason JL_REQUIRE_ROOTED_SLOT)
 {
     TargetData<feature_sz> target = jit_targets.front();
     auto pkgimg = deserialize_target_data<feature_sz>((const uint8_t*)id);
@@ -1595,14 +1673,13 @@ static uint32_t pkgimg_init_cb(const void *id)
             t.name = nname;
         }
     }
-    auto match = match_sysimg_targets(pkgimg, target, max_vector_size);
-
+    auto match = match_sysimg_targets(pkgimg, target, max_vector_size, rejection_reason);
     return match.best_idx;
 }
 
-static void ensure_jit_target(bool imaging)
+static void ensure_jit_target(const char *cpu_target, bool imaging)
 {
-    auto &cmdline = get_cmdline_targets();
+    auto &cmdline = get_cmdline_targets(cpu_target);
     check_cmdline(cmdline, imaging);
     if (!jit_targets.empty())
         return;
@@ -1647,7 +1724,7 @@ static void ensure_jit_target(bool imaging)
     }
 }
 
-static std::pair<std::string,std::vector<std::string>>
+static std::pair<std::string,llvm::SmallVector<std::string, 0>>
 get_llvm_target_noext(const TargetData<feature_sz> &data)
 {
     std::string name = data.name;
@@ -1670,7 +1747,7 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
     if (name == "apple-a7")
         name = "cyclone";
 #endif
-    std::vector<std::string> feature_strs;
+    llvm::SmallVector<std::string, 0> feature_strs;
     for (auto &fename: feature_names) {
         if (fename.llvmver > JL_LLVM_VERSION)
             continue;
@@ -1738,7 +1815,7 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
     return std::make_pair(std::move(name), std::move(feature_strs));
 }
 
-static std::pair<std::string,std::vector<std::string>>
+static std::pair<std::string,llvm::SmallVector<std::string, 0>>
 get_llvm_target_vec(const TargetData<feature_sz> &data)
 {
     auto res0 = get_llvm_target_noext(data);
@@ -1797,40 +1874,52 @@ JL_DLLEXPORT void jl_dump_host_cpu(void)
                   cpus, ncpu_names);
 }
 
-JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void)
+JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
 {
-    return jl_cstr_to_string(host_cpu_name().c_str());
-}
-
-JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
-{
-    return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
+#ifdef _CPU_AARCH64_
+    return jl_true;
+#else
+    TargetData<feature_sz> target = jit_targets.front();
+    FeatureList<feature_sz> features = target.en.features;
+    if (bits == 32 && test_nbit(features, Feature::vfp4sp))
+        return jl_true;
+    else if ((bits == 64 || bits == 32) && test_nbit(features, Feature::vfp4))
+        return jl_true;
+    else
+        return jl_false;
+#endif
 }
 
-jl_image_t jl_init_processor_sysimg(void *hdl)
+jl_image_t jl_init_processor_sysimg(jl_image_buf_t image, const char *cpu_target)
 {
     if (!jit_targets.empty())
         jl_error("JIT targets already initialized");
-    return parse_sysimg(hdl, sysimg_init_cb);
+    return parse_sysimg(image, sysimg_init_cb, (void *)cpu_target);
 }
 
-jl_image_t jl_init_processor_pkgimg(void *hdl)
+jl_image_t jl_init_processor_pkgimg(jl_image_buf_t image)
 {
     if (jit_targets.empty())
         jl_error("JIT targets not initialized");
     if (jit_targets.size() > 1)
         jl_error("Expected only one JIT target");
-    return parse_sysimg(hdl, pkgimg_init_cb);
+    return parse_sysimg(image, pkgimg_init_cb, NULL);
 }
 
-JL_DLLEXPORT void jl_check_pkgimage_clones(char *data)
+JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data)
 {
-    pkgimg_init_cb(data);
+    jl_value_t *rejection_reason = NULL;
+    JL_GC_PUSH1(&rejection_reason);
+    uint32_t match_idx = pkgimg_init_cb(NULL, data, &rejection_reason);
+    JL_GC_POP();
+    if (match_idx == UINT32_MAX)
+        return rejection_reason;
+    return jl_nothing;
 }
 
-std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags)
+std::pair<std::string,llvm::SmallVector<std::string, 0>> jl_get_llvm_target(const char *cpu_target, bool imaging, uint32_t &flags)
 {
-    ensure_jit_target(imaging);
+    ensure_jit_target(cpu_target, imaging);
     flags = jit_targets[0].en.flags;
     return get_llvm_target_vec(jit_targets[0]);
 }
@@ -1848,12 +1937,56 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
     return res;
 }
 
-std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
+#ifndef __clang_gcanalyzer__
+llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(const char *cpu_target)
 {
-    if (jit_targets.empty())
-        jl_error("JIT targets not initialized");
-    std::vector<jl_target_spec_t> res;
-    for (auto &target: jit_targets) {
+
+    auto &cmdline = get_cmdline_targets(cpu_target);
+    check_cmdline(cmdline, true);
+    llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
+    for (auto &arg: cmdline) {
+        auto data = arg_target_data(arg, image_targets.empty());
+        image_targets.push_back(std::move(data));
+    }
+    auto ntargets = image_targets.size();
+    if (image_targets.empty())
+        jl_error("No targets specified");
+    llvm::SmallVector<jl_target_spec_t, 0> res;
+    // Now decide the clone condition.
+    for (size_t i = 1; i < ntargets; i++) {
+        auto &t = image_targets[i];
+        if (t.en.flags & JL_TARGET_CLONE_ALL)
+            continue;
+        auto &features0 = image_targets[t.base].en.features;
+        // Always clone when code checks CPU features
+        t.en.flags |= JL_TARGET_CLONE_CPU;
+        static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16};
+        for (auto fe: clone_fp16) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_FLOAT16;
+                break;
+            }
+        }
+        // The most useful one in general...
+        t.en.flags |= JL_TARGET_CLONE_LOOP;
+#ifdef _CPU_ARM_
+        static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon};
+        for (auto fe: clone_math) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_MATH;
+                break;
+            }
+        }
+        static constexpr uint32_t clone_simd[] = {Feature::neon};
+        for (auto fe: clone_simd) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_SIMD;
+                break;
+            }
+        }
+#endif
+    }
+    for (auto &target: image_targets) {
         auto features_en = target.en.features;
         auto features_dis = target.dis.features;
         for (auto &fename: feature_names) {
@@ -1874,6 +2007,8 @@ std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
     return res;
 }
 
+#endif
+
 extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
 {
     if (feature >= 32 * feature_sz)
diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp
index d50edc8e9b621..c8c8feb072345 100644
--- a/src/processor_fallback.cpp
+++ b/src/processor_fallback.cpp
@@ -2,6 +2,9 @@
 
 // Fallback processor detection and dispatch
 
+static constexpr FeatureName *feature_names = nullptr;
+static constexpr uint32_t nfeature_names = 0;
+
 namespace Fallback {
 
 static inline const std::string &host_cpu_name()
@@ -10,15 +13,15 @@ static inline const std::string &host_cpu_name()
     return name;
 }
 
-static const std::vector<TargetData<1>> &get_cmdline_targets(void)
+static const llvm::SmallVector<TargetData<1>, 0> &get_cmdline_targets(const char *cpu_target)
 {
     auto feature_cb = [] (const char*, size_t, FeatureList<1>&) {
         return false;
     };
-    return ::get_cmdline_targets<1>(feature_cb);
+    return ::get_cmdline_targets<1>(cpu_target, feature_cb);
 }
 
-static std::vector<TargetData<1>> jit_targets;
+static llvm::SmallVector<TargetData<1>, 0> jit_targets;
 
 static TargetData<1> arg_target_data(const TargetData<1> &arg, bool require_host)
 {
@@ -33,10 +36,11 @@ static TargetData<1> arg_target_data(const TargetData<1> &arg, bool require_host
     return res;
 }
 
-static uint32_t sysimg_init_cb(const void *id)
+static uint32_t sysimg_init_cb(void *ctx, const void *id, jl_value_t **rejection_reason)
 {
     // First see what target is requested for the JIT.
-    auto &cmdline = get_cmdline_targets();
+    const char *cpu_target = (const char *)ctx;
+    auto &cmdline = get_cmdline_targets(cpu_target);
     TargetData<1> target = arg_target_data(cmdline[0], true);
     // Find the last name match or use the default one.
     uint32_t best_idx = 0;
@@ -51,7 +55,7 @@ static uint32_t sysimg_init_cb(const void *id)
     return best_idx;
 }
 
-static uint32_t pkgimg_init_cb(const void *id)
+static uint32_t pkgimg_init_cb(void *ctx, const void *id, jl_value_t **rejection_reason)
 {
     TargetData<1> target = jit_targets.front();
     // Find the last name match or use the default one.
@@ -67,9 +71,9 @@ static uint32_t pkgimg_init_cb(const void *id)
     return best_idx;
 }
 
-static void ensure_jit_target(bool imaging)
+static void ensure_jit_target(const char *cpu_target, bool imaging)
 {
-    auto &cmdline = get_cmdline_targets();
+    auto &cmdline = get_cmdline_targets(cpu_target);
     check_cmdline(cmdline, imaging);
     if (!jit_targets.empty())
         return;
@@ -85,13 +89,13 @@ static void ensure_jit_target(bool imaging)
     }
 }
 
-static std::pair<std::string,std::vector<std::string>>
+static std::pair<std::string,llvm::SmallVector<std::string, 0>>
 get_llvm_target_noext(const TargetData<1> &data)
 {
-    return std::make_pair(data.name, std::vector<std::string>{});
+    return std::make_pair(data.name, llvm::SmallVector<std::string, 0>{});
 }
 
-static std::pair<std::string,std::vector<std::string>>
+static std::pair<std::string,llvm::SmallVector<std::string, 0>>
 get_llvm_target_vec(const TargetData<1> &data)
 {
     auto res0 = get_llvm_target_noext(data);
@@ -112,25 +116,25 @@ get_llvm_target_str(const TargetData<1> &data)
 
 using namespace Fallback;
 
-jl_image_t jl_init_processor_sysimg(void *hdl)
+jl_image_t jl_init_processor_sysimg(jl_image_buf_t image, const char *cpu_target)
 {
     if (!jit_targets.empty())
         jl_error("JIT targets already initialized");
-    return parse_sysimg(hdl, sysimg_init_cb);
+    return parse_sysimg(image, sysimg_init_cb, (void *)cpu_target);
 }
 
-jl_image_t jl_init_processor_pkgimg(void *hdl)
+jl_image_t jl_init_processor_pkgimg(jl_image_buf_t image)
 {
     if (jit_targets.empty())
         jl_error("JIT targets not initialized");
     if (jit_targets.size() > 1)
         jl_error("Expected only one JIT target");
-    return parse_sysimg(hdl, pkgimg_init_cb);
+    return parse_sysimg(image, pkgimg_init_cb, NULL);
 }
 
-std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags)
+std::pair<std::string,llvm::SmallVector<std::string, 0>> jl_get_llvm_target(const char *cpu_target, bool imaging, uint32_t &flags)
 {
-    ensure_jit_target(imaging);
+    ensure_jit_target(cpu_target, imaging);
     flags = jit_targets[0].en.flags;
     return get_llvm_target_vec(jit_targets[0]);
 }
@@ -141,13 +145,27 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
                 jl_get_cpu_features_llvm(), {{}, 0}, {{}, 0}, 0});
     return res;
 }
-
-extern "C" std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
+#ifndef __clang_gcanalyzer__
+llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(const char *cpu_target)
 {
-    if (jit_targets.empty())
-        jl_error("JIT targets not initialized");
-    std::vector<jl_target_spec_t> res;
-    for (auto &target: jit_targets) {
+
+    auto &cmdline = get_cmdline_targets(cpu_target);
+    check_cmdline(cmdline, true);
+    llvm::SmallVector<TargetData<1>, 0> image_targets;
+    for (auto &arg: cmdline) {
+        auto data = arg_target_data(arg, image_targets.empty());
+        image_targets.push_back(std::move(data));
+    }
+    auto ntargets = image_targets.size();
+    // Now decide the clone condition.
+    for (size_t i = 1; i < ntargets; i++) {
+        auto &t = image_targets[i];
+        t.en.flags |= JL_TARGET_CLONE_ALL;
+    }
+    if (image_targets.empty())
+        jl_error("No image targets found");
+    llvm::SmallVector<jl_target_spec_t, 0> res;
+    for (auto &target: image_targets) {
         jl_target_spec_t ele;
         std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target);
         ele.data = serialize_target_data(target.name, target.en.features,
@@ -158,15 +176,11 @@ extern "C" std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
     }
     return res;
 }
+#endif
 
-JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void)
-{
-    return jl_cstr_to_string(host_cpu_name().c_str());
-}
-
-JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
+JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
 {
-    return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
+    return jl_false; // Match behaviour of have_fma in src/llvm-cpufeatures.cpp (assume false)
 }
 
 JL_DLLEXPORT void jl_dump_host_cpu(void)
@@ -175,9 +189,15 @@ JL_DLLEXPORT void jl_dump_host_cpu(void)
     jl_safe_printf("Features: %s\n", jl_get_cpu_features_llvm().c_str());
 }
 
-JL_DLLEXPORT void jl_check_pkgimage_clones(char *data)
+JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data)
 {
-    pkgimg_init_cb(data);
+    jl_value_t *rejection_reason = NULL;
+    JL_GC_PUSH1(&rejection_reason);
+    uint32_t match_idx = pkgimg_init_cb(NULL, data, &rejection_reason);
+    JL_GC_POP();
+    if (match_idx == UINT32_MAX)
+        return rejection_reason;
+    return jl_nothing;
 }
 
 extern "C" int jl_test_cpu_feature(jl_cpu_feature_t)
diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp
index b9e7d8c0f0daf..bd624943083ae 100644
--- a/src/processor_x86.cpp
+++ b/src/processor_x86.cpp
@@ -4,6 +4,7 @@
 
 // CPUID
 
+#include "julia.h"
 extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
 {
     asm volatile (
@@ -94,9 +95,11 @@ enum class CPU : uint32_t {
     amd_znver1,
     amd_znver2,
     amd_znver3,
+    amd_znver4,
+    amd_znver5,
 };
 
-static constexpr size_t feature_sz = 11;
+static constexpr size_t feature_sz = 12;
 static constexpr FeatureName feature_names[] = {
 #define JL_FEATURE_DEF(name, bit, llvmver) {#name, bit, llvmver},
 #define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) {str, bit, llvmver},
@@ -139,11 +142,13 @@ static constexpr FeatureDep deps[] = {
     {vpclmulqdq, avx},
     {vpclmulqdq, pclmul},
     {avxvnni, avx2},
+    {avxvnniint8, avx2},
+    {avxvnniint16, avx2},
+    {avxifma, avx2},
+    {avxneconvert, avx2},
     {avx512f, avx2},
     {avx512dq, avx512f},
     {avx512ifma, avx512f},
-    {avx512pf, avx512f},
-    {avx512er, avx512f},
     {avx512cd, avx512f},
     {avx512bw, avx512f},
     {avx512bf16, avx512bw},
@@ -159,6 +164,8 @@ static constexpr FeatureDep deps[] = {
     {avx512fp16, avx512vl},
     {amx_int8, amx_tile},
     {amx_bf16, amx_tile},
+    {amx_fp16, amx_tile},
+    {amx_complex, amx_tile},
     {sse4a, sse3},
     {xop, fma4},
     {fma4, avx},
@@ -166,6 +173,9 @@ static constexpr FeatureDep deps[] = {
     {xsaveopt, xsave},
     {xsavec, xsave},
     {xsaves, xsave},
+    {sha512, avx2},
+    {sm3, avx},
+    {sm4, avx2},
 };
 
 // We require cx16 on 64bit by default. This can be overwritten with `-cx16`
@@ -181,7 +191,7 @@ constexpr auto tremont = goldmont_plus | get_feature_masks(clwb, gfni);
 constexpr auto knl = get_feature_masks(sse3, ssse3, sse41, sse42, cx16, sahf, popcnt,
                                        aes, pclmul, avx, xsave, xsaveopt, rdrnd, f16c, fsgsbase,
                                        avx2, bmi, bmi2, fma, lzcnt, movbe, adx, rdseed, prfchw,
-                                       avx512f, avx512er, avx512cd, avx512pf, prefetchwt1);
+                                       avx512f, avx512cd);
 constexpr auto knm = knl | get_feature_masks(avx512vpopcntdq);
 constexpr auto yonah = get_feature_masks(sse3);
 constexpr auto prescott = yonah;
@@ -234,6 +244,9 @@ constexpr auto znver1 = haswell | get_feature_masks(adx, aes, clflushopt, clzero
                                                     rdseed, sha, sse4a, xsavec);
 constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd);
 constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq);
+constexpr auto znver4 = znver3 | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi,
+                                                   avx512vbmi2, avx512vnni, avx512bitalg, avx512vpopcntdq, avx512bf16, gfni, shstk, xsaves);
+constexpr auto znver5 = znver4 | get_feature_masks(avxvnni, movdiri, movdir64b, avx512vp2intersect, prefetchi, avxvnni);
 
 }
 
@@ -295,6 +308,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
     {"znver1", CPU::amd_znver1, CPU::generic, 0, Feature::znver1},
     {"znver2", CPU::amd_znver2, CPU::generic, 0, Feature::znver2},
     {"znver3", CPU::amd_znver3, CPU::amd_znver2, 120000, Feature::znver3},
+    {"znver4", CPU::amd_znver4, CPU::amd_znver3, 160000, Feature::znver4},
+    {"znver5", CPU::amd_znver5, CPU::amd_znver4, 190000, Feature::znver5},
 };
 static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);
 
@@ -562,10 +577,19 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
         if (model >= 0x30)
             return CPU::amd_znver2;
         return CPU::amd_znver1;
-    case 0x19:  // AMD Family 19h
-        if (model <= 0x0f || model == 0x21)
+    case 25:  // AMD Family 19h
+        if (model <= 0x0f || (model >= 0x20 && model <= 0x5f))
             return CPU::amd_znver3;  // 00h-0Fh, 21h: Zen3
+        if ((model >= 0x10 && model <= 0x1f) ||
+            (model >= 0x60 && model <= 0x74) ||
+            (model >= 0x78 && model <= 0x7b) ||
+            (model >= 0xA0 && model <= 0xAf)) {
+                return CPU::amd_znver4;
+            }
         return CPU::amd_znver3; // fallback
+    case 26:
+        // if (model <= 0x77)
+        return CPU::amd_znver5;
     }
 }
 
@@ -573,7 +597,7 @@ template<typename T>
 static inline void features_disable_avx512(T &features)
 {
     using namespace Feature;
-    unset_bits(features, avx512f, avx512dq, avx512ifma, avx512pf, avx512er, avx512cd,
+    unset_bits(features, avx512f, avx512dq, avx512ifma, avx512cd,
                avx512bw, avx512vl, avx512vbmi, avx512vpopcntdq, avx512vbmi2, avx512vnni,
                avx512bitalg, avx512vp2intersect, avx512bf16);
 }
@@ -651,11 +675,12 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
         int32_t info7[4];
         jl_cpuidex(info7, 7, 1);
         features[9] = info7[0];
+        features[10] = info7[1];
     }
     if (maxleaf >= 0x14) {
         int32_t info14[4];
         jl_cpuidex(info14, 0x14, 0);
-        features[10] = info14[1];
+        features[11] = info14[1];
     }
 
     // Fix up AVX bits to account for OS support and match LLVM model
@@ -696,7 +721,20 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
     else {
         cpu = uint32_t(CPU::generic);
     }
-
+    /* Feature bits to register map
+    feature[0] = ecx
+    feature[1] = edx
+    feature[2] = leaf 7 ebx
+    feature[3] = leaf 7 ecx
+    feature[4] = leaf 7 edx
+    feature[5] = leaf 0x80000001 ecx
+    feature[6] = leaf 0x80000001 edx
+    feature[7] = leaf 0xd subleaf 1 eax
+    feature[8] = leaf 0x80000008 ebx
+    feature[9] = leaf 7 ebx subleaf 1 eax
+    feature[10] = leaf 7 ebx subleaf 1 ebx
+    feature[11] = leaf 0x14 ebx
+    */
     return std::make_pair(cpu, features);
 }
 
@@ -771,16 +809,16 @@ static inline void disable_depends(FeatureList<n> &features)
     ::disable_depends(features, Feature::deps, sizeof(Feature::deps) / sizeof(FeatureDep));
 }
 
-static const std::vector<TargetData<feature_sz>> &get_cmdline_targets(void)
+static const llvm::SmallVector<TargetData<feature_sz>, 0> &get_cmdline_targets(const char *cpu_target)
 {
     auto feature_cb = [] (const char *str, size_t len, FeatureList<feature_sz> &list) {
         auto fbit = find_feature_bit(feature_names, nfeature_names, str, len);
-        if (fbit == (uint32_t)-1)
+        if (fbit == UINT32_MAX)
             return false;
         set_bit(list, fbit, true);
         return true;
     };
-    auto &targets = ::get_cmdline_targets<feature_sz>(feature_cb);
+    auto &targets = ::get_cmdline_targets<feature_sz>(cpu_target, feature_cb);
     for (auto &t: targets) {
         if (auto nname = normalize_cpu_name(t.name)) {
             t.name = nname;
@@ -789,7 +827,7 @@ static const std::vector<TargetData<feature_sz>> &get_cmdline_targets(void)
     return targets;
 }
 
-static std::vector<TargetData<feature_sz>> jit_targets;
+static llvm::SmallVector<TargetData<feature_sz>, 0> jit_targets;
 
 static TargetData<feature_sz> arg_target_data(const TargetData<feature_sz> &arg, bool require_host)
 {
@@ -840,10 +878,11 @@ static int max_vector_size(const FeatureList<feature_sz> &features)
     return 16;
 }
 
-static uint32_t sysimg_init_cb(const void *id)
+static uint32_t sysimg_init_cb(void *ctx, const void *id, jl_value_t** rejection_reason)
 {
     // First see what target is requested for the JIT.
-    auto &cmdline = get_cmdline_targets();
+    const char *cpu_target = (const char *)ctx;
+    auto &cmdline = get_cmdline_targets(cpu_target);
     TargetData<feature_sz> target = arg_target_data(cmdline[0], true);
     // Then find the best match in the sysimg
     auto sysimg = deserialize_target_data<feature_sz>((const uint8_t*)id);
@@ -868,7 +907,9 @@ static uint32_t sysimg_init_cb(const void *id)
                  "virtualized environment.  Please read "
                  "https://docs.julialang.org/en/v1/devdocs/sysimg/ for more.");
     }
-    auto match = match_sysimg_targets(sysimg, target, max_vector_size);
+    auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason);
+    if (match.best_idx == UINT32_MAX)
+        return match.best_idx;
     // Now we've decided on which sysimg version to use.
     // Make sure the JIT target is compatible with it and save the JIT target.
     if (match.vreg_size != max_vector_size(target.en.features) &&
@@ -884,7 +925,7 @@ static uint32_t sysimg_init_cb(const void *id)
     return match.best_idx;
 }
 
-static uint32_t pkgimg_init_cb(const void *id)
+static uint32_t pkgimg_init_cb(void *ctx, const void *id, jl_value_t **rejection_reason)
 {
     TargetData<feature_sz> target = jit_targets.front();
     auto pkgimg = deserialize_target_data<feature_sz>((const uint8_t*)id);
@@ -893,13 +934,15 @@ static uint32_t pkgimg_init_cb(const void *id)
             t.name = nname;
         }
     }
-    auto match = match_sysimg_targets(pkgimg, target, max_vector_size);
+    auto match = match_sysimg_targets(pkgimg, target, max_vector_size, rejection_reason);
     return match.best_idx;
 }
 
-static void ensure_jit_target(bool imaging)
+//This function serves as a fallback during bootstrapping, at that point we don't have a sysimage with native code
+// so we won't call sysimg_init_cb, else this function shouldn't do anything.
+static void ensure_jit_target(const char *cpu_target, bool imaging)
 {
-    auto &cmdline = get_cmdline_targets();
+    auto &cmdline = get_cmdline_targets(cpu_target);
     check_cmdline(cmdline, imaging);
     if (!jit_targets.empty())
         return;
@@ -933,7 +976,6 @@ static void ensure_jit_target(bool imaging)
                                                   Feature::vaes, Feature::vpclmulqdq,
                                                   Feature::sse4a, Feature::avx512f,
                                                   Feature::avx512dq, Feature::avx512ifma,
-                                                  Feature::avx512pf, Feature::avx512er,
                                                   Feature::avx512cd, Feature::avx512bw,
                                                   Feature::avx512vl, Feature::avx512vbmi,
                                                   Feature::avx512vpopcntdq, Feature::avxvnni,
@@ -959,10 +1001,17 @@ static void ensure_jit_target(bool imaging)
                 break;
             }
         }
+        static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16};
+        for (auto fe: clone_bf16) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_BFLOAT16;
+                break;
+            }
+        }
     }
 }
 
-static std::pair<std::string,std::vector<std::string>>
+static std::pair<std::string,llvm::SmallVector<std::string, 0>>
 get_llvm_target_noext(const TargetData<feature_sz> &data)
 {
     std::string name = data.name;
@@ -981,7 +1030,7 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
         name = "x86-64";
 #endif
     }
-    std::vector<std::string> features;
+    llvm::SmallVector<std::string, 0> features;
     for (auto &fename: feature_names) {
         if (fename.llvmver > JL_LLVM_VERSION)
             continue;
@@ -1005,7 +1054,7 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
     return std::make_pair(std::move(name), std::move(features));
 }
 
-static std::pair<std::string,std::vector<std::string>>
+static std::pair<std::string,llvm::SmallVector<std::string, 0>>
 get_llvm_target_vec(const TargetData<feature_sz> &data)
 {
     auto res0 = get_llvm_target_noext(data);
@@ -1032,57 +1081,132 @@ JL_DLLEXPORT void jl_dump_host_cpu(void)
                   cpus, ncpu_names);
 }
 
-JL_DLLEXPORT void jl_check_pkgimage_clones(char *data)
-{
-    pkgimg_init_cb(data);
-}
-
-JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void)
+JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data)
 {
-    return jl_cstr_to_string(host_cpu_name().c_str());
+    jl_value_t *rejection_reason = NULL;
+    JL_GC_PUSH1(&rejection_reason);
+    uint32_t match_idx = pkgimg_init_cb(NULL, data, &rejection_reason);
+    JL_GC_POP();
+    if (match_idx == UINT32_MAX)
+        return rejection_reason;
+    return jl_nothing;
 }
 
-JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void)
+JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits)
 {
-    return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str());
+    TargetData<feature_sz> target = jit_targets.front();
+    FeatureList<feature_sz> features = target.en.features;
+    if ((bits == 32 || bits == 64) && (test_nbit(features, Feature::fma) || test_nbit(features, Feature::fma4)))
+        return jl_true;
+    else
+        return jl_false;
 }
 
-jl_image_t jl_init_processor_sysimg(void *hdl)
+jl_image_t jl_init_processor_sysimg(jl_image_buf_t image, const char *cpu_target)
 {
     if (!jit_targets.empty())
         jl_error("JIT targets already initialized");
-    return parse_sysimg(hdl, sysimg_init_cb);
+    return parse_sysimg(image, sysimg_init_cb, (void *)cpu_target);
 }
 
-jl_image_t jl_init_processor_pkgimg(void *hdl)
+jl_image_t jl_init_processor_pkgimg(jl_image_buf_t image)
 {
     if (jit_targets.empty())
         jl_error("JIT targets not initialized");
     if (jit_targets.size() > 1)
         jl_error("Expected only one JIT target");
-    return parse_sysimg(hdl, pkgimg_init_cb);
+    return parse_sysimg(image, pkgimg_init_cb, NULL);
 }
 
-extern "C" JL_DLLEXPORT std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags)
+std::pair<std::string,llvm::SmallVector<std::string, 0>> jl_get_llvm_target(const char *cpu_target, bool imaging, uint32_t &flags)
 {
-    ensure_jit_target(imaging);
+    ensure_jit_target(cpu_target, imaging);
     flags = jit_targets[0].en.flags;
     return get_llvm_target_vec(jit_targets[0]);
 }
 
-extern "C" JL_DLLEXPORT const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
+const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
 {
     static const auto res = get_llvm_target_str(TargetData<feature_sz>{"generic", "",
             {feature_masks, 0}, {{}, 0}, 0});
     return res;
 }
-
-extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
+//This function parses the -C command line to figure out which targets to multiversion to.
+#ifndef __clang_gcanalyzer__
+llvm::SmallVector<jl_target_spec_t, 0> jl_get_llvm_clone_targets(const char *cpu_target)
 {
-    if (jit_targets.empty())
-        jl_error("JIT targets not initialized");
-    std::vector<jl_target_spec_t> res;
-    for (auto &target: jit_targets) {
+
+    auto &cmdline = get_cmdline_targets(cpu_target);
+    check_cmdline(cmdline, true);
+    llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
+    for (auto &arg: cmdline) {
+        auto data = arg_target_data(arg, image_targets.empty());
+        image_targets.push_back(std::move(data));
+    }
+
+    auto ntargets = image_targets.size();
+    // Now decide the clone condition.
+    for (size_t i = 1; i < ntargets; i++) {
+        auto &t = image_targets[i];
+        if (t.en.flags & JL_TARGET_CLONE_ALL)
+            continue;
+        // Always clone when code checks CPU features
+        t.en.flags |= JL_TARGET_CLONE_CPU;
+        // The most useful one in general...
+        t.en.flags |= JL_TARGET_CLONE_LOOP;
+        auto &features0 = image_targets[t.base].en.features;
+        // Special case for KNL/KNM since they're so different
+        if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) {
+            if ((t.name == "knl" || t.name == "knm") &&
+                image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") {
+                t.en.flags |= JL_TARGET_CLONE_ALL;
+                break;
+            }
+        }
+        static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4};
+        static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
+                                                  Feature::sse41, Feature::sse42,
+                                                  Feature::avx, Feature::avx2,
+                                                  Feature::vaes, Feature::vpclmulqdq,
+                                                  Feature::sse4a, Feature::avx512f,
+                                                  Feature::avx512dq, Feature::avx512ifma,
+                                                  Feature::avx512cd, Feature::avx512bw,
+                                                  Feature::avx512vl, Feature::avx512vbmi,
+                                                  Feature::avx512vpopcntdq, Feature::avxvnni,
+                                                  Feature::avx512vbmi2, Feature::avx512vnni,
+                                                  Feature::avx512bitalg, Feature::avx512bf16,
+                                                  Feature::avx512vp2intersect, Feature::avx512fp16};
+        for (auto fe: clone_math) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_MATH;
+                break;
+            }
+        }
+        for (auto fe: clone_simd) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_SIMD;
+                break;
+            }
+        }
+        static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16};
+        for (auto fe: clone_fp16) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_FLOAT16;
+                break;
+            }
+        }
+        static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16};
+        for (auto fe: clone_bf16) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_BFLOAT16;
+                break;
+            }
+        }
+    }
+    if (image_targets.empty())
+        jl_error("No targets specified");
+    llvm::SmallVector<jl_target_spec_t, 0> res;
+    for (auto &target: image_targets) {
         auto features_en = target.en.features;
         auto features_dis = target.dis.features;
         for (auto &fename: feature_names) {
@@ -1102,6 +1226,7 @@ extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(
     }
     return res;
 }
+#endif
 
 extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
 {
diff --git a/src/rtutils.c b/src/rtutils.c
index eefd1b25f9bc4..e73d0de6c69aa 100644
--- a/src/rtutils.c
+++ b/src/rtutils.c
@@ -5,6 +5,8 @@
 */
 #include "platform.h"
 
+#include <float.h>
+#include <math.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -119,6 +121,17 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error_rt(const char *fname, const char *co
     jl_throw(ex);
 }
 
+JL_DLLEXPORT void JL_NORETURN jl_type_error_global(const char *fname, jl_module_t *mod, jl_sym_t *sym,
+                                               jl_value_t *expected JL_MAYBE_UNROOTED,
+                                               jl_value_t *got JL_MAYBE_UNROOTED)
+{
+    jl_value_t *gr = jl_module_globalref(mod, sym);
+    JL_GC_PUSH2(&expected, &got);
+    jl_value_t *ex = jl_new_struct(jl_typeerror_type, jl_symbol(fname), gr, expected, got);
+    jl_throw(ex);
+}
+
+
 // with function name or description only
 JL_DLLEXPORT void JL_NORETURN jl_type_error(const char *fname,
                                             jl_value_t *expected JL_MAYBE_UNROOTED,
@@ -127,16 +140,43 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error(const char *fname,
     jl_type_error_rt(fname, "", expected, got);
 }
 
-JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var)
+JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var, jl_value_t *scope)
 {
-    if (!jl_undefvarerror_type)
-        jl_errorf("UndefVarError(%s)", jl_symbol_name(var));
-    jl_throw(jl_new_struct(jl_undefvarerror_type, var));
+    if (!jl_undefvarerror_type) {
+        const char *s1 = "";
+        const char *s2 = "";
+        if (scope) {
+            if (jl_is_symbol(scope)) {
+                s1 = ", :";
+                s2 = jl_symbol_name((jl_sym_t*)scope);
+            }
+            else if (jl_is_module(scope)) {
+                s1 = ", module ";
+                s2 = jl_symbol_name(((jl_module_t*)scope)->name);
+            }
+            else {
+                s1 = ", ";
+                s2 = "unknown scope";
+            }
+        }
+        jl_errorf("UndefVarError(%s%s%s)", jl_symbol_name(var), s1, s2);
+    }
+    jl_value_t *active_age = NULL;
+    JL_GC_PUSH2(&scope, &active_age);
+    active_age = jl_box_long(jl_current_task->world_age);
+    jl_throw(jl_new_struct(jl_undefvarerror_type, var, active_age, scope));
 }
 
-JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_sym_t *type_name, jl_sym_t *var)
+JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_datatype_t *t, jl_sym_t *var)
 {
-    jl_errorf("type %s has no field %s", jl_symbol_name(type_name), jl_symbol_name(var));
+    jl_throw(jl_new_struct(jl_fielderror_type, t, var));
+}
+
+JL_DLLEXPORT void JL_NORETURN jl_argument_error(char *str) // == jl_exceptionf(jl_argumenterror_type, "%s", str)
+{
+    jl_value_t *msg = jl_pchar_to_string((char*)str, strlen(str));
+    JL_GC_PUSH1(&msg);
+    jl_throw(jl_new_struct(jl_argumenterror_type, msg));
 }
 
 JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str) // == jl_exceptionf(jl_atomicerror_type, "%s", str)
@@ -201,14 +241,6 @@ JL_DLLEXPORT void JL_NORETURN jl_bounds_error_ints(jl_value_t *v JL_MAYBE_UNROOT
     jl_throw(jl_new_struct((jl_datatype_t*)jl_boundserror_type, v, t));
 }
 
-JL_DLLEXPORT void JL_NORETURN jl_eof_error(void)
-{
-    jl_datatype_t *eof_error =
-        (jl_datatype_t*)jl_get_global(jl_base_module, jl_symbol("EOFError"));
-    assert(eof_error != NULL);
-    jl_throw(jl_new_struct(eof_error));
-}
-
 JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t)
 {
     if (!jl_isa(x,t))
@@ -222,24 +254,23 @@ JL_DLLEXPORT void __stack_chk_fail(void)
 {
     /* put your panic function or similar in here */
     fprintf(stderr, "fatal error: stack corruption detected\n");
-    jl_gc_debug_critical_error();
+    jl_gc_debug_fprint_critical_error(ios_safe_stderr);
     abort(); // end with abort, since the compiler destroyed the stack upon entry to this function, there's no going back now
 }
 #endif
 
 // exceptions -----------------------------------------------------------------
 
-JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
+JL_DLLEXPORT void jl_enter_handler(jl_task_t *ct, jl_handler_t *eh)
 {
-    jl_task_t *ct = jl_current_task;
     // Must have no safepoint
     eh->prev = ct->eh;
     eh->gcstack = ct->gcstack;
+    eh->scope = ct->scope;
     eh->gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
     eh->locks_len = ct->ptls->locks.len;
     eh->defer_signal = ct->ptls->defer_signal;
     eh->world_age = ct->world_age;
-    ct->eh = eh;
 #ifdef ENABLE_TIMINGS
     eh->timing_stack = ct->ptls->timing_stack;
 #endif
@@ -250,9 +281,8 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
 // * We leave a try block through normal control flow
 // * An exception causes a nonlocal jump to the catch block. In this case
 //   there's additional cleanup required, eg pushing the exception stack.
-JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
+JL_DLLEXPORT void jl_eh_restore_state(jl_task_t *ct, jl_handler_t *eh)
 {
-    jl_task_t *ct = jl_current_task;
 #ifdef _OS_WINDOWS_
     if (ct->ptls->needs_resetstkoflw) {
         _resetstkoflw();
@@ -262,11 +292,12 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
     // `eh` may be not equal to `ct->eh`. See `jl_pop_handler`
     // This function should **NOT** have any safepoint before the ones at the
     // end.
-    sig_atomic_t old_defer_signal = ct->ptls->defer_signal;
-    int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
+    jl_ptls_t ptls = ct->ptls;
+    sig_atomic_t old_defer_signal = ptls->defer_signal;
     ct->eh = eh->prev;
     ct->gcstack = eh->gcstack;
-    small_arraylist_t *locks = &ct->ptls->locks;
+    ct->scope = eh->scope;
+    small_arraylist_t *locks = &ptls->locks;
     int unlocks = locks->len > eh->locks_len;
     if (unlocks) {
         for (size_t i = locks->len; i > eh->locks_len; i--)
@@ -274,43 +305,69 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
         locks->len = eh->locks_len;
     }
     ct->world_age = eh->world_age;
-    ct->ptls->defer_signal = eh->defer_signal;
-    if (old_gc_state != eh->gc_state) {
-        jl_atomic_store_release(&ct->ptls->gc_state, eh->gc_state);
-        if (old_gc_state) {
-            jl_gc_safepoint_(ct->ptls);
-        }
-    }
-    if (old_defer_signal && !eh->defer_signal) {
-        jl_sigint_safepoint(ct->ptls);
-    }
+    ptls->defer_signal = eh->defer_signal;
+    int8_t old_gc_state = jl_atomic_load_relaxed(&ptls->gc_state);
+    if (old_gc_state != eh->gc_state)
+        jl_atomic_store_release(&ptls->gc_state, eh->gc_state);
+    if (!old_gc_state || !eh->gc_state) // it was or is unsafe now
+        jl_gc_safepoint_(ptls);
+    jl_value_t *exception = ptls->sig_exception;
+    JL_GC_PROMISE_ROOTED(exception);
+    if (exception) {
+        int8_t oldstate = jl_gc_unsafe_enter(ptls);
+        /* The temporary ptls->bt_data is rooted by special purpose code in the
+        GC. This exists only for the purpose of preserving bt_data until we
+        set ptls->bt_size=0 below. */
+        jl_push_excstack(ct, &ct->excstack, exception,
+                         ptls->bt_data, ptls->bt_size);
+        ptls->bt_size = 0;
+        ptls->sig_exception = NULL;
+        jl_gc_unsafe_leave(ptls, oldstate);
+    }
+    if (old_defer_signal && !eh->defer_signal)
+        jl_sigint_safepoint(ptls);
     if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers) &&
             unlocks && eh->locks_len == 0) {
         jl_gc_run_pending_finalizers(ct);
     }
 }
 
-JL_DLLEXPORT void jl_pop_handler(int n)
+JL_DLLEXPORT void jl_eh_restore_state_noexcept(jl_task_t *ct, jl_handler_t *eh)
+{
+    assert(ct->gcstack == eh->gcstack && "Incorrect GC usage under try catch");
+    ct->scope = eh->scope;
+    ct->eh = eh->prev;
+    ct->ptls->defer_signal = eh->defer_signal; // optional, but certain try-finally (in stream.jl) may be slightly harder to write without this
+}
+
+JL_DLLEXPORT void jl_pop_handler(jl_task_t *ct, int n)
 {
-    jl_task_t *ct = jl_current_task;
     if (__unlikely(n <= 0))
         return;
     jl_handler_t *eh = ct->eh;
     while (--n > 0)
         eh = eh->prev;
-    jl_eh_restore_state(eh);
+    jl_eh_restore_state(ct, eh);
 }
 
-JL_DLLEXPORT size_t jl_excstack_state(void) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_pop_handler_noexcept(jl_task_t *ct, int n)
+{
+    if (__unlikely(n <= 0))
+        return;
+    jl_handler_t *eh = ct->eh;
+    while (--n > 0)
+        eh = eh->prev;
+    jl_eh_restore_state_noexcept(ct, eh);
+}
+
+JL_DLLEXPORT size_t jl_excstack_state(jl_task_t *ct) JL_NOTSAFEPOINT
 {
-    jl_task_t *ct = jl_current_task;
     jl_excstack_t *s = ct->excstack;
     return s ? s->top : 0;
 }
 
-JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_restore_excstack(jl_task_t *ct, size_t state) JL_NOTSAFEPOINT
 {
-    jl_task_t *ct = jl_current_task;
     jl_excstack_t *s = ct->excstack;
     if (s) {
         assert(s->top >= state);
@@ -325,27 +382,27 @@ static void jl_copy_excstack(jl_excstack_t *dest, jl_excstack_t *src) JL_NOTSAFE
     dest->top = src->top;
 }
 
-static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT,
+static void jl_reserve_excstack(jl_task_t *ct, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT,
                                 size_t reserved_size)
 {
     jl_excstack_t *s = *stack;
     if (s && s->reserved_size >= reserved_size)
         return;
     size_t bufsz = sizeof(jl_excstack_t) + sizeof(uintptr_t)*reserved_size;
-    jl_task_t *ct = jl_current_task;
     jl_excstack_t *new_s = (jl_excstack_t*)jl_gc_alloc_buf(ct->ptls, bufsz);
     new_s->top = 0;
     new_s->reserved_size = reserved_size;
     if (s)
         jl_copy_excstack(new_s, s);
     *stack = new_s;
+    jl_gc_wb(ct, new_s);
 }
 
-void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT,
+void jl_push_excstack(jl_task_t *ct, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT,
                       jl_value_t *exception JL_ROOTED_ARGUMENT,
                       jl_bt_element_t *bt_data, size_t bt_size)
 {
-    jl_reserve_excstack(stack, (*stack ? (*stack)->top : 0) + bt_size + 2);
+    jl_reserve_excstack(ct, stack, (*stack ? (*stack)->top : 0) + bt_size + 2);
     jl_excstack_t *s = *stack;
     jl_bt_element_t *rawstack = jl_excstack_raw(s);
     memcpy(rawstack + s->top, bt_data, sizeof(jl_bt_element_t)*bt_size);
@@ -364,7 +421,10 @@ JL_DLLEXPORT void *(jl_symbol_name)(jl_sym_t *s)
 // WARNING: THIS FUNCTION IS NEVER CALLED BUT INLINE BY CCALL
 JL_DLLEXPORT void *jl_array_ptr(jl_array_t *a)
 {
-    return a->data;
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout;
+    if (layout->flags.arrayelem_isunion || layout->size == 0)
+        return (char*)a->ref.mem->ptr + (size_t)jl_array_data_(a);
+    return jl_array_data_(a);
 }
 JL_DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a)
 {
@@ -527,20 +587,12 @@ JL_DLLEXPORT void jl_flush_cstdio(void) JL_NOTSAFEPOINT
     fflush(stderr);
 }
 
-JL_DLLEXPORT jl_value_t *jl_stdout_obj(void) JL_NOTSAFEPOINT
-{
-    if (jl_base_module == NULL)
-        return NULL;
-    jl_binding_t *stdout_obj = jl_get_module_binding(jl_base_module, jl_symbol("stdout"), 0);
-    return stdout_obj ? jl_atomic_load_relaxed(&stdout_obj->value) : NULL;
-}
-
 JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT
 {
     if (jl_base_module == NULL)
         return NULL;
     jl_binding_t *stderr_obj = jl_get_module_binding(jl_base_module, jl_symbol("stderr"), 0);
-    return stderr_obj ? jl_atomic_load_relaxed(&stderr_obj->value) : NULL;
+    return stderr_obj ? jl_get_latest_binding_value_if_resolved_debug_only(stderr_obj) : NULL;
 }
 
 // toys for debugging ---------------------------------------------------------
@@ -572,7 +624,7 @@ static size_t jl_show_svec(JL_STREAM *out, jl_svec_t *t, const char *head, const
 JL_DLLEXPORT int jl_id_start_char(uint32_t wc) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_id_char(uint32_t wc) JL_NOTSAFEPOINT;
 
-JL_DLLEXPORT int jl_is_identifier(char *str) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_is_identifier(const char *str) JL_NOTSAFEPOINT
 {
     size_t i = 0;
     uint32_t wc = u8_nextchar(str, &i);
@@ -632,22 +684,19 @@ JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROO
 
 static int is_globname_binding(jl_value_t *v, jl_datatype_t *dv) JL_NOTSAFEPOINT
 {
-    jl_sym_t *globname = dv->name->mt != NULL ? dv->name->mt->name : NULL;
+    jl_sym_t *globname = dv->name->singletonname;
     if (globname && dv->name->module) {
         jl_binding_t *b = jl_get_module_binding(dv->name->module, globname, 0);
-        if (b && jl_atomic_load_relaxed(&b->owner) && b->constp) {
-            jl_value_t *bv = jl_atomic_load_relaxed(&b->value);
-            // The `||` makes this function work for both function instances and function types.
-            if (bv == v || jl_typeof(bv) == v)
-                return 1;
-        }
+        jl_value_t *bv = jl_get_latest_binding_value_if_resolved_and_const_debug_only(b);
+        if (bv && ((jl_value_t*)dv == v ? jl_typeof(bv) == v : bv == v))
+            return 1;
     }
     return 0;
 }
 
 static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname_out) JL_NOTSAFEPOINT
 {
-    jl_sym_t *globname = dv->name->mt != NULL ? dv->name->mt->name : NULL;
+    jl_sym_t *globname = dv->name->singletonname;
     *globname_out = globname;
     if (globname && !strchr(jl_symbol_name(globname), '#') && !strchr(jl_symbol_name(globname), '@')) {
         return 1;
@@ -655,22 +704,97 @@ static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname
     return 0;
 }
 
-static size_t jl_static_show_x_sym_escaped(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT
+static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap, int raw) JL_NOTSAFEPOINT
 {
     size_t n = 0;
-
-    char *sn = jl_symbol_name(name);
-    int hidden = 0;
-    if (!(jl_is_identifier(sn) || jl_is_operator(sn))) {
-        hidden = 1;
+    if (wrap)
+        n += jl_printf(out, "\"");
+    if (!raw && !u8_isvalid(str, len)) {
+        // alternate print algorithm that preserves data if it's not UTF-8
+        static const char hexdig[] = "0123456789abcdef";
+        for (size_t i = 0; i < len; i++) {
+            uint8_t c = str[i];
+            if (c == '\\' || c == '"' || c == '$')
+                n += jl_printf(out, "\\%c", c);
+            else if (c >= 32 && c < 0x7f)
+                n += jl_printf(out, "%c", c);
+            else
+                n += jl_printf(out, "\\x%c%c", hexdig[c>>4], hexdig[c&0xf]);
+        }
     }
-
-    if (hidden) {
-        n += jl_printf(out, "var\"");
+    else {
+        int special = 0;
+        for (size_t i = 0; i < len; i++) {
+            uint8_t c = str[i];
+            if (raw && ((c == '\\' && i == len-1) || c == '"')) {
+                special = 1;
+                break;
+            }
+            else if (!raw && (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$')) {
+                special = 1;
+                break;
+            }
+        }
+        if (!special) {
+            jl_uv_puts(out, str, len);
+            n += len;
+        }
+        else if (raw) {
+            // REF: Base.escape_raw_string
+            int escapes = 0;
+            for (size_t i = 0; i < len; i++) {
+                uint8_t c = str[i];
+                if (c == '\\') {
+                    escapes++;
+                }
+                else {
+                     if (c == '"')
+                         for (escapes++; escapes > 0; escapes--)
+                             n += jl_printf(out, "\\");
+                     escapes = 0;
+                }
+                n += jl_printf(out, "%c", str[i]);
+            }
+            for (; escapes > 0; escapes--)
+                n += jl_printf(out, "\\");
+        }
+        else {
+            char buf[512];
+            size_t i = 0;
+            while (i < len) {
+                size_t r = u8_escape(buf, sizeof(buf), str, &i, len, "\"$", 0);
+                jl_uv_puts(out, buf, r - 1);
+                n += r - 1;
+            }
+        }
     }
-    n += jl_printf(out, "%s", sn);
-    if (hidden) {
+    if (wrap)
         n += jl_printf(out, "\"");
+    return n;
+}
+
+static int jl_is_quoted_sym(const char *sn)
+{
+    static const char *const quoted_syms[] = {":", "::", ":=", "=", "==", "===", "=>", "`"};
+    for (int i = 0; i < sizeof quoted_syms / sizeof *quoted_syms; i++)
+        if (!strcmp(sn, quoted_syms[i]))
+            return 1;
+    return 0;
+}
+
+// TODO: in theory, we need a separate function for showing symbols in an
+// expression context (where `Symbol("foo\x01bar")` is ok) and a syntactic
+// context (where var"" must be used).
+static size_t jl_static_show_symbol(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT
+{
+    size_t n = 0;
+    const char *sn = jl_symbol_name(name);
+    if (jl_is_identifier(sn) || (jl_is_operator(sn) && !jl_is_quoted_sym(sn))) {
+        n += jl_printf(out, "%s", sn);
+    }
+    else {
+        n += jl_printf(out, "var");
+        n += jl_static_show_string(out, sn, strlen(sn), 1, 1);
     }
     return n;
 }
@@ -699,6 +823,51 @@ static int jl_static_is_function_(jl_datatype_t *vt) JL_NOTSAFEPOINT {
     return 0;
 }
 
+static size_t jl_static_show_float(JL_STREAM *out, double v,
+                                   jl_datatype_t *vt) JL_NOTSAFEPOINT
+{
+    size_t n = 0;
+    // TODO: non-canonical NaNs do not round-trip
+    // TOOD: BFloat16
+    const char *size_suffix = vt == jl_float16_type ? "16" :
+                              vt == jl_float32_type ? "32" :
+                                                      "";
+    // Requires minimum 1 (sign) + 17 (sig) + 1 (dot) + 5 ("e-123") + 1 (null)
+    char buf[32];
+    // Base B significand digits required to print n base-b significand bits
+    // (including leading 1):  N = 2 + floor(n/log(b, B))
+    //   Float16   5
+    //   Float32   9
+    //   Float64  17
+    // REF: https://dl.acm.org/doi/pdf/10.1145/93542.93559
+    if (isnan(v)) {
+        n += jl_printf(out, "NaN%s", size_suffix);
+    }
+    else if (isinf(v)) {
+        n += jl_printf(out, "%sInf%s", v < 0 ? "-" : "", size_suffix);
+    }
+    else if (vt == jl_float64_type) {
+        n += jl_printf(out, "%#.17g", v);
+    }
+    else if (vt == jl_float32_type) {
+        size_t m = snprintf(buf, sizeof buf, "%.9g", v);
+        // If the exponent was printed, replace it with 'f'
+        char *p = (char *)memchr(buf, 'e', m);
+        if (p)
+            *p = 'f';
+        jl_uv_puts(out, buf, m);
+        n += m;
+        // If no exponent was printed, we must add one
+        if (!p)
+            n += jl_printf(out, "f0");
+    }
+    else {
+        assert(vt == jl_float16_type);
+        n += jl_printf(out, "Float16(%#.5g)", v);
+    }
+    return n;
+}
+
 // `v` might be pointing to a field inlined in a structure therefore
 // `jl_typeof(v)` may not be the same with `vt` and only `vt` should be
 // used to determine the type of the value.
@@ -736,6 +905,9 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     else if (v == (jl_value_t*)jl_methtable_type) {
         n += jl_printf(out, "Core.MethodTable");
     }
+    else if (v == (jl_value_t*)jl_methcache_type) {
+        n += jl_printf(out, "Core.MethodCache");
+    }
     else if (v == (jl_value_t*)jl_any_type) {
         n += jl_printf(out, "Any");
     }
@@ -756,7 +928,8 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         else {
             n += jl_static_show_x(out, (jl_value_t*)li->def.module, depth, ctx);
             n += jl_printf(out, ".<toplevel thunk> -> ");
-            n += jl_static_show_x(out, jl_atomic_load_relaxed(&li->uninferred), depth, ctx);
+            n += jl_static_show_x(out, jl_atomic_load_relaxed(&jl_cached_uninferred(
+                jl_atomic_load_relaxed(&li->cache), 1)->inferred), depth, ctx);
         }
     }
     else if (vt == jl_typename_type) {
@@ -788,11 +961,6 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         // Types are printed as a fully qualified name, with parameters, e.g.
         // `Base.Set{Int}`, and function types are printed as e.g. `typeof(Main.f)`
         jl_datatype_t *dv = (jl_datatype_t*)v;
-        jl_sym_t *globname;
-        int globfunc = is_globname_binding(v, dv) && is_globfunction(v, dv, &globname);
-        jl_sym_t *sym = globfunc ? globname : dv->name->name;
-        char *sn = jl_symbol_name(sym);
-        size_t quote = 0;
         if (dv->name == jl_tuple_typename) {
             if (dv == jl_tuple_type)
                 return jl_printf(out, "Tuple");
@@ -824,9 +992,25 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             }
             return n;
         }
+        if (jl_genericmemory_type && dv->name == jl_genericmemory_typename) {
+            jl_value_t *isatomic = jl_tparam0(dv);
+            jl_value_t *el_type = jl_tparam1(dv);
+            jl_value_t *addrspace = jl_tparam2(dv);
+            if (isatomic == (jl_value_t*)jl_not_atomic_sym && addrspace && jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0) {
+                n += jl_printf(out, "Memory{");
+                n += jl_static_show_x(out, el_type, depth, ctx);
+                n += jl_printf(out, "}");
+                return n;
+            }
+        }
         if (ctx.quiet) {
-            return jl_printf(out, "%s", jl_symbol_name(dv->name->name));
+            return jl_static_show_symbol(out, dv->name->name);
         }
+        jl_sym_t *globname;
+        int globfunc = is_globname_binding(v, dv) && is_globfunction(v, dv, &globname);
+        jl_sym_t *sym = globfunc ? globname : dv->name->name;
+        char *sn = jl_symbol_name(sym);
+        size_t quote = 0;
         if (globfunc) {
             n += jl_printf(out, "typeof(");
         }
@@ -839,7 +1023,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                 quote = 1;
             }
         }
-        n += jl_static_show_x_sym_escaped(out, sym);
+        n += jl_static_show_symbol(out, sym);
         if (globfunc) {
             n += jl_printf(out, ")");
             if (quote) {
@@ -864,17 +1048,21 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         int f = *(uint32_t*)jl_data_ptr(v);
         n += jl_printf(out, "#<intrinsic #%d %s>", f, jl_intrinsic_name(f));
     }
+    else if (vt == jl_long_type) {
+        // Avoid unnecessary Int64(x)/Int32(x)
+        n += jl_printf(out, "%" PRIdPTR, *(intptr_t*)v);
+    }
     else if (vt == jl_int64_type) {
-        n += jl_printf(out, "%" PRId64, *(int64_t*)v);
+        n += jl_printf(out, "Int64(%" PRId64 ")", *(int64_t*)v);
     }
     else if (vt == jl_int32_type) {
-        n += jl_printf(out, "%" PRId32, *(int32_t*)v);
+        n += jl_printf(out, "Int32(%" PRId32 ")", *(int32_t*)v);
     }
     else if (vt == jl_int16_type) {
-        n += jl_printf(out, "%" PRId16, *(int16_t*)v);
+        n += jl_printf(out, "Int16(%" PRId16 ")", *(int16_t*)v);
     }
     else if (vt == jl_int8_type) {
-        n += jl_printf(out, "%" PRId8, *(int8_t*)v);
+        n += jl_printf(out, "Int8(%" PRId8 ")", *(int8_t*)v);
     }
     else if (vt == jl_uint64_type) {
         n += jl_printf(out, "0x%016" PRIx64, *(uint64_t*)v);
@@ -888,18 +1076,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     else if (vt == jl_uint8_type) {
         n += jl_printf(out, "0x%02" PRIx8, *(uint8_t*)v);
     }
-    else if (jl_pointer_type && jl_is_cpointer_type((jl_value_t*)vt)) {
-#ifdef _P64
-        n += jl_printf(out, "0x%016" PRIx64, *(uint64_t*)v);
-#else
-        n += jl_printf(out, "0x%08" PRIx32, *(uint32_t*)v);
-#endif
+    else if (vt == jl_float16_type) {
+        n += jl_static_show_float(out, julia_half_to_float(*(uint16_t *)v), vt);
     }
     else if (vt == jl_float32_type) {
-        n += jl_printf(out, "%gf", *(float*)v);
+        n += jl_static_show_float(out, *(float *)v, vt);
     }
     else if (vt == jl_float64_type) {
-        n += jl_printf(out, "%g", *(double*)v);
+        n += jl_static_show_float(out, *(double *)v, vt);
     }
     else if (vt == jl_bool_type) {
         n += jl_printf(out, "%s", *(uint8_t*)v ? "true" : "false");
@@ -907,10 +1091,11 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     else if (v == jl_nothing || (jl_nothing && (jl_value_t*)vt == jl_typeof(jl_nothing))) {
         n += jl_printf(out, "nothing");
     }
+    else if (v == (jl_value_t*)jl_method_table) {
+        n += jl_printf(out, "Core.methodtable");
+    }
     else if (vt == jl_string_type) {
-        n += jl_printf(out, "\"");
-        jl_uv_puts(out, jl_string_data(v), jl_string_len(v)); n += jl_string_len(v);
-        n += jl_printf(out, "\"");
+        n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1, 0);
     }
     else if (v == jl_bottom_type) {
         n += jl_printf(out, "Union{}");
@@ -959,7 +1144,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                 n += jl_printf(out, ")");
             n += jl_printf(out, "<:");
         }
-        n += jl_static_show_x_sym_escaped(out, var->name);
+        n += jl_static_show_symbol(out, var->name);
         if (showbounds && (ub != (jl_value_t*)jl_any_type || lb != jl_bottom_type)) {
             // show type-var upper bound if it is defined, or if we showed the lower bound
             int ua = jl_is_unionall(ub);
@@ -977,18 +1162,11 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             n += jl_static_show_x(out, (jl_value_t*)m->parent, depth, ctx);
             n += jl_printf(out, ".");
         }
-        n += jl_printf(out, "%s", jl_symbol_name(m->name));
+        n += jl_static_show_symbol(out, m->name);
     }
     else if (vt == jl_symbol_type) {
-        char *sn = jl_symbol_name((jl_sym_t*)v);
-        int quoted = !jl_is_identifier(sn) && jl_operator_precedence(sn) == 0;
-        if (quoted)
-            n += jl_printf(out, "Symbol(\"");
-        else
-            n += jl_printf(out, ":");
-        n += jl_printf(out, "%s", sn);
-        if (quoted)
-            n += jl_printf(out, "\")");
+        n += jl_printf(out, ":");
+        n += jl_static_show_symbol(out, (jl_sym_t*)v);
     }
     else if (vt == jl_ssavalue_type) {
         n += jl_printf(out, "SSAValue(%" PRIuPTR ")",
@@ -996,8 +1174,12 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     }
     else if (vt == jl_globalref_type) {
         n += jl_static_show_x(out, (jl_value_t*)jl_globalref_mod(v), depth, ctx);
-        char *name = jl_symbol_name(jl_globalref_name(v));
-        n += jl_printf(out, jl_is_identifier(name) ? ".%s" : ".:(%s)", name);
+        jl_sym_t *name = jl_globalref_name(v);
+        n += jl_printf(out, ".");
+        if (jl_is_operator(jl_symbol_name(name)))
+            n += jl_printf(out, ":(%s)", jl_symbol_name(name));
+        else
+            n += jl_static_show_symbol(out, name);
     }
     else if (vt == jl_gotonode_type) {
         n += jl_printf(out, "goto %" PRIuPTR, jl_gotonode_label(v));
@@ -1030,42 +1212,70 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     }
     else if (vt == jl_expr_type) {
         jl_expr_t *e = (jl_expr_t*)v;
-        if (e->head == jl_assign_sym && jl_array_len(e->args) == 2) {
-            n += jl_static_show_x(out, jl_exprarg(e,0), depth, ctx);
+        if (e->head == jl_assign_sym && jl_array_nrows(e->args) == 2) {
+            n += jl_static_show_x(out, jl_exprarg(e, 0), depth, ctx);
             n += jl_printf(out, " = ");
-            n += jl_static_show_x(out, jl_exprarg(e,1), depth, ctx);
+            n += jl_static_show_x(out, jl_exprarg(e, 1), depth, ctx);
         }
         else {
-            char sep = ' ';
-            n += jl_printf(out, "Expr(:%s", jl_symbol_name(e->head));
-            size_t i, len = jl_array_len(e->args);
+            n += jl_printf(out, "Expr(");
+            n += jl_static_show_x(out, (jl_value_t*)e->head, depth, ctx);
+            size_t i, len = jl_array_nrows(e->args);
             for (i = 0; i < len; i++) {
-                n += jl_printf(out, ",%c", sep);
-                n += jl_static_show_x(out, jl_exprarg(e,i), depth, ctx);
+                n += jl_printf(out, ", ");
+                n += jl_static_show_x(out, jl_exprarg(e, i), depth, ctx);
             }
             n += jl_printf(out, ")");
         }
     }
     else if (jl_array_type && jl_is_array_type(vt)) {
         n += jl_printf(out, "Array{");
-        n += jl_static_show_x(out, (jl_value_t*)jl_tparam0(vt), depth, ctx);
-        n += jl_printf(out, ", (");
+        jl_value_t *el_type = jl_tparam0(vt);
+        n += jl_static_show_x(out, el_type, depth, ctx);
+        jl_array_t *av = (jl_array_t*)v;
         size_t i, ndims = jl_array_ndims(v);
+        n += jl_printf(out, ", %" PRIdPTR "}(dims=(", ndims);
         if (ndims == 1)
             n += jl_printf(out, "%" PRIdPTR ",", jl_array_dim0(v));
         else
             for (i = 0; i < ndims; i++)
                 n += jl_printf(out, (i > 0 ? ", %" PRIdPTR : "%" PRIdPTR), jl_array_dim(v, i));
-        n += jl_printf(out, ")}[");
-        size_t j, tlen = jl_array_len(v);
-        jl_array_t *av = (jl_array_t*)v;
-        jl_value_t *el_type = jl_tparam0(vt);
-        char *typetagdata = (!av->flags.ptrarray && jl_is_uniontype(el_type)) ? jl_array_typetagdata(av) : NULL;
+        n += jl_printf(out, "), mem=");
+        n += jl_static_show_x(out, (jl_value_t*)av->ref.mem, depth, ctx);
+        n += jl_printf(out, ")");
+    }
+    else if (jl_genericmemoryref_type && jl_is_genericmemoryref_type(vt)) {
+        jl_genericmemoryref_t *ref = (jl_genericmemoryref_t*)v;
+        n += jl_printf(out, "GenericMemoryRef(offset=");
+        size_t offset = (size_t)ref->ptr_or_offset;
+        if (ref->mem) {
+            const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typeof(ref->mem))->layout;
+            if (layout->size != 0 && !layout->flags.arrayelem_isunion)
+                offset = ((char*)offset - (char*)ref->mem->ptr) / layout->size;
+        }
+        n += jl_printf(out, "%" PRIdPTR, offset);
+        n += jl_printf(out, ", ptr_or_offset=%p, mem=", ref->ptr_or_offset);
+        n += jl_static_show_x(out, (jl_value_t*)ref->mem, depth, ctx);
+    }
+    else if (jl_genericmemory_type && jl_is_genericmemory_type(vt)) {
+        jl_genericmemory_t *m = (jl_genericmemory_t*)v;
+        //jl_value_t *isatomic = jl_tparam0(vt);
+        jl_value_t *el_type = jl_tparam1(vt);
+        jl_value_t *addrspace = jl_tparam2(vt);
+        n += jl_static_show_x(out, (jl_value_t*)vt, depth, ctx);
+        size_t j, tlen = m->length;
+        n += jl_printf(out, "(%" PRIdPTR ", %p)[", tlen, m->ptr);
+        if (!(addrspace && jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0)) {
+            n += jl_printf(out, "...]");
+            return n;
+        }
+        const char *typetagdata = NULL;
+        const jl_datatype_layout_t *layout = vt->layout;
         int nlsep = 0;
-        if (av->flags.ptrarray) {
+        if (layout->flags.arrayelem_isboxed) {
             // print arrays with newlines, unless the elements are probably small
             for (j = 0; j < tlen; j++) {
-                jl_value_t **ptr = ((jl_value_t**)av->data) + j;
+                jl_value_t **ptr = ((jl_value_t**)m->ptr) + j;
                 jl_value_t *p = *ptr;
                 if (p != NULL && (uintptr_t)p >= 4096U) {
                     jl_value_t *p_ty = jl_typeof(p);
@@ -1078,21 +1288,35 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                 }
             }
         }
-        if (nlsep && tlen > 1)
-            n += jl_printf(out, "\n  ");
-        for (j = 0; j < tlen; j++) {
-            if (av->flags.ptrarray) {
-                jl_value_t **ptr = ((jl_value_t**)av->data) + j;
-                n += jl_static_show_x(out, *ptr, depth, ctx);
-            }
-            else {
-                char *ptr = ((char*)av->data) + j * av->elsize;
-                n += jl_static_show_x_(out, (jl_value_t*)ptr,
-                        typetagdata ? (jl_datatype_t*)jl_nth_union_component(el_type, typetagdata[j]) : (jl_datatype_t*)el_type,
-                        depth, ctx);
+        else if (layout->flags.arrayelem_isunion) {
+            typetagdata = jl_genericmemory_typetagdata(m);
+        }
+        if (layout->size == 0 && tlen >= 3) {
+            n += jl_static_show_x_(out, (jl_value_t*)m->ptr, (jl_datatype_t*)el_type, depth, ctx);
+            n += jl_printf(out, ", ...");
+        }
+        else {
+            if (nlsep && tlen > 1)
+                n += jl_printf(out, "\n  ");
+            for (size_t j = 0; j < tlen; j++) {
+                if (layout->flags.arrayelem_isboxed) {
+                    jl_value_t **ptr = ((jl_value_t**)m->ptr) + j;
+                    n += jl_static_show_x(out, *ptr, depth, ctx);
+                }
+                else {
+                    char *ptr = ((char*)m->ptr) + j * layout->size;
+                    if (layout->flags.arrayelem_islocked) {
+                        // Skip the lock at the beginning for locked arrays
+                        size_t lock_size = sizeof(jl_mutex_t);
+                        ptr += lock_size;
+                    }
+                    n += jl_static_show_x_(out, (jl_value_t*)ptr,
+                            (jl_datatype_t*)(typetagdata ? jl_nth_union_component(el_type, typetagdata[j]) : el_type),
+                            depth, ctx);
+                }
+                if (j != tlen - 1)
+                    n += jl_printf(out, nlsep ? ",\n  " : ", ");
             }
-            if (j != tlen - 1)
-                n += jl_printf(out, nlsep ? ",\n  " : ", ");
         }
         n += jl_printf(out, "]");
     }
@@ -1128,7 +1352,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             }
         }
 
-        n += jl_static_show_x_sym_escaped(out, sym);
+        n += jl_static_show_symbol(out, sym);
 
         if (globfunc) {
             if (quote) {
@@ -1164,8 +1388,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             jl_value_t *names = isnamedtuple ? jl_tparam0(vt) : (jl_value_t*)jl_field_names(vt);
             for (; i < tlen; i++) {
                 if (!istuple) {
-                    jl_value_t *fname = isnamedtuple ? jl_fieldref_noalloc(names, i) : jl_svecref(names, i);
-                    n += jl_printf(out, "%s=", jl_symbol_name((jl_sym_t*)fname));
+                    jl_sym_t *fname = (jl_sym_t*)(isnamedtuple ? jl_fieldref_noalloc(names, i) : jl_svecref(names, i));
+                    if (fname == NULL || !jl_is_symbol(fname))
+                        n += jl_static_show_x(out, (jl_value_t*)fname, depth, ctx);
+                    else if (jl_is_operator(jl_symbol_name(fname)))
+                        n += jl_printf(out, "(%s)", jl_symbol_name(fname));
+                    else
+                        n += jl_static_show_symbol(out, fname);
+                    n += jl_printf(out, "=");
                 }
                 size_t offs = jl_field_offset(vt, i);
                 char *fld_ptr = (char*)v + offs;
@@ -1298,9 +1528,8 @@ size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_c
         return n;
     }
     if ((jl_nparams(ftype) == 0 || ftype == ((jl_datatype_t*)ftype)->name->wrapper) &&
-            ((jl_datatype_t*)ftype)->name->mt != jl_type_type_mt &&
-            ((jl_datatype_t*)ftype)->name->mt != jl_nonfunction_mt) {
-        n += jl_printf(s, "%s", jl_symbol_name(((jl_datatype_t*)ftype)->name->mt->name));
+            !jl_is_type_type(ftype) && !jl_is_type_type((jl_value_t*)((jl_datatype_t*)ftype)->super)) { // aka !iskind
+        n += jl_static_show_symbol(s, ((jl_datatype_t*)ftype)->name->singletonname);
     }
     else {
         n += jl_printf(s, "(::");
@@ -1349,6 +1578,23 @@ size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_c
     return n;
 }
 
+JL_DLLEXPORT size_t jl_safe_static_show(JL_STREAM *s, jl_value_t *v) JL_NOTSAFEPOINT
+{
+    jl_jmp_buf *old_buf = jl_get_safe_restore();
+    jl_jmp_buf buf;
+    jl_set_safe_restore(&buf);
+    volatile size_t sz = 0;
+    if (!jl_setjmp(buf, 0)) {
+        sz += jl_static_show(s, (jl_value_t*)v);
+        sz += jl_printf(s, "\n");
+    }
+    else {
+        sz += jl_printf(s, "\n!!! ERROR in jl_ -- ABORTING !!!\n");
+    }
+    jl_set_safe_restore(old_buf);
+    return sz;
+}
+
 JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT
 {
     jl_jmp_buf *old_buf = jl_get_safe_restore();
@@ -1376,15 +1622,18 @@ JL_DLLEXPORT void jl_test_failure_breakpoint(jl_value_t *v)
 
 // logging tools --------------------------------------------------------------
 
+// DO NOT USE THIS FUNCTION FOR NEW CODE
+// The internal should not be doing anything that requires logging, which means most functions would trigger UB if calling this
 void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
             jl_value_t *file, jl_value_t *line, jl_value_t *kwargs,
             jl_value_t *msg)
 {
-    static jl_value_t *logmsg_func = NULL;
-    if (!logmsg_func && jl_base_module) {
-        jl_value_t *corelogging = jl_get_global(jl_base_module, jl_symbol("CoreLogging"));
+    jl_value_t *logmsg_func = NULL;
+    jl_task_t *ct = jl_current_task;
+    if (jl_base_module) {
+        jl_value_t *corelogging = jl_get_global_value(jl_base_module, jl_symbol("CoreLogging"), ct->world_age);
         if (corelogging && jl_is_module(corelogging)) {
-            logmsg_func = jl_get_global((jl_module_t*)corelogging, jl_symbol("logmsg_shim"));
+            logmsg_func = jl_get_global_value((jl_module_t*)corelogging, jl_symbol("logmsg_shim"), ct->world_age);
         }
     }
     if (!logmsg_func) {
@@ -1399,10 +1648,10 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
         }
         jl_printf(str, "\n@ ");
         if (jl_is_string(file)) {
-            jl_uv_puts(str, jl_string_data(file), jl_string_len(file));
+            jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0, 0);
         }
         else if (jl_is_symbol(file)) {
-            jl_printf(str, "%s", jl_symbol_name((jl_sym_t*)file));
+            jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0, 0);
         }
         jl_printf(str, ":");
         jl_static_show(str, line);
diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp
index 23793254c205d..898ab1a025420 100644
--- a/src/runtime_ccall.cpp
+++ b/src/runtime_ccall.cpp
@@ -1,10 +1,9 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #include "llvm-version.h"
-#include <map>
 #include <string>
 #include <llvm/ADT/StringMap.h>
-#include <llvm/Support/Host.h>
+#include <llvm/TargetParser/Host.h>
 #include <llvm/Support/raw_ostream.h>
 
 #include "julia.h"
@@ -22,9 +21,10 @@
 using namespace llvm;
 
 // --- library symbol lookup ---
+jl_value_t *jl_libdl_dlopen_func JL_GLOBALLY_ROOTED;
 
 // map from user-specified lib names to handles
-static std::map<std::string, void*> libMap;
+static StringMap<void*> libMap;
 static jl_mutex_t libmap_lock;
 extern "C"
 void *jl_get_library_(const char *f_lib, int throw_err)
@@ -58,64 +58,45 @@ void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *
     if (!handle)
         jl_atomic_store_release(hnd, (handle = jl_get_library(f_lib)));
     void * ptr;
-    jl_dlsym(handle, f_name, &ptr, 1);
+    jl_dlsym(handle, f_name, &ptr, 1, 1);
     return ptr;
 }
 
 // jl_load_and_lookup, but with library computed at run time on first call
 extern "C" JL_DLLEXPORT
-void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name)
+void *jl_lazy_load_and_lookup(jl_value_t *lib_val, jl_value_t *f_name)
 {
-    char *f_lib;
+    void *lib_ptr;
+    const char *fname_str;
 
-    if (jl_is_symbol(lib_val))
-        f_lib = jl_symbol_name((jl_sym_t*)lib_val);
-    else if (jl_is_string(lib_val))
-        f_lib = jl_string_data(lib_val);
+    if (jl_is_symbol(f_name))
+        fname_str = jl_symbol_name((jl_sym_t*)f_name);
+    else if (jl_is_string(f_name))
+        fname_str = jl_string_data(f_name);
     else
-        jl_type_error("ccall", (jl_value_t*)jl_symbol_type, lib_val);
+        jl_type_error("ccall function name", (jl_value_t*)jl_symbol_type, f_name);
+
+    if (lib_val) {
+        if (jl_is_symbol(lib_val))
+            lib_ptr = jl_get_library(jl_symbol_name((jl_sym_t*)lib_val));
+        else if (jl_is_string(lib_val))
+            lib_ptr = jl_get_library(jl_string_data(lib_val));
+        else if (jl_libdl_dlopen_func != NULL) {
+            lib_ptr = jl_unbox_voidpointer(jl_apply_generic(jl_libdl_dlopen_func, &lib_val, 1));
+        } else
+            jl_type_error("ccall", (jl_value_t*)jl_symbol_type, lib_val);
+    }
+    else {
+        // If the user didn't supply a library name, try to find it now from the runtime value of f_name
+        lib_ptr = jl_get_library(jl_dlfind(fname_str));
+    }
+
     void *ptr;
-    jl_dlsym(jl_get_library(f_lib), f_name, &ptr, 1);
+    jl_dlsym(lib_ptr, fname_str, &ptr, 1, 1);
     return ptr;
 }
 
 // miscellany
-std::string jl_get_cpu_name_llvm(void)
-{
-    return llvm::sys::getHostCPUName().str();
-}
-
-std::string jl_get_cpu_features_llvm(void)
-{
-    StringMap<bool> HostFeatures;
-    llvm::sys::getHostCPUFeatures(HostFeatures);
-    std::string attr;
-    for (auto &ele: HostFeatures) {
-        if (ele.getValue()) {
-            if (!attr.empty()) {
-                attr.append(",+");
-            }
-            else {
-                attr.append("+");
-            }
-            attr.append(ele.getKey().str());
-        }
-    }
-    // Explicitly disabled features need to be added at the end so that
-    // they are not re-enabled by other features that implies them by default.
-    for (auto &ele: HostFeatures) {
-        if (!ele.getValue()) {
-            if (!attr.empty()) {
-                attr.append(",-");
-            }
-            else {
-                attr.append("-");
-            }
-            attr.append(ele.getKey().str());
-        }
-    }
-    return attr;
-}
 
 extern "C" JL_DLLEXPORT
 jl_value_t *jl_get_JIT(void)
@@ -138,7 +119,7 @@ jl_value_t *jl_get_JIT(void)
 //           %L    The local hostname.
 //           %l    The local hostname, including the domain name.
 //           %u    The local username.
-std::string jl_format_filename(StringRef output_pattern)
+std::string jl_format_filename(StringRef output_pattern) JL_NOTSAFEPOINT
 {
     std::string buf;
     raw_string_ostream outfile(buf);
@@ -200,7 +181,7 @@ std::string jl_format_filename(StringRef output_pattern)
     return outfile.str();
 }
 
-extern "C" JL_DLLEXPORT char *jl_format_filename(const char *output_pattern)
+extern "C" JL_DLLEXPORT char *jl_format_filename(const char *output_pattern) JL_NOTSAFEPOINT
 {
     return strdup(jl_format_filename(StringRef(output_pattern)).c_str());
 }
@@ -327,7 +308,8 @@ jl_value_t *jl_get_cfunction_trampoline(
                 permanent = true;
         }
         if (permanent) {
-            result = jl_gc_permobj(sizeof(jl_taggedvalue_t) + jl_datatype_size(result_type), result_type);
+            jl_task_t *ct = jl_current_task;
+            result = jl_gc_permobj(ct->ptls, sizeof(jl_taggedvalue_t) + jl_datatype_size(result_type), result_type, 0);
             memset(result, 0, jl_datatype_size(result_type));
         }
         else {
@@ -351,13 +333,131 @@ jl_value_t *jl_get_cfunction_trampoline(
     uv_mutex_lock(&trampoline_lock);
     tramp = trampoline_alloc();
     ((void**)result)[0] = tramp;
-    tramp = init_trampoline(tramp, nval);
+    init_trampoline(tramp, nval);
     ptrhash_put(cache, (void*)fobj, result);
     uv_mutex_unlock(&trampoline_lock);
     return result;
 }
 JL_GCC_IGNORE_STOP
 
+struct cfuncdata_t {
+    _Atomic(void *) fptr;
+    _Atomic(size_t) last_world;
+    jl_code_instance_t** plast_codeinst;
+    jl_code_instance_t* last_codeinst;
+    void *unspecialized;
+    jl_value_t *const *const declrt;
+    jl_value_t *const *const sigt;
+    size_t flags;
+};
+
+extern "C" JL_DLLEXPORT
+void *jl_jit_abi_converter_fallback(jl_task_t *ct, void *unspecialized, jl_value_t *declrt, jl_value_t *sigt, size_t nargs, int specsig,
+                                    jl_code_instance_t *codeinst, jl_callptr_t invoke, void *target, int target_specsig)
+{
+    if (unspecialized)
+        return unspecialized;
+    jl_errorf("cfunction not available in this build of Julia");
+}
+
+static const inline char *name_from_method_instance(jl_method_instance_t *mi) JL_NOTSAFEPOINT
+{
+    assert(jl_is_method_instance(mi));
+    return jl_is_method(mi->def.method) ? jl_symbol_name(mi->def.method->name) : "top-level scope";
+}
+
+static jl_mutex_t cfun_lock;
+// release jl_world_counter
+// store theFptr
+// release last_world_v
+//
+// acquire last_world_v
+// read theFptr
+// acquire jl_world_counter
+extern "C" JL_DLLEXPORT
+void *jl_get_abi_converter(jl_task_t *ct, void *data)
+{
+    cfuncdata_t *cfuncdata = (cfuncdata_t*)data;
+    jl_value_t *sigt = *cfuncdata->sigt;
+    JL_GC_PROMISE_ROOTED(sigt);
+    jl_value_t *declrt = *cfuncdata->declrt;
+    JL_GC_PROMISE_ROOTED(declrt);
+    bool specsig = cfuncdata->flags & 1;
+    size_t nargs = jl_nparams(sigt);
+    jl_value_t *mi;
+    jl_code_instance_t *codeinst;
+    size_t world;
+    // check first, while behind this lock, of the validity of the current contents of this cfunc thunk
+    JL_LOCK(&cfun_lock);
+    do {
+        size_t last_world_v = jl_atomic_load_relaxed(&cfuncdata->last_world);
+        void *f = jl_atomic_load_relaxed(&cfuncdata->fptr);
+        jl_code_instance_t *last_ci = cfuncdata->plast_codeinst ? *cfuncdata->plast_codeinst : nullptr;
+        world = jl_atomic_load_acquire(&jl_world_counter);
+        ct->world_age = world;
+        if (world == last_world_v) {
+            JL_UNLOCK(&cfun_lock);
+            return f;
+        }
+        mi = jl_get_specialization1((jl_tupletype_t*)sigt, world, 0);
+        if (f != nullptr) {
+            if (last_ci == nullptr) {
+                if (mi == jl_nothing) {
+                    jl_atomic_store_release(&cfuncdata->last_world, world);
+                    JL_UNLOCK(&cfun_lock);
+                    return f;
+                }
+            }
+            else {
+                if ((jl_value_t*)jl_get_ci_mi(last_ci) == mi && jl_atomic_load_relaxed(&last_ci->max_world) >= world) { // same dispatch and source
+                    jl_atomic_store_release(&cfuncdata->last_world, world);
+                    JL_UNLOCK(&cfun_lock);
+                    return f;
+                }
+            }
+        }
+        JL_UNLOCK(&cfun_lock);
+        // next, try to figure out what the target should look like (outside of the lock since this is very slow)
+        codeinst = mi != jl_nothing ? jl_type_infer((jl_method_instance_t*)mi, world, SOURCE_MODE_ABI, jl_options.trim) : nullptr;
+        // relock for the remainder of the function
+        JL_LOCK(&cfun_lock);
+    } while (jl_atomic_load_acquire(&jl_world_counter) != world); // restart entirely, since jl_world_counter changed thus jl_get_specialization1 might have changed
+    // double-check if the values were set on another thread
+    size_t last_world_v = jl_atomic_load_relaxed(&cfuncdata->last_world);
+    void *f = jl_atomic_load_relaxed(&cfuncdata->fptr);
+    if (world == last_world_v) {
+        JL_UNLOCK(&cfun_lock);
+        return f; // another thread fixed this up while we were away
+    }
+    auto assign_fptr = [cfuncdata, world, codeinst](void *f) {
+        cfuncdata->plast_codeinst = &cfuncdata->last_codeinst;
+        cfuncdata->last_codeinst = codeinst;
+        jl_atomic_store_relaxed(&cfuncdata->fptr, f);
+        jl_atomic_store_release(&cfuncdata->last_world, world);
+        JL_UNLOCK(&cfun_lock);
+        return f;
+    };
+    bool is_opaque_closure = false;
+    jl_abi_t from_abi = { sigt, declrt, nargs, specsig, is_opaque_closure };
+    if (codeinst == nullptr) {
+        // Generate an adapter to a dynamic dispatch
+        if (cfuncdata->unspecialized == nullptr)
+            cfuncdata->unspecialized = jl_jit_abi_converter(ct, from_abi, nullptr);
+
+        return assign_fptr(cfuncdata->unspecialized);
+    }
+
+    jl_value_t *astrt = codeinst->rettype;
+    if (astrt != (jl_value_t*)jl_bottom_type &&
+        jl_type_intersection(astrt, declrt) == jl_bottom_type) {
+        // Do not warn if the function never returns since it is
+        // occasionally required by the C API (typically error callbacks)
+        // even though we're likely to encounter memory errors in that case
+        jl_printf(JL_STDERR, "WARNING: cfunction: return type of %s does not match\n", name_from_method_instance((jl_method_instance_t*)mi));
+    }
+    return assign_fptr(jl_jit_abi_converter(ct, from_abi, codeinst));
+}
+
 void jl_init_runtime_ccall(void)
 {
     JL_MUTEX_INIT(&libmap_lock, "libmap_lock");
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index ed320aa9a6c35..31dd3e085033c 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -5,16 +5,15 @@
 //
 // this file assumes a little-endian processor, although that isn't too hard to fix
 // it also assumes two's complement negative numbers, which might be a bit harder to fix
-//
-// TODO: add half-float support
 
 #include "APInt-C.h"
 #include "julia.h"
 #include "julia_internal.h"
+#include "llvm-version.h"
 
 const unsigned int host_char_bit = 8;
 
-// float16 intrinsics
+// float16 conversion helpers
 
 static inline float half_to_float(uint16_t ival) JL_NOTSAFEPOINT
 {
@@ -162,8 +161,11 @@ static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT
     uint32_t f;
     memcpy(&f, &param, sizeof(float));
     if (isnan(param)) {
-        uint32_t t = 0x8000 ^ (0x8000 & ((uint16_t)(f >> 0x10)));
-        return t ^ ((uint16_t)(f >> 0xd));
+        // Match the behaviour of arm64's fcvt or x86's vcvtps2ph by quieting
+        // all NaNs (avoids creating infinities), preserving the sign, and using
+        // the upper bits of the payload.
+        //      sign              exp      quiet    payload
+        return (f>>16 & 0x8000) | 0x7c00 | 0x0200 | (f>>13 & 0x03ff);
     }
     int i = ((f & ~0x007fffff) >> 23);
     uint8_t sh = shifttable[i];
@@ -185,56 +187,208 @@ static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT
     return h;
 }
 
-JL_DLLEXPORT float julia__gnu_h2f_ieee(uint16_t param)
+static inline uint16_t double_to_half(double param) JL_NOTSAFEPOINT
 {
+    float temp = (float)param;
+    uint32_t tempi;
+    memcpy(&tempi, &temp, sizeof(temp));
+
+    // if Float16(res) is subnormal
+    if ((tempi&0x7fffffffu) < 0x38800000u) {
+        // shift so that the mantissa lines up where it would for normal Float16
+        uint32_t shift = 113u-((tempi & 0x7f800000u)>>23u);
+        if (shift<23u) {
+            tempi |= 0x00800000; // set implicit bit
+            tempi >>= shift;
+        }
+    }
+
+    // if we are halfway between 2 Float16 values
+    if ((tempi & 0x1fffu) == 0x1000u) {
+        memcpy(&tempi, &temp, sizeof(temp));
+        // adjust the value by 1 ULP in the direction that will make Float16(temp) give the right answer
+        tempi += (fabs(temp) < fabs(param)) - (fabs(param) < fabs(temp));
+        memcpy(&temp, &tempi, sizeof(temp));
+    }
+
+    return float_to_half(temp);
+}
+
+// x86-specific helpers for emulating the (B)Float16 ABI
+#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
+#include <xmmintrin.h>
+__attribute__((unused)) static inline __m128 return_in_xmm(uint16_t input) JL_NOTSAFEPOINT {
+    __m128 xmm_output;
+    asm (
+        "movd %[input], %%xmm0\n\t"
+        "movss %%xmm0, %[xmm_output]\n\t"
+        : [xmm_output] "=x" (xmm_output)
+        : [input] "r" ((uint32_t)input)
+        : "xmm0"
+    );
+    return xmm_output;
+}
+__attribute__((unused)) static inline uint16_t take_from_xmm(__m128 xmm_input) JL_NOTSAFEPOINT {
+    uint32_t output;
+    asm (
+        "movss %[xmm_input], %%xmm0\n\t"
+        "movd %%xmm0, %[output]\n\t"
+        : [output] "=r" (output)
+        : [xmm_input] "x" (xmm_input)
+        : "xmm0"
+    );
+    return (uint16_t)output;
+}
+#endif
+
+// float16 conversion API
+
+// for use in APInt and other soft-float ABIs (i.e. without the ABI shenanigans from below)
+JL_DLLEXPORT uint16_t julia_float_to_half(float param) {
+    return float_to_half(param);
+}
+JL_DLLEXPORT uint16_t julia_double_to_half(double param) {
+    return double_to_half(param);
+}
+JL_DLLEXPORT float julia_half_to_float(uint16_t param) {
     return half_to_float(param);
 }
 
-JL_DLLEXPORT uint16_t julia__gnu_f2h_ieee(float param)
+// starting with GCC 12 and Clang 15, we have _Float16 on most platforms
+// (but not on Windows; this may be a bug in the MSYS2 GCC compilers)
+#if ((defined(__GNUC__) && __GNUC__ > 11) || \
+     (defined(__clang__) && __clang_major__ > 14)) && \
+    !defined(_CPU_PPC64_) && !defined(_CPU_PPC_) && \
+    !defined(_OS_WINDOWS_) && !defined(_CPU_RISCV64_)
+    #define FLOAT16_TYPE _Float16
+    #define FLOAT16_TO_UINT16(x) (*(uint16_t*)&(x))
+    #define FLOAT16_FROM_UINT16(x) (*(_Float16*)&(x))
+// on older compilers, we need to emulate the platform-specific ABI
+#elif defined(_CPU_X86_) || (defined(_CPU_X86_64_) && !defined(_OS_WINDOWS_))
+    // on x86, we can use __m128; except on Windows where x64 calling
+    // conventions expect to pass __m128 by reference.
+    #define FLOAT16_TYPE __m128
+    #define FLOAT16_TO_UINT16(x) take_from_xmm(x)
+    #define FLOAT16_FROM_UINT16(x) return_in_xmm(x)
+#elif defined(_CPU_PPC64_) || defined(_CPU_PPC_)
+    // on PPC, pass Float16 as if it were an integer, similar to the old x86 ABI
+    // before _Float16
+    #define FLOAT16_TYPE uint16_t
+    #define FLOAT16_TO_UINT16(x) (x)
+    #define FLOAT16_FROM_UINT16(x) (x)
+#else
+    // otherwise, pass using floating-point calling conventions
+    #define FLOAT16_TYPE float
+    #define FLOAT16_TO_UINT16(x) ((uint16_t)*(uint32_t*)&(x))
+    #define FLOAT16_FROM_UINT16(x) ({ uint32_t tmp = (uint32_t)(x); *(float*)&tmp; })
+#endif
+
+JL_DLLEXPORT float julia__gnu_h2f_ieee(FLOAT16_TYPE param)
 {
-    return float_to_half(param);
+    uint16_t param16 = FLOAT16_TO_UINT16(param);
+    return half_to_float(param16);
 }
 
-JL_DLLEXPORT uint16_t julia__truncdfhf2(double param)
+JL_DLLEXPORT FLOAT16_TYPE julia__gnu_f2h_ieee(float param)
 {
-    float res = (float)param;
-    uint32_t resi;
-    memcpy(&resi, &res, sizeof(res));
-    if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal
-        // shift so that the mantissa lines up where it would for normal Float16
-        uint32_t shift = 113u-((resi & 0x7f800000u)>>23u);
-        if (shift<23u) {
-            resi |= 0x00800000; // set implicit bit
-            resi >>= shift;
-        }
-    }
-    if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values
-        memcpy(&resi, &res, sizeof(res));
-        // adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
-        resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res));
-        memcpy(&res, &resi, sizeof(res));
+    uint16_t res = float_to_half(param);
+    return FLOAT16_FROM_UINT16(res);
+}
+
+JL_DLLEXPORT FLOAT16_TYPE julia__truncdfhf2(double param)
+{
+    uint16_t res = double_to_half(param);
+    return FLOAT16_FROM_UINT16(res);
+}
+
+
+// bfloat16 conversion helpers
+
+static inline uint16_t float_to_bfloat(float param) JL_NOTSAFEPOINT
+{
+    if (isnan(param))
+        return 0x7fc0;
+
+    uint32_t bits = *((uint32_t*) &param);
+
+    // round to nearest even
+    bits += 0x7fff + ((bits >> 16) & 1);
+    return (uint16_t)(bits >> 16);
+}
+
+static inline uint16_t double_to_bfloat(double param) JL_NOTSAFEPOINT
+{
+    float temp = (float)param;
+    uint32_t tempi;
+    memcpy(&tempi, &temp, sizeof(temp));
+
+    // bfloat16 uses the same exponent as float32, so we don't need special handling
+    // for subnormals when truncating float64 to bfloat16.
+
+    // if we are halfway between 2 bfloat16 values
+    if ((tempi & 0x1ffu) == 0x100u) {
+        // adjust the value by 1 ULP in the direction that will make bfloat16(temp) give the right answer
+        tempi += (fabs(temp) < fabs(param)) - (fabs(param) < fabs(temp));
+        memcpy(&temp, &tempi, sizeof(temp));
     }
-    return float_to_half(res);
-}
-
-//JL_DLLEXPORT double julia__extendhfdf2(uint16_t n) { return (double)julia__gnu_h2f_ieee(n); }
-//JL_DLLEXPORT int32_t julia__fixhfsi(uint16_t n) { return (int32_t)julia__gnu_h2f_ieee(n); }
-//JL_DLLEXPORT int64_t julia__fixhfdi(uint16_t n) { return (int64_t)julia__gnu_h2f_ieee(n); }
-//JL_DLLEXPORT uint32_t julia__fixunshfsi(uint16_t n) { return (uint32_t)julia__gnu_h2f_ieee(n); }
-//JL_DLLEXPORT uint64_t julia__fixunshfdi(uint16_t n) { return (uint64_t)julia__gnu_h2f_ieee(n); }
-//JL_DLLEXPORT uint16_t julia__floatsihf(int32_t n) { return julia__gnu_f2h_ieee((float)n); }
-//JL_DLLEXPORT uint16_t julia__floatdihf(int64_t n) { return julia__gnu_f2h_ieee((float)n); }
-//JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) { return julia__gnu_f2h_ieee((float)n); }
-//JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) { return julia__gnu_f2h_ieee((float)n); }
-//HANDLE_LIBCALL(F16, F128, __extendhftf2)
-//HANDLE_LIBCALL(F16, F80, __extendhfxf2)
-//HANDLE_LIBCALL(F80, F16, __truncxfhf2)
-//HANDLE_LIBCALL(F128, F16, __trunctfhf2)
-//HANDLE_LIBCALL(PPCF128, F16, __trunctfhf2)
-//HANDLE_LIBCALL(F16, I128, __fixhfti)
-//HANDLE_LIBCALL(F16, I128, __fixunshfti)
-//HANDLE_LIBCALL(I128, F16, __floattihf)
-//HANDLE_LIBCALL(I128, F16, __floatuntihf)
+
+    return float_to_bfloat(temp);
+}
+
+static inline float bfloat_to_float(uint16_t param) JL_NOTSAFEPOINT
+{
+    uint32_t bits = ((uint32_t)param) << 16;
+    float result;
+    memcpy(&result, &bits, sizeof(result));
+    return result;
+}
+
+// bfloat16 conversion API
+
+// for use in APInt (without the ABI shenanigans from below)
+uint16_t julia_float_to_bfloat(float param) {
+    return float_to_bfloat(param);
+}
+float julia_bfloat_to_float(uint16_t param) {
+    return bfloat_to_float(param);
+}
+
+// starting with GCC 13 and Clang 17, we have __bf16 on most platforms
+// (but not on Windows; this may be a bug in the MSYS2 GCC compilers)
+#if ((defined(__GNUC__) && __GNUC__ > 12) || \
+     (defined(__clang__) && __clang_major__ > 16)) && \
+    !defined(_CPU_PPC64_) && !defined(_CPU_PPC_) && \
+    !defined(_OS_WINDOWS_) && !defined(_CPU_RISCV64_)
+    #define BFLOAT16_TYPE __bf16
+    #define BFLOAT16_TO_UINT16(x) (*(uint16_t*)&(x))
+    #define BFLOAT16_FROM_UINT16(x) (*(__bf16*)&(x))
+// on older compilers, we need to emulate the platform-specific ABI.
+// for more details, see similar code above that deals with Float16.
+#elif defined(_CPU_X86_) || (defined(_CPU_X86_64_) && !defined(_OS_WINDOWS_))
+    #define BFLOAT16_TYPE __m128
+    #define BFLOAT16_TO_UINT16(x) take_from_xmm(x)
+    #define BFLOAT16_FROM_UINT16(x) return_in_xmm(x)
+#elif defined(_CPU_PPC64_) || defined(_CPU_PPC_)
+    #define BFLOAT16_TYPE uint16_t
+    #define BFLOAT16_TO_UINT16(x) (x)
+    #define BFLOAT16_FROM_UINT16(x) (x)
+#else
+    #define BFLOAT16_TYPE float
+    #define BFLOAT16_TO_UINT16(x) ((uint16_t)*(uint32_t*)&(x))
+    #define BFLOAT16_FROM_UINT16(x) ({ uint32_t tmp = (uint32_t)(x); *(float*)&tmp; })
+#endif
+
+JL_DLLEXPORT BFLOAT16_TYPE julia__truncsfbf2(float param) JL_NOTSAFEPOINT
+{
+    uint16_t res = float_to_bfloat(param);
+    return BFLOAT16_FROM_UINT16(res);
+}
+
+JL_DLLEXPORT BFLOAT16_TYPE julia__truncdfbf2(double param) JL_NOTSAFEPOINT
+{
+    uint16_t res = double_to_bfloat(param);
+    return BFLOAT16_FROM_UINT16(res);
+}
 
 
 // run time version of bitcast intrinsic
@@ -434,9 +588,9 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp
     char *pp = (char*)jl_unbox_long(p);
     jl_datatype_t *rettyp = jl_apply_cmpswap_type(ety);
     JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    jl_value_t *result = NULL;
+    JL_GC_PUSH1(&result);
     if (ety == (jl_value_t*)jl_any_type) {
-        jl_value_t *result;
-        JL_GC_PUSH1(&result);
         result = expected;
         int success;
         while (1) {
@@ -445,8 +599,6 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp
                 break;
         }
         result = jl_new_struct(rettyp, result, success ? jl_true : jl_false);
-        JL_GC_POP();
-        return result;
     }
     else {
         if (jl_typeof(x) != ety)
@@ -454,8 +606,20 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp
         size_t nb = jl_datatype_size(ety);
         if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE)
             jl_error("atomic_pointerreplace: invalid pointer for atomic operation");
-        return jl_atomic_cmpswap_bits((jl_datatype_t*)ety, rettyp, pp, expected, x, nb);
+        int isptr = jl_field_isptr(rettyp, 0);
+        jl_task_t *ct = jl_current_task;
+        result = jl_gc_alloc(ct->ptls, isptr ? nb : jl_datatype_size(rettyp), isptr ? ety : (jl_value_t*)rettyp);
+        int success = jl_atomic_cmpswap_bits((jl_datatype_t*)ety, result, pp, expected, x, nb);
+        if (isptr) {
+            jl_value_t *z = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), rettyp);
+            *(jl_value_t**)z = result;
+            result = z;
+            nb = sizeof(jl_value_t*);
+        }
+        *((uint8_t*)result + nb) = success ? 1 : 0;
     }
+    JL_GC_POP();
+    return result;
 }
 
 JL_DLLEXPORT jl_value_t *jl_atomic_fence(jl_value_t *order_sym)
@@ -470,7 +634,6 @@ JL_DLLEXPORT jl_value_t *jl_atomic_fence(jl_value_t *order_sym)
 JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty)
 {
     JL_TYPECHK(cglobal, type, ty);
-    JL_GC_PUSH1(&v);
     jl_value_t *rt =
         ty == (jl_value_t*)jl_nothing_type ? (jl_value_t*)jl_voidpointer_type : // a common case
             (jl_value_t*)jl_apply_type1((jl_value_t*)jl_pointer_type, ty);
@@ -479,44 +642,22 @@ JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty)
     if (!jl_is_concrete_type(rt))
         jl_error("cglobal: type argument not concrete");
 
+    if (jl_is_pointer(v))
+        return jl_bitcast(rt, v);
+
     if (jl_is_tuple(v) && jl_nfields(v) == 1)
         v = jl_fieldref(v, 0);
 
-    if (jl_is_pointer(v)) {
-        v = jl_bitcast(rt, v);
-        JL_GC_POP();
-        return v;
-    }
-
-    char *f_lib = NULL;
+    jl_value_t *f_lib = NULL;
+    JL_GC_PUSH2(&v, &f_lib);
     if (jl_is_tuple(v) && jl_nfields(v) > 1) {
-        jl_value_t *t1 = jl_fieldref(v, 1);
-        if (jl_is_symbol(t1))
-            f_lib = jl_symbol_name((jl_sym_t*)t1);
-        else if (jl_is_string(t1))
-            f_lib = jl_string_data(t1);
-        else
-            JL_TYPECHK(cglobal, symbol, t1)
+        f_lib = jl_fieldref(v, 1);
         v = jl_fieldref(v, 0);
     }
-
-    char *f_name = NULL;
-    if (jl_is_symbol(v))
-        f_name = jl_symbol_name((jl_sym_t*)v);
-    else if (jl_is_string(v))
-        f_name = jl_string_data(v);
-    else
-        JL_TYPECHK(cglobal, symbol, v)
-
-    if (!f_lib)
-        f_lib = (char*)jl_dlfind(f_name);
-
-    void *ptr;
-    jl_dlsym(jl_get_library(f_lib), f_name, &ptr, 1);
-    jl_value_t *jv = jl_gc_alloc_1w();
-    jl_set_typeof(jv, rt);
-    *(void**)jl_data_ptr(jv) = ptr;
+    void *ptr = jl_lazy_load_and_lookup(f_lib, v);
     JL_GC_POP();
+    jl_value_t *jv = jl_gc_alloc(jl_current_task->ptls, sizeof(void*), rt);
+    *(void**)jl_data_ptr(jv) = ptr;
     return jv;
 }
 
@@ -595,24 +736,30 @@ static inline unsigned jl_##name##nbits(unsigned runtime_nbits, void *pa) JL_NOT
 // nbits::number of bits in the *input*
 // c_type::c_type corresponding to nbits
 #define un_fintrinsic_ctype(OP, name, c_type) \
-static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \
+static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \
 { \
     c_type a = *(c_type*)pa; \
-    OP((c_type*)pr, a); \
+    OP(ty, (c_type*)pr, a); \
 }
 
-#define un_fintrinsic_half(OP, name) \
-static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \
-{ \
-    uint16_t a = *(uint16_t*)pa; \
-    float A = julia__gnu_h2f_ieee(a); \
-    if (osize == 16) { \
-        float R; \
-        OP(&R, A); \
-        *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \
-    } else { \
-        OP((uint16_t*)pr, A); \
-    } \
+#define un_fintrinsic_half(OP, name)                                            \
+    static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) \
+        JL_NOTSAFEPOINT                                                         \
+    {                                                                           \
+        uint16_t a = *(uint16_t *)pa;                                           \
+        float R, A = half_to_float(a);                                          \
+        OP(ty, &R, A);                                                          \
+        *(uint16_t *)pr = float_to_half(R);                                     \
+    }
+
+#define un_fintrinsic_bfloat(OP, name)                                          \
+    static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) \
+        JL_NOTSAFEPOINT                                                         \
+    {                                                                           \
+        uint16_t a = *(uint16_t *)pa;                                           \
+        float R, A = bfloat_to_float(a);                                        \
+        OP(ty, &R, A);                                                          \
+        *(uint16_t *)pr = float_to_bfloat(R);                                   \
     }
 
 // float or integer inputs
@@ -633,11 +780,23 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pr)
 { \
     uint16_t a = *(uint16_t*)pa; \
     uint16_t b = *(uint16_t*)pb; \
-    float A = julia__gnu_h2f_ieee(a); \
-    float B = julia__gnu_h2f_ieee(b); \
+    float A = half_to_float(a); \
+    float B = half_to_float(b); \
+    runtime_nbits = 16; \
+    float R = OP(A, B); \
+    *(uint16_t*)pr = float_to_half(R); \
+}
+
+#define bi_intrinsic_bfloat(OP, name) \
+static void jl_##name##bf16(unsigned runtime_nbits, void *pa, void *pb, void *pr) JL_NOTSAFEPOINT \
+{ \
+    uint16_t a = *(uint16_t*)pa; \
+    uint16_t b = *(uint16_t*)pb; \
+    float A = bfloat_to_float(a); \
+    float B = bfloat_to_float(b); \
     runtime_nbits = 16; \
     float R = OP(A, B); \
-    *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \
+    *(uint16_t*)pr = float_to_bfloat(R); \
 }
 
 // float or integer inputs, bool output
@@ -658,8 +817,19 @@ static int jl_##name##16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEP
 { \
     uint16_t a = *(uint16_t*)pa; \
     uint16_t b = *(uint16_t*)pb; \
-    float A = julia__gnu_h2f_ieee(a); \
-    float B = julia__gnu_h2f_ieee(b); \
+    float A = half_to_float(a); \
+    float B = half_to_float(b); \
+    runtime_nbits = 16; \
+    return OP(A, B); \
+}
+
+#define bool_intrinsic_bfloat(OP, name) \
+static int jl_##name##bf16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEPOINT \
+{ \
+    uint16_t a = *(uint16_t*)pa; \
+    uint16_t b = *(uint16_t*)pb; \
+    float A = bfloat_to_float(a); \
+    float B = bfloat_to_float(b); \
     runtime_nbits = 16; \
     return OP(A, B); \
 }
@@ -699,12 +869,26 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pc,
     uint16_t a = *(uint16_t*)pa; \
     uint16_t b = *(uint16_t*)pb; \
     uint16_t c = *(uint16_t*)pc; \
-    float A = julia__gnu_h2f_ieee(a); \
-    float B = julia__gnu_h2f_ieee(b); \
-    float C = julia__gnu_h2f_ieee(c); \
+    float A = half_to_float(a); \
+    float B = half_to_float(b); \
+    float C = half_to_float(c); \
     runtime_nbits = 16; \
     float R = OP(A, B, C); \
-    *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \
+    *(uint16_t*)pr = float_to_half(R); \
+}
+
+#define ter_intrinsic_bfloat(OP, name) \
+static void jl_##name##bf16(unsigned runtime_nbits, void *pa, void *pb, void *pc, void *pr) JL_NOTSAFEPOINT \
+{ \
+    uint16_t a = *(uint16_t*)pa; \
+    uint16_t b = *(uint16_t*)pb; \
+    uint16_t c = *(uint16_t*)pc; \
+    float A = bfloat_to_float(a); \
+    float B = bfloat_to_float(b); \
+    float C = bfloat_to_float(c); \
+    runtime_nbits = 16; \
+    float R = OP(A, B, C); \
+    *(uint16_t*)pr = float_to_bfloat(R); \
 }
 
 
@@ -820,7 +1004,7 @@ static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsign
 
 // conversion operator
 
-typedef void (*intrinsic_cvt_t)(unsigned, void*, unsigned, void*);
+typedef void (*intrinsic_cvt_t)(jl_datatype_t*, void*, jl_datatype_t*, void*);
 typedef unsigned (*intrinsic_cvt_check_t)(unsigned, unsigned, void*);
 #define cvt_iintrinsic(LLVMOP, name) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \
@@ -837,24 +1021,22 @@ static inline jl_value_t *jl_intrinsic_cvt(jl_value_t *ty, jl_value_t *a, const
     if (!jl_is_primitivetype(aty))
         jl_errorf("%s: value is not a primitive type", name);
     void *pa = jl_data_ptr(a);
-    unsigned isize = jl_datatype_size(aty);
     unsigned osize = jl_datatype_size(ty);
     void *pr = alloca(osize);
-    unsigned isize_bits = isize * host_char_bit;
-    unsigned osize_bits = osize * host_char_bit;
-    op(isize_bits, pa, osize_bits, pr);
+    op((jl_datatype_t*)aty, pa, (jl_datatype_t*)ty, pr);
     return jl_new_bits(ty, pr);
 }
 
 // floating point
 
 #define un_fintrinsic_withtype(OP, name) \
+un_fintrinsic_bfloat(OP, jl_##name##bf16) \
 un_fintrinsic_half(OP, jl_##name##16) \
 un_fintrinsic_ctype(OP, jl_##name##32, float) \
 un_fintrinsic_ctype(OP, jl_##name##64, double) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \
 { \
-    return jl_fintrinsic_1(ty, a, #name, jl_##name##16, jl_##name##32, jl_##name##64); \
+    return jl_fintrinsic_1(ty, a, #name, jl_##name##bf16, jl_##name##16, jl_##name##32, jl_##name##64); \
 }
 
 #define un_fintrinsic(OP, name) \
@@ -864,33 +1046,31 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a) \
     return jl_##name##_withtype(jl_typeof(a), a); \
 }
 
-typedef void (fintrinsic_op1)(unsigned, void*, void*);
+typedef void (fintrinsic_op1)(unsigned, jl_value_t*, void*, void*);
 
-static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *halfop, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop)
+static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *bfloatop, fintrinsic_op1 *halfop, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop)
 {
     jl_task_t *ct = jl_current_task;
-    if (!jl_is_primitivetype(jl_typeof(a)))
+    jl_datatype_t *aty = (jl_datatype_t *)jl_typeof(a);
+    if (!jl_is_primitivetype(aty))
         jl_errorf("%s: value is not a primitive type", name);
     if (!jl_is_primitivetype(ty))
         jl_errorf("%s: type is not a primitive type", name);
     unsigned sz2 = jl_datatype_size(ty);
     jl_value_t *newv = jl_gc_alloc(ct->ptls, sz2, ty);
     void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv);
-    unsigned sz = jl_datatype_size(jl_typeof(a));
-    switch (sz) {
-    /* choose the right size c-type operation based on the input */
-    case 2:
-        halfop(sz2 * host_char_bit, pa, pr);
-        break;
-    case 4:
-        floatop(sz2 * host_char_bit, pa, pr);
-        break;
-    case 8:
-        doubleop(sz2 * host_char_bit, pa, pr);
-        break;
-    default:
-        jl_errorf("%s: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64", name);
-    }
+
+    if (aty == jl_float16_type)
+        halfop(sz2 * host_char_bit, ty, pa, pr);
+    else if (aty == jl_bfloat16_type)
+        bfloatop(sz2 * host_char_bit, ty, pa, pr);
+    else if (aty == jl_float32_type)
+        floatop(sz2 * host_char_bit, ty, pa, pr);
+    else if (aty == jl_float64_type)
+        doubleop(sz2 * host_char_bit, ty, pa, pr);
+    else
+        jl_errorf("%s: runtime floating point intrinsics require both arguments to be Float16, BFloat16, Float32, or Float64", name);
+
     return newv;
 }
 
@@ -1058,6 +1238,7 @@ static inline jl_value_t *jl_intrinsiclambda_checkeddiv(jl_value_t *ty, void *pa
 // floating point
 
 #define bi_fintrinsic(OP, name) \
+    bi_intrinsic_bfloat(OP, name) \
     bi_intrinsic_half(OP, name) \
     bi_intrinsic_ctype(OP, name, 32, float) \
     bi_intrinsic_ctype(OP, name, 64, double) \
@@ -1065,6 +1246,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \
 { \
     jl_task_t *ct = jl_current_task; \
     jl_value_t *ty = jl_typeof(a); \
+    jl_datatype_t *aty = (jl_datatype_t *)ty; \
     if (jl_typeof(b) != ty) \
         jl_error(#name ": types of a and b must match"); \
     if (!jl_is_primitivetype(ty)) \
@@ -1072,55 +1254,50 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \
     int sz = jl_datatype_size(ty); \
     jl_value_t *newv = jl_gc_alloc(ct->ptls, sz, ty); \
     void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pr = jl_data_ptr(newv); \
-    switch (sz) { \
-    /* choose the right size c-type operation */ \
-    case 2: \
+    if (aty == jl_float16_type) \
         jl_##name##16(16, pa, pb, pr); \
-        break; \
-    case 4: \
+    else if (aty == jl_bfloat16_type) \
+        jl_##name##bf16(16, pa, pb, pr); \
+    else if (aty == jl_float32_type) \
         jl_##name##32(32, pa, pb, pr); \
-        break; \
-    case 8: \
+    else if (aty == jl_float64_type) \
         jl_##name##64(64, pa, pb, pr); \
-        break; \
-    default: \
-        jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); \
-    } \
+    else \
+        jl_error(#name ": runtime floating point intrinsics require both arguments to be Float16, BFloat16, Float32, or Float64"); \
     return newv; \
 }
 
 #define bool_fintrinsic(OP, name) \
+    bool_intrinsic_bfloat(OP, name) \
     bool_intrinsic_half(OP, name) \
     bool_intrinsic_ctype(OP, name, 32, float) \
     bool_intrinsic_ctype(OP, name, 64, double) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \
 { \
     jl_value_t *ty = jl_typeof(a); \
+    jl_datatype_t *aty = (jl_datatype_t *)ty; \
     if (jl_typeof(b) != ty) \
         jl_error(#name ": types of a and b must match"); \
     if (!jl_is_primitivetype(ty)) \
         jl_error(#name ": values are not primitive types"); \
     void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b); \
-    int sz = jl_datatype_size(ty); \
     int cmp; \
-    switch (sz) { \
-    /* choose the right size c-type operation */ \
-    case 2: \
+    if (aty == jl_float16_type) \
         cmp = jl_##name##16(16, pa, pb); \
-        break; \
-    case 4: \
+    else if (aty == jl_bfloat16_type) \
+        cmp = jl_##name##bf16(16, pa, pb); \
+    else if (aty == jl_float32_type) \
         cmp = jl_##name##32(32, pa, pb); \
-        break; \
-    case 8: \
+    else if (aty == jl_float64_type) \
         cmp = jl_##name##64(64, pa, pb); \
-        break; \
-    default: \
-        jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); \
-    } \
+    else \
+        jl_error(#name ": runtime floating point intrinsics require both arguments to be Float16, BFloat16, Float32, or Float64"); \
+ \
     return cmp ? jl_true : jl_false; \
 }
 
 #define ter_fintrinsic(OP, name) \
+    ter_intrinsic_bfloat(OP, name) \
     ter_intrinsic_half(OP, name) \
     ter_intrinsic_ctype(OP, name, 32, float) \
     ter_intrinsic_ctype(OP, name, 64, double) \
@@ -1128,6 +1305,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c)
 { \
     jl_task_t *ct = jl_current_task; \
     jl_value_t *ty = jl_typeof(a); \
+    jl_datatype_t *aty = (jl_datatype_t *)ty; \
     if (jl_typeof(b) != ty || jl_typeof(c) != ty) \
         jl_error(#name ": types of a, b, and c must match"); \
     if (!jl_is_primitivetype(ty)) \
@@ -1135,33 +1313,27 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c)
     int sz = jl_datatype_size(ty); \
     jl_value_t *newv = jl_gc_alloc(ct->ptls, sz, ty); \
     void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pc = jl_data_ptr(c), *pr = jl_data_ptr(newv); \
-    switch (sz) { \
-    /* choose the right size c-type operation */ \
-    case 2: \
-        jl_##name##16(16, pa, pb, pc, pr); \
-        break; \
-    case 4: \
+    if (aty == jl_float16_type) \
+            jl_##name##16(16, pa, pb, pc, pr); \
+    else if (aty == jl_bfloat16_type) \
+            jl_##name##bf16(16, pa, pb, pc, pr); \
+    else if (aty == jl_float32_type) \
         jl_##name##32(32, pa, pb, pc, pr); \
-        break; \
-    case 8: \
+    else if (aty == jl_float64_type) \
         jl_##name##64(64, pa, pb, pc, pr); \
-        break; \
-    default: \
-        jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); \
-    } \
+    else \
+        jl_error(#name ": runtime floating point intrinsics require both arguments to be Float16, BFloat16, Float32, or Float64"); \
     return newv; \
 }
 
 // arithmetic
 #define neg(a) -a
-#define neg_float(pr, a) *pr = -a
+#define neg_float(ty, pr, a) *pr = -a
 un_iintrinsic_fast(LLVMNeg, neg, neg_int, u)
 #define add(a,b) a + b
 bi_iintrinsic_fast(LLVMAdd, add, add_int, u)
-bi_iintrinsic_fast(LLVMAdd, add, add_ptr, u)
 #define sub(a,b) a - b
 bi_iintrinsic_fast(LLVMSub, sub, sub_int, u)
-bi_iintrinsic_fast(LLVMSub, sub, sub_ptr, u)
 #define mul(a,b) a * b
 bi_iintrinsic_fast(LLVMMul, mul, mul_int, u)
 #define div(a,b) a / b
@@ -1174,13 +1346,50 @@ bi_iintrinsic_fast(LLVMURem, rem, urem_int, u)
 bi_iintrinsic_fast(jl_LLVMSMod, smod, smod_int,  )
 #define frem(a, b) \
     fp_select2(a, b, fmod)
-
 un_fintrinsic(neg_float,neg_float)
 bi_fintrinsic(add,add_float)
 bi_fintrinsic(sub,sub_float)
 bi_fintrinsic(mul,mul_float)
 bi_fintrinsic(div,div_float)
 
+float min_float(float x, float y) JL_NOTSAFEPOINT
+{
+    float diff = x - y;
+    float argmin = signbit(diff) ? x : y;
+    int is_nan = isnan(x) || isnan(y);
+    return is_nan ? diff : argmin;
+}
+
+double min_double(double x, double y) JL_NOTSAFEPOINT
+{
+    double diff = x - y;
+    double argmin = signbit(diff) ? x : y;
+    int is_nan = isnan(x) || isnan(y);
+    return is_nan ? diff : argmin;
+}
+
+#define _min(a, b) sizeof(a) == sizeof(float) ? min_float(a, b) : min_double(a, b)
+bi_fintrinsic(_min, min_float)
+
+float max_float(float x, float y) JL_NOTSAFEPOINT
+{
+    float diff = x - y;
+    float argmax = signbit(diff) ? y : x;
+    int is_nan = isnan(x) || isnan(y);
+    return is_nan ? diff : argmax;
+}
+
+double max_double(double x, double y) JL_NOTSAFEPOINT
+{
+    double diff = x - y;
+    double argmax = signbit(diff) ? y : x;
+    int is_nan = isnan(x) || isnan(y);
+    return is_nan ? diff : argmax;
+}
+
+#define _max(a, b) sizeof(a) == sizeof(float) ? max_float(a, b) : max_double(a, b)
+bi_fintrinsic(_max, max_float)
+
 // ternary operators //
 // runtime fma is broken on windows, define julia_fma(f) ourself with fma_emulated as reference.
 #if defined(_OS_WINDOWS_)
@@ -1350,14 +1559,14 @@ bi_iintrinsic_cnvtb_fast(LLVMAShr, ashr_op, ashr_int, , 1)
 //un_iintrinsic_fast(LLVMByteSwap, bswap_op, bswap_int, u)
 un_iintrinsic_slow(LLVMByteSwap, bswap_int, u)
 //#define ctpop_op(a) __builtin_ctpop(a)
-//uu_iintrinsic_fast(LLVMCountPopulation, ctpop_op, ctpop_int, u)
-uu_iintrinsic_slow(LLVMCountPopulation, ctpop_int, u)
+//uu_iintrinsic_fast(LLVMPopcount, ctpop_op, ctpop_int, u)
+uu_iintrinsic_slow(LLVMPopcount, ctpop_int, u)
 //#define ctlz_op(a) __builtin_ctlz(a)
-//uu_iintrinsic_fast(LLVMCountLeadingZeros, ctlz_op, ctlz_int, u)
-uu_iintrinsic_slow(LLVMCountLeadingZeros, ctlz_int, u)
+//uu_iintrinsic_fast(LLVMCountl_zero, ctlz_op, ctlz_int, u)
+uu_iintrinsic_slow(LLVMCountl_zero, ctlz_int, u)
 //#define cttz_op(a) __builtin_cttz(a)
-//uu_iintrinsic_fast(LLVMCountTrailingZeros, cttz_op, cttz_int, u)
-uu_iintrinsic_slow(LLVMCountTrailingZeros, cttz_int, u)
+//uu_iintrinsic_fast(LLVMCountr_zero, cttz_op, cttz_int, u)
+uu_iintrinsic_slow(LLVMCountr_zero, cttz_int, u)
 #define not_op(a) ~a
 un_iintrinsic_fast(LLVMFlipAllBits, not_op, not_int, u)
 
@@ -1370,28 +1579,74 @@ cvt_iintrinsic(LLVMUItoFP, uitofp)
 cvt_iintrinsic(LLVMFPtoSI, fptosi)
 cvt_iintrinsic(LLVMFPtoUI, fptoui)
 
-#define fptrunc(pr, a) \
-        if (!(osize < 8 * sizeof(a))) \
-            jl_error("fptrunc: output bitsize must be < input bitsize"); \
-        else if (osize == 16) \
-            *(uint16_t*)pr = julia__gnu_f2h_ieee(a); \
-        else if (osize == 32) \
-            *(float*)pr = a; \
-        else if (osize == 64) \
-            *(double*)pr = a; \
-        else \
-            jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64");
-#define fpext(pr, a) \
-        if (!(osize >= 8 * sizeof(a))) \
-            jl_error("fpext: output bitsize must be >= input bitsize"); \
-        if (osize == 32) \
-            *(float*)pr = a; \
-        else if (osize == 64) \
-            *(double*)pr = a; \
-        else \
-            jl_error("fpext: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
-un_fintrinsic_withtype(fptrunc,fptrunc)
-un_fintrinsic_withtype(fpext,fpext)
+#define fintrinsic_read_float16(p)   half_to_float(*(uint16_t *)p)
+#define fintrinsic_read_bfloat16(p)  bfloat_to_float(*(uint16_t *)p)
+#define fintrinsic_read_float32(p)   *(float *)p
+#define fintrinsic_read_float64(p)   *(double *)p
+
+#define fintrinsic_write_float16(p, x)  *(uint16_t *)p = double_to_half(x)
+#define fintrinsic_write_bfloat16(p, x) *(uint16_t *)p = double_to_bfloat(x)
+#define fintrinsic_write_float32(p, x)  *(float *)p = x
+#define fintrinsic_write_float64(p, x)  *(double *)p = x
+
+/*
+ * aty: Type of value argument (input)
+ * pa:  Pointer to value argument data
+ * ty:  Type argument (output)
+ * pr:  Pointer to result data
+ */
+
+static inline void fptrunc(jl_datatype_t *aty, void *pa, jl_datatype_t *ty, void *pr)
+{
+    unsigned isize = jl_datatype_size(aty), osize = jl_datatype_size(ty);
+    if (!(osize < isize)) {
+        jl_error("fptrunc: output bitsize must be < input bitsize");
+        return;
+    }
+
+#define fptrunc_convert(in, out)                                \
+    else if (aty == jl_##in##_type && ty == jl_##out##_type)    \
+        fintrinsic_write_##out(pr, fintrinsic_read_##in(pa))
+
+    if (0)
+        ;
+    fptrunc_convert(float32, float16);
+    fptrunc_convert(float64, float16);
+    fptrunc_convert(float32, bfloat16);
+    fptrunc_convert(float64, bfloat16);
+    fptrunc_convert(float64, float32);
+    else
+        jl_error("fptrunc: runtime floating point intrinsics require both arguments to be Float16, BFloat16, Float32, or Float64");
+#undef fptrunc_convert
+}
+
+static inline void fpext(jl_datatype_t *aty, void *pa, jl_datatype_t *ty, void *pr)
+{
+    unsigned isize = jl_datatype_size(aty), osize = jl_datatype_size(ty);
+    if (!(osize > isize)) {
+        jl_error("fpext: output bitsize must be > input bitsize");
+        return;
+    }
+
+#define fpext_convert(in, out)                                  \
+    else if (aty == jl_##in##_type && ty == jl_##out##_type)    \
+        fintrinsic_write_##out(pr, fintrinsic_read_##in(pa))
+
+    if (0)
+        ;
+    fpext_convert(float16, float32);
+    fpext_convert(float16, float64);
+    fpext_convert(bfloat16, float32);
+    fpext_convert(bfloat16, float64);
+    fpext_convert(float32, float64);
+    else
+        jl_error("fptrunc: runtime floating point intrinsics require both arguments to be Float16, BFloat16, Float32, or Float64");
+#undef fpext_convert
+}
+
+cvt_iintrinsic(fptrunc, fptrunc)
+cvt_iintrinsic(fpext, fpext)
+
 
 // checked arithmetic
 /**
@@ -1438,12 +1693,12 @@ checked_iintrinsic_div(LLVMRem_uov, checked_urem_int, u)
 #define flipsign(a, b) \
         (b >= 0) ? a : -a
 bi_iintrinsic_fast(jl_LLVMFlipSign, flipsign, flipsign_int,  )
-#define abs_float(pr, a)      *pr = fp_select(a, fabs)
-#define ceil_float(pr, a)     *pr = fp_select(a, ceil)
-#define floor_float(pr, a)    *pr = fp_select(a, floor)
-#define trunc_float(pr, a)    *pr = fp_select(a, trunc)
-#define rint_float(pr, a)     *pr = fp_select(a, rint)
-#define sqrt_float(pr, a)     *pr = fp_select(a, sqrt)
+#define abs_float(ty, pr, a)      *pr = fp_select(a, fabs)
+#define ceil_float(ty, pr, a)     *pr = fp_select(a, ceil)
+#define floor_float(ty, pr, a)    *pr = fp_select(a, floor)
+#define trunc_float(ty, pr, a)    *pr = fp_select(a, trunc)
+#define rint_float(ty, pr, a)     *pr = fp_select(a, rint)
+#define sqrt_float(ty, pr, a)     *pr = fp_select(a, sqrt)
 #define copysign_float(a, b)  fp_select2(a, b, copysign)
 
 un_fintrinsic(abs_float,abs_float)
@@ -1454,16 +1709,31 @@ un_fintrinsic(trunc_float,trunc_llvm)
 un_fintrinsic(rint_float,rint_llvm)
 un_fintrinsic(sqrt_float,sqrt_llvm)
 un_fintrinsic(sqrt_float,sqrt_llvm_fast)
+jl_value_t *jl_cpu_has_fma(int bits);
+
+JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ)
+{
+    JL_TYPECHK(have_fma, datatype, typ); // TODO what about float16/bfloat16?
+    if (typ == (jl_value_t*)jl_float32_type)
+        return jl_cpu_has_fma(32);
+    else if (typ == (jl_value_t*)jl_float64_type)
+        return jl_cpu_has_fma(64);
+    else
+        return jl_false;
+}
 
-JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)
+JL_DLLEXPORT jl_value_t *jl_add_ptr(jl_value_t *ptr, jl_value_t *offset)
 {
-    JL_TYPECHK(arraylen, array, a);
-    return jl_box_long(jl_array_len((jl_array_t*)a));
+    JL_TYPECHK(add_ptr, pointer, ptr);
+    JL_TYPECHK(add_ptr, ulong, offset);
+    char *ptrval = (char*)jl_unbox_long(ptr) + jl_unbox_ulong(offset);
+    return jl_new_bits(jl_typeof(ptr), &ptrval);
 }
 
-JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ)
+JL_DLLEXPORT jl_value_t *jl_sub_ptr(jl_value_t *ptr, jl_value_t *offset)
 {
-    JL_TYPECHK(have_fma, datatype, typ);
-    // TODO: run-time feature check?
-    return jl_false;
+    JL_TYPECHK(sub_ptr, pointer, ptr);
+    JL_TYPECHK(sub_ptr, ulong, offset);
+    char *ptrval = (char*)jl_unbox_long(ptr) - jl_unbox_ulong(offset);
+    return jl_new_bits(jl_typeof(ptr), &ptrval);
 }
diff --git a/src/safepoint.c b/src/safepoint.c
index c6f9a42059d1a..6a1fb127f5ff9 100644
--- a/src/safepoint.c
+++ b/src/safepoint.c
@@ -30,7 +30,8 @@ char *jl_safepoint_pages = NULL;
 // so that both safepoint load and pending signal load falls in this page.
 // The initialization of the `safepoint` pointer is done `ti_initthread`
 // in `threading.c`.
-uint8_t jl_safepoint_enable_cnt[3] = {0, 0, 0};
+// The fourth page is the count of suspended threads
+uint16_t jl_safepoint_enable_cnt[4] = {0, 0, 0, 0};
 
 // This lock should be acquired before enabling/disabling the safepoint
 // or accessing one of the following variables:
@@ -43,17 +44,18 @@ uint8_t jl_safepoint_enable_cnt[3] = {0, 0, 0};
 // load/store so that threads waiting for the GC doesn't have to also
 // fight on the safepoint lock...
 uv_mutex_t safepoint_lock;
-uv_cond_t safepoint_cond;
+uv_cond_t safepoint_cond_begin;
+uv_cond_t safepoint_cond_end;
 
 static void jl_safepoint_enable(int idx) JL_NOTSAFEPOINT
 {
     // safepoint_lock should be held
-    assert(0 <= idx && idx < 3);
+    assert(0 <= idx && idx <= 3);
     if (jl_safepoint_enable_cnt[idx]++ != 0) {
         // We expect this to be enabled at most twice
         // one for the GC, one for SIGINT.
         // Update this if this is not the case anymore in the future.
-        assert(jl_safepoint_enable_cnt[idx] <= 2);
+        assert(jl_safepoint_enable_cnt[idx] <= (idx == 3 ? INT16_MAX : 2));
         return;
     }
     // Now that we are requested to mprotect the page and it wasn't already.
@@ -62,14 +64,15 @@ static void jl_safepoint_enable(int idx) JL_NOTSAFEPOINT
     DWORD old_prot;
     VirtualProtect(pageaddr, jl_page_size, PAGE_NOACCESS, &old_prot);
 #else
-    mprotect(pageaddr, jl_page_size, PROT_NONE);
+    int r = mprotect(pageaddr, jl_page_size, PROT_NONE);
+    (void)r; //if (r) perror("mprotect");
 #endif
 }
 
 static void jl_safepoint_disable(int idx) JL_NOTSAFEPOINT
 {
     // safepoint_lock should be held
-    assert(0 <= idx && idx < 3);
+    assert(0 <= idx && idx <= 3);
     if (--jl_safepoint_enable_cnt[idx] != 0) {
         assert(jl_safepoint_enable_cnt[idx] > 0);
         return;
@@ -81,39 +84,115 @@ static void jl_safepoint_disable(int idx) JL_NOTSAFEPOINT
     DWORD old_prot;
     VirtualProtect(pageaddr, jl_page_size, PAGE_READONLY, &old_prot);
 #else
-    mprotect(pageaddr, jl_page_size, PROT_READ);
+    int r = mprotect(pageaddr, jl_page_size, PROT_READ);
+    (void)r; //if (r) perror("mprotect");
 #endif
 }
 
 void jl_safepoint_init(void)
 {
     uv_mutex_init(&safepoint_lock);
-    uv_cond_init(&safepoint_cond);
+    uv_cond_init(&safepoint_cond_begin);
+    uv_cond_init(&safepoint_cond_end);
     // jl_page_size isn't available yet.
     size_t pgsz = jl_getpagesize();
 #ifdef _OS_WINDOWS_
-    char *addr = (char*)VirtualAlloc(NULL, pgsz * 3, MEM_COMMIT, PAGE_READONLY);
+    char *addr = (char*)VirtualAlloc(NULL, pgsz * 4, MEM_COMMIT, PAGE_READONLY);
 #else
-    char *addr = (char*)mmap(0, pgsz * 3, PROT_READ,
+    char *addr = (char*)mmap(0, pgsz * 4, PROT_READ,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     if (addr == MAP_FAILED)
         addr = NULL;
 #endif
     if (addr == NULL) {
         jl_printf(JL_STDERR, "could not allocate GC synchronization page\n");
-        jl_gc_debug_critical_error();
+        jl_gc_debug_fprint_critical_error(ios_safe_stderr);
         abort();
     }
+//    // If we able to skip past the faulting safepoint instruction conditionally,
+//    // then we can make this safepoint page unconditional. But otherwise we
+//    // only enable this page when required, though it gives us less
+//    // fine-grained control over individual resume.
+//    char *pageaddr = addr + pgsz * 3;
+//#ifdef _OS_WINDOWS_
+//    DWORD old_prot;
+//    VirtualProtect(pageaddr, pgsz, PAGE_NOACCESS, &old_prot);
+//#else
+//    int r = mprotect(pageaddr, pgsz, PROT_NONE);
+//    (void)r; //if (r) perror("mprotect");
+//#endif
     // The signal page is for the gc safepoint.
     // The page before it is the sigint pending flag.
     jl_safepoint_pages = addr;
 }
 
-int jl_safepoint_start_gc(void)
+void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
 {
-    // The thread should have set this already
-    assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) == JL_GC_STATE_WAITING);
+    JL_TIMING(GC, GC_Stop);
+#ifdef USE_TRACY
+    TracyCZoneCtx ctx = JL_TIMING_DEFAULT_BLOCK->tracy_ctx;
+    TracyCZoneColor(ctx, 0x696969);
+#endif
+    assert(gc_n_threads);
+    if (gc_n_threads > 1)
+        jl_wake_libuv();
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL) {
+            // This acquire load pairs with the release stores
+            // in the signal handler of safepoint so we are sure that
+            // all the stores on those threads are visible.
+            // We're currently also using atomic store release in mutator threads
+            // (in jl_gc_state_set), but we may want to use signals to flush the
+            // memory operations on those threads lazily instead.
+            while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state)) {
+                // Use system mutexes rather than spin locking to minimize wasted CPU time
+                // while we wait for other threads reach a safepoint.
+                // This is particularly important when run under rr.
+                if (jl_options.timeout_for_safepoint_straggler_s == -1) { // timeout was not specified: no need to dump the backtrace
+                    uv_mutex_lock(&safepoint_lock);
+                    if (!jl_atomic_load_relaxed(&ptls2->gc_state)) {
+                        uv_cond_wait(&safepoint_cond_begin, &safepoint_lock);
+                    }
+                    uv_mutex_unlock(&safepoint_lock);
+                }
+                else {
+                    const int64_t timeout = jl_options.timeout_for_safepoint_straggler_s * 1000000000LL; // convert to nanoseconds
+                    int ret = 0;
+                    uv_mutex_lock(&safepoint_lock);
+                    if (!jl_atomic_load_relaxed(&ptls2->gc_state)) {
+                        ret = uv_cond_timedwait(&safepoint_cond_begin, &safepoint_lock, timeout);
+                    }
+                    uv_mutex_unlock(&safepoint_lock);
+                    // If we woke up because of a timeout, print the backtrace of the straggler
+                    if (ret == UV_ETIMEDOUT) {
+                        jl_safe_printf("===== Thread %d failed to reach safepoint after %d seconds, printing backtrace below =====\n", ptls2->tid + 1, jl_options.timeout_for_safepoint_straggler_s);
+                        // Try to record the backtrace of the straggler using `jl_try_record_thread_backtrace`
+                        jl_ptls_t ptls = jl_current_task->ptls;
+                        size_t bt_size = jl_try_record_thread_backtrace(ptls2, ptls->bt_data, JL_MAX_BT_SIZE);
+                        // Print the backtrace of the straggler
+                        for (size_t i = 0; i < bt_size; i += jl_bt_entry_size(ptls->bt_data + i)) {
+                            jl_fprint_bt_entry_codeloc(ios_safe_stderr, ptls->bt_data + i);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+int jl_safepoint_start_gc(jl_task_t *ct)
+{
+    // The thread should have just set this before entry
+    assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) == JL_GC_STATE_WAITING);
     uv_mutex_lock(&safepoint_lock);
+    uv_cond_broadcast(&safepoint_cond_begin);
+    // make sure we are permitted to run GC now (we might be required to stop instead)
+    while (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) {
+        uv_mutex_unlock(&safepoint_lock);
+        jl_safepoint_wait_thread_resume(ct);
+        uv_mutex_lock(&safepoint_lock);
+    }
     // In case multiple threads enter the GC at the same time, only allow
     // one of them to actually run the collection. We can't just let the
     // master thread do the GC since it might be running unmanaged code
@@ -121,7 +200,7 @@ int jl_safepoint_start_gc(void)
     uint32_t running = 0;
     if (!jl_atomic_cmpswap(&jl_gc_running, &running, 1)) {
         uv_mutex_unlock(&safepoint_lock);
-        jl_safepoint_wait_gc();
+        jl_safepoint_wait_gc(ct);
         return 0;
     }
     // Foreign thread adoption disables the GC and waits for it to finish, however, that may
@@ -148,20 +227,36 @@ void jl_safepoint_end_gc(void)
     jl_safepoint_disable(2);
     jl_safepoint_disable(1);
     jl_atomic_store_release(&jl_gc_running, 0);
-#  ifdef __APPLE__
+#  ifdef _OS_DARWIN_
     // This wakes up other threads on mac.
     jl_mach_gc_end();
 #  endif
     uv_mutex_unlock(&safepoint_lock);
-    uv_cond_broadcast(&safepoint_cond);
+    uv_cond_broadcast(&safepoint_cond_end);
 }
 
-void jl_safepoint_wait_gc(void)
+void jl_set_gc_and_wait(jl_task_t *ct) // n.b. not used on _OS_DARWIN_
 {
-    jl_task_t *ct = jl_current_task; (void)ct;
-    JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct);
-    // The thread should have set this is already
-    assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != 0);
+    // reading own gc state doesn't need atomic ops since no one else
+    // should store to it.
+    int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
+    jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
+    uv_mutex_lock(&safepoint_lock);
+    uv_cond_broadcast(&safepoint_cond_begin);
+    uv_mutex_unlock(&safepoint_lock);
+    jl_safepoint_wait_gc(ct);
+    jl_atomic_store_release(&ct->ptls->gc_state, state);
+    jl_safepoint_wait_thread_resume(ct); // block in thread-suspend now if requested, after clearing the gc_state
+}
+
+// this is the core of jl_set_gc_and_wait
+void jl_safepoint_wait_gc(jl_task_t *ct) JL_NOTSAFEPOINT
+{
+    if (ct) {
+        JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct);
+        // The thread should have set this is already
+        assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != JL_GC_STATE_UNSAFE);
+    }
     // Use normal volatile load in the loop for speed until GC finishes.
     // Then use an acquire load to make sure the GC result is visible on this thread.
     while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) {
@@ -170,9 +265,145 @@ void jl_safepoint_wait_gc(void)
         // This is particularly important when run under rr.
         uv_mutex_lock(&safepoint_lock);
         if (jl_atomic_load_relaxed(&jl_gc_running))
-            uv_cond_wait(&safepoint_cond, &safepoint_lock);
+            uv_cond_wait(&safepoint_cond_end, &safepoint_lock);
+        uv_mutex_unlock(&safepoint_lock);
+    }
+}
+
+// equivalent to jl_set_gc_and_wait, but waiting on resume-thread lock instead
+void jl_safepoint_wait_thread_resume(jl_task_t *ct)
+{
+    // n.b. we do not permit a fast-path here that skips the lock acquire since
+    // we otherwise have no synchronization point to ensure that this thread
+    // will observe the change to the safepoint, even though the other thread
+    // might have already observed our gc_state.
+    // if (!jl_atomic_load_relaxed(&ct->ptls->suspend_count)) return;
+    int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
+    jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
+    uv_mutex_lock(&ct->ptls->sleep_lock);
+    if (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) {
+        // defer this broadcast until we determine whether uv_cond_wait is really going to be needed
+        uv_mutex_unlock(&ct->ptls->sleep_lock);
+        uv_mutex_lock(&safepoint_lock);
+        uv_cond_broadcast(&safepoint_cond_begin);
+        uv_mutex_unlock(&safepoint_lock);
+        uv_mutex_lock(&ct->ptls->sleep_lock);
+        while (jl_atomic_load_relaxed(&ct->ptls->suspend_count))
+            uv_cond_wait(&ct->ptls->wake_signal, &ct->ptls->sleep_lock);
+    }
+    // must exit gc while still holding the mutex_unlock, so we know other
+    // threads in jl_safepoint_suspend_thread will observe this thread in the
+    // correct GC state, and not still stuck in JL_GC_STATE_WAITING
+    jl_atomic_store_release(&ct->ptls->gc_state, state);
+    uv_mutex_unlock(&ct->ptls->sleep_lock);
+}
+// This takes the sleep lock and puts the thread in GC_SAFE
+int8_t jl_safepoint_take_sleep_lock(jl_ptls_t ptls)
+{
+    int8_t gc_state = jl_gc_safe_enter(ptls);
+    uv_mutex_lock(&ptls->sleep_lock);
+    if (jl_atomic_load_relaxed(&ptls->suspend_count)) {
+        // This dance with the locks is because we are not allowed to hold both these locks at the same time
+        // This avoids a situation where  jl_safepoint_suspend_thread loads our GC state and sees GC_UNSAFE
+        // But we are in the process of becoming GC_SAFE, and also trigger the old safepoint, this causes us
+        // to go sleep in scheduler and the suspender thread to go to sleep in safepoint_cond_begin meaning we hang
+        // To avoid this we do the broadcast below to force it to observe the new gc_state
+        uv_mutex_unlock(&ptls->sleep_lock);
+        uv_mutex_lock(&safepoint_lock);
+        uv_cond_broadcast(&safepoint_cond_begin);
         uv_mutex_unlock(&safepoint_lock);
+        uv_mutex_lock(&ptls->sleep_lock);
+    }
+    return gc_state;
+}
+
+// n.b. suspended threads may still run in the GC or GC safe regions
+// but shouldn't be observable, depending on which enum the user picks (only 1 and 2 are typically recommended here)
+// waitstate = 0 : do not wait for suspend to finish
+// waitstate = 1 : wait for gc_state != 0 (JL_GC_STATE_WAITING or JL_GC_STATE_SAFE)
+// waitstate = 2 : wait for gc_state != 0 (JL_GC_STATE_WAITING or JL_GC_STATE_SAFE) and that GC is not running on that thread
+// waitstate = 3 : wait for full suspend (gc_state == JL_GC_STATE_WAITING) -- this may never happen if thread is sleeping currently
+// if another thread comes along and calls jl_safepoint_resume, we also return early
+// return new suspend count on success, 0 on failure
+int jl_safepoint_suspend_thread(int tid, int waitstate)
+{
+    if (0 > tid || tid >= jl_atomic_load_acquire(&jl_n_threads))
+        return 0;
+    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+    jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
+    if (ct2 == NULL) {
+        // this thread is not alive yet or already dead
+        return 0;
+    }
+    uv_mutex_lock(&safepoint_lock);
+    uv_mutex_lock(&ptls2->sleep_lock);
+    int16_t suspend_count = jl_atomic_load_relaxed(&ptls2->suspend_count) + 1;
+    jl_atomic_store_relaxed(&ptls2->suspend_count, suspend_count);
+    if (suspend_count == 1) { // first to suspend
+        jl_safepoint_enable(3);
+        jl_atomic_store_relaxed(&ptls2->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size * 3 + sizeof(void*)));
+        if (jl_atomic_load(&_threadedregion) != 0 || tid == jl_atomic_load_relaxed(&io_loop_tid))
+            jl_wake_libuv(); // our integration with libuv right now doesn't handle except by waking it
+    }
+    uv_mutex_unlock(&ptls2->sleep_lock);
+    if (waitstate) {
+        // wait for suspend (or another thread to call resume)
+        if (waitstate >= 2) {
+            // We currently cannot distinguish if a thread is helping run GC or
+            // not, so assume it is running GC and wait for GC to finish first.
+            // It will be unable to reenter helping with GC because we have
+            // changed its safepoint page.
+            uv_mutex_unlock(&safepoint_lock);
+            jl_set_gc_and_wait(jl_current_task);
+            uv_mutex_lock(&safepoint_lock);
+        }
+        while (jl_atomic_load_acquire(&ptls2->suspend_count) != 0) {
+            int8_t state2 = jl_atomic_load_acquire(&ptls2->gc_state);
+            if (waitstate <= 2 && state2 != JL_GC_STATE_UNSAFE)
+                break;
+            if (waitstate == 3 && state2 == JL_GC_STATE_WAITING)
+                break;
+            uv_cond_wait(&safepoint_cond_begin, &safepoint_lock);
+        }
     }
+    uv_mutex_unlock(&safepoint_lock);
+    return suspend_count;
+}
+
+// return old suspend count on success, 0 on failure
+// n.b. threads often do not resume until after all suspended threads have been resumed!
+int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT
+{
+    if (0 > tid || tid >= jl_atomic_load_acquire(&jl_n_threads))
+        return 0;
+    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+    jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
+    if (ct2 == NULL) {
+        // this thread is not alive yet or already dead
+        return 0;
+    }
+    uv_mutex_lock(&safepoint_lock);
+    uv_mutex_lock(&ptls2->sleep_lock);
+    int16_t suspend_count = jl_atomic_load_relaxed(&ptls2->suspend_count);
+    if (suspend_count == 1) { // last to unsuspend
+        if (tid == 0)
+            jl_atomic_store_relaxed(&ptls2->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size));
+        else
+            jl_atomic_store_relaxed(&ptls2->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size * 2 + sizeof(void*)));
+        uv_cond_signal(&ptls2->wake_signal);
+#ifdef _OS_DARWIN_
+        jl_safepoint_resume_thread_mach(ptls2, tid);
+#endif
+        uv_cond_broadcast(&safepoint_cond_begin);
+    }
+    if (suspend_count != 0) {
+        jl_atomic_store_relaxed(&ptls2->suspend_count, suspend_count - 1);
+        if (suspend_count == 1)
+            jl_safepoint_disable(3);
+    }
+    uv_mutex_unlock(&ptls2->sleep_lock);
+    uv_mutex_unlock(&safepoint_lock);
+    return suspend_count;
 }
 
 void jl_safepoint_enable_sigint(void)
diff --git a/src/scheduler.c b/src/scheduler.c
new file mode 100644
index 0000000000000..b13e4072c7d73
--- /dev/null
+++ b/src/scheduler.c
@@ -0,0 +1,577 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "julia.h"
+#include "julia_internal.h"
+#include "threading.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+// thread sleep state
+
+// default to DEFAULT_THREAD_SLEEP_THRESHOLD; set via $JULIA_THREAD_SLEEP_THRESHOLD
+uint64_t sleep_threshold;
+
+// thread should not be sleeping--it might need to do work.
+static const int16_t not_sleeping = 0;
+
+// it is acceptable for the thread to be sleeping.
+static const int16_t sleeping = 1;
+
+// this thread is dead.
+static const int16_t sleeping_like_the_dead JL_UNUSED = 2;
+
+// a running count of how many threads are currently not_sleeping
+// plus a running count of the number of in-flight wake-ups
+// n.b. this may temporarily exceed jl_n_threads
+_Atomic(int) n_threads_running = 0;
+
+// invariant: No thread is ever asleep unless sleep_check_state is sleeping (or we have a wakeup signal pending).
+// invariant: Any particular thread is not asleep unless that thread's sleep_check_state is sleeping.
+// invariant: The transition of a thread state to sleeping must be followed by a check that there wasn't work pending for it.
+// information: Observing thread not-sleeping is sufficient to ensure the target thread will subsequently inspect its local queue.
+// information: Observing thread is-sleeping says it may be necessary to notify it at least once to wakeup. It may already be awake however for a variety of reasons.
+// information: These observations require sequentially-consistent fences to be inserted between each of those operational phases.
+// [^store_buffering_1]: These fences are used to avoid the cycle 2b -> 1a -> 1b -> 2a -> 2b where
+// * Dequeuer:
+//   * 1: `jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping)`
+// * Enqueuer:
+//   * 2: `jl_atomic_load_relaxed(&ptls->sleep_check_state)` in `jl_wakeup_thread` returns `not_sleeping`
+// i.e., the dequeuer misses the enqueue and enqueuer misses the sleep state transition.
+// [^store_buffering_2]: and also
+// * Enqueuer:
+//   * 1a: `jl_atomic_store_relaxed(jl_uv_n_waiters, 1)` in `JL_UV_LOCK`
+//   * 1b: "cheap read" of `handle->pending` in `uv_async_send` (via `JL_UV_LOCK`) loads `0`
+// * Dequeuer:
+//   * 2a: store `2` to `handle->pending` in `uv_async_send` (via `JL_UV_LOCK` in `jl_task_get_next`)
+//   * 2b: `jl_atomic_load_relaxed(jl_uv_n_waiters)` in `jl_task_get_next` returns `0`
+// i.e., the dequeuer misses the `n_waiters` is set and enqueuer misses the `uv_stop` flag (in `signal_async`) transition to cleared
+
+JULIA_DEBUG_SLEEPWAKE(
+uint64_t wakeup_enter;
+uint64_t wakeup_leave;
+uint64_t io_wakeup_enter;
+uint64_t io_wakeup_leave;
+);
+
+JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT
+{
+    // Try to acquire the lock on this task.
+    int16_t was = jl_atomic_load_relaxed(&task->tid);
+    if (was == tid)
+        return 1;
+    if (was == -1)
+        return jl_atomic_cmpswap(&task->tid, &was, tid) || was == tid;
+    return 0;
+}
+
+JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT
+{
+    if (tpid < -1 || tpid >= jl_n_threadpools)
+        return 0;
+    task->threadpoolid = tpid;
+    return 1;
+}
+
+// initialize the threading infrastructure
+// (called only by the main thread)
+void jl_init_threadinginfra(void)
+{
+    /* initialize the synchronization trees pool */
+    sleep_threshold = DEFAULT_THREAD_SLEEP_THRESHOLD;
+    char *cp = getenv(THREAD_SLEEP_THRESHOLD_NAME);
+    if (cp) {
+        if (!strncasecmp(cp, "infinite", 8))
+            sleep_threshold = UINT64_MAX;
+        else
+            sleep_threshold = (uint64_t)strtol(cp, NULL, 10);
+    }
+}
+
+
+void JL_NORETURN jl_finish_task(jl_task_t *ct);
+
+// thread function: used by all mutator threads except the main thread
+void jl_threadfun(void *arg)
+{
+    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
+
+    // initialize this thread (set tid, create heap, set up root task)
+    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
+    void *stack_lo, *stack_hi;
+    jl_init_stack_limits(0, &stack_lo, &stack_hi);
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
+
+    // wait for all threads
+    jl_gc_state_set(ptls, JL_GC_STATE_SAFE, JL_GC_STATE_UNSAFE);
+    uv_barrier_wait(targ->barrier);
+
+    // free the thread argument here
+    free(targ);
+
+    (void)jl_gc_unsafe_enter(ptls);
+    jl_finish_task(ct); // noreturn
+}
+
+
+
+void jl_init_thread_scheduler(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    uv_mutex_init(&ptls->sleep_lock);
+    uv_cond_init(&ptls->wake_signal);
+    // record that there is now another thread that may be used to schedule work
+    // we will decrement this again in scheduler_delete_thread, only slightly
+    // in advance of pthread_join (which hopefully itself also had been
+    // adopted by now and is included in n_threads_running too)
+    (void)jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+    // n.b. this is the only point in the code where we ignore the invariants on the ordering of n_threads_running
+    // since we are being initialized from foreign code, we could not necessarily have expected or predicted that to happen
+}
+
+int jl_running_under_rr(int recheck)
+{
+#ifdef _OS_LINUX_
+#define RR_CALL_BASE 1000
+#define SYS_rrcall_check_presence (RR_CALL_BASE + 8)
+    static _Atomic(int) is_running_under_rr = 0;
+    int rr = jl_atomic_load_relaxed(&is_running_under_rr);
+    if (rr == 0 || recheck) {
+        int ret = syscall(SYS_rrcall_check_presence, 0, 0, 0, 0, 0, 0);
+        if (ret == -1)
+            // Should always be ENOSYS, but who knows what people do for
+            // unknown syscalls with their seccomp filters, so just say
+            // that we don't have rr.
+            rr = 2;
+        else
+            rr = 1;
+        jl_atomic_store_relaxed(&is_running_under_rr, rr);
+    }
+    return rr == 1;
+#else
+    return 0;
+#endif
+}
+
+
+//  sleep_check_after_threshold() -- if sleep_threshold ns have passed, return 1
+static int sleep_check_after_threshold(uint64_t *start_cycles) JL_NOTSAFEPOINT
+{
+    JULIA_DEBUG_SLEEPWAKE( return 1 ); // hammer on the sleep/wake logic much harder
+    /**
+     * This wait loop is a bit of a worst case for rr - it needs timer access,
+     * which are slow and it busy loops in user space, which prevents the
+     * scheduling logic from switching to other threads. Just don't bother
+     * trying to wait here
+     */
+    if (jl_running_under_rr(0))
+        return 1;
+    if (!(*start_cycles)) {
+        *start_cycles = jl_hrtime();
+        return 0;
+    }
+    uint64_t elapsed_cycles = jl_hrtime() - (*start_cycles);
+    if (elapsed_cycles >= sleep_threshold) {
+        *start_cycles = 0;
+        return 1;
+    }
+    return 0;
+}
+
+void surprise_wakeup(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    // equivalent to wake_thread, without the assert on wasrunning
+    int8_t state = jl_atomic_load_relaxed(&ptls->sleep_check_state);
+    if (state == sleeping) {
+        if (jl_atomic_cmpswap_relaxed(&ptls->sleep_check_state, &state, not_sleeping)) {
+            // this notification will never be consumed, so we may have now
+            // introduced some inaccuracy into the count, but that is
+            // unavoidable with any asynchronous interruption
+            jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+        }
+    }
+}
+
+
+static int set_not_sleeping(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
+        if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, not_sleeping) != not_sleeping) {
+            return 1;
+        }
+    }
+    int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, -1); // consume in-flight wakeup
+    assert(wasrunning > 1); (void)wasrunning;
+    return 0;
+}
+
+static int wake_thread(int16_t tid) JL_NOTSAFEPOINT
+{
+    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+
+    if (jl_atomic_load_relaxed(&ptls2->sleep_check_state) != not_sleeping) {
+        int8_t state = sleeping;
+        if (jl_atomic_cmpswap_relaxed(&ptls2->sleep_check_state, &state, not_sleeping)) {
+            int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1); // increment in-flight wakeup count
+            assert(wasrunning); (void)wasrunning;
+            JL_PROBE_RT_SLEEP_CHECK_WAKE(ptls2, state);
+            uv_mutex_lock(&ptls2->sleep_lock);
+            uv_cond_signal(&ptls2->wake_signal);
+            uv_mutex_unlock(&ptls2->sleep_lock);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+
+static void wake_libuv(void) JL_NOTSAFEPOINT
+{
+    JULIA_DEBUG_SLEEPWAKE( io_wakeup_enter = cycleclock() );
+    jl_wake_libuv();
+    JULIA_DEBUG_SLEEPWAKE( io_wakeup_leave = cycleclock() );
+}
+
+void wakeup_thread(jl_task_t *ct, int16_t tid) JL_NOTSAFEPOINT { // Pass in ptls when we have it already available to save a lookup
+    int16_t self = jl_atomic_load_relaxed(&ct->tid);
+    if (tid != self)
+        jl_fence(); // [^store_buffering_1]
+    jl_task_t *uvlock = jl_atomic_load_relaxed(&jl_uv_mutex.owner);
+    JULIA_DEBUG_SLEEPWAKE( wakeup_enter = cycleclock() );
+    if (tid == self || tid == -1) {
+        // we're already awake, but make sure we'll exit uv_run
+        // and that n_threads_running is updated if this is now considered in-flight
+        jl_ptls_t ptls = ct->ptls;
+        if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
+            if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, not_sleeping) != not_sleeping) {
+                int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+                assert(wasrunning); (void)wasrunning;
+                JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls);
+            }
+        }
+        if (uvlock == ct)
+            uv_stop(jl_global_event_loop());
+    }
+    else {
+        // something added to the sticky-queue: notify that thread
+        if (wake_thread(tid) && uvlock != ct) {
+            // check if we need to notify uv_run too
+            jl_fence();
+            jl_ptls_t other = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+            jl_task_t *tid_task = jl_atomic_load_relaxed(&other->current_task);
+            // now that we have changed the thread to not-sleeping, ensure that
+            // either it has not yet acquired the libuv lock, or that it will
+            // observe the change of state to not_sleeping
+            if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == tid_task)
+                wake_libuv();
+        }
+    }
+    // check if the other threads might be sleeping
+    if (tid == -1) {
+        // something added to the multi-queue: notify all threads
+        // in the future, we might want to instead wake some fraction of threads,
+        // and let each of those wake additional threads if they find work
+        int anysleep = 0;
+        int nthreads = jl_atomic_load_acquire(&jl_n_threads);
+        for (tid = 0; tid < nthreads; tid++) {
+            if (tid != self)
+                anysleep |= wake_thread(tid);
+        }
+        // check if we need to notify uv_run too
+        if (uvlock != ct && anysleep) {
+            jl_fence();
+            if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) != NULL)
+                wake_libuv();
+        }
+    }
+    JULIA_DEBUG_SLEEPWAKE( wakeup_leave = cycleclock() );
+}
+
+/* ensure thread tid is awake if necessary */
+JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
+{
+    jl_task_t *ct = jl_current_task;
+    wakeup_thread(ct, tid);
+}
+
+// get the next runnable task
+static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q)
+{
+    jl_gc_safepoint();
+    jl_task_t *task = (jl_task_t*)jl_apply_generic(trypoptask, &q, 1);
+    if (jl_is_task(task)) {
+        int self = jl_atomic_load_relaxed(&jl_current_task->tid);
+        jl_set_task_tid(task, self);
+        return task;
+    }
+    return NULL;
+}
+
+static int check_empty(jl_value_t *checkempty)
+{
+    return jl_apply_generic(checkempty, NULL, 0) == jl_true;
+}
+
+jl_task_t *wait_empty JL_GLOBALLY_ROOTED;
+void jl_wait_empty_begin(void);
+void jl_wait_empty_end(void);
+
+void jl_task_wait_empty(void)
+{
+    jl_task_t *ct = jl_current_task;
+    if (jl_atomic_load_relaxed(&ct->tid) == 0 && jl_base_module) {
+        jl_wait_empty_begin();
+        size_t lastage = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        jl_value_t *f = jl_get_global_value(jl_base_module, jl_symbol("wait"), ct->world_age);
+        wait_empty = ct;
+        if (f) {
+            JL_GC_PUSH1(&f);
+            jl_apply_generic(f, NULL, 0);
+            JL_GC_POP();
+        }
+        // we are back from jl_task_get_next now
+        ct->world_age = lastage;
+        wait_empty = NULL;
+        // TODO: move this lock acquire to before the wait_empty return and the
+        // unlock to the caller, so that we ensure new work (from uv_unref
+        // objects) didn't unexpectedly get scheduled and start running behind
+        // our back during the function return
+        JL_UV_LOCK();
+        jl_wait_empty_end();
+        JL_UV_UNLOCK();
+    }
+}
+
+static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    // sleep_check_state is only transitioned from not_sleeping to sleeping
+    // by the thread itself. As a result, if this returns false, it will
+    // continue returning false. If it returns true, we know the total
+    // modification order of the fences.
+    jl_fence(); // [^store_buffering_1] [^store_buffering_2]
+    return jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping;
+}
+
+
+JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, jl_value_t *checkempty)
+{
+    jl_task_t *ct = jl_current_task;
+    uint64_t start_cycles = 0;
+
+    while (1) {
+        jl_task_t *task = get_next_task(trypoptask, q);
+        if (task)
+            return task;
+
+        // quick, race-y check to see if there seems to be any stuff in there
+        jl_cpu_pause();
+        if (!check_empty(checkempty)) {
+            start_cycles = 0;
+            continue;
+        }
+
+        jl_cpu_pause();
+        jl_ptls_t ptls = ct->ptls;
+        if (sleep_check_after_threshold(&start_cycles) || (ptls->tid == jl_atomic_load_relaxed(&io_loop_tid) && (!jl_atomic_load_relaxed(&_threadedregion) || wait_empty))) {
+            // acquire sleep-check lock
+            assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
+            jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping);
+            jl_fence(); // [^store_buffering_1]
+            JL_PROBE_RT_SLEEP_CHECK_SLEEP(ptls);
+            if (!check_empty(checkempty)) { // uses relaxed loads
+                if (set_not_sleeping(ptls)) {
+                    JL_PROBE_RT_SLEEP_CHECK_TASKQ_WAKE(ptls);
+                }
+                continue;
+            }
+            volatile int isrunning = 1;
+            JL_TRY {
+                task = get_next_task(trypoptask, q); // note: this should not yield
+                if (ptls != ct->ptls) {
+                    // sigh, a yield was detected, so let's go ahead and handle it anyway by starting over
+                    ptls = ct->ptls;
+                    if (set_not_sleeping(ptls)) {
+                        JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
+                    }
+                    continue; // jump to JL_CATCH
+                }
+                if (task) {
+                    if (set_not_sleeping(ptls)) {
+                        JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
+                    }
+                    continue; // jump to JL_CATCH
+                }
+
+                // IO is always permitted, but outside a threaded region, only
+                // thread 0 will process messages.
+                // Inside a threaded region, any thread can listen for IO messages,
+                // and one thread should win this race and watch the event loop,
+                // but we bias away from idle threads getting parked here.
+                //
+                // The reason this works is somewhat convoluted, and closely tied to [^store_buffering_1]:
+                //  - After decrementing _threadedregion, the thread is required to
+                //    call jl_wakeup_thread(0), that will kick out any thread who is
+                //    already there, and then eventually thread 0 will get here.
+                //  - Inside a _threadedregion, there must exist at least one
+                //    thread that has a happens-before relationship on the libuv lock
+                //    before reaching this decision point in the code who will see
+                //    the lock as unlocked and thus must win this race here.
+                int uvlock = 0;
+                if (jl_atomic_load_relaxed(&_threadedregion)) {
+                    uvlock = jl_mutex_trylock(&jl_uv_mutex);
+                }
+                else if (ptls->tid == jl_atomic_load_relaxed(&io_loop_tid)) {
+                    uvlock = 1;
+                    JL_UV_LOCK();
+                }
+                else {
+                    // Since we might have started some IO work, we might need
+                    // to ensure tid = 0 will go watch that new event source.
+                    // If trylock would have succeeded, that may have been our
+                    // responsibility, so need to make sure thread 0 will take care
+                    // of us.
+                    if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == NULL) // aka trylock
+                        jl_wakeup_thread(jl_atomic_load_relaxed(&io_loop_tid));
+
+                }
+                if (uvlock) {
+                    int enter_eventloop = may_sleep(ptls);
+                    int active = 0;
+                    if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0)
+                        // if we won the race against someone who actually needs
+                        // the lock to do real work, we need to let them have it instead
+                        enter_eventloop = 0;
+                    if (enter_eventloop) {
+                        uv_loop_t *loop = jl_global_event_loop();
+                        loop->stop_flag = 0;
+                        JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_enter = cycleclock() );
+                        active = uv_run(loop, UV_RUN_ONCE);
+                        JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_leave = cycleclock() );
+                        jl_gc_safepoint();
+                    }
+                    JL_UV_UNLOCK();
+                    // optimization: check again first if we may have work to do.
+                    // Otherwise we got a spurious wakeup since some other thread
+                    // that just wanted to steal libuv from us. We will just go
+                    // right back to sleep on the individual wake signal to let
+                    // them take it from us without conflict.
+                    if (active || !may_sleep(ptls)) {
+                        if (set_not_sleeping(ptls)) {
+                            JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls);
+                        }
+                        start_cycles = 0;
+                        continue; // jump to JL_CATCH
+                    }
+                    if (!enter_eventloop && !jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == jl_atomic_load_relaxed(&io_loop_tid)) {
+                        // thread 0 is the only thread permitted to run the event loop
+                        // so it needs to stay alive, just spin-looping if necessary
+                        if (set_not_sleeping(ptls)) {
+                            JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls);
+                        }
+                        start_cycles = 0;
+                        continue; // jump to JL_CATCH
+                    }
+                }
+
+                // any thread which wants us running again will have to observe
+                // sleep_check_state==sleeping and increment n_threads_running for us
+                int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
+                assert(wasrunning);
+                isrunning = 0;
+                if (wasrunning == 1) {
+                    // This was the last running thread, and there is no thread with !may_sleep
+                    // so make sure io_loop_tid is notified to check wait_empty
+                    // TODO: this also might be a good time to check again that
+                    // libuv's queue is truly empty, instead of during delete_thread
+                    int16_t tid2 = 0;
+                    if (ptls->tid != tid2) {
+                        jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid2];
+                        uv_mutex_lock(&ptls2->sleep_lock);
+                        uv_cond_signal(&ptls2->wake_signal);
+                        uv_mutex_unlock(&ptls2->sleep_lock);
+                    }
+                }
+
+                // the other threads will just wait for an individual wake signal to resume
+                JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() );
+                int8_t gc_state = jl_safepoint_take_sleep_lock(ptls); // This puts the thread in GC_SAFE and takes the sleep lock
+                while (may_sleep(ptls)) {
+                    if (ptls->tid == 0) {
+                        task = wait_empty;
+                        if (task && jl_atomic_load_relaxed(&n_threads_running) == 0) {
+                            wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+                            assert(!wasrunning);
+                            wasrunning = !set_not_sleeping(ptls);
+                            assert(!wasrunning);
+                            JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
+                            if (!ptls->finalizers_inhibited)
+                                ptls->finalizers_inhibited++; // this annoyingly is rather sticky (we should like to reset it at the end of jl_task_wait_empty)
+                            break;
+                        }
+                        task = NULL;
+                    }
+                    // else should we warn the user of certain deadlock here if tid == 0 && n_threads_running == 0?
+                    uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock);
+                }
+                assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
+                assert(jl_atomic_load_relaxed(&n_threads_running));
+                start_cycles = 0;
+                uv_mutex_unlock(&ptls->sleep_lock);
+                JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() );
+                jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint
+                if (task) {
+                    assert(task == wait_empty);
+                    wait_empty = NULL;
+                    continue;
+                }
+            }
+            JL_CATCH {
+                // probably SIGINT, but possibly a user mistake in trypoptask
+                if (!isrunning)
+                    jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+                set_not_sleeping(ptls);
+                jl_rethrow();
+            }
+            if (task)
+                return task;
+        }
+        else {
+            // maybe check the kernel for new messages too
+            jl_process_events();
+        }
+    }
+}
+
+void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    int notsleeping = jl_atomic_exchange_relaxed(&ptls->sleep_check_state, sleeping_like_the_dead) == not_sleeping;
+    jl_fence();
+    if (notsleeping) {
+        if (jl_atomic_load_relaxed(&n_threads_running) == 1) {
+            jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[jl_atomic_load_relaxed(&io_loop_tid)];
+            // This was the last running thread, and there is no thread with !may_sleep
+            // so make sure tid 0 is notified to check wait_empty
+            uv_mutex_lock(&ptls2->sleep_lock);
+            uv_cond_signal(&ptls2->wake_signal);
+            uv_mutex_unlock(&ptls2->sleep_lock);
+        }
+    }
+    else {
+        jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+    }
+    wakeup_thread(jl_atomic_load_relaxed(&ptls->current_task), 0); // force thread 0 to see that we do not have the IO lock (and am dead)
+    jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/serialize.h b/src/serialize.h
index afcdcc31d66c4..549c1588073ff 100644
--- a/src/serialize.h
+++ b/src/serialize.h
@@ -7,66 +7,6 @@
 extern "C" {
 #endif
 
-#define TAG_SYMBOL              2
-#define TAG_SSAVALUE            3
-#define TAG_DATATYPE            4
-#define TAG_SLOTNUMBER          5
-#define TAG_SVEC                6
-#define TAG_ARRAY               7
-#define TAG_NULL                8
-#define TAG_EXPR                9
-#define TAG_PHINODE            10
-#define TAG_PHICNODE           11
-#define TAG_LONG_SYMBOL        12
-#define TAG_LONG_SVEC          13
-#define TAG_LONG_EXPR          14
-#define TAG_LONG_PHINODE       15
-#define TAG_LONG_PHICNODE      16
-#define TAG_METHODROOT         17
-#define TAG_STRING             18
-#define TAG_SHORT_INT64        19
-#define TAG_SHORT_GENERAL      20
-#define TAG_CNULL              21
-#define TAG_ARRAY1D            22
-#define TAG_SINGLETON          23
-#define TAG_MODULE             24
-#define TAG_TVAR               25
-#define TAG_METHOD_INSTANCE    26
-#define TAG_METHOD             27
-#define TAG_CODE_INSTANCE      28
-#define TAG_COMMONSYM          29
-#define TAG_NEARBYGLOBAL       30
-#define TAG_GLOBALREF          31
-#define TAG_CORE               32
-#define TAG_BASE               33
-#define TAG_BITYPENAME         34
-#define TAG_NEARBYMODULE       35
-#define TAG_INT32              36
-#define TAG_INT64              37
-#define TAG_UINT8              38
-#define TAG_VECTORTY           39
-#define TAG_PTRTY              40
-#define TAG_LONG_SSAVALUE      41
-#define TAG_LONG_METHODROOT    42
-#define TAG_SHORTER_INT64      43
-#define TAG_SHORT_INT32        44
-#define TAG_CALL1              45
-#define TAG_CALL2              46
-#define TAG_LINEINFO           47
-#define TAG_SHORT_BACKREF      48
-#define TAG_BACKREF            49
-#define TAG_UNIONALL           50
-#define TAG_GOTONODE           51
-#define TAG_QUOTENODE          52
-#define TAG_GENERAL            53
-#define TAG_GOTOIFNOT          54
-#define TAG_RETURNNODE         55
-#define TAG_ARGUMENT           56
-#define TAG_RELOC_METHODROOT   57
-#define TAG_BINDING            58
-
-#define LAST_TAG 58
-
 #define write_uint8(s, n) ios_putc((n), (s))
 #define read_uint8(s) ((uint8_t)ios_getc((s)))
 #define write_int8(s, n) write_uint8((s), (n))
@@ -134,12 +74,6 @@ static inline uint32_t read_uint32(ios_t *s) JL_NOTSAFEPOINT
 #define read_uint(s) read_uint32(s)
 #endif
 
-
-void *jl_lookup_ser_tag(jl_value_t *v);
-void *jl_lookup_common_symbol(jl_value_t *v);
-jl_value_t *jl_deser_tag(uint8_t tag);
-jl_value_t *jl_deser_symbol(uint8_t tag);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/signal-handling.c b/src/signal-handling.c
index e241fd22ecb18..1da687654dd81 100644
--- a/src/signal-handling.c
+++ b/src/signal-handling.c
@@ -18,46 +18,48 @@ extern "C" {
 #include <threading.h>
 
 // Profiler control variables
-// Note: these "static" variables are also used in "signals-*.c"
-static volatile jl_bt_element_t *bt_data_prof = NULL;
-static volatile size_t bt_size_max = 0;
-static volatile size_t bt_size_cur = 0;
+uv_mutex_t live_tasks_lock;
+uv_mutex_t bt_data_prof_lock;
+volatile jl_bt_element_t *profile_bt_data_prof = NULL;
+volatile size_t profile_bt_size_max = 0;
+volatile size_t profile_bt_size_cur = 0;
 static volatile uint64_t nsecprof = 0;
-static volatile int running = 0;
-static const    uint64_t GIGA = 1000000000ULL;
+volatile int profile_running = 0;
+volatile int profile_all_tasks = 0;
+static const uint64_t GIGA = 1000000000ULL;
 // Timers to take samples at intervals
 JL_DLLEXPORT void jl_profile_stop_timer(void);
-JL_DLLEXPORT int jl_profile_start_timer(void);
+JL_DLLEXPORT int jl_profile_start_timer(uint8_t);
 
 ///////////////////////
 // Utility functions //
 ///////////////////////
 JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec)
 {
-    bt_size_max = maxsize;
+    profile_bt_size_max = maxsize;
     nsecprof = delay_nsec;
-    if (bt_data_prof != NULL)
-        free((void*)bt_data_prof);
-    bt_data_prof = (jl_bt_element_t*) calloc(maxsize, sizeof(jl_bt_element_t));
-    if (bt_data_prof == NULL && maxsize > 0)
+    if (profile_bt_data_prof != NULL)
+        free((void*)profile_bt_data_prof);
+    profile_bt_data_prof = (jl_bt_element_t*) calloc(maxsize, sizeof(jl_bt_element_t));
+    if (profile_bt_data_prof == NULL && maxsize > 0)
         return -1;
-    bt_size_cur = 0;
+    profile_bt_size_cur = 0;
     return 0;
 }
 
 JL_DLLEXPORT uint8_t *jl_profile_get_data(void)
 {
-    return (uint8_t*) bt_data_prof;
+    return (uint8_t*) profile_bt_data_prof;
 }
 
 JL_DLLEXPORT size_t jl_profile_len_data(void)
 {
-    return bt_size_cur;
+    return profile_bt_size_cur;
 }
 
 JL_DLLEXPORT size_t jl_profile_maxlen_data(void)
 {
-    return bt_size_max;
+    return profile_bt_size_max;
 }
 
 JL_DLLEXPORT uint64_t jl_profile_delay_nsec(void)
@@ -67,12 +69,12 @@ JL_DLLEXPORT uint64_t jl_profile_delay_nsec(void)
 
 JL_DLLEXPORT void jl_profile_clear_data(void)
 {
-    bt_size_cur = 0;
+    profile_bt_size_cur = 0;
 }
 
 JL_DLLEXPORT int jl_profile_is_running(void)
 {
-    return running;
+    return profile_running;
 }
 
 // Any function that acquires this lock must be either a unmanaged thread
@@ -100,7 +102,7 @@ void jl_init_profile_lock(void)
 #endif
 }
 
-uintptr_t jl_lock_profile_rd_held(void)
+static uintptr_t jl_lock_profile_rd_held(void) JL_NOTSAFEPOINT
 {
 #ifndef _OS_WINDOWS_
     return (uintptr_t)pthread_getspecific(debuginfo_asyncsafe_held);
@@ -109,43 +111,73 @@ uintptr_t jl_lock_profile_rd_held(void)
 #endif
 }
 
-void jl_lock_profile(void)
+int jl_lock_profile(void)
 {
     uintptr_t held = jl_lock_profile_rd_held();
-    if (held++ == 0)
+    if (held == -1)
+        return 0;
+    if (held == 0) {
+        held = -1;
+#ifndef _OS_WINDOWS_
+        pthread_setspecific(debuginfo_asyncsafe_held, (void*)held);
+#else
+        TlsSetValue(debuginfo_asyncsafe_held, (void*)held);
+#endif
         uv_rwlock_rdlock(&debuginfo_asyncsafe);
+        held = 0;
+    }
+    held++;
 #ifndef _OS_WINDOWS_
     pthread_setspecific(debuginfo_asyncsafe_held, (void*)held);
 #else
     TlsSetValue(debuginfo_asyncsafe_held, (void*)held);
 #endif
+    return 1;
 }
 
 JL_DLLEXPORT void jl_unlock_profile(void)
 {
     uintptr_t held = jl_lock_profile_rd_held();
-    assert(held);
-    if (--held == 0)
-        uv_rwlock_rdunlock(&debuginfo_asyncsafe);
+    assert(held && held != -1);
+    held--;
 #ifndef _OS_WINDOWS_
     pthread_setspecific(debuginfo_asyncsafe_held, (void*)held);
 #else
     TlsSetValue(debuginfo_asyncsafe_held, (void*)held);
 #endif
+    if (held == 0)
+        uv_rwlock_rdunlock(&debuginfo_asyncsafe);
 }
 
-void jl_lock_profile_wr(void)
+int jl_lock_profile_wr(void)
 {
+    uintptr_t held = jl_lock_profile_rd_held();
+    if (held)
+        return 0;
+    held = -1;
+#ifndef _OS_WINDOWS_
+    pthread_setspecific(debuginfo_asyncsafe_held, (void*)held);
+#else
+    TlsSetValue(debuginfo_asyncsafe_held, (void*)held);
+#endif
     uv_rwlock_wrlock(&debuginfo_asyncsafe);
+    return 1;
 }
 
 void jl_unlock_profile_wr(void)
 {
+    uintptr_t held = jl_lock_profile_rd_held();
+    assert(held == -1);
+    held = 0;
+#ifndef _OS_WINDOWS_
+    pthread_setspecific(debuginfo_asyncsafe_held, (void*)held);
+#else
+    TlsSetValue(debuginfo_asyncsafe_held, (void*)held);
+#endif
     uv_rwlock_wrunlock(&debuginfo_asyncsafe);
 }
 
 
-#ifndef _OS_WINDOWS_
 static uint64_t profile_cong_rng_seed = 0;
 static int *profile_round_robin_thread_order = NULL;
 static int profile_round_robin_thread_order_size = 0;
@@ -155,8 +187,7 @@ static void jl_shuffle_int_array_inplace(int *carray, int size, uint64_t *seed)
     // The "modern Fisher–Yates shuffle" - O(n) algorithm
     // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
     for (int i = size; i-- > 1; ) {
-        uint64_t unbias = UINT64_MAX; // slightly biased, but i is very small
-        size_t j = cong(i, unbias, seed);
+        size_t j = cong(i + 1, seed); // cong is an open interval so we add 1
         uint64_t tmp = carray[j];
         carray[j] = carray[i];
         carray[i] = tmp;
@@ -177,7 +208,6 @@ static int *profile_get_randperm(int size)
     jl_shuffle_int_array_inplace(profile_round_robin_thread_order, size, &profile_cong_rng_seed);
     return profile_round_robin_thread_order;
 }
-#endif
 
 
 JL_DLLEXPORT int jl_profile_is_buffer_full(void)
@@ -185,7 +215,104 @@ JL_DLLEXPORT int jl_profile_is_buffer_full(void)
     // Declare buffer full if there isn't enough room to sample even just the
     // thread metadata and one max-sized frame. The `+ 6` is for the two block
     // terminator `0`'s plus the 4 metadata entries.
-    return bt_size_cur + ((JL_BT_MAX_ENTRY_SIZE + 1) + 6) > bt_size_max;
+    return profile_bt_size_cur + ((JL_BT_MAX_ENTRY_SIZE + 1) + 6) > profile_bt_size_max;
+}
+
+NOINLINE int failed_to_sample_task_fun(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT;
+NOINLINE int failed_to_stop_thread_fun(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT;
+
+#define PROFILE_TASK_DEBUG_FORCE_SAMPLING_FAILURE (0)
+#define PROFILE_TASK_DEBUG_FORCE_STOP_THREAD_FAILURE (0)
+
+void jl_profile_task(void)
+{
+    if (jl_profile_is_buffer_full()) {
+        // Buffer full: Delete the timer
+        jl_profile_stop_timer();
+        return;
+    }
+
+    jl_task_t *t = NULL;
+    int got_mutex = 0;
+    if (uv_mutex_trylock(&live_tasks_lock) != 0) {
+        goto collect_backtrace;
+    }
+    got_mutex = 1;
+
+    {
+        arraylist_t *tasks = jl_get_all_tasks_arraylist();
+        uint64_t seed = jl_rand();
+        const int n_max_random_attempts = 4;
+        // randomly select a task that is not done
+        for (int i = 0; i < n_max_random_attempts; i++) {
+            t = (jl_task_t*)tasks->items[cong(tasks->len, &seed)];
+            assert(t == NULL || jl_is_task(t));
+            if (t == NULL) {
+                continue;
+            }
+            int t_state = jl_atomic_load_relaxed(&t->_state);
+            if (t_state == JL_TASK_STATE_DONE) {
+                continue;
+            }
+            break;
+        }
+        arraylist_free(tasks);
+        free(tasks);
+    }
+
+collect_backtrace:
+
+    uv_mutex_lock(&bt_data_prof_lock);
+    if (profile_running == 0) {
+        uv_mutex_unlock(&bt_data_prof_lock);
+        if (got_mutex) {
+            uv_mutex_unlock(&live_tasks_lock);
+        }
+        return;
+    }
+
+    jl_record_backtrace_result_t r = {0, INT16_MAX};
+    jl_bt_element_t *bt_data_prof = (jl_bt_element_t*)(profile_bt_data_prof + profile_bt_size_cur);
+    size_t bt_size_max = profile_bt_size_max - profile_bt_size_cur - 1;
+    if (t == NULL || PROFILE_TASK_DEBUG_FORCE_SAMPLING_FAILURE) {
+        // failed to find a task
+        r.bt_size = failed_to_sample_task_fun(bt_data_prof, bt_size_max, 0);
+    }
+    else {
+        if (!PROFILE_TASK_DEBUG_FORCE_STOP_THREAD_FAILURE) {
+            r = jl_record_backtrace(t, bt_data_prof, bt_size_max, 1);
+        }
+        // we failed to get a backtrace
+        if (r.bt_size == 0) {
+            r.bt_size = failed_to_stop_thread_fun(bt_data_prof, bt_size_max, 0);
+        }
+    }
+
+    // update the profile buffer size
+    profile_bt_size_cur += r.bt_size;
+
+    // store threadid but add 1 as 0 is preserved to indicate end of block
+    profile_bt_data_prof[profile_bt_size_cur++].uintptr = (uintptr_t)r.tid + 1;
+
+    // store task id (never null)
+    profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)t;
+
+    // store cpu cycle clock
+    profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock();
+
+    // the thread profiler uses this block to record whether the thread is not sleeping (1) or sleeping (2)
+    // let's use a dummy value which is not 1 or 2 to
+    // indicate that we are profiling a task, and therefore, this block is not about the thread state
+    profile_bt_data_prof[profile_bt_size_cur++].uintptr = 3;
+
+    // Mark the end of this block with two 0's
+    profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
+    profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
+
+    uv_mutex_unlock(&bt_data_prof_lock);
+    if (got_mutex) {
+        uv_mutex_unlock(&live_tasks_lock);
+    }
 }
 
 static uint64_t jl_last_sigint_trigger = 0;
@@ -254,10 +381,11 @@ JL_DLLEXPORT void jl_exit_on_sigint(int on)
 }
 
 static uintptr_t jl_get_pc_from_ctx(const void *_ctx);
-void jl_show_sigill(void *_ctx);
+void jl_fprint_sigill(ios_t *s, void *_ctx);
 #if defined(_CPU_X86_64_) || defined(_CPU_X86_) \
     || (defined(_OS_LINUX_) && defined(_CPU_AARCH64_)) \
-    || (defined(_OS_LINUX_) && defined(_CPU_ARM_))
+    || (defined(_OS_LINUX_) && defined(_CPU_ARM_)) \
+    || (defined(_OS_LINUX_) && defined(_CPU_RISCV64_))
 static size_t jl_safe_read_mem(const volatile char *ptr, char *out, size_t len)
 {
     jl_jmp_buf *old_buf = jl_get_safe_restore();
@@ -286,24 +414,38 @@ void jl_set_profile_peek_duration(double t)
     profile_peek_duration = t;
 }
 
-uintptr_t profile_show_peek_cond_loc;
-JL_DLLEXPORT void jl_set_peek_cond(uintptr_t cond)
+jl_mutex_t profile_show_peek_cond_lock;
+static uv_async_t *profile_show_peek_cond_loc;
+JL_DLLEXPORT void jl_set_peek_cond(uv_async_t *cond)
 {
+    JL_LOCK_NOGC(&profile_show_peek_cond_lock);
     profile_show_peek_cond_loc = cond;
+    JL_UNLOCK_NOGC(&profile_show_peek_cond_lock);
 }
 
 static void jl_check_profile_autostop(void)
 {
-    if ((profile_autostop_time != -1.0) && (jl_hrtime() > profile_autostop_time)) {
+    if (profile_show_peek_cond_loc != NULL && profile_autostop_time != -1.0 && jl_hrtime() > profile_autostop_time) {
         profile_autostop_time = -1.0;
         jl_profile_stop_timer();
+        // Disable trace compilation when profile collection ends
+        jl_force_trace_compile_timing_disable();
         jl_safe_printf("\n==============================================================\n");
-        jl_safe_printf("Profile collected. A report will print at the next yield point\n");
+        jl_safe_printf("Profile collected. A report will print at the next yield point.\n");
+        jl_safe_printf("Disabling --trace-compile\n");
         jl_safe_printf("==============================================================\n\n");
-        uv_async_send((uv_async_t*)profile_show_peek_cond_loc);
+        JL_LOCK_NOGC(&profile_show_peek_cond_lock);
+        if (profile_show_peek_cond_loc != NULL)
+            uv_async_send(profile_show_peek_cond_loc);
+        JL_UNLOCK_NOGC(&profile_show_peek_cond_lock);
     }
 }
 
+static void stack_overflow_warning(void)
+{
+    jl_safe_printf("Warning: detected a stack overflow; program state may be corrupted, so further execution might be unreliable.\n");
+}
+
 #if defined(_WIN32)
 #include "signals-win.c"
 #else
@@ -330,15 +472,19 @@ static uintptr_t jl_get_pc_from_ctx(const void *_ctx)
     return ((CONTEXT*)_ctx)->Rip;
 #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
     return ((ucontext_t*)_ctx)->uc_mcontext.pc;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+    return ((ucontext_t*)_ctx)->uc_mcontext.mc_gpregs.gp_elr;
 #elif defined(_OS_LINUX_) && defined(_CPU_ARM_)
     return ((ucontext_t*)_ctx)->uc_mcontext.arm_pc;
+#elif defined(_OS_LINUX_) && defined(_CPU_RISCV64_)
+    return ((ucontext_t*)_ctx)->uc_mcontext.__gregs[REG_PC];
 #else
     // TODO for PPC
     return 0;
 #endif
 }
 
-void jl_show_sigill(void *_ctx)
+void jl_fprint_sigill(ios_t *s, void *_ctx)
 {
     char *pc = (char*)jl_get_pc_from_ctx(_ctx);
     // unsupported platform
@@ -349,31 +495,31 @@ void jl_show_sigill(void *_ctx)
     size_t len = jl_safe_read_mem(pc, (char*)inst, sizeof(inst));
     // ud2
     if (len >= 2 && inst[0] == 0x0f && inst[1] == 0x0b) {
-        jl_safe_printf("Unreachable reached at %p\n", (void*)pc);
+        jl_safe_fprintf(s, "Unreachable reached at %p\n", (void*)pc);
     }
     else {
-        jl_safe_printf("Invalid instruction at %p: ", (void*)pc);
+        jl_safe_fprintf(s, "Invalid instruction at %p: ", (void*)pc);
         for (int i = 0;i < len;i++) {
             if (i == 0) {
-                jl_safe_printf("0x%02" PRIx8, inst[i]);
+                jl_safe_fprintf(s, "0x%02" PRIx8, inst[i]);
             }
             else {
-                jl_safe_printf(", 0x%02" PRIx8, inst[i]);
+                jl_safe_fprintf(s, ", 0x%02" PRIx8, inst[i]);
             }
         }
-        jl_safe_printf("\n");
+        jl_safe_fprintf(s, "\n");
     }
 #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
     uint32_t inst = 0;
     size_t len = jl_safe_read_mem(pc, (char*)&inst, 4);
     if (len < 4)
-        jl_safe_printf("Fault when reading instruction: %d bytes read\n", (int)len);
+        jl_safe_fprintf(s, "Fault when reading instruction: %d bytes read\n", (int)len);
     if (inst == 0xd4200020) { // brk #0x1
         // The signal might actually be SIGTRAP instead, doesn't hurt to handle it here though.
-        jl_safe_printf("Unreachable reached at %p\n", pc);
+        jl_safe_fprintf(s, "Unreachable reached at %p\n", pc);
     }
     else {
-        jl_safe_printf("Invalid instruction at %p: 0x%08" PRIx32 "\n", pc, inst);
+        jl_safe_fprintf(s, "Invalid instruction at %p: 0x%08" PRIx32 "\n", pc, inst);
     }
 #elif defined(_OS_LINUX_) && defined(_CPU_ARM_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
@@ -382,39 +528,55 @@ void jl_show_sigill(void *_ctx)
         uint16_t inst[2] = {0, 0};
         size_t len = jl_safe_read_mem(pc, (char*)&inst, 4);
         if (len < 2)
-            jl_safe_printf("Fault when reading Thumb instruction: %d bytes read\n", (int)len);
+            jl_safe_fprintf(s, "Fault when reading Thumb instruction: %d bytes read\n", (int)len);
         // LLVM and GCC uses different code for the trap...
         if (inst[0] == 0xdefe || inst[0] == 0xdeff) {
             // The signal might actually be SIGTRAP instead, doesn't hurt to handle it here though.
-            jl_safe_printf("Unreachable reached in Thumb mode at %p: 0x%04" PRIx16 "\n",
-                           (void*)pc, inst[0]);
+            jl_safe_fprintf(s, "Unreachable reached in Thumb mode at %p: 0x%04" PRIx16 "\n",
+                            (void*)pc, inst[0]);
         }
         else {
-            jl_safe_printf("Invalid Thumb instruction at %p: 0x%04" PRIx16 ", 0x%04" PRIx16 "\n",
-                           (void*)pc, inst[0], inst[1]);
+            jl_safe_fprintf(s, "Invalid Thumb instruction at %p: 0x%04" PRIx16 ", 0x%04" PRIx16 "\n",
+                            (void*)pc, inst[0], inst[1]);
         }
     }
     else {
         uint32_t inst = 0;
         size_t len = jl_safe_read_mem(pc, (char*)&inst, 4);
         if (len < 4)
-            jl_safe_printf("Fault when reading instruction: %d bytes read\n", (int)len);
+            jl_safe_fprintf(s, "Fault when reading instruction: %d bytes read\n", (int)len);
         // LLVM and GCC uses different code for the trap...
         if (inst == 0xe7ffdefe || inst == 0xe7f000f0) {
             // The signal might actually be SIGTRAP instead, doesn't hurt to handle it here though.
-            jl_safe_printf("Unreachable reached in ARM mode at %p: 0x%08" PRIx32 "\n",
-                           (void*)pc, inst);
+            jl_safe_fprintf(s, "Unreachable reached in ARM mode at %p: 0x%08" PRIx32 "\n",
+                            (void*)pc, inst);
         }
         else {
-            jl_safe_printf("Invalid ARM instruction at %p: 0x%08" PRIx32 "\n", (void*)pc, inst);
+            jl_safe_fprintf(s, "Invalid ARM instruction at %p: 0x%08" PRIx32 "\n", (void*)pc, inst);
         }
     }
+#elif defined(_OS_LINUX_) && defined(_CPU_RISCV64_)
+    uint32_t inst = 0;
+    size_t len = jl_safe_read_mem(pc, (char*)&inst, 4);
+    if (len < 2)
+        jl_safe_printf("Fault when reading instruction: %d bytes read\n", (int)len);
+    if (inst == 0x00100073 || // ebreak
+        inst == 0xc0001073 || // unimp (pseudo-instruction for illegal `csrrw x0, cycle, x0`)
+        (inst & ((1 << 16) - 1)) == 0x0000) { // c.unimp (compressed form)
+        // The signal might actually be SIGTRAP instead, doesn't hurt to handle it here though.
+        jl_safe_printf("Unreachable reached at %p\n", pc);
+    }
+    else {
+        jl_safe_printf("Invalid instruction at %p: 0x%08" PRIx32 "\n", pc, inst);
+    }
 #else
     // TODO for PPC
     (void)_ctx;
 #endif
 }
 
+void surprise_wakeup(jl_ptls_t ptls) JL_NOTSAFEPOINT;
+
 // make it invalid for a task to return from this point to its stack
 // this is generally quite an foolish operation, but does free you up to do
 // arbitrary things on this stack now without worrying about corrupt state that
@@ -426,22 +588,32 @@ void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT
         ct->gcstack = NULL;
         ct->eh = NULL;
         ct->world_age = 1;
-        ct->ptls->locks.len = 0;
-        ct->ptls->in_pure_callback = 0;
-        ct->ptls->in_finalizer = 0;
-        ct->ptls->defer_signal = 0;
-        jl_atomic_store_release(&ct->ptls->gc_state, 0); // forceably exit GC (if we were in it) or safe into unsafe, without the mandatory safepoint
+        // Force all locks to drop. Is this a good idea? Of course not. But the alternative would probably deadlock instead of crashing.
+        jl_ptls_t ptls = ct->ptls;
+        small_arraylist_t *locks = &ptls->locks;
+        for (size_t i = locks->len; i > 0; i--)
+            jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
+        locks->len = 0;
+        ptls->in_pure_callback = 0;
+        ptls->in_finalizer = 0;
+        ptls->defer_signal = 0;
+        // forcibly exit GC (if we were in it) or safe into unsafe, without the mandatory safepoint
+        jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_UNSAFE);
+        surprise_wakeup(ptls);
+        // allow continuing to use a Task that should have already died--unsafe necromancy!
+        jl_atomic_store_relaxed(&ct->_state, JL_TASK_STATE_RUNNABLE);
     }
 }
 
 // what to do on a critical error on a thread
-void jl_critical_error(int sig, int si_code, bt_context_t *context, jl_task_t *ct)
+void jl_fprint_critical_error(ios_t *s, int sig, int si_code, bt_context_t *context, jl_task_t *ct)
 {
     jl_bt_element_t *bt_data = ct ? ct->ptls->bt_data : NULL;
     size_t *bt_size = ct ? &ct->ptls->bt_size : NULL;
     size_t i, n = ct ? *bt_size : 0;
     if (sig) {
-        // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit)
+        // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jl_fprint_backtrace in jl_exit)
+        // and also resets the state of ct and ptls so that some code can run on this task again
         jl_task_frame_noreturn(ct);
 #ifndef _OS_WINDOWS_
         sigset_t sset;
@@ -464,21 +636,24 @@ void jl_critical_error(int sig, int si_code, bt_context_t *context, jl_task_t *c
         pthread_sigmask(SIG_UNBLOCK, &sset, NULL);
 #endif
         if (si_code)
-            jl_safe_printf("\n[%d] signal (%d.%d): %s\n", getpid(), sig, si_code, strsignal(sig));
+            jl_safe_fprintf(s, "\n[%d] signal %d (%d): %s\n", getpid(), sig, si_code, strsignal(sig));
         else
-            jl_safe_printf("\n[%d] signal (%d): %s\n", getpid(), sig, strsignal(sig));
+            jl_safe_fprintf(s, "\n[%d] signal %d: %s\n", getpid(), sig, strsignal(sig));
+        if (sig == SIGQUIT) {
+            jl_print_task_backtraces(0);
+        }
     }
-    jl_safe_printf("in expression starting at %s:%d\n", jl_filename, jl_lineno);
+    jl_safe_fprintf(s, "in expression starting at %s:%d\n", jl_atomic_load_relaxed(&jl_filename), jl_atomic_load_relaxed(&jl_lineno));
     if (context && ct) {
         // Must avoid extended backtrace frames here unless we're sure bt_data
         // is properly rooted.
         *bt_size = n = rec_backtrace_ctx(bt_data, JL_MAX_BT_SIZE, context, NULL);
     }
     for (i = 0; i < n; i += jl_bt_entry_size(bt_data + i)) {
-        jl_print_bt_entry_codeloc(bt_data + i);
+        jl_fprint_bt_entry_codeloc(s, bt_data + i);
     }
-    jl_gc_debug_print_status();
-    jl_gc_debug_critical_error();
+    jl_gc_debug_fprint_status(s);
+    jl_gc_debug_fprint_critical_error(s);
 }
 
 #ifdef __cplusplus
diff --git a/src/signals-mach.c b/src/signals-mach.c
index 073ab2ebc33a6..1ef3e9d23094a 100644
--- a/src/signals-mach.c
+++ b/src/signals-mach.c
@@ -9,6 +9,7 @@
 #include <mach/task.h>
 #include <mach/mig_errors.h>
 #include <AvailabilityMacros.h>
+#include <stdint.h>
 #include "mach_excServer.c"
 
 #ifdef MAC_OS_X_VERSION_10_9
@@ -44,57 +45,107 @@ static void attach_exception_port(thread_port_t thread, int segv_only);
 
 // low 16 bits are the thread id, the next 8 bits are the original gc_state
 static arraylist_t suspended_threads;
-extern uv_mutex_t safepoint_lock;
-extern uv_cond_t safepoint_cond;
-void jl_mach_gc_end(void)
+extern uv_cond_t safepoint_cond_begin;
+
+#define GC_STATE_SHIFT 8*sizeof(int16_t)
+static inline int8_t decode_gc_state(uintptr_t item)
 {
-    // Requires the safepoint lock to be held
+    return (int8_t)(item >> GC_STATE_SHIFT);
+}
+
+static inline int16_t decode_tid(uintptr_t item)
+{
+    return (int16_t)item;
+}
+
+static inline uintptr_t encode_item(int16_t tid, int8_t gc_state)
+{
+    return (uintptr_t)tid | ((uintptr_t)gc_state << GC_STATE_SHIFT);
+}
+
+// see jl_safepoint_wait_thread_resume
+void jl_safepoint_resume_thread_mach(jl_ptls_t ptls2, int16_t tid2)
+{
+    // must be called with uv_mutex_lock(&safepoint_lock) and uv_mutex_lock(&ptls2->sleep_lock) held (in that order)
     for (size_t i = 0; i < suspended_threads.len; i++) {
         uintptr_t item = (uintptr_t)suspended_threads.items[i];
-        int16_t tid = (int16_t)item;
-        int8_t gc_state = (int8_t)(item >> 8);
-        jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+
+        int16_t tid = decode_tid(item);
+        int8_t gc_state = decode_gc_state(item);
+        if (tid != tid2)
+            continue;
         jl_atomic_store_release(&ptls2->gc_state, gc_state);
         thread_resume(pthread_mach_thread_np(ptls2->system_id));
+        suspended_threads.items[i] = suspended_threads.items[--suspended_threads.len];
+        break;
     }
-    suspended_threads.len = 0;
+    // thread hadn't actually reached a jl_mach_gc_wait call where we suspended it
 }
 
-// Suspend the thread and return `1` if the GC is running.
-// Otherwise return `0`
-static int jl_mach_gc_wait(jl_ptls_t ptls2,
-                           mach_port_t thread, int16_t tid)
+void jl_mach_gc_end(void)
+{
+    // must be called with uv_mutex_lock(&safepoint_lock) held
+    size_t j = 0;
+    for (size_t i = 0; i < suspended_threads.len; i++) {
+        uintptr_t item = (uintptr_t)suspended_threads.items[i];
+        int16_t tid = decode_tid(item);
+        int8_t gc_state = decode_gc_state(item);
+        jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+        uv_mutex_lock(&ptls2->sleep_lock);
+        if (jl_atomic_load_relaxed(&ptls2->suspend_count) == 0) {
+            jl_atomic_store_release(&ptls2->gc_state, gc_state);
+            thread_resume(pthread_mach_thread_np(ptls2->system_id));
+        }
+        else {
+            // this is the check for jl_safepoint_wait_thread_resume
+            suspended_threads.items[j++] = (void*)item;
+        }
+        uv_mutex_unlock(&ptls2->sleep_lock);
+    }
+    suspended_threads.len = j;
+}
+
+// implement jl_set_gc_and_wait from a different thread
+static void jl_mach_gc_wait(jl_ptls_t ptls2, mach_port_t thread, int16_t tid)
 {
+    // relaxed, since we don't mind missing one--we will hit another soon (immediately probably)
     uv_mutex_lock(&safepoint_lock);
-    if (!jl_atomic_load_relaxed(&jl_gc_running)) {
-        // relaxed, since gets set to zero only while the safepoint_lock was held
-        // this means we can tell if GC is done before we got the message or
-        // the safepoint was enabled for SIGINT.
-        uv_mutex_unlock(&safepoint_lock);
-        return 0;
+    // Since this gets set to zero only while the safepoint_lock was held this
+    // means we can tell for sure if GC is done before we got the message or
+    // the safepoint was enabled for SIGINT instead.
+    int doing_gc = jl_atomic_load_relaxed(&jl_gc_running);
+    int do_suspend = doing_gc;
+    int relaxed_suspend_count = !doing_gc && jl_atomic_load_relaxed(&ptls2->suspend_count) != 0;
+    if (relaxed_suspend_count) {
+        uv_mutex_lock(&ptls2->sleep_lock);
+        do_suspend = jl_atomic_load_relaxed(&ptls2->suspend_count) != 0;
+        // only do_suspend while holding the sleep_lock, otherwise we might miss a resume
+    }
+    if (do_suspend) {
+        // Set the gc state of the thread, suspend and record it
+        //
+        // TODO: TSAN will complain that it never saw the faulting task do an
+        // atomic release (it was in the kernel). And our attempt here does
+        // nothing, since we are a different thread, and it is not transitive).
+        //
+        // This also means we are not making this thread available for GC work.
+        // Eventually, we should probably release this signal to the original
+        // thread, (return KERN_FAILURE instead of KERN_SUCCESS) so that it
+        // triggers a SIGSEGV and gets handled by the usual codepath for unix.
+        int8_t gc_state = jl_atomic_load_acquire(&ptls2->gc_state);
+        jl_atomic_store_release(&ptls2->gc_state, JL_GC_STATE_WAITING);
+        uintptr_t item = encode_item(tid, gc_state);
+        arraylist_push(&suspended_threads, (void*)item);
+        thread_suspend(thread);
     }
-    // Otherwise, set the gc state of the thread, suspend and record it
-    // TODO: TSAN will complain that it never saw the faulting task do an
-    // atomic release (it was in the kernel). And our attempt here does
-    // nothing, since we are a different thread, and it is not transitive).
-    //
-    // This also means we are not making this thread available for GC work.
-    // Eventually, we should probably release this signal to the original
-    // thread, (return KERN_FAILURE instead of KERN_SUCCESS) so that it
-    // triggers a SIGSEGV and gets handled by the usual codepath for unix.
-    int8_t gc_state = ptls2->gc_state;
-    jl_atomic_store_release(&ptls2->gc_state, JL_GC_STATE_WAITING);
-    uintptr_t item = tid | (((uintptr_t)gc_state) << 16);
-    arraylist_push(&suspended_threads, (void*)item);
-    thread_suspend(thread);
+    if (relaxed_suspend_count)
+        uv_mutex_unlock(&ptls2->sleep_lock);
+    uv_cond_broadcast(&safepoint_cond_begin);
     uv_mutex_unlock(&safepoint_lock);
-    return 1;
 }
 
 static mach_port_t segv_port = 0;
 
-#define STR(x) #x
-#define XSTR(x) STR(x)
 #define HANDLE_MACH_ERROR(msg, retval) \
     if (retval != KERN_SUCCESS) { mach_error(msg XSTR(: __FILE__:__LINE__:), (retval)); abort(); }
 
@@ -169,37 +220,92 @@ typedef arm_exception_state64_t host_exception_state_t;
 #define HOST_EXCEPTION_STATE_COUNT ARM_EXCEPTION_STATE64_COUNT
 #endif
 
-static void jl_call_in_state(jl_ptls_t ptls2, host_thread_state_t *state,
-                             void (*fptr)(void))
+// create a fake function that describes the variable manipulations in jl_call_in_state
+__attribute__((naked)) static void fake_stack_pop(void)
 {
 #ifdef _CPU_X86_64_
-    uintptr_t rsp = state->__rsp;
+    __asm__ volatile (
+        "  .cfi_signal_frame\n"
+        "  .cfi_def_cfa %rsp, 0\n" // CFA here uses %rsp directly
+        "  .cfi_offset %rip, 0\n" // previous value of %rip at CFA
+        "  .cfi_offset %rsp, 8\n" // previous value of %rsp at CFA
+        "  nop\n"
+    );
 #elif defined(_CPU_AARCH64_)
-    uintptr_t rsp = state->__sp;
+    __asm__ volatile (
+        "  .cfi_signal_frame\n"
+        "  .cfi_def_cfa sp, 0\n" // use sp as fp here
+        "  .cfi_offset lr, 0\n"
+        "  .cfi_offset sp, 8\n"
+        // Anything else got smashed, since we didn't explicitly copy all of the
+        // state object to the stack (to build a real sigreturn frame).
+        // This is also not quite valid, since the AArch64 DWARF spec lacks the ability to define how to restore the LR register correctly,
+        // so normally libunwind implementations on linux detect this function specially and hack around the invalid info:
+        // https://github.com/llvm/llvm-project/commit/c82deed6764cbc63966374baf9721331901ca958
+        " nop\n"
+    );
 #else
-#error "julia: throw-in-context not supported on this platform"
+CFI_NORETURN
 #endif
-    if (ptls2 == NULL || ptls2->signal_stack == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) {
-        rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
-    }
-    else {
-        rsp = (uintptr_t)ptls2->signal_stack + sig_stack_size;
-    }
-    assert(rsp % 16 == 0);
+}
 
+static void jl_call_in_state(host_thread_state_t *state, void (*fptr)(void))
+{
+#ifdef _CPU_X86_64_
+    uintptr_t sp = state->__rsp;
+#elif defined(_CPU_AARCH64_)
+    uintptr_t sp = state->__sp;
+#endif
+    sp = (sp - 256) & ~(uintptr_t)15; // redzone and re-alignment
+    assert(sp % 16 == 0);
+    sp -= 16;
 #ifdef _CPU_X86_64_
-    rsp -= sizeof(void*);
-    state->__rsp = rsp; // set stack pointer
+    // set return address to NULL
+    *(uintptr_t*)sp = 0;
+    // pushq %sp
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = state->__rsp;
+    // pushq %rip
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = state->__rip;
+    // pushq .fake_stack_pop + 1; aka call from fake_stack_pop
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = (uintptr_t)&fake_stack_pop + 1;
+    state->__rsp = sp; // set stack pointer
     state->__rip = (uint64_t)fptr; // "call" the function
 #elif defined(_CPU_AARCH64_)
-    state->__sp = rsp;
-    state->__pc = (uint64_t)fptr;
-    state->__lr = 0;
+    // push {%sp, %pc + 4}
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = state->__sp;
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = (uintptr_t)state->__pc;
+    state->__sp = sp; // x31
+    state->__pc = (uint64_t)fptr; // pc
+    state->__lr = (uintptr_t)&fake_stack_pop + 4; // x30
 #else
 #error "julia: throw-in-context not supported on this platform"
 #endif
 }
 
+static void jl_longjmp_in_state(host_thread_state_t *state, jl_jmp_buf jmpbuf)
+{
+
+    if (!jl_simulate_longjmp(jmpbuf, (bt_context_t*)state)) {
+        // for sanitizer builds, fallback to calling longjmp on the original stack
+        // (this will fail for stack overflow, but that is hardly sanitizer-legal anyways)
+#ifdef _CPU_X86_64_
+    state->__rdi = (uintptr_t)jmpbuf;
+    state->__rsi = 1;
+#elif defined(_CPU_AARCH64_)
+    state->__x[0] = (uintptr_t)jmpbuf;
+    state->__x[1] = 1;
+#else
+#error "julia: jl_longjmp_in_state not supported on this platform"
+#endif
+        jl_call_in_state(state, (void (*)(void))longjmp);
+    }
+}
+
 #ifdef _CPU_X86_64_
 int is_write_fault(host_exception_state_t exc_state) {
     return exc_reg_is_write_fault(exc_state.__err);
@@ -221,14 +327,26 @@ static void jl_throw_in_thread(jl_ptls_t ptls2, mach_port_t thread, jl_value_t *
     host_thread_state_t state;
     kern_return_t ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count);
     HANDLE_MACH_ERROR("thread_get_state", ret);
-    if (1) { // XXX: !jl_has_safe_restore(ptls2)
+    if (ptls2->safe_restore) {
+        jl_longjmp_in_state(&state, *ptls2->safe_restore);
+    }
+    else {
         assert(exception);
         ptls2->bt_size =
             rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, (bt_context_t *)&state,
-                              NULL /*current_task?*/);
+                            NULL /*current_task?*/);
         ptls2->sig_exception = exception;
+        ptls2->io_wait = 0;
+        jl_task_t *ct = jl_atomic_load_relaxed(&ptls2->current_task);
+        jl_handler_t *eh = ct->eh;
+        if (eh != NULL) {
+            asan_unpoison_task_stack(ct, &eh->eh_ctx);
+            jl_longjmp_in_state(&state, eh->eh_ctx);
+        }
+        else {
+            jl_no_exc_handler(exception, ct);
+        }
     }
-    jl_call_in_state(ptls2, &state, &jl_sig_throw);
     ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count);
     HANDLE_MACH_ERROR("thread_set_state", ret);
 }
@@ -236,14 +354,18 @@ static void jl_throw_in_thread(jl_ptls_t ptls2, mach_port_t thread, jl_value_t *
 static void segv_handler(int sig, siginfo_t *info, void *context)
 {
     assert(sig == SIGSEGV || sig == SIGBUS);
-    if (jl_get_safe_restore()) { // restarting jl_ or jl_unwind_stepn
-        jl_task_t *ct = jl_get_current_task();
-        jl_ptls_t ptls = ct == NULL ? NULL : ct->ptls;
-        jl_call_in_state(ptls, (host_thread_state_t*)jl_to_bt_context(context), &jl_sig_throw);
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or jl_unwind_stepn
+        jl_longjmp_in_state((host_thread_state_t*)jl_to_bt_context(context), *saferestore);
+        return;
     }
-    else {
-        sigdie_handler(sig, info, context);
+    jl_task_t *ct = jl_get_current_task();
+    if ((sig != SIGBUS || info->si_code == BUS_ADRERR) &&
+    !(ct == NULL || ct->ptls == NULL || jl_atomic_load_relaxed(&ct->ptls->gc_state) == JL_GC_STATE_WAITING || ct->eh == NULL)
+    && is_addr_on_stack(ct, info->si_addr)) { // stack overflow and not a BUS_ADRALN (alignment error)
+        stack_overflow_warning();
     }
+    sigdie_handler(sig, info, context);
 }
 
 // n.b. mach_exc_server expects us to define this symbol locally
@@ -279,25 +401,27 @@ kern_return_t catch_mach_exception_raise(
     int nthreads = jl_atomic_load_acquire(&jl_n_threads);
     for (tid = 0; tid < nthreads; tid++) {
         jl_ptls_t _ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+        if (jl_atomic_load_relaxed(&_ptls2->current_task) == NULL) {
+            // this thread is dead
+            continue;
+        }
         if (pthread_mach_thread_np(_ptls2->system_id) == thread) {
             ptls2 = _ptls2;
             break;
         }
     }
-    if (!ptls2 || ptls2->current_task == NULL) {
+    if (!ptls2) {
         // We don't know about this thread, let the kernel try another handler
         // instead. This shouldn't actually happen since we only register the
         // handler for the threads we know about.
         jl_safe_printf("ERROR: Exception handler triggered on unmanaged thread.\n");
         return KERN_INVALID_ARGUMENT;
     }
-    // XXX: jl_throw_in_thread or segv_handler will eventually check this, but
-    //      we would like to avoid some of this work if we could detect this earlier
-    // if (jl_has_safe_restore(ptls2)) {
-    //     jl_throw_in_thread(ptls2, thread, jl_stackovf_exception);
-    //     return KERN_SUCCESS;
-    // }
-    if (ptls2->gc_state == JL_GC_STATE_WAITING)
+    if (ptls2->safe_restore) {
+        jl_throw_in_thread(ptls2, thread, NULL);
+        return KERN_SUCCESS;
+    }
+    if (jl_atomic_load_acquire(&ptls2->gc_state) == JL_GC_STATE_WAITING)
         return KERN_FAILURE;
     if (exception == EXC_ARITHMETIC) {
         jl_throw_in_thread(ptls2, thread, jl_diverror_exception);
@@ -310,8 +434,7 @@ kern_return_t catch_mach_exception_raise(
     kern_return_t ret = thread_get_state(thread, HOST_EXCEPTION_STATE, (thread_state_t)&exc_state, &exc_count);
     HANDLE_MACH_ERROR("thread_get_state", ret);
     if (jl_addr_is_safepoint(fault_addr) && !is_write_fault(exc_state)) {
-        if (jl_mach_gc_wait(ptls2, thread, tid))
-            return KERN_SUCCESS;
+        jl_mach_gc_wait(ptls2, thread, tid);
         if (ptls2->tid != 0)
             return KERN_SUCCESS;
         if (ptls2->defer_signal) {
@@ -323,10 +446,11 @@ kern_return_t catch_mach_exception_raise(
         }
         return KERN_SUCCESS;
     }
-    if (ptls2->current_task->eh == NULL)
+    if (jl_atomic_load_relaxed(&ptls2->current_task)->eh == NULL)
         return KERN_FAILURE;
     jl_value_t *excpt;
     if (is_addr_on_stack(jl_atomic_load_relaxed(&ptls2->current_task), (void*)fault_addr)) {
+        stack_overflow_warning();
         excpt = jl_stackovf_exception;
     }
     else if (is_write_fault(exc_state)) // false for alignment errors
@@ -372,7 +496,7 @@ kern_return_t catch_mach_exception_raise_state_identity(
 static void attach_exception_port(thread_port_t thread, int segv_only)
 {
     kern_return_t ret;
-    // http://www.opensource.apple.com/source/xnu/xnu-2782.1.97/osfmk/man/thread_set_exception_ports.html
+    // https://www.opensource.apple.com/source/xnu/xnu-2782.1.97/osfmk/man/thread_set_exception_ports.html
     exception_mask_t mask = EXC_MASK_BAD_ACCESS;
     if (!segv_only)
         mask |= EXC_MASK_ARITHMETIC;
@@ -380,12 +504,12 @@ static void attach_exception_port(thread_port_t thread, int segv_only)
     HANDLE_MACH_ERROR("thread_set_exception_ports", ret);
 }
 
-static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx)
+static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) JL_NOTSAFEPOINT
 {
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
     if (ptls2 == NULL) // this thread is not alive
         return 0;
-    jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
+    jl_task_t *ct2 = jl_atomic_load_relaxed(&ptls2->current_task);
     if (ct2 == NULL) // this thread is already dead
         return 0;
 
@@ -403,18 +527,18 @@ static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx)
     return 1;
 }
 
-static void jl_thread_suspend_and_get_state(int tid, int timeout, unw_context_t **ctx)
+static int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
 {
     (void)timeout;
-    static host_thread_state_t state;
+    host_thread_state_t state;
     if (!jl_thread_suspend_and_get_state2(tid, &state)) {
-        *ctx = NULL;
-        return;
+        return 0;
     }
-    *ctx = (unw_context_t*)&state;
+    *ctx = *(unw_context_t*)&state;
+    return 1;
 }
 
-static void jl_thread_resume(int tid, int sig)
+void jl_thread_resume(int tid)
 {
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
     mach_port_t thread = pthread_mach_thread_np(ptls2->system_id);
@@ -455,8 +579,7 @@ static void jl_try_deliver_sigint(void)
 
 static void JL_NORETURN jl_exit_thread0_cb(int signo)
 {
-CFI_NORETURN
-    jl_critical_error(signo, 0, NULL, jl_current_task);
+    jl_fprint_critical_error(ios_safe_stderr, signo, 0, NULL, jl_current_task);
     jl_atexit_hook(128);
     jl_raise(signo);
 }
@@ -487,7 +610,7 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
 #else
 #error Fill in first integer argument here
 #endif
-    jl_call_in_state(ptls2, &state, (void (*)(void))&jl_exit_thread0_cb);
+    jl_call_in_state(&state, (void (*)(void))&jl_exit_thread0_cb);
     unsigned int count = MACH_THREAD_STATE_COUNT;
     ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count);
     HANDLE_MACH_ERROR("thread_set_state", ret);
@@ -589,8 +712,98 @@ static void jl_unlock_profile_mach(int dlsymlock, int keymgr_locked)
     jl_unlock_profile();
 }
 
-#define jl_lock_profile()       int keymgr_locked = jl_lock_profile_mach(1)
-#define jl_unlock_profile()     jl_unlock_profile_mach(1, keymgr_locked)
+int jl_thread_suspend(int16_t tid, bt_context_t *ctx)
+{
+    int lockret = jl_lock_profile_mach(1);
+    int success = jl_thread_suspend_and_get_state(tid, 1, ctx);
+    jl_unlock_profile_mach(1, lockret);
+    return success;
+}
+
+void jl_with_stackwalk_lock(void (*f)(void*), void *ctx)
+{
+    int lockret = jl_lock_profile_mach(1);
+    f(ctx);
+    jl_unlock_profile_mach(1, lockret);
+}
+
+// assumes holding `jl_lock_profile_mach`
+void jl_profile_thread_mach(int tid)
+{
+    // if there is no space left, return early
+    if (jl_profile_is_buffer_full()) {
+        jl_profile_stop_timer();
+        return;
+    }
+    if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL)
+        _dyld_dlopen_atfork_prepare();
+    if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
+        _dyld_atfork_prepare(); // briefly acquire the dlsym lock
+    host_thread_state_t state;
+    int valid_thread = jl_thread_suspend_and_get_state2(tid, &state);
+    unw_context_t *uc = (unw_context_t*)&state;
+    if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
+        _dyld_atfork_parent(); // quickly release the dlsym lock
+    if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL)
+        _dyld_dlopen_atfork_parent();
+    if (!valid_thread)
+        return;
+    if (profile_running) {
+#ifdef LLVMLIBUNWIND
+        /*
+            *  Unfortunately compact unwind info is incorrectly generated for quite a number of
+            *  libraries by quite a large number of compilers. We can fall back to DWARF unwind info
+            *  in some cases, but in quite a number of cases (especially libraries not compiled in debug
+            *  mode, only the compact unwind info may be available). Even more unfortunately, there is no
+            *  way to detect such bogus compact unwind info (other than noticing the resulting segfault).
+            *  What we do here is ugly, but necessary until the compact unwind info situation improves.
+            *  We try to use the compact unwind info and if that results in a segfault, we retry with DWARF info.
+            *  Note that in a small number of cases this may result in bogus stack traces, but at least the topmost
+            *  entry will always be correct, and the number of cases in which this is an issue is rather small.
+            *  Other than that, this implementation is not incorrect as the other thread is paused while we are profiling
+            *  and during stack unwinding we only ever read memory, but never write it.
+            */
+
+        forceDwarf = 0;
+        unw_getcontext(&profiler_uc); // will resume from this point if the next lines segfault at any point
+
+        if (forceDwarf == 0) {
+            // Save the backtrace
+            profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur, profile_bt_size_max - profile_bt_size_cur - 1, uc, NULL);
+        }
+        else if (forceDwarf == 1) {
+            profile_bt_size_cur += rec_backtrace_ctx_dwarf((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur, profile_bt_size_max - profile_bt_size_cur - 1, uc, NULL);
+        }
+        else if (forceDwarf == -1) {
+            jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
+        }
+
+        forceDwarf = -2;
+#else
+        profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur, profile_bt_size_max - profile_bt_size_cur - 1, uc, NULL);
+#endif
+        jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+
+        // store threadid but add 1 as 0 is preserved to indicate end of block
+        profile_bt_data_prof[profile_bt_size_cur++].uintptr = ptls->tid + 1;
+
+        // store task id (never null)
+        profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
+
+        // store cpu cycle clock
+        profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock();
+
+        // store whether thread is sleeping (don't ever encode a state as `0` since is preserved to indicate end of block)
+        int state = jl_atomic_load_relaxed(&ptls->sleep_check_state) == 0 ? PROFILE_STATE_THREAD_NOT_SLEEPING : PROFILE_STATE_THREAD_SLEEPING;
+        profile_bt_data_prof[profile_bt_size_cur++].uintptr = state;
+
+        // Mark the end of this block with two 0's
+        profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
+        profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
+    }
+    // We're done! Resume the thread.
+    jl_thread_resume(tid);
+}
 
 void *mach_profile_listener(void *arg)
 {
@@ -609,88 +822,21 @@ void *mach_profile_listener(void *arg)
         // sample each thread, round-robin style in reverse order
         // (so that thread zero gets notified last)
         int keymgr_locked = jl_lock_profile_mach(0);
-
         int nthreads = jl_atomic_load_acquire(&jl_n_threads);
-        int *randperm = profile_get_randperm(nthreads);
-        for (int idx = nthreads; idx-- > 0; ) {
-            // Stop the threads in the random or reverse round-robin order.
-            int i = randperm[idx];
-            // if there is no space left, break early
-            if (jl_profile_is_buffer_full()) {
-                jl_profile_stop_timer();
-                break;
-            }
-
-            if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL)
-                _dyld_dlopen_atfork_prepare();
-            if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
-                _dyld_atfork_prepare(); // briefly acquire the dlsym lock
-            host_thread_state_t state;
-            int valid_thread = jl_thread_suspend_and_get_state2(i, &state);
-            unw_context_t *uc = (unw_context_t*)&state;
-            if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
-                _dyld_atfork_parent(); // quickly release the dlsym lock
-            if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL)
-                _dyld_dlopen_atfork_parent();
-            if (!valid_thread)
-                continue;
-            if (running) {
-#ifdef LLVMLIBUNWIND
-                /*
-                 *  Unfortunately compact unwind info is incorrectly generated for quite a number of
-                 *  libraries by quite a large number of compilers. We can fall back to DWARF unwind info
-                 *  in some cases, but in quite a number of cases (especially libraries not compiled in debug
-                 *  mode, only the compact unwind info may be available). Even more unfortunately, there is no
-                 *  way to detect such bogus compact unwind info (other than noticing the resulting segfault).
-                 *  What we do here is ugly, but necessary until the compact unwind info situation improves.
-                 *  We try to use the compact unwind info and if that results in a segfault, we retry with DWARF info.
-                 *  Note that in a small number of cases this may result in bogus stack traces, but at least the topmost
-                 *  entry will always be correct, and the number of cases in which this is an issue is rather small.
-                 *  Other than that, this implementation is not incorrect as the other thread is paused while we are profiling
-                 *  and during stack unwinding we only ever read memory, but never write it.
-                 */
-
-                forceDwarf = 0;
-                unw_getcontext(&profiler_uc); // will resume from this point if the next lines segfault at any point
-
-                if (forceDwarf == 0) {
-                    // Save the backtrace
-                    bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL);
-                }
-                else if (forceDwarf == 1) {
-                    bt_size_cur += rec_backtrace_ctx_dwarf((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL);
-                }
-                else if (forceDwarf == -1) {
-                    jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
-                }
-
-                forceDwarf = -2;
-#else
-                bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL);
-#endif
-                jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[i];
-
-                // store threadid but add 1 as 0 is preserved to indicate end of block
-                bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
-
-                // store task id (never null)
-                bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
-
-                // store cpu cycle clock
-                bt_data_prof[bt_size_cur++].uintptr = cycleclock();
-
-                // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
-                bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
-
-                // Mark the end of this block with two 0's
-                bt_data_prof[bt_size_cur++].uintptr = 0;
-                bt_data_prof[bt_size_cur++].uintptr = 0;
+        if (profile_all_tasks) {
+            // Don't take the stackwalk lock here since it's already taken in `jl_rec_backtrace`
+            jl_profile_task();
+        }
+        else {
+            int *randperm = profile_get_randperm(nthreads);
+            for (int idx = nthreads; idx-- > 0; ) {
+                // Stop the threads in random order.
+                int i = randperm[idx];
+                jl_profile_thread_mach(i);
             }
-            // We're done! Resume the thread.
-            jl_thread_resume(i, 0);
         }
         jl_unlock_profile_mach(0, keymgr_locked);
-        if (running) {
+        if (profile_running) {
             jl_check_profile_autostop();
             // Reset the alarm
             kern_return_t ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
@@ -699,7 +845,8 @@ void *mach_profile_listener(void *arg)
     }
 }
 
-JL_DLLEXPORT int jl_profile_start_timer(void)
+
+JL_DLLEXPORT int jl_profile_start_timer(uint8_t all_tasks)
 {
     kern_return_t ret;
     if (!profile_started) {
@@ -728,7 +875,8 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
     timerprof.tv_sec = nsecprof/GIGA;
     timerprof.tv_nsec = nsecprof%GIGA;
 
-    running = 1;
+    profile_running = 1;
+    profile_all_tasks = all_tasks;
     // ensure the alarm is running
     ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
     HANDLE_MACH_ERROR("clock_alarm", ret);
@@ -738,5 +886,8 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
 
 JL_DLLEXPORT void jl_profile_stop_timer(void)
 {
-    running = 0;
+    uv_mutex_lock(&bt_data_prof_lock);
+    profile_running = 0;
+    profile_all_tasks = 0;
+    uv_mutex_unlock(&bt_data_prof_lock);
 }
diff --git a/src/signals-unix.c b/src/signals-unix.c
index 4c21d25d3622c..16e70ef0f764e 100644
--- a/src/signals-unix.c
+++ b/src/signals-unix.c
@@ -9,6 +9,10 @@
 #include <pthread.h>
 #include <time.h>
 #include <errno.h>
+
+#include "julia.h"
+#include "julia_internal.h"
+
 #if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
 #define MAP_ANONYMOUS MAP_ANON
 #endif
@@ -24,10 +28,12 @@
 #endif
 
 // Figure out the best signals/timers to use for this platform
-#ifdef __APPLE__ // Darwin's mach ports allow signal-free thread management
+#if defined(__APPLE__) // Darwin's mach ports allow signal-free thread management
 #define HAVE_MACH
 #define HAVE_KEVENT
-#else // generic Linux or BSD
+#elif defined(__OpenBSD__)
+#define HAVE_KEVENT
+#else // generic Linux or FreeBSD
 #define HAVE_TIMER
 #endif
 
@@ -35,16 +41,14 @@
 #include <sys/event.h>
 #endif
 
-// 8M signal stack, same as default stack size and enough
-// for reasonable finalizers.
-// Should also be enough for parallel GC when we have it =)
-#define sig_stack_size (8 * 1024 * 1024)
+// 8M signal stack, same as default stack size (though we barely use this)
+static const size_t sig_stack_size = 8 * 1024 * 1024;
 
 #include "julia_assert.h"
 
 // helper function for returning the unw_context_t inside a ucontext_t
 // (also used by stackwalk.c)
-bt_context_t *jl_to_bt_context(void *sigctx)
+bt_context_t *jl_to_bt_context(void *sigctx) JL_NOTSAFEPOINT
 {
 #ifdef __APPLE__
     return (bt_context_t*)&((ucontext64_t*)sigctx)->uc_mcontext64->__ss;
@@ -62,7 +66,11 @@ bt_context_t *jl_to_bt_context(void *sigctx)
 static int thread0_exit_count = 0;
 static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size);
 
-static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *_ctx)
+int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c) JL_NOTSAFEPOINT;
+static void jl_longjmp_in_ctx(int sig, void *_ctx, jl_jmp_buf jmpbuf);
+
+#if !defined(_OS_DARWIN_)
+static inline uintptr_t jl_get_rsp_from_ctx(const void *_ctx)
 {
 #if defined(_OS_LINUX_) && defined(_CPU_X86_64_)
     const ucontext_t *ctx = (const ucontext_t*)_ctx;
@@ -76,32 +84,34 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *
 #elif defined(_OS_LINUX_) && defined(_CPU_ARM_)
     const ucontext_t *ctx = (const ucontext_t*)_ctx;
     return ctx->uc_mcontext.arm_sp;
-#elif defined(_OS_DARWIN_) && defined(_CPU_X86_64_)
-    const ucontext64_t *ctx = (const ucontext64_t*)_ctx;
-    return ctx->uc_mcontext64->__ss.__rsp;
-#elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)
-    const ucontext64_t *ctx = (const ucontext64_t*)_ctx;
-    return ctx->uc_mcontext64->__ss.__sp;
+#elif defined(_OS_LINUX_) && (defined(_CPU_RISCV64_))
+    const ucontext_t *ctx = (const ucontext_t*)_ctx;
+    return ctx->uc_mcontext.__gregs[REG_SP];
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
     const ucontext_t *ctx = (const ucontext_t*)_ctx;
     return ctx->uc_mcontext.mc_rsp;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+    const ucontext_t *ctx = (const ucontext_t*)_ctx;
+    return ctx->uc_mcontext.mc_gpregs.gp_sp;
+#elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_)
+    const struct sigcontext *ctx = (const struct sigcontext *)_ctx;
+    return ctx->sc_rsp;
 #else
     // TODO Add support for PowerPC(64)?
     return 0;
 #endif
 }
 
-static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr)
+static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) JL_NOTSAFEPOINT
 {
     // One guard page for signal_stack.
-    return !((char*)ptr < (char*)ptls->signal_stack - jl_page_size ||
-             (char*)ptr > (char*)ptls->signal_stack + sig_stack_size);
+    return ptls->signal_stack == NULL ||
+           ((char*)ptr >= (char*)ptls->signal_stack - jl_page_size &&
+            (char*)ptr <= (char*)ptls->signal_stack + (ptls->signal_stack_size ? ptls->signal_stack_size : sig_stack_size));
 }
 
-// Modify signal context `_ctx` so that `fptr` will execute when the signal
-// returns. `fptr` will execute on the signal stack, and must not return.
-// jl_call_in_ctx is also currently executing on that signal stack,
-// so be careful not to smash it
+// Modify signal context `_ctx` so that `fptr` will execute when the signal returns
+// The function `fptr` itself must not return.
 JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_ctx)
 {
     // Modifying the ucontext should work but there is concern that
@@ -111,47 +121,48 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
     // checks that the syscall is made in the signal handler and that
     // the ucontext address is valid. Hopefully the value of the ucontext
     // will not be part of the validation...
-    if (!ptls || !ptls->signal_stack) {
-        sigset_t sset;
-        sigemptyset(&sset);
-        sigaddset(&sset, sig);
-        pthread_sigmask(SIG_UNBLOCK, &sset, NULL);
-        fptr();
-        return;
-    }
     uintptr_t rsp = jl_get_rsp_from_ctx(_ctx);
-    if (is_addr_on_sigstack(ptls, (void*)rsp)) {
-        rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
-    }
-    else {
-        rsp = (uintptr_t)ptls->signal_stack + sig_stack_size;
-    }
-    assert(rsp % 16 == 0);
+    rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
 #if defined(_OS_LINUX_) && defined(_CPU_X86_64_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.gregs[REG_RSP] = rsp;
     ctx->uc_mcontext.gregs[REG_RIP] = (uintptr_t)fptr;
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.mc_rsp = rsp;
     ctx->uc_mcontext.mc_rip = (uintptr_t)fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_X86_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.gregs[REG_ESP] = rsp;
     ctx->uc_mcontext.gregs[REG_EIP] = (uintptr_t)fptr;
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.mc_esp = rsp;
     ctx->uc_mcontext.mc_eip = (uintptr_t)fptr;
+#elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_)
+    struct sigcontext *ctx = (struct sigcontext *)_ctx;
+    rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
+    ctx->sc_rsp = rsp;
+    ctx->sc_rip = fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     ctx->uc_mcontext.sp = rsp;
     ctx->uc_mcontext.regs[29] = 0; // Clear link register (x29)
     ctx->uc_mcontext.pc = (uintptr_t)fptr;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+    ucontext_t *ctx = (ucontext_t*)_ctx;
+    ctx->uc_mcontext.mc_gpregs.gp_sp = rsp;
+    ctx->uc_mcontext.mc_gpregs.gp_x[29] = 0; // Clear link register (x29)
+    ctx->uc_mcontext.mc_gpregs.gp_elr = (uintptr_t)fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_ARM_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     uintptr_t target = (uintptr_t)fptr;
@@ -171,22 +182,11 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
     ctx->uc_mcontext.arm_sp = rsp;
     ctx->uc_mcontext.arm_lr = 0; // Clear link register
     ctx->uc_mcontext.arm_pc = target;
-#elif defined(_OS_DARWIN_) && (defined(_CPU_X86_64_) || defined(_CPU_AARCH64_))
-    // Only used for SIGFPE.
-    // This doesn't seems to be reliable when the SIGFPE is generated
-    // from a divide-by-zero exception, which is now handled by
-    // `catch_exception_raise`. It works fine when a signal is received
-    // due to `kill`/`raise` though.
-    ucontext64_t *ctx = (ucontext64_t*)_ctx;
-#if defined(_CPU_X86_64_)
-    rsp -= sizeof(void*);
-    ctx->uc_mcontext64->__ss.__rsp = rsp;
-    ctx->uc_mcontext64->__ss.__rip = (uintptr_t)fptr;
-#else
-    ctx->uc_mcontext64->__ss.__sp = rsp;
-    ctx->uc_mcontext64->__ss.__pc = (uintptr_t)fptr;
-    ctx->uc_mcontext64->__ss.__lr = 0;
-#endif
+#elif defined(_OS_LINUX_) && (defined(_CPU_RISCV64_))
+    ucontext_t *ctx = (ucontext_t*)_ctx;
+    ctx->uc_mcontext.__gregs[REG_SP] = rsp;
+    ctx->uc_mcontext.__gregs[REG_RA] = 0; // Clear return address address (ra)
+    ctx->uc_mcontext.__gregs[REG_PC] = (uintptr_t)fptr;
 #else
 #pragma message("julia: throw-in-context not supported on this platform")
     // TODO Add support for PowerPC(64)?
@@ -197,30 +197,38 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
     fptr();
 #endif
 }
+#endif
 
 static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *e, int sig, void *sigctx)
 {
     jl_ptls_t ptls = ct->ptls;
-    if (!jl_get_safe_restore()) {
-        ptls->bt_size =
-            rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx),
-                              ct->gcstack);
-        ptls->sig_exception = e;
+    assert(!jl_get_safe_restore());
+    ptls->bt_size =
+        rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx),
+                            ct->gcstack);
+    ptls->sig_exception = e;
+    ptls->io_wait = 0;
+    jl_handler_t *eh = ct->eh;
+    if (eh != NULL) {
+        asan_unpoison_task_stack(ct, &eh->eh_ctx);
+        jl_longjmp_in_ctx(sig, sigctx, eh->eh_ctx);
+    }
+    else {
+        jl_no_exc_handler(e, ct);
     }
-    jl_call_in_ctx(ptls, &jl_sig_throw, sig, sigctx);
 }
 
 static pthread_t signals_thread;
 
-static int is_addr_on_stack(jl_task_t *ct, void *addr)
+static int is_addr_on_stack(jl_task_t *ct, void *addr) JL_NOTSAFEPOINT
 {
-    if (ct->copy_stack) {
+    if (ct->ctx.copy_stack) {
         jl_ptls_t ptls = ct->ptls;
         return ((char*)addr > (char*)ptls->stackbase - ptls->stacksize &&
                 (char*)addr < (char*)ptls->stackbase);
     }
-    return ((char*)addr > (char*)ct->stkbuf &&
-            (char*)addr < (char*)ct->stkbuf + ct->bufsz);
+    return ((char*)addr > (char*)ct->ctx.stkbuf &&
+            (char*)addr < (char*)ct->ctx.stkbuf + ct->ctx.bufsz);
 }
 
 static void sigdie_handler(int sig, siginfo_t *info, void *context)
@@ -228,16 +236,23 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context)
     signal(sig, SIG_DFL);
     uv_tty_reset_mode();
     if (sig == SIGILL)
-        jl_show_sigill(context);
-    jl_critical_error(sig, info->si_code, jl_to_bt_context(context), jl_get_current_task());
+        jl_fprint_sigill(ios_safe_stderr, context);
+    jl_task_t *ct = jl_get_current_task();
+    jl_fprint_critical_error(ios_safe_stderr, sig, info->si_code, jl_to_bt_context(context), ct);
+    if (ct)
+        jl_atomic_store_relaxed(&ct->ptls->safepoint, (size_t*)NULL + 1);
     if (info->si_code == 0 ||
         info->si_code == SI_USER ||
 #ifdef SI_KERNEL
         info->si_code == SI_KERNEL ||
 #endif
         info->si_code == SI_QUEUE ||
+#ifdef SI_MESGQ
         info->si_code == SI_MESGQ ||
+#endif
+#ifdef SI_ASYNCIO
         info->si_code == SI_ASYNCIO ||
+#endif
 #ifdef SI_SIGIO
         info->si_code == SI_SIGIO ||
 #endif
@@ -252,7 +267,8 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context)
              sig != SIGFPE &&
              sig != SIGTRAP)
         raise(sig);
-    // fall-through return to re-execute faulting statement (but without the error handler)
+    // fall-through return to re-execute faulting statement (but without the
+    // error handler and the pgcstack having been destroyed)
 }
 
 #if defined(_CPU_X86_64_) || defined(_CPU_X86_)
@@ -287,10 +303,35 @@ int exc_reg_is_write_fault(uintptr_t esr) {
 }
 #endif
 
+static int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx);
+
 #if defined(HAVE_MACH)
 #include "signals-mach.c"
 #else
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <link.h>
+
+typedef struct {
+    int16_t tid;
+    bt_context_t *ctx;
+    int success;
+} callback_data_t;
+static int with_dl_iterate_phdr_lock(struct dl_phdr_info *info, size_t size, void *data)
+{
+    jl_lock_profile();
+    callback_data_t *cb_data = (callback_data_t*)data;
+    cb_data->success = jl_thread_suspend_and_get_state(cb_data->tid, 1, cb_data->ctx);
+    jl_unlock_profile();
+    return 1; // only call this once
+}
 
+int jl_thread_suspend(int16_t tid, bt_context_t *ctx)
+{
+    callback_data_t cb_data = {tid, ctx, 0};
+    dl_iterate_phdr(with_dl_iterate_phdr_lock, &cb_data);
+    return cb_data.success;
+}
 
 #if defined(_OS_LINUX_) && (defined(_CPU_X86_64_) || defined(_CPU_X86_))
 int is_write_fault(void *context) {
@@ -322,6 +363,18 @@ int is_write_fault(void *context) {
     ucontext_t *ctx = (ucontext_t*)context;
     return exc_reg_is_write_fault(ctx->uc_mcontext.mc_err);
 }
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+// FreeBSD seems not to expose a means of accessing ESR via `ucontext_t` on AArch64.
+// TODO: Is there an alternative approach that can be taken? ESR may become accessible
+// in a future release though.
+int is_write_fault(void *context) {
+    return 0;
+}
+#elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_)
+int is_write_fault(void *context) {
+    struct sigcontext *ctx = (struct sigcontext *)context;
+    return exc_reg_is_write_fault(ctx->sc_err);
+}
 #else
 #pragma message("Implement this query for consistent PROT_NONE handling")
 int is_write_fault(void *context) {
@@ -329,17 +382,19 @@ int is_write_fault(void *context) {
 }
 #endif
 
-static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context)
+static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context) JL_NOTSAFEPOINT
 {
-    return (is_addr_on_sigstack(ptls, ptr) &&
+    return (ptls->signal_stack != NULL &&
+            is_addr_on_sigstack(ptls, ptr) &&
             is_addr_on_sigstack(ptls, (void*)jl_get_rsp_from_ctx(context)));
 }
 
 JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
 {
     assert(sig == SIGSEGV || sig == SIGBUS);
-    if (jl_get_safe_restore()) { // restarting jl_ or profile
-        jl_call_in_ctx(NULL, &jl_sig_throw, sig, context);
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or profile
+        jl_longjmp_in_ctx(sig, context, *saferestore);
         return;
     }
     jl_task_t *ct = jl_get_current_task();
@@ -348,10 +403,16 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
         return;
     }
     if (sig == SIGSEGV && info->si_code == SEGV_ACCERR && jl_addr_is_safepoint((uintptr_t)info->si_addr) && !is_write_fault(context)) {
-        jl_set_gc_and_wait();
+        jl_set_gc_and_wait(ct);
         // Do not raise sigint on worker thread
         if (jl_atomic_load_relaxed(&ct->tid) != 0)
             return;
+        // n.b. if the user might have seen that we were in a state where it
+        // was safe to run GC concurrently, we might briefly enter a state
+        // where our execution is not consistent with the gc_state of this
+        // thread. That will quickly be rectified when we rerun the faulting
+        // instruction and end up right back here, or we start to run the
+        // exception handler and immediately hit the safepoint there.
         if (ct->ptls->defer_signal) {
             jl_safepoint_defer_sigint();
         }
@@ -364,6 +425,7 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
     if (ct->eh == NULL)
         sigdie_handler(sig, info, context);
     if ((sig != SIGBUS || info->si_code == BUS_ADRERR) && is_addr_on_stack(ct, info->si_addr)) { // stack overflow and not a BUS_ADRALN (alignment error)
+        stack_overflow_warning();
         jl_throw_in_ctx(ct, jl_stackovf_exception, sig, context);
     }
     else if (jl_is_on_sigstack(ct->ptls, info->si_addr, context)) {
@@ -383,68 +445,91 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
     }
 }
 
-#if !defined(JL_DISABLE_LIBUNWIND)
-static unw_context_t *signal_context;
-pthread_mutex_t in_signal_lock;
-static pthread_cond_t exit_signal_cond;
-static pthread_cond_t signal_caught_cond;
+pthread_mutex_t in_signal_lock; // shared with jl_delete_thread
+static bt_context_t *usr2_signal_context; // protected by in_signal_lock
+static int exit_signal_cond = -1;
+static int signal_caught_cond = -1;
+static int signals_inflight = 0;
 
-static void jl_thread_suspend_and_get_state(int tid, int timeout, unw_context_t **ctx)
+static int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
 {
-    struct timespec ts;
-    clock_gettime(CLOCK_REALTIME, &ts);
-    ts.tv_sec += timeout;
+    int err;
     pthread_mutex_lock(&in_signal_lock);
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
     jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
     if (ct2 == NULL) {
         // this thread is not alive or already dead
-        *ctx = NULL;
         pthread_mutex_unlock(&in_signal_lock);
-        return;
+        return 0;
     }
-    jl_atomic_store_release(&ptls2->signal_request, 1);
-    pthread_kill(ptls2->system_id, SIGUSR2);
-    // wait for thread to acknowledge
-    int err = pthread_cond_timedwait(&signal_caught_cond, &in_signal_lock, &ts);
-    if (err == ETIMEDOUT) {
-        sig_atomic_t request = 1;
-        if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) {
-            *ctx = NULL;
+    while (signals_inflight) {
+        // something is wrong, or there is already a usr2 in flight elsewhere
+        // try to wait for it to finish or wait for timeout
+        struct pollfd event = {signal_caught_cond, POLLIN, 0};
+        do {
+            err = poll(&event, 1, timeout * 1000);
+        } while (err == -1 && errno == EINTR);
+        if (err == -1 || (event.revents & POLLIN) == 0) {
+            // not ready after timeout: cancel this request
             pthread_mutex_unlock(&in_signal_lock);
-            return;
+            return 0;
         }
-        // Request is either now 0 (meaning the other thread is waiting for
-        //   exit_signal_cond already),
-        // Or it is now -1 (meaning the other thread
-        //   is waiting for in_signal_lock, and we need to release that lock
-        //   here for a bit, until the other thread has a chance to get to the
-        //   exit_signal_cond)
-        if (request == -1) {
-            err = pthread_cond_wait(&signal_caught_cond, &in_signal_lock);
-            assert(!err);
+        // consume it before continuing
+        eventfd_t got;
+        do {
+            err = read(signal_caught_cond, &got, sizeof(eventfd_t));
+        } while (err == -1 && errno == EINTR);
+        if (err != sizeof(eventfd_t)) abort();
+        assert(signals_inflight >= got);
+        signals_inflight -= got;
+    }
+    signals_inflight++;
+    sig_atomic_t request = jl_atomic_exchange(&ptls2->signal_request, 1);
+    assert(request == 0 || request == -1);
+    request = 1;
+    err = pthread_kill(ptls2->system_id, SIGUSR2);
+    if (err == 0) {
+        // wait for thread to acknowledge or timeout
+        struct pollfd event = {signal_caught_cond, POLLIN, 0};
+        do {
+            err = poll(&event, 1, timeout * 1000);
+        } while (err == -1 && errno == EINTR);
+        if (err != 1 || (event.revents & POLLIN) == 0)
+            err = -1;
+    }
+    if (err == -1) {
+        // not ready after timeout: try to cancel this request
+        if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) {
+            signals_inflight--;
+            pthread_mutex_unlock(&in_signal_lock);
+            return 0;
         }
     }
+    eventfd_t got;
+    do {
+        err = read(signal_caught_cond, &got, sizeof(eventfd_t));
+    } while (err == -1 && errno == EINTR);
+    if (err != sizeof(eventfd_t)) abort();
+    assert(signals_inflight >= got);
+    signals_inflight -= got;
+    signals_inflight++;
     // Now the other thread is waiting on exit_signal_cond (verify that here by
     // checking it is 0, and add an acquire barrier for good measure)
-    int request = jl_atomic_load_acquire(&ptls2->signal_request);
-    assert(request == 0); (void) request;
-    *ctx = signal_context;
+    request = jl_atomic_load_acquire(&ptls2->signal_request);
+    assert(request == 0 || request == -1); (void) request;
+    jl_atomic_store_release(&ptls2->signal_request, 4); // prepare to resume normally, but later code may change this
+    *ctx = *usr2_signal_context;
+    return 1;
 }
 
-static void jl_thread_resume(int tid, int sig)
+void jl_thread_resume(int tid)
 {
-    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
-    jl_atomic_store_release(&ptls2->signal_request, sig == -1 ? 3 : 1);
-    pthread_cond_broadcast(&exit_signal_cond);
-    pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge
-    // The other thread is waiting to leave exit_signal_cond (verify that here by
-    // checking it is 0, and add an acquire barrier for good measure)
-    int request = jl_atomic_load_acquire(&ptls2->signal_request);
-    assert(request == 0); (void) request;
+    int err;
+    eventfd_t got = 1;
+    err = write(exit_signal_cond, &got, sizeof(eventfd_t));
+    if (err != sizeof(eventfd_t)) abort();
     pthread_mutex_unlock(&in_signal_lock);
 }
-#endif
 
 // Throw jl_interrupt_exception if the master thread is in a signal async region
 // or if SIGINT happens too often.
@@ -453,9 +538,12 @@ static void jl_try_deliver_sigint(void)
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
     jl_safepoint_enable_sigint();
     jl_wake_libuv();
+    pthread_mutex_lock(&in_signal_lock);
+    signals_inflight++;
     jl_atomic_store_release(&ptls2->signal_request, 2);
     // This also makes sure `sleep` is aborted.
     pthread_kill(ptls2->system_id, SIGUSR2);
+    pthread_mutex_unlock(&in_signal_lock);
 }
 
 // Write only by signal handling thread, read only by main thread
@@ -464,7 +552,8 @@ static int thread0_exit_signo = 0;
 static void JL_NORETURN jl_exit_thread0_cb(void)
 {
 CFI_NORETURN
-    jl_critical_error(thread0_exit_signo, 0, NULL, jl_current_task);
+    jl_atomic_fetch_add(&jl_gc_disable_counter, -1);
+    jl_fprint_critical_error(ios_safe_stderr, thread0_exit_signo, 0, NULL, jl_current_task);
     jl_atexit_hook(128);
     jl_raise(thread0_exit_signo);
 }
@@ -472,14 +561,14 @@ CFI_NORETURN
 static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
 {
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
-    unw_context_t *signal_context;
+    bt_context_t signal_context;
     // This also makes sure `sleep` is aborted.
-    jl_thread_suspend_and_get_state(0, 30, &signal_context);
-    if (signal_context != NULL) {
+    if (jl_thread_suspend_and_get_state(0, 30, &signal_context)) {
         thread0_exit_signo = signo;
         ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE
         memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0]));
-        jl_thread_resume(0, -1); // resume with message 3 (call jl_exit_thread0_cb)
+        jl_atomic_store_release(&ptls2->signal_request, 3);
+        jl_thread_resume(0); // resume with message 3 (call jl_exit_thread0_cb)
     }
     else {
         // thread 0 is gone? just do the exit ourself
@@ -488,12 +577,13 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
 }
 
 // request:
-// -1: beginning processing [invalid outside here]
+// -1: processing
 //  0: nothing [not from here]
-//  1: get state
+//  1: get state & wait for request
 //  2: throw sigint if `!defer_signal && io_wait` or if force throw threshold
 //     is reached
 //  3: raise `thread0_exit_signo` and try to exit
+//  4: no-op
 void usr2_handler(int sig, siginfo_t *info, void *ctx)
 {
     jl_task_t *ct = jl_get_current_task();
@@ -503,26 +593,36 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
     if (ptls == NULL)
         return;
     int errno_save = errno;
-    // acknowledge that we saw the signal_request
-    sig_atomic_t request = jl_atomic_exchange(&ptls->signal_request, -1);
-#if !defined(JL_DISABLE_LIBUNWIND)
+    sig_atomic_t request = jl_atomic_load(&ptls->signal_request);
+    if (request == 0)
+        return;
+    if (!jl_atomic_cmpswap(&ptls->signal_request, &request, -1))
+        return;
     if (request == 1) {
-        pthread_mutex_lock(&in_signal_lock);
-        signal_context = jl_to_bt_context(ctx);
-        // acknowledge that we set the signal_caught_cond broadcast
-        request = jl_atomic_exchange(&ptls->signal_request, 0);
-        assert(request == -1); (void) request;
-        pthread_cond_broadcast(&signal_caught_cond);
-        pthread_cond_wait(&exit_signal_cond, &in_signal_lock);
-        request = jl_atomic_exchange(&ptls->signal_request, 0);
-        assert(request == 1 || request == 3);
-        // acknowledge that we got the resume signal
-        pthread_cond_broadcast(&signal_caught_cond);
-        pthread_mutex_unlock(&in_signal_lock);
+        usr2_signal_context = jl_to_bt_context(ctx);
+        // acknowledge that we saw the signal_request and set usr2_signal_context
+        int err;
+        eventfd_t got = 1;
+        err = write(signal_caught_cond, &got, sizeof(eventfd_t));
+        if (err != sizeof(eventfd_t)) abort();
+        sig_atomic_t processing = -1;
+        jl_atomic_cmpswap(&ptls->signal_request, &processing, 0);
+        // wait for exit signal
+        do {
+            err = read(exit_signal_cond, &got, sizeof(eventfd_t));
+        } while (err == -1 && errno == EINTR);
+        if (err != sizeof(eventfd_t)) abort();
+        assert(got == 1);
+        request = jl_atomic_exchange(&ptls->signal_request, -1);
+        usr2_signal_context = NULL;
+        assert(request == 2 || request == 3 || request == 4);
     }
-    else
-#endif
-    jl_atomic_exchange(&ptls->signal_request, 0); // returns -1
+    int err;
+    eventfd_t got = 1;
+    err = write(signal_caught_cond, &got, sizeof(eventfd_t));
+    if (err != sizeof(eventfd_t)) abort();
+    sig_atomic_t processing = -1;
+    jl_atomic_cmpswap(&ptls->signal_request, &processing, 0);
     if (request == 2) {
         int force = jl_check_force_sigint();
         if (force || (!ptls->defer_signal && ptls->io_wait)) {
@@ -531,7 +631,11 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
                 jl_safe_printf("WARNING: Force throwing a SIGINT\n");
             // Force a throw
             jl_clear_force_sigint();
-            jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx);
+            jl_jmp_buf *saferestore = jl_get_safe_restore();
+            if (saferestore) // restarting jl_ or profile
+                jl_longjmp_in_ctx(sig, ctx, *saferestore);
+            else
+                jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx);
         }
     }
     else if (request == 3) {
@@ -558,7 +662,7 @@ int timer_graceperiod_elapsed(void)
 static timer_t timerprof;
 static struct itimerspec itsprof;
 
-JL_DLLEXPORT int jl_profile_start_timer(void)
+JL_DLLEXPORT int jl_profile_start_timer(uint8_t all_tasks)
 {
     struct sigevent sigprof;
 
@@ -567,10 +671,12 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
     sigprof.sigev_notify = SIGEV_SIGNAL;
     sigprof.sigev_signo = SIGUSR1;
     sigprof.sigev_value.sival_ptr = &timerprof;
-    // Because SIGUSR1 is multipurpose, set `running` before so that we know that the first SIGUSR1 came from the timer
-    running = 1;
+    // Because SIGUSR1 is multipurpose, set `profile_running` before so that we know that the first SIGUSR1 came from the timer
+    profile_running = 1;
+    profile_all_tasks = all_tasks;
     if (timer_create(CLOCK_REALTIME, &sigprof, &timerprof) == -1) {
-        running = 0;
+        profile_running = 0;
+        profile_all_tasks = 0;
         return -2;
     }
 
@@ -580,7 +686,8 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
     itsprof.it_value.tv_sec = nsecprof / GIGA;
     itsprof.it_value.tv_nsec = nsecprof % GIGA;
     if (timer_settime(timerprof, 0, &itsprof, NULL) == -1) {
-        running = 0;
+        profile_running = 0;
+        profile_all_tasks = 0;
         return -3;
     }
     return 0;
@@ -588,11 +695,24 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
 
 JL_DLLEXPORT void jl_profile_stop_timer(void)
 {
-    if (running) {
+    uv_mutex_lock(&bt_data_prof_lock);
+    if (profile_running) {
         timer_delete(timerprof);
         last_timer_delete_time = jl_hrtime();
-        running = 0;
+        profile_running = 0;
     }
+    uv_mutex_unlock(&bt_data_prof_lock);
+}
+
+#elif defined(__OpenBSD__)
+
+JL_DLLEXPORT int jl_profile_start_timer(void)
+{
+    return -1;
+}
+
+JL_DLLEXPORT void jl_profile_stop_timer(void)
+{
 }
 
 #else
@@ -618,30 +738,41 @@ static void allocate_segv_handler(void)
     }
 }
 
-static void *alloc_sigstack(size_t *ssize)
-{
-    void *stk = jl_malloc_stack(ssize, NULL);
-    if (stk == NULL)
-        jl_errorf("fatal error allocating signal stack: mmap: %s", strerror(errno));
-    return stk;
-}
-
 void jl_install_thread_signal_handler(jl_ptls_t ptls)
 {
-    size_t ssize = sig_stack_size;
-    void *signal_stack = alloc_sigstack(&ssize);
-    ptls->signal_stack = signal_stack;
+#ifdef HAVE_MACH
+    attach_exception_port(pthread_mach_thread_np(ptls->system_id), 0);
+#endif
     stack_t ss;
-    ss.ss_flags = 0;
-    ss.ss_size = ssize - 16;
-    ss.ss_sp = signal_stack;
-    if (sigaltstack(&ss, NULL) < 0) {
+    if (sigaltstack(NULL, &ss) < 0)
         jl_errorf("fatal error: sigaltstack: %s", strerror(errno));
+    if ((ss.ss_flags & SS_DISABLE) != SS_DISABLE)
+        return; // someone else appears to have already set this up, so just use that
+    size_t ssize = sig_stack_size;
+    void *signal_stack = jl_malloc_stack(&ssize, NULL);
+    ss.ss_flags = 0;
+    ss.ss_size = ssize;
+    assert(ssize != 0);
+
+#ifndef _OS_OPENBSD_
+    /* fallback to malloc(), but it isn't possible on OpenBSD */
+    if (signal_stack == NULL) {
+        signal_stack = malloc(ssize);
+        ssize = 0;
+        if (signal_stack == NULL)
+            jl_safe_printf("\nwarning: julia signal alt stack could not be allocated (StackOverflowError will be fatal on this thread).\n");
+        else
+            jl_safe_printf("\nwarning: julia signal stack allocated without guard page (launch foreign threads earlier to avoid this warning).\n");
     }
-
-#ifdef HAVE_MACH
-    attach_exception_port(pthread_mach_thread_np(ptls->system_id), 0);
 #endif
+
+    if (signal_stack != NULL) {
+        ss.ss_sp = signal_stack;
+        if (sigaltstack(&ss, NULL) < 0)
+            jl_errorf("fatal error: sigaltstack: %s", strerror(errno));
+        ptls->signal_stack = signal_stack;
+        ptls->signal_stack_size = ssize;
+    }
 }
 
 const static int sigwait_sigs[] = {
@@ -685,9 +816,10 @@ static void kqueue_signal(int *sigqueue, struct kevent *ev, int sig)
 void trigger_profile_peek(void)
 {
     jl_safe_printf("\n======================================================================================\n");
-    jl_safe_printf("Information request received. A stacktrace will print followed by a %.1f second profile\n", profile_peek_duration);
+    jl_safe_printf("Information request received. A stacktrace will print followed by a %.1f second profile.\n", profile_peek_duration);
+    jl_safe_printf("--trace-compile is enabled during profile collection.\n");
     jl_safe_printf("======================================================================================\n");
-    if (bt_size_max == 0){
+    if (profile_bt_size_max == 0) {
         // If the buffer hasn't been initialized, initialize with default size
         // Keep these values synchronized with Profile.default_init()
         if (jl_profile_init(10000000, 1000000) == -1) {
@@ -695,17 +827,100 @@ void trigger_profile_peek(void)
             return;
         }
     }
-    bt_size_cur = 0; // clear profile buffer
-    if (jl_profile_start_timer() < 0)
+    profile_bt_size_cur = 0; // clear profile buffer
+    if (jl_profile_start_timer(0) < 0)
         jl_safe_printf("ERROR: Could not start profile timer\n");
     else
         profile_autostop_time = jl_hrtime() + (profile_peek_duration * 1e9);
 }
 
+#if !defined(JL_DISABLE_LIBUNWIND)
+
+static jl_bt_element_t signal_bt_data[JL_MAX_BT_SIZE + 1];
+static size_t signal_bt_size = 0;
+static void do_critical_profile(void)
+{
+    bt_context_t signal_context;
+    // sample each thread, round-robin style in reverse order
+    // (so that thread zero gets notified last)
+    int nthreads = jl_atomic_load_acquire(&jl_n_threads);
+    for (int i = nthreads; i-- > 0; ) {
+        // notify thread to stop
+        if (!jl_thread_suspend(i, &signal_context))
+            continue;
+
+        // do backtrace on thread contexts for critical signals
+        // this part must be signal-handler safe
+        signal_bt_size += rec_backtrace_ctx(signal_bt_data + signal_bt_size,
+                JL_MAX_BT_SIZE / nthreads - 1,
+                &signal_context, NULL);
+        signal_bt_data[signal_bt_size++].uintptr = 0;
+        jl_thread_resume(i);
+    }
+}
+
+static void do_profile(void)
+{
+    bt_context_t signal_context;
+    int nthreads = jl_atomic_load_acquire(&jl_n_threads);
+    int *randperm = profile_get_randperm(nthreads);
+    for (int idx = nthreads; idx-- > 0; ) {
+        // Stop the threads in the random order.
+        int tid = randperm[idx];
+        // do backtrace for profiler
+        if (!profile_running)
+            return;
+        if (jl_profile_is_buffer_full()) {
+            // Buffer full: Delete the timer
+            jl_profile_stop_timer();
+            return;
+        }
+        // notify thread to stop
+        if (!jl_thread_suspend(tid, &signal_context))
+            return;
+        // unwinding can fail, so keep track of the current state
+        // and restore from the SEGV handler if anything happens.
+        jl_jmp_buf *old_buf = jl_get_safe_restore();
+        jl_jmp_buf buf;
+
+        jl_set_safe_restore(&buf);
+        if (jl_setjmp(buf, 0)) {
+            jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
+        }
+        else {
+            // Get backtrace data
+            profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur,
+                    profile_bt_size_max - profile_bt_size_cur - 1, &signal_context, NULL);
+        }
+        jl_set_safe_restore(old_buf);
+
+        jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+
+        // store threadid but add 1 as 0 is preserved to indicate end of block
+        profile_bt_data_prof[profile_bt_size_cur++].uintptr = ptls2->tid + 1;
+
+        // store task id (never null)
+        profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task);
+
+        // store cpu cycle clock
+        profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock();
+
+        // store whether thread is sleeping (don't ever encode a state as `0` since is preserved to indicate end of block)
+        int state = jl_atomic_load_relaxed(&ptls2->sleep_check_state) == 0 ? PROFILE_STATE_THREAD_NOT_SLEEPING : PROFILE_STATE_THREAD_SLEEPING;
+        profile_bt_data_prof[profile_bt_size_cur++].uintptr = state;
+
+        // Mark the end of this block with two 0's
+        profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
+        profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
+
+        // notify thread to resume
+        jl_thread_resume(tid);
+    }
+}
+#endif
+
 static void *signal_listener(void *arg)
 {
-    static jl_bt_element_t bt_data[JL_MAX_BT_SIZE + 1];
-    static size_t bt_size = 0;
     sigset_t sset;
     int sig, critical, profile;
     jl_sigsetset(&sset);
@@ -801,13 +1016,13 @@ static void *signal_listener(void *arg)
         int doexit = critical;
 #ifdef SIGINFO
         if (sig == SIGINFO) {
-            if (running != 1)
+            if (profile_running != 1)
                 trigger_profile_peek();
             doexit = 0;
         }
 #else
         if (sig == SIGUSR1) {
-            if (running != 1 && timer_graceperiod_elapsed())
+            if (profile_running != 1 && timer_graceperiod_elapsed())
                 trigger_profile_peek();
             doexit = 0;
         }
@@ -837,83 +1052,22 @@ static void *signal_listener(void *arg)
             }
         }
 
-        int nthreads = jl_atomic_load_acquire(&jl_n_threads);
-        bt_size = 0;
+        signal_bt_size = 0;
 #if !defined(JL_DISABLE_LIBUNWIND)
-        unw_context_t *signal_context;
-        // sample each thread, round-robin style in reverse order
-        // (so that thread zero gets notified last)
-        if (critical || profile) {
-            jl_lock_profile();
-            int *randperm;
-            if (profile)
-                 randperm = profile_get_randperm(nthreads);
-            for (int idx = nthreads; idx-- > 0; ) {
-                // Stop the threads in the random or reverse round-robin order.
-                int i = profile ? randperm[idx] : idx;
-                // notify thread to stop
-                jl_thread_suspend_and_get_state(i, 1, &signal_context);
-                if (signal_context == NULL)
-                    continue;
-
-                // do backtrace on thread contexts for critical signals
-                // this part must be signal-handler safe
-                if (critical) {
-                    bt_size += rec_backtrace_ctx(bt_data + bt_size,
-                            JL_MAX_BT_SIZE / nthreads - 1,
-                            signal_context, NULL);
-                    bt_data[bt_size++].uintptr = 0;
-                }
-
-                // do backtrace for profiler
-                if (profile && running) {
-                    if (jl_profile_is_buffer_full()) {
-                        // Buffer full: Delete the timer
-                        jl_profile_stop_timer();
-                    }
-                    else {
-                        // unwinding can fail, so keep track of the current state
-                        // and restore from the SEGV handler if anything happens.
-                        jl_jmp_buf *old_buf = jl_get_safe_restore();
-                        jl_jmp_buf buf;
-
-                        jl_set_safe_restore(&buf);
-                        if (jl_setjmp(buf, 0)) {
-                            jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
-                        } else {
-                            // Get backtrace data
-                            bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
-                                    bt_size_max - bt_size_cur - 1, signal_context, NULL);
-                        }
-                        jl_set_safe_restore(old_buf);
-
-                        jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[i];
-
-                        // store threadid but add 1 as 0 is preserved to indicate end of block
-                        bt_data_prof[bt_size_cur++].uintptr = ptls2->tid + 1;
-
-                        // store task id (never null)
-                        bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task);
-
-                        // store cpu cycle clock
-                        bt_data_prof[bt_size_cur++].uintptr = cycleclock();
-
-                        // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
-                        bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls2->sleep_check_state) + 1;
-
-                        // Mark the end of this block with two 0's
-                        bt_data_prof[bt_size_cur++].uintptr = 0;
-                        bt_data_prof[bt_size_cur++].uintptr = 0;
-                    }
-                }
-
-                // notify thread to resume
-                jl_thread_resume(i, sig);
+        if (critical) {
+            do_critical_profile();
+        }
+        else if (profile) {
+            if (profile_all_tasks) {
+                // Don't take the stackwalk lock here since it's already taken in `jl_rec_backtrace`
+                jl_profile_task();
+            }
+            else {
+                do_profile();
             }
-            jl_unlock_profile();
         }
 #ifndef HAVE_MACH
-        if (profile && running) {
+        if (profile_running) {
             jl_check_profile_autostop();
 #if defined(HAVE_TIMER)
             timer_settime(timerprof, 0, &itsprof, NULL);
@@ -930,24 +1084,32 @@ static void *signal_listener(void *arg)
 //#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L && !HAVE_KEVENT
 //            si_code = info.si_code;
 //#endif
-            jl_exit_thread0(sig, bt_data, bt_size);
+            // Let's forbid threads from running GC while we're trying to exit,
+            // also let's make sure we're not in the middle of GC.
+            jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
+            jl_safepoint_wait_gc(NULL);
+            jl_exit_thread0(sig, signal_bt_data, signal_bt_size);
         }
         else if (critical) {
             // critical in this case actually means SIGINFO request
 #ifndef SIGINFO // SIGINFO already prints something similar automatically
-            int nrunning = 0;
+            int nthreads = jl_atomic_load_acquire(&jl_n_threads);
+            int n_threads_running = 0;
             for (int idx = nthreads; idx-- > 0; ) {
                 jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[idx];
-                nrunning += !jl_atomic_load_relaxed(&ptls2->sleep_check_state);
+                n_threads_running += !jl_atomic_load_relaxed(&ptls2->sleep_check_state);
             }
-            jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), nrunning, nthreads);
+            jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), n_threads_running, nthreads);
 #endif
 
             jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
             size_t i;
-            for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
-                jl_print_bt_entry_codeloc(bt_data + i);
+            for (i = 0; i < signal_bt_size; i += jl_bt_entry_size(signal_bt_data + i)) {
+                jl_fprint_bt_entry_codeloc(ios_safe_stderr, signal_bt_data + i);
             }
+            jl_safe_printf("\n");
+            // Enable trace compilation to stderr with timing during profile collection
+            jl_force_trace_compile_timing_enable();
         }
     }
     return NULL;
@@ -962,10 +1124,12 @@ void restore_signals(void)
     jl_sigsetset(&sset);
     pthread_sigmask(SIG_SETMASK, &sset, 0);
 
-#if !defined(HAVE_MACH) && !defined(JL_DISABLE_LIBUNWIND)
+#if !defined(HAVE_MACH)
+    exit_signal_cond = eventfd(0, EFD_CLOEXEC);
+    signal_caught_cond = eventfd(0, EFD_CLOEXEC);
     if (pthread_mutex_init(&in_signal_lock, NULL) != 0 ||
-        pthread_cond_init(&exit_signal_cond, NULL) != 0 ||
-        pthread_cond_init(&signal_caught_cond, NULL) != 0) {
+            exit_signal_cond == -1 ||
+            signal_caught_cond == -1) {
         jl_error("SIGUSR pthread init failed");
     }
 #endif
@@ -978,8 +1142,9 @@ void restore_signals(void)
 static void fpe_handler(int sig, siginfo_t *info, void *context)
 {
     (void)info;
-    if (jl_get_safe_restore()) { // restarting jl_ or profile
-        jl_call_in_ctx(NULL, &jl_sig_throw, sig, context);
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or profile
+        jl_longjmp_in_ctx(sig, context, *saferestore);
         return;
     }
     jl_task_t *ct = jl_get_current_task();
@@ -989,21 +1154,63 @@ static void fpe_handler(int sig, siginfo_t *info, void *context)
         jl_throw_in_ctx(ct, jl_diverror_exception, sig, context);
 }
 
+static void jl_longjmp_in_ctx(int sig, void *_ctx, jl_jmp_buf jmpbuf)
+{
+#if defined(_OS_DARWIN_)
+    jl_longjmp_in_state((host_thread_state_t*)jl_to_bt_context(_ctx), jmpbuf);
+#else
+    if (jl_simulate_longjmp(jmpbuf, jl_to_bt_context(_ctx)))
+        return;
+    sigset_t sset;
+    sigemptyset(&sset);
+    sigaddset(&sset, sig);
+    pthread_sigmask(SIG_UNBLOCK, &sset, NULL);
+    jl_longjmp(jmpbuf, 1);
+#endif
+}
+
 static void sigint_handler(int sig)
 {
     jl_sigint_passed = 1;
 }
 
+#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)
+static void sigtrap_handler(int sig, siginfo_t *info, void *context)
+{
+    uintptr_t pc = ((ucontext_t*)context)->uc_mcontext->__ss.__pc; // TODO: Do this in linux as well
+    uint32_t* code = (uint32_t*)(pc);                              // https://gcc.gnu.org/legacy-ml/gcc-patches/2013-11/msg02228.html
+    if (*code == 0xd4200020) { // brk #0x1 which is what LLVM defines as trap
+        signal(sig, SIG_DFL);
+        sig = SIGILL; // redefine this as as an "unreachable reached" error message
+        sigdie_handler(sig, info, context);
+    }
+}
+#endif
+
 void jl_install_default_signal_handlers(void)
 {
     struct sigaction actf;
     memset(&actf, 0, sizeof(struct sigaction));
     sigemptyset(&actf.sa_mask);
     actf.sa_sigaction = fpe_handler;
-    actf.sa_flags = SA_ONSTACK | SA_SIGINFO;
+    actf.sa_flags = SA_SIGINFO;
     if (sigaction(SIGFPE, &actf, NULL) < 0) {
         jl_errorf("fatal error: sigaction: %s", strerror(errno));
     }
+#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)
+    struct sigaction acttrap;
+    memset(&acttrap, 0, sizeof(struct sigaction));
+    sigemptyset(&acttrap.sa_mask);
+    acttrap.sa_sigaction = sigtrap_handler;
+    acttrap.sa_flags = SA_SIGINFO;
+    if (sigaction(SIGTRAP, &acttrap, NULL) < 0) {
+        jl_errorf("fatal error: sigaction: %s", strerror(errno));
+    }
+#else
+    if (signal(SIGTRAP, SIG_IGN) == SIG_ERR) {
+        jl_error("fatal error: Couldn't set SIGTRAP");
+    }
+#endif
     struct sigaction actint;
     memset(&actint, 0, sizeof(struct sigaction));
     sigemptyset(&actint.sa_mask);
@@ -1015,9 +1222,6 @@ void jl_install_default_signal_handlers(void)
     if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
         jl_error("fatal error: Couldn't set SIGPIPE");
     }
-    if (signal(SIGTRAP, SIG_IGN) == SIG_ERR) {
-        jl_error("fatal error: Couldn't set SIGTRAP");
-    }
 
 #if defined(HAVE_MACH)
     allocate_mach_handler();
@@ -1026,7 +1230,7 @@ void jl_install_default_signal_handlers(void)
     memset(&act, 0, sizeof(struct sigaction));
     sigemptyset(&act.sa_mask);
     act.sa_sigaction = usr2_handler;
-    act.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTART;
+    act.sa_flags = SA_SIGINFO | SA_RESTART;
     if (sigaction(SIGUSR2, &act, NULL) < 0) {
         jl_errorf("fatal error: sigaction: %s", strerror(errno));
     }
diff --git a/src/signals-win.c b/src/signals-win.c
index 5dd6b34558ca6..1702bc338b16f 100644
--- a/src/signals-win.c
+++ b/src/signals-win.c
@@ -4,7 +4,12 @@
 // Note that this file is `#include`d by "signal-handling.c"
 #include <mmsystem.h> // hidden by LEAN_AND_MEAN
 
-#define sig_stack_size 131072 // 128k reserved for SEGV handling
+// Loader lock functions from ntdll
+// See https://devblogs.microsoft.com/oldnewthing/20140808-00/?p=293
+extern NTSTATUS NTAPI LdrLockLoaderLock(ULONG Flags, ULONG *State, ULONG_PTR *Cookie);
+extern NTSTATUS NTAPI LdrUnlockLoaderLock(ULONG Flags, ULONG_PTR Cookie);
+
+static const size_t sig_stack_size = 131072; // 128k reserved for backtrace_fiber for stack overflow handling
 
 // Copied from MINGW_FLOAT_H which may not be found due to a collision with the builtin gcc float.h
 // eventually we can probably integrate this into OpenLibm.
@@ -86,15 +91,43 @@ void __cdecl crt_sig_handler(int sig, int num)
         }
         break;
     default: // SIGSEGV, SIGTERM, SIGILL, SIGABRT
-        if (sig == SIGSEGV && jl_get_safe_restore()) {
-            signal(sig, (void (__cdecl *)(int))crt_sig_handler);
-            jl_sig_throw();
+        if (sig == SIGSEGV) { // restarting jl_ or profile
+            jl_jmp_buf *saferestore = jl_get_safe_restore();
+            if (saferestore) {
+                signal(sig, (void (__cdecl *)(int))crt_sig_handler);
+                jl_longjmp(*saferestore, 1);
+                return;
+            }
         }
         memset(&Context, 0, sizeof(Context));
         RtlCaptureContext(&Context);
+
+        ios_t s;
+        ios_mem(&s, 0);
         if (sig == SIGILL)
-            jl_show_sigill(&Context);
-        jl_critical_error(sig, 0, &Context, jl_get_current_task());
+            jl_fprint_sigill(&s, &Context);
+        jl_fprint_critical_error(&s, sig, 0, &Context, jl_get_current_task());
+
+        // First write to stderr
+        ios_write_direct(ios_safe_stderr, &s);
+
+        // Then write to Application log
+        HANDLE event_source = RegisterEventSourceW(NULL, L"julia");
+        if (event_source != INVALID_HANDLE_VALUE) {
+            ios_putc('\0', &s);
+            const wchar_t *strings[] = { ios_utf8_to_wchar(s.buf) };
+            ReportEventW(
+                event_source, EVENTLOG_ERROR_TYPE, /* category */ 0, /* event_id */ (DWORD)0xE0000000L,
+               /* user_sid */ NULL, /* n_strings */ 1, /* data_size */ 0, strings, /* data */ NULL
+            );
+            free((void *)strings[0]);
+
+            if (jl_options.alert_on_critical_error) {
+                MessageBoxW(NULL, /* message */ L"error: libjulia received a fatal signal.\n\n"
+                                                L"See Application log in Event Viewer for more information.",
+                            /* title */ L"fatal error in libjulia", MB_OK | MB_ICONEXCLAMATION | MB_SYSTEMMODAL);
+            }
+        }
         raise(sig);
     }
 }
@@ -109,6 +142,8 @@ static jl_ptls_t stkerror_ptls;
 static int have_backtrace_fiber;
 static void JL_NORETURN start_backtrace_fiber(void)
 {
+    // print the warning (this mysteriously needs a lot of stack for the WriteFile syscall)
+    stack_overflow_warning();
     // collect the backtrace
     stkerror_ptls->bt_size =
         rec_backtrace_ctx(stkerror_ptls->bt_data, JL_MAX_BT_SIZE, stkerror_ctx,
@@ -124,42 +159,41 @@ void restore_signals(void)
     SetConsoleCtrlHandler(NULL, 0);
 }
 
-void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread)
+int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c);
+
+static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *excpt, PCONTEXT ctxThread)
 {
-    jl_task_t *ct = jl_current_task;
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or profile
+        if (!jl_simulate_longjmp(*saferestore, ctxThread))
+            abort();
+        return;
+    }
+    assert(ct && excpt);
     jl_ptls_t ptls = ct->ptls;
-#if defined(_CPU_X86_64_)
-    DWORD64 Rsp = (ctxThread->Rsp & (DWORD64)-16) - 8;
-#elif defined(_CPU_X86_)
-    DWORD32 Esp = (ctxThread->Esp & (DWORD32)-16) - 4;
-#else
-#error WIN16 not supported :P
-#endif
-    if (!jl_get_safe_restore()) {
-        assert(excpt != NULL);
-        ptls->bt_size = 0;
-        if (excpt != jl_stackovf_exception) {
-            ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread,
-                                              ct->gcstack);
-        }
-        else if (have_backtrace_fiber) {
-            uv_mutex_lock(&backtrace_lock);
-            stkerror_ctx = ctxThread;
-            stkerror_ptls = ptls;
-            jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber);
-            uv_mutex_unlock(&backtrace_lock);
-        }
-        ptls->sig_exception = excpt;
-    }
-#if defined(_CPU_X86_64_)
-    *(DWORD64*)Rsp = 0;
-    ctxThread->Rsp = Rsp;
-    ctxThread->Rip = (DWORD64)&jl_sig_throw;
-#elif defined(_CPU_X86_)
-    *(DWORD32*)Esp = 0;
-    ctxThread->Esp = Esp;
-    ctxThread->Eip = (DWORD)&jl_sig_throw;
-#endif
+    ptls->bt_size = 0;
+    if (excpt != jl_stackovf_exception) {
+        ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread,
+                                          ct->gcstack);
+    }
+    else if (have_backtrace_fiber) {
+        uv_mutex_lock(&backtrace_lock);
+        stkerror_ctx = ctxThread;
+        stkerror_ptls = ptls;
+        jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber);
+        uv_mutex_unlock(&backtrace_lock);
+    }
+    ptls->sig_exception = excpt;
+    ptls->io_wait = 0;
+    jl_handler_t *eh = ct->eh;
+    if (eh != NULL) {
+        asan_unpoison_task_stack(ct, &eh->eh_ctx);
+        if (!jl_simulate_longjmp(eh->eh_ctx, ctxThread))
+            abort();
+    }
+    else {
+        jl_no_exc_handler(excpt, ct);
+    }
 }
 
 HANDLE hMainThread = INVALID_HANDLE_VALUE;
@@ -193,7 +227,8 @@ static void jl_try_deliver_sigint(void)
             jl_safe_printf("error: GetThreadContext failed\n");
             return;
         }
-        jl_throw_in_ctx(jl_interrupt_exception, &ctxThread);
+        jl_task_t *ct = jl_atomic_load_relaxed(&ptls2->current_task);
+        jl_throw_in_ctx(ct, jl_interrupt_exception, &ctxThread);
         ctxThread.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
         if (!SetThreadContext(hMainThread, &ctxThread)) {
             jl_safe_printf("error: SetThreadContext failed\n");
@@ -237,20 +272,20 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
         case EXCEPTION_INT_DIVIDE_BY_ZERO:
             if (ct->eh != NULL) {
                 fpreset();
-                jl_throw_in_ctx(jl_diverror_exception, ExceptionInfo->ContextRecord);
+                jl_throw_in_ctx(ct, jl_diverror_exception, ExceptionInfo->ContextRecord);
                 return EXCEPTION_CONTINUE_EXECUTION;
             }
             break;
         case EXCEPTION_STACK_OVERFLOW:
             if (ct->eh != NULL) {
                 ptls->needs_resetstkoflw = 1;
-                jl_throw_in_ctx(jl_stackovf_exception, ExceptionInfo->ContextRecord);
+                jl_throw_in_ctx(ct, jl_stackovf_exception, ExceptionInfo->ContextRecord);
                 return EXCEPTION_CONTINUE_EXECUTION;
             }
             break;
         case EXCEPTION_ACCESS_VIOLATION:
             if (jl_addr_is_safepoint(ExceptionInfo->ExceptionRecord->ExceptionInformation[1])) {
-                jl_set_gc_and_wait();
+                jl_set_gc_and_wait(ct);
                 // Do not raise sigint on worker thread
                 if (ptls->tid != 0)
                     return EXCEPTION_CONTINUE_EXECUTION;
@@ -259,17 +294,17 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
                 }
                 else if (jl_safepoint_consume_sigint()) {
                     jl_clear_force_sigint();
-                    jl_throw_in_ctx(jl_interrupt_exception, ExceptionInfo->ContextRecord);
+                    jl_throw_in_ctx(ct, jl_interrupt_exception, ExceptionInfo->ContextRecord);
                 }
                 return EXCEPTION_CONTINUE_EXECUTION;
             }
             if (jl_get_safe_restore()) {
-                jl_throw_in_ctx(NULL, ExceptionInfo->ContextRecord);
+                jl_throw_in_ctx(NULL, NULL, ExceptionInfo->ContextRecord);
                 return EXCEPTION_CONTINUE_EXECUTION;
             }
             if (ct->eh != NULL) {
                 if (ExceptionInfo->ExceptionRecord->ExceptionInformation[0] == 1) { // writing to read-only memory (e.g. mmap)
-                    jl_throw_in_ctx(jl_readonlymemory_exception, ExceptionInfo->ContextRecord);
+                    jl_throw_in_ctx(ct, jl_readonlymemory_exception, ExceptionInfo->ContextRecord);
                     return EXCEPTION_CONTINUE_EXECUTION;
                 }
             }
@@ -277,59 +312,91 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
             break;
         }
     }
+    ios_t full_error, summary;
+    ios_mem(&full_error, 0);
     if (ExceptionInfo->ExceptionRecord->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) {
-        jl_safe_printf("\n");
-        jl_show_sigill(ExceptionInfo->ContextRecord);
+        jl_safe_fprintf(&full_error, "\n");
+        jl_fprint_sigill(&full_error, ExceptionInfo->ContextRecord);
     }
-    jl_safe_printf("\nPlease submit a bug report with steps to reproduce this fault, and any error messages that follow (in their entirety). Thanks.\nException: ");
+    jl_safe_fprintf(&full_error, "\nPlease submit a bug report with steps to reproduce this fault, and any error messages that follow (in their entirety). Thanks.\n");
+    ios_mem(&summary, 128);
+    jl_safe_fprintf(&summary, "Exception: ");
     switch (ExceptionInfo->ExceptionRecord->ExceptionCode) {
     case EXCEPTION_ACCESS_VIOLATION:
-        jl_safe_printf("EXCEPTION_ACCESS_VIOLATION"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_ACCESS_VIOLATION"); break;
     case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
-        jl_safe_printf("EXCEPTION_ARRAY_BOUNDS_EXCEEDED"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_ARRAY_BOUNDS_EXCEEDED"); break;
     case EXCEPTION_BREAKPOINT:
-        jl_safe_printf("EXCEPTION_BREAKPOINT"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_BREAKPOINT"); break;
     case EXCEPTION_DATATYPE_MISALIGNMENT:
-        jl_safe_printf("EXCEPTION_DATATYPE_MISALIGNMENT"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_DATATYPE_MISALIGNMENT"); break;
     case EXCEPTION_FLT_DENORMAL_OPERAND:
-        jl_safe_printf("EXCEPTION_FLT_DENORMAL_OPERAND"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_FLT_DENORMAL_OPERAND"); break;
     case EXCEPTION_FLT_DIVIDE_BY_ZERO:
-        jl_safe_printf("EXCEPTION_FLT_DIVIDE_BY_ZERO"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_FLT_DIVIDE_BY_ZERO"); break;
     case EXCEPTION_FLT_INEXACT_RESULT:
-        jl_safe_printf("EXCEPTION_FLT_INEXACT_RESULT"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_FLT_INEXACT_RESULT"); break;
     case EXCEPTION_FLT_INVALID_OPERATION:
-        jl_safe_printf("EXCEPTION_FLT_INVALID_OPERATION"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_FLT_INVALID_OPERATION"); break;
     case EXCEPTION_FLT_OVERFLOW:
-        jl_safe_printf("EXCEPTION_FLT_OVERFLOW"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_FLT_OVERFLOW"); break;
     case EXCEPTION_FLT_STACK_CHECK:
-        jl_safe_printf("EXCEPTION_FLT_STACK_CHECK"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_FLT_STACK_CHECK"); break;
     case EXCEPTION_FLT_UNDERFLOW:
-        jl_safe_printf("EXCEPTION_FLT_UNDERFLOW"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_FLT_UNDERFLOW"); break;
     case EXCEPTION_ILLEGAL_INSTRUCTION:
-        jl_safe_printf("EXCEPTION_ILLEGAL_INSTRUCTION"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_ILLEGAL_INSTRUCTION"); break;
     case EXCEPTION_IN_PAGE_ERROR:
-        jl_safe_printf("EXCEPTION_IN_PAGE_ERROR"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_IN_PAGE_ERROR"); break;
     case EXCEPTION_INT_DIVIDE_BY_ZERO:
-        jl_safe_printf("EXCEPTION_INT_DIVIDE_BY_ZERO"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_INT_DIVIDE_BY_ZERO"); break;
     case EXCEPTION_INT_OVERFLOW:
-        jl_safe_printf("EXCEPTION_INT_OVERFLOW"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_INT_OVERFLOW"); break;
     case EXCEPTION_INVALID_DISPOSITION:
-        jl_safe_printf("EXCEPTION_INVALID_DISPOSITION"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_INVALID_DISPOSITION"); break;
     case EXCEPTION_NONCONTINUABLE_EXCEPTION:
-        jl_safe_printf("EXCEPTION_NONCONTINUABLE_EXCEPTION"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_NONCONTINUABLE_EXCEPTION"); break;
     case EXCEPTION_PRIV_INSTRUCTION:
-        jl_safe_printf("EXCEPTION_PRIV_INSTRUCTION"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_PRIV_INSTRUCTION"); break;
     case EXCEPTION_SINGLE_STEP:
-        jl_safe_printf("EXCEPTION_SINGLE_STEP"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_SINGLE_STEP"); break;
     case EXCEPTION_STACK_OVERFLOW:
-        jl_safe_printf("EXCEPTION_STACK_OVERFLOW"); break;
+        jl_safe_fprintf(&summary, "EXCEPTION_STACK_OVERFLOW"); break;
     default:
-        jl_safe_printf("UNKNOWN"); break;
+        jl_safe_fprintf(&summary, "UNKNOWN"); break;
+    }
+    jl_safe_fprintf(&summary, " at 0x%zx -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
+    jl_fprint_native_codeloc(&summary, (uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
+    ios_write(&full_error, summary.buf, ios_pos(&summary));
+    ios_puts("\nSee Application log in Event Viewer for more information.\n", &summary);
+
+    jl_fprint_critical_error(&full_error, 0, 0, ExceptionInfo->ContextRecord, ct);
+
+    // First print to STDERR
+    ios_write_direct(ios_safe_stderr, &full_error);
+
+    // Secondly print to Application log
+    HANDLE event_source = RegisterEventSourceW(NULL, L"julia");
+    if (event_source != INVALID_HANDLE_VALUE) {
+        ios_putc('\0', &full_error);
+        const wchar_t *strings[] = { ios_utf8_to_wchar(full_error.buf) };
+        ReportEventW(
+            event_source, EVENTLOG_ERROR_TYPE, /* category */ 0, /* event_id */ (DWORD)0xE0000000L,
+           /* user_sid */ NULL, /* n_strings */ 1, /* data_size */ 0, strings, /* data */ NULL
+        );
+        free((void *)strings[0]);
+
+        if (jl_options.alert_on_critical_error) {
+            ios_putc('\0', &summary);
+            const wchar_t *message = ios_utf8_to_wchar(summary.buf);
+            MessageBoxW(NULL, message, /* title */ L"fatal error in libjulia",
+                        MB_OK | MB_ICONEXCLAMATION | MB_SYSTEMMODAL);
+            free((void *)message);
+        }
     }
-    jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
-    jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
 
-    jl_critical_error(0, 0, ExceptionInfo->ContextRecord, ct);
+    ios_close(&summary);
+    ios_close(&full_error);
     static int recursion = 0;
     if (recursion++)
         exit(1);
@@ -342,85 +409,184 @@ JL_DLLEXPORT void jl_install_sigint_handler(void)
     SetConsoleCtrlHandler((PHANDLER_ROUTINE)sigint_handler,1);
 }
 
-static volatile HANDLE hBtThread = 0;
+static TIMECAPS timecaps;
+static HANDLE hBtThread = 0;
+static uv_cond_t bt_data_prof_cond = CONDITION_VARIABLE_INIT;
+
+#ifdef _CPU_X86_64_
+// Callback data structure for profile timeout
+typedef struct {
+    _Atomic(int) *abort_ptr;
+    int tid;
+} profile_timeout_data_t;
+
+static void CALLBACK profile_timeout_cb(PVOID lpParam, BOOLEAN TimerOrWaitFired)
+{
+    profile_timeout_data_t *data = (profile_timeout_data_t*)lpParam;
+    if (TimerOrWaitFired && data != NULL && data->abort_ptr != NULL) {
+        // Timeout reached, signal an abort should occur
+        // jl_safe_fprintf(ios_safe_stderr, "profile_timeout_cb called.\n");
+        if (jl_atomic_exchange(data->abort_ptr, 2) == 1) {
+            // jl_safe_fprintf(ios_safe_stderr, "profile_timeout_cb jl_thread_resume.\n");
+            jl_thread_resume(data->tid);
+            data->tid = -1;
+        }
+    }
+}
+#endif
+
+static int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
+{
+    (void)timeout;
+    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+    if (ptls2 == NULL) // this thread is not alive
+        return 0;
+    jl_task_t *ct2 = jl_atomic_load_relaxed(&ptls2->current_task);
+    if (ct2 == NULL) // this thread is already dead
+        return 0;
+    HANDLE hThread = ptls2->system_id;
+    assert(GetCurrentThreadId() != GetThreadId(hThread));
+    if ((DWORD)-1 == SuspendThread(hThread)) {
+        // jl_safe_fprintf(ios_safe_stderr, "failed to suspend thread %d: %lu\n", tid, GetLastError());
+        return 0;
+    }
+    assert(sizeof(*ctx) == sizeof(CONTEXT));
+    memset(ctx, 0, sizeof(CONTEXT));
+    ctx->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
+    if (!GetThreadContext(hThread, ctx)) {
+        if ((DWORD)-1 == ResumeThread(hThread))
+            abort();
+        return 0;
+    }
+    return 1;
+}
+
+void jl_thread_resume(int tid)
+{
+    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+    HANDLE hThread = ptls2->system_id;
+    if ((DWORD)-1 == ResumeThread(hThread)) {
+        jl_safe_fprintf(ios_safe_stderr, "failed to resume main thread! aborting.\n");
+        abort();
+    }
+}
+
+int jl_thread_suspend(int16_t tid, bt_context_t *ctx)
+{
+    jl_lock_profile(); // prevent concurrent mutation
+    uv_mutex_lock(&jl_in_stackwalk); // prevent multi-threaded dbghelp calls
+    int success = jl_thread_suspend_and_get_state(tid, 0, ctx);
+    uv_mutex_unlock(&jl_in_stackwalk);
+    jl_unlock_profile();
+    return success;
+}
 
 static DWORD WINAPI profile_bt( LPVOID lparam )
 {
     // Note: illegal to use jl_* functions from this thread except for profiling-specific functions
+    // Dummy event for RegisterWaitForSingleObject (to use timeout callback)
+    HANDLE hProfileEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
+    if (hProfileEvent == NULL) {
+        jl_safe_fprintf(ios_safe_stderr, "failed to create profile event.\n");
+        abort();
+    }
     while (1) {
         DWORD timeout_ms = nsecprof / (GIGA / 1000);
         Sleep(timeout_ms > 0 ? timeout_ms : 1);
-        if (running) {
-            if (jl_profile_is_buffer_full()) {
-                jl_profile_stop_timer(); // does not change the thread state
-                SuspendThread(GetCurrentThread());
-                continue;
-            }
-            else {
-                uv_mutex_lock(&jl_in_stackwalk);
-                jl_lock_profile();
-                if ((DWORD)-1 == SuspendThread(hMainThread)) {
-                    fputs("failed to suspend main thread. aborting profiling.", stderr);
+        if (jl_profile_is_buffer_full())
+            jl_profile_stop_timer(); // does not change the thread state
+        if (!profile_running) {
+            uv_mutex_lock(&bt_data_prof_lock);
+            while (!profile_running)
+                uv_cond_wait(&bt_data_prof_cond, &bt_data_prof_lock);
+            uv_mutex_unlock(&bt_data_prof_lock);
+        }
+        else if (profile_all_tasks) {
+            // Don't take the stackwalk lock here since it's already taken in `jl_rec_backtrace`
+            jl_profile_task();
+        }
+        else {
+            // Profile all threads, similar to Unix implementation
+            bt_context_t c;
+            int nthreads = jl_atomic_load_acquire(&jl_n_threads);
+            int *randperm = profile_get_randperm(nthreads);
+            for (int idx = nthreads; idx-- > 0; ) {
+                int tid = randperm[idx];
+                if (!profile_running)
                     break;
-                }
-                CONTEXT ctxThread;
-                memset(&ctxThread, 0, sizeof(CONTEXT));
-                ctxThread.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
-                if (!GetThreadContext(hMainThread, &ctxThread)) {
-                    fputs("failed to get context from main thread. aborting profiling.", stderr);
+                if (jl_profile_is_buffer_full()) {
                     jl_profile_stop_timer();
+                    break;
                 }
-                else {
-                    // Get backtrace data
-                    bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
-                            bt_size_max - bt_size_cur - 1, &ctxThread, NULL);
+                if (!jl_thread_suspend(tid, &c))
+                    continue;
+                jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+                jl_task_t *t2 = jl_atomic_load_relaxed(&ptls->current_task);
+                int state = jl_atomic_load_relaxed(&ptls->sleep_check_state) == 0 ? PROFILE_STATE_THREAD_NOT_SLEEPING : PROFILE_STATE_THREAD_SLEEPING;
 
-                    jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; // given only profiling hMainThread
+                // Set up timeout handler for stackwalk
+#ifdef _CPU_X86_64_
+                _Atomic(int) abort_profiling = 0;
+                profile_timeout_data_t timeout_data;
+                timeout_data.abort_ptr = &abort_profiling;
+                timeout_data.tid = tid;
+                jl_set_profile_abort_ptr(&abort_profiling);
+                HANDLE hWaitHandle = NULL;
+                if (!RegisterWaitForSingleObject(&hWaitHandle, hProfileEvent, profile_timeout_cb,
+                                                 &timeout_data, 100, WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD)) {
+                    // Failed to register wait, proceed without timeout protection
+                    hWaitHandle = NULL;
+                }
+#endif
+                // Get backtrace data
+                profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur,
+                        profile_bt_size_max - profile_bt_size_cur - 1, &c, NULL);
+#ifdef _CPU_X86_64_
+                // Clear abort pointer from TLS
+                jl_set_profile_abort_ptr(NULL);
+                // Wait for callback to complete or cancel before continuing
+                if (hWaitHandle != NULL)
+                    UnregisterWaitEx(hWaitHandle, INVALID_HANDLE_VALUE);
+                if (timeout_data.tid != -1)
+#endif
+                    jl_thread_resume(tid);
 
-                    // store threadid but add 1 as 0 is preserved to indicate end of block
-                    bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
+                // META_OFFSET_THREADID store threadid but add 1 as 0 is preserved to indicate end of block
+                profile_bt_data_prof[profile_bt_size_cur++].uintptr = tid + 1;
 
-                    // store task id (never null)
-                    bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
+                // META_OFFSET_TASKID store task id (never null)
+                profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)t2;
 
-                    // store cpu cycle clock
-                    bt_data_prof[bt_size_cur++].uintptr = cycleclock();
+                // META_OFFSET_CPUCYCLECLOCK store cpu cycle clock
+                profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock();
 
-                    // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
-                    bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
+                // store whether thread is sleeping (don't ever encode a state as `0` since is preserved to indicate end of block)
+                profile_bt_data_prof[profile_bt_size_cur++].uintptr = state;
 
-                    // Mark the end of this block with two 0's
-                    bt_data_prof[bt_size_cur++].uintptr = 0;
-                    bt_data_prof[bt_size_cur++].uintptr = 0;
-                }
-                jl_unlock_profile();
-                uv_mutex_unlock(&jl_in_stackwalk);
-                if ((DWORD)-1 == ResumeThread(hMainThread)) {
-                    jl_profile_stop_timer();
-                    fputs("failed to resume main thread! aborting.", stderr);
-                    jl_gc_debug_critical_error();
-                    abort();
-                }
-                jl_check_profile_autostop();
+                // Mark the end of this block with two 0's
+                profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
+                profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0;
             }
+            jl_check_profile_autostop();
         }
     }
-    jl_unlock_profile();
-    uv_mutex_unlock(&jl_in_stackwalk);
+    // this is unreachable, but would be the relevant cleanup
+    uv_mutex_lock(&bt_data_prof_lock);
+    hBtThread = NULL;
+    uv_mutex_unlock(&bt_data_prof_lock);
     jl_profile_stop_timer();
-    hBtThread = 0;
+    CloseHandle(hProfileEvent);
     return 0;
 }
 
-static volatile TIMECAPS timecaps;
-
-JL_DLLEXPORT int jl_profile_start_timer(void)
+JL_DLLEXPORT int jl_profile_start_timer(uint8_t all_tasks)
 {
+    uv_mutex_lock(&bt_data_prof_lock);
     if (hBtThread == NULL) {
-
         TIMECAPS _timecaps;
         if (MMSYSERR_NOERROR != timeGetDevCaps(&_timecaps, sizeof(_timecaps))) {
-            fputs("failed to get timer resolution", stderr);
+            uv_mutex_unlock(&bt_data_prof_lock);
+            jl_safe_fprintf(ios_safe_stderr, "failed to get timer resolution.\n");
             return -2;
         }
         timecaps = _timecaps;
@@ -432,30 +598,33 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
             0,                      // argument to thread function
             0,                      // use default creation flags
             0);                     // returns the thread identifier
-        if (hBtThread == NULL)
+        if (hBtThread == NULL) {
+            uv_mutex_unlock(&bt_data_prof_lock);
+            jl_safe_fprintf(ios_safe_stderr, "failed to allocate profile thread.\n");
             return -1;
-        (void)SetThreadPriority(hBtThread, THREAD_PRIORITY_ABOVE_NORMAL);
-    }
-    else {
-        if ((DWORD)-1 == ResumeThread(hBtThread)) {
-            fputs("failed to resume profiling thread.", stderr);
-            return -2;
         }
+        (void)SetThreadPriority(hBtThread, THREAD_PRIORITY_ABOVE_NORMAL);
     }
-    if (running == 0) {
+    if (profile_running == 0) {
         // Failure to change the timer resolution is not fatal. However, it is important to
         // ensure that the timeBeginPeriod/timeEndPeriod is paired.
         if (TIMERR_NOERROR != timeBeginPeriod(timecaps.wPeriodMin))
             timecaps.wPeriodMin = 0;
     }
-    running = 1; // set `running` finally
+    profile_all_tasks = all_tasks;
+    profile_running = 1; // set `profile_running` finally
+    uv_cond_broadcast(&bt_data_prof_cond);
+    uv_mutex_unlock(&bt_data_prof_lock);
     return 0;
 }
 JL_DLLEXPORT void jl_profile_stop_timer(void)
 {
-    if (running && timecaps.wPeriodMin)
+    uv_mutex_lock(&bt_data_prof_lock);
+    if (profile_running && timecaps.wPeriodMin)
         timeEndPeriod(timecaps.wPeriodMin);
-    running = 0;
+    profile_running = 0;
+    profile_all_tasks = 0;
+    uv_mutex_unlock(&bt_data_prof_lock);
 }
 
 void jl_install_default_signal_handlers(void)
diff --git a/src/simplevector.c b/src/simplevector.c
index 65217715ae55f..853b2493dac58 100644
--- a/src/simplevector.c
+++ b/src/simplevector.c
@@ -22,7 +22,8 @@ JL_DLLEXPORT jl_svec_t *(ijl_svec)(size_t n, ...)
 jl_svec_t *(jl_perm_symsvec)(size_t n, ...)
 {
     if (n == 0) return jl_emptysvec;
-    jl_svec_t *jv = (jl_svec_t*)jl_gc_permobj((n + 1) * sizeof(void*), jl_simplevector_type);
+    jl_task_t *ct = jl_current_task;
+    jl_svec_t *jv = (jl_svec_t*)jl_gc_permobj(ct->ptls, (n + 1) * sizeof(void*), jl_simplevector_type, 0);
     jl_set_typetagof(jv, jl_simplevector_tag, jl_astaggedvalue(jv)->bits.gc);
     jl_svec_set_len_unsafe(jv, n);
     va_list args;
@@ -56,6 +57,19 @@ JL_DLLEXPORT jl_svec_t *jl_svec2(void *a, void *b)
     return v;
 }
 
+JL_DLLEXPORT jl_svec_t *jl_svec3(void *a, void *b, void *c)
+{
+    jl_task_t *ct = jl_current_task;
+    jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 4,
+                                           jl_simplevector_type);
+    jl_set_typetagof(v, jl_simplevector_tag, 0);
+    jl_svec_set_len_unsafe(v, 3);
+    jl_svec_data(v)[0] = (jl_value_t*)a;
+    jl_svec_data(v)[1] = (jl_value_t*)b;
+    jl_svec_data(v)[2] = (jl_value_t*)c;
+    return v;
+}
+
 JL_DLLEXPORT jl_svec_t *jl_alloc_svec_uninit(size_t n)
 {
     jl_task_t *ct = jl_current_task;
@@ -79,7 +93,7 @@ JL_DLLEXPORT jl_svec_t *jl_svec_copy(jl_svec_t *a)
 {
     size_t n = jl_svec_len(a);
     jl_svec_t *c = jl_alloc_svec_uninit(n);
-    memmove_refs((void**)jl_svec_data(c), (void**)jl_svec_data(a), n);
+    memmove_refs((_Atomic(void*)*)jl_svec_data(c), (_Atomic(void*)*)jl_svec_data(a), n);
     return c;
 }
 
@@ -96,10 +110,3 @@ JL_DLLEXPORT size_t (jl_svec_len)(jl_svec_t *t) JL_NOTSAFEPOINT
 {
     return jl_svec_len(t);
 }
-
-JL_DLLEXPORT jl_value_t *jl_svec_ref(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i)
-{
-    jl_value_t *v = jl_svecref(t, (size_t)i);
-    assert(v != NULL);
-    return v;
-}
diff --git a/src/smallintset.c b/src/smallintset.c
index fa647b57e7d3e..a80a18009c9db 100644
--- a/src/smallintset.c
+++ b/src/smallintset.c
@@ -24,87 +24,103 @@
 extern "C" {
 #endif
 
-static inline size_t jl_intref(const jl_array_t *arr, size_t idx) JL_NOTSAFEPOINT
+static inline size_t ignore_tombstone(size_t val, size_t tombstone) JL_NOTSAFEPOINT
 {
-    jl_value_t *el = jl_tparam0(jl_typeof(arr));
-    if (el == (jl_value_t*)jl_uint8_type)
-        return jl_atomic_load_relaxed(&((_Atomic(uint8_t)*)jl_array_data(arr))[idx]);
-    else if (el == (jl_value_t*)jl_uint16_type)
-        return jl_atomic_load_relaxed(&((_Atomic(uint16_t)*)jl_array_data(arr))[idx]);
-    else if (el == (jl_value_t*)jl_uint32_type)
-        return jl_atomic_load_relaxed(&((_Atomic(uint32_t)*)jl_array_data(arr))[idx]);
+    return val == tombstone ? 0 : val;
+}
+static inline size_t jl_intref(const jl_genericmemory_t *arr, size_t idx) JL_NOTSAFEPOINT
+{
+    jl_value_t *el = (jl_value_t*)jl_typetagof(arr);
+    if (el == jl_memory_uint8_type)
+        return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint8_t)*)arr->ptr)[idx]), (uint8_t)-1);
+    else if (el == jl_memory_uint16_type)
+        return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint16_t)*)arr->ptr)[idx]), (uint16_t)-1);
+    else if (el == jl_memory_uint32_type)
+        return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint32_t)*)arr->ptr)[idx]), UINT32_MAX);
     else
         abort();
 }
 
-static inline size_t jl_intref_acquire(const jl_array_t *arr, size_t idx) JL_NOTSAFEPOINT
+static inline size_t acquire_tombstone(size_t val, size_t tombstone) JL_NOTSAFEPOINT
 {
-    jl_value_t *el = jl_tparam0(jl_typeof(arr));
-    if (el == (jl_value_t*)jl_uint8_type)
-        return jl_atomic_load_acquire(&((_Atomic(uint8_t)*)jl_array_data(arr))[idx]);
-    else if (el == (jl_value_t*)jl_uint16_type)
-        return jl_atomic_load_acquire(&((_Atomic(uint16_t)*)jl_array_data(arr))[idx]);
-    else if (el == (jl_value_t*)jl_uint32_type)
-        return jl_atomic_load_acquire(&((_Atomic(uint32_t)*)jl_array_data(arr))[idx]);
+    return val == tombstone ? (size_t)-1 : val;
+}
+static inline size_t jl_intref_acquire(const jl_genericmemory_t *arr, size_t idx) JL_NOTSAFEPOINT
+{
+    jl_value_t *el = (jl_value_t*)jl_typetagof(arr);
+    if (el == jl_memory_uint8_type)
+        return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint8_t)*)arr->ptr)[idx]), (uint8_t)-1);
+    else if (el == jl_memory_uint16_type)
+        return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint16_t)*)arr->ptr)[idx]), (uint16_t)-1);
+    else if (el == jl_memory_uint32_type)
+        return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint32_t)*)arr->ptr)[idx]), UINT32_MAX);
     else
         abort();
 }
 
-static inline void jl_intset_release(const jl_array_t *arr, size_t idx, size_t val) JL_NOTSAFEPOINT
+static inline void jl_intset_release(const jl_genericmemory_t *arr, size_t idx, size_t val) JL_NOTSAFEPOINT
 {
-    jl_value_t *el = jl_tparam0(jl_typeof(arr));
-    if (el == (jl_value_t*)jl_uint8_type)
-        jl_atomic_store_release(&((_Atomic(uint8_t)*)jl_array_data(arr))[idx], val);
-    else if (el == (jl_value_t*)jl_uint16_type)
-        jl_atomic_store_release(&((_Atomic(uint16_t)*)jl_array_data(arr))[idx], val);
-    else if (el == (jl_value_t*)jl_uint32_type)
-        jl_atomic_store_release(&((_Atomic(uint32_t)*)jl_array_data(arr))[idx], val);
+    jl_value_t *el = (jl_value_t*)jl_typetagof(arr);
+    if (el == jl_memory_uint8_type)
+        jl_atomic_store_release(&((_Atomic(uint8_t)*)arr->ptr)[idx], val);
+    else if (el == jl_memory_uint16_type)
+        jl_atomic_store_release(&((_Atomic(uint16_t)*)arr->ptr)[idx], val);
+    else if (el == jl_memory_uint32_type)
+        jl_atomic_store_release(&((_Atomic(uint32_t)*)arr->ptr)[idx], val);
     else
         abort();
 }
 
-static inline size_t jl_max_int(const jl_array_t *arr)
+static inline size_t jl_max_int(const jl_genericmemory_t *arr) JL_NOTSAFEPOINT
 {
-    jl_value_t *el = jl_tparam0(jl_typeof(arr));
-    if (el == (jl_value_t*)jl_uint8_type)
+    jl_value_t *el = (jl_value_t*)jl_typetagof(arr);
+    if (el == jl_memory_uint8_type)
         return 0xFF;
-    else if (el == (jl_value_t*)jl_uint16_type)
+    else if (el == jl_memory_uint16_type)
         return 0xFFFF;
-    else if (el == (jl_value_t*)jl_uint32_type)
+    else if (el == jl_memory_uint32_type)
         return 0xFFFFFFFF;
-    else if (el == (jl_value_t*)jl_any_type)
+    else if (el == jl_memory_any_type)
         return 0;
     else
         abort();
 }
 
-static jl_array_t *jl_alloc_int_1d(size_t np, size_t len)
+void smallintset_empty(const jl_genericmemory_t *a) JL_NOTSAFEPOINT
+{
+    size_t elsize;
+    jl_value_t *el = (jl_value_t*)jl_typetagof(a);
+    if (el == jl_memory_uint8_type)
+        elsize = sizeof(uint8_t);
+    else if (el == jl_memory_uint16_type)
+        elsize = sizeof(uint16_t);
+    else if (el == jl_memory_uint32_type)
+        elsize = sizeof(uint32_t);
+    else if (el == jl_memory_any_type)
+        elsize = 0;
+    else
+        abort();
+    memset(a->ptr, 0, a->length * elsize);
+}
+
+static jl_genericmemory_t *jl_alloc_int_1d(size_t np, size_t len)
 {
     jl_value_t *ty;
-    if (np < 0xFF) {
-        ty = jl_array_uint8_type;
-     }
-    else if (np < 0xFFFF) {
-        static jl_value_t *int16 JL_ALWAYS_LEAFTYPE = NULL;
-        if (int16 == NULL)
-            int16 = jl_apply_array_type((jl_value_t*)jl_uint16_type, 1);
-        ty = int16;
-    }
-    else {
-        assert(np < 0x7FFFFFFF);
-        static jl_value_t *int32 JL_ALWAYS_LEAFTYPE = NULL;
-        if (int32 == NULL)
-            int32 = jl_apply_array_type((jl_value_t*)jl_uint32_type, 1);
-        ty = int32;
-    }
-    jl_array_t *a = jl_alloc_array_1d(ty, len);
-    memset(a->data, 0, len * a->elsize);
+    if (np < 0xFF)
+        ty = jl_memory_uint8_type;
+    else if (np < 0xFFFF)
+        ty = jl_memory_uint16_type;
+    else
+        ty = jl_memory_uint32_type;
+    assert(np < 0x7FFFFFFF);
+    jl_genericmemory_t *a = jl_alloc_genericmemory(ty, len);
+    smallintset_empty(a);
     return a;
 }
 
-ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void *key, jl_svec_t *data, uint_t hv)
+ssize_t jl_smallintset_lookup(jl_genericmemory_t *cache, smallintset_eq eq, const void *key, jl_value_t *data, uint_t hv, int pop)
 {
-    size_t sz = jl_array_len(cache);
+    size_t sz = cache->length;
     if (sz == 0)
         return -1;
     JL_GC_PUSH1(&cache);
@@ -118,8 +134,10 @@ ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void *
             JL_GC_POP();
             return -1;
         }
-        if (eq(val1 - 1, key, data, hv)) {
+        if (val1 != -1 && eq(val1 - 1, key, data, hv)) {
             JL_GC_POP();
+            if (pop)
+                jl_intset_release(cache, index, (size_t)-1); // replace with tombstone
             return val1 - 1;
         }
         index = (index + 1) & (sz - 1);
@@ -129,9 +147,9 @@ ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void *
     return -1;
 }
 
-static int smallintset_insert_(jl_array_t *a, uint_t hv, size_t val1)
+static int smallintset_insert_(jl_genericmemory_t *a, uint_t hv, size_t val1) JL_NOTSAFEPOINT
 {
-    size_t sz = jl_array_len(a);
+    size_t sz = a->length;
     if (sz <= 1)
         return 0;
     size_t orig, index, iter;
@@ -149,16 +167,17 @@ static int smallintset_insert_(jl_array_t *a, uint_t hv, size_t val1)
     } while (iter <= maxprobe && index != orig);
     return 0;
 }
+//}
 
-static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np);
-
-void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data)
+void jl_smallintset_insert(_Atomic(jl_genericmemory_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_value_t *data)
 {
-    jl_array_t *a = jl_atomic_load_relaxed(pcache);
-    if (val + 1 >  jl_max_int(a))
-        smallintset_rehash(pcache, parent, hash, data, jl_array_len(a), val + 1);
+    jl_genericmemory_t *a = jl_atomic_load_relaxed(pcache);
+    if (val + 1 >= jl_max_int(a)) {
+        a = smallintset_rehash(a, hash, data, a->length, val + 1);
+        jl_atomic_store_release(pcache, a);
+        if (parent) jl_gc_wb(parent, a);
+    }
     while (1) {
-        a = jl_atomic_load_relaxed(pcache);
         if (smallintset_insert_(a, hash(val, data), val + 1))
             return;
 
@@ -168,21 +187,22 @@ void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, sma
         /* lots of time rehashing all the keys over and over. */
         size_t newsz;
         a = jl_atomic_load_relaxed(pcache);
-        size_t sz = jl_array_len(a);
+        size_t sz = a->length;
         if (sz < HT_N_INLINE)
             newsz = HT_N_INLINE;
         else if (sz >= (1 << 19) || (sz <= (1 << 8)))
             newsz = sz << 1;
         else
             newsz = sz << 2;
-        smallintset_rehash(pcache, parent, hash, data, newsz, 0);
+        a = smallintset_rehash(a, hash, data, newsz, 0);
+        jl_atomic_store_release(pcache, a);
+        if (parent) jl_gc_wb(parent, a);
     }
 }
 
-static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np)
+jl_genericmemory_t* smallintset_rehash(jl_genericmemory_t* a, smallintset_hash hash, jl_value_t *data, size_t newsz, size_t np)
 {
-    jl_array_t *a = jl_atomic_load_relaxed(pcache);
-    size_t sz = jl_array_len(a);
+    size_t sz = a->length;
     size_t i;
     for (i = 0; i < sz; i += 1) {
         size_t val = jl_intref(a, i);
@@ -190,7 +210,7 @@ static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent,
             np = val;
     }
     while (1) {
-        jl_array_t *newa = jl_alloc_int_1d(np, newsz);
+        jl_genericmemory_t *newa = jl_alloc_int_1d(np + 1, newsz);
         JL_GC_PUSH1(&newa);
         for (i = 0; i < sz; i += 1) {
             size_t val1 = jl_intref(a, i);
@@ -201,16 +221,12 @@ static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent,
             }
         }
         JL_GC_POP();
-        if (i == sz) {
-            jl_atomic_store_release(pcache, newa);
-            jl_gc_wb(parent, newa);
-            return;
-        }
+        if (i == sz)
+            return newa;
         newsz <<= 1;
     }
 }
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/stackwalk.c b/src/stackwalk.c
index 18bf4b2126938..96f263dfd7f68 100644
--- a/src/stackwalk.c
+++ b/src/stackwalk.c
@@ -5,6 +5,7 @@
   utilities for walking the stack and looking up information about code addresses
 */
 #include <inttypes.h>
+#include "gc-common.h"
 #include "julia.h"
 #include "julia_internal.h"
 #include "threading.h"
@@ -13,6 +14,7 @@
 // define `jl_unw_get` as a macro, since (like setjmp)
 // returning from the callee function will invalidate the context
 #ifdef _OS_WINDOWS_
+#include <winternl.h>
 uv_mutex_t jl_in_stackwalk;
 #define jl_unw_get(context) (RtlCaptureContext(context), 0)
 #elif !defined(JL_DISABLE_LIBUNWIND)
@@ -74,15 +76,17 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
     volatile int need_more_space = 0;
     uintptr_t return_ip = 0;
     uintptr_t thesp = 0;
-#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-    uv_mutex_lock(&jl_in_stackwalk);
+#if defined(_OS_WINDOWS_)
+#if !defined(_CPU_X86_64_)
     if (!from_signal_handler) {
         // Workaround 32-bit windows bug missing top frame
         // See for example https://bugs.chromium.org/p/crashpad/issues/detail?id=53
         skip--;
     }
 #endif
-#if !defined(_OS_WINDOWS_)
+    jl_lock_profile();
+#endif
+#if !defined(_OS_WINDOWS_) // no point on windows, since RtlVirtualUnwind won't give us a second chance if the segfault happens in ntdll
     jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
     jl_set_safe_restore(&buf);
@@ -97,9 +101,13 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
             }
             uintptr_t oldsp = thesp;
             have_more_frames = jl_unw_step(cursor, from_signal_handler, &return_ip, &thesp);
-            if (oldsp >= thesp && !jl_running_under_rr(0)) {
-                // The stack pointer is clearly bad, as it must grow downwards.
+            if ((n < 2 ? oldsp > thesp : oldsp >= thesp) && !jl_running_under_rr(0)) {
+                // The stack pointer is clearly bad, as it must grow downwards,
                 // But sometimes the external unwinder doesn't check that.
+                // Except for n==0 when there is no oldsp and n==1 on all platforms but i686/x86_64.
+                // (on x86, the platform first pushes the new stack frame, then does the
+                // call, on almost all other platforms, the platform first does the call,
+                // then the user pushes the link register to the frame).
                 have_more_frames = 0;
             }
             if (return_ip == 0) {
@@ -131,11 +139,11 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
             // * The way that libunwind handles it in `unw_get_proc_name`:
             //   https://lists.nongnu.org/archive/html/libunwind-devel/2014-06/msg00025.html
             uintptr_t call_ip = return_ip;
+            #if defined(_CPU_ARM_)
             // ARM instruction pointer encoding uses the low bit as a flag for
             // thumb mode, which must be cleared before further use. (Note not
             // needed for ARM AArch64.) See
             // https://github.com/libunwind/libunwind/pull/131
-            #ifdef _CPU_ARM_
             call_ip &= ~(uintptr_t)0x1;
             #endif
             // Now there's two main cases to adjust for:
@@ -183,9 +191,8 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
         if (n > 0) n -= 1;
     }
     jl_set_safe_restore(old_buf);
-#endif
-#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-    uv_mutex_unlock(&jl_in_stackwalk);
+#else
+    jl_unlock_profile();
 #endif
     *bt_size = n;
     return need_more_space;
@@ -207,7 +214,7 @@ NOINLINE size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize,
 //
 // The first `skip` frames are omitted, in addition to omitting the frame from
 // `rec_backtrace` itself.
-NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip)
+NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT
 {
     bt_context_t context;
     memset(&context, 0, sizeof(context));
@@ -223,6 +230,24 @@ NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip
     return bt_size;
 }
 
+NOINLINE int failed_to_sample_task_fun(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT
+{
+    if (maxsize < 1) {
+        return 0;
+    }
+    bt_data[0].uintptr = (uintptr_t) &failed_to_sample_task_fun;
+    return 1;
+}
+
+NOINLINE int failed_to_stop_thread_fun(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT
+{
+    if (maxsize < 1) {
+        return 0;
+    }
+    bt_data[0].uintptr = (uintptr_t) &failed_to_stop_thread_fun;
+    return 1;
+}
+
 static jl_value_t *array_ptr_void_type JL_ALWAYS_LEAFTYPE = NULL;
 // Return backtrace information as an svec of (bt1, bt2, [sp])
 //
@@ -260,21 +285,21 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip)
             uintptr_t *sp_ptr = NULL;
             if (returnsp) {
                 jl_array_grow_end(sp, maxincr);
-                sp_ptr = (uintptr_t*)jl_array_data(sp) + offset;
+                sp_ptr = jl_array_data(sp, uintptr_t) + offset;
             }
             size_t size_incr = 0;
-            have_more_frames = jl_unw_stepn(&cursor, (jl_bt_element_t*)jl_array_data(ip) + offset,
+            have_more_frames = jl_unw_stepn(&cursor, jl_array_data(ip, jl_bt_element_t) + offset,
                                             &size_incr, sp_ptr, maxincr, skip, &pgcstack, 0);
             skip = 0;
             offset += size_incr;
         }
-        jl_array_del_end(ip, jl_array_len(ip) - offset);
+        jl_array_del_end(ip, jl_array_nrows(ip) - offset);
         if (returnsp)
-            jl_array_del_end(sp, jl_array_len(sp) - offset);
+            jl_array_del_end(sp, jl_array_nrows(sp) - offset);
 
         size_t n = 0;
-        jl_bt_element_t *bt_data = (jl_bt_element_t*)jl_array_data(ip);
-        while (n < jl_array_len(ip)) {
+        jl_bt_element_t *bt_data = jl_array_data(ip, jl_bt_element_t);
+        while (n < jl_array_nrows(ip)) {
             jl_bt_element_t *bt_entry = bt_data + n;
             if (!jl_bt_is_native(bt_entry)) {
                 size_t njlvals = jl_bt_num_jlvals(bt_entry);
@@ -303,7 +328,11 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size,
     bt = *btout = jl_alloc_array_1d(array_ptr_void_type, bt_size);
     static_assert(sizeof(jl_bt_element_t) == sizeof(void*),
                   "jl_bt_element_t is presented as Ptr{Cvoid} on julia side");
-    memcpy(bt->data, bt_data, bt_size * sizeof(jl_bt_element_t));
+    if (bt_data != NULL) {
+        memcpy(jl_array_data(bt, jl_bt_element_t), bt_data, bt_size * sizeof(jl_bt_element_t));
+    } else {
+        assert(bt_size == 0);
+    }
     bt2 = *bt2out = jl_alloc_array_1d(jl_array_any_type, 0);
     // Scan the backtrace buffer for any gc-managed values
     for (size_t i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
@@ -375,21 +404,16 @@ JL_DLLEXPORT jl_value_t *jl_get_excstack(jl_task_t* task, int include_bt, int ma
 }
 
 #if defined(_OS_WINDOWS_)
+
 // XXX: these caches should be per-thread
 #ifdef _CPU_X86_64_
-static UNWIND_HISTORY_TABLE HistoryTable;
-#else
-static struct {
-    DWORD64 dwAddr;
-    DWORD64 ImageBase;
-} HistoryTable;
-#endif
+static __thread UNWIND_HISTORY_TABLE HistoryTable;
+static __thread _Atomic(int) *abort_profile_ptr = NULL;
+
 static PVOID CALLBACK JuliaFunctionTableAccess64(
         _In_  HANDLE hProcess,
         _In_  DWORD64 AddrBase)
 {
-    //jl_printf(JL_STDOUT, "lookup %d\n", AddrBase);
-#ifdef _CPU_X86_64_
     DWORD64 ImageBase;
     PRUNTIME_FUNCTION fn = RtlLookupFunctionEntry(AddrBase, &ImageBase, &HistoryTable);
     if (fn)
@@ -398,16 +422,11 @@ static PVOID CALLBACK JuliaFunctionTableAccess64(
     PVOID ftable = SymFunctionTableAccess64(hProcess, AddrBase);
     uv_mutex_unlock(&jl_in_stackwalk);
     return ftable;
-#else
-    return SymFunctionTableAccess64(hProcess, AddrBase);
-#endif
 }
 static DWORD64 WINAPI JuliaGetModuleBase64(
         _In_  HANDLE hProcess,
         _In_  DWORD64 dwAddr)
 {
-    //jl_printf(JL_STDOUT, "lookup base %d\n", dwAddr);
-#ifdef _CPU_X86_64_
     DWORD64 ImageBase;
     PRUNTIME_FUNCTION fn = RtlLookupFunctionEntry(dwAddr, &ImageBase, &HistoryTable);
     if (fn)
@@ -416,7 +435,22 @@ static DWORD64 WINAPI JuliaGetModuleBase64(
     DWORD64 fbase = SymGetModuleBase64(hProcess, dwAddr);
     uv_mutex_unlock(&jl_in_stackwalk);
     return fbase;
+}
 #else
+static __thread struct {
+    DWORD64 dwAddr;
+    DWORD64 ImageBase;
+} HistoryTable;
+static PVOID CALLBACK JuliaFunctionTableAccess64(
+        _In_  HANDLE hProcess,
+        _In_  DWORD64 AddrBase)
+{
+    return SymFunctionTableAccess64(hProcess, AddrBase);
+}
+static DWORD64 WINAPI JuliaGetModuleBase64(
+        _In_  HANDLE hProcess,
+        _In_  DWORD64 dwAddr)
+{
     if (dwAddr == HistoryTable.dwAddr)
         return HistoryTable.ImageBase;
     DWORD64 ImageBase = jl_getUnwindInfo(dwAddr);
@@ -426,25 +460,105 @@ static DWORD64 WINAPI JuliaGetModuleBase64(
         return ImageBase;
     }
     return SymGetModuleBase64(hProcess, dwAddr);
-#endif
 }
+#endif
 
 // Might be called from unmanaged thread.
-volatile int needsSymRefreshModuleList;
-BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
+static PVOID dll_notification_cookie;
+
+// Structure definitions for LdrDllNotification
+typedef struct _LDR_DLL_LOADED_NOTIFICATION_DATA {
+    ULONG Flags;
+    PCUNICODE_STRING FullDllName;
+    PCUNICODE_STRING BaseDllName;
+    PVOID DllBase;
+    ULONG SizeOfImage;
+} LDR_DLL_LOADED_NOTIFICATION_DATA;
+typedef const LDR_DLL_LOADED_NOTIFICATION_DATA *PCLDR_DLL_LOADED_NOTIFICATION_DATA;
 
-JL_DLLEXPORT void jl_refresh_dbg_module_list(void)
+typedef struct _LDR_DLL_UNLOADED_NOTIFICATION_DATA {
+    ULONG Flags;
+    PCUNICODE_STRING FullDllName;
+    PCUNICODE_STRING BaseDllName;
+    PVOID DllBase;
+    ULONG SizeOfImage;
+} LDR_DLL_UNLOADED_NOTIFICATION_DATA;
+typedef const LDR_DLL_UNLOADED_NOTIFICATION_DATA *PCLDR_DLL_UNLOADED_NOTIFICATION_DATA;
+
+typedef union _LDR_DLL_NOTIFICATION_DATA {
+    LDR_DLL_LOADED_NOTIFICATION_DATA Loaded;
+    LDR_DLL_UNLOADED_NOTIFICATION_DATA Unloaded;
+} LDR_DLL_NOTIFICATION_DATA;
+typedef const LDR_DLL_NOTIFICATION_DATA *PCLDR_DLL_NOTIFICATION_DATA;
+
+#define LDR_DLL_NOTIFICATION_REASON_LOADED   1
+#define LDR_DLL_NOTIFICATION_REASON_UNLOADED 2
+
+// Forward declarations for ntdll functions
+typedef VOID CALLBACK (*PLDR_DLL_NOTIFICATION_FUNCTION)(
+  ULONG                       NotificationReason,
+  PCLDR_DLL_NOTIFICATION_DATA NotificationData,
+  PVOID                       Context
+);
+NTSTATUS NTAPI LdrRegisterDllNotification(ULONG Flags, PLDR_DLL_NOTIFICATION_FUNCTION NotificationFunction, PVOID Context, PVOID *Cookie);
+NTSTATUS NTAPI LdrUnregisterDllNotification(PVOID Cookie);
+
+// Callback for LdrRegisterDllNotification
+static VOID CALLBACK dll_notification_callback(
+    ULONG NotificationReason,
+    PCLDR_DLL_NOTIFICATION_DATA NotificationData,
+    PVOID Context)
 {
-    if (needsSymRefreshModuleList && hSymRefreshModuleList != NULL) {
-        hSymRefreshModuleList(GetCurrentProcess());
-        needsSymRefreshModuleList = 0;
+    (void)Context;
+    uv_mutex_lock(&jl_in_stackwalk);
+    // Store DLL information and update symbol handler based on notification reason
+    if (NotificationReason == LDR_DLL_NOTIFICATION_REASON_LOADED) {
+        const LDR_DLL_LOADED_NOTIFICATION_DATA *data = &NotificationData->Loaded;
+        SymLoadModuleExW(GetCurrentProcess(), NULL,
+                         data->FullDllName->Buffer,
+                         data->BaseDllName->Buffer,
+                         (uintptr_t)data->DllBase,
+                         data->SizeOfImage,
+                         NULL,
+                         0);
+    }
+    else if (NotificationReason == LDR_DLL_NOTIFICATION_REASON_UNLOADED) {
+        const LDR_DLL_UNLOADED_NOTIFICATION_DATA *data = &NotificationData->Unloaded;
+        SymUnloadModule64(GetCurrentProcess(), (uintptr_t)data->DllBase);
     }
+    uv_mutex_unlock(&jl_in_stackwalk);
 }
+
+// Initialize stackwalk infrastructure (DLL tracking and profiling)
+void jl_init_stackwalk(void)
+{
+    uv_mutex_init(&jl_in_stackwalk);
+    SymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES | SYMOPT_IGNORE_CVREC);
+    if (!SymInitialize(GetCurrentProcess(), "", 1))
+        jl_safe_printf("WARNING: failed to initialize stack walk info\n");
+    LdrRegisterDllNotification(0, dll_notification_callback, NULL, &dll_notification_cookie);
+}
+
+// Finalize stackwalk infrastructure
+void jl_fin_stackwalk(void)
+{
+    if (dll_notification_cookie) {
+        LdrUnregisterDllNotification(dll_notification_cookie);
+        dll_notification_cookie = NULL;
+    }
+}
+
+// Set the abort_profile_ptr in TLS
+#ifdef _CPU_X86_64_
+JL_DLLEXPORT void jl_set_profile_abort_ptr(_Atomic(int) *abort_ptr) JL_NOTSAFEPOINT
+{
+    abort_profile_ptr = abort_ptr;
+}
+#endif
+
 static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *Context)
 {
     int result;
-    uv_mutex_lock(&jl_in_stackwalk);
-    jl_refresh_dbg_module_list();
 #if !defined(_CPU_X86_64_)
     memset(&cursor->stackframe, 0, sizeof(cursor->stackframe));
     cursor->stackframe.AddrPC.Offset = Context->Eip;
@@ -454,14 +568,15 @@ static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *Context)
     cursor->stackframe.AddrStack.Mode = AddrModeFlat;
     cursor->stackframe.AddrFrame.Mode = AddrModeFlat;
     cursor->context = *Context;
+    uv_mutex_lock(&jl_in_stackwalk);
     result = StackWalk64(IMAGE_FILE_MACHINE_I386, GetCurrentProcess(), hMainThread,
             &cursor->stackframe, &cursor->context, NULL, JuliaFunctionTableAccess64,
             JuliaGetModuleBase64, NULL);
+    uv_mutex_unlock(&jl_in_stackwalk);
 #else
     *cursor = *Context;
     result = 1;
 #endif
-    uv_mutex_unlock(&jl_in_stackwalk);
     return result;
 }
 
@@ -493,8 +608,10 @@ static int jl_unw_step(bt_cursor_t *cursor, int from_signal_handler, uintptr_t *
         return cursor->stackframe.AddrPC.Offset != 0;
     }
 
+    uv_mutex_lock(&jl_in_stackwalk);
     BOOL result = StackWalk64(IMAGE_FILE_MACHINE_I386, GetCurrentProcess(), hMainThread,
         &cursor->stackframe, &cursor->context, NULL, JuliaFunctionTableAccess64, JuliaGetModuleBase64, NULL);
+    uv_mutex_unlock(&jl_in_stackwalk);
     return result;
 #else
     *ip = (uintptr_t)cursor->Rip;
@@ -507,12 +624,23 @@ static int jl_unw_step(bt_cursor_t *cursor, int from_signal_handler, uintptr_t *
         return cursor->Rip != 0;
     }
 
+    // Set can-abort flag
+    _Atomic(int) *abort_ptr = abort_profile_ptr;
+    if (abort_ptr && jl_atomic_exchange_relaxed(abort_ptr, 1) != 0) {
+        jl_atomic_store_relaxed(abort_ptr, 3);
+        return 0; // aborted
+    }
+
     DWORD64 ImageBase = JuliaGetModuleBase64(GetCurrentProcess(), cursor->Rip - !from_signal_handler);
-    if (!ImageBase)
-        return 0;
+    PRUNTIME_FUNCTION FunctionEntry = ImageBase ? (PRUNTIME_FUNCTION)JuliaFunctionTableAccess64(
+        GetCurrentProcess(), cursor->Rip - !from_signal_handler) : NULL;
+
+    // Check if can-abort flag was removed, or remove it
+    if (abort_ptr && jl_atomic_exchange_relaxed(abort_ptr, 0) != 1) {
+        jl_atomic_store_relaxed(abort_ptr, 3);
+        return 0; // abort
+    }
 
-    PRUNTIME_FUNCTION FunctionEntry = (PRUNTIME_FUNCTION)JuliaFunctionTableAccess64(
-        GetCurrentProcess(), cursor->Rip - !from_signal_handler);
     if (!FunctionEntry) {
         // Not code or bad unwind?
         return 0;
@@ -605,7 +733,7 @@ JL_DLLEXPORT jl_value_t *jl_lookup_code_address(void *ip, int skipC)
             jl_svecset(r, 1, jl_empty_sym);
         free(frame.file_name);
         jl_svecset(r, 2, jl_box_long(frame.line));
-        jl_svecset(r, 3, frame.linfo != NULL ? (jl_value_t*)frame.linfo : jl_nothing);
+        jl_svecset(r, 3, frame.ci != NULL ? (jl_value_t*)frame.ci : jl_nothing);
         jl_svecset(r, 4, jl_box_bool(frame.fromC));
         jl_svecset(r, 5, jl_box_bool(frame.inlined));
     }
@@ -614,22 +742,22 @@ JL_DLLEXPORT jl_value_t *jl_lookup_code_address(void *ip, int skipC)
     return rs;
 }
 
-static void jl_safe_print_codeloc(const char* func_name, const char* file_name,
-                                  int line, int inlined) JL_NOTSAFEPOINT
+static void jl_safe_fprint_codeloc(ios_t *s, const char* func_name, const char* file_name,
+                                   int line, int inlined) JL_NOTSAFEPOINT
 {
     const char *inlined_str = inlined ? " [inlined]" : "";
     if (line != -1) {
-        jl_safe_printf("%s at %s:%d%s\n", func_name, file_name, line, inlined_str);
+        jl_safe_fprintf(s, "%s at %s:%d%s\n", func_name, file_name, line, inlined_str);
     }
     else {
-        jl_safe_printf("%s at %s (unknown line)%s\n", func_name, file_name, inlined_str);
+        jl_safe_fprintf(s, "%s at %s (unknown line)%s\n", func_name, file_name, inlined_str);
     }
 }
 
 // Print function, file and line containing native instruction pointer `ip` by
 // looking up debug info. Prints multiple such frames when `ip` points to
 // inlined code.
-void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT
+void jl_fprint_native_codeloc(ios_t *s, uintptr_t ip) JL_NOTSAFEPOINT
 {
     // This function is not allowed to reference any TLS variables since
     // it can be called from an unmanaged thread on OSX.
@@ -641,61 +769,125 @@ void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT
     for (i = 0; i < n; i++) {
         jl_frame_t frame = frames[i];
         if (!frame.func_name) {
-            jl_safe_printf("unknown function (ip: %p)\n", (void*)ip);
+            jl_safe_fprintf(s, "unknown function (ip: %p) at %s\n", (void*)ip, frame.file_name ? frame.file_name : "(unknown file)");
         }
         else {
-            jl_safe_print_codeloc(frame.func_name, frame.file_name, frame.line, frame.inlined);
+            jl_safe_fprint_codeloc(s, frame.func_name, frame.file_name, frame.line, frame.inlined);
             free(frame.func_name);
-            free(frame.file_name);
         }
+        free(frame.file_name);
     }
     free(frames);
 }
 
+const char *jl_debuginfo_file1(jl_debuginfo_t *debuginfo)
+{
+    jl_value_t *def = debuginfo->def;
+    if (jl_is_method_instance(def))
+        def = ((jl_method_instance_t*)def)->def.value;
+    if (jl_is_method(def))
+        def = (jl_value_t*)((jl_method_t*)def)->file;
+    if (jl_is_symbol(def))
+        return jl_symbol_name((jl_sym_t*)def);
+    return "<unknown>";
+}
+
+const char *jl_debuginfo_file(jl_debuginfo_t *debuginfo)
+{
+    jl_debuginfo_t *linetable = debuginfo->linetable;
+    while ((jl_value_t*)linetable != jl_nothing) {
+        debuginfo = linetable;
+        linetable = debuginfo->linetable;
+    }
+    return jl_debuginfo_file1(debuginfo);
+}
+
+jl_module_t *jl_debuginfo_module1(jl_value_t *debuginfo_def)
+{
+    if (jl_is_method_instance(debuginfo_def))
+        debuginfo_def = ((jl_method_instance_t*)debuginfo_def)->def.value;
+    if (jl_is_method(debuginfo_def))
+        debuginfo_def = (jl_value_t*)((jl_method_t*)debuginfo_def)->module;
+    if (jl_is_module(debuginfo_def))
+        return (jl_module_t*)debuginfo_def;
+    return NULL;
+}
+
+const char *jl_debuginfo_name(jl_value_t *func)
+{
+    if (func == NULL)
+        return "macro expansion";
+    if (jl_is_method_instance(func))
+        func = ((jl_method_instance_t*)func)->def.value;
+    if (jl_is_method(func))
+        func = (jl_value_t*)((jl_method_t*)func)->name;
+    if (jl_is_symbol(func))
+        return jl_symbol_name((jl_sym_t*)func);
+    if (jl_is_module(func))
+        return "top-level scope";
+    return "<unknown>";
+}
+
+// func == module : top-level
+// func == NULL : macro expansion
+static void jl_fprint_debugloc(ios_t *s, jl_debuginfo_t *debuginfo, jl_value_t *func, size_t ip, int inlined) JL_NOTSAFEPOINT
+{
+    if (!jl_is_symbol(debuginfo->def)) // this is a path or
+        func = debuginfo->def; // this is inlined code
+    struct jl_codeloc_t stmt = jl_uncompress1_codeloc(debuginfo->codelocs, ip);
+    intptr_t edges_idx = stmt.to;
+    if (edges_idx) {
+        jl_debuginfo_t *edge = (jl_debuginfo_t*)jl_svecref(debuginfo->edges, edges_idx - 1);
+        assert(jl_typetagis(edge, jl_debuginfo_type));
+        jl_fprint_debugloc(s, edge, NULL, stmt.pc, 1);
+    }
+    intptr_t ip2 = stmt.line;
+    if (ip2 >= 0 && ip > 0 && (jl_value_t*)debuginfo->linetable != jl_nothing) {
+        jl_fprint_debugloc(s, debuginfo->linetable, func, ip2, 0);
+    }
+    else {
+        if (ip2 < 0) // set broken debug info to ignored
+            ip2 = 0;
+        const char *func_name = jl_debuginfo_name(func);
+        const char *file = jl_debuginfo_file(debuginfo);
+        jl_safe_fprint_codeloc(s, func_name, file, ip2, inlined);
+    }
+}
+
 // Print code location for backtrace buffer entry at *bt_entry
-void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT
+void jl_fprint_bt_entry_codeloc(ios_t *s, jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT
 {
     if (jl_bt_is_native(bt_entry)) {
-        jl_print_native_codeloc(bt_entry[0].uintptr);
+        jl_fprint_native_codeloc(s, bt_entry[0].uintptr);
     }
     else if (jl_bt_entry_tag(bt_entry) == JL_BT_INTERP_FRAME_TAG) {
-        size_t ip = jl_bt_entry_header(bt_entry);
+        size_t ip = jl_bt_entry_header(bt_entry); // zero-indexed
         jl_value_t *code = jl_bt_entry_jlvalue(bt_entry, 0);
-        if (jl_is_method_instance(code)) {
+        jl_value_t *def = (jl_value_t*)jl_core_module; // just used as a token here that isa Module
+        if (jl_is_code_instance(code)) {
+            jl_code_instance_t *ci = (jl_code_instance_t*)code;
+            def = (jl_value_t*)ci->def;
+            code = jl_atomic_load_relaxed(&ci->inferred);
+        } else if (jl_is_method_instance(code)) {
+            jl_method_instance_t *mi = (jl_method_instance_t*)code;
+            def = code;
             // When interpreting a method instance, need to unwrap to find the code info
-            code = jl_atomic_load_relaxed(&((jl_method_instance_t*)code)->uninferred);
+            code = mi->def.method->source;
         }
         if (jl_is_code_info(code)) {
             jl_code_info_t *src = (jl_code_info_t*)code;
             // See also the debug info handling in codegen.cpp.
-            // NB: debuginfoloc is 1-based!
-            intptr_t debuginfoloc = ((int32_t*)jl_array_data(src->codelocs))[ip];
-            while (debuginfoloc != 0) {
-                jl_line_info_node_t *locinfo = (jl_line_info_node_t*)
-                    jl_array_ptr_ref(src->linetable, debuginfoloc - 1);
-                assert(jl_typetagis(locinfo, jl_lineinfonode_type));
-                const char *func_name = "Unknown";
-                jl_value_t *method = locinfo->method;
-                if (jl_is_method_instance(method))
-                    method = ((jl_method_instance_t*)method)->def.value;
-                if (jl_is_method(method))
-                    method = (jl_value_t*)((jl_method_t*)method)->name;
-                if (jl_is_symbol(method))
-                    func_name = jl_symbol_name((jl_sym_t*)method);
-                jl_safe_print_codeloc(func_name, jl_symbol_name(locinfo->file),
-                                      locinfo->line, locinfo->inlined_at);
-                debuginfoloc = locinfo->inlined_at;
-            }
+            jl_fprint_debugloc(s, src->debuginfo, def, ip + 1, 0);
         }
         else {
             // If we're using this function something bad has already happened;
             // be a bit defensive to avoid crashing while reporting the crash.
-            jl_safe_printf("No code info - unknown interpreter state!\n");
+            jl_safe_fprintf(s, "No code info - unknown interpreter state!\n");
         }
     }
     else {
-        jl_safe_printf("Non-native bt entry with tag and header bits 0x%" PRIxPTR "\n",
-                       bt_entry[1].uintptr);
+        jl_safe_fprintf(s, "Non-native bt entry with tag and header bits 0x%" PRIxPTR "\n",
+                        bt_entry[1].uintptr);
     }
 }
 
@@ -791,7 +983,7 @@ _os_tsd_get_direct(unsigned long slot)
 // Unconditionally defined ptrauth_strip (instead of using the ptrauth.h header)
 // since libsystem will likely be compiled with -mbranch-protection, and we currently are not.
 // code from https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
-static inline uint64_t ptrauth_strip(uint64_t __value, unsigned int __key) {
+static inline uint64_t ptrauth_strip(uint64_t __value, unsigned int __key) JL_NOTSAFEPOINT {
   // On the stack the link register is protected with Pointer
   // Authentication Code when compiled with -mbranch-protection.
   // Let's strip the PAC unconditionally because xpaclri is in the NOP space,
@@ -809,7 +1001,7 @@ static inline uint64_t ptrauth_strip(uint64_t __value, unsigned int __key) {
 
 __attribute__((always_inline, pure))
 static __inline__ void**
-_os_tsd_get_base(void)
+_os_tsd_get_base(void) JL_NOTSAFEPOINT
 {
 #if defined(__arm__)
     uintptr_t tsd;
@@ -831,7 +1023,7 @@ _os_tsd_get_base(void)
 #ifdef _os_tsd_get_base
 __attribute__((always_inline))
 static __inline__ void*
-_os_tsd_get_direct(unsigned long slot)
+_os_tsd_get_direct(unsigned long slot) JL_NOTSAFEPOINT
 {
     return _os_tsd_get_base()[slot];
 }
@@ -839,14 +1031,14 @@ _os_tsd_get_direct(unsigned long slot)
 
 __attribute__((always_inline, pure))
 static __inline__ uintptr_t
-_os_ptr_munge_token(void)
+_os_ptr_munge_token(void) JL_NOTSAFEPOINT
 {
     return (uintptr_t)_os_tsd_get_direct(__TSD_PTR_MUNGE);
 }
 
 __attribute__((always_inline, pure))
 JL_UNUSED static __inline__ uintptr_t
-_os_ptr_munge(uintptr_t ptr)
+_os_ptr_munge(uintptr_t ptr) JL_NOTSAFEPOINT
 {
     return ptr ^ _os_ptr_munge_token();
 }
@@ -854,149 +1046,207 @@ _os_ptr_munge(uintptr_t ptr)
 #endif
 
 
-extern bt_context_t *jl_to_bt_context(void *sigctx);
+extern bt_context_t *jl_to_bt_context(void *sigctx) JL_NOTSAFEPOINT;
 
-void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
+// Some notes: this simulates a longjmp call occurring in context `c`, as if the
+// user was to set the PC in `c` to call longjmp and the PC in the longjmp to
+// return here. This helps work around many cases where siglongjmp out of a
+// signal handler is not supported (e.g. missing a _sigunaltstack call).
+// Additionally note that this doesn't restore the MXCSR or FP control word
+// (which some, but not most longjmp implementations do).  It also doesn't
+// support shadow stacks, so if those are in use, you might need to use a direct
+// jl_longjmp instead to leave the signal frame instead of relying on simulating
+// it and attempting to return normally.
+int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c) JL_NOTSAFEPOINT
 {
-    jl_task_t *ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    ptls->bt_size = 0;
-    if (t == ct) {
-        ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
-        return;
-    }
-    if (t->copy_stack || !t->started || t->stkbuf == NULL)
-        return;
-    int16_t old = -1;
-    if (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid)
-        return;
-    bt_context_t *context = NULL;
-#if defined(_OS_WINDOWS_)
-    bt_context_t c;
-    memset(&c, 0, sizeof(c));
-    _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx.uc_mcontext;
-#if defined(_CPU_X86_64_)
-    c.Rbx = mctx->Rbx;
-    c.Rsp = mctx->Rsp;
-    c.Rbp = mctx->Rbp;
-    c.Rsi = mctx->Rsi;
-    c.Rdi = mctx->Rdi;
-    c.R12 = mctx->R12;
-    c.R13 = mctx->R13;
-    c.R14 = mctx->R14;
-    c.R15 = mctx->R15;
-    c.Rip = mctx->Rip;
-    memcpy(&c.Xmm6, &mctx->Xmm6, 10 * sizeof(mctx->Xmm6)); // Xmm6-Xmm15
-#else
-    c.Eip = mctx->Eip;
-    c.Esp = mctx->Esp;
-    c.Ebp = mctx->Ebp;
-#endif
-    context = &c;
-#elif defined(JL_HAVE_UNW_CONTEXT)
-    context = &t->ctx.ctx;
-#elif defined(JL_HAVE_UCONTEXT)
-    context = jl_to_bt_context(&t->ctx.ctx);
-#elif defined(JL_HAVE_ASM)
-    bt_context_t c;
-    memset(&c, 0, sizeof(c));
- #if defined(_OS_LINUX_) && defined(__GLIBC__)
-    __jmp_buf *mctx = &t->ctx.ctx.uc_mcontext->__jmpbuf;
-    mcontext_t *mc = &c.uc_mcontext;
-  #if defined(_CPU_X86_)
+#if (defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_))
+    // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
+    return 0;
+#elif defined(_OS_WINDOWS_)
+    _JUMP_BUFFER* _ctx = (_JUMP_BUFFER*)mctx;
+    #if defined(_CPU_X86_64_)
+    c->Rbx = _ctx->Rbx;
+    c->Rsp = _ctx->Rsp;
+    c->Rbp = _ctx->Rbp;
+    c->Rsi = _ctx->Rsi;
+    c->Rdi = _ctx->Rdi;
+    c->R12 = _ctx->R12;
+    c->R13 = _ctx->R13;
+    c->R14 = _ctx->R14;
+    c->R15 = _ctx->R15;
+    c->Rip = _ctx->Rip;
+    memcpy(&c->Xmm6, &_ctx->Xmm6, 10 * sizeof(_ctx->Xmm6)); // Xmm6-Xmm15
+    // c->MxCsr = _ctx->MxCsr;
+    // c->FloatSave.ControlWord = _ctx->FpCsr;
+    // c->SegGS[0] = _ctx->Frame;
+    c->Rax = 1;
+    c->Rsp += sizeof(void*);
+    assert(c->Rsp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_X86_)
+    c->Ebp = _ctx->Ebp;
+    c->Ebx = _ctx->Ebx;
+    c->Edi = _ctx->Edi;
+    c->Esi = _ctx->Esi;
+    c->Esp = _ctx->Esp;
+    c->Eip = _ctx->Eip;
+    // c->SegFS[0] = _ctx->Registration;
+    // c->FloatSave.ControlWord = _ctx->FpCsr;
+    c->Eax = 1;
+    c->Esp += sizeof(void*);
+    assert(c->Esp % 16 == 0);
+    return 1;
+    #else
+    #error Windows is currently only supported on x86 and x86_64
+    #endif
+#elif defined(_OS_LINUX_) && defined(__GLIBC__)
+    __jmp_buf *_ctx = &mctx->__jmpbuf;
+    #if defined(_CPU_AARCH64_)
+    // Only on aarch64-linux libunwind uses a different struct than system's one:
+    // <https://github.com/libunwind/libunwind/blob/e63e024b72d35d4404018fde1a546fde976da5c5/include/libunwind-aarch64.h#L193-L205>.
+    struct unw_sigcontext *mc = &c->uc_mcontext;
+    #else
+    mcontext_t *mc = &c->uc_mcontext;
+    #endif
+    #if defined(_CPU_X86_)
     // https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S
     // https://github.com/bminor/glibc/blame/master/sysdeps/i386/jmpbuf-offsets.h
     // https://github.com/bminor/musl/blame/master/src/setjmp/i386/longjmp.s
-    mc->gregs[REG_EBX] = (*mctx)[0];
-    mc->gregs[REG_ESI] = (*mctx)[1];
-    mc->gregs[REG_EDI] = (*mctx)[2];
-    mc->gregs[REG_EBP] = (*mctx)[3];
-    mc->gregs[REG_ESP] = (*mctx)[4];
-    mc->gregs[REG_EIP] = (*mctx)[5];
+    mc->gregs[REG_EBX] = (*_ctx)[0];
+    mc->gregs[REG_ESI] = (*_ctx)[1];
+    mc->gregs[REG_EDI] = (*_ctx)[2];
+    mc->gregs[REG_EBP] = (*_ctx)[3];
+    mc->gregs[REG_ESP] = (*_ctx)[4];
+    mc->gregs[REG_EIP] = (*_ctx)[5];
     // ifdef PTR_DEMANGLE ?
     mc->gregs[REG_ESP] = ptr_demangle(mc->gregs[REG_ESP]);
     mc->gregs[REG_EIP] = ptr_demangle(mc->gregs[REG_EIP]);
-    context = &c;
-  #elif defined(_CPU_X86_64_)
+    mc->gregs[REG_EAX] = 1;
+    assert(mc->gregs[REG_ESP] % 16 == 0);
+    return 1;
+    #elif defined(_CPU_X86_64_)
     // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/__longjmp.S
     // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/jmpbuf-offsets.h
     // https://github.com/bminor/musl/blame/master/src/setjmp/x86_64/setjmp.s
-    mc->gregs[REG_RBX] = (*mctx)[0];
-    mc->gregs[REG_RBP] = (*mctx)[1];
-    mc->gregs[REG_R12] = (*mctx)[2];
-    mc->gregs[REG_R13] = (*mctx)[3];
-    mc->gregs[REG_R14] = (*mctx)[4];
-    mc->gregs[REG_R15] = (*mctx)[5];
-    mc->gregs[REG_RSP] = (*mctx)[6];
-    mc->gregs[REG_RIP] = (*mctx)[7];
+    mc->gregs[REG_RBX] = (*_ctx)[0];
+    mc->gregs[REG_RBP] = (*_ctx)[1];
+    mc->gregs[REG_R12] = (*_ctx)[2];
+    mc->gregs[REG_R13] = (*_ctx)[3];
+    mc->gregs[REG_R14] = (*_ctx)[4];
+    mc->gregs[REG_R15] = (*_ctx)[5];
+    mc->gregs[REG_RSP] = (*_ctx)[6];
+    mc->gregs[REG_RIP] = (*_ctx)[7];
     // ifdef PTR_DEMANGLE ?
     mc->gregs[REG_RBP] = ptr_demangle(mc->gregs[REG_RBP]);
     mc->gregs[REG_RSP] = ptr_demangle(mc->gregs[REG_RSP]);
     mc->gregs[REG_RIP] = ptr_demangle(mc->gregs[REG_RIP]);
-    context = &c;
-  #elif defined(_CPU_ARM_)
+    mc->gregs[REG_RAX] = 1;
+    assert(mc->gregs[REG_RSP] % 16 == 0);
+    return 1;
+    #elif defined(_CPU_ARM_)
     // https://github.com/bminor/glibc/blame/master/sysdeps/arm/__longjmp.S
     // https://github.com/bminor/glibc/blame/master/sysdeps/arm/include/bits/setjmp.h
     // https://github.com/bminor/musl/blame/master/src/setjmp/arm/longjmp.S
-    mc->arm_sp = (*mctx)[0];
-    mc->arm_lr = (*mctx)[1];
-    mc->arm_r4 = (*mctx)[2]; // aka v1
-    mc->arm_r5 = (*mctx)[3]; // aka v2
-    mc->arm_r6 = (*mctx)[4]; // aka v3
-    mc->arm_r7 = (*mctx)[5]; // aka v4
-    mc->arm_r8 = (*mctx)[6]; // aka v5
-    mc->arm_r9 = (*mctx)[7]; // aka v6 aka sb
-    mc->arm_r10 = (*mctx)[8]; // aka v7 aka sl
-    mc->arm_fp = (*mctx)[10]; // aka v8 aka r11
+    mc->arm_sp = (*_ctx)[0];
+    mc->arm_lr = (*_ctx)[1];
+    mc->arm_r4 = (*_ctx)[2]; // aka v1
+    mc->arm_r5 = (*_ctx)[3]; // aka v2
+    mc->arm_r6 = (*_ctx)[4]; // aka v3
+    mc->arm_r7 = (*_ctx)[5]; // aka v4
+    mc->arm_r8 = (*_ctx)[6]; // aka v5
+    mc->arm_r9 = (*_ctx)[7]; // aka v6 aka sb
+    mc->arm_r10 = (*_ctx)[8]; // aka v7 aka sl
+    mc->arm_fp = (*_ctx)[10]; // aka v8 aka r11
     // ifdef PTR_DEMANGLE ?
     mc->arm_sp = ptr_demangle(mc->arm_sp);
     mc->arm_lr = ptr_demangle(mc->arm_lr);
     mc->arm_pc = mc->arm_lr;
-    context = &c;
-  #elif defined(_CPU_AARCH64_)
+    mc->arm_r0 = 1;
+    assert(mc->arm_sp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_AARCH64_)
     // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/__longjmp.S
     // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/jmpbuf-offsets.h
     // https://github.com/bminor/musl/blame/master/src/setjmp/aarch64/longjmp.s
     // https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/aarch64/unwind_i.h#L62
     unw_fpsimd_context_t *mcfp = (unw_fpsimd_context_t*)&mc->__reserved;
-    mc->regs[19] = (*mctx)[0];
-    mc->regs[20] = (*mctx)[1];
-    mc->regs[21] = (*mctx)[2];
-    mc->regs[22] = (*mctx)[3];
-    mc->regs[23] = (*mctx)[4];
-    mc->regs[24] = (*mctx)[5];
-    mc->regs[25] = (*mctx)[6];
-    mc->regs[26] = (*mctx)[7];
-    mc->regs[27] = (*mctx)[8];
-    mc->regs[28] = (*mctx)[9];
-    mc->regs[29] = (*mctx)[10]; // aka fp
-    mc->regs[30] = (*mctx)[11]; // aka lr
-    // Yes, they did skip 12 why writing the code originally; and, no, I do not know why.
-    mc->sp = (*mctx)[13];
-    mcfp->vregs[7] = (*mctx)[14]; // aka d8
-    mcfp->vregs[8] = (*mctx)[15]; // aka d9
-    mcfp->vregs[9] = (*mctx)[16]; // aka d10
-    mcfp->vregs[10] = (*mctx)[17]; // aka d11
-    mcfp->vregs[11] = (*mctx)[18]; // aka d12
-    mcfp->vregs[12] = (*mctx)[19]; // aka d13
-    mcfp->vregs[13] = (*mctx)[20]; // aka d14
-    mcfp->vregs[14] = (*mctx)[21]; // aka d15
+    mc->regs[19] = (*_ctx)[0];
+    mc->regs[20] = (*_ctx)[1];
+    mc->regs[21] = (*_ctx)[2];
+    mc->regs[22] = (*_ctx)[3];
+    mc->regs[23] = (*_ctx)[4];
+    mc->regs[24] = (*_ctx)[5];
+    mc->regs[25] = (*_ctx)[6];
+    mc->regs[26] = (*_ctx)[7];
+    mc->regs[27] = (*_ctx)[8];
+    mc->regs[28] = (*_ctx)[9];
+    mc->regs[29] = (*_ctx)[10]; // aka fp
+    mc->regs[30] = (*_ctx)[11]; // aka lr
+    // Yes, they did skip 12 when writing the code originally; and, no, I do not know why.
+    mc->sp = (*_ctx)[13];
+    mcfp->vregs[7] = (*_ctx)[14]; // aka d8
+    mcfp->vregs[8] = (*_ctx)[15]; // aka d9
+    mcfp->vregs[9] = (*_ctx)[16]; // aka d10
+    mcfp->vregs[10] = (*_ctx)[17]; // aka d11
+    mcfp->vregs[11] = (*_ctx)[18]; // aka d12
+    mcfp->vregs[12] = (*_ctx)[19]; // aka d13
+    mcfp->vregs[13] = (*_ctx)[20]; // aka d14
+    mcfp->vregs[14] = (*_ctx)[21]; // aka d15
     // ifdef PTR_DEMANGLE ?
     mc->sp = ptr_demangle(mc->sp);
     mc->regs[30] = ptr_demangle(mc->regs[30]);
     mc->pc = mc->regs[30];
-    context = &c;
-  #else
-   #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown linux")
-   (void)mc;
-   (void)c;
-   (void)mctx;
-  #endif
- #elif defined(_OS_DARWIN_)
-    sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext;
-  #if defined(_CPU_X86_64_)
+    mc->regs[0] = 1;
+    assert(mc->sp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_RISCV64_)
+    // https://github.com/bminor/glibc/blob/master/sysdeps/riscv/bits/setjmp.h
+    // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_riscv
+    mc->__gregs[1] = (*_ctx)->__pc;        // ra
+    mc->__gregs[8] = (*_ctx)->__regs[0];   // s0
+    mc->__gregs[9] = (*_ctx)->__regs[1];   // s1
+    mc->__gregs[18] = (*_ctx)->__regs[2];  // s2
+    mc->__gregs[19] = (*_ctx)->__regs[3];  // s3
+    mc->__gregs[20] = (*_ctx)->__regs[4];  // s4
+    mc->__gregs[21] = (*_ctx)->__regs[5];  // s5
+    mc->__gregs[22] = (*_ctx)->__regs[6];  // s6
+    mc->__gregs[23] = (*_ctx)->__regs[7];  // s7
+    mc->__gregs[24] = (*_ctx)->__regs[8];  // s8
+    mc->__gregs[25] = (*_ctx)->__regs[9];  // s9
+    mc->__gregs[26] = (*_ctx)->__regs[10]; // s10
+    mc->__gregs[27] = (*_ctx)->__regs[11]; // s11
+    mc->__gregs[2] = (*_ctx)->__sp;        // sp
+    #ifndef __riscv_float_abi_soft
+    mc->__fpregs.__d.__f[8] = (unsigned long long) (*_ctx)->__fpregs[0];   // fs0
+    mc->__fpregs.__d.__f[9] = (unsigned long long) (*_ctx)->__fpregs[1];   // fs1
+    mc->__fpregs.__d.__f[18] = (unsigned long long) (*_ctx)->__fpregs[2];  // fs2
+    mc->__fpregs.__d.__f[19] = (unsigned long long) (*_ctx)->__fpregs[3];  // fs3
+    mc->__fpregs.__d.__f[20] = (unsigned long long) (*_ctx)->__fpregs[4];  // fs4
+    mc->__fpregs.__d.__f[21] = (unsigned long long) (*_ctx)->__fpregs[5];  // fs5
+    mc->__fpregs.__d.__f[22] = (unsigned long long) (*_ctx)->__fpregs[6];  // fs6
+    mc->__fpregs.__d.__f[23] = (unsigned long long) (*_ctx)->__fpregs[7];  // fs7
+    mc->__fpregs.__d.__f[24] = (unsigned long long) (*_ctx)->__fpregs[8];  // fs8
+    mc->__fpregs.__d.__f[25] = (unsigned long long) (*_ctx)->__fpregs[9];  // fs9
+    mc->__fpregs.__d.__f[26] = (unsigned long long) (*_ctx)->__fpregs[10]; // fs10
+    mc->__fpregs.__d.__f[27] = (unsigned long long) (*_ctx)->__fpregs[11]; // fs11
+    #endif
+    // ifdef PTR_DEMANGLE ?
+    mc->__gregs[REG_SP] = ptr_demangle(mc->__gregs[REG_SP]);
+    mc->__gregs[REG_RA] = ptr_demangle(mc->__gregs[REG_RA]);
+    mc->__gregs[REG_PC] = mc->__gregs[REG_RA];
+    mc->__gregs[REG_A0] = 1;
+    assert(mc->__gregs[REG_SP] % 16 == 0);
+    return 1;
+    #else
+    #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown linux")
+    (void)mc;
+    (void)mctx;
+    return 0;
+    #endif
+#elif defined(_OS_DARWIN_)
+    #if defined(_CPU_X86_64_)
     // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s
-    x86_thread_state64_t *mc = (x86_thread_state64_t*)&c;
+    x86_thread_state64_t *mc = (x86_thread_state64_t*)c;
     mc->__rbx = ((uint64_t*)mctx)[0];
     mc->__rbp = ((uint64_t*)mctx)[1];
     mc->__rsp = ((uint64_t*)mctx)[2];
@@ -1005,18 +1255,20 @@ void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
     mc->__r14 = ((uint64_t*)mctx)[5];
     mc->__r15 = ((uint64_t*)mctx)[6];
     mc->__rip = ((uint64_t*)mctx)[7];
-    // added in libsystem_plaform 177.200.16 (macOS Mojave 10.14.3)
+    // added in libsystem_platform 177.200.16 (macOS Mojave 10.14.3)
     // prior to that _os_ptr_munge_token was (hopefully) typically 0,
     // so x ^ 0 == x and this is a no-op
     mc->__rbp = _OS_PTR_UNMUNGE(mc->__rbp);
     mc->__rsp = _OS_PTR_UNMUNGE(mc->__rsp);
     mc->__rip = _OS_PTR_UNMUNGE(mc->__rip);
-    context = &c;
-  #elif defined(_CPU_AARCH64_)
+    mc->__rax = 1;
+    assert(mc->__rsp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_AARCH64_)
     // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/arm64/setjmp.s
     // https://github.com/apple/darwin-xnu/blob/main/osfmk/mach/arm/_structs.h
     // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_arm64
-    arm_thread_state64_t *mc = (arm_thread_state64_t*)&c;
+    arm_thread_state64_t *mc = (arm_thread_state64_t*)c;
     mc->__x[19] = ((uint64_t*)mctx)[0];
     mc->__x[20] = ((uint64_t*)mctx)[1];
     mc->__x[21] = ((uint64_t*)mctx)[2];
@@ -1047,15 +1299,17 @@ void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
     // libunwind is broken for signed-pointers, but perhaps best not to leave the signed pointer lying around either
     mc->__pc = ptrauth_strip(mc->__lr, 0);
     mc->__pad = 0; // aka __ra_sign_state = not signed
-    context = &c;
-  #else
-   #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown darwin")
+    mc->__x[0] = 1;
+    assert(mc->__sp % 16 == 0);
+    return 1;
+    #else
+    #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown darwin")
     (void)mctx;
-    (void)c;
-  #endif
- #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
-    sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext;
-    mcontext_t *mc = &c.uc_mcontext;
+    return 0;
+#endif
+#elif defined(_OS_FREEBSD_)
+    mcontext_t *mc = &c->uc_mcontext;
+    #if defined(_CPU_X86_64_)
     // https://github.com/freebsd/freebsd-src/blob/releng/13.1/lib/libc/amd64/gen/_setjmp.S
     mc->mc_rip = ((long*)mctx)[0];
     mc->mc_rbx = ((long*)mctx)[1];
@@ -1065,22 +1319,129 @@ void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
     mc->mc_r13 = ((long*)mctx)[5];
     mc->mc_r14 = ((long*)mctx)[6];
     mc->mc_r15 = ((long*)mctx)[7];
+    mc->mc_rax = 1;
+    mc->mc_rsp += sizeof(void*);
+    assert(mc->mc_rsp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_AARCH64_)
+    mc->mc_gpregs.gp_x[19] = ((long*)mctx)[0];
+    mc->mc_gpregs.gp_x[20] = ((long*)mctx)[1];
+    mc->mc_gpregs.gp_x[21] = ((long*)mctx)[2];
+    mc->mc_gpregs.gp_x[22] = ((long*)mctx)[3];
+    mc->mc_gpregs.gp_x[23] = ((long*)mctx)[4];
+    mc->mc_gpregs.gp_x[24] = ((long*)mctx)[5];
+    mc->mc_gpregs.gp_x[25] = ((long*)mctx)[6];
+    mc->mc_gpregs.gp_x[26] = ((long*)mctx)[7];
+    mc->mc_gpregs.gp_x[27] = ((long*)mctx)[8];
+    mc->mc_gpregs.gp_x[28] = ((long*)mctx)[9];
+    mc->mc_gpregs.gp_x[29] = ((long*)mctx)[10];
+    mc->mc_gpregs.gp_lr = ((long*)mctx)[11];
+    mc->mc_gpregs.gp_sp = ((long*)mctx)[12];
+    mc->mc_fpregs.fp_q[7] = ((long*)mctx)[13];
+    mc->mc_fpregs.fp_q[8] = ((long*)mctx)[14];
+    mc->mc_fpregs.fp_q[9] = ((long*)mctx)[15];
+    mc->mc_fpregs.fp_q[10] = ((long*)mctx)[16];
+    mc->mc_fpregs.fp_q[11] = ((long*)mctx)[17];
+    mc->mc_fpregs.fp_q[12] = ((long*)mctx)[18];
+    mc->mc_fpregs.fp_q[13] = ((long*)mctx)[19];
+    mc->mc_fpregs.fp_q[14] = ((long*)mctx)[20];
+    mc->mc_gpregs.gp_x[0] = 1;
+    assert(mc->mc_gpregs.gp_sp % 16 == 0);
+    return 1;
+    #else
+    #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown freebsd")
+    (void)mctx;
+    return 0;
+    #endif
+#else
+return 0;
+#endif
+}
+
+JL_DLLEXPORT size_t jl_try_record_thread_backtrace(jl_ptls_t ptls2, jl_bt_element_t *bt_data, size_t max_bt_size) JL_NOTSAFEPOINT
+{
+    int16_t tid = ptls2->tid;
+    jl_task_t *t = NULL;
+    bt_context_t *context = NULL;
+    bt_context_t c;
+    if (!jl_thread_suspend(tid, &c)) {
+        return 0;
+    }
+    // thread is stopped, safe to read the task it was running before we stopped it
+    t = jl_atomic_load_relaxed(&ptls2->current_task);
     context = &c;
- #else
-  #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown system")
-  (void)c;
- #endif
-#elif defined(JL_HAVE_ASYNCIFY)
- #pragma message("jl_rec_backtrace not defined for ASYNCIFY")
-#elif defined(JL_HAVE_SIGALTSTACK)
- #pragma message("jl_rec_backtrace not defined for SIGALTSTACK")
+    size_t bt_size = rec_backtrace_ctx(bt_data, max_bt_size, context, ptls2->previous_task ? NULL : t->gcstack);
+    jl_thread_resume(tid);
+    return bt_size;
+}
+
+JL_DLLEXPORT jl_record_backtrace_result_t jl_record_backtrace(jl_task_t *t, jl_bt_element_t *bt_data, size_t max_bt_size, int all_tasks_profiler) JL_NOTSAFEPOINT
+{
+    int16_t tid = INT16_MAX;
+    jl_record_backtrace_result_t result = {0, tid};
+    jl_task_t *ct = NULL;
+    jl_ptls_t ptls = NULL;
+    if (!all_tasks_profiler) {
+        ct = jl_current_task;
+        ptls = ct->ptls;
+        ptls->bt_size = 0;
+        tid = ptls->tid;
+    }
+    if (t == ct) {
+        result.bt_size = rec_backtrace(bt_data, max_bt_size, 0);
+        result.tid = tid;
+        return result;
+    }
+    bt_context_t *context = NULL;
+    bt_context_t c;
+    int16_t old;
+    for (old = -1; !jl_atomic_cmpswap(&t->tid, &old, tid) && old != tid; old = -1) {
+        // if this task is already running somewhere, we need to stop the thread it is running on and query its state
+        if (!jl_thread_suspend(old, &c)) {
+            if (jl_atomic_load_relaxed(&t->tid) != old)
+                continue;
+            return result;
+        }
+        if (jl_atomic_load_relaxed(&t->tid) == old) {
+            jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[old];
+            if (ptls2->previous_task == t || // we might print the wrong stack here, since we can't know whether we executed the swapcontext yet or not, but it at least avoids trying to access the state inside uc_mcontext which might not be set yet
+                (ptls2->previous_task == NULL && jl_atomic_load_relaxed(&ptls2->current_task) == t)) { // this case should be always accurate
+                // use the thread context for the unwind state
+                context = &c;
+            }
+            break;
+        }
+        // got the wrong thread stopped, try again
+        jl_thread_resume(old);
+    }
+    if (context == NULL && (!t->ctx.copy_stack && t->ctx.started && t->ctx.ctx != NULL)) {
+        // need to read the context from the task stored state
+        jl_jmp_buf *mctx = &t->ctx.ctx->uc_mcontext;
+#if defined(JL_TASK_SWITCH_WINDOWS)
+        memset(&c, 0, sizeof(c));
+        if (jl_simulate_longjmp(*mctx, &c))
+            context = &c;
+#elif defined(JL_TASK_SWITCH_LIBUNWIND)
+        context = t->ctx.ctx;
+#elif defined(JL_TASK_SWITCH_ASM)
+        memset(&c, 0, sizeof(c));
+        if (jl_simulate_longjmp(*mctx, &c))
+            context = &c;
 #else
- #pragma message("jl_rec_backtrace not defined for unknown task system")
+     #pragma message("jl_record_backtrace not defined for unknown task system")
 #endif
-    if (context)
-        ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context, t->gcstack);
+    }
+    size_t bt_size = 0;
+    if (context) {
+        bt_size = rec_backtrace_ctx(bt_data, max_bt_size, context, all_tasks_profiler ? NULL : t->gcstack);
+    }
     if (old == -1)
         jl_atomic_store_relaxed(&t->tid, old);
+    else if (old != tid)
+        jl_thread_resume(old);
+    result.bt_size = bt_size;
+    result.tid = old;
+    return result;
 }
 
 //--------------------------------------------------
@@ -1088,83 +1449,122 @@ void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT void jl_gdblookup(void* ip)
 {
-    jl_print_native_codeloc((uintptr_t)ip);
+    jl_fprint_native_codeloc(ios_safe_stderr, (uintptr_t)ip);
 }
 
 // Print backtrace for current exception in catch block
-JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_fprint_backtrace(ios_t *s) JL_NOTSAFEPOINT
 {
     jl_task_t *ct = jl_current_task;
     if (ct->ptls == NULL)
         return;
-    jl_excstack_t *s = ct->excstack;
-    if (!s)
+    jl_excstack_t *stack = ct->excstack;
+    if (!stack)
         return;
-    size_t i, bt_size = jl_excstack_bt_size(s, s->top);
-    jl_bt_element_t *bt_data = jl_excstack_bt_data(s, s->top);
+    size_t i, bt_size = jl_excstack_bt_size(stack, stack->top);
+    jl_bt_element_t *bt_data = jl_excstack_bt_data(stack, stack->top);
     for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
-        jl_print_bt_entry_codeloc(bt_data + i);
+        jl_fprint_bt_entry_codeloc(s, bt_data + i);
     }
 }
 
-// Print backtrace for specified task
-JL_DLLEXPORT void jlbacktracet(jl_task_t *t) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT
 {
-    jl_task_t *ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    jl_rec_backtrace(t);
-    size_t i, bt_size = ptls->bt_size;
-    jl_bt_element_t *bt_data = ptls->bt_data;
+    jl_fprint_backtrace(ios_safe_stderr);
+}
+
+JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
+{
+    jl_fprint_backtrace(ios_safe_stderr);
+}
+
+// Print backtrace for specified task to `s`
+JL_DLLEXPORT void jl_fprint_backtracet(ios_t *s, jl_task_t *t) JL_NOTSAFEPOINT
+{
+    jl_bt_element_t *bt_data;
+    jl_task_t *ct = jl_get_current_task();
+    size_t max_bt_size;
+    if (ct && ct->ptls != NULL) {
+        jl_ptls_t ptls = ct->ptls;
+        ptls->bt_size = 0;
+        bt_data = ptls->bt_data;
+        max_bt_size = JL_MAX_BT_SIZE;
+    } else {
+        max_bt_size = 1024; //8kb of stack should be safe
+        bt_data = (jl_bt_element_t *)alloca(max_bt_size * sizeof(jl_bt_element_t));
+    }
+    jl_record_backtrace_result_t r = jl_record_backtrace(t, bt_data, max_bt_size, 0);
+    size_t bt_size = r.bt_size;
+    size_t i;
     for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
-        jl_print_bt_entry_codeloc(bt_data + i);
+        jl_fprint_bt_entry_codeloc(s, bt_data + i);
     }
+    if (bt_size == 0)
+        jl_safe_fprintf(s, "      no backtrace recorded\n");
 }
 
-JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jlbacktracet(jl_task_t *t) JL_NOTSAFEPOINT
 {
-    jlbacktrace();
+    jl_fprint_backtracet(ios_safe_stderr, t);
 }
 
-// Print backtraces for all live tasks, for all threads.
-// WARNING: this is dangerous and can crash if used outside of gdb, if
-// all of Julia's threads are not stopped!
-JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
+// Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr
+JL_DLLEXPORT void jl_fprint_task_backtraces(ios_t *s, int show_done) JL_NOTSAFEPOINT
 {
     size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
     jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
     for (size_t i = 0; i < nthreads; i++) {
         jl_ptls_t ptls2 = allstates[i];
-        arraylist_t *live_tasks = &ptls2->heap.live_tasks;
-        size_t n = live_tasks->len;
-        jl_safe_printf("==== Thread %d created %zu live tasks\n",
-                ptls2->tid + 1, n + 1);
-        jl_safe_printf("     ---- Root task (%p)\n", ptls2->root_task);
-        jl_safe_printf("          (sticky: %d, started: %d, state: %d, tid: %d)\n",
-                ptls2->root_task->sticky, ptls2->root_task->started,
-                jl_atomic_load_relaxed(&ptls2->root_task->_state),
-                jl_atomic_load_relaxed(&ptls2->root_task->tid) + 1);
-        jlbacktracet(ptls2->root_task);
-
-        void **lst = live_tasks->items;
-        for (size_t j = 0; j < live_tasks->len; j++) {
-            jl_task_t *t = (jl_task_t *)lst[j];
+        if (gc_is_collector_thread(i)) {
+            jl_safe_fprintf(s, "==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1);
+            continue;
+        }
+        if (ptls2 == NULL) {
+            continue;
+        }
+        small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
+        size_t n = mtarraylist_length(live_tasks);
+        int t_state = JL_TASK_STATE_DONE;
+        jl_task_t *t = ptls2->root_task;
+        if (t != NULL)
+            t_state = jl_atomic_load_relaxed(&t->_state);
+        jl_safe_fprintf(s, "==== Thread %d created %zu live tasks\n",
+                ptls2->tid + 1, n + (t_state != JL_TASK_STATE_DONE));
+        if (show_done || t_state != JL_TASK_STATE_DONE) {
+            jl_safe_fprintf(s, "     ---- Root task (%p)\n", ptls2->root_task);
+            if (t != NULL) {
+                jl_safe_fprintf(s, "          (sticky: %d, started: %d, state: %d, tid: %d)\n",
+                        t->sticky, t->ctx.started, t_state,
+                        jl_atomic_load_relaxed(&t->tid) + 1);
+                jl_fprint_backtracet(s, t);
+            }
+            jl_safe_fprintf(s, "     ---- End root task\n");
+        }
+
+        for (size_t j = 0; j < n; j++) {
+            jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, j);
+            if (t == NULL)
+                continue;
             int t_state = jl_atomic_load_relaxed(&t->_state);
-            if (!show_done && t_state == JL_TASK_STATE_DONE) {
+            if (!show_done && t_state == JL_TASK_STATE_DONE)
                 continue;
-            }
-            jl_safe_printf("     ---- Task %zu (%p)\n", j + 1, t);
-            jl_safe_printf("          (sticky: %d, started: %d, state: %d, tid: %d)\n",
-                    t->sticky, t->started, t_state,
+            jl_safe_fprintf(s, "     ---- Task %zu (%p)\n", j + 1, t);
+            // n.b. this information might not be consistent with the stack printing after it, since it could start running or change tid, etc.
+            jl_safe_fprintf(s, "          (sticky: %d, started: %d, state: %d, tid: %d)\n",
+                    t->sticky, t->ctx.started, t_state,
                     jl_atomic_load_relaxed(&t->tid) + 1);
-            if (t->stkbuf != NULL)
-                jlbacktracet(t);
-            else
-                jl_safe_printf("      no stack\n");
-            jl_safe_printf("     ---- End task %zu\n", j + 1);
+            jl_fprint_backtracet(ios_safe_stderr, t);
+            jl_safe_fprintf(s, "     ---- End task %zu\n", j + 1);
         }
-        jl_safe_printf("==== End thread %d\n", ptls2->tid + 1);
+        jl_safe_fprintf(s, "==== End thread %d\n", ptls2->tid + 1);
     }
-    jl_safe_printf("==== Done\n");
+    jl_safe_fprintf(s, "==== Done\n");
+}
+
+// Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr
+JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
+{
+    jl_fprint_task_backtraces(ios_safe_stderr, show_done);
 }
 
 #ifdef __cplusplus
diff --git a/src/staticdata.c b/src/staticdata.c
index df080bc68c88f..ee8fa97b7be36 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -44,8 +44,6 @@
 
   - step 3 combines the different sections (fields of `jl_serializer_state`) into one
 
-  - step 4 writes the values of the hard-coded tagged items and `ccallable_list`
-
 Much of the "real work" during deserialization is done by `get_item_for_reloc`. But a few items require specific
 attention:
 - uniquing: during deserialization, the target item (an "external" type or MethodInstance) must be checked against
@@ -71,10 +69,11 @@ External links:
 */
 #include <stdlib.h>
 #include <string.h>
-#include <stdbool.h>
 #include <stdio.h> // printf
 #include <inttypes.h> // PRIxPTR
 
+#include <zstd.h>
+
 #include "julia.h"
 #include "julia_internal.h"
 #include "julia_gcext.h"
@@ -82,13 +81,36 @@ External links:
 #include "processor.h"
 #include "serialize.h"
 
-#ifndef _OS_WINDOWS_
+#ifdef _OS_WINDOWS_
+#include <memoryapi.h>
+#else
 #include <dlfcn.h>
+#include <sys/mman.h>
 #endif
 
 #include "valgrind.h"
 #include "julia_assert.h"
 
+static const size_t WORLD_AGE_REVALIDATION_SENTINEL = 0x1;
+JL_DLLEXPORT size_t jl_require_world = ~(size_t)0;
+JL_DLLEXPORT _Atomic(size_t) jl_first_image_replacement_world = ~(size_t)0;
+
+// This structure is used to store hash tables for the memoization
+// of queries in staticdata.c (currently only `type_in_worklist`).
+typedef struct {
+    htable_t type_in_worklist;
+} jl_query_cache;
+
+static void init_query_cache(jl_query_cache *cache) JL_NOTSAFEPOINT
+{
+    htable_new(&cache->type_in_worklist, 0);
+}
+
+static void destroy_query_cache(jl_query_cache *cache) JL_NOTSAFEPOINT
+{
+    htable_free(&cache->type_in_worklist);
+}
+
 #include "staticdata_utils.c"
 #include "precompile_utils.c"
 
@@ -99,189 +121,23 @@ extern "C" {
 // TODO: put WeakRefs on the weak_refs list during deserialization
 // TODO: handle finalizers
 
-#define NUM_TAGS    158
+#define NUM_TAGS    6
 
-// An array of references that need to be restored from the sysimg
-// This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C.
-jl_value_t **const*const get_tags(void) {
+// An array of special references that need to be restored from the sysimg
+static void get_tags(jl_value_t **tags[NUM_TAGS])
+{
     // Make sure to keep an extra slot at the end to sentinel length
-    static void * _tags[NUM_TAGS] = {NULL};
-
-    // Lazyily-initialize this list
-    if (_tags[0] == NULL) {
-        unsigned int i = 0;
-#define INSERT_TAG(sym) _tags[i++] = &(sym)
-        // builtin types
-        INSERT_TAG(jl_any_type);
-        INSERT_TAG(jl_symbol_type);
-        INSERT_TAG(jl_ssavalue_type);
-        INSERT_TAG(jl_datatype_type);
-        INSERT_TAG(jl_slotnumber_type);
-        INSERT_TAG(jl_simplevector_type);
-        INSERT_TAG(jl_array_type);
-        INSERT_TAG(jl_expr_type);
-        INSERT_TAG(jl_binding_type);
-        INSERT_TAG(jl_globalref_type);
-        INSERT_TAG(jl_string_type);
-        INSERT_TAG(jl_module_type);
-        INSERT_TAG(jl_tvar_type);
-        INSERT_TAG(jl_method_instance_type);
-        INSERT_TAG(jl_method_type);
-        INSERT_TAG(jl_code_instance_type);
-        INSERT_TAG(jl_linenumbernode_type);
-        INSERT_TAG(jl_lineinfonode_type);
-        INSERT_TAG(jl_gotonode_type);
-        INSERT_TAG(jl_quotenode_type);
-        INSERT_TAG(jl_gotoifnot_type);
-        INSERT_TAG(jl_argument_type);
-        INSERT_TAG(jl_returnnode_type);
-        INSERT_TAG(jl_const_type);
-        INSERT_TAG(jl_partial_struct_type);
-        INSERT_TAG(jl_partial_opaque_type);
-        INSERT_TAG(jl_interconditional_type);
-        INSERT_TAG(jl_method_match_type);
-        INSERT_TAG(jl_pinode_type);
-        INSERT_TAG(jl_phinode_type);
-        INSERT_TAG(jl_phicnode_type);
-        INSERT_TAG(jl_upsilonnode_type);
-        INSERT_TAG(jl_type_type);
-        INSERT_TAG(jl_bottom_type);
-        INSERT_TAG(jl_ref_type);
-        INSERT_TAG(jl_pointer_type);
-        INSERT_TAG(jl_llvmpointer_type);
-        INSERT_TAG(jl_vararg_type);
-        INSERT_TAG(jl_abstractarray_type);
-        INSERT_TAG(jl_densearray_type);
-        INSERT_TAG(jl_nothing_type);
-        INSERT_TAG(jl_function_type);
-        INSERT_TAG(jl_typeofbottom_type);
-        INSERT_TAG(jl_unionall_type);
-        INSERT_TAG(jl_typename_type);
-        INSERT_TAG(jl_builtin_type);
-        INSERT_TAG(jl_code_info_type);
-        INSERT_TAG(jl_opaque_closure_type);
-        INSERT_TAG(jl_task_type);
-        INSERT_TAG(jl_uniontype_type);
-        INSERT_TAG(jl_abstractstring_type);
-        INSERT_TAG(jl_array_any_type);
-        INSERT_TAG(jl_intrinsic_type);
-        INSERT_TAG(jl_methtable_type);
-        INSERT_TAG(jl_typemap_level_type);
-        INSERT_TAG(jl_typemap_entry_type);
-        INSERT_TAG(jl_voidpointer_type);
-        INSERT_TAG(jl_uint8pointer_type);
-        INSERT_TAG(jl_newvarnode_type);
-        INSERT_TAG(jl_anytuple_type_type);
-        INSERT_TAG(jl_anytuple_type);
-        INSERT_TAG(jl_namedtuple_type);
-        INSERT_TAG(jl_emptytuple_type);
-        INSERT_TAG(jl_array_symbol_type);
-        INSERT_TAG(jl_array_uint8_type);
-        INSERT_TAG(jl_array_int32_type);
-        INSERT_TAG(jl_array_uint64_type);
-        INSERT_TAG(jl_int32_type);
-        INSERT_TAG(jl_int64_type);
-        INSERT_TAG(jl_bool_type);
-        INSERT_TAG(jl_uint8_type);
-        INSERT_TAG(jl_uint16_type);
-        INSERT_TAG(jl_uint32_type);
-        INSERT_TAG(jl_uint64_type);
-        INSERT_TAG(jl_char_type);
-        INSERT_TAG(jl_weakref_type);
-        INSERT_TAG(jl_int8_type);
-        INSERT_TAG(jl_int16_type);
-        INSERT_TAG(jl_float16_type);
-        INSERT_TAG(jl_float32_type);
-        INSERT_TAG(jl_float64_type);
-        INSERT_TAG(jl_floatingpoint_type);
-        INSERT_TAG(jl_number_type);
-        INSERT_TAG(jl_signed_type);
-        INSERT_TAG(jl_pair_type);
-
-        // special typenames
-        INSERT_TAG(jl_tuple_typename);
-        INSERT_TAG(jl_pointer_typename);
-        INSERT_TAG(jl_llvmpointer_typename);
-        INSERT_TAG(jl_array_typename);
-        INSERT_TAG(jl_type_typename);
-        INSERT_TAG(jl_namedtuple_typename);
-        INSERT_TAG(jl_vecelement_typename);
-        INSERT_TAG(jl_opaque_closure_typename);
-
-        // special exceptions
-        INSERT_TAG(jl_errorexception_type);
-        INSERT_TAG(jl_argumenterror_type);
-        INSERT_TAG(jl_typeerror_type);
-        INSERT_TAG(jl_methoderror_type);
-        INSERT_TAG(jl_loaderror_type);
-        INSERT_TAG(jl_initerror_type);
-        INSERT_TAG(jl_undefvarerror_type);
-        INSERT_TAG(jl_stackovf_exception);
-        INSERT_TAG(jl_diverror_exception);
-        INSERT_TAG(jl_interrupt_exception);
-        INSERT_TAG(jl_boundserror_type);
-        INSERT_TAG(jl_memory_exception);
-        INSERT_TAG(jl_undefref_exception);
-        INSERT_TAG(jl_readonlymemory_exception);
-        INSERT_TAG(jl_atomicerror_type);
-
-        // other special values
-        INSERT_TAG(jl_emptysvec);
-        INSERT_TAG(jl_emptytuple);
-        INSERT_TAG(jl_false);
-        INSERT_TAG(jl_true);
-        INSERT_TAG(jl_an_empty_string);
-        INSERT_TAG(jl_an_empty_vec_any);
-        INSERT_TAG(jl_module_init_order);
-        INSERT_TAG(jl_core_module);
-        INSERT_TAG(jl_base_module);
-        INSERT_TAG(jl_main_module);
-        INSERT_TAG(jl_top_module);
-        INSERT_TAG(jl_typeinf_func);
-        INSERT_TAG(jl_type_type_mt);
-        INSERT_TAG(jl_nonfunction_mt);
-        INSERT_TAG(jl_kwcall_mt);
-        INSERT_TAG(jl_kwcall_func);
-        INSERT_TAG(jl_opaque_closure_method);
-
-        // some Core.Builtin Functions that we want to be able to reference:
-        INSERT_TAG(jl_builtin_throw);
-        INSERT_TAG(jl_builtin_is);
-        INSERT_TAG(jl_builtin_typeof);
-        INSERT_TAG(jl_builtin_sizeof);
-        INSERT_TAG(jl_builtin_issubtype);
-        INSERT_TAG(jl_builtin_isa);
-        INSERT_TAG(jl_builtin_typeassert);
-        INSERT_TAG(jl_builtin__apply_iterate);
-        INSERT_TAG(jl_builtin_isdefined);
-        INSERT_TAG(jl_builtin_nfields);
-        INSERT_TAG(jl_builtin_tuple);
-        INSERT_TAG(jl_builtin_svec);
-        INSERT_TAG(jl_builtin_getfield);
-        INSERT_TAG(jl_builtin_setfield);
-        INSERT_TAG(jl_builtin_swapfield);
-        INSERT_TAG(jl_builtin_modifyfield);
-        INSERT_TAG(jl_builtin_replacefield);
-        INSERT_TAG(jl_builtin_fieldtype);
-        INSERT_TAG(jl_builtin_arrayref);
-        INSERT_TAG(jl_builtin_const_arrayref);
-        INSERT_TAG(jl_builtin_arrayset);
-        INSERT_TAG(jl_builtin_arraysize);
-        INSERT_TAG(jl_builtin_apply_type);
-        INSERT_TAG(jl_builtin_applicable);
-        INSERT_TAG(jl_builtin_invoke);
-        INSERT_TAG(jl_builtin__expr);
-        INSERT_TAG(jl_builtin_ifelse);
-        INSERT_TAG(jl_builtin__typebody);
-        INSERT_TAG(jl_builtin_donotdelete);
-        INSERT_TAG(jl_builtin_compilerbarrier);
-        INSERT_TAG(jl_builtin_getglobal);
-        INSERT_TAG(jl_builtin_setglobal);
-        // n.b. must update NUM_TAGS when you add something here
+    unsigned int i = 0;
+#define INSERT_TAG(sym) tags[i++] = (jl_value_t**)&(sym)
+    INSERT_TAG(jl_method_table);
+    INSERT_TAG(jl_module_init_order);
+    INSERT_TAG(jl_typeinf_func);
+    INSERT_TAG(jl_compile_and_emit_func);
+    INSERT_TAG(jl_libdl_dlopen_func);
+    // n.b. must update NUM_TAGS when you add something here
 #undef INSERT_TAG
-        assert(i == NUM_TAGS - 1);
-    }
-    return (jl_value_t**const*const) _tags;
+    assert(i == NUM_TAGS - 1);
+    tags[i] = NULL;
 }
 
 // hash of definitions for predefined tagged object
@@ -290,11 +146,8 @@ static uintptr_t nsym_tag;
 // array of definitions for the predefined tagged object types
 // (reverse of symbol_table)
 static arraylist_t deser_sym;
-// Predefined tags that do not have special handling in `externally_linked`
-static htable_t external_objects;
 
 static htable_t serialization_order; // to break cycles, mark all objects that are serialized
-static htable_t unique_ready; // as we serialize types, we need to know if all reachable objects are also already serialized. This tracks whether `immediate` has been set for all of them.
 static htable_t nullptrs;
 // FIFO queue for objects to be serialized. Anything requiring fixup upon deserialization
 // must be "toplevel" in this queue. For types, parameters and field types must appear
@@ -308,6 +161,8 @@ static arraylist_t object_worklist;  // used to mimic recursion by jl_serialize_
 // jl_linkage_blobs.items[2i:2i+1] correspond to build_ids[i]   (0-offset indexing)
 arraylist_t jl_linkage_blobs;
 arraylist_t jl_image_relocs;
+// Keep track of which image corresponds to which top module.
+arraylist_t jl_top_mods;
 
 // Eytzinger tree of images. Used for very fast jl_object_in_image queries
 // See https://algorithmica.org/en/eytzinger
@@ -316,7 +171,22 @@ arraylist_t eytzinger_idxs;
 static uintptr_t img_min;
 static uintptr_t img_max;
 
-static int ptr_cmp(const void *l, const void *r)
+// HT_NOTFOUND is a valid integer ID, so we store the integer ids mangled.
+// This pair of functions mangles/demanges
+static size_t from_seroder_entry(void *entry) JL_NOTSAFEPOINT
+{
+    return (size_t)((char*)entry - (char*)HT_NOTFOUND - 1);
+}
+
+static void *to_seroder_entry(size_t idx) JL_NOTSAFEPOINT
+{
+    return (void*)((char*)HT_NOTFOUND + 1 + idx);
+}
+
+static htable_t new_methtables;
+//static size_t precompilation_world;
+
+static int ptr_cmp(const void *l, const void *r) JL_NOTSAFEPOINT
 {
     uintptr_t left = *(const uintptr_t*)l;
     uintptr_t right = *(const uintptr_t*)r;
@@ -324,7 +194,7 @@ static int ptr_cmp(const void *l, const void *r)
 }
 
 // Build an eytzinger tree from a sorted array
-static int eytzinger(uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n)
+static int eytzinger(uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n) JL_NOTSAFEPOINT
 {
     if (k <= n) {
         i = eytzinger(src, dest, i, 2 * k, n);
@@ -364,7 +234,7 @@ static size_t eyt_obj_idx(jl_value_t *obj) JL_NOTSAFEPOINT
 }
 
 //used in staticdata.c after we add an image
-void rebuild_image_blob_tree(void)
+void rebuild_image_blob_tree(void) JL_NOTSAFEPOINT
 {
     size_t inc = 1 + jl_linkage_blobs.len - eytzinger_image_tree.len;
     assert(eytzinger_idxs.len == eytzinger_image_tree.len);
@@ -422,35 +292,33 @@ size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
     return idx;
 }
 
-uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
+JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
 {
     return eyt_obj_in_img(obj);
 }
 
+// Map an object to it's "owning" top module
+JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT
+{
+    size_t idx = external_blob_index(v);
+    size_t lbids = n_linkage_blobs();
+    if (idx < lbids) {
+        return (jl_value_t*)jl_top_mods.items[idx];
+    }
+    // The object is runtime allocated
+    return (jl_value_t*)jl_nothing;
+}
+
 // hash of definitions for predefined function pointers
+// (reverse is jl_builtin_f_addrs)
 static htable_t fptr_to_id;
+
 void *native_functions;   // opaque jl_native_code_desc_t blob used for fetching data from LLVM
 
 // table of struct field addresses to rewrite during saving
 static htable_t field_replace;
+static htable_t bits_replace;
 
-// array of definitions for the predefined function pointers
-// (reverse of fptr_to_id)
-// This is a manually constructed dual of the fvars array, which would be produced by codegen for Julia code, for C.
-static const jl_fptr_args_t id_to_fptrs[] = {
-    &jl_f_throw, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa,
-    &jl_f_typeassert, &jl_f__apply_iterate, &jl_f__apply_pure,
-    &jl_f__call_latest, &jl_f__call_in_world, &jl_f__call_in_world_total, &jl_f_isdefined,
-    &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call,
-    &jl_f_getfield, &jl_f_setfield, &jl_f_swapfield, &jl_f_modifyfield,
-    &jl_f_replacefield, &jl_f_fieldtype, &jl_f_nfields,
-    &jl_f_arrayref, &jl_f_const_arrayref, &jl_f_arrayset, &jl_f_arraysize, &jl_f_apply_type,
-    &jl_f_applicable, &jl_f_invoke, &jl_f_sizeof, &jl_f__expr, &jl_f__typevar,
-    &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype,
-    &jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_get_binding_type,
-    &jl_f_set_binding_type, &jl_f_opaque_closure_call, &jl_f_donotdelete, &jl_f_compilerbarrier,
-    &jl_f_getglobal, &jl_f_setglobal, &jl_f_finalizer, &jl_f__compute_sparams, &jl_f__svec_ref,
-    NULL };
 
 typedef struct {
     ios_t *s;                   // the main stream
@@ -459,13 +327,15 @@ typedef struct {
     ios_t *relocs;              // for (de)serializing relocs_list and gctags_list
     ios_t *gvar_record;         // serialized array mapping gvid => spos
     ios_t *fptr_record;         // serialized array mapping fptrid => spos
+    arraylist_t memowner_list;  // a list of memory locations that have shared owners
+    arraylist_t memref_list;    // a list of memoryref locations
     arraylist_t relocs_list;    // a list of (location, target) pairs, see description at top
     arraylist_t gctags_list;    //      "
     arraylist_t uniquing_types; // a list of locations that reference types that must be de-duplicated
+    arraylist_t uniquing_super; // a list of datatypes, used in super fields, that need to be marked in uniquing_types once they are reached, for handling unique-ing of them on deserialization
     arraylist_t uniquing_objs;  // a list of locations that reference non-types that must be de-duplicated
     arraylist_t fixup_types;    // a list of locations of types requiring (re)caching
     arraylist_t fixup_objs;     // a list of locations of objects requiring (re)caching
-    arraylist_t ccallable_list; // @ccallable entry points to install
     // mapping from a buildid_idx to a depmods_idx
     jl_array_t *buildid_depmods_idxs;
     // record of build_ids for all external linkages, in order of serialization for the current sysimg/pkgimg
@@ -477,17 +347,18 @@ typedef struct {
     jl_array_t *link_ids_gctags;
     jl_array_t *link_ids_gvars;
     jl_array_t *link_ids_external_fnvars;
+    jl_array_t *method_roots_list;
+    htable_t method_roots_index;
+    uint64_t worklist_key;
+    jl_query_cache *query_cache;
     jl_ptls_t ptls;
-    htable_t callers_with_edges;
     jl_image_t *image;
     int8_t incremental;
 } jl_serializer_state;
 
-static jl_value_t *jl_idtable_type = NULL;
-static jl_typename_t *jl_idtable_typename = NULL;
 static jl_value_t *jl_bigint_type = NULL;
+static jl_debuginfo_t *jl_nulldebuginfo;
 static int gmp_limb_size = 0;
-static jl_sym_t *jl_docmeta_sym = NULL;
 
 #ifdef _P64
 #define RELOC_TAG_OFFSET 61
@@ -511,6 +382,8 @@ enum RefTags {
     ExternalLinkage     // reference to some other pkgimage
 };
 
+#define SYS_EXTERNAL_LINK_UNIT sizeof(void*)
+
 // calling conventions for internal entry points.
 // this is used to set the method-instance->invoke field
 typedef enum {
@@ -518,6 +391,7 @@ typedef enum {
     JL_API_BOXED,
     JL_API_CONST,
     JL_API_WITH_PARAMETERS,
+    JL_API_OC_CALL,
     JL_API_INTERPRETED,
     JL_API_BUILTIN,
     JL_API_MAX
@@ -555,12 +429,11 @@ typedef struct {
 } pkgcachesizes;
 
 // --- Static Compile ---
-static void *jl_sysimg_handle = NULL;
-static jl_image_t sysimage;
+static jl_image_buf_t jl_sysimage_buf = { JL_IMAGE_KIND_NONE };
 
-static inline uintptr_t *sysimg_gvars(uintptr_t *base, const int32_t *offsets, size_t idx)
+static inline uintptr_t *sysimg_gvars(const char *base, const int32_t *offsets, size_t idx)
 {
-    return base + offsets[idx] / sizeof(base[0]);
+    return (uintptr_t*)(base + offsets[idx]);
 }
 
 JL_DLLEXPORT int jl_running_on_valgrind(void)
@@ -568,24 +441,6 @@ JL_DLLEXPORT int jl_running_on_valgrind(void)
     return RUNNING_ON_VALGRIND;
 }
 
-static void jl_load_sysimg_so(void)
-{
-    int imaging_mode = jl_generating_output() && !jl_options.incremental;
-    // in --build mode only use sysimg data, not precompiled native code
-    if (!imaging_mode && jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES) {
-        assert(sysimage.fptrs.base);
-    }
-    else {
-        memset(&sysimage.fptrs, 0, sizeof(sysimage.fptrs));
-    }
-    const char *sysimg_data;
-    jl_dlsym(jl_sysimg_handle, "jl_system_image_data", (void **)&sysimg_data, 1);
-    size_t *plen;
-    jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1);
-    jl_restore_system_image_data(sysimg_data, *plen);
-}
-
-
 // --- serializer ---
 
 #define NBOX_C 1024
@@ -612,50 +467,58 @@ static int jl_needs_serialization(jl_serializer_state *s, jl_value_t *v) JL_NOTS
     else if (jl_typetagis(v, jl_uint8_tag << 4)) {
         return 0;
     }
-    else if (jl_typetagis(v, jl_task_tag << 4)) {
+    else if (v == (jl_value_t*)s->ptls->root_task) {
         return 0;
     }
 
     return 1;
 }
 
-
-static int caching_tag(jl_value_t *v) JL_NOTSAFEPOINT
+static int caching_tag(jl_value_t *v, jl_query_cache *query_cache) JL_NOTSAFEPOINT
 {
     if (jl_is_method_instance(v)) {
         jl_method_instance_t *mi = (jl_method_instance_t*)v;
         jl_value_t *m = mi->def.value;
         if (jl_is_method(m) && jl_object_in_image(m))
-            return 1 + type_in_worklist(mi->specTypes);
+            return 1 + type_in_worklist(mi->specTypes, query_cache);
+    }
+    if (jl_is_binding(v)) {
+        jl_globalref_t *gr = ((jl_binding_t*)v)->globalref;
+        if (!gr)
+            return 0;
+        if (!jl_object_in_image((jl_value_t*)gr->mod))
+            return 0;
+        return 1;
     }
     if (jl_is_datatype(v)) {
         jl_datatype_t *dt = (jl_datatype_t*)v;
         if (jl_is_tuple_type(dt) ? !dt->isconcretetype : dt->hasfreetypevars)
             return 0; // aka !is_cacheable from jltypes.c
         if (jl_object_in_image((jl_value_t*)dt->name))
-            return 1 + type_in_worklist(v);
+            return 1 + type_in_worklist(v, query_cache);
     }
     jl_value_t *dtv = jl_typeof(v);
     if (jl_is_datatype_singleton((jl_datatype_t*)dtv)) {
-        return 1 - type_in_worklist(dtv); // these are already recached in the datatype in the image
+        return 1 - type_in_worklist(dtv, query_cache); // these are already recached in the datatype in the image
     }
     return 0;
 }
 
-static int needs_recaching(jl_value_t *v) JL_NOTSAFEPOINT
+static int needs_recaching(jl_value_t *v, jl_query_cache *query_cache) JL_NOTSAFEPOINT
 {
-    return caching_tag(v) == 2;
+    return caching_tag(v, query_cache) == 2;
 }
 
-static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT
+static int needs_uniquing(jl_value_t *v, jl_query_cache *query_cache) JL_NOTSAFEPOINT
 {
     assert(!jl_object_in_image(v));
-    return caching_tag(v) == 1;
+    return caching_tag(v, query_cache) == 1;
 }
 
 static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT
 {
-    ptrhash_put(&field_replace, (void*)addr, newval);
+    if (*addr != newval)
+        ptrhash_put(&field_replace, (void*)addr, newval);
 }
 
 static jl_value_t *get_replaceable_field(jl_value_t **addr, int mutabl) JL_GC_DISABLED
@@ -685,34 +548,76 @@ static uintptr_t jl_fptr_id(void *fptr)
         return *(uintptr_t*)pbp;
 }
 
+static int effects_foldable(uint32_t effects)
+{
+    // N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true)
+    return ((effects & 0x7) == 0) && // is_consistent(effects)
+           (((effects >> 10) & 0x03) == 0) && // is_noub(effects)
+           (((effects >> 3) & 0x03) == 0) && // is_effect_free(effects)
+           ((effects >> 6) & 0x01); // is_terminates(effects)
+}
+
+
 // `jl_queue_for_serialization` adds items to `serialization_order`
 #define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0)
 static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED;
 
-
 static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_t *m) JL_GC_DISABLED
 {
     jl_queue_for_serialization(s, m->name);
     jl_queue_for_serialization(s, m->parent);
-    jl_queue_for_serialization(s, m->bindings);
-    jl_queue_for_serialization(s, m->bindingkeyset);
-    if (jl_options.strip_metadata) {
+    if (!jl_options.strip_metadata)
+        jl_queue_for_serialization(s, m->file);
+    jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindingkeyset));
+    if (jl_options.trim) {
+        jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&m->bindings), 0, 1);
         jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings);
         for (size_t i = 0; i < jl_svec_len(table); i++) {
-            jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i);
+            jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
             if ((void*)b == jl_nothing)
                 break;
-            jl_sym_t *name = b->globalref->name;
-            if (name == jl_docmeta_sym && jl_atomic_load_relaxed(&b->value))
-                record_field_change((jl_value_t**)&b->value, jl_nothing);
+            jl_value_t *val = jl_get_binding_value_in_world(b, jl_atomic_load_relaxed(&jl_world_counter));
+            // keep binding objects that are defined in the latest world and ...
+            if (val &&
+                // ... point to modules ...
+                (jl_is_module(val) ||
+                 // ... or point to __init__ methods ...
+                 !strcmp(jl_symbol_name(b->globalref->name), "__init__") ||
+                 // ... or point to Base functions accessed by the runtime
+                 (m == jl_base_module && (!strcmp(jl_symbol_name(b->globalref->name), "wait") ||
+                                          !strcmp(jl_symbol_name(b->globalref->name), "task_done_hook") ||
+                                          !strcmp(jl_symbol_name(b->globalref->name), "_uv_hook_close"))))) {
+                jl_queue_for_serialization(s, b);
+            }
         }
     }
+    else {
+        jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindings));
+    }
 
-    for (size_t i = 0; i < m->usings.len; i++) {
-        jl_queue_for_serialization(s, (jl_value_t*)m->usings.items[i]);
+    for (size_t i = 0; i < module_usings_length(m); i++) {
+        jl_queue_for_serialization(s, module_usings_getmod(m, i));
+    }
+
+    if (jl_options.trim || jl_options.strip_ir) {
+        record_field_change((jl_value_t**)&m->usings_backedges, jl_nothing);
+        record_field_change((jl_value_t**)&m->scanned_methods, jl_nothing);
+    }
+    else {
+        jl_queue_for_serialization(s, m->usings_backedges);
+        jl_queue_for_serialization(s, m->scanned_methods);
     }
 }
 
+static int codeinst_may_be_runnable(jl_code_instance_t *ci, int incremental) {
+    size_t max_world = jl_atomic_load_relaxed(&ci->max_world);
+    if (max_world == ~(size_t)0)
+        return 1;
+    if (incremental)
+        return 0;
+    return jl_atomic_load_relaxed(&ci->min_world) <= jl_typeinf_world && jl_typeinf_world <= max_world;
+}
+
 // Anything that requires uniquing or fixing during deserialization needs to be "toplevel"
 // in serialization (i.e., have its own entry in `serialization_order`). Consequently,
 // objects that act as containers for other potentially-"problematic" objects must add such "children"
@@ -725,54 +630,90 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
 {
     jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
     jl_queue_for_serialization_(s, (jl_value_t*)t, 1, immediate);
+    const jl_datatype_layout_t *layout = t->layout;
 
     if (!recursive)
         goto done_fields;
 
     if (s->incremental && jl_is_datatype(v) && immediate) {
         jl_datatype_t *dt = (jl_datatype_t*)v;
-        // ensure super is queued (though possibly not yet handled, since it may have cycles)
-        jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, 1);
         // ensure all type parameters are recached
         jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1);
-        jl_value_t *singleton = dt->instance;
-        if (singleton && needs_uniquing(singleton)) {
-            assert(jl_needs_serialization(s, singleton)); // should be true, since we visited dt
+        if (jl_is_datatype_singleton(dt) && needs_uniquing(dt->instance, s->query_cache)) {
+            assert(jl_needs_serialization(s, dt->instance)); // should be true, since we visited dt
             // do not visit dt->instance for our template object as it leads to unwanted cycles here
             // (it may get serialized from elsewhere though)
             record_field_change(&dt->instance, jl_nothing);
         }
-        immediate = 0; // do not handle remaining fields immediately (just field types remains)
+        goto done_fields; // for now
     }
-    if (s->incremental && jl_is_method_instance(v)) {
+    if (jl_is_method_instance(v)) {
         jl_method_instance_t *mi = (jl_method_instance_t*)v;
-        jl_value_t *def = mi->def.value;
-        if (needs_uniquing(v)) {
-            // we only need 3 specific fields of this (the rest are not used)
-            jl_queue_for_serialization(s, mi->def.value);
-            jl_queue_for_serialization(s, mi->specTypes);
-            jl_queue_for_serialization(s, (jl_value_t*)mi->sparam_vals);
-            recursive = 0;
+        if (s->incremental) {
+            jl_value_t *def = mi->def.value;
+            if (needs_uniquing(v, s->query_cache)) {
+                // we only need 3 specific fields of this (the rest are not used)
+                jl_queue_for_serialization(s, mi->def.value);
+                jl_queue_for_serialization(s, mi->specTypes);
+                jl_queue_for_serialization(s, (jl_value_t*)mi->sparam_vals);
+                goto done_fields;
+            }
+            else if (jl_is_method(def) && jl_object_in_image(def)) {
+                // we only need 3 specific fields of this (the rest are restored afterward, if valid)
+                // in particular, cache is repopulated by jl_mi_cache_insert for all foreign function,
+                // so must not be present here
+                record_field_change((jl_value_t**)&mi->cache, NULL);
+            }
+            else {
+                assert(!needs_recaching(v, s->query_cache));
+            }
+            // Any back-edges will be re-validated and added by staticdata.jl, so
+            // drop them from the image here
+            record_field_change((jl_value_t**)&mi->backedges, NULL);
+            // n.b. opaque closures cannot be inspected and relied upon like a
+            // normal method since they can get improperly introduced by generated
+            // functions, so if they appeared at all, we will probably serialize
+            // them wrong and segfault. The jl_code_for_staged function should
+            // prevent this from happening, so we do not need to detect that user
+            // error now.
+        }
+        // don't recurse into all backedges memory (yet)
+        jl_value_t *backedges = get_replaceable_field((jl_value_t**)&mi->backedges, 1);
+        if (backedges) {
+            assert(!jl_options.trim && !jl_options.strip_ir);
+            jl_queue_for_serialization_(s, (jl_value_t*)((jl_array_t*)backedges)->ref.mem, 0, 1);
+            size_t i = 0, n = jl_array_nrows(backedges);
+            while (i < n) {
+                jl_value_t *invokeTypes;
+                jl_code_instance_t *caller;
+                i = get_next_edge((jl_array_t*)backedges, i, &invokeTypes, &caller);
+                if (invokeTypes)
+                    jl_queue_for_serialization(s, invokeTypes);
+            }
+        }
+    }
+    if (jl_is_binding(v)) {
+        jl_binding_t *b = (jl_binding_t*)v;
+        if (s->incremental && needs_uniquing(v, s->query_cache)) {
+            jl_queue_for_serialization(s, b->globalref->mod);
+            jl_queue_for_serialization(s, b->globalref->name);
             goto done_fields;
         }
-        else if (jl_is_method(def) && jl_object_in_image(def)) {
-            // we only need 3 specific fields of this (the rest are restored afterward, if valid)
-            // in particular, cache is repopulated by jl_mi_cache_insert for all foreign function,
-            // so must not be present here
-            record_field_change((jl_value_t**)&mi->uninferred, NULL);
-            record_field_change((jl_value_t**)&mi->backedges, NULL);
-            record_field_change((jl_value_t**)&mi->callbacks, NULL);
-            record_field_change((jl_value_t**)&mi->cache, NULL);
+        if (jl_options.trim || jl_options.strip_ir) {
+            record_field_change((jl_value_t**)&b->backedges, NULL);
         }
         else {
-            assert(!needs_recaching(v));
+            // don't recurse into all backedges memory (yet)
+            jl_value_t *backedges = get_replaceable_field((jl_value_t**)&b->backedges, 1);
+            if (backedges) {
+                jl_queue_for_serialization_(s, (jl_value_t*)((jl_array_t*)backedges)->ref.mem, 0, 1);
+                for (size_t i = 0, n = jl_array_nrows(backedges); i < n; i++) {
+                    jl_value_t *b = jl_array_ptr_ref(backedges, i);
+                    if (!jl_is_code_instance(b) && !jl_is_method_instance(b) && !jl_is_method(b)) // otherwise usually a Binding?
+                        jl_queue_for_serialization(s, b);
+                }
+            }
         }
-        // n.b. opaque closures cannot be inspected and relied upon like a
-        // normal method since they can get improperly introduced by generated
-        // functions, so if they appeared at all, we will probably serialize
-        // them wrong and segfault. The jl_code_for_staged function should
-        // prevent this from happening, so we do not need to detect that user
-        // error now.
     }
     if (s->incremental && jl_is_globalref(v)) {
         jl_globalref_t *gr = (jl_globalref_t*)v;
@@ -783,27 +724,126 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
     if (jl_is_typename(v)) {
         jl_typename_t *tn = (jl_typename_t*)v;
         // don't recurse into several fields (yet)
-        jl_queue_for_serialization_(s, (jl_value_t*)tn->cache, 0, 1);
-        jl_queue_for_serialization_(s, (jl_value_t*)tn->linearcache, 0, 1);
+        jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&tn->cache), 0, 1);
+        jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&tn->linearcache), 0, 1);
         if (s->incremental) {
             assert(!jl_object_in_image((jl_value_t*)tn->module));
             assert(!jl_object_in_image((jl_value_t*)tn->wrapper));
         }
     }
-    if (s->incremental && jl_is_code_instance(v)) {
+    if (jl_is_mtable(v)) {
+        jl_methtable_t *mt = (jl_methtable_t*)v;
+        // Any back-edges will be re-validated and added by staticdata.jl, so
+        // drop them from the image here
+        if (s->incremental || jl_options.trim || jl_options.strip_ir) {
+            record_field_change((jl_value_t**)&mt->backedges, jl_an_empty_memory_any);
+        }
+        else {
+            // don't recurse into all backedges memory (yet)
+            jl_value_t *allbackedges = get_replaceable_field((jl_value_t**)&mt->backedges, 1);
+            jl_queue_for_serialization_(s, allbackedges, 0, 1);
+            for (size_t i = 0, n = ((jl_genericmemory_t*)allbackedges)->length; i < n; i += 2) {
+                jl_value_t *tn = jl_genericmemory_ptr_ref(allbackedges, i);
+                jl_queue_for_serialization(s, tn);
+                jl_value_t *backedges = jl_genericmemory_ptr_ref(allbackedges, i + 1);
+                if (backedges && backedges != jl_nothing) {
+                    jl_queue_for_serialization_(s, (jl_value_t*)((jl_array_t*)backedges)->ref.mem, 0, 1);
+                    jl_queue_for_serialization(s, backedges);
+                    for (size_t i = 0, n = jl_array_nrows(backedges); i < n; i += 2) {
+                        jl_value_t *t = jl_array_ptr_ref(backedges, i);
+                        assert(!jl_is_code_instance(t));
+                        jl_queue_for_serialization(s, t);
+                    }
+                }
+            }
+        }
+    }
+    if (jl_is_code_instance(v)) {
         jl_code_instance_t *ci = (jl_code_instance_t*)v;
-        // make sure we don't serialize other reachable cache entries of foreign methods
-        if (jl_object_in_image((jl_value_t*)ci->def->def.value)) {
-            // TODO: if (ci in ci->defs->cache)
-            record_field_change((jl_value_t**)&ci->next, NULL);
+        jl_method_instance_t *mi = jl_get_ci_mi(ci);
+        if (s->incremental) {
+            // make sure we don't serialize other reachable cache entries of foreign methods
+            // Should this now be:
+            // if (ci !in ci->defs->cache)
+            //     record_field_change((jl_value_t**)&ci->next, NULL);
+            // Why are we checking that the method/module this originates from is in_image?
+            // and then disconnect this CI?
+            if (jl_object_in_image((jl_value_t*)mi->def.value)) {
+                // TODO: if (ci in ci->defs->cache)
+                record_field_change((jl_value_t**)&ci->next, NULL);
+            }
+        }
+        jl_value_t *inferred = jl_atomic_load_relaxed(&ci->inferred);
+        if (inferred && inferred != jl_nothing && !jl_is_uint8(inferred)) { // disregard if there is nothing here to delete (e.g. builtins, unspecialized)
+            jl_method_t *def = mi->def.method;
+            if (jl_is_method(def)) { // don't delete toplevel code
+                int is_relocatable = !s->incremental || jl_is_code_info(inferred) ||
+                    (jl_is_string(inferred) && jl_string_len(inferred) > 0 && jl_string_data(inferred)[jl_string_len(inferred) - 1]);
+                int discard = 0;
+                if (!is_relocatable) {
+                    discard = 1;
+                }
+                else if (def->source == NULL) {
+                    // don't delete code from optimized opaque closures that can't be reconstructed (and builtins)
+                }
+                else if (!codeinst_may_be_runnable(ci, s->incremental) || // delete all code that cannot run
+                         jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) { // delete all code that just returns a constant
+                    discard = 1;
+                }
+                else if (native_functions && // don't delete any code if making a ji file
+                         (ci->owner == jl_nothing) && // don't delete code for external interpreters
+                         !effects_foldable(jl_atomic_load_relaxed(&ci->ipo_purity_bits)) && // don't delete code we may want for irinterp
+                         jl_ir_inlining_cost(inferred) == UINT16_MAX) { // don't delete inlineable code
+                    // delete the code now: if we thought it was worth keeping, it would have been converted to object code
+                    discard = 1;
+                }
+                if (discard) {
+                    // keep only the inlining cost, so inference can later decide if it is worth getting the source back
+                    if (jl_is_string(inferred) || jl_is_code_info(inferred))
+                        inferred = jl_box_uint8(jl_encode_inlining_cost(jl_ir_inlining_cost(inferred)));
+                    else
+                        inferred = jl_nothing;
+                    record_field_change((jl_value_t**)&ci->inferred, inferred);
+                }
+                else if (s->incremental && jl_is_string(inferred)) {
+                    // New roots for external methods
+                    if (jl_object_in_image((jl_value_t*)def)) {
+                        void **pfound = ptrhash_bp(&s->method_roots_index, def);
+                        if (*pfound == HT_NOTFOUND) {
+                            *pfound = def;
+                            size_t nwithkey = nroots_with_key(def, s->worklist_key);
+                            if (nwithkey) {
+                                jl_array_ptr_1d_push(s->method_roots_list, (jl_value_t*)def);
+                                jl_array_t *newroots = jl_alloc_vec_any(nwithkey);
+                                jl_array_ptr_1d_push(s->method_roots_list, (jl_value_t*)newroots);
+                                rle_iter_state rootiter = rle_iter_init(0);
+                                uint64_t *rletable = NULL;
+                                size_t nblocks2 = 0;
+                                size_t nroots = jl_array_nrows(def->roots);
+                                size_t k = 0;
+                                if (def->root_blocks) {
+                                    rletable = jl_array_data(def->root_blocks, uint64_t);
+                                    nblocks2 = jl_array_nrows(def->root_blocks);
+                                }
+                                while (rle_iter_increment(&rootiter, nroots, rletable, nblocks2)) {
+                                    if (rootiter.key == s->worklist_key) {
+                                        jl_value_t *newroot = jl_array_ptr_ref(def->roots, rootiter.i);
+                                        jl_queue_for_serialization(s, newroot);
+                                        jl_array_ptr_set(newroots, k++, newroot);
+                                    }
+                                }
+                                assert(k == nwithkey);
+                            }
+                        }
+                    }
+                }
+            }
         }
     }
 
-
     if (immediate) // must be things that can be recursively handled, and valid as type parameters
         assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v));
 
-    const jl_datatype_layout_t *layout = t->layout;
     if (layout->npointers == 0) {
         // bitstypes do not require recursion
     }
@@ -816,22 +856,29 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
     }
     else if (jl_is_array(v)) {
         jl_array_t *ar = (jl_array_t*)v;
-        const char *data = (const char*)jl_array_data(ar);
-        if (ar->flags.ptrarray) {
-            size_t i, l = jl_array_len(ar);
+        jl_value_t *mem = get_replaceable_field((jl_value_t**)&ar->ref.mem, 1);
+        jl_queue_for_serialization_(s, mem, 1, immediate);
+    }
+    else if (jl_is_genericmemory(v)) {
+        jl_genericmemory_t *m = (jl_genericmemory_t*)v;
+        const char *data = (const char*)m->ptr;
+        if (jl_genericmemory_how(m) == 3) {
+            assert(jl_is_string(jl_genericmemory_data_owner_field(m)));
+        }
+        else if (layout->flags.arrayelem_isboxed) {
+            size_t i, l = m->length;
             for (i = 0; i < l; i++) {
                 jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[i], 1);
                 jl_queue_for_serialization_(s, fld, 1, immediate);
             }
         }
-        else if (ar->flags.hasptr) {
-            uint16_t elsz = ar->elsize;
-            size_t i, l = jl_array_len(ar);
-            jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(ar));
-            size_t j, np = et->layout->npointers;
+        else if (layout->first_ptr >= 0) {
+            uint16_t elsz = layout->size;
+            size_t i, l = m->length;
+            size_t j, np = layout->npointers;
             for (i = 0; i < l; i++) {
                 for (j = 0; j < np; j++) {
-                    uint32_t ptr = jl_ptr_offset(et, j);
+                    uint32_t ptr = jl_ptr_offset(t, j);
                     jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], 1);
                     jl_queue_for_serialization_(s, fld, 1, immediate);
                 }
@@ -839,17 +886,49 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
             }
         }
     }
-    else if (jl_typetagis(v, jl_module_tag << 4)) {
+    else if (jl_is_module(v)) {
         jl_queue_module_for_serialization(s, (jl_module_t*)v);
     }
     else if (layout->nfields > 0) {
+        if (jl_options.trim) {
+            if (jl_is_method(v)) {
+                jl_method_t *m = (jl_method_t *)v;
+                if (jl_is_svec(jl_atomic_load_relaxed(&m->specializations)))
+                    jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&m->specializations), 0, 1);
+            }
+            else if (jl_is_mtable(v)) {
+                jl_methtable_t *mt = (jl_methtable_t*)v;
+                jl_methtable_t *newmt = (jl_methtable_t*)ptrhash_get(&new_methtables, mt);
+                if (newmt != HT_NOTFOUND)
+                    record_field_change((jl_value_t **)&mt->defs, (jl_value_t*)jl_atomic_load_relaxed(&newmt->defs));
+                else
+                    record_field_change((jl_value_t **)&mt->defs, jl_nothing);
+            }
+            else if (jl_is_mcache(v)) {
+                jl_methcache_t *mc = (jl_methcache_t*)v;
+                jl_value_t *cache = jl_atomic_load_relaxed(&mc->cache);
+                if (!jl_typetagis(cache, jl_typemap_entry_type) || ((jl_typemap_entry_t*)cache)->sig != jl_tuple_type) { // aka Builtins (maybe sometimes OpaqueClosure too)
+                    record_field_change((jl_value_t **)&mc->cache, jl_nothing);
+                }
+                record_field_change((jl_value_t **)&mc->leafcache, jl_an_empty_memory_any);
+            }
+            // TODO: prune any partitions and partition data that has been deleted in the current world
+            //else if (jl_is_binding(v)) {
+            //    jl_binding_t *b = (jl_binding_t*)v;
+            //}
+            //else if (jl_is_binding_partition(v)) {
+            //    jl_binding_partition_t *bpart = (jl_binding_partition_t*)v;
+            //}
+        }
         char *data = (char*)jl_data_ptr(v);
         size_t i, np = layout->npointers;
+        size_t fldidx = 1;
         for (i = 0; i < np; i++) {
             uint32_t ptr = jl_ptr_offset(t, i);
-            int mutabl = t->name->mutabl;
-            if (jl_is_binding(v) && ((jl_binding_t*)v)->constp && i == 0) // value field depends on constp field
-                mutabl = 0;
+            size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*);
+            while (offset >= (fldidx == layout->nfields ? jl_datatype_size(t) : jl_field_offset(t, fldidx)))
+                fldidx++;
+            int mutabl = !jl_field_isconst(t, fldidx - 1);
             jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl);
             jl_queue_for_serialization_(s, fld, 1, immediate);
         }
@@ -859,62 +938,80 @@ done_fields: ;
 
     // We've encountered an item we need to cache
     void **bp = ptrhash_bp(&serialization_order, v);
-    assert(*bp != (void*)(uintptr_t)-1);
-    if (s->incremental) {
-        void **bp2 = ptrhash_bp(&unique_ready, v);
-        if (*bp2 == HT_NOTFOUND)
-            assert(*bp == (void*)(uintptr_t)-2);
-        else if (*bp != (void*)(uintptr_t)-2)
-            return;
-    }
-    else {
-        assert(*bp == (void*)(uintptr_t)-2);
-    }
+    assert(*bp == (void*)(uintptr_t)-2);
     arraylist_push(&serialization_queue, (void*) v);
     size_t idx = serialization_queue.len - 1;
     assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize");
+    *bp = to_seroder_entry(idx);
 
-    *bp = (void*)((char*)HT_NOTFOUND + 1 + idx);
+    // DataType is very unusual, in that some of the fields need to be pre-order, and some
+    // (notably super) must not be (even if `jl_queue_for_serialization_` would otherwise
+    // try to promote itself to be immediate)
+    if (s->incremental && jl_is_datatype(v) && immediate && recursive) {
+        jl_datatype_t *dt = (jl_datatype_t*)v;
+        void **bp = ptrhash_bp(&serialization_order, (void*)dt->super);
+        if (*bp != (void*)-2) {
+            // if super is already on the stack of things to handle when this returns, do
+            // not try to handle it now
+            jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, immediate);
+        }
+        immediate = 0;
+        char *data = (char*)jl_data_ptr(v);
+        size_t i, np = layout->npointers;
+        for (i = 0; i < np; i++) {
+            uint32_t ptr = jl_ptr_offset(t, i);
+            if (ptr * sizeof(jl_value_t*) == offsetof(jl_datatype_t, super))
+                continue; // skip the super field, since it might not be quite validly ordered
+            int mutabl = 1;
+            jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl);
+            jl_queue_for_serialization_(s, fld, 1, immediate);
+        }
+    }
 }
 
+
 static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED
 {
     if (!jl_needs_serialization(s, v))
         return;
 
-    jl_value_t *t = jl_typeof(v);
+    jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
+    // check early from errors, so we have a little bit of contextual state for debugging them
+    if (t == jl_task_type) {
+        jl_error("Task cannot be serialized");
+    }
+    if (s->incremental && needs_uniquing(v, s->query_cache) && t == jl_binding_type) {
+        jl_binding_t *b = (jl_binding_t*)v;
+        if (b->globalref == NULL)
+            jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity
+    }
+    if (jl_is_foreign_type(t) == 1) {
+        jl_error("Cannot serialize instances of foreign datatypes");
+    }
+
     // Items that require postorder traversal must visit their children prior to insertion into
     // the worklist/serialization_order (and also before their first use)
     if (s->incremental && !immediate) {
-        if (jl_is_datatype(t) && needs_uniquing(v))
+        if (jl_is_datatype(t) && needs_uniquing(v, s->query_cache))
             immediate = 1;
-        if (jl_is_datatype_singleton((jl_datatype_t*)t) && needs_uniquing(v))
+        if (jl_is_datatype_singleton((jl_datatype_t*)t) && needs_uniquing(v, s->query_cache))
             immediate = 1;
     }
 
     void **bp = ptrhash_bp(&serialization_order, v);
-    if (*bp == HT_NOTFOUND) {
-        *bp = (void*)(uintptr_t)(immediate ? -2 : -1);
-    }
-    else {
-        if (!s->incremental || !immediate || !recursive)
-            return;
-        void **bp2 = ptrhash_bp(&unique_ready, v);
-        if (*bp2 == HT_NOTFOUND)
-            *bp2 = v; // now is unique_ready
-        else {
-            assert(*bp != (void*)(uintptr_t)-1);
-            return; // already was unique_ready
-        }
-        assert(*bp != (void*)(uintptr_t)-2); // should be unique_ready then
-        if (*bp == (void*)(uintptr_t)-1)
-            *bp = (void*)(uintptr_t)-2; // now immediate
-    }
+    assert(!immediate || *bp != (void*)(uintptr_t)-2);
+    if (*bp == HT_NOTFOUND)
+        *bp = (void*)(uintptr_t)-1; // now enqueued
+    else if (!s->incremental || !immediate || !recursive || *bp != (void*)(uintptr_t)-1)
+        return;
 
-    if (immediate)
+    if (immediate) {
+        *bp = (void*)(uintptr_t)-2; // now immediate
         jl_insert_into_serialization_queue(s, v, recursive, immediate);
-    else
+    }
+    else {
         arraylist_push(&object_worklist, (void*)v);
+    }
 }
 
 // Do a pre-order traversal of the to-serialize worklist, in the identical order
@@ -974,16 +1071,17 @@ static void write_pointer(ios_t *s) JL_NOTSAFEPOINT
 }
 
 // Records the buildid holding `v` and returns the tagged offset within the corresponding image
-static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) {
+static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_GC_DISABLED
+{
     size_t i = external_blob_index(v);
     if (i < n_linkage_blobs()) {
         // We found the sysimg/pkg that this item links against
         // Compute the relocation code
         size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i];
-        offset /= sizeof(void*);
-        assert(offset < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to external image too large");
-        assert(n_linkage_blobs() == jl_array_len(s->buildid_depmods_idxs));
-        size_t depsidx = ((uint32_t*)jl_array_data(s->buildid_depmods_idxs))[i]; // map from build_id_idx -> deps_idx
+        assert((offset % SYS_EXTERNAL_LINK_UNIT) == 0);
+        offset /= SYS_EXTERNAL_LINK_UNIT;
+        assert(n_linkage_blobs() == jl_array_nrows(s->buildid_depmods_idxs));
+        size_t depsidx = jl_array_data(s->buildid_depmods_idxs, uint32_t)[i]; // map from build_id_idx -> deps_idx
         assert(depsidx < INT32_MAX);
         if (depsidx < ((uintptr_t)1 << (RELOC_TAG_OFFSET - DEPS_IDX_OFFSET)) && offset < ((uintptr_t)1 << DEPS_IDX_OFFSET))
             // if it fits in a SysimageLinkage type, use that representation
@@ -991,8 +1089,9 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_
         // otherwise, we store the image key in `link_ids`
         assert(link_ids && jl_is_array(link_ids));
         jl_array_grow_end(link_ids, 1);
-        uint32_t *link_id_data  = (uint32_t*)jl_array_data(link_ids);  // wait until after the `grow`
-        link_id_data[jl_array_len(link_ids) - 1] = depsidx;
+        uint32_t *link_id_data  = jl_array_data(link_ids, uint32_t);  // wait until after the `grow`
+        link_id_data[jl_array_nrows(link_ids) - 1] = depsidx;
+        assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to external image too large");
         return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET) + offset;
     }
     return 0;
@@ -1002,22 +1101,22 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_
 // but symbols, small integers, and a couple of special items (`nothing` and the root Task)
 // have special handling.
 #define backref_id(s, v, link_ids) _backref_id(s, (jl_value_t*)(v), link_ids)
-static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_NOTSAFEPOINT
+static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_GC_DISABLED
 {
     assert(v != NULL && "cannot get backref to NULL object");
-    void *idx = HT_NOTFOUND;
     if (jl_is_symbol(v)) {
         void **pidx = ptrhash_bp(&symbol_table, v);
-        idx = *pidx;
+        void *idx = *pidx;
         if (idx == HT_NOTFOUND) {
             size_t l = strlen(jl_symbol_name((jl_sym_t*)v));
             write_uint32(s->symbols, l);
             ios_write(s->symbols, jl_symbol_name((jl_sym_t*)v), l + 1);
             size_t offset = ++nsym_tag;
             assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many symbols");
-            idx = (void*)((char*)HT_NOTFOUND + ((uintptr_t)SymbolRef << RELOC_TAG_OFFSET) + offset);
+            idx = to_seroder_entry(offset - 1);
             *pidx = idx;
         }
+        return ((uintptr_t)SymbolRef << RELOC_TAG_OFFSET) + from_seroder_entry(idx);
     }
     else if (v == (jl_value_t*)s->ptls->root_task) {
         return (uintptr_t)TagRef << RELOC_TAG_OFFSET;
@@ -1045,27 +1144,27 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *
         assert(item && "no external linkage identified");
         return item;
     }
+    void *idx = ptrhash_get(&serialization_order, v);
     if (idx == HT_NOTFOUND) {
-        idx = ptrhash_get(&serialization_order, v);
-        if (idx == HT_NOTFOUND) {
-            jl_(jl_typeof(v));
-            jl_(v);
-        }
-        assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass");
-        assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass");
-        assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass");
+        jl_(jl_typeof(v));
+        jl_(v);
     }
-    return (char*)idx - 1 - (char*)HT_NOTFOUND;
+    assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass");
+    assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass");
+    assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass");
+    return ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + from_seroder_entry(idx);
 }
 
 
 static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t offset) JL_NOTSAFEPOINT
 {
-    if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) {
+    if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld, s->query_cache)) {
         if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld)))
             arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset);
-        else
+        else if (jl_is_method_instance(fld) || jl_is_binding(fld))
             arraylist_push(&s->uniquing_objs, (void*)(uintptr_t)offset);
+        else
+            assert(0 && "unknown object type with needs_uniquing set");
     }
 }
 
@@ -1109,44 +1208,116 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t
     newm->parent = NULL;
     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, parent)));
     arraylist_push(&s->relocs_list, (void*)backref_id(s, m->parent, s->link_ids_relocs));
-    newm->bindings = NULL;
+    jl_atomic_store_relaxed(&newm->bindings, NULL);
     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, bindings)));
-    arraylist_push(&s->relocs_list, (void*)backref_id(s, m->bindings, s->link_ids_relocs));
-    newm->bindingkeyset = NULL;
+    arraylist_push(&s->relocs_list, (void*)backref_id(s, jl_atomic_load_relaxed(&m->bindings), s->link_ids_relocs));
+    jl_atomic_store_relaxed(&newm->bindingkeyset, NULL);
     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, bindingkeyset)));
-    arraylist_push(&s->relocs_list, (void*)backref_id(s, m->bindingkeyset, s->link_ids_relocs));
-    newm->primary_world = ~(size_t)0;
+    arraylist_push(&s->relocs_list, (void*)backref_id(s, jl_atomic_load_relaxed(&m->bindingkeyset), s->link_ids_relocs));
+    newm->file = NULL;
+    arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, file)));
+    arraylist_push(&s->relocs_list, (void*)backref_id(s, jl_options.strip_metadata ? jl_empty_sym : m->file , s->link_ids_relocs));
+    if (jl_options.strip_metadata)
+        newm->line = 0;
+    newm->usings_backedges = NULL;
+    arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings_backedges)));
+    arraylist_push(&s->relocs_list, (void*)backref_id(s, get_replaceable_field(&m->usings_backedges, 1), s->link_ids_relocs));
+    newm->scanned_methods = NULL;
+    arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, scanned_methods)));
+    arraylist_push(&s->relocs_list, (void*)backref_id(s, get_replaceable_field(&m->scanned_methods, 1), s->link_ids_relocs));
+
+    // After reload, everything that has happened in this process happened semantically at
+    // (for .incremental) or before jl_require_world, so reset this flag.
+    jl_atomic_store_relaxed(&newm->export_set_changed_since_require_world, 0);
 
     // write out the usings list
     memset(&newm->usings._space, 0, sizeof(newm->usings._space));
     if (m->usings.items == &m->usings._space[0]) {
-        newm->usings.items = (void**)offsetof(jl_module_t, usings._space);
+        newm->usings.items = &newm->usings._space[0];
+        // Push these relocations here, to keep them in order. This pairs with the `newm->usings.items = ` below.
         arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings.items)));
         arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item));
         size_t i;
-        for (i = 0; i < m->usings.len; i++) {
-            arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings._space[i])));
-            arraylist_push(&s->relocs_list, (void*)backref_id(s, m->usings._space[i], s->link_ids_relocs));
+        for (i = 0; i < module_usings_length(m); i++) {
+            struct _jl_module_using *newm_data = module_usings_getidx(newm, i);
+            struct _jl_module_using *data = module_usings_getidx(m, i);
+            // TODO: Remove dead entries
+            newm_data->min_world = data->min_world;
+            newm_data->max_world = data->max_world;
+            newm_data->flags = data->flags;
+            if (s->incremental) {
+                if (data->max_world != ~(size_t)0)
+                    newm_data->max_world = 0;
+                newm_data->min_world = jl_require_world;
+            }
+            arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings._space[4*i])));
+            arraylist_push(&s->relocs_list, (void*)backref_id(s, data->mod, s->link_ids_relocs));
         }
+        newm->usings.items = (void**)offsetof(jl_module_t, usings._space);
     }
     else {
         newm->usings.items = (void**)tot;
         arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings.items)));
         arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item));
+        newm = NULL; // `write_*(s->s)` below may invalidate `newm`, so defensively set it to NULL
         size_t i;
-        for (i = 0; i < m->usings.len; i++) {
-            write_pointerfield(s, (jl_value_t*)m->usings.items[i]);
-            tot += sizeof(void*);
+        for (i = 0; i < module_usings_length(m); i++) {
+            struct _jl_module_using *data = module_usings_getidx(m, i);
+            write_pointerfield(s, (jl_value_t*)data->mod);
+            if (s->incremental) {
+                // TODO: Drop dead ones entirely?
+                write_uint(s->s, jl_require_world);
+                write_uint(s->s, data->max_world == ~(size_t)0 ? ~(size_t)0 : 1);
+            } else {
+                write_uint(s->s, data->min_world);
+                write_uint(s->s, data->max_world);
+            }
+            write_uint(s->s, data->flags);
+            static_assert(sizeof(struct _jl_module_using) == 4*sizeof(void*), "_jl_module_using mismatch");
+            tot += sizeof(struct _jl_module_using);
         }
-        for (; i < m->usings.max; i++) {
+        for (; i < module_usings_max(m); i++) {
             write_pointer(s->s);
-            tot += sizeof(void*);
+            write_uint(s->s, 0);
+            write_uint(s->s, 0);
+            write_uint(s->s, 0);
+            tot += sizeof(struct _jl_module_using);
         }
     }
     assert(ios_pos(s->s) - reloc_offset == tot);
 }
 
-static void record_gvars(jl_serializer_state *s, arraylist_t *globals) JL_NOTSAFEPOINT
+static void record_memoryref(jl_serializer_state *s, size_t reloc_offset, jl_genericmemoryref_t ref) {
+    ios_t *f = s->s;
+    // make some header modifications in-place
+    jl_genericmemoryref_t *newref = (jl_genericmemoryref_t*)&f->buf[reloc_offset];
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(ref.mem))->layout;
+    if (!layout->flags.arrayelem_isunion && layout->size != 0) {
+        newref->ptr_or_offset = (void*)((char*)ref.ptr_or_offset - (char*)ref.mem->ptr); // relocation offset (bytes)
+        arraylist_push(&s->memref_list, (void*)reloc_offset); // relocation location
+        arraylist_push(&s->memref_list, NULL); // relocation target (ignored)
+    }
+}
+
+static void record_memoryrefs_inside(jl_serializer_state *s, jl_datatype_t *t, size_t reloc_offset, const char *data)
+{
+    assert(jl_is_datatype(t));
+    size_t i, nf = jl_datatype_nfields(t);
+    for (i = 0; i < nf; i++) {
+        size_t offset = jl_field_offset(t, i);
+        if (jl_field_isptr(t, i))
+            continue;
+        jl_value_t *ft = jl_field_type_concrete(t, i);
+        if (jl_is_uniontype(ft))
+            continue;
+        if (jl_is_genericmemoryref_type(ft))
+            record_memoryref(s, reloc_offset + offset, *(jl_genericmemoryref_t*)(data + offset));
+        else
+            record_memoryrefs_inside(s, (jl_datatype_t*)ft, reloc_offset + offset, data + offset);
+    }
+}
+
+static void record_gvars(jl_serializer_state *s, arraylist_t *globals) JL_GC_DISABLED
 {
     for (size_t i = 0; i < globals->len; i++)
         jl_queue_for_serialization(s, globals->items[i]);
@@ -1165,7 +1336,7 @@ static void record_external_fns(jl_serializer_state *s, arraylist_t *external_fn
 #ifndef JL_NDEBUG
     for (size_t i = 0; i < external_fns->len; i++) {
         jl_code_instance_t *ci = (jl_code_instance_t*)external_fns->items[i];
-        assert(jl_atomic_load_relaxed(&ci->specsigflags) & 0b100);
+        assert(jl_atomic_load_relaxed(&ci->flags) & JL_CI_FLAGS_FROM_IMAGE);
     }
 #endif
 }
@@ -1188,25 +1359,36 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
         JL_GC_PROMISE_ROOTED(v);
         assert(!(s->incremental && jl_object_in_image(v)));
         jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
-        assert((t->instance == NULL || t->instance == v) && "detected singleton construction corruption");
+        assert((!jl_is_datatype_singleton(t) || t->instance == v) && "detected singleton construction corruption");
+        int mutabl = t->name->mutabl;
         ios_t *f = s->s;
         if (t->smalltag) {
             if (t->layout->npointers == 0 || t == jl_string_type) {
-                if (jl_datatype_nfields(t) == 0 || t->name->mutabl == 0 || t == jl_string_type) {
+                if (jl_datatype_nfields(t) == 0 || mutabl == 0 || t == jl_string_type) {
                     f = s->const_data;
                 }
             }
         }
 
-        // realign stream to expected gc alignment (16 bytes)
+        // realign stream to expected gc alignment (16 bytes) after tag
         uintptr_t skip_header_pos = ios_pos(f) + sizeof(jl_taggedvalue_t);
+        uintptr_t object_id_expected = mutabl &&
+                 t != jl_datatype_type &&
+                 t != jl_typename_type &&
+                 t != jl_string_type &&
+                 t != jl_simplevector_type &&
+                 t != jl_module_type;
+        if (object_id_expected)
+            skip_header_pos += sizeof(size_t);
         write_padding(f, LLT_ALIGN(skip_header_pos, 16) - skip_header_pos);
 
         // write header
-        if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t))
+        if (object_id_expected)
+            write_uint(f, jl_object_id(v));
+        if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t, s->query_cache))
             arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(ios_pos(f)|1));
         if (f == s->const_data)
-            write_uint(s->const_data, ((uintptr_t)t->smalltag << 4) | GC_OLD_MARKED);
+            write_uint(s->const_data, ((uintptr_t)t->smalltag << 4) | GC_OLD_MARKED | GC_IN_IMAGE);
         else
             write_gctaggedfield(s, t);
         size_t reloc_offset = ios_pos(f);
@@ -1214,8 +1396,14 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
         layout_table.items[item] = (void*)(reloc_offset | (f == s->const_data)); // store the inverse mapping of `serialization_order` (`id` => object-as-streampos)
 
         if (s->incremental) {
-            if (needs_uniquing(v)) {
-                if (jl_is_method_instance(v)) {
+            if (needs_uniquing(v, s->query_cache)) {
+                if (jl_is_binding(v)) {
+                    jl_binding_t *b = (jl_binding_t*)v;
+                    write_pointerfield(s, (jl_value_t*)b->globalref->mod);
+                    write_pointerfield(s, (jl_value_t*)b->globalref->name);
+                    continue;
+                }
+                else if (jl_is_method_instance(v)) {
                     assert(f == s->s);
                     jl_method_instance_t *mi = (jl_method_instance_t*)v;
                     write_pointerfield(s, mi->def.value);
@@ -1223,120 +1411,149 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                     write_pointerfield(s, (jl_value_t*)mi->sparam_vals);
                     continue;
                 }
-                else if (!jl_is_datatype(v)) {
+                else if (jl_is_datatype(v)) {
+                    for (size_t i = 0; i < s->uniquing_super.len; i++) {
+                        if (s->uniquing_super.items[i] == (void*)v) {
+                            s->uniquing_super.items[i] = arraylist_pop(&s->uniquing_super);
+                            arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(reloc_offset|3));
+                        }
+                    }
+                }
+                else {
                     assert(jl_is_datatype_singleton(t) && "unreachable");
                 }
             }
-            else if (needs_recaching(v)) {
+            else if (needs_recaching(v, s->query_cache)) {
                 arraylist_push(jl_is_datatype(v) ? &s->fixup_types : &s->fixup_objs, (void*)reloc_offset);
             }
-            else if (jl_typetagis(v, jl_binding_type)) {
-                jl_binding_t *b = (jl_binding_t*)v;
-                if (b->globalref == NULL || jl_object_in_image((jl_value_t*)b->globalref->mod))
-                    jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity
-            }
         }
 
         // write data
         if (jl_is_array(v)) {
             assert(f == s->s);
             // Internal data for types in julia.h with `jl_array_t` field(s)
-#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
             jl_array_t *ar = (jl_array_t*)v;
-            jl_value_t *et = jl_tparam0(jl_typeof(v));
-            size_t alen = jl_array_len(ar);
-            size_t datasize = alen * ar->elsize;
-            size_t tot = datasize;
-            int isbitsunion = jl_array_isbitsunion(ar);
-            if (isbitsunion)
-                tot += alen;
-            else if (ar->elsize == 1)
-                tot += 1;
-            int ndimwords = jl_array_ndimwords(ar->flags.ndims);
-            size_t headersize = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
             // copy header
+            size_t headersize = sizeof(jl_array_t) + jl_array_ndims(ar)*sizeof(size_t);
             ios_write(f, (char*)v, headersize);
-            size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT;
-            if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD)
-                alignment_amt = JL_CACHE_BYTE_ALIGNMENT;
             // make some header modifications in-place
             jl_array_t *newa = (jl_array_t*)&f->buf[reloc_offset];
-            if (newa->flags.ndims == 1)
-                newa->maxsize = alen;
-            newa->offset = 0;
-            newa->flags.how = 0;
-            newa->flags.pooled = 0;
-            newa->flags.isshared = 0;
-
-            // write data
-            if (!ar->flags.ptrarray && !ar->flags.hasptr) {
-                // Non-pointer eltypes get encoded in the const_data section
-                uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt);
-                write_padding(s->const_data, data - ios_pos(s->const_data));
-                // write data and relocations
-                newa->data = NULL; // relocation offset
-                data /= sizeof(void*);
-                assert(data < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to constant data too large");
-                arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location
-                arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target
-                if (jl_is_cpointer_type(et)) {
-                    // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE)
-                    const intptr_t *data = (const intptr_t*)jl_array_data(ar);
-                    size_t i;
-                    for (i = 0; i < alen; i++) {
-                        if (data[i] != -1)
-                            write_pointer(s->const_data);
-                        else
-                            ios_write(s->const_data, (char*)&data[i], sizeof(data[i]));
-                    }
-                }
-                else {
-                    if (isbitsunion) {
-                        ios_write(s->const_data, (char*)jl_array_data(ar), datasize);
-                        ios_write(s->const_data, jl_array_typetagdata(ar), alen);
+            newa->ref.mem = NULL; // relocation offset
+            arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, ref.mem))); // relocation location
+            jl_value_t *mem = get_replaceable_field((jl_value_t**)&ar->ref.mem, 1);
+            arraylist_push(&s->relocs_list, (void*)backref_id(s, mem, s->link_ids_relocs)); // relocation target
+            record_memoryref(s, reloc_offset + offsetof(jl_array_t, ref), ar->ref);
+        }
+        else if (jl_is_genericmemory(v)) {
+            assert(f == s->s);
+            // Internal data for types in julia.h with `jl_genericmemory_t` field(s)
+            jl_genericmemory_t *m = (jl_genericmemory_t*)v;
+            const jl_datatype_layout_t *layout = t->layout;
+            size_t len = m->length;
+            // if (jl_genericmemory_how(m) == 3) {
+            //     jl_value_t *owner = jl_genericmemory_data_owner_field(m);
+            //     write_uint(f, len);
+            //     write_pointerfield(s, owner);
+            //     write_pointerfield(s, owner);
+            //     jl_genericmemory_t *new_mem = (jl_genericmemory_t*)&f->buf[reloc_offset];
+            //     assert(new_mem->ptr == NULL);
+            //     new_mem->ptr = (void*)((char*)m->ptr - (char*)owner); // relocation offset
+            // }
+            // else
+            {
+                size_t datasize = len * layout->size;
+                size_t tot = datasize;
+                int isbitsunion = layout->flags.arrayelem_isunion;
+                if (isbitsunion)
+                    tot += len;
+                size_t headersize = sizeof(jl_genericmemory_t);
+                // copy header
+                ios_write(f, (char*)v, headersize);
+                // write data
+                if (!layout->flags.arrayelem_isboxed && layout->first_ptr < 0) {
+                    // set owner to NULL
+                    write_pointer(f);
+                    // Non-pointer eltypes get encoded in the const_data section
+                    size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT;
+                    if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD)
+                        alignment_amt = JL_CACHE_BYTE_ALIGNMENT;
+                    uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt);
+                    write_padding(s->const_data, data - ios_pos(s->const_data));
+                    // write data and relocations
+                    jl_genericmemory_t *new_mem = (jl_genericmemory_t*)&f->buf[reloc_offset];
+                    new_mem->ptr = NULL; // relocation offset
+                    data /= sizeof(void*);
+                    assert(data < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to constant data too large");
+                    arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_genericmemory_t, ptr))); // relocation location
+                    arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target
+                    jl_value_t *et = jl_tparam1(t);
+                    if (jl_is_cpointer_type(et)) {
+                        // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE)
+                        const intptr_t *data = (const intptr_t*)m->ptr;
+                        size_t i;
+                        for (i = 0; i < len; i++) {
+                            if (data[i] != -1)
+                                write_pointer(s->const_data);
+                            else
+                                ios_write(s->const_data, (char*)&data[i], sizeof(data[i]));
+                        }
                     }
                     else {
-                        ios_write(s->const_data, (char*)jl_array_data(ar), tot);
+                        if (isbitsunion) {
+                            ios_write(s->const_data, (char*)m->ptr, datasize);
+                            ios_write(s->const_data, jl_genericmemory_typetagdata(m), len);
+                        }
+                        else {
+                            ios_write(s->const_data, (char*)m->ptr, tot);
+                        }
+                    }
+                    if (len == 0) { // TODO: should we have a zero-page, instead of writing each type's fragment separately?
+                        write_padding(s->const_data, layout->size ? layout->size : isbitsunion);
+                    }
+                    else if (jl_genericmemory_how(m) == 3) {
+                        assert(jl_is_string(jl_genericmemory_data_owner_field(m)));
+                        write_padding(s->const_data, 1);
                     }
                 }
-            }
-            else {
-                // Pointer eltypes are encoded in the mutable data section
-                size_t data = LLT_ALIGN(ios_pos(f), alignment_amt);
-                size_t padding_amt = data - ios_pos(f);
-                headersize += padding_amt;
-                newa->data = (void*)headersize; // relocation offset
-                arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location
-                arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target
-                write_padding(f, padding_amt);
-                if (ar->flags.hasptr) {
-                    // copy all of the data first
-                    const char *data = (const char*)jl_array_data(ar);
-                    ios_write(f, data, datasize);
-                    // the rewrite all of the embedded pointers to null+relocation
-                    uint16_t elsz = ar->elsize;
-                    size_t j, np = ((jl_datatype_t*)et)->layout->npointers;
-                    size_t i;
-                    for (i = 0; i < alen; i++) {
-                        for (j = 0; j < np; j++) {
-                            size_t offset = i * elsz + jl_ptr_offset(((jl_datatype_t*)et), j) * sizeof(jl_value_t*);
-                            jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], 1);
-                            size_t fld_pos = reloc_offset + headersize + offset;
-                            if (fld != NULL) {
-                                arraylist_push(&s->relocs_list, (void*)(uintptr_t)fld_pos); // relocation location
-                                arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target
-                                record_uniquing(s, fld, fld_pos);
+                else {
+                    // Pointer eltypes are encoded in the mutable data section
+                    headersize = LLT_ALIGN(headersize, JL_SMALL_BYTE_ALIGNMENT);
+                    size_t data = LLT_ALIGN(ios_pos(f), JL_SMALL_BYTE_ALIGNMENT);
+                    write_padding(f, data - ios_pos(f));
+                    assert(reloc_offset + headersize == ios_pos(f));
+                    jl_genericmemory_t *new_mem = (jl_genericmemory_t*)&f->buf[reloc_offset];
+                    new_mem->ptr = (void*)headersize; // relocation offset
+                    arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_genericmemory_t, ptr))); // relocation location
+                    arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target
+                    if (!layout->flags.arrayelem_isboxed) {
+                        // copy all of the data first
+                        const char *data = (const char*)m->ptr;
+                        ios_write(f, data, datasize);
+                        // the rewrite all of the embedded pointers to null+relocation
+                        uint16_t elsz = layout->size;
+                        size_t j, np = layout->first_ptr < 0 ? 0 : layout->npointers;
+                        size_t i;
+                        for (i = 0; i < len; i++) {
+                            for (j = 0; j < np; j++) {
+                                size_t offset = i * elsz + jl_ptr_offset(t, j) * sizeof(jl_value_t*);
+                                jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], 1);
+                                size_t fld_pos = reloc_offset + headersize + offset;
+                                if (fld != NULL) {
+                                    arraylist_push(&s->relocs_list, (void*)(uintptr_t)fld_pos); // relocation location
+                                    arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target
+                                    record_uniquing(s, fld, fld_pos);
+                                }
+                                memset(&f->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none)
                             }
-                            memset(&f->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none)
                         }
                     }
-                }
-                else {
-                    jl_value_t **data = (jl_value_t**)jl_array_data(ar);
-                    size_t i;
-                    for (i = 0; i < alen; i++) {
-                        jl_value_t *e = get_replaceable_field(&data[i], 1);
-                        write_pointerfield(s, e);
+                    else {
+                        jl_value_t **data = (jl_value_t**)m->ptr;
+                        size_t i;
+                        for (i = 0; i < len; i++) {
+                            jl_value_t *e = get_replaceable_field(&data[i], 1);
+                            write_pointerfield(s, e);
+                        }
                     }
                 }
             }
@@ -1346,7 +1563,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             jl_write_module(s, item, (jl_module_t*)v);
         }
         else if (jl_typetagis(v, jl_task_tag << 4)) {
-            jl_error("Task cannot be serialized");
+            abort(); // unreachable
         }
         else if (jl_is_svec(v)) {
             assert(f == s->s);
@@ -1362,11 +1579,10 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             write_uint8(f, '\0'); // null-terminated strings for easier C-compatibility
         }
         else if (jl_is_foreign_type(t) == 1) {
-            jl_error("Cannot serialize instances of foreign datatypes");
+            abort(); // unreachable
         }
         else if (jl_datatype_nfields(t) == 0) {
             // The object has no fields, so we just snapshot its byte representation
-            assert(!t->layout->npointers);
             assert(t->layout->npointers == 0);
             ios_write(f, (char*)v, jl_datatype_size(t));
         }
@@ -1400,7 +1616,23 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                 write_padding(f, offset - tot);
                 tot = offset;
                 size_t fsz = jl_field_size(t, i);
-                if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(intptr_t*)slot != -1) {
+                jl_value_t *replace = (jl_value_t*)ptrhash_get(&bits_replace, (void*)slot);
+                if (replace != HT_NOTFOUND && fsz > 0) {
+                    assert(t->name->mutabl && !jl_field_isptr(t, i));
+                    jl_value_t *rty = jl_typeof(replace);
+                    size_t sz = jl_datatype_size(rty);
+                    ios_write(f, (const char*)replace, sz);
+                    jl_value_t *ft = jl_field_type_concrete(t, i);
+                    int isunion = jl_is_uniontype(ft);
+                    unsigned nth = 0;
+                    if (!jl_find_union_component(ft, rty, &nth))
+                        assert(0 && "invalid field assignment to isbits union");
+                    assert(sz <= fsz - isunion);
+                    write_padding(f, fsz - sz - isunion);
+                    if (isunion)
+                        write_uint8(f, nth);
+                }
+                else if (t->name->mutabl && jl_is_cpointer_type(jl_field_type_concrete(t, i)) && *(intptr_t*)slot != -1) {
                     // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE)
                     assert(!jl_field_isptr(t, i));
                     write_pointer(f);
@@ -1412,11 +1644,12 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             }
 
             size_t np = t->layout->npointers;
+            size_t fldidx = 1;
             for (i = 0; i < np; i++) {
                 size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*);
-                int mutabl = t->name->mutabl;
-                if (jl_is_binding(v) && ((jl_binding_t*)v)->constp && i == 0) // value field depends on constp field
-                    mutabl = 0;
+                while (offset >= (fldidx == nf ? jl_datatype_size(t) : jl_field_offset(t, fldidx)))
+                    fldidx++;
+                int mutabl = !jl_field_isconst(t, fldidx - 1);
                 jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], mutabl);
                 size_t fld_pos = offset + reloc_offset;
                 if (fld != NULL) {
@@ -1427,21 +1660,42 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                 memset(&f->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none)
             }
 
-            // A few objects need additional handling beyond the generic serialization above
+            // Need do a tricky fieldtype walk an record all memoryref we find inlined in this value
+            record_memoryrefs_inside(s, t, reloc_offset, data);
 
+            // A few objects need additional handling beyond the generic serialization above
             if (s->incremental && jl_typetagis(v, jl_typemap_entry_type)) {
                 assert(f == s->s);
                 jl_typemap_entry_t *newentry = (jl_typemap_entry_t*)&s->s->buf[reloc_offset];
-                if (newentry->max_world == ~(size_t)0) {
-                    if (newentry->min_world > 1) {
-                        newentry->min_world = ~(size_t)0;
+                if (jl_atomic_load_relaxed(&newentry->max_world) == ~(size_t)0) {
+                    if (jl_atomic_load_relaxed(&newentry->min_world) > 1) {
+                        jl_atomic_store_relaxed(&newentry->min_world, ~(size_t)0);
+                        jl_atomic_store_relaxed(&newentry->max_world, WORLD_AGE_REVALIDATION_SENTINEL);
                         arraylist_push(&s->fixup_objs, (void*)reloc_offset);
                     }
                 }
                 else {
                     // garbage newentry - delete it :(
-                    newentry->min_world = 1;
-                    newentry->max_world = 0;
+                    jl_atomic_store_relaxed(&newentry->min_world, 1);
+                    jl_atomic_store_relaxed(&newentry->max_world, 0);
+                }
+            }
+            else if (s->incremental && jl_is_binding_partition(v)) {
+                jl_binding_partition_t *newbpart = (jl_binding_partition_t*)&s->s->buf[reloc_offset];
+                size_t max_world = jl_atomic_load_relaxed(&newbpart->max_world);
+                if (max_world == ~(size_t)0) {
+                    // Still valid. Will be considered to be defined in jl_require_world
+                    // after reload, which is the first world before new code runs.
+                    // We use this as a quick check to determine whether a binding was
+                    // invalidated. If a binding was first defined in or before
+                    // jl_require_world, then we can assume that all precompile processes
+                    // will have seen it consistently.
+                    jl_atomic_store_relaxed(&newbpart->min_world, jl_require_world);
+                }
+                else {
+                    // The world will not be reachable after loading
+                    jl_atomic_store_relaxed(&newbpart->min_world, 1);
+                    jl_atomic_store_relaxed(&newbpart->max_world, 0);
                 }
             }
             else if (jl_is_method(v)) {
@@ -1450,56 +1704,61 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                 jl_method_t *m = (jl_method_t*)v;
                 jl_method_t *newm = (jl_method_t*)&f->buf[reloc_offset];
                 if (s->incremental) {
-                    if (newm->deleted_world != ~(size_t)0)
-                        newm->deleted_world = 1;
-                    else
+                    if (jl_atomic_load_relaxed(&newm->primary_world) > 1) {
+                        jl_atomic_store_relaxed(&newm->primary_world, ~(size_t)0); // min-world
+                        int dispatch_status = jl_atomic_load_relaxed(&newm->dispatch_status);
+                        int new_dispatch_status = 0;
+                        if (!(dispatch_status & METHOD_SIG_LATEST_ONLY))
+                            new_dispatch_status |= METHOD_SIG_PRECOMPILE_MANY;
+                        jl_atomic_store_relaxed(&newm->dispatch_status, new_dispatch_status);
                         arraylist_push(&s->fixup_objs, (void*)reloc_offset);
-                    newm->primary_world = ~(size_t)0;
-                } else {
+                    }
+                }
+                else {
                     newm->nroots_sysimg = m->roots ? jl_array_len(m->roots) : 0;
                 }
-                if (m->ccallable)
-                    arraylist_push(&s->ccallable_list, (void*)reloc_offset);
             }
             else if (jl_is_method_instance(v)) {
                 assert(f == s->s);
                 jl_method_instance_t *newmi = (jl_method_instance_t*)&f->buf[reloc_offset];
-                jl_atomic_store_relaxed(&newmi->precompiled, 0);
+                jl_atomic_store_relaxed(&newmi->flags, 0);
+                if (s->incremental) {
+                    jl_atomic_store_relaxed(&newmi->dispatch_status, 0);
+                }
             }
             else if (jl_is_code_instance(v)) {
                 assert(f == s->s);
+
                 // Handle the native-code pointers
-                assert(f == s->s);
-                jl_code_instance_t *m = (jl_code_instance_t*)v;
-                jl_code_instance_t *newm = (jl_code_instance_t*)&f->buf[reloc_offset];
+                jl_code_instance_t *ci = (jl_code_instance_t*)v;
+                jl_code_instance_t *newci = (jl_code_instance_t*)&f->buf[reloc_offset];
 
                 if (s->incremental) {
-                    arraylist_push(&s->fixup_objs, (void*)reloc_offset);
-                    if (m->min_world > 1)
-                        newm->min_world = ~(size_t)0;     // checks that we reprocess this upon deserialization
-                    if (m->max_world != ~(size_t)0)
-                        newm->max_world = 0;
+                    if (jl_atomic_load_relaxed(&ci->max_world) == ~(size_t)0) {
+                        //assert(jl_atomic_load_relaxed(&ci->edges) != jl_emptysvec); // some code (such as !==) might add a method lookup restriction but not keep the edges
+                        jl_atomic_store_release(&newci->min_world, ~(size_t)0);
+                        jl_atomic_store_release(&newci->max_world, WORLD_AGE_REVALIDATION_SENTINEL);
+                        arraylist_push(&s->fixup_objs, (void*)reloc_offset);
+                    }
                     else {
-                        if (m->inferred && ptrhash_has(&s->callers_with_edges, m->def))
-                            newm->max_world = 1;  // sentinel value indicating this will need validation
-                        if (m->min_world > 0 && m->inferred) {
-                            // TODO: also check if this object is part of the codeinst cache
-                            // will check on deserialize if this cache entry is still valid
-                        }
+                        // garbage object - delete it :(
+                        jl_atomic_store_release(&newci->min_world, 1);
+                        jl_atomic_store_release(&newci->max_world, 0);
                     }
                 }
-
-                newm->invoke = NULL;
-                newm->specsigflags = 0;
-                newm->specptr.fptr = NULL;
+                jl_atomic_store_relaxed(&newci->time_compile, 0.0);
+                jl_atomic_store_relaxed(&newci->invoke, NULL);
+                // preserve only JL_CI_FLAGS_NATIVE_CACHE_VALID bits
+                jl_atomic_store_relaxed(&newci->flags, jl_atomic_load_relaxed(&newci->flags) & JL_CI_FLAGS_NATIVE_CACHE_VALID);
+                jl_atomic_store_relaxed(&newci->specptr.fptr, NULL);
                 int8_t fptr_id = JL_API_NULL;
                 int8_t builtin_id = 0;
-                if (m->invoke == jl_fptr_const_return) {
+                if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) {
                     fptr_id = JL_API_CONST;
                 }
                 else {
-                    if (jl_is_method(m->def->def.method)) {
-                        builtin_id = jl_fptr_id(m->specptr.fptr);
+                    if (jl_is_method(jl_get_ci_mi(ci)->def.method)) {
+                        builtin_id = jl_fptr_id(jl_atomic_load_relaxed(&ci->specptr.fptr));
                         if (builtin_id) { // found in the table of builtins
                             assert(builtin_id >= 2);
                             fptr_id = JL_API_BUILTIN;
@@ -1507,7 +1766,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                         else {
                             int32_t invokeptr_id = 0;
                             int32_t specfptr_id = 0;
-                            jl_get_function_id(native_functions, m, &invokeptr_id, &specfptr_id); // see if we generated code for it
+                            jl_get_function_id(native_functions, ci, &invokeptr_id, &specfptr_id); // see if we generated code for it
                             if (invokeptr_id) {
                                 if (invokeptr_id == -1) {
                                     fptr_id = JL_API_BOXED;
@@ -1515,6 +1774,15 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                                 else if (invokeptr_id == -2) {
                                     fptr_id = JL_API_WITH_PARAMETERS;
                                 }
+                                else if (invokeptr_id == -3) {
+                                    abort();
+                                }
+                                else if (invokeptr_id == -4) {
+                                    fptr_id = JL_API_OC_CALL;
+                                }
+                                else if (invokeptr_id == -5) {
+                                    abort();
+                                }
                                 else {
                                     assert(invokeptr_id > 0);
                                     ios_ensureroom(s->fptr_record, invokeptr_id * sizeof(void*));
@@ -1539,7 +1807,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                         }
                     }
                 }
-                newm->invoke = NULL; // relocation offset
+                jl_atomic_store_relaxed(&newci->invoke, NULL); // relocation offset
                 if (fptr_id != JL_API_NULL) {
                     assert(fptr_id < BuiltinFunctionTag && "too many functions to serialize");
                     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_code_instance_t, invoke))); // relocation location
@@ -1559,14 +1827,14 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                     size_t nf = dt->layout->nfields;
                     size_t np = dt->layout->npointers;
                     size_t fieldsize = 0;
-                    uint8_t is_foreign_type = dt->layout->fielddesc_type == 3;
+                    uint8_t is_foreign_type = dt->layout->flags.fielddesc_type == 3;
                     if (!is_foreign_type) {
-                        fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type);
+                        fieldsize = jl_fielddesc_size(dt->layout->flags.fielddesc_type);
                     }
                     char *flddesc = (char*)dt->layout;
                     size_t fldsize = sizeof(jl_datatype_layout_t) + nf * fieldsize;
                     if (!is_foreign_type && dt->layout->first_ptr != -1)
-                        fldsize += np << dt->layout->fielddesc_type;
+                        fldsize += np << dt->layout->flags.fielddesc_type;
                     uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*));
                     write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream
                     newdt->layout = NULL; // relocation offset
@@ -1582,6 +1850,9 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                         ios_write(s->const_data, (char*)&dyn, sizeof(jl_fielddescdyn_t));
                     }
                 }
+                void *superidx = ptrhash_get(&serialization_order, dt->super);
+                if (s->incremental && superidx != HT_NOTFOUND && from_seroder_entry(superidx) > item && needs_uniquing((jl_value_t*)dt->super, s->query_cache))
+                    arraylist_push(&s->uniquing_super, dt->super);
             }
             else if (jl_is_typename(v)) {
                 assert(f == s->s);
@@ -1616,16 +1887,16 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                     arraylist_push(&s->fixup_objs, (void*)reloc_offset);
                 }
             }
-            else if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) {
+            else if (jl_is_genericmemoryref(v)) {
                 assert(f == s->s);
-                // will need to rehash this, later (after types are fully constructed)
-                arraylist_push(&s->fixup_objs, (void*)reloc_offset);
+                record_memoryref(s, reloc_offset, *(jl_genericmemoryref_t*)v);
             }
             else {
                 write_padding(f, jl_datatype_size(t) - tot);
             }
         }
     }
+    assert(s->uniquing_super.len == 0);
 }
 
 // In deserialization, create Symbols and set up the
@@ -1686,7 +1957,7 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset)
         case FunctionRef:
             if (offset & BuiltinFunctionTag) {
                 offset &= ~BuiltinFunctionTag;
-                assert(offset < sizeof(id_to_fptrs) / sizeof(*id_to_fptrs) && "unknown function pointer id");
+                assert(offset < jl_n_builtins && "unknown function pointer id");
             }
             else {
                 assert(offset < JL_API_MAX && "unknown function pointer id");
@@ -1741,18 +2012,22 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
     case FunctionRef:
         if (offset & BuiltinFunctionTag) {
             offset &= ~BuiltinFunctionTag;
-            assert(offset < sizeof(id_to_fptrs) / sizeof(*id_to_fptrs) && "unknown function pointer ID");
-            return (uintptr_t)id_to_fptrs[offset];
+            assert(offset < jl_n_builtins && "unknown function pointer ID");
+            return (uintptr_t)jl_builtin_f_addrs[offset];
         }
         switch ((jl_callingconv_t)offset) {
         case JL_API_BOXED:
-            if (s->image->fptrs.base)
+            if (s->image->fptrs.nptrs)
                 return (uintptr_t)jl_fptr_args;
-            JL_FALLTHROUGH;
+            return (uintptr_t)NULL;
         case JL_API_WITH_PARAMETERS:
-            if (s->image->fptrs.base)
+            if (s->image->fptrs.nptrs)
                 return (uintptr_t)jl_fptr_sparam;
             return (uintptr_t)NULL;
+        case JL_API_OC_CALL:
+            if (s->image->fptrs.nptrs)
+                return (uintptr_t)jl_f_opaque_closure_call;
+            return (uintptr_t)NULL;
         case JL_API_CONST:
             return (uintptr_t)jl_fptr_const_return;
         case JL_API_INTERPRETED:
@@ -1772,20 +2047,20 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
         size_t depsidx = 0;
 #endif
         assert(s->buildid_depmods_idxs && depsidx < jl_array_len(s->buildid_depmods_idxs));
-        size_t i = ((uint32_t*)jl_array_data(s->buildid_depmods_idxs))[depsidx];
+        size_t i = jl_array_data(s->buildid_depmods_idxs, uint32_t)[depsidx];
         assert(2*i < jl_linkage_blobs.len);
-        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*);
+        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*SYS_EXTERNAL_LINK_UNIT;
     }
     case ExternalLinkage: {
         assert(link_ids);
         assert(link_index);
         assert(0 <= *link_index && *link_index < jl_array_len(link_ids));
-        uint32_t depsidx = ((uint32_t*)jl_array_data(link_ids))[*link_index];
+        uint32_t depsidx = jl_array_data(link_ids, uint32_t)[*link_index];
         *link_index += 1;
         assert(depsidx < jl_array_len(s->buildid_depmods_idxs));
-        size_t i = ((uint32_t*)jl_array_data(s->buildid_depmods_idxs))[depsidx];
+        size_t i = jl_array_data(s->buildid_depmods_idxs, uint32_t)[depsidx];
         assert(2*i < jl_linkage_blobs.len);
-        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*);
+        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*SYS_EXTERNAL_LINK_UNIT;
     }
     }
     abort();
@@ -1872,6 +2147,37 @@ static void jl_read_reloclist(jl_serializer_state *s, jl_array_t *link_ids, uint
     assert(!link_ids || link_index == jl_array_len(link_ids));
 }
 
+static void jl_read_memreflist(jl_serializer_state *s)
+{
+    uintptr_t base = (uintptr_t)s->s->buf;
+    uintptr_t last_pos = 0;
+    uint8_t *current = (uint8_t *)(s->relocs->buf + s->relocs->bpos);
+    while (1) {
+        // Read the offset of the next object
+        size_t pos_diff = 0;
+        size_t cnt = 0;
+        while (1) {
+            assert(s->relocs->bpos <= s->relocs->size);
+            assert((char *)current <= (char *)(s->relocs->buf + s->relocs->size));
+            int8_t c = *current++;
+            s->relocs->bpos += 1;
+
+            pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++);
+            if ((c >> 7) == 0)
+                break;
+        }
+        if (pos_diff == 0)
+            break;
+
+        uintptr_t pos = last_pos + pos_diff;
+        last_pos = pos;
+        jl_genericmemoryref_t *pv = (jl_genericmemoryref_t*)(base + pos);
+        size_t offset = (size_t)pv->ptr_or_offset;
+        pv->ptr_or_offset = (void*)((char*)pv->mem->ptr + offset);
+    }
+}
+
+
 static void jl_read_arraylist(ios_t *s, arraylist_t *list)
 {
     size_t list_len = read_uint(s);
@@ -1880,7 +2186,7 @@ static void jl_read_arraylist(ios_t *s, arraylist_t *list)
     ios_read(s, (char*)list->items, list_len * sizeof(void*));
 }
 
-void gc_sweep_sysimg(void)
+void gc_sweep_sysimg(void) JL_NOTSAFEPOINT
 {
     size_t nblobs = n_linkage_blobs();
     if (nblobs == 0)
@@ -1920,7 +2226,7 @@ void gc_sweep_sysimg(void)
 // the image proper. For example, new methods added to external callables require
 // insertion into the appropriate method table.
 #define jl_write_value(s, v) _jl_write_value((s), (jl_value_t*)(v))
-static void _jl_write_value(jl_serializer_state *s, jl_value_t *v)
+static void _jl_write_value(jl_serializer_state *s, jl_value_t *v) JL_GC_DISABLED
 {
     if (v == NULL) {
         write_reloc_t(s->s, 0);
@@ -1974,11 +2280,10 @@ static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image)
     jl_image_fptrs_t fvars = image->fptrs;
     // make these NULL now so we skip trying to restore GlobalVariable pointers later
     image->gvars_base = NULL;
-    image->fptrs.base = NULL;
-    if (fvars.base == NULL)
+    if (fvars.nptrs == 0)
         return;
 
-    memcpy(image->small_typeof, &small_typeof, sizeof(small_typeof));
+    memcpy(image->jl_small_typeof, &jl_small_typeof, sizeof(jl_small_typeof));
 
     int img_fvars_max = s->fptr_record->size / sizeof(void*);
     size_t i;
@@ -1998,26 +2303,25 @@ static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image)
                 offset = ~offset;
             }
             jl_code_instance_t *codeinst = (jl_code_instance_t*)(base + offset);
-            uintptr_t base = (uintptr_t)fvars.base;
-            assert(jl_is_method(codeinst->def->def.method) && codeinst->invoke != jl_fptr_const_return);
-            assert(specfunc ? codeinst->invoke != NULL : codeinst->invoke == NULL);
-            linfos[i] = codeinst->def;     // now it's a MethodInstance
-            int32_t offset = fvars.offsets[i];
+            assert(jl_is_method(jl_get_ci_mi(codeinst)->def.method) && jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return);
+            assert(specfunc ? jl_atomic_load_relaxed(&codeinst->invoke) != NULL : jl_atomic_load_relaxed(&codeinst->invoke) == NULL);
+            linfos[i] = jl_get_ci_mi(codeinst);     // now it's a MethodInstance
+            void *fptr = fvars.ptrs[i];
             for (; clone_idx < fvars.nclones; clone_idx++) {
                 uint32_t idx = fvars.clone_idxs[clone_idx] & jl_sysimg_val_mask;
                 if (idx < i)
                     continue;
                 if (idx == i)
-                    offset = fvars.clone_offsets[clone_idx];
+                    fptr = fvars.clone_ptrs[clone_idx];
                 break;
             }
-            void *fptr = (void*)(base + offset);
             if (specfunc) {
-                codeinst->specptr.fptr = fptr;
-                codeinst->specsigflags = 0b111; // TODO: set only if confirmed to be true
+                jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr);
+                // TODO: set JL_CI_FLAGS_SPECPTR_SPECIALIZED only if confirmed to be true
+                jl_atomic_store_relaxed(&codeinst->flags, jl_atomic_load_relaxed(&codeinst->flags) | JL_CI_FLAGS_SPECPTR_SPECIALIZED | JL_CI_FLAGS_INVOKE_MATCHES_SPECPTR | JL_CI_FLAGS_FROM_IMAGE);
             }
             else {
-                codeinst->invoke = (jl_callptr_t)fptr;
+                jl_atomic_store_relaxed(&codeinst->invoke, (jl_callptr_t)fptr);
             }
         }
     }
@@ -2025,7 +2329,7 @@ static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image)
     jl_register_fptrs(image->base, &fvars, linfos, img_fvars_max);
 }
 
-static uint32_t write_gvars(jl_serializer_state *s, arraylist_t *globals, arraylist_t *external_fns) JL_NOTSAFEPOINT
+static uint32_t write_gvars(jl_serializer_state *s, arraylist_t *globals, arraylist_t *external_fns) JL_GC_DISABLED
 {
     size_t len = globals->len + external_fns->len;
     ios_ensureroom(s->gvar_record, len * sizeof(reloc_t));
@@ -2038,7 +2342,7 @@ static uint32_t write_gvars(jl_serializer_state *s, arraylist_t *globals, arrayl
     }
     for (size_t i = 0; i < external_fns->len; i++) {
         jl_code_instance_t *ci = (jl_code_instance_t*)external_fns->items[i];
-        assert(ci && (jl_atomic_load_relaxed(&ci->specsigflags) & 0b001));
+        assert(ci && (jl_atomic_load_relaxed(&ci->flags) & JL_CI_FLAGS_SPECPTR_SPECIALIZED));
         uintptr_t item = backref_id(s, (void*)ci, s->link_ids_external_fnvars);
         uintptr_t reloc = get_reloc_for_item(item, 0);
         write_reloc_t(s->gvar_record, reloc);
@@ -2085,44 +2389,23 @@ static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image, uint32_
         uintptr_t v = *gv;
         if (i < external_fns_begin) {
             if (!jl_is_binding(v))
-                v = (uintptr_t)jl_as_global_root((jl_value_t*)v);
-        } else {
+                v = (uintptr_t)jl_as_global_root((jl_value_t*)v, 1);
+        }
+        else {
             jl_code_instance_t *codeinst = (jl_code_instance_t*) v;
-            assert(codeinst && (codeinst->specsigflags & 0b01) && codeinst->specptr.fptr);
-            v = (uintptr_t)codeinst->specptr.fptr;
+            assert(codeinst && (jl_atomic_load_relaxed(&codeinst->flags) & JL_CI_FLAGS_SPECPTR_SPECIALIZED) && jl_atomic_load_relaxed(&codeinst->specptr.fptr));
+            v = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
         }
         *gv = v;
     }
 }
 
-
-static void jl_compile_extern(jl_method_t *m, void *sysimg_handle) JL_GC_DISABLED
-{
-    // install ccallable entry point in JIT
-    assert(m); // makes clang-sa happy
-    jl_svec_t *sv = m->ccallable;
-    int success = jl_compile_extern_c(NULL, NULL, sysimg_handle, jl_svecref(sv, 0), jl_svecref(sv, 1));
-    if (!success)
-        jl_safe_printf("WARNING: @ccallable was already defined for this method name\n"); // enjoy a very bad time
-    assert(success || !sysimg_handle);
-}
-
-
-static void jl_reinit_ccallable(arraylist_t *ccallable_list, char *base, void *sysimg_handle)
-{
-    for (size_t i = 0; i < ccallable_list->len; i++) {
-        uintptr_t item = (uintptr_t)ccallable_list->items[i];
-        jl_method_t *m = (jl_method_t*)(base + item);
-        jl_compile_extern(m, sysimg_handle);
-    }
-}
-
-
 // Code below helps slim down the images by
 // removing cached types not referenced in the stream
 static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED
 {
     size_t l = jl_svec_len(cache), i;
+    size_t sz = 0;
     if (l == 0)
         return cache;
     for (i = 0; i < l; i++) {
@@ -2131,14 +2414,19 @@ static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED
             continue;
         if (ptrhash_get(&serialization_order, ti) == HT_NOTFOUND)
             jl_svecset(cache, i, jl_nothing);
+        else
+            sz += 1;
     }
+    if (sz < HT_N_INLINE)
+        sz = HT_N_INLINE;
+
     void *idx = ptrhash_get(&serialization_order, cache);
     assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1);
-    assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == cache);
-    cache = cache_rehash_set(cache, l);
+    assert(serialization_queue.items[from_seroder_entry(idx)] == cache);
+    cache = cache_rehash_set(cache, sz);
     // redirect all references to the old cache to relocate to the new cache object
     ptrhash_put(&serialization_order, cache, idx);
-    serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = cache;
+    serialization_queue.items[from_seroder_entry(idx)] = cache;
     return cache;
 }
 
@@ -2156,35 +2444,146 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache)
         jl_svecset(cache, ins++, jl_nothing);
 }
 
-static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig)
+static void jl_prune_mi_backedges(jl_array_t *backedges)
 {
-    jl_code_info_t *ci = NULL;
-    JL_GC_PUSH1(&ci);
-    int compressed = 0;
-    if (!jl_is_code_info(ci_)) {
-        compressed = 1;
-        ci = jl_uncompress_ir(m, NULL, (jl_value_t*)ci_);
-    }
-    else {
-        ci = (jl_code_info_t*)ci_;
+    if (backedges == NULL)
+        return;
+    size_t i = 0, ins = 0, n = jl_array_nrows(backedges);
+    while (i < n) {
+        jl_value_t *invokeTypes;
+        jl_code_instance_t *caller;
+        i = get_next_edge(backedges, i, &invokeTypes, &caller);
+        if (ptrhash_get(&serialization_order, caller) != HT_NOTFOUND)
+            ins = set_next_edge(backedges, ins, invokeTypes, caller);
+    }
+    jl_array_del_end(backedges, n - ins);
+}
+
+static void jl_prune_tn_backedges(jl_array_t *backedges)
+{
+    size_t i = 0, ins = 0, n = jl_array_nrows(backedges);
+    for (i = 1; i < n; i += 2) {
+        jl_value_t *ci = jl_array_ptr_ref(backedges, i);
+        if (ptrhash_get(&serialization_order, ci) != HT_NOTFOUND) {
+            jl_array_ptr_set(backedges, ins++, jl_array_ptr_ref(backedges, i - 1));
+            jl_array_ptr_set(backedges, ins++, ci);
+        }
+    }
+    jl_array_del_end(backedges, n - ins);
+}
+
+static void jl_prune_mt_backedges(jl_genericmemory_t *allbackedges)
+{
+    for (size_t i = 0, n = allbackedges->length; i < n; i += 2) {
+        jl_value_t *tn = jl_genericmemory_ptr_ref(allbackedges, i);
+        jl_value_t *backedges = jl_genericmemory_ptr_ref(allbackedges, i + 1);
+        if (tn && tn != jl_nothing && backedges)
+            jl_prune_tn_backedges((jl_array_t*)backedges);
+    }
+}
+
+static void jl_prune_binding_backedges(jl_array_t *backedges)
+{
+    if (backedges == NULL)
+        return;
+    size_t i = 0, ins = 0, n = jl_array_nrows(backedges);
+    for (i = 0; i < n; i++) {
+        jl_value_t *b = jl_array_ptr_ref(backedges, i);
+        if (ptrhash_get(&serialization_order, b) != HT_NOTFOUND) {
+            jl_array_ptr_set(backedges, ins, b);
+            ins++;
+        }
+    }
+    jl_array_del_end(backedges, n - ins);
+}
+
+uint_t bindingkey_hash(size_t idx, jl_value_t *data);
+uint_t speccache_hash(size_t idx, jl_value_t *data);
+
+static void jl_prune_idset(_Atomic(jl_svec_t*) *pkeys, _Atomic(jl_genericmemory_t*) *pkeyset, uint_t (*key_hash)(size_t, jl_value_t*), jl_value_t *parent) JL_GC_DISABLED
+{
+    jl_svec_t *keys = jl_atomic_load_relaxed(pkeys);
+    size_t l = jl_svec_len(keys), i;
+    if (l == 0)
+        return;
+    arraylist_t keys_list;
+    arraylist_new(&keys_list, 0);
+    for (i = 0; i < l; i++) {
+        jl_value_t *k = jl_svecref(keys, i);
+        if (k == jl_nothing)
+            continue;
+        if (ptrhash_get(&serialization_order, k) != HT_NOTFOUND)
+            arraylist_push(&keys_list, k);
+    }
+    jl_genericmemory_t *keyset = jl_atomic_load_relaxed(pkeyset);
+    _Atomic(jl_genericmemory_t*)keyset2;
+    jl_atomic_store_relaxed(&keyset2, (jl_genericmemory_t*)jl_an_empty_memory_any);
+    jl_svec_t *keys2 = jl_alloc_svec_uninit(keys_list.len);
+    for (i = 0; i < keys_list.len; i++) {
+        jl_binding_t *ref = (jl_binding_t*)keys_list.items[i];
+        jl_svecset(keys2, i, ref);
+        jl_smallintset_insert(&keyset2, parent, key_hash, i, (jl_value_t*)keys2);
+    }
+    void *idx = ptrhash_get(&serialization_order, keys);
+    assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1);
+    assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == keys);
+    ptrhash_put(&serialization_order, keys2, idx);
+    serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = keys2;
+
+    idx = ptrhash_get(&serialization_order, keyset);
+    assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1);
+    assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == keyset);
+    ptrhash_put(&serialization_order, jl_atomic_load_relaxed(&keyset2), idx);
+    serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = jl_atomic_load_relaxed(&keyset2);
+    jl_atomic_store_relaxed(pkeys, keys2);
+    jl_gc_wb(parent, keys2);
+    jl_atomic_store_relaxed(pkeyset, jl_atomic_load_relaxed(&keyset2));
+    jl_gc_wb(parent, jl_atomic_load_relaxed(&keyset2));
+}
+
+static void jl_prune_method_specializations(jl_method_t *m) JL_GC_DISABLED
+{
+    jl_value_t *specializations_ = jl_atomic_load_relaxed(&m->specializations);
+    if (!jl_is_svec(specializations_)) {
+        if (ptrhash_get(&serialization_order, specializations_) == HT_NOTFOUND)
+            record_field_change((jl_value_t **)&m->specializations, (jl_value_t*)jl_emptysvec);
+        return;
     }
-    // leave codelocs length the same so the compiler can assume that; just zero it
-    memset(jl_array_data(ci->codelocs), 0, jl_array_len(ci->codelocs)*sizeof(int32_t));
-    // empty linetable
-    if (jl_is_array(ci->linetable))
-        jl_array_del_end((jl_array_t*)ci->linetable, jl_array_len(ci->linetable));
+    jl_prune_idset((_Atomic(jl_svec_t*)*)&m->specializations, &m->speckeyset, speccache_hash, (jl_value_t*)m);
+}
+
+static void jl_prune_module_bindings(jl_module_t *m) JL_GC_DISABLED
+{
+    jl_prune_idset(&m->bindings, &m->bindingkeyset, bindingkey_hash, (jl_value_t*)m);
+}
+
+static void strip_slotnames(jl_array_t *slotnames, int n)
+{
     // replace slot names with `?`, except unused_sym since the compiler looks at it
     jl_sym_t *questionsym = jl_symbol("?");
-    int i, l = jl_array_len(ci->slotnames);
-    for (i = 0; i < l; i++) {
-        jl_value_t *s = jl_array_ptr_ref(ci->slotnames, i);
+    int i;
+    for (i = 0; i < n; i++) {
+        jl_value_t *s = jl_array_ptr_ref(slotnames, i);
         if (s != (jl_value_t*)jl_unused_sym)
-            jl_array_ptr_set(ci->slotnames, i, questionsym);
+            jl_array_ptr_set(slotnames, i, questionsym);
     }
-    if (orig) {
-        m->slot_syms = jl_compress_argnames(ci->slotnames);
-        jl_gc_wb(m, m->slot_syms);
+}
+
+static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, jl_code_instance_t *codeinst)
+{
+    jl_code_info_t *ci = NULL;
+    JL_GC_PUSH1(&ci);
+    int compressed = 0;
+    if (!jl_is_code_info(ci_)) {
+        compressed = 1;
+        ci = jl_uncompress_ir(m, codeinst, (jl_value_t*)ci_);
     }
+    else {
+        ci = (jl_code_info_t*)ci_;
+    }
+    strip_slotnames(ci->slotnames, jl_array_len(ci->slotnames));
+    ci->debuginfo = jl_nulldebuginfo;
+    jl_gc_wb(ci, ci->debuginfo);
     jl_value_t *ret = (jl_value_t*)ci;
     if (compressed)
         ret = (jl_value_t*)jl_compress_ir(m, ci);
@@ -2198,23 +2597,25 @@ static void strip_specializations_(jl_method_instance_t *mi)
     jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
     while (codeinst) {
         jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
-        if (inferred && inferred != jl_nothing) {
+        if (inferred && inferred != jl_nothing && !jl_is_uint8(inferred)) {
             if (jl_options.strip_ir) {
                 record_field_change((jl_value_t**)&codeinst->inferred, jl_nothing);
             }
             else if (jl_options.strip_metadata) {
-                jl_value_t *stripped = strip_codeinfo_meta(mi->def.method, inferred, 0);
+                jl_value_t *stripped = strip_codeinfo_meta(mi->def.method, inferred, codeinst);
                 if (jl_atomic_cmpswap_relaxed(&codeinst->inferred, &inferred, stripped)) {
                     jl_gc_wb(codeinst, stripped);
                 }
             }
         }
+        if (jl_options.strip_ir)
+            record_field_change((jl_value_t**)&codeinst->edges, (jl_value_t*)jl_emptysvec);
+        if (jl_options.strip_metadata)
+            record_field_change((jl_value_t**)&codeinst->debuginfo, (jl_value_t*)jl_nulldebuginfo);
         codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
-    if (jl_options.strip_ir) {
-        record_field_change((jl_value_t**)&mi->uninferred, NULL);
+    if (jl_options.trim || jl_options.strip_ir) {
         record_field_change((jl_value_t**)&mi->backedges, NULL);
-        record_field_change((jl_value_t**)&mi->callbacks, NULL);
     }
 }
 
@@ -2224,29 +2625,52 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env)
     if (m->source) {
         int stripped_ir = 0;
         if (jl_options.strip_ir) {
-            if (m->unspecialized) {
-                jl_code_instance_t *unspec = jl_atomic_load_relaxed(&m->unspecialized->cache);
-                if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) {
-                    // we have a generic compiled version, so can remove the IR
-                    record_field_change(&m->source, jl_nothing);
-                    stripped_ir = 1;
+            int should_strip_ir = jl_options.trim;
+            if (!should_strip_ir) {
+                if (jl_atomic_load_relaxed(&m->unspecialized)) {
+                    jl_code_instance_t *unspec = jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&m->unspecialized)->cache);
+                    if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) {
+                        // we have a generic compiled version, so can remove the IR
+                        should_strip_ir = 1;
+                    }
                 }
             }
-            if (!stripped_ir) {
+            if (!should_strip_ir) {
                 int mod_setting = jl_get_module_compile(m->module);
-                // if the method is declared not to be compiled, keep IR for interpreter
                 if (!(mod_setting == JL_OPTIONS_COMPILE_OFF || mod_setting == JL_OPTIONS_COMPILE_MIN)) {
-                    record_field_change(&m->source, jl_nothing);
-                    stripped_ir = 1;
+                    // if the method is declared not to be compiled, keep IR for interpreter
+                    should_strip_ir = 1;
                 }
             }
+            if (should_strip_ir) {
+                record_field_change(&m->source, jl_nothing);
+                record_field_change((jl_value_t**)&m->roots, NULL);
+                stripped_ir = 1;
+            }
         }
-        if (jl_options.strip_metadata && !stripped_ir) {
-            m->source = strip_codeinfo_meta(m, m->source, 1);
-            jl_gc_wb(m, m->source);
+        if (jl_options.strip_metadata) {
+            if (!stripped_ir) {
+                m->source = strip_codeinfo_meta(m, m->source, NULL);
+                jl_gc_wb(m, m->source);
+            }
+            jl_array_t *slotnames = jl_uncompress_argnames(m->slot_syms);
+            JL_GC_PUSH1(&slotnames);
+            int tostrip = jl_array_len(slotnames);
+            // for keyword methods, strip only nargs to keep the keyword names at the end for reflection
+            if (jl_tparam0(jl_unwrap_unionall(m->sig)) == (jl_value_t*)jl_kwcall_type)
+                tostrip = m->nargs;
+            strip_slotnames(slotnames, tostrip);
+            m->slot_syms = jl_compress_argnames(slotnames);
+            jl_gc_wb(m, m->slot_syms);
+            JL_GC_POP();
         }
     }
-    jl_value_t *specializations = m->specializations;
+    if (jl_options.strip_metadata) {
+        record_field_change((jl_value_t**)&m->file, (jl_value_t*)jl_empty_sym);
+        m->line = 0;
+        record_field_change((jl_value_t**)&m->debuginfo, (jl_value_t*)jl_nulldebuginfo);
+    }
+    jl_value_t *specializations = jl_atomic_load_relaxed(&m->specializations);
     if (!jl_is_svec(specializations)) {
         strip_specializations_((jl_method_instance_t*)specializations);
     }
@@ -2258,33 +2682,154 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env)
                 strip_specializations_((jl_method_instance_t*)mi);
         }
     }
-    if (m->unspecialized)
-        strip_specializations_(m->unspecialized);
+    if (jl_atomic_load_relaxed(&m->unspecialized))
+        strip_specializations_(jl_atomic_load_relaxed(&m->unspecialized));
     if (jl_options.strip_ir && m->root_blocks)
         record_field_change((jl_value_t**)&m->root_blocks, NULL);
     return 1;
 }
 
-static int strip_all_codeinfos_(jl_methtable_t *mt, void *_env)
+static int strip_all_codeinfos_mt(jl_methtable_t *mt, void *_env)
+{
+    return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), strip_all_codeinfos__, NULL);
+}
+
+static void jl_strip_all_codeinfos(jl_array_t *mod_array)
 {
-    if (jl_options.strip_ir && mt->backedges)
-        record_field_change((jl_value_t**)&mt->backedges, NULL);
-    return jl_typemap_visitor(mt->defs, strip_all_codeinfos__, NULL);
+    jl_foreach_reachable_mtable(strip_all_codeinfos_mt, mod_array, NULL);
 }
 
-static void jl_strip_all_codeinfos(void)
+static int strip_module(jl_module_t *m, jl_sym_t *docmeta_sym)
 {
-    jl_foreach_reachable_mtable(strip_all_codeinfos_, NULL);
+    size_t world = jl_atomic_load_relaxed(&jl_world_counter);
+    jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings);
+    for (size_t i = 0; i < jl_svec_len(table); i++) {
+        jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
+        if ((void*)b == jl_nothing)
+            break;
+        jl_sym_t *name = b->globalref->name;
+        jl_value_t *v = jl_get_binding_value_in_world(b, world);
+        if (v) {
+            if (jl_is_module(v)) {
+                jl_module_t *child = (jl_module_t*)v;
+                if (child != m && child->parent == m && child->name == name) {
+                    // this is the original/primary binding for the submodule
+                    if (!strip_module(child, docmeta_sym))
+                        return 0;
+                }
+            }
+        }
+        if (name == docmeta_sym) {
+            if (jl_atomic_load_relaxed(&b->value))
+                record_field_change((jl_value_t**)&b->value, jl_nothing);
+            // TODO: this is a pretty stupidly unsound way to do this, but it is way to late here to do this correctly (by calling delete_binding and getting an updated world age then dropping all partitions from older worlds)
+            jl_binding_partition_t *bp = jl_atomic_load_relaxed(&b->partitions);
+            while (bp) {
+                if (jl_bkind_is_defined_constant(jl_binding_kind(bp))) {
+                    // XXX: bp->kind = PARTITION_KIND_UNDEF_CONST;
+                    record_field_change((jl_value_t**)&bp->restriction, NULL);
+                }
+                bp = jl_atomic_load_relaxed(&bp->next);
+            }
+        }
+    }
+    return 1;
+}
+
+
+static void jl_strip_all_docmeta(jl_array_t *mod_array)
+{
+    jl_sym_t *docmeta_sym = NULL;
+    if (jl_base_module) {
+        jl_value_t *docs = jl_get_global(jl_base_module, jl_symbol("Docs"));
+        if (docs && jl_is_module(docs)) {
+            docmeta_sym = (jl_sym_t*)jl_get_global((jl_module_t*)docs, jl_symbol("META"));
+        }
+    }
+    if (!docmeta_sym)
+        return;
+    for (size_t i = 0; i < jl_array_nrows(mod_array); i++) {
+        jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i);
+        assert(jl_is_module(m));
+        if (m->parent == m) // some toplevel modules (really just Base) aren't actually
+            strip_module(m, docmeta_sym);
+    }
 }
 
 // --- entry points ---
 
-jl_array_t *jl_global_roots_table;
+jl_genericmemory_t *jl_global_roots_list;
+jl_genericmemory_t *jl_global_roots_keyset;
 jl_mutex_t global_roots_lock;
 
+jl_mutex_t precompile_field_replace_lock;
+jl_svec_t *precompile_field_replace JL_GLOBALLY_ROOTED;
+
+static inline jl_value_t *get_checked_fieldindex(const char *name, jl_datatype_t *st, jl_value_t *v, jl_value_t *arg, int mutabl)
+{
+    if (mutabl) {
+        if (st == jl_module_type)
+            jl_error("cannot assign variables in other modules");
+        if (!st->name->mutabl)
+            jl_errorf("%s: immutable struct of type %s cannot be changed", name, jl_symbol_name(st->name->name));
+    }
+    size_t idx;
+    if (jl_is_long(arg)) {
+        idx = jl_unbox_long(arg) - 1;
+        if (idx >= jl_datatype_nfields(st))
+            jl_bounds_error(v, arg);
+    }
+    else if (jl_is_symbol(arg)) {
+        idx = jl_field_index(st, (jl_sym_t*)arg, 1);
+        arg = jl_box_long(idx);
+    }
+    else {
+        jl_value_t *ts[2] = {(jl_value_t*)jl_long_type, (jl_value_t*)jl_symbol_type};
+        jl_value_t *t = jl_type_union(ts, 2);
+        jl_type_error(name, t, arg);
+    }
+    if (mutabl && jl_field_isconst(st, idx)) {
+        jl_errorf("%s: const field .%s of type %s cannot be changed", name,
+                jl_symbol_name((jl_sym_t*)jl_svecref(jl_field_names(st), idx)), jl_symbol_name(st->name->name));
+    }
+    return arg;
+}
+
+JL_DLLEXPORT void jl_set_precompile_field_replace(jl_value_t *val, jl_value_t *field, jl_value_t *newval)
+{
+    if (!jl_generating_output())
+        return;
+    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(val);
+    jl_value_t *idx = get_checked_fieldindex("setfield!", st, val, field, 1);
+    JL_GC_PUSH1(&idx);
+    size_t idxval = jl_unbox_long(idx);
+    jl_value_t *ft = jl_field_type_concrete(st, idxval);
+    if (!jl_isa(newval, ft))
+        jl_type_error("setfield!", ft, newval);
+    JL_LOCK(&precompile_field_replace_lock);
+    if (precompile_field_replace == NULL) {
+        precompile_field_replace = jl_alloc_svec(3);
+        jl_svecset(precompile_field_replace, 0, jl_alloc_vec_any(0));
+        jl_svecset(precompile_field_replace, 1, jl_alloc_vec_any(0));
+        jl_svecset(precompile_field_replace, 2, jl_alloc_vec_any(0));
+    }
+    jl_array_ptr_1d_push((jl_array_t*)jl_svecref(precompile_field_replace, 0), val);
+    jl_array_ptr_1d_push((jl_array_t*)jl_svecref(precompile_field_replace, 1), idx);
+    jl_array_ptr_1d_push((jl_array_t*)jl_svecref(precompile_field_replace, 2), newval);
+    JL_GC_POP();
+    JL_UNLOCK(&precompile_field_replace_lock);
+}
+
+
 JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT
 {
-    if (jl_is_concrete_type(val) || jl_is_bool(val) || jl_is_symbol(val) ||
+    if (jl_is_datatype(val)) {
+        jl_datatype_t *dt = (jl_datatype_t*)val;
+        if (jl_unwrap_unionall(dt->name->wrapper) == val)
+            return 1;
+        return (jl_is_tuple_type(val) ? dt->isconcretetype : !dt->hasfreetypevars); // aka is_cacheable from jltypes.c
+    }
+    if (jl_is_bool(val) || jl_is_symbol(val) ||
             val == (jl_value_t*)jl_any_type || val == (jl_value_t*)jl_bottom_type || val == (jl_value_t*)jl_core_module)
         return 1;
     if (val == ((jl_datatype_t*)jl_typeof(val))->instance)
@@ -2292,10 +2837,21 @@ JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOT
     return 0;
 }
 
-JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val JL_MAYBE_UNROOTED)
+static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT JL_GLOBALLY_ROOTED
+{
+    t = jl_unwrap_unionall(t);
+    if (jl_is_datatype(t))
+        return ((jl_datatype_t*)t)->name->wrapper;
+    return NULL;
+}
+
+JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert)
 {
     if (jl_is_globally_rooted(val))
         return val;
+    jl_value_t *tw = extract_wrapper(val);
+    if (tw && (val == tw || jl_types_egal(val, tw)))
+        return tw;
     if (jl_is_uint8(val))
         return jl_box_uint8(jl_unbox_uint8(val));
     if (jl_is_int32(val)) {
@@ -2308,89 +2864,117 @@ JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val JL_MAYBE_UNROOTED)
         if ((uint64_t)(n+512) < 1024)
             return jl_box_int64(n);
     }
-    JL_GC_PUSH1(&val);
+    // check table before acquiring lock to reduce writer contention
+    jl_value_t *rval = jl_idset_get(jl_global_roots_list, jl_global_roots_keyset, val);
+    if (rval)
+        return rval;
     JL_LOCK(&global_roots_lock);
-    jl_value_t *rval = jl_eqtable_getkey(jl_global_roots_table, val, NULL);
+    rval = jl_idset_get(jl_global_roots_list, jl_global_roots_keyset, val);
     if (rval) {
         val = rval;
     }
+    else if (insert) {
+        ssize_t idx;
+        jl_global_roots_list = jl_idset_put_key(jl_global_roots_list, val, &idx);
+        jl_global_roots_keyset = jl_idset_put_idx(jl_global_roots_list, jl_global_roots_keyset, idx);
+    }
     else {
-        jl_global_roots_table = jl_eqtable_put(jl_global_roots_table, val, jl_nothing, NULL);
+        val = NULL;
     }
     JL_UNLOCK(&global_roots_lock);
-    JL_GC_POP();
     return val;
 }
 
-static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *newly_inferred, uint64_t worklist_key,
-                           /* outputs */  jl_array_t **extext_methods, jl_array_t **new_specializations,
-                                          jl_array_t **method_roots_list, jl_array_t **ext_targets, jl_array_t **edges)
-{
-    // extext_methods: [method1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist
-    // ext_targets: [invokesig1, callee1, matches1, ...] non-worklist callees of worklist-owned methods
-    //              ordinary dispatch: invokesig=NULL, callee is MethodInstance
-    //              `invoke` dispatch: invokesig is signature, callee is MethodInstance
-    //              abstract call: callee is signature
-    // edges: [caller1, ext_targets_indexes1, ...] for worklist-owned methods calling external methods
-    assert(edges_map == NULL);
-
-    // Save the inferred code from newly inferred, external methods
-    *new_specializations = queue_external_cis(newly_inferred);
-
-    // Collect method extensions and edges data
-    JL_GC_PUSH1(&edges_map);
-    if (edges)
-        edges_map = jl_alloc_vec_any(0);
-    *extext_methods = jl_alloc_vec_any(0);
-    jl_collect_methtable_from_mod(jl_type_type_mt, *extext_methods);
-    jl_collect_methtable_from_mod(jl_nonfunction_mt, *extext_methods);
-    size_t i, len = jl_array_len(mod_array);
-    for (i = 0; i < len; i++) {
-        jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i);
-        assert(jl_is_module(m));
-        if (m->parent == m) // some toplevel modules (really just Base) aren't actually
-            jl_collect_extext_methods_from_mod(*extext_methods, m);
-    }
-
-    if (edges) {
-        size_t world = jl_atomic_load_acquire(&jl_world_counter);
-        jl_collect_missing_backedges(jl_type_type_mt);
-        jl_collect_missing_backedges(jl_nonfunction_mt);
-        // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges also accumulate data in callers_with_edges.
-        // Process this to extract `edges` and `ext_targets`.
-        *ext_targets = jl_alloc_vec_any(0);
-        *edges = jl_alloc_vec_any(0);
-        *method_roots_list = jl_alloc_vec_any(0);
-        // Collect the new method roots
-        jl_collect_new_roots(*method_roots_list, *new_specializations, worklist_key);
-        jl_collect_edges(*edges, *ext_targets, *new_specializations, world);
-    }
-    assert(edges_map == NULL); // jl_collect_edges clears this when done
-
-    JL_GC_POP();
-}
-
 // In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage
 static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
-                                           jl_array_t *worklist, jl_array_t *extext_methods,
-                                           jl_array_t *new_specializations, jl_array_t *method_roots_list,
-                                           jl_array_t *ext_targets, jl_array_t *edges) JL_GC_DISABLED
+                                           jl_array_t *module_init_order, jl_array_t *worklist, jl_array_t *extext_methods,
+                                           jl_array_t *new_ext_cis, jl_query_cache *query_cache)
 {
     htable_new(&field_replace, 0);
+    htable_new(&bits_replace, 0);
     // strip metadata and IR when requested
-    if (jl_options.strip_metadata || jl_options.strip_ir)
-        jl_strip_all_codeinfos();
+    if (jl_options.strip_metadata || jl_options.strip_ir) {
+        if (jl_options.strip_metadata) {
+            jl_nulldebuginfo = (jl_debuginfo_t*)jl_get_global(jl_core_module, jl_symbol("NullDebugInfo"));
+            if (jl_nulldebuginfo == NULL)
+                jl_errorf("Core.NullDebugInfo required for --strip-metadata option");
+        }
+        jl_strip_all_codeinfos(mod_array);
+        jl_strip_all_docmeta(mod_array);
+    }
+    // collect needed methods and replace method tables that are in the tags array
+    htable_new(&new_methtables, 0);
+    arraylist_t MIs;
+    arraylist_new(&MIs, 0);
+    arraylist_t gvars;
+    arraylist_new(&gvars, 0);
+    arraylist_t external_fns;
+    arraylist_new(&external_fns, 0);
+    // prepare hash table with any fields the user wanted us to rewrite during serialization
+    if (precompile_field_replace) {
+        jl_array_t *vals = (jl_array_t*)jl_svecref(precompile_field_replace, 0);
+        jl_array_t *fields = (jl_array_t*)jl_svecref(precompile_field_replace, 1);
+        jl_array_t *newvals = (jl_array_t*)jl_svecref(precompile_field_replace, 2);
+        size_t i, l = jl_array_nrows(vals);
+        assert(jl_array_nrows(fields) == l && jl_array_nrows(newvals) == l);
+        for (i = 0; i < l; i++) {
+            jl_value_t *val = jl_array_ptr_ref(vals, i);
+            size_t field = jl_unbox_long(jl_array_ptr_ref(fields, i));
+            jl_value_t *newval = jl_array_ptr_ref(newvals, i);
+            jl_datatype_t *st = (jl_datatype_t*)jl_typeof(val);
+            size_t offs = jl_field_offset(st, field);
+            char *fldaddr = (char*)val + offs;
+            if (jl_field_isptr(st, field)) {
+                record_field_change((jl_value_t**)fldaddr, newval);
+            }
+            else if (jl_field_size(st, field) > 0) {
+                // replace the bits
+                ptrhash_put(&bits_replace, (void*)fldaddr, newval);
+                // and any pointers inside
+                jl_datatype_t *rty = (jl_datatype_t*)jl_typeof(newval);
+                const jl_datatype_layout_t *layout = rty->layout;
+                size_t j, np = layout->npointers;
+                for (j = 0; j < np; j++) {
+                    uint32_t ptr = jl_ptr_offset(rty, j);
+                    record_field_change((jl_value_t**)fldaddr + ptr, *(((jl_value_t**)newval) + ptr));
+                }
+            }
+        }
+    }
 
     int en = jl_gc_enable(0);
+    if (native_functions) {
+        size_t num_gvars, num_external_fns;
+        jl_get_llvm_gv_inits(native_functions, &num_gvars, NULL);
+        arraylist_grow(&gvars, num_gvars);
+        jl_get_llvm_gv_inits(native_functions, &num_gvars, gvars.items);
+        jl_get_llvm_external_fns(native_functions, &num_external_fns, NULL);
+        arraylist_grow(&external_fns, num_external_fns);
+        jl_get_llvm_external_fns(native_functions, &num_external_fns,
+                                 (jl_code_instance_t *)external_fns.items);
+        if (jl_options.trim) {
+            size_t num_mis;
+            jl_get_llvm_cis(native_functions, &num_mis, NULL);
+            arraylist_grow(&MIs, num_mis);
+            jl_get_llvm_cis(native_functions, &num_mis, (jl_code_instance_t**)MIs.items);
+            for (size_t i = 0; i < num_mis; i++) {
+                jl_code_instance_t *ci = (jl_code_instance_t*)MIs.items[i];
+                MIs.items[i] = (void*)jl_get_ci_mi(ci);
+            }
+        }
+    }
+    if (jl_options.trim) {
+        jl_rebuild_methtables(&MIs, &new_methtables);
+    }
+
     nsym_tag = 0;
     htable_new(&symbol_table, 0);
-    htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs));
+    htable_new(&fptr_to_id, jl_n_builtins);
     uintptr_t i;
-    for (i = 0; id_to_fptrs[i] != NULL; i++) {
-        ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2));
+    for (i = 0; i < jl_n_builtins; i++) {
+        ptrhash_put(&fptr_to_id, (void*)(uintptr_t)jl_builtin_f_addrs[i], (void*)(i + 2));
     }
     htable_new(&serialization_order, 25000);
-    htable_new(&unique_ready, 0);
     htable_new(&nullptrs, 0);
     arraylist_new(&object_worklist, 0);
     arraylist_new(&serialization_queue, 0);
@@ -2402,6 +2986,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
     ios_mem(&gvar_record, 0);
     ios_mem(&fptr_record, 0);
     jl_serializer_state s = {0};
+    s.query_cache = query_cache;
     s.incremental = !(worklist == NULL);
     s.s = &sysimg;
     s.const_data = &const_data;
@@ -2410,28 +2995,30 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
     s.gvar_record = &gvar_record;
     s.fptr_record = &fptr_record;
     s.ptls = jl_current_task->ptls;
+    arraylist_new(&s.memowner_list, 0);
+    arraylist_new(&s.memref_list, 0);
     arraylist_new(&s.relocs_list, 0);
     arraylist_new(&s.gctags_list, 0);
     arraylist_new(&s.uniquing_types, 0);
+    arraylist_new(&s.uniquing_super, 0);
     arraylist_new(&s.uniquing_objs, 0);
     arraylist_new(&s.fixup_types, 0);
     arraylist_new(&s.fixup_objs, 0);
-    arraylist_new(&s.ccallable_list, 0);
     s.buildid_depmods_idxs = image_to_depmodidx(mod_array);
     s.link_ids_relocs = jl_alloc_array_1d(jl_array_int32_type, 0);
     s.link_ids_gctags = jl_alloc_array_1d(jl_array_int32_type, 0);
     s.link_ids_gvars = jl_alloc_array_1d(jl_array_int32_type, 0);
     s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_int32_type, 0);
-    htable_new(&s.callers_with_edges, 0);
-    jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL;
-
-    arraylist_t gvars;
-    arraylist_t external_fns;
-    arraylist_new(&gvars, 0);
-    arraylist_new(&external_fns, 0);
-    if (native_functions) {
-        jl_get_llvm_gvs(native_functions, &gvars);
-        jl_get_llvm_external_fns(native_functions, &external_fns);
+    s.method_roots_list = NULL;
+    htable_new(&s.method_roots_index, 0);
+    jl_value_t **_tags[NUM_TAGS];
+    jl_value_t ***tags = s.incremental ? NULL : _tags;
+    if (worklist) {
+        s.method_roots_list = jl_alloc_vec_any(0);
+        s.worklist_key = jl_worklist_key(worklist);
+    }
+    else {
+        get_tags(_tags);
     }
 
     if (worklist == NULL) {
@@ -2443,19 +3030,13 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
             }
         }
     }
-    jl_idtable_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("IdDict")) : NULL;
-    jl_idtable_typename = jl_base_module ? ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_idtable_type))->name : NULL;
     jl_bigint_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL;
     if (jl_bigint_type) {
         gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")),
                                                     jl_symbol("BITS_PER_LIMB"))) / 8;
     }
-    if (jl_base_module) {
-        jl_value_t *docs = jl_get_global(jl_base_module, jl_symbol("Docs"));
-        if (docs && jl_is_module(docs)) {
-            jl_docmeta_sym = (jl_sym_t*)jl_get_global((jl_module_t*)docs, jl_symbol("META"));
-        }
-    }
+    jl_genericmemory_t *global_roots_list = NULL;
+    jl_genericmemory_t *global_roots_keyset = NULL;
 
     { // step 1: record values (recursively) that need to go in the image
         size_t i;
@@ -2464,50 +3045,91 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
                 jl_value_t *tag = *tags[i];
                 jl_queue_for_serialization(&s, tag);
             }
-            jl_queue_for_serialization(&s, jl_global_roots_table);
+            for (i = 0; i < jl_n_builtins; i++)
+                jl_queue_for_serialization(&s, jl_builtin_instances[i]);
+#define XX(name, type) jl_queue_for_serialization(&s, (jl_value_t*)jl_##name);
+            JL_EXPORTED_DATA_POINTERS(XX)
+#undef XX
+#define XX(name, type) jl_queue_for_serialization(&s, (jl_value_t*)jl_##name);
+            JL_CONST_GLOBAL_VARS(XX)
+#undef XX
             jl_queue_for_serialization(&s, s.ptls->root_task->tls);
         }
         else {
-            // To ensure we don't have to manually update the list, go through all tags and queue any that are not otherwise
-            // judged to be externally-linked
-            htable_new(&external_objects, NUM_TAGS);
-            for (size_t i = 0; tags[i] != NULL; i++) {
-                jl_value_t *tag = *tags[i];
-                ptrhash_put(&external_objects, tag, tag);
-            }
             // Queue the worklist itself as the first item we serialize
             jl_queue_for_serialization(&s, worklist);
-            jl_queue_for_serialization(&s, jl_module_init_order);
-            // Classify the CodeInstances with respect to their need for validation
-            classify_callers(&s.callers_with_edges, edges);
+            jl_queue_for_serialization(&s, module_init_order);
         }
         // step 1.1: as needed, serialize the data needed for insertion into the running system
         if (extext_methods) {
-            assert(ext_targets);
-            assert(edges);
             // Queue method extensions
             jl_queue_for_serialization(&s, extext_methods);
             // Queue the new specializations
-            jl_queue_for_serialization(&s, new_specializations);
-            // Queue the new roots
-            jl_queue_for_serialization(&s, method_roots_list);
-            // Queue the edges
-            jl_queue_for_serialization(&s, ext_targets);
-            jl_queue_for_serialization(&s, edges);
+            jl_queue_for_serialization(&s, new_ext_cis);
         }
         jl_serialize_reachable(&s);
         // step 1.2: ensure all gvars are part of the sysimage too
         record_gvars(&s, &gvars);
         record_external_fns(&s, &external_fns);
+        if (jl_options.trim)
+            record_gvars(&s, &MIs);
         jl_serialize_reachable(&s);
-        // step 1.3: prune (garbage collect) some special weak references from
-        // built-in type caches
+        // Beyond this point, all content should already have been visited, so now we can prune
+        // the rest and add some internal root arrays.
+        // step 1.3: include some other special roots
+        if (s.incremental) {
+            // Queue the new roots array
+            jl_queue_for_serialization(&s, s.method_roots_list);
+            jl_serialize_reachable(&s);
+        }
+        // step 1.4: prune (garbage collect) special weak references from the jl_global_roots_list
+        if (worklist == NULL) {
+            global_roots_list = jl_alloc_memory_any(0);
+            global_roots_keyset = jl_alloc_memory_any(0);
+            for (size_t i = 0; i < jl_global_roots_list->length; i++) {
+                jl_value_t *val = jl_genericmemory_ptr_ref(jl_global_roots_list, i);
+                if (val && ptrhash_get(&serialization_order, val) != HT_NOTFOUND) {
+                    ssize_t idx;
+                    global_roots_list = jl_idset_put_key(global_roots_list, val, &idx);
+                    global_roots_keyset = jl_idset_put_idx(global_roots_list, global_roots_keyset, idx);
+                }
+            }
+            jl_queue_for_serialization(&s, global_roots_list);
+            jl_queue_for_serialization(&s, global_roots_keyset);
+            jl_serialize_reachable(&s);
+        }
+        // step 1.5: prune (garbage collect) some special weak references known caches
         for (i = 0; i < serialization_queue.len; i++) {
-            jl_typename_t *tn = (jl_typename_t*)serialization_queue.items[i];
-            if (jl_is_typename(tn)) {
-                tn->cache = jl_prune_type_cache_hash(tn->cache);
-                jl_gc_wb(tn, tn->cache);
-                jl_prune_type_cache_linear(tn->linearcache);
+            jl_value_t *v = (jl_value_t*)serialization_queue.items[i];
+            if (jl_is_method(v)) {
+                if (jl_options.trim)
+                    jl_prune_method_specializations((jl_method_t*)v);
+            }
+            else if (jl_is_module(v)) {
+                if (jl_options.trim)
+                    jl_prune_module_bindings((jl_module_t*)v);
+            }
+            else if (jl_is_typename(v)) {
+                jl_typename_t *tn = (jl_typename_t*)v;
+                jl_atomic_store_relaxed(&tn->cache,
+                    jl_prune_type_cache_hash(jl_atomic_load_relaxed(&tn->cache)));
+                jl_gc_wb(tn, jl_atomic_load_relaxed(&tn->cache));
+                jl_prune_type_cache_linear(jl_atomic_load_relaxed(&tn->linearcache));
+            }
+            else if (jl_is_method_instance(v)) {
+                jl_method_instance_t *mi = (jl_method_instance_t*)v;
+                jl_value_t *backedges = get_replaceable_field((jl_value_t**)&mi->backedges, 1);
+                jl_prune_mi_backedges((jl_array_t*)backedges);
+            }
+            else if (jl_is_binding(v)) {
+                jl_binding_t *b = (jl_binding_t*)v;
+                jl_value_t *backedges = get_replaceable_field((jl_value_t**)&b->backedges, 1);
+                jl_prune_binding_backedges((jl_array_t*)backedges);
+            }
+            else if (jl_is_mtable(v)) {
+                jl_methtable_t *mt = (jl_methtable_t*)v;
+                jl_value_t *backedges = get_replaceable_field((jl_value_t**)&mt->backedges, 1);
+                jl_prune_mt_backedges((jl_genericmemory_t*)backedges);
             }
         }
     }
@@ -2527,8 +3149,8 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
     if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
         jl_printf(
             JL_STDERR,
-            "ERROR: system image too large: sysimg.size is %jd but the limit is %" PRIxPTR "\n",
-            (intmax_t)sysimg.size,
+            "ERROR: system image too large: sysimg.size is 0x%" PRIxPTR " but the limit is 0x%" PRIxPTR "\n",
+            (uintptr_t)sysimg.size,
             ((uintptr_t)1 << RELOC_TAG_OFFSET)
         );
         jl_exit(1);
@@ -2536,13 +3158,12 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
     if (const_data.size / sizeof(void*) > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
         jl_printf(
             JL_STDERR,
-            "ERROR: system image too large: const_data.size is %jd but the limit is %" PRIxPTR "\n",
-            (intmax_t)const_data.size,
+            "ERROR: system image too large: const_data.size is 0x%" PRIxPTR " but the limit is 0x%" PRIxPTR "\n",
+            (uintptr_t)const_data.size,
             ((uintptr_t)1 << RELOC_TAG_OFFSET)*sizeof(void*)
         );
         jl_exit(1);
     }
-    htable_free(&s.callers_with_edges);
 
     // step 3: combine all of the sections into one file
     assert(ios_pos(f) % JL_CACHE_BYTE_ALIGNMENT == 0);
@@ -2573,6 +3194,8 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
     jl_finish_relocs(base + sysimg_offset, sysimg_size, &s.relocs_list);
     jl_write_offsetlist(s.relocs, sysimg_size, &s.gctags_list);
     jl_write_offsetlist(s.relocs, sysimg_size, &s.relocs_list);
+    jl_write_offsetlist(s.relocs, sysimg_size, &s.memowner_list);
+    jl_write_offsetlist(s.relocs, sysimg_size, &s.memref_list);
     if (s.incremental) {
         jl_write_arraylist(s.relocs, &s.uniquing_types);
         jl_write_arraylist(s.relocs, &s.uniquing_objs);
@@ -2606,74 +3229,105 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
                 jl_value_t *tag = *tags[i];
                 jl_write_value(&s, tag);
             }
-            jl_write_value(&s, jl_global_roots_table);
+            for (i = 0; i < jl_n_builtins; i++)
+                jl_write_value(&s, jl_builtin_instances[i]);
+#define XX(name, type) jl_write_value(&s, (jl_value_t*)jl_##name);
+            JL_EXPORTED_DATA_POINTERS(XX)
+#undef XX
+#define XX(name, type) jl_write_value(&s, (jl_value_t*)jl_##name);
+            JL_CONST_GLOBAL_VARS(XX)
+#undef XX
+            jl_write_value(&s, global_roots_list);
+            jl_write_value(&s, global_roots_keyset);
             jl_write_value(&s, s.ptls->root_task->tls);
             write_uint32(f, jl_get_gs_ctr());
-            write_uint(f, jl_atomic_load_acquire(&jl_world_counter));
+            size_t world = jl_atomic_load_acquire(&jl_world_counter);
+            // assert(world == precompilation_world); // This triggers on a normal build of julia
+            write_uint(f, world);
             write_uint(f, jl_typeinf_world);
         }
         else {
             jl_write_value(&s, worklist);
             // save module initialization order
-            if (jl_module_init_order != NULL) {
-                size_t i, l = jl_array_len(jl_module_init_order);
-                for (i = 0; i < l; i++) {
-                    // verify that all these modules were saved
-                    assert(ptrhash_get(&serialization_order, jl_array_ptr_ref(jl_module_init_order, i)) != HT_NOTFOUND);
-                }
+            size_t i, l = jl_array_len(module_init_order);
+            for (i = 0; i < l; i++) {
+                // verify that all these modules were saved
+                assert(ptrhash_get(&serialization_order, jl_array_ptr_ref(module_init_order, i)) != HT_NOTFOUND);
             }
-            jl_write_value(&s, jl_module_init_order);
+            jl_write_value(&s, module_init_order);
             jl_write_value(&s, extext_methods);
-            jl_write_value(&s, new_specializations);
-            jl_write_value(&s, method_roots_list);
-            jl_write_value(&s, ext_targets);
-            jl_write_value(&s, edges);
+            jl_write_value(&s, new_ext_cis);
+            jl_write_value(&s, s.method_roots_list);
         }
         write_uint32(f, jl_array_len(s.link_ids_gctags));
-        ios_write(f, (char*)jl_array_data(s.link_ids_gctags), jl_array_len(s.link_ids_gctags) * sizeof(uint32_t));
+        ios_write(f, (char*)jl_array_data(s.link_ids_gctags, uint32_t), jl_array_len(s.link_ids_gctags) * sizeof(uint32_t));
         write_uint32(f, jl_array_len(s.link_ids_relocs));
-        ios_write(f, (char*)jl_array_data(s.link_ids_relocs), jl_array_len(s.link_ids_relocs) * sizeof(uint32_t));
+        ios_write(f, (char*)jl_array_data(s.link_ids_relocs, uint32_t), jl_array_len(s.link_ids_relocs) * sizeof(uint32_t));
         write_uint32(f, jl_array_len(s.link_ids_gvars));
-        ios_write(f, (char*)jl_array_data(s.link_ids_gvars), jl_array_len(s.link_ids_gvars) * sizeof(uint32_t));
+        ios_write(f, (char*)jl_array_data(s.link_ids_gvars, uint32_t), jl_array_len(s.link_ids_gvars) * sizeof(uint32_t));
         write_uint32(f, jl_array_len(s.link_ids_external_fnvars));
-        ios_write(f, (char*)jl_array_data(s.link_ids_external_fnvars), jl_array_len(s.link_ids_external_fnvars) * sizeof(uint32_t));
+        ios_write(f, (char*)jl_array_data(s.link_ids_external_fnvars, uint32_t), jl_array_len(s.link_ids_external_fnvars) * sizeof(uint32_t));
         write_uint32(f, external_fns_begin);
-        jl_write_arraylist(s.s, &s.ccallable_list);
     }
 
     assert(object_worklist.len == 0);
     arraylist_free(&object_worklist);
     arraylist_free(&serialization_queue);
     arraylist_free(&layout_table);
-    arraylist_free(&s.ccallable_list);
+    arraylist_free(&s.uniquing_types);
+    arraylist_free(&s.uniquing_super);
+    arraylist_free(&s.uniquing_objs);
+    arraylist_free(&s.fixup_types);
+    arraylist_free(&s.fixup_objs);
+    arraylist_free(&s.memowner_list);
+    arraylist_free(&s.memref_list);
     arraylist_free(&s.relocs_list);
     arraylist_free(&s.gctags_list);
     arraylist_free(&gvars);
     arraylist_free(&external_fns);
+    htable_free(&s.method_roots_index);
     htable_free(&field_replace);
-    if (worklist)
-        htable_free(&external_objects);
+    htable_free(&bits_replace);
     htable_free(&serialization_order);
-    htable_free(&unique_ready);
     htable_free(&nullptrs);
     htable_free(&symbol_table);
     htable_free(&fptr_to_id);
+    htable_free(&new_methtables);
     nsym_tag = 0;
 
     jl_gc_enable(en);
 }
 
-static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_array_t *mod_array, jl_array_t **udeps, int64_t *srctextpos, int64_t *checksumpos)
+static int ci_not_internal_cache(jl_code_instance_t *ci)
 {
-    assert(jl_precompile_toplevel_module == NULL);
-    jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1);
+    jl_method_instance_t *mi = jl_get_ci_mi(ci);
+    return !(jl_atomic_load_relaxed(&ci->flags) & JL_CI_FLAGS_NATIVE_CACHE_VALID) || jl_object_in_image(mi->def.value);
+}
 
+static uint8_t jl_get_toplevel_syntax_version(void)
+{
+    jl_task_t *ct = jl_current_task;
+    jl_module_t *toplevel = (jl_module_t*)jl_get_global_value(jl_base_module, jl_symbol("__toplevel__"), ct->world_age);
+    JL_GC_PROMISE_ROOTED(toplevel);
+    jl_value_t *syntax_version = jl_get_global_value(toplevel, jl_symbol("_internal_syntax_version"), ct->world_age);
+    return jl_unbox_uint8(syntax_version);
+}
+
+static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_array_t *mod_array, jl_array_t **udeps, int64_t *srctextpos, int64_t *checksumpos)
+{
     *checksumpos = write_header(f, 0);
     write_uint8(f, jl_cache_flags());
+    // write the syntax version marker. Note that unlike a VersionNumber, this is
+    // private to the serialization format and only needs to be reloaded by the
+    // same version of Julia that wrote it. As a result, we don't store the full
+    // VersionNumber, only an index of which of the supported syntax versions to
+    // select.
+    write_uint8(f, jl_get_toplevel_syntax_version());
     // write description of contents (name, uuid, buildid)
     write_worklist_for_header(f, worklist);
-    // Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist
-    // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header.
+    // Determine unique (module, abspath, fsize, hash, mtime) dependencies for the files defining modules in the worklist
+    // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header
+    // (abspath will be converted to a relocateable @depot path before writing, cf. Base.replace_depot_path).
     // Also write Preferences.
     // last word of the dependency list is the end of the data / start of the srctextpos
     *srctextpos = write_dependency_list(f, worklist, udeps);  // srctextpos: position of srctext entry in header index (update later)
@@ -2684,10 +3338,8 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a
 }
 
 JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *worklist, bool_t emit_split,
-                                         ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos)
+                                         ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos, jl_array_t *module_init_order)
 {
-    jl_gc_collect(JL_GC_FULL);
-    jl_gc_collect(JL_GC_INCREMENTAL);   // sweep finalizers
     JL_TIMING(SYSIMG_DUMP, SYSIMG_DUMP);
 
     // iff emit_split
@@ -2705,29 +3357,24 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
         ff = f;
     }
 
-    jl_array_t *mod_array = NULL, *extext_methods = NULL, *new_specializations = NULL;
-    jl_array_t *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL;
+    jl_array_t *mod_array = NULL, *extext_methods = NULL, *new_ext_cis = NULL;
     int64_t checksumpos = 0;
     int64_t checksumpos_ff = 0;
     int64_t datastartpos = 0;
-    JL_GC_PUSH6(&mod_array, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges);
+    JL_GC_PUSH3(&mod_array, &extext_methods, &new_ext_cis);
 
+    mod_array = jl_get_loaded_modules();  // __toplevel__ modules loaded in this session (from Base.loaded_modules_array)
     if (worklist) {
-        mod_array = jl_get_loaded_modules();  // __toplevel__ modules loaded in this session (from Base.loaded_modules_array)
-        // Generate _native_data`
         if (_native_data != NULL) {
-            jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist),
-                                          &extext_methods, &new_specializations, NULL, NULL, NULL);
-            jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1);
-            *_native_data = jl_precompile_worklist(worklist, extext_methods, new_specializations);
-            jl_precompile_toplevel_module = NULL;
-            extext_methods = NULL;
-            new_specializations = NULL;
+            if (suppress_precompile)
+                newly_inferred = NULL;
+            *_native_data = jl_create_native(NULL, 0, 1, jl_atomic_load_acquire(&jl_world_counter), NULL, suppress_precompile ? (jl_array_t*)jl_an_empty_vec_any : worklist, 0, module_init_order);
         }
         jl_write_header_for_incremental(f, worklist, mod_array, udeps, srctextpos, &checksumpos);
         if (emit_split) {
             checksumpos_ff = write_header(ff, 1);
             write_uint8(ff, jl_cache_flags());
+            write_uint8(ff, jl_get_toplevel_syntax_version());
             write_mod_list(ff, mod_array);
         }
         else {
@@ -2735,17 +3382,48 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
         }
     }
     else if (_native_data != NULL) {
-        *_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL);
+        *_native_data = jl_create_native(NULL, jl_options.trim, 0, jl_atomic_load_acquire(&jl_world_counter), mod_array, NULL, jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL, module_init_order);
     }
+    if (_native_data != NULL)
+        native_functions = *_native_data;
 
+    jl_gc_collect(JL_GC_FULL);
+    jl_gc_collect(JL_GC_INCREMENTAL);   // sweep finalizers
     // Make sure we don't run any Julia code concurrently after this point
     // since it will invalidate our serialization preparations
     jl_gc_enable_finalizers(ct, 0);
     assert((ct->reentrant_timing & 0b1110) == 0);
     ct->reentrant_timing |= 0b1000;
     if (worklist) {
-        jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist),
-                                      &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges);
+        // extext_methods: [method1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist
+
+        // Save the inferred code from newly inferred, external methods
+        if (native_functions) {
+            arraylist_t CIs;
+            arraylist_new(&CIs, 0);
+            size_t num_cis;
+            jl_get_llvm_cis(native_functions, &num_cis, NULL);
+            arraylist_grow(&CIs, num_cis);
+            jl_get_llvm_cis(native_functions, &num_cis, (jl_code_instance_t**)CIs.items);
+            // Create a filtered list of the compiled code instances that are
+            // possibly not referenced via any other way but valid for the
+            // Method cache field of an external method
+            new_ext_cis = jl_alloc_vec_any(0);
+            for (size_t i = 0; i < num_cis; i++) {
+                jl_code_instance_t *ci = (jl_code_instance_t*)CIs.items[i];
+                if (ci_not_internal_cache(ci))
+                    jl_array_ptr_1d_push(new_ext_cis, (jl_value_t*)ci);
+            }
+            arraylist_free(&CIs);
+        }
+        else {
+            new_ext_cis = jl_compute_new_ext_cis();
+        }
+
+        // Collect method extensions
+        extext_methods = jl_alloc_vec_any(0);
+        jl_collect_extext_methods(extext_methods, mod_array);
+
         if (!emit_split) {
             write_int32(f, 0); // No clone_targets
             write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f));
@@ -2755,16 +3433,16 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
         }
         datastartpos = ios_pos(ff);
     }
-    if (_native_data != NULL)
-        native_functions = *_native_data;
-    jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges);
+
+    jl_query_cache query_cache;
+    init_query_cache(&query_cache);
+    jl_save_system_image_to_stream(ff, mod_array, module_init_order, worklist, extext_methods, new_ext_cis, &query_cache);
     if (_native_data != NULL)
         native_functions = NULL;
     // make sure we don't run any Julia code concurrently before this point
     // Re-enable running julia code for postoutput hooks, atexit, etc.
     jl_gc_enable_finalizers(ct, 1);
     ct->reentrant_timing &= ~0b1000u;
-    jl_precompile_toplevel_module = NULL;
 
     if (worklist) {
         // Go back and update the checksum in the header
@@ -2786,6 +3464,8 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
         }
     }
 
+    destroy_query_cache(&query_cache);
+
     JL_GC_POP();
     *s = f;
     if (emit_split)
@@ -2793,33 +3473,180 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
     return;
 }
 
-JL_DLLEXPORT size_t ios_write_direct(ios_t *dest, ios_t *src);
-
-// Takes in a path of the form "usr/lib/julia/sys.so" (jl_restore_system_image should be passed the same string)
-JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname)
+// Takes in a path of the form "usr/lib/julia/sys.so"
+JL_DLLEXPORT jl_image_buf_t jl_preload_sysimg(const char *fname)
 {
-    if (jl_sysimg_handle)
-        return; // embedded target already called jl_set_sysimg_so
+    if (jl_sysimage_buf.kind != JL_IMAGE_KIND_NONE)
+        return jl_sysimage_buf;
 
     char *dot = (char*) strrchr(fname, '.');
     int is_ji = (dot && !strcmp(dot, ".ji"));
 
-    // Get handle to sys.so
-    if (!is_ji) // .ji extension => load .ji file only
-        jl_set_sysimg_so(jl_load_dynamic_library(fname, JL_RTLD_LOCAL | JL_RTLD_NOW, 1));
+    if (is_ji) {
+        // .ji extension => load .ji file only
+        ios_t f;
+
+        if (ios_file(&f, fname, 1, 0, 0, 0) == NULL)
+            jl_errorf("System image file \"%s\" not found.", fname);
+        ios_bufmode(&f, bm_none);
+
+        ios_seek_end(&f);
+        size_t len = ios_pos(&f);
+        char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0);
+        ios_seek(&f, 0);
+
+        if (ios_readall(&f, sysimg, len) != len)
+            jl_errorf("Error reading system image file.");
+
+        ios_close(&f);
+
+        jl_sysimage_buf = (jl_image_buf_t) {
+            .kind = JL_IMAGE_KIND_JI,
+            .pointers = NULL,
+            .data = sysimg,
+            .size = len,
+            .base = 0,
+        };
+        return jl_sysimage_buf;
+    } else {
+        // Get handle to sys.so
+        return jl_set_sysimg_so(jl_load_dynamic_library(fname, JL_RTLD_LOCAL | JL_RTLD_NOW, 1));
+    }
+}
+
+
+static void jl_prefetch_system_image(const char *data, size_t size)
+{
+    size_t page_size = jl_getpagesize(); /* jl_page_size is not set yet when loading sysimg */
+    void *start = (void *)((uintptr_t)data & ~(page_size - 1));
+    size_t size_aligned = LLT_ALIGN(size, page_size);
+#ifdef _OS_WINDOWS_
+    WIN32_MEMORY_RANGE_ENTRY entry = {start, size_aligned};
+    PrefetchVirtualMemory(GetCurrentProcess(), 1, &entry, 0);
+#else
+    madvise(start, size_aligned, MADV_WILLNEED);
+#endif
+}
+
+JL_DLLEXPORT void jl_image_unpack_uncomp(void *handle, jl_image_buf_t *image)
+{
+    size_t *plen;
+    jl_dlsym(handle, "jl_system_image_size", (void **)&plen, 1, 0);
+    jl_dlsym(handle, "jl_system_image_data", (void **)&image->data, 1, 0);
+    jl_dlsym(handle, "jl_image_pointers", (void**)&image->pointers, 1, 0);
+    image->size = *plen;
+    jl_prefetch_system_image(image->data, image->size);
+}
+
+JL_DLLEXPORT void jl_image_unpack_zstd(void *handle, jl_image_buf_t *image)
+{
+    size_t *plen;
+    const char *data;
+    jl_dlsym(handle, "jl_system_image_size", (void **)&plen, 1, 0);
+    jl_dlsym(handle, "jl_system_image_data", (void **)&data, 1, 0);
+    jl_dlsym(handle, "jl_image_pointers", (void **)&image->pointers, 1, 0);
+    jl_prefetch_system_image(data, *plen);
+    image->size = ZSTD_getFrameContentSize(data, *plen);
+    size_t page_size = jl_getpagesize(); /* jl_page_size is not set yet when loading sysimg */
+    size_t aligned_size = LLT_ALIGN(image->size, page_size);
+    int fail = 0;
+#if defined(_OS_WINDOWS_)
+    size_t large_page_size = GetLargePageMinimum();
+    image->data = NULL;
+    if (large_page_size > 0 && image->size > 4 * large_page_size) {
+        size_t aligned_size = LLT_ALIGN(image->size, large_page_size);
+        image->data = (char *)VirtualAlloc(
+            NULL, aligned_size, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
+    }
+    if (!image->data) {
+        /* Try small pages if large pages failed. */
+        image->data = (char *)VirtualAlloc(NULL, aligned_size, MEM_COMMIT | MEM_RESERVE,
+                                           PAGE_READWRITE);
+    }
+    fail = !image->data;
+#else
+    image->data = (char *)mmap(NULL, aligned_size, PROT_READ | PROT_WRITE,
+                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    fail = image->data == (void *)-1;
+#endif
+    if (fail) {
+        const char *err;
+#if defined(_OS_WINDOWS_)
+        char err_buf[256];
+        win32_formatmessage(GetLastError(), err_buf, sizeof(err_buf));
+        err = err_buf;
+#else
+        err = strerror(errno);
+#endif
+        jl_printf(JL_STDERR, "ERROR: failed to allocate memory for system image: %s\n",
+                  err);
+        jl_exit(1);
+    }
+
+    ZSTD_decompress((void *)image->data, image->size, data, *plen);
+    size_t len = (*plen) & ~(page_size - 1);
+#ifdef _OS_WINDOWS_
+    if (len)
+        VirtualFree((void *)data, len, MEM_RELEASE);
+#else
+    munmap((void *)data, len);
+#endif
+}
+
+// From a shared library handle, verify consistency and return a jl_image_buf_t
+static jl_image_buf_t get_image_buf(void *handle, int is_pkgimage)
+{
+    // verify that the linker resolved the symbols in this image against ourselves (libjulia-internal)
+    void** (*get_jl_RTLD_DEFAULT_handle_addr)(void) = NULL;
+    if (handle != jl_RTLD_DEFAULT_handle) {
+        int symbol_found = jl_dlsym(handle, "get_jl_RTLD_DEFAULT_handle_addr", (void **)&get_jl_RTLD_DEFAULT_handle_addr, 0, 0);
+        if (!symbol_found || (void*)&jl_RTLD_DEFAULT_handle != (get_jl_RTLD_DEFAULT_handle_addr()))
+            jl_error("Image file failed consistency check: maybe opened the wrong version?");
+    }
+
+    jl_image_unpack_func_t **unpack;
+    jl_image_buf_t image = {
+        .kind = JL_IMAGE_KIND_SO,
+        .pointers = NULL,
+        .data = NULL,
+        .size = 0,
+        .base = 0,
+    };
+
+    // verification passed, lookup the buffer pointers
+    if (jl_image_unpack == NULL || is_pkgimage) {
+        // in the usual case, the sysimage was not statically linked to libjulia-internal
+        // look up the external sysimage symbols via the dynamic linker
+        jl_dlsym(handle, "jl_image_unpack", (void **)&unpack, 1, 0);
+    }
+    else {
+        // the sysimage was statically linked directly against libjulia-internal
+        // use the internal symbols
+        unpack = &jl_image_unpack;
+    }
+    (*unpack)(handle, &image);
+
+#ifdef _OS_WINDOWS_
+    image.base = (intptr_t)handle;
+#else
+    Dl_info dlinfo;
+    if (dladdr((void*)image.pointers, &dlinfo) != 0)
+        image.base = (intptr_t)dlinfo.dli_fbase;
+    else
+        image.base = 0;
+#endif
+
+    return image;
 }
 
 // Allow passing in a module handle directly, rather than a path
-JL_DLLEXPORT void jl_set_sysimg_so(void *handle)
+JL_DLLEXPORT jl_image_buf_t jl_set_sysimg_so(void *handle)
 {
-    void* *jl_RTLD_DEFAULT_handle_pointer;
-    int symbol_found = jl_dlsym(handle, "jl_RTLD_DEFAULT_handle_pointer", (void **)&jl_RTLD_DEFAULT_handle_pointer, 0);
-    if (!symbol_found || (void*)&jl_RTLD_DEFAULT_handle != *jl_RTLD_DEFAULT_handle_pointer)
-        jl_error("System image file failed consistency check: maybe opened the wrong version?");
-    if (jl_options.cpu_target == NULL)
-        jl_options.cpu_target = "native";
-    jl_sysimg_handle = handle;
-    sysimage = jl_init_processor_sysimg(handle);
+    if (jl_sysimage_buf.kind != JL_IMAGE_KIND_NONE)
+        return jl_sysimage_buf;
+
+    jl_sysimage_buf = get_image_buf(handle, /* is_pkgimage */ 0);
+    return jl_sysimage_buf;
 }
 
 #ifndef JL_NDEBUG
@@ -2832,15 +3659,115 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle)
 #endif
 
 extern void rebuild_image_blob_tree(void);
-extern void export_small_typeof(void);
+extern void export_jl_small_typeof(void);
+extern void export_jl_sysimg_globals(void);
+
+// When an image is loaded with ignore_native, all subsequent image loads must ignore
+// native code in the cache-file since we can't gurantuee that there are no call edges
+// into the native code of the image. See https://github.com/JuliaLang/julia/pull/52123#issuecomment-1959965395.
+int IMAGE_NATIVE_CODE_TAINTED = 0;
 
-static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum,
+// TODO: This should possibly be in Julia
+static int jl_validate_binding_partition(jl_binding_t *b, jl_binding_partition_t *bpart, size_t mod_idx, int unchanged_implicit, int no_replacement)
+{
+    if (jl_atomic_load_relaxed(&bpart->max_world) != ~(size_t)0)
+        return 1;
+    size_t raw_kind = bpart->kind;
+    enum jl_partition_kind kind = (enum jl_partition_kind)(raw_kind & PARTITION_MASK_KIND);
+    if (!unchanged_implicit && jl_bkind_is_some_implicit(kind)) {
+        // TODO: Should we actually update this in place or delete it from the partitions list
+        // and allocate a fresh bpart?
+        jl_update_loaded_bpart(b, bpart);
+        bpart->kind |= (raw_kind & PARTITION_MASK_FLAG);
+        if (jl_atomic_load_relaxed(&bpart->min_world) > jl_require_world)
+            goto invalidated;
+    }
+    {
+        if (!jl_bkind_is_some_explicit_import(kind) && kind != PARTITION_KIND_IMPLICIT_GLOBAL)
+            return 1;
+        jl_binding_t *imported_binding = (jl_binding_t*)bpart->restriction;
+        jl_binding_partition_t *latest_imported_bpart = jl_atomic_load_relaxed(&imported_binding->partitions);
+        if (no_replacement)
+            goto add_backedge;
+        if (!latest_imported_bpart)
+            return 1;
+        if (jl_atomic_load_relaxed(&latest_imported_bpart->min_world) <=
+            jl_atomic_load_relaxed(&bpart->min_world)) {
+    add_backedge:
+            // Imported binding is still valid
+            if ((kind == PARTITION_KIND_EXPLICIT || kind == PARTITION_KIND_IMPORTED) &&
+                    external_blob_index((jl_value_t*)imported_binding) != mod_idx) {
+                jl_add_binding_backedge(imported_binding, (jl_value_t*)b);
+            }
+            return 1;
+        }
+        else {
+            // Binding partition was invalidated
+            assert(jl_atomic_load_relaxed(&bpart->min_world) == jl_require_world);
+            jl_atomic_store_relaxed(&bpart->min_world,
+                jl_atomic_load_relaxed(&latest_imported_bpart->min_world));
+        }
+    }
+invalidated:
+    // We need to go through and re-validate any bindings in the same image that
+    // may have imported us.
+    if (b->backedges) {
+        JL_LOCK(&b->globalref->mod->lock);
+        for (size_t i = 0; i < jl_array_len(b->backedges); i++) {
+            jl_value_t *edge = jl_array_ptr_ref(b->backedges, i);
+            if (!jl_is_binding(edge))
+                continue;
+            jl_binding_t *bedge = (jl_binding_t*)edge;
+            if (!jl_atomic_load_relaxed(&bedge->partitions))
+                continue;
+            JL_UNLOCK(&b->globalref->mod->lock);
+            jl_validate_binding_partition(bedge, jl_atomic_load_relaxed(&bedge->partitions), mod_idx, 0, 0);
+            JL_LOCK(&b->globalref->mod->lock);
+        }
+        JL_UNLOCK(&b->globalref->mod->lock);
+    }
+    if (bpart->kind & PARTITION_FLAG_EXPORTED) {
+        jl_module_t *mod = b->globalref->mod;
+        jl_sym_t *name = b->globalref->name;
+        JL_LOCK(&mod->lock);
+        jl_atomic_store_release(&mod->export_set_changed_since_require_world, 1);
+        if (mod->usings_backedges != jl_nothing) {
+            for (size_t i = 0; i < jl_array_len(mod->usings_backedges); i++) {
+                jl_module_t *edge = (jl_module_t*)jl_array_ptr_ref(mod->usings_backedges, i);
+                jl_binding_t *importee = jl_get_module_binding(edge, name, 0);
+                if (!importee)
+                    continue;
+                if (!jl_atomic_load_relaxed(&importee->partitions))
+                    continue;
+                JL_UNLOCK(&mod->lock);
+                jl_validate_binding_partition(importee, jl_atomic_load_relaxed(&importee->partitions), mod_idx, 0, 0);
+                JL_LOCK(&mod->lock);
+            }
+        }
+        JL_UNLOCK(&mod->lock);
+        return 0;
+    }
+    return 1;
+}
+
+static int all_usings_unchanged_implicit(jl_module_t *mod)
+{
+    int unchanged_implicit = 1;
+    for (size_t i = 0; unchanged_implicit && i < module_usings_length(mod); i++) {
+        jl_module_t *usee = module_usings_getmod(mod, i);
+        unchanged_implicit &= !jl_atomic_load_acquire(&usee->export_set_changed_since_require_world);
+    }
+    return unchanged_implicit;
+}
+
+static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image,
+                                                 jl_array_t *depmods, uint64_t checksum,
                                 /* outputs */    jl_array_t **restored,         jl_array_t **init_order,
-                                                 jl_array_t **extext_methods,
-                                                 jl_array_t **new_specializations, jl_array_t **method_roots_list,
-                                                 jl_array_t **ext_targets, jl_array_t **edges,
-                                                 char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED
+                                                 jl_array_t **extext_methods, jl_array_t **internal_methods,
+                                                 jl_array_t **new_ext_cis, jl_array_t **method_roots_list,
+                                                 pkgcachesizes *cachesizes) JL_GC_DISABLED
 {
+    jl_task_t *ct = jl_current_task;
     int en = jl_gc_enable(0);
     ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record;
     jl_serializer_state s = {0};
@@ -2852,12 +3779,22 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     s.relocs = &relocs;
     s.gvar_record = &gvar_record;
     s.fptr_record = &fptr_record;
-    s.ptls = jl_current_task->ptls;
-    jl_value_t **const*const tags = get_tags();
+    s.ptls = ct->ptls;
+    jl_value_t **_tags[NUM_TAGS];
+    jl_value_t ***tags = s.incremental ? NULL : _tags;
+    if (!s.incremental)
+        get_tags(_tags);
+
     htable_t new_dt_objs;
     htable_new(&new_dt_objs, 0);
     arraylist_new(&deser_sym, 0);
 
+    if (jl_options.use_sysimage_native_code != JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES || IMAGE_NATIVE_CODE_TAINTED) {
+        memset(&image->fptrs, 0, sizeof(image->fptrs));
+        image->gvars_base = NULL;
+        IMAGE_NATIVE_CODE_TAINTED = 1;
+    }
+
     // step 1: read section map
     assert(ios_pos(f) == 0 && f->bm == bm_mem);
     size_t sizeof_sysdata = read_uint(f);
@@ -2899,76 +3836,75 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     ios_seek(f, LLT_ALIGN(ios_pos(f), 8));
     assert(!ios_eof(f));
     s.s = f;
-    uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_specializations = 0, offset_method_roots_list = 0;
-    uintptr_t offset_ext_targets = 0, offset_edges = 0;
+    uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_ext_cis = 0, offset_method_roots_list = 0;
     if (!s.incremental) {
         size_t i;
         for (i = 0; tags[i] != NULL; i++) {
             jl_value_t **tag = tags[i];
             *tag = jl_read_value(&s);
         }
+        for (i = 0; i < jl_n_builtins; i++)
+            jl_builtin_instances[i] = jl_read_value(&s);
+#define XX(name, type) jl_##name = (type)jl_read_value(&s);
+        JL_EXPORTED_DATA_POINTERS(XX)
+#undef XX
+#define XX(name, type) jl_##name = (type)jl_read_value(&s);
+        JL_CONST_GLOBAL_VARS(XX)
+#undef XX
 #define XX(name) \
-        small_typeof[(jl_##name##_tag << 4) / sizeof(*small_typeof)] = jl_##name##_type;
+        ijl_small_typeof[(jl_##name##_tag << 4) / sizeof(*ijl_small_typeof)] = jl_##name##_type;
         JL_SMALL_TYPEOF(XX)
 #undef XX
-        export_small_typeof();
-        jl_global_roots_table = (jl_array_t*)jl_read_value(&s);
-        // set typeof extra-special values now that we have the type set by tags above
-        jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header;
+        export_jl_small_typeof();
+        export_jl_sysimg_globals();
+        jl_global_roots_list = (jl_genericmemory_t*)jl_read_value(&s);
+        jl_global_roots_keyset = (jl_genericmemory_t*)jl_read_value(&s);
         s.ptls->root_task->tls = jl_read_value(&s);
         jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls);
-        jl_init_int32_int64_cache();
-        jl_init_box_caches();
 
         uint32_t gs_ctr = read_uint32(f);
-        jl_atomic_store_release(&jl_world_counter, read_uint(f));
+        jl_require_world = read_uint(f);
+        jl_atomic_store_release(&jl_world_counter, jl_require_world);
         jl_typeinf_world = read_uint(f);
         jl_set_gs_ctr(gs_ctr);
     }
     else {
-        jl_atomic_fetch_add(&jl_world_counter, 1);
         offset_restored = jl_read_offset(&s);
         offset_init_order = jl_read_offset(&s);
         offset_extext_methods = jl_read_offset(&s);
-        offset_new_specializations = jl_read_offset(&s);
+        offset_new_ext_cis = jl_read_offset(&s);
         offset_method_roots_list = jl_read_offset(&s);
-        offset_ext_targets = jl_read_offset(&s);
-        offset_edges = jl_read_offset(&s);
     }
     s.buildid_depmods_idxs = depmod_to_imageidx(depmods);
     size_t nlinks_gctags = read_uint32(f);
     if (nlinks_gctags > 0) {
         s.link_ids_gctags = jl_alloc_array_1d(jl_array_int32_type, nlinks_gctags);
-        ios_read(f, (char*)jl_array_data(s.link_ids_gctags), nlinks_gctags * sizeof(uint32_t));
+        ios_read(f, (char*)jl_array_data(s.link_ids_gctags, uint32_t), nlinks_gctags * sizeof(uint32_t));
     }
     size_t nlinks_relocs = read_uint32(f);
     if (nlinks_relocs > 0) {
         s.link_ids_relocs = jl_alloc_array_1d(jl_array_int32_type, nlinks_relocs);
-        ios_read(f, (char*)jl_array_data(s.link_ids_relocs), nlinks_relocs * sizeof(uint32_t));
+        ios_read(f, (char*)jl_array_data(s.link_ids_relocs, uint32_t), nlinks_relocs * sizeof(uint32_t));
     }
     size_t nlinks_gvars = read_uint32(f);
     if (nlinks_gvars > 0) {
         s.link_ids_gvars = jl_alloc_array_1d(jl_array_int32_type, nlinks_gvars);
-        ios_read(f, (char*)jl_array_data(s.link_ids_gvars), nlinks_gvars * sizeof(uint32_t));
+        ios_read(f, (char*)jl_array_data(s.link_ids_gvars, uint32_t), nlinks_gvars * sizeof(uint32_t));
     }
     size_t nlinks_external_fnvars = read_uint32(f);
     if (nlinks_external_fnvars > 0) {
         s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_int32_type, nlinks_external_fnvars);
-        ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars), nlinks_external_fnvars * sizeof(uint32_t));
+        ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars, uint32_t), nlinks_external_fnvars * sizeof(uint32_t));
     }
     uint32_t external_fns_begin = read_uint32(f);
-    jl_read_arraylist(s.s, ccallable_list ? ccallable_list : &s.ccallable_list);
     if (s.incremental) {
-        assert(restored && init_order && extext_methods && new_specializations && method_roots_list && ext_targets && edges);
+        assert(restored && init_order && extext_methods && internal_methods && new_ext_cis && method_roots_list);
         *restored = (jl_array_t*)jl_delayed_reloc(&s, offset_restored);
         *init_order = (jl_array_t*)jl_delayed_reloc(&s, offset_init_order);
         *extext_methods = (jl_array_t*)jl_delayed_reloc(&s, offset_extext_methods);
-        *new_specializations = (jl_array_t*)jl_delayed_reloc(&s, offset_new_specializations);
+        *new_ext_cis = (jl_array_t*)jl_delayed_reloc(&s, offset_new_ext_cis);
         *method_roots_list = (jl_array_t*)jl_delayed_reloc(&s, offset_method_roots_list);
-        *ext_targets = (jl_array_t*)jl_delayed_reloc(&s, offset_ext_targets);
-        *edges = (jl_array_t*)jl_delayed_reloc(&s, offset_edges);
-        if (!*new_specializations)
-            *new_specializations = jl_alloc_vec_any(0);
+        *internal_methods = jl_alloc_vec_any(0);
     }
     s.s = NULL;
 
@@ -2979,14 +3915,14 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
 
     char *image_base = (char*)&sysimg.buf[0];
     reloc_t *relocs_base = (reloc_t*)&relocs.buf[0];
-    if (base)
-        *base = image_base;
 
     s.s = &sysimg;
     jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD | GC_IN_IMAGE); // gctags
     size_t sizeof_tags = ios_pos(&relocs);
     (void)sizeof_tags;
     jl_read_reloclist(&s, s.link_ids_relocs, 0); // general relocs
+    jl_read_memreflist(&s); // memowner_list relocs (must come before memref_list reads the pointers and after general relocs computes the pointers)
+    jl_read_memreflist(&s); // memref_list relocs
     // s.link_ids_gvars will be processed in `jl_update_all_gvars`
     // s.link_ids_external_fns will be processed in `jl_update_all_gvars`
     jl_update_all_gvars(&s, image, external_fns_begin); // gvars relocs
@@ -3015,35 +3951,48 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     arraylist_new(&cleanup_list, 0);
     arraylist_t delay_list;
     arraylist_new(&delay_list, 0);
+    JL_LOCK(&typecache_lock); // Might GC--prevent other threads from changing any type caches while we inspect them all
     for (size_t i = 0; i < s.uniquing_types.len; i++) {
         uintptr_t item = (uintptr_t)s.uniquing_types.items[i];
         // check whether we are operating on the typetag
         // (needing to ignore GC bits) or a regular field
-        int tag = (item & 1) == 1;
-        // check whether this is a gvar index
-        int gvar = (item & 2) == 2;
+        // and check whether this is a gvar index
+        int tag = (item & 3);
         item &= ~(uintptr_t)3;
         uintptr_t *pfld;
         jl_value_t **obj, *newobj;
-        if (gvar) {
+        if (tag == 3) {
+            obj = (jl_value_t**)(image_base + item);
+            pfld = NULL;
+            for (size_t i = 0; i < delay_list.len; i += 2) {
+                if (obj == (jl_value_t **)delay_list.items[i + 0]) {
+                    pfld = (uintptr_t*)delay_list.items[i + 1];
+                    delay_list.items[i + 1] = arraylist_pop(&delay_list);
+                    delay_list.items[i + 0] = arraylist_pop(&delay_list);
+                    break;
+                }
+            }
+            assert(pfld);
+        }
+        else if (tag == 2) {
             if (image->gvars_base == NULL)
                 continue;
             item >>= 2;
             assert(item < s.gvar_record->size / sizeof(reloc_t));
             pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item);
             obj = *(jl_value_t***)pfld;
-            assert(tag == 0);
         }
         else {
             pfld = (uintptr_t*)(image_base + item);
-            if (tag)
+            if (tag == 1)
                 obj = (jl_value_t**)jl_typeof(jl_valueof(pfld));
             else
                 obj = *(jl_value_t***)pfld;
             if ((char*)obj > (char*)pfld) {
+                // this must be the super field
                 assert(tag == 0);
-                arraylist_push(&delay_list, pfld);
                 arraylist_push(&delay_list, obj);
+                arraylist_push(&delay_list, pfld);
                 ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid
                 *pfld = (uintptr_t)NULL;
                 continue;
@@ -3072,8 +4021,9 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
                     // leave most fields undefined for now, but we may need instance later,
                     // and we overwrite the name field (field 0) now so preserve it too
                     if (dt->instance) {
-                        assert(dt->instance == jl_nothing);
-                        newdt->instance = dt->instance = jl_gc_permobj(0, newdt);
+                        if (dt->instance == jl_nothing)
+                            dt->instance = jl_gc_permobj(ct->ptls, 0, newdt, 0);
+                        newdt->instance = dt->instance;
                     }
                     static_assert(offsetof(jl_datatype_t, name) == 0, "");
                     newdt->name = dt->name;
@@ -3088,30 +4038,18 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
         }
         else {
             assert(!(image_base < (char*)otyp && (char*)otyp <= image_base + sizeof_sysimg));
-            assert(jl_is_datatype_singleton((jl_datatype_t*)otyp) && "unreachable");
             newobj = ((jl_datatype_t*)otyp)->instance;
-            assert(newobj != jl_nothing);
+            assert(newobj && newobj != jl_nothing);
             arraylist_push(&cleanup_list, (void*)obj);
         }
-        if (tag)
+        if (tag == 1)
             *pfld = (uintptr_t)newobj | GC_OLD | GC_IN_IMAGE;
         else
             *pfld = (uintptr_t)newobj;
         assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
         assert(jl_typetagis(obj, otyp));
     }
-    // A few fields (reached via super) might be self-recursive. This is rare, but handle them now.
-    // They cannot be instances though, since the type must fully exist before the singleton field can be allocated
-    for (size_t i = 0; i < delay_list.len; ) {
-        uintptr_t *pfld = (uintptr_t*)delay_list.items[i++];
-        jl_value_t **obj = (jl_value_t **)delay_list.items[i++];
-        assert(jl_is_datatype(obj));
-        jl_datatype_t *dt = (jl_datatype_t*)obj[0];
-        assert(jl_is_datatype(dt));
-        jl_value_t *newobj = (jl_value_t*)dt;
-        *pfld = (uintptr_t)newobj;
-        assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
-    }
+    assert(delay_list.len == 0);
     arraylist_free(&delay_list);
     // now that all the fields of dt are assigned and unique, copy them into
     // their final newdt memory location: this ensures we do not accidentally
@@ -3141,6 +4079,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     }
     arraylist_grow(&cleanup_list, -cleanup_list.len);
     // finally cache all our new types now
+    jl_safepoint_suspend_all_threads(ct); // past this point, it is now not safe to observe the intermediate states on other threads via reflection, so temporarily pause those
     for (size_t i = 0; i < new_dt_objs.size; i += 2) {
         void *dt = table[i + 1];
         if (dt != HT_NOTFOUND) {
@@ -3154,16 +4093,18 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
         assert(jl_is_datatype(obj));
         jl_cache_type_((jl_datatype_t*)obj);
     }
+    JL_UNLOCK(&typecache_lock); // Might GC
+    jl_safepoint_resume_all_threads(ct); // TODO: move this later to also protect MethodInstance allocations, but we would need to acquire all jl_specializations_get_linfo and jl_module_globalref locks, which is hard
     // Perform fixups: things like updating world ages, inserting methods & specializations, etc.
-    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     for (size_t i = 0; i < s.uniquing_objs.len; i++) {
         uintptr_t item = (uintptr_t)s.uniquing_objs.items[i];
         // check whether this is a gvar index
-        int gvar = (item & 2) == 2;
+        int tag = (item & 3);
+        assert(tag == 0 || tag == 2);
         item &= ~(uintptr_t)3;
         uintptr_t *pfld;
         jl_value_t **obj, *newobj;
-        if (gvar) {
+        if (tag == 2) {
             if (image->gvars_base == NULL)
                 continue;
             item >>= 2;
@@ -3190,6 +4131,18 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
                 obj[0] = newobj;
             }
         }
+        else if (otyp == (uintptr_t)jl_binding_type) {
+            jl_value_t *m = obj[0];
+            if (jl_is_binding(m)) {
+                newobj = m; // already done
+            }
+            else {
+                arraylist_push(&cleanup_list, (void*)obj);
+                jl_value_t *name = obj[1];
+                newobj = (jl_value_t*)jl_get_module_binding((jl_module_t*)m, (jl_sym_t*)name, 1);
+                obj[0] = newobj;
+            }
+        }
         else {
             abort(); // should be unreachable
         }
@@ -3205,68 +4158,77 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
         jl_value_t *t = jl_typeof(item);
         if (t == (jl_value_t*)jl_method_instance_type)
             memset(o, 0xba, sizeof(jl_value_t*) * 3); // only specTypes and sparams fields stored
+        else if (t == (jl_value_t*)jl_binding_type)
+            memset(o, 0xba, sizeof(jl_value_t*) * 3); // stored as mod/name
         o->bits.in_image = 1;
     }
     arraylist_free(&cleanup_list);
     for (size_t i = 0; i < s.fixup_objs.len; i++) {
         uintptr_t item = (uintptr_t)s.fixup_objs.items[i];
         jl_value_t *obj = (jl_value_t*)(image_base + item);
-        if (jl_typetagis(obj, jl_typemap_entry_type)) {
-            jl_typemap_entry_t *entry = (jl_typemap_entry_t*)obj;
-            entry->min_world = world;
-        }
-        else if (jl_is_method(obj)) {
-            jl_method_t *m = (jl_method_t*)obj;
-            m->primary_world = world;
+        if (jl_typetagis(obj, jl_typemap_entry_type) || jl_is_method(obj) || jl_is_code_instance(obj)) {
+            jl_array_ptr_1d_push(*internal_methods, obj);
+            assert(s.incremental);
         }
         else if (jl_is_method_instance(obj)) {
             jl_method_instance_t *newobj = jl_specializations_get_or_insert((jl_method_instance_t*)obj);
             assert(newobj == (jl_method_instance_t*)obj); // strict insertion expected
             (void)newobj;
         }
-        else if (jl_is_code_instance(obj)) {
-            jl_code_instance_t *ci = (jl_code_instance_t*)obj;
-            assert(s.incremental);
-            ci->min_world = world;
-            if (ci->max_world != 0)
-                jl_array_ptr_1d_push(*new_specializations, (jl_value_t*)ci);
-        }
         else if (jl_is_globalref(obj)) {
-            continue; // wait until all the module binding tables have been initialized
+            jl_globalref_t *r = (jl_globalref_t*)obj;
+            if (r->binding == NULL) {
+                jl_globalref_t *gr = (jl_globalref_t*)jl_module_globalref(r->mod, r->name);
+                r->binding = gr->binding;
+                jl_gc_wb(r, gr->binding);
+            }
         }
         else if (jl_is_module(obj)) {
-            // rebuild the binding table for module v
+            // rebuild the usings table for module v
             // TODO: maybe want to hold the lock on `v`, but that only strongly matters for async / thread safety
             // and we are already bad at that
             jl_module_t *mod = (jl_module_t*)obj;
             mod->build_id.hi = checksum;
-            mod->primary_world = world;
             if (mod->usings.items != &mod->usings._space[0]) {
                 // arraylist_t assumes we called malloc to get this memory, so make that true now
                 void **newitems = (void**)malloc_s(mod->usings.max * sizeof(void*));
                 memcpy(newitems, mod->usings.items, mod->usings.len * sizeof(void*));
                 mod->usings.items = newitems;
             }
+            size_t mod_idx = external_blob_index((jl_value_t*)mod);
+            if (s.incremental) {
+                // Rebuild cross-image usings backedges
+                for (size_t i = 0; i < module_usings_length(mod); ++i) {
+                    struct _jl_module_using *data = module_usings_getidx(mod, i);
+                    if (external_blob_index((jl_value_t*)data->mod) != mod_idx) {
+                        jl_add_usings_backedge(data->mod, mod);
+                    }
+                }
+            }
         }
         else {
-            // rehash IdDict
-            //assert(((jl_datatype_t*)(jl_typeof(obj)))->name == jl_idtable_typename);
-            jl_array_t **a = (jl_array_t**)obj;
-            assert(jl_typetagis(*a, jl_array_any_type));
-            *a = jl_idtable_rehash(*a, jl_array_len(*a));
-            jl_gc_wb(obj, *a);
+            abort();
         }
     }
-    // Now pick up the globalref binding pointer field
-    for (size_t i = 0; i < s.fixup_objs.len; i++) {
-        uintptr_t item = (uintptr_t)s.fixup_objs.items[i];
-        jl_value_t *obj = (jl_value_t*)(image_base + item);
-        if (jl_is_globalref(obj)) {
-            jl_globalref_t *r = (jl_globalref_t*)obj;
-            if (r->binding == NULL) {
-                jl_globalref_t *gr = (jl_globalref_t*)jl_module_globalref(r->mod, r->name);
-                r->binding = gr->binding;
-                jl_gc_wb(r, gr->binding);
+    if (s.incremental) {
+        int no_replacement = jl_atomic_load_relaxed(&jl_first_image_replacement_world) == ~(size_t)0;
+        for (size_t i = 0; i < s.fixup_objs.len; i++) {
+            uintptr_t item = (uintptr_t)s.fixup_objs.items[i];
+            jl_value_t *obj = (jl_value_t*)(image_base + item);
+            if (jl_is_module(obj)) {
+                jl_module_t *mod = (jl_module_t*)obj;
+                size_t mod_idx = external_blob_index((jl_value_t*)mod);
+                jl_svec_t *table = jl_atomic_load_relaxed(&mod->bindings);
+                int unchanged_implicit = no_replacement || all_usings_unchanged_implicit(mod);
+                for (size_t i = 0; i < jl_svec_len(table); i++) {
+                    jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
+                    if ((jl_value_t*)b == jl_nothing)
+                        continue;
+                    jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions);
+                    if (!jl_validate_binding_partition(b, bpart, mod_idx, unchanged_implicit, no_replacement)) {
+                        unchanged_implicit = all_usings_unchanged_implicit(mod);
+                    }
+                }
             }
         }
     }
@@ -3312,11 +4274,6 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
 
     s.s = &sysimg;
     jl_update_all_fptrs(&s, image); // fptr relocs and registration
-    if (!ccallable_list) {
-        // TODO: jl_sysimg_handle or img_handle?
-        jl_reinit_ccallable(&s.ccallable_list, image_base, jl_sysimg_handle);
-        arraylist_free(&s.ccallable_list);
-    }
     s.s = NULL;
 
     ios_close(&fptr_record);
@@ -3331,11 +4288,26 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     arraylist_push(&jl_linkage_blobs, (void*)image_base);
     arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg));
     arraylist_push(&jl_image_relocs, (void*)relocs_base);
+    if (restored == NULL) {
+        arraylist_push(&jl_top_mods, (void*)jl_top_module);
+    } else {
+        size_t len = jl_array_nrows(*restored);
+        assert(len > 0);
+        jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(*restored, len-1);
+        // Ordinarily set during deserialization, but our compiler stub image,
+        // just returns a reference to the sysimage version, so we set it here.
+        topmod->build_id.hi = checksum;
+        assert(jl_is_module(topmod));
+        arraylist_push(&jl_top_mods, (void*)topmod);
+    }
     jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t));
     rebuild_image_blob_tree();
 
     // jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1);
     jl_gc_enable(en);
+
+    if (s.incremental)
+        jl_add_methods(*extext_methods);
 }
 
 static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_t *checksum, int64_t *dataendpos, int64_t *datastartpos)
@@ -3346,9 +4318,11 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_
                 "Precompile file header verification checks failed.");
     }
     uint8_t flags = read_uint8(f);
-    if (pkgimage && !jl_match_cache_flags(flags)) {
+    if (pkgimage && !jl_match_cache_flags_current(flags)) {
         return jl_get_exceptionf(jl_errorexception_type, "Pkgimage flags mismatch");
     }
+    // Syntax version mismatch is not fatal to load
+    (void)read_uint8(f); // syntax_version
     if (!pkgimage) {
         // skip past the worklist
         size_t len;
@@ -3365,7 +4339,7 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_
 }
 
 // TODO?: refactor to make it easier to create the "package inspector"
-static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc)
+static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, int needs_permalloc)
 {
     JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg);
     jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, pkgname);
@@ -3379,19 +4353,18 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im
 
     assert(datastartpos > 0 && datastartpos < dataendpos);
     needs_permalloc = jl_options.permalloc_pkgimg || needs_permalloc;
+
     jl_value_t *restored = NULL;
-    jl_array_t *init_order = NULL, *extext_methods = NULL, *new_specializations = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL;
+    jl_array_t *init_order = NULL, *extext_methods = NULL, *internal_methods = NULL, *new_ext_cis = NULL, *method_roots_list = NULL;
     jl_svec_t *cachesizes_sv = NULL;
-    char *base;
-    arraylist_t ccallable_list;
-    JL_GC_PUSH8(&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &cachesizes_sv);
+    JL_GC_PUSH7(&restored, &init_order, &extext_methods, &internal_methods, &new_ext_cis, &method_roots_list, &cachesizes_sv);
 
     { // make a permanent in-memory copy of f (excluding the header)
         ios_bufmode(f, bm_none);
         JL_SIGATOMIC_BEGIN();
         size_t len = dataendpos - datastartpos;
         char *sysimg;
-        bool success = !needs_permalloc;
+        int success = !needs_permalloc;
         ios_seek(f, datastartpos);
         if (needs_permalloc)
             sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0);
@@ -3408,20 +4381,39 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im
                 ios_close(f);
             ios_static_buffer(f, sysimg, len);
             pkgcachesizes cachesizes;
-            jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &base, &ccallable_list, &cachesizes);
+            jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &internal_methods, &new_ext_cis, &method_roots_list, &cachesizes);
             JL_SIGATOMIC_END();
 
-            // Insert method extensions
-            jl_insert_methods(extext_methods);
-            // No special processing of `new_specializations` is required because recaching handled it
             // Add roots to methods
-            jl_copy_roots(method_roots_list, jl_worklist_key((jl_array_t*)restored));
-            // Handle edges
-            size_t world = jl_atomic_load_acquire(&jl_world_counter);
-            jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_specializations, world); // restore external backedges (needs to be last)
-            // reinit ccallables
-            jl_reinit_ccallable(&ccallable_list, base, NULL);
-            arraylist_free(&ccallable_list);
+            int failed = jl_copy_roots(method_roots_list, jl_worklist_key((jl_array_t*)restored));
+            if (failed != 0) {
+                jl_printf(JL_STDERR, "Error copying roots to methods from Module: %s\n", pkgname);
+                abort();
+            }
+            // Insert method extensions and handle edges
+            int new_methods = jl_array_nrows(extext_methods) > 0;
+            if (!new_methods) {
+                size_t i, l = jl_array_nrows(internal_methods);
+                for (i = 0; i < l; i++) {
+                    jl_value_t *obj = jl_array_ptr_ref(internal_methods, i);
+                    if (jl_is_method(obj)) {
+                        new_methods = 1;
+                        break;
+                    }
+                }
+            }
+            JL_LOCK(&world_counter_lock);
+            // allocate a world for the new methods, and insert them there, invalidating content as needed
+            size_t world = jl_atomic_load_relaxed(&jl_world_counter);
+            if (new_methods)
+                world += 1;
+            jl_activate_methods(extext_methods, internal_methods, world, pkgname);
+            // TODO: inject internal_methods into caches here, so the system can see them immediately as potential candidates (before validation)
+            // allow users to start running in this updated world
+            if (new_methods)
+                jl_atomic_store_release(&jl_world_counter, world);
+            // now permit more methods to be added again
+            JL_UNLOCK(&world_counter_lock);
 
             if (completeinfo) {
                 cachesizes_sv = jl_alloc_svec(7);
@@ -3432,11 +4424,10 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im
                 jl_svecset(cachesizes_sv, 4, jl_box_long(cachesizes.reloclist));
                 jl_svecset(cachesizes_sv, 5, jl_box_long(cachesizes.gvarlist));
                 jl_svecset(cachesizes_sv, 6, jl_box_long(cachesizes.fptrlist));
-                restored = (jl_value_t*)jl_svec(8, restored, init_order, extext_methods, new_specializations, method_roots_list,
-                                                   ext_targets, edges, cachesizes_sv);
+                restored = (jl_value_t*)jl_svec(5, restored, init_order, internal_methods, method_roots_list, cachesizes_sv);
             }
             else {
-                restored = (jl_value_t*)jl_svec(2, restored, init_order);
+                restored = (jl_value_t*)jl_svec(3, restored, init_order, internal_methods);
             }
         }
     }
@@ -3448,13 +4439,13 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im
 static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uint32_t checksum)
 {
     JL_TIMING(LOAD_IMAGE, LOAD_Sysimg);
-    jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+    jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL);
 }
 
-JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc)
+JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(jl_image_buf_t buf, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, int needs_permalloc)
 {
     ios_t f;
-    ios_static_buffer(&f, (char*)buf, sz);
+    ios_static_buffer(&f, (char*)buf.data, buf.size);
     jl_value_t *ret = jl_restore_package_image_from_stream(&f, image, depmods, completeinfo, pkgname, needs_permalloc);
     ios_close(&f);
     return ret;
@@ -3468,57 +4459,32 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *d
             "Cache file \"%s\" not found.\n", fname);
     }
     jl_image_t pkgimage = {};
-    jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, completeinfo, pkgname, true);
+    jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, completeinfo, pkgname, 1);
     ios_close(&f);
     return ret;
 }
 
-// TODO: need to enforce that the alignment of the buffer is suitable for vectors
-JL_DLLEXPORT void jl_restore_system_image(const char *fname)
+JL_DLLEXPORT void jl_restore_system_image(jl_image_t *image, jl_image_buf_t buf)
 {
-#ifndef JL_NDEBUG
-    char *dot = fname ? (char*)strrchr(fname, '.') : NULL;
-    int is_ji = (dot && !strcmp(dot, ".ji"));
-    assert((is_ji || jl_sysimg_handle) && "System image file not preloaded");
-#endif
+    ios_t f;
 
-    if (jl_sysimg_handle) {
-        // load the pre-compiled sysimage from jl_sysimg_handle
-        jl_load_sysimg_so();
-    }
-    else {
-        ios_t f;
-        if (ios_file(&f, fname, 1, 0, 0, 0) == NULL)
-            jl_errorf("System image file \"%s\" not found.", fname);
-        ios_bufmode(&f, bm_none);
-        JL_SIGATOMIC_BEGIN();
-        ios_seek_end(&f);
-        size_t len = ios_pos(&f);
-        char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0);
-        ios_seek(&f, 0);
-        if (ios_readall(&f, sysimg, len) != len)
-            jl_errorf("Error reading system image file.");
-        ios_close(&f);
-        uint32_t checksum = jl_crc32c(0, sysimg, len);
-        ios_static_buffer(&f, sysimg, len);
-        jl_restore_system_image_from_stream(&f, &sysimage, checksum);
-        ios_close(&f);
-        JL_SIGATOMIC_END();
-    }
-}
+    if (buf.kind == JL_IMAGE_KIND_NONE)
+        return;
+
+    if (buf.kind == JL_IMAGE_KIND_SO)
+        assert(image->fptrs.ptrs); // jl_init_processor_sysimg should already be run
 
-JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len)
-{
-    ios_t f;
     JL_SIGATOMIC_BEGIN();
-    ios_static_buffer(&f, (char*)buf, len);
-    uint32_t checksum = jl_crc32c(0, buf, len);
-    jl_restore_system_image_from_stream(&f, &sysimage, checksum);
+    ios_static_buffer(&f, (char *)buf.data, buf.size);
+
+    uint32_t checksum = jl_crc32c(0, buf.data, buf.size);
+    jl_restore_system_image_from_stream(&f, image, checksum);
+
     ios_close(&f);
     JL_SIGATOMIC_END();
 }
 
-JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods, int completeinfo, const char *pkgname)
+JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods, int completeinfo, const char *pkgname, int ignore_native)
 {
     void *pkgimg_handle = jl_dlopen(fname, JL_RTLD_LAZY);
     if (!pkgimg_handle) {
@@ -3532,18 +4498,27 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j
 #endif
         jl_errorf("Error opening package file %s: %s\n", fname, reason);
     }
-    const char *pkgimg_data;
-    jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1);
-    size_t *plen;
-    jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1);
 
-    jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle);
+    jl_image_buf_t buf = get_image_buf(pkgimg_handle, /* is_pkgimage */ 1);
 
-    jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, false);
+    jl_gc_notify_image_load(buf.data, buf.size);
+
+    // Despite the name, this function actually parses the pkgimage
+    jl_image_t pkgimage = jl_init_processor_pkgimg(buf);
+
+    if (ignore_native) {
+        // Must disable using native code in possible downstream users of this code:
+        // https://github.com/JuliaLang/julia/pull/52123#issuecomment-1959965395.
+        // The easiest way to do that is to disable it in all of them.
+        IMAGE_NATIVE_CODE_TAINTED = 1;
+    }
+
+    jl_value_t* mod = jl_restore_incremental_from_buf(buf, &pkgimage, depmods, completeinfo, pkgname, 0);
 
     return mod;
 }
 
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c
index bf1a830b608de..16ebcd4b4ee12 100644
--- a/src/staticdata_utils.c
+++ b/src/staticdata_utils.c
@@ -1,5 +1,5 @@
 // inverse of backedges graph (caller=>callees hash)
-jl_array_t *edges_map JL_GLOBALLY_ROOTED = NULL; // rooted for the duration of our uses of this
+jl_array_t *internal_methods JL_GLOBALLY_ROOTED = NULL; // rooted for the duration of our uses of this
 
 static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT
 {
@@ -45,16 +45,15 @@ int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeo
         jl_datatype_t *dt = (jl_datatype_t*)t;
         assert(jl_object_in_image((jl_value_t*)dt->name) && "type_in_worklist mistake?");
         jl_datatype_t *super = dt->super;
-        // check if super is news, since then we must be new also
-        // (it is also possible that super is indeterminate now, wait for `t`
-        // to be resolved, then will be determined later and fixed up by the
-        // delay_list, for this and any other references to it).
-        while (super != jl_any_type) {
-            assert(super);
+        // fast-path: check if super is in news, since then we must be new also
+        // (it is also possible that super is indeterminate or NULL right now,
+        // waiting for `t` to be resolved, then will be determined later as
+        // soon as possible afterwards).
+        while (super != NULL && super != jl_any_type) {
             if (ptrhash_has(news, (void*)super))
                 return 1;
             if (!(image_base < (char*)super && (char*)super <= image_base + sizeof_sysimg))
-               break; // fast-path for rejection of super
+               break; // the rest must all be non-new
             // otherwise super might be something that was not cached even though a later supertype might be
             // for example while handling `Type{Mask{4, U} where U}`, if we have `Mask{4, U} <: AbstractSIMDVector{4}`
             super = super->super;
@@ -74,7 +73,7 @@ int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeo
 static uint64_t jl_worklist_key(jl_array_t *worklist) JL_NOTSAFEPOINT
 {
     assert(jl_is_array(worklist));
-    size_t len = jl_array_len(worklist);
+    size_t len = jl_array_nrows(worklist);
     if (len > 0) {
         jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(worklist, len-1);
         assert(jl_is_module(topmod));
@@ -86,127 +85,395 @@ static uint64_t jl_worklist_key(jl_array_t *worklist) JL_NOTSAFEPOINT
 static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED /*FIXME*/;
 // Mutex for newly_inferred
 jl_mutex_t newly_inferred_mutex;
+extern jl_mutex_t world_counter_lock;
+static _Atomic(uint8_t) jl_tag_newly_inferred_enabled = 0;
+
+/**
+ * @brief Enable tagging of all newly inferred CodeInstances.
+ */
+JL_DLLEXPORT void jl_tag_newly_inferred_enable(void)
+{
+    jl_atomic_fetch_add(&jl_tag_newly_inferred_enabled, 1);  // FIXME overflow?
+}
+/**
+ * @brief Disable tagging of all newly inferred CodeInstances.
+ */
+JL_DLLEXPORT void jl_tag_newly_inferred_disable(void)
+{
+    jl_atomic_fetch_add(&jl_tag_newly_inferred_enabled, -1);  // FIXME underflow?
+}
+
 
 // Register array of newly-inferred MethodInstances
 // This gets called as the first step of Base.include_package_for_output
 JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred)
 {
-    assert(_newly_inferred == NULL || jl_is_array(_newly_inferred));
+    assert(_newly_inferred == NULL || _newly_inferred == jl_nothing || jl_is_array(_newly_inferred));
+    if (_newly_inferred == jl_nothing)
+        _newly_inferred = NULL;
     newly_inferred = (jl_array_t*) _newly_inferred;
 }
 
+static jl_array_t *queue_external_cis(jl_array_t *list, jl_query_cache *query_cache);
+
+JL_DLLEXPORT jl_array_t* jl_compute_new_ext_cis(void)
+{
+    if (newly_inferred == NULL)
+        return jl_alloc_vec_any(0);
+    jl_query_cache query_cache;
+    init_query_cache(&query_cache);
+    jl_array_t *new_ext_cis = queue_external_cis(newly_inferred, &query_cache);
+    destroy_query_cache(&query_cache);
+    return new_ext_cis;
+}
+
 JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* ci)
 {
+    if (!newly_inferred)
+        return;
+    uint8_t tag_newly_inferred = jl_atomic_load_relaxed(&jl_tag_newly_inferred_enabled);
+    if (tag_newly_inferred) {
+        jl_method_instance_t *mi = jl_get_ci_mi((jl_code_instance_t*)ci);
+        uint8_t miflags = jl_atomic_load_relaxed(&mi->flags);
+        jl_atomic_store_relaxed(&mi->flags, miflags | JL_MI_FLAGS_MASK_PRECOMPILED);
+    }
     JL_LOCK(&newly_inferred_mutex);
-    size_t end = jl_array_len(newly_inferred);
+    size_t end = jl_array_nrows(newly_inferred);
     jl_array_grow_end(newly_inferred, 1);
-    jl_arrayset(newly_inferred, ci, end);
+    jl_array_ptr_set(newly_inferred, end, ci);
     JL_UNLOCK(&newly_inferred_mutex);
 }
 
 
+static jl_array_t *inference_entrance_backtraces JL_GLOBALLY_ROOTED /*FIXME*/ = NULL;
+// Mutex for inference_entrance_backtraces
+jl_mutex_t inference_entrance_backtraces_mutex;
+
+// Register array of inference entrance backtraces
+JL_DLLEXPORT void jl_set_inference_entrance_backtraces(jl_value_t* _inference_entrance_backtraces)
+{
+    assert(_inference_entrance_backtraces == NULL || _inference_entrance_backtraces == jl_nothing || jl_is_array(_inference_entrance_backtraces));
+    if (_inference_entrance_backtraces == jl_nothing)
+        _inference_entrance_backtraces = NULL;
+    JL_LOCK(&inference_entrance_backtraces_mutex);
+    inference_entrance_backtraces = (jl_array_t*) _inference_entrance_backtraces;
+    JL_UNLOCK(&inference_entrance_backtraces_mutex);
+}
+
+
+JL_DLLEXPORT void jl_push_inference_entrance_backtraces(jl_value_t* ci)
+{
+    JL_LOCK(&inference_entrance_backtraces_mutex);
+    if (inference_entrance_backtraces == NULL) {
+        JL_UNLOCK(&inference_entrance_backtraces_mutex);
+        return;
+    }
+    jl_value_t* backtrace = jl_backtrace_from_here(0, 1);
+    size_t end = jl_array_nrows(inference_entrance_backtraces);
+    jl_array_grow_end(inference_entrance_backtraces, 2);
+    jl_array_ptr_set(inference_entrance_backtraces, end, ci);
+    jl_array_ptr_set(inference_entrance_backtraces, end + 1, backtrace);
+    JL_UNLOCK(&inference_entrance_backtraces_mutex);
+}
+
 // compute whether a type references something internal to worklist
 // and thus could not have existed before deserialize
 // and thus does not need delayed unique-ing
-static int type_in_worklist(jl_value_t *v) JL_NOTSAFEPOINT
+static int type_in_worklist(jl_value_t *v, jl_query_cache *cache) JL_NOTSAFEPOINT
 {
     if (jl_object_in_image(v))
         return 0; // fast-path for rejection
+
+    void *cached = HT_NOTFOUND;
+    if (cache != NULL)
+        cached = ptrhash_get(&cache->type_in_worklist, v);
+
+    // fast-path for memoized results
+    if (cached != HT_NOTFOUND)
+        return cached == v;
+
+    int result = 0;
     if (jl_is_uniontype(v)) {
         jl_uniontype_t *u = (jl_uniontype_t*)v;
-        return type_in_worklist(u->a) ||
-               type_in_worklist(u->b);
+        result = type_in_worklist(u->a, cache) ||
+                 type_in_worklist(u->b, cache);
     }
     else if (jl_is_unionall(v)) {
         jl_unionall_t *ua = (jl_unionall_t*)v;
-        return type_in_worklist((jl_value_t*)ua->var) ||
-               type_in_worklist(ua->body);
+        result = type_in_worklist((jl_value_t*)ua->var, cache) ||
+                 type_in_worklist(ua->body, cache);
     }
     else if (jl_is_typevar(v)) {
         jl_tvar_t *tv = (jl_tvar_t*)v;
-        return type_in_worklist(tv->lb) ||
-               type_in_worklist(tv->ub);
+        result = type_in_worklist(tv->lb, cache) ||
+                 type_in_worklist(tv->ub, cache);
     }
     else if (jl_is_vararg(v)) {
         jl_vararg_t *tv = (jl_vararg_t*)v;
-        if (tv->T && type_in_worklist(tv->T))
-            return 1;
-        if (tv->N && type_in_worklist(tv->N))
-            return 1;
+        result = ((tv->T && type_in_worklist(tv->T, cache)) ||
+                  (tv->N && type_in_worklist(tv->N, cache)));
     }
     else if (jl_is_datatype(v)) {
         jl_datatype_t *dt = (jl_datatype_t*)v;
-        if (!jl_object_in_image((jl_value_t*)dt->name))
-            return 1;
-        jl_svec_t *tt = dt->parameters;
-        size_t i, l = jl_svec_len(tt);
-        for (i = 0; i < l; i++)
-            if (type_in_worklist(jl_tparam(dt, i)))
-                return 1;
+        if (!jl_object_in_image((jl_value_t*)dt->name)) {
+            result = 1;
+        }
+        else {
+            jl_svec_t *tt = dt->parameters;
+            size_t i, l = jl_svec_len(tt);
+            for (i = 0; i < l; i++) {
+                if (type_in_worklist(jl_tparam(dt, i), cache)) {
+                    result = 1;
+                    break;
+                }
+            }
+        }
     }
     else {
-        return type_in_worklist(jl_typeof(v));
+        return type_in_worklist(jl_typeof(v), cache);
     }
-    return 0;
+
+    // Memoize result
+    if (cache != NULL)
+        ptrhash_put(&cache->type_in_worklist, (void*)v, result ? (void*)v : NULL);
+
+    return result;
 }
 
+// Stack frame for iterative has_backedge_to_worklist implementation
+enum backedge_state {
+    STATE_VISITING,                 // Initial visit, setup phase
+    STATE_PROCESSING_EDGES,         // Processing backedges loop
+    STATE_FINISHING                 // Cleanup and result propagation
+};
+
+typedef struct {
+    jl_method_instance_t *mi;           // Current method instance
+    size_t edge_index;                  // Current position in backedges array
+    size_t backedges_len;               // Total backedges count
+    jl_array_t *backedges;              // Backedges array
+    int depth;                          // Stack depth when this frame was created
+    int cycle;                          // Cycle depth tracking
+    int found;                          // Result found flag
+    int child_result;                   // Result from child recursive call
+    enum backedge_state state;
+} backedge_stack_frame_t;
+
 // When we infer external method instances, ensure they link back to the
 // package. Otherwise they might be, e.g., for external macros.
 // Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
-static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, arraylist_t *stack)
+static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, arraylist_t *stack, jl_query_cache *query_cache)
 {
-    jl_module_t *mod = mi->def.module;
-    if (jl_is_method(mod))
-        mod = ((jl_method_t*)mod)->module;
-    assert(jl_is_module(mod));
-    if (mi->precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) {
-        return 1;
-    }
-    if (!mi->backedges) {
-        return 0;
-    }
-    void **bp = ptrhash_bp(visited, mi);
-    // HT_NOTFOUND: not yet analyzed
-    // HT_NOTFOUND + 1: no link back
-    // HT_NOTFOUND + 2: does link back
-    // HT_NOTFOUND + 3: does link back, and included in new_specializations already
-    // HT_NOTFOUND + 4 + depth: in-progress
-    int found = (char*)*bp - (char*)HT_NOTFOUND;
-    if (found)
-        return found - 1;
-    arraylist_push(stack, (void*)mi);
-    int depth = stack->len;
-    *bp = (void*)((char*)HT_NOTFOUND + 4 + depth); // preliminarily mark as in-progress
-    size_t i = 0, n = jl_array_len(mi->backedges);
-    int cycle = depth;
-    while (i < n) {
-        jl_method_instance_t *be;
-        i = get_next_edge(mi->backedges, i, NULL, &be);
-        int child_found = has_backedge_to_worklist(be, visited, stack);
-        if (child_found == 1 || child_found == 2) {
-            // found what we were looking for, so terminate early
-            found = 1;
-            break;
-        }
-        else if (child_found >= 3 && child_found - 3 < cycle) {
-            // record the cycle will resolve at depth "cycle"
-            cycle = child_found - 3;
-            assert(cycle);
+    // Use arraylist_t for explicit stack of processing frames
+    arraylist_t frame_stack;
+    arraylist_new(&frame_stack, 0);
+
+    // Push initial frame
+    backedge_stack_frame_t initial_frame = {
+        .mi = mi,
+        .edge_index = 0,
+        .backedges_len = 0,
+        .backedges = NULL,
+        .depth = 0,
+        .cycle = 0,
+        .found = 0,
+        .child_result = 0,
+        .state = STATE_VISITING
+    };
+    arraylist_push(&frame_stack, memcpy(malloc(sizeof(backedge_stack_frame_t)), &initial_frame, sizeof(backedge_stack_frame_t)));
+
+    int final_result = 0;
+    while (1) {
+        backedge_stack_frame_t *current = (backedge_stack_frame_t*)frame_stack.items[frame_stack.len - 1];
+        JL_GC_PROMISE_ROOTED(current->mi);
+        JL_GC_PROMISE_ROOTED(current->backedges);
+
+        switch (current->state) {
+            case STATE_VISITING: {
+                jl_module_t *mod = current->mi->def.module;
+                if (jl_is_method(mod))
+                    mod = ((jl_method_t*)mod)->module;
+                assert(jl_is_module(mod));
+                uint8_t is_precompiled = jl_atomic_load_relaxed(&current->mi->flags) & JL_MI_FLAGS_MASK_PRECOMPILED;
+
+                if (is_precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(current->mi->specTypes, query_cache)) {
+                    if (frame_stack.len > 1) {
+                        final_result = 1;
+                        goto propagate_to_parent;
+                    }
+                    current->found = 1;
+                    // Continue to setup below, then go to finishing
+                }
+                else if (!current->mi->backedges) {
+                    if (frame_stack.len > 1) {
+                        final_result = 0;
+                        goto propagate_to_parent;
+                    }
+                    current->found = 0;
+                    // Setup minimal state for cleanup, skip backedges processing
+                    arraylist_push(stack, (void*)current->mi);
+                    current->depth = stack->len;
+                    void **bp = ptrhash_bp(visited, current->mi);
+                    *bp = (void*)((char*)HT_NOTFOUND + 4 + current->depth);
+                    current->cycle = current->depth;
+                    current->state = STATE_FINISHING;
+                    break;
+                }
+
+                void **bp = ptrhash_bp(visited, current->mi);
+                // HT_NOTFOUND: not yet analyzed
+                // HT_NOTFOUND + 1: no link back
+                // HT_NOTFOUND + 2: does link back
+                // HT_NOTFOUND + 3: does link back, and included in new_ext_cis already
+                // HT_NOTFOUND + 4 + depth: in-progress
+                int found = (char*)*bp - (char*)HT_NOTFOUND;
+                if (found) {
+                    if (frame_stack.len > 1) {
+                        final_result = found - 1;
+                        goto propagate_to_parent;
+                    }
+                    current->found = found - 1;
+                }
+
+                // Setup for processing
+                arraylist_push(stack, (void*)current->mi);
+                current->depth = stack->len;
+                *bp = (void*)((char*)HT_NOTFOUND + 4 + current->depth); // preliminarily mark as in-progress
+                current->backedges = jl_mi_get_backedges(current->mi);
+                current->backedges_len = current->backedges ? jl_array_nrows(current->backedges) : 0;
+                current->cycle = current->depth;
+                current->edge_index = 0;
+                // Don't reset current->found if it was already set by early termination logic above
+                if (current->found == 0) {
+                    current->state = STATE_PROCESSING_EDGES;
+                }
+                else {
+                    // Early termination case - skip processing and go straight to finishing
+                    current->state = STATE_FINISHING;
+                }
+                break;
+            }
+
+            case STATE_PROCESSING_EDGES: {
+                // If we have a child result to process, handle it first
+                if (current->child_result != 0) {
+                    if (current->child_result == 1 || current->child_result == 2) {
+                        // found what we were looking for, so terminate early
+                        current->found = 1;
+                        current->state = STATE_FINISHING;
+                        break;
+                    }
+                    else if (current->child_result >= 3 && current->child_result - 3 < current->cycle) {
+                        // record the cycle will resolve at depth "cycle"
+                        current->cycle = current->child_result - 3;
+                        assert(current->cycle);
+                    }
+                    current->child_result = 0; // Clear after processing
+                }
+
+                // Process backedges iteratively
+                while (current->edge_index < current->backedges_len && current->backedges) {
+                    jl_code_instance_t *be;
+                    current->edge_index = get_next_edge(current->backedges, current->edge_index, NULL, &be);
+                    if (!be)
+                        continue;
+                    JL_GC_PROMISE_ROOTED(be); // get_next_edge propagates the edge for us here
+
+                    jl_method_instance_t *child_mi = jl_get_ci_mi(be);
+
+                    // Check if we need to recurse (push new frame) or handle result
+                    jl_module_t *child_mod = child_mi->def.module;
+                    if (jl_is_method(child_mod))
+                        child_mod = ((jl_method_t*)child_mod)->module;
+                    assert(jl_is_module(child_mod));
+                    uint8_t child_is_precompiled = jl_atomic_load_relaxed(&child_mi->flags) & JL_MI_FLAGS_MASK_PRECOMPILED;
+
+                    // Early termination check for child
+                    if (child_is_precompiled || !jl_object_in_image((jl_value_t*)child_mod) || type_in_worklist(child_mi->specTypes, query_cache)) {
+                        // found what we were looking for, so terminate early
+                        current->found = 1;
+                        break;
+                    }
+
+                    if (!child_mi->backedges) {
+                        // This child returns 0, continue with next edge
+                        continue;
+                    }
+
+                    void **child_bp = ptrhash_bp(visited, child_mi);
+                    int child_found = (char*)*child_bp - (char*)HT_NOTFOUND;
+                    if (child_found) {
+                        int child_result = child_found - 1;
+                        if (child_result == 1 || child_result == 2) {
+                            // found what we were looking for, so terminate early
+                            current->found = 1;
+                            break;
+                        }
+                        else if (child_result >= 3 && child_result - 3 < current->cycle) {
+                            // record the cycle will resolve at depth "cycle"
+                            current->cycle = child_result - 3;
+                            assert(current->cycle);
+                        }
+                    }
+                    else {
+                        // Need to process child - push new frame and pause current processing
+                        backedge_stack_frame_t child_frame = {
+                            .mi = child_mi,
+                            .edge_index = 0,
+                            .backedges_len = 0,
+                            .backedges = NULL,
+                            .depth = 0,
+                            .cycle = 0,
+                            .found = 0,
+                            .child_result = 0,
+                            .state = STATE_VISITING
+                        };
+                        arraylist_push(&frame_stack, memcpy(malloc(sizeof(backedge_stack_frame_t)), &child_frame, sizeof(backedge_stack_frame_t)));
+                        goto continue_main_loop; // Resume processing after child completes
+                    }
+                }
+
+                current->state = STATE_FINISHING;
+                break;
+            }
+
+            case STATE_FINISHING: {
+                if (!current->found && current->cycle != current->depth) {
+                    final_result = current->cycle + 3;
+                    goto propagate_to_parent;
+                }
+
+                // If we are the top of the current cycle, now mark all other parts of
+                // our cycle with what we found.
+                // Or if we found a backedge, also mark all of the other parts of the
+                // cycle as also having an backedge.
+                while (stack->len >= current->depth) {
+                    void *mi_ptr = arraylist_pop(stack);
+                    void **bp = ptrhash_bp(visited, mi_ptr);
+                    assert((char*)*bp - (char*)HT_NOTFOUND == 5 + stack->len);
+                    *bp = (void*)((char*)HT_NOTFOUND + 1 + current->found);
+                }
+
+                final_result = current->found;
+                goto propagate_to_parent;
+            }
         }
+
+        continue_main_loop:
+            continue;
+
+        propagate_to_parent:
+            // Propagate result to parent
+            free(arraylist_pop(&frame_stack));
+            if (frame_stack.len == 0)
+                break;
+            backedge_stack_frame_t *parent = (backedge_stack_frame_t*)frame_stack.items[frame_stack.len - 1];
+            parent->child_result = final_result;
     }
-    if (!found && cycle != depth)
-        return cycle + 3;
-    // If we are the top of the current cycle, now mark all other parts of
-    // our cycle with what we found.
-    // Or if we found a backedge, also mark all of the other parts of the
-    // cycle as also having an backedge.
-    while (stack->len >= depth) {
-        void *mi = arraylist_pop(stack);
-        bp = ptrhash_bp(visited, mi);
-        assert((char*)*bp - (char*)HT_NOTFOUND == 5 + stack->len);
-        *bp = (void*)((char*)HT_NOTFOUND + 1 + found);
-    }
-    return found;
+    // Cleanup remaining frames
+    assert(frame_stack.len == 0);
+    arraylist_free(&frame_stack);
+    return final_result;
 }
 
 // Given the list of CodeInstances that were inferred during the build, select
@@ -214,7 +481,7 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited,
 // from the worklist or explicitly added by a `precompile` statement, and
 // (4) are the most recently computed result for that method.
 // These will be preserved in the image.
-static jl_array_t *queue_external_cis(jl_array_t *list)
+static jl_array_t *queue_external_cis(jl_array_t *list, jl_query_cache *query_cache)
 {
     if (list == NULL)
         return NULL;
@@ -222,165 +489,53 @@ static jl_array_t *queue_external_cis(jl_array_t *list)
     htable_t visited;
     arraylist_t stack;
     assert(jl_is_array(list));
-    size_t n0 = jl_array_len(list);
+    size_t n0 = jl_array_nrows(list);
     htable_new(&visited, n0);
     arraylist_new(&stack, 0);
-    jl_array_t *new_specializations = jl_alloc_vec_any(0);
-    JL_GC_PUSH1(&new_specializations);
+    jl_array_t *new_ext_cis = jl_alloc_vec_any(0);
+    JL_GC_PUSH1(&new_ext_cis);
     for (i = n0; i-- > 0; ) {
         jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(list, i);
         assert(jl_is_code_instance(ci));
-        if (!ci->relocatability)
-            continue;
-        jl_method_instance_t *mi = ci->def;
+        jl_method_instance_t *mi = jl_get_ci_mi(ci);
         jl_method_t *m = mi->def.method;
-        if (ci->inferred && jl_is_method(m) && jl_object_in_image((jl_value_t*)m->module)) {
-            int found = has_backedge_to_worklist(mi, &visited, &stack);
+        int dispatch_status = jl_atomic_load_relaxed(&m->dispatch_status);
+        if (!(dispatch_status & METHOD_SIG_LATEST_WHICH))
+            continue; // ignore replaced methods
+        if (ci->owner == jl_nothing && jl_atomic_load_relaxed(&ci->inferred) && jl_is_method(m) && jl_object_in_image((jl_value_t*)m->module)) {
+            int found = has_backedge_to_worklist(mi, &visited, &stack, query_cache);
             assert(found == 0 || found == 1 || found == 2);
             assert(stack.len == 0);
-            if (found == 1 && ci->max_world == ~(size_t)0) {
-                void **bp = ptrhash_bp(&visited, mi);
-                if (*bp != (void*)((char*)HT_NOTFOUND + 3)) {
-                    *bp = (void*)((char*)HT_NOTFOUND + 3);
-                    jl_array_ptr_1d_push(new_specializations, (jl_value_t*)ci);
-                }
+            if (found == 1) {
+                jl_array_ptr_1d_push(new_ext_cis, (jl_value_t*)ci);
             }
         }
     }
     htable_free(&visited);
     arraylist_free(&stack);
     JL_GC_POP();
-    // reverse new_specializations
-    n0 = jl_array_len(new_specializations);
-    jl_value_t **news = (jl_value_t**)jl_array_data(new_specializations);
+    // reverse new_ext_cis
+    n0 = jl_array_nrows(new_ext_cis);
+    jl_value_t **news = jl_array_data(new_ext_cis, jl_value_t*);
     for (i = 0; i < n0; i++) {
         jl_value_t *temp = news[i];
         news[i] = news[n0 - i - 1];
         news[n0 - i - 1] = temp;
     }
-    return new_specializations;
-}
-
-// New roots for external methods
-static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_specializations, uint64_t key)
-{
-    htable_t mset;
-    htable_new(&mset, 0);
-    size_t l = new_specializations ? jl_array_len(new_specializations) : 0;
-    for (size_t i = 0; i < l; i++) {
-        jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_specializations, i);
-        assert(jl_is_code_instance(ci));
-        jl_method_t *m = ci->def->def.method;
-        assert(jl_is_method(m));
-        ptrhash_put(&mset, (void*)m, (void*)m);
-    }
-    int nwithkey;
-    void *const *table = mset.table;
-    jl_array_t *newroots = NULL;
-    JL_GC_PUSH1(&newroots);
-    for (size_t i = 0; i < mset.size; i += 2) {
-        if (table[i+1] != HT_NOTFOUND) {
-            jl_method_t *m = (jl_method_t*)table[i];
-            assert(jl_is_method(m));
-            nwithkey = nroots_with_key(m, key);
-            if (nwithkey) {
-                jl_array_ptr_1d_push(roots, (jl_value_t*)m);
-                newroots = jl_alloc_vec_any(nwithkey);
-                jl_array_ptr_1d_push(roots, (jl_value_t*)newroots);
-                rle_iter_state rootiter = rle_iter_init(0);
-                uint64_t *rletable = NULL;
-                size_t nblocks2 = 0, nroots = jl_array_len(m->roots), k = 0;
-                if (m->root_blocks) {
-                    rletable = (uint64_t*)jl_array_data(m->root_blocks);
-                    nblocks2 = jl_array_len(m->root_blocks);
-                }
-                while (rle_iter_increment(&rootiter, nroots, rletable, nblocks2))
-                    if (rootiter.key == key)
-                        jl_array_ptr_set(newroots, k++, jl_array_ptr_ref(m->roots, rootiter.i));
-                assert(k == nwithkey);
-            }
-        }
-    }
-    JL_GC_POP();
-    htable_free(&mset);
+    return new_ext_cis;
 }
 
-// Create the forward-edge map (caller => callees)
-// the intent of these functions is to invert the backedges tree
-// for anything that points to a method not part of the worklist
-//
-// from MethodTables
-static void jl_collect_missing_backedges(jl_methtable_t *mt)
-{
-    jl_array_t *backedges = mt->backedges;
-    if (backedges) {
-        size_t i, l = jl_array_len(backedges);
-        for (i = 1; i < l; i += 2) {
-            jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i);
-            jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1);  // signature of abstract callee
-            jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL);
-            if (edges == NULL) {
-                edges = jl_alloc_vec_any(0);
-                JL_GC_PUSH1(&edges);
-                edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL);
-                JL_GC_POP();
-            }
-            jl_array_ptr_1d_push(edges, NULL);
-            jl_array_ptr_1d_push(edges, missing_callee);
-        }
-    }
-}
-
-
-// from MethodInstances
-static void collect_backedges(jl_method_instance_t *callee, int internal)
-{
-    jl_array_t *backedges = callee->backedges;
-    if (backedges) {
-        size_t i = 0, l = jl_array_len(backedges);
-        while (i < l) {
-            jl_value_t *invokeTypes;
-            jl_method_instance_t *caller;
-            i = get_next_edge(backedges, i, &invokeTypes, &caller);
-            jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL);
-            if (edges == NULL) {
-                edges = jl_alloc_vec_any(0);
-                JL_GC_PUSH1(&edges);
-                edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL);
-                JL_GC_POP();
-            }
-            jl_array_ptr_1d_push(edges, invokeTypes);
-            jl_array_ptr_1d_push(edges, (jl_value_t*)callee);
-        }
-    }
-}
-
-
-// For functions owned by modules not on the worklist, call this on each method.
+// For every method:
 // - if the method is owned by a worklist module, add it to the list of things to be
-//   fully serialized
-// - Collect all backedges (may be needed later when we invert this list).
+//   verified on reloading
+// - if the method is extext, record that it needs to be reinserted later in the method table
 static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure)
 {
     jl_array_t *s = (jl_array_t*)closure;
     jl_method_t *m = ml->func.method;
-    if (s && !jl_object_in_image((jl_value_t*)m->module)) {
-        jl_array_ptr_1d_push(s, (jl_value_t*)m);
-    }
-    if (edges_map == NULL)
-        return 1;
-    jl_value_t *specializations = jl_atomic_load_relaxed(&m->specializations);
-    if (!jl_is_svec(specializations)) {
-        jl_method_instance_t *callee = (jl_method_instance_t*)specializations;
-        collect_backedges(callee, !s);
-    }
-    else {
-        size_t i, l = jl_svec_len(specializations);
-        for (i = 0; i < l; i++) {
-            jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i);
-            if ((jl_value_t*)callee != jl_nothing)
-                collect_backedges(callee, !s);
-        }
+    if (!jl_object_in_image((jl_value_t*)m->module)) {
+        if (s)
+            jl_array_ptr_1d_push(s, (jl_value_t*)m); // extext
     }
     return 1;
 }
@@ -388,186 +543,17 @@ static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure)
 static int jl_collect_methtable_from_mod(jl_methtable_t *mt, void *env)
 {
     if (!jl_object_in_image((jl_value_t*)mt))
-        env = NULL; // do not collect any methods from here
+        env = NULL; // mark internal, not extext
     jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), jl_collect_methcache_from_mod, env);
-    if (env && edges_map)
-        jl_collect_missing_backedges(mt);
     return 1;
 }
 
 // Collect methods of external functions defined by modules in the worklist
 // "extext" = "extending external"
 // Also collect relevant backedges
-static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m)
-{
-    foreach_mtable_in_module(m, jl_collect_methtable_from_mod, s);
-}
-
-static void jl_record_edges(jl_method_instance_t *caller, arraylist_t *wq, jl_array_t *edges)
-{
-    jl_array_t *callees = NULL;
-    JL_GC_PUSH2(&caller, &callees);
-    callees = (jl_array_t*)jl_eqtable_pop(edges_map, (jl_value_t*)caller, NULL, NULL);
-    if (callees != NULL) {
-        jl_array_ptr_1d_push(edges, (jl_value_t*)caller);
-        jl_array_ptr_1d_push(edges, (jl_value_t*)callees);
-        size_t i, l = jl_array_len(callees);
-        for (i = 1; i < l; i += 2) {
-            jl_method_instance_t *c = (jl_method_instance_t*)jl_array_ptr_ref(callees, i);
-            if (c && jl_is_method_instance(c)) {
-                arraylist_push(wq, c);
-            }
-        }
-    }
-    JL_GC_POP();
-}
-
-
-// Extract `edges` and `ext_targets` from `edges_map`
-// `edges` = [caller1, targets_indexes1, ...], the list of methods and their edges
-// `ext_targets` is [invokesig1, callee1, matches1, ...], the edges for each target
-static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *external_cis, size_t world)
+static void jl_collect_extext_methods(jl_array_t *s, jl_array_t *mod_array)
 {
-    htable_t external_mis;
-    htable_new(&external_mis, 0);
-    if (external_cis) {
-        for (size_t i = 0; i < jl_array_len(external_cis); i++) {
-            jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(external_cis, i);
-            jl_method_instance_t *mi = ci->def;
-            ptrhash_put(&external_mis, (void*)mi, (void*)mi);
-        }
-    }
-    arraylist_t wq;
-    arraylist_new(&wq, 0);
-    void **table = (void**)jl_array_data(edges_map);    // edges_map is caller => callees
-    size_t table_size = jl_array_len(edges_map);
-    for (size_t i = 0; i < table_size; i += 2) {
-        assert(table == jl_array_data(edges_map) && table_size == jl_array_len(edges_map) &&
-               "edges_map changed during iteration");
-        jl_method_instance_t *caller = (jl_method_instance_t*)table[i];
-        jl_array_t *callees = (jl_array_t*)table[i + 1];
-        if (callees == NULL)
-            continue;
-        assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method));
-        if (!jl_object_in_image((jl_value_t*)caller->def.method->module) ||
-            ptrhash_get(&external_mis, caller) != HT_NOTFOUND) {
-            jl_record_edges(caller, &wq, edges);
-        }
-    }
-    htable_free(&external_mis);
-    while (wq.len) {
-        jl_method_instance_t *caller = (jl_method_instance_t*)arraylist_pop(&wq);
-        jl_record_edges(caller, &wq, edges);
-    }
-    arraylist_free(&wq);
-    edges_map = NULL;
-    htable_t edges_map2;
-    htable_new(&edges_map2, 0);
-    htable_t edges_ids;
-    size_t l = edges ? jl_array_len(edges) : 0;
-    htable_new(&edges_ids, l);
-    for (size_t i = 0; i < l / 2; i++) {
-        jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, i * 2);
-        void *target = (void*)((char*)HT_NOTFOUND + i + 1);
-        ptrhash_put(&edges_ids, (void*)caller, target);
-    }
-    // process target list to turn it into a memoized validity table
-    // and compute the old methods list, ready for serialization
-    jl_value_t *matches = NULL;
-    jl_array_t *callee_ids = NULL;
-    jl_value_t *sig = NULL;
-    JL_GC_PUSH3(&matches, &callee_ids, &sig);
-    for (size_t i = 0; i < l; i += 2) {
-        jl_array_t *callees = (jl_array_t*)jl_array_ptr_ref(edges, i + 1);
-        size_t l = jl_array_len(callees);
-        callee_ids = jl_alloc_array_1d(jl_array_int32_type, l + 1);
-        int32_t *idxs = (int32_t*)jl_array_data(callee_ids);
-        idxs[0] = 0;
-        size_t nt = 0;
-        for (size_t j = 0; j < l; j += 2) {
-            jl_value_t *invokeTypes = jl_array_ptr_ref(callees, j);
-            jl_value_t *callee = jl_array_ptr_ref(callees, j + 1);
-            assert(callee && "unsupported edge");
-
-            if (jl_is_method_instance(callee)) {
-                jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method);
-                if (!jl_object_in_image((jl_value_t*)mt))
-                    continue;
-            }
-
-            // (nullptr, c) => call
-            // (invokeTypes, c) => invoke
-            // (nullptr, invokeTypes) => missing call
-            // (invokeTypes, nullptr) => missing invoke (unused--inferred as Any)
-            void *target = ptrhash_get(&edges_map2, invokeTypes ? (void*)invokeTypes : (void*)callee);
-            if (target == HT_NOTFOUND) {
-                size_t min_valid = 0;
-                size_t max_valid = ~(size_t)0;
-                if (invokeTypes) {
-                    assert(jl_is_method_instance(callee));
-                    jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method);
-                    if ((jl_value_t*)mt == jl_nothing) {
-                        callee_ids = NULL; // invalid
-                        break;
-                    }
-                    else {
-                        matches = jl_gf_invoke_lookup_worlds(invokeTypes, (jl_value_t*)mt, world, &min_valid, &max_valid);
-                        if (matches == jl_nothing) {
-                            callee_ids = NULL; // invalid
-                            break;
-                        }
-                        matches = (jl_value_t*)((jl_method_match_t*)matches)->method;
-                    }
-                }
-                else {
-                    if (jl_is_method_instance(callee)) {
-                        jl_method_instance_t *mi = (jl_method_instance_t*)callee;
-                        sig = jl_type_intersection(mi->def.method->sig, (jl_value_t*)mi->specTypes);
-                    }
-                    else {
-                        sig = callee;
-                    }
-                    int ambig = 0;
-                    matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing,
-                            INT32_MAX, 0, world, &min_valid, &max_valid, &ambig);
-                    sig = NULL;
-                    if (matches == jl_nothing) {
-                        callee_ids = NULL; // invalid
-                        break;
-                    }
-                    size_t k;
-                    for (k = 0; k < jl_array_len(matches); k++) {
-                        jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k);
-                        jl_array_ptr_set(matches, k, match->method);
-                    }
-                }
-                jl_array_ptr_1d_push(ext_targets, invokeTypes);
-                jl_array_ptr_1d_push(ext_targets, callee);
-                jl_array_ptr_1d_push(ext_targets, matches);
-                target = (void*)((char*)HT_NOTFOUND + jl_array_len(ext_targets) / 3);
-                ptrhash_put(&edges_map2, (void*)callee, target);
-            }
-            idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1;
-        }
-        jl_array_ptr_set(edges, i + 1, callee_ids); // swap callees for ids
-        if (!callee_ids)
-            continue;
-        idxs[0] = nt;
-        // record place of every method in edges
-        // add method edges to the callee_ids list
-        for (size_t j = 0; j < l; j += 2) {
-            jl_value_t *callee = jl_array_ptr_ref(callees, j + 1);
-            if (callee && jl_is_method_instance(callee)) {
-                void *target = ptrhash_get(&edges_ids, (void*)callee);
-                if (target != HT_NOTFOUND) {
-                    idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1;
-                }
-            }
-        }
-        jl_array_del_end(callee_ids, l - nt);
-    }
-    JL_GC_POP();
-    htable_free(&edges_map2);
+    jl_foreach_reachable_mtable(jl_collect_methtable_from_mod, mod_array, s);
 }
 
 // Headers
@@ -576,7 +562,7 @@ static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets, jl_arra
 static void write_mod_list(ios_t *s, jl_array_t *a)
 {
     size_t i;
-    size_t len = jl_array_len(a);
+    size_t len = jl_array_nrows(a);
     for (i = 0; i < len; i++) {
         jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(a, i);
         assert(jl_is_module(m));
@@ -594,45 +580,79 @@ static void write_mod_list(ios_t *s, jl_array_t *a)
     write_int32(s, 0);
 }
 
-// OPT_LEVEL should always be the upper bits
 #define OPT_LEVEL 6
+#define DEBUG_LEVEL 1
 
 JL_DLLEXPORT uint8_t jl_cache_flags(void)
 {
     // OOICCDDP
     uint8_t flags = 0;
     flags |= (jl_options.use_pkgimages & 1); // 0-bit
-    flags |= (jl_options.debug_level & 3) << 1; // 1-2 bit
+    flags |= (jl_options.debug_level & 3) << DEBUG_LEVEL; // 1-2 bit
     flags |= (jl_options.check_bounds & 3) << 3; // 3-4 bit
     flags |= (jl_options.can_inline & 1) << 5; // 5-bit
     flags |= (jl_options.opt_level & 3) << OPT_LEVEL; // 6-7 bit
     return flags;
 }
 
-JL_DLLEXPORT uint8_t jl_match_cache_flags(uint8_t flags)
+
+JL_DLLEXPORT uint8_t jl_match_cache_flags(uint8_t requested_flags, uint8_t actual_flags)
 {
-    // 1. Check which flags are relevant
-    uint8_t current_flags = jl_cache_flags();
-    uint8_t supports_pkgimage = (current_flags & 1);
-    uint8_t is_pkgimage = (flags & 1);
+    uint8_t supports_pkgimage = (requested_flags & 1);
+    uint8_t is_pkgimage = (actual_flags & 1);
 
     // For .ji packages ignore other flags
     if (!supports_pkgimage && !is_pkgimage) {
         return 1;
     }
 
-    // 2. Check all flags, execept opt level must be exact
-    uint8_t mask = (1 << OPT_LEVEL)-1;
-    if ((flags & mask) != (current_flags & mask))
+    // If package images are optional, ignore that bit (it will be unset in requested_flags)
+    if (jl_options.use_pkgimages == JL_OPTIONS_USE_PKGIMAGES_EXISTING) {
+        actual_flags &= ~1;
+    }
+
+    // 2. Check all flags, except opt level and debug level must be exact
+    uint8_t mask = (~(3u << OPT_LEVEL) & ~(3u << DEBUG_LEVEL)) & 0x7f;
+    if ((actual_flags & mask) != (requested_flags & mask))
         return 0;
-    // 3. allow for higher optimization flags in cache
-    flags >>= OPT_LEVEL;
-    current_flags >>= OPT_LEVEL;
-    return flags >= current_flags;
+    // 3. allow for higher optimization and debug level flags in cache to minimize required compile option combinations
+    return ((actual_flags >> OPT_LEVEL) & 3) >= ((requested_flags >> OPT_LEVEL) & 3) &&
+           ((actual_flags >> DEBUG_LEVEL) & 3) >= ((requested_flags >> DEBUG_LEVEL) & 3);
+}
+
+JL_DLLEXPORT uint8_t jl_match_cache_flags_current(uint8_t flags)
+{
+    return jl_match_cache_flags(jl_cache_flags(), flags);
+}
+
+// return char* from String field in Base.GIT_VERSION_INFO
+static const char *git_info_string(const char *fld)
+{
+    static jl_value_t *GIT_VERSION_INFO = NULL;
+    if (!GIT_VERSION_INFO)
+        GIT_VERSION_INFO = jl_get_global(jl_base_module, jl_symbol("GIT_VERSION_INFO"));
+    jl_value_t *f = jl_get_field(GIT_VERSION_INFO, fld);
+    assert(jl_is_string(f));
+    return jl_string_data(f);
+}
+
+static const char *jl_git_branch(void)
+{
+    static const char *branch = NULL;
+    if (!branch) branch = git_info_string("branch");
+    return branch;
 }
 
+static const char *jl_git_commit(void)
+{
+    static const char *commit = NULL;
+    if (!commit) commit = git_info_string("commit");
+    return commit;
+}
+
+
 // "magic" string and version header of .ji file
-static const int JI_FORMAT_VERSION = 12;
+static const int JI_FORMAT_VERSION = 13;
 static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature
 static const uint16_t BOM = 0xFEFF; // byte-order marker
 static int64_t write_header(ios_t *s, uint8_t pkgimage)
@@ -655,13 +675,18 @@ static int64_t write_header(ios_t *s, uint8_t pkgimage)
     return checksumpos;
 }
 
+static int is_serialization_root_module(jl_module_t *mod) JL_NOTSAFEPOINT
+{
+    return mod->parent == jl_main_module || mod->parent == jl_base_module || mod->parent == mod;
+}
+
 // serialize information about the result of deserializing this file
 static void write_worklist_for_header(ios_t *s, jl_array_t *worklist)
 {
-    int i, l = jl_array_len(worklist);
+    int i, l = jl_array_nrows(worklist);
     for (i = 0; i < l; i++) {
         jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(worklist, i);
-        if (workmod->parent == jl_main_module || workmod->parent == workmod) {
+        if (is_serialization_root_module(workmod)) {
             size_t l = strlen(jl_symbol_name(workmod->name));
             write_int32(s, l);
             ios_write(s, jl_symbol_name(workmod->name), l);
@@ -675,7 +700,7 @@ static void write_worklist_for_header(ios_t *s, jl_array_t *worklist)
 
 static void write_module_path(ios_t *s, jl_module_t *depmod) JL_NOTSAFEPOINT
 {
-    if (depmod->parent == jl_main_module || depmod->parent == depmod)
+    if (is_serialization_root_module(depmod))
         return;
     const char *mname = jl_symbol_name(depmod->name);
     size_t slen = strlen(mname);
@@ -692,43 +717,75 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
 {
     int64_t initial_pos = 0;
     int64_t pos = 0;
-    static jl_array_t *deps = NULL;
-    if (!deps)
-        deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies"));
-
-    // unique(deps) to eliminate duplicates while preserving order:
-    // we preserve order so that the topmost included .jl file comes first
-    static jl_value_t *unique_func = NULL;
-    if (!unique_func)
-        unique_func = jl_get_global(jl_base_module, jl_symbol("unique"));
-    jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps};
     jl_task_t *ct = jl_current_task;
     size_t last_age = ct->world_age;
     ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-    jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL);
-    ct->world_age = last_age;
+    jl_value_t *depots = NULL, *prefs_hash = NULL, *prefs_list = NULL;
+    jl_value_t *unique_func = NULL;
+    jl_value_t *replace_depot_func = NULL;
+    jl_value_t *normalize_depots_func = NULL;
+    jl_value_t *toplevel = NULL;
+    jl_value_t *prefs_hash_func = NULL;
+    jl_value_t *get_compiletime_prefs_func = NULL;
+    JL_GC_PUSH8(&depots, &prefs_list, &unique_func, &replace_depot_func, &normalize_depots_func, &toplevel, &prefs_hash_func, &get_compiletime_prefs_func);
+
+    jl_array_t *udeps = (jl_array_t*)jl_get_global_value(jl_base_module, jl_symbol("_require_dependencies"), ct->world_age);
+    *udepsp = udeps;
+
+    // unique(udeps) to eliminate duplicates while preserving order:
+    // we preserve order so that the topmost included .jl file comes first
+    if (udeps) {
+        unique_func = jl_eval_global_var(jl_base_module, jl_symbol("unique"), ct->world_age);
+        jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)udeps};
+        udeps = (jl_array_t*)jl_apply(uniqargs, 2);
+        *udepsp = udeps;
+        JL_TYPECHK(write_dependency_list, array_any, (jl_value_t*)udeps);
+    }
+
+    replace_depot_func = jl_get_global_value(jl_base_module, jl_symbol("replace_depot_path"), ct->world_age);
+    normalize_depots_func = jl_eval_global_var(jl_base_module, jl_symbol("normalize_depots_for_relocation"), ct->world_age);
+
+    depots = jl_apply(&normalize_depots_func, 1);
+
+    jl_datatype_t *deptuple_p[5] = {jl_module_type, jl_string_type, jl_uint64_type, jl_uint32_type, jl_float64_type};
+    jl_value_t *jl_deptuple_type = jl_apply_tuple_type_v((jl_value_t**)deptuple_p, 5);
+    JL_GC_PROMISE_ROOTED(jl_deptuple_type);
+#define jl_is_deptuple(v) (jl_typeis((v), jl_deptuple_type))
 
     // write a placeholder for total size so that we can quickly seek past all of the
     // dependencies if we don't need them
     initial_pos = ios_pos(s);
     write_uint64(s, 0);
-    size_t i, l = udeps ? jl_array_len(udeps) : 0;
+    size_t i, l = udeps ? jl_array_nrows(udeps) : 0;
     for (i = 0; i < l; i++) {
         jl_value_t *deptuple = jl_array_ptr_ref(udeps, i);
-        jl_value_t *dep = jl_fieldref(deptuple, 1);              // file abspath
-        size_t slen = jl_string_len(dep);
+        JL_TYPECHK(write_dependency_list, deptuple, deptuple);
+        jl_value_t *deppath = jl_fieldref_noalloc(deptuple, 1);
+
+        if (replace_depot_func) {
+            jl_value_t *replace_depot_args[3];
+            replace_depot_args[0] = replace_depot_func;
+            replace_depot_args[1] = deppath;
+            replace_depot_args[2] = depots;
+            deppath = (jl_value_t*)jl_apply(replace_depot_args, 3);
+            JL_TYPECHK(write_dependency_list, string, deppath);
+        }
+
+        size_t slen = jl_string_len(deppath);
         write_int32(s, slen);
-        ios_write(s, jl_string_data(dep), slen);
-        write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2)));  // mtime
-        jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0);  // evaluating module
+        ios_write(s, jl_string_data(deppath), slen);
+        write_uint64(s, jl_unbox_uint64(jl_fieldref(deptuple, 2)));    // fsize
+        write_uint32(s, jl_unbox_uint32(jl_fieldref(deptuple, 3)));    // hash
+        write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 4)));  // mtime
+        jl_module_t *depmod = (jl_module_t*)jl_fieldref_noalloc(deptuple, 0);  // evaluating module
         jl_module_t *depmod_top = depmod;
-        while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top)
+        while (!is_serialization_root_module(depmod_top))
             depmod_top = depmod_top->parent;
         unsigned provides = 0;
-        size_t j, lj = jl_array_len(worklist);
+        size_t j, lj = jl_array_nrows(worklist);
         for (j = 0; j < lj; j++) {
             jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(worklist, j);
-            if (workmod->parent == jl_main_module || workmod->parent == workmod) {
+            if (is_serialization_root_module(workmod)) {
                 ++provides;
                 if (workmod == depmod_top) {
                     write_int32(s, provides);
@@ -742,39 +799,33 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
     write_int32(s, 0); // terminator, for ease of reading
 
     // Calculate Preferences hash for current package.
-    jl_value_t *prefs_hash = NULL;
-    jl_value_t *prefs_list = NULL;
-    JL_GC_PUSH1(&prefs_list);
     if (jl_base_module) {
         // Toplevel module is the module we're currently compiling, use it to get our preferences hash
-        jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__"));
-        jl_value_t * prefs_hash_func = jl_get_global(jl_base_module, jl_symbol("get_preferences_hash"));
-        jl_value_t * get_compiletime_prefs_func = jl_get_global(jl_base_module, jl_symbol("get_compiletime_preferences"));
-
-        if (toplevel && prefs_hash_func && get_compiletime_prefs_func) {
-            // Temporary invoke in newest world age
-            size_t last_age = ct->world_age;
-            ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        toplevel = jl_get_global_value(jl_base_module, jl_symbol("__toplevel__"), ct->world_age);
+        prefs_hash_func = jl_eval_global_var(jl_base_module, jl_symbol("get_preferences_hash"), ct->world_age);
+        get_compiletime_prefs_func = jl_eval_global_var(jl_base_module, jl_symbol("get_compiletime_preferences"), ct->world_age);
 
+        if (toplevel) {
             // call get_compiletime_prefs(__toplevel__)
             jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL};
             prefs_list = (jl_value_t*)jl_apply(args, 2);
+            JL_TYPECHK(write_dependency_list, array, prefs_list);
 
             // Call get_preferences_hash(__toplevel__, prefs_list)
             args[0] = prefs_hash_func;
             args[2] = prefs_list;
             prefs_hash = (jl_value_t*)jl_apply(args, 3);
-
-            // Reset world age to normal
-            ct->world_age = last_age;
+            JL_TYPECHK(write_dependency_list, uint64, prefs_hash);
         }
     }
+    ct->world_age = last_age;
 
     // If we successfully got the preferences, write it out, otherwise write `0` for this `.ji` file.
     if (prefs_hash != NULL && prefs_list != NULL) {
-        size_t i, l = jl_array_len(prefs_list);
+        size_t i, l = jl_array_nrows(prefs_list);
         for (i = 0; i < l; i++) {
             jl_value_t *pref_name = jl_array_ptr_ref(prefs_list, i);
+            JL_TYPECHK(write_dependency_list, string, pref_name);
             size_t slen = jl_string_len(pref_name);
             write_int32(s, slen);
             ios_write(s, jl_string_data(pref_name), slen);
@@ -791,7 +842,8 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
         write_int32(s, 0);
         write_uint64(s, 0);
     }
-    JL_GC_POP(); // for prefs_list
+    JL_GC_POP(); // for depots, prefs_list
+#undef jl_is_deptuple
 
     // write a dummy file position to indicate the beginning of the source-text
     pos = ios_pos(s);
@@ -806,357 +858,93 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
 // Deserialization
 
 // Add methods to external (non-worklist-owned) functions
-static void jl_insert_methods(jl_array_t *list)
+// mutating external to point at the new methodtable entry instead of the new method
+static void jl_add_methods(jl_array_t *external)
 {
-    size_t i, l = jl_array_len(list);
+    size_t i, l = jl_array_nrows(external);
     for (i = 0; i < l; i++) {
-        jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(list, i);
+        jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(external, i);
         assert(jl_is_method(meth));
         assert(!meth->is_for_opaque_closure);
         jl_methtable_t *mt = jl_method_get_table(meth);
         assert((jl_value_t*)mt != jl_nothing);
-        jl_method_table_insert(mt, meth, NULL);
-    }
-}
-
-static void jl_copy_roots(jl_array_t *method_roots_list, uint64_t key)
-{
-    size_t i, l = jl_array_len(method_roots_list);
-    for (i = 0; i < l; i+=2) {
-        jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(method_roots_list, i);
-        jl_array_t *roots = (jl_array_t*)jl_array_ptr_ref(method_roots_list, i+1);
-        if (roots) {
-            assert(jl_is_array(roots));
-            jl_append_method_roots(m, key, roots);
-        }
+        jl_typemap_entry_t *entry = jl_method_table_add(mt, meth, NULL);
+        jl_array_ptr_set(external, i, entry);
     }
 }
 
-
-// verify that these edges intersect with the same methods as before
-static jl_array_t *jl_verify_edges(jl_array_t *targets, size_t minworld)
+extern _Atomic(int) allow_new_worlds;
+static void jl_activate_methods(jl_array_t *external, jl_array_t *internal, size_t world, const char *pkgname)
 {
-    JL_TIMING(VERIFY_IMAGE, VERIFY_Edges);
-    size_t i, l = jl_array_len(targets) / 3;
-    static jl_value_t *ulong_array JL_ALWAYS_LEAFTYPE = NULL;
-    if (ulong_array == NULL)
-        ulong_array = jl_apply_array_type((jl_value_t*)jl_ulong_type, 1);
-    jl_array_t *maxvalids = jl_alloc_array_1d(ulong_array, l);
-    memset(jl_array_data(maxvalids), 0, l * sizeof(size_t));
-    jl_value_t *loctag = NULL;
-    jl_value_t *matches = NULL;
-    jl_value_t *sig = NULL;
-    JL_GC_PUSH4(&maxvalids, &matches, &sig, &loctag);
+    size_t i, l = jl_array_nrows(internal);
     for (i = 0; i < l; i++) {
-        jl_value_t *invokesig = jl_array_ptr_ref(targets, i * 3);
-        jl_value_t *callee = jl_array_ptr_ref(targets, i * 3 + 1);
-        jl_value_t *expected = jl_array_ptr_ref(targets, i * 3 + 2);
-        size_t min_valid = 0;
-        size_t max_valid = ~(size_t)0;
-        if (invokesig) {
-            assert(callee && "unsupported edge");
-            jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method);
-            if ((jl_value_t*)mt == jl_nothing) {
-                max_valid = 0;
-            }
-            else {
-                matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, minworld, &min_valid, &max_valid);
-                if (matches == jl_nothing) {
-                     max_valid = 0;
-                }
-                else {
-                    matches = (jl_value_t*)((jl_method_match_t*)matches)->method;
-                    if (matches != expected) {
-                        max_valid = 0;
-                    }
-                }
-            }
+        // allow_new_worlds doesn't matter here, since we aren't actually changing anything external
+        jl_value_t *obj = jl_array_ptr_ref(internal, i);
+        if (jl_typetagis(obj, jl_typemap_entry_type)) {
+            jl_typemap_entry_t *entry = (jl_typemap_entry_t*)obj;
+            assert(jl_atomic_load_relaxed(&entry->min_world) == ~(size_t)0);
+            assert(jl_atomic_load_relaxed(&entry->max_world) == WORLD_AGE_REVALIDATION_SENTINEL);
+            jl_atomic_store_release(&entry->min_world, world);
+            jl_atomic_store_release(&entry->max_world, ~(size_t)0);
         }
-        else {
-            if (jl_is_method_instance(callee)) {
-                jl_method_instance_t *mi = (jl_method_instance_t*)callee;
-                sig = jl_type_intersection(mi->def.method->sig, (jl_value_t*)mi->specTypes);
-            }
-            else {
-                sig = callee;
-            }
-            assert(jl_is_array(expected));
-            int ambig = 0;
-            // TODO: possibly need to included ambiguities too (for the optimizer correctness)?
-            // len + 1 is to allow us to log causes of invalidation (SnoopCompile's @snoopr)
-            matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing,
-                    _jl_debug_method_invalidation ? INT32_MAX : jl_array_len(expected),
-                    0, minworld, &min_valid, &max_valid, &ambig);
-            sig = NULL;
-            if (matches == jl_nothing) {
-                max_valid = 0;
-            }
-            else {
-                // setdiff!(matches, expected)
-                size_t j, k, ins = 0;
-                if (jl_array_len(matches) != jl_array_len(expected)) {
-                    max_valid = 0;
-                }
-                for (k = 0; k < jl_array_len(matches); k++) {
-                    jl_method_t *match = ((jl_method_match_t*)jl_array_ptr_ref(matches, k))->method;
-                    size_t l = jl_array_len(expected);
-                    for (j = 0; j < l; j++)
-                        if (match == (jl_method_t*)jl_array_ptr_ref(expected, j))
-                            break;
-                    if (j == l) {
-                        // intersection has a new method or a method was
-                        // deleted--this is now probably no good, just invalidate
-                        // everything about it now
-                        max_valid = 0;
-                        if (!_jl_debug_method_invalidation)
-                            break;
-                        jl_array_ptr_set(matches, ins++, match);
-                    }
-                }
-                if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation)
-                    jl_array_del_end((jl_array_t*)matches, jl_array_len(matches) - ins);
-            }
+        else if (jl_is_method(obj)) {
+            jl_method_t *m = (jl_method_t*)obj;
+            assert(jl_atomic_load_relaxed(&m->primary_world) == ~(size_t)0);
+            jl_atomic_store_release(&m->primary_world, world);
         }
-        ((size_t*)(jl_array_data(maxvalids)))[i] = max_valid;
-        if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation) {
-            jl_array_ptr_1d_push(_jl_debug_method_invalidation, invokesig ? (jl_value_t*)invokesig : callee);
-            loctag = jl_cstr_to_string("insert_backedges_callee");
-            jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
-            loctag = jl_box_int32((int32_t)i);
-            jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
-            jl_array_ptr_1d_push(_jl_debug_method_invalidation, matches);
-        }
-        //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)invokesig);
-        //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)callee);
-        //ios_puts(valid ? "valid\n" : "INVALID\n", ios_stderr);
-    }
-    JL_GC_POP();
-    return maxvalids;
-}
-
-// Combine all edges relevant to a method to initialize the maxvalids list
-static jl_array_t *jl_verify_methods(jl_array_t *edges, jl_array_t *maxvalids)
-{
-    JL_TIMING(VERIFY_IMAGE, VERIFY_Methods);
-    jl_value_t *loctag = NULL;
-    jl_array_t *maxvalids2 = NULL;
-    JL_GC_PUSH2(&loctag, &maxvalids2);
-    size_t i, l = jl_array_len(edges) / 2;
-    maxvalids2 = jl_alloc_array_1d(jl_typeof(maxvalids), l);
-    size_t *maxvalids2_data = (size_t*)jl_array_data(maxvalids2);
-    memset(maxvalids2_data, 0, l * sizeof(size_t));
-    for (i = 0; i < l; i++) {
-        jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i);
-        assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method));
-        jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1);
-        assert(jl_typetagis((jl_value_t*)callee_ids, jl_array_int32_type));
-        if (callee_ids == NULL) {
-            // serializing the edges had failed
-            maxvalids2_data[i] = 0;
+        else if (jl_is_code_instance(obj)) {
+            jl_code_instance_t *ci = (jl_code_instance_t*)obj;
+            assert(jl_atomic_load_relaxed(&ci->min_world) == ~(size_t)0);
+            assert(jl_atomic_load_relaxed(&ci->max_world) == WORLD_AGE_REVALIDATION_SENTINEL);
+            jl_atomic_store_relaxed(&ci->min_world, world);
+            // n.b. ci->max_world is not updated until edges are verified
         }
         else {
-            int32_t *idxs = (int32_t*)jl_array_data(callee_ids);
-            size_t j;
-            maxvalids2_data[i] = ~(size_t)0;
-            for (j = 0; j < idxs[0]; j++) {
-                int32_t idx = idxs[j + 1];
-                size_t max_valid = ((size_t*)(jl_array_data(maxvalids)))[idx];
-                if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation) {
-                    jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller);
-                    loctag = jl_cstr_to_string("verify_methods");
-                    jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
-                    loctag = jl_box_int32((int32_t)idx);
-                    jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
-                }
-                if (max_valid < maxvalids2_data[i])
-                    maxvalids2_data[i] = max_valid;
-                if (max_valid == 0)
-                    break;
-            }
-        }
-        //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller);
-        //ios_puts(maxvalid2_data[i] == ~(size_t)0 ? "valid\n" : "INVALID\n", ios_stderr);
-    }
-    JL_GC_POP();
-    return maxvalids2;
-}
-
-
-// Visit the entire call graph, starting from edges[idx] to determine if that method is valid
-// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
-// and slightly modified with an early termination option once the computation reaches its minimum
-static int jl_verify_graph_edge(size_t *maxvalids2_data, jl_array_t *edges, size_t idx, arraylist_t *visited, arraylist_t *stack)
-{
-    if (maxvalids2_data[idx] == 0) {
-        visited->items[idx] = (void*)1;
-        return 0;
-    }
-    size_t cycle = (size_t)visited->items[idx];
-    if (cycle != 0)
-        return cycle - 1; // depth remaining
-    jl_value_t *cause = NULL;
-    arraylist_push(stack, (void*)idx);
-    size_t depth = stack->len;
-    visited->items[idx] = (void*)(1 + depth);
-    jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1);
-    assert(jl_typetagis((jl_value_t*)callee_ids, jl_array_int32_type));
-    int32_t *idxs = (int32_t*)jl_array_data(callee_ids);
-    size_t i, n = jl_array_len(callee_ids);
-    cycle = depth;
-    for (i = idxs[0] + 1; i < n; i++) {
-        int32_t childidx = idxs[i];
-        int child_cycle = jl_verify_graph_edge(maxvalids2_data, edges, childidx, visited, stack);
-        size_t child_max_valid = maxvalids2_data[childidx];
-        if (child_max_valid < maxvalids2_data[idx]) {
-            maxvalids2_data[idx] = child_max_valid;
-            cause = jl_array_ptr_ref(edges, childidx * 2);
-        }
-        if (child_max_valid == 0) {
-            // found what we were looking for, so terminate early
-            break;
-        }
-        else if (child_cycle && child_cycle < cycle) {
-            // record the cycle will resolve at depth "cycle"
-            cycle = child_cycle;
+            abort();
         }
     }
-    size_t max_valid = maxvalids2_data[idx];
-    if (max_valid != 0 && cycle != depth)
-        return cycle;
-    // If we are the top of the current cycle, now mark all other parts of
-    // our cycle with what we found.
-    // Or if we found a failed edge, also mark all of the other parts of the
-    // cycle as also having an failed edge.
-    while (stack->len >= depth) {
-        size_t childidx = (size_t)arraylist_pop(stack);
-        assert(visited->items[childidx] == (void*)(2 + stack->len));
-        if (idx != childidx) {
-            if (max_valid < maxvalids2_data[childidx])
-                maxvalids2_data[childidx] = max_valid;
+    l = jl_array_nrows(external);
+    if (l) {
+        if (!jl_atomic_load_relaxed(&allow_new_worlds)) {
+            jl_printf(JL_STDERR, "WARNING: Method changes for %s have been disabled via a call to disable_new_worlds.\n", pkgname);
+            return;
         }
-        visited->items[childidx] = (void*)1;
-        if (_jl_debug_method_invalidation && max_valid != ~(size_t)0) {
-            jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(edges, childidx * 2);
-            jl_value_t *loctag = NULL;
-            JL_GC_PUSH1(&loctag);
-            jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi);
-            loctag = jl_cstr_to_string("verify_methods");
-            jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
-            jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)cause);
-            JL_GC_POP();
+        for (i = 0; i < l; i++) {
+            jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_array_ptr_ref(external, i);
+            //uint64_t t0 = uv_hrtime();
+            jl_method_table_activate(entry);
+            //jl_printf(JL_STDERR, "%f ", (double)(uv_hrtime() - t0) / 1e6);
+            //jl_static_show(JL_STDERR, entry->func.value);
+            //jl_printf(JL_STDERR, "\n");
         }
     }
-    return 0;
 }
 
-// Visit all entries in edges, verify if they are valid
-static void jl_verify_graph(jl_array_t *edges, jl_array_t *maxvalids2)
+static int jl_copy_roots(jl_array_t *method_roots_list, uint64_t key)
 {
-    JL_TIMING(VERIFY_IMAGE, VERIFY_Graph);
-    arraylist_t stack, visited;
-    arraylist_new(&stack, 0);
-    size_t i, n = jl_array_len(edges) / 2;
-    arraylist_new(&visited, n);
-    memset(visited.items, 0, n * sizeof(size_t));
-    size_t *maxvalids2_data = (size_t*)jl_array_data(maxvalids2);
-    for (i = 0; i < n; i++) {
-        assert(visited.items[i] == (void*)0 || visited.items[i] == (void*)1);
-        int child_cycle = jl_verify_graph_edge(maxvalids2_data, edges, i, &visited, &stack);
-        assert(child_cycle == 0); (void)child_cycle;
-        assert(stack.len == 0);
-        assert(visited.items[i] == (void*)1);
-    }
-    arraylist_free(&stack);
-    arraylist_free(&visited);
-}
-
-// Restore backedges to external targets
-// `edges` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods.
-// `ext_targets` is [invokesig1, callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods.
-static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *ci_list, size_t minworld)
-{
-    // determine which CodeInstance objects are still valid in our image
-    jl_array_t *valids = jl_verify_edges(ext_targets, minworld);
-    JL_GC_PUSH1(&valids);
-    valids = jl_verify_methods(edges, valids); // consumes edges valids, initializes methods valids
-    jl_verify_graph(edges, valids); // propagates methods valids for each edge
-    size_t i, l;
-
-    // next build a map from external MethodInstances to their CodeInstance for insertion
-    l = jl_array_len(ci_list);
-    htable_t visited;
-    htable_new(&visited, l);
-    for (i = 0; i < l; i++) {
-        jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(ci_list, i);
-        assert(ci->min_world == minworld);
-        if (ci->max_world == 1) { // sentinel value: has edges to external callables
-            ptrhash_put(&visited, (void*)ci->def, (void*)ci);
-        }
-        else {
-            assert(ci->max_world == ~(size_t)0);
-            jl_method_instance_t *caller = ci->def;
-            if (ci->inferred && jl_rettype_inferred(caller, minworld, ~(size_t)0) == jl_nothing) {
-                jl_mi_cache_insert(caller, ci);
-            }
-            //jl_static_show((jl_stream*)ios_stderr, (jl_value_t*)caller);
-            //ios_puts("free\n", ios_stderr);
-        }
-    }
-
-    // next enable any applicable new codes
-    l = jl_array_len(edges) / 2;
-    for (i = 0; i < l; i++) {
-        jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i);
-        size_t maxvalid = ((size_t*)(jl_array_data(valids)))[i];
-        if (maxvalid == ~(size_t)0) {
-            // if this callee is still valid, add all the backedges
-            jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1);
-            int32_t *idxs = (int32_t*)jl_array_data(callee_ids);
-            for (size_t j = 0; j < idxs[0]; j++) {
-                int32_t idx = idxs[j + 1];
-                jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3);
-                jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1);
-                if (callee && jl_is_method_instance(callee)) {
-                    jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller);
-                }
-                else {
-                    jl_value_t *sig = callee == NULL ? invokesig : callee;
-                    jl_methtable_t *mt = jl_method_table_for(sig);
-                    // FIXME: rarely, `callee` has an unexpected `Union` signature,
-                    // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344
-                    // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)`
-                    // This workaround exposes us to (rare) 265-violations.
-                    if ((jl_value_t*)mt != jl_nothing)
-                        jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller);
+    size_t i, l = jl_array_nrows(method_roots_list);
+    int failed = 0;
+    for (i = 0; i < l; i+=2) {
+        jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(method_roots_list, i);
+        jl_array_t *roots = (jl_array_t*)jl_array_ptr_ref(method_roots_list, i+1);
+        if (roots) {
+            assert(jl_is_array(roots));
+            if (m->root_blocks) {
+                // check for key collision
+                uint64_t *blocks = jl_array_data(m->root_blocks, uint64_t);
+                size_t nx2 = jl_array_nrows(m->root_blocks);
+                for (size_t i = 0; i < nx2; i+=2) {
+                    if (blocks[i] == key) {
+                        // found duplicate block
+                        failed = -1;
+                    }
                 }
             }
-        }
-        // then enable any methods associated with it
-        void *ci = ptrhash_get(&visited, (void*)caller);
-        //assert(ci != HT_NOTFOUND);
-        if (ci != HT_NOTFOUND) {
-            // have some new external code to use
-            assert(jl_is_code_instance(ci));
-            jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
-            assert(codeinst->min_world == minworld && codeinst->inferred);
-            codeinst->max_world = maxvalid;
-            if (jl_rettype_inferred(caller, minworld, maxvalid) == jl_nothing) {
-                jl_mi_cache_insert(caller, codeinst);
-            }
-        }
-    }
 
-    htable_free(&visited);
-    JL_GC_POP();
-}
-
-static void classify_callers(htable_t *callers_with_edges, jl_array_t *edges)
-{
-    size_t l = edges ? jl_array_len(edges) / 2 : 0;
-    for (size_t i = 0; i < l; i++) {
-        jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i);
-        ptrhash_put(callers_with_edges, (void*)caller, (void*)caller);
+            jl_append_method_roots(m, key, roots);
+        }
     }
+    return failed;
 }
 
 static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *depmods)
@@ -1165,7 +953,7 @@ static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *depmods)
         return jl_get_exceptionf(jl_errorexception_type,
                 "Main module uuid state is invalid for module deserialization.");
     }
-    size_t i, l = jl_array_len(depmods);
+    size_t i, l = jl_array_nrows(depmods);
     for (i = 0; ; i++) {
         size_t len = read_int32(s);
         if (len == 0 && i == l)
@@ -1227,11 +1015,11 @@ static jl_array_t *image_to_depmodidx(jl_array_t *depmods)
 {
     if (!depmods)
         return NULL;
-    assert(jl_array_len(depmods) < INT32_MAX && "too many dependencies to serialize");
+    assert(jl_array_nrows(depmods) < INT32_MAX && "too many dependencies to serialize");
     size_t lbids = n_linkage_blobs();
-    size_t ldeps = jl_array_len(depmods);
+    size_t ldeps = jl_array_nrows(depmods);
     jl_array_t *depmodidxs = jl_alloc_array_1d(jl_array_int32_type, lbids);
-    int32_t *dmidxs = (int32_t*)jl_array_data(depmodidxs);
+    int32_t *dmidxs = jl_array_data(depmodidxs, int32_t);
     memset(dmidxs, -1, lbids * sizeof(int32_t));
     dmidxs[0] = 0; // the sysimg can also be found at idx 0, by construction
     for (size_t i = 0, j = 0; i < ldeps; i++) {
@@ -1251,9 +1039,9 @@ static jl_array_t *depmod_to_imageidx(jl_array_t *depmods)
 {
     if (!depmods)
         return NULL;
-    size_t ldeps = jl_array_len(depmods);
+    size_t ldeps = jl_array_nrows(depmods);
     jl_array_t *imageidxs = jl_alloc_array_1d(jl_array_int32_type, ldeps + 1);
-    int32_t *imgidxs = (int32_t*)jl_array_data(imageidxs);
+    int32_t *imgidxs = jl_array_data(imageidxs, int32_t);
     imgidxs[0] = 0;
     for (size_t i = 0; i < ldeps; i++) {
         jl_value_t *depmod = jl_array_ptr_ref(depmods, i);
diff --git a/src/subtype.c b/src/subtype.c
index 5b05bb288ffc4..e7a426ae7bc79 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -39,20 +39,24 @@ extern "C" {
 // Union type decision points are discovered while the algorithm works.
 // If a new Union decision is encountered, the `more` flag is set to tell
 // the forall/exists loop to grow the stack.
-// TODO: the stack probably needs to be artificially large because of some
-// deeper problem (see #21191) and could be shrunk once that is fixed
+
+typedef struct jl_bits_stack_t {
+    uint32_t data[16];
+    struct jl_bits_stack_t *next;
+} jl_bits_stack_t;
+
 typedef struct {
     int16_t depth;
     int16_t more;
     int16_t used;
-    uint32_t stack[100];  // stack of bits represented as a bit vector
+    jl_bits_stack_t stack;
 } jl_unionstate_t;
 
 typedef struct {
     int16_t depth;
     int16_t more;
     int16_t used;
-    void *stack;
+    uint8_t *stack;
 } jl_saved_unionstate_t;
 
 // Linked list storing the type variable environment. A new jl_varbinding_t
@@ -61,14 +65,15 @@ typedef struct {
 // Most of the complexity is due to the "diagonal rule", requiring us to
 // identify which type vars range over only concrete types.
 typedef struct jl_varbinding_t {
-    jl_tvar_t *var;
-    jl_value_t *lb;
-    jl_value_t *ub;
+    jl_tvar_t *var; // store NULL to "delete" this from env (temporarily)
+    jl_value_t *JL_NONNULL lb;
+    jl_value_t *JL_NONNULL ub;
     int8_t right;       // whether this variable came from the right side of `A <: B`
-    int8_t occurs;      // occurs in any position
     int8_t occurs_inv;  // occurs in invariant position
     int8_t occurs_cov;  // # of occurrences in covariant position
     int8_t concrete;    // 1 if another variable has a constraint forcing this one to be concrete
+    int8_t max_offset;  // record the maximum positive offset of the variable (up to 32)
+                        // max_offset < 0 if this variable occurs outside VarargNum.
     // constraintkind: in covariant position, we try three different ways to compute var ∩ type:
     // let ub = var.ub ∩ type
     // 0 - var.ub <: type ? var : ub
@@ -77,6 +82,7 @@ typedef struct jl_varbinding_t {
     int8_t constraintkind;
     int8_t intvalued; // intvalued: must be integer-valued; i.e. occurs as N in Vararg{_,N}
     int8_t limited;
+    int8_t intersected; // whether this variable has been intersected
     int16_t depth0;         // # of invariant constructors nested around the UnionAll type for this var
     // array of typevars that our bounds depend on, whose UnionAlls need to be
     // moved outside ours.
@@ -84,6 +90,14 @@ typedef struct jl_varbinding_t {
     struct jl_varbinding_t *prev;
 } jl_varbinding_t;
 
+typedef struct jl_ivarbinding_t {
+    jl_tvar_t **var;
+    jl_value_t **lb;
+    jl_value_t **ub;
+    jl_varbinding_t *root;
+    struct jl_ivarbinding_t *next;
+} jl_ivarbinding_t;
+
 // subtype algorithm state
 typedef struct jl_stenv_t {
     // N.B.: varbindings are created on the stack and rooted there
@@ -121,37 +135,111 @@ static jl_varbinding_t *lookup(jl_stenv_t *e, jl_tvar_t *v) JL_GLOBALLY_ROOTED J
 }
 #endif
 
+// union-stack tools
+
 static int statestack_get(jl_unionstate_t *st, int i) JL_NOTSAFEPOINT
 {
-    assert(i >= 0 && i < sizeof(st->stack) * 8);
+    assert(i >= 0 && i < 32767); // limited by the depth bit.
     // get the `i`th bit in an array of 32-bit words
-    return (st->stack[i>>5] & (1u<<(i&31))) != 0;
+    jl_bits_stack_t *stack = &st->stack;
+    while (i >= sizeof(stack->data) * 8) {
+        // We should have set this bit.
+        assert(stack->next);
+        stack = stack->next;
+        i -= sizeof(stack->data) * 8;
+    }
+    return (stack->data[i>>5] & (1u<<(i&31))) != 0;
 }
 
 static void statestack_set(jl_unionstate_t *st, int i, int val) JL_NOTSAFEPOINT
 {
-    assert(i >= 0 && i < sizeof(st->stack) * 8);
+    assert(i >= 0 && i < 32767); // limited by the depth bit.
+    jl_bits_stack_t *stack = &st->stack;
+    while (i >= sizeof(stack->data) * 8) {
+        if (__unlikely(stack->next == NULL)) {
+            stack->next = (jl_bits_stack_t *)malloc(sizeof(jl_bits_stack_t));
+            stack->next->next = NULL;
+        }
+        stack = stack->next;
+        i -= sizeof(stack->data) * 8;
+    }
     if (val)
-        st->stack[i>>5] |= (1u<<(i&31));
+        stack->data[i>>5] |= (1u<<(i&31));
     else
-        st->stack[i>>5] &= ~(1u<<(i&31));
+        stack->data[i>>5] &= ~(1u<<(i&31));
+}
+
+#define has_next_union_state(e, R) ((((R) ? &(e)->Runions : &(e)->Lunions)->more) != 0)
+
+static int next_union_state(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT
+{
+    jl_unionstate_t *state = R ? &e->Runions : &e->Lunions;
+    if (state->more == 0)
+        return 0;
+    // reset `used` and let `pick_union_decision` clean the stack.
+    state->used = state->more;
+    statestack_set(state, state->used - 1, 1);
+    return 1;
 }
 
-#define push_unionstate(saved, src)                                     \
-    do {                                                                \
-        (saved)->depth = (src)->depth;                                  \
-        (saved)->more = (src)->more;                                    \
-        (saved)->used = (src)->used;                                    \
-        (saved)->stack = alloca(((src)->used+7)/8);                     \
-        memcpy((saved)->stack, &(src)->stack, ((src)->used+7)/8);       \
+static int pick_union_decision(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT
+{
+    jl_unionstate_t *state = R ? &e->Runions : &e->Lunions;
+    if (state->depth >= state->used) {
+        statestack_set(state, state->used, 0);
+        state->used++;
+    }
+    int ui = statestack_get(state, state->depth);
+    state->depth++;
+    if (ui == 0)
+        state->more = state->depth; // memorize that this was the deepest available choice
+    return ui;
+}
+
+static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT
+{
+    do {
+        if (pick_union_decision(e, R))
+            u = ((jl_uniontype_t*)u)->b;
+        else
+            u = ((jl_uniontype_t*)u)->a;
+    } while (jl_is_uniontype(u));
+    return u;
+}
+
+#define push_unionstate(saved, src)                                  \
+    do {                                                             \
+        (saved)->depth = (src)->depth;                               \
+        (saved)->more = (src)->more;                                 \
+        (saved)->used = (src)->used;                                 \
+        jl_bits_stack_t *srcstack = &(src)->stack;                   \
+        int pushbits = ((saved)->used+7)/8;                          \
+        (saved)->stack = (uint8_t *)alloca(pushbits);                \
+        for (int n = 0; n < pushbits; n += sizeof(srcstack->data)) { \
+            assert(srcstack != NULL);                                \
+            int rest = pushbits - n;                                 \
+            if (rest > sizeof(srcstack->data))                       \
+                rest = sizeof(srcstack->data);                       \
+            memcpy(&(saved)->stack[n], &srcstack->data, rest);       \
+            srcstack = srcstack->next;                               \
+        }                                                            \
     } while (0);
 
-#define pop_unionstate(dst, saved)                                      \
-    do {                                                                \
-        (dst)->depth = (saved)->depth;                                  \
-        (dst)->more = (saved)->more;                                    \
-        (dst)->used = (saved)->used;                                    \
-        memcpy(&(dst)->stack, (saved)->stack, ((saved)->used+7)/8);     \
+#define pop_unionstate(dst, saved)                                  \
+    do {                                                            \
+        (dst)->depth = (saved)->depth;                              \
+        (dst)->more = (saved)->more;                                \
+        (dst)->used = (saved)->used;                                \
+        jl_bits_stack_t *dststack = &(dst)->stack;                  \
+        int popbits = ((saved)->used+7)/8;                          \
+        for (int n = 0; n < popbits; n += sizeof(dststack->data)) { \
+            assert(dststack != NULL);                               \
+            int rest = popbits - n;                                 \
+            if (rest > sizeof(dststack->data))                      \
+                rest = sizeof(dststack->data);                      \
+            memcpy(&dststack->data, &(saved)->stack[n], rest);      \
+            dststack = dststack->next;                              \
+        }                                                           \
     } while (0);
 
 static int current_env_length(jl_stenv_t *e)
@@ -170,7 +258,7 @@ typedef struct {
     int rdepth;
     int8_t _space[24]; // == 8 * 3
     jl_gcframe_t gcframe;
-    jl_value_t *roots[24];
+    jl_value_t *roots[24]; // == 8 * 3
 } jl_savedenv_t;
 
 static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root)
@@ -186,7 +274,7 @@ static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root)
         }
         else {
             roots = se->roots;
-            nroots = se->gcframe.nroots >> 2;
+            nroots = JL_GC_DECODE_NROOTS(se->gcframe.nroots);
         }
     }
     jl_varbinding_t *v = e->vars;
@@ -197,9 +285,9 @@ static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root)
             roots[i++] = v->ub;
             roots[i++] = (jl_value_t*)v->innervars;
         }
-        se->buf[j++] = v->occurs;
         se->buf[j++] = v->occurs_inv;
         se->buf[j++] = v->occurs_cov;
+        se->buf[j++] = v->max_offset;
         v = v->prev;
     }
     assert(i == nroots); (void)nroots;
@@ -254,9 +342,21 @@ static void free_env(jl_savedenv_t *se) JL_NOTSAFEPOINT
     se->buf = NULL;
 }
 
+static void free_stenv(jl_stenv_t *e) JL_NOTSAFEPOINT
+{
+    for (int R = 0; R < 2; R++) {
+        jl_bits_stack_t *temp = R ? e->Runions.stack.next : e->Lunions.stack.next;
+        while (temp != NULL) {
+            jl_bits_stack_t *next = temp->next;
+            free(temp);
+            temp = next;
+        }
+    }
+}
+
 static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPOINT
 {
-    jl_value_t **roots = NULL;
+    jl_value_t *JL_NONNULL *roots = NULL;
     int nroots = 0;
     if (root) {
         if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) {
@@ -267,7 +367,7 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO
         }
         else {
             roots = se->roots;
-            nroots = se->gcframe.nroots >> 2;
+            nroots = JL_GC_DECODE_NROOTS(se->gcframe.nroots);
         }
     }
     jl_varbinding_t *v = e->vars;
@@ -278,9 +378,9 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO
             v->ub = roots[i++];
             v->innervars = (jl_array_t*)roots[i++];
         }
-        v->occurs = se->buf[j++];
         v->occurs_inv = se->buf[j++];
         v->occurs_cov = se->buf[j++];
+        v->max_offset = se->buf[j++];
         v = v->prev;
     }
     assert(i == nroots); (void)nroots;
@@ -289,15 +389,6 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO
         memset(&e->envout[e->envidx], 0, (e->envsz - e->envidx)*sizeof(void*));
 }
 
-static void clean_occurs(jl_stenv_t *e)
-{
-    jl_varbinding_t *v = e->vars;
-    while (v) {
-        v->occurs = 0;
-        v = v->prev;
-    }
-}
-
 #define flip_offset(e) ((e)->Loffset *= -1)
 
 // type utilities
@@ -586,42 +677,6 @@ static jl_value_t *simple_meet(jl_value_t *a, jl_value_t *b, int overesi)
 
 static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param);
 
-static int next_union_state(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT
-{
-    jl_unionstate_t *state = R ? &e->Runions : &e->Lunions;
-    if (state->more == 0)
-        return 0;
-    // reset `used` and let `pick_union_decision` clean the stack.
-    state->used = state->more;
-    statestack_set(state, state->used - 1, 1);
-    return 1;
-}
-
-static int pick_union_decision(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT
-{
-    jl_unionstate_t *state = R ? &e->Runions : &e->Lunions;
-    if (state->depth >= state->used) {
-        statestack_set(state, state->used, 0);
-        state->used++;
-    }
-    int ui = statestack_get(state, state->depth);
-    state->depth++;
-    if (ui == 0)
-        state->more = state->depth; // memorize that this was the deepest available choice
-    return ui;
-}
-
-static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT
-{
-    do {
-        if (pick_union_decision(e, R))
-            u = ((jl_uniontype_t*)u)->b;
-        else
-            u = ((jl_uniontype_t*)u)->a;
-    } while (jl_is_uniontype(u));
-    return u;
-}
-
 static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int limit_slow);
 
 // subtype for variable bounds consistency check. needs its own forall/exists environment.
@@ -666,8 +721,6 @@ static int subtype_left_var(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int par
 // of determining whether the variable is concrete.
 static void record_var_occurrence(jl_varbinding_t *vb, jl_stenv_t *e, int param) JL_NOTSAFEPOINT
 {
-    if (vb != NULL)
-        vb->occurs = 1;
     if (vb != NULL && param) {
         // saturate counters at 2; we don't need values bigger than that
         if (param == 2 && e->invdepth > vb->depth0) {
@@ -677,6 +730,10 @@ static void record_var_occurrence(jl_varbinding_t *vb, jl_stenv_t *e, int param)
         else if (vb->occurs_cov < 2) {
             vb->occurs_cov++;
         }
+        // Always set `max_offset` to `-1` during the 1st round intersection.
+        // Would be recovered in `intersect_varargs`/`subtype_tuple_varargs` if needed.
+        if (!vb->intersected)
+            vb->max_offset = -1;
     }
 }
 
@@ -796,7 +853,7 @@ static int subtype_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int R, int pa
 // check that a type is concrete or quasi-concrete (Type{T}).
 // this is used to check concrete typevars:
 // issubtype is false if the lower bound of a concrete type var is not concrete.
-static int is_leaf_bound(jl_value_t *v) JL_NOTSAFEPOINT
+int is_leaf_bound(jl_value_t *v) JL_NOTSAFEPOINT
 {
     if (v == jl_bottom_type)
         return 1;
@@ -846,7 +903,7 @@ static jl_value_t *fix_inferred_var_bound(jl_tvar_t *var, jl_value_t *ty JL_MAYB
         JL_GC_PUSH2(&ans, &vs);
         vs = jl_find_free_typevars(ty);
         int i;
-        for (i = 0; i < jl_array_len(vs); i++) {
+        for (i = 0; i < jl_array_nrows(vs); i++) {
             ans = jl_type_unionall((jl_tvar_t*)jl_array_ptr_ref(vs, i), ans);
         }
         ans = (jl_value_t*)jl_new_typevar(var->name, jl_bottom_type, ans);
@@ -872,10 +929,20 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
     // in the environment, rename to get a fresh var.
     JL_GC_PUSH1(&u);
     while (btemp != NULL) {
-        if (btemp->var == u->var ||
-            // outer var can only refer to inner var if bounds changed
+        int aliased = btemp->var == u->var ||
+            // outer var can only refer to inner var if bounds changed (mainly for subtyping path)
             (btemp->lb != btemp->var->lb && jl_has_typevar(btemp->lb, u->var)) ||
-            (btemp->ub != btemp->var->ub && jl_has_typevar(btemp->ub, u->var))) {
+            (btemp->ub != btemp->var->ub && jl_has_typevar(btemp->ub, u->var));
+        if (!aliased && btemp->innervars != NULL) {
+            for (size_t i = 0; i < jl_array_len(btemp->innervars); i++) {
+                jl_tvar_t *ivar = (jl_tvar_t*)jl_array_ptr_ref(btemp->innervars, i);
+                if (ivar == u->var) {
+                    aliased = 1;
+                    break;
+                }
+            }
+        }
+        if (aliased) {
             u = jl_rename_unionall(u);
             break;
         }
@@ -888,7 +955,7 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
 static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
     u = unalias_unionall(u, e);
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0,
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0,
                            e->invdepth, NULL, e->vars };
     JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars);
     e->vars = &vb;
@@ -941,8 +1008,8 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8
             jl_value_t *vl = btemp->lb;
             // TODO: this takes a significant amount of time
             if (btemp->depth0 != vb.depth0 &&
-                ((vu != (jl_value_t*)vb.var && btemp->var->ub != vu && var_occurs_invariant(vu, vb.var)) ||
-                 (vl != (jl_value_t*)vb.var && btemp->var->lb != vl && var_occurs_invariant(vl, vb.var)))) {
+                ((vu != (jl_value_t*)vb.var && btemp->var->ub != vu && var_occurs_inside(vu, vb.var, 0, 0)) ||
+                 (vl != (jl_value_t*)vb.var && btemp->var->lb != vl && var_occurs_inside(vl, vb.var, 0, 0)))) {
                 ans = 0; break;
             }
             btemp = btemp->prev;
@@ -953,7 +1020,7 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8
     if (R && ans && e->envidx < e->envsz) {
         jl_value_t *val;
         if (vb.intvalued && vb.lb == (jl_value_t*)jl_any_type)
-            val = (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0); // special token result that represents N::Int in the envout
+            val = (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0, 0); // special token result that represents N::Int in the envout
         else if (!vb.occurs_inv && vb.lb != jl_bottom_type)
             val = is_leaf_bound(vb.lb) ? vb.lb : (jl_value_t*)jl_new_typevar(u->var->name, jl_bottom_type, vb.lb);
         else if (vb.lb == vb.ub)
@@ -1002,45 +1069,37 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e);
 
 static int subtype_tuple_varargs(
     jl_vararg_t *vtx, jl_vararg_t *vty,
-    size_t vx, size_t vy,
+    jl_value_t *lastx, jl_value_t *lasty,
+    size_t vx, size_t vy, size_t x_reps,
     jl_stenv_t *e, int param)
 {
     jl_value_t *xp0 = jl_unwrap_vararg(vtx); jl_value_t *xp1 = jl_unwrap_vararg_num(vtx);
     jl_value_t *yp0 = jl_unwrap_vararg(vty); jl_value_t *yp1 = jl_unwrap_vararg_num(vty);
 
+    jl_varbinding_t *xlv = NULL, *ylv = NULL;
+    if (xp1 && jl_is_typevar(xp1))
+        xlv = lookup(e, (jl_tvar_t*)xp1);
+    if (yp1 && jl_is_typevar(yp1))
+        ylv = lookup(e, (jl_tvar_t*)yp1);
+
+    int8_t max_offsetx = xlv ? xlv->max_offset : 0;
+    int8_t max_offsety = ylv ? ylv->max_offset : 0;
+
+    jl_value_t *xl = xlv ? xlv->lb : xp1;
+    jl_value_t *yl = ylv ? ylv->lb : yp1;
+
     if (!xp1) {
-        jl_value_t *yl = yp1;
-        if (yl) {
-            // Unconstrained on the left, constrained on the right
-            if (jl_is_typevar(yl)) {
-                jl_varbinding_t *ylv = lookup(e, (jl_tvar_t*)yl);
-                if (ylv)
-                    yl = ylv->lb;
-            }
-            if (jl_is_long(yl)) {
-                return 0;
-            }
-        }
+        // Unconstrained on the left, constrained on the right
+        if (yl && jl_is_long(yl))
+            return 0;
     }
     else {
-        jl_value_t *xl = jl_unwrap_vararg_num(vtx);
-        if (jl_is_typevar(xl)) {
-            jl_varbinding_t *xlv = lookup(e, (jl_tvar_t*)xl);
-            if (xlv)
-                xl = xlv->lb;
-        }
         if (jl_is_long(xl)) {
             if (jl_unbox_long(xl) + 1 == vx) {
                 // LHS is exhausted. We're a subtype if the RHS is either
                 // exhausted as well or unbounded (in which case we need to
                 // set it to 0).
-                jl_value_t *yl = jl_unwrap_vararg_num(vty);
                 if (yl) {
-                    if (jl_is_typevar(yl)) {
-                        jl_varbinding_t *ylv = lookup(e, (jl_tvar_t*)yl);
-                        if (ylv)
-                            yl = ylv->lb;
-                    }
                     if (jl_is_long(yl)) {
                         return jl_unbox_long(yl) + 1 == vy;
                     }
@@ -1053,13 +1112,32 @@ static int subtype_tuple_varargs(
             }
         }
     }
-
-    // in Vararg{T1} <: Vararg{T2}, need to check subtype twice to
-    // simulate the possibility of multiple arguments, which is needed
-    // to implement the diagonal rule correctly.
-    if (!subtype(xp0, yp0, e, param)) return 0;
-    if (!subtype(xp0, yp0, e, 1)) return 0;
-
+    {
+        int x_same = vx > 1 || (lastx && obviously_egal(xp0, lastx));
+        int y_same = vy > 1 || (lasty && obviously_egal(yp0, lasty));
+        // keep track of number of consecutive identical subtyping
+        x_reps = y_same && x_same ? x_reps + 1 : 1;
+        if (x_reps > 2) {
+            // an identical type on the left doesn't need to be compared to the same
+            // element type on the right more than twice.
+        }
+        else if (x_same && e->Runions.depth == 0 && y_same &&
+            !jl_has_free_typevars(xp0) && !jl_has_free_typevars(yp0)) {
+            // fast path for repeated elements
+        }
+        else if ((e->Runions.depth == 0 ? !jl_has_free_typevars(xp0) : jl_is_concrete_type(xp0)) && !jl_has_free_typevars(yp0)) {
+            // fast path for separable sub-formulas
+            if (!jl_subtype(xp0, yp0))
+                return 0;
+        }
+        else {
+            // in Vararg{T1} <: Vararg{T2}, need to check subtype twice to
+            // simulate the possibility of multiple arguments, which is needed
+            // to implement the diagonal rule correctly.
+            if (!subtype(xp0, yp0, e, param)) return 0;
+            if (x_reps < 2 && !subtype(xp0, yp0, e, 1)) return 0;
+        }
+    }
 constrain_length:
     if (!yp1) {
         return 1;
@@ -1090,6 +1168,8 @@ static int subtype_tuple_varargs(
         // appropriately.
         e->invdepth++;
         int ans = subtype((jl_value_t*)jl_any_type, yp1, e, 2);
+        if (ylv && !ylv->intersected)
+            ylv->max_offset = max_offsety;
         e->invdepth--;
         return ans;
     }
@@ -1103,12 +1183,14 @@ static int subtype_tuple_varargs(
     if (bxp1) {
         if (bxp1->intvalued == 0)
             bxp1->intvalued = 1;
+        assert(bxp1->lb); // make static analyzer happy
         if (jl_is_long(bxp1->lb))
             xp1 = bxp1->lb;
     }
     if (byp1) {
         if (byp1->intvalued == 0)
             byp1->intvalued = 1;
+        assert(byp1->lb); // make static analyzer happy
         if (jl_is_long(byp1->lb))
             yp1 = byp1->lb;
     }
@@ -1130,6 +1212,10 @@ static int subtype_tuple_varargs(
         e->Loffset = 0;
     }
     JL_GC_POP();
+    if (ylv && !ylv->intersected)
+        ylv->max_offset = max_offsety;
+    if (xlv && !xlv->intersected)
+        xlv->max_offset = max_offsetx;
     e->invdepth--;
     return ans;
 }
@@ -1182,7 +1268,8 @@ static int subtype_tuple_tail(jl_datatype_t *xd, jl_datatype_t *yd, int8_t R, jl
             return subtype_tuple_varargs(
                 (jl_vararg_t*)xi,
                 (jl_vararg_t*)yi,
-                vx, vy, e, param);
+                lastx, lasty,
+                vx, vy, x_reps, e, param);
         }
 
         if (j >= ly)
@@ -1203,7 +1290,7 @@ static int subtype_tuple_tail(jl_datatype_t *xd, jl_datatype_t *yd, int8_t R, jl
              (yi == lastx && !vx && vy && jl_is_concrete_type(xi)))) {
             // fast path for repeated elements
         }
-        else if (e->Runions.depth == 0 && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) {
+        else if ((e->Runions.depth == 0 ? !jl_has_free_typevars(xi) : jl_is_concrete_type(xi)) && !jl_has_free_typevars(yi)) {
             // fast path for separable sub-formulas
             if (!jl_subtype(xi, yi))
                 return 0;
@@ -1291,6 +1378,9 @@ static int subtype_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, in
     return ans;
 }
 
+static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e);
+static int has_exists_typevar(jl_value_t *x, jl_stenv_t *e) JL_NOTSAFEPOINT;
+
 // `param` means we are currently looking at a parameter of a type constructor
 // (as opposed to being outside any type constructor, or comparing variable bounds).
 // this is used to record the positions where type variables occur for the
@@ -1298,11 +1388,35 @@ static int subtype_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, in
 static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
 {
     if (jl_is_uniontype(x)) {
-        if (x == y) return 1;
+        if (obviously_egal(x, y))
+            return 1;
+        if (e->Runions.depth == 0 && jl_is_typevar(y) && !jl_has_free_typevars(x)) {
+            // Similar to fast path for repeated elements: if there have been no outer
+            // unions on the right, and the right side is a typevar, then we can handle the
+            // typevar first before picking a union element, under the theory that it may
+            // be easy to match or reject this whole union in comparing and setting the lb
+            // and ub of the variable binding, without needing to examine each element.
+            // However, if x contains any free typevars, then each element with a free
+            // typevar must be handled separately from the union of all elements without
+            // free typevars, since the typevars presence might lead to those elements
+            // getting eliminated (omit_bad_union) or degenerate (Union{Ptr{T}, Ptr}) or
+            // combined (Union{T, S} where {T, S <: T}).
+            jl_tvar_t *yvar = (jl_tvar_t *)y;
+            jl_varbinding_t *yb = lookup(e, yvar);
+            while (e->intersection && yb != NULL && yb->lb == yb->ub && jl_is_typevar(yb->lb)) {
+                yvar = (jl_tvar_t *)yb->lb;
+                yb = lookup(e, yvar);
+            }
+            // Note: `x <: ∃y` performs a local ∀-∃ check between `x` and `yb->ub`.
+            // We need to ensure that there's no ∃ typevar as otherwise that check
+            // might cause false alarm due to the accumulated env change.
+            if (yb == NULL || yb->right == 0 || !has_exists_typevar(yb->ub, e))
+                return subtype_var(yvar, x, e, 1, param);
+        }
         x = pick_union_element(x, e, 0);
     }
     if (jl_is_uniontype(y)) {
-        if (x == ((jl_uniontype_t*)y)->a || x == ((jl_uniontype_t*)y)->b)
+        if (obviously_in_union(y, x))
             return 1;
         if (jl_is_unionall(x))
             return subtype_unionall(y, (jl_unionall_t*)x, e, 0, param);
@@ -1341,7 +1455,8 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
                 if (yy) record_var_occurrence(yy, e, param);
                 if (yr) {
                     record_var_occurrence(xx, e, param);
-                    return subtype(xx->lb, yy->ub, e, 0);
+                    int trysub = e->intersection ? try_subtype_by_bounds(xx->lb, yy->ub, e) : 0;
+                    return trysub || subtype(xx->lb, yy->ub, e, 0);
                 }
                 return var_lt((jl_tvar_t*)x, y, e, param);
             }
@@ -1356,11 +1471,14 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
         }
         if (jl_is_unionall(y)) {
             jl_varbinding_t *xb = lookup(e, (jl_tvar_t*)x);
-            if (xb == NULL ? !e->ignore_free : !xb->right) {
+            jl_value_t *xub = xb == NULL ? ((jl_tvar_t *)x)->ub : xb->ub;
+            if ((xb == NULL ? !e->ignore_free : !xb->right) && xub != y) {
                 // We'd better unwrap `y::UnionAll` eagerly if `x` isa ∀-var.
                 // This makes sure the following cases work correct:
                 // 1) `∀T <: Union{∃S, SomeType{P}} where {P}`: `S == Any` ==> `S >: T`
                 // 2) `∀T <: Union{∀T, SomeType{P}} where {P}`:
+                // note: if xub == y we'd better try `subtype_var` as `subtype_left_var`
+                // hit `==` based fast path.
                 return subtype_unionall(x, (jl_unionall_t*)y, e, 1, param);
             }
         }
@@ -1473,39 +1591,33 @@ static int is_definite_length_tuple_type(jl_value_t *x)
     return k == JL_VARARG_NONE || k == JL_VARARG_INT;
 }
 
-static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int *count, int *noRmore);
-
-static int may_contain_union_decision(jl_value_t *x, jl_stenv_t *e, jl_typeenv_t *log) JL_NOTSAFEPOINT
+static int is_exists_typevar(jl_value_t *x, jl_stenv_t *e)
 {
-    if (x == NULL || x == (jl_value_t*)jl_any_type || x == jl_bottom_type)
+    if (!jl_is_typevar(x))
         return 0;
-    if (jl_is_unionall(x))
-        return may_contain_union_decision(((jl_unionall_t *)x)->body, e, log);
-    if (jl_is_datatype(x)) {
-        jl_datatype_t *xd = (jl_datatype_t *)x;
-        for (int i = 0; i < jl_nparams(xd); i++) {
-            jl_value_t *param = jl_tparam(xd, i);
-            if (jl_is_vararg(param))
-                param = jl_unwrap_vararg(param);
-            if (may_contain_union_decision(param, e, log))
-                return 1;
+    jl_varbinding_t *vb = lookup(e, (jl_tvar_t *)x);
+    return vb && vb->right;
+}
+
+static int has_exists_typevar(jl_value_t *x, jl_stenv_t *e) JL_NOTSAFEPOINT
+{
+    jl_typeenv_t *env = NULL;
+    jl_varbinding_t *v = e->vars;
+    while (v != NULL) {
+        if (v->right) {
+            jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t));
+            newenv->var = v->var;
+            newenv->val = NULL;
+            newenv->prev = env;
+            env = newenv;
         }
-        return 0;
-    }
-    if (!jl_is_typevar(x))
-        return jl_is_type(x);
-    jl_typeenv_t *t = log;
-    while (t != NULL) {
-        if (x == (jl_value_t *)t->var)
-            return 1;
-        t = t->prev;
+        v = v->prev;
     }
-    jl_typeenv_t newlog = { (jl_tvar_t*)x, NULL, log };
-    jl_varbinding_t *xb = lookup(e, (jl_tvar_t *)x);
-    return may_contain_union_decision(xb ? xb->lb : ((jl_tvar_t *)x)->lb, e, &newlog) ||
-           may_contain_union_decision(xb ? xb->ub : ((jl_tvar_t *)x)->ub, e, &newlog);
+    return env != NULL && jl_has_bound_typevars(x, env);
 }
 
+static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param);
+
 static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int limit_slow)
 {
     int16_t oldRmore = e->Runions.more;
@@ -1517,25 +1629,20 @@ static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t
     int kindy = !jl_has_free_typevars(y);
     if (kindx && kindy)
         return jl_subtype(x, y);
-    if (may_contain_union_decision(y, e, NULL) && pick_union_decision(e, 1) == 0) {
+    int has_exists = (!kindx && has_exists_typevar(x, e)) ||
+                     (!kindy && has_exists_typevar(y, e));
+    if (!has_exists) {
+        // We can use ∀_∃_subtype safely for ∃ free inputs.
+        // This helps to save some bits in union stack.
         jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
         e->Lunions.used = e->Runions.used = 0;
         e->Lunions.depth = e->Runions.depth = 0;
         e->Lunions.more = e->Runions.more = 0;
-        int count = 0, noRmore = 0;
-        sub = _forall_exists_subtype(x, y, e, param, &count, &noRmore);
+        sub = forall_exists_subtype(x, y, e, param);
         pop_unionstate(&e->Runions, &oldRunions);
-        // we should not try the slow path if `forall_exists_subtype` has tested all cases;
-        // Once limit_slow == 1, also skip it if
-        // 1) `forall_exists_subtype` return false
-        // 2) the left `Union` looks big
-        if (limit_slow == -1)
-            limit_slow = kindx || kindy;
-        if (noRmore || (limit_slow && (count > 3  || !sub)))
-            e->Runions.more = oldRmore;
+        return sub;
     }
-    else {
-        // slow path
+    if (is_exists_typevar(x, e) != is_exists_typevar(y, e)) {
         e->Lunions.used = 0;
         while (1) {
             e->Lunions.more = 0;
@@ -1544,10 +1651,90 @@ static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t
             if (!sub || !next_union_state(e, 0))
                 break;
         }
+        return sub;
     }
+    if (limit_slow == -1)
+        limit_slow = kindx || kindy;
+    jl_savedenv_t se;
+    save_env(e, &se, 1);
+    int count, limited = 0, ini_count = 0;
+    jl_saved_unionstate_t latestLunions = {0, 0, 0, NULL};
+    while (1) {
+        count = ini_count;
+        if (ini_count == 0)
+            e->Lunions.used = 0;
+        else
+            pop_unionstate(&e->Lunions, &latestLunions);
+        while (1) {
+            e->Lunions.more = 0;
+            e->Lunions.depth = 0;
+            if (count < 4) count++;
+            sub = subtype(x, y, e, param);
+            if (limit_slow && count == 4)
+                limited = 1;
+            if (!sub || !next_union_state(e, 0))
+                break;
+            if (limited || e->Runions.more == oldRmore) {
+                // re-save env and freeze the ∃decision for previous ∀Union
+                // Note: We could ignore the rest `∃Union` decisions if `x` and `y`
+                // contain no ∃ typevar, as they have no effect on env.
+                ini_count = count;
+                push_unionstate(&latestLunions, &e->Lunions);
+                re_save_env(e, &se, 1);
+                e->Runions.more = oldRmore;
+            }
+        }
+        if (sub || e->Runions.more == oldRmore)
+            break;
+        assert(e->Runions.more > oldRmore);
+        next_union_state(e, 1);
+        restore_env(e, &se, 1); // also restore Rdepth here
+        e->Runions.more = oldRmore;
+    }
+    if (!sub)
+        assert(e->Runions.more == oldRmore);
+    else if (limited)
+        e->Runions.more = oldRmore;
+    free_env(&se);
     return sub;
 }
 
+static int equal_var(jl_tvar_t *v, jl_value_t *x, jl_stenv_t *e)
+{
+    assert(e->Loffset == 0);
+    // Theoretically bounds change would be merged for union inputs.
+    // But intersection is not happy as splitting helps to avoid circular env.
+    assert(!e->intersection || !jl_is_uniontype(x));
+    jl_varbinding_t *vb = lookup(e, v);
+    if (e->intersection && vb != NULL && vb->lb == vb->ub && jl_is_typevar(vb->lb))
+        return equal_var((jl_tvar_t *)vb->lb, x, e);
+    record_var_occurrence(vb, e, 2);
+    if (vb == NULL)
+        return e->ignore_free || (
+            local_forall_exists_subtype(x, v->lb, e, 2, !jl_has_free_typevars(x)) &&
+            local_forall_exists_subtype(v->ub, x, e, 0, 0));
+    if (!vb->right)
+        return local_forall_exists_subtype(x, vb->lb, e, 2, !jl_has_free_typevars(x)) &&
+               local_forall_exists_subtype(vb->ub, x, e, 0, 0);
+    if (vb->lb == x)
+        return var_lt(v, x, e, 0);
+    if (!subtype_ccheck(x, vb->ub, e))
+        return 0;
+    jl_value_t *lb = simple_join(vb->lb, x);
+    JL_GC_PUSH1(&lb);
+    if (!e->intersection || !jl_is_typevar(lb) || !reachable_var(lb, v, e))
+        vb->lb = lb;
+    JL_GC_POP();
+    if (vb->ub == x)
+        return 1;
+    if (!subtype_ccheck(vb->lb, x, e))
+        return 0;
+    // skip `simple_meet` here as we have proven `x <: vb->ub`
+    if (!e->intersection || !reachable_var(x, v, e))
+        vb->ub = x;
+    return 1;
+}
+
 static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
     if (obviously_egal(x, y)) return 1;
@@ -1578,6 +1765,12 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         }
     }
 
+    if (e->Loffset == 0 && jl_is_typevar(y) && jl_is_type(x) && (!e->intersection || !jl_is_uniontype(x))) {
+        // Fastpath for Type == TypeVar.
+        // Avoid duplicated `<:` check between adjacent `var_gt` and `var_lt`
+        return equal_var((jl_tvar_t *)y, x, e);
+    }
+
     jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
 
     int sub = local_forall_exists_subtype(x, y, e, 2, -1);
@@ -1614,7 +1807,7 @@ static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_savede
     }
 }
 
-static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int *count, int *noRmore)
+static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
 {
     // The depth recursion has the following shape, after simplification:
     // ∀₁
@@ -1626,12 +1819,8 @@ static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, i
 
     e->Lunions.used = 0;
     int sub;
-    if (count) *count = 0;
-    if (noRmore) *noRmore = 1;
     while (1) {
         sub = exists_subtype(x, y, e, &se, param);
-        if (count) *count = (*count < 4) ? *count + 1 : 4;
-        if (noRmore) *noRmore = *noRmore && e->Runions.more == 0;
         if (!sub || !next_union_state(e, 0))
             break;
         re_save_env(e, &se, 1);
@@ -1641,11 +1830,6 @@ static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, i
     return sub;
 }
 
-static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
-{
-    return _forall_exists_subtype(x, y, e, param, NULL, NULL);
-}
-
 static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz)
 {
     e->vars = NULL;
@@ -1665,6 +1849,8 @@ static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz)
     e->Lunions.depth = 0;      e->Runions.depth = 0;
     e->Lunions.more = 0;       e->Runions.more = 0;
     e->Lunions.used = 0;       e->Runions.used = 0;
+    e->Lunions.stack.next = NULL;
+    e->Runions.stack.next = NULL;
 }
 
 // subtyping entry points
@@ -1991,7 +2177,7 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su
                     if (var_occurs_invariant(body, (jl_tvar_t*)b))
                         return 0;
                 }
-                if (nparams_expanded_x > npy && jl_is_typevar(b) && concrete_min(a1) > 1) {
+                if (nparams_expanded_x > npy && jl_is_typevar(b) && is_leaf_typevar((jl_tvar_t *)b) && concrete_min(a1) > 1) {
                     // diagonal rule for 2 or more elements: they must all be concrete on the LHS
                     *subtype = 0;
                     return 1;
@@ -2002,7 +2188,7 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su
                 }
                 for (; i < nparams_expanded_x; i++) {
                     jl_value_t *a = (vx != JL_VARARG_NONE && i >= npx - 1) ? vxt : jl_tparam(x, i);
-                    if (i > npy && jl_is_typevar(b)) { // i == npy implies a == a1
+                    if (i > npy && jl_is_typevar(b) && is_leaf_typevar((jl_tvar_t *)b)) { // i == npy implies a == a1
                         // diagonal rule: all the later parameters are also constrained to be type-equal to the first
                         jl_value_t *a2 = a;
                         jl_value_t *au = jl_unwrap_unionall(a);
@@ -2094,6 +2280,7 @@ JL_DLLEXPORT int jl_subtype_env(jl_value_t *x, jl_value_t *y, jl_value_t **env,
     }
     init_stenv(&e, env, envsz);
     int subtype = forall_exists_subtype(x, y, &e, 0);
+    free_stenv(&e);
     assert(obvious_subtype == 3 || obvious_subtype == subtype || jl_has_free_typevars(x) || jl_has_free_typevars(y));
 #ifndef NDEBUG
     if (obvious_subtype == 0 || (obvious_subtype == 1 && envsz == 0))
@@ -2186,6 +2373,7 @@ JL_DLLEXPORT int jl_types_equal(jl_value_t *a, jl_value_t *b)
     {
         init_stenv(&e, NULL, 0);
         int subtype = forall_exists_subtype(a, b, &e, 0);
+        free_stenv(&e);
         assert(subtype_ab == 3 || subtype_ab == subtype || jl_has_free_typevars(a) || jl_has_free_typevars(b));
 #ifndef NDEBUG
         if (subtype_ab != 0 && subtype_ab != 1) // ensures that running in a debugger doesn't change the result
@@ -2202,6 +2390,7 @@ JL_DLLEXPORT int jl_types_equal(jl_value_t *a, jl_value_t *b)
     {
         init_stenv(&e, NULL, 0);
         int subtype = forall_exists_subtype(b, a, &e, 0);
+        free_stenv(&e);
         assert(subtype_ba == 3 || subtype_ba == subtype || jl_has_free_typevars(a) || jl_has_free_typevars(b));
 #ifndef NDEBUG
         if (subtype_ba != 0 && subtype_ba != 1) // ensures that running in a debugger doesn't change the result
@@ -2356,28 +2545,53 @@ static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e,
         return y;
     if (y == (jl_value_t*)jl_any_type && !jl_is_typevar(x))
         return x;
-    // band-aid for #46736
-    if (obviously_egal(x, y))
+    // band-aid for #46736 #56040
+    if (obviously_in_union(x, y))
+        return y;
+    if (obviously_in_union(y, x))
         return x;
 
+    jl_varbinding_t *vars = NULL;
+    jl_varbinding_t *bbprev = NULL;
+    jl_varbinding_t *xb = jl_is_typevar(x) ? lookup(e, (jl_tvar_t *)x) : NULL;
+    jl_varbinding_t *yb = jl_is_typevar(y) ? lookup(e, (jl_tvar_t *)y) : NULL;
+    int simple_x = !jl_has_free_typevars(!jl_is_typevar(x) ? x : xb ? xb->ub : ((jl_tvar_t *)x)->ub);
+    int simple_y = !jl_has_free_typevars(!jl_is_typevar(y) ? y : yb ? yb->ub : ((jl_tvar_t *)y)->ub);
+    if (simple_x && simple_y && !(xb && yb)) {
+        vars = e->vars;
+        e->vars = xb ? xb : yb;
+        if (e->vars != NULL) {
+            bbprev = e->vars->prev;
+            e->vars->prev = NULL;
+        }
+    }
     jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
     int savedepth = e->invdepth;
     e->invdepth = depth;
     jl_value_t *res = intersect_all(x, y, e);
     e->invdepth = savedepth;
     pop_unionstate(&e->Runions, &oldRunions);
+    if (bbprev) e->vars->prev = bbprev;
+    if (vars) e->vars = vars;
     return res;
 }
 
 static jl_value_t *intersect_union(jl_value_t *x, jl_uniontype_t *u, jl_stenv_t *e, int8_t R, int param)
 {
-    if (param == 2 || (!jl_has_free_typevars(x) && !jl_has_free_typevars((jl_value_t*)u))) {
+    int no_free = !jl_has_free_typevars(x) && !jl_has_free_typevars((jl_value_t*)u);
+    if (param == 2 || no_free) {
         jl_value_t *a=NULL, *b=NULL;
         JL_GC_PUSH2(&a, &b);
+        jl_varbinding_t *vars = NULL;
+        if (no_free) {
+            vars = e->vars;
+            e->vars = NULL;
+        }
         jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
         a = R ? intersect_all(x, u->a, e) : intersect_all(u->a, x, e);
         b = R ? intersect_all(x, u->b, e) : intersect_all(u->b, x, e);
         pop_unionstate(&e->Runions, &oldRunions);
+        if (vars) e->vars = vars;
         jl_value_t *i = simple_join(a,b);
         JL_GC_POP();
         return i;
@@ -2450,7 +2664,7 @@ static jl_value_t *bound_var_below(jl_tvar_t *tv, jl_varbinding_t *bb, jl_stenv_
 
 static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOTSAFEPOINT;
 
-// similar to `subtype_by_bounds`, used to avoid stack-overflow caused by circulation constraints.
+// similar to `subtype_by_bounds`, used to avoid stack-overflow caused by circular constraints.
 static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e)
 {
     if (jl_is_uniontype(a))
@@ -2459,22 +2673,21 @@ static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e)
     else if (jl_is_uniontype(b))
         return try_subtype_by_bounds(a, ((jl_uniontype_t *)b)->a, e) ||
                try_subtype_by_bounds(a, ((jl_uniontype_t *)b)->b, e);
-    else if (jl_egal(a, b))
+    else if (a == jl_bottom_type || b == (jl_value_t *)jl_any_type || obviously_egal(a, b))
         return 1;
     else if (!jl_is_typevar(b))
         return 0;
-    jl_varbinding_t *vb = e->vars;
-    while (vb != NULL) {
-        if (subtype_by_bounds(b, (jl_value_t *)vb->var, e) && obviously_in_union(a, vb->ub))
-            return 1;
-        vb = vb->prev;
-    }
-    return 0;
+    else if (jl_is_typevar(a) && subtype_by_bounds(a, b, e))
+        return 1;
+    // check if `Union{a, ...} <: b`.
+    jl_varbinding_t *vb = lookup(e, (jl_tvar_t *)b);
+    jl_value_t *blb = vb ? vb->lb : ((jl_tvar_t *)b)->lb;
+    return obviously_in_union(a, blb);
 }
 
 static int try_subtype_in_env(jl_value_t *a, jl_value_t *b, jl_stenv_t *e)
 {
-    if (a == jl_bottom_type || b == (jl_value_t *)jl_any_type || try_subtype_by_bounds(a, b, e))
+    if (try_subtype_by_bounds(a, b, e))
         return 1;
     jl_savedenv_t se;
     save_env(e, &se, 1);
@@ -2501,31 +2714,22 @@ static void set_bound(jl_value_t **bound, jl_value_t *val, jl_tvar_t *v, jl_sten
 // subtype, treating all vars as existential
 static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
-    jl_varbinding_t *v = e->vars;
-    int len = 0;
-    if (x == jl_bottom_type || y == (jl_value_t*)jl_any_type)
+    if (x == jl_bottom_type || y == (jl_value_t*)jl_any_type || obviously_in_union(y, x))
         return 1;
-    while (v != NULL) {
-        len++;
-        v = v->prev;
-    }
-    int8_t *rs = (int8_t*)malloc_s(len);
+    int8_t *rs = (int8_t*)alloca(current_env_length(e));
+    jl_varbinding_t *v = e->vars;
     int n = 0;
-    v = e->vars;
-    while (n < len) {
-        assert(v != NULL);
+    while (v != NULL) {
         rs[n++] = v->right;
         v->right = 1;
         v = v->prev;
     }
     int issub = subtype_in_env(x, y, e);
     n = 0; v = e->vars;
-    while (n < len) {
-        assert(v != NULL);
+    while (v != NULL) {
         v->right = rs[n++];
         v = v->prev;
     }
-    free(rs);
     return issub;
 }
 
@@ -2573,6 +2777,8 @@ static int check_unsat_bound(jl_value_t *t, jl_tvar_t *v, jl_stenv_t *e) JL_NOTS
 }
 
 
+static int intersect_var_ccheck_in_env(jl_value_t *xlb, jl_value_t *xub, jl_value_t *ylb, jl_value_t *yub, jl_stenv_t *e, int flip);
+
 static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int8_t R, int param)
 {
     jl_varbinding_t *bb = lookup(e, b);
@@ -2584,20 +2790,14 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         return R ? intersect(a, bb->lb, e, param) : intersect(bb->lb, a, e, param);
     if (!jl_is_type(a) && !jl_is_typevar(a))
         return set_var_to_const(bb, a, e, R);
-    jl_savedenv_t se;
     if (param == 2) {
         jl_value_t *ub = NULL;
         JL_GC_PUSH1(&ub);
         if (!jl_has_free_typevars(a)) {
-            save_env(e, &se, 1);
-            int issub = subtype_in_env_existential(bb->lb, a, e);
-            restore_env(e, &se, 1);
-            if (issub) {
-                issub = subtype_in_env_existential(a, bb->ub, e);
-                restore_env(e, &se, 1);
-            }
-            free_env(&se);
-            if (!issub) {
+            if (R) flip_offset(e);
+            int ccheck = intersect_var_ccheck_in_env(bb->lb, bb->ub, a, a, e, !R);
+            if (R) flip_offset(e);
+            if (!ccheck) {
                 JL_GC_POP();
                 return jl_bottom_type;
             }
@@ -2607,6 +2807,7 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
             e->triangular++;
             ub = R ? intersect_aside(a, bb->ub, e, bb->depth0) : intersect_aside(bb->ub, a, e, bb->depth0);
             e->triangular--;
+            jl_savedenv_t se;
             save_env(e, &se, 1);
             int issub = subtype_in_env_existential(bb->lb, ub, e);
             restore_env(e, &se, 1);
@@ -2636,7 +2837,7 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
     jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, bb->depth0) : intersect_aside(bb->ub, a, e, bb->depth0);
     if (ub == jl_bottom_type)
         return jl_bottom_type;
-    if (bb->constraintkind == 1 || e->triangular) {
+    if (bb->constraintkind == 1 || (e->triangular && param == 1)) {
         if (e->triangular && check_unsat_bound(ub, b, e))
             return jl_bottom_type;
         set_bound(&bb->ub, ub, b, e);
@@ -2721,32 +2922,30 @@ static jl_value_t *omit_bad_union(jl_value_t *u, jl_tvar_t *t)
         jl_tvar_t *var = ((jl_unionall_t *)u)->var;
         jl_value_t *ub = var->ub, *body = ((jl_unionall_t *)u)->body;
         assert(var != t);
-        if (!jl_has_typevar(var->lb, t)) {
-            JL_GC_PUSH3(&ub, &body, &var);
-            body = omit_bad_union(body, t);
-            if (!jl_has_typevar(body, var)) {
-                res = body;
+        JL_GC_PUSH3(&ub, &body, &var);
+        body = omit_bad_union(body, t);
+        if (!jl_has_typevar(body, var)) {
+            res = body;
+        }
+        else if (jl_has_typevar(var->lb, t)) {
+            res = jl_bottom_type;
+        }
+        else {
+            ub = omit_bad_union(ub, t);
+            if (ub == jl_bottom_type && var->lb != ub) {
+                res = jl_bottom_type;
             }
-            else {
-                ub = omit_bad_union(ub, t);
-                if (ub == jl_bottom_type && var->lb != ub) {
+            else if (obviously_egal(var->lb, ub)) {
+                res = jl_substitute_var_nothrow(body, var, ub, 2);
+                if (res == NULL)
                     res = jl_bottom_type;
+            }
+            else {
+                if (ub != var->ub) {
+                    var = jl_new_typevar(var->name, var->lb, ub);
+                    body = jl_substitute_var(body, ((jl_unionall_t *)u)->var, (jl_value_t *)var);
                 }
-                else if (obviously_egal(var->lb, ub)) {
-                    JL_TRY {
-                        res = jl_substitute_var(body, var, ub);
-                    }
-                    JL_CATCH {
-                        res = jl_bottom_type;
-                    }
-                }
-                else {
-                    if (ub != var->ub) {
-                        var = jl_new_typevar(var->name, var->lb, ub);
-                        body = jl_substitute_var(body, ((jl_unionall_t *)u)->var, (jl_value_t *)var);
-                    }
-                    res = jl_new_struct(jl_unionall_type, var, body);
-                }
+                res = jl_new_struct(jl_unionall_type, var, body);
             }
         }
         JL_GC_POP();
@@ -2767,12 +2966,56 @@ static jl_value_t *omit_bad_union(jl_value_t *u, jl_tvar_t *t)
     return res;
 }
 
+// TODO: fuse with reachable_var?
+static int has_typevar_via_flatten_env(jl_value_t *x, jl_tvar_t *t, jl_ivarbinding_t *allvars, int8_t *checked) {
+    if (jl_is_unionall(x)) {
+        jl_tvar_t *var = ((jl_unionall_t *)x)->var;
+        if (has_typevar_via_flatten_env(var->lb, t, allvars, checked) ||
+            has_typevar_via_flatten_env(var->ub, t, allvars, checked))
+            return 1;
+        return has_typevar_via_flatten_env(((jl_unionall_t *)x)->body, t, allvars, checked);
+    }
+    else if (jl_is_uniontype(x)) {
+        return has_typevar_via_flatten_env(((jl_uniontype_t *)x)->a, t, allvars, checked) ||
+            has_typevar_via_flatten_env(((jl_uniontype_t *)x)->b, t, allvars, checked);
+    }
+    else if (jl_is_vararg(x)) {
+        jl_vararg_t *v = (jl_vararg_t *)x;
+        return (v->T && has_typevar_via_flatten_env(v->T, t, allvars, checked)) ||
+            (v->N && has_typevar_via_flatten_env(v->N, t, allvars, checked));
+    }
+    else if (jl_is_datatype(x)) {
+        for (size_t i = 0; i < jl_nparams(x); i++) {
+            if (has_typevar_via_flatten_env(jl_tparam(x, i), t, allvars, checked))
+                return 1;
+        }
+        return 0;
+    }
+    else if (jl_is_typevar(x)) {
+        if (t == (jl_tvar_t *)x)
+            return 1;
+        size_t ind = 0;
+        jl_ivarbinding_t *itemp = allvars;
+        while (itemp && *itemp->var != (jl_tvar_t *)x)
+        {
+            ind++;
+            itemp = itemp->next;
+        }
+        if (itemp == NULL || checked[ind])
+            return 0;
+        checked[ind] = 1;
+        return has_typevar_via_flatten_env(*itemp->lb, t, allvars, checked) ||
+            has_typevar_via_flatten_env(*itemp->ub, t, allvars, checked);
+    }
+    return 0;
+}
+
 // Caller might not have rooted `res`
 static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_unionall_t *u, jl_stenv_t *e)
 {
-    jl_value_t *varval = NULL;
-    jl_tvar_t *newvar = vb->var;
-    JL_GC_PUSH2(&res, &newvar);
+    jl_value_t *varval = NULL, *ilb = NULL, *iub = NULL, *nivar = NULL;
+    jl_tvar_t *newvar = vb->var, *ivar = NULL;
+    JL_GC_PUSH6(&res, &newvar, &ivar, &nivar, &ilb, &iub);
     // try to reduce var to a single value
     if (jl_is_long(vb->ub) && jl_is_typevar(vb->lb)) {
         varval = vb->ub;
@@ -2805,126 +3048,304 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
     if (!varval && (vb->lb != vb->var->lb || vb->ub != vb->var->ub))
         newvar = jl_new_typevar(vb->var->name, vb->lb, vb->ub);
 
-    // remove/replace/rewrap free occurrences of this var in the environment
-    jl_varbinding_t *wrap = NULL;
+    // flatten all innervar into a (reversed) list
+    size_t icount = 0;
+    if (vb->innervars)
+        icount += jl_array_nrows(vb->innervars);
+    for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) {
+        if (btemp->innervars != NULL)
+            icount += jl_array_nrows(btemp->innervars);
+    }
+    jl_svec_t *p = NULL;
+    jl_value_t **iparams;
+    jl_value_t **roots;
+    JL_GC_PUSHARGS(roots, icount < 22 ? 3*icount : 1);
+    if (icount < 22) {
+        iparams = roots;
+    }
+    else {
+        p = jl_alloc_svec(3*icount);
+        roots[0] = (jl_value_t*)p;
+        iparams = jl_svec_data(p);
+    }
+    jl_ivarbinding_t *allvars = NULL;
+    size_t niparams = 0;
+    if (vb->innervars) {
+        for (size_t i = 0; i < jl_array_nrows(vb->innervars); i++) {
+            jl_tvar_t *ivar = (jl_tvar_t *)jl_array_ptr_ref(vb->innervars, i);
+            jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t));
+            inew->var = (jl_tvar_t **)&iparams[niparams++]; *inew->var = ivar;
+            inew->lb = &iparams[niparams++]; *inew->lb = ivar->lb;
+            inew->ub = &iparams[niparams++]; *inew->ub = ivar->ub;
+            inew->root = vb;
+            inew->next = allvars;
+            allvars = inew;
+        }
+    }
     for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) {
-        if (jl_has_typevar(btemp->lb, vb->var)) {
-            if (vb->lb == (jl_value_t*)btemp->var) {
+        jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t));
+        inew->var = &btemp->var;
+        inew->lb = &btemp->lb;
+        inew->ub = &btemp->ub;
+        inew->root = btemp;
+        inew->next = allvars;
+        allvars = inew;
+        if (btemp->innervars) {
+            for (size_t i = 0; i < jl_array_nrows(btemp->innervars); i++) {
+                jl_tvar_t *ivar = (jl_tvar_t *)jl_array_ptr_ref(btemp->innervars, i);
+                jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t));
+                inew->var = (jl_tvar_t **)&iparams[niparams++]; *inew->var = ivar;
+                inew->lb = &iparams[niparams++]; *inew->lb = ivar->lb;
+                inew->ub = &iparams[niparams++]; *inew->ub = ivar->ub;
+                inew->root = btemp;
+                inew->next = allvars;
+                allvars = inew;
+            }
+        }
+    }
+
+    // remove/replace/rewrap free occurrences of this var in the environment
+    int wrapped = 0;
+    jl_ivarbinding_t *pwrap = NULL;
+    int vcount = icount + current_env_length(e);
+    int8_t *checked = (int8_t *)alloca(vcount);
+    for (jl_ivarbinding_t *btemp = allvars, *pbtemp = NULL; btemp != NULL; btemp = btemp->next) {
+        int bdepth0 = btemp->root->depth0;
+        int innerflag = 0;
+        ivar = *btemp->var;
+        ilb = *btemp->lb;
+        iub = *btemp->ub;
+        if (jl_has_typevar(ilb, vb->var)) {
+            assert(btemp->root->var == ivar || bdepth0 == vb->depth0);
+            if (vb->lb == (jl_value_t*)ivar) {
+                JL_GC_POP();
                 JL_GC_POP();
                 return jl_bottom_type;
             }
             if (varval) {
                 JL_TRY {
-                    btemp->lb = jl_substitute_var(btemp->lb, vb->var, varval);
+                    *btemp->lb = jl_substitute_var(ilb, vb->var, varval);
                 }
                 JL_CATCH {
                     res = jl_bottom_type;
                 }
             }
-            else if (btemp->lb == (jl_value_t*)vb->var) {
-                btemp->lb = vb->lb;
-            }
-            else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && !jl_has_typevar(vb->ub, btemp->var)) {
-                // if our variable is T, and some outer variable has constraint S = Ref{T},
-                // move the `where T` outside `where S` instead of putting it here. issue #21243.
-                if (newvar != vb->var)
-                    btemp->lb = jl_substitute_var(btemp->lb, vb->var, (jl_value_t*)newvar);
-                wrap = btemp;
+            else if (ilb == (jl_value_t*)vb->var) {
+                *btemp->lb = vb->lb;
             }
             else {
-                btemp->lb = jl_new_struct(jl_unionall_type, vb->var, btemp->lb);
+                innerflag |= 1;
             }
-            assert((jl_value_t*)btemp->var != btemp->lb);
         }
-        if (jl_has_typevar(btemp->ub, vb->var)) {
-            if (vb->ub == (jl_value_t*)btemp->var) {
-                btemp->ub = omit_bad_union(btemp->ub, vb->var);
-                if (btemp->ub == jl_bottom_type && btemp->ub != btemp->lb) {
+        if (jl_has_typevar(iub, vb->var)) {
+            assert(btemp->root->var == ivar || bdepth0 == vb->depth0);
+            if (vb->ub == (jl_value_t*)ivar) {
+                *btemp->ub = omit_bad_union(iub, vb->var);
+                if (*btemp->ub == jl_bottom_type && *btemp->ub != *btemp->lb) {
+                    JL_GC_POP();
                     JL_GC_POP();
                     return jl_bottom_type;
                 }
             }
             if (varval) {
-                JL_TRY {
-                    btemp->ub = jl_substitute_var(btemp->ub, vb->var, varval);
-                }
-                JL_CATCH {
+                iub = jl_substitute_var_nothrow(iub, vb->var, varval, 2);
+                if (iub == NULL)
                     res = jl_bottom_type;
-                }
+                else
+                    *btemp->ub = iub;
             }
-            else if (btemp->ub == (jl_value_t*)vb->var) {
+            else if (iub == (jl_value_t*)vb->var) {
                 // TODO: this loses some constraints, such as in this test, where we replace T4<:S3 (e.g. T4==S3 since T4 only appears covariantly once) with T4<:Any
                 // a = Tuple{Float64,T3,T4} where T4 where T3
                 // b = Tuple{S2,Tuple{S3},S3} where S2 where S3
                 // Tuple{Float64, T3, T4} where {S3, T3<:Tuple{S3}, T4<:S3}
-                btemp->ub = vb->ub;
+                *btemp->ub = vb->ub;
             }
-            else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && !jl_has_typevar(vb->ub, btemp->var)) {
-                if (newvar != vb->var)
-                    btemp->ub = jl_substitute_var(btemp->ub, vb->var, (jl_value_t*)newvar);
-                wrap = btemp;
+            else {
+                innerflag |= 2;
             }
-            else
-                btemp->ub = jl_new_struct(jl_unionall_type, vb->var, btemp->ub);
-            assert((jl_value_t*)btemp->var != btemp->ub);
+            if (innerflag) {
+                memset(checked, 0, vcount);
+                if (bdepth0 != vb->depth0 ||
+                    has_typevar_via_flatten_env(vb->lb, ivar, allvars, checked) ||
+                    has_typevar_via_flatten_env(vb->ub, ivar, allvars, checked)) {
+                    if (innerflag & 1)
+                        *btemp->lb = jl_new_struct(jl_unionall_type, vb->var, ilb);
+                    if (innerflag & 2)
+                        *btemp->ub = jl_new_struct(jl_unionall_type, vb->var, iub);
+                }
+                else {
+                    assert(btemp->root != vb);
+                    // if our variable is T, and some outer variable has constraint S = Ref{T},
+                    // move the `where T` outside `where S` instead of putting it here. issue #21243.
+                    if (newvar != vb->var) {
+                        if (innerflag & 1)
+                            *btemp->lb = jl_substitute_var(ilb, vb->var, (jl_value_t*)newvar);
+                        if (innerflag & 2)
+                            *btemp->ub = jl_substitute_var(iub, vb->var, (jl_value_t*)newvar);
+                    }
+                    if (!wrapped)
+                        pwrap = pbtemp;
+                    wrapped = 1;
+                }
+            }
+            assert((jl_value_t*)ivar != *btemp->lb);
+            assert((jl_value_t*)ivar != *btemp->ub);
+        }
+        pbtemp = btemp;
+    }
+
+    // Insert the newvar into the (reversed) var list if needed.
+    if (wrapped) {
+        jl_ivarbinding_t *wrap = pwrap == NULL ? allvars : pwrap->next;
+        jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t));
+        inew->var = &newvar;
+        inew->lb = &newvar->lb;
+        inew->ub = &newvar->ub;;
+        inew->root = wrap->root;
+        inew->next = wrap;
+        if (pwrap != NULL)
+            pwrap->next = inew;
+        else
+            allvars = inew;
+        vcount++;
+    }
+
+    // Re-sort the innervar inside the (reversed) var list.
+    // `jl_has_typevar` is used as the partial-ordering predicate.
+    // If this is slow, we could possibly switch to a simpler graph sort, such as Tarjan's SCC.
+    if (icount > 0) {
+        jl_ivarbinding_t *pib1 = NULL;
+#ifndef NDEBUG
+        size_t sort_count = 0;
+#endif
+        while (1) {
+            jl_ivarbinding_t *ib1 = pib1 == NULL ? allvars : pib1->next;
+            if (ib1 == NULL) break;
+            assert((++sort_count) <= (vcount * (vcount + 1)) >> 1);
+            int lbfree = jl_has_free_typevars(*ib1->lb);
+            int ubfree = jl_has_free_typevars(*ib1->ub);
+            if (lbfree || ubfree) {
+                int changed = 0;
+                jl_ivarbinding_t *pib2 = ib1, *ib2 = ib1->next;
+                while (ib2 != NULL) {
+                    int isinnervar = ib2->root->var != *ib2->var;
+                    if (isinnervar && ib1->root->depth0 == ib2->root->depth0 &&
+                        ((lbfree && jl_has_typevar(*ib1->lb, *ib2->var)) ||
+                         (ubfree && jl_has_typevar(*ib1->ub, *ib2->var)))) {
+                        pib2->next = ib2->next;
+                        ib2->next = ib1;
+                        ib2->root = ib1->root;
+                        if (pib1)
+                            pib1->next = ib2;
+                        else
+                            allvars = ib2;
+                        changed = 1;
+                        break;
+                    }
+                    pib2 = ib2;
+                    ib2 = ib2->next;
+                }
+                if (changed) continue;
+            }
+            pib1 = ib1;
+        }
+    }
+
+    // Freeze the innervars' lb/ub and perform substitution if needed.
+    for (jl_ivarbinding_t *btemp1 = allvars; btemp1 != NULL; btemp1 = btemp1->next) {
+        ivar = *btemp1->var;
+        ilb = *btemp1->lb;
+        iub = *btemp1->ub;
+        int isinnervar = btemp1->root->var != ivar;
+        if (isinnervar && (ivar->lb != ilb || ivar->ub != iub)) {
+            nivar = (jl_value_t *)jl_new_typevar(ivar->name, ilb, iub);
+            if (jl_has_typevar(res, ivar))
+                res = jl_substitute_var(res, ivar, nivar);
+            for (jl_ivarbinding_t *btemp2 = btemp1->next; btemp2 != NULL; btemp2 = btemp2->next) {
+                ilb = *btemp2->lb;
+                iub = *btemp2->ub;
+                if (jl_has_typevar(ilb, ivar))
+                    *btemp2->lb = jl_substitute_var(ilb, ivar, nivar);
+                if (jl_has_typevar(iub, ivar))
+                    *btemp2->ub = jl_substitute_var(iub, ivar, nivar);
+            }
+            if (!wrapped && !varval) {
+                // newvar also needs bounds substitution.
+                if (jl_has_typevar(vb->lb, ivar))
+                    vb->lb = jl_substitute_var(vb->lb, ivar, nivar);
+                if (jl_has_typevar(vb->ub, ivar))
+                    vb->ub = jl_substitute_var(vb->ub, ivar, nivar);
+            }
+            *btemp1->var = (jl_tvar_t *)nivar;
         }
     }
 
-    if (wrap) {
-        // We only assign the newvar with the outmost var.
-        // This make sure we never create a UnionAll with 2 identical vars.
-        if (wrap->innervars == NULL)
-            wrap->innervars = jl_alloc_array_1d(jl_array_any_type, 0);
-        jl_array_ptr_1d_push(wrap->innervars, (jl_value_t*)newvar);
-        // TODO: should we move all the innervars here too?
+    // Switch back the innervars' storage.
+    while (1) {
+        jl_ivarbinding_t *btemp = allvars;
+        jl_varbinding_t *root = btemp ? btemp->root : vb;
+        size_t icount = 0;
+        while (btemp && btemp->root == root) {
+            btemp = btemp->next;
+            icount++;
+        }
+        if (root != vb) icount--;
+        if (root->innervars != NULL) {
+            jl_array_t *rinnervars = root->innervars;
+            JL_GC_PROMISE_ROOTED(rinnervars);
+            size_t len = jl_array_nrows(rinnervars);
+            if (icount > len)
+                jl_array_grow_end(rinnervars, icount - len);
+            if (icount < len)
+                jl_array_del_end(rinnervars, len - icount);
+        }
+        else if (icount > 0) {
+            root->innervars = jl_alloc_array_1d(jl_array_any_type, icount);
+        }
+        btemp = allvars;
+        for (size_t i = icount; i > 0; i--) {
+            jl_array_ptr_set(root->innervars, i - 1, (jl_value_t*)*btemp->var);
+            btemp = btemp->next;
+        }
+        if (root == vb) break;
+        assert(*btemp->var == root->var);
+        allvars = btemp->next;
+        assert(allvars == NULL || allvars->root != root);
     }
+    JL_GC_POP();
 
     // if `v` still occurs, re-wrap body in `UnionAll v` or eliminate the UnionAll
     if (jl_has_typevar(res, vb->var)) {
         if (varval) {
-            JL_TRY {
-                // you can construct `T{x} where x` even if T's parameter is actually
-                // limited. in that case we might get an invalid instantiation here.
-                res = jl_substitute_var(res, vb->var, varval);
-                // simplify chains of UnionAlls where bounds become equal
-                while (jl_is_unionall(res) && obviously_egal(((jl_unionall_t*)res)->var->lb,
-                                                             ((jl_unionall_t*)res)->var->ub))
-                    res = jl_instantiate_unionall((jl_unionall_t*)res, ((jl_unionall_t*)res)->var->lb);
+            // you can construct `T{x} where x` even if T's parameter is actually
+            // limited. in that case we might get an invalid instantiation here.
+            res = jl_substitute_var_nothrow(res, vb->var, varval, 2);
+            // simplify chains of UnionAlls where bounds become equal
+            while (res != NULL && jl_is_unionall(res) && obviously_egal(((jl_unionall_t*)res)->var->lb,
+                                                         ((jl_unionall_t*)res)->var->ub)) {
+                jl_unionall_t * ures = (jl_unionall_t *)res;
+                res = jl_substitute_var_nothrow(ures->body, ures->var, ures->var->lb, 2);
             }
-            JL_CATCH {
+            if (res == NULL)
                 res = jl_bottom_type;
-            }
         }
         else {
+            // re-fresh newvar if bounds changed.
+            if (vb->lb != newvar->lb || vb->ub != newvar->ub)
+                newvar = jl_new_typevar(newvar->name, vb->lb, vb->ub);
             if (newvar != vb->var)
                 res = jl_substitute_var(res, vb->var, (jl_value_t*)newvar);
             varval = (jl_value_t*)newvar;
-            if (!wrap)
+            if (!wrapped)
                 res = jl_type_unionall((jl_tvar_t*)newvar, res);
         }
     }
 
     if (vb->innervars != NULL) {
-        for (size_t i = 0; i < jl_array_len(vb->innervars); i++) {
+        for (size_t i = 0; i < jl_array_nrows(vb->innervars); i++) {
             jl_tvar_t *var = (jl_tvar_t*)jl_array_ptr_ref(vb->innervars, i);
-            // the `btemp->prev` walk is only giving a sort of post-order guarantee (since we are
-            // iterating 2 trees at once), so once we set `wrap`, there might remain other branches
-            // of the type walk that now still may have incomplete bounds: finish those now too
-            jl_varbinding_t *wrap = NULL;
-            for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) {
-                if (btemp->depth0 == vb->depth0 && (jl_has_typevar(btemp->lb, var) || jl_has_typevar(btemp->ub, var))) {
-                    wrap = btemp;
-                }
-            }
-            if (wrap) {
-                if (wrap->innervars == NULL)
-                    wrap->innervars = jl_alloc_array_1d(jl_array_any_type, 0);
-                jl_array_ptr_1d_push(wrap->innervars, (jl_value_t*)var);
-            }
-            else if (res != jl_bottom_type) {
-                if (jl_has_typevar(res, var))
-                    res = jl_type_unionall((jl_tvar_t*)var, res);
-            }
+            res = jl_type_unionall(var, res);
         }
     }
 
@@ -2943,9 +3364,6 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
 static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param, jl_varbinding_t *vb)
 {
     jl_varbinding_t *btemp = e->vars;
-    // if the var for this unionall (based on identity) already appears somewhere
-    // in the environment, rename to get a fresh var.
-    // TODO: might need to look inside types in btemp->lb and btemp->ub
     int envsize = 0;
     while (btemp != NULL) {
         envsize++;
@@ -2953,13 +3371,9 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
             vb->limited = 1;
             return t;
         }
-        if (btemp->var == u->var || btemp->lb == (jl_value_t*)u->var ||
-            btemp->ub == (jl_value_t*)u->var) {
-            u = jl_rename_unionall(u);
-            break;
-        }
         btemp = btemp->prev;
     }
+    u = unalias_unionall(u, e);
     JL_GC_PUSH1(&u);
     vb->var = u->var;
     e->vars = vb;
@@ -3050,7 +3464,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
 {
     jl_value_t *res = NULL;
     jl_savedenv_t se;
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0,
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0,
                            e->invdepth, NULL, e->vars };
     JL_GC_PUSH4(&res, &vb.lb, &vb.ub, &vb.innervars);
     save_env(e, &se, 1);
@@ -3058,6 +3472,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
     if (is_leaf_typevar(u->var) && noinv && always_occurs_cov(u->body, u->var, param))
         vb.constraintkind = 1;
     res = intersect_unionall_(t, u, e, R, param, &vb);
+    vb.intersected = 1;
     if (vb.limited) {
         // if the environment got too big, avoid tree recursion and propagate the flag
         if (e->vars)
@@ -3078,7 +3493,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
                 vb.ub = vb.var->ub;
             }
             restore_env(e, &se, vb.constraintkind == 1 ? 1 : 0);
-            vb.occurs = vb.occurs_cov = vb.occurs_inv = 0;
+            vb.occurs_cov = vb.occurs_inv = 0;
             res = intersect_unionall_(t, u, e, R, param, &vb);
         }
     }
@@ -3125,7 +3540,7 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t
             ii = (jl_value_t*)vmy;
         else {
             JL_GC_PUSH1(&ii);
-            ii = (jl_value_t*)jl_wrap_vararg(ii, NULL, 1);
+            ii = (jl_value_t*)jl_wrap_vararg(ii, NULL, 1, 0);
             JL_GC_POP();
         }
         return ii;
@@ -3134,10 +3549,12 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t
     assert(e->Loffset == 0);
     e->Loffset = offset;
     jl_varbinding_t *xb = NULL, *yb = NULL;
+    int8_t max_offsetx = 0, max_offsety = 0;
     if (xp2) {
         assert(jl_is_typevar(xp2));
         xb = lookup(e, (jl_tvar_t*)xp2);
         if (xb) xb->intvalued = 1;
+        if (xb) max_offsetx = xb->max_offset;
         if (!yp2)
             i2 = bound_var_below((jl_tvar_t*)xp2, xb, e, 0);
     }
@@ -3145,6 +3562,7 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t
         assert(jl_is_typevar(yp2));
         yb = lookup(e, (jl_tvar_t*)yp2);
         if (yb) yb->intvalued = 1;
+        if (yb) max_offsety = yb->max_offset;
         if (!xp2)
             i2 = bound_var_below((jl_tvar_t*)yp2, yb, e, 1);
     }
@@ -3159,14 +3577,27 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t
     }
     assert(e->Loffset == offset);
     e->Loffset = 0;
-    if (i2 == jl_bottom_type)
+    if (i2 == jl_bottom_type) {
         ii = (jl_value_t*)jl_bottom_type;
-    else if (xp2 && obviously_egal(xp1, ii) && obviously_egal(xp2, i2))
-        ii = (jl_value_t*)vmx;
-    else if (yp2 && obviously_egal(yp1, ii) && obviously_egal(yp2, i2))
-        ii = (jl_value_t*)vmy;
-    else
-        ii = (jl_value_t*)jl_wrap_vararg(ii, i2, 1);
+    }
+    else {
+        if (xb && !xb->intersected) {
+            xb->max_offset = max_offsetx;
+            if (offset > xb->max_offset && xb->max_offset >= 0)
+                xb->max_offset = offset > 32 ? 32 : offset;
+        }
+        if (yb && !yb->intersected) {
+            yb->max_offset = max_offsety;
+            if (-offset > yb->max_offset && yb->max_offset >= 0)
+                yb->max_offset = -offset > 32 ? 32 : -offset;
+        }
+        if (xp2 && obviously_egal(xp1, ii) && obviously_egal(xp2, i2))
+            ii = (jl_value_t*)vmx;
+        else if (yp2 && obviously_egal(yp1, ii) && obviously_egal(yp2, i2))
+            ii = (jl_value_t*)vmy;
+        else
+            ii = (jl_value_t*)jl_wrap_vararg(ii, i2, 1, 0);
+    }
     JL_GC_POP();
     return ii;
 }
@@ -3185,6 +3616,24 @@ static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_sten
         llx += jl_unbox_long(jl_unwrap_vararg_num((jl_vararg_t *)jl_tparam(xd, lx-1))) - 1;
     if (vvy == JL_VARARG_INT)
         lly += jl_unbox_long(jl_unwrap_vararg_num((jl_vararg_t *)jl_tparam(yd, ly-1))) - 1;
+    if (vvx == JL_VARARG_BOUND && (vvy == JL_VARARG_BOUND || vvy == JL_VARARG_UNBOUND)) {
+        jl_value_t *xlen = jl_unwrap_vararg_num((jl_vararg_t*)jl_tparam(xd, lx-1));
+        assert(xlen && jl_is_typevar(xlen));
+        jl_varbinding_t *xb = lookup(e, (jl_tvar_t*)xlen);
+        if (xb && xb->intersected && xb->max_offset > 0) {
+            assert(xb->max_offset <= 32);
+            llx += xb->max_offset;
+        }
+    }
+    if (vvy == JL_VARARG_BOUND && (vvx == JL_VARARG_BOUND || vvx == JL_VARARG_UNBOUND)) {
+        jl_value_t *ylen = jl_unwrap_vararg_num((jl_vararg_t*)jl_tparam(yd, ly-1));
+        assert(ylen && jl_is_typevar(ylen));
+        jl_varbinding_t *yb = lookup(e, (jl_tvar_t*)ylen);
+        if (yb && yb->intersected && yb->max_offset > 0) {
+            assert(yb->max_offset <= 32);
+            lly += yb->max_offset;
+        }
+    }
 
     if ((vvx == JL_VARARG_NONE || vvx == JL_VARARG_INT) &&
         (vvy == JL_VARARG_NONE || vvy == JL_VARARG_INT)) {
@@ -3217,8 +3666,8 @@ static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_sten
             assert(i == j && i == np);
             break;
         }
-        if (xi && jl_is_vararg(xi)) vx = vvx != JL_VARARG_INT;
-        if (yi && jl_is_vararg(yi)) vy = vvy != JL_VARARG_INT;
+        if (xi && jl_is_vararg(xi)) vx = vvx == JL_VARARG_UNBOUND || (vvx == JL_VARARG_BOUND && i == llx - 1);
+        if (yi && jl_is_vararg(yi)) vy = vvy == JL_VARARG_UNBOUND || (vvy == JL_VARARG_BOUND && j == lly - 1);
         if (xi == NULL || yi == NULL) {
             if (vx && intersect_vararg_length(xi, lly+1-llx, e, 0)) {
                 np = j;
@@ -3309,7 +3758,7 @@ static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_sten
         else if (isy)
             res = (jl_value_t*)yd;
         else if (p)
-            res = jl_apply_tuple_type(p);
+            res = jl_apply_tuple_type(p, 1);
         else
             res = jl_apply_tuple_type_v(params, np);
     }
@@ -3431,6 +3880,89 @@ static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOT
     return compareto_var(x, (jl_tvar_t*)y, e, -1) || compareto_var(y, (jl_tvar_t*)x, e, 1);
 }
 
+static int intersect_var_ccheck_in_env(jl_value_t *xlb, jl_value_t *xub, jl_value_t *ylb, jl_value_t *yub, jl_stenv_t *e, int flip)
+{
+    int easy_check1 = xlb == jl_bottom_type ||
+                      yub == (jl_value_t *)jl_any_type ||
+                      (e->Loffset == 0 && obviously_in_union(yub, xlb));
+    int easy_check2 = ylb == jl_bottom_type ||
+                      xub == (jl_value_t *)jl_any_type ||
+                      (e->Loffset == 0 && obviously_in_union(xub, ylb));
+    int nofree1 = 0, nofree2 = 0;
+    if (!easy_check1) {
+        nofree1 = !jl_has_free_typevars(xlb) && !jl_has_free_typevars(yub);
+        if (nofree1 && e->Loffset == 0) {
+            easy_check1 = jl_subtype(xlb, yub);
+            if (!easy_check1)
+                return 0;
+        }
+    }
+    if (!easy_check2) {
+        nofree2 = !jl_has_free_typevars(ylb) && !jl_has_free_typevars(xub);
+        if (nofree2 && e->Loffset == 0) {
+            easy_check2 = jl_subtype(ylb, xub);
+            if (!easy_check2)
+                return 0;
+        }
+    }
+    if (easy_check1 && easy_check2)
+        return 1;
+    int ccheck = 0;
+    if ((easy_check1 || nofree1) && (easy_check2 || nofree2)) {
+        jl_varbinding_t *vars = e->vars;
+        e->vars = NULL;
+        ccheck = easy_check1 || subtype_in_env(xlb, yub, e);
+        if (ccheck && !easy_check2) {
+            flip_offset(e);
+            ccheck = subtype_in_env(ylb, xub, e);
+            flip_offset(e);
+        }
+        e->vars = vars;
+        return ccheck;
+    }
+    jl_savedenv_t se;
+    save_env(e, &se, 1);
+    // first try normal flip.
+    if (flip) flip_vars(e);
+    ccheck = easy_check1 || subtype_in_env(xlb, yub, e);
+    if (ccheck && !easy_check2) {
+        flip_offset(e);
+        ccheck = subtype_in_env(ylb, xub, e);
+        flip_offset(e);
+    }
+    if (flip) flip_vars(e);
+    if (!ccheck) {
+        // then try reverse flip.
+        restore_env(e, &se, 1);
+        if (!flip) flip_vars(e);
+        ccheck = easy_check1 || subtype_in_env(xlb, yub, e);
+        if (ccheck && !easy_check2) {
+            flip_offset(e);
+            ccheck = subtype_in_env(ylb, xub, e);
+            flip_offset(e);
+        }
+        if (!flip) flip_vars(e);
+    }
+    if (!ccheck) {
+        // then try existential.
+        restore_env(e, &se, 1);
+        if (easy_check1)
+            ccheck = 1;
+        else {
+            ccheck = subtype_in_env_existential(xlb, yub, e);
+            restore_env(e, &se, 1);
+        }
+        if (ccheck && !easy_check2) {
+            flip_offset(e);
+            ccheck = subtype_in_env_existential(ylb, xub, e);
+            flip_offset(e);
+            restore_env(e, &se, 1);
+        }
+    }
+    free_env(&se);
+    return ccheck;
+}
+
 static int has_typevar_via_env(jl_value_t *x, jl_tvar_t *t, jl_stenv_t *e)
 {
     if (e->Loffset == 0) {
@@ -3553,7 +4085,7 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                 if (xlb == xub && ylb == yub &&
                     jl_has_typevar(xlb, (jl_tvar_t *)y) &&
                     jl_has_typevar(ylb, (jl_tvar_t *)x)) {
-                    // specical case for e.g.
+                    // special case for e.g.
                     // 1) Val{Y}<:X<:Val{Y} && Val{X}<:Y<:Val{X}
                     // 2) Y<:X<:Y && Val{X}<:Y<:Val{X} => Val{Y}<:Y<:Val{Y}
                     ccheck = 0;
@@ -3563,14 +4095,8 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                     ccheck = 1;
                 }
                 else {
-                    if (R) flip_vars(e);
-                    ccheck = subtype_in_env(xlb, yub, e);
-                    if (ccheck) {
-                        flip_offset(e);
-                        ccheck = subtype_in_env(ylb, xub, e);
-                        flip_offset(e);
-                    }
-                    if (R) flip_vars(e);
+                    // try many subtype check to avoid false `Union{}`
+                    ccheck = intersect_var_ccheck_in_env(xlb, xub, ylb, yub, e, R);
                 }
                 if (R) flip_offset(e);
                 if (!ccheck)
@@ -3626,12 +4152,14 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
         if (jl_subtype(y, x)) return y;
     }
     if (jl_is_uniontype(x)) {
-        if (y == ((jl_uniontype_t*)x)->a || y == ((jl_uniontype_t*)x)->b)
+        if (obviously_in_union(x, y))
             return y;
+        if (jl_is_uniontype(y) && obviously_in_union(y, x))
+            return x;
         return intersect_union(y, (jl_uniontype_t*)x, e, 0, param);
     }
     if (jl_is_uniontype(y)) {
-        if (x == ((jl_uniontype_t*)y)->a || x == ((jl_uniontype_t*)y)->b)
+        if (obviously_in_union(y, x))
             return x;
         if (jl_is_unionall(x) && (jl_has_free_typevars(x) || jl_has_free_typevars(y)))
             return intersect_unionall(y, (jl_unionall_t*)x, e, 0, param);
@@ -3745,73 +4273,12 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
     return jl_bottom_type;
 }
 
-static int merge_env(jl_stenv_t *e, jl_savedenv_t *se, int count)
+static int merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se, int count)
 {
-    if (count == 0)
-        alloc_env(e, se, 1);
-    jl_value_t **roots = NULL;
-    int nroots = 0;
-    if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) {
-        jl_svec_t *sv = (jl_svec_t*)se->roots[0];
-        assert(jl_is_svec(sv));
-        roots = jl_svec_data(sv);
-        nroots = jl_svec_len(sv);
-    }
-    else {
-        roots = se->roots;
-        nroots = se->gcframe.nroots >> 2;
-    }
-    int n = 0;
-    jl_varbinding_t *v = e->vars;
-    v = e->vars;
-    while (v != NULL) {
-        if (count == 0) {
-            // need to initialize this
-            se->buf[n] = 0;
-            se->buf[n+1] = 0;
-            se->buf[n+2] = 0;
-        }
-        if (v->occurs) {
-            // only merge lb/ub/innervars if this var occurs.
-            jl_value_t *b1, *b2;
-            b1 = roots[n];
-            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-            b2 = v->lb;
-            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
-            roots[n] = b1 ? simple_meet(b1, b2, 0) : b2;
-            b1 = roots[n+1];
-            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-            b2 = v->ub;
-            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
-            roots[n+1] = b1 ? simple_join(b1, b2) : b2;
-            b1 = roots[n+2];
-            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-            b2 = (jl_value_t*)v->innervars;
-            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
-            if (b2 && b1 != b2) {
-                if (b1)
-                    jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2);
-                else
-                    roots[n+2] = b2;
-            }
-            // record the meeted vars.
-            se->buf[n] = 1;
-        }
-        // always merge occurs_inv/cov by max (never decrease)
-        if (v->occurs_inv > se->buf[n+1])
-            se->buf[n+1] = v->occurs_inv;
-        if (v->occurs_cov > se->buf[n+2])
-            se->buf[n+2] = v->occurs_cov;
-        n = n + 3;
-        v = v->prev;
+    if (count == 0) {
+        save_env(e, me, 1);
+        return 1;
     }
-    assert(n == nroots); (void)nroots;
-    return count + 1;
-}
-
-// merge untouched vars' info.
-static void final_merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se)
-{
     jl_value_t **merged = NULL;
     jl_value_t **saved = NULL;
     int nroots = 0;
@@ -3829,51 +4296,53 @@ static void final_merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se)
     else {
         saved = se->roots;
         merged = me->roots;
-        nroots = se->gcframe.nroots >> 2;
+        nroots = JL_GC_DECODE_NROOTS(se->gcframe.nroots);
     }
     assert(nroots == current_env_length(e) * 3);
     assert(nroots % 3 == 0);
-    for (int n = 0; n < nroots; n = n + 3) {
-        if (merged[n] == NULL)
-            merged[n] = saved[n];
-        if (merged[n+1] == NULL)
-            merged[n+1] = saved[n+1];
-        jl_value_t *b1, *b2;
+    int m = 0, n = 0;
+    jl_varbinding_t *v = e->vars;
+    while (v != NULL) {
+        jl_value_t *b0, *b1, *b2;
+        // merge `lb`
+        b0 = saved[n];
+        b1 = merged[n];
+        JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
+        b2 = v->lb;
+        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
+        merged[n] = (b1 == b0 || b2 == b0) ? b0 : simple_meet(b1, b2, 0);
+        // merge `ub`
+        b0 = saved[n+1];
+        b1 = merged[n+1];
+        JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
+        b2 = v->ub;
+        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
+        merged[n+1] = (b1 == b0 || b2 == b0) ? b0 : simple_join(b1, b2);
+        // merge `innervars`
         b1 = merged[n+2];
         JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-        b2 = saved[n+2];
-        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know this came from our GC frame
+        b2 = (jl_value_t*)v->innervars;
+        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
         if (b2 && b1 != b2) {
             if (b1)
                 jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2);
             else
                 merged[n+2] = b2;
         }
-        me->buf[n] |= se->buf[n];
-    }
-}
-
-static void expand_local_env(jl_stenv_t *e, jl_value_t *res)
-{
-    jl_varbinding_t *v = e->vars;
-    // Here we pull in some typevar missed in fastpath.
-    while (v != NULL) {
-        v->occurs = v->occurs || jl_has_typevar(res, v->var);
-        assert(v->occurs == 0 || v->occurs == 1);
-        v = v->prev;
-    }
-    v = e->vars;
-    while (v != NULL) {
-        if (v->occurs == 1) {
-            jl_varbinding_t *v2 = e->vars;
-            while (v2 != NULL) {
-                if (v2 != v && v2->occurs == 0)
-                    v2->occurs = -(jl_has_typevar(v->lb, v2->var) || jl_has_typevar(v->ub, v2->var));
-                v2 = v2->prev;
-            }
-        }
+        // merge occurs_inv/cov by max (never decrease)
+        if (v->occurs_inv > me->buf[m])
+            me->buf[m] = v->occurs_inv;
+        if (v->occurs_cov > me->buf[m+1])
+            me->buf[m+1] = v->occurs_cov;
+        // merge max_offset by min
+        if (!v->intersected && v->max_offset < me->buf[m+2])
+            me->buf[m+2] = v->max_offset;
+        m = m + 3;
+        n = n + 3;
         v = v->prev;
     }
+    assert(n == nroots); (void)nroots;
+    return count + 1;
 }
 
 static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
@@ -3886,26 +4355,31 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
     jl_savedenv_t se, me;
     save_env(e, &se, 1);
     int niter = 0, total_iter = 0;
-    clean_occurs(e);
     is[0] = intersect(x, y, e, 0); // root
-    if (is[0] != jl_bottom_type) {
-        expand_local_env(e, is[0]);
-        niter = merge_env(e, &me, niter);
+    if (is[0] == jl_bottom_type) {
+        restore_env(e, &se, 1);
+    }
+    else if (!e->emptiness_only && has_next_union_state(e, 1)) {
+        niter = merge_env(e, &me, &se, niter);
+        restore_env(e, &se, 1);
     }
-    restore_env(e, &se, 1);
     while (next_union_state(e, 1)) {
         if (e->emptiness_only && is[0] != jl_bottom_type)
             break;
         e->Runions.depth = 0;
         e->Runions.more = 0;
 
-        clean_occurs(e);
         is[1] = intersect(x, y, e, 0);
-        if (is[1] != jl_bottom_type) {
-            expand_local_env(e, is[1]);
-            niter = merge_env(e, &me, niter);
+        if (is[1] == jl_bottom_type) {
+            restore_env(e, &se, 1);
+        }
+        else if (niter > 0 || (!e->emptiness_only && has_next_union_state(e, 1))) {
+            niter = merge_env(e, &me, &se, niter);
+            restore_env(e, &se, 1);
+        }
+        else {
+            assert(is[0] == jl_bottom_type);
         }
-        restore_env(e, &se, 1);
         if (is[0] == jl_bottom_type)
             is[0] = is[1];
         else if (is[1] != jl_bottom_type) {
@@ -3913,13 +4387,18 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
             is[0] = jl_type_union(is, 2);
         }
         total_iter++;
-        if (niter > 4 || total_iter > 400000) {
+        if (has_next_union_state(e, 1) && (niter > 4 || total_iter > 400000)) {
             is[0] = y;
+            // we give up precise intersection here, just restore the saved env
+            restore_env(e, &se, 1);
+            if (niter > 0) {
+                free_env(&me);
+                niter = 0;
+            }
             break;
         }
     }
     if (niter) {
-        final_merge_env(e, &me, &se);
         restore_env(e, &me, 1);
         free_env(&me);
     }
@@ -3946,7 +4425,9 @@ static jl_value_t *intersect_types(jl_value_t *x, jl_value_t *y, int emptiness_o
     init_stenv(&e, NULL, 0);
     e.intersection = e.ignore_free = 1;
     e.emptiness_only = emptiness_only;
-    return intersect_all(x, y, &e);
+    jl_value_t *ans = intersect_all(x, y, &e);
+    free_stenv(&e);
+    return ans;
 }
 
 JL_DLLEXPORT jl_value_t *jl_intersect_types(jl_value_t *x, jl_value_t *y)
@@ -4046,7 +4527,7 @@ static jl_value_t *switch_union_tuple(jl_value_t *a, jl_value_t *b)
         ts[1] = jl_tparam(b, i);
         jl_svecset(vec, i, jl_type_union(ts, 2));
     }
-    jl_value_t *ans = jl_apply_tuple_type(vec);
+    jl_value_t *ans = jl_apply_tuple_type(vec, 1);
     JL_GC_POP();
     return ans;
 }
@@ -4123,6 +4604,7 @@ jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t *
             memset(env, 0, szb*sizeof(void*));
         e.envsz = szb;
         *ans = intersect_all(a, b, &e);
+        free_stenv(&e);
         if (*ans == jl_bottom_type) goto bot;
         // TODO: code dealing with method signatures is not able to handle unions, so if
         // `a` and `b` are both tuples, we need to be careful and may not return a union,
@@ -4220,8 +4702,263 @@ int jl_subtype_matching(jl_value_t *a, jl_value_t *b, jl_svec_t **penv)
     return sub;
 }
 
+// type utils
+static void check_diagonal(jl_value_t *t, jl_varbinding_t *troot, int param)
+{
+    if (jl_is_uniontype(t)) {
+        int i, len = 0;
+        jl_varbinding_t *v;
+        for (v = troot; v != NULL; v = v->prev)
+            len++;
+        int8_t *occurs = (int8_t *)alloca(len);
+        for (v = troot, i = 0; v != NULL; v = v->prev, i++)
+            occurs[i] = v->occurs_inv | (v->occurs_cov << 2);
+        check_diagonal(((jl_uniontype_t *)t)->a, troot, param);
+        for (v = troot, i = 0; v != NULL; v = v->prev, i++) {
+            int8_t occurs_inv = occurs[i] & 3;
+            int8_t occurs_cov = occurs[i] >> 2;
+            occurs[i] = v->occurs_inv | (v->occurs_cov << 2);
+            v->occurs_inv = occurs_inv;
+            v->occurs_cov = occurs_cov;
+        }
+        check_diagonal(((jl_uniontype_t *)t)->b, troot, param);
+        for (v = troot, i = 0; v != NULL; v = v->prev, i++) {
+            if (v->occurs_inv < (occurs[i] & 3))
+                v->occurs_inv = occurs[i] & 3;
+            if (v->occurs_cov < (occurs[i] >> 2))
+                v->occurs_cov = occurs[i] >> 2;
+        }
+    }
+    else if (jl_is_unionall(t)) {
+        assert(troot != NULL);
+        jl_varbinding_t *v1 = troot, *v2 = troot->prev;
+        while (v2 != NULL) {
+            if (v2->var == ((jl_unionall_t *)t)->var) {
+                v1->prev = v2->prev;
+                break;
+            }
+            v1 = v2;
+            v2 = v2->prev;
+        }
+        check_diagonal(((jl_unionall_t *)t)->body, troot, param);
+        v1->prev = v2;
+    }
+    else if (jl_is_datatype(t)) {
+        int nparam = jl_is_tuple_type(t) ? 1 : 2;
+        if (nparam < param) nparam = param;
+        for (size_t i = 0; i < jl_nparams(t); i++) {
+            check_diagonal(jl_tparam(t, i), troot, nparam);
+        }
+    }
+    else if (jl_is_vararg(t)) {
+        jl_value_t *T = jl_unwrap_vararg(t);
+        jl_value_t *N = jl_unwrap_vararg_num(t);
+        int n = (N && jl_is_long(N)) ? jl_unbox_long(N) : 2;
+        if (T && n > 0) check_diagonal(T, troot, param);
+        if (T && n > 1) check_diagonal(T, troot, param);
+        if (N)          check_diagonal(N, troot, 2);
+    }
+    else if (jl_is_typevar(t)) {
+        jl_varbinding_t *v = troot;
+        for (; v != NULL; v = v->prev) {
+            if (v->var == (jl_tvar_t *)t) {
+                if (param == 1 && v->occurs_cov < 2) v->occurs_cov++;
+                if (param == 2 && v->occurs_inv < 2) v->occurs_inv++;
+                break;
+            }
+        }
+        if (v == NULL)
+            check_diagonal(((jl_tvar_t *)t)->ub, troot, 0);
+    }
+}
+
+static jl_value_t *insert_nondiagonal(jl_value_t *type, jl_varbinding_t *troot, int widen2ub)
+{
+    if (jl_is_typevar(type)) {
+        int concretekind = widen2ub > 1 ? 0 : 1;
+        jl_varbinding_t *v = troot;
+        for (; v != NULL; v = v->prev) {
+            if (v->occurs_inv == 0 &&
+                v->occurs_cov > concretekind &&
+                v->var == (jl_tvar_t *)type)
+                break;
+        }
+        if (v != NULL) {
+            if (widen2ub) {
+                type = insert_nondiagonal(((jl_tvar_t *)type)->ub, troot, 2);
+            }
+            else {
+                // we must replace each covariant occurrence of newvar with a different newvar2<:newvar (diagonal rule)
+                if (v->innervars == NULL)
+                    v->innervars = jl_alloc_array_1d(jl_array_any_type, 0);
+                jl_value_t *newvar = NULL, *lb = v->var->lb, *ub = (jl_value_t *)v->var;
+                jl_array_t *innervars = v->innervars;
+                JL_GC_PUSH4(&newvar, &lb, &ub, &innervars);
+                newvar = (jl_value_t *)jl_new_typevar(v->var->name, lb, ub);
+                jl_array_ptr_1d_push(innervars, newvar);
+                JL_GC_POP();
+                type = newvar;
+            }
+        }
+    }
+    else if (jl_is_unionall(type)) {
+        jl_value_t *body = ((jl_unionall_t*)type)->body;
+        jl_tvar_t *var = ((jl_unionall_t*)type)->var;
+        jl_varbinding_t *v = troot;
+        for (; v != NULL; v = v->prev) {
+            if (v->var == var)
+                break;
+        }
+        if (v) v->var = NULL; // Temporarily remove `type->var` from binding list.
+        jl_value_t *newbody = insert_nondiagonal(body, troot, widen2ub);
+        if (v) v->var = var; // And restore it after inner insertation.
+        jl_value_t *newvar = NULL;
+        JL_GC_PUSH3(&newbody, &newvar, &type);
+        if (body == newbody || jl_has_typevar(newbody, var)) {
+            if (body != newbody)
+                type = jl_new_struct(jl_unionall_type, var, newbody);
+            // n.b. we do not widen lb, since that would be the wrong direction
+            newvar = insert_nondiagonal(var->ub, troot, widen2ub);
+            if (newvar != var->ub) {
+                newvar = (jl_value_t*)jl_new_typevar(var->name, var->lb, newvar);
+                newbody = jl_apply_type1(type, newvar);
+                type = jl_type_unionall((jl_tvar_t*)newvar, newbody);
+            }
+        }
+        JL_GC_POP();
+    }
+    else if (jl_is_uniontype(type)) {
+        jl_value_t *a = ((jl_uniontype_t*)type)->a;
+        jl_value_t *b = ((jl_uniontype_t*)type)->b;
+        jl_value_t *newa = NULL;
+        jl_value_t *newb = NULL;
+        JL_GC_PUSH2(&newa, &newb);
+        newa = insert_nondiagonal(a, troot, widen2ub);
+        newb = insert_nondiagonal(b, troot, widen2ub);
+        if (newa != a || newb != b)
+            type = simple_union(newa, newb);
+        JL_GC_POP();
+    }
+    else if (jl_is_vararg(type)) {
+        // As for Vararg we'd better widen it's var to ub as otherwise they are still diagonal
+        jl_value_t *t = jl_unwrap_vararg(type);
+        jl_value_t *n = jl_unwrap_vararg_num(type);
+        if (widen2ub == 0)
+            widen2ub = !(n && jl_is_long(n)) || jl_unbox_long(n) > 1;
+        jl_value_t *newt = insert_nondiagonal(t, troot, widen2ub);
+        if (t != newt) {
+            JL_GC_PUSH1(&newt);
+            type = (jl_value_t *)jl_wrap_vararg(newt, n, 0, 0);
+            JL_GC_POP();
+        }
+    }
+    else if (jl_is_datatype(type)) {
+        if (jl_is_tuple_type(type)) {
+            jl_svec_t *newparams = NULL;
+            jl_value_t *newelt = NULL;
+            JL_GC_PUSH2(&newparams, &newelt);
+            for (size_t i = 0; i < jl_nparams(type); i++) {
+                jl_value_t *elt = jl_tparam(type, i);
+                newelt = insert_nondiagonal(elt, troot, widen2ub);
+                if (elt != newelt) {
+                    if (!newparams)
+                        newparams = jl_svec_copy(((jl_datatype_t*)type)->parameters);
+                    jl_svecset(newparams, i, newelt);
+                }
+            }
+            if (newparams)
+                type = (jl_value_t*)jl_apply_tuple_type(newparams, 1);
+            JL_GC_POP();
+        }
+    }
+    return type;
+}
+
+static jl_value_t *_widen_diagonal(jl_value_t *t, jl_varbinding_t *troot) {
+    check_diagonal(t, troot, 0);
+    int any_concrete = 0;
+    for (jl_varbinding_t *v = troot; v != NULL; v = v->prev)
+        any_concrete |= v->occurs_cov > 1 && v->occurs_inv == 0;
+    if (!any_concrete)
+        return t; // no diagonal
+    return insert_nondiagonal(t, troot, 0);
+}
+
+static jl_value_t *widen_diagonal(jl_value_t *t, jl_unionall_t *u, jl_varbinding_t *troot)
+{
+    jl_varbinding_t vb = { u->var, NULL, NULL, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, troot };
+    jl_value_t *nt = NULL;
+    JL_GC_PUSH2(&vb.innervars, &nt);
+    if (jl_is_unionall(u->body))
+        nt = widen_diagonal(t, (jl_unionall_t *)u->body, &vb);
+    else
+        nt = _widen_diagonal(t, &vb);
+    if (vb.innervars != NULL) {
+        for (size_t i = 0; i < jl_array_nrows(vb.innervars); i++) {
+            jl_tvar_t *var = (jl_tvar_t*)jl_array_ptr_ref(vb.innervars, i);
+            nt = jl_type_unionall(var, nt);
+        }
+    }
+    JL_GC_POP();
+    return nt;
+}
+
+JL_DLLEXPORT jl_value_t *jl_widen_diagonal(jl_value_t *t, jl_unionall_t *ua)
+{
+    return widen_diagonal(t, ua, NULL);
+}
 
 // specificity comparison
+static int count_missing_wrap(jl_value_t *x, jl_typeenv_t *env)
+{
+    if (!jl_has_free_typevars(x))
+        return 0;
+    jl_typeenv_t *wrapped = NULL;
+    int count = 0;
+    for (jl_typeenv_t *env2 = env; env2 != NULL; env2 = env2->prev) {
+        int need_wrap = 0;
+        for (jl_typeenv_t *env3 = wrapped; env3 != NULL && need_wrap == 0; env3 = env3->prev) {
+            if (env3->var == env2->var)
+                need_wrap = -1;
+            else if (jl_has_typevar(env3->var->lb, env2->var) || jl_has_typevar(env3->var->ub, env2->var))
+                need_wrap = 1;
+        }
+        need_wrap = need_wrap == 0 ? jl_has_typevar(x, env2->var) :
+                    need_wrap == -1 ? 0 : 1;
+        if (need_wrap) {
+            count++;
+            jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t));
+            newenv->var = env2->var;
+            newenv->val = NULL;
+            newenv->prev = wrapped;
+            wrapped = newenv;
+        }
+    }
+    return count;
+}
+
+static int obvious_subtype_msp(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *subtype, int wrapx, int wrapy)
+{
+    if (wrapx != 0 || wrapy != 0) {
+        int wrap_count = wrapx - wrapy;
+        while (wrap_count > 0 && jl_is_unionall(y))
+        {
+            y = ((jl_unionall_t*)y)->body;
+            wrap_count--;
+        }
+        while (wrap_count < 0 && jl_is_unionall(x))
+        {
+            x = ((jl_unionall_t*)x)->body;
+            wrap_count++;
+        }
+        if (wrap_count > 0) {
+            if (obvious_subtype(jl_unwrap_unionall(x), y, y0, subtype) && !*subtype)
+                return 1;
+            return 0;
+        }
+    }
+    return obvious_subtype(x, y, y0, subtype);
+}
 
 static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, jl_typeenv_t *env)
 {
@@ -4244,12 +4981,14 @@ static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0,
         a = b;
         b = temp;
     }
+    int wrapa = count_missing_wrap(a, env);
+    int wrapb = count_missing_wrap(b, env);
     // first check if a <: b has an obvious answer
     int subtype_ab = 2;
     if (b == (jl_value_t*)jl_any_type || a == jl_bottom_type) {
         subtype_ab = 1;
     }
-    else if (obvious_subtype(a, b, b0, &subtype_ab)) {
+    else if (obvious_subtype_msp(a, b, b0, &subtype_ab, wrapa, wrapb)) {
 #ifdef NDEBUG
         if (subtype_ab == 0)
             return 0;
@@ -4263,7 +5002,7 @@ static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0,
     if (a == (jl_value_t*)jl_any_type || b == jl_bottom_type) {
         subtype_ba = 1;
     }
-    else if (obvious_subtype(b, a, a0, &subtype_ba)) {
+    else if (obvious_subtype_msp(b, a, a0, &subtype_ba, wrapb, wrapa)) {
 #ifdef NDEBUG
         if (subtype_ba == 0)
             return 0;
@@ -4328,7 +5067,9 @@ static int sub_msp(jl_value_t *x, jl_value_t *y, jl_value_t *y0, jl_typeenv_t *e
         return 1;
     }
     int obvious_sub = 2;
-    if (obvious_subtype(x, y, y0, &obvious_sub)) {
+    int wrapx = count_missing_wrap(x, env);
+    int wrapy = count_missing_wrap(y, env);
+    if (obvious_subtype_msp(x, y, y0, &obvious_sub, wrapx, wrapy)) {
 #ifdef NDEBUG
         return obvious_sub;
 #endif
@@ -4793,6 +5534,26 @@ JL_DLLEXPORT int jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b)
     return type_morespecific_(a, b, a, b, 0, NULL);
 }
 
+// Equivalent to `jl_type_morespecific` of the signatures, except that more recent
+// methods are more specific, iff the methods signatures are type-equal
+JL_DLLEXPORT int jl_method_morespecific(jl_method_t *ma, jl_method_t *mb)
+{
+    jl_value_t *a = (jl_value_t*)ma->sig;
+    jl_value_t *b = (jl_value_t*)mb->sig;
+    if (obviously_disjoint(a, b, 1))
+        return 0;
+    if (jl_has_free_typevars(a) || jl_has_free_typevars(b))
+        return 0;
+    if (jl_subtype(b, a)) {
+        if (jl_types_equal(a, b))
+            return jl_atomic_load_relaxed(&ma->primary_world) > jl_atomic_load_relaxed(&mb->primary_world);
+        return 0;
+    }
+    if (jl_subtype(a, b))
+        return 1;
+    return type_morespecific_(a, b, a, b, 0, NULL);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/support/Makefile b/src/support/Makefile
index 1ee98a4eabdee..7366fb3480142 100644
--- a/src/support/Makefile
+++ b/src/support/Makefile
@@ -3,10 +3,10 @@ JULIAHOME := $(abspath $(SRCDIR)/../..)
 BUILDDIR := .
 include $(JULIAHOME)/Make.inc
 
-JCFLAGS += $(CFLAGS)
-JCXXFLAGS += $(CXXFLAGS)
-JCPPFLAGS += $(CPPFLAGS)
-JLDFLAGS += $(LDFLAGS)
+JCFLAGS_COMMON += $(CFLAGS) $(JL_CFLAGS)
+JCXXFLAGS_COMMON += $(CXXFLAGS) $(JL_CXXFLAGS)
+JCPPFLAGS_COMMON += $(CPPFLAGS) $(JL_CPPFLAGS)
+JLDFLAGS += $(LDFLAGS) $(JL_LDFLAGS)
 
 SRCS := hashing timefuncs ptrhash operators utf8 ios htable bitvector \
 	int2str libsupportinit arraylist strtod rle
@@ -24,12 +24,12 @@ HEADERS := $(wildcard *.h) $(LIBUV_INC)/uv.h
 OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
 DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
 
-FLAGS := $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -I$(UTF8PROC_INC) -DJL_LIBRARY_EXPORTS_INTERNAL -DUTF8PROC_EXPORTS
-FLAGS += -Wall -Wno-strict-aliasing -fvisibility=hidden -Wpointer-arith -Wundef
-JCFLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
+FLAGS_COMMON := $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -I$(UTF8PROC_INC) -DJL_LIBRARY_EXPORTS_INTERNAL -DUTF8PROC_EXPORTS
+FLAGS_COMMON += -Wall -Wno-strict-aliasing -fvisibility=hidden -Wpointer-arith -Wundef
+JCFLAGS_COMMON += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
 
-DEBUGFLAGS += $(FLAGS)
-SHIPFLAGS += $(FLAGS)
+SHIPFLAGS_COMMON += $(FLAGS_COMMON)
+DEBUGFLAGS_COMMON += $(FLAGS_COMMON)
 
 default: release
 
@@ -48,13 +48,13 @@ $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.S | $(BUILDDIR)
 $(BUILDDIR)/host/Makefile:
 	mkdir -p $(BUILDDIR)/host
 	@# add Makefiles to the build directories for convenience (pointing back to the source location of each)
-	@echo '# -- This file is automatically generated in julia/Makefile -- #' > $@
-	@echo 'BUILDDIR=$(BUILDDIR)/host' >> $@
-	@echo 'BUILDING_HOST_TOOLS=1' >> $@
-	@echo 'include $(SRCDIR)/Makefile' >> $@
+	@printf "%s\n" '# -- This file is automatically generated in julia/Makefile -- #' > $@
+	@printf "%s\n" 'BUILDDIR=$(BUILDDIR)/host' >> $@
+	@printf "%s\n" 'BUILDING_HOST_TOOLS=1' >> $@
+	@printf "%s\n" 'include $(SRCDIR)/Makefile' >> $@
 
-release: $(BUILDDIR)/libsupport.a
-debug: $(BUILDDIR)/libsupport-debug.a
+release: $(BUILDDIR)/libsupport.a regenerate-compile_commands
+debug: $(BUILDDIR)/libsupport-debug.a regenerate-compile_commands
 
 $(BUILDDIR)/libsupport.a: $(OBJS) | $(BUILDIR)
 	rm -rf $@
@@ -70,6 +70,51 @@ $(BUILDDIR)/host/libsupport.a: $(BUILDDIR)/host/Makefile
 $(BUILDDIR)/host/libsupport-debug.a: $(BUILDDIR)/host/Makefile
 	$(MAKE) -C $(BUILDDIR)/host libsupport-debug.a
 
+# Common flag patterns for all clang tooling (clang-sa, clang-tidy, compile-database)
+CLANG_TOOLING_S_FLAGS = $(CLANGSA_FLAGS) $(DEBUGFLAGS_CLANG) $(JCPPFLAGS_CLANG)
+CLANG_TOOLING_C_FLAGS = $(CLANGSA_FLAGS) $(DEBUGFLAGS_CLANG) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG)
+
+# Included files in support
+INCLUDED_SUPPORT_FILES := hashing.c:MurmurHash3.c
+
+# Compilation database generation
+.PHONY: regenerate-compile_commands
+regenerate-compile_commands:
+	TMPFILE=$$(mktemp $(abspath $(BUILDDIR)/compile_commands.json.XXXXXX)); \
+	{ \
+		CLANG_TOOLING_S_FLAGS="$$($(JULIAHOME)/contrib/escape_json.sh clang $(JCPPFLAGS) $(DEBUGFLAGS))"; \
+		CLANG_TOOLING_C_FLAGS="$$($(JULIAHOME)/contrib/escape_json.sh clang $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS))"; \
+		echo "["; \
+		first=true; \
+		for src in $(SRCS); do \
+			[ "$$first" = "true" ] && first=false || echo ","; \
+			if [ -f $(SRCDIR)/$$src.S ]; then \
+				cmd="$${CLANG_TOOLING_S_FLAGS}, \"$$src.S\""; \
+				file_path="$$src.S"; \
+			else \
+				cmd="$${CLANG_TOOLING_C_FLAGS}, \"$$src.c\""; \
+				file_path="$$src.c"; \
+			fi; \
+			printf '{\n  "directory": "%s",\n  "file": "%s",\n  "arguments": [%s]\n}' "$(abspath $(SRCDIR))" "$$file_path" "$$cmd"; \
+		done; \
+		for included_pair in $(INCLUDED_SUPPORT_FILES); do \
+			[ "$$first" = "true" ] && first=false || echo ","; \
+			including_file=$${included_pair%%:*}; \
+			included_file=$${included_pair##*:}; \
+			cmd="$${CLANG_TOOLING_C_FLAGS}, \"$$including_file\""; \
+			printf '{\n  "directory": "%s",\n  "file": "%s",\n  "arguments": [%s]\n}' "$(abspath $(SRCDIR))" "$$included_file" "$$cmd"; \
+		done; \
+		echo "]"; \
+	} > $$TMPFILE; \
+	if ! cmp -s $$TMPFILE $(BUILDDIR)/compile_commands.json; then \
+		mv $$TMPFILE $(BUILDDIR)/compile_commands.json; \
+	else \
+		rm -f $$TMPFILE; \
+	fi
+
+compile-database: regenerate-compile_commands
+	@echo "Compilation database created for src/support"
+
 clean:
 	rm -f $(BUILDDIR)/*.o
 	rm -f $(BUILDDIR)/*.dbg.obj
@@ -78,4 +123,7 @@ clean:
 	rm -f $(BUILDDIR)/core*
 	rm -f $(BUILDDIR)/libsupport.a
 	rm -f $(BUILDDIR)/libsupport-debug.a
+	rm -f $(BUILDDIR)/compile_commands.json*
 	rm -f $(BUILDDIR)/host/*
+
+.PHONY: compile-database
diff --git a/src/support/arraylist.h b/src/support/arraylist.h
index 6ad2f0e2f28c9..8d4ef61ba251c 100644
--- a/src/support/arraylist.h
+++ b/src/support/arraylist.h
@@ -5,7 +5,7 @@
 
 #define AL_N_INLINE 29
 
-#define SMALL_AL_N_INLINE 6
+#define SMALL_AL_N_INLINE 5
 
 #ifdef __cplusplus
 extern "C" {
@@ -13,32 +13,33 @@ extern "C" {
 
 #include "analyzer_annotations.h"
 
-typedef struct {
+typedef struct { // 32 words
     size_t len;
     size_t max;
     void **items;
     void *_space[AL_N_INLINE];
 } arraylist_t;
 
-arraylist_t *arraylist_new(arraylist_t *a, size_t size) JL_NOTSAFEPOINT;
-void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT;
+JL_DLLEXPORT arraylist_t *arraylist_new(arraylist_t *a, size_t size) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT;
 
-void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
-void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT;
 
-typedef struct {
-    uint32_t len;
-    uint32_t max;
+typedef struct { // 8 words
+    size_t len;
+    size_t max;
     void **items;
     void *_space[SMALL_AL_N_INLINE];
 } small_arraylist_t;
 
-small_arraylist_t *small_arraylist_new(small_arraylist_t *a, uint32_t size) JL_NOTSAFEPOINT;
-void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT;
 
-void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
-void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT;
+JL_DLLEXPORT small_arraylist_t *small_arraylist_new(small_arraylist_t *a, uint32_t size) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void small_arraylist_grow(small_arraylist_t *a, uint32_t n) JL_NOTSAFEPOINT;
 
 #ifdef __cplusplus
diff --git a/src/support/dtypes.h b/src/support/dtypes.h
index da570921c101c..44db067019cd9 100644
--- a/src/support/dtypes.h
+++ b/src/support/dtypes.h
@@ -25,6 +25,11 @@
 #include <stdlib.h>
 #include <sys/stat.h>
 #define WIN32_LEAN_AND_MEAN
+/* Clang does not like fvisibility=hidden with windows headers. This adds the visibility attribute there.
+   Arguably this is a clang bug. */
+# ifndef _COMPILER_MICROSOFT_
+#  define DECLSPEC_IMPORT __declspec(dllimport) __attribute__ ((visibility("default")))
+# endif
 #include <windows.h>
 
 #if defined(_COMPILER_MICROSOFT_) && !defined(_SSIZE_T_) && !defined(_SSIZE_T_DEFINED)
@@ -37,21 +42,6 @@ typedef intptr_t ssize_t;
 
 #endif /* defined(_COMPILER_MICROSOFT_) && !defined(_SSIZE_T_) && !defined(_SSIZE_T_DEFINED) */
 
-#if !defined(_COMPILER_GCC_)
-
-#define strtoull                                            _strtoui64
-#define strtoll                                             _strtoi64
-#define strcasecmp                                          _stricmp
-#define strncasecmp                                         _strnicmp
-#define snprintf                                            _snprintf
-#define stat                                                _stat
-
-#define STDIN_FILENO                                        0
-#define STDOUT_FILENO                                       1
-#define STDERR_FILENO                                       2
-
-#endif /* !_COMPILER_GCC_ */
-
 #endif /* _OS_WINDOWS_ */
 
 
@@ -71,15 +61,21 @@ typedef intptr_t ssize_t;
 */
 
 #ifdef _OS_WINDOWS_
+# ifndef _COMPILER_MICROSOFT_
+#  define JL_VISIBILITY_DEFAULT __attribute__ ((visibility("default")))
+# else
+#  define JL_VISIBILITY_DEFAULT
+#  define JL_VISIBILITY_HIDDEN
+# endif
 #define STDCALL  __stdcall
 # ifdef JL_LIBRARY_EXPORTS_INTERNAL
-#  define JL_DLLEXPORT __declspec(dllexport)
+#  define JL_DLLEXPORT __declspec(dllexport) JL_VISIBILITY_DEFAULT
 # endif
 # ifdef JL_LIBRARY_EXPORTS_CODEGEN
-#  define JL_DLLEXPORT_CODEGEN __declspec(dllexport)
+#  define JL_DLLEXPORT_CODEGEN __declspec(dllexport) JL_VISIBILITY_DEFAULT
 # endif
 #define JL_HIDDEN
-#define JL_DLLIMPORT   __declspec(dllimport)
+#define JL_DLLIMPORT   __declspec(dllimport) JL_VISIBILITY_DEFAULT
 #else
 #define STDCALL
 #define JL_DLLIMPORT __attribute__ ((visibility("default")))
@@ -96,27 +92,23 @@ typedef intptr_t ssize_t;
 #include <endian.h>
 #define LITTLE_ENDIAN  __LITTLE_ENDIAN
 #define BIG_ENDIAN     __BIG_ENDIAN
-#define PDP_ENDIAN     __PDP_ENDIAN
 #define BYTE_ORDER     __BYTE_ORDER
 #endif
 
-#if defined(__APPLE__) || defined(__FreeBSD__)
+#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__)
 #include <machine/endian.h>
 #define __LITTLE_ENDIAN  LITTLE_ENDIAN
 #define __BIG_ENDIAN     BIG_ENDIAN
-#define __PDP_ENDIAN     PDP_ENDIAN
 #define __BYTE_ORDER     BYTE_ORDER
 #endif
 
 #ifdef _OS_WINDOWS_
 #define __LITTLE_ENDIAN    1234
 #define __BIG_ENDIAN       4321
-#define __PDP_ENDIAN       3412
 #define __BYTE_ORDER       __LITTLE_ENDIAN
 #define __FLOAT_WORD_ORDER __LITTLE_ENDIAN
 #define LITTLE_ENDIAN      __LITTLE_ENDIAN
 #define BIG_ENDIAN         __BIG_ENDIAN
-#define PDP_ENDIAN         __PDP_ENDIAN
 #define BYTE_ORDER         __BYTE_ORDER
 #endif
 
@@ -127,6 +119,9 @@ typedef intptr_t ssize_t;
 #define STATIC_INLINE static inline
 #define FORCE_INLINE static inline __attribute__((always_inline))
 
+#define EXTERN_INLINE_DECLARE inline __attribute__ ((visibility("default")))
+#define EXTERN_INLINE_DEFINE extern inline JL_DLLEXPORT
+
 #if defined(_OS_WINDOWS_) && !defined(_COMPILER_GCC_)
 #  define NOINLINE __declspec(noinline)
 #  define NOINLINE_DECL(f) __declspec(noinline) f
diff --git a/src/support/htable.h b/src/support/htable.h
index 4f821493beee8..77de893c835c5 100644
--- a/src/support/htable.h
+++ b/src/support/htable.h
@@ -33,10 +33,10 @@ typedef struct {
 // initialize hash table, reserving space for `size` expected number of
 // elements. (Expect `h->size > size` for efficient occupancy factor.)
 htable_t *htable_new(htable_t *h, size_t size) JL_NOTSAFEPOINT;
-void htable_free(htable_t *h);
+void htable_free(htable_t *h) JL_NOTSAFEPOINT;
 
 // clear and (possibly) change size
-void htable_reset(htable_t *h, size_t sz);
+void htable_reset(htable_t *h, size_t sz) JL_NOTSAFEPOINT;
 
 // Lookup and mutation. See htable.inc for detail.
 #define HTPROT(HTNAME)                                                  \
diff --git a/src/support/ios.c b/src/support/ios.c
index c98c529991642..2d4722f9370a1 100644
--- a/src/support/ios.c
+++ b/src/support/ios.c
@@ -10,6 +10,7 @@
 #include <stdio.h> // for printf
 
 #include "dtypes.h"
+#include "uv.h"
 
 #ifdef _OS_WINDOWS_
 #include <malloc.h>
@@ -210,8 +211,10 @@ static char *_buf_realloc(ios_t *s, size_t sz)
         if (temp == NULL)
             return NULL;
         s->ownbuf = 1;
-        if (s->size > 0)
+        if (s->size > 0) {
+            assert(s->buf != NULL);
             memcpy(temp, s->buf, (size_t)s->size);
+        }
     }
 
     s->buf = temp;
@@ -600,12 +603,12 @@ int ios_eof(ios_t *s)
 {
     if (s->state == bst_rd && s->bpos < s->size)
         return 0;
+    if (s->_eof)
+        return 1;
     if (s->bm == bm_mem)
-        return (s->_eof ? 1 : 0);
+        return 0;
     if (s->fd == -1)
         return 1;
-    if (s->_eof)
-        return 1;
     return 0;
     /*
     if (_fd_available(s->fd))
@@ -615,6 +618,12 @@ int ios_eof(ios_t *s)
     */
 }
 
+void ios_reseteof(ios_t *s)
+{
+    if (s->bm != bm_mem && s->fd != -1)
+        s->_eof = 0;
+}
+
 int ios_eof_blocking(ios_t *s)
 {
     if (s->state == bst_rd && s->bpos < s->size)
@@ -721,8 +730,10 @@ char *ios_take_buffer(ios_t *s, size_t *psize)
         buf = (char*)LLT_ALLOC((size_t)s->size + 1);
         if (buf == NULL)
             return NULL;
-        if (s->size)
+        if (s->size) {
+            assert(s->buf != NULL);
             memcpy(buf, s->buf, (size_t)s->size);
+        }
     }
     else if (s->size == s->maxsize) {
         buf = (char*)LLT_REALLOC(s->buf, (size_t)s->size + 1);
@@ -978,6 +989,15 @@ ios_t *ios_file(ios_t *s, const char *fname, int rd, int wr, int create, int tru
     return NULL;
 }
 
+#ifdef _OS_WINDOWS_
+const wchar_t *ios_utf8_to_wchar(const char *str) {
+    ssize_t wlen = uv_wtf8_length_as_utf16(str);
+    wchar_t *wstr = (wchar_t *)malloc_s(sizeof(wchar_t) * wlen);
+    uv_wtf8_to_utf16(str, wstr, wlen);
+    return wstr;
+}
+#endif // _OS_WINDOWS_
+
 // Portable ios analogue of mkstemp: modifies fname to replace
 // trailing XXXX's with unique ID and returns the file handle s
 // for writing and reading.
@@ -1051,6 +1071,7 @@ ios_t *ios_fd(ios_t *s, long fd, int isfile, int own)
 ios_t *ios_stdin = NULL;
 ios_t *ios_stdout = NULL;
 ios_t *ios_stderr = NULL;
+ios_t *ios_safe_stderr = NULL;
 
 void ios_init_stdstreams(void)
 {
@@ -1064,6 +1085,12 @@ void ios_init_stdstreams(void)
     ios_stderr = (ios_t*)malloc_s(sizeof(ios_t));
     ios_fd(ios_stderr, STDERR_FILENO, 0, 0);
     ios_stderr->bm = bm_none;
+
+    // this 'safe' variant must use `bm_none` to avoid memory allocation
+    // in an async-signal context
+    ios_safe_stderr = (ios_t*)malloc_s(sizeof(ios_t));
+    ios_fd(ios_safe_stderr, STDERR_FILENO, 0, 0);
+    ios_safe_stderr->bm = bm_none;
 }
 
 /* higher level interface */
@@ -1219,7 +1246,9 @@ char *ios_readline(ios_t *s)
     ios_mem(&dest, 0);
     ios_copyuntil(&dest, s, '\n', 1);
     size_t n;
-    return ios_take_buffer(&dest, &n);
+    char * ret = ios_take_buffer(&dest, &n);
+    ios_close(&dest);
+    return ret;
 }
 
 extern int vasprintf(char **strp, const char *fmt, va_list ap);
diff --git a/src/support/ios.h b/src/support/ios.h
index 6eab9e21c45b6..bbaea0014dc81 100644
--- a/src/support/ios.h
+++ b/src/support/ios.h
@@ -14,8 +14,6 @@ extern "C" {
 // this flag controls when data actually moves out to the underlying I/O
 // channel. memory streams are a special case of this where the data
 // never moves out.
-
-//make it compatible with UV Handles
 typedef enum { bm_none=1000, bm_line, bm_block, bm_mem } bufmode_t;
 typedef enum { bst_none, bst_rd, bst_wr } bufstate_t;
 
@@ -88,6 +86,7 @@ extern void (*ios_set_io_wait_func)(int);
 JL_DLLEXPORT size_t ios_read(ios_t *s, char *dest, size_t n) JL_NOTSAFEPOINT;
 JL_DLLEXPORT size_t ios_readall(ios_t *s, char *dest, size_t n) JL_NOTSAFEPOINT;
 JL_DLLEXPORT size_t ios_write(ios_t *s, const char *data, size_t n) JL_NOTSAFEPOINT;
+JL_DLLEXPORT size_t ios_write_direct(ios_t *dest, ios_t *src) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int64_t ios_seek(ios_t *s, int64_t pos) JL_NOTSAFEPOINT; // absolute seek
 JL_DLLEXPORT int64_t ios_seek_end(ios_t *s) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int64_t ios_skip(ios_t *s, int64_t offs);  // relative seek
@@ -127,6 +126,7 @@ JL_DLLEXPORT ios_t *ios_fd(ios_t *s, long fd, int isfile, int own);
 extern JL_DLLEXPORT ios_t *ios_stdin;
 extern JL_DLLEXPORT ios_t *ios_stdout;
 extern JL_DLLEXPORT ios_t *ios_stderr;
+extern JL_DLLEXPORT ios_t *ios_safe_stderr; // safe for async-signal context
 void ios_init_stdstreams(void);
 
 /* high-level functions - output */
@@ -153,6 +153,10 @@ int ios_ungetc(int c, ios_t *s);
 //wint_t ios_ungetwc(ios_t *s, wint_t wc);
 #define ios_puts(str, s) ios_write(s, str, strlen(str))
 
+#ifdef _OS_WINDOWS_
+const wchar_t *ios_utf8_to_wchar(const char *str);
+#endif
+
 /*
   With memory streams, mixed reads and writes are equivalent to performing
   sequences of *p++, as either an lvalue or rvalue. File streams behave
diff --git a/src/support/platform.h b/src/support/platform.h
index 56f8cafbc89fa..c11e5237fca9d 100644
--- a/src/support/platform.h
+++ b/src/support/platform.h
@@ -8,7 +8,7 @@
  * based of compiler-specific pre-defined macros. It is based on the
  * information that can be found at the following address:
  *
- *     http://sourceforge.net/p/predef/wiki/Home/
+ *     https://sourceforge.net/p/predef/wiki/Home/
  *
  * Possible values include:
  *      Compiler:
@@ -16,6 +16,7 @@
  *          _COMPILER_GCC_
  *      OS:
  *          _OS_FREEBSD_
+ *          _OS_OPENBSD_
  *          _OS_LINUX_
  *          _OS_WINDOWS_
  *          _OS_DARWIN_
@@ -26,6 +27,7 @@
  *          _CPU_X86_64_
  *          _CPU_AARCH64_
  *          _CPU_ARM_
+ *          _CPU_RISCV64_
  *          _CPU_WASM_
  */
 
@@ -33,12 +35,12 @@
 *                               Compiler                                       *
 *******************************************************************************/
 
-#if defined(__clang__)
+#if defined(_MSC_VER)
+#define _COMPILER_MICROSOFT_
+#elif defined(__clang__)
 #define _COMPILER_CLANG_
 #elif defined(__GNUC__)
 #define _COMPILER_GCC_
-#elif defined(_MSC_VER)
-#define _COMPILER_MICROSOFT_
 #else
 #error Unsupported compiler
 #endif
@@ -81,6 +83,8 @@
 
 #if defined(__FreeBSD__)
 #define _OS_FREEBSD_
+#elif defined(__OpenBSD__)
+#define _OS_OPENBSD_
 #elif defined(__linux__)
 #define _OS_LINUX_
 #elif defined(_WIN32) || defined(_WIN64)
@@ -103,6 +107,8 @@
 #define _CPU_AARCH64_
 #elif defined(__arm__) || defined(_M_ARM)
 #define _CPU_ARM_
+#elif defined(__riscv) && __riscv_xlen == 64
+#define _CPU_RISCV64_
 #elif defined(__PPC64__)
 #define _CPU_PPC64_
 #elif defined(_ARCH_PPC)
diff --git a/src/support/strptime.c b/src/support/strptime.c
index ab75ee05ee8db..27c86c9e4f2b8 100644
--- a/src/support/strptime.c
+++ b/src/support/strptime.c
@@ -134,7 +134,7 @@ static const char * const nadt[5] = {
 
 /*
  * Table to determine the ordinal date for the start of a month.
- * Ref: http://en.wikipedia.org/wiki/ISO_week_date
+ * Ref: https://en.wikipedia.org/wiki/ISO_week_date
  */
 static const int start_of_month[2][13] = {
 	/* non-leap year */
@@ -147,7 +147,7 @@ static const int start_of_month[2][13] = {
  * Calculate the week day of the first day of a year. Valid for
  * the Gregorian calendar, which began Sept 14, 1752 in the UK
  * and its colonies. Ref:
- * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
+ * https://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
  */
 
 static int
diff --git a/src/support/strtod.c b/src/support/strtod.c
index 24f556d0c086b..e0ad1bf33435a 100644
--- a/src/support/strtod.c
+++ b/src/support/strtod.c
@@ -11,7 +11,7 @@
 extern "C" {
 #endif
 
-#if !defined(_OS_WINDOWS_)
+#if !defined(_OS_WINDOWS_) && !defined(__OpenBSD__)
 // This code path should be used for systems that support the strtod_l function
 
 // Cache locale object
diff --git a/src/support/utf8.c b/src/support/utf8.c
index 42a420fb0c499..46a6515e9b753 100644
--- a/src/support/utf8.c
+++ b/src/support/utf8.c
@@ -27,11 +27,10 @@
 
 #ifdef _OS_WINDOWS_
 #include <malloc.h>
-#define snprintf _snprintf
 #else
-#ifndef __FreeBSD__
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__)
 #include <alloca.h>
-#endif /* __FreeBSD__ */
+#endif /* !__FreeBSD__ && !__OpenBSD__ */
 #endif
 #include <assert.h>
 
@@ -410,7 +409,7 @@ int u8_escape_wchar(char *buf, size_t sz, uint32_t ch)
 }
 
 size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
-                 int escape_quotes, int ascii)
+                 const char *escapes, int ascii)
 {
     size_t i = *pi, i0;
     uint32_t ch;
@@ -420,12 +419,9 @@ size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
 
     while (i<end && buf<blim) {
         // sz-11: leaves room for longest escape sequence
-        if (escape_quotes && src[i] == '"') {
-            buf += buf_put2c(buf, "\\\"");
-            i++;
-        }
-        else if (src[i] == '\\') {
-            buf += buf_put2c(buf, "\\\\");
+        if ((src[i] == '\\') || (escapes && strchr(escapes, src[i]))) {
+            *buf++ = '\\';
+            *buf++ = src[i];
             i++;
         }
         else {
@@ -571,8 +567,8 @@ int u8_isvalid(const char *str, size_t len)
             return 0;
         // Check for surrogate chars
         if (byt == 0xed && *pnt > 0x9f) return 0;
-	// Check for overlong encoding
-	if (byt == 0xe0 && *pnt < 0xa0) return 0;
+        // Check for overlong encoding
+        if (byt == 0xe0 && *pnt < 0xa0) return 0;
         pnt += 2;
     } else {                        // 4-byte sequence
         // Must have 3 valid continuation characters
diff --git a/src/support/utf8.h b/src/support/utf8.h
index 1d8e31c043838..eab86f602ee61 100644
--- a/src/support/utf8.h
+++ b/src/support/utf8.h
@@ -12,7 +12,7 @@ extern "C" {
 /* is c the start of a utf8 sequence? */
 #define isutf(c) (((c)&0xC0)!=0x80)
 
-#define UEOF ((uint32_t)-1)
+#define UEOF (UINT32_MAX)
 
 /* convert UTF-8 data to wide character */
 size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz);
@@ -63,7 +63,7 @@ int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
 
    sz is buf size in bytes. must be at least 12.
 
-   if escape_quotes is nonzero, quote characters will be escaped.
+   if escapes is given, given characters will also be escaped (in addition to \\).
 
    if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives.
 
@@ -75,7 +75,7 @@ int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
    returns number of bytes placed in buf, including a NUL terminator.
 */
 size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
-                 int escape_quotes, int ascii);
+                 const char *escapes, int ascii);
 
 /* utility predicates used by the above */
 int octal_digit(char c);
diff --git a/src/support/win32-clang-ABI-bug/optional b/src/support/win32-clang-ABI-bug/optional
new file mode 100644
index 0000000000000..fd2f7646e1766
--- /dev/null
+++ b/src/support/win32-clang-ABI-bug/optional
@@ -0,0 +1,532 @@
+//===- optional.h - Simple variant for passing optional values --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///  This file provides optional, a template class modeled in the spirit of
+///  OCaml's 'opt' variant.  The idea is to strongly type whether or not
+///  a value can be optional.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef JL_OPTIONAL_H
+#define JL_OPTIONAL_H
+
+//#include "llvm/ADT/STLForwardCompat.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/type_traits.h"
+#include <cassert>
+#include <new>
+#include <utility>
+#include <type_traits>
+#include "llvm-version.h"
+
+namespace std {
+
+//#include "llvm/ADT/None.h"
+/// A simple null object to allow implicit construction of optional<T>
+/// and similar types without having to spell out the specialization's name.
+// (constant value 1 in an attempt to workaround MSVC build issue... )
+enum class nullopt_t { nullopt = 1 };
+const nullopt_t nullopt = nullopt_t::nullopt;
+
+class raw_ostream;
+
+namespace optional_detail {
+
+/// Storage for any type.
+//
+// The specialization condition intentionally uses
+// llvm::is_trivially_{copy/move}_constructible instead of
+// std::is_trivially_{copy/move}_constructible. GCC versions prior to 7.4 may
+// instantiate the copy/move constructor of `T` when
+// std::is_trivially_{copy/move}_constructible is instantiated.  This causes
+// compilation to fail if we query the trivially copy/move constructible
+// property of a class which is not copy/move constructible.
+//
+// The current implementation of OptionalStorage insists that in order to use
+// the trivial specialization, the value_type must be trivially copy
+// constructible and trivially copy assignable due to =default implementations
+// of the copy/move constructor/assignment.  It does not follow that this is
+// necessarily the case std::is_trivially_copyable is true (hence the expanded
+// specialization condition).
+//
+// The move constructible / assignable conditions emulate the remaining behavior
+// of std::is_trivially_copyable.
+template <typename T,
+          bool = (std::is_trivially_copy_constructible<T>::value &&
+                  std::is_trivially_copy_assignable<T>::value &&
+                  (std::is_trivially_move_constructible<T>::value ||
+                   !std::is_move_constructible<T>::value) &&
+                  (std::is_trivially_move_assignable<T>::value ||
+                   !std::is_move_assignable<T>::value))>
+class OptionalStorage {
+  union {
+    char empty;
+    T val;
+  };
+  bool hasVal = false;
+
+public:
+  ~OptionalStorage() { reset(); }
+
+  constexpr OptionalStorage() noexcept : empty() {}
+
+  constexpr OptionalStorage(OptionalStorage const &other) : OptionalStorage() {
+    if (other.has_value()) {
+      emplace(other.val);
+    }
+  }
+  constexpr OptionalStorage(OptionalStorage &&other) : OptionalStorage() {
+    if (other.has_value()) {
+      emplace(std::move(other.val));
+    }
+  }
+
+  template <class... Args>
+  constexpr explicit OptionalStorage(in_place_t, Args &&...args)
+      : val(std::forward<Args>(args)...), hasVal(true) {}
+
+  void reset() noexcept {
+    if (hasVal) {
+      val.~T();
+      hasVal = false;
+    }
+  }
+
+  constexpr bool has_value() const noexcept { return hasVal; }
+  constexpr bool hasValue() const noexcept { return hasVal; }
+
+  T &value() &noexcept {
+    assert(hasVal);
+    return val;
+  }
+  T &getValue() &noexcept {
+    assert(hasVal);
+    return val;
+  }
+  constexpr T const &value() const &noexcept {
+    assert(hasVal);
+    return val;
+  }
+  constexpr T const &getValue() const &noexcept {
+    assert(hasVal);
+    return val;
+  }
+  T &&value() &&noexcept {
+    assert(hasVal);
+    return std::move(val);
+  }
+  T &&getValue() &&noexcept {
+    assert(hasVal);
+    return std::move(val);
+  }
+
+  template <class... Args> void emplace(Args &&...args) {
+    reset();
+    ::new ((void *)std::addressof(val)) T(std::forward<Args>(args)...);
+    hasVal = true;
+  }
+
+  OptionalStorage &operator=(T const &y) {
+    if (has_value()) {
+      val = y;
+    } else {
+      ::new ((void *)std::addressof(val)) T(y);
+      hasVal = true;
+    }
+    return *this;
+  }
+  OptionalStorage &operator=(T &&y) {
+    if (has_value()) {
+      val = std::move(y);
+    } else {
+      ::new ((void *)std::addressof(val)) T(std::move(y));
+      hasVal = true;
+    }
+    return *this;
+  }
+
+  OptionalStorage &operator=(OptionalStorage const &other) {
+    if (other.has_value()) {
+      if (has_value()) {
+        val = other.val;
+      } else {
+        ::new ((void *)std::addressof(val)) T(other.val);
+        hasVal = true;
+      }
+    } else {
+      reset();
+    }
+    return *this;
+  }
+
+  OptionalStorage &operator=(OptionalStorage &&other) {
+    if (other.has_value()) {
+      if (has_value()) {
+        val = std::move(other.val);
+      } else {
+        ::new ((void *)std::addressof(val)) T(std::move(other.val));
+        hasVal = true;
+      }
+    } else {
+      reset();
+    }
+    return *this;
+  }
+};
+
+template <typename T> class OptionalStorage<T, true> {
+  union {
+    char empty;
+    T val;
+  };
+  bool hasVal = false;
+
+public:
+  ~OptionalStorage() = default;
+
+  constexpr OptionalStorage() noexcept : empty{} {}
+
+  constexpr OptionalStorage(OptionalStorage const &other) = default;
+  constexpr OptionalStorage(OptionalStorage &&other) = default;
+
+  OptionalStorage &operator=(OptionalStorage const &other) = default;
+  OptionalStorage &operator=(OptionalStorage &&other) = default;
+
+  template <class... Args>
+  constexpr explicit OptionalStorage(in_place_t, Args &&...args)
+      : val(std::forward<Args>(args)...), hasVal(true) {}
+
+  void reset() noexcept {
+    if (hasVal) {
+      val.~T();
+      hasVal = false;
+    }
+  }
+
+  constexpr bool has_value() const noexcept { return hasVal; }
+  constexpr bool hasValue() const noexcept { return hasVal; }
+
+  T &value() &noexcept {
+    assert(hasVal);
+    return val;
+  }
+  T &getValue() &noexcept {
+    assert(hasVal);
+    return val;
+  }
+  constexpr T const &value() const &noexcept {
+    assert(hasVal);
+    return val;
+  }
+  constexpr T const &getValue() const &noexcept {
+    assert(hasVal);
+    return val;
+  }
+  T &&value() &&noexcept {
+    assert(hasVal);
+    return std::move(val);
+  }
+  T &&getValue() &&noexcept {
+    assert(hasVal);
+    return std::move(val);
+  }
+
+  template <class... Args> void emplace(Args &&...args) {
+    reset();
+    ::new ((void *)std::addressof(val)) T(std::forward<Args>(args)...);
+    hasVal = true;
+  }
+
+  OptionalStorage &operator=(T const &y) {
+    if (has_value()) {
+      val = y;
+    } else {
+      ::new ((void *)std::addressof(val)) T(y);
+      hasVal = true;
+    }
+    return *this;
+  }
+  OptionalStorage &operator=(T &&y) {
+    if (has_value()) {
+      val = std::move(y);
+    } else {
+      ::new ((void *)std::addressof(val)) T(std::move(y));
+      hasVal = true;
+    }
+    return *this;
+  }
+};
+
+} // namespace optional_detail
+
+template <typename T> class optional {
+  optional_detail::OptionalStorage<T> Storage;
+
+public:
+  using value_type = T;
+
+  constexpr optional() = default;
+  constexpr optional(nullopt_t) {}
+
+  constexpr optional(const T &y) : Storage(in_place, y) {}
+  constexpr optional(const optional &O) = default;
+
+  constexpr optional(T &&y) : Storage(in_place, std::move(y)) {}
+  constexpr optional(optional &&O) = default;
+
+  template <typename... ArgTypes>
+  constexpr optional(in_place_t, ArgTypes &&...Args)
+      : Storage(in_place, std::forward<ArgTypes>(Args)...) {}
+
+  optional &operator=(T &&y) {
+    Storage = std::move(y);
+    return *this;
+  }
+  optional &operator=(optional &&O) = default;
+
+  /// Create a new object by constructing it in place with the given arguments.
+  template <typename... ArgTypes> void emplace(ArgTypes &&... Args) {
+    Storage.emplace(std::forward<ArgTypes>(Args)...);
+  }
+
+  static constexpr optional create(const T *y) {
+    return y ? optional(*y) : optional();
+  }
+
+  optional &operator=(const T &y) {
+    Storage = y;
+    return *this;
+  }
+  optional &operator=(const optional &O) = default;
+
+  void reset() { Storage.reset(); }
+
+  constexpr const T *getPointer() const { return &Storage.value(); }
+  T *getPointer() { return &Storage.value(); }
+  constexpr const T &value() const & { return Storage.value(); }
+  constexpr const T &getValue() const & { return Storage.value(); }
+  T &value() & { return Storage.value(); }
+  T &getValue() & { return Storage.value(); }
+
+  constexpr explicit operator bool() const { return has_value(); }
+  constexpr bool has_value() const { return Storage.has_value(); }
+  constexpr bool hasValue() const { return Storage.has_value(); }
+  constexpr const T *operator->() const { return getPointer(); }
+  T *operator->() { return getPointer(); }
+  constexpr const T &operator*() const & { return value(); }
+  T &operator*() & { return value(); }
+
+  template <typename U> constexpr T value_or(U &&alt) const & {
+    return has_value() ? value() : std::forward<U>(alt);
+  }
+  template <typename U>
+  [[deprecated("Use value_or instead.")]] constexpr T
+  getValueOr(U &&alt) const & {
+    return has_value() ? value() : std::forward<U>(alt);
+  }
+
+  /// Apply a function to the value if present; otherwise return nullopt.
+  template <class Function>
+  auto map(const Function &F) const & -> optional<decltype(F(value()))> {
+    if (*this)
+      return F(value());
+    return nullopt;
+  }
+
+  T &&value() && { return std::move(Storage.value()); }
+  T &&getValue() && { return std::move(Storage.value()); }
+  T &&operator*() && { return std::move(Storage.value()); }
+
+  template <typename U> T value_or(U &&alt) && {
+    return has_value() ? std::move(value()) : std::forward<U>(alt);
+  }
+  template <typename U>
+  [[deprecated("Use value_or instead.")]] T getValueOr(U &&alt) && {
+    return has_value() ? std::move(value()) : std::forward<U>(alt);
+  }
+
+  /// Apply a function to the value if present; otherwise return nullopt.
+  template <class Function>
+  auto map(const Function &F)
+      && -> optional<decltype(F(std::move(*this).value()))> {
+    if (*this)
+      return F(std::move(*this).value());
+    return nullopt;
+  }
+};
+
+//template <class T> llvm::hash_code hash_value(const optional<T> &O) {
+//  return O ? hash_combine(true, *O) : hash_value(false);
+//}
+
+template <typename T, typename U>
+constexpr bool operator==(const optional<T> &X, const optional<U> &Y) {
+  if (X && Y)
+    return *X == *Y;
+  return X.has_value() == Y.has_value();
+}
+
+template <typename T, typename U>
+constexpr bool operator!=(const optional<T> &X, const optional<U> &Y) {
+  return !(X == Y);
+}
+
+template <typename T, typename U>
+constexpr bool operator<(const optional<T> &X, const optional<U> &Y) {
+  if (X && Y)
+    return *X < *Y;
+  return X.has_value() < Y.has_value();
+}
+
+template <typename T, typename U>
+constexpr bool operator<=(const optional<T> &X, const optional<U> &Y) {
+  return !(Y < X);
+}
+
+template <typename T, typename U>
+constexpr bool operator>(const optional<T> &X, const optional<U> &Y) {
+  return Y < X;
+}
+
+template <typename T, typename U>
+constexpr bool operator>=(const optional<T> &X, const optional<U> &Y) {
+  return !(X < Y);
+}
+
+template <typename T>
+constexpr bool operator==(const optional<T> &X, nullopt_t) {
+  return !X;
+}
+
+template <typename T>
+constexpr bool operator==(nullopt_t, const optional<T> &X) {
+  return X == nullopt;
+}
+
+template <typename T>
+constexpr bool operator!=(const optional<T> &X, nullopt_t) {
+  return !(X == nullopt);
+}
+
+template <typename T>
+constexpr bool operator!=(nullopt_t, const optional<T> &X) {
+  return X != nullopt;
+}
+
+template <typename T> constexpr bool operator<(const optional<T> &, nullopt_t) {
+  return false;
+}
+
+template <typename T> constexpr bool operator<(nullopt_t, const optional<T> &X) {
+  return X.has_value();
+}
+
+template <typename T>
+constexpr bool operator<=(const optional<T> &X, nullopt_t) {
+  return !(nullopt < X);
+}
+
+template <typename T>
+constexpr bool operator<=(nullopt_t, const optional<T> &X) {
+  return !(X < nullopt);
+}
+
+template <typename T> constexpr bool operator>(const optional<T> &X, nullopt_t) {
+  return nullopt < X;
+}
+
+template <typename T> constexpr bool operator>(nullopt_t, const optional<T> &X) {
+  return X < nullopt;
+}
+
+template <typename T>
+constexpr bool operator>=(const optional<T> &X, nullopt_t) {
+  return nullopt <= X;
+}
+
+template <typename T>
+constexpr bool operator>=(nullopt_t, const optional<T> &X) {
+  return X <= nullopt;
+}
+
+template <typename T>
+constexpr bool operator==(const optional<T> &X, const T &Y) {
+  return X && *X == Y;
+}
+
+template <typename T>
+constexpr bool operator==(const T &X, const optional<T> &Y) {
+  return Y && X == *Y;
+}
+
+template <typename T>
+constexpr bool operator!=(const optional<T> &X, const T &Y) {
+  return !(X == Y);
+}
+
+template <typename T>
+constexpr bool operator!=(const T &X, const optional<T> &Y) {
+  return !(X == Y);
+}
+
+template <typename T>
+constexpr bool operator<(const optional<T> &X, const T &Y) {
+  return !X || *X < Y;
+}
+
+template <typename T>
+constexpr bool operator<(const T &X, const optional<T> &Y) {
+  return Y && X < *Y;
+}
+
+template <typename T>
+constexpr bool operator<=(const optional<T> &X, const T &Y) {
+  return !(Y < X);
+}
+
+template <typename T>
+constexpr bool operator<=(const T &X, const optional<T> &Y) {
+  return !(Y < X);
+}
+
+template <typename T>
+constexpr bool operator>(const optional<T> &X, const T &Y) {
+  return Y < X;
+}
+
+template <typename T>
+constexpr bool operator>(const T &X, const optional<T> &Y) {
+  return Y < X;
+}
+
+template <typename T>
+constexpr bool operator>=(const optional<T> &X, const T &Y) {
+  return !(X < Y);
+}
+
+template <typename T>
+constexpr bool operator>=(const T &X, const optional<T> &Y) {
+  return !(X < Y);
+}
+
+raw_ostream &operator<<(raw_ostream &OS, nullopt_t);
+
+template <typename T, typename = decltype(std::declval<raw_ostream &>()
+                                          << std::declval<const T &>())>
+raw_ostream &operator<<(raw_ostream &OS, const optional<T> &O) {
+  if (O)
+    OS << *O;
+  else
+    OS << nullopt;
+  return OS;
+}
+
+} // end namespace
+
+#endif // JL_OPTIONAL_H
diff --git a/src/symbol.c b/src/symbol.c
index c9c0c0e533924..86f19f0f0e21c 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -15,6 +15,7 @@
 extern "C" {
 #endif
 
+uv_mutex_t symtab_lock;
 static _Atomic(jl_sym_t*) symtab = NULL;
 
 #define MAX_SYM_LEN ((size_t)INTPTR_MAX - sizeof(jl_taggedvalue_t) - sizeof(jl_sym_t) - 1)
@@ -28,16 +29,17 @@ static uintptr_t hash_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
 
 static size_t symbol_nbytes(size_t len) JL_NOTSAFEPOINT
 {
-    return (sizeof(jl_taggedvalue_t) + sizeof(jl_sym_t) + len + 1 + 7) & -8;
+    return ((sizeof(jl_sym_t) + len + 1 + 7) & -8);
 }
 
 static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
 {
-    jl_sym_t *sym;
     size_t nb = symbol_nbytes(len);
-    jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc_nolock(nb, 0, sizeof(void*), 0);
-    sym = (jl_sym_t*)jl_valueof(tag);
-    // set to old marked so that we won't look at it in the GC or write barrier.
+    jl_task_t *ct = jl_current_task;
+    // jl_sym_t is an object and needs to be allocated with jl_gc_permobj
+    // but its type is set below with jl_set_typetagof since
+    // jl_symbol_type might not have been initialized
+    jl_sym_t *sym = (jl_sym_t*)jl_gc_permobj(ct->ptls, nb, NULL, sizeof(void*));
     jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED);
     jl_atomic_store_relaxed(&sym->left, NULL);
     jl_atomic_store_relaxed(&sym->right, NULL);
@@ -86,15 +88,15 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT // (or throw)
     _Atomic(jl_sym_t*) *slot;
     jl_sym_t *node = symtab_lookup(&symtab, str, len, &slot);
     if (node == NULL) {
-        uv_mutex_lock(&gc_perm_lock);
+        uv_mutex_lock(&symtab_lock);
         // Someone might have updated it, check and look up again
         if (jl_atomic_load_relaxed(slot) != NULL && (node = symtab_lookup(slot, str, len, &slot))) {
-            uv_mutex_unlock(&gc_perm_lock);
+            uv_mutex_unlock(&symtab_lock);
             return node;
         }
         node = mk_symbol(str, len);
         jl_atomic_store_release(slot, node);
-        uv_mutex_unlock(&gc_perm_lock);
+        uv_mutex_unlock(&symtab_lock);
     }
     return node;
 }
@@ -129,7 +131,7 @@ JL_DLLEXPORT jl_sym_t *jl_gensym(void)
 {
     char name[16];
     char *n;
-    uint32_t ctr = jl_atomic_fetch_add(&gs_ctr, 1);
+    uint32_t ctr = jl_atomic_fetch_add_relaxed(&gs_ctr, 1);
     n = uint2str(&name[2], sizeof(name)-2, ctr, 10);
     *(--n) = '#'; *(--n) = '#';
     return jl_symbol(n);
@@ -153,7 +155,7 @@ JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len)
     name[1] = '#';
     name[2 + len] = '#';
     memcpy(name + 2, str, len);
-    uint32_t ctr = jl_atomic_fetch_add(&gs_ctr, 1);
+    uint32_t ctr = jl_atomic_fetch_add_relaxed(&gs_ctr, 1);
     n = uint2str(gs_name, sizeof(gs_name), ctr, 10);
     memcpy(name + 3 + len, n, sizeof(gs_name) - (n - gs_name));
     jl_sym_t *sym = _jl_symbol(name, alloc_len - (n - gs_name)- 1);
diff --git a/src/sys.c b/src/sys.c
index d55c5df7ab066..e6875e6a34f1d 100644
--- a/src/sys.c
+++ b/src/sys.c
@@ -102,7 +102,6 @@ JL_DLLEXPORT int32_t jl_nb_available(ios_t *s)
 
 // --- dir/file stuff ---
 
-JL_DLLEXPORT int jl_sizeof_uv_fs_t(void) { return sizeof(uv_fs_t); }
 JL_DLLEXPORT char *jl_uv_fs_t_ptr(uv_fs_t *req) { return (char*)req->ptr; }
 JL_DLLEXPORT char *jl_uv_fs_t_path(uv_fs_t *req) { return (char*)req->path; }
 
@@ -280,14 +279,15 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint
             return str;
         }
         a = jl_alloc_array_1d(jl_array_uint8_type, n - nchomp);
-        memcpy(jl_array_data(a), s->buf + s->bpos, n - nchomp);
+        memcpy(jl_array_data(a, uint8_t), s->buf + s->bpos, n - nchomp);
         s->bpos += n;
     }
     else {
         a = jl_alloc_array_1d(jl_array_uint8_type, 80);
         ios_t dest;
         ios_mem(&dest, 0);
-        ios_setbuf(&dest, (char*)a->data, 80, 0);
+        char *mem = jl_array_data(a, char);
+        ios_setbuf(&dest, (char*)mem, 80, 0);
         size_t n = ios_copyuntil(&dest, s, delim, 1);
         if (chomp && n > 0 && dest.buf[n - 1] == delim) {
             n--;
@@ -298,13 +298,11 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint
             assert(truncret == 0);
             (void)truncret; // ensure the variable is used to avoid warnings
         }
-        if (dest.buf != a->data) {
+        if (dest.buf != mem) {
             a = jl_take_buffer(&dest);
         }
         else {
-            a->length = n;
-            a->nrows = n;
-            ((char*)a->data)[n] = '\0';
+            a->dimsize[0] = n;
         }
         if (str) {
             JL_GC_PUSH1(&a);
@@ -463,7 +461,7 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
 #elif defined(_OS_WINDOWS_)
     //Try to get WIN7 API method
     GAPC gapc;
-    if (jl_dlsym(jl_kernel32_handle, "GetActiveProcessorCount", (void **)&gapc, 0)) {
+    if (jl_dlsym(jl_kernel32_handle, "GetActiveProcessorCount", (void **)&gapc, 0, 0)) {
         return gapc(ALL_PROCESSOR_GROUPS);
     }
     else { //fall back on GetSystemInfo
@@ -479,25 +477,10 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT int jl_effective_threads(void) JL_NOTSAFEPOINT
 {
-    int cpu = jl_cpu_threads();
-    int masksize = uv_cpumask_size();
-    if (masksize < 0 || jl_running_under_rr(0))
-        return cpu;
-    uv_thread_t tid = uv_thread_self();
-    char *cpumask = (char *)calloc(masksize, sizeof(char));
-    int err = uv_thread_getaffinity(&tid, cpumask, masksize);
-    if (err) {
-        free(cpumask);
-        jl_safe_printf("WARNING: failed to get thread affinity (%s %d)\n", uv_err_name(err),
-                       err);
-        return cpu;
-    }
-    int n = 0;
-    for (size_t i = 0; i < masksize; i++) {
-        n += cpumask[i];
-    }
-    free(cpumask);
-    return n < cpu ? n : cpu;
+    // We want the more conservative estimate of the two.
+    int cpu_threads = jl_cpu_threads();
+    int available_parallelism = uv_available_parallelism();
+    return available_parallelism < cpu_threads ? available_parallelism : cpu_threads;
 }
 
 
@@ -513,8 +496,8 @@ JL_DLLEXPORT uint64_t jl_hrtime(void) JL_NOTSAFEPOINT
 #ifdef __APPLE__
 #include <crt_externs.h>
 #else
-#if !defined(_OS_WINDOWS_) || defined(_COMPILER_GCC_)
-extern char **environ;
+#if !defined(_OS_WINDOWS_) || (defined(_COMPILER_GCC_) && defined(_POSIX_C_SOURCE))
+extern JL_DLLIMPORT char **environ;
 #endif
 #endif
 
@@ -632,10 +615,42 @@ JL_DLLEXPORT long jl_SC_CLK_TCK(void)
 #ifndef _OS_WINDOWS_
     return sysconf(_SC_CLK_TCK);
 #else
-    return 0;
+    return 1000; /* uv_cpu_info returns times in ms on Windows */
 #endif
 }
 
+#ifdef _OS_OPENBSD_
+// Helper for jl_pathname_for_handle()
+struct dlinfo_data {
+    void       *searched;
+    const char *result;
+};
+
+static int dlinfo_helper(struct dl_phdr_info *info, size_t size, void *vdata)
+{
+    struct dlinfo_data *data = (struct dlinfo_data *)vdata;
+    void *handle;
+
+    /* ensure dl_phdr_info at compile-time to be compatible with the one at runtime */
+    if (sizeof(*info) < size)
+        return -1;
+
+    /* dlopen the name */
+    handle = dlopen(info->dlpi_name, RTLD_LAZY | RTLD_NOLOAD);
+    if (handle == NULL)
+        return 0;
+
+    /* check if the opened library is the same as the searched handle */
+    if (data->searched == handle)
+        data->result = info->dlpi_name;
+
+    dlclose(handle);
+
+    /* continue if still not found */
+    return (data->result != NULL);
+}
+#endif
+
 // Takes a handle (as returned from dlopen()) and returns the absolute path to the image loaded
 JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle)
 {
@@ -678,6 +693,14 @@ JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle)
     free(pth16);
     return filepath;
 
+#elif defined(_OS_OPENBSD_)
+    struct dlinfo_data data = {
+        .searched = handle,
+        .result = NULL,
+    };
+    dl_iterate_phdr(&dlinfo_helper, &data);
+    return data.result;
+
 #else // Linux, FreeBSD, ...
 
     struct link_map *map;
@@ -748,26 +771,11 @@ JL_DLLEXPORT jl_sym_t *jl_get_ARCH(void) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT size_t jl_maxrss(void)
 {
-#if defined(_OS_WINDOWS_)
-    PROCESS_MEMORY_COUNTERS counter;
-    GetProcessMemoryInfo( GetCurrentProcess( ), &counter, sizeof(counter) );
-    return (size_t)counter.PeakWorkingSetSize;
-
-// FIXME: `rusage` is available on OpenBSD, DragonFlyBSD and NetBSD as well.
-//        All of them return `ru_maxrss` in kilobytes.
-#elif defined(_OS_LINUX_) || defined(_OS_DARWIN_) || defined (_OS_FREEBSD_)
-    struct rusage rusage;
-    getrusage( RUSAGE_SELF, &rusage );
-
-#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
-    return (size_t)(rusage.ru_maxrss * 1024);
-#else
-    return (size_t)rusage.ru_maxrss;
-#endif
-
-#else
-    return (size_t)0;
-#endif
+    uv_rusage_t rusage;
+    if (uv_getrusage(&rusage) == 0) {
+        return rusage.ru_maxrss * 1024;
+    }
+    return 0;
 }
 
 // Simple `rand()` like function, with global seed and added thread-safety
@@ -776,13 +784,12 @@ static _Atomic(uint64_t) g_rngseed;
 JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT
 {
     uint64_t max = UINT64_MAX;
-    uint64_t unbias = UINT64_MAX;
     uint64_t rngseed0 = jl_atomic_load_relaxed(&g_rngseed);
     uint64_t rngseed;
     uint64_t rnd;
     do {
         rngseed = rngseed0;
-        rnd = cong(max, unbias, &rngseed);
+        rnd = cong(max, &rngseed);
     } while (!jl_atomic_cmpswap_relaxed(&g_rngseed, &rngseed0, rngseed));
     return rnd;
 }
diff --git a/src/task.c b/src/task.c
index 1dab8688cb079..18d21b2343053 100644
--- a/src/task.c
+++ b/src/task.c
@@ -5,15 +5,6 @@
   lightweight processes (symmetric coroutines)
 */
 
-// need this to get the real definition of ucontext_t,
-// if we're going to use the ucontext_t implementation there
-//#if defined(__APPLE__) && defined(JL_HAVE_UCONTEXT)
-//#pragma push_macro("_XOPEN_SOURCE")
-//#define _XOPEN_SOURCE
-//#include <ucontext.h>
-//#pragma pop_macro("_XOPEN_SOURCE")
-//#endif
-
 // this is needed for !COPY_STACKS to work on linux
 #ifdef _FORTIFY_SOURCE
 // disable __longjmp_chk validation so that we can jump between stacks
@@ -37,32 +28,43 @@
 #include "threading.h"
 #include "julia_assert.h"
 
+#ifdef _COMPILER_TSAN_ENABLED_
+#include <sanitizer/tsan_interface.h>
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 #if defined(_COMPILER_ASAN_ENABLED_)
-static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) {
+#if __GLIBC__
+#include <dlfcn.h>
+// Bypass the ASAN longjmp wrapper - we are unpoisoning the stack ourselves,
+// since ASAN normally unpoisons far too much.
+// c.f. interceptor in jl_dlopen as well
+void (*real_siglongjmp)(jmp_buf _Buf, int _Value) = NULL;
+#endif
+static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_ucontext_t *from, jl_ucontext_t *to) {
     if (to->copy_stack)
-        __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize);
+        __sanitizer_start_switch_fiber(&from->asan_fake_stack, (char*)ptls->stackbase - ptls->stacksize, ptls->stacksize);
     else
-        __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, to->stkbuf, to->bufsz);
+        __sanitizer_start_switch_fiber(&from->asan_fake_stack, to->stkbuf, to->bufsz);
 }
-static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) {
+static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_ucontext_t *to) {
     if (to->copy_stack)
-        __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize);
+        __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase - ptls->stacksize, ptls->stacksize);
     else
         __sanitizer_start_switch_fiber(NULL, to->stkbuf, to->bufsz);
 }
-static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) {
-    __sanitizer_finish_switch_fiber(current->ctx.asan_fake_stack, NULL, NULL);
+static inline void sanitizer_finish_switch_fiber(jl_ucontext_t *last, jl_ucontext_t *current) {
+    __sanitizer_finish_switch_fiber(current->asan_fake_stack, NULL, NULL);
         //(const void**)&last->stkbuf,
         //&last->bufsz);
 }
 #else
-static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) JL_NOTSAFEPOINT {}
-static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) JL_NOTSAFEPOINT {}
-static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) JL_NOTSAFEPOINT {}
+static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_ucontext_t *from, jl_ucontext_t *to) JL_NOTSAFEPOINT {}
+static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_ucontext_t *to) JL_NOTSAFEPOINT {}
+static inline void sanitizer_finish_switch_fiber(jl_ucontext_t *last, jl_ucontext_t *current) JL_NOTSAFEPOINT {}
 #endif
 
 #if defined(_COMPILER_TSAN_ENABLED_)
@@ -78,19 +80,6 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur
         jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
         __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \
     } while (0)
-#ifdef COPY_STACKS
-#define tsan_destroy_copyctx(_ptls, _ctx) do { \
-        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
-        if (_tsan_macro_ctx != &(_ptls)->root_task->ctx) { \
-            __tsan_destroy_fiber(_tsan_macro_ctx->tsan_state); \
-        } \
-        _tsan_macro_ctx->tsan_state = NULL; \
-    } while (0)
-#define tsan_switch_to_copyctx(_ctx) do { \
-        struct jl_stack_context_t *_tsan_macro_ctx = (_ctx); \
-        __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \
-    } while (0)
-#endif
 #else
 // just do minimal type-checking on the arguments
 #define tsan_destroy_ctx(_ptls, _ctx) do { \
@@ -101,16 +90,6 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur
         jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
         (void)_tsan_macro_ctx; \
     } while (0)
-#ifdef COPY_STACKS
-#define tsan_destroy_copyctx(_ptls, _ctx) do { \
-        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
-        (void)_tsan_macro_ctx; \
-    } while (0)
-#define tsan_switch_to_copyctx(_ctx) do { \
-        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
-        (void)_tsan_macro_ctx; \
-    } while (0)
-#endif
 #endif
 
 // empirically, jl_finish_task needs about 64k stack space to infer/run
@@ -127,18 +106,10 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur
 #define ROOT_TASK_STACK_ADJUSTMENT 3000000
 #endif
 
-#ifdef JL_HAVE_ASYNCIFY
-// Switching logic is implemented in JavaScript
-#define STATIC_OR_JS JL_DLLEXPORT
-#else
-#define STATIC_OR_JS static
-#endif
-
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT;
-STATIC_OR_JS void jl_set_fiber(jl_ucontext_t *t);
-STATIC_OR_JS void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t);
-STATIC_OR_JS void jl_start_fiber_swap(jl_ucontext_t *savet, jl_ucontext_t *t);
-STATIC_OR_JS void jl_start_fiber_set(jl_ucontext_t *t);
+static void jl_set_fiber(jl_ucontext_t *t);
+static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t);
+static void jl_start_fiber_swap(jl_ucontext_t *savet, jl_ucontext_t *t);
+static void jl_start_fiber_set(jl_ucontext_t *t);
 
 #ifdef ALWAYS_COPY_STACKS
 # ifndef COPY_STACKS
@@ -197,7 +168,7 @@ static void JL_NO_ASAN JL_NO_MSAN memcpy_stack_a16(uint64_t *to, uint64_t *from,
     memcpy_noasan((char*)to_addr, (char*)from_addr, shadow_nb);
     memcpy_a16_noasan(jl_assume_aligned(to, 16), jl_assume_aligned(from, 16), nb);
 #elif defined(_COMPILER_MSAN_ENABLED_)
-# warning This function is imcompletely implemented for MSAN (TODO).
+# warning This function is incompletely implemented for MSAN (TODO).
     memcpy((char*)jl_assume_aligned(to, 16), (char*)jl_assume_aligned(from, 16), nb);
 #else
     memcpy((char*)jl_assume_aligned(to, 16), (char*)jl_assume_aligned(from, 16), nb);
@@ -214,17 +185,17 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt
     assert(stackbase > frame_addr);
     size_t nb = stackbase - frame_addr;
     void *buf;
-    if (lastt->bufsz < nb) {
-        asan_free_copy_stack(lastt->stkbuf, lastt->bufsz);
+    if (lastt->ctx.bufsz < nb) {
+        asan_free_copy_stack(lastt->ctx.stkbuf, lastt->ctx.bufsz);
         buf = (void*)jl_gc_alloc_buf(ptls, nb);
-        lastt->stkbuf = buf;
-        lastt->bufsz = nb;
+        lastt->ctx.stkbuf = buf;
+        lastt->ctx.bufsz = nb;
     }
     else {
-        buf = lastt->stkbuf;
+        buf = lastt->ctx.stkbuf;
     }
     *pt = NULL; // clear the gc-root for the target task before copying the stack for saving
-    lastt->copy_stack = nb;
+    lastt->ctx.copy_stack = nb;
     lastt->sticky = 1;
     memcpy_stack_a16((uint64_t*)buf, (uint64_t*)frame_addr, nb);
     // this task's stack could have been modified after
@@ -233,145 +204,166 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt
     jl_gc_wb_back(lastt);
 }
 
-JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, char *p)
+JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_ucontext_t *t, jl_ptls_t ptls, char *p)
 {
     size_t nb = t->copy_stack;
     char *_x = (char*)ptls->stackbase - nb;
     if (!p) {
         // switch to a stackframe that's beyond the bounds of the last switch
-        p = _x;
-        if ((char*)&_x > _x) {
-            p = (char*)alloca((char*)&_x - _x);
+        p = _x - 4096;
+        if ((char*)&_x > p) {
+            p = (char*)alloca((char*)&_x - p);
         }
         restore_stack(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca
     }
     void *_y = t->stkbuf;
     assert(_x != NULL && _y != NULL);
+#if defined(_OS_WINDOWS_) // this platform does not implement CFI_NORETURN correctly or at all in libunwind (or equivalent) which requires a workaround
+#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
+    void *volatile *return_address = (void *volatile *)__builtin_frame_address(0) + 1;
+    assert(*return_address == __builtin_return_address(0));
+    *return_address = NULL;
+#else
+#pragma message("warning: CFI_NORETURN not implemented for this platform, so profiling of copy_stacks may segfault in this build")
+#endif
+#else
+CFI_NORETURN
+#endif
     memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe
 
 #if defined(_OS_WINDOWS_)
-    jl_setcontext(&t->ctx.copy_ctx);
+    jl_setcontext(t->copy_ctx);
 #else
-    jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
+    jl_longjmp(t->copy_ctx->uc_mcontext, 1);
 #endif
     abort(); // unreachable
 }
 
-JL_NO_ASAN static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt)
+JL_NO_ASAN static void restore_stack2(jl_ucontext_t *t, jl_ptls_t ptls, jl_ucontext_t *lastt)
 {
     assert(t->copy_stack && !lastt->copy_stack);
     size_t nb = t->copy_stack;
-    char *_x = (char*)ptls->stackbase - nb;
-    void *_y = t->stkbuf;
-    assert(_x != NULL && _y != NULL);
-    memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe
-#if defined(JL_HAVE_UNW_CONTEXT)
+    if (nb > 1) {
+        char *_x = (char*)ptls->stackbase - nb;
+        void *_y = t->stkbuf;
+        assert(_x != NULL && _y != NULL);
+        memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb);
+    }
+#if defined(_OS_WINDOWS_)
+    // jl_swapcontext and setjmp are the same on Windows, so we can just use jl_swapcontext directly
+    tsan_switch_to_ctx(t);
+    jl_swapcontext(lastt->ctx, t->copy_ctx);
+#else
+#if defined(JL_TASK_SWITCH_LIBUNWIND)
     volatile int returns = 0;
-    int r = unw_getcontext(&lastt->ctx.ctx);
+    int r = unw_getcontext(lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
-#elif defined(JL_HAVE_ASM) || defined(JL_HAVE_SIGALTSTACK) || defined(_OS_WINDOWS_)
-    if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0))
+#elif defined(JL_TASK_SWITCH_ASM)
+    if (jl_setjmp(lastt->ctx->uc_mcontext, 0))
         return;
 #else
 #error COPY_STACKS is incompatible with this platform
 #endif
-    tsan_switch_to_copyctx(&t->ctx);
-#if defined(_OS_WINDOWS_)
-    jl_setcontext(&t->ctx.copy_ctx);
+    tsan_switch_to_ctx(t);
+    jl_longjmp(t->copy_ctx->uc_mcontext, 1);
+#endif
+}
+
+JL_NO_ASAN static void NOINLINE restore_stack3(jl_ucontext_t *t, jl_ptls_t ptls, char *p)
+{
+#if !defined(JL_TASK_SWITCH_ASM)
+    char *_x = (char*)ptls->stackbase;
+    if (!p) {
+        // switch to a stackframe that's well beyond the bounds of the next switch
+        p = _x - 4096;
+        if ((char*)&_x > p) {
+            p = (char*)alloca((char*)&_x - p);
+        }
+        restore_stack3(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca
+    }
+#endif
+#if defined(_OS_WINDOWS_) // this platform does not implement CFI_NORETURN correctly or at all in libunwind (or equivalent) which requires a workaround
+#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
+    void *volatile *return_address = (void *volatile *)__builtin_frame_address(0) + 1;
+    assert(*return_address == __builtin_return_address(0));
+    *return_address = NULL;
+#endif
 #else
-    jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
+CFI_NORETURN
 #endif
+    tsan_switch_to_ctx(t);
+    jl_start_fiber_set(t); // (doesn't return)
+    abort();
 }
+
 #endif
 
 /* Rooted by the base module */
-static _Atomic(jl_function_t*) task_done_hook_func JL_GLOBALLY_ROOTED = NULL;
+static _Atomic(jl_value_t*) task_done_hook_func JL_GLOBALLY_ROOTED = NULL;
 
-void JL_NORETURN jl_finish_task(jl_task_t *t)
+void JL_NORETURN jl_finish_task(jl_task_t *ct)
 {
-    jl_task_t *ct = jl_current_task;
     JL_PROBE_RT_FINISH_TASK(ct);
     JL_SIGATOMIC_BEGIN();
-    if (jl_atomic_load_relaxed(&t->_isexception))
-        jl_atomic_store_release(&t->_state, JL_TASK_STATE_FAILED);
+    if (ct->metrics_enabled) {
+        // [task] user_time -finished-> wait_time
+        assert(jl_atomic_load_relaxed(&ct->first_enqueued_at) != 0);
+        uint64_t now = jl_hrtime();
+        jl_atomic_store_relaxed(&ct->finished_at, now);
+        jl_atomic_fetch_add_relaxed(&ct->running_time_ns, now - jl_atomic_load_relaxed(&ct->last_started_running_at));
+    }
+    if (jl_atomic_load_relaxed(&ct->_isexception))
+        jl_atomic_store_release(&ct->_state, JL_TASK_STATE_FAILED);
     else
-        jl_atomic_store_release(&t->_state, JL_TASK_STATE_DONE);
-    if (t->copy_stack) { // early free of stkbuf
-        asan_free_copy_stack(t->stkbuf, t->bufsz);
-        t->stkbuf = NULL;
+        jl_atomic_store_release(&ct->_state, JL_TASK_STATE_DONE);
+    if (ct->ctx.copy_stack) { // early free of stkbuf
+        asan_free_copy_stack(ct->ctx.stkbuf, ct->ctx.bufsz);
+        ct->ctx.stkbuf = NULL;
     }
     // ensure that state is cleared
     ct->ptls->in_finalizer = 0;
     ct->ptls->in_pure_callback = 0;
     ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
     // let the runtime know this task is dead and find a new task to run
-    jl_function_t *done = jl_atomic_load_relaxed(&task_done_hook_func);
+    jl_value_t *done = jl_atomic_load_relaxed(&task_done_hook_func);
     if (done == NULL) {
-        done = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("task_done_hook"));
+        done = (jl_value_t*)jl_get_global_value(jl_base_module, jl_symbol("task_done_hook"), ct->world_age);
         if (done != NULL)
             jl_atomic_store_release(&task_done_hook_func, done);
     }
     if (done != NULL) {
-        jl_value_t *args[2] = {done, (jl_value_t*)t};
+        jl_value_t *args[2] = {done, (jl_value_t*)ct};
         JL_TRY {
             jl_apply(args, 2);
         }
         JL_CATCH {
-            jl_no_exc_handler(jl_current_exception(), ct);
+            jl_no_exc_handler(jl_current_exception(ct), ct);
         }
     }
-    jl_gc_debug_critical_error();
+    jl_gc_debug_fprint_critical_error(ios_safe_stderr);
     abort();
 }
 
-JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid)
-{
-    size_t off = 0;
-#ifndef _OS_WINDOWS_
-    jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
-    if (ptls0->root_task == task) {
-        // See jl_init_root_task(). The root task of the main thread
-        // has its buffer enlarged by an artificial 3000000 bytes, but
-        // that means that the start of the buffer usually points to
-        // inaccessible memory. We need to correct for this.
-        off = ROOT_TASK_STACK_ADJUSTMENT;
-    }
-#endif
-    jl_ptls_t ptls2 = task->ptls;
-    *ptid = -1;
-    if (ptls2) {
-        *ptid = jl_atomic_load_relaxed(&task->tid);
-#ifdef COPY_STACKS
-        if (task->copy_stack) {
-            *size = ptls2->stacksize;
-            return (char *)ptls2->stackbase - *size;
-        }
-#endif
-    }
-    *size = task->bufsz - off;
-    return (void *)((char *)task->stkbuf + off);
-}
-
 JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
                                        char **active_start, char **active_end,
                                        char **total_start, char **total_end)
 {
-    if (!task->started) {
+    if (!task->ctx.started) {
         *total_start = *active_start = 0;
         *total_end = *active_end = 0;
         return;
     }
 
     jl_ptls_t ptls2 = task->ptls;
-    if (task->copy_stack && ptls2) {
+    if (task->ctx.copy_stack && ptls2) {
         *total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize;
         *total_end = *active_end = (char*)ptls2->stackbase;
     }
-    else if (task->stkbuf) {
-        *total_start = *active_start = (char*)task->stkbuf;
+    else if (task->ctx.stkbuf) {
+        *total_start = *active_start = (char*)task->ctx.stkbuf;
 #ifndef _OS_WINDOWS_
         jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
         if (ptls0->root_task == task) {
@@ -384,12 +376,12 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
         }
 #endif
 
-        *total_end = *active_end = (char*)task->stkbuf + task->bufsz;
+        *total_end = *active_end = (char*)task->ctx.stkbuf + task->ctx.bufsz;
 #ifdef COPY_STACKS
         // save_stack stores the stack of an inactive task in stkbuf, and the
         // actual number of used bytes in copy_stack.
-        if (task->copy_stack > 1)
-            *active_end = (char*)task->stkbuf + task->copy_stack;
+        if (task->ctx.copy_stack > 1)
+            *active_end = (char*)task->ctx.stkbuf + task->ctx.copy_stack;
 #endif
     }
     else {
@@ -450,20 +442,16 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
 #endif
 
     int killed = jl_atomic_load_relaxed(&lastt->_state) != JL_TASK_STATE_RUNNABLE;
-    if (!t->started && !t->copy_stack) {
+    if (!t->ctx.started && !t->ctx.copy_stack) {
         // may need to allocate the stack
-        if (t->stkbuf == NULL) {
-            t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t);
-            if (t->stkbuf == NULL) {
+        if (t->ctx.stkbuf == NULL) {
+            t->ctx.stkbuf = jl_malloc_stack(&t->ctx.bufsz, t);
+            if (t->ctx.stkbuf == NULL) {
 #ifdef COPY_STACKS
                 // fall back to stack copying if mmap fails
-                t->copy_stack = 1;
+                t->ctx.copy_stack = 1;
+                t->ctx.bufsz = 0;
                 t->sticky = 1;
-                t->bufsz = 0;
-                if (always_copy_stacks)
-                    memcpy(&t->ctx.copy_ctx, &ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx));
-                else
-                    memcpy(&t->ctx.ctx, &ptls->base_ctx, sizeof(t->ctx.ctx));
 #else
                 jl_throw(jl_memory_exception);
 #endif
@@ -471,28 +459,45 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
         }
     }
 
+    union {
+        _jl_ucontext_t ctx;
+        jl_stack_context_t copy_ctx;
+    } lasttstate;
+
     if (killed) {
         *pt = NULL; // can't fail after here: clear the gc-root for the target task now
         lastt->gcstack = NULL;
         lastt->eh = NULL;
-        if (!lastt->copy_stack && lastt->stkbuf) {
+        if (!lastt->ctx.copy_stack && lastt->ctx.stkbuf) {
             // early free of stkbuf back to the pool
             jl_release_task_stack(ptls, lastt);
         }
     }
     else {
+        if (lastt->ctx.copy_stack) { // save the old copy-stack
+#ifdef _OS_WINDOWS_
+            lasttstate.copy_ctx.uc_stack.ss_sp = (char*)ptls->stackbase - ptls->stacksize;
+            lasttstate.copy_ctx.uc_stack.ss_size = ptls->stacksize;
+#endif
 #ifdef COPY_STACKS
-        if (lastt->copy_stack) { // save the old copy-stack
-            save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail)
-            if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0)) {
-                sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task));
-                // TODO: mutex unlock the thread we just switched from
+            if (jl_setjmp(lasttstate.copy_ctx.uc_mcontext, 0)) {
+#ifdef MIGRATE_TASKS
+                ptls = lastt->ptls;
+#endif
+                lastt->ctx.copy_ctx = NULL;
+                sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &lastt->ctx);
                 return;
             }
-        }
-        else
+            save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail)
+            lastt->ctx.copy_ctx = &lasttstate.copy_ctx;
+#else
+            abort();
 #endif
-        *pt = NULL; // can't fail after here: clear the gc-root for the target task now
+        }
+        else {
+            *pt = NULL; // can't fail after here: clear the gc-root for the target task now
+            lastt->ctx.ctx = &lasttstate.ctx;
+        }
     }
 
     // set up global state for new task and clear global state for old task
@@ -507,41 +512,44 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
     ptls->previous_task = lastt;
 #endif
 
-    if (t->started) {
+    if (t->ctx.started) {
+        if (t->ctx.copy_stack) {
 #ifdef COPY_STACKS
-        if (t->copy_stack) {
-            if (lastt->copy_stack) {
+            if (lastt->ctx.copy_stack) {
                 // Switching from copystack to copystack. Clear any shadow stack
                 // memory above the saved shadow stack.
-                uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->copy_stack;
+                uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->ctx.copy_stack;
                 uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15);
                 if (stackbottom < stacktop)
-                    asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom);
+                    asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom);
+            }
+            if (!killed && !lastt->ctx.copy_stack) {
+                sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+                restore_stack2(&t->ctx, ptls, &lastt->ctx); // half jl_swap_fiber and half restore_stack
             }
-            if (!killed && !lastt->copy_stack) {
-                sanitizer_start_switch_fiber(ptls, lastt, t);
-                restore_stack2(t, ptls, lastt);
-            } else {
-                tsan_switch_to_copyctx(&t->ctx);
+            else {
+                tsan_switch_to_ctx(&t->ctx);
                 if (killed) {
-                    sanitizer_start_switch_fiber_killed(ptls, t);
-                    tsan_destroy_copyctx(ptls, &lastt->ctx);
-                } else {
-                    sanitizer_start_switch_fiber(ptls, lastt, t);
+                    sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
+                    tsan_destroy_ctx(ptls, &lastt->ctx);
+                }
+                else {
+                    sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
                 }
 
-                if (lastt->copy_stack) {
-                    restore_stack(t, ptls, NULL); // (doesn't return)
+                if (lastt->ctx.copy_stack) {
+                    restore_stack(&t->ctx, ptls, NULL); // (doesn't return)
+                    abort();
                 }
                 else {
-                    restore_stack(t, ptls, (char*)1); // (doesn't return)
+                    restore_stack(&t->ctx, ptls, (char*)1); // (doesn't return)
+                    abort();
                 }
             }
-        }
-        else
 #endif
-        {
-            if (lastt->copy_stack) {
+        }
+        else {
+            if (lastt->ctx.copy_stack) {
                 // Switching away from a copystack to a non-copystack. Clear
                 // the whole shadow stack now, because otherwise we won't know
                 // how much stack memory to clear the next time we switch to
@@ -550,22 +558,23 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
                 uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15);
                 // We're not restoring the stack, but we still need to unpoison the
                 // stack, so it starts with a pristine stack.
-                asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom);
+                asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom);
             }
             if (killed) {
-                sanitizer_start_switch_fiber_killed(ptls, t);
+                sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
                 tsan_switch_to_ctx(&t->ctx);
                 tsan_destroy_ctx(ptls, &lastt->ctx);
                 jl_set_fiber(&t->ctx); // (doesn't return)
                 abort(); // unreachable
             }
             else {
-                sanitizer_start_switch_fiber(ptls, lastt, t);
-                if (lastt->copy_stack) {
+                sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+                if (lastt->ctx.copy_stack) {
                     // Resume at the jl_setjmp earlier in this function,
                     // don't do a full task swap
                     tsan_switch_to_ctx(&t->ctx);
                     jl_set_fiber(&t->ctx); // (doesn't return)
+                    abort();
                 }
                 else {
                     jl_swap_fiber(&lastt->ctx, &t->ctx);
@@ -574,41 +583,58 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
         }
     }
     else {
-        if (lastt->copy_stack) {
+#ifdef _COMPILER_TSAN_ENABLED_
+        t->ctx.tsan_state = __tsan_create_fiber(0);
+#endif
+        if (lastt->ctx.copy_stack) {
             uintptr_t stacktop = (uintptr_t)ptls->stackbase;
             uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15);
             // We're not restoring the stack, but we still need to unpoison the
             // stack, so it starts with a pristine stack.
-            asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom);
+            asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom);
         }
-        if (t->copy_stack && always_copy_stacks) {
+        if (t->ctx.copy_stack) {
+#ifdef COPY_STACKS
             tsan_switch_to_ctx(&t->ctx);
+            // create a temporary non-copy_stack context for starting this fiber
+            jl_ucontext_t ctx = t->ctx;
+            ctx.ctx = NULL;
+            ctx.stkbuf = (char*)ptls->stackbase - ptls->stacksize;
+            ctx.bufsz = ptls->stacksize;
+            ctx.copy_stack = 0;
+            ctx.started = 0;
             if (killed) {
-                sanitizer_start_switch_fiber_killed(ptls, t);
+                sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
                 tsan_destroy_ctx(ptls, &lastt->ctx);
-            } else {
-                sanitizer_start_switch_fiber(ptls, lastt, t);
+                if (lastt->ctx.copy_stack)
+                    restore_stack3(&ctx, ptls, NULL); // (doesn't return)
+                else
+                    jl_start_fiber_set(&ctx);
+                abort();
+            }
+            sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+            if (lastt->ctx.copy_stack) {
+                restore_stack3(&ctx, ptls, NULL); // (doesn't return)
+                abort();
+            }
+            else {
+                jl_start_fiber_swap(&lastt->ctx, &ctx);
             }
-#ifdef COPY_STACKS
-#if defined(_OS_WINDOWS_)
-            jl_setcontext(&t->ctx.copy_ctx);
 #else
-            jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
-#endif
+            abort();
 #endif
-            abort(); // unreachable
         }
         else {
             if (killed) {
-                sanitizer_start_switch_fiber_killed(ptls, t);
+                sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
                 tsan_switch_to_ctx(&t->ctx);
                 tsan_destroy_ctx(ptls, &lastt->ctx);
                 jl_start_fiber_set(&t->ctx); // (doesn't return)
                 abort();
             }
-            sanitizer_start_switch_fiber(ptls, lastt, t);
-            if (lastt->copy_stack) {
-                // Resume at the jl_setjmp earlier in this function
+            sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+            if (lastt->ctx.copy_stack) {
+                // copy_stack resumes at the jl_setjmp earlier in this function, so don't swap here
                 tsan_switch_to_ctx(&t->ctx);
                 jl_start_fiber_set(&t->ctx); // (doesn't return)
                 abort();
@@ -618,7 +644,14 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
             }
         }
     }
-    sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task));
+
+#ifdef MIGRATE_TASKS
+    ptls = lastt->ptls;
+#endif
+    assert(ptls);
+    assert(lastt == jl_atomic_load_relaxed(&ptls->current_task));
+    lastt->ctx.ctx = NULL;
+    sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &lastt->ctx);
 }
 
 JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
@@ -630,7 +663,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
         return;
     }
     int8_t gc_state = jl_gc_unsafe_enter(ptls);
-    if (t->started && t->stkbuf == NULL)
+    if (t->ctx.started && t->ctx.stkbuf == NULL)
         jl_error("attempt to switch to exited task");
     if (ptls->in_finalizer)
         jl_error("task switch not allowed from inside gc finalizer");
@@ -655,7 +688,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
     ptls->previous_task = NULL;
     assert(t != ct);
     assert(jl_atomic_load_relaxed(&t->tid) == ptls->tid);
-    if (!t->sticky && !t->copy_stack)
+    if (!t->sticky && !t->ctx.copy_stack)
         jl_atomic_store_release(&t->tid, -1);
 #else
     assert(ptls == ct->ptls);
@@ -688,12 +721,40 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct)
     // NULL exception objects are used when rethrowing. we don't have a handler to process
     // the exception stack, so at least report the exception at the top of the stack.
     if (!e)
-        e = jl_current_exception();
+        e = jl_current_exception(ct);
 
-    jl_printf((JL_STREAM*)STDERR_FILENO, "fatal: error thrown and no exception handler available.\n");
-    jl_static_show((JL_STREAM*)STDERR_FILENO, e);
-    jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-    jlbacktrace(); // written to STDERR_FILENO
+    // Write error to memory first
+    ios_t s;
+    ios_mem(&s, 1024);
+    jl_safe_fprintf(&s, "fatal: error thrown and no exception handler available.\n");
+    jl_static_show((JL_STREAM*)&s, e);
+    jl_safe_fprintf(&s, "\n");
+    jl_fprint_backtrace(&s);
+
+    // Then to STDERR
+    ios_write_direct(ios_stderr, &s);
+
+    // Finally write to system log (if supported)
+#ifdef _OS_WINDOWS_
+    HANDLE event_source = RegisterEventSourceW(NULL, L"julia");
+    if (event_source != INVALID_HANDLE_VALUE) {
+        ios_putc('\0', &s);
+        const wchar_t *strings[] = { ios_utf8_to_wchar(s.buf) };
+        ReportEventW(
+            event_source, EVENTLOG_ERROR_TYPE, /* category */ 0, /* event_id */ (DWORD)0xE0000000L,
+           /* user_sid */ NULL, /* n_strings */ 1, /* data_size */ 0, strings, /* data */ NULL
+        );
+        free((void *)strings[0]);
+
+        if (jl_options.alert_on_critical_error) {
+            MessageBoxW(NULL, /* message */ L"fatal: error thrown and no exception handler available\n\n"
+                                            L"See Application log in Event Viewer for more information.",
+                        /* title */ L"fatal error in libjulia", MB_OK | MB_ICONEXCLAMATION | MB_SYSTEMMODAL);
+        }
+    }
+#endif
+
+    ios_close(&s);
     if (ct == NULL)
         jl_raise(6);
     jl_exit(1);
@@ -712,48 +773,31 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct)
 #define pop_timings_stack() /* Nothing */
 #endif
 
-#define throw_internal_body(altstack)                                          \
-    assert(!jl_get_safe_restore());                                            \
-    jl_ptls_t ptls = ct->ptls;                                                 \
-    ptls->io_wait = 0;                                                         \
-    jl_gc_unsafe_enter(ptls);                                                  \
-    if (exception) {                                                           \
-        /* The temporary ptls->bt_data is rooted by special purpose code in the\
-           GC. This exists only for the purpose of preserving bt_data until we \
-           set ptls->bt_size=0 below. */                                       \
-        jl_push_excstack(&ct->excstack, exception,                             \
-                          ptls->bt_data, ptls->bt_size);                       \
-        ptls->bt_size = 0;                                                     \
-    }                                                                          \
-    assert(ct->excstack && ct->excstack->top);                                 \
-    jl_handler_t *eh = ct->eh;                                                 \
-    if (eh != NULL) {                                                          \
-        if (altstack) ptls->sig_exception = NULL;                              \
-        pop_timings_stack()                                                    \
-        asan_unpoison_task_stack(ct, &eh->eh_ctx);                             \
-        jl_longjmp(eh->eh_ctx, 1);                                             \
-    }                                                                          \
-    else {                                                                     \
-        jl_no_exc_handler(exception, ct);                                      \
-    }                                                                          \
-    assert(0);
-
 static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED)
 {
-CFI_NORETURN
     JL_GC_PUSH1(&exception);
-    throw_internal_body(0);
-    jl_unreachable();
-}
-
-/* On the signal stack, we don't want to create any asan frames, but we do on the
-   normal, stack, so we split this function in two, depending on which context
-   we're calling it in. This also lets us avoid making a GC frame on the altstack,
-   which might end up getting corrupted if we recur here through another signal. */
-JL_NO_ASAN static void JL_NORETURN throw_internal_altstack(jl_task_t *ct, jl_value_t *exception)
-{
-CFI_NORETURN
-    throw_internal_body(1);
+    jl_ptls_t ptls = ct->ptls;
+    ptls->io_wait = 0;
+    jl_gc_unsafe_enter(ptls);
+    if (exception) {
+        /* The temporary ptls->bt_data is rooted by special purpose code in the\
+           GC. This exists only for the purpose of preserving bt_data until we
+           set ptls->bt_size=0 below. */
+        jl_push_excstack(ct, &ct->excstack, exception,
+                         ptls->bt_data, ptls->bt_size);
+        ptls->bt_size = 0;
+    }
+    assert(ct->excstack && ct->excstack->top);
+    jl_handler_t *eh = ct->eh;
+    if (eh != NULL) {
+        pop_timings_stack()
+        asan_unpoison_task_stack(ct, &eh->eh_ctx);
+        jl_longjmp(eh->eh_ctx, 1);
+    }
+    else {
+        jl_no_exc_handler(exception, ct);
+    }
+    assert(0);
     jl_unreachable();
 }
 
@@ -783,24 +827,6 @@ JL_DLLEXPORT void jl_rethrow(void)
     throw_internal(ct, NULL);
 }
 
-// Special case throw for errors detected inside signal handlers.  This is not
-// (cannot be) called directly in the signal handler itself, but is returned to
-// after the signal handler exits.
-JL_DLLEXPORT JL_NO_ASAN void JL_NORETURN jl_sig_throw(void)
-{
-CFI_NORETURN
-    jl_jmp_buf *safe_restore = jl_get_safe_restore();
-    jl_task_t *ct = jl_current_task;
-    if (safe_restore) {
-        asan_unpoison_task_stack(ct, safe_restore);
-        jl_longjmp(*safe_restore, 1);
-    }
-    jl_ptls_t ptls = ct->ptls;
-    jl_value_t *e = ptls->sig_exception;
-    JL_GC_PROMISE_ROOTED(e);
-    throw_internal_altstack(ct, e);
-}
-
 JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED)
 {
     // TODO: Should uses of `rethrow(exc)` be replaced with a normal throw, now
@@ -855,153 +881,182 @@ The jl_rng_split function forks a task's RNG state in a way that is essentially
 guaranteed to avoid collisions between the RNG streams of all tasks. The main
 RNG is the xoshiro256++ RNG whose state is stored in rngState[0..3]. There is
 also a small internal RNG used for task forking stored in rngState[4]. This
-state is used to iterate a LCG (linear congruential generator), which is then
-put through four different variations of the strongest PCG output function,
-referred to as PCG-RXS-M-XS-64 [1]. This output function is invertible: it maps
-a 64-bit state to 64-bit output; which is one of the reasons it's not
-recommended for general purpose RNGs unless space is at a premium, but in our
-usage invertibility is actually a benefit, as is explained below.
+state is used to iterate a linear congruential generator (LCG), which is then
+combined with xoshiro256's state and put through four different variations of
+the strongest PCG output function, referred to as PCG-RXS-M-XS-64 [1].
 
 The goal of jl_rng_split is to perturb the state of each child task's RNG in
-such a way each that for an entire tree of tasks spawned starting with a given
-state in a root task, no two tasks have the same RNG state. Moreover, we want to
-do this in a way that is deterministic and repeatable based on (1) the root
-task's seed, (2) how many random numbers are generated, and (3) the task tree
-structure. The RNG state of a parent task is allowed to affect the initial RNG
-state of a child task, but the mere fact that a child was spawned should not
-alter the RNG output of the parent. This second requirement rules out using the
-main RNG to seed children -- some separate state must be maintained and changed
-upon forking a child task while leaving the main RNG state unchanged.
-
-The basic approach is that used by the DotMix [2] and SplitMix [3] RNG systems:
-each task is uniquely identified by a sequence of "pedigree" numbers, indicating
-where in the task tree it was spawned. This vector of pedigree coordinates is
-then reduced to a single value by computing a dot product with a common vector
-of random weights. The DotMix paper provides a proof that this dot product hash
-value (referred to as a "compression function") is collision resistant in the
-sense the the pairwise collision probability of two distinct tasks is 1/N where
-N is the number of possible weight values. Both DotMix and SplitMix use a prime
-value of N because the proof requires that the difference between two distinct
-pedigree coordinates must be invertible, which is guaranteed by N being prime.
-We take a different approach: we instead limit pedigree coordinates to being
-binary instead -- when a task spawns a child, both tasks share the same pedigree
-prefix, with the parent appending a zero and the child appending a one. This way
-a binary pedigree vector uniquely identifies each task. Moreover, since the
-coordinates are binary, the difference between coordinates is always one which
-is its own inverse regardless of whether N is prime or not. This allows us to
-compute the dot product modulo 2^64 using native machine arithmetic, which is
-considerably more efficient and simpler to implement than arithmetic in a prime
-modulus. It also means that when accumulating the dot product incrementally, as
-described in SplitMix, we don't need to multiply weights by anything, we simply
-add the random weight for the current task tree depth to the parent's dot
-product to derive the child's dot product.
-
-We use the LCG in rngState[4] to derive generate pseudorandom weights for the
-dot product. Each time a child is forked, we update the LCG in both parent and
-child tasks. In the parent, that's all we have to do -- the main RNG state
-remains unchanged (recall that spawning a child should *not* affect subsequence
-RNG draws in the parent). The next time the parent forks a child, the dot
-product weight used will be different, corresponding to being a level deeper in
-the binary task tree. In the child, we use the LCG state to generate four
-pseudorandom 64-bit weights (more below) and add each weight to one of the
-xoshiro256 state registers, rngState[0..3]. If we assume the main RNG remains
-unused in all tasks, then each register rngState[0..3] accumulates a different
-Dot/SplitMix dot product hash as additional child tasks are spawned. Each one is
-collision resistant with a pairwise collision chance of only 1/2^64. Assuming
-that the four pseudorandom 64-bit weight streams are sufficiently independent,
-the pairwise collision probability for distinct tasks is 1/2^256. If we somehow
-managed to spawn a trillion tasks, the probability of a collision would be on
-the order of 1/10^54. Practically impossible. Put another way, this is the same
-as the probability of two SHA256 hash values accidentally colliding, which we
-generally consider so unlikely as not to be worth worrying about.
-
-What about the random "junk" that's in the xoshiro256 state registers from
-normal use of the RNG? For a tree of tasks spawned with no intervening samples
-taken from the main RNG, all tasks start with the same junk which doesn't affect
-the chance of collision. The Dot/SplitMix papers even suggest adding a random
-base value to the dot product, so we can consider whatever happens to be in the
-xoshiro256 registers to be that. What if the main RNG gets used between task
-forks? In that case, the initial state registers will be different. The DotMix
-collision resistance proof doesn't apply without modification, but we can
-generalize the setup by adding a different base constant to each compression
-function and observe that we still have a 1/N chance of the weight value
-matching that exact difference. This proves collision resistance even between
-tasks whose dot product hashes are computed with arbitrary offsets. We can
-conclude that this scheme provides collision resistance even in the face of
-different starting states of the main RNG. Does this seem too good to be true?
-Perhaps another way of thinking about it will help. Suppose we seeded each task
-completely randomly. Then there would also be a 1/2^256 chance of collision,
-just as the DotMix proof gives. Essentially what the proof is telling us is that
-if the weights are chosen uniformly and uncorrelated with the rest of the
-compression function, then the dot product construction is a good enough way to
-pseudorandomly seed each task. From that perspective, it's easier to believe
-that adding an arbitrary constant to each seed doesn't worsen its randomness.
-
-This leaves us with the question of how to generate four pseudorandom weights to
-add to the rngState[0..3] registers at each depth of the task tree. The scheme
-used here is that a single 64-bit LCG state is iterated in both parent and child
-at each task fork, and four different variations of the PCG-RXS-M-XS-64 output
-function are applied to that state to generate four different pseudorandom
-weights. Another obvious way to generate four weights would be to iterate the
-LCG four times per task split. There are two main reasons we've chosen to use
-four output variants instead:
-
-1. Advancing four times per fork reduces the set of possible weights that each
-   register can be perturbed by from 2^64 to 2^60. Since collision resistance is
-   proportional to the number of possible weight values, that would reduce
-   collision resistance.
-
-2. It's easier to compute four PCG output variants in parallel. Iterating the
-   LCG is inherently sequential. Each PCG variant can be computed independently
-   from the LCG state. All four can even be computed at once with SIMD vector
-   instructions, but the compiler doesn't currently choose to do that.
-
-A key question is whether the approach of using four variations of PCG-RXS-M-XS
-is sufficiently random both within and between streams to provide the collision
-resistance we expect. We obviously can't test that with 256 bits, but we have
-tested it with a reduced state analogue using four PCG-RXS-M-XS-8 output
-variations applied to a common 8-bit LCG. Test results do indicate sufficient
-independence: a single register has collisions at 2^5 while four registers only
-start having collisions at 2^20, which is actually better scaling of collision
-resistance than we expect in theory. In theory, with one byte of resistance we
-have a 50% chance of some collision at 20, which matches, but four bytes gives a
-50% chance of collision at 2^17 and our (reduced size analogue) construction is
-still collision free at 2^19. This may be due to the next observation, which guarantees collision avoidance for certain shapes of task trees as a result of using an
-invertible RNG to generate weights.
-
-In the specific case where a parent task spawns a sequence of child tasks with
-no intervening usage of its main RNG, the parent and child tasks are actually
-_guaranteed_ to have different RNG states. This is true because the four PCG
-streams each produce every possible 2^64 bit output exactly once in the full
-2^64 period of the LCG generator. This is considered a weakness of PCG-RXS-M-XS
-when used as a general purpose RNG, but is quite beneficial in this application.
-Since each of up to 2^64 children will be perturbed by different weights, they
-cannot have hash collisions. What about parent colliding with child? That can
-only happen if all four main RNG registers are perturbed by exactly zero. This
-seems unlikely, but could it occur? Consider this part of each output function:
-
-    p ^= p >> ((p >> 59) + 5);
-    p *= m[i];
-    p ^= p >> 43
-
-It's easy to check that this maps zero to zero. An unchanged parent RNG can only
-happen if all four `p` values are zero at the end of this, which implies that
-they were all zero at the beginning. However, that is impossible since the four
-`p` values differ from `x` by different additive constants, so they cannot all
-be zero. Stated more generally, this non-collision property: assuming the main
-RNG isn't used between task forks, sibling and parent tasks cannot have RNG
-collisions. If the task tree structure is more deeply nested or if there are
-intervening uses of the main RNG, we're back to relying on "merely" 256 bits of
-collision resistance, but it's nice to know that in what is likely the most
-common case, RNG collisions are actually impossible. This fact may also explain
-better-than-theoretical collision resistance observed in our experiment with a
-reduced size analogue of our hashing system.
+such a way that for an entire tree of tasks spawned starting with a given root
+task state, no two tasks have the same RNG state. Moreover, we want to do this
+in a way that is deterministic and repeatable based on (1) the root task's seed,
+(2) how many random numbers are generated, and (3) the task tree structure. The
+RNG state of a parent task is allowed to affect the initial RNG state of a child
+task, but the mere fact that a child was spawned should not alter the RNG output
+of the parent. This second requirement rules out using the main RNG to seed
+children: if we use the main RNG, we either advance it, which affects the
+parent's RNG stream or, if we don't advance it, then every child would have an
+identical RNG stream. Therefore some separate state must be maintained and
+changed upon forking a child task while leaving the main RNG state unchanged.
+
+The basic approach is a generalization and simplification of that used in the
+DotMix [2] and SplitMix [3] RNG systems: each task is uniquely identified by a
+sequence of "pedigree" numbers, indicating where in the task tree it was
+spawned. This vector of pedigree coordinates is then reduced to a single value
+by computing a "dot product" with a shared vector of random weights. I write
+"dot product" in quotes because what we use is not an actual dot product. The
+linear dot product construction used in both DotMix and SplitMix was found by
+@foobar_iv2 [4] to allow easy construction of linear relationships between the
+main RNG states of tasks, which was in turn reflected in observable linear
+relationships between the outputs of their RNGs. This relationship was between a
+minimum of four tasks, so doesn't constitute a collision, per se, but is clearly
+undesirable and highlights a hazard of the plain dot product construction.
+
+As in DotMix and SplitMix, each task is assigned unique task "pedigree"
+coordinates. Our pedigree construction is a bit different and uses only binary
+coordinates rather than arbitrary integers. Each pedigree is an infinite
+sequence of ones and zeros with only finitely many ones. Each task has a "fork
+index": the root task has index 0; the fork index of the jth child task of a
+parent task with fork index i is i+j. The root task's coordinates are all zeros;
+each child task's coordinates are the same as its parents except at its fork
+index, where the parent has a zero while the child has a one; each task's
+coordinates after its fork index are all zeros. The last common ancestor of two
+tasks has coordinates that are the longest common prefix of their coordinates.
+
+Also as in DotMix and SplitMix, we generate a sequence of pseudorandom "weights"
+to combine with the coordinates of each task. This sequence is common across all
+tasks, and different mix values for tasks stem entirely from task coordinates
+being different. In DotMix and SplitMix the mix function is a literal dot
+product: the pseudorandom weights are multiplied by corresponding task
+coordinate and summed. While this does provably make collisions as unlikely as
+random seeding, this linear construction can be used to create linearly
+correlated states between more than two tasks. However, it turns out that the
+compression mixing construction need not be linear, nor commutative, nor
+associative. In fact, the mixing function need only be bijective in both
+arguments. This allows us to use a much more non-trivial mixing function and
+avoid any linear or other obvious correlations between related sets of tasks.
+
+We maintain an LCG in rngState[4] to generate pseudorandom weights. An LCG by
+itself is a very bad RNG, but we combine this one with xoshiro256 state
+registers in a non-trivial way and then apply the PCG-RXS-M-XS-64 output
+function to that. Even if the xoshiro256 states are all zeros, which they should
+never be, the output would be the same as PCG-RXS-M-XS-64, which is a solid
+statistical RNG. Each time a child is forked, we update the LCG in both parent
+and child tasks, corresponding to increasing the fork index. In the parent,
+that's all we have to do -- the main RNG state remains unchanged. Recall that
+spawning a child should not affect subsequent RNG draws in the parent. The next
+time the parent forks a child, the mixing weight used will be different. In the
+child, we use the LCG state to perturb the child's main RNG state registers,
+rngState[0..3].
+
+To generalize SplitMix's optimized dot product construction, we also compute
+each task's compression function value incrementally by combining the parent's
+compression value with pseudorandom weight corresponding with the child's fork
+index. Formally, if the parent's compression value is c then we can compute the
+child's compression value as c′ = f(c, wᵢ) where w is the vector of pseudorandom
+weights. What is f? It can be any function that is bijective in each argument
+for all values of the other argument:
+
+    * For all c: w ↦ f(c, w) is bijective
+    * For all w: c ↦ f(c, w) is bijective
+
+The proof that these requirements are sufficient to ensure collision resistance
+is in the linked discussion [4]. DotMix/SplitMix are a special case where f is
+just addition. Instead we use a much less simple mixing function:
+
+    1. We use (2c+1)(2w+1)÷2 % 2^64 to mix the bits of c and w
+    2. We then apply the PCG-RXS-M-XS-64 output function
+
+The first step thoroughly mixes the bits of the previous compression value and
+the pseudorandom weight value using multiplication, which is non-commutative
+with xoshiro's operations (xor, shift, rotate). This mixing function is a
+bijection on each argument witnessed by these inverses:
+
+    * c′ ↦ (2c′+1)(2w+1)⁻¹÷2 % 2^64
+    * w′ ↦ (2c+1)⁻¹(2w′+1)÷2 % 2^64
+
+Here (2w+1)⁻¹ is the modular inverse of (2w+1) mod 2^64, guaranteed to exist
+since 2w+1 is odd. The second PCG output step is a bijection and designed to be
+significantly non-linear -- non-linear enough to mask the linearity of the LCG
+that drives the PCG-RXS-M-XS-64 RNG and allows it to pass statistical RNG test
+suites despite having the same size state and output. In particular, since this
+mixing function is highly non-associative and non-linear, we (hopefully) don't
+have any discernible relationship between these values:
+
+    * c₀₀ = c
+    * c₁₀ = f(c, wᵢ)
+    * c₀₁ = f(c, wⱼ)
+    * c₁₁ = f(f(c, wᵢ), wⱼ)
+
+When f is simply `+` then these have a very obvious linear relationship:
+
+    c₀₀ + c₁₁ == c₁₀ + c₀₁
+
+This relationship holds regardless of what wᵢ and wⱼ are and allows easy
+creation of correlated tasks with the way we were previously using the
+DotMix/SplitMix construction. SplitMix itself does not output the raw dot
+product, probably because the authors were aware of this linearity issue;
+instead: they apply the MurmurHash3 finalizer to the dot-product to get an
+output that masks linear relationships. I had failed to understand the
+importance of that finalizer. One possible fix for our task splitting
+correlation issue would have been to also apply a non-linear finalizer
+(MurmurHash3 is one of the best) to our dot product before using it to perturb
+the xoshiro256 state. There are two problems with that fix, however:
+
+1. It requires accumulating the dot product somewhere. The old approach
+   accumulates dot products directly in the xoshiro registers; if we were to
+   accumulate and then finalize, the dot product has to be stored somewhere
+   in each task. We want our tasks to be as small as possible, so adding
+   another 64-bit field that we never change would be unfortunate.
+
+2. We still need to apply the PCG finalizer to the internal LCG in order to
+   generate dot product weights. SplitMix uses a shared static array of
+   1024 pre-generated random weights; we could do the same, but that limits
+   the number of task splits to a max of 1024 before weights have to be
+   reused. We can't use the LCG directly because it's highly linear and we
+   need four variations of the internal RNG stream for the four xoshiro256
+   registers. That means we'd have to apply the PCG finalizer, add it to
+   our dot product accumulator field in the child task, then apply the
+   MurmurHash3 finalizer to that dot product and use the result to perturb
+   the main RNG state.
+
+We avoid both problems by recognizing that the mixing function can be much less
+simple while still allowing the essential collision resistance proof to go
+through. We replace addition with a highly non-linear, non-associative mixing
+function that includes the PCG output function. This allows us to continue to use
+the xoshiro state registers for mixing function accumulation as well as for its
+primary purpose. It also obviates the need for double finalization: it would
+have been disastrous to use LCG state directly as weights for a linear
+construction like SplitMix, but using it as the input to a non-linear mixer that
+includes the strongest PCG output function is reasonable (and precisely what
+PCG-RXS-M-XS-64 does). Since the output of the mixing function is already
+non-linearly finalized, there's no need to apply yet another finalizer.
+
+Since there are four xoshiro256 registers that we want to behave independently
+as mix accumulators, we use four different variations on the mixing function,
+keyed by register index (0-3). Each variation first xors the LCG state with a
+different random constant before combining that value above with the old
+register state via multiplication. The PCG-RXS-M-XS-64 output function is then
+applied to that mixed state, with a different multiplier constant for each
+variation / register index. Xor is used in the first step since we multiply the
+result with the state immediately after and multiplication distributes over `+`
+and commutes with `*`, making both suspect options. Multiplication doesn't
+distribute over or commute with xor. We also use a different odd multiplier in
+PCG-RXS-M-XS-64 for each RNG register. These four sources of variation
+(different initial state, different xor constants, different xoshiro256 state,
+different PCG multipliers) are hopefully sufficient for each of the four outputs
+to behave statistically independently, in the sense that even if two different
+tasks happen to have a state collision in one 64-bit register, it is highly
+improbable that all four registers collide at the same time, giving an actual
+main RNG state collision.
 
 [1]: https://www.pcg-random.org/pdf/hmc-cs-2014-0905.pdf
 
 [2]: http://supertech.csail.mit.edu/papers/dprng.pdf
 
 [3]: https://gee.cs.oswego.edu/dl/papers/oopsla14.pdf
+
+[4]:
+https://discourse.julialang.org/t/linear-relationship-between-xoshiro-tasks/110454
 */
 void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT
 {
@@ -1010,55 +1065,61 @@ void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSA
     src[4] = dst[4] = x * 0xd1342543de82ef95 + 1;
     // high spectrum multiplier from https://arxiv.org/abs/2001.05304
 
+    // random xor constants
     static const uint64_t a[4] = {
-        0xe5f8fa077b92a8a8, // random additive offsets...
-        0x7a0cd918958c124d,
-        0x86222f7d388588d4,
-        0xd30cbd35f2b64f52
+        0x214c146c88e47cb7,
+        0xa66d8cc21285aafa,
+        0x68c7ef2d7b1a54d4,
+        0xb053a7d7aa238c61
     };
+    // random odd multipliers
     static const uint64_t m[4] = {
         0xaef17502108ef2d9, // standard PCG multiplier
-        0xf34026eeb86766af, // random odd multipliers...
+        0xf34026eeb86766af,
         0x38fd70ad58dd9fbb,
         0x6677f9b93ab0c04d
     };
 
-    // PCG-RXS-M-XS output with four variants
+    // PCG-RXS-M-XS-64 output with four variants
     for (int i = 0; i < 4; i++) {
-        uint64_t p = x + a[i];
-        p ^= p >> ((p >> 59) + 5);
-        p *= m[i];
-        p ^= p >> 43;
-        dst[i] = src[i] + p; // SplitMix dot product
+        uint64_t c = src[i];
+        uint64_t w = x ^ a[i];
+        c += w*(2*c + 1); // c = (2c+1)(2w+1)÷2 % 2^64 (double bijection)
+        c ^= c >> ((c >> 59) + 5);
+        c *= m[i];
+        c ^= c >> 43;
+        dst[i] = c;
     }
 }
 
-JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion_future, size_t ssize)
+JL_DLLEXPORT jl_task_t *jl_new_task(jl_value_t *start, jl_value_t *completion_future, size_t ssize)
 {
     jl_task_t *ct = jl_current_task;
     jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type);
     jl_set_typetagof(t, jl_task_tag, 0);
     JL_PROBE_RT_NEW_TASK(ct, t);
-    t->copy_stack = 0;
+    t->ctx.copy_stack = 0;
     if (ssize == 0) {
         // stack size unspecified; use default
         if (always_copy_stacks) {
-            t->copy_stack = 1;
-            t->bufsz = 0;
+            t->ctx.copy_stack = 1;
+            t->ctx.bufsz = 0;
         }
         else {
-            t->bufsz = JL_STACK_SIZE;
+            t->ctx.bufsz = JL_STACK_SIZE;
         }
-        t->stkbuf = NULL;
+        t->ctx.stkbuf = NULL;
     }
     else {
         // user requested dedicated stack of a certain size
         if (ssize < MINSTKSZ)
             ssize = MINSTKSZ;
-        t->bufsz = ssize;
-        t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t);
-        if (t->stkbuf == NULL)
+        t->ctx.bufsz = ssize;
+        t->ctx.stkbuf = jl_malloc_stack(&t->ctx.bufsz, t);
+        if (t->ctx.stkbuf == NULL) {
+            t->ctx.bufsz = 0;
             jl_throw(jl_memory_exception);
+        }
     }
     t->next = jl_nothing;
     t->queue = jl_nothing;
@@ -1068,8 +1129,8 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     t->result = jl_nothing;
     t->donenotify = completion_future;
     jl_atomic_store_relaxed(&t->_isexception, 0);
-    // Inherit logger state from parent task
-    t->logstate = ct->logstate;
+    // Inherit scope from parent task
+    t->scope = ct->scope;
     // Fork task-local random state from parent
     jl_rng_split(t->rngState, ct->rngState);
     // there is no active exception handler available on this stack yet
@@ -1077,30 +1138,26 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     t->sticky = 1;
     t->gcstack = NULL;
     t->excstack = NULL;
-    t->started = 0;
+    t->ctx.started = 0;
     t->priority = 0;
-    jl_atomic_store_relaxed(&t->tid, t->copy_stack ? jl_atomic_load_relaxed(&ct->tid) : -1); // copy_stacks are always pinned since they can't be moved
+    jl_atomic_store_relaxed(&t->tid, -1);
     t->threadpoolid = ct->threadpoolid;
     t->ptls = NULL;
     t->world_age = ct->world_age;
     t->reentrant_timing = 0;
+    t->metrics_enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled) != 0;
+    jl_atomic_store_relaxed(&t->first_enqueued_at, 0);
+    jl_atomic_store_relaxed(&t->last_started_running_at, 0);
+    jl_atomic_store_relaxed(&t->running_time_ns, 0);
+    jl_atomic_store_relaxed(&t->finished_at, 0);
     jl_timing_task_init(t);
 
-#ifdef COPY_STACKS
-    if (!t->copy_stack) {
-#if defined(JL_DEBUG_BUILD)
-        memset(&t->ctx, 0, sizeof(t->ctx));
-#endif
-    }
-    else {
-        if (always_copy_stacks)
-            memcpy(&t->ctx.copy_ctx, &ct->ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx));
-        else
-            memcpy(&t->ctx.ctx, &ct->ptls->base_ctx, sizeof(t->ctx.ctx));
-    }
-#endif
+    if (t->ctx.copy_stack)
+        t->ctx.copy_ctx = NULL;
+    else
+        t->ctx.ctx = NULL;
 #ifdef _COMPILER_TSAN_ENABLED_
-    t->ctx.tsan_state = __tsan_create_fiber(0);
+    t->ctx.tsan_state = NULL;
 #endif
 #ifdef _COMPILER_ASAN_ENABLED_
     t->ctx.asan_fake_stack = NULL;
@@ -1115,47 +1172,6 @@ JL_DLLEXPORT jl_task_t *jl_get_current_task(void)
     return pgcstack == NULL ? NULL : container_of(pgcstack, jl_task_t, gcstack);
 }
 
-
-#ifdef JL_HAVE_ASYNCIFY
-JL_DLLEXPORT jl_ucontext_t *task_ctx_ptr(jl_task_t *t)
-{
-    return &t->ctx.ctx;
-}
-
-JL_DLLEXPORT jl_value_t *jl_get_root_task(void)
-{
-    jl_task_t *ct = jl_current_task;
-    return (jl_value_t*)ct->ptls->root_task;
-}
-
-JL_DLLEXPORT void jl_task_wait()
-{
-    static jl_function_t *wait_func = NULL;
-    if (!wait_func) {
-        wait_func = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("wait"));
-    }
-    jl_task_t *ct = jl_current_task;
-    size_t last_age = ct->world_age;
-    ct->world_age = jl_get_world_counter();
-    jl_apply(&wait_func, 1);
-    ct->world_age = last_age;
-}
-
-JL_DLLEXPORT void jl_schedule_task(jl_task_t *task)
-{
-    static jl_function_t *sched_func = NULL;
-    if (!sched_func) {
-        sched_func = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("schedule"));
-    }
-    jl_task_t *ct = jl_current_task;
-    size_t last_age = ct->world_age;
-    ct->world_age = jl_get_world_counter();
-    jl_value_t *args[] = {(jl_value_t*)sched_func, (jl_value_t*)task};
-    jl_apply(args, 2);
-    ct->world_age = last_age;
-}
-#endif
-
 // Do one-time initializations for task system
 void jl_init_tasks(void) JL_GC_DISABLED
 {
@@ -1176,13 +1192,24 @@ void jl_init_tasks(void) JL_GC_DISABLED
         exit(1);
     }
 #endif
+#if defined(_COMPILER_ASAN_ENABLED_) && __GLIBC__
+    void *libc_handle = dlopen("libc.so.6", RTLD_NOW | RTLD_NOLOAD);
+    if (libc_handle) {
+        *(void**)&real_siglongjmp = dlsym(libc_handle, "siglongjmp");
+        dlclose(libc_handle);
+    }
+    if (real_siglongjmp == NULL) {
+        jl_safe_printf("failed to get real siglongjmp\n");
+        exit(1);
+    }
+#endif
 }
 
 #if defined(_COMPILER_ASAN_ENABLED_)
-STATIC_OR_JS void NOINLINE JL_NORETURN _start_task(void);
+static void NOINLINE JL_NORETURN _start_task(void);
 #endif
 
-STATIC_OR_JS void NOINLINE JL_NORETURN JL_NO_ASAN start_task(void)
+static void NOINLINE JL_NORETURN JL_NO_ASAN start_task(void)
 {
 CFI_NORETURN
 #if defined(_COMPILER_ASAN_ENABLED_)
@@ -1194,11 +1221,11 @@ CFI_NORETURN
     jl_task_t *ct = jl_current_task;
 #endif
     jl_ptls_t ptls = ct->ptls;
-    sanitizer_finish_switch_fiber(ptls->previous_task, ct);
+    sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &ct->ctx);
     _start_task();
 }
 
-STATIC_OR_JS void NOINLINE JL_NORETURN _start_task(void)
+static void NOINLINE JL_NORETURN _start_task(void)
 {
 CFI_NORETURN
 #endif
@@ -1208,6 +1235,7 @@ CFI_NORETURN
 #else
     jl_task_t *ct = jl_current_task;
 #endif
+    ct->ctx.ctx = NULL;
     jl_ptls_t ptls = ct->ptls;
     jl_value_t *res;
     assert(ptls->finalizers_inhibited == 0);
@@ -1215,16 +1243,22 @@ CFI_NORETURN
 #ifdef MIGRATE_TASKS
     jl_task_t *pt = ptls->previous_task;
     ptls->previous_task = NULL;
-    if (!pt->sticky && !pt->copy_stack)
+    if (!pt->sticky && !pt->ctx.copy_stack)
         jl_atomic_store_release(&pt->tid, -1);
 #endif
 
-    ct->started = 1;
+    ct->ctx.started = 1;
+    if (ct->metrics_enabled) {
+        // [task] wait_time -started-> user_time
+        assert(jl_atomic_load_relaxed(&ct->first_enqueued_at) != 0);
+        assert(jl_atomic_load_relaxed(&ct->last_started_running_at) == 0);
+        jl_atomic_store_relaxed(&ct->last_started_running_at, jl_hrtime());
+    }
     JL_PROBE_RT_START_TASK(ct);
     jl_timing_block_task_enter(ct, ptls, NULL);
     if (jl_atomic_load_relaxed(&ct->_isexception)) {
         record_backtrace(ptls, 0);
-        jl_push_excstack(&ct->excstack, ct->result,
+        jl_push_excstack(ct, &ct->excstack, ct->result,
                          ptls->bt_data, ptls->bt_size);
         res = ct->result;
     }
@@ -1238,7 +1272,7 @@ CFI_NORETURN
             res = jl_apply(&ct->start, 1);
         }
         JL_CATCH {
-            res = jl_current_exception();
+            res = jl_current_exception(ct);
             jl_atomic_store_relaxed(&ct->_isexception, 1);
             goto skip_pop_exception;
         }
@@ -1247,73 +1281,48 @@ skip_pop_exception:;
     ct->result = res;
     jl_gc_wb(ct, ct->result);
     jl_finish_task(ct);
-    jl_gc_debug_critical_error();
+    jl_gc_debug_fprint_critical_error(ios_safe_stderr);
     abort();
 }
 
 
-#if defined(JL_HAVE_UCONTEXT)
-#ifdef _OS_WINDOWS_
-#define setcontext jl_setcontext
-#define swapcontext jl_swapcontext
-#define makecontext jl_makecontext
-#endif
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
+#ifdef JL_TASK_SWITCH_WINDOWS
+static int make_fiber(jl_ucontext_t *t, _jl_ucontext_t *ctx)
 {
-#ifndef _OS_WINDOWS_
-    int r = getcontext(t);
-    if (r != 0)
-        jl_error("getcontext failed");
-#endif
-    void *stk = jl_malloc_stack(ssize, owner);
-    if (stk == NULL)
-        return NULL;
-    t->uc_stack.ss_sp = stk;
-    t->uc_stack.ss_size = *ssize;
-#ifdef _OS_WINDOWS_
-    makecontext(t, &start_task);
-#else
-    t->uc_link = NULL;
-    makecontext(t, &start_task, 0);
-#endif
-    return (char*)stk;
+    ctx->uc_stack.ss_sp = (char*)t->stkbuf;
+    ctx->uc_stack.ss_size = t->bufsz;
+    jl_makecontext(ctx, &start_task);
+    return 1;
 }
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-    setcontext(&t->ctx);
+    _jl_ucontext_t ctx;
+    make_fiber(t, &ctx);
+    jl_setcontext(&ctx);
 }
 static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
+    _jl_ucontext_t ctx;
+    make_fiber(t, &ctx);
     assert(lastt);
     tsan_switch_to_ctx(t);
-    swapcontext(&lastt->ctx, &t->ctx);
+    jl_swapcontext(lastt->ctx, &ctx);
 }
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     tsan_switch_to_ctx(t);
-    swapcontext(&lastt->ctx, &t->ctx);
+    jl_swapcontext(lastt->ctx, t->ctx);
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    setcontext(&t->ctx);
+    jl_setcontext(t->ctx);
 }
 #endif
 
-#if defined(JL_HAVE_UNW_CONTEXT) || defined(JL_HAVE_ASM)
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
-{
-    char *stkbuf = (char*)jl_malloc_stack(ssize, owner);
-    if (stkbuf == NULL)
-        return NULL;
-#ifndef __clang_gcanalyzer__
-    ((char**)t)[0] = stkbuf; // stash the stack pointer somewhere for start_fiber
-    ((size_t*)t)[1] = *ssize; // stash the stack size somewhere for start_fiber
-#endif
-    return stkbuf;
-}
+#if defined(JL_TASK_SWITCH_LIBUNWIND)
+#ifdef _OS_WINDOWS_
+#error unw_context_t not defined in Windows
 #endif
-
-#if defined(JL_HAVE_UNW_CONTEXT)
 static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c)
 {
     volatile int returns = 0;
@@ -1327,34 +1336,34 @@ static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c)
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    int r = unw_init_local(&c, &t->ctx);
+    int r = unw_init_local(&c, t->ctx);
     if (r < 0)
         abort();
-    jl_unw_swapcontext(&lastt->ctx, &c);
+    jl_unw_swapcontext(lastt->ctx, &c);
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    int r = unw_init_local(&c, &t->ctx);
+    int r = unw_init_local(&c, t->ctx);
     if (r < 0)
         abort();
     unw_resume(&c);
 }
-#elif defined(JL_HAVE_ASM)
+#elif defined(JL_TASK_SWITCH_ASM)
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
-    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx->uc_mcontext, 0))
         return;
     tsan_switch_to_ctx(t);
     jl_set_fiber(t); // doesn't return
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    jl_longjmp(t->ctx.uc_mcontext, 1);
+    jl_longjmp(t->ctx->uc_mcontext, 1);
 }
 #endif
 
-#if defined(JL_HAVE_UNW_CONTEXT) && !defined(JL_HAVE_ASM)
+#if defined(JL_TASK_SWITCH_LIBUNWIND) && !defined(JL_TASK_SWITCH_ASM)
 #if defined(_CPU_X86_) || defined(_CPU_X86_64_)
 #define PUSH_RET(ctx, stk) \
     do { \
@@ -1371,14 +1380,14 @@ static void jl_set_fiber(jl_ucontext_t *t)
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    char *stk = ((char**)&t->ctx)[0];
-    size_t ssize = ((size_t*)&t->ctx)[1];
+    char *stk = (char*)t->stkbuf;
+    size_t ssize = t->bufsz;
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
-    int r = unw_getcontext(&t->ctx);
+    int r = unw_getcontext(t->ctx);
     if (r)
         abort();
-    if (unw_init_local(&c, &t->ctx))
+    if (unw_init_local(&c, t->ctx))
         abort();
     PUSH_RET(&c, stk);
 #if defined __linux__
@@ -1394,43 +1403,46 @@ static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
     unw_cursor_t c;
-    char *stk = ((char**)&t->ctx)[0];
-    size_t ssize = ((size_t*)&t->ctx)[1];
+    char *stk = (char*)t->stkbuf;
+    size_t ssize = t->bufsz;
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
     volatile int returns = 0;
-    int r = unw_getcontext(&lastt->ctx);
+    int r = unw_getcontext(lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
-    r = unw_getcontext(&t->ctx);
+    r = unw_getcontext(t->ctx);
     if (r != 0)
         abort();
-    if (unw_init_local(&c, &t->ctx))
+    if (unw_init_local(&c, t->ctx))
         abort();
     PUSH_RET(&c, stk);
     if (unw_set_reg(&c, UNW_REG_SP, (uintptr_t)stk))
         abort();
     if (unw_set_reg(&c, UNW_REG_IP, fn))
         abort();
-    jl_unw_swapcontext(&lastt->ctx, &c);
+    jl_unw_swapcontext(lastt->ctx, &c);
 }
 #endif
 
-#if defined(JL_HAVE_ASM)
+#if defined(JL_TASK_SWITCH_ASM)
+#ifdef _OS_WINDOWS_
+#error JL_TASK_SWITCH_ASM not defined in Windows
+#endif
 JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
-#ifdef JL_HAVE_UNW_CONTEXT
+#ifdef JL_TASK_SWITCH_LIBUNWIND
     volatile int returns = 0;
-    int r = unw_getcontext(&lastt->ctx);
+    int r = unw_getcontext(lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
 #else
-    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx->uc_mcontext, 0))
         return;
 #endif
     tsan_switch_to_ctx(t);
@@ -1438,8 +1450,9 @@ JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *
 }
 JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-    char *stk = ((char**)&t->ctx)[0];
-    size_t ssize = ((size_t*)&t->ctx)[1];
+CFI_NORETURN
+    char *stk = (char*)t->stkbuf;
+    size_t ssize = t->bufsz;
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
 #ifdef _CPU_X86_64_
@@ -1478,6 +1491,14 @@ JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t)
                     // because all our addresses are word-aligned.
         " udf #0" // abort
         : : "r" (stk), "r"(fn) : "memory" );
+#elif defined(_CPU_RISCV64_)
+    asm volatile(
+        " mv sp, %0;\n"
+        " mv ra, zero;\n" // Clear return address register
+        " mv fp, zero;\n" // Clear frame pointer
+        " jr %1;\n" // call `fn` with fake stack frame
+        " ebreak" // abort
+        : : "r"(stk), "r"(fn) : "memory" );
 #elif defined(_CPU_PPC64_)
     // N.B.: There is two iterations of the PPC64 ABI.
     // v2 is current and used here. Make sure you have the
@@ -1502,121 +1523,12 @@ JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t)
         " trap; \n"
         : : "r"(stk), "r"(fn) : "memory");
 #else
-#error JL_HAVE_ASM defined but not implemented for this CPU type
+#error JL_TASK_SWITCH_ASM defined but not implemented for this CPU type
 #endif
     __builtin_unreachable();
 }
 #endif
 
-#if defined(JL_HAVE_SIGALTSTACK)
-#if defined(_COMPILER_TSAN_ENABLED_)
-#error TSAN support not currently implemented for this tasking model
-#endif
-
-static void start_basefiber(int sig)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0))
-        start_task(); // sanitizer_finish_switch_fiber is part of start_task
-}
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
-{
-    stack_t uc_stack, osigstk;
-    struct sigaction sa, osa;
-    sigset_t set, oset;
-    void *stk = jl_malloc_stack(ssize, owner);
-    if (stk == NULL)
-        return NULL;
-    // setup
-    jl_ptls_t ptls = jl_current_task->ptls;
-    _jl_ucontext_t base_ctx;
-    memcpy(&base_ctx, &ptls->base_ctx, sizeof(base_ctx));
-    sigfillset(&set);
-    if (pthread_sigmask(SIG_BLOCK, &set, &oset) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("pthread_sigmask failed");
-    }
-    uc_stack.ss_sp = stk;
-    uc_stack.ss_size = *ssize;
-    uc_stack.ss_flags = 0;
-    if (sigaltstack(&uc_stack, &osigstk) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("sigaltstack failed");
-    }
-    memset(&sa, 0, sizeof(sa));
-    sigemptyset(&sa.sa_mask);
-    sa.sa_handler = start_basefiber;
-    sa.sa_flags = SA_ONSTACK;
-    if (sigaction(SIGUSR2, &sa, &osa) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("sigaction failed");
-    }
-    // emit signal
-    pthread_kill(pthread_self(), SIGUSR2); // initializes jl_basectx
-    sigdelset(&set, SIGUSR2);
-    sigsuspend(&set);
-    // cleanup
-    if (sigaction(SIGUSR2, &osa, NULL) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("sigaction failed");
-    }
-    if (osigstk.ss_size < MINSTKSZ && (osigstk.ss_flags | SS_DISABLE))
-       osigstk.ss_size = MINSTKSZ;
-    if (sigaltstack(&osigstk, NULL) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("sigaltstack failed");
-    }
-    if (pthread_sigmask(SIG_SETMASK, &oset, NULL) != 0) {
-       jl_free_stack(stk, *ssize);
-       jl_error("pthread_sigmask failed");
-    }
-    if (&ptls->base_ctx != t) {
-        memcpy(&t, &ptls->base_ctx, sizeof(base_ctx));
-        memcpy(&ptls->base_ctx, &base_ctx, sizeof(base_ctx)); // restore COPY_STACKS context
-    }
-    return (char*)stk;
-}
-static void jl_start_fiber_set(jl_ucontext_t *t) {
-    jl_longjmp(t->ctx.uc_mcontext, 1); // (doesn't return)
-}
-static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
-{
-    assert(lastt);
-    if (lastt && jl_setjmp(lastt->ctx.uc_mcontext, 0))
-        return;
-    tsan_switch_to_ctx(t);
-    jl_start_fiber_set(t);
-}
-static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
-{
-    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
-        return;
-    tsan_switch_to_ctx(t);
-    jl_start_fiber_set(t); // doesn't return
-}
-static void jl_set_fiber(jl_ucontext_t *t)
-{
-    jl_longjmp(t->ctx.uc_mcontext, 1);
-}
-#endif
-
-#if defined(JL_HAVE_ASYNCIFY)
-#if defined(_COMPILER_TSAN_ENABLED_)
-#error TSAN support not currently implemented for this tasking model
-#endif
-
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
-{
-    void *stk = jl_malloc_stack(ssize, owner);
-    if (stk == NULL)
-        return NULL;
-    t->stackbottom = stk;
-    t->stacktop = ((char*)stk) + *ssize;
-    return (char*)stk;
-}
-// jl_*_fiber implemented in js
-#endif
-
 // Initialize a root task using the given stack.
 jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
 {
@@ -1632,8 +1544,6 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     } bootstrap_task = {0};
     jl_set_pgcstack(&bootstrap_task.value.gcstack);
     bootstrap_task.value.ptls = ptls;
-    if (jl_nothing == NULL) // make a placeholder
-        jl_nothing = jl_gc_permobj(0, jl_nothing_type);
     jl_task_t *ct = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type);
     jl_set_typetagof(ct, jl_task_tag, 0);
     memset(ct, 0, sizeof(jl_task_t));
@@ -1646,14 +1556,14 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     }
 #endif
     if (always_copy_stacks) {
-        ct->copy_stack = 1;
-        ct->stkbuf = NULL;
-        ct->bufsz = 0;
+        ct->ctx.copy_stack = 1;
+        ct->ctx.stkbuf = NULL;
+        ct->ctx.bufsz = 0;
     }
     else {
-        ct->copy_stack = 0;
-        ct->stkbuf = stack;
-        ct->bufsz = ssize;
+        ct->ctx.copy_stack = 0;
+        ct->ctx.stkbuf = stack;
+        ct->ctx.bufsz = ssize;
     }
 
 #ifdef USE_TRACY
@@ -1661,7 +1571,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     strcpy(unique_string, "Root");
     ct->name = unique_string;
 #endif
-    ct->started = 1;
+    ct->ctx.started = 1;
     ct->next = jl_nothing;
     ct->queue = jl_nothing;
     ct->tls = jl_nothing;
@@ -1670,7 +1580,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     ct->result = jl_nothing;
     ct->donenotify = jl_nothing;
     jl_atomic_store_relaxed(&ct->_isexception, 0);
-    ct->logstate = jl_nothing;
+    ct->scope = jl_nothing;
     ct->eh = NULL;
     ct->gcstack = NULL;
     ct->excstack = NULL;
@@ -1680,11 +1590,25 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     ct->ptls = ptls;
     ct->world_age = 1; // OK to run Julia code on this task
     ct->reentrant_timing = 0;
+    jl_atomic_store_relaxed(&ct->running_time_ns, 0);
+    jl_atomic_store_relaxed(&ct->finished_at, 0);
+    ct->metrics_enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled) != 0;
+    if (ct->metrics_enabled) {
+        // [task] created -started-> user_time
+        uint64_t now = jl_hrtime();
+        jl_atomic_store_relaxed(&ct->first_enqueued_at, now);
+        jl_atomic_store_relaxed(&ct->last_started_running_at, now);
+    }
+    else {
+        jl_atomic_store_relaxed(&ct->first_enqueued_at, 0);
+        jl_atomic_store_relaxed(&ct->last_started_running_at, 0);
+    }
     ptls->root_task = ct;
     jl_atomic_store_relaxed(&ptls->current_task, ct);
     JL_GC_PROMISE_ROOTED(ct);
     jl_set_pgcstack(&ct->gcstack);
     assert(jl_current_task == ct);
+    assert(jl_current_task->ptls == ptls);
 
 #ifdef _COMPILER_TSAN_ENABLED_
     ct->ctx.tsan_state = __tsan_get_current_fiber();
@@ -1700,21 +1624,18 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     if (always_copy_stacks) {
         // when this is set, we will attempt to corrupt the process stack to switch tasks,
         // although this is unreliable, and thus not recommended
-        ptls->stackbase = stack_hi;
-        ptls->stacksize = ssize;
-#ifdef _OS_WINDOWS_
-        ptls->copy_stack_ctx.uc_stack.ss_sp = stack_hi;
-        ptls->copy_stack_ctx.uc_stack.ss_size = ssize;
-#endif
-        if (jl_setjmp(ptls->copy_stack_ctx.uc_mcontext, 0))
-            start_task(); // sanitizer_finish_switch_fiber is part of start_task
+        ptls->stackbase = jl_get_frame_addr();
+        ptls->stacksize =  (char*)ptls->stackbase - (char*)stack_lo;
     }
     else {
-        ssize = JL_STACK_SIZE;
-        char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL);
+        size_t bufsz = JL_STACK_SIZE;
+        void *stkbuf = jl_malloc_stack(&bufsz, NULL);
         if (stkbuf != NULL) {
-            ptls->stackbase = stkbuf + ssize;
-            ptls->stacksize = ssize;
+            ptls->stackbase = (char*)stkbuf + bufsz;
+            ptls->stacksize = bufsz;
+        }
+        else {
+            ptls->stacksize = 0;
         }
     }
 #endif
@@ -1727,7 +1648,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
 
 JL_DLLEXPORT int jl_is_task_started(jl_task_t *t) JL_NOTSAFEPOINT
 {
-    return t->started;
+    return t->ctx.started;
 }
 
 JL_DLLEXPORT int16_t jl_get_task_tid(jl_task_t *t) JL_NOTSAFEPOINT
diff --git a/src/threading.c b/src/threading.c
index e2eb686e3061a..96495fef44c2b 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -1,15 +1,17 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
-
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <inttypes.h>
-
 #include "julia.h"
 #include "julia_internal.h"
 #include "julia_assert.h"
 
+#ifdef _COMPILER_TSAN_ENABLED_
+#include <sanitizer/tsan_interface.h>
+#endif
+
 #ifdef USE_ITTAPI
 #include "ittapi/ittnotify.h"
 #endif
@@ -18,7 +20,7 @@
 // For variant 1 JL_ELF_TLS_INIT_SIZE is the size of the thread control block (TCB)
 // For variant 2 JL_ELF_TLS_INIT_SIZE is 0
 #if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
-#  if defined(_CPU_X86_64_) || defined(_CPU_X86_)
+#  if defined(_CPU_X86_64_) || defined(_CPU_X86_) || defined(_CPU_RISCV64_)
 #    define JL_ELF_TLS_VARIANT 2
 #    define JL_ELF_TLS_INIT_SIZE 0
 #  elif defined(_CPU_AARCH64_)
@@ -49,6 +51,8 @@ JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0;
 JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0;
 JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0;
 
+JL_DLLEXPORT _Atomic(uint8_t) jl_task_metrics_enabled = 0;
+
 JL_DLLEXPORT void *jl_get_ptls_states(void)
 {
     // mostly deprecated: use current_task instead
@@ -74,6 +78,16 @@ JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void)
 
 JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
 {
+#ifdef _OS_DARWIN_
+    jl_task_t *ct = jl_get_current_task();
+    if (ct != NULL && ct->ptls) {
+        if (sr == NULL)
+            pthread_setspecific(jl_safe_restore_key, (void*)sr);
+        ct->ptls->safe_restore = sr;
+        if (sr == NULL)
+            return;
+    }
+#endif
     pthread_setspecific(jl_safe_restore_key, (void*)sr);
 }
 #endif
@@ -82,51 +96,17 @@ JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
 // The tls_states buffer:
 //
 // On platforms that do not use ELF (i.e. where `__thread` is emulated with
-// lower level API) (Mac, Windows), we use the platform runtime API to create
+// lower level API) (Windows), we use the platform runtime API to create
 // TLS variable directly.
 // This is functionally equivalent to using `__thread` but can be
 // more efficient since we can have better control over the creation and
 // initialization of the TLS buffer.
 //
-// On platforms that use ELF (Linux, FreeBSD), we use a `__thread` variable
+// On platforms that support native TLS (ELF platforms + Macos) we use a `__thread` variable
 // as the fallback in the shared object. For better efficiency, we also
 // create a `__thread` variable in the main executable using a static TLS
 // model.
-#if defined(_OS_DARWIN_)
-// Mac doesn't seem to have static TLS model so the runtime TLS getter
-// registration will only add overhead to TLS access. The `__thread` variables
-// are emulated with `pthread_key_t` so it is actually faster to use it directly.
-static pthread_key_t jl_pgcstack_key;
-
-__attribute__((constructor)) void jl_init_tls(void)
-{
-    pthread_key_create(&jl_pgcstack_key, NULL);
-}
-
-JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT
-{
-    return (jl_gcframe_t**)pthread_getspecific(jl_pgcstack_key);
-}
-
-void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
-{
-    pthread_setspecific(jl_pgcstack_key, (void*)pgcstack);
-}
-
-void jl_pgcstack_getkey(jl_get_pgcstack_func **f, pthread_key_t *k)
-{
-    // for codegen
-    *f = pthread_getspecific;
-    *k = jl_pgcstack_key;
-}
-
-
-JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, pthread_key_t k)
-{
-    jl_safe_printf("ERROR: Attempt to change TLS address.\n");
-}
-
-#elif defined(_OS_WINDOWS_)
+#if defined(_OS_WINDOWS_)
 // Apparently windows doesn't have a static TLS model (or one that can be
 // reliably used from a shared library) either..... Use `TLSAlloc` instead.
 
@@ -250,10 +230,6 @@ void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
 {
     *jl_pgcstack_key() = pgcstack;
 }
-#  if JL_USE_IFUNC
-JL_DLLEXPORT __attribute__((weak))
-void jl_register_pgcstack_getter(void);
-#  endif
 static jl_gcframe_t **jl_get_pgcstack_init(void);
 static jl_get_pgcstack_func *jl_get_pgcstack_cb = jl_get_pgcstack_init;
 static jl_gcframe_t **jl_get_pgcstack_init(void)
@@ -266,15 +242,8 @@ static jl_gcframe_t **jl_get_pgcstack_init(void)
     // This is clearly not thread-safe but should be fine since we
     // make sure the tls states callback is finalized before adding
     // multiple threads
-#  if JL_USE_IFUNC
-    if (jl_register_pgcstack_getter)
-        jl_register_pgcstack_getter();
-    else
-#  endif
-    {
-        jl_get_pgcstack_cb = jl_get_pgcstack_fallback;
-        jl_pgcstack_key = &jl_pgcstack_addr_fallback;
-    }
+    jl_get_pgcstack_cb = jl_get_pgcstack_fallback;
+    jl_pgcstack_key = &jl_pgcstack_addr_fallback;
     return jl_get_pgcstack_cb();
 }
 
@@ -314,6 +283,9 @@ static uv_mutex_t tls_lock; // controls write-access to these variables:
 _Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED;
 int jl_all_tls_states_size;
 static uv_cond_t cond;
+// concurrent reads are permitted, using the same pattern as mtsmall_arraylist
+// it is implemented separately because the API of direct jl_all_tls_states use is already widely prevalent
+void jl_init_thread_scheduler(jl_ptls_t ptls) JL_NOTSAFEPOINT;
 
 // return calling thread's ID
 JL_DLLEXPORT int16_t jl_threadid(void)
@@ -332,7 +304,21 @@ JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT
         if (tid < n)
             return (int8_t)i;
     }
-    return 0; // everything else uses threadpool 0 (though does not become part of any threadpool)
+    return -1; // everything else uses threadpool -1 (does not belong to any threadpool)
+}
+
+// get thread local rng
+JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT
+{
+    return jl_current_task->ptls->rngseed;
+}
+
+typedef void (*unw_tls_ensure_func)(void) JL_NOTSAFEPOINT;
+
+// get thread local rng
+JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT
+{
+    jl_current_task->ptls->rngseed = new_seed;
 }
 
 jl_ptls_t jl_init_threadtls(int16_t tid)
@@ -343,33 +329,37 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
 #endif
     if (jl_get_pgcstack() != NULL)
         abort();
-    jl_ptls_t ptls = (jl_ptls_t)calloc(1, sizeof(jl_tls_states_t));
+    jl_ptls_t ptls;
+#if defined(_OS_WINDOWS_)
+    ptls = (jl_ptls_t)_aligned_malloc(sizeof(jl_tls_states_t), alignof(jl_tls_states_t));
+    if (ptls == NULL)
+        abort();
+#else
+    if (posix_memalign((void**)&ptls, alignof(jl_tls_states_t), sizeof(jl_tls_states_t)))
+        abort();
+#endif
+    memset(ptls, 0, sizeof(jl_tls_states_t));
+
 #ifndef _OS_WINDOWS_
     pthread_setspecific(jl_task_exit_key, (void*)ptls);
 #endif
-    ptls->system_id = (jl_thread_t)(uintptr_t)uv_thread_self();
+    ptls->system_id = uv_thread_self();
     ptls->rngseed = jl_rand();
-    if (tid == 0)
+    if (tid == 0) {
         ptls->disable_gc = 1;
 #ifdef _OS_WINDOWS_
-    if (tid == 0) {
-        if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
-                             GetCurrentProcess(), &hMainThread, 0,
-                             FALSE, DUPLICATE_SAME_ACCESS)) {
-            jl_printf(JL_STDERR, "WARNING: failed to access handle to main thread\n");
-            hMainThread = INVALID_HANDLE_VALUE;
-        }
-    }
+        hMainThread = ptls->system_id;
 #endif
-    jl_atomic_store_relaxed(&ptls->gc_state, 0); // GC unsafe
+    }
+    jl_atomic_store_relaxed(&ptls->gc_state, JL_GC_STATE_UNSAFE); // GC unsafe
     // Conditionally initialize the safepoint address. See comment in
     // `safepoint.c`
     if (tid == 0) {
-        ptls->safepoint = (size_t*)(jl_safepoint_pages + jl_page_size);
+        jl_atomic_store_relaxed(&ptls->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size));
     }
     else {
-        ptls->safepoint = (size_t*)(jl_safepoint_pages + jl_page_size * 2 +
-                                    sizeof(size_t));
+        jl_atomic_store_relaxed(&ptls->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size * 2 +
+                                sizeof(size_t)));
     }
     jl_bt_element_t *bt_data = (jl_bt_element_t*)
         malloc_s(sizeof(jl_bt_element_t) * (JL_MAX_BT_SIZE + 1));
@@ -377,15 +367,13 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
     ptls->bt_data = bt_data;
     small_arraylist_new(&ptls->locks, 0);
     jl_init_thread_heap(ptls);
-
-    uv_mutex_init(&ptls->sleep_lock);
-    uv_cond_init(&ptls->wake_signal);
+    jl_init_thread_scheduler(ptls);
 
     uv_mutex_lock(&tls_lock);
-    jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
     if (tid == -1)
         tid = jl_atomic_load_relaxed(&jl_n_threads);
     ptls->tid = tid;
+    jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
     if (jl_all_tls_states_size <= tid) {
         int i, newsize = jl_all_tls_states_size + tid + 2;
         jl_ptls_t *newpptls = (jl_ptls_t*)calloc(newsize, sizeof(jl_ptls_t));
@@ -403,18 +391,49 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
     jl_fence();
     uv_mutex_unlock(&tls_lock);
 
+#if !defined(_OS_WINDOWS_) && !defined(JL_DISABLE_LIBUNWIND) && !defined(LLVMLIBUNWIND)
+    // ensures libunwind TLS space for this thread is allocated eagerly
+    // to make unwinding async-signal-safe even when using thread local caches.
+    unw_tls_ensure_func jl_unw_ensure_tls = NULL;
+    jl_dlsym(jl_RTLD_DEFAULT_handle, "unw_ensure_tls", (void**)&jl_unw_ensure_tls, 0, 1);
+    if (jl_unw_ensure_tls)
+        jl_unw_ensure_tls();
+#endif
+
     return ptls;
 }
 
+static _Atomic(jl_value_t*) init_task_lock_func JL_GLOBALLY_ROOTED = NULL;
+
+static void jl_init_task_lock(jl_task_t *ct)
+{
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_get_world_counter();
+    jl_value_t *done = jl_atomic_load_relaxed(&init_task_lock_func);
+    if (done == NULL) {
+        done = (jl_value_t*)jl_get_global_value(jl_base_module, jl_symbol("init_task_lock"), ct->world_age);
+        if (done != NULL)
+            jl_atomic_store_release(&init_task_lock_func, done);
+    }
+    if (done != NULL) {
+        jl_value_t *args[2] = {done, (jl_value_t*)ct};
+        JL_TRY {
+            jl_apply(args, 2);
+        }
+        JL_CATCH {
+            jl_no_exc_handler(jl_current_exception(ct), ct);
+        }
+    }
+    ct->world_age = last_age;
+}
+
 JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void)
 {
     // `jl_init_threadtls` puts us in a GC unsafe region, so ensure GC isn't running.
     // we can't use a normal safepoint because we don't have signal handlers yet.
-    // we also can't use jl_safepoint_wait_gc because that assumes we're in a task.
     jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
-    while (jl_atomic_load_acquire(&jl_gc_running)) {
-        jl_cpu_pause();
-    }
+    // pass NULL as a special token to indicate we are running on an unmanaged task
+    jl_safepoint_wait_gc(NULL);
     // this check is coupled with the one in `jl_safepoint_wait_gc`, where we observe if a
     // foreign thread has asked to disable the GC, guaranteeing the order of events.
 
@@ -428,10 +447,53 @@ JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void)
     JL_GC_PROMISE_ROOTED(ct);
     uv_random(NULL, NULL, &ct->rngState, sizeof(ct->rngState), 0, NULL);
     jl_atomic_fetch_add(&jl_gc_disable_counter, -1);
+    jl_init_task_lock(ct);
     return &ct->gcstack;
 }
 
+JL_DLLEXPORT jl_gcframe_t **jl_autoinit_and_adopt_thread(void)
+{
+    if (!jl_is_initialized()) {
+        void *retaddr = __builtin_extract_return_addr(__builtin_return_address(0));
+        void *handle = jl_find_dynamic_library_by_addr(retaddr, 0, 0);
+        if (handle == NULL) {
+            fprintf(stderr, "error: runtime auto-initialization failed due to bad sysimage lookup\n"
+                            "       (this should not happen, please file a bug report)\n");
+            exit(1);
+        }
+        jl_init_with_image_handle(handle);
+        return &jl_get_current_task()->gcstack;
+    }
+
+    return jl_adopt_thread();
+}
+
+void jl_safepoint_suspend_all_threads(jl_task_t *ct)
+{
+    // TODO: prevent jl_n_threads changing or jl_safepoint_resume_thread calls on another thread
+    //uv_mutex_lock(&tls_lock);
+    //disallow_resume = ct->tid;
+    //uv_mutex_unlock(&tls_lock);
+    for (int16_t tid = 0; tid < jl_atomic_load_relaxed(&jl_n_threads); tid++) {
+        if (tid != jl_atomic_load_relaxed(&ct->tid))
+            jl_safepoint_suspend_thread(tid, 1);
+    };
+}
+
+void jl_safepoint_resume_all_threads(jl_task_t *ct)
+{
+    //uv_mutex_lock(&tls_lock);
+    //if (disallow_resume != ct->tid) return;
+    //uv_mutex_unlock(&tls_lock);
+    for (int16_t tid = 0; tid < jl_atomic_load_relaxed(&jl_n_threads); tid++) {
+        if (tid != jl_atomic_load_relaxed(&ct->tid))
+            jl_safepoint_resume_thread(tid);
+    };
+}
+
 void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT;
+void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT;
+void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT;
 
 static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
 {
@@ -442,41 +504,75 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
     // safepoint until GC exit, in case GC was running concurrently while in
     // prior unsafe-region (before we let it release the stack memory)
     (void)jl_gc_unsafe_enter(ptls);
-    jl_atomic_store_relaxed(&ptls->sleep_check_state, 2); // dead, interpreted as sleeping and unwakeable
-    jl_fence();
-    jl_wakeup_thread(0); // force thread 0 to see that we do not have the IO lock (and am dead)
+    scheduler_delete_thread(ptls);
+    // need to clear pgcstack and eh, but we can clear everything now too
+    jl_task_t *ct = jl_atomic_load_relaxed(&ptls->current_task);
+    jl_task_frame_noreturn(ct);
+    if (jl_set_task_tid(ptls->root_task, ptls->tid)) {
+        // the system will probably free this stack memory soon
+        // so prevent any other thread from accessing it later
+        if (ct != ptls->root_task)
+            jl_task_frame_noreturn(ptls->root_task);
+    }
+    else {
+        // Uh oh. The user cleared the sticky bit so it started running
+        // elsewhere, then called pthread_exit on this thread from another
+        // Task, which will free the stack memory of that root task soon. This
+        // is not recoverable. Though we could just hang here, a fatal message
+        // is likely better.
+        jl_safe_printf("fatal: thread exited from wrong Task.\n");
+        abort();
+    }
+    ptls->previous_exception = NULL;
+    // allow the page root_task is on to be freed
+    ptls->root_task = NULL;
+    jl_free_thread_gc_state(ptls);
+    // park in safe-region from here on (this may run GC again)
+    (void)jl_gc_safe_enter(ptls);
+    // try to free some state we do not need anymore
+#ifndef _OS_WINDOWS_
+    void *signal_stack = ptls->signal_stack;
+    size_t signal_stack_size = ptls->signal_stack_size;
+    if (signal_stack != NULL) {
+        stack_t ss;
+        if (sigaltstack(NULL, &ss))
+            jl_errorf("fatal error: sigaltstack: %s", strerror(errno));
+        if (ss.ss_sp == signal_stack) {
+            ss.ss_flags = SS_DISABLE;
+            if (sigaltstack(&ss, NULL) != 0) {
+                jl_errorf("warning: sigaltstack: %s (will leak this memory)", strerror(errno));
+                signal_stack = NULL;
+            }
+        }
+        if (signal_stack != NULL) {
+            if (signal_stack_size)
+                _jl_free_stack(ptls ,signal_stack, signal_stack_size);
+            else
+                free(signal_stack);
+        }
+        ptls->signal_stack = NULL;
+    }
+#endif
     // Acquire the profile write lock, to ensure we are not racing with the `kill`
     // call in the profile code which will also try to look at this thread.
     // We have no control over when the user calls pthread_join, so we must do
     // this here by blocking. This also synchronizes our read of `current_task`
     // (which is the flag we currently use to check the liveness state of a thread).
 #ifdef _OS_WINDOWS_
-    jl_lock_profile_wr();
+    int havelock = jl_lock_profile_wr();
+    assert(havelock); (void)havelock;
 #elif defined(JL_DISABLE_LIBUNWIND)
     // nothing
 #elif defined(__APPLE__)
-    jl_lock_profile_wr();
+    int havelock = jl_lock_profile_wr();
+    assert(havelock); (void)havelock;
 #else
     pthread_mutex_lock(&in_signal_lock);
 #endif
-    // need to clear pgcstack and eh, but we can clear everything now too
-    jl_task_frame_noreturn(jl_atomic_load_relaxed(&ptls->current_task));
-    if (jl_set_task_tid(ptls->root_task, ptls->tid)) {
-        // the system will probably free this stack memory soon
-        // so prevent any other thread from accessing it later
-        jl_task_frame_noreturn(ptls->root_task);
-    }
-    else {
-        // Uh oh. The user cleared the sticky bit so it started running
-        // elsewhere, then called pthread_exit on this thread. This is not
-        // recoverable. Though we could just hang here, a fatal message is better.
-        jl_safe_printf("fatal: thread exited from wrong Task.\n");
-        abort();
-    }
-    jl_atomic_store_relaxed(&ptls->current_task, NULL); // dead
+    jl_atomic_store_relaxed(&ptls->current_task, NULL); // indicate dead
     // finally, release all of the locks we had grabbed
 #ifdef _OS_WINDOWS_
-    jl_unlock_profile_wr();
+    if (havelock) jl_unlock_profile_wr();
 #elif defined(JL_DISABLE_LIBUNWIND)
     // nothing
 #elif defined(__APPLE__)
@@ -484,8 +580,8 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
 #else
     pthread_mutex_unlock(&in_signal_lock);
 #endif
-    // then park in safe-region
-    (void)jl_gc_safe_enter(ptls);
+    free(ptls->bt_data);
+    small_arraylist_free(&ptls->locks);
 }
 
 //// debugging hack: if we are exiting too fast for error message printing on threads,
@@ -493,7 +589,6 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
 //// the other threads time to fail and emit their failure message
 //__attribute__((destructor)) static void _waitthreaddeath(void) { sleep(1); }
 
-JL_DLLEXPORT jl_mutex_t jl_codegen_lock;
 jl_mutex_t typecache_lock;
 
 JL_DLLEXPORT ssize_t jl_tls_offset = -1;
@@ -586,6 +681,8 @@ static void jl_check_tls(void)
     asm("mrs %0, tpidr_el0" : "=r"(tp));
 #elif defined(__ARM_ARCH) && __ARM_ARCH >= 7
     asm("mrc p15, 0, %0, c13, c0, 3" : "=r"(tp));
+#elif defined(_CPU_RISCV64_)
+    asm("mv %0, tp" : "=r"(tp));
 #else
 #  error "Cannot emit thread pointer for this architecture."
 #endif
@@ -620,15 +717,15 @@ void jl_init_threading(void)
     // and `jl_n_threads_per_pool`.
     jl_n_threadpools = 2;
     int16_t nthreads = JULIA_NUM_THREADS;
-    int16_t nthreadsi = 0;
+    // if generating output default to 0 interactive threads, otherwise default to 1
+    int16_t nthreadsi = jl_generating_output() ? 0 : 1;
     char *endptr, *endptri;
 
     if (jl_options.nthreads != 0) { // --threads specified
         nthreads = jl_options.nthreads_per_pool[0];
         if (nthreads < 0)
             nthreads = jl_effective_threads();
-        if (jl_options.nthreadpools == 2)
-            nthreadsi = jl_options.nthreads_per_pool[1];
+        nthreadsi = (jl_options.nthreadpools == 1) ? 0 : jl_options.nthreads_per_pool[1];
     }
     else if ((cp = getenv(NUM_THREADS_NAME))) { // ENV[NUM_THREADS_NAME] specified
         if (!strncmp(cp, "auto", 4)) {
@@ -641,20 +738,26 @@ void jl_init_threading(void)
             if (errno != 0 || endptr == cp || nthreads <= 0)
                 nthreads = 1;
             cp = endptr;
+            if (nthreads == 1) // User asked for 1 thread so lets assume they dont want an interactive thread
+                nthreadsi = 0;
         }
         if (*cp == ',') {
             cp++;
-            if (!strncmp(cp, "auto", 4))
+            if (!strncmp(cp, "auto", 4)) {
                 nthreadsi = 1;
+                cp += 4;
+            }
             else {
                 errno = 0;
                 nthreadsi = strtol(cp, &endptri, 10);
                 if (errno != 0 || endptri == cp || nthreadsi < 0)
-                    nthreadsi = 0;
+                    nthreadsi = 1;
+                cp = endptri;
             }
         }
     }
 
+    int cpu = jl_effective_threads();
     jl_n_markthreads = jl_options.nmarkthreads - 1;
     jl_n_sweepthreads = jl_options.nsweepthreads;
     if (jl_n_markthreads == -1) { // --gcthreads not specified
@@ -675,35 +778,49 @@ void jl_init_threading(void)
         }
         else {
             // if `--gcthreads` or ENV[NUM_GCTHREADS_NAME] was not specified,
-            // set the number of mark threads to half of compute threads
+            // set the number of mark threads to the number of compute threads
             // and number of sweep threads to 0
-            if (nthreads <= 1) {
-                jl_n_markthreads = 0;
-            }
-            else {
-                jl_n_markthreads = (nthreads / 2) - 1;
+            jl_n_markthreads = nthreads - 1; // -1 for the master (mutator) thread which may also do marking
+            // if `--gcthreads` or ENV[NUM_GCTHREADS_NAME] was not specified,
+            // cap the number of threads that may run the mark phase to
+            // the number of CPU cores
+            if (jl_n_markthreads + 1 >= cpu) {
+                jl_n_markthreads = cpu - 1;
             }
         }
     }
+    // warn the user if they try to run with a number
+    // of GC threads which is larger than the number
+    // of physical cores
+    if (jl_n_markthreads + 1 > cpu) {
+        jl_safe_printf("WARNING: running Julia with %d GC threads on %d CPU cores\n", jl_n_markthreads + 1, cpu);
+    }
     int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads;
 
-    jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads;
-    jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int));
-    jl_n_threads_per_pool[0] = nthreadsi;
-    jl_n_threads_per_pool[1] = nthreads;
+    if (strstr(jl_gc_active_impl(), "MMTk")) {
+        ngcthreads = 0;
+    }
 
+    jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads;
+    jl_n_threads_per_pool = (int*)calloc_s(jl_n_threadpools * sizeof(int));
+    jl_n_threads_per_pool[JL_THREADPOOL_ID_INTERACTIVE] = nthreadsi;
+    jl_n_threads_per_pool[JL_THREADPOOL_ID_DEFAULT] = nthreads;
+    assert(jl_all_tls_states_size > 0);
     jl_atomic_store_release(&jl_all_tls_states, (jl_ptls_t*)calloc(jl_all_tls_states_size, sizeof(jl_ptls_t)));
     jl_atomic_store_release(&jl_n_threads, jl_all_tls_states_size);
     jl_n_gcthreads = ngcthreads;
-    gc_first_tid = nthreads;
+    gc_first_tid = nthreads + nthreadsi;
 }
 
-static uv_barrier_t thread_init_done;
+uv_barrier_t thread_init_done;
 
 void jl_start_threads(void)
 {
     int nthreads = jl_atomic_load_relaxed(&jl_n_threads);
-    int ngcthreads = jl_n_gcthreads;
+    int ninteractive_threads = jl_n_threads_per_pool[JL_THREADPOOL_ID_INTERACTIVE];
+    int ndefault_threads = jl_n_threads_per_pool[JL_THREADPOOL_ID_DEFAULT];
+    int nmutator_threads = nthreads - jl_n_gcthreads;
+
     int cpumasksize = uv_cpumask_size();
     char *cp;
     int i, exclusive;
@@ -718,52 +835,49 @@ void jl_start_threads(void)
     if (cp && strcmp(cp, "0") != 0)
         exclusive = 1;
 
-    // exclusive use: affinitize threads, master thread on proc 0, rest
-    // according to a 'compact' policy
+    // exclusive use: affinitize threads, master thread on proc 0, threads in
+    // default pool according to a 'compact' policy
     // non-exclusive: no affinity settings; let the kernel move threads about
     if (exclusive) {
-        if (nthreads > jl_cpu_threads()) {
+        if (ndefault_threads > jl_effective_threads()) {
             jl_printf(JL_STDERR, "ERROR: Too many threads requested for %s option.\n", MACHINE_EXCLUSIVE_NAME);
             exit(1);
         }
         memset(mask, 0, cpumasksize);
-        mask[0] = 1;
-        uvtid = uv_thread_self();
-        uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
-        mask[0] = 0;
+
+        // If there are no interactive threads, the master thread is in the
+        // default pool and we must affinitize it
+        if (ninteractive_threads == 0) {
+            mask[0] = 1;
+            uvtid = uv_thread_self();
+            uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
+            mask[0] = 0;
+        }
     }
 
     // create threads
     uv_barrier_init(&thread_init_done, nthreads);
 
     // GC/System threads need to be after the worker threads.
-    int nworker_threads = nthreads - ngcthreads;
-
-    for (i = 1; i < nthreads; ++i) {
+    for (i = 1; i < nmutator_threads; ++i) {
         jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread
         t->tid = i;
         t->barrier = &thread_init_done;
-        if (i < nworker_threads) {
-            uv_thread_create(&uvtid, jl_threadfun, t);
-            if (exclusive) {
-                mask[i] = 1;
-                uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
-                mask[i] = 0;
-            }
-        }
-        else if (i == nthreads - 1 && jl_n_sweepthreads == 1) {
-            uv_thread_create(&uvtid, jl_gc_sweep_threadfun, t);
-        }
-        else {
-            uv_thread_create(&uvtid, jl_gc_mark_threadfun, t);
+        uv_thread_create(&uvtid, jl_threadfun, t);
+
+        // Interactive pool threads get the low IDs, so check if this is a
+        // default pool thread.  The master thread is already on CPU 0.
+        if (exclusive && i >= ninteractive_threads) {
+            assert(i - ninteractive_threads < cpumasksize);
+            mask[i - ninteractive_threads] = 1;
+            uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
+            mask[i - ninteractive_threads] = 0;
         }
-        uv_thread_detach(&uvtid);
     }
-
-    uv_barrier_wait(&thread_init_done);
 }
 
-_Atomic(unsigned) _threadedregion; // HACK: keep track of whether to prioritize IO or threading
+_Atomic(unsigned) _threadedregion; // keep track of whether to prioritize IO or threading
+_Atomic(uint16_t) io_loop_tid; // mark which thread is assigned to run the uv_loop
 
 JL_DLLEXPORT int jl_in_threaded_region(void)
 {
@@ -784,7 +898,27 @@ JL_DLLEXPORT void jl_exit_threaded_region(void)
         JL_UV_UNLOCK();
         // make sure thread 0 is not using the sleep_lock
         // so that it may enter the libuv event loop instead
-        jl_wakeup_thread(0);
+        jl_fence();
+        jl_wakeup_thread(jl_atomic_load_relaxed(&io_loop_tid));
+    }
+}
+
+JL_DLLEXPORT void jl_set_io_loop_tid(int16_t tid)
+{
+    if (tid < 0 || tid >= jl_atomic_load_relaxed(&jl_n_threads)) {
+        // TODO: do we care if this thread has exited or not started yet,
+        // since ptls2 might not be defined yet and visible on all threads yet
+        return;
+    }
+    jl_atomic_store_relaxed(&io_loop_tid, tid);
+    jl_fence();
+    if (jl_atomic_load_relaxed(&_threadedregion) == 0) {
+        // make sure the previous io_loop_tid leaves the libuv event loop
+        JL_UV_LOCK();
+        JL_UV_UNLOCK();
+        // make sure thread io_loop_tid is not using the sleep_lock
+        // so that it may enter the libuv event loop instead
+        jl_wakeup_thread(tid);
     }
 }
 
@@ -794,7 +928,16 @@ void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT
 {
     jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL);
     lock->count = 0;
+#if defined(_COMPILER_TSAN_ENABLED_) && defined(ENABLE_TIMINGS)
+    __tsan_mutex_pre_divert(lock, 0);
+#endif
     jl_profile_lock_init(lock, name);
+#ifdef _COMPILER_TSAN_ENABLED_
+#ifdef ENABLE_TIMINGS
+    __tsan_mutex_post_divert(lock, 0);
+#endif
+    __tsan_mutex_create(lock, __tsan_mutex_write_reentrant);
+#endif
 }
 
 void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
@@ -804,11 +947,17 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
         lock->count++;
         return;
     }
+#ifdef _COMPILER_TSAN_ENABLED_
+    __tsan_mutex_pre_divert(lock, 0);
+#endif
     // Don't use JL_TIMING for instant acquires, results in large blowup of events
     jl_profile_lock_start_wait(lock);
     if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
         lock->count = 1;
         jl_profile_lock_acquired(lock);
+#ifdef _COMPILER_TSAN_ENABLED_
+        __tsan_mutex_post_divert(lock, 0);
+#endif
         return;
     }
     JL_TIMING(LOCK_SPIN, LOCK_SPIN);
@@ -816,21 +965,32 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
         if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
             lock->count = 1;
             jl_profile_lock_acquired(lock);
+#ifdef _COMPILER_TSAN_ENABLED_
+            __tsan_mutex_post_divert(lock, 0);
+#endif
             return;
         }
-        if (safepoint) {
-            jl_gc_safepoint_(self->ptls);
-        }
         if (jl_running_under_rr(0)) {
             // when running under `rr`, use system mutexes rather than spin locking
+            int8_t gc_state;
+            if (safepoint)
+                gc_state = jl_gc_safe_enter(self->ptls);
             uv_mutex_lock(&tls_lock);
             if (jl_atomic_load_relaxed(&lock->owner))
                 uv_cond_wait(&cond, &tls_lock);
             uv_mutex_unlock(&tls_lock);
+            if (safepoint)
+                jl_gc_safe_leave(self->ptls, gc_state);
+        }
+        else if (safepoint) {
+            jl_gc_safepoint_(self->ptls);
         }
         jl_cpu_suspend();
         owner = jl_atomic_load_relaxed(&lock->owner);
     }
+#ifdef _COMPILER_TSAN_ENABLED_
+    __tsan_mutex_post_divert(lock, 0);
+#endif
 }
 
 static void jl_lock_frame_push(jl_task_t *self, jl_mutex_t *lock)
@@ -856,23 +1016,43 @@ static void jl_lock_frame_pop(jl_task_t *self)
 
 void _jl_mutex_lock(jl_task_t *self, jl_mutex_t *lock)
 {
+#ifdef _COMPILER_TSAN_ENABLED_
+    __tsan_mutex_pre_lock(lock, __tsan_mutex_write_reentrant);
+#endif
     JL_SIGATOMIC_BEGIN_self();
     _jl_mutex_wait(self, lock, 1);
     jl_lock_frame_push(self, lock);
+#ifdef _COMPILER_TSAN_ENABLED_
+    __tsan_mutex_post_lock(lock, __tsan_mutex_write_reentrant, 1);
+#endif
 }
 
 int _jl_mutex_trylock_nogc(jl_task_t *self, jl_mutex_t *lock)
 {
+#ifdef _COMPILER_TSAN_ENABLED_
+    __tsan_mutex_pre_lock(lock, __tsan_mutex_try_lock | __tsan_mutex_write_reentrant);
+#endif
     jl_task_t *owner = jl_atomic_load_acquire(&lock->owner);
+    int ret = 0;
     if (owner == self) {
         lock->count++;
-        return 1;
+        ret = 1;
+        goto done;
     }
     if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
         lock->count = 1;
-        return 1;
+        ret = 1;
+        goto done;
     }
-    return 0;
+done:
+#ifdef _COMPILER_TSAN_ENABLED_
+    __tsan_mutex_post_lock(lock,
+                           __tsan_mutex_try_lock |
+                               (ret ? 0 : __tsan_mutex_try_lock_failed) |
+                               __tsan_mutex_write_reentrant,
+                           1);
+#endif
+    return ret;
 }
 
 int _jl_mutex_trylock(jl_task_t *self, jl_mutex_t *lock)
@@ -888,6 +1068,9 @@ int _jl_mutex_trylock(jl_task_t *self, jl_mutex_t *lock)
 void _jl_mutex_unlock_nogc(jl_mutex_t *lock)
 {
 #ifndef __clang_gcanalyzer__
+#ifdef _COMPILER_TSAN_ENABLED_
+    __tsan_mutex_pre_unlock(lock, 0);
+#endif
     assert(jl_atomic_load_relaxed(&lock->owner) == jl_current_task &&
            "Unlocking a lock in a different thread.");
     if (--lock->count == 0) {
@@ -902,6 +1085,9 @@ void _jl_mutex_unlock_nogc(jl_mutex_t *lock)
         }
         jl_profile_lock_release_end(lock);
     }
+#ifdef _COMPILER_TSAN_ENABLED_
+    __tsan_mutex_post_unlock(lock, 0);
+#endif
 #endif
 }
 
@@ -923,6 +1109,52 @@ JL_DLLEXPORT int jl_alignment(size_t sz)
     return jl_gc_alignment(sz);
 }
 
+// Return values:
+//     0  == success
+//     1  == invalid thread id provided
+//     2  == ptls2 was NULL
+//     <0 == uv_thread_getaffinity exit code
+JL_DLLEXPORT int jl_getaffinity(int16_t tid, char *mask, int cpumasksize) {
+    int nthreads = jl_atomic_load_acquire(&jl_n_threads);
+    if (tid < 0 || tid >= nthreads)
+        return 1;
+
+    // TODO: use correct lock. system_id is only legal if the thread is alive.
+    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+    if (ptls2 == NULL)
+        return 2;
+    uv_thread_t uvtid = ptls2->system_id;
+
+    int ret_uv = uv_thread_getaffinity(&uvtid, mask, cpumasksize);
+    if (ret_uv != 0)
+        return ret_uv;
+
+    return 0; // success
+}
+
+// Return values:
+//     0  == success
+//     1  == invalid thread id provided
+//     2  == ptls2 was NULL
+//     <0 == uv_thread_getaffinity exit code
+JL_DLLEXPORT int jl_setaffinity(int16_t tid, char *mask, int cpumasksize) {
+    int nthreads = jl_atomic_load_acquire(&jl_n_threads);
+    if (tid < 0 || tid >= nthreads)
+        return 1;
+
+    // TODO: use correct lock. system_id is only legal if the thread is alive.
+    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
+    if (ptls2 == NULL)
+        return 2;
+    uv_thread_t uvtid = ptls2->system_id;
+
+    int ret_uv = uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
+    if (ret_uv != 0)
+        return ret_uv;
+
+    return 0; // success
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/threading.h b/src/threading.h
index 73d2cd73fb70d..cb26537699713 100644
--- a/src/threading.h
+++ b/src/threading.h
@@ -12,6 +12,8 @@ extern "C" {
 
 #define PROFILE_JL_THREADING            0
 
+extern uv_barrier_t thread_init_done;
+
 extern _Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED; /* thread local storage */
 
 typedef struct _jl_threadarg_t {
@@ -25,8 +27,8 @@ jl_ptls_t jl_init_threadtls(int16_t tid) JL_NOTSAFEPOINT;
 
 // provided by a threading infrastructure
 void jl_init_threadinginfra(void);
-void jl_gc_mark_threadfun(void *arg);
-void jl_gc_sweep_threadfun(void *arg);
+void jl_parallel_gc_threadfun(void *arg);
+void jl_concurrent_gc_threadfun(void *arg);
 void jl_threadfun(void *arg);
 
 #ifdef __cplusplus
diff --git a/src/timing.c b/src/timing.c
index d933f082c816e..dd37bca3f5c35 100644
--- a/src/timing.c
+++ b/src/timing.c
@@ -6,16 +6,23 @@
 #include "options.h"
 #include "stdio.h"
 
-#if defined(USE_TRACY) || defined(USE_ITTAPI)
+#if defined(USE_TRACY) || defined(USE_ITTAPI) || defined(USE_NVTX)
 #define DISABLE_FREQUENT_EVENTS
 #endif
 
-jl_module_t *jl_module_root(jl_module_t *m);
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+JL_DLLEXPORT int jl_timing_enabled(void) {
+#ifdef ENABLE_TIMINGS
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+
 #ifdef ENABLE_TIMINGS
 
 #ifndef HAVE_TIMING_SUPPORT
@@ -49,6 +56,10 @@ static arraylist_t jl_timing_ittapi_events;
 static jl_mutex_t jl_timing_ittapi_events_lock;
 #endif //USE_ITTAPI
 
+#ifdef USE_NVTX
+static nvtxDomainHandle_t jl_timing_nvtx_domain;
+#endif
+
 #ifdef USE_TIMING_COUNTS
 static int cmp_counts_events(const void *a, const void *b) {
     jl_timing_counts_event_t *event_a = *(jl_timing_counts_event_t **)a;
@@ -139,6 +150,13 @@ void jl_init_timing(void)
     qsort(jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST,
           sizeof(const char *), indirect_strcmp);
 
+#ifdef USE_NVTX
+    jl_timing_nvtx_domain = nvtxDomainCreateA("julia");
+    for (int i = 0; i < JL_TIMING_SUBSYSTEM_LAST; i++) {
+        nvtxDomainNameCategoryA(jl_timing_nvtx_domain, i + 1, jl_timing_subsystems[i]);
+    }
+#endif
+
     int i __attribute__((unused)) = 0;
 #ifdef USE_ITTAPI
     i = 0;
@@ -174,6 +192,7 @@ void jl_init_timing(void)
     error |= jl_timing_set_enable("METHOD_LOOKUP_FAST", 0);
     error |= jl_timing_set_enable("AST_COMPRESS", 0);
     error |= jl_timing_set_enable("AST_UNCOMPRESS", 0);
+    error |= jl_timing_set_enable("TYPE_CACHE_INSERT", 0);
     if (error)
         jl_error("invalid timing subsystem encountered in jl_init_timing");
 #endif
@@ -317,6 +336,25 @@ JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, c
     event->ittapi_event = _jl_timing_ittapi_event_create(name);
 #endif // USE_ITTAPI
 
+#ifdef USE_NVTX
+    nvtxEventAttributes_t nvtx_attrs = {0};
+    nvtx_attrs.version = NVTX_VERSION;
+    nvtx_attrs.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+
+    nvtxStringHandle_t nvtx_message = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, name);
+    nvtx_attrs.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
+    nvtx_attrs.message.registered = nvtx_message;
+
+    // 0 is the default (unnamed) category
+    nvtx_attrs.category = maybe_subsystem == JL_TIMING_SUBSYSTEM_LAST ? 0 : maybe_subsystem+1;
+
+    // simple Knuth hash to get nice colors
+    nvtx_attrs.colorType = NVTX_COLOR_ARGB;
+    nvtx_attrs.color = (nvtx_attrs.category * 2654435769) >> 8;
+
+    event->nvtx_attrs = nvtx_attrs;
+#endif // USE_NVTX
+
 #ifdef USE_TRACY
     event->tracy_srcloc.name = name;
     event->tracy_srcloc.function = function;
@@ -342,10 +380,12 @@ JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_
 JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) {
     assert(!block->is_running);
     if (!_jl_timing_enabled(block->event->subsystem)) return;
+    if (jl_get_pgcstack() == NULL) return; // not setup on this thread
 
     uint64_t t = cycleclock(); (void)t;
     _COUNTS_START(&block->counts_ctx, t);
     _ITTAPI_START(block);
+    _NVTX_START(block);
     _TRACY_START(block);
 
     jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack;
@@ -361,6 +401,7 @@ JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) {
     if (block->is_running) {
         uint64_t t = cycleclock(); (void)t;
         _ITTAPI_STOP(block);
+        _NVTX_STOP(block);
         _TRACY_STOP(block->tracy_ctx);
         _COUNTS_STOP(block, t);
 
diff --git a/src/timing.h b/src/timing.h
index 30f6ad0ab3b5c..833f5f68d34f5 100644
--- a/src/timing.h
+++ b/src/timing.h
@@ -55,6 +55,8 @@ JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, c
 JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_t *event);
 JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *cur_block);
 JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *cur_block);
+JL_DLLEXPORT int jl_timing_enabled(void);
+
 
 #ifdef __cplusplus
 }
@@ -66,7 +68,7 @@ JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *cur_block);
 #define HAVE_TIMING_SUPPORT
 #endif
 
-#if defined( USE_TRACY ) || defined( USE_ITTAPI ) || defined( USE_TIMING_COUNTS )
+#if defined( USE_TRACY ) || defined( USE_ITTAPI ) || defined( USE_NVTX ) || defined( USE_TIMING_COUNTS )
 #define ENABLE_TIMINGS
 #endif
 
@@ -115,6 +117,12 @@ typedef struct ___tracy_source_location_data TracySrcLocData;
 #include <ittapi/ittnotify.h>
 #endif
 
+#ifdef USE_NVTX
+#pragma GCC visibility push(default)
+#include <nvtx3/nvToolsExt.h>
+#pragma GCC visibility pop
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -160,8 +168,7 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str);
         X(METHOD_LOOKUP_SLOW)    \
         X(METHOD_LOOKUP_FAST)    \
         X(CODEINST_COMPILE)      \
-        X(LLVM_OPT)              \
-        X(LLVM_ORC)              \
+        X(LLVM_JIT)              \
         X(METHOD_MATCH)          \
         X(TYPE_CACHE_LOOKUP)     \
         X(TYPE_CACHE_INSERT)     \
@@ -175,12 +182,14 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str);
         X(LOAD_MODULE)           \
         X(LOAD_IMAGE)            \
         X(VERIFY_IMAGE)          \
+        X(VERIFY_IR)             \
         X(SAVE_MODULE)           \
         X(INIT_MODULE)           \
         X(LOCK_SPIN)             \
         X(STACKWALK)             \
         X(DL_OPEN)               \
         X(JULIA_INIT)            \
+        X(CORE_COMPILER)        \
 
 
 #define JL_TIMING_COUNTERS \
@@ -276,6 +285,20 @@ typedef struct _jl_timing_counts_t {
 #define _ITTAPI_STOP(block)
 #endif
 
+
+#ifdef USE_NVTX
+#define _NVTX_EVENT_MEMBER              nvtxEventAttributes_t nvtx_attrs;
+#define _NVTX_BLOCK_MEMBER              nvtxRangeId_t nvtx_rangeid;
+#define _NVTX_START(block)              (block)->nvtx_rangeid = nvtxDomainRangeStartEx(jl_timing_nvtx_domain, &(block)->event->nvtx_attrs)
+#define _NVTX_STOP(block)               nvtxDomainRangeEnd(jl_timing_nvtx_domain, (block)->nvtx_rangeid)
+#else
+#define _NVTX_EVENT_MEMBER
+#define _NVTX_BLOCK_MEMBER
+#define _NVTX_START(block)
+#define _NVTX_STOP(block)
+#endif
+
+
 /**
  * Top-level jl_timing implementation
  **/
@@ -292,6 +315,7 @@ extern const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST];
 struct _jl_timing_event_t { // typedef in julia.h
     _TRACY_EVENT_MEMBER
     _ITTAPI_EVENT_MEMBER
+    _NVTX_EVENT_MEMBER
     _COUNTS_EVENT_MEMBER
 
     int subsystem;
@@ -310,6 +334,7 @@ struct _jl_timing_block_t { // typedef in julia.h
 
     _TRACY_BLOCK_MEMBER
     _ITTAPI_BLOCK_MEMBER
+    _NVTX_BLOCK_MEMBER
     _COUNTS_BLOCK_MEMBER
 
     uint8_t is_running;
@@ -362,6 +387,12 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N
 #define _ITTAPI_COUNTER_MEMBER
 #endif
 
+#ifdef USE_NVTX
+#define _NVTX_COUNTER_MEMBER void * __nvtx_null;
+#else
+#define _NVTX_COUNTER_MEMBER
+#endif
+
 #ifdef USE_TRACY
 # define _TRACY_COUNTER_MEMBER jl_tracy_counter_t tracy_counter;
 # else
@@ -376,6 +407,7 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N
 
 typedef struct {
     _ITTAPI_COUNTER_MEMBER
+    _NVTX_COUNTER_MEMBER
     _TRACY_COUNTER_MEMBER
     _COUNTS_MEMBER
 } jl_timing_counter_t;
diff --git a/src/toplevel.c b/src/toplevel.c
index 51ff93488426f..b78ad6ec46b3c 100644
--- a/src/toplevel.c
+++ b/src/toplevel.c
@@ -26,54 +26,50 @@ extern "C" {
 #endif
 
 // current line number in a file
-JL_DLLEXPORT int jl_lineno = 0; // need to update jl_critical_error if this is TLS
+JL_DLLEXPORT _Atomic(int) jl_lineno = 0; // need to update jl_fprint_critical_error if this is TLS
 // current file name
-JL_DLLEXPORT const char *jl_filename = "none"; // need to update jl_critical_error if this is TLS
+JL_DLLEXPORT _Atomic(const char *) jl_filename = "none"; // need to update jl_fprint_critical_error if this is TLS
+
+static jl_value_t *jl_eval_toplevel_stmts(jl_module_t *JL_NONNULL m, jl_array_t *stmts, int fast, int need_value, const char **toplevel_filename, int *toplevel_lineno);
 
 htable_t jl_current_modules;
 jl_mutex_t jl_modules_mutex;
 
 // During incremental compilation, the following gets set
-JL_DLLEXPORT jl_module_t *jl_precompile_toplevel_module = NULL;   // the toplevel module currently being defined
+jl_module_t *jl_precompile_toplevel_module = NULL;   // the first toplevel module being defined
 
-JL_DLLEXPORT void jl_add_standard_imports(jl_module_t *m)
+jl_module_t *jl_add_standard_imports(jl_module_t *m)
 {
     jl_module_t *base_module = jl_base_relative_to(m);
     assert(base_module != NULL);
     // using Base
-    jl_module_using(m, base_module);
+    jl_module_initial_using(m, base_module);
+    return base_module;
 }
 
 // create a new top-level module
 void jl_init_main_module(void)
 {
     assert(jl_main_module == NULL);
-    jl_main_module = jl_new_module(jl_symbol("Main"), NULL);
-    jl_main_module->parent = jl_main_module;
-    jl_set_const(jl_main_module, jl_symbol("Core"),
-                 (jl_value_t*)jl_core_module);
-    jl_set_const(jl_core_module, jl_symbol("Main"),
-                 (jl_value_t*)jl_main_module);
-}
-
-static jl_function_t *jl_module_get_initializer(jl_module_t *m JL_PROPAGATES_ROOT)
-{
-    return (jl_function_t*)jl_get_global(m, jl_symbol("__init__"));
+    jl_main_module = jl_new_module_(jl_symbol("Main"), NULL, 0, 1); // baremodule Main; end
+    jl_set_initial_const(jl_core_module, jl_symbol("Main"), (jl_value_t*)jl_main_module, 0); // const Main.Core = Core
+    jl_set_initial_const(jl_main_module, jl_symbol("Core"), (jl_value_t*)jl_core_module, 0); // const Core.Main = Main
 }
 
-
 void jl_module_run_initializer(jl_module_t *m)
 {
     JL_TIMING(INIT_MODULE, INIT_MODULE);
     jl_timing_show_module(m, JL_TIMING_DEFAULT_BLOCK);
-    jl_function_t *f = jl_module_get_initializer(m);
-    if (f == NULL)
-        return;
     jl_task_t *ct = jl_current_task;
     size_t last_age = ct->world_age;
     JL_TRY {
         ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        jl_apply(&f, 1);
+        jl_value_t *f = jl_get_global_value(m, jl_symbol("__init__"), ct->world_age);
+        if (f != NULL) {
+            JL_GC_PUSH1(&f);
+            jl_apply(&f, 1);
+            JL_GC_POP();
+        }
         ct->world_age = last_age;
     }
     JL_CATCH {
@@ -82,17 +78,16 @@ void jl_module_run_initializer(jl_module_t *m)
         }
         else {
             jl_rethrow_other(jl_new_struct(jl_initerror_type, m->name,
-                                           jl_current_exception()));
+                                           jl_current_exception(ct)));
         }
     }
 }
 
 static void jl_register_root_module(jl_module_t *m)
 {
-    static jl_value_t *register_module_func = NULL;
+    jl_value_t *register_module_func = NULL;
     assert(jl_base_module);
-    if (register_module_func == NULL)
-        register_module_func = jl_get_global(jl_base_module, jl_symbol("register_root_module"));
+    register_module_func = jl_get_global(jl_base_module, jl_symbol("register_root_module"));
     assert(register_module_func);
     jl_value_t *args[2];
     args[0] = register_module_func;
@@ -102,142 +97,85 @@ static void jl_register_root_module(jl_module_t *m)
 
 jl_array_t *jl_get_loaded_modules(void)
 {
-    static jl_value_t *loaded_modules_array = NULL;
-    if (loaded_modules_array == NULL && jl_base_module != NULL)
+    jl_value_t *loaded_modules_array = NULL;
+    if (jl_base_module != NULL)
         loaded_modules_array = jl_get_global(jl_base_module, jl_symbol("loaded_modules_array"));
     if (loaded_modules_array != NULL)
-        return (jl_array_t*)jl_call0((jl_function_t*)loaded_modules_array);
+        return (jl_array_t*)jl_call0((jl_value_t*)loaded_modules_array);
     return NULL;
 }
 
-static int jl_is__toplevel__mod(jl_module_t *mod)
+static int jl_is__toplevel__mod(jl_module_t *mod, jl_task_t *ct)
 {
     return jl_base_module &&
-        (jl_value_t*)mod == jl_get_global(jl_base_module, jl_symbol("__toplevel__"));
+        (jl_value_t*)mod == jl_get_global_value(jl_base_module, jl_symbol("__toplevel__"), ct->world_age);
 }
 
-// TODO: add locks around global state mutation operations
-static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex)
+JL_DLLEXPORT void jl_setup_new_module(jl_module_t *m, jl_value_t *syntax_version)
 {
     jl_task_t *ct = jl_current_task;
-    assert(ex->head == jl_module_sym);
-    if (jl_array_len(ex->args) != 3 || !jl_is_expr(jl_exprarg(ex, 2))) {
-        jl_error("syntax: malformed module expression");
-    }
-
-    if (((jl_expr_t *)(jl_exprarg(ex, 2)))->head != jl_symbol("block")) {
-        jl_error("syntax: module expression third argument must be a block");
-    }
-
-    int std_imports = (jl_exprarg(ex, 0) == jl_true);
-    jl_sym_t *name = (jl_sym_t*)jl_exprarg(ex, 1);
-    if (!jl_is_symbol(name)) {
-        jl_type_error("module", (jl_value_t*)jl_symbol_type, (jl_value_t*)name);
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+    jl_value_t *f = jl_get_global_value(jl_base_module, jl_symbol("_setup_module!"), ct->world_age);
+    if (f != NULL) {
+        jl_value_t **fargs;
+        JL_GC_PUSHARGS(fargs, 3);
+        fargs[0] = f;
+        fargs[1] = (jl_value_t*)m;
+        fargs[2] = syntax_version;
+        jl_apply(fargs, 3);
+        JL_GC_POP();
     }
+    ct->world_age = last_age;
+}
 
-    int is_parent__toplevel__ = jl_is__toplevel__mod(parent_module);
-    jl_module_t *newm = jl_new_module(name, is_parent__toplevel__ ? NULL : parent_module);
+JL_DLLEXPORT jl_module_t *jl_begin_new_module(jl_module_t *parent_module, jl_sym_t *name, jl_value_t *syntax_version,
+                                              int std_imports, const char *filename, int lineno)
+{
+    jl_task_t *ct = jl_current_task;
+    int is_parent__toplevel__ = jl_is__toplevel__mod(parent_module, ct);
+    // If we have `Base`, don't also try to import `Core` - the `Base` exports are a superset.
+    // While we allow multiple imports of the same binding from different modules, various error printing
+    // performs reflection on which module a binding came from and we'd prefer users see "Base" here.
+    jl_module_t *newm = jl_new_module_(name, is_parent__toplevel__ ? NULL : parent_module, std_imports && jl_base_module != NULL ? 0 : 1, 1);
     jl_value_t *form = (jl_value_t*)newm;
     JL_GC_PUSH1(&form);
     JL_LOCK(&jl_modules_mutex);
     ptrhash_put(&jl_current_modules, (void*)newm, (void*)((uintptr_t)HT_NOTFOUND + 1));
     JL_UNLOCK(&jl_modules_mutex);
-
-    jl_module_t *old_toplevel_module = jl_precompile_toplevel_module;
-
     // copy parent environment info into submodule
     newm->uuid = parent_module->uuid;
-    if (is_parent__toplevel__) {
-        newm->parent = newm;
-        jl_register_root_module(newm);
-        if (jl_options.incremental) {
-            jl_precompile_toplevel_module = newm;
-        }
-    }
-    else {
-        jl_binding_t *b = jl_get_binding_wr(parent_module, name);
-        jl_declare_constant(b, parent_module, name);
-        jl_value_t *old = NULL;
-        if (!jl_atomic_cmpswap(&b->value, &old, (jl_value_t*)newm)) {
-            if (!jl_is_module(old)) {
-                jl_errorf("invalid redefinition of constant %s", jl_symbol_name(name));
-            }
-            if (jl_generating_output())
-                jl_errorf("cannot replace module %s during compilation", jl_symbol_name(name));
-            jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(name));
-            old = jl_atomic_exchange(&b->value, (jl_value_t*)newm);
-        }
-        jl_gc_wb_binding(b, newm);
-        if (old != NULL) {
-            // create a hidden gc root for the old module
-            JL_LOCK(&jl_modules_mutex);
-            uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)old);
-            *refcnt += 1;
-            JL_UNLOCK(&jl_modules_mutex);
-        }
+    newm->file = jl_symbol(filename);
+    jl_gc_wb_knownold(newm, newm->file);
+    newm->line = lineno;
+
+    // add standard imports unless baremodule
+    if (std_imports && jl_base_module != NULL) {
+        jl_setup_new_module(newm, syntax_version);
     }
 
-    if (parent_module == jl_main_module && name == jl_symbol("Base")) {
+    if (parent_module == jl_main_module && name == jl_symbol("Base") && jl_base_module == NULL) {
         // pick up Base module during bootstrap
         jl_base_module = newm;
     }
 
-    size_t last_age = ct->world_age;
-
-    // add standard imports unless baremodule
-    jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args;
-    int lineno = 0;
-    const char *filename = "none";
-    if (jl_array_len(exprs) > 0) {
-        jl_value_t *lineex = jl_array_ptr_ref(exprs, 0);
-        if (jl_is_linenode(lineex)) {
-            lineno = jl_linenode_line(lineex);
-            jl_value_t *file = jl_linenode_file(lineex);
-            if (jl_is_symbol(file))
-                filename = jl_symbol_name((jl_sym_t*)file);
-        }
-    }
-    if (std_imports) {
-        if (jl_base_module != NULL) {
-            jl_add_standard_imports(newm);
+    if (is_parent__toplevel__) {
+        jl_register_root_module(newm);
+        if (jl_options.incremental && jl_precompile_toplevel_module == NULL) {
+            jl_precompile_toplevel_module = newm;
         }
-        // add `eval` function
-        form = jl_call_scm_on_ast_and_loc("module-default-defs", (jl_value_t*)name, newm, filename, lineno);
-        jl_toplevel_eval_flex(newm, form, 0, 1);
-        form = NULL;
     }
-
-    for (int i = 0; i < jl_array_len(exprs); i++) {
-        // process toplevel form
-        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        form = jl_expand_stmt_with_loc(jl_array_ptr_ref(exprs, i), newm, jl_filename, jl_lineno);
-        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        (void)jl_toplevel_eval_flex(newm, form, 1, 1);
+    else {
+        jl_declare_constant_val(NULL, parent_module, name, (jl_value_t*)newm);
     }
-    newm->primary_world = jl_atomic_load_acquire(&jl_world_counter);
-    ct->world_age = last_age;
+    JL_GC_POP();
 
-#if 0
-    // some optional post-processing steps
-    size_t i;
-    jl_svec_t *table = jl_atomic_load_relaxed(&newm->bindings);
-    for (size_t i = 0; i < jl_svec_len(table); i++) {
-        jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i);
-        if ((void*)b != jl_nothing) {
-            // remove non-exported macros
-            if (jl_symbol_name(b->name)[0]=='@' &&
-                !b->exportp && b->owner == b)
-                b->value = NULL;
-            // error for unassigned exports
-            /*
-            if (b->exportp && b->owner==b && b->value==NULL)
-                jl_errorf("identifier %s exported from %s is not initialized",
-                          jl_symbol_name(b->name), jl_symbol_name(newm->name));
-            */
-        }
-    }
-#endif
+    return newm;
+}
 
+JL_DLLEXPORT void jl_end_new_module(jl_module_t *newm) {
+    jl_value_t *form = NULL;
+    JL_GC_PUSH1(&form);
     JL_LOCK(&jl_modules_mutex);
     uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)newm);
     assert(*refcnt > (uintptr_t)HT_NOTFOUND);
@@ -248,13 +186,12 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         jl_module_init_order = jl_alloc_vec_any(0);
     jl_array_ptr_1d_push(jl_module_init_order, (jl_value_t*)newm);
 
-    // defer init of children until parent is done being defined
-    // then initialize all in definition-finished order
-    // at build time, don't run them at all (defer for runtime)
-    form = NULL;
+    // Defer init of direct children until parent is done being defined then
+    // initialize all in definition-finished order.
+    // At build time, don't run them at all - defer for runtime
     if (!jl_generating_output()) {
         if (!ptrhash_has(&jl_current_modules, (void*)newm->parent)) {
-            size_t i, l = jl_array_len(jl_module_init_order);
+            size_t i, l = jl_array_nrows(jl_module_init_order);
             size_t ns = 0;
             form = (jl_value_t*)jl_alloc_vec_any(0);
             for (i = 0; i < l; i++) {
@@ -273,7 +210,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
     JL_UNLOCK(&jl_modules_mutex);
 
     if (form) {
-        size_t i, l = jl_array_len(form);
+        size_t i, l = jl_array_nrows(form);
         for (i = 0; i < l; i++) {
             jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(form, i);
             JL_GC_PROMISE_ROOTED(m);
@@ -281,25 +218,66 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         }
     }
 
-    jl_precompile_toplevel_module = old_toplevel_module;
-
     JL_GC_POP();
+}
+
+static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex, const char **toplevel_filename, int *toplevel_lineno)
+{
+    assert(ex->head == jl_module_sym);
+
+    jl_value_t *syntax_version = jl_nothing;
+    int idx = 0;
+    if (!jl_is_bool(jl_exprarg(ex, idx))) {
+        syntax_version = jl_exprarg(ex, idx++);
+    }
+
+    if (jl_array_nrows(ex->args) != idx+3 || !jl_is_expr(jl_exprarg(ex, idx+2))) {
+        jl_error("syntax: malformed module expression");
+    }
+
+    int std_imports = (jl_exprarg(ex, idx++) == jl_true);
+    jl_sym_t *name = (jl_sym_t*)jl_exprarg(ex, idx++);
+    if (!jl_is_symbol(name)) {
+        jl_type_error("module", (jl_value_t*)jl_symbol_type, (jl_value_t*)name);
+    }
+
+    if (((jl_expr_t *)(jl_exprarg(ex, idx)))->head != jl_symbol("block")) {
+        jl_error("syntax: module expression third argument must be a block");
+    }
+    jl_array_t *stmts = ((jl_expr_t*)jl_exprarg(ex, idx))->args;
+
+    int lineno = 0;
+    const char *filename = "none";
+    if (jl_array_nrows(stmts) > 0) {
+        jl_value_t *lineex = jl_array_ptr_ref(stmts, 0);
+        if (jl_is_linenode(lineex)) {
+            lineno = jl_linenode_line(lineex);
+            jl_value_t *file = jl_linenode_file(lineex);
+            if (jl_is_symbol(file))
+                filename = jl_symbol_name((jl_sym_t*)file);
+        }
+    }
+
+    jl_module_t *newm = jl_begin_new_module(parent_module, name, syntax_version, std_imports, filename, lineno);
+    JL_GC_PROMISE_ROOTED(newm); // Rooted in jl_current_modules
+    jl_eval_toplevel_stmts(newm, stmts, 1, 0, toplevel_filename, toplevel_lineno);
+    jl_end_new_module(newm);
+
     return (jl_value_t*)newm;
 }
 
-static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f, int fast)
+static jl_value_t *jl_eval_dot_expr(jl_task_t *ct, jl_module_t *m, jl_value_t *x, jl_value_t *f, int fast, const char **toplevel_filename, int *toplevel_lineno)
 {
-    jl_task_t *ct = jl_current_task;
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 3);
-    args[1] = jl_toplevel_eval_flex(m, x, fast, 0);
-    args[2] = jl_toplevel_eval_flex(m, f, fast, 0);
+    args[1] = jl_toplevel_eval_flex(m, x, fast, 0, toplevel_filename, toplevel_lineno);
+    args[2] = jl_toplevel_eval_flex(m, f, fast, 0, toplevel_filename, toplevel_lineno);
     if (jl_is_module(args[1])) {
         JL_TYPECHK(getglobal, symbol, args[2]);
-        args[0] = jl_eval_global_var((jl_module_t*)args[1], (jl_sym_t*)args[2]);
+        args[0] = jl_eval_global_var((jl_module_t*)args[1], (jl_sym_t*)args[2], ct->world_age);
     }
     else {
-        args[0] = jl_eval_global_var(jl_base_relative_to(m), jl_symbol("getproperty"));
+        args[0] = jl_eval_global_var(jl_base_relative_to(m), jl_symbol("getproperty"), ct->world_age);
         size_t last_age = ct->world_age;
         ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         args[0] = jl_apply(args, 3);
@@ -309,31 +287,78 @@ static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f
     return args[0];
 }
 
-void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type) {
+extern void check_safe_newbinding(jl_module_t *m, jl_sym_t *var);
+void jl_declare_global(jl_module_t *m, jl_value_t *arg, jl_value_t *set_type, int strong) {
     // create uninitialized mutable binding for "global x" decl sometimes or probably
-    size_t i, l = jl_array_len(ex->args);
-    for (i = 0; i < l; i++) {
-        jl_value_t *arg = jl_exprarg(ex, i);
-        jl_module_t *gm;
-        jl_sym_t *gs;
-        if (jl_is_globalref(arg)) {
-            gm = jl_globalref_mod(arg);
-            gs = jl_globalref_name(arg);
-        }
-        else {
-            assert(jl_is_symbol(arg));
-            gm = m;
-            gs = (jl_sym_t*)arg;
+    jl_module_t *gm;
+    jl_sym_t *gs;
+    assert(!jl_is_expr(arg)); // Should have been resolved before this
+    if (jl_is_globalref(arg)) {
+        gm = jl_globalref_mod(arg);
+        gs = jl_globalref_name(arg);
+    }
+    else {
+        assert(jl_is_symbol(arg));
+        gm = m;
+        gs = (jl_sym_t*)arg;
+    }
+    JL_LOCK(&world_counter_lock);
+    size_t new_world = jl_atomic_load_relaxed(&jl_world_counter) + 1;
+    jl_binding_t *b = jl_get_module_binding(gm, gs, 1);
+    jl_binding_partition_t *bpart = NULL;
+    if (!strong && set_type)
+        jl_error("Weak global definitions cannot have types");
+    enum jl_partition_kind new_kind = strong ? PARTITION_KIND_GLOBAL : PARTITION_KIND_DECLARED;
+    jl_value_t *global_type = set_type;
+    if (strong && !global_type)
+        global_type = (jl_value_t*)jl_any_type;
+    while (1) {
+        bpart = jl_get_binding_partition(b, new_world);
+        enum jl_partition_kind kind = jl_binding_kind(bpart);
+        if (kind != PARTITION_KIND_GLOBAL) {
+            if (jl_bkind_is_some_implicit(kind) || kind == PARTITION_KIND_DECLARED) {
+                if (kind == new_kind) {
+                    if (!set_type)
+                        goto done;
+                    goto check_type;
+                }
+                check_safe_newbinding(gm, gs);
+                if (jl_atomic_load_relaxed(&bpart->min_world) == new_world) {
+                    bpart->kind = new_kind | (bpart->kind & PARTITION_MASK_FLAG);
+                    bpart->restriction = global_type;
+                    if (global_type)
+                        jl_gc_wb(bpart, global_type);
+                    continue;
+                } else {
+                    jl_replace_binding_locked(b, bpart, global_type, new_kind, new_world);
+                }
+                break;
+            } else if (set_type) {
+                if (jl_bkind_is_some_constant(kind)) {
+                    jl_errorf("cannot set type for constant %s.%s.",
+                            jl_symbol_name(gm->name), jl_symbol_name(gs));
+                } else {
+                    jl_errorf("cannot set type for imported binding %s.%s.",
+                            jl_symbol_name(gm->name), jl_symbol_name(gs));
+                }
+            }
         }
-        if (!jl_binding_resolved_p(gm, gs)) {
-            jl_binding_t *b = jl_get_binding_wr(gm, gs);
-            if (set_type) {
-                jl_value_t *old_ty = NULL;
-                // maybe set the type too, perhaps
-                jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type);
+        if (set_type)
+        {
+check_type: ;
+            jl_value_t *old_ty = bpart->restriction;
+            JL_GC_PROMISE_ROOTED(old_ty);
+            if (!jl_types_equal(set_type, old_ty)) {
+                jl_errorf("cannot set type for global %s.%s. It already has a value or is already set to a different type.",
+                        jl_symbol_name(gm->name), jl_symbol_name(gs));
             }
+
         }
+        goto done;
     }
+    jl_atomic_store_release(&jl_world_counter, new_world);
+done:
+    JL_UNLOCK(&world_counter_lock);
 }
 
 // module referenced by (top ...) from within m
@@ -352,7 +377,7 @@ JL_DLLEXPORT jl_module_t *jl_base_relative_to(jl_module_t *m)
     return jl_top_module;
 }
 
-static void expr_attributes(jl_value_t *v, int *has_ccall, int *has_defs, int *has_opaque)
+static void expr_attributes(jl_value_t *v, jl_array_t *body, int *has_ccall, int *has_defs, int *has_opaque)
 {
     if (!jl_is_expr(v))
         return;
@@ -366,7 +391,7 @@ static void expr_attributes(jl_value_t *v, int *has_ccall, int *has_defs, int *h
         // might still need to be optimized.
         return;
     }
-    else if (head == jl_const_sym || head == jl_copyast_sym) {
+    else if (head == jl_copyast_sym) {
         // Note: `copyast` is included here since it indicates the presence of
         // `quote` and probably `eval`.
         *has_defs = 1;
@@ -390,15 +415,14 @@ static void expr_attributes(jl_value_t *v, int *has_ccall, int *has_defs, int *h
     else if (head == jl_call_sym && jl_expr_nargs(e) > 0) {
         jl_value_t *called = NULL;
         jl_value_t *f = jl_exprarg(e, 0);
+        if (jl_is_ssavalue(f)) {
+            f = jl_array_ptr_ref(body, ((jl_ssavalue_t*)f)->id - 1);
+        }
         if (jl_is_globalref(f)) {
             jl_module_t *mod = jl_globalref_mod(f);
             jl_sym_t *name = jl_globalref_name(f);
-            if (jl_binding_resolved_p(mod, name)) {
-                jl_binding_t *b = jl_get_binding(mod, name);
-                if (b && b->constp) {
-                    called = jl_atomic_load_relaxed(&b->value);
-                }
-            }
+            jl_binding_t *b = jl_get_binding(mod, name);
+            called = jl_get_latest_binding_value_if_const(b);
         }
         else if (jl_is_quotenode(f)) {
             called = jl_quotenode_value(f);
@@ -407,17 +431,18 @@ static void expr_attributes(jl_value_t *v, int *has_ccall, int *has_defs, int *h
             if (jl_is_intrinsic(called) && jl_unbox_int32(called) == (int)llvmcall) {
                 *has_ccall = 1;
             }
-            if (called == jl_builtin__typebody) {
+            // TODO: rely on latestworld instead of function callee detection here (or add it to jl_is_toplevel_only_expr)
+            if (called == BUILTIN(_typebody) || called == BUILTIN(declare_const)) {
                 *has_defs = 1;
             }
         }
         return;
     }
     int i;
-    for (i = 0; i < jl_array_len(e->args); i++) {
+    for (i = 0; i < jl_array_nrows(e->args); i++) {
         jl_value_t *a = jl_exprarg(e, i);
         if (jl_is_expr(a))
-            expr_attributes(a, has_ccall, has_defs, has_opaque);
+            expr_attributes(a, body, has_ccall, has_defs, has_opaque);
     }
 }
 
@@ -429,9 +454,9 @@ int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile)
     int has_ccall = 0, has_defs = 0, has_opaque = 0;
     if (include_force_compile && jl_has_meta(body, jl_force_compile_sym))
         return 1;
-    for(i=0; i < jl_array_len(body); i++) {
+    for(i=0; i < jl_array_nrows(body); i++) {
         jl_value_t *stmt = jl_array_ptr_ref(body,i);
-        expr_attributes(stmt, &has_ccall, &has_defs, &has_opaque);
+        expr_attributes(stmt, body, &has_ccall, &has_defs, &has_opaque);
         if (has_ccall)
             return 1;
     }
@@ -442,7 +467,7 @@ static void body_attributes(jl_array_t *body, int *has_ccall, int *has_defs, int
 {
     size_t i;
     *has_loops = 0;
-    for(i=0; i < jl_array_len(body); i++) {
+    for(i=0; i < jl_array_nrows(body); i++) {
         jl_value_t *stmt = jl_array_ptr_ref(body,i);
         if (!*has_loops) {
             if (jl_is_gotonode(stmt)) {
@@ -454,115 +479,18 @@ static void body_attributes(jl_array_t *body, int *has_ccall, int *has_defs, int
                     *has_loops = 1;
             }
         }
-        expr_attributes(stmt, has_ccall, has_defs, has_opaque);
+        expr_attributes(stmt, body, has_ccall, has_defs, has_opaque);
     }
     *forced_compile = jl_has_meta(body, jl_force_compile_sym);
 }
 
-static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_ROOTED
-{
-    JL_TIMING(LOAD_IMAGE, LOAD_Require);
-    jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "%s", jl_symbol_name(var));
-
-    static jl_value_t *require_func = NULL;
-    int build_mode = jl_generating_output();
-    jl_module_t *m = NULL;
-    jl_task_t *ct = jl_current_task;
-    if (require_func == NULL && jl_base_module != NULL) {
-        require_func = jl_get_global(jl_base_module, jl_symbol("require"));
-    }
-    if (require_func != NULL) {
-        size_t last_age = ct->world_age;
-        ct->world_age = (build_mode ? jl_base_module->primary_world : jl_atomic_load_acquire(&jl_world_counter));
-        jl_value_t *reqargs[3];
-        reqargs[0] = require_func;
-        reqargs[1] = (jl_value_t*)mod;
-        reqargs[2] = (jl_value_t*)var;
-        m = (jl_module_t*)jl_apply(reqargs, 3);
-        ct->world_age = last_age;
-    }
-    if (m == NULL || !jl_is_module(m)) {
-        jl_errorf("failed to load module %s", jl_symbol_name(var));
-    }
-    return m;
-}
-
-// either:
-//   - sets *name and returns the module to import *name from
-//   - sets *name to NULL and returns a module to import
-static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PROPAGATES_ROOT,
-                                     jl_array_t *args, jl_sym_t **name, const char *keyword) JL_GLOBALLY_ROOTED
-{
-    if (jl_array_len(args) == 0)
-        jl_errorf("malformed \"%s\" statement", keyword);
-    jl_sym_t *var = (jl_sym_t*)jl_array_ptr_ref(args, 0);
-    size_t i = 1;
-    jl_module_t *m = NULL;
-    *name = NULL;
-    if (!jl_is_symbol(var))
-        jl_type_error(keyword, (jl_value_t*)jl_symbol_type, (jl_value_t*)var);
-
-    if (from != NULL) {
-        m = from;
-        i = 0;
-    }
-    else if (var != jl_dot_sym) {
-        // `A.B`: call the loader to obtain the root A in the current environment.
-        if (jl_core_module && var == jl_core_module->name) {
-            m = jl_core_module;
-        }
-        else if (jl_base_module && var == jl_base_module->name) {
-            m = jl_base_module;
-        }
-        else {
-            m = call_require(where, var);
-        }
-        if (i == jl_array_len(args))
-            return m;
-    }
-    else {
-        // `.A.B.C`: strip off leading dots by following parent links
-        m = where;
-        while (1) {
-            if (i >= jl_array_len(args))
-                jl_error("invalid module path");
-            var = (jl_sym_t*)jl_array_ptr_ref(args, i);
-            if (var != jl_dot_sym)
-                break;
-            i++;
-            assert(m);
-            m = m->parent;
-        }
-    }
-
-    while (1) {
-        var = (jl_sym_t*)jl_array_ptr_ref(args, i);
-        if (!jl_is_symbol(var))
-            jl_type_error(keyword, (jl_value_t*)jl_symbol_type, (jl_value_t*)var);
-        if (var == jl_dot_sym)
-            jl_errorf("invalid %s path: \".\" in identifier path", keyword);
-        if (i == jl_array_len(args)-1)
-            break;
-        m = (jl_module_t*)jl_eval_global_var(m, var);
-        JL_GC_PROMISE_ROOTED(m);
-        if (!jl_is_module(m))
-            jl_errorf("invalid %s path: \"%s\" does not name a module", keyword, jl_symbol_name(var));
-        i++;
-    }
-    *name = var;
-    return m;
-}
-
 int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT
 {
     return jl_is_expr(e) &&
         (((jl_expr_t*)e)->head == jl_module_sym ||
-         ((jl_expr_t*)e)->head == jl_import_sym ||
-         ((jl_expr_t*)e)->head == jl_using_sym ||
          ((jl_expr_t*)e)->head == jl_export_sym ||
+         ((jl_expr_t*)e)->head == jl_public_sym ||
          ((jl_expr_t*)e)->head == jl_thunk_sym ||
-         ((jl_expr_t*)e)->head == jl_global_sym ||
-         ((jl_expr_t*)e)->head == jl_const_sym ||
          ((jl_expr_t*)e)->head == jl_toplevel_sym ||
          ((jl_expr_t*)e)->head == jl_error_sym ||
          ((jl_expr_t*)e)->head == jl_incomplete_sym);
@@ -574,132 +502,134 @@ int jl_needs_lowering(jl_value_t *e) JL_NOTSAFEPOINT
         return 0;
     jl_expr_t *ex = (jl_expr_t*)e;
     jl_sym_t *head = ex->head;
-    if (head == jl_module_sym || head == jl_import_sym || head == jl_using_sym ||
-        head == jl_export_sym || head == jl_thunk_sym || head == jl_toplevel_sym ||
-        head == jl_error_sym || head == jl_incomplete_sym || head == jl_method_sym) {
-        return 0;
-    }
-    if (head == jl_global_sym || head == jl_const_sym) {
-        size_t i, l = jl_array_len(ex->args);
-        for (i = 0; i < l; i++) {
-            jl_value_t *a = jl_exprarg(ex, i);
-            if (!jl_is_symbol(a) && !jl_is_globalref(a))
-                return 1;
-        }
+    if (head == jl_module_sym || head == jl_export_sym || head == jl_public_sym ||
+        head == jl_thunk_sym || head == jl_toplevel_sym || head == jl_error_sym ||
+        head == jl_incomplete_sym || head == jl_method_sym) {
         return 0;
     }
     return 1;
 }
 
-static jl_method_instance_t *method_instance_for_thunk(jl_code_info_t *src, jl_module_t *module)
+JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_for_uninferred(jl_method_instance_t *mi, jl_code_info_t *src)
 {
-    jl_method_instance_t *li = jl_new_method_instance_uninit();
-    jl_atomic_store_relaxed(&li->uninferred, (jl_value_t*)src);
-    li->specTypes = (jl_value_t*)jl_emptytuple_type;
-    li->def.module = module;
-    return li;
-}
+    jl_svec_t *edges = jl_emptysvec;
+    if (src->edges && jl_is_svec(src->edges))
+        edges = (jl_svec_t*)src->edges;
 
-static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import, jl_sym_t *asname)
-{
-    assert(m);
-    jl_sym_t *name = asname ? asname : import->name;
-    // TODO: this is a bit race-y with what error message we might print
-    jl_binding_t *b = jl_get_module_binding(m, name, 0);
-    jl_binding_t *b2;
-    if (b != NULL && (b2 = jl_atomic_load_relaxed(&b->owner)) != NULL) {
-        if (b2->constp && jl_atomic_load_relaxed(&b2->value) == (jl_value_t*)import)
-            return;
-        if (b2 != b)
-            jl_errorf("importing %s into %s conflicts with an existing global",
-                      jl_symbol_name(name), jl_symbol_name(m->name));
-    }
-    else {
-        b = jl_get_binding_wr(m, name);
-    }
-    jl_declare_constant(b, m, name);
-    jl_checked_assignment(b, m, name, (jl_value_t*)import);
-    b->imported = 1;
+    // Do not compress this, we expect it to be shortlived.
+    jl_code_instance_t *ci = jl_new_codeinst(mi, (jl_value_t*)jl_uninferred_sym,
+        (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, jl_nothing,
+        (jl_value_t*)src, 0, src->min_world, src->max_world,
+        0, NULL, NULL, edges);
+    return ci;
 }
 
-// in `import A.B: x, y, ...`, evaluate the `A.B` part if it exists
-static jl_module_t *eval_import_from(jl_module_t *m JL_PROPAGATES_ROOT, jl_expr_t *ex, const char *keyword)
+JL_DLLEXPORT jl_method_instance_t *jl_method_instance_for_thunk(jl_code_info_t *src, jl_module_t *module)
 {
-    if (jl_expr_nargs(ex) == 1 && jl_is_expr(jl_exprarg(ex, 0))) {
-        jl_expr_t *fr = (jl_expr_t*)jl_exprarg(ex, 0);
-        if (fr->head == jl_colon_sym) {
-            if (jl_expr_nargs(fr) > 0 && jl_is_expr(jl_exprarg(fr, 0))) {
-                jl_expr_t *path = (jl_expr_t*)jl_exprarg(fr, 0);
-                if (((jl_expr_t*)path)->head == jl_dot_sym) {
-                    jl_sym_t *name = NULL;
-                    jl_module_t *from = eval_import_path(m, NULL, path->args, &name, keyword);
-                    if (name != NULL) {
-                        from = (jl_module_t*)jl_eval_global_var(from, name);
-                        if (!jl_is_module(from))
-                            jl_errorf("invalid %s path: \"%s\" does not name a module", keyword, jl_symbol_name(name));
-                    }
-                    return from;
-                }
-            }
-            jl_errorf("malformed \"%s:\" statement", keyword);
-        }
-    }
-    return NULL;
-}
+    jl_method_instance_t *mi = jl_new_method_instance_uninit();
+    mi->specTypes = (jl_value_t*)jl_emptytuple_type;
+    mi->def.module = module;
+    JL_GC_PUSH1(&mi);
 
-static void check_macro_rename(jl_sym_t *from, jl_sym_t *to, const char *keyword)
-{
-    char *n1 = jl_symbol_name(from), *n2 = jl_symbol_name(to);
-    if (n1[0] == '@' && n2[0] != '@')
-        jl_errorf("cannot rename macro \"%s\" to non-macro \"%s\" in \"%s\"", n1, n2, keyword);
-    if (n1[0] != '@' && n2[0] == '@')
-        jl_errorf("cannot rename non-macro \"%s\" to macro \"%s\" in \"%s\"", n1, n2, keyword);
+    jl_code_instance_t *ci = jl_new_codeinst_for_uninferred(mi, src);
+    jl_atomic_store_relaxed(&mi->cache, ci);
+    jl_gc_wb(mi, ci);
+
+    JL_GC_POP();
+    return mi;
 }
 
 // Eval `throw(ErrorException(msg)))` in module `m`.
 // Used in `jl_toplevel_eval_flex` instead of `jl_throw` so that the error
 // location in julia code gets into the backtrace.
-static void jl_eval_throw(jl_module_t *m, jl_value_t *exc)
+static void jl_eval_throw(jl_module_t *m, jl_value_t *exc, const char *filename, int lineno)
 {
     jl_value_t *throw_ex = (jl_value_t*)jl_exprn(jl_call_sym, 2);
     JL_GC_PUSH1(&throw_ex);
-    jl_exprargset(throw_ex, 0, jl_builtin_throw);
+    jl_exprargset(throw_ex, 0, BUILTIN(throw));
     jl_exprargset(throw_ex, 1, exc);
-    jl_toplevel_eval_flex(m, throw_ex, 0, 0);
+    jl_toplevel_eval_flex(m, throw_ex, 0, 0, &filename, &lineno);
     JL_GC_POP();
 }
 
 // Format error message and call jl_eval
-static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...)
+static void jl_eval_errorf(jl_module_t *m, const char *filename, int lineno, const char* fmt, ...)
 {
     va_list args;
     va_start(args, fmt);
     jl_value_t *exc = jl_vexceptionf(jl_errorexception_type, fmt, args);
     va_end(args);
     JL_GC_PUSH1(&exc);
-    jl_eval_throw(m, exc);
+    jl_eval_throw(m, exc, filename, lineno);
+    JL_GC_POP();
+}
+
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(
+    jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *val,
+    enum jl_partition_kind constant_kind)
+{
+    JL_LOCK(&world_counter_lock);
+    size_t new_world = jl_atomic_load_relaxed(&jl_world_counter) + 1;
+    jl_binding_partition_t *bpart = jl_declare_constant_val3(b, mod, var, val, constant_kind, new_world);
+    if (jl_atomic_load_relaxed(&bpart->min_world) == new_world)
+        jl_atomic_store_release(&jl_world_counter, new_world);
+    JL_UNLOCK(&world_counter_lock);
+    return bpart;
+}
+
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *val)
+{
+    return jl_declare_constant_val2(b, mod, var, val, val ? PARTITION_KIND_CONST : PARTITION_KIND_UNDEF_CONST);
+}
+
+static jl_value_t *jl_eval_toplevel_stmts(jl_module_t *JL_NONNULL m, jl_array_t *stmts, int fast, int need_value, const char **toplevel_filename, int *toplevel_lineno)
+{
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    jl_value_t *root = NULL;
+    JL_GC_PUSH1(&root);
+    jl_value_t *res = jl_nothing;
+    int i;
+    for (i = 0; i < jl_array_nrows(stmts); i++) {
+        root = jl_array_ptr_ref(stmts, i);
+        if (jl_needs_lowering(root)) {
+            root = jl_svecref(jl_lower(root, m, *toplevel_filename, *toplevel_lineno, ~(size_t)0,
+                                       need_value), 0);
+        }
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        res = jl_toplevel_eval_flex(m, root, fast, 1, toplevel_filename, toplevel_lineno);
+    }
+    ct->world_age = last_age;
     JL_GC_POP();
+    return res;
 }
 
-jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int fast, int expanded)
+JL_DLLEXPORT jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int fast, int expanded, const char **toplevel_filename, int *toplevel_lineno)
 {
     jl_task_t *ct = jl_current_task;
+    if (jl_is_globalref(e)) {
+        return jl_eval_globalref((jl_globalref_t*)e, ct->world_age);
+    }
+    if (jl_is_symbol(e)) {
+        char *n = jl_symbol_name((jl_sym_t*)e), *n0 = n;
+        while (*n == '_') ++n;
+        if (*n == 0 && n > n0)
+            jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, "all-underscore identifiers are write-only and their values cannot be used in expressions");
+        return jl_eval_global_var(m, (jl_sym_t*)e, ct->world_age);
+    }
     if (!jl_is_expr(e)) {
         if (jl_is_linenode(e)) {
-            jl_lineno = jl_linenode_line(e);
+            *toplevel_lineno = jl_linenode_line(e);
             jl_value_t *file = jl_linenode_file(e);
             if (file != jl_nothing) {
                 assert(jl_is_symbol(file));
-                jl_filename = jl_symbol_name((jl_sym_t*)file);
+                *toplevel_filename = jl_symbol_name((jl_sym_t*)file);
             }
+            // Not thread safe. For debugging and last resort error messages (jl_fprint_critical_error) only.
+            jl_atomic_store_relaxed(&jl_filename, *toplevel_filename);
+            jl_atomic_store_relaxed(&jl_lineno, *toplevel_lineno);
             return jl_nothing;
         }
-        if (jl_is_symbol(e)) {
-            char *n = jl_symbol_name((jl_sym_t*)e), *n0 = n;
-            while (*n == '_') ++n;
-            if (*n == 0 && n > n0)
-                jl_eval_errorf(m, "all-underscore identifier used as rvalue");
-        }
         return jl_interpret_toplevel_expr_in(m, e, NULL, NULL);
     }
 
@@ -707,12 +637,12 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
 
     if (ex->head == jl_dot_sym && jl_expr_nargs(ex) != 1) {
         if (jl_expr_nargs(ex) != 2)
-            jl_eval_errorf(m, "syntax: malformed \".\" expression");
+            jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, "syntax: malformed \".\" expression");
         jl_value_t *lhs = jl_exprarg(ex, 0);
         jl_value_t *rhs = jl_exprarg(ex, 1);
         // only handle `a.b` syntax here, so qualified names can be eval'd in pure contexts
         if (jl_is_quotenode(rhs) && jl_is_symbol(jl_fieldref(rhs, 0))) {
-            return jl_eval_dot_expr(m, lhs, rhs, fast);
+            return jl_eval_dot_expr(ct, m, lhs, rhs, fast, toplevel_filename, toplevel_lineno);
         }
     }
 
@@ -722,173 +652,66 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
 
     jl_method_instance_t *mfunc = NULL;
     jl_code_info_t *thk = NULL;
-    JL_GC_PUSH3(&mfunc, &thk, &ex);
+    jl_value_t *root = NULL;
+    JL_GC_PUSH4(&mfunc, &thk, &ex, &root);
 
     size_t last_age = ct->world_age;
-    if (!expanded && jl_needs_lowering(e)) {
-        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-        ex = (jl_expr_t*)jl_expand_with_loc_warn(e, m, jl_filename, jl_lineno);
-        ct->world_age = last_age;
+    if (!expanded && (jl_needs_lowering(e))) {
+        ex = (jl_expr_t*)jl_svecref(jl_lower(e, m, *toplevel_filename, *toplevel_lineno, ~(size_t)0, 1), 0);
     }
     jl_sym_t *head = jl_is_expr(ex) ? ex->head : NULL;
 
     if (head == jl_module_sym) {
-        jl_value_t *val = jl_eval_module_expr(m, ex);
+        jl_value_t *val = jl_eval_module_expr(m, ex, toplevel_filename, toplevel_lineno);
         JL_GC_POP();
         return val;
     }
-    else if (head == jl_using_sym) {
-        jl_sym_t *name = NULL;
-        jl_module_t *from = eval_import_from(m, ex, "using");
-        size_t i = 0;
-        if (from) {
-            i = 1;
-            ex = (jl_expr_t*)jl_exprarg(ex, 0);
-        }
-        for (; i < jl_expr_nargs(ex); i++) {
-            jl_value_t *a = jl_exprarg(ex, i);
-            if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_dot_sym) {
-                name = NULL;
-                jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)a)->args, &name, "using");
-                jl_module_t *u = import;
-                if (name != NULL)
-                    u = (jl_module_t*)jl_eval_global_var(import, name);
-                if (from) {
-                    // `using A: B` syntax
-                    jl_module_use(m, import, name);
-                }
-                else {
-                    if (!jl_is_module(u))
-                        jl_eval_errorf(m, "invalid using path: \"%s\" does not name a module",
-                                       jl_symbol_name(name));
-                    // `using A.B` syntax
-                    jl_module_using(m, u);
-                    if (m == jl_main_module && name == NULL) {
-                        // TODO: for now, `using A` in Main also creates an explicit binding for `A`
-                        // This will possibly be extended to all modules.
-                        import_module(m, u, NULL);
-                    }
-                }
-                continue;
-            }
-            else if (from && jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_as_sym && jl_expr_nargs(a) == 2 &&
-                     jl_is_expr(jl_exprarg(a, 0)) && ((jl_expr_t*)jl_exprarg(a, 0))->head == jl_dot_sym) {
-                jl_sym_t *asname = (jl_sym_t*)jl_exprarg(a, 1);
-                if (jl_is_symbol(asname)) {
-                    jl_expr_t *path = (jl_expr_t*)jl_exprarg(a, 0);
-                    name = NULL;
-                    jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)path)->args, &name, "using");
-                    assert(name);
-                    check_macro_rename(name, asname, "using");
-                    // `using A: B as C` syntax
-                    jl_module_use_as(m, import, name, asname);
-                    continue;
-                }
-            }
-            jl_eval_errorf(m, "syntax: malformed \"using\" statement");
-        }
-        JL_GC_POP();
-        return jl_nothing;
-    }
-    else if (head == jl_import_sym) {
-        jl_sym_t *name = NULL;
-        jl_module_t *from = eval_import_from(m, ex, "import");
-        size_t i = 0;
-        if (from) {
-            i = 1;
-            ex = (jl_expr_t*)jl_exprarg(ex, 0);
-        }
-        for (; i < jl_expr_nargs(ex); i++) {
-            jl_value_t *a = jl_exprarg(ex, i);
-            if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_dot_sym) {
-                name = NULL;
-                jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)a)->args, &name, "import");
-                if (name == NULL) {
-                    // `import A` syntax
-                    import_module(m, import, NULL);
-                }
-                else {
-                    // `import A.B` or `import A: B` syntax
-                    jl_module_import(m, import, name);
-                }
-                continue;
-            }
-            else if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_as_sym && jl_expr_nargs(a) == 2 &&
-                     jl_is_expr(jl_exprarg(a, 0)) && ((jl_expr_t*)jl_exprarg(a, 0))->head == jl_dot_sym) {
-                jl_sym_t *asname = (jl_sym_t*)jl_exprarg(a, 1);
-                if (jl_is_symbol(asname)) {
-                    jl_expr_t *path = (jl_expr_t*)jl_exprarg(a, 0);
-                    name = NULL;
-                    jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)path)->args, &name, "import");
-                    if (name == NULL) {
-                        // `import A as B` syntax
-                        import_module(m, import, asname);
-                    }
-                    else {
-                        check_macro_rename(name, asname, "import");
-                        // `import A.B as C` syntax
-                        jl_module_import_as(m, import, name, asname);
-                    }
-                    continue;
-                }
+    else if (head == jl_export_sym || head == jl_public_sym) {
+        int exp = (head == jl_export_sym);
+        volatile int any_new = 0;
+        JL_LOCK(&world_counter_lock);
+        size_t new_world = jl_atomic_load_acquire(&jl_world_counter)+1;
+        JL_TRY {
+            for (size_t i = 0; i < jl_array_nrows(ex->args); i++) {
+                jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(ex->args, i);
+                if (!jl_is_symbol(name))
+                    jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+                         exp ? "syntax: malformed \"export\" statement" :
+                               "syntax: malformed \"public\" statement");
+                if (jl_module_public_(m, name, exp, new_world))
+                    any_new = 1;
             }
-            jl_eval_errorf(m, "syntax: malformed \"import\" statement");
-        }
-        JL_GC_POP();
-        return jl_nothing;
-    }
-    else if (head == jl_export_sym) {
-        for (size_t i = 0; i < jl_array_len(ex->args); i++) {
-            jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(ex->args, i);
-            if (!jl_is_symbol(name))
-                jl_eval_errorf(m, "syntax: malformed \"export\" statement");
-            jl_module_export(m, name);
-        }
-        JL_GC_POP();
-        return jl_nothing;
-    }
-    else if (head == jl_global_sym) {
-        jl_eval_global_expr(m, ex, 0);
-        JL_GC_POP();
-        return jl_nothing;
-    }
-    else if (head == jl_const_sym) {
-        jl_sym_t *arg = (jl_sym_t*)jl_exprarg(ex, 0);
-        jl_module_t *gm;
-        jl_sym_t *gs;
-        if (jl_is_globalref(arg)) {
-            gm = jl_globalref_mod(arg);
-            gs = jl_globalref_name(arg);
         }
-        else {
-            assert(jl_is_symbol(arg));
-            gm = m;
-            gs = (jl_sym_t*)arg;
+        JL_CATCH {
+            if (any_new)
+                jl_atomic_store_release(&jl_world_counter, new_world);
+            JL_UNLOCK(&world_counter_lock);
+            jl_rethrow();
         }
-        jl_binding_t *b = jl_get_binding_wr(gm, gs);
-        jl_declare_constant(b, gm, gs);
+        if (any_new)
+            jl_atomic_store_release(&jl_world_counter, new_world);
+        JL_UNLOCK(&world_counter_lock);
         JL_GC_POP();
         return jl_nothing;
     }
     else if (head == jl_toplevel_sym) {
-        jl_value_t *res = jl_nothing;
-        int i;
-        for (i = 0; i < jl_array_len(ex->args); i++) {
-            res = jl_toplevel_eval_flex(m, jl_array_ptr_ref(ex->args, i), fast, 0);
-        }
+        jl_value_t *res = jl_eval_toplevel_stmts(m, ex->args, fast, 1,
+                                                 toplevel_filename, toplevel_lineno);
         JL_GC_POP();
         return res;
     }
     else if (head == jl_error_sym || head == jl_incomplete_sym) {
         if (jl_expr_nargs(ex) == 0)
-            jl_eval_errorf(m, "malformed \"%s\" expression", jl_symbol_name(head));
+            jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+                "malformed \"%s\" expression", jl_symbol_name(head));
         if (jl_is_string(jl_exprarg(ex, 0)))
-            jl_eval_errorf(m, "syntax: %s", jl_string_data(jl_exprarg(ex, 0)));
-        jl_eval_throw(m, jl_exprarg(ex, 0));
+            jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+                "syntax: %s", jl_string_data(jl_exprarg(ex, 0)));
+        jl_eval_throw(m, jl_exprarg(ex, 0), *toplevel_filename, *toplevel_lineno);
     }
     else if (jl_is_symbol(ex)) {
         JL_GC_POP();
-        return jl_eval_global_var(m, (jl_sym_t*)ex);
+        return jl_eval_global_var(m, (jl_sym_t*)ex, ct->world_age);
     }
     else if (head == NULL) {
         JL_GC_POP();
@@ -899,7 +722,8 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
     assert(head == jl_thunk_sym);
     thk = (jl_code_info_t*)jl_exprarg(ex, 0);
     if (!jl_is_code_info(thk) || !jl_typetagis(thk->code, jl_array_any_type)) {
-        jl_eval_errorf(m, "malformed \"thunk\" statement");
+        jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno,
+            "malformed \"thunk\" statement");
     }
     body_attributes((jl_array_t*)thk->code, &has_ccall, &has_defs, &has_loops, &has_opaque, &forced_compile);
 
@@ -911,16 +735,13 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
             jl_get_module_compile(m) != JL_OPTIONS_COMPILE_OFF &&
             jl_get_module_compile(m) != JL_OPTIONS_COMPILE_MIN)) {
         // use codegen
-        mfunc = method_instance_for_thunk(thk, m);
-        jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0);
-        // Don't infer blocks containing e.g. method definitions, since it's probably not
-        // worthwhile and also unsound (see #24316).
-        // TODO: This is still not correct since an `eval` can happen elsewhere, but it
-        // helps in common cases.
+        mfunc = jl_method_instance_for_thunk(thk, m);
+        jl_resolve_definition_effects_in_ir((jl_array_t*)thk->code, m, NULL, NULL, 0);
+        // Don't infer blocks containing e.g. method definitions, since it's probably not worthwhile.
         size_t world = jl_atomic_load_acquire(&jl_world_counter);
         ct->world_age = world;
         if (!has_defs && jl_get_module_infer(m) != 0) {
-            (void)jl_type_infer(mfunc, world, 0);
+            (void)jl_type_infer(mfunc, world, SOURCE_MODE_ABI, jl_options.trim);
         }
         result = jl_invoke(/*func*/NULL, /*args*/NULL, /*nargs*/0, mfunc);
         ct->world_age = last_age;
@@ -929,9 +750,12 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         // use interpreter
         assert(thk);
         if (has_opaque) {
-            jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0);
+            jl_resolve_definition_effects_in_ir((jl_array_t*)thk->code, m, NULL, NULL, 0);
         }
+        size_t world = jl_atomic_load_acquire(&jl_world_counter);
+        ct->world_age = world;
         result = jl_interpret_toplevel_thunk(m, thk);
+        ct->world_age = last_age;
     }
 
     JL_GC_POP();
@@ -940,18 +764,22 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
 
 JL_DLLEXPORT jl_value_t *jl_toplevel_eval(jl_module_t *m, jl_value_t *v)
 {
-    return jl_toplevel_eval_flex(m, v, 1, 0);
+    const char *filename = jl_atomic_load_relaxed(&jl_filename);
+    int lineno = jl_atomic_load_relaxed(&jl_lineno);
+    return jl_toplevel_eval_flex(m, v, 1, 0, &filename, &lineno);
 }
 
 // Check module `m` is open for `eval/include`, or throw an error.
-static void jl_check_open_for(jl_module_t *m, const char* funcname)
+JL_DLLEXPORT void jl_check_top_level_effect(jl_module_t *m, char *fname)
 {
+    if (jl_current_task->ptls->in_pure_callback)
+        jl_errorf("%s cannot be used in a generated function", fname);
     if (jl_options.incremental && jl_generating_output()) {
         if (m != jl_main_module) { // TODO: this was grand-fathered in
             JL_LOCK(&jl_modules_mutex);
             int open = ptrhash_has(&jl_current_modules, (void*)m);
             if (!open && jl_module_init_order != NULL) {
-                size_t i, l = jl_array_len(jl_module_init_order);
+                size_t i, l = jl_array_nrows(jl_module_init_order);
                 for (i = 0; i < l; i++) {
                     if (m == (jl_module_t*)jl_array_ptr_ref(jl_module_init_order, i)) {
                         open = 1;
@@ -960,63 +788,43 @@ static void jl_check_open_for(jl_module_t *m, const char* funcname)
                 }
             }
             JL_UNLOCK(&jl_modules_mutex);
-            if (!open && !jl_is__toplevel__mod(m)) {
+            if (!open && !jl_is__toplevel__mod(m, jl_current_task)) {
                 const char* name = jl_symbol_name(m->name);
                 jl_errorf("Evaluation into the closed module `%s` breaks incremental compilation "
                           "because the side effects will not be permanent. "
                           "This is likely due to some other module mutating `%s` with `%s` during "
-                          "precompilation - don't do this.", name, name, funcname);
+                          "precompilation - don't do this.", name, name, fname);
             }
         }
     }
 }
 
-JL_DLLEXPORT void jl_check_top_level_effect(jl_module_t *m, char *fname)
-{
-    if (jl_current_task->ptls->in_pure_callback)
-        jl_errorf("%s cannot be used in a generated function", fname);
-    jl_check_open_for(m, fname);
-}
-
 JL_DLLEXPORT jl_value_t *jl_toplevel_eval_in(jl_module_t *m, jl_value_t *ex)
 {
-    jl_task_t *ct = jl_current_task;
-    if (ct->ptls->in_pure_callback)
-        jl_error("eval cannot be used in a generated function");
-    jl_check_open_for(m, "eval");
+    jl_check_top_level_effect(m, "eval");
     jl_value_t *v = NULL;
-    int last_lineno = jl_lineno;
-    const char *last_filename = jl_filename;
-    jl_lineno = 1;
-    jl_filename = "none";
+    int last_lineno = jl_atomic_load_relaxed(&jl_lineno);
+    const char *last_filename = jl_atomic_load_relaxed(&jl_filename);
+    jl_task_t *ct = jl_current_task;
+    jl_atomic_store_relaxed(&jl_lineno, 1);
+    jl_atomic_store_relaxed(&jl_filename, "none");
+    size_t last_age = ct->world_age;
     JL_TRY {
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         v = jl_toplevel_eval(m, ex);
     }
     JL_CATCH {
-        jl_lineno = last_lineno;
-        jl_filename = last_filename;
+        jl_atomic_store_relaxed(&jl_lineno, last_lineno);
+        jl_atomic_store_relaxed(&jl_filename, last_filename);
         jl_rethrow();
     }
-    jl_lineno = last_lineno;
-    jl_filename = last_filename;
+    jl_atomic_store_relaxed(&jl_lineno, last_lineno);
+    jl_atomic_store_relaxed(&jl_filename, last_filename);
+    ct->world_age = last_age;
     assert(v);
     return v;
 }
 
-JL_DLLEXPORT jl_value_t *jl_infer_thunk(jl_code_info_t *thk, jl_module_t *m)
-{
-    jl_method_instance_t *li = method_instance_for_thunk(thk, m);
-    JL_GC_PUSH1(&li);
-    jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0);
-    jl_task_t *ct = jl_current_task;
-    jl_code_info_t *src = jl_type_infer(li, ct->world_age, 0);
-    JL_GC_POP();
-    if (src)
-        return src->rettype;
-    return (jl_value_t*)jl_any_type;
-}
-
-
 //------------------------------------------------------------------------------
 // Code loading: combined parse+eval for include()
 
@@ -1029,10 +837,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
     if (!jl_is_string(text) || !jl_is_string(filename)) {
         jl_errorf("Expected `String`s for `text` and `filename`");
     }
-    jl_task_t *ct = jl_current_task;
-    if (ct->ptls->in_pure_callback)
-        jl_error("cannot use include inside a generated function");
-    jl_check_open_for(module, "include");
+    jl_check_top_level_effect(module, "include");
 
     jl_value_t *result = jl_nothing;
     jl_value_t *ast = NULL;
@@ -1040,50 +845,48 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
     JL_GC_PUSH3(&ast, &result, &expression);
 
     ast = jl_svecref(jl_parse(jl_string_data(text), jl_string_len(text),
-                              filename, 1, 0, (jl_value_t*)jl_all_sym), 0);
+                              filename, 1, 0, (jl_value_t*)jl_all_sym, module), 0);
     if (!jl_is_expr(ast) || ((jl_expr_t*)ast)->head != jl_toplevel_sym) {
         jl_errorf("jl_parse_all() must generate a top level expression");
     }
 
-    int last_lineno = jl_lineno;
-    const char *last_filename = jl_filename;
-    size_t last_age = ct->world_age;
+    jl_task_t *ct = jl_current_task;
+    int last_lineno = jl_atomic_load_relaxed(&jl_lineno);
+    const char *last_filename = jl_atomic_load_relaxed(&jl_filename);
     int lineno = 0;
-    jl_lineno = 0;
-    jl_filename = jl_string_data(filename);
-    int err = 0;
+    jl_atomic_store_relaxed(&jl_lineno, 0);
+    const char *filename_str = jl_string_data(filename);
+    jl_atomic_store_relaxed(&jl_filename, filename_str);
 
     JL_TRY {
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         for (size_t i = 0; i < jl_expr_nargs(ast); i++) {
             expression = jl_exprarg(ast, i);
             if (jl_is_linenode(expression)) {
                 // filename is already set above.
                 lineno = jl_linenode_line(expression);
-                jl_lineno = lineno;
+                jl_atomic_store_relaxed(&jl_lineno, lineno);
                 continue;
             }
-            expression = jl_expand_with_loc_warn(expression, module,
-                                                 jl_string_data(filename), lineno);
+            expression = jl_svecref(jl_lower(expression, module, jl_string_data(filename), lineno, ~(size_t)0, 1), 0);
             ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-            result = jl_toplevel_eval_flex(module, expression, 1, 1);
+            result = jl_toplevel_eval_flex(module, expression, 1, 1, &filename_str, &lineno);
         }
+        ct->world_age = last_age;
     }
     JL_CATCH {
-        result = jl_box_long(jl_lineno); // (ab)use result to root error line
-        err = 1;
-        goto finally; // skip jl_restore_excstack
-    }
-finally:
-    ct->world_age = last_age;
-    jl_lineno = last_lineno;
-    jl_filename = last_filename;
-    if (err) {
+        result = jl_box_long(lineno); // (ab)use result to root error line
+        jl_atomic_store_relaxed(&jl_lineno, last_lineno);
+        jl_atomic_store_relaxed(&jl_filename, last_filename);
         if (jl_loaderror_type == NULL)
             jl_rethrow();
         else
             jl_rethrow_other(jl_new_struct(jl_loaderror_type, filename, result,
-                                           jl_current_exception()));
+                                           jl_current_exception(ct)));
     }
+    jl_atomic_store_relaxed(&jl_lineno, last_lineno);
+    jl_atomic_store_relaxed(&jl_filename, last_filename);
     JL_GC_POP();
     return result;
 }
@@ -1166,6 +969,21 @@ JL_DLLEXPORT jl_value_t *jl_prepend_cwd(jl_value_t *str)
     return jl_cstr_to_string(path);
 }
 
+JL_DLLEXPORT jl_value_t *jl_prepend_string(jl_value_t *prefix, jl_value_t *str)
+{
+    char path[1024];
+    const char *pstr = (const char*)jl_string_data(prefix);
+    size_t sz = strlen(pstr);
+    const char *fstr = (const char*)jl_string_data(str);
+    if (strlen(fstr) + sz >= sizeof(path)) {
+        jl_errorf("use a bigger buffer for jl_fullpath");
+    }
+    strcpy(path, pstr);
+    strcpy(path + sz, fstr);
+    return jl_cstr_to_string(path);
+}
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/typemap.c b/src/typemap.c
index 1bdbe52a974dd..241120e506cfc 100644
--- a/src/typemap.c
+++ b/src/typemap.c
@@ -23,29 +23,29 @@ static int jl_is_any(jl_value_t *t1)
     return t1 == (jl_value_t*)jl_any_type;
 }
 
-static jl_value_t *jl_type_extract_name(jl_value_t *t1 JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+static jl_value_t *jl_type_extract_name(jl_value_t *t1 JL_PROPAGATES_ROOT, int invariant) JL_NOTSAFEPOINT
 {
     if (jl_is_unionall(t1))
         t1 = jl_unwrap_unionall(t1);
     if (jl_is_vararg(t1)) {
-        return jl_type_extract_name(jl_unwrap_vararg(t1));
+        return jl_type_extract_name(jl_unwrap_vararg(t1), invariant);
     }
     else if (jl_is_typevar(t1)) {
-        return jl_type_extract_name(((jl_tvar_t*)t1)->ub);
+        return jl_type_extract_name(((jl_tvar_t*)t1)->ub, 0);
     }
     else if (t1 == jl_bottom_type || t1 == (jl_value_t*)jl_typeofbottom_type || t1 == (jl_value_t*)jl_typeofbottom_type->super) {
         return (jl_value_t*)jl_typeofbottom_type->name; // put Union{} and typeof(Union{}) and Type{Union{}} together for convenience
     }
     else if (jl_is_datatype(t1)) {
         jl_datatype_t *dt = (jl_datatype_t*)t1;
-        if (!jl_is_kind(t1))
-            return (jl_value_t*)dt->name;
-        return NULL;
+        if (jl_is_kind(t1) && !invariant)
+            return (jl_value_t*)jl_type_typename;
+        return (jl_value_t*)dt->name;
     }
     else if (jl_is_uniontype(t1)) {
         jl_uniontype_t *u1 = (jl_uniontype_t*)t1;
-        jl_value_t *tn1 = jl_type_extract_name(u1->a);
-        jl_value_t *tn2 = jl_type_extract_name(u1->b);
+        jl_value_t *tn1 = jl_type_extract_name(u1->a, invariant);
+        jl_value_t *tn2 = jl_type_extract_name(u1->b, invariant);
         if (tn1 == tn2)
             return tn1;
         // TODO: if invariant is false, instead find the nearest common ancestor
@@ -71,7 +71,7 @@ static int jl_type_extract_name_precise(jl_value_t *t1, int invariant)
     }
     else if (jl_is_datatype(t1)) {
         jl_datatype_t *dt = (jl_datatype_t*)t1;
-        if ((invariant || !dt->name->abstract) && !jl_is_kind(t1))
+        if (invariant || !dt->name->abstract || dt->name == jl_type_typename)
             return 1;
         return 0;
     }
@@ -81,8 +81,8 @@ static int jl_type_extract_name_precise(jl_value_t *t1, int invariant)
             return 0;
         if (!jl_type_extract_name_precise(u1->b, invariant))
             return 0;
-        jl_value_t *tn1 = jl_type_extract_name(u1->a);
-        jl_value_t *tn2 = jl_type_extract_name(u1->b);
+        jl_value_t *tn1 = jl_type_extract_name(u1->a, invariant);
+        jl_value_t *tn2 = jl_type_extract_name(u1->b, invariant);
         if (tn1 == tn2)
             return 1;
         return 0;
@@ -277,37 +277,37 @@ static int is_cache_leaf(jl_value_t *ty, int tparam)
     return (jl_is_concrete_type(ty) && (tparam || !jl_is_kind(ty)));
 }
 
-static _Atomic(jl_value_t*) *mtcache_hash_lookup_bp(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT
+static _Atomic(jl_value_t*) *mtcache_hash_lookup_bp(jl_genericmemory_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT
 {
-    if (cache == (jl_array_t*)jl_an_empty_vec_any)
+    if (cache == (jl_genericmemory_t*)jl_an_empty_memory_any)
         return NULL;
     _Atomic(jl_value_t*) *pml = jl_table_peek_bp(cache, ty);
     JL_GC_PROMISE_ROOTED(pml); // clang-sa doesn't trust our JL_PROPAGATES_ROOT claim
     return pml;
 }
 
-static void mtcache_hash_insert(_Atomic(jl_array_t*) *cache, jl_value_t *parent, jl_value_t *key, jl_typemap_t *val)
+static void mtcache_hash_insert(_Atomic(jl_genericmemory_t*) *pcache, jl_value_t *parent, jl_value_t *key, jl_typemap_t *val)
 {
     int inserted = 0;
-    jl_array_t *a = jl_atomic_load_relaxed(cache);
-    if (a == (jl_array_t*)jl_an_empty_vec_any) {
-        a = jl_alloc_vec_any(16);
-        jl_atomic_store_release(cache, a);
+    jl_genericmemory_t *a = jl_atomic_load_relaxed(pcache);
+    if (a == (jl_genericmemory_t*)jl_an_empty_memory_any) {
+        a = jl_alloc_memory_any(16);
+        jl_atomic_store_release(pcache, a);
         if (parent)
             jl_gc_wb(parent, a);
     }
     a = jl_eqtable_put(a, key, val, &inserted);
     assert(inserted);
-    if (a != jl_atomic_load_relaxed(cache)) {
-        jl_atomic_store_release(cache, a);
+    if (a != jl_atomic_load_relaxed(pcache)) {
+        jl_atomic_store_release(pcache, a);
         if (parent)
             jl_gc_wb(parent, a);
     }
 }
 
-static jl_typemap_t *mtcache_hash_lookup(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT
+static jl_typemap_t *mtcache_hash_lookup(jl_genericmemory_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT
 {
-    if (cache == (jl_array_t*)jl_an_empty_vec_any)
+    if (cache == (jl_genericmemory_t*)jl_an_empty_memory_any)
         return (jl_typemap_t*)jl_nothing;
     jl_typemap_t *ml = (jl_typemap_t*)jl_eqtable_get(cache, ty, jl_nothing);
     return ml;
@@ -315,17 +315,17 @@ static jl_typemap_t *mtcache_hash_lookup(jl_array_t *cache JL_PROPAGATES_ROOT, j
 
 // ----- Sorted Type Signature Lookup Matching ----- //
 
-static int jl_typemap_array_visitor(jl_array_t *a, jl_typemap_visitor_fptr fptr, void *closure)
+static int jl_typemap_memory_visitor(jl_genericmemory_t *a, jl_typemap_visitor_fptr fptr, void *closure)
 {
-    size_t i, l = jl_array_len(a);
-    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a);
+    size_t i, l = a->length;
+    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) a->ptr;
     for (i = 1; i < l; i += 2) {
         jl_value_t *d = jl_atomic_load_relaxed(&data[i]);
         JL_GC_PROMISE_ROOTED(d);
         if (d == NULL)
             continue;
-        if (jl_is_array(d)) {
-            if (!jl_typemap_array_visitor((jl_array_t*)d, fptr, closure))
+        if (jl_is_genericmemory(d)) {
+            if (!jl_typemap_memory_visitor((jl_genericmemory_t*)d, fptr, closure))
                 return 0;
         }
         else {
@@ -352,23 +352,23 @@ int jl_typemap_visitor(jl_typemap_t *cache, jl_typemap_visitor_fptr fptr, void *
 {
     if (jl_typeof(cache) == (jl_value_t*)jl_typemap_level_type) {
         jl_typemap_level_t *node = (jl_typemap_level_t*)cache;
-        jl_array_t *a;
+        jl_genericmemory_t *a;
         JL_GC_PUSH1(&a);
         a = jl_atomic_load_relaxed(&node->targ);
-        if (a != (jl_array_t*)jl_an_empty_vec_any)
-            if (!jl_typemap_array_visitor(a, fptr, closure))
+        if (a != (jl_genericmemory_t*)jl_an_empty_memory_any)
+            if (!jl_typemap_memory_visitor(a, fptr, closure))
                 goto exit;
         a = jl_atomic_load_relaxed(&node->arg1);
-        if (a != (jl_array_t*)jl_an_empty_vec_any)
-            if (!jl_typemap_array_visitor(a, fptr, closure))
+        if (a != (jl_genericmemory_t*)jl_an_empty_memory_any)
+            if (!jl_typemap_memory_visitor(a, fptr, closure))
                 goto exit;
         a = jl_atomic_load_relaxed(&node->tname);
-        if (a != (jl_array_t*)jl_an_empty_vec_any)
-            if (!jl_typemap_array_visitor(a, fptr, closure))
+        if (a != (jl_genericmemory_t*)jl_an_empty_memory_any)
+            if (!jl_typemap_memory_visitor(a, fptr, closure))
                 goto exit;
         a = jl_atomic_load_relaxed(&node->name1);
-        if (a != (jl_array_t*)jl_an_empty_vec_any)
-            if (!jl_typemap_array_visitor(a, fptr, closure))
+        if (a != (jl_genericmemory_t*)jl_an_empty_memory_any)
+            if (!jl_typemap_memory_visitor(a, fptr, closure))
                 goto exit;
         if (!jl_typemap_node_visitor(jl_atomic_load_relaxed(&node->linear), fptr, closure))
             goto exit;
@@ -451,12 +451,12 @@ static int concrete_intersects(jl_value_t *t, jl_value_t *ty, int8_t tparam)
 
 // tparam bit 0 is ::Type{T} (vs. T)
 // tparam bit 1 is typename(T) (vs. T)
-static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty, int8_t tparam,
+static int jl_typemap_intersection_memory_visitor(jl_genericmemory_t *a, jl_value_t *ty, int8_t tparam,
                                                  int8_t offs, struct typemap_intersection_env *closure)
 {
     JL_GC_PUSH1(&a);
-    size_t i, l = jl_array_len(a);
-    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a);
+    size_t i, l = a->length;
+    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) a->ptr;
     unsigned height = 0;
     jl_datatype_t *tydt = jl_any_type;
     if (tparam & 2) {
@@ -469,7 +469,7 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty,
             tydt = (jl_datatype_t*)ttype;
         }
         else if (ttype) {
-            ttype = jl_type_extract_name(ttype);
+            ttype = jl_type_extract_name(ttype, tparam & 1);
             tydt = ttype ? (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)ttype)->wrapper) : NULL;
         }
         if (tydt == jl_any_type)
@@ -492,8 +492,8 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty,
                     tname_intersection_dt(tydt, (jl_typename_t*)t, height)) {
                 if ((tparam & 1) && t == (jl_value_t*)jl_typeofbottom_type->name) // skip Type{Union{}} and Type{typeof(Union{})}, since the caller should have already handled those
                     continue;
-                if (jl_is_array(ml)) {
-                    if (!jl_typemap_intersection_array_visitor((jl_array_t*)ml, ty, tparam & ~2, offs, closure))
+                if (jl_is_genericmemory(ml)) {
+                    if (!jl_typemap_intersection_memory_visitor((jl_genericmemory_t*)ml, ty, tparam & ~2, offs, closure))
                         goto exit;
                 }
                 else {
@@ -531,9 +531,9 @@ static int jl_typemap_intersection_node_visitor(jl_typemap_entry_t *ml, struct t
     // that can be absolutely critical for speed
     register jl_typemap_intersection_visitor_fptr fptr = closure->fptr;
     for (;  ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) {
-        if (closure->max_valid < ml->min_world)
+        if (closure->max_valid < jl_atomic_load_relaxed(&ml->min_world))
             continue;
-        if (closure->min_valid > ml->max_world)
+        if (closure->min_valid > jl_atomic_load_relaxed(&ml->max_world))
             continue;
         jl_svec_t **penv = NULL;
         if (closure->env) {
@@ -569,10 +569,8 @@ int has_covariant_var(jl_datatype_t *ttypes, jl_tvar_t *tv)
 
 void typemap_slurp_search(jl_typemap_entry_t *ml, struct typemap_intersection_env *closure)
 {
-    // n.b. we could consider mt->max_args here too, so this optimization
-    //      usually works even if the user forgets the `slurp...` argument, but
-    //      there is discussion that parameter may be going away? (and it is
-    //      already not accurately up-to-date for all tables currently anyways)
+    // TODO: we should consider nparams(closure->type) here too, so this optimization
+    //      usually works even if the user forgets the `slurp...` argument
     if (closure->search_slurp && ml->va) {
         jl_value_t *sig = jl_unwrap_unionall((jl_value_t*)ml->sig);
         size_t nargs = jl_nparams(sig);
@@ -627,21 +625,21 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs,
             if (jl_has_free_typevars(ty))
                 ty = jl_rewrap_unionall(ty, closure->type);
             JL_GC_PUSH1(&ty);
-            jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ);
-            jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname);
+            jl_genericmemory_t *targ = jl_atomic_load_relaxed(&cache->targ);
+            jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname);
             int maybe_type = 0;
             int maybe_kind = 0;
             int exclude_typeofbottom = 0;
             jl_value_t *typetype = NULL;
             jl_value_t *name = NULL;
             // pre-check: optimized pre-intersection test to see if `ty` could intersect with any Type or Kind
-            if (targ != (jl_array_t*)jl_an_empty_vec_any || tname != (jl_array_t*)jl_an_empty_vec_any) {
+            if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any || tname != (jl_genericmemory_t*)jl_an_empty_memory_any) {
                 maybe_kind = jl_has_intersect_kind_not_type(ty);
                 maybe_type = maybe_kind || jl_has_intersect_type_not_kind(ty);
                 if (maybe_type && !maybe_kind) {
                     typetype = jl_unwrap_unionall(ty);
                     typetype = jl_is_type_type(typetype) ? jl_tparam0(typetype) : NULL;
-                    name = typetype ? jl_type_extract_name(typetype) : NULL;
+                    name = typetype ? jl_type_extract_name(typetype, 1) : NULL;
                     if (!typetype)
                         exclude_typeofbottom = !jl_subtype((jl_value_t*)jl_typeofbottom_type, ty);
                     else if (jl_is_typevar(typetype))
@@ -651,7 +649,7 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs,
                 }
             }
             // First check for intersections with methods defined on Type{T}, where T was a concrete type
-            if (targ != (jl_array_t*)jl_an_empty_vec_any && maybe_type &&
+            if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any && maybe_type &&
                     (!typetype || jl_has_free_typevars(typetype) || is_cache_leaf(typetype, 1))) { // otherwise cannot contain this particular kind, so don't bother with checking
                 if (!exclude_typeofbottom) {
                     // detect Type{Union{}}, Type{Type{Union{}}}, and Type{typeof(Union{}} and do those early here
@@ -680,18 +678,18 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs,
                         // attempt semi-direct lookup of types via their names
                         // consider the type name first
                         jl_value_t *ml = mtcache_hash_lookup(targ, (jl_value_t*)name);
-                        if (jl_is_array(ml)) {
+                        if (jl_is_genericmemory(ml)) {
                             if (typetype && !jl_has_free_typevars(typetype)) {
                                 // direct lookup of leaf types
                                 if (is_cache_leaf(typetype, 1)) {
-                                    ml = mtcache_hash_lookup((jl_array_t*)ml, typetype);
+                                    ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, typetype);
                                     if (ml != jl_nothing) {
                                         if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { JL_GC_POP(); return 0; }
                                     }
                                 }
                             }
                             else {
-                                if (!jl_typemap_intersection_array_visitor((jl_array_t*)ml, ty, 1, offs, closure)) { JL_GC_POP(); return 0; }
+                                if (!jl_typemap_intersection_memory_visitor((jl_genericmemory_t*)ml, ty, 1, offs, closure)) { JL_GC_POP(); return 0; }
                             }
                         }
                         else if (ml != jl_nothing) {
@@ -699,43 +697,43 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs,
                         }
                     }
                     else {
-                        // else an array scan is required to consider all the possible subtypes
-                        if (!jl_typemap_intersection_array_visitor(targ, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; }
+                        // else a scan is required to consider all the possible subtypes
+                        if (!jl_typemap_intersection_memory_visitor(targ, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; }
                     }
                 }
             }
-            jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1);
-            if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any) {
+            jl_genericmemory_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1);
+            if (cachearg1 != (jl_genericmemory_t*)jl_an_empty_memory_any) {
                 if (is_cache_leaf(ty, 0)) {
                     jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name;
                     // direct lookup of leaf types
                     jl_value_t *ml = mtcache_hash_lookup(cachearg1, (jl_value_t*)name);
-                    if (jl_is_array(ml))
-                        ml = mtcache_hash_lookup((jl_array_t*)ml, ty);
+                    if (jl_is_genericmemory(ml))
+                        ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, ty);
                     if (ml != jl_nothing) {
                         if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) { JL_GC_POP(); return 0; }
                     }
                 }
                 else {
-                    jl_value_t *name = jl_type_extract_name(ty);
+                    jl_value_t *name = jl_type_extract_name(ty, 0);
                     if (name && jl_type_extract_name_precise(ty, 0)) {
                         // direct lookup of leaf types
                         jl_value_t *ml = mtcache_hash_lookup(cachearg1, name);
-                        if (jl_is_array(ml)) {
-                            if (!jl_typemap_intersection_array_visitor((jl_array_t*)ml, ty, 0, offs, closure)) { JL_GC_POP(); return 0; }
+                        if (jl_is_genericmemory(ml)) {
+                            if (!jl_typemap_intersection_memory_visitor((jl_genericmemory_t*)ml, ty, 0, offs, closure)) { JL_GC_POP(); return 0; }
                         }
                         else {
                             if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { JL_GC_POP(); return 0; }
                         }
                     }
                     else {
-                        // else an array scan is required to check subtypes
-                        if (!jl_typemap_intersection_array_visitor(cachearg1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; }
+                        // else a scan is required to check subtypes
+                        if (!jl_typemap_intersection_memory_visitor(cachearg1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; }
                     }
                 }
             }
             // Next check for intersections with methods defined on Type{T}, where T was not concrete (it might even have been a TypeVar), but had an extractable TypeName
-            if (tname != (jl_array_t*)jl_an_empty_vec_any && maybe_type) {
+            if (tname != (jl_genericmemory_t*)jl_an_empty_memory_any && maybe_type) {
                 if (!exclude_typeofbottom || (!typetype && jl_isa((jl_value_t*)jl_typeofbottom_type, ty))) {
                     // detect Type{Union{}}, Type{Type{Union{}}}, and Type{typeof(Union{}} and do those early here
                     // otherwise the possibility of encountering `Type{Union{}}` in this intersection may
@@ -775,14 +773,14 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs,
                     }
                 }
                 else {
-                    // else an array scan is required to check subtypes of typetype too
+                    // else a scan is required to check subtypes of typetype too
                     tname = jl_atomic_load_relaxed(&cache->tname);  // may be GC'd earlier
-                    if (!jl_typemap_intersection_array_visitor(tname, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; }
+                    if (!jl_typemap_intersection_memory_visitor(tname, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; }
                 }
             }
-            jl_array_t *name1 = jl_atomic_load_relaxed(&cache->name1);
-            if (name1 != (jl_array_t*)jl_an_empty_vec_any) {
-                jl_value_t *name = jl_type_extract_name(ty);
+            jl_genericmemory_t *name1 = jl_atomic_load_relaxed(&cache->name1);
+            if (name1 != (jl_genericmemory_t*)jl_an_empty_memory_any) {
+                jl_value_t *name = jl_type_extract_name(ty, 0);
                 if (name && jl_type_extract_name_precise(ty, 0)) {
                     jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)name)->wrapper);
                     // direct lookup of concrete types
@@ -798,8 +796,8 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs,
                     }
                 }
                 else {
-                    // else an array scan is required to check subtypes
-                    if (!jl_typemap_intersection_array_visitor(name1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; }
+                    // else a scan is required to check subtypes
+                    if (!jl_typemap_intersection_memory_visitor(name1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; }
                 }
             }
             JL_GC_POP();
@@ -836,9 +834,7 @@ static jl_typemap_entry_t *jl_typemap_entry_assoc_by_type(
     size_t n = jl_nparams(unw);
     int typesisva = n == 0 ? 0 : jl_is_vararg(jl_tparam(unw, n-1));
     for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) {
-        if (search->max_valid < ml->min_world)
-            continue;
-        if (search->min_valid > ml->max_world)
+        if (search->world < jl_atomic_load_relaxed(&ml->min_world) || search->world > jl_atomic_load_relaxed(&ml->max_world))
             continue;
         size_t lensig = jl_nparams(jl_unwrap_unionall((jl_value_t*)ml->sig));
         if (lensig == n || (ml->va && lensig <= n+1)) {
@@ -877,24 +873,7 @@ static jl_typemap_entry_t *jl_typemap_entry_assoc_by_type(
                     }
                 }
                 if (ismatch) {
-                    if (search->world < ml->min_world) {
-                        // ignore method table entries that are part of a later world
-                        if (search->max_valid >= ml->min_world)
-                            search->max_valid = ml->min_world - 1;
-                    }
-                    else if (search->world > ml->max_world) {
-                        // ignore method table entries that have been replaced in the current world
-                        if (search->min_valid <= ml->max_world)
-                            search->min_valid = ml->max_world + 1;
-                    }
-                    else {
-                        // intersect the env valid range with method's valid range
-                        if (search->min_valid < ml->min_world)
-                            search->min_valid = ml->min_world;
-                        if (search->max_valid > ml->max_world)
-                            search->max_valid = ml->max_world;
-                        return ml;
-                    }
+                    return ml;
                 }
             }
             if (resetenv)
@@ -908,7 +887,7 @@ static jl_typemap_entry_t *jl_typemap_entry_lookup_by_type(
         jl_typemap_entry_t *ml, struct jl_typemap_assoc *search)
 {
     for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) {
-        if (search->world < ml->min_world || search->world > ml->max_world)
+        if (search->world < jl_atomic_load_relaxed(&ml->min_world) || search->world > jl_atomic_load_relaxed(&ml->max_world))
             continue;
         // unroll the first few cases here, to the extent that is possible to do fast and easily
         jl_value_t *types = search->types;
@@ -989,12 +968,12 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
             if (jl_is_type_type(ty)) {
                 jl_value_t *a0 = jl_tparam0(ty);
                 if (is_cache_leaf(a0, 1)) {
-                    jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ);
-                    if (targ != (jl_array_t*)jl_an_empty_vec_any) {
+                    jl_genericmemory_t *targ = jl_atomic_load_relaxed(&cache->targ);
+                    if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any) {
                         jl_typename_t *name = a0 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a0)->name;
                         jl_value_t *ml = mtcache_hash_lookup(targ, (jl_value_t*)name);
-                        if (jl_is_array(ml))
-                            ml = mtcache_hash_lookup((jl_array_t*)ml, a0);
+                        if (jl_is_genericmemory(ml))
+                            ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, a0);
                         if (ml != jl_nothing) {
                             jl_typemap_entry_t *li = jl_typemap_assoc_by_type((jl_typemap_t*)ml, search, offs + 1, subtype);
                             if (li) return li;
@@ -1004,12 +983,12 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                 }
             }
             if (is_cache_leaf(ty, 0)) {
-                jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1);
-                if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any) {
+                jl_genericmemory_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1);
+                if (cachearg1 != (jl_genericmemory_t*)jl_an_empty_memory_any) {
                     jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name;
                     jl_value_t *ml = mtcache_hash_lookup(cachearg1, (jl_value_t*)name);
-                    if (jl_is_array(ml))
-                        ml = mtcache_hash_lookup((jl_array_t*)ml, ty);
+                    if (jl_is_genericmemory(ml))
+                        ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, ty);
                     if (ml != jl_nothing) {
                         jl_typemap_entry_t *li = jl_typemap_assoc_by_type((jl_typemap_t*)ml, search, offs + 1, subtype);
                         if (li) return li;
@@ -1020,9 +999,9 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
         }
         if (ty || subtype) {
             // now look at the optimized TypeName caches
-            jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname);
-            if (tname != (jl_array_t*)jl_an_empty_vec_any) {
-                jl_value_t *a0 = ty && jl_is_type_type(ty) ? jl_type_extract_name(jl_tparam0(ty)) : NULL;
+            jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname);
+            if (tname != (jl_genericmemory_t*)jl_an_empty_memory_any) {
+                jl_value_t *a0 = ty && jl_is_type_type(ty) ? jl_type_extract_name(jl_tparam0(ty), 1) : NULL;
                 if (a0) { // TODO: if we start analyzing Union types in jl_type_extract_name, then a0 might be over-approximated here, leading us to miss possible subtypes
                     jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)a0)->wrapper);
                     while (1) {
@@ -1039,9 +1018,10 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                 }
                 else {
                     if (!ty || !jl_has_empty_intersection((jl_value_t*)jl_type_type, ty)) {
+                        jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname); // reload after type-intersect
                         // couldn't figure out unique `a0` initial point, so scan all for matches
-                        size_t i, l = jl_array_len(tname);
-                        _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(tname);
+                        size_t i, l = tname->length;
+                        _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(tname);
                         JL_GC_PUSH1(&tname);
                         for (i = 1; i < l; i += 2) {
                             jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]);
@@ -1057,10 +1037,10 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                     }
                 }
             }
-            jl_array_t *name1 = jl_atomic_load_relaxed(&cache->name1);
-            if (name1 != (jl_array_t*)jl_an_empty_vec_any) {
+            jl_genericmemory_t *name1 = jl_atomic_load_relaxed(&cache->name1);
+            if (name1 != (jl_genericmemory_t*)jl_an_empty_memory_any) {
                 if (ty) {
-                    jl_value_t *a0 = jl_type_extract_name(ty);
+                    jl_value_t *a0 = jl_type_extract_name(ty, 0);
                     if (a0) { // TODO: if we start analyzing Union types in jl_type_extract_name, then a0 might be over-approximated here, leading us to miss possible subtypes
                         jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)a0)->wrapper);
                         while (1) {
@@ -1079,8 +1059,8 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                 }
                 else {
                     // doing subtype, but couldn't figure out unique `ty`, so scan all for supertypes
-                    size_t i, l = jl_array_len(name1);
-                    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(name1);
+                    size_t i, l = name1->length;
+                    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(name1);
                     JL_GC_PUSH1(&name1);
                     for (i = 1; i < l; i += 2) {
                         jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]);
@@ -1119,7 +1099,7 @@ jl_typemap_entry_t *jl_typemap_entry_assoc_exact(jl_typemap_entry_t *ml, jl_valu
     // some manually-unrolled common special cases
     while (ml->simplesig == (void*)jl_nothing && ml->guardsigs == jl_emptysvec && ml->isleafsig) {
         // use a tight loop for as long as possible
-        if (world >= ml->min_world && world <= ml->max_world) {
+        if (world >= jl_atomic_load_relaxed(&ml->min_world) && world <= jl_atomic_load_relaxed(&ml->max_world)) {
             if (n == jl_nparams(ml->sig) && jl_typeof(arg1) == jl_tparam(ml->sig, 0)) {
                 if (n == 1)
                     return ml;
@@ -1144,7 +1124,7 @@ jl_typemap_entry_t *jl_typemap_entry_assoc_exact(jl_typemap_entry_t *ml, jl_valu
     }
 
     for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) {
-        if (world < ml->min_world || world > ml->max_world)
+        if (world < jl_atomic_load_relaxed(&ml->min_world) || world > jl_atomic_load_relaxed(&ml->max_world))
             continue; // ignore replaced methods
         size_t lensig = jl_nparams(ml->sig);
         if (lensig == n || (ml->va && lensig <= n+1)) {
@@ -1198,27 +1178,27 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v
         jl_value_t *a1 = (offs == 0 ? arg1 : args[offs - 1]);
         jl_value_t *ty = jl_typeof(a1);
         assert(jl_is_datatype(ty));
-        jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ);
-        if (targ != (jl_array_t*)jl_an_empty_vec_any && is_cache_leaf(a1, 1)) {
+        jl_genericmemory_t *targ = jl_atomic_load_relaxed(&cache->targ);
+        if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any && is_cache_leaf(a1, 1)) {
             jl_typename_t *name = a1 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a1)->name;
             jl_value_t *ml_or_cache = mtcache_hash_lookup(targ, (jl_value_t*)name);
-            if (jl_is_array(ml_or_cache))
-                ml_or_cache = mtcache_hash_lookup((jl_array_t*)ml_or_cache, a1);
+            if (jl_is_genericmemory(ml_or_cache))
+                ml_or_cache = mtcache_hash_lookup((jl_genericmemory_t*)ml_or_cache, a1);
             jl_typemap_entry_t *ml = jl_typemap_assoc_exact(ml_or_cache, arg1, args, n, offs+1, world);
             if (ml) return ml;
         }
-        jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1);
-        if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any && is_cache_leaf(ty, 0)) {
+        jl_genericmemory_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1);
+        if (cachearg1 != (jl_genericmemory_t*)jl_an_empty_memory_any && is_cache_leaf(ty, 0)) {
             jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name;
             jl_value_t *ml_or_cache = mtcache_hash_lookup(cachearg1, (jl_value_t*)name);
-            if (jl_is_array(ml_or_cache))
-                ml_or_cache = mtcache_hash_lookup((jl_array_t*)ml_or_cache, ty);
+            if (jl_is_genericmemory(ml_or_cache))
+                ml_or_cache = mtcache_hash_lookup((jl_genericmemory_t*)ml_or_cache, ty);
             jl_typemap_entry_t *ml = jl_typemap_assoc_exact((jl_typemap_t*)ml_or_cache, arg1, args, n, offs+1, world);
             if (ml) return ml;
         }
-        jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname);
-        if (jl_is_kind(ty) && tname != (jl_array_t*)jl_an_empty_vec_any) {
-            jl_value_t *name = jl_type_extract_name(a1);
+        jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname);
+        if (jl_is_kind(ty) && tname != (jl_genericmemory_t*)jl_an_empty_memory_any) {
+            jl_value_t *name = jl_type_extract_name(a1, 1);
             if (name) {
                 if (ty != (jl_value_t*)jl_datatype_type)
                     a1 = jl_unwrap_unionall(((jl_typename_t*)name)->wrapper);
@@ -1235,8 +1215,8 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v
             }
             else {
                 // couldn't figure out unique `name` initial point, so must scan all for matches
-                size_t i, l = jl_array_len(tname);
-                _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(tname);
+                size_t i, l = tname->length;
+                _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(tname);
                 JL_GC_PUSH1(&tname);
                 for (i = 1; i < l; i += 2) {
                     jl_typemap_t *ml_or_cache = jl_atomic_load_relaxed(&data[i]);
@@ -1251,8 +1231,8 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v
                 JL_GC_POP();
             }
         }
-        jl_array_t *name1 = jl_atomic_load_relaxed(&cache->name1);
-        if (name1 != (jl_array_t*)jl_an_empty_vec_any) {
+        jl_genericmemory_t *name1 = jl_atomic_load_relaxed(&cache->name1);
+        if (name1 != (jl_genericmemory_t*)jl_an_empty_memory_any) {
             while (1) {
                 name1 = jl_atomic_load_relaxed(&cache->name1); // reload after tree descent (which may hit safepoints)
                 jl_typemap_t *ml_or_cache = mtcache_hash_lookup(
@@ -1297,25 +1277,26 @@ static jl_typemap_level_t *jl_new_typemap_level(void)
     jl_typemap_level_t *cache =
         (jl_typemap_level_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typemap_level_t),
                                          jl_typemap_level_type);
-    jl_atomic_store_relaxed(&cache->arg1, (jl_array_t*)jl_an_empty_vec_any);
-    jl_atomic_store_relaxed(&cache->targ, (jl_array_t*)jl_an_empty_vec_any);
-    jl_atomic_store_relaxed(&cache->name1, (jl_array_t*)jl_an_empty_vec_any);
-    jl_atomic_store_relaxed(&cache->tname, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&cache->arg1, (jl_genericmemory_t*)jl_an_empty_memory_any);
+    jl_atomic_store_relaxed(&cache->targ, (jl_genericmemory_t*)jl_an_empty_memory_any);
+    jl_atomic_store_relaxed(&cache->name1, (jl_genericmemory_t*)jl_an_empty_memory_any);
+    jl_atomic_store_relaxed(&cache->tname, (jl_genericmemory_t*)jl_an_empty_memory_any);
     jl_atomic_store_relaxed(&cache->linear, (jl_typemap_entry_t*)jl_nothing);
     jl_atomic_store_relaxed(&cache->any, jl_nothing);
     return cache;
 }
 
-static void jl_typemap_array_insert_(
-        jl_typemap_t *map, _Atomic(jl_array_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec,
+static void jl_typemap_memory_insert_(
+        jl_typemap_t *map, _Atomic(jl_genericmemory_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec,
         jl_value_t *parent, int8_t tparam, int8_t offs, jl_value_t *doublesplit);
 
 static jl_value_t *jl_method_convert_list_to_cache(
         jl_typemap_t *map, jl_typemap_entry_t *ml, int8_t tparam, int8_t offs, int8_t doublesplit)
 {
-    jl_value_t *cache = doublesplit ? jl_an_empty_vec_any : (jl_value_t*)jl_new_typemap_level();
+    _Atomic(jl_genericmemory_t*) dblcache = (jl_genericmemory_t*)jl_an_empty_memory_any;
+    jl_typemap_level_t *cache = doublesplit ? NULL : jl_new_typemap_level();
     jl_typemap_entry_t *next = NULL;
-    JL_GC_PUSH3(&cache, &next, &ml);
+    JL_GC_PUSH4(&cache, &dblcache, &next, &ml);
     while (ml != (void*)jl_nothing) {
         next = jl_atomic_load_relaxed(&ml->next);
         jl_atomic_store_relaxed(&ml->next, (jl_typemap_entry_t*)jl_nothing);
@@ -1336,14 +1317,14 @@ static jl_value_t *jl_method_convert_list_to_cache(
                 assert(jl_is_type_type(key));
                 key = jl_tparam0(key);
             }
-            jl_typemap_array_insert_(map, (_Atomic(jl_array_t*)*)&cache, key, ml, NULL, 0, offs, NULL);
+            jl_typemap_memory_insert_(map, &dblcache, key, ml, NULL, 0, offs, NULL);
         }
         else
-            jl_typemap_level_insert_(map, (jl_typemap_level_t*)cache, ml, offs);
+            jl_typemap_level_insert_(map, cache, ml, offs);
         ml = next;
     }
     JL_GC_POP();
-    return cache;
+    return doublesplit ? (jl_value_t*)jl_atomic_load_relaxed(&dblcache) : (jl_value_t*)cache;
 }
 
 static void jl_typemap_list_insert_(
@@ -1351,6 +1332,9 @@ static void jl_typemap_list_insert_(
         jl_typemap_entry_t *newrec)
 {
     jl_typemap_entry_t *l = jl_atomic_load_relaxed(pml);
+
+    // Pick the first intersection point that guarantees that the list ordering
+    // will be (leaf sigs..., simple sigs..., other sigs...)
     while ((jl_value_t*)l != jl_nothing) {
         if (newrec->isleafsig || !l->isleafsig)
             if (newrec->issimplesig || !l->issimplesig)
@@ -1359,6 +1343,7 @@ static void jl_typemap_list_insert_(
         parent = (jl_value_t*)l;
         l = jl_atomic_load_relaxed(&l->next);
     }
+
     jl_atomic_store_relaxed(&newrec->next, l);
     jl_gc_wb(newrec, l);
     jl_atomic_store_release(pml, newrec);
@@ -1371,11 +1356,12 @@ static void jl_typemap_insert_generic(
         jl_typemap_entry_t *newrec, int8_t tparam, int8_t offs, jl_value_t *doublesplit)
 {
     jl_value_t *ml = jl_atomic_load_relaxed(pml);
-    if (jl_is_array(ml)) {
+    if (jl_is_genericmemory(ml)) {
         assert(doublesplit);
-        jl_typemap_array_insert_(map, (_Atomic(jl_array_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL);
+        jl_typemap_memory_insert_(map, (_Atomic(jl_genericmemory_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL);
         return;
     }
+
     if (jl_typeof(ml) == (jl_value_t*)jl_typemap_level_type) {
         assert(!doublesplit);
         jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs);
@@ -1389,7 +1375,7 @@ static void jl_typemap_insert_generic(
         jl_atomic_store_release(pml, ml);
         jl_gc_wb(parent, ml);
         if (doublesplit)
-            jl_typemap_array_insert_(map, (_Atomic(jl_array_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL);
+            jl_typemap_memory_insert_(map, (_Atomic(jl_genericmemory_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL);
         else
             jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs);
         return;
@@ -1399,16 +1385,16 @@ static void jl_typemap_insert_generic(
         parent, newrec);
 }
 
-static void jl_typemap_array_insert_(
-        jl_typemap_t *map, _Atomic(jl_array_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec,
+static void jl_typemap_memory_insert_(
+        jl_typemap_t *map, _Atomic(jl_genericmemory_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec,
         jl_value_t *parent, int8_t tparam, int8_t offs, jl_value_t *doublesplit)
 {
-    jl_array_t *cache = jl_atomic_load_relaxed(pcache);
+    jl_genericmemory_t *cache = jl_atomic_load_relaxed(pcache);
     _Atomic(jl_value_t*) *pml = mtcache_hash_lookup_bp(cache, key);
     if (pml == NULL)
         mtcache_hash_insert(pcache, parent, key, (jl_typemap_t*)newrec);
     else
-        jl_typemap_insert_generic(map, pml, (jl_value_t*)cache, newrec, tparam, offs + (doublesplit ? 0 : 1), doublesplit);
+        jl_typemap_insert_generic(map, pml, (jl_value_t*) cache, newrec, tparam, offs + (doublesplit ? 0 : 1), doublesplit);
 }
 
 static void jl_typemap_level_insert_(
@@ -1451,13 +1437,13 @@ static void jl_typemap_level_insert_(
             jl_value_t *a0 = jl_tparam0(t1);
             if (is_cache_leaf(a0, 1)) {
                 jl_typename_t *name = a0 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a0)->name;
-                jl_typemap_array_insert_(map, &cache->targ, (jl_value_t*)name, newrec, (jl_value_t*)cache, 1, offs, jl_is_datatype(name->wrapper) ? NULL : a0);
+                jl_typemap_memory_insert_(map, &cache->targ, (jl_value_t*)name, newrec, (jl_value_t*)cache, 1, offs, jl_is_datatype(name->wrapper) ? NULL : a0);
                 return;
             }
         }
         if (is_cache_leaf(t1, 0)) {
             jl_typename_t *name = t1 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)t1)->name;
-            jl_typemap_array_insert_(map, &cache->arg1, (jl_value_t*)name, newrec, (jl_value_t*)cache, 0, offs, jl_is_datatype(name->wrapper) ? NULL : t1);
+            jl_typemap_memory_insert_(map, &cache->arg1, (jl_value_t*)name, newrec, (jl_value_t*)cache, 0, offs, jl_is_datatype(name->wrapper) ? NULL : t1);
             return;
         }
 
@@ -1465,14 +1451,14 @@ static void jl_typemap_level_insert_(
         jl_value_t *a0;
         t1 = jl_unwrap_unionall(t1);
         if (jl_is_type_type(t1)) {
-            a0 = jl_type_extract_name(jl_tparam0(t1));
+            a0 = jl_type_extract_name(jl_tparam0(t1), 1);
             jl_datatype_t *super = a0 ? (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)a0)->wrapper) : jl_any_type;
-            jl_typemap_array_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, 1, offs, NULL);
+            jl_typemap_memory_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, 1, offs, NULL);
             return;
         }
-        a0 = jl_type_extract_name(t1);
+        a0 = jl_type_extract_name(t1, 0);
         if (a0 && a0 != (jl_value_t*)jl_any_type->name) {
-            jl_typemap_array_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, 0, offs, NULL);
+            jl_typemap_memory_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, 0, offs, NULL);
             return;
         }
     }
@@ -1516,8 +1502,8 @@ jl_typemap_entry_t *jl_typemap_alloc(
     newrec->func.value = newvalue;
     newrec->guardsigs = guardsigs;
     jl_atomic_store_relaxed(&newrec->next, (jl_typemap_entry_t*)jl_nothing);
-    newrec->min_world = min_world;
-    newrec->max_world = max_world;
+    jl_atomic_store_relaxed(&newrec->min_world, min_world);
+    jl_atomic_store_relaxed(&newrec->max_world, max_world);
     newrec->va = isva;
     newrec->issimplesig = issimplesig;
     newrec->isleafsig = isleafsig;
diff --git a/src/utils.scm b/src/utils.scm
index 97464b9a14e5a..80fc44615a49a 100644
--- a/src/utils.scm
+++ b/src/utils.scm
@@ -48,6 +48,13 @@
                 (any (lambda (y) (expr-contains-p p y filt))
                      (cdr expr))))))
 
+(define (expr-replace p expr repl)
+  (cond ((p expr) (repl expr))
+        ((and (pair? expr) (not (quoted? expr)))
+         (cons (car expr)
+               (map (lambda (x) (expr-replace p x repl)) (cdr expr))))
+        (else expr)))
+
 ;; find all subexprs satisfying `p`, applying `key` to each one
 (define (expr-find-all p expr key (filt (lambda (x) #t)))
   (if (filt expr)
@@ -112,3 +119,16 @@
          (cons (car lst) (filter (lambda (x) (not (pred x))) (cdr lst))))
         (else
          (cons (car lst) (keep-first pred (cdr lst))))))
+
+(define (take lst n)
+  (let loop ((lst lst) (n n) (out '()))
+    (if (= n 0) (reverse out)
+        (loop (cdr lst) (- n 1) (cons (car lst) out)))))
+
+(define (drop lst n)
+  (if (= n 0) lst
+      (drop (cdr lst) (- n 1))))
+
+;; functional update at position i
+(define (list-set lst i val)
+  (append (take lst i) (list val) (drop lst (+ i 1))))
diff --git a/src/win32_ucontext.c b/src/win32_ucontext.c
index c6d4372308004..ca54877f97728 100644
--- a/src/win32_ucontext.c
+++ b/src/win32_ucontext.c
@@ -62,6 +62,8 @@ void jl_makecontext(win32_ucontext_t *ucp, void (*func)(void))
     Registration[0].Handler = &__julia_personality;
     Registration[1].Next = (PEXCEPTION_REGISTRATION_RECORD)0xFFFFFFFF;
     Registration[1].Handler = UnHandler;
+#else
+#error jl_makecontext not defined for CPU type
 #endif
     stack_top -= sizeof(void*);
     *(void**)stack_top = 0; // push rta
diff --git a/src/work-stealing-queue.h b/src/work-stealing-queue.h
index 38429e02886e9..9ec283b610e62 100644
--- a/src/work-stealing-queue.h
+++ b/src/work-stealing-queue.h
@@ -3,6 +3,8 @@
 #ifndef WORK_STEALING_QUEUE_H
 #define WORK_STEALING_QUEUE_H
 
+#include <stdalign.h>
+
 #include "julia_atomics.h"
 #include "assert.h"
 
@@ -34,10 +36,17 @@ static inline ws_array_t *create_ws_array(size_t capacity, int32_t eltsz) JL_NOT
     return a;
 }
 
+static inline void free_ws_array(ws_array_t *a)
+{
+    free(a->buffer);
+    free(a);
+}
+
 typedef struct {
-    _Atomic(int64_t) top;
-    _Atomic(int64_t) bottom;
-    _Atomic(ws_array_t *) array;
+    // align to JL_CACHE_BYTE_ALIGNMENT
+    alignas(JL_CACHE_BYTE_ALIGNMENT) _Atomic(int64_t) top;
+    alignas(JL_CACHE_BYTE_ALIGNMENT) _Atomic(int64_t) bottom;
+    alignas(JL_CACHE_BYTE_ALIGNMENT) _Atomic(ws_array_t *) array;
 } ws_queue_t;
 
 static inline ws_array_t *ws_queue_push(ws_queue_t *q, void *elt, int32_t eltsz) JL_NOTSAFEPOINT
diff --git a/stdlib/.gitignore b/stdlib/.gitignore
index dec1745520d4c..5996091c5a0ef 100644
--- a/stdlib/.gitignore
+++ b/stdlib/.gitignore
@@ -21,6 +21,16 @@
 /SparseArrays
 /SHA-*
 /SHA
+/LazyArtifacts-*
+/LazyArtifacts
+/Distributed-*
+/Distributed
+/StyledStrings-*
+/StyledStrings
+/JuliaSyntaxHighlighting-*
+/JuliaSyntaxHighlighting
+/LinearAlgebra-*
+/LinearAlgebra
 /*_jll/StdlibArtifacts.toml
 /*/Manifest.toml
 /*.image
diff --git a/stdlib/ArgTools.version b/stdlib/ArgTools.version
index 0ae273bb18db6..8a9f29fc4bfcf 100644
--- a/stdlib/ArgTools.version
+++ b/stdlib/ArgTools.version
@@ -1,4 +1,4 @@
 ARGTOOLS_BRANCH = master
-ARGTOOLS_SHA1 = 08b11b2707593d4d7f92e5f1b9dba7668285ff82
+ARGTOOLS_SHA1 = 89d19599208c02bfa9609d4578ab72eabe6e8eee
 ARGTOOLS_GIT_URL := https://github.com/JuliaIO/ArgTools.jl.git
 ARGTOOLS_TAR_URL = https://api.github.com/repos/JuliaIO/ArgTools.jl/tarball/$1
diff --git a/stdlib/Artifacts/Project.toml b/stdlib/Artifacts/Project.toml
index 7251b79cea8c1..c4e5cc031375c 100644
--- a/stdlib/Artifacts/Project.toml
+++ b/stdlib/Artifacts/Project.toml
@@ -1,5 +1,6 @@
 name = "Artifacts"
 uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Artifacts/docs/src/index.md b/stdlib/Artifacts/docs/src/index.md
index 80f4c62cbf77f..1bd75832fb8d3 100644
--- a/stdlib/Artifacts/docs/src/index.md
+++ b/stdlib/Artifacts/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Artifacts/docs/src/index.md"
+```
+
 # Artifacts
 
 ```@meta
@@ -18,4 +22,7 @@ Artifacts.artifact_meta
 Artifacts.artifact_hash
 Artifacts.find_artifacts_toml
 Artifacts.@artifact_str
+Artifacts.artifact_exists
+Artifacts.artifact_path
+Artifacts.select_downloadable_artifacts
 ```
diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl
index 70593bfadae05..4af706606d326 100644
--- a/stdlib/Artifacts/src/Artifacts.jl
+++ b/stdlib/Artifacts/src/Artifacts.jl
@@ -1,9 +1,17 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+"""
+Artifacts.jl is a Julia module that is used for managing and accessing
+artifacts in Julia packages. Artifacts are containers for
+platform-specific binaries, datasets, text, or any other kind of data
+that would be convenient to place within an immutable, life-cycled datastore.
+"""
 module Artifacts
 
 import Base: get, SHA1
-using Base.BinaryPlatforms, Base.TOML
+using Base.BinaryPlatforms: AbstractPlatform, Platform, HostPlatform
+using Base.BinaryPlatforms: tags, triplet, select_platform
+using Base.TOML: TOML
 
 export artifact_exists, artifact_path, artifact_meta, artifact_hash,
        select_downloadable_artifacts, find_artifacts_toml, @artifact_str
@@ -18,7 +26,7 @@ function parse_toml(path::String)
     Base.parsed_toml(path)
 end
 
-# keep in sync with Base.project_names and Base.manifest_names
+# keep in sync with Base.project_names
 const artifact_names = ("JuliaArtifacts.toml", "Artifacts.toml")
 
 const ARTIFACTS_DIR_OVERRIDE = Ref{Union{String,Nothing}}(nothing)
@@ -67,8 +75,8 @@ function parse_mapping(mapping::String, name::String, override_file::String)
     end
     return mapping
 end
-function parse_mapping(mapping::Dict, name::String, override_file::String)
-    return Dict(k => parse_mapping(v, name, override_file) for (k, v) in mapping)
+function parse_mapping(mapping::Dict{String, Any}, name::String, override_file::String)
+    return Dict{String, Any}(k => parse_mapping(v, name, override_file) for (k, v) in mapping)
 end
 # Fallthrough for invalid Overrides.toml files
 parse_mapping(mapping, name::String, override_file::String) = nothing
@@ -96,7 +104,7 @@ overriding to another artifact by its content-hash.
 const ARTIFACT_OVERRIDES = Ref{Union{Dict{Symbol,Any},Nothing}}(nothing)
 function load_overrides(;force::Bool = false)::Dict{Symbol, Any}
     if ARTIFACT_OVERRIDES[] !== nothing && !force
-        return ARTIFACT_OVERRIDES[]
+        return ARTIFACT_OVERRIDES[]::Dict{Symbol,Any}
     end
 
     # We organize our artifact location overrides into two camps:
@@ -106,13 +114,8 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any}
     # Overrides per UUID/bound name are intercepted upon Artifacts.toml load, and new
     # entries within the "hash" overrides are generated on-the-fly.  Thus, all redirects
     # mechanistically happen through the "hash" overrides.
-    overrides = Dict{Symbol,Any}(
-        # Overrides by UUID
-        :UUID => Dict{Base.UUID,Dict{String,Union{String,SHA1}}}(),
-
-        # Overrides by hash
-        :hash => Dict{SHA1,Union{String,SHA1}}(),
-    )
+    overrides_uuid = Dict{Base.UUID,Dict{String,Union{String,SHA1}}}()
+    overrides_hash = Dict{SHA1,Union{String,SHA1}}()
 
     for override_file in reverse(artifacts_dirs("Overrides.toml"))
         !isfile(override_file) && continue
@@ -131,7 +134,6 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any}
             # Next, determine if this is a hash override or a UUID/name override
             if isa(mapping, String) || isa(mapping, SHA1)
                 # if this mapping is a direct mapping (e.g. a String), store it as a hash override
-                local hash_str
                 hash = tryparse(Base.SHA1, k)
                 if hash === nothing
                     @error("Invalid override in '$(override_file)': Invalid SHA1 hash '$(k)'")
@@ -139,12 +141,12 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any}
                 end
 
                 # If this mapping is the empty string, un-override it
-                if mapping == ""
-                    delete!(overrides[:hash], hash)
+                if mapping isa String && isempty(mapping)
+                    delete!(overrides_hash, hash)
                 else
-                    overrides[:hash][hash] = mapping
+                    overrides_hash[hash] = mapping
                 end
-            elseif isa(mapping, Dict)
+            elseif isa(mapping, Dict{String, Any})
                 # Convert `k` into a uuid
                 uuid = tryparse(Base.UUID, k)
                 if uuid === nothing
@@ -153,19 +155,18 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any}
                 end
 
                 # If this mapping is itself a dict, store it as a set of UUID/artifact name overrides
-                ovruuid = overrides[:UUID]::Dict{Base.UUID,Dict{String,Union{String,SHA1}}}
-                if !haskey(ovruuid, uuid)
-                    ovruuid[uuid] = Dict{String,Union{String,SHA1}}()
+                if !haskey(overrides_uuid, uuid)
+                    overrides_uuid[uuid] = Dict{String,Union{String,SHA1}}()
                 end
 
                 # For each name in the mapping, update appropriately
                 for (name, override_value) in mapping
                     # If the mapping for this name is the empty string, un-override it
-                    if override_value == ""
-                        delete!(ovruuid[uuid], name)
+                    if override_value isa String && isempty(override_value)
+                        delete!(overrides_uuid[uuid], name)
                     else
                         # Otherwise, store it!
-                        ovruuid[uuid][name] = override_value
+                        overrides_uuid[uuid][name] = override_value::Union{Base.SHA1, String}
                     end
                 end
             else
@@ -174,6 +175,12 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any}
         end
     end
 
+    overrides = Dict{Symbol,Any}()
+    # Overrides by UUID
+    overrides[:UUID] = overrides_uuid
+    # Overrides by hash
+    overrides[:hash] = overrides_hash
+
     ARTIFACT_OVERRIDES[] = overrides
     return overrides
 end
@@ -190,11 +197,13 @@ Query the loaded `<DEPOT>/artifacts/Overrides.toml` settings for artifacts that
 redirected to a particular path or another content-hash.
 """
 function query_override(hash::SHA1; overrides::Dict{Symbol,Any} = load_overrides())
-    return map_override_path(get(overrides[:hash], hash, nothing))
+    overrides_hash = overrides[:hash]::Dict{SHA1,Union{String,SHA1}}
+    return map_override_path(get(overrides_hash, hash, nothing))
 end
 function query_override(pkg::Base.UUID, artifact_name::String; overrides::Dict{Symbol,Any} = load_overrides())
-    if haskey(overrides[:UUID], pkg)
-        return map_override_path(get(overrides[:UUID][pkg], artifact_name, nothing))
+    overrides_uuid = overrides[:UUID]::Dict{Base.UUID,Dict{String,Union{String,SHA1}}}
+    if haskey(overrides_uuid, pkg)
+        return map_override_path(get(overrides_uuid[pkg], artifact_name, nothing))
     end
     return nothing
 end
@@ -284,7 +293,7 @@ function unpack_platform(entry::Dict{String,Any}, name::String,
     delete!(tags, "os")
     delete!(tags, "arch")
     delete!(tags, "git-tree-sha1")
-    return Platform(entry["arch"], entry["os"], tags)
+    return Platform(entry["arch"]::String, entry["os"]::String, tags)
 end
 
 function pack_platform!(meta::Dict, p::AbstractPlatform)
@@ -326,8 +335,11 @@ function process_overrides(artifact_dict::Dict, pkg_uuid::Base.UUID)
     # Insert just-in-time hash overrides by looking up the names of anything we need to
     # override for this UUID, and inserting new overrides for those hashes.
     overrides = load_overrides()
-    if haskey(overrides[:UUID], pkg_uuid)
-        pkg_overrides = overrides[:UUID][pkg_uuid]::Dict{String, <:Any}
+    overrides_uuid = overrides[:UUID]::Dict{Base.UUID,Dict{String,Union{String,SHA1}}}
+    overrides_hash = overrides[:hash]::Dict{SHA1,Union{String,SHA1}}
+
+    if haskey(overrides_uuid, pkg_uuid)
+        pkg_overrides = overrides_uuid[pkg_uuid]::Dict{String, <:Any}
 
         for name in keys(artifact_dict)
             # Skip names that we're not overriding
@@ -336,14 +348,16 @@ function process_overrides(artifact_dict::Dict, pkg_uuid::Base.UUID)
             end
 
             # If we've got a platform-specific friend, override all hashes:
-            if isa(artifact_dict[name], Array)
-                for entry in artifact_dict[name]
-                    hash = SHA1(entry["git-tree-sha1"])
-                    overrides[:hash][hash] = overrides[:UUID][pkg_uuid][name]
+            artifact_dict_name = artifact_dict[name]
+            if isa(artifact_dict_name, Vector{Any})
+                for entry in artifact_dict_name
+                    entry = entry::Dict{String,Any}
+                    hash = SHA1(entry["git-tree-sha1"]::String)
+                    overrides_hash[hash] = overrides_uuid[pkg_uuid][name]
                 end
-            elseif isa(artifact_dict[name], Dict)
-                hash = SHA1(artifact_dict[name]["git-tree-sha1"])
-                overrides[:hash][hash] = overrides[:UUID][pkg_uuid][name]
+            elseif isa(artifact_dict_name, Dict{String, Any})
+                hash = SHA1(artifact_dict_name["git-tree-sha1"]::String)
+                overrides_hash[hash] = overrides_uuid[pkg_uuid][name]
             end
         end
     end
@@ -386,9 +400,9 @@ function artifact_meta(name::String, artifact_dict::Dict, artifacts_toml::String
 
     # If it's an array, find the entry that best matches our current platform
     if isa(meta, Vector)
-        dl_dict = Dict{AbstractPlatform,Dict{String,Any}}()
+        dl_dict = Dict{Platform,Dict{String,Any}}()
         for x in meta
-            x::Dict{String}
+            x = x::Dict{String, Any}
             dl_dict[unpack_platform(x, name, artifacts_toml)] = x
         end
         meta = select_platform(dl_dict, platform)
@@ -399,9 +413,12 @@ function artifact_meta(name::String, artifact_dict::Dict, artifacts_toml::String
     end
 
     # This is such a no-no, we are going to call it out right here, right now.
-    if meta !== nothing && !haskey(meta, "git-tree-sha1")
-        @error("Invalid artifacts file at $(artifacts_toml): artifact '$name' contains no `git-tree-sha1`!")
-        return nothing
+    if meta !== nothing
+        meta = meta::Dict{String, Any}
+        if !haskey(meta, "git-tree-sha1")
+            @error("Invalid artifacts file at $(artifacts_toml): artifact '$name' contains no `git-tree-sha1`!")
+            return nothing
+        end
     end
 
     # Return the full meta-dict.
@@ -426,7 +443,7 @@ function artifact_hash(name::String, artifacts_toml::String;
         return nothing
     end
 
-    return SHA1(meta["git-tree-sha1"])
+    return SHA1(meta["git-tree-sha1"]::String)
 end
 
 function select_downloadable_artifacts(artifact_dict::Dict, artifacts_toml::String;
@@ -525,11 +542,19 @@ function jointail(dir, tail)
     end
 end
 
-function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, @nospecialize(lazyartifacts))
-    moduleroot = Base.moduleroot(__module__)
-    if haskey(Base.module_keys, moduleroot)
+function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, ::Val{LazyArtifacts}) where LazyArtifacts
+    world = Base._require_world_age[]
+    if world == typemax(UInt)
+        world = Base.get_world_counter()
+    end
+    return Base.invoke_in_world(world, __artifact_str, __module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, Val(LazyArtifacts))::String
+end
+
+function __artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, ::Val{LazyArtifacts}) where LazyArtifacts
+    pkg = Base.PkgId(__module__)
+    if pkg.uuid !== nothing
         # Process overrides for this UUID, if we know what it is
-        process_overrides(artifact_dict, Base.module_keys[moduleroot].uuid)
+        process_overrides(artifact_dict, pkg.uuid)
     end
 
     # If the artifact exists, we're in the happy path and we can immediately
@@ -544,11 +569,12 @@ function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dic
     # If not, try determining what went wrong:
     meta = artifact_meta(name, artifact_dict, artifacts_toml; platform)
     if meta !== nothing && get(meta, "lazy", false)
-        if lazyartifacts isa Module && isdefined(lazyartifacts, :ensure_artifact_installed)
-            if nameof(lazyartifacts) in (:Pkg, :Artifacts)
+        if LazyArtifacts isa Module && isdefined(LazyArtifacts, :ensure_artifact_installed)
+            if nameof(LazyArtifacts) in (:Pkg, :Artifacts, :PkgArtifacts)
                 Base.depwarn("using Pkg instead of using LazyArtifacts is deprecated", :var"@artifact_str", force=true)
             end
-            return jointail(lazyartifacts.ensure_artifact_installed(string(name), artifacts_toml; platform), path_tail)
+            path_base = (@invokelatest LazyArtifacts.ensure_artifact_installed(string(name), meta, artifacts_toml; platform))::String
+            return jointail(path_base, path_tail)
         end
         error("Artifact $(repr(name)) is a lazy artifact; package developers must call `using LazyArtifacts` in $(__module__) before using lazy artifacts.")
     end
@@ -625,10 +651,9 @@ function artifact_slash_lookup(name::String, artifact_dict::Dict,
     if meta === nothing
         error("Cannot locate artifact '$(name)' for $(triplet(platform)) in '$(artifacts_toml)'")
     end
-    hash = SHA1(meta["git-tree-sha1"])
+    hash = SHA1(meta["git-tree-sha1"]::String)
     return artifact_name, artifact_path_tail, hash
 end
-
 """
     macro artifact_str(name)
 
@@ -676,35 +701,34 @@ macro artifact_str(name, platform=nothing)
     local artifact_dict = load_artifacts_toml(artifacts_toml)
 
     # Invalidate calling .ji file if Artifacts.toml file changes
-    Base.include_dependency(artifacts_toml)
+    Base.include_dependency(artifacts_toml, track_content = true)
 
     # Check if the user has provided `LazyArtifacts`, and thus supports lazy artifacts
     # If not, check to see if `Pkg` or `Pkg.Artifacts` has been imported.
-    lazyartifacts = nothing
-    for module_name in (:LazyArtifacts, :Pkg, :Artifacts)
+    LazyArtifacts = nothing
+    for module_name in (:LazyArtifacts, :Pkg, :Artifacts, :PkgArtifacts)
         if isdefined(__module__, module_name)
-            lazyartifacts = GlobalRef(__module__, module_name)
+            LazyArtifacts = GlobalRef(__module__, module_name)
             break
         end
     end
 
     # If `name` is a constant, (and we're using the default `Platform`) we can actually load
     # and parse the `Artifacts.toml` file now, saving the work from runtime.
-    if isa(name, AbstractString) && platform === nothing
-        # To support slash-indexing, we need to split the artifact name from the path tail:
+    if platform === nothing
         platform = HostPlatform()
+    end
+    if isa(name, AbstractString) && isa(platform, AbstractPlatform)
+        # To support slash-indexing, we need to split the artifact name from the path tail:
         artifact_name, artifact_path_tail, hash = artifact_slash_lookup(name, artifact_dict, artifacts_toml, platform)
         return quote
-            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), $(lazyartifacts))::String
+            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), Val($(LazyArtifacts)))::String
         end
     else
-        if platform === nothing
-            platform = :($(HostPlatform)())
-        end
         return quote
             local platform = $(esc(platform))
             local artifact_name, artifact_path_tail, hash = artifact_slash_lookup($(esc(name)), $(artifact_dict), $(artifacts_toml), platform)
-            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, $(lazyartifacts))::String
+            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, Val($(LazyArtifacts)))::String
         end
     end
 end
@@ -742,6 +766,5 @@ precompile(NamedTuple{(:pkg_uuid,)}, (Tuple{Base.UUID},))
 precompile(Core.kwfunc(load_artifacts_toml), (NamedTuple{(:pkg_uuid,), Tuple{Base.UUID}}, typeof(load_artifacts_toml), String))
 precompile(parse_mapping, (String, String, String))
 precompile(parse_mapping, (Dict{String, Any}, String, String))
-
-
+precompile(Tuple{typeof(Artifacts.__artifact_str), Module, String, Base.SubString{String}, String, Base.Dict{String, Any}, Base.SHA1, Base.BinaryPlatforms.Platform, Base.Val{Artifacts}})
 end # module Artifacts
diff --git a/stdlib/Artifacts/test/Artifacts.toml b/stdlib/Artifacts/test/Artifacts.toml
index 4b715b74c128b..5faf1012dec54 100644
--- a/stdlib/Artifacts/test/Artifacts.toml
+++ b/stdlib/Artifacts/test/Artifacts.toml
@@ -1,146 +1,163 @@
 [[HelloWorldC]]
 arch = "aarch64"
-git-tree-sha1 = "95fce80ec703eeb5f4270fef6821b38d51387499"
+git-tree-sha1 = "0835a23111b12d2aa5e1f7a852ed71e0b92e3425"
 os = "macos"
 
     [[HelloWorldC.download]]
-    sha256 = "23f45918421881de8e9d2d471c70f6b99c26edd1dacd7803d2583ba93c8bbb28"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-apple-darwin.tar.gz"
+    sha256 = "4406a35689feaf532ff0347a11896449571e8a1c919e5550b01dfe10f2e64822"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.aarch64-apple-darwin.tar.gz"
 [[HelloWorldC]]
 arch = "aarch64"
-git-tree-sha1 = "1ccbaad776766366943fd5a66a8cbc9877ee8df9"
+git-tree-sha1 = "c82465bd6d0aa1369ff2fd961b73884d1f5de49a"
 libc = "glibc"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "82bca07ff25a75875936116ca977285160a2afcc4f58dd160c7b1600f55da655"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-gnu.tar.gz"
+    sha256 = "5bfa84332c7ee485ca8e2eee216ad9fa77b2c43d5f261baa823e301b7c789ec4"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.aarch64-linux-gnu.tar.gz"
 [[HelloWorldC]]
 arch = "aarch64"
-git-tree-sha1 = "dc43ab874611cfc26641741c31b8230276d7d664"
+git-tree-sha1 = "cb4b8c88778c6cd93b6df38ec5b95a2678434f5d"
 libc = "musl"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "36b7c554f1cb04d5282b991c66a10b2100085ac8deb2156bf52b4f7c4e406c04"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-musl.tar.gz"
+    sha256 = "924df1c2a386f79a2727a2f989393102649a24863214f2e88cb4a677d3d22e14"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.aarch64-linux-musl.tar.gz"
+[[HelloWorldC]]
+arch = "aarch64"
+git-tree-sha1 = "7db155cf8485fbeb23d30a305f76ece191db9dc4"
+os = "freebsd"
+
+    [[HelloWorldC.download]]
+    sha256 = "d86d992f428df1264d55d7ac886ccd0a0539fda82363bf5dda872d12ea742528"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.aarch64-unknown-freebsd.tar.gz"
 [[HelloWorldC]]
 arch = "armv6l"
 call_abi = "eabihf"
-git-tree-sha1 = "b7128521583d02d2dbe9c8de6fe156b79df781d9"
+git-tree-sha1 = "20a32b71145b67e708f63fb5880a7243727aec0f"
 libc = "glibc"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "5e094b9c6e4c6a77ecc8dfc2b841ac1f2157f6a81f4c47f1e0d3e9a04eec7945"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-gnueabihf.tar.gz"
+    sha256 = "6f0997b0aad387ba6e2402530642bb4ded85b0243460d2e4b13d94f2c8340a44"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.armv6l-linux-gnueabihf.tar.gz"
 [[HelloWorldC]]
 arch = "armv6l"
 call_abi = "eabihf"
-git-tree-sha1 = "edb3893a154519d6786234f5c83994c34e11feed"
+git-tree-sha1 = "c1179604ea37fa66ee6d5d592c7bbfd1f20292c3"
 libc = "musl"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "0a2203f061ba2ef7ce4c452ec7874be3acc6db1efac8091f85d113c3404e6bb6"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-musleabihf.tar.gz"
+    sha256 = "0aca47bce6f09c38a7939277a593deb988123fe59f7992225a1ede8e174f1b06"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.armv6l-linux-musleabihf.tar.gz"
 [[HelloWorldC]]
 arch = "armv7l"
 call_abi = "eabihf"
-git-tree-sha1 = "5a8288c8a30578c0d0f24a9cded29579517ce7a8"
+git-tree-sha1 = "0a8e7b523ef6be31311aefe9983a488616e58201"
 libc = "glibc"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "a4392a4c8f834c97f9d8822ddfb1813d8674fa602eeaf04d6359c0a9e98478ec"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-gnueabihf.tar.gz"
+    sha256 = "f29f4da556d2b4ee9eaff7740aa0f9436406b75b0f1ec428e881a47ab7b7477b"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.armv7l-linux-gnueabihf.tar.gz"
 [[HelloWorldC]]
 arch = "armv7l"
 call_abi = "eabihf"
-git-tree-sha1 = "169c261b321c4dc95894cdd2db9d0d0caa84677f"
+git-tree-sha1 = "ca94b4d87f1a276066a2994733142e35046c41dd"
 libc = "musl"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "ed1aacbf197a6c78988725a39defad130ed31a2258f8e7846f73b459821f21d3"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-musleabihf.tar.gz"
+    sha256 = "5fb4019d6d797e5e3860cfec90cab12f6865fa624e87b51c20220a44bb94846a"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.armv7l-linux-musleabihf.tar.gz"
 [[HelloWorldC]]
 arch = "i686"
-git-tree-sha1 = "fd35f9155dc424602d01fbf983eb76be3217a28f"
+git-tree-sha1 = "91376c8b0bc90c47076cab4e55bf77e86bb59076"
 libc = "glibc"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "048fcff5ff47a3cc1e84a2688935fcd658ad1c7e7c52c0e81fe88ce6c3697aba"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-gnu.tar.gz"
+    sha256 = "b775c985231cd0626afd0111902a764c75c9a8a123b12e1f386a1c2af3cef799"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.i686-linux-gnu.tar.gz"
 [[HelloWorldC]]
 arch = "i686"
-git-tree-sha1 = "8db14df0f1d2a3ed9c6a7b053a590ca6527eb95e"
+git-tree-sha1 = "b50220be02e9c839749f91a70694ae68c2712c8e"
 libc = "musl"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "d521b4420392b8365de5ed0ef38a3b6c822665d7c257d3eef6f725c205bb3d78"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-musl.tar.gz"
+    sha256 = "6aecc06cf803ad16703744610deb243a21b39e19ae1951a38977610881698f9e"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.i686-linux-musl.tar.gz"
 [[HelloWorldC]]
 arch = "i686"
-git-tree-sha1 = "56f82168947b8dc7bb98038f063209b9f864eaff"
+git-tree-sha1 = "cc9cfa3272d4d3844d6fcf8b6b971bd68dbc792f"
 os = "windows"
 
     [[HelloWorldC.download]]
-    sha256 = "de578cf5ee2f457e9ff32089cbe17d03704a929980beddf4c41f4c0eb32f19c6"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-w64-mingw32.tar.gz"
+    sha256 = "bbf3276bcfc8223061c3b1cf8725425bfc33ac2929214ba57eecfd170d30f096"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.i686-w64-mingw32.tar.gz"
 [[HelloWorldC]]
 arch = "powerpc64le"
-git-tree-sha1 = "9c8902b62f5b1aaa7c2839c804bed7c3a0912c7b"
+git-tree-sha1 = "5e9c87fc4e3372c27a77061a49d97fa5002df0e4"
+libc = "glibc"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "e2a728b29124fc7408d6e47cc6fc943d0336d1386e56a3775a0665b34528881b"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.powerpc64le-linux-gnu.tar.gz"
+[[HelloWorldC]]
+arch = "riscv64"
+git-tree-sha1 = "3c9b23e46b82ab59141bbbc042158af4037d846d"
 libc = "glibc"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "63ddbfbb6ea0cafef544cc25415e7ebee6ee0a69db0878d0d4e1ed27c0ae0ab5"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.powerpc64le-linux-gnu.tar.gz"
+    sha256 = "59e2250eab04924eb7167d3232e4b0176c53097e4b21f2f3e3621f1e39f43107"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.riscv64-linux-gnu.tar.gz"
 [[HelloWorldC]]
 arch = "x86_64"
-git-tree-sha1 = "f8ab5a03697f9afc82210d8a2be1d94509aea8bc"
+git-tree-sha1 = "2e1742c9c0addd693b0b025f7a1e7aa4c50a0e6c"
 os = "macos"
 
     [[HelloWorldC.download]]
-    sha256 = "f5043338613672b12546c59359c7997c5381a9a60b86aeb951dee74de428d5e3"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-apple-darwin.tar.gz"
+    sha256 = "c4f0c83ae4f72a039c33beb26ebb1d4c0fb739f34360102be79909a0dc17f47f"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.x86_64-apple-darwin.tar.gz"
 [[HelloWorldC]]
 arch = "x86_64"
-git-tree-sha1 = "1ed3d81088f16e3a1fa4e3d4c4c509b8c117fecf"
+git-tree-sha1 = "8c8251b0c21615bce0701995eded26ac7697b5cc"
 libc = "glibc"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "a18212e7984b08b23bec06e8bf9286a89b9fa2e8ee0dd46af3b852fe22013a4f"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-gnu.tar.gz"
+    sha256 = "974f7e1d1cdbebad149e51fed4f1b7c6a0b5ccfa350f7d252dfcf66c2dbf9f63"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.x86_64-linux-gnu.tar.gz"
 [[HelloWorldC]]
 arch = "x86_64"
-git-tree-sha1 = "c04ef757b8bb773d17a0fd0ea396e52db1c7c385"
+git-tree-sha1 = "cfaaf0517421585561e3b30dd6f53f6c14b2835f"
 libc = "musl"
 os = "linux"
 
     [[HelloWorldC.download]]
-    sha256 = "7a3d1b09410989508774f00e073ea6268edefcaba7617fc5085255ec8e82555b"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-musl.tar.gz"
+    sha256 = "25d3d6ecc753f4dbbcaab0db7b6c20b29b0a79b0c31f7a26a0cf18c365d27809"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.x86_64-linux-musl.tar.gz"
 [[HelloWorldC]]
 arch = "x86_64"
-git-tree-sha1 = "5f7e7abf7d545a1aaa368f22e3e01ea0268870b1"
+git-tree-sha1 = "8e8a17876a9c1147bae6a53a175344b805ee72d4"
 os = "freebsd"
 
     [[HelloWorldC.download]]
-    sha256 = "56aedffe38fe20294e93cfc2eb0a193c8e2ddda5a697b302e77ff48ac1195198"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-unknown-freebsd.tar.gz"
+    sha256 = "61a3f945941adbf75c87c1c28f05e95b187959fedf29ecaa36519c5d1941bf23"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.x86_64-unknown-freebsd.tar.gz"
 [[HelloWorldC]]
 arch = "x86_64"
-git-tree-sha1 = "2f1a6d4f82cd1eea785a5141b992423c09491f1b"
+git-tree-sha1 = "6e1eb164b0651aa44621eac4dfa340d6e60295ef"
 os = "windows"
 
     [[HelloWorldC.download]]
-    sha256 = "aad77a16cbc9752f6ec62549a28c7e9f3f7f57919f6fa9fb924e0c669b11f8c4"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-w64-mingw32.tar.gz"
+    sha256 = "1f10e46f7b073136f7f668de89096d631ae8bb8903547d588f6817f0b780b2fc"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.x86_64-w64-mingw32.tar.gz"
 
 [socrates]
 git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239"
diff --git a/stdlib/Artifacts/test/runtests.jl b/stdlib/Artifacts/test/runtests.jl
index 67117217be549..cb81c16347abf 100644
--- a/stdlib/Artifacts/test/runtests.jl
+++ b/stdlib/Artifacts/test/runtests.jl
@@ -1,12 +1,92 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+import Base: SHA1
 
 using Artifacts, Test, Base.BinaryPlatforms
-using Artifacts: with_artifacts_directory, pack_platform!, unpack_platform
+using Artifacts: with_artifacts_directory, pack_platform!, unpack_platform, load_overrides
+using TOML
 
 # prepare for the package tests by ensuring the required artifacts are downloaded now
 artifacts_dir = mktempdir()
 run(addenv(`$(Base.julia_cmd()) --color=no $(joinpath(@__DIR__, "refresh_artifacts.jl")) $(artifacts_dir)`, "TERM"=>"dumb"))
 
+@testset "Load Overrides" begin
+    """
+        create_test_overrides_toml(temp_dir::String)
+
+    Create "Overrides.toml" in the given `temp_dir`.
+    """
+    function create_test_overrides_toml(temp_dir::String)
+        # Define the overrides
+        overrides = Dict(
+            "78f35e74ff113f02274ce60dab6e92b4546ef806" => "/path/to/replacement",
+            "c76f8cda85f83a06d17de6c57aabf9e294eb2537" => "fb886e813a4aed4147d5979fcdf27457d20aa35d",
+            "d57dbccd-ca19-4d82-b9b8-9d660942965b" => Dict(
+                "c_simple" => "/path/to/c_simple_dir",
+                "libfoo" => "fb886e813a4aed4147d5979fcdf27457d20aa35d"
+            )
+        )
+
+        # Get the artifacts directory
+        artifacts_dir = joinpath(temp_dir, "artifacts")
+
+        # Ensure the artifacts directory exists
+        isdir(artifacts_dir) || mkdir(artifacts_dir)
+
+        # Get the path to the Overrides.toml file
+        overrides_path = joinpath(artifacts_dir, "Overrides.toml")
+
+        # Create the Overrides.toml file
+        open(overrides_path, "w") do io
+            TOML.print(io, overrides)
+        end
+    end
+
+    # Specify the expected test result when depot path does not exist or no overriding happened
+    empty_output = Dict{Symbol, Any}(
+        :UUID => Dict{Base.UUID, Dict{String, Union{SHA1, String}}}(),
+        :hash => Dict{SHA1, Union{SHA1, String}}()
+    )
+
+    # Specify the expected test result when overriding happened
+    expected_output = Dict{Symbol, Any}(
+        :UUID => Dict{Base.UUID, Dict{String, Union{SHA1, String}}}(Base.UUID("d57dbccd-ca19-4d82-b9b8-9d660942965b") => Dict("c_simple" => "/path/to/c_simple_dir", "libfoo" => SHA1("fb886e813a4aed4147d5979fcdf27457d20aa35d"))),
+        :hash => Dict{SHA1, Union{SHA1, String}}(SHA1("78f35e74ff113f02274ce60dab6e92b4546ef806") => "/path/to/replacement", SHA1("c76f8cda85f83a06d17de6c57aabf9e294eb2537") => SHA1("fb886e813a4aed4147d5979fcdf27457d20aa35d"))
+    )
+
+    # Test `load_overrides()` works with *no* "Overrides.toml" file
+    @test load_overrides() == empty_output
+
+    # Create a temporary directory
+    mktempdir() do temp_dir
+        # Back up the old `DEPOT_PATH``
+        old_depot_path = copy(Base.DEPOT_PATH)
+
+        # Set `DEPOT_PATH` to that directory
+        empty!(Base.DEPOT_PATH)
+        push!(Base.DEPOT_PATH, temp_dir)
+
+        try
+            # Create "Overrides.toml" for the test
+            create_test_overrides_toml(temp_dir)
+
+            # Test `load_overrides()` works *with* "Overrides.toml" file but non-nothing ARTIFACT_OVERRIDES[]
+            @test load_overrides() == empty_output
+
+            # Test `load_overrides()` works *with* "Overrides.toml" file with force parameter, which overrides even when `ARTIFACT_OVERRIDES[] !== nothing``
+            @test load_overrides(force=true) == expected_output
+        finally # Make sure `DEPOT_PATH` will be restored to the status quo in the event of a bug
+            # Restore the old `DEPOT_PATH` to avoid messing with any other code
+            empty!(Base.DEPOT_PATH)
+            append!(Base.DEPOT_PATH, old_depot_path)
+        end
+    end
+    # Temporary directory and test "Overrides.toml" file will be automatically deleted when out of scope
+    # This means after this block, the system *should* behave like this test never happened.
+
+    # Test the "Overrides.toml" file is cleared back to the status quo
+    @test load_overrides(force=true) == empty_output
+end
+
 @testset "Artifact Paths" begin
     mktempdir() do tempdir
         with_artifacts_directory(tempdir) do
@@ -115,20 +195,37 @@ end
     with_artifacts_directory(artifacts_dir) do
         win64 = Platform("x86_64", "windows")
         mac64 = Platform("x86_64", "macos")
-        @test basename(@artifact_str("HelloWorldC", win64)) == "2f1a6d4f82cd1eea785a5141b992423c09491f1b"
-        @test basename(@artifact_str("HelloWorldC", mac64)) == "f8ab5a03697f9afc82210d8a2be1d94509aea8bc"
+        @test basename(@artifact_str("HelloWorldC", win64)) == "6e1eb164b0651aa44621eac4dfa340d6e60295ef"
+        @test basename(@artifact_str("HelloWorldC", mac64)) == "2e1742c9c0addd693b0b025f7a1e7aa4c50a0e6c"
     end
 end
 
+@testset "artifact_hash()" begin
+    # Use the Linus OS on an ARMv7L architecture for the tests to make tests reproducible
+    armv7l_linux = Platform("armv7l", "linux")
+
+    # Check the first key in Artifacts.toml is hashed correctly
+    @test artifact_hash("HelloWorldC", joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) ==
+            SHA1("0a8e7b523ef6be31311aefe9983a488616e58201")
+
+    # Check the second key in Artifacts.toml is hashed correctly
+    @test artifact_hash("socrates", joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) ==
+            SHA1("43563e7631a7eafae1f9f8d9d332e3de44ad7239")
+
+    # Check artifact_hash() works for any AbstractString
+    @test artifact_hash(SubString("HelloWorldC0", 1, 11), joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) ==
+            SHA1("0a8e7b523ef6be31311aefe9983a488616e58201")
+end
+
 @testset "select_downloadable_artifacts()" begin
     armv7l_linux = Platform("armv7l", "linux")
     artifacts = select_downloadable_artifacts(joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux)
     @test length(keys(artifacts)) == 1
-    @test artifacts["HelloWorldC"]["git-tree-sha1"] == "5a8288c8a30578c0d0f24a9cded29579517ce7a8"
+    @test artifacts["HelloWorldC"]["git-tree-sha1"] == "0a8e7b523ef6be31311aefe9983a488616e58201"
 
     artifacts = select_downloadable_artifacts(joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux, include_lazy=true)
     @test length(keys(artifacts)) == 2
-    @test artifacts["HelloWorldC"]["git-tree-sha1"] == "5a8288c8a30578c0d0f24a9cded29579517ce7a8"
+    @test artifacts["HelloWorldC"]["git-tree-sha1"] == "0a8e7b523ef6be31311aefe9983a488616e58201"
     @test artifacts["socrates"]["git-tree-sha1"] == "43563e7631a7eafae1f9f8d9d332e3de44ad7239"
 end
 
@@ -161,6 +258,10 @@ end
 @testset "`Artifacts.artifact_names` and friends" begin
     n = length(Artifacts.artifact_names)
     @test length(Base.project_names) == n
-    @test length(Base.manifest_names) == n
+    @test length(Base.manifest_names) == 2n # there are two manifest names per project name
     @test length(Base.preferences_names) == n
 end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Artifacts))
+end
diff --git a/stdlib/Base64/Project.toml b/stdlib/Base64/Project.toml
index 68d63837fc385..14796beb7e21a 100644
--- a/stdlib/Base64/Project.toml
+++ b/stdlib/Base64/Project.toml
@@ -1,5 +1,6 @@
 name = "Base64"
 uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Base64/docs/src/index.md b/stdlib/Base64/docs/src/index.md
index 6bc647f8a2e67..26e9d70f2ff9f 100644
--- a/stdlib/Base64/docs/src/index.md
+++ b/stdlib/Base64/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Base64/docs/src/index.md"
+```
+
 # Base64
 
 ```@docs
diff --git a/stdlib/Base64/src/buffer.jl b/stdlib/Base64/src/buffer.jl
index 44a9c0931ac95..009a6d56cfde8 100644
--- a/stdlib/Base64/src/buffer.jl
+++ b/stdlib/Base64/src/buffer.jl
@@ -2,37 +2,37 @@
 
 # Data buffer for pipes.
 mutable struct Buffer
-    data::Vector{UInt8}
-    ptr::Ptr{UInt8}
+    const data::Memory{UInt8}
+    offset::Int
     size::Int
 
     function Buffer(bufsize)
-        data = Vector{UInt8}(undef, bufsize)
-        return new(data, pointer(data), 0)
+        data = Memory{UInt8}(undef, bufsize)
+        return new(data, 0, 0)
     end
 end
 
 Base.empty!(buffer::Buffer) = buffer.size = 0
-Base.getindex(buffer::Buffer, i::Integer) = unsafe_load(buffer.ptr, i)
-Base.setindex!(buffer::Buffer, v::UInt8, i::Integer) = unsafe_store!(buffer.ptr, v, i)
+Base.getindex(buffer::Buffer, i::Integer) = buffer.data[buffer.offset + i]
+Base.setindex!(buffer::Buffer, v::UInt8, i::Integer) = buffer.data[buffer.offset + i] = v
 Base.firstindex(buffer::Buffer) = 1
 Base.lastindex(buffer::Buffer) = buffer.size
-Base.pointer(buffer::Buffer) = buffer.ptr
-capacity(buffer::Buffer) = Int(pointer(buffer.data, lastindex(buffer.data) + 1) - buffer.ptr)
+Base.pointer(buffer::Buffer) = pointer(buffer.data) + buffer.offset
+capacity(buffer::Buffer) = length(buffer.data) - buffer.offset
 
 function consumed!(buffer::Buffer, n::Integer)
     @assert n ≤ buffer.size
-    buffer.ptr += n
+    buffer.offset += n
     buffer.size -= n
 end
 
 function read_to_buffer(io::IO, buffer::Buffer)
-    offset = buffer.ptr - pointer(buffer.data)
+    offset = buffer.offset
     copyto!(buffer.data, 1, buffer.data, offset + 1, buffer.size)
-    buffer.ptr = pointer(buffer.data)
+    buffer.offset = 0
     if !eof(io)
         n = min(bytesavailable(io), capacity(buffer) - buffer.size)
-        unsafe_read(io, buffer.ptr + buffer.size, n)
+        unsafe_read(io, pointer(buffer) + buffer.size, n)
         buffer.size += n
     end
     return
diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl
index 056293528e142..f9db189960c80 100644
--- a/stdlib/Base64/src/decode.jl
+++ b/stdlib/Base64/src/decode.jl
@@ -32,17 +32,19 @@ julia> String(read(iob64_decode))
 "Hello!"
 ```
 """
-struct Base64DecodePipe <: IO
-    io::IO
+struct Base64DecodePipe{T <: IO} <: IO
+    io::T
     buffer::Buffer
     rest::Vector{UInt8}
 
-    function Base64DecodePipe(io::IO)
+    function Base64DecodePipe{T}(io::T) where {T <: IO}
         buffer = Buffer(512)
-        return new(io, buffer, UInt8[])
+        return new{T}(io, buffer, UInt8[])
     end
 end
 
+Base64DecodePipe(io::IO) = Base64DecodePipe{IO}(io)
+
 Base.isreadable(pipe::Base64DecodePipe) = !isempty(pipe.rest) || isreadable(pipe.io)
 Base.iswritable(::Base64DecodePipe) = false
 
@@ -172,21 +174,23 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest)
     # Write output.
     p::Ptr{UInt8} = ptr
     p_end = ptr + n
-    function output(b)
-        if p < p_end
-            unsafe_store!(p, b)
-            p += 1
-        else
-            push!(rest, b)
-        end
-    end
-    k ≥ 1 && output(b1 << 2 | b2 >> 4)
-    k ≥ 2 && output(b2 << 4 | b3 >> 2)
-    k ≥ 3 && output(b3 << 6 | b4     )
+    k ≥ 1 && (p = _output(b1 << 2 | b2 >> 4, p, p_end, rest))
+    k ≥ 2 && (p = _output(b2 << 4 | b3 >> 2, p, p_end, rest))
+    k ≥ 3 && (p = _output(b3 << 6 | b4     , p, p_end, rest))
 
     return i, p, k == 0
 end
 
+function _output(b, p, p_end, rest)
+    if p < p_end
+        unsafe_store!(p, b)
+        return p + 1
+    else
+        push!(rest, b)
+        return p
+    end
+end
+
 """
     base64decode(string)
 
diff --git a/stdlib/Base64/src/encode.jl b/stdlib/Base64/src/encode.jl
index 588b49aa28d97..d690db651fcd8 100644
--- a/stdlib/Base64/src/encode.jl
+++ b/stdlib/Base64/src/encode.jl
@@ -24,26 +24,28 @@ julia> write(iob64_encode, "Hello!")
 
 julia> close(iob64_encode);
 
-julia> str = String(take!(io))
+julia> str = takestring!(io)
 "SGVsbG8h"
 
 julia> String(base64decode(str))
 "Hello!"
 ```
 """
-struct Base64EncodePipe <: IO
-    io::IO
+struct Base64EncodePipe{T <: IO} <: IO
+    io::T
     buffer::Buffer
 
-    function Base64EncodePipe(io::IO)
+    function Base64EncodePipe{T}(io::T) where {T <: IO}
         # The buffer size must be at least 3.
         buffer = Buffer(512)
-        pipe = new(io, buffer)
+        pipe = new{T}(io, buffer)
         finalizer(_ -> close(pipe), buffer)
         return pipe
     end
 end
 
+Base64EncodePipe(io::IO) = Base64EncodePipe{IO}(io)
+
 Base.isreadable(::Base64EncodePipe) = false
 Base.iswritable(pipe::Base64EncodePipe) = iswritable(pipe.io)
 
@@ -211,6 +213,6 @@ function base64encode(f::Function, args...; context=nothing)
         f(IOContext(b, context), args...)
     end
     close(b)
-    return String(take!(s))
+    return takestring!(s)
 end
 base64encode(args...; context=nothing) = base64encode(write, args...; context=context)
diff --git a/stdlib/Base64/test/runtests.jl b/stdlib/Base64/test/runtests.jl
index 11d0a3cca4348..145576f6ea3f4 100644
--- a/stdlib/Base64/test/runtests.jl
+++ b/stdlib/Base64/test/runtests.jl
@@ -1,7 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test, Random
-import Base64:
+using Base64:
+    Base64,
     Base64EncodePipe,
     base64encode,
     Base64DecodePipe,
@@ -142,3 +143,7 @@ end
         @test String(base64decode(splace(longEncodedText))) == longDecodedText
     end
 end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Base64))
+end
diff --git a/stdlib/CRC32c/Project.toml b/stdlib/CRC32c/Project.toml
index c1de88cbc7c52..d3ab5ff019503 100644
--- a/stdlib/CRC32c/Project.toml
+++ b/stdlib/CRC32c/Project.toml
@@ -1,5 +1,6 @@
 name = "CRC32c"
 uuid = "8bf52ea8-c179-5cab-976a-9e18b702a9bc"
+version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/CRC32c/docs/src/index.md b/stdlib/CRC32c/docs/src/index.md
index 24a073d1e3938..c00a792232c70 100644
--- a/stdlib/CRC32c/docs/src/index.md
+++ b/stdlib/CRC32c/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/CRC32c/docs/src/index.md"
+```
+
 # CRC32c
 
 Standard library module for computing the CRC-32c checksum.
diff --git a/stdlib/CRC32c/src/CRC32c.jl b/stdlib/CRC32c/src/CRC32c.jl
index 35d2d4cb339d6..c3a2fc1394cce 100644
--- a/stdlib/CRC32c/src/CRC32c.jl
+++ b/stdlib/CRC32c/src/CRC32c.jl
@@ -8,6 +8,7 @@ See [`CRC32c.crc32c`](@ref) for more information.
 module CRC32c
 
 import Base.FastContiguousSubArray
+import Base: DenseUInt8OrInt8
 
 export crc32c
 
@@ -15,9 +16,9 @@ export crc32c
     crc32c(data, crc::UInt32=0x00000000)
 
 Compute the CRC-32c checksum of the given `data`, which can be
-an `Array{UInt8}`, a contiguous subarray thereof, or a `String`.  Optionally, you can pass
-a starting `crc` integer to be mixed in with the checksum.  The `crc` parameter
-can be used to compute a checksum on data divided into chunks: performing
+an `Array{UInt8}`, a contiguous subarray thereof, an `AbstractVector{UInt8}`, or a `String`.
+Optionally, you can pass a starting `crc` integer to be mixed in with the checksum.
+The `crc` parameter can be used to compute a checksum on data divided into chunks: performing
 `crc32c(data2, crc32c(data1))` is equivalent to the checksum of `[data1; data2]`.
 (Technically, a little-endian checksum is computed.)
 
@@ -29,13 +30,31 @@ calling [`take!`](@ref).
 
 For a `String`, note that the result is specific to the UTF-8 encoding
 (a different checksum would be obtained from a different Unicode encoding).
-To checksum an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`,
+To checksum an `a::AbstractArray` of some other bitstype without padding,
+you can do `crc32c(vec(reinterpret(UInt8,a)))`,
 but note that the result may be endian-dependent.
 """
 function crc32c end
 
+function crc32c(a::AbstractVector{UInt8}, crc::UInt32=0x00000000)
+    # use block size 24576=8192*3, since that is the threshold for
+    # 3-way parallel SIMD code in the underlying jl_crc32c C function.
+    last = lastindex(a)
+    nb = length(a)
+    buf = Memory{UInt8}(undef, Int(min(nb, 24576)))
+    while nb > 0
+        n = min(nb, 24576)
+        copyto!(buf, 1, a, last - nb + 1, n)
+        crc = Base.unsafe_crc32c(buf, n % Csize_t, crc)
+        nb -= n
+    end
+    return crc
+end
+
+function crc32c(a::DenseUInt8OrInt8, crc::UInt32=0x00000000)
+    Base._crc32c(a, crc)
+end
 
-crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = Base._crc32c(a, crc)
 crc32c(s::Union{String, SubString{String}}, crc::UInt32=0x00000000) = Base._crc32c(s, crc)
 
 """
@@ -47,6 +66,5 @@ mixed with a starting `crc` integer.  If `nb` is not supplied, then
 """
 crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) = Base._crc32c(io, nb, crc)
 crc32c(io::IO, crc::UInt32=0x00000000) = Base._crc32c(io, crc)
-crc32c(io::IOStream, crc::UInt32=0x00000000) = Base._crc32c(io, crc)
 
 end
diff --git a/stdlib/CRC32c/test/runtests.jl b/stdlib/CRC32c/test/runtests.jl
index e9e933ee2451c..f4097a919c7bb 100644
--- a/stdlib/CRC32c/test/runtests.jl
+++ b/stdlib/CRC32c/test/runtests.jl
@@ -3,12 +3,23 @@
 using Test, Random
 using CRC32c
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")
+isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
+using .Main.OffsetArrays: Origin
+
+isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl"))
+using .Main.FillArrays: Fill
+
 function test_crc32c(crc32c)
     # CRC32c checksum (test data generated from @andrewcooke's CRC.jl package)
     for (n,crc) in [(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)]
         s = String(UInt8[1:n;])
         ss = SubString(String(UInt8[0:(n+1);]), 2:(n+1))
         @test crc32c(UInt8[1:n;]) == crc == crc32c(s) == crc32c(ss)
+        @test crc == crc32c(UInt8(1):UInt8(n))
+        m = Memory{UInt8}(undef, n)
+        m .= 1:n
+        @test crc == crc32c(m)
     end
 
     # test that crc parameter is equivalent to checksum of concatenated data,
@@ -45,6 +56,30 @@ function test_crc32c(crc32c)
             rm(f, force=true)
         end
     end
+
+    # test longer arrays to cover all the code paths in crc32c.c
+    LONG = 8192 # from crc32c.c
+    SHORT = 256 # from crc32c.c
+    n = LONG*3+SHORT*3+SHORT*2+64+7
+    bigg = vcat(reinterpret(UInt8, hton.(0x74d7f887 .^ (1:n÷4))), UInt8[1:n%4;])
+    for (offset,crc) in [(0, 0x13a5ecd5), (1, 0xecf34b7e), (2, 0xfa71b596), (3, 0xbfd24745), (4, 0xf0cb3370), (5, 0xb0ec88b5), (6, 0x258c20a8), (7, 0xa9bd638d)]
+        @test crc == crc32c(@view bigg[1+offset:end])
+    end
+
+    # test crc of AbstractVector{UInt8}
+    @test crc32c(Origin(0)(b"hello")) == crc32c(b"hello")
+    weird_vectors = [
+        view(rand(UInt8, 300000), 1:2:300000),
+        vec(reinterpret(UInt8, collect(Int64(1):Int64(4)))),
+        vec(reinterpret(UInt8, Int64(1):Int64(4))),
+        view([0x01, 0x02], UInt(1):UInt(2)),
+        Fill(0x00, UInt(100)),
+        Fill(0x00, big(100)),
+        reinterpret(UInt8, BitVector((true, false, true, false))),
+    ]
+    for a in weird_vectors
+        @test crc32c(a) == crc32c(collect(a))
+    end
 end
 unsafe_crc32c_sw(a, n, crc) =
     ccall(:jl_crc32c_sw, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n)
@@ -55,6 +90,8 @@ function crc32c_sw(s::Union{String, SubString{String}}, crc::UInt32=0x00000000)
     unsafe_crc32c_sw(s, sizeof(s), crc)
 end
 
+crc32c_sw(a::AbstractVector{UInt8}, crc::UInt32=0x00000000) =
+    crc32c_sw(copyto!(Vector{UInt8}(undef, length(a)), a))
 function crc32c_sw(io::IO, nb::Integer, crc::UInt32=0x00000000)
     nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0"))
     buf = Vector{UInt8}(undef, min(nb, 24576))
@@ -68,3 +105,7 @@ end
 crc32c_sw(io::IO, crc::UInt32=0x00000000) = crc32c_sw(io, typemax(Int64), crc)
 test_crc32c(crc32c)
 test_crc32c(crc32c_sw)
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(CRC32c))
+end
diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml
index 4c7aa35a99730..2f8143a77d740 100644
--- a/stdlib/CompilerSupportLibraries_jll/Project.toml
+++ b/stdlib/CompilerSupportLibraries_jll/Project.toml
@@ -2,9 +2,9 @@ name = "CompilerSupportLibraries_jll"
 uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
 
 # NOTE: When updating this, also make sure to update the value
-# `CSL_NEXT_GLIBCXX_VERSION` in `deps/csl.mk`, to properly disable
+# `CSL_NEXT_GLIBCXX_VERSION` in `Make.inc`, to properly disable
 # automatic usage of BB-built CSLs on extremely up-to-date systems!
-version = "1.0.5+0"
+version = "1.3.0+1"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
index bd7a0571f9d5a..9a0729c50d01f 100644
--- a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
+++ b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
@@ -4,80 +4,172 @@
 
 baremodule CompilerSupportLibraries_jll
 using Base, Libdl, Base.BinaryPlatforms
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
-const PATH_list = String[]
-const LIBPATH_list = String[]
-
-export libgfortran, libstdcxx, libgomp
+export libgfortran, libstdcxx, libgomp, libatomic, libgcc_s
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libgfortran_handle::Ptr{Cvoid} = C_NULL
+
+libatomic_path::String = ""
+const libatomic = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libatomic-1.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libatomic.1.dylib")
+    elseif Sys.isfreebsd()
+        BundledLazyLibraryPath("libatomic.so.3")
+    elseif Sys.islinux()
+        BundledLazyLibraryPath("libatomic.so.1")
+    else
+        error("CompilerSupportLibraries_jll: Library 'libatomic' is not available for $(Sys.KERNEL)")
+    end
+)
+
+if Sys.iswindows() || Sys.isapple() || arch(HostPlatform()) ∈ ("x86_64", "i686")
+    global libquadmath_path::String = ""
+    const libquadmath = LazyLibrary(
+        if Sys.iswindows()
+            BundledLazyLibraryPath("libquadmath-0.dll")
+        elseif Sys.isapple()
+            BundledLazyLibraryPath("libquadmath.0.dylib")
+        elseif (Sys.islinux() || Sys.isfreebsd()) && arch(HostPlatform()) ∈ ("x86_64", "i686")
+            BundledLazyLibraryPath("libquadmath.so.0")
+        else
+            error("CompilerSupportLibraries_jll: Library 'libquadmath' is not available for $(Sys.KERNEL)")
+        end
+    )
+end
+
+libgcc_s_path::String = ""
+const libgcc_s = LazyLibrary(
+    if Sys.iswindows()
+        if arch(HostPlatform()) == "x86_64"
+            BundledLazyLibraryPath("libgcc_s_seh-1.dll")
+        else
+            BundledLazyLibraryPath("libgcc_s_sjlj-1.dll")
+        end
+    elseif Sys.isapple()
+        if arch(HostPlatform()) == "aarch64" || libgfortran_version(HostPlatform()) == v"5"
+            BundledLazyLibraryPath("libgcc_s.1.1.dylib")
+        else
+            BundledLazyLibraryPath("libgcc_s.1.dylib")
+        end
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libgcc_s.so.1")
+    else
+        error("CompilerSupportLibraries_jll: Library 'libgcc_s' is not available for $(Sys.KERNEL)")
+    end
+)
+
 libgfortran_path::String = ""
-libstdcxx_handle::Ptr{Cvoid} = C_NULL
+const libgfortran = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath(string("libgfortran-", libgfortran_version(HostPlatform()).major, ".dll"))
+    elseif Sys.isapple()
+        BundledLazyLibraryPath(string("libgfortran.", libgfortran_version(HostPlatform()).major, ".dylib"))
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath(string("libgfortran.so.", libgfortran_version(HostPlatform()).major))
+    else
+        error("CompilerSupportLibraries_jll: Library 'libgfortran' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = @static if @isdefined(libquadmath)
+        LazyLibrary[libgcc_s, libquadmath]
+    else
+        LazyLibrary[libgcc_s]
+    end
+)
+
 libstdcxx_path::String = ""
-libgomp_handle::Ptr{Cvoid} = C_NULL
-libgomp_path::String = ""
+const libstdcxx = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libstdc++-6.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libstdc++.6.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libstdc++.so.6")
+    else
+        error("CompilerSupportLibraries_jll: Library 'libstdcxx' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = LazyLibrary[libgcc_s]
+)
 
-if Sys.iswindows()
-    if arch(HostPlatform()) == "x86_64"
-        const libgcc_s = "libgcc_s_seh-1.dll"
+libgomp_path::String = ""
+const libgomp = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libgomp-1.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libgomp.1.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libgomp.so.1")
     else
-        const libgcc_s = "libgcc_s_sjlj-1.dll"
-    end
-    const libgfortran = string("libgfortran-", libgfortran_version(HostPlatform()).major, ".dll")
-    const libstdcxx = "libstdc++-6.dll"
-    const libgomp = "libgomp-1.dll"
-    const libssp = "libssp-0.dll"
-elseif Sys.isapple()
-    if arch(HostPlatform()) == "aarch64" || libgfortran_version(HostPlatform()) == v"5"
-        const libgcc_s = "@rpath/libgcc_s.1.1.dylib"
+        error("CompilerSupportLibraries_jll: Library 'libgomp' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows()
+        LazyLibrary[libgcc_s]
     else
-        const libgcc_s = "@rpath/libgcc_s.1.dylib"
+        LazyLibrary[]
+    end
+)
+
+# only define if isfile
+let
+    if Sys.iswindows() || Sys.isapple() || libc(HostPlatform()) != "musl"
+        _libssp_path = if Sys.iswindows()
+            BundledLazyLibraryPath("libssp-0.dll")
+        elseif Sys.isapple()
+            BundledLazyLibraryPath("libssp.0.dylib")
+        elseif Sys.islinux() && libc(HostPlatform()) != "musl"
+            BundledLazyLibraryPath("libssp.so.0")
+        end
+        if isfile(string(_libssp_path))
+            global libssp_path::String = ""
+            @eval const libssp = LazyLibrary($(_libssp_path))
+        end
     end
-    const libgfortran = string("@rpath/", "libgfortran.", libgfortran_version(HostPlatform()).major, ".dylib")
-    const libstdcxx = "@rpath/libstdc++.6.dylib"
-    const libgomp = "@rpath/libgomp.1.dylib"
-    const libssp = "@rpath/libssp.0.dylib"
-else
-    const libgcc_s = "libgcc_s.so.1"
-    const libgfortran = string("libgfortran.so.", libgfortran_version(HostPlatform()).major)
-    const libstdcxx = "libstdc++.so.6"
-    const libgomp = "libgomp.so.1"
-    if libc(HostPlatform()) != "musl"
-        const libssp = "libssp.so.0"
+end
+
+# Conform to LazyJLLWrappers API
+function eager_mode()
+    if @isdefined(libatomic)
+        dlopen(libatomic)
+    end
+    dlopen(libgcc_s)
+    dlopen(libgomp)
+    if @isdefined libquadmath
+        dlopen(libquadmath)
+    end
+    if @isdefined libssp
+        dlopen(libssp)
     end
+    dlopen(libgfortran)
+    dlopen(libstdcxx)
 end
+is_available() = true
 
 function __init__()
-    global libgcc_s_handle = dlopen(libgcc_s)
-    global libgcc_s_path = dlpath(libgcc_s_handle)
-    global libgfortran_handle = dlopen(libgfortran)
-    global libgfortran_path = dlpath(libgfortran_handle)
-    global libstdcxx_handle = dlopen(libstdcxx)
-    global libstdcxx_path = dlpath(libstdcxx_handle)
-    global libgomp_handle = dlopen(libgomp)
-    global libgomp_path = dlpath(libgomp_handle)
-    @static if libc(HostPlatform()) != "musl"
-        dlopen(libssp; throw_error = false)
+    global libatomic_path = string(libatomic.path)
+    global libgcc_s_path = string(libgcc_s.path)
+    global libgomp_path = string(libgomp.path)
+    if @isdefined libquadmath_path
+        global libquadmath_path = string(libquadmath.path)
+    end
+    if @isdefined libssp_path
+        global libssp_path = string(libssp.path)
     end
+    global libgfortran_path = string(libgfortran.path)
+    global libstdcxx_path = string(libstdcxx.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libgcc_s_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libgfortran_path() = libgfortran_path
-get_libstdcxx_path() = libstdcxx_path
-get_libgomp_path() = libgomp_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module CompilerSupportLibraries_jll
diff --git a/stdlib/Dates/Project.toml b/stdlib/Dates/Project.toml
index fe225055bad98..45da6ad1a0152 100644
--- a/stdlib/Dates/Project.toml
+++ b/stdlib/Dates/Project.toml
@@ -1,5 +1,6 @@
 name = "Dates"
 uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+version = "1.11.0"
 
 [deps]
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
diff --git a/stdlib/Dates/docs/src/index.md b/stdlib/Dates/docs/src/index.md
index aa46f7b827f10..38b4f7ae86d29 100644
--- a/stdlib/Dates/docs/src/index.md
+++ b/stdlib/Dates/docs/src/index.md
@@ -1,7 +1,3 @@
-```@meta
-EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Dates/docs/src/index.md"
-```
-
 # Dates
 
 ```@meta
@@ -22,7 +18,7 @@ represents a continuously increasing machine timeline based on the UT second [^1
 [`DateTime`](@ref) type is not aware of time zones (*naive*, in Python parlance),
 analogous to a *LocalDateTime* in Java 8. Additional time zone functionality
 can be added through the [TimeZones.jl package](https://github.com/JuliaTime/TimeZones.jl/), which
-compiles the [IANA time zone database](http://www.iana.org/time-zones). Both [`Date`](@ref) and
+compiles the [IANA time zone database](https://www.iana.org/time-zones). Both [`Date`](@ref) and
 [`DateTime`](@ref) are based on the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) standard, which follows the proleptic Gregorian calendar.
 One note is that the ISO 8601 standard is particular about BC/BCE dates. In general, the last
 day of the BC/BCE era, 1-12-31 BC/BCE, was followed by 1-1-1 AD/CE, thus no year zero exists.
@@ -97,7 +93,7 @@ parser know which periods to parse in each slot.
 
 As in the case of constructors above such as `Date(2013)`, delimited `DateFormat`s allow for
 missing parts of dates and times so long as the preceding parts are given. The other parts are given the usual
-default values.  For example, `Date("1981-03", dateformat"y-m-d")` returns `1981-03-01`, whilst
+default values. For example, `Date("1981-03", dateformat"y-m-d")` returns `1981-03-01`, whilst
 `Date("31/12", dateformat"d/m/y")` gives `0001-12-31`.  (Note that the default year is
 1 AD/CE.)
 An empty string, however, always throws an `ArgumentError`.
@@ -343,12 +339,12 @@ First the mapping is loaded into the `LOCALES` variable:
 julia> french_months = ["janvier", "février", "mars", "avril", "mai", "juin",
                         "juillet", "août", "septembre", "octobre", "novembre", "décembre"];
 
-julia> french_monts_abbrev = ["janv","févr","mars","avril","mai","juin",
+julia> french_months_abbrev = ["janv","févr","mars","avril","mai","juin",
                               "juil","août","sept","oct","nov","déc"];
 
 julia> french_days = ["lundi","mardi","mercredi","jeudi","vendredi","samedi","dimanche"];
 
-julia> Dates.LOCALES["french"] = Dates.DateLocale(french_months, french_monts_abbrev, french_days, [""]);
+julia> Dates.LOCALES["french"] = Dates.DateLocale(french_months, french_months_abbrev, french_days, [""]);
 ```
 
  The above mentioned functions can then be used to perform the queries:
@@ -549,7 +545,7 @@ it could represent, in days, a value of 28, 29, 30, or 31 depending on the year
 Or a year could represent 365 or 366 days in the case of a leap year. [`Period`](@ref) types are
 simple [`Int64`](@ref) wrappers and are constructed by wrapping any `Int64` convertible type, i.e. `Year(1)`
 or `Month(3.0)`. Arithmetic between [`Period`](@ref) of the same type behave like integers, and
-limited `Period-Real` arithmetic is available.  You can extract the underlying integer with
+limited `Period-Real` arithmetic is available. You can extract the underlying integer with
 [`Dates.value`](@ref).
 
 ```jldoctest
@@ -688,9 +684,9 @@ value in the days field is uncertain.
 See the [API reference](@ref stdlib-dates-api) for additional information
 on methods exported from the `Dates` module.
 
-# [API reference](@id stdlib-dates-api)
+## [API reference](@id stdlib-dates-api)
 
-## Dates and Time Types
+### Dates and Time Types
 
 ```@docs
 Dates.Period
@@ -705,7 +701,7 @@ Dates.TimeZone
 Dates.UTC
 ```
 
-## Dates Functions
+### Dates Functions
 
 ```@docs
 Dates.DateTime(::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64)
@@ -734,7 +730,7 @@ Dates.now(::Type{Dates.UTC})
 Base.eps(::Union{Type{DateTime}, Type{Date}, Type{Time}, TimeType})
 ```
 
-### Accessor Functions
+#### Accessor Functions
 
 ```@docs
 Dates.year
@@ -762,7 +758,7 @@ Dates.monthday
 Dates.yearmonthday
 ```
 
-### Query Functions
+#### Query Functions
 
 ```@docs
 Dates.dayname
@@ -781,7 +777,7 @@ Dates.quarterofyear
 Dates.dayofquarter
 ```
 
-### Adjuster Functions
+#### Adjuster Functions
 
 ```@docs
 Base.trunc(::Dates.TimeType, ::Type{Dates.Period})
@@ -801,7 +797,7 @@ Dates.tonext(::Function, ::Dates.TimeType)
 Dates.toprev(::Function, ::Dates.TimeType)
 ```
 
-### Periods
+#### Periods
 
 ```@docs
 Dates.Period(::Any)
@@ -812,7 +808,7 @@ Dates.default
 Dates.periods
 ```
 
-### Rounding Functions
+#### Rounding Functions
 
 `Date` and `DateTime` values can be rounded to a specified resolution (e.g., 1 month or 15 minutes)
 with `floor`, `ceil`, or `round`.
@@ -841,7 +837,7 @@ Dates.date2epochdays
 Dates.datetime2epochms
 ```
 
-### Conversion Functions
+#### Conversion Functions
 
 ```@docs
 Dates.today
diff --git a/stdlib/Dates/src/Dates.jl b/stdlib/Dates/src/Dates.jl
index a111ea24089c4..763ffa1dde798 100644
--- a/stdlib/Dates/src/Dates.jl
+++ b/stdlib/Dates/src/Dates.jl
@@ -5,7 +5,7 @@
 
 The `Dates` module provides `Date`, `DateTime`, `Time` types, and related functions.
 
-The types are not aware of time zones, based on UT seconds
+The types are not aware of time zones, are based on UT seconds
 (86400 seconds a day, avoiding leap seconds), and
 use the proleptic Gregorian calendar, as specified in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601).
 For time zone functionality, see the TimeZones.jl package.
@@ -32,7 +32,7 @@ for more information.
 """
 module Dates
 
-import Base: ==, isless, div, fld, mod, rem, gcd, lcm, +, -, *, /, %, broadcast
+import Base: ==, isless, div, fld, mod, rem, gcd, lcm, +, -, *, /, %
 using Printf: @sprintf
 
 using Base.Iterators
@@ -60,6 +60,7 @@ export Period, DatePeriod, TimePeriod,
        yearmonthday, yearmonth, monthday, year, month, week, day,
        hour, minute, second, millisecond, dayofmonth,
        microsecond, nanosecond,
+       isoweekdate, isoyear, weeksinyear,
        # query.jl
        dayofweek, isleapyear, daysinmonth, daysinyear, dayofyear, dayname, dayabbr,
        dayofweekofmonth, daysofweekinmonth, monthname, monthabbr,
@@ -77,8 +78,10 @@ export Period, DatePeriod, TimePeriod,
        firstdayofmonth, lastdayofmonth,
        firstdayofyear, lastdayofyear,
        firstdayofquarter, lastdayofquarter,
-       adjust, tonext, toprev, tofirst, tolast,
+       tonext, toprev, tofirst, tolast,
        # io.jl
        ISODateTimeFormat, ISODateFormat, ISOTimeFormat, DateFormat, RFC1123Format, @dateformat_str
 
+public format
+
 end # module
diff --git a/stdlib/Dates/src/accessors.jl b/stdlib/Dates/src/accessors.jl
index 05e9017303ef1..6199acc695bd8 100644
--- a/stdlib/Dates/src/accessors.jl
+++ b/stdlib/Dates/src/accessors.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # Convert # of Rata Die days to proleptic Gregorian calendar y,m,d,w
-# Reference: http://mysite.verizon.net/aesir_research/date/date0.htm
+# Reference: https://www.researchgate.net/profile/Peter-Baum/publication/316558298_Date_Algorithms/links/5f90c3f992851c14bcdb0da6/Date-Algorithms.pdf
 function yearmonthday(days)
     z = days + 306; h = 100z - 25; a = fld(h, 3652425); b = a - fld(a, 4)
     y = fld(100b + h, 36525); c = b + z - 365y - fld(y, 4); m = div(5c + 456, 153)
@@ -32,6 +32,8 @@ function day(days)
     y = fld(100b + h, 36525); c = b + z - 365y - fld(y, 4); m = div(5c + 456, 153)
     return c - div(153m - 457, 5)
 end
+
+# ISO year utils
 # https://en.wikipedia.org/wiki/Talk:ISO_week_date#Algorithms
 const WEEK_INDEX = (15, 23, 3, 11)
 function week(days)
@@ -41,6 +43,80 @@ function week(days)
     return div(w, 28) + 1
 end
 
+"""
+Return the number of ISO weeks in the given year (see https://en.wikipedia.org/wiki/ISO_week_date).
+
+# Examples
+```jldoctest
+julia> weeksinyear(Year(2022))
+52
+
+julia> weeksinyear(Year(2020))
+53
+```
+!!! compat "Julia 1.13"
+    This function requires Julia 1.13 or later.
+"""
+function weeksinyear(y::Year)
+    firstday = firstdayofyear(Date(y))
+    lastday = lastdayofyear(Date(y))
+
+    if dayofweek(firstday) == 4 || dayofweek(lastday) == 4
+        return 53
+    end
+    return 52
+end
+
+"""
+Return the ISO year that contains `dt` (see https://en.wikipedia.org/wiki/ISO_week_date).
+
+# Examples
+```jldoctest
+julia> isoyear(Date(2022, 1, 1))
+2021 years
+
+julia> isoyear(Date(2021, 12, 31))
+2021 years
+```
+!!! compat "Julia 1.13"
+    This function requires Julia 1.13 or later.
+"""
+function isoyear(dt::DateTime)
+    thisyear = Year(dt)
+    thismonth = Month(dt)
+    weeknumber = week(dt)
+    if weeknumber >= 52 && thismonth.value == 1
+        # If it is january, then its the iso year from before
+        return Year(thisyear.value - 1)
+    elseif weeknumber == 1 && thismonth.value == 12
+        # If it is december, then its the next year
+        return Year(thisyear.value + 1)
+    else
+        return thisyear
+    end
+end
+isoyear(dt::Date) = isoyear(DateTime(dt))
+
+"""
+Return the ISO week date that corresponds to `dt` (see
+https://en.wikipedia.org/wiki/ISO_week_date).
+
+The return type is a tuple of `Year`, `Week` and `Int64` (from 1 to 7).
+
+# Examples
+```jldoctest
+julia> isoweekdate(Date(2023, 03, 06))
+(2023, 10, 1)
+
+julia> isoweekdate(Date(2023, 01, 01))
+(2022, 52, 7)
+```
+!!! compat "Julia 1.13"
+    This function requires Julia 1.13 or later.
+"""
+isoweekdate(dt::DateTime) = (isoyear(dt).value, week(dt), dayofweek(dt))
+isoweekdate(dt::Date) = isoweekdate(DateTime(dt))
+
 function quarter(days)
     m = month(days)
     return m < 4 ? 1 : m < 7 ? 2 : m < 10 ? 3 : 4
@@ -79,7 +155,7 @@ for func in (:year, :month, :quarter)
     name = string(func)
     @eval begin
         @doc """
-            $($name)(dt::TimeType) -> Int64
+            $($name)(dt::TimeType)::Int64
 
         The $($name) of a `Date` or `DateTime` as an [`Int64`](@ref).
         """ $func(dt::TimeType)
@@ -87,7 +163,7 @@ for func in (:year, :month, :quarter)
 end
 
 """
-    week(dt::TimeType) -> Int64
+    week(dt::TimeType)::Int64
 
 Return the [ISO week date](https://en.wikipedia.org/wiki/ISO_week_date) of a `Date` or
 `DateTime` as an [`Int64`](@ref). Note that the first week of a year is the week that
@@ -113,7 +189,7 @@ for func in (:day, :dayofmonth)
     name = string(func)
     @eval begin
         @doc """
-            $($name)(dt::TimeType) -> Int64
+            $($name)(dt::TimeType)::Int64
 
         The day of month of a `Date` or `DateTime` as an [`Int64`](@ref).
         """ $func(dt::TimeType)
@@ -121,7 +197,7 @@ for func in (:day, :dayofmonth)
 end
 
 """
-    hour(dt::DateTime) -> Int64
+    hour(dt::DateTime)::Int64
 
 The hour of day of a `DateTime` as an [`Int64`](@ref).
 """
@@ -131,7 +207,7 @@ for func in (:minute, :second, :millisecond)
     name = string(func)
     @eval begin
         @doc """
-            $($name)(dt::DateTime) -> Int64
+            $($name)(dt::DateTime)::Int64
 
         The $($name) of a `DateTime` as an [`Int64`](@ref).
         """ $func(dt::DateTime)
@@ -155,7 +231,7 @@ for func in (:hour, :minute, :second, :millisecond, :microsecond, :nanosecond)
     name = string(func)
     @eval begin
         @doc """
-            $($name)(t::Time) -> Int64
+            $($name)(t::Time)::Int64
 
         The $($name) of a `Time` as an [`Int64`](@ref).
         """ $func(t::Time)
diff --git a/stdlib/Dates/src/adjusters.jl b/stdlib/Dates/src/adjusters.jl
index 245e2678a9d77..745515b003ce8 100644
--- a/stdlib/Dates/src/adjusters.jl
+++ b/stdlib/Dates/src/adjusters.jl
@@ -23,7 +23,7 @@ Base.trunc(t::Time, p::Type{Microsecond}) = t - Nanosecond(t)
 Base.trunc(t::Time, p::Type{Nanosecond})  = t
 
 """
-    trunc(dt::TimeType, ::Type{Period}) -> TimeType
+    trunc(dt::TimeType, ::Type{Period})::TimeType
 
 Truncates the value of `dt` according to the provided `Period` type.
 
@@ -37,7 +37,7 @@ Dates.trunc(::Dates.TimeType, ::Type{Dates.Period})
 
 # Adjusters
 """
-    firstdayofweek(dt::TimeType) -> TimeType
+    firstdayofweek(dt::TimeType)::TimeType
 
 Adjusts `dt` to the Monday of its week.
 
@@ -53,7 +53,7 @@ firstdayofweek(dt::Date) = Date(UTD(value(dt) - dayofweek(dt) + 1))
 firstdayofweek(dt::DateTime) = DateTime(firstdayofweek(Date(dt)))
 
 """
-    lastdayofweek(dt::TimeType) -> TimeType
+    lastdayofweek(dt::TimeType)::TimeType
 
 Adjusts `dt` to the Sunday of its week.
 
@@ -69,7 +69,7 @@ lastdayofweek(dt::Date) = Date(UTD(value(dt) + (7 - dayofweek(dt))))
 lastdayofweek(dt::DateTime) = DateTime(lastdayofweek(Date(dt)))
 
 """
-    firstdayofmonth(dt::TimeType) -> TimeType
+    firstdayofmonth(dt::TimeType)::TimeType
 
 Adjusts `dt` to the first day of its month.
 
@@ -85,7 +85,7 @@ firstdayofmonth(dt::Date) = Date(UTD(value(dt) - day(dt) + 1))
 firstdayofmonth(dt::DateTime) = DateTime(firstdayofmonth(Date(dt)))
 
 """
-    lastdayofmonth(dt::TimeType) -> TimeType
+    lastdayofmonth(dt::TimeType)::TimeType
 
 Adjusts `dt` to the last day of its month.
 
@@ -104,7 +104,7 @@ end
 lastdayofmonth(dt::DateTime) = DateTime(lastdayofmonth(Date(dt)))
 
 """
-    firstdayofyear(dt::TimeType) -> TimeType
+    firstdayofyear(dt::TimeType)::TimeType
 
 Adjusts `dt` to the first day of its year.
 
@@ -120,7 +120,7 @@ firstdayofyear(dt::Date) = Date(UTD(value(dt) - dayofyear(dt) + 1))
 firstdayofyear(dt::DateTime) = DateTime(firstdayofyear(Date(dt)))
 
 """
-    lastdayofyear(dt::TimeType) -> TimeType
+    lastdayofyear(dt::TimeType)::TimeType
 
 Adjusts `dt` to the last day of its year.
 
@@ -139,7 +139,7 @@ end
 lastdayofyear(dt::DateTime) = DateTime(lastdayofyear(Date(dt)))
 
 """
-    firstdayofquarter(dt::TimeType) -> TimeType
+    firstdayofquarter(dt::TimeType)::TimeType
 
 Adjusts `dt` to the first day of its quarter.
 
@@ -162,7 +162,7 @@ end
 firstdayofquarter(dt::DateTime) = DateTime(firstdayofquarter(Date(dt)))
 
 """
-    lastdayofquarter(dt::TimeType) -> TimeType
+    lastdayofquarter(dt::TimeType)::TimeType
 
 Adjusts `dt` to the last day of its quarter.
 
@@ -204,6 +204,41 @@ function adjust(df::DateFunction, start, step, limit)
     throw(ArgumentError("Adjustment limit reached: $limit iterations"))
 end
 
+"""
+    adjust(df, start[, step, limit])::TimeType
+    adjust(df, start)::TimeType
+
+Adjusts the date in `start` until the `f::Function` passed using `df` returns `true`.
+The optional `step` parameter dictates the change in `start` on every iteration.
+If `limit` iterations occur, then an [`ArgumentError`](@ref) is thrown.
+
+The default values for parameters `start` and `limit` are 1 Day and 10,000 respectively.
+
+# Examples
+```jldoctest
+julia> Dates.adjust(date -> month(date) == 10, Date(2022, 1, 1), step=Month(3), limit=10)
+2022-10-01
+
+julia> Dates.adjust(date -> year(date) == 2025, Date(2022, 1, 1), step=Year(1), limit=4)
+2025-01-01
+
+julia> Dates.adjust(date -> day(date) == 15, Date(2022, 1, 1), step=Year(1), limit=3)
+ERROR: ArgumentError: Adjustment limit reached: 3 iterations
+Stacktrace:
+[...]
+
+julia> Dates.adjust(date -> month(date) == 10, Date(2022, 1, 1))
+2022-10-01
+
+julia> Dates.adjust(date -> year(date) == 2025, Date(2022, 1, 1))
+2025-01-01
+
+julia> Dates.adjust(date -> year(date) == 2224, Date(2022, 1, 1))
+ERROR: ArgumentError: Adjustment limit reached: 10000 iterations
+Stacktrace:
+[...]
+```
+"""
 function adjust(func::Function, start; step::Period=Day(1), limit::Int=10000)
     return adjust(DateFunction(func, start), start, step, limit)
 end
@@ -211,7 +246,7 @@ end
 # Constructors using DateFunctions
 
 """
-    Date(f::Function, y[, m, d]; step=Day(1), limit=10000) -> Date
+    Date(f::Function, y[, m, d]; step=Day(1), limit=10000)::Date
 
 Create a `Date` through the adjuster API. The starting point will be constructed from the
 provided `y, m, d` arguments, and will be adjusted until `f::Function` returns `true`.
@@ -238,7 +273,7 @@ function Date(func::Function, y, m=1, d=1; step::Period=Day(1), limit::Int=10000
 end
 
 """
-    DateTime(f::Function, y[, m, d, h, mi, s]; step=Day(1), limit=10000) -> DateTime
+    DateTime(f::Function, y[, m, d, h, mi, s]; step=Day(1), limit=10000)::DateTime
 
 Create a `DateTime` through the adjuster API. The starting point will be constructed from
 the provided `y, m, d...` arguments, and will be adjusted until `f::Function` returns
@@ -329,7 +364,7 @@ ISDAYOFWEEK = Dict(Mon => DateFunction(ismonday, Date(0)),
 
 # "same" indicates whether the current date can be considered or not
 """
-    tonext(dt::TimeType, dow::Int; same::Bool=false) -> TimeType
+    tonext(dt::TimeType, dow::Int; same::Bool=false)::TimeType
 
 Adjusts `dt` to the next day of week corresponding to `dow` with `1 = Monday, 2 = Tuesday,
 etc`. Setting `same=true` allows the current `dt` to be considered as the next `dow`,
@@ -339,7 +374,7 @@ tonext(dt::TimeType, dow::Int; same::Bool=false) = adjust(ISDAYOFWEEK[dow], same
 
 # Return the next TimeType where func evals true using step in incrementing
 """
-    tonext(func::Function, dt::TimeType; step=Day(1), limit=10000, same=false) -> TimeType
+    tonext(func::Function, dt::TimeType; step=Day(1), limit=10000, same=false)::TimeType
 
 Adjusts `dt` by iterating at most `limit` iterations by `step` increments until `func`
 returns `true`. `func` must take a single `TimeType` argument and return a [`Bool`](@ref).
@@ -350,7 +385,7 @@ function tonext(func::Function, dt::TimeType; step::Period=Day(1), limit::Int=10
 end
 
 """
-    toprev(dt::TimeType, dow::Int; same::Bool=false) -> TimeType
+    toprev(dt::TimeType, dow::Int; same::Bool=false)::TimeType
 
 Adjusts `dt` to the previous day of week corresponding to `dow` with `1 = Monday, 2 =
 Tuesday, etc`. Setting `same=true` allows the current `dt` to be considered as the previous
@@ -359,7 +394,7 @@ Tuesday, etc`. Setting `same=true` allows the current `dt` to be considered as t
 toprev(dt::TimeType, dow::Int; same::Bool=false) = adjust(ISDAYOFWEEK[dow], same ? dt : dt + Day(-1), Day(-1), 7)
 
 """
-    toprev(func::Function, dt::TimeType; step=Day(-1), limit=10000, same=false) -> TimeType
+    toprev(func::Function, dt::TimeType; step=Day(-1), limit=10000, same=false)::TimeType
 
 Adjusts `dt` by iterating at most `limit` iterations by `step` increments until `func`
 returns `true`. `func` must take a single `TimeType` argument and return a [`Bool`](@ref).
@@ -371,7 +406,7 @@ end
 
 # Return the first TimeType that falls on dow in the Month or Year
 """
-    tofirst(dt::TimeType, dow::Int; of=Month) -> TimeType
+    tofirst(dt::TimeType, dow::Int; of=Month)::TimeType
 
 Adjusts `dt` to the first `dow` of its month. Alternatively, `of=Year` will adjust to the
 first `dow` of the year.
@@ -383,7 +418,7 @@ end
 
 # Return the last TimeType that falls on dow in the Month or Year
 """
-    tolast(dt::TimeType, dow::Int; of=Month) -> TimeType
+    tolast(dt::TimeType, dow::Int; of=Month)::TimeType
 
 Adjusts `dt` to the last `dow` of its month. Alternatively, `of=Year` will adjust to the
 last `dow` of the year.
diff --git a/stdlib/Dates/src/arithmetic.jl b/stdlib/Dates/src/arithmetic.jl
index a847f749d0154..83a2873b43409 100644
--- a/stdlib/Dates/src/arithmetic.jl
+++ b/stdlib/Dates/src/arithmetic.jl
@@ -7,7 +7,8 @@
 # TimeType arithmetic
 (+)(x::TimeType) = x
 (-)(x::T, y::T) where {T<:TimeType} = x.instant - y.instant
-(-)(x::TimeType, y::TimeType) = -(promote(x, y)...)
+(-)(x::T, y::T) where {T<:AbstractDateTime} = x.instant - y.instant
+(-)(x::AbstractDateTime, y::AbstractDateTime) = -(promote(x, y)...)
 
 # Date-Time arithmetic
 """
diff --git a/stdlib/Dates/src/conversions.jl b/stdlib/Dates/src/conversions.jl
index 30f1f2581d1fa..65df4c06b64db 100644
--- a/stdlib/Dates/src/conversions.jl
+++ b/stdlib/Dates/src/conversions.jl
@@ -3,7 +3,7 @@
 # Conversion/Promotion
 
 """
-    Date(dt::DateTime) -> Date
+    Date(dt::DateTime)
 
 Convert a `DateTime` to a `Date`. The hour, minute, second, and millisecond parts of
 the `DateTime` are truncated, so only the year, month and day parts are used in
@@ -12,7 +12,7 @@ construction.
 Date(dt::TimeType) = convert(Date, dt)
 
 """
-    DateTime(dt::Date) -> DateTime
+    DateTime(dt::Date)
 
 Convert a `Date` to a `DateTime`. The hour, minute, second, and millisecond parts of
 the new `DateTime` are assumed to be zero.
@@ -20,7 +20,7 @@ the new `DateTime` are assumed to be zero.
 DateTime(dt::TimeType) = convert(DateTime, dt)
 
 """
-    Time(dt::DateTime) -> Time
+    Time(dt::DateTime)
 
 Convert a `DateTime` to a `Time`. The hour, minute, second, and millisecond parts of
 the `DateTime` are used to create the new `Time`. Microsecond and nanoseconds are zero by default.
@@ -40,7 +40,7 @@ Base.convert(::Type{Day},dt::Date) = Day(value(dt))            # Converts Date t
 const UNIXEPOCH = value(DateTime(1970)) #Rata Die milliseconds for 1970-01-01T00:00:00
 
 """
-    unix2datetime(x) -> DateTime
+    unix2datetime(x)::DateTime
 
 Take the number of seconds since unix epoch `1970-01-01T00:00:00` and convert to the
 corresponding `DateTime`.
@@ -52,7 +52,7 @@ function unix2datetime(x)
 end
 
 """
-    datetime2unix(dt::DateTime) -> Float64
+    datetime2unix(dt::DateTime)::Float64
 
 Take the given `DateTime` and return the number of seconds
 since the unix epoch `1970-01-01T00:00:00` as a [`Float64`](@ref).
@@ -60,7 +60,7 @@ since the unix epoch `1970-01-01T00:00:00` as a [`Float64`](@ref).
 datetime2unix(dt::DateTime) = (value(dt) - UNIXEPOCH) / 1000.0
 
 """
-    now() -> DateTime
+    now()::DateTime
 
 Return a `DateTime` corresponding to the user's system time including the system timezone
 locale.
@@ -72,20 +72,20 @@ function now()
 end
 
 """
-    today() -> Date
+    today()::Date
 
 Return the date portion of `now()`.
 """
 today() = Date(now())
 
 """
-    now(::Type{UTC}) -> DateTime
+    now(::Type{UTC})::DateTime
 
 Return a `DateTime` corresponding to the user's system time as UTC/GMT.
 For other time zones, see the TimeZones.jl package.
 
-# Example
-```julia
+# Examples
+```jldoctest; filter = r"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d{3})?" => "2023-01-04T10:52:24.864"
 julia> now(UTC)
 2023-01-04T10:52:24.864
 ```
@@ -93,7 +93,7 @@ julia> now(UTC)
 now(::Type{UTC}) = unix2datetime(time())
 
 """
-    rata2datetime(days) -> DateTime
+    rata2datetime(days)::DateTime
 
 Take the number of Rata Die days since epoch `0000-12-31T00:00:00` and return the
 corresponding `DateTime`.
@@ -101,7 +101,7 @@ corresponding `DateTime`.
 rata2datetime(days) = DateTime(yearmonthday(days)...)
 
 """
-    datetime2rata(dt::TimeType) -> Int64
+    datetime2rata(dt::TimeType)::Int64
 
 Return the number of Rata Die days since epoch from the given `Date` or `DateTime`.
 """
@@ -111,7 +111,7 @@ datetime2rata(dt::TimeType) = days(dt)
 const JULIANEPOCH = value(DateTime(-4713, 11, 24, 12))
 
 """
-    julian2datetime(julian_days) -> DateTime
+    julian2datetime(julian_days)::DateTime
 
 Take the number of Julian calendar days since epoch `-4713-11-24T12:00:00` and return the
 corresponding `DateTime`.
@@ -122,7 +122,7 @@ function julian2datetime(f)
 end
 
 """
-    datetime2julian(dt::DateTime) -> Float64
+    datetime2julian(dt::DateTime)::Float64
 
 Take the given `DateTime` and return the number of Julian calendar days since the julian
 epoch `-4713-11-24T12:00:00` as a [`Float64`](@ref).
diff --git a/stdlib/Dates/src/io.jl b/stdlib/Dates/src/io.jl
index 257e86064c2fb..76b3c8d4e0dfc 100644
--- a/stdlib/Dates/src/io.jl
+++ b/stdlib/Dates/src/io.jl
@@ -111,7 +111,25 @@ end
 
 ### Parse tokens
 
-for c in "yYmdHIMS"
+for c in "yY"
+    @eval begin
+        @inline function tryparsenext(d::DatePart{$c}, str, i, len)
+            val = tryparsenext_sign(str, i, len)
+            if val !== nothing
+                coefficient, i = val
+            else
+                coefficient = 1
+            end
+            # The sign character does not affect fixed length `DatePart`s
+            val = tryparsenext_base10(str, i, len, min_width(d), max_width(d))
+            val === nothing && return nothing
+            y, ii = val
+            return y * coefficient, ii
+        end
+    end
+end
+
+for c in "mdHIMS"
     @eval begin
         @inline function tryparsenext(d::DatePart{$c}, str, i, len)
             return tryparsenext_base10(str, i, len, min_width(d), max_width(d))
@@ -350,7 +368,7 @@ const DATEFORMAT_REGEX_HASH = Ref(hash(keys(CONVERSION_SPECIFIERS)))
 const DATEFORMAT_REGEX_CACHE = Ref(compute_dateformat_regex(CONVERSION_SPECIFIERS))
 
 """
-    DateFormat(format::AbstractString, locale="english") -> DateFormat
+    DateFormat(format::AbstractString, locale="english")
 
 Construct a date formatting object that can be used for parsing date strings or
 formatting a date object as a string. The following character codes can be used to construct the `format`
@@ -460,7 +478,7 @@ but creates the DateFormat object once during macro expansion.
 
 See [`DateFormat`](@ref) for details about format specifiers.
 """
-macro dateformat_str(str)
+macro dateformat_str(str::String)
     DateFormat(str)
 end
 
@@ -472,7 +490,7 @@ end
 Describes the ISO8601 formatting for a date and time. This is the default value for `Dates.format`
 of a `DateTime`.
 
-# Example
+# Examples
 ```jldoctest
 julia> Dates.format(DateTime(2018, 8, 8, 12, 0, 43, 1), ISODateTimeFormat)
 "2018-08-08T12:00:43.001"
@@ -486,7 +504,7 @@ default_format(::Type{DateTime}) = ISODateTimeFormat
 
 Describes the ISO8601 formatting for a date. This is the default value for `Dates.format` of a `Date`.
 
-# Example
+# Examples
 ```jldoctest
 julia> Dates.format(Date(2018, 8, 8), ISODateFormat)
 "2018-08-08"
@@ -500,7 +518,7 @@ default_format(::Type{Date}) = ISODateFormat
 
 Describes the ISO8601 formatting for a time. This is the default value for `Dates.format` of a `Time`.
 
-# Example
+# Examples
 ```jldoctest
 julia> Dates.format(Time(12, 0, 43, 1), ISOTimeFormat)
 "12:00:43.001"
@@ -514,7 +532,7 @@ default_format(::Type{Time}) = ISOTimeFormat
 
 Describes the RFC1123 formatting for a date and time.
 
-# Example
+# Examples
 ```jldoctest
 julia> Dates.format(DateTime(2018, 8, 8, 12, 0, 43, 1), RFC1123Format)
 "Wed, 08 Aug 2018 12:00:43"
@@ -528,7 +546,7 @@ const RFC1123Format = DateFormat("e, dd u yyyy HH:MM:SS")
 const Locale = Union{DateLocale, String}
 
 """
-    DateTime(dt::AbstractString, format::AbstractString; locale="english") -> DateTime
+    DateTime(dt::AbstractString, format::AbstractString; locale="english")
 
 Construct a `DateTime` by parsing the `dt` date time string following the
 pattern given in the `format` string (see [`DateFormat`](@ref)  for syntax).
@@ -538,7 +556,7 @@ pattern given in the `format` string (see [`DateFormat`](@ref)  for syntax).
     that you create a [`DateFormat`](@ref) object instead and use that as the second
     argument to avoid performance loss when using the same format repeatedly.
 
-# Example
+# Examples
 ```jldoctest
 julia> DateTime("2020-01-01", "yyyy-mm-dd")
 2020-01-01T00:00:00
@@ -556,7 +574,7 @@ function DateTime(dt::AbstractString, format::AbstractString; locale::Locale=ENG
 end
 
 """
-    DateTime(dt::AbstractString, df::DateFormat=ISODateTimeFormat) -> DateTime
+    DateTime(dt::AbstractString, df::DateFormat=ISODateTimeFormat)
 
 Construct a `DateTime` by parsing the `dt` date time string following the
 pattern given in the [`DateFormat`](@ref) object, or $ISODateTimeFormat if omitted.
@@ -568,7 +586,7 @@ repeatedly parsing similarly formatted date time strings with a pre-created
 DateTime(dt::AbstractString, df::DateFormat=ISODateTimeFormat) = parse(DateTime, dt, df)
 
 """
-    Date(d::AbstractString, format::AbstractString; locale="english") -> Date
+    Date(d::AbstractString, format::AbstractString; locale="english")
 
 Construct a `Date` by parsing the `d` date string following the pattern given
 in the `format` string (see [`DateFormat`](@ref) for syntax).
@@ -578,7 +596,7 @@ in the `format` string (see [`DateFormat`](@ref) for syntax).
     that you create a [`DateFormat`](@ref) object instead and use that as the second
     argument to avoid performance loss when using the same format repeatedly.
 
-# Example
+# Examples
 ```jldoctest
 julia> Date("2020-01-01", "yyyy-mm-dd")
 2020-01-01
@@ -596,7 +614,7 @@ function Date(d::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
 end
 
 """
-    Date(d::AbstractString, df::DateFormat=ISODateFormat) -> Date
+    Date(d::AbstractString, df::DateFormat=ISODateFormat)
 
 Construct a `Date` by parsing the `d` date string following the
 pattern given in the [`DateFormat`](@ref) object, or $ISODateFormat if omitted.
@@ -608,7 +626,7 @@ repeatedly parsing similarly formatted date strings with a pre-created
 Date(d::AbstractString, df::DateFormat=ISODateFormat) = parse(Date, d, df)
 
 """
-    Time(t::AbstractString, format::AbstractString; locale="english") -> Time
+    Time(t::AbstractString, format::AbstractString; locale="english")
 
 Construct a `Time` by parsing the `t` time string following the pattern given
 in the `format` string (see [`DateFormat`](@ref) for syntax).
@@ -618,7 +636,7 @@ in the `format` string (see [`DateFormat`](@ref) for syntax).
     that you create a [`DateFormat`](@ref) object instead and use that as the second
     argument to avoid performance loss when using the same format repeatedly.
 
-# Example
+# Examples
 ```jldoctest
 julia> Time("12:34pm", "HH:MMp")
 12:34:00
@@ -636,7 +654,7 @@ function Time(t::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
 end
 
 """
-    Time(t::AbstractString, df::DateFormat=ISOTimeFormat) -> Time
+    Time(t::AbstractString, df::DateFormat=ISOTimeFormat)
 
 Construct a `Time` by parsing the `t` date time string following the
 pattern given in the [`DateFormat`](@ref) object, or $ISOTimeFormat if omitted.
@@ -665,7 +683,7 @@ end
 
 
 """
-    format(dt::TimeType, format::AbstractString; locale="english") -> AbstractString
+    format(dt::TimeType, format::AbstractString; locale="english")::AbstractString
 
 Construct a string by using a `TimeType` object and applying the provided `format`. The
 following character codes can be used to construct the `format` string:
@@ -695,7 +713,7 @@ except that it does not truncate values longer than the width.
 When creating a `format` you can use any non-code characters as a separator. For example to
 generate the string "1996-01-15T00:00:00" you could use `format`: "yyyy-mm-ddTHH:MM:SS".
 Note that if you need to use a code character as a literal you can use the escape character
-backslash. The string "1996y01m" can be produced with the format "yyyy\\ymm\\m".
+backslash. The string "1996y01m" can be produced with the format raw"yyyy\\ymm\\m".
 """
 function format(dt::TimeType, f::AbstractString; locale::Locale=ENGLISH)
     format(dt, DateFormat(f, locale))
diff --git a/stdlib/Dates/src/parse.jl b/stdlib/Dates/src/parse.jl
index 62d44177de877..3730e8877339e 100644
--- a/stdlib/Dates/src/parse.jl
+++ b/stdlib/Dates/src/parse.jl
@@ -156,6 +156,18 @@ If successful, returns a 2-element tuple `(values, pos)`:
     end
 end
 
+@inline function tryparsenext_sign(str::AbstractString, i::Int, len::Int)
+    i > len && return nothing
+    c, ii = iterate(str, i)::Tuple{Char, Int}
+    if c == '+'
+        return 1, ii
+    elseif c == '-'
+        return -1, ii
+    else
+        return nothing
+    end
+end
+
 @inline function tryparsenext_base10(str::AbstractString, i::Int, len::Int, min_width::Int=1, max_width::Int=0)
     i > len && return nothing
     min_pos = min_width <= 0 ? i : i + min_width - 1
@@ -200,14 +212,22 @@ function Base.parse(::Type{DateTime}, s::AbstractString, df::typeof(ISODateTimeF
     i, end_pos = firstindex(s), lastindex(s)
     i > end_pos && throw(ArgumentError("Cannot parse an empty string as a DateTime"))
 
+    coefficient = 1
     local dy
     dm = dd = Int64(1)
     th = tm = ts = tms = Int64(0)
 
+    # Optional sign
+    let val = tryparsenext_sign(s, i, end_pos)
+        if val !== nothing
+            coefficient, i = val
+        end
+    end
+
     let val = tryparsenext_base10(s, i, end_pos, 1)
         val === nothing && @goto error
         dy, i = val
-        i > end_pos && @goto error
+        i > end_pos && @goto done
     end
 
     c, i = iterate(s, i)::Tuple{Char, Int}
@@ -272,7 +292,7 @@ function Base.parse(::Type{DateTime}, s::AbstractString, df::typeof(ISODateTimeF
     end
 
     @label done
-    return DateTime(dy, dm, dd, th, tm, ts, tms)
+    return DateTime(dy * coefficient, dm, dd, th, tm, ts, tms)
 
     @label error
     throw(ArgumentError("Invalid DateTime string"))
@@ -301,7 +321,7 @@ function Base.tryparse(::Type{T}, str::AbstractString, df::DateFormat=default_fo
 end
 
 """
-    parse_components(str::AbstractString, df::DateFormat) -> Array{Any}
+    parse_components(str::AbstractString, df::DateFormat)::Array{Any}
 
 Parse the string into its components according to the directives in the `DateFormat`.
 Each component will be a distinct type, typically a subtype of Period. The order of the
diff --git a/stdlib/Dates/src/periods.jl b/stdlib/Dates/src/periods.jl
index 876680dd456a5..42072f329ba11 100644
--- a/stdlib/Dates/src/periods.jl
+++ b/stdlib/Dates/src/periods.jl
@@ -2,7 +2,7 @@
 
 #Period types
 """
-    Dates.value(x::Period) -> Int64
+    Dates.value(x::Period)::Int64
 
 For a given period, return the value associated with that period.  For example,
 `value(Millisecond(10))` returns 10 as an integer.
@@ -28,7 +28,7 @@ for period in (:Year, :Quarter, :Month, :Week, :Day, :Hour, :Minute, :Second, :M
     for typ_str in typs
         @eval begin
             @doc """
-                $($period_str)(dt::$($typ_str)) -> $($period_str)
+                $($period_str)(dt::$($typ_str))
 
             The $($accessor_str) part of a $($typ_str) as a `$($period_str)`.$($reference)
             """ $period(dt::$(Symbol(typ_str))) = $period($(Symbol(accessor_str))(dt))
@@ -56,7 +56,7 @@ Base.isfinite(::Union{Type{P}, P}) where {P<:Period} = true
 
 # Default values (as used by TimeTypes)
 """
-    default(p::Period) -> Period
+    default(p::Period)::Period
 
 Return a sensible "default" value for the input Period by returning `T(1)` for Year,
 Month, and Day, and `T(0)` for Hour, Minute, Second, and Millisecond.
@@ -102,6 +102,7 @@ div(x::Period, y::Period, r::RoundingMode) = div(promote(x, y)..., r)
 Base.gcdx(a::T, b::T) where {T<:Period} = ((g, x, y) = gcdx(value(a), value(b)); return T(g), x, y)
 Base.abs(a::T) where {T<:Period} = T(abs(value(a)))
 Base.sign(x::Period) = sign(value(x))
+Base.signbit(x::Period) = signbit(value(x))
 
 # return (next coarser period, conversion factor):
 coarserperiod(::Type{P}) where {P<:Period} = (P, 1)
@@ -121,7 +122,7 @@ coarserperiod(::Type{Month}) = (Year, 12)
     CompoundPeriod
 
 A `CompoundPeriod` is useful for expressing time periods that are not a fixed multiple of
-smaller periods. For example, "a year and a  day" is not a fixed number of days, but can
+smaller periods. For example, "a year and a day" is not a fixed number of days, but can
 be expressed using a `CompoundPeriod`. In fact, a `CompoundPeriod` is automatically
 generated by addition of different period types, e.g. `Year(1) + Day(1)` produces a
 `CompoundPeriod` result.
@@ -164,7 +165,7 @@ struct CompoundPeriod <: AbstractTime
 end
 
 """
-    Dates.periods(::CompoundPeriod) -> Vector{Period}
+    Dates.periods(::CompoundPeriod)::Vector{Period}
 
 Return the `Vector` of `Period`s that comprise the given `CompoundPeriod`.
 
@@ -174,7 +175,7 @@ Return the `Vector` of `Period`s that comprise the given `CompoundPeriod`.
 periods(x::CompoundPeriod) = x.periods
 
 """
-    CompoundPeriod(periods) -> CompoundPeriod
+    CompoundPeriod(periods)
 
 Construct a `CompoundPeriod` from a `Vector` of `Period`s. All `Period`s of the same type
 will be added together.
@@ -203,7 +204,7 @@ CompoundPeriod(p::Period...) = CompoundPeriod(Period[p...])
 
 
 """
-    canonicalize(::CompoundPeriod) -> CompoundPeriod
+    canonicalize(::CompoundPeriod)::CompoundPeriod
 
 Reduces the `CompoundPeriod` into its canonical form by applying the following rules:
 
@@ -443,18 +444,18 @@ Base.isless(x::CompoundPeriod, y::Period) = x < CompoundPeriod(y)
 Base.isless(x::CompoundPeriod, y::CompoundPeriod) = tons(x) < tons(y)
 # truncating conversions to milliseconds, nanoseconds and days:
 # overflow can happen for periods longer than ~300,000 years
-toms(c::Nanosecond)  = div(value(c), 1000000)
-toms(c::Microsecond) = div(value(c), 1000)
+toms(c::Nanosecond)  = div(value(c), 1000000, RoundNearest)
+toms(c::Microsecond) = div(value(c), 1000, RoundNearest)
 toms(c::Millisecond) = value(c)
 toms(c::Second)      = 1000 * value(c)
 toms(c::Minute)      = 60000 * value(c)
 toms(c::Hour)        = 3600000 * value(c)
 toms(c::Period)      = 86400000 * days(c)
-toms(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(toms, c.periods))
+toms(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : sum(p -> convert(Float64, toms(p))::Float64, c.periods)
 tons(x)              = toms(x) * 1000000
 tons(x::Microsecond) = value(x) * 1000
 tons(x::Nanosecond)  = value(x)
-tons(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(tons, c.periods))
+tons(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : sum(p -> convert(Float64, tons(p))::Float64, c.periods)
 days(c::Millisecond) = div(value(c), 86400000)
 days(c::Second)      = div(value(c), 86400)
 days(c::Minute)      = div(value(c), 1440)
@@ -464,7 +465,7 @@ days(c::Week)        = 7 * value(c)
 days(c::Year)        = 365.2425 * value(c)
 days(c::Quarter)     = 91.310625 * value(c)
 days(c::Month)       = 30.436875 * value(c)
-days(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(days, c.periods))
+days(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : sum(p -> convert(Float64, days(p))::Float64, c.periods)
 seconds(x::Nanosecond) = value(x) / 1000000000
 seconds(x::Microsecond) = value(x) / 1000000
 seconds(x::Millisecond) = value(x) / 1000
diff --git a/stdlib/Dates/src/query.jl b/stdlib/Dates/src/query.jl
index 4f3b5a5c4b095..958a4f7c95bc6 100644
--- a/stdlib/Dates/src/query.jl
+++ b/stdlib/Dates/src/query.jl
@@ -87,7 +87,7 @@ dayofweek(days) = mod1(days, 7)
 
 # Number of days in year
 """
-    daysinyear(dt::TimeType) -> Int
+    daysinyear(dt::TimeType)::Int
 
 Return 366 if the year of `dt` is a leap year, otherwise return 365.
 
@@ -108,7 +108,7 @@ dayofyear(y, m, d) = MONTHDAYS[m] + d + (m > 2 && isleapyear(y))
 
 ### Days of the Week
 """
-    dayofweek(dt::TimeType) -> Int64
+    dayofweek(dt::TimeType)::Int64
 
 Return the day of the week as an [`Int64`](@ref) with `1 = Monday, 2 = Tuesday, etc.`.
 
@@ -151,8 +151,8 @@ dayname(day::Integer; locale::AbstractString="english") = dayname(day, LOCALES[l
 dayabbr(day::Integer; locale::AbstractString="english") = dayabbr(day, LOCALES[locale])
 
 """
-    dayname(dt::TimeType; locale="english") -> String
-    dayname(day::Integer; locale="english") -> String
+    dayname(dt::TimeType; locale="english")::String
+    dayname(day::Integer; locale="english")::String
 
 Return the full day name corresponding to the day of the week of the `Date` or `DateTime` in
 the given `locale`. Also accepts `Integer`.
@@ -171,8 +171,8 @@ function dayname(dt::TimeType;locale::AbstractString="english")
 end
 
 """
-    dayabbr(dt::TimeType; locale="english") -> String
-    dayabbr(day::Integer; locale="english") -> String
+    dayabbr(dt::TimeType; locale="english")::String
+    dayabbr(day::Integer; locale="english")::String
 
 Return the abbreviated name corresponding to the day of the week of the `Date` or `DateTime`
 in the given `locale`. Also accepts `Integer`.
@@ -201,7 +201,7 @@ issunday(dt::TimeType) = dayofweek(dt) == Sun
 
 # i.e. 1st Monday? 2nd Monday? 3rd Wednesday? 5th Sunday?
 """
-    dayofweekofmonth(dt::TimeType) -> Int
+    dayofweekofmonth(dt::TimeType)::Int
 
 For the day of week of `dt`, return which number it is in `dt`'s month. So if the day of
 the week of `dt` is Monday, then `1 = First Monday of the month, 2 = Second Monday of the
@@ -231,7 +231,7 @@ const THIRTY = BitSet([1, 2, 8, 9, 15, 16, 22, 23, 29, 30])
 const THIRTYONE = BitSet([1, 2, 3, 8, 9, 10, 15, 16, 17, 22, 23, 24, 29, 30, 31])
 
 """
-    daysofweekinmonth(dt::TimeType) -> Int
+    daysofweekinmonth(dt::TimeType)::Int
 
 For the day of week of `dt`, return the total number of that day of the week in `dt`'s
 month. Returns 4 or 5. Useful in temporal expressions for specifying the last day of a week
@@ -561,8 +561,8 @@ monthname(month::Integer; locale::AbstractString="english") = monthname(month, L
 monthabbr(month::Integer; locale::AbstractString="english") = monthabbr(month, LOCALES[locale])
 
 """
-    monthname(dt::TimeType; locale="english") -> String
-    monthname(month::Integer, locale="english") -> String
+    monthname(dt::TimeType; locale="english")::String
+    monthname(month::Integer, locale="english")::String
 
 
 Return the full name of the month of the `Date` or `DateTime` or `Integer` in the given `locale`.
@@ -581,8 +581,8 @@ function monthname(dt::TimeType; locale::AbstractString="english")
 end
 
 """
-    monthabbr(dt::TimeType; locale="english") -> String
-    monthabbr(month::Integer, locale="english") -> String
+    monthabbr(dt::TimeType; locale="english")::String
+    monthabbr(month::Integer, locale="english")::String
 
 Return the abbreviated month name of the `Date` or `DateTime` or `Integer` in the given `locale`.
 
@@ -600,7 +600,7 @@ function monthabbr(dt::TimeType; locale::AbstractString="english")
 end
 
 """
-    daysinmonth(dt::TimeType) -> Int
+    daysinmonth(dt::TimeType)::Int
 
 Return the number of days in the month of `dt`. Value will be 28, 29, 30, or 31.
 
@@ -620,7 +620,7 @@ daysinmonth(dt::TimeType) = ((y, m) = yearmonth(dt); return daysinmonth(y, m))
 
 ### Years
 """
-    isleapyear(dt::TimeType) -> Bool
+    isleapyear(dt::TimeType)::Bool
 
 Return `true` if the year of `dt` is a leap year.
 
@@ -636,7 +636,7 @@ false
 isleapyear(dt::TimeType) = isleapyear(year(dt))
 
 """
-    dayofyear(dt::TimeType) -> Int
+    dayofyear(dt::TimeType)::Int
 
 Return the day of the year for `dt` with January 1st being day 1.
 """
@@ -646,7 +646,7 @@ daysinyear(dt::TimeType) = 365 + isleapyear(dt)
 
 ### Quarters
 """
-    quarterofyear(dt::TimeType) -> Int
+    quarterofyear(dt::TimeType)::Int
 
 Return the quarter that `dt` resides in. Range of value is 1:4.
 """
@@ -655,7 +655,7 @@ quarterofyear(dt::TimeType) = quarter(dt)
 const QUARTERDAYS = (0, 31, 59, 0, 30, 61, 0, 31, 62, 0, 31, 61)
 
 """
-    dayofquarter(dt::TimeType) -> Int
+    dayofquarter(dt::TimeType)::Int
 
 Return the day of the current quarter of `dt`. Range of value is 1:92.
 """
diff --git a/stdlib/Dates/src/rounding.jl b/stdlib/Dates/src/rounding.jl
index b5b6e52decba8..30986608d06e2 100644
--- a/stdlib/Dates/src/rounding.jl
+++ b/stdlib/Dates/src/rounding.jl
@@ -11,7 +11,7 @@ const ConvertiblePeriod = Union{TimePeriod, Week, Day}
 const TimeTypeOrPeriod = Union{TimeType, ConvertiblePeriod}
 
 """
-    epochdays2date(days) -> Date
+    epochdays2date(days)::Date
 
 Take the number of days since the rounding epoch (`0000-01-01T00:00:00`) and return the
 corresponding `Date`.
@@ -19,7 +19,7 @@ corresponding `Date`.
 epochdays2date(i) = Date(UTD(DATEEPOCH + Int64(i)))
 
 """
-    epochms2datetime(milliseconds) -> DateTime
+    epochms2datetime(milliseconds)::DateTime
 
 Take the number of milliseconds since the rounding epoch (`0000-01-01T00:00:00`) and
 return the corresponding `DateTime`.
@@ -27,7 +27,7 @@ return the corresponding `DateTime`.
 epochms2datetime(i) = DateTime(UTM(DATETIMEEPOCH + Int64(i)))
 
 """
-    date2epochdays(dt::Date) -> Int64
+    date2epochdays(dt::Date)::Int64
 
 Take the given `Date` and return the number of days since the rounding epoch
 (`0000-01-01T00:00:00`) as an [`Int64`](@ref).
@@ -35,7 +35,7 @@ Take the given `Date` and return the number of days since the rounding epoch
 date2epochdays(dt::Date) = value(dt) - DATEEPOCH
 
 """
-    datetime2epochms(dt::DateTime) -> Int64
+    datetime2epochms(dt::DateTime)::Int64
 
 Take the given `DateTime` and return the number of milliseconds since the rounding epoch
 (`0000-01-01T00:00:00`) as an [`Int64`](@ref).
@@ -84,6 +84,12 @@ function Base.floor(dt::DateTime, p::TimePeriod)
     return epochms2datetime(milliseconds - mod(milliseconds, value(Millisecond(p))))
 end
 
+function Base.floor(t::Time, p::TimePeriod)
+    value(p) < 1 && throw(DomainError(p))
+    nanoseconds = value(t)
+    return Time(Nanosecond(nanoseconds - mod(nanoseconds, value(Nanosecond(p)))))
+end
+
 """
     floor(x::Period, precision::T) where T <: Union{TimePeriod, Week, Day} -> T
 
@@ -114,7 +120,7 @@ function Base.floor(x::ConvertiblePeriod, precision::T) where T <: ConvertiblePe
 end
 
 """
-    floor(dt::TimeType, p::Period) -> TimeType
+    floor(dt::TimeType, p::Period)::TimeType
 
 Return the nearest `Date` or `DateTime` less than or equal to `dt` at resolution `p`.
 
@@ -135,7 +141,7 @@ julia> floor(DateTime(2016, 8, 6, 12, 0, 0), Day)
 Base.floor(::Dates.TimeType, ::Dates.Period)
 
 """
-    ceil(dt::TimeType, p::Period) -> TimeType
+    ceil(dt::TimeType, p::Period)::TimeType
 
 Return the nearest `Date` or `DateTime` greater than or equal to `dt` at resolution `p`.
 
diff --git a/stdlib/Dates/src/types.jl b/stdlib/Dates/src/types.jl
index a96d183bbc590..29b25eef087de 100644
--- a/stdlib/Dates/src/types.jl
+++ b/stdlib/Dates/src/types.jl
@@ -142,8 +142,28 @@ abstract type AbstractDateTime <: TimeType end
 """
     DateTime
 
-`DateTime` wraps a `UTInstant{Millisecond}` and interprets it according to the proleptic
-Gregorian calendar.
+`DateTime` represents a point in time according to the proleptic Gregorian calendar.
+The finest resolution of the time is millisecond (i.e., microseconds or
+nanoseconds cannot be represented by this type). The type supports fixed-point
+arithmetic, and thus is prone to underflowing (and overflowing). A notable
+consequence is rounding when adding a `Microsecond` or a `Nanosecond`:
+
+```jldoctest
+julia> dt = DateTime(2023, 8, 19, 17, 45, 32, 900)
+2023-08-19T17:45:32.900
+
+julia> dt + Millisecond(1)
+2023-08-19T17:45:32.901
+
+julia> dt + Microsecond(1000) # 1000us == 1ms
+2023-08-19T17:45:32.901
+
+julia> dt + Microsecond(999) # 999us rounded to 1000us
+2023-08-19T17:45:32.901
+
+julia> dt + Microsecond(1499) # 1499 rounded to 1000us
+2023-08-19T17:45:32.901
+```
 """
 struct DateTime <: AbstractDateTime
     instant::UTInstant{Millisecond}
@@ -183,7 +203,7 @@ function totaldays(y, m, d)
 end
 
 # If the year is divisible by 4, except for every 100 years, except for every 400 years
-isleapyear(y) = (y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0))
+isleapyear(y::Integer) = (y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0))
 
 # Number of days in month
 const DAYSINMONTH = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
@@ -197,7 +217,7 @@ daysinmonth(y,m) = DAYSINMONTH[m] + (m == 2 && isleapyear(y))
 # we can validate arguments in tryparse.
 
 """
-    validargs(::Type{<:TimeType}, args...) -> Union{ArgumentError, Nothing}
+    validargs(::Type{<:TimeType}, args...)::Union{ArgumentError, Nothing}
 
 Determine whether the given arguments constitute valid inputs for the given type.
 Returns either an `ArgumentError`, or [`nothing`](@ref) in case of success.
@@ -216,7 +236,7 @@ end
 ### CONSTRUCTORS ###
 # Core constructors
 """
-    DateTime(y, [m, d, h, mi, s, ms]) -> DateTime
+    DateTime(y, [m, d, h, mi, s, ms])::DateTime
 
 Construct a `DateTime` type by parts. Arguments must be convertible to [`Int64`](@ref).
 """
@@ -248,7 +268,7 @@ end
 DateTime(dt::Base.Libc.TmStruct) = DateTime(1900 + dt.year, 1 + dt.month, dt.mday, dt.hour, dt.min, dt.sec)
 
 """
-    Date(y, [m, d]) -> Date
+    Date(y, [m, d])::Date
 
 Construct a `Date` type by parts. Arguments must be convertible to [`Int64`](@ref).
 """
@@ -267,7 +287,7 @@ end
 Date(dt::Base.Libc.TmStruct) = Date(1900 + dt.year, 1 + dt.month, dt.mday)
 
 """
-    Time(h, [mi, s, ms, us, ns]) -> Time
+    Time(h, [mi, s, ms, us, ns])::Time
 
 Construct a `Time` type by parts. Arguments must be convertible to [`Int64`](@ref).
 """
@@ -313,7 +333,7 @@ end
 # To allow any order/combination of Periods
 
 """
-    DateTime(periods::Period...) -> DateTime
+    DateTime(periods::Period...)::DateTime
 
 Construct a `DateTime` type by `Period` type parts. Arguments may be in any order. DateTime
 parts not provided will default to the value of `Dates.default(period)`.
@@ -334,7 +354,7 @@ function DateTime(period::Period, periods::Period...)
 end
 
 """
-    Date(period::Period...) -> Date
+    Date(period::Period...)::Date
 
 Construct a `Date` type by `Period` type parts. Arguments may be in any order. `Date` parts
 not provided will default to the value of `Dates.default(period)`.
@@ -350,7 +370,7 @@ function Date(period::Period, periods::Period...)
 end
 
 """
-    Time(period::TimePeriod...) -> Time
+    Time(period::TimePeriod...)::Time
 
 Construct a `Time` type by `Period` type parts. Arguments may be in any order. `Time` parts
 not provided will default to the value of `Dates.default(period)`.
@@ -408,10 +428,10 @@ calendar(dt::DateTime) = ISOCalendar
 calendar(dt::Date) = ISOCalendar
 
 """
-    eps(::Type{DateTime}) -> Millisecond
-    eps(::Type{Date}) -> Day
-    eps(::Type{Time}) -> Nanosecond
-    eps(::TimeType) -> Period
+    eps(::Type{DateTime})::Millisecond
+    eps(::Type{Date})::Day
+    eps(::Type{Time})::Nanosecond
+    eps(::TimeType)::Period
 
 Return the smallest unit value supported by the `TimeType`.
 
@@ -472,3 +492,8 @@ end
 
 Base.OrderStyle(::Type{<:AbstractTime}) = Base.Ordered()
 Base.ArithmeticStyle(::Type{<:AbstractTime}) = Base.ArithmeticWraps()
+
+# minimal Base.TOML support
+Date(d::Base.TOML.Date) = Date(d.year, d.month, d.day)
+Time(t::Base.TOML.Time) = Time(t.hour, t.minute, t.second, t.ms)
+DateTime(dt::Base.TOML.DateTime) = DateTime(Date(dt.date), Time(dt.time))
diff --git a/stdlib/Dates/test/accessors.jl b/stdlib/Dates/test/accessors.jl
index b690a81d70e49..598b17b64ffed 100644
--- a/stdlib/Dates/test/accessors.jl
+++ b/stdlib/Dates/test/accessors.jl
@@ -153,7 +153,7 @@ end
     @test Dates.week(Dates.Date(2010, 1, 1)) == 53
     @test Dates.week(Dates.Date(2010, 1, 2)) == 53
     @test Dates.week(Dates.Date(2010, 1, 2)) == 53
-    # Tests from http://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=1999
+    # Tests from https://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=1999
     dt = Dates.DateTime(1999, 12, 27)
     dt1 = Dates.Date(1999, 12, 27)
     check = (52, 52, 52, 52, 52, 52, 52, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2)
@@ -163,7 +163,7 @@ end
         dt = dt + Dates.Day(1)
         dt1 = dt1 + Dates.Day(1)
     end
-    # Tests from http://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2000
+    # Tests from https://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2000
     dt = Dates.DateTime(2000, 12, 25)
     dt1 = Dates.Date(2000, 12, 25)
     for i = 1:21
@@ -172,7 +172,7 @@ end
         dt = dt + Dates.Day(1)
         dt1 = dt1 + Dates.Day(1)
     end
-    # Test from http://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2030
+    # Test from https://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2030
     dt = Dates.DateTime(2030, 12, 23)
     dt1 = Dates.Date(2030, 12, 23)
     for i = 1:21
@@ -181,7 +181,7 @@ end
         dt = dt + Dates.Day(1)
         dt1 = dt1 + Dates.Day(1)
     end
-    # Tests from http://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2004
+    # Tests from https://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2004
     dt = Dates.DateTime(2004, 12, 20)
     dt1 = Dates.Date(2004, 12, 20)
     check = (52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 53, 53, 53, 1, 1, 1, 1, 1, 1, 1)
@@ -192,6 +192,69 @@ end
         dt1 = dt1 + Dates.Day(1)
     end
 end
+@testset "ISO year utils" begin
+    # Tests from https://www.epochconverter.com/weeks
+    @test Dates.weeksinyear(Dates.Year(2023)) == 52
+    @test Dates.weeksinyear(Dates.Year(2022)) == 52
+    @test Dates.weeksinyear(Dates.Year(2021)) == 52
+    @test Dates.weeksinyear(Dates.Year(2020)) == 53
+    @test Dates.weeksinyear(Dates.Year(2019)) == 52
+    @test Dates.weeksinyear(Dates.Year(2018)) == 52
+    @test Dates.weeksinyear(Dates.Year(2017)) == 52
+    @test Dates.weeksinyear(Dates.Year(2016)) == 52
+    @test Dates.weeksinyear(Dates.Year(2015)) == 53
+    @test Dates.weeksinyear(Dates.Year(2014)) == 52
+    @test Dates.weeksinyear(Dates.Year(2013)) == 52
+    @test Dates.weeksinyear(Dates.Year(2012)) == 52
+    @test Dates.weeksinyear(Dates.Year(2011)) == 52
+    @test Dates.weeksinyear(Dates.Year(2010)) == 52
+    @test Dates.weeksinyear(Dates.Year(2009)) == 53
+
+    # From python datetime isocalendar
+    @test Dates.isoweekdate(Dates.Date(2023, 03, 06)) == (2023, 10, 1)
+    @test Dates.isoweekdate(Dates.Date(2023, 03, 07)) == (2023, 10, 2)
+    @test Dates.isoweekdate(Dates.Date(2023, 03, 08)) == (2023, 10, 3)
+    @test Dates.isoweekdate(Dates.Date(2022, 12, 29)) == (2022, 52, 4)
+    @test Dates.isoweekdate(Dates.Date(2022, 12, 30)) == (2022, 52, 5)
+    @test Dates.isoweekdate(Dates.Date(2022, 12, 31)) == (2022, 52, 6)
+    @test Dates.isoweekdate(Dates.Date(2023, 01, 01)) == (2022, 52, 7)
+    @test Dates.isoweekdate(Dates.Date(2023, 01, 02)) == (2023, 1, 1)
+    @test Dates.isoweekdate(Dates.Date(2023, 01, 03)) == (2023, 1, 2)
+    @test Dates.isoweekdate(Dates.Date(2021, 12, 28)) == (2021, 52, 2)
+    @test Dates.isoweekdate(Dates.Date(2021, 12, 29)) == (2021, 52, 3)
+    @test Dates.isoweekdate(Dates.Date(2021, 12, 30)) == (2021, 52, 4)
+    @test Dates.isoweekdate(Dates.Date(2021, 12, 31)) == (2021, 52, 5)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 01)) == (2021, 52, 6)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 02)) == (2021, 52, 7)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 03)) == (2022, 1, 1)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 04)) == (2022, 1, 2)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 05)) == (2022, 1, 3)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 06)) == (2022, 1, 4)
+    @test Dates.isoweekdate(Dates.Date(2020, 12, 29)) == (2020, 53, 2)
+    @test Dates.isoweekdate(Dates.Date(2020, 12, 30)) == (2020, 53, 3)
+    @test Dates.isoweekdate(Dates.Date(2020, 12, 31)) == (2020, 53, 4)
+    @test Dates.isoweekdate(Dates.Date(2021, 01, 01)) == (2020, 53, 5)
+    @test Dates.isoweekdate(Dates.Date(2021, 01, 02)) == (2020, 53, 6)
+    @test Dates.isoweekdate(Dates.Date(2021, 01, 03)) == (2020, 53, 7)
+    @test Dates.isoweekdate(Dates.Date(2021, 01, 04)) == (2021, 1, 1)
+    @test Dates.isoweekdate(Dates.Date(2021, 01, 05)) == (2021, 1, 2)
+    @test Dates.isoweekdate(Dates.Date(2021, 12, 31)) == (2021, 52, 5)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 01)) == (2021, 52, 6)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 02)) == (2021, 52, 7)
+    @test Dates.isoweekdate(Dates.Date(2020, 12, 31)) == (2020, 53, 4)
+    @test Dates.isoweekdate(Dates.Date(2021, 01, 01)) == (2020, 53, 5)
+    @test Dates.isoweekdate(Dates.Date(2021, 01, 02)) == (2020, 53, 6)
+    @test Dates.isoweekdate(Dates.Date(2021, 12, 31)) == (2021, 52, 5)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 01)) == (2021, 52, 6)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 02)) == (2021, 52, 7)
+    @test Dates.isoweekdate(Dates.Date(2022, 01, 03)) == (2022, 1, 1)
+    @test Dates.isoweekdate(Dates.Date(2019, 12, 31)) == (2020, 1, 2)
+    @test Dates.isoweekdate(Dates.Date(2020, 01, 01)) == (2020, 1, 3)
+    @test Dates.isoweekdate(Dates.Date(2020, 01, 02)) == (2020, 1, 4)
+    @test Dates.isoweekdate(Dates.Date(2018, 12, 31)) == (2019, 1, 1)
+    @test Dates.isoweekdate(Dates.Date(2019, 01, 01)) == (2019, 1, 2)
+    @test Dates.isoweekdate(Dates.Date(2019, 01, 02)) == (2019, 1, 3)
+end
 @testset "Vectorized accessors" begin
     a = Dates.Date(2014, 1, 1)
     dr = [a, a, a, a, a, a, a, a, a, a]
diff --git a/stdlib/Dates/test/arithmetic.jl b/stdlib/Dates/test/arithmetic.jl
index 2e684815a3c86..333ba3a7c0088 100644
--- a/stdlib/Dates/test/arithmetic.jl
+++ b/stdlib/Dates/test/arithmetic.jl
@@ -11,10 +11,18 @@ using Dates
     @test Dates.CompoundPeriod(a - b) == Dates.Hour(12)
 end
 
+struct MonthlyDate <: TimeType
+    instant::Dates.UTInstant{Month}
+end
+struct OtherTime <: Dates.AbstractDateTime
+    instant::Dates.UTInstant{Nanosecond}
+end
 @testset "TimeType arithmetic" begin
-    a = Date(2023, 5, 1)
-    b = DateTime(2023, 5, 2)
-    @test b - a == Day(1)
+    @test_throws MethodError DateTime(2023, 5, 2) - Date(2023, 5, 1)
+    # check that - between two same-type TimeTypes works by default
+    @test MonthlyDate(Dates.UTInstant(Month(10))) - MonthlyDate(Dates.UTInstant(Month(1))) == Month(9)
+    # ... and between two same-type AbstractDateTimes
+    @test OtherTime(Dates.UTInstant(Nanosecond(2))) - OtherTime(Dates.UTInstant(Nanosecond(1))) == Nanosecond(1)
 end
 
 @testset "Wrapping arithmetic for Months" begin
@@ -263,6 +271,24 @@ end
         @test dt - Dates.Millisecond(1) == Dates.DateTime(1972, 6, 30, 23, 59, 58, 999)
         @test dt + Dates.Millisecond(-1) == Dates.DateTime(1972, 6, 30, 23, 59, 58, 999)
     end
+    @testset "DateTime-Microsecond arithmetic" begin
+        dt = Dates.DateTime(1999, 12, 27)
+        @test dt + Dates.Microsecond(1) == dt
+        @test dt + Dates.Microsecond(501) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1)
+        @test dt + Dates.Microsecond(1499) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1)
+        @test dt - Dates.Microsecond(1) == dt
+        @test dt - Dates.Microsecond(501) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999)
+        @test dt - Dates.Microsecond(1499) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999)
+    end
+    @testset "DateTime-Nanosecond arithmetic" begin
+        dt = Dates.DateTime(1999, 12, 27)
+        @test dt + Dates.Nanosecond(1) == dt
+        @test dt + Dates.Nanosecond(500_001) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1)
+        @test dt + Dates.Nanosecond(1_499_999) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1)
+        @test dt - Dates.Nanosecond(1) == dt
+        @test dt - Dates.Nanosecond(500_001) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999)
+        @test dt - Dates.Nanosecond(1_499_999) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999)
+    end
 end
 @testset "Date arithmetic" begin
     @testset "Date-Year arithmetic" begin
diff --git a/stdlib/Dates/test/io.jl b/stdlib/Dates/test/io.jl
index 2c99ac45d0c58..8159b33574bd5 100644
--- a/stdlib/Dates/test/io.jl
+++ b/stdlib/Dates/test/io.jl
@@ -5,7 +5,7 @@ module IOTests
 using Test
 using Dates
 
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")
 include(joinpath(BASE_TEST_PATH, "testhelpers", "withlocales.jl"))
 
 @testset "string/show representation of Date" begin
@@ -47,7 +47,7 @@ end
 end
 
 @testset "DateTime parsing" begin
-    # Useful reference for different locales: http://library.princeton.edu/departments/tsd/katmandu/reference/months.html
+    # Useful reference for different locales: https://library.princeton.edu/departments/tsd/katmandu/reference/months.html
 
     # Allow parsing of strings which are not representable as a TimeType
     str = "02/15/1996 25:00"
@@ -325,6 +325,23 @@ end
     # From Matt Bauman
     f = "yyyy-mm-ddTHH:MM:SS"
     @test Dates.DateTime("2014-05-28T16:46:04", f) == Dates.DateTime(2014, 5, 28, 16, 46, 04)
+
+    f = "yyyymmdd"
+    @test Dates.DateTime("20240521", f) == Dates.DateTime(2024, 5, 21)
+    @test Dates.DateTime("-20240521", f) == Dates.DateTime(-2024, 5, 21)
+    @test Dates.DateTime("+20240521", f) == Dates.DateTime(2024, 5, 21)
+    f = "YYYYmmdd"
+    @test Dates.DateTime("20240521", f) == Dates.DateTime(2024, 5, 21)
+    @test Dates.DateTime("-20240521", f) == Dates.DateTime(-2024, 5, 21)
+    @test Dates.DateTime("+20240521", f) == Dates.DateTime(2024, 5, 21)
+    f = "-yyyymmdd"
+    @test Dates.DateTime("-20240521", f) == Dates.DateTime(2024, 5, 21)
+    @test_throws ArgumentError Dates.DateTime("+20240521", f)
+    @test_throws ArgumentError Dates.DateTime("20240521", f)
+    f = "-YYYYmmdd"
+    @test Dates.DateTime("-20240521", f) == Dates.DateTime(2024, 5, 21)
+    @test_throws ArgumentError Dates.DateTime("+20240521", f)
+    @test_throws ArgumentError Dates.DateTime("20240521", f)
 end
 
 @testset "Error handling" begin
@@ -403,6 +420,17 @@ end
     @test_throws ArgumentError parse(Date, "Foo, 12 Nov 2016 07:45:36", Dates.RFC1123Format)
 end
 
+@testset "ISODateTimeFormat" begin
+    dt = Dates.DateTime(2024, 5, 21, 10, 57, 22)
+    neg_dt = Dates.DateTime(-2024, 5, 21, 10, 57, 22)
+    @test parse(Dates.DateTime, "2024-05-21T10:57:22", Dates.ISODateTimeFormat) == dt
+    @test parse(Dates.DateTime, "+2024-05-21T10:57:22", Dates.ISODateTimeFormat) == dt
+    @test parse(Dates.DateTime, "-2024-05-21T10:57:22", Dates.ISODateTimeFormat) == neg_dt
+
+    @test_throws ArgumentError parse(Dates.DateTime, "-", Dates.ISODateTimeFormat)
+    @test_throws ArgumentError parse(Dates.DateTime, "+", Dates.ISODateTimeFormat)
+end
+
 @testset "Issue 15195" begin
     f = "YY"
     @test Dates.format(Dates.Date(1999), f) == "1999"
@@ -470,6 +498,9 @@ end
 # Issue #44003
 @test tryparse(Dates.Date, "2017", Dates.DateFormat(".s")) === nothing
 
+# Issue #52989
+@test Dates.DateTime("2000") == Dates.DateTime(2000)
+
 @testset "parse milliseconds, Issue #22100" begin
     @test Dates.DateTime("2017-Mar-17 00:00:00.0000", "y-u-d H:M:S.s") == Dates.DateTime(2017, 3, 17)
     @test Dates.parse_components(".1", Dates.DateFormat(".s")) == [Dates.Millisecond(100)]
@@ -618,4 +649,9 @@ end
     end
 end
 
+@testset "Issue #50328: parsing negative years" begin
+    @test Date("-2013-10-10") == Date(-2013, 10, 10)
+    @test Date("-2013") == Date(-2013, 01, 01)
+end
+
 end
diff --git a/stdlib/Dates/test/periods.jl b/stdlib/Dates/test/periods.jl
index 82e7ca27ab6f5..9c7d0deef8a11 100644
--- a/stdlib/Dates/test/periods.jl
+++ b/stdlib/Dates/test/periods.jl
@@ -30,6 +30,9 @@ using Test
     @test sign(t) == sign(t2) == 1
     @test sign(-t) == sign(-t2) == -1
     @test sign(Dates.Year(0)) == 0
+    @test signbit(t) == signbit(t2) == false
+    @test signbit(-t) == signbit(-t2) == true
+    @test signbit(Dates.Year(0)) == false
 end
 @testset "div/mod/gcd/lcm/rem" begin
     @test Dates.Year(10) % Dates.Year(4) == Dates.Year(2)
@@ -329,6 +332,14 @@ end
     @test Dates.default(Dates.Nanosecond) == zero(Dates.Nanosecond)
 end
 @testset "Conversions" begin
+    @test Dates.toms(1499 * us) == 1
+    @test Dates.toms(501 * us) == 1
+    @test Dates.toms(us) == 0
+
+    @test Dates.toms(1_499_999 * ns) == 1
+    @test Dates.toms(500_001 * ns) == 1
+    @test Dates.toms(ns) == 0
+
     @test Dates.toms(ms) == Dates.value(Dates.Millisecond(ms)) == 1
     @test Dates.toms(s)  == Dates.value(Dates.Millisecond(s)) == 1000
     @test Dates.toms(mi) == Dates.value(Dates.Millisecond(mi)) == 60000
diff --git a/stdlib/Dates/test/rounding.jl b/stdlib/Dates/test/rounding.jl
index 85c90981423d3..03c57c7a5bce3 100644
--- a/stdlib/Dates/test/rounding.jl
+++ b/stdlib/Dates/test/rounding.jl
@@ -188,7 +188,27 @@ end
     @test round(x, Dates.Microsecond) == Dates.Microsecond(2001000)
     @test round(x, Dates.Nanosecond) == x
 end
-
+@testset "Rounding Time" begin
+    x = Time(9, 25, 45, 25, 650, 500)
+    @test floor(x, Dates.Hour) == Time(9)
+    @test floor(x, Dates.Minute) == Time(9, 25)
+    @test floor(x, Dates.Second) == Time(9, 25, 45)
+    @test floor(x, Dates.Millisecond) == Time(9, 25, 45, 25)
+    @test floor(x, Dates.Microsecond) == Time(9, 25, 45, 25, 650)
+    @test floor(x, Dates.Nanosecond) == x
+    @test ceil(x, Dates.Hour) == Time(10)
+    @test ceil(x, Dates.Minute) == Time(9, 26)
+    @test ceil(x, Dates.Second) == Time(9, 25, 46)
+    @test ceil(x, Dates.Millisecond) == Time(9, 25, 45, 26)
+    @test ceil(x, Dates.Microsecond) == Time(9, 25, 45, 25, 651)
+    @test ceil(x, Dates.Nanosecond) == x
+    @test round(x, Dates.Hour) == Time(9)
+    @test round(x, Dates.Minute) == Time(9, 26)
+    @test round(x, Dates.Second) == Time(9, 25, 45)
+    @test round(x, Dates.Millisecond) == Time(9, 25, 45, 26)
+    @test round(x, Dates.Microsecond) == Time(9, 25, 45, 25, 651)
+    @test round(x, Dates.Nanosecond) == x
+end
 @testset "Rounding DateTime to Date" begin
     now_ = DateTime(2020, 9, 1, 13)
     for p in (Year, Month, Day)
diff --git a/stdlib/Dates/test/runtests.jl b/stdlib/Dates/test/runtests.jl
index de063135427a9..ad2ee43cedfb1 100644
--- a/stdlib/Dates/test/runtests.jl
+++ b/stdlib/Dates/test/runtests.jl
@@ -2,8 +2,14 @@
 
 module DateTests
 
+using Test, Dates
+
 for file in readlines(joinpath(@__DIR__, "testgroups"))
     include(file * ".jl")
 end
 
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Dates))
+end
+
 end
diff --git a/stdlib/Dates/test/types.jl b/stdlib/Dates/test/types.jl
index 3bd11f80540d7..29395ccf3a271 100644
--- a/stdlib/Dates/test/types.jl
+++ b/stdlib/Dates/test/types.jl
@@ -41,6 +41,7 @@ end
     @test Dates.isleapyear(-1) == false
     @test Dates.isleapyear(4) == true
     @test Dates.isleapyear(-4) == true
+    @test_throws MethodError Dates.isleapyear(Dates.Year(1992))
 end
 # Create "test" check manually
 y = Dates.Year(1)
@@ -74,6 +75,12 @@ ms = Dates.Millisecond(1)
                          Dates.Hour(4), Dates.Second(10)) == Dates.DateTime(1, 2, 1, 4, 0, 10)
 end
 
+@testset "DateTime construction from Date and Time" begin
+    @test Dates.DateTime(Dates.Date(2023, 08, 07), Dates.Time(12)) == Dates.DateTime(2023, 08, 07, 12, 0, 0, 0)
+    @test_throws InexactError Dates.DateTime(Dates.Date(2023, 08, 07), Dates.Time(12, 0, 0, 0, 42))
+    @test_throws InexactError Dates.DateTime(Dates.Date(2023, 08, 07), Dates.Time(12, 0, 0, 0, 0, 42))
+end
+
 @testset "Date construction by parts" begin
     test = Dates.Date(Dates.UTD(734869))
     @test Dates.Date(2013) == test
@@ -256,7 +263,11 @@ end
 end
 
 @testset "issue #31524" begin
-    dt1 = Libc.strptime("%Y-%M-%dT%H:%M:%SZ", "2018-11-16T10:26:14Z")
+    # Ensure the result doesn't depend on local timezone, especially on macOS
+    # where an extra internal call to `mktime` is affected by timezone settings.
+    dt1 = withenv("TZ" => "UTC") do
+        Libc.strptime("%Y-%m-%dT%H:%M:%SZ", "2018-11-16T10:26:14Z")
+    end
     dt2 = Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0)
 
     time = Time(dt1)
diff --git a/stdlib/DelimitedFiles.version b/stdlib/DelimitedFiles.version
index d741690a96838..0e52d2a73628e 100644
--- a/stdlib/DelimitedFiles.version
+++ b/stdlib/DelimitedFiles.version
@@ -1,4 +1,4 @@
 DELIMITEDFILES_BRANCH = main
-DELIMITEDFILES_SHA1 = db79c842f95f55b1f8d8037c0d3363ab21cd3b90
+DELIMITEDFILES_SHA1 = aac8c59e58cbf961fa15baf4d866901d9d1e6980
 DELIMITEDFILES_GIT_URL := https://github.com/JuliaData/DelimitedFiles.jl.git
 DELIMITEDFILES_TAR_URL = https://api.github.com/repos/JuliaData/DelimitedFiles.jl/tarball/$1
diff --git a/stdlib/Distributed.version b/stdlib/Distributed.version
new file mode 100644
index 0000000000000..c4a5c812477e9
--- /dev/null
+++ b/stdlib/Distributed.version
@@ -0,0 +1,4 @@
+DISTRIBUTED_BRANCH = master
+DISTRIBUTED_SHA1 = cd9219573d736b036077dff3cadddf369516d495
+DISTRIBUTED_GIT_URL := https://github.com/JuliaLang/Distributed.jl
+DISTRIBUTED_TAR_URL = https://api.github.com/repos/JuliaLang/Distributed.jl/tarball/$1
diff --git a/stdlib/Distributed/Project.toml b/stdlib/Distributed/Project.toml
deleted file mode 100644
index ecec870290041..0000000000000
--- a/stdlib/Distributed/Project.toml
+++ /dev/null
@@ -1,14 +0,0 @@
-name = "Distributed"
-uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
-
-[deps]
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
-
-[extras]
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["LinearAlgebra", "Test"]
diff --git a/stdlib/Distributed/docs/src/index.md b/stdlib/Distributed/docs/src/index.md
deleted file mode 100644
index 00b40de49b396..0000000000000
--- a/stdlib/Distributed/docs/src/index.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# [Distributed Computing](@id man-distributed)
-Tools for distributed parallel processing.
-
-```@docs
-Distributed.addprocs
-Distributed.nprocs
-Distributed.nworkers
-Distributed.procs()
-Distributed.procs(::Integer)
-Distributed.workers
-Distributed.rmprocs
-Distributed.interrupt
-Distributed.myid
-Distributed.pmap
-Distributed.RemoteException
-Distributed.ProcessExitedException
-Distributed.Future
-Distributed.RemoteChannel
-Distributed.fetch(::Distributed.Future)
-Distributed.fetch(::RemoteChannel)
-Distributed.remotecall(::Any, ::Integer, ::Any...)
-Distributed.remotecall_wait(::Any, ::Integer, ::Any...)
-Distributed.remotecall_fetch(::Any, ::Integer, ::Any...)
-Distributed.remote_do(::Any, ::Integer, ::Any...)
-Distributed.put!(::RemoteChannel, ::Any...)
-Distributed.put!(::Distributed.Future, ::Any)
-Distributed.take!(::RemoteChannel, ::Any...)
-Distributed.isready(::RemoteChannel, ::Any...)
-Distributed.isready(::Distributed.Future)
-Distributed.AbstractWorkerPool
-Distributed.WorkerPool
-Distributed.CachingPool
-Distributed.default_worker_pool
-Distributed.clear!(::CachingPool)
-Distributed.remote
-Distributed.remotecall(::Any, ::AbstractWorkerPool, ::Any...)
-Distributed.remotecall_wait(::Any, ::AbstractWorkerPool, ::Any...)
-Distributed.remotecall_fetch(::Any, ::AbstractWorkerPool, ::Any...)
-Distributed.remote_do(::Any, ::AbstractWorkerPool, ::Any...)
-Distributed.@spawnat
-Distributed.@fetch
-Distributed.@fetchfrom
-Distributed.@distributed
-Distributed.@everywhere
-Distributed.clear!(::Any, ::Any; ::Any)
-Distributed.remoteref_id
-Distributed.channel_from_id
-Distributed.worker_id_from_socket
-Distributed.cluster_cookie()
-Distributed.cluster_cookie(::Any)
-```
-
-## Cluster Manager Interface
-
-This interface provides a mechanism to launch and manage Julia workers on different cluster environments.
-There are two types of managers present in Base: `LocalManager`, for launching additional workers on the
-same host, and `SSHManager`, for launching on remote hosts via `ssh`. TCP/IP sockets are used to connect
-and transport messages between processes. It is possible for Cluster Managers to provide a different transport.
-
-```@docs
-Distributed.ClusterManager
-Distributed.WorkerConfig
-Distributed.launch
-Distributed.manage
-Distributed.kill(::ClusterManager, ::Int, ::WorkerConfig)
-Distributed.connect(::ClusterManager, ::Int, ::WorkerConfig)
-Distributed.init_worker
-Distributed.start_worker
-Distributed.process_messages
-Distributed.default_addprocs_params
-```
diff --git a/stdlib/Distributed/src/Distributed.jl b/stdlib/Distributed/src/Distributed.jl
deleted file mode 100644
index a7c5b1778b144..0000000000000
--- a/stdlib/Distributed/src/Distributed.jl
+++ /dev/null
@@ -1,119 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-Tools for distributed parallel processing.
-"""
-module Distributed
-
-# imports for extension
-import Base: getindex, wait, put!, take!, fetch, isready, push!, length,
-             hash, ==, kill, close, isopen, showerror, iterate, IteratorSize
-
-# imports for use
-using Base: Process, Semaphore, JLOptions, buffer_writes, @async_unwrap,
-            VERSION_STRING, binding_module, atexit, julia_exename,
-            julia_cmd, AsyncGenerator, acquire, release, invokelatest,
-            shell_escape_posixly, shell_escape_csh,
-            shell_escape_wincmd, escape_microsoft_c_args,
-            uv_error, something, notnothing, isbuffered, mapany
-using Base.Threads: Event
-
-using Serialization, Sockets
-import Serialization: serialize, deserialize
-import Sockets: connect, wait_connected
-
-# NOTE: clusterserialize.jl imports additional symbols from Serialization for use
-
-export
-    @spawn,
-    @spawnat,
-    @fetch,
-    @fetchfrom,
-    @everywhere,
-    @distributed,
-
-    AbstractWorkerPool,
-    addprocs,
-    CachingPool,
-    clear!,
-    ClusterManager,
-    default_worker_pool,
-    init_worker,
-    interrupt,
-    launch,
-    manage,
-    myid,
-    nprocs,
-    nworkers,
-    pmap,
-    procs,
-    remote,
-    remotecall,
-    remotecall_fetch,
-    remotecall_wait,
-    remote_do,
-    rmprocs,
-    workers,
-    WorkerPool,
-    RemoteChannel,
-    Future,
-    WorkerConfig,
-    RemoteException,
-    ProcessExitedException,
-
-    process_messages,
-    remoteref_id,
-    channel_from_id,
-    worker_id_from_socket,
-    cluster_cookie,
-    start_worker,
-
-# Used only by shared arrays.
-    check_same_host
-
-function _require_callback(mod::Base.PkgId)
-    if Base.toplevel_load[] && myid() == 1 && nprocs() > 1
-        # broadcast top-level (e.g. from Main) import/using from node 1 (only)
-        @sync for p in procs()
-            p == 1 && continue
-            # Extensions are already loaded on workers by their triggers being loaded
-            # so no need to fire the callback upon extension being loaded on master.
-            Base.loading_extension && continue
-            @async_unwrap remotecall_wait(p) do
-                Base.require(mod)
-                nothing
-            end
-        end
-    end
-end
-
-const REF_ID = Threads.Atomic{Int}(1)
-next_ref_id() = Threads.atomic_add!(REF_ID, 1)
-
-struct RRID
-    whence::Int
-    id::Int
-
-    RRID() = RRID(myid(), next_ref_id())
-    RRID(whence, id) = new(whence, id)
-end
-
-hash(r::RRID, h::UInt) = hash(r.whence, hash(r.id, h))
-==(r::RRID, s::RRID) = (r.whence==s.whence && r.id==s.id)
-
-include("clusterserialize.jl")
-include("cluster.jl")   # cluster setup and management, addprocs
-include("messages.jl")
-include("process_messages.jl")  # process incoming messages
-include("remotecall.jl")  # the remotecall* api
-include("macros.jl")      # @spawn and friends
-include("workerpool.jl")
-include("pmap.jl")
-include("managers.jl")    # LocalManager and SSHManager
-include("precompile.jl")
-
-function __init__()
-    init_parallel()
-end
-
-end
diff --git a/stdlib/Distributed/src/cluster.jl b/stdlib/Distributed/src/cluster.jl
deleted file mode 100644
index d8cc052967d50..0000000000000
--- a/stdlib/Distributed/src/cluster.jl
+++ /dev/null
@@ -1,1388 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-    ClusterManager
-
-Supertype for cluster managers, which control workers processes as a cluster.
-Cluster managers implement how workers can be added, removed and communicated with.
-`SSHManager` and `LocalManager` are subtypes of this.
-"""
-abstract type ClusterManager end
-
-"""
-    WorkerConfig
-
-Type used by [`ClusterManager`](@ref)s to control workers added to their clusters. Some fields
-are used by all cluster managers to access a host:
-  * `io` -- the connection used to access the worker (a subtype of `IO` or `Nothing`)
-  * `host` -- the host address (either a `String` or `Nothing`)
-  * `port` -- the port on the host used to connect to the worker (either an `Int` or `Nothing`)
-
-Some are used by the cluster manager to add workers to an already-initialized host:
-  * `count` -- the number of workers to be launched on the host
-  * `exename` -- the path to the Julia executable on the host, defaults to `"\$(Sys.BINDIR)/julia"` or
-    `"\$(Sys.BINDIR)/julia-debug"`
-  * `exeflags` -- flags to use when launching Julia remotely
-
-The `userdata` field is used to store information for each worker by external managers.
-
-Some fields are used by `SSHManager` and similar managers:
-  * `tunnel` -- `true` (use tunneling), `false` (do not use tunneling), or [`nothing`](@ref) (use default for the manager)
-  * `multiplex` -- `true` (use SSH multiplexing for tunneling) or `false`
-  * `forward` -- the forwarding option used for `-L` option of ssh
-  * `bind_addr` -- the address on the remote host to bind to
-  * `sshflags` -- flags to use in establishing the SSH connection
-  * `max_parallel` -- the maximum number of workers to connect to in parallel on the host
-
-Some fields are used by both `LocalManager`s and `SSHManager`s:
-  * `connect_at` -- determines whether this is a worker-to-worker or driver-to-worker setup call
-  * `process` -- the process which will be connected (usually the manager will assign this during [`addprocs`](@ref))
-  * `ospid` -- the process ID according to the host OS, used to interrupt worker processes
-  * `environ` -- private dictionary used to store temporary information by Local/SSH managers
-  * `ident` -- worker as identified by the [`ClusterManager`](@ref)
-  * `connect_idents` -- list of worker ids the worker must connect to if using a custom topology
-  * `enable_threaded_blas` -- `true`, `false`, or `nothing`, whether to use threaded BLAS or not on the workers
-"""
-mutable struct WorkerConfig
-    # Common fields relevant to all cluster managers
-    io::Union{IO, Nothing}
-    host::Union{String, Nothing}
-    port::Union{Int, Nothing}
-
-    # Used when launching additional workers at a host
-    count::Union{Int, Symbol, Nothing}
-    exename::Union{String, Cmd, Nothing}
-    exeflags::Union{Cmd, Nothing}
-
-    # External cluster managers can use this to store information at a per-worker level
-    # Can be a dict if multiple fields need to be stored.
-    userdata::Any
-
-    # SSHManager / SSH tunnel connections to workers
-    tunnel::Union{Bool, Nothing}
-    multiplex::Union{Bool, Nothing}
-    forward::Union{String, Nothing}
-    bind_addr::Union{String, Nothing}
-    sshflags::Union{Cmd, Nothing}
-    max_parallel::Union{Int, Nothing}
-
-    # Used by Local/SSH managers
-    connect_at::Any
-
-    process::Union{Process, Nothing}
-    ospid::Union{Int, Nothing}
-
-    # Private dictionary used to store temporary information by Local/SSH managers.
-    environ::Union{Dict, Nothing}
-
-    # Connections to be setup depending on the network topology requested
-    ident::Any      # Worker as identified by the Cluster Manager.
-    # List of other worker idents this worker must connect with. Used with topology T_CUSTOM.
-    connect_idents::Union{Array, Nothing}
-
-    # Run multithreaded blas on worker
-    enable_threaded_blas::Union{Bool, Nothing}
-
-    function WorkerConfig()
-        wc = new()
-        for n in 1:fieldcount(WorkerConfig)
-            setfield!(wc, n, nothing)
-        end
-        wc
-    end
-end
-
-@enum WorkerState W_CREATED W_CONNECTED W_TERMINATING W_TERMINATED
-mutable struct Worker
-    id::Int
-    msg_lock::Threads.ReentrantLock # Lock for del_msgs, add_msgs, and gcflag
-    del_msgs::Array{Any,1} # XXX: Could del_msgs and add_msgs be Channels?
-    add_msgs::Array{Any,1}
-    @atomic gcflag::Bool
-    state::WorkerState
-    c_state::Condition      # wait for state changes
-    ct_time::Float64        # creation time
-    conn_func::Any          # used to setup connections lazily
-
-    r_stream::IO
-    w_stream::IO
-    w_serializer::ClusterSerializer  # writes can happen from any task hence store the
-                                     # serializer as part of the Worker object
-    manager::ClusterManager
-    config::WorkerConfig
-    version::Union{VersionNumber, Nothing}   # Julia version of the remote process
-    initialized::Event
-
-    function Worker(id::Int, r_stream::IO, w_stream::IO, manager::ClusterManager;
-                             version::Union{VersionNumber, Nothing}=nothing,
-                             config::WorkerConfig=WorkerConfig())
-        w = Worker(id)
-        w.r_stream = r_stream
-        w.w_stream = buffer_writes(w_stream)
-        w.w_serializer = ClusterSerializer(w.w_stream)
-        w.manager = manager
-        w.config = config
-        w.version = version
-        set_worker_state(w, W_CONNECTED)
-        register_worker_streams(w)
-        w
-    end
-
-    Worker(id::Int) = Worker(id, nothing)
-    function Worker(id::Int, conn_func)
-        @assert id > 0
-        if haskey(map_pid_wrkr, id)
-            return map_pid_wrkr[id]
-        end
-        w=new(id, Threads.ReentrantLock(), [], [], false, W_CREATED, Condition(), time(), conn_func)
-        w.initialized = Event()
-        register_worker(w)
-        w
-    end
-
-    Worker() = Worker(get_next_pid())
-end
-
-function set_worker_state(w, state)
-    w.state = state
-    notify(w.c_state; all=true)
-end
-
-function check_worker_state(w::Worker)
-    if w.state === W_CREATED
-        if !isclusterlazy()
-            if PGRP.topology === :all_to_all
-                # Since higher pids connect with lower pids, the remote worker
-                # may not have connected to us yet. Wait for some time.
-                wait_for_conn(w)
-            else
-                error("peer $(w.id) is not connected to $(myid()). Topology : " * string(PGRP.topology))
-            end
-        else
-            w.ct_time = time()
-            if myid() > w.id
-                t = @async exec_conn_func(w)
-            else
-                # route request via node 1
-                t = @async remotecall_fetch((p,to_id) -> remotecall_fetch(exec_conn_func, p, to_id), 1, w.id, myid())
-            end
-            errormonitor(t)
-            wait_for_conn(w)
-        end
-    end
-end
-
-exec_conn_func(id::Int) = exec_conn_func(worker_from_id(id)::Worker)
-function exec_conn_func(w::Worker)
-    try
-        f = notnothing(w.conn_func)
-        # Will be called if some other task tries to connect at the same time.
-        w.conn_func = () -> wait_for_conn(w)
-        f()
-    catch e
-        w.conn_func = () -> throw(e)
-        rethrow()
-    end
-    nothing
-end
-
-function wait_for_conn(w)
-    if w.state === W_CREATED
-        timeout =  worker_timeout() - (time() - w.ct_time)
-        timeout <= 0 && error("peer $(w.id) has not connected to $(myid())")
-
-        @async (sleep(timeout); notify(w.c_state; all=true))
-        wait(w.c_state)
-        w.state === W_CREATED && error("peer $(w.id) didn't connect to $(myid()) within $timeout seconds")
-    end
-    nothing
-end
-
-## process group creation ##
-
-mutable struct LocalProcess
-    id::Int
-    bind_addr::String
-    bind_port::UInt16
-    cookie::String
-    LocalProcess() = new(1)
-end
-
-worker_timeout() = parse(Float64, get(ENV, "JULIA_WORKER_TIMEOUT", "60.0"))
-
-
-## worker creation and setup ##
-"""
-    start_worker([out::IO=stdout], cookie::AbstractString=readline(stdin); close_stdin::Bool=true, stderr_to_stdout::Bool=true)
-
-`start_worker` is an internal function which is the default entry point for
-worker processes connecting via TCP/IP. It sets up the process as a Julia cluster
-worker.
-
-host:port information is written to stream `out` (defaults to stdout).
-
-The function reads the cookie from stdin if required, and  listens on a free port
-(or if specified, the port in the `--bind-to` command line option) and schedules
-tasks to process incoming TCP connections and requests. It also (optionally)
-closes stdin and redirects stderr to stdout.
-
-It does not return.
-"""
-start_worker(cookie::AbstractString=readline(stdin); kwargs...) = start_worker(stdout, cookie; kwargs...)
-function start_worker(out::IO, cookie::AbstractString=readline(stdin); close_stdin::Bool=true, stderr_to_stdout::Bool=true)
-    init_multi()
-
-    if close_stdin # workers will not use it
-        redirect_stdin(devnull)
-        close(stdin)
-    end
-    stderr_to_stdout && redirect_stderr(stdout)
-
-    init_worker(cookie)
-    interface = IPv4(LPROC.bind_addr)
-    if LPROC.bind_port == 0
-        port_hint = 9000 + (getpid() % 1000)
-        (port, sock) = listenany(interface, UInt16(port_hint))
-        LPROC.bind_port = port
-    else
-        sock = listen(interface, LPROC.bind_port)
-    end
-    errormonitor(@async while isopen(sock)
-        client = accept(sock)
-        process_messages(client, client, true)
-    end)
-    print(out, "julia_worker:")  # print header
-    print(out, "$(string(LPROC.bind_port))#") # print port
-    print(out, LPROC.bind_addr)
-    print(out, '\n')
-    flush(out)
-
-    Sockets.nagle(sock, false)
-    Sockets.quickack(sock, true)
-
-    if ccall(:jl_running_on_valgrind,Cint,()) != 0
-        println(out, "PID = $(getpid())")
-    end
-
-    try
-        # To prevent hanging processes on remote machines, newly launched workers exit if the
-        # master process does not connect in time.
-        check_master_connect()
-        while true; wait(); end
-    catch err
-        print(stderr, "unhandled exception on $(myid()): $(err)\nexiting.\n")
-    end
-
-    close(sock)
-    exit(0)
-end
-
-
-function redirect_worker_output(ident, stream)
-    t = @async while !eof(stream)
-        line = readline(stream)
-        if startswith(line, "      From worker ")
-            # stdout's of "additional" workers started from an initial worker on a host are not available
-            # on the master directly - they are routed via the initial worker's stdout.
-            println(line)
-        else
-            println("      From worker $(ident):\t$line")
-        end
-    end
-    errormonitor(t)
-end
-
-struct LaunchWorkerError <: Exception
-    msg::String
-end
-
-Base.showerror(io::IO, e::LaunchWorkerError) = print(io, e.msg)
-
-# The default TCP transport relies on the worker listening on a free
-# port available and printing its bind address and port.
-# The master process uses this to connect to the worker and subsequently
-# setup a all-to-all network.
-function read_worker_host_port(io::IO)
-    t0 = time_ns()
-
-    # Wait at most for JULIA_WORKER_TIMEOUT seconds to read host:port
-    # info from the worker
-    timeout = worker_timeout() * 1e9
-    # We expect the first line to contain the host:port string. However, as
-    # the worker may be launched via ssh or a cluster manager like SLURM,
-    # ignore any informational / warning lines printed by the launch command.
-    # If we do not find the host:port string in the first 1000 lines, treat it
-    # as an error.
-
-    ntries = 1000
-    leader = String[]
-    try
-        while ntries > 0
-            readtask = @async readline(io)
-            yield()
-            while !istaskdone(readtask) && ((time_ns() - t0) < timeout)
-                sleep(0.05)
-            end
-            !istaskdone(readtask) && break
-
-            conninfo = fetch(readtask)
-            if isempty(conninfo) && !isopen(io)
-                throw(LaunchWorkerError("Unable to read host:port string from worker. Launch command exited with error?"))
-            end
-
-            ntries -= 1
-            bind_addr, port = parse_connection_info(conninfo)
-            if !isempty(bind_addr)
-                return bind_addr, port
-            end
-
-            # collect unmatched lines
-            push!(leader, conninfo)
-        end
-        close(io)
-        if ntries > 0
-            throw(LaunchWorkerError("Timed out waiting to read host:port string from worker."))
-        else
-            throw(LaunchWorkerError("Unexpected output from worker launch command. Host:port string not found."))
-        end
-    finally
-        for line in leader
-            println("\tFrom worker startup:\t", line)
-        end
-    end
-end
-
-function parse_connection_info(str)
-    m = match(r"^julia_worker:(\d+)#(.*)", str)
-    if m !== nothing
-        (String(m.captures[2]), parse(UInt16, m.captures[1]))
-    else
-        ("", UInt16(0))
-    end
-end
-
-"""
-    init_worker(cookie::AbstractString, manager::ClusterManager=DefaultClusterManager())
-
-Called by cluster managers implementing custom transports. It initializes a newly launched
-process as a worker. Command line argument `--worker[=<cookie>]` has the effect of initializing a
-process as a worker using TCP/IP sockets for transport.
-`cookie` is a [`cluster_cookie`](@ref).
-"""
-function init_worker(cookie::AbstractString, manager::ClusterManager=DefaultClusterManager())
-    myrole!(:worker)
-
-    # On workers, the default cluster manager connects via TCP sockets. Custom
-    # transports will need to call this function with their own manager.
-    global cluster_manager
-    cluster_manager = manager
-
-    # Since our pid has yet to be set, ensure no RemoteChannel / Future  have been created or addprocs() called.
-    @assert nprocs() <= 1
-    @assert isempty(PGRP.refs)
-    @assert isempty(client_refs)
-
-    # System is started in head node mode, cleanup related entries
-    empty!(PGRP.workers)
-    empty!(map_pid_wrkr)
-
-    cluster_cookie(cookie)
-    nothing
-end
-
-
-# The main function for adding worker processes.
-# `manager` is of type ClusterManager. The respective managers are responsible
-# for launching the workers. All keyword arguments (plus a few default values)
-# are available as a dictionary to the `launch` methods
-#
-# Only one addprocs can be in progress at any time
-#
-const worker_lock = ReentrantLock()
-
-"""
-    addprocs(manager::ClusterManager; kwargs...) -> List of process identifiers
-
-Launches worker processes via the specified cluster manager.
-
-For example, Beowulf clusters are supported via a custom cluster manager implemented in
-the package `ClusterManagers.jl`.
-
-The number of seconds a newly launched worker waits for connection establishment from the
-master can be specified via variable `JULIA_WORKER_TIMEOUT` in the worker process's
-environment. Relevant only when using TCP/IP as transport.
-
-To launch workers without blocking the REPL, or the containing function
-if launching workers programmatically, execute `addprocs` in its own task.
-
-# Examples
-
-```julia
-# On busy clusters, call `addprocs` asynchronously
-t = @async addprocs(...)
-```
-
-```julia
-# Utilize workers as and when they come online
-if nprocs() > 1   # Ensure at least one new worker is available
-   ....   # perform distributed execution
-end
-```
-
-```julia
-# Retrieve newly launched worker IDs, or any error messages
-if istaskdone(t)   # Check if `addprocs` has completed to ensure `fetch` doesn't block
-    if nworkers() == N
-        new_pids = fetch(t)
-    else
-        fetch(t)
-    end
-end
-```
-"""
-function addprocs(manager::ClusterManager; kwargs...)
-    init_multi()
-
-    cluster_mgmt_from_master_check()
-
-    lock(worker_lock)
-    try
-        addprocs_locked(manager::ClusterManager; kwargs...)
-    finally
-        unlock(worker_lock)
-    end
-end
-
-function addprocs_locked(manager::ClusterManager; kwargs...)
-    params = merge(default_addprocs_params(manager), Dict{Symbol,Any}(kwargs))
-    topology(Symbol(params[:topology]))
-
-    if PGRP.topology !== :all_to_all
-        params[:lazy] = false
-    end
-
-    if PGRP.lazy === nothing || nprocs() == 1
-        PGRP.lazy = params[:lazy]
-    elseif isclusterlazy() != params[:lazy]
-        throw(ArgumentError(string("Active workers with lazy=", isclusterlazy(),
-                                    ". Cannot set lazy=", params[:lazy])))
-    end
-
-    # References to launched workers, filled when each worker is fully initialized and
-    # has connected to all nodes.
-    launched_q = Int[]   # Asynchronously filled by the launch method
-
-    # The `launch` method should add an object of type WorkerConfig for every
-    # worker launched. It provides information required on how to connect
-    # to it.
-
-    # FIXME: launched should be a Channel, launch_ntfy should be a Threads.Condition
-    # but both are part of the public interface. This means we currently can't use
-    # `Threads.@spawn` in the code below.
-    launched = WorkerConfig[]
-    launch_ntfy = Condition()
-
-    # call manager's `launch` is a separate task. This allows the master
-    # process initiate the connection setup process as and when workers come
-    # online
-    t_launch = @async launch(manager, params, launched, launch_ntfy)
-
-    @sync begin
-        while true
-            if isempty(launched)
-                istaskdone(t_launch) && break
-                @async (sleep(1); notify(launch_ntfy))
-                wait(launch_ntfy)
-            end
-
-            if !isempty(launched)
-                wconfig = popfirst!(launched)
-                let wconfig=wconfig
-                    @async setup_launched_worker(manager, wconfig, launched_q)
-                end
-            end
-        end
-    end
-
-    Base.wait(t_launch)      # catches any thrown errors from the launch task
-
-    # Since all worker-to-worker setups may not have completed by the time this
-    # function returns to the caller, send the complete list to all workers.
-    # Useful for nprocs(), nworkers(), etc to return valid values on the workers.
-    all_w = workers()
-    for pid in all_w
-        remote_do(set_valid_processes, pid, all_w)
-    end
-
-    sort!(launched_q)
-end
-
-function set_valid_processes(plist::Array{Int})
-    for pid in setdiff(plist, workers())
-        myid() != pid && Worker(pid)
-    end
-end
-
-"""
-    default_addprocs_params(mgr::ClusterManager) -> Dict{Symbol, Any}
-
-Implemented by cluster managers. The default keyword parameters passed when calling
-`addprocs(mgr)`. The minimal set of options is available by calling
-`default_addprocs_params()`
-"""
-default_addprocs_params(::ClusterManager) = default_addprocs_params()
-default_addprocs_params() = Dict{Symbol,Any}(
-    :topology => :all_to_all,
-    :dir      => pwd(),
-    :exename  => joinpath(Sys.BINDIR, julia_exename()),
-    :exeflags => ``,
-    :env      => [],
-    :enable_threaded_blas => false,
-    :lazy => true)
-
-
-function setup_launched_worker(manager, wconfig, launched_q)
-    pid = create_worker(manager, wconfig)
-    push!(launched_q, pid)
-
-    # When starting workers on remote multi-core hosts, `launch` can (optionally) start only one
-    # process on the remote machine, with a request to start additional workers of the
-    # same type. This is done by setting an appropriate value to `WorkerConfig.cnt`.
-    cnt = something(wconfig.count, 1)
-    if cnt === :auto
-        cnt = wconfig.environ[:cpu_threads]
-    end
-    cnt = cnt - 1   # Removing self from the requested number
-
-    if cnt > 0
-        launch_n_additional_processes(manager, pid, wconfig, cnt, launched_q)
-    end
-end
-
-
-function launch_n_additional_processes(manager, frompid, fromconfig, cnt, launched_q)
-    @sync begin
-        exename = notnothing(fromconfig.exename)
-        exeflags = something(fromconfig.exeflags, ``)
-        cmd = `$exename $exeflags`
-
-        new_addresses = remotecall_fetch(launch_additional, frompid, cnt, cmd)
-        for address in new_addresses
-            (bind_addr, port) = address
-
-            wconfig = WorkerConfig()
-            for x in [:host, :tunnel, :multiplex, :sshflags, :exeflags, :exename, :enable_threaded_blas]
-                Base.setproperty!(wconfig, x, Base.getproperty(fromconfig, x))
-            end
-            wconfig.bind_addr = bind_addr
-            wconfig.port = port
-
-            let wconfig=wconfig
-                @async begin
-                    pid = create_worker(manager, wconfig)
-                    remote_do(redirect_output_from_additional_worker, frompid, pid, port)
-                    push!(launched_q, pid)
-                end
-            end
-        end
-    end
-end
-
-function create_worker(manager, wconfig)
-    # only node 1 can add new nodes, since nobody else has the full list of address:port
-    @assert LPROC.id == 1
-    timeout = worker_timeout()
-
-    # initiate a connect. Does not wait for connection completion in case of TCP.
-    w = Worker()
-    local r_s, w_s
-    try
-        (r_s, w_s) = connect(manager, w.id, wconfig)
-    catch ex
-        try
-            deregister_worker(w.id)
-            kill(manager, w.id, wconfig)
-        finally
-            rethrow(ex)
-        end
-    end
-
-    w = Worker(w.id, r_s, w_s, manager; config=wconfig)
-    # install a finalizer to perform cleanup if necessary
-    finalizer(w) do w
-        if myid() == 1
-            manage(w.manager, w.id, w.config, :finalize)
-        end
-    end
-
-    # set when the new worker has finished connections with all other workers
-    ntfy_oid = RRID()
-    rr_ntfy_join = lookup_ref(ntfy_oid)
-    rr_ntfy_join.waitingfor = myid()
-
-    # Start a new task to handle inbound messages from connected worker in master.
-    # Also calls `wait_connected` on TCP streams.
-    process_messages(w.r_stream, w.w_stream, false)
-
-    # send address information of all workers to the new worker.
-    # Cluster managers set the address of each worker in `WorkerConfig.connect_at`.
-    # A new worker uses this to setup an all-to-all network if topology :all_to_all is specified.
-    # Workers with higher pids connect to workers with lower pids. Except process 1 (master) which
-    # initiates connections to all workers.
-
-    # Connection Setup Protocol:
-    # - Master sends 16-byte cookie followed by 16-byte version string and a JoinPGRP message to all workers
-    # - On each worker
-    #   - Worker responds with a 16-byte version followed by a JoinCompleteMsg
-    #   - Connects to all workers less than its pid. Sends the cookie, version and an IdentifySocket message
-    #   - Workers with incoming connection requests write back their Version and an IdentifySocketAckMsg message
-    # - On master, receiving a JoinCompleteMsg triggers rr_ntfy_join (signifies that worker setup is complete)
-
-    join_list = []
-    if PGRP.topology === :all_to_all
-        # need to wait for lower worker pids to have completed connecting, since the numerical value
-        # of pids is relevant to the connection process, i.e., higher pids connect to lower pids and they
-        # require the value of config.connect_at which is set only upon connection completion
-        for jw in PGRP.workers
-            if (jw.id != 1) && (jw.id < w.id)
-                (jw.state === W_CREATED) && wait(jw.c_state)
-                push!(join_list, jw)
-            end
-        end
-
-    elseif PGRP.topology === :custom
-        # wait for requested workers to be up before connecting to them.
-        filterfunc(x) = (x.id != 1) && isdefined(x, :config) &&
-            (notnothing(x.config.ident) in something(wconfig.connect_idents, []))
-
-        wlist = filter(filterfunc, PGRP.workers)
-        waittime = 0
-        while wconfig.connect_idents !== nothing &&
-              length(wlist) < length(wconfig.connect_idents)
-            if waittime >= timeout
-                error("peer workers did not connect within $timeout seconds")
-            end
-            sleep(1.0)
-            waittime += 1
-            wlist = filter(filterfunc, PGRP.workers)
-        end
-
-        for wl in wlist
-            (wl.state === W_CREATED) && wait(wl.c_state)
-            push!(join_list, wl)
-        end
-    end
-
-    all_locs = mapany(x -> isa(x, Worker) ?
-                      (something(x.config.connect_at, ()), x.id) :
-                      ((), x.id, true),
-                      join_list)
-    send_connection_hdr(w, true)
-    enable_threaded_blas = something(wconfig.enable_threaded_blas, false)
-    join_message = JoinPGRPMsg(w.id, all_locs, PGRP.topology, enable_threaded_blas, isclusterlazy())
-    send_msg_now(w, MsgHeader(RRID(0,0), ntfy_oid), join_message)
-
-    @async manage(w.manager, w.id, w.config, :register)
-    # wait for rr_ntfy_join with timeout
-    timedout = false
-    @async (sleep($timeout); timedout = true; put!(rr_ntfy_join, 1))
-    wait(rr_ntfy_join)
-    if timedout
-        error("worker did not connect within $timeout seconds")
-    end
-    lock(client_refs) do
-        delete!(PGRP.refs, ntfy_oid)
-    end
-
-    return w.id
-end
-
-
-# Called on the first worker on a remote host. Used to optimize launching
-# of multiple workers on a remote host (to leverage multi-core)
-
-additional_io_objs=Dict()
-function launch_additional(np::Integer, cmd::Cmd)
-    io_objs = Vector{Any}(undef, np)
-    addresses = Vector{Any}(undef, np)
-
-    for i in 1:np
-        io = open(detach(cmd), "r+")
-        write_cookie(io)
-        io_objs[i] = io.out
-    end
-
-    for (i,io) in enumerate(io_objs)
-        (host, port) = read_worker_host_port(io)
-        addresses[i] = (host, port)
-        additional_io_objs[port] = io
-    end
-
-    return addresses
-end
-
-function redirect_output_from_additional_worker(pid, port)
-    io = additional_io_objs[port]
-    redirect_worker_output("$pid", io)
-    delete!(additional_io_objs, port)
-    nothing
-end
-
-function check_master_connect()
-    timeout = worker_timeout() * 1e9
-    # If we do not have at least process 1 connect to us within timeout
-    # we log an error and exit, unless we're running on valgrind
-    if ccall(:jl_running_on_valgrind,Cint,()) != 0
-        return
-    end
-    @async begin
-        start = time_ns()
-        while !haskey(map_pid_wrkr, 1) && (time_ns() - start) < timeout
-            sleep(1.0)
-        end
-
-        if !haskey(map_pid_wrkr, 1)
-            print(stderr, "Master process (id 1) could not connect within $(timeout/1e9) seconds.\nexiting.\n")
-            exit(1)
-        end
-    end
-end
-
-
-"""
-    cluster_cookie() -> cookie
-
-Return the cluster cookie.
-"""
-cluster_cookie() = (init_multi(); LPROC.cookie)
-
-"""
-    cluster_cookie(cookie) -> cookie
-
-Set the passed cookie as the cluster cookie, then returns it.
-"""
-function cluster_cookie(cookie)
-    init_multi()
-    # The cookie must be an ASCII string with length <=  HDR_COOKIE_LEN
-    @assert isascii(cookie)
-    @assert length(cookie) <= HDR_COOKIE_LEN
-
-    cookie = rpad(cookie, HDR_COOKIE_LEN)
-
-    LPROC.cookie = cookie
-    cookie
-end
-
-
-let next_pid = 2    # 1 is reserved for the client (always)
-    global get_next_pid
-    function get_next_pid()
-        retval = next_pid
-        next_pid += 1
-        retval
-    end
-end
-
-mutable struct ProcessGroup
-    name::String
-    workers::Array{Any,1}
-    refs::Dict{RRID,Any}                  # global references
-    topology::Symbol
-    lazy::Union{Bool, Nothing}
-
-    ProcessGroup(w::Array{Any,1}) = new("pg-default", w, Dict(), :all_to_all, nothing)
-end
-const PGRP = ProcessGroup([])
-
-function topology(t)
-    @assert t in [:all_to_all, :master_worker, :custom]
-    if (PGRP.topology==t) || ((myid()==1) && (nprocs()==1)) || (myid() > 1)
-        PGRP.topology = t
-    else
-        error("Workers with Topology $(PGRP.topology) already exist. Requested Topology $(t) cannot be set.")
-    end
-    t
-end
-
-isclusterlazy() = something(PGRP.lazy, false)
-
-get_bind_addr(pid::Integer) = get_bind_addr(worker_from_id(pid))
-get_bind_addr(w::LocalProcess) = LPROC.bind_addr
-function get_bind_addr(w::Worker)
-    if w.config.bind_addr === nothing
-        if w.id != myid()
-            w.config.bind_addr = remotecall_fetch(get_bind_addr, w.id, w.id)
-        end
-    end
-    w.config.bind_addr
-end
-
-# globals
-const LPROC = LocalProcess()
-const LPROCROLE = Ref{Symbol}(:master)
-const HDR_VERSION_LEN=16
-const HDR_COOKIE_LEN=16
-const map_pid_wrkr = Dict{Int, Union{Worker, LocalProcess}}()
-const map_sock_wrkr = IdDict()
-const map_del_wrkr = Set{Int}()
-
-# whether process is a master or worker in a distributed setup
-myrole() = LPROCROLE[]
-function myrole!(proctype::Symbol)
-    LPROCROLE[] = proctype
-end
-
-# cluster management related API
-"""
-    myid()
-
-Get the id of the current process.
-
-# Examples
-```julia-repl
-julia> myid()
-1
-
-julia> remotecall_fetch(() -> myid(), 4)
-4
-```
-"""
-myid() = LPROC.id
-
-"""
-    nprocs()
-
-Get the number of available processes.
-
-# Examples
-```julia-repl
-julia> nprocs()
-3
-
-julia> workers()
-2-element Array{Int64,1}:
- 2
- 3
-```
-"""
-function nprocs()
-    if myid() == 1 || (PGRP.topology === :all_to_all && !isclusterlazy())
-        n = length(PGRP.workers)
-        # filter out workers in the process of being setup/shutdown.
-        for jw in PGRP.workers
-            if !isa(jw, LocalProcess) && (jw.state !== W_CONNECTED)
-                n = n - 1
-            end
-        end
-        return n
-    else
-        return length(PGRP.workers)
-    end
-end
-
-"""
-    nworkers()
-
-Get the number of available worker processes. This is one less than [`nprocs()`](@ref). Equal to
-`nprocs()` if `nprocs() == 1`.
-
-# Examples
-```julia-repl
-\$ julia -p 2
-
-julia> nprocs()
-3
-
-julia> nworkers()
-2
-```
-"""
-function nworkers()
-    n = nprocs()
-    n == 1 ? 1 : n-1
-end
-
-"""
-    procs()
-
-Return a list of all process identifiers, including pid 1 (which is not included by [`workers()`](@ref)).
-
-# Examples
-```julia-repl
-\$ julia -p 2
-
-julia> procs()
-3-element Array{Int64,1}:
- 1
- 2
- 3
-```
-"""
-function procs()
-    if myid() == 1 || (PGRP.topology === :all_to_all  && !isclusterlazy())
-        # filter out workers in the process of being setup/shutdown.
-        return Int[x.id for x in PGRP.workers if isa(x, LocalProcess) || (x.state === W_CONNECTED)]
-    else
-        return Int[x.id for x in PGRP.workers]
-    end
-end
-
-function id_in_procs(id)  # faster version of `id in procs()`
-    if myid() == 1 || (PGRP.topology === :all_to_all  && !isclusterlazy())
-        for x in PGRP.workers
-            if (x.id::Int) == id && (isa(x, LocalProcess) || (x::Worker).state === W_CONNECTED)
-                return true
-            end
-        end
-    else
-        for x in PGRP.workers
-            if (x.id::Int) == id
-                return true
-            end
-        end
-    end
-    return false
-end
-
-"""
-    procs(pid::Integer)
-
-Return a list of all process identifiers on the same physical node.
-Specifically all workers bound to the same ip-address as `pid` are returned.
-"""
-function procs(pid::Integer)
-    if myid() == 1
-        all_workers = [x for x in PGRP.workers if isa(x, LocalProcess) || (x.state === W_CONNECTED)]
-        if (pid == 1) || (isa(map_pid_wrkr[pid].manager, LocalManager))
-            Int[x.id for x in filter(w -> (w.id==1) || (isa(w.manager, LocalManager)), all_workers)]
-        else
-            ipatpid = get_bind_addr(pid)
-            Int[x.id for x in filter(w -> get_bind_addr(w) == ipatpid, all_workers)]
-        end
-    else
-        remotecall_fetch(procs, 1, pid)
-    end
-end
-
-"""
-    workers()
-
-Return a list of all worker process identifiers.
-
-# Examples
-```julia-repl
-\$ julia -p 2
-
-julia> workers()
-2-element Array{Int64,1}:
- 2
- 3
-```
-"""
-function workers()
-    allp = procs()
-    if length(allp) == 1
-       allp
-    else
-       filter(x -> x != 1, allp)
-    end
-end
-
-function cluster_mgmt_from_master_check()
-    if myid() != 1
-        throw(ErrorException("Only process 1 can add and remove workers"))
-    end
-end
-
-"""
-    rmprocs(pids...; waitfor=typemax(Int))
-
-Remove the specified workers. Note that only process 1 can add or remove
-workers.
-
-Argument `waitfor` specifies how long to wait for the workers to shut down:
-  - If unspecified, `rmprocs` will wait until all requested `pids` are removed.
-  - An [`ErrorException`](@ref) is raised if all workers cannot be terminated before
-    the requested `waitfor` seconds.
-  - With a `waitfor` value of 0, the call returns immediately with the workers
-    scheduled for removal in a different task. The scheduled [`Task`](@ref) object is
-    returned. The user should call [`wait`](@ref) on the task before invoking any other
-    parallel calls.
-
-# Examples
-```julia-repl
-\$ julia -p 5
-
-julia> t = rmprocs(2, 3, waitfor=0)
-Task (runnable) @0x0000000107c718d0
-
-julia> wait(t)
-
-julia> workers()
-3-element Array{Int64,1}:
- 4
- 5
- 6
-```
-"""
-function rmprocs(pids...; waitfor=typemax(Int))
-    cluster_mgmt_from_master_check()
-
-    pids = vcat(pids...)
-    if waitfor == 0
-        t = @async _rmprocs(pids, typemax(Int))
-        yield()
-        return t
-    else
-        _rmprocs(pids, waitfor)
-        # return a dummy task object that user code can wait on.
-        return @async nothing
-    end
-end
-
-function _rmprocs(pids, waitfor)
-    lock(worker_lock)
-    try
-        rmprocset = Union{LocalProcess, Worker}[]
-        for p in pids
-            if p == 1
-                @warn "rmprocs: process 1 not removed"
-            else
-                if haskey(map_pid_wrkr, p)
-                    w = map_pid_wrkr[p]
-                    set_worker_state(w, W_TERMINATING)
-                    kill(w.manager, p, w.config)
-                    push!(rmprocset, w)
-                end
-            end
-        end
-
-        start = time_ns()
-        while (time_ns() - start) < waitfor*1e9
-            all(w -> w.state === W_TERMINATED, rmprocset) && break
-            sleep(min(0.1, waitfor - (time_ns() - start)/1e9))
-        end
-
-        unremoved = [wrkr.id for wrkr in filter(w -> w.state !== W_TERMINATED, rmprocset)]
-        if length(unremoved) > 0
-            estr = string("rmprocs: pids ", unremoved, " not terminated after ", waitfor, " seconds.")
-            throw(ErrorException(estr))
-        end
-    finally
-        unlock(worker_lock)
-    end
-end
-
-
-"""
-    ProcessExitedException(worker_id::Int)
-
-After a client Julia process has exited, further attempts to reference the dead child will
-throw this exception.
-"""
-struct ProcessExitedException <: Exception
-    worker_id::Int
-end
-
-# No-arg constructor added for compatibility with Julia 1.0 & 1.1, should be deprecated in the future
-ProcessExitedException() = ProcessExitedException(-1)
-
-worker_from_id(i) = worker_from_id(PGRP, i)
-function worker_from_id(pg::ProcessGroup, i)
-    if !isempty(map_del_wrkr) && in(i, map_del_wrkr)
-        throw(ProcessExitedException(i))
-    end
-    w = get(map_pid_wrkr, i, nothing)
-    if w === nothing
-        if myid() == 1
-            error("no process with id $i exists")
-        end
-        w = Worker(i)
-        map_pid_wrkr[i] = w
-    else
-        w = w::Union{Worker, LocalProcess}
-    end
-    w
-end
-
-"""
-    worker_id_from_socket(s) -> pid
-
-A low-level API which, given a `IO` connection or a `Worker`,
-returns the `pid` of the worker it is connected to.
-This is useful when writing custom [`serialize`](@ref) methods for a type,
-which optimizes the data written out depending on the receiving process id.
-"""
-function worker_id_from_socket(s)
-    w = get(map_sock_wrkr, s, nothing)
-    if isa(w,Worker)
-        if s === w.r_stream || s === w.w_stream
-            return w.id
-        end
-    end
-    if isa(s,IOStream) && fd(s)==-1
-        # serializing to a local buffer
-        return myid()
-    end
-    return -1
-end
-
-
-register_worker(w) = register_worker(PGRP, w)
-function register_worker(pg, w)
-    push!(pg.workers, w)
-    map_pid_wrkr[w.id] = w
-end
-
-function register_worker_streams(w)
-    map_sock_wrkr[w.r_stream] = w
-    map_sock_wrkr[w.w_stream] = w
-end
-
-deregister_worker(pid) = deregister_worker(PGRP, pid)
-function deregister_worker(pg, pid)
-    pg.workers = filter(x -> !(x.id == pid), pg.workers)
-    w = pop!(map_pid_wrkr, pid, nothing)
-    if isa(w, Worker)
-        if isdefined(w, :r_stream)
-            pop!(map_sock_wrkr, w.r_stream, nothing)
-            if w.r_stream != w.w_stream
-                pop!(map_sock_wrkr, w.w_stream, nothing)
-            end
-        end
-
-        if myid() == 1 && (myrole() === :master) && isdefined(w, :config)
-            # Notify the cluster manager of this workers death
-            manage(w.manager, w.id, w.config, :deregister)
-            if PGRP.topology !== :all_to_all || isclusterlazy()
-                for rpid in workers()
-                    try
-                        remote_do(deregister_worker, rpid, pid)
-                    catch
-                    end
-                end
-            end
-        end
-    end
-    push!(map_del_wrkr, pid)
-
-    # delete this worker from our remote reference client sets
-    ids = []
-    tonotify = []
-    lock(client_refs) do
-        for (id, rv) in pg.refs
-            if in(pid, rv.clientset)
-                push!(ids, id)
-            end
-            if rv.waitingfor == pid
-                push!(tonotify, (id, rv))
-            end
-        end
-        for id in ids
-            del_client(pg, id, pid)
-        end
-
-        # throw exception to tasks waiting for this pid
-        for (id, rv) in tonotify
-            close(rv.c, ProcessExitedException(pid))
-            delete!(pg.refs, id)
-        end
-    end
-    return
-end
-
-
-function interrupt(pid::Integer)
-    @assert myid() == 1
-    w = map_pid_wrkr[pid]
-    if isa(w, Worker)
-        manage(w.manager, w.id, w.config, :interrupt)
-    end
-    return
-end
-
-"""
-    interrupt(pids::Integer...)
-
-Interrupt the current executing task on the specified workers. This is equivalent to
-pressing Ctrl-C on the local machine. If no arguments are given, all workers are interrupted.
-"""
-interrupt(pids::Integer...) = interrupt([pids...])
-
-"""
-    interrupt(pids::AbstractVector=workers())
-
-Interrupt the current executing task on the specified workers. This is equivalent to
-pressing Ctrl-C on the local machine. If no arguments are given, all workers are interrupted.
-"""
-function interrupt(pids::AbstractVector=workers())
-    @assert myid() == 1
-    @sync begin
-        for pid in pids
-            @async interrupt(pid)
-        end
-    end
-end
-
-wp_bind_addr(p::LocalProcess) = p.bind_addr
-wp_bind_addr(p) = p.config.bind_addr
-
-function check_same_host(pids)
-    if myid() != 1
-        return remotecall_fetch(check_same_host, 1, pids)
-    else
-        # We checkfirst if all test pids have been started using the local manager,
-        # else we check for the same bind_to addr. This handles the special case
-        # where the local ip address may change - as during a system sleep/awake
-        if all(p -> (p==1) || (isa(map_pid_wrkr[p].manager, LocalManager)), pids)
-            return true
-        else
-            first_bind_addr = notnothing(wp_bind_addr(map_pid_wrkr[pids[1]]))
-            return all(p -> notnothing(wp_bind_addr(map_pid_wrkr[p])) == first_bind_addr, pids[2:end])
-        end
-    end
-end
-
-function terminate_all_workers()
-    myid() != 1 && return
-
-    if nprocs() > 1
-        try
-            rmprocs(workers(); waitfor=5.0)
-        catch _ex
-            @warn "Forcibly interrupting busy workers" exception=_ex
-            # Might be computation bound, interrupt them and try again
-            interrupt(workers())
-            try
-                rmprocs(workers(); waitfor=5.0)
-            catch _ex2
-                @error "Unable to terminate all workers" exception=_ex2,catch_backtrace()
-            end
-        end
-    end
-end
-
-# initialize the local proc network address / port
-function init_bind_addr()
-    opts = JLOptions()
-    if opts.bindto != C_NULL
-        bind_to = split(unsafe_string(opts.bindto), ":")
-        bind_addr = string(parse(IPAddr, bind_to[1]))
-        if length(bind_to) > 1
-            bind_port = parse(Int,bind_to[2])
-        else
-            bind_port = 0
-        end
-    else
-        bind_port = 0
-        try
-            bind_addr = string(getipaddr())
-        catch
-            # All networking is unavailable, initialize bind_addr to the loopback address
-            # Will cause an exception to be raised only when used.
-            bind_addr = "127.0.0.1"
-        end
-    end
-    global LPROC
-    LPROC.bind_addr = bind_addr
-    LPROC.bind_port = UInt16(bind_port)
-end
-
-using Random: randstring
-
-let inited = false
-    # do initialization that's only needed when there is more than 1 processor
-    global function init_multi()
-        if !inited
-            inited = true
-            push!(Base.package_callbacks, _require_callback)
-            atexit(terminate_all_workers)
-            init_bind_addr()
-            cluster_cookie(randstring(HDR_COOKIE_LEN))
-        end
-        return nothing
-    end
-end
-
-function init_parallel()
-    start_gc_msgs_task()
-
-    # start in "head node" mode, if worker, will override later.
-    global PGRP
-    global LPROC
-    LPROC.id = 1
-    @assert isempty(PGRP.workers)
-    register_worker(LPROC)
-end
-
-write_cookie(io::IO) = print(io.in, string(cluster_cookie(), "\n"))
-
-function get_threads_spec(opts)
-    if opts.nthreads > 0
-        @assert opts.nthreadpools >= 1
-        @assert opts.nthreads_per_pool != C_NULL
-        thr = "$(unsafe_load(opts.nthreads_per_pool))"
-        if opts.nthreadpools == 2
-            thr = "$(thr),$(unsafe_load(opts.nthreads_per_pool, 2))"
-        end
-        `--threads=$(thr)`
-    else
-        ``
-    end
-end
-
-function get_gcthreads_spec(opts)
-    if opts.nmarkthreads > 0 || opts.nsweepthreads > 0
-        `--gcthreads=$(opts.nmarkthreads),$(opts.nsweepthreads)`
-    else
-        ``
-    end
-end
-
-# Starts workers specified by (-n|--procs) and --machine-file command line options
-function process_opts(opts)
-    # startup worker.
-    # opts.startupfile, opts.load, etc should should not be processed for workers.
-    if opts.worker == 1
-        # does not return
-        if opts.cookie != C_NULL
-            start_worker(unsafe_string(opts.cookie))
-        else
-            start_worker()
-        end
-    end
-
-    # Propagate --threads to workers
-    threads = get_threads_spec(opts)
-    # Propagate --gcthreads to workers
-    gcthreads = get_gcthreads_spec(opts)
-
-    exeflags = `$threads $gcthreads`
-
-    # add processors
-    if opts.nprocs > 0
-        addprocs(opts.nprocs; exeflags=exeflags)
-    end
-
-    # load processes from machine file
-    if opts.machine_file != C_NULL
-        addprocs(load_machine_file(unsafe_string(opts.machine_file)); exeflags=exeflags)
-    end
-    return nothing
-end
-
-
-function load_machine_file(path::AbstractString)
-    machines = []
-    for line in split(read(path, String),'\n'; keepempty=false)
-        s = split(line, '*'; keepempty=false)
-        map!(strip, s, s)
-        if length(s) > 1
-            cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1])
-            push!(machines,(s[2], cnt))
-        else
-            push!(machines,line)
-        end
-    end
-    return machines
-end
diff --git a/stdlib/Distributed/src/clusterserialize.jl b/stdlib/Distributed/src/clusterserialize.jl
deleted file mode 100644
index 0acd4ce68c45b..0000000000000
--- a/stdlib/Distributed/src/clusterserialize.jl
+++ /dev/null
@@ -1,254 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Serialization: serialize_cycle, deserialize_cycle, writetag,
-                     serialize_typename, deserialize_typename,
-                     TYPENAME_TAG, TASK_TAG, reset_state, serialize_type
-using Serialization.__deserialized_types__
-
-import Serialization: object_number, lookup_object_number, remember_object
-
-mutable struct ClusterSerializer{I<:IO} <: AbstractSerializer
-    io::I
-    counter::Int
-    table::IdDict{Any,Any}
-    pending_refs::Vector{Int}
-
-    pid::Int                                     # Worker we are connected to.
-    tn_obj_sent::Set{UInt64}                     # TypeName objects sent
-    glbs_sent::Dict{Symbol, Tuple{UInt64, UInt64}}   # (key,value) -> (symbol, (hash_value, objectid))
-    glbs_in_tnobj::Dict{UInt64, Vector{Symbol}}  # Track globals referenced in
-                                                 # anonymous functions.
-    anonfunc_id::UInt64
-
-    function ClusterSerializer{I}(io::I) where I<:IO
-        new(io, 0, IdDict(), Int[], worker_id_from_socket(io),
-            Set{UInt64}(), Dict{UInt64, UInt64}(), Dict{UInt64, Vector{Symbol}}(), 0)
-    end
-end
-ClusterSerializer(io::IO) = ClusterSerializer{typeof(io)}(io)
-
-const object_numbers = WeakKeyDict()
-const obj_number_salt = Ref(0)
-function object_number(s::ClusterSerializer, @nospecialize(l))
-    global obj_number_salt, object_numbers
-    if haskey(object_numbers, l)
-        return object_numbers[l]
-    end
-    # a hash function that always gives the same number to the same
-    # object on the same machine, and is unique over all machines.
-    ln = obj_number_salt[]+(UInt64(myid())<<44)
-    obj_number_salt[] += 1
-    object_numbers[l] = ln
-    return ln::UInt64
-end
-
-const known_object_data = Dict{UInt64,Any}()
-
-function lookup_object_number(s::ClusterSerializer, n::UInt64)
-    return get(known_object_data, n, nothing)
-end
-
-function remember_object(s::ClusterSerializer, @nospecialize(o), n::UInt64)
-    known_object_data[n] = o
-    if isa(o, Core.TypeName) && !haskey(object_numbers, o)
-        # set up reverse mapping for serialize
-        object_numbers[o] = n
-    end
-    return nothing
-end
-
-function deserialize(s::ClusterSerializer, ::Type{Core.TypeName})
-    full_body_sent = deserialize(s)
-    number = read(s.io, UInt64)
-    if !full_body_sent
-        tn = lookup_object_number(s, number)::Core.TypeName
-        remember_object(s, tn, number)
-        deserialize_cycle(s, tn)
-    else
-        tn = deserialize_typename(s, number)
-    end
-
-    # retrieve arrays of global syms sent if any and deserialize them all.
-    foreach(sym->deserialize_global_from_main(s, sym), deserialize(s))
-    return tn
-end
-
-function serialize(s::ClusterSerializer, t::Core.TypeName)
-    serialize_cycle(s, t) && return
-    writetag(s.io, TYPENAME_TAG)
-
-    identifier = object_number(s, t)
-    send_whole = !(identifier in s.tn_obj_sent)
-    serialize(s, send_whole)
-    write(s.io, identifier)
-    if send_whole
-        # Track globals referenced in this anonymous function.
-        # This information is used to resend modified globals when we
-        # only send the identifier.
-        prev = s.anonfunc_id
-        s.anonfunc_id = identifier
-        serialize_typename(s, t)
-        s.anonfunc_id = prev
-        push!(s.tn_obj_sent, identifier)
-        finalizer(t) do x
-            cleanup_tname_glbs(s, identifier)
-        end
-    end
-
-    # Send global refs if required.
-    syms = syms_2b_sent(s, identifier)
-    serialize(s, syms)
-    foreach(sym->serialize_global_from_main(s, sym), syms)
-    nothing
-end
-
-function serialize(s::ClusterSerializer, g::GlobalRef)
-    # Record if required and then invoke the default GlobalRef serializer.
-    sym = g.name
-    if g.mod === Main && isdefined(g.mod, sym)
-        if (binding_module(Main, sym) === Main) && (s.anonfunc_id != 0) &&
-            !startswith(string(sym), "#") # Anonymous functions are handled via FULL_GLOBALREF_TAG
-
-            push!(get!(s.glbs_in_tnobj, s.anonfunc_id, []), sym)
-        end
-    end
-
-    invoke(serialize, Tuple{AbstractSerializer, GlobalRef}, s, g)
-end
-
-# Send/resend a global binding if
-# a) has not been sent previously, i.e., we are seeing this binding for the first time, or,
-# b) hash value has changed or
-# c) hash value is same but of a different object, i.e. objectid has changed or
-# d) is a bits type
-function syms_2b_sent(s::ClusterSerializer, identifier)
-    lst = Symbol[]
-    check_syms = get(s.glbs_in_tnobj, identifier, Symbol[])
-    for sym in check_syms
-        v = getfield(Main, sym)
-
-        if isbits(v)
-            push!(lst, sym)
-        else
-            if haskey(s.glbs_sent, sym)
-                # We have sent this binding before, see if it has changed.
-                hval, oid = s.glbs_sent[sym]
-                if hval != hash(sym, hash(v)) || oid != objectid(v)
-                    push!(lst, sym)
-                end
-            else
-                push!(lst, sym)
-            end
-        end
-    end
-    return unique(lst)
-end
-
-function serialize_global_from_main(s::ClusterSerializer, sym)
-    v = getfield(Main, sym)
-
-    if !isbits(v)
-        s.glbs_sent[sym] = (hash(sym, hash(v)), objectid(v))
-    end
-
-    serialize(s, isconst(Main, sym))
-    serialize(s, v)
-end
-
-function deserialize_global_from_main(s::ClusterSerializer, sym)
-    sym_isconst = deserialize(s)
-    v = deserialize(s)
-    if isdefined(Main, sym) && (sym_isconst || isconst(Main, sym))
-        if isequal(getfield(Main, sym), v)
-            # same value; ok
-            return nothing
-        else
-            @warn "Cannot transfer global variable $sym; it already has a value."
-            return nothing
-        end
-    end
-    if sym_isconst
-        ccall(:jl_set_const, Cvoid, (Any, Any, Any), Main, sym, v)
-    else
-        setglobal!(Main, sym, v)
-    end
-    return nothing
-end
-
-function cleanup_tname_glbs(s::ClusterSerializer, identifier)
-    delete!(s.glbs_in_tnobj, identifier)
-end
-
-# TODO: cleanup from s.tn_obj_sent
-
-
-# Specialized serialize-deserialize implementations for CapturedException to partially
-# recover from any deserialization errors in `CapturedException.ex`
-
-function serialize(s::ClusterSerializer, ex::CapturedException)
-    serialize_type(s, typeof(ex))
-    serialize(s, string(typeof(ex.ex))) # String type should not result in a deser error
-    serialize(s, ex.processed_bt)       # Currently should not result in a deser error
-    serialize(s, ex.ex)                 # can result in a UndefVarError on the remote node
-                                        # if a type used in ex.ex is undefined on the remote node.
-end
-
-function original_ex(s::ClusterSerializer, ex_str, remote_stktrace)
-    local pid_str = ""
-    try
-        pid_str = string(" from worker ", worker_id_from_socket(s.io))
-    catch
-    end
-
-    stk_str = remote_stktrace ? "Remote" : "Local"
-    ErrorException(string("Error deserializing a remote exception", pid_str, "\n",
-                          "Remote(original) exception of type ", ex_str, "\n",
-                          stk_str,  " stacktrace : "))
-end
-
-function deserialize(s::ClusterSerializer, t::Type{<:CapturedException})
-    ex_str = deserialize(s)
-    local bt
-    local capex
-    try
-        bt = deserialize(s)
-    catch e
-        throw(CompositeException([
-            original_ex(s, ex_str, false),
-            CapturedException(e, catch_backtrace())
-        ]))
-    end
-
-    try
-        capex = deserialize(s)
-    catch e
-        throw(CompositeException([
-            CapturedException(original_ex(s, ex_str, true), bt),
-            CapturedException(e, catch_backtrace())
-        ]))
-    end
-
-    return CapturedException(capex, bt)
-end
-
-"""
-    clear!(syms, pids=workers(); mod=Main)
-
-Clears global bindings in modules by initializing them to `nothing`.
-`syms` should be of type [`Symbol`](@ref) or a collection of `Symbol`s . `pids` and `mod`
-identify the processes and the module in which global variables are to be
-reinitialized. Only those names found to be defined under `mod` are cleared.
-
-An exception is raised if a global constant is requested to be cleared.
-"""
-function clear!(syms, pids=workers(); mod=Main)
-    @sync for p in pids
-        @async_unwrap remotecall_wait(clear_impl!, p, syms, mod)
-    end
-end
-clear!(sym::Symbol, pid::Int; mod=Main) = clear!([sym], [pid]; mod=mod)
-clear!(sym::Symbol, pids=workers(); mod=Main) = clear!([sym], pids; mod=mod)
-clear!(syms, pid::Int; mod=Main) = clear!(syms, [pid]; mod=mod)
-
-clear_impl!(syms, mod::Module) = foreach(x->clear_impl!(x,mod), syms)
-clear_impl!(sym::Symbol, mod::Module) = isdefined(mod, sym) && @eval(mod, global $sym = nothing)
diff --git a/stdlib/Distributed/src/macros.jl b/stdlib/Distributed/src/macros.jl
deleted file mode 100644
index a767c7a40d9c9..0000000000000
--- a/stdlib/Distributed/src/macros.jl
+++ /dev/null
@@ -1,361 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-let nextidx = Threads.Atomic{Int}(0)
-    global nextproc
-    function nextproc()
-        idx = Threads.atomic_add!(nextidx, 1)
-        return workers()[(idx % nworkers()) + 1]
-    end
-end
-
-spawnat(p, thunk) = remotecall(thunk, p)
-
-spawn_somewhere(thunk) = spawnat(nextproc(),thunk)
-
-"""
-    @spawn expr
-
-Create a closure around an expression and run it on an automatically-chosen process,
-returning a [`Future`](@ref) to the result.
-This macro is deprecated; `@spawnat :any expr` should be used instead.
-
-# Examples
-```julia-repl
-julia> addprocs(3);
-
-julia> f = @spawn myid()
-Future(2, 1, 5, nothing)
-
-julia> fetch(f)
-2
-
-julia> f = @spawn myid()
-Future(3, 1, 7, nothing)
-
-julia> fetch(f)
-3
-```
-
-!!! compat "Julia 1.3"
-    As of Julia 1.3 this macro is deprecated. Use `@spawnat :any` instead.
-"""
-macro spawn(expr)
-    thunk = esc(:(()->($expr)))
-    var = esc(Base.sync_varname)
-    quote
-        local ref = spawn_somewhere($thunk)
-        if $(Expr(:islocal, var))
-            put!($var, ref)
-        end
-        ref
-    end
-end
-
-"""
-    @spawnat p expr
-
-Create a closure around an expression and run the closure
-asynchronously on process `p`. Return a [`Future`](@ref) to the result.
-If `p` is the quoted literal symbol `:any`, then the system will pick a
-processor to use automatically.
-
-# Examples
-```julia-repl
-julia> addprocs(3);
-
-julia> f = @spawnat 2 myid()
-Future(2, 1, 3, nothing)
-
-julia> fetch(f)
-2
-
-julia> f = @spawnat :any myid()
-Future(3, 1, 7, nothing)
-
-julia> fetch(f)
-3
-```
-
-!!! compat "Julia 1.3"
-    The `:any` argument is available as of Julia 1.3.
-"""
-macro spawnat(p, expr)
-    thunk = esc(:(()->($expr)))
-    var = esc(Base.sync_varname)
-    if p === QuoteNode(:any)
-        spawncall = :(spawn_somewhere($thunk))
-    else
-        spawncall = :(spawnat($(esc(p)), $thunk))
-    end
-    quote
-        local ref = $spawncall
-        if $(Expr(:islocal, var))
-            put!($var, ref)
-        end
-        ref
-    end
-end
-
-"""
-    @fetch expr
-
-Equivalent to `fetch(@spawnat :any expr)`.
-See [`fetch`](@ref) and [`@spawnat`](@ref).
-
-# Examples
-```julia-repl
-julia> addprocs(3);
-
-julia> @fetch myid()
-2
-
-julia> @fetch myid()
-3
-
-julia> @fetch myid()
-4
-
-julia> @fetch myid()
-2
-```
-"""
-macro fetch(expr)
-    thunk = esc(:(()->($expr)))
-    :(remotecall_fetch($thunk, nextproc()))
-end
-
-"""
-    @fetchfrom
-
-Equivalent to `fetch(@spawnat p expr)`.
-See [`fetch`](@ref) and [`@spawnat`](@ref).
-
-# Examples
-```julia-repl
-julia> addprocs(3);
-
-julia> @fetchfrom 2 myid()
-2
-
-julia> @fetchfrom 4 myid()
-4
-```
-"""
-macro fetchfrom(p, expr)
-    thunk = esc(:(()->($expr)))
-    :(remotecall_fetch($thunk, $(esc(p))))
-end
-
-# extract a list of modules to import from an expression
-extract_imports!(imports, x) = imports
-function extract_imports!(imports, ex::Expr)
-    if Meta.isexpr(ex, (:import, :using))
-        push!(imports, ex)
-    elseif Meta.isexpr(ex, :let)
-        extract_imports!(imports, ex.args[2])
-    elseif Meta.isexpr(ex, (:toplevel, :block))
-        for arg in ex.args
-            extract_imports!(imports, arg)
-        end
-    end
-    return imports
-end
-extract_imports(x) = extract_imports!(Any[], x)
-
-"""
-    @everywhere [procs()] expr
-
-Execute an expression under `Main` on all `procs`.
-Errors on any of the processes are collected into a
-[`CompositeException`](@ref) and thrown. For example:
-
-    @everywhere bar = 1
-
-will define `Main.bar` on all current processes. Any processes added later
-(say with [`addprocs()`](@ref)) will not have the expression defined.
-
-Unlike [`@spawnat`](@ref), `@everywhere` does not capture any local variables.
-Instead, local variables can be broadcast using interpolation:
-
-    foo = 1
-    @everywhere bar = \$foo
-
-The optional argument `procs` allows specifying a subset of all
-processes to have execute the expression.
-
-Similar to calling `remotecall_eval(Main, procs, expr)`, but with two extra features:
-
-    - `using` and `import` statements run on the calling process first, to ensure
-      packages are precompiled.
-    - The current source file path used by `include` is propagated to other processes.
-"""
-macro everywhere(ex)
-    procs = GlobalRef(@__MODULE__, :procs)
-    return esc(:($(Distributed).@everywhere $procs() $ex))
-end
-
-macro everywhere(procs, ex)
-    imps = extract_imports(ex)
-    return quote
-        $(isempty(imps) ? nothing : Expr(:toplevel, imps...)) # run imports locally first
-        let ex = Expr(:toplevel, :(task_local_storage()[:SOURCE_PATH] = $(get(task_local_storage(), :SOURCE_PATH, nothing))), $(esc(Expr(:quote, ex)))),
-            procs = $(esc(procs))
-            remotecall_eval(Main, procs, ex)
-        end
-    end
-end
-
-"""
-    remotecall_eval(m::Module, procs, expression)
-
-Execute an expression under module `m` on the processes
-specified in `procs`.
-Errors on any of the processes are collected into a
-[`CompositeException`](@ref) and thrown.
-
-See also [`@everywhere`](@ref).
-"""
-function remotecall_eval(m::Module, procs, ex)
-    @sync begin
-        run_locally = 0
-        for pid in procs
-            if pid == myid()
-                run_locally += 1
-            else
-                @async_unwrap remotecall_wait(Core.eval, pid, m, ex)
-            end
-        end
-        yield() # ensure that the remotecalls have had a chance to start
-
-        # execute locally last as we do not want local execution to block serialization
-        # of the request to remote nodes.
-        for _ in 1:run_locally
-            @async Core.eval(m, ex)
-        end
-    end
-    nothing
-end
-
-# optimized version of remotecall_eval for a single pid
-# and which also fetches the return value
-function remotecall_eval(m::Module, pid::Int, ex)
-    return remotecall_fetch(Core.eval, pid, m, ex)
-end
-
-
-# Statically split range [firstIndex,lastIndex] into equal sized chunks for np processors
-function splitrange(firstIndex::Int, lastIndex::Int, np::Int)
-    each, extras = divrem(lastIndex-firstIndex+1, np)
-    nchunks = each > 0 ? np : extras
-    chunks = Vector{UnitRange{Int}}(undef, nchunks)
-    lo = firstIndex
-    for i in 1:nchunks
-        hi = lo + each - 1
-        if extras > 0
-            hi += 1
-            extras -= 1
-        end
-        chunks[i] = lo:hi
-        lo = hi+1
-    end
-    return chunks
-end
-
-function preduce(reducer, f, R)
-    chunks = splitrange(Int(firstindex(R)), Int(lastindex(R)), nworkers())
-    all_w = workers()[1:length(chunks)]
-
-    w_exec = Task[]
-    for (idx,pid) in enumerate(all_w)
-        t = Task(()->remotecall_fetch(f, pid, reducer, R, first(chunks[idx]), last(chunks[idx])))
-        schedule(t)
-        push!(w_exec, t)
-    end
-    reduce(reducer, Any[fetch(t) for t in w_exec])
-end
-
-function pfor(f, R)
-    t = @async @sync for c in splitrange(Int(firstindex(R)), Int(lastindex(R)), nworkers())
-        @spawnat :any f(R, first(c), last(c))
-    end
-    errormonitor(t)
-end
-
-function make_preduce_body(var, body)
-    quote
-        function (reducer, R, lo::Int, hi::Int)
-            $(esc(var)) = R[lo]
-            ac = $(esc(body))
-            if lo != hi
-                for $(esc(var)) in R[(lo+1):hi]
-                    ac = reducer(ac, $(esc(body)))
-                end
-            end
-            ac
-        end
-    end
-end
-
-function make_pfor_body(var, body)
-    quote
-        function (R, lo::Int, hi::Int)
-            for $(esc(var)) in R[lo:hi]
-                $(esc(body))
-            end
-        end
-    end
-end
-
-"""
-    @distributed
-
-A distributed memory, parallel for loop of the form :
-
-    @distributed [reducer] for var = range
-        body
-    end
-
-The specified range is partitioned and locally executed across all workers. In case an
-optional reducer function is specified, `@distributed` performs local reductions on each worker
-with a final reduction on the calling process.
-
-Note that without a reducer function, `@distributed` executes asynchronously, i.e. it spawns
-independent tasks on all available workers and returns immediately without waiting for
-completion. To wait for completion, prefix the call with [`@sync`](@ref), like :
-
-    @sync @distributed for var = range
-        body
-    end
-"""
-macro distributed(args...)
-    na = length(args)
-    if na==1
-        loop = args[1]
-    elseif na==2
-        reducer = args[1]
-        loop = args[2]
-    else
-        throw(ArgumentError("wrong number of arguments to @distributed"))
-    end
-    if !isa(loop,Expr) || loop.head !== :for
-        error("malformed @distributed loop")
-    end
-    var = loop.args[1].args[1]
-    r = loop.args[1].args[2]
-    body = loop.args[2]
-    if Meta.isexpr(body, :block) && body.args[end] isa LineNumberNode
-        resize!(body.args, length(body.args) - 1)
-    end
-    if na==1
-        syncvar = esc(Base.sync_varname)
-        return quote
-            local ref = pfor($(make_pfor_body(var, body)), $(esc(r)))
-            if $(Expr(:islocal, syncvar))
-                put!($syncvar, ref)
-            end
-            ref
-        end
-    else
-        return :(preduce($(esc(reducer)), $(make_preduce_body(var, body)), $(esc(r))))
-    end
-end
diff --git a/stdlib/Distributed/src/managers.jl b/stdlib/Distributed/src/managers.jl
deleted file mode 100644
index 57f58598e85dc..0000000000000
--- a/stdlib/Distributed/src/managers.jl
+++ /dev/null
@@ -1,757 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Built-in SSH and Local Managers
-
-struct SSHManager <: ClusterManager
-    machines::Dict
-
-    function SSHManager(machines)
-        # machines => array of machine elements
-        # machine => address or (address, cnt)
-        # address => string of form `[user@]host[:port] bind_addr[:bind_port]`
-        # cnt => :auto or number
-        # :auto launches NUM_CORES number of workers at address
-        # number launches the specified number of workers at address
-        mhist = Dict()
-        for m in machines
-            if isa(m, Tuple)
-                host=m[1]
-                cnt=m[2]
-            else
-                host=m
-                cnt=1
-            end
-            current_cnt = get(mhist, host, 0)
-
-            if isa(cnt, Number)
-                mhist[host] = isa(current_cnt, Number) ? current_cnt + Int(cnt) : Int(cnt)
-            else
-                mhist[host] = cnt
-            end
-        end
-        new(mhist)
-    end
-end
-
-
-function check_addprocs_args(manager, kwargs)
-    valid_kw_names = keys(default_addprocs_params(manager))
-    for keyname in keys(kwargs)
-        !(keyname in valid_kw_names) && throw(ArgumentError("Invalid keyword argument $(keyname)"))
-    end
-end
-
-# SSHManager
-
-# start and connect to processes via SSH, optionally through an SSH tunnel.
-# the tunnel is only used from the head (process 1); the nodes are assumed
-# to be mutually reachable without a tunnel, as is often the case in a cluster.
-# Default value of kw arg max_parallel is the default value of MaxStartups in sshd_config
-# A machine is either a <hostname> or a tuple of (<hostname>, count)
-"""
-    addprocs(machines; tunnel=false, sshflags=\`\`, max_parallel=10, kwargs...) -> List of process identifiers
-
-Add worker processes on remote machines via SSH. Configuration is done with keyword
-arguments (see below). In particular, the `exename` keyword can be used to specify
-the path to the `julia` binary on the remote machine(s).
-
-`machines` is a vector of "machine specifications" which are given as strings of
-the form `[user@]host[:port] [bind_addr[:port]]`. `user` defaults to current user and `port`
-to the standard SSH port. If `[bind_addr[:port]]` is specified, other workers will connect
-to this worker at the specified `bind_addr` and `port`.
-
-It is possible to launch multiple processes on a remote host by using a tuple in the
-`machines` vector or the form `(machine_spec, count)`, where `count` is the number of
-workers to be launched on the specified host. Passing `:auto` as the worker count will
-launch as many workers as the number of CPU threads on the remote host.
-
-**Examples**:
-```julia
-addprocs([
-    "remote1",               # one worker on 'remote1' logging in with the current username
-    "user@remote2",          # one worker on 'remote2' logging in with the 'user' username
-    "user@remote3:2222",     # specifying SSH port to '2222' for 'remote3'
-    ("user@remote4", 4),     # launch 4 workers on 'remote4'
-    ("user@remote5", :auto), # launch as many workers as CPU threads on 'remote5'
-])
-```
-
-**Keyword arguments**:
-
-* `tunnel`: if `true` then SSH tunneling will be used to connect to the worker from the
-  master process. Default is `false`.
-
-* `multiplex`: if `true` then SSH multiplexing is used for SSH tunneling. Default is `false`.
-
-* `ssh`: the name or path of the SSH client executable used to start the workers.
-  Default is `"ssh"`.
-
-* `sshflags`: specifies additional ssh options, e.g. ``` sshflags=\`-i /home/foo/bar.pem\` ```
-
-* `max_parallel`: specifies the maximum number of workers connected to in parallel at a
-  host. Defaults to 10.
-
-* `shell`: specifies the type of shell to which ssh connects on the workers.
-
-    + `shell=:posix`: a POSIX-compatible Unix/Linux shell
-      (sh, ksh, bash, dash, zsh, etc.). The default.
-
-    + `shell=:csh`: a Unix C shell (csh, tcsh).
-
-    + `shell=:wincmd`: Microsoft Windows `cmd.exe`.
-
-* `dir`: specifies the working directory on the workers. Defaults to the host's current
-  directory (as found by `pwd()`)
-
-* `enable_threaded_blas`: if `true` then  BLAS will run on multiple threads in added
-  processes. Default is `false`.
-
-* `exename`: name of the `julia` executable. Defaults to `"\$(Sys.BINDIR)/julia"` or
-  `"\$(Sys.BINDIR)/julia-debug"` as the case may be. It is recommended that a common Julia
-  version is used on all remote machines because serialization and code distribution might
-  fail otherwise.
-
-* `exeflags`: additional flags passed to the worker processes.
-
-* `topology`: Specifies how the workers connect to each other. Sending a message between
-  unconnected workers results in an error.
-
-    + `topology=:all_to_all`: All processes are connected to each other. The default.
-
-    + `topology=:master_worker`: Only the driver process, i.e. `pid` 1 connects to the
-      workers. The workers do not connect to each other.
-
-    + `topology=:custom`: The `launch` method of the cluster manager specifies the
-      connection topology via fields `ident` and `connect_idents` in `WorkerConfig`.
-      A worker with a cluster manager identity `ident` will connect to all workers specified
-      in `connect_idents`.
-
-* `lazy`: Applicable only with `topology=:all_to_all`. If `true`, worker-worker connections
-  are setup lazily, i.e. they are setup at the first instance of a remote call between
-  workers. Default is true.
-
-* `env`: provide an array of string pairs such as
-  `env=["JULIA_DEPOT_PATH"=>"/depot"]` to request that environment variables
-  are set on the remote machine. By default only the environment variable
-  `JULIA_WORKER_TIMEOUT` is passed automatically from the local to the remote
-  environment.
-
-* `cmdline_cookie`: pass the authentication cookie via the `--worker` commandline
-   option. The (more secure) default behaviour of passing the cookie via ssh stdio
-   may hang with Windows workers that use older (pre-ConPTY) Julia or Windows versions,
-   in which case `cmdline_cookie=true` offers a work-around.
-
-!!! compat "Julia 1.6"
-    The keyword arguments `ssh`, `shell`, `env` and `cmdline_cookie`
-    were added in Julia 1.6.
-
-Environment variables:
-
-If the master process fails to establish a connection with a newly launched worker within
-60.0 seconds, the worker treats it as a fatal situation and terminates.
-This timeout can be controlled via environment variable `JULIA_WORKER_TIMEOUT`.
-The value of `JULIA_WORKER_TIMEOUT` on the master process specifies the number of seconds a
-newly launched worker waits for connection establishment.
-"""
-function addprocs(machines::AbstractVector; kwargs...)
-    manager = SSHManager(machines)
-    check_addprocs_args(manager, kwargs)
-    addprocs(manager; kwargs...)
-end
-
-default_addprocs_params(::SSHManager) =
-    merge(default_addprocs_params(),
-          Dict{Symbol,Any}(
-              :ssh            => "ssh",
-              :sshflags       => ``,
-              :shell          => :posix,
-              :cmdline_cookie => false,
-              :env            => [],
-              :tunnel         => false,
-              :multiplex      => false,
-              :max_parallel   => 10))
-
-function launch(manager::SSHManager, params::Dict, launched::Array, launch_ntfy::Condition)
-    # Launch one worker on each unique host in parallel. Additional workers are launched later.
-    # Wait for all launches to complete.
-    @sync for (i, (machine, cnt)) in enumerate(manager.machines)
-        let machine=machine, cnt=cnt
-             @async try
-                launch_on_machine(manager, $machine, $cnt, params, launched, launch_ntfy)
-            catch e
-                print(stderr, "exception launching on machine $(machine) : $(e)\n")
-            end
-        end
-    end
-    notify(launch_ntfy)
-end
-
-
-Base.show(io::IO, manager::SSHManager) = print(io, "SSHManager(machines=", manager.machines, ")")
-
-
-function parse_machine(machine::AbstractString)
-    hoststr = ""
-    portnum = nothing
-
-    if machine[begin] == '['  # ipv6 bracket notation (RFC 2732)
-        ipv6_end = findlast(']', machine)
-        if ipv6_end === nothing
-            throw(ArgumentError("invalid machine definition format string: invalid port format \"$machine\""))
-        end
-        hoststr = machine[begin+1 : prevind(machine,ipv6_end)]
-        machine_def = split(machine[ipv6_end : end] , ':')
-    else    # ipv4
-        machine_def = split(machine, ':')
-        hoststr = machine_def[1]
-    end
-
-    if length(machine_def) > 2
-        throw(ArgumentError("invalid machine definition format string: invalid port format \"$machine_def\""))
-    end
-
-    if length(machine_def) == 2
-        portstr = machine_def[2]
-
-        portnum = tryparse(Int, portstr)
-        if portnum === nothing
-            msg = "invalid machine definition format string: invalid port format \"$machine_def\""
-            throw(ArgumentError(msg))
-        end
-
-        if portnum < 1 || portnum > 65535
-            msg = "invalid machine definition format string: invalid port number \"$machine_def\""
-            throw(ArgumentError(msg))
-        end
-    end
-    (hoststr, portnum)
-end
-
-function launch_on_machine(manager::SSHManager, machine::AbstractString, cnt, params::Dict, launched::Array, launch_ntfy::Condition)
-    shell = params[:shell]
-    ssh = params[:ssh]
-    dir = params[:dir]
-    exename = params[:exename]
-    exeflags = params[:exeflags]
-    tunnel = params[:tunnel]
-    multiplex = params[:multiplex]
-    cmdline_cookie = params[:cmdline_cookie]
-    env = Dict{String,String}(params[:env])
-
-    # machine could be of the format [user@]host[:port] bind_addr[:bind_port]
-    # machine format string is split on whitespace
-    machine_bind = split(machine)
-    if isempty(machine_bind)
-        throw(ArgumentError("invalid machine definition format string: \"$machine\$"))
-    end
-    if length(machine_bind) > 1
-        exeflags = `--bind-to $(machine_bind[2]) $exeflags`
-    end
-    if cmdline_cookie
-        exeflags = `$exeflags --worker=$(cluster_cookie())`
-    else
-        exeflags = `$exeflags --worker`
-    end
-
-    host, portnum = parse_machine(machine_bind[1])
-    portopt = portnum === nothing ? `` : `-p $portnum`
-    sshflags = `$(params[:sshflags]) $portopt`
-
-    if tunnel
-        # First it checks if ssh multiplexing has been already enabled and the master process is running.
-        # If it's already running, later ssh sessions also use the same ssh multiplexing session even if
-        # `multiplex` is not explicitly specified; otherwise the tunneling session launched later won't
-        # go to background and hang. This is because of OpenSSH implementation.
-        if success(`$ssh $sshflags -O check $host`)
-            multiplex = true
-        elseif multiplex
-            # automatically create an SSH multiplexing session at the next SSH connection
-            controlpath = "~/.ssh/julia-%r@%h:%p"
-            sshflags = `$sshflags -o ControlMaster=auto -o ControlPath=$controlpath -o ControlPersist=no`
-        end
-    end
-
-    # Build up the ssh command
-
-    # pass on some environment variables by default
-    for var in ["JULIA_WORKER_TIMEOUT"]
-        if !haskey(env, var) && haskey(ENV, var)
-            env[var] = ENV[var]
-        end
-    end
-
-    # Julia process with passed in command line flag arguments
-    if shell === :posix
-        # ssh connects to a POSIX shell
-
-        cmds = "exec $(shell_escape_posixly(exename)) $(shell_escape_posixly(exeflags))"
-        # set environment variables
-        for (var, val) in env
-            occursin(r"^[a-zA-Z_][a-zA-Z_0-9]*\z", var) ||
-                throw(ArgumentError("invalid env key $var"))
-            cmds = "export $(var)=$(shell_escape_posixly(val))\n$cmds"
-        end
-        # change working directory
-        cmds = "cd -- $(shell_escape_posixly(dir))\n$cmds"
-
-        # shell login (-l) with string command (-c) to launch julia process
-        remotecmd = shell_escape_posixly(`sh -l -c $cmds`)
-
-    elseif shell === :csh
-        # ssh connects to (t)csh
-
-        remotecmd = "exec $(shell_escape_csh(exename)) $(shell_escape_csh(exeflags))"
-
-        # set environment variables
-        for (var, val) in env
-            occursin(r"^[a-zA-Z_][a-zA-Z_0-9]*\z", var) ||
-                throw(ArgumentError("invalid env key $var"))
-            remotecmd = "setenv $(var) $(shell_escape_csh(val))\n$remotecmd"
-        end
-        # change working directory
-        if dir !== nothing && dir != ""
-            remotecmd = "cd $(shell_escape_csh(dir))\n$remotecmd"
-        end
-
-    elseif shell === :wincmd
-        # ssh connects to Windows cmd.exe
-
-        any(c -> c == '"', exename) && throw(ArgumentError("invalid exename"))
-
-        remotecmd = shell_escape_wincmd(escape_microsoft_c_args(exename, exeflags...))
-        # change working directory
-        if dir !== nothing && dir != ""
-            any(c -> c == '"', dir) && throw(ArgumentError("invalid dir"))
-            remotecmd = "pushd \"$(dir)\" && $remotecmd"
-        end
-        # set environment variables
-        for (var, val) in env
-            occursin(r"^[a-zA-Z0-9_()[\]{}\$\\/#',;\.@!?*+-]+\z", var) || throw(ArgumentError("invalid env key $var"))
-            remotecmd = "set $(var)=$(shell_escape_wincmd(val))&& $remotecmd"
-        end
-
-    else
-        throw(ArgumentError("invalid shell"))
-    end
-
-    # remote launch with ssh with given ssh flags / host / port information
-    # -T → disable pseudo-terminal allocation
-    # -a → disable forwarding of auth agent connection
-    # -x → disable X11 forwarding
-    # -o ClearAllForwardings → option if forwarding connections and
-    #                          forwarded connections are causing collisions
-    cmd = `$ssh -T -a -x -o ClearAllForwardings=yes $sshflags $host $remotecmd`
-
-    # launch the remote Julia process
-
-    # detach launches the command in a new process group, allowing it to outlive
-    # the initial julia process (Ctrl-C and teardown methods are handled through messages)
-    # for the launched processes.
-    io = open(detach(cmd), "r+")
-    cmdline_cookie || write_cookie(io)
-
-    wconfig = WorkerConfig()
-    wconfig.io = io.out
-    wconfig.host = host
-    wconfig.tunnel = tunnel
-    wconfig.multiplex = multiplex
-    wconfig.sshflags = sshflags
-    wconfig.exeflags = exeflags
-    wconfig.exename = exename
-    wconfig.count = cnt
-    wconfig.max_parallel = params[:max_parallel]
-    wconfig.enable_threaded_blas = params[:enable_threaded_blas]
-
-
-    push!(launched, wconfig)
-    notify(launch_ntfy)
-end
-
-
-function manage(manager::SSHManager, id::Integer, config::WorkerConfig, op::Symbol)
-    id = Int(id)
-    if op === :interrupt
-        ospid = config.ospid
-        if ospid !== nothing
-            host = notnothing(config.host)
-            sshflags = notnothing(config.sshflags)
-            if !success(`ssh -T -a -x -o ClearAllForwardings=yes -n $sshflags $host "kill -2 $ospid"`)
-                @error "Error sending a Ctrl-C to julia worker $id on $host"
-            end
-        else
-            # This state can happen immediately after an addprocs
-            @error "Worker $id cannot be presently interrupted."
-        end
-    end
-end
-
-let tunnel_port = 9201
-    global next_tunnel_port
-    function next_tunnel_port()
-        retval = tunnel_port
-        if tunnel_port > 32000
-            tunnel_port = 9201
-        else
-            tunnel_port += 1
-        end
-        retval
-    end
-end
-
-
-"""
-    ssh_tunnel(user, host, bind_addr, port, sshflags, multiplex) -> localport
-
-Establish an SSH tunnel to a remote worker.
-Return a port number `localport` such that `localhost:localport` connects to `host:port`.
-"""
-function ssh_tunnel(user, host, bind_addr, port, sshflags, multiplex)
-    port = Int(port)
-    cnt = ntries = 100
-
-    # the connection is forwarded to `port` on the remote server over the local port `localport`
-    while cnt > 0
-        localport = next_tunnel_port()
-        if multiplex
-            # It assumes that an ssh multiplexing session has been already started by the remote worker.
-            cmd = `ssh $sshflags -O forward -L $localport:$bind_addr:$port $user@$host`
-        else
-            # if we cannot do port forwarding, fail immediately
-            # the -f option backgrounds the ssh session
-            # `sleep 60` command specifies that an allotted time of 60 seconds is allowed to start the
-            # remote julia process and establish the network connections specified by the process topology.
-            # If no connections are made within 60 seconds, ssh will exit and an error will be printed on the
-            # process that launched the remote process.
-            ssh = `ssh -T -a -x -o ExitOnForwardFailure=yes`
-            cmd = detach(`$ssh -f $sshflags $user@$host -L $localport:$bind_addr:$port sleep 60`)
-        end
-        if success(cmd)
-            return localport
-        end
-        cnt -= 1
-    end
-
-    throw(ErrorException(
-        string("unable to create SSH tunnel after ", ntries, " tries. No free port?")))
-end
-
-
-# LocalManager
-struct LocalManager <: ClusterManager
-    np::Int
-    restrict::Bool  # Restrict binding to 127.0.0.1 only
-end
-
-"""
-    addprocs(np::Integer=Sys.CPU_THREADS; restrict=true, kwargs...) -> List of process identifiers
-
-Launch `np` workers on the local host using the in-built `LocalManager`.
-
-Local workers inherit the current package environment (i.e., active project,
-[`LOAD_PATH`](@ref), and [`DEPOT_PATH`](@ref)) from the main process.
-
-**Keyword arguments**:
- - `restrict::Bool`: if `true` (default) binding is restricted to `127.0.0.1`.
- - `dir`, `exename`, `exeflags`, `env`, `topology`, `lazy`, `enable_threaded_blas`: same effect
-   as for `SSHManager`, see documentation for [`addprocs(machines::AbstractVector)`](@ref).
-
-!!! compat "Julia 1.9"
-    The inheriting of the package environment and the `env` keyword argument were
-    added in Julia 1.9.
-"""
-function addprocs(np::Integer=Sys.CPU_THREADS; restrict=true, kwargs...)
-    manager = LocalManager(np, restrict)
-    check_addprocs_args(manager, kwargs)
-    addprocs(manager; kwargs...)
-end
-
-Base.show(io::IO, manager::LocalManager) = print(io, "LocalManager()")
-
-function launch(manager::LocalManager, params::Dict, launched::Array, c::Condition)
-    dir = params[:dir]
-    exename = params[:exename]
-    exeflags = params[:exeflags]
-    bind_to = manager.restrict ? `127.0.0.1` : `$(LPROC.bind_addr)`
-    env = Dict{String,String}(params[:env])
-
-    # TODO: Maybe this belongs in base/initdefs.jl as a package_environment() function
-    #       together with load_path() etc. Might be useful to have when spawning julia
-    #       processes outside of Distributed.jl too.
-    # JULIA_(LOAD|DEPOT)_PATH are used to populate (LOAD|DEPOT)_PATH on startup,
-    # but since (LOAD|DEPOT)_PATH might have changed they are re-serialized here.
-    # Users can opt-out of this by passing `env = ...` to addprocs(...).
-    pathsep = Sys.iswindows() ? ";" : ":"
-    if get(env, "JULIA_LOAD_PATH", nothing) === nothing
-        env["JULIA_LOAD_PATH"] = join(LOAD_PATH, pathsep)
-    end
-    if get(env, "JULIA_DEPOT_PATH", nothing) === nothing
-        env["JULIA_DEPOT_PATH"] = join(DEPOT_PATH, pathsep)
-    end
-
-    # If we haven't explicitly asked for threaded BLAS, prevent OpenBLAS from starting
-    # up with multiple threads, thereby sucking up a bunch of wasted memory on Windows.
-    if !params[:enable_threaded_blas] &&
-       get(env, "OPENBLAS_NUM_THREADS", nothing) === nothing
-        env["OPENBLAS_NUM_THREADS"] = "1"
-    end
-    # Set the active project on workers using JULIA_PROJECT.
-    # Users can opt-out of this by (i) passing `env = ...` or (ii) passing
-    # `--project=...` as `exeflags` to addprocs(...).
-    project = Base.ACTIVE_PROJECT[]
-    if project !== nothing && get(env, "JULIA_PROJECT", nothing) === nothing
-        env["JULIA_PROJECT"] = project
-    end
-
-    for i in 1:manager.np
-        cmd = `$(julia_cmd(exename)) $exeflags --bind-to $bind_to --worker`
-        io = open(detach(setenv(addenv(cmd, env), dir=dir)), "r+")
-        write_cookie(io)
-
-        wconfig = WorkerConfig()
-        wconfig.process = io
-        wconfig.io = io.out
-        wconfig.enable_threaded_blas = params[:enable_threaded_blas]
-        push!(launched, wconfig)
-    end
-
-    notify(c)
-end
-
-function manage(manager::LocalManager, id::Integer, config::WorkerConfig, op::Symbol)
-    if op === :interrupt
-        kill(config.process, 2)
-    end
-end
-
-"""
-    launch(manager::ClusterManager, params::Dict, launched::Array, launch_ntfy::Condition)
-
-Implemented by cluster managers. For every Julia worker launched by this function, it should
-append a `WorkerConfig` entry to `launched` and notify `launch_ntfy`. The function MUST exit
-once all workers, requested by `manager` have been launched. `params` is a dictionary of all
-keyword arguments [`addprocs`](@ref) was called with.
-"""
-launch
-
-"""
-    manage(manager::ClusterManager, id::Integer, config::WorkerConfig. op::Symbol)
-
-Implemented by cluster managers. It is called on the master process, during a worker's
-lifetime, with appropriate `op` values:
-
-- with `:register`/`:deregister` when a worker is added / removed from the Julia worker pool.
-- with `:interrupt` when `interrupt(workers)` is called. The `ClusterManager`
-  should signal the appropriate worker with an interrupt signal.
-- with `:finalize` for cleanup purposes.
-"""
-manage
-
-# DefaultClusterManager for the default TCP transport - used by both SSHManager and LocalManager
-
-struct DefaultClusterManager <: ClusterManager
-end
-
-const tunnel_hosts_map = Dict{String, Semaphore}()
-
-"""
-    connect(manager::ClusterManager, pid::Int, config::WorkerConfig) -> (instrm::IO, outstrm::IO)
-
-Implemented by cluster managers using custom transports. It should establish a logical
-connection to worker with id `pid`, specified by `config` and return a pair of `IO`
-objects. Messages from `pid` to current process will be read off `instrm`, while messages to
-be sent to `pid` will be written to `outstrm`. The custom transport implementation must
-ensure that messages are delivered and received completely and in order.
-`connect(manager::ClusterManager.....)` sets up TCP/IP socket connections in-between
-workers.
-"""
-function connect(manager::ClusterManager, pid::Int, config::WorkerConfig)
-    if config.connect_at !== nothing
-        # this is a worker-to-worker setup call.
-        return connect_w2w(pid, config)
-    end
-
-    # master connecting to workers
-    if config.io !== nothing
-        (bind_addr, port::Int) = read_worker_host_port(config.io)
-        pubhost = something(config.host, bind_addr)
-        config.host = pubhost
-        config.port = port
-    else
-        pubhost = notnothing(config.host)
-        port = notnothing(config.port)
-        bind_addr = something(config.bind_addr, pubhost)
-    end
-
-    tunnel = something(config.tunnel, false)
-
-    s = split(pubhost,'@')
-    user = ""
-    if length(s) > 1
-        user = s[1]
-        pubhost = s[2]
-    else
-        if haskey(ENV, "USER")
-            user = ENV["USER"]
-        elseif tunnel
-            error("USER must be specified either in the environment ",
-                  "or as part of the hostname when tunnel option is used")
-        end
-    end
-
-    if tunnel
-        if !haskey(tunnel_hosts_map, pubhost)
-            tunnel_hosts_map[pubhost] = Semaphore(something(config.max_parallel, typemax(Int)))
-        end
-        sem = tunnel_hosts_map[pubhost]
-
-        sshflags = notnothing(config.sshflags)
-        multiplex = something(config.multiplex, false)
-        acquire(sem)
-        try
-            (s, bind_addr, forward) = connect_to_worker_with_tunnel(pubhost, bind_addr, port, user, sshflags, multiplex)
-            config.forward = forward
-        finally
-            release(sem)
-        end
-    else
-        (s, bind_addr) = connect_to_worker(bind_addr, port)
-    end
-
-    config.bind_addr = bind_addr
-
-    # write out a subset of the connect_at required for further worker-worker connection setups
-    config.connect_at = (bind_addr, port)
-
-    if config.io !== nothing
-        let pid = pid
-            redirect_worker_output(pid, notnothing(config.io))
-        end
-    end
-
-    (s, s)
-end
-
-function connect_w2w(pid::Int, config::WorkerConfig)
-    (rhost, rport) = notnothing(config.connect_at)::Tuple{String, Int}
-    config.host = rhost
-    config.port = rport
-    (s, bind_addr) = connect_to_worker(rhost, rport)
-    (s,s)
-end
-
-const client_port = Ref{UInt16}(0)
-
-function socket_reuse_port(iptype)
-    if ccall(:jl_has_so_reuseport, Int32, ()) == 1
-        sock = TCPSocket(delay = false)
-
-        # Some systems (e.g. Linux) require the port to be bound before setting REUSEPORT
-        bind_early = Sys.islinux()
-
-        bind_early && bind_client_port(sock, iptype)
-        rc = ccall(:jl_tcp_reuseport, Int32, (Ptr{Cvoid},), sock.handle)
-        if rc < 0
-            close(sock)
-
-            # This is an issue only on systems with lots of client connections, hence delay the warning
-            nworkers() > 128 && @warn "Error trying to reuse client port number, falling back to regular socket" maxlog=1
-
-            # provide a clean new socket
-            return TCPSocket()
-        end
-        bind_early || bind_client_port(sock, iptype)
-        return sock
-    else
-        return TCPSocket()
-    end
-end
-
-function bind_client_port(sock::TCPSocket, iptype)
-    bind_host = iptype(0)
-    if Sockets.bind(sock, bind_host, client_port[])
-        _addr, port = getsockname(sock)
-        client_port[] = port
-    end
-    return sock
-end
-
-function connect_to_worker(host::AbstractString, port::Integer)
-    # Avoid calling getaddrinfo if possible - involves a DNS lookup
-    # host may be a stringified ipv4 / ipv6 address or a dns name
-    bind_addr = nothing
-    try
-        bind_addr = parse(IPAddr,host)
-    catch
-        bind_addr = getaddrinfo(host)
-    end
-
-    iptype = typeof(bind_addr)
-    sock = socket_reuse_port(iptype)
-    connect(sock, bind_addr, UInt16(port))
-
-    (sock, string(bind_addr))
-end
-
-
-function connect_to_worker_with_tunnel(host::AbstractString, bind_addr::AbstractString, port::Integer, tunnel_user::AbstractString, sshflags, multiplex)
-    localport = ssh_tunnel(tunnel_user, host, bind_addr, UInt16(port), sshflags, multiplex)
-    s = connect("localhost", localport)
-    forward = "$localport:$bind_addr:$port"
-    (s, bind_addr, forward)
-end
-
-
-function cancel_ssh_tunnel(config::WorkerConfig)
-    host = notnothing(config.host)
-    sshflags = notnothing(config.sshflags)
-    tunnel = something(config.tunnel, false)
-    multiplex = something(config.multiplex, false)
-    if tunnel && multiplex
-        forward = notnothing(config.forward)
-        run(`ssh $sshflags -O cancel -L $forward $host`)
-    end
-end
-
-
-"""
-    kill(manager::ClusterManager, pid::Int, config::WorkerConfig)
-
-Implemented by cluster managers.
-It is called on the master process, by [`rmprocs`](@ref).
-It should cause the remote worker specified by `pid` to exit.
-`kill(manager::ClusterManager.....)` executes a remote `exit()`
-on `pid`.
-"""
-function kill(manager::ClusterManager, pid::Int, config::WorkerConfig)
-    remote_do(exit, pid)
-    nothing
-end
-
-function kill(manager::SSHManager, pid::Int, config::WorkerConfig)
-    remote_do(exit, pid)
-    cancel_ssh_tunnel(config)
-    nothing
-end
-
-function kill(manager::LocalManager, pid::Int, config::WorkerConfig; exit_timeout = 15, term_timeout = 15)
-    # First, try sending `exit()` to the remote over the usual control channels
-    remote_do(exit, pid)
-
-    timer_task = @async begin
-        sleep(exit_timeout)
-
-        # Check to see if our child exited, and if not, send an actual kill signal
-        if !process_exited(config.process)
-            @warn("Failed to gracefully kill worker $(pid), sending SIGTERM")
-            kill(config.process, Base.SIGTERM)
-
-            sleep(term_timeout)
-            if !process_exited(config.process)
-                @warn("Worker $(pid) ignored SIGTERM, sending SIGKILL")
-                kill(config.process, Base.SIGKILL)
-            end
-        end
-    end
-    errormonitor(timer_task)
-    return nothing
-end
diff --git a/stdlib/Distributed/src/messages.jl b/stdlib/Distributed/src/messages.jl
deleted file mode 100644
index fe3e5ab90b028..0000000000000
--- a/stdlib/Distributed/src/messages.jl
+++ /dev/null
@@ -1,215 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-abstract type AbstractMsg end
-
-
-## Wire format description
-#
-# Each message has three parts, which are written in order to the worker's stream.
-#  1) A header of type MsgHeader is serialized to the stream (via `serialize`).
-#  2) A message of type AbstractMsg is then serialized.
-#  3) Finally, a fixed boundary of 10 bytes is written.
-
-# Message header stored separately from body to be able to send back errors if
-# a deserialization error occurs when reading the message body.
-struct MsgHeader
-    response_oid::RRID
-    notify_oid::RRID
-    MsgHeader(respond_oid=RRID(0,0), notify_oid=RRID(0,0)) =
-        new(respond_oid, notify_oid)
-end
-
-# Special oid (0,0) uses to indicate a null ID.
-# Used instead of Union{Int, Nothing} to decrease wire size of header.
-null_id(id) =  id == RRID(0, 0)
-
-struct CallMsg{Mode} <: AbstractMsg
-    f::Any
-    args::Tuple
-    kwargs
-end
-struct CallWaitMsg <: AbstractMsg
-    f::Any
-    args::Tuple
-    kwargs
-end
-struct RemoteDoMsg <: AbstractMsg
-    f::Any
-    args::Tuple
-    kwargs
-end
-struct ResultMsg <: AbstractMsg
-    value::Any
-end
-
-
-# Worker initialization messages
-struct IdentifySocketMsg <: AbstractMsg
-    from_pid::Int
-end
-
-struct IdentifySocketAckMsg <: AbstractMsg
-end
-
-struct JoinPGRPMsg <: AbstractMsg
-    self_pid::Int
-    other_workers::Array
-    topology::Symbol
-    enable_threaded_blas::Bool
-    lazy::Bool
-end
-struct JoinCompleteMsg <: AbstractMsg
-    cpu_threads::Int
-    ospid::Int
-end
-
-# Avoiding serializing AbstractMsg containers results in a speedup
-# of approximately 10%. Can be removed once module Serialization
-# has been suitably improved.
-
-const msgtypes = Any[CallWaitMsg, IdentifySocketAckMsg, IdentifySocketMsg,
-                     JoinCompleteMsg, JoinPGRPMsg, RemoteDoMsg, ResultMsg,
-                     CallMsg{:call}, CallMsg{:call_fetch}]
-
-for (idx, tname) in enumerate(msgtypes)
-    exprs = Any[ :(serialize(s, o.$fld)) for fld in fieldnames(tname) ]
-    @eval function serialize_msg(s::AbstractSerializer, o::$tname)
-        write(s.io, UInt8($idx))
-        $(exprs...)
-        return nothing
-    end
-end
-
-let msg_cases = :(@assert false "Message type index ($idx) expected to be between 1:$($(length(msgtypes)))")
-    for i = length(msgtypes):-1:1
-        mti = msgtypes[i]
-        msg_cases = :(if idx == $i
-                          $(Expr(:call, QuoteNode(mti), fill(:(deserialize(s)), fieldcount(mti))...))
-                      else
-                          $msg_cases
-                      end)
-    end
-    @eval function deserialize_msg(s::AbstractSerializer)
-        idx = read(s.io, UInt8)
-        return $msg_cases
-    end
-end
-
-function send_msg_unknown(s::IO, header, msg)
-    error("attempt to send to unknown socket")
-end
-
-function send_msg(s::IO, header, msg)
-    id = worker_id_from_socket(s)
-    if id > -1
-        return send_msg(worker_from_id(id), header, msg)
-    end
-    send_msg_unknown(s, header, msg)
-end
-
-function send_msg_now(s::IO, header, msg::AbstractMsg)
-    id = worker_id_from_socket(s)
-    if id > -1
-        return send_msg_now(worker_from_id(id), header, msg)
-    end
-    send_msg_unknown(s, header, msg)
-end
-function send_msg_now(w::Worker, header, msg)
-    send_msg_(w, header, msg, true)
-end
-
-function send_msg(w::Worker, header, msg)
-    send_msg_(w, header, msg, false)
-end
-
-function flush_gc_msgs(w::Worker)
-    if !isdefined(w, :w_stream)
-        return
-    end
-    add_msgs = nothing
-    del_msgs = nothing
-    @lock w.msg_lock begin
-        if !w.gcflag # No work needed for this worker
-            return
-        end
-        @atomic w.gcflag = false
-        if !isempty(w.add_msgs)
-            add_msgs = w.add_msgs
-            w.add_msgs = Any[]
-        end
-
-        if !isempty(w.del_msgs)
-            del_msgs = w.del_msgs
-            w.del_msgs = Any[]
-        end
-    end
-    if add_msgs !== nothing
-        remote_do(add_clients, w, add_msgs)
-    end
-    if del_msgs !== nothing
-        remote_do(del_clients, w, del_msgs)
-    end
-    return
-end
-
-# Boundary inserted between messages on the wire, used for recovering
-# from deserialization errors. Picked arbitrarily.
-# A size of 10 bytes indicates ~ ~1e24 possible boundaries, so chance of collision
-# with message contents is negligible.
-const MSG_BOUNDARY = UInt8[0x79, 0x8e, 0x8e, 0xf5, 0x6e, 0x9b, 0x2e, 0x97, 0xd5, 0x7d]
-
-# Faster serialization/deserialization of MsgHeader and RRID
-function serialize_hdr_raw(io, hdr)
-    write(io, hdr.response_oid.whence, hdr.response_oid.id, hdr.notify_oid.whence, hdr.notify_oid.id)
-end
-
-function deserialize_hdr_raw(io)
-    data = read!(io, Ref{NTuple{4,Int}}())[]
-    return MsgHeader(RRID(data[1], data[2]), RRID(data[3], data[4]))
-end
-
-function send_msg_(w::Worker, header, msg, now::Bool)
-    check_worker_state(w)
-    if myid() != 1 && !isa(msg, IdentifySocketMsg) && !isa(msg, IdentifySocketAckMsg)
-        wait(w.initialized)
-    end
-    io = w.w_stream
-    lock(io)
-    try
-        reset_state(w.w_serializer)
-        serialize_hdr_raw(io, header)
-        invokelatest(serialize_msg, w.w_serializer, msg)  # io is wrapped in w_serializer
-        write(io, MSG_BOUNDARY)
-
-        if !now && w.gcflag
-            flush_gc_msgs(w)
-        else
-            flush(io)
-        end
-    finally
-        unlock(io)
-    end
-end
-
-function flush_gc_msgs()
-    try
-        for w in (PGRP::ProcessGroup).workers
-            if isa(w,Worker) && (w.state == W_CONNECTED) && w.gcflag
-                flush_gc_msgs(w)
-            end
-        end
-    catch e
-        bt = catch_backtrace()
-        @async showerror(stderr, e, bt)
-    end
-end
-
-function send_connection_hdr(w::Worker, cookie=true)
-    # For a connection initiated from the remote side to us, we only send the version,
-    # else when we initiate a connection we first send the cookie followed by our version.
-    # The remote side validates the cookie.
-    if cookie
-        write(w.w_stream, LPROC.cookie)
-    end
-    write(w.w_stream, rpad(VERSION_STRING, HDR_VERSION_LEN)[1:HDR_VERSION_LEN])
-end
diff --git a/stdlib/Distributed/src/pmap.jl b/stdlib/Distributed/src/pmap.jl
deleted file mode 100644
index f884d47fff98e..0000000000000
--- a/stdlib/Distributed/src/pmap.jl
+++ /dev/null
@@ -1,300 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-struct BatchProcessingError <: Exception
-    data
-    ex
-end
-
-"""
-    pgenerate([::AbstractWorkerPool], f, c...) -> iterator
-
-Apply `f` to each element of `c` in parallel using available workers and tasks.
-
-For multiple collection arguments, apply `f` elementwise.
-
-Results are returned in order as they become available.
-
-Note that `f` must be made available to all worker processes; see
-[Code Availability and Loading Packages](@ref code-availability)
-for details.
-"""
-function pgenerate(p::AbstractWorkerPool, f, c)
-    if length(p) == 0
-        return AsyncGenerator(f, c; ntasks=()->nworkers(p))
-    end
-    batches = batchsplit(c, min_batch_count = length(p) * 3)
-    return Iterators.flatten(AsyncGenerator(remote(p, b -> asyncmap(f, b)), batches))
-end
-pgenerate(p::AbstractWorkerPool, f, c1, c...) = pgenerate(p, a->f(a...), zip(c1, c...))
-pgenerate(f, c) = pgenerate(default_worker_pool(), f, c)
-pgenerate(f, c1, c...) = pgenerate(a->f(a...), zip(c1, c...))
-
-"""
-    pmap(f, [::AbstractWorkerPool], c...; distributed=true, batch_size=1, on_error=nothing, retry_delays=[], retry_check=nothing) -> collection
-
-Transform collection `c` by applying `f` to each element using available
-workers and tasks.
-
-For multiple collection arguments, apply `f` elementwise.
-
-Note that `f` must be made available to all worker processes; see
-[Code Availability and Loading Packages](@ref code-availability) for details.
-
-If a worker pool is not specified, all available workers, i.e., the default worker pool
-is used.
-
-By default, `pmap` distributes the computation over all specified workers. To use only the
-local process and distribute over tasks, specify `distributed=false`.
-This is equivalent to using [`asyncmap`](@ref). For example,
-`pmap(f, c; distributed=false)` is equivalent to `asyncmap(f,c; ntasks=()->nworkers())`
-
-`pmap` can also use a mix of processes and tasks via the `batch_size` argument. For batch sizes
-greater than 1, the collection is processed in multiple batches, each of length `batch_size` or less.
-A batch is sent as a single request to a free worker, where a local [`asyncmap`](@ref) processes
-elements from the batch using multiple concurrent tasks.
-
-Any error stops `pmap` from processing the remainder of the collection. To override this behavior
-you can specify an error handling function via argument `on_error` which takes in a single argument, i.e.,
-the exception. The function can stop the processing by rethrowing the error, or, to continue, return any value
-which is then returned inline with the results to the caller.
-
-Consider the following two examples. The first one returns the exception object inline,
-the second a 0 in place of any exception:
-```julia-repl
-julia> pmap(x->iseven(x) ? error("foo") : x, 1:4; on_error=identity)
-4-element Array{Any,1}:
- 1
-  ErrorException("foo")
- 3
-  ErrorException("foo")
-
-julia> pmap(x->iseven(x) ? error("foo") : x, 1:4; on_error=ex->0)
-4-element Array{Int64,1}:
- 1
- 0
- 3
- 0
-```
-
-Errors can also be handled by retrying failed computations. Keyword arguments `retry_delays` and
-`retry_check` are passed through to [`retry`](@ref) as keyword arguments `delays` and `check`
-respectively. If batching is specified, and an entire batch fails, all items in
-the batch are retried.
-
-Note that if both `on_error` and `retry_delays` are specified, the `on_error` hook is called
-before retrying. If `on_error` does not throw (or rethrow) an exception, the element will not
-be retried.
-
-Example: On errors, retry `f` on an element a maximum of 3 times without any delay between retries.
-```julia
-pmap(f, c; retry_delays = zeros(3))
-```
-
-Example: Retry `f` only if the exception is not of type [`InexactError`](@ref), with exponentially increasing
-delays up to 3 times. Return a `NaN` in place for all `InexactError` occurrences.
-```julia
-pmap(f, c; on_error = e->(isa(e, InexactError) ? NaN : rethrow()), retry_delays = ExponentialBackOff(n = 3))
-```
-"""
-function pmap(f, p::AbstractWorkerPool, c; distributed=true, batch_size=1, on_error=nothing,
-                                           retry_delays=[], retry_check=nothing)
-    f_orig = f
-    # Don't do remote calls if there are no workers.
-    if (length(p) == 0) || (length(p) == 1 && fetch(p.channel) == myid())
-        distributed = false
-    end
-
-    # Don't do batching if not doing remote calls.
-    if !distributed
-        batch_size = 1
-    end
-
-    # If not batching, do simple remote call.
-    if batch_size == 1
-        if on_error !== nothing
-            f = wrap_on_error(f, on_error)
-        end
-
-        if distributed
-            f = remote(p, f)
-        end
-
-        if length(retry_delays) > 0
-            f = wrap_retry(f, retry_delays, retry_check)
-        end
-
-        return asyncmap(f, c; ntasks=()->nworkers(p))
-    else
-        # During batch processing, We need to ensure that if on_error is set, it is called
-        # for each element in error, and that we return as many elements as the original list.
-        # retry, if set, has to be called element wise and we will do a best-effort
-        # to ensure that we do not call mapped function on the same element more than length(retry_delays).
-        # This guarantee is not possible in case of worker death / network errors, wherein
-        # we will retry the entire batch on a new worker.
-
-        handle_errors = ((on_error !== nothing) || (length(retry_delays) > 0))
-
-        # Unlike the non-batch case, in batch mode, we trap all errors and the on_error hook (if present)
-        # is processed later in non-batch mode.
-        if handle_errors
-            f = wrap_on_error(f, (x,e)->BatchProcessingError(x,e); capture_data=true)
-        end
-
-        f = wrap_batch(f, p, handle_errors)
-        results = asyncmap(f, c; ntasks=()->nworkers(p), batch_size=batch_size)
-
-        # process errors if any.
-        if handle_errors
-            process_batch_errors!(p, f_orig, results, on_error, retry_delays, retry_check)
-        end
-
-        return results
-    end
-end
-
-pmap(f, p::AbstractWorkerPool, c1, c...; kwargs...) = pmap(a->f(a...), p, zip(c1, c...); kwargs...)
-pmap(f, c; kwargs...) = pmap(f, default_worker_pool(), c; kwargs...)
-pmap(f, c1, c...; kwargs...) = pmap(a->f(a...), zip(c1, c...); kwargs...)
-
-function wrap_on_error(f, on_error; capture_data=false)
-    return x -> begin
-        try
-            f(x)
-        catch e
-            if capture_data
-                on_error(x, e)
-            else
-                on_error(e)
-            end
-        end
-    end
-end
-
-function wrap_retry(f, retry_delays, retry_check)
-    retry(delays=retry_delays, check=retry_check) do x
-        try
-            f(x)
-        catch e
-            rethrow(extract_exception(e))
-        end
-    end
-end
-
-function wrap_batch(f, p, handle_errors)
-    f = asyncmap_batch(f)
-    return batch -> begin
-        try
-            remotecall_fetch(f, p, batch)
-        catch e
-            if handle_errors
-                return Any[BatchProcessingError(b, e) for b in batch]
-            else
-                rethrow()
-            end
-        end
-    end
-end
-
-asyncmap_batch(f) = batch -> asyncmap(x->f(x...), batch)
-extract_exception(e) = isa(e, RemoteException) ? e.captured.ex : e
-
-
-function process_batch_errors!(p, f, results, on_error, retry_delays, retry_check)
-    # Handle all the ones in error in another pmap, with batch size set to 1
-    reprocess = Tuple{Int,BatchProcessingError}[]
-    for (idx, v) in enumerate(results)
-        if isa(v, BatchProcessingError)
-            push!(reprocess, (idx,v))
-        end
-    end
-
-    if length(reprocess) > 0
-        errors = [x[2] for x in reprocess]
-        exceptions = Any[x.ex for x in errors]
-        state = iterate(retry_delays)
-        state !== nothing && (state = state[2])
-        error_processed = let state=state
-            if (length(retry_delays)::Int > 0) &&
-                    (retry_check === nothing || all([retry_check(state,ex)[2] for ex in exceptions]))
-                # BatchProcessingError.data is a tuple of original args
-                pmap(x->f(x...), p, Any[x.data for x in errors];
-                        on_error = on_error, retry_delays = collect(retry_delays)[2:end::Int], retry_check = retry_check)
-            elseif on_error !== nothing
-                map(on_error, exceptions)
-            else
-                throw(CompositeException(exceptions))
-            end
-        end
-
-        for (idx, v) in enumerate(error_processed)
-            results[reprocess[idx][1]] = v
-        end
-    end
-    nothing
-end
-
-"""
-    head_and_tail(c, n) -> head, tail
-
-Return `head`: the first `n` elements of `c`;
-and `tail`: an iterator over the remaining elements.
-
-```jldoctest
-julia> b, c = Distributed.head_and_tail(1:10, 3)
-([1, 2, 3], Base.Iterators.Rest{UnitRange{Int64}, Int64}(1:10, 3))
-
-julia> collect(c)
-7-element Vector{Int64}:
-  4
-  5
-  6
-  7
-  8
-  9
- 10
-```
-"""
-function head_and_tail(c, n)
-    head = Vector{eltype(c)}(undef, n)
-    n == 0 && return (head, c)
-    i = 1
-    y = iterate(c)
-    y === nothing && return (resize!(head, 0), ())
-    head[i] = y[1]
-    while i < n
-        y = iterate(c, y[2])
-        y === nothing && return (resize!(head, i), ())
-        i += 1
-        head[i] = y[1]
-    end
-    return head, Iterators.rest(c, y[2])
-end
-
-"""
-    batchsplit(c; min_batch_count=1, max_batch_size=100) -> iterator
-
-Split a collection into at least `min_batch_count` batches.
-
-Equivalent to `partition(c, max_batch_size)` when `length(c) >> max_batch_size`.
-"""
-function batchsplit(c; min_batch_count=1, max_batch_size=100)
-    if min_batch_count < 1
-        throw(ArgumentError("min_batch_count must be ≥ 1, got $min_batch_count"))
-    end
-
-    if max_batch_size < 1
-        throw(ArgumentError("max_batch_size must be ≥ 1, got $max_batch_size"))
-    end
-
-    # Split collection into batches, then peek at the first few batches
-    batches = Iterators.partition(c, max_batch_size)
-    head, tail = head_and_tail(batches, min_batch_count)
-
-    # If there are not enough batches, use a smaller batch size
-    if length(head) < min_batch_count
-        batch_size = max(1, div(sum(length, head), min_batch_count))
-        return Iterators.partition(collect(Iterators.flatten(head)), batch_size)
-    end
-
-    return Iterators.flatten((head, tail))
-end
diff --git a/stdlib/Distributed/src/precompile.jl b/stdlib/Distributed/src/precompile.jl
deleted file mode 100644
index 87380f627db7a..0000000000000
--- a/stdlib/Distributed/src/precompile.jl
+++ /dev/null
@@ -1,14 +0,0 @@
-precompile(Tuple{typeof(Distributed.remotecall),Function,Int,Module,Vararg{Any, 100}})
-precompile(Tuple{typeof(Distributed.procs)})
-precompile(Tuple{typeof(Distributed.finalize_ref), Distributed.Future})
-# This is disabled because it doesn't give much benefit
-# and the code in Distributed is poorly typed causing many invalidations
-# TODO: Maybe reenable now that Distributed is not in sysimage.
-#=
-    precompile_script *= """
-    using Distributed
-    addprocs(2)
-    pmap(x->iseven(x) ? 1 : 0, 1:4)
-    @distributed (+) for i = 1:100 Int(rand(Bool)) end
-    """
-=#
diff --git a/stdlib/Distributed/src/process_messages.jl b/stdlib/Distributed/src/process_messages.jl
deleted file mode 100644
index 7bbf7cfde943b..0000000000000
--- a/stdlib/Distributed/src/process_messages.jl
+++ /dev/null
@@ -1,386 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# data stored by the owner of a remote reference
-def_rv_channel() = Channel(1)
-mutable struct RemoteValue
-    c::AbstractChannel
-    clientset::BitSet # Set of workerids that have a reference to this channel.
-                      # Keeping ids instead of a count aids in cleaning up upon
-                      # a worker exit.
-
-    waitingfor::Int   # processor we need to hear from to fill this, or 0
-
-    synctake::Union{ReentrantLock, Nothing}  # A lock used to synchronize the
-                      # specific case of a local put! / remote take! on an
-                      # unbuffered store. github issue #29932
-
-    function RemoteValue(c)
-        c_is_buffered = false
-        try
-            c_is_buffered = isbuffered(c)
-        catch
-        end
-
-        if c_is_buffered
-            return new(c, BitSet(), 0, nothing)
-        else
-            return new(c, BitSet(), 0, ReentrantLock())
-        end
-    end
-end
-
-wait(rv::RemoteValue) = wait(rv.c)
-
-# A wrapper type to handle issue #29932 which requires locking / unlocking of
-# RemoteValue.synctake outside of lexical scope.
-struct SyncTake
-    v::Any
-    rv::RemoteValue
-end
-
-## core messages: do, call, fetch, wait, ref, put! ##
-struct RemoteException <: Exception
-    pid::Int
-    captured::CapturedException
-end
-
-"""
-    capture_exception(ex::RemoteException, bt)
-
-Returns `ex::RemoteException` which has already captured a backtrace (via it's [`CapturedException`](@ref) field `captured`).
-"""
-Base.capture_exception(ex::RemoteException, bt) = ex
-
-"""
-    RemoteException(captured)
-
-Exceptions on remote computations are captured and rethrown locally.  A `RemoteException`
-wraps the `pid` of the worker and a captured exception. A `CapturedException` captures the
-remote exception and a serializable form of the call stack when the exception was raised.
-"""
-RemoteException(captured) = RemoteException(myid(), captured)
-function showerror(io::IO, re::RemoteException)
-    (re.pid != myid()) && print(io, "On worker ", re.pid, ":\n")
-    showerror(io, re.captured)
-end
-
-function run_work_thunk(thunk::Function, print_error::Bool)
-    local result
-    try
-        result = thunk()
-    catch err
-        ce = CapturedException(err, catch_backtrace())
-        result = RemoteException(ce)
-        print_error && showerror(stderr, ce)
-    end
-    return result
-end
-function run_work_thunk(rv::RemoteValue, thunk)
-    put!(rv, run_work_thunk(thunk, false))
-    nothing
-end
-
-function schedule_call(rid, thunk)
-    return lock(client_refs) do
-        rv = RemoteValue(def_rv_channel())
-        (PGRP::ProcessGroup).refs[rid] = rv
-        push!(rv.clientset, rid.whence)
-        errormonitor(@async run_work_thunk(rv, thunk))
-        return rv
-    end
-end
-
-
-function deliver_result(sock::IO, msg, oid, value)
-    #print("$(myid()) sending result $oid\n")
-    if msg === :call_fetch || isa(value, RemoteException)
-        val = value
-    else
-        val = :OK
-    end
-    try
-        send_msg_now(sock, MsgHeader(oid), ResultMsg(val))
-    catch e
-        # terminate connection in case of serialization error
-        # otherwise the reading end would hang
-        @error "Fatal error on process $(myid())" exception=e,catch_backtrace()
-        wid = worker_id_from_socket(sock)
-        close(sock)
-        if myid()==1
-            rmprocs(wid)
-        elseif wid == 1
-            exit(1)
-        else
-            remote_do(rmprocs, 1, wid)
-        end
-    end
-end
-
-## message event handlers ##
-function process_messages(r_stream::TCPSocket, w_stream::TCPSocket, incoming::Bool=true)
-    errormonitor(@async process_tcp_streams(r_stream, w_stream, incoming))
-end
-
-function process_tcp_streams(r_stream::TCPSocket, w_stream::TCPSocket, incoming::Bool)
-    Sockets.nagle(r_stream, false)
-    Sockets.quickack(r_stream, true)
-    wait_connected(r_stream)
-    if r_stream != w_stream
-        Sockets.nagle(w_stream, false)
-        Sockets.quickack(w_stream, true)
-        wait_connected(w_stream)
-    end
-    message_handler_loop(r_stream, w_stream, incoming)
-end
-
-"""
-    process_messages(r_stream::IO, w_stream::IO, incoming::Bool=true)
-
-Called by cluster managers using custom transports. It should be called when the custom
-transport implementation receives the first message from a remote worker. The custom
-transport must manage a logical connection to the remote worker and provide two
-`IO` objects, one for incoming messages and the other for messages addressed to the
-remote worker.
-If `incoming` is `true`, the remote peer initiated the connection.
-Whichever of the pair initiates the connection sends the cluster cookie and its
-Julia version number to perform the authentication handshake.
-
-See also [`cluster_cookie`](@ref).
-"""
-function process_messages(r_stream::IO, w_stream::IO, incoming::Bool=true)
-    errormonitor(@async message_handler_loop(r_stream, w_stream, incoming))
-end
-
-function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool)
-    wpid=0          # the worker r_stream is connected to.
-    boundary = similar(MSG_BOUNDARY)
-    try
-        version = process_hdr(r_stream, incoming)
-        serializer = ClusterSerializer(r_stream)
-
-        # The first message will associate wpid with r_stream
-        header = deserialize_hdr_raw(r_stream)
-        msg = deserialize_msg(serializer)
-        handle_msg(msg, header, r_stream, w_stream, version)
-        wpid = worker_id_from_socket(r_stream)
-        @assert wpid > 0
-
-        readbytes!(r_stream, boundary, length(MSG_BOUNDARY))
-
-        while true
-            reset_state(serializer)
-            header = deserialize_hdr_raw(r_stream)
-            # println("header: ", header)
-
-            try
-                msg = invokelatest(deserialize_msg, serializer)
-            catch e
-                # Deserialization error; discard bytes in stream until boundary found
-                boundary_idx = 1
-                while true
-                    # This may throw an EOF error if the terminal boundary was not written
-                    # correctly, triggering the higher-scoped catch block below
-                    byte = read(r_stream, UInt8)
-                    if byte == MSG_BOUNDARY[boundary_idx]
-                        boundary_idx += 1
-                        if boundary_idx > length(MSG_BOUNDARY)
-                            break
-                        end
-                    else
-                        boundary_idx = 1
-                    end
-                end
-
-                # remotecalls only rethrow RemoteExceptions. Any other exception is treated as
-                # data to be returned. Wrap this exception in a RemoteException.
-                remote_err = RemoteException(myid(), CapturedException(e, catch_backtrace()))
-                # println("Deserialization error. ", remote_err)
-                if !null_id(header.response_oid)
-                    ref = lookup_ref(header.response_oid)
-                    put!(ref, remote_err)
-                end
-                if !null_id(header.notify_oid)
-                    deliver_result(w_stream, :call_fetch, header.notify_oid, remote_err)
-                end
-                continue
-            end
-            readbytes!(r_stream, boundary, length(MSG_BOUNDARY))
-
-            # println("got msg: ", typeof(msg))
-            handle_msg(msg, header, r_stream, w_stream, version)
-        end
-    catch e
-        # Check again as it may have been set in a message handler but not propagated to the calling block above
-        if wpid < 1
-            wpid = worker_id_from_socket(r_stream)
-        end
-
-        if wpid < 1
-            println(stderr, e, CapturedException(e, catch_backtrace()))
-            println(stderr, "Process($(myid())) - Unknown remote, closing connection.")
-        elseif !(wpid in map_del_wrkr)
-            werr = worker_from_id(wpid)
-            oldstate = werr.state
-            set_worker_state(werr, W_TERMINATED)
-
-            # If unhandleable error occurred talking to pid 1, exit
-            if wpid == 1
-                if isopen(w_stream)
-                    @error "Fatal error on process $(myid())" exception=e,catch_backtrace()
-                end
-                exit(1)
-            end
-
-            # Will treat any exception as death of node and cleanup
-            # since currently we do not have a mechanism for workers to reconnect
-            # to each other on unhandled errors
-            deregister_worker(wpid)
-        end
-
-        close(r_stream)
-        close(w_stream)
-
-        if (myid() == 1) && (wpid > 1)
-            if oldstate != W_TERMINATING
-                println(stderr, "Worker $wpid terminated.")
-                rethrow()
-            end
-        end
-
-        return nothing
-    end
-end
-
-function process_hdr(s, validate_cookie)
-    if validate_cookie
-        cookie = read(s, HDR_COOKIE_LEN)
-        if length(cookie) < HDR_COOKIE_LEN
-            error("Cookie read failed. Connection closed by peer.")
-        end
-
-        self_cookie = cluster_cookie()
-        for i in 1:HDR_COOKIE_LEN
-            if UInt8(self_cookie[i]) != cookie[i]
-                error("Process($(myid())) - Invalid connection credentials sent by remote.")
-            end
-        end
-    end
-
-    # When we have incompatible julia versions trying to connect to each other,
-    # and can be detected, raise an appropriate error.
-    # For now, just return the version.
-    version = read(s, HDR_VERSION_LEN)
-    if length(version) < HDR_VERSION_LEN
-        error("Version read failed. Connection closed by peer.")
-    end
-
-    return VersionNumber(strip(String(version)))
-end
-
-function handle_msg(msg::CallMsg{:call}, header, r_stream, w_stream, version)
-    schedule_call(header.response_oid, ()->invokelatest(msg.f, msg.args...; msg.kwargs...))
-end
-function handle_msg(msg::CallMsg{:call_fetch}, header, r_stream, w_stream, version)
-    errormonitor(@async begin
-        v = run_work_thunk(()->invokelatest(msg.f, msg.args...; msg.kwargs...), false)
-        if isa(v, SyncTake)
-            try
-                deliver_result(w_stream, :call_fetch, header.notify_oid, v.v)
-            finally
-                unlock(v.rv.synctake)
-            end
-        else
-            deliver_result(w_stream, :call_fetch, header.notify_oid, v)
-        end
-        nothing
-    end)
-end
-
-function handle_msg(msg::CallWaitMsg, header, r_stream, w_stream, version)
-    errormonitor(@async begin
-        rv = schedule_call(header.response_oid, ()->invokelatest(msg.f, msg.args...; msg.kwargs...))
-        deliver_result(w_stream, :call_wait, header.notify_oid, fetch(rv.c))
-        nothing
-    end)
-end
-
-function handle_msg(msg::RemoteDoMsg, header, r_stream, w_stream, version)
-    errormonitor(@async run_work_thunk(()->invokelatest(msg.f, msg.args...; msg.kwargs...), true))
-end
-
-function handle_msg(msg::ResultMsg, header, r_stream, w_stream, version)
-    put!(lookup_ref(header.response_oid), msg.value)
-end
-
-function handle_msg(msg::IdentifySocketMsg, header, r_stream, w_stream, version)
-    # register a new peer worker connection
-    w = Worker(msg.from_pid, r_stream, w_stream, cluster_manager; version=version)
-    send_connection_hdr(w, false)
-    send_msg_now(w, MsgHeader(), IdentifySocketAckMsg())
-    notify(w.initialized)
-end
-
-function handle_msg(msg::IdentifySocketAckMsg, header, r_stream, w_stream, version)
-    w = map_sock_wrkr[r_stream]
-    w.version = version
-end
-
-function handle_msg(msg::JoinPGRPMsg, header, r_stream, w_stream, version)
-    LPROC.id = msg.self_pid
-    controller = Worker(1, r_stream, w_stream, cluster_manager; version=version)
-    notify(controller.initialized)
-    register_worker(LPROC)
-    topology(msg.topology)
-
-    if !msg.enable_threaded_blas
-        Base.disable_library_threading()
-    end
-
-    lazy = msg.lazy
-    PGRP.lazy = lazy
-
-    @sync for (connect_at, rpid) in msg.other_workers
-        wconfig = WorkerConfig()
-        wconfig.connect_at = connect_at
-
-        let rpid=rpid, wconfig=wconfig
-            if lazy
-                # The constructor registers the object with a global registry.
-                Worker(rpid, ()->connect_to_peer(cluster_manager, rpid, wconfig))
-            else
-                @async connect_to_peer(cluster_manager, rpid, wconfig)
-            end
-        end
-    end
-
-    send_connection_hdr(controller, false)
-    send_msg_now(controller, MsgHeader(RRID(0,0), header.notify_oid), JoinCompleteMsg(Sys.CPU_THREADS, getpid()))
-end
-
-function connect_to_peer(manager::ClusterManager, rpid::Int, wconfig::WorkerConfig)
-    try
-        (r_s, w_s) = connect(manager, rpid, wconfig)
-        w = Worker(rpid, r_s, w_s, manager; config=wconfig)
-        process_messages(w.r_stream, w.w_stream, false)
-        send_connection_hdr(w, true)
-        send_msg_now(w, MsgHeader(), IdentifySocketMsg(myid()))
-        notify(w.initialized)
-    catch e
-        @error "Error on $(myid()) while connecting to peer $rpid, exiting" exception=e,catch_backtrace()
-        exit(1)
-    end
-end
-
-function handle_msg(msg::JoinCompleteMsg, header, r_stream, w_stream, version)
-    w = map_sock_wrkr[r_stream]
-    environ = something(w.config.environ, Dict())
-    environ[:cpu_threads] = msg.cpu_threads
-    w.config.environ = environ
-    w.config.ospid = msg.ospid
-    w.version = version
-
-    ntfy_channel = lookup_ref(header.notify_oid)
-    put!(ntfy_channel, w.id)
-
-    push!(default_worker_pool(), w.id)
-end
diff --git a/stdlib/Distributed/src/remotecall.jl b/stdlib/Distributed/src/remotecall.jl
deleted file mode 100644
index 0b1143d855510..0000000000000
--- a/stdlib/Distributed/src/remotecall.jl
+++ /dev/null
@@ -1,800 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-import Base: eltype
-
-abstract type AbstractRemoteRef end
-
-"""
-    client_refs
-
-Tracks whether a particular `AbstractRemoteRef`
-(identified by its RRID) exists on this worker.
-
-The `client_refs` lock is also used to synchronize access to `.refs` and associated `clientset` state.
-"""
-const client_refs = WeakKeyDict{AbstractRemoteRef, Nothing}() # used as a WeakKeySet
-
-"""
-    Future(w::Int, rrid::RRID, v::Union{Some, Nothing}=nothing)
-
-A `Future` is a placeholder for a single computation
-of unknown termination status and time.
-For multiple potential computations, see `RemoteChannel`.
-See `remoteref_id` for identifying an `AbstractRemoteRef`.
-"""
-mutable struct Future <: AbstractRemoteRef
-    where::Int
-    whence::Int
-    id::Int
-    lock::ReentrantLock
-    @atomic v::Union{Some{Any}, Nothing}
-
-    Future(w::Int, rrid::RRID, v::Union{Some, Nothing}=nothing) =
-        (r = new(w,rrid.whence,rrid.id,ReentrantLock(),v); return test_existing_ref(r))
-
-    Future(t::NTuple{4, Any}) = new(t[1],t[2],t[3],ReentrantLock(),t[4])  # Useful for creating dummy, zeroed-out instances
-end
-
-"""
-    RemoteChannel(pid::Integer=myid())
-
-Make a reference to a `Channel{Any}(1)` on process `pid`.
-The default `pid` is the current process.
-
-    RemoteChannel(f::Function, pid::Integer=myid())
-
-Create references to remote channels of a specific size and type. `f` is a function that
-when executed on `pid` must return an implementation of an `AbstractChannel`.
-
-For example, `RemoteChannel(()->Channel{Int}(10), pid)`, will return a reference to a
-channel of type `Int` and size 10 on `pid`.
-
-The default `pid` is the current process.
-"""
-mutable struct RemoteChannel{T<:AbstractChannel} <: AbstractRemoteRef
-    where::Int
-    whence::Int
-    id::Int
-
-    function RemoteChannel{T}(w::Int, rrid::RRID) where T<:AbstractChannel
-        r = new(w, rrid.whence, rrid.id)
-        return test_existing_ref(r)
-    end
-
-    function RemoteChannel{T}(t::Tuple) where T<:AbstractChannel
-        return new(t[1],t[2],t[3])
-    end
-end
-
-function test_existing_ref(r::AbstractRemoteRef)
-    found = getkey(client_refs, r, nothing)
-    if found !== nothing
-        @assert r.where > 0
-        if isa(r, Future)
-            # this is only for copying the reference from Future to RemoteRef (just created)
-            fv_cache = @atomic :acquire found.v
-            rv_cache = @atomic :monotonic r.v
-            if fv_cache === nothing && rv_cache !== nothing
-                # we have recd the value from another source, probably a deserialized ref, send a del_client message
-                send_del_client(r)
-                @lock found.lock begin
-                    @atomicreplace found.v nothing => rv_cache
-                end
-            end
-        end
-        return found::typeof(r)
-    end
-
-    client_refs[r] = nothing
-    finalizer(finalize_ref, r)
-    return r
-end
-
-function finalize_ref(r::AbstractRemoteRef)
-    if r.where > 0 # Handle the case of the finalizer having been called manually
-        if trylock(client_refs.lock) # trylock doesn't call wait which causes yields
-            try
-                delete!(client_refs.ht, r) # direct removal avoiding locks
-                if isa(r, RemoteChannel)
-                    send_del_client_no_lock(r)
-                else
-                    # send_del_client only if the reference has not been set
-                    v_cache = @atomic :monotonic r.v
-                    v_cache === nothing && send_del_client_no_lock(r)
-                    @atomic :monotonic r.v = nothing
-                end
-                r.where = 0
-            finally
-                unlock(client_refs.lock)
-            end
-        else
-            finalizer(finalize_ref, r)
-            return nothing
-        end
-    end
-    nothing
-end
-
-"""
-    Future(pid::Integer=myid())
-
-Create a `Future` on process `pid`.
-The default `pid` is the current process.
-"""
-Future(pid::Integer=myid()) = Future(pid, RRID())
-Future(w::LocalProcess) = Future(w.id)
-Future(w::Worker) = Future(w.id)
-
-RemoteChannel(pid::Integer=myid()) = RemoteChannel{Channel{Any}}(pid, RRID())
-
-function RemoteChannel(f::Function, pid::Integer=myid())
-    remotecall_fetch(pid, f, RRID()) do f, rrid
-        rv=lookup_ref(rrid, f)
-        RemoteChannel{typeof(rv.c)}(myid(), rrid)
-    end
-end
-
-Base.eltype(::Type{RemoteChannel{T}}) where {T} = eltype(T)
-
-hash(r::AbstractRemoteRef, h::UInt) = hash(r.whence, hash(r.id, h))
-==(r::AbstractRemoteRef, s::AbstractRemoteRef) = (r.whence==s.whence && r.id==s.id)
-
-"""
-    remoteref_id(r::AbstractRemoteRef) -> RRID
-
-`Future`s and `RemoteChannel`s are identified by fields:
-
-* `where` - refers to the node where the underlying object/storage
-  referred to by the reference actually exists.
-
-* `whence` - refers to the node the remote reference was created from.
-  Note that this is different from the node where the underlying object
-  referred to actually exists. For example calling `RemoteChannel(2)`
-  from the master process would result in a `where` value of 2 and
-  a `whence` value of 1.
-
-* `id` is unique across all references created from the worker specified by `whence`.
-
-Taken together,  `whence` and `id` uniquely identify a reference across all workers.
-
-`remoteref_id` is a low-level API which returns a `RRID`
-object that wraps `whence` and `id` values of a remote reference.
-"""
-remoteref_id(r::AbstractRemoteRef) = RRID(r.whence, r.id)
-
-"""
-    channel_from_id(id) -> c
-
-A low-level API which returns the backing `AbstractChannel` for an `id` returned by
-[`remoteref_id`](@ref).
-The call is valid only on the node where the backing channel exists.
-"""
-function channel_from_id(id)
-    rv = lock(client_refs) do
-        return get(PGRP.refs, id, false)
-    end
-    if rv === false
-        throw(ErrorException("Local instance of remote reference not found"))
-    end
-    return rv.c
-end
-
-lookup_ref(rrid::RRID, f=def_rv_channel) = lookup_ref(PGRP, rrid, f)
-function lookup_ref(pg, rrid, f)
-    return lock(client_refs) do
-        rv = get(pg.refs, rrid, false)
-        if rv === false
-            # first we've heard of this ref
-            rv = RemoteValue(invokelatest(f))
-            pg.refs[rrid] = rv
-            push!(rv.clientset, rrid.whence)
-        end
-        return rv
-    end::RemoteValue
-end
-
-"""
-    isready(rr::Future)
-
-Determine whether a [`Future`](@ref) has a value stored to it.
-
-If the argument `Future` is owned by a different node, this call will block to wait for the answer.
-It is recommended to wait for `rr` in a separate task instead
-or to use a local [`Channel`](@ref) as a proxy:
-
-```julia
-p = 1
-f = Future(p)
-errormonitor(@async put!(f, remotecall_fetch(long_computation, p)))
-isready(f)  # will not block
-```
-"""
-function isready(rr::Future)
-    v_cache = @atomic rr.v
-    v_cache === nothing || return true
-
-    rid = remoteref_id(rr)
-    return if rr.where == myid()
-        isready(lookup_ref(rid).c)
-    else
-        remotecall_fetch(rid->isready(lookup_ref(rid).c), rr.where, rid)
-    end
-end
-
-"""
-    isready(rr::RemoteChannel, args...)
-
-Determine whether a [`RemoteChannel`](@ref) has a value stored to it.
-Note that this function can cause race conditions, since by the
-time you receive its result it may no longer be true. However,
-it can be safely used on a [`Future`](@ref) since they are assigned only once.
-"""
-function isready(rr::RemoteChannel, args...)
-    rid = remoteref_id(rr)
-    return if rr.where == myid()
-        isready(lookup_ref(rid).c, args...)
-    else
-        remotecall_fetch(rid->isready(lookup_ref(rid).c, args...), rr.where, rid)
-    end
-end
-
-del_client(rr::AbstractRemoteRef) = del_client(remoteref_id(rr), myid())
-
-del_client(id, client) = del_client(PGRP, id, client)
-function del_client(pg, id, client)
-    lock(client_refs) do
-        _del_client(pg, id, client)
-    end
-    nothing
-end
-
-function _del_client(pg, id, client)
-    rv = get(pg.refs, id, false)
-    if rv !== false
-        delete!(rv.clientset, client)
-        if isempty(rv.clientset)
-            delete!(pg.refs, id)
-            #print("$(myid()) collected $id\n")
-        end
-    end
-    nothing
-end
-
-function del_clients(pairs::Vector)
-    for p in pairs
-        del_client(p[1], p[2])
-    end
-end
-
-# The task below is coalescing the `flush_gc_msgs` call
-# across multiple producers, see `send_del_client`,
-# and `send_add_client`.
-# XXX: Is this worth the additional complexity?
-#      `flush_gc_msgs` has to iterate over all connected workers.
-const any_gc_flag = Threads.Condition()
-function start_gc_msgs_task()
-    errormonitor(
-        Threads.@spawn begin
-            while true
-                lock(any_gc_flag) do
-                    # this might miss events
-                    wait(any_gc_flag)
-                end
-                # Use invokelatest() so that custom message transport streams
-                # for workers can be defined in a newer world age than the Task
-                # which runs the loop here.
-                invokelatest(flush_gc_msgs) # handles throws internally
-            end
-        end
-    )
-end
-
-# Function can be called within a finalizer
-function send_del_client(rr)
-    if rr.where == myid()
-        del_client(rr)
-    elseif id_in_procs(rr.where) # process only if a valid worker
-        process_worker(rr)
-    end
-end
-
-function send_del_client_no_lock(rr)
-    # for gc context to avoid yields
-    if rr.where == myid()
-        _del_client(PGRP, remoteref_id(rr), myid())
-    elseif id_in_procs(rr.where) # process only if a valid worker
-        process_worker(rr)
-    end
-end
-
-function publish_del_msg!(w::Worker, msg)
-    lock(w.msg_lock) do
-        push!(w.del_msgs, msg)
-        @atomic w.gcflag = true
-    end
-    lock(any_gc_flag) do
-        notify(any_gc_flag)
-    end
-end
-
-function process_worker(rr)
-    w = worker_from_id(rr.where)::Worker
-    msg = (remoteref_id(rr), myid())
-
-    # Needs to acquire a lock on the del_msg queue
-    T = Threads.@spawn begin
-        publish_del_msg!($w, $msg)
-    end
-    Base.errormonitor(T)
-
-    return
-end
-
-function add_client(id, client)
-    lock(client_refs) do
-        rv = lookup_ref(id)
-        push!(rv.clientset, client)
-    end
-    nothing
-end
-
-function add_clients(pairs::Vector)
-    for p in pairs
-        add_client(p[1], p[2]...)
-    end
-end
-
-function send_add_client(rr::AbstractRemoteRef, i)
-    if rr.where == myid()
-        add_client(remoteref_id(rr), i)
-    elseif (i != rr.where) && id_in_procs(rr.where)
-        # don't need to send add_client if the message is already going
-        # to the processor that owns the remote ref. it will add_client
-        # itself inside deserialize().
-        w = worker_from_id(rr.where)
-        lock(w.msg_lock) do
-            push!(w.add_msgs, (remoteref_id(rr), i))
-            @atomic w.gcflag = true
-        end
-        lock(any_gc_flag) do
-            notify(any_gc_flag)
-        end
-    end
-end
-
-channel_type(rr::RemoteChannel{T}) where {T} = T
-
-function serialize(s::ClusterSerializer, f::Future)
-    v_cache = @atomic f.v
-    if v_cache === nothing
-        p = worker_id_from_socket(s.io)
-        (p !== f.where) && send_add_client(f, p)
-    end
-    invoke(serialize, Tuple{ClusterSerializer, Any}, s, f)
-end
-
-function serialize(s::ClusterSerializer, rr::RemoteChannel)
-    p = worker_id_from_socket(s.io)
-    (p !== rr.where) && send_add_client(rr, p)
-    invoke(serialize, Tuple{ClusterSerializer, Any}, s, rr)
-end
-
-function deserialize(s::ClusterSerializer, t::Type{<:Future})
-    fc = invoke(deserialize, Tuple{ClusterSerializer, DataType}, s, t) # deserialized copy
-    f2 = Future(fc.where, RRID(fc.whence, fc.id), fc.v) # ctor adds to client_refs table
-
-    # 1) send_add_client() is not executed when the ref is being serialized
-    #    to where it exists, hence do it here.
-    # 2) If we have received a 'fetch'ed Future or if the Future ctor found an
-    #    already 'fetch'ed instance in client_refs (Issue #25847), we should not
-    #    track it in the backing RemoteValue store.
-    f2v_cache = @atomic f2.v
-    if f2.where == myid() && f2v_cache === nothing
-        add_client(remoteref_id(f2), myid())
-    end
-    f2
-end
-
-function deserialize(s::ClusterSerializer, t::Type{<:RemoteChannel})
-    rr = invoke(deserialize, Tuple{ClusterSerializer, DataType}, s, t)
-    if rr.where == myid()
-        # send_add_client() is not executed when the ref is being
-        # serialized to where it exists
-        add_client(remoteref_id(rr), myid())
-    end
-    # call ctor to make sure this rr gets added to the client_refs table
-    RemoteChannel{channel_type(rr)}(rr.where, RRID(rr.whence, rr.id))
-end
-
-# Future and RemoteChannel are serializable only in a running cluster.
-# Serialize zeroed-out values to non ClusterSerializer objects
-function serialize(s::AbstractSerializer, ::Future)
-    zero_fut = Future((0,0,0,nothing))
-    invoke(serialize, Tuple{AbstractSerializer, Any}, s, zero_fut)
-end
-
-function serialize(s::AbstractSerializer, ::RemoteChannel)
-    zero_rc = RemoteChannel{Channel{Any}}((0,0,0))
-    invoke(serialize, Tuple{AbstractSerializer, Any}, s, zero_rc)
-end
-
-
-# make a thunk to call f on args in a way that simulates what would happen if
-# the function were sent elsewhere
-function local_remotecall_thunk(f, args, kwargs)
-    return ()->invokelatest(f, args...; kwargs...)
-end
-
-function remotecall(f, w::LocalProcess, args...; kwargs...)
-    rr = Future(w)
-    schedule_call(remoteref_id(rr), local_remotecall_thunk(f, args, kwargs))
-    return rr
-end
-
-function remotecall(f, w::Worker, args...; kwargs...)
-    rr = Future(w)
-    send_msg(w, MsgHeader(remoteref_id(rr)), CallMsg{:call}(f, args, kwargs))
-    return rr
-end
-
-"""
-    remotecall(f, id::Integer, args...; kwargs...) -> Future
-
-Call a function `f` asynchronously on the given arguments on the specified process.
-Return a [`Future`](@ref).
-Keyword arguments, if any, are passed through to `f`.
-"""
-remotecall(f, id::Integer, args...; kwargs...) = remotecall(f, worker_from_id(id), args...; kwargs...)
-
-function remotecall_fetch(f, w::LocalProcess, args...; kwargs...)
-    v=run_work_thunk(local_remotecall_thunk(f,args, kwargs), false)
-    return isa(v, RemoteException) ? throw(v) : v
-end
-
-function remotecall_fetch(f, w::Worker, args...; kwargs...)
-    # can be weak, because the program will have no way to refer to the Ref
-    # itself, it only gets the result.
-    oid = RRID()
-    rv = lookup_ref(oid)
-    rv.waitingfor = w.id
-    send_msg(w, MsgHeader(RRID(0,0), oid), CallMsg{:call_fetch}(f, args, kwargs))
-    v = take!(rv)
-    lock(client_refs) do
-        delete!(PGRP.refs, oid)
-    end
-    return isa(v, RemoteException) ? throw(v) : v
-end
-
-"""
-    remotecall_fetch(f, id::Integer, args...; kwargs...)
-
-Perform `fetch(remotecall(...))` in one message.
-Keyword arguments, if any, are passed through to `f`.
-Any remote exceptions are captured in a
-[`RemoteException`](@ref) and thrown.
-
-See also [`fetch`](@ref) and [`remotecall`](@ref).
-
-# Examples
-```julia-repl
-\$ julia -p 2
-
-julia> remotecall_fetch(sqrt, 2, 4)
-2.0
-
-julia> remotecall_fetch(sqrt, 2, -4)
-ERROR: On worker 2:
-DomainError with -4.0:
-sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
-...
-```
-"""
-remotecall_fetch(f, id::Integer, args...; kwargs...) =
-    remotecall_fetch(f, worker_from_id(id), args...; kwargs...)
-
-remotecall_wait(f, w::LocalProcess, args...; kwargs...) = wait(remotecall(f, w, args...; kwargs...))
-
-function remotecall_wait(f, w::Worker, args...; kwargs...)
-    prid = RRID()
-    rv = lookup_ref(prid)
-    rv.waitingfor = w.id
-    rr = Future(w)
-    send_msg(w, MsgHeader(remoteref_id(rr), prid), CallWaitMsg(f, args, kwargs))
-    v = fetch(rv.c)
-    lock(client_refs) do
-        delete!(PGRP.refs, prid)
-    end
-    isa(v, RemoteException) && throw(v)
-    return rr
-end
-
-"""
-    remotecall_wait(f, id::Integer, args...; kwargs...)
-
-Perform a faster `wait(remotecall(...))` in one message on the `Worker` specified by worker id `id`.
-Keyword arguments, if any, are passed through to `f`.
-
-See also [`wait`](@ref) and [`remotecall`](@ref).
-"""
-remotecall_wait(f, id::Integer, args...; kwargs...) =
-    remotecall_wait(f, worker_from_id(id), args...; kwargs...)
-
-function remote_do(f, w::LocalProcess, args...; kwargs...)
-    # the LocalProcess version just performs in local memory what a worker
-    # does when it gets a :do message.
-    # same for other messages on LocalProcess.
-    thk = local_remotecall_thunk(f, args, kwargs)
-    schedule(Task(thk))
-    nothing
-end
-
-function remote_do(f, w::Worker, args...; kwargs...)
-    send_msg(w, MsgHeader(), RemoteDoMsg(f, args, kwargs))
-    nothing
-end
-
-
-"""
-    remote_do(f, id::Integer, args...; kwargs...) -> nothing
-
-Executes `f` on worker `id` asynchronously.
-Unlike [`remotecall`](@ref), it does not store the
-result of computation, nor is there a way to wait for its completion.
-
-A successful invocation indicates that the request has been accepted for execution on
-the remote node.
-
-While consecutive `remotecall`s to the same worker are serialized in the order they are
-invoked, the order of executions on the remote worker is undetermined. For example,
-`remote_do(f1, 2); remotecall(f2, 2); remote_do(f3, 2)` will serialize the call
-to `f1`, followed by `f2` and `f3` in that order. However, it is not guaranteed that `f1`
-is executed before `f3` on worker 2.
-
-Any exceptions thrown by `f` are printed to [`stderr`](@ref) on the remote worker.
-
-Keyword arguments, if any, are passed through to `f`.
-"""
-remote_do(f, id::Integer, args...; kwargs...) = remote_do(f, worker_from_id(id), args...; kwargs...)
-
-# have the owner of rr call f on it
-function call_on_owner(f, rr::AbstractRemoteRef, args...)
-    rid = remoteref_id(rr)
-    if rr.where == myid()
-        f(rid, args...)
-    else
-        remotecall_fetch(f, rr.where, rid, args...)
-    end
-end
-
-function wait_ref(rid, caller, args...)
-    v = fetch_ref(rid, args...)
-    if isa(v, RemoteException)
-        if myid() == caller
-            throw(v)
-        else
-            return v
-        end
-    end
-    nothing
-end
-
-"""
-    wait(r::Future)
-
-Wait for a value to become available for the specified [`Future`](@ref).
-"""
-wait(r::Future) = (v_cache = @atomic r.v; v_cache !== nothing && return r; call_on_owner(wait_ref, r, myid()); r)
-
-"""
-    wait(r::RemoteChannel, args...)
-
-Wait for a value to become available on the specified [`RemoteChannel`](@ref).
-"""
-wait(r::RemoteChannel, args...) = (call_on_owner(wait_ref, r, myid(), args...); r)
-
-"""
-    fetch(x::Future)
-
-Wait for and get the value of a [`Future`](@ref). The fetched value is cached locally.
-Further calls to `fetch` on the same reference return the cached value. If the remote value
-is an exception, throws a [`RemoteException`](@ref) which captures the remote exception and backtrace.
-"""
-function fetch(r::Future)
-    v_cache = @atomic r.v
-    v_cache !== nothing && return something(v_cache)
-
-    if r.where == myid()
-        rv, v_cache = @lock r.lock begin
-            v_cache = @atomic :monotonic r.v
-            rv = v_cache === nothing ? lookup_ref(remoteref_id(r)) : nothing
-            rv, v_cache
-        end
-
-        if v_cache !== nothing
-            return something(v_cache)
-        else
-            v_local = fetch(rv.c)
-        end
-    else
-        v_local = call_on_owner(fetch_ref, r)
-    end
-
-    v_cache = @atomic r.v
-
-    if v_cache === nothing # call_on_owner case
-        v_old, status = @lock r.lock begin
-            @atomicreplace r.v nothing => Some(v_local)
-        end
-        # status == true - when value obtained through call_on_owner
-        # status == false - any other situation: atomicreplace fails, because by the time the lock is obtained cache will be populated
-        # why? local put! performs caching and putting into channel under r.lock
-
-        # for local put! use the cached value, for call_on_owner cases just take the v_local as it was just cached in r.v
-
-        # remote calls getting the value from `call_on_owner` used to return the value directly without wrapping it in `Some(x)`
-        # so we're doing the same thing here
-        if status
-            send_del_client(r)
-            return v_local
-        else # this `v_cache` is returned at the end of the function
-            v_cache = v_old
-        end
-    end
-
-    send_del_client(r)
-    something(v_cache)
-end
-
-fetch_ref(rid, args...) = fetch(lookup_ref(rid).c, args...)
-
-"""
-    fetch(c::RemoteChannel)
-
-Wait for and get a value from a [`RemoteChannel`](@ref). Exceptions raised are the
-same as for a [`Future`](@ref). Does not remove the item fetched.
-"""
-fetch(r::RemoteChannel, args...) = call_on_owner(fetch_ref, r, args...)::eltype(r)
-
-isready(rv::RemoteValue, args...) = isready(rv.c, args...)
-
-"""
-    put!(rr::Future, v)
-
-Store a value to a [`Future`](@ref) `rr`.
-`Future`s are write-once remote references.
-A `put!` on an already set `Future` throws an `Exception`.
-All asynchronous remote calls return `Future`s and set the
-value to the return value of the call upon completion.
-"""
-function put!(r::Future, v)
-    if r.where == myid()
-        rid = remoteref_id(r)
-        rv = lookup_ref(rid)
-        isready(rv) && error("Future can be set only once")
-        @lock r.lock begin
-            put!(rv, v) # this notifies the tasks waiting on the channel in fetch
-            set_future_cache(r, v) # set the cache before leaving the lock, so that the notified tasks already see it cached
-        end
-        del_client(rid, myid())
-    else
-        @lock r.lock begin # same idea as above if there were any local tasks fetching on this Future
-            call_on_owner(put_future, r, v, myid())
-            set_future_cache(r, v)
-        end
-    end
-    r
-end
-
-function set_future_cache(r::Future, v)
-    _, ok = @atomicreplace r.v nothing => Some(v)
-    ok || error("internal consistency error detected for Future")
-end
-
-function put_future(rid, v, caller)
-    rv = lookup_ref(rid)
-    isready(rv) && error("Future can be set only once")
-    put!(rv, v)
-    # The caller has the value and hence can be removed from the remote store.
-    del_client(rid, caller)
-    nothing
-end
-
-
-put!(rv::RemoteValue, args...) = put!(rv.c, args...)
-function put_ref(rid, caller, args...)
-    rv = lookup_ref(rid)
-    put!(rv, args...)
-    if myid() == caller && rv.synctake !== nothing
-        # Wait till a "taken" value is serialized out - github issue #29932
-        lock(rv.synctake)
-        unlock(rv.synctake)
-    end
-    nothing
-end
-
-"""
-    put!(rr::RemoteChannel, args...)
-
-Store a set of values to the [`RemoteChannel`](@ref).
-If the channel is full, blocks until space is available.
-Return the first argument.
-"""
-put!(rr::RemoteChannel, args...) = (call_on_owner(put_ref, rr, myid(), args...); rr)
-
-# take! is not supported on Future
-
-take!(rv::RemoteValue, args...) = take!(rv.c, args...)
-function take_ref(rid, caller, args...)
-    rv = lookup_ref(rid)
-    synctake = false
-    if myid() != caller && rv.synctake !== nothing
-        # special handling for local put! / remote take! on unbuffered channel
-        # github issue #29932
-        synctake = true
-        lock(rv.synctake)
-    end
-
-    v = try
-        take!(rv, args...)
-    catch e
-        # avoid unmatched unlock when exception occurs
-        # github issue #33972
-        synctake && unlock(rv.synctake)
-        rethrow(e)
-    end
-
-    isa(v, RemoteException) && (myid() == caller) && throw(v)
-
-    if synctake
-        return SyncTake(v, rv)
-    else
-        return v
-    end
-end
-
-"""
-    take!(rr::RemoteChannel, args...)
-
-Fetch value(s) from a [`RemoteChannel`](@ref) `rr`,
-removing the value(s) in the process.
-"""
-take!(rr::RemoteChannel, args...) = call_on_owner(take_ref, rr, myid(), args...)::eltype(rr)
-
-# close and isopen are not supported on Future
-
-close_ref(rid) = (close(lookup_ref(rid).c); nothing)
-close(rr::RemoteChannel) = call_on_owner(close_ref, rr)
-
-isopen_ref(rid) = isopen(lookup_ref(rid).c)
-isopen(rr::RemoteChannel) = call_on_owner(isopen_ref, rr)
-
-getindex(r::RemoteChannel) = fetch(r)
-getindex(r::Future) = fetch(r)
-
-getindex(r::Future, args...) = getindex(fetch(r), args...)
-function getindex(r::RemoteChannel, args...)
-    if r.where == myid()
-        return getindex(fetch(r), args...)
-    end
-    return remotecall_fetch(getindex, r.where, r, args...)
-end
-
-function iterate(c::RemoteChannel, state=nothing)
-    if isopen(c) || isready(c)
-        try
-            return (take!(c), nothing)
-        catch e
-            if isa(e, InvalidStateException) ||
-                (isa(e, RemoteException) &&
-                isa(e.captured.ex, InvalidStateException) &&
-                e.captured.ex.state === :closed)
-                return nothing
-            end
-            rethrow()
-        end
-    else
-        return nothing
-    end
-end
-
-IteratorSize(::Type{<:RemoteChannel}) = SizeUnknown()
diff --git a/stdlib/Distributed/src/workerpool.jl b/stdlib/Distributed/src/workerpool.jl
deleted file mode 100644
index 5dd1c07044e09..0000000000000
--- a/stdlib/Distributed/src/workerpool.jl
+++ /dev/null
@@ -1,370 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-    AbstractWorkerPool
-
-Supertype for worker pools such as [`WorkerPool`](@ref) and [`CachingPool`](@ref).
-An `AbstractWorkerPool` should implement:
-  - [`push!`](@ref) - add a new worker to the overall pool (available + busy)
-  - [`put!`](@ref) - put back a worker to the available pool
-  - [`take!`](@ref) - take a worker from the available pool (to be used for remote function execution)
-  - [`length`](@ref) - number of workers available in the overall pool
-  - [`isready`](@ref) - return false if a `take!` on the pool would block, else true
-
-The default implementations of the above (on a `AbstractWorkerPool`) require fields
-  - `channel::Channel{Int}`
-  - `workers::Set{Int}`
-where `channel` contains free worker pids and `workers` is the set of all workers associated with this pool.
-"""
-abstract type AbstractWorkerPool end
-
-mutable struct WorkerPool <: AbstractWorkerPool
-    channel::Channel{Int}
-    workers::Set{Int}
-    ref::RemoteChannel
-
-    WorkerPool(c::Channel, ref::RemoteChannel) = new(c, Set{Int}(), ref)
-end
-
-function WorkerPool()
-    wp = WorkerPool(Channel{Int}(typemax(Int)), RemoteChannel())
-    put!(wp.ref, WeakRef(wp))
-    wp
-end
-
-"""
-    WorkerPool(workers::Union{Vector{Int},AbstractRange{Int}})
-
-Create a `WorkerPool` from a vector or range of worker ids.
-
-# Examples
-```julia-repl
-\$ julia -p 3
-
-julia> WorkerPool([2, 3])
-WorkerPool(Channel{Int64}(sz_max:9223372036854775807,sz_curr:2), Set([2, 3]), RemoteChannel{Channel{Any}}(1, 1, 6))
-
-julia> WorkerPool(2:4)
-WorkerPool(Channel{Int64}(sz_max:9223372036854775807,sz_curr:2), Set([4, 2, 3]), RemoteChannel{Channel{Any}}(1, 1, 7))
-```
-"""
-function WorkerPool(workers::Union{Vector{Int},AbstractRange{Int}})
-    pool = WorkerPool()
-    foreach(w->push!(pool, w), workers)
-    return pool
-end
-
-# On workers where this pool has been serialized to, instantiate with a dummy local channel.
-WorkerPool(ref::RemoteChannel) = WorkerPool(Channel{Int}(1), ref)
-
-function serialize(S::AbstractSerializer, pool::WorkerPool)
-    # Allow accessing a worker pool from other processors. When serialized,
-    # initialize the `ref` to point to self and only send the ref.
-    # Other workers will forward all put!, take!, calls to the process owning
-    # the ref (and hence the pool).
-    Serialization.serialize_type(S, typeof(pool))
-    serialize(S, pool.ref)
-end
-
-deserialize(S::AbstractSerializer, t::Type{T}) where {T<:WorkerPool} = T(deserialize(S))
-
-wp_local_push!(pool::AbstractWorkerPool, w::Int) = (push!(pool.workers, w); put!(pool.channel, w); pool)
-wp_local_length(pool::AbstractWorkerPool) = length(pool.workers)
-wp_local_isready(pool::AbstractWorkerPool) = isready(pool.channel)
-
-function wp_local_put!(pool::AbstractWorkerPool, w::Int)
-    # In case of default_worker_pool, the master is implicitly considered a worker, i.e.,
-    # it is not present in pool.workers.
-    # Confirm the that the worker is part of a pool before making it available.
-    w in pool.workers && put!(pool.channel, w)
-    w
-end
-
-function wp_local_workers(pool::AbstractWorkerPool)
-    if length(pool) == 0 && pool === default_worker_pool()
-        return [1]
-    else
-        return collect(pool.workers)
-    end
-end
-
-function wp_local_nworkers(pool::AbstractWorkerPool)
-    if length(pool) == 0 && pool === default_worker_pool()
-        return 1
-    else
-        return length(pool.workers)
-    end
-end
-
-function wp_local_take!(pool::AbstractWorkerPool)
-    # Find an active worker
-    worker = 0
-    while true
-        if length(pool) == 0
-            if pool === default_worker_pool()
-                # No workers, the master process is used as a worker
-                worker = 1
-                break
-            else
-                throw(ErrorException("No active worker available in pool"))
-            end
-        end
-
-        worker = take!(pool.channel)
-        if id_in_procs(worker)
-            break
-        else
-            delete!(pool.workers, worker) # Remove invalid worker from pool
-        end
-    end
-    return worker
-end
-
-function remotecall_pool(rc_f, f, pool::AbstractWorkerPool, args...; kwargs...)
-    worker = take!(pool)
-    try
-        rc_f(f, worker, args...; kwargs...)
-    finally
-        put!(pool, worker)
-    end
-end
-
-# Check if pool is local or remote and forward calls if required.
-# NOTE: remotecall_fetch does it automatically, but this will be more efficient as
-# it avoids the overhead associated with a local remotecall.
-
-for (func, rt) = ((:length, Int), (:isready, Bool), (:workers, Vector{Int}), (:nworkers, Int), (:take!, Int))
-    func_local = Symbol(string("wp_local_", func))
-    @eval begin
-        function ($func)(pool::WorkerPool)
-            if pool.ref.where != myid()
-                return remotecall_fetch(ref->($func_local)(fetch(ref).value), pool.ref.where, pool.ref)::$rt
-            else
-                return ($func_local)(pool)
-            end
-        end
-
-        # default impl
-        ($func)(pool::AbstractWorkerPool) = ($func_local)(pool)
-    end
-end
-
-for func = (:push!, :put!)
-    func_local = Symbol(string("wp_local_", func))
-    @eval begin
-        function ($func)(pool::WorkerPool, w::Int)
-            if pool.ref.where != myid()
-                return remotecall_fetch((ref, w)->($func_local)(fetch(ref).value, w), pool.ref.where, pool.ref, w)
-            else
-                return ($func_local)(pool, w)
-            end
-        end
-
-        # default impl
-        ($func)(pool::AbstractWorkerPool, w::Int) = ($func_local)(pool, w)
-    end
-end
-
-
-"""
-    remotecall(f, pool::AbstractWorkerPool, args...; kwargs...) -> Future
-
-[`WorkerPool`](@ref) variant of `remotecall(f, pid, ....)`. Wait for and take a free worker from `pool` and perform a `remotecall` on it.
-
-# Examples
-```julia-repl
-\$ julia -p 3
-
-julia> wp = WorkerPool([2, 3]);
-
-julia> A = rand(3000);
-
-julia> f = remotecall(maximum, wp, A)
-Future(2, 1, 6, nothing)
-```
-In this example, the task ran on pid 2, called from pid 1.
-"""
-remotecall(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall, f, pool, args...; kwargs...)
-
-
-"""
-    remotecall_wait(f, pool::AbstractWorkerPool, args...; kwargs...) -> Future
-
-[`WorkerPool`](@ref) variant of `remotecall_wait(f, pid, ....)`. Wait for and take a free worker from `pool` and
-perform a `remotecall_wait` on it.
-
-# Examples
-```julia-repl
-\$ julia -p 3
-
-julia> wp = WorkerPool([2, 3]);
-
-julia> A = rand(3000);
-
-julia> f = remotecall_wait(maximum, wp, A)
-Future(3, 1, 9, nothing)
-
-julia> fetch(f)
-0.9995177101692958
-```
-"""
-remotecall_wait(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall_wait, f, pool, args...; kwargs...)
-
-
-"""
-    remotecall_fetch(f, pool::AbstractWorkerPool, args...; kwargs...) -> result
-
-[`WorkerPool`](@ref) variant of `remotecall_fetch(f, pid, ....)`. Waits for and takes a free worker from `pool` and
-performs a `remotecall_fetch` on it.
-
-# Examples
-```julia-repl
-\$ julia -p 3
-
-julia> wp = WorkerPool([2, 3]);
-
-julia> A = rand(3000);
-
-julia> remotecall_fetch(maximum, wp, A)
-0.9995177101692958
-```
-"""
-remotecall_fetch(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall_fetch, f, pool, args...; kwargs...)
-
-"""
-    remote_do(f, pool::AbstractWorkerPool, args...; kwargs...) -> nothing
-
-[`WorkerPool`](@ref) variant of `remote_do(f, pid, ....)`. Wait for and take a free worker from `pool` and
-perform a `remote_do` on it.
-"""
-remote_do(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remote_do, f, pool, args...; kwargs...)
-
-const _default_worker_pool = Ref{Union{AbstractWorkerPool, Nothing}}(nothing)
-
-"""
-    default_worker_pool()
-
-[`AbstractWorkerPool`](@ref) containing idle [`workers`](@ref) - used by `remote(f)` and [`pmap`](@ref)
-(by default). Unless one is explicitly set via `default_worker_pool!(pool)`, the default worker pool is
-initialized to a [`WorkerPool`](@ref).
-
-# Examples
-```julia-repl
-\$ julia -p 3
-
-julia> default_worker_pool()
-WorkerPool(Channel{Int64}(sz_max:9223372036854775807,sz_curr:3), Set([4, 2, 3]), RemoteChannel{Channel{Any}}(1, 1, 4))
-```
-"""
-function default_worker_pool()
-    # On workers retrieve the default worker pool from the master when accessed
-    # for the first time
-    if _default_worker_pool[] === nothing
-        if myid() == 1
-            _default_worker_pool[] = WorkerPool()
-        else
-            _default_worker_pool[] = remotecall_fetch(()->default_worker_pool(), 1)
-        end
-    end
-    return _default_worker_pool[]
-end
-
-"""
-    default_worker_pool!(pool::AbstractWorkerPool)
-
-Set a [`AbstractWorkerPool`](@ref) to be used by `remote(f)` and [`pmap`](@ref) (by default).
-"""
-function default_worker_pool!(pool::AbstractWorkerPool)
-    _default_worker_pool[] = pool
-end
-
-"""
-    remote([p::AbstractWorkerPool], f) -> Function
-
-Return an anonymous function that executes function `f` on an available worker
-(drawn from [`WorkerPool`](@ref) `p` if provided) using [`remotecall_fetch`](@ref).
-"""
-remote(f) = (args...; kwargs...)->remotecall_fetch(f, default_worker_pool(), args...; kwargs...)
-remote(p::AbstractWorkerPool, f) = (args...; kwargs...)->remotecall_fetch(f, p, args...; kwargs...)
-
-mutable struct CachingPool <: AbstractWorkerPool
-    channel::Channel{Int}
-    workers::Set{Int}
-
-    # Mapping between a tuple (worker_id, f) and a RemoteChannel
-    map_obj2ref::IdDict{Tuple{Int, Function}, RemoteChannel}
-
-    function CachingPool()
-        wp = new(Channel{Int}(typemax(Int)), Set{Int}(), IdDict{Tuple{Int, Function}, RemoteChannel}())
-        finalizer(clear!, wp)
-        wp
-    end
-end
-
-serialize(s::AbstractSerializer, cp::CachingPool) = throw(ErrorException("CachingPool objects are not serializable."))
-
-"""
-    CachingPool(workers::Vector{Int})
-
-An implementation of an `AbstractWorkerPool`.
-[`remote`](@ref), [`remotecall_fetch`](@ref),
-[`pmap`](@ref) (and other remote calls which execute functions remotely)
-benefit from caching the serialized/deserialized functions on the worker nodes,
-especially closures (which may capture large amounts of data).
-
-The remote cache is maintained for the lifetime of the returned `CachingPool` object.
-To clear the cache earlier, use `clear!(pool)`.
-
-For global variables, only the bindings are captured in a closure, not the data.
-`let` blocks can be used to capture global data.
-
-# Examples
-```julia
-const foo = rand(10^8);
-wp = CachingPool(workers())
-let foo = foo
-    pmap(i -> sum(foo) + i, wp, 1:100);
-end
-```
-
-The above would transfer `foo` only once to each worker.
-
-"""
-function CachingPool(workers::Vector{Int})
-    pool = CachingPool()
-    for w in workers
-        push!(pool, w)
-    end
-    return pool
-end
-
-"""
-    clear!(pool::CachingPool) -> pool
-
-Removes all cached functions from all participating workers.
-"""
-function clear!(pool::CachingPool)
-    for (_,rr) in pool.map_obj2ref
-        finalize(rr)
-    end
-    empty!(pool.map_obj2ref)
-    pool
-end
-
-exec_from_cache(rr::RemoteChannel, args...; kwargs...) = fetch(rr)(args...; kwargs...)
-function exec_from_cache(f_ref::Tuple{Function, RemoteChannel}, args...; kwargs...)
-    put!(f_ref[2], f_ref[1])        # Cache locally
-    f_ref[1](args...; kwargs...)
-end
-
-function remotecall_pool(rc_f, f, pool::CachingPool, args...; kwargs...)
-    worker = take!(pool)
-    f_ref = get(pool.map_obj2ref, (worker, f), (f, RemoteChannel(worker)))
-    isa(f_ref, Tuple) && (pool.map_obj2ref[(worker, f)] = f_ref[2])   # Add to tracker
-
-    try
-        rc_f(exec_from_cache, worker, f_ref, args...; kwargs...)
-    finally
-        put!(pool, worker)
-    end
-end
diff --git a/stdlib/Distributed/test/distributed_exec.jl b/stdlib/Distributed/test/distributed_exec.jl
deleted file mode 100644
index 43e02c92b5a81..0000000000000
--- a/stdlib/Distributed/test/distributed_exec.jl
+++ /dev/null
@@ -1,1921 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test, Distributed, Random, Serialization, Sockets
-import Distributed: launch, manage
-
-@test cluster_cookie() isa String
-
-include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl"))
-
-@test Distributed.extract_imports(:(begin; import Foo, Bar; let; using Baz; end; end)) ==
-      Any[:(import Foo, Bar), :(using Baz)]
-
-# Test a few "remote" invocations when no workers are present
-@test remote(myid)() == 1
-@test pmap(identity, 1:100) == [1:100...]
-@test 100 == @distributed (+) for i in 1:100
-        1
-    end
-
-addprocs_with_testenv(4)
-@test nprocs() == 5
-
-# distributed loading of packages
-
-# setup
-@everywhere begin
-    old_act_proj = Base.ACTIVE_PROJECT[]
-    pushfirst!(Base.LOAD_PATH, "@")
-    Base.ACTIVE_PROJECT[] = joinpath(Sys.BINDIR, "..", "share", "julia", "test", "TestPkg")
-end
-
-# cause precompilation of TestPkg to avoid race condition
-Base.compilecache(Base.identify_package("TestPkg"))
-
-@everywhere using TestPkg
-@everywhere using TestPkg
-
-@everywhere begin
-    Base.ACTIVE_PROJECT[] = old_act_proj
-    popfirst!(Base.LOAD_PATH)
-end
-
-@everywhere using Test, Random, LinearAlgebra
-
-id_me = myid()
-id_other = filter(x -> x != id_me, procs())[rand(1:(nprocs()-1))]
-
-# Test role
-@everywhere using Distributed
-@test Distributed.myrole() === :master
-for wid = workers()
-    wrole = remotecall_fetch(wid) do
-        Distributed.myrole()
-    end
-    @test wrole === :worker
-end
-
-# Test remote()
-let
-    pool = default_worker_pool()
-
-    count = 0
-    count_condition = Condition()
-
-    function remote_wait(c)
-        @async_logerr begin
-            count += 1
-            remote(take!)(c)
-            count -= 1
-            notify(count_condition)
-        end
-        yield()
-    end
-
-    testchannels = [RemoteChannel() for i in 1:nworkers()]
-    testcount = 0
-    @test isready(pool) == true
-    for c in testchannels
-        @test count == testcount
-        remote_wait(c)
-        testcount += 1
-    end
-    @test count == testcount
-    @test isready(pool) == false
-
-    for c in testchannels
-        @test count == testcount
-        put!(c, "foo")
-        testcount -= 1
-        (count == testcount) || wait(count_condition)
-        @test count == testcount
-        @test isready(pool) == true
-    end
-
-    @test count == 0
-
-    for c in testchannels
-        @test count == testcount
-        remote_wait(c)
-        testcount += 1
-    end
-    @test count == testcount
-    @test isready(pool) == false
-
-    for c in reverse(testchannels)
-        @test count == testcount
-        put!(c, "foo")
-        testcount -= 1
-        (count == testcount) || wait(count_condition)
-        @test count == testcount
-        @test isready(pool) == true
-    end
-
-    @test count == 0
-end
-
-# Test Futures
-function testf(id)
-    f=Future(id)
-    @test isready(f) == false
-    @test f.v === nothing
-    put!(f, :OK)
-    @test isready(f) == true
-    @test f.v !== nothing
-
-    @test_throws ErrorException put!(f, :OK) # Cannot put! to a already set future
-    @test_throws MethodError take!(f) # take! is unsupported on a Future
-
-    @test fetch(f) === :OK
-end
-
-testf(id_me)
-testf(id_other)
-
-function poll_while(f::Function; timeout_seconds::Integer = 120)
-    start_time = time_ns()
-    while f()
-        sleep(1)
-        if ( ( time_ns() - start_time )/1e9 ) > timeout_seconds
-            @error "Timed out" timeout_seconds
-            return false
-        end
-    end
-    return true
-end
-
-function _getenv_include_thread_unsafe()
-    environment_variable_name = "JULIA_TEST_INCLUDE_THREAD_UNSAFE"
-    default_value = "false"
-    environment_variable_value = strip(get(ENV, environment_variable_name, default_value))
-    b = parse(Bool, environment_variable_value)::Bool
-    return b
-end
-const _env_include_thread_unsafe = _getenv_include_thread_unsafe()
-function include_thread_unsafe_tests()
-    if Threads.maxthreadid() > 1
-        if _env_include_thread_unsafe
-            return true
-        end
-        msg = "Skipping a thread-unsafe test because `Threads.maxthreadid() > 1`"
-        @warn msg Threads.maxthreadid()
-        Test.@test_broken false
-        return false
-    end
-    return true
-end
-
-# Distributed GC tests for Futures
-function test_futures_dgc(id)
-    f = remotecall(myid, id)
-    fid = remoteref_id(f)
-
-    # remote value should be deleted after a fetch
-    @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid) == true
-    @test f.v === nothing
-    @test fetch(f) == id
-    @test f.v !== nothing
-    yield(); # flush gc msgs
-    @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid))
-
-    # if unfetched, it should be deleted after a finalize
-    f = remotecall(myid, id)
-    fid = remoteref_id(f)
-    @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid) == true
-    @test f.v === nothing
-    finalize(f)
-    yield(); # flush gc msgs
-    @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid))
-end
-
-test_futures_dgc(id_me)
-test_futures_dgc(id_other)
-
-# if sent to another worker, it should not be deleted till all references are fetched.
-wid1 = workers()[1]
-wid2 = workers()[2]
-f = remotecall(myid, wid1)
-fid = remoteref_id(f)
-
-fstore = RemoteChannel(wid2)
-put!(fstore, f)
-
-@test fetch(f) == wid1
-@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == true
-remotecall_fetch(r->(fetch(fetch(r)); yield()), wid2, fstore)
-sleep(0.5) # to ensure that wid2 gc messages have been executed on wid1
-@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == false
-
-# put! should release remote reference since it would have been cached locally
-f = Future(wid1)
-fid = remoteref_id(f)
-
-# should not be created remotely till accessed
-@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == false
-# create it remotely
-isready(f)
-
-@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == true
-put!(f, :OK)
-@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == false
-@test fetch(f) === :OK
-
-# RemoteException should be thrown on a put! when another process has set the value
-f = Future(wid1)
-fid = remoteref_id(f)
-
-fstore = RemoteChannel(wid2)
-put!(fstore, f) # send f to wid2
-put!(f, :OK) # set value from master
-
-@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == true
-
-testval = remotecall_fetch(wid2, fstore) do x
-    try
-        put!(fetch(x), :OK)
-        return 0
-    catch e
-        if isa(e, RemoteException)
-            return 1
-        else
-            return 2
-        end
-    end
-end
-@test testval == 1
-
-# Issue number #25847
-@everywhere function f25847(ref)
-    fetch(ref)
-    return true
-end
-
-f = remotecall_wait(identity, id_other, ones(10))
-rrid = Distributed.RRID(f.whence, f.id)
-remotecall_fetch(f25847, id_other, f)
-@test BitSet([id_me]) == remotecall_fetch(()->Distributed.PGRP.refs[rrid].clientset, id_other)
-
-remotecall_fetch(f25847, id_other, f)
-@test BitSet([id_me]) == remotecall_fetch(()->Distributed.PGRP.refs[rrid].clientset, id_other)
-
-finalize(f)
-yield() # flush gc msgs
-@test poll_while(() -> remotecall_fetch(chk_rrid->(yield(); haskey(Distributed.PGRP.refs, chk_rrid)), id_other, rrid))
-
-# Distributed GC tests for RemoteChannels
-function test_remoteref_dgc(id)
-    rr = RemoteChannel(id)
-    put!(rr, :OK)
-    rrid = remoteref_id(rr)
-
-    # remote value should be deleted after finalizing the ref
-    @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid) == true
-    @test fetch(rr) === :OK
-    @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid) == true
-    finalize(rr)
-    yield(); # flush gc msgs
-    @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid))
-end
-test_remoteref_dgc(id_me)
-test_remoteref_dgc(id_other)
-
-# if sent to another worker, it should not be deleted till the other worker has also finalized.
-let wid1 = workers()[1],
-    wid2 = workers()[2],
-    rr = RemoteChannel(wid1),
-    rrid = remoteref_id(rr),
-    fstore = RemoteChannel(wid2)
-
-    put!(fstore, rr)
-    if include_thread_unsafe_tests()
-        @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true
-    end
-    finalize(rr) # finalize locally
-    yield() # flush gc msgs
-    if include_thread_unsafe_tests()
-        @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true
-    end
-    remotecall_fetch(r -> (finalize(take!(r)); yield(); nothing), wid2, fstore) # finalize remotely
-    sleep(0.5) # to ensure that wid2 messages have been executed on wid1
-    @test poll_while(() -> remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid))
-end
-
-# Tests for issue #23109 - should not hang.
-f = @spawnat :any rand(1, 1)
-Base.Experimental.@sync begin
-    for _ in 1:10
-        @async fetch(f)
-    end
-end
-
-wid1, wid2 = workers()[1:2]
-f = @spawnat wid1 rand(1,1)
-Base.Experimental.@sync begin
-    @async fetch(f)
-    @async remotecall_fetch(()->fetch(f), wid2)
-end
-
-
-@test fetch(@spawnat id_other myid()) == id_other
-@test (@fetchfrom id_other myid()) == id_other
-
-pids=[]
-for i in 1:nworkers()
-    push!(pids, @fetch myid())
-end
-@test sort(pids) == sort(workers())
-
-
-# test getindex on Futures and RemoteChannels
-function test_indexing(rr)
-    a = rand(5,5)
-    put!(rr, a)
-    @test rr[2,3] == a[2,3]
-    @test rr[] == a
-end
-
-test_indexing(Future())
-test_indexing(Future(id_other))
-test_indexing(RemoteChannel())
-test_indexing(RemoteChannel(id_other))
-
-# Test ser/deser to non-ClusterSerializer objects.
-function test_regular_io_ser(ref::Distributed.AbstractRemoteRef)
-    io = IOBuffer()
-    serialize(io, ref)
-    seekstart(io)
-    ref2 = deserialize(io)
-    for fld in fieldnames(typeof(ref))
-        v = getfield(ref2, fld)
-        if isa(v, Number)
-            @test v === zero(typeof(v))
-        elseif fld === :lock
-            @test v isa ReentrantLock
-            @test !islocked(v)
-        elseif v !== nothing
-            error(string("Add test for field ", fld))
-        end
-    end
-end
-
-test_regular_io_ser(Future())
-test_regular_io_ser(RemoteChannel())
-
-# Test @distributed load balancing - all processors should get either M or M+1
-# iterations out of the loop range for some M.
-ids = @distributed((a,b)->[a;b], for i=1:7; myid(); end)
-workloads = Int[sum(ids .== i) for i in 2:nprocs()]
-@test maximum(workloads) - minimum(workloads) <= 1
-
-# @distributed reduction should work even with very short ranges
-@test @distributed(+, for i=1:2; i; end) == 3
-
-@test_throws ArgumentError sleep(-1)
-@test_throws ArgumentError timedwait(()->false, 0.1, pollint=-0.5)
-
-# specify pids for pmap
-@test sort(workers()[1:2]) == sort(unique(pmap(x->(sleep(0.1);myid()), WorkerPool(workers()[1:2]), 1:10)))
-
-# Testing buffered  and unbuffered reads
-# This large array should write directly to the socket
-a = fill(1, 10^6)
-@test a == remotecall_fetch((x)->x, id_other, a)
-
-# Not a bitstype, should be buffered
-s = [randstring() for x in 1:10^5]
-@test s == remotecall_fetch((x)->x, id_other, s)
-
-#large number of small requests
-num_small_requests = 10000
-@test fill(id_other, num_small_requests) == [remotecall_fetch(myid, id_other) for i in 1:num_small_requests]
-
-# test parallel sends of large arrays from multiple tasks to the same remote worker
-ntasks = 10
-rr_list = [Channel(1) for x in 1:ntasks]
-
-for rr in rr_list
-    local rr
-    let rr = rr
-        @async try
-            for i in 1:10
-                a = rand(2*10^5)
-                @test a == remotecall_fetch(x->x, id_other, a)
-                yield()
-            end
-            put!(rr, :OK)
-        catch
-            put!(rr, :ERROR)
-        end
-    end
-end
-
-@test [fetch(rr) for rr in rr_list] == [:OK for x in 1:ntasks]
-
-function test_channel(c)
-    @test isopen(c) == true
-    put!(c, 1)
-    put!(c, "Hello")
-    put!(c, 5.0)
-
-    @test isready(c) == true
-    @test isopen(c) == true
-    @test fetch(c) == 1
-    @test fetch(c) == 1   # Should not have been popped previously
-    @test take!(c) == 1
-    @test take!(c) == "Hello"
-    @test fetch(c) == 5.0
-    @test take!(c) == 5.0
-    @test isready(c) == false
-    @test isopen(c) == true
-    close(c)
-    @test isopen(c) == false
-end
-
-test_channel(Channel(10))
-test_channel(RemoteChannel(()->Channel(10)))
-
-c=Channel{Int}(1)
-@test_throws MethodError put!(c, "Hello")
-
-# test channel iterations
-function test_iteration(in_c, out_c)
-    t=@async for v in in_c
-        put!(out_c, v)
-    end
-
-    @test isopen(in_c) == true
-    put!(in_c, 1)
-    @test take!(out_c) == 1
-    put!(in_c, "Hello")
-    close(in_c)
-    @test take!(out_c) == "Hello"
-    @test isopen(in_c) == false
-    @test_throws InvalidStateException put!(in_c, :foo)
-    yield()
-    @test istaskdone(t) == true
-end
-
-test_iteration(Channel(10), Channel(10))
-test_iteration(RemoteChannel(() -> Channel(10)), RemoteChannel(() -> Channel(10)))
-
-@everywhere function test_iteration_take(ch)
-    count = 0
-    for x in ch
-        count += 1
-    end
-    return count
-end
-
-@everywhere function test_iteration_put(ch, total)
-    for i in 1:total
-        put!(ch, i)
-    end
-    close(ch)
-end
-
-let ch = RemoteChannel(() -> Channel(1))
-    @async test_iteration_put(ch, 10)
-    @test 10 == @fetchfrom id_other test_iteration_take(ch)
-    # now reverse
-    ch = RemoteChannel(() -> Channel(1))
-    @spawnat id_other test_iteration_put(ch, 10)
-    @test 10 == test_iteration_take(ch)
-end
-
-# make sure exceptions propagate when waiting on Tasks
-@test_throws CompositeException (@sync (@async error("oops")))
-try
-    @sync begin
-        for i in 1:5
-            @async error(i)
-        end
-    end
-    error("unexpected")
-catch ex
-    @test typeof(ex) == CompositeException
-    @test length(ex) == 5
-    @test typeof(ex.exceptions[1]) == TaskFailedException
-    @test typeof(ex.exceptions[1].task.exception) == ErrorException
-    # test start, next, and done
-    for (i, i_ex) in enumerate(ex)
-        @test i == parse(Int, i_ex.task.exception.msg)
-    end
-    # test showerror
-    err_str = sprint(showerror, ex)
-    err_one_str = sprint(showerror, ex.exceptions[1])
-    @test err_str == err_one_str * "\n\n...and 4 more exceptions.\n"
-end
-@test sprint(showerror, CompositeException()) == "CompositeException()\n"
-
-function test_remoteexception_thrown(expr)
-    try
-        expr()
-        error("unexpected")
-    catch ex
-        @test typeof(ex) == RemoteException
-        @test typeof(ex.captured) == CapturedException
-        @test typeof(ex.captured.ex) == ErrorException
-        @test ex.captured.ex.msg == "foobar"
-    end
-end
-
-for id in [id_other, id_me]
-    local id
-    test_remoteexception_thrown() do
-        remotecall_fetch(id) do
-            throw(ErrorException("foobar"))
-        end
-    end
-    test_remoteexception_thrown() do
-        remotecall_wait(id) do
-            throw(ErrorException("foobar"))
-        end
-    end
-    test_remoteexception_thrown() do
-        wait(remotecall(id) do
-            throw(ErrorException("foobar"))
-        end)
-    end
-end
-
-# make sure the stackframe from the remote error can be serialized
-let ex
-    try
-        remotecall_fetch(id_other) do
-            @eval module AModuleLocalToOther
-                foo() = throw(ErrorException("A.error"))
-                foo()
-            end
-        end
-    catch ex
-    end
-    @test (ex::RemoteException).pid == id_other
-    @test ((ex.captured::CapturedException).ex::ErrorException).msg == "A.error"
-    bt = ex.captured.processed_bt::Array{Any,1}
-    @test length(bt) > 1
-    frame, repeated = bt[1]::Tuple{Base.StackTraces.StackFrame, Int}
-    @test frame.func === :foo
-    @test frame.linfo === nothing
-    @test repeated == 1
-end
-
-# pmap tests. Needs at least 4 processors dedicated to the below tests. Which we currently have
-# since the distributed tests are now spawned as a separate set.
-
-# Test all combinations of pmap keyword args.
-pmap_args = [
-                (:distributed, [:default, false]),
-                (:batch_size, [:default,2]),
-                (:on_error, [:default, e -> (e.msg == "foobar" ? true : rethrow())]),
-                (:retry_delays, [:default, fill(0.001, 1000)]),
-                (:retry_check, [:default, (s,e) -> (s,endswith(e.msg,"foobar"))]),
-            ]
-
-kwdict = Dict()
-function walk_args(i)
-    if i > length(pmap_args)
-        kwargs = []
-        for (k,v) in kwdict
-            if v !== :default
-                push!(kwargs, (k,v))
-            end
-        end
-
-        data = 1:100
-
-        testw = kwdict[:distributed] === false ? [1] : workers()
-
-        if kwdict[:retry_delays] !== :default
-            mapf = x -> iseven(myid()) ? error("notfoobar") : (x*2, myid())
-            results_test = pmap_res -> begin
-                results = [x[1] for x in pmap_res]
-                pids = [x[2] for x in pmap_res]
-                @test results == [2:2:200...]
-                for p in testw
-                    if isodd(p)
-                        @test p in pids
-                    else
-                        @test !(p in pids)
-                    end
-                end
-            end
-        elseif kwdict[:on_error] === :default
-            mapf = x -> (x*2, myid())
-            results_test = pmap_res -> begin
-                results = [x[1] for x in pmap_res]
-                pids = [x[2] for x in pmap_res]
-                @test results == [2:2:200...]
-                for p in testw
-                    @test p in pids
-                end
-            end
-        else
-            mapf = x -> iseven(x) ? error("foobar") : (x*2, myid())
-            results_test = pmap_res -> begin
-                w = testw
-                for (idx,x) in enumerate(data)
-                    if iseven(x)
-                        @test pmap_res[idx] == true
-                    else
-                        @test pmap_res[idx][1] == x*2
-                        @test pmap_res[idx][2] in w
-                    end
-                end
-            end
-        end
-
-        try
-            results_test(pmap(mapf, data; kwargs...))
-        catch
-            println("pmap executing with args : ", kwargs)
-            rethrow()
-        end
-
-        return
-    end
-
-    kwdict[pmap_args[i][1]] = pmap_args[i][2][1]
-    walk_args(i+1)
-
-    kwdict[pmap_args[i][1]] = pmap_args[i][2][2]
-    walk_args(i+1)
-end
-
-# Start test for various kw arg combinations
-walk_args(1)
-
-include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "generic_map_tests.jl"))
-empty_pool = WorkerPool([myid()])
-pmap_fallback = (f, c...) -> pmap(f, empty_pool, c...)
-generic_map_tests(pmap_fallback)
-
-# pmap with various types. Test for equivalence with map
-run_map_equivalence_tests(pmap)
-@test pmap(uppercase, "Hello World!") == map(uppercase, "Hello World!")
-
-
-# Simple test for pmap throws error
-let error_thrown = false
-    try
-        pmap(x -> x == 50 ? error("foobar") : x, 1:100)
-    catch e
-        @test e.captured.ex.msg == "foobar"
-        error_thrown = true
-    end
-    @test error_thrown
-end
-
-# Test pmap with a generator type iterator
-@test [1:100...] == pmap(x->x, Base.Generator(x->(sleep(0.0001); x), 1:100))
-
-# Test pgenerate
-n = 10
-as = [rand(4,4) for i in 1:n]
-bs = deepcopy(as)
-cs = collect(Distributed.pgenerate(x->(sleep(rand()*0.1); svd(x)), bs))
-svdas = map(svd, as)
-for i in 1:n
-    @test cs[i].U ≈ svdas[i].U
-    @test cs[i].S ≈ svdas[i].S
-    @test cs[i].V ≈ svdas[i].V
-end
-
-# Test that the default worker pool cycles through all workers
-pmap(_->myid(), 1:nworkers())  # priming run
-@test nworkers() == length(unique(pmap(_->myid(), 1:100)))
-
-# Test same behaviour when executed on a worker
-@test nworkers() == length(unique(remotecall_fetch(()->pmap(_->myid(), 1:100), id_other)))
-
-# Same tests with custom worker pools.
-wp = WorkerPool(workers())
-@test nworkers() == length(unique(pmap(_->myid(), wp, 1:100)))
-@test nworkers() == length(unique(remotecall_fetch(wp->pmap(_->myid(), wp, 1:100), id_other, wp)))
-wp = WorkerPool(2:3)
-@test sort(unique(pmap(_->myid(), wp, 1:100))) == [2,3]
-
-# CachingPool tests
-wp = CachingPool(workers())
-@test [1:100...] == pmap(x->x, wp, 1:100)
-
-clear!(wp)
-@test length(wp.map_obj2ref) == 0
-
-# default_worker_pool! tests
-wp_default = Distributed.default_worker_pool()
-try
-    local wp = CachingPool(workers())
-    Distributed.default_worker_pool!(wp)
-    @test [1:100...] == pmap(x->x, wp, 1:100)
-    @test !isempty(wp.map_obj2ref)
-    clear!(wp)
-    @test isempty(wp.map_obj2ref)
-finally
-    Distributed.default_worker_pool!(wp_default)
-end
-
-# The below block of tests are usually run only on local development systems, since:
-# - tests which print errors
-# - addprocs tests are memory intensive
-# - ssh addprocs requires sshd to be running locally with passwordless login enabled.
-# The test block is enabled by defining env JULIA_TESTFULL=1
-
-DoFullTest = Base.get_bool_env("JULIA_TESTFULL", false)
-
-if DoFullTest
-    println("Testing exception printing on remote worker from a `remote_do` call")
-    println("Please ensure the remote error and backtrace is displayed on screen")
-
-    remote_do(id_other) do
-        throw(ErrorException("TESTING EXCEPTION ON REMOTE DO. PLEASE IGNORE"))
-    end
-    sleep(0.5)  # Give some time for the above error to be printed
-
-    println("\n\nThe following 'invalid connection credentials' error messages are to be ignored.")
-    all_w = workers()
-    # Test sending fake data to workers. The worker processes will print an
-    # error message but should not terminate.
-    for w in Distributed.PGRP.workers
-        if isa(w, Distributed.Worker)
-            local s = connect(w.config.host, w.config.port)
-            write(s, randstring(32))
-        end
-    end
-    @test workers() == all_w
-    @test all([p == remotecall_fetch(myid, p) for p in all_w])
-
-if Sys.isunix() # aka have ssh
-    function test_n_remove_pids(new_pids)
-        for p in new_pids
-            w_in_remote = sort(remotecall_fetch(workers, p))
-            try
-                @test intersect(new_pids, w_in_remote) == new_pids
-            catch
-                print("p       :     $p\n")
-                print("newpids :     $new_pids\n")
-                print("w_in_remote : $w_in_remote\n")
-                print("intersect   : $(intersect(new_pids, w_in_remote))\n\n\n")
-                rethrow()
-            end
-        end
-
-        remotecall_fetch(rmprocs, 1, new_pids)
-    end
-
-    print("\n\nTesting SSHManager. A minimum of 4GB of RAM is recommended.\n")
-    print("Please ensure: \n")
-    print("1) sshd is running locally with passwordless login enabled.\n")
-    print("2) Env variable USER is defined and is the ssh user.\n")
-    print("3) Port 9300 is not in use.\n")
-
-    sshflags = `-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o LogLevel=ERROR `
-    #Issue #9951
-    hosts=[]
-    localhost_aliases = ["localhost", string(getipaddr()), "127.0.0.1"]
-    num_workers = parse(Int,(get(ENV, "JULIA_ADDPROCS_NUM", "9")))
-
-    for i in 1:(num_workers/length(localhost_aliases))
-        append!(hosts, localhost_aliases)
-    end
-
-    print("\nTesting SSH addprocs with $(length(hosts)) workers...\n")
-    new_pids = addprocs_with_testenv(hosts; sshflags=sshflags)
-    @test length(new_pids) == length(hosts)
-    test_n_remove_pids(new_pids)
-
-    print("\nMixed ssh addprocs with :auto\n")
-    new_pids = addprocs_with_testenv(["localhost", ("127.0.0.1", :auto), "localhost"]; sshflags=sshflags)
-    @test length(new_pids) == (2 + Sys.CPU_THREADS)
-    test_n_remove_pids(new_pids)
-
-    print("\nMixed ssh addprocs with numeric counts\n")
-    new_pids = addprocs_with_testenv([("localhost", 2), ("127.0.0.1", 2), "localhost"]; sshflags=sshflags)
-    @test length(new_pids) == 5
-    test_n_remove_pids(new_pids)
-
-    print("\nssh addprocs with tunnel\n")
-    new_pids = addprocs_with_testenv([("localhost", num_workers)]; tunnel=true, sshflags=sshflags)
-    @test length(new_pids) == num_workers
-    test_n_remove_pids(new_pids)
-
-    print("\nssh addprocs with tunnel (SSH multiplexing)\n")
-    new_pids = addprocs_with_testenv([("localhost", num_workers)]; tunnel=true, multiplex=true, sshflags=sshflags)
-    @test length(new_pids) == num_workers
-    controlpath = joinpath(homedir(), ".ssh", "julia-$(ENV["USER"])@localhost:22")
-    @test issocket(controlpath)
-    test_n_remove_pids(new_pids)
-    @test :ok == timedwait(()->!issocket(controlpath), 10.0; pollint=0.5)
-
-    print("\nAll supported formats for hostname\n")
-    h1 = "localhost"
-    user = ENV["USER"]
-    h2 = "$user@$h1"
-    h3 = "$h2:22"
-    h4 = "$h3 $(string(getipaddr()))"
-    h5 = "$h4:9300"
-
-    new_pids = addprocs_with_testenv([h1, h2, h3, h4, h5]; sshflags=sshflags)
-    @test length(new_pids) == 5
-    test_n_remove_pids(new_pids)
-
-    print("\nkeyword arg exename\n")
-    for exename in [`$(joinpath(Sys.BINDIR, Base.julia_exename()))`, "$(joinpath(Sys.BINDIR, Base.julia_exename()))"]
-        for addp_func in [()->addprocs_with_testenv(["localhost"]; exename=exename, exeflags=test_exeflags, sshflags=sshflags),
-                          ()->addprocs_with_testenv(1; exename=exename, exeflags=test_exeflags)]
-
-            local new_pids = addp_func()
-            @test length(new_pids) == 1
-            test_n_remove_pids(new_pids)
-        end
-    end
-
-end # unix-only
-end # full-test
-
-let t = @task 42
-    schedule(t, ErrorException(""), error=true)
-    @test_throws TaskFailedException(t) Base.wait(t)
-end
-
-# issue #8207
-let A = Any[]
-    @distributed (+) for i in (push!(A,1); 1:2)
-        i
-    end
-    @test length(A) == 1
-end
-
-# issue #13168
-function f13168(n)
-    val = 0
-    for i = 1:n
-        val += sum(rand(n, n)^2)
-    end
-    return val
-end
-let t = schedule(@task f13168(100))
-    @test t.state === :runnable
-    @test t.queue !== nothing
-    @test_throws ErrorException schedule(t)
-    yield()
-    @test t.state === :done
-    @test t.queue === nothing
-    @test_throws ErrorException schedule(t)
-    @test isa(fetch(t), Float64)
-end
-
-# issue #13122
-@test remotecall_fetch(identity, workers()[1], C_NULL) === C_NULL
-
-# issue #11062
-function t11062()
-    @async v11062 = 1
-    v11062 = 2
-end
-
-@test t11062() == 2
-
-# issue #15406
-v15406 = remotecall_wait(() -> 1, id_other)
-fetch(v15406)
-remotecall_wait(fetch, id_other, v15406)
-
-
-# issue #43396
-# Covers the remote fetch where the value returned is `nothing`
-# May be caused by attempting to unwrap a non-`Some` type with `something`
-# `call_on_owner` ref fetches return values not wrapped in `Some`
-# and have to be returned directly
-@test nothing === fetch(remotecall(() -> nothing, workers()[1]))
-@test 10 === fetch(remotecall(() -> 10, workers()[1]))
-
-
-# Test various forms of remotecall* invocations
-
-@everywhere f_args(v1, v2=0; kw1=0, kw2=0) = v1+v2+kw1+kw2
-
-function test_f_args(result, args...; kwargs...)
-    @test fetch(remotecall(args...; kwargs...)) == result
-    @test fetch(remotecall_wait(args...; kwargs...)) == result
-    @test remotecall_fetch(args...; kwargs...) == result
-
-    # A visual test - remote_do should NOT print any errors
-    remote_do(args...; kwargs...)
-end
-
-for tid in [id_other, id_me, default_worker_pool()]
-    test_f_args(1, f_args, tid, 1)
-    test_f_args(3, f_args, tid, 1, 2)
-    test_f_args(5, f_args, tid, 1; kw1=4)
-    test_f_args(13, f_args, tid, 1; kw1=4, kw2=8)
-    test_f_args(15, f_args, tid, 1, 2; kw1=4, kw2=8)
-end
-
-# Test remote_do
-f=Future(id_me)
-remote_do(fut->put!(fut, myid()), id_me, f)
-@test fetch(f) == id_me
-
-f=Future(id_other)
-remote_do(fut->put!(fut, myid()), id_other, f)
-@test fetch(f) == id_other
-
-# Github issue #29932
-rc_unbuffered = RemoteChannel(()->Channel{Vector{Float64}}(0))
-@test eltype(rc_unbuffered) == Vector{Float64}
-
-@async begin
-    # Trigger direct write (no buffering) of largish array
-    array_sz = Int(Base.SZ_UNBUFFERED_IO/8) + 1
-    largev = zeros(array_sz)
-    for i in 1:10
-        largev[1] = float(i)
-        put!(rc_unbuffered, largev)
-    end
-end
-
-@test remotecall_fetch(rc -> begin
-        for i in 1:10
-            take!(rc)[1] != float(i) && error("Failed")
-        end
-        return :OK
-    end, id_other, rc_unbuffered) === :OK
-
-# github issue 33972
-rc_unbuffered_other = RemoteChannel(()->Channel{Int}(0), id_other)
-close(rc_unbuffered_other)
-try; take!(rc_unbuffered_other); catch; end
-@test !remotecall_fetch(rc -> islocked(Distributed.lookup_ref(remoteref_id(rc)).synctake),
-                        id_other, rc_unbuffered_other)
-
-# github PR #14456
-n = DoFullTest ? 6 : 5
-for i = 1:10^n
-    fetch(@spawnat myid() myid())
-end
-
-# issue #15451
-@test remotecall_fetch(x->(y->2y)(x)+1, workers()[1], 3) == 7
-
-# issue #16091
-mutable struct T16091 end
-wid = workers()[1]
-@test try
-    remotecall_fetch(()->T16091, wid)
-    false
-catch ex
-    ((ex::RemoteException).captured::CapturedException).ex === UndefVarError(:T16091)
-end
-@test try
-    remotecall_fetch(identity, wid, T16091)
-    false
-catch ex
-    ((ex::RemoteException).captured::CapturedException).ex === UndefVarError(:T16091)
-end
-
-f16091a() = 1
-remotecall_fetch(()->eval(:(f16091a() = 2)), wid)
-@test remotecall_fetch(f16091a, wid) === 2
-@test remotecall_fetch((myid)->remotecall_fetch(f16091a, myid), wid, myid()) === 1
-
-# these will only heisen-fail, since it depends on the gensym counter collisions:
-f16091b = () -> 1
-remotecall_fetch(()->eval(:(f16091b = () -> 2)), wid)
-@test remotecall_fetch(f16091b, 2) === 1
-# Global anonymous functions are over-written...
-@test remotecall_fetch((myid)->remotecall_fetch(f16091b, myid), wid, myid()) === 1
-
-# ...while local anonymous functions are by definition, local.
-let
-    f16091c = () -> 1
-    @test remotecall_fetch(f16091c, 2) === 1
-    @test remotecall_fetch(
-        myid -> begin
-            let
-                f16091c = () -> 2
-                remotecall_fetch(f16091c, myid)
-            end
-        end, wid, myid()) === 2
-end
-
-# issue #16451
-rng=RandomDevice()
-retval = @distributed (+) for _ in 1:10
-    rand(rng)
-end
-@test retval > 0.0 && retval < 10.0
-
-rand(rng)
-retval = @distributed (+) for _ in 1:10
-    rand(rng)
-end
-@test retval > 0.0 && retval < 10.0
-
-# serialization tests
-wrkr1 = workers()[1]
-wrkr2 = workers()[end]
-
-@test remotecall_fetch(p->remotecall_fetch(myid, p), wrkr1, wrkr2) == wrkr2
-
-# Send f to wrkr1 and wrkr2. Then try calling f on wrkr2 from wrkr1
-f_myid = ()->myid()
-@test wrkr1 == remotecall_fetch(f_myid, wrkr1)
-@test wrkr2 == remotecall_fetch(f_myid, wrkr2)
-@test wrkr2 == remotecall_fetch((f, p)->remotecall_fetch(f, p), wrkr1, f_myid, wrkr2)
-
-# Deserialization error recovery test
-# locally defined module, but unavailable on workers
-module LocalFoo
-    global foo=1
-end
-
-let
-    @test_throws RemoteException remotecall_fetch(()->LocalFoo.foo, 2)
-
-    bad_thunk = ()->NonexistentModule.f()
-    @test_throws RemoteException remotecall_fetch(bad_thunk, 2)
-
-    # Test that the stream is still usable
-    @test remotecall_fetch(()->:test,2) === :test
-    ref = remotecall(bad_thunk, 2)
-    @test_throws RemoteException fetch(ref)
-end
-
-# Test calling @everywhere from a module not defined on the workers
-module LocalBar
-    using Distributed
-    bar() = @everywhere new_bar()=myid()
-end
-LocalBar.bar()
-for p in procs()
-    @test p == remotecall_fetch(new_bar, p)
-end
-
-# @everywhere (remotecall_eval) behaviors (#22589)
-let (p, p2) = filter!(p -> p != myid(), procs())
-    @test (myid() + 1) == @everywhere myid() (myid() + 1)
-    @test (p * 2) == @everywhere p (myid() * 2)
-    @test 1 == @everywhere p defined_on_p = 1
-    @test !@isdefined defined_on_p
-    @test !isdefined(Main, :defined_on_p)
-    @test remotecall_fetch(isdefined, p, Main, :defined_on_p)
-    @test !remotecall_fetch(isdefined, p2, Main, :defined_on_p)
-    @test nothing === @everywhere [p, p] defined_on_p += 1
-    @test 3 === @everywhere p defined_on_p
-    let ref = Ref(0)
-        @test nothing ===
-            @everywhere [myid(), p, myid(), myid(), p] begin
-                Test.@test Main === @__MODULE__
-                $ref[] += 1
-            end
-        @test ref[] == 3
-    end
-    function test_throw_on(procs, msg)
-        try
-            @everywhere procs error($msg)
-            error("test failed to throw")
-        catch excpt
-            if procs isa Int
-                ex = Any[excpt]
-            else
-                ex = (excpt::CompositeException).exceptions
-            end
-            for (p, ex) in zip(procs, ex)
-                local p
-                if procs isa Int || p != myid()
-                    @test (ex::RemoteException).pid == p
-                    ex = ((ex::RemoteException).captured::CapturedException).ex
-                else
-                    ex = (ex::TaskFailedException).task.exception
-                end
-                @test (ex::ErrorException).msg == msg
-            end
-        end
-    end
-    test_throw_on(p, "everywhere on p")
-    test_throw_on(myid(), "everywhere on myid")
-    test_throw_on([p, myid()], "everywhere on myid and p")
-    test_throw_on([p2, p], "everywhere on p and p2")
-end
-
-# Test addprocs enable_threaded_blas parameter
-
-function get_remote_num_threads(processes_added)
-    return [remotecall_fetch(BLAS.get_num_threads, proc_id) for proc_id in processes_added]
-end
-
-function test_blas_config(pid, expected)
-    for worker in Distributed.PGRP.workers
-        if worker.id == pid
-            @test worker.config.enable_threaded_blas == expected
-            return
-        end
-    end
-end
-
-function test_add_procs_threaded_blas()
-    master_blas_thread_count = BLAS.get_num_threads()
-    if master_blas_thread_count === nothing
-        @warn "Skipping blas num threads tests due to unsupported blas version"
-        return
-    end
-
-    # Test with default enable_threaded_blas false
-    processes_added = addprocs_with_testenv(2)
-    for proc_id in processes_added
-        test_blas_config(proc_id, false)
-    end
-
-    # Master thread should not have changed
-    @test BLAS.get_num_threads() == master_blas_thread_count
-
-    # Threading disabled in children by default
-    thread_counts_by_process = get_remote_num_threads(processes_added)
-    for thread_count in thread_counts_by_process
-        @test thread_count == 1
-    end
-    rmprocs(processes_added)
-
-    processes_added = addprocs_with_testenv(2, enable_threaded_blas=true)
-    for proc_id in processes_added
-        test_blas_config(proc_id, true)
-    end
-
-    @test BLAS.get_num_threads() == master_blas_thread_count
-
-    # BLAS.set_num_threads(`num`) doesn't  cause BLAS.get_num_threads to return `num`
-    # depending on the machine, the BLAS version, and BLAS configuration, so
-    # we need a very lenient test.
-    thread_counts_by_process = get_remote_num_threads(processes_added)
-    for thread_count in thread_counts_by_process
-        @test thread_count >= 1
-    end
-    rmprocs(processes_added)
-end
-test_add_procs_threaded_blas()
-
-#19687
-if false ### TODO: The logic that is supposed to implement this is racy - Disabled for now
-# ensure no race conditions between rmprocs and addprocs
-for i in 1:5
-    p = addprocs_with_testenv(1)[1]
-    @spawnat p sleep(5)
-    rmprocs(p; waitfor=0)
-end
-
-# Test if a wait has been called on rmprocs(...;waitfor=0), further remotecalls
-# don't throw errors.
-for i in 1:5
-    p = addprocs_with_testenv(1)[1]
-    np = nprocs()
-    @spawnat p sleep(5)
-    Base.wait(rmprocs(p; waitfor=0))
-    for pid in procs()
-        @test pid == remotecall_fetch(myid, pid)
-    end
-    @test nprocs() == np - 1
-end
-
-# Test that an exception is thrown if workers are unable to be removed within requested time.
-if DoFullTest
-    pids=addprocs_with_testenv(4);
-    @test_throws ErrorException rmprocs(pids; waitfor=0.001);
-    # wait for workers to be removed
-    while any(in(procs()), pids)
-        sleep(0.1)
-    end
-end
-end
-
-# Test addprocs/rmprocs from master node only
-for f in [ ()->addprocs(1; exeflags=test_exeflags), ()->rmprocs(workers()) ]
-    local f
-    try
-        remotecall_fetch(f, id_other)
-        error("Unexpected")
-    catch ex
-        @test isa(ex, RemoteException)
-        @test ex.captured.ex.msg == "Only process 1 can add and remove workers"
-    end
-end
-
-# Test the following addprocs error conditions
-# - invalid host name - github issue #20372
-# - julia exe exiting with an error
-# - timeout reading host:port from worker stdout
-# - host:port not found in worker stdout in the first 1000 lines
-
-struct ErrorSimulator <: ClusterManager
-    mode
-end
-
-function launch(manager::ErrorSimulator, params::Dict, launched::Array, c::Condition)
-    exename = params[:exename]
-    dir = params[:dir]
-
-    cmd = `$(Base.julia_cmd(exename)) --startup-file=no`
-    if manager.mode === :timeout
-        cmd = `$cmd -e "sleep(10)"`
-    elseif manager.mode === :ntries
-        cmd = `$cmd -e "[println(x) for x in 1:1001]"`
-    elseif manager.mode === :exit
-        cmd = `$cmd -e "exit(-1)"`
-    else
-        error("Unknown mode")
-    end
-    io = open(detach(setenv(cmd, dir=dir)))
-
-    wconfig = WorkerConfig()
-    wconfig.process = io
-    wconfig.io = io.out
-    push!(launched, wconfig)
-    notify(c)
-end
-
-testruns = Any[]
-
-if DoFullTest
-    append!(testruns, [(()->addprocs_with_testenv(["errorhost20372"]), "Unable to read host:port string from worker. Launch command exited with error?", ())])
-end
-
-append!(testruns, [
-    (()->addprocs_with_testenv(ErrorSimulator(:exit)), "Unable to read host:port string from worker. Launch command exited with error?", ()),
-    (()->addprocs_with_testenv(ErrorSimulator(:ntries)), "Unexpected output from worker launch command. Host:port string not found.", ()),
-    (()->addprocs_with_testenv(ErrorSimulator(:timeout)), "Timed out waiting to read host:port string from worker.", ("JULIA_WORKER_TIMEOUT"=>"1",))
-])
-
-for (addp_testf, expected_errstr, env) in testruns
-    old_stdout = stdout
-    stdout_out, stdout_in = redirect_stdout()
-    stdout_txt = @async filter!(readlines(stdout_out)) do s
-            return !startswith(s, "\tFrom worker startup:\t")
-        end
-    try
-        withenv(env...) do
-            addp_testf()
-        end
-        error("Unexpected")
-    catch ex
-        redirect_stdout(old_stdout)
-        close(stdout_in)
-        @test isempty(fetch(stdout_txt))
-        @test isa(ex, CompositeException)
-        @test ex.exceptions[1].task.exception.msg == expected_errstr
-    end
-end
-
-
-# Auto serialization of globals from Main.
-# bitstypes
-global v1 = 1
-@test remotecall_fetch(()->v1, id_other) == v1
-@test remotecall_fetch(()->isdefined(Main, :v1), id_other)
-for i in 2:5
-    global v1 = i
-    @test remotecall_fetch(()->v1, id_other) == i
-end
-
-# non-bitstypes
-global v2 = zeros(10)
-for i in 1:5
-    v2[i] = i
-    @test remotecall_fetch(()->v2, id_other) == v2
-end
-
-# Different global bindings to the same object
-global v3 = fill(1., 10)
-global v4 = v3
-@test remotecall_fetch(()->v3, id_other) == remotecall_fetch(()->v4, id_other)
-@test remotecall_fetch(()->isdefined(Main, :v3), id_other)
-@test remotecall_fetch(()->isdefined(Main, :v4), id_other)
-
-# Global references to Types and Modules should work if they are locally defined
-global v5 = Int
-global v6 = Distributed
-@test remotecall_fetch(()->v5, id_other) === Int
-@test remotecall_fetch(()->v6, id_other) === Distributed
-
-struct FooStructLocal end
-module FooModLocal end
-v5 = FooStructLocal
-v6 = FooModLocal
-@test_throws RemoteException remotecall_fetch(()->v5, id_other)
-@test_throws RemoteException remotecall_fetch(()->v6, id_other)
-
-@everywhere struct FooStructEverywhere end
-@everywhere module FooModEverywhere end
-v5 = FooStructEverywhere
-v6 = FooModEverywhere
-@test remotecall_fetch(()->v5, id_other) === FooStructEverywhere
-@test remotecall_fetch(()->v6, id_other) === FooModEverywhere
-
-# hash value same but different object instance
-v7 = ones(10)
-oid1 = objectid(v7)
-hval1 = hash(v7)
-@test v7 == @fetchfrom id_other v7
-remote_oid1 = @fetchfrom id_other objectid(v7)
-
-v7 = ones(10)
-@test oid1 != objectid(v7)
-@test hval1 == hash(v7)
-@test remote_oid1 != @fetchfrom id_other objectid(v7)
-
-
-# Github issue #31252
-v31252 = :a
-@test :a == @fetchfrom id_other v31252
-
-v31252 = :b
-@test :b == @fetchfrom id_other v31252
-
-v31252 = :a
-@test :a == @fetchfrom id_other v31252
-
-
-# Test that a global is not being repeatedly serialized when
-# a) referenced multiple times in the closure
-# b) hash value has not changed.
-
-@everywhere begin
-    using Serialization
-    global testsercnt_d = Dict()
-    mutable struct TestSerCnt
-        v
-    end
-    import Base.hash, Base.==
-    hash(x::TestSerCnt, h::UInt) = hash(hash(x.v), h)
-    ==(x1::TestSerCnt, x2::TestSerCnt) = (x1.v == x2.v)
-
-    function Serialization.serialize(s::AbstractSerializer, t::TestSerCnt)
-        Serialization.serialize_type(s, TestSerCnt)
-        serialize(s, t.v)
-        global testsercnt_d
-        cnt = get!(testsercnt_d, objectid(t), 0)
-        testsercnt_d[objectid(t)] = cnt+1
-    end
-
-    Serialization.deserialize(s::AbstractSerializer, ::Type{TestSerCnt}) = TestSerCnt(deserialize(s))
-end
-
-# hash value of tsc is not changed
-global tsc = TestSerCnt(zeros(10))
-for i in 1:5
-    remotecall_fetch(()->tsc, id_other)
-end
-# should have been serialized only once
-@test testsercnt_d[objectid(tsc)] == 1
-
-# hash values are changed
-n=5
-testsercnt_d[objectid(tsc)] = 0
-for i in 1:n
-    tsc.v[i] = i
-    remotecall_fetch(()->tsc, id_other)
-end
-# should have been serialized as many times as the loop
-@test testsercnt_d[objectid(tsc)] == n
-
-# Multiple references in a closure should be serialized only once.
-global mrefs = TestSerCnt(fill(1.,10))
-@test remotecall_fetch(()->(mrefs.v, 2*mrefs.v, 3*mrefs.v), id_other) == (fill(1.,10), fill(2.,10), fill(3.,10))
-@test testsercnt_d[objectid(mrefs)] == 1
-
-
-# nested anon functions
-global f1 = x->x
-global f2 = x->f1(x)
-v = rand()
-@test remotecall_fetch(f2, id_other, v) == v
-@test remotecall_fetch(x->f2(x), id_other, v) == v
-
-# consts
-const c1 = fill(1., 10)
-@test remotecall_fetch(()->c1, id_other) == c1
-@test remotecall_fetch(()->isconst(Main, :c1), id_other)
-
-# Test same calls with local vars
-function wrapped_var_ser_tests()
-    # bitstypes
-    local lv1 = 1
-    @test remotecall_fetch(()->lv1, id_other) == lv1
-    @test !remotecall_fetch(()->isdefined(Main, :lv1), id_other)
-    for i in 2:5
-        lv1 = i
-        @test remotecall_fetch(()->lv1, id_other) == i
-    end
-
-    # non-bitstypes
-    local lv2 = zeros(10)
-    for i in 1:5
-        lv2[i] = i
-        @test remotecall_fetch(()->lv2, id_other) == lv2
-    end
-
-    # nested anon functions
-    local lf1 = x->x
-    local lf2 = x->lf1(x)
-    v = rand()
-    @test remotecall_fetch(lf2, id_other, v) == v
-    @test remotecall_fetch(x->lf2(x), id_other, v) == v
-end
-
-wrapped_var_ser_tests()
-
-# Test internal data structures being cleaned up upon gc.
-global ids_cleanup = fill(1., 6)
-global ids_func = ()->ids_cleanup
-
-clust_ser = (Distributed.worker_from_id(id_other)).w_serializer
-@test remotecall_fetch(ids_func, id_other) == ids_cleanup
-
-# TODO Add test for cleanup from `clust_ser.glbs_in_tnobj`
-
-# reported github issues - Mostly tests with globals and various distributed macros
-#2669, #5390
-v2669=10
-@test fetch(@spawnat :any (1+v2669)) == 11
-
-#12367
-refs = []
-if true
-    n = 10
-    for p in procs()
-        push!(refs, @spawnat p begin
-            @sync for i in 1:n
-                nothing
-            end
-        end)
-    end
-end
-foreach(wait, refs)
-
-#6760
-if true
-    a = 2
-    x = @distributed (vcat) for k=1:2
-        sin(a)
-    end
-end
-@test x == map(_->sin(2), 1:2)
-
-let thrown = false
-    try
-        remotecall_fetch(sqrt, 2, -1)
-    catch e
-        thrown = true
-        local b = IOBuffer()
-        showerror(b, e)
-        @test occursin("sqrt was called with a negative real argument", String(take!(b)))
-    end
-    @test thrown
-end
-
-# issue #34333
-let
-    @test fetch(remotecall(Float64, id_other, 1)) == Float64(1)
-    @test fetch(remotecall_wait(Float64, id_other, 1)) == Float64(1)
-    @test remotecall_fetch(Float64, id_other, 1) == Float64(1)
-end
-
-#19463
-function foo19463()
-    w1 = workers()[1]
-    w2 = workers()[2]
-    w3 = workers()[3]
-
-    b1 = () -> 1
-    b2 = () -> fetch(@spawnat w1 b1()) + 1
-    b3 = () -> fetch(@spawnat w2 b2()) + 1
-    b4 = () -> fetch(@spawnat w3 b3()) + 1
-    b4()
-end
-@test foo19463() == 4
-
-# Testing clear!
-function setup_syms(n, pids)
-    syms = []
-    for i in 1:n
-        symstr = string("clrtest", randstring())
-        sym = Symbol(symstr)
-        eval(:(global $sym = rand()))
-        for p in pids
-            eval(:(@test $sym == remotecall_fetch(()->$sym, $p)))
-            eval(:(@test remotecall_fetch(isdefined, $p, Main, Symbol($symstr))))
-        end
-        push!(syms, sym)
-    end
-    syms
-end
-
-function test_clear(syms, pids)
-    for p in pids
-        for sym in syms
-            remote_val = remotecall_fetch(()->getfield(Main, sym), p)
-            @test remote_val === nothing
-            @test remote_val != getfield(Main, sym)
-        end
-    end
-end
-
-syms = setup_syms(1, [id_other])
-clear!(syms[1], id_other)
-test_clear(syms, [id_other])
-
-syms = setup_syms(1, workers())
-clear!(syms[1], workers())
-test_clear(syms, workers())
-
-syms = setup_syms(3, [id_other])
-clear!(syms, id_other)
-test_clear(syms, [id_other])
-
-syms = setup_syms(3, workers())
-clear!(syms, workers())
-test_clear(syms, workers())
-
-# Test partial recovery from a deserialization error in CapturedException
-try
-    expr = quote
-                mutable struct DontExistOn1
-                    x
-                end
-                throw(BoundsError(DontExistOn1(1), 1))
-           end
-
-    remotecall_fetch(()->eval(expr), id_other)
-    error("unexpected")
-catch ex
-    @test isa(ex.captured.ex.exceptions[1].ex, ErrorException)
-    @test occursin("BoundsError", ex.captured.ex.exceptions[1].ex.msg)
-    @test ex.captured.ex.exceptions[2].ex == UndefVarError(:DontExistOn1)
-end
-
-let
-    # creates a new worker in a different folder and tries to include file
-    tmp_dir = mktempdir()
-    tmp_dir2 = joinpath(tmp_dir, "2")
-    tmp_file = joinpath(tmp_dir2, "testfile")
-    tmp_file2 = joinpath(tmp_dir2, "testfile2")
-    proc = addprocs_with_testenv(1, dir=tmp_dir)
-    try
-        mkdir(tmp_dir2)
-        write(tmp_file, "23.32 + 32 + myid() + include(\"testfile2\")")
-        write(tmp_file2, "myid() * 2")
-        function test_include_fails_to_open_file(fname)
-            try
-                include(fname)
-            catch exc
-                path = joinpath(@__DIR__, fname)
-                @test exc isa SystemError
-                @test exc.prefix == "opening file $(repr(path))"
-            end
-        end
-        test_include_fails_to_open_file("testfile")
-        test_include_fails_to_open_file("testfile2")
-        test_include_fails_to_open_file(joinpath("2", "testfile2"))
-        @test include(tmp_file) == 58.32
-        @test remotecall_fetch(include, proc[1], joinpath("2", "testfile")) == 55.32 + proc[1] * 3
-    finally
-        rmprocs(proc)
-        rm(tmp_file, force=true)
-        rm(tmp_file2, force=true)
-        rm(tmp_dir2, force=true)
-        #rm(tmp_dir, force=true)
-    end
-end
-# cookie and command line option `--worker` tests. remove workers, set cookie and test
-struct WorkerArgTester <: ClusterManager
-    worker_opt
-    write_cookie
-end
-
-function launch(manager::WorkerArgTester, params::Dict, launched::Array, c::Condition)
-    dir = params[:dir]
-    exename = params[:exename]
-    exeflags = params[:exeflags]
-
-    cmd = `$exename $exeflags --bind-to $(Distributed.LPROC.bind_addr) $(manager.worker_opt)`
-    cmd = pipeline(detach(setenv(cmd, dir=dir)))
-    io = open(cmd, "r+")
-    manager.write_cookie && Distributed.write_cookie(io)
-
-    wconfig = WorkerConfig()
-    wconfig.process = io
-    wconfig.io = io.out
-    push!(launched, wconfig)
-
-    notify(c)
-end
-manage(::WorkerArgTester, ::Integer, ::WorkerConfig, ::Symbol) = nothing
-
-nprocs()>1 && rmprocs(workers())
-
-npids = addprocs_with_testenv(WorkerArgTester(`--worker`, true))
-@test remotecall_fetch(myid, npids[1]) == npids[1]
-rmprocs(npids)
-
-cluster_cookie("")  # An empty string is a valid cookie
-npids = addprocs_with_testenv(WorkerArgTester(`--worker=`, false))
-@test remotecall_fetch(myid, npids[1]) == npids[1]
-rmprocs(npids)
-
-cluster_cookie("foobar") # custom cookie
-npids = addprocs_with_testenv(WorkerArgTester(`--worker=foobar`, false))
-@test remotecall_fetch(myid, npids[1]) == npids[1]
-
-# tests for start_worker options to retain stdio (issue #31035)
-struct RetainStdioTester <: ClusterManager
-    close_stdin::Bool
-    stderr_to_stdout::Bool
-end
-
-function launch(manager::RetainStdioTester, params::Dict, launched::Array, c::Condition)
-    dir = params[:dir]
-    exename = params[:exename]
-    exeflags = params[:exeflags]
-
-    jlcmd = "using Distributed; start_worker(\"\"; close_stdin=$(manager.close_stdin), stderr_to_stdout=$(manager.stderr_to_stdout));"
-    cmd = detach(setenv(`$exename $exeflags --bind-to $(Distributed.LPROC.bind_addr) -e $jlcmd`, dir=dir))
-    proc = open(cmd, "r+")
-
-    wconfig = WorkerConfig()
-    wconfig.process = proc
-    wconfig.io = proc.out
-    push!(launched, wconfig)
-
-    notify(c)
-end
-manage(::RetainStdioTester, ::Integer, ::WorkerConfig, ::Symbol) = nothing
-
-
-nprocs()>1 && rmprocs(workers())
-cluster_cookie("")
-
-for close_stdin in (true, false), stderr_to_stdout in (true, false)
-    local npids = addprocs_with_testenv(RetainStdioTester(close_stdin,stderr_to_stdout))
-    @test remotecall_fetch(myid, npids[1]) == npids[1]
-    if close_stdin
-        @test remotecall_fetch(()->stdin === devnull && !isreadable(stdin), npids[1])
-    else
-        @test remotecall_fetch(()->stdin !== devnull && isopen(stdin) && isreadable(stdin), npids[1])
-    end
-    @test stderr_to_stdout == remotecall_fetch(()->(stderr === stdout), npids[1])
-    rmprocs(npids)
-end
-
-# Issue # 22865
-# Must be run on a new cluster, i.e., all workers must be in the same state.
-@assert nprocs() == 1
-p1,p2 = addprocs_with_testenv(2)
-@everywhere f22865(p) = remotecall_fetch(x->x.*2, p, fill(1.,2))
-@test fill(2.,2) == remotecall_fetch(f22865, p1, p2)
-rmprocs(p1, p2)
-
-function reuseport_tests()
-    # Run the test on all processes.
-    results = asyncmap(procs()) do p
-        remotecall_fetch(p) do
-            ports_lower = []        # ports of pids lower than myid()
-            ports_higher = []       # ports of pids higher than myid()
-            for w in Distributed.PGRP.workers
-                w.id == myid() && continue
-                port = Sockets._sockname(w.r_stream, true)[2]
-                if (w.id == 1)
-                    # master connects to workers
-                    push!(ports_higher, port)
-                elseif w.id < myid()
-                    push!(ports_lower, port)
-                elseif w.id > myid()
-                    push!(ports_higher, port)
-                end
-            end
-            @assert (length(ports_lower) + length(ports_higher)) == nworkers()
-            for portset in [ports_lower, ports_higher]
-                if (length(portset) > 0) && (length(unique(portset)) != 1)
-                    @warn "SO_REUSEPORT TESTS FAILED. UNSUPPORTED/OLDER UNIX VERSION?"
-                    return 0
-                end
-            end
-            return myid()
-        end
-    end
-
-    # Ensure that the code has indeed been successfully executed everywhere
-    @test all(in(results), procs())
-end
-
-# Test that the client port is reused. SO_REUSEPORT may not be supported on
-# all UNIX platforms, Linux kernels prior to 3.9 and older versions of OSX
-@assert nprocs() == 1
-addprocs_with_testenv(4; lazy=false)
-if ccall(:jl_has_so_reuseport, Int32, ()) == 1
-    reuseport_tests()
-else
-    @info "SO_REUSEPORT is unsupported, skipping reuseport tests"
-end
-
-# issue #27933
-a27933 = :_not_defined_27933
-@test remotecall_fetch(()->a27933, first(workers())) === a27933
-
-# PR #28651
-for T in (UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64)
-    local n = @distributed (+) for i in Base.OneTo(T(10))
-        i
-    end
-    @test n == 55
-end
-
-# issue #28966
-let code = """
-    import Distributed
-    Distributed.addprocs(1)
-    Distributed.@everywhere f() = myid()
-    for w in Distributed.workers()
-        @assert Distributed.remotecall_fetch(f, w) == w
-    end
-    """
-    @test success(`$(Base.julia_cmd()) --startup-file=no -e $code`)
-end
-
-# PR 32431: tests for internal Distributed.head_and_tail
-let (h, t) = Distributed.head_and_tail(1:10, 3)
-    @test h == 1:3
-    @test collect(t) == 4:10
-end
-let (h, t) = Distributed.head_and_tail(1:10, 0)
-    @test h == []
-    @test collect(t) == 1:10
-end
-let (h, t) = Distributed.head_and_tail(1:3, 5)
-    @test h == 1:3
-    @test collect(t) == []
-end
-let (h, t) = Distributed.head_and_tail(1:3, 3)
-    @test h == 1:3
-    @test collect(t) == []
-end
-let (h, t) = Distributed.head_and_tail(Int[], 3)
-    @test h == []
-    @test collect(t) == []
-end
-let (h, t) = Distributed.head_and_tail(Int[], 0)
-    @test h == []
-    @test collect(t) == []
-end
-
-# issue #35937
-let e = @test_throws RemoteException pmap(1) do _
-            wait(@async error(42))
-        end
-    # check that the inner TaskFailedException is correctly formed & can be printed
-    es = sprint(showerror, e.value)
-    @test contains(es, ":\nTaskFailedException\nStacktrace:\n")
-    @test contains(es, "\n\n    nested task error:")
-    @test contains(es, "\n\n    nested task error: 42\n")
-end
-
-# issue #27429, propagate relative `include` path to workers
-@everywhere include("includefile.jl")
-for p in procs()
-    @test @fetchfrom(p, i27429) == 27429
-end
-
-# Propagation of package environments for local workers (#28781)
-let julia = `$(Base.julia_cmd()) --startup-file=no`; mktempdir() do tmp
-    project = mkdir(joinpath(tmp, "project"))
-    depots = [mkdir(joinpath(tmp, "depot1")), mkdir(joinpath(tmp, "depot2"))]
-    load_path = [mkdir(joinpath(tmp, "load_path")), "@stdlib", "@"]
-    pathsep = Sys.iswindows() ? ";" : ":"
-    env = Dict(
-        "JULIA_DEPOT_PATH" => join(depots, pathsep),
-        "JULIA_LOAD_PATH" => join(load_path, pathsep),
-        # Explicitly propagate `TMPDIR`, in the event that we're running on a
-        # CI system where `TMPDIR` is special.
-        "TMPDIR" => dirname(tmp),
-    )
-    setupcode = """
-    using Distributed, Test
-    @everywhere begin
-        depot_path() = DEPOT_PATH
-        load_path() = LOAD_PATH
-        active_project() = Base.ACTIVE_PROJECT[]
-    end
-    """
-    testcode = setupcode * """
-    for w in workers()
-        @test remotecall_fetch(depot_path, w)          == DEPOT_PATH
-        @test remotecall_fetch(load_path, w)           == LOAD_PATH
-        @test remotecall_fetch(Base.load_path, w)      == Base.load_path()
-        @test remotecall_fetch(active_project, w)      == Base.ACTIVE_PROJECT[]
-        @test remotecall_fetch(Base.active_project, w) == Base.active_project()
-    end
-    """
-    # No active project
-    extracode = """
-    for w in workers()
-        @test remotecall_fetch(active_project, w) === Base.ACTIVE_PROJECT[] === nothing
-    end
-    """
-    cmd = setenv(`$(julia) -p1 -e $(testcode * extracode)`, env)
-    @test success(cmd)
-    # --project
-    extracode = """
-    for w in workers()
-        @test remotecall_fetch(active_project, w) == Base.ACTIVE_PROJECT[] ==
-              $(repr(project))
-    end
-    """
-    cmd = setenv(`$(julia) --project=$(project) -p1 -e $(testcode * extracode)`, env)
-    @test success(cmd)
-    # JULIA_PROJECT
-    cmd = setenv(`$(julia) -p1 -e $(testcode * extracode)`,
-                 (env["JULIA_PROJECT"] = project; env))
-    @test success(cmd)
-    # Pkg.activate(...)
-    activateish = """
-    Base.ACTIVE_PROJECT[] = $(repr(project))
-    using Distributed
-    addprocs(1)
-    """
-    cmd = setenv(`$(julia) -e $(activateish * testcode * extracode)`, env)
-    @test success(cmd)
-    # JULIA_(LOAD|DEPOT)_PATH
-    shufflecode = """
-    d = reverse(DEPOT_PATH)
-    append!(empty!(DEPOT_PATH), d)
-    l = reverse(LOAD_PATH)
-    append!(empty!(LOAD_PATH), l)
-    """
-    addcode = """
-    using Distributed
-    addprocs(1) # after shuffling
-    """
-    extracode = """
-    for w in workers()
-        @test remotecall_fetch(load_path, w) == $(repr(reverse(load_path)))
-        @test remotecall_fetch(depot_path, w) == $(repr(reverse(depots)))
-    end
-    """
-    cmd = setenv(`$(julia) -e $(shufflecode * addcode * testcode * extracode)`, env)
-    @test success(cmd)
-    # Mismatch when shuffling after proc addition
-    failcode = shufflecode * setupcode * """
-    for w in workers()
-        @test remotecall_fetch(load_path, w) == reverse(LOAD_PATH) == $(repr(load_path))
-        @test remotecall_fetch(depot_path, w) == reverse(DEPOT_PATH) == $(repr(depots))
-    end
-    """
-    cmd = setenv(`$(julia) -p1 -e $(failcode)`, env)
-    @test success(cmd)
-    # Passing env or exeflags to addprocs(...) to override defaults
-    envcode = """
-    using Distributed
-    project = mktempdir()
-    env = Dict(
-        "JULIA_LOAD_PATH" => string(LOAD_PATH[1], $(repr(pathsep)), "@stdlib"),
-        "JULIA_DEPOT_PATH" => DEPOT_PATH[1],
-        "TMPDIR" => ENV["TMPDIR"],
-    )
-    addprocs(1; env = env, exeflags = `--project=\$(project)`)
-    env["JULIA_PROJECT"] = project
-    addprocs(1; env = env)
-    """ * setupcode * """
-    for w in workers()
-        @test remotecall_fetch(depot_path, w)          == [DEPOT_PATH[1]]
-        @test remotecall_fetch(load_path, w)           == [LOAD_PATH[1], "@stdlib"]
-        @test remotecall_fetch(active_project, w)      == project
-        @test remotecall_fetch(Base.active_project, w) == joinpath(project, "Project.toml")
-    end
-    """
-    cmd = setenv(`$(julia) -e $(envcode)`, env)
-    @test success(cmd)
-end end
-
-include("splitrange.jl")
-
-# Clear all workers for timeout tests (issue #45785)
-rmprocs(workers())
-begin
-    # First, assert that we get no messages when we close a cooperative worker
-    w = only(addprocs(1))
-    @test_nowarn begin
-        wait(rmprocs([w]))
-    end
-
-    # Next, ensure we get a log message when a worker does not cleanly exit
-    w = only(addprocs(1))
-    @test_logs (:warn, r"sending SIGTERM") begin
-        remote_do(w) do
-            # Cause the 'exit()' message that `rmprocs()` sends to do nothing
-            Core.eval(Base, :(exit() = nothing))
-        end
-        wait(rmprocs([w]))
-    end
-end
-
-# Run topology tests last after removing all workers, since a given
-# cluster at any time only supports a single topology.
-rmprocs(workers())
-include("topology.jl")
diff --git a/stdlib/Distributed/test/includefile.jl b/stdlib/Distributed/test/includefile.jl
deleted file mode 100644
index faea6c11aaf6a..0000000000000
--- a/stdlib/Distributed/test/includefile.jl
+++ /dev/null
@@ -1,5 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# this is used to test that relative include paths work on other processes
-
-i27429 = 27429
diff --git a/stdlib/Distributed/test/managers.jl b/stdlib/Distributed/test/managers.jl
deleted file mode 100644
index 7971222c7511a..0000000000000
--- a/stdlib/Distributed/test/managers.jl
+++ /dev/null
@@ -1,26 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test
-using Distributed
-using Sockets
-using Distributed: parse_machine, SSHManager, LocalManager
-
-@test parse_machine("127.0.0.1") == ("127.0.0.1", nothing)
-@test parse_machine("127.0.0.1:80") == ("127.0.0.1", 80)
-@test parse_machine("[2001:db8::1]") == ("2001:db8::1", nothing)
-@test parse_machine("[2001:db8::1]:443") == ("2001:db8::1", 443)
-
-@test parse_machine("127.0.0.1:90") == ("127.0.0.1", 90)
-@test parse_machine("127.0.0.1:1") == ("127.0.0.1", 1)
-@test parse_machine("127.0.0.1:65535") == ("127.0.0.1", 65535)
-
-@test_throws ArgumentError parse_machine("127.0.0.1:-1")
-@test_throws ArgumentError parse_machine("127.0.0.1:0")
-@test_throws ArgumentError parse_machine("127.0.0.1:65536")
-@test_throws ArgumentError parse_machine("[2001:db8::1]:443:888")
-@test_throws ArgumentError parse_machine("[2001:db8::1")
-@test_throws ArgumentError parse_machine("[2001:db8::1]:aaa")
-
-@test occursin(r"^SSHManager\(machines=.*\)$",
-               sprint((t,x) -> show(t, "text/plain", x), SSHManager("127.0.0.1")))
-@test sprint((t,x) -> show(t, "text/plain", x), LocalManager(1, true)) == "LocalManager()"
diff --git a/stdlib/Distributed/test/runtests.jl b/stdlib/Distributed/test/runtests.jl
deleted file mode 100644
index d34d07cc48a21..0000000000000
--- a/stdlib/Distributed/test/runtests.jl
+++ /dev/null
@@ -1,14 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Run the distributed test outside of the main driver since it needs its own
-# set of dedicated workers.
-include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl"))
-disttestfile = joinpath(@__DIR__, "distributed_exec.jl")
-
-cmd = `$test_exename $test_exeflags $disttestfile`
-
-if !success(pipeline(cmd; stdout=stdout, stderr=stderr)) && ccall(:jl_running_on_valgrind,Cint,()) == 0
-    error("Distributed test failed, cmd : $cmd")
-end
-
-include("managers.jl")
diff --git a/stdlib/Distributed/test/splitrange.jl b/stdlib/Distributed/test/splitrange.jl
deleted file mode 100644
index 1cb12e1952b7d..0000000000000
--- a/stdlib/Distributed/test/splitrange.jl
+++ /dev/null
@@ -1,35 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test
-using Distributed
-using Distributed: splitrange
-
-@test splitrange(1, 11, 1) == Array{UnitRange{Int64},1}([1:11])
-@test splitrange(0, 10, 1) == Array{UnitRange{Int64},1}([0:10])
-@test splitrange(-1, 9, 1) == Array{UnitRange{Int64},1}([-1:9])
-
-@test splitrange(1, 11, 2) == Array{UnitRange{Int64},1}([1:6,7:11])
-@test splitrange(0, 10, 2) == Array{UnitRange{Int64},1}([0:5,6:10])
-@test splitrange(-1, 9, 2) == Array{UnitRange{Int64},1}([-1:4,5:9])
-
-@test splitrange(1, 11, 3) == Array{UnitRange{Int64},1}([1:4,5:8,9:11])
-@test splitrange(0, 10, 3) == Array{UnitRange{Int64},1}([0:3,4:7,8:10])
-@test splitrange(-1, 9, 3) == Array{UnitRange{Int64},1}([-1:2,3:6,7:9])
-
-@test splitrange(1, 3, 3) == Array{UnitRange{Int64},1}([1:1,2:2,3:3])
-@test splitrange(1, 3, 4) == Array{UnitRange{Int64},1}([1:1,2:2,3:3])
-@test splitrange(0, 2, 3) == Array{UnitRange{Int64},1}([0:0,1:1,2:2])
-@test splitrange(0, 2, 4) == Array{UnitRange{Int64},1}([0:0,1:1,2:2])
-@test splitrange(-1, 1, 3) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1])
-@test splitrange(-1, 1, 4) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1])
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :OffsetArrays) || @eval Main @everywhere include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
-using .Main.OffsetArrays
-
-oa = OffsetArray([123, -345], (-2,))
-
-@everywhere using Test
-@sync @distributed for i in eachindex(oa)
-    @test i ∈ (-1, 0)
-end
diff --git a/stdlib/Distributed/test/topology.jl b/stdlib/Distributed/test/topology.jl
deleted file mode 100644
index fc969323bc587..0000000000000
--- a/stdlib/Distributed/test/topology.jl
+++ /dev/null
@@ -1,143 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Random
-
-pids = addprocs_with_testenv(4; topology="master_worker")
-
-let p1 = pids[1], p2 = pids[2]
-    @test_throws RemoteException remotecall_fetch(()->remotecall_fetch(myid, p2), p1)
-end
-
-function test_worker_counts()
-    # check if the nprocs/nworkers/workers are the same on the remaining workers
-    np=nprocs()
-    nw=nworkers()
-    ws=sort(workers())
-
-    for p in workers()
-        @test (true, true, true) == remotecall_fetch(p, np, nw, ws) do x,y,z
-            (x==nprocs(), y==nworkers(), z==sort(workers()))
-        end
-    end
-end
-
-function remove_workers_and_test()
-    while nworkers() > 0
-        rmprocs(workers()[1])
-        test_worker_counts()
-        if nworkers() == nprocs()
-            break
-        end
-    end
-end
-
-remove_workers_and_test()
-
-# connect even pids to other even pids, odd to odd.
-mutable struct TopoTestManager <: ClusterManager
-    np::Integer
-end
-
-function launch(manager::TopoTestManager, params::Dict, launched::Array, c::Condition)
-    dir = params[:dir]
-    exename = params[:exename]
-    exeflags = params[:exeflags]
-
-    cmd = `$exename $exeflags --bind-to $(Distributed.LPROC.bind_addr) --worker`
-    cmd = pipeline(detach(setenv(cmd, dir=dir)))
-    for i in 1:manager.np
-        io = open(cmd, "r+")
-        Distributed.write_cookie(io)
-
-        wconfig = WorkerConfig()
-        wconfig.process = io
-        wconfig.io = io.out
-        wconfig.ident = i
-        wconfig.connect_idents = Vector(i+2:2:manager.np)
-        push!(launched, wconfig)
-    end
-
-    notify(c)
-end
-
-const map_pid_ident=Dict()
-function manage(manager::TopoTestManager, id::Integer, config::WorkerConfig, op::Symbol)
-    if op === :register
-        map_pid_ident[id] = config.ident
-    elseif op === :interrupt
-        kill(config.process, 2)
-    end
-end
-
-addprocs_with_testenv(TopoTestManager(8); topology="custom")
-
-while true
-    if any(x->get(map_pid_ident, x, 0)==0, workers())
-        yield()
-    else
-        break
-    end
-end
-
-let p1, p2
-for p1 in workers()
-    for p2 in workers()
-        i1 = map_pid_ident[p1]
-        i2 = map_pid_ident[p2]
-        if (iseven(i1) && iseven(i2)) || (isodd(i1) && isodd(i2))
-            @test p2 == remotecall_fetch(p->remotecall_fetch(myid, p), p1, p2)
-        else
-            @test_throws RemoteException remotecall_fetch(p->remotecall_fetch(myid, p), p1, p2)
-        end
-    end
-end
-end
-
-remove_workers_and_test()
-
-# test `lazy` connection setup
-function def_count_conn()
-    @everywhere function count_connected_workers()
-        count(x -> isa(x, Distributed.Worker) && isdefined(x, :r_stream) && isopen(x.r_stream),
-                Distributed.PGRP.workers)
-    end
-end
-
-addprocs_with_testenv(8)
-def_count_conn()
-
-# Test for 10 random combinations
-wl = workers()
-combinations = []
-while length(combinations) < 10
-    from = rand(wl)
-    to = rand(wl)
-    if from == to || ((from,to) in combinations) || ((to,from) in combinations)
-        continue
-    else
-        push!(combinations, (from,to))
-    end
-end
-
-# Initially only master-worker connections ought to be setup
-expected_num_conns = 8
-let num_conns = sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers()))
-    @test num_conns == expected_num_conns
-end
-
-for (i, (from,to)) in enumerate(combinations)
-    remotecall_wait(topid->remotecall_fetch(myid, topid), from, to)
-    global expected_num_conns += 2    # one connection endpoint on both from and to
-    let num_conns = sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers()))
-        @test num_conns == expected_num_conns
-    end
-end
-
-# With lazy=false, all connections ought to be setup during `addprocs`
-rmprocs(workers())
-addprocs_with_testenv(8; lazy=false)
-def_count_conn()
-@test sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) == 64
-
-# Cannot add more workers with a different `lazy` value
-@test_throws ArgumentError addprocs_with_testenv(1; lazy=true)
diff --git a/stdlib/Downloads.version b/stdlib/Downloads.version
index c6db08779e947..bf92107ba1816 100644
--- a/stdlib/Downloads.version
+++ b/stdlib/Downloads.version
@@ -1,4 +1,4 @@
 DOWNLOADS_BRANCH = master
-DOWNLOADS_SHA1 = f97c72fbd726e208a04c53791b35cc34c747569f
+DOWNLOADS_SHA1 = 4e20d029c723199c0b8ea0e2418ff240d25ddaef
 DOWNLOADS_GIT_URL := https://github.com/JuliaLang/Downloads.jl.git
 DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1
diff --git a/stdlib/FileWatching/Project.toml b/stdlib/FileWatching/Project.toml
index 1da637fd4259d..5edcfdadd085d 100644
--- a/stdlib/FileWatching/Project.toml
+++ b/stdlib/FileWatching/Project.toml
@@ -1,5 +1,6 @@
 name = "FileWatching"
 uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/FileWatching/docs/src/index.md b/stdlib/FileWatching/docs/src/index.md
index a420d49232345..15d4e39a45117 100644
--- a/stdlib/FileWatching/docs/src/index.md
+++ b/stdlib/FileWatching/docs/src/index.md
@@ -1,11 +1,21 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/FileWatching/docs/src/index.md"
+```
+
 # [File Events](@id lib-filewatching)
 
 ```@docs
-FileWatching.poll_fd
-FileWatching.poll_file
-FileWatching.watch_file
-FileWatching.watch_folder
-FileWatching.unwatch_folder
+poll_fd
+poll_file
+watch_file
+watch_folder
+unwatch_folder
+```
+```@docs
+FileMonitor
+FolderMonitor
+PollingFileWatcher
+FDWatcher
 ```
 
 # Pidfile
diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl
index 2a654547ae6e3..ddaf36dfd33a4 100644
--- a/stdlib/FileWatching/src/FileWatching.jl
+++ b/stdlib/FileWatching/src/FileWatching.jl
@@ -6,7 +6,7 @@ Utilities for monitoring files and file descriptors for events.
 module FileWatching
 
 export
-    # one-shot API (returns results):
+    # one-shot API (returns results, race-y):
     watch_file, # efficient for small numbers of files
     watch_folder, # efficient for large numbers of files
     unwatch_folder,
@@ -22,11 +22,11 @@ export
     trymkpidlock
 
 import Base: @handle_as, wait, close, eventloop, notify_error, IOError,
-    _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError,
-    iolock_begin, iolock_end, associate_julia_struct, disassociate_julia_struct,
-    preserve_handle, unpreserve_handle, isreadable, iswritable, isopen,
-    |, getproperty, propertynames
-import Base.Filesystem.StatStruct
+    uv_req_data, uv_req_set_data, associate_julia_struct, disassociate_julia_struct,
+    _sizeof_uv_poll, _sizeof_uv_fs, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError,
+    iolock_begin, iolock_end, preserve_handle, unpreserve_handle,
+    isreadable, iswritable, isopen, |, getproperty, propertynames
+import Base.Filesystem: StatStruct, uv_fs_req_cleanup
 if Sys.iswindows()
     import Base.WindowsRawSocket
 end
@@ -38,13 +38,13 @@ const UV_CHANGE = Int32(2)
 struct FileEvent
     renamed::Bool
     changed::Bool
-    timedout::Bool
+    timedout::Bool # aka canceled
     FileEvent(r::Bool, c::Bool, t::Bool) = new(r, c, t)
 end
 FileEvent() = FileEvent(false, false, true)
 FileEvent(flags::Integer) = FileEvent((flags & UV_RENAME) != 0,
                                       (flags & UV_CHANGE) != 0,
-                                      false)
+                                      iszero(flags))
 |(a::FileEvent, b::FileEvent) =
     FileEvent(a.renamed | b.renamed,
               a.changed | b.changed,
@@ -78,34 +78,183 @@ isreadable(f::FDEvent) = f.readable
 iswritable(f::FDEvent) = f.writable
 |(a::FDEvent, b::FDEvent) = FDEvent(getfield(a, :events) | getfield(b, :events))
 
+# Callback functions
+
+function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
+    t = @handle_as handle FileMonitor
+    lock(t.notify)
+    try
+        if status != 0
+            t.ioerrno = status
+            notify_error(t.notify, _UVError("FileMonitor", status))
+            uvfinalize(t)
+        elseif events != t.events
+            events = t.events |= events
+            notify(t.notify, all=false)
+        end
+    finally
+        unlock(t.notify)
+    end
+    nothing
+end
+
+function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
+    t = @handle_as handle FolderMonitor
+    lock(t.notify)
+    try
+        if status != 0
+            notify_error(t.notify, _UVError("FolderMonitor", status))
+        else
+            fname = (filename == C_NULL) ? "" : unsafe_string(convert(Cstring, filename))
+            push!(t.channel, fname => FileEvent(events))
+            notify(t.notify)
+        end
+    finally
+        unlock(t.notify)
+    end
+    nothing
+end
+
+function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32)
+    t = @handle_as handle _FDWatcher
+    lock(t.notify)
+    try
+        if status != 0
+            notify_error(t.notify, _UVError("FDWatcher", status))
+        else
+            t.events |= events
+            if t.active[1] || t.active[2]
+                if isempty(t.notify)
+                    # if we keep hearing about events when nobody appears to be listening,
+                    # stop the poll to save cycles
+                    t.active = (false, false)
+                    ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle)
+                end
+            end
+            notify(t.notify, events)
+        end
+    finally
+        unlock(t.notify)
+    end
+    nothing
+end
+
+function uv_fspollcb(req::Ptr{Cvoid})
+    pfw = unsafe_pointer_to_objref(uv_req_data(req))::PollingFileWatcher
+    pfw.active = false
+    unpreserve_handle(pfw)
+    @assert pointer(pfw.stat_req) == req
+    r = Int32(ccall(:uv_fs_get_result, Cssize_t, (Ptr{Cvoid},), req))
+    statbuf = ccall(:uv_fs_get_statbuf, Ptr{UInt8}, (Ptr{Cvoid},), req)
+    curr_stat = StatStruct(pfw.file, statbuf, r)
+    uv_fs_req_cleanup(req)
+    lock(pfw.notify)
+    try
+        if !isempty(pfw.notify) # must discard the update if nobody watching
+            if pfw.ioerrno != r || (r == 0 && pfw.prev_stat != curr_stat)
+                if r == 0
+                    pfw.prev_stat = curr_stat
+                end
+                pfw.ioerrno = r
+                notify(pfw.notify, true)
+            end
+            pfw.timer = Timer(pfw.interval) do t
+                # async task
+                iolock_begin()
+                lock(pfw.notify)
+                try
+                    if pfw.timer === t # use identity check to test if this callback is stale by the time we got the lock
+                        pfw.timer = nothing
+                        @assert !pfw.active
+                        if isopen(pfw) && !isempty(pfw.notify)
+                            preserve_handle(pfw)
+                            uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},))
+                            err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
+                                eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb::Ptr{Cvoid})
+                            err == 0 || notify(pfw.notify, _UVError("PollingFileWatcher (start)", err), error=true) # likely just ENOMEM
+                            pfw.active = true
+                        end
+                    end
+                finally
+                    unlock(pfw.notify)
+                end
+                iolock_end()
+                nothing
+            end
+        end
+    finally
+        unlock(pfw.notify)
+    end
+    nothing
+end
+
+# Types
+
+"""
+    FileMonitor(path::AbstractString)
+
+Watch file or directory `path` (which must exist) for changes until a change occurs. This
+function does not poll the file system and instead uses platform-specific functionality to
+receive notifications from the operating system (e.g. via inotify on Linux). See the NodeJS
+documentation linked below for details.
+
+`fm = FileMonitor(path)` acts like an auto-reset Event, so `wait(fm)` blocks until there has
+been at least one event in the file originally at the given path and then returns an object
+with boolean fields `renamed`, `changed`, `timedout` summarizing all changes that have
+occurred since the last call to `wait` returned.
+
+This behavior of this function varies slightly across platforms. See
+<https://nodejs.org/api/fs.html#fs_caveats> for more detailed information.
+"""
 mutable struct FileMonitor
     @atomic handle::Ptr{Cvoid}
-    file::String
-    notify::Base.ThreadSynchronizer
-    events::Int32
-    active::Bool
+    const file::String
+    const notify::Base.ThreadSynchronizer
+    events::Int32 # accumulator for events that occurred since the last wait call, similar to Event with autoreset
+    ioerrno::Int32 # record the error, if any occurs (unlikely)
     FileMonitor(file::AbstractString) = FileMonitor(String(file))
     function FileMonitor(file::String)
         handle = Libc.malloc(_sizeof_uv_fs_event)
-        this = new(handle, file, Base.ThreadSynchronizer(), 0, false)
+        this = new(handle, file, Base.ThreadSynchronizer(), 0, 0)
         associate_julia_struct(handle, this)
         iolock_begin()
         err = ccall(:uv_fs_event_init, Cint, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle)
         if err != 0
             Libc.free(handle)
-            throw(_UVError("FileMonitor", err))
+            uv_error("FileMonitor", err)
         end
-        iolock_end()
         finalizer(uvfinalize, this)
+        uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
+        uv_error("FileMonitor (start)",
+                 ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
+                       this.handle, uv_jl_fseventscb_file::Ptr{Cvoid}, file, 0))
+        iolock_end()
         return this
     end
 end
 
+
+"""
+    FolderMonitor(folder::AbstractString)
+
+Watch a file or directory `path` for changes until a change has occurred. This function does
+not poll the file system and instead uses platform-specific functionality to receive
+notifications from the operating system (e.g. via inotify on Linux). See the NodeJS
+documentation linked below for details.
+
+This acts similar to a Channel, so calling `take!` (or `wait`) blocks until some change has
+occurred. The `wait` function will return a pair where the first field is the name of the
+changed file (if available) and the second field is an object with boolean fields `renamed`
+and `changed`, giving the event that occurred on it.
+
+This behavior of this function varies slightly across platforms. See
+<https://nodejs.org/api/fs.html#fs_caveats> for more detailed information.
+"""
 mutable struct FolderMonitor
     @atomic handle::Ptr{Cvoid}
     # notify::Channel{Any} # eltype = Union{Pair{String, FileEvent}, IOError}
-    notify::Base.ThreadSynchronizer
-    channel::Vector{Any} # eltype = Pair{String, FileEvent}
+    const notify::Base.ThreadSynchronizer
+    const channel::Vector{Any} # eltype = Pair{String, FileEvent}
     FolderMonitor(folder::AbstractString) = FolderMonitor(String(folder))
     function FolderMonitor(folder::String)
         handle = Libc.malloc(_sizeof_uv_fs_event)
@@ -118,6 +267,7 @@ mutable struct FolderMonitor
             throw(_UVError("FolderMonitor", err))
         end
         finalizer(uvfinalize, this)
+        uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
         uv_error("FolderMonitor (start)",
                  ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
                        handle, uv_jl_fseventscb_folder::Ptr{Cvoid}, folder, 0))
@@ -126,36 +276,55 @@ mutable struct FolderMonitor
     end
 end
 
+# this is similar to uv_fs_poll, but strives to avoid the design mistakes that make it unsuitable for any usable purpose
+# https://github.com/libuv/libuv/issues/4543
+"""
+    PollingFileWatcher(path::AbstractString, interval_s::Real=5.007)
+
+Monitor a file for changes by polling `stat` every `interval_s` seconds until a change
+occurs or `timeout_s` seconds have elapsed. The `interval_s` should be a long period; the
+default is 5.007 seconds. Call `stat` on it to get the most recent, but old, result.
+
+This acts like an auto-reset Event, so calling `wait` blocks until the `stat` result has
+changed since the previous value captured upon entry to the `wait` call. The `wait` function
+will return a pair of status objects `(previous, current)` once any `stat` change is
+detected since the previous time that `wait` was called. The `previous` status is always a
+`StatStruct`, but it may have all of the fields zeroed (indicating the file didn't
+previously exist, or wasn't previously accessible).
+
+The `current` status object may be a `StatStruct`, an `EOFError` (if the wait is canceled by
+closing this object), or some other `Exception` subtype (if the `stat` operation failed: for
+example, if the path is removed). Note that `stat` value may be outdated if the file has
+changed again multiple times.
+
+Using [`FileMonitor`](@ref) for this operation is preferred, since it is more reliable and
+efficient, although in some situations it may not be available.
+"""
 mutable struct PollingFileWatcher
-    @atomic handle::Ptr{Cvoid}
     file::String
-    interval::UInt32
-    notify::Base.ThreadSynchronizer
-    active::Bool
-    curr_error::Int32
-    curr_stat::StatStruct
+    interval::Float64
+    const notify::Base.ThreadSynchronizer # lock protects all fields which can be changed (including interval and file, if you really must)
+    timer::Union{Nothing,Timer}
+    const stat_req::Memory{UInt8}
+    active::Bool # whether there is already an uv_fspollcb in-flight, so to speak
+    closed::Bool # whether the user has explicitly destroyed this
+    ioerrno::Int32 # the stat errno as of the last result
+    prev_stat::StatStruct # the stat as of the last successful result
     PollingFileWatcher(file::AbstractString, interval::Float64=5.007) = PollingFileWatcher(String(file), interval)
     function PollingFileWatcher(file::String, interval::Float64=5.007) # same default as nodejs
-        handle = Libc.malloc(_sizeof_uv_fs_poll)
-        this = new(handle, file, round(UInt32, interval * 1000), Base.ThreadSynchronizer(), false, 0, StatStruct())
-        associate_julia_struct(handle, this)
-        iolock_begin()
-        err = ccall(:uv_fs_poll_init, Int32, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle)
-        if err != 0
-            Libc.free(handle)
-            throw(_UVError("PollingFileWatcher", err))
-        end
-        finalizer(uvfinalize, this)
-        iolock_end()
+        stat_req = Memory{UInt8}(undef, Int(_sizeof_uv_fs))
+        this = new(file, interval, Base.ThreadSynchronizer(), nothing, stat_req, false, false, 0, StatStruct())
+        uv_req_set_data(stat_req, this)
+        wait(this) # initialize with the current stat before return
         return this
     end
 end
 
 mutable struct _FDWatcher
     @atomic handle::Ptr{Cvoid}
-    fdnum::Int # this is NOT the file descriptor
+    const fdnum::Int # this is NOT the file descriptor
     refcount::Tuple{Int, Int}
-    notify::Base.ThreadSynchronizer
+    const notify::Base.ThreadSynchronizer
     events::Int32
     active::Tuple{Bool, Bool}
 
@@ -164,10 +333,13 @@ mutable struct _FDWatcher
         @static if Sys.isunix()
             _FDWatcher(fd::RawFD, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable)
             function _FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
-                if !readable && !writable
+                fdnum = Core.Intrinsics.bitcast(Int32, fd) + 1
+                if fdnum <= 0
+                    throw(ArgumentError("Passed file descriptor fd=$(fd) is not a valid file descriptor"))
+                elseif !readable && !writable
                     throw(ArgumentError("must specify at least one of readable or writable to create a FDWatcher"))
                 end
-                fdnum = Core.Intrinsics.bitcast(Int32, fd) + 1
+
                 iolock_begin()
                 if fdnum > length(FDWatchers)
                     old_len = length(FDWatchers)
@@ -232,12 +404,19 @@ mutable struct _FDWatcher
     @static if Sys.iswindows()
         _FDWatcher(fd::RawFD, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable)
         function _FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
+            fdnum = Core.Intrinsics.bitcast(Int32, fd) + 1
+            if fdnum <= 0
+                throw(ArgumentError("Passed file descriptor fd=$(fd) is not a valid file descriptor"))
+            end
+
             handle = Libc._get_osfhandle(fd)
             return _FDWatcher(handle, readable, writable)
         end
         _FDWatcher(fd::WindowsRawSocket, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable)
         function _FDWatcher(fd::WindowsRawSocket, readable::Bool, writable::Bool)
-            if !readable && !writable
+            if fd == Base.INVALID_OS_HANDLE
+                throw(ArgumentError("Passed file descriptor fd=$(fd) is not a valid file descriptor"))
+            elseif !readable && !writable
                 throw(ArgumentError("must specify at least one of readable or writable to create a FDWatcher"))
             end
 
@@ -264,9 +443,28 @@ mutable struct _FDWatcher
     end
 end
 
+"""
+    FDWatcher(fd::Union{RawFD,WindowsRawSocket}, readable::Bool, writable::Bool)
+
+Monitor a file descriptor `fd` for changes in the read or write availability.
+
+The keyword arguments determine which of read and/or write status should be monitored; at
+least one of them must be set to `true`.
+
+The returned value is an object with boolean fields `readable`, `writable`, and `timedout`,
+giving the result of the polling.
+
+This acts like a level-set event, so calling `wait` blocks until one of those conditions is
+met, but then continues to return without blocking until the condition is cleared (either
+there is no more to read, or no more space in the write buffer, or both).
+
+!!! warning
+    You must call `close` manually, when finished with this object, before the fd
+    argument is closed. Failure to do so risks serious crashes.
+"""
 mutable struct FDWatcher
     # WARNING: make sure `close` has been manually called on this watcher before closing / destroying `fd`
-    watcher::_FDWatcher
+    const watcher::_FDWatcher
     mask::FDEvent
     function FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
         return FDWatcher(fd, FDEvent(readable, writable, false, false))
@@ -290,12 +488,11 @@ end
 
 function getproperty(fdw::FDWatcher, s::Symbol)
     # support deprecated field names
-    s === :readable && return fdw.mask.readable
-    s === :writable && return fdw.mask.writable
+    s === :readable && return getfield(fdw, :mask).readable
+    s === :writable && return getfield(fdw, :mask).writable
     return getfield(fdw, s)
 end
 
-
 close(t::_FDWatcher, mask::FDEvent) = close(t, mask.readable, mask.writable)
 function close(t::_FDWatcher, readable::Bool, writable::Bool)
     iolock_begin()
@@ -317,23 +514,40 @@ function close(t::FDWatcher)
     close(t.watcher, mask)
 end
 
-function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
+function uvfinalize(uv::Union{FileMonitor, FolderMonitor})
     iolock_begin()
-    if uv.handle != C_NULL
-        disassociate_julia_struct(uv) # close (and free) without notify
-        ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), uv.handle)
+    handle = @atomicswap :monotonic uv.handle = C_NULL
+    if handle != C_NULL
+        disassociate_julia_struct(handle) # close (and free) without notify
+        ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), handle)
     end
     iolock_end()
 end
 
-function close(t::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
+function close(t::Union{FileMonitor, FolderMonitor})
     iolock_begin()
-    if t.handle != C_NULL
-        ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle)
+    handle = t.handle
+    if handle != C_NULL
+        ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), handle)
     end
     iolock_end()
 end
 
+function close(pfw::PollingFileWatcher)
+    timer = nothing
+    lock(pfw.notify)
+    try
+        pfw.closed = true
+        notify(pfw.notify, false)
+        timer = pfw.timer
+        pfw.timer = nothing
+    finally
+        unlock(pfw.notify)
+    end
+    timer === nothing || close(timer)
+    nothing
+end
+
 function _uv_hook_close(uv::_FDWatcher)
     # fyi: jl_atexit_hook can cause this to get called too
     Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
@@ -341,24 +555,11 @@ function _uv_hook_close(uv::_FDWatcher)
     nothing
 end
 
-function _uv_hook_close(uv::PollingFileWatcher)
-    lock(uv.notify)
-    try
-        uv.active = false
-        Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
-        notify(uv.notify, StatStruct())
-    finally
-        unlock(uv.notify)
-    end
-    nothing
-end
-
 function _uv_hook_close(uv::FileMonitor)
     lock(uv.notify)
     try
-        uv.active = false
         Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
-        notify(uv.notify, FileEvent())
+        notify(uv.notify)
     finally
         unlock(uv.notify)
     end
@@ -378,174 +579,11 @@ end
 
 isopen(fm::FileMonitor) = fm.handle != C_NULL
 isopen(fm::FolderMonitor) = fm.handle != C_NULL
-isopen(pfw::PollingFileWatcher) = pfw.handle != C_NULL
+isopen(pfw::PollingFileWatcher) = !pfw.closed
 isopen(pfw::_FDWatcher) = pfw.refcount != (0, 0)
 isopen(pfw::FDWatcher) = !pfw.mask.timedout
 
-function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
-    t = @handle_as handle FileMonitor
-    lock(t.notify)
-    try
-        if status != 0
-            notify_error(t.notify, _UVError("FileMonitor", status))
-        else
-            t.events |= events
-            notify(t.notify, FileEvent(events))
-        end
-    finally
-        unlock(t.notify)
-    end
-    nothing
-end
-
-function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
-    t = @handle_as handle FolderMonitor
-    lock(t.notify)
-    try
-        if status != 0
-            notify_error(t.notify, _UVError("FolderMonitor", status))
-        else
-            fname = (filename == C_NULL) ? "" : unsafe_string(convert(Cstring, filename))
-            push!(t.channel, fname => FileEvent(events))
-            notify(t.notify)
-        end
-    finally
-        unlock(t.notify)
-    end
-    nothing
-end
-
-function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32)
-    t = @handle_as handle _FDWatcher
-    lock(t.notify)
-    try
-        if status != 0
-            notify_error(t.notify, _UVError("FDWatcher", status))
-        else
-            t.events |= events
-            if t.active[1] || t.active[2]
-                if isempty(t.notify)
-                    # if we keep hearing about events when nobody appears to be listening,
-                    # stop the poll to save cycles
-                    t.active = (false, false)
-                    ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle)
-                end
-            end
-            notify(t.notify, events)
-        end
-    finally
-        unlock(t.notify)
-    end
-    nothing
-end
-
-function uv_fspollcb(handle::Ptr{Cvoid}, status::Int32, prev::Ptr, curr::Ptr)
-    t = @handle_as handle PollingFileWatcher
-    old_status = t.curr_error
-    t.curr_error = status
-    if status == 0
-        t.curr_stat = StatStruct(convert(Ptr{UInt8}, curr))
-    end
-    if status == 0 || status != old_status
-        prev_stat = StatStruct(convert(Ptr{UInt8}, prev))
-        lock(t.notify)
-        try
-            notify(t.notify, prev_stat)
-        finally
-            unlock(t.notify)
-        end
-    end
-    nothing
-end
-
-function __init__()
-    global uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint))
-    global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Ptr{Cvoid}))
-    global uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
-    global uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
-
-    Base.mkpidlock_hook = mkpidlock
-    Base.trymkpidlock_hook = trymkpidlock
-    Base.parse_pidfile_hook = Pidfile.parse_pidfile
-
-    nothing
-end
-
-function start_watching(t::_FDWatcher)
-    iolock_begin()
-    t.handle == C_NULL && throw(ArgumentError("FDWatcher is closed"))
-    readable = t.refcount[1] > 0
-    writable = t.refcount[2] > 0
-    if t.active[1] != readable || t.active[2] != writable
-        # make sure the READABLE / WRITEABLE state is updated
-        uv_error("FDWatcher (start)",
-                 ccall(:uv_poll_start, Int32, (Ptr{Cvoid}, Int32, Ptr{Cvoid}),
-                       t.handle,
-                       (readable ? UV_READABLE : 0) | (writable ? UV_WRITABLE : 0),
-                       uv_jl_pollcb::Ptr{Cvoid}))
-        t.active = (readable, writable)
-    end
-    iolock_end()
-    nothing
-end
-
-function start_watching(t::PollingFileWatcher)
-    iolock_begin()
-    t.handle == C_NULL && throw(ArgumentError("PollingFileWatcher is closed"))
-    if !t.active
-        uv_error("PollingFileWatcher (start)",
-                 ccall(:uv_fs_poll_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, UInt32),
-                       t.handle, uv_jl_fspollcb::Ptr{Cvoid}, t.file, t.interval))
-        t.active = true
-    end
-    iolock_end()
-    nothing
-end
-
-function stop_watching(t::PollingFileWatcher)
-    iolock_begin()
-    lock(t.notify)
-    try
-        if t.active && isempty(t.notify)
-            t.active = false
-            uv_error("PollingFileWatcher (stop)",
-                     ccall(:uv_fs_poll_stop, Int32, (Ptr{Cvoid},), t.handle))
-        end
-    finally
-        unlock(t.notify)
-    end
-    iolock_end()
-    nothing
-end
-
-function start_watching(t::FileMonitor)
-    iolock_begin()
-    t.handle == C_NULL && throw(ArgumentError("FileMonitor is closed"))
-    if !t.active
-        uv_error("FileMonitor (start)",
-                 ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
-                       t.handle, uv_jl_fseventscb_file::Ptr{Cvoid}, t.file, 0))
-        t.active = true
-    end
-    iolock_end()
-    nothing
-end
-
-function stop_watching(t::FileMonitor)
-    iolock_begin()
-    lock(t.notify)
-    try
-        if t.active && isempty(t.notify)
-            t.active = false
-            uv_error("FileMonitor (stop)",
-                     ccall(:uv_fs_event_stop, Int32, (Ptr{Cvoid},), t.handle))
-        end
-    finally
-        unlock(t.notify)
-    end
-    iolock_end()
-    nothing
-end
+Base.stat(pfw::PollingFileWatcher) = Base.checkstat(@lock pfw.notify pfw.prev_stat)
 
 # n.b. this _wait may return spuriously early with a timedout event
 function _wait(fdw::_FDWatcher, mask::FDEvent)
@@ -557,7 +595,20 @@ function _wait(fdw::_FDWatcher, mask::FDEvent)
         if !isopen(fdw) # !open
             throw(EOFError())
         elseif events.timedout
-            start_watching(fdw) # make sure the poll is active
+            fdw.handle == C_NULL && throw(ArgumentError("FDWatcher is closed"))
+            # start_watching to make sure the poll is active
+            readable = fdw.refcount[1] > 0
+            writable = fdw.refcount[2] > 0
+            if fdw.active[1] != readable || fdw.active[2] != writable
+                # make sure the READABLE / WRITEABLE state is updated
+                uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint))
+                uv_error("FDWatcher (start)",
+                         ccall(:uv_poll_start, Int32, (Ptr{Cvoid}, Int32, Ptr{Cvoid}),
+                               fdw.handle,
+                               (readable ? UV_READABLE : 0) | (writable ? UV_WRITABLE : 0),
+                               uv_jl_pollcb::Ptr{Cvoid}))
+                fdw.active = (readable, writable)
+            end
             iolock_end()
             return FDEvent(wait(fdw.notify)::Int32)
         else
@@ -625,52 +676,88 @@ end
 
 function wait(pfw::PollingFileWatcher)
     iolock_begin()
-    preserve_handle(pfw)
     lock(pfw.notify)
-    local prevstat
+    prevstat = pfw.prev_stat
+    havechange = false
+    timer = nothing
     try
-        start_watching(pfw)
+        # we aren't too strict about the first interval after `wait`, but rather always
+        # check right away to see if it had immediately changed again, and then repeatedly
+        # after interval again until success
+        pfw.closed && throw(ArgumentError("PollingFileWatcher is closed"))
+        timer = pfw.timer
+        pfw.timer = nothing # disable Timer callback
+        # start_watching
+        if !pfw.active
+            preserve_handle(pfw)
+            uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},))
+            err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
+                eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb::Ptr{Cvoid})
+            err == 0 || uv_error("PollingFileWatcher (start)", err) # likely just ENOMEM
+            pfw.active = true
+        end
         iolock_end()
-        prevstat = wait(pfw.notify)::StatStruct
+        havechange = wait(pfw.notify)::Bool
         unlock(pfw.notify)
         iolock_begin()
-        lock(pfw.notify)
-    finally
-        unlock(pfw.notify)
-        unpreserve_handle(pfw)
+    catch
+        # stop_watching: cleanup any timers from before or after starting this wait before it failed, if there are no other watchers
+        latetimer = nothing
+        try
+            if isempty(pfw.notify)
+                latetimer = pfw.timer
+                pfw.timer = nothing
+            end
+        finally
+            unlock(pfw.notify)
+        end
+        if timer !== nothing || latetimer !== nothing
+            iolock_end()
+            timer === nothing || close(timer)
+            latetimer === nothing || close(latetimer)
+            iolock_begin()
+        end
+        rethrow()
     end
-    stop_watching(pfw)
     iolock_end()
-    if pfw.handle == C_NULL
+    timer === nothing || close(timer) # cleanup resources so we don't hang on exit
+    if !havechange # user canceled by calling close
         return prevstat, EOFError()
-    elseif pfw.curr_error != 0
-        return prevstat, _UVError("PollingFileWatcher", pfw.curr_error)
+    end
+    # grab the most up-to-date stat result as of this time, even if it was a bit newer than
+    # the notify call (unlikely, as there would need to be a concurrent call to wait)
+    lock(pfw.notify)
+    currstat = pfw.prev_stat
+    ioerrno = pfw.ioerrno
+    unlock(pfw.notify)
+    if ioerrno == 0
+        @assert currstat.ioerrno == 0
+        return prevstat, currstat
+    elseif ioerrno in (Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL)
+        return prevstat, StatStruct(pfw.file, Ptr{UInt8}(0), ioerrno)
     else
-        return prevstat, pfw.curr_stat
+        return prevstat, _UVError("PollingFileWatcher", ioerrno)
     end
 end
 
 function wait(m::FileMonitor)
-    iolock_begin()
+    m.handle == C_NULL && throw(EOFError())
     preserve_handle(m)
     lock(m.notify)
-    local events
     try
-        start_watching(m)
-        iolock_end()
-        events = wait(m.notify)::FileEvent
-        events |= FileEvent(m.events)
-        m.events = 0
-        unlock(m.notify)
-        iolock_begin()
-        lock(m.notify)
+        while true
+            m.handle == C_NULL && throw(EOFError())
+            events = @atomicswap :not_atomic m.events = 0
+            events == 0 || return FileEvent(events)
+            if m.ioerrno != 0
+                uv_error("FileMonitor", m.ioerrno)
+            end
+            wait(m.notify)
+        end
     finally
         unlock(m.notify)
         unpreserve_handle(m)
     end
-    stop_watching(m)
-    iolock_end()
-    return events
 end
 
 function wait(m::FolderMonitor)
@@ -689,6 +776,7 @@ function wait(m::FolderMonitor)
         end
     return evt::Pair{String, FileEvent}
 end
+Base.take!(m::FolderMonitor) = wait(m) # Channel-like API
 
 
 """
@@ -702,6 +790,10 @@ least one of them must be set to `true`.
 
 The returned value is an object with boolean fields `readable`, `writable`, and `timedout`,
 giving the result of the polling.
+
+This is a thin wrapper over calling `wait` on a [`FDWatcher`](@ref), which implements the
+functionality but requires the user to call `close` manually when finished with it, or risk
+serious crashes.
 """
 function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}}, timeout_s::Real=-1; readable=false, writable=false)
     mask = FDEvent(readable, writable, false, false)
@@ -727,7 +819,7 @@ function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}},
                     end
                 end
             catch ex
-                ex isa EOFError() || rethrow()
+                ex isa EOFError || rethrow()
                 return FDEvent()
             end
         else
@@ -759,6 +851,15 @@ giving the result of watching the file.
 
 This behavior of this function varies slightly across platforms. See
 <https://nodejs.org/api/fs.html#fs_caveats> for more detailed information.
+
+This is a thin wrapper over calling `wait` on a [`FileMonitor`](@ref). This function has a
+small race window between consecutive calls to `watch_file` where the file might change
+without being detected. To avoid this race, use
+
+    fm = FileMonitor(path)
+    wait(fm)
+
+directly, re-using the same `fm` each time you `wait`.
 """
 function watch_file(s::String, timeout_s::Float64=-1.0)
     fm = FileMonitor(s)
@@ -769,7 +870,12 @@ function watch_file(s::String, timeout_s::Float64=-1.0)
                 close(fm)
             end
         end
-        return wait(fm)
+        try
+            return wait(fm)
+        catch ex
+            ex isa EOFError && return FileEvent()
+            rethrow()
+        end
     finally
         close(fm)
         @isdefined(timer) && close(timer)
@@ -780,7 +886,7 @@ watch_file(s::AbstractString, timeout_s::Real=-1) = watch_file(String(s), Float6
 """
     watch_folder(path::AbstractString, timeout_s::Real=-1)
 
-Watches a file or directory `path` for changes until a change has occurred or `timeout_s`
+Watch a file or directory `path` for changes until a change has occurred or `timeout_s`
 seconds have elapsed. This function does not poll the file system and instead uses platform-specific
 functionality to receive notifications from the operating system (e.g. via inotify on Linux).
 See the NodeJS documentation linked below for details.
@@ -794,10 +900,12 @@ giving the event.
 
 This behavior of this function varies slightly across platforms. See
 <https://nodejs.org/api/fs.html#fs_caveats> for more detailed information.
+
+This function is a thin wrapper over calling `wait` on a [`FolderMonitor`](@ref), with added timeout support.
 """
 watch_folder(s::AbstractString, timeout_s::Real=-1) = watch_folder(String(s), timeout_s)
 function watch_folder(s::String, timeout_s::Real=-1)
-    fm = get!(watched_folders, s) do
+    fm = @lock watched_folders get!(watched_folders[], s) do
         return FolderMonitor(s)
     end
     local timer
@@ -844,12 +952,12 @@ It is not recommended to do this while another task is waiting for
 """
 unwatch_folder(s::AbstractString) = unwatch_folder(String(s))
 function unwatch_folder(s::String)
-    fm = pop!(watched_folders, s, nothing)
+    fm = @lock watched_folders pop!(watched_folders[], s, nothing)
     fm === nothing || close(fm)
     nothing
 end
 
-const watched_folders = Dict{String, FolderMonitor}()
+const watched_folders = Lockable(Dict{String, FolderMonitor}())
 
 """
     poll_file(path::AbstractString, interval_s::Real=5.007, timeout_s::Real=-1) -> (previous::StatStruct, current)
@@ -863,11 +971,15 @@ The `previous` status is always a `StatStruct`, but it may have all of the field
 (indicating the file didn't previously exist, or wasn't previously accessible).
 
 The `current` status object may be a `StatStruct`, an `EOFError` (indicating the timeout elapsed),
-or some other `Exception` subtype (if the `stat` operation failed - for example, if the path does not exist).
+or some other `Exception` subtype (if the `stat` operation failed: for example, if the path does not exist).
 
-To determine when a file was modified, compare `current isa StatStruct && mtime(prev) != mtime(current)` to detect
-notification of changes. However, using [`watch_file`](@ref) for this operation is preferred, since
-it is more reliable and efficient, although in some situations it may not be available.
+To determine when a file was modified, compare `!(current isa StatStruct && prev == current)` to detect
+notification of changes to the mtime or inode. However, using [`watch_file`](@ref) for this operation
+is preferred, since it is more reliable and efficient, although in some situations it may not be available.
+
+This is a thin wrapper over calling `wait` on a [`PollingFileWatcher`](@ref), which implements
+the functionality, but this function has a small race window between consecutive calls to
+`poll_file` where the file might change without being detected.
 """
 function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::Real=-1)
     pfw = PollingFileWatcher(s, Float64(interval_seconds))
@@ -878,12 +990,7 @@ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::R
                 close(pfw)
             end
         end
-        statdiff = wait(pfw)
-        if isa(statdiff[2], IOError)
-            # file didn't initially exist, continue watching for it to be created (or the error to change)
-            statdiff = wait(pfw)
-        end
-        return statdiff
+        return wait(pfw)
     finally
         close(pfw)
         @isdefined(timer) && close(timer)
@@ -893,4 +1000,11 @@ end
 include("pidfile.jl")
 import .Pidfile: mkpidlock, trymkpidlock
 
+function __init__()
+    Base.mkpidlock_hook = mkpidlock
+    Base.trymkpidlock_hook = trymkpidlock
+    Base.parse_pidfile_hook = Pidfile.parse_pidfile
+    nothing
+end
+
 end
diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl
index 6d40414e20db2..3a3ac7e754817 100644
--- a/stdlib/FileWatching/src/pidfile.jl
+++ b/stdlib/FileWatching/src/pidfile.jl
@@ -4,20 +4,20 @@ module Pidfile
 export mkpidlock, trymkpidlock
 
 using Base:
-    IOError, UV_EEXIST, UV_ESRCH,
-    Process
-
-using Base.Libc: rand
+    IOError, UV_EEXIST, UV_ESRCH, UV_ENOENT,
+    Process,
+    unsafe_takestring
 
 using Base.Filesystem:
     File, open, JL_O_CREAT, JL_O_RDWR, JL_O_RDONLY, JL_O_EXCL,
     rename, samefile, path_separator
 
-using ..FileWatching: watch_file
+using ..FileWatching: FileMonitor
 using Base.Sys: iswindows
 
 """
-    mkpidlock([f::Function], at::String, [pid::Cint, proc::Process]; kwopts...)
+    mkpidlock([f::Function], at::String, [pid::Cint]; kwopts...)
+    mkpidlock(at::String, proc::Process; kwopts...)
 
 Create a pidfile lock for the path "at" for the current process
 or the process identified by pid or proc. Can take a function to execute once locked,
@@ -32,7 +32,8 @@ Optional keyword arguments:
  - `mode`: file access mode (modified by the process umask). Defaults to world-readable.
  - `poll_interval`: Specify the maximum time to between attempts (if `watch_file` doesn't work)
  - `stale_age`: Delete an existing pidfile (ignoring the lock) if it is older than this many seconds, based on its mtime.
-     The file won't be deleted until 25x longer than this if the pid in the file appears that it may be valid.
+     The file won't be deleted until 5x longer than this if the pid in the file appears that it may be valid.
+     Or 25x longer if `refresh` is overridden to 0 to disable lock refreshing.
      By default this is disabled (`stale_age` = 0), but a typical recommended value would be about 3-5x an
      estimated normal completion time.
  - `refresh`: Keeps a lock from becoming stale by updating the mtime every interval of time that passes.
@@ -42,13 +43,13 @@ Optional keyword arguments:
 function mkpidlock end
 
 """
-    trymkpidlock([f::Function], at::String, [pid::Cint, proc::Process]; kwopts...)
+    trymkpidlock([f::Function], at::String, [pid::Cint]; kwopts...)
+    trymkpidlock(at::String, proc::Process; kwopts...)
 
 Like `mkpidlock` except returns `false` instead of waiting if the file is already locked.
 
 !!! compat "Julia 1.10"
     This function requires at least Julia 1.10.
-
 """
 function trymkpidlock end
 
@@ -63,7 +64,7 @@ mutable struct LockMonitor
         atdir, atname = splitdir(at)
         isempty(atdir) && (atdir = pwd())
         at = realpath(atdir) * path_separator * atname
-        fd = open_exclusive(at; stale_age=stale_age, kwopts...)
+        fd = open_exclusive(at; stale_age, refresh, kwopts...)
         update = nothing
         try
             write_pidfile(fd, pid)
@@ -75,6 +76,7 @@ mutable struct LockMonitor
             lock = new(at, fd, update)
             finalizer(close, lock)
         catch ex
+            update === nothing || close(update)
             tryrmopenfile(at)
             close(fd)
             rethrow(ex)
@@ -98,10 +100,13 @@ end
 function mkpidlock(at::String, proc::Process; kwopts...)
     lock = mkpidlock(at, getpid(proc); kwopts...)
     closer = @async begin
-        wait(proc)
-        close(lock)
+        try
+            wait(proc)
+        finally
+            close(lock)
+        end
     end
-    isdefined(Base, :errormonitor) && Base.errormonitor(closer)
+    Base.errormonitor(closer)
     return lock
 end
 
@@ -184,15 +189,16 @@ function isvalidpid(hostname::AbstractString, pid::Cuint)
 end
 
 """
-    stale_pidfile(path::String, stale_age::Real) :: Bool
+    stale_pidfile(path::String, stale_age::Real, refresh::Real) :: Bool
 
 Helper function for `open_exclusive` for deciding if a pidfile is stale.
 """
-function stale_pidfile(path::String, stale_age::Real)
+function stale_pidfile(path::String, stale_age::Real, refresh::Real)
     pid, hostname, age = parse_pidfile(path)
     age < -stale_age && @warn "filesystem time skew detected" path=path
+    longer_factor = refresh == 0 ? 25 : 5
     if age > stale_age
-        if (age > stale_age * 25) || !isvalidpid(hostname, pid)
+        if (age > stale_age * longer_factor) || !isvalidpid(hostname, pid)
             return true
         end
     end
@@ -219,7 +225,7 @@ struct PidlockedError <: Exception
 end
 
 """
-    open_exclusive(path::String; mode, poll_interval, wait, stale_age) :: File
+    open_exclusive(path::String; mode, poll_interval, wait, stale_age, refresh) :: File
 
 Create a new a file for read-write advisory-exclusive access.
 If `wait` is `false` then error out if the lock files exist
@@ -231,13 +237,14 @@ function open_exclusive(path::String;
                         mode::Integer = 0o444 #= read-only =#,
                         poll_interval::Real = 10 #= seconds =#,
                         wait::Bool = true #= return on failure if false =#,
-                        stale_age::Real = 0 #= disabled =#)
+                        stale_age::Real = 0 #= disabled =#,
+                        refresh::Real = stale_age/2)
     # fast-path: just try to open it
     file = tryopen_exclusive(path, mode)
     file === nothing || return file
     if !wait
         if file === nothing && stale_age > 0
-            if stale_age > 0 && stale_pidfile(path, stale_age)
+            if stale_age > 0 && stale_pidfile(path, stale_age, refresh)
                 @warn "attempting to remove probably stale pidfile" path=path
                 tryrmopenfile(path)
             end
@@ -250,20 +257,44 @@ function open_exclusive(path::String;
         end
     end
     # fall-back: wait for the lock
-
+    watch = Lockable(Core.Box(nothing))
     while true
-        # start the file-watcher prior to checking for the pidfile existence
-        t = @async try
-            watch_file(path, poll_interval)
+        # now try again to create it
+        # try to start the file-watcher prior to checking for the pidfile existence
+        watch = try
+            FileMonitor(path)
         catch ex
             isa(ex, IOError) || rethrow(ex)
-            sleep(poll_interval) # if the watch failed, convert to just doing a sleep
+            ex.code != UV_ENOENT # if the file was deleted in the meantime, don't sleep at all, even if the lock fails
         end
-        # now try again to create it
-        file = tryopen_exclusive(path, mode)
-        file === nothing || return file
-        Base.wait(t) # sleep for a bit before trying again
-        if stale_age > 0 && stale_pidfile(path, stale_age)
+        timeout = nothing
+        if watch isa FileMonitor && stale_age > 0
+            let watch = watch
+                timeout = Timer(stale_age) do t
+                    close(watch)
+                end
+            end
+        end
+        try
+            file = tryopen_exclusive(path, mode)
+            file === nothing || return file
+            if watch isa FileMonitor
+                try
+                    Base.wait(watch) # will time-out after stale_age passes
+                catch ex
+                    isa(ex, EOFError) || isa(ex, IOError) || rethrow(ex)
+                end
+            end
+            if watch === true # if the watch failed, convert to just doing a sleep
+                sleep(poll_interval)
+            end
+        finally
+            # something changed about the path, so watch is now possibly monitoring the wrong file handle
+            # it will need to be recreated just before the next tryopen_exclusive attempt
+            timeout isa Timer && close(timeout)
+            watch isa FileMonitor && close(watch)
+        end
+        if stale_age > 0 && stale_pidfile(path, stale_age, refresh)
             # if the file seems stale, try to remove it before attempting again
             # set stale_age to zero so we won't attempt again, even if the attempt fails
             stale_age -= stale_age
@@ -274,12 +305,12 @@ function open_exclusive(path::String;
 end
 
 function _rand_filename(len::Int=4) # modified from Base.Libc
-    slug = Base.StringVector(len)
+    slug = Base.StringMemory(len)
     chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     for i = 1:len
         slug[i] = chars[(Libc.rand() % length(chars)) + 1]
     end
-    return String(slug)
+    return unsafe_takestring(slug)
 end
 
 function tryrmopenfile(path::String)
diff --git a/stdlib/FileWatching/test/pidfile.jl b/stdlib/FileWatching/test/pidfile.jl
index c2cb0c88a1b1e..3464a24175632 100644
--- a/stdlib/FileWatching/test/pidfile.jl
+++ b/stdlib/FileWatching/test/pidfile.jl
@@ -203,18 +203,33 @@ end
 
 @assert !ispath("pidfile")
 @testset "open_exclusive: break lock" begin
-    # test for stale_age
-    t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
-    try
-        write_pidfile(f, getpid())
-    finally
+    @testset "using stale_age without lock refreshing" begin
+        t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10, refresh=0)::File
+        try
+            write_pidfile(f, getpid())
+        finally
+            close(f)
+        end
+        @test t < 2
+        t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1, refresh=0)::File
         close(f)
+        @test 20 < t < 50
+        rm("pidfile")
+    end
+
+    @testset "using stale_age with lock refreshing on (default)" begin
+        t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
+        try
+            write_pidfile(f, getpid())
+        finally
+            close(f)
+        end
+        @test t < 2
+        t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=5)::File
+        close(f)
+        @test 20 < t < 50
+        rm("pidfile")
     end
-    @test t < 2
-    t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1)::File
-    close(f)
-    @test 20 < t < 50
-    rm("pidfile")
 
     t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
     close(f)
diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl
index 75b17b5f0e511..def555154264d 100644
--- a/stdlib/FileWatching/test/runtests.jl
+++ b/stdlib/FileWatching/test/runtests.jl
@@ -2,6 +2,7 @@
 
 using Test, FileWatching
 using Base: uv_error, Experimental
+using Base.Filesystem: StatStruct
 
 @testset "FileWatching" begin
 
@@ -24,7 +25,7 @@ for i in 1:n
         uv_error("pipe", ccall(:uv_pipe, Cint, (Ptr{NTuple{2, Base.OS_HANDLE}}, Cint, Cint), Ref(pipe_fds, i), 0, 0))
     end
     Ctype = Sys.iswindows() ? Ptr{Cvoid} : Cint
-    FDmax = Sys.iswindows() ? 0x7fff : (n + 60 + (isdefined(Main, :Revise) * 30)) # expectations on reasonable values
+    FDmax = Sys.iswindows() ? typemax(Int32) : (n + 60 + (isdefined(Main, :Revise) * 30)) # expectations on reasonable values
     fd_in_limits =
         0 <= Int(Base.cconvert(Ctype, pipe_fds[i][1])) <= FDmax &&
         0 <= Int(Base.cconvert(Ctype, pipe_fds[i][2])) <= FDmax
@@ -161,19 +162,20 @@ test2_12992()
 #######################################################################
 # This section tests file watchers.                                   #
 #######################################################################
-F_GETPATH = Sys.islinux() || Sys.iswindows() || Sys.isapple()  # platforms where F_GETPATH is available
+F_GETPATH = Sys.islinux() || Sys.iswindows() || Sys.isapple() # platforms where F_GETPATH is available
 F_PATH = F_GETPATH ? "afile.txt" : ""
 dir = mktempdir()
 file = joinpath(dir, "afile.txt")
 
 # initialize a watch_folder instance and create afile.txt
 function test_init_afile()
-    @test isempty(FileWatching.watched_folders)
+    watched_folders = FileWatching.watched_folders
+    @test @lock watched_folders isempty(watched_folders[])
     @test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))
     @test @elapsed(@test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))) <= 0.5
-    @test length(FileWatching.watched_folders) == 1
+    @test @lock(watched_folders, length(FileWatching.watched_folders[])) == 1
     @test unwatch_folder(dir) === nothing
-    @test isempty(FileWatching.watched_folders)
+    @test @lock watched_folders isempty(watched_folders[])
     @test 0.002 <= @elapsed(@test(watch_folder(dir, 0.004) == ("" => FileWatching.FileEvent())))
     @test 0.002 <= @elapsed(@test(watch_folder(dir, 0.004) == ("" => FileWatching.FileEvent()))) <= 0.5
     @test unwatch_folder(dir) === nothing
@@ -203,7 +205,7 @@ function test_init_afile()
     @test unwatch_folder(dir) === nothing
     @test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))
     @test 0.9 <= @elapsed(@test(watch_folder(dir, 1) == ("" => FileWatching.FileEvent())))
-    @test length(FileWatching.watched_folders) == 1
+    @test @lock(watched_folders, length(FileWatching.watched_folders[])) == 1
     nothing
 end
 
@@ -218,7 +220,7 @@ function test_timeout(tval)
         @async test_file_poll(channel, 10, tval)
         tr = take!(channel)
     end
-    @test tr[1] === Base.Filesystem.StatStruct() && tr[2] === EOFError()
+    @test ispath(tr[1]::StatStruct) && tr[2] === EOFError()
     @test tval <= t_elapsed
 end
 
@@ -231,7 +233,7 @@ function test_touch(slval)
     write(f, "Hello World\n")
     close(f)
     tr = take!(channel)
-    @test ispath(tr[1]) && ispath(tr[2])
+    @test ispath(tr[1]::StatStruct) && ispath(tr[2]::StatStruct)
     fetch(t)
 end
 
@@ -276,7 +278,7 @@ function test_dirmonitor_wait(tval)
             end
         end
         fname, events = wait(fm)::Pair
-        @test fname == F_PATH
+        @test fname == basename(file)
         @test events.changed && !events.timedout && !events.renamed
         close(fm)
     end
@@ -435,16 +437,21 @@ end
 @test_throws(Base._UVError("FolderMonitor (start)", Base.UV_ENOENT),
              watch_folder("____nonexistent_file", 10))
 @test(@elapsed(
-    @test(poll_file("____nonexistent_file", 1, 3.1) ===
-          (Base.Filesystem.StatStruct(), EOFError()))) > 3)
+    @test(poll_file("____nonexistent_file", 1, 3.1) ==
+          (StatStruct(), EOFError()))) > 3)
 
 unwatch_folder(dir)
-@test isempty(FileWatching.watched_folders)
+@test @lock FileWatching.watched_folders isempty(FileWatching.watched_folders[])
 rm(file)
 rm(dir)
 
+# Test that creating a FDWatcher with a (probably) negative FD fails
+@test_throws ArgumentError FDWatcher(RawFD(-1), true, true)
+
 @testset "Pidfile" begin
     include("pidfile.jl")
 end
 
+@test isempty(Docs.undocumented_names(FileWatching))
+
 end # testset
diff --git a/stdlib/Future/Project.toml b/stdlib/Future/Project.toml
index ffdbaf94b9853..c09489812ce01 100644
--- a/stdlib/Future/Project.toml
+++ b/stdlib/Future/Project.toml
@@ -1,5 +1,6 @@
 name = "Future"
 uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+version = "1.11.0"
 
 [deps]
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/stdlib/Future/docs/src/index.md b/stdlib/Future/docs/src/index.md
index dcb1a36541b6e..99250296f2c7d 100644
--- a/stdlib/Future/docs/src/index.md
+++ b/stdlib/Future/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Future/docs/src/index.md"
+```
+
 # Future
 
 The `Future` module implements future behavior of already existing functions,
diff --git a/stdlib/Future/src/Future.jl b/stdlib/Future/src/Future.jl
index 746f6e149a47d..65d230afeb720 100644
--- a/stdlib/Future/src/Future.jl
+++ b/stdlib/Future/src/Future.jl
@@ -28,7 +28,7 @@ copy!(dst::AbstractArray, src::AbstractArray) = Base.copy!(dst, src)
 ## randjump
 
 """
-    randjump(r::MersenneTwister, steps::Integer) -> MersenneTwister
+    randjump(r::MersenneTwister, steps::Integer)::MersenneTwister
 
 Create an initialized `MersenneTwister` object, whose state is moved forward
 (without generating numbers) from `r` by `steps` steps.
diff --git a/stdlib/Future/test/runtests.jl b/stdlib/Future/test/runtests.jl
index 6deffe74d891c..6e02f17358ab3 100644
--- a/stdlib/Future/test/runtests.jl
+++ b/stdlib/Future/test/runtests.jl
@@ -2,3 +2,7 @@
 
 using Test
 using Future
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Future))
+end
diff --git a/stdlib/GMP_jll/Project.toml b/stdlib/GMP_jll/Project.toml
index 510b6f6a49c60..c17e5311a7d80 100644
--- a/stdlib/GMP_jll/Project.toml
+++ b/stdlib/GMP_jll/Project.toml
@@ -1,9 +1,10 @@
 name = "GMP_jll"
 uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d"
-version = "6.2.1+2"
+version = "6.3.0+2"
 
 [deps]
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 
 [compat]
diff --git a/stdlib/GMP_jll/src/GMP_jll.jl b/stdlib/GMP_jll/src/GMP_jll.jl
index fde2fc15acf90..12a3fc21bd893 100644
--- a/stdlib/GMP_jll/src/GMP_jll.jl
+++ b/stdlib/GMP_jll/src/GMP_jll.jl
@@ -3,51 +3,68 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/GMP_jll.jl
 baremodule GMP_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
+if !Sys.isapple()
+    using CompilerSupportLibraries_jll
+end
 
 export libgmp, libgmpxx
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libgmp_handle::Ptr{Cvoid} = C_NULL
+
 libgmp_path::String = ""
-libgmpxx_handle::Ptr{Cvoid} = C_NULL
+const libgmp = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libgmp-10.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libgmp.10.dylib")
+    else
+        BundledLazyLibraryPath("libgmp.so.10")
+    end
+)
+
 libgmpxx_path::String = ""
+const libgmpxx = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libgmpxx-4.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libgmpxx.4.dylib")
+    else
+        BundledLazyLibraryPath("libgmpxx.so.4")
+    end,
+    dependencies = if Sys.isfreebsd()
+        LazyLibrary[libgmp, libgcc_s]
+    elseif Sys.isapple()
+        LazyLibrary[libgmp]
+    else
+        LazyLibrary[libgmp, libstdcxx, libgcc_s]
+    end
+)
 
-if Sys.iswindows()
-    const libgmp = "libgmp-10.dll"
-    const libgmpxx = "libgmpxx-4.dll"
-elseif Sys.isapple()
-    const libgmp = "@rpath/libgmp.10.dylib"
-    const libgmpxx = "@rpath/libgmpxx.4.dylib"
-else
-    const libgmp = "libgmp.so.10"
-    const libgmpxx = "libgmpxx.so.4"
+function eager_mode()
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
+    dlopen(libgmp)
+    dlopen(libgmpxx)
 end
+is_available() = true
 
 function __init__()
-    global libgmp_handle = dlopen(libgmp)
-    global libgmp_path = dlpath(libgmp_handle)
-    global libgmpxx_handle = dlopen(libgmpxx)
-    global libgmpxx_path = dlpath(libgmpxx_handle)
+    global libgmp_path = string(libgmp.path)
+    global libgmpxx_path = string(libgmpxx.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libgmp_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libgmp_path() = libgmp_path
-get_libgmpxx_path() = libgmpxx_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module GMP_jll
diff --git a/stdlib/GMP_jll/test/runtests.jl b/stdlib/GMP_jll/test/runtests.jl
index 7c0d877945231..ad0e2dc8a4944 100644
--- a/stdlib/GMP_jll/test/runtests.jl
+++ b/stdlib/GMP_jll/test/runtests.jl
@@ -3,6 +3,6 @@
 using Test, Libdl, GMP_jll
 
 @testset "GMP_jll" begin
-    vn = VersionNumber(unsafe_string(unsafe_load(cglobal((:__gmp_version, libgmp), Ptr{Cchar}))))
-    @test vn == v"6.2.1"
+    vn = VersionNumber(unsafe_string(unsafe_load(cglobal(dlsym(libgmp, :__gmp_version), Ptr{Cchar}))))
+    @test vn == v"6.3.0"
 end
diff --git a/stdlib/InteractiveUtils/Project.toml b/stdlib/InteractiveUtils/Project.toml
index e13902375e005..53cc9218eff5d 100644
--- a/stdlib/InteractiveUtils/Project.toml
+++ b/stdlib/InteractiveUtils/Project.toml
@@ -1,5 +1,6 @@
 name = "InteractiveUtils"
 uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+version = "1.11.0"
 
 [deps]
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
diff --git a/stdlib/InteractiveUtils/docs/src/index.md b/stdlib/InteractiveUtils/docs/src/index.md
index 5ee8e57adc848..69b68a27e4e81 100644
--- a/stdlib/InteractiveUtils/docs/src/index.md
+++ b/stdlib/InteractiveUtils/docs/src/index.md
@@ -1,6 +1,12 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/InteractiveUtils/docs/src/index.md"
+```
+
 # [Interactive Utilities](@id man-interactive-utils)
 
-This module is intended for interactive work. It is loaded automatically in [interactive mode](@ref command-line-interface).
+The `InteractiveUtils` module provides utilities for interactive use of Julia,
+such as code introspection and clipboard access.
+It is intended for interactive work and is loaded automatically in [interactive mode](@ref command-line-interface).
 
 ```@docs
 InteractiveUtils.apropos
@@ -27,5 +33,7 @@ InteractiveUtils.@code_llvm
 InteractiveUtils.code_native
 InteractiveUtils.@code_native
 InteractiveUtils.@time_imports
+InteractiveUtils.@trace_compile
+InteractiveUtils.@trace_dispatch
 InteractiveUtils.clipboard
 ```
diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
index 25f06250c3f8f..60e2016cf910b 100644
--- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl
+++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
@@ -1,16 +1,22 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+"""
+The `InteractiveUtils` module provides utilities for interactive use of Julia,
+such as code introspection and clipboard access.
+It is intended for interactive work and is loaded automatically in interactive mode.
+"""
 module InteractiveUtils
 
 Base.Experimental.@optlevel 1
 
 export apropos, edit, less, code_warntype, code_llvm, code_native, methodswith, varinfo,
     versioninfo, subtypes, supertypes, @which, @edit, @less, @functionloc, @code_warntype,
-    @code_typed, @code_lowered, @code_llvm, @code_native, @time_imports, clipboard
+    @code_typed, @code_lowered, @code_llvm, @code_native, @time_imports, clipboard, @trace_compile, @trace_dispatch,
+    @activate
 
 import Base.Docs.apropos
 
-using Base: unwrap_unionall, rewrap_unionall, isdeprecated, Bottom, show_unquoted, summarysize,
+using Base: unwrap_unionall, rewrap_unionall, isdeprecated, Bottom, summarysize,
     signature_type, format_bytes
 using Base.Libc
 using Markdown
@@ -23,12 +29,12 @@ include("clipboard.jl")
 """
     varinfo(m::Module=Main, pattern::Regex=r""; all=false, imported=false, recursive=false, sortby::Symbol=:name, minsize::Int=0)
 
-Return a markdown table giving information about exported global variables in a module, optionally restricted
+Return a markdown table giving information about public global variables in a module, optionally restricted
 to those matching `pattern`.
 
 The memory consumption estimate is an approximate lower bound on the size of the internal structure of the object.
 
-- `all` : also list non-exported objects defined in the module, deprecated objects, and compiler-generated objects.
+- `all` : also list non-public objects defined in the module, deprecated objects, and compiler-generated objects.
 - `imported` : also list objects explicitly imported from other modules.
 - `recursive` : recursively include objects in sub-modules, observing the same settings in each.
 - `sortby` : the column to sort results by. Options are `:name` (default), `:size`, and `:summary`.
@@ -47,7 +53,7 @@ function varinfo(m::Module=Base.active_module(), pattern::Regex=r""; all::Bool =
             if !isdefined(m2, v) || !occursin(pattern, string(v))
                 continue
             end
-            value = getfield(m2, v)
+            value = getglobal(m2, v)
             isbuiltin = value === Base || value === Base.active_module() || value === Core
             if recursive && !isbuiltin && isa(value, Module) && value !== m2 && nameof(value) === v && parentmodule(value) === m2
                 push!(workqueue, (value, "$prep$v."))
@@ -96,11 +102,28 @@ See also: [`VERSION`](@ref).
 """
 function versioninfo(io::IO=stdout; verbose::Bool=false)
     println(io, "Julia Version $VERSION")
-    if !isempty(Base.GIT_VERSION_INFO.commit_short)
+    if !isempty(Base.GIT_VERSION_INFO.commit_short_raw)
         println(io, "Commit $(Base.GIT_VERSION_INFO.commit_short) ($(Base.GIT_VERSION_INFO.date_string))")
     end
-    if Base.isdebugbuild()
-        println(io, "DEBUG build")
+    official_release = Base.TAGGED_RELEASE_BANNER == "Official https://julialang.org release"
+    if Base.isdebugbuild() || !isempty(Base.TAGGED_RELEASE_BANNER) || (Base.GIT_VERSION_INFO.tagged_commit && !official_release)
+        println(io, "Build Info:")
+        if Base.isdebugbuild()
+            println(io, "  DEBUG build")
+        end
+        if !isempty(Base.TAGGED_RELEASE_BANNER)
+            println(io, "  ", Base.TAGGED_RELEASE_BANNER)
+        end
+        if Base.GIT_VERSION_INFO.tagged_commit && !official_release
+            println(io,
+                """
+
+                    Note: This is an unofficial build, please report bugs to the project
+                    responsible for this build and not to the Julia project unless you can
+                    reproduce the issue using official builds available at https://julialang.org
+                """
+            )
+        end
     end
     println(io, "Platform Info:")
     println(io, "  OS: ", Sys.iswindows() ? "Windows" : Sys.isapple() ?
@@ -125,7 +148,7 @@ function versioninfo(io::IO=stdout; verbose::Bool=false)
     if verbose
         cpuio = IOBuffer() # print cpu_summary with correct alignment
         Sys.cpu_summary(cpuio)
-        for (i, line) in enumerate(split(chomp(String(take!(cpuio))), "\n"))
+        for (i, line) in enumerate(split(chomp(takestring!(cpuio)), "\n"))
             prefix = i == 1 ? "  CPU: " : "       "
             println(io, prefix, line)
         end
@@ -142,9 +165,10 @@ function versioninfo(io::IO=stdout; verbose::Bool=false)
         println(io)
     end
     println(io, "  WORD_SIZE: ", Sys.WORD_SIZE)
-    println(io, "  LIBM: ",Base.libm_name)
     println(io, "  LLVM: libLLVM-",Base.libllvm_version," (", Sys.JIT, ", ", Sys.CPU_NAME, ")")
-    println(io, "  Threads: ", Threads.maxthreadid(), " on ", Sys.CPU_THREADS, " virtual cores")
+    println(io, "  GC: ", unsafe_string(ccall(:jl_gc_active_impl, Ptr{UInt8}, ())))
+    println(io, """Threads: $(Threads.nthreads(:default)) default, $(Threads.nthreads(:interactive)) interactive, \
+      $(Threads.ngcthreads()) GC (on $(Sys.CPU_THREADS) virtual cores)""")
 
     function is_nonverbose_env(k::String)
         return occursin(r"^JULIA_|^DYLD_|^LD_", k)
@@ -176,7 +200,7 @@ end
 
 # `methodswith` -- shows a list of methods using the type given
 """
-    methodswith(typ[, module or function]; supertypes::Bool=false])
+    methodswith(typ[, module or function]; supertypes::Bool=false)
 
 Return an array of methods with an argument of type `typ`.
 
@@ -185,6 +209,8 @@ The optional second argument restricts the search to a particular module or func
 
 If keyword `supertypes` is `true`, also return arguments with a parent type of `typ`,
 excluding type `Any`.
+
+See also: [`methods`](@ref).
 """
 function methodswith(@nospecialize(t::Type), @nospecialize(f::Base.Callable), meths = Method[]; supertypes::Bool=false)
     for d in methods(f)
@@ -206,8 +232,8 @@ end
 function _methodswith(@nospecialize(t::Type), m::Module, supertypes::Bool)
     meths = Method[]
     for nm in names(m)
-        if isdefined(m, nm)
-            f = getfield(m, nm)
+        if isdefinedglobal(m, nm)
+            f = getglobal(m, nm)
             if isa(f, Base.Callable)
                 methodswith(t, f, meths; supertypes = supertypes)
             end
@@ -238,8 +264,8 @@ function _subtypes_in!(mods::Array, x::Type)
         m = pop!(mods)
         xt = xt::DataType
         for s in names(m, all = true)
-            if isdefined(m, s) && !isdeprecated(m, s)
-                t = getfield(m, s)
+            if !isdeprecated(m, s) && isdefinedglobal(m, s)
+                t = getglobal(m, s)
                 dt = isa(t, UnionAll) ? unwrap_unionall(t) : t
                 if isa(dt, DataType)
                     if dt.name.name === s && dt.name.module == m && supertype(dt).name == xt.name
@@ -314,7 +340,7 @@ export peakflops
 function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false)
     # Base.depwarn("`peakflops` has moved to the LinearAlgebra module, " *
     #              "add `using LinearAlgebra` to your imports.", :peakflops)
-    let LinearAlgebra = Base.require(Base.PkgId(
+    let LinearAlgebra = Base.require_stdlib(Base.PkgId(
             Base.UUID((0x37e2e46d_f89d_539d,0xb4ee_838fcccc9c8e)), "LinearAlgebra"))
         return LinearAlgebra.peakflops(n, eltype=eltype, ntrials=ntrials, parallel=parallel)
     end
@@ -329,14 +355,15 @@ function report_bug(kind)
     if Base.locate_package(BugReportingId) === nothing
         @info "Package `BugReporting` not found - attempting temporary installation"
         # Create a temporary environment and add BugReporting
-        let Pkg = Base.require(Base.PkgId(
+        let Pkg = Base.require_stdlib(Base.PkgId(
             Base.UUID((0x44cfe95a_1eb2_52ea,0xb672_e2afdf69b78f)), "Pkg"))
             mktempdir() do tmp
                 old_load_path = copy(LOAD_PATH)
                 push!(empty!(LOAD_PATH), joinpath(tmp, "Project.toml"))
                 old_active_project = Base.ACTIVE_PROJECT[]
                 Base.ACTIVE_PROJECT[] = nothing
-                Pkg.add(Pkg.PackageSpec(BugReportingId.name, BugReportingId.uuid))
+                pkgspec = @invokelatest Pkg.PackageSpec(BugReportingId.name, BugReportingId.uuid)
+                @invokelatest Pkg.add(pkgspec)
                 BugReporting = Base.require(BugReportingId)
                 append!(empty!(LOAD_PATH), old_load_path)
                 Base.ACTIVE_PROJECT[] = old_active_project
@@ -345,7 +372,7 @@ function report_bug(kind)
     else
         BugReporting = Base.require(BugReportingId)
     end
-    return Base.invokelatest(BugReporting.make_interactive_report, kind, ARGS)
+    return @invokelatest BugReporting.make_interactive_report(kind, ARGS)
 end
 
 end
diff --git a/stdlib/InteractiveUtils/src/clipboard.jl b/stdlib/InteractiveUtils/src/clipboard.jl
index c2abda9a60cc3..1cbdff9f45537 100644
--- a/stdlib/InteractiveUtils/src/clipboard.jl
+++ b/stdlib/InteractiveUtils/src/clipboard.jl
@@ -100,7 +100,7 @@ elseif Sys.iswindows()
         pdata == C_NULL && return cleanup(:GlobalAlloc)
         plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), pdata)
         plock == C_NULL && return cleanup(:GlobalLock)
-        GC.@preserve x_u16 memcpy(plock, Base.unsafe_convert(Ptr{UInt16}, x_u16), sizeof(x_u16))
+        GC.@preserve x_u16 memcpy(plock, Base.unsafe_convert(Ptr{UInt16}, Base.cconvert(Ptr{UInt16}, x_u16)), sizeof(x_u16))
         unlock = ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), pdata)
         (unlock == 0 && Libc.GetLastError() == 0) || return cleanup(:GlobalUnlock) # this should never fail
         pset = ccall((:SetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint, Ptr{UInt16}), 13, pdata) # CF_UNICODETEXT
@@ -154,7 +154,7 @@ Send a printed form of `x` to the operating system clipboard ("copy").
 clipboard(x)
 
 """
-    clipboard() -> String
+    clipboard()::String
 
 Return a string with the contents of the operating system clipboard ("paste").
 """
diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl
index 646028575d052..fad11c7c3038c 100644
--- a/stdlib/InteractiveUtils/src/codeview.jl
+++ b/stdlib/InteractiveUtils/src/codeview.jl
@@ -20,6 +20,28 @@ const llstyle = Dict{Symbol, Tuple{Bool, Union{Symbol, Int}}}(
     :funcname    => (false, :light_yellow),
 )
 
+struct ArgInfo
+    oc::Union{Core.OpaqueClosure,Nothing}
+    tt::Type{<:Tuple}
+
+    # Construct from a function object + argtypes
+    function ArgInfo(@nospecialize(f), @nospecialize(t))
+        if isa(f, Core.Builtin)
+            throw(ArgumentError("argument is not a generic function"))
+        elseif f isa Core.OpaqueClosure
+            return new(f, Base.to_tuple_type(t))
+        else
+            return new(nothing, signature_type(f, t))
+        end
+    end
+
+    # Construct from argtypes (incl. arg0)
+    function ArgInfo(@nospecialize(argtypes::Union{Tuple,Type{<:Tuple}}))
+        tt = Base.to_tuple_type(argtypes)
+        return new(nothing, tt)
+    end
+end
+
 function printstyled_ll(io::IO, x, s::Symbol, trailing_spaces="")
     printstyled(io, x, bold=llstyle[s][1], color=llstyle[s][2])
     print(io, trailing_spaces)
@@ -54,110 +76,136 @@ function is_expected_union(u::Union)
     return true
 end
 
+function print_warntype_codeinfo(io::IO, src::Core.CodeInfo, @nospecialize(rettype), nargs::Int; lineprinter, label_dynamic_calls)
+    if src.slotnames !== nothing
+        slotnames = Base.sourceinfo_slotnames(src)
+        io = IOContext(io, :SOURCE_SLOTNAMES => slotnames)
+        slottypes = src.slottypes
+        nargs > 0 && println(io, "Arguments")
+        for i = 1:length(slotnames)
+            if i == nargs + 1
+                println(io, "Locals")
+            end
+            print(io, "  ", slotnames[i])
+            if isa(slottypes, Vector{Any})
+                warntype_type_printer(io; type=slottypes[i], used=true)
+            end
+            println(io)
+        end
+    end
+    print(io, "Body")
+    warntype_type_printer(io; type=rettype, used=true)
+    println(io)
+    irshow_config = Base.IRShow.IRShowConfig(lineprinter(src), warntype_type_printer; label_dynamic_calls)
+    Base.IRShow.show_ir(io, src, irshow_config)
+    println(io)
+end
+
+function print_warntype_mi(io::IO, mi::Core.MethodInstance)
+    println(io, mi)
+    print(io, "  from ")
+    println(io, mi.def)
+    if !isempty(mi.sparam_vals)
+        println(io, "Static Parameters")
+        sig = mi.def.sig
+        warn_color = Base.warn_color() # more mild user notification
+        for i = 1:length(mi.sparam_vals)
+            sig = sig::UnionAll
+            name = sig.var.name
+            val = mi.sparam_vals[i]
+            print_highlighted(io::IO, v::String, color::Symbol) =
+                if highlighting[:warntype]
+                    Base.printstyled(io, v; color)
+                else
+                    Base.print(io, v)
+                end
+            if val isa TypeVar
+                if val.lb === Union{}
+                    print(io, "  ", name, " <: ")
+                    print_highlighted(io, "$(val.ub)", warn_color)
+                elseif val.ub === Any
+                    print(io, "  ", sig.var.name, " >: ")
+                    print_highlighted(io, "$(val.lb)", warn_color)
+                else
+                    print(io, "  ")
+                    print_highlighted(io, "$(val.lb)", warn_color)
+                    print(io, " <: ", sig.var.name, " <: ")
+                    print_highlighted(io, "$(val.ub)", warn_color)
+                end
+            elseif val isa typeof(Vararg)
+                print(io, "  ", name, "::")
+                print_highlighted(io, "Int", warn_color)
+            else
+                print(io, "  ", sig.var.name, " = ")
+                print_highlighted(io, "$(val)", :cyan) # show the "good" type
+            end
+            println(io)
+            sig = sig.body
+        end
+    end
+end
+
 """
     code_warntype([io::IO], f, types; debuginfo=:default)
 
 Prints lowered and type-inferred ASTs for the methods matching the given generic function
 and type signature to `io` which defaults to `stdout`. The ASTs are annotated in such a way
-as to cause "non-leaf" types which may be problematic for performance to be emphasized
+as to cause non-concrete types which may be problematic for performance to be emphasized
 (if color is available, displayed in red). This serves as a warning of potential type instability.
 
-Not all non-leaf types are particularly problematic for performance, and the performance
+Not all non-concrete types are particularly problematic for performance, and the performance
 characteristics of a particular type is an implementation detail of the compiler.
 `code_warntype` will err on the side of coloring types red if they might be a performance
 concern, so some types may be colored red even if they do not impact performance.
 Small unions of concrete types are usually not a concern, so these are highlighted in yellow.
 
-Keyword argument `debuginfo` may be one of `:source` or `:none` (default), to specify the verbosity of code comments.
+Keyword argument `debuginfo` may be one of `:source`, `:none` or `:default`, to specify the verbosity of code comments.
+Unless the user changes `Base.IRShow.default_debuginfo[]`, the value `:default` is equivalent to `:source`.
+
+See the [`@code_warntype`](@ref man-code-warntype) section in the Performance Tips page of the manual for more information.
 
-See [`@code_warntype`](@ref man-code-warntype) for more information.
+See also: [`@code_warntype`](@ref), [`code_typed`](@ref), [`code_lowered`](@ref), [`code_llvm`](@ref), [`code_native`](@ref).
 """
-function code_warntype(io::IO, @nospecialize(f), @nospecialize(t=Base.default_tt(f));
+function code_warntype(io::IO, arginfo::ArgInfo;
+                       world=Base.get_world_counter(),
+                       interp::Base.Compiler.AbstractInterpreter=Base.Compiler.NativeInterpreter(world),
                        debuginfo::Symbol=:default, optimize::Bool=false, kwargs...)
+    (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
+        error("code reflection cannot be used from generated functions")
     debuginfo = Base.IRShow.debuginfo(debuginfo)
     lineprinter = Base.IRShow.__debuginfo[debuginfo]
-    for (src, rettype) in code_typed(f, t; optimize, kwargs...)
-        if !(src isa Core.CodeInfo)
-            println(io, src)
-            println(io, "  failed to infer")
-            continue
-        end
-        lambda_io::IOContext = io
-        p = src.parent
-        nargs::Int = 0
-        if p isa Core.MethodInstance
-            println(io, p)
-            print(io, "  from ")
-            println(io, p.def)
-            p.def isa Method && (nargs = p.def.nargs)
-            if !isempty(p.sparam_vals)
-                println(io, "Static Parameters")
-                sig = p.def.sig
-                warn_color = Base.warn_color() # more mild user notification
-                for i = 1:length(p.sparam_vals)
-                    sig = sig::UnionAll
-                    name = sig.var.name
-                    val = p.sparam_vals[i]
-                    print_highlighted(io::IO, v::String, color::Symbol) =
-                        if highlighting[:warntype]
-                            Base.printstyled(io, v; color)
-                        else
-                            Base.print(io, v)
-                        end
-                    if val isa TypeVar
-                        if val.lb === Union{}
-                            print(io, "  ", name, " <: ")
-                            print_highlighted(io, "$(val.ub)", warn_color)
-                        elseif val.ub === Any
-                            print(io, "  ", sig.var.name, " >: ")
-                            print_highlighted(io, "$(val.lb)", warn_color)
-                        else
-                            print(io, "  ")
-                            print_highlighted(io, "$(val.lb)", warn_color)
-                            print(io, " <: ", sig.var.name, " <: ")
-                            print_highlighted(io, "$(val.ub)", warn_color)
-                        end
-                    elseif val isa typeof(Vararg)
-                        print(io, "  ", name, "::")
-                        print_highlighted(io, "Int", warn_color)
-                    else
-                        print(io, "  ", sig.var.name, " = ")
-                        print_highlighted(io, "$(val)", :cyan) # show the "good" type
-                    end
-                    println(io)
-                    sig = sig.body
-                end
-            end
-        end
-        if src.slotnames !== nothing
-            slotnames = Base.sourceinfo_slotnames(src)
-            lambda_io = IOContext(lambda_io, :SOURCE_SLOTNAMES => slotnames)
-            slottypes = src.slottypes
-            nargs > 0 && println(io, "Arguments")
-            for i = 1:length(slotnames)
-                if i == nargs + 1
-                    println(io, "Locals")
-                end
-                print(io, "  ", slotnames[i])
-                if isa(slottypes, Vector{Any})
-                    warntype_type_printer(io; type=slottypes[i], used=true)
-                end
-                println(io)
-            end
+    nargs::Int = 0
+    if arginfo.oc !== nothing
+        (; oc, tt) = arginfo
+        isa(oc.source, Method) && (nargs = oc.source.nargs)
+        print_warntype_codeinfo(io, Base.code_typed_opaque_closure(oc, tt)[1]..., nargs;
+                                lineprinter, label_dynamic_calls = optimize)
+        return nothing
+    end
+    tt = arginfo.tt
+    matches = findall(tt, Base.Compiler.method_table(interp))
+    matches === nothing && Base.raise_match_failure(:code_warntype, tt)
+    for match in matches.matches
+        match = match::Core.MethodMatch
+        src = Base.Compiler.typeinf_code(interp, match, optimize)
+        mi = Base.Compiler.specialize_method(match)
+        mi.def isa Method && (nargs = (mi.def::Method).nargs)
+        print_warntype_mi(io, mi)
+        if src isa Core.CodeInfo
+            print_warntype_codeinfo(io, src, src.rettype, nargs;
+                                    lineprinter, label_dynamic_calls = optimize)
+        else
+            println(io, "  inference not successful")
         end
-        print(io, "Body")
-        warntype_type_printer(io; type=rettype, used=true)
-        println(io)
-        irshow_config = Base.IRShow.IRShowConfig(lineprinter(src), warntype_type_printer)
-        Base.IRShow.show_ir(lambda_io, src, irshow_config)
-        println(io)
     end
     nothing
 end
-code_warntype(@nospecialize(f), @nospecialize(t=Base.default_tt(f)); kwargs...) =
-    code_warntype(stdout, f, t; kwargs...)
+code_warntype(io::IO, @nospecialize(f), @nospecialize(tt=Base.default_tt(f)); kwargs...) = code_warntype(io, ArgInfo(f, tt); kwargs...)
+code_warntype(io::IO, @nospecialize(argtypes::Union{Tuple,Type{<:Tuple}}); kwargs...) = code_warntype(io, ArgInfo(argtypes); kwargs...)
+code_warntype(f; kwargs...) = (@nospecialize; code_warntype(stdout, f; kwargs...))
+code_warntype(f, argtypes; kwargs...) = (@nospecialize; code_warntype(stdout, f, argtypes; kwargs...))
 
-import Base.CodegenParams
+using Base: CodegenParams
 
 const GENERIC_SIG_WARNING = "; WARNING: This code may not match what actually runs.\n"
 const OC_MISMATCH_WARNING =
@@ -168,40 +216,30 @@ const OC_MISMATCH_WARNING =
 
 # Printing code representations in IR and assembly
 
-function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool,
-                        raw::Bool, dump_module::Bool, syntax::Symbol,
-                        optimize::Bool, debuginfo::Symbol, binary::Bool)
-        params = CodegenParams(debug_info_kind=Cint(0),
-                               safepoint_on_entry=raw, gcstack_arg=raw)
-        _dump_function(f, t, native, wrapper, raw, dump_module, syntax,
-                       optimize, debuginfo, binary, params)
-end
-function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool,
+function _dump_function(arginfo::ArgInfo, native::Bool, wrapper::Bool,
                         raw::Bool, dump_module::Bool, syntax::Symbol,
-                        optimize::Bool, debuginfo::Symbol, binary::Bool, params::CodegenParams)
+                        optimize::Bool, debuginfo::Symbol, binary::Bool,
+                        params::CodegenParams=CodegenParams(debug_info_kind=Cint(0), debug_info_level=Cint(2), safepoint_on_entry=raw, gcstack_arg=raw))
     ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
-    if isa(f, Core.Builtin)
-        throw(ArgumentError("argument is not a generic function"))
-    end
     warning = ""
     # get the MethodInstance for the method match
-    if !isa(f, Core.OpaqueClosure)
+    if arginfo.oc === nothing
         world = Base.get_world_counter()
-        match = Base._which(signature_type(f, t); world)
-        mi = Core.Compiler.specialize_method(match)
+        match = Base._which(arginfo.tt; world)
+        mi = Base.specialize_method(match)
         # TODO: use jl_is_cacheable_sig instead of isdispatchtuple
         isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING)
     else
-        world = UInt64(f.world)
-        if Core.Compiler.is_source_inferred(f.source.source)
+        (; oc, tt) = arginfo
+        world = UInt64(oc.world)
+        if !isdefined(oc.source, :source)
             # OC was constructed from inferred source. There's only one
             # specialization and we can't infer anything more precise either.
-            world = f.source.primary_world
-            mi = f.source.specializations::Core.MethodInstance
-            Core.Compiler.hasintersect(typeof(f).parameters[1], t) || (warning = OC_MISMATCH_WARNING)
+            world = oc.source.primary_world
+            mi = oc.source.specializations::Core.MethodInstance
+            Base.hasintersect(typeof(oc).parameters[1], tt) || (warning = OC_MISMATCH_WARNING)
         else
-            mi = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), t.parameters...}, Core.svec())
-            actual = isdispatchtuple(mi.specTypes)
+            mi = Base.specialize_method(oc.source, Tuple{typeof(oc.captures), tt.parameters...}, Core.svec())
             isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING)
         end
     end
@@ -215,15 +253,29 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
         if syntax !== :att && syntax !== :intel
             throw(ArgumentError("'syntax' must be either :intel or :att"))
         end
-        if dump_module
-            # we want module metadata, so use LLVM to generate assembly output
-            str = _dump_function_native_assembly(mi, world, wrapper, syntax, debuginfo, binary, raw, params)
-        else
-            # if we don't want the module metadata, just disassemble what our JIT has
+        str = ""
+        if !dump_module
+            # if we don't want the module metadata, attempt to disassemble what our JIT has
             str = _dump_function_native_disassembly(mi, world, wrapper, syntax, debuginfo, binary)
         end
+        if isempty(str)
+            # if that failed (or we want metadata), use LLVM to generate more accurate assembly output
+            if arginfo.oc === nothing
+                src = Base.Compiler.typeinf_code(Base.Compiler.NativeInterpreter(world), mi, true)
+            else
+                src, rt = Base.get_oc_code_rt(nothing, arginfo.oc, arginfo.tt, true)
+            end
+            src isa Core.CodeInfo || error("failed to infer source for $mi")
+            str = _dump_function_native_assembly(mi, src, wrapper, syntax, debuginfo, binary, raw, params)
+        end
     else
-        str = _dump_function_llvm(mi, world, wrapper, !raw, dump_module, optimize, debuginfo, params)
+        if arginfo.oc === nothing
+            src = Base.Compiler.typeinf_code(Base.Compiler.NativeInterpreter(world), mi, true)
+        else
+            src, rt = Base.get_oc_code_rt(nothing, arginfo.oc, arginfo.tt, true)
+        end
+        src isa Core.CodeInfo || error("failed to infer source for $mi")
+        str = _dump_function_llvm(mi, src, wrapper, !raw, dump_module, optimize, debuginfo, params)
     end
     str = warning * str
     return str
@@ -243,11 +295,11 @@ struct LLVMFDump
     f::Ptr{Cvoid} # opaque
 end
 
-function _dump_function_native_assembly(mi::Core.MethodInstance, world::UInt,
+function _dump_function_native_assembly(mi::Core.MethodInstance, src::Core.CodeInfo,
                                         wrapper::Bool, syntax::Symbol, debuginfo::Symbol,
                                         binary::Bool, raw::Bool, params::CodegenParams)
     llvmf_dump = Ref{LLVMFDump}()
-    @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump},mi::Any, world::UInt, wrapper::Bool,
+    @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, src::Any, wrapper::Bool,
                              true::Bool, params::CodegenParams)::Cvoid
     llvmf_dump[].f == C_NULL && error("could not compile the specified method")
     str = @ccall jl_dump_function_asm(llvmf_dump::Ptr{LLVMFDump}, false::Bool,
@@ -257,12 +309,12 @@ function _dump_function_native_assembly(mi::Core.MethodInstance, world::UInt,
 end
 
 function _dump_function_llvm(
-        mi::Core.MethodInstance, world::UInt, wrapper::Bool,
+        mi::Core.MethodInstance, src::Core.CodeInfo, wrapper::Bool,
         strip_ir_metadata::Bool, dump_module::Bool,
         optimize::Bool, debuginfo::Symbol,
         params::CodegenParams)
     llvmf_dump = Ref{LLVMFDump}()
-    @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, world::UInt,
+    @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, src::Any,
                              wrapper::Bool, optimize::Bool, params::CodegenParams)::Cvoid
     llvmf_dump[].f == C_NULL && error("could not compile the specified method")
     str = @ccall jl_dump_function_ir(llvmf_dump::Ptr{LLVMFDump}, strip_ir_metadata::Bool,
@@ -280,48 +332,51 @@ If the `optimize` keyword is unset, the code will be shown before LLVM optimizat
 All metadata and dbg.* calls are removed from the printed bitcode. For the full IR, set the `raw` keyword to true.
 To dump the entire module that encapsulates the function (with declarations), set the `dump_module` keyword to true.
 Keyword argument `debuginfo` may be one of source (default) or none, to specify the verbosity of code comments.
+
+See also: [`@code_llvm`](@ref), [`code_warntype`](@ref), [`code_typed`](@ref), [`code_lowered`](@ref), [`code_native`](@ref).
 """
-function code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool,
-                   dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default)
-    d = _dump_function(f, types, false, false, raw, dump_module, :intel, optimize, debuginfo, false)
+function code_llvm(io::IO, arginfo::ArgInfo;
+                   raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default,
+                   params::CodegenParams=CodegenParams(debug_info_kind=Cint(0), debug_info_level=Cint(2), safepoint_on_entry=raw, gcstack_arg=raw))
+    d = _dump_function(arginfo, false, false, raw, dump_module, :intel, optimize, debuginfo, false, params)
     if highlighting[:llvm] && get(io, :color, false)::Bool
         print_llvm(io, d)
     else
         print(io, d)
     end
 end
-code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) =
-    code_llvm(io, f, types, raw, dump_module, optimize, debuginfo)
-code_llvm(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); raw=false, dump_module=false, optimize=true, debuginfo::Symbol=:default) =
-    code_llvm(stdout, f, types; raw, dump_module, optimize, debuginfo)
+code_llvm(io::IO, @nospecialize(argtypes::Union{Tuple,Type{<:Tuple}}); kwargs...) = code_llvm(io, ArgInfo(argtypes); kwargs...)
+code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); kwargs...) = code_llvm(io, ArgInfo(f, types); kwargs...)
+code_llvm(args...; kwargs...) = (@nospecialize; code_llvm(stdout, args...; kwargs...))
 
 """
-    code_native([io=stdout,], f, types; syntax=:intel, debuginfo=:default, binary=false, dump_module=true)
+    code_native([io=stdout,], f, types; syntax=:intel, debuginfo=:default, binary=false, dump_module=true, raw=false)
 
 Prints the native assembly instructions generated for running the method matching the given
 generic function and type signature to `io`.
 
 * Set assembly syntax by setting `syntax` to `:intel` (default) for intel syntax or `:att` for AT&T syntax.
-* Specify verbosity of code comments by setting `debuginfo` to `:source` (default) or `:none`.
+* Specify verbosity of code comments by setting `debuginfo` to `:source` (equivalently, `:default`) or `:none`.
 * If `binary` is `true`, also print the binary machine code for each instruction precedented by an abbreviated address.
 * If `dump_module` is `false`, do not print metadata such as rodata or directives.
-* If `raw` is `false`, uninteresting instructions (like the safepoint function prologue) are elided.
+* If `raw` is `false` (default), uninteresting instructions (like the safepoint function prologue) are elided.
 
-See also: [`@code_native`](@ref), [`code_llvm`](@ref), [`code_typed`](@ref) and [`code_lowered`](@ref)
+See also: [`@code_native`](@ref), [`code_warntype`](@ref), [`code_typed`](@ref), [`code_lowered`](@ref), [`code_llvm`](@ref).
 """
-function code_native(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f));
+function code_native(io::IO, arginfo::ArgInfo;
                      dump_module::Bool=true, syntax::Symbol=:intel, raw::Bool=false,
-                     debuginfo::Symbol=:default, binary::Bool=false)
-    d = _dump_function(f, types, true, false, raw, dump_module, syntax, true, debuginfo, binary)
+                     debuginfo::Symbol=:default, binary::Bool=false,
+                     params::CodegenParams=CodegenParams(debug_info_kind=Cint(0), debug_info_level=Cint(2), safepoint_on_entry=raw, gcstack_arg=raw))
+    d = _dump_function(arginfo, true, false, raw, dump_module, syntax, true, debuginfo, binary, params)
     if highlighting[:native] && get(io, :color, false)::Bool
         print_native(io, d)
     else
         print(io, d)
     end
 end
-code_native(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); dump_module::Bool=true, syntax::Symbol=:intel, raw::Bool=false, debuginfo::Symbol=:default, binary::Bool=false) =
-    code_native(stdout, f, types; dump_module, syntax, raw, debuginfo, binary)
-code_native(::IO, ::Any, ::Symbol) = error("invalid code_native call") # resolve ambiguous call
+code_native(io::IO, @nospecialize(argtypes::Union{Tuple,Type{<:Tuple}}); kwargs...) = code_native(io, ArgInfo(argtypes); kwargs...)
+code_native(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); kwargs...) = code_native(io, ArgInfo(f, types); kwargs...)
+code_native(args...; kwargs...) = (@nospecialize; code_native(stdout, args...; kwargs...))
 
 ## colorized IR and assembly printing
 
@@ -345,7 +400,7 @@ const llvm_types =
 const llvm_cond = r"^(?:[ou]?eq|[ou]?ne|[uso][gl][te]|ord|uno)$" # true|false
 
 function print_llvm_tokens(io, tokens)
-    m = match(r"^((?:[^\s:]+:)?)(\s*)(.*)", tokens)
+    m = match(r"^((?:[^\"\s:]+:|\"[^\"]*\":)?)(\s*)(.*)", tokens)
     if m !== nothing
         label, spaces, tokens = m.captures
         printstyled_ll(io, label, :label, spaces)
diff --git a/stdlib/InteractiveUtils/src/editless.jl b/stdlib/InteractiveUtils/src/editless.jl
index 539e9b12f4071..e7ac51a2d2092 100644
--- a/stdlib/InteractiveUtils/src/editless.jl
+++ b/stdlib/InteractiveUtils/src/editless.jl
@@ -77,7 +77,7 @@ already work:
 - pycharm
 - bbedit
 
-# Example:
+# Examples
 
 The following defines the usage of terminal-based `emacs`:
 
@@ -223,7 +223,10 @@ Edit a file or directory optionally providing a line number to edit the file at.
 Return to the `julia` prompt when you quit the editor. The editor can be changed
 by setting `JULIA_EDITOR`, `VISUAL` or `EDITOR` as an environment variable.
 
-See also [`define_editor`](@ref).
+!!! compat "Julia 1.9"
+    The `column` argument requires at least Julia 1.9.
+
+See also [`InteractiveUtils.define_editor`](@ref).
 """
 function edit(path::AbstractString, line::Integer=0, column::Integer=0)
     path isa String || (path = convert(String, path))
@@ -255,7 +258,7 @@ method to edit. For modules, open the main source file. The module needs to be l
     `edit` on modules requires at least Julia 1.1.
 
 To ensure that the file can be opened at the given line, you may need to call
-`define_editor` first.
+`InteractiveUtils.define_editor` first.
 """
 function edit(@nospecialize f)
     ms = methods(f).ms
@@ -266,7 +269,8 @@ function edit(@nospecialize f)
 end
 edit(m::Method) = edit(functionloc(m)...)
 edit(@nospecialize(f), idx::Integer) = edit(methods(f).ms[idx])
-edit(f, t)  = (@nospecialize; edit(functionloc(f, t)...))
+edit(f, t) = (@nospecialize; edit(functionloc(f, t)...))
+edit(@nospecialize argtypes::Union{Tuple, Type{<:Tuple}}) = edit(functionloc(argtypes)...)
 edit(file::Nothing, line::Integer) = error("could not find source file for function")
 edit(m::Module) = edit(pathof(m))
 
diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl
index 53242a422140b..9a749752ef2c7 100644
--- a/stdlib/InteractiveUtils/src/macros.jl
+++ b/stdlib/InteractiveUtils/src/macros.jl
@@ -2,9 +2,91 @@
 
 # macro wrappers for various reflection functions
 
-import Base: typesof, insert!, replace_ref_begin_end!, infer_effects
+using Base: insert!, replace_ref_begin_end!,
+    infer_return_type, infer_exception_type, infer_effects, code_ircode, isexpr
+
+# defined in Base so it's possible to time all imports, including InteractiveUtils and its deps
+# via. `Base.@time_imports` etc.
+import Base: @time_imports, @trace_compile, @trace_dispatch
+
+typesof_expr(args::Vector{Any}, where_params::Union{Nothing, Vector{Any}} = nothing) = rewrap_where(:($make_tuple_type(Any[$(Any[esc(reescape(get_typeof, a)) for a in args]...)])), where_params)
+typesof_expr_unescaped(args::Vector{Any}, where_params::Union{Nothing, Vector{Any}} = nothing) = rewrap_where(:($make_tuple_type(Any[$(Any[reescape(get_typeof, a) for a in args]...)])), where_params)
+
+function make_tuple_type(types::Vector{Any})
+    vararg = -1
+    for i in eachindex(types)
+        i == 1 && continue # ignore function type
+        type = types[i]
+        if isa(type, Core.TypeofVararg)
+            vararg !== -1 && throw(ArgumentError("More than one `Core.Vararg` type present in argument tuple ($type detected after $(types[vararg])); if provided, it must be unique"))
+            vararg = i
+            if isdefined(type, :N)
+                n = length(types) - vararg + 1
+                n > type.N && throw(ArgumentError("Expected at most $(type.N) types after `$type`, found $n instead"))
+            end
+        elseif vararg !== -1
+            ref = types[vararg]
+            if isdefined(ref, :T) && !skip_type_check(ref.T) && !skip_type_check(type)
+                !(type <: ref.T) && throw(ArgumentError("Inconsistent type `$type` detected after `$ref`; `$type <: $(ref.T)` must hold"))
+            end
+        end
+    end
+    vararg === -1 && return Tuple{types...}
+    return Tuple{@view(types[1:vararg])...}
+end
+
+skip_type_check(@nospecialize(T)) = Core.has_free_typevars(T)
+
+function extract_where_parameters(ex::Expr)
+    isexpr(ex, :where) || return ex, nothing
+    ex.args[1], ex.args[2:end]
+end
+
+function rewrap_where(ex::Expr, where_params::Union{Nothing, Vector{Any}})
+    isnothing(where_params) && return ex
+    Expr(:where, ex, esc.(where_params)...)
+end
 
-separate_kwargs(args...; kwargs...) = (args, values(kwargs))
+function reescape(f::Function, @nospecialize ex)
+    isa(ex, Expr) || return f(ex)
+    unescaped = Meta.unescape(ex)
+    new = f(unescaped)
+    return Meta.reescape(new, ex)
+end
+
+get_typeof(ex::Ref) = ex[]
+function get_typeof(@nospecialize ex)
+    isexpr(ex, :(::), 1) && return ex.args[1]
+    isexpr(ex, :(::), 2) && return ex.args[2]
+    if isexpr(ex, :..., 1)
+        splatted = ex.args[1]
+        isexpr(splatted, :(::)) && return Expr(:curly, :(Core.Vararg), splatted.args[end])
+        return :(Any[Core.Typeof(x) for x in $splatted]...)
+    end
+    return :(Core.Typeof($ex))
+end
+
+function is_broadcasting_call(ex)
+    isa(ex, Expr) || return false
+    # Standard broadcasting: f.(x)
+    isexpr(ex, :.) && length(ex.args) ≥ 2 && isexpr(ex.args[2], :tuple) && return true
+    # Infix broadcasting: x .+ y, x .<< y, etc.
+    if isexpr(ex, :call)
+        f = ex.args[1]
+        f == :.. && return false
+        string(f)[1] == '.' && return true
+    end
+    return false
+end
+is_broadcasting_expr(ex) = is_broadcasting_call(ex) || is_broadcasting_assignment(ex)
+function is_broadcasting_assignment(ex)
+    isa(ex, Expr) || return false
+    isexpr(ex, :.) && return false
+    head = string(ex.head)
+    # x .= y, x .+= y, x .<<= y, etc.
+    head[begin] == '.' && head[end] == '=' && return true
+    return false
+end
 
 """
 Transform a dot expression into one where each argument has been replaced by a
@@ -12,108 +94,395 @@ variable "xj" (with j an integer from 1 to the returned i).
 The list `args` contains the original arguments that have been replaced.
 """
 function recursive_dotcalls!(ex, args, i=1)
-    if !(ex isa Expr) || ((ex.head !== :. || !(ex.args[2] isa Expr)) &&
-                          (ex.head !== :call || string(ex.args[1])[1] != '.'))
-        newarg = Symbol('x', i)
-        if Meta.isexpr(ex, :...)
-            push!(args, only(ex.args))
-            return Expr(:..., newarg), i+1
+    if is_broadcasting_expr(ex)
+        if is_broadcasting_assignment(ex)
+            (start, branches) = (1, ex.args)
+        elseif isexpr(ex, :.)
+            (start, branches) = (1, ex.args[2].args)
+        else
+            (start, branches) = (2, ex.args)
+        end
+        for j in start:length(branches)::Int
+            branch, i = recursive_dotcalls!(branches[j], args, i)
+            branches[j] = branch
+        end
+        return ex, i
+    elseif isexpr(ex, :parameters)
+        for j in eachindex(ex.args)
+            param, i = recursive_dotcalls!(ex.args[j], args, i)
+            ex.args[j] = param
+        end
+        return ex, i
+    end
+    newarg = Symbol('x', i)
+    if isexpr(ex, :...)
+        newarg = Expr(:..., newarg)
+        push!(args, only(ex.args))
+    elseif isexpr(ex, :kw)
+        newarg = Expr(:kw, ex.args[1], newarg)
+        push!(args, ex.args[end])
+    else
+        push!(args, ex)
+    end
+    return newarg, i+1
+end
+
+function extract_farg(@nospecialize arg)
+    !isexpr(arg, :(::), 1) && return arg
+    fT = arg.args[1]
+    :($construct_callable($fT))
+end
+
+function construct_callable(@nospecialize(func::Type))
+    # Support function singleton types such as `(::typeof(f))(args...)`
+    Base.issingletontype(func) && isdefined(func, :instance) && return func.instance
+    # Don't support type annotations otherwise, we don't want to give wrong answers
+    # for callables such as `(::Returns{Int})(args...)` where using `Returns{Int}`
+    # would give us code for the constructor, not for the callable object.
+    throw(ArgumentError("If the function type is explicitly provided via a type annotation, it must be a singleton whose only instance is the callable object.
+                         To remove this restriction, the reflection macro must set `use_signature_tuple = true` if the reflection function supports a single signature tuple type argument, such as `Tuple{typeof(f), argtypes...}`"))
+end
+
+function separate_kwargs(exs::Vector{Any})
+    args = []
+    kwargs = []
+    for ex in exs
+        if isexpr(ex, :kw)
+            push!(kwargs, ex)
+        elseif isexpr(ex, :parameters)
+            for kw in ex.args
+                push!(kwargs, kw)
+            end
         else
             push!(args, ex)
-            return newarg, i+1
         end
     end
-    (start, branches) = ex.head === :. ? (1, ex.args[2].args) : (2, ex.args)
-    length_branches = length(branches)::Int
-    for j in start:length_branches
-        branch, i = recursive_dotcalls!(branches[j], args, i)
-        branches[j] = branch
+    args, kwargs
+end
+
+function are_kwargs_valid(kwargs::Vector{Any})
+    for kwarg in kwargs
+        isexpr(kwarg, :..., 1) && continue
+        isexpr(kwarg, :kw, 2) && isa(kwarg.args[1], Symbol) && continue
+        isexpr(kwarg, :(::), 2) && continue
+        isa(kwarg, Symbol) && continue
+        isexpr(kwarg, :escape) && continue
+        isexpr(kwarg, :var"hygienic-scope") && continue
+        return false
+    end
+    return true
+end
+
+# Generate an expression that merges `kwargs` onto a single `NamedTuple`
+function generate_merged_namedtuple_type(kwargs::Vector{Any})
+    nts = Any[]
+    ntargs = Pair{Symbol, Any}[]
+    for ex in kwargs
+        if isexpr(ex, :..., 1)
+            if !isempty(ntargs)
+                # Construct a `NamedTuple` containing the previous parameters.
+                push!(nts, generate_namedtuple_type(ntargs))
+                empty!(ntargs)
+            end
+            push!(nts, Expr(:call, typeof_nt, ex.args[1]))
+        elseif isexpr(ex, :kw, 2)
+            push!(ntargs, ex.args[1]::Symbol => reescape(get_typeof, ex.args[2]))
+        elseif isexpr(ex, :(::), 2)
+            push!(ntargs, ex.args[1]::Symbol => reescape(get_typeof, ex))
+        else
+            push!(ntargs, ex => reescape(get_typeof, ex))
+        end
+    end
+    !isempty(ntargs) && push!(nts, generate_namedtuple_type(ntargs))
+    return :($merge_namedtuple_types($(nts...)))
+end
+
+function generate_namedtuple_type(ntargs::Vector{Pair{Symbol, Any}})
+    names = Expr(:tuple)
+    tt = Expr(:curly, :Tuple)
+    for (name, type) in ntargs
+        push!(names.args, QuoteNode(name))
+        push!(tt.args, type)
+    end
+    return :(NamedTuple{$names, $tt})
+end
+
+typeof_nt(nt::NamedTuple) = typeof(nt)
+typeof_nt(nt::Base.Pairs) = typeof(values(nt))
+
+function merge_namedtuple_types(nt::Type{<:NamedTuple}, nts::Type{<:NamedTuple}...)
+    @nospecialize
+    isempty(nts) && return nt
+    names = Symbol[]
+    types = Any[]
+    for nt in (nt, nts...)
+        for (name, type) in zip(fieldnames(nt), fieldtypes(nt))
+            i = findfirst(==(name), names)
+            if isnothing(i)
+                push!(names, name)
+                push!(types, type)
+            else
+                types[i] = type
+            end
+        end
+    end
+    return NamedTuple{Tuple(names), Tuple{types...}}
+end
+
+function gen_call(fcn, args, where_params, kws; use_signature_tuple::Bool, not_an_opaque_closure::Bool = true)
+    f, args... = args
+    args = collect(Any, args)
+    if !use_signature_tuple
+        f = esc(reescape(extract_farg, f))
+        tt = typesof_expr(args, where_params)
+        return :($fcn($f, $tt; $(kws...)))
+    end
+    # We use a signature tuple only if we are sure we won't get an opaque closure as first argument.
+    # If we do get one, we have to use the 2-argument form.
+    if isexpr(f, :(::)) || not_an_opaque_closure
+        # We have a type, not a value, so not an opaque closure.
+        sigt = typesof_expr(Any[f, args...], where_params)
+        return :($fcn($sigt; $(kws...)))
+    end
+    tt = typesof_expr(args, where_params)
+    sigt = typesof_expr_unescaped(Any[:f, esc.(args)...], where_params)
+    return quote
+        f = $(esc(f))
+        if isa(f, Core.OpaqueClosure)
+            $fcn(f, $tt; $(kws...))
+        else
+            $fcn($sigt; $(kws...))
+        end
     end
-    return ex, i
 end
 
-function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[])
-    if Meta.isexpr(ex0, :ref)
-        ex0 = replace_ref_begin_end!(ex0)
+function expand_ref_begin_end!(f::Function, ex, __module__::Module)
+    arr = ex.args[1]
+    args = copy(ex.args)
+    new = replace_ref_begin_end!(__module__, ex)
+    modified = ex.args .≠ args
+    if any(modified) && (isexpr(arr, :(::), 1) || isexpr(arr, :(::), 2) || isexpr(arr, :..., 1))
+        return Expr(:call, :error, "`begin` or `end` cannot be used with a type-annotated left-hand side argument for an indexing syntax")
+    end
+    call = f(ex)
+    !any(modified) && return call
+    fixup_hygiene_for_ref_temporary!(new)
+    # We have to mutate `ex`, then return `new` which evaluates `arr` before use.
+    ex.head = call.head
+    ex.args = call.args
+    return new
+end
+
+function fixup_hygiene_for_ref_temporary!(ex)
+    # Match the local variable `##S#...` so we may escape its definition.
+    # We don't want to use `escs = 1` in `replace_ref_begin_end_!` because
+    # then we delegate escaping to this function, whereas we otherwise manage
+    # ourselves the escaping in all other code paths.
+    isexpr(ex, :block) || return
+    decl = ex.args[1]
+    isexpr(decl, :local, 1) || return
+    assignment = decl.args[1]
+    isexpr(assignment, :(=), 2) || return
+    variable = assignment.args[1]
+    startswith(string(variable), "##S#") || return
+    decl.args[1] = esc(assignment)
+end
+
+is_code_macro(fcn) = startswith(string(fcn), "code_")
+
+"""
+    gen_call_with_extracted_types(__module__, fcn, ex, kws = Expr[]; is_source_reflection = !is_code_macro(fcn), supports_binding_reflection = false, use_signature_tuple = false)
+
+Destructures the input expression `ex` into a function call or a binding access, then generates a call to either:
+- `fcn(f, tt; kws...)`
+- `fcn(sigt; kws...)` # if `use_signature_tuple = true`
+- `fcn(mod, name; kws...)` # if `supports_binding_reflection = true`
+
+## `fcn` API requirements
+
+`fcn` is a user function expected to satisfy the following API:
+- `fcn(f, tt)`: `f` is a value (such as `sum`, unlike `typeof(sum)`), and `tt := Tuple{argtypes...}`
+  is a `Tuple` holding argument types. `f` may be a `Core.OpaqueClosure`.
+
+If `use_signature_tuple = true`:
+- `fcn(sigt)`: `sigt := Tuple{typeof(f), argtypes...}` represents the low-level signature tuple to be used for introspection.
+
+If `supports_binding_reflection = true`:
+- `fcn(mod::Module, name::Symbol)`: `name` is the name of a binding that may or may not exist in `mod`.
+
+!!! warning
+    This function is not public and may be subject to breaking changes. However, we recognize that it may
+    be very convenient for macro developers, and as it is already used by a certain number of packages,
+    we will do our best to avoid breakages.
+
+## Examples
+
+Here are a few usage patterns that may help you get started.
+
+For most "code" macros (`@code_typed`, `@code_llvm`, `@code_native` etc):
+```julia
+    gen_call_with_extracted_types(__module__, fcn, ex, kws; is_source_reflection = false, use_signature_tuple = true #= may be false =#)
+```
+
+For source reflection macros (`@which`, `@edit`, `@less` etc):
+```julia
+    gen_call_with_extracted_types(__module__, fcn, ex, kws; is_source_reflection = true, use_signature_tuple = true #= may be false =#)
+```
+
+# Extended help
+
+## Type annotations
+
+Type annotations may be used instead of concrete values for the callable or for any of the arguments. The generated code
+will directly use the right-hand side of the type annotation instead of extracting the type of a value at runtime.
+
+This is particularly useful for callable objects (notably, for those that are hard to construct by hand on the spot),
+or when wanting to provide a type that is not concrete. However, support for callable objects requires setting
+`use_signature_tuple` to true, which is not a default (see the corresponding section below).
+
+Constraints on type parameters are also supported with a `where` syntax, enabling these patterns:
+- `f(x::Vector{T}, y::T) where {T}`
+- `(::Returns{T})() where {T<:Real}`
+- `(::MyPolynomial{N,T})(::T, ::AbstractArray{T,N}) where {N,T}`
+
+Type-annotated expressions may be mixed with runtime values, as in `x + ::Float64`.
+
+## Broadcasting
+
+When `ex` is a broadcasting expression (a broadcasted assignment `a .+= b` or a broadcasted function call `a .+ b`),
+there is no actual function that corresponds to this expression because lowering maps it to more than one call.
+
+If `is_source_reflection` is true, we assume that `fcn` uses provenance information (e.g. used by `@edit` to go
+to a source location, or `@which` to get the method matching the input). In this case, we don't have a clear
+semantic source to give (shall it be `broadcasted`, or `materialize`, or something else?), so we return a throwing
+expression.
+
+However, if provenance is not of interest, we define an intermediate function on the spot that performs the broadcast,
+then carry on using this function. For example, for the input expression `a .+ b`, we emit the anonymous function
+`(a, b) -> a .+ b` then call `fcn` just as if the user had issued a call to this anonymous function. That should be the
+desired behavior for most macros that want to map an expression to the corresponding generated code, as in `@code_typed`
+or `@code_llvm` for instance.
+
+## Binding reflection
+
+Expressions of the form `a.b` (or `a.b.c` and so on) are by default interpreted as calls to `getproperty`.
+However, if the value corresponding to the left-hand side (`a`, `a.b`, etc) is a module, some implementations
+may instead be interested in the binding lookup, instead of the function call. If that is the case,
+`supports_binding_reflection` may be set to `true` which will emit a call to `fcn(a, :b)` (or `fcn(a.b, :c)` etc).
+
+## Tuple signature type
+
+If `use_signature_tuple = true`, then a single tuple consisting of `Tuple{ft, argtypes...}` will be formed
+and provided to `fcn`. `fcn` is then expected to use `ft` as the callable type with no further transformation.
+
+This behavior is required to enable support type-annotated callable objects.
+
+To understand this requirement, we'll use `code_typed` as an example. `code_typed(f, ())` interprets its input as the signature
+`Tuple{typeof(f)}`, and `code_typed(Returns{Int}, ())` interprets that as the signature `Tuple{Type{Returns{Int}}}`, corresponding
+to the type constructor.
+To remove the ambiguity, `code_typed` must support an implementation that directly accepts a function type. This implementation
+is assumed to be the method for `fcn(sigt::Type{<:Tuple})`.
+"""
+function gen_call_with_extracted_types(__module__, fcn, ex0, kws = Expr[]; is_source_reflection = !is_code_macro(fcn), supports_binding_reflection = false, use_signature_tuple = false)
+    # Ignore assignments (e.g. `@edit a = f(x)` gets turned into `@edit f(x)`)
+    if isa(ex0, Expr) && ex0.head === :(=) && isa(ex0.args[1], Symbol)
+        return gen_call_with_extracted_types(__module__, fcn, ex0.args[2], kws; is_source_reflection, supports_binding_reflection, use_signature_tuple)
+    end
+    where_params = nothing
+    if isa(ex0, Expr)
+        ex0, where_params = extract_where_parameters(ex0)
     end
     if isa(ex0, Expr)
-        if ex0.head === :do && Meta.isexpr(get(ex0.args, 1, nothing), :call)
+        if ex0.head === :do && isexpr(get(ex0.args, 1, nothing), :call)
+            # Normalize `f(args...) do ... end` calls to `f(do_anonymous_function, args...)`
             if length(ex0.args) != 2
                 return Expr(:call, :error, "ill-formed do call")
             end
-            i = findlast(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex0.args[1].args)
+            i = findlast(@nospecialize(a)->(isexpr(a, :kw) || isexpr(a, :parameters)), ex0.args[1].args)
             args = copy(ex0.args[1].args)
             insert!(args, (isnothing(i) ? 2 : 1+i::Int), ex0.args[2])
             ex0 = Expr(:call, args...)
         end
-        if ex0.head === :. || (ex0.head === :call && ex0.args[1] !== :.. && string(ex0.args[1])[1] == '.')
-            codemacro = startswith(string(fcn), "code_")
-            if codemacro && (ex0.head === :call || ex0.args[2] isa Expr)
-                # Manually wrap a dot call in a function
-                args = Any[]
-                ex, i = recursive_dotcalls!(copy(ex0), args)
-                xargs = [Symbol('x', j) for j in 1:i-1]
-                dotfuncname = gensym("dotfunction")
-                dotfuncdef = Expr(:local, Expr(:(=), Expr(:call, dotfuncname, xargs...), ex))
-                return quote
-                    $(esc(dotfuncdef))
-                    local args = $typesof($(map(esc, args)...))
-                    $(fcn)($(esc(dotfuncname)), args; $(kws...))
+        if is_broadcasting_expr(ex0) && !is_source_reflection
+            # Manually wrap top-level broadcasts in a function.
+            # We don't do that if `fcn` reflects into the source,
+            # because that destroys provenance information.
+            args = Any[]
+            ex, i = recursive_dotcalls!(copy(ex0), args)
+            xargs = [Symbol('x', j) for j in 1:i-1]
+            dotfuncname = gensym("dotfunction")
+            call = gen_call(fcn, Any[dotfuncname, args...], where_params, kws; use_signature_tuple)
+            return quote
+                let $(esc(:($dotfuncname($(xargs...)) = $ex)))
+                    $call
                 end
-            elseif !codemacro
-                fully_qualified_symbol = true # of the form A.B.C.D
-                ex1 = ex0
-                while ex1 isa Expr && ex1.head === :.
-                    fully_qualified_symbol = (length(ex1.args) == 2 &&
-                                              ex1.args[2] isa QuoteNode &&
-                                              ex1.args[2].value isa Symbol)
-                    fully_qualified_symbol || break
-                    ex1 = ex1.args[1]
+            end
+        elseif isexpr(ex0, :.) && is_source_reflection
+            # If `ex0` has the form A.B (or some chain A.B.C.D) and `fcn` reflects into the source,
+            # `A` (or `A.B.C`) may be a module, in which case `fcn` is probably more interested in
+            # the binding rather than the `getproperty` call.
+            # If binding reflection is not supported, we generate an error; `getproperty(::Module, field)`
+            # is not going to be interesting to reflect into, so best to allow future non-breaking support
+            # for binding reflection in case the macro may eventually support that.
+            fully_qualified_symbol = true
+            ex1 = ex0
+            while ex1 isa Expr && ex1.head === :.
+                fully_qualified_symbol = (length(ex1.args) == 2 &&
+                                            ex1.args[2] isa QuoteNode &&
+                                            ex1.args[2].value isa Symbol)
+                fully_qualified_symbol || break
+                ex1 = ex1.args[1]
+            end
+            fully_qualified_symbol &= ex1 isa Symbol
+            if fully_qualified_symbol || isexpr(ex1, :(::), 1)
+                call_reflection = gen_call(fcn, [getproperty; ex0.args], where_params, kws; use_signature_tuple)
+                isexpr(ex0.args[1], :(::), 1) && return call_reflection
+                if supports_binding_reflection
+                    binding_reflection = :($fcn(arg1, $(ex0.args[2]); $(kws...)))
+                else
+                    binding_reflection = :(error("expression is not a function call"))
                 end
-                fully_qualified_symbol &= ex1 isa Symbol
-                if fully_qualified_symbol
-                    return quote
-                        local arg1 = $(esc(ex0.args[1]))
-                        if isa(arg1, Module)
-                            $(if string(fcn) == "which"
-                                  :(which(arg1, $(ex0.args[2])))
-                              else
-                                  :(error("expression is not a function call"))
-                              end)
-                        else
-                            local args = $typesof($(map(esc, ex0.args)...))
-                            $(fcn)(Base.getproperty, args)
-                        end
+                return quote
+                    local arg1 = $(esc(ex0.args[1]))
+                    if isa(arg1, Module)
+                        $binding_reflection
+                    else
+                        $call_reflection
                     end
-                else
-                    return Expr(:call, :error, "dot expressions are not lowered to "
-                                * "a single function call, so @$fcn cannot analyze "
-                                * "them. You may want to use Meta.@lower to identify "
-                                * "which function call to target.")
                 end
             end
         end
-        if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex0.args)
-            return quote
-                local arg1 = $(esc(ex0.args[1]))
-                local args, kwargs = $separate_kwargs($(map(esc, ex0.args[2:end])...))
-                $(fcn)(Core.kwcall,
-                       Tuple{typeof(kwargs), Core.Typeof(arg1), map(Core.Typeof, args)...};
-                       $(kws...))
+        if is_broadcasting_expr(ex0)
+            return Expr(:call, :error, "dot expressions are not lowered to "
+                * "a single function call, so @$fcn cannot analyze "
+                * "them. You may want to use Meta.@lower to identify "
+                * "which function call to target.")
+        end
+        if any(@nospecialize(a)->(isexpr(a, :kw) || isexpr(a, :parameters)), ex0.args)
+            args, kwargs = separate_kwargs(ex0.args)
+            are_kwargs_valid(kwargs) || return quote
+                error("keyword argument format unrecognized; they must be of the form `x` or `x = <value>`")
+                $(esc(ex0)) # trigger syntax errors if any
             end
+            nt = generate_merged_namedtuple_type(kwargs)
+            nt = Ref(nt) # ignore `get_typeof` handling
+            return gen_call(fcn, Any[Core.kwcall, nt, args...], where_params, kws; use_signature_tuple)
         elseif ex0.head === :call
-            return Expr(:call, fcn, esc(ex0.args[1]),
-                        Expr(:call, typesof, map(esc, ex0.args[2:end])...),
-                        kws...)
+            args = copy(ex0.args)
+            if ex0.args[1] === :^ && length(ex0.args) >= 3 && isa(ex0.args[3], Int)
+                pushfirst!(args, Base.literal_pow)
+                args[4] = :(Val($(ex0.args[3])))
+            end
+            return gen_call(fcn, args, where_params, kws; use_signature_tuple, not_an_opaque_closure = false)
         elseif ex0.head === :(=) && length(ex0.args) == 2
             lhs, rhs = ex0.args
             if isa(lhs, Expr)
                 if lhs.head === :(.)
-                    return Expr(:call, fcn, Base.setproperty!,
-                                Expr(:call, typesof, map(esc, lhs.args)..., esc(rhs)), kws...)
+                    return gen_call(fcn, Any[Base.setproperty!, lhs.args..., rhs], where_params, kws; use_signature_tuple)
                 elseif lhs.head === :ref
-                    return Expr(:call, fcn, Base.setindex!,
-                                Expr(:call, typesof, esc(lhs.args[1]), esc(rhs), map(esc, lhs.args[2:end])...), kws...)
+                    return expand_ref_begin_end!(lhs, __module__) do ex
+                        gen_call(fcn, Any[setindex!, ex.args[1], rhs, ex.args[2:end]...], where_params, kws; use_signature_tuple)
+                    end
                 end
             end
         elseif ex0.head === :vcat || ex0.head === :typed_vcat
@@ -124,57 +493,43 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[])
                 f, hf = Base.typed_vcat, Base.typed_hvcat
                 args = ex0.args[2:end]
             end
-            if any(a->isa(a,Expr) && a.head === :row, args)
+            if any(@nospecialize(a)->isa(a,Expr) && a.head === :row, args)
                 rows = Any[ (isa(x,Expr) && x.head === :row ? x.args : Any[x]) for x in args ]
                 lens = map(length, rows)
-                return Expr(:call, fcn, hf,
-                            Expr(:call, typesof,
-                                 (ex0.head === :vcat ? [] : Any[esc(ex0.args[1])])...,
-                                 Expr(:tuple, lens...),
-                                 map(esc, vcat(rows...))...), kws...)
+                args = Any[Expr(:tuple, lens...); vcat(rows...)]
+                ex0.head === :typed_vcat && pushfirst!(args, ex0.args[1])
+                return gen_call(fcn, Any[hf, args...], where_params, kws; use_signature_tuple)
             else
-                return Expr(:call, fcn, f,
-                            Expr(:call, typesof, map(esc, ex0.args)...), kws...)
+                return gen_call(fcn, Any[f, ex0.args...], where_params, kws; use_signature_tuple)
+            end
+        elseif ex0.head === :ref
+            return expand_ref_begin_end!(ex0, __module__) do ex
+                gen_call(fcn, Any[getindex, ex.args...], where_params, kws; use_signature_tuple)
             end
         else
-            for (head, f) in (:ref => Base.getindex, :hcat => Base.hcat, :(.) => Base.getproperty, :vect => Base.vect, Symbol("'") => Base.adjoint, :typed_hcat => Base.typed_hcat, :string => string)
-                if ex0.head === head
-                    return Expr(:call, fcn, f,
-                                Expr(:call, typesof, map(esc, ex0.args)...), kws...)
-                end
+            for (head, f) in Any[:hcat => Base.hcat,
+                                 :(.) => Base.getproperty,
+                                 :vect => Base.vect,
+                                 Symbol("'") => Base.adjoint,
+                                 :typed_hcat => Base.typed_hcat,
+                                 :string => string]
+                ex0.head === head || continue
+                return gen_call(fcn, Any[f, ex0.args...], where_params, kws; use_signature_tuple)
             end
         end
     end
     if isa(ex0, Expr) && ex0.head === :macrocall # Make @edit @time 1+2 edit the macro by using the types of the *expressions*
-        return Expr(:call, fcn, esc(ex0.args[1]), Tuple{#=__source__=#LineNumberNode, #=__module__=#Module, Any[ Core.Typeof(a) for a in ex0.args[3:end] ]...}, kws...)
+        args = [#=__source__::=#LineNumberNode, #=__module__::=#Module, Core.Typeof.(ex0.args[3:end])...]
+        return gen_call(fcn, Any[ex0.args[1], Ref.(args)...], where_params, kws; use_signature_tuple)
     end
 
     ex = Meta.lower(__module__, ex0)
-    if !isa(ex, Expr)
-        return Expr(:call, :error, "expression is not a function call or symbol")
-    end
+    isa(ex, Expr) || return Expr(:call, :error, "expression is not a function call or symbol")
 
-    exret = Expr(:none)
-    if ex.head === :call
-        if any(e->(isa(e, Expr) && e.head === :(...)), ex0.args) &&
-            (ex.args[1] === GlobalRef(Core,:_apply_iterate) ||
-             ex.args[1] === GlobalRef(Base,:_apply_iterate))
-            # check for splatting
-            exret = Expr(:call, ex.args[2], fcn,
-                        Expr(:tuple, esc(ex.args[3]),
-                            Expr(:call, typesof, map(esc, ex.args[4:end])...)))
-        else
-            exret = Expr(:call, fcn, esc(ex.args[1]),
-                         Expr(:call, typesof, map(esc, ex.args[2:end])...), kws...)
-        end
-    end
-    if ex.head === :thunk || exret.head === :none
-        exret = Expr(:call, :error, "expression is not a function call, "
-                                  * "or is too complex for @$fcn to analyze; "
-                                  * "break it down to simpler parts if possible. "
-                                  * "In some cases, you may want to use Meta.@lower.")
-    end
-    return exret
+    return Expr(:call, :error, "expression is not a function call, \
+                                    or is too complex for @$fcn to analyze; \
+                                    break it down to simpler parts if possible. \
+                                    In some cases, you may want to use Meta.@lower.")
 end
 
 """
@@ -182,7 +537,7 @@ Same behaviour as `gen_call_with_extracted_types` except that keyword arguments
 of the form "foo=bar" are passed on to the called function as well.
 The keyword arguments must be given before the mandatory argument.
 """
-function gen_call_with_extracted_types_and_kwargs(__module__, fcn, ex0)
+function gen_call_with_extracted_types_and_kwargs(__module__, fcn, ex0; is_source_reflection = !is_code_macro(fcn), supports_binding_reflection = false, use_signature_tuple = false)
     kws = Expr[]
     arg = ex0[end] # Mandatory argument
     for i in 1:length(ex0)-1
@@ -196,13 +551,16 @@ function gen_call_with_extracted_types_and_kwargs(__module__, fcn, ex0)
             return Expr(:call, :error, "@$fcn expects only one non-keyword argument")
         end
     end
-    return gen_call_with_extracted_types(__module__, fcn, arg, kws)
+    return gen_call_with_extracted_types(__module__, fcn, arg, kws; is_source_reflection, supports_binding_reflection, use_signature_tuple)
 end
 
 for fname in [:which, :less, :edit, :functionloc]
     @eval begin
         macro ($fname)(ex0)
-            gen_call_with_extracted_types(__module__, $(Expr(:quote, fname)), ex0)
+            gen_call_with_extracted_types(__module__, $(Expr(:quote, fname)), ex0, Expr[];
+                                          is_source_reflection = true,
+                                          supports_binding_reflection = $(fname === :which),
+                                          use_signature_tuple = true)
         end
     end
 end
@@ -212,37 +570,19 @@ macro which(ex0::Symbol)
     return :(which($__module__, $ex0))
 end
 
-for fname in [:code_warntype, :code_llvm, :code_native, :infer_effects]
-    @eval begin
-        macro ($fname)(ex0...)
-            gen_call_with_extracted_types_and_kwargs(__module__, $(Expr(:quote, fname)), ex0)
-        end
-    end
-end
-
-macro code_typed(ex0...)
-    thecall = gen_call_with_extracted_types_and_kwargs(__module__, :code_typed, ex0)
-    quote
-        local results = $thecall
-        length(results) == 1 ? results[1] : results
-    end
-end
-
-macro code_lowered(ex0...)
-    thecall = gen_call_with_extracted_types_and_kwargs(__module__, :code_lowered, ex0)
-    quote
-        local results = $thecall
-        length(results) == 1 ? results[1] : results
+for fname in [:code_warntype, :code_llvm, :code_native,
+              :infer_return_type, :infer_effects, :infer_exception_type]
+    @eval macro ($fname)(ex0...)
+        gen_call_with_extracted_types_and_kwargs(__module__, $(QuoteNode(fname)), ex0; is_source_reflection = false, use_signature_tuple = $(in(fname, [:code_warntype, :code_llvm, :code_native])))
     end
 end
 
-macro time_imports(ex)
-    quote
-        try
-            Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1)
-            $(esc(ex))
-        finally
-            Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1)
+for fname in [:code_typed, :code_lowered, :code_ircode]
+    @eval macro ($fname)(ex0...)
+        thecall = gen_call_with_extracted_types_and_kwargs(__module__, $(QuoteNode(fname)), ex0; is_source_reflection = false, use_signature_tuple = true)
+        quote
+            local results = $thecall
+            length(results) == 1 ? results[1] : results
         end
     end
 end
@@ -297,6 +637,8 @@ Evaluates the arguments to the function or macro call, determines their types, a
     @code_typed optimize=true foo(x)
 
 to control whether additional optimizations, such as inlining, are also applied.
+
+See also: [`code_typed`](@ref), [`@code_warntype`](@ref), [`@code_lowered`](@ref), [`@code_llvm`](@ref), [`@code_native`](@ref).
 """
 :@code_typed
 
@@ -305,6 +647,8 @@ to control whether additional optimizations, such as inlining, are also applied.
 
 Evaluates the arguments to the function or macro call, determines their types, and calls
 [`code_lowered`](@ref) on the resulting expression.
+
+See also: [`code_lowered`](@ref), [`@code_warntype`](@ref), [`@code_typed`](@ref), [`@code_llvm`](@ref), [`@code_native`](@ref).
 """
 :@code_lowered
 
@@ -313,6 +657,8 @@ Evaluates the arguments to the function or macro call, determines their types, a
 
 Evaluates the arguments to the function or macro call, determines their types, and calls
 [`code_warntype`](@ref) on the resulting expression.
+
+See also: [`code_warntype`](@ref), [`@code_typed`](@ref), [`@code_lowered`](@ref), [`@code_llvm`](@ref), [`@code_native`](@ref).
 """
 :@code_warntype
 
@@ -331,6 +677,8 @@ by putting them and their value before the function call, like this:
 `raw` makes all metadata and dbg.* calls visible.
 `debuginfo` may be one of `:source` (default) or `:none`,  to specify the verbosity of code comments.
 `dump_module` prints the entire module that encapsulates the function.
+
+See also: [`code_llvm`](@ref), [`@code_warntype`](@ref), [`@code_typed`](@ref), [`@code_lowered`](@ref), [`@code_native`](@ref).
 """
 :@code_llvm
 
@@ -350,7 +698,7 @@ by putting it before the function call, like this:
 * If `binary` is `true`, also print the binary machine code for each instruction precedented by an abbreviated address.
 * If `dump_module` is `false`, do not print metadata such as rodata or directives.
 
-See also: [`code_native`](@ref), [`@code_llvm`](@ref), [`@code_typed`](@ref) and [`@code_lowered`](@ref)
+See also: [`code_native`](@ref), [`@code_warntype`](@ref), [`@code_typed`](@ref), [`@code_lowered`](@ref), [`@code_llvm`](@ref).
 """
 :@code_native
 
@@ -392,3 +740,94 @@ julia> @time_imports using CSV
 
 """
 :@time_imports
+
+"""
+    @trace_compile
+
+A macro to execute an expression and show any methods that were compiled (or recompiled in yellow),
+like the julia args `--trace-compile=stderr --trace-compile-timing` but specifically for a call.
+
+```julia-repl
+julia> @trace_compile rand(2,2) * rand(2,2)
+#=   39.1 ms =# precompile(Tuple{typeof(Base.rand), Int64, Int64})
+#=  102.0 ms =# precompile(Tuple{typeof(Base.:(*)), Array{Float64, 2}, Array{Float64, 2}})
+2×2 Matrix{Float64}:
+ 0.421704  0.864841
+ 0.211262  0.444366
+```
+
+!!! compat "Julia 1.12"
+    This macro requires at least Julia 1.12
+
+"""
+:@trace_compile
+
+"""
+    @trace_dispatch
+
+A macro to execute an expression and report methods that were compiled via dynamic dispatch,
+like the julia arg `--trace-dispatch=stderr` but specifically for a call.
+
+!!! compat "Julia 1.12"
+    This macro requires at least Julia 1.12
+
+"""
+:@trace_dispatch
+
+"""
+    @activate Component
+
+Activate a newly loaded copy of an otherwise builtin component. The `Component`
+to be activated will be resolved using the ordinary rules of module resolution
+in the current environment.
+
+When using `@activate`, additional options for a component may be specified in
+square brackets `@activate Compiler[:option1, :option]`
+
+Currently `Compiler` and `JuliaLowering` are the only available components that
+may be activatived.
+
+For `@activate Compiler`, the following options are available:
+1. `:reflection` - Activate the compiler for reflection purposes only.
+                   The ordinary reflection functionality in `Base` and `InteractiveUtils`.
+                   Will use the newly loaded compiler. Note however, that these reflection
+                   functions will still interact with the ordinary native cache (both loading
+                   and storing). An incorrect compiler implementation may thus corrupt runtime
+                   state if reflection is used. Use external packages like `Cthulhu.jl`
+                   introspecting compiler behavior with a separated cache partition.
+
+2. `:codegen`   - Activate the compiler for internal codegen purposes. The new compiler
+                  will be invoked whenever the runtime requests compilation.
+
+`@activate Compiler` without options is equivalent to `@activate Compiler[:reflection]`.
+
+"""
+macro activate(what)
+    options = Symbol[]
+    if isexpr(what, :ref)
+        Component = what.args[1]
+        for i = 2:length(what.args)
+            arg = what.args[i]
+            if !isa(arg, QuoteNode) || !isa(arg.value, Symbol)
+                error("Usage Error: Option $arg is not a symbol")
+            end
+            push!(options, arg.value)
+        end
+    else
+        Component = what
+    end
+    if !isa(Component, Symbol)
+        error("Usage Error: Component $Component is not a symbol")
+    end
+    allowed_components = (:Compiler, :JuliaLowering)
+    if !(Component in allowed_components)
+        error("Usage Error: Component $Component is not recognized. Expected one of $allowed_components")
+    end
+    if Component === :Compiler && isempty(options)
+        push!(options, :reflection)
+    end
+    options = map(options) do opt
+        Expr(:kw, opt, true)
+    end
+    return :(Base.require($__module__, $(QuoteNode(Component))).activate!(; $(options...)))
+end
diff --git a/stdlib/InteractiveUtils/test/highlighting.jl b/stdlib/InteractiveUtils/test/highlighting.jl
index bac52e2945b5e..f49464557f926 100644
--- a/stdlib/InteractiveUtils/test/highlighting.jl
+++ b/stdlib/InteractiveUtils/test/highlighting.jl
@@ -34,7 +34,10 @@ end
     c = Base.text_colors[Base.warn_color()]
     InteractiveUtils.highlighting[:warntype] = false
     code_warntype(IOContext(io, :color => true), f, Tuple{Int64})
-    @test !occursin(c, String(take!(io)))
+    @test !any([
+        occursin("Body", line) && occursin(c, line)
+        for line in split(String(take!(io)), "\n")
+    ])
     InteractiveUtils.highlighting[:warntype] = true
     code_warntype(IOContext(io, :color => true), f, Tuple{Int64})
     @test occursin(c, String(take!(io)))
@@ -72,7 +75,7 @@ end
     @test occursin("\e", String(take!(io)))
 end
 
-function hilight_llvm(s)
+function highlight_llvm(s)
     io = IOBuffer()
     InteractiveUtils.print_llvm(IOContext(io, :color=>true), s)
     r = String(take!(io))
@@ -82,7 +85,7 @@ function hilight_llvm(s)
     flush(stdout)
     r
 end
-function hilight_native(s, arch)
+function highlight_native(s, arch)
     io = IOBuffer()
     InteractiveUtils.print_native(IOContext(io, :color=>true), s, arch)
     r = String(take!(io))
@@ -92,8 +95,8 @@ function hilight_native(s, arch)
     flush(stdout)
     r
 end
-hilight_x86(s) = hilight_native(s, :x86)
-hilight_arm(s) = hilight_native(s, :arm)
+highlight_x86(s) = highlight_native(s, :x86)
+highlight_arm(s) = highlight_native(s, :arm)
 
 function esc_code(s)
     io = IOBuffer()
@@ -124,41 +127,48 @@ const XU = B * "}" * XB
 
 @testset "LLVM IR" begin
     @testset "comment" begin
-        @test hilight_llvm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n"
+        @test highlight_llvm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n"
     end
-    @testset "lavel" begin
-        @test hilight_llvm("top:") == "$(L)top:$(XL)\n"
+    @testset "label" begin
+        @test highlight_llvm("top:") == "$(L)top:$(XL)\n"
 
-        @test hilight_llvm("L7:\t\t; preds = %top") ==
+        @test highlight_llvm("L7:\t\t; preds = %top") ==
             "$(L)L7:$(XL)\t\t$(C); preds = %top$(XC)\n"
+
+        @test highlight_llvm("  %\"box::GenericMemoryRef13\" = add i64 0, 0") ==
+            "  $(V)%\"box::GenericMemoryRef13\"$(XV) $EQU " *
+            "$(I)add$(XI) $(T)i64$(XT) $(N)0$(XN)$COM $(N)0$(XN)\n"
+
+        @test highlight_llvm("  \"label-as-string\":\t\t; preds = %top") ==
+            "  $(L)\"label-as-string\":$(XL)\t\t$(C); preds = %top$(XC)\n"
     end
     @testset "define" begin
-        @test hilight_llvm("define double @julia_func_1234(float) {") ==
+        @test highlight_llvm("define double @julia_func_1234(float) {") ==
             "$(K)define$(XK) $(T)double$(XT) " *
             "$(F)@julia_func_1234$(XF)$P$(T)float$(XT)$XP $U\n"
 
-        @test hilight_llvm("}") == "$XU\n"
+        @test highlight_llvm("}") == "$XU\n"
     end
 
     @testset "declare" begin
-        @test hilight_llvm("declare i32 @jl_setjmp(i8*) #2") ==
+        @test highlight_llvm("declare i32 @jl_setjmp(i8*) #2") ==
             "$(K)declare$(XK) $(T)i32$(XT) " *
             "$(F)@jl_setjmp$(XF)$P$(T)i8$(XT)$(D)*$(XD)$XP $(D)#2$(XD)\n"
     end
 
     @testset "type" begin
-        @test hilight_llvm("%jl_value_t = type opaque") ==
+        @test highlight_llvm("%jl_value_t = type opaque") ==
             "$(V)%jl_value_t$(XV) $EQU $(K)type$(XK) $(T)opaque$(XT)\n"
     end
 
     @testset "target" begin
         datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
-        @test hilight_llvm("target datalayout = \"$datalayout\"") ==
+        @test highlight_llvm("target datalayout = \"$datalayout\"") ==
             "$(K)target$(XK) $(K)datalayout$(XK) $EQU $(V)\"$datalayout\"$(XV)\n"
     end
 
     @testset "attributes" begin
-        @test hilight_llvm(
+        @test highlight_llvm(
             """attributes #1 = { uwtable "frame-pointer"="all" }""") ==
             "$(K)attributes$(XK) $(D)#1$(XD) $EQU " *
             "$U $(K)uwtable$(XK) $(V)\"frame-pointer\"$(XV)$EQU" *
@@ -166,57 +176,57 @@ const XU = B * "}" * XB
     end
 
     @testset "terminator" begin
-        @test hilight_llvm("  ret i8 %12") ==
+        @test highlight_llvm("  ret i8 %12") ==
             "  $(I)ret$(XI) $(T)i8$(XT) $(V)%12$(XV)\n"
 
-        @test hilight_llvm("  br i1 %2, label %L6, label %L4") ==
+        @test highlight_llvm("  br i1 %2, label %L6, label %L4") ==
             "  $(I)br$(XI) $(T)i1$(XT) $(V)%2$(XV)$COM " *
             "$(T)label$(XT) $(L)%L6$(XL)$COM $(T)label$(XT) $(L)%L4$(XL)\n"
 
-        @test hilight_llvm("  br label %L5") ==
+        @test highlight_llvm("  br label %L5") ==
             "  $(I)br$(XI) $(T)label$(XT) $(L)%L5$(XL)\n"
 
-        @test hilight_llvm("  unreachable") == "  $(I)unreachable$(XI)\n"
+        @test highlight_llvm("  unreachable") == "  $(I)unreachable$(XI)\n"
     end
 
     @testset "arithmetic" begin
-        @test hilight_llvm("   %11 = add nuw nsw i64 %value_phi10, 1") ==
+        @test highlight_llvm("   %11 = add nuw nsw i64 %value_phi10, 1") ==
             "   $(V)%11$(XV) $EQU $(I)add$(XI) $(K)nuw$(XK) $(K)nsw$(XK) " *
             "$(T)i64$(XT) $(V)%value_phi10$(XV)$COM $(N)1$(XN)\n"
 
-        @test hilight_llvm("   %13 = fadd double %12, -2.000000e+00") ==
+        @test highlight_llvm("   %13 = fadd double %12, -2.000000e+00") ==
             "   $(V)%13$(XV) $EQU $(I)fadd$(XI) " *
             "$(T)double$(XT) $(V)%12$(XV)$COM $(N)-2.000000e+00$(XN)\n"
 
-        @test hilight_llvm("      %21 = fmul contract double %20, 0x0123456789ABCDEF") ==
+        @test highlight_llvm("      %21 = fmul contract double %20, 0x0123456789ABCDEF") ==
             "      $(V)%21$(XV) $EQU $(I)fmul$(XI) $(K)contract$(XK) " *
             "$(T)double$(XT) $(V)%20$(XV)$COM $(N)0x0123456789ABCDEF$(XN)\n"
     end
 
     @testset "bitwise" begin
-        @test hilight_llvm("   %31 = shl i64 %value_phi4, 52") ==
+        @test highlight_llvm("   %31 = shl i64 %value_phi4, 52") ==
             "   $(V)%31$(XV) $EQU " *
             "$(I)shl$(XI) $(T)i64$(XT) $(V)%value_phi4$(XV)$COM $(N)52$(XN)\n"
     end
 
     @testset "aggregate" begin
-        @test hilight_llvm("    %4 = extractvalue { i64, i1 } %1, 0") ==
+        @test highlight_llvm("    %4 = extractvalue { i64, i1 } %1, 0") ==
             "    $(V)%4$(XV) $EQU $(I)extractvalue$(XI) " *
             "$U $(T)i64$(XT)$COM $(T)i1$(XT) $XU $(V)%1$(XV)$COM $(N)0$(XN)\n"
     end
 
     @testset "memory access" begin
-        @test hilight_llvm("  %dims = alloca [1 x i64], align 8") ==
+        @test highlight_llvm("  %dims = alloca [1 x i64], align 8") ==
             "  $(V)%dims$(XV) $EQU $(I)alloca$(XI) " *
             "$S$(N)1$(XN) $(D)x$(XD) $(T)i64$(XT)$XS$COM $(K)align$(XK) $(N)8$(XN)\n"
 
-        @test hilight_llvm("    %51 = load i32," *
+        @test highlight_llvm("    %51 = load i32," *
                            " i32* inttoptr (i64 226995504 to i32*), align 16") ==
             "    $(V)%51$(XV) $EQU $(I)load$(XI) $(T)i32$(XT)$COM " *
             "$(T)i32$(XT)$(D)*$(XD) $(K)inttoptr$(XK) $P$(T)i64$(XT) $(N)226995504$(XN) " *
             "$(K)to$(XK) $(T)i32$(XT)$(D)*$(XD)$XP$COM $(K)align$(XK) $(N)16$(XN)\n"
 
-        @test hilight_llvm("    %53 = load %jl_value_t addrspace(10)*, " *
+        @test highlight_llvm("    %53 = load %jl_value_t addrspace(10)*, " *
                            "%jl_value_t addrspace(10)* addrspace(11)* %52, align 8") ==
             "    $(V)%53$(XV) $EQU $(I)load$(XI) $(V)%jl_value_t$(XV) " *
             "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)*$(XD)$COM " *
@@ -224,37 +234,37 @@ const XU = B * "}" * XB
             "$(K)addrspace$(XK)$P$(N)11$(XN)$XP$(D)*$(XD) " *
             "$(V)%52$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n"
 
-        @test hilight_llvm("    store i64 %61, i64 addrspace(11)* %60, align 8") ==
+        @test highlight_llvm("    store i64 %61, i64 addrspace(11)* %60, align 8") ==
             "    $(I)store$(XI) $(T)i64$(XT) $(V)%61$(XV)$COM " *
             "$(T)i64$(XT) $(K)addrspace$(XK)$P$(N)11$(XN)$XP$(D)*$(XD) " *
             "$(V)%60$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n"
 
-        @test hilight_llvm("  store volatile %jl_value_t addrspace(10)** %62, " *
+        @test highlight_llvm("  store volatile %jl_value_t addrspace(10)** %62, " *
                            "%jl_value_t addrspace(10)*** %63, align 8") ==
             "  $(I)store$(XI) $(K)volatile$(XK) $(V)%jl_value_t$(XV) " *
             "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)**$(XD) $(V)%62$(XV)$COM " *
             "$(V)%jl_value_t$(XV) $(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)***$(XD) " *
             "$(V)%63$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n"
 
-        @test hilight_llvm("     %71 = getelementptr i8, i8* %70, i64 8") ==
+        @test highlight_llvm("     %71 = getelementptr i8, i8* %70, i64 8") ==
             "     $(V)%71$(XV) $EQU $(I)getelementptr$(XI) $(T)i8$(XT)$COM " *
             "$(T)i8$(XT)$(D)*$(XD) $(V)%70$(XV)$COM $(T)i64$(XT) $(N)8$(XN)\n"
     end
 
     @testset "conversion" begin
-        @test hilight_llvm("  %22 = zext i1 %21 to i8") ==
+        @test highlight_llvm("  %22 = zext i1 %21 to i8") ==
             "  $(V)%22$(XV) $EQU $(I)zext$(XI) $(T)i1$(XT) $(V)%21$(XV) " *
             "$(K)to$(XK) $(T)i8$(XT)\n"
 
-        @test hilight_llvm("     %24 = sitofp i64 %23 to double") ==
+        @test highlight_llvm("     %24 = sitofp i64 %23 to double") ==
             "     $(V)%24$(XV) $EQU $(I)sitofp$(XI) $(T)i64$(XT) $(V)%23$(XV) " *
             "$(K)to$(XK) $(T)double$(XT)\n"
 
-        @test hilight_llvm("  %26 = ptrtoint i8* %25 to i64") ==
+        @test highlight_llvm("  %26 = ptrtoint i8* %25 to i64") ==
             "  $(V)%26$(XV) $EQU $(I)ptrtoint$(XI) $(T)i8$(XT)$(D)*$(XD) " *
             "$(V)%25$(XV) $(K)to$(XK) $(T)i64$(XT)\n"
 
-        @test hilight_llvm("  %28 = bitcast %jl_value_t addrspace(10)* %27 " *
+        @test highlight_llvm("  %28 = bitcast %jl_value_t addrspace(10)* %27 " *
                            "to [2 x i16] addrspace(10)*") ==
             "  $(V)%28$(XV) $EQU $(I)bitcast$(XI) $(V)%jl_value_t$(XV) " *
             "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)*$(XD) $(V)%27$(XV) " *
@@ -263,20 +273,20 @@ const XU = B * "}" * XB
     end
 
     @testset "other" begin
-        @test hilight_llvm("  %31 = icmp slt i64 %30, 0") ==
+        @test highlight_llvm("  %31 = icmp slt i64 %30, 0") ==
             "  $(V)%31$(XV) $EQU $(I)icmp$(XI) $(I)slt$(XI) " *
             "$(T)i64$(XT) $(V)%30$(XV)$COM $(N)0$(XN)\n"
 
-        @test hilight_llvm("  %value_phi34 = phi double [ %33, %L50 ], [ %32, %L60 ]") ==
+        @test highlight_llvm("  %value_phi34 = phi double [ %33, %L50 ], [ %32, %L60 ]") ==
             "  $(V)%value_phi34$(XV) $EQU $(I)phi$(XI) $(T)double$(XT) " *
             "$S $(V)%33$(XV)$COM $(L)%L50$(XL) $XS$COM " *
             "$S $(V)%32$(XV)$COM $(L)%L60$(XL) $XS\n"
 
-        @test hilight_llvm("   %.v = select i1 %35, i64 %36, i64 63") ==
+        @test highlight_llvm("   %.v = select i1 %35, i64 %36, i64 63") ==
             "   $(V)%.v$(XV) $EQU $(I)select$(XI) $(T)i1$(XT) $(V)%35$(XV)$COM " *
             "$(T)i64$(XT) $(V)%36$(XV)$COM $(T)i64$(XT) $(N)63$(XN)\n"
 
-        @test hilight_llvm("   %38 = call i64 @llvm.cttz.i64(i64 %37, i1 false)") ==
+        @test highlight_llvm("   %38 = call i64 @llvm.cttz.i64(i64 %37, i1 false)") ==
             "   $(V)%38$(XV) $EQU $(I)call$(XI) $(T)i64$(XT) " *
             "$(F)@llvm.cttz.i64$(XF)$P$(T)i64$(XT) $(V)%37$(XV)$COM " *
             "$(T)i1$(XT) $(K)false$(XK)$XP\n"
@@ -285,133 +295,133 @@ end
 
 @testset "x86 ASM" begin
     @testset "comment" begin
-        @test hilight_x86("; comment ; // # ") == "$(C); comment ; // # $(XC)\n"
+        @test highlight_x86("; comment ; // # ") == "$(C); comment ; // # $(XC)\n"
     end
     @testset "label" begin
-        @test hilight_x86("L123:") == "$(L)L123:$(XL)\n"
+        @test highlight_x86("L123:") == "$(L)L123:$(XL)\n"
     end
     @testset "directive" begin
-        @test hilight_x86("\t.text") == "\t$(D).text$(XD)\n"
+        @test highlight_x86("\t.text") == "\t$(D).text$(XD)\n"
     end
 
     @testset "0-operand" begin
         # AT&T
-        @test hilight_x86("\tretq") == "\t$(I)retq$(XI)\n"
+        @test highlight_x86("\tretq") == "\t$(I)retq$(XI)\n"
 
         # Intel
-        @test hilight_x86("\tret") == "\t$(I)ret$(XI)\n"
+        @test highlight_x86("\tret") == "\t$(I)ret$(XI)\n"
     end
     @testset "1-operand" begin
         # AT&T
-        @test hilight_x86("\tpopq\t%rax") == "\t$(I)popq$(XI)\t$(V)%rax$(XV)\n"
+        @test highlight_x86("\tpopq\t%rax") == "\t$(I)popq$(XI)\t$(V)%rax$(XV)\n"
 
-        @test hilight_x86("\tpushl\t\$4294967295\t# imm = 0xFFFFFFFF") ==
+        @test highlight_x86("\tpushl\t\$4294967295\t# imm = 0xFFFFFFFF") ==
             "\t$(I)pushl$(XI)\t$(N)\$4294967295$(XN)\t$(C)# imm = 0xFFFFFFFF$(XC)\n"
 
-        @test hilight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n"
+        @test highlight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n"
 
-        @test hilight_x86("\tnopw\t%cs:(%rax,%rax)") ==
+        @test highlight_x86("\tnopw\t%cs:(%rax,%rax)") ==
             "\t$(I)nopw$(XI)\t$(V)%cs$(XV)$COL$P$(V)%rax$(XV)$COM$(V)%rax$(XV)$XP\n"
 
         # Intel
-        @test hilight_x86("\tpop\trax") == "\t$(I)pop$(XI)\t$(V)rax$(XV)\n"
+        @test highlight_x86("\tpop\trax") == "\t$(I)pop$(XI)\t$(V)rax$(XV)\n"
 
-        @test hilight_x86("\tpush\t4294967295") ==
+        @test highlight_x86("\tpush\t4294967295") ==
             "\t$(I)push$(XI)\t$(N)4294967295$(XN)\n"
 
-        @test hilight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n"
+        @test highlight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n"
 
-        @test hilight_x86("\tnop\tword ptr cs:[rax + rax]") ==
+        @test highlight_x86("\tnop\tword ptr cs:[rax + rax]") ==
             "\t$(I)nop$(XI)\t$(K)word$(XK) $(K)ptr$(XK) " *
             "$(V)cs$(XV)$COL$S$(V)rax$(XV) $(D)+$(XD) $(V)rax$(XV)$XS\n"
     end
     @testset "2-operand" begin
         # AT&T
-        @test hilight_x86("\tshrq\t\$63, %rcx") ==
+        @test highlight_x86("\tshrq\t\$63, %rcx") ==
             "\t$(I)shrq$(XI)\t$(N)\$63$(XN)$COM $(V)%rcx$(XV)\n"
 
-        @test hilight_x86("\tvmovsd\t(%rsi,%rdx,8), %xmm1\t# xmm1 = mem[0],zero") ==
+        @test highlight_x86("\tvmovsd\t(%rsi,%rdx,8), %xmm1\t# xmm1 = mem[0],zero") ==
             "\t$(I)vmovsd$(XI)\t$P$(V)%rsi$(XV)$COM$(V)%rdx$(XV)$COM$(N)8$(XN)$XP" *
             "$COM $(V)%xmm1$(XV)\t$(C)# xmm1 = mem[0],zero$(XC)\n"
 
-        @test hilight_x86("\tmovabsq\t\$\"#string#338\", %rax") ==
+        @test highlight_x86("\tmovabsq\t\$\"#string#338\", %rax") ==
             "\t$(I)movabsq$(XI)\t$(F)\$\"#string#338\"$(XF)$COM $(V)%rax$(XV)\n"
 
         # Intel
-        @test hilight_x86("\tshr\trcx, 63") ==
+        @test highlight_x86("\tshr\trcx, 63") ==
             "\t$(I)shr$(XI)\t$(V)rcx$(XV)$COM $(N)63$(XN)\n"
 
-        @test hilight_x86(
+        @test highlight_x86(
             "\tvmovsd\txmm1, dword ptr [rsi + 8*rdx]\t# xmm1 = mem[0],zero") ==
             "\t$(I)vmovsd$(XI)\t$(V)xmm1$(XV)$COM $(K)dword$(XK) $(K)ptr$(XK) " *
             "$S$(V)rsi$(XV) $(D)+$(XD) $(N)8$(XN)$(D)*$(XD)$(V)rdx$(XV)$XS" *
             "\t$(C)# xmm1 = mem[0],zero$(XC)\n"
 
-        @test hilight_x86("\tmovabs\trax, offset \"#string#338\"") ==
+        @test highlight_x86("\tmovabs\trax, offset \"#string#338\"") ==
             "\t$(I)movabs$(XI)\t$(V)rax$(XV)$COM " *
             "$(K)offset$(XK) $(F)\"#string#338\"$(XF)\n"
     end
     @testset "3-operand" begin
         # AT&T
-        @test hilight_x86("\tvaddsd\t(%rax), %xmm0, %xmm0") ==
+        @test highlight_x86("\tvaddsd\t(%rax), %xmm0, %xmm0") ==
             "\t$(I)vaddsd$(XI)\t$P$(V)%rax$(XV)$XP$COM " *
             "$(V)%xmm0$(XV)$COM $(V)%xmm0$(XV)\n"
 
         # Intel
-        @test hilight_x86("\tvaddsd\txmm0, xmm0, qword ptr [rax]") ==
+        @test highlight_x86("\tvaddsd\txmm0, xmm0, qword ptr [rax]") ==
             "\t$(I)vaddsd$(XI)\t$(V)xmm0$(XV)$COM $(V)xmm0$(XV)$COM " *
             "$(K)qword$(XK) $(K)ptr$(XK) $S$(V)rax$(XV)$XS\n"
     end
     @testset "4-operand" begin
         # AT&T
-        @test hilight_x86("\tvroundsd\t\$4, %xmm1, %xmm1, %xmm1") ==
+        @test highlight_x86("\tvroundsd\t\$4, %xmm1, %xmm1, %xmm1") ==
             "\t$(I)vroundsd$(XI)\t$(N)\$4$(XN)$COM " *
             "$(V)%xmm1$(XV)$COM $(V)%xmm1$(XV)$COM $(V)%xmm1$(XV)\n"
 
         # Intel
-        @test hilight_x86("\tvroundsd\txmm1, xmm1, xmm1, 4") ==
+        @test highlight_x86("\tvroundsd\txmm1, xmm1, xmm1, 4") ==
             "\t$(I)vroundsd$(XI)\t" *
             "$(V)xmm1$(XV)$COM $(V)xmm1$(XV)$COM $(V)xmm1$(XV)$COM $(N)4$(XN)\n"
     end
     @testset "AVX-512" begin
         # AT&T
-        @test hilight_x86("\tvmovaps\t(%eax), %zmm0") ==
+        @test highlight_x86("\tvmovaps\t(%eax), %zmm0") ==
             "\t$(I)vmovaps$(XI)\t$P$(V)%eax$(XV)$XP$COM $(V)%zmm0$(XV)\n"
 
-        @test hilight_x86("\tvpaddd\t%zmm3, %zmm1, %zmm1 {%k1}") ==
+        @test highlight_x86("\tvpaddd\t%zmm3, %zmm1, %zmm1 {%k1}") ==
             "\t$(I)vpaddd$(XI)\t$(V)%zmm3$(XV)$COM $(V)%zmm1$(XV)$COM " *
             "$(V)%zmm1$(XV) $U$(V)%k1$(XV)$XU\n"
 
-        @test hilight_x86("\tvdivpd\t%zmm3, %zmm1, %zmm0 {%k1} {z}") ==
+        @test highlight_x86("\tvdivpd\t%zmm3, %zmm1, %zmm0 {%k1} {z}") ==
             "\t$(I)vdivpd$(XI)\t$(V)%zmm3$(XV)$COM $(V)%zmm1$(XV)$COM " *
             "$(V)%zmm0$(XV) $U$(V)%k1$(XV)$XU $U$(K)z$(XK)$XU\n"
 
-        @test hilight_x86("\tvdivps\t(%ebx){1to16}, %zmm5, %zmm4") ==
+        @test highlight_x86("\tvdivps\t(%ebx){1to16}, %zmm5, %zmm4") ==
             "\t$(I)vdivps$(XI)\t$P$(V)%ebx$(XV)$XP$U$(K)1to16$(XK)$XU$COM " *
             "$(V)%zmm5$(XV)$COM $(V)%zmm4$(XV)\n"
 
-        @test hilight_x86("\tvcvtsd2si\t{rn-sae}, %xmm0, %eax") ==
+        @test highlight_x86("\tvcvtsd2si\t{rn-sae}, %xmm0, %eax") ==
             "\t$(I)vcvtsd2si$(XI)\t$U$(K)rn-sae$(XK)$XU$COM " *
             "$(V)%xmm0$(XV)$COM $(V)%eax$(XV)\n"
 
         # Intel
-        @test hilight_x86("\tvmovaps\tzmm0, zmmword ptr [eax]") ==
+        @test highlight_x86("\tvmovaps\tzmm0, zmmword ptr [eax]") ==
             "\t$(I)vmovaps$(XI)\t$(V)zmm0$(XV)$COM " *
             "$(K)zmmword$(XK) $(K)ptr$(XK) $S$(V)eax$(XV)$XS\n"
 
-        @test hilight_x86("\tvpaddd\tzmm1 {k1}, zmm1, zmm3") ==
+        @test highlight_x86("\tvpaddd\tzmm1 {k1}, zmm1, zmm3") ==
             "\t$(I)vpaddd$(XI)\t$(V)zmm1$(XV) $U$(V)k1$(XV)$XU$COM " *
             "$(V)zmm1$(XV)$COM $(V)zmm3$(XV)\n"
 
-        @test hilight_x86("\tvdivpd\tzmm0 {k1} {z}, zmm1, zmm3") ==
+        @test highlight_x86("\tvdivpd\tzmm0 {k1} {z}, zmm1, zmm3") ==
             "\t$(I)vdivpd$(XI)\t$(V)zmm0$(XV) $U$(V)k1$(XV)$XU $U$(K)z$(XK)$XU$COM " *
             "$(V)zmm1$(XV)$COM $(V)zmm3$(XV)\n"
 
-        @test hilight_x86("\tvdivps\tzmm4, zmm5, dword ptr [ebx]{1to16}") ==
+        @test highlight_x86("\tvdivps\tzmm4, zmm5, dword ptr [ebx]{1to16}") ==
             "\t$(I)vdivps$(XI)\t$(V)zmm4$(XV)$COM $(V)zmm5$(XV)$COM " *
             "$(K)dword$(XK) $(K)ptr$(XK) $S$(V)ebx$(XV)$XS$U$(K)1to16$(XK)$XU\n"
 
-        @test hilight_x86("\tvcvtsd2si\teax, xmm0$(XV), {rn-sae}") ==
+        @test highlight_x86("\tvcvtsd2si\teax, xmm0$(XV), {rn-sae}") ==
             "\t$(I)vcvtsd2si$(XI)\t$(V)eax$(XV)$COM " *
             "$(V)xmm0$(XV)$COM $U$(K)rn-sae$(XK)$XU\n"
     end
@@ -419,74 +429,74 @@ end
 
 @testset "ARM ASM" begin
     @testset "comment" begin
-        @test hilight_arm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n"
+        @test highlight_arm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n"
     end
     @testset "label" begin
-        @test hilight_arm("L45:") == "$(L)L45:$(XL)\n"
+        @test highlight_arm("L45:") == "$(L)L45:$(XL)\n"
     end
     @testset "directive" begin
-        @test hilight_arm("\t.text") == "\t$(D).text$(XD)\n"
+        @test highlight_arm("\t.text") == "\t$(D).text$(XD)\n"
     end
 
     @testset "0-operand" begin
-        @test hilight_arm("\tret") == "\t$(I)ret$(XI)\n"
+        @test highlight_arm("\tret") == "\t$(I)ret$(XI)\n"
     end
     @testset "1-operand" begin
-        @test hilight_arm("\tbl\t0x12") == "\t$(I)bl$(XI)\t$(N)0x12$(XN)\n"
+        @test highlight_arm("\tbl\t0x12") == "\t$(I)bl$(XI)\t$(N)0x12$(XN)\n"
 
-        @test hilight_arm("\tb\tL345") == "\t$(I)b$(XI)\t$(L)L345$(XL)\n"
+        @test highlight_arm("\tb\tL345") == "\t$(I)b$(XI)\t$(L)L345$(XL)\n"
 
-        @test hilight_arm("\tb.gt\tL67") == "\t$(I)b.gt$(XI)\t$(L)L67$(XL)\n"
+        @test highlight_arm("\tb.gt\tL67") == "\t$(I)b.gt$(XI)\t$(L)L67$(XL)\n"
 
-        @test hilight_arm("\tpop\t{r11, pc}") ==
+        @test highlight_arm("\tpop\t{r11, pc}") ==
             "\t$(I)pop$(XI)\t$U$(V)r11$(XV)$COM $(V)pc$(XV)$XU\n"
     end
     @testset "2-operand" begin
-        @test hilight_arm("\tcmp\tx10, #2047\t// =2047") ==
+        @test highlight_arm("\tcmp\tx10, #2047\t// =2047") ==
             "\t$(I)cmp$(XI)\t$(V)x10$(XV)$COM $(N)#2047$(XN)\t$(C)// =2047$(XC)\n"
 
-        @test hilight_arm("\tldr\td1, [x10]") ==
+        @test highlight_arm("\tldr\td1, [x10]") ==
             "\t$(I)ldr$(XI)\t$(V)d1$(XV)$COM $S$(V)x10$(XV)$XS\n"
 
-        @test hilight_arm("\tstr\tx30, [sp, #-16]!") ==
+        @test highlight_arm("\tstr\tx30, [sp, #-16]!") ==
             "\t$(I)str$(XI)\t$(V)x30$(XV)$COM " *
             "$S$(V)sp$(XV)$COM $(N)#-16$(XN)$XS$(K)!$(XK)\n"
 
-        @test hilight_arm("\tmov\tv0.16b, v1.16b") ==
+        @test highlight_arm("\tmov\tv0.16b, v1.16b") ==
             "\t$(I)mov$(XI)\t$(V)v0.16b$(XV)$COM $(V)v1.16b$(XV)\n"
     end
     @testset "3-operand" begin
-        @test hilight_arm("\tfmul\td2, d0, d2") ==
+        @test highlight_arm("\tfmul\td2, d0, d2") ==
             "\t$(I)fmul$(XI)\t$(V)d2$(XV)$COM $(V)d0$(XV)$COM $(V)d2$(XV)\n"
 
-        @test hilight_arm("\tmovk\tx10, #65535, lsl #32") ==
+        @test highlight_arm("\tmovk\tx10, #65535, lsl #32") ==
             "\t$(I)movk$(XI)\t$(V)x10$COM $(N)#65535$(XN)$COM $(K)lsl$(XK) $(N)#32$(XN)\n"
 
-        @test hilight_arm("\tcneg\tx8, x8, ge") ==
+        @test highlight_arm("\tcneg\tx8, x8, ge") ==
             "\t$(I)cneg$(XI)\t$(V)x8$(XV)$COM $(V)x8$(XV)$COM $(K)ge$(XK)\n"
     end
     @testset "4-operand" begin
-        @test hilight_arm("\tadd\tx8, x9, x8, lsl #52") ==
+        @test highlight_arm("\tadd\tx8, x9, x8, lsl #52") ==
             "\t$(I)add$(XI)\t$(V)x8$(XV)$COM $(V)x9$(XV)$COM $(V)x8$(XV)$COM " *
             "$(K)lsl$(XK) $(N)#52$(XN)\n"
 
-        @test hilight_arm("\tfcsel\td1, d0, d1, eq") ==
+        @test highlight_arm("\tfcsel\td1, d0, d1, eq") ==
             "\t$(I)fcsel$(XI)\t" *
             "$(V)d1$(XV)$COM $(V)d0$(XV)$COM $(V)d1$(XV)$COM $(K)eq$(XK)\n"
     end
     @testset "NEON" begin
-        hilight_arm("\tvmul.f32\tq8, q9, q8") ==
+        highlight_arm("\tvmul.f32\tq8, q9, q8") ==
             "\t$(I)vmul.f32$(XI)\t$(V)q8$(XV)$COM $(V)q9$(XV)$COM $(V)q8$(XV)\n"
-        hilight_arm("\tvcvt.s32.f64\ts2, d20") ==
+        highlight_arm("\tvcvt.s32.f64\ts2, d20") ==
             "\t$(I)vcvt.s32.f64$(XI)\t$(V)s2$(XV)$COM $(V)d20$(XV)\n"
-        hilight_arm("\tvld1.32\t{d18, d19}, [r1]") ==
+        highlight_arm("\tvld1.32\t{d18, d19}, [r1]") ==
             "\t$(I)vld1.32$(XI)\t$U$(V)d18$(XV)$COM $(V)d19$(XV)$XU$COM $S$(V)r1$(XV)$XS\n"
     end
     @testset "SVE" begin
-        hilight_arm("\tld1d\tz1.d, p0/z, [x0, x4, lsl #3]") ==
+        highlight_arm("\tld1d\tz1.d, p0/z, [x0, x4, lsl #3]") ==
             "\t$(I)ld1d$(XI)\t$(V)z1.d$(XV)$COM " *
             "$(V)p0$(XV)$(K)/z$(XK)$COM " *
             "$S$(V)x0$(XV)$COM $(V)x4$(XV)$COM $(K)lsl$(XK) $(N)#3$(XN)$XS\n"
-        hilight_arm("\tb.first\tL123") == "\t$(I)b.first$(XI)\t$(L)L123$(XL)"
+        highlight_arm("\tb.first\tL123") == "\t$(I)b.first$(XI)\t$(L)L123$(XL)"
     end
 end
diff --git a/stdlib/InteractiveUtils/test/runtests.jl b/stdlib/InteractiveUtils/test/runtests.jl
index 5f90491fd8151..d94f84bdb486f 100644
--- a/stdlib/InteractiveUtils/test/runtests.jl
+++ b/stdlib/InteractiveUtils/test/runtests.jl
@@ -138,6 +138,11 @@ tag = "ANY"
 @test !warntype_hastag(ImportIntrinsics15819.sqrt15819, Tuple{Float64}, tag)
 @test !warntype_hastag(ImportIntrinsics15819.sqrt15819, Tuple{Float32}, tag)
 
+@testset "code_warntype OpaqueClosure" begin
+    g = Base.Experimental.@opaque Tuple{Float64}->_ x -> 0.0
+    @test warntype_hastag(g, Tuple{Float64}, "::Float64")
+end
+
 end # module WarnType
 
 # Adds test for PR #17636
@@ -229,7 +234,7 @@ module Tmp14173
 end
 varinfo(Tmp14173) # warm up
 const MEMDEBUG = ccall(:jl_is_memdebug, Bool, ())
-@test @allocated(varinfo(Tmp14173)) < (MEMDEBUG ? 300000 : 100000)
+@test @allocated(varinfo(Tmp14173)) < (MEMDEBUG ? 300000 : 125000)
 
 # PR #24997: test that `varinfo` doesn't fail when encountering `missing`
 module A
@@ -279,31 +284,224 @@ let x..y = 0
     @test (@which 1..2).name === :..
 end
 
+# issue #53691
+let a = -1
+    @test (@which 2^a).name === :^
+    @test (@which 2^0x1).name === :^
+end
+
+let w = Vector{Any}(undef, 9)
+    @testset "@which x^literal" begin
+        w[1] = @which 2^0
+        w[2] = @which 2^1
+        w[3] = @which 2^2
+        w[4] = @which 2^3
+        w[5] = @which 2^-1
+        w[6] = @which 2^-2
+        w[7] = @which 2^10
+        w[8] = @which big(2.0)^1
+        w[9] = @which big(2.0)^-1
+        @test all(getproperty.(w, :name) .=== :literal_pow)
+        @test length(Set(w)) == length(w) # all methods distinct
+    end
+end
+
+# PR 53713
+if Int === Int64
+    # literal_pow only for exponents x: -2^63 <= x < 2^63 #53860 (all Int)
+    @test (@which 2^-9223372036854775809).name === :^
+    @test (@which 2^-9223372036854775808).name === :literal_pow
+    @test (@which 2^9223372036854775807).name === :literal_pow
+    @test (@which 2^9223372036854775808).name === :^
+elseif Int === Int32
+    # literal_pow only for exponents x: -2^31 <= x < 2^31 #53860 (all Int)
+    @test (@which 2^-2147483649).name === :^
+    @test (@which 2^-2147483648).name === :literal_pow
+    @test (@which 2^2147483647).name === :literal_pow
+    @test (@which 2^2147483648).name === :^
+end
+
 # issue #13464
 try
     @which x = 1
     error("unexpected")
 catch err13464
-    @test startswith(err13464.msg, "expression is not a function call, or is too complex")
+    @test startswith(err13464.msg, "expression is not a function call")
+end
+
+@testset "Single-argument forms" begin
+    a = which(+, (Int, Int))
+    b = which((typeof(+), Int, Int))
+    c = which(Tuple{typeof(+), Int, Int})
+    @test a == b == c
+
+    a = functionloc(+, (Int, Int))
+    b = functionloc((typeof(+), Int, Int))
+    c = functionloc(Tuple{typeof(+), Int, Int})
+    @test a == b == c
+end
+
+# PR 57909
+@testset "Support for type annotations as arguments" begin
+    @testset "`getindex`/`setindex!`" begin
+        @test (@which (::Vector{Int})[::Int]).name === :getindex
+        @test (@which [1, 2][::Int]).name === :getindex
+        @test (@which [1 2][begin, ::Int]).name === :getindex
+        @test (@which [1 2][::Int, end]).name === :getindex
+        @test (@which [1 2][begin, end]).name === :getindex
+        @test (@which [1 2][1:end]).name === :getindex
+        @test (@which [1 2][begin:end]).name === :getindex
+        @test_throws "`begin` or `end` cannot be used" (@which (::Vector{Int})[begin])
+        @test_throws "`begin` or `end` cannot be used" (@which (::Vector{Int})[end])
+        @test (@which (::Vector{Int})[::Int] = ::Int).name === :setindex!
+    end
+
+    @testset "`getproperty`/`setproperty!`" begin
+        @test (@which (::Base.RefValue{Int}).x).name === :getproperty
+        @test (@which (::Base.RefValue{Int}).x = ::Int).name === :setproperty!
+    end
+
+    @testset "Array syntax" begin
+        @test (@which [::Int]).name === :vect
+        @test (@which [undef_var::Int]).name === :vect
+        @test (@which [::Int 2]).name === :hcat
+        @test (@which [::Int; 2]).name === :vcat
+        @test (@which Int[::Int 2]).name === :typed_hcat
+        @test (@which Int[::Int; 2]).name === :typed_vcat
+        @test (@which [::Int 2;3 (::Int)]).name === :hvcat
+        @test (@which Int[::Int 2;3 (::Int)]).name === :typed_hvcat
+        @test (@which (::Type{Int})[::Int 2;3 (::Int)]).name === :typed_hvcat
+        @test (@which (::Type{T})[::T 2;3 (::T)] where {T<:Real}).name === :typed_hvcat
+    end
+
+    @test (@which (::Float64)^2).name === :literal_pow
+    @test (@which (::Vector{Float64})').name === :adjoint
+    @test (@which "$(::Symbol) is a symbol").sig === Tuple{typeof(string), Vararg{Union{Char, String, Symbol}}}
+    @test (@which (::Int)^4).name === :literal_pow
+    @test (@which +(some_x::Int, some_y::Float64)).name === :+
+    @test (@which +(::Any, ::Any, ::Any, ::Any...)).sig === Tuple{typeof(+), Any, Any, Any, Vararg{Any}}
+    @test (@which +(::Any, ::Any, ::Any, ::Vararg{Any})).sig === Tuple{typeof(+), Any, Any, Any, Vararg{Any}}
+    n = length(@code_typed +(::Float64, ::Vararg{Float64}))
+    @test n ≥ 2
+    @test length(@code_typed +(::Float64, ::Float64...)) == n
+    @test (@which +(1, ::Float64)).sig === Tuple{typeof(+), Number, Number}
+    @test (@which +((1, 2)...)).name === :+
+    @test (@which (::typeof(+))(::Int, ::Float64)).sig === Tuple{typeof(+), Number, Number}
+    @test (@code_typed .+(::Float64, ::Vector{Float64})) isa Pair
+    @test (@code_typed .+(::Float64, .*(::Vector{Float64}, ::Int))) isa Pair
+    @test (@which +(::T, ::T) where {T<:Number}).sig === Tuple{typeof(+), T, T} where {T<:Number}
+
+    @testset "Keyword arguments" begin
+        @test (@which round(::Float64; digits=3)).name === :round
+        @test (@which round(1.2; digits = ::Int)).name === :round
+        @test (@which round(1.2; digits::Int)).name === :round
+        @test (@code_typed round(::T; digits = ::T) where {T<:Float64})[2] === Union{}
+        @test (@code_typed round(::T; digits = ::T) where {T<:Int})[2] === Float64
+        base = 10
+        kwargs_1 = (; digits = 3)
+        kwargs_2 = (; sigdigits = 3)
+        @test (@which round(1.2; kwargs_1...)).name === :round
+        @test (@which round(1.2; digits = 1, kwargs_1...)).name === :round
+        @test (@code_typed round(1.2; digits = ::Float64, kwargs_1...))[2] === Float64 # picks `3::Int` from `kwargs_1`
+        @test (@code_typed round(1.2; kwargs_1..., digits = ::Float64))[2] === Union{} # picks `::Float64` from parameters
+        @test (@which round(1.2; digits = ::Float64, kwargs_1...)).name === :round
+        @test (@which round(1.2; sigdigits = ::Int, kwargs_1...)).name === :round
+        @test (@which round(1.2; kwargs_1..., kwargs_2..., base)).name === :round
+    end
+
+    @testset "Broadcasting" begin
+        @test (@code_typed optimize=false round.([1.0, 2.0]; digits = ::Int64))[2] == Vector{Float64}
+        @test (@code_typed optimize=false round.(::Vector{Float64}, base = 2; digits = ::Int64))[2] == Vector{Float64}
+        @test (@code_typed optimize=false round.(base = ::Int64, ::Vector{Float64}; digits = ::Int64))[2] == Vector{Float64}
+        @test (@code_typed optimize=false [1, 2] .= ::Int)[2] == Vector{Int}
+        @test (@code_typed optimize=false ::Vector{Int} .= ::Int)[2] == Vector{Int}
+        @test (@code_typed optimize=false ::Vector{Float64} .= 1 .+ ::Vector{Int})[2] == Vector{Float64}
+        @test (@code_typed optimize=false ::Vector{Float64} .= 1 .+ round.(base = ::Int, ::Vector{Int}; digits = 3))[2] == Vector{Float64}
+    end
+
+    @testset "Callable objects" begin
+        @test (@code_typed (::Base.Fix2{typeof(+), Float64})(3))[2] === Float64
+        @test (@code_typed optimize=false (::Returns{Float64})(::Int64; name::String))[2] === Float64
+        @test (@code_typed (::Returns{T})(3.0) where {T<:Real})[2] === Real
+    end
+
+    @testset "Opaque closures" begin
+        opaque_f(@nospecialize(x::Type), @nospecialize(y::Type)) = sizeof(x) == sizeof(y)
+        src, _ = only(code_typed(opaque_f, (Type, Type)))
+        src.slottypes[1] = Tuple{}
+
+        # from CodeInfo
+        oc = Core.OpaqueClosure(src; sig = Tuple{Type, Type}, rettype = Bool, nargs = 2)
+        ret = @code_typed oc(Int64, Float64)
+        @test [ret] == code_typed(oc)
+        _, rt = ret
+        @test rt === Bool
+
+        # from optimized IR
+        ir = Core.Compiler.inflate_ir(src)
+        oc = Core.OpaqueClosure(ir)
+        ret = @code_typed oc(Int64, Float64)
+        @test [ret] == code_typed(oc)
+        _, rt = ret
+        @test rt === Bool
+    end
+
+    @testset "Vararg handling" begin
+        @test_throws "More than one `Core.Vararg`" @eval @code_typed +(1, 2::Vararg{Int}, 3, 4::Vararg{Float64})
+        @test_throws "Inconsistent type `Float64`" @eval @code_typed +(1, 2, 3, 4::Vararg{Int}, 5.0)
+        @test_throws "Inconsistent type `Float64`" @eval @code_typed +(1, 2, 3, 4::Int..., 5.0)
+        @test_throws "Inconsistent type `Float64`" @eval @code_typed +(1, 2, 3, 4::Vararg{Int}, ::Float64)
+        @test_throws "Inconsistent type `Any`" @eval @code_typed +(1, 2, 3, 4::Vararg{Int}, ::Any)
+        @test_throws r"at most 2 types .* found 3 instead" @eval @code_typed +(1, 2, 3, 4::Vararg{Int,2}, 5, 6)
+        @test (@code_typed +(1, 2, 3, 4::Vararg{Int}))[2] === Int
+        @test (@code_typed +(1, 2, 3, 4::Vararg{Int}, 5))[2] === Int
+        @test (@code_typed +(1, 2, 3, 4::Vararg{Int, 3}))[2] === Int
+        @test (@code_typed +(1, 2, 3, 4::Vararg{Int, 3}, 5))[2] === Int
+        @test (@code_typed +(1, 2, 3, 4::Vararg{Int, 3}, 5, 6))[2] === Int
+        @test (@code_typed +(1, 2, 3, 4::Vararg))[2] === Any
+        @test (@code_typed +(1, 2, 3, 4::Vararg, 5.0))[2] === Any
+        @test (@code_typed +(1, 2, 3, ::Int...))[2] === Int
+        @test (@code_typed +(1, 2, 3, ::Int..., 5))[2] === Int
+        # We just ignore the checks with `where` parameters for simplicity of implementation.
+        @test isa((@code_typed +(::T, ::Vararg{T}, ::T) where {T}), Vector{Any})
+        @test isa((@code_typed +(::T, ::Vararg{T}, ::Float64) where {T<:Real}), Vector{Any})
+        @test isa((@code_typed +(::T, ::Vararg{T}, ::Float64) where {T<:Real}), Vector{Any})
+        @test isa((@code_typed +(::T, ::Vararg{T}, ::Int) where {T<:Real}), Vector{Any})
+        @test isa((@code_typed +(::T, ::Vararg{Vector{T}}, ::Int) where {T<:Real}), Vector{Any})
+        @test isa((@code_typed +(::T, ::Vararg{Int}, ::T) where {T<:Real}), Vector{Any})
+    end
+end
+
+module HygieneTest
+const Int = Float64
+using InteractiveUtils: @code_typed
+macro escape_argument(ex) :(@code_typed $(esc(ex)) * 2.0) end
+macro escape_type_annotation(ex) :(@code_typed identity(::(typeof($(esc(ex)))))) end
+macro escape_all(ex) esc(:(@__MODULE__().@code_typed $ex)) end
+end # module
+
+(; var"@escape_argument", var"@escape_type_annotation", var"@escape_all") = HygieneTest
+@testset "Macro hygiene interactions" begin
+    _f = sum
+    @test (@escape_argument _f(Int[]))[2] == Float64
+    @test (@escape_type_annotation _f(Int[]))[2] == Int
+    @test (@escape_all _f(Int[]))[2] == Int
 end
 
 module MacroTest
-export @macrotest
+var"@which" = parentmodule(@__MODULE__).var"@which"
 macro macrotest(x::Int, y::Symbol) end
-macro macrotest(x::Int, y::Int)
-    nothing #This is here because of #15280
-end
+macro macrotest(x::Int, y::Int) end
 end
 
 let
-    using .MacroTest
     a = 1
-    m = getfield(@__MODULE__, Symbol("@macrotest"))
-    @test which(m, Tuple{LineNumberNode, Module, Int, Symbol}) == @which @macrotest 1 a
-    @test which(m, Tuple{LineNumberNode, Module, Int, Int}) == @which @macrotest 1 1
+    m = MacroTest.var"@macrotest"
+    @test which(m, Tuple{LineNumberNode, Module, Int, Symbol}) == @eval MacroTest @which @macrotest 1 a
+    @test which(m, Tuple{LineNumberNode, Module, Int, Int}) == @eval MacroTest @which @macrotest 1 1
 
     @test first(methods(m, Tuple{LineNumberNode, Module, Int, Int})) == @which MacroTest.@macrotest 1 1
-    @test functionloc(@which @macrotest 1 1) == @functionloc @macrotest 1 1
+    @test functionloc(@eval MacroTest @which @macrotest 1 1) == @functionloc MacroTest.@macrotest 1 1
 end
 
 mutable struct A18434
@@ -330,7 +528,9 @@ let _true = Ref(true), f, g, h
 end
 
 # manually generate a broken function, which will break codegen
-# and make sure Julia doesn't crash
+# and make sure Julia doesn't crash (when using a non-asserts build)
+is_asserts() = ccall(:jl_is_assertsbuild, Cint, ()) == 1
+if !is_asserts()
 @eval @noinline Base.@constprop :none f_broken_code() = 0
 let m = which(f_broken_code, ())
    let src = Base.uncompressed_ast(m)
@@ -345,39 +545,59 @@ _true = true
 # and show that we can still work around it
 @noinline g_broken_code() = _true ? 0 : h_broken_code()
 @noinline h_broken_code() = (g_broken_code(); f_broken_code())
-let err = tempname(),
+let errf = tempname(),
     old_stderr = stderr,
-    new_stderr = open(err, "w")
+    new_stderr = open(errf, "w")
     try
         redirect_stderr(new_stderr)
+        @test occursin("f_broken_code", sprint(code_native, h_broken_code, ()))
+        Libc.flush_cstdio()
         println(new_stderr, "start")
         flush(new_stderr)
-        @test occursin("h_broken_code", sprint(code_native, h_broken_code, ()))
+        @test_throws "could not compile the specified method" sprint(io -> code_native(io, f_broken_code, (), dump_module=true))
+        Libc.flush_cstdio()
+        println(new_stderr, "middle")
+        flush(new_stderr)
+        @test !isempty(sprint(io -> code_native(io, f_broken_code, (), dump_module=false)))
+        Libc.flush_cstdio()
+        println(new_stderr, "later")
+        flush(new_stderr)
+        @test invokelatest(g_broken_code) == 0
         Libc.flush_cstdio()
         println(new_stderr, "end")
         flush(new_stderr)
-        @eval @test g_broken_code() == 0
     finally
+        Libc.flush_cstdio()
         redirect_stderr(old_stderr)
         close(new_stderr)
-        let errstr = read(err, String)
+        let errstr = read(errf, String)
             @test startswith(errstr, """start
-                end
                 Internal error: encountered unexpected error during compilation of f_broken_code:
-                ErrorException(\"unsupported or misplaced expression \"invalid\" in function f_broken_code\")
+                ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\")
                 """) || errstr
-            @test !endswith(errstr, "\nend\n") || errstr
+            @test occursin("""\nmiddle
+                Internal error: encountered unexpected error during compilation of f_broken_code:
+                ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\")
+                """, errstr) || errstr
+            @test occursin("""\nlater
+                Internal error: encountered unexpected error during compilation of f_broken_code:
+                ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\")
+                """, errstr) || errstr
+            @test endswith(errstr, "\nend\n") || errstr
         end
-        rm(err)
+        rm(errf)
     end
 end
+end
 
 # Issue #33163
 A33163(x; y) = x + y
 B33163(x) = x
-@test (@code_typed A33163(1, y=2))[1].inferred
-@test !(@code_typed optimize=false A33163(1, y=2))[1].inferred
-@test !(@code_typed optimize=false B33163(1))[1].inferred
+let
+    (@code_typed A33163(1, y=2))[1]
+    (@code_typed optimize=false A33163(1, y=2))[1]
+    (@code_typed optimize=false B33163(1))[1]
+end
 
 @test_throws MethodError (@code_lowered wrongkeyword=true 3 + 4)
 
@@ -398,7 +618,22 @@ a14637 = A14637(0)
 @test (@code_typed optimize=true max.([1,7], UInt.([4])))[2] == Vector{UInt}
 @test (@code_typed Ref.([1,2])[1].x)[2] == Int
 @test (@code_typed max.(Ref(true).x))[2] == Bool
+@test (@code_typed optimize=false round.([1.0, 2.0]; digits = 3))[2] == Vector{Float64}
+@test (@code_typed optimize=false round.([1.0, 2.0], base = 2; digits = 3))[2] == Vector{Float64}
+@test (@code_typed optimize=false round.(base = 2, [1.0, 2.0], digits = 3))[2] == Vector{Float64}
+@test (@code_typed optimize=false [1, 2] .= 2)[2] == Vector{Int}
+@test (@code_typed optimize=false [1, 2] .<<= 2)[2] == Vector{Int}
+@test (@code_typed optimize=false [1, 2.0] .= 1 .+ [2, 3])[2] == Vector{Float64}
+@test (@code_typed optimize=false [1, 2.0] .= 1 .+ round.(base = 1, [1, 3]; digits = 3))[2] == Vector{Float64}
+@test (@code_typed optimize=false [1] .+ [2])[2] == Vector{Int}
 @test !isempty(@code_typed optimize=false max.(Ref.([5, 6])...))
+expansion = string(@macroexpand @code_typed optimize=false max.(Ref.([5, 6])...))
+@test contains(expansion, "(x1) =") # presence of wrapper function
+# Make sure broadcasts in nested arguments are not processed.
+v = Any[1]
+expansion = string(@macroexpand @code_typed v[1] = rand.(Ref(1)))
+@test contains(expansion, "Typeof(rand.(Ref(1)))")
+@test !contains(expansion, "(x1) =")
 
 # Issue # 45889
 @test !isempty(@code_typed 3 .+ 6)
@@ -411,10 +646,11 @@ a14637 = A14637(0)
 @test (@code_typed max.(1 .+ 3, 5 - 7))[2] == Int
 f36261(x,y) = 3x + 4y
 A36261 = Float64[1.0, 2.0, 3.0]
-@test (@code_typed f36261.(A36261, pi))[1].inferred
-@test (@code_typed f36261.(A36261, 1 .+ pi))[1].inferred
-@test (@code_typed f36261.(A36261, 1 + pi))[1].inferred
-
+let
+    @code_typed f36261.(A36261, pi)[1]
+    @code_typed f36261.(A36261, 1 .+ pi)[1]
+    @code_typed f36261.(A36261, 1 + pi)[1]
+end
 
 module ReflectionTest
 using Test, Random, InteractiveUtils
@@ -462,7 +698,9 @@ end # module ReflectionTest
 # Issue #18883, code_llvm/code_native for generated functions
 @generated f18883() = nothing
 @test !isempty(sprint(code_llvm, f18883, Tuple{}))
+@test !isempty(sprint(code_llvm, (typeof(f18883),)))
 @test !isempty(sprint(code_native, f18883, Tuple{}))
+@test !isempty(sprint(code_native, (typeof(f18883),)))
 
 ix86 = r"i[356]86"
 
@@ -483,9 +721,9 @@ if Sys.ARCH === :x86_64 || occursin(ix86, string(Sys.ARCH))
     output = replace(String(take!(buf)), r"#[^\r\n]+" => "")
     @test !occursin(rgx, output)
 
-    code_native(buf, linear_foo, ())
-    output = String(take!(buf))
-    @test occursin(rgx, output)
+    code_native(buf, linear_foo, (), debuginfo = :none)
+    output = replace(String(take!(buf)), r"#[^\r\n]+" => "")
+    @test !occursin(rgx, output)
 
     @testset "binary" begin
         # check the RET instruction (opcode: C3)
@@ -510,6 +748,9 @@ end
     @test_throws err @code_lowered ""
     @test_throws err @code_lowered 1
     @test_throws err @code_lowered 1.0
+
+    @test_throws "dot expressions are not lowered to a single function call" @which a .= 1 + 2
+    @test_throws "invalid keyword argument syntax" @eval @which round(1; digits(3))
 end
 
 using InteractiveUtils: editor
@@ -594,6 +835,10 @@ file, ln = functionloc(versioninfo, Tuple{})
 @test isfile(pathof(InteractiveUtils))
 @test isdir(pkgdir(InteractiveUtils))
 
+# compiler stdlib path updating
+file, ln = functionloc(Core.Compiler.tmeet, Tuple{Int, Float64})
+@test isfile(file)
+
 @testset "buildbot path updating" begin
     file, ln = functionloc(versioninfo, Tuple{})
     @test isfile(file)
@@ -640,10 +885,11 @@ end
 # macro options should accept both literals and variables
 let
     opt = false
-    @test !(first(@code_typed optimize=opt sum(1:10)).inferred)
+    @test length(first(@code_typed optimize=opt sum(1:10)).code) ==
+        length((@code_lowered sum(1:10)).code)
 end
 
-@testset "@time_imports" begin
+@testset "@time_imports, @trace_compile, @trace_dispatch" begin
     mktempdir() do dir
         cd(dir) do
             try
@@ -652,7 +898,16 @@ end
                 write(foo_file,
                     """
                     module Foo3242
-                    foo() = 1
+                    function foo()
+                        Base.Experimental.@force_compile
+                        foo(1)
+                    end
+                    foo(x) = x
+                    function bar()
+                        Base.Experimental.@force_compile
+                        bar(1)
+                    end
+                    bar(x) = x
                     end
                     """)
 
@@ -669,6 +924,27 @@ end
 
                 @test occursin("ms  Foo3242", String(buf))
 
+                fname = tempname()
+                f = open(fname, "w")
+                redirect_stderr(f) do
+                    @trace_compile @eval Foo3242.foo()
+                end
+                close(f)
+                buf = read(fname)
+                rm(fname)
+
+                @test occursin("ms =# precompile(", String(buf))
+
+                fname = tempname()
+                f = open(fname, "w")
+                redirect_stderr(f) do
+                    @trace_dispatch @eval Foo3242.bar()
+                end
+                close(f)
+                buf = read(fname)
+                rm(fname)
+
+                @test occursin("precompile(", String(buf))
             finally
                 filter!((≠)(dir), LOAD_PATH)
             end
@@ -697,9 +973,33 @@ let # `default_tt` should work with any function with one method
     end); true)
 end
 
+let # specifying calls as argtypes (incl. arg0) should be supported
+    @test (code_warntype(devnull, (typeof(function ()
+        sin(42)
+    end),)); true)
+    @test (code_warntype(devnull, (typeof(function (a::Int)
+        sin(42)
+    end), Int)); true)
+    @test (code_llvm(devnull, (typeof(function ()
+        sin(42)
+    end),)); true)
+    @test (code_llvm(devnull, (typeof(function (a::Int)
+        sin(42)
+    end), Int)); true)
+    @test (code_native(devnull, (typeof(function ()
+        sin(42)
+    end),)); true)
+    @test (code_native(devnull, (typeof(function (a::Int)
+        sin(42)
+    end), Int)); true)
+end
+
 @testset "code_llvm on opaque_closure" begin
     let ci = code_typed(+, (Int, Int))[1][1]
         ir = Core.Compiler.inflate_ir(ci)
+        ir.argtypes[1] = Tuple{}
+        @test ir.debuginfo.def === nothing
+        ir.debuginfo.def = Symbol(@__FILE__)
         oc = Core.OpaqueClosure(ir)
         @test (code_llvm(devnull, oc, Tuple{Int, Int}); true)
         let io = IOBuffer()
@@ -721,3 +1021,36 @@ end
 end
 
 @test Base.infer_effects(sin, (Int,)) == InteractiveUtils.@infer_effects sin(42)
+@test Base.infer_return_type(sin, (Int,)) == InteractiveUtils.@infer_return_type sin(42)
+@test Base.infer_exception_type(sin, (Int,)) == InteractiveUtils.@infer_exception_type sin(42)
+@test first(InteractiveUtils.@code_ircode sin(42)) isa Core.Compiler.IRCode
+@test first(InteractiveUtils.@code_ircode optimize_until="CC: INLINING" sin(42)) isa Core.Compiler.IRCode
+# Test.@inferred also uses `gen_call_with_extracted_types`
+@test Test.@inferred round(1.2) isa Float64
+@test Test.@inferred round(1.3; digits = 3) isa Float64
+# ensure proper inference of the macro output of `@inferred`
+@test Base.infer_return_type(x -> Test.@inferred(round(x)), (Float64,)) === Float64
+@test Base.infer_return_type(x -> Test.@inferred(round(x; digits = 3)), (Float64,)) === Float64
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(InteractiveUtils))
+end
+
+# issue https://github.com/JuliaIO/ImageMagick.jl/issues/235
+module OuterModule
+    module InternalModule
+        struct MyType
+            x::Int
+        end
+
+        Base.@deprecate_binding MyOldType MyType
+
+        export MyType
+    end
+    using .InternalModule
+    export MyType, MyOldType
+end # module
+@testset "Subtypes and deprecations" begin
+    using .OuterModule
+    @test_nowarn subtypes(Integer);
+end
diff --git a/stdlib/JuliaSyntaxHighlighting.version b/stdlib/JuliaSyntaxHighlighting.version
new file mode 100644
index 0000000000000..9fb676006add9
--- /dev/null
+++ b/stdlib/JuliaSyntaxHighlighting.version
@@ -0,0 +1,4 @@
+JULIASYNTAXHIGHLIGHTING_BRANCH = main
+JULIASYNTAXHIGHLIGHTING_SHA1 = 84fb1fd08824736de14aaa94265df756474e0bdf
+JULIASYNTAXHIGHLIGHTING_GIT_URL := https://github.com/julialang/JuliaSyntaxHighlighting.jl.git
+JULIASYNTAXHIGHLIGHTING_TAR_URL = https://api.github.com/repos/julialang/JuliaSyntaxHighlighting.jl/tarball/$1
diff --git a/stdlib/LLD_jll/Project.toml b/stdlib/LLD_jll/Project.toml
index 90d867ca0f7da..2133933023484 100644
--- a/stdlib/LLD_jll/Project.toml
+++ b/stdlib/LLD_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LLD_jll"
 uuid = "d55e3150-da41-5e91-b323-ecfd1eec6109"
-version = "15.0.7+5"
+version = "20.1.8+0"
 
 [deps]
 Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
@@ -9,8 +9,8 @@ libLLVM_jll = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
-julia = "1.9"
-libLLVM_jll = "15.0.7"
+julia = "1.13"
+libLLVM_jll = "20.1.8"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/LLD_jll/src/LLD_jll.jl b/stdlib/LLD_jll/src/LLD_jll.jl
index 55ccec9cc4005..9b8365dddcf0b 100644
--- a/stdlib/LLD_jll/src/LLD_jll.jl
+++ b/stdlib/LLD_jll/src/LLD_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule LLD_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/LLVMLibUnwind_jll/Project.toml b/stdlib/LLVMLibUnwind_jll/Project.toml
index 36c24111d4d31..e102af311abec 100644
--- a/stdlib/LLVMLibUnwind_jll/Project.toml
+++ b/stdlib/LLVMLibUnwind_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LLVMLibUnwind_jll"
 uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9"
-version = "12.0.1+0"
+version = "19.1.4+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
index 5c4026291a673..c6e2750895c13 100644
--- a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
+++ b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
@@ -4,40 +4,34 @@
 
 baremodule LLVMLibUnwind_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
 
 export llvmlibunwind
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-llvmlibunwind_handle::Ptr{Cvoid} = C_NULL
+
 llvmlibunwind_path::String = ""
+const llvmlibunwind = LazyLibrary(BundledLazyLibraryPath("libunwind"))
 
-const llvmlibunwind = "libunwind"
+function eager_mode()
+    dlopen(llvmlibunwind)
+end
+is_available() = @static Sys.isapple() ? true : false
 
 function __init__()
-    # We only dlopen something on MacOS
-    @static if Sys.isapple()
-        global llvmlibunwind_handle = dlopen(llvmlibunwind)
-        global llvmlibunwind_path = dlpath(llvmlibunwind_handle)
-        global artifact_dir = dirname(Sys.BINDIR)
-        LIBPATH[] = dirname(llvmlibunwind_path)
-        push!(LIBPATH_list, LIBPATH[])
-    end
+    global llvmlibunwind_path = string(llvmlibunwind.path)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(llvmlibunwind_path)
+    push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = @static Sys.isapple() ? true : false
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_llvmlibunwind_path() = llvmlibunwind_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module LLVMLibUnwind_jll
diff --git a/stdlib/LLVMLibUnwind_jll/test/runtests.jl b/stdlib/LLVMLibUnwind_jll/test/runtests.jl
index e984593ab2c25..42afe50a875f6 100644
--- a/stdlib/LLVMLibUnwind_jll/test/runtests.jl
+++ b/stdlib/LLVMLibUnwind_jll/test/runtests.jl
@@ -1,16 +1,14 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Test, Libdl
-using LLVMLibUnwind_jll: llvmlibunwind_handle
-
+using Test, Libdl, LLVMLibUnwind_jll
 @testset "LLVMLibUnwind_jll" begin
     if Sys.isapple()
-        @test dlsym(llvmlibunwind_handle, :unw_getcontext; throw_error=false) !== nothing
-        @test dlsym(llvmlibunwind_handle, :unw_init_local; throw_error=false) !== nothing
-        @test dlsym(llvmlibunwind_handle, :unw_init_local_dwarf; throw_error=false) !== nothing
-        @test dlsym(llvmlibunwind_handle, :unw_step; throw_error=false) !== nothing
-        @test dlsym(llvmlibunwind_handle, :unw_get_reg; throw_error=false) !== nothing
-        @test dlsym(llvmlibunwind_handle, :unw_set_reg; throw_error=false) !== nothing
-        @test dlsym(llvmlibunwind_handle, :unw_resume; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind, :unw_getcontext; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind, :unw_init_local; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind, :unw_init_local_dwarf; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind, :unw_step; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind, :unw_get_reg; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind, :unw_set_reg; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind, :unw_resume; throw_error=false) !== nothing
     end
 end
diff --git a/stdlib/LazyArtifacts.version b/stdlib/LazyArtifacts.version
new file mode 100644
index 0000000000000..8988e27bcb4ac
--- /dev/null
+++ b/stdlib/LazyArtifacts.version
@@ -0,0 +1,4 @@
+LAZYARTIFACTS_BRANCH = main
+LAZYARTIFACTS_SHA1 = e4cfc39598c238f75bdfdbdb3f82c9329a5af59c
+LAZYARTIFACTS_GIT_URL := https://github.com/JuliaPackaging/LazyArtifacts.jl.git
+LAZYARTIFACTS_TAR_URL = https://api.github.com/repos/JuliaPackaging/LazyArtifacts.jl/tarball/$1
diff --git a/stdlib/LazyArtifacts/Project.toml b/stdlib/LazyArtifacts/Project.toml
deleted file mode 100644
index ea9afc9d12dba..0000000000000
--- a/stdlib/LazyArtifacts/Project.toml
+++ /dev/null
@@ -1,12 +0,0 @@
-name = "LazyArtifacts"
-uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
-
-[deps]
-Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-
-[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test"]
diff --git a/stdlib/LazyArtifacts/docs/src/index.md b/stdlib/LazyArtifacts/docs/src/index.md
deleted file mode 100644
index 9de6b219c6988..0000000000000
--- a/stdlib/LazyArtifacts/docs/src/index.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Lazy Artifacts
-
-```@meta
-DocTestSetup = :(using LazyArtifacts)
-```
-
-In order for a package to download artifacts lazily, `LazyArtifacts` must be
-explicitly listed as a dependency of that package.
-
-For further information on artifacts, see [Artifacts](@ref).
diff --git a/stdlib/LazyArtifacts/src/LazyArtifacts.jl b/stdlib/LazyArtifacts/src/LazyArtifacts.jl
deleted file mode 100644
index b783276ac6081..0000000000000
--- a/stdlib/LazyArtifacts/src/LazyArtifacts.jl
+++ /dev/null
@@ -1,15 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module LazyArtifacts
-
-# reexport the Artifacts API
-using Artifacts: Artifacts,
-       artifact_exists, artifact_path, artifact_meta, artifact_hash,
-       select_downloadable_artifacts, find_artifacts_toml, @artifact_str
-export artifact_exists, artifact_path, artifact_meta, artifact_hash,
-       select_downloadable_artifacts, find_artifacts_toml, @artifact_str
-
-# define a function for satisfying lazy Artifact downloads
-using Pkg.Artifacts: ensure_artifact_installed
-
-end
diff --git a/stdlib/LazyArtifacts/test/Artifacts.toml b/stdlib/LazyArtifacts/test/Artifacts.toml
deleted file mode 100644
index 4b715b74c128b..0000000000000
--- a/stdlib/LazyArtifacts/test/Artifacts.toml
+++ /dev/null
@@ -1,155 +0,0 @@
-[[HelloWorldC]]
-arch = "aarch64"
-git-tree-sha1 = "95fce80ec703eeb5f4270fef6821b38d51387499"
-os = "macos"
-
-    [[HelloWorldC.download]]
-    sha256 = "23f45918421881de8e9d2d471c70f6b99c26edd1dacd7803d2583ba93c8bbb28"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-apple-darwin.tar.gz"
-[[HelloWorldC]]
-arch = "aarch64"
-git-tree-sha1 = "1ccbaad776766366943fd5a66a8cbc9877ee8df9"
-libc = "glibc"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "82bca07ff25a75875936116ca977285160a2afcc4f58dd160c7b1600f55da655"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-gnu.tar.gz"
-[[HelloWorldC]]
-arch = "aarch64"
-git-tree-sha1 = "dc43ab874611cfc26641741c31b8230276d7d664"
-libc = "musl"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "36b7c554f1cb04d5282b991c66a10b2100085ac8deb2156bf52b4f7c4e406c04"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-musl.tar.gz"
-[[HelloWorldC]]
-arch = "armv6l"
-call_abi = "eabihf"
-git-tree-sha1 = "b7128521583d02d2dbe9c8de6fe156b79df781d9"
-libc = "glibc"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "5e094b9c6e4c6a77ecc8dfc2b841ac1f2157f6a81f4c47f1e0d3e9a04eec7945"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-gnueabihf.tar.gz"
-[[HelloWorldC]]
-arch = "armv6l"
-call_abi = "eabihf"
-git-tree-sha1 = "edb3893a154519d6786234f5c83994c34e11feed"
-libc = "musl"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "0a2203f061ba2ef7ce4c452ec7874be3acc6db1efac8091f85d113c3404e6bb6"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-musleabihf.tar.gz"
-[[HelloWorldC]]
-arch = "armv7l"
-call_abi = "eabihf"
-git-tree-sha1 = "5a8288c8a30578c0d0f24a9cded29579517ce7a8"
-libc = "glibc"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "a4392a4c8f834c97f9d8822ddfb1813d8674fa602eeaf04d6359c0a9e98478ec"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-gnueabihf.tar.gz"
-[[HelloWorldC]]
-arch = "armv7l"
-call_abi = "eabihf"
-git-tree-sha1 = "169c261b321c4dc95894cdd2db9d0d0caa84677f"
-libc = "musl"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "ed1aacbf197a6c78988725a39defad130ed31a2258f8e7846f73b459821f21d3"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-musleabihf.tar.gz"
-[[HelloWorldC]]
-arch = "i686"
-git-tree-sha1 = "fd35f9155dc424602d01fbf983eb76be3217a28f"
-libc = "glibc"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "048fcff5ff47a3cc1e84a2688935fcd658ad1c7e7c52c0e81fe88ce6c3697aba"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-gnu.tar.gz"
-[[HelloWorldC]]
-arch = "i686"
-git-tree-sha1 = "8db14df0f1d2a3ed9c6a7b053a590ca6527eb95e"
-libc = "musl"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "d521b4420392b8365de5ed0ef38a3b6c822665d7c257d3eef6f725c205bb3d78"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-musl.tar.gz"
-[[HelloWorldC]]
-arch = "i686"
-git-tree-sha1 = "56f82168947b8dc7bb98038f063209b9f864eaff"
-os = "windows"
-
-    [[HelloWorldC.download]]
-    sha256 = "de578cf5ee2f457e9ff32089cbe17d03704a929980beddf4c41f4c0eb32f19c6"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-w64-mingw32.tar.gz"
-[[HelloWorldC]]
-arch = "powerpc64le"
-git-tree-sha1 = "9c8902b62f5b1aaa7c2839c804bed7c3a0912c7b"
-libc = "glibc"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "63ddbfbb6ea0cafef544cc25415e7ebee6ee0a69db0878d0d4e1ed27c0ae0ab5"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.powerpc64le-linux-gnu.tar.gz"
-[[HelloWorldC]]
-arch = "x86_64"
-git-tree-sha1 = "f8ab5a03697f9afc82210d8a2be1d94509aea8bc"
-os = "macos"
-
-    [[HelloWorldC.download]]
-    sha256 = "f5043338613672b12546c59359c7997c5381a9a60b86aeb951dee74de428d5e3"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-apple-darwin.tar.gz"
-[[HelloWorldC]]
-arch = "x86_64"
-git-tree-sha1 = "1ed3d81088f16e3a1fa4e3d4c4c509b8c117fecf"
-libc = "glibc"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "a18212e7984b08b23bec06e8bf9286a89b9fa2e8ee0dd46af3b852fe22013a4f"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-gnu.tar.gz"
-[[HelloWorldC]]
-arch = "x86_64"
-git-tree-sha1 = "c04ef757b8bb773d17a0fd0ea396e52db1c7c385"
-libc = "musl"
-os = "linux"
-
-    [[HelloWorldC.download]]
-    sha256 = "7a3d1b09410989508774f00e073ea6268edefcaba7617fc5085255ec8e82555b"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-musl.tar.gz"
-[[HelloWorldC]]
-arch = "x86_64"
-git-tree-sha1 = "5f7e7abf7d545a1aaa368f22e3e01ea0268870b1"
-os = "freebsd"
-
-    [[HelloWorldC.download]]
-    sha256 = "56aedffe38fe20294e93cfc2eb0a193c8e2ddda5a697b302e77ff48ac1195198"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-unknown-freebsd.tar.gz"
-[[HelloWorldC]]
-arch = "x86_64"
-git-tree-sha1 = "2f1a6d4f82cd1eea785a5141b992423c09491f1b"
-os = "windows"
-
-    [[HelloWorldC.download]]
-    sha256 = "aad77a16cbc9752f6ec62549a28c7e9f3f7f57919f6fa9fb924e0c669b11f8c4"
-    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-w64-mingw32.tar.gz"
-
-[socrates]
-git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239"
-lazy = true
-
-    [[socrates.download]]
-    url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.gz"
-    sha256 = "e65d2f13f2085f2c279830e863292312a72930fee5ba3c792b14c33ce5c5cc58"
-
-    [[socrates.download]]
-    url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.bz2"
-    sha256 = "13fc17b97be41763b02cbb80e9d048302cec3bd3d446c2ed6e8210bddcd3ac76"
diff --git a/stdlib/LazyArtifacts/test/runtests.jl b/stdlib/LazyArtifacts/test/runtests.jl
deleted file mode 100644
index 1c8bbee269144..0000000000000
--- a/stdlib/LazyArtifacts/test/runtests.jl
+++ /dev/null
@@ -1,31 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using LazyArtifacts
-using Test
-
-mktempdir() do tempdir
-    LazyArtifacts.Artifacts.with_artifacts_directory(tempdir) do
-        redirect_stderr(devnull) do
-            socrates_dir = artifact"socrates"
-            @test isdir(socrates_dir)
-        end
-        ex = @test_throws ErrorException artifact"HelloWorldC"
-        @test startswith(ex.value.msg, "Artifact \"HelloWorldC\" was not found")
-    end
-end
-
-# Need to set depwarn flag before testing deprecations
-@test success(run(setenv(`$(Base.julia_cmd()) --depwarn=no --startup-file=no -e '
-    using Artifacts, Pkg
-    using Test
-    mktempdir() do tempdir
-        Artifacts.with_artifacts_directory(tempdir) do
-            redirect_stderr(devnull) do
-                socrates_dir = @test_logs(
-                        (:warn, "using Pkg instead of using LazyArtifacts is deprecated"),
-                        artifact"socrates")
-                @test isdir(socrates_dir)
-            end
-        end
-    end'`,
-    dir=@__DIR__)))
diff --git a/stdlib/LibCURL.version b/stdlib/LibCURL.version
index 216ab4e7aca22..78537e2aadae4 100644
--- a/stdlib/LibCURL.version
+++ b/stdlib/LibCURL.version
@@ -1,4 +1,4 @@
 LIBCURL_BRANCH = master
-LIBCURL_SHA1 = a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0
+LIBCURL_SHA1 = 9ea5c5d6f5b88615d9fe23379b7f951787b99fd3
 LIBCURL_GIT_URL := https://github.com/JuliaWeb/LibCURL.jl.git
 LIBCURL_TAR_URL = https://api.github.com/repos/JuliaWeb/LibCURL.jl/tarball/$1
diff --git a/stdlib/LibCURL_jll/Project.toml b/stdlib/LibCURL_jll/Project.toml
index 0ef46598b3118..dc561cb7fd475 100644
--- a/stdlib/LibCURL_jll/Project.toml
+++ b/stdlib/LibCURL_jll/Project.toml
@@ -1,14 +1,16 @@
 name = "LibCURL_jll"
 uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
-version = "8.0.1+0"
+version = "8.17.0+0"
 
 [deps]
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
 LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8"
-nghttp2_jll = "8e850ede-7688-5339-a07c-302acd2aaf8d"
-MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
-Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+OpenSSL_jll = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
+Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
+nghttp2_jll = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+Zstd_jll = "3161d3a3-bdf6-5164-811a-617609db77b4"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
@@ -17,4 +19,5 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 test = ["Test"]
 
 [compat]
-julia = "1.8"
+CompilerSupportLibraries_jll = "1.3.0"
+julia = "1.11"
diff --git a/stdlib/LibCURL_jll/src/LibCURL_jll.jl b/stdlib/LibCURL_jll/src/LibCURL_jll.jl
index cd67bfac0006a..528ca92ffb9af 100644
--- a/stdlib/LibCURL_jll/src/LibCURL_jll.jl
+++ b/stdlib/LibCURL_jll/src/LibCURL_jll.jl
@@ -3,44 +3,70 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/LibCURL_jll.jl
 
 baremodule LibCURL_jll
-using Base, Libdl, nghttp2_jll
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
+using Base, Libdl, nghttp2_jll, LibSSH2_jll, Zlib_jll, Zstd_jll
+if !Sys.iswindows()
+    using OpenSSL_jll
+end
+if Sys.iswindows() && Sys.WORD_SIZE == 32
+    using CompilerSupportLibraries_jll
+end
 
 export libcurl
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libcurl_handle::Ptr{Cvoid} = C_NULL
+
 libcurl_path::String = ""
+const libcurl = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libcurl-4.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libcurl.4.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libcurl.so.4")
+    else
+        error("LibCURL_jll: Library 'libcurl' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows()
+        if  Sys.WORD_SIZE == 32
+            LazyLibrary[libz, libzstd, libnghttp2, libssh2, libgcc_s]
+        else
+            LazyLibrary[libz, libzstd, libnghttp2, libssh2]
+        end
+    else
+        LazyLibrary[libz, libzstd, libnghttp2, libssh2, libssl, libcrypto]
+    end
+)
 
-if Sys.iswindows()
-    const libcurl = "libcurl-4.dll"
-elseif Sys.isapple()
-    const libcurl = "@rpath/libcurl.4.dylib"
-else
-    const libcurl = "libcurl.so.4"
+function eager_mode()
+    Zlib_jll.eager_mode()
+    Zstd_jll.eager_mode()
+    nghttp2_jll.eager_mode()
+    LibSSH2_jll.eager_mode()
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
+    @static if @isdefined OpenSSL_jll
+        OpenSSL_jll.eager_mode()
+    end
+    dlopen(libcurl)
 end
+is_available() = true
 
 function __init__()
-    global libcurl_handle = dlopen(libcurl)
-    global libcurl_path = dlpath(libcurl_handle)
+    global libcurl_path = string(libcurl.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libcurl_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libcurl_path() = libcurl_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module LibCURL_jll
diff --git a/stdlib/LibGit2/Project.toml b/stdlib/LibGit2/Project.toml
index da78f70fa1005..8432a32cd240b 100644
--- a/stdlib/LibGit2/Project.toml
+++ b/stdlib/LibGit2/Project.toml
@@ -1,8 +1,9 @@
 name = "LibGit2"
 uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+version = "1.11.0"
 
 [deps]
-Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+LibGit2_jll = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
 NetworkOptions = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
diff --git a/stdlib/LibGit2/docs/src/index.md b/stdlib/LibGit2/docs/src/index.md
index 3205c4c5d6987..aa4ebf2e784b6 100644
--- a/stdlib/LibGit2/docs/src/index.md
+++ b/stdlib/LibGit2/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/LibGit2/docs/src/index.md"
+```
+
 # LibGit2
 
 The LibGit2 module provides bindings to [libgit2](https://libgit2.org/), a portable C library that
diff --git a/stdlib/LibGit2/src/LibGit2.jl b/stdlib/LibGit2/src/LibGit2.jl
index 6a797937ccf0b..30d141be146d7 100644
--- a/stdlib/LibGit2/src/LibGit2.jl
+++ b/stdlib/LibGit2/src/LibGit2.jl
@@ -6,14 +6,15 @@ Interface to [libgit2](https://libgit2.org/).
 module LibGit2
 
 import Base: ==
-using Base: something, notnothing
-using Base64: base64decode
+using Base: something
 using NetworkOptions
 using Printf: @printf
 using SHA: sha1, sha256
 
 export with, GitRepo, GitConfig
 
+using LibGit2_jll
+
 const GITHUB_REGEX =
     r"^(?:(?:ssh://)?git@|git://|https://(?:[\w\.\+\-]+@)?)github.com[:/](([^/].+)/(.+?))(?:\.git)?$"i
 
@@ -55,7 +56,7 @@ struct State
 end
 
 """
-    head(pkg::AbstractString) -> String
+    head(pkg::AbstractString)::String
 
 Return current HEAD [`GitHash`](@ref) of
 the `pkg` repo as a string.
@@ -80,7 +81,7 @@ function need_update(repo::GitRepo)
 end
 
 """
-    iscommit(id::AbstractString, repo::GitRepo) -> Bool
+    iscommit(id::AbstractString, repo::GitRepo)::Bool
 
 Check if commit `id` (which is a [`GitHash`](@ref) in string form)
 is in the repository.
@@ -113,7 +114,7 @@ function iscommit(id::AbstractString, repo::GitRepo)
 end
 
 """
-    LibGit2.isdirty(repo::GitRepo, pathspecs::AbstractString=""; cached::Bool=false) -> Bool
+    LibGit2.isdirty(repo::GitRepo, pathspecs::AbstractString=""; cached::Bool=false)::Bool
 
 Check if there have been any changes to tracked files in the working tree (if
 `cached=false`) or the index (if `cached=true`).
@@ -167,7 +168,7 @@ function isdiff(repo::GitRepo, treeish::AbstractString, paths::AbstractString=""
 end
 
 """
-    diff_files(repo::GitRepo, branch1::AbstractString, branch2::AbstractString; kwarg...) -> Vector{AbstractString}
+    diff_files(repo::GitRepo, branch1::AbstractString, branch2::AbstractString; kwarg...)::Vector{AbstractString}
 
 Show which files have changed in the git repository `repo` between branches `branch1`
 and `branch2`.
@@ -223,7 +224,7 @@ function diff_files(repo::GitRepo, branch1::AbstractString, branch2::AbstractStr
 end
 
 """
-    is_ancestor_of(a::AbstractString, b::AbstractString, repo::GitRepo) -> Bool
+    is_ancestor_of(a::AbstractString, b::AbstractString, repo::GitRepo)::Bool
 
 Return `true` if `a`, a [`GitHash`](@ref) in string form, is an ancestor of
 `b`, a [`GitHash`](@ref) in string form.
@@ -261,15 +262,21 @@ The keyword arguments are:
   * `remoteurl::AbstractString=""`: the URL of `remote`. If not specified,
     will be assumed based on the given name of `remote`.
   * `refspecs=AbstractString[]`: determines properties of the fetch.
+  * `depth::Integer=0`: limit fetching to the specified number of commits from the tip
+    of each remote branch. `0` indicates a full fetch (the default).
+    Use `Consts.FETCH_DEPTH_UNSHALLOW` to fetch all missing data from a shallow clone.
+    Note: depth is, at the time of writing, only supported for network protocols (http, https, git, ssh), not for local filesystem paths.
+    (https://github.com/libgit2/libgit2/issues/6634)
   * `credentials=nothing`: provides credentials and/or settings when authenticating against
     a private `remote`.
   * `callbacks=Callbacks()`: user provided callbacks and payloads.
 
-Equivalent to `git fetch [<remoteurl>|<repo>] [<refspecs>]`.
+Equivalent to `git fetch [--depth <depth>] [<remoteurl>|<repo>] [<refspecs>]`.
 """
 function fetch(repo::GitRepo; remote::AbstractString="origin",
                remoteurl::AbstractString="",
                refspecs::Vector{<:AbstractString}=AbstractString[],
+               depth::Integer=0,
                credentials::Creds=nothing,
                callbacks::Callbacks=Callbacks())
     rmt = if isempty(remoteurl)
@@ -289,7 +296,12 @@ function fetch(repo::GitRepo; remote::AbstractString="origin",
 
     result = try
         remote_callbacks = RemoteCallbacks(callbacks)
-        fo = FetchOptions(callbacks=remote_callbacks)
+        @static if LibGit2.VERSION >= v"1.7.0"
+            fo = FetchOptions(callbacks=remote_callbacks, depth=Cuint(depth))
+        else
+            depth != 0 && throw(ArgumentError("Depth parameter for fetch requires libgit2 >= 1.7.0"))
+            fo = FetchOptions(callbacks=remote_callbacks)
+        end
         fetch(rmt, refspecs, msg="from $(url(rmt))", options=fo)
     catch err
         if isa(err, GitError) && err.code === Error.EAUTH
@@ -538,11 +550,16 @@ The keyword arguments are:
   * `remote_cb::Ptr{Cvoid}=C_NULL`: a callback which will be used to create the remote
     before it is cloned. If `C_NULL` (the default), no attempt will be made to create
     the remote - it will be assumed to already exist.
+  * `depth::Integer=0`: create a shallow clone with a history truncated to the
+    specified number of commits. `0` indicates a full clone (the default).
+    Use `Consts.FETCH_DEPTH_UNSHALLOW` to fetch all missing data from a shallow clone.
+    Note: shallow clones are, at the time of writing, only supported for network protocols (http, https, git, ssh), not for local filesystem paths.
+    (https://github.com/libgit2/libgit2/issues/6634)
   * `credentials::Creds=nothing`: provides credentials and/or settings when authenticating
     against a private repository.
   * `callbacks::Callbacks=Callbacks()`: user provided callbacks and payloads.
 
-Equivalent to `git clone [-b <branch>] [--bare] <repo_url> <repo_path>`.
+Equivalent to `git clone [-b <branch>] [--bare] [--depth <depth>] <repo_url> <repo_path>`.
 
 # Examples
 ```julia
@@ -551,12 +568,15 @@ repo1 = LibGit2.clone(repo_url, "test_path")
 repo2 = LibGit2.clone(repo_url, "test_path", isbare=true)
 julia_url = "https://github.com/JuliaLang/julia"
 julia_repo = LibGit2.clone(julia_url, "julia_path", branch="release-0.6")
+# Shallow clone with only the most recent commit
+shallow_repo = LibGit2.clone(repo_url, "shallow_path", depth=1)
 ```
 """
 function clone(repo_url::AbstractString, repo_path::AbstractString;
                branch::AbstractString="",
                isbare::Bool = false,
                remote_cb::Ptr{Cvoid} = C_NULL,
+               depth::Integer = 0,
                credentials::Creds=nothing,
                callbacks::Callbacks=Callbacks())
     cred_payload = reset!(CredentialPayload(credentials))
@@ -572,7 +592,12 @@ function clone(repo_url::AbstractString, repo_path::AbstractString;
     lbranch = Base.cconvert(Cstring, branch)
     GC.@preserve lbranch begin
         remote_callbacks = RemoteCallbacks(callbacks)
-        fetch_opts = FetchOptions(callbacks=remote_callbacks)
+        @static if LibGit2.VERSION >= v"1.7.0"
+            fetch_opts = FetchOptions(callbacks=remote_callbacks, depth=Cuint(depth))
+        else
+            depth != 0 && throw(ArgumentError("Shallow clone (depth parameter) requires libgit2 >= 1.7.0"))
+            fetch_opts = FetchOptions(callbacks=remote_callbacks)
+        end
         clone_opts = CloneOptions(
                     bare = Cint(isbare),
                     checkout_branch = isempty(lbranch) ? Cstring(C_NULL) : Base.unsafe_convert(Cstring, lbranch),
@@ -594,6 +619,44 @@ function clone(repo_url::AbstractString, repo_path::AbstractString;
     return repo
 end
 
+"""
+    connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION; kwargs...)
+
+Open a connection to a remote. `direction` can be either `DIRECTION_FETCH`
+or `DIRECTION_PUSH`.
+
+The keyword arguments are:
+  * `credentials::Creds=nothing`: provides credentials and/or settings when authenticating
+    against a private repository.
+  * `callbacks::Callbacks=Callbacks()`: user provided callbacks and payloads.
+"""
+function connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION;
+                 credentials::Creds=nothing,
+                 callbacks::Callbacks=Callbacks())
+    cred_payload = reset!(CredentialPayload(credentials))
+    if !haskey(callbacks, :credentials)
+        callbacks[:credentials] = (credentials_cb(), cred_payload)
+    elseif haskey(callbacks, :credentials) && credentials !== nothing
+        throw(ArgumentError(string(
+            "Unable to both use the provided `credentials` as a payload when the ",
+            "`callbacks` also contain a credentials payload.")))
+    end
+
+    remote_callbacks = RemoteCallbacks(callbacks)
+    try
+        connect(rmt, direction, remote_callbacks)
+    catch err
+        if isa(err, GitError) && err.code === Error.EAUTH
+            reject(cred_payload)
+        else
+            Base.shred!(cred_payload)
+        end
+        rethrow()
+    end
+    approve(cred_payload)
+    return rmt
+end
+
 """ git reset [<committish>] [--] <pathspecs>... """
 function reset!(repo::GitRepo, committish::AbstractString, pathspecs::AbstractString...)
     obj = GitObject(repo, isempty(committish) ? Consts.HEAD_FILE : committish)
@@ -688,7 +751,7 @@ function revcount(repo::GitRepo, commit1::AbstractString, commit2::AbstractStrin
 end
 
 """
-    merge!(repo::GitRepo; kwargs...) -> Bool
+    merge!(repo::GitRepo; kwargs...)::Bool
 
 Perform a git merge on the repository `repo`, merging commits
 with diverging history into the current branch. Return `true`
@@ -859,7 +922,7 @@ end
 
 
 """
-    authors(repo::GitRepo) -> Vector{Signature}
+    authors(repo::GitRepo)::Vector{Signature}
 
 Return all authors of commits to the `repo` repository.
 
@@ -892,7 +955,7 @@ function authors(repo::GitRepo)
 end
 
 """
-    snapshot(repo::GitRepo) -> State
+    snapshot(repo::GitRepo)::State
 
 Take a snapshot of the current state of the repository `repo`,
 storing the current HEAD, index, and any uncommitted work.
@@ -983,7 +1046,7 @@ function ensure_initialized()
 end
 
 @noinline function initialize()
-    @check ccall((:git_libgit2_init, :libgit2), Cint, ())
+    @check ccall((:git_libgit2_init, libgit2), Cint, ())
 
     cert_loc = NetworkOptions.ca_roots()
     cert_loc !== nothing && set_ssl_cert_locations(cert_loc)
@@ -991,7 +1054,7 @@ end
     atexit() do
         # refcount zero, no objects to be finalized
         if Threads.atomic_sub!(REFCOUNT, 1) == 1
-            ccall((:git_libgit2_shutdown, :libgit2), Cint, ())
+            ccall((:git_libgit2_shutdown, libgit2), Cint, ())
         end
     end
 end
@@ -1003,24 +1066,20 @@ function set_ssl_cert_locations(cert_loc)
     else # files, /dev/null, non-existent paths, etc.
         cert_file = cert_loc
     end
-    ret = @ccall "libgit2".git_libgit2_opts(
+    ret = @ccall libgit2.git_libgit2_opts(
         Consts.SET_SSL_CERT_LOCATIONS::Cint;
         cert_file::Cstring,
         cert_dir::Cstring)::Cint
     ret >= 0 && return ret
+    # On macOS and Windows LibGit2_jll is built without a TLS backend that supports
+    # certificate locations; don't throw on this expected error so we allow certificate
+    # location environment variables to be set for other purposes.
+    # We still try doing so to support other LibGit2 builds.
     err = Error.GitError(ret)
     err.class == Error.SSL &&
         err.msg == "TLS backend doesn't support certificate locations" ||
         throw(err)
-    var = nothing
-    for v in NetworkOptions.CA_ROOTS_VARS
-        haskey(ENV, v) && (var = v)
-    end
-    @assert var !== nothing # otherwise we shouldn't be here
-    msg = """
-    Your Julia is built with a SSL/TLS engine that libgit2 doesn't know how to configure to use a file or directory of certificate authority roots, but your environment specifies one via the $var variable. If you believe your system's root certificates are safe to use, you can `export JULIA_SSL_CA_ROOTS_PATH=""` in your environment to use those instead.
-    """
-    throw(Error.GitError(err.class, err.code, chomp(msg)))
+    return ret
 end
 
 """
@@ -1029,7 +1088,7 @@ end
 Sets the system tracing configuration to the specified level.
 """
 function trace_set(level::Union{Integer,Consts.GIT_TRACE_LEVEL}, cb=trace_cb())
-    @check @ccall "libgit2".git_trace_set(level::Cint, cb::Ptr{Cvoid})::Cint
+    @check @ccall libgit2.git_trace_set(level::Cint, cb::Ptr{Cvoid})::Cint
 end
 
 end # module
diff --git a/stdlib/LibGit2/src/blame.jl b/stdlib/LibGit2/src/blame.jl
index 3aa94e30200b4..e441189bdd423 100644
--- a/stdlib/LibGit2/src/blame.jl
+++ b/stdlib/LibGit2/src/blame.jl
@@ -11,9 +11,9 @@ which commits to probe - see [`BlameOptions`](@ref) for more information.
 function GitBlame(repo::GitRepo, path::AbstractString; options::BlameOptions=BlameOptions())
     ensure_initialized()
     blame_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_blame_file, :libgit2), Cint,
+    @check ccall((:git_blame_file, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Ptr{BlameOptions}),
-                   blame_ptr_ptr, repo.ptr, path, Ref(options))
+                   blame_ptr_ptr, repo, path, Ref(options))
     return GitBlame(repo, blame_ptr_ptr[])
 end
 
@@ -27,7 +27,7 @@ that function later.
 """
 function counthunks(blame::GitBlame)
     ensure_initialized()
-    return ccall((:git_blame_get_hunk_count, :libgit2), Int32, (Ptr{Cvoid},), blame.ptr)
+    return ccall((:git_blame_get_hunk_count, libgit2), Int32, (Ptr{Cvoid},), blame)
 end
 
 function Base.getindex(blame::GitBlame, i::Integer)
@@ -36,7 +36,7 @@ function Base.getindex(blame::GitBlame, i::Integer)
     end
     ensure_initialized()
     GC.@preserve blame begin
-        hunk_ptr = ccall((:git_blame_get_hunk_byindex, :libgit2),
+        hunk_ptr = ccall((:git_blame_get_hunk_byindex, libgit2),
                           Ptr{BlameHunk},
                           (Ptr{Cvoid}, Csize_t), blame.ptr, i-1)
         elem = unsafe_load(hunk_ptr)
diff --git a/stdlib/LibGit2/src/blob.jl b/stdlib/LibGit2/src/blob.jl
index efd7a14c9c6f7..914f8e170fb1c 100644
--- a/stdlib/LibGit2/src/blob.jl
+++ b/stdlib/LibGit2/src/blob.jl
@@ -2,11 +2,11 @@
 
 function Base.length(blob::GitBlob)
     ensure_initialized()
-    return ccall((:git_blob_rawsize, :libgit2), Int64, (Ptr{Cvoid},), blob.ptr)
+    return ccall((:git_blob_rawsize, libgit2), Int64, (Ptr{Cvoid},), blob)
 end
 
 """
-    rawcontent(blob::GitBlob) -> Vector{UInt8}
+    rawcontent(blob::GitBlob)::Vector{UInt8}
 
 Fetch the *raw* contents of the [`GitBlob`](@ref) `blob`. This is an
 `Array` containing the contents of the blob, which may be binary or may be Unicode.
@@ -20,12 +20,12 @@ is binary and not valid Unicode.
 """
 function rawcontent(blob::GitBlob)
     ensure_initialized()
-    ptr = ccall((:git_blob_rawcontent, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), blob.ptr)
+    ptr = ccall((:git_blob_rawcontent, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), blob)
     copy(unsafe_wrap(Array, ptr, (length(blob),), own = false))
 end
 
 """
-    content(blob::GitBlob) -> String
+    content(blob::GitBlob)::String
 
 Fetch the contents of the [`GitBlob`](@ref) `blob`. If the `blob` contains
 binary data (which can be determined using [`isbinary`](@ref)),
@@ -39,7 +39,7 @@ function content(blob::GitBlob)
 end
 
 """
-    isbinary(blob::GitBlob) -> Bool
+    isbinary(blob::GitBlob)::Bool
 
 Use a heuristic to guess if a file is binary: searching for NULL bytes and
 looking for a reasonable ratio of printable to non-printable characters among
@@ -47,7 +47,7 @@ the first 8000 bytes.
 """
 function isbinary(blob::GitBlob)
     ensure_initialized()
-    bin_flag = ccall((:git_blob_is_binary, :libgit2), Cint, (Ptr{Cvoid},), blob.ptr)
+    bin_flag = ccall((:git_blob_is_binary, libgit2), Cint, (Ptr{Cvoid},), blob)
     return bin_flag == 1
 end
 
@@ -67,9 +67,9 @@ id = LibGit2.addblob!(repo, blob_file)
 function addblob!(repo::GitRepo, path::AbstractString)
     ensure_initialized()
     id_ref = Ref{GitHash}()
-    @check ccall((:git_blob_create_from_disk, :libgit2), Cint,
+    @check ccall((:git_blob_create_from_disk, libgit2), Cint,
                  (Ptr{GitHash}, Ptr{Cvoid}, Cstring),
-                 id_ref, repo.ptr, path)
+                 id_ref, repo, path)
     return id_ref[]
 end
 
diff --git a/stdlib/LibGit2/src/callbacks.jl b/stdlib/LibGit2/src/callbacks.jl
index 3bc6463140d5f..2f48278e985a1 100644
--- a/stdlib/LibGit2/src/callbacks.jl
+++ b/stdlib/LibGit2/src/callbacks.jl
@@ -9,7 +9,7 @@ function mirror_callback(remote::Ptr{Ptr{Cvoid}}, repo_ptr::Ptr{Cvoid},
     ensure_initialized()
     # Create the remote with a mirroring url
     fetch_spec = "+refs/*:refs/*"
-    err = ccall((:git_remote_create_with_fetchspec, :libgit2), Cint,
+    err = ccall((:git_remote_create_with_fetchspec, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Cstring),
                 remote, repo_ptr, name, url, fetch_spec)
     err != 0 && return Cint(err)
@@ -26,7 +26,7 @@ function mirror_callback(remote::Ptr{Ptr{Cvoid}}, repo_ptr::Ptr{Cvoid},
 end
 
 """
-    LibGit2.is_passphrase_required(private_key) -> Bool
+    LibGit2.is_passphrase_required(private_key)::Bool
 
 Return `true` if the `private_key` file requires a passphrase, `false` otherwise.
 """
@@ -43,7 +43,7 @@ end
 function user_abort()
     ensure_initialized()
     # Note: Potentially it could be better to just throw a Julia error.
-    ccall((:giterr_set_str, :libgit2), Cvoid,
+    ccall((:git_error_set_str, libgit2), Cvoid,
           (Cint, Cstring), Cint(Error.Callback),
           "Aborting, user cancelled credential request.")
     return Cint(Error.EUSER)
@@ -51,7 +51,7 @@ end
 
 function prompt_limit()
     ensure_initialized()
-    ccall((:giterr_set_str, :libgit2), Cvoid,
+    ccall((:git_error_set_str, libgit2), Cvoid,
           (Cint, Cstring), Cint(Error.Callback),
           "Aborting, maximum number of prompts reached.")
     return Cint(Error.EAUTH)
@@ -59,7 +59,7 @@ end
 
 function exhausted_abort()
     ensure_initialized()
-    ccall((:giterr_set_str, :libgit2), Cvoid,
+    ccall((:git_error_set_str, libgit2), Cvoid,
           (Cint, Cstring), Cint(Error.Callback),
           "All authentication methods have failed.")
     return Cint(Error.EAUTH)
@@ -79,7 +79,7 @@ function authenticate_ssh(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPayload,
 
     # first try ssh-agent if credentials support its usage
     if p.use_ssh_agent && username_ptr != Cstring(C_NULL) && (!revised || !isfilled(cred))
-        err = ccall((:git_cred_ssh_key_from_agent, :libgit2), Cint,
+        err = ccall((:git_cred_ssh_key_from_agent, libgit2), Cint,
                     (Ptr{Ptr{Cvoid}}, Cstring), libgit2credptr, username_ptr)
 
         p.use_ssh_agent = false  # use ssh-agent only one time
@@ -175,7 +175,7 @@ function authenticate_ssh(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPayload,
     if !revised
         return exhausted_abort()
     end
-    return ccall((:git_cred_ssh_key_new, :libgit2), Cint,
+    return ccall((:git_cred_ssh_key_new, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Cstring, Cstring, Cstring, Cstring),
                  libgit2credptr, cred.user, cred.pubkey, cred.prvkey, cred.pass)
 end
@@ -195,9 +195,9 @@ function authenticate_userpass(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPay
     if p.use_git_helpers && (!revised || !isfilled(cred))
         git_cred = GitCredential(p.config, p.url)
 
-         # Use `deepcopy` to ensure shredding the `git_cred` does not shred the `cred`s copy
+         # Use `copy` to ensure shredding the `git_cred` does not shred the `cred`s copy
         cred.user = something(git_cred.username, "")
-        cred.pass = deepcopy(something(git_cred.password, ""))
+        cred.pass = git_cred.password !== nothing ? copy(git_cred.password) : ""
         Base.shred!(git_cred)
         revised = true
 
@@ -235,14 +235,14 @@ function authenticate_userpass(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPay
         return exhausted_abort()
     end
 
-    return ccall((:git_cred_userpass_plaintext_new, :libgit2), Cint,
+    return ccall((:git_cred_userpass_plaintext_new, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Cstring, Cstring),
                  libgit2credptr, cred.user, cred.pass)
 end
 
 
 """
-    credential_callback(...) -> Cint
+    credential_callback(...)::Cint
 
 A LibGit2 credential callback function which provides different credential acquisition
 functionality w.r.t. a connection protocol. The `payload_ptr` is required to contain a
@@ -292,7 +292,7 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring,
             cred = explicit
 
             # Copy explicit credentials to avoid mutating approved credentials.
-            # invalidation fix from cred being non-inferrable
+            # invalidation fix from cred being non-inferable
             p.credential = Base.invokelatest(deepcopy, cred)
 
             if isa(cred, SSHCredential)
@@ -307,7 +307,7 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring,
 
             # Perform a deepcopy as we do not want to mutate approved cached credentials
             if haskey(cache, cred_id)
-                # invalidation fix from cache[cred_id] being non-inferrable
+                # invalidation fix from cache[cred_id] being non-inferable
                 p.credential = Base.invokelatest(deepcopy, cache[cred_id])
             end
         end
@@ -339,7 +339,7 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring,
     if err == 0
         if p.explicit !== nothing
             ensure_initialized()
-            ccall((:giterr_set_str, :libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback),
+            ccall((:git_error_set_str, libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback),
                   "The explicitly provided credential is incompatible with the requested " *
                   "authentication methods.")
         end
diff --git a/stdlib/LibGit2/src/commit.jl b/stdlib/LibGit2/src/commit.jl
index 5d3c666af4bbb..ceb69d64ad94b 100644
--- a/stdlib/LibGit2/src/commit.jl
+++ b/stdlib/LibGit2/src/commit.jl
@@ -14,8 +14,8 @@ function message(c::GitCommit, raw::Bool=false)
     ensure_initialized()
     GC.@preserve c begin
         local msg_ptr::Cstring
-        msg_ptr = raw ? ccall((:git_commit_message_raw, :libgit2), Cstring, (Ptr{Cvoid},), c.ptr) :
-                        ccall((:git_commit_message, :libgit2), Cstring, (Ptr{Cvoid},), c.ptr)
+        msg_ptr = raw ? ccall((:git_commit_message_raw, libgit2), Cstring, (Ptr{Cvoid},), c.ptr) :
+                        ccall((:git_commit_message, libgit2), Cstring, (Ptr{Cvoid},), c.ptr)
         if msg_ptr == C_NULL
             return nothing
         end
@@ -33,7 +33,7 @@ the person who made changes to the relevant file(s). See also [`committer`](@ref
 function author(c::GitCommit)
     ensure_initialized()
     GC.@preserve c begin
-        ptr = ccall((:git_commit_author, :libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr)
+        ptr = ccall((:git_commit_author, libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr)
         @assert ptr != C_NULL
         sig = Signature(ptr)
     end
@@ -51,7 +51,7 @@ a `committer` who committed it.
 function committer(c::GitCommit)
     ensure_initialized()
     GC.@preserve c begin
-        ptr = ccall((:git_commit_committer, :libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr)
+        ptr = ccall((:git_commit_committer, libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr)
         sig = Signature(ptr)
     end
     return sig
@@ -73,21 +73,23 @@ function commit(repo::GitRepo,
     ensure_initialized()
     commit_id_ptr = Ref(GitHash())
     nparents = length(parents)
-    parentptrs = Ptr{Cvoid}[c.ptr for c in parents]
-    @check ccall((:git_commit_create, :libgit2), Cint,
-                 (Ptr{GitHash}, Ptr{Cvoid}, Ptr{UInt8},
-                  Ptr{SignatureStruct}, Ptr{SignatureStruct},
-                  Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid},
-                  Csize_t, Ptr{Ptr{Cvoid}}),
-                 commit_id_ptr, repo.ptr, isempty(refname) ? C_NULL : refname,
-                 author.ptr, committer.ptr,
-                 C_NULL, msg, tree.ptr,
-                 nparents, nparents > 0 ? parentptrs : C_NULL)
+    GC.@preserve parents begin
+        parentptrs = Ptr{Cvoid}[c.ptr for c in parents]
+        @check ccall((:git_commit_create, libgit2), Cint,
+                     (Ptr{GitHash}, Ptr{Cvoid}, Ptr{UInt8},
+                      Ptr{SignatureStruct}, Ptr{SignatureStruct},
+                      Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid},
+                      Csize_t, Ptr{Ptr{Cvoid}}),
+                     commit_id_ptr, repo, isempty(refname) ? C_NULL : refname,
+                     author, committer,
+                     C_NULL, msg, tree,
+                     nparents, nparents > 0 ? parentptrs : C_NULL)
+    end
     return commit_id_ptr[]
 end
 
 """
-    commit(repo::GitRepo, msg::AbstractString; kwargs...) -> GitHash
+    commit(repo::GitRepo, msg::AbstractString; kwargs...)::GitHash
 
 Wrapper around [`git_commit_create`](https://libgit2.org/libgit2/#HEAD/group/commit/git_commit_create).
 Create a commit in the repository `repo`. `msg` is the commit message. Return the OID of the new commit.
@@ -147,3 +149,45 @@ function commit(repo::GitRepo, msg::AbstractString;
     end
     return commit_id
 end
+
+"""
+    parentcount(c::GitCommit)
+
+Get the number of parents of this commit.
+
+See also [`parent`](@ref), [`parent_id`](@ref).
+"""
+parentcount(c::GitCommit) =
+    Int(ccall((:git_commit_parentcount, libgit2), Cuint, (Ptr{Cvoid},), c))
+
+"""
+    parent(c::GitCommit, n)
+
+Get the `n`-th (1-based) parent of the commit.
+
+See also [`parentcount`](@ref), [`parent_id`](@ref).
+"""
+function parent(c::GitCommit, n)
+    ptr_ref = Ref{Ptr{Cvoid}}()
+    @check ccall((:git_commit_parent, libgit2), Cint,
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cuint), ptr_ref, c, n - 1)
+    return GitCommit(c.owner, ptr_ref[])
+end
+
+"""
+    parent_id(c::GitCommit, n)
+
+Get the oid of the `n`-th (1-based) parent for a commit.
+
+See also [`parentcount`](@ref), [`parent`](@ref).
+"""
+function parent_id(c::GitCommit, n)
+    oid_ptr = ccall((:git_commit_parent_id, libgit2), Ptr{GitHash},
+                    (Ptr{Cvoid}, Cuint), c, n - 1)
+    if oid_ptr == C_NULL
+        # 0-based indexing mimicking the error message from libgit2
+        throw(GitError(Error.Invalid, Error.ENOTFOUND,
+                       "parent $(n - 1) does not exist"))
+    end
+    return unsafe_load(oid_ptr)
+end
diff --git a/stdlib/LibGit2/src/config.jl b/stdlib/LibGit2/src/config.jl
index a54cd352aa063..0bee705259ca6 100644
--- a/stdlib/LibGit2/src/config.jl
+++ b/stdlib/LibGit2/src/config.jl
@@ -13,7 +13,7 @@ function GitConfig(path::AbstractString,
     ensure_initialized()
     # create new config object
     cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_config_new, :libgit2), Cint, (Ptr{Ptr{Cvoid}},), cfg_ptr_ptr)
+    @check ccall((:git_config_new, libgit2), Cint, (Ptr{Ptr{Cvoid}},), cfg_ptr_ptr)
     cfg = GitConfig(cfg_ptr_ptr[])
     try
         addfile(cfg, path, level, repo, force)
@@ -34,8 +34,8 @@ used.
 function GitConfig(repo::GitRepo)
     ensure_initialized()
     cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_repository_config, :libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), cfg_ptr_ptr, repo.ptr)
+    @check ccall((:git_repository_config, libgit2), Cint,
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), cfg_ptr_ptr, repo)
     return GitConfig(repo, cfg_ptr_ptr[])
 end
 
@@ -49,16 +49,16 @@ options outside a specific git repository.
 function GitConfig(level::Consts.GIT_CONFIG = Consts.CONFIG_LEVEL_DEFAULT)
     ensure_initialized()
     cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_config_open_default, :libgit2), Cint,
+    @check ccall((:git_config_open_default, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}},), cfg_ptr_ptr)
     cfg = GitConfig(cfg_ptr_ptr[])
     if level != Consts.CONFIG_LEVEL_DEFAULT
         glb_cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
         tmpcfg = cfg
         try
-            @check ccall((:git_config_open_level, :libgit2), Cint,
+            @check ccall((:git_config_open_level, libgit2), Cint,
                          (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint),
-                          glb_cfg_ptr_ptr, cfg.ptr, Cint(level))
+                          glb_cfg_ptr_ptr, cfg, Cint(level))
             cfg = GitConfig(glb_cfg_ptr_ptr[])
         finally
             close(tmpcfg)
@@ -90,22 +90,22 @@ function addfile(cfg::GitConfig, path::AbstractString,
                  force::Bool=false)
     ensure_initialized()
     @static if LibGit2.VERSION >= v"0.27.0"
-        @check ccall((:git_config_add_file_ondisk, :libgit2), Cint,
-                     (Ptr{Ptr{Cvoid}}, Cstring, Cint, Ptr{Cvoid}, Cint),
-                     cfg.ptr, path, Cint(level), isa(repo, GitRepo) ? repo.ptr : C_NULL, Cint(force))
+        @check ccall((:git_config_add_file_ondisk, libgit2), Cint,
+                     (Ptr{Cvoid}, Cstring, Cint, Ptr{Cvoid}, Cint),
+                     cfg, path, Cint(level), isa(repo, GitRepo) ? repo : C_NULL, Cint(force))
     else
         repo === nothing || error("repo argument is not supported in this version of LibGit2")
-        @check ccall((:git_config_add_file_ondisk, :libgit2), Cint,
-                     (Ptr{Ptr{Cvoid}}, Cstring, Cint, Cint),
-                     cfg.ptr, path, Cint(level), Cint(force))
+        @check ccall((:git_config_add_file_ondisk, libgit2), Cint,
+                     (Ptr{Cvoid}, Cstring, Cint, Cint),
+                     cfg, path, Cint(level), Cint(force))
     end
 end
 
 function get(::Type{<:AbstractString}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     buf_ref = Ref(Buffer())
-    @check ccall((:git_config_get_string_buf, :libgit2), Cint,
-                 (Ptr{Buffer}, Ptr{Cvoid}, Cstring), buf_ref, c.ptr, name)
+    @check ccall((:git_config_get_string_buf, libgit2), Cint,
+                 (Ptr{Buffer}, Ptr{Cvoid}, Cstring), buf_ref, c, name)
     buf = buf_ref[]
     str = unsafe_string(buf.ptr, buf.size)
     free(buf_ref)
@@ -115,24 +115,24 @@ end
 function get(::Type{Bool}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     val_ptr = Ref(Cint(0))
-    @check ccall((:git_config_get_bool, :libgit2), Cint,
-          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name)
+    @check ccall((:git_config_get_bool, libgit2), Cint,
+          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c, name)
     return Bool(val_ptr[])
 end
 
 function get(::Type{Int32}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     val_ptr = Ref(Cint(0))
-    @check ccall((:git_config_get_int32, :libgit2), Cint,
-          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name)
+    @check ccall((:git_config_get_int32, libgit2), Cint,
+          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c, name)
     return val_ptr[]
 end
 
 function get(::Type{Int64}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     val_ptr = Ref(Cintmax_t(0))
-    @check ccall((:git_config_get_int64, :libgit2), Cint,
-          (Ptr{Cintmax_t}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name)
+    @check ccall((:git_config_get_int64, libgit2), Cint,
+          (Ptr{Cintmax_t}, Ptr{Cvoid}, Cstring), val_ptr, c, name)
     return val_ptr[]
 end
 
@@ -164,69 +164,69 @@ end
 
 function set!(c::GitConfig, name::AbstractString, value::AbstractString)
     ensure_initialized()
-    @check ccall((:git_config_set_string, :libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cstring), c.ptr, name, value)
+    @check ccall((:git_config_set_string, libgit2), Cint,
+                  (Ptr{Cvoid}, Cstring, Cstring), c, name, value)
 end
 
 function set!(c::GitConfig, name::AbstractString, value::Bool)
     ensure_initialized()
     bval = Int32(value)
-    @check ccall((:git_config_set_bool, :libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cint), c.ptr, name, bval)
+    @check ccall((:git_config_set_bool, libgit2), Cint,
+                  (Ptr{Cvoid}, Cstring, Cint), c, name, bval)
 end
 
 function set!(c::GitConfig, name::AbstractString, value::Int32)
     ensure_initialized()
-    @check ccall((:git_config_set_int32, :libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cint), c.ptr, name, value)
+    @check ccall((:git_config_set_int32, libgit2), Cint,
+                  (Ptr{Cvoid}, Cstring, Cint), c, name, value)
 end
 
 function set!(c::GitConfig, name::AbstractString, value::Int64)
     ensure_initialized()
-    @check ccall((:git_config_set_int64, :libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cintmax_t), c.ptr, name, value)
+    @check ccall((:git_config_set_int64, libgit2), Cint,
+                  (Ptr{Cvoid}, Cstring, Cintmax_t), c, name, value)
 end
 
 function GitConfigIter(cfg::GitConfig)
     ensure_initialized()
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_config_iterator_new, :libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), ci_ptr, cfg.ptr)
+    @check ccall((:git_config_iterator_new, libgit2), Cint,
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), ci_ptr, cfg)
     return GitConfigIter(ci_ptr[])
 end
 
 function GitConfigIter(cfg::GitConfig, name::AbstractString)
     ensure_initialized()
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_config_multivar_iterator_new, :libgit2), Cint,
+    @check ccall((:git_config_multivar_iterator_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring),
-                  ci_ptr, cfg.ptr, name, C_NULL)
+                  ci_ptr, cfg, name, C_NULL)
     return GitConfigIter(ci_ptr[])
 end
 
 function GitConfigIter(cfg::GitConfig, name::AbstractString, value::Regex)
     ensure_initialized()
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_config_multivar_iterator_new, :libgit2), Cint,
+    @check ccall((:git_config_multivar_iterator_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring),
-                  ci_ptr, cfg.ptr, name, value.pattern)
+                  ci_ptr, cfg, name, value.pattern)
     return GitConfigIter(ci_ptr[])
 end
 
 function GitConfigIter(cfg::GitConfig, name::Regex)
     ensure_initialized()
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_config_iterator_glob_new, :libgit2), Cint,
+    @check ccall((:git_config_iterator_glob_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                  ci_ptr, cfg.ptr, name.pattern)
+                  ci_ptr, cfg, name.pattern)
     return GitConfigIter(ci_ptr[])
 end
 
 function Base.iterate(ci::GitConfigIter, state=nothing)
     ensure_initialized()
     entry_ptr_ptr = Ref{Ptr{ConfigEntry}}(C_NULL)
-    err = ccall((:git_config_next, :libgit2), Cint,
-                 (Ptr{Ptr{ConfigEntry}}, Ptr{Cvoid}), entry_ptr_ptr, ci.ptr)
+    err = ccall((:git_config_next, libgit2), Cint,
+                 (Ptr{Ptr{ConfigEntry}}, Ptr{Cvoid}), entry_ptr_ptr, ci)
     if err == Cint(Error.GIT_OK)
         return (unsafe_load(entry_ptr_ptr[]), nothing)
     elseif err == Cint(Error.ITEROVER)
diff --git a/stdlib/LibGit2/src/consts.jl b/stdlib/LibGit2/src/consts.jl
index f3a460108db6b..1a523b381982b 100644
--- a/stdlib/LibGit2/src/consts.jl
+++ b/stdlib/LibGit2/src/consts.jl
@@ -2,7 +2,7 @@
 
 module Consts
 
-import ..LibGit2: version, ensure_initialized
+import ..LibGit2: version
 
 const HEAD_FILE  = "HEAD"
 const FETCH_HEAD  = "FETCH_HEAD"
@@ -10,12 +10,15 @@ const REMOTE_ORIGIN = "origin"
 
 # objs
 @enum(OBJECT,
-      OBJ_ANY    = -2,
-      OBJ_BAD    = -1,
-      OBJ_COMMIT = 1,
-      OBJ_TREE   = 2,
-      OBJ_BLOB   = 3,
-      OBJ_TAG    = 4)
+      OBJ_ANY       = -2,
+      OBJ_BAD       = -1,
+      OBJ_COMMIT    = 1,
+      OBJ_TREE      = 2,
+      OBJ_BLOB      = 3,
+      OBJ_TAG       = 4,
+      OBJ_OFS_DELTA = 6,
+      OBJ_REF_DELTA = 7)
+const OBJ_INVALID = OBJ_BAD
 
 #revwalk
 const SORT_NONE        = Cint(0)
@@ -26,8 +29,10 @@ const SORT_REVERSE     = Cint(1 << 2)
 # refs
 const REF_INVALID  = Cint(0)
 const REF_OID      = Cint(1)
+const REF_DIRECT   = REF_OID
 const REF_SYMBOLIC = Cint(2)
 const REF_LISTALL  = REF_OID | REF_SYMBOLIC
+const REF_ALL      = REF_LISTALL
 
 # blame
 const BLAME_NORMAL                          = Cuint(0)
@@ -36,10 +41,11 @@ const BLAME_TRACK_COPIES_SAME_COMMIT_MOVES  = Cuint(1 << 1)
 const BLAME_TRACK_COPIES_SAME_COMMIT_COPIES = Cuint(1 << 2)
 const BLAME_TRACK_COPIES_ANY_COMMIT_COPIES  = Cuint(1 << 3)
 const BLAME_FIRST_PARENT                    = Cuint(1 << 4)
+const BLAME_USE_MAILMAP                     = Cuint(1 << 5)
+const BLAME_IGNORE_WHITESPACE               = Cuint(1 << 6)
 
 # checkout
-const CHECKOUT_NONE                    = Cuint(0)
-const CHECKOUT_SAFE                    = Cuint(1 << 0)
+const CHECKOUT_SAFE                    = Cuint(0)
 const CHECKOUT_FORCE                   = Cuint(1 << 1)
 const CHECKOUT_RECREATE_MISSING        = Cuint(1 << 2)
 const CHECKOUT_ALLOW_CONFLICTS         = Cuint(1 << 4)
@@ -57,6 +63,10 @@ const CHECKOUT_DONT_OVERWRITE_IGNORED  = Cuint(1 << 19)
 const CHECKOUT_CONFLICT_STYLE_MERGE    = Cuint(1 << 20)
 const CHECKOUT_CONFLICT_STYLE_DIFF3    = Cuint(1 << 21)
 const CHECKOUT_DONT_REMOVE_EXISTING    = Cuint(1 << 22)
+const CHECKOUT_DONT_WRITE_INDEX        = Cuint(1 << 23)
+const CHECKOUT_DRY_RUN                 = Cuint(1 << 24)
+const CHECKOUT_CONFLICT_STYLE_ZDIFF3   = Cuint(1 << 25)
+const CHECKOUT_NONE                    = Cuint(1 << 30)
 
 const CHECKOUT_UPDATE_SUBMODULES       = Cuint(1 << 16)
 const CHECKOUT_UPDATE_SUBMODULES_IF_CHANGED = Cuint(1 << 17)
@@ -87,6 +97,11 @@ const DIFF_IGNORE_CASE                = Cuint(1 << 10)
 const DIFF_DISABLE_PATHSPEC_MATCH     = Cuint(1 << 12)
 const DIFF_SKIP_BINARY_CHECK          = Cuint(1 << 13)
 const DIFF_ENABLE_FAST_UNTRACKED_DIRS = Cuint(1 << 14)
+const DIFF_UPDATE_INDEX               = Cuint(1 << 15)
+const DIFF_INCLUDE_UNREADABLE         = Cuint(1 << 16)
+const DIFF_INCLUDE_UNREADABLE_AS_UNTRACKED = Cuint(1 << 17)
+const DIFF_INDENT_HEURISTIC           = Cuint(1 << 18)
+const DIFF_IGNORE_BLANK_LINES         = Cuint(1 << 19)
 
 const DIFF_FORCE_TEXT               = Cuint(1 << 20)
 const DIFF_FORCE_BINARY             = Cuint(1 << 21)
@@ -97,16 +112,20 @@ const DIFF_SHOW_UNTRACKED_CONTENT   = Cuint(1 << 25)
 const DIFF_SHOW_UNMODIFIED          = Cuint(1 << 26)
 const DIFF_PATIENCE                 = Cuint(1 << 28)
 const DIFF_MINIMAL                  = Cuint(1 << 29)
+const DIFF_SHOW_BINARY              = Cuint(1 << 30)
 
 const DIFF_FLAG_BINARY     = Cuint(1 << 0)
 const DIFF_FLAG_NOT_BINARY = Cuint(1 << 1)
 const DIFF_FLAG_VALID_OID  = Cuint(1 << 2)
+const DIFF_FLAG_EXISTS     = Cuint(1 << 3)
+const DIFF_FLAG_VALID_SIZE = Cuint(1 << 4)
 
 const DIFF_FORMAT_PATCH        = Cuint(1)
 const DIFF_FORMAT_PATCH_HEADER = Cuint(2)
 const DIFF_FORMAT_RAW          = Cuint(3)
 const DIFF_FORMAT_NAME_ONLY    = Cuint(4)
 const DIFF_FORMAT_NAME_STATUS  = Cuint(5)
+const DIFF_FORMAT_PATCH_ID     = Cuint(6)
 
 @enum(DELTA_STATUS, DELTA_UNMODIFIED = Cint(0),
                     DELTA_ADDED      = Cint(1),
@@ -116,7 +135,9 @@ const DIFF_FORMAT_NAME_STATUS  = Cuint(5)
                     DELTA_COPIED     = Cint(5),
                     DELTA_IGNORED    = Cint(6),
                     DELTA_UNTRACKED  = Cint(7),
-                    DELTA_TYPECHANGE = Cint(8))
+                    DELTA_TYPECHANGE = Cint(8),
+                    DELTA_UNREADABLE = Cint(9),
+                    DELTA_CONFLICTED = Cint(10))
 
 # index
 const IDXENTRY_NAMEMASK   = (0x0fff)
@@ -165,7 +186,8 @@ const INDEX_STAGE_ANY = Cint(-1)
 @enum(GIT_MERGE, MERGE_FIND_RENAMES     = 1 << 0,
                  MERGE_FAIL_ON_CONFLICT = 1 << 1,
                  MERGE_SKIP_REUC        = 1 << 2,
-                 MERGE_NO_RECURSIVE     = 1 << 3)
+                 MERGE_NO_RECURSIVE     = 1 << 3,
+                 MERGE_VIRTUAL_BASE     = 1 << 4)
 
 @enum(GIT_MERGE_FILE, MERGE_FILE_DEFAULT                  = 0,       # Defaults
                       MERGE_FILE_STYLE_MERGE              = 1 << 0,  # Create standard conflicted merge files
@@ -175,7 +197,13 @@ const INDEX_STAGE_ANY = Cint(-1)
                       MERGE_FILE_IGNORE_WHITESPACE_CHANGE = 1 << 4,  # Ignore changes in amount of whitespace
                       MERGE_FILE_IGNORE_WHITESPACE_EOL    = 1 << 5,  # Ignore whitespace at end of line
                       MERGE_FILE_DIFF_PATIENCE            = 1 << 6,  # Use the "patience diff" algorithm
-                      MERGE_FILE_DIFF_MINIMAL             = 1 << 7)  # Take extra time to find minimal diff
+                      MERGE_FILE_DIFF_MINIMAL             = 1 << 7,  # Take extra time to find minimal diff
+                      MERGE_FILE_STYLE_ZDIFF3             = 1 << 8,  # Create zdiff3 ("zealous diff3")-style files
+
+                      # Do not produce file conflicts when common regions have
+                      # changed; keep the conflict markers in the file and accept
+                      # that as the merge result.
+                      MERGE_FILE_ACCEPT_CONFLICTS         = 1 << 9)
 """ Option flags for git merge file favoritism.
   * `MERGE_FILE_FAVOR_NORMAL`: if both sides of the merge have changes to a section,
     make a note of the conflict in the index which `git checkout` will use to create
@@ -316,6 +344,7 @@ const STATUS_OPT_INCLUDE_UNREADABLE_AS_UNTRACKED  = Cuint(1 << 15)
 # certificate types from `enum git_cert_t` in `cert.h`.
 const CERT_TYPE_TLS = 1 # GIT_CERT_X509
 const CERT_TYPE_SSH = 2 # GIT_CERT_HOSTKEY_LIBSSH2
+const CERT_TYPE_STRARRAY = 3 # GIT_CERT_STRARRAY
 
 # certificate callback return values
 const PASSTHROUGH = -30
@@ -326,6 +355,7 @@ const CERT_ACCEPT =  0
 const CERT_SSH_MD5    = 1 << 0
 const CERT_SSH_SHA1   = 1 << 1
 const CERT_SSH_SHA256 = 1 << 2
+const CERT_SSH_RAW    = 1 << 3
 
 # libssh2 known host constants
 const LIBSSH2_KNOWNHOST_TYPE_PLAIN  = 1
@@ -341,6 +371,10 @@ const LIBSSH2_KNOWNHOST_CHECK_MISMATCH = 1
 const LIBSSH2_KNOWNHOST_CHECK_NOTFOUND = 2
 const LIBSSH2_KNOWNHOST_CHECK_FAILURE  = 3
 
+# Constants for fetch depth (shallowness of fetch).
+const FETCH_DEPTH_FULL = 0
+const FETCH_DEPTH_UNSHALLOW = 2147483647
+
 @enum(GIT_SUBMODULE_IGNORE, SUBMODULE_IGNORE_UNSPECIFIED  = -1, # use the submodule's configuration
                             SUBMODULE_IGNORE_NONE         = 1,  # any change or untracked == dirty
                             SUBMODULE_IGNORE_UNTRACKED    = 2,  # dirty if tracked files change
@@ -357,9 +391,11 @@ Option flags for `GitRepo`.
 @enum(GIT_REPOSITORY_OPEN, REPOSITORY_OPEN_DEFAULT   = 0,
                            REPOSITORY_OPEN_NO_SEARCH = 1<<0,
                            REPOSITORY_OPEN_CROSS_FS  = 1<<1,
-                           REPOSITORY_OPEN_BARE      = 1<<2)
+                           REPOSITORY_OPEN_BARE      = 1<<2,
+                           REPOSITORY_OPEN_NO_DOTGIT = 1<<3,
+                           REPOSITORY_OPEN_FROM_ENV  = 1<<4)
 
-@enum(GIT_BRANCH, BRANCH_LOCAL = 1, BRANCH_REMOTE = 2)
+@enum(GIT_BRANCH, BRANCH_LOCAL = 1, BRANCH_REMOTE = 2, BRANCH_ALL = 1 | 2)
 
 @enum(GIT_FILEMODE, FILEMODE_UNREADABLE          = 0o000000,
                     FILEMODE_TREE                = 0o040000,
@@ -381,7 +417,32 @@ Option flags for `GitRepo`.
                    FEATURE_SSH     = Cuint(1 << 2),
                    FEATURE_NSEC    = Cuint(1 << 3))
 
-if version() >= v"0.24.0"
+if version() >= v"1.8.0"
+    @doc """
+    Priority level of a config file.
+
+    These priority levels correspond to the natural escalation logic (from higher to lower) when searching for config entries in git.
+
+    * `CONFIG_LEVEL_DEFAULT` - Open the global, XDG and system configuration files if any available.
+    * `CONFIG_LEVEL_PROGRAMDATA` - System-wide on Windows, for compatibility with portable git
+    * `CONFIG_LEVEL_SYSTEM` - System-wide configuration file; `/etc/gitconfig` on Linux systems
+    * `CONFIG_LEVEL_XDG` - XDG compatible configuration file; typically `~/.config/git/config`
+    * `CONFIG_LEVEL_GLOBAL` - User-specific configuration file (also called Global configuration file); typically `~/.gitconfig`
+    * `CONFIG_LEVEL_LOCAL` - Repository specific configuration file; `\$WORK_DIR/.git/config` on non-bare repos
+    * `CONFIG_LEVEL_WORKTREE` - Worktree specific configuration file; `\$GIT_DIR/config.worktree`
+    * `CONFIG_LEVEL_APP` - Application specific configuration file; freely defined by applications
+    * `CONFIG_HIGHEST_LEVEL` - Represents the highest level available config file (i.e. the most specific config file available that actually is loaded)
+    """
+    @enum(GIT_CONFIG, CONFIG_LEVEL_DEFAULT     = 0,
+                      CONFIG_LEVEL_PROGRAMDATA = 1,
+                      CONFIG_LEVEL_SYSTEM      = 2,
+                      CONFIG_LEVEL_XDG         = 3,
+                      CONFIG_LEVEL_GLOBAL      = 4,
+                      CONFIG_LEVEL_LOCAL       = 5,
+                      CONFIG_LEVEL_WORKTREE    = 6,
+                      CONFIG_LEVEL_APP         = 7,
+                      CONFIG_HIGHEST_LEVEL     =-1)
+elseif version() >= v"0.24.0"
     @doc """
     Priority level of a config file.
 
@@ -432,19 +493,49 @@ Global library options.
 
 These are used to select which global option to set or get and are used in `git_libgit2_opts()`.
 """
-@enum(GIT_OPT, GET_MWINDOW_SIZE         = 0,
-               SET_MWINDOW_SIZE         = 1,
-               GET_MWINDOW_MAPPED_LIMIT = 2,
-               SET_MWINDOW_MAPPED_LIMIT = 3,
-               GET_SEARCH_PATH          = 4,
-               SET_SEARCH_PATH          = 5,
-               SET_CACHE_OBJECT_LIMIT   = 6,
-               SET_CACHE_MAX_SIZE       = 7,
-               ENABLE_CACHING           = 8,
-               GET_CACHED_MEMORY        = 9,
-               GET_TEMPLATE_PATH        = 10,
-               SET_TEMPLATE_PATH        = 11,
-               SET_SSL_CERT_LOCATIONS   = 12)
+@enum(GIT_OPT, GET_MWINDOW_SIZE = 0,
+               SET_MWINDOW_SIZE,
+               GET_MWINDOW_MAPPED_LIMIT,
+               SET_MWINDOW_MAPPED_LIMIT,
+               GET_SEARCH_PATH,
+               SET_SEARCH_PATH,
+               SET_CACHE_OBJECT_LIMIT,
+               SET_CACHE_MAX_SIZE,
+               ENABLE_CACHING,
+               GET_CACHED_MEMORY,
+               GET_TEMPLATE_PATH,
+               SET_TEMPLATE_PATH,
+               SET_SSL_CERT_LOCATIONS,
+               SET_USER_AGENT,
+               ENABLE_STRICT_OBJECT_CREATION,
+               ENABLE_STRICT_SYMBOLIC_REF_CREATION,
+               SET_SSL_CIPHERS,
+               GET_USER_AGENT,
+               ENABLE_OFS_DELTA,
+               ENABLE_FSYNC_GITDIR,
+               GET_WINDOWS_SHAREMODE,
+               SET_WINDOWS_SHAREMODE,
+               ENABLE_STRICT_HASH_VERIFICATION,
+               SET_ALLOCATOR,
+               ENABLE_UNSAVED_INDEX_SAFETY,
+               GET_PACK_MAX_OBJECTS,
+               SET_PACK_MAX_OBJECTS,
+               DISABLE_PACK_KEEP_FILE_CHECKS,
+               ENABLE_HTTP_EXPECT_CONTINUE,
+               GET_MWINDOW_FILE_LIMIT,
+               SET_MWINDOW_FILE_LIMIT,
+               SET_ODB_PACKED_PRIORITY,
+               SET_ODB_LOOSE_PRIORITY,
+               GET_EXTENSIONS,
+               SET_EXTENSIONS,
+               GET_OWNER_VALIDATION,
+               SET_OWNER_VALIDATION,
+               GET_HOMEDIR,
+               SET_HOMEDIR,
+               SET_SERVER_CONNECT_TIMEOUT,
+               GET_SERVER_CONNECT_TIMEOUT,
+               SET_SERVER_TIMEOUT,
+               GET_SERVER_TIMEOUT)
 
 """
 Option flags for `GitProxy`.
@@ -468,4 +559,14 @@ Option flags for `GitProxy`.
     TRACE_TRACE
 end
 
+# The type of object id
+@enum(GIT_OID_TYPE,
+      OID_DEFAULT = 0,
+      OID_SHA1 = 1)
+
+# Direction of the connection.
+@enum(GIT_DIRECTION,
+      DIRECTION_FETCH = 0,
+      DIRECTION_PUSH = 1)
+
 end
diff --git a/stdlib/LibGit2/src/diff.jl b/stdlib/LibGit2/src/diff.jl
index f2aa2feb2c2e9..0f69c10f6b053 100644
--- a/stdlib/LibGit2/src/diff.jl
+++ b/stdlib/LibGit2/src/diff.jl
@@ -27,15 +27,15 @@ function diff_tree(repo::GitRepo, tree::GitTree, pathspecs::AbstractString=""; c
     ensure_initialized()
     diff_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     if cached
-        @check ccall((:git_diff_tree_to_index, :libgit2), Cint,
+        @check ccall((:git_diff_tree_to_index, libgit2), Cint,
                      (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}),
-                     diff_ptr_ptr, repo.ptr, tree.ptr, C_NULL, isempty(pathspecs) ? C_NULL : pathspecs)
+                     diff_ptr_ptr, repo, tree, C_NULL, isempty(pathspecs) ? C_NULL : pathspecs)
     else
-        @check ccall((:git_diff_tree_to_workdir_with_index, :libgit2), Cint,
+        @check ccall((:git_diff_tree_to_workdir_with_index, libgit2), Cint,
                      (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}),
-                     diff_ptr_ptr, repo.ptr, tree.ptr, isempty(pathspecs) ? C_NULL : pathspecs)
+                     diff_ptr_ptr, repo, tree, isempty(pathspecs) ? C_NULL : pathspecs)
     end
-    return GitDiff(repo, diff_ptr_ptr[])
+    return GitDiff(diff_ptr_ptr[])
 end
 
 """
@@ -51,10 +51,10 @@ to compare a commit on another branch with the current latest commit on `master`
 function diff_tree(repo::GitRepo, oldtree::GitTree, newtree::GitTree)
     ensure_initialized()
     diff_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_diff_tree_to_tree, :libgit2), Cint,
+    @check ccall((:git_diff_tree_to_tree, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}),
-                   diff_ptr_ptr, repo.ptr, oldtree.ptr, newtree.ptr, C_NULL)
-    return GitDiff(repo, diff_ptr_ptr[])
+                   diff_ptr_ptr, repo, oldtree, newtree, C_NULL)
+    return GitDiff(diff_ptr_ptr[])
 end
 
 """
@@ -67,14 +67,14 @@ files were changed, how many insertions were made, and how many deletions were m
 function GitDiffStats(diff::GitDiff)
     ensure_initialized()
     diff_stat_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_diff_get_stats, :libgit2), Cint,
+    @check ccall((:git_diff_get_stats, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}),
-                  diff_stat_ptr_ptr, diff.ptr)
-    return GitDiffStats(diff.owner, diff_stat_ptr_ptr[])
+                  diff_stat_ptr_ptr, diff)
+    return GitDiffStats(diff_stat_ptr_ptr[])
 end
 
 """
-    files_changed(diff_stat::GitDiffStats) -> Csize_t
+    files_changed(diff_stat::GitDiffStats)::Csize_t
 
 Return how many files were changed (added/modified/deleted) in the [`GitDiff`](@ref)
 summarized by `diff_stat`. The result may vary depending on the [`DiffOptionsStruct`](@ref)
@@ -83,11 +83,11 @@ are to be included or not).
 """
 function files_changed(diff_stat::GitDiffStats)
     ensure_initialized()
-    return ccall((:git_diff_stats_files_changed, :libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr)
+    return ccall((:git_diff_stats_files_changed, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat)
 end
 
 """
-    insertions(diff_stat::GitDiffStats) -> Csize_t
+    insertions(diff_stat::GitDiffStats)::Csize_t
 
 Return the total number of insertions (lines added) in the [`GitDiff`](@ref)
 summarized by `diff_stat`. The result may vary depending on the [`DiffOptionsStruct`](@ref)
@@ -96,11 +96,11 @@ are to be included or not).
 """
 function insertions(diff_stat::GitDiffStats)
     ensure_initialized()
-    return ccall((:git_diff_stats_insertions, :libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr)
+    return ccall((:git_diff_stats_insertions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat)
 end
 
 """
-    deletions(diff_stat::GitDiffStats) -> Csize_t
+    deletions(diff_stat::GitDiffStats)::Csize_t
 
 Return the total number of deletions (lines removed) in the [`GitDiff`](@ref)
 summarized by `diff_stat`. The result may vary depending on the [`DiffOptionsStruct`](@ref)
@@ -109,12 +109,12 @@ are to be included or not).
 """
 function deletions(diff_stat::GitDiffStats)
     ensure_initialized()
-    return ccall((:git_diff_stats_deletions, :libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr)
+    return ccall((:git_diff_stats_deletions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat)
 end
 
 function count(diff::GitDiff)
     ensure_initialized()
-    return ccall((:git_diff_num_deltas, :libgit2), Cint, (Ptr{Cvoid},), diff.ptr)
+    return ccall((:git_diff_num_deltas, libgit2), Cint, (Ptr{Cvoid},), diff)
 end
 
 function Base.getindex(diff::GitDiff, i::Integer)
@@ -122,10 +122,12 @@ function Base.getindex(diff::GitDiff, i::Integer)
         throw(BoundsError(diff, (i,)))
     end
     ensure_initialized()
-    delta_ptr = ccall((:git_diff_get_delta, :libgit2),
-                      Ptr{DiffDelta},
-                      (Ptr{Cvoid}, Csize_t), diff.ptr, i-1)
-    return unsafe_load(delta_ptr)
+    GC.@preserve diff begin # preserve `diff` object until return of `unsafe_load`
+        delta_ptr = ccall((:git_diff_get_delta, libgit2),
+                          Ptr{DiffDelta},
+                          (Ptr{Cvoid}, Csize_t), diff, i-1)
+        return unsafe_load(delta_ptr)
+    end
 end
 
 function Base.show(io::IO, diff_stat::GitDiffStats)
@@ -140,3 +142,34 @@ function Base.show(io::IO, diff::GitDiff)
     println(io, "Number of deltas: $(count(diff))")
     show(io, GitDiffStats(diff))
 end
+
+"""
+    GitDiff(content::AbstractString)
+
+Parse a diff from a buffer. The `content` should be in unified diff format.
+Returns a [`GitDiff`](@ref) object.
+
+This is equivalent to [`git_diff_from_buffer`](https://libgit2.org/libgit2/#HEAD/group/diff/git_diff_from_buffer).
+
+# Examples
+```julia
+diff_str = \"\"\"
+diff --git a/file.txt b/file.txt
+index 1234567..abcdefg 100644
+--- a/file.txt
++++ b/file.txt
+@@ -1 +1 @@
+-old content
++new content
+\"\"\"
+diff = LibGit2.GitDiff(diff_str)
+```
+"""
+function GitDiff(content::AbstractString)
+    ensure_initialized()
+    diff_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
+    @check ccall((:git_diff_from_buffer, libgit2), Cint,
+                 (Ptr{Ptr{Cvoid}}, Cstring, Csize_t),
+                 diff_ptr_ptr, content, sizeof(content))
+    return GitDiff(diff_ptr_ptr[])
+end
diff --git a/stdlib/LibGit2/src/error.jl b/stdlib/LibGit2/src/error.jl
index 219b8cdf88e69..6647d803d3193 100644
--- a/stdlib/LibGit2/src/error.jl
+++ b/stdlib/LibGit2/src/error.jl
@@ -3,6 +3,7 @@
 module Error
 
 import ..LibGit2: ensure_initialized
+using LibGit2_jll
 
 export GitError
 
@@ -18,7 +19,7 @@ export GitError
             EUNMERGED       = Cint(-10), # merge in progress prevented op
             ENONFASTFORWARD = Cint(-11), # ref not fast-forwardable
             EINVALIDSPEC    = Cint(-12), # name / ref not in valid format
-            EMERGECONFLICT  = Cint(-13), # merge conflict prevented op
+            ECONFLICT       = Cint(-13), # Checkout conflicts prevented operation
             ELOCKED         = Cint(-14), # lock file prevented op
             EMODIFIED       = Cint(-15), # ref value does not match expected
             EAUTH           = Cint(-16), # authentication error
@@ -26,13 +27,23 @@ export GitError
             EAPPLIED        = Cint(-18), # patch/merge has already been applied
             EPEEL           = Cint(-19), # the requested peel operation is not possible
             EEOF            = Cint(-20), # unexpected EOF
+            EINVALID        = Cint(-21), # Invalid operation or input
+            EUNCOMMITTED    = Cint(-22), # Uncommitted changes in index prevented operation
+            EDIRECTORY      = Cint(-23), # The operation is not valid for a directory
+            EMERGECONFLICT  = Cint(-24), # A merge conflict exists and cannot continue
+
             PASSTHROUGH     = Cint(-30), # internal only
             ITEROVER        = Cint(-31), # signals end of iteration
             RETRY           = Cint(-32), # internal only
             EMISMATCH       = Cint(-33), # hashsum mismatch in object
             EINDEXDIRTY     = Cint(-34), # unsaved changes in the index would be overwritten
             EAPPLYFAIL      = Cint(-35), # patch application failed
-            EOWNER          = Cint(-36)) # the object is not owned by the current user
+            EOWNER          = Cint(-36), # the object is not owned by the current user
+            TIMEOUT         = Cint(-37), # The operation timed out
+            EUNCHANGED      = Cint(-38), # There were no changes
+            ENOTSUPPORTED   = Cint(-39), # An option is not supported
+            EREADONLY       = Cint(-40), # The subject is read-only
+)
 
 @enum(Class, None,
              NoMemory,
@@ -68,7 +79,9 @@ export GitError
              Patch,
              WorkTree,
              SHA1,
-             HTTP)
+             HTTP,
+             Internal,
+             Grafts)
 
 struct ErrorStruct
     message::Ptr{UInt8}
@@ -84,7 +97,7 @@ Base.show(io::IO, err::GitError) = print(io, "GitError(Code:$(err.code), Class:$
 
 function last_error()
     ensure_initialized()
-    err = ccall((:giterr_last, :libgit2), Ptr{ErrorStruct}, ())
+    err = ccall((:git_error_last, libgit2), Ptr{ErrorStruct}, ())
     if err != C_NULL
         err_obj   = unsafe_load(err)
         err_class = Class(err_obj.class)
diff --git a/stdlib/LibGit2/src/gitcredential.jl b/stdlib/LibGit2/src/gitcredential.jl
index 7ff20ca1fdf2c..2f109faf5eba9 100644
--- a/stdlib/LibGit2/src/gitcredential.jl
+++ b/stdlib/LibGit2/src/gitcredential.jl
@@ -54,7 +54,7 @@ end
 
 
 """
-    ismatch(url, git_cred) -> Bool
+    ismatch(url, git_cred)::Bool
 
 Checks if the `git_cred` is valid for the given `url`.
 """
@@ -183,16 +183,16 @@ end
 
 function run!(helper::GitCredentialHelper, operation::AbstractString, cred::GitCredential)
     cmd = `$(helper.cmd) $operation`
-    p = open(cmd, "r+")
-
-    # Provide the helper with the credential information we know
-    write(p, cred)
-    write(p, "\n")
-    t = @async close(p.in)
-
-    # Process the response from the helper
-    Base.read!(p, cred)
-    wait(p)
+    open(cmd, "r+") do p
+        # Provide the helper with the credential information we know
+        write(p, cred)
+        write(p, "\n")
+        t = @async close(p.in)
+
+        # Process the response from the helper
+        Base.read!(p, cred)
+        wait(t)
+    end
 
     return cred
 end
@@ -211,7 +211,7 @@ approve(helper::GitCredentialHelper, cred::GitCredential) = run(helper, "store",
 reject(helper::GitCredentialHelper, cred::GitCredential) = run(helper, "erase", cred)
 
 """
-    credential_helpers(config, git_cred) -> Vector{GitCredentialHelper}
+    credential_helpers(config, git_cred)::Vector{GitCredentialHelper}
 
 Return all of the `GitCredentialHelper`s found within the provided `config` which are valid
 for the specified `git_cred`.
@@ -239,7 +239,7 @@ function credential_helpers(cfg::GitConfig, cred::GitCredential)
 end
 
 """
-    default_username(config, git_cred) -> Union{String, Nothing}
+    default_username(config, git_cred)::Union{String, Nothing}
 
 Return the default username, if any, provided by the `config` which is valid for the
 specified `git_cred`.
diff --git a/stdlib/LibGit2/src/index.jl b/stdlib/LibGit2/src/index.jl
index b8baf624540b0..c567e603404ea 100644
--- a/stdlib/LibGit2/src/index.jl
+++ b/stdlib/LibGit2/src/index.jl
@@ -8,13 +8,13 @@ Load the index file for the repository `repo`.
 function GitIndex(repo::GitRepo)
     ensure_initialized()
     idx_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_repository_index, :libgit2), Cint,
-                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), idx_ptr_ptr, repo.ptr)
+    @check ccall((:git_repository_index, libgit2), Cint,
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), idx_ptr_ptr, repo)
     return GitIndex(repo, idx_ptr_ptr[])
 end
 
 """
-    read!(idx::GitIndex, force::Bool = false) -> GitIndex
+    read!(idx::GitIndex, force::Bool = false)::GitIndex
 
 Update the contents of `idx` by reading changes made on disk. For example, `idx`
 might be updated if a file has been added to the repository since it was created.
@@ -25,23 +25,23 @@ has changed since the last time it was loaded into `idx`.
 """
 function read!(idx::GitIndex, force::Bool = false)
     ensure_initialized()
-    @check ccall((:git_index_read, :libgit2), Cint, (Ptr{Cvoid}, Cint), idx.ptr, Cint(force))
+    @check ccall((:git_index_read, libgit2), Cint, (Ptr{Cvoid}, Cint), idx, Cint(force))
     return idx
 end
 
 """
-    write!(idx::GitIndex) -> GitIndex
+    write!(idx::GitIndex)::GitIndex
 
 Write the state of index `idx` to disk using a file lock.
 """
 function write!(idx::GitIndex)
     ensure_initialized()
-    @check ccall((:git_index_write, :libgit2), Cint, (Ptr{Cvoid},), idx.ptr)
+    @check ccall((:git_index_write, libgit2), Cint, (Ptr{Cvoid},), idx)
     return idx
 end
 
 """
-    write_tree!(idx::GitIndex) -> GitHash
+    write_tree!(idx::GitIndex)::GitHash
 
 Write the index `idx` as a [`GitTree`](@ref) on disk. Trees will be recursively
 created for each subtree in `idx`. The returned [`GitHash`](@ref) can be used to
@@ -51,8 +51,34 @@ repository cannot be bare. `idx` must not contain any files with conflicts.
 function write_tree!(idx::GitIndex)
     ensure_initialized()
     oid_ptr = Ref(GitHash())
-    @check ccall((:git_index_write_tree, :libgit2), Cint,
-                 (Ptr{GitHash}, Ptr{Cvoid}), oid_ptr, idx.ptr)
+    @check ccall((:git_index_write_tree, libgit2), Cint,
+                 (Ptr{GitHash}, Ptr{Cvoid}), oid_ptr, idx)
+    return oid_ptr[]
+end
+
+"""
+    write_tree_to!(repo::GitRepo, idx::GitIndex)::GitHash
+
+Write the index `idx` as a [`GitTree`](@ref) to the given repository `repo`.
+This is similar to [`write_tree!`](@ref) but allows writing the index to a
+different repository than the one it may be associated with.
+
+Trees will be recursively created for each subtree in `idx`. The returned
+[`GitHash`](@ref) can be used to create a [`GitCommit`](@ref).
+
+This is equivalent to [`git_index_write_tree_to`](https://libgit2.org/libgit2/#HEAD/group/index/git_index_write_tree_to).
+
+# Examples
+```julia
+idx = LibGit2.GitIndex(source_repo)
+tree_oid = LibGit2.write_tree_to!(target_repo, idx)
+```
+"""
+function write_tree_to!(repo::GitRepo, idx::GitIndex)
+    ensure_initialized()
+    oid_ptr = Ref(GitHash())
+    @check ccall((:git_index_write_tree_to, libgit2), Cint,
+                 (Ptr{GitHash}, Ptr{Cvoid}, Ptr{Cvoid}), oid_ptr, idx, repo)
     return oid_ptr[]
 end
 
@@ -73,8 +99,8 @@ Read the tree `tree` (or the tree pointed to by `treehash` in the repository own
 """
 function read_tree!(idx::GitIndex, tree::GitTree)
     ensure_initialized()
-    @check ccall((:git_index_read_tree, :libgit2), Cint,
-                 (Ptr{Cvoid}, Ptr{Cvoid}), idx.ptr, tree.ptr)
+    @check ccall((:git_index_read_tree, libgit2), Cint,
+                 (Ptr{Cvoid}, Ptr{Cvoid}), idx, tree)
 end
 read_tree!(idx::GitIndex, hash::AbstractGitHash) =
     read_tree!(idx, GitTree(repository(idx), hash))
@@ -104,9 +130,9 @@ with respect to ignored files:
 function add!(idx::GitIndex, files::AbstractString...;
               flags::Cuint = Consts.INDEX_ADD_DEFAULT)
     ensure_initialized()
-    @check ccall((:git_index_add_all, :libgit2), Cint,
+    @check ccall((:git_index_add_all, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Cuint, Ptr{Cvoid}, Ptr{Cvoid}),
-                 idx.ptr, collect(files), flags, C_NULL, C_NULL)
+                 idx, collect(files), flags, C_NULL, C_NULL)
 end
 
 """
@@ -120,9 +146,9 @@ database.
 """
 function update!(idx::GitIndex, files::AbstractString...)
     ensure_initialized()
-    @check ccall((:git_index_update_all, :libgit2), Cint,
+    @check ccall((:git_index_update_all, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{Cvoid}, Ptr{Cvoid}),
-                 idx.ptr, collect(files), C_NULL, C_NULL)
+                 idx, collect(files), C_NULL, C_NULL)
 end
 
 """
@@ -134,9 +160,9 @@ of the `repo`).
 """
 function remove!(idx::GitIndex, files::AbstractString...)
     ensure_initialized()
-    @check ccall((:git_index_remove_all, :libgit2), Cint,
+    @check ccall((:git_index_remove_all, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{Cvoid}, Ptr{Cvoid}),
-                 idx.ptr, collect(files), C_NULL, C_NULL)
+                 idx, collect(files), C_NULL, C_NULL)
 end
 
 function add!(repo::GitRepo, files::AbstractString...;
@@ -173,13 +199,13 @@ end
 
 function count(idx::GitIndex)
     ensure_initialized()
-    return ccall((:git_index_entrycount, :libgit2), Csize_t, (Ptr{Cvoid},), idx.ptr)
+    return ccall((:git_index_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), idx)
 end
 
 function Base.getindex(idx::GitIndex, i::Integer)
     ensure_initialized()
     GC.@preserve idx begin
-        ie_ptr = ccall((:git_index_get_byindex, :libgit2),
+        ie_ptr = ccall((:git_index_get_byindex, libgit2),
                        Ptr{IndexEntry},
                        (Ptr{Cvoid}, Csize_t), idx.ptr, i-1)
         ie_ptr == C_NULL && return nothing
@@ -191,14 +217,14 @@ end
 function Base.findall(path::String, idx::GitIndex)
     ensure_initialized()
     pos_ref = Ref{Csize_t}(0)
-    ret = ccall((:git_index_find, :libgit2), Cint,
-                  (Ref{Csize_t}, Ptr{Cvoid}, Cstring), pos_ref, idx.ptr, path)
+    ret = ccall((:git_index_find, libgit2), Cint,
+                  (Ref{Csize_t}, Ptr{Cvoid}, Cstring), pos_ref, idx, path)
     ret == Cint(Error.ENOTFOUND) && return nothing
     return pos_ref[]+1
 end
 
 """
-    stage(ie::IndexEntry) -> Cint
+    stage(ie::IndexEntry)::Cint
 
 Get the stage number of `ie`. The stage number `0` represents the current state
 of the working tree, but other numbers can be used in the case of a merge conflict.
@@ -210,7 +236,7 @@ of a multi-branch "octopus" merge, stages `2`, `3`, and `4` might be used).
 """
 function stage(ie::IndexEntry)
     ensure_initialized()
-    return ccall((:git_index_entry_stage, :libgit2), Cint, (Ptr{IndexEntry},), Ref(ie))
+    return ccall((:git_index_entry_stage, libgit2), Cint, (Ptr{IndexEntry},), Ref(ie))
 end
 
 function Base.show(io::IO, idx::GitIndex)
diff --git a/stdlib/LibGit2/src/merge.jl b/stdlib/LibGit2/src/merge.jl
index 0b2ddab1e8512..83d81db4e12e0 100644
--- a/stdlib/LibGit2/src/merge.jl
+++ b/stdlib/LibGit2/src/merge.jl
@@ -16,27 +16,27 @@ branch head described using `GitReference`.
 function GitAnnotated(repo::GitRepo, commit_id::GitHash)
     ensure_initialized()
     ann_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_annotated_commit_lookup, :libgit2), Cint,
+    @check ccall((:git_annotated_commit_lookup, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}),
-                   ann_ptr_ptr, repo.ptr, Ref(commit_id))
+                   ann_ptr_ptr, repo, Ref(commit_id))
     return GitAnnotated(repo, ann_ptr_ptr[])
 end
 
 function GitAnnotated(repo::GitRepo, ref::GitReference)
     ensure_initialized()
     ann_ref_ref = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_annotated_commit_from_ref, :libgit2), Cint,
+    @check ccall((:git_annotated_commit_from_ref, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}),
-                   ann_ref_ref, repo.ptr, ref.ptr)
+                   ann_ref_ref, repo, ref)
     return GitAnnotated(repo, ann_ref_ref[])
 end
 
 function GitAnnotated(repo::GitRepo, fh::FetchHead)
     ensure_initialized()
     ann_ref_ref = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_annotated_commit_from_fetchhead, :libgit2), Cint,
+    @check ccall((:git_annotated_commit_from_fetchhead, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Ptr{GitHash}),
-                   ann_ref_ref, repo.ptr, fh.name, fh.url, Ref(fh.oid))
+                   ann_ref_ref, repo, fh.name, fh.url, Ref(fh.oid))
     return GitAnnotated(repo, ann_ref_ref[])
 end
 
@@ -49,7 +49,7 @@ end
 function GitHash(ann::GitAnnotated)
     ensure_initialized()
     GC.@preserve ann begin
-        oid = unsafe_load(ccall((:git_annotated_commit_id, :libgit2), Ptr{GitHash}, (Ptr{Cvoid},), ann.ptr))
+        oid = unsafe_load(ccall((:git_annotated_commit_id, libgit2), Ptr{GitHash}, (Ptr{Cvoid},), ann.ptr))
     end
     return oid
 end
@@ -88,9 +88,11 @@ function merge_analysis(repo::GitRepo, anns::Vector{GitAnnotated})
     preference = Ref{Cint}(0)
     anns_ref = Ref(Base.map(a->a.ptr, anns), 1)
     anns_size = Csize_t(length(anns))
-    @check ccall((:git_merge_analysis, :libgit2), Cint,
-                  (Ptr{Cint}, Ptr{Cint}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t),
-                   analysis, preference, repo.ptr, anns_ref, anns_size)
+    GC.@preserve anns begin
+        @check ccall((:git_merge_analysis, libgit2), Cint,
+                     (Ptr{Cint}, Ptr{Cint}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t),
+                     analysis, preference, repo, anns_ref, anns_size)
+    end
     return analysis[], preference[]
 end
 
@@ -120,7 +122,7 @@ end
 
 # Merge changes into current head
 """
-    merge!(repo::GitRepo, anns::Vector{GitAnnotated}; kwargs...) -> Bool
+    merge!(repo::GitRepo, anns::Vector{GitAnnotated}; kwargs...)::Bool
 
 Merge changes from the annotated commits (captured as [`GitAnnotated`](@ref) objects)
 `anns` into the HEAD of the repository `repo`. The keyword arguments are:
@@ -147,11 +149,13 @@ function merge!(repo::GitRepo, anns::Vector{GitAnnotated};
                 checkout_opts::CheckoutOptions = CheckoutOptions())
     ensure_initialized()
     anns_size = Csize_t(length(anns))
-    @check ccall((:git_merge, :libgit2), Cint,
-                  (Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t,
-                   Ptr{MergeOptions}, Ptr{CheckoutOptions}),
-                   repo.ptr, Base.map(x->x.ptr, anns), anns_size,
-                   Ref(merge_opts), Ref(checkout_opts))
+    GC.@preserve anns begin
+        @check ccall((:git_merge, libgit2), Cint,
+                     (Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t,
+                      Ptr{MergeOptions}, Ptr{CheckoutOptions}),
+                     repo, Base.map(x->x.ptr, anns), anns_size,
+                     Ref(merge_opts), Ref(checkout_opts))
+    end
     @info "Review and commit merged changes"
     return true
 end
@@ -159,7 +163,7 @@ end
 # Internal implementation of merge.
 # Returns `true` if merge was successful, otherwise `false`
 """
-    merge!(repo::GitRepo, anns::Vector{GitAnnotated}, fastforward::Bool; kwargs...) -> Bool
+    merge!(repo::GitRepo, anns::Vector{GitAnnotated}, fastforward::Bool; kwargs...)::Bool
 
 Merge changes from the annotated commits (captured as [`GitAnnotated`](@ref) objects)
 `anns` into the HEAD of the repository `repo`. If `fastforward` is `true`, *only* a
@@ -250,7 +254,7 @@ function merge!(repo::GitRepo, anns::Vector{GitAnnotated}, fastforward::Bool;
 end
 
 """
-    merge_base(repo::GitRepo, one::AbstractString, two::AbstractString) -> GitHash
+    merge_base(repo::GitRepo, one::AbstractString, two::AbstractString)::GitHash
 
 Find a merge base (a common ancestor) between the commits `one` and `two`.
 `one` and `two` may both be in string form. Return the `GitHash` of the merge base.
@@ -261,9 +265,9 @@ function merge_base(repo::GitRepo, one::AbstractString, two::AbstractString)
     oid2_ptr = Ref(GitHash(two))
     moid_ptr = Ref(GitHash())
     moid = try
-        @check ccall((:git_merge_base, :libgit2), Cint,
+        @check ccall((:git_merge_base, libgit2), Cint,
                 (Ptr{GitHash}, Ptr{Cvoid}, Ptr{GitHash}, Ptr{GitHash}),
-                moid_ptr, repo.ptr, oid1_ptr, oid2_ptr)
+                moid_ptr, repo, oid1_ptr, oid2_ptr)
         moid_ptr[]
     catch e
         GitHash()
diff --git a/stdlib/LibGit2/src/oid.jl b/stdlib/LibGit2/src/oid.jl
index 937684439419f..48b830384e3ee 100644
--- a/stdlib/LibGit2/src/oid.jl
+++ b/stdlib/LibGit2/src/oid.jl
@@ -13,7 +13,7 @@ function GitHash(ptr::Ptr{UInt8})
     end
     ensure_initialized()
     oid_ptr = Ref(GitHash())
-    @check ccall((:git_oid_fromraw, :libgit2), Cint,
+    @check ccall((:git_oid_fromraw, libgit2), Cint,
                  (Ptr{GitHash}, Ptr{UInt8}), oid_ptr, ptr)
     return oid_ptr[]
 end
@@ -43,7 +43,7 @@ function GitHash(id::AbstractString)
     end
     ensure_initialized()
     oid_ptr = Ref{GitHash}()
-    @check ccall((:git_oid_fromstrn, :libgit2), Cint,
+    @check ccall((:git_oid_fromstrn, libgit2), Cint,
               (Ptr{GitHash}, Ptr{UInt8}, Csize_t), oid_ptr, bstr, len)
     return oid_ptr[]
 end
@@ -56,7 +56,7 @@ Construct a `GitShortHash` from the data stored in the given [`Buffer`](@ref).
 function GitShortHash(buf::Buffer)
     ensure_initialized()
     oid_ptr = Ref{GitHash}()
-    @check ccall((:git_oid_fromstrn, :libgit2), Cint,
+    @check ccall((:git_oid_fromstrn, libgit2), Cint,
               (Ptr{GitHash}, Ptr{UInt8}, Csize_t), oid_ptr, buf.ptr, buf.size)
     GitShortHash(oid_ptr[], buf.size)
 end
@@ -71,7 +71,7 @@ function GitShortHash(id::AbstractString)
     bstr = String(id)
     len = sizeof(bstr)
     oid_ptr = Ref{GitHash}()
-    @check ccall((:git_oid_fromstrn, :libgit2), Cint,
+    @check ccall((:git_oid_fromstrn, libgit2), Cint,
               (Ptr{GitHash}, Ptr{UInt8}, Csize_t), oid_ptr, bstr, len)
     GitShortHash(oid_ptr[], len)
 end
@@ -113,7 +113,7 @@ function GitHash(ref::GitReference)
     reftype(ref) != Consts.REF_OID && return GitHash()
     ensure_initialized()
     GC.@preserve ref begin
-        oid_ptr = ccall((:git_reference_target, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), ref.ptr)
+        oid_ptr = ccall((:git_reference_target, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), ref.ptr)
         oid_ptr == C_NULL && return GitHash()
         oid = GitHash(oid_ptr)
     end
@@ -131,9 +131,9 @@ function GitHash(repo::GitRepo, ref_name::AbstractString)
     isempty(repo) && return GitHash()
     ensure_initialized()
     oid_ptr  = Ref(GitHash())
-    @check ccall((:git_reference_name_to_id, :libgit2), Cint,
+    @check ccall((:git_reference_name_to_id, libgit2), Cint,
                     (Ptr{GitHash}, Ptr{Cvoid}, Cstring),
-                     oid_ptr, repo.ptr, ref_name)
+                     oid_ptr, repo, ref_name)
     return oid_ptr[]
 end
 
@@ -144,7 +144,7 @@ Get the identifier (`GitHash`) of `obj`.
 """
 function GitHash(obj::GitObject)
     ensure_initialized()
-    GitHash(ccall((:git_object_id, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), obj.ptr))
+    GitHash(ccall((:git_object_id, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), obj))
 end
 
 ==(obj1::GitObject, obj2::GitObject) = GitHash(obj1) == GitHash(obj2)
@@ -159,15 +159,15 @@ unambiguously identify the object in the repository.
 function GitShortHash(obj::GitObject)
     ensure_initialized()
     buf_ref = Ref(Buffer())
-    @check ccall((:git_object_short_id, :libgit2), Cint,
-                 (Ptr{Buffer},Ptr{Cvoid}), buf_ref, obj.ptr)
+    @check ccall((:git_object_short_id, libgit2), Cint,
+                 (Ptr{Buffer},Ptr{Cvoid}), buf_ref, obj)
     sid = GitShortHash(buf_ref[])
     free(buf_ref)
     return sid
 end
 
 """
-    raw(id::GitHash) -> Vector{UInt8}
+    raw(id::GitHash)::Vector{UInt8}
 
 Obtain the raw bytes of the [`GitHash`](@ref) as a vector of length $OID_RAWSZ.
 """
@@ -187,7 +187,7 @@ Base.hash(id::GitHash, h::UInt) = hash(id.val, h)
 
 function Base.cmp(id1::GitHash, id2::GitHash)
     ensure_initialized()
-    Int(ccall((:git_oid_cmp, :libgit2), Cint,
+    Int(ccall((:git_oid_cmp, libgit2), Cint,
               (Ptr{GitHash}, Ptr{GitHash}),
               Ref(id1), Ref(id2)))
 end
@@ -195,7 +195,7 @@ function Base.cmp(id1::GitShortHash, id2::GitShortHash)
     ensure_initialized()
     # shortened hashes appear at the beginning of the order, i.e.
     # 000 < 01 < 010 < 011 < 0112
-    c = Int(ccall((:git_oid_ncmp, :libgit2), Cint,
+    c = Int(ccall((:git_oid_ncmp, libgit2), Cint,
                   (Ptr{GitHash}, Ptr{GitHash}, Csize_t),
                   Ref(id1.hash), Ref(id2.hash), min(id1.len, id2.len)))
     return c == 0 ? cmp(id1.len, id2.len) : c
@@ -207,7 +207,7 @@ Base.cmp(id1::GitShortHash, id2::GitHash) = cmp(id1, GitShortHash(id2, OID_HEXSZ
 Base.isless(id1::AbstractGitHash, id2::AbstractGitHash)  = cmp(id1, id2) < 0
 
 """
-    iszero(id::GitHash) -> Bool
+    iszero(id::GitHash)::Bool
 
 Determine whether all hexadecimal digits of the given [`GitHash`](@ref) are zero.
 """
diff --git a/stdlib/LibGit2/src/rebase.jl b/stdlib/LibGit2/src/rebase.jl
index 51b52ef006c38..b9083191246a5 100644
--- a/stdlib/LibGit2/src/rebase.jl
+++ b/stdlib/LibGit2/src/rebase.jl
@@ -5,21 +5,21 @@ function GitRebase(repo::GitRepo, branch::GitAnnotated, upstream::GitAnnotated;
                    opts::RebaseOptions = RebaseOptions())
     ensure_initialized()
     rebase_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_rebase_init, :libgit2), Cint,
+    @check ccall((:git_rebase_init, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid},
                    Ptr{Cvoid}, Ptr{RebaseOptions}),
-                   rebase_ptr_ptr, repo.ptr, branch.ptr, upstream.ptr,
-                   onto === nothing ? C_NULL : onto.ptr, Ref(opts))
+                   rebase_ptr_ptr, repo, branch, upstream,
+                   onto === nothing ? C_NULL : onto, Ref(opts))
     return GitRebase(repo, rebase_ptr_ptr[])
 end
 
 function count(rb::GitRebase)
     ensure_initialized()
-    return ccall((:git_rebase_operation_entrycount, :libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr)
+    return ccall((:git_rebase_operation_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), rb)
 end
 
 """
-    current(rb::GitRebase) -> Csize_t
+    current(rb::GitRebase)::Csize_t
 
 Return the index of the current [`RebaseOperation`](@ref). If no operation has
 yet been applied (because the [`GitRebase`](@ref) has been constructed but `next`
@@ -28,7 +28,7 @@ has not yet been called or iteration over `rb` has not yet begun), return
 """
 function current(rb::GitRebase)
     ensure_initialized()
-    return ccall((:git_rebase_operation_current, :libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr)
+    return ccall((:git_rebase_operation_current, libgit2), Csize_t, (Ptr{Cvoid},), rb)
 end
 
 function Base.getindex(rb::GitRebase, i::Integer)
@@ -37,7 +37,7 @@ function Base.getindex(rb::GitRebase, i::Integer)
     end
     ensure_initialized()
     GC.@preserve rb begin
-        rb_op_ptr = ccall((:git_rebase_operation_byindex, :libgit2),
+        rb_op_ptr = ccall((:git_rebase_operation_byindex, libgit2),
                           Ptr{RebaseOperation},
                           (Ptr{Cvoid}, Csize_t), rb.ptr, i-1)
         rb_op = unsafe_load(rb_op_ptr)
@@ -49,7 +49,7 @@ function Base.iterate(rb::GitRebase, state=nothing)
     ensure_initialized()
     rb_op_ptr_ptr = Ref{Ptr{RebaseOperation}}(C_NULL)
     GC.@preserve rb begin
-        err = ccall((:git_rebase_next, :libgit2), Cint,
+        err = ccall((:git_rebase_next, libgit2), Cint,
                     (Ptr{Ptr{RebaseOperation}}, Ptr{Cvoid}),
                     rb_op_ptr_ptr, rb.ptr)
         if err == Cint(Error.GIT_OK)
@@ -78,9 +78,9 @@ function commit(rb::GitRebase, sig::GitSignature)
     ensure_initialized()
     oid_ptr = Ref(GitHash())
     try
-        @check ccall((:git_rebase_commit, :libgit2), Error.Code,
+        @check ccall((:git_rebase_commit, libgit2), Error.Code,
                      (Ptr{GitHash}, Ptr{Cvoid}, Ptr{SignatureStruct}, Ptr{SignatureStruct}, Ptr{UInt8}, Ptr{UInt8}),
-                      oid_ptr, rb.ptr, C_NULL, sig.ptr, C_NULL, C_NULL)
+                      oid_ptr, rb, C_NULL, sig, C_NULL, C_NULL)
     catch err
         # TODO: return current HEAD instead
         err isa GitError && err.code === Error.EAPPLIED && return nothing
@@ -90,7 +90,7 @@ function commit(rb::GitRebase, sig::GitSignature)
 end
 
 """
-    abort(rb::GitRebase) -> Csize_t
+    abort(rb::GitRebase)::Csize_t
 
 Cancel the in-progress rebase, undoing all changes made so far and returning
 the parent repository of `rb` and its working directory to their state before
@@ -100,12 +100,12 @@ rebase had completed), and `-1` for other errors.
 """
 function abort(rb::GitRebase)
     ensure_initialized()
-    return ccall((:git_rebase_abort, :libgit2), Csize_t,
-                      (Ptr{Cvoid},), rb.ptr)
+    return ccall((:git_rebase_abort, libgit2), Csize_t,
+                      (Ptr{Cvoid},), rb)
 end
 
 """
-    finish(rb::GitRebase, sig::GitSignature) -> Csize_t
+    finish(rb::GitRebase, sig::GitSignature)::Csize_t
 
 Complete the rebase described by `rb`. `sig` is a [`GitSignature`](@ref)
 to specify the identity of the user finishing the rebase. Return `0` if the
@@ -113,7 +113,7 @@ rebase finishes successfully, `-1` if there is an error.
 """
 function finish(rb::GitRebase, sig::GitSignature)
     ensure_initialized()
-    return ccall((:git_rebase_finish, :libgit2), Csize_t,
+    return ccall((:git_rebase_finish, libgit2), Csize_t,
                   (Ptr{Cvoid}, Ptr{SignatureStruct}),
-                   rb.ptr, sig.ptr)
+                   rb, sig)
 end
diff --git a/stdlib/LibGit2/src/reference.jl b/stdlib/LibGit2/src/reference.jl
index c05b09ddfc518..4794fce1d496e 100644
--- a/stdlib/LibGit2/src/reference.jl
+++ b/stdlib/LibGit2/src/reference.jl
@@ -3,9 +3,9 @@
 function GitReference(repo::GitRepo, refname::AbstractString)
     ensure_initialized()
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_reference_lookup, :libgit2), Cint,
+    @check ccall((:git_reference_lookup, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                   ref_ptr_ptr, repo.ptr, refname)
+                   ref_ptr_ptr, repo, refname)
     return GitReference(repo, ref_ptr_ptr[])
 end
 
@@ -13,9 +13,9 @@ function GitReference(repo::GitRepo, obj_oid::GitHash, refname::AbstractString =
                       force::Bool=false, msg::AbstractString="")
     ensure_initialized()
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_reference_create, :libgit2), Cint,
+    @check ccall((:git_reference_create, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{UInt8}, Ptr{GitHash}, Cint, Cstring),
-                   ref_ptr_ptr, repo.ptr, refname, Ref(obj_oid), Cint(force),
+                   ref_ptr_ptr, repo, refname, Ref(obj_oid), Cint(force),
                    isempty(msg) ? C_NULL : msg)
     return GitReference(repo, ref_ptr_ptr[])
 end
@@ -28,21 +28,21 @@ to this branch will have no parents.
 """
 function isorphan(repo::GitRepo)
     ensure_initialized()
-    r = @check ccall((:git_repository_head_unborn, :libgit2), Cint,
-                     (Ptr{Cvoid},), repo.ptr)
+    r = @check ccall((:git_repository_head_unborn, libgit2), Cint,
+                     (Ptr{Cvoid},), repo)
     r != 0
 end
 
 """
-    LibGit2.head(repo::GitRepo) -> GitReference
+    LibGit2.head(repo::GitRepo)::GitReference
 
 Return a `GitReference` to the current HEAD of `repo`.
 """
 function head(repo::GitRepo)
     ensure_initialized()
     head_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_repository_head, :libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), head_ptr_ptr, repo.ptr)
+    @check ccall((:git_repository_head, libgit2), Cint,
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), head_ptr_ptr, repo)
     return GitReference(repo, head_ptr_ptr[])
 end
 
@@ -68,7 +68,7 @@ function shortname(ref::GitReference)
     isempty(ref) && return ""
     ensure_initialized()
     GC.@preserve ref begin
-        name_ptr = ccall((:git_reference_shorthand, :libgit2), Cstring, (Ptr{Cvoid},), ref.ptr)
+        name_ptr = ccall((:git_reference_shorthand, libgit2), Cstring, (Ptr{Cvoid},), ref)
         name_ptr == C_NULL && return ""
         name = unsafe_string(name_ptr)
     end
@@ -76,7 +76,7 @@ function shortname(ref::GitReference)
 end
 
 """
-    LibGit2.reftype(ref::GitReference) -> Cint
+    LibGit2.reftype(ref::GitReference)::Cint
 
 Return a `Cint` corresponding to the type of `ref`:
   * `0` if the reference is invalid
@@ -85,7 +85,7 @@ Return a `Cint` corresponding to the type of `ref`:
 """
 function reftype(ref::GitReference)
     ensure_initialized()
-    return ccall((:git_reference_type, :libgit2), Cint, (Ptr{Cvoid},), ref.ptr)
+    return ccall((:git_reference_type, libgit2), Cint, (Ptr{Cvoid},), ref)
 end
 
 """
@@ -100,7 +100,7 @@ function fullname(ref::GitReference)
     reftype(ref) == Consts.REF_OID && return ""
     ensure_initialized()
     GC.@preserve ref begin
-        rname = ccall((:git_reference_symbolic_target, :libgit2), Cstring, (Ptr{Cvoid},), ref.ptr)
+        rname = ccall((:git_reference_symbolic_target, libgit2), Cstring, (Ptr{Cvoid},), ref.ptr)
         rname == C_NULL && return ""
         name = unsafe_string(rname)
     end
@@ -116,7 +116,7 @@ function name(ref::GitReference)
     isempty(ref) && return ""
     ensure_initialized()
     GC.@preserve ref begin
-        name_ptr = ccall((:git_reference_name, :libgit2), Cstring, (Ptr{Cvoid},), ref.ptr)
+        name_ptr = ccall((:git_reference_name, libgit2), Cstring, (Ptr{Cvoid},), ref.ptr)
         name_ptr == C_NULL && return ""
         name = unsafe_string(name_ptr)
     end
@@ -128,7 +128,7 @@ function branch(ref::GitReference)
     ensure_initialized()
     str_ptr_ptr = Ref{Cstring}()
     GC.@preserve ref begin
-        @check ccall((:git_branch_name, :libgit2), Cint,
+        @check ccall((:git_branch_name, libgit2), Cint,
                       (Ptr{Cstring}, Ptr{Cvoid},), str_ptr_ptr, ref.ptr)
         str = unsafe_string(str_ptr_ptr[])
     end
@@ -138,32 +138,32 @@ end
 function ishead(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
-    err = ccall((:git_branch_is_head, :libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+    err = ccall((:git_branch_is_head, libgit2), Cint,
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
 function isbranch(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
-    err = ccall((:git_reference_is_branch, :libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+    err = ccall((:git_reference_is_branch, libgit2), Cint,
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
 function istag(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
-    err = ccall((:git_reference_is_tag, :libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+    err = ccall((:git_reference_is_tag, libgit2), Cint,
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
 function isremote(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
-    err = ccall((:git_reference_is_remote, :libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+    err = ccall((:git_reference_is_remote, libgit2), Cint,
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
@@ -199,23 +199,23 @@ then `ref` will be peeled until an object other than a [`GitTag`](@ref) is obtai
 function peel(::Type{T}, ref::GitReference) where T<:GitObject
     ensure_initialized()
     obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_reference_peel, :libgit2), Cint,
-                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), obj_ptr_ptr, ref.ptr, Consts.OBJECT(T))
+    @check ccall((:git_reference_peel, libgit2), Cint,
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), obj_ptr_ptr, ref, Consts.OBJECT(T))
     return T(ref.owner, obj_ptr_ptr[])
 end
 peel(ref::GitReference) = peel(GitObject, ref)
 
 """
-    LibGit2.ref_list(repo::GitRepo) -> Vector{String}
+    LibGit2.ref_list(repo::GitRepo)::Vector{String}
 
 Get a list of all reference names in the `repo` repository.
 """
 function ref_list(repo::GitRepo)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
-    @check ccall((:git_reference_list, :libgit2), Cint,
-                      (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr)
-    res = convert(Vector{String}, sa_ref[])
+    @check ccall((:git_reference_list, libgit2), Cint,
+                      (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo)
+    res = collect(sa_ref[])
     free(sa_ref)
     res
 end
@@ -235,9 +235,9 @@ function create_branch(repo::GitRepo,
                        force::Bool=false)
     ensure_initialized()
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_branch_create, :libgit2), Cint,
+    @check ccall((:git_branch_create, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}, Cint),
-                   ref_ptr_ptr, repo.ptr, bname, commit_obj.ptr, Cint(force))
+                   ref_ptr_ptr, repo, bname, commit_obj, Cint(force))
     return GitReference(repo, ref_ptr_ptr[])
 end
 
@@ -248,24 +248,24 @@ Delete the branch pointed to by `branch`.
 """
 function delete_branch(branch::GitReference)
     ensure_initialized()
-    @check ccall((:git_branch_delete, :libgit2), Cint, (Ptr{Cvoid},), branch.ptr)
+    @check ccall((:git_branch_delete, libgit2), Cint, (Ptr{Cvoid},), branch)
 end
 
 """
-    LibGit2.head!(repo::GitRepo, ref::GitReference) -> GitReference
+    LibGit2.head!(repo::GitRepo, ref::GitReference)::GitReference
 
 Set the HEAD of `repo` to the object pointed to by `ref`.
 """
 function head!(repo::GitRepo, ref::GitReference)
     ensure_initialized()
     ref_name = name(ref)
-    @check ccall((:git_repository_set_head, :libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring), repo.ptr, ref_name)
+    @check ccall((:git_repository_set_head, libgit2), Cint,
+                  (Ptr{Cvoid}, Cstring), repo, ref_name)
     return ref
 end
 
 """
-    lookup_branch(repo::GitRepo, branch_name::AbstractString, remote::Bool=false) -> Union{GitReference, Nothing}
+    lookup_branch(repo::GitRepo, branch_name::AbstractString, remote::Bool=false)::Union{GitReference, Nothing}
 
 Determine if the branch specified by `branch_name` exists in the repository `repo`.
 If `remote` is `true`, `repo` is assumed to be a remote git repository. Otherwise, it
@@ -280,9 +280,9 @@ function lookup_branch(repo::GitRepo,
     ensure_initialized()
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     branch_type = remote ? Consts.BRANCH_REMOTE : Consts.BRANCH_LOCAL
-    err = ccall((:git_branch_lookup, :libgit2), Cint,
+    err = ccall((:git_branch_lookup, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{UInt8}, Cint),
-                  ref_ptr_ptr, repo.ptr, branch_name, branch_type)
+                  ref_ptr_ptr, repo, branch_name, branch_type)
     if err != Int(Error.GIT_OK)
         if err == Int(Error.ENOTFOUND)
             return nothing
@@ -296,7 +296,7 @@ function lookup_branch(repo::GitRepo,
 end
 
 """
-    upstream(ref::GitReference) -> Union{GitReference, Nothing}
+    upstream(ref::GitReference)::Union{GitReference, Nothing}
 
 Determine if the branch containing `ref` has a specified upstream branch.
 
@@ -307,8 +307,8 @@ function upstream(ref::GitReference)
     isempty(ref) && return nothing
     ensure_initialized()
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    err = ccall((:git_branch_upstream, :libgit2), Cint,
-                  (Ref{Ptr{Cvoid}}, Ptr{Cvoid},), ref_ptr_ptr, ref.ptr)
+    err = ccall((:git_branch_upstream, libgit2), Cint,
+                  (Ref{Ptr{Cvoid}}, Ptr{Cvoid},), ref_ptr_ptr, ref)
     if err != Int(Error.GIT_OK)
         if err == Int(Error.ENOTFOUND)
             return nothing
@@ -326,17 +326,17 @@ repository(ref::GitReference) = ref.owner
 function target!(ref::GitReference, new_oid::GitHash; msg::AbstractString="")
     ensure_initialized()
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_reference_set_target, :libgit2), Cint,
+    @check ccall((:git_reference_set_target, libgit2), Cint,
              (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Cstring),
-             ref_ptr_ptr, ref.ptr, Ref(new_oid), isempty(msg) ? C_NULL : msg)
+             ref_ptr_ptr, ref, Ref(new_oid), isempty(msg) ? C_NULL : msg)
     return GitReference(ref.owner, ref_ptr_ptr[])
 end
 
 function GitBranchIter(repo::GitRepo, flags::Cint=Cint(Consts.BRANCH_LOCAL))
     ensure_initialized()
     bi_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_branch_iterator_new, :libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), bi_ptr, repo.ptr, flags)
+    @check ccall((:git_branch_iterator_new, libgit2), Cint,
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), bi_ptr, repo, flags)
     return GitBranchIter(repo, bi_ptr[])
 end
 
@@ -344,9 +344,9 @@ function Base.iterate(bi::GitBranchIter, state=nothing)
     ensure_initialized()
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     btype = Ref{Cint}()
-    err = ccall((:git_branch_next, :libgit2), Cint,
+    err = ccall((:git_branch_next, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cint}, Ptr{Cvoid}),
-                  ref_ptr_ptr, btype, bi.ptr)
+                  ref_ptr_ptr, btype, bi)
     if err == Cint(Error.GIT_OK)
         return ((GitReference(bi.owner, ref_ptr_ptr[]), btype[]), nothing)
     elseif err == Cint(Error.ITEROVER)
diff --git a/stdlib/LibGit2/src/remote.jl b/stdlib/LibGit2/src/remote.jl
index 384a3b21bdbfa..9d4c13c4f4542 100644
--- a/stdlib/LibGit2/src/remote.jl
+++ b/stdlib/LibGit2/src/remote.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 """
-    GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractString) -> GitRemote
+    GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractString)::GitRemote
 
 Look up a remote git repository using its name and URL. Uses the default fetch refspec.
 
@@ -14,14 +14,14 @@ remote = LibGit2.GitRemote(repo, "upstream", repo_url)
 function GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractString)
     ensure_initialized()
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_remote_create, :libgit2), Cint,
+    @check ccall((:git_remote_create, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring),
-                rmt_ptr_ptr, repo.ptr, rmt_name, rmt_url)
+                rmt_ptr_ptr, repo, rmt_name, rmt_url)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
 """
-    GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractString, fetch_spec::AbstractString) -> GitRemote
+    GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractString, fetch_spec::AbstractString)::GitRemote
 
 Look up a remote git repository using the repository's name and URL,
 as well as specifications for how to fetch from the remote
@@ -37,14 +37,14 @@ remote = LibGit2.GitRemote(repo, "upstream", repo_url, refspec)
 function GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractString, fetch_spec::AbstractString)
     ensure_initialized()
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_remote_create_with_fetchspec, :libgit2), Cint,
+    @check ccall((:git_remote_create_with_fetchspec, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Cstring),
-                rmt_ptr_ptr, repo.ptr, rmt_name, rmt_url, fetch_spec)
+                rmt_ptr_ptr, repo, rmt_name, rmt_url, fetch_spec)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
 """
-    GitRemoteAnon(repo::GitRepo, url::AbstractString) -> GitRemote
+    GitRemoteAnon(repo::GitRepo, url::AbstractString)::GitRemote
 
 Look up a remote git repository using only its URL, not its name.
 
@@ -57,14 +57,27 @@ remote = LibGit2.GitRemoteAnon(repo, repo_url)
 function GitRemoteAnon(repo::GitRepo, url::AbstractString)
     ensure_initialized()
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_remote_create_anonymous, :libgit2), Cint,
+    @check ccall((:git_remote_create_anonymous, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                rmt_ptr_ptr, repo.ptr, url)
+                rmt_ptr_ptr, repo, url)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
 """
-    lookup_remote(repo::GitRepo, remote_name::AbstractString) -> Union{GitRemote, Nothing}
+    GitRemoteDetached(url::AbstractString)::GitRemote
+
+Create a remote without a connected local repo.
+"""
+function GitRemoteDetached(url::AbstractString)
+    ensure_initialized()
+    rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
+    @check ccall((:git_remote_create_detached, libgit2), Cint,
+                 (Ptr{Ptr{Cvoid}}, Cstring), rmt_ptr_ptr, url)
+    return GitRemote(rmt_ptr_ptr[])
+end
+
+"""
+    lookup_remote(repo::GitRepo, remote_name::AbstractString)::Union{GitRemote, Nothing}
 
 Determine if the `remote_name` specified exists within the `repo`. Return
 either a [`GitRemote`](@ref) to the remote name if it exists, or [`nothing`](@ref)
@@ -80,9 +93,9 @@ LibGit2.lookup_remote(repo, remote_name) # will return nothing
 function lookup_remote(repo::GitRepo, remote_name::AbstractString)
     ensure_initialized()
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    err = ccall((:git_remote_lookup, :libgit2), Cint,
+    err = ccall((:git_remote_lookup, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                rmt_ptr_ptr, repo.ptr, remote_name)
+                rmt_ptr_ptr, repo, remote_name)
     if err == Int(Error.GIT_OK)
         return GitRemote(repo, rmt_ptr_ptr[])
     elseif err == Int(Error.ENOTFOUND)
@@ -95,9 +108,9 @@ end
 function get(::Type{GitRemote}, repo::GitRepo, rmt_name::AbstractString)
     ensure_initialized()
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_remote_lookup, :libgit2), Cint,
+    @check ccall((:git_remote_lookup, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                rmt_ptr_ptr, repo.ptr, rmt_name)
+                rmt_ptr_ptr, repo, rmt_name)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
@@ -120,9 +133,11 @@ julia> LibGit2.url(remote)
 """
 function url(rmt::GitRemote)
     ensure_initialized()
-    url_ptr = ccall((:git_remote_url, :libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr)
-    url_ptr == C_NULL && return ""
-    return unsafe_string(url_ptr)
+    GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string`
+        url_ptr = ccall((:git_remote_url, libgit2), Cstring, (Ptr{Cvoid},), rmt)
+        url_ptr == C_NULL && return ""
+        return unsafe_string(url_ptr)
+    end
 end
 
 """
@@ -144,9 +159,11 @@ julia> LibGit2.push_url(LibGit2.get(LibGit2.GitRemote, repo, "origin"))
 """
 function push_url(rmt::GitRemote)
     ensure_initialized()
-    url_ptr = ccall((:git_remote_pushurl, :libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr)
-    url_ptr == C_NULL && return ""
-    return unsafe_string(url_ptr)
+    GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string`
+        url_ptr = ccall((:git_remote_pushurl, libgit2), Cstring, (Ptr{Cvoid},), rmt)
+        url_ptr == C_NULL && return ""
+        return unsafe_string(url_ptr)
+    end
 end
 
 """
@@ -170,13 +187,15 @@ julia> name(remote)
 """
 function name(rmt::GitRemote)
     ensure_initialized()
-    name_ptr = ccall((:git_remote_name, :libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr)
-    name_ptr == C_NULL && return ""
-    return unsafe_string(name_ptr)
+    GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string`
+        name_ptr = ccall((:git_remote_name, libgit2), Cstring, (Ptr{Cvoid},), rmt)
+        name_ptr == C_NULL && return ""
+        return unsafe_string(name_ptr)
+    end
 end
 
 """
-    fetch_refspecs(rmt::GitRemote) -> Vector{String}
+    fetch_refspecs(rmt::GitRemote)::Vector{String}
 
 Get the *fetch* refspecs for the specified `rmt`. These refspecs contain
 information about which branch(es) to fetch from.
@@ -194,15 +213,15 @@ String["+refs/heads/*:refs/remotes/upstream/*"]
 function fetch_refspecs(rmt::GitRemote)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
-    @check ccall((:git_remote_get_fetch_refspecs, :libgit2), Cint,
-                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt.ptr)
-    res = convert(Vector{String}, sa_ref[])
+    @check ccall((:git_remote_get_fetch_refspecs, libgit2), Cint,
+                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt)
+    res = collect(sa_ref[])
     free(sa_ref)
     res
 end
 
 """
-    push_refspecs(rmt::GitRemote) -> Vector{String}
+    push_refspecs(rmt::GitRemote)::Vector{String}
 
 Get the *push* refspecs for the specified `rmt`. These refspecs contain
 information about which branch(es) to push to.
@@ -224,9 +243,9 @@ String["refs/heads/master"]
 function push_refspecs(rmt::GitRemote)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
-    @check ccall((:git_remote_get_push_refspecs, :libgit2), Cint,
-                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt.ptr)
-    res = convert(Vector{String}, sa_ref[])
+    @check ccall((:git_remote_get_push_refspecs, libgit2), Cint,
+                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt)
+    res = collect(sa_ref[])
     free(sa_ref)
     res
 end
@@ -247,8 +266,8 @@ String["+refs/heads/*:refs/remotes/upstream/*"]
 """
 function add_fetch!(repo::GitRepo, rmt::GitRemote, fetch_spec::String)
     ensure_initialized()
-    @check ccall((:git_remote_add_fetch, :libgit2), Cint,
-                 (Ptr{Cvoid}, Cstring, Cstring), repo.ptr,
+    @check ccall((:git_remote_add_fetch, libgit2), Cint,
+                 (Ptr{Cvoid}, Cstring, Cstring), repo,
                  name(rmt), fetch_spec)
 end
 
@@ -276,8 +295,8 @@ String["refs/heads/master"]
 """
 function add_push!(repo::GitRepo, rmt::GitRemote, push_spec::String)
     ensure_initialized()
-    @check ccall((:git_remote_add_push, :libgit2), Cint,
-                 (Ptr{Cvoid}, Cstring, Cstring), repo.ptr,
+    @check ccall((:git_remote_add_push, libgit2), Cint,
+                 (Ptr{Cvoid}, Cstring, Cstring), repo,
                  name(rmt), push_spec)
 end
 
@@ -296,9 +315,9 @@ function fetch(rmt::GitRemote, refspecs::Vector{<:AbstractString};
                msg::AbstractString="")
     ensure_initialized()
     msg = "libgit2.fetch: $msg"
-    @check ccall((:git_remote_fetch, :libgit2), Cint,
+    @check ccall((:git_remote_fetch, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{FetchOptions}, Cstring),
-                 rmt.ptr, isempty(refspecs) ? C_NULL : refspecs, Ref(options), msg)
+                 rmt, isempty(refspecs) ? C_NULL : refspecs, Ref(options), msg)
 end
 
 """
@@ -321,21 +340,21 @@ The keyword arguments are:
 function push(rmt::GitRemote, refspecs::Vector{<:AbstractString};
               force::Bool = false, options::PushOptions = PushOptions())
     ensure_initialized()
-    @check ccall((:git_remote_push, :libgit2), Cint,
+    @check ccall((:git_remote_push, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{PushOptions}),
-                 rmt.ptr, isempty(refspecs) ? C_NULL : refspecs, Ref(options))
+                 rmt, isempty(refspecs) ? C_NULL : refspecs, Ref(options))
 end
 
 """
-    remote_delete(repo::GitRepo, remote_name::AbstractString) -> Nothing
+    remote_delete(repo::GitRepo, remote_name::AbstractString) -> nothing
 
 Delete the `remote_name` from the git `repo`.
 """
 function remote_delete(repo::GitRepo, remote_name::AbstractString)
     ensure_initialized()
-    @check ccall((:git_remote_delete, :libgit2), Cint,
+    @check ccall((:git_remote_delete, libgit2), Cint,
                  (Ptr{Cvoid}, Cstring),
-                 repo.ptr, remote_name)
+                 repo, remote_name)
 end
 
 Base.show(io::IO, rmt::GitRemote) = print(io, "GitRemote:\nRemote name: ", name(rmt), " url: ", url(rmt))
@@ -352,9 +371,9 @@ function set_remote_fetch_url end
 
 function set_remote_fetch_url(repo::GitRepo, remote_name::AbstractString, url::AbstractString)
     ensure_initialized()
-    @check ccall((:git_remote_set_url, :libgit2), Cint,
+    @check ccall((:git_remote_set_url, libgit2), Cint,
                  (Ptr{Cvoid}, Cstring, Cstring),
-                 repo.ptr, remote_name, url)
+                 repo, remote_name, url)
 end
 
 function set_remote_fetch_url(path::AbstractString, remote_name::AbstractString, url::AbstractString)
@@ -375,9 +394,9 @@ function set_remote_push_url end
 
 function set_remote_push_url(repo::GitRepo, remote_name::AbstractString, url::AbstractString)
     ensure_initialized()
-    @check ccall((:git_remote_set_pushurl, :libgit2), Cint,
+    @check ccall((:git_remote_set_pushurl, libgit2), Cint,
                  (Ptr{Cvoid}, Cstring, Cstring),
-                 repo.ptr, remote_name, url)
+                 repo, remote_name, url)
 end
 
 function set_remote_push_url(path::AbstractString, remote_name::AbstractString, url::AbstractString)
@@ -414,3 +433,65 @@ function set_remote_url(path::AbstractString, remote_name::AbstractString, url::
         set_remote_url(repo, remote_name, url)
     end
 end
+
+function connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION,
+                 callbacks::RemoteCallbacks)
+    @check ccall((:git_remote_connect, libgit2),
+                 Cint, (Ptr{Cvoid}, Cint, Ref{RemoteCallbacks}, Ptr{Cvoid}, Ptr{Cvoid}),
+                 rmt, direction, callbacks, C_NULL, C_NULL)
+    return rmt
+end
+
+"""
+    connected(rmt::GitRemote)
+
+Check whether the remote is connected
+"""
+function connected(rmt::GitRemote)
+    return ccall((:git_remote_connected, libgit2), Cint, (Ptr{Cvoid},), rmt) != 0
+end
+
+"""
+    disconnect(rmt::GitRemote)
+
+Close the connection to the remote.
+"""
+function disconnect(rmt::GitRemote)
+    @check ccall((:git_remote_disconnect, libgit2), Cint, (Ptr{Cvoid},), rmt)
+    return
+end
+
+"""
+    default_branch(rmt::GitRemote)
+
+Retrieve the name of the remote's default branch.
+
+This function must only be called after connecting (See [`connect`](@ref)).
+"""
+function default_branch(rmt::GitRemote)
+    buf_ref = Ref(Buffer())
+    @check ccall((:git_remote_default_branch, libgit2), Cint,
+                 (Ptr{Buffer}, Ptr{Cvoid}), buf_ref, rmt)
+    buf = buf_ref[]
+    str = unsafe_string(buf.ptr, buf.size)
+    free(buf_ref)
+    return str
+end
+
+"""
+    ls(rmt::GitRemote)::Vector{GitRemoteHead}
+
+Get the remote repository's reference advertisement list.
+
+This function must only be called after connecting (See [`connect`](@ref)).
+"""
+function ls(rmt::GitRemote)
+    nheads = Ref{Csize_t}()
+    head_refs = Ref{Ptr{Ptr{_GitRemoteHead}}}()
+    @check ccall((:git_remote_ls, libgit2), Cint,
+                 (Ptr{Ptr{Ptr{_GitRemoteHead}}}, Ptr{Csize_t}, Ptr{Cvoid}),
+                 head_refs, nheads, rmt)
+    head_ptr = head_refs[]
+    return [GitRemoteHead(unsafe_load(unsafe_load(head_ptr, i)))
+            for i in 1:nheads[]]
+end
diff --git a/stdlib/LibGit2/src/repository.jl b/stdlib/LibGit2/src/repository.jl
index 994d0a9f32875..97d414fc664e4 100644
--- a/stdlib/LibGit2/src/repository.jl
+++ b/stdlib/LibGit2/src/repository.jl
@@ -8,7 +8,7 @@ Open a git repository at `path`.
 function GitRepo(path::AbstractString)
     ensure_initialized()
     repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_repository_open, :libgit2), Cint,
+    @check ccall((:git_repository_open, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Cstring), repo_ptr_ptr, path)
     return GitRepo(repo_ptr_ptr[])
 end
@@ -23,7 +23,7 @@ function GitRepoExt(path::AbstractString, flags::Cuint = Cuint(Consts.REPOSITORY
     ensure_initialized()
     separator = @static Sys.iswindows() ? ";" : ":"
     repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_repository_open_ext, :libgit2), Cint,
+    @check ccall((:git_repository_open_ext, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Cstring, Cuint, Cstring),
                  repo_ptr_ptr, path, flags, separator)
     return GitRepo(repo_ptr_ptr[])
@@ -32,12 +32,12 @@ end
 function cleanup(r::GitRepo)
     if r.ptr != C_NULL
         ensure_initialized()
-        @check ccall((:git_repository__cleanup, :libgit2), Cint, (Ptr{Cvoid},), r.ptr)
+        @check ccall((:git_repository__cleanup, libgit2), Cint, (Ptr{Cvoid},), r)
     end
 end
 
 """
-    LibGit2.init(path::AbstractString, bare::Bool=false) -> GitRepo
+    LibGit2.init(path::AbstractString, bare::Bool=false)::GitRepo
 
 Open a new git repository at `path`. If `bare` is `false`,
 the working tree will be created in `path/.git`. If `bare`
@@ -46,13 +46,13 @@ is `true`, no working directory will be created.
 function init(path::AbstractString, bare::Bool=false)
     ensure_initialized()
     repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_repository_init, :libgit2), Cint,
+    @check ccall((:git_repository_init, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Cstring, Cuint), repo_ptr_ptr, path, bare)
     return GitRepo(repo_ptr_ptr[])
 end
 
 """
-    LibGit2.head_oid(repo::GitRepo) -> GitHash
+    LibGit2.head_oid(repo::GitRepo)::GitHash
 
 Lookup the object id of the current HEAD of git
 repository `repo`.
@@ -85,7 +85,7 @@ function headname(repo::GitRepo)
 end
 
 """
-    isbare(repo::GitRepo) -> Bool
+    isbare(repo::GitRepo)::Bool
 
 Determine if `repo` is bare. Suppose the top level directory of `repo` is `DIR`.
 A non-bare repository is one in which the git directory (see [`gitdir`](@ref)) is
@@ -97,11 +97,11 @@ tree, and no tracking information for remote branches or configurations is prese
 function isbare(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    return ccall((:git_repository_is_bare, :libgit2), Cint, (Ptr{Cvoid},), repo.ptr) == 1
+    return ccall((:git_repository_is_bare, libgit2), Cint, (Ptr{Cvoid},), repo) == 1
 end
 
 """
-    isattached(repo::GitRepo) -> Bool
+    isattached(repo::GitRepo)::Bool
 
 Determine if `repo` is detached - that is, whether its HEAD points to a commit
 (detached) or whether HEAD points to a branch tip (attached).
@@ -109,7 +109,28 @@ Determine if `repo` is detached - that is, whether its HEAD points to a commit
 function isattached(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    ccall((:git_repository_head_detached, :libgit2), Cint, (Ptr{Cvoid},), repo.ptr) != 1
+    ccall((:git_repository_head_detached, libgit2), Cint, (Ptr{Cvoid},), repo) != 1
+end
+
+"""
+    isshallow(repo::GitRepo)::Bool
+
+Determine if `repo` is a shallow clone. A shallow clone has a truncated history,
+created by cloning with a specific depth (e.g., `LibGit2.clone(url, path, depth=1)`).
+
+# Examples
+```julia
+shallow_repo = LibGit2.clone(url, "shallow_path", depth=1)
+LibGit2.isshallow(shallow_repo)  # returns true
+
+normal_repo = LibGit2.clone(url, "normal_path")
+LibGit2.isshallow(normal_repo)  # returns false
+```
+"""
+function isshallow(repo::GitRepo)
+    ensure_initialized()
+    @assert repo.ptr != C_NULL
+    ccall((:git_repository_is_shallow, libgit2), Cint, (Ptr{Cvoid},), repo) == 1
 end
 
 @doc """
@@ -139,14 +160,21 @@ function (::Type{T})(repo::GitRepo, spec::AbstractString) where T<:GitObject
     ensure_initialized()
     obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @assert repo.ptr != C_NULL
-    @check ccall((:git_revparse_single, :libgit2), Cint,
-                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), obj_ptr_ptr, repo.ptr, spec)
+    @check ccall((:git_revparse_single, libgit2), Cint,
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), obj_ptr_ptr, repo, spec)
+    obj_ptr = obj_ptr_ptr[]
     # check object is of correct type
     if T != GitObject && T != GitUnknownObject
-        t = Consts.OBJECT(obj_ptr_ptr[])
-        t == Consts.OBJECT(T) || throw(GitError(Error.Object, Error.ERROR, "Expected object of type $T, received object of type $(objtype(t))"))
+        t = Consts.OBJECT(obj_ptr)
+        if t != Consts.OBJECT(T)
+            if obj_ptr != C_NULL
+                # free result
+                ccall((:git_object_free, libgit2), Cvoid, (Ptr{Cvoid},), obj_ptr)
+            end
+            throw(GitError(Error.Object, Error.ERROR, "Expected object of type $T, received object of type $(objtype(t))"))
+        end
     end
-    return T(repo, obj_ptr_ptr[])
+    return T(repo, obj_ptr)
 end
 
 function (::Type{T})(repo::GitRepo, oid::GitHash) where T<:GitObject
@@ -155,9 +183,9 @@ function (::Type{T})(repo::GitRepo, oid::GitHash) where T<:GitObject
     obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
 
     @assert repo.ptr != C_NULL
-    @check ccall((:git_object_lookup, :libgit2), Cint,
+    @check ccall((:git_object_lookup, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Consts.OBJECT),
-                 obj_ptr_ptr, repo.ptr, oid_ptr, Consts.OBJECT(T))
+                 obj_ptr_ptr, repo, oid_ptr, Consts.OBJECT(T))
 
     return T(repo, obj_ptr_ptr[])
 end
@@ -167,9 +195,9 @@ function (::Type{T})(repo::GitRepo, oid::GitShortHash) where T<:GitObject
     obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
 
     @assert repo.ptr != C_NULL
-    @check ccall((:git_object_lookup_prefix, :libgit2), Cint,
+    @check ccall((:git_object_lookup_prefix, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Csize_t, Consts.OBJECT),
-                 obj_ptr_ptr, repo.ptr, oid_ptr, oid.len, Consts.OBJECT(T))
+                 obj_ptr_ptr, repo, oid_ptr, oid.len, Consts.OBJECT(T))
 
     return T(repo, obj_ptr_ptr[])
 end
@@ -190,8 +218,10 @@ See also [`workdir`](@ref), [`path`](@ref).
 function gitdir(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    return unsafe_string(ccall((:git_repository_path, :libgit2), Cstring,
-                        (Ptr{Cvoid},), repo.ptr))
+    GC.@preserve repo begin
+        return unsafe_string(ccall((:git_repository_path, libgit2), Cstring,
+                                   (Ptr{Cvoid},), repo))
+    end
 end
 
 """
@@ -211,10 +241,12 @@ See also [`gitdir`](@ref), [`path`](@ref).
 function workdir(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    sptr = ccall((:git_repository_workdir, :libgit2), Cstring,
-                (Ptr{Cvoid},), repo.ptr)
-    sptr == C_NULL && throw(GitError(Error.Object, Error.ERROR, "No working directory found."))
-    return unsafe_string(sptr)
+    GC.@preserve repo begin
+        sptr = ccall((:git_repository_workdir, libgit2), Cstring,
+                     (Ptr{Cvoid},), repo)
+        sptr == C_NULL && throw(GitError(Error.Object, Error.ERROR, "No working directory found."))
+        return unsafe_string(sptr)
+    end
 end
 
 """
@@ -255,8 +287,8 @@ function peel(::Type{T}, obj::GitObject) where T<:GitObject
     ensure_initialized()
     new_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
 
-    @check ccall((:git_object_peel, :libgit2), Cint,
-                (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), new_ptr_ptr, obj.ptr, Consts.OBJECT(T))
+    @check ccall((:git_object_peel, libgit2), Cint,
+                (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), new_ptr_ptr, obj, Consts.OBJECT(T))
 
     return T(obj.owner, new_ptr_ptr[])
 end
@@ -285,9 +317,9 @@ function GitDescribeResult(committish::GitObject;
                            options::DescribeOptions=DescribeOptions())
     ensure_initialized()
     result_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_describe_commit, :libgit2), Cint,
+    @check ccall((:git_describe_commit, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{DescribeOptions}),
-                 result_ptr_ptr, committish.ptr, Ref(options))
+                 result_ptr_ptr, committish, Ref(options))
     return GitDescribeResult(committish.owner, result_ptr_ptr[])
 end
 
@@ -312,14 +344,14 @@ function GitDescribeResult(repo::GitRepo; options::DescribeOptions=DescribeOptio
     ensure_initialized()
     result_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @assert repo.ptr != C_NULL
-    @check ccall((:git_describe_workdir, :libgit2), Cint,
+    @check ccall((:git_describe_workdir, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{DescribeOptions}),
-                 result_ptr_ptr, repo.ptr, Ref(options))
+                 result_ptr_ptr, repo, Ref(options))
     return GitDescribeResult(repo, result_ptr_ptr[])
 end
 
 """
-    LibGit2.format(result::GitDescribeResult; kwarg...) -> String
+    LibGit2.format(result::GitDescribeResult; kwarg...)::String
 
 Produce a formatted string based on a `GitDescribeResult`.
 Formatting options are controlled by the keyword argument:
@@ -329,9 +361,9 @@ Formatting options are controlled by the keyword argument:
 function format(result::GitDescribeResult; options::DescribeFormatOptions=DescribeFormatOptions())
     ensure_initialized()
     buf_ref = Ref(Buffer())
-    @check ccall((:git_describe_format, :libgit2), Cint,
+    @check ccall((:git_describe_format, libgit2), Cint,
                  (Ptr{Buffer}, Ptr{Cvoid}, Ptr{DescribeFormatOptions}),
-                 buf_ref, result.ptr, Ref(options))
+                 buf_ref, result, Ref(options))
     buf = buf_ref[]
     str = unsafe_string(buf.ptr, buf.size)
     free(buf_ref)
@@ -355,9 +387,9 @@ function checkout_tree(repo::GitRepo, obj::GitObject;
                        options::CheckoutOptions = CheckoutOptions())
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    @check ccall((:git_checkout_tree, :libgit2), Cint,
+    @check ccall((:git_checkout_tree, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CheckoutOptions}),
-                 repo.ptr, obj.ptr, Ref(options))
+                 repo, obj, Ref(options))
 end
 
 """
@@ -371,10 +403,10 @@ function checkout_index(repo::GitRepo, idx::Union{GitIndex, Nothing} = nothing;
                         options::CheckoutOptions = CheckoutOptions())
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    @check ccall((:git_checkout_index, :libgit2), Cint,
+    @check ccall((:git_checkout_index, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CheckoutOptions}),
-                 repo.ptr,
-                 idx === nothing ? C_NULL : idx.ptr,
+                 repo,
+                 idx === nothing ? C_NULL : idx,
                  Ref(options))
 end
 
@@ -391,9 +423,9 @@ Update the index and working tree of `repo` to match the commit pointed to by HE
 function checkout_head(repo::GitRepo; options::CheckoutOptions = CheckoutOptions())
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    @check ccall((:git_checkout_head, :libgit2), Cint,
+    @check ccall((:git_checkout_head, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{CheckoutOptions}),
-                 repo.ptr, Ref(options))
+                 repo, Ref(options))
 end
 
 """
@@ -410,19 +442,19 @@ The keyword argument `options` sets checkout and merge options for the cherrypic
 function cherrypick(repo::GitRepo, commit::GitCommit; options::CherrypickOptions = CherrypickOptions())
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    @check ccall((:git_cherrypick, :libgit2), Cint,
+    @check ccall((:git_cherrypick, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CherrypickOptions}),
-                 repo.ptr, commit.ptr, Ref(options))
+                 repo, commit, Ref(options))
 end
 
 """Updates some entries, determined by the `pathspecs`, in the index from the target commit tree."""
 function reset!(repo::GitRepo, obj::Union{GitObject, Nothing}, pathspecs::AbstractString...)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    @check ccall((:git_reset_default, :libgit2), Cint,
+    @check ccall((:git_reset_default, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{StrArrayStruct}),
-                 repo.ptr,
-                 obj === nothing ? C_NULL : obj.ptr,
+                 repo,
+                 obj === nothing ? C_NULL : obj,
                  collect(pathspecs))
     return head_oid(repo)
 end
@@ -432,9 +464,9 @@ function reset!(repo::GitRepo, obj::GitObject, mode::Cint;
                checkout_opts::CheckoutOptions = CheckoutOptions())
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    @check ccall((:git_reset, :libgit2), Cint,
+    @check ccall((:git_reset, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Cint, Ptr{CheckoutOptions}),
-                  repo.ptr, obj.ptr, mode, Ref(checkout_opts))
+                  repo, obj, mode, Ref(checkout_opts))
     return head_oid(repo)
 end
 
@@ -456,14 +488,14 @@ function clone(repo_url::AbstractString, repo_path::AbstractString,
     ensure_initialized()
     clone_opts_ref = Ref(clone_opts)
     repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_clone, :libgit2), Cint,
+    @check ccall((:git_clone, libgit2), Cint,
             (Ptr{Ptr{Cvoid}}, Cstring, Cstring, Ref{CloneOptions}),
             repo_ptr_ptr, repo_url, repo_path, clone_opts_ref)
     return GitRepo(repo_ptr_ptr[])
 end
 
 """
-    fetchheads(repo::GitRepo) -> Vector{FetchHead}
+    fetchheads(repo::GitRepo)::Vector{FetchHead}
 
 Return the list of all the fetch heads for `repo`, each represented as a [`FetchHead`](@ref),
 including their names, URLs, and merge statuses.
@@ -490,9 +522,9 @@ function fetchheads(repo::GitRepo)
     fh = FetchHead[]
     ffcb = fetchhead_foreach_cb()
     @assert repo.ptr != C_NULL
-    @check ccall((:git_repository_fetchhead_foreach, :libgit2), Cint,
+    @check ccall((:git_repository_fetchhead_foreach, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Any),
-                 repo.ptr, ffcb, fh)
+                 repo, ffcb, fh)
     return fh
 end
 
@@ -505,9 +537,9 @@ function remotes(repo::GitRepo)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
     @assert repo.ptr != C_NULL
-    @check ccall((:git_remote_list, :libgit2), Cint,
-                  (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr)
-    res = convert(Vector{String}, sa_ref[])
+    @check ccall((:git_remote_list, libgit2), Cint,
+                  (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo)
+    res = collect(sa_ref[])
     free(sa_ref)
     return res
 end
diff --git a/stdlib/LibGit2/src/signature.jl b/stdlib/LibGit2/src/signature.jl
index 9c13bc2256ef5..17013121db9ad 100644
--- a/stdlib/LibGit2/src/signature.jl
+++ b/stdlib/LibGit2/src/signature.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 function Signature(ptr::Ptr{SignatureStruct})
+    @assert ptr != C_NULL
     sig   = unsafe_load(ptr)::SignatureStruct
     name  = unsafe_string(sig.name)
     email = unsafe_string(sig.email)
@@ -13,7 +14,7 @@ Signature(sig::GitSignature) = Signature(sig.ptr)
 function Signature(name::AbstractString, email::AbstractString)
     ensure_initialized()
     sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL)
-    @check ccall((:git_signature_now, :libgit2), Cint,
+    @check ccall((:git_signature_now, libgit2), Cint,
                  (Ptr{Ptr{SignatureStruct}}, Cstring, Cstring), sig_ptr_ptr, name, email)
     sig = GitSignature(sig_ptr_ptr[])
     s = Signature(sig.ptr)
@@ -31,7 +32,7 @@ end
 function Base.convert(::Type{GitSignature}, sig::Signature)
     ensure_initialized()
     sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL)
-    @check ccall((:git_signature_new, :libgit2), Cint,
+    @check ccall((:git_signature_new, libgit2), Cint,
                  (Ptr{Ptr{SignatureStruct}}, Cstring, Cstring, Int64, Cint),
                  sig_ptr_ptr, sig.name, sig.email, sig.time, sig.time_offset)
     return GitSignature(sig_ptr_ptr[])
@@ -66,7 +67,7 @@ end
 function default_signature(repo::GitRepo)
     ensure_initialized()
     sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL)
-    @check ccall((:git_signature_default, :libgit2), Cint,
-                 (Ptr{Ptr{SignatureStruct}}, Ptr{Cvoid}), sig_ptr_ptr, repo.ptr)
+    @check ccall((:git_signature_default, libgit2), Cint,
+                 (Ptr{Ptr{SignatureStruct}}, Ptr{Cvoid}), sig_ptr_ptr, repo)
     return GitSignature(sig_ptr_ptr[])
 end
diff --git a/stdlib/LibGit2/src/status.jl b/stdlib/LibGit2/src/status.jl
index cd871681e4ae9..3eb11c391db67 100644
--- a/stdlib/LibGit2/src/status.jl
+++ b/stdlib/LibGit2/src/status.jl
@@ -12,23 +12,23 @@ submodules or not. See [`StatusOptions`](@ref) for more information.
 function GitStatus(repo::GitRepo; status_opts=StatusOptions())
     ensure_initialized()
     stat_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_status_list_new, :libgit2), Cint,
+    @check ccall((:git_status_list_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{StatusOptions}),
-                  stat_ptr_ptr, repo.ptr, Ref(status_opts))
+                  stat_ptr_ptr, repo, Ref(status_opts))
     return GitStatus(repo, stat_ptr_ptr[])
 end
 
 function Base.length(status::GitStatus)
     ensure_initialized()
-    return Int(ccall((:git_status_list_entrycount, :libgit2), Csize_t,
-                      (Ptr{Ptr{Cvoid}},), status.ptr))
+    return Int(ccall((:git_status_list_entrycount, libgit2), Csize_t,
+                         (Ptr{Cvoid},), status))
 end
 
 function Base.getindex(status::GitStatus, i::Integer)
     1 <= i <= length(status) || throw(BoundsError())
     ensure_initialized()
     GC.@preserve status begin
-        entry_ptr = ccall((:git_status_byindex, :libgit2),
+        entry_ptr = ccall((:git_status_byindex, libgit2),
                           Ptr{StatusEntry},
                           (Ptr{Cvoid}, Csize_t),
                           status.ptr, i-1)
@@ -39,7 +39,7 @@ function Base.getindex(status::GitStatus, i::Integer)
 end
 
 """
-    LibGit2.status(repo::GitRepo, path::String) -> Union{Cuint, Cvoid}
+    LibGit2.status(repo::GitRepo, path::String)::Union{Cuint, Cvoid}
 
 Lookup the status of the file at `path` in the git
 repository `repo`. For instance, this can be used
@@ -49,9 +49,9 @@ and needs to be staged and committed.
 function status(repo::GitRepo, path::String)
     ensure_initialized()
     status_ptr = Ref{Cuint}(0)
-    ret =  ccall((:git_status_file, :libgit2), Cint,
+    ret =  ccall((:git_status_file, libgit2), Cint,
                   (Ref{Cuint}, Ptr{Cvoid}, Cstring),
-                  status_ptr, repo.ptr, path)
+                  status_ptr, repo, path)
     (ret == Cint(Error.ENOTFOUND) || ret == Cint(Error.EAMBIGUOUS)) && return nothing
     return status_ptr[]
 end
diff --git a/stdlib/LibGit2/src/strarray.jl b/stdlib/LibGit2/src/strarray.jl
index db0803680f72b..78e38a9502128 100644
--- a/stdlib/LibGit2/src/strarray.jl
+++ b/stdlib/LibGit2/src/strarray.jl
@@ -1,6 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-
 function Base.cconvert(::Type{Ptr{StrArrayStruct}}, x::Vector)
     str_ref = Base.cconvert(Ref{Cstring}, x)
     sa_ref = Ref(StrArrayStruct(Base.unsafe_convert(Ref{Cstring}, str_ref), length(x)))
@@ -10,6 +9,8 @@ function Base.unsafe_convert(::Type{Ptr{StrArrayStruct}}, rr::Tuple{Ref{StrArray
     Base.unsafe_convert(Ptr{StrArrayStruct}, first(rr))
 end
 
-function Base.convert(::Type{Vector{String}}, sa::StrArrayStruct)
-    [unsafe_string(unsafe_load(sa.strings, i)) for i = 1:sa.count]
+Base.length(sa::StrArrayStruct) = sa.count
+function Base.iterate(sa::StrArrayStruct, state=1)
+    state > sa.count && return nothing
+    (unsafe_string(unsafe_load(sa.strings, state)), state+1)
 end
diff --git a/stdlib/LibGit2/src/tag.jl b/stdlib/LibGit2/src/tag.jl
index 4209a4e2f917d..d703acbc7c2b7 100644
--- a/stdlib/LibGit2/src/tag.jl
+++ b/stdlib/LibGit2/src/tag.jl
@@ -1,16 +1,16 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 """
-    LibGit2.tag_list(repo::GitRepo) -> Vector{String}
+    LibGit2.tag_list(repo::GitRepo)::Vector{String}
 
 Get a list of all tags in the git repository `repo`.
 """
 function tag_list(repo::GitRepo)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
-    @check ccall((:git_tag_list, :libgit2), Cint,
-                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr)
-    res = convert(Vector{String}, sa_ref[])
+    @check ccall((:git_tag_list, libgit2), Cint,
+                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo)
+    res = collect(sa_ref[])
     free(sa_ref)
     res
 end
@@ -22,8 +22,8 @@ Remove the git tag `tag` from the repository `repo`.
 """
 function tag_delete(repo::GitRepo, tag::AbstractString)
     ensure_initialized()
-    @check ccall((:git_tag_delete, :libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring), repo.ptr, tag)
+    @check ccall((:git_tag_delete, libgit2), Cint,
+                  (Ptr{Cvoid}, Cstring), repo, tag)
 end
 
 """
@@ -46,9 +46,9 @@ function tag_create(repo::GitRepo, tag::AbstractString, commit::Union{AbstractSt
         commit_obj === nothing && return oid_ptr[] # return empty oid
         with(convert(GitSignature, sig)) do git_sig
             ensure_initialized()
-            @check ccall((:git_tag_create, :libgit2), Cint,
+            @check ccall((:git_tag_create, libgit2), Cint,
                  (Ptr{GitHash}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}, Ptr{SignatureStruct}, Cstring, Cint),
-                  oid_ptr, repo.ptr, tag, commit_obj.ptr, git_sig.ptr, msg, Cint(force))
+                  oid_ptr, repo, tag, commit_obj, git_sig, msg, Cint(force))
         end
     end
     return oid_ptr[]
@@ -62,7 +62,7 @@ The name of `tag` (e.g. `"v0.5"`).
 function name(tag::GitTag)
     ensure_initialized()
     GC.@preserve tag begin
-        str_ptr = ccall((:git_tag_name, :libgit2), Cstring, (Ptr{Cvoid},), tag.ptr)
+        str_ptr = ccall((:git_tag_name, libgit2), Cstring, (Ptr{Cvoid},), tag.ptr)
         str_ptr == C_NULL && throw(Error.GitError(Error.ERROR))
         str = unsafe_string(str_ptr)
     end
@@ -78,7 +78,7 @@ The `GitHash` of the target object of `tag`.
 function target(tag::GitTag)
     ensure_initialized()
     GC.@preserve tag begin
-        oid_ptr = ccall((:git_tag_target_id, :libgit2), Ptr{GitHash}, (Ptr{Cvoid},), tag.ptr)
+        oid_ptr = ccall((:git_tag_target_id, libgit2), Ptr{GitHash}, (Ptr{Cvoid},), tag.ptr)
         oid_ptr == C_NULL && throw(Error.GitError(Error.ERROR))
         str = unsafe_load(oid_ptr)
     end
diff --git a/stdlib/LibGit2/src/tree.jl b/stdlib/LibGit2/src/tree.jl
index 1ef8a2eb75003..9173653e26f33 100644
--- a/stdlib/LibGit2/src/tree.jl
+++ b/stdlib/LibGit2/src/tree.jl
@@ -2,10 +2,32 @@
 
 function GitTree(c::GitCommit)
     tree_out = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_commit_tree, :libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), tree_out, c)
+    @check ccall((:git_commit_tree, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), tree_out, c)
     GitTree(repository(c), tree_out[])
 end
 
+"""
+    GitTree(repo::GitRepo, tree_oid::GitHash)
+
+Look up a tree object in the repository using its GitHash.
+This constructor wraps the libgit2 git_tree_lookup function.
+
+# Examples
+```julia
+tree_hash = LibGit2.GitHash(repo, "HEAD^{tree}")
+tree = LibGit2.GitTree(repo, tree_hash)
+```
+"""
+function GitTree(repo::GitRepo, tree_oid::GitHash)
+    ensure_initialized()
+    tree_out = Ref{Ptr{Cvoid}}(C_NULL)
+    oid_ptr = Ref(tree_oid)
+    @check ccall((:git_tree_lookup, libgit2), Cint,
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}),
+                 tree_out, repo, oid_ptr)
+    return GitTree(repo, tree_out[])
+end
+
 """
     treewalk(f, tree::GitTree, post::Bool=false)
 
@@ -35,9 +57,9 @@ function treewalk(f, tree::GitTree, post::Bool = false)
             entry = GitTreeEntry(tree, entry_ptr, false)
             return f(root, entry)
         end, Cint, (Cstring, Ptr{Cvoid}, Ref{Vector{Any}}))
-    err = ccall((:git_tree_walk, :libgit2), Cint,
+    err = ccall((:git_tree_walk, libgit2), Cint,
                 (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Any),
-                tree.ptr, post, cbf, payload)
+                tree, post, cbf, payload)
     if err < 0
         err_class, _ = Error.last_error()
         if err_class != Error.Callback
@@ -58,19 +80,21 @@ Return the filename of the object on disk to which `te` refers.
 """
 function filename(te::GitTreeEntry)
     ensure_initialized()
-    str = ccall((:git_tree_entry_name, :libgit2), Cstring, (Ptr{Cvoid},), te.ptr)
-    str != C_NULL && return unsafe_string(str)
+    GC.@preserve te begin
+        str = ccall((:git_tree_entry_name, libgit2), Cstring, (Ptr{Cvoid},), te.ptr)
+        str != C_NULL && return unsafe_string(str)
+    end
     return nothing
 end
 
 """
-    filemode(te::GitTreeEntry) -> Cint
+    filemode(te::GitTreeEntry)::Cint
 
 Return the UNIX filemode of the object on disk to which `te` refers as an integer.
 """
 function filemode(te::GitTreeEntry)
     ensure_initialized()
-    return ccall((:git_tree_entry_filemode, :libgit2), Cint, (Ptr{Cvoid},), te.ptr)
+    return ccall((:git_tree_entry_filemode, libgit2), Cint, (Ptr{Cvoid},), te)
 end
 
 """
@@ -81,7 +105,7 @@ one of the types which [`objtype`](@ref) returns, e.g. a `GitTree` or `GitBlob`.
 """
 function entrytype(te::GitTreeEntry)
     ensure_initialized()
-    otype = ccall((:git_tree_entry_type, :libgit2), Cint, (Ptr{Cvoid},), te.ptr)
+    otype = ccall((:git_tree_entry_type, libgit2), Cint, (Ptr{Cvoid},), te)
     return objtype(Consts.OBJECT(otype))
 end
 
@@ -93,7 +117,7 @@ Return the [`GitHash`](@ref) of the object to which `te` refers.
 function entryid(te::GitTreeEntry)
     ensure_initialized()
     GC.@preserve te begin
-        oid_ptr = ccall((:git_tree_entry_id, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), te.ptr)
+        oid_ptr = ccall((:git_tree_entry_id, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), te.ptr)
         oid = GitHash(oid_ptr)
     end
     return oid
@@ -101,7 +125,7 @@ end
 
 function count(tree::GitTree)
     ensure_initialized()
-    return ccall((:git_tree_entrycount, :libgit2), Csize_t, (Ptr{Cvoid},), tree.ptr)
+    return ccall((:git_tree_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), tree)
 end
 
 function Base.getindex(tree::GitTree, i::Integer)
@@ -109,9 +133,9 @@ function Base.getindex(tree::GitTree, i::Integer)
         throw(BoundsError(tree, i))
     end
     ensure_initialized()
-    te_ptr = ccall((:git_tree_entry_byindex, :libgit2),
+    te_ptr = ccall((:git_tree_entry_byindex, libgit2),
                    Ptr{Cvoid},
-                   (Ptr{Cvoid}, Csize_t), tree.ptr, i-1)
+                   (Ptr{Cvoid}, Csize_t), tree, i-1)
     return GitTreeEntry(tree, te_ptr, false)
 end
 
@@ -133,7 +157,7 @@ function (::Type{T})(te::GitTreeEntry) where T<:GitObject
     ensure_initialized()
     repo = repository(te)
     obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_tree_entry_to_object, :libgit2), Cint,
+    @check ccall((:git_tree_entry_to_object, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}),
                    obj_ptr_ptr, repo, te)
     return T(repo, obj_ptr_ptr[])
@@ -162,7 +186,7 @@ function _getindex(tree::GitTree, target::AbstractString)
     end
 
     entry = Ref{Ptr{Cvoid}}(C_NULL)
-    err = ccall((:git_tree_entry_bypath, :libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), entry, tree, target)
+    err = ccall((:git_tree_entry_bypath, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), entry, tree, target)
     err == Int(Error.ENOTFOUND) && return nothing
     err < 0 && throw(Error.GitError(err))
     entry = GitTreeEntry(tree, entry[], true #= N.B.: Most other lookups need false here =#)
@@ -170,7 +194,7 @@ function _getindex(tree::GitTree, target::AbstractString)
 end
 
 """
-    getindex(tree::GitTree, target::AbstractString) -> GitObject
+    getindex(tree::GitTree, target::AbstractString)::GitObject
 
 Look up `target` path in the `tree`, returning a [`GitObject`](@ref) (a [`GitBlob`](@ref) in
 the case of a file, or another [`GitTree`](@ref) if looking up a directory).
@@ -192,3 +216,35 @@ end
 function Base.haskey(tree::GitTree, target::AbstractString)
     return _getindex(tree, target) !== nothing
 end
+
+"""
+    apply_to_tree(repo::GitRepo, preimage::GitTree, diff::GitDiff, options::ApplyOptions=ApplyOptions())
+
+Apply a [`GitDiff`](@ref) to a [`GitTree`](@ref), returning the resulting index.
+The `preimage` is the tree to which the diff will be applied. The `diff` should be
+generated from the `preimage` to some other tree.
+
+The returned [`GitIndex`](@ref) contains the result of applying the diff and can be
+written as a tree using [`write_tree_to!`](@ref).
+
+This is equivalent to [`git_apply_to_tree`](https://libgit2.org/libgit2/#HEAD/group/apply/git_apply_to_tree).
+
+# Examples
+```julia
+repo = LibGit2.GitRepo(repo_path)
+tree1 = LibGit2.GitTree(repo, "HEAD^{tree}")
+tree2 = LibGit2.GitTree(repo, "HEAD~1^{tree}")
+diff = LibGit2.diff_tree(repo, tree1, tree2)
+result_index = LibGit2.apply_to_tree(repo, tree1, diff)
+tree_oid = LibGit2.write_tree_to!(repo, result_index)
+```
+"""
+function apply_to_tree(repo::GitRepo, preimage::GitTree, diff::GitDiff, options::ApplyOptions=ApplyOptions())
+    ensure_initialized()
+    out_index_ptr = Ref{Ptr{Cvoid}}(C_NULL)
+    opts_ptr = Ref(options)
+    @check ccall((:git_apply_to_tree, libgit2), Cint,
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{ApplyOptions}),
+                 out_index_ptr, repo, preimage, diff, opts_ptr)
+    return GitIndex(repo, out_index_ptr[])
+end
diff --git a/stdlib/LibGit2/src/types.jl b/stdlib/LibGit2/src/types.jl
index 0b653f9b6ad21..ad8c54f16d16c 100644
--- a/stdlib/LibGit2/src/types.jl
+++ b/stdlib/LibGit2/src/types.jl
@@ -2,7 +2,7 @@
 
 using Base: something
 import Base.@kwdef
-import .Consts: GIT_SUBMODULE_IGNORE, GIT_MERGE_FILE_FAVOR, GIT_MERGE_FILE, GIT_CONFIG
+import .Consts: GIT_SUBMODULE_IGNORE, GIT_MERGE_FILE_FAVOR, GIT_MERGE_FILE, GIT_CONFIG, GIT_OID_TYPE
 
 const OID_RAWSZ = 20
 const OID_HEXSZ = OID_RAWSZ * 2
@@ -78,7 +78,7 @@ When fetching data from LibGit2, a typical usage would look like:
 ```julia
 sa_ref = Ref(StrArrayStruct())
 @check ccall(..., (Ptr{StrArrayStruct},), sa_ref)
-res = convert(Vector{String}, sa_ref[])
+res = collect(sa_ref[])
 free(sa_ref)
 ```
 In particular, note that `LibGit2.free` should be called afterward on the `Ref` object.
@@ -99,7 +99,7 @@ StrArrayStruct() = StrArrayStruct(C_NULL, 0)
 
 function free(sa_ref::Base.Ref{StrArrayStruct})
     ensure_initialized()
-    ccall((:git_strarray_free, :libgit2), Cvoid, (Ptr{StrArrayStruct},), sa_ref)
+    ccall((:git_strarray_free, libgit2), Cvoid, (Ptr{StrArrayStruct},), sa_ref)
 end
 
 """
@@ -126,7 +126,7 @@ Buffer() = Buffer(C_NULL, 0, 0)
 
 function free(buf_ref::Base.Ref{Buffer})
     ensure_initialized()
-    ccall((:git_buf_free, :libgit2), Cvoid, (Ptr{Buffer},), buf_ref)
+    ccall((:git_buf_free, libgit2), Cvoid, (Ptr{Buffer},), buf_ref)
 end
 
 """
@@ -237,6 +237,9 @@ Matches the [`git_remote_callbacks`](https://libgit2.org/libgit2/#HEAD/type/git_
     @static if LibGit2.VERSION >= v"0.99.0"
         resolve_url::Ptr{Cvoid}        = C_NULL
     end
+    @static if LibGit2.VERSION >= v"1.9.0"
+        update_refs::Ptr{Cvoid}        = C_NULL
+    end
 end
 @assert Base.allocatedinline(RemoteCallbacks)
 
@@ -346,6 +349,9 @@ The fields represent:
     @static if LibGit2.VERSION >= v"0.25.0"
         proxy_opts::ProxyOptions       = ProxyOptions()
     end
+    @static if LibGit2.VERSION >= v"1.7.0"
+        depth::Cuint                   = Cuint(Consts.FETCH_DEPTH_FULL)
+    end
     @static if LibGit2.VERSION >= v"1.4.0"
         follow_redirects::Cuint        = Cuint(0)
     end
@@ -439,6 +445,9 @@ The fields represent:
     # options controlling how the diff text is generated
     context_lines::UInt32                    = UInt32(3)
     interhunk_lines::UInt32                  = UInt32(0)
+    @static if LibGit2.VERSION >= v"1.7.0"
+        oid_type::GIT_OID_TYPE               = Consts.OID_DEFAULT
+    end
     id_abbrev::UInt16                        = UInt16(7)
     max_size::Int64                          = Int64(512*1024*1024) #512Mb
     old_prefix::Cstring                      = Cstring(C_NULL)
@@ -672,6 +681,8 @@ The fields represent:
      for more information.
   * `custom_headers`: only relevant if the LibGit2 version is greater than or equal to `0.24.0`.
      Extra headers needed for the push operation.
+  * `remote_push_options`: only relevant if the LibGit2 version is greater than or equal to `1.8.0`.
+     "Push options" to deliver to the remote.
 """
 @kwdef struct PushOptions
     version::Cuint                     = Cuint(1)
@@ -686,6 +697,9 @@ The fields represent:
     @static if LibGit2.VERSION >= v"0.24.0"
         custom_headers::StrArrayStruct = StrArrayStruct()
     end
+    @static if LibGit2.VERSION >= v"1.8.0"
+        remote_push_options::StrArrayStruct = StrArrayStruct()
+    end
 end
 @assert Base.allocatedinline(PushOptions)
 
@@ -845,6 +859,28 @@ The fields represent:
 end
 @assert Base.allocatedinline(StatusOptions)
 
+"""
+    LibGit2.ApplyOptions
+
+Options for applying a diff.
+Matches the [`git_apply_options`](https://libgit2.org/libgit2/#HEAD/type/git_apply_options) struct.
+
+The fields represent:
+  * `version`: version of the struct in use, in case this changes later. For now, always `1`.
+  * `delta_cb`: optional callback that will be made before each delta is applied.
+  * `hunk_cb`: optional callback that will be made before each hunk is applied.
+  * `payload`: the payload for the callback functions.
+  * `flags`: flags controlling how the apply is performed (e.g., check mode).
+"""
+@kwdef struct ApplyOptions
+    version::Cuint           = Cuint(1)
+    delta_cb::Ptr{Cvoid}     = C_NULL
+    hunk_cb::Ptr{Cvoid}      = C_NULL
+    payload::Any             = nothing
+    flags::Cuint             = Cuint(0)
+end
+@assert Base.allocatedinline(ApplyOptions)
+
 """
     LibGit2.StatusEntry
 
@@ -907,10 +943,19 @@ Matches the [`git_config_entry`](https://libgit2.org/libgit2/#HEAD/type/git_conf
 struct ConfigEntry
     name::Cstring
     value::Cstring
+    @static if LibGit2.VERSION >= v"1.8.0"
+        backend_type::Cstring
+        origin_path::Cstring
+    end
     include_depth::Cuint
     level::GIT_CONFIG
-    free::Ptr{Cvoid}
-    payload::Ptr{Cvoid} # User is not permitted to read or write this field
+    @static if LibGit2.VERSION < v"1.9.0"
+        free::Ptr{Cvoid}
+    end
+    @static if LibGit2.VERSION < v"1.8.0"
+        # In 1.8.0, the unused payload value has been removed
+        payload::Ptr{Cvoid}
+    end
 end
 @assert Base.allocatedinline(ConfigEntry)
 
@@ -919,7 +964,18 @@ function Base.show(io::IO, ce::ConfigEntry)
 end
 
 """
-    LibGit2.split_cfg_entry(ce::LibGit2.ConfigEntry) -> Tuple{String,String,String,String}
+    LibGit2.ConfigBackendEntry
+
+Matches the [`git_config_backend_entry`](https://libgit2.org/libgit2/#HEAD/type/git_config_backend_entry) struct.
+"""
+struct ConfigBackendEntry
+    entry::ConfigEntry
+    free::Ptr{Cvoid}
+end
+@assert Base.allocatedinline(ConfigBackendEntry)
+
+"""
+    LibGit2.split_cfg_entry(ce::LibGit2.ConfigEntry)::Tuple{String,String,String,String}
 
 Break the `ConfigEntry` up to the following pieces: section, subsection, name, and value.
 
@@ -1004,12 +1060,12 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[
     (:GitRepo,           nothing,                 :AbstractGitObject, :git_repository),
     (:GitConfig,         :(Union{GitRepo, Nothing}), :AbstractGitObject, :git_config),
     (:GitIndex,          :(Union{GitRepo, Nothing}), :AbstractGitObject, :git_index),
-    (:GitRemote,         :GitRepo,                :AbstractGitObject, :git_remote),
+    (:GitRemote,         :(Union{GitRepo, Nothing}), :AbstractGitObject, :git_remote),
     (:GitRevWalker,      :GitRepo,                :AbstractGitObject, :git_revwalk),
     (:GitReference,      :GitRepo,                :AbstractGitObject, :git_reference),
     (:GitDescribeResult, :GitRepo,                :AbstractGitObject, :git_describe_result),
-    (:GitDiff,           :GitRepo,                :AbstractGitObject, :git_diff),
-    (:GitDiffStats,      :GitRepo,                :AbstractGitObject, :git_diff_stats),
+    (:GitDiff,           nothing,                 :AbstractGitObject, :git_diff),
+    (:GitDiffStats,      nothing,                 :AbstractGitObject, :git_diff_stats),
     (:GitAnnotated,      :GitRepo,                :AbstractGitObject, :git_annotated_commit),
     (:GitRebase,         :GitRepo,                :AbstractGitObject, :git_rebase),
     (:GitBlame,          :GitRepo,                :AbstractGitObject, :git_blame),
@@ -1039,7 +1095,6 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[
                 return obj
             end
         end
-        @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, x::$typ) = x.ptr
     else
         @eval mutable struct $typ <: $sup
             owner::$owntyp
@@ -1054,21 +1109,21 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[
                 return obj
             end
         end
-        @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, x::$typ) = x.ptr
         if isa(owntyp, Expr) && owntyp.args[1] === :Union && owntyp.args[3] === :Nothing
             @eval begin
                 $typ(ptr::Ptr{Cvoid}, fin::Bool=true) = $typ(nothing, ptr, fin)
             end
         end
     end
+    @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, obj::$typ) = obj.ptr
     @eval function Base.close(obj::$typ)
         if obj.ptr != C_NULL
             ensure_initialized()
-            ccall(($(string(cname, :_free)), :libgit2), Cvoid, (Ptr{Cvoid},), obj.ptr)
+            ccall(($(string(cname, :_free)), libgit2), Cvoid, (Ptr{Cvoid},), obj)
             obj.ptr = C_NULL
             if Threads.atomic_sub!(REFCOUNT, 1) == 1
                 # will the last finalizer please turn out the lights?
-                ccall((:git_libgit2_shutdown, :libgit2), Cint, ())
+                ccall((:git_libgit2_shutdown, libgit2), Cint, ())
             end
         end
     end
@@ -1098,10 +1153,11 @@ end
 function Base.close(obj::GitSignature)
     if obj.ptr != C_NULL
         ensure_initialized()
-        ccall((:git_signature_free, :libgit2), Cvoid, (Ptr{SignatureStruct},), obj.ptr)
+        ccall((:git_signature_free, libgit2), Cvoid, (Ptr{SignatureStruct},), obj)
         obj.ptr = C_NULL
     end
 end
+Base.unsafe_convert(::Type{Ptr{SignatureStruct}}, obj::GitSignature) = obj.ptr
 
 # Structure has the same layout as SignatureStruct
 mutable struct Signature
@@ -1120,15 +1176,20 @@ The fields represent:
     * `final_commit_id`: the [`GitHash`](@ref) of the commit where this section was last changed.
     * `final_start_line_number`: the *one based* line number in the file where the
        hunk starts, in the *final* version of the file.
-    * `final_signature`: the signature of the person who last modified this hunk. You will
+    * `final_signature`: the signature of the author of `final_commit_id`. You will
+       need to pass this to `Signature` to access its fields.
+    * `final_committer`: the signature of the committer of `final_commit_id`. You will
        need to pass this to `Signature` to access its fields.
     * `orig_commit_id`: the [`GitHash`](@ref) of the commit where this hunk was first found.
     * `orig_path`: the path to the file where the hunk originated. This may be different
        than the current/final path, for instance if the file has been moved.
     * `orig_start_line_number`: the *one based* line number in the file where the
        hunk starts, in the *original* version of the file at `orig_path`.
-    * `orig_signature`: the signature of the person who introduced this hunk. You will
+    * `orig_signature`: the signature of the author who introduced this hunk. You will
        need to pass this to `Signature` to access its fields.
+    * `orig_committer`: the signature of the committer who introduced this hunk. You will
+       need to pass this to `Signature` to access its fields.
+    * `summary`: a string summary.
     * `boundary`: `'1'` if the original commit is a "boundary" commit (for instance, if it's
        equal to an oldest commit set in `options`).
 """
@@ -1138,12 +1199,21 @@ The fields represent:
     final_commit_id::GitHash              = GitHash()
     final_start_line_number::Csize_t      = Csize_t(0)
     final_signature::Ptr{SignatureStruct} = Ptr{SignatureStruct}(C_NULL)
+    @static if LibGit2.VERSION >= v"1.9.0"
+        final_committer::Ptr{SignatureStruct} = Ptr{SignatureStruct}(C_NULL)
+    end
 
     orig_commit_id::GitHash               = GitHash()
     orig_path::Cstring                    = Cstring(C_NULL)
     orig_start_line_number::Csize_t       = Csize_t(0)
     orig_signature::Ptr{SignatureStruct}  = Ptr{SignatureStruct}(C_NULL)
+    @static if LibGit2.VERSION >= v"1.9.0"
+        orig_committer::Ptr{SignatureStruct}  = Ptr{SignatureStruct}(C_NULL)
+    end
 
+    @static if LibGit2.VERSION >= v"1.9.0"
+        summary::Cstring                      = Cstring(C_NULL)
+    end
     boundary::Char                        = '\0'
 end
 @assert Base.allocatedinline(BlameHunk)
@@ -1197,7 +1267,7 @@ Consts.OBJECT(::Type{GitObject})        = Consts.OBJ_ANY
 
 function Consts.OBJECT(ptr::Ptr{Cvoid})
     ensure_initialized()
-    ccall((:git_object_type, :libgit2), Consts.OBJECT, (Ptr{Cvoid},), ptr)
+    ccall((:git_object_type, libgit2), Consts.OBJECT, (Ptr{Cvoid},), ptr)
 end
 
 """
@@ -1224,7 +1294,7 @@ end
 abstract type AbstractCredential end
 
 """
-    isfilled(cred::AbstractCredential) -> Bool
+    isfilled(cred::AbstractCredential)::Bool
 
 Verifies that a credential is ready for use in authentication.
 """
@@ -1396,7 +1466,7 @@ function Base.shred!(p::CredentialPayload)
 end
 
 """
-    reset!(payload, [config]) -> CredentialPayload
+    reset!(payload, [config])::CredentialPayload
 
 Reset the `payload` state back to the initial values so that it can be used again within
 the credential callback. If a `config` is provided the configuration will also be updated.
@@ -1418,7 +1488,7 @@ function reset!(p::CredentialPayload, config::GitConfig=p.config)
 end
 
 """
-    approve(payload::CredentialPayload; shred::Bool=true) -> Nothing
+    approve(payload::CredentialPayload; shred::Bool=true) -> nothing
 
 Store the `payload` credential for re-use in a future authentication. Should only be called
 when authentication was successful.
@@ -1449,7 +1519,7 @@ function approve(p::CredentialPayload; shred::Bool=true)
 end
 
 """
-    reject(payload::CredentialPayload; shred::Bool=true) -> Nothing
+    reject(payload::CredentialPayload; shred::Bool=true) -> nothing
 
 Discard the `payload` credential from begin re-used in future authentication. Should only be
 called when authentication was unsuccessful.
@@ -1480,3 +1550,26 @@ end
 
 # Useful for functions which can handle various kinds of credentials
 const Creds = Union{CredentialPayload, AbstractCredential, CachedCredentials, Nothing}
+
+struct _GitRemoteHead
+    available_local::Cint
+    oid::GitHash
+    loid::GitHash
+    name::Cstring
+    symref_target::Cstring
+end
+
+struct GitRemoteHead
+    available_local::Bool
+    oid::GitHash
+    loid::GitHash
+    name::String
+    symref_target::Union{Nothing,String}
+    function GitRemoteHead(head::_GitRemoteHead)
+        name = unsafe_string(head.name)
+        symref_target = (head.symref_target != C_NULL ?
+            unsafe_string(head.symref_target) : nothing)
+        return new(head.available_local != 0,
+                   head.oid, head.loid, name, symref_target)
+    end
+end
diff --git a/stdlib/LibGit2/src/utils.jl b/stdlib/LibGit2/src/utils.jl
index 5234e9b6fc291..6fc4acbd6bf77 100644
--- a/stdlib/LibGit2/src/utils.jl
+++ b/stdlib/LibGit2/src/utils.jl
@@ -29,7 +29,7 @@ $
 """x
 
 """
-    version() -> VersionNumber
+    version()::VersionNumber
 
 Return the version of libgit2 in use, as a [`VersionNumber`](@ref man-version-number-literals).
 """
@@ -37,7 +37,7 @@ function version()
     major = Ref{Cint}(0)
     minor = Ref{Cint}(0)
     patch = Ref{Cint}(0)
-    @check ccall((:git_libgit2_version, :libgit2), Cint,
+    @check ccall((:git_libgit2_version, libgit2), Cint,
                  (Ref{Cint}, Ref{Cint}, Ref{Cint}), major, minor, patch)
     return VersionNumber(major[], minor[], patch[])
 end
@@ -72,7 +72,7 @@ Return a list of git features the current version of libgit2 supports, such as
 threading or using HTTPS or SSH.
 """
 function features()
-    feat = ccall((:git_libgit2_features, :libgit2), Cint, ())
+    feat = ccall((:git_libgit2_features, libgit2), Cint, ())
     res = Consts.GIT_FEATURE[]
     for f in instances(Consts.GIT_FEATURE)
         isset(feat, Cuint(f)) && Base.push!(res, f)
@@ -93,7 +93,7 @@ elseif Sys.isunix()
 end
 
 """
-    LibGit2.git_url(; kwargs...) -> String
+    LibGit2.git_url(; kwargs...)::String
 
 Create a string based upon the URL components provided. When the `scheme` keyword is not
 provided the URL produced will use the alternative [scp-like syntax](https://git-scm.com/docs/git-clone#_git_urls_a_id_urls_a).
@@ -162,7 +162,7 @@ function git_url(;
     end
     seekstart(io)
 
-    return String(take!(io))
+    return takestring!(io)
 end
 
 function credential_identifier(scheme::AbstractString, host::AbstractString)
diff --git a/stdlib/LibGit2/src/walker.jl b/stdlib/LibGit2/src/walker.jl
index 468e6899a7aa8..239009a014c1e 100644
--- a/stdlib/LibGit2/src/walker.jl
+++ b/stdlib/LibGit2/src/walker.jl
@@ -21,16 +21,16 @@ Since the `GitHash` is unique to a commit, `cnt` will be `1`.
 function GitRevWalker(repo::GitRepo)
     ensure_initialized()
     w_ptr = Ref{Ptr{Cvoid}}(C_NULL)
-    @check ccall((:git_revwalk_new, :libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), w_ptr, repo.ptr)
+    @check ccall((:git_revwalk_new, libgit2), Cint,
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), w_ptr, repo)
     return GitRevWalker(repo, w_ptr[])
 end
 
 function Base.iterate(w::GitRevWalker, state=nothing)
     ensure_initialized()
     id_ptr = Ref(GitHash())
-    err = ccall((:git_revwalk_next, :libgit2), Cint,
-                (Ptr{GitHash}, Ptr{Cvoid}), id_ptr, w.ptr)
+    err = ccall((:git_revwalk_next, libgit2), Cint,
+                (Ptr{GitHash}, Ptr{Cvoid}), id_ptr, w)
     if err == Cint(Error.GIT_OK)
         return (id_ptr[], nothing)
     elseif err == Cint(Error.ITEROVER)
@@ -51,7 +51,7 @@ during the walk.
 """
 function push_head!(w::GitRevWalker)
     ensure_initialized()
-    @check ccall((:git_revwalk_push_head, :libgit2), Cint, (Ptr{Cvoid},), w.ptr)
+    @check ccall((:git_revwalk_push_head, libgit2), Cint, (Ptr{Cvoid},), w)
     return w
 end
 
@@ -64,20 +64,20 @@ of that year as `cid` and then passing the resulting `w` to [`LibGit2.map`](@ref
 """
 function push!(w::GitRevWalker, cid::GitHash)
     ensure_initialized()
-    @check ccall((:git_revwalk_push, :libgit2), Cint, (Ptr{Cvoid}, Ptr{GitHash}), w.ptr, Ref(cid))
+    @check ccall((:git_revwalk_push, libgit2), Cint, (Ptr{Cvoid}, Ptr{GitHash}), w, Ref(cid))
     return w
 end
 
 function push!(w::GitRevWalker, range::AbstractString)
     ensure_initialized()
-    @check ccall((:git_revwalk_push_range, :libgit2), Cint, (Ptr{Cvoid}, Ptr{UInt8}), w.ptr, range)
+    @check ccall((:git_revwalk_push_range, libgit2), Cint, (Ptr{Cvoid}, Ptr{UInt8}), w, range)
     return w
 end
 
 function Base.sort!(w::GitRevWalker; by::Cint = Consts.SORT_NONE, rev::Bool=false)
     ensure_initialized()
     rev && (by |= Consts.SORT_REVERSE)
-    @check ccall((:git_revwalk_sorting, :libgit2), Cint, (Ptr{Cvoid}, Cint), w.ptr, by)
+    @check ccall((:git_revwalk_sorting, libgit2), Cint, (Ptr{Cvoid}, Cint), w, by)
     return w
 end
 
diff --git a/stdlib/LibGit2/test/bad_ca_roots.jl b/stdlib/LibGit2/test/bad_ca_roots.jl
index 4882065167bdb..4caed4ed90beb 100644
--- a/stdlib/LibGit2/test/bad_ca_roots.jl
+++ b/stdlib/LibGit2/test/bad_ca_roots.jl
@@ -12,20 +12,24 @@ const CAN_SET_CA_ROOTS_PATH = !Sys.isapple() && !Sys.iswindows()
 # Given this is a sub-processed test file, not using @testsets avoids
 # leaking the report print into the Base test runner report
 begin # empty CA roots file
-    # these fail for different reasons on different platforms:
-    # - on Apple & Windows you cannot set the CA roots path location
-    # - on Linux & FreeBSD you you can but these are invalid files
+    # different behavior on different platforms:
+    # - on Apple & Windows you cannot set the CA roots path location; don't error
+    # - on Linux & FreeBSD you can but these are invalid files
+
     ENV["JULIA_SSL_CA_ROOTS_PATH"] = "/dev/null"
-    @test_throws LibGit2.GitError LibGit2.ensure_initialized()
+    if CAN_SET_CA_ROOTS_PATH
+        @test_throws LibGit2.GitError LibGit2.ensure_initialized()
+    else
+        @test LibGit2.ensure_initialized() === nothing
+    end
+
     ENV["JULIA_SSL_CA_ROOTS_PATH"] = tempname()
-    @test_throws LibGit2.GitError LibGit2.ensure_initialized()
-    # test that it still fails if called a second time
-    @test_throws LibGit2.GitError LibGit2.ensure_initialized()
-    if !CAN_SET_CA_ROOTS_PATH
-        # test that this doesn't work on macOS & Windows
-        ENV["JULIA_SSL_CA_ROOTS_PATH"] = NetworkOptions.bundled_ca_roots()
+    if CAN_SET_CA_ROOTS_PATH
+        @test_throws LibGit2.GitError LibGit2.ensure_initialized()
+        # test that it still fails if called a second time
         @test_throws LibGit2.GitError LibGit2.ensure_initialized()
-        delete!(ENV, "JULIA_SSL_CA_ROOTS_PATH")
+    else
+        @test LibGit2.ensure_initialized() === nothing
         @test LibGit2.ensure_initialized() === nothing
     end
 end
diff --git a/stdlib/LibGit2/test/libgit2-tests.jl b/stdlib/LibGit2/test/libgit2-tests.jl
index 7dbbd10af6f67..f5f9d3cc697cc 100644
--- a/stdlib/LibGit2/test/libgit2-tests.jl
+++ b/stdlib/LibGit2/test/libgit2-tests.jl
@@ -3,117 +3,16 @@
 module LibGit2Tests
 
 import LibGit2
+using LibGit2_jll
 using Test
 using Random, Serialization, Sockets
 
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl"))
-import .Main.FakePTYs: with_fake_pty
-
-const timeout = 60
-
-function challenge_prompt(code::Expr, challenges)
-    input_code = tempname()
-    open(input_code, "w") do fp
-        serialize(fp, code)
-    end
-    output_file = tempname()
-    torun = """
-        import LibGit2
-        using Serialization
-        result = open($(repr(input_code))) do fp
-            eval(deserialize(fp))
-        end
-        open($(repr(output_file)), "w") do fp
-            serialize(fp, result)
-        end"""
-    cmd = `$(Base.julia_cmd()) --startup-file=no -e $torun`
-    try
-        challenge_prompt(cmd, challenges)
-        return open(output_file, "r") do fp
-            deserialize(fp)
-        end
-    finally
-        isfile(output_file) && rm(output_file)
-        isfile(input_code) && rm(input_code)
-    end
-    return nothing
-end
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")
+isdefined(Main, :ChallengePrompts) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ChallengePrompts.jl"))
+using .Main.ChallengePrompts: challenge_prompt as basic_challenge_prompt
 
-function challenge_prompt(cmd::Cmd, challenges)
-    function format_output(output)
-        str = read(seekstart(output), String)
-        isempty(str) && return ""
-        return "Process output found:\n\"\"\"\n$str\n\"\"\""
-    end
-    out = IOBuffer()
-    with_fake_pty() do pts, ptm
-        p = run(detach(cmd), pts, pts, pts, wait=false) # getpass uses stderr by default
-        Base.close_stdio(pts)
-
-        # Kill the process if it takes too long. Typically occurs when process is waiting
-        # for input.
-        timer = Channel{Symbol}(1)
-        watcher = @async begin
-            waited = 0
-            while waited < timeout && process_running(p)
-                sleep(1)
-                waited += 1
-            end
-
-            if process_running(p)
-                kill(p)
-                put!(timer, :timeout)
-            elseif success(p)
-                put!(timer, :success)
-            else
-                put!(timer, :failure)
-            end
-
-            # SIGKILL stubborn processes
-            if process_running(p)
-                sleep(3)
-                process_running(p) && kill(p, Base.SIGKILL)
-            end
-            wait(p)
-        end
-
-        wroteall = false
-        try
-            for (challenge, response) in challenges
-                write(out, readuntil(ptm, challenge, keep=true))
-                if !isopen(ptm)
-                    error("Could not locate challenge: \"$challenge\". ",
-                          format_output(out))
-                end
-                write(ptm, response)
-            end
-            wroteall = true
-
-            # Capture output from process until `pts` is closed
-            write(out, ptm)
-        catch ex
-            if !(wroteall && ex isa Base.IOError && ex.code == Base.UV_EIO)
-                # ignore EIO from `ptm` after `pts` dies
-                error("Process failed possibly waiting for a response. ",
-                      format_output(out))
-            end
-        end
-
-        status = fetch(timer)
-        close(ptm)
-        if status !== :success
-            if status === :timeout
-                error("Process timed out possibly waiting for a response. ",
-                      format_output(out))
-            else
-                error("Failed process. ", format_output(out), "\n", p)
-            end
-        end
-        wait(watcher)
-    end
-    nothing
-end
+challenge_prompt(code::Expr, challenges) = basic_challenge_prompt(code, challenges; pkgs=["LibGit2"])
+challenge_prompt(cmd::Cmd, challenges) = basic_challenge_prompt(cmd, challenges)
 
 const LIBGIT2_MIN_VER = v"1.0.0"
 const LIBGIT2_HELPER_PATH = joinpath(@__DIR__, "libgit2-helpers.jl")
@@ -129,7 +28,7 @@ end
 function get_global_dir()
     buf = Ref(LibGit2.Buffer())
 
-    LibGit2.@check @ccall "libgit2".git_libgit2_opts(
+    LibGit2.@check @ccall libgit2.git_libgit2_opts(
         LibGit2.Consts.GET_SEARCH_PATH::Cint;
         LibGit2.Consts.CONFIG_LEVEL_GLOBAL::Cint,
         buf::Ptr{LibGit2.Buffer})::Cint
@@ -139,7 +38,7 @@ function get_global_dir()
 end
 
 function set_global_dir(dir)
-    LibGit2.@check @ccall "libgit2".git_libgit2_opts(
+    LibGit2.@check @ccall libgit2.git_libgit2_opts(
         LibGit2.Consts.SET_SEARCH_PATH::Cint;
         LibGit2.Consts.CONFIG_LEVEL_GLOBAL::Cint,
         dir::Cstring)::Cint
@@ -196,7 +95,7 @@ end
     p = ["XXX","YYY"]
     a = Base.cconvert(Ptr{LibGit2.StrArrayStruct}, p)
     b = Base.unsafe_convert(Ptr{LibGit2.StrArrayStruct}, a)
-    @test p == convert(Vector{String}, unsafe_load(b))
+    @test p == collect(unsafe_load(b))
     @noinline gcuse(a) = a
     gcuse(a)
 end
@@ -845,6 +744,23 @@ mktempdir() do dir
             cred_payload = LibGit2.CredentialPayload()
             @test_throws ArgumentError LibGit2.clone(cache_repo, test_repo, callbacks=callbacks, credentials=cred_payload)
         end
+        @testset "shallow clone" begin
+            @static if LibGit2.VERSION >= v"1.7.0"
+                # Note: Shallow clones are not supported with local file:// transport
+                # This is a limitation in libgit2 - shallow clones only work with
+                # network protocols (http, https, git, ssh)
+                # See online-tests.jl for tests with remote repositories
+
+                # Test normal clone is not shallow
+                normal_path = joinpath(dir, "Example.NotShallow")
+                LibGit2.with(LibGit2.clone(cache_repo, normal_path)) do repo
+                    @test !LibGit2.isshallow(repo)
+                end
+            else
+                # Test that depth parameter throws error on older libgit2
+                @test_throws ArgumentError LibGit2.clone(cache_repo, joinpath(dir, "Example.Shallow"), depth=1)
+            end
+        end
     end
 
     @testset "Update cache repository" begin
@@ -928,6 +844,14 @@ mktempdir() do dir
                     @test cmtr.email == test_sig.email
                     @test LibGit2.message(cmt) == commit_msg1
 
+                    # test that the parent is correct
+                    @test LibGit2.parentcount(cmt) == 0
+                    LibGit2.with(LibGit2.GitCommit(repo, commit_oid3)) do cmt3
+                        @test LibGit2.parentcount(cmt3) == 1
+                        @test LibGit2.parent_id(cmt3, 1) == commit_oid1
+                        @test LibGit2.GitHash(LibGit2.parent(cmt3, 1)) == commit_oid1
+                    end
+
                     # test showing the commit
                     showstr = split(sprint(show, cmt), "\n")
                     # the time of the commit will vary so just test the first two parts
@@ -1163,7 +1087,7 @@ mktempdir() do dir
 
                 # test workaround for git_tree_walk issue
                 # https://github.com/libgit2/libgit2/issues/4693
-                ccall((:giterr_set_str, :libgit2), Cvoid, (Cint, Cstring),
+                ccall((:git_error_set_str, libgit2), Cvoid, (Cint, Cstring),
                       Cint(LibGit2.Error.Invalid), "previous error")
                 try
                     # file needs to exist in tree in order to trigger the stop walk condition
@@ -1175,6 +1099,14 @@ mktempdir() do dir
                         rethrow()
                     end
                 end
+
+                # Test GitTree constructor with GitHash
+                tree1 = LibGit2.GitTree(repo, "HEAD^{tree}")
+                tree_hash = LibGit2.GitHash(tree1)
+                tree2 = LibGit2.GitTree(repo, tree_hash)
+                @test isa(tree2, LibGit2.GitTree)
+                @test LibGit2.GitHash(tree1) == LibGit2.GitHash(tree2)
+                @test LibGit2.count(tree1) == LibGit2.count(tree2)
             end
         end
 
@@ -1240,11 +1172,26 @@ mktempdir() do dir
                 @test !LibGit2.isdirty(repo, cached=true)
                 @test !LibGit2.isdiff(repo, "HEAD", cached=true)
             end
+
+            LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo
+                diff_str = "diff --git a/test.txt b/test.txt\nindex 0000000..1111111 100644\n"
+                @test_throws LibGit2.GitError LibGit2.GitDiff(diff_str)
+
+                tree1 = LibGit2.GitTree(repo, "HEAD~1^{tree}")
+                tree2 = LibGit2.GitTree(repo, "HEAD^{tree}")
+                diff = LibGit2.diff_tree(repo, tree1, tree2)
+                idx = LibGit2.apply_to_tree(repo, tree1, diff)
+                @test idx isa LibGit2.GitIndex
+                oid = LibGit2.write_tree_to!(repo, idx)
+                @test oid isa LibGit2.GitHash
+                close(idx)
+            end
         end
     end
 
     function setup_clone_repo(cache_repo::AbstractString, path::AbstractString; name="AAAA", email="BBBB@BBBB.COM")
         repo = LibGit2.clone(cache_repo, path)
+        LibGit2.fetch(repo)
         # need to set this for merges to succeed
         cfg = LibGit2.GitConfig(repo)
         LibGit2.set!(cfg, "user.name", name)
@@ -3146,16 +3093,35 @@ mktempdir() do dir
                 key = joinpath(root, common_name * ".key")
                 cert = joinpath(root, common_name * ".crt")
                 pem = joinpath(root, common_name * ".pem")
+                conf = joinpath(root, common_name * ".conf")
+
+                # Make sure test doesn't depend on system OpenSSL config (which may be broken)
+                open(conf, "w") do io
+                    write(io, """
+                        [req]
+                        distinguished_name = req_distinguished_name
+
+                        [req_distinguished_name]
+                        CN = $common_name
+                        """)
+                end
 
                 # Generated a certificate which has the CN set correctly but no subjectAltName
-                run(pipeline(`openssl req -new -x509 -newkey rsa:2048 -sha256 -nodes -keyout $key -out $cert -days 1 -subj "/CN=$common_name"`, stderr=devnull))
+                err = IOBuffer()
+                p = run(pipeline(addenv(
+                    `openssl req -new -x509 -newkey rsa:2048 -sha256 -nodes -keyout $key -out $cert -days 1 -subj "/CN=$common_name"`,
+                    "OPENSSL_CONF" => conf), stderr=err); wait=false)
+                wait(p)
+                @testset let err = String(take!(err))
+                    @test success(p)
+                end
                 run(`openssl x509 -in $cert -out $pem -outform PEM`)
 
                 local pobj, port
                 for attempt in 1:10
                     # Find an available port by listening, but there's a race condition where
                     # another process could grab this port, so retry on failure
-                    port, server = listenany(49152)
+                    port, server = listenany(49052 + rand(1:100) + attempt*10)
                     close(server)
 
                     # Make a fake Julia package and minimal HTTPS server with our generated
diff --git a/stdlib/LibGit2/test/online-tests.jl b/stdlib/LibGit2/test/online-tests.jl
index 96b6bf5b22371..c4d3cf452e78b 100644
--- a/stdlib/LibGit2/test/online-tests.jl
+++ b/stdlib/LibGit2/test/online-tests.jl
@@ -87,6 +87,39 @@ mktempdir() do dir
                 @test ex.code == LibGit2.Error.EAUTH
             end
         end
+
+        @testset "Shallow clone" begin
+            @static if LibGit2.VERSION >= v"1.7.0"
+                # Test shallow clone with depth=1
+                repo_path = joinpath(dir, "Example.Shallow")
+                c = LibGit2.CredentialPayload(allow_prompt=false, allow_git_helpers=false)
+                repo = LibGit2.clone(repo_url, repo_path, depth=1, credentials=c)
+                try
+                    @test isdir(repo_path)
+                    @test isdir(joinpath(repo_path, ".git"))
+                    @test LibGit2.isshallow(repo)
+                finally
+                    close(repo)
+                end
+            end
+        end
+    end
+end
+
+@testset "Remote" begin
+    repo_url = "https://github.com/JuliaLang/Example.jl"
+    LibGit2.with(LibGit2.GitRemoteDetached(repo_url)) do remote
+        @test !LibGit2.connected(remote)
+        c = LibGit2.CredentialPayload(allow_prompt=false, allow_git_helpers=false)
+        LibGit2.connect(remote, LibGit2.Consts.DIRECTION_FETCH, credentials=c)
+        @test LibGit2.connected(remote)
+        remote_heads = LibGit2.ls(remote)
+        default_branch = LibGit2.default_branch(remote)
+        @test !isempty(remote_heads)
+        @test startswith(default_branch, "refs/heads/")
+        @test any(head.name == default_branch for head in remote_heads)
+        LibGit2.disconnect(remote)
+        @test !LibGit2.connected(remote)
     end
 end
 
diff --git a/stdlib/LibGit2/test/runtests.jl b/stdlib/LibGit2/test/runtests.jl
index 88aea77f25671..4d2f4f9104c4e 100644
--- a/stdlib/LibGit2/test/runtests.jl
+++ b/stdlib/LibGit2/test/runtests.jl
@@ -1,6 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Test
+using Test, LibGit2
+
 @testset verbose=true "LibGit2 $test" for test in eachline(joinpath(@__DIR__, "testgroups"))
     include("$test.jl")
 end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(LibGit2))
+end
diff --git a/stdlib/LibGit2_jll/Project.toml b/stdlib/LibGit2_jll/Project.toml
index 4c16c1fb72e42..960e0e1d7c16b 100644
--- a/stdlib/LibGit2_jll/Project.toml
+++ b/stdlib/LibGit2_jll/Project.toml
@@ -1,15 +1,19 @@
 name = "LibGit2_jll"
 uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
-version = "1.6.1+0"
+version = "1.9.1+0"
 
 [deps]
-MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+OpenSSL_jll = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
 LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+PCRE2_jll = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
+Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
 
 [compat]
 julia = "1.9"
+CompilerSupportLibraries_jll = "1.3.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/LibGit2_jll/src/LibGit2_jll.jl b/stdlib/LibGit2_jll/src/LibGit2_jll.jl
index f8e814f1f7c30..350ed85a503f7 100644
--- a/stdlib/LibGit2_jll/src/LibGit2_jll.jl
+++ b/stdlib/LibGit2_jll/src/LibGit2_jll.jl
@@ -3,44 +3,69 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/LibGit2_jll.jl
 
 baremodule LibGit2_jll
-using Base, Libdl, MbedTLS_jll, LibSSH2_jll
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
+using Base, Libdl, LibSSH2_jll, PCRE2_jll, Zlib_jll
+if !(Sys.iswindows() || Sys.isapple())
+    using OpenSSL_jll
+end
+if Sys.iswindows() && Sys.WORD_SIZE == 32
+    using CompilerSupportLibraries_jll
+end
 
 export libgit2
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libgit2_handle::Ptr{Cvoid} = C_NULL
+
 libgit2_path::String = ""
+const libgit2 = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libgit2.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libgit2.1.9.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libgit2.so.1.9")
+    else
+        error("LibGit2_jll: Library 'libgit2' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows()
+        if Sys.WORD_SIZE == 32
+            LazyLibrary[libssh2, libgcc_s, libpcre2_8, libz]
+        else
+            LazyLibrary[libssh2, libpcre2_8, libz]
+        end
+    elseif Sys.isfreebsd() || Sys.islinux()
+        LazyLibrary[libssh2, libssl, libcrypto, libpcre2_8, libz]
+    else
+        LazyLibrary[libssh2, libpcre2_8, libz]
+    end
+)
 
-if Sys.iswindows()
-    const libgit2 = "libgit2.dll"
-elseif Sys.isapple()
-    const libgit2 = "@rpath/libgit2.1.6.dylib"
-else
-    const libgit2 = "libgit2.so.1.6"
+function eager_mode()
+    LibSSH2_jll.eager_mode()
+    @static if @isdefined OpenSSL_jll
+        OpenSSL_jll.eager_mode()
+    end
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
+    dlopen(libgit2)
 end
+is_available() = true
 
 function __init__()
-    global libgit2_handle = dlopen(libgit2)
-    global libgit2_path = dlpath(libgit2_handle)
+    global libgit2_path = string(libgit2.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libgit2_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libgit2_path() = libgit2_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module LibGit2_jll
diff --git a/stdlib/LibGit2_jll/test/runtests.jl b/stdlib/LibGit2_jll/test/runtests.jl
index 32ada173f01a0..a971c1e8aa402 100644
--- a/stdlib/LibGit2_jll/test/runtests.jl
+++ b/stdlib/LibGit2_jll/test/runtests.jl
@@ -7,5 +7,5 @@ using Test, Libdl, LibGit2_jll
     minor = Ref{Cint}(0)
     patch = Ref{Cint}(0)
     @test ccall((:git_libgit2_version, libgit2), Cint, (Ref{Cint}, Ref{Cint}, Ref{Cint}), major, minor, patch) == 0
-    @test VersionNumber(major[], minor[], patch[]) == v"1.6.1"
+    @test VersionNumber(major[], minor[], patch[]) == v"1.9.1"
 end
diff --git a/stdlib/LibSSH2_jll/Project.toml b/stdlib/LibSSH2_jll/Project.toml
index 8334a86d1c23a..c4bf18ca39d7c 100644
--- a/stdlib/LibSSH2_jll/Project.toml
+++ b/stdlib/LibSSH2_jll/Project.toml
@@ -1,14 +1,17 @@
 name = "LibSSH2_jll"
 uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
-version = "1.10.2+0"
+version = "1.11.3+1"
 
 [deps]
-MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+OpenSSL_jll = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
 
 [compat]
 julia = "1.8"
+CompilerSupportLibraries_jll = "1.3.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
index a809f7a912d6b..6c273bbdecc4d 100644
--- a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
+++ b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
@@ -3,45 +3,74 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/LibSSH2_jll.jl
 
 baremodule LibSSH2_jll
-using Base, Libdl, MbedTLS_jll
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
+using Base, Libdl
+if Sys.isfreebsd() || Sys.isapple()
+    using Zlib_jll
+end
+if Sys.iswindows() && Sys.WORD_SIZE == 32
+    using CompilerSupportLibraries_jll
+end
+if !Sys.iswindows()
+    using OpenSSL_jll
+end
 
 export libssh2
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libssh2_handle::Ptr{Cvoid} = C_NULL
+
 libssh2_path::String = ""
+const libssh2 = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libssh2.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libssh2.1.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libssh2.so.1")
+    else
+        error("LibSSH2_jll: Library 'libssh2' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows()
+        if Sys.WORD_SIZE == 32
+            LazyLibrary[libgcc_s]
+        else
+            LazyLibrary[]
+        end
+    elseif Sys.islinux()
+        LazyLibrary[libcrypto]
+    elseif Sys.isfreebsd() || Sys.isapple()
+        LazyLibrary[libz, libcrypto]
+    end
+)
 
-if Sys.iswindows()
-    const libssh2 = "libssh2.dll"
-elseif Sys.isapple()
-    const libssh2 = "@rpath/libssh2.1.dylib"
-else
-    const libssh2 = "libssh2.so.1"
+function eager_mode()
+    @static if @isdefined Zlib_jll
+        Zlib_jll.eager_mode()
+    end
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
+    @static if @isdefined OpenSSL_jll
+        OpenSSL_jll.eager_mode()
+    end
+    dlopen(libssh2)
 end
+is_available() = true
 
 function __init__()
-    global libssh2_handle = dlopen(libssh2)
-    global libssh2_path = dlpath(libssh2_handle)
+    global libssh2_path = string(libssh2.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libssh2_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libssh2_path() = libssh2_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module LibSSH2_jll
diff --git a/stdlib/LibSSH2_jll/test/runtests.jl b/stdlib/LibSSH2_jll/test/runtests.jl
index 58cfd9ac024cc..9a05270317752 100644
--- a/stdlib/LibSSH2_jll/test/runtests.jl
+++ b/stdlib/LibSSH2_jll/test/runtests.jl
@@ -3,6 +3,9 @@
 using Test, Libdl, LibSSH2_jll
 
 @testset "LibSSH2_jll" begin
-    # We use a `startswith()` here because when built from source, this returns "1.9.0_DEV"
-    vn = startswith(unsafe_string(ccall((:libssh2_version, libssh2), Cstring, (Cint,), 0)), "1.9.0")
+    vn = unsafe_string(ccall((:libssh2_version, libssh2), Cstring, (Cint,), 0))
+    # Depending on how LibSSH2_jll was installed (downloaded from
+    # BinaryBuilder or built from source here), the version number is
+    # either "1.11.1" or "1.11.1_DEV", respectively.
+    @test startswith(vn, "1.11.1")
 end
diff --git a/stdlib/LibUV_jll/Project.toml b/stdlib/LibUV_jll/Project.toml
index 2954809921440..3c9470e971a30 100644
--- a/stdlib/LibUV_jll/Project.toml
+++ b/stdlib/LibUV_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LibUV_jll"
 uuid = "183b4373-6708-53ba-ad28-60e28bb38547"
-version = "2.0.1+13"
+version = "2.0.1+21"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/LibUV_jll/src/LibUV_jll.jl b/stdlib/LibUV_jll/src/LibUV_jll.jl
index f6714fae536e9..5bf9b7ef3b0fb 100644
--- a/stdlib/LibUV_jll/src/LibUV_jll.jl
+++ b/stdlib/LibUV_jll/src/LibUV_jll.jl
@@ -4,43 +4,8 @@
 
 baremodule LibUV_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
-const PATH_list = String[]
-const LIBPATH_list = String[]
-
-export libuv
-
-# These get calculated in __init__()
-const PATH = Ref("")
-const LIBPATH = Ref("")
-artifact_dir::String = ""
-libuv_handle::Ptr{Cvoid} = C_NULL
-libuv_path::String = ""
-
-if Sys.iswindows()
-    const libuv = "libuv-2.dll"
-elseif Sys.isapple()
-    const libuv = "@rpath/libuv.2.dylib"
-else
-    const libuv = "libuv.so.2"
-end
-
-function __init__()
-    global libuv_handle = dlopen(libuv)
-    global libuv_path = dlpath(libuv_handle)
-    global artifact_dir = dirname(Sys.BINDIR)
-    LIBPATH[] = dirname(libuv_path)
-    push!(LIBPATH_list, LIBPATH[])
-end
-
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
+# NOTE: This file is currently empty, as we link libuv statically for now.
 is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libuv_path() = libuv_path
 
 end  # module LibUV_jll
diff --git a/stdlib/LibUV_jll/test/runtests.jl b/stdlib/LibUV_jll/test/runtests.jl
index 26c50b92c0c2d..0615edebaa070 100644
--- a/stdlib/LibUV_jll/test/runtests.jl
+++ b/stdlib/LibUV_jll/test/runtests.jl
@@ -1,8 +1,3 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test, Libdl, LibUV_jll
-
-@testset "LibUV_jll" begin
-    vn = VersionNumber(unsafe_string(ccall((:uv_version_string, libuv), Cstring, ())))
-    @test vn == v"2.0.0-dev"
-end
diff --git a/stdlib/LibUnwind_jll/Project.toml b/stdlib/LibUnwind_jll/Project.toml
index 1f5f695a26ba4..be1cbb1ceba2a 100644
--- a/stdlib/LibUnwind_jll/Project.toml
+++ b/stdlib/LibUnwind_jll/Project.toml
@@ -1,10 +1,12 @@
 name = "LibUnwind_jll"
 uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3"
-version = "1.5.0+4"
+version = "1.8.3+0"
 
 [deps]
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
 
 [compat]
 julia = "1.6"
diff --git a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
index 12abeaf598151..01d12556deca9 100644
--- a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
+++ b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
@@ -4,40 +4,45 @@
 
 baremodule LibUnwind_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
+using Zlib_jll
+if !Sys.isfreebsd()
+    using CompilerSupportLibraries_jll
+end
 
 export libunwind
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libunwind_handle::Ptr{Cvoid} = C_NULL
-libunwind_path::String = ""
 
-const libunwind = "libunwind.so.8"
+libunwind_path::String = ""
+const libunwind = LazyLibrary(
+    BundledLazyLibraryPath("libunwind.so.8"),
+    dependencies = LazyLibrary[libz]
+)
+
+function eager_mode()
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
+    Zlib_jll.eager_mode()
+    dlopen(libunwind)
+end
+is_available() = @static(Sys.islinux() || Sys.isfreebsd()) ? true : false
 
 function __init__()
-    # We only do something on Linux/FreeBSD
-    @static if Sys.islinux() || Sys.isfreebsd()
-        global libunwind_handle = dlopen(libunwind)
-        global libunwind_path = dlpath(libunwind_handle)
-        global artifact_dir = dirname(Sys.BINDIR)
-        LIBPATH[] = dirname(libunwind_path)
-        push!(LIBPATH_list, LIBPATH[])
-    end
+    global libunwind_path = string(libunwind.path)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libunwind_path)
+    push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = @static (Sys.islinux() || Sys.isfreebsd()) ? true : false
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libunwind_path() = libunwind_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module LibUnwind_jll
diff --git a/stdlib/LibUnwind_jll/test/runtests.jl b/stdlib/LibUnwind_jll/test/runtests.jl
index 1cb33dd6729e3..c87ccad988dec 100644
--- a/stdlib/LibUnwind_jll/test/runtests.jl
+++ b/stdlib/LibUnwind_jll/test/runtests.jl
@@ -4,6 +4,6 @@ using Test, Libdl, LibUnwind_jll
 
 @testset "LibUnwind_jll" begin
     if !Sys.isapple() && !Sys.iswindows()
-        @test dlsym(LibUnwind_jll.libunwind_handle, :unw_backtrace; throw_error=false) !== nothing
+        @test dlsym(LibUnwind_jll.libunwind, :unw_backtrace; throw_error=false) !== nothing
     end
 end
diff --git a/stdlib/Libdl/Project.toml b/stdlib/Libdl/Project.toml
index 26e5bf0cdefd7..7fab4b9334260 100644
--- a/stdlib/Libdl/Project.toml
+++ b/stdlib/Libdl/Project.toml
@@ -1,5 +1,6 @@
 name = "Libdl"
 uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Libdl/docs/src/index.md b/stdlib/Libdl/docs/src/index.md
index 62f9837831d55..3cd69486e5155 100644
--- a/stdlib/Libdl/docs/src/index.md
+++ b/stdlib/Libdl/docs/src/index.md
@@ -1,3 +1,11 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Libdl/docs/src/index.md"
+```
+
+```@docs
+Libdl
+```
+
 # Dynamic Linker
 
 ```@docs
@@ -13,3 +21,12 @@ Libdl.dlpath
 Libdl.find_library
 Libdl.DL_LOAD_PATH
 ```
+
+# Lazy Library Loading
+
+```@docs
+Libdl.LazyLibrary
+Libdl.LazyLibraryPath
+Libdl.BundledLazyLibraryPath
+Libdl.add_dependency!
+```
diff --git a/stdlib/Libdl/src/Libdl.jl b/stdlib/Libdl/src/Libdl.jl
index df3f62c807fed..8556387786571 100644
--- a/stdlib/Libdl/src/Libdl.jl
+++ b/stdlib/Libdl/src/Libdl.jl
@@ -1,13 +1,21 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-
+"""
+The Libdl module in Julia provides specialized and lower-level facilities for dynamic linking with shared libraries. While Julia
+inherently supports linking to runtime shared libraries through the `ccall` intrinsic, `Libdl` extends this capability by offering additional, more
+granular control. It enables users to search for shared libraries both in memory and the filesystem, manually load them with specific runtime linker options, and look up
+library symbols as low-level pointers.
+"""
 module Libdl
 # Just re-export Base.Libc.Libdl:
 export DL_LOAD_PATH, RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL,
     RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW, dlclose, dlopen, dlopen_e, dlsym, dlsym_e,
-    dlpath, find_library, dlext, dllist
+    dlpath, find_library, dlext, dllist, LazyLibrary, LazyLibraryPath, BundledLazyLibraryPath
+
+public add_dependency!
 
 import Base.Libc.Libdl: DL_LOAD_PATH, RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL,
                         RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW, dlclose, dlopen, dlopen_e, dlsym, dlsym_e,
-                        dlpath, find_library, dlext, dllist
+                        dlpath, find_library, dlext, dllist, LazyLibrary, LazyLibraryPath,
+                        BundledLazyLibraryPath, default_rtld_flags, add_dependency!
 
 end # module
diff --git a/stdlib/Libdl/test/runtests.jl b/stdlib/Libdl/test/runtests.jl
index 6863e28959b5e..159fa9d4f559d 100644
--- a/stdlib/Libdl/test/runtests.jl
+++ b/stdlib/Libdl/test/runtests.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test
-import Libdl
+using Libdl
 
 # these could fail on an embedded installation
 # but for now, we don't handle that case
@@ -27,8 +27,6 @@ end
 @test_throws ArgumentError Libdl.dlsym(C_NULL, :foo)
 @test_throws ArgumentError Libdl.dlsym_e(C_NULL, :foo)
 
-cd(@__DIR__) do
-
 # Find the library directory by finding the path of libjulia-internal (or libjulia-internal-debug,
 # as the case may be) to get the private library directory
 private_libdir = if Base.DARWIN_FRAMEWORK
@@ -267,4 +265,82 @@ mktempdir() do dir
     end
 end
 
+## Tests for LazyLibrary
+@testset "LazyLibrary" begin
+    lclf_path = joinpath(private_libdir, "libccalllazyfoo.$(Libdl.dlext)")
+    lclb_path = joinpath(private_libdir, "libccalllazybar.$(Libdl.dlext)")
+
+    # Ensure that our modified copy of `libccalltest` is not currently loaded
+    @test !any(contains.(dllist(), lclf_path))
+    @test !any(contains.(dllist(), lclb_path))
+
+    # Create a `LazyLibrary` structure that loads `libccalllazybar`
+    global lclf_loaded = false
+    global lclb_loaded = false
+
+    # We don't provide `dlclose()` on `LazyLibrary`'s since it is dangerous, you have to manage it yourself:
+    function close_libs()
+        global lclf_loaded = false
+        global lclb_loaded = false
+        if libccalllazybar.handle != C_NULL
+            dlclose(libccalllazybar.handle)
+        end
+        if libccalllazyfoo.handle != C_NULL
+            dlclose(libccalllazyfoo.handle)
+        end
+        @atomic libccalllazyfoo.handle = C_NULL
+        @atomic libccalllazybar.handle = C_NULL
+        @test !any(contains.(dllist(), lclf_path))
+        @test !any(contains.(dllist(), lclb_path))
+    end
+
+    let libccalllazyfoo = LazyLibrary(lclf_path; on_load_callback=() -> global lclf_loaded = true),
+        libccalllazybar = LazyLibrary(lclb_path; dependencies=[libccalllazyfoo], on_load_callback=() -> global lclb_loaded = true)
+        eval(:(const libccalllazyfoo = $libccalllazyfoo))
+        eval(:(const libccalllazybar = $libccalllazybar))
+    end
+    Core.@latestworld
+
+    # Creating `LazyLibrary` doesn't actually load anything
+    @test !lclf_loaded
+    @test !lclb_loaded
+
+    # Explicitly calling `dlopen()` does:
+    dlopen(libccalllazybar)
+    @test lclf_loaded
+    @test lclb_loaded
+    close_libs()
+
+    # Test that the library gets loaded when you use `ccall()`
+    compiled_bar() = ccall((:bar, libccalllazybar), Cint, (Cint,), 2)
+    @test ccall((:bar, libccalllazybar), Cint, (Cint,), 2) == compiled_bar() == 6
+    @test lclf_loaded
+    @test lclb_loaded
+    close_libs()
+
+    # Test that `@ccall` works:
+    @test @ccall(libccalllazybar.bar(2::Cint)::Cint) == 6
+    @test lclf_loaded
+    @test lclb_loaded
+    close_libs()
+
+    # Test that `dlpath()` works
+    @test dlpath(libccalllazybar) == realpath(string(libccalllazybar.path))
+    @test lclf_loaded
+    close_libs()
+
+    # Test that `cglobal()` works, both compiled and runtime emulation
+    compiled_cglobal() = cglobal((:bar, libccalllazybar))
+    @test cglobal((:bar, libccalllazybar)) === compiled_cglobal() === dlsym(dlopen(libccalllazybar), :bar)
+    @test lclf_loaded
+    close_libs()
+
+    # Test that we can use lazily-evaluated library names:
+    libname = LazyLibraryPath(private_libdir, "libccalllazyfoo.$(Libdl.dlext)")
+    lazy_name_lazy_lib = LazyLibrary(libname)
+    @test dlpath(lazy_name_lazy_lib) == realpath(string(libname))
+end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Libdl))
 end
diff --git a/stdlib/LinearAlgebra.version b/stdlib/LinearAlgebra.version
new file mode 100644
index 0000000000000..29af69363d54a
--- /dev/null
+++ b/stdlib/LinearAlgebra.version
@@ -0,0 +1,4 @@
+LINEARALGEBRA_BRANCH = master
+LINEARALGEBRA_SHA1 = b599095ef3da7ba7e950ee4700a3ba0fea047949
+LINEARALGEBRA_GIT_URL := https://github.com/JuliaLang/LinearAlgebra.jl.git
+LINEARALGEBRA_TAR_URL = https://api.github.com/repos/JuliaLang/LinearAlgebra.jl/tarball/$1
diff --git a/stdlib/LinearAlgebra/Project.toml b/stdlib/LinearAlgebra/Project.toml
deleted file mode 100644
index 46653aa795209..0000000000000
--- a/stdlib/LinearAlgebra/Project.toml
+++ /dev/null
@@ -1,14 +0,0 @@
-name = "LinearAlgebra"
-uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-
-[deps]
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
-OpenBLAS_jll = "4536629a-c528-5b80-bd46-f80d51c5b363"
-
-[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-
-[targets]
-test = ["Test", "Random"]
diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md
deleted file mode 100644
index 00ce21ed6fcae..0000000000000
--- a/stdlib/LinearAlgebra/docs/src/index.md
+++ /dev/null
@@ -1,860 +0,0 @@
-# [Linear Algebra](@id man-linalg)
-
-```@meta
-DocTestSetup = :(using LinearAlgebra)
-```
-
-In addition to (and as part of) its support for multi-dimensional arrays, Julia provides native implementations
-of many common and useful linear algebra operations which can be loaded with `using LinearAlgebra`. Basic operations, such as [`tr`](@ref), [`det`](@ref),
-and [`inv`](@ref) are all supported:
-
-```jldoctest
-julia> A = [1 2 3; 4 1 6; 7 8 1]
-3×3 Matrix{Int64}:
- 1  2  3
- 4  1  6
- 7  8  1
-
-julia> tr(A)
-3
-
-julia> det(A)
-104.0
-
-julia> inv(A)
-3×3 Matrix{Float64}:
- -0.451923   0.211538    0.0865385
-  0.365385  -0.192308    0.0576923
-  0.240385   0.0576923  -0.0673077
-```
-
-As well as other useful operations, such as finding eigenvalues or eigenvectors:
-
-```jldoctest
-julia> A = [-4. -17.; 2. 2.]
-2×2 Matrix{Float64}:
- -4.0  -17.0
-  2.0    2.0
-
-julia> eigvals(A)
-2-element Vector{ComplexF64}:
- -1.0 - 5.0im
- -1.0 + 5.0im
-
-julia> eigvecs(A)
-2×2 Matrix{ComplexF64}:
-  0.945905-0.0im        0.945905+0.0im
- -0.166924+0.278207im  -0.166924-0.278207im
-```
-
-In addition, Julia provides many [factorizations](@ref man-linalg-factorizations) which can be used to
-speed up problems such as linear solve or matrix exponentiation by pre-factorizing a matrix into a form
-more amenable (for performance or memory reasons) to the problem. See the documentation on [`factorize`](@ref)
-for more information. As an example:
-
-```jldoctest
-julia> A = [1.5 2 -4; 3 -1 -6; -10 2.3 4]
-3×3 Matrix{Float64}:
-   1.5   2.0  -4.0
-   3.0  -1.0  -6.0
- -10.0   2.3   4.0
-
-julia> factorize(A)
-LU{Float64, Matrix{Float64}, Vector{Int64}}
-L factor:
-3×3 Matrix{Float64}:
-  1.0    0.0       0.0
- -0.15   1.0       0.0
- -0.3   -0.132196  1.0
-U factor:
-3×3 Matrix{Float64}:
- -10.0  2.3     4.0
-   0.0  2.345  -3.4
-   0.0  0.0    -5.24947
-```
-
-Since `A` is not Hermitian, symmetric, triangular, tridiagonal, or bidiagonal, an LU factorization may be the
-best we can do. Compare with:
-
-```jldoctest
-julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5]
-3×3 Matrix{Float64}:
-  1.5   2.0  -4.0
-  2.0  -1.0  -3.0
- -4.0  -3.0   5.0
-
-julia> factorize(B)
-BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
-D factor:
-3×3 Tridiagonal{Float64, Vector{Float64}}:
- -1.64286   0.0   ⋅
-  0.0      -2.8  0.0
-   ⋅        0.0  5.0
-U factor:
-3×3 UnitUpperTriangular{Float64, Matrix{Float64}}:
- 1.0  0.142857  -0.8
-  ⋅   1.0       -0.6
-  ⋅    ⋅         1.0
-permutation:
-3-element Vector{Int64}:
- 1
- 2
- 3
-```
-
-Here, Julia was able to detect that `B` is in fact symmetric, and used a more appropriate factorization.
-Often it's possible to write more efficient code for a matrix that is known to have certain properties e.g.
-it is symmetric, or tridiagonal. Julia provides some special types so that you can "tag" matrices as having
-these properties. For instance:
-
-```jldoctest
-julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5]
-3×3 Matrix{Float64}:
-  1.5   2.0  -4.0
-  2.0  -1.0  -3.0
- -4.0  -3.0   5.0
-
-julia> sB = Symmetric(B)
-3×3 Symmetric{Float64, Matrix{Float64}}:
-  1.5   2.0  -4.0
-  2.0  -1.0  -3.0
- -4.0  -3.0   5.0
-```
-
-`sB` has been tagged as a matrix that's (real) symmetric, so for later operations we might perform on it,
-such as eigenfactorization or computing matrix-vector products, efficiencies can be found by only referencing
-half of it. For example:
-
-```jldoctest
-julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5]
-3×3 Matrix{Float64}:
-  1.5   2.0  -4.0
-  2.0  -1.0  -3.0
- -4.0  -3.0   5.0
-
-julia> sB = Symmetric(B)
-3×3 Symmetric{Float64, Matrix{Float64}}:
-  1.5   2.0  -4.0
-  2.0  -1.0  -3.0
- -4.0  -3.0   5.0
-
-julia> x = [1; 2; 3]
-3-element Vector{Int64}:
- 1
- 2
- 3
-
-julia> sB\x
-3-element Vector{Float64}:
- -1.7391304347826084
- -1.1086956521739126
- -1.4565217391304346
-```
-
-The `\` operation here performs the linear solution. The left-division operator is pretty
-powerful and it's easy to write compact, readable code that is flexible enough to solve all
-sorts of systems of linear equations.
-
-## Special matrices
-
-[Matrices with special symmetries and structures](http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274)
-arise often in linear algebra and are frequently associated with various matrix factorizations.
-Julia features a rich collection of special matrix types, which allow for fast computation with
-specialized routines that are specially developed for particular matrix types.
-
-The following tables summarize the types of special matrices that have been implemented in Julia,
-as well as whether hooks to various optimized methods for them in LAPACK are available.
-
-| Type                          | Description                                                                                   |
-|:----------------------------- |:--------------------------------------------------------------------------------------------- |
-| [`Symmetric`](@ref)           | [Symmetric matrix](https://en.wikipedia.org/wiki/Symmetric_matrix)                            |
-| [`Hermitian`](@ref)           | [Hermitian matrix](https://en.wikipedia.org/wiki/Hermitian_matrix)                            |
-| [`UpperTriangular`](@ref)     | Upper [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix)                    |
-| [`UnitUpperTriangular`](@ref) | Upper [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) with unit diagonal |
-| [`LowerTriangular`](@ref)     | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix)                    |     |
-| [`UnitLowerTriangular`](@ref) | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) with unit diagonal |
-| [`UpperHessenberg`](@ref)     | Upper [Hessenberg matrix](https://en.wikipedia.org/wiki/Hessenberg_matrix)
-| [`Tridiagonal`](@ref)         | [Tridiagonal matrix](https://en.wikipedia.org/wiki/Tridiagonal_matrix)                        |
-| [`SymTridiagonal`](@ref)      | Symmetric tridiagonal matrix                                                                  |
-| [`Bidiagonal`](@ref)          | Upper/lower [bidiagonal matrix](https://en.wikipedia.org/wiki/Bidiagonal_matrix)              |
-| [`Diagonal`](@ref)            | [Diagonal matrix](https://en.wikipedia.org/wiki/Diagonal_matrix)                              |
-| [`UniformScaling`](@ref)      | [Uniform scaling operator](https://en.wikipedia.org/wiki/Uniform_scaling)                     |
-
-### Elementary operations
-
-| Matrix type                   | `+` | `-` | `*` | `\` | Other functions with optimized methods                      |
-|:----------------------------- |:--- |:--- |:--- |:--- |:----------------------------------------------------------- |
-| [`Symmetric`](@ref)           |     |     |     | MV  | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref)                |
-| [`Hermitian`](@ref)           |     |     |     | MV  | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref)                |
-| [`UpperTriangular`](@ref)     |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref)                                |
-| [`UnitUpperTriangular`](@ref) |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref)                                |
-| [`LowerTriangular`](@ref)     |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref)                                |
-| [`UnitLowerTriangular`](@ref) |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref)                                |
-| [`UpperHessenberg`](@ref)     |     |     |     | MM  | [`inv`](@ref), [`det`](@ref)                                |
-| [`SymTridiagonal`](@ref)      | M   | M   | MS  | MV  | [`eigmax`](@ref), [`eigmin`](@ref)                          |
-| [`Tridiagonal`](@ref)         | M   | M   | MS  | MV  |                                                             |
-| [`Bidiagonal`](@ref)          | M   | M   | MS  | MV  |                                                             |
-| [`Diagonal`](@ref)            | M   | M   | MV  | MV  | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref), [`/`](@ref) |
-| [`UniformScaling`](@ref)      | M   | M   | MVS | MVS | [`/`](@ref)                                                 |
-
-Legend:
-
-| Key        | Description                                                   |
-|:---------- |:------------------------------------------------------------- |
-| M (matrix) | An optimized method for matrix-matrix operations is available |
-| V (vector) | An optimized method for matrix-vector operations is available |
-| S (scalar) | An optimized method for matrix-scalar operations is available |
-
-### Matrix factorizations
-
-| Matrix type                   | LAPACK | [`eigen`](@ref) | [`eigvals`](@ref) | [`eigvecs`](@ref) | [`svd`](@ref) | [`svdvals`](@ref) |
-|:----------------------------- |:------ |:------------- |:----------------- |:----------------- |:------------- |:----------------- |
-| [`Symmetric`](@ref)           | SY     |               | ARI               |                   |               |                   |
-| [`Hermitian`](@ref)           | HE     |               | ARI               |                   |               |                   |
-| [`UpperTriangular`](@ref)     | TR     | A             | A                 | A                 |               |                   |
-| [`UnitUpperTriangular`](@ref) | TR     | A             | A                 | A                 |               |                   |
-| [`LowerTriangular`](@ref)     | TR     | A             | A                 | A                 |               |                   |
-| [`UnitLowerTriangular`](@ref) | TR     | A             | A                 | A                 |               |                   |
-| [`SymTridiagonal`](@ref)      | ST     | A             | ARI               | AV                |               |                   |
-| [`Tridiagonal`](@ref)         | GT     |               |                   |                   |               |                   |
-| [`Bidiagonal`](@ref)          | BD     |               |                   |                   | A             | A                 |
-| [`Diagonal`](@ref)            | DI     |               | A                 |                   |               |                   |
-
-Legend:
-
-| Key          | Description                                                                                                                     | Example              |
-|:------------ |:------------------------------------------------------------------------------------------------------------------------------- |:-------------------- |
-| A (all)      | An optimized method to find all the characteristic values and/or vectors is available                                           | e.g. `eigvals(M)`    |
-| R (range)    | An optimized method to find the `il`th through the `ih`th characteristic values are available                                   | `eigvals(M, il, ih)` |
-| I (interval) | An optimized method to find the characteristic values in the interval [`vl`, `vh`] is available                                 | `eigvals(M, vl, vh)` |
-| V (vectors)  | An optimized method to find the characteristic vectors corresponding to the characteristic values `x=[x1, x2,...]` is available | `eigvecs(M, x)`      |
-
-### The uniform scaling operator
-
-A [`UniformScaling`](@ref) operator represents a scalar times the identity operator, `λ*I`. The identity
-operator `I` is defined as a constant and is an instance of `UniformScaling`. The size of these
-operators are generic and match the other matrix in the binary operations [`+`](@ref), [`-`](@ref),
-[`*`](@ref) and [`\`](@ref). For `A+I` and `A-I` this means that `A` must be square. Multiplication
-with the identity operator `I` is a noop (except for checking that the scaling factor is one)
-and therefore almost without overhead.
-
-To see the `UniformScaling` operator in action:
-
-```jldoctest
-julia> U = UniformScaling(2);
-
-julia> a = [1 2; 3 4]
-2×2 Matrix{Int64}:
- 1  2
- 3  4
-
-julia> a + U
-2×2 Matrix{Int64}:
- 3  2
- 3  6
-
-julia> a * U
-2×2 Matrix{Int64}:
- 2  4
- 6  8
-
-julia> [a U]
-2×4 Matrix{Int64}:
- 1  2  2  0
- 3  4  0  2
-
-julia> b = [1 2 3; 4 5 6]
-2×3 Matrix{Int64}:
- 1  2  3
- 4  5  6
-
-julia> b - U
-ERROR: DimensionMismatch: matrix is not square: dimensions are (2, 3)
-Stacktrace:
-[...]
-```
-
-If you need to solve many systems of the form `(A+μI)x = b` for the same `A` and different `μ`, it might be beneficial
-to first compute the Hessenberg factorization `F` of `A` via the [`hessenberg`](@ref) function.
-Given `F`, Julia employs an efficient algorithm for `(F+μ*I) \ b` (equivalent to `(A+μ*I)x \ b`) and related
-operations like determinants.
-
-## [Matrix factorizations](@id man-linalg-factorizations)
-
-[Matrix factorizations (a.k.a. matrix decompositions)](https://en.wikipedia.org/wiki/Matrix_decomposition)
-compute the factorization of a matrix into a product of matrices, and are one of the central concepts
-in (numerical) linear algebra.
-
-The following table summarizes the types of matrix factorizations that have been implemented in
-Julia. Details of their associated methods can be found in the [Standard functions](@ref) section
-of the Linear Algebra documentation.
-
-| Type               | Description                                                                                                    |
-|:------------------ |:-------------------------------------------------------------------------------------------------------------- |
-| `BunchKaufman`     | Bunch-Kaufman factorization                                                                                    |
-| `Cholesky`         | [Cholesky factorization](https://en.wikipedia.org/wiki/Cholesky_decomposition)                                 |
-| `CholeskyPivoted`  | [Pivoted](https://en.wikipedia.org/wiki/Pivot_element) Cholesky factorization                                  |
-| `LDLt`             | [LDL(T) factorization](https://en.wikipedia.org/wiki/Cholesky_decomposition#LDL_decomposition)                 |
-| `LU`               | [LU factorization](https://en.wikipedia.org/wiki/LU_decomposition)                                             |
-| `QR`               | [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition)                                             |
-| `QRCompactWY`      | Compact WY form of the QR factorization                                                                        |
-| `QRPivoted`        | Pivoted [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition)                                     |
-| `LQ`               | [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition) of `transpose(A)`                           |
-| `Hessenberg`       | [Hessenberg decomposition](http://mathworld.wolfram.com/HessenbergDecomposition.html)                          |
-| `Eigen`            | [Spectral decomposition](https://en.wikipedia.org/wiki/Eigendecomposition_of_a_matrix)                         |
-| `GeneralizedEigen` | [Generalized spectral decomposition](https://en.wikipedia.org/wiki/Eigendecomposition_of_a_matrix#Generalized_eigenvalue_problem)                            |
-| `SVD`              | [Singular value decomposition](https://en.wikipedia.org/wiki/Singular_value_decomposition)                     |
-| `GeneralizedSVD`   | [Generalized SVD](https://en.wikipedia.org/wiki/Generalized_singular_value_decomposition#Higher_order_version) |
-| `Schur`            | [Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition)                                       |
-| `GeneralizedSchur` | [Generalized Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition#Generalized_Schur_decomposition) |
-
-Adjoints and transposes of [`Factorization`](@ref) objects are lazily wrapped in
-`AdjointFactorization` and `TransposeFactorization` objects, respectively. Generically,
-transpose of real `Factorization`s are wrapped as `AdjointFactorization`.
-
-## [Orthogonal matrices (`AbstractQ`)](@id man-linalg-abstractq)
-
-Some matrix factorizations generate orthogonal/unitary "matrix" factors. These
-factorizations include QR-related factorizations obtained from calls to [`qr`](@ref), i.e.,
-`QR`, `QRCompactWY` and `QRPivoted`, the Hessenberg factorization obtained from calls to
-[`hessenberg`](@ref), and the LQ factorization obtained from [`lq`](@ref). While these
-orthogonal/unitary factors admit a matrix representation, their internal representation
-is, for performance and memory reasons, different. Hence, they should be rather viewed as
-matrix-backed, function-based linear operators. In particular, reading, for instance, a
-column of its matrix representation requires running "matrix"-vector multiplication code,
-rather than simply reading out data from memory (possibly filling parts of the vector with
-structural zeros). Another clear distinction from other, non-triangular matrix types is
-that the underlying multiplication code allows for in-place modification during multiplication.
-Furthermore, objects of specific `AbstractQ` subtypes as those created via [`qr`](@ref),
-[`hessenberg`](@ref) and [`lq`](@ref) can behave like a square or a rectangular matrix
-depending on context:
-
-```julia
-julia> using LinearAlgebra
-
-julia> Q = qr(rand(3,2)).Q
-3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}
-
-julia> Matrix(Q)
-3×2 Matrix{Float64}:
- -0.320597   0.865734
- -0.765834  -0.475694
- -0.557419   0.155628
-
-julia> Q*I
-3×3 Matrix{Float64}:
- -0.320597   0.865734  -0.384346
- -0.765834  -0.475694  -0.432683
- -0.557419   0.155628   0.815514
-
-julia> Q*ones(2)
-3-element Vector{Float64}:
-  0.5451367118802273
- -1.241527373086654
- -0.40179067589600226
-
-julia> Q*ones(3)
-3-element Vector{Float64}:
-  0.16079054743832022
- -1.674209978965636
-  0.41372375588835797
-
-julia> ones(1,2) * Q'
-1×3 Matrix{Float64}:
- 0.545137  -1.24153  -0.401791
-
-julia> ones(1,3) * Q'
-1×3 Matrix{Float64}:
- 0.160791  -1.67421  0.413724
-```
-
-Due to this distinction from dense or structured matrices, the abstract `AbstractQ` type
-does not subtype `AbstractMatrix`, but instead has its own type hierarchy. Custom types
-that subtype `AbstractQ` can rely on generic fallbacks if the following interface is satisfied.
-For example, for
-
-```julia
-struct MyQ{T} <: LinearAlgebra.AbstractQ{T}
-    # required fields
-end
-```
-
-provide overloads for
-
-```julia
-Base.size(Q::MyQ) # size of corresponding square matrix representation
-Base.convert(::Type{AbstractQ{T}}, Q::MyQ) # eltype promotion [optional]
-LinearAlgebra.lmul!(Q::MyQ, x::AbstractVecOrMat) # left-multiplication
-LinearAlgebra.rmul!(A::AbstractMatrix, Q::MyQ) # right-multiplication
-```
-
-If `eltype` promotion is not of interest, the `convert` method is unnecessary, since by
-default `convert(::Type{AbstractQ{T}}, Q::AbstractQ{T})` returns `Q` itself.
-Adjoints of `AbstractQ`-typed objects are lazily wrapped in an `AdjointQ` wrapper type,
-which requires its own `LinearAlgebra.lmul!` and `LinearAlgebra.rmul!` methods. Given this
-set of methods, any `Q::MyQ` can be used like a matrix, preferably in a multiplicative
-context: multiplication via `*` with scalars, vectors and matrices from left and right,
-obtaining a matrix representation of `Q` via `Matrix(Q)` (or `Q*I`) and indexing into the
-matrix representation all work. In contrast, addition and subtraction as well as more
-generally broadcasting over elements in the matrix representation fail because that would
-be highly inefficient. For such use cases, consider computing the matrix representation
-up front and cache it for future reuse.
-
-## Standard functions
-
-Linear algebra functions in Julia are largely implemented by calling functions from [LAPACK](http://www.netlib.org/lapack/).
-Sparse matrix factorizations call functions from [SuiteSparse](http://suitesparse.com).
-Other sparse solvers are available as Julia packages.
-
-```@docs
-Base.:*(::AbstractMatrix, ::AbstractMatrix)
-Base.:\(::AbstractMatrix, ::AbstractVecOrMat)
-Base.:/(::AbstractVecOrMat, ::AbstractVecOrMat)
-LinearAlgebra.SingularException
-LinearAlgebra.PosDefException
-LinearAlgebra.ZeroPivotException
-LinearAlgebra.dot
-LinearAlgebra.dot(::Any, ::Any, ::Any)
-LinearAlgebra.cross
-LinearAlgebra.axpy!
-LinearAlgebra.axpby!
-LinearAlgebra.rotate!
-LinearAlgebra.reflect!
-LinearAlgebra.factorize
-LinearAlgebra.Diagonal
-LinearAlgebra.Bidiagonal
-LinearAlgebra.SymTridiagonal
-LinearAlgebra.Tridiagonal
-LinearAlgebra.Symmetric
-LinearAlgebra.Hermitian
-LinearAlgebra.LowerTriangular
-LinearAlgebra.UpperTriangular
-LinearAlgebra.UnitLowerTriangular
-LinearAlgebra.UnitUpperTriangular
-LinearAlgebra.UpperHessenberg
-LinearAlgebra.UniformScaling
-LinearAlgebra.I
-LinearAlgebra.UniformScaling(::Integer)
-LinearAlgebra.Factorization
-LinearAlgebra.LU
-LinearAlgebra.lu
-LinearAlgebra.lu!
-LinearAlgebra.Cholesky
-LinearAlgebra.CholeskyPivoted
-LinearAlgebra.cholesky
-LinearAlgebra.cholesky!
-LinearAlgebra.lowrankupdate
-LinearAlgebra.lowrankdowndate
-LinearAlgebra.lowrankupdate!
-LinearAlgebra.lowrankdowndate!
-LinearAlgebra.LDLt
-LinearAlgebra.ldlt
-LinearAlgebra.ldlt!
-LinearAlgebra.QR
-LinearAlgebra.QRCompactWY
-LinearAlgebra.QRPivoted
-LinearAlgebra.qr
-LinearAlgebra.qr!
-LinearAlgebra.LQ
-LinearAlgebra.lq
-LinearAlgebra.lq!
-LinearAlgebra.BunchKaufman
-LinearAlgebra.bunchkaufman
-LinearAlgebra.bunchkaufman!
-LinearAlgebra.Eigen
-LinearAlgebra.GeneralizedEigen
-LinearAlgebra.eigvals
-LinearAlgebra.eigvals!
-LinearAlgebra.eigmax
-LinearAlgebra.eigmin
-LinearAlgebra.eigvecs
-LinearAlgebra.eigen
-LinearAlgebra.eigen!
-LinearAlgebra.Hessenberg
-LinearAlgebra.hessenberg
-LinearAlgebra.hessenberg!
-LinearAlgebra.Schur
-LinearAlgebra.GeneralizedSchur
-LinearAlgebra.schur
-LinearAlgebra.schur!
-LinearAlgebra.ordschur
-LinearAlgebra.ordschur!
-LinearAlgebra.SVD
-LinearAlgebra.GeneralizedSVD
-LinearAlgebra.svd
-LinearAlgebra.svd!
-LinearAlgebra.svdvals
-LinearAlgebra.svdvals!
-LinearAlgebra.Givens
-LinearAlgebra.givens
-LinearAlgebra.triu
-LinearAlgebra.triu!
-LinearAlgebra.tril
-LinearAlgebra.tril!
-LinearAlgebra.diagind
-LinearAlgebra.diag
-LinearAlgebra.diagm
-LinearAlgebra.rank
-LinearAlgebra.norm
-LinearAlgebra.opnorm
-LinearAlgebra.normalize!
-LinearAlgebra.normalize
-LinearAlgebra.cond
-LinearAlgebra.condskeel
-LinearAlgebra.tr
-LinearAlgebra.det
-LinearAlgebra.logdet
-LinearAlgebra.logabsdet
-Base.inv(::AbstractMatrix)
-LinearAlgebra.pinv
-LinearAlgebra.nullspace
-Base.kron
-Base.kron!
-LinearAlgebra.exp(::StridedMatrix{<:LinearAlgebra.BlasFloat})
-Base.cis(::AbstractMatrix)
-Base.:^(::AbstractMatrix, ::Number)
-Base.:^(::Number, ::AbstractMatrix)
-LinearAlgebra.log(::StridedMatrix)
-LinearAlgebra.sqrt(::StridedMatrix)
-LinearAlgebra.cos(::StridedMatrix{<:Real})
-LinearAlgebra.sin(::StridedMatrix{<:Real})
-LinearAlgebra.sincos(::StridedMatrix{<:Real})
-LinearAlgebra.tan(::StridedMatrix{<:Real})
-LinearAlgebra.sec(::StridedMatrix)
-LinearAlgebra.csc(::StridedMatrix)
-LinearAlgebra.cot(::StridedMatrix)
-LinearAlgebra.cosh(::StridedMatrix)
-LinearAlgebra.sinh(::StridedMatrix)
-LinearAlgebra.tanh(::StridedMatrix)
-LinearAlgebra.sech(::StridedMatrix)
-LinearAlgebra.csch(::StridedMatrix)
-LinearAlgebra.coth(::StridedMatrix)
-LinearAlgebra.acos(::StridedMatrix)
-LinearAlgebra.asin(::StridedMatrix)
-LinearAlgebra.atan(::StridedMatrix)
-LinearAlgebra.asec(::StridedMatrix)
-LinearAlgebra.acsc(::StridedMatrix)
-LinearAlgebra.acot(::StridedMatrix)
-LinearAlgebra.acosh(::StridedMatrix)
-LinearAlgebra.asinh(::StridedMatrix)
-LinearAlgebra.atanh(::StridedMatrix)
-LinearAlgebra.asech(::StridedMatrix)
-LinearAlgebra.acsch(::StridedMatrix)
-LinearAlgebra.acoth(::StridedMatrix)
-LinearAlgebra.lyap
-LinearAlgebra.sylvester
-LinearAlgebra.issuccess
-LinearAlgebra.issymmetric
-LinearAlgebra.isposdef
-LinearAlgebra.isposdef!
-LinearAlgebra.istril
-LinearAlgebra.istriu
-LinearAlgebra.isdiag
-LinearAlgebra.ishermitian
-Base.transpose
-LinearAlgebra.transpose!
-LinearAlgebra.Transpose
-LinearAlgebra.TransposeFactorization
-Base.adjoint
-LinearAlgebra.adjoint!
-LinearAlgebra.Adjoint
-LinearAlgebra.AdjointFactorization
-Base.copy(::Union{Transpose,Adjoint})
-LinearAlgebra.stride1
-LinearAlgebra.checksquare
-LinearAlgebra.peakflops
-LinearAlgebra.hermitianpart
-LinearAlgebra.hermitianpart!
-```
-
-## Low-level matrix operations
-
-In many cases there are in-place versions of matrix operations that allow you to supply
-a pre-allocated output vector or matrix.  This is useful when optimizing critical code in order
-to avoid the overhead of repeated allocations. These in-place operations are suffixed with `!`
-below (e.g. `mul!`) according to the usual Julia convention.
-
-```@docs
-LinearAlgebra.mul!
-LinearAlgebra.lmul!
-LinearAlgebra.rmul!
-LinearAlgebra.ldiv!
-LinearAlgebra.rdiv!
-```
-
-## BLAS functions
-
-In Julia (as in much of scientific computation), dense linear-algebra operations are based on
-the [LAPACK library](http://www.netlib.org/lapack/), which in turn is built on top of basic linear-algebra
-building-blocks known as the [BLAS](http://www.netlib.org/blas/). There are highly optimized
-implementations of BLAS available for every computer architecture, and sometimes in high-performance
-linear algebra routines it is useful to call the BLAS functions directly.
-
-`LinearAlgebra.BLAS` provides wrappers for some of the BLAS functions. Those BLAS functions
-that overwrite one of the input arrays have names ending in `'!'`.  Usually, a BLAS function has
-four methods defined, for [`Float32`](@ref), [`Float64`](@ref), [`ComplexF32`](@ref Complex),
-and [`ComplexF64`](@ref Complex) arrays.
-
-### [BLAS character arguments](@id stdlib-blas-chars)
-
-Many BLAS functions accept arguments that determine whether to transpose an argument (`trans`),
-which triangle of a matrix to reference (`uplo` or `ul`),
-whether the diagonal of a triangular matrix can be assumed to
-be all ones (`dA`) or which side of a matrix multiplication
-the input argument belongs on (`side`). The possibilities are:
-
-#### [Multiplication order](@id stdlib-blas-side)
-
-| `side` | Meaning                                                             |
-|:-------|:--------------------------------------------------------------------|
-| `'L'`  | The argument goes on the *left* side of a matrix-matrix operation.  |
-| `'R'`  | The argument goes on the *right* side of a matrix-matrix operation. |
-
-#### [Triangle referencing](@id stdlib-blas-uplo)
-
-| `uplo`/`ul` | Meaning                                               |
-|:------------|:------------------------------------------------------|
-| `'U'`       | Only the *upper* triangle of the matrix will be used. |
-| `'L'`       | Only the *lower* triangle of the matrix will be used. |
-
-#### [Transposition operation](@id stdlib-blas-trans)
-
-| `trans`/`tX` | Meaning                                                 |
-|:-------------|:--------------------------------------------------------|
-| `'N'`        | The input matrix `X` is not transposed or conjugated.   |
-| `'T'`        | The input matrix `X` will be transposed.                |
-| `'C'`        | The input matrix `X` will be conjugated and transposed. |
-
-#### [Unit diagonal](@id stdlib-blas-diag)
-
-| `diag`/`dX` | Meaning                                                   |
-|:------------|:----------------------------------------------------------|
-| `'N'`       | The diagonal values of the matrix `X` will be read.       |
-| `'U'`       | The diagonal of the matrix `X` is assumed to be all ones. |
-
-```@docs
-LinearAlgebra.BLAS
-LinearAlgebra.BLAS.set_num_threads
-LinearAlgebra.BLAS.get_num_threads
-```
-
-BLAS functions can be divided into three groups, also called three levels,
-depending on when they were first proposed, the type of input parameters,
-and the complexity of the operation.
-
-### Level 1 BLAS functions
-
-The level 1 BLAS functions were first proposed in [(Lawson, 1979)][Lawson-1979] and
-define operations between scalars and vectors.
-
-[Lawson-1979]: https://dl.acm.org/doi/10.1145/355841.355847
-
-```@docs
-# xROTG
-# xROTMG
-LinearAlgebra.BLAS.rot!
-# xROTM
-# xSWAP
-LinearAlgebra.BLAS.scal!
-LinearAlgebra.BLAS.scal
-LinearAlgebra.BLAS.blascopy!
-# xAXPY!
-# xAXPBY!
-LinearAlgebra.BLAS.dot
-LinearAlgebra.BLAS.dotu
-LinearAlgebra.BLAS.dotc
-# xxDOT
-LinearAlgebra.BLAS.nrm2
-LinearAlgebra.BLAS.asum
-LinearAlgebra.BLAS.iamax
-```
-
-### Level 2 BLAS functions
-
-The level 2 BLAS functions were published in [(Dongarra, 1988)][Dongarra-1988],
-and define matrix-vector operations.
-
-[Dongarra-1988]: https://dl.acm.org/doi/10.1145/42288.42291
-
-**return a vector**
-
-```@docs
-LinearAlgebra.BLAS.gemv!
-LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.gbmv!
-LinearAlgebra.BLAS.gbmv
-LinearAlgebra.BLAS.hemv!
-LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any)
-# hbmv!, hbmv
-LinearAlgebra.BLAS.hpmv!
-LinearAlgebra.BLAS.symv!
-LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.sbmv!
-LinearAlgebra.BLAS.sbmv(::Any, ::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.sbmv(::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.spmv!
-LinearAlgebra.BLAS.trmv!
-LinearAlgebra.BLAS.trmv
-# xTBMV
-# xTPMV
-LinearAlgebra.BLAS.trsv!
-LinearAlgebra.BLAS.trsv
-# xTBSV
-# xTPSV
-```
-
-**return a matrix**
-
-```@docs
-LinearAlgebra.BLAS.ger!
-# xGERU
-# xGERC
-LinearAlgebra.BLAS.her!
-# xHPR
-# xHER2
-# xHPR2
-LinearAlgebra.BLAS.syr!
-LinearAlgebra.BLAS.spr!
-# xSYR2
-# xSPR2
-```
-
-### Level 3 BLAS functions
-
-The level 3 BLAS functions were published in [(Dongarra, 1990)][Dongarra-1990],
-and define matrix-matrix operations.
-
-[Dongarra-1990]: https://dl.acm.org/doi/10.1145/77626.79170
-
-```@docs
-LinearAlgebra.BLAS.gemm!
-LinearAlgebra.BLAS.gemm(::Any, ::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.gemm(::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.symm!
-LinearAlgebra.BLAS.symm(::Any, ::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.symm(::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.hemm!
-LinearAlgebra.BLAS.hemm(::Any, ::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.hemm(::Any, ::Any, ::Any, ::Any)
-LinearAlgebra.BLAS.syrk!
-LinearAlgebra.BLAS.syrk
-LinearAlgebra.BLAS.herk!
-LinearAlgebra.BLAS.herk
-LinearAlgebra.BLAS.syr2k!
-LinearAlgebra.BLAS.syr2k
-LinearAlgebra.BLAS.her2k!
-LinearAlgebra.BLAS.her2k
-LinearAlgebra.BLAS.trmm!
-LinearAlgebra.BLAS.trmm
-LinearAlgebra.BLAS.trsm!
-LinearAlgebra.BLAS.trsm
-```
-
-## LAPACK functions
-
-`LinearAlgebra.LAPACK` provides wrappers for some of the LAPACK functions for linear algebra.
- Those functions that overwrite one of the input arrays have names ending in `'!'`.
-
-Usually a function has 4 methods defined, one each for [`Float64`](@ref), [`Float32`](@ref),
-`ComplexF64` and `ComplexF32` arrays.
-
-Note that the LAPACK API provided by Julia can and will change in the future. Since this API is
-not user-facing, there is no commitment to support/deprecate this specific set of functions in
-future releases.
-
-```@docs
-LinearAlgebra.LAPACK
-LinearAlgebra.LAPACK.gbtrf!
-LinearAlgebra.LAPACK.gbtrs!
-LinearAlgebra.LAPACK.gebal!
-LinearAlgebra.LAPACK.gebak!
-LinearAlgebra.LAPACK.gebrd!
-LinearAlgebra.LAPACK.gelqf!
-LinearAlgebra.LAPACK.geqlf!
-LinearAlgebra.LAPACK.geqrf!
-LinearAlgebra.LAPACK.geqp3!
-LinearAlgebra.LAPACK.gerqf!
-LinearAlgebra.LAPACK.geqrt!
-LinearAlgebra.LAPACK.geqrt3!
-LinearAlgebra.LAPACK.getrf!
-LinearAlgebra.LAPACK.tzrzf!
-LinearAlgebra.LAPACK.ormrz!
-LinearAlgebra.LAPACK.gels!
-LinearAlgebra.LAPACK.gesv!
-LinearAlgebra.LAPACK.getrs!
-LinearAlgebra.LAPACK.getri!
-LinearAlgebra.LAPACK.gesvx!
-LinearAlgebra.LAPACK.gelsd!
-LinearAlgebra.LAPACK.gelsy!
-LinearAlgebra.LAPACK.gglse!
-LinearAlgebra.LAPACK.geev!
-LinearAlgebra.LAPACK.gesdd!
-LinearAlgebra.LAPACK.gesvd!
-LinearAlgebra.LAPACK.ggsvd!
-LinearAlgebra.LAPACK.ggsvd3!
-LinearAlgebra.LAPACK.geevx!
-LinearAlgebra.LAPACK.ggev!
-LinearAlgebra.LAPACK.ggev3!
-LinearAlgebra.LAPACK.gtsv!
-LinearAlgebra.LAPACK.gttrf!
-LinearAlgebra.LAPACK.gttrs!
-LinearAlgebra.LAPACK.orglq!
-LinearAlgebra.LAPACK.orgqr!
-LinearAlgebra.LAPACK.orgql!
-LinearAlgebra.LAPACK.orgrq!
-LinearAlgebra.LAPACK.ormlq!
-LinearAlgebra.LAPACK.ormqr!
-LinearAlgebra.LAPACK.ormql!
-LinearAlgebra.LAPACK.ormrq!
-LinearAlgebra.LAPACK.gemqrt!
-LinearAlgebra.LAPACK.posv!
-LinearAlgebra.LAPACK.potrf!
-LinearAlgebra.LAPACK.potri!
-LinearAlgebra.LAPACK.potrs!
-LinearAlgebra.LAPACK.pstrf!
-LinearAlgebra.LAPACK.ptsv!
-LinearAlgebra.LAPACK.pttrf!
-LinearAlgebra.LAPACK.pttrs!
-LinearAlgebra.LAPACK.trtri!
-LinearAlgebra.LAPACK.trtrs!
-LinearAlgebra.LAPACK.trcon!
-LinearAlgebra.LAPACK.trevc!
-LinearAlgebra.LAPACK.trrfs!
-LinearAlgebra.LAPACK.stev!
-LinearAlgebra.LAPACK.stebz!
-LinearAlgebra.LAPACK.stegr!
-LinearAlgebra.LAPACK.stein!
-LinearAlgebra.LAPACK.syconv!
-LinearAlgebra.LAPACK.sysv!
-LinearAlgebra.LAPACK.sytrf!
-LinearAlgebra.LAPACK.sytri!
-LinearAlgebra.LAPACK.sytrs!
-LinearAlgebra.LAPACK.hesv!
-LinearAlgebra.LAPACK.hetrf!
-LinearAlgebra.LAPACK.hetri!
-LinearAlgebra.LAPACK.hetrs!
-LinearAlgebra.LAPACK.syev!
-LinearAlgebra.LAPACK.syevr!
-LinearAlgebra.LAPACK.syevd!
-LinearAlgebra.LAPACK.sygvd!
-LinearAlgebra.LAPACK.bdsqr!
-LinearAlgebra.LAPACK.bdsdc!
-LinearAlgebra.LAPACK.gecon!
-LinearAlgebra.LAPACK.gehrd!
-LinearAlgebra.LAPACK.orghr!
-LinearAlgebra.LAPACK.gees!
-LinearAlgebra.LAPACK.gges!
-LinearAlgebra.LAPACK.gges3!
-LinearAlgebra.LAPACK.trexc!
-LinearAlgebra.LAPACK.trsen!
-LinearAlgebra.LAPACK.tgsen!
-LinearAlgebra.LAPACK.trsyl!
-LinearAlgebra.LAPACK.hseqr!
-```
-
-```@meta
-DocTestSetup = nothing
-```
diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
deleted file mode 100644
index 386de771d666f..0000000000000
--- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl
+++ /dev/null
@@ -1,699 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-Linear algebra module. Provides array arithmetic,
-matrix factorizations and other linear algebra related
-functionality.
-"""
-module LinearAlgebra
-
-import Base: \, /, *, ^, +, -, ==
-import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, asec, asech,
-    asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, collect, conj, convert, copy,
-    copyto!, copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor,
-    getindex, hcat, getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle,
-    kron, kron!, length, log, map, ndims, one, oneunit, parent, permutedims,
-    power_by_squaring, promote_rule, real, sec, sech, setindex!, show, similar, sin,
-    sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, typed_hcat,
-    vec, view, zero
-using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, print_matrix,
-    @propagate_inbounds, reduce, typed_hvcat, typed_vcat, require_one_based_indexing,
-    splat
-using Base.Broadcast: Broadcasted, broadcasted
-using Base.PermutedDimsArrays: CommutativeOps
-using OpenBLAS_jll
-using libblastrampoline_jll
-import Libdl
-
-export
-# Modules
-    LAPACK,
-    BLAS,
-
-# Types
-    Adjoint,
-    Transpose,
-    SymTridiagonal,
-    Tridiagonal,
-    Bidiagonal,
-    Factorization,
-    BunchKaufman,
-    Cholesky,
-    CholeskyPivoted,
-    ColumnNorm,
-    Eigen,
-    GeneralizedEigen,
-    GeneralizedSVD,
-    GeneralizedSchur,
-    Hessenberg,
-    LU,
-    LDLt,
-    NoPivot,
-    RowNonZero,
-    QR,
-    QRPivoted,
-    LQ,
-    Schur,
-    SVD,
-    Hermitian,
-    RowMaximum,
-    Symmetric,
-    LowerTriangular,
-    UpperTriangular,
-    UnitLowerTriangular,
-    UnitUpperTriangular,
-    UpperHessenberg,
-    Diagonal,
-    UniformScaling,
-
-# Functions
-    axpy!,
-    axpby!,
-    bunchkaufman,
-    bunchkaufman!,
-    cholesky,
-    cholesky!,
-    cond,
-    condskeel,
-    copyto!,
-    copy_transpose!,
-    cross,
-    adjoint,
-    adjoint!,
-    det,
-    diag,
-    diagind,
-    diagm,
-    dot,
-    eigen,
-    eigen!,
-    eigmax,
-    eigmin,
-    eigvals,
-    eigvals!,
-    eigvecs,
-    factorize,
-    givens,
-    hermitianpart,
-    hermitianpart!,
-    hessenberg,
-    hessenberg!,
-    isdiag,
-    ishermitian,
-    isposdef,
-    isposdef!,
-    issuccess,
-    issymmetric,
-    istril,
-    istriu,
-    kron,
-    kron!,
-    ldiv!,
-    ldlt!,
-    ldlt,
-    logabsdet,
-    logdet,
-    lowrankdowndate,
-    lowrankdowndate!,
-    lowrankupdate,
-    lowrankupdate!,
-    lu,
-    lu!,
-    lyap,
-    mul!,
-    lmul!,
-    rmul!,
-    norm,
-    normalize,
-    normalize!,
-    nullspace,
-    ordschur!,
-    ordschur,
-    pinv,
-    qr,
-    qr!,
-    lq,
-    lq!,
-    opnorm,
-    rank,
-    rdiv!,
-    reflect!,
-    rotate!,
-    schur,
-    schur!,
-    svd,
-    svd!,
-    svdvals!,
-    svdvals,
-    sylvester,
-    tr,
-    transpose,
-    transpose!,
-    tril,
-    triu,
-    tril!,
-    triu!,
-
-# Operators
-    \,
-    /,
-
-# Constants
-    I
-
-const BlasFloat = Union{Float64,Float32,ComplexF64,ComplexF32}
-const BlasReal = Union{Float64,Float32}
-const BlasComplex = Union{ComplexF64,ComplexF32}
-
-if USE_BLAS64
-    const BlasInt = Int64
-else
-    const BlasInt = Int32
-end
-
-
-abstract type Algorithm end
-struct DivideAndConquer <: Algorithm end
-struct QRIteration <: Algorithm end
-
-abstract type PivotingStrategy end
-struct NoPivot <: PivotingStrategy end
-struct RowNonZero <: PivotingStrategy end
-struct RowMaximum <: PivotingStrategy end
-struct ColumnNorm <: PivotingStrategy end
-
-# Check that stride of matrix/vector is 1
-# Writing like this to avoid splatting penalty when called with multiple arguments,
-# see PR 16416
-"""
-    stride1(A) -> Int
-
-Return the distance between successive array elements
-in dimension 1 in units of element size.
-
-# Examples
-```jldoctest
-julia> A = [1,2,3,4]
-4-element Vector{Int64}:
- 1
- 2
- 3
- 4
-
-julia> LinearAlgebra.stride1(A)
-1
-
-julia> B = view(A, 2:2:4)
-2-element view(::Vector{Int64}, 2:2:4) with eltype Int64:
- 2
- 4
-
-julia> LinearAlgebra.stride1(B)
-2
-```
-"""
-stride1(x) = stride(x,1)
-stride1(x::Array) = 1
-stride1(x::DenseArray) = stride(x, 1)::Int
-
-@inline chkstride1(A...) = _chkstride1(true, A...)
-@noinline _chkstride1(ok::Bool) = ok || error("matrix does not have contiguous columns")
-@inline _chkstride1(ok::Bool, A, B...) = _chkstride1(ok & (stride1(A) == 1), B...)
-
-"""
-    LinearAlgebra.checksquare(A)
-
-Check that a matrix is square, then return its common dimension.
-For multiple arguments, return a vector.
-
-# Examples
-```jldoctest
-julia> A = fill(1, (4,4)); B = fill(1, (5,5));
-
-julia> LinearAlgebra.checksquare(A, B)
-2-element Vector{Int64}:
- 4
- 5
-```
-"""
-function checksquare(A)
-    m,n = size(A)
-    m == n || throw(DimensionMismatch("matrix is not square: dimensions are $(size(A))"))
-    m
-end
-
-function checksquare(A...)
-    sizes = Int[]
-    for a in A
-        size(a,1)==size(a,2) || throw(DimensionMismatch("matrix is not square: dimensions are $(size(a))"))
-        push!(sizes, size(a,1))
-    end
-    return sizes
-end
-
-function char_uplo(uplo::Symbol)
-    if uplo === :U
-        return 'U'
-    elseif uplo === :L
-        return 'L'
-    else
-        throw_uplo()
-    end
-end
-
-function sym_uplo(uplo::Char)
-    if uplo == 'U'
-        return :U
-    elseif uplo == 'L'
-        return :L
-    else
-        throw_uplo()
-    end
-end
-
-@noinline throw_uplo() = throw(ArgumentError("uplo argument must be either :U (upper) or :L (lower)"))
-
-"""
-    ldiv!(Y, A, B) -> Y
-
-Compute `A \\ B` in-place and store the result in `Y`, returning the result.
-
-The argument `A` should *not* be a matrix.  Rather, instead of matrices it should be a
-factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)).
-The reason for this is that factorization itself is both expensive and typically allocates memory
-(although it can also be done in-place via, e.g., [`lu!`](@ref)),
-and performance-critical situations requiring `ldiv!` usually also require fine-grained
-control over the factorization of `A`.
-
-!!! note
-    Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as
-    these are already in a factorized form
-
-# Examples
-```jldoctest
-julia> A = [1 2.2 4; 3.1 0.2 3; 4 1 2];
-
-julia> X = [1; 2.5; 3];
-
-julia> Y = zero(X);
-
-julia> ldiv!(Y, qr(A), X);
-
-julia> Y
-3-element Vector{Float64}:
-  0.7128099173553719
- -0.051652892561983674
-  0.10020661157024757
-
-julia> A\\X
-3-element Vector{Float64}:
-  0.7128099173553719
- -0.05165289256198333
-  0.10020661157024785
-```
-"""
-ldiv!(Y, A, B)
-
-"""
-    ldiv!(A, B)
-
-Compute `A \\ B` in-place and overwriting `B` to store the result.
-
-The argument `A` should *not* be a matrix.  Rather, instead of matrices it should be a
-factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)).
-The reason for this is that factorization itself is both expensive and typically allocates memory
-(although it can also be done in-place via, e.g., [`lu!`](@ref)),
-and performance-critical situations requiring `ldiv!` usually also require fine-grained
-control over the factorization of `A`.
-
-!!! note
-    Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as
-    these are already in a factorized form
-
-# Examples
-```jldoctest
-julia> A = [1 2.2 4; 3.1 0.2 3; 4 1 2];
-
-julia> X = [1; 2.5; 3];
-
-julia> Y = copy(X);
-
-julia> ldiv!(qr(A), X);
-
-julia> X
-3-element Vector{Float64}:
-  0.7128099173553719
- -0.051652892561983674
-  0.10020661157024757
-
-julia> A\\Y
-3-element Vector{Float64}:
-  0.7128099173553719
- -0.05165289256198333
-  0.10020661157024785
-```
-"""
-ldiv!(A, B)
-
-
-"""
-    rdiv!(A, B)
-
-Compute `A / B` in-place and overwriting `A` to store the result.
-
-The argument `B` should *not* be a matrix.  Rather, instead of matrices it should be a
-factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)).
-The reason for this is that factorization itself is both expensive and typically allocates memory
-(although it can also be done in-place via, e.g., [`lu!`](@ref)),
-and performance-critical situations requiring `rdiv!` usually also require fine-grained
-control over the factorization of `B`.
-
-!!! note
-    Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as
-    these are already in a factorized form
-"""
-rdiv!(A, B)
-
-"""
-    copy_oftype(A, T)
-
-Creates a copy of `A` with eltype `T`. No assertions about mutability of the result are
-made. When `eltype(A) == T`, then this calls `copy(A)` which may be overloaded for custom
-array types. Otherwise, this calls `convert(AbstractArray{T}, A)`.
-"""
-copy_oftype(A::AbstractArray{T}, ::Type{T}) where {T} = copy(A)
-copy_oftype(A::AbstractArray{T,N}, ::Type{S}) where {T,N,S} = convert(AbstractArray{S,N}, A)
-
-"""
-    copymutable_oftype(A, T)
-
-Copy `A` to a mutable array with eltype `T` based on `similar(A, T)`.
-
-The resulting matrix typically has similar algebraic structure as `A`. For
-example, supplying a tridiagonal matrix results in another tridiagonal matrix.
-In general, the type of the output corresponds to that of `similar(A, T)`.
-
-In LinearAlgebra, mutable copies (of some desired eltype) are created to be passed
-to in-place algorithms (such as `ldiv!`, `rdiv!`, `lu!` and so on). If the specific
-algorithm is known to preserve the algebraic structure, use `copymutable_oftype`.
-If the algorithm is known to return a dense matrix (or some wrapper backed by a dense
-matrix), then use `copy_similar`.
-
-See also: `Base.copymutable`, `copy_similar`.
-"""
-copymutable_oftype(A::AbstractArray, ::Type{S}) where {S} = copyto!(similar(A, S), A)
-
-"""
-    copy_similar(A, T)
-
-Copy `A` to a mutable array with eltype `T` based on `similar(A, T, size(A))`.
-
-Compared to `copymutable_oftype`, the result can be more flexible. In general, the type
-of the output corresponds to that of the three-argument method `similar(A, T, size(A))`.
-
-See also: `copymutable_oftype`.
-"""
-copy_similar(A::AbstractArray, ::Type{T}) where {T} = copyto!(similar(A, T, size(A)), A)
-
-
-include("adjtrans.jl")
-include("transpose.jl")
-
-include("exceptions.jl")
-include("generic.jl")
-
-include("blas.jl")
-include("matmul.jl")
-include("lapack.jl")
-
-include("dense.jl")
-include("tridiag.jl")
-include("triangular.jl")
-
-include("factorization.jl")
-include("eigen.jl")
-include("svd.jl")
-include("symmetric.jl")
-include("cholesky.jl")
-include("lu.jl")
-include("bunchkaufman.jl")
-include("diagonal.jl")
-include("symmetriceigen.jl")
-include("bidiag.jl")
-include("uniformscaling.jl")
-include("qr.jl")
-include("lq.jl")
-include("hessenberg.jl")
-include("abstractq.jl")
-include("givens.jl")
-include("special.jl")
-include("bitarray.jl")
-include("ldlt.jl")
-include("schur.jl")
-include("structuredbroadcast.jl")
-include("deprecated.jl")
-
-const ⋅ = dot
-const × = cross
-export ⋅, ×
-
-wrapper_char(::AbstractArray) = 'N'
-wrapper_char(::Adjoint) = 'C'
-wrapper_char(::Adjoint{<:Real}) = 'T'
-wrapper_char(::Transpose) = 'T'
-wrapper_char(A::Hermitian) = A.uplo == 'U' ? 'H' : 'h'
-wrapper_char(A::Hermitian{<:Real}) = A.uplo == 'U' ? 'S' : 's'
-wrapper_char(A::Symmetric) = A.uplo == 'U' ? 'S' : 's'
-
-function wrap(A::AbstractVecOrMat, tA::AbstractChar)
-    if tA == 'N'
-        return A
-    elseif tA == 'T'
-        return transpose(A)
-    elseif tA == 'C'
-        return adjoint(A)
-    elseif tA == 'H'
-        return Hermitian(A, :U)
-    elseif tA == 'h'
-        return Hermitian(A, :L)
-    elseif tA == 'S'
-        return Symmetric(A, :U)
-    else # tA == 's'
-        return Symmetric(A, :L)
-    end
-end
-
-_unwrap(A::AbstractVecOrMat) = A
-
-## convenience methods
-## return only the solution of a least squares problem while avoiding promoting
-## vectors to matrices.
-_cut_B(x::AbstractVector, r::UnitRange) = length(x)  > length(r) ? x[r]   : x
-_cut_B(X::AbstractMatrix, r::UnitRange) = size(X, 1) > length(r) ? X[r,:] : X
-
-# SymTridiagonal ev can be the same length as dv, but the last element is
-# ignored. However, some methods can fail if they read the entire ev
-# rather than just the meaningful elements. This is a helper function
-# for getting only the meaningful elements of ev. See #41089
-_evview(S::SymTridiagonal) = @view S.ev[begin:begin + length(S.dv) - 2]
-
-## append right hand side with zeros if necessary
-_zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n))
-_zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2))
-
-# convert to Vector, if necessary
-_makevector(x::Vector) = x
-_makevector(x::AbstractVector) = Vector(x)
-
-# append a zero element / drop the last element
-_pushzero(A) = (B = similar(A, length(A)+1); @inbounds B[begin:end-1] .= A; @inbounds B[end] = zero(eltype(B)); B)
-_droplast!(A) = deleteat!(A, lastindex(A))
-
-# some trait like this would be cool
-# onedefined(::Type{T}) where {T} = hasmethod(one, (T,))
-# but we are actually asking for oneunit(T), that is, however, defined for generic T as
-# `T(one(T))`, so the question is equivalent for whether one(T) is defined
-onedefined(::Type) = false
-onedefined(::Type{<:Number}) = true
-
-# initialize return array for op(A, B)
-_init_eltype(::typeof(*), ::Type{TA}, ::Type{TB}) where {TA,TB} =
-    (onedefined(TA) && onedefined(TB)) ?
-        typeof(matprod(oneunit(TA), oneunit(TB))) :
-        promote_op(matprod, TA, TB)
-_init_eltype(op, ::Type{TA}, ::Type{TB}) where {TA,TB} =
-    (onedefined(TA) && onedefined(TB)) ?
-        typeof(op(oneunit(TA), oneunit(TB))) :
-        promote_op(op, TA, TB)
-_initarray(op, ::Type{TA}, ::Type{TB}, C) where {TA,TB} =
-    similar(C, _init_eltype(op, TA, TB), size(C))
-
-# General fallback definition for handling under- and overdetermined system as well as square problems
-# While this definition is pretty general, it does e.g. promote to common element type of lhs and rhs
-# which is required by LAPACK but not SuiteSparse which allows real-complex solves in some cases. Hence,
-# we restrict this method to only the LAPACK factorizations in LinearAlgebra.
-# The definition is put here since it explicitly references all the Factorization structs so it has
-# to be located after all the files that define the structs.
-const LAPACKFactorizations{T,S} = Union{
-    BunchKaufman{T,S},
-    Cholesky{T,S},
-    LQ{T,S},
-    LU{T,S},
-    QR{T,S},
-    QRCompactWY{T,S},
-    QRPivoted{T,S},
-    SVD{T,<:Real,S}}
-
-(\)(F::LAPACKFactorizations, B::AbstractVecOrMat) = ldiv(F, B)
-(\)(F::AdjointFactorization{<:Any,<:LAPACKFactorizations}, B::AbstractVecOrMat) = ldiv(F, B)
-(\)(F::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat) = ldiv(F, B)
-
-function ldiv(F::Factorization, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    m, n = size(F)
-    if m != size(B, 1)
-        throw(DimensionMismatch("arguments must have the same number of rows"))
-    end
-
-    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
-    FF = Factorization{TFB}(F)
-
-    # For wide problem we (often) compute a minimum norm solution. The solution
-    # is larger than the right hand side so we use size(F, 2).
-    BB = _zeros(TFB, B, n)
-
-    if n > size(B, 1)
-        # Underdetermined
-        copyto!(view(BB, 1:m, :), B)
-    else
-        copyto!(BB, B)
-    end
-
-    ldiv!(FF, BB)
-
-    # For tall problems, we compute a least squares solution so only part
-    # of the rhs should be returned from \ while ldiv! uses (and returns)
-    # the complete rhs
-    return _cut_B(BB, 1:n)
-end
-# disambiguate
-(\)(F::LAPACKFactorizations{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
-    @invoke \(F::Factorization{T}, B::VecOrMat{Complex{T}})
-(\)(F::AdjointFactorization{T,<:LAPACKFactorizations}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
-    ldiv(F, B)
-(\)(F::TransposeFactorization{T,<:LU}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
-    ldiv(F, B)
-
-"""
-    LinearAlgebra.peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false)
-
-`peakflops` computes the peak flop rate of the computer by using double precision
-[`gemm!`](@ref LinearAlgebra.BLAS.gemm!). By default, if no arguments are specified, it
-multiplies two `Float64` matrices of size `n x n`, where `n = 4096`. If the underlying BLAS is using
-multiple threads, higher flop rates are realized. The number of BLAS threads can be set with
-[`BLAS.set_num_threads(n)`](@ref).
-
-If the keyword argument `eltype` is provided, `peakflops` will construct matrices with elements
-of type `eltype` for calculating the peak flop rate.
-
-By default, `peakflops` will use the best timing from 3 trials. If the `ntrials` keyword argument
-is provided, `peakflops` will use those many trials for picking the best timing.
-
-If the keyword argument `parallel` is set to `true`, `peakflops` is run in parallel on all
-the worker processors. The flop rate of the entire parallel computer is returned. When
-running in parallel, only 1 BLAS thread is used. The argument `n` still refers to the size
-of the problem that is solved on each processor.
-
-!!! compat "Julia 1.1"
-    This function requires at least Julia 1.1. In Julia 1.0 it is available from
-    the standard library `InteractiveUtils`.
-"""
-function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false)
-    t = zeros(Float64, ntrials)
-    for i=1:ntrials
-        a = ones(eltype,n,n)
-        t[i] = @elapsed a2 = a*a
-        @assert a2[1,1] == n
-    end
-
-    if parallel
-        let Distributed = Base.require(Base.PkgId(
-                Base.UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed"))
-            return sum(Distributed.pmap(peakflops, fill(n, Distributed.nworkers())))
-        end
-    else
-        return 2*Float64(n)^3 / minimum(t)
-    end
-end
-
-
-function versioninfo(io::IO=stdout)
-    indent = "  "
-    config = BLAS.get_config()
-    build_flags = join(string.(config.build_flags), ", ")
-    println(io, "BLAS: ", BLAS.libblastrampoline, " (", build_flags, ")")
-    for lib in config.loaded_libs
-        interface = uppercase(string(lib.interface))
-        println(io, indent, "--> ", lib.libname, " (", interface, ")")
-    end
-    println(io, "Threading:")
-    println(io, indent, "Threads.threadpoolsize() = ", Threads.threadpoolsize())
-    println(io, indent, "Threads.maxthreadid() = ", Base.Threads.maxthreadid())
-    println(io, indent, "LinearAlgebra.BLAS.get_num_threads() = ", BLAS.get_num_threads())
-    println(io, "Relevant environment variables:")
-    env_var_names = [
-        "JULIA_NUM_THREADS",
-        "MKL_DYNAMIC",
-        "MKL_NUM_THREADS",
-         # OpenBLAS has a hierarchy of environment variables for setting the
-         # number of threads, see
-         # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables
-        ("OPENBLAS_NUM_THREADS", "GOTO_NUM_THREADS", "OMP_NUM_THREADS"),
-    ]
-    printed_at_least_one_env_var = false
-    print_var(io, indent, name) = println(io, indent, name, " = ", ENV[name])
-    for name in env_var_names
-        if name isa Tuple
-            # If `name` is a Tuple, then find the first environment which is
-            # defined, and disregard the following ones.
-            for nm in name
-                if haskey(ENV, nm)
-                    print_var(io, indent, nm)
-                    printed_at_least_one_env_var = true
-                    break
-                end
-            end
-        else
-            if haskey(ENV, name)
-                print_var(io, indent, name)
-                printed_at_least_one_env_var = true
-            end
-        end
-    end
-    if !printed_at_least_one_env_var
-        println(io, indent, "[none]")
-    end
-    return nothing
-end
-
-function __init__()
-    try
-        BLAS.lbt_forward(OpenBLAS_jll.libopenblas_path; clear=true)
-        BLAS.check()
-    catch ex
-        Base.showerror_nostdio(ex, "WARNING: Error during initialization of module LinearAlgebra")
-    end
-    # register a hook to disable BLAS threading
-    Base.at_disable_library_threading(() -> BLAS.set_num_threads(1))
-
-    # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables
-    if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "GOTO_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS")
-        @static if Sys.isapple() && Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "aarch64"
-            BLAS.set_num_threads(max(1, Sys.CPU_THREADS))
-        else
-            BLAS.set_num_threads(max(1, Sys.CPU_THREADS ÷ 2))
-        end
-    end
-end
-
-end # module LinearAlgebra
diff --git a/stdlib/LinearAlgebra/src/abstractq.jl b/stdlib/LinearAlgebra/src/abstractq.jl
deleted file mode 100644
index 93358d052d50b..0000000000000
--- a/stdlib/LinearAlgebra/src/abstractq.jl
+++ /dev/null
@@ -1,575 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-abstract type AbstractQ{T} end
-
-struct AdjointQ{T,S<:AbstractQ{T}} <: AbstractQ{T}
-    Q::S
-end
-
-parent(adjQ::AdjointQ) = adjQ.Q
-eltype(::Type{<:AbstractQ{T}}) where {T} = T
-ndims(::AbstractQ) = 2
-
-# inversion/adjoint/transpose
-inv(Q::AbstractQ) = Q'
-adjoint(Q::AbstractQ) = AdjointQ(Q)
-transpose(Q::AbstractQ{<:Real}) = AdjointQ(Q)
-transpose(Q::AbstractQ) = error("transpose not implemented for $(typeof(Q)). Consider using adjoint instead of transpose.")
-adjoint(adjQ::AdjointQ) = adjQ.Q
-
-# promotion with AbstractMatrix, at least for equal eltypes
-promote_rule(::Type{<:AbstractMatrix{T}}, ::Type{<:AbstractQ{T}}) where {T} =
-    (@inline; Union{AbstractMatrix{T},AbstractQ{T}})
-
-# conversion
-# the following eltype promotion should be defined for each subtype `QType`
-# convert(::Type{AbstractQ{T}}, Q::QType) where {T} = QType{T}(Q)
-# and then care has to be taken that
-# QType{T}(Q::QType{T}) where T = ...
-# is implemented as a no-op
-
-# the following conversion method ensures functionality when the above method is not defined
-# (as for HessenbergQ), but no eltype conversion is required either (say, in multiplication)
-convert(::Type{AbstractQ{T}}, Q::AbstractQ{T}) where {T} = Q
-convert(::Type{AbstractQ{T}}, adjQ::AdjointQ{T}) where {T} = adjQ
-convert(::Type{AbstractQ{T}}, adjQ::AdjointQ) where {T} = convert(AbstractQ{T}, adjQ.Q)'
-
-# ... to matrix
-collect(Q::AbstractQ) = copyto!(Matrix{eltype(Q)}(undef, size(Q)), Q)
-Matrix{T}(Q::AbstractQ) where {T} = convert(Matrix{T}, Q*I) # generic fallback, yields square matrix
-Matrix{T}(adjQ::AdjointQ{S}) where {T,S} = convert(Matrix{T}, lmul!(adjQ, Matrix{S}(I, size(adjQ))))
-Matrix(Q::AbstractQ{T}) where {T} = Matrix{T}(Q)
-Array{T}(Q::AbstractQ) where {T} = Matrix{T}(Q)
-Array(Q::AbstractQ) = Matrix(Q)
-convert(::Type{T}, Q::AbstractQ) where {T<:AbstractArray} = T(Q)
-# legacy
-@deprecate(convert(::Type{AbstractMatrix{T}}, Q::AbstractQ) where {T},
-    convert(LinearAlgebra.AbstractQ{T}, Q))
-
-function size(Q::AbstractQ, dim::Integer)
-    if dim < 1
-        throw(BoundsError())
-    elseif dim <= 2 # && 1 <= dim
-        return size(Q)[dim]
-    else # 2 < dim
-        return 1
-    end
-end
-size(adjQ::AdjointQ) = reverse(size(adjQ.Q))
-
-# comparison
-(==)(Q::AbstractQ, A::AbstractMatrix) = lmul!(Q, Matrix{eltype(Q)}(I, size(A))) == A
-(==)(A::AbstractMatrix, Q::AbstractQ) = Q == A
-(==)(Q::AbstractQ, P::AbstractQ) = Matrix(Q) == Matrix(P)
-isapprox(Q::AbstractQ, A::AbstractMatrix; kwargs...) =
-    isapprox(lmul!(Q, Matrix{eltype(Q)}(I, size(A))), A, kwargs...)
-isapprox(A::AbstractMatrix, Q::AbstractQ; kwargs...) = isapprox(Q, A, kwargs...)
-isapprox(Q::AbstractQ, P::AbstractQ; kwargs...) = isapprox(Matrix(Q), Matrix(P), kwargs...)
-
-# pseudo-array behaviour, required for indexing with `begin` or `end`
-axes(Q::AbstractQ) = map(Base.oneto, size(Q))
-axes(Q::AbstractQ, d::Integer) = d in (1, 2) ? axes(Q)[d] : Base.OneTo(1)
-
-copymutable(Q::AbstractQ{T}) where {T} = lmul!(Q, Matrix{T}(I, size(Q)))
-copy(Q::AbstractQ) = copymutable(Q)
-
-# getindex
-@inline function getindex(Q::AbstractQ, inds...)
-    @boundscheck Base.checkbounds_indices(Bool, axes(Q), inds) || Base.throw_boundserror(Q, inds)
-    return _getindex(Q, inds...)
-end
-@inline getindex(Q::AbstractQ, ::Colon) = copymutable(Q)[:]
-@inline getindex(Q::AbstractQ, ::Colon, ::Colon) = copy(Q)
-
-@inline _getindex(Q::AbstractQ, inds...) = @inbounds copymutable(Q)[inds...]
-@inline function _getindex(Q::AbstractQ, ::Colon, J::AbstractVector{<:Integer})
-    Y = zeros(eltype(Q), size(Q, 2), length(J))
-    @inbounds for (i,j) in enumerate(J)
-        Y[j,i] = oneunit(eltype(Q))
-    end
-    lmul!(Q, Y)
-end
-@inline _getindex(Q::AbstractQ, I::AbstractVector{Int}, J::AbstractVector{Int}) = @inbounds Q[:,J][I,:]
-@inline function _getindex(Q::AbstractQ, ::Colon, j::Int)
-    y = zeros(eltype(Q), size(Q, 2))
-    y[j] = oneunit(eltype(Q))
-    lmul!(Q, y)
-end
-@inline _getindex(Q::AbstractQ, i::Int, j::Int) = @inbounds Q[:,j][i]
-
-# needed because AbstractQ does not subtype AbstractMatrix
-qr(Q::AbstractQ{T}, arg...; kwargs...) where {T} = qr!(Matrix{_qreltype(T)}(Q), arg...; kwargs...)
-lq(Q::AbstractQ{T}, arg...; kwargs...) where {T} = lq!(Matrix{lq_eltype(T)}(Q), arg...; kwargs...)
-hessenberg(Q::AbstractQ{T}) where {T} = hessenberg!(Matrix{eigtype(T)}(Q))
-
-# needed when used interchangeably with AbstractMatrix (analogous to views of ranges)
-view(A::AbstractQ, I...) = getindex(A, I...)
-
-# specialization avoiding the fallback using slow `getindex`
-function copyto!(dest::AbstractMatrix, src::AbstractQ)
-    copyto!(dest, I)
-    lmul!(src, dest)
-end
-# needed to resolve method ambiguities
-function copyto!(dest::PermutedDimsArray{T,2,perm}, src::AbstractQ) where {T,perm}
-    if perm == (1, 2)
-        copyto!(parent(dest), src)
-    else
-        @assert perm == (2, 1) # there are no other permutations of two indices
-        if T <: Real
-            copyto!(parent(dest), I)
-            lmul!(src', parent(dest))
-        else
-            # LAPACK does not offer inplace lmul!(transpose(Q), B) for complex Q
-            tmp = similar(parent(dest))
-            copyto!(tmp, I)
-            rmul!(tmp, src)
-            permutedims!(parent(dest), tmp, (2, 1))
-        end
-    end
-    return dest
-end
-
-function show(io::IO, ::MIME{Symbol("text/plain")}, Q::AbstractQ)
-    print(io, Base.dims2string(size(Q)), ' ', summary(Q))
-end
-
-# multiplication
-# generically, treat AbstractQ like a matrix with its definite size
-qsize_check(Q::AbstractQ, B::AbstractVecOrMat) =
-    size(Q, 2) == size(B, 1) ||
-        throw(DimensionMismatch("second dimension of Q, $(size(Q,2)), must coincide with first dimension of B, $(size(B,1))"))
-qsize_check(A::AbstractVecOrMat, Q::AbstractQ) =
-    size(A, 2) == size(Q, 1) ||
-        throw(DimensionMismatch("second dimension of A, $(size(A,2)), must coincide with first dimension of Q, $(size(Q,1))"))
-qsize_check(Q::AbstractQ, P::AbstractQ) =
-    size(Q, 2) == size(P, 1) ||
-        throw(DimensionMismatch("second dimension of A, $(size(Q,2)), must coincide with first dimension of B, $(size(P,1))"))
-
-(*)(Q::AbstractQ, J::UniformScaling) = Q*J.λ
-function (*)(Q::AbstractQ, b::Number)
-    T = promote_type(eltype(Q), typeof(b))
-    lmul!(convert(AbstractQ{T}, Q), Matrix{T}(b*I, size(Q)))
-end
-function (*)(Q::AbstractQ, B::AbstractVector)
-    T = promote_type(eltype(Q), eltype(B))
-    qsize_check(Q, B)
-    mul!(similar(B, T, size(Q, 1)), convert(AbstractQ{T}, Q), B)
-end
-function (*)(Q::AbstractQ, B::AbstractMatrix)
-    T = promote_type(eltype(Q), eltype(B))
-    qsize_check(Q, B)
-    mul!(similar(B, T, (size(Q, 1), size(B, 2))), convert(AbstractQ{T}, Q), B)
-end
-
-(*)(J::UniformScaling, Q::AbstractQ) = J.λ*Q
-function (*)(a::Number, Q::AbstractQ)
-    T = promote_type(typeof(a), eltype(Q))
-    rmul!(Matrix{T}(a*I, size(Q)), convert(AbstractQ{T}, Q))
-end
-function (*)(A::AbstractVector, Q::AbstractQ)
-    T = promote_type(eltype(A), eltype(Q))
-    qsize_check(A, Q)
-    return mul!(similar(A, T, length(A)), A, convert(AbstractQ{T}, Q))
-end
-function (*)(A::AbstractMatrix, Q::AbstractQ)
-    T = promote_type(eltype(A), eltype(Q))
-    qsize_check(A, Q)
-    return mul!(similar(A, T, (size(A, 1), size(Q, 2))), A, convert(AbstractQ{T}, Q))
-end
-(*)(u::AdjointAbsVec, Q::AbstractQ) = (Q'u')'
-
-### Q*Q (including adjoints)
-(*)(Q::AbstractQ, P::AbstractQ) = Q * (P*I)
-
-### mul!
-function mul!(C::AbstractVecOrMat{T}, Q::AbstractQ{T}, B::Union{AbstractVecOrMat,AbstractQ}) where {T}
-    require_one_based_indexing(C, B)
-    mB, nB = size(B, 1), size(B, 2)
-    mC, nC = size(C, 1), size(C, 2)
-    qsize_check(Q, B)
-    nB != nC && throw(DimensionMismatch())
-    if mB < mC
-        inds = CartesianIndices(axes(B))
-        copyto!(view(C, inds), B)
-        C[CartesianIndices((mB+1:mC, axes(C, 2)))] .= zero(T)
-        return lmul!(Q, C)
-    else
-        return lmul!(Q, copyto!(C, B))
-    end
-end
-function mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat, Q::AbstractQ{T}) where {T}
-    require_one_based_indexing(C, A)
-    mA, nA = size(A, 1), size(A, 2)
-    mC, nC = size(C, 1), size(C, 2)
-    mA != mC && throw(DimensionMismatch())
-    qsize_check(A, Q)
-    if nA < nC
-        inds = CartesianIndices(axes(A))
-        copyto!(view(C, inds), A)
-        C[CartesianIndices((axes(C, 1), nA+1:nC))] .= zero(T)
-        return rmul!(C, Q)
-    else
-        return rmul!(copyto!(C, A), Q)
-    end
-end
-
-### division
-\(Q::AbstractQ, A::AbstractVecOrMat) = Q'*A
-/(A::AbstractVecOrMat, Q::AbstractQ) = A*Q'
-ldiv!(Q::AbstractQ, A::AbstractVecOrMat) = lmul!(Q', A)
-ldiv!(C::AbstractVecOrMat, Q::AbstractQ, A::AbstractVecOrMat) = mul!(C, Q', A)
-rdiv!(A::AbstractVecOrMat, Q::AbstractQ) = rmul!(A, Q')
-
-logabsdet(Q::AbstractQ) = (d = det(Q); return log(abs(d)), sign(d))
-function logdet(A::AbstractQ)
-    d, s = logabsdet(A)
-    return d + log(s)
-end
-
-###########################################################
-################ Q from QR decompositions #################
-###########################################################
-
-"""
-    QRPackedQ <: LinearAlgebra.AbstractQ
-
-The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QR`](@ref) or
-[`QRPivoted`](@ref) format.
-"""
-struct QRPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T}
-    factors::S
-    τ::C
-
-    function QRPackedQ{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}}
-        require_one_based_indexing(factors, τ)
-        new{T,S,C}(factors, τ)
-    end
-end
-QRPackedQ(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} =
-    QRPackedQ{T,typeof(factors),typeof(τ)}(factors, τ)
-QRPackedQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} =
-    QRPackedQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ))
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(QRPackedQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S},
-           QRPackedQ{T,S,typeof(τ)}(factors, τ), false)
-
-"""
-    QRCompactWYQ <: LinearAlgebra.AbstractQ
-
-The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QRCompactWY`](@ref)
-format.
-"""
-struct QRCompactWYQ{S, M<:AbstractMatrix{S}, C<:AbstractMatrix{S}} <: AbstractQ{S}
-    factors::M
-    T::C
-
-    function QRCompactWYQ{S,M,C}(factors, T) where {S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}}
-        require_one_based_indexing(factors, T)
-        new{S,M,C}(factors, T)
-    end
-end
-QRCompactWYQ(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S} =
-    QRCompactWYQ{S,typeof(factors),typeof(T)}(factors, T)
-QRCompactWYQ{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} =
-    QRCompactWYQ(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T))
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(QRCompactWYQ{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M},
-           QRCompactWYQ{S,M,typeof(T)}(factors, T), false)
-
-QRPackedQ{T}(Q::QRPackedQ) where {T} = QRPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(AbstractVector{T}, Q.τ))
-QRCompactWYQ{S}(Q::QRCompactWYQ) where {S} = QRCompactWYQ(convert(AbstractMatrix{S}, Q.factors), convert(AbstractMatrix{S}, Q.T))
-
-# override generic square fallback
-Matrix{T}(Q::Union{QRCompactWYQ{S},QRPackedQ{S}}) where {T,S} =
-    convert(Matrix{T}, lmul!(Q, Matrix{S}(I, size(Q, 1), min(size(Q.factors)...))))
-Matrix(Q::Union{QRCompactWYQ{S},QRPackedQ{S}}) where {S} = Matrix{S}(Q)
-
-convert(::Type{AbstractQ{T}}, Q::QRPackedQ) where {T} = QRPackedQ{T}(Q)
-convert(::Type{AbstractQ{T}}, Q::QRCompactWYQ) where {T} = QRCompactWYQ{T}(Q)
-
-size(Q::Union{QRCompactWYQ,QRPackedQ}, dim::Integer) =
-    size(Q.factors, dim == 2 ? 1 : dim)
-size(Q::Union{QRCompactWYQ,QRPackedQ}) = (n = size(Q.factors, 1); (n, n))
-
-## Multiplication
-### QB
-lmul!(A::QRCompactWYQ{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    LAPACK.gemqrt!('L', 'N', A.factors, A.T, B)
-lmul!(A::QRPackedQ{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    LAPACK.ormqr!('L', 'N', A.factors, A.τ, B)
-function lmul!(A::QRPackedQ, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    mA, nA = size(A.factors)
-    mB, nB = size(B,1), size(B,2)
-    if mA != mB
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)"))
-    end
-    Afactors = A.factors
-    @inbounds begin
-        for k = min(mA,nA):-1:1
-            for j = 1:nB
-                vBj = B[k,j]
-                for i = k+1:mB
-                    vBj += conj(Afactors[i,k])*B[i,j]
-                end
-                vBj = A.τ[k]*vBj
-                B[k,j] -= vBj
-                for i = k+1:mB
-                    B[i,j] -= Afactors[i,k]*vBj
-                end
-            end
-        end
-    end
-    B
-end
-
-### QcB
-lmul!(adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
-    (Q = adjQ.Q; LAPACK.gemqrt!('L', 'T', Q.factors, Q.T, B))
-lmul!(adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
-    (Q = adjQ.Q; LAPACK.gemqrt!('L', 'C', Q.factors, Q.T, B))
-lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
-    (Q = adjQ.Q; LAPACK.ormqr!('L', 'T', Q.factors, Q.τ, B))
-lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
-    (Q = adjQ.Q; LAPACK.ormqr!('L', 'C', Q.factors, Q.τ, B))
-function lmul!(adjA::AdjointQ{<:Any,<:QRPackedQ}, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    A = adjA.Q
-    mA, nA = size(A.factors)
-    mB, nB = size(B,1), size(B,2)
-    if mA != mB
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)"))
-    end
-    Afactors = A.factors
-    @inbounds begin
-        for k = 1:min(mA,nA)
-            for j = 1:nB
-                vBj = B[k,j]
-                for i = k+1:mB
-                    vBj += conj(Afactors[i,k])*B[i,j]
-                end
-                vBj = conj(A.τ[k])*vBj
-                B[k,j] -= vBj
-                for i = k+1:mB
-                    B[i,j] -= Afactors[i,k]*vBj
-                end
-            end
-        end
-    end
-    B
-end
-
-### AQ
-rmul!(A::StridedVecOrMat{T}, B::QRCompactWYQ{T,<:StridedMatrix}) where {T<:BlasFloat} =
-    LAPACK.gemqrt!('R', 'N', B.factors, B.T, A)
-rmul!(A::StridedVecOrMat{T}, B::QRPackedQ{T,<:StridedMatrix}) where {T<:BlasFloat} =
-    LAPACK.ormqr!('R', 'N', B.factors, B.τ, A)
-function rmul!(A::AbstractVecOrMat, Q::QRPackedQ)
-    require_one_based_indexing(A)
-    mQ, nQ = size(Q.factors)
-    mA, nA = size(A,1), size(A,2)
-    if nA != mQ
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)"))
-    end
-    Qfactors = Q.factors
-    @inbounds begin
-        for k = 1:min(mQ,nQ)
-            for i = 1:mA
-                vAi = A[i,k]
-                for j = k+1:mQ
-                    vAi += A[i,j]*Qfactors[j,k]
-                end
-                vAi = vAi*Q.τ[k]
-                A[i,k] -= vAi
-                for j = k+1:nA
-                    A[i,j] -= vAi*conj(Qfactors[j,k])
-                end
-            end
-        end
-    end
-    A
-end
-
-### AQc
-rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasReal} =
-    (Q = adjQ.Q; LAPACK.gemqrt!('R', 'T', Q.factors, Q.T, A))
-rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasComplex} =
-    (Q = adjQ.Q; LAPACK.gemqrt!('R', 'C', Q.factors, Q.T, A))
-rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:BlasReal} =
-    (Q = adjQ.Q; LAPACK.ormqr!('R', 'T', Q.factors, Q.τ, A))
-rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:BlasComplex} =
-    (Q = adjQ.Q; LAPACK.ormqr!('R', 'C', Q.factors, Q.τ, A))
-function rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:QRPackedQ})
-    require_one_based_indexing(A)
-    Q = adjQ.Q
-    mQ, nQ = size(Q.factors)
-    mA, nA = size(A,1), size(A,2)
-    if nA != mQ
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)"))
-    end
-    Qfactors = Q.factors
-    @inbounds begin
-        for k = min(mQ,nQ):-1:1
-            for i = 1:mA
-                vAi = A[i,k]
-                for j = k+1:mQ
-                    vAi += A[i,j]*Qfactors[j,k]
-                end
-                vAi = vAi*conj(Q.τ[k])
-                A[i,k] -= vAi
-                for j = k+1:nA
-                    A[i,j] -= vAi*conj(Qfactors[j,k])
-                end
-            end
-        end
-    end
-    A
-end
-
-det(Q::QRPackedQ) = _det_tau(Q.τ)
-det(Q::QRCompactWYQ) =
-    prod(i -> _det_tau(_diagview(Q.T[:, i:min(i + size(Q.T, 1), size(Q.T, 2))])),
-         1:size(Q.T, 1):size(Q.T, 2))
-
-_diagview(A) = @view A[diagind(A)]
-
-# Compute `det` from the number of Householder reflections.  Handle
-# the case `Q.τ` contains zeros.
-_det_tau(τs::AbstractVector{<:Real}) =
-    isodd(count(!iszero, τs)) ? -one(eltype(τs)) : one(eltype(τs))
-
-# In complex case, we need to compute the non-unit eigenvalue `λ = 1 - c*τ`
-# (where `c = v'v`) of each Householder reflector.  As we know that the
-# reflector must have the determinant of 1, it must satisfy `abs2(λ) == 1`.
-# Combining this with the constraint `c > 0`, it turns out that the eigenvalue
-# (hence the determinant) can be computed as `λ = -sign(τ)^2`.
-# See: https://github.com/JuliaLang/julia/pull/32887#issuecomment-521935716
-_det_tau(τs) = prod(τ -> iszero(τ) ? one(τ) : -sign(τ)^2, τs)
-
-###########################################################
-######## Q from Hessenberg decomposition ##################
-###########################################################
-
-"""
-    HessenbergQ <: AbstractQ
-
-Given a [`Hessenberg`](@ref) factorization object `F`, `F.Q` returns
-a `HessenbergQ` object, which is an implicit representation of the unitary
-matrix `Q` in the Hessenberg factorization `QHQ'` represented by `F`.
-This `F.Q` object can be efficiently multiplied by matrices or vectors,
-and can be converted to an ordinary matrix type with `Matrix(F.Q)`.
-"""
-struct HessenbergQ{T,S<:AbstractMatrix,W<:AbstractVector,sym} <: AbstractQ{T}
-    uplo::Char
-    factors::S
-    τ::W
-    function HessenbergQ{T,S,W,sym}(uplo::AbstractChar, factors, τ) where {T,S<:AbstractMatrix,W<:AbstractVector,sym}
-        new(uplo, factors, τ)
-    end
-end
-HessenbergQ(F::Hessenberg{<:Any,<:UpperHessenberg,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,false}(F.uplo, F.factors, F.τ)
-HessenbergQ(F::Hessenberg{<:Any,<:SymTridiagonal,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,true}(F.uplo, F.factors, F.τ)
-
-size(Q::HessenbergQ, dim::Integer) = size(getfield(Q, :factors), dim == 2 ? 1 : dim)
-size(Q::HessenbergQ) = size(Q, 1), size(Q, 2)
-
-# HessenbergQ from LAPACK/BLAS (as opposed to Julia libraries like GenericLinearAlgebra)
-const BlasHessenbergQ{T,sym} = HessenbergQ{T,<:StridedMatrix{T},<:StridedVector{T},sym} where {T<:BlasFloat,sym}
-
-## reconstruct the original matrix
-Matrix{T}(Q::BlasHessenbergQ{<:Any,false}) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ))
-Matrix{T}(Q::BlasHessenbergQ{<:Any,true}) where {T} = convert(Matrix{T}, LAPACK.orgtr!(Q.uplo, copy(Q.factors), Q.τ))
-
-lmul!(Q::BlasHessenbergQ{T,false}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X)
-rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} =
-    LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X)
-lmul!(adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,false}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    (Q = adjQ.Q; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X))
-rmul!(X::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} =
-    (Q = adjQ.Q; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X))
-
-lmul!(Q::BlasHessenbergQ{T,true}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    LAPACK.ormtr!('L', Q.uplo, 'N', Q.factors, Q.τ, X)
-rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} =
-    LAPACK.ormtr!('R', Q.uplo, 'N', Q.factors, Q.τ, X)
-lmul!(adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,true}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    (Q = adjQ.Q; LAPACK.ormtr!('L', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X))
-rmul!(X::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} =
-    (Q = adjQ.Q; LAPACK.ormtr!('R', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X))
-
-lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', Q')'
-rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')'
-lmul!(adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T}  = rmul!(X', adjQ')'
-rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')'
-
-# flexible left-multiplication (and adjoint right-multiplication)
-qsize_check(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, B::AbstractVecOrMat) =
-    size(B, 1) in size(Q.factors) ||
-        throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(Q.factors))"))
-qsize_check(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) =
-    (Q = adjQ.Q; size(A, 2) in size(Q.factors) ||
-        throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))")))
-
-det(Q::HessenbergQ) = _det_tau(Q.τ)
-
-###########################################################
-################ Q from LQ decomposition ##################
-###########################################################
-
-struct LQPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T}
-    factors::S
-    τ::C
-end
-
-LQPackedQ{T}(Q::LQPackedQ) where {T} = LQPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(AbstractVector{T}, Q.τ))
-@deprecate(AbstractMatrix{T}(Q::LQPackedQ) where {T},
-    convert(AbstractQ{T}, Q),
-    false)
-Matrix{T}(A::LQPackedQ) where {T} = convert(Matrix{T}, LAPACK.orglq!(copy(A.factors), A.τ))
-convert(::Type{AbstractQ{T}}, Q::LQPackedQ) where {T} = LQPackedQ{T}(Q)
-
-# size(Q::LQPackedQ) yields the shape of Q's square form
-size(Q::LQPackedQ) = (n = size(Q.factors, 2); return n, n)
-
-## Multiplication
-# out-of-place right application of LQPackedQs
-#
-# these methods: (1) check whether the applied-to matrix's (A's) appropriate dimension
-# (columns for A_*, rows for Ac_*) matches the number of columns (nQ) of the LQPackedQ (Q),
-# and if so effectively apply Q's square form to A without additional shenanigans; and
-# (2) if the preceding dimensions do not match, check whether the appropriate dimension of
-# A instead matches the number of rows of the matrix of which Q is a factor (i.e.
-# size(Q.factors, 1)), and if so implicitly apply Q's truncated form to A by zero extending
-# A as necessary for check (1) to pass (if possible) and then applying Q's square form
-
-qsize_check(adjQ::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractVecOrMat) =
-    size(B, 1) in size(adjQ.Q.factors) ||
-        throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(adjQ.Q.factors))"))
-qsize_check(A::AbstractVecOrMat, Q::LQPackedQ) =
-    size(A, 2) in size(Q.factors) ||
-        throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))"))
-
-# in-place right-application of LQPackedQs
-# these methods require that the applied-to matrix's (A's) number of columns
-# match the number of columns (nQ) of the LQPackedQ (Q) (necessary for in-place
-# operation, and the underlying LAPACK routine (ormlq) treats the implicit Q
-# as its (nQ-by-nQ) square form)
-rmul!(A::StridedVecOrMat{T}, B::LQPackedQ{T}) where {T<:BlasFloat} =
-    LAPACK.ormlq!('R', 'N', B.factors, B.τ, A)
-rmul!(A::StridedVecOrMat{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasReal} =
-    (B = adjB.Q; LAPACK.ormlq!('R', 'T', B.factors, B.τ, A))
-rmul!(A::StridedVecOrMat{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasComplex} =
-    (B = adjB.Q; LAPACK.ormlq!('R', 'C', B.factors, B.τ, A))
-
-### QB / QcB
-lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B)
-lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
-    (A = adjA.Q; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B))
-lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
-    (A = adjA.Q; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B))
-
-# In LQ factorization, `Q` is expressed as the product of the adjoint of the
-# reflectors.  Thus, `det` has to be conjugated.
-det(Q::LQPackedQ) = conj(_det_tau(Q.τ))
diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl
deleted file mode 100644
index 875e8cefcb66e..0000000000000
--- a/stdlib/LinearAlgebra/src/adjtrans.jl
+++ /dev/null
@@ -1,512 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-### basic definitions (types, aliases, constructors, abstractarray interface, sundry similar)
-
-# note that Adjoint and Transpose must be able to wrap not only vectors and matrices
-# but also factorizations, rotations, and other linear algebra objects, including
-# user-defined such objects. so do not restrict the wrapped type.
-"""
-    Adjoint
-
-Lazy wrapper type for an adjoint view of the underlying linear algebra object,
-usually an `AbstractVector`/`AbstractMatrix`.
-Usually, the `Adjoint` constructor should not be called directly, use [`adjoint`](@ref)
-instead. To materialize the view use [`copy`](@ref).
-
-This type is intended for linear algebra usage - for general data manipulation see
-[`permutedims`](@ref Base.permutedims).
-
-# Examples
-```jldoctest
-julia> A = [3+2im 9+2im; 0 0]
-2×2 Matrix{Complex{Int64}}:
- 3+2im  9+2im
- 0+0im  0+0im
-
-julia> Adjoint(A)
-2×2 adjoint(::Matrix{Complex{Int64}}) with eltype Complex{Int64}:
- 3-2im  0+0im
- 9-2im  0+0im
-```
-"""
-struct Adjoint{T,S} <: AbstractMatrix{T}
-    parent::S
-end
-"""
-    Transpose
-
-Lazy wrapper type for a transpose view of the underlying linear algebra object,
-usually an `AbstractVector`/`AbstractMatrix`.
-Usually, the `Transpose` constructor should not be called directly, use [`transpose`](@ref)
-instead. To materialize the view use [`copy`](@ref).
-
-This type is intended for linear algebra usage - for general data manipulation see
-[`permutedims`](@ref Base.permutedims).
-
-# Examples
-```jldoctest
-julia> A = [2 3; 0 0]
-2×2 Matrix{Int64}:
- 2  3
- 0  0
-
-julia> Transpose(A)
-2×2 transpose(::Matrix{Int64}) with eltype Int64:
- 2  0
- 3  0
-```
-"""
-struct Transpose{T,S} <: AbstractMatrix{T}
-    parent::S
-end
-
-# basic outer constructors
-Adjoint(A) = Adjoint{Base.promote_op(adjoint,eltype(A)),typeof(A)}(A)
-Transpose(A) = Transpose{Base.promote_op(transpose,eltype(A)),typeof(A)}(A)
-
-"""
-    adj_or_trans(::AbstractArray) -> adjoint|transpose|identity
-    adj_or_trans(::Type{<:AbstractArray}) -> adjoint|transpose|identity
-
-Return [`adjoint`](@ref) from an `Adjoint` type or object and
-[`transpose`](@ref) from a `Transpose` type or object. Otherwise,
-return [`identity`](@ref). Note that `Adjoint` and `Transpose` have
-to be the outer-most wrapper object for a non-`identity` function to be
-returned.
-"""
-adj_or_trans(::T) where {T<:AbstractArray} = adj_or_trans(T)
-adj_or_trans(::Type{<:AbstractArray}) = identity
-adj_or_trans(::Type{<:Adjoint}) = adjoint
-adj_or_trans(::Type{<:Transpose}) = transpose
-
-"""
-    inplace_adj_or_trans(::AbstractArray) -> adjoint!|transpose!|copyto!
-    inplace_adj_or_trans(::Type{<:AbstractArray}) -> adjoint!|transpose!|copyto!
-
-Return [`adjoint!`](@ref) from an `Adjoint` type or object and
-[`transpose!`](@ref) from a `Transpose` type or object. Otherwise,
-return [`copyto!`](@ref). Note that `Adjoint` and `Transpose` have
-to be the outer-most wrapper object for a non-`identity` function to be
-returned.
-"""
-inplace_adj_or_trans(::T) where {T <: AbstractArray} = inplace_adj_or_trans(T)
-inplace_adj_or_trans(::Type{<:AbstractArray}) = copyto!
-inplace_adj_or_trans(::Type{<:Adjoint}) = adjoint!
-inplace_adj_or_trans(::Type{<:Transpose}) = transpose!
-
-_unwrap(A::Adjoint)   = parent(A)
-_unwrap(A::Transpose) = parent(A)
-
-Base.dataids(A::Union{Adjoint, Transpose}) = Base.dataids(A.parent)
-Base.unaliascopy(A::Union{Adjoint,Transpose}) = typeof(A)(Base.unaliascopy(A.parent))
-
-# wrapping lowercase quasi-constructors
-"""
-    A'
-    adjoint(A)
-
-Lazy adjoint (conjugate transposition). Note that `adjoint` is applied recursively to
-elements.
-
-For number types, `adjoint` returns the complex conjugate, and therefore it is equivalent to
-the identity function for real numbers.
-
-This operation is intended for linear algebra usage - for general data manipulation see
-[`permutedims`](@ref Base.permutedims).
-
-# Examples
-```jldoctest
-julia> A = [3+2im 9+2im; 0  0]
-2×2 Matrix{Complex{Int64}}:
- 3+2im  9+2im
- 0+0im  0+0im
-
-julia> B = A' # equivalently adjoint(A)
-2×2 adjoint(::Matrix{Complex{Int64}}) with eltype Complex{Int64}:
- 3-2im  0+0im
- 9-2im  0+0im
-
-julia> B isa Adjoint
-true
-
-julia> adjoint(B) === A # the adjoint of an adjoint unwraps the parent
-true
-
-julia> Adjoint(B) # however, the constructor always wraps its argument
-2×2 adjoint(adjoint(::Matrix{Complex{Int64}})) with eltype Complex{Int64}:
- 3+2im  9+2im
- 0+0im  0+0im
-
-julia> B[1,2] = 4 + 5im; # modifying B will modify A automatically
-
-julia> A
-2×2 Matrix{Complex{Int64}}:
- 3+2im  9+2im
- 4-5im  0+0im
-```
-
-For real matrices, the `adjoint` operation is equivalent to a `transpose`.
-
-```jldoctest
-julia> A = reshape([x for x in 1:4], 2, 2)
-2×2 Matrix{Int64}:
- 1  3
- 2  4
-
-julia> A'
-2×2 adjoint(::Matrix{Int64}) with eltype Int64:
- 1  2
- 3  4
-
-julia> adjoint(A) == transpose(A)
-true
-```
-
-The adjoint of an `AbstractVector` is a row-vector:
-```jldoctest
-julia> x = [3, 4im]
-2-element Vector{Complex{Int64}}:
- 3 + 0im
- 0 + 4im
-
-julia> x'
-1×2 adjoint(::Vector{Complex{Int64}}) with eltype Complex{Int64}:
- 3+0im  0-4im
-
-julia> x'x # compute the dot product, equivalently x' * x
-25 + 0im
-```
-
-For a matrix of matrices, the individual blocks are recursively operated on:
-```jldoctest
-julia> A = reshape([x + im*x for x in 1:4], 2, 2)
-2×2 Matrix{Complex{Int64}}:
- 1+1im  3+3im
- 2+2im  4+4im
-
-julia> C = reshape([A, 2A, 3A, 4A], 2, 2)
-2×2 Matrix{Matrix{Complex{Int64}}}:
- [1+1im 3+3im; 2+2im 4+4im]  [3+3im 9+9im; 6+6im 12+12im]
- [2+2im 6+6im; 4+4im 8+8im]  [4+4im 12+12im; 8+8im 16+16im]
-
-julia> C'
-2×2 adjoint(::Matrix{Matrix{Complex{Int64}}}) with eltype Adjoint{Complex{Int64}, Matrix{Complex{Int64}}}:
- [1-1im 2-2im; 3-3im 4-4im]    [2-2im 4-4im; 6-6im 8-8im]
- [3-3im 6-6im; 9-9im 12-12im]  [4-4im 8-8im; 12-12im 16-16im]
-```
-"""
-adjoint(A::AbstractVecOrMat) = Adjoint(A)
-
-"""
-    transpose(A)
-
-Lazy transpose. Mutating the returned object should appropriately mutate `A`. Often,
-but not always, yields `Transpose(A)`, where `Transpose` is a lazy transpose wrapper. Note
-that this operation is recursive.
-
-This operation is intended for linear algebra usage - for general data manipulation see
-[`permutedims`](@ref Base.permutedims), which is non-recursive.
-
-# Examples
-```jldoctest
-julia> A = [3 2; 0 0]
-2×2 Matrix{Int64}:
- 3  2
- 0  0
-
-julia> B = transpose(A)
-2×2 transpose(::Matrix{Int64}) with eltype Int64:
- 3  0
- 2  0
-
-julia> B isa Transpose
-true
-
-julia> transpose(B) === A # the transpose of a transpose unwraps the parent
-true
-
-julia> Transpose(B) # however, the constructor always wraps its argument
-2×2 transpose(transpose(::Matrix{Int64})) with eltype Int64:
- 3  2
- 0  0
-
-julia> B[1,2] = 4; # modifying B will modify A automatically
-
-julia> A
-2×2 Matrix{Int64}:
- 3  2
- 4  0
-```
-
-For complex matrices, the `adjoint` operation is equivalent to a conjugate-transpose.
-```jldoctest
-julia> A = reshape([Complex(x, x) for x in 1:4], 2, 2)
-2×2 Matrix{Complex{Int64}}:
- 1+1im  3+3im
- 2+2im  4+4im
-
-julia> adjoint(A) == conj(transpose(A))
-true
-```
-
-The `transpose` of an `AbstractVector` is a row-vector:
-```jldoctest
-julia> v = [1,2,3]
-3-element Vector{Int64}:
- 1
- 2
- 3
-
-julia> transpose(v) # returns a row-vector
-1×3 transpose(::Vector{Int64}) with eltype Int64:
- 1  2  3
-
-julia> transpose(v) * v # compute the dot product
-14
-```
-
-For a matrix of matrices, the individual blocks are recursively operated on:
-```jldoctest
-julia> C = [1 3; 2 4]
-2×2 Matrix{Int64}:
- 1  3
- 2  4
-
-julia> D = reshape([C, 2C, 3C, 4C], 2, 2) # construct a block matrix
-2×2 Matrix{Matrix{Int64}}:
- [1 3; 2 4]  [3 9; 6 12]
- [2 6; 4 8]  [4 12; 8 16]
-
-julia> transpose(D) # blocks are recursively transposed
-2×2 transpose(::Matrix{Matrix{Int64}}) with eltype Transpose{Int64, Matrix{Int64}}:
- [1 2; 3 4]   [2 4; 6 8]
- [3 6; 9 12]  [4 8; 12 16]
-```
-"""
-transpose(A::AbstractVecOrMat) = Transpose(A)
-
-# unwrapping lowercase quasi-constructors
-adjoint(A::Adjoint) = A.parent
-transpose(A::Transpose) = A.parent
-adjoint(A::Transpose{<:Real}) = A.parent
-transpose(A::Adjoint{<:Real}) = A.parent
-
-# printing
-function Base.showarg(io::IO, v::Adjoint, toplevel)
-    print(io, "adjoint(")
-    Base.showarg(io, parent(v), false)
-    print(io, ')')
-    toplevel && print(io, " with eltype ", eltype(v))
-    return nothing
-end
-function Base.showarg(io::IO, v::Transpose, toplevel)
-    print(io, "transpose(")
-    Base.showarg(io, parent(v), false)
-    print(io, ')')
-    toplevel && print(io, " with eltype ", eltype(v))
-    return nothing
-end
-
-# some aliases for internal convenience use
-const AdjOrTrans{T,S} = Union{Adjoint{T,S},Transpose{T,S}} where {T,S}
-const AdjointAbsVec{T} = Adjoint{T,<:AbstractVector}
-const AdjointAbsMat{T} = Adjoint{T,<:AbstractMatrix}
-const TransposeAbsVec{T} = Transpose{T,<:AbstractVector}
-const TransposeAbsMat{T} = Transpose{T,<:AbstractMatrix}
-const AdjOrTransAbsVec{T} = AdjOrTrans{T,<:AbstractVector}
-const AdjOrTransAbsMat{T} = AdjOrTrans{T,<:AbstractMatrix}
-
-# for internal use below
-wrapperop(_) = identity
-wrapperop(::Adjoint) = adjoint
-wrapperop(::Transpose) = transpose
-
-# the following fallbacks can be removed if Adjoint/Transpose are restricted to AbstractVecOrMat
-size(A::AdjOrTrans) = reverse(size(A.parent))
-axes(A::AdjOrTrans) = reverse(axes(A.parent))
-# AbstractArray interface, basic definitions
-length(A::AdjOrTrans) = length(A.parent)
-size(v::AdjOrTransAbsVec) = (1, length(v.parent))
-size(A::AdjOrTransAbsMat) = reverse(size(A.parent))
-axes(v::AdjOrTransAbsVec) = (Base.OneTo(1), axes(v.parent)...)
-axes(A::AdjOrTransAbsMat) = reverse(axes(A.parent))
-IndexStyle(::Type{<:AdjOrTransAbsVec}) = IndexLinear()
-IndexStyle(::Type{<:AdjOrTransAbsMat}) = IndexCartesian()
-@propagate_inbounds Base.isassigned(v::AdjOrTransAbsVec, i::Int) = isassigned(v.parent, i-1+first(axes(v.parent)[1]))
-@propagate_inbounds Base.isassigned(v::AdjOrTransAbsMat, i::Int, j::Int) = isassigned(v.parent, j, i)
-@propagate_inbounds getindex(v::AdjOrTransAbsVec{T}, i::Int) where {T} = wrapperop(v)(v.parent[i-1+first(axes(v.parent)[1])])::T
-@propagate_inbounds getindex(A::AdjOrTransAbsMat{T}, i::Int, j::Int) where {T} = wrapperop(A)(A.parent[j, i])::T
-@propagate_inbounds setindex!(v::AdjOrTransAbsVec, x, i::Int) = (setindex!(v.parent, wrapperop(v)(x), i-1+first(axes(v.parent)[1])); v)
-@propagate_inbounds setindex!(A::AdjOrTransAbsMat, x, i::Int, j::Int) = (setindex!(A.parent, wrapperop(A)(x), j, i); A)
-# AbstractArray interface, additional definitions to retain wrapper over vectors where appropriate
-@propagate_inbounds getindex(v::AdjOrTransAbsVec, ::Colon, is::AbstractArray{Int}) = wrapperop(v)(v.parent[is])
-@propagate_inbounds getindex(v::AdjOrTransAbsVec, ::Colon, ::Colon) = wrapperop(v)(v.parent[:])
-
-# conversion of underlying storage
-convert(::Type{Adjoint{T,S}}, A::Adjoint) where {T,S} = Adjoint{T,S}(convert(S, A.parent))::Adjoint{T,S}
-convert(::Type{Transpose{T,S}}, A::Transpose) where {T,S} = Transpose{T,S}(convert(S, A.parent))::Transpose{T,S}
-
-# Strides and pointer for transposed strided arrays — but only if the elements are actually stored in memory
-Base.strides(A::Adjoint{<:Real, <:AbstractVector}) = (stride(A.parent, 2), stride(A.parent, 1))
-Base.strides(A::Transpose{<:Any, <:AbstractVector}) = (stride(A.parent, 2), stride(A.parent, 1))
-# For matrices it's slightly faster to use reverse and avoid calling stride twice
-Base.strides(A::Adjoint{<:Real, <:AbstractMatrix}) = reverse(strides(A.parent))
-Base.strides(A::Transpose{<:Any, <:AbstractMatrix}) = reverse(strides(A.parent))
-
-Base.unsafe_convert(::Type{Ptr{T}}, A::Adjoint{<:Real, <:AbstractVecOrMat}) where {T} = Base.unsafe_convert(Ptr{T}, A.parent)
-Base.unsafe_convert(::Type{Ptr{T}}, A::Transpose{<:Any, <:AbstractVecOrMat}) where {T} = Base.unsafe_convert(Ptr{T}, A.parent)
-
-Base.elsize(::Type{<:Adjoint{<:Real, P}}) where {P<:AbstractVecOrMat} = Base.elsize(P)
-Base.elsize(::Type{<:Transpose{<:Any, P}}) where {P<:AbstractVecOrMat} = Base.elsize(P)
-
-# for vectors, the semantics of the wrapped and unwrapped types differ
-# so attempt to maintain both the parent and wrapper type insofar as possible
-similar(A::AdjOrTransAbsVec) = wrapperop(A)(similar(A.parent))
-similar(A::AdjOrTransAbsVec, ::Type{T}) where {T} = wrapperop(A)(similar(A.parent, Base.promote_op(wrapperop(A), T)))
-# for matrices, the semantics of the wrapped and unwrapped types are generally the same
-# and as you are allocating with similar anyway, you might as well get something unwrapped
-similar(A::AdjOrTrans) = similar(A.parent, eltype(A), axes(A))
-similar(A::AdjOrTrans, ::Type{T}) where {T} = similar(A.parent, T, axes(A))
-similar(A::AdjOrTrans, ::Type{T}, dims::Dims{N}) where {T,N} = similar(A.parent, T, dims)
-
-# AbstractMatrix{T} constructor for adjtrans vector: preserve wrapped type
-AbstractMatrix{T}(A::AdjOrTransAbsVec) where {T} = wrapperop(A)(AbstractVector{T}(A.parent))
-
-# sundry basic definitions
-parent(A::AdjOrTrans) = A.parent
-vec(v::TransposeAbsVec{<:Number}) = parent(v)
-vec(v::AdjointAbsVec{<:Real}) = parent(v)
-
-### concatenation
-# preserve Adjoint/Transpose wrapper around vectors
-# to retain the associated semantics post-concatenation
-hcat(avs::Union{Number,AdjointAbsVec}...) = _adjoint_hcat(avs...)
-hcat(tvs::Union{Number,TransposeAbsVec}...) = _transpose_hcat(tvs...)
-_adjoint_hcat(avs::Union{Number,AdjointAbsVec}...) = adjoint(vcat(map(adjoint, avs)...))
-_transpose_hcat(tvs::Union{Number,TransposeAbsVec}...) = transpose(vcat(map(transpose, tvs)...))
-typed_hcat(::Type{T}, avs::Union{Number,AdjointAbsVec}...) where {T} = adjoint(typed_vcat(T, map(adjoint, avs)...))
-typed_hcat(::Type{T}, tvs::Union{Number,TransposeAbsVec}...) where {T} = transpose(typed_vcat(T, map(transpose, tvs)...))
-# otherwise-redundant definitions necessary to prevent hitting the concat methods in LinearAlgebra/special.jl
-hcat(avs::Adjoint{<:Any,<:Vector}...) = _adjoint_hcat(avs...)
-hcat(tvs::Transpose{<:Any,<:Vector}...) = _transpose_hcat(tvs...)
-hcat(avs::Adjoint{T,Vector{T}}...) where {T} = _adjoint_hcat(avs...)
-hcat(tvs::Transpose{T,Vector{T}}...) where {T} = _transpose_hcat(tvs...)
-# TODO unify and allow mixed combinations
-
-
-### higher order functions
-# preserve Adjoint/Transpose wrapper around vectors
-# to retain the associated semantics post-map/broadcast
-#
-# note that the caller's operation f operates in the domain of the wrapped vectors' entries.
-# hence the adjoint->f->adjoint shenanigans applied to the parent vectors' entries.
-map(f, avs::AdjointAbsVec...) = adjoint(map((xs...) -> adjoint(f(adjoint.(xs)...)), parent.(avs)...))
-map(f, tvs::TransposeAbsVec...) = transpose(map((xs...) -> transpose(f(transpose.(xs)...)), parent.(tvs)...))
-quasiparentt(x) = parent(x); quasiparentt(x::Number) = x # to handle numbers in the defs below
-quasiparenta(x) = parent(x); quasiparenta(x::Number) = conj(x) # to handle numbers in the defs below
-broadcast(f, avs::Union{Number,AdjointAbsVec}...) = adjoint(broadcast((xs...) -> adjoint(f(adjoint.(xs)...)), quasiparenta.(avs)...))
-broadcast(f, tvs::Union{Number,TransposeAbsVec}...) = transpose(broadcast((xs...) -> transpose(f(transpose.(xs)...)), quasiparentt.(tvs)...))
-# Hack to preserve behavior after #32122; this needs to be done with a broadcast style instead to support dotted fusion
-Broadcast.broadcast_preserving_zero_d(f, avs::Union{Number,AdjointAbsVec}...) = adjoint(broadcast((xs...) -> adjoint(f(adjoint.(xs)...)), quasiparenta.(avs)...))
-Broadcast.broadcast_preserving_zero_d(f, tvs::Union{Number,TransposeAbsVec}...) = transpose(broadcast((xs...) -> transpose(f(transpose.(xs)...)), quasiparentt.(tvs)...))
-# TODO unify and allow mixed combinations with a broadcast style
-
-
-### reductions
-# faster to sum the Array than to work through the wrapper (but only in commutative reduction ops as in Base/permuteddimsarray.jl)
-Base._mapreduce_dim(f, op::CommutativeOps, init::Base._InitialValue, A::Transpose, dims::Colon) =
-    Base._mapreduce_dim(f∘transpose, op, init, parent(A), dims)
-Base._mapreduce_dim(f, op::CommutativeOps, init::Base._InitialValue, A::Adjoint, dims::Colon) =
-    Base._mapreduce_dim(f∘adjoint, op, init, parent(A), dims)
-# in prod, use fast path only in the commutative case to avoid surprises
-Base._mapreduce_dim(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, init::Base._InitialValue, A::Transpose{<:Union{Real,Complex}}, dims::Colon) =
-    Base._mapreduce_dim(f∘transpose, op, init, parent(A), dims)
-Base._mapreduce_dim(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, init::Base._InitialValue, A::Adjoint{<:Union{Real,Complex}}, dims::Colon) =
-    Base._mapreduce_dim(f∘adjoint, op, init, parent(A), dims)
-# count allows for optimization only if the parent array has Bool eltype
-Base._count(::typeof(identity), A::Transpose{Bool}, ::Colon, init) = Base._count(identity, parent(A), :, init)
-Base._count(::typeof(identity), A::Adjoint{Bool}, ::Colon, init) = Base._count(identity, parent(A), :, init)
-Base._any(f, A::Transpose, ::Colon) = Base._any(f∘transpose, parent(A), :)
-Base._any(f, A::Adjoint, ::Colon) = Base._any(f∘adjoint, parent(A), :)
-Base._all(f, A::Transpose, ::Colon) = Base._all(f∘transpose, parent(A), :)
-Base._all(f, A::Adjoint, ::Colon) = Base._all(f∘adjoint, parent(A), :)
-# sum(A'; dims)
-Base.mapreducedim!(f, op::CommutativeOps, B::AbstractArray, A::TransposeAbsMat) =
-    (Base.mapreducedim!(f∘transpose, op, switch_dim12(B), parent(A)); B)
-Base.mapreducedim!(f, op::CommutativeOps, B::AbstractArray, A::AdjointAbsMat) =
-    (Base.mapreducedim!(f∘adjoint, op, switch_dim12(B), parent(A)); B)
-Base.mapreducedim!(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, B::AbstractArray, A::TransposeAbsMat{<:Union{Real,Complex}}) =
-    (Base.mapreducedim!(f∘transpose, op, switch_dim12(B), parent(A)); B)
-Base.mapreducedim!(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, B::AbstractArray, A::AdjointAbsMat{<:Union{Real,Complex}}) =
-    (Base.mapreducedim!(f∘adjoint, op, switch_dim12(B), parent(A)); B)
-
-switch_dim12(B::AbstractVector) = permutedims(B)
-switch_dim12(B::AbstractVector{<:Number}) = transpose(B) # avoid allocs due to permutedims
-switch_dim12(B::AbstractArray{<:Any,0}) = B
-switch_dim12(B::AbstractArray) = PermutedDimsArray(B, (2, 1, ntuple(Base.Fix1(+,2), ndims(B) - 2)...))
-
-### linear algebra
-
-(-)(A::Adjoint)   = Adjoint(  -A.parent)
-(-)(A::Transpose) = Transpose(-A.parent)
-
-tr(A::Adjoint) = adjoint(tr(parent(A)))
-tr(A::Transpose) = transpose(tr(parent(A)))
-
-## multiplication *
-
-function _dot_nonrecursive(u, v)
-    lu = length(u)
-    if lu != length(v)
-        throw(DimensionMismatch("first array has length $(lu) which does not match the length of the second, $(length(v))."))
-    end
-    if lu == 0
-        zero(eltype(u)) * zero(eltype(v))
-    else
-        sum(uu*vv for (uu, vv) in zip(u, v))
-    end
-end
-
-# Adjoint/Transpose-vector * vector
-*(u::AdjointAbsVec{<:Number}, v::AbstractVector{<:Number}) = dot(u.parent, v)
-*(u::TransposeAbsVec{T}, v::AbstractVector{T}) where {T<:Real} = dot(u.parent, v)
-*(u::AdjOrTransAbsVec, v::AbstractVector) = _dot_nonrecursive(u, v)
-
-
-# vector * Adjoint/Transpose-vector
-*(u::AbstractVector, v::AdjOrTransAbsVec) = broadcast(*, u, v)
-# Adjoint/Transpose-vector * Adjoint/Transpose-vector
-# (necessary for disambiguation with fallback methods in linalg/matmul)
-*(u::AdjointAbsVec, v::AdjointAbsVec) = throw(MethodError(*, (u, v)))
-*(u::TransposeAbsVec, v::TransposeAbsVec) = throw(MethodError(*, (u, v)))
-
-# AdjOrTransAbsVec{<:Any,<:AdjOrTransAbsVec} is a lazy conj vectors
-# We need to expand the combinations to avoid ambiguities
-(*)(u::TransposeAbsVec, v::AdjointAbsVec{<:Any,<:TransposeAbsVec}) = _dot_nonrecursive(u, v)
-(*)(u::AdjointAbsVec,   v::AdjointAbsVec{<:Any,<:TransposeAbsVec}) = _dot_nonrecursive(u, v)
-(*)(u::TransposeAbsVec, v::TransposeAbsVec{<:Any,<:AdjointAbsVec}) = _dot_nonrecursive(u, v)
-(*)(u::AdjointAbsVec,   v::TransposeAbsVec{<:Any,<:AdjointAbsVec}) = _dot_nonrecursive(u, v)
-
-## pseudoinversion
-pinv(v::AdjointAbsVec, tol::Real = 0) = pinv(v.parent, tol).parent
-pinv(v::TransposeAbsVec, tol::Real = 0) = pinv(conj(v.parent)).parent
-
-
-## left-division \
-\(u::AdjOrTransAbsVec, v::AdjOrTransAbsVec) = pinv(u) * v
-
-
-## right-division /
-/(u::AdjointAbsVec, A::AbstractMatrix) = adjoint(adjoint(A) \ u.parent)
-/(u::TransposeAbsVec, A::AbstractMatrix) = transpose(transpose(A) \ u.parent)
-/(u::AdjointAbsVec, A::TransposeAbsMat) = adjoint(conj(A.parent) \ u.parent) # technically should be adjoint(copy(adjoint(copy(A))) \ u.parent)
-/(u::TransposeAbsVec, A::AdjointAbsMat) = transpose(conj(A.parent) \ u.parent) # technically should be transpose(copy(transpose(copy(A))) \ u.parent)
-
-## complex conjugate
-conj(A::Transpose) = adjoint(A.parent)
-conj(A::Adjoint) = transpose(A.parent)
-
-## structured matrix methods ##
-function Base.replace_in_print_matrix(A::AdjOrTrans,i::Integer,j::Integer,s::AbstractString)
-    Base.replace_in_print_matrix(parent(A), j, i, s)
-end
diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl
deleted file mode 100644
index 0014ba1ec8ab0..0000000000000
--- a/stdlib/LinearAlgebra/src/bidiag.jl
+++ /dev/null
@@ -1,974 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Bidiagonal matrices
-struct Bidiagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T}
-    dv::V      # diagonal
-    ev::V      # sub/super diagonal
-    uplo::Char # upper bidiagonal ('U') or lower ('L')
-    function Bidiagonal{T,V}(dv, ev, uplo::AbstractChar) where {T,V<:AbstractVector{T}}
-        require_one_based_indexing(dv, ev)
-        if length(ev) != max(length(dv)-1, 0)
-            throw(DimensionMismatch("length of diagonal vector is $(length(dv)), length of off-diagonal vector is $(length(ev))"))
-        end
-        (uplo != 'U' && uplo != 'L') && throw_uplo()
-        new{T,V}(dv, ev, uplo)
-    end
-end
-function Bidiagonal{T,V}(dv, ev, uplo::Symbol) where {T,V<:AbstractVector{T}}
-    Bidiagonal{T,V}(dv, ev, char_uplo(uplo))
-end
-function Bidiagonal{T}(dv::AbstractVector, ev::AbstractVector, uplo::Union{Symbol,AbstractChar}) where {T}
-    Bidiagonal(convert(AbstractVector{T}, dv)::AbstractVector{T},
-               convert(AbstractVector{T}, ev)::AbstractVector{T},
-               uplo)
-end
-function Bidiagonal{T,V}(A::Bidiagonal) where {T,V<:AbstractVector{T}}
-    Bidiagonal{T,V}(A.dv, A.ev, A.uplo)
-end
-
-"""
-    Bidiagonal(dv::V, ev::V, uplo::Symbol) where V <: AbstractVector
-
-Constructs an upper (`uplo=:U`) or lower (`uplo=:L`) bidiagonal matrix using the
-given diagonal (`dv`) and off-diagonal (`ev`) vectors. The result is of type `Bidiagonal`
-and provides efficient specialized linear solvers, but may be converted into a regular
-matrix with [`convert(Array, _)`](@ref) (or `Array(_)` for short). The length of `ev`
-must be one less than the length of `dv`.
-
-# Examples
-```jldoctest
-julia> dv = [1, 2, 3, 4]
-4-element Vector{Int64}:
- 1
- 2
- 3
- 4
-
-julia> ev = [7, 8, 9]
-3-element Vector{Int64}:
- 7
- 8
- 9
-
-julia> Bu = Bidiagonal(dv, ev, :U) # ev is on the first superdiagonal
-4×4 Bidiagonal{Int64, Vector{Int64}}:
- 1  7  ⋅  ⋅
- ⋅  2  8  ⋅
- ⋅  ⋅  3  9
- ⋅  ⋅  ⋅  4
-
-julia> Bl = Bidiagonal(dv, ev, :L) # ev is on the first subdiagonal
-4×4 Bidiagonal{Int64, Vector{Int64}}:
- 1  ⋅  ⋅  ⋅
- 7  2  ⋅  ⋅
- ⋅  8  3  ⋅
- ⋅  ⋅  9  4
-```
-"""
-function Bidiagonal(dv::V, ev::V, uplo::Symbol) where {T,V<:AbstractVector{T}}
-    Bidiagonal{T,V}(dv, ev, uplo)
-end
-function Bidiagonal(dv::V, ev::V, uplo::AbstractChar) where {T,V<:AbstractVector{T}}
-    Bidiagonal{T,V}(dv, ev, uplo)
-end
-
-#To allow Bidiagonal's where the "dv" is Vector{T} and "ev" Vector{S},
-#where T and S can be promoted
-function Bidiagonal(dv::Vector{T}, ev::Vector{S}, uplo::Symbol) where {T,S}
-    TS = promote_type(T,S)
-    return Bidiagonal{TS,Vector{TS}}(dv, ev, uplo)
-end
-
-"""
-    Bidiagonal(A, uplo::Symbol)
-
-Construct a `Bidiagonal` matrix from the main diagonal of `A` and
-its first super- (if `uplo=:U`) or sub-diagonal (if `uplo=:L`).
-
-# Examples
-```jldoctest
-julia> A = [1 1 1 1; 2 2 2 2; 3 3 3 3; 4 4 4 4]
-4×4 Matrix{Int64}:
- 1  1  1  1
- 2  2  2  2
- 3  3  3  3
- 4  4  4  4
-
-julia> Bidiagonal(A, :U) # contains the main diagonal and first superdiagonal of A
-4×4 Bidiagonal{Int64, Vector{Int64}}:
- 1  1  ⋅  ⋅
- ⋅  2  2  ⋅
- ⋅  ⋅  3  3
- ⋅  ⋅  ⋅  4
-
-julia> Bidiagonal(A, :L) # contains the main diagonal and first subdiagonal of A
-4×4 Bidiagonal{Int64, Vector{Int64}}:
- 1  ⋅  ⋅  ⋅
- 2  2  ⋅  ⋅
- ⋅  3  3  ⋅
- ⋅  ⋅  4  4
-```
-"""
-function Bidiagonal(A::AbstractMatrix, uplo::Symbol)
-    Bidiagonal(diag(A, 0), diag(A, uplo === :U ? 1 : -1), uplo)
-end
-
-
-Bidiagonal(A::Bidiagonal) = A
-Bidiagonal{T}(A::Bidiagonal{T}) where {T} = A
-Bidiagonal{T}(A::Bidiagonal) where {T} = Bidiagonal{T}(A.dv, A.ev, A.uplo)
-
-bidiagzero(::Bidiagonal{T}, i, j) where {T} = zero(T)
-function bidiagzero(A::Bidiagonal{<:AbstractMatrix}, i, j)
-    Tel = eltype(eltype(A.dv))
-    if i < j && A.uplo == 'U' #= top right zeros =#
-        return zeros(Tel, size(A.ev[i], 1), size(A.ev[j-1], 2))
-    elseif j < i && A.uplo == 'L' #= bottom left zeros =#
-        return zeros(Tel, size(A.ev[i-1], 1), size(A.ev[j], 2))
-    else
-        return zeros(Tel, size(A.dv[i], 1), size(A.dv[j], 2))
-    end
-end
-
-@inline function Base.isassigned(A::Bidiagonal, i::Int, j::Int)
-    @boundscheck checkbounds(Bool, A, i, j) || return false
-    if i == j
-        return @inbounds isassigned(A.dv, i)
-    elseif A.uplo == 'U' && (i == j - 1)
-        return @inbounds isassigned(A.ev, i)
-    elseif A.uplo == 'L' && (i == j + 1)
-        return @inbounds isassigned(A.ev, j)
-    else
-        return true
-    end
-end
-
-@inline function Base.isstored(A::Bidiagonal, i::Int, j::Int)
-    @boundscheck checkbounds(A, i, j)
-    if i == j
-        return @inbounds Base.isstored(A.dv, i)
-    elseif A.uplo == 'U' && (i == j - 1)
-        return @inbounds Base.isstored(A.ev, i)
-    elseif A.uplo == 'L' && (i == j + 1)
-        return @inbounds Base.isstored(A.ev, j)
-    else
-        return false
-    end
-end
-
-@inline function getindex(A::Bidiagonal{T}, i::Integer, j::Integer) where T
-    @boundscheck checkbounds(A, i, j)
-    if i == j
-        return @inbounds A.dv[i]
-    elseif A.uplo == 'U' && (i == j - 1)
-        return @inbounds A.ev[i]
-    elseif A.uplo == 'L' && (i == j + 1)
-        return @inbounds A.ev[j]
-    else
-        return bidiagzero(A, i, j)
-    end
-end
-
-@inline function setindex!(A::Bidiagonal, x, i::Integer, j::Integer)
-    @boundscheck checkbounds(A, i, j)
-    if i == j
-        @inbounds A.dv[i] = x
-    elseif A.uplo == 'U' && (i == j - 1)
-        @inbounds A.ev[i] = x
-    elseif A.uplo == 'L' && (i == j + 1)
-        @inbounds A.ev[j] = x
-    elseif !iszero(x)
-        throw(ArgumentError(string("cannot set entry ($i, $j) off the ",
-            "$(istriu(A) ? "upper" : "lower") bidiagonal band to a nonzero value ($x)")))
-    end
-    return x
-end
-
-## structured matrix methods ##
-function Base.replace_in_print_matrix(A::Bidiagonal,i::Integer,j::Integer,s::AbstractString)
-    if A.uplo == 'U'
-        i==j || i==j-1 ? s : Base.replace_with_centered_mark(s)
-    else
-        i==j || i==j+1 ? s : Base.replace_with_centered_mark(s)
-    end
-end
-
-#Converting from Bidiagonal to dense Matrix
-function Matrix{T}(A::Bidiagonal) where T
-    n = size(A, 1)
-    B = Matrix{T}(undef, n, n)
-    n == 0 && return B
-    n > 1 && fill!(B, zero(T))
-    @inbounds for i = 1:n - 1
-        B[i,i] = A.dv[i]
-        if A.uplo == 'U'
-            B[i,i+1] = A.ev[i]
-        else
-            B[i+1,i] = A.ev[i]
-        end
-    end
-    B[n,n] = A.dv[n]
-    return B
-end
-Matrix(A::Bidiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(A)
-Array(A::Bidiagonal) = Matrix(A)
-promote_rule(::Type{Matrix{T}}, ::Type{<:Bidiagonal{S}}) where {T,S} =
-    @isdefined(T) && @isdefined(S) ? Matrix{promote_type(T,S)} : Matrix
-promote_rule(::Type{Matrix}, ::Type{<:Bidiagonal}) = Matrix
-
-#Converting from Bidiagonal to Tridiagonal
-function Tridiagonal{T}(A::Bidiagonal) where T
-    dv = convert(AbstractVector{T}, A.dv)
-    ev = convert(AbstractVector{T}, A.ev)
-    z = fill!(similar(ev), zero(T))
-    A.uplo == 'U' ? Tridiagonal(z, dv, ev) : Tridiagonal(ev, dv, z)
-end
-promote_rule(::Type{<:Tridiagonal{T}}, ::Type{<:Bidiagonal{S}}) where {T,S} =
-    @isdefined(T) && @isdefined(S) ? Tridiagonal{promote_type(T,S)} : Tridiagonal
-promote_rule(::Type{<:Tridiagonal}, ::Type{<:Bidiagonal}) = Tridiagonal
-
-# When asked to convert Bidiagonal to AbstractMatrix{T}, preserve structure by converting to Bidiagonal{T} <: AbstractMatrix{T}
-AbstractMatrix{T}(A::Bidiagonal) where {T} = convert(Bidiagonal{T}, A)
-
-convert(::Type{T}, m::AbstractMatrix) where {T<:Bidiagonal} = m isa T ? m : T(m)::T
-
-similar(B::Bidiagonal, ::Type{T}) where {T} = Bidiagonal(similar(B.dv, T), similar(B.ev, T), B.uplo)
-similar(B::Bidiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(B.dv, T, dims)
-
-tr(B::Bidiagonal) = sum(B.dv)
-
-function kron(A::Diagonal, B::Bidiagonal)
-    # `_droplast!` is only guaranteed to work with `Vector`
-    kdv = _makevector(kron(diag(A), B.dv))
-    kev = _droplast!(_makevector(kron(diag(A), _pushzero(B.ev))))
-    Bidiagonal(kdv, kev, B.uplo)
-end
-
-###################
-# LAPACK routines #
-###################
-
-#Singular values
-svdvals!(M::Bidiagonal{<:BlasReal}) = LAPACK.bdsdc!(M.uplo, 'N', M.dv, M.ev)[1]
-function svd!(M::Bidiagonal{<:BlasReal}; full::Bool = false)
-    d, e, U, Vt, Q, iQ = LAPACK.bdsdc!(M.uplo, 'I', M.dv, M.ev)
-    SVD(U, d, Vt)
-end
-function svd(M::Bidiagonal; kw...)
-    svd!(copy(M), kw...)
-end
-
-####################
-# Generic routines #
-####################
-
-function show(io::IO, M::Bidiagonal)
-    # TODO: make this readable and one-line
-    summary(io, M)
-    print(io, ":\n diag:")
-    print_matrix(io, (M.dv)')
-    print(io, M.uplo == 'U' ? "\n super:" : "\n sub:")
-    print_matrix(io, (M.ev)')
-end
-
-size(M::Bidiagonal) = (length(M.dv), length(M.dv))
-function size(M::Bidiagonal, d::Integer)
-    if d < 1
-        throw(ArgumentError("dimension must be ≥ 1, got $d"))
-    elseif d <= 2
-        return length(M.dv)
-    else
-        return 1
-    end
-end
-
-#Elementary operations
-for func in (:conj, :copy, :real, :imag)
-    @eval ($func)(M::Bidiagonal) = Bidiagonal(($func)(M.dv), ($func)(M.ev), M.uplo)
-end
-
-adjoint(B::Bidiagonal) = Adjoint(B)
-transpose(B::Bidiagonal) = Transpose(B)
-adjoint(B::Bidiagonal{<:Number}) = Bidiagonal(conj(B.dv), conj(B.ev), B.uplo == 'U' ? :L : :U)
-transpose(B::Bidiagonal{<:Number}) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? :L : :U)
-permutedims(B::Bidiagonal) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? 'L' : 'U')
-function permutedims(B::Bidiagonal, perm)
-    Base.checkdims_perm(B, B, perm)
-    NTuple{2}(perm) == (2, 1) ? permutedims(B) : B
-end
-function Base.copy(aB::Adjoint{<:Any,<:Bidiagonal})
-    B = aB.parent
-    return Bidiagonal(map(x -> copy.(adjoint.(x)), (B.dv, B.ev))..., B.uplo == 'U' ? :L : :U)
-end
-function Base.copy(tB::Transpose{<:Any,<:Bidiagonal})
-    B = tB.parent
-    return Bidiagonal(map(x -> copy.(transpose.(x)), (B.dv, B.ev))..., B.uplo == 'U' ? :L : :U)
-end
-
-iszero(M::Bidiagonal) = iszero(M.dv) && iszero(M.ev)
-isone(M::Bidiagonal) = all(isone, M.dv) && iszero(M.ev)
-function istriu(M::Bidiagonal, k::Integer=0)
-    if M.uplo == 'U'
-        if k <= 0
-            return true
-        elseif k == 1
-            return iszero(M.dv)
-        else # k >= 2
-            return iszero(M.dv) && iszero(M.ev)
-        end
-    else # M.uplo == 'L'
-        if k <= -1
-            return true
-        elseif k == 0
-            return iszero(M.ev)
-        else # k >= 1
-            return iszero(M.ev) && iszero(M.dv)
-        end
-    end
-end
-function istril(M::Bidiagonal, k::Integer=0)
-    if M.uplo == 'U'
-        if k >= 1
-            return true
-        elseif k == 0
-            return iszero(M.ev)
-        else # k <= -1
-            return iszero(M.ev) && iszero(M.dv)
-        end
-    else # M.uplo == 'L'
-        if k >= 0
-            return true
-        elseif k == -1
-            return iszero(M.dv)
-        else # k <= -2
-            return iszero(M.dv) && iszero(M.ev)
-        end
-    end
-end
-isdiag(M::Bidiagonal) = iszero(M.ev)
-
-function tril!(M::Bidiagonal{T}, k::Integer=0) where T
-    n = length(M.dv)
-    if !(-n - 1 <= k <= n - 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
-    elseif M.uplo == 'U' && k < 0
-        fill!(M.dv, zero(T))
-        fill!(M.ev, zero(T))
-    elseif k < -1
-        fill!(M.dv, zero(T))
-        fill!(M.ev, zero(T))
-    elseif M.uplo == 'U' && k == 0
-        fill!(M.ev, zero(T))
-    elseif M.uplo == 'L' && k == -1
-        fill!(M.dv, zero(T))
-    end
-    return M
-end
-
-function triu!(M::Bidiagonal{T}, k::Integer=0) where T
-    n = length(M.dv)
-    if !(-n + 1 <= k <= n + 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least",
-            "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
-    elseif M.uplo == 'L' && k > 0
-        fill!(M.dv, zero(T))
-        fill!(M.ev, zero(T))
-    elseif k > 1
-        fill!(M.dv, zero(T))
-        fill!(M.ev, zero(T))
-    elseif M.uplo == 'L' && k == 0
-        fill!(M.ev, zero(T))
-    elseif M.uplo == 'U' && k == 1
-        fill!(M.dv, zero(T))
-    end
-    return M
-end
-
-function diag(M::Bidiagonal{T}, n::Integer=0) where T
-    # every branch call similar(..., ::Int) to make sure the
-    # same vector type is returned independent of n
-    if n == 0
-        return copyto!(similar(M.dv, length(M.dv)), M.dv)
-    elseif (n == 1 && M.uplo == 'U') ||  (n == -1 && M.uplo == 'L')
-        return copyto!(similar(M.ev, length(M.ev)), M.ev)
-    elseif -size(M,1) <= n <= size(M,1)
-        return fill!(similar(M.dv, size(M,1)-abs(n)), zero(T))
-    else
-        throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
-            "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
-    end
-end
-
-function +(A::Bidiagonal, B::Bidiagonal)
-    if A.uplo == B.uplo || length(A.dv) == 0
-        Bidiagonal(A.dv+B.dv, A.ev+B.ev, A.uplo)
-    else
-        newdv = A.dv+B.dv
-        Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(B.ev), newdv, typeof(newdv)(A.ev)) : (typeof(newdv)(A.ev), newdv, typeof(newdv)(B.ev)))...)
-    end
-end
-
-function -(A::Bidiagonal, B::Bidiagonal)
-    if A.uplo == B.uplo || length(A.dv) == 0
-        Bidiagonal(A.dv-B.dv, A.ev-B.ev, A.uplo)
-    else
-        newdv = A.dv-B.dv
-        Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-B.ev), newdv, typeof(newdv)(A.ev)) : (typeof(newdv)(A.ev), newdv, typeof(newdv)(-B.ev)))...)
-    end
-end
-
--(A::Bidiagonal)=Bidiagonal(-A.dv,-A.ev,A.uplo)
-*(A::Bidiagonal, B::Number) = Bidiagonal(A.dv*B, A.ev*B, A.uplo)
-*(B::Number, A::Bidiagonal) = Bidiagonal(B*A.dv, B*A.ev, A.uplo)
-/(A::Bidiagonal, B::Number) = Bidiagonal(A.dv/B, A.ev/B, A.uplo)
-\(B::Number, A::Bidiagonal) = Bidiagonal(B\A.dv, B\A.ev, A.uplo)
-
-function ==(A::Bidiagonal, B::Bidiagonal)
-    if A.uplo == B.uplo
-        return A.dv == B.dv && A.ev == B.ev
-    else
-        return iszero(A.ev) && iszero(B.ev) && A.dv == B.dv
-    end
-end
-
-const BandedMatrix = Union{Bidiagonal,Diagonal,Tridiagonal,SymTridiagonal} # or BiDiTriSym
-const BiTriSym = Union{Bidiagonal,Tridiagonal,SymTridiagonal}
-const BiTri = Union{Bidiagonal,Tridiagonal}
-@inline mul!(C::AbstractVector, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::AbstractMatrix, B::BandedMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::BandedMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-
-function check_A_mul_B!_sizes(C, A, B)
-    mA, nA = size(A)
-    mB, nB = size(B)
-    mC, nC = size(C)
-    if mA != mC
-        throw(DimensionMismatch("first dimension of A, $mA, and first dimension of output C, $mC, must match"))
-    elseif nA != mB
-        throw(DimensionMismatch("second dimension of A, $nA, and first dimension of B, $mB, must match"))
-    elseif nB != nC
-        throw(DimensionMismatch("second dimension of output C, $nC, and second dimension of B, $nB, must match"))
-    end
-end
-
-# function to get the internally stored vectors for Bidiagonal and [Sym]Tridiagonal
-# to avoid allocations in _mul! below (#24324, #24578)
-_diag(A::Tridiagonal, k) = k == -1 ? A.dl : k == 0 ? A.d : A.du
-_diag(A::SymTridiagonal, k) = k == 0 ? A.dv : A.ev
-function _diag(A::Bidiagonal, k)
-    if k == 0
-        return A.dv
-    elseif (A.uplo == 'L' && k == -1) || (A.uplo == 'U' && k == 1)
-        return A.ev
-    else
-        return diag(A, k)
-    end
-end
-
-function _mul!(C::AbstractMatrix, A::BiTriSym, B::BiTriSym, _add::MulAddMul = MulAddMul())
-    check_A_mul_B!_sizes(C, A, B)
-    n = size(A,1)
-    n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
-    # We use `_rmul_or_fill!` instead of `_modify!` here since using
-    # `_modify!` in the following loop will not update the
-    # off-diagonal elements for non-zero beta.
-    _rmul_or_fill!(C, _add.beta)
-    iszero(_add.alpha) && return C
-    Al = _diag(A, -1)
-    Ad = _diag(A, 0)
-    Au = _diag(A, 1)
-    Bl = _diag(B, -1)
-    Bd = _diag(B, 0)
-    Bu = _diag(B, 1)
-    @inbounds begin
-        # first row of C
-        C[1,1] += _add(A[1,1]*B[1,1] + A[1, 2]*B[2, 1])
-        C[1,2] += _add(A[1,1]*B[1,2] + A[1,2]*B[2,2])
-        C[1,3] += _add(A[1,2]*B[2,3])
-        # second row of C
-        C[2,1] += _add(A[2,1]*B[1,1] + A[2,2]*B[2,1])
-        C[2,2] += _add(A[2,1]*B[1,2] + A[2,2]*B[2,2] + A[2,3]*B[3,2])
-        C[2,3] += _add(A[2,2]*B[2,3] + A[2,3]*B[3,3])
-        C[2,4] += _add(A[2,3]*B[3,4])
-        for j in 3:n-2
-            Ajj₋1   = Al[j-1]
-            Ajj     = Ad[j]
-            Ajj₊1   = Au[j]
-            Bj₋1j₋2 = Bl[j-2]
-            Bj₋1j₋1 = Bd[j-1]
-            Bj₋1j   = Bu[j-1]
-            Bjj₋1   = Bl[j-1]
-            Bjj     = Bd[j]
-            Bjj₊1   = Bu[j]
-            Bj₊1j   = Bl[j]
-            Bj₊1j₊1 = Bd[j+1]
-            Bj₊1j₊2 = Bu[j+1]
-            C[j,j-2]  += _add( Ajj₋1*Bj₋1j₋2)
-            C[j, j-1] += _add(Ajj₋1*Bj₋1j₋1 + Ajj*Bjj₋1)
-            C[j, j  ] += _add(Ajj₋1*Bj₋1j   + Ajj*Bjj       + Ajj₊1*Bj₊1j)
-            C[j, j+1] += _add(Ajj  *Bjj₊1   + Ajj₊1*Bj₊1j₊1)
-            C[j, j+2] += _add(Ajj₊1*Bj₊1j₊2)
-        end
-        # row before last of C
-        C[n-1,n-3] += _add(A[n-1,n-2]*B[n-2,n-3])
-        C[n-1,n-2] += _add(A[n-1,n-1]*B[n-1,n-2] + A[n-1,n-2]*B[n-2,n-2])
-        C[n-1,n-1] += _add(A[n-1,n-2]*B[n-2,n-1] + A[n-1,n-1]*B[n-1,n-1] + A[n-1,n]*B[n,n-1])
-        C[n-1,n  ] += _add(A[n-1,n-1]*B[n-1,n  ] + A[n-1,  n]*B[n  ,n  ])
-        # last row of C
-        C[n,n-2] += _add(A[n,n-1]*B[n-1,n-2])
-        C[n,n-1] += _add(A[n,n-1]*B[n-1,n-1] + A[n,n]*B[n,n-1])
-        C[n,n  ] += _add(A[n,n-1]*B[n-1,n  ] + A[n,n]*B[n,n  ])
-    end # inbounds
-    C
-end
-
-function _mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, _add::MulAddMul = MulAddMul())
-    require_one_based_indexing(C)
-    check_A_mul_B!_sizes(C, A, B)
-    n = size(A,1)
-    iszero(n) && return C
-    n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
-    _rmul_or_fill!(C, _add.beta)  # see the same use above
-    iszero(_add.alpha) && return C
-    Al = _diag(A, -1)
-    Ad = _diag(A, 0)
-    Au = _diag(A, 1)
-    Bd = B.diag
-    @inbounds begin
-        # first row of C
-        C[1,1] += _add(A[1,1]*B[1,1])
-        C[1,2] += _add(A[1,2]*B[2,2])
-        # second row of C
-        C[2,1] += _add(A[2,1]*B[1,1])
-        C[2,2] += _add(A[2,2]*B[2,2])
-        C[2,3] += _add(A[2,3]*B[3,3])
-        for j in 3:n-2
-            C[j, j-1] += _add(Al[j-1]*Bd[j-1])
-            C[j, j  ] += _add(Ad[j  ]*Bd[j  ])
-            C[j, j+1] += _add(Au[j  ]*Bd[j+1])
-        end
-        # row before last of C
-        C[n-1,n-2] += _add(A[n-1,n-2]*B[n-2,n-2])
-        C[n-1,n-1] += _add(A[n-1,n-1]*B[n-1,n-1])
-        C[n-1,n  ] += _add(A[n-1,  n]*B[n  ,n  ])
-        # last row of C
-        C[n,n-1] += _add(A[n,n-1]*B[n-1,n-1])
-        C[n,n  ] += _add(A[n,n  ]*B[n,  n  ])
-    end # inbounds
-    C
-end
-
-function _mul!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, _add::MulAddMul = MulAddMul())
-    require_one_based_indexing(C, B)
-    nA = size(A,1)
-    nB = size(B,2)
-    if !(size(C,1) == size(B,1) == nA)
-        throw(DimensionMismatch("A has first dimension $nA, B has $(size(B,1)), C has $(size(C,1)) but all must match"))
-    end
-    if size(C,2) != nB
-        throw(DimensionMismatch("A has second dimension $nA, B has $(size(B,2)), C has $(size(C,2)) but all must match"))
-    end
-    iszero(nA) && return C
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    nA <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
-    l = _diag(A, -1)
-    d = _diag(A, 0)
-    u = _diag(A, 1)
-    @inbounds begin
-        for j = 1:nB
-            b₀, b₊ = B[1, j], B[2, j]
-            _modify!(_add, d[1]*b₀ + u[1]*b₊, C, (1, j))
-            for i = 2:nA - 1
-                b₋, b₀, b₊ = b₀, b₊, B[i + 1, j]
-                _modify!(_add, l[i - 1]*b₋ + d[i]*b₀ + u[i]*b₊, C, (i, j))
-            end
-            _modify!(_add, l[nA - 1]*b₀ + d[nA]*b₊, C, (nA, j))
-        end
-    end
-    C
-end
-
-function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::BiTriSym, _add::MulAddMul = MulAddMul())
-    require_one_based_indexing(C, A)
-    check_A_mul_B!_sizes(C, A, B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    n = size(A,1)
-    m = size(B,2)
-    if n <= 3 || m <= 1
-        return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
-    end
-    Bl = _diag(B, -1)
-    Bd = _diag(B, 0)
-    Bu = _diag(B, 1)
-    @inbounds begin
-        # first and last column of C
-        B11 = Bd[1]
-        B21 = Bl[1]
-        Bmm = Bd[m]
-        Bm₋1m = Bu[m-1]
-        for i in 1:n
-            _modify!(_add, A[i,1] * B11 + A[i, 2] * B21, C, (i, 1))
-            _modify!(_add, A[i, m-1] * Bm₋1m + A[i, m] * Bmm, C, (i, m))
-        end
-        # middle columns of C
-        for j = 2:m-1
-            Bj₋1j = Bu[j-1]
-            Bjj = Bd[j]
-            Bj₊1j = Bl[j]
-            for i = 1:n
-                _modify!(_add, A[i, j-1] * Bj₋1j + A[i, j]*Bjj + A[i, j+1] * Bj₊1j, C, (i, j))
-            end
-        end
-    end # inbounds
-    C
-end
-
-function _mul!(C::AbstractMatrix, A::Diagonal, B::BiTriSym, _add::MulAddMul = MulAddMul())
-    require_one_based_indexing(C)
-    check_A_mul_B!_sizes(C, A, B)
-    n = size(A,1)
-    n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
-    _rmul_or_fill!(C, _add.beta)  # see the same use above
-    iszero(_add.alpha) && return C
-    Ad = A.diag
-    Bl = _diag(B, -1)
-    Bd = _diag(B, 0)
-    Bu = _diag(B, 1)
-    @inbounds begin
-        # first row of C
-        C[1,1] += _add(A[1,1]*B[1,1])
-        C[1,2] += _add(A[1,1]*B[1,2])
-        # second row of C
-        C[2,1] += _add(A[2,2]*B[2,1])
-        C[2,2] += _add(A[2,2]*B[2,2])
-        C[2,3] += _add(A[2,2]*B[2,3])
-        for j in 3:n-2
-            Ajj       = Ad[j]
-            C[j, j-1] += _add(Ajj*Bl[j-1])
-            C[j, j  ] += _add(Ajj*Bd[j])
-            C[j, j+1] += _add(Ajj*Bu[j])
-        end
-        # row before last of C
-        C[n-1,n-2] += _add(A[n-1,n-1]*B[n-1,n-2])
-        C[n-1,n-1] += _add(A[n-1,n-1]*B[n-1,n-1])
-        C[n-1,n  ] += _add(A[n-1,n-1]*B[n-1,n  ])
-        # last row of C
-        C[n,n-1] += _add(A[n,n]*B[n,n-1])
-        C[n,n  ] += _add(A[n,n]*B[n,n  ])
-    end # inbounds
-    C
-end
-
-function *(A::UpperOrUnitUpperTriangular, B::Bidiagonal)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    C = mul!(similar(A, TS, size(A)), A, B)
-    return B.uplo == 'U' ? UpperTriangular(C) : C
-end
-
-function *(A::LowerOrUnitLowerTriangular, B::Bidiagonal)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    C = mul!(similar(A, TS, size(A)), A, B)
-    return B.uplo == 'L' ? LowerTriangular(C) : C
-end
-
-function *(A::Bidiagonal, B::UpperOrUnitUpperTriangular)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    C = mul!(similar(B, TS, size(B)), A, B)
-    return A.uplo == 'U' ? UpperTriangular(C) : C
-end
-
-function *(A::Bidiagonal, B::LowerOrUnitLowerTriangular)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    C = mul!(similar(B, TS, size(B)), A, B)
-    return A.uplo == 'L' ? LowerTriangular(C) : C
-end
-
-function *(A::Diagonal, B::SymTridiagonal)
-    TS = promote_op(*, eltype(A), eltype(B))
-    out = Tridiagonal(similar(A, TS, size(A, 1)-1), similar(A, TS, size(A, 1)), similar(A, TS, size(A, 1)-1))
-    mul!(out, A, B)
-end
-
-function *(A::SymTridiagonal, B::Diagonal)
-    TS = promote_op(*, eltype(A), eltype(B))
-    out = Tridiagonal(similar(A, TS, size(A, 1)-1), similar(A, TS, size(A, 1)), similar(A, TS, size(A, 1)-1))
-    mul!(out, A, B)
-end
-
-function dot(x::AbstractVector, B::Bidiagonal, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    nx, ny = length(x), length(y)
-    (nx == size(B, 1) == ny) || throw(DimensionMismatch())
-    if nx ≤ 1
-        nx == 0 && return dot(zero(eltype(x)), zero(eltype(B)), zero(eltype(y)))
-        return dot(x[1], B.dv[1], y[1])
-    end
-    ev, dv = B.ev, B.dv
-    @inbounds if B.uplo == 'U'
-        x₀ = x[1]
-        r = dot(x[1], dv[1], y[1])
-        for j in 2:nx-1
-            x₋, x₀ = x₀, x[j]
-            r += dot(adjoint(ev[j-1])*x₋ + adjoint(dv[j])*x₀, y[j])
-        end
-        r += dot(adjoint(ev[nx-1])*x₀ + adjoint(dv[nx])*x[nx], y[nx])
-        return r
-    else # B.uplo == 'L'
-        x₀ = x[1]
-        x₊ = x[2]
-        r = dot(adjoint(dv[1])*x₀ + adjoint(ev[1])*x₊, y[1])
-        for j in 2:nx-1
-            x₀, x₊ = x₊, x[j+1]
-            r += dot(adjoint(dv[j])*x₀ + adjoint(ev[j])*x₊, y[j])
-        end
-        r += dot(x₊, dv[nx], y[nx])
-        return r
-    end
-end
-
-#Linear solvers
-#Generic solver using naive substitution
-ldiv!(A::Bidiagonal, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
-function ldiv!(c::AbstractVecOrMat, A::Bidiagonal, b::AbstractVecOrMat)
-    require_one_based_indexing(c, A, b)
-    N = size(A, 2)
-    mb, nb = size(b, 1), size(b, 2)
-    if N != mb
-        throw(DimensionMismatch("second dimension of A, $N, does not match first dimension of b, $mb"))
-    end
-    mc, nc = size(c, 1), size(c, 2)
-    if mc != mb || nc != nb
-        throw(DimensionMismatch("size of result, ($mc, $nc), does not match the size of b, ($mb, $nb)"))
-    end
-
-    if N == 0
-        return copyto!(c, b)
-    end
-
-    zi = findfirst(iszero, A.dv)
-    isnothing(zi) || throw(SingularException(zi))
-
-    @inbounds for j in 1:nb
-        if A.uplo == 'L' #do colwise forward substitution
-            c[1,j] = bi1 = A.dv[1] \ b[1,j]
-            for i in 2:N
-                c[i,j] = bi1 = A.dv[i] \ (b[i,j] - A.ev[i - 1] * bi1)
-            end
-        else #do colwise backward substitution
-            c[N,j] = bi1 = A.dv[N] \ b[N,j]
-            for i in (N - 1):-1:1
-                c[i,j] = bi1 = A.dv[i] \ (b[i,j] - A.ev[i] * bi1)
-            end
-        end
-    end
-    return c
-end
-ldiv!(A::AdjOrTrans{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
-ldiv!(c::AbstractVecOrMat, A::AdjOrTrans{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) =
-    (t = adj_or_trans(A); _rdiv!(t(c), t(b), t(A)); return c)
-
-### Generic promotion methods and fallbacks
-\(A::Bidiagonal, B::AbstractVecOrMat) = ldiv!(_initarray(\, eltype(A), eltype(B), B), A, B)
-\(xA::AdjOrTrans{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = copy(xA) \ B
-
-### Triangular specializations
-for tri in (:UpperTriangular, :UnitUpperTriangular)
-    @eval function \(B::Bidiagonal, U::$tri)
-        A = ldiv!(_initarray(\, eltype(B), eltype(U), U), B, U)
-        return B.uplo == 'U' ? UpperTriangular(A) : A
-    end
-    @eval function \(U::$tri, B::Bidiagonal)
-        A = ldiv!(_initarray(\, eltype(U), eltype(B), U), U, B)
-        return B.uplo == 'U' ? UpperTriangular(A) : A
-    end
-end
-for tri in (:LowerTriangular, :UnitLowerTriangular)
-    @eval function \(B::Bidiagonal, L::$tri)
-        A = ldiv!(_initarray(\, eltype(B), eltype(L), L), B, L)
-        return B.uplo == 'L' ? LowerTriangular(A) : A
-    end
-    @eval function \(L::$tri, B::Bidiagonal)
-        A = ldiv!(_initarray(\, eltype(L), eltype(B), L), L, B)
-        return B.uplo == 'L' ? LowerTriangular(A) : A
-    end
-end
-
-### Diagonal specialization
-function \(B::Bidiagonal, D::Diagonal)
-    A = ldiv!(_initarray(\, eltype(B), eltype(D), D), B, D)
-    return B.uplo == 'U' ? UpperTriangular(A) : LowerTriangular(A)
-end
-
-function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal)
-    require_one_based_indexing(C, A, B)
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    mc, nc = size(C)
-    if mc != m || nc != n
-        throw(DimensionMismatch("expect output to have size ($m, $n), but got ($mc, $nc)"))
-    end
-
-    zi = findfirst(iszero, B.dv)
-    isnothing(zi) || throw(SingularException(zi))
-
-    if B.uplo == 'L'
-        diagB = B.dv[n]
-        for i in 1:m
-            C[i,n] = A[i,n] / diagB
-        end
-        for j in n-1:-1:1
-            diagB = B.dv[j]
-            offdiagB = B.ev[j]
-            for i in 1:m
-                C[i,j] = (A[i,j] - C[i,j+1]*offdiagB)/diagB
-            end
-        end
-    else
-        diagB = B.dv[1]
-        for i in 1:m
-            C[i,1] = A[i,1] / diagB
-        end
-        for j in 2:n
-            diagB = B.dv[j]
-            offdiagB = B.ev[j-1]
-            for i = 1:m
-                C[i,j] = (A[i,j] - C[i,j-1]*offdiagB)/diagB
-            end
-        end
-    end
-    C
-end
-rdiv!(A::AbstractMatrix, B::Bidiagonal) = @inline _rdiv!(A, A, B)
-rdiv!(A::AbstractMatrix, B::AdjOrTrans{<:Any,<:Bidiagonal}) = @inline _rdiv!(A, A, B)
-_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::AdjOrTrans{<:Any,<:Bidiagonal}) =
-    (t = adj_or_trans(B); ldiv!(t(C), t(B), t(A)); return C)
-
-/(A::AbstractMatrix, B::Bidiagonal) = _rdiv!(_initarray(/, eltype(A), eltype(B), A), A, B)
-
-### Triangular specializations
-for tri in (:UpperTriangular, :UnitUpperTriangular)
-    @eval function /(U::$tri, B::Bidiagonal)
-        A = _rdiv!(_initarray(/, eltype(U), eltype(B), U), U, B)
-        return B.uplo == 'U' ? UpperTriangular(A) : A
-    end
-    @eval function /(B::Bidiagonal, U::$tri)
-        A = _rdiv!(_initarray(/, eltype(B), eltype(U), U), B, U)
-        return B.uplo == 'U' ? UpperTriangular(A) : A
-    end
-end
-for tri in (:LowerTriangular, :UnitLowerTriangular)
-    @eval function /(L::$tri, B::Bidiagonal)
-        A = _rdiv!(_initarray(/, eltype(L), eltype(B), L), L, B)
-        return B.uplo == 'L' ? LowerTriangular(A) : A
-    end
-    @eval function /(B::Bidiagonal, L::$tri)
-        A = _rdiv!(_initarray(/, eltype(B), eltype(L), L), B, L)
-        return B.uplo == 'L' ? LowerTriangular(A) : A
-    end
-end
-
-### Diagonal specialization
-function /(D::Diagonal, B::Bidiagonal)
-    A = _rdiv!(_initarray(/, eltype(D), eltype(B), D), D, B)
-    return B.uplo == 'U' ? UpperTriangular(A) : LowerTriangular(A)
-end
-
-/(A::AbstractMatrix, B::Transpose{<:Any,<:Bidiagonal}) = A / copy(B)
-/(A::AbstractMatrix, B::Adjoint{<:Any,<:Bidiagonal}) = A / copy(B)
-# disambiguation
-/(A::AdjointAbsVec, B::Bidiagonal) = adjoint(adjoint(B) \ parent(A))
-/(A::TransposeAbsVec, B::Bidiagonal) = transpose(transpose(B) \ parent(A))
-/(A::AdjointAbsVec, B::Transpose{<:Any,<:Bidiagonal}) = adjoint(adjoint(B) \ parent(A))
-/(A::TransposeAbsVec, B::Transpose{<:Any,<:Bidiagonal}) = transpose(transpose(B) \ parent(A))
-/(A::AdjointAbsVec, B::Adjoint{<:Any,<:Bidiagonal}) = adjoint(adjoint(B) \ parent(A))
-/(A::TransposeAbsVec, B::Adjoint{<:Any,<:Bidiagonal}) = transpose(transpose(B) \ parent(A))
-
-factorize(A::Bidiagonal) = A
-function inv(B::Bidiagonal{T}) where T
-    n = size(B, 1)
-    dest = zeros(typeof(inv(oneunit(T))), (n, n))
-    ldiv!(dest, B, Diagonal{typeof(one(T)/one(T))}(I, n))
-    return B.uplo == 'U' ? UpperTriangular(dest) : LowerTriangular(dest)
-end
-
-# Eigensystems
-eigvals(M::Bidiagonal) = copy(M.dv)
-function eigvecs(M::Bidiagonal{T}) where T
-    n = length(M.dv)
-    Q = Matrix{T}(undef, n,n)
-    blks = [0; findall(iszero, M.ev); n]
-    v = zeros(T, n)
-    if M.uplo == 'U'
-        for idx_block = 1:length(blks) - 1, i = blks[idx_block] + 1:blks[idx_block + 1] #index of eigenvector
-            fill!(v, zero(T))
-            v[blks[idx_block] + 1] = one(T)
-            for j = blks[idx_block] + 1:i - 1 #Starting from j=i, eigenvector elements will be 0
-                v[j+1] = (M.dv[i] - M.dv[j])/M.ev[j] * v[j]
-            end
-            c = norm(v)
-            for j = 1:n
-                Q[j, i] = v[j] / c
-            end
-        end
-    else
-        for idx_block = 1:length(blks) - 1, i = blks[idx_block + 1]:-1:blks[idx_block] + 1 #index of eigenvector
-            fill!(v, zero(T))
-            v[blks[idx_block+1]] = one(T)
-            for j = (blks[idx_block+1] - 1):-1:max(1, (i - 1)) #Starting from j=i, eigenvector elements will be 0
-                v[j] = (M.dv[i] - M.dv[j+1])/M.ev[j] * v[j+1]
-            end
-            c = norm(v)
-            for j = 1:n
-                Q[j, i] = v[j] / c
-            end
-        end
-    end
-    Q #Actually Triangular
-end
-eigen(M::Bidiagonal) = Eigen(eigvals(M), eigvecs(M))
-
-Base._sum(A::Bidiagonal, ::Colon) = sum(A.dv) + sum(A.ev)
-function Base._sum(A::Bidiagonal, dims::Integer)
-    res = Base.reducedim_initarray(A, dims, zero(eltype(A)))
-    n = length(A.dv)
-    if n == 0
-        # Just to be sure. This shouldn't happen since there is a check whether
-        # length(A.dv) == length(A.ev) + 1 in the constructor.
-        return res
-    elseif n == 1
-        res[1] = A.dv[1]
-        return res
-    end
-    @inbounds begin
-        if (dims == 1 && A.uplo == 'U') || (dims == 2 && A.uplo == 'L')
-            res[1] = A.dv[1]
-            for i = 2:length(A.dv)
-                res[i] = A.ev[i-1] + A.dv[i]
-            end
-        elseif (dims == 1 && A.uplo == 'L') || (dims == 2 && A.uplo == 'U')
-            for i = 1:length(A.dv)-1
-                res[i] = A.ev[i] + A.dv[i]
-            end
-            res[end] = A.dv[end]
-        elseif dims >= 3
-            if A.uplo == 'U'
-                for i = 1:length(A.dv)-1
-                    res[i,i]   = A.dv[i]
-                    res[i,i+1] = A.ev[i]
-                end
-            else
-                for i = 1:length(A.dv)-1
-                    res[i,i]   = A.dv[i]
-                    res[i+1,i] = A.ev[i]
-                end
-            end
-            res[end,end] = A.dv[end]
-        end
-    end
-    res
-end
diff --git a/stdlib/LinearAlgebra/src/bitarray.jl b/stdlib/LinearAlgebra/src/bitarray.jl
deleted file mode 100644
index d1857c3c38659..0000000000000
--- a/stdlib/LinearAlgebra/src/bitarray.jl
+++ /dev/null
@@ -1,272 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-function dot(x::BitVector, y::BitVector)
-    # simplest way to mimic Array dot behavior
-    length(x) == length(y) || throw(DimensionMismatch())
-    s = 0
-    xc = x.chunks
-    yc = y.chunks
-    @inbounds for i = 1:length(xc)
-        s += count_ones(xc[i] & yc[i])
-    end
-    s
-end
-
-## slower than the unpacked version, which is MUCH slower
-#  than blas'd (this one saves storage though, keeping it commented
-#  just in case)
-#function aTb(A::BitMatrix, B::BitMatrix)
-    #(mA, nA) = size(A)
-    #(mB, nB) = size(B)
-    #C = falses(nA, nB)
-    #if mA != mB; throw(DimensionMismatch()) end
-    #if mA == 0; return C; end
-    #col_ch = num_bit_chunks(mA)
-    ## TODO: avoid using aux chunks and copy (?)
-    #aux_chunksA = zeros(UInt64, col_ch)
-    #aux_chunksB = [zeros(UInt64, col_ch) for j=1:nB]
-    #for j = 1:nB
-        #Base.copy_chunks!(aux_chunksB[j], 1, B.chunks, (j-1)*mA+1, mA)
-    #end
-    #for i = 1:nA
-        #Base.copy_chunks!(aux_chunksA, 1, A.chunks, (i-1)*mA+1, mA)
-        #for j = 1:nB
-            #for k = 1:col_ch
-                ## TODO: improve
-                #C[i, j] += count_ones(aux_chunksA[k] & aux_chunksB[j][k])
-            #end
-        #end
-    #end
-    #C
-#end
-
-#aCb(A::BitMatrix{T}, B::BitMatrix{S}) where {T,S} = aTb(A, B)
-
-function triu(B::BitMatrix, k::Integer=0)
-    m,n = size(B)
-    if !(-m + 1 <= k <= n + 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least",
-            "$(-m + 1) and at most $(n + 1) in an $m-by-$n matrix")))
-    end
-    A = falses(m,n)
-    Ac = A.chunks
-    Bc = B.chunks
-    for i = max(k+1,1):n
-        j = clamp((i - 1) * m + 1, 1, i * m)
-        Base.copy_chunks!(Ac, j, Bc, j, min(i-k, m))
-    end
-    A
-end
-
-function tril(B::BitMatrix, k::Integer=0)
-    m,n = size(B)
-    if !(-m - 1 <= k <= n - 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-m - 1) and at most $(n - 1) in an $m-by-$n matrix")))
-    end
-    A = falses(m, n)
-    Ac = A.chunks
-    Bc = B.chunks
-    for i = 1:min(n, m+k)
-        j = clamp((i - 1) * m + i - k, 1, i * m)
-        Base.copy_chunks!(Ac, j, Bc, j, max(m-i+k+1, 0))
-    end
-    A
-end
-
-## diag
-
-function diag(B::BitMatrix)
-    n = minimum(size(B))
-    v = similar(B, n)
-    for i = 1:n
-        v[i] = B[i,i]
-    end
-    v
-end
-
-## norm and rank
-
-svd(A::BitMatrix) = svd(float(A))
-qr(A::BitMatrix) = qr(float(A))
-
-## kron
-
-@inline function kron!(R::BitVector, a::BitVector, b::BitVector)
-    m = length(a)
-    n = length(b)
-    @boundscheck length(R) == n*m || throw(DimensionMismatch())
-    Rc = R.chunks
-    bc = b.chunks
-    for j = 1:m
-        a[j] && Base.copy_chunks!(Rc, (j-1)*n+1, bc, 1, n)
-    end
-    return R
-end
-
-function kron(a::BitVector, b::BitVector)
-    m = length(a)
-    n = length(b)
-    R = falses(n * m)
-    return @inbounds kron!(R, a, b)
-end
-
-function kron!(R::BitMatrix, a::BitMatrix, b::BitMatrix)
-    mA,nA = size(a)
-    mB,nB = size(b)
-    @boundscheck size(R) == (mA*mB, nA*nB) || throw(DimensionMismatch())
-
-    for i = 1:mA
-        ri = (1:mB) .+ ((i-1)*mB)
-        for j = 1:nA
-            if a[i,j]
-                rj = (1:nB) .+ ((j-1)*nB)
-                R[ri,rj] = b
-            end
-        end
-    end
-    return R
-end
-
-function kron(a::BitMatrix, b::BitMatrix)
-    mA,nA = size(a)
-    mB,nB = size(b)
-    R = falses(mA*mB, nA*nB)
-    return @inbounds kron!(R, a, b)
-end
-
-## Structure query functions
-
-issymmetric(A::BitMatrix) = size(A, 1)==size(A, 2) && count(!iszero, A - copy(A'))==0
-ishermitian(A::BitMatrix) = issymmetric(A)
-
-function nonzero_chunks(chunks::Vector{UInt64}, pos0::Int, pos1::Int)
-    k0, l0 = Base.get_chunks_id(pos0)
-    k1, l1 = Base.get_chunks_id(pos1)
-
-    delta_k = k1 - k0
-
-    z = UInt64(0)
-    u = ~z
-    if delta_k == 0
-        msk_0 = (u << l0) & ~(u << l1 << 1)
-    else
-        msk_0 = (u << l0)
-        msk_1 = ~(u << l1 << 1)
-    end
-
-    @inbounds begin
-        (chunks[k0] & msk_0) == z || return true
-        delta_k == 0 && return false
-        for i = k0 + 1 : k1 - 1
-            chunks[i] == z || return true
-        end
-        (chunks[k1] & msk_1)==z || return true
-    end
-    return false
-end
-
-function istriu(A::BitMatrix)
-    m, n = size(A)
-    for j = 1:min(n,m-1)
-        stride = (j-1) * m
-        nonzero_chunks(A.chunks, stride+j+1, stride+m) && return false
-    end
-    return true
-end
-
-function istril(A::BitMatrix)
-    m, n = size(A)
-    (m == 0 || n == 0) && return true
-    for j = 2:n
-        stride = (j-1) * m
-        nonzero_chunks(A.chunks, stride+1, stride+min(j-1,m)) && return false
-    end
-    return true
-end
-
-# fast 8x8 bit transpose from Henry S. Warrens's "Hacker's Delight"
-# http://www.hackersdelight.org/hdcodetxt/transpose8.c.txt
-function transpose8x8(x::UInt64)
-    y = x
-    t = xor(y, y >>> 7) & 0x00aa00aa00aa00aa
-    y = xor(y, t, t << 7)
-    t = xor(y, y >>> 14) & 0x0000cccc0000cccc
-    y = xor(y, t, t << 14)
-    t = xor(y, y >>> 28) & 0x00000000f0f0f0f0
-    return xor(y, t, t << 28)
-end
-
-function form_8x8_chunk(Bc::Vector{UInt64}, i1::Int, i2::Int, m::Int, cgap::Int, cinc::Int, nc::Int, msk8::UInt64)
-    x = UInt64(0)
-
-    k, l = Base.get_chunks_id(i1 + (i2 - 1) * m)
-    r = 0
-    for j = 1:8
-        k > nc && break
-        x |= ((Bc[k] >>> l) & msk8) << r
-        if l + 8 >= 64 && nc > k
-            r0 = 8 - Base._mod64(l + 8)
-            x |= (Bc[k + 1] & (msk8 >>> r0)) << (r + r0)
-        end
-        k += cgap + (l + cinc >= 64 ? 1 : 0)
-        l = Base._mod64(l + cinc)
-        r += 8
-    end
-    return x
-end
-
-# note: assumes B is filled with 0's
-function put_8x8_chunk(Bc::Vector{UInt64}, i1::Int, i2::Int, x::UInt64, m::Int, cgap::Int, cinc::Int, nc::Int, msk8::UInt64)
-    k, l = Base.get_chunks_id(i1 + (i2 - 1) * m)
-    r = 0
-    for j = 1:8
-        k > nc && break
-        Bc[k] |= ((x >>> r) & msk8) << l
-        if l + 8 >= 64 && nc > k
-            r0 = 8 - Base._mod64(l + 8)
-            Bc[k + 1] |= ((x >>> (r + r0)) & (msk8 >>> r0))
-        end
-        k += cgap + (l + cinc >= 64 ? 1 : 0)
-        l = Base._mod64(l + cinc)
-        r += 8
-    end
-    return
-end
-
-adjoint(B::Union{BitVector,BitMatrix}) = Adjoint(B)
-transpose(B::Union{BitVector,BitMatrix}) = Transpose(B)
-Base.copy(B::Adjoint{Bool,BitMatrix}) = transpose!(falses(size(B)), B.parent)
-Base.copy(B::Transpose{Bool,BitMatrix}) = transpose!(falses(size(B)), B.parent)
-function transpose!(C::BitMatrix, B::BitMatrix)
-    @boundscheck size(C) == reverse(size(B)) || throw(DimensionMismatch())
-    l1, l2 = size(B)
-
-    cgap1, cinc1 = Base._div64(l1), Base._mod64(l1)
-    cgap2, cinc2 = Base._div64(l2), Base._mod64(l2)
-
-    Bc = B.chunks
-    Cc = C.chunks
-
-    nc = length(Bc)
-
-    for i = 1:8:l1
-        msk8_1 = UInt64(0xff)
-        if (l1 < i + 7)
-            msk8_1 >>>= i + 7 - l1
-        end
-
-        for j = 1:8:l2
-            x = form_8x8_chunk(Bc, i, j, l1, cgap1, cinc1, nc, msk8_1)
-            x = transpose8x8(x)
-
-            msk8_2 = UInt64(0xff)
-            if (l2 < j + 7)
-                msk8_2 >>>= j + 7 - l2
-            end
-
-            put_8x8_chunk(Cc, j, i, x, l2, cgap2, cinc2, nc, msk8_2)
-        end
-    end
-    return C
-end
diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl
deleted file mode 100644
index 8da19baee5045..0000000000000
--- a/stdlib/LinearAlgebra/src/blas.jl
+++ /dev/null
@@ -1,2140 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-Interface to BLAS subroutines.
-"""
-module BLAS
-
-import Base: copyto!
-using Base: require_one_based_indexing, USE_BLAS64
-
-export
-# Note: `xFUNC_NAME` is a placeholder for not exported BLAS functions
-#   ref: http://www.netlib.org/blas/blasqr.pdf
-# Level 1
-    # xROTG
-    # xROTMG
-    rot!,
-    # xROTM
-    # xSWAP
-    scal!,
-    scal,
-    blascopy!,
-    # xAXPY!,
-    # xAXPBY!,
-    # xDOT
-    dotc,
-    dotu,
-    # xxDOT
-    nrm2,
-    asum,
-    iamax,
-# Level 2
-    gemv!,
-    gemv,
-    gbmv!,
-    gbmv,
-    hemv!,
-    hemv,
-    # xHBMV
-    hpmv!,
-    symv!,
-    symv,
-    sbmv!,
-    sbmv,
-    spmv!,
-    trmv!,
-    trmv,
-    # xTBMV
-    # xTPMV
-    trsv!,
-    trsv,
-    # xTBSV
-    # xTPSV
-    ger!,
-    # xGERU
-    # xGERC
-    her!,
-    # xHPR
-    # xHER2
-    # xHPR2
-    syr!,
-    spr!,
-    # xSYR2
-    # xSPR2
-# Level 3
-    gemm!,
-    gemm,
-    symm!,
-    symm,
-    hemm!,
-    hemm,
-    syrk!,
-    syrk,
-    herk!,
-    herk,
-    syr2k!,
-    syr2k,
-    her2k!,
-    her2k,
-    trmm!,
-    trmm,
-    trsm!,
-    trsm
-
-using ..LinearAlgebra: libblastrampoline, BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismatch, checksquare, stride1, chkstride1
-
-include("lbt.jl")
-
-# Legacy bindings that some packages (such as NNlib.jl) use.
-# We maintain these for backwards-compatibility but new packages
-# should not look at these, instead preferring to parse the output
-# of BLAS.get_config()
-const libblas = libblastrampoline
-const liblapack = libblastrampoline
-
-vendor() = :lbt
-
-"""
-    get_config()
-
-Return an object representing the current `libblastrampoline` configuration.
-
-!!! compat "Julia 1.7"
-    `get_config()` requires at least Julia 1.7.
-"""
-get_config() = lbt_get_config()
-
-if USE_BLAS64
-    macro blasfunc(x)
-        return Expr(:quote, Symbol(x, "64_"))
-    end
-else
-    macro blasfunc(x)
-        return Expr(:quote, x)
-    end
-end
-
-_tryparse_env_int(key) = tryparse(Int, get(ENV, key, ""))
-
-
-"""
-    set_num_threads(n::Integer)
-    set_num_threads(::Nothing)
-
-Set the number of threads the BLAS library should use equal to `n::Integer`.
-
-Also accepts `nothing`, in which case julia tries to guess the default number of threads.
-Passing `nothing` is discouraged and mainly exists for historical reasons.
-"""
-set_num_threads(nt::Integer)::Nothing = lbt_set_num_threads(Int32(nt))
-function set_num_threads(::Nothing)
-    nt = something(
-        _tryparse_env_int("OPENBLAS_NUM_THREADS"),
-        _tryparse_env_int("OMP_NUM_THREADS"),
-        _tryparse_env_int("VECLIB_MAXIMUM_THREADS"),
-        max(1, Sys.CPU_THREADS ÷ 2),
-    )
-    return set_num_threads(nt)
-end
-
-"""
-    get_num_threads()
-
-Get the number of threads the BLAS library is using.
-
-!!! compat "Julia 1.6"
-    `get_num_threads` requires at least Julia 1.6.
-"""
-get_num_threads()::Int = lbt_get_num_threads()
-
-function check()
-    # TODO: once we have bitfields of the BLAS functions that are actually forwarded,
-    # ensure that we have a complete set here (warning on an incomplete BLAS implementation)
-    config = get_config()
-
-    # Ensure that one of our loaded libraries satisfies our interface requirement
-    interface = USE_BLAS64 ? :ilp64 : :lp64
-    if !any(lib.interface == interface for lib in config.loaded_libs)
-        interfacestr = uppercase(string(interface))
-        @error("No loaded BLAS libraries were built with $(interfacestr) support")
-        println("Quitting.")
-        exit()
-    end
-end
-
-"Check that upper/lower (for special matrices) is correctly specified"
-function chkuplo(uplo::AbstractChar)
-    if !(uplo == 'U' || uplo == 'L')
-        throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
-    end
-    uplo
-end
-
-# Level 1
-# A help function to pick the pointer and inc for 1d like inputs.
-@inline function vec_pointer_stride(x::AbstractArray, stride0check = nothing)
-    Base._checkcontiguous(Bool, x) && return pointer(x), 1 # simplify runtime check when possible
-    st, ptr = checkedstride(x), pointer(x)
-    isnothing(stride0check) || (st == 0 && throw(stride0check))
-    ptr += min(st, 0) * sizeof(eltype(x)) * (length(x) - 1)
-    ptr, st
-end
-function checkedstride(x::AbstractArray)
-    szs::Dims = size(x)
-    sts::Dims = strides(x)
-    _, st, n = Base.merge_adjacent_dim(szs, sts)
-    n === ndims(x) && return st
-    throw(ArgumentError("only support vector like inputs"))
-end
-## copy
-
-"""
-    blascopy!(n, X, incx, Y, incy)
-
-Copy `n` elements of array `X` with stride `incx` to array `Y` with stride `incy`. Returns `Y`.
-"""
-function blascopy! end
-
-for (fname, elty) in ((:dcopy_,:Float64),
-                      (:scopy_,:Float32),
-                      (:zcopy_,:ComplexF64),
-                      (:ccopy_,:ComplexF32))
-    @eval begin
-        # SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
-        function blascopy!(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
-                 n, DX, incx, DY, incy)
-            DY
-        end
-    end
-end
-
-
-## rot
-
-"""
-    rot!(n, X, incx, Y, incy, c, s)
-
-Overwrite `X` with `c*X + s*Y` and `Y` with `-conj(s)*X + c*Y` for the first `n` elements of array `X` with stride `incx` and
-first `n` elements of array `Y` with stride `incy`. Returns `X` and `Y`.
-
-!!! compat "Julia 1.5"
-    `rot!` requires at least Julia 1.5.
-"""
-function rot! end
-
-for (fname, elty, cty, sty, lib) in ((:drot_, :Float64, :Float64, :Float64, libblastrampoline),
-                                     (:srot_, :Float32, :Float32, :Float32, libblastrampoline),
-                                     (:zdrot_, :ComplexF64, :Float64, :Float64, libblastrampoline),
-                                     (:csrot_, :ComplexF32, :Float32, :Float32, libblastrampoline),
-                                     (:zrot_, :ComplexF64, :Float64, :ComplexF64, libblastrampoline),
-                                     (:crot_, :ComplexF32, :Float32, :ComplexF32, libblastrampoline))
-    @eval begin
-        # SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S)
-        function rot!(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer, C::$cty, S::$sty)
-            ccall((@blasfunc($fname), $lib), Cvoid,
-                (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$cty}, Ref{$sty}),
-                 n, DX, incx, DY, incy, C, S)
-            DX, DY
-        end
-    end
-end
-
-## scal
-
-"""
-    scal!(n, a, X, incx)
-    scal!(a, X)
-
-Overwrite `X` with `a*X` for the first `n` elements of array `X` with stride `incx`. Returns `X`.
-
-If `n` and `incx` are not provided, `length(X)` and `stride(X,1)` are used.
-"""
-function scal! end
-
-"""
-    scal(n, a, X, incx)
-    scal(a, X)
-
-Return `X` scaled by `a` for the first `n` elements of array `X` with stride `incx`.
-
-If `n` and `incx` are not provided, `length(X)` and `stride(X,1)` are used.
-"""
-function scal end
-
-for (fname, elty) in ((:dscal_,:Float64),
-                      (:sscal_,:Float32),
-                      (:zscal_,:ComplexF64),
-                      (:cscal_,:ComplexF32))
-    @eval begin
-        # SUBROUTINE DSCAL(N,DA,DX,INCX)
-        function scal!(n::Integer, DA::$elty, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                  (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}),
-                  n, DA, DX, incx)
-            DX
-        end
-
-        function scal!(DA::$elty, DX::AbstractArray{$elty})
-            p, st = vec_pointer_stride(DX, ArgumentError("dest vector with 0 stride is not allowed"))
-            GC.@preserve DX scal!(length(DX), DA, p, abs(st))
-            DX
-        end
-    end
-end
-scal(n, DA, DX, incx) = scal!(n, DA, copy(DX), incx)
-scal(DA, DX) = scal!(DA, copy(DX))
-
-## dot
-
-"""
-    dot(n, X, incx, Y, incy)
-
-Dot product of two vectors consisting of `n` elements of array `X` with stride `incx` and
-`n` elements of array `Y` with stride `incy`.
-
-# Examples
-```jldoctest
-julia> BLAS.dot(10, fill(1.0, 10), 1, fill(1.0, 20), 2)
-10.0
-```
-"""
-function dot end
-
-"""
-    dotc(n, X, incx, U, incy)
-
-Dot function for two complex vectors, consisting of `n` elements of array `X`
-with stride `incx` and `n` elements of array `U` with stride `incy`,
-conjugating the first vector.
-
-# Examples
-```jldoctest
-julia> BLAS.dotc(10, fill(1.0im, 10), 1, fill(1.0+im, 20), 2)
-10.0 - 10.0im
-```
-"""
-function dotc end
-
-"""
-    dotu(n, X, incx, Y, incy)
-
-Dot function for two complex vectors consisting of `n` elements of array `X`
-with stride `incx` and `n` elements of array `Y` with stride `incy`.
-
-# Examples
-```jldoctest
-julia> BLAS.dotu(10, fill(1.0im, 10), 1, fill(1.0+im, 20), 2)
--10.0 + 10.0im
-```
-"""
-function dotu end
-
-for (fname, elty) in ((:cblas_ddot,:Float64),
-                      (:cblas_sdot,:Float32))
-    @eval begin
-                #       DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
-                # *     .. Scalar Arguments ..
-                #       INTEGER INCX,INCY,N
-                # *     ..
-                # *     .. Array Arguments ..
-                #       DOUBLE PRECISION DX(*),DY(*)
-        function dot(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer)
-            ccall((@blasfunc($fname), libblastrampoline), $elty,
-                (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt),
-                 n, DX, incx, DY, incy)
-        end
-    end
-end
-for (fname, elty) in ((:cblas_zdotc_sub,:ComplexF64),
-                      (:cblas_cdotc_sub,:ComplexF32))
-    @eval begin
-                #       DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
-                # *     .. Scalar Arguments ..
-                #       INTEGER INCX,INCY,N
-                # *     ..
-                # *     .. Array Arguments ..
-                #       DOUBLE PRECISION DX(*),DY(*)
-        function dotc(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer)
-            result = Ref{$elty}()
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}),
-                 n, DX, incx, DY, incy, result)
-            result[]
-        end
-    end
-end
-for (fname, elty) in ((:cblas_zdotu_sub,:ComplexF64),
-                      (:cblas_cdotu_sub,:ComplexF32))
-    @eval begin
-                #       DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
-                # *     .. Scalar Arguments ..
-                #       INTEGER INCX,INCY,N
-                # *     ..
-                # *     .. Array Arguments ..
-                #       DOUBLE PRECISION DX(*),DY(*)
-        function dotu(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer)
-            result = Ref{$elty}()
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}),
-                 n, DX, incx, DY, incy, result)
-            result[]
-        end
-    end
-end
-
-for (elty, f) in ((Float32, :dot), (Float64, :dot),
-                  (ComplexF32, :dotc), (ComplexF64, :dotc),
-                  (ComplexF32, :dotu), (ComplexF64, :dotu))
-    @eval begin
-        function $f(x::AbstractArray{$elty}, y::AbstractArray{$elty})
-            n, m = length(x), length(y)
-            n == m || throw(DimensionMismatch(lazy"dot product arguments have lengths $n and $m"))
-            GC.@preserve x y $f(n, vec_pointer_stride(x)..., vec_pointer_stride(y)...)
-        end
-    end
-end
-
-## nrm2
-
-"""
-    nrm2(n, X, incx)
-
-2-norm of a vector consisting of `n` elements of array `X` with stride `incx`.
-
-# Examples
-```jldoctest
-julia> BLAS.nrm2(4, fill(1.0, 8), 2)
-2.0
-
-julia> BLAS.nrm2(1, fill(1.0, 8), 2)
-1.0
-```
-"""
-function nrm2 end
-
-for (fname, elty, ret_type) in ((:dnrm2_,:Float64,:Float64),
-                                (:snrm2_,:Float32,:Float32),
-                                (:dznrm2_,:ComplexF64,:Float64),
-                                (:scnrm2_,:ComplexF32,:Float32))
-    @eval begin
-        # SUBROUTINE DNRM2(N,X,INCX)
-        function nrm2(n::Integer, X::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer)
-            ccall((@blasfunc($fname), libblastrampoline), $ret_type,
-                (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
-                 n, X, incx)
-        end
-    end
-end
-# openblas returns 0 for negative stride
-function nrm2(x::AbstractArray)
-    p, st = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-    GC.@preserve x nrm2(length(x), p, abs(st))
-end
-
-## asum
-
-"""
-    asum(n, X, incx)
-
-Sum of the magnitudes of the first `n` elements of array `X` with stride `incx`.
-
-For a real array, the magnitude is the absolute value. For a complex array, the
-magnitude is the sum of the absolute value of the real part and the absolute value
-of the imaginary part.
-
-# Examples
-```jldoctest
-julia> BLAS.asum(5, fill(1.0im, 10), 2)
-5.0
-
-julia> BLAS.asum(2, fill(1.0im, 10), 5)
-2.0
-```
-"""
-function asum end
-
-for (fname, elty, ret_type) in ((:dasum_,:Float64,:Float64),
-                                (:sasum_,:Float32,:Float32),
-                                (:dzasum_,:ComplexF64,:Float64),
-                                (:scasum_,:ComplexF32,:Float32))
-    @eval begin
-        # SUBROUTINE ASUM(N, X, INCX)
-        function asum(n::Integer, X::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer)
-            ccall((@blasfunc($fname), libblastrampoline), $ret_type,
-                (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
-                 n, X, incx)
-        end
-    end
-end
-function asum(x::AbstractArray)
-    p, st = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-    GC.@preserve x asum(length(x), p, abs(st))
-end
-
-## axpy
-
-"""
-    axpy!(a, X, Y)
-
-Overwrite `Y` with `X*a + Y`, where `a` is a scalar. Return `Y`.
-
-# Examples
-```jldoctest
-julia> x = [1.; 2; 3];
-
-julia> y = [4. ;; 5 ;; 6];
-
-julia> BLAS.axpy!(2, x, y)
-1×3 Matrix{Float64}:
- 6.0  9.0  12.0
-```
-"""
-function axpy! end
-
-for (fname, elty) in ((:daxpy_,:Float64),
-                      (:saxpy_,:Float32),
-                      (:zaxpy_,:ComplexF64),
-                      (:caxpy_,:ComplexF32))
-    @eval begin
-                # SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
-                # DY <- DA*DX + DY
-                #*     .. Scalar Arguments ..
-                #      DOUBLE PRECISION DA
-                #      INTEGER INCX,INCY,N
-                #*     .. Array Arguments ..
-                #      DOUBLE PRECISION DX(*),DY(*)
-        function axpy!(n::Integer, alpha::($elty), dx::Union{Ptr{$elty}, AbstractArray{$elty}}, incx::Integer, dy::Union{Ptr{$elty}, AbstractArray{$elty}}, incy::Integer)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
-                 n, alpha, dx, incx, dy, incy)
-            dy
-        end
-    end
-end
-
-function axpy!(alpha::Number, x::AbstractArray{T}, y::AbstractArray{T}) where T<:BlasFloat
-    if length(x) != length(y)
-        throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))"))
-    end
-    GC.@preserve x y axpy!(length(x), T(alpha), vec_pointer_stride(x)...,
-        vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...)
-    y
-end
-
-function axpy!(alpha::Number, x::Array{T}, rx::AbstractRange{Ti},
-               y::Array{T}, ry::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer}
-    if length(rx) != length(ry)
-        throw(DimensionMismatch("ranges of differing lengths"))
-    end
-    if minimum(rx) < 1 || maximum(rx) > length(x)
-        throw(ArgumentError(lazy"range out of bounds for x, of length $(length(x))"))
-    end
-    if minimum(ry) < 1 || maximum(ry) > length(y)
-        throw(ArgumentError(lazy"range out of bounds for y, of length $(length(y))"))
-    end
-    GC.@preserve x y axpy!(
-        length(rx),
-        T(alpha),
-        pointer(x, minimum(rx)),
-        step(rx),
-        pointer(y, minimum(ry)),
-        step(ry))
-
-    return y
-end
-
-"""
-    axpby!(a, X, b, Y)
-
-Overwrite `Y` with `X*a + Y*b`, where `a` and `b` are scalars. Return `Y`.
-
-# Examples
-```jldoctest
-julia> x = [1., 2, 3];
-
-julia> y = [4., 5, 6];
-
-julia> BLAS.axpby!(2., x, 3., y)
-3-element Vector{Float64}:
- 14.0
- 19.0
- 24.0
-```
-"""
-function axpby! end
-
-for (fname, elty) in ((:daxpby_,:Float64), (:saxpby_,:Float32),
-                      (:zaxpby_,:ComplexF64), (:caxpby_,:ComplexF32))
-    @eval begin
-        # SUBROUTINE DAXPBY(N,DA,DX,INCX,DB,DY,INCY)
-        # DY <- DA*DX + DB*DY
-        #*     .. Scalar Arguments ..
-        #      DOUBLE PRECISION DA,DB
-        #      INTEGER INCX,INCY,N
-        #*     .. Array Arguments ..
-        #      DOUBLE PRECISION DX(*),DY(*)
-        function axpby!(n::Integer, alpha::($elty), dx::Union{Ptr{$elty},
-                        AbstractArray{$elty}}, incx::Integer, beta::($elty),
-                        dy::Union{Ptr{$elty}, AbstractArray{$elty}}, incy::Integer)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}),
-                n, alpha, dx, incx, beta, dy, incy)
-            dy
-        end
-    end
-end
-
-function axpby!(alpha::Number, x::AbstractArray{T}, beta::Number, y::AbstractArray{T}) where T<:BlasFloat
-    require_one_based_indexing(x, y)
-    if length(x) != length(y)
-        throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))"))
-    end
-    GC.@preserve x y axpby!(length(x), T(alpha), vec_pointer_stride(x)..., T(beta),
-        vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...)
-    y
-end
-
-## iamax
-for (fname, elty) in ((:idamax_,:Float64),
-                      (:isamax_,:Float32),
-                      (:izamax_,:ComplexF64),
-                      (:icamax_,:ComplexF32))
-    @eval begin
-        function iamax(n::Integer, dx::Union{Ptr{$elty}, AbstractArray{$elty}}, incx::Integer)
-            ccall((@blasfunc($fname), libblastrampoline),BlasInt,
-                (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
-                n, dx, incx)
-        end
-    end
-end
-function iamax(dx::AbstractArray)
-    p, st = vec_pointer_stride(dx)
-    st <= 0 && return BlasInt(0)
-    iamax(length(dx), p, st)
-end
-
-"""
-    iamax(n, dx, incx)
-    iamax(dx)
-
-Find the index of the element of `dx` with the maximum absolute value. `n` is the length of `dx`, and `incx` is the
-stride. If `n` and `incx` are not provided, they assume default values of `n=length(dx)` and `incx=stride1(dx)`.
-"""
-iamax
-
-# Level 2
-## mv
-### gemv
-for (fname, elty) in ((:dgemv_,:Float64),
-                      (:sgemv_,:Float32),
-                      (:zgemv_,:ComplexF64),
-                      (:cgemv_,:ComplexF32))
-    @eval begin
-             #SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-             #*     .. Scalar Arguments ..
-             #      DOUBLE PRECISION ALPHA,BETA
-             #      INTEGER INCX,INCY,LDA,M,N
-             #      CHARACTER TRANS
-             #*     .. Array Arguments ..
-             #      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-        function gemv!(trans::AbstractChar, alpha::Union{($elty), Bool},
-                       A::AbstractVecOrMat{$elty}, X::AbstractVector{$elty},
-                       beta::Union{($elty), Bool}, Y::AbstractVector{$elty})
-            require_one_based_indexing(A, X, Y)
-            m,n = size(A,1),size(A,2)
-            if trans == 'N' && (length(X) != n || length(Y) != m)
-                throw(DimensionMismatch(lazy"A has dimensions $(size(A)), X has length $(length(X)) and Y has length $(length(Y))"))
-            elseif trans == 'C' && (length(X) != m || length(Y) != n)
-                throw(DimensionMismatch(lazy"the adjoint of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))"))
-            elseif trans == 'T' && (length(X) != m || length(Y) != n)
-                throw(DimensionMismatch(lazy"the transpose of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))"))
-            end
-            chkstride1(A)
-            lda = stride(A,2)
-            pX, sX = vec_pointer_stride(X, ArgumentError("input vector with 0 stride is not allowed"))
-            pY, sY = vec_pointer_stride(Y, ArgumentError("dest vector with 0 stride is not allowed"))
-            pA = pointer(A)
-            if lda < 0
-                pA += (size(A, 2) - 1) * lda * sizeof($elty)
-                lda = -lda
-                trans == 'N' ? (sX = -sX) : (sY = -sY)
-            end
-            lda >= size(A,1) || size(A,2) <= 1 || error("when `size(A,2) > 1`, `abs(stride(A,2))` must be at least `size(A,1)`")
-            lda = max(1, size(A,1), lda)
-            GC.@preserve A X Y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong),
-                 trans, size(A,1), size(A,2), alpha,
-                 pA, lda, pX, sX,
-                 beta, pY, sY, 1)
-            Y
-        end
-        function gemv(trans::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, X::AbstractVector{$elty})
-            gemv!(trans, alpha, A, X, zero($elty), similar(X, $elty, size(A, (trans == 'N' ? 1 : 2))))
-        end
-        function gemv(trans::AbstractChar, A::AbstractMatrix{$elty}, X::AbstractVector{$elty})
-            gemv!(trans, one($elty), A, X, zero($elty), similar(X, $elty, size(A, (trans == 'N' ? 1 : 2))))
-        end
-    end
-end
-
-"""
-    gemv!(tA, alpha, A, x, beta, y)
-
-Update the vector `y` as `alpha*A*x + beta*y` or `alpha*A'x + beta*y`
-according to [`tA`](@ref stdlib-blas-trans).
-`alpha` and `beta` are scalars. Return the updated `y`.
-"""
-gemv!
-
-"""
-    gemv(tA, alpha, A, x)
-
-Return `alpha*A*x` or `alpha*A'x` according to [`tA`](@ref stdlib-blas-trans).
-`alpha` is a scalar.
-"""
-gemv(tA, alpha, A, x)
-
-"""
-    gemv(tA, A, x)
-
-Return `A*x` or `A'x` according to [`tA`](@ref stdlib-blas-trans).
-"""
-gemv(tA, A, x)
-
-### (GB) general banded matrix-vector multiplication
-
-"""
-    gbmv!(trans, m, kl, ku, alpha, A, x, beta, y)
-
-Update vector `y` as `alpha*A*x + beta*y` or `alpha*A'*x + beta*y` according to [`trans`](@ref stdlib-blas-trans).
-The matrix `A` is a general band matrix of dimension `m` by `size(A,2)` with `kl`
-sub-diagonals and `ku` super-diagonals. `alpha` and `beta` are scalars. Return the updated `y`.
-"""
-function gbmv! end
-
-"""
-    gbmv(trans, m, kl, ku, alpha, A, x)
-
-Return `alpha*A*x` or `alpha*A'*x` according to [`trans`](@ref stdlib-blas-trans).
-The matrix `A` is a general band matrix of dimension `m` by `size(A,2)` with `kl` sub-diagonals and `ku`
-super-diagonals, and `alpha` is a scalar.
-"""
-function gbmv end
-
-for (fname, elty) in ((:dgbmv_,:Float64),
-                      (:sgbmv_,:Float32),
-                      (:zgbmv_,:ComplexF64),
-                      (:cgbmv_,:ComplexF32))
-    @eval begin
-             # SUBROUTINE DGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-             # *     .. Scalar Arguments ..
-             #       DOUBLE PRECISION ALPHA,BETA
-             #       INTEGER INCX,INCY,KL,KU,LDA,M,N
-             #       CHARACTER TRANS
-             # *     .. Array Arguments ..
-             #       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-        function gbmv!(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer,
-                       alpha::Union{($elty), Bool}, A::AbstractMatrix{$elty},
-                       x::AbstractVector{$elty}, beta::Union{($elty), Bool},
-                       y::AbstractVector{$elty})
-            require_one_based_indexing(A, x, y)
-            chkstride1(A)
-            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
-            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                 Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                 Ref{BlasInt}, Clong),
-                 trans, m, size(A,2), kl,
-                 ku, alpha, A, max(1,stride(A,2)),
-                 px, stx, beta, py, sty, 1)
-            y
-        end
-        function gbmv(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            n = size(A,2)
-            leny = trans == 'N' ? m : n
-            gbmv!(trans, m, kl, ku, alpha, A, x, zero($elty), similar(x, $elty, leny))
-        end
-        function gbmv(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            gbmv(trans, m, kl, ku, one($elty), A, x)
-        end
-    end
-end
-
-### symv
-
-"""
-    symv!(ul, alpha, A, x, beta, y)
-
-Update the vector `y` as `alpha*A*x + beta*y`. `A` is assumed to be symmetric.
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-`alpha` and `beta` are scalars. Return the updated `y`.
-"""
-function symv! end
-
-for (fname, elty, lib) in ((:dsymv_,:Float64,libblastrampoline),
-                           (:ssymv_,:Float32,libblastrampoline),
-                           (:zsymv_,:ComplexF64,libblastrampoline),
-                           (:csymv_,:ComplexF32,libblastrampoline))
-    # Note that the complex symv are not BLAS but auiliary functions in LAPACK
-    @eval begin
-             #      SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-             #     .. Scalar Arguments ..
-             #      DOUBLE PRECISION ALPHA,BETA
-             #      INTEGER INCX,INCY,LDA,N
-             #      CHARACTER UPLO
-             #     .. Array Arguments ..
-             #      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-        function symv!(uplo::AbstractChar, alpha::Union{($elty), Bool},
-                       A::AbstractMatrix{$elty}, x::AbstractVector{$elty},
-                       beta::Union{($elty), Bool}, y::AbstractVector{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, x, y)
-            m, n = size(A)
-            if m != n
-                throw(DimensionMismatch(lazy"matrix A is $m by $n but must be square"))
-            end
-            if n != length(x)
-                throw(DimensionMismatch(lazy"A has size $(size(A)), and x has length $(length(x))"))
-            end
-            if m != length(y)
-                throw(DimensionMismatch(lazy"A has size $(size(A)), and y has length $(length(y))"))
-            end
-            chkstride1(A)
-            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
-            GC.@preserve x y ccall((@blasfunc($fname), $lib), Cvoid,
-                (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
-                 Ptr{$elty}, Ref{BlasInt}, Clong),
-                 uplo, n, alpha, A,
-                 max(1,stride(A,2)), px, stx, beta,
-                 py, sty, 1)
-            y
-        end
-        function symv(uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-                symv!(uplo, alpha, A, x, zero($elty), similar(x))
-        end
-        function symv(uplo::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            symv(uplo, one($elty), A, x)
-        end
-    end
-end
-
-"""
-    symv(ul, alpha, A, x)
-
-Return `alpha*A*x`. `A` is assumed to be symmetric.
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-`alpha` is a scalar.
-"""
-symv(ul, alpha, A, x)
-
-"""
-    symv(ul, A, x)
-
-Return `A*x`. `A` is assumed to be symmetric.
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-"""
-symv(ul, A, x)
-
-### hemv
-"""
-    hemv!(ul, alpha, A, x, beta, y)
-
-Update the vector `y` as `alpha*A*x + beta*y`. `A` is assumed to be Hermitian.
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-`alpha` and `beta` are scalars. Return the updated `y`.
-"""
-function hemv! end
-
-for (fname, elty) in ((:zhemv_,:ComplexF64),
-                      (:chemv_,:ComplexF32))
-    @eval begin
-        function hemv!(uplo::AbstractChar, α::Union{$elty, Bool}, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, β::Union{$elty, Bool}, y::AbstractVector{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, x, y)
-            m, n = size(A)
-            if m != n
-                throw(DimensionMismatch(lazy"matrix A is $m by $n but must be square"))
-            end
-            if n != length(x)
-                throw(DimensionMismatch(lazy"A has size $(size(A)), and x has length $(length(x))"))
-            end
-            if m != length(y)
-                throw(DimensionMismatch(lazy"A has size $(size(A)), and y has length $(length(y))"))
-            end
-            chkstride1(A)
-            lda = max(1, stride(A, 2))
-            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
-            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
-                 Ptr{$elty}, Ref{BlasInt}, Clong),
-                uplo, n, α, A,
-                lda, px, stx, β,
-                py, sty, 1)
-            y
-        end
-        function hemv(uplo::AbstractChar, α::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            hemv!(uplo, α, A, x, zero($elty), similar(x))
-        end
-        function hemv(uplo::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            hemv(uplo, one($elty), A, x)
-        end
-    end
-end
-
-"""
-    hemv(ul, alpha, A, x)
-
-Return `alpha*A*x`. `A` is assumed to be Hermitian.
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-`alpha` is a scalar.
-"""
-hemv(ul, alpha, A, x)
-
-"""
-    hemv(ul, A, x)
-
-Return `A*x`. `A` is assumed to be Hermitian.
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-"""
-hemv(ul, A, x)
-
-### hpmv!, (HP) Hermitian packed matrix-vector operation defined as y := alpha*A*x + beta*y.
-for (fname, elty) in ((:zhpmv_, :ComplexF64),
-                      (:chpmv_, :ComplexF32))
-    @eval begin
-        # SUBROUTINE ZHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-        # Y <- ALPHA*AP*X + BETA*Y
-        # *     .. Scalar Arguments ..
-        #       DOUBLE PRECISION ALPHA,BETA
-        #       INTEGER INCX,INCY,N
-        #       CHARACTER UPLO
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION A(N,N),X(N),Y(N)
-        function hpmv!(uplo::AbstractChar,
-                       n::Integer,
-                       α::$elty,
-                       AP::Union{Ptr{$elty}, AbstractArray{$elty}},
-                       x::Union{Ptr{$elty}, AbstractArray{$elty}},
-                       incx::Integer,
-                       β::$elty,
-                       y::Union{Ptr{$elty}, AbstractArray{$elty}},
-                       incy::Integer)
-
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                  (Ref{UInt8},     # uplo,
-                   Ref{BlasInt},   # n,
-                   Ref{$elty},     # α,
-                   Ptr{$elty},     # AP,
-                   Ptr{$elty},     # x,
-                   Ref{BlasInt},   # incx,
-                   Ref{$elty},     # β,
-                   Ptr{$elty},     # y, output
-                   Ref{BlasInt},   # incy
-                   Clong),         # length of uplo
-                  uplo,
-                  n,
-                  α,
-                  AP,
-                  x,
-                  incx,
-                  β,
-                  y,
-                  incy,
-                  1)
-            return y
-        end
-    end
-end
-
-function hpmv!(uplo::AbstractChar,
-               α::Number, AP::AbstractArray{T}, x::AbstractArray{T},
-               β::Number, y::AbstractArray{T}) where {T <: BlasComplex}
-    require_one_based_indexing(AP, x, y)
-    N = length(x)
-    if N != length(y)
-        throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))"))
-    end
-    if 2*length(AP) < N*(N + 1)
-        throw(DimensionMismatch(lazy"Packed hermitian matrix A has size smaller than length(x) = $(N)."))
-    end
-    chkstride1(AP)
-    px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-    py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
-    GC.@preserve x y hpmv!(uplo, N, T(α), AP, px, stx, T(β), py, sty)
-    y
-end
-
-"""
-    hpmv!(uplo, α, AP, x, β, y)
-
-Update vector `y` as `α*A*x + β*y`, where `A` is a Hermitian matrix provided
-in packed format `AP`.
-
-With `uplo = 'U'`, the array AP must contain the upper triangular part of the
-Hermitian matrix packed sequentially, column by column, so that `AP[1]`
-contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]`
-respectively, and so on.
-
-With `uplo = 'L'`, the array AP must contain the lower triangular part of the
-Hermitian matrix packed sequentially, column by column, so that `AP[1]`
-contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]`
-respectively, and so on.
-
-The scalar inputs `α` and `β` must be complex or real numbers.
-
-The array inputs `x`, `y` and `AP` must all be of `ComplexF32` or `ComplexF64` type.
-
-Return the updated `y`.
-
-!!! compat "Julia 1.5"
-    `hpmv!` requires at least Julia 1.5.
-"""
-hpmv!
-
-### sbmv, (SB) symmetric banded matrix-vector multiplication
-for (fname, elty) in ((:dsbmv_,:Float64),
-                      (:ssbmv_,:Float32))
-    @eval begin
-             #       SUBROUTINE DSBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-             # *     .. Scalar Arguments ..
-             #       DOUBLE PRECISION ALPHA,BETA
-             #       INTEGER INCX,INCY,K,LDA,N
-             #       CHARACTER UPLO
-             # *     .. Array Arguments ..
-             #       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-        function sbmv!(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, beta::($elty), y::AbstractVector{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, x, y)
-            chkstride1(A)
-            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
-            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong),
-                 uplo, size(A,2), k, alpha,
-                 A, max(1,stride(A,2)), px, stx,
-                 beta, py, sty, 1)
-            y
-        end
-        function sbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            n = size(A,2)
-            sbmv!(uplo, k, alpha, A, x, zero($elty), similar(x, $elty, n))
-        end
-        function sbmv(uplo::AbstractChar, k::Integer, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            sbmv(uplo, k, one($elty), A, x)
-        end
-    end
-end
-
-"""
-    sbmv(uplo, k, alpha, A, x)
-
-Return `alpha*A*x` where `A` is a symmetric band matrix of order `size(A,2)` with `k`
-super-diagonals stored in the argument `A`.
-Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `A` is used.
-"""
-sbmv(uplo, k, alpha, A, x)
-
-"""
-    sbmv(uplo, k, A, x)
-
-Return `A*x` where `A` is a symmetric band matrix of order `size(A,2)` with `k`
-super-diagonals stored in the argument `A`.
-Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `A` is used.
-"""
-sbmv(uplo, k, A, x)
-
-"""
-    sbmv!(uplo, k, alpha, A, x, beta, y)
-
-Update vector `y` as `alpha*A*x + beta*y` where `A` is a symmetric band matrix of order
-`size(A,2)` with `k` super-diagonals stored in the argument `A`. The storage layout for `A`
-is described the reference BLAS module, level-2 BLAS at
-<http://www.netlib.org/lapack/explore-html/>.
-Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `A` is used.
-
-Return the updated `y`.
-"""
-sbmv!
-
-### spmv!, (SP) symmetric packed matrix-vector operation defined as y := alpha*A*x + beta*y.
-for (fname, elty) in ((:dspmv_, :Float64),
-                      (:sspmv_, :Float32))
-    @eval begin
-        # SUBROUTINE DSPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-        # Y <- ALPHA*AP*X + BETA*Y
-        # *     .. Scalar Arguments ..
-        #       DOUBLE PRECISION ALPHA,BETA
-        #       INTEGER INCX,INCY,N
-        #       CHARACTER UPLO
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION A(N,N),X(N),Y(N)
-        function spmv!(uplo::AbstractChar,
-                       n::Integer,
-                       α::$elty,
-                       AP::Union{Ptr{$elty}, AbstractArray{$elty}},
-                       x::Union{Ptr{$elty}, AbstractArray{$elty}},
-                       incx::Integer,
-                       β::$elty,
-                       y::Union{Ptr{$elty}, AbstractArray{$elty}},
-                       incy::Integer)
-
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                  (Ref{UInt8},     # uplo,
-                   Ref{BlasInt},   # n,
-                   Ref{$elty},     # α,
-                   Ptr{$elty},     # AP,
-                   Ptr{$elty},     # x,
-                   Ref{BlasInt},   # incx,
-                   Ref{$elty},     # β,
-                   Ptr{$elty},     # y, out
-                   Ref{BlasInt},   # incy
-                   Clong),         # length of uplo
-                  uplo,
-                  n,
-                  α,
-                  AP,
-                  x,
-                  incx,
-                  β,
-                  y,
-                  incy,
-                  1)
-            return y
-        end
-    end
-end
-
-function spmv!(uplo::AbstractChar,
-               α::Real, AP::AbstractArray{T}, x::AbstractArray{T},
-               β::Real, y::AbstractArray{T}) where {T <: BlasReal}
-    require_one_based_indexing(AP, x, y)
-    N = length(x)
-    if N != length(y)
-        throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))"))
-    end
-    if 2*length(AP) < N*(N + 1)
-        throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N)."))
-    end
-    chkstride1(AP)
-    px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-    py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
-    GC.@preserve x y spmv!(uplo, N, T(α), AP, px, stx, T(β), py, sty)
-    y
-end
-
-"""
-    spmv!(uplo, α, AP, x, β, y)
-
-Update vector `y` as `α*A*x + β*y`, where `A` is a symmetric matrix provided
-in packed format `AP`.
-
-With `uplo = 'U'`, the array AP must contain the upper triangular part of the
-symmetric matrix packed sequentially, column by column, so that `AP[1]`
-contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]`
-respectively, and so on.
-
-With `uplo = 'L'`, the array AP must contain the lower triangular part of the
-symmetric matrix packed sequentially, column by column, so that `AP[1]`
-contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]`
-respectively, and so on.
-
-The scalar inputs `α` and `β` must be real.
-
-The array inputs `x`, `y` and `AP` must all be of `Float32` or `Float64` type.
-
-Return the updated `y`.
-
-!!! compat "Julia 1.5"
-    `spmv!` requires at least Julia 1.5.
-"""
-spmv!
-
-### spr!, (SP) symmetric packed matrix-vector operation defined as A := alpha*x*x' + A
-for (fname, elty) in ((:dspr_, :Float64),
-                      (:sspr_, :Float32))
-    @eval begin
-        function spr!(uplo::AbstractChar,
-                      n::Integer,
-                      α::$elty,
-                      x::Union{Ptr{$elty}, AbstractArray{$elty}},
-                      incx::Integer,
-                      AP::Union{Ptr{$elty}, AbstractArray{$elty}})
-
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                  (Ref{UInt8},     # uplo,
-                   Ref{BlasInt},   # n,
-                   Ref{$elty},     # α,
-                   Ptr{$elty},     # x,
-                   Ref{BlasInt},   # incx,
-                   Ptr{$elty},     # AP,
-                   Clong),         # length of uplo
-                  uplo,
-                  n,
-                  α,
-                  x,
-                  incx,
-                  AP,
-                  1)
-            return AP
-        end
-    end
-end
-
-function spr!(uplo::AbstractChar,
-              α::Real, x::AbstractArray{T},
-              AP::AbstractArray{T}) where {T <: BlasReal}
-    chkuplo(uplo)
-    require_one_based_indexing(AP, x)
-    N = length(x)
-    if 2*length(AP) < N*(N + 1)
-        throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N)."))
-    end
-    chkstride1(AP)
-    px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-    return GC.@preserve x spr!(uplo, N, T(α), px, stx , AP)
-end
-
-"""
-    spr!(uplo, α, x, AP)
-
-Update matrix `A` as `A+α*x*x'`, where `A` is a symmetric matrix provided
-in packed format `AP` and `x` is a vector.
-
-With `uplo = 'U'`, the array AP must contain the upper triangular part of the
-symmetric matrix packed sequentially, column by column, so that `AP[1]`
-contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]`
-respectively, and so on.
-
-With `uplo = 'L'`, the array AP must contain the lower triangular part of the
-symmetric matrix packed sequentially, column by column, so that `AP[1]`
-contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]`
-respectively, and so on.
-
-The scalar input `α` must be real.
-
-The array inputs `x` and `AP` must all be of `Float32` or `Float64` type.
-Return the updated `AP`.
-
-!!! compat "Julia 1.8"
-    `spr!` requires at least Julia 1.8.
-"""
-spr!
-
-### hbmv, (HB) Hermitian banded matrix-vector multiplication
-for (fname, elty) in ((:zhbmv_,:ComplexF64),
-                      (:chbmv_,:ComplexF32))
-    @eval begin
-             #       SUBROUTINE ZHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-             # *     .. Scalar Arguments ..
-             #       DOUBLE PRECISION ALPHA,BETA
-             #       INTEGER INCX,INCY,K,LDA,N
-             #       CHARACTER UPLO
-             # *     .. Array Arguments ..
-             #       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-        function hbmv!(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, beta::($elty), y::AbstractVector{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, x, y)
-            chkstride1(A)
-            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
-            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong),
-                 uplo, size(A,2), k, alpha,
-                 A, max(1,stride(A,2)), px, stx,
-                 beta, py, sty, 1)
-            y
-        end
-        function hbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            n = size(A,2)
-            hbmv!(uplo, k, alpha, A, x, zero($elty), similar(x, $elty, n))
-        end
-        function hbmv(uplo::AbstractChar, k::Integer, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            hbmv(uplo, k, one($elty), A, x)
-        end
-    end
-end
-
-### trmv, Triangular matrix-vector multiplication
-
-"""
-    trmv(ul, tA, dA, A, b)
-
-Return `op(A)*b`, where `op` is determined by [`tA`](@ref stdlib-blas-trans).
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or
-are assumed to be all ones.
-"""
-function trmv end
-
-"""
-    trmv!(ul, tA, dA, A, b)
-
-Return `op(A)*b`, where `op` is determined by [`tA`](@ref stdlib-blas-trans).
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or
-are assumed to be all ones.
-The multiplication occurs in-place on `b`.
-"""
-function trmv! end
-
-for (fname, elty) in ((:dtrmv_,:Float64),
-                        (:strmv_,:Float32),
-                        (:ztrmv_,:ComplexF64),
-                        (:ctrmv_,:ComplexF32))
-    @eval begin
-                #       SUBROUTINE DTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-                # *     .. Scalar Arguments ..
-                #       INTEGER INCX,LDA,N
-                #       CHARACTER DIAG,TRANS,UPLO
-                # *     .. Array Arguments ..
-                #       DOUBLE PRECISION A(LDA,*),X(*)
-        function trmv!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, x)
-            n = checksquare(A)
-            if n != length(x)
-                throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))"))
-            end
-            chkstride1(A)
-            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-            GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Clong, Clong, Clong),
-                 uplo, trans, diag, n,
-                 A, max(1,stride(A,2)), px, stx, 1, 1, 1)
-            x
-        end
-        function trmv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            trmv!(uplo, trans, diag, A, copy(x))
-        end
-    end
-end
-
-### trsv, Triangular matrix-vector solve
-
-"""
-    trsv!(ul, tA, dA, A, b)
-
-Overwrite `b` with the solution to `A*x = b` or one of the other two variants determined by
-[`tA`](@ref stdlib-blas-trans) and [`ul`](@ref stdlib-blas-uplo).
-[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or
-are assumed to be all ones.
-Return the updated `b`.
-"""
-function trsv! end
-
-"""
-    trsv(ul, tA, dA, A, b)
-
-Return the solution to `A*x = b` or one of the other two variants determined by
-[`tA`](@ref stdlib-blas-trans) and [`ul`](@ref stdlib-blas-uplo).
-[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or
-are assumed to be all ones.
-"""
-function trsv end
-
-for (fname, elty) in ((:dtrsv_,:Float64),
-                        (:strsv_,:Float32),
-                        (:ztrsv_,:ComplexF64),
-                        (:ctrsv_,:ComplexF32))
-    @eval begin
-                #       SUBROUTINE DTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-                #       .. Scalar Arguments ..
-                #       INTEGER INCX,LDA,N
-                #       CHARACTER DIAG,TRANS,UPLO
-                #       .. Array Arguments ..
-                #       DOUBLE PRECISION A(LDA,*),X(*)
-        function trsv!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, x)
-            n = checksquare(A)
-            if n != length(x)
-                throw(DimensionMismatch(lazy"size of A is $n != length(x) = $(length(x))"))
-            end
-            chkstride1(A)
-            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-            GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Clong, Clong, Clong),
-                 uplo, trans, diag, n,
-                 A, max(1,stride(A,2)), px, stx, 1, 1, 1)
-            x
-        end
-        function trsv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
-            trsv!(uplo, trans, diag, A, copy(x))
-        end
-    end
-end
-
-### ger
-
-"""
-    ger!(alpha, x, y, A)
-
-Rank-1 update of the matrix `A` with vectors `x` and `y` as `alpha*x*y' + A`.
-"""
-function ger! end
-
-for (fname, elty) in ((:dger_,:Float64),
-                      (:sger_,:Float32),
-                      (:zgerc_,:ComplexF64),
-                      (:cgerc_,:ComplexF32))
-    @eval begin
-        function ger!(α::$elty, x::AbstractVector{$elty}, y::AbstractVector{$elty}, A::AbstractMatrix{$elty})
-            require_one_based_indexing(A, x, y)
-            m, n = size(A)
-            if m != length(x) || n != length(y)
-                throw(DimensionMismatch(lazy"A has size ($m,$n), x has length $(length(x)), y has length $(length(y))"))
-            end
-            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-            py, sty = vec_pointer_stride(y, ArgumentError("input vector with 0 stride is not allowed"))
-            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}),
-                 m, n, α, px, stx, py, sty, A, max(1,stride(A,2)))
-            A
-        end
-    end
-end
-
-### syr
-
-"""
-    syr!(uplo, alpha, x, A)
-
-Rank-1 update of the symmetric matrix `A` with vector `x` as `alpha*x*transpose(x) + A`.
-[`uplo`](@ref stdlib-blas-uplo) controls which triangle of `A` is updated. Returns `A`.
-"""
-function syr! end
-
-for (fname, elty, lib) in ((:dsyr_,:Float64,libblastrampoline),
-                           (:ssyr_,:Float32,libblastrampoline),
-                           (:zsyr_,:ComplexF64,libblastrampoline),
-                           (:csyr_,:ComplexF32,libblastrampoline))
-    @eval begin
-        function syr!(uplo::AbstractChar, α::$elty, x::AbstractVector{$elty}, A::AbstractMatrix{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, x)
-            n = checksquare(A)
-            if length(x) != n
-                throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))"))
-            end
-            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-            GC.@preserve x ccall((@blasfunc($fname), $lib), Cvoid,
-                (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
-                 uplo, n, α, px, stx, A, max(1,stride(A, 2)))
-            A
-        end
-    end
-end
-
-### her
-
-"""
-    her!(uplo, alpha, x, A)
-
-Methods for complex arrays only. Rank-1 update of the Hermitian matrix `A` with vector `x`
-as `alpha*x*x' + A`.
-[`uplo`](@ref stdlib-blas-uplo) controls which triangle of `A` is updated. Returns `A`.
-"""
-function her! end
-
-for (fname, elty, relty) in ((:zher_,:ComplexF64, :Float64),
-                             (:cher_,:ComplexF32, :Float32))
-    @eval begin
-        function her!(uplo::AbstractChar, α::$relty, x::AbstractVector{$elty}, A::AbstractMatrix{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, x)
-            n = checksquare(A)
-            if length(x) != n
-                throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))"))
-            end
-            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
-            GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{BlasInt}, Ref{$relty}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Clong),
-                 uplo, n, α, px, stx, A, max(1,stride(A,2)), 1)
-            A
-        end
-    end
-end
-
-# Level 3
-## (GE) general matrix-matrix multiplication
-
-"""
-    gemm!(tA, tB, alpha, A, B, beta, C)
-
-Update `C` as `alpha*A*B + beta*C` or the other three variants according to
-[`tA`](@ref stdlib-blas-trans) and `tB`. Return the updated `C`.
-"""
-function gemm! end
-
-for (gemm, elty) in
-        ((:dgemm_,:Float64),
-         (:sgemm_,:Float32),
-         (:zgemm_,:ComplexF64),
-         (:cgemm_,:ComplexF32))
-    @eval begin
-             # SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-             # *     .. Scalar Arguments ..
-             #       DOUBLE PRECISION ALPHA,BETA
-             #       INTEGER K,LDA,LDB,LDC,M,N
-             #       CHARACTER TRANSA,TRANSB
-             # *     .. Array Arguments ..
-             #       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-        function gemm!(transA::AbstractChar, transB::AbstractChar,
-                       alpha::Union{($elty), Bool},
-                       A::AbstractVecOrMat{$elty}, B::AbstractVecOrMat{$elty},
-                       beta::Union{($elty), Bool},
-                       C::AbstractVecOrMat{$elty})
-#           if any([stride(A,1), stride(B,1), stride(C,1)] .!= 1)
-#               error("gemm!: BLAS module requires contiguous matrix columns")
-#           end  # should this be checked on every call?
-            require_one_based_indexing(A, B, C)
-            m = size(A, transA == 'N' ? 1 : 2)
-            ka = size(A, transA == 'N' ? 2 : 1)
-            kb = size(B, transB == 'N' ? 1 : 2)
-            n = size(B, transB == 'N' ? 2 : 1)
-            if ka != kb || m != size(C,1) || n != size(C,2)
-                throw(DimensionMismatch(lazy"A has size ($m,$ka), B has size ($kb,$n), C has size $(size(C))"))
-            end
-            chkstride1(A)
-            chkstride1(B)
-            chkstride1(C)
-            ccall((@blasfunc($gemm), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                 Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                 Ref{BlasInt}, Clong, Clong),
-                 transA, transB, m, n,
-                 ka, alpha, A, max(1,stride(A,2)),
-                 B, max(1,stride(B,2)), beta, C,
-                 max(1,stride(C,2)), 1, 1)
-            C
-        end
-        function gemm(transA::AbstractChar, transB::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            gemm!(transA, transB, alpha, A, B, zero($elty), similar(B, $elty, (size(A, transA == 'N' ? 1 : 2), size(B, transB == 'N' ? 2 : 1))))
-        end
-        function gemm(transA::AbstractChar, transB::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            gemm(transA, transB, one($elty), A, B)
-        end
-    end
-end
-
-"""
-    gemm(tA, tB, alpha, A, B)
-
-Return `alpha*A*B` or the other three variants according to [`tA`](@ref stdlib-blas-trans) and `tB`.
-"""
-gemm(tA, tB, alpha, A, B)
-
-"""
-    gemm(tA, tB, A, B)
-
-Return `A*B` or the other three variants according to [`tA`](@ref stdlib-blas-trans) and `tB`.
-"""
-gemm(tA, tB, A, B)
-
-
-## (SY) symmetric matrix-matrix and matrix-vector multiplication
-for (mfname, elty) in ((:dsymm_,:Float64),
-                       (:ssymm_,:Float32),
-                       (:zsymm_,:ComplexF64),
-                       (:csymm_,:ComplexF32))
-    @eval begin
-             #     SUBROUTINE DSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-             #     .. Scalar Arguments ..
-             #     DOUBLE PRECISION ALPHA,BETA
-             #     INTEGER LDA,LDB,LDC,M,N
-             #     CHARACTER SIDE,UPLO
-             #     .. Array Arguments ..
-             #     DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-        function symm!(side::AbstractChar, uplo::AbstractChar, alpha::Union{($elty), Bool},
-                       A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty},
-                       beta::Union{($elty), Bool}, C::AbstractMatrix{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, B, C)
-            m, n = size(C)
-            j = checksquare(A)
-            M, N = size(B)
-            if side == 'L'
-                if j != m
-                    throw(DimensionMismatch(lazy"A has first dimension $j but needs to match first dimension of C, $m"))
-                end
-                if N != n
-                    throw(DimensionMismatch(lazy"B has second dimension $N but needs to match second dimension of C, $n"))
-                end
-                if j != M
-                    throw(DimensionMismatch(lazy"A has second dimension $j but needs to match first dimension of B, $M"))
-                end
-            else
-                if j != n
-                    throw(DimensionMismatch(lazy"B has second dimension $j but needs to match second dimension of C, $n"))
-                end
-                if N != j
-                    throw(DimensionMismatch(lazy"A has second dimension $N but needs to match first dimension of B, $j"))
-                end
-                if M != m
-                    throw(DimensionMismatch(lazy"A has first dimension $M but needs to match first dimension of C, $m"))
-                end
-            end
-            chkstride1(A)
-            chkstride1(B)
-            chkstride1(C)
-            ccall((@blasfunc($mfname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt},
-                 Clong, Clong),
-                 side, uplo, m, n,
-                 alpha, A, max(1,stride(A,2)), B,
-                 max(1,stride(B,2)), beta, C, max(1,stride(C,2)),
-                 1, 1)
-            C
-        end
-        function symm(side::AbstractChar, uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            symm!(side, uplo, alpha, A, B, zero($elty), similar(B))
-        end
-        function symm(side::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            symm(side, uplo, one($elty), A, B)
-        end
-    end
-end
-
-"""
-    symm(side, ul, alpha, A, B)
-
-Return `alpha*A*B` or `alpha*B*A` according to [`side`](@ref stdlib-blas-side).
-`A` is assumed to be symmetric. Only
-the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-"""
-symm(side, ul, alpha, A, B)
-
-"""
-    symm(side, ul, A, B)
-
-Return `A*B` or `B*A` according to [`side`](@ref stdlib-blas-side).
-`A` is assumed to be symmetric. Only the [`ul`](@ref stdlib-blas-uplo)
-triangle of `A` is used.
-"""
-symm(side, ul, A, B)
-
-"""
-    symm!(side, ul, alpha, A, B, beta, C)
-
-Update `C` as `alpha*A*B + beta*C` or `alpha*B*A + beta*C` according to [`side`](@ref stdlib-blas-side).
-`A` is assumed to be symmetric. Only the [`ul`](@ref stdlib-blas-uplo) triangle of
-`A` is used. Return the updated `C`.
-"""
-symm!
-
-## (HE) Hermitian matrix-matrix and matrix-vector multiplication
-for (mfname, elty) in ((:zhemm_,:ComplexF64),
-                       (:chemm_,:ComplexF32))
-    @eval begin
-             #     SUBROUTINE DHEMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-             #     .. Scalar Arguments ..
-             #     DOUBLE PRECISION ALPHA,BETA
-             #     INTEGER LDA,LDB,LDC,M,N
-             #     CHARACTER SIDE,UPLO
-             #     .. Array Arguments ..
-             #     DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-        function hemm!(side::AbstractChar, uplo::AbstractChar, alpha::Union{($elty), Bool},
-                       A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty},
-                       beta::Union{($elty), Bool}, C::AbstractMatrix{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, B, C)
-            m, n = size(C)
-            j = checksquare(A)
-            M, N = size(B)
-            if side == 'L'
-                if j != m
-                    throw(DimensionMismatch(lazy"A has first dimension $j but needs to match first dimension of C, $m"))
-                end
-                if N != n
-                    throw(DimensionMismatch(lazy"B has second dimension $N but needs to match second dimension of C, $n"))
-                end
-                if j != M
-                    throw(DimensionMismatch(lazy"A has second dimension $j but needs to match first dimension of B, $M"))
-                end
-            else
-                if j != n
-                    throw(DimensionMismatch(lazy"B has second dimension $j but needs to match second dimension of C, $n"))
-                end
-                if N != j
-                    throw(DimensionMismatch(lazy"A has second dimension $N but needs to match first dimension of B, $j"))
-                end
-                if M != m
-                    throw(DimensionMismatch(lazy"A has first dimension $M but needs to match first dimension of C, $m"))
-                end
-            end
-            chkstride1(A)
-            chkstride1(B)
-            chkstride1(C)
-            ccall((@blasfunc($mfname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt},
-                 Clong, Clong),
-                 side, uplo, m, n,
-                 alpha, A, max(1,stride(A,2)), B,
-                 max(1,stride(B,2)), beta, C, max(1,stride(C,2)),
-                 1, 1)
-            C
-        end
-        function hemm(side::AbstractChar, uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            hemm!(side, uplo, alpha, A, B, zero($elty), similar(B))
-        end
-        function hemm(side::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            hemm(side, uplo, one($elty), A, B)
-        end
-    end
-end
-
-"""
-    hemm(side, ul, alpha, A, B)
-
-Return `alpha*A*B` or `alpha*B*A` according to [`side`](@ref stdlib-blas-side).
-`A` is assumed to be Hermitian. Only the [`ul`](@ref stdlib-blas-uplo) triangle
-of `A` is used.
-"""
-hemm(side, ul, alpha, A, B)
-
-"""
-    hemm(side, ul, A, B)
-
-Return `A*B` or `B*A` according to [`side`](@ref stdlib-blas-side). `A` is assumed
-to be Hermitian. Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-"""
-hemm(side, ul, A, B)
-
-"""
-    hemm!(side, ul, alpha, A, B, beta, C)
-
-Update `C` as `alpha*A*B + beta*C` or `alpha*B*A + beta*C` according to
-[`side`](@ref stdlib-blas-side). `A` is assumed to be Hermitian. Only the
-[`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. Return the updated `C`.
-"""
-hemm!
-
-## syrk
-
-"""
-    syrk!(uplo, trans, alpha, A, beta, C)
-
-Rank-k update of the symmetric matrix `C` as `alpha*A*transpose(A) + beta*C` or
-`alpha*transpose(A)*A + beta*C` according to [`trans`](@ref stdlib-blas-trans).
-Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`.
-"""
-function syrk! end
-
-"""
-    syrk(uplo, trans, alpha, A)
-
-Return either the upper triangle or the lower triangle of `A`,
-according to [`uplo`](@ref stdlib-blas-uplo),
-of `alpha*A*transpose(A)` or `alpha*transpose(A)*A`,
-according to [`trans`](@ref stdlib-blas-trans).
-"""
-function syrk end
-
-for (fname, elty) in ((:dsyrk_,:Float64),
-                      (:ssyrk_,:Float32),
-                      (:zsyrk_,:ComplexF64),
-                      (:csyrk_,:ComplexF32))
-    @eval begin
-        # SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-        # *     .. Scalar Arguments ..
-        #       REAL ALPHA,BETA
-        #       INTEGER K,LDA,LDC,N
-        #       CHARACTER TRANS,UPLO
-        # *     .. Array Arguments ..
-        #       REAL A(LDA,*),C(LDC,*)
-        function syrk!(uplo::AbstractChar, trans::AbstractChar,
-                      alpha::Union{($elty), Bool}, A::AbstractVecOrMat{$elty},
-                      beta::Union{($elty), Bool}, C::AbstractMatrix{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, C)
-            n = checksquare(C)
-            nn = size(A, trans == 'N' ? 1 : 2)
-            if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end
-            k  = size(A, trans == 'N' ? 2 : 1)
-            chkstride1(A)
-            chkstride1(C)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                   Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
-                   Ptr{$elty}, Ref{BlasInt}, Clong, Clong),
-                  uplo, trans, n, k,
-                  alpha, A, max(1,stride(A,2)), beta,
-                  C, max(1,stride(C,2)), 1, 1)
-            C
-        end
-    end
-end
-function syrk(uplo::AbstractChar, trans::AbstractChar, alpha::Number, A::AbstractVecOrMat)
-    T = eltype(A)
-    n = size(A, trans == 'N' ? 1 : 2)
-    syrk!(uplo, trans, convert(T,alpha), A, zero(T), similar(A, T, (n, n)))
-end
-syrk(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat) = syrk(uplo, trans, one(eltype(A)), A)
-
-"""
-    herk!(uplo, trans, alpha, A, beta, C)
-
-Methods for complex arrays only. Rank-k update of the Hermitian matrix `C` as
-`alpha*A*A' + beta*C` or `alpha*A'*A + beta*C` according to [`trans`](@ref stdlib-blas-trans).
-Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is updated. Returns `C`.
-"""
-function herk! end
-
-"""
-    herk(uplo, trans, alpha, A)
-
-Methods for complex arrays only. Returns the [`uplo`](@ref stdlib-blas-uplo)
-triangle of `alpha*A*A'` or `alpha*A'*A`, according to [`trans`](@ref stdlib-blas-trans).
-"""
-function herk end
-
-for (fname, elty, relty) in ((:zherk_, :ComplexF64, :Float64),
-                             (:cherk_, :ComplexF32, :Float32))
-    @eval begin
-        # SUBROUTINE CHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-        # *     .. Scalar Arguments ..
-        #       REAL ALPHA,BETA
-        #       INTEGER K,LDA,LDC,N
-        #       CHARACTER TRANS,UPLO
-        # *     ..
-        # *     .. Array Arguments ..
-        #       COMPLEX A(LDA,*),C(LDC,*)
-        function herk!(uplo::AbstractChar, trans::AbstractChar,
-                        α::Union{$relty, Bool}, A::AbstractVecOrMat{$elty},
-                        β::Union{$relty, Bool}, C::AbstractMatrix{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, C)
-            n = checksquare(C)
-            nn = size(A, trans == 'N' ? 1 : 2)
-            if nn != n
-                throw(DimensionMismatch(lazy"the matrix to update has dimension $n but the implied dimension of the update is $(size(A, trans == 'N' ? 1 : 2))"))
-            end
-            chkstride1(A)
-            chkstride1(C)
-            k  = size(A, trans == 'N' ? 2 : 1)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                    Ref{$relty}, Ptr{$elty}, Ref{BlasInt}, Ref{$relty},
-                    Ptr{$elty}, Ref{BlasInt}, Clong, Clong),
-                    uplo, trans, n, k,
-                    α, A, max(1,stride(A,2)), β,
-                    C, max(1,stride(C,2)), 1, 1)
-            C
-        end
-        function herk(uplo::AbstractChar, trans::AbstractChar, α::$relty, A::AbstractVecOrMat{$elty})
-            n = size(A, trans == 'N' ? 1 : 2)
-            herk!(uplo, trans, α, A, zero($relty), similar(A, (n,n)))
-        end
-        herk(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty}) = herk(uplo, trans, one($relty), A)
-    end
-end
-
-## syr2k
-for (fname, elty) in ((:dsyr2k_,:Float64),
-                      (:ssyr2k_,:Float32),
-                      (:zsyr2k_,:ComplexF64),
-                      (:csyr2k_,:ComplexF32))
-    @eval begin
-            #       SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-            #
-            #       .. Scalar Arguments ..
-            #       REAL PRECISION ALPHA,BETA
-            #       INTEGER K,LDA,LDB,LDC,N
-            #       CHARACTER TRANS,UPLO
-            #       ..
-            #       .. Array Arguments ..
-            #       REAL PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-        function syr2k!(uplo::AbstractChar, trans::AbstractChar,
-                        alpha::($elty), A::AbstractVecOrMat{$elty}, B::AbstractVecOrMat{$elty},
-                        beta::($elty), C::AbstractMatrix{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, B, C)
-            n = checksquare(C)
-            nn = size(A, trans == 'N' ? 1 : 2)
-            if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end
-            k  = size(A, trans == 'N' ? 2 : 1)
-            chkstride1(A)
-            chkstride1(B)
-            chkstride1(C)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
-                 Ptr{$elty}, Ref{BlasInt}, Clong, Clong),
-                 uplo, trans, n, k,
-                 alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)), beta,
-                 C, max(1,stride(C,2)), 1, 1)
-            C
-        end
-    end
-end
-
-"""
-    syr2k!(uplo, trans, alpha, A, B, beta, C)
-
-Rank-2k update of the symmetric matrix `C` as
-`alpha*A*transpose(B) + alpha*B*transpose(A) + beta*C` or
-`alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C`
-according to [`trans`](@ref stdlib-blas-trans).
-Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Returns `C`.
-"""
-function syr2k! end
-
-"""
-    syr2k(uplo, trans, alpha, A, B)
-
-Returns the [`uplo`](@ref stdlib-blas-uplo) triangle of
-`alpha*A*transpose(B) + alpha*B*transpose(A)` or
-`alpha*transpose(A)*B + alpha*transpose(B)*A`,
-according to [`trans`](@ref stdlib-blas-trans).
-"""
-function syr2k(uplo::AbstractChar, trans::AbstractChar, alpha::Number, A::AbstractVecOrMat, B::AbstractVecOrMat)
-    T = eltype(A)
-    n = size(A, trans == 'N' ? 1 : 2)
-    syr2k!(uplo, trans, convert(T,alpha), A, B, zero(T), similar(A, T, (n, n)))
-end
-"""
-    syr2k(uplo, trans, A, B)
-
-Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*transpose(B) + B*transpose(A)`
-or `transpose(A)*B + transpose(B)*A`, according to [`trans`](@ref stdlib-blas-trans).
-"""
-syr2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat, B::AbstractVecOrMat) = syr2k(uplo, trans, one(eltype(A)), A, B)
-
-for (fname, elty1, elty2) in ((:zher2k_,:ComplexF64,:Float64), (:cher2k_,:ComplexF32,:Float32))
-    @eval begin
-        # SUBROUTINE CHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-        #
-        #       .. Scalar Arguments ..
-        #       COMPLEX ALPHA
-        #       REAL BETA
-        #       INTEGER K,LDA,LDB,LDC,N
-        #       CHARACTER TRANS,UPLO
-        #       ..
-        #       .. Array Arguments ..
-        #       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-        function her2k!(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1),
-                        A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1},
-                        beta::($elty2), C::AbstractMatrix{$elty1})
-            chkuplo(uplo)
-            require_one_based_indexing(A, B, C)
-            n = checksquare(C)
-            nn = size(A, trans == 'N' ? 1 : 2)
-            if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end
-            chkstride1(A)
-            chkstride1(B)
-            chkstride1(C)
-            k  = size(A, trans == 'N' ? 2 : 1)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                    Ref{$elty1}, Ptr{$elty1}, Ref{BlasInt}, Ptr{$elty1}, Ref{BlasInt},
-                    Ref{$elty2},  Ptr{$elty1}, Ref{BlasInt}, Clong, Clong),
-                    uplo, trans, n, k,
-                    alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)),
-                    beta, C, max(1,stride(C,2)), 1, 1)
-            C
-        end
-        function her2k(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1), A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1})
-            n = size(A, trans == 'N' ? 1 : 2)
-            her2k!(uplo, trans, alpha, A, B, zero($elty2), similar(A, $elty1, (n,n)))
-        end
-        her2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}) =
-            her2k(uplo, trans, one($elty1), A, B)
-    end
-end
-
-"""
-    her2k!(uplo, trans, alpha, A, B, beta, C)
-
-Rank-2k update of the Hermitian matrix `C` as
-`alpha*A*B' + alpha*B*A' + beta*C` or `alpha*A'*B + alpha*B'*A + beta*C`
-according to [`trans`](@ref stdlib-blas-trans). The scalar `beta` has to be real.
-Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`.
-"""
-function her2k! end
-
-"""
-    her2k(uplo, trans, alpha, A, B)
-
-Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `alpha*A*B' + alpha*B*A'`
-or `alpha*A'*B + alpha*B'*A`, according to [`trans`](@ref stdlib-blas-trans).
-"""
-her2k(uplo, trans, alpha, A, B)
-
-"""
-    her2k(uplo, trans, A, B)
-
-Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*B' + B*A'`
-or `A'*B + B'*A`, according to [`trans`](@ref stdlib-blas-trans).
-"""
-her2k(uplo, trans, A, B)
-
-## (TR) Triangular matrix and vector multiplication and solution
-
-"""
-    trmm!(side, ul, tA, dA, alpha, A, B)
-
-Update `B` as `alpha*A*B` or one of the other three variants determined by
-[`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans).
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or
-are assumed to be all ones.
-Return the updated `B`.
-"""
-function trmm! end
-
-"""
-    trmm(side, ul, tA, dA, alpha, A, B)
-
-Return `alpha*A*B` or one of the other three variants determined by
-[`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans).
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or
-are assumed to be all ones.
-"""
-function trmm end
-
-"""
-    trsm!(side, ul, tA, dA, alpha, A, B)
-
-Overwrite `B` with the solution to `A*X = alpha*B` or one of the other three variants
-determined by [`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans).
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or
-are assumed to be all ones.
-Returns the updated `B`.
-"""
-function trsm! end
-
-"""
-    trsm(side, ul, tA, dA, alpha, A, B)
-
-Return the solution to `A*X = alpha*B` or one of the other three variants determined by
-determined by [`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans).
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
-[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or
-are assumed to be all ones.
-"""
-function trsm end
-
-for (mmname, smname, elty) in
-        ((:dtrmm_,:dtrsm_,:Float64),
-         (:strmm_,:strsm_,:Float32),
-         (:ztrmm_,:ztrsm_,:ComplexF64),
-         (:ctrmm_,:ctrsm_,:ComplexF32))
-    @eval begin
-        #       SUBROUTINE DTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-        # *     .. Scalar Arguments ..
-        #       DOUBLE PRECISION ALPHA
-        #       INTEGER LDA,LDB,M,N
-        #       CHARACTER DIAG,SIDE,TRANSA,UPLO
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION A(LDA,*),B(LDB,*)
-        function trmm!(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, alpha::Number,
-                       A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, B)
-            m, n = size(B)
-            nA = checksquare(A)
-            if nA != (side == 'L' ? m : n)
-                throw(DimensionMismatch(lazy"size of A, $(size(A)), doesn't match $side size of B with dims, $(size(B))"))
-            end
-            chkstride1(A)
-            chkstride1(B)
-            ccall((@blasfunc($mmname), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                   Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Clong, Clong, Clong, Clong),
-                  side, uplo, transa, diag, m, n,
-                  alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)),
-                  1, 1, 1, 1)
-            B
-        end
-        function trmm(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar,
-                      alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            trmm!(side, uplo, transa, diag, alpha, A, copy(B))
-        end
-        #       SUBROUTINE DTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-        # *     .. Scalar Arguments ..
-        #       DOUBLE PRECISION ALPHA
-        #       INTEGER LDA,LDB,M,N
-        #       CHARACTER DIAG,SIDE,TRANSA,UPLO
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION A(LDA,*),B(LDB,*)
-        function trsm!(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar,
-                       alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            chkuplo(uplo)
-            require_one_based_indexing(A, B)
-            m, n = size(B)
-            k = checksquare(A)
-            if k != (side == 'L' ? m : n)
-                throw(DimensionMismatch(lazy"size of A is ($k,$k), size of B is ($m,$n), side is $side, and transa='$transa'"))
-            end
-            chkstride1(A)
-            chkstride1(B)
-            ccall((@blasfunc($smname), libblastrampoline), Cvoid,
-                   (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8},
-                    Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                    Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Clong, Clong, Clong, Clong),
-                   side, uplo, transa, diag,
-                   m, n, alpha, A,
-                   max(1,stride(A,2)), B, max(1,stride(B,2)),
-                   1, 1, 1, 1)
-            B
-        end
-        function trsm(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            trsm!(side, uplo, transa, diag, alpha, A, copy(B))
-        end
-    end
-end
-
-end # module
-
-function copyto!(dest::Array{T}, rdest::AbstractRange{Ti},
-                 src::Array{T}, rsrc::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer}
-    if minimum(rdest) < 1 || maximum(rdest) > length(dest)
-        throw(ArgumentError(lazy"range out of bounds for dest, of length $(length(dest))"))
-    end
-    if minimum(rsrc) < 1 || maximum(rsrc) > length(src)
-        throw(ArgumentError(lazy"range out of bounds for src, of length $(length(src))"))
-    end
-    if length(rdest) != length(rsrc)
-        throw(DimensionMismatch(lazy"ranges must be of the same length"))
-    end
-    GC.@preserve src dest BLAS.blascopy!(
-        length(rsrc),
-        pointer(src, minimum(rsrc)),
-        step(rsrc),
-        pointer(dest, minimum(rdest)),
-        step(rdest))
-
-    return dest
-end
diff --git a/stdlib/LinearAlgebra/src/bunchkaufman.jl b/stdlib/LinearAlgebra/src/bunchkaufman.jl
deleted file mode 100644
index d1019a1a4ea5a..0000000000000
--- a/stdlib/LinearAlgebra/src/bunchkaufman.jl
+++ /dev/null
@@ -1,392 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## Create an extractor that extracts the modified original matrix, e.g.
-## LD for BunchKaufman, UL for CholeskyDense, LU for LUDense and
-## define size methods for Factorization types using it.
-
-"""
-    BunchKaufman <: Factorization
-
-Matrix factorization type of the Bunch-Kaufman factorization of a symmetric or
-Hermitian matrix `A` as `P'UDU'P` or `P'LDL'P`, depending on whether the upper
-(the default) or the lower triangle is stored in `A`. If `A` is complex symmetric
-then `U'` and `L'` denote the unconjugated transposes, i.e. `transpose(U)` and
-`transpose(L)`, respectively. This is the return type of [`bunchkaufman`](@ref),
-the corresponding matrix factorization function.
-
-If `S::BunchKaufman` is the factorization object, the components can be obtained
-via `S.D`, `S.U` or `S.L` as appropriate given `S.uplo`, and `S.p`.
-
-Iterating the decomposition produces the components `S.D`, `S.U` or `S.L`
-as appropriate given `S.uplo`, and `S.p`.
-
-# Examples
-```jldoctest
-julia> A = [1 2; 2 3]
-2×2 Matrix{Int64}:
- 1  2
- 2  3
-
-julia> S = bunchkaufman(A) # A gets wrapped internally by Symmetric(A)
-BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
-D factor:
-2×2 Tridiagonal{Float64, Vector{Float64}}:
- -0.333333  0.0
-  0.0       3.0
-U factor:
-2×2 UnitUpperTriangular{Float64, Matrix{Float64}}:
- 1.0  0.666667
-  ⋅   1.0
-permutation:
-2-element Vector{Int64}:
- 1
- 2
-
-julia> d, u, p = S; # destructuring via iteration
-
-julia> d == S.D && u == S.U && p == S.p
-true
-
-julia> S = bunchkaufman(Symmetric(A, :L))
-BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
-D factor:
-2×2 Tridiagonal{Float64, Vector{Float64}}:
- 3.0   0.0
- 0.0  -0.333333
-L factor:
-2×2 UnitLowerTriangular{Float64, Matrix{Float64}}:
- 1.0        ⋅
- 0.666667  1.0
-permutation:
-2-element Vector{Int64}:
- 2
- 1
-```
-"""
-struct BunchKaufman{T,S<:AbstractMatrix,P<:AbstractVector{<:Integer}} <: Factorization{T}
-    LD::S
-    ipiv::P
-    uplo::Char
-    symmetric::Bool
-    rook::Bool
-    info::BlasInt
-
-    function BunchKaufman{T,S,P}(LD, ipiv, uplo, symmetric, rook, info) where {T,S<:AbstractMatrix,P<:AbstractVector}
-        require_one_based_indexing(LD)
-        new{T,S,P}(LD, ipiv, uplo, symmetric, rook, info)
-    end
-end
-BunchKaufman(A::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer}, uplo::AbstractChar,
-             symmetric::Bool, rook::Bool, info::BlasInt) where {T} =
-        BunchKaufman{T,typeof(A),typeof(ipiv)}(A, ipiv, uplo, symmetric, rook, info)
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(BunchKaufman{T,S}(LD, ipiv, uplo, symmetric, rook, info) where {T,S},
-           BunchKaufman{T,S,typeof(ipiv)}(LD, ipiv, uplo, symmetric, rook, info), false)
-
-# iteration for destructuring into components
-Base.iterate(S::BunchKaufman) = (S.D, Val(:UL))
-Base.iterate(S::BunchKaufman, ::Val{:UL}) = (S.uplo == 'L' ? S.L : S.U, Val(:p))
-Base.iterate(S::BunchKaufman, ::Val{:p}) = (S.p, Val(:done))
-Base.iterate(S::BunchKaufman, ::Val{:done}) = nothing
-
-
-"""
-    bunchkaufman!(A, rook::Bool=false; check = true) -> BunchKaufman
-
-`bunchkaufman!` is the same as [`bunchkaufman`](@ref), but saves space by overwriting the
-input `A`, instead of creating a copy.
-"""
-function bunchkaufman!(A::RealHermSymComplexSym{<:BlasReal,<:StridedMatrix},
-                       rook::Bool = false; check::Bool = true)
-    LD, ipiv, info = rook ? LAPACK.sytrf_rook!(A.uplo, A.data) : LAPACK.sytrf!(A.uplo, A.data)
-    check && checknonsingular(info)
-    BunchKaufman(LD, ipiv, A.uplo, true, rook, info)
-end
-function bunchkaufman!(A::Hermitian{<:BlasComplex,<:StridedMatrix},
-                       rook::Bool = false; check::Bool = true)
-    LD, ipiv, info = rook ? LAPACK.hetrf_rook!(A.uplo, A.data) : LAPACK.hetrf!(A.uplo, A.data)
-    check && checknonsingular(info)
-    BunchKaufman(LD, ipiv, A.uplo, false, rook, info)
-end
-function bunchkaufman!(A::StridedMatrix{<:BlasFloat}, rook::Bool = false; check::Bool = true)
-    if ishermitian(A)
-        return bunchkaufman!(Hermitian(A), rook; check = check)
-    elseif issymmetric(A)
-        return bunchkaufman!(Symmetric(A), rook; check = check)
-    else
-        throw(ArgumentError("Bunch-Kaufman decomposition is only valid for symmetric or Hermitian matrices"))
-    end
-end
-
-"""
-    bunchkaufman(A, rook::Bool=false; check = true) -> S::BunchKaufman
-
-Compute the Bunch-Kaufman [^Bunch1977] factorization of a symmetric or
-Hermitian matrix `A` as `P'*U*D*U'*P` or `P'*L*D*L'*P`, depending on
-which triangle is stored in `A`, and return a [`BunchKaufman`](@ref) object.
-Note that if `A` is complex symmetric then `U'` and `L'` denote
-the unconjugated transposes, i.e. `transpose(U)` and `transpose(L)`.
-
-Iterating the decomposition produces the components `S.D`, `S.U` or `S.L`
-as appropriate given `S.uplo`, and `S.p`.
-
-If `rook` is `true`, rook pivoting is used. If `rook` is false,
-rook pivoting is not used.
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-The following functions are available for `BunchKaufman` objects:
-[`size`](@ref), `\\`, [`inv`](@ref), [`issymmetric`](@ref),
-[`ishermitian`](@ref), [`getindex`](@ref).
-
-[^Bunch1977]: J R Bunch and L Kaufman, Some stable methods for calculating inertia and solving symmetric linear systems, Mathematics of Computation 31:137 (1977), 163-179. [url](http://www.ams.org/journals/mcom/1977-31-137/S0025-5718-1977-0428694-0/).
-
-# Examples
-```jldoctest
-julia> A = [1 2; 2 3]
-2×2 Matrix{Int64}:
- 1  2
- 2  3
-
-julia> S = bunchkaufman(A) # A gets wrapped internally by Symmetric(A)
-BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
-D factor:
-2×2 Tridiagonal{Float64, Vector{Float64}}:
- -0.333333  0.0
-  0.0       3.0
-U factor:
-2×2 UnitUpperTriangular{Float64, Matrix{Float64}}:
- 1.0  0.666667
-  ⋅   1.0
-permutation:
-2-element Vector{Int64}:
- 1
- 2
-
-julia> d, u, p = S; # destructuring via iteration
-
-julia> d == S.D && u == S.U && p == S.p
-true
-
-julia> S.U*S.D*S.U' - S.P*A*S.P'
-2×2 Matrix{Float64}:
- 0.0  0.0
- 0.0  0.0
-
-julia> S = bunchkaufman(Symmetric(A, :L))
-BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
-D factor:
-2×2 Tridiagonal{Float64, Vector{Float64}}:
- 3.0   0.0
- 0.0  -0.333333
-L factor:
-2×2 UnitLowerTriangular{Float64, Matrix{Float64}}:
- 1.0        ⋅
- 0.666667  1.0
-permutation:
-2-element Vector{Int64}:
- 2
- 1
-
-julia> S.L*S.D*S.L' - A[S.p, S.p]
-2×2 Matrix{Float64}:
- 0.0  0.0
- 0.0  0.0
-```
-"""
-bunchkaufman(A::AbstractMatrix{T}, rook::Bool=false; check::Bool = true) where {T} =
-    bunchkaufman!(eigencopy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
-
-BunchKaufman{T}(B::BunchKaufman) where {T} =
-    BunchKaufman(convert(Matrix{T}, B.LD), B.ipiv, B.uplo, B.symmetric, B.rook, B.info)
-Factorization{T}(B::BunchKaufman) where {T} = BunchKaufman{T}(B)
-
-size(B::BunchKaufman) = size(getfield(B, :LD))
-size(B::BunchKaufman, d::Integer) = size(getfield(B, :LD), d)
-issymmetric(B::BunchKaufman) = B.symmetric
-ishermitian(B::BunchKaufman{T}) where T = T<:Real || !B.symmetric
-
-function _ipiv2perm_bk(v::AbstractVector{T}, maxi::Integer, uplo::AbstractChar, rook::Bool) where T
-    require_one_based_indexing(v)
-    p = T[1:maxi;]
-    uploL = uplo == 'L'
-    i = uploL ? 1 : maxi
-    # if uplo == 'U' we construct the permutation backwards
-    @inbounds while 1 <= i <= length(v)
-        vi = v[i]
-        if vi > 0 # the 1x1 blocks
-            p[i], p[vi] = p[vi], p[i]
-            i += uploL ? 1 : -1
-        else # the 2x2 blocks
-            if rook
-                p[i], p[-vi] = p[-vi], p[i]
-            end
-            if uploL
-                vp = rook ? -v[i+1] : -vi
-                p[i + 1], p[vp] = p[vp], p[i + 1]
-                i += 2
-            else # 'U'
-                vp = rook ? -v[i-1] : -vi
-                p[i - 1], p[vp] = p[vp], p[i - 1]
-                i -= 2
-            end
-        end
-    end
-    return p
-end
-
-function getproperty(B::BunchKaufman{T,<:StridedMatrix}, d::Symbol) where {T<:BlasFloat}
-    n = size(B, 1)
-    if d === :p
-        return _ipiv2perm_bk(getfield(B, :ipiv), n, getfield(B, :uplo), B.rook)
-    elseif d === :P
-        return Matrix{T}(I, n, n)[:,invperm(B.p)]
-    elseif d === :L || d === :U || d === :D
-        if getfield(B, :rook)
-            LUD, od = LAPACK.syconvf_rook!(getfield(B, :uplo), 'C', copy(getfield(B, :LD)), getfield(B, :ipiv))
-        else
-            LUD, od = LAPACK.syconv!(getfield(B, :uplo), copy(getfield(B, :LD)), getfield(B, :ipiv))
-        end
-        if d === :D
-            if getfield(B, :uplo) == 'L'
-                odl = od[1:n - 1]
-                return Tridiagonal(odl, diag(LUD), getfield(B, :symmetric) ? odl : conj.(odl))
-            else # 'U'
-                odu = od[2:n]
-                return Tridiagonal(getfield(B, :symmetric) ? odu : conj.(odu), diag(LUD), odu)
-            end
-        elseif d === :L
-            if getfield(B, :uplo) == 'L'
-                return UnitLowerTriangular(LUD)
-            else
-                throw(ArgumentError("factorization is U*D*transpose(U) but you requested L"))
-            end
-        else # :U
-            if B.uplo == 'U'
-                return UnitUpperTriangular(LUD)
-            else
-                throw(ArgumentError("factorization is L*D*transpose(L) but you requested U"))
-            end
-        end
-    else
-        getfield(B, d)
-    end
-end
-
-Base.propertynames(B::BunchKaufman, private::Bool=false) =
-    (:p, :P, :L, :U, :D, (private ? fieldnames(typeof(B)) : ())...)
-
-issuccess(B::BunchKaufman) = B.info == 0
-
-function adjoint(B::BunchKaufman)
-    if ishermitian(B)
-        return B
-    else
-        throw(ArgumentError("adjoint not implemented for complex symmetric matrices"))
-    end
-end
-
-function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, B::BunchKaufman)
-    if issuccess(B)
-        summary(io, B); println(io)
-        println(io, "D factor:")
-        show(io, mime, B.D)
-        println(io, "\n$(B.uplo) factor:")
-        show(io, mime, B.uplo == 'L' ? B.L : B.U)
-        println(io, "\npermutation:")
-        show(io, mime, B.p)
-    else
-        print(io, "Failed factorization of type $(typeof(B))")
-    end
-end
-
-function inv(B::BunchKaufman{<:BlasReal,<:StridedMatrix})
-    if B.rook
-        copytri!(LAPACK.sytri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true)
-    else
-        copytri!(LAPACK.sytri!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true)
-    end
-end
-
-function inv(B::BunchKaufman{<:BlasComplex,<:StridedMatrix})
-    if issymmetric(B)
-        if B.rook
-            copytri!(LAPACK.sytri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo)
-        else
-            copytri!(LAPACK.sytri!(B.uplo, copy(B.LD), B.ipiv), B.uplo)
-        end
-    else
-        if B.rook
-            copytri!(LAPACK.hetri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true)
-        else
-            copytri!(LAPACK.hetri!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true)
-        end
-    end
-end
-
-function ldiv!(B::BunchKaufman{T,<:StridedMatrix}, R::StridedVecOrMat{T}) where {T<:BlasReal}
-    if B.rook
-        LAPACK.sytrs_rook!(B.uplo, B.LD, B.ipiv, R)
-    else
-        LAPACK.sytrs!(B.uplo, B.LD, B.ipiv, R)
-    end
-end
-function ldiv!(B::BunchKaufman{T,<:StridedMatrix}, R::StridedVecOrMat{T}) where {T<:BlasComplex}
-    if B.rook
-        if issymmetric(B)
-            LAPACK.sytrs_rook!(B.uplo, B.LD, B.ipiv, R)
-        else
-            LAPACK.hetrs_rook!(B.uplo, B.LD, B.ipiv, R)
-        end
-    else
-        if issymmetric(B)
-            LAPACK.sytrs!(B.uplo, B.LD, B.ipiv, R)
-        else
-            LAPACK.hetrs!(B.uplo, B.LD, B.ipiv, R)
-        end
-    end
-end
-
-function logabsdet(F::BunchKaufman)
-    M = F.LD
-    p = F.ipiv
-    n = size(F.LD, 1)
-
-    if !issuccess(F)
-        return eltype(F)(-Inf), zero(eltype(F))
-    end
-    s = one(real(eltype(F)))
-    i = 1
-    abs_det = zero(real(eltype(F)))
-    while i <= n
-        if p[i] > 0
-            elm = M[i,i]
-            s *= sign(elm)
-            abs_det += log(abs(elm))
-            i += 1
-        else
-            # 2x2 pivot case. Make sure not to square before the subtraction by scaling
-            # with the off-diagonal element. This is safe because the off diagonal is
-            # always large for 2x2 pivots.
-            if F.uplo == 'U'
-                elm = M[i, i + 1]*(M[i,i]/M[i, i + 1]*M[i + 1, i + 1] -
-                    (issymmetric(F) ? M[i, i + 1] : conj(M[i, i + 1])))
-                s *= sign(elm)
-                abs_det += log(abs(elm))
-            else
-                elm = M[i + 1,i]*(M[i, i]/M[i + 1, i]*M[i + 1, i + 1] -
-                    (issymmetric(F) ? M[i + 1, i] : conj(M[i + 1, i])))
-                s *= sign(elm)
-                abs_det += log(abs(elm))
-            end
-            i += 2
-        end
-    end
-    return abs_det, s
-end
-
-## reconstruct the original matrix
-## TODO: understand the procedure described at
-## http://www.nag.com/numeric/FL/nagdoc_fl22/pdf/F07/f07mdf.pdf
diff --git a/stdlib/LinearAlgebra/src/cholesky.jl b/stdlib/LinearAlgebra/src/cholesky.jl
deleted file mode 100644
index 82f138db7d7b9..0000000000000
--- a/stdlib/LinearAlgebra/src/cholesky.jl
+++ /dev/null
@@ -1,835 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-##########################
-# Cholesky Factorization #
-##########################
-
-# The dispatch structure in the cholesky, and cholesky! methods is a bit
-# complicated and some explanation is therefore provided in the following
-#
-# In the methods below, LAPACK is called when possible, i.e. StridedMatrices with Float32,
-# Float64, ComplexF32, and ComplexF64 element types. For other element or
-# matrix types, the unblocked Julia implementation in _chol! is used. For cholesky
-# and cholesky! pivoting is supported through a RowMaximum() argument. A type argument is
-# necessary for type stability since the output of cholesky and cholesky! is either
-# Cholesky or CholeskyPivoted. The latter is only
-# supported for the four LAPACK element types. For other types, e.g. BigFloats RowMaximum() will
-# give an error. It is required that the input is Hermitian (including real symmetric) either
-# through the Hermitian and Symmetric views or exact symmetric or Hermitian elements which
-# is checked for and an error is thrown if the check fails.
-
-# The internal structure is as follows
-# - _chol! returns the factor and info without checking positive definiteness
-# - cholesky/cholesky! returns Cholesky without checking positive definiteness
-
-# FixMe? The dispatch below seems overly complicated. One simplification could be to
-# merge the two Cholesky types into one. It would remove the need for Val completely but
-# the cost would be extra unnecessary/unused fields for the unpivoted Cholesky and runtime
-# checks of those fields before calls to LAPACK to check which version of the Cholesky
-# factorization the type represents.
-"""
-    Cholesky <: Factorization
-
-Matrix factorization type of the Cholesky factorization of a dense symmetric/Hermitian
-positive definite matrix `A`. This is the return type of [`cholesky`](@ref),
-the corresponding matrix factorization function.
-
-The triangular Cholesky factor can be obtained from the factorization `F::Cholesky`
-via `F.L` and `F.U`, where `A ≈ F.U' * F.U ≈ F.L * F.L'`.
-
-The following functions are available for `Cholesky` objects: [`size`](@ref), [`\\`](@ref),
-[`inv`](@ref), [`det`](@ref), [`logdet`](@ref) and [`isposdef`](@ref).
-
-Iterating the decomposition produces the components `L` and `U`.
-
-# Examples
-```jldoctest
-julia> A = [4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
-3×3 Matrix{Float64}:
-   4.0   12.0  -16.0
-  12.0   37.0  -43.0
- -16.0  -43.0   98.0
-
-julia> C = cholesky(A)
-Cholesky{Float64, Matrix{Float64}}
-U factor:
-3×3 UpperTriangular{Float64, Matrix{Float64}}:
- 2.0  6.0  -8.0
-  ⋅   1.0   5.0
-  ⋅    ⋅    3.0
-
-julia> C.U
-3×3 UpperTriangular{Float64, Matrix{Float64}}:
- 2.0  6.0  -8.0
-  ⋅   1.0   5.0
-  ⋅    ⋅    3.0
-
-julia> C.L
-3×3 LowerTriangular{Float64, Matrix{Float64}}:
-  2.0   ⋅    ⋅
-  6.0  1.0   ⋅
- -8.0  5.0  3.0
-
-julia> C.L * C.U == A
-true
-
-julia> l, u = C; # destructuring via iteration
-
-julia> l == C.L && u == C.U
-true
-```
-"""
-struct Cholesky{T,S<:AbstractMatrix} <: Factorization{T}
-    factors::S
-    uplo::Char
-    info::BlasInt
-
-    function Cholesky{T,S}(factors, uplo, info) where {T,S<:AbstractMatrix}
-        require_one_based_indexing(factors)
-        new(factors, uplo, info)
-    end
-end
-Cholesky(A::AbstractMatrix{T}, uplo::Symbol, info::Integer) where {T} =
-    Cholesky{T,typeof(A)}(A, char_uplo(uplo), info)
-Cholesky(A::AbstractMatrix{T}, uplo::AbstractChar, info::Integer) where {T} =
-    Cholesky{T,typeof(A)}(A, uplo, info)
-Cholesky(U::UpperTriangular{T}) where {T} = Cholesky{T,typeof(U.data)}(U.data, 'U', 0)
-Cholesky(L::LowerTriangular{T}) where {T} = Cholesky{T,typeof(L.data)}(L.data, 'L', 0)
-
-# iteration for destructuring into components
-Base.iterate(C::Cholesky) = (C.L, Val(:U))
-Base.iterate(C::Cholesky, ::Val{:U}) = (C.U, Val(:done))
-Base.iterate(C::Cholesky, ::Val{:done}) = nothing
-
-
-"""
-    CholeskyPivoted
-
-Matrix factorization type of the pivoted Cholesky factorization of a dense symmetric/Hermitian
-positive semi-definite matrix `A`. This is the return type of [`cholesky(_, ::RowMaximum)`](@ref),
-the corresponding matrix factorization function.
-
-The triangular Cholesky factor can be obtained from the factorization `F::CholeskyPivoted`
-via `F.L` and `F.U`, and the permutation via `F.p`, where `A[F.p, F.p] ≈ Ur' * Ur ≈ Lr * Lr'`
-with `Ur = F.U[1:F.rank, :]` and `Lr = F.L[:, 1:F.rank]`, or alternatively
-`A ≈ Up' * Up ≈ Lp * Lp'` with `Up = F.U[1:F.rank, invperm(F.p)]` and
-`Lp = F.L[invperm(F.p), 1:F.rank]`.
-
-The following functions are available for `CholeskyPivoted` objects:
-[`size`](@ref), [`\\`](@ref), [`inv`](@ref), [`det`](@ref), and [`rank`](@ref).
-
-Iterating the decomposition produces the components `L` and `U`.
-
-# Examples
-```jldoctest
-julia> X = [1.0, 2.0, 3.0, 4.0];
-
-julia> A = X * X';
-
-julia> C = cholesky(A, RowMaximum(), check = false)
-CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}
-U factor with rank 1:
-4×4 UpperTriangular{Float64, Matrix{Float64}}:
- 4.0  2.0  3.0  1.0
-  ⋅   0.0  6.0  2.0
-  ⋅    ⋅   9.0  3.0
-  ⋅    ⋅    ⋅   1.0
-permutation:
-4-element Vector{Int64}:
- 4
- 2
- 3
- 1
-
-julia> C.U[1:C.rank, :]' * C.U[1:C.rank, :] ≈ A[C.p, C.p]
-true
-
-julia> l, u = C; # destructuring via iteration
-
-julia> l == C.L && u == C.U
-true
-```
-"""
-struct CholeskyPivoted{T,S<:AbstractMatrix,P<:AbstractVector{<:Integer}} <: Factorization{T}
-    factors::S
-    uplo::Char
-    piv::P
-    rank::BlasInt
-    tol::Real
-    info::BlasInt
-
-    function CholeskyPivoted{T,S,P}(factors, uplo, piv, rank, tol, info) where {T,S<:AbstractMatrix,P<:AbstractVector}
-        require_one_based_indexing(factors)
-        new{T,S,P}(factors, uplo, piv, rank, tol, info)
-    end
-end
-CholeskyPivoted(A::AbstractMatrix{T}, uplo::AbstractChar, piv::AbstractVector{<:Integer},
-                rank::Integer, tol::Real, info::Integer) where T =
-    CholeskyPivoted{T,typeof(A),typeof(piv)}(A, uplo, piv, rank, tol, info)
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(CholeskyPivoted{T,S}(factors, uplo, piv, rank, tol, info) where {T,S<:AbstractMatrix},
-           CholeskyPivoted{T,S,typeof(piv)}(factors, uplo, piv, rank, tol, info), false)
-
-
-# iteration for destructuring into components
-Base.iterate(C::CholeskyPivoted) = (C.L, Val(:U))
-Base.iterate(C::CholeskyPivoted, ::Val{:U}) = (C.U, Val(:done))
-Base.iterate(C::CholeskyPivoted, ::Val{:done}) = nothing
-
-
-# make a copy that allow inplace Cholesky factorization
-choltype(A) = promote_type(typeof(sqrt(oneunit(eltype(A)))), Float32)
-cholcopy(A::AbstractMatrix) = eigencopy_oftype(A, choltype(A))
-
-# _chol!. Internal methods for calling unpivoted Cholesky
-## BLAS/LAPACK element types
-function _chol!(A::StridedMatrix{<:BlasFloat}, ::Type{UpperTriangular})
-    C, info = LAPACK.potrf!('U', A)
-    return UpperTriangular(C), info
-end
-function _chol!(A::StridedMatrix{<:BlasFloat}, ::Type{LowerTriangular})
-    C, info = LAPACK.potrf!('L', A)
-    return LowerTriangular(C), info
-end
-
-## Non BLAS/LAPACK element types (generic)
-function _chol!(A::AbstractMatrix, ::Type{UpperTriangular})
-    require_one_based_indexing(A)
-    n = checksquare(A)
-    realdiag = eltype(A) <: Complex
-    @inbounds begin
-        for k = 1:n
-            Akk = realdiag ? real(A[k,k]) : A[k,k]
-            for i = 1:k - 1
-                Akk -= realdiag ? abs2(A[i,k]) : A[i,k]'A[i,k]
-            end
-            A[k,k] = Akk
-            Akk, info = _chol!(Akk, UpperTriangular)
-            if info != 0
-                return UpperTriangular(A), convert(BlasInt, k)
-            end
-            A[k,k] = Akk
-            AkkInv = inv(copy(Akk'))
-            for j = k + 1:n
-                for i = 1:k - 1
-                    A[k,j] -= A[i,k]'A[i,j]
-                end
-                A[k,j] = AkkInv*A[k,j]
-            end
-        end
-    end
-    return UpperTriangular(A), convert(BlasInt, 0)
-end
-function _chol!(A::AbstractMatrix, ::Type{LowerTriangular})
-    require_one_based_indexing(A)
-    n = checksquare(A)
-    realdiag = eltype(A) <: Complex
-    @inbounds begin
-        for k = 1:n
-            Akk = realdiag ? real(A[k,k]) : A[k,k]
-            for i = 1:k - 1
-                Akk -= realdiag ? abs2(A[k,i]) : A[k,i]*A[k,i]'
-            end
-            A[k,k] = Akk
-            Akk, info = _chol!(Akk, LowerTriangular)
-            if info != 0
-                return LowerTriangular(A), convert(BlasInt, k)
-            end
-            A[k,k] = Akk
-            AkkInv = inv(Akk)
-            for j = 1:k - 1
-                @simd for i = k + 1:n
-                    A[i,k] -= A[i,j]*A[k,j]'
-                end
-            end
-            for i = k + 1:n
-                A[i,k] *= AkkInv'
-            end
-        end
-     end
-    return LowerTriangular(A), convert(BlasInt, 0)
-end
-
-## Numbers
-function _chol!(x::Number, _)
-    rx = real(x)
-    iszero(rx) && return (rx, convert(BlasInt, 1))
-    rxr = sqrt(abs(rx))
-    rval =  convert(promote_type(typeof(x), typeof(rxr)), rxr)
-    return (rval, convert(BlasInt, rx != abs(x)))
-end
-
-## for StridedMatrices, check that matrix is symmetric/Hermitian
-
-# cholesky!. Destructive methods for computing Cholesky factorization of real symmetric
-# or Hermitian matrix
-## No pivoting (default)
-function cholesky!(A::RealHermSymComplexHerm, ::NoPivot = NoPivot(); check::Bool = true)
-    C, info = _chol!(A.data, A.uplo == 'U' ? UpperTriangular : LowerTriangular)
-    check && checkpositivedefinite(info)
-    return Cholesky(C.data, A.uplo, info)
-end
-
-### for AbstractMatrix, check that matrix is symmetric/Hermitian
-"""
-    cholesky!(A::AbstractMatrix, NoPivot(); check = true) -> Cholesky
-
-The same as [`cholesky`](@ref), but saves space by overwriting the input `A`,
-instead of creating a copy. An [`InexactError`](@ref) exception is thrown if
-the factorization produces a number not representable by the element type of
-`A`, e.g. for integer types.
-
-# Examples
-```jldoctest
-julia> A = [1 2; 2 50]
-2×2 Matrix{Int64}:
- 1   2
- 2  50
-
-julia> cholesky!(A)
-ERROR: InexactError: Int64(6.782329983125268)
-Stacktrace:
-[...]
-```
-"""
-function cholesky!(A::AbstractMatrix, ::NoPivot = NoPivot(); check::Bool = true)
-    checksquare(A)
-    if !ishermitian(A) # return with info = -1 if not Hermitian
-        check && checkpositivedefinite(-1)
-        return Cholesky(A, 'U', convert(BlasInt, -1))
-    else
-        return cholesky!(Hermitian(A), NoPivot(); check = check)
-    end
-end
-@deprecate cholesky!(A::StridedMatrix, ::Val{false}; check::Bool = true) cholesky!(A, NoPivot(); check) false
-@deprecate cholesky!(A::RealHermSymComplexHerm, ::Val{false}; check::Bool = true) cholesky!(A, NoPivot(); check) false
-
-## With pivoting
-### BLAS/LAPACK element types
-function cholesky!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix},
-                   ::RowMaximum; tol = 0.0, check::Bool = true)
-    AA, piv, rank, info = LAPACK.pstrf!(A.uplo, A.data, tol)
-    C = CholeskyPivoted{eltype(AA),typeof(AA),typeof(piv)}(AA, A.uplo, piv, rank, tol, info)
-    check && chkfullrank(C)
-    return C
-end
-@deprecate cholesky!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false
-
-### Non BLAS/LAPACK element types (generic). Since generic fallback for pivoted Cholesky
-### is not implemented yet we throw an error
-cholesky!(A::RealHermSymComplexHerm{<:Real}, ::RowMaximum; tol = 0.0, check::Bool = true) =
-    throw(ArgumentError("generic pivoted Cholesky factorization is not implemented yet"))
-@deprecate cholesky!(A::RealHermSymComplexHerm{<:Real}, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false
-
-### for AbstractMatrix, check that matrix is symmetric/Hermitian
-"""
-    cholesky!(A::AbstractMatrix, RowMaximum(); tol = 0.0, check = true) -> CholeskyPivoted
-
-The same as [`cholesky`](@ref), but saves space by overwriting the input `A`,
-instead of creating a copy. An [`InexactError`](@ref) exception is thrown if the
-factorization produces a number not representable by the element type of `A`,
-e.g. for integer types.
-"""
-function cholesky!(A::AbstractMatrix, ::RowMaximum; tol = 0.0, check::Bool = true)
-    checksquare(A)
-    if !ishermitian(A)
-        C = CholeskyPivoted(A, 'U', Vector{BlasInt}(),convert(BlasInt, 1),
-                            tol, convert(BlasInt, -1))
-        check && chkfullrank(C)
-        return C
-    else
-        return cholesky!(Hermitian(A), RowMaximum(); tol = tol, check = check)
-    end
-end
-@deprecate cholesky!(A::StridedMatrix, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false
-
-# cholesky. Non-destructive methods for computing Cholesky factorization of real symmetric
-# or Hermitian matrix
-## No pivoting (default)
-"""
-    cholesky(A, NoPivot(); check = true) -> Cholesky
-
-Compute the Cholesky factorization of a dense symmetric positive definite matrix `A`
-and return a [`Cholesky`](@ref) factorization. The matrix `A` can either be a [`Symmetric`](@ref) or [`Hermitian`](@ref)
-[`AbstractMatrix`](@ref) or a *perfectly* symmetric or Hermitian `AbstractMatrix`.
-
-The triangular Cholesky factor can be obtained from the factorization `F` via `F.L` and `F.U`,
-where `A ≈ F.U' * F.U ≈ F.L * F.L'`.
-
-The following functions are available for `Cholesky` objects: [`size`](@ref), [`\\`](@ref),
-[`inv`](@ref), [`det`](@ref), [`logdet`](@ref) and [`isposdef`](@ref).
-
-If you have a matrix `A` that is slightly non-Hermitian due to roundoff errors in its construction,
-wrap it in `Hermitian(A)` before passing it to `cholesky` in order to treat it as perfectly Hermitian.
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-# Examples
-```jldoctest
-julia> A = [4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
-3×3 Matrix{Float64}:
-   4.0   12.0  -16.0
-  12.0   37.0  -43.0
- -16.0  -43.0   98.0
-
-julia> C = cholesky(A)
-Cholesky{Float64, Matrix{Float64}}
-U factor:
-3×3 UpperTriangular{Float64, Matrix{Float64}}:
- 2.0  6.0  -8.0
-  ⋅   1.0   5.0
-  ⋅    ⋅    3.0
-
-julia> C.U
-3×3 UpperTriangular{Float64, Matrix{Float64}}:
- 2.0  6.0  -8.0
-  ⋅   1.0   5.0
-  ⋅    ⋅    3.0
-
-julia> C.L
-3×3 LowerTriangular{Float64, Matrix{Float64}}:
-  2.0   ⋅    ⋅
-  6.0  1.0   ⋅
- -8.0  5.0  3.0
-
-julia> C.L * C.U == A
-true
-```
-"""
-cholesky(A::AbstractMatrix, ::NoPivot=NoPivot(); check::Bool = true) =
-    cholesky!(cholcopy(A); check)
-@deprecate cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}}, ::Val{false}; check::Bool = true) cholesky(A, NoPivot(); check) false
-
-function cholesky(A::AbstractMatrix{Float16}, ::NoPivot=NoPivot(); check::Bool = true)
-    X = cholesky!(cholcopy(A); check = check)
-    return Cholesky{Float16}(X)
-end
-@deprecate cholesky(A::Union{StridedMatrix{Float16},RealHermSymComplexHerm{Float16,<:StridedMatrix}}, ::Val{false}; check::Bool = true) cholesky(A, NoPivot(); check) false
-
-## With pivoting
-"""
-    cholesky(A, RowMaximum(); tol = 0.0, check = true) -> CholeskyPivoted
-
-Compute the pivoted Cholesky factorization of a dense symmetric positive semi-definite matrix `A`
-and return a [`CholeskyPivoted`](@ref) factorization. The matrix `A` can either be a [`Symmetric`](@ref)
-or [`Hermitian`](@ref) [`AbstractMatrix`](@ref) or a *perfectly* symmetric or Hermitian `AbstractMatrix`.
-
-The triangular Cholesky factor can be obtained from the factorization `F` via `F.L` and `F.U`,
-and the permutation via `F.p`, where `A[F.p, F.p] ≈ Ur' * Ur ≈ Lr * Lr'` with `Ur = F.U[1:F.rank, :]`
-and `Lr = F.L[:, 1:F.rank]`, or alternatively `A ≈ Up' * Up ≈ Lp * Lp'` with
-`Up = F.U[1:F.rank, invperm(F.p)]` and `Lp = F.L[invperm(F.p), 1:F.rank]`.
-
-The following functions are available for `CholeskyPivoted` objects:
-[`size`](@ref), [`\\`](@ref), [`inv`](@ref), [`det`](@ref), and [`rank`](@ref).
-
-The argument `tol` determines the tolerance for determining the rank.
-For negative values, the tolerance is the machine precision.
-
-If you have a matrix `A` that is slightly non-Hermitian due to roundoff errors in its construction,
-wrap it in `Hermitian(A)` before passing it to `cholesky` in order to treat it as perfectly Hermitian.
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-# Examples
-```jldoctest
-julia> X = [1.0, 2.0, 3.0, 4.0];
-
-julia> A = X * X';
-
-julia> C = cholesky(A, RowMaximum(), check = false)
-CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}
-U factor with rank 1:
-4×4 UpperTriangular{Float64, Matrix{Float64}}:
- 4.0  2.0  3.0  1.0
-  ⋅   0.0  6.0  2.0
-  ⋅    ⋅   9.0  3.0
-  ⋅    ⋅    ⋅   1.0
-permutation:
-4-element Vector{Int64}:
- 4
- 2
- 3
- 1
-
-julia> C.U[1:C.rank, :]' * C.U[1:C.rank, :] ≈ A[C.p, C.p]
-true
-
-julia> l, u = C; # destructuring via iteration
-
-julia> l == C.L && u == C.U
-true
-```
-"""
-cholesky(A::AbstractMatrix, ::RowMaximum; tol = 0.0, check::Bool = true) =
-    cholesky!(cholcopy(A), RowMaximum(); tol, check)
-@deprecate cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}}, ::Val{true}; tol = 0.0, check::Bool = true) cholesky(A, RowMaximum(); tol, check) false
-
-function cholesky(A::AbstractMatrix{Float16}, ::RowMaximum; tol = 0.0, check::Bool = true)
-    X = cholesky!(cholcopy(A), RowMaximum(); tol, check)
-    return CholeskyPivoted{Float16}(X)
-end
-
-## Number
-function cholesky(x::Number, uplo::Symbol=:U)
-    C, info = _chol!(x, uplo)
-    xf = fill(C, 1, 1)
-    Cholesky(xf, uplo, info)
-end
-
-
-function Cholesky{T}(C::Cholesky) where T
-    Cnew = convert(AbstractMatrix{T}, C.factors)
-    Cholesky{T, typeof(Cnew)}(Cnew, C.uplo, C.info)
-end
-Factorization{T}(C::Cholesky{T}) where {T} = C
-Factorization{T}(C::Cholesky) where {T} = Cholesky{T}(C)
-CholeskyPivoted{T}(C::CholeskyPivoted{T}) where {T} = C
-CholeskyPivoted{T}(C::CholeskyPivoted) where {T} =
-    CholeskyPivoted(AbstractMatrix{T}(C.factors),C.uplo,C.piv,C.rank,C.tol,C.info)
-Factorization{T}(C::CholeskyPivoted{T}) where {T} = C
-Factorization{T}(C::CholeskyPivoted) where {T} = CholeskyPivoted{T}(C)
-
-AbstractMatrix(C::Cholesky) = C.uplo == 'U' ? C.U'C.U : C.L*C.L'
-AbstractArray(C::Cholesky) = AbstractMatrix(C)
-Matrix(C::Cholesky) = Array(AbstractArray(C))
-Array(C::Cholesky) = Matrix(C)
-
-function AbstractMatrix(F::CholeskyPivoted)
-    ip = invperm(F.p)
-    U = F.U[1:F.rank,ip]
-    U'U
-end
-AbstractArray(F::CholeskyPivoted) = AbstractMatrix(F)
-Matrix(F::CholeskyPivoted) = Array(AbstractArray(F))
-Array(F::CholeskyPivoted) = Matrix(F)
-
-copy(C::Cholesky) = Cholesky(copy(C.factors), C.uplo, C.info)
-copy(C::CholeskyPivoted) = CholeskyPivoted(copy(C.factors), C.uplo, C.piv, C.rank, C.tol, C.info)
-
-size(C::Union{Cholesky, CholeskyPivoted}) = size(C.factors)
-size(C::Union{Cholesky, CholeskyPivoted}, d::Integer) = size(C.factors, d)
-
-function getproperty(C::Cholesky, d::Symbol)
-    Cfactors = getfield(C, :factors)
-    Cuplo    = getfield(C, :uplo)
-    if d === :U
-        return UpperTriangular(Cuplo === char_uplo(d) ? Cfactors : copy(Cfactors'))
-    elseif d === :L
-        return LowerTriangular(Cuplo === char_uplo(d) ? Cfactors : copy(Cfactors'))
-    elseif d === :UL
-        return (Cuplo === 'U' ? UpperTriangular(Cfactors) : LowerTriangular(Cfactors))
-    else
-        return getfield(C, d)
-    end
-end
-Base.propertynames(F::Cholesky, private::Bool=false) =
-    (:U, :L, :UL, (private ? fieldnames(typeof(F)) : ())...)
-
-function getproperty(C::CholeskyPivoted{T}, d::Symbol) where {T}
-    Cfactors = getfield(C, :factors)
-    Cuplo    = getfield(C, :uplo)
-    if d === :U
-        return UpperTriangular(sym_uplo(Cuplo) == d ? Cfactors : copy(Cfactors'))
-    elseif d === :L
-        return LowerTriangular(sym_uplo(Cuplo) == d ? Cfactors : copy(Cfactors'))
-    elseif d === :p
-        return getfield(C, :piv)
-    elseif d === :P
-        n = size(C, 1)
-        P = zeros(T, n, n)
-        for i = 1:n
-            P[getfield(C, :piv)[i], i] = one(T)
-        end
-        return P
-    else
-        return getfield(C, d)
-    end
-end
-Base.propertynames(F::CholeskyPivoted, private::Bool=false) =
-    (:U, :L, :p, :P, (private ? fieldnames(typeof(F)) : ())...)
-
-issuccess(C::Union{Cholesky,CholeskyPivoted}) = C.info == 0
-
-adjoint(C::Union{Cholesky,CholeskyPivoted}) = C
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, C::Cholesky)
-    if issuccess(C)
-        summary(io, C); println(io)
-        println(io, "$(C.uplo) factor:")
-        show(io, mime, C.UL)
-    else
-        print(io, "Failed factorization of type $(typeof(C))")
-    end
-end
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, C::CholeskyPivoted)
-    summary(io, C); println(io)
-    println(io, "$(C.uplo) factor with rank $(rank(C)):")
-    show(io, mime, C.uplo == 'U' ? C.U : C.L)
-    println(io, "\npermutation:")
-    show(io, mime, C.p)
-end
-
-ldiv!(C::Cholesky{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    LAPACK.potrs!(C.uplo, C.factors, B)
-
-function ldiv!(C::Cholesky, B::AbstractVecOrMat)
-    if C.uplo == 'L'
-        return ldiv!(adjoint(LowerTriangular(C.factors)), ldiv!(LowerTriangular(C.factors), B))
-    else
-        return ldiv!(UpperTriangular(C.factors), ldiv!(adjoint(UpperTriangular(C.factors)), B))
-    end
-end
-
-function ldiv!(C::CholeskyPivoted{T,<:StridedMatrix}, B::StridedVector{T}) where T<:BlasFloat
-    invpermute!(LAPACK.potrs!(C.uplo, C.factors, permute!(B, C.piv)), C.piv)
-end
-function ldiv!(C::CholeskyPivoted{T,<:StridedMatrix}, B::StridedMatrix{T}) where T<:BlasFloat
-    n = size(C, 1)
-    for i=1:size(B, 2)
-        permute!(view(B, 1:n, i), C.piv)
-    end
-    LAPACK.potrs!(C.uplo, C.factors, B)
-    for i=1:size(B, 2)
-        invpermute!(view(B, 1:n, i), C.piv)
-    end
-    B
-end
-
-function ldiv!(C::CholeskyPivoted, B::AbstractVector)
-    if C.uplo == 'L'
-        ldiv!(adjoint(LowerTriangular(C.factors)),
-            ldiv!(LowerTriangular(C.factors), permute!(B, C.piv)))
-    else
-        ldiv!(UpperTriangular(C.factors),
-            ldiv!(adjoint(UpperTriangular(C.factors)), permute!(B, C.piv)))
-    end
-    invpermute!(B, C.piv)
-end
-
-function ldiv!(C::CholeskyPivoted, B::AbstractMatrix)
-    n = size(C, 1)
-    for i in 1:size(B, 2)
-        permute!(view(B, 1:n, i), C.piv)
-    end
-    if C.uplo == 'L'
-        ldiv!(adjoint(LowerTriangular(C.factors)),
-            ldiv!(LowerTriangular(C.factors), B))
-    else
-        ldiv!(UpperTriangular(C.factors),
-            ldiv!(adjoint(UpperTriangular(C.factors)), B))
-    end
-    for i in 1:size(B, 2)
-        invpermute!(view(B, 1:n, i), C.piv)
-    end
-    B
-end
-
-function rdiv!(B::AbstractMatrix, C::Cholesky)
-    if C.uplo == 'L'
-        return rdiv!(rdiv!(B, adjoint(LowerTriangular(C.factors))), LowerTriangular(C.factors))
-    else
-        return rdiv!(rdiv!(B, UpperTriangular(C.factors)), adjoint(UpperTriangular(C.factors)))
-    end
-end
-
-function LinearAlgebra.rdiv!(B::AbstractMatrix, C::CholeskyPivoted)
-    n = size(C, 2)
-    for i in 1:size(B, 1)
-        permute!(view(B, i, 1:n), C.piv)
-    end
-    if C.uplo == 'L'
-        rdiv!(rdiv!(B, adjoint(LowerTriangular(C.factors))),
-            LowerTriangular(C.factors))
-    else
-        rdiv!(rdiv!(B, UpperTriangular(C.factors)),
-            adjoint(UpperTriangular(C.factors)))
-    end
-    for i in 1:size(B, 1)
-        invpermute!(view(B, i, 1:n), C.piv)
-    end
-    B
-end
-
-isposdef(C::Union{Cholesky,CholeskyPivoted}) = C.info == 0
-
-function det(C::Cholesky)
-    dd = one(real(eltype(C)))
-    @inbounds for i in 1:size(C.factors,1)
-        dd *= real(C.factors[i,i])^2
-    end
-    return dd
-end
-
-function logdet(C::Cholesky)
-    dd = zero(real(eltype(C)))
-    @inbounds for i in 1:size(C.factors,1)
-        dd += log(real(C.factors[i,i]))
-    end
-    dd + dd # instead of 2.0dd which can change the type
-end
-
-function det(C::CholeskyPivoted)
-    if C.rank < size(C.factors, 1)
-        return zero(real(eltype(C)))
-    else
-        dd = one(real(eltype(C)))
-        for i in 1:size(C.factors,1)
-            dd *= real(C.factors[i,i])^2
-        end
-        return dd
-    end
-end
-
-function logdet(C::CholeskyPivoted)
-    if C.rank < size(C.factors, 1)
-        return real(eltype(C))(-Inf)
-    else
-        dd = zero(real(eltype(C)))
-        for i in 1:size(C.factors,1)
-            dd += log(real(C.factors[i,i]))
-        end
-        return dd + dd # instead of 2.0dd which can change the type
-    end
-end
-
-logabsdet(C::Union{Cholesky, CholeskyPivoted}) = logdet(C), one(eltype(C)) # since C is p.s.d.
-
-inv!(C::Cholesky{<:BlasFloat,<:StridedMatrix}) =
-    copytri!(LAPACK.potri!(C.uplo, C.factors), C.uplo, true)
-
-inv(C::Cholesky{<:BlasFloat,<:StridedMatrix}) = inv!(copy(C))
-
-function inv(C::CholeskyPivoted{<:BlasFloat,<:StridedMatrix})
-    ipiv = invperm(C.piv)
-    copytri!(LAPACK.potri!(C.uplo, copy(C.factors)), C.uplo, true)[ipiv, ipiv]
-end
-
-function chkfullrank(C::CholeskyPivoted)
-    if C.rank < size(C.factors, 1)
-        throw(RankDeficientException(C.info))
-    end
-end
-
-rank(C::CholeskyPivoted) = C.rank
-
-"""
-    lowrankupdate!(C::Cholesky, v::AbstractVector) -> CC::Cholesky
-
-Update a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U` then
-`CC = cholesky(C.U'C.U + v*v')` but the computation of `CC` only uses `O(n^2)`
-operations. The input factorization `C` is updated in place such that on exit `C == CC`.
-The vector `v` is destroyed during the computation.
-"""
-function lowrankupdate!(C::Cholesky, v::AbstractVector)
-    A = C.factors
-    n = length(v)
-    if size(C, 1) != n
-        throw(DimensionMismatch("updating vector must fit size of factorization"))
-    end
-    if C.uplo == 'U'
-        conj!(v)
-    end
-
-    for i = 1:n
-
-        # Compute Givens rotation
-        c, s, r = givensAlgorithm(A[i,i], v[i])
-
-        # Store new diagonal element
-        A[i,i] = r
-
-        # Update remaining elements in row/column
-        if C.uplo == 'U'
-            for j = i + 1:n
-                Aij = A[i,j]
-                vj  = v[j]
-                A[i,j]  =   c*Aij + s*vj
-                v[j]    = -s'*Aij + c*vj
-            end
-        else
-            for j = i + 1:n
-                Aji = A[j,i]
-                vj  = v[j]
-                A[j,i]  =   c*Aji + s*vj
-                v[j]    = -s'*Aji + c*vj
-            end
-        end
-    end
-    return C
-end
-
-"""
-    lowrankdowndate!(C::Cholesky, v::AbstractVector) -> CC::Cholesky
-
-Downdate a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U` then
-`CC = cholesky(C.U'C.U - v*v')` but the computation of `CC` only uses `O(n^2)`
-operations. The input factorization `C` is updated in place such that on exit `C == CC`.
-The vector `v` is destroyed during the computation.
-"""
-function lowrankdowndate!(C::Cholesky, v::AbstractVector)
-    A = C.factors
-    n = length(v)
-    if size(C, 1) != n
-        throw(DimensionMismatch("updating vector must fit size of factorization"))
-    end
-    if C.uplo == 'U'
-        conj!(v)
-    end
-
-    for i = 1:n
-
-        Aii = A[i,i]
-
-        # Compute Givens rotation
-        s = conj(v[i]/Aii)
-        s2 = abs2(s)
-        if s2 > 1
-            throw(LinearAlgebra.PosDefException(i))
-        end
-        c = sqrt(1 - abs2(s))
-
-        # Store new diagonal element
-        A[i,i] = c*Aii
-
-        # Update remaining elements in row/column
-        if C.uplo == 'U'
-            for j = i + 1:n
-                vj = v[j]
-                Aij = (A[i,j] - s*vj)/c
-                A[i,j] = Aij
-                v[j] = -s'*Aij + c*vj
-            end
-        else
-            for j = i + 1:n
-                vj = v[j]
-                Aji = (A[j,i] - s*vj)/c
-                A[j,i] = Aji
-                v[j] = -s'*Aji + c*vj
-            end
-        end
-    end
-    return C
-end
-
-"""
-    lowrankupdate(C::Cholesky, v::AbstractVector) -> CC::Cholesky
-
-Update a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U`
-then `CC = cholesky(C.U'C.U + v*v')` but the computation of `CC` only uses
-`O(n^2)` operations.
-"""
-lowrankupdate(C::Cholesky, v::AbstractVector) = lowrankupdate!(copy(C), copy(v))
-
-"""
-    lowrankdowndate(C::Cholesky, v::AbstractVector) -> CC::Cholesky
-
-Downdate a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U`
-then `CC = cholesky(C.U'C.U - v*v')` but the computation of `CC` only uses
-`O(n^2)` operations.
-"""
-lowrankdowndate(C::Cholesky, v::AbstractVector) = lowrankdowndate!(copy(C), copy(v))
diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl
deleted file mode 100644
index b8a44159de8bd..0000000000000
--- a/stdlib/LinearAlgebra/src/dense.jl
+++ /dev/null
@@ -1,1695 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Linear algebra functions for dense matrices in column major format
-
-## BLAS cutoff threshold constants
-
-#TODO const DOT_CUTOFF = 128
-const ASUM_CUTOFF = 32
-const NRM2_CUTOFF = 32
-
-# Generic cross-over constant based on benchmarking on a single thread with an i7 CPU @ 2.5GHz
-# L1 cache: 32K, L2 cache: 256K, L3 cache: 6144K
-# This constant should ideally be determined by the actual CPU cache size
-const ISONE_CUTOFF = 2^21 # 2M
-
-function isone(A::AbstractMatrix)
-    m, n = size(A)
-    m != n && return false # only square matrices can satisfy x == one(x)
-    if sizeof(A) < ISONE_CUTOFF
-        _isone_triacheck(A, m)
-    else
-        _isone_cachefriendly(A, m)
-    end
-end
-
-@inline function _isone_triacheck(A::AbstractMatrix, m::Int)
-    @inbounds for i in 1:m, j in i:m
-        if i == j
-            isone(A[i,i]) || return false
-        else
-            iszero(A[i,j]) && iszero(A[j,i]) || return false
-        end
-    end
-    return true
-end
-
-# Inner loop over rows to be friendly to the CPU cache
-@inline function _isone_cachefriendly(A::AbstractMatrix, m::Int)
-    @inbounds for i in 1:m, j in 1:m
-        if i == j
-            isone(A[i,i]) || return false
-        else
-            iszero(A[j,i]) || return false
-        end
-    end
-    return true
-end
-
-
-"""
-    isposdef!(A) -> Bool
-
-Test whether a matrix is positive definite (and Hermitian) by trying to perform a
-Cholesky factorization of `A`, overwriting `A` in the process.
-See also [`isposdef`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1. 2.; 2. 50.];
-
-julia> isposdef!(A)
-true
-
-julia> A
-2×2 Matrix{Float64}:
- 1.0  2.0
- 2.0  6.78233
-```
-"""
-isposdef!(A::AbstractMatrix) =
-    ishermitian(A) && isposdef(cholesky!(Hermitian(A); check = false))
-
-"""
-    isposdef(A) -> Bool
-
-Test whether a matrix is positive definite (and Hermitian) by trying to perform a
-Cholesky factorization of `A`.
-
-See also [`isposdef!`](@ref), [`cholesky`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1 2; 2 50]
-2×2 Matrix{Int64}:
- 1   2
- 2  50
-
-julia> isposdef(A)
-true
-```
-"""
-isposdef(A::AbstractMatrix) =
-    ishermitian(A) && isposdef(cholesky(Hermitian(A); check = false))
-isposdef(x::Number) = imag(x)==0 && real(x) > 0
-
-function norm(x::StridedVector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}) where {T<:BlasFloat,TI<:Integer}
-    if minimum(rx) < 1 || maximum(rx) > length(x)
-        throw(BoundsError(x, rx))
-    end
-    GC.@preserve x BLAS.nrm2(length(rx), pointer(x)+(first(rx)-1)*sizeof(T), step(rx))
-end
-
-norm1(x::Union{Array{T},StridedVector{T}}) where {T<:BlasReal} =
-    length(x) < ASUM_CUTOFF ? generic_norm1(x) : BLAS.asum(x)
-
-norm2(x::Union{Array{T},StridedVector{T}}) where {T<:BlasFloat} =
-    length(x) < NRM2_CUTOFF ? generic_norm2(x) : BLAS.nrm2(x)
-
-"""
-    triu!(M, k::Integer)
-
-Return the upper triangle of `M` starting from the `k`th superdiagonal,
-overwriting `M` in the process.
-
-# Examples
-```jldoctest
-julia> M = [1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5]
-5×5 Matrix{Int64}:
- 1  2  3  4  5
- 1  2  3  4  5
- 1  2  3  4  5
- 1  2  3  4  5
- 1  2  3  4  5
-
-julia> triu!(M, 1)
-5×5 Matrix{Int64}:
- 0  2  3  4  5
- 0  0  3  4  5
- 0  0  0  4  5
- 0  0  0  0  5
- 0  0  0  0  0
-```
-"""
-function triu!(M::AbstractMatrix, k::Integer)
-    require_one_based_indexing(M)
-    m, n = size(M)
-    for j in 1:min(n, m + k)
-        for i in max(1, j - k + 1):m
-            M[i,j] = zero(M[i,j])
-        end
-    end
-    M
-end
-
-triu(M::Matrix, k::Integer) = triu!(copy(M), k)
-
-"""
-    tril!(M, k::Integer)
-
-Return the lower triangle of `M` starting from the `k`th superdiagonal, overwriting `M` in
-the process.
-
-# Examples
-```jldoctest
-julia> M = [1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5]
-5×5 Matrix{Int64}:
- 1  2  3  4  5
- 1  2  3  4  5
- 1  2  3  4  5
- 1  2  3  4  5
- 1  2  3  4  5
-
-julia> tril!(M, 2)
-5×5 Matrix{Int64}:
- 1  2  3  0  0
- 1  2  3  4  0
- 1  2  3  4  5
- 1  2  3  4  5
- 1  2  3  4  5
-```
-"""
-function tril!(M::AbstractMatrix, k::Integer)
-    require_one_based_indexing(M)
-    m, n = size(M)
-    for j in max(1, k + 1):n
-        @inbounds for i in 1:min(j - k - 1, m)
-            M[i,j] = zero(M[i,j])
-        end
-    end
-    M
-end
-tril(M::Matrix, k::Integer) = tril!(copy(M), k)
-
-"""
-    fillband!(A::AbstractMatrix, x, l, u)
-
-Fill the band between diagonals `l` and `u` with the value `x`.
-"""
-function fillband!(A::AbstractMatrix{T}, x, l, u) where T
-    require_one_based_indexing(A)
-    m, n = size(A)
-    xT = convert(T, x)
-    for j in 1:n
-        for i in max(1,j-u):min(m,j-l)
-            @inbounds A[i, j] = xT
-        end
-    end
-    return A
-end
-
-diagind(m::Integer, n::Integer, k::Integer=0) =
-    k <= 0 ? range(1-k, step=m+1, length=min(m+k, n)) : range(k*m+1, step=m+1, length=min(m, n-k))
-
-"""
-    diagind(M, k::Integer=0)
-
-An `AbstractRange` giving the indices of the `k`th diagonal of the matrix `M`.
-
-See also: [`diag`](@ref), [`diagm`](@ref), [`Diagonal`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1 2 3; 4 5 6; 7 8 9]
-3×3 Matrix{Int64}:
- 1  2  3
- 4  5  6
- 7  8  9
-
-julia> diagind(A,-1)
-2:4:6
-```
-"""
-function diagind(A::AbstractMatrix, k::Integer=0)
-    require_one_based_indexing(A)
-    diagind(size(A,1), size(A,2), k)
-end
-
-"""
-    diag(M, k::Integer=0)
-
-The `k`th diagonal of a matrix, as a vector.
-
-See also [`diagm`](@ref), [`diagind`](@ref), [`Diagonal`](@ref), [`isdiag`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1 2 3; 4 5 6; 7 8 9]
-3×3 Matrix{Int64}:
- 1  2  3
- 4  5  6
- 7  8  9
-
-julia> diag(A,1)
-2-element Vector{Int64}:
- 2
- 6
-```
-"""
-diag(A::AbstractMatrix, k::Integer=0) = A[diagind(A,k)]
-
-"""
-    diagm(kv::Pair{<:Integer,<:AbstractVector}...)
-    diagm(m::Integer, n::Integer, kv::Pair{<:Integer,<:AbstractVector}...)
-
-Construct a matrix from `Pair`s of diagonals and vectors.
-Vector `kv.second` will be placed on the `kv.first` diagonal.
-By default the matrix is square and its size is inferred
-from `kv`, but a non-square size `m`×`n` (padded with zeros as needed)
-can be specified by passing `m,n` as the first arguments.
-For repeated diagonal indices `kv.first` the values in the corresponding
-vectors `kv.second` will be added.
-
-`diagm` constructs a full matrix; if you want storage-efficient
-versions with fast arithmetic, see [`Diagonal`](@ref), [`Bidiagonal`](@ref)
-[`Tridiagonal`](@ref) and [`SymTridiagonal`](@ref).
-
-# Examples
-```jldoctest
-julia> diagm(1 => [1,2,3])
-4×4 Matrix{Int64}:
- 0  1  0  0
- 0  0  2  0
- 0  0  0  3
- 0  0  0  0
-
-julia> diagm(1 => [1,2,3], -1 => [4,5])
-4×4 Matrix{Int64}:
- 0  1  0  0
- 4  0  2  0
- 0  5  0  3
- 0  0  0  0
-
-julia> diagm(1 => [1,2,3], 1 => [1,2,3])
-4×4 Matrix{Int64}:
- 0  2  0  0
- 0  0  4  0
- 0  0  0  6
- 0  0  0  0
-```
-"""
-diagm(kv::Pair{<:Integer,<:AbstractVector}...) = _diagm(nothing, kv...)
-diagm(m::Integer, n::Integer, kv::Pair{<:Integer,<:AbstractVector}...) = _diagm((Int(m),Int(n)), kv...)
-function _diagm(size, kv::Pair{<:Integer,<:AbstractVector}...)
-    A = diagm_container(size, kv...)
-    for p in kv
-        inds = diagind(A, p.first)
-        for (i, val) in enumerate(p.second)
-            A[inds[i]] += val
-        end
-    end
-    return A
-end
-function diagm_size(size::Nothing, kv::Pair{<:Integer,<:AbstractVector}...)
-    mnmax = mapreduce(x -> length(x.second) + abs(Int(x.first)), max, kv; init=0)
-    return mnmax, mnmax
-end
-function diagm_size(size::Tuple{Int,Int}, kv::Pair{<:Integer,<:AbstractVector}...)
-    mmax = mapreduce(x -> length(x.second) - min(0,Int(x.first)), max, kv; init=0)
-    nmax = mapreduce(x -> length(x.second) + max(0,Int(x.first)), max, kv; init=0)
-    m, n = size
-    (m ≥ mmax && n ≥ nmax) || throw(DimensionMismatch("invalid size=$size"))
-    return m, n
-end
-function diagm_container(size, kv::Pair{<:Integer,<:AbstractVector}...)
-    T = promote_type(map(x -> eltype(x.second), kv)...)
-    # For some type `T`, `zero(T)` is not a `T` and `zeros(T, ...)` fails.
-    U = promote_type(T, typeof(zero(T)))
-    return zeros(U, diagm_size(size, kv...)...)
-end
-diagm_container(size, kv::Pair{<:Integer,<:BitVector}...) =
-    falses(diagm_size(size, kv...)...)
-
-"""
-    diagm(v::AbstractVector)
-    diagm(m::Integer, n::Integer, v::AbstractVector)
-
-Construct a matrix with elements of the vector as diagonal elements.
-By default, the matrix is square and its size is given by
-`length(v)`, but a non-square size `m`×`n` can be specified
-by passing `m,n` as the first arguments.
-
-# Examples
-```jldoctest
-julia> diagm([1,2,3])
-3×3 Matrix{Int64}:
- 1  0  0
- 0  2  0
- 0  0  3
-```
-"""
-diagm(v::AbstractVector) = diagm(0 => v)
-diagm(m::Integer, n::Integer, v::AbstractVector) = diagm(m, n, 0 => v)
-
-function tr(A::Matrix{T}) where T
-    n = checksquare(A)
-    t = zero(T)
-    @inbounds @simd for i in 1:n
-        t += A[i,i]
-    end
-    t
-end
-
-_kronsize(A::AbstractMatrix, B::AbstractMatrix) = map(*, size(A), size(B))
-_kronsize(A::AbstractMatrix, B::AbstractVector) = (size(A, 1)*length(B), size(A, 2))
-_kronsize(A::AbstractVector, B::AbstractMatrix) = (length(A)*size(B, 1), size(B, 2))
-
-"""
-    kron!(C, A, B)
-
-Computes the Kronecker product of `A` and `B` and stores the result in `C`,
-overwriting the existing content of `C`. This is the in-place version of [`kron`](@ref).
-
-!!! compat "Julia 1.6"
-    This function requires Julia 1.6 or later.
-"""
-function kron!(C::AbstractVecOrMat, A::AbstractVecOrMat, B::AbstractVecOrMat)
-    size(C) == _kronsize(A, B) || throw(DimensionMismatch("kron!"))
-    _kron!(C, A, B)
-end
-function kron!(c::AbstractVector, a::AbstractVector, b::AbstractVector)
-    length(c) == length(a) * length(b) || throw(DimensionMismatch("kron!"))
-    m = firstindex(c)
-    @inbounds for i in eachindex(a)
-        ai = a[i]
-        for k in eachindex(b)
-            c[m] = ai*b[k]
-            m += 1
-        end
-    end
-    return c
-end
-kron!(c::AbstractVecOrMat, a::AbstractVecOrMat, b::Number) = mul!(c, a, b)
-kron!(c::AbstractVecOrMat, a::Number, b::AbstractVecOrMat) = mul!(c, a, b)
-
-function _kron!(C, A::AbstractMatrix, B::AbstractMatrix)
-    m = firstindex(C)
-    @inbounds for j in axes(A,2), l in axes(B,2), i in axes(A,1)
-        Aij = A[i,j]
-        for k in axes(B,1)
-            C[m] = Aij*B[k,l]
-            m += 1
-        end
-    end
-    return C
-end
-function _kron!(C, A::AbstractMatrix, b::AbstractVector)
-    m = firstindex(C)
-    @inbounds for j in axes(A,2), i in axes(A,1)
-        Aij = A[i,j]
-        for k in eachindex(b)
-            C[m] = Aij*b[k]
-            m += 1
-        end
-    end
-    return C
-end
-function _kron!(C, a::AbstractVector, B::AbstractMatrix)
-    m = firstindex(C)
-    @inbounds for l in axes(B,2), i in eachindex(a)
-        ai = a[i]
-        for k in axes(B,1)
-            C[m] = ai*B[k,l]
-            m += 1
-        end
-    end
-    return C
-end
-
-"""
-    kron(A, B)
-
-Computes the Kronecker product of two vectors, matrices or numbers.
-
-For real vectors `v` and `w`, the Kronecker product is related to the outer product by
-`kron(v,w) == vec(w * transpose(v))` or
-`w * transpose(v) == reshape(kron(v,w), (length(w), length(v)))`.
-Note how the ordering of `v` and `w` differs on the left and right
-of these expressions (due to column-major storage).
-For complex vectors, the outer product `w * v'` also differs by conjugation of `v`.
-
-# Examples
-```jldoctest
-julia> A = [1 2; 3 4]
-2×2 Matrix{Int64}:
- 1  2
- 3  4
-
-julia> B = [im 1; 1 -im]
-2×2 Matrix{Complex{Int64}}:
- 0+1im  1+0im
- 1+0im  0-1im
-
-julia> kron(A, B)
-4×4 Matrix{Complex{Int64}}:
- 0+1im  1+0im  0+2im  2+0im
- 1+0im  0-1im  2+0im  0-2im
- 0+3im  3+0im  0+4im  4+0im
- 3+0im  0-3im  4+0im  0-4im
-
-julia> v = [1, 2]; w = [3, 4, 5];
-
-julia> w*transpose(v)
-3×2 Matrix{Int64}:
- 3   6
- 4   8
- 5  10
-
-julia> reshape(kron(v,w), (length(w), length(v)))
-3×2 Matrix{Int64}:
- 3   6
- 4   8
- 5  10
-```
-"""
-function kron(A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}) where {T,S}
-    R = Matrix{promote_op(*,T,S)}(undef, _kronsize(A, B))
-    return kron!(R, A, B)
-end
-function kron(a::AbstractVector{T}, b::AbstractVector{S}) where {T,S}
-    c = Vector{promote_op(*,T,S)}(undef, length(a)*length(b))
-    return kron!(c, a, b)
-end
-kron(a::Number, b::Union{Number, AbstractVecOrMat}) = a * b
-kron(a::AbstractVecOrMat, b::Number) = a * b
-kron(a::AdjointAbsVec, b::AdjointAbsVec) = adjoint(kron(adjoint(a), adjoint(b)))
-kron(a::AdjOrTransAbsVec, b::AdjOrTransAbsVec) = transpose(kron(transpose(a), transpose(b)))
-
-# Matrix power
-(^)(A::AbstractMatrix, p::Integer) = p < 0 ? power_by_squaring(inv(A), -p) : power_by_squaring(A, p)
-function (^)(A::AbstractMatrix{T}, p::Integer) where T<:Integer
-    # make sure that e.g. [1 1;1 0]^big(3)
-    # gets promotes in a similar way as 2^big(3)
-    TT = promote_op(^, T, typeof(p))
-    return power_by_squaring(convert(AbstractMatrix{TT}, A), p)
-end
-function integerpow(A::AbstractMatrix{T}, p) where T
-    TT = promote_op(^, T, typeof(p))
-    return (TT == T ? A : convert(AbstractMatrix{TT}, A))^Integer(p)
-end
-function schurpow(A::AbstractMatrix, p)
-    if istriu(A)
-        # Integer part
-        retmat = A ^ floor(p)
-        # Real part
-        if p - floor(p) == 0.5
-            # special case: A^0.5 === sqrt(A)
-            retmat = retmat * sqrt(A)
-        else
-            retmat = retmat * powm!(UpperTriangular(float.(A)), real(p - floor(p)))
-        end
-    else
-        S,Q,d = Schur{Complex}(schur(A))
-        # Integer part
-        R = S ^ floor(p)
-        # Real part
-        if p - floor(p) == 0.5
-            # special case: A^0.5 === sqrt(A)
-            R = R * sqrt(S)
-        else
-            R = R * powm!(UpperTriangular(float.(S)), real(p - floor(p)))
-        end
-        retmat = Q * R * Q'
-    end
-
-    # if A has nonpositive real eigenvalues, retmat is a nonprincipal matrix power.
-    if isreal(retmat)
-        return real(retmat)
-    else
-        return retmat
-    end
-end
-function (^)(A::AbstractMatrix{T}, p::Real) where T
-    n = checksquare(A)
-
-    # Quicker return if A is diagonal
-    if isdiag(A)
-        TT = promote_op(^, T, typeof(p))
-        retmat = copymutable_oftype(A, TT)
-        for i in 1:n
-            retmat[i, i] = retmat[i, i] ^ p
-        end
-        return retmat
-    end
-
-    # For integer powers, use power_by_squaring
-    isinteger(p) && return integerpow(A, p)
-
-    # If possible, use diagonalization
-    if issymmetric(A)
-        return (Symmetric(A)^p)
-    end
-    if ishermitian(A)
-        return (Hermitian(A)^p)
-    end
-
-    # Otherwise, use Schur decomposition
-    return schurpow(A, p)
-end
-
-"""
-    ^(A::AbstractMatrix, p::Number)
-
-Matrix power, equivalent to ``\\exp(p\\log(A))``
-
-# Examples
-```jldoctest
-julia> [1 2; 0 3]^3
-2×2 Matrix{Int64}:
- 1  26
- 0  27
-```
-"""
-(^)(A::AbstractMatrix, p::Number) = exp(p*log(A))
-
-# Matrix exponential
-
-"""
-    exp(A::AbstractMatrix)
-
-Compute the matrix exponential of `A`, defined by
-
-```math
-e^A = \\sum_{n=0}^{\\infty} \\frac{A^n}{n!}.
-```
-
-For symmetric or Hermitian `A`, an eigendecomposition ([`eigen`](@ref)) is
-used, otherwise the scaling and squaring algorithm (see [^H05]) is chosen.
-
-[^H05]: Nicholas J. Higham, "The squaring and scaling method for the matrix exponential revisited", SIAM Journal on Matrix Analysis and Applications, 26(4), 2005, 1179-1193. [doi:10.1137/090768539](https://doi.org/10.1137/090768539)
-
-# Examples
-```jldoctest
-julia> A = Matrix(1.0I, 2, 2)
-2×2 Matrix{Float64}:
- 1.0  0.0
- 0.0  1.0
-
-julia> exp(A)
-2×2 Matrix{Float64}:
- 2.71828  0.0
- 0.0      2.71828
-```
-"""
-exp(A::AbstractMatrix) = exp!(copy_similar(A, eigtype(eltype(A))))
-exp(A::AdjointAbsMat) = adjoint(exp(parent(A)))
-exp(A::TransposeAbsMat) = transpose(exp(parent(A)))
-
-"""
-    cis(A::AbstractMatrix)
-
-More efficient method for `exp(im*A)` of square matrix `A`
-(especially if `A` is `Hermitian` or real-`Symmetric`).
-
-See also [`cispi`](@ref), [`sincos`](@ref), [`exp`](@ref).
-
-!!! compat "Julia 1.7"
-    Support for using `cis` with matrices was added in Julia 1.7.
-
-# Examples
-```jldoctest
-julia> cis([π 0; 0 π]) ≈ -I
-true
-```
-"""
-cis(A::AbstractMatrix) = exp(im * A)  # fallback
-cis(A::AbstractMatrix{<:Base.HWNumber}) = exp_maybe_inplace(float.(im .* A))
-
-exp_maybe_inplace(A::StridedMatrix{<:Union{ComplexF32, ComplexF64}}) = exp!(A)
-exp_maybe_inplace(A) = exp(A)
-
-"""
-    ^(b::Number, A::AbstractMatrix)
-
-Matrix exponential, equivalent to ``\\exp(\\log(b)A)``.
-
-!!! compat "Julia 1.1"
-    Support for raising `Irrational` numbers (like `ℯ`)
-    to a matrix was added in Julia 1.1.
-
-# Examples
-```jldoctest
-julia> 2^[1 2; 0 3]
-2×2 Matrix{Float64}:
- 2.0  6.0
- 0.0  8.0
-
-julia> ℯ^[1 2; 0 3]
-2×2 Matrix{Float64}:
- 2.71828  17.3673
- 0.0      20.0855
-```
-"""
-Base.:^(b::Number, A::AbstractMatrix) = exp!(log(b)*A)
-# method for ℯ to explicitly elide the log(b) multiplication
-Base.:^(::Irrational{:ℯ}, A::AbstractMatrix) = exp(A)
-
-## Destructive matrix exponential using algorithm from Higham, 2008,
-## "Functions of Matrices: Theory and Computation", SIAM
-function exp!(A::StridedMatrix{T}) where T<:BlasFloat
-    n = checksquare(A)
-    if ishermitian(A)
-        return copytri!(parent(exp(Hermitian(A))), 'U', true)
-    end
-    ilo, ihi, scale = LAPACK.gebal!('B', A)    # modifies A
-    nA   = opnorm(A, 1)
-    ## For sufficiently small nA, use lower order Padé-Approximations
-    if (nA <= 2.1)
-        if nA > 0.95
-            C = T[17643225600.,8821612800.,2075673600.,302702400.,
-                     30270240.,   2162160.,    110880.,     3960.,
-                           90.,         1.]
-        elseif nA > 0.25
-            C = T[17297280.,8648640.,1995840.,277200.,
-                     25200.,   1512.,     56.,     1.]
-        elseif nA > 0.015
-            C = T[30240.,15120.,3360.,
-                    420.,   30.,   1.]
-        else
-            C = T[120.,60.,12.,1.]
-        end
-        A2 = A * A
-        # Compute U and V: Even/odd terms in Padé numerator & denom
-        # Expansion of k=1 in for loop
-        P = A2
-        U = mul!(C[4]*P, true, C[2]*I, true, true) #U = C[2]*I + C[4]*P
-        V = mul!(C[3]*P, true, C[1]*I, true, true) #V = C[1]*I + C[3]*P
-        for k in 2:(div(length(C), 2) - 1)
-            P *= A2
-            mul!(U, C[2k + 2], P, true, true) # U += C[2k+2]*P
-            mul!(V, C[2k + 1], P, true, true) # V += C[2k+1]*P
-        end
-
-        U = A * U
-
-        # Padé approximant:  (V-U)\(V+U)
-        tmp1, tmp2 = A, A2 # Reuse already allocated arrays
-        tmp1 .= V .- U
-        tmp2 .= V .+ U
-        X = LAPACK.gesv!(tmp1, tmp2)[1]
-    else
-        s  = log2(nA/5.4)               # power of 2 later reversed by squaring
-        if s > 0
-            si = ceil(Int,s)
-            A ./= convert(T,2^si)
-        end
-        CC = T[64764752532480000.,32382376266240000.,7771770303897600.,
-                1187353796428800.,  129060195264000.,  10559470521600.,
-                    670442572800.,      33522128640.,      1323241920.,
-                        40840800.,           960960.,           16380.,
-                             182.,                1.]
-        A2 = A * A
-        A4 = A2 * A2
-        A6 = A2 * A4
-        tmp1, tmp2 = similar(A6), similar(A6)
-
-        # Allocation economical version of:
-        # U  = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+
-        #           CC[8].*A6 .+ CC[6].*A4 .+ CC[4]*A2+CC[2]*I)
-        tmp1 .= CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2
-        tmp2 .= CC[8].*A6 .+ CC[6].*A4 .+ CC[4].*A2
-        mul!(tmp2, true,CC[2]*I, true, true) # tmp2 .+= CC[2]*I
-        U = mul!(tmp2, A6, tmp1, true, true)
-        U, tmp1 = mul!(tmp1, A, U), A # U = A * U0
-
-        # Allocation economical version of:
-        # V  = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+
-        #           CC[7].*A6 .+ CC[5].*A4 .+ CC[3]*A2 .+ CC[1]*I
-        tmp1 .= CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2
-        tmp2 .= CC[7].*A6 .+ CC[5].*A4 .+ CC[3].*A2
-        mul!(tmp2, true, CC[1]*I, true, true) # tmp2 .+= CC[1]*I
-        V = mul!(tmp2, A6, tmp1, true, true)
-
-        tmp1 .= V .+ U
-        tmp2 .= V .- U # tmp2 already contained V but this seems more readable
-        X = LAPACK.gesv!(tmp2, tmp1)[1] # X now contains r_13 in Higham 2008
-
-        if s > 0
-            # Repeated squaring to compute X = r_13^(2^si)
-            for t=1:si
-                mul!(tmp2, X, X)
-                X, tmp2 = tmp2, X
-            end
-        end
-    end
-
-    # Undo the balancing
-    for j = ilo:ihi
-        scj = scale[j]
-        for i = 1:n
-            X[j,i] *= scj
-        end
-        for i = 1:n
-            X[i,j] /= scj
-        end
-    end
-
-    if ilo > 1       # apply lower permutations in reverse order
-        for j in (ilo-1):-1:1; rcswap!(j, Int(scale[j]), X) end
-    end
-    if ihi < n       # apply upper permutations in forward order
-        for j in (ihi+1):n;    rcswap!(j, Int(scale[j]), X) end
-    end
-    X
-end
-
-## Swap rows i and j and columns i and j in X
-function rcswap!(i::Integer, j::Integer, X::AbstractMatrix{<:Number})
-    for k = 1:size(X,1)
-        X[k,i], X[k,j] = X[k,j], X[k,i]
-    end
-    for k = 1:size(X,2)
-        X[i,k], X[j,k] = X[j,k], X[i,k]
-    end
-end
-
-"""
-    log(A::AbstractMatrix)
-
-If `A` has no negative real eigenvalue, compute the principal matrix logarithm of `A`, i.e.
-the unique matrix ``X`` such that ``e^X = A`` and ``-\\pi < Im(\\lambda) < \\pi`` for all
-the eigenvalues ``\\lambda`` of ``X``. If `A` has nonpositive eigenvalues, a nonprincipal
-matrix function is returned whenever possible.
-
-If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is
-used, if `A` is triangular an improved version of the inverse scaling and squaring method is
-employed (see [^AH12] and [^AHR13]). If `A` is real with no negative eigenvalues, then
-the real Schur form is computed. Otherwise, the complex Schur form is computed. Then
-the upper (quasi-)triangular algorithm in [^AHR13] is used on the upper (quasi-)triangular
-factor.
-
-[^AH12]: Awad H. Al-Mohy and Nicholas J. Higham, "Improved inverse  scaling and squaring algorithms for the matrix logarithm", SIAM Journal on Scientific Computing, 34(4), 2012, C153-C169. [doi:10.1137/110852553](https://doi.org/10.1137/110852553)
-
-[^AHR13]: Awad H. Al-Mohy, Nicholas J. Higham and Samuel D. Relton, "Computing the Fréchet derivative of the matrix logarithm and estimating the condition number", SIAM Journal on Scientific Computing, 35(4), 2013, C394-C410. [doi:10.1137/120885991](https://doi.org/10.1137/120885991)
-
-# Examples
-```jldoctest
-julia> A = Matrix(2.7182818*I, 2, 2)
-2×2 Matrix{Float64}:
- 2.71828  0.0
- 0.0      2.71828
-
-julia> log(A)
-2×2 Matrix{Float64}:
- 1.0  0.0
- 0.0  1.0
-```
-"""
-function log(A::AbstractMatrix)
-    # If possible, use diagonalization
-    if ishermitian(A)
-        logHermA = log(Hermitian(A))
-        return ishermitian(logHermA) ? copytri!(parent(logHermA), 'U', true) : parent(logHermA)
-    elseif istriu(A)
-        return triu!(parent(log(UpperTriangular(A))))
-    elseif isreal(A)
-        SchurF = schur(real(A))
-        if istriu(SchurF.T)
-            logA = SchurF.Z * log(UpperTriangular(SchurF.T)) * SchurF.Z'
-        else
-            # real log exists whenever all eigenvalues are positive
-            is_log_real = !any(x -> isreal(x) && real(x) ≤ 0, SchurF.values)
-            if is_log_real
-                logA = SchurF.Z * log_quasitriu(SchurF.T) * SchurF.Z'
-            else
-                SchurS = Schur{Complex}(SchurF)
-                logA = SchurS.Z * log(UpperTriangular(SchurS.T)) * SchurS.Z'
-            end
-        end
-        return eltype(A) <: Complex ? complex(logA) : logA
-    else
-        SchurF = schur(A)
-        return SchurF.vectors * log(UpperTriangular(SchurF.T)) * SchurF.vectors'
-    end
-end
-
-log(A::AdjointAbsMat) = adjoint(log(parent(A)))
-log(A::TransposeAbsMat) = transpose(log(parent(A)))
-
-"""
-    sqrt(A::AbstractMatrix)
-
-If `A` has no negative real eigenvalues, compute the principal matrix square root of `A`,
-that is the unique matrix ``X`` with eigenvalues having positive real part such that
-``X^2 = A``. Otherwise, a nonprincipal square root is returned.
-
-If `A` is real-symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is
-used to compute the square root.   For such matrices, eigenvalues λ that
-appear to be slightly negative due to roundoff errors are treated as if they were zero.
-More precisely, matrices with all eigenvalues `≥ -rtol*(max |λ|)` are treated as semidefinite
-(yielding a Hermitian square root), with negative eigenvalues taken to be zero.
-`rtol` is a keyword argument to `sqrt` (in the Hermitian/real-symmetric case only) that
-defaults to machine precision scaled by `size(A,1)`.
-
-Otherwise, the square root is determined by means of the
-Björck-Hammarling method [^BH83], which computes the complex Schur form ([`schur`](@ref))
-and then the complex square root of the triangular factor.
-If a real square root exists, then an extension of this method [^H87] that computes the real
-Schur form and then the real square root of the quasi-triangular factor is instead used.
-
-[^BH83]:
-
-    Åke Björck and Sven Hammarling, "A Schur method for the square root of a matrix",
-    Linear Algebra and its Applications, 52-53, 1983, 127-140.
-    [doi:10.1016/0024-3795(83)80010-X](https://doi.org/10.1016/0024-3795(83)80010-X)
-
-[^H87]:
-
-    Nicholas J. Higham, "Computing real square roots of a real matrix",
-    Linear Algebra and its Applications, 88-89, 1987, 405-430.
-    [doi:10.1016/0024-3795(87)90118-2](https://doi.org/10.1016/0024-3795(87)90118-2)
-
-# Examples
-```jldoctest
-julia> A = [4 0; 0 4]
-2×2 Matrix{Int64}:
- 4  0
- 0  4
-
-julia> sqrt(A)
-2×2 Matrix{Float64}:
- 2.0  0.0
- 0.0  2.0
-```
-"""
-sqrt(::AbstractMatrix)
-
-function sqrt(A::AbstractMatrix{T}) where {T<:Union{Real,Complex}}
-    if checksquare(A) == 0
-        return copy(A)
-    elseif ishermitian(A)
-        sqrtHermA = sqrt(Hermitian(A))
-        return ishermitian(sqrtHermA) ? copytri!(parent(sqrtHermA), 'U', true) : parent(sqrtHermA)
-    elseif istriu(A)
-        return triu!(parent(sqrt(UpperTriangular(A))))
-    elseif isreal(A)
-        SchurF = schur(real(A))
-        if istriu(SchurF.T)
-            sqrtA = SchurF.Z * sqrt(UpperTriangular(SchurF.T)) * SchurF.Z'
-        else
-            # real sqrt exists whenever no eigenvalues are negative
-            is_sqrt_real = !any(x -> isreal(x) && real(x) < 0, SchurF.values)
-            # sqrt_quasitriu uses LAPACK functions for non-triu inputs
-            if typeof(sqrt(zero(T))) <: BlasFloat && is_sqrt_real
-                sqrtA = SchurF.Z * sqrt_quasitriu(SchurF.T) * SchurF.Z'
-            else
-                SchurS = Schur{Complex}(SchurF)
-                sqrtA = SchurS.Z * sqrt(UpperTriangular(SchurS.T)) * SchurS.Z'
-            end
-        end
-        return eltype(A) <: Complex ? complex(sqrtA) : sqrtA
-    else
-        SchurF = schur(A)
-        return SchurF.vectors * sqrt(UpperTriangular(SchurF.T)) * SchurF.vectors'
-    end
-end
-
-sqrt(A::AdjointAbsMat) = adjoint(sqrt(parent(A)))
-sqrt(A::TransposeAbsMat) = transpose(sqrt(parent(A)))
-
-function inv(A::StridedMatrix{T}) where T
-    checksquare(A)
-    if istriu(A)
-        Ai = triu!(parent(inv(UpperTriangular(A))))
-    elseif istril(A)
-        Ai = tril!(parent(inv(LowerTriangular(A))))
-    else
-        Ai = inv!(lu(A))
-        Ai = convert(typeof(parent(Ai)), Ai)
-    end
-    return Ai
-end
-
-"""
-    cos(A::AbstractMatrix)
-
-Compute the matrix cosine of a square matrix `A`.
-
-If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to
-compute the cosine. Otherwise, the cosine is determined by calling [`exp`](@ref).
-
-# Examples
-```jldoctest
-julia> cos(fill(1.0, (2,2)))
-2×2 Matrix{Float64}:
-  0.291927  -0.708073
- -0.708073   0.291927
-```
-"""
-function cos(A::AbstractMatrix{<:Real})
-    if issymmetric(A)
-        return copytri!(parent(cos(Symmetric(A))), 'U')
-    end
-    T = complex(float(eltype(A)))
-    return real(exp!(T.(im .* A)))
-end
-function cos(A::AbstractMatrix{<:Complex})
-    if ishermitian(A)
-        return copytri!(parent(cos(Hermitian(A))), 'U', true)
-    end
-    T = complex(float(eltype(A)))
-    X = exp!(T.(im .* A))
-    @. X = (X + $exp!(T(-im*A))) / 2
-    return X
-end
-
-"""
-    sin(A::AbstractMatrix)
-
-Compute the matrix sine of a square matrix `A`.
-
-If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to
-compute the sine. Otherwise, the sine is determined by calling [`exp`](@ref).
-
-# Examples
-```jldoctest
-julia> sin(fill(1.0, (2,2)))
-2×2 Matrix{Float64}:
- 0.454649  0.454649
- 0.454649  0.454649
-```
-"""
-function sin(A::AbstractMatrix{<:Real})
-    if issymmetric(A)
-        return copytri!(parent(sin(Symmetric(A))), 'U')
-    end
-    T = complex(float(eltype(A)))
-    return imag(exp!(T.(im .* A)))
-end
-function sin(A::AbstractMatrix{<:Complex})
-    if ishermitian(A)
-        return copytri!(parent(sin(Hermitian(A))), 'U', true)
-    end
-    T = complex(float(eltype(A)))
-    X = exp!(T.(im .* A))
-    Y = exp!(T.(.-im .* A))
-    @inbounds for i in eachindex(X)
-        x, y = X[i]/2, Y[i]/2
-        X[i] = Complex(imag(x)-imag(y), real(y)-real(x))
-    end
-    return X
-end
-
-"""
-    sincos(A::AbstractMatrix)
-
-Compute the matrix sine and cosine of a square matrix `A`.
-
-# Examples
-```jldoctest
-julia> S, C = sincos(fill(1.0, (2,2)));
-
-julia> S
-2×2 Matrix{Float64}:
- 0.454649  0.454649
- 0.454649  0.454649
-
-julia> C
-2×2 Matrix{Float64}:
-  0.291927  -0.708073
- -0.708073   0.291927
-```
-"""
-function sincos(A::AbstractMatrix{<:Real})
-    if issymmetric(A)
-        symsinA, symcosA = sincos(Symmetric(A))
-        sinA = copytri!(parent(symsinA), 'U')
-        cosA = copytri!(parent(symcosA), 'U')
-        return sinA, cosA
-    end
-    T = complex(float(eltype(A)))
-    c, s = reim(exp!(T.(im .* A)))
-    return s, c
-end
-function sincos(A::AbstractMatrix{<:Complex})
-    if ishermitian(A)
-        hermsinA, hermcosA = sincos(Hermitian(A))
-        sinA = copytri!(parent(hermsinA), 'U', true)
-        cosA = copytri!(parent(hermcosA), 'U', true)
-        return sinA, cosA
-    end
-    T = complex(float(eltype(A)))
-    X = exp!(T.(im .* A))
-    Y = exp!(T.(.-im .* A))
-    @inbounds for i in eachindex(X)
-        x, y = X[i]/2, Y[i]/2
-        X[i] = Complex(imag(x)-imag(y), real(y)-real(x))
-        Y[i] = x+y
-    end
-    return X, Y
-end
-
-"""
-    tan(A::AbstractMatrix)
-
-Compute the matrix tangent of a square matrix `A`.
-
-If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to
-compute the tangent. Otherwise, the tangent is determined by calling [`exp`](@ref).
-
-# Examples
-```jldoctest
-julia> tan(fill(1.0, (2,2)))
-2×2 Matrix{Float64}:
- -1.09252  -1.09252
- -1.09252  -1.09252
-```
-"""
-function tan(A::AbstractMatrix)
-    if ishermitian(A)
-        return copytri!(parent(tan(Hermitian(A))), 'U', true)
-    end
-    S, C = sincos(A)
-    S /= C
-    return S
-end
-
-"""
-    cosh(A::AbstractMatrix)
-
-Compute the matrix hyperbolic cosine of a square matrix `A`.
-"""
-function cosh(A::AbstractMatrix)
-    if ishermitian(A)
-        return copytri!(parent(cosh(Hermitian(A))), 'U', true)
-    end
-    X = exp(A)
-    @. X = (X + $exp!(float(-A))) / 2
-    return X
-end
-
-"""
-    sinh(A::AbstractMatrix)
-
-Compute the matrix hyperbolic sine of a square matrix `A`.
-"""
-function sinh(A::AbstractMatrix)
-    if ishermitian(A)
-        return copytri!(parent(sinh(Hermitian(A))), 'U', true)
-    end
-    X = exp(A)
-    @. X = (X - $exp!(float(-A))) / 2
-    return X
-end
-
-"""
-    tanh(A::AbstractMatrix)
-
-Compute the matrix hyperbolic tangent of a square matrix `A`.
-"""
-function tanh(A::AbstractMatrix)
-    if ishermitian(A)
-        return copytri!(parent(tanh(Hermitian(A))), 'U', true)
-    end
-    X = exp(A)
-    Y = exp!(float.(.-A))
-    @inbounds for i in eachindex(X)
-        x, y = X[i], Y[i]
-        X[i] = x - y
-        Y[i] = x + y
-    end
-    X /= Y
-    return X
-end
-
-"""
-    acos(A::AbstractMatrix)
-
-Compute the inverse matrix cosine of a square matrix `A`.
-
-If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to
-compute the inverse cosine. Otherwise, the inverse cosine is determined by using
-[`log`](@ref) and [`sqrt`](@ref).  For the theory and logarithmic formulas used to compute
-this function, see [^AH16_1].
-
-[^AH16_1]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577)
-
-# Examples
-```julia-repl
-julia> acos(cos([0.5 0.1; -0.2 0.3]))
-2×2 Matrix{ComplexF64}:
-  0.5-8.32667e-17im  0.1+0.0im
- -0.2+2.63678e-16im  0.3-3.46945e-16im
-```
-"""
-function acos(A::AbstractMatrix)
-    if ishermitian(A)
-        acosHermA = acos(Hermitian(A))
-        return isa(acosHermA, Hermitian) ? copytri!(parent(acosHermA), 'U', true) : parent(acosHermA)
-    end
-    SchurF = Schur{Complex}(schur(A))
-    U = UpperTriangular(SchurF.T)
-    R = triu!(parent(-im * log(U + im * sqrt(I - U^2))))
-    return SchurF.Z * R * SchurF.Z'
-end
-
-"""
-    asin(A::AbstractMatrix)
-
-Compute the inverse matrix sine of a square matrix `A`.
-
-If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to
-compute the inverse sine. Otherwise, the inverse sine is determined by using [`log`](@ref)
-and [`sqrt`](@ref).  For the theory and logarithmic formulas used to compute this function,
-see [^AH16_2].
-
-[^AH16_2]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577)
-
-# Examples
-```julia-repl
-julia> asin(sin([0.5 0.1; -0.2 0.3]))
-2×2 Matrix{ComplexF64}:
-  0.5-4.16334e-17im  0.1-5.55112e-17im
- -0.2+9.71445e-17im  0.3-1.249e-16im
-```
-"""
-function asin(A::AbstractMatrix)
-    if ishermitian(A)
-        asinHermA = asin(Hermitian(A))
-        return isa(asinHermA, Hermitian) ? copytri!(parent(asinHermA), 'U', true) : parent(asinHermA)
-    end
-    SchurF = Schur{Complex}(schur(A))
-    U = UpperTriangular(SchurF.T)
-    R = triu!(parent(-im * log(im * U + sqrt(I - U^2))))
-    return SchurF.Z * R * SchurF.Z'
-end
-
-"""
-    atan(A::AbstractMatrix)
-
-Compute the inverse matrix tangent of a square matrix `A`.
-
-If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to
-compute the inverse tangent. Otherwise, the inverse tangent is determined by using
-[`log`](@ref).  For the theory and logarithmic formulas used to compute this function, see
-[^AH16_3].
-
-[^AH16_3]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577)
-
-# Examples
-```julia-repl
-julia> atan(tan([0.5 0.1; -0.2 0.3]))
-2×2 Matrix{ComplexF64}:
-  0.5+1.38778e-17im  0.1-2.77556e-17im
- -0.2+6.93889e-17im  0.3-4.16334e-17im
-```
-"""
-function atan(A::AbstractMatrix)
-    if ishermitian(A)
-        return copytri!(parent(atan(Hermitian(A))), 'U', true)
-    end
-    SchurF = Schur{Complex}(schur(A))
-    U = im * UpperTriangular(SchurF.T)
-    R = triu!(parent(log((I + U) / (I - U)) / 2im))
-    return SchurF.Z * R * SchurF.Z'
-end
-
-"""
-    acosh(A::AbstractMatrix)
-
-Compute the inverse hyperbolic matrix cosine of a square matrix `A`.  For the theory and
-logarithmic formulas used to compute this function, see [^AH16_4].
-
-[^AH16_4]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577)
-"""
-function acosh(A::AbstractMatrix)
-    if ishermitian(A)
-        acoshHermA = acosh(Hermitian(A))
-        return isa(acoshHermA, Hermitian) ? copytri!(parent(acoshHermA), 'U', true) : parent(acoshHermA)
-    end
-    SchurF = Schur{Complex}(schur(A))
-    U = UpperTriangular(SchurF.T)
-    R = triu!(parent(log(U + sqrt(U - I) * sqrt(U + I))))
-    return SchurF.Z * R * SchurF.Z'
-end
-
-"""
-    asinh(A::AbstractMatrix)
-
-Compute the inverse hyperbolic matrix sine of a square matrix `A`.  For the theory and
-logarithmic formulas used to compute this function, see [^AH16_5].
-
-[^AH16_5]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577)
-"""
-function asinh(A::AbstractMatrix)
-    if ishermitian(A)
-        return copytri!(parent(asinh(Hermitian(A))), 'U', true)
-    end
-    SchurF = Schur{Complex}(schur(A))
-    U = UpperTriangular(SchurF.T)
-    R = triu!(parent(log(U + sqrt(I + U^2))))
-    return SchurF.Z * R * SchurF.Z'
-end
-
-"""
-    atanh(A::AbstractMatrix)
-
-Compute the inverse hyperbolic matrix tangent of a square matrix `A`.  For the theory and
-logarithmic formulas used to compute this function, see [^AH16_6].
-
-[^AH16_6]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577)
-"""
-function atanh(A::AbstractMatrix)
-    if ishermitian(A)
-        return copytri!(parent(atanh(Hermitian(A))), 'U', true)
-    end
-    SchurF = Schur{Complex}(schur(A))
-    U = UpperTriangular(SchurF.T)
-    R = triu!(parent(log((I + U) / (I - U)) / 2))
-    return SchurF.Z * R * SchurF.Z'
-end
-
-for (finv, f, finvh, fh, fn) in ((:sec, :cos, :sech, :cosh, "secant"),
-                                 (:csc, :sin, :csch, :sinh, "cosecant"),
-                                 (:cot, :tan, :coth, :tanh, "cotangent"))
-    name = string(finv)
-    hname = string(finvh)
-    @eval begin
-        @doc """
-            $($name)(A::AbstractMatrix)
-
-        Compute the matrix $($fn) of a square matrix `A`.
-        """ ($finv)(A::AbstractMatrix{T}) where {T} = inv(($f)(A))
-        @doc """
-            $($hname)(A::AbstractMatrix)
-
-        Compute the matrix hyperbolic $($fn) of square matrix `A`.
-        """ ($finvh)(A::AbstractMatrix{T}) where {T} = inv(($fh)(A))
-    end
-end
-
-for (tfa, tfainv, hfa, hfainv, fn) in ((:asec, :acos, :asech, :acosh, "secant"),
-                                       (:acsc, :asin, :acsch, :asinh, "cosecant"),
-                                       (:acot, :atan, :acoth, :atanh, "cotangent"))
-    tname = string(tfa)
-    hname = string(hfa)
-    @eval begin
-        @doc """
-            $($tname)(A::AbstractMatrix)
-        Compute the inverse matrix $($fn) of `A`. """ ($tfa)(A::AbstractMatrix{T}) where {T} = ($tfainv)(inv(A))
-        @doc """
-            $($hname)(A::AbstractMatrix)
-        Compute the inverse matrix hyperbolic $($fn) of `A`. """ ($hfa)(A::AbstractMatrix{T}) where {T} = ($hfainv)(inv(A))
-    end
-end
-
-"""
-    factorize(A)
-
-Compute a convenient factorization of `A`, based upon the type of the input matrix.
-`factorize` checks `A` to see if it is symmetric/triangular/etc. if `A` is passed
-as a generic matrix. `factorize` checks every element of `A` to verify/rule out
-each property. It will short-circuit as soon as it can rule out symmetry/triangular
-structure. The return value can be reused for efficient solving of multiple
-systems. For example: `A=factorize(A); x=A\\b; y=A\\C`.
-
-| Properties of `A`          | type of factorization                          |
-|:---------------------------|:-----------------------------------------------|
-| Positive-definite          | Cholesky (see [`cholesky`](@ref))  |
-| Dense Symmetric/Hermitian  | Bunch-Kaufman (see [`bunchkaufman`](@ref)) |
-| Sparse Symmetric/Hermitian | LDLt (see [`ldlt`](@ref))      |
-| Triangular                 | Triangular                                     |
-| Diagonal                   | Diagonal                                       |
-| Bidiagonal                 | Bidiagonal                                     |
-| Tridiagonal                | LU (see [`lu`](@ref))            |
-| Symmetric real tridiagonal | LDLt (see [`ldlt`](@ref))      |
-| General square             | LU (see [`lu`](@ref))            |
-| General non-square         | QR (see [`qr`](@ref))            |
-
-If `factorize` is called on a Hermitian positive-definite matrix, for instance, then `factorize`
-will return a Cholesky factorization.
-
-# Examples
-```jldoctest
-julia> A = Array(Bidiagonal(fill(1.0, (5, 5)), :U))
-5×5 Matrix{Float64}:
- 1.0  1.0  0.0  0.0  0.0
- 0.0  1.0  1.0  0.0  0.0
- 0.0  0.0  1.0  1.0  0.0
- 0.0  0.0  0.0  1.0  1.0
- 0.0  0.0  0.0  0.0  1.0
-
-julia> factorize(A) # factorize will check to see that A is already factorized
-5×5 Bidiagonal{Float64, Vector{Float64}}:
- 1.0  1.0   ⋅    ⋅    ⋅
-  ⋅   1.0  1.0   ⋅    ⋅
-  ⋅    ⋅   1.0  1.0   ⋅
-  ⋅    ⋅    ⋅   1.0  1.0
-  ⋅    ⋅    ⋅    ⋅   1.0
-```
-This returns a `5×5 Bidiagonal{Float64}`, which can now be passed to other linear algebra functions
-(e.g. eigensolvers) which will use specialized methods for `Bidiagonal` types.
-"""
-function factorize(A::AbstractMatrix{T}) where T
-    m, n = size(A)
-    if m == n
-        if m == 1 return A[1] end
-        utri    = true
-        utri1   = true
-        herm    = true
-        sym     = true
-        for j = 1:n-1, i = j+1:m
-            if utri1
-                if A[i,j] != 0
-                    utri1 = i == j + 1
-                    utri = false
-                end
-            end
-            if sym
-                sym &= A[i,j] == A[j,i]
-            end
-            if herm
-                herm &= A[i,j] == conj(A[j,i])
-            end
-            if !(utri1|herm|sym) break end
-        end
-        ltri = true
-        ltri1 = true
-        for j = 3:n, i = 1:j-2
-            ltri1 &= A[i,j] == 0
-            if !ltri1 break end
-        end
-        if ltri1
-            for i = 1:n-1
-                if A[i,i+1] != 0
-                    ltri &= false
-                    break
-                end
-            end
-            if ltri
-                if utri
-                    return Diagonal(A)
-                end
-                if utri1
-                    return Bidiagonal(diag(A), diag(A, -1), :L)
-                end
-                return LowerTriangular(A)
-            end
-            if utri
-                return Bidiagonal(diag(A), diag(A, 1), :U)
-            end
-            if utri1
-                # TODO: enable once a specialized, non-dense bunchkaufman method exists
-                # if (herm & (T <: Complex)) | sym
-                    # return bunchkaufman(SymTridiagonal(diag(A), diag(A, -1)))
-                # end
-                return lu(Tridiagonal(diag(A, -1), diag(A), diag(A, 1)))
-            end
-        end
-        if utri
-            return UpperTriangular(A)
-        end
-        if herm
-            cf = cholesky(A; check = false)
-            if cf.info == 0
-                return cf
-            else
-                return factorize(Hermitian(A))
-            end
-        end
-        if sym
-            return factorize(Symmetric(A))
-        end
-        return lu(A)
-    end
-    qr(A, ColumnNorm())
-end
-factorize(A::Adjoint)   =   adjoint(factorize(parent(A)))
-factorize(A::Transpose) = transpose(factorize(parent(A)))
-factorize(a::Number)    = a # same as how factorize behaves on Diagonal types
-
-## Moore-Penrose pseudoinverse
-
-"""
-    pinv(M; atol::Real=0, rtol::Real=atol>0 ? 0 : n*ϵ)
-    pinv(M, rtol::Real) = pinv(M; rtol=rtol) # to be deprecated in Julia 2.0
-
-Computes the Moore-Penrose pseudoinverse.
-
-For matrices `M` with floating point elements, it is convenient to compute
-the pseudoinverse by inverting only singular values greater than
-`max(atol, rtol*σ₁)` where `σ₁` is the largest singular value of `M`.
-
-The optimal choice of absolute (`atol`) and relative tolerance (`rtol`) varies
-both with the value of `M` and the intended application of the pseudoinverse.
-The default relative tolerance is `n*ϵ`, where `n` is the size of the smallest
-dimension of `M`, and `ϵ` is the [`eps`](@ref) of the element type of `M`.
-
-For inverting dense ill-conditioned matrices in a least-squares sense,
-`rtol = sqrt(eps(real(float(oneunit(eltype(M))))))` is recommended.
-
-For more information, see [^issue8859], [^B96], [^S84], [^KY88].
-
-# Examples
-```jldoctest
-julia> M = [1.5 1.3; 1.2 1.9]
-2×2 Matrix{Float64}:
- 1.5  1.3
- 1.2  1.9
-
-julia> N = pinv(M)
-2×2 Matrix{Float64}:
-  1.47287   -1.00775
- -0.930233   1.16279
-
-julia> M * N
-2×2 Matrix{Float64}:
- 1.0          -2.22045e-16
- 4.44089e-16   1.0
-```
-
-[^issue8859]: Issue 8859, "Fix least squares", [https://github.com/JuliaLang/julia/pull/8859](https://github.com/JuliaLang/julia/pull/8859)
-
-[^B96]: Åke Björck, "Numerical Methods for Least Squares Problems",  SIAM Press, Philadelphia, 1996, "Other Titles in Applied Mathematics", Vol. 51. [doi:10.1137/1.9781611971484](http://epubs.siam.org/doi/book/10.1137/1.9781611971484)
-
-[^S84]: G. W. Stewart, "Rank Degeneracy", SIAM Journal on Scientific and Statistical Computing, 5(2), 1984, 403-413. [doi:10.1137/0905030](http://epubs.siam.org/doi/abs/10.1137/0905030)
-
-[^KY88]: Konstantinos Konstantinides and Kung Yao, "Statistical analysis of effective singular values in matrix rank determination", IEEE Transactions on Acoustics, Speech and Signal Processing, 36(5), 1988, 757-763. [doi:10.1109/29.1585](https://doi.org/10.1109/29.1585)
-"""
-function pinv(A::AbstractMatrix{T}; atol::Real = 0.0, rtol::Real = (eps(real(float(oneunit(T))))*min(size(A)...))*iszero(atol)) where T
-    m, n = size(A)
-    Tout = typeof(zero(T)/sqrt(oneunit(T) + oneunit(T)))
-    if m == 0 || n == 0
-        return similar(A, Tout, (n, m))
-    end
-    if isdiag(A)
-        indA = diagind(A)
-        dA = view(A, indA)
-        maxabsA = maximum(abs, dA)
-        tol = max(rtol * maxabsA, atol)
-        B = fill!(similar(A, Tout, (n, m)), 0)
-        indB = diagind(B)
-        B[indB] .= (x -> abs(x) > tol ? pinv(x) : zero(x)).(dA)
-        return B
-    end
-    SVD         = svd(A)
-    tol         = max(rtol*maximum(SVD.S), atol)
-    Stype       = eltype(SVD.S)
-    Sinv        = fill!(similar(A, Stype, length(SVD.S)), 0)
-    index       = SVD.S .> tol
-    Sinv[index] .= pinv.(view(SVD.S, index))
-    return SVD.Vt' * (Diagonal(Sinv) * SVD.U')
-end
-function pinv(x::Number)
-    xi = inv(x)
-    return ifelse(isfinite(xi), xi, zero(xi))
-end
-
-## Basis for null space
-
-"""
-    nullspace(M; atol::Real=0, rtol::Real=atol>0 ? 0 : n*ϵ)
-    nullspace(M, rtol::Real) = nullspace(M; rtol=rtol) # to be deprecated in Julia 2.0
-
-Computes a basis for the nullspace of `M` by including the singular
-vectors of `M` whose singular values have magnitudes smaller than `max(atol, rtol*σ₁)`,
-where `σ₁` is `M`'s largest singular value.
-
-By default, the relative tolerance `rtol` is `n*ϵ`, where `n`
-is the size of the smallest dimension of `M`, and `ϵ` is the [`eps`](@ref) of
-the element type of `M`.
-
-# Examples
-```jldoctest
-julia> M = [1 0 0; 0 1 0; 0 0 0]
-3×3 Matrix{Int64}:
- 1  0  0
- 0  1  0
- 0  0  0
-
-julia> nullspace(M)
-3×1 Matrix{Float64}:
- 0.0
- 0.0
- 1.0
-
-julia> nullspace(M, rtol=3)
-3×3 Matrix{Float64}:
- 0.0  1.0  0.0
- 1.0  0.0  0.0
- 0.0  0.0  1.0
-
-julia> nullspace(M, atol=0.95)
-3×1 Matrix{Float64}:
- 0.0
- 0.0
- 1.0
-```
-"""
-function nullspace(A::AbstractVecOrMat; atol::Real = 0.0, rtol::Real = (min(size(A, 1), size(A, 2))*eps(real(float(oneunit(eltype(A))))))*iszero(atol))
-    m, n = size(A, 1), size(A, 2)
-    (m == 0 || n == 0) && return Matrix{eigtype(eltype(A))}(I, n, n)
-    SVD = svd(A; full=true)
-    tol = max(atol, SVD.S[1]*rtol)
-    indstart = sum(s -> s .> tol, SVD.S) + 1
-    return copy((@view SVD.Vt[indstart:end,:])')
-end
-
-"""
-    cond(M, p::Real=2)
-
-Condition number of the matrix `M`, computed using the operator `p`-norm. Valid values for
-`p` are `1`, `2` (default), or `Inf`.
-"""
-function cond(A::AbstractMatrix, p::Real=2)
-    if p == 2
-        v = svdvals(A)
-        maxv = maximum(v)
-        return iszero(maxv) ? oftype(real(maxv), Inf) : maxv / minimum(v)
-    elseif p == 1 || p == Inf
-        checksquare(A)
-        try
-            Ainv = inv(A)
-            return opnorm(A, p)*opnorm(Ainv, p)
-        catch e
-            if isa(e, LAPACKException) || isa(e, SingularException)
-                return convert(float(real(eltype(A))), Inf)
-            else
-                rethrow()
-            end
-        end
-    end
-    throw(ArgumentError("p-norm must be 1, 2 or Inf, got $p"))
-end
-
-## Lyapunov and Sylvester equation
-
-# AX + XB + C = 0
-
-"""
-    sylvester(A, B, C)
-
-Computes the solution `X` to the Sylvester equation `AX + XB + C = 0`, where `A`, `B` and
-`C` have compatible dimensions and `A` and `-B` have no eigenvalues with equal real part.
-
-# Examples
-```jldoctest
-julia> A = [3. 4.; 5. 6]
-2×2 Matrix{Float64}:
- 3.0  4.0
- 5.0  6.0
-
-julia> B = [1. 1.; 1. 2.]
-2×2 Matrix{Float64}:
- 1.0  1.0
- 1.0  2.0
-
-julia> C = [1. 2.; -2. 1]
-2×2 Matrix{Float64}:
-  1.0  2.0
- -2.0  1.0
-
-julia> X = sylvester(A, B, C)
-2×2 Matrix{Float64}:
- -4.46667   1.93333
-  3.73333  -1.8
-
-julia> A*X + X*B ≈ -C
-true
-```
-"""
-function sylvester(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix)
-    T = promote_type(float(eltype(A)), float(eltype(B)), float(eltype(C)))
-    return sylvester(copy_similar(A, T), copy_similar(B, T), copy_similar(C, T))
-end
-function sylvester(A::AbstractMatrix{T}, B::AbstractMatrix{T}, C::AbstractMatrix{T}) where {T<:BlasFloat}
-    RA, QA = schur(A)
-    RB, QB = schur(B)
-    D = QA' * C * QB
-    D .= .-D
-    Y, scale = LAPACK.trsyl!('N', 'N', RA, RB, D)
-    rmul!(QA * Y * QB', inv(scale))
-end
-
-Base.@propagate_inbounds function _sylvester_2x1!(A, B, C)
-    b = B[1]
-    a21, a12 = A[2, 1], A[1, 2]
-    m11 = b + A[1, 1]
-    m22 = b + A[2, 2]
-    d = m11 * m22 - a12 * a21
-    c1, c2 = C
-    C[1] = (a12 * c2 - m22 * c1) / d
-    C[2] = (a21 * c1 - m11 * c2) / d
-    return C
-end
-Base.@propagate_inbounds function _sylvester_1x2!(A, B, C)
-    a = A[1]
-    b21, b12 = B[2, 1], B[1, 2]
-    m11 = a + B[1, 1]
-    m22 = a + B[2, 2]
-    d = m11 * m22 - b21 * b12
-    c1, c2 = C
-    C[1] = (b21 * c2 - m22 * c1) / d
-    C[2] = (b12 * c1 - m11 * c2) / d
-    return C
-end
-function _sylvester_2x2!(A, B, C)
-    _, scale = LAPACK.trsyl!('N', 'N', A, B, C)
-    rmul!(C, -inv(scale))
-    return C
-end
-
-sylvester(a::Union{Real,Complex}, b::Union{Real,Complex}, c::Union{Real,Complex}) = -c / (a + b)
-
-# AX + XA' + C = 0
-
-"""
-    lyap(A, C)
-
-Computes the solution `X` to the continuous Lyapunov equation `AX + XA' + C = 0`, where no
-eigenvalue of `A` has a zero real part and no two eigenvalues are negative complex
-conjugates of each other.
-
-# Examples
-```jldoctest
-julia> A = [3. 4.; 5. 6]
-2×2 Matrix{Float64}:
- 3.0  4.0
- 5.0  6.0
-
-julia> B = [1. 1.; 1. 2.]
-2×2 Matrix{Float64}:
- 1.0  1.0
- 1.0  2.0
-
-julia> X = lyap(A, B)
-2×2 Matrix{Float64}:
-  0.5  -0.5
- -0.5   0.25
-
-julia> A*X + X*A' ≈ -B
-true
-```
-"""
-function lyap(A::AbstractMatrix, C::AbstractMatrix)
-    T = promote_type(float(eltype(A)), float(eltype(C)))
-    return lyap(copy_similar(A, T), copy_similar(C, T))
-end
-function lyap(A::AbstractMatrix{T}, C::AbstractMatrix{T}) where {T<:BlasFloat}
-    R, Q = schur(A)
-    D = Q' * C * Q
-    D .= .-D
-    Y, scale = LAPACK.trsyl!('N', T <: Complex ? 'C' : 'T', R, R, D)
-    rmul!(Q * Y * Q', inv(scale))
-end
-lyap(a::Union{Real,Complex}, c::Union{Real,Complex}) = -c/(2real(a))
diff --git a/stdlib/LinearAlgebra/src/deprecated.jl b/stdlib/LinearAlgebra/src/deprecated.jl
deleted file mode 100644
index 28c090634a2d8..0000000000000
--- a/stdlib/LinearAlgebra/src/deprecated.jl
+++ /dev/null
@@ -1,7 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# To be deprecated in 2.0
-rank(A::AbstractMatrix, tol::Real) = rank(A,rtol=tol)
-nullspace(A::AbstractVector, tol::Real) = nullspace(reshape(A, length(A), 1), rtol= tol)
-nullspace(A::AbstractMatrix, tol::Real) = nullspace(A, rtol=tol)
-pinv(A::AbstractMatrix{T}, tol::Real) where T = pinv(A, rtol=tol)
diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl
deleted file mode 100644
index 37359fd1074f8..0000000000000
--- a/stdlib/LinearAlgebra/src/diagonal.jl
+++ /dev/null
@@ -1,921 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## Diagonal matrices
-
-struct Diagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T}
-    diag::V
-
-    function Diagonal{T,V}(diag) where {T,V<:AbstractVector{T}}
-        require_one_based_indexing(diag)
-        new{T,V}(diag)
-    end
-end
-Diagonal{T,V}(d::Diagonal) where {T,V<:AbstractVector{T}} = Diagonal{T,V}(d.diag)
-Diagonal(v::AbstractVector{T}) where {T} = Diagonal{T,typeof(v)}(v)
-Diagonal{T}(v::AbstractVector) where {T} = Diagonal(convert(AbstractVector{T}, v)::AbstractVector{T})
-
-function Base.promote_rule(A::Type{<:Diagonal{<:Any,V}}, B::Type{<:Diagonal{<:Any,W}}) where {V,W}
-    X = promote_type(V, W)
-    T = eltype(X)
-    isconcretetype(T) && return Diagonal{T,X}
-    return typejoin(A, B)
-end
-
-"""
-    Diagonal(V::AbstractVector)
-
-Construct a lazy matrix with `V` as its diagonal.
-
-See also [`UniformScaling`](@ref) for the lazy identity matrix `I`,
-[`diagm`](@ref) to make a dense matrix, and [`diag`](@ref) to extract diagonal elements.
-
-# Examples
-```jldoctest
-julia> d = Diagonal([1, 10, 100])
-3×3 Diagonal{$Int, Vector{$Int}}:
- 1   ⋅    ⋅
- ⋅  10    ⋅
- ⋅   ⋅  100
-
-julia> diagm([7, 13])
-2×2 Matrix{$Int}:
- 7   0
- 0  13
-
-julia> ans + I
-2×2 Matrix{Int64}:
- 8   0
- 0  14
-
-julia> I(2)
-2×2 Diagonal{Bool, Vector{Bool}}:
- 1  ⋅
- ⋅  1
-```
-
-Note that a one-column matrix is not treated like a vector, but instead calls the
-method `Diagonal(A::AbstractMatrix)` which extracts 1-element `diag(A)`:
-
-```jldoctest
-julia> A = transpose([7.0 13.0])
-2×1 transpose(::Matrix{Float64}) with eltype Float64:
-  7.0
- 13.0
-
-julia> Diagonal(A)
-1×1 Diagonal{Float64, Vector{Float64}}:
- 7.0
-```
-"""
-Diagonal(V::AbstractVector)
-
-"""
-    Diagonal(A::AbstractMatrix)
-
-Construct a matrix from the diagonal of `A`.
-
-# Examples
-```jldoctest
-julia> A = permutedims(reshape(1:15, 5, 3))
-3×5 Matrix{Int64}:
-  1   2   3   4   5
-  6   7   8   9  10
- 11  12  13  14  15
-
-julia> Diagonal(A)
-3×3 Diagonal{$Int, Vector{$Int}}:
- 1  ⋅   ⋅
- ⋅  7   ⋅
- ⋅  ⋅  13
-
-julia> diag(A, 2)
-3-element Vector{$Int}:
-  3
-  9
- 15
-```
-"""
-Diagonal(A::AbstractMatrix) = Diagonal(diag(A))
-Diagonal{T}(A::AbstractMatrix) where T = Diagonal{T}(diag(A))
-function convert(::Type{T}, A::AbstractMatrix) where T<:Diagonal
-    checksquare(A)
-    isdiag(A) ? T(A) : throw(InexactError(:convert, T, A))
-end
-
-Diagonal(D::Diagonal) = D
-Diagonal{T}(D::Diagonal{T}) where {T} = D
-Diagonal{T}(D::Diagonal) where {T} = Diagonal{T}(D.diag)
-
-AbstractMatrix{T}(D::Diagonal) where {T} = Diagonal{T}(D)
-Matrix(D::Diagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(D)
-Array(D::Diagonal{T}) where {T} = Matrix(D)
-function Matrix{T}(D::Diagonal) where {T}
-    n = size(D, 1)
-    B = Matrix{T}(undef, n, n)
-    n > 1 && fill!(B, zero(T))
-    @inbounds for i in 1:n
-        B[i,i] = D.diag[i]
-    end
-    return B
-end
-
-"""
-    Diagonal{T}(undef, n)
-
-Construct an uninitialized `Diagonal{T}` of length `n`. See `undef`.
-"""
-Diagonal{T}(::UndefInitializer, n::Integer) where T = Diagonal(Vector{T}(undef, n))
-
-similar(D::Diagonal, ::Type{T}) where {T} = Diagonal(similar(D.diag, T))
-similar(D::Diagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(D.diag, T, dims)
-
-copyto!(D1::Diagonal, D2::Diagonal) = (copyto!(D1.diag, D2.diag); D1)
-
-size(D::Diagonal) = (n = length(D.diag); (n,n))
-
-function size(D::Diagonal,d::Integer)
-    if d<1
-        throw(ArgumentError("dimension must be ≥ 1, got $d"))
-    end
-    return d<=2 ? length(D.diag) : 1
-end
-
-@inline function Base.isassigned(D::Diagonal, i::Int, j::Int)
-    @boundscheck checkbounds(Bool, D, i, j) || return false
-    if i == j
-        @inbounds r = isassigned(D.diag, i)
-    else
-        r = true
-    end
-    r
-end
-
-@inline function Base.isstored(D::Diagonal, i::Int, j::Int)
-    @boundscheck checkbounds(D, i, j)
-    if i == j
-        @inbounds r = Base.isstored(D.diag, i)
-    else
-        r = false
-    end
-    r
-end
-
-@inline function getindex(D::Diagonal, i::Int, j::Int)
-    @boundscheck checkbounds(D, i, j)
-    if i == j
-        @inbounds r = D.diag[i]
-    else
-        r = diagzero(D, i, j)
-    end
-    r
-end
-diagzero(::Diagonal{T}, i, j) where {T} = zero(T)
-diagzero(D::Diagonal{<:AbstractMatrix{T}}, i, j) where {T} = zeros(T, size(D.diag[i], 1), size(D.diag[j], 2))
-
-function setindex!(D::Diagonal, v, i::Int, j::Int)
-    @boundscheck checkbounds(D, i, j)
-    if i == j
-        @inbounds D.diag[i] = v
-    elseif !iszero(v)
-        throw(ArgumentError("cannot set off-diagonal entry ($i, $j) to a nonzero value ($v)"))
-    end
-    return v
-end
-
-
-## structured matrix methods ##
-function Base.replace_in_print_matrix(A::Diagonal,i::Integer,j::Integer,s::AbstractString)
-    i==j ? s : Base.replace_with_centered_mark(s)
-end
-
-parent(D::Diagonal) = D.diag
-
-ishermitian(D::Diagonal{<:Real}) = true
-ishermitian(D::Diagonal{<:Number}) = isreal(D.diag)
-ishermitian(D::Diagonal) = all(ishermitian, D.diag)
-issymmetric(D::Diagonal{<:Number}) = true
-issymmetric(D::Diagonal) = all(issymmetric, D.diag)
-isposdef(D::Diagonal) = all(isposdef, D.diag)
-
-factorize(D::Diagonal) = D
-
-real(D::Diagonal) = Diagonal(real(D.diag))
-imag(D::Diagonal) = Diagonal(imag(D.diag))
-
-iszero(D::Diagonal) = all(iszero, D.diag)
-isone(D::Diagonal) = all(isone, D.diag)
-isdiag(D::Diagonal) = all(isdiag, D.diag)
-isdiag(D::Diagonal{<:Number}) = true
-istriu(D::Diagonal, k::Integer=0) = k <= 0 || iszero(D.diag) ? true : false
-istril(D::Diagonal, k::Integer=0) = k >= 0 || iszero(D.diag) ? true : false
-function triu!(D::Diagonal{T}, k::Integer=0) where T
-    n = size(D,1)
-    if !(-n + 1 <= k <= n + 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
-    elseif k > 0
-        fill!(D.diag, zero(T))
-    end
-    return D
-end
-
-function tril!(D::Diagonal{T}, k::Integer=0) where T
-    n = size(D,1)
-    if !(-n - 1 <= k <= n - 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
-    elseif k < 0
-        fill!(D.diag, zero(T))
-    end
-    return D
-end
-
-(==)(Da::Diagonal, Db::Diagonal) = Da.diag == Db.diag
-(-)(A::Diagonal) = Diagonal(-A.diag)
-(+)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag + Db.diag)
-(-)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag - Db.diag)
-
-for f in (:+, :-)
-    @eval function $f(D::Diagonal, S::Symmetric)
-        return Symmetric($f(D, S.data), sym_uplo(S.uplo))
-    end
-    @eval function $f(S::Symmetric, D::Diagonal)
-        return Symmetric($f(S.data, D), sym_uplo(S.uplo))
-    end
-    @eval function $f(D::Diagonal{<:Real}, H::Hermitian)
-        return Hermitian($f(D, H.data), sym_uplo(H.uplo))
-    end
-    @eval function $f(H::Hermitian, D::Diagonal{<:Real})
-        return Hermitian($f(H.data, D), sym_uplo(H.uplo))
-    end
-end
-
-(*)(x::Number, D::Diagonal) = Diagonal(x * D.diag)
-(*)(D::Diagonal, x::Number) = Diagonal(D.diag * x)
-(/)(D::Diagonal, x::Number) = Diagonal(D.diag / x)
-(\)(x::Number, D::Diagonal) = Diagonal(x \ D.diag)
-(^)(D::Diagonal, a::Number) = Diagonal(D.diag .^ a)
-(^)(D::Diagonal, a::Real) = Diagonal(D.diag .^ a) # for disambiguation
-(^)(D::Diagonal, a::Integer) = Diagonal(D.diag .^ a) # for disambiguation
-Base.literal_pow(::typeof(^), D::Diagonal, valp::Val) =
-    Diagonal(Base.literal_pow.(^, D.diag, valp)) # for speed
-Base.literal_pow(::typeof(^), D::Diagonal, ::Val{-1}) = inv(D) # for disambiguation
-
-function _muldiag_size_check(A, B)
-    nA = size(A, 2)
-    mB = size(B, 1)
-    @noinline throw_dimerr(::AbstractMatrix, nA, mB) = throw(DimensionMismatch("second dimension of A, $nA, does not match first dimension of B, $mB"))
-    @noinline throw_dimerr(::AbstractVector, nA, mB) = throw(DimensionMismatch("second dimension of D, $nA, does not match length of V, $mB"))
-    nA == mB || throw_dimerr(B, nA, mB)
-    return nothing
-end
-# the output matrix should have the same size as the non-diagonal input matrix or vector
-@noinline throw_dimerr(szC, szA) = throw(DimensionMismatch("output matrix has size: $szC, but should have size $szA"))
-_size_check_out(C, ::Diagonal, A) = _size_check_out(C, A)
-_size_check_out(C, A, ::Diagonal) = _size_check_out(C, A)
-_size_check_out(C, A::Diagonal, ::Diagonal) = _size_check_out(C, A)
-function _size_check_out(C, A)
-    szA = size(A)
-    szC = size(C)
-    szA == szC || throw_dimerr(szC, szA)
-    return nothing
-end
-function _muldiag_size_check(C, A, B)
-    _muldiag_size_check(A, B)
-    _size_check_out(C, A, B)
-end
-
-function (*)(Da::Diagonal, Db::Diagonal)
-    _muldiag_size_check(Da, Db)
-    return Diagonal(Da.diag .* Db.diag)
-end
-
-function (*)(D::Diagonal, V::AbstractVector)
-    _muldiag_size_check(D, V)
-    return D.diag .* V
-end
-
-(*)(A::AbstractMatrix, D::Diagonal) =
-    mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), A, D)
-(*)(A::HermOrSym, D::Diagonal) =
-    mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), A, D)
-(*)(D::Diagonal, A::AbstractMatrix) =
-    mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), D, A)
-(*)(D::Diagonal, A::HermOrSym) =
-    mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), D, A)
-
-rmul!(A::AbstractMatrix, D::Diagonal) = @inline mul!(A, A, D)
-lmul!(D::Diagonal, B::AbstractVecOrMat) = @inline mul!(B, D, B)
-
-function (*)(A::AdjOrTransAbsMat, D::Diagonal)
-    Ac = copy_similar(A, promote_op(*, eltype(A), eltype(D.diag)))
-    rmul!(Ac, D)
-end
-function (*)(D::Diagonal, A::AdjOrTransAbsMat)
-    Ac = copy_similar(A, promote_op(*, eltype(A), eltype(D.diag)))
-    lmul!(D, Ac)
-end
-
-function __muldiag!(out, D::Diagonal, B, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
-    require_one_based_indexing(out, B)
-    alpha, beta = _add.alpha, _add.beta
-    if iszero(alpha)
-        _rmul_or_fill!(out, beta)
-    else
-        if bis0
-            @inbounds for j in axes(B, 2)
-                @simd for i in axes(B, 1)
-                    out[i,j] = D.diag[i] * B[i,j] * alpha
-                end
-            end
-        else
-            @inbounds for j in axes(B, 2)
-                @simd for i in axes(B, 1)
-                    out[i,j] = D.diag[i] * B[i,j] * alpha + out[i,j] * beta
-                end
-            end
-        end
-    end
-    return out
-end
-function __muldiag!(out, A, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
-    require_one_based_indexing(out, A)
-    alpha, beta = _add.alpha, _add.beta
-    if iszero(alpha)
-        _rmul_or_fill!(out, beta)
-    else
-        if bis0
-            @inbounds for j in axes(A, 2)
-                dja = D.diag[j] * alpha
-                @simd for i in axes(A, 1)
-                    out[i,j] = A[i,j] * dja
-                end
-            end
-        else
-            @inbounds for j in axes(A, 2)
-                dja = D.diag[j] * alpha
-                @simd for i in axes(A, 1)
-                    out[i,j] = A[i,j] * dja + out[i,j] * beta
-                end
-            end
-        end
-    end
-    return out
-end
-function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
-    d1 = D1.diag
-    d2 = D2.diag
-    alpha, beta = _add.alpha, _add.beta
-    if iszero(alpha)
-        _rmul_or_fill!(out.diag, beta)
-    else
-        if bis0
-            @inbounds @simd for i in eachindex(out.diag)
-                out.diag[i] = d1[i] * d2[i] * alpha
-            end
-        else
-            @inbounds @simd for i in eachindex(out.diag)
-                out.diag[i] = d1[i] * d2[i] * alpha + out.diag[i] * beta
-            end
-        end
-    end
-    return out
-end
-function __muldiag!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
-    require_one_based_indexing(out)
-    alpha, beta = _add.alpha, _add.beta
-    mA = size(D1, 1)
-    d1 = D1.diag
-    d2 = D2.diag
-    _rmul_or_fill!(out, beta)
-    if !iszero(alpha)
-        @inbounds @simd for i in 1:mA
-            out[i,i] += d1[i] * d2[i] * alpha
-        end
-    end
-    return out
-end
-
-function _mul_diag!(out, A, B, _add)
-    _muldiag_size_check(out, A, B)
-    __muldiag!(out, A, B, _add)
-    return out
-end
-
-_mul!(out::AbstractVecOrMat, D::Diagonal, V::AbstractVector, _add) =
-    _mul_diag!(out, D, V, _add)
-_mul!(out::AbstractMatrix, D::Diagonal, B::AbstractMatrix, _add) =
-    _mul_diag!(out, D, B, _add)
-_mul!(out::AbstractMatrix, A::AbstractMatrix, D::Diagonal, _add) =
-    _mul_diag!(out, A, D, _add)
-_mul!(C::Diagonal, Da::Diagonal, Db::Diagonal, _add) =
-    _mul_diag!(C, Da, Db, _add)
-_mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, _add) =
-    _mul_diag!(C, Da, Db, _add)
-
-function (*)(Da::Diagonal, A::AbstractMatrix, Db::Diagonal)
-    _muldiag_size_check(Da, A)
-    _muldiag_size_check(A, Db)
-    return broadcast(*, Da.diag, A, permutedims(Db.diag))
-end
-
-function (*)(Da::Diagonal, Db::Diagonal, Dc::Diagonal)
-    _muldiag_size_check(Da, Db)
-    _muldiag_size_check(Db, Dc)
-    return Diagonal(Da.diag .* Db.diag .* Dc.diag)
-end
-
-/(A::AbstractVecOrMat, D::Diagonal) = _rdiv!(similar(A, _init_eltype(/, eltype(A), eltype(D))), A, D)
-/(A::HermOrSym, D::Diagonal) = _rdiv!(similar(A, _init_eltype(/, eltype(A), eltype(D)), size(A)), A, D)
-
-rdiv!(A::AbstractVecOrMat, D::Diagonal) = @inline _rdiv!(A, A, D)
-# avoid copy when possible via internal 3-arg backend
-function _rdiv!(B::AbstractVecOrMat, A::AbstractVecOrMat, D::Diagonal)
-    require_one_based_indexing(A)
-    dd = D.diag
-    m, n = size(A, 1), size(A, 2)
-    if (k = length(dd)) != n
-        throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
-    end
-    @inbounds for j in 1:n
-        ddj = dd[j]
-        iszero(ddj) && throw(SingularException(j))
-        for i in 1:m
-            B[i, j] = A[i, j] / ddj
-        end
-    end
-    B
-end
-
-function \(D::Diagonal, B::AbstractVector)
-    j = findfirst(iszero, D.diag)
-    isnothing(j) || throw(SingularException(j))
-    return D.diag .\ B
-end
-\(D::Diagonal, B::AbstractMatrix) = ldiv!(similar(B, _init_eltype(\, eltype(D), eltype(B))), D, B)
-\(D::Diagonal, B::HermOrSym) = ldiv!(similar(B, _init_eltype(\, eltype(D), eltype(B)), size(B)), D, B)
-
-ldiv!(D::Diagonal, B::AbstractVecOrMat) = @inline ldiv!(B, D, B)
-function ldiv!(B::AbstractVecOrMat, D::Diagonal, A::AbstractVecOrMat)
-    require_one_based_indexing(A, B)
-    dd = D.diag
-    d = length(dd)
-    m, n = size(A, 1), size(A, 2)
-    m′, n′ = size(B, 1), size(B, 2)
-    m == d || throw(DimensionMismatch("right hand side has $m rows but D is $d by $d"))
-    (m, n) == (m′, n′) || throw(DimensionMismatch("expect output to be $m by $n, but got $m′ by $n′"))
-    j = findfirst(iszero, D.diag)
-    isnothing(j) || throw(SingularException(j))
-    @inbounds for j = 1:n, i = 1:m
-        B[i, j] = dd[i] \ A[i, j]
-    end
-    B
-end
-
-# Optimizations for \, / between Diagonals
-\(D::Diagonal, B::Diagonal) = ldiv!(similar(B, promote_op(\, eltype(D), eltype(B))), D, B)
-/(A::Diagonal, D::Diagonal) = _rdiv!(similar(A, promote_op(/, eltype(A), eltype(D))), A, D)
-function _rdiv!(Dc::Diagonal, Db::Diagonal, Da::Diagonal)
-    n, k = length(Db.diag), length(Da.diag)
-    n == k || throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
-    j = findfirst(iszero, Da.diag)
-    isnothing(j) || throw(SingularException(j))
-    Dc.diag .= Db.diag ./ Da.diag
-    Dc
-end
-ldiv!(Dc::Diagonal, Da::Diagonal, Db::Diagonal) = Diagonal(ldiv!(Dc.diag, Da, Db.diag))
-
-# optimizations for (Sym)Tridiagonal and Diagonal
-@propagate_inbounds _getudiag(T::Tridiagonal, i) = T.du[i]
-@propagate_inbounds _getudiag(S::SymTridiagonal, i) = S.ev[i]
-@propagate_inbounds _getdiag(T::Tridiagonal, i) = T.d[i]
-@propagate_inbounds _getdiag(S::SymTridiagonal, i) = symmetric(S.dv[i], :U)::symmetric_type(eltype(S.dv))
-@propagate_inbounds _getldiag(T::Tridiagonal, i) = T.dl[i]
-@propagate_inbounds _getldiag(S::SymTridiagonal, i) = transpose(S.ev[i])
-
-function (\)(D::Diagonal, S::SymTridiagonal)
-    T = promote_op(\, eltype(D), eltype(S))
-    du = similar(S.ev, T, max(length(S.dv)-1, 0))
-    d  = similar(S.dv, T, length(S.dv))
-    dl = similar(S.ev, T, max(length(S.dv)-1, 0))
-    ldiv!(Tridiagonal(dl, d, du), D, S)
-end
-(\)(D::Diagonal, T::Tridiagonal) = ldiv!(similar(T, promote_op(\, eltype(D), eltype(T))), D, T)
-function ldiv!(T::Tridiagonal, D::Diagonal, S::Union{SymTridiagonal,Tridiagonal})
-    m = size(S, 1)
-    dd = D.diag
-    if (k = length(dd)) != m
-        throw(DimensionMismatch("diagonal matrix is $k by $k but right hand side has $m rows"))
-    end
-    if length(T.d) != m
-        throw(DimensionMismatch("target matrix size $(size(T)) does not match input matrix size $(size(S))"))
-    end
-    m == 0 && return T
-    j = findfirst(iszero, dd)
-    isnothing(j) || throw(SingularException(j))
-    ddj = dd[1]
-    T.d[1] = ddj \ _getdiag(S, 1)
-    @inbounds if m > 1
-        T.du[1] = ddj \ _getudiag(S, 1)
-        for j in 2:m-1
-            ddj = dd[j]
-            T.dl[j-1] = ddj \ _getldiag(S, j-1)
-            T.d[j]  = ddj \ _getdiag(S, j)
-            T.du[j] = ddj \ _getudiag(S, j)
-        end
-        ddj = dd[m]
-        T.dl[m-1] = ddj \ _getldiag(S, m-1)
-        T.d[m] = ddj \ _getdiag(S, m)
-    end
-    return T
-end
-
-function (/)(S::SymTridiagonal, D::Diagonal)
-    T = promote_op(\, eltype(D), eltype(S))
-    du = similar(S.ev, T, max(length(S.dv)-1, 0))
-    d  = similar(S.dv, T, length(S.dv))
-    dl = similar(S.ev, T, max(length(S.dv)-1, 0))
-    _rdiv!(Tridiagonal(dl, d, du), S, D)
-end
-(/)(T::Tridiagonal, D::Diagonal) = _rdiv!(similar(T, promote_op(/, eltype(T), eltype(D))), T, D)
-function _rdiv!(T::Tridiagonal, S::Union{SymTridiagonal,Tridiagonal}, D::Diagonal)
-    n = size(S, 2)
-    dd = D.diag
-    if (k = length(dd)) != n
-        throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
-    end
-    if length(T.d) != n
-        throw(DimensionMismatch("target matrix size $(size(T)) does not match input matrix size $(size(S))"))
-    end
-    n == 0 && return T
-    j = findfirst(iszero, dd)
-    isnothing(j) || throw(SingularException(j))
-    ddj = dd[1]
-    T.d[1] = _getdiag(S, 1) / ddj
-    @inbounds if n > 1
-        T.dl[1] = _getldiag(S, 1) / ddj
-        for j in 2:n-1
-            ddj = dd[j]
-            T.dl[j] = _getldiag(S, j) / ddj
-            T.d[j] = _getdiag(S, j) / ddj
-            T.du[j-1] = _getudiag(S, j-1) / ddj
-        end
-        ddj = dd[n]
-        T.d[n] = _getdiag(S, n) / ddj
-        T.du[n-1] = _getudiag(S, n-1) / ddj
-    end
-    return T
-end
-
-# Optimizations for [l/r]mul!, l/rdiv!, *, / and \ between Triangular and Diagonal.
-# These functions are generally more efficient if we calculate the whole data field.
-# The following code implements them in a unified pattern to avoid missing.
-@inline function _setdiag!(data, f, diag, diag′ = nothing)
-    @inbounds for i in 1:length(diag)
-        data[i,i] = isnothing(diag′) ? f(diag[i]) : f(diag[i],diag′[i])
-    end
-    data
-end
-for Tri in (:UpperTriangular, :LowerTriangular)
-    UTri = Symbol(:Unit, Tri)
-    # 2 args
-    for (fun, f) in zip((:*, :rmul!, :rdiv!, :/), (:identity, :identity, :inv, :inv))
-        @eval $fun(A::$Tri, D::Diagonal) = $Tri($fun(A.data, D))
-        @eval $fun(A::$UTri, D::Diagonal) = $Tri(_setdiag!($fun(A.data, D), $f, D.diag))
-    end
-    for (fun, f) in zip((:*, :lmul!, :ldiv!, :\), (:identity, :identity, :inv, :inv))
-        @eval $fun(D::Diagonal, A::$Tri) = $Tri($fun(D, A.data))
-        @eval $fun(D::Diagonal, A::$UTri) = $Tri(_setdiag!($fun(D, A.data), $f, D.diag))
-    end
-    # 3-arg ldiv!
-    @eval ldiv!(C::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(C.data, D, A.data))
-    @eval ldiv!(C::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(C.data, D, A.data), inv, D.diag))
-    # 3-arg mul! is disambiguated in special.jl
-    # 5-arg mul!
-    @eval _mul!(C::$Tri, D::Diagonal, A::$Tri, _add) = $Tri(mul!(C.data, D, A.data, _add.alpha, _add.beta))
-    @eval function _mul!(C::$Tri, D::Diagonal, A::$UTri, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
-        α, β = _add.alpha, _add.beta
-        iszero(α) && return _rmul_or_fill!(C, β)
-        diag′ = bis0 ? nothing : diag(C)
-        data = mul!(C.data, D, A.data, α, β)
-        $Tri(_setdiag!(data, _add, D.diag, diag′))
-    end
-    @eval _mul!(C::$Tri, A::$Tri, D::Diagonal, _add) = $Tri(mul!(C.data, A.data, D, _add.alpha, _add.beta))
-    @eval function _mul!(C::$Tri, A::$UTri, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
-        α, β = _add.alpha, _add.beta
-        iszero(α) && return _rmul_or_fill!(C, β)
-        diag′ = bis0 ? nothing : diag(C)
-        data = mul!(C.data, A.data, D, α, β)
-        $Tri(_setdiag!(data, _add, D.diag, diag′))
-    end
-end
-
-@inline function kron!(C::AbstractMatrix, A::Diagonal, B::Diagonal)
-    valA = A.diag; nA = length(valA)
-    valB = B.diag; nB = length(valB)
-    nC = checksquare(C)
-    @boundscheck nC == nA*nB ||
-        throw(DimensionMismatch("expect C to be a $(nA*nB)x$(nA*nB) matrix, got size $(nC)x$(nC)"))
-    isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
-    @inbounds for i = 1:nA, j = 1:nB
-        idx = (i-1)*nB+j
-        C[idx, idx] = valA[i] * valB[j]
-    end
-    return C
-end
-
-kron(A::Diagonal, B::Diagonal) = Diagonal(kron(A.diag, B.diag))
-
-function kron(A::Diagonal, B::SymTridiagonal)
-    kdv = kron(diag(A), B.dv)
-    # We don't need to drop the last element
-    kev = kron(diag(A), _pushzero(_evview(B)))
-    SymTridiagonal(kdv, kev)
-end
-function kron(A::Diagonal, B::Tridiagonal)
-    # `_droplast!` is only guaranteed to work with `Vector`
-    kd = _makevector(kron(diag(A), B.d))
-    kdl = _droplast!(_makevector(kron(diag(A), _pushzero(B.dl))))
-    kdu = _droplast!(_makevector(kron(diag(A), _pushzero(B.du))))
-    Tridiagonal(kdl, kd, kdu)
-end
-
-@inline function kron!(C::AbstractMatrix, A::Diagonal, B::AbstractMatrix)
-    require_one_based_indexing(B)
-    (mA, nA) = size(A)
-    (mB, nB) = size(B)
-    (mC, nC) = size(C)
-    @boundscheck (mC, nC) == (mA * mB, nA * nB) ||
-        throw(DimensionMismatch("expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)"))
-    isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
-    m = 1
-    @inbounds for j = 1:nA
-        A_jj = A[j,j]
-        for k = 1:nB
-            for l = 1:mB
-                C[m] = A_jj * B[l,k]
-                m += 1
-            end
-            m += (nA - 1) * mB
-        end
-        m += mB
-    end
-    return C
-end
-
-@inline function kron!(C::AbstractMatrix, A::AbstractMatrix, B::Diagonal)
-    require_one_based_indexing(A)
-    (mA, nA) = size(A)
-    (mB, nB) = size(B)
-    (mC, nC) = size(C)
-    @boundscheck (mC, nC) == (mA * mB, nA * nB) ||
-        throw(DimensionMismatch("expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)"))
-    isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
-    m = 1
-    @inbounds for j = 1:nA
-        for l = 1:mB
-            Bll = B[l,l]
-            for k = 1:mA
-                C[m] = A[k,j] * Bll
-                m += nB
-            end
-            m += 1
-        end
-        m -= nB
-    end
-    return C
-end
-
-conj(D::Diagonal) = Diagonal(conj(D.diag))
-transpose(D::Diagonal{<:Number}) = D
-transpose(D::Diagonal) = Diagonal(transpose.(D.diag))
-adjoint(D::Diagonal{<:Number}) = Diagonal(vec(adjoint(D.diag)))
-adjoint(D::Diagonal{<:Number,<:Base.ReshapedArray{<:Number,1,<:Adjoint}}) = Diagonal(adjoint(parent(D.diag)))
-adjoint(D::Diagonal) = Diagonal(adjoint.(D.diag))
-permutedims(D::Diagonal) = D
-permutedims(D::Diagonal, perm) = (Base.checkdims_perm(D, D, perm); D)
-
-function diag(D::Diagonal{T}, k::Integer=0) where T
-    # every branch call similar(..., ::Int) to make sure the
-    # same vector type is returned independent of k
-    if k == 0
-        return copyto!(similar(D.diag, length(D.diag)), D.diag)
-    elseif -size(D,1) <= k <= size(D,1)
-        return fill!(similar(D.diag, size(D,1)-abs(k)), zero(T))
-    else
-        throw(ArgumentError(string("requested diagonal, $k, must be at least $(-size(D, 1)) ",
-            "and at most $(size(D, 2)) for an $(size(D, 1))-by-$(size(D, 2)) matrix")))
-    end
-end
-tr(D::Diagonal) = sum(tr, D.diag)
-det(D::Diagonal) = prod(det, D.diag)
-function logdet(D::Diagonal{<:Complex}) # make sure branch cut is correct
-    z = sum(log, D.diag)
-    complex(real(z), rem2pi(imag(z), RoundNearest))
-end
-
-# Matrix functions
-for f in (:exp, :cis, :log, :sqrt,
-          :cos, :sin, :tan, :csc, :sec, :cot,
-          :cosh, :sinh, :tanh, :csch, :sech, :coth,
-          :acos, :asin, :atan, :acsc, :asec, :acot,
-          :acosh, :asinh, :atanh, :acsch, :asech, :acoth)
-    @eval $f(D::Diagonal) = Diagonal($f.(D.diag))
-end
-
-function inv(D::Diagonal{T}) where T
-    Di = similar(D.diag, typeof(inv(oneunit(T))))
-    for i = 1:length(D.diag)
-        if iszero(D.diag[i])
-            throw(SingularException(i))
-        end
-        Di[i] = inv(D.diag[i])
-    end
-    Diagonal(Di)
-end
-
-function pinv(D::Diagonal{T}) where T
-    Di = similar(D.diag, typeof(inv(oneunit(T))))
-    for i = 1:length(D.diag)
-        if !iszero(D.diag[i])
-            invD = inv(D.diag[i])
-            if isfinite(invD)
-                Di[i] = invD
-                continue
-            end
-        end
-        # fallback
-        Di[i] = zero(T)
-    end
-    Diagonal(Di)
-end
-function pinv(D::Diagonal{T}, tol::Real) where T
-    Di = similar(D.diag, typeof(inv(oneunit(T))))
-    if !isempty(D.diag)
-        maxabsD = maximum(abs, D.diag)
-        for i = 1:length(D.diag)
-            if abs(D.diag[i]) > tol*maxabsD
-                invD = inv(D.diag[i])
-                if isfinite(invD)
-                    Di[i] = invD
-                    continue
-                end
-            end
-            # fallback
-            Di[i] = zero(T)
-        end
-    end
-    Diagonal(Di)
-end
-
-#Eigensystem
-eigvals(D::Diagonal{<:Number}; permute::Bool=true, scale::Bool=true) = copy(D.diag)
-eigvals(D::Diagonal; permute::Bool=true, scale::Bool=true) =
-    [eigvals(x) for x in D.diag] #For block matrices, etc.
-eigvecs(D::Diagonal) = Matrix{eltype(D)}(I, size(D))
-function eigen(D::Diagonal; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=nothing)
-    if any(!isfinite, D.diag)
-        throw(ArgumentError("matrix contains Infs or NaNs"))
-    end
-    Td = Base.promote_op(/, eltype(D), eltype(D))
-    λ = eigvals(D)
-    if !isnothing(sortby)
-        p = sortperm(λ; alg=QuickSort, by=sortby)
-        λ = λ[p]
-        evecs = zeros(Td, size(D))
-        @inbounds for i in eachindex(p)
-            evecs[p[i],i] = one(Td)
-        end
-    else
-        evecs = Matrix{Td}(I, size(D))
-    end
-    Eigen(λ, evecs)
-end
-function eigen(Da::Diagonal, Db::Diagonal; sortby::Union{Function,Nothing}=nothing)
-    if any(!isfinite, Da.diag) || any(!isfinite, Db.diag)
-        throw(ArgumentError("matrices contain Infs or NaNs"))
-    end
-    if any(iszero, Db.diag)
-        throw(ArgumentError("right-hand side diagonal matrix is singular"))
-    end
-    return GeneralizedEigen(eigen(Db \ Da; sortby)...)
-end
-function eigen(A::AbstractMatrix, D::Diagonal; sortby::Union{Function,Nothing}=nothing)
-    if any(iszero, D.diag)
-        throw(ArgumentError("right-hand side diagonal matrix is singular"))
-    end
-    if size(A, 1) == size(A, 2) && isdiag(A)
-        return eigen(Diagonal(A), D; sortby)
-    elseif all(isposdef, D.diag)
-        S = promote_type(eigtype(eltype(A)), eltype(D))
-        return eigen(A, cholesky(Diagonal{S}(D)); sortby)
-    else
-        return eigen!(D \ A; sortby)
-    end
-end
-
-#Singular system
-svdvals(D::Diagonal{<:Number}) = sort!(abs.(D.diag), rev = true)
-svdvals(D::Diagonal) = [svdvals(v) for v in D.diag]
-function svd(D::Diagonal{T}) where {T<:Number}
-    d = D.diag
-    s = abs.(d)
-    piv = sortperm(s, rev = true)
-    S = s[piv]
-    Td  = typeof(oneunit(T)/oneunit(T))
-    U = zeros(Td, size(D))
-    Vt = copy(U)
-    for i in 1:length(d)
-        j = piv[i]
-        U[j,i] = d[j] / S[i]
-        Vt[i,j] = one(Td)
-    end
-    return SVD(U, S, Vt)
-end
-
-# disambiguation methods: * and / of Diagonal and Adj/Trans AbsVec
-*(u::AdjointAbsVec, D::Diagonal) = (D'u')'
-*(u::TransposeAbsVec, D::Diagonal) = transpose(transpose(D) * transpose(u))
-*(x::AdjointAbsVec,   D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y)
-*(x::TransposeAbsVec, D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y)
-/(u::AdjointAbsVec, D::Diagonal) = (D' \ u')'
-/(u::TransposeAbsVec, D::Diagonal) = transpose(transpose(D) \ transpose(u))
-# disambiguation methods: Call unoptimized version for user defined AbstractTriangular.
-*(A::AbstractTriangular, D::Diagonal) = @invoke *(A::AbstractMatrix, D::Diagonal)
-*(D::Diagonal, A::AbstractTriangular) = @invoke *(D::Diagonal, A::AbstractMatrix)
-
-dot(x::AbstractVector, D::Diagonal, y::AbstractVector) = _mapreduce_prod(dot, x, D, y)
-
-dot(A::Diagonal, B::Diagonal) = dot(A.diag, B.diag)
-function dot(D::Diagonal, B::AbstractMatrix)
-    size(D) == size(B) || throw(DimensionMismatch("Matrix sizes $(size(D)) and $(size(B)) differ"))
-    return dot(D.diag, view(B, diagind(B)))
-end
-
-dot(A::AbstractMatrix, B::Diagonal) = conj(dot(B, A))
-
-function _mapreduce_prod(f, x, D::Diagonal, y)
-    if !(length(x) == length(D.diag) == length(y))
-        throw(DimensionMismatch("x has length $(length(x)), D has size $(size(D)), and y has $(length(y))"))
-    end
-    if isempty(x) && isempty(D) && isempty(y)
-        return zero(promote_op(f, eltype(x), eltype(D), eltype(y)))
-    else
-        return mapreduce(t -> f(t[1], t[2], t[3]), +, zip(x, D.diag, y))
-    end
-end
-
-function cholesky!(A::Diagonal, ::NoPivot = NoPivot(); check::Bool = true)
-    info = 0
-    for (i, di) in enumerate(A.diag)
-        if isreal(di) && real(di) > 0
-            A.diag[i] = √di
-        elseif check
-            throw(PosDefException(i))
-        else
-            info = i
-            break
-        end
-    end
-    Cholesky(A, 'U', convert(BlasInt, info))
-end
-@deprecate cholesky!(A::Diagonal, ::Val{false}; check::Bool = true) cholesky!(A::Diagonal, NoPivot(); check) false
-@deprecate cholesky(A::Diagonal, ::Val{false}; check::Bool = true) cholesky(A::Diagonal, NoPivot(); check) false
-
-inv(C::Cholesky{<:Any,<:Diagonal}) = Diagonal(map(inv∘abs2, C.factors.diag))
-
-cholcopy(A::Diagonal) = copymutable_oftype(A, choltype(A))
-cholcopy(A::RealHermSymComplexHerm{<:Any,<:Diagonal}) = Diagonal(copy_similar(diag(A), choltype(A)))
-
-function getproperty(C::Cholesky{<:Any,<:Diagonal}, d::Symbol)
-    Cfactors = getfield(C, :factors)
-    if d in (:U, :L, :UL)
-        return Cfactors
-    else
-        return getfield(C, d)
-    end
-end
-
-Base._sum(A::Diagonal, ::Colon) = sum(A.diag)
-function Base._sum(A::Diagonal, dims::Integer)
-    res = Base.reducedim_initarray(A, dims, zero(eltype(A)))
-    if dims <= 2
-        for i = 1:length(A.diag)
-            @inbounds res[i] = A.diag[i]
-        end
-    else
-        for i = 1:length(A.diag)
-            @inbounds res[i,i] = A.diag[i]
-        end
-    end
-    res
-end
-
-function logabsdet(A::Diagonal)
-     mapreduce(x -> (log(abs(x)), sign(x)), ((d1, s1), (d2, s2)) -> (d1 + d2, s1 * s2),
-               A.diag)
-end
-
-function Base.muladd(A::Diagonal, B::Diagonal, z::Diagonal)
-    Diagonal(A.diag .* B.diag .+ z.diag)
-end
diff --git a/stdlib/LinearAlgebra/src/eigen.jl b/stdlib/LinearAlgebra/src/eigen.jl
deleted file mode 100644
index 489bfa4665c7a..0000000000000
--- a/stdlib/LinearAlgebra/src/eigen.jl
+++ /dev/null
@@ -1,675 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Eigendecomposition
-"""
-    Eigen <: Factorization
-
-Matrix factorization type of the eigenvalue/spectral decomposition of a square
-matrix `A`. This is the return type of [`eigen`](@ref), the corresponding matrix
-factorization function.
-
-If `F::Eigen` is the factorization object, the eigenvalues can be obtained via
-`F.values` and the eigenvectors as the columns of the matrix `F.vectors`.
-(The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
-
-Iterating the decomposition produces the components `F.values` and `F.vectors`.
-
-# Examples
-```jldoctest
-julia> F = eigen([1.0 0.0 0.0; 0.0 3.0 0.0; 0.0 0.0 18.0])
-Eigen{Float64, Float64, Matrix{Float64}, Vector{Float64}}
-values:
-3-element Vector{Float64}:
-  1.0
-  3.0
- 18.0
-vectors:
-3×3 Matrix{Float64}:
- 1.0  0.0  0.0
- 0.0  1.0  0.0
- 0.0  0.0  1.0
-
-julia> F.values
-3-element Vector{Float64}:
-  1.0
-  3.0
- 18.0
-
-julia> F.vectors
-3×3 Matrix{Float64}:
- 1.0  0.0  0.0
- 0.0  1.0  0.0
- 0.0  0.0  1.0
-
-julia> vals, vecs = F; # destructuring via iteration
-
-julia> vals == F.values && vecs == F.vectors
-true
-```
-"""
-struct Eigen{T,V,S<:AbstractMatrix,U<:AbstractVector} <: Factorization{T}
-    values::U
-    vectors::S
-    Eigen{T,V,S,U}(values::AbstractVector{V}, vectors::AbstractMatrix{T}) where {T,V,S,U} =
-        new(values, vectors)
-end
-Eigen(values::AbstractVector{V}, vectors::AbstractMatrix{T}) where {T,V} =
-    Eigen{T,V,typeof(vectors),typeof(values)}(values, vectors)
-
-# Generalized eigenvalue problem.
-"""
-    GeneralizedEigen <: Factorization
-
-Matrix factorization type of the generalized eigenvalue/spectral decomposition of
-`A` and `B`. This is the return type of [`eigen`](@ref), the corresponding
-matrix factorization function, when called with two matrix arguments.
-
-If `F::GeneralizedEigen` is the factorization object, the eigenvalues can be obtained via
-`F.values` and the eigenvectors as the columns of the matrix `F.vectors`.
-(The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
-
-Iterating the decomposition produces the components `F.values` and `F.vectors`.
-
-# Examples
-```jldoctest
-julia> A = [1 0; 0 -1]
-2×2 Matrix{Int64}:
- 1   0
- 0  -1
-
-julia> B = [0 1; 1 0]
-2×2 Matrix{Int64}:
- 0  1
- 1  0
-
-julia> F = eigen(A, B)
-GeneralizedEigen{ComplexF64, ComplexF64, Matrix{ComplexF64}, Vector{ComplexF64}}
-values:
-2-element Vector{ComplexF64}:
- 0.0 - 1.0im
- 0.0 + 1.0im
-vectors:
-2×2 Matrix{ComplexF64}:
-  0.0+1.0im   0.0-1.0im
- -1.0+0.0im  -1.0-0.0im
-
-julia> F.values
-2-element Vector{ComplexF64}:
- 0.0 - 1.0im
- 0.0 + 1.0im
-
-julia> F.vectors
-2×2 Matrix{ComplexF64}:
-  0.0+1.0im   0.0-1.0im
- -1.0+0.0im  -1.0-0.0im
-
-julia> vals, vecs = F; # destructuring via iteration
-
-julia> vals == F.values && vecs == F.vectors
-true
-```
-"""
-struct GeneralizedEigen{T,V,S<:AbstractMatrix,U<:AbstractVector} <: Factorization{T}
-    values::U
-    vectors::S
-    GeneralizedEigen{T,V,S,U}(values::AbstractVector{V}, vectors::AbstractMatrix{T}) where {T,V,S,U} =
-        new(values, vectors)
-end
-GeneralizedEigen(values::AbstractVector{V}, vectors::AbstractMatrix{T}) where {T,V} =
-    GeneralizedEigen{T,V,typeof(vectors),typeof(values)}(values, vectors)
-
-# iteration for destructuring into components
-Base.iterate(S::Union{Eigen,GeneralizedEigen}) = (S.values, Val(:vectors))
-Base.iterate(S::Union{Eigen,GeneralizedEigen}, ::Val{:vectors}) = (S.vectors, Val(:done))
-Base.iterate(S::Union{Eigen,GeneralizedEigen}, ::Val{:done}) = nothing
-
-isposdef(A::Union{Eigen,GeneralizedEigen}) = isreal(A.values) && all(x -> x > 0, A.values)
-
-# pick a canonical ordering to avoid returning eigenvalues in "random" order
-# as is the LAPACK default (for complex λ — LAPACK sorts by λ for the Hermitian/Symmetric case)
-eigsortby(λ::Real) = λ
-eigsortby(λ::Complex) = (real(λ),imag(λ))
-function sorteig!(λ::AbstractVector, X::AbstractMatrix, sortby::Union{Function,Nothing}=eigsortby)
-    if sortby !== nothing && !issorted(λ, by=sortby)
-        p = sortperm(λ; alg=QuickSort, by=sortby)
-        permute!(λ, p)
-        Base.permutecols!!(X, p)
-    end
-    return λ, X
-end
-sorteig!(λ::AbstractVector, sortby::Union{Function,Nothing}=eigsortby) = sortby === nothing ? λ : sort!(λ, by=sortby)
-
-"""
-    eigen!(A; permute, scale, sortby)
-    eigen!(A, B; sortby)
-
-Same as [`eigen`](@ref), but saves space by overwriting the input `A` (and
-`B`), instead of creating a copy.
-"""
-function eigen!(A::StridedMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where T<:BlasReal
-    n = size(A, 2)
-    n == 0 && return Eigen(zeros(T, 0), zeros(T, 0, 0))
-    issymmetric(A) && return eigen!(Symmetric(A), sortby=sortby)
-    A, WR, WI, VL, VR, _ = LAPACK.geevx!(permute ? (scale ? 'B' : 'P') : (scale ? 'S' : 'N'), 'N', 'V', 'N', A)
-    iszero(WI) && return Eigen(sorteig!(WR, VR, sortby)...)
-    evec = zeros(Complex{T}, n, n)
-    j = 1
-    while j <= n
-        if WI[j] == 0
-            evec[:,j] = view(VR, :, j)
-        else
-            for i = 1:n
-                evec[i,j]   = VR[i,j] + im*VR[i,j+1]
-                evec[i,j+1] = VR[i,j] - im*VR[i,j+1]
-            end
-            j += 1
-        end
-        j += 1
-    end
-    return Eigen(sorteig!(complex.(WR, WI), evec, sortby)...)
-end
-
-function eigen!(A::StridedMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where T<:BlasComplex
-    n = size(A, 2)
-    n == 0 && return Eigen(zeros(T, 0), zeros(T, 0, 0))
-    ishermitian(A) && return eigen!(Hermitian(A), sortby=sortby)
-    eval, evec = LAPACK.geevx!(permute ? (scale ? 'B' : 'P') : (scale ? 'S' : 'N'), 'N', 'V', 'N', A)[[2,4]]
-    return Eigen(sorteig!(eval, evec, sortby)...)
-end
-
-"""
-    eigen(A; permute::Bool=true, scale::Bool=true, sortby) -> Eigen
-
-Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
-which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the
-matrix `F.vectors`. This corresponds to solving an eigenvalue problem of the form
-`Ax =  λx`, where `A` is a matrix, `x` is an eigenvector, and `λ` is an eigenvalue.
-(The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
-
-Iterating the decomposition produces the components `F.values` and `F.vectors`.
-
-The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).
-
-For general nonsymmetric matrices it is possible to specify how the matrix is balanced
-before the eigenvector calculation. The option `permute=true` permutes the matrix to become
-closer to upper triangular, and `scale=true` scales the matrix by its diagonal elements to
-make rows and columns more equal in norm. The default is `true` for both options.
-
-By default, the eigenvalues and vectors are sorted lexicographically by `(real(λ),imag(λ))`.
-A different comparison function `by(λ)` can be passed to `sortby`, or you can pass
-`sortby=nothing` to leave the eigenvalues in an arbitrary order.   Some special matrix types
-(e.g. [`Diagonal`](@ref) or [`SymTridiagonal`](@ref)) may implement their own sorting convention and not
-accept a `sortby` keyword.
-
-# Examples
-```jldoctest
-julia> F = eigen([1.0 0.0 0.0; 0.0 3.0 0.0; 0.0 0.0 18.0])
-Eigen{Float64, Float64, Matrix{Float64}, Vector{Float64}}
-values:
-3-element Vector{Float64}:
-  1.0
-  3.0
- 18.0
-vectors:
-3×3 Matrix{Float64}:
- 1.0  0.0  0.0
- 0.0  1.0  0.0
- 0.0  0.0  1.0
-
-julia> F.values
-3-element Vector{Float64}:
-  1.0
-  3.0
- 18.0
-
-julia> F.vectors
-3×3 Matrix{Float64}:
- 1.0  0.0  0.0
- 0.0  1.0  0.0
- 0.0  0.0  1.0
-
-julia> vals, vecs = F; # destructuring via iteration
-
-julia> vals == F.values && vecs == F.vectors
-true
-```
-"""
-function eigen(A::AbstractMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where T
-    isdiag(A) && return eigen(Diagonal{eigtype(T)}(diag(A)); sortby)
-    ishermitian(A) && return eigen!(eigencopy_oftype(Hermitian(A), eigtype(T)); sortby)
-    AA = eigencopy_oftype(A, eigtype(T))
-    return eigen!(AA; permute, scale, sortby)
-end
-function eigen(A::AbstractMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where {T <: Union{Float16,Complex{Float16}}}
-    isdiag(A) && return eigen(Diagonal{eigtype(T)}(diag(A)); sortby)
-    E = if ishermitian(A)
-        eigen!(eigencopy_oftype(Hermitian(A), eigtype(T)); sortby)
-    else
-        eigen!(eigencopy_oftype(A, eigtype(T)); permute, scale, sortby)
-    end
-    values = convert(AbstractVector{isreal(E.values) ? Float16 : Complex{Float16}}, E.values)
-    vectors = convert(AbstractMatrix{isreal(E.vectors) ? Float16 : Complex{Float16}}, E.vectors)
-    return Eigen(values, vectors)
-end
-eigen(x::Number) = Eigen([x], fill(one(x), 1, 1))
-
-"""
-    eigvecs(A; permute::Bool=true, scale::Bool=true, `sortby`) -> Matrix
-
-Return a matrix `M` whose columns are the eigenvectors of `A`. (The `k`th eigenvector can
-be obtained from the slice `M[:, k]`.) The `permute`, `scale`, and `sortby` keywords are the same as
-for [`eigen`](@ref).
-
-# Examples
-```jldoctest
-julia> eigvecs([1.0 0.0 0.0; 0.0 3.0 0.0; 0.0 0.0 18.0])
-3×3 Matrix{Float64}:
- 1.0  0.0  0.0
- 0.0  1.0  0.0
- 0.0  0.0  1.0
-```
-"""
-eigvecs(A::Union{Number, AbstractMatrix}; kws...) =
-    eigvecs(eigen(A; kws...))
-eigvecs(F::Union{Eigen, GeneralizedEigen}) = F.vectors
-
-eigvals(F::Union{Eigen, GeneralizedEigen}) = F.values
-
-"""
-    eigvals!(A; permute::Bool=true, scale::Bool=true, sortby) -> values
-
-Same as [`eigvals`](@ref), but saves space by overwriting the input `A`, instead of creating a copy.
-The `permute`, `scale`, and `sortby` keywords are the same as for [`eigen`](@ref).
-
-!!! note
-    The input matrix `A` will not contain its eigenvalues after `eigvals!` is
-    called on it - `A` is used as a workspace.
-
-# Examples
-```jldoctest
-julia> A = [1. 2.; 3. 4.]
-2×2 Matrix{Float64}:
- 1.0  2.0
- 3.0  4.0
-
-julia> eigvals!(A)
-2-element Vector{Float64}:
- -0.3722813232690143
-  5.372281323269014
-
-julia> A
-2×2 Matrix{Float64}:
- -0.372281  -1.0
-  0.0        5.37228
-```
-"""
-function eigvals!(A::StridedMatrix{<:BlasReal}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby)
-    issymmetric(A) && return sorteig!(eigvals!(Symmetric(A)), sortby)
-    _, valsre, valsim, _ = LAPACK.geevx!(permute ? (scale ? 'B' : 'P') : (scale ? 'S' : 'N'), 'N', 'N', 'N', A)
-    return sorteig!(iszero(valsim) ? valsre : complex.(valsre, valsim), sortby)
-end
-function eigvals!(A::StridedMatrix{<:BlasComplex}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby)
-    ishermitian(A) && return sorteig!(eigvals(Hermitian(A)), sortby)
-    return sorteig!(LAPACK.geevx!(permute ? (scale ? 'B' : 'P') : (scale ? 'S' : 'N'), 'N', 'N', 'N', A)[2], sortby)
-end
-
-# promotion type to use for eigenvalues of a Matrix{T}
-eigtype(T) = promote_type(Float32, typeof(zero(T)/sqrt(abs2(one(T)))))
-
-"""
-    eigvals(A; permute::Bool=true, scale::Bool=true, sortby) -> values
-
-Return the eigenvalues of `A`.
-
-For general non-symmetric matrices it is possible to specify how the matrix is balanced
-before the eigenvalue calculation. The `permute`, `scale`, and `sortby` keywords are
-the same as for [`eigen`](@ref).
-
-# Examples
-```jldoctest
-julia> diag_matrix = [1 0; 0 4]
-2×2 Matrix{Int64}:
- 1  0
- 0  4
-
-julia> eigvals(diag_matrix)
-2-element Vector{Float64}:
- 1.0
- 4.0
-```
-"""
-eigvals(A::AbstractMatrix{T}; kws...) where T =
-    eigvals!(eigencopy_oftype(A, eigtype(T)); kws...)
-
-"""
-For a scalar input, `eigvals` will return a scalar.
-
-# Example
-```jldoctest
-julia> eigvals(-2)
--2
-```
-"""
-eigvals(x::Number; kwargs...) = imag(x) == 0 ? real(x) : x
-
-"""
-    eigmax(A; permute::Bool=true, scale::Bool=true)
-
-Return the largest eigenvalue of `A`.
-The option `permute=true` permutes the matrix to become
-closer to upper triangular, and `scale=true` scales the matrix by its diagonal elements to
-make rows and columns more equal in norm.
-Note that if the eigenvalues of `A` are complex,
-this method will fail, since complex numbers cannot
-be sorted.
-
-# Examples
-```jldoctest
-julia> A = [0 im; -im 0]
-2×2 Matrix{Complex{Int64}}:
- 0+0im  0+1im
- 0-1im  0+0im
-
-julia> eigmax(A)
-1.0
-
-julia> A = [0 im; -1 0]
-2×2 Matrix{Complex{Int64}}:
-  0+0im  0+1im
- -1+0im  0+0im
-
-julia> eigmax(A)
-ERROR: DomainError with Complex{Int64}[0+0im 0+1im; -1+0im 0+0im]:
-`A` cannot have complex eigenvalues.
-Stacktrace:
-[...]
-```
-"""
-function eigmax(A::Union{Number, AbstractMatrix}; permute::Bool=true, scale::Bool=true)
-    v = eigvals(A, permute = permute, scale = scale)
-    if eltype(v)<:Complex
-        throw(DomainError(A, "`A` cannot have complex eigenvalues."))
-    end
-    maximum(v)
-end
-
-"""
-    eigmin(A; permute::Bool=true, scale::Bool=true)
-
-Return the smallest eigenvalue of `A`.
-The option `permute=true` permutes the matrix to become
-closer to upper triangular, and `scale=true` scales the matrix by its diagonal elements to
-make rows and columns more equal in norm.
-Note that if the eigenvalues of `A` are complex,
-this method will fail, since complex numbers cannot
-be sorted.
-
-# Examples
-```jldoctest
-julia> A = [0 im; -im 0]
-2×2 Matrix{Complex{Int64}}:
- 0+0im  0+1im
- 0-1im  0+0im
-
-julia> eigmin(A)
--1.0
-
-julia> A = [0 im; -1 0]
-2×2 Matrix{Complex{Int64}}:
-  0+0im  0+1im
- -1+0im  0+0im
-
-julia> eigmin(A)
-ERROR: DomainError with Complex{Int64}[0+0im 0+1im; -1+0im 0+0im]:
-`A` cannot have complex eigenvalues.
-Stacktrace:
-[...]
-```
-"""
-function eigmin(A::Union{Number, AbstractMatrix};
-                permute::Bool=true, scale::Bool=true)
-    v = eigvals(A, permute = permute, scale = scale)
-    if eltype(v)<:Complex
-        throw(DomainError(A, "`A` cannot have complex eigenvalues."))
-    end
-    minimum(v)
-end
-
-inv(A::Eigen) = A.vectors * inv(Diagonal(A.values)) / A.vectors
-det(A::Eigen) = prod(A.values)
-
-# Generalized eigenproblem
-function eigen!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasReal
-    issymmetric(A) && isposdef(B) && return eigen!(Symmetric(A), Symmetric(B), sortby=sortby)
-    n = size(A, 1)
-    if LAPACK.version() < v"3.6.0"
-        alphar, alphai, beta, _, vr = LAPACK.ggev!('N', 'V', A, B)
-    else
-        alphar, alphai, beta, _, vr = LAPACK.ggev3!('N', 'V', A, B)
-    end
-    iszero(alphai) && return GeneralizedEigen(sorteig!(alphar ./ beta, vr, sortby)...)
-
-    vecs = zeros(Complex{T}, n, n)
-    j = 1
-    while j <= n
-        if alphai[j] == 0
-            vecs[:,j] = view(vr, :, j)
-        else
-            for i = 1:n
-                vecs[i,j  ] = vr[i,j] + im*vr[i,j+1]
-                vecs[i,j+1] = vr[i,j] - im*vr[i,j+1]
-            end
-            j += 1
-        end
-        j += 1
-    end
-    return GeneralizedEigen(sorteig!(complex.(alphar, alphai)./beta, vecs, sortby)...)
-end
-
-function eigen!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasComplex
-    ishermitian(A) && isposdef(B) && return eigen!(Hermitian(A), Hermitian(B), sortby=sortby)
-    if LAPACK.version() < v"3.6.0"
-        alpha, beta, _, vr = LAPACK.ggev!('N', 'V', A, B)
-    else
-        alpha, beta, _, vr = LAPACK.ggev3!('N', 'V', A, B)
-    end
-    return GeneralizedEigen(sorteig!(alpha./beta, vr, sortby)...)
-end
-
-"""
-    eigen(A, B; sortby) -> GeneralizedEigen
-
-Compute the generalized eigenvalue decomposition of `A` and `B`, returning a
-[`GeneralizedEigen`](@ref) factorization object `F` which contains the generalized eigenvalues in
-`F.values` and the generalized eigenvectors in the columns of the matrix `F.vectors`.
-This corresponds to solving a generalized eigenvalue problem of the form
-`Ax =  λBx`, where `A, B` are matrices, `x` is an eigenvector, and `λ` is an eigenvalue.
-(The `k`th generalized eigenvector can be obtained from the slice `F.vectors[:, k]`.)
-
-Iterating the decomposition produces the components `F.values` and `F.vectors`.
-
-By default, the eigenvalues and vectors are sorted lexicographically by `(real(λ),imag(λ))`.
-A different comparison function `by(λ)` can be passed to `sortby`, or you can pass
-`sortby=nothing` to leave the eigenvalues in an arbitrary order.
-
-# Examples
-```jldoctest
-julia> A = [1 0; 0 -1]
-2×2 Matrix{Int64}:
- 1   0
- 0  -1
-
-julia> B = [0 1; 1 0]
-2×2 Matrix{Int64}:
- 0  1
- 1  0
-
-julia> F = eigen(A, B);
-
-julia> F.values
-2-element Vector{ComplexF64}:
- 0.0 - 1.0im
- 0.0 + 1.0im
-
-julia> F.vectors
-2×2 Matrix{ComplexF64}:
-  0.0+1.0im   0.0-1.0im
- -1.0+0.0im  -1.0-0.0im
-
-julia> vals, vecs = F; # destructuring via iteration
-
-julia> vals == F.values && vecs == F.vectors
-true
-```
-"""
-function eigen(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB}
-    S = promote_type(eigtype(TA), TB)
-    eigen!(copy_similar(A, S), copy_similar(B, S); kws...)
-end
-eigen(A::Number, B::Number) = eigen(fill(A,1,1), fill(B,1,1))
-
-"""
-    LinearAlgebra.eigencopy_oftype(A::AbstractMatrix, ::Type{S})
-
-Creates a dense copy of `A` with eltype `S` by calling `copy_similar(A, S)`.
-In the case of `Hermitian` or `Symmetric` matrices additionally retains the wrapper,
-together with the `uplo` field.
-"""
-eigencopy_oftype(A, S) = copy_similar(A, S)
-
-"""
-    eigvals!(A, B; sortby) -> values
-
-Same as [`eigvals`](@ref), but saves space by overwriting the input `A` (and `B`),
-instead of creating copies.
-
-!!! note
-    The input matrices `A` and `B` will not contain their eigenvalues after
-    `eigvals!` is called. They are used as workspaces.
-
-# Examples
-```jldoctest
-julia> A = [1. 0.; 0. -1.]
-2×2 Matrix{Float64}:
- 1.0   0.0
- 0.0  -1.0
-
-julia> B = [0. 1.; 1. 0.]
-2×2 Matrix{Float64}:
- 0.0  1.0
- 1.0  0.0
-
-julia> eigvals!(A, B)
-2-element Vector{ComplexF64}:
- 0.0 - 1.0im
- 0.0 + 1.0im
-
-julia> A
-2×2 Matrix{Float64}:
- -0.0  -1.0
-  1.0  -0.0
-
-julia> B
-2×2 Matrix{Float64}:
- 1.0  0.0
- 0.0  1.0
-```
-"""
-function eigvals!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasReal
-    issymmetric(A) && isposdef(B) && return sorteig!(eigvals!(Symmetric(A), Symmetric(B)), sortby)
-    if LAPACK.version() < v"3.6.0"
-        alphar, alphai, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B)
-    else
-        alphar, alphai, beta, vl, vr = LAPACK.ggev3!('N', 'N', A, B)
-    end
-    return sorteig!((iszero(alphai) ? alphar : complex.(alphar, alphai))./beta, sortby)
-end
-function eigvals!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasComplex
-    ishermitian(A) && isposdef(B) && return sorteig!(eigvals!(Hermitian(A), Hermitian(B)), sortby)
-    if LAPACK.version() < v"3.6.0"
-        alpha, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B)
-    else
-        alpha, beta, vl, vr = LAPACK.ggev3!('N', 'N', A, B)
-    end
-    return sorteig!(alpha./beta, sortby)
-end
-
-"""
-    eigvals(A, B) -> values
-
-Compute the generalized eigenvalues of `A` and `B`.
-
-# Examples
-```jldoctest
-julia> A = [1 0; 0 -1]
-2×2 Matrix{Int64}:
- 1   0
- 0  -1
-
-julia> B = [0 1; 1 0]
-2×2 Matrix{Int64}:
- 0  1
- 1  0
-
-julia> eigvals(A,B)
-2-element Vector{ComplexF64}:
- 0.0 - 1.0im
- 0.0 + 1.0im
-```
-"""
-function eigvals(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB}
-    S = promote_type(eigtype(TA), TB)
-    return eigvals!(copy_similar(A, S), copy_similar(B, S); kws...)
-end
-
-"""
-    eigvecs(A, B) -> Matrix
-
-Return a matrix `M` whose columns are the generalized eigenvectors of `A` and `B`. (The `k`th eigenvector can
-be obtained from the slice `M[:, k]`.)
-
-# Examples
-```jldoctest
-julia> A = [1 0; 0 -1]
-2×2 Matrix{Int64}:
- 1   0
- 0  -1
-
-julia> B = [0 1; 1 0]
-2×2 Matrix{Int64}:
- 0  1
- 1  0
-
-julia> eigvecs(A, B)
-2×2 Matrix{ComplexF64}:
-  0.0+1.0im   0.0-1.0im
- -1.0+0.0im  -1.0-0.0im
-```
-"""
-eigvecs(A::AbstractMatrix, B::AbstractMatrix; kws...) = eigvecs(eigen(A, B; kws...))
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Union{Eigen,GeneralizedEigen})
-    summary(io, F); println(io)
-    println(io, "values:")
-    show(io, mime, F.values)
-    println(io, "\nvectors:")
-    show(io, mime, F.vectors)
-end
-
-function Base.hash(F::Eigen, h::UInt)
-    return hash(F.values, hash(F.vectors, hash(Eigen, h)))
-end
-function Base.:(==)(A::Eigen, B::Eigen)
-    return A.values == B.values && A.vectors == B.vectors
-end
-function Base.isequal(A::Eigen, B::Eigen)
-    return isequal(A.values, B.values) && isequal(A.vectors, B.vectors)
-end
-
-# Conversion methods
-
-## Can we determine the source/result is Real?  This is not stored in the type Eigen
-AbstractMatrix(F::Eigen) = F.vectors * Diagonal(F.values) / F.vectors
-AbstractArray(F::Eigen) = AbstractMatrix(F)
-Matrix(F::Eigen) = Array(AbstractArray(F))
-Array(F::Eigen) = Matrix(F)
diff --git a/stdlib/LinearAlgebra/src/exceptions.jl b/stdlib/LinearAlgebra/src/exceptions.jl
deleted file mode 100644
index a8d81aad3e067..0000000000000
--- a/stdlib/LinearAlgebra/src/exceptions.jl
+++ /dev/null
@@ -1,62 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-export LAPACKException,
-       SingularException,
-       PosDefException,
-       RankDeficientException,
-       ZeroPivotException
-
-struct LAPACKException <: Exception
-    info::BlasInt
-end
-
-"""
-    SingularException
-
-Exception thrown when the input matrix has one or more zero-valued eigenvalues, and is not invertible.
-A linear solve involving such a matrix cannot be computed.
-The `info` field indicates the location of (one of) the singular value(s).
-"""
-struct SingularException <: Exception
-    info::BlasInt
-end
-
-"""
-    PosDefException
-
-Exception thrown when the input matrix was not [positive definite](https://en.wikipedia.org/wiki/Definiteness_of_a_matrix).
-Some linear algebra functions and factorizations are only applicable to positive definite matrices.
-The `info` field indicates the location of (one of) the eigenvalue(s) which is (are) less than/equal to 0.
-"""
-struct PosDefException <: Exception
-    info::BlasInt
-end
-function Base.showerror(io::IO, ex::PosDefException)
-    print(io, "PosDefException: matrix is not ")
-    if ex.info == -1
-        print(io, "Hermitian")
-    else
-        print(io, "positive definite")
-    end
-    print(io, "; Cholesky factorization failed.")
-end
-
-struct RankDeficientException <: Exception
-    info::BlasInt
-end
-
-"""
-    ZeroPivotException <: Exception
-
-Exception thrown when a matrix factorization/solve encounters a zero in a pivot (diagonal)
-position and cannot proceed.  This may *not* mean that the matrix is singular:
-it may be fruitful to switch to a different factorization such as pivoted LU
-that can re-order variables to eliminate spurious zero pivots.
-The `info` field indicates the location of (one of) the zero pivot(s).
-"""
-struct ZeroPivotException <: Exception
-    info::BlasInt
-end
-function Base.showerror(io::IO, ex::ZeroPivotException)
-    print(io, "ZeroPivotException: factorization encountered one or more zero pivots. Consider switching to a pivoted LU factorization.")
-end
diff --git a/stdlib/LinearAlgebra/src/factorization.jl b/stdlib/LinearAlgebra/src/factorization.jl
deleted file mode 100644
index 8c35a23e6b6d5..0000000000000
--- a/stdlib/LinearAlgebra/src/factorization.jl
+++ /dev/null
@@ -1,202 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## Matrix factorizations and decompositions
-"""
-    LinearAlgebra.Factorization
-
-Abstract type for [matrix factorizations](https://en.wikipedia.org/wiki/Matrix_decomposition)
-a.k.a. matrix decompositions.
-See [online documentation](@ref man-linalg-factorizations) for a list of available
-matrix factorizations.
-"""
-abstract type Factorization{T} end
-
-"""
-    AdjointFactorization
-
-Lazy wrapper type for the adjoint of the underlying `Factorization` object. Usually, the
-`AdjointFactorization` constructor should not be called directly, use
-[`adjoint(:: Factorization)`](@ref) instead.
-"""
-struct AdjointFactorization{T,S<:Factorization} <: Factorization{T}
-    parent::S
-end
-AdjointFactorization(F::Factorization) =
-    AdjointFactorization{Base.promote_op(adjoint,eltype(F)),typeof(F)}(F)
-
-"""
-    TransposeFactorization
-
-Lazy wrapper type for the transpose of the underlying `Factorization` object. Usually, the
-`TransposeFactorization` constructor should not be called directly, use
-[`transpose(:: Factorization)`](@ref) instead.
-"""
-struct TransposeFactorization{T,S<:Factorization} <: Factorization{T}
-    parent::S
-end
-TransposeFactorization(F::Factorization) =
-    TransposeFactorization{Base.promote_op(adjoint,eltype(F)),typeof(F)}(F)
-
-eltype(::Type{<:Factorization{T}}) where {T} = T
-size(F::AdjointFactorization) = reverse(size(parent(F)))
-size(F::TransposeFactorization) = reverse(size(parent(F)))
-size(F::Union{AdjointFactorization,TransposeFactorization}, d::Integer) = d in (1, 2) ? size(F)[d] : 1
-parent(F::Union{AdjointFactorization,TransposeFactorization}) = F.parent
-
-"""
-    adjoint(F::Factorization)
-
-Lazy adjoint of the factorization `F`. By default, returns an
-[`AdjointFactorization`](@ref) wrapper.
-"""
-adjoint(F::Factorization) = AdjointFactorization(F)
-"""
-    transpose(F::Factorization)
-
-Lazy transpose of the factorization `F`. By default, returns a [`TransposeFactorization`](@ref),
-except for `Factorization`s with real `eltype`, in which case returns an [`AdjointFactorization`](@ref).
-"""
-transpose(F::Factorization) = TransposeFactorization(F)
-transpose(F::Factorization{<:Real}) = AdjointFactorization(F)
-adjoint(F::AdjointFactorization) = F.parent
-transpose(F::TransposeFactorization) = F.parent
-transpose(F::AdjointFactorization{<:Real}) = F.parent
-conj(A::TransposeFactorization) = adjoint(A.parent)
-conj(A::AdjointFactorization) = transpose(A.parent)
-
-checkpositivedefinite(info) = info == 0 || throw(PosDefException(info))
-checknonsingular(info, ::RowMaximum) = info == 0 || throw(SingularException(info))
-checknonsingular(info, ::RowNonZero) = info == 0 || throw(SingularException(info))
-checknonsingular(info, ::NoPivot) = info == 0 || throw(ZeroPivotException(info))
-checknonsingular(info) = checknonsingular(info, RowMaximum())
-
-"""
-    issuccess(F::Factorization)
-
-Test that a factorization of a matrix succeeded.
-
-!!! compat "Julia 1.6"
-    `issuccess(::CholeskyPivoted)` requires Julia 1.6 or later.
-
-```jldoctest
-julia> F = cholesky([1 0; 0 1]);
-
-julia> issuccess(F)
-true
-
-julia> F = lu([1 0; 0 0]; check = false);
-
-julia> issuccess(F)
-false
-```
-"""
-issuccess(F::Factorization)
-
-function logdet(F::Factorization)
-    d, s = logabsdet(F)
-    return d + log(s)
-end
-
-function det(F::Factorization)
-    d, s = logabsdet(F)
-    return exp(d)*s
-end
-
-convert(::Type{T}, f::T) where {T<:Factorization} = f
-convert(::Type{T}, f::Factorization) where {T<:Factorization} = T(f)::T
-
-convert(::Type{T}, f::Factorization) where {T<:AbstractArray} = T(f)::T
-
-### General promotion rules
-Factorization{T}(F::Factorization{T}) where {T} = F
-# This no longer looks odd since the return _is_ a Factorization!
-Factorization{T}(A::AdjointFactorization) where {T} =
-    adjoint(Factorization{T}(parent(A)))
-Factorization{T}(A::TransposeFactorization) where {T} =
-    transpose(Factorization{T}(parent(A)))
-inv(F::Factorization{T}) where {T} = (n = size(F, 1); ldiv!(F, Matrix{T}(I, n, n)))
-
-Base.hash(F::Factorization, h::UInt) = mapreduce(f -> hash(getfield(F, f)), hash, 1:nfields(F); init=h)
-Base.:(==)(  F::T, G::T) where {T<:Factorization} = all(f -> getfield(F, f) == getfield(G, f), 1:nfields(F))
-Base.isequal(F::T, G::T) where {T<:Factorization} = all(f -> isequal(getfield(F, f), getfield(G, f)), 1:nfields(F))::Bool
-
-function Base.show(io::IO, x::AdjointFactorization)
-    print(io, "adjoint of ")
-    show(io, parent(x))
-end
-function Base.show(io::IO, x::TransposeFactorization)
-    print(io, "transpose of ")
-    show(io, parent(x))
-end
-function Base.show(io::IO, ::MIME"text/plain", x::AdjointFactorization)
-    print(io, "adjoint of ")
-    show(io, MIME"text/plain"(), parent(x))
-end
-function Base.show(io::IO, ::MIME"text/plain", x::TransposeFactorization)
-    print(io, "transpose of ")
-    show(io, MIME"text/plain"(), parent(x))
-end
-
-# With a real lhs and complex rhs with the same precision, we can reinterpret
-# the complex rhs as a real rhs with twice the number of columns or rows
-function (\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal}
-    require_one_based_indexing(B)
-    c2r = reshape(copy(transpose(reinterpret(T, reshape(B, (1, length(B)))))), size(B, 1), 2*size(B, 2))
-    x = ldiv!(F, c2r)
-    return reshape(copy(reinterpret(Complex{T}, copy(transpose(reshape(x, div(length(x), 2), 2))))), _ret_size(F, B))
-end
-# don't do the reinterpretation for [Adjoint/Transpose]Factorization
-(\)(F::TransposeFactorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
-    conj!(adjoint(parent(F)) \ conj.(B))
-(\)(F::AdjointFactorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
-    @invoke \(F::typeof(F), B::VecOrMat)
-
-function (/)(B::VecOrMat{Complex{T}}, F::Factorization{T}) where {T<:BlasReal}
-    require_one_based_indexing(B)
-    x = rdiv!(copy(reinterpret(T, B)), F)
-    return copy(reinterpret(Complex{T}, x))
-end
-# don't do the reinterpretation for [Adjoint/Transpose]Factorization
-(/)(B::VecOrMat{Complex{T}}, F::TransposeFactorization{T}) where {T<:BlasReal} =
-    conj!(adjoint(parent(F)) \ conj.(B))
-(/)(B::VecOrMat{Complex{T}}, F::AdjointFactorization{T}) where {T<:BlasReal} =
-    @invoke /(B::VecOrMat{Complex{T}}, F::Factorization{T})
-
-function (\)(F::Factorization, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    TFB = typeof(oneunit(eltype(F)) \ oneunit(eltype(B)))
-    ldiv!(F, copy_similar(B, TFB))
-end
-(\)(F::TransposeFactorization, B::AbstractVecOrMat) = conj!(adjoint(F.parent) \ conj.(B))
-
-function (/)(B::AbstractMatrix, F::Factorization)
-    require_one_based_indexing(B)
-    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
-    rdiv!(copy_similar(B, TFB), F)
-end
-(/)(A::AbstractMatrix, F::AdjointFactorization) = adjoint(adjoint(F) \ adjoint(A))
-(/)(A::AbstractMatrix, F::TransposeFactorization) = transpose(transpose(F) \ transpose(A))
-
-function ldiv!(Y::AbstractVector, A::Factorization, B::AbstractVector)
-    require_one_based_indexing(Y, B)
-    m, n = size(A)
-    if m > n
-        Bc = copy(B)
-        ldiv!(A, Bc)
-        return copyto!(Y, 1, Bc, 1, n)
-    else
-        return ldiv!(A, copyto!(Y, B))
-    end
-end
-function ldiv!(Y::AbstractMatrix, A::Factorization, B::AbstractMatrix)
-    require_one_based_indexing(Y, B)
-    m, n = size(A)
-    if m > n
-        Bc = copy(B)
-        ldiv!(A, Bc)
-        return copyto!(Y, view(Bc, 1:n, :))
-    else
-        copyto!(view(Y, 1:m, :), view(B, 1:m, :))
-        return ldiv!(A, Y)
-    end
-end
diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl
deleted file mode 100644
index 9cbe3f76ccfb9..0000000000000
--- a/stdlib/LinearAlgebra/src/generic.jl
+++ /dev/null
@@ -1,1890 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## linalg.jl: Some generic Linear Algebra definitions
-
-# Elements of `out` may not be defined (e.g., for `BigFloat`). To make
-# `mul!(out, A, B)` work for such cases, `out .*ₛ beta` short-circuits
-# `out * beta`.  Using `broadcasted` to avoid the multiplication
-# inside this function.
-function *ₛ end
-Broadcast.broadcasted(::typeof(*ₛ), out, beta) =
-    iszero(beta::Number) ? false : broadcasted(*, out, beta)
-
-"""
-    MulAddMul(alpha, beta)
-
-A callable for operating short-circuiting version of `x * alpha + y * beta`.
-
-# Examples
-```jldoctest
-julia> using LinearAlgebra: MulAddMul
-
-julia> _add = MulAddMul(1, 0);
-
-julia> _add(123, nothing)
-123
-
-julia> MulAddMul(12, 34)(56, 78) == 56 * 12 + 78 * 34
-true
-```
-"""
-struct MulAddMul{ais1, bis0, TA, TB}
-    alpha::TA
-    beta::TB
-end
-
-@inline function MulAddMul(alpha::TA, beta::TB) where {TA,TB}
-    if isone(alpha)
-        if iszero(beta)
-            return MulAddMul{true,true,TA,TB}(alpha, beta)
-        else
-            return MulAddMul{true,false,TA,TB}(alpha, beta)
-        end
-    else
-        if iszero(beta)
-            return MulAddMul{false,true,TA,TB}(alpha, beta)
-        else
-            return MulAddMul{false,false,TA,TB}(alpha, beta)
-        end
-    end
-end
-
-MulAddMul() = MulAddMul{true,true,Bool,Bool}(true, false)
-
-@inline (::MulAddMul{true})(x) = x
-@inline (p::MulAddMul{false})(x) = x * p.alpha
-@inline (::MulAddMul{true, true})(x, _) = x
-@inline (p::MulAddMul{false, true})(x, _) = x * p.alpha
-@inline (p::MulAddMul{true, false})(x, y) = x + y * p.beta
-@inline (p::MulAddMul{false, false})(x, y) = x * p.alpha + y * p.beta
-
-"""
-    _modify!(_add::MulAddMul, x, C, idx)
-
-Short-circuiting version of `C[idx] = _add(x, C[idx])`.
-
-Short-circuiting the indexing `C[idx]` is necessary for avoiding `UndefRefError`
-when mutating an array of non-primitive numbers such as `BigFloat`.
-
-# Examples
-```jldoctest
-julia> using LinearAlgebra: MulAddMul, _modify!
-
-julia> _add = MulAddMul(1, 0);
-       C = Vector{BigFloat}(undef, 1);
-
-julia> _modify!(_add, 123, C, 1)
-
-julia> C
-1-element Vector{BigFloat}:
- 123.0
-```
-"""
-@inline @propagate_inbounds function _modify!(p::MulAddMul{ais1, bis0},
-                                              x, C, idx′) where {ais1, bis0}
-    # `idx′` may be an integer, a tuple of integer, or a `CartesianIndex`.
-    #  Let `CartesianIndex` constructor normalize them so that it can be
-    # used uniformly.  It also acts as a workaround for performance penalty
-    # of splatting a number (#29114):
-    idx = CartesianIndex(idx′)
-    if bis0
-        C[idx] = p(x)
-    else
-        C[idx] = p(x, C[idx])
-    end
-    return
-end
-
-@inline function _rmul_or_fill!(C::AbstractArray, beta::Number)
-    if isempty(C)
-        return C
-    end
-    if iszero(beta)
-        fill!(C, zero(eltype(C)))
-    else
-        rmul!(C, beta)
-    end
-    return C
-end
-
-
-function generic_mul!(C::AbstractArray, X::AbstractArray, s::Number, _add::MulAddMul)
-    if length(C) != length(X)
-        throw(DimensionMismatch("first array has length $(length(C)) which does not match the length of the second, $(length(X))."))
-    end
-    for (IC, IX) in zip(eachindex(C), eachindex(X))
-        @inbounds _modify!(_add, X[IX] * s, C, IC)
-    end
-    C
-end
-
-function generic_mul!(C::AbstractArray, s::Number, X::AbstractArray, _add::MulAddMul)
-    if length(C) != length(X)
-        throw(DimensionMismatch("first array has length $(length(C)) which does not
-match the length of the second, $(length(X))."))
-    end
-    for (IC, IX) in zip(eachindex(C), eachindex(X))
-        @inbounds _modify!(_add, s * X[IX], C, IC)
-    end
-    C
-end
-
-@inline function mul!(C::AbstractArray, s::Number, X::AbstractArray, alpha::Number, beta::Number)
-    if axes(C) == axes(X)
-        C .= (s .* X) .*ₛ alpha .+ C .*ₛ beta
-    else
-        generic_mul!(C, s, X, MulAddMul(alpha, beta))
-    end
-    return C
-end
-@inline function mul!(C::AbstractArray, X::AbstractArray, s::Number, alpha::Number, beta::Number)
-    if axes(C) == axes(X)
-        C .= (X .* s) .*ₛ alpha .+ C .*ₛ beta
-    else
-        generic_mul!(C, X, s, MulAddMul(alpha, beta))
-    end
-    return C
-end
-
-# For better performance when input and output are the same array
-# See https://github.com/JuliaLang/julia/issues/8415#issuecomment-56608729
-"""
-    rmul!(A::AbstractArray, b::Number)
-
-Scale an array `A` by a scalar `b` overwriting `A` in-place.  Use
-[`lmul!`](@ref) to multiply scalar from left.  The scaling operation
-respects the semantics of the multiplication [`*`](@ref) between an
-element of `A` and `b`.  In particular, this also applies to
-multiplication involving non-finite numbers such as `NaN` and `±Inf`.
-
-!!! compat "Julia 1.1"
-    Prior to Julia 1.1, `NaN` and `±Inf` entries in `A` were treated
-    inconsistently.
-
-# Examples
-```jldoctest
-julia> A = [1 2; 3 4]
-2×2 Matrix{Int64}:
- 1  2
- 3  4
-
-julia> rmul!(A, 2)
-2×2 Matrix{Int64}:
- 2  4
- 6  8
-
-julia> rmul!([NaN], 0.0)
-1-element Vector{Float64}:
- NaN
-```
-"""
-function rmul!(X::AbstractArray, s::Number)
-    @simd for I in eachindex(X)
-        @inbounds X[I] *= s
-    end
-    X
-end
-
-
-"""
-    lmul!(a::Number, B::AbstractArray)
-
-Scale an array `B` by a scalar `a` overwriting `B` in-place.  Use
-[`rmul!`](@ref) to multiply scalar from right.  The scaling operation
-respects the semantics of the multiplication [`*`](@ref) between `a`
-and an element of `B`.  In particular, this also applies to
-multiplication involving non-finite numbers such as `NaN` and `±Inf`.
-
-!!! compat "Julia 1.1"
-    Prior to Julia 1.1, `NaN` and `±Inf` entries in `B` were treated
-    inconsistently.
-
-# Examples
-```jldoctest
-julia> B = [1 2; 3 4]
-2×2 Matrix{Int64}:
- 1  2
- 3  4
-
-julia> lmul!(2, B)
-2×2 Matrix{Int64}:
- 2  4
- 6  8
-
-julia> lmul!(0.0, [Inf])
-1-element Vector{Float64}:
- NaN
-```
-"""
-function lmul!(s::Number, X::AbstractArray)
-    @simd for I in eachindex(X)
-        @inbounds X[I] = s*X[I]
-    end
-    X
-end
-
-"""
-    rdiv!(A::AbstractArray, b::Number)
-
-Divide each entry in an array `A` by a scalar `b` overwriting `A`
-in-place.  Use [`ldiv!`](@ref) to divide scalar from left.
-
-# Examples
-```jldoctest
-julia> A = [1.0 2.0; 3.0 4.0]
-2×2 Matrix{Float64}:
- 1.0  2.0
- 3.0  4.0
-
-julia> rdiv!(A, 2.0)
-2×2 Matrix{Float64}:
- 0.5  1.0
- 1.5  2.0
-```
-"""
-function rdiv!(X::AbstractArray, s::Number)
-    @simd for I in eachindex(X)
-        @inbounds X[I] /= s
-    end
-    X
-end
-
-"""
-    ldiv!(a::Number, B::AbstractArray)
-
-Divide each entry in an array `B` by a scalar `a` overwriting `B`
-in-place.  Use [`rdiv!`](@ref) to divide scalar from right.
-
-# Examples
-```jldoctest
-julia> B = [1.0 2.0; 3.0 4.0]
-2×2 Matrix{Float64}:
- 1.0  2.0
- 3.0  4.0
-
-julia> ldiv!(2.0, B)
-2×2 Matrix{Float64}:
- 0.5  1.0
- 1.5  2.0
-```
-"""
-function ldiv!(s::Number, X::AbstractArray)
-    @simd for I in eachindex(X)
-        @inbounds X[I] = s\X[I]
-    end
-    X
-end
-ldiv!(Y::AbstractArray, s::Number, X::AbstractArray) = Y .= s .\ X
-
-# Generic fallback. This assumes that B and Y have the same sizes.
-ldiv!(Y::AbstractArray, A::AbstractMatrix, B::AbstractArray) = ldiv!(A, copyto!(Y, B))
-
-
-"""
-    cross(x, y)
-    ×(x,y)
-
-Compute the cross product of two 3-vectors.
-
-# Examples
-```jldoctest
-julia> a = [0;1;0]
-3-element Vector{Int64}:
- 0
- 1
- 0
-
-julia> b = [0;0;1]
-3-element Vector{Int64}:
- 0
- 0
- 1
-
-julia> cross(a,b)
-3-element Vector{Int64}:
- 1
- 0
- 0
-```
-"""
-function cross(a::AbstractVector, b::AbstractVector)
-    if !(length(a) == length(b) == 3)
-        throw(DimensionMismatch("cross product is only defined for vectors of length 3"))
-    end
-    a1, a2, a3 = a
-    b1, b2, b3 = b
-    [a2*b3-a3*b2, a3*b1-a1*b3, a1*b2-a2*b1]
-end
-
-"""
-    triu(M)
-
-Upper triangle of a matrix.
-
-# Examples
-```jldoctest
-julia> a = fill(1.0, (4,4))
-4×4 Matrix{Float64}:
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
-
-julia> triu(a)
-4×4 Matrix{Float64}:
- 1.0  1.0  1.0  1.0
- 0.0  1.0  1.0  1.0
- 0.0  0.0  1.0  1.0
- 0.0  0.0  0.0  1.0
-```
-"""
-triu(M::AbstractMatrix) = triu!(copy(M))
-
-"""
-    tril(M)
-
-Lower triangle of a matrix.
-
-# Examples
-```jldoctest
-julia> a = fill(1.0, (4,4))
-4×4 Matrix{Float64}:
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
-
-julia> tril(a)
-4×4 Matrix{Float64}:
- 1.0  0.0  0.0  0.0
- 1.0  1.0  0.0  0.0
- 1.0  1.0  1.0  0.0
- 1.0  1.0  1.0  1.0
-```
-"""
-tril(M::AbstractMatrix) = tril!(copy(M))
-
-"""
-    triu(M, k::Integer)
-
-Return the upper triangle of `M` starting from the `k`th superdiagonal.
-
-# Examples
-```jldoctest
-julia> a = fill(1.0, (4,4))
-4×4 Matrix{Float64}:
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
-
-julia> triu(a,3)
-4×4 Matrix{Float64}:
- 0.0  0.0  0.0  1.0
- 0.0  0.0  0.0  0.0
- 0.0  0.0  0.0  0.0
- 0.0  0.0  0.0  0.0
-
-julia> triu(a,-3)
-4×4 Matrix{Float64}:
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
-```
-"""
-triu(M::AbstractMatrix,k::Integer) = triu!(copy(M),k)
-
-"""
-    tril(M, k::Integer)
-
-Return the lower triangle of `M` starting from the `k`th superdiagonal.
-
-# Examples
-```jldoctest
-julia> a = fill(1.0, (4,4))
-4×4 Matrix{Float64}:
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
-
-julia> tril(a,3)
-4×4 Matrix{Float64}:
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
- 1.0  1.0  1.0  1.0
-
-julia> tril(a,-3)
-4×4 Matrix{Float64}:
- 0.0  0.0  0.0  0.0
- 0.0  0.0  0.0  0.0
- 0.0  0.0  0.0  0.0
- 1.0  0.0  0.0  0.0
-```
-"""
-tril(M::AbstractMatrix,k::Integer) = tril!(copy(M),k)
-
-"""
-    triu!(M)
-
-Upper triangle of a matrix, overwriting `M` in the process.
-See also [`triu`](@ref).
-"""
-triu!(M::AbstractMatrix) = triu!(M,0)
-
-"""
-    tril!(M)
-
-Lower triangle of a matrix, overwriting `M` in the process.
-See also [`tril`](@ref).
-"""
-tril!(M::AbstractMatrix) = tril!(M,0)
-
-diag(A::AbstractVector) = throw(ArgumentError("use diagm instead of diag to construct a diagonal matrix"))
-
-###########################################################################################
-# Dot products and norms
-
-# special cases of norm; note that they don't need to handle isempty(x)
-generic_normMinusInf(x) = float(mapreduce(norm, min, x))
-
-generic_normInf(x) = float(mapreduce(norm, max, x))
-
-generic_norm1(x) = mapreduce(float ∘ norm, +, x)
-
-# faster computation of norm(x)^2, avoiding overflow for integers
-norm_sqr(x) = norm(x)^2
-norm_sqr(x::Number) = abs2(x)
-norm_sqr(x::Union{T,Complex{T},Rational{T}}) where {T<:Integer} = abs2(float(x))
-
-function generic_norm2(x)
-    maxabs = normInf(x)
-    (ismissing(maxabs) || iszero(maxabs) || isinf(maxabs)) && return maxabs
-    (v, s) = iterate(x)::Tuple
-    T = typeof(maxabs)
-    if isfinite(length(x)*maxabs*maxabs) && !iszero(maxabs*maxabs) # Scaling not necessary
-        sum::promote_type(Float64, T) = norm_sqr(v)
-        while true
-            y = iterate(x, s)
-            y === nothing && break
-            (v, s) = y
-            sum += norm_sqr(v)
-        end
-        ismissing(sum) && return missing
-        return convert(T, sqrt(sum))
-    else
-        sum = abs2(norm(v)/maxabs)
-        while true
-            y = iterate(x, s)
-            y === nothing && break
-            (v, s) = y
-            sum += (norm(v)/maxabs)^2
-        end
-        ismissing(sum) && return missing
-        return convert(T, maxabs*sqrt(sum))
-    end
-end
-
-# Compute L_p norm ‖x‖ₚ = sum(abs(x).^p)^(1/p)
-# (Not technically a "norm" for p < 1.)
-function generic_normp(x, p)
-    (v, s) = iterate(x)::Tuple
-    if p > 1 || p < -1 # might need to rescale to avoid overflow
-        maxabs = p > 1 ? normInf(x) : normMinusInf(x)
-        (ismissing(maxabs) || iszero(maxabs) || isinf(maxabs)) && return maxabs
-        T = typeof(maxabs)
-    else
-        T = typeof(float(norm(v)))
-    end
-    spp::promote_type(Float64, T) = p
-    if -1 <= p <= 1 || (isfinite(length(x)*maxabs^spp) && !iszero(maxabs^spp)) # scaling not necessary
-        sum::promote_type(Float64, T) = norm(v)^spp
-        while true
-            y = iterate(x, s)
-            y === nothing && break
-            (v, s) = y
-            ismissing(v) && return missing
-            sum += norm(v)^spp
-        end
-        return convert(T, sum^inv(spp))
-    else # rescaling
-        sum = (norm(v)/maxabs)^spp
-        ismissing(sum) && return missing
-        while true
-            y = iterate(x, s)
-            y === nothing && break
-            (v, s) = y
-            ismissing(v) && return missing
-            sum += (norm(v)/maxabs)^spp
-        end
-        return convert(T, maxabs*sum^inv(spp))
-    end
-end
-
-normMinusInf(x) = generic_normMinusInf(x)
-normInf(x) = generic_normInf(x)
-norm1(x) = generic_norm1(x)
-norm2(x) = generic_norm2(x)
-normp(x, p) = generic_normp(x, p)
-
-
-"""
-    norm(A, p::Real=2)
-
-For any iterable container `A` (including arrays of any dimension) of numbers (or any
-element type for which `norm` is defined), compute the `p`-norm (defaulting to `p=2`) as if
-`A` were a vector of the corresponding length.
-
-The `p`-norm is defined as
-```math
-\\|A\\|_p = \\left( \\sum_{i=1}^n | a_i | ^p \\right)^{1/p}
-```
-with ``a_i`` the entries of ``A``, ``| a_i |`` the [`norm`](@ref) of ``a_i``, and
-``n`` the length of ``A``. Since the `p`-norm is computed using the [`norm`](@ref)s
-of the entries of `A`, the `p`-norm of a vector of vectors is not compatible with
-the interpretation of it as a block vector in general if `p != 2`.
-
-`p` can assume any numeric value (even though not all values produce a
-mathematically valid vector norm). In particular, `norm(A, Inf)` returns the largest value
-in `abs.(A)`, whereas `norm(A, -Inf)` returns the smallest. If `A` is a matrix and `p=2`,
-then this is equivalent to the Frobenius norm.
-
-The second argument `p` is not necessarily a part of the interface for `norm`, i.e. a custom
-type may only implement `norm(A)` without second argument.
-
-Use [`opnorm`](@ref) to compute the operator norm of a matrix.
-
-# Examples
-```jldoctest
-julia> v = [3, -2, 6]
-3-element Vector{Int64}:
-  3
- -2
-  6
-
-julia> norm(v)
-7.0
-
-julia> norm(v, 1)
-11.0
-
-julia> norm(v, Inf)
-6.0
-
-julia> norm([1 2 3; 4 5 6; 7 8 9])
-16.881943016134134
-
-julia> norm([1 2 3 4 5 6 7 8 9])
-16.881943016134134
-
-julia> norm(1:9)
-16.881943016134134
-
-julia> norm(hcat(v,v), 1) == norm(vcat(v,v), 1) != norm([v,v], 1)
-true
-
-julia> norm(hcat(v,v), 2) == norm(vcat(v,v), 2) == norm([v,v], 2)
-true
-
-julia> norm(hcat(v,v), Inf) == norm(vcat(v,v), Inf) != norm([v,v], Inf)
-true
-```
-"""
-function norm(itr, p::Real=2)
-    isempty(itr) && return float(norm(zero(eltype(itr))))
-    if p == 2
-        return norm2(itr)
-    elseif p == 1
-        return norm1(itr)
-    elseif p == Inf
-        return normInf(itr)
-    elseif p == 0
-        return typeof(float(norm(first(itr))))(count(!iszero, itr))
-    elseif p == -Inf
-        return normMinusInf(itr)
-    else
-        normp(itr, p)
-    end
-end
-
-"""
-    norm(x::Number, p::Real=2)
-
-For numbers, return ``\\left( |x|^p \\right)^{1/p}``.
-
-# Examples
-```jldoctest
-julia> norm(2, 1)
-2.0
-
-julia> norm(-2, 1)
-2.0
-
-julia> norm(2, 2)
-2.0
-
-julia> norm(-2, 2)
-2.0
-
-julia> norm(2, Inf)
-2.0
-
-julia> norm(-2, Inf)
-2.0
-```
-"""
-@inline function norm(x::Number, p::Real=2)
-    afx = abs(float(x))
-    if p == 0
-        if iszero(x)
-            return zero(afx)
-        elseif !isnan(x)
-            return oneunit(afx)
-        else
-            return afx
-        end
-    else
-        return afx
-    end
-end
-norm(::Missing, p::Real=2) = missing
-
-# special cases of opnorm
-function opnorm1(A::AbstractMatrix{T}) where T
-    require_one_based_indexing(A)
-    m, n = size(A)
-    Tnorm = typeof(float(real(zero(T))))
-    Tsum = promote_type(Float64, Tnorm)
-    nrm::Tsum = 0
-    @inbounds begin
-        for j = 1:n
-            nrmj::Tsum = 0
-            for i = 1:m
-                nrmj += norm(A[i,j])
-            end
-            nrm = max(nrm,nrmj)
-        end
-    end
-    return convert(Tnorm, nrm)
-end
-
-function opnorm2(A::AbstractMatrix{T}) where T
-    require_one_based_indexing(A)
-    m,n = size(A)
-    Tnorm = typeof(float(real(zero(T))))
-    if m == 0 || n == 0 return zero(Tnorm) end
-    if m == 1 || n == 1 return norm2(A) end
-    return svdvals(A)[1]
-end
-
-function opnormInf(A::AbstractMatrix{T}) where T
-    require_one_based_indexing(A)
-    m,n = size(A)
-    Tnorm = typeof(float(real(zero(T))))
-    Tsum = promote_type(Float64, Tnorm)
-    nrm::Tsum = 0
-    @inbounds begin
-        for i = 1:m
-            nrmi::Tsum = 0
-            for j = 1:n
-                nrmi += norm(A[i,j])
-            end
-            nrm = max(nrm,nrmi)
-        end
-    end
-    return convert(Tnorm, nrm)
-end
-
-
-"""
-    opnorm(A::AbstractMatrix, p::Real=2)
-
-Compute the operator norm (or matrix norm) induced by the vector `p`-norm,
-where valid values of `p` are `1`, `2`, or `Inf`. (Note that for sparse matrices,
-`p=2` is currently not implemented.) Use [`norm`](@ref) to compute the Frobenius
-norm.
-
-When `p=1`, the operator norm is the maximum absolute column sum of `A`:
-```math
-\\|A\\|_1 = \\max_{1 ≤ j ≤ n} \\sum_{i=1}^m | a_{ij} |
-```
-with ``a_{ij}`` the entries of ``A``, and ``m`` and ``n`` its dimensions.
-
-When `p=2`, the operator norm is the spectral norm, equal to the largest
-singular value of `A`.
-
-When `p=Inf`, the operator norm is the maximum absolute row sum of `A`:
-```math
-\\|A\\|_\\infty = \\max_{1 ≤ i ≤ m} \\sum _{j=1}^n | a_{ij} |
-```
-
-# Examples
-```jldoctest
-julia> A = [1 -2 -3; 2 3 -1]
-2×3 Matrix{Int64}:
- 1  -2  -3
- 2   3  -1
-
-julia> opnorm(A, Inf)
-6.0
-
-julia> opnorm(A, 1)
-5.0
-```
-"""
-function opnorm(A::AbstractMatrix, p::Real=2)
-    if p == 2
-        return opnorm2(A)
-    elseif p == 1
-        return opnorm1(A)
-    elseif p == Inf
-        return opnormInf(A)
-    else
-        throw(ArgumentError("invalid p-norm p=$p. Valid: 1, 2, Inf"))
-    end
-end
-
-"""
-    opnorm(x::Number, p::Real=2)
-
-For numbers, return ``\\left( |x|^p \\right)^{1/p}``.
-This is equivalent to [`norm`](@ref).
-"""
-@inline opnorm(x::Number, p::Real=2) = norm(x, p)
-
-"""
-    opnorm(A::Adjoint{<:Any,<:AbstracVector}, q::Real=2)
-    opnorm(A::Transpose{<:Any,<:AbstracVector}, q::Real=2)
-
-For Adjoint/Transpose-wrapped vectors, return the operator ``q``-norm of `A`, which is
-equivalent to the `p`-norm with value `p = q/(q-1)`. They coincide at `p = q = 2`.
-Use [`norm`](@ref) to compute the `p` norm of `A` as a vector.
-
-The difference in norm between a vector space and its dual arises to preserve
-the relationship between duality and the dot product, and the result is
-consistent with the operator `p`-norm of a `1 × n` matrix.
-
-# Examples
-```jldoctest
-julia> v = [1; im];
-
-julia> vc = v';
-
-julia> opnorm(vc, 1)
-1.0
-
-julia> norm(vc, 1)
-2.0
-
-julia> norm(v, 1)
-2.0
-
-julia> opnorm(vc, 2)
-1.4142135623730951
-
-julia> norm(vc, 2)
-1.4142135623730951
-
-julia> norm(v, 2)
-1.4142135623730951
-
-julia> opnorm(vc, Inf)
-2.0
-
-julia> norm(vc, Inf)
-1.0
-
-julia> norm(v, Inf)
-1.0
-```
-"""
-opnorm(v::TransposeAbsVec, q::Real) = q == Inf ? norm(v.parent, 1) : norm(v.parent, q/(q-1))
-opnorm(v::AdjointAbsVec, q::Real) = q == Inf ? norm(conj(v.parent), 1) : norm(conj(v.parent), q/(q-1))
-opnorm(v::AdjointAbsVec) = norm(conj(v.parent))
-opnorm(v::TransposeAbsVec) = norm(v.parent)
-
-norm(v::AdjOrTrans, p::Real) = norm(v.parent, p)
-
-"""
-    dot(x, y)
-    x ⋅ y
-
-Compute the dot product between two vectors. For complex vectors, the first
-vector is conjugated.
-
-`dot` also works on arbitrary iterable objects, including arrays of any dimension,
-as long as `dot` is defined on the elements.
-
-`dot` is semantically equivalent to `sum(dot(vx,vy) for (vx,vy) in zip(x, y))`,
-with the added restriction that the arguments must have equal lengths.
-
-`x ⋅ y` (where `⋅` can be typed by tab-completing `\\cdot` in the REPL) is a synonym for
-`dot(x, y)`.
-
-# Examples
-```jldoctest
-julia> dot([1; 1], [2; 3])
-5
-
-julia> dot([im; im], [1; 1])
-0 - 2im
-
-julia> dot(1:5, 2:6)
-70
-
-julia> x = fill(2., (5,5));
-
-julia> y = fill(3., (5,5));
-
-julia> dot(x, y)
-150.0
-```
-"""
-function dot end
-
-function dot(x, y) # arbitrary iterables
-    ix = iterate(x)
-    iy = iterate(y)
-    if ix === nothing
-        if iy !== nothing
-            throw(DimensionMismatch("x and y are of different lengths!"))
-        end
-        return dot(zero(eltype(x)), zero(eltype(y)))
-    end
-    if iy === nothing
-        throw(DimensionMismatch("x and y are of different lengths!"))
-    end
-    (vx, xs) = ix
-    (vy, ys) = iy
-    s = dot(vx, vy)
-    while true
-        ix = iterate(x, xs)
-        iy = iterate(y, ys)
-        ix === nothing && break
-        iy === nothing && break
-        (vx, xs), (vy, ys) = ix, iy
-        s += dot(vx, vy)
-    end
-    if !(iy === nothing && ix === nothing)
-        throw(DimensionMismatch("x and y are of different lengths!"))
-    end
-    return s
-end
-
-dot(x::Number, y::Number) = conj(x) * y
-
-function dot(x::AbstractArray, y::AbstractArray)
-    lx = length(x)
-    if lx != length(y)
-        throw(DimensionMismatch("first array has length $(lx) which does not match the length of the second, $(length(y))."))
-    end
-    if lx == 0
-        return dot(zero(eltype(x)), zero(eltype(y)))
-    end
-    s = zero(dot(first(x), first(y)))
-    for (Ix, Iy) in zip(eachindex(x), eachindex(y))
-        @inbounds s += dot(x[Ix], y[Iy])
-    end
-    s
-end
-
-function dot(x::Adjoint{<:Union{Real,Complex}}, y::Adjoint{<:Union{Real,Complex}})
-    return conj(dot(parent(x), parent(y)))
-end
-dot(x::Transpose, y::Transpose) = dot(parent(x), parent(y))
-
-"""
-    dot(x, A, y)
-
-Compute the generalized dot product `dot(x, A*y)` between two vectors `x` and `y`,
-without storing the intermediate result of `A*y`. As for the two-argument
-[`dot(_,_)`](@ref), this acts recursively. Moreover, for complex vectors, the
-first vector is conjugated.
-
-!!! compat "Julia 1.4"
-    Three-argument `dot` requires at least Julia 1.4.
-
-# Examples
-```jldoctest
-julia> dot([1; 1], [1 2; 3 4], [2; 3])
-26
-
-julia> dot(1:5, reshape(1:25, 5, 5), 2:6)
-4850
-
-julia> ⋅(1:5, reshape(1:25, 5, 5), 2:6) == dot(1:5, reshape(1:25, 5, 5), 2:6)
-true
-```
-"""
-dot(x, A, y) = dot(x, A*y) # generic fallback for cases that are not covered by specialized methods
-
-function dot(x::AbstractVector, A::AbstractMatrix, y::AbstractVector)
-    (axes(x)..., axes(y)...) == axes(A) || throw(DimensionMismatch())
-    T = typeof(dot(first(x), first(A), first(y)))
-    s = zero(T)
-    i₁ = first(eachindex(x))
-    x₁ = first(x)
-    @inbounds for j in eachindex(y)
-        yj = y[j]
-        if !iszero(yj)
-            temp = zero(adjoint(A[i₁,j]) * x₁)
-            @simd for i in eachindex(x)
-                temp += adjoint(A[i,j]) * x[i]
-            end
-            s += dot(temp, yj)
-        end
-    end
-    return s
-end
-dot(x::AbstractVector, adjA::Adjoint, y::AbstractVector) = adjoint(dot(y, adjA.parent, x))
-dot(x::AbstractVector, transA::Transpose{<:Real}, y::AbstractVector) = adjoint(dot(y, transA.parent, x))
-
-###########################################################################################
-
-"""
-    rank(A::AbstractMatrix; atol::Real=0, rtol::Real=atol>0 ? 0 : n*ϵ)
-    rank(A::AbstractMatrix, rtol::Real)
-
-Compute the numerical rank of a matrix by counting how many outputs of
-`svdvals(A)` are greater than `max(atol, rtol*σ₁)` where `σ₁` is `A`'s largest
-calculated singular value. `atol` and `rtol` are the absolute and relative
-tolerances, respectively. The default relative tolerance is `n*ϵ`, where `n`
-is the size of the smallest dimension of `A`, and `ϵ` is the [`eps`](@ref) of
-the element type of `A`.
-
-!!! note
-    Numerical rank can be a sensitive and imprecise characterization of
-    ill-conditioned matrices with singular values that are close to the threshold
-    tolerance `max(atol, rtol*σ₁)`. In such cases, slight perturbations to the
-    singular-value computation or to the matrix can change the result of `rank`
-    by pushing one or more singular values across the threshold. These variations
-    can even occur due to changes in floating-point errors between different Julia
-    versions, architectures, compilers, or operating systems.
-
-!!! compat "Julia 1.1"
-    The `atol` and `rtol` keyword arguments requires at least Julia 1.1.
-    In Julia 1.0 `rtol` is available as a positional argument, but this
-    will be deprecated in Julia 2.0.
-
-# Examples
-```jldoctest
-julia> rank(Matrix(I, 3, 3))
-3
-
-julia> rank(diagm(0 => [1, 0, 2]))
-2
-
-julia> rank(diagm(0 => [1, 0.001, 2]), rtol=0.1)
-2
-
-julia> rank(diagm(0 => [1, 0.001, 2]), rtol=0.00001)
-3
-
-julia> rank(diagm(0 => [1, 0.001, 2]), atol=1.5)
-1
-```
-"""
-function rank(A::AbstractMatrix; atol::Real = 0.0, rtol::Real = (min(size(A)...)*eps(real(float(one(eltype(A))))))*iszero(atol))
-    isempty(A) && return 0 # 0-dimensional case
-    s = svdvals(A)
-    tol = max(atol, rtol*s[1])
-    count(>(tol), s)
-end
-rank(x::Union{Number,AbstractVector}) = iszero(x) ? 0 : 1
-
-"""
-    tr(M)
-
-Matrix trace. Sums the diagonal elements of `M`.
-
-# Examples
-```jldoctest
-julia> A = [1 2; 3 4]
-2×2 Matrix{Int64}:
- 1  2
- 3  4
-
-julia> tr(A)
-5
-```
-"""
-function tr(A::AbstractMatrix)
-    checksquare(A)
-    sum(diag(A))
-end
-tr(x::Number) = x
-
-#kron(a::AbstractVector, b::AbstractVector)
-#kron(a::AbstractMatrix{T}, b::AbstractMatrix{S}) where {T,S}
-
-#det(a::AbstractMatrix)
-
-"""
-    inv(M)
-
-Matrix inverse. Computes matrix `N` such that
-`M * N = I`, where `I` is the identity matrix.
-Computed by solving the left-division
-`N = M \\ I`.
-
-# Examples
-```jldoctest
-julia> M = [2 5; 1 3]
-2×2 Matrix{Int64}:
- 2  5
- 1  3
-
-julia> N = inv(M)
-2×2 Matrix{Float64}:
-  3.0  -5.0
- -1.0   2.0
-
-julia> M*N == N*M == Matrix(I, 2, 2)
-true
-```
-"""
-function inv(A::AbstractMatrix{T}) where T
-    n = checksquare(A)
-    S = typeof(zero(T)/one(T))      # dimensionful
-    S0 = typeof(zero(T)/oneunit(T)) # dimensionless
-    dest = Matrix{S0}(I, n, n)
-    ldiv!(factorize(convert(AbstractMatrix{S}, A)), dest)
-end
-inv(A::Adjoint) = adjoint(inv(parent(A)))
-inv(A::Transpose) = transpose(inv(parent(A)))
-
-pinv(v::AbstractVector{T}, tol::Real = real(zero(T))) where {T<:Real} = _vectorpinv(transpose, v, tol)
-pinv(v::AbstractVector{T}, tol::Real = real(zero(T))) where {T<:Complex} = _vectorpinv(adjoint, v, tol)
-pinv(v::AbstractVector{T}, tol::Real = real(zero(T))) where {T} = _vectorpinv(adjoint, v, tol)
-function _vectorpinv(dualfn::Tf, v::AbstractVector{Tv}, tol) where {Tv,Tf}
-    res = dualfn(similar(v, typeof(zero(Tv) / (abs2(one(Tv)) + abs2(one(Tv))))))
-    den = sum(abs2, v)
-    # as tol is the threshold relative to the maximum singular value, for a vector with
-    # single singular value σ=√den, σ ≦ tol*σ is equivalent to den=0 ∨ tol≥1
-    if iszero(den) || tol >= one(tol)
-        fill!(res, zero(eltype(res)))
-    else
-        res .= dualfn(v) ./ den
-    end
-    return res
-end
-
-# this method is just an optimization: literal negative powers of A are
-# already turned by literal_pow into powers of inv(A), but for A^-1 this
-# would turn into inv(A)^1 = copy(inv(A)), which makes an extra copy.
-@inline Base.literal_pow(::typeof(^), A::AbstractMatrix, ::Val{-1}) = inv(A)
-
-"""
-    \\(A, B)
-
-Matrix division using a polyalgorithm. For input matrices `A` and `B`, the result `X` is
-such that `A*X == B` when `A` is square. The solver that is used depends upon the structure
-of `A`.  If `A` is upper or lower triangular (or diagonal), no factorization of `A` is
-required and the system is solved with either forward or backward substitution.
-For non-triangular square matrices, an LU factorization is used.
-
-For rectangular `A` the result is the minimum-norm least squares solution computed by a
-pivoted QR factorization of `A` and a rank estimate of `A` based on the R factor.
-
-When `A` is sparse, a similar polyalgorithm is used. For indefinite matrices, the `LDLt`
-factorization does not use pivoting during the numerical factorization and therefore the
-procedure can fail even for invertible matrices.
-
-See also: [`factorize`](@ref), [`pinv`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1 0; 1 -2]; B = [32; -4];
-
-julia> X = A \\ B
-2-element Vector{Float64}:
- 32.0
- 18.0
-
-julia> A * X == B
-true
-```
-"""
-function (\)(A::AbstractMatrix, B::AbstractVecOrMat)
-    require_one_based_indexing(A, B)
-    m, n = size(A)
-    if m == n
-        if istril(A)
-            if istriu(A)
-                return Diagonal(A) \ B
-            else
-                return LowerTriangular(A) \ B
-            end
-        end
-        if istriu(A)
-            return UpperTriangular(A) \ B
-        end
-        return lu(A) \ B
-    end
-    return qr(A, ColumnNorm()) \ B
-end
-
-(\)(a::AbstractVector, b::AbstractArray) = pinv(a) * b
-"""
-    A / B
-
-Matrix right-division: `A / B` is equivalent to `(B' \\ A')'` where [`\\`](@ref) is the left-division operator.
-For square matrices, the result `X` is such that `A == X*B`.
-
-See also: [`rdiv!`](@ref).
-
-# Examples
-```jldoctest
-julia> A = Float64[1 4 5; 3 9 2]; B = Float64[1 4 2; 3 4 2; 8 7 1];
-
-julia> X = A / B
-2×3 Matrix{Float64}:
- -0.65   3.75  -1.2
-  3.25  -2.75   1.0
-
-julia> isapprox(A, X*B)
-true
-
-julia> isapprox(X, A*pinv(B))
-true
-```
-"""
-function (/)(A::AbstractVecOrMat, B::AbstractVecOrMat)
-    size(A,2) != size(B,2) && throw(DimensionMismatch("Both inputs should have the same number of columns"))
-    return copy(adjoint(adjoint(B) \ adjoint(A)))
-end
-
-cond(x::Number) = iszero(x) ? Inf : 1.0
-cond(x::Number, p) = cond(x)
-
-#Skeel condition numbers
-condskeel(A::AbstractMatrix, p::Real=Inf) = opnorm(abs.(inv(A))*abs.(A), p)
-
-"""
-    condskeel(M, [x, p::Real=Inf])
-
-```math
-\\kappa_S(M, p) = \\left\\Vert \\left\\vert M \\right\\vert \\left\\vert M^{-1} \\right\\vert \\right\\Vert_p \\\\
-\\kappa_S(M, x, p) = \\frac{\\left\\Vert \\left\\vert M \\right\\vert \\left\\vert M^{-1} \\right\\vert \\left\\vert x \\right\\vert \\right\\Vert_p}{\\left \\Vert x \\right \\Vert_p}
-```
-
-Skeel condition number ``\\kappa_S`` of the matrix `M`, optionally with respect to the
-vector `x`, as computed using the operator `p`-norm. ``\\left\\vert M \\right\\vert``
-denotes the matrix of (entry wise) absolute values of ``M``;
-``\\left\\vert M \\right\\vert_{ij} = \\left\\vert M_{ij} \\right\\vert``.
-Valid values for `p` are `1`, `2` and `Inf` (default).
-
-This quantity is also known in the literature as the Bauer condition number, relative
-condition number, or componentwise relative condition number.
-"""
-function condskeel(A::AbstractMatrix, x::AbstractVector, p::Real=Inf)
-    norm(abs.(inv(A))*(abs.(A)*abs.(x)), p) / norm(x, p)
-end
-
-issymmetric(A::AbstractMatrix{<:Real}) = ishermitian(A)
-
-"""
-    issymmetric(A) -> Bool
-
-Test whether a matrix is symmetric.
-
-# Examples
-```jldoctest
-julia> a = [1 2; 2 -1]
-2×2 Matrix{Int64}:
- 1   2
- 2  -1
-
-julia> issymmetric(a)
-true
-
-julia> b = [1 im; -im 1]
-2×2 Matrix{Complex{Int64}}:
- 1+0im  0+1im
- 0-1im  1+0im
-
-julia> issymmetric(b)
-false
-```
-"""
-function issymmetric(A::AbstractMatrix)
-    indsm, indsn = axes(A)
-    if indsm != indsn
-        return false
-    end
-    for i = first(indsn):last(indsn), j = (i):last(indsn)
-        if A[i,j] != transpose(A[j,i])
-            return false
-        end
-    end
-    return true
-end
-
-issymmetric(x::Number) = x == x
-
-"""
-    ishermitian(A) -> Bool
-
-Test whether a matrix is Hermitian.
-
-# Examples
-```jldoctest
-julia> a = [1 2; 2 -1]
-2×2 Matrix{Int64}:
- 1   2
- 2  -1
-
-julia> ishermitian(a)
-true
-
-julia> b = [1 im; -im 1]
-2×2 Matrix{Complex{Int64}}:
- 1+0im  0+1im
- 0-1im  1+0im
-
-julia> ishermitian(b)
-true
-```
-"""
-function ishermitian(A::AbstractMatrix)
-    indsm, indsn = axes(A)
-    if indsm != indsn
-        return false
-    end
-    for i = indsn, j = i:last(indsn)
-        if A[i,j] != adjoint(A[j,i])
-            return false
-        end
-    end
-    return true
-end
-
-ishermitian(x::Number) = (x == conj(x))
-
-"""
-    istriu(A::AbstractMatrix, k::Integer = 0) -> Bool
-
-Test whether `A` is upper triangular starting from the `k`th superdiagonal.
-
-# Examples
-```jldoctest
-julia> a = [1 2; 2 -1]
-2×2 Matrix{Int64}:
- 1   2
- 2  -1
-
-julia> istriu(a)
-false
-
-julia> istriu(a, -1)
-true
-
-julia> b = [1 im; 0 -1]
-2×2 Matrix{Complex{Int64}}:
- 1+0im   0+1im
- 0+0im  -1+0im
-
-julia> istriu(b)
-true
-
-julia> istriu(b, 1)
-false
-```
-"""
-function istriu(A::AbstractMatrix, k::Integer = 0)
-    require_one_based_indexing(A)
-    return _istriu(A, k)
-end
-istriu(x::Number) = true
-
-@inline function _istriu(A::AbstractMatrix, k)
-    m, n = size(A)
-    for j in 1:min(n, m + k - 1)
-        all(iszero, view(A, max(1, j - k + 1):m, j)) || return false
-    end
-    return true
-end
-
-"""
-    istril(A::AbstractMatrix, k::Integer = 0) -> Bool
-
-Test whether `A` is lower triangular starting from the `k`th superdiagonal.
-
-# Examples
-```jldoctest
-julia> a = [1 2; 2 -1]
-2×2 Matrix{Int64}:
- 1   2
- 2  -1
-
-julia> istril(a)
-false
-
-julia> istril(a, 1)
-true
-
-julia> b = [1 0; -im -1]
-2×2 Matrix{Complex{Int64}}:
- 1+0im   0+0im
- 0-1im  -1+0im
-
-julia> istril(b)
-true
-
-julia> istril(b, -1)
-false
-```
-"""
-function istril(A::AbstractMatrix, k::Integer = 0)
-    require_one_based_indexing(A)
-    return _istril(A, k)
-end
-istril(x::Number) = true
-
-@inline function _istril(A::AbstractMatrix, k)
-    m, n = size(A)
-    for j in max(1, k + 2):n
-        all(iszero, view(A, 1:min(j - k - 1, m), j)) || return false
-    end
-    return true
-end
-
-"""
-    isbanded(A::AbstractMatrix, kl::Integer, ku::Integer) -> Bool
-
-Test whether `A` is banded with lower bandwidth starting from the `kl`th superdiagonal
-and upper bandwidth extending through the `ku`th superdiagonal.
-
-# Examples
-```jldoctest
-julia> a = [1 2; 2 -1]
-2×2 Matrix{Int64}:
- 1   2
- 2  -1
-
-julia> LinearAlgebra.isbanded(a, 0, 0)
-false
-
-julia> LinearAlgebra.isbanded(a, -1, 1)
-true
-
-julia> b = [1 0; -im -1] # lower bidiagonal
-2×2 Matrix{Complex{Int64}}:
- 1+0im   0+0im
- 0-1im  -1+0im
-
-julia> LinearAlgebra.isbanded(b, 0, 0)
-false
-
-julia> LinearAlgebra.isbanded(b, -1, 0)
-true
-```
-"""
-isbanded(A::AbstractMatrix, kl::Integer, ku::Integer) = istriu(A, kl) && istril(A, ku)
-
-"""
-    isdiag(A) -> Bool
-
-Test whether a matrix is diagonal in the sense that `iszero(A[i,j])` is true unless `i == j`.
-Note that it is not necessary for `A` to be square;
-if you would also like to check that, you need to check that `size(A, 1) == size(A, 2)`.
-
-# Examples
-```jldoctest
-julia> a = [1 2; 2 -1]
-2×2 Matrix{Int64}:
- 1   2
- 2  -1
-
-julia> isdiag(a)
-false
-
-julia> b = [im 0; 0 -im]
-2×2 Matrix{Complex{Int64}}:
- 0+1im  0+0im
- 0+0im  0-1im
-
-julia> isdiag(b)
-true
-
-julia> c = [1 0 0; 0 2 0]
-2×3 Matrix{Int64}:
- 1  0  0
- 0  2  0
-
-julia> isdiag(c)
-true
-
-julia> d = [1 0 0; 0 2 3]
-2×3 Matrix{Int64}:
- 1  0  0
- 0  2  3
-
-julia> isdiag(d)
-false
-```
-"""
-isdiag(A::AbstractMatrix) = isbanded(A, 0, 0)
-isdiag(x::Number) = true
-
-"""
-    axpy!(α, x::AbstractArray, y::AbstractArray)
-
-Overwrite `y` with `x * α + y` and return `y`.
-If `x` and `y` have the same axes, it's equivalent with `y .+= x .* a`.
-
-# Examples
-```jldoctest
-julia> x = [1; 2; 3];
-
-julia> y = [4; 5; 6];
-
-julia> axpy!(2, x, y)
-3-element Vector{Int64}:
-  6
-  9
- 12
-```
-"""
-function axpy!(α, x::AbstractArray, y::AbstractArray)
-    n = length(x)
-    if n != length(y)
-        throw(DimensionMismatch("x has length $n, but y has length $(length(y))"))
-    end
-    iszero(α) && return y
-    for (IY, IX) in zip(eachindex(y), eachindex(x))
-        @inbounds y[IY] += x[IX]*α
-    end
-    return y
-end
-
-function axpy!(α, x::AbstractArray, rx::AbstractArray{<:Integer}, y::AbstractArray, ry::AbstractArray{<:Integer})
-    if length(rx) != length(ry)
-        throw(DimensionMismatch("rx has length $(length(rx)), but ry has length $(length(ry))"))
-    elseif !checkindex(Bool, eachindex(IndexLinear(), x), rx)
-        throw(BoundsError(x, rx))
-    elseif !checkindex(Bool, eachindex(IndexLinear(), y), ry)
-        throw(BoundsError(y, ry))
-    end
-    iszero(α) && return y
-    for (IY, IX) in zip(eachindex(ry), eachindex(rx))
-        @inbounds y[ry[IY]] += x[rx[IX]]*α
-    end
-    return y
-end
-
-"""
-    axpby!(α, x::AbstractArray, β, y::AbstractArray)
-
-Overwrite `y` with `x * α + y * β` and return `y`.
-If `x` and `y` have the same axes, it's equivalent with `y .= x .* a .+ y .* β`.
-
-# Examples
-```jldoctest
-julia> x = [1; 2; 3];
-
-julia> y = [4; 5; 6];
-
-julia> axpby!(2, x, 2, y)
-3-element Vector{Int64}:
- 10
- 14
- 18
-```
-"""
-function axpby!(α, x::AbstractArray, β, y::AbstractArray)
-    if length(x) != length(y)
-        throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))"))
-    end
-    iszero(α) && isone(β) && return y
-    for (IX, IY) in zip(eachindex(x), eachindex(y))
-        @inbounds y[IY] = x[IX]*α + y[IY]*β
-    end
-    y
-end
-
-DenseLike{T} = Union{DenseArray{T}, Base.StridedReshapedArray{T}, Base.StridedReinterpretArray{T}}
-StridedVecLike{T} = Union{DenseLike{T}, Base.FastSubArray{T,<:Any,<:DenseLike{T}}}
-axpy!(α::Number, x::StridedVecLike{T}, y::StridedVecLike{T}) where {T<:BlasFloat} = BLAS.axpy!(α, x, y)
-axpby!(α::Number, x::StridedVecLike{T}, β::Number, y::StridedVecLike{T}) where {T<:BlasFloat} = BLAS.axpby!(α, x, β, y)
-function axpy!(α::Number,
-    x::StridedVecLike{T}, rx::AbstractRange{<:Integer},
-    y::StridedVecLike{T}, ry::AbstractRange{<:Integer},
-) where {T<:BlasFloat}
-    if Base.has_offset_axes(rx, ry)
-        return @invoke axpy!(α,
-            x::AbstractArray, rx::AbstractArray{<:Integer},
-            y::AbstractArray, ry::AbstractArray{<:Integer},
-        )
-    end
-    @views BLAS.axpy!(α, x[rx], y[ry])
-    return y
-end
-
-"""
-    rotate!(x, y, c, s)
-
-Overwrite `x` with `c*x + s*y` and `y` with `-conj(s)*x + c*y`.
-Returns `x` and `y`.
-
-!!! compat "Julia 1.5"
-    `rotate!` requires at least Julia 1.5.
-"""
-function rotate!(x::AbstractVector, y::AbstractVector, c, s)
-    require_one_based_indexing(x, y)
-    n = length(x)
-    if n != length(y)
-        throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))"))
-    end
-    @inbounds for i = 1:n
-        xi, yi = x[i], y[i]
-        x[i] =       c *xi + s*yi
-        y[i] = -conj(s)*xi + c*yi
-    end
-    return x, y
-end
-
-"""
-    reflect!(x, y, c, s)
-
-Overwrite `x` with `c*x + s*y` and `y` with `conj(s)*x - c*y`.
-Returns `x` and `y`.
-
-!!! compat "Julia 1.5"
-    `reflect!` requires at least Julia 1.5.
-"""
-function reflect!(x::AbstractVector, y::AbstractVector, c, s)
-    require_one_based_indexing(x, y)
-    n = length(x)
-    if n != length(y)
-        throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))"))
-    end
-    @inbounds for i = 1:n
-        xi, yi = x[i], y[i]
-        x[i] =      c *xi + s*yi
-        y[i] = conj(s)*xi - c*yi
-    end
-    return x, y
-end
-
-# Elementary reflection similar to LAPACK. The reflector is not Hermitian but
-# ensures that tridiagonalization of Hermitian matrices become real. See lawn72
-@inline function reflector!(x::AbstractVector{T}) where {T}
-    require_one_based_indexing(x)
-    n = length(x)
-    n == 0 && return zero(eltype(x))
-    @inbounds begin
-        ξ1 = x[1]
-        normu = norm(x)
-        if iszero(normu)
-            return zero(ξ1/normu)
-        end
-        ν = T(copysign(normu, real(ξ1)))
-        ξ1 += ν
-        x[1] = -ν
-        for i = 2:n
-            x[i] /= ξ1
-        end
-    end
-    ξ1/ν
-end
-
-"""
-    reflectorApply!(x, τ, A)
-
-Multiplies `A` in-place by a Householder reflection on the left. It is equivalent to `A .= (I - τ*[1; x] * [1; x]')*A`.
-"""
-@inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractVecOrMat)
-    require_one_based_indexing(x)
-    m, n = size(A, 1), size(A, 2)
-    if length(x) != m
-        throw(DimensionMismatch("reflector has length $(length(x)), which must match the first dimension of matrix A, $m"))
-    end
-    m == 0 && return A
-    @inbounds for j = 1:n
-        Aj, xj = view(A, 2:m, j), view(x, 2:m)
-        vAj = conj(τ)*(A[1, j] + dot(xj, Aj))
-        A[1, j] -= vAj
-        axpy!(-vAj, xj, Aj)
-    end
-    return A
-end
-
-"""
-    det(M)
-
-Matrix determinant.
-
-See also: [`logdet`](@ref) and [`logabsdet`](@ref).
-
-# Examples
-```jldoctest
-julia> M = [1 0; 2 2]
-2×2 Matrix{Int64}:
- 1  0
- 2  2
-
-julia> det(M)
-2.0
-```
-"""
-function det(A::AbstractMatrix{T}) where {T}
-    if istriu(A) || istril(A)
-        S = promote_type(T, typeof((one(T)*zero(T) + zero(T))/one(T)))
-        return convert(S, det(UpperTriangular(A)))
-    end
-    return det(lu(A; check = false))
-end
-det(x::Number) = x
-
-# Resolve Issue #40128
-det(A::AbstractMatrix{BigInt}) = det_bareiss(A)
-
-"""
-    logabsdet(M)
-
-Log of absolute value of matrix determinant. Equivalent to
-`(log(abs(det(M))), sign(det(M)))`, but may provide increased accuracy and/or speed.
-
-# Examples
-```jldoctest
-julia> A = [-1. 0.; 0. 1.]
-2×2 Matrix{Float64}:
- -1.0  0.0
-  0.0  1.0
-
-julia> det(A)
--1.0
-
-julia> logabsdet(A)
-(0.0, -1.0)
-
-julia> B = [2. 0.; 0. 1.]
-2×2 Matrix{Float64}:
- 2.0  0.0
- 0.0  1.0
-
-julia> det(B)
-2.0
-
-julia> logabsdet(B)
-(0.6931471805599453, 1.0)
-```
-"""
-logabsdet(A::AbstractMatrix) = logabsdet(lu(A, check=false))
-
-logabsdet(a::Number) = log(abs(a)), sign(a)
-
-"""
-    logdet(M)
-
-Log of matrix determinant. Equivalent to `log(det(M))`, but may provide
-increased accuracy and/or speed.
-
-# Examples
-```jldoctest
-julia> M = [1 0; 2 2]
-2×2 Matrix{Int64}:
- 1  0
- 2  2
-
-julia> logdet(M)
-0.6931471805599453
-
-julia> logdet(Matrix(I, 3, 3))
-0.0
-```
-"""
-function logdet(A::AbstractMatrix)
-    d,s = logabsdet(A)
-    return d + log(s)
-end
-
-logdet(A) = log(det(A))
-
-const NumberArray{T<:Number} = AbstractArray{T}
-
-exactdiv(a, b) = a/b
-exactdiv(a::Integer, b::Integer) = div(a, b)
-
-"""
-    det_bareiss!(M)
-
-Calculates the determinant of a matrix using the
-[Bareiss Algorithm](https://en.wikipedia.org/wiki/Bareiss_algorithm) using
-inplace operations.
-
-# Examples
-```jldoctest
-julia> M = [1 0; 2 2]
-2×2 Matrix{Int64}:
- 1  0
- 2  2
-
-julia> LinearAlgebra.det_bareiss!(M)
-2
-```
-"""
-function det_bareiss!(M)
-    n = checksquare(M)
-    sign, prev = Int8(1), one(eltype(M))
-    for i in 1:n-1
-        if iszero(M[i,i]) # swap with another col to make nonzero
-            swapto = findfirst(!iszero, @view M[i,i+1:end])
-            isnothing(swapto) && return zero(prev)
-            sign = -sign
-            Base.swapcols!(M, i, i + swapto)
-        end
-        for k in i+1:n, j in i+1:n
-            M[j,k] = exactdiv(M[j,k]*M[i,i] - M[j,i]*M[i,k], prev)
-        end
-        prev = M[i,i]
-    end
-    return sign * M[end,end]
-end
-"""
-    LinearAlgebra.det_bareiss(M)
-
-Calculates the determinant of a matrix using the
-[Bareiss Algorithm](https://en.wikipedia.org/wiki/Bareiss_algorithm).
-Also refer to [`det_bareiss!`](@ref).
-"""
-det_bareiss(M) = det_bareiss!(copy(M))
-
-
-
-"""
-    promote_leaf_eltypes(itr)
-
-For an (possibly nested) iterable object `itr`, promote the types of leaf
-elements.  Equivalent to `promote_type(typeof(leaf1), typeof(leaf2), ...)`.
-Currently supports only numeric leaf elements.
-
-# Examples
-```jldoctest
-julia> a = [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]]
-3-element Vector{Any}:
-  Any[1, 2, [3, 4]]
- 5.0
-  Any[0 + 6im, [7.0, 8.0]]
-
-julia> LinearAlgebra.promote_leaf_eltypes(a)
-ComplexF64 (alias for Complex{Float64})
-```
-"""
-promote_leaf_eltypes(x::Union{AbstractArray{T},Tuple{T,Vararg{T}}}) where {T<:Number} = T
-promote_leaf_eltypes(x::Union{AbstractArray{T},Tuple{T,Vararg{T}}}) where {T<:NumberArray} = eltype(T)
-promote_leaf_eltypes(x::T) where {T} = T
-promote_leaf_eltypes(x::Union{AbstractArray,Tuple}) = mapreduce(promote_leaf_eltypes, promote_type, x; init=Bool)
-
-# isapprox: approximate equality of arrays [like isapprox(Number,Number)]
-# Supports nested arrays; e.g., for `a = [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]]`
-# `a ≈ a` is `true`.
-function isapprox(x::AbstractArray, y::AbstractArray;
-    atol::Real=0,
-    rtol::Real=Base.rtoldefault(promote_leaf_eltypes(x),promote_leaf_eltypes(y),atol),
-    nans::Bool=false, norm::Function=norm)
-    d = norm(x - y)
-    if isfinite(d)
-        return iszero(rtol) ? d <= atol : d <= max(atol, rtol*max(norm(x), norm(y)))
-    else
-        # Fall back to a component-wise approximate comparison
-        # (mapreduce instead of all for greater generality [#44893])
-        return mapreduce((a, b) -> isapprox(a, b; rtol=rtol, atol=atol, nans=nans), &, x, y)
-    end
-end
-
-"""
-    normalize!(a::AbstractArray, p::Real=2)
-
-Normalize the array `a` in-place so that its `p`-norm equals unity,
-i.e. `norm(a, p) == 1`.
-See also [`normalize`](@ref) and [`norm`](@ref).
-"""
-function normalize!(a::AbstractArray, p::Real=2)
-    nrm = norm(a, p)
-    __normalize!(a, nrm)
-end
-
-@inline function __normalize!(a::AbstractArray, nrm)
-    # The largest positive floating point number whose inverse is less than infinity
-    δ = inv(prevfloat(typemax(nrm)))
-    if nrm ≥ δ # Safe to multiply with inverse
-        invnrm = inv(nrm)
-        rmul!(a, invnrm)
-    else # scale elements to avoid overflow
-        εδ = eps(one(nrm))/δ
-        rmul!(a, εδ)
-        rmul!(a, inv(nrm*εδ))
-    end
-    return a
-end
-
-"""
-    normalize(a, p::Real=2)
-
-Normalize `a` so that its `p`-norm equals unity,
-i.e. `norm(a, p) == 1`. For scalars, this is similar to sign(a),
-except normalize(0) = NaN.
-See also [`normalize!`](@ref), [`norm`](@ref), and [`sign`](@ref).
-
-# Examples
-```jldoctest
-julia> a = [1,2,4];
-
-julia> b = normalize(a)
-3-element Vector{Float64}:
- 0.2182178902359924
- 0.4364357804719848
- 0.8728715609439696
-
-julia> norm(b)
-1.0
-
-julia> c = normalize(a, 1)
-3-element Vector{Float64}:
- 0.14285714285714285
- 0.2857142857142857
- 0.5714285714285714
-
-julia> norm(c, 1)
-1.0
-
-julia> a = [1 2 4 ; 1 2 4]
-2×3 Matrix{Int64}:
- 1  2  4
- 1  2  4
-
-julia> norm(a)
-6.48074069840786
-
-julia> normalize(a)
-2×3 Matrix{Float64}:
- 0.154303  0.308607  0.617213
- 0.154303  0.308607  0.617213
-
-julia> normalize(3, 1)
-1.0
-
-julia> normalize(-8, 1)
--1.0
-
-julia> normalize(0, 1)
-NaN
-```
-"""
-function normalize(a::AbstractArray, p::Real = 2)
-    nrm = norm(a, p)
-    if !isempty(a)
-        aa = copymutable_oftype(a, typeof(first(a)/nrm))
-        return __normalize!(aa, nrm)
-    else
-        T = typeof(zero(eltype(a))/nrm)
-        return T[]
-    end
-end
-
-normalize(x) = x / norm(x)
-normalize(x, p::Real) = x / norm(x, p)
diff --git a/stdlib/LinearAlgebra/src/givens.jl b/stdlib/LinearAlgebra/src/givens.jl
deleted file mode 100644
index 4668d5f542a91..0000000000000
--- a/stdlib/LinearAlgebra/src/givens.jl
+++ /dev/null
@@ -1,430 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# givensAlgorithm functions are derived from LAPACK, see below
-
-abstract type AbstractRotation{T} end
-struct AdjointRotation{T,S<:AbstractRotation{T}} <: AbstractRotation{T}
-    R::S
-end
-
-transpose(R::AbstractRotation) = error("transpose not implemented for $(typeof(R)). Consider using adjoint instead of transpose.")
-
-(*)(R::AbstractRotation, A::AbstractVector) = _rot_mul_vecormat(R, A)
-(*)(R::AbstractRotation, A::AbstractMatrix) = _rot_mul_vecormat(R, A)
-function _rot_mul_vecormat(R::AbstractRotation{T}, A::AbstractVecOrMat{S}) where {T,S}
-    TS = typeof(zero(T)*zero(S) + zero(T)*zero(S))
-    lmul!(convert(AbstractRotation{TS}, R), copy_similar(A, TS))
-end
-
-(*)(A::AbstractVector, R::AbstractRotation) = _vecormat_mul_rot(A, R)
-(*)(A::AbstractMatrix, R::AbstractRotation) = _vecormat_mul_rot(A, R)
-function _vecormat_mul_rot(A::AbstractVecOrMat{T}, R::AbstractRotation{S}) where {T,S}
-    TS = typeof(zero(T)*zero(S) + zero(T)*zero(S))
-    rmul!(copy_similar(A, TS), convert(AbstractRotation{TS}, R))
-end
-
-"""
-    LinearAlgebra.Givens(i1,i2,c,s) -> G
-
-A Givens rotation linear operator. The fields `c` and `s` represent the cosine and sine of
-the rotation angle, respectively. The `Givens` type supports left multiplication `G*A` and
-conjugated transpose right multiplication `A*G'`. The type doesn't have a `size` and can
-therefore be multiplied with matrices of arbitrary size as long as `i2<=size(A,2)` for
-`G*A` or `i2<=size(A,1)` for `A*G'`.
-
-See also [`givens`](@ref).
-"""
-struct Givens{T} <: AbstractRotation{T}
-    i1::Int
-    i2::Int
-    c::T
-    s::T
-end
-struct Rotation{T} <: AbstractRotation{T}
-    rotations::Vector{Givens{T}}
-end
-
-convert(::Type{T}, r::T) where {T<:AbstractRotation} = r
-convert(::Type{T}, r::AbstractRotation) where {T<:AbstractRotation} = T(r)::T
-convert(::Type{AbstractRotation{T}}, r::AdjointRotation) where {T} = convert(AbstractRotation{T}, r.R)'
-convert(::Type{AbstractRotation{T}}, r::AdjointRotation{T}) where {T} = r
-
-Givens(i1, i2, c, s) = Givens(i1, i2, promote(c, s)...)
-Givens{T}(G::Givens{T}) where {T} = G
-Givens{T}(G::Givens) where {T} = Givens(G.i1, G.i2, convert(T, G.c), convert(T, G.s))
-Rotation{T}(R::Rotation{T}) where {T} = R
-Rotation{T}(R::Rotation) where {T} = Rotation{T}([Givens{T}(g) for g in R.rotations])
-AbstractRotation{T}(G::Givens) where {T} = Givens{T}(G)
-AbstractRotation{T}(R::Rotation) where {T} = Rotation{T}(R)
-
-adjoint(G::Givens) = Givens(G.i1, G.i2, G.c', -G.s)
-adjoint(R::AbstractRotation) = AdjointRotation(R)
-adjoint(adjR::AdjointRotation) = adjR.R
-
-Base.copy(aR::AdjointRotation{T,Rotation{T}}) where {T} =
-    Rotation{T}([r' for r in Iterators.reverse(aR.R.rotations)])
-
-floatmin2(::Type{Float32}) = reinterpret(Float32, 0x26000000)
-floatmin2(::Type{Float64}) = reinterpret(Float64, 0x21a0000000000000)
-floatmin2(::Type{T}) where {T} = (twopar = 2one(T); twopar^trunc(Integer,log(floatmin(T)/eps(T))/log(twopar)/twopar))
-
-# derived from LAPACK's dlartg
-# Copyright:
-# Univ. of Tennessee
-# Univ. of California Berkeley
-# Univ. of Colorado Denver
-# NAG Ltd.
-function givensAlgorithm(f::T, g::T) where T<:AbstractFloat
-    onepar = one(T)
-    twopar = 2one(T)
-    T0 = typeof(onepar) # dimensionless
-    zeropar = T0(zero(T)) # must be dimensionless
-
-    # need both dimensionful and dimensionless versions of these:
-    safmn2 = floatmin2(T0)
-    safmn2u = floatmin2(T)
-    safmx2 = one(T)/safmn2
-    safmx2u = oneunit(T)/safmn2
-
-    if g == 0
-        cs = onepar
-        sn = zeropar
-        r = f
-    elseif f == 0
-        cs = zeropar
-        sn = onepar
-        r = g
-    else
-        f1 = f
-        g1 = g
-        scalepar = max(abs(f1), abs(g1))
-        if scalepar >= safmx2u
-            count = 0
-            while true
-                count += 1
-                f1 *= safmn2
-                g1 *= safmn2
-                scalepar = max(abs(f1), abs(g1))
-                if scalepar < safmx2u break end
-            end
-            r = sqrt(f1*f1 + g1*g1)
-            cs = f1/r
-            sn = g1/r
-            for i = 1:count
-                r *= safmx2
-            end
-        elseif scalepar <= safmn2u
-            count = 0
-            while true
-                count += 1
-                f1 *= safmx2
-                g1 *= safmx2
-                scalepar = max(abs(f1), abs(g1))
-                if scalepar > safmn2u break end
-            end
-            r = sqrt(f1*f1 + g1*g1)
-            cs = f1/r
-            sn = g1/r
-            for i = 1:count
-                r *= safmn2
-            end
-        else
-            r = sqrt(f1*f1 + g1*g1)
-            cs = f1/r
-            sn = g1/r
-        end
-        if abs(f) > abs(g) && cs < 0
-            cs = -cs
-            sn = -sn
-            r = -r
-        end
-    end
-    return cs, sn, r
-end
-
-# derived from LAPACK's zlartg
-# Copyright:
-# Univ. of Tennessee
-# Univ. of California Berkeley
-# Univ. of Colorado Denver
-# NAG Ltd.
-function givensAlgorithm(f::Complex{T}, g::Complex{T}) where T<:AbstractFloat
-    twopar, onepar = 2one(T), one(T)
-    T0 = typeof(onepar) # dimensionless
-    zeropar = T0(zero(T)) # must be dimensionless
-    czero = complex(zeropar)
-
-    abs1(ff) = max(abs(real(ff)), abs(imag(ff)))
-    safmin = floatmin(T0)
-    safmn2 = floatmin2(T0)
-    safmn2u = floatmin2(T)
-    safmx2 = one(T)/safmn2
-    safmx2u = oneunit(T)/safmn2
-    scalepar = max(abs1(f), abs1(g))
-    fs = f
-    gs = g
-    count = 0
-    if scalepar >= safmx2u
-        while true
-            count += 1
-            fs *= safmn2
-            gs *= safmn2
-            scalepar *= safmn2
-            if scalepar < safmx2u break end
-        end
-    elseif scalepar <= safmn2u
-        if g == 0
-            cs = onepar
-            sn = czero
-            r = f
-            return cs, sn, r
-        end
-        while true
-            count -= 1
-            fs *= safmx2
-            gs *= safmx2
-            scalepar *= safmx2
-            if scalepar > safmn2u break end
-        end
-    end
-    f2 = abs2(fs)
-    g2 = abs2(gs)
-    if f2 <= max(g2, oneunit(T))*safmin
-        # This is a rare case: F is very small.
-        if f == 0
-            cs = zero(T)
-            r = complex(hypot(real(g), imag(g)))
-            # do complex/real division explicitly with two real divisions
-            d = hypot(real(gs), imag(gs))
-            sn = complex(real(gs)/d, -imag(gs)/d)
-            return cs, sn, r
-        end
-        f2s = hypot(real(fs), imag(fs))
-        # g2 and g2s are accurate
-        # g2 is at least safmin, and g2s is at least safmn2
-        g2s = sqrt(g2)
-        # error in cs from underflow in f2s is at most
-        # unfl / safmn2 .lt. sqrt(unfl*eps) .lt. eps
-        # if max(g2,one)=g2, then f2 .lt. g2*safmin,
-        # and so cs .lt. sqrt(safmin)
-        # if max(g2,one)=one, then f2 .lt. safmin
-        # and so cs .lt. sqrt(safmin)/safmn2 = sqrt(eps)
-        # therefore, cs = f2s/g2s / sqrt( 1 + (f2s/g2s)**2 ) = f2s/g2s
-        cs = f2s/g2s
-        # make sure abs(ff) = 1
-        # do complex/real division explicitly with 2 real divisions
-        if abs1(f) > 1
-            d = hypot(real(f), imag(f))
-            ff = complex(real(f)/d, imag(f)/d)
-        else
-            dr = safmx2*real(f)
-            di = safmx2*imag(f)
-            d = hypot(dr, di)
-            ff = complex(dr/d, di/d)
-        end
-        sn = ff*complex(real(gs)/g2s, -imag(gs)/g2s)
-        r = cs*f + sn*g
-    else
-        # This is the most common case.
-        # Neither F2 nor F2/G2 are less than SAFMIN
-        # F2S cannot overflow, and it is accurate
-        f2s = sqrt(onepar + g2/f2)
-        # do the f2s(real)*fs(complex) multiply with two real multiplies
-        r = complex(f2s*real(fs), f2s*imag(fs))
-        cs = onepar/f2s
-        d = f2 + g2
-        # do complex/real division explicitly with two real divisions
-        sn = complex(real(r)/d, imag(r)/d)
-        sn *= conj(gs)
-        if count != 0
-            if count > 0
-                for i = 1:count
-                    r *= safmx2
-                end
-            else
-                for i = 1:-count
-                    r *= safmn2
-                end
-            end
-        end
-    end
-    return cs, sn, r
-end
-
-# enable for unitful quantities
-function givensAlgorithm(f::T, g::T) where T
-    fs = f / oneunit(T)
-    gs = g / oneunit(T)
-    typeof(fs) === T && typeof(gs) === T &&
-    !isa(fs, Union{AbstractFloat,Complex{<:AbstractFloat}}) &&
-    throw(MethodError(givensAlgorithm, (fs, gs)))
-
-    c, s, r = givensAlgorithm(fs, gs)
-    return c, s, r * oneunit(T)
-end
-
-givensAlgorithm(f, g) = givensAlgorithm(promote(float(f), float(g))...)
-
-"""
-
-    givens(f::T, g::T, i1::Integer, i2::Integer) where {T} -> (G::Givens, r::T)
-
-Computes the Givens rotation `G` and scalar `r` such that for any vector `x` where
-```
-x[i1] = f
-x[i2] = g
-```
-the result of the multiplication
-```
-y = G*x
-```
-has the property that
-```
-y[i1] = r
-y[i2] = 0
-```
-
-See also [`LinearAlgebra.Givens`](@ref).
-"""
-function givens(f::T, g::T, i1::Integer, i2::Integer) where T
-    if i1 == i2
-        throw(ArgumentError("Indices must be distinct."))
-    end
-    c, s, r = givensAlgorithm(f, g)
-    if i1 > i2
-        s = -conj(s)
-        i1, i2 = i2, i1
-    end
-    Givens(i1, i2, c, s), r
-end
-"""
-    givens(A::AbstractArray, i1::Integer, i2::Integer, j::Integer) -> (G::Givens, r)
-
-Computes the Givens rotation `G` and scalar `r` such that the result of the multiplication
-```
-B = G*A
-```
-has the property that
-```
-B[i1,j] = r
-B[i2,j] = 0
-```
-
-See also [`LinearAlgebra.Givens`](@ref).
-"""
-givens(A::AbstractMatrix, i1::Integer, i2::Integer, j::Integer) =
-    givens(A[i1,j], A[i2,j], i1, i2)
-
-
-"""
-    givens(x::AbstractVector, i1::Integer, i2::Integer) -> (G::Givens, r)
-
-Computes the Givens rotation `G` and scalar `r` such that the result of the multiplication
-```
-B = G*x
-```
-has the property that
-```
-B[i1] = r
-B[i2] = 0
-```
-
-See also [`LinearAlgebra.Givens`](@ref).
-"""
-givens(x::AbstractVector, i1::Integer, i2::Integer) = givens(x[i1], x[i2], i1, i2)
-
-function getindex(G::Givens, i::Integer, j::Integer)
-    if i == j
-        if i == G.i1 || i == G.i2
-            G.c
-        else
-            oneunit(G.c)
-        end
-    elseif i == G.i1 && j == G.i2
-        G.s
-    elseif i == G.i2 && j == G.i1
-        -conj(G.s)
-    else
-        zero(G.s)
-    end
-end
-
-@inline function lmul!(G::Givens, A::AbstractVecOrMat)
-    require_one_based_indexing(A)
-    m, n = size(A, 1), size(A, 2)
-    if G.i2 > m
-        throw(DimensionMismatch("column indices for rotation are outside the matrix"))
-    end
-    @inbounds for i = 1:n
-        a1, a2 = A[G.i1,i], A[G.i2,i]
-        A[G.i1,i] =       G.c *a1 + G.s*a2
-        A[G.i2,i] = -conj(G.s)*a1 + G.c*a2
-    end
-    return A
-end
-@inline function rmul!(A::AbstractMatrix, G::Givens)
-    require_one_based_indexing(A)
-    m, n = size(A, 1), size(A, 2)
-    if G.i2 > n
-        throw(DimensionMismatch("column indices for rotation are outside the matrix"))
-    end
-    @inbounds for i = 1:m
-        a1, a2 = A[i,G.i1], A[i,G.i2]
-        A[i,G.i1] = a1*G.c - a2*G.s'
-        A[i,G.i2] = a1*G.s + a2*G.c
-    end
-    return A
-end
-
-function lmul!(G::Givens, R::Rotation)
-    push!(R.rotations, G)
-    return R
-end
-function rmul!(R::Rotation, G::Givens)
-    pushfirst!(R.rotations, G)
-    return R
-end
-
-function lmul!(R::Rotation, A::AbstractVecOrMat)
-    @inbounds for i in eachindex(R.rotations)
-        lmul!(R.rotations[i], A)
-    end
-    return A
-end
-function rmul!(A::AbstractMatrix, R::Rotation)
-    @inbounds for i in eachindex(R.rotations)
-        rmul!(A, R.rotations[i])
-    end
-    return A
-end
-
-function lmul!(adjR::AdjointRotation{<:Any,<:Rotation}, A::AbstractVecOrMat)
-    R = adjR.R
-    @inbounds for i in eachindex(R.rotations)
-        lmul!(adjoint(R.rotations[i]), A)
-    end
-    return A
-end
-function rmul!(A::AbstractMatrix, adjR::AdjointRotation{<:Any,<:Rotation})
-    R = adjR.R
-    @inbounds for i in eachindex(R.rotations)
-        rmul!(A, adjoint(R.rotations[i]))
-    end
-    return A
-end
-
-function *(G1::Givens{S}, G2::Givens{T}) where {S,T}
-    TS = promote_type(T, S)
-    Rotation{TS}([convert(AbstractRotation{TS}, G2), convert(AbstractRotation{TS}, G1)])
-end
-function *(G::Givens{T}, Gs::Givens{T}...) where {T}
-    return Rotation([reverse(Gs)..., G])
-end
-function *(G::Givens{S}, R::Rotation{T}) where {S,T}
-    TS = promote_type(T, S)
-    Rotation(vcat(convert(AbstractRotation{TS}, R).rotations, convert(AbstractRotation{TS}, G)))
-end
-function *(R::Rotation{S}, G::Givens{T}) where {S,T}
-    TS = promote_type(T, S)
-    Rotation(vcat(convert(AbstractRotation{TS}, G), convert(AbstractRotation{TS}, R).rotations))
-end
diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl
deleted file mode 100644
index 179f93f2cd6f2..0000000000000
--- a/stdlib/LinearAlgebra/src/hessenberg.jl
+++ /dev/null
@@ -1,610 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-######################################################################################
-# Upper-Hessenberg matrices H+μI, analogous to the UpperTriangular type
-
-"""
-    UpperHessenberg(A::AbstractMatrix)
-
-Construct an `UpperHessenberg` view of the matrix `A`.
-Entries of `A` below the first subdiagonal are ignored.
-
-!!! compat "Julia 1.3"
-    This type was added in Julia 1.3.
-
-Efficient algorithms are implemented for `H \\ b`, `det(H)`, and similar.
-
-See also the [`hessenberg`](@ref) function to factor any matrix into a similar
-upper-Hessenberg matrix.
-
-If `F::Hessenberg` is the factorization object, the unitary matrix can be accessed
-with `F.Q` and the Hessenberg matrix with `F.H`. When `Q` is extracted, the resulting
-type is the `HessenbergQ` object, and may be converted to a regular matrix with
-[`convert(Array, _)`](@ref) (or `Array(_)` for short).
-
-Iterating the decomposition produces the factors `F.Q` and `F.H`.
-
-# Examples
-```jldoctest
-julia> A = [1 2 3 4; 5 6 7 8; 9 10 11 12; 13 14 15 16]
-4×4 Matrix{Int64}:
-  1   2   3   4
-  5   6   7   8
-  9  10  11  12
- 13  14  15  16
-
-julia> UpperHessenberg(A)
-4×4 UpperHessenberg{Int64, Matrix{Int64}}:
- 1   2   3   4
- 5   6   7   8
- ⋅  10  11  12
- ⋅   ⋅  15  16
-```
-"""
-struct UpperHessenberg{T,S<:AbstractMatrix{T}} <: AbstractMatrix{T}
-    data::S
-
-    function UpperHessenberg{T,S}(data) where {T,S<:AbstractMatrix{T}}
-        require_one_based_indexing(data)
-        new{T,S}(data)
-    end
-end
-UpperHessenberg(H::UpperHessenberg) = H
-UpperHessenberg{T}(A::AbstractMatrix) where {T} = UpperHessenberg(convert(AbstractMatrix{T}, A))
-UpperHessenberg{T}(H::UpperHessenberg) where {T} = UpperHessenberg{T}(H.data)
-UpperHessenberg(A::AbstractMatrix) = UpperHessenberg{eltype(A),typeof(A)}(A)
-Matrix(H::UpperHessenberg{T}) where {T} = Matrix{T}(H)
-Array(H::UpperHessenberg) = Matrix(H)
-size(H::UpperHessenberg, d) = size(H.data, d)
-size(H::UpperHessenberg) = size(H.data)
-parent(H::UpperHessenberg) = H.data
-
-# similar behaves like UpperTriangular
-similar(H::UpperHessenberg, ::Type{T}) where {T} = UpperHessenberg(similar(H.data, T))
-similar(H::UpperHessenberg, ::Type{T}, dims::Dims{N}) where {T,N} = similar(H.data, T, dims)
-
-AbstractMatrix{T}(H::UpperHessenberg) where {T} = UpperHessenberg(AbstractMatrix{T}(H.data))
-
-copy(H::UpperHessenberg) = UpperHessenberg(copy(H.data))
-real(H::UpperHessenberg{<:Real}) = H
-real(H::UpperHessenberg{<:Complex}) = UpperHessenberg(triu!(real(H.data),-1))
-imag(H::UpperHessenberg) = UpperHessenberg(triu!(imag(H.data),-1))
-
-function istriu(A::UpperHessenberg, k::Integer=0)
-    k <= -1 && return true
-    return _istriu(A, k)
-end
-
-function Matrix{T}(H::UpperHessenberg) where T
-    m,n = size(H)
-    return triu!(copyto!(Matrix{T}(undef, m, n), H.data), -1)
-end
-
-Base.isassigned(H::UpperHessenberg, i::Int, j::Int) =
-    i <= j+1 ? isassigned(H.data, i, j) : true
-
-getindex(H::UpperHessenberg{T}, i::Integer, j::Integer) where {T} =
-    i <= j+1 ? convert(T, H.data[i,j]) : zero(T)
-
-function setindex!(A::UpperHessenberg, x, i::Integer, j::Integer)
-    if i > j+1
-        x == 0 || throw(ArgumentError("cannot set index in the lower triangular part " *
-            "($i, $j) of an UpperHessenberg matrix to a nonzero value ($x)"))
-    else
-        A.data[i,j] = x
-    end
-    return A
-end
-
-function Base.replace_in_print_matrix(A::UpperHessenberg, i::Integer, j::Integer, s::AbstractString)
-    return i <= j+1 ? s : Base.replace_with_centered_mark(s)
-end
-
-Base.copy(A::Adjoint{<:Any,<:UpperHessenberg}) = tril!(adjoint!(similar(A.parent.data), A.parent.data), 1)
-Base.copy(A::Transpose{<:Any,<:UpperHessenberg}) = tril!(transpose!(similar(A.parent.data), A.parent.data), 1)
-
--(A::UpperHessenberg) = UpperHessenberg(-A.data)
-rmul!(H::UpperHessenberg, x::Number) = (rmul!(H.data, x); H)
-lmul!(x::Number, H::UpperHessenberg) = (lmul!(x, H.data); H)
-
-fillstored!(H::UpperHessenberg, x) = (fillband!(H.data, x, -1, size(H,2)-1); H)
-
-+(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data+B.data)
--(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data-B.data)
-
-for T = (:UniformScaling, :Diagonal, :Bidiagonal, :Tridiagonal, :SymTridiagonal,
-         :UpperTriangular, :UnitUpperTriangular)
-    for op = (:+, :-)
-        @eval begin
-            $op(H::UpperHessenberg, x::$T) = UpperHessenberg($op(H.data, x))
-            $op(x::$T, H::UpperHessenberg) = UpperHessenberg($op(x, H.data))
-        end
-    end
-end
-
-for T = (:Number, :UniformScaling, :Diagonal)
-    @eval begin
-        *(H::UpperHessenberg, x::$T) = UpperHessenberg(H.data * x)
-        *(x::$T, H::UpperHessenberg) = UpperHessenberg(x * H.data)
-        /(H::UpperHessenberg, x::$T) = UpperHessenberg(H.data / x)
-        \(x::$T, H::UpperHessenberg) = UpperHessenberg(x \ H.data)
-    end
-end
-
-function *(H::UpperHessenberg, U::UpperOrUnitUpperTriangular)
-    HH = _mulmattri!(_initarray(*, eltype(H), eltype(U), H), H, U)
-    UpperHessenberg(HH)
-end
-function *(U::UpperOrUnitUpperTriangular, H::UpperHessenberg)
-    HH = _multrimat!(_initarray(*, eltype(U), eltype(H), H), U, H)
-    UpperHessenberg(HH)
-end
-
-function /(H::UpperHessenberg, U::UpperTriangular)
-    HH = _rdiv!(_initarray(/, eltype(H), eltype(U), H), H, U)
-    UpperHessenberg(HH)
-end
-function /(H::UpperHessenberg, U::UnitUpperTriangular)
-    HH = _rdiv!(_initarray(/, eltype(H), eltype(U), H), H, U)
-    UpperHessenberg(HH)
-end
-
-function \(U::UpperTriangular, H::UpperHessenberg)
-    HH = ldiv!(_initarray(\, eltype(U), eltype(H), H), U, H)
-    UpperHessenberg(HH)
-end
-function \(U::UnitUpperTriangular, H::UpperHessenberg)
-    HH = ldiv!(_initarray(\, eltype(U), eltype(H), H), U, H)
-    UpperHessenberg(HH)
-end
-
-# Solving (H+µI)x = b: we can do this in O(m²) time and O(m) memory
-# (in-place in x) by the RQ algorithm from:
-#
-#    G. Henry, "The shifted Hessenberg system solve computation," Tech. Rep. 94–163,
-#    Center for Appl. Math., Cornell University (1994).
-#
-# as reviewed in
-#
-#    C. Beattie et al., "A note on shifted Hessenberg systems and frequency
-#    response computation," ACM Trans. Math. Soft. 38, pp. 12:6–12:16 (2011)
-#
-# (Note, however, that there is apparently a typo in Algorithm 1 of the
-#  Beattie paper: the Givens rotation uses u(k), not H(k,k) - σ.)
-#
-# Essentially, it works by doing a Givens RQ factorization of H+µI from
-# right to left, and doing backsubstitution *simultaneously*.
-
-# solve (H+μI)X = B, storing result in B
-function ldiv!(F::UpperHessenberg, B::AbstractVecOrMat; shift::Number=false)
-    checksquare(F)
-    m = size(F,1)
-    m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side # rows != $m"))
-    require_one_based_indexing(B)
-    n = size(B,2)
-    H = F.data
-    μ = shift
-    u = Vector{typeof(zero(eltype(H))+μ)}(undef, m) # for last rotated col of H-μI
-    copyto!(u, 1, H, m*(m-1)+1, m) # u .= H[:,m]
-    u[m] += μ
-    X = B # not a copy, just rename to match paper
-    cs = Vector{Tuple{real(eltype(u)),eltype(u)}}(undef, length(u)) # store Givens rotations
-    @inbounds for k = m:-1:2
-        c, s, ρ = givensAlgorithm(u[k], H[k,k-1])
-        cs[k] = (c, s)
-        for i = 1:n
-            X[k,i] /= ρ
-            t₁ = s * X[k,i]; t₂ = c * X[k,i]
-            @simd for j = 1:k-2
-                X[j,i] -= u[j]*t₂ + H[j,k-1]*t₁
-            end
-            X[k-1,i] -= u[k-1]*t₂ + (H[k-1,k-1] + μ) * t₁
-        end
-        @simd for j = 1:k-2
-            u[j] = H[j,k-1]*c - u[j]*s'
-        end
-        u[k-1] = (H[k-1,k-1] + μ) * c - u[k-1]*s'
-    end
-    for i = 1:n
-        τ₁ = X[1,i] / u[1]
-        @inbounds for j = 2:m
-            τ₂ = X[j,i]
-            c, s = cs[j]
-            X[j-1,i] = c*τ₁ + s*τ₂
-            τ₁ = c*τ₂ - s'τ₁
-        end
-        X[m,i] = τ₁
-    end
-    return X
-end
-
-# solve X(H+μI) = B, storing result in B
-#
-# Note: this can be derived from the Henry (1994) algorithm
-# by transformation to F(Hᵀ+µI)F FXᵀ = FBᵀ, where
-# F is the permutation matrix that reverses the order
-# of rows/cols.  Essentially, we take the ldiv! algorithm,
-# swap indices of H and X to transpose, and reverse the
-# order of the H indices (or the order of the loops).
-function rdiv!(B::AbstractMatrix, F::UpperHessenberg; shift::Number=false)
-    checksquare(F)
-    m = size(F,1)
-    m != size(B,2) && throw(DimensionMismatch("wrong right-hand-side # cols != $m"))
-    require_one_based_indexing(B)
-    n = size(B,1)
-    H = F.data
-    μ = shift
-    u = Vector{typeof(zero(eltype(H))+μ)}(undef, m) # for last rotated row of H-μI
-    u .= @view H[1,:]
-    u[1] += μ
-    X = B # not a copy, just rename to match paper
-    cs = Vector{Tuple{real(eltype(u)),eltype(u)}}(undef, length(u)) # store Givens rotations
-    @inbounds for k = 1:m-1
-        c, s, ρ = givensAlgorithm(u[k], H[k+1,k])
-        cs[k] = (c, s)
-        for i = 1:n
-            X[i,k] /= ρ
-            t₁ = s * X[i,k]; t₂ = c * X[i,k]
-            @simd for j = k+2:m
-                X[i,j] -= u[j]*t₂ + H[k+1,j]*t₁
-            end
-            X[i,k+1] -= u[k+1]*t₂ + (H[k+1,k+1] + μ) * t₁
-        end
-        @simd for j = k+2:m
-            u[j] = H[k+1,j]*c - u[j]*s'
-        end
-        u[k+1] = (H[k+1,k+1] + μ) * c - u[k+1]*s'
-    end
-    for i = 1:n
-        τ₁ = X[i,m] / u[m]
-        @inbounds for j = m-1:-1:1
-            τ₂ = X[i,j]
-            c, s = cs[j]
-            X[i,j+1] = c*τ₁ + s*τ₂
-            τ₁ = c*τ₂ - s'τ₁
-        end
-        X[i,1] = τ₁
-    end
-    return X
-end
-
-# Hessenberg-matrix determinant formula for H+μI based on:
-#
-#    N. D. Cahill, J. R. D’Errico, D. A. Narayan, and J. Y. Narayan, "Fibonacci determinants,"
-#    College Math. J. 33, pp. 221-225 (2003).
-#
-# as reviewed in Theorem 2.1 of:
-#
-#    K. Kaygisiz and A. Sahin, "Determinant and permanent of Hessenberg matrix and generalized Lucas polynomials,"
-#    arXiv:1111.4067 (2011).
-#
-# Cost is O(m²) with O(m) storage.
-function det(F::UpperHessenberg; shift::Number=false)
-    checksquare(F)
-    H = F.data
-    m = size(H,1)
-    μ = shift
-    m == 0 && return one(zero(eltype(H)) + μ)
-    determinant = H[1,1] + μ
-    prevdeterminant = one(determinant)
-    m == 1 && return determinant
-    prods = Vector{typeof(determinant)}(undef, m-1) # temporary storage for partial products
-    @inbounds for n = 2:m
-        prods[n-1] = prevdeterminant
-        prevdeterminant = determinant
-        determinant *= H[n,n] + μ
-        h = H[n,n-1]
-        @simd for r = n-1:-2:2
-            determinant -= H[r,n] * (prods[r] *= h) - H[r-1,n] * (prods[r-1] *= h)
-        end
-        if iseven(n)
-            determinant -= H[1,n] * (prods[1] *= h)
-        end
-    end
-    return determinant
-end
-
-# O(m²) log-determinant based on first doing Givens RQ to put H+μI into upper-triangular form and then
-# taking the product of the diagonal entries.   The trick is that we only need O(m) temporary storage,
-# because we don't need to store the whole Givens-rotated matrix, only the most recent column.
-# We do RQ (column rotations) rather than QR (row rotations) for more consecutive memory access.
-# (We could also use it for det instead of the Cahill algorithm above.  Cahill is slightly faster
-#  for very small matrices where you are likely to use det, and also uses only ± and * so it can
-#  be applied to Hessenberg matrices over other number fields.)
-function logabsdet(F::UpperHessenberg; shift::Number=false)
-    checksquare(F)
-    H = F.data
-    m = size(H,1)
-    μ = shift
-    P = one(zero(eltype(H)) + μ)
-    logdeterminant = zero(real(P))
-    m == 0 && return (logdeterminant, P)
-    g = Vector{typeof(P)}(undef, m) # below, g is the k-th col of Givens-rotated H+μI matrix
-    copyto!(g, 1, H, m*(m-1)+1, m) # g .= H[:,m]
-    g[m] += μ
-    @inbounds for k = m:-1:2
-        c, s, ρ = givensAlgorithm(g[k], H[k,k-1])
-        logdeterminant += log(abs(ρ))
-        P *= sign(ρ)
-        g[k-1] = c*(H[k-1,k-1] + μ) - s'*g[k-1]
-        @simd for j = 1:k-2
-            g[j] = c*H[j,k-1] - s'*g[j]
-        end
-    end
-    logdeterminant += log(abs(g[1]))
-    P *= sign(g[1])
-    return (logdeterminant, P)
-end
-
-function dot(x::AbstractVector, H::UpperHessenberg, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    m = size(H, 1)
-    (length(x) == m == length(y)) || throw(DimensionMismatch())
-    if iszero(m)
-        return dot(zero(eltype(x)), zero(eltype(H)), zero(eltype(y)))
-    end
-    x₁ = x[1]
-    r = dot(x₁, H[1,1], y[1])
-    r += dot(x[2], H[2,1], y[1])
-    @inbounds for j in 2:m-1
-        yj = y[j]
-        if !iszero(yj)
-            temp = adjoint(H[1,j]) * x₁
-            @simd for i in 2:j+1
-                temp += adjoint(H[i,j]) * x[i]
-            end
-            r += dot(temp, yj)
-        end
-    end
-    ym = y[m]
-    if !iszero(ym)
-        temp = adjoint(H[1,m]) * x₁
-        @simd for i in 2:m
-            temp += adjoint(H[i,m]) * x[i]
-        end
-        r += dot(temp, ym)
-    end
-    return r
-end
-
-######################################################################################
-# Hessenberg factorizations Q(H+μI)Q' of A+μI:
-
-"""
-    Hessenberg <: Factorization
-
-A `Hessenberg` object represents the Hessenberg factorization `QHQ'` of a square
-matrix, or a shift `Q(H+μI)Q'` thereof, which is produced by the [`hessenberg`](@ref) function.
-"""
-struct Hessenberg{T,SH<:AbstractMatrix,S<:AbstractMatrix,W<:AbstractVector,V<:Number} <: Factorization{T}
-    H::SH # UpperHessenberg or SymTridiagonal
-    uplo::Char
-    factors::S # reflector data in uplo triangle, may share data with H
-    τ::W # more Q (reflector) data
-    μ::V # diagonal shift for copy-free (F+μI) \ b solves and similar
-end
-Hessenberg(factors::AbstractMatrix, τ::AbstractVector, H::AbstractMatrix=UpperHessenberg(factors), uplo::AbstractChar='L'; μ::Number=false) =
-    Hessenberg{typeof(zero(eltype(factors))+μ),typeof(H),typeof(factors),typeof(τ),typeof(μ)}(H, uplo, factors, τ, μ)
-Hessenberg(F::Hessenberg) = F
-Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.factors, F.τ, F.H, F.uplo; μ=μ)
-
-copy(F::Hessenberg{<:Any,<:UpperHessenberg}) = Hessenberg(copy(F.factors), copy(F.τ); μ=F.μ)
-copy(F::Hessenberg{<:Any,<:SymTridiagonal}) = Hessenberg(copy(F.factors), copy(F.τ), copy(F.H), F.uplo; μ=F.μ)
-size(F::Hessenberg, d::Integer) = size(F.H, d)
-size(F::Hessenberg) = size(F.H)
-
-transpose(F::Hessenberg{<:Real}) = F'
-transpose(::Hessenberg) =
-    throw(ArgumentError("transpose of Hessenberg decomposition is not supported, consider using adjoint"))
-
-# iteration for destructuring into components
-Base.iterate(S::Hessenberg) = (S.Q, Val(:H))
-Base.iterate(S::Hessenberg, ::Val{:H}) = (S.H, Val(:μ))
-Base.iterate(S::Hessenberg, ::Val{:μ}) = (S.μ, Val(:done))
-Base.iterate(S::Hessenberg, ::Val{:done}) = nothing
-
-hessenberg!(A::StridedMatrix{<:BlasFloat}) = Hessenberg(LAPACK.gehrd!(A)...)
-
-function hessenberg!(A::Union{Symmetric{<:BlasReal,<:StridedMatrix},Hermitian{<:BlasFloat,<:StridedMatrix}})
-    factors, τ, d, e = LAPACK.hetrd!(A.uplo, A.data)
-    return Hessenberg(factors, τ, SymTridiagonal(d, e), A.uplo)
-end
-
-"""
-    hessenberg!(A) -> Hessenberg
-
-`hessenberg!` is the same as [`hessenberg`](@ref), but saves space by overwriting
-the input `A`, instead of creating a copy.
-"""
-hessenberg!(A::AbstractMatrix)
-
-"""
-    hessenberg(A) -> Hessenberg
-
-Compute the Hessenberg decomposition of `A` and return a `Hessenberg` object. If `F` is the
-factorization object, the unitary matrix can be accessed with `F.Q` (of type `LinearAlgebra.HessenbergQ`)
-and the Hessenberg matrix with `F.H` (of type [`UpperHessenberg`](@ref)), either of
-which may be converted to a regular matrix with `Matrix(F.H)` or `Matrix(F.Q)`.
-
-If `A` is [`Hermitian`](@ref) or real-[`Symmetric`](@ref), then the Hessenberg
-decomposition produces a real-symmetric tridiagonal matrix and `F.H` is of type
-[`SymTridiagonal`](@ref).
-
-Note that the shifted factorization `A+μI = Q (H+μI) Q'` can be
-constructed efficiently by `F + μ*I` using the [`UniformScaling`](@ref)
-object [`I`](@ref), which creates a new `Hessenberg` object with shared storage
-and a modified shift.   The shift of a given `F` is obtained by `F.μ`.
-This is useful because multiple shifted solves `(F + μ*I) \\ b`
-(for different `μ` and/or `b`) can be performed efficiently once `F` is created.
-
-Iterating the decomposition produces the factors `F.Q, F.H, F.μ`.
-
-# Examples
-```jldoctest
-julia> A = [4. 9. 7.; 4. 4. 1.; 4. 3. 2.]
-3×3 Matrix{Float64}:
- 4.0  9.0  7.0
- 4.0  4.0  1.0
- 4.0  3.0  2.0
-
-julia> F = hessenberg(A)
-Hessenberg{Float64, UpperHessenberg{Float64, Matrix{Float64}}, Matrix{Float64}, Vector{Float64}, Bool}
-Q factor: 3×3 LinearAlgebra.HessenbergQ{Float64, Matrix{Float64}, Vector{Float64}, false}
-H factor:
-3×3 UpperHessenberg{Float64, Matrix{Float64}}:
-  4.0      -11.3137       -1.41421
- -5.65685    5.0           2.0
-   ⋅        -8.88178e-16   1.0
-
-julia> F.Q * F.H * F.Q'
-3×3 Matrix{Float64}:
- 4.0  9.0  7.0
- 4.0  4.0  1.0
- 4.0  3.0  2.0
-
-julia> q, h = F; # destructuring via iteration
-
-julia> q == F.Q && h == F.H
-true
-```
-"""
-hessenberg(A::AbstractMatrix{T}) where T =
-    hessenberg!(eigencopy_oftype(A, eigtype(T)))
-
-function show(io::IO, mime::MIME"text/plain", F::Hessenberg)
-    summary(io, F)
-    if !iszero(F.μ)
-        print("\nwith shift μI for μ = ", F.μ)
-    end
-    print(io, "\nQ factor: ")
-    show(io, mime, F.Q)
-    println(io, "\nH factor:")
-    show(io, mime, F.H)
-end
-
-function getproperty(F::Hessenberg, d::Symbol)
-    d === :Q && return HessenbergQ(F)
-    return getfield(F, d)
-end
-
-Base.propertynames(F::Hessenberg, private::Bool=false) =
-    (:Q, :H, :μ, (private ? (:τ, :factors, :uplo) : ())...)
-
-AbstractArray(F::Hessenberg) = AbstractMatrix(F)
-Matrix(F::Hessenberg) = Array(AbstractArray(F))
-Array(F::Hessenberg) = Matrix(F)
-function AbstractMatrix(F::Hessenberg)
-    Q = F.Q
-    A = rmul!(lmul!(Q, Matrix{eltype(Q)}(F.H)), Q')
-    μ = F.μ
-    if iszero(μ)
-        return A
-    elseif typeof(zero(eltype(A))+μ) <: eltype(A) # can shift A in-place
-        for i = 1:size(A,1)
-            @inbounds A[i,i] += μ
-        end
-        return A
-    else
-        return A + μ*I # allocate another matrix, e.g. if A is real and μ is complex
-    end
-end
-
-# multiply x by the entries of M in the upper-k triangle, which contains
-# the entries of the upper-Hessenberg matrix H for k=-1
-function rmul_triu!(M::AbstractMatrix, x, k::Integer=0)
-    require_one_based_indexing(M)
-    m, n = size(M)
-    for j = 1:n, i = 1:min(j-k,m)
-        @inbounds M[i,j] *= x
-    end
-    return M
-end
-function lmul_triu!(x, M::AbstractMatrix, k::Integer=0)
-    require_one_based_indexing(M)
-    m, n = size(M)
-    for j = 1:n, i = 1:min(j-k,m)
-        @inbounds M[i,j] = x * M[i,j]
-    end
-    return M
-end
-
-# when H is UpperHessenberg, it shares data with F.factors
-# multiply Hessenberg by scalar (but don't modify lower triangle of F.H.data)
-rmul!(F::Hessenberg{<:Any,<:UpperHessenberg{T}}, x::T) where {T<:Number} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ; μ=F.μ*x)
-lmul!(x::T, F::Hessenberg{<:Any,<:UpperHessenberg{T}}) where {T<:Number} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ; μ=x*F.μ)
-
-rmul!(F::Hessenberg{<:Any,<:SymTridiagonal{T}}, x::T) where {T<:Number} = Hessenberg(F.factors, F.τ, SymTridiagonal(F.H.dv*x, F.H.ev*x), F.uplo; μ=F.μ*x)
-lmul!(x::T, F::Hessenberg{<:Any,<:SymTridiagonal{T}}) where {T<:Number} = Hessenberg(F.factors, F.τ, SymTridiagonal(x*F.H.dv, x*F.H.ev), F.uplo; μ=x*F.μ)
-
-# Promote F * x or x * F.  In general, we don't know how to do promotions
-# that would change the element type of F.H, however.
-function (*)(F::Hessenberg{<:Any,<:AbstractMatrix{T}}, x::S) where {T,S<:Number}
-    TS = typeof(zero(T) * x)
-    if TS === T
-        return rmul!(copy(F), convert(T, x))
-    else
-        throw(MethodError(*, (F, x)))
-    end
-end
-function (*)(x::S, F::Hessenberg{<:Any,<:AbstractMatrix{T}}) where {T,S<:Number}
-    TS = typeof(zero(T) * x)
-    if TS === T
-        return lmul!(convert(T, x), copy(F))
-    else
-        throw(MethodError(*, (x, F)))
-    end
-end
--(F::Hessenberg) = F * -one(eltype(F.H))
-
-# shift Hessenberg by λI
-+(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ + J.λ)
-+(J::UniformScaling, F::Hessenberg) = Hessenberg(F, J.λ + F.μ)
--(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ - J.λ)
--(J::UniformScaling, F::Hessenberg) = Hessenberg(-F, J.λ - F.μ)
-
-function ldiv!(F::Hessenberg, B::AbstractVecOrMat)
-    Q = F.Q
-    if iszero(F.μ)
-        return lmul!(Q, ldiv!(F.H, lmul!(Q', B)))
-    else
-        return lmul!(Q, ldiv!(F.H, lmul!(Q', B); shift=F.μ))
-    end
-end
-
-function rdiv!(B::AbstractMatrix, F::Hessenberg)
-    Q = F.Q
-    return rmul!(rdiv!(rmul!(B, Q), F.H; shift=F.μ), Q')
-end
-
-# handle case of real H and complex μ — we need to work around the
-# fact that we can't multiple a real F.Q by a complex matrix directly in LAPACK
-function ldiv!(F::Hessenberg{<:Complex,<:Any,<:AbstractMatrix{<:Real}}, B::AbstractVecOrMat{<:Complex})
-    Q = F.Q
-    Br = lmul!(Q', real(B))
-    Bi = lmul!(Q', imag(B))
-    ldiv!(F.H, B .= Complex.(Br,Bi); shift=F.μ)
-    Br .= real.(B); Bi .= imag.(B)
-    Br = lmul!(Q, Br)
-    Bi = lmul!(Q, Bi)
-    return B .= Complex.(Br,Bi)
-end
-function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Any,<:AbstractMatrix{<:Real}})
-    Q = F.Q
-    Br = rmul!(real(B), Q)
-    Bi = rmul!(imag(B), Q)
-    rdiv!(B .= Complex.(Br,Bi), F.H; shift=F.μ)
-    Br .= real.(B); Bi .= imag.(B)
-    Br = rmul!(Br, Q')
-    Bi = rmul!(Bi, Q')
-    return B .= Complex.(Br,Bi)
-end
-
-ldiv!(F::AdjointFactorization{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')'
-rdiv!(B::AbstractMatrix, F::AdjointFactorization{<:Any,<:Hessenberg}) = ldiv!(F', B')'
-
-det(F::Hessenberg) = det(F.H; shift=F.μ)
-logabsdet(F::Hessenberg) = logabsdet(F.H; shift=F.μ)
-function logdet(F::Hessenberg)
-    d,s = logabsdet(F)
-    return d + log(s)
-end
diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl
deleted file mode 100644
index 6353f9fa8d266..0000000000000
--- a/stdlib/LinearAlgebra/src/lapack.jl
+++ /dev/null
@@ -1,6944 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module LAPACK
-@doc """
-Interfaces to LAPACK subroutines.
-""" LAPACK
-
-using ..LinearAlgebra.BLAS: @blasfunc, chkuplo
-
-using ..LinearAlgebra: libblastrampoline, BlasFloat, BlasInt, LAPACKException, DimensionMismatch,
-    SingularException, PosDefException, chkstride1, checksquare,triu, tril, dot
-
-using Base: iszero, require_one_based_indexing
-
-
-# Legacy binding maintained for backwards-compatibility but new packages
-# should not look at this, instead preferring to parse the output
-# of BLAS.get_config()
-const liblapack = libblastrampoline
-
-#Generic LAPACK error handlers
-"""
-Handle only negative LAPACK error codes
-
-*NOTE* use only if the positive error code is useful.
-"""
-function chkargsok(ret::BlasInt)
-    if ret < 0
-        throw(ArgumentError("invalid argument #$(-ret) to LAPACK call"))
-    end
-end
-
-"Handle all nonzero info codes"
-function chklapackerror(ret::BlasInt)
-    if ret == 0
-        return
-    elseif ret < 0
-        throw(ArgumentError("invalid argument #$(-ret) to LAPACK call"))
-    else # ret > 0
-        throw(LAPACKException(ret))
-    end
-end
-
-function chknonsingular(ret::BlasInt)
-    if ret > 0
-        throw(SingularException(ret))
-    end
-end
-
-function chkposdef(ret::BlasInt)
-    if ret > 0
-        throw(PosDefException(ret))
-    end
-end
-
-"Check that {c}transpose is correctly specified"
-function chktrans(trans::AbstractChar)
-    if !(trans == 'N' || trans == 'C' || trans == 'T')
-        throw(ArgumentError("trans argument must be 'N' (no transpose), 'T' (transpose), or 'C' (conjugate transpose), got $trans"))
-    end
-    trans
-end
-
-"Check that left/right hand side multiply is correctly specified"
-function chkside(side::AbstractChar)
-    if !(side == 'L' || side == 'R')
-        throw(ArgumentError("side argument must be 'L' (left hand multiply) or 'R' (right hand multiply), got $side"))
-    end
-    side
-end
-
-"Check that unit diagonal flag is correctly specified"
-function chkdiag(diag::AbstractChar)
-    if !(diag == 'U' || diag =='N')
-        throw(ArgumentError("diag argument must be 'U' (unit diagonal) or 'N' (non-unit diagonal), got $diag"))
-    end
-    diag
-end
-
-subsetrows(X::AbstractVector, Y::AbstractArray, k) = Y[1:k]
-subsetrows(X::AbstractMatrix, Y::AbstractArray, k) = Y[1:k, :]
-
-function chkfinite(A::AbstractMatrix)
-    for a in A
-        if !isfinite(a)
-            throw(ArgumentError("matrix contains Infs or NaNs"))
-        end
-    end
-    return true
-end
-
-function chkuplofinite(A::AbstractMatrix, uplo::AbstractChar)
-    require_one_based_indexing(A)
-    m, n = size(A)
-    if uplo == 'U'
-        @inbounds for j in 1:n, i in 1:j
-            if !isfinite(A[i,j])
-                throw(ArgumentError("matrix contains Infs or NaNs"))
-            end
-        end
-    else
-        @inbounds for j in 1:n, i in j:m
-            if !isfinite(A[i,j])
-                throw(ArgumentError("matrix contains Infs or NaNs"))
-            end
-        end
-    end
-end
-
-# LAPACK version number
-function version()
-    major = Ref{BlasInt}(0)
-    minor = Ref{BlasInt}(0)
-    patch = Ref{BlasInt}(0)
-    ccall((@blasfunc(ilaver_), libblastrampoline), Cvoid,
-          (Ptr{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
-          major, minor, patch)
-    return VersionNumber(major[], minor[], patch[])
-end
-
-# (GB) general banded matrices, LU decomposition and solver
-for (gbtrf, gbtrs, elty) in
-    ((:dgbtrf_,:dgbtrs_,:Float64),
-     (:sgbtrf_,:sgbtrs_,:Float32),
-     (:zgbtrf_,:zgbtrs_,:ComplexF64),
-     (:cgbtrf_,:cgbtrs_,:ComplexF32))
-    @eval begin
-        # SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, KL, KU, LDAB, M, N
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   AB( LDAB, * )
-        function gbtrf!(kl::Integer, ku::Integer, m::Integer, AB::AbstractMatrix{$elty})
-            require_one_based_indexing(AB)
-            chkstride1(AB)
-            n    = size(AB, 2)
-            mnmn = min(m, n)
-            ipiv = similar(AB, BlasInt, mnmn)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($gbtrf), libblastrampoline), Cvoid,
-                  (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
-                  m, n, kl, ku, AB, max(1,stride(AB,2)), ipiv, info)
-            chklapackerror(info[])
-            AB, ipiv
-        end
-
-        # SUBROUTINE DGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO)
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          TRANS
-        #       INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * )
-        function gbtrs!(trans::AbstractChar, kl::Integer, ku::Integer, m::Integer,
-                        AB::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt},
-                        B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(AB, B)
-            chkstride1(AB, B, ipiv)
-            chktrans(trans)
-            info = Ref{BlasInt}()
-            n    = size(AB,2)
-            if m != n || m != size(B,1)
-                throw(DimensionMismatch("matrix AB has dimensions $(size(AB)), but right hand side matrix B has dimensions $(size(B))"))
-            end
-            ccall((@blasfunc($gbtrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Clong),
-                  trans, n, kl, ku, size(B,2), AB, max(1,stride(AB,2)), ipiv,
-                  B, max(1,stride(B,2)), info, 1)
-            chklapackerror(info[])
-            B
-        end
-    end
-end
-
-"""
-    gbtrf!(kl, ku, m, AB) -> (AB, ipiv)
-
-Compute the LU factorization of a banded matrix `AB`. `kl` is the first
-subdiagonal containing a nonzero band, `ku` is the last superdiagonal
-containing one, and `m` is the first dimension of the matrix `AB`. Returns
-the LU factorization in-place and `ipiv`, the vector of pivots used.
-"""
-gbtrf!(kl::Integer, ku::Integer, m::Integer, AB::AbstractMatrix)
-
-"""
-    gbtrs!(trans, kl, ku, m, AB, ipiv, B)
-
-Solve the equation `AB * X = B`. `trans` determines the orientation of `AB`. It may
-be `N` (no transpose), `T` (transpose), or `C` (conjugate transpose). `kl` is the
-first subdiagonal containing a nonzero band, `ku` is the last superdiagonal
-containing one, and `m` is the first dimension of the matrix `AB`. `ipiv` is the vector
-of pivots returned from `gbtrf!`. Returns the vector or matrix `X`, overwriting `B` in-place.
-"""
-gbtrs!(trans::AbstractChar, kl::Integer, ku::Integer, m::Integer, AB::AbstractMatrix, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat)
-
-## (GE) general matrices: balancing and back-transforming
-for (gebal, gebak, elty, relty) in
-    ((:dgebal_, :dgebak_, :Float64, :Float64),
-     (:sgebal_, :sgebak_, :Float32, :Float32),
-     (:zgebal_, :zgebak_, :ComplexF64, :Float64),
-     (:cgebal_, :cgebak_, :ComplexF32, :Float32))
-    @eval begin
-        #     SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO )
-        #*     .. Scalar Arguments ..
-        #      CHARACTER          JOB
-        #      INTEGER            IHI, ILP, INFO, LDA, N
-        #     .. Array Arguments ..
-        #      DOUBLE PRECISION   A( LDA, * ), SCALE( * )
-        function gebal!(job::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkfinite(A) # balancing routines don't support NaNs and Infs
-            ihi = Ref{BlasInt}()
-            ilo = Ref{BlasInt}()
-            scale = similar(A, $relty, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($gebal), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, Clong),
-                  job, n, A, max(1,stride(A,2)), ilo, ihi, scale, info, 1)
-            chklapackerror(info[])
-            ilo[], ihi[], scale
-        end
-
-        #     SUBROUTINE DGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, INFO )
-        #*     .. Scalar Arguments ..
-        #      CHARACTER          JOB, SIDE
-        #      INTEGER            IHI, ILP, INFO, LDV, M, N
-        #     .. Array Arguments ..
-        #      DOUBLE PRECISION   SCALE( * ), V( LDV, * )
-        function gebak!(job::AbstractChar, side::AbstractChar,
-                        ilo::BlasInt, ihi::BlasInt, scale::AbstractVector{$relty},
-                        V::AbstractMatrix{$elty})
-            require_one_based_indexing(scale, V)
-            chkstride1(scale, V)
-            chkside(side)
-            chkfinite(V) # balancing routines don't support NaNs and Infs
-            n = checksquare(V)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($gebak), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                   Ptr{$relty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                   Clong, Clong),
-                  job, side, size(V,1), ilo, ihi, scale, n, V, max(1,stride(V,2)), info,
-                  1, 1)
-            chklapackerror(info[])
-            V
-        end
-    end
-end
-
-"""
-    gebal!(job, A) -> (ilo, ihi, scale)
-
-Balance the matrix `A` before computing its eigensystem or Schur factorization.
-`job` can be one of `N` (`A` will not be permuted or scaled), `P` (`A` will only
-be permuted), `S` (`A` will only be scaled), or `B` (`A` will be both permuted
-and scaled). Modifies `A` in-place and returns `ilo`, `ihi`, and `scale`. If
-permuting was turned on, `A[i,j] = 0` if `j > i` and `1 < j < ilo` or `j > ihi`.
-`scale` contains information about the scaling/permutations performed.
-"""
-gebal!(job::AbstractChar, A::AbstractMatrix)
-
-"""
-    gebak!(job, side, ilo, ihi, scale, V)
-
-Transform the eigenvectors `V` of a matrix balanced using `gebal!` to
-the unscaled/unpermuted eigenvectors of the original matrix. Modifies `V`
-in-place. `side` can be `L` (left eigenvectors are transformed) or `R`
-(right eigenvectors are transformed).
-"""
-gebak!(job::AbstractChar, side::AbstractChar, ilo::BlasInt, ihi::BlasInt, scale::AbstractVector, V::AbstractMatrix)
-
-# (GE) general matrices, direct decompositions
-#
-# These mutating functions take as arguments all the values they
-# return, even if the value of the function does not depend on them
-# (e.g. the tau argument).  This is so that a factorization can be
-# updated in place.  The condensed mutating functions, usually a
-# function of A only, are defined after this block.
-for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty) in
-    ((:dgebrd_,:dgelqf_,:dgeqlf_,:dgeqrf_,:dgeqp3_,:dgeqrt_,:dgeqrt3_,:dgerqf_,:dgetrf_,:Float64,:Float64),
-     (:sgebrd_,:sgelqf_,:sgeqlf_,:sgeqrf_,:sgeqp3_,:sgeqrt_,:sgeqrt3_,:sgerqf_,:sgetrf_,:Float32,:Float32),
-     (:zgebrd_,:zgelqf_,:zgeqlf_,:zgeqrf_,:zgeqp3_,:zgeqrt_,:zgeqrt3_,:zgerqf_,:zgetrf_,:ComplexF64,:Float64),
-     (:cgebrd_,:cgelqf_,:cgeqlf_,:cgeqrf_,:cgeqp3_,:cgeqrt_,:cgeqrt3_,:cgerqf_,:cgetrf_,:ComplexF32,:Float32))
-    @eval begin
-        # SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
-        #                    INFO )
-        # .. Scalar Arguments ..
-        # INTEGER            INFO, LDA, LWORK, M, N
-        # .. Array Arguments ..
-        #  DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
-        #           TAUQ( * ), WORK( * )
-        function gebrd!(A::AbstractMatrix{$elty})
-            require_one_based_indexing(A)
-            chkstride1(A)
-            m, n  = size(A)
-            k     = min(m, n)
-            d     = similar(A, $relty, k)
-            e     = similar(A, $relty, k)
-            tauq  = similar(A, $elty, k)
-            taup  = similar(A, $elty, k)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gebrd), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ptr{$elty},
-                     Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                     m, n, A, max(1,stride(A,2)),
-                     d, e, tauq, taup,
-                     work, lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, d, e, tauq, taup
-        end
-
-        # SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LWORK, M, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function gelqf!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty})
-            require_one_based_indexing(A, tau)
-            chkstride1(A,tau)
-            m     = BlasInt(size(A, 1))
-            n     = BlasInt(size(A, 2))
-            lda   = BlasInt(max(1,stride(A, 2)))
-            if length(tau) != min(m,n)
-                throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))"))
-            end
-            lwork = BlasInt(-1)
-            work  = Vector{$elty}(undef, 1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gelqf), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      m, n, A, lda, tau, work, lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, tau
-        end
-
-        # SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LWORK, M, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function geqlf!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty})
-            require_one_based_indexing(A, tau)
-            chkstride1(A,tau)
-            m     = BlasInt(size(A, 1))
-            n     = BlasInt(size(A, 2))
-            lda   = BlasInt(max(1,stride(A, 2)))
-            if length(tau) != min(m,n)
-                throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))"))
-            end
-            lwork = BlasInt(-1)
-            work  = Vector{$elty}(undef, 1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($geqlf), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      m, n, A, lda, tau, work, lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, tau
-        end
-
-        # SUBROUTINE DGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LWORK, M, N
-        # *     .. Array Arguments ..
-        #       INTEGER            JPVT( * )
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function geqp3!(A::AbstractMatrix{$elty}, jpvt::AbstractVector{BlasInt}, tau::AbstractVector{$elty})
-            require_one_based_indexing(A, jpvt, tau)
-            chkstride1(A,jpvt,tau)
-            m,n = size(A)
-            if length(tau) != min(m,n)
-                throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))"))
-            end
-            if length(jpvt) != n
-                throw(DimensionMismatch("jpvt has length $(length(jpvt)), but needs length $n"))
-            end
-            lda = stride(A,2)
-            if lda == 0
-                return A, tau, jpvt
-            end # Early exit
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            cmplx = eltype(A)<:Complex
-            if cmplx
-                rwork = Vector{$relty}(undef, 2n)
-            end
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                if cmplx
-                    ccall((@blasfunc($geqp3), libblastrampoline), Cvoid,
-                          (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{$relty}, Ptr{BlasInt}),
-                          m, n, A, lda,
-                          jpvt, tau, work, lwork,
-                          rwork, info)
-                else
-                    ccall((@blasfunc($geqp3), libblastrampoline), Cvoid,
-                          (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{BlasInt}),
-                          m, n, A, lda,
-                          jpvt, tau, work,
-                          lwork, info)
-                end
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            return A, tau, jpvt
-        end
-
-        function geqrt!(A::AbstractMatrix{$elty}, T::AbstractMatrix{$elty})
-            require_one_based_indexing(A, T)
-            chkstride1(A)
-            m, n = size(A)
-            minmn = min(m, n)
-            nb = size(T, 1)
-            if nb > minmn
-                throw(ArgumentError("block size $nb > $minmn too large"))
-            end
-            lda = max(1, stride(A,2))
-            work = Vector{$elty}(undef, nb*n)
-            if n > 0
-                info = Ref{BlasInt}()
-                ccall((@blasfunc($geqrt), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ptr{BlasInt}),
-                     m, n, nb, A,
-                     lda, T, max(1,stride(T,2)), work,
-                     info)
-                chklapackerror(info[])
-            end
-            A, T
-        end
-
-        function geqrt3!(A::AbstractMatrix{$elty}, T::AbstractMatrix{$elty})
-            require_one_based_indexing(A, T)
-            chkstride1(A)
-            chkstride1(T)
-            m, n = size(A)
-            p, q = size(T)
-            if m < n
-                throw(DimensionMismatch("input matrix A has dimensions ($m,$n), but should have more rows than columns"))
-            end
-            if p != n || q != n
-                throw(DimensionMismatch("block reflector T has dimensions ($p,$q), but should have dimensions ($n,$n)"))
-            end
-            if n > 0
-                info = Ref{BlasInt}()
-                ccall((@blasfunc($geqrt3), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                     m, n, A, max(1, stride(A, 2)),
-                     T, max(1,stride(T,2)), info)
-                chklapackerror(info[])
-            end
-            A, T
-        end
-
-        ## geqrfp! - positive elements on diagonal of R - not defined yet
-        # SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LWORK, M, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function geqrf!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty})
-            require_one_based_indexing(A, tau)
-            chkstride1(A,tau)
-            m, n  = size(A)
-            if length(tau) != min(m,n)
-                throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))"))
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2                # first call returns lwork as work[1]
-                ccall((@blasfunc($geqrf), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      m, n, A, max(1,stride(A,2)), tau, work, lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = max(BlasInt(1),BlasInt(real(work[1])))
-                    resize!(work, lwork)
-                end
-            end
-            A, tau
-        end
-
-        # SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LWORK, M, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function gerqf!(A::AbstractMatrix{$elty},tau::AbstractVector{$elty})
-            require_one_based_indexing(A, tau)
-            chkstride1(A,tau)
-            m, n  = size(A)
-            if length(tau) != min(m,n)
-                throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))"))
-            end
-            lwork = BlasInt(-1)
-            work  = Vector{$elty}(undef, 1)
-            info  = Ref{BlasInt}()
-            for i = 1:2                # first call returns lwork as work[1]
-                ccall((@blasfunc($gerqf), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      m, n, A, max(1,stride(A,2)), tau, work, lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = max(BlasInt(m), BlasInt(real(work[1])))
-                    resize!(work, lwork)
-                end
-            end
-            A, tau
-        end
-
-        # SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, M, N
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   A( LDA, * )
-        function getrf!(A::AbstractMatrix{$elty}; check = true)
-            require_one_based_indexing(A)
-            check && chkfinite(A)
-            chkstride1(A)
-            m, n = size(A)
-            lda  = max(1,stride(A, 2))
-            ipiv = similar(A, BlasInt, min(m,n))
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($getrf), libblastrampoline), Cvoid,
-                  (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                   Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
-                  m, n, A, lda, ipiv, info)
-            chkargsok(info[])
-            A, ipiv, info[] #Error code is stored in LU factorization type
-        end
-    end
-end
-
-"""
-    gebrd!(A) -> (A, d, e, tauq, taup)
-
-Reduce `A` in-place to bidiagonal form `A = QBP'`. Returns `A`, containing the
-bidiagonal matrix `B`; `d`, containing the diagonal elements of `B`; `e`,
-containing the off-diagonal elements of `B`; `tauq`, containing the
-elementary reflectors representing `Q`; and `taup`, containing the
-elementary reflectors representing `P`.
-"""
-gebrd!(A::AbstractMatrix)
-
-"""
-    gelqf!(A, tau)
-
-Compute the `LQ` factorization of `A`, `A = LQ`. `tau` contains scalars
-which parameterize the elementary reflectors of the factorization. `tau`
-must have length greater than or equal to the smallest dimension of `A`.
-
-Returns
-`A` and `tau` modified in-place.
-"""
-gelqf!(A::AbstractMatrix, tau::AbstractVector)
-
-"""
-    geqlf!(A, tau)
-
-Compute the `QL` factorization of `A`, `A = QL`. `tau` contains scalars
-which parameterize the elementary reflectors of the factorization. `tau`
-must have length greater than or equal to the smallest dimension of `A`.
-
-Returns `A` and `tau` modified in-place.
-"""
-geqlf!(A::AbstractMatrix, tau::AbstractVector)
-
-"""
-    geqp3!(A, [jpvt, tau]) -> (A, tau, jpvt)
-
-Compute the pivoted `QR` factorization of `A`, `AP = QR` using BLAS level 3.
-`P` is a pivoting matrix, represented by `jpvt`. `tau` stores the elementary
-reflectors. The arguments `jpvt` and `tau` are optional and allow
-for passing preallocated arrays. When passed, `jpvt` must have length greater
-than or equal to `n` if `A` is an `(m x n)` matrix and `tau` must have length
-greater than or equal to the smallest dimension of `A`.
-
-`A`, `jpvt`, and `tau` are modified in-place.
-"""
-geqp3!(A::AbstractMatrix, jpvt::AbstractVector{BlasInt}, tau::AbstractVector)
-
-function geqp3!(A::AbstractMatrix{<:BlasFloat}, jpvt::AbstractVector{BlasInt})
-    m, n = size(A)
-    geqp3!(A, jpvt, similar(A, min(m, n)))
-end
-
-function geqp3!(A::AbstractMatrix{<:BlasFloat})
-    m, n = size(A)
-    geqp3!(A, zeros(BlasInt, n), similar(A, min(m, n)))
-end
-
-"""
-    geqrt!(A, T)
-
-Compute the blocked `QR` factorization of `A`, `A = QR`. `T` contains upper
-triangular block reflectors which parameterize the elementary reflectors of
-the factorization. The first dimension of `T` sets the block size and it must
-be between 1 and `n`. The second dimension of `T` must equal the smallest
-dimension of `A`.
-
-Returns `A` and `T` modified in-place.
-"""
-geqrt!(A::AbstractMatrix, T::AbstractMatrix)
-
-"""
-    geqrt3!(A, T)
-
-Recursively computes the blocked `QR` factorization of `A`, `A = QR`. `T`
-contains upper triangular block reflectors which parameterize the
-elementary reflectors of the factorization.  The first dimension of `T` sets the
-block size and it must be between 1 and `n`. The second dimension of `T` must
-equal the smallest dimension of `A`.
-
-Returns `A` and `T` modified in-place.
-"""
-geqrt3!(A::AbstractMatrix, T::AbstractMatrix)
-
-"""
-    geqrf!(A, tau)
-
-Compute the `QR` factorization of `A`, `A = QR`. `tau` contains scalars
-which parameterize the elementary reflectors of the factorization. `tau`
-must have length greater than or equal to the smallest dimension of `A`.
-
-Returns `A` and `tau` modified in-place.
-"""
-geqrf!(A::AbstractMatrix, tau::AbstractVector)
-
-"""
-    gerqf!(A, tau)
-
-Compute the `RQ` factorization of `A`, `A = RQ`. `tau` contains scalars
-which parameterize the elementary reflectors of the factorization. `tau`
-must have length greater than or equal to the smallest dimension of `A`.
-
-Returns `A` and `tau` modified in-place.
-"""
-gerqf!(A::AbstractMatrix, tau::AbstractVector)
-
-"""
-    getrf!(A) -> (A, ipiv, info)
-
-Compute the pivoted `LU` factorization of `A`, `A = LU`.
-
-Returns `A`, modified in-place, `ipiv`, the pivoting information, and an `info`
-code which indicates success (`info = 0`), a singular value in `U`
-(`info = i`, in which case `U[i,i]` is singular), or an error code (`info < 0`).
-"""
-getrf!(A::AbstractMatrix, tau::AbstractVector)
-
-"""
-    gelqf!(A) -> (A, tau)
-
-Compute the `LQ` factorization of `A`, `A = LQ`.
-
-Returns `A`, modified in-place, and `tau`, which contains scalars
-which parameterize the elementary reflectors of the factorization.
-"""
-gelqf!(A::AbstractMatrix{<:BlasFloat}) = ((m,n) = size(A); gelqf!(A, similar(A, min(m, n))))
-
-"""
-    geqlf!(A) -> (A, tau)
-
-Compute the `QL` factorization of `A`, `A = QL`.
-
-Returns `A`, modified in-place, and `tau`, which contains scalars
-which parameterize the elementary reflectors of the factorization.
-"""
-geqlf!(A::AbstractMatrix{<:BlasFloat}) = ((m,n) = size(A); geqlf!(A, similar(A, min(m, n))))
-
-"""
-    geqrt!(A, nb) -> (A, T)
-
-Compute the blocked `QR` factorization of `A`, `A = QR`. `nb` sets the block size
-and it must be between 1 and `n`, the second dimension of `A`.
-
-Returns `A`, modified in-place, and `T`, which contains upper
-triangular block reflectors which parameterize the elementary reflectors of
-the factorization.
-"""
-geqrt!(A::AbstractMatrix{<:BlasFloat}, nb::Integer) = geqrt!(A, similar(A, nb, minimum(size(A))))
-
-"""
-    geqrt3!(A) -> (A, T)
-
-Recursively computes the blocked `QR` factorization of `A`, `A = QR`.
-
-Returns `A`, modified in-place, and `T`, which contains upper triangular block
-reflectors which parameterize the elementary reflectors of the factorization.
-"""
-geqrt3!(A::AbstractMatrix{<:BlasFloat}) = (n = size(A, 2); geqrt3!(A, similar(A, n, n)))
-
-"""
-    geqrf!(A) -> (A, tau)
-
-Compute the `QR` factorization of `A`, `A = QR`.
-
-Returns `A`, modified in-place, and `tau`, which contains scalars
-which parameterize the elementary reflectors of the factorization.
-"""
-geqrf!(A::AbstractMatrix{<:BlasFloat}) = ((m,n) = size(A); geqrf!(A, similar(A, min(m, n))))
-
-"""
-    gerqf!(A) -> (A, tau)
-
-Compute the `RQ` factorization of `A`, `A = RQ`.
-
-Returns `A`, modified in-place, and `tau`, which contains scalars
-which parameterize the elementary reflectors of the factorization.
-"""
-gerqf!(A::AbstractMatrix{<:BlasFloat}) = ((m,n) = size(A); gerqf!(A, similar(A, min(m, n))))
-
-## Tools to compute and apply elementary reflectors
-for (larfg, elty) in
-    ((:dlarfg_, Float64),
-     (:slarfg_, Float32),
-     (:zlarfg_, ComplexF64),
-     (:clarfg_, ComplexF32))
-    @eval begin
-        #        .. Scalar Arguments ..
-        #        INTEGER            incx, n
-        #        DOUBLE PRECISION   alpha, tau
-        #        ..
-        #        .. Array Arguments ..
-        #        DOUBLE PRECISION   x( * )
-        function larfg!(x::AbstractVector{$elty})
-            N    = BlasInt(length(x))
-            α    = Ref{$elty}(x[1])
-            incx = BlasInt(1)
-            τ    = Ref{$elty}(0)
-            ccall((@blasfunc($larfg), libblastrampoline), Cvoid,
-                (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}),
-                N, α, pointer(x, 2), incx, τ)
-            @inbounds x[1] = one($elty)
-            return τ[]
-        end
-    end
-end
-
-for (larf, elty) in
-    ((:dlarf_, Float64),
-     (:slarf_, Float32),
-     (:zlarf_, ComplexF64),
-     (:clarf_, ComplexF32))
-    @eval begin
-        #        .. Scalar Arguments ..
-        #        CHARACTER          side
-        #        INTEGER            incv, ldc, m, n
-        #        DOUBLE PRECISION   tau
-        #        ..
-        #        .. Array Arguments ..
-        #        DOUBLE PRECISION   c( ldc, * ), v( * ), work( * )
-        function larf!(side::AbstractChar, v::AbstractVector{$elty},
-                       τ::$elty, C::AbstractMatrix{$elty}, work::AbstractVector{$elty})
-            m, n = size(C)
-            chkside(side)
-            ldc = max(1, stride(C, 2))
-            l = side == 'L' ? n : m
-            incv  = BlasInt(1)
-            ccall((@blasfunc($larf), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Clong),
-                side, m, n, v, incv,
-                τ, C, ldc, work, 1)
-            return C
-        end
-
-        function larf!(side::AbstractChar, v::AbstractVector{$elty},
-                       τ::$elty, C::AbstractMatrix{$elty})
-            m, n = size(C)
-            chkside(side)
-            lwork = side == 'L' ? n : m
-            return larf!(side, v, τ, C, Vector{$elty}(undef,lwork))
-        end
-    end
-end
-
-## Complete orthogonaliztion tools
-for (tzrzf, ormrz, elty) in
-    ((:dtzrzf_,:dormrz_,:Float64),
-     (:stzrzf_,:sormrz_,:Float32),
-     (:ztzrzf_,:zunmrz_,:ComplexF64),
-     (:ctzrzf_,:cunmrz_,:ComplexF32))
-    @eval begin
-         #       SUBROUTINE ZTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-         #
-         #       .. Scalar Arguments ..
-         #       INTEGER            INFO, LDA, LWORK, M, N
-         #       ..
-         #       .. Array Arguments ..
-         #       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
-        function tzrzf!(A::AbstractMatrix{$elty})
-            require_one_based_indexing(A)
-            chkstride1(A)
-            m, n = size(A)
-            if n < m
-                throw(DimensionMismatch("input matrix A has dimensions ($m,$n), but cannot have fewer columns than rows"))
-            end
-            lda = max(1, stride(A,2))
-            tau = similar(A, $elty, m)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($tzrzf), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                    m, n, A, lda,
-                    tau, work, lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, tau
-        end
-
-        #       SUBROUTINE ZUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
-        #                          WORK, LWORK, INFO )
-        #
-        #       .. Scalar Arguments ..
-        #       CHARACTER          SIDE, TRANS
-        #       INTEGER            INFO, K, L, LDA, LDC, LWORK, M, N
-        #       ..
-        #       .. Array Arguments ..
-        #       COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
-        function ormrz!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty},
-                        tau::AbstractVector{$elty}, C::AbstractMatrix{$elty})
-            require_one_based_indexing(A, tau, C)
-            chktrans(trans)
-            chkside(side)
-            chkstride1(A, tau, C)
-            m, n = size(C)
-            k = length(tau)
-            l = size(A, 2) - size(A, 1)
-            lda = max(1, stride(A,2))
-            ldc = max(1, stride(C,2))
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormrz), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                     Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                    side, trans, m, n,
-                    k, l, A, lda,
-                    tau, C, ldc, work,
-                    lwork, info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            C
-        end
-    end
-end
-
-"""
-    ormrz!(side, trans, A, tau, C)
-
-Multiplies the matrix `C` by `Q` from the transformation supplied by
-`tzrzf!`. Depending on `side` or `trans` the multiplication can be
-left-sided (`side = L, Q*C`) or right-sided (`side = R, C*Q`) and `Q`
-can be unmodified (`trans = N`), transposed (`trans = T`), or conjugate
-transposed (`trans = C`). Returns matrix `C` which is modified in-place
-with the result of the multiplication.
-"""
-ormrz!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix, tau::AbstractVector, C::AbstractMatrix)
-
-"""
-    tzrzf!(A) -> (A, tau)
-
-Transforms the upper trapezoidal matrix `A` to upper triangular form in-place.
-Returns `A` and `tau`, the scalar parameters for the elementary reflectors
-of the transformation.
-"""
-tzrzf!(A::AbstractMatrix)
-
-## (GE) general matrices, solvers with factorization, solver and inverse
-for (gels, gesv, getrs, getri, elty) in
-    ((:dgels_,:dgesv_,:dgetrs_,:dgetri_,:Float64),
-     (:sgels_,:sgesv_,:sgetrs_,:sgetri_,:Float32),
-     (:zgels_,:zgesv_,:zgetrs_,:zgetri_,:ComplexF64),
-     (:cgels_,:cgesv_,:cgetrs_,:cgetri_,:ComplexF32))
-    @eval begin
-        #      SUBROUTINE DGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK,INFO)
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          TRANS
-        #       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS
-        function gels!(trans::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chktrans(trans)
-            chkstride1(A, B)
-            btrn  = trans == 'T'
-            m, n  = size(A)
-            if size(B,1) != (btrn ? n : m)
-                throw(DimensionMismatch("matrix A has dimensions ($m,$n), transposed: $btrn, but leading dimension of B is $(size(B,1))"))
-            end
-            info  = Ref{BlasInt}()
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gels), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      (btrn ? 'T' : 'N'), m, n, size(B,2), A, max(1,stride(A,2)),
-                      B, max(1,stride(B,2)), work, lwork, info, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            k   = min(m, n)
-            F   = m < n ? tril(A[1:k, 1:k]) : triu(A[1:k, 1:k])
-            ssr = Vector{$elty}(undef, size(B, 2))
-            for i = 1:size(B,2)
-                x = zero($elty)
-                for j = k+1:size(B,1)
-                    x += abs2(B[j,i])
-                end
-                ssr[i] = x
-            end
-            F, subsetrows(B, B, k), ssr
-        end
-
-        # SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LDB, N, NRHS
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-        function gesv!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            n = checksquare(A)
-            if size(B,1) != n
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
-            end
-            ipiv = similar(A, BlasInt, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($gesv), libblastrampoline), Cvoid,
-                  (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info)
-            chklapackerror(info[])
-            B, A, ipiv
-        end
-
-        #     SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-        #*     .. Scalar Arguments ..
-        #      CHARACTER          TRANS
-        #      INTEGER            INFO, LDA, LDB, N, NRHS
-        #     .. Array Arguments ..
-        #      INTEGER            IPIV( * )
-        #      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-        function getrs!(trans::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, ipiv, B)
-            chktrans(trans)
-            chkstride1(A, B, ipiv)
-            n = checksquare(A)
-            if n != size(B, 1)
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
-            end
-            if n != length(ipiv)
-                throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
-            end
-            nrhs = size(B, 2)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($getrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  trans, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
-            chklapackerror(info[])
-            B
-        end
-
-        #     SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
-        #*     .. Scalar Arguments ..
-        #      INTEGER            INFO, LDA, LWORK, N
-        #*     .. Array Arguments ..
-        #      INTEGER            IPIV( * )
-        #      DOUBLE PRECISION   A( LDA, * ), WORK( * )
-        function getri!(A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
-            require_one_based_indexing(A, ipiv)
-            chkstride1(A, ipiv)
-            n = checksquare(A)
-            if n != length(ipiv)
-                throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs $n"))
-            end
-            lda = max(1,stride(A, 2))
-            lwork = BlasInt(-1)
-            work  = Vector{$elty}(undef, 1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($getri), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      n, A, lda, ipiv, work, lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A
-        end
-    end
-end
-
-"""
-    gels!(trans, A, B) -> (F, B, ssr)
-
-Solves the linear equation `A * X = B`, `transpose(A) * X = B`, or `adjoint(A) * X = B` using
-a QR or LQ factorization. Modifies the matrix/vector `B` in place with the
-solution. `A` is overwritten with its `QR` or `LQ` factorization. `trans`
-may be one of `N` (no modification), `T` (transpose), or `C` (conjugate
-transpose). `gels!` searches for the minimum norm/least squares solution.
-`A` may be under or over determined. The solution is returned in `B`.
-"""
-gels!(trans::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat)
-
-"""
-    gesv!(A, B) -> (B, A, ipiv)
-
-Solves the linear equation `A * X = B` where `A` is a square matrix using
-the `LU` factorization of `A`. `A` is overwritten with its `LU`
-factorization and `B` is overwritten with the solution `X`. `ipiv` contains the
-pivoting information for the `LU` factorization of `A`.
-"""
-gesv!(A::AbstractMatrix, B::AbstractVecOrMat)
-
-"""
-    getrs!(trans, A, ipiv, B)
-
-Solves the linear equation `A * X = B`, `transpose(A) * X = B`, or `adjoint(A) * X = B` for
-square `A`. Modifies the matrix/vector `B` in place with the solution. `A`
-is the `LU` factorization from `getrf!`, with `ipiv` the pivoting
-information. `trans` may be one of `N` (no modification), `T` (transpose),
-or `C` (conjugate transpose).
-"""
-getrs!(trans::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat)
-
-"""
-    getri!(A, ipiv)
-
-Computes the inverse of `A`, using its `LU` factorization found by
-`getrf!`. `ipiv` is the pivot information output and `A`
-contains the `LU` factorization of `getrf!`. `A` is overwritten with
-its inverse.
-"""
-getri!(A::AbstractMatrix, ipiv::AbstractVector{BlasInt})
-
-for (gesvx, elty) in
-    ((:dgesvx_,:Float64),
-     (:sgesvx_,:Float32))
-    @eval begin
-        #     SUBROUTINE DGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV,
-        #                        EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR,
-        #                        WORK, IWORK, INFO )
-        #
-        #     .. Scalar Arguments ..
-        #     CHARACTER          EQUED, FACT, TRANS
-        #     INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
-        #     DOUBLE PRECISION   RCOND
-        #     ..
-        #     .. Array Arguments ..
-        #     INTEGER            IPIV( * ), IWORK( * )
-        #     DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
-        #    $                   BERR( * ), C( * ), FERR( * ), R( * ),
-        #    $                   WORK( * ), X( LDX, *
-        #
-        function gesvx!(fact::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty},
-                        AF::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, equed::AbstractChar,
-                        R::AbstractVector{$elty}, C::AbstractVector{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, AF, ipiv, R, C, B)
-            chktrans(trans)
-            chkstride1(ipiv, R, C, B)
-            n    = checksquare(A)
-            lda  = stride(A,2)
-            n    = checksquare(AF)
-            ldaf = stride(AF,2)
-            nrhs = size(B,2)
-            ldb  = stride(B,2)
-            rcond = Ref{$elty}()
-            ferr  = similar(A, $elty, nrhs)
-            berr  = similar(A, $elty, nrhs)
-            work  = Vector{$elty}(undef, 4n)
-            iwork = Vector{BlasInt}(undef, n)
-            info  = Ref{BlasInt}()
-            X = similar(A, $elty, n, nrhs)
-            ccall((@blasfunc($gesvx), libblastrampoline), Cvoid,
-              (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-               Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-               Ref{UInt8}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-               Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-               Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong, Clong, Clong),
-              fact, trans, n, nrhs, A, lda, AF, ldaf, ipiv, equed, R, C, B,
-              ldb, X, n, rcond, ferr, berr, work, iwork, info, 1, 1, 1)
-            chklapackerror(info[])
-            if info[] == n + 1
-                @warn "Matrix is singular to working precision"
-            else
-                chknonsingular(info[])
-            end
-            #WORK(1) contains the reciprocal pivot growth factor norm(A)/norm(U)
-            X, equed, R, C, B, rcond[], ferr, berr, work[1]
-        end
-
-        function gesvx!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            n = size(A,1)
-            X, equed, R, C, B, rcond, ferr, berr, rpgf =
-                gesvx!('N', 'N', A,
-                       similar(A, $elty, n, n),
-                       similar(A, BlasInt, n),
-                       'N',
-                       similar(A, $elty, n),
-                       similar(A, $elty, n),
-                       B)
-            X, rcond, ferr, berr, rpgf
-        end
-    end
-end
-for (gesvx, elty, relty) in
-    ((:zgesvx_,:ComplexF64,:Float64),
-     (:cgesvx_,:ComplexF32 ,:Float32))
-    @eval begin
-        #     SUBROUTINE ZGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV,
-        #                        EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR,
-        #                        WORK, RWORK, INFO )
-        #
-        #     .. Scalar Arguments ..
-        #     CHARACTER          EQUED, FACT, TRANS
-        #     INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
-        #     DOUBLE PRECISION   RCOND
-        #     ..
-        #     .. Array Arguments ..
-        #     INTEGER            IPIV( * )
-        #     DOUBLE PRECISION   BERR( * ), C( * ), FERR( * ), R( * ),
-        #    $                   RWORK( * )
-        #     COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
-        #    $                   WORK( * ), X( LDX, * )
-        function gesvx!(fact::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty},
-                        AF::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, equed::AbstractChar,
-                        R::AbstractVector{$relty}, C::AbstractVector{$relty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, AF, ipiv, R, C, B)
-            chktrans(trans)
-            chkstride1(A, AF, ipiv, R, C, B)
-            n   = checksquare(A)
-            lda = stride(A,2)
-            n   = checksquare(AF)
-            ldaf = stride(AF,2)
-            nrhs = size(B,2)
-            ldb = stride(B,2)
-            rcond = Ref{$relty}()
-            ferr  = similar(A, $relty, nrhs)
-            berr  = similar(A, $relty, nrhs)
-            work  = Vector{$elty}(undef, 2n)
-            rwork = Vector{$relty}(undef, 2n)
-            info  = Ref{BlasInt}()
-            X = similar(A, $elty, n, nrhs)
-            ccall((@blasfunc($gesvx), libblastrampoline), Cvoid,
-              (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-               Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-               Ref{UInt8}, Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
-               Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{$relty}, Ptr{$relty},
-               Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong, Clong),
-              fact, trans, n, nrhs, A, lda, AF, ldaf, ipiv, equed, R, C, B,
-              ldb, X, n, rcond, ferr, berr, work, rwork, info, 1, 1, 1)
-            chklapackerror(info[])
-            if info[] == n + 1
-                @warn "Matrix is singular to working precision"
-            else
-                chknonsingular(info[])
-            end
-            #RWORK(1) contains the reciprocal pivot growth factor norm(A)/norm(U)
-            X, equed, R, C, B, rcond[], ferr, berr, rwork[1]
-        end
-
-        #Wrapper for the no-equilibration, no-transpose calculation
-        function gesvx!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            n = size(A,1)
-            X, equed, R, C, B, rcond, ferr, berr, rpgf =
-                gesvx!('N', 'N', A,
-                       similar(A, $elty, n, n),
-                       similar(A, BlasInt, n),
-                       'N',
-                       similar(A, $relty, n),
-                       similar(A, $relty, n),
-                       B)
-            X, rcond, ferr, berr, rpgf
-        end
-    end
-end
-
-"""
-    gesvx!(fact, trans, A, AF, ipiv, equed, R, C, B) -> (X, equed, R, C, B, rcond, ferr, berr, work)
-
-Solves the linear equation `A * X = B` (`trans = N`), `transpose(A) * X = B`
-(`trans = T`), or `adjoint(A) * X = B` (`trans = C`) using the `LU` factorization
-of `A`. `fact` may be `E`, in which case `A` will be equilibrated and copied
-to `AF`; `F`, in which case `AF` and `ipiv` from a previous `LU` factorization
-are inputs; or `N`, in which case `A` will be copied to `AF` and then
-factored. If `fact = F`, `equed` may be `N`, meaning `A` has not been
-equilibrated; `R`, meaning `A` was multiplied by `Diagonal(R)` from the left;
-`C`, meaning `A` was multiplied by `Diagonal(C)` from the right; or `B`, meaning
-`A` was multiplied by `Diagonal(R)` from the left and `Diagonal(C)` from the right.
-If `fact = F` and `equed = R` or `B` the elements of `R` must all be positive.
-If `fact = F` and `equed = C` or `B` the elements of `C` must all be positive.
-
-Returns the solution `X`; `equed`, which is an output if `fact` is not `N`,
-and describes the equilibration that was performed; `R`, the row equilibration
-diagonal; `C`, the column equilibration diagonal; `B`, which may be overwritten
-with its equilibrated form `Diagonal(R)*B` (if `trans = N` and `equed = R,B`) or
-`Diagonal(C)*B` (if `trans = T,C` and `equed = C,B`); `rcond`, the reciprocal
-condition number of `A` after equilbrating; `ferr`, the forward error bound for
-each solution vector in `X`; `berr`, the forward error bound for each solution
-vector in `X`; and `work`, the reciprocal pivot growth factor.
-"""
-gesvx!(fact::AbstractChar, trans::AbstractChar, A::AbstractMatrix, AF::AbstractMatrix,
-    ipiv::AbstractVector{BlasInt}, equed::AbstractChar, R::AbstractVector, C::AbstractVector, B::AbstractVecOrMat)
-
-"""
-    gesvx!(A, B)
-
-The no-equilibration, no-transpose simplification of `gesvx!`.
-"""
-gesvx!(A::AbstractMatrix, B::AbstractVecOrMat)
-
-for (gelsd, gelsy, elty) in
-    ((:dgelsd_,:dgelsy_,:Float64),
-     (:sgelsd_,:sgelsy_,:Float32))
-    @eval begin
-        # SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
-        #      $                   WORK, LWORK, IWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
-        #       DOUBLE PRECISION   RCOND
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
-        function gelsd!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, rcond::Real=-one($elty))
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            m, n  = size(A)
-            if size(B, 1) != m
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m"))
-            end
-            newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))]
-            s     = similar(A, $elty, min(m, n))
-            rnk   = Ref{BlasInt}()
-            info  = Ref{BlasInt}()
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            iwork = Vector{BlasInt}(undef, 1)
-            for i = 1:2  # first call returns lwork as work[1] and iwork length as iwork[1]
-                ccall((@blasfunc($gelsd), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{$elty}, Ref{BlasInt}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
-                      m, n, size(B,2),
-                      A, max(1,stride(A,2)), newB, max(1,stride(B,2),n),
-                      s, $elty(rcond), rnk, work,
-                      lwork, iwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    resize!(iwork, iwork[1])
-                end
-            end
-            subsetrows(B, newB, n), rnk[]
-        end
-
-        #       SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
-        #      $                   WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
-        #       DOUBLE PRECISION   RCOND
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            JPVT( * )
-        #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * )
-        function gelsy!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, rcond::Real=eps($elty))
-            require_one_based_indexing(A, B)
-            chkstride1(A)
-            m = size(A, 1)
-            n = size(A, 2)
-            nrhs = size(B, 2)
-            if size(B, 1) != m
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m"))
-            end
-            newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))]
-            lda = max(1, stride(A,2))
-            ldb = max(1, stride(newB,2))
-            jpvt = zeros(BlasInt, n)
-            rnk = Ref{BlasInt}()
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gelsy), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                     Ref{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{BlasInt}),
-                    m, n, nrhs, A,
-                    lda, newB, ldb, jpvt,
-                    $elty(rcond), rnk, work, lwork,
-                    info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                end
-            end
-            subsetrows(B, newB, n), rnk[]
-        end
-    end
-end
-
-for (gelsd, gelsy, elty, relty) in
-    ((:zgelsd_,:zgelsy_,:ComplexF64,:Float64),
-     (:cgelsd_,:cgelsy_,:ComplexF32,:Float32))
-    @eval begin
-        # SUBROUTINE ZGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
-        #      $                   WORK, LWORK, RWORK, IWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
-        #       DOUBLE PRECISION   RCOND
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   RWORK( * ), S( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
-        function gelsd!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, rcond::Real=-one($relty))
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            m, n  = size(A)
-            if size(B, 1) != m
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m"))
-            end
-            newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))]
-            s     = similar(A, $relty, min(m, n))
-            rnk   = Ref{BlasInt}()
-            info  = Ref{BlasInt}()
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, 1)
-            iwork = Vector{BlasInt}(undef, 1)
-            for i = 1:2  # first call returns lwork as work[1], rwork length as rwork[1] and iwork length as iwork[1]
-                ccall((@blasfunc($gelsd), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty},
-                       Ref{$relty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$relty}, Ref{BlasInt}, Ref{BlasInt}),
-                      m, n, size(B,2), A,
-                      max(1,stride(A,2)), newB, max(1,stride(B,2),n), s,
-                      $relty(rcond), rnk, work, lwork,
-                      rwork, iwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    resize!(rwork, BlasInt(rwork[1]))
-                    resize!(iwork, iwork[1])
-                end
-            end
-            subsetrows(B, newB, n), rnk[]
-        end
-
-        #       SUBROUTINE ZGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
-        #      $                   WORK, LWORK, RWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
-        #       DOUBLE PRECISION   RCOND
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            JPVT( * )
-        #       DOUBLE PRECISION   RWORK( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
-        function gelsy!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, rcond::Real=eps($relty))
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            m, n = size(A)
-            nrhs = size(B, 2)
-            if size(B, 1) != m
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m"))
-            end
-            newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))]
-            lda = max(1, m)
-            ldb = max(1, m, n)
-            jpvt = zeros(BlasInt, n)
-            rnk = Ref{BlasInt}(1)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, 2n)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gelsy), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                     Ref{$relty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$relty}, Ptr{BlasInt}),
-                    m, n, nrhs, A,
-                    lda, newB, ldb, jpvt,
-                    $relty(rcond), rnk, work, lwork,
-                    rwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            subsetrows(B, newB, n), rnk[]
-        end
-    end
-end
-
-"""
-    gelsd!(A, B, rcond) -> (B, rnk)
-
-Computes the least norm solution of `A * X = B` by finding the `SVD`
-factorization of `A`, then dividing-and-conquering the problem. `B`
-is overwritten with the solution `X`. Singular values below `rcond`
-will be treated as zero. Returns the solution in `B` and the effective rank
-of `A` in `rnk`.
-"""
-gelsd!(A::AbstractMatrix, B::AbstractVecOrMat, rcond::Real)
-
-"""
-    gelsy!(A, B, rcond) -> (B, rnk)
-
-Computes the least norm solution of `A * X = B` by finding the full `QR`
-factorization of `A`, then dividing-and-conquering the problem. `B`
-is overwritten with the solution `X`. Singular values below `rcond`
-will be treated as zero. Returns the solution in `B` and the effective rank
-of `A` in `rnk`.
-"""
-gelsy!(A::AbstractMatrix, B::AbstractVecOrMat, rcond::Real)
-
-for (gglse, elty) in ((:dgglse_, :Float64),
-                      (:sgglse_, :Float32),
-                      (:zgglse_, :ComplexF64),
-                      (:cgglse_, :ComplexF32))
-    @eval begin
-        # SUBROUTINE DGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK,
-        #      $                   INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, LDA, LDB, LWORK, M, N, P
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( * ), D( * ),
-        #      $                   WORK( * ), X( * )
-        function gglse!(A::AbstractMatrix{$elty}, c::AbstractVector{$elty},
-                        B::AbstractMatrix{$elty}, d::AbstractVector{$elty})
-            require_one_based_indexing(A, c, B, d)
-            chkstride1(A, c, B, d)
-            m, n = size(A)
-            p = size(B, 1)
-            if size(B, 2) != n
-                throw(DimensionMismatch("B has second dimension $(size(B,2)), needs $n"))
-            end
-            if length(c) != m
-                throw(DimensionMismatch("c has length $(length(c)), needs $m"))
-            end
-            if length(d) != p
-                throw(DimensionMismatch("d has length $(length(d)), needs $p"))
-            end
-            X = zeros($elty, n)
-            info  = Ref{BlasInt}()
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gglse), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                       Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}),
-                      m, n, p, A, max(1,stride(A,2)), B, max(1,stride(B,2)), c, d, X,
-                      work, lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            X, dot(view(c, n - p + 1:m), view(c, n - p + 1:m))
-        end
-    end
-end
-
-"""
-    gglse!(A, c, B, d) -> (X,res)
-
-Solves the equation `A * x = c` where `x` is subject to the equality
-constraint `B * x = d`. Uses the formula `||c - A*x||^2 = 0` to solve.
-Returns `X` and the residual sum-of-squares.
-"""
-gglse!(A::AbstractMatrix, c::AbstractVector, B::AbstractMatrix, d::AbstractVector)
-
-# (GE) general matrices eigenvalue-eigenvector and singular value decompositions
-for (geev, gesvd, gesdd, ggsvd, elty, relty) in
-    ((:dgeev_,:dgesvd_,:dgesdd_,:dggsvd_,:Float64,:Float64),
-     (:sgeev_,:sgesvd_,:sgesdd_,:sggsvd_,:Float32,:Float32),
-     (:zgeev_,:zgesvd_,:zgesdd_,:zggsvd_,:ComplexF64,:Float64),
-     (:cgeev_,:cgesvd_,:cgesdd_,:cggsvd_,:ComplexF32,:Float32))
-    @eval begin
-        #      SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR,
-        #      $                  LDVR, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBVL, JOBVR
-        #       INTEGER            INFO, LDA, LDVL, LDVR, LWORK, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ),
-        #      $                   WI( * ), WORK( * ), WR( * )
-        function geev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkfinite(A) # balancing routines don't support NaNs and Infs
-            lvecs = jobvl == 'V'
-            rvecs = jobvr == 'V'
-            VL    = similar(A, $elty, (n, lvecs ? n : 0))
-            VR    = similar(A, $elty, (n, rvecs ? n : 0))
-            cmplx = eltype(A) <: Complex
-            if cmplx
-                W     = similar(A, $elty, n)
-                rwork = similar(A, $relty, 2n)
-            else
-                WR    = similar(A, $elty, n)
-                WI    = similar(A, $elty, n)
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                if cmplx
-                    ccall((@blasfunc($geev), libblastrampoline), Cvoid,
-                          (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{$relty}, Ptr{BlasInt}, Clong, Clong),
-                          jobvl, jobvr, n, A, max(1,stride(A,2)), W, VL, n, VR, n,
-                          work, lwork, rwork, info, 1, 1)
-                else
-                    ccall((@blasfunc($geev), libblastrampoline), Cvoid,
-                          (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                          jobvl, jobvr, n, A, max(1,stride(A,2)), WR, WI, VL, n,
-                          VR, n, work, lwork, info, 1, 1)
-                end
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            cmplx ? (W, VL, VR) : (WR, WI, VL, VR)
-        end
-
-        #    SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK,
-        #                   LWORK, IWORK, INFO )
-        #*     .. Scalar Arguments ..
-        #      CHARACTER          JOBZ
-        #      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
-        #*     ..
-        #*     .. Array Arguments ..
-        #      INTEGER            IWORK( * )
-        #      DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
-        #                        VT( LDVT, * ), WORK( * )
-        function gesdd!(job::AbstractChar, A::AbstractMatrix{$elty})
-            require_one_based_indexing(A)
-            chkstride1(A)
-            m, n   = size(A)
-            minmn  = min(m, n)
-            if job == 'A'
-                U  = similar(A, $elty, (m, m))
-                VT = similar(A, $elty, (n, n))
-            elseif job == 'S'
-                U  = similar(A, $elty, (m, minmn))
-                VT = similar(A, $elty, (minmn, n))
-            elseif job == 'O'
-                U  = similar(A, $elty, (m, m >= n ? 0 : m))
-                VT = similar(A, $elty, (n, m >= n ? n : 0))
-            else
-                U  = similar(A, $elty, (m, 0))
-                VT = similar(A, $elty, (n, 0))
-            end
-            work   = Vector{$elty}(undef, 1)
-            lwork  = BlasInt(-1)
-            S      = similar(A, $relty, minmn)
-            cmplx  = eltype(A)<:Complex
-            if cmplx
-                rwork = Vector{$relty}(undef, job == 'N' ? 7*minmn : minmn*max(5*minmn+7, 2*max(m,n)+2*minmn+1))
-            end
-            iwork  = Vector{BlasInt}(undef, 8*minmn)
-            info   = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                if cmplx
-                    ccall((@blasfunc($gesdd), libblastrampoline), Cvoid,
-                          (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{$relty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong),
-                          job, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)),
-                          work, lwork, rwork, iwork, info, 1)
-                else
-                    ccall((@blasfunc($gesdd), libblastrampoline), Cvoid,
-                          (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{BlasInt}, Ptr{BlasInt}, Clong),
-                          job, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)),
-                          work, lwork, iwork, info, 1)
-                end
-                chklapackerror(info[])
-                if i == 1
-                    # Work around issue with truncated Float32 representation of lwork in
-                    # sgesdd by using nextfloat. See
-                    # http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=13&t=4587&p=11036&hilit=sgesdd#p11036
-                    # and
-                    # https://github.com/scipy/scipy/issues/5401
-                    lwork = round(BlasInt, nextfloat(real(work[1])))
-                    resize!(work, lwork)
-                end
-            end
-            if job == 'O'
-                if m >= n
-                    return (A, S, VT)
-                else
-                    # ()__
-                    # ||::Z__
-                    # ||::|:::Z____
-                    # ||::|:::|====|
-                    # ||==|===|====|
-                    # ||""|===|====|
-                    # ||  `"""|====|
-                    # ||      `""""`
-                    return (U, S, A)
-                end
-            end
-            return (U, S, VT)
-        end
-
-        # SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBU, JOBVT
-        #       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
-        #      $                   VT( LDVT, * ), WORK( * )
-        function gesvd!(jobu::AbstractChar, jobvt::AbstractChar, A::AbstractMatrix{$elty})
-            require_one_based_indexing(A)
-            chkstride1(A)
-            m, n   = size(A)
-            minmn  = min(m, n)
-            S      = similar(A, $relty, minmn)
-            U      = similar(A, $elty, jobu  == 'A' ? (m, m) : (jobu  == 'S' ? (m, minmn) : (m, 0)))
-            VT     = similar(A, $elty, jobvt == 'A' ? (n, n) : (jobvt == 'S' ? (minmn, n) : (n, 0)))
-            work   = Vector{$elty}(undef, 1)
-            cmplx  = eltype(A) <: Complex
-            if cmplx
-                rwork = Vector{$relty}(undef, 5minmn)
-            end
-            lwork  = BlasInt(-1)
-            info   = Ref{BlasInt}()
-            for i in 1:2  # first call returns lwork as work[1]
-                if cmplx
-                    ccall((@blasfunc($gesvd), libblastrampoline), Cvoid,
-                          (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                           Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong),
-                          jobu, jobvt, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)),
-                          work, lwork, rwork, info, 1, 1)
-                else
-                    ccall((@blasfunc($gesvd), libblastrampoline), Cvoid,
-                          (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                           Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                          jobu, jobvt, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)),
-                          work, lwork, info, 1, 1)
-                end
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            if jobu == 'O'
-                return (A, S, VT)
-            elseif jobvt == 'O'
-                    # =============|===========|()
-                                   # # # #::::::
-                                   # # # #::::::
-                                   # # # #::::::
-                                   # # # #::::::
-                                   # # # # # # #
-                                   # # # # # # #
-                                   # # # # # # #
-                return (U, S, A)   # # # # # # #
-            else                   # # # # # # #
-                return (U, S, VT)  # # # # # # #
-
-            end
-        end
-
-        #       SUBROUTINE ZGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B,
-        #      $                   LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK,
-        #      $                   RWORK, IWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBQ, JOBU, JOBV
-        #       INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   ALPHA( * ), BETA( * ), RWORK( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
-        #      $                   U( LDU, * ), V( LDV, * ), WORK( * )
-        function ggsvd!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            m, n = size(A)
-            if size(B, 2) != n
-                throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs $n"))
-            end
-            p = size(B, 1)
-            k = Vector{BlasInt}(undef, 1)
-            l = Vector{BlasInt}(undef, 1)
-            lda = max(1,stride(A, 2))
-            ldb = max(1,stride(B, 2))
-            alpha = similar(A, $relty, n)
-            beta = similar(A, $relty, n)
-            ldu = max(1, m)
-            U = jobu == 'U' ? similar(A, $elty, ldu, m) : similar(A, $elty, 0)
-            ldv = max(1, p)
-            V = jobv == 'V' ? similar(A, $elty, ldv, p) : similar(A, $elty, 0)
-            ldq = max(1, n)
-            Q = jobq == 'Q' ? similar(A, $elty, ldq, n) : similar(A, $elty, 0)
-            work = Vector{$elty}(undef, max(3n, m, p) + n)
-            cmplx = eltype(A) <: Complex
-            if cmplx
-                rwork = Vector{$relty}(undef, 2n)
-            end
-            iwork = Vector{BlasInt}(undef, n)
-            info = Ref{BlasInt}()
-            if cmplx
-                ccall((@blasfunc($ggsvd), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                    Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Ptr{BlasInt},
-                    Clong, Clong, Clong),
-                    jobu, jobv, jobq, m,
-                    n, p, k, l,
-                    A, lda, B, ldb,
-                    alpha, beta, U, ldu,
-                    V, ldv, Q, ldq,
-                    work, rwork, iwork, info,
-                    1, 1, 1)
-            else
-                ccall((@blasfunc($ggsvd), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                    Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
-                    Clong, Clong, Clong),
-                    jobu, jobv, jobq, m,
-                    n, p, k, l,
-                    A, lda, B, ldb,
-                    alpha, beta, U, ldu,
-                    V, ldv, Q, ldq,
-                    work, iwork, info,
-                    1, 1, 1)
-            end
-            chklapackerror(info[])
-            if m - k[1] - l[1] >= 0
-                R = triu(A[1:k[1] + l[1],n - k[1] - l[1] + 1:n])
-            else
-                R = triu([A[1:m, n - k[1] - l[1] + 1:n]; B[m - k[1] + 1:l[1], n - k[1] - l[1] + 1:n]])
-            end
-            U, V, Q, alpha, beta, k[1], l[1], R
-        end
-    end
-end
-
-"""
-    geev!(jobvl, jobvr, A) -> (W, VL, VR)
-
-Finds the eigensystem of `A`. If `jobvl = N`, the left eigenvectors of
-`A` aren't computed. If `jobvr = N`, the right eigenvectors of `A`
-aren't computed. If `jobvl = V` or `jobvr = V`, the corresponding
-eigenvectors are computed. Returns the eigenvalues in `W`, the right
-eigenvectors in `VR`, and the left eigenvectors in `VL`.
-"""
-geev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix)
-
-"""
-    gesdd!(job, A) -> (U, S, VT)
-
-Finds the singular value decomposition of `A`, `A = U * S * V'`,
-using a divide and conquer approach. If `job = A`, all the columns of `U` and
-the rows of `V'` are computed. If `job = N`, no columns of `U` or rows of `V'`
-are computed. If `job = O`, `A` is overwritten with the columns of (thin) `U`
-and the rows of (thin) `V'`. If `job = S`, the columns of (thin) `U` and the
-rows of (thin) `V'` are computed and returned separately.
-"""
-gesdd!(job::AbstractChar, A::AbstractMatrix)
-
-"""
-    gesvd!(jobu, jobvt, A) -> (U, S, VT)
-
-Finds the singular value decomposition of `A`, `A = U * S * V'`.
-If `jobu = A`, all the columns of `U` are computed. If `jobvt = A` all the rows
-of `V'` are computed. If `jobu = N`, no columns of `U` are computed. If
-`jobvt = N` no rows of `V'` are computed. If `jobu = O`, `A` is overwritten with
-the columns of (thin) `U`. If `jobvt = O`, `A` is overwritten with the rows
-of (thin) `V'`. If `jobu = S`, the columns of (thin) `U` are computed
-and returned separately. If `jobvt = S` the rows of (thin) `V'` are
-computed and returned separately. `jobu` and `jobvt` can't both be `O`.
-
-Returns `U`, `S`, and `Vt`, where `S` are the singular values of `A`.
-"""
-gesvd!(jobu::AbstractChar, jobvt::AbstractChar, A::AbstractMatrix)
-
-"""
-    ggsvd!(jobu, jobv, jobq, A, B) -> (U, V, Q, alpha, beta, k, l, R)
-
-Finds the generalized singular value decomposition of `A` and `B`, `U'*A*Q = D1*R`
-and `V'*B*Q = D2*R`. `D1` has `alpha` on its diagonal and `D2` has `beta` on its
-diagonal. If `jobu = U`, the orthogonal/unitary matrix `U` is computed. If
-`jobv = V` the orthogonal/unitary matrix `V` is computed. If `jobq = Q`,
-the orthogonal/unitary matrix `Q` is computed. If `jobu`, `jobv` or `jobq` is
-`N`, that matrix is not computed. This function is only available in LAPACK
-versions prior to 3.6.0.
-"""
-ggsvd!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix, B::AbstractMatrix)
-
-
-for (f, elty) in ((:dggsvd3_, :Float64),
-                  (:sggsvd3_, :Float32))
-    @eval begin
-        function ggsvd3!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            m, n = size(A)
-            if size(B, 2) != n
-                throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs $n"))
-            end
-            p = size(B, 1)
-            k = Ref{BlasInt}()
-            l = Ref{BlasInt}()
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            alpha = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
-            ldu = max(1, m)
-            U = jobu == 'U' ? similar(A, $elty, ldu, m) : similar(A, $elty, 0)
-            ldv = max(1, p)
-            V = jobv == 'V' ? similar(A, $elty, ldv, p) : similar(A, $elty, 0)
-            ldq = max(1, n)
-            Q = jobq == 'Q' ? similar(A, $elty, ldq, n) : similar(A, $elty, 0)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            iwork = Vector{BlasInt}(undef, n)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($f), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                    Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
-                    Clong, Clong, Clong),
-                    jobu, jobv, jobq, m,
-                    n, p, k, l,
-                    A, lda, B, ldb,
-                    alpha, beta, U, ldu,
-                    V, ldv, Q, ldq,
-                    work, lwork, iwork, info,
-                    1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                end
-            end
-            if m - k[] - l[] >= 0
-                R = triu(A[1:k[] + l[],n - k[] - l[] + 1:n])
-            else
-                R = triu([A[1:m, n - k[] - l[] + 1:n]; B[m - k[] + 1:l[], n - k[] - l[] + 1:n]])
-            end
-            return U, V, Q, alpha, beta, k[], l[], R
-        end
-    end
-end
-
-for (f, elty, relty) in ((:zggsvd3_, :ComplexF64, :Float64),
-                         (:cggsvd3_, :ComplexF32, :Float32))
-    @eval begin
-        function ggsvd3!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            m, n = size(A)
-            if size(B, 2) != n
-                throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs $n"))
-            end
-            p = size(B, 1)
-            k = Vector{BlasInt}(undef, 1)
-            l = Vector{BlasInt}(undef, 1)
-            lda = max(1,stride(A, 2))
-            ldb = max(1,stride(B, 2))
-            alpha = similar(A, $relty, n)
-            beta = similar(A, $relty, n)
-            ldu = max(1, m)
-            U = jobu == 'U' ? similar(A, $elty, ldu, m) : similar(A, $elty, 0)
-            ldv = max(1, p)
-            V = jobv == 'V' ? similar(A, $elty, ldv, p) : similar(A, $elty, 0)
-            ldq = max(1, n)
-            Q = jobq == 'Q' ? similar(A, $elty, ldq, n) : similar(A, $elty, 0)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, 2n)
-            iwork = Vector{BlasInt}(undef, n)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($f), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                    Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt},
-                    Ptr{BlasInt}, Clong, Clong, Clong),
-                    jobu, jobv, jobq, m,
-                    n, p, k, l,
-                    A, lda, B, ldb,
-                    alpha, beta, U, ldu,
-                    V, ldv, Q, ldq,
-                    work, lwork, rwork, iwork,
-                    info, 1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                end
-            end
-            if m - k[1] - l[1] >= 0
-                R = triu(A[1:k[1] + l[1],n - k[1] - l[1] + 1:n])
-            else
-                R = triu([A[1:m, n - k[1] - l[1] + 1:n]; B[m - k[1] + 1:l[1], n - k[1] - l[1] + 1:n]])
-            end
-            return U, V, Q, alpha, beta, k[1], l[1], R
-        end
-    end
-end
-
-"""
-    ggsvd3!(jobu, jobv, jobq, A, B) -> (U, V, Q, alpha, beta, k, l, R)
-
-Finds the generalized singular value decomposition of `A` and `B`, `U'*A*Q = D1*R`
-and `V'*B*Q = D2*R`. `D1` has `alpha` on its diagonal and `D2` has `beta` on its
-diagonal. If `jobu = U`, the orthogonal/unitary matrix `U` is computed. If
-`jobv = V` the orthogonal/unitary matrix `V` is computed. If `jobq = Q`,
-the orthogonal/unitary matrix `Q` is computed. If `jobu`, `jobv`, or `jobq` is
-`N`, that matrix is not computed. This function requires LAPACK 3.6.0.
-"""
-ggsvd3!
-
-## Expert driver and generalized eigenvalue problem
-for (geevx, ggev, ggev3, elty) in
-    ((:dgeevx_,:dggev_,:dggev3_,:Float64),
-     (:sgeevx_,:sggev_,:sggev3_,:Float32))
-    @eval begin
-        #     SUBROUTINE DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI,
-        #                          VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM,
-        #                          RCONDE, RCONDV, WORK, LWORK, IWORK, INFO )
-        #
-        #       .. Scalar Arguments ..
-        #       CHARACTER          BALANC, JOBVL, JOBVR, SENSE
-        #       INTEGER            IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N
-        #       DOUBLE PRECISION   ABNRM
-        #       ..
-        #       .. Array Arguments ..
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   A( LDA, * ), RCONDE( * ), RCONDV( * ),
-        #      $                   SCALE( * ), VL( LDVL, * ), VR( LDVR, * ),
-        #      $                   WI( * ), WORK( * ), WR( * )
-        function geevx!(balanc::AbstractChar, jobvl::AbstractChar, jobvr::AbstractChar, sense::AbstractChar, A::AbstractMatrix{$elty})
-            n = checksquare(A)
-            chkfinite(A) # balancing routines don't support NaNs and Infs
-            lda = max(1,stride(A,2))
-            wr = similar(A, $elty, n)
-            wi = similar(A, $elty, n)
-            if balanc ∉ ['N', 'P', 'S', 'B']
-                throw(ArgumentError("balanc must be 'N', 'P', 'S', or 'B', but $balanc was passed"))
-            end
-            ldvl = 0
-            if jobvl == 'V'
-                ldvl = n
-            elseif jobvl == 'N'
-                ldvl = 0
-            else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
-            end
-            VL = similar(A, $elty, ldvl, n)
-            ldvr = 0
-            if jobvr == 'V'
-                ldvr = n
-            elseif jobvr == 'N'
-                ldvr = 0
-            else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
-            end
-            VR = similar(A, $elty, ldvr, n)
-            ilo = Ref{BlasInt}()
-            ihi = Ref{BlasInt}()
-            scale = similar(A, $elty, n)
-            abnrm = Ref{$elty}()
-            rconde = similar(A, $elty, n)
-            rcondv = similar(A, $elty, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            iworksize = 0
-            if sense == 'N' || sense == 'E'
-                iworksize = 0
-            elseif sense == 'V' || sense == 'B'
-                iworksize = 2*n - 2
-            else
-                throw(ArgumentError("sense must be 'N', 'E', 'V' or 'B', but $sense was passed"))
-            end
-            iwork = Vector{BlasInt}(undef, iworksize)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($geevx), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8},
-                       Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                       Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
-                       Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
-                       Clong, Clong, Clong, Clong),
-                       balanc, jobvl, jobvr, sense,
-                       n, A, lda, wr,
-                       wi, VL, max(1,ldvl), VR,
-                       max(1,ldvr), ilo, ihi, scale,
-                       abnrm, rconde, rcondv, work,
-                       lwork, iwork, info,
-                       1, 1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                end
-            end
-            A, wr, wi, VL, VR, ilo[], ihi[], scale, abnrm[], rconde, rcondv
-        end
-
-        #       SUBROUTINE DGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI,
-        #      $                  BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBVL, JOBVR
-        #       INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
-        #      $                   B( LDB, * ), BETA( * ), VL( LDVL, * ),
-        #      $                   VR( LDVR, * ), WORK( * )
-        function ggev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A,B)
-            n, m = checksquare(A,B)
-            if n != m
-                throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
-            end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            alphar = similar(A, $elty, n)
-            alphai = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
-            ldvl = 0
-            if jobvl == 'V'
-                ldvl = n
-            elseif jobvl == 'N'
-                ldvl = 1
-            else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
-            end
-            vl = similar(A, $elty, ldvl, n)
-            ldvr = 0
-            if jobvr == 'V'
-                ldvr = n
-            elseif jobvr == 'N'
-                ldvr = 1
-            else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
-            end
-            vr = similar(A, $elty, ldvr, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ggev), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ref{BlasInt}, Clong, Clong),
-                    jobvl, jobvr, n, A,
-                    lda, B, ldb, alphar,
-                    alphai, beta, vl, ldvl,
-                    vr, ldvr, work, lwork,
-                    info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                end
-            end
-            alphar, alphai, beta, vl, vr
-        end
-
-        #       SUBROUTINE DGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI,
-        #      $                   BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBVL, JOBVR
-        #       INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
-        #      $                   B( LDB, * ), BETA( * ), VL( LDVL, * ),
-        #      $                   VR( LDVR, * ), WORK( * )
-        function ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A,B)
-            n, m = checksquare(A,B)
-            if n != m
-                throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
-            end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            alphar = similar(A, $elty, n)
-            alphai = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
-            ldvl = 0
-            if jobvl == 'V'
-                ldvl = n
-            elseif jobvl == 'N'
-                ldvl = 1
-            else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
-            end
-            vl = similar(A, $elty, ldvl, n)
-            ldvr = 0
-            if jobvr == 'V'
-                ldvr = n
-            elseif jobvr == 'N'
-                ldvr = 1
-            else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
-            end
-            vr = similar(A, $elty, ldvr, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ggev3), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ref{BlasInt}, Clong, Clong),
-                    jobvl, jobvr, n, A,
-                    lda, B, ldb, alphar,
-                    alphai, beta, vl, ldvl,
-                    vr, ldvr, work, lwork,
-                    info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                end
-            end
-            alphar, alphai, beta, vl, vr
-        end
-    end
-end
-
-for (geevx, ggev, ggev3, elty, relty) in
-    ((:zgeevx_,:zggev_,:zggev3_,:ComplexF64,:Float64),
-     (:cgeevx_,:cggev_,:cggev3_,:ComplexF32,:Float32))
-    @eval begin
-        #     SUBROUTINE ZGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL,
-        #                          LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE,
-        #                          RCONDV, WORK, LWORK, RWORK, INFO )
-        #
-        #       .. Scalar Arguments ..
-        #       CHARACTER          BALANC, JOBVL, JOBVR, SENSE
-        #       INTEGER            IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N
-        #       DOUBLE PRECISION   ABNRM
-        #       ..
-        #       .. Array Arguments ..
-        #       DOUBLE PRECISION   RCONDE( * ), RCONDV( * ), RWORK( * ),
-        #      $                   SCALE( * )
-        #       COMPLEX*16         A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ),
-        #      $                   W( * ), WORK( * )
-        function geevx!(balanc::AbstractChar, jobvl::AbstractChar, jobvr::AbstractChar, sense::AbstractChar, A::AbstractMatrix{$elty})
-            n = checksquare(A)
-            chkfinite(A) # balancing routines don't support NaNs and Infs
-            lda = max(1,stride(A,2))
-            w = similar(A, $elty, n)
-            if balanc ∉ ['N', 'P', 'S', 'B']
-                throw(ArgumentError("balanc must be 'N', 'P', 'S', or 'B', but $balanc was passed"))
-            end
-            ldvl = 0
-            if jobvl == 'V'
-                ldvl = n
-            elseif jobvl == 'N'
-                ldvl = 0
-            else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
-            end
-            VL = similar(A, $elty, ldvl, n)
-            ldvr = 0
-            if jobvr == 'V'
-                ldvr = n
-            elseif jobvr == 'N'
-                ldvr = 0
-            else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
-            end
-            if sense ∉ ['N','E','V','B']
-                throw(ArgumentError("sense must be 'N', 'E', 'V' or 'B', but $sense was passed"))
-            end
-            VR = similar(A, $elty, ldvr, n)
-            ilo = Ref{BlasInt}()
-            ihi = Ref{BlasInt}()
-            scale = similar(A, $relty, n)
-            abnrm = Ref{$relty}()
-            rconde = similar(A, $relty, n)
-            rcondv = similar(A, $relty, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, 2n)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($geevx), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8},
-                       Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$relty}, Ptr{$relty},
-                       Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$relty}, Ref{BlasInt}, Clong, Clong, Clong, Clong),
-                       balanc, jobvl, jobvr, sense,
-                       n, A, lda, w,
-                       VL, max(1,ldvl), VR, max(1,ldvr),
-                       ilo, ihi, scale, abnrm,
-                       rconde, rcondv, work, lwork,
-                       rwork, info, 1, 1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                end
-            end
-            A, w, VL, VR, ilo[], ihi[], scale, abnrm[], rconde, rcondv
-        end
-
-        # SUBROUTINE ZGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA,
-        #      $                  VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBVL, JOBVR
-        #       INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   RWORK( * )
-        #       COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
-        #      $                   BETA( * ), VL( LDVL, * ), VR( LDVR, * ),
-        #      $                   WORK( * )
-        function ggev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            n, m = checksquare(A, B)
-            if n != m
-                throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
-            end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            alpha = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
-            ldvl = 0
-            if jobvl == 'V'
-                ldvl = n
-            elseif jobvl == 'N'
-                ldvl = 1
-            else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
-            end
-            vl = similar(A, $elty, ldvl, n)
-            ldvr = 0
-            if jobvr == 'V'
-                ldvr = n
-            elseif jobvr == 'N'
-                ldvr = 1
-            else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
-            end
-            vr = similar(A, $elty, ldvr, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, 8n)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ggev), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty},
-                     Ref{BlasInt}, Clong, Clong),
-                    jobvl, jobvr, n, A,
-                    lda, B, ldb, alpha,
-                    beta, vl, ldvl, vr,
-                    ldvr, work, lwork, rwork,
-                    info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                end
-            end
-            alpha, beta, vl, vr
-        end
-
-        # SUBROUTINE ZGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA,
-        #      $                  VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBVL, JOBVR
-        #       INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   RWORK( * )
-        #       COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
-        #      $                   BETA( * ), VL( LDVL, * ), VR( LDVR, * ),
-        #      $                   WORK( * )
-        function ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            n, m = checksquare(A, B)
-            if n != m
-                throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
-            end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            alpha = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
-            ldvl = 0
-            if jobvl == 'V'
-                ldvl = n
-            elseif jobvl == 'N'
-                ldvl = 1
-            else
-                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
-            end
-            vl = similar(A, $elty, ldvl, n)
-            ldvr = 0
-            if jobvr == 'V'
-                ldvr = n
-            elseif jobvr == 'N'
-                ldvr = 1
-            else
-                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
-            end
-            vr = similar(A, $elty, ldvr, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, 8n)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ggev3), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty},
-                     Ref{BlasInt}, Clong, Clong),
-                    jobvl, jobvr, n, A,
-                    lda, B, ldb, alpha,
-                    beta, vl, ldvl, vr,
-                    ldvr, work, lwork, rwork,
-                    info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                end
-            end
-            alpha, beta, vl, vr
-        end
-    end
-end
-
-"""
-    geevx!(balanc, jobvl, jobvr, sense, A) -> (A, w, VL, VR, ilo, ihi, scale, abnrm, rconde, rcondv)
-
-Finds the eigensystem of `A` with matrix balancing. If `jobvl = N`, the
-left eigenvectors of `A` aren't computed. If `jobvr = N`, the right
-eigenvectors of `A` aren't computed. If `jobvl = V` or `jobvr = V`, the
-corresponding eigenvectors are computed. If `balanc = N`, no balancing is
-performed. If `balanc = P`, `A` is permuted but not scaled. If
-`balanc = S`, `A` is scaled but not permuted. If `balanc = B`, `A` is
-permuted and scaled. If `sense = N`, no reciprocal condition numbers are
-computed. If `sense = E`, reciprocal condition numbers are computed for
-the eigenvalues only. If `sense = V`, reciprocal condition numbers are
-computed for the right eigenvectors only. If `sense = B`, reciprocal
-condition numbers are computed for the right eigenvectors and the
-eigenvectors. If `sense = E,B`, the right and left eigenvectors must be
-computed.
-"""
-geevx!(balanc::AbstractChar, jobvl::AbstractChar, jobvr::AbstractChar, sense::AbstractChar, A::AbstractMatrix)
-
-"""
-    ggev!(jobvl, jobvr, A, B) -> (alpha, beta, vl, vr)
-
-Finds the generalized eigendecomposition of `A` and `B`. If `jobvl = N`,
-the left eigenvectors aren't computed. If `jobvr = N`, the right
-eigenvectors aren't computed. If `jobvl = V` or `jobvr = V`, the
-corresponding eigenvectors are computed.
-"""
-ggev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix)
-
-"""
-    ggev3!(jobvl, jobvr, A, B) -> (alpha, beta, vl, vr)
-
-Finds the generalized eigendecomposition of `A` and `B` using a blocked
-algorithm. If `jobvl = N`, the left eigenvectors aren't computed. If
-`jobvr = N`, the right eigenvectors aren't computed. If `jobvl = V` or
-`jobvr = V`, the corresponding eigenvectors are computed.  This function
-requires LAPACK 3.6.0.
-"""
-ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix)
-
-# One step incremental condition estimation of max/min singular values
-for (laic1, elty) in
-    ((:dlaic1_,:Float64),
-     (:slaic1_,:Float32))
-    @eval begin
-        #  SUBROUTINE DLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C )
-        #
-        #  .. Scalar Arguments ..
-        #  INTEGER            J, JOB
-        #  DOUBLE PRECISION   C, GAMMA, S, SEST, SESTPR
-        #  ..
-        #  .. Array Arguments ..
-        #  DOUBLE PRECISION   W( J ), X( J )
-        function laic1!(job::Integer, x::AbstractVector{$elty},
-                        sest::$elty, w::AbstractVector{$elty}, gamma::$elty)
-            require_one_based_indexing(x, w)
-            j = length(x)
-            if j != length(w)
-                throw(DimensionMismatch("vectors must have same length, but length of x is $j and length of w is $(length(w))"))
-            end
-            sestpr = Vector{$elty}(undef, 1)
-            s = Vector{$elty}(undef, 1)
-            c = Vector{$elty}(undef, 1)
-            ccall((@blasfunc($laic1), libblastrampoline), Cvoid,
-                (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{$elty},
-                 Ptr{$elty}, Ref{$elty}, Ptr{$elty}, Ptr{$elty},
-                 Ptr{$elty}),
-                job, j, x, sest,
-                w, gamma, sestpr, s,
-                c)
-            sestpr[1], s[1], c[1]
-        end
-    end
-end
-for (laic1, elty, relty) in
-    ((:zlaic1_,:ComplexF64,:Float64),
-     (:claic1_,:ComplexF32,:Float32))
-    @eval begin
-       #  SUBROUTINE ZLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C )
-       #
-       #  .. Scalar Arguments ..
-       #  INTEGER            J, JOB
-       #  DOUBLE PRECISION   SEST, SESTPR
-       #  COMPLEX*16         C, GAMMA, S
-       #  ..
-       #  .. Array Arguments ..
-       #  COMPLEX*16         W( J ), X( J )
-        function laic1!(job::Integer, x::AbstractVector{$elty},
-                        sest::$relty, w::AbstractVector{$elty}, gamma::$elty)
-            require_one_based_indexing(x, w)
-            j = length(x)
-            if j != length(w)
-                throw(DimensionMismatch("vectors must have same length, but length of x is $j and length of w is $(length(w))"))
-            end
-            sestpr = Vector{$relty}(undef, 1)
-            s = Vector{$elty}(undef, 1)
-            c = Vector{$elty}(undef, 1)
-            ccall((@blasfunc($laic1), libblastrampoline), Cvoid,
-                (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{$relty},
-                 Ptr{$elty}, Ref{$elty}, Ptr{$relty}, Ptr{$elty},
-                 Ptr{$elty}),
-                job, j, x, sest,
-                w, gamma, sestpr, s,
-                c)
-            sestpr[1], s[1], c[1]
-        end
-    end
-end
-
-# (GT) General tridiagonal, decomposition, solver and direct solver
-for (gtsv, gttrf, gttrs, elty) in
-    ((:dgtsv_,:dgttrf_,:dgttrs_,:Float64),
-     (:sgtsv_,:sgttrf_,:sgttrs_,:Float32),
-     (:zgtsv_,:zgttrf_,:zgttrs_,:ComplexF64),
-     (:cgtsv_,:cgttrf_,:cgttrs_,:ComplexF32))
-    @eval begin
-        #       SUBROUTINE DGTSV( N, NRHS, DL, D, DU, B, LDB, INFO )
-        #       .. Scalar Arguments ..
-        #       INTEGER            INFO, LDB, N, NRHS
-        #       .. Array Arguments ..
-        #       DOUBLE PRECISION   B( LDB, * ), D( * ), DL( * ), DU( * )
-        function gtsv!(dl::AbstractVector{$elty}, d::AbstractVector{$elty}, du::AbstractVector{$elty},
-                       B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(dl, d, du, B)
-            chkstride1(B, dl, d, du)
-            n = length(d)
-            if !(n >= length(dl) >= n - 1)
-                throw(DimensionMismatch("subdiagonal has length $(length(dl)), but should be $n or $(n - 1)"))
-            end
-            if !(n >= length(du) >= n - 1)
-                throw(DimensionMismatch("superdiagonal has length $(length(du)), but should be $n or $(n - 1)"))
-            end
-            if n != size(B,1)
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)), but should have $n"))
-            end
-            if n == 0
-                return B # Early exit if possible
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($gtsv), libblastrampoline), Cvoid,
-                  (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  n, size(B,2), dl, d, du, B, max(1,stride(B,2)), info)
-            chklapackerror(info[])
-            B
-        end
-
-        #       SUBROUTINE DGTTRF( N, DL, D, DU, DU2, IPIV, INFO )
-        #       .. Scalar Arguments ..
-        #       INTEGER            INFO, N
-        #       .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   D( * ), DL( * ), DU( * ), DU2( * )
-        function gttrf!(dl::AbstractVector{$elty}, d::AbstractVector{$elty}, du::AbstractVector{$elty})
-            require_one_based_indexing(dl, d, du)
-            chkstride1(dl,d,du)
-            n    = length(d)
-            if length(dl) != n - 1
-                throw(DimensionMismatch("subdiagonal has length $(length(dl)), but should be $(n - 1)"))
-            end
-            if length(du) != n - 1
-                throw(DimensionMismatch("superdiagonal has length $(length(du)), but should be $(n - 1)"))
-            end
-            du2  = similar(d, $elty, n-2)
-            ipiv = similar(d, BlasInt, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($gttrf), libblastrampoline), Cvoid,
-                  (Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                   Ptr{BlasInt}, Ptr{BlasInt}),
-                  n, dl, d, du, du2, ipiv, info)
-            chklapackerror(info[])
-            dl, d, du, du2, ipiv
-        end
-
-        #       SUBROUTINE DGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, INFO )
-        #       .. Scalar Arguments ..
-        #       CHARACTER          TRANS
-        #       INTEGER            INFO, LDB, N, NRHS
-        #       .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * )
-        function gttrs!(trans::AbstractChar, dl::AbstractVector{$elty}, d::AbstractVector{$elty},
-                        du::AbstractVector{$elty}, du2::AbstractVector{$elty}, ipiv::AbstractVector{BlasInt},
-                        B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(dl, d, du, du2, ipiv, B)
-            chktrans(trans)
-            chkstride1(B, ipiv, dl, d, du, du2)
-            n = length(d)
-            if length(dl) != n - 1
-                throw(DimensionMismatch("subdiagonal has length $(length(dl)), but should be $(n - 1)"))
-            end
-            if length(du) != n - 1
-                throw(DimensionMismatch("superdiagonal has length $(length(du)), but should be $(n - 1)"))
-            end
-            if n != size(B,1)
-                throw(DimensionMismatch("B has leading dimension $(size(B,1)), but should have $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($gttrs), libblastrampoline), Cvoid,
-                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                    Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                    Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                   trans, n, size(B,2), dl, d, du, du2, ipiv, B, max(1,stride(B,2)), info, 1)
-            chklapackerror(info[])
-            B
-         end
-    end
-end
-
-"""
-    gtsv!(dl, d, du, B)
-
-Solves the equation `A * X = B` where `A` is a tridiagonal matrix with
-`dl` on the subdiagonal, `d` on the diagonal, and `du` on the
-superdiagonal.
-
-Overwrites `B` with the solution `X` and returns it.
-"""
-gtsv!(dl::AbstractVector, d::AbstractVector, du::AbstractVector, B::AbstractVecOrMat)
-
-"""
-    gttrf!(dl, d, du) -> (dl, d, du, du2, ipiv)
-
-Finds the `LU` factorization of a tridiagonal matrix with `dl` on the
-subdiagonal, `d` on the diagonal, and `du` on the superdiagonal.
-
-Modifies `dl`, `d`, and `du` in-place and returns them and the second
-superdiagonal `du2` and the pivoting vector `ipiv`.
-"""
-gttrf!(dl::AbstractVector, d::AbstractVector, du::AbstractVector)
-
-"""
-    gttrs!(trans, dl, d, du, du2, ipiv, B)
-
-Solves the equation `A * X = B` (`trans = N`), `transpose(A) * X = B` (`trans = T`),
-or `adjoint(A) * X = B` (`trans = C`) using the `LU` factorization computed by
-`gttrf!`. `B` is overwritten with the solution `X`.
-"""
-gttrs!(trans::AbstractChar, dl::AbstractVector, d::AbstractVector, du::AbstractVector, du2::AbstractVector,
-       ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat)
-
-## (OR) orthogonal (or UN, unitary) matrices, extractors and multiplication
-for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
-    ((:dorglq_,:dorgqr_,:dorgql_,:dorgrq_,:dormlq_,:dormqr_,:dormql_,:dormrq_,:dgemqrt_,:Float64),
-     (:sorglq_,:sorgqr_,:sorgql_,:sorgrq_,:sormlq_,:sormqr_,:sormql_,:sormrq_,:sgemqrt_,:Float32),
-     (:zunglq_,:zungqr_,:zungql_,:zungrq_,:zunmlq_,:zunmqr_,:zunmql_,:zunmrq_,:zgemqrt_,:ComplexF64),
-     (:cunglq_,:cungqr_,:cungql_,:cungrq_,:cunmlq_,:cunmqr_,:cunmql_,:cunmrq_,:cgemqrt_,:ComplexF32))
-    @eval begin
-        # SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, K, LDA, LWORK, M, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function orglq!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}, k::Integer = length(tau))
-            require_one_based_indexing(A, tau)
-            chkstride1(A,tau)
-            n = size(A, 2)
-            m = min(n, size(A, 1))
-            if k > m
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m"))
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orglq), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      m, n, k, A, max(1,stride(A,2)), tau, work, lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            if m < size(A,1)
-                A[1:m,:]
-            else
-                A
-            end
-        end
-
-        # SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, K, LDA, LWORK, M, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function orgqr!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}, k::Integer = length(tau))
-            require_one_based_indexing(A, tau)
-            chkstride1(A,tau)
-            m = size(A, 1)
-            n = min(m, size(A, 2))
-            if k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orgqr), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      m, n, k, A,
-                      max(1,stride(A,2)), tau, work, lwork,
-                      info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            if n < size(A,2)
-                A[:,1:n]
-            else
-                A
-            end
-        end
-
-        # SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, K, LDA, LWORK, M, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function orgql!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}, k::Integer = length(tau))
-            require_one_based_indexing(A, tau)
-            chkstride1(A,tau)
-            m = size(A, 1)
-            n = min(m, size(A, 2))
-            if k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orgql), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      m, n, k, A,
-                      max(1,stride(A,2)), tau, work, lwork,
-                      info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            if n < size(A,2)
-                A[:,1:n]
-            else
-                A
-            end
-        end
-
-        # SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       INTEGER            INFO, K, LDA, LWORK, M, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function orgrq!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}, k::Integer = length(tau))
-            require_one_based_indexing(A, tau)
-            chkstride1(A,tau)
-            m, n = size(A)
-            if n < m
-                throw(DimensionMismatch("input matrix A has dimensions ($m,$n), but cannot have fewer columns than rows"))
-            end
-            if k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orgrq), libblastrampoline), Cvoid,
-                      (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      m, n, k, A,
-                      max(1,stride(A,2)), tau, work, lwork,
-                      info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A
-        end
-
-        #      SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
-        #                         WORK, LWORK, INFO )
-        #      .. Scalar Arguments ..
-        #      CHARACTER          SIDE, TRANS
-        #      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
-        #      .. Array Arguments ..
-        #      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
-        function ormlq!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty},
-                        tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, tau, C)
-            chktrans(trans)
-            chkside(side)
-            chkstride1(A, C, tau)
-            m,n = ndims(C) == 2 ? size(C) : (size(C, 1), 1)
-            nA = size(A, 2)
-            k   = length(tau)
-            if side == 'L' && m != nA
-                throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $nA"))
-            end
-            if side == 'R' && n != nA
-                throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $n, must equal the second dimension of A, $nA"))
-            end
-            if side == 'L' && k > m
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m"))
-            end
-            if side == 'R' && k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormlq), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                      side, trans, m, n, k, A, max(1,stride(A,2)), tau,
-                      C, max(1,stride(C,2)), work, lwork, info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            C
-        end
-
-        #      SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
-        #                         WORK, INFO )
-        #      .. Scalar Arguments ..
-        #      CHARACTER          SIDE, TRANS
-        #      INTEGER            INFO, K, LDA, LDC, M, N
-        #      .. Array Arguments ..
-        #      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
-        function ormqr!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty},
-                        tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, tau, C)
-            chktrans(trans)
-            chkside(side)
-            chkstride1(A, C, tau)
-            m,n = ndims(C) == 2 ? size(C) : (size(C, 1), 1)
-            mA  = size(A, 1)
-            k   = length(tau)
-            if side == 'L' && m != mA
-                throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $mA"))
-            end
-            if side == 'R' && n != mA
-                throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $mA"))
-            end
-            if side == 'L' && k > m
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m"))
-            end
-            if side == 'R' && k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormqr), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                       Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Clong, Clong),
-                      side, trans, m, n,
-                      k, A, max(1,stride(A,2)), tau,
-                      C, max(1, stride(C,2)), work, lwork,
-                      info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            C
-        end
-
-        #      SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
-        #                         WORK, INFO )
-        #      .. Scalar Arguments ..
-        #      CHARACTER          SIDE, TRANS
-        #      INTEGER            INFO, K, LDA, LDC, M, N
-        #      .. Array Arguments ..
-        #      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
-        function ormql!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty},
-                        tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, tau, C)
-            chktrans(trans)
-            chkside(side)
-            chkstride1(A, C, tau)
-            m,n = ndims(C) == 2 ? size(C) : (size(C, 1), 1)
-            mA  = size(A, 1)
-            k   = length(tau)
-            if side == 'L' && m != mA
-                throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $mA"))
-            end
-            if side == 'R' && n != mA
-                throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $mA"))
-            end
-            if side == 'L' && k > m
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m"))
-            end
-            if side == 'R' && k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormql), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                       Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Clong, Clong),
-                      side, trans, m, n,
-                      k, A, max(1,stride(A,2)), tau,
-                      C, max(1, stride(C,2)), work, lwork,
-                      info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            C
-        end
-
-        #      SUBROUTINE DORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
-        #                         WORK, LWORK, INFO )
-        #      .. Scalar Arguments ..
-        #      CHARACTER          SIDE, TRANS
-        #      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
-        #      .. Array Arguments ..
-        #      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
-        function ormrq!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty},
-                        tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, tau, C)
-            chktrans(trans)
-            chkside(side)
-            chkstride1(A, C, tau)
-            m,n = ndims(C) == 2 ? size(C) : (size(C, 1), 1)
-            nA  = size(A, 2)
-            k   = length(tau)
-            if side == 'L' && m != nA
-                throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $nA"))
-            end
-            if side == 'R' && n != nA
-                throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $nA"))
-            end
-            if side == 'L' && k > m
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m"))
-            end
-            if side == 'R' && k > n
-                throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n"))
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormrq), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                      side, trans, m, n, k, A, max(1,stride(A,2)), tau,
-                      C, max(1,stride(C,2)), work, lwork, info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            C
-        end
-
-        function gemqrt!(side::AbstractChar, trans::AbstractChar, V::AbstractMatrix{$elty}, T::AbstractMatrix{$elty}, C::AbstractVecOrMat{$elty})
-            require_one_based_indexing(V, T, C)
-            chktrans(trans)
-            chkside(side)
-            chkstride1(V, T, C)
-            m,n = ndims(C) == 2 ? size(C) : (size(C, 1), 1)
-            nb, k = size(T)
-            if k == 0
-                return C
-            end
-            if side == 'L'
-                if !(0 <= k <= m)
-                    throw(DimensionMismatch("wrong value for k = $k: must be between 0 and $m"))
-                end
-                if m != size(V,1)
-                    throw(DimensionMismatch("first dimensions of C, $m, and V, $(size(V,1)) must match"))
-                end
-                ldv = stride(V,2)
-                if ldv < max(1, m)
-                    throw(DimensionMismatch("Q and C don't fit! The stride of V, $ldv, is too small"))
-                end
-                wss = n*k
-            elseif side == 'R'
-                if !(0 <= k <= n)
-                    throw(DimensionMismatch("wrong value for k = $k: must be between 0 and $n"))
-                end
-                if n != size(V,1)
-                    throw(DimensionMismatch("second dimension of C, $n, and first dimension of V, $(size(V,1)) must match"))
-                end
-                ldv = stride(V,2)
-                if ldv < max(1, n)
-                    throw(DimensionMismatch("Q and C don't fit! The stride of V, $ldv, is too small"))
-                end
-                wss = m*k
-            end
-            if !(1 <= nb <= k)
-                throw(DimensionMismatch("wrong value for nb = $nb, which must be between 1 and $k"))
-            end
-            ldc = stride(C, 2)
-            work = Vector{$elty}(undef, wss)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($gemqrt), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                 Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$elty}, Ptr{BlasInt}, Clong, Clong),
-                side, trans, m, n,
-                k, nb, V, ldv,
-                T, max(1,stride(T,2)), C, max(1,ldc),
-                work, info, 1, 1)
-            chklapackerror(info[])
-            return C
-        end
-    end
-end
-
-"""
-    orglq!(A, tau, k = length(tau))
-
-Explicitly finds the matrix `Q` of a `LQ` factorization after calling
-`gelqf!` on `A`. Uses the output of `gelqf!`. `A` is overwritten by `Q`.
-"""
-orglq!(A::AbstractMatrix, tau::AbstractVector, k::Integer = length(tau))
-
-"""
-    orgqr!(A, tau, k = length(tau))
-
-Explicitly finds the matrix `Q` of a `QR` factorization after calling
-`geqrf!` on `A`. Uses the output of `geqrf!`. `A` is overwritten by `Q`.
-"""
-orgqr!(A::AbstractMatrix, tau::AbstractVector, k::Integer = length(tau))
-
-"""
-    orgql!(A, tau, k = length(tau))
-
-Explicitly finds the matrix `Q` of a `QL` factorization after calling
-`geqlf!` on `A`. Uses the output of `geqlf!`. `A` is overwritten by `Q`.
-"""
-orgql!(A::AbstractMatrix, tau::AbstractVector, k::Integer = length(tau))
-
-"""
-    orgrq!(A, tau, k = length(tau))
-
-Explicitly finds the matrix `Q` of a `RQ` factorization after calling
-`gerqf!` on `A`. Uses the output of `gerqf!`. `A` is overwritten by `Q`.
-"""
-orgrq!(A::AbstractMatrix, tau::AbstractVector, k::Integer = length(tau))
-
-"""
-    ormlq!(side, trans, A, tau, C)
-
-Computes `Q * C` (`trans = N`), `transpose(Q) * C` (`trans = T`), `adjoint(Q) * C`
-(`trans = C`) for `side = L` or the equivalent right-sided multiplication
-for `side = R` using `Q` from a `LQ` factorization of `A` computed using
-`gelqf!`. `C` is overwritten.
-"""
-ormlq!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix, tau::AbstractVector, C::AbstractVecOrMat)
-
-"""
-    ormqr!(side, trans, A, tau, C)
-
-Computes `Q * C` (`trans = N`), `transpose(Q) * C` (`trans = T`), `adjoint(Q) * C`
-(`trans = C`) for `side = L` or the equivalent right-sided multiplication
-for `side = R` using `Q` from a `QR` factorization of `A` computed using
-`geqrf!`. `C` is overwritten.
-"""
-ormqr!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix, tau::AbstractVector, C::AbstractVecOrMat)
-
-"""
-    ormql!(side, trans, A, tau, C)
-
-Computes `Q * C` (`trans = N`), `transpose(Q) * C` (`trans = T`), `adjoint(Q) * C`
-(`trans = C`) for `side = L` or the equivalent right-sided multiplication
-for `side = R` using `Q` from a `QL` factorization of `A` computed using
-`geqlf!`. `C` is overwritten.
-"""
-ormql!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix, tau::AbstractVector, C::AbstractVecOrMat)
-
-"""
-    ormrq!(side, trans, A, tau, C)
-
-Computes `Q * C` (`trans = N`), `transpose(Q) * C` (`trans = T`), `adjoint(Q) * C`
-(`trans = C`) for `side = L` or the equivalent right-sided multiplication
-for `side = R` using `Q` from a `RQ` factorization of `A` computed using
-`gerqf!`. `C` is overwritten.
-"""
-ormrq!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix, tau::AbstractVector, C::AbstractVecOrMat)
-
-"""
-    gemqrt!(side, trans, V, T, C)
-
-Computes `Q * C` (`trans = N`), `transpose(Q) * C` (`trans = T`), `adjoint(Q) * C`
-(`trans = C`) for `side = L` or the equivalent right-sided multiplication
-for `side = R` using `Q` from a `QR` factorization of `A` computed using
-`geqrt!`. `C` is overwritten.
-"""
-gemqrt!(side::AbstractChar, trans::AbstractChar, V::AbstractMatrix, T::AbstractMatrix, C::AbstractVecOrMat)
-
-# (PO) positive-definite symmetric matrices,
-for (posv, potrf, potri, potrs, pstrf, elty, rtyp) in
-    ((:dposv_,:dpotrf_,:dpotri_,:dpotrs_,:dpstrf_,:Float64,:Float64),
-     (:sposv_,:spotrf_,:spotri_,:spotrs_,:spstrf_,:Float32,:Float32),
-     (:zposv_,:zpotrf_,:zpotri_,:zpotrs_,:zpstrf_,:ComplexF64,:Float64),
-     (:cposv_,:cpotrf_,:cpotri_,:cpotrs_,:cpstrf_,:ComplexF32,:Float32))
-    @eval begin
-        #     SUBROUTINE DPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
-        #*     .. Scalar Arguments ..
-        #      CHARACTER          UPLO
-        #      INTEGER            INFO, LDA, LDB, N, NRHS
-        #     .. Array Arguments ..
-        #      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-        function posv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if size(B,1) != n
-                throw(DimensionMismatch("first dimension of B, $(size(B,1)), and size of A, ($n,$n), must match!"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($posv), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), B, max(1,stride(B,2)), info, 1)
-            chkargsok(info[])
-            chkposdef(info[])
-            A, B
-        end
-
-        # SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * )
-        function potrf!(uplo::AbstractChar, A::AbstractMatrix{$elty})
-            require_one_based_indexing(A)
-            chkstride1(A)
-            checksquare(A)
-            chkuplo(uplo)
-            lda = max(1,stride(A,2))
-            if lda == 0
-                return A, 0
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($potrf), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  uplo, size(A,1), A, lda, info, 1)
-            chkargsok(info[])
-            #info[] > 0 means the leading minor of order info[] is not positive definite
-            #ordinarily, throw Exception here, but return error code here
-            #this simplifies isposdef! and factorize
-            return A, info[] # info stored in Cholesky
-        end
-
-        #       SUBROUTINE DPOTRI( UPLO, N, A, LDA, INFO )
-        #       .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, N
-        #       .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * )
-        function potri!(uplo::AbstractChar, A::AbstractMatrix{$elty})
-            require_one_based_indexing(A)
-            chkstride1(A)
-            chkuplo(uplo)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($potri), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  uplo, size(A,1), A, max(1,stride(A,2)), info, 1)
-            chkargsok(info[])
-            chknonsingular(info[])
-            A
-        end
-
-        #     SUBROUTINE DPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
-        #     .. Scalar Arguments ..
-        #      CHARACTER          UPLO
-        #      INTEGER            INFO, LDA, LDB, N, NRHS
-        #     .. Array Arguments ..
-        #      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-        function potrs!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A, B)
-            n = checksquare(A)
-            chkuplo(uplo)
-            nrhs = size(B,2)
-            if size(B,1) != n
-                throw(DimensionMismatch("first dimension of B, $(size(B,1)), and size of A, ($n,$n), must match!"))
-            end
-            lda = max(1,stride(A,2))
-            if lda == 0 || nrhs == 0
-                return B
-            end
-            ldb = max(1,stride(B,2))
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($potrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                    Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                   uplo, n, nrhs, A,
-                   lda, B, ldb, info, 1)
-            chklapackerror(info[])
-            return B
-        end
-
-        #       SUBROUTINE DPSTRF( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO )
-        #       .. Scalar Arguments ..
-        #       DOUBLE PRECISION   TOL
-        #       INTEGER            INFO, LDA, N, RANK
-        #       CHARACTER          UPLO
-        #       .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), WORK( 2*N )
-        #       INTEGER            PIV( N )
-        function pstrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}, tol::Real)
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            piv  = similar(A, BlasInt, n)
-            rank = Vector{BlasInt}(undef, 1)
-            work = Vector{$rtyp}(undef, 2n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($pstrf), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                   Ptr{BlasInt}, Ref{$rtyp}, Ptr{$rtyp}, Ptr{BlasInt}, Clong),
-                  uplo, n, A, max(1,stride(A,2)), piv, rank, tol, work, info, 1)
-            chkargsok(info[])
-            A, piv, rank[1], info[] #Stored in CholeskyPivoted
-        end
-    end
-end
-
-"""
-    posv!(uplo, A, B) -> (A, B)
-
-Finds the solution to `A * X = B` where `A` is a symmetric or Hermitian
-positive definite matrix. If `uplo = U` the upper Cholesky decomposition
-of `A` is computed. If `uplo = L` the lower Cholesky decomposition of `A`
-is computed. `A` is overwritten by its Cholesky decomposition. `B` is
-overwritten with the solution `X`.
-"""
-posv!(uplo::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat)
-
-"""
-    potrf!(uplo, A)
-
-Computes the Cholesky (upper if `uplo = U`, lower if `uplo = L`)
-decomposition of positive-definite matrix `A`. `A` is overwritten and
-returned with an info code.
-"""
-potrf!(uplo::AbstractChar, A::AbstractMatrix)
-
-"""
-    potri!(uplo, A)
-
-Computes the inverse of positive-definite matrix `A` after calling
-`potrf!` to find its (upper if `uplo = U`, lower if `uplo = L`) Cholesky
-decomposition.
-
-`A` is overwritten by its inverse and returned.
-"""
-potri!(uplo::AbstractChar, A::AbstractMatrix)
-
-"""
-    potrs!(uplo, A, B)
-
-Finds the solution to `A * X = B` where `A` is a symmetric or Hermitian
-positive definite matrix whose Cholesky decomposition was computed by
-`potrf!`. If `uplo = U` the upper Cholesky decomposition of `A` was
-computed. If `uplo = L` the lower Cholesky decomposition of `A` was
-computed. `B` is overwritten with the solution `X`.
-"""
-potrs!(uplo::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat)
-
-"""
-    pstrf!(uplo, A, tol) -> (A, piv, rank, info)
-
-Computes the (upper if `uplo = U`, lower if `uplo = L`) pivoted Cholesky
-decomposition of positive-definite matrix `A` with a user-set tolerance
-`tol`. `A` is overwritten by its Cholesky decomposition.
-
-Returns `A`, the pivots `piv`, the rank of `A`, and an `info` code. If `info = 0`,
-the factorization succeeded. If `info = i > 0 `, then `A` is indefinite or
-rank-deficient.
-"""
-pstrf!(uplo::AbstractChar, A::AbstractMatrix, tol::Real)
-
-# (PT) positive-definite, symmetric, tri-diagonal matrices
-# Direct solvers for general tridiagonal and symmetric positive-definite tridiagonal
-for (ptsv, pttrf, elty, relty) in
-    ((:dptsv_,:dpttrf_,:Float64,:Float64),
-     (:sptsv_,:spttrf_,:Float32,:Float32),
-     (:zptsv_,:zpttrf_,:ComplexF64,:Float64),
-     (:cptsv_,:cpttrf_,:ComplexF32,:Float32))
-    @eval begin
-        #       SUBROUTINE DPTSV( N, NRHS, D, E, B, LDB, INFO )
-        #       .. Scalar Arguments ..
-        #       INTEGER            INFO, LDB, N, NRHS
-        #       .. Array Arguments ..
-        #       DOUBLE PRECISION   B( LDB, * ), D( * ), E( * )
-        function ptsv!(D::AbstractVector{$relty}, E::AbstractVector{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(D, E, B)
-            chkstride1(B, D, E)
-            n = length(D)
-            if length(E) != n - 1
-                throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)"))
-            end
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($ptsv), libblastrampoline), Cvoid,
-                  (Ref{BlasInt}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  n, size(B,2), D, E, B, max(1,stride(B,2)), info)
-            chklapackerror(info[])
-            B
-        end
-
-        #       SUBROUTINE DPTTRF( N, D, E, INFO )
-        #       .. Scalar Arguments ..
-        #       INTEGER            INFO, N
-        #       .. Array Arguments ..
-        #       DOUBLE PRECISION   D( * ), E( * )
-        function pttrf!(D::AbstractVector{$relty}, E::AbstractVector{$elty})
-            require_one_based_indexing(D, E)
-            chkstride1(D, E)
-            n = length(D)
-            if length(E) != n - 1
-                throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($pttrf), libblastrampoline), Cvoid,
-                  (Ref{BlasInt}, Ptr{$relty}, Ptr{$elty}, Ptr{BlasInt}),
-                  n, D, E, info)
-            chklapackerror(info[])
-            D, E
-        end
-    end
-end
-
-"""
-    ptsv!(D, E, B)
-
-Solves `A * X = B` for positive-definite tridiagonal `A`. `D` is the
-diagonal of `A` and `E` is the off-diagonal. `B` is overwritten with the
-solution `X` and returned.
-"""
-ptsv!(D::AbstractVector, E::AbstractVector, B::AbstractVecOrMat)
-
-"""
-    pttrf!(D, E)
-
-Computes the LDLt factorization of a positive-definite tridiagonal matrix
-with `D` as diagonal and `E` as off-diagonal. `D` and `E` are overwritten
-and returned.
-"""
-pttrf!(D::AbstractVector, E::AbstractVector)
-
-for (pttrs, elty, relty) in
-    ((:dpttrs_,:Float64,:Float64),
-     (:spttrs_,:Float32,:Float32))
-    @eval begin
-        #       SUBROUTINE DPTTRS( N, NRHS, D, E, B, LDB, INFO )
-        #       .. Scalar Arguments ..
-        #       INTEGER            INFO, LDB, N, NRHS
-        #       .. Array Arguments ..
-        #       DOUBLE PRECISION   B( LDB, * ), D( * ), E( * )
-        function pttrs!(D::AbstractVector{$relty}, E::AbstractVector{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(D, E, B)
-            chkstride1(B, D, E)
-            n = length(D)
-            if length(E) != n - 1
-                throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)"))
-            end
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($pttrs), libblastrampoline), Cvoid,
-                  (Ref{BlasInt}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  n, size(B,2), D, E, B, max(1,stride(B,2)), info)
-            chklapackerror(info[])
-            B
-        end
-    end
-end
-
-for (pttrs, elty, relty) in
-    ((:zpttrs_,:ComplexF64,:Float64),
-     (:cpttrs_,:ComplexF32,:Float32))
-    @eval begin
-        #       SUBROUTINE ZPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDB, N, NRHS
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   D( * )
-        #       COMPLEX*16         B( LDB, * ), E( * )
-        function pttrs!(uplo::AbstractChar, D::AbstractVector{$relty}, E::AbstractVector{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(D, E, B)
-            chkstride1(B, D, E)
-            chkuplo(uplo)
-            n = length(D)
-            if length(E) != n - 1
-                throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)"))
-            end
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($pttrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  uplo, n, size(B,2), D, E, B, max(1,stride(B,2)), info, 1)
-            chklapackerror(info[])
-            B
-        end
-    end
-end
-
-"""
-    pttrs!(D, E, B)
-
-Solves `A * X = B` for positive-definite tridiagonal `A` with diagonal
-`D` and off-diagonal `E` after computing `A`'s LDLt factorization using
-`pttrf!`. `B` is overwritten with the solution `X`.
-"""
-pttrs!(D::AbstractVector, E::AbstractVector, B::AbstractVecOrMat)
-
-## (TR) triangular matrices: solver and inverse
-for (trtri, trtrs, elty) in
-    ((:dtrtri_,:dtrtrs_,:Float64),
-     (:strtri_,:strtrs_,:Float32),
-     (:ztrtri_,:ztrtrs_,:ComplexF64),
-     (:ctrtri_,:ctrtrs_,:ComplexF32))
-    @eval begin
-        #     SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO )
-        #*     .. Scalar Arguments ..
-        #      CHARACTER          DIAG, UPLO
-        #      INTEGER            INFO, LDA, N
-        #     .. Array Arguments ..
-        #      DOUBLE PRECISION   A( LDA, * )
-        function trtri!(uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            chkdiag(diag)
-            lda = max(1,stride(A, 2))
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($trtri), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Clong, Clong),
-                  uplo, diag, n, A, lda, info, 1, 1)
-            chklapackerror(info[])
-            A
-        end
-
-        #      SUBROUTINE DTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          DIAG, TRANS, UPLO
-        #       INTEGER            INFO, LDA, LDB, N, NRHS
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-        function trtrs!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar,
-                        A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chktrans(trans)
-            chkdiag(diag)
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($trtrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                   Clong, Clong, Clong),
-                  uplo, trans, diag, n, size(B,2), A, max(1,stride(A,2)),
-                  B, max(1,stride(B,2)), info,
-                  1, 1, 1)
-            chklapackerror(info[])
-            B
-        end
-    end
-end
-
-"""
-    trtri!(uplo, diag, A)
-
-Finds the inverse of (upper if `uplo = U`, lower if `uplo = L`)
-triangular matrix `A`. If `diag = N`, `A` has non-unit diagonal elements.
-If `diag = U`, all diagonal elements of `A` are one. `A` is overwritten
-with its inverse.
-"""
-trtri!(uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix)
-
-"""
-    trtrs!(uplo, trans, diag, A, B)
-
-Solves `A * X = B` (`trans = N`), `transpose(A) * X = B` (`trans = T`), or
-`adjoint(A) * X = B` (`trans = C`) for (upper if `uplo = U`, lower if `uplo = L`)
-triangular matrix `A`. If `diag = N`, `A` has non-unit diagonal elements.
-If `diag = U`, all diagonal elements of `A` are one. `B` is overwritten
-with the solution `X`.
-"""
-trtrs!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat)
-
-#Eigenvector computation and condition number estimation
-for (trcon, trevc, trrfs, elty) in
-    ((:dtrcon_,:dtrevc_,:dtrrfs_,:Float64),
-     (:strcon_,:strevc_,:strrfs_,:Float32))
-    @eval begin
-        # SUBROUTINE DTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK,
-        #                  IWORK, INFO )
-        # .. Scalar Arguments ..
-        # CHARACTER          DIAG, NORM, UPLO
-        # INTEGER            INFO, LDA, N
-        # DOUBLE PRECISION   RCOND
-        # .. Array Arguments ..
-        # INTEGER            IWORK( * )
-        # DOUBLE PRECISION   A( LDA, * ), WORK( * )
-        function trcon!(norm::AbstractChar, uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            chkdiag(diag)
-            n = checksquare(A)
-            chkuplo(uplo)
-            rcond = Ref{$elty}()
-            work  = Vector{$elty}(undef, 3n)
-            iwork = Vector{BlasInt}(undef, n)
-            info  = Ref{BlasInt}()
-            ccall((@blasfunc($trcon), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
-                   Clong, Clong, Clong),
-                  norm, uplo, diag, n,
-                  A, max(1,stride(A,2)), rcond, work, iwork, info,
-                  1, 1, 1)
-            chklapackerror(info[])
-            rcond[]
-        end
-
-        # SUBROUTINE DTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
-        #                    LDVR, MM, M, WORK, INFO )
-        #
-        # .. Scalar Arguments ..
-        # CHARACTER          HOWMNY, SIDE
-        # INTEGER            INFO, LDT, LDVL, LDVR, M, MM, N
-        # ..
-        # .. Array Arguments ..
-        # LOGICAL            SELECT( * )
-        # DOUBLE PRECISION   T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ),
-        #$                   WORK( * )
-        function trevc!(side::AbstractChar, howmny::AbstractChar, select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty},
-                        VL::AbstractMatrix{$elty} = similar(T),
-                        VR::AbstractMatrix{$elty} = similar(T))
-            require_one_based_indexing(select, T, VL, VR)
-            # Extract
-            if side ∉ ['L','R','B']
-                throw(ArgumentError("side argument must be 'L' (left eigenvectors), 'R' (right eigenvectors), or 'B' (both), got $side"))
-            end
-            n, mm = checksquare(T), size(VL, 2)
-            ldt, ldvl, ldvr = stride(T, 2), stride(VL, 2), stride(VR, 2)
-
-            # Check
-            chkstride1(T, select, VL, VR)
-
-            # Allocate
-            m = Ref{BlasInt}()
-            work = Vector{$elty}(undef, 3n)
-            info = Ref{BlasInt}()
-
-            ccall((@blasfunc($trevc), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
-                 Ptr{$elty}, Ptr{BlasInt}, Clong, Clong),
-                side, howmny, select, n,
-                T, ldt, VL, ldvl,
-                VR, ldvr, mm, m,
-                work, info, 1, 1)
-            chklapackerror(info[])
-
-            #Decide what exactly to return
-            if howmny == 'S' #compute selected eigenvectors
-                if side == 'L' #left eigenvectors only
-                    return select, VL[:,1:m[]]
-                elseif side == 'R' #right eigenvectors only
-                    return select, VR[:,1:m[]]
-                else #side == 'B' #both eigenvectors
-                    return select, VL[:,1:m[]], VR[:,1:m[]]
-                end
-            else #compute all eigenvectors
-                if side == 'L' #left eigenvectors only
-                    return VL[:,1:m[]]
-                elseif side == 'R' #right eigenvectors only
-                    return VR[:,1:m[]]
-                else #side == 'B' #both eigenvectors
-                    return VL[:,1:m[]], VR[:,1:m[]]
-                end
-            end
-        end
-
-        # SUBROUTINE DTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X,
-        #                    LDX, FERR, BERR, WORK, IWORK, INFO )
-        # .. Scalar Arguments ..
-        # CHARACTER          DIAG, TRANS, UPLO
-        # INTEGER            INFO, LDA, LDB, LDX, N, NRHS
-        # .. Array Arguments ..
-        # INTEGER            IWORK( * )
-        # DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ),
-        #$                   WORK( * ), X( LDX, * )
-        function trrfs!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar,
-                A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, X::AbstractVecOrMat{$elty},
-                Ferr::AbstractVector{$elty} = similar(B, $elty, size(B,2)),
-                Berr::AbstractVector{$elty} = similar(B, $elty, size(B,2)))
-            require_one_based_indexing(A, B, X, Ferr, Berr)
-            chkstride1(A, B, X, Ferr, Berr)
-            chktrans(trans)
-            chkuplo(uplo)
-            chkdiag(diag)
-            n = size(A,2)
-            nrhs = size(B,2)
-            if nrhs != size(X,2)
-                throw(DimensionMismatch("second dimensions of B, $nrhs, and X, $(size(X,2)), must match"))
-            end
-            work = Vector{$elty}(undef, 3n)
-            iwork = Vector{BlasInt}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($trrfs), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong, Clong, Clong),
-                uplo, trans, diag, n,
-                nrhs, A, max(1,stride(A,2)), B, max(1,stride(B,2)), X, max(1,stride(X,2)),
-                Ferr, Berr, work, iwork, info, 1, 1, 1)
-            chklapackerror(info[])
-            Ferr, Berr
-        end
-    end
-end
-
-for (trcon, trevc, trrfs, elty, relty) in
-    ((:ztrcon_,:ztrevc_,:ztrrfs_,:ComplexF64,:Float64),
-     (:ctrcon_,:ctrevc_,:ctrrfs_,:ComplexF32, :Float32))
-    @eval begin
-        # SUBROUTINE ZTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK,
-        #                   RWORK, INFO )
-        # .. Scalar Arguments ..
-        # CHARACTER          DIAG, NORM, UPLO
-        # INTEGER            INFO, LDA, N
-        # DOUBLE PRECISION   RCOND
-        # .. Array Arguments ..
-        # DOUBLE PRECISION   RWORK( * )
-        # COMPLEX*16         A( LDA, * ), WORK( * )
-        function trcon!(norm::AbstractChar, uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            chkdiag(diag)
-            rcond = Ref{$relty}(1)
-            work  = Vector{$elty}(undef, 2n)
-            rwork = Vector{$relty}(undef, n)
-            info  = Ref{BlasInt}()
-            ccall((@blasfunc($trcon), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ref{$relty}, Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt},
-                   Clong, Clong, Clong),
-                  norm, uplo, diag, n,
-                  A, max(1,stride(A,2)), rcond, work, rwork, info,
-                  1, 1, 1)
-            chklapackerror(info[])
-            rcond[]
-        end
-
-        # SUBROUTINE ZTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
-        #                    LDVR, MM, M, WORK, RWORK, INFO )
-        #
-        # .. Scalar Arguments ..
-        # CHARACTER          HOWMNY, SIDE
-        # INTEGER            INFO, LDT, LDVL, LDVR, M, MM, N
-        # ..
-        # .. Array Arguments ..
-        # LOGICAL            SELECT( * )
-        # DOUBLE PRECISION   RWORK( * )
-        # COMPLEX*16         T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ),
-        #$                   WORK( * )
-        function trevc!(side::AbstractChar, howmny::AbstractChar, select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty},
-                        VL::AbstractMatrix{$elty} = similar(T),
-                        VR::AbstractMatrix{$elty} = similar(T))
-            require_one_based_indexing(select, T, VL, VR)
-            # Extract
-            n, mm = checksquare(T), size(VL, 2)
-            ldt, ldvl, ldvr = stride(T, 2), stride(VL, 2), stride(VR, 2)
-
-            # Check
-            chkstride1(T, select, VL, VR)
-            if side ∉ ['L','R','B']
-                throw(ArgumentError("side argument must be 'L' (left eigenvectors), 'R' (right eigenvectors), or 'B' (both), got $side"))
-            end
-
-            # Allocate
-            m = Ref{BlasInt}()
-            work = Vector{$elty}(undef, 2n)
-            rwork = Vector{$relty}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($trevc), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
-                 Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong),
-                side, howmny, select, n,
-                T, ldt, VL, ldvl,
-                VR, ldvr, mm, m,
-                work, rwork, info, 1, 1)
-            chklapackerror(info[])
-
-            #Decide what exactly to return
-            if howmny == 'S' #compute selected eigenvectors
-                if side == 'L' #left eigenvectors only
-                    return select, VL[:,1:m[]]
-                elseif side == 'R' #right eigenvectors only
-                    return select, VR[:,1:m[]]
-                else #side=='B' #both eigenvectors
-                    return select, VL[:,1:m[]], VR[:,1:m[]]
-                end
-            else #compute all eigenvectors
-                if side == 'L' #left eigenvectors only
-                    return VL[:,1:m[]]
-                elseif side == 'R' #right eigenvectors only
-                    return VR[:,1:m[]]
-                else #side=='B' #both eigenvectors
-                    return VL[:,1:m[]], VR[:,1:m[]]
-                end
-            end
-        end
-
-        # SUBROUTINE ZTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X,
-        #                    LDX, FERR, BERR, WORK, IWORK, INFO )
-        # .. Scalar Arguments ..
-        # CHARACTER          DIAG, TRANS, UPLO
-        # INTEGER            INFO, LDA, LDB, LDX, N, NRHS
-        # .. Array Arguments ..
-        # INTEGER            IWORK( * )
-        # DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ),
-        #$                   WORK( * ), X( LDX, * )
-        function trrfs!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar,
-                        A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, X::AbstractVecOrMat{$elty},
-                        Ferr::AbstractVector{$relty} = similar(B, $relty, size(B,2)),
-                        Berr::AbstractVector{$relty} = similar(B, $relty, size(B,2)))
-            require_one_based_indexing(A, B, X, Ferr, Berr)
-            chkstride1(A, B, X, Ferr, Berr)
-            chktrans(trans)
-            chkuplo(uplo)
-            chkdiag(diag)
-            n = size(A,2)
-            nrhs = size(B,2)
-            if nrhs != size(X,2)
-                throw(DimensionMismatch("second dimensions of B, $nrhs, and X, $(size(X,2)), must match"))
-            end
-            work  = Vector{$elty}(undef, 2n)
-            rwork = Vector{$relty}(undef, n)
-            info  = Ref{BlasInt}()
-            ccall((@blasfunc($trrfs), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong, Clong),
-                uplo, trans, diag, n,
-                nrhs, A, max(1,stride(A,2)), B, max(1,stride(B,2)), X, max(1,stride(X,2)),
-                Ferr, Berr, work, rwork, info, 1, 1, 1)
-            chklapackerror(info[])
-            Ferr, Berr
-        end
-    end
-end
-
-"""
-    trcon!(norm, uplo, diag, A)
-
-Finds the reciprocal condition number of (upper if `uplo = U`, lower if
-`uplo = L`) triangular matrix `A`. If `diag = N`, `A` has non-unit
-diagonal elements. If `diag = U`, all diagonal elements of `A` are one.
-If `norm = I`, the condition number is found in the infinity norm. If
-`norm = O` or `1`, the condition number is found in the one norm.
-"""
-trcon!(norm::AbstractChar, uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix)
-
-"""
-    trevc!(side, howmny, select, T, VL = similar(T), VR = similar(T))
-
-Finds the eigensystem of an upper triangular matrix `T`. If `side = R`,
-the right eigenvectors are computed. If `side = L`, the left
-eigenvectors are computed. If `side = B`, both sets are computed. If
-`howmny = A`, all eigenvectors are found. If `howmny = B`, all
-eigenvectors are found and backtransformed using `VL` and `VR`. If
-`howmny = S`, only the eigenvectors corresponding to the values in
-`select` are computed.
-"""
-trevc!(side::AbstractChar, howmny::AbstractChar, select::AbstractVector{BlasInt}, T::AbstractMatrix,
-        VL::AbstractMatrix = similar(T), VR::AbstractMatrix = similar(T))
-
-"""
-    trrfs!(uplo, trans, diag, A, B, X, Ferr, Berr) -> (Ferr, Berr)
-
-Estimates the error in the solution to `A * X = B` (`trans = N`),
-`transpose(A) * X = B` (`trans = T`), `adjoint(A) * X = B` (`trans = C`) for `side = L`,
-or the equivalent equations a right-handed `side = R` `X * A` after
-computing `X` using `trtrs!`. If `uplo = U`, `A` is upper triangular.
-If `uplo = L`, `A` is lower triangular. If `diag = N`, `A` has non-unit
-diagonal elements. If `diag = U`, all diagonal elements of `A` are one.
-`Ferr` and `Berr` are optional inputs. `Ferr` is the forward error and
-`Berr` is the backward error, each component-wise.
-"""
-trrfs!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat,
-       X::AbstractVecOrMat, Ferr::AbstractVector, Berr::AbstractVector)
-
-## (ST) Symmetric tridiagonal - eigendecomposition
-for (stev, stebz, stegr, stein, elty) in
-    ((:dstev_,:dstebz_,:dstegr_,:dstein_,:Float64),
-     (:sstev_,:sstebz_,:sstegr_,:sstein_,:Float32)
-#     , (:zstev_,:ComplexF64)  Need to rewrite for ZHEEV, rwork, etc.
-#     , (:cstev_,:ComplexF32)
-     )
-    @eval begin
-        function stev!(job::AbstractChar, dv::AbstractVector{$elty}, ev::AbstractVector{$elty})
-            require_one_based_indexing(dv, ev)
-            chkstride1(dv, ev)
-            n = length(dv)
-            if length(ev) != n - 1 && length(ev) != n
-                throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than or equal to dv's length, $n)"))
-            end
-            Zmat = similar(dv, $elty, (n, job != 'N' ? n : 0))
-            work = Vector{$elty}(undef, max(1, 2n-2))
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($stev), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                   Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
-                  job, n, dv, ev, Zmat, n, work, info, 1)
-            chklapackerror(info[])
-            dv, Zmat
-        end
-
-        #*  DSTEBZ computes the eigenvalues of a symmetric tridiagonal
-        #*  matrix T.  The user may ask for all eigenvalues, all eigenvalues
-        #*  in the half-open interval (VL, VU], or the IL-th through IU-th
-        #*  eigenvalues.
-        function stebz!(range::AbstractChar, order::AbstractChar, vl::$elty, vu::$elty, il::Integer, iu::Integer, abstol::Real, dv::AbstractVector{$elty}, ev::AbstractVector{$elty})
-            require_one_based_indexing(dv, ev)
-            chkstride1(dv, ev)
-            n = length(dv)
-            if length(ev) != n - 1
-                throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than dv's length, $n)"))
-            end
-            m = Ref{BlasInt}()
-            nsplit = Vector{BlasInt}(undef, 1)
-            w = similar(dv, $elty, n)
-            tmp = 0.0
-            iblock = similar(dv, BlasInt,n)
-            isplit = similar(dv, BlasInt,n)
-            work = Vector{$elty}(undef, 4*n)
-            iwork = Vector{BlasInt}(undef, 3*n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($stebz), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{$elty},
-                Ref{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
-                Ptr{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
-                Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
-                Ptr{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                range, order, n, vl,
-                vu, il, iu, abstol,
-                dv, ev, m, nsplit,
-                w, iblock, isplit, work,
-                iwork, info, 1, 1)
-            chklapackerror(info[])
-            w[1:m[]], iblock[1:m[]], isplit[1:nsplit[1]]
-        end
-
-        function stegr!(jobz::AbstractChar, range::AbstractChar, dv::AbstractVector{$elty}, ev::AbstractVector{$elty}, vl::Real, vu::Real, il::Integer, iu::Integer)
-            require_one_based_indexing(dv, ev)
-            chkstride1(dv, ev)
-            n = length(dv)
-            ne = length(ev)
-            if ne == n - 1
-                eev = [ev; zero($elty)]
-            elseif ne == n
-                eev = copy(ev)
-                eev[n] = zero($elty)
-            else
-                throw(DimensionMismatch("ev has length $ne but needs one less than or equal to dv's length, $n)"))
-            end
-
-            abstol = Vector{$elty}(undef, 1)
-            m = Ref{BlasInt}()
-            w = similar(dv, $elty, n)
-            ldz = jobz == 'N' ? 1 : n
-            Z = similar(dv, $elty, ldz, range == 'I' ? iu-il+1 : n)
-            isuppz = similar(dv, BlasInt, 2*size(Z, 2))
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            iwork = Vector{BlasInt}(undef, 1)
-            liwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($stegr), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                    Ptr{$elty}, Ref{$elty}, Ref{$elty}, Ref{BlasInt},
-                    Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{$elty},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
-                    Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
-                    Clong, Clong),
-                    jobz, range, n, dv,
-                    eev, vl, vu, il,
-                    iu, abstol, m, w,
-                    Z, ldz, isuppz, work,
-                    lwork, iwork, liwork, info,
-                    1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                    liwork = iwork[1]
-                    resize!(iwork, liwork)
-                end
-            end
-            m[] == length(w) ? w : w[1:m[]], m[] == size(Z, 2) ? Z : Z[:,1:m[]]
-        end
-
-        function stein!(dv::AbstractVector{$elty}, ev_in::AbstractVector{$elty}, w_in::AbstractVector{$elty}, iblock_in::AbstractVector{BlasInt}, isplit_in::AbstractVector{BlasInt})
-            require_one_based_indexing(dv, ev_in, w_in, iblock_in, isplit_in)
-            chkstride1(dv, ev_in, w_in, iblock_in, isplit_in)
-            n = length(dv)
-            ne = length(ev_in)
-            if ne == n - 1
-                ev = [ev_in; zero($elty)]
-            elseif ne == n
-                ev = copy(ev_in)
-                ev[n] = zero($elty)
-            else
-                throw(DimensionMismatch("ev_in has length $ne but needs one less than or equal to dv's length, $n)"))
-            end
-            ldz = n #Leading dimension
-            #Number of eigenvalues to find
-            if !(1 <= length(w_in) <= n)
-                throw(DimensionMismatch("w_in has length $(length(w_in)), but needs to be between 1 and $n"))
-            end
-            m = length(w_in)
-            #If iblock and isplit are invalid input, assume worst-case block partitioning,
-            # i.e. set the block scheme to be the entire matrix
-            iblock = similar(dv, BlasInt,n)
-            isplit = similar(dv, BlasInt,n)
-            w = similar(dv, $elty,n)
-            if length(iblock_in) < m #Not enough block specifications
-                iblock[1:m] = fill(BlasInt(1), m)
-                w[1:m] = sort(w_in)
-            else
-                iblock[1:m] = iblock_in
-                w[1:m] = w_in #Assume user has sorted the eigenvalues properly
-            end
-            if length(isplit_in) < 1 #Not enough block specifications
-                isplit[1] = n
-            else
-                isplit[1:length(isplit_in)] = isplit_in
-            end
-            z = similar(dv, $elty,(n,m))
-            work  = Vector{$elty}(undef, 5*n)
-            iwork = Vector{BlasInt}(undef, n)
-            ifail = Vector{BlasInt}(undef, m)
-            info  = Ref{BlasInt}()
-            ccall((@blasfunc($stein), libblastrampoline), Cvoid,
-                (Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
-                Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
-                Ptr{BlasInt}),
-                n, dv, ev, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info)
-            chklapackerror(info[])
-            if any(ifail .!= 0)
-                # TODO: better error message / type
-                error("failed to converge eigenvectors:\n$(findall(!iszero, ifail))")
-            end
-            z
-        end
-    end
-end
-stegr!(jobz::AbstractChar, dv::AbstractVector, ev::AbstractVector) = stegr!(jobz, 'A', dv, ev, 0.0, 0.0, 0, 0)
-
-# Allow user to skip specification of iblock and isplit
-stein!(dv::AbstractVector, ev::AbstractVector, w_in::AbstractVector) = stein!(dv, ev, w_in, zeros(BlasInt,0), zeros(BlasInt,0))
-# Allow user to specify just one eigenvector to get in stein!
-stein!(dv::AbstractVector, ev::AbstractVector, eval::Real) = stein!(dv, ev, [eval], zeros(BlasInt,0), zeros(BlasInt,0))
-
-"""
-    stev!(job, dv, ev) -> (dv, Zmat)
-
-Computes the eigensystem for a symmetric tridiagonal matrix with `dv` as
-diagonal and `ev` as off-diagonal. If `job = N` only the eigenvalues are
-found and returned in `dv`. If `job = V` then the eigenvectors are also found
-and returned in `Zmat`.
-"""
-stev!(job::AbstractChar, dv::AbstractVector, ev::AbstractVector)
-
-"""
-    stebz!(range, order, vl, vu, il, iu, abstol, dv, ev) -> (dv, iblock, isplit)
-
-Computes the eigenvalues for a symmetric tridiagonal matrix with `dv` as
-diagonal and `ev` as off-diagonal. If `range = A`, all the eigenvalues
-are found. If `range = V`, the eigenvalues in the half-open interval
-`(vl, vu]` are found. If `range = I`, the eigenvalues with indices between
-`il` and `iu` are found. If `order = B`, eigvalues are ordered within a
-block. If `order = E`, they are ordered across all the blocks.
-`abstol` can be set as a tolerance for convergence.
-"""
-stebz!(range::AbstractChar, order::AbstractChar, vl, vu, il::Integer, iu::Integer, abstol::Real, dv::AbstractVector, ev::AbstractVector)
-
-"""
-    stegr!(jobz, range, dv, ev, vl, vu, il, iu) -> (w, Z)
-
-Computes the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors
-(`jobz = V`) for a symmetric tridiagonal matrix with `dv` as diagonal
-and `ev` as off-diagonal. If `range = A`, all the eigenvalues
-are found. If `range = V`, the eigenvalues in the half-open interval
-`(vl, vu]` are found. If `range = I`, the eigenvalues with indices between
-`il` and `iu` are found. The eigenvalues are returned in `w` and the eigenvectors
-in `Z`.
-"""
-stegr!(jobz::AbstractChar, range::AbstractChar, dv::AbstractVector, ev::AbstractVector, vl::Real, vu::Real, il::Integer, iu::Integer)
-
-"""
-    stein!(dv, ev_in, w_in, iblock_in, isplit_in)
-
-Computes the eigenvectors for a symmetric tridiagonal matrix with `dv`
-as diagonal and `ev_in` as off-diagonal. `w_in` specifies the input
-eigenvalues for which to find corresponding eigenvectors. `iblock_in`
-specifies the submatrices corresponding to the eigenvalues in `w_in`.
-`isplit_in` specifies the splitting points between the submatrix blocks.
-"""
-stein!(dv::AbstractVector, ev_in::AbstractVector, w_in::AbstractVector, iblock_in::AbstractVector{BlasInt}, isplit_in::AbstractVector{BlasInt})
-
-## (SY) symmetric real matrices - Bunch-Kaufman decomposition,
-## solvers (direct and factored) and inverse.
-for (syconv, sysv, sytrf, sytri, sytrs, elty) in
-    ((:dsyconv_,:dsysv_,:dsytrf_,:dsytri_,:dsytrs_,:Float64),
-     (:ssyconv_,:ssysv_,:ssytrf_,:ssytri_,:ssytrs_,:Float32))
-    @eval begin
-        #       SUBROUTINE DSYCONV( UPLO, WAY, N, A, LDA, IPIV, WORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO, WAY
-        #       INTEGER            INFO, LDA, N
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   A( LDA, * ), WORK( * )
-        function syconv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
-            chkstride1(A, ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            work = Vector{$elty}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($syconv), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong, Clong),
-                  uplo, 'C', n, A, max(1,stride(A,2)), ipiv, work, info, 1, 1)
-            chklapackerror(info[])
-            A, work
-        end
-
-        #       SUBROUTINE DSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
-        #                         LWORK, INFO )
-        #       .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
-        #       .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * )
-        function sysv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A,B)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            ipiv  = similar(A, BlasInt, n)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sysv), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info,  1)
-                chkargsok(info[])
-                chknonsingular(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            B, A, ipiv
-        end
-
-        #       SUBROUTINE DSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   A( LDA, * ), WORK( * )
-        function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            ipiv  = similar(A, BlasInt, n)
-            if n == 0
-                return A, ipiv, zero(BlasInt)
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sytrf), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, A, stride(A,2), ipiv, work, lwork, info, 1)
-                chkargsok(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            return A, ipiv, info[]
-        end
-
-        #       SUBROUTINE DSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   A( LDA, * ), WORK( * )
-#         function sytri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::Vector{BlasInt})
-#             chkstride1(A)
-#             n = checksquare(A)
-#             chkuplo(uplo)
-#             work  = Vector{$elty}(undef, 1)
-#             lwork = BlasInt(-1)
-#             info  = Ref{BlasInt}()
-#             for i in 1:2
-#                 ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
-#                       (Ptr{UInt8}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt},
-#                        Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong),
-#                       &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info, 1)
-#                 @assertargsok
-#                 chknonsingular(info[])
-#                 if lwork < 0
-#                     lwork = BlasInt(real(work[1]))
-#                     work = Vector{$elty}(undef, lwork)
-#                 end
-#             end
-#             A
-#         end
-
-        #      SUBROUTINE DSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO )
-        #     .. Scalar Arguments ..
-        #      CHARACTER          UPLO
-        #      INTEGER            INFO, LDA, N
-        #     .. Array Arguments ..
-        #      INTEGER            IPIV( * )
-        #      DOUBLE PRECISION   A( LDA, * ), WORK( * )
-        function sytri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
-            chkstride1(A, ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            work = Vector{$elty}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
-            chkargsok(info[])
-            chknonsingular(info[])
-            A
-        end
-
-        #       SUBROUTINE DSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-        #
-        #       .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, N, NRHS
-        #       .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-        function sytrs!(uplo::AbstractChar, A::AbstractMatrix{$elty},
-                       ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, ipiv, B)
-            chkstride1(A,B,ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
-            chklapackerror(info[])
-            B
-        end
-    end
-end
-
-# Rook-pivoting variants of symmetric-matrix algorithms
-for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
-    ((:dsysv_rook_,:dsytrf_rook_,:dsytri_rook_,:dsytrs_rook_,:dsyconvf_rook_,:Float64),
-     (:ssysv_rook_,:ssytrf_rook_,:ssytri_rook_,:ssytrs_rook_,:ssyconvf_rook_,:Float32))
-    @eval begin
-        #       SUBROUTINE DSYSV_ROOK(UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
-        #                             LWORK, INFO )
-        #       .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
-        #       .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * )
-        function sysv_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A,B)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            ipiv  = similar(A, BlasInt, n)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sysv), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info, 1)
-                chkargsok(info[])
-                chknonsingular(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            B, A, ipiv
-        end
-
-        #       SUBROUTINE DSYTRF_ROOK(UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   A( LDA, * ), WORK( * )
-        function sytrf_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            ipiv  = similar(A, BlasInt, n)
-            if n == 0
-                return A, ipiv, zero(BlasInt)
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sytrf), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, A, stride(A,2), ipiv, work, lwork, info, 1)
-                chkargsok(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            return A, ipiv, info[]
-        end
-
-        #      SUBROUTINE DSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO )
-        #     .. Scalar Arguments ..
-        #      CHARACTER          UPLO
-        #      INTEGER            INFO, LDA, N
-        #     .. Array Arguments ..
-        #      INTEGER            IPIV( * )
-        #      DOUBLE PRECISION   A( LDA, * ), WORK( * )
-        function sytri_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
-            chkstride1(A, ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            work = Vector{$elty}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
-            chkargsok(info[])
-            chknonsingular(info[])
-            A
-        end
-
-        #       SUBROUTINE DSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-        #
-        #       .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, N, NRHS
-        #       .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
-        function sytrs_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty},
-                       ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, ipiv, B)
-            chkstride1(A,B,ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
-            chklapackerror(info[])
-            B
-        end
-
-        # SUBROUTINE DSYCONVF_ROOK( UPLO, WAY, N, A, LDA, IPIV, E, INFO )
-        #
-        # .. Scalar Arguments ..
-        # CHARACTER          UPLO, WAY
-        # INTEGER            INFO, LDA, N
-        # ..
-        # .. Array Arguments ..
-        # INTEGER            IPIV( * )
-        # DOUBLE PRECISION   A( LDA, * ), E( * )
-        function syconvf_rook!(uplo::AbstractChar, way::AbstractChar,
-                                A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt},
-                                e::AbstractVector{$elty} = Vector{$elty}(undef, length(ipiv)))
-            require_one_based_indexing(A, ipiv, e)
-            # extract
-            n = checksquare(A)
-            lda = max(1, stride(A, 2))
-
-            # check
-            chkuplo(uplo)
-            if way != 'C' && way != 'R'
-                throw(ArgumentError("way must be C or R"))
-            end
-            if length(ipiv) != n
-                throw(ArgumentError("length of pivot vector was $(length(ipiv)) but should have been $n"))
-            end
-            if length(e) != n
-                throw(ArgumentError("length of e vector was $(length(e)) but should have been $n"))
-            end
-
-            # allocate
-            info = Ref{BlasInt}()
-
-            ccall((@blasfunc($syconvf), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
-                 Clong, Clong),
-                uplo, way, n, A,
-                lda, e, ipiv, info,
-                1, 1)
-
-            chklapackerror(info[])
-            return A, e
-        end
-    end
-end
-
-## (SY) hermitian matrices - eigendecomposition, Bunch-Kaufman decomposition,
-## solvers (direct and factored) and inverse.
-for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
-    ((:zsyconv_,:zhesv_,:zhetrf_,:zhetri_,:zhetrs_,:ComplexF64, :Float64),
-     (:csyconv_,:chesv_,:chetrf_,:chetri_,:chetrs_,:ComplexF32, :Float32))
-    @eval begin
-       #   SUBROUTINE ZSYCONV( UPLO, WAY, N, A, LDA, IPIV, WORK, INFO )
-       #
-       #        .. Scalar Arguments ..
-       #        CHARACTER          UPLO, WAY
-       #        INTEGER            INFO, LDA, N
-       #        ..
-       #        .. Array Arguments ..
-       #        INTEGER            IPIV( * )
-       #        COMPLEX*16         A( LDA, * ), WORK( * )
-        function syconv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
-            chkstride1(A,ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            work = Vector{$elty}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($syconv), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong, Clong),
-                  uplo, 'C', n, A, max(1,stride(A,2)), ipiv, work, info, 1, 1)
-            chklapackerror(info[])
-            A, work
-        end
-
-        #       SUBROUTINE ZHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
-        function hesv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A,B)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            ipiv  = similar(A, BlasInt, n)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hesv), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            B, A, ipiv
-        end
-
-        #       SUBROUTINE ZHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function hetrf!(uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            ipiv  = similar(A, BlasInt, n)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i in 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hetrf), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1)
-                chkargsok(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, ipiv, info[]
-        end
-
-#       SUBROUTINE ZHETRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
-# *     .. Scalar Arguments ..
-#       CHARACTER          UPLO
-#       INTEGER            INFO, LDA, LWORK, N
-# *     ..
-# *     .. Array Arguments ..
-#       INTEGER            IPIV( * )
-#       COMPLEX*16         A( LDA, * ), WORK( * )
-#         function hetri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::Vector{BlasInt})
-#             chkstride1(A)
-#             n = checksquare(A)
-#             chkuplo(uplo)
-#             work  = Vector{$elty}(undef, 1)
-#             lwork = BlasInt(-1)
-#             info  = Ref{BlasInt}()
-#             for i in 1:2
-#                 ccall((@blasfunc($hetri), libblastrampoline), Cvoid,
-#                       (Ptr{UInt8}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt},
-#                        Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong),
-#                       &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info, 1)
-#                 chklapackerror(info[])
-#                 if lwork < 0
-#                     lwork = BlasInt(real(work[1]))
-#                     work = Vector{$elty}(undef, lwork)
-#                 end
-#             end
-#             A
-#         end
-
-
-        #       SUBROUTINE ZHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function hetri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
-            chkstride1(A, ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            work = Vector{$elty}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($hetri), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
-            chklapackerror(info[])
-            A
-        end
-
-        #       SUBROUTINE ZHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, N, NRHS
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * )
-        function hetrs!(uplo::AbstractChar, A::AbstractMatrix{$elty},
-                       ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, ipiv, B)
-            chkstride1(A,B,ipiv)
-            n = checksquare(A)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($hetrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
-            chklapackerror(info[])
-            B
-        end
-    end
-end
-
-for (hesv, hetrf, hetri, hetrs, elty, relty) in
-    ((:zhesv_rook_,:zhetrf_rook_,:zhetri_rook_,:zhetrs_rook_,:ComplexF64, :Float64),
-     (:chesv_rook_,:chetrf_rook_,:chetri_rook_,:chetrs_rook_,:ComplexF32, :Float32))
-    @eval begin
-        #       SUBROUTINE ZHESV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
-        function hesv_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A,B)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            ipiv  = similar(A, BlasInt, n)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hesv), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            B, A, ipiv
-        end
-
-        #       SUBROUTINE ZHETRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function hetrf_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            ipiv  = similar(A, BlasInt, n)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i in 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hetrf), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1)
-                chkargsok(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, ipiv, info[]
-        end
-
-        #       SUBROUTINE ZHETRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function hetri_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
-            chkstride1(A,ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            work = Vector{$elty}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($hetri), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
-            chklapackerror(info[])
-            A
-        end
-
-        #       SUBROUTINE ZHETRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, N, NRHS
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * )
-        function hetrs_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty},
-                             ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, ipiv, B)
-            chkstride1(A,B,ipiv)
-            n = checksquare(A)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($hetrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
-            chklapackerror(info[])
-            B
-        end
-    end
-end
-
-for (sysv, sytrf, sytri, sytrs, elty, relty) in
-    ((:zsysv_,:zsytrf_,:zsytri_,:zsytrs_,:ComplexF64, :Float64),
-     (:csysv_,:csytrf_,:csytri_,:csytrs_,:ComplexF32, :Float32))
-    @eval begin
-        #       SUBROUTINE ZSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
-        #      $                  LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
-        function sysv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A,B)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            ipiv  = similar(A, BlasInt, n)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sysv), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info, 1)
-                chkargsok(info[])
-                chknonsingular(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            B, A, ipiv
-        end
-
-        #       SUBROUTINE ZSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            ipiv = similar(A, BlasInt, n)
-            if n == 0
-                return A, ipiv, zero(BlasInt)
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sytrf), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1)
-                chkargsok(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, ipiv, info[]
-        end
-
-#       SUBROUTINE ZSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
-# *     .. Scalar Arguments ..
-#       CHARACTER          UPLO
-#       INTEGER            INFO, LDA, LWORK, N
-# *     ..
-# *     .. Array Arguments ..
-#       INTEGER            IPIV( * )
-#       COMPLEX*16         A( LDA, * ), WORK( * )
-#         function sytri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::Vector{BlasInt})
-#             chkstride1(A)
-#             n = checksquare(A)
-#             chkuplo(uplo)
-#             work  = Vector{$elty}(undef, 1)
-#             lwork = BlasInt(-1)
-#             info  = Ref{BlasInt}()
-#             for i in 1:2
-#                 ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
-#                       (Ptr{UInt8}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt},
-#                        Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong),
-#                       &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info, 1)
-#                 chklapackerror(info[])
-#                 if lwork < 0
-#                     lwork = BlasInt(real(work[1]))
-#                     work = Vector{$elty}(undef, lwork)
-#                 end
-#             end
-#             A
-#         end
-
-        #       SUBROUTINE ZSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function sytri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
-            chkstride1(A, ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            work = Vector{$elty}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
-            chklapackerror(info[])
-            A
-        end
-
-        #       SUBROUTINE ZSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, N, NRHS
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * )
-        function sytrs!(uplo::AbstractChar, A::AbstractMatrix{$elty},
-                       ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, ipiv, B)
-            chkstride1(A,B,ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info,  1)
-            chklapackerror(info[])
-            B
-        end
-    end
-end
-
-for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
-    ((:zsysv_rook_,:zsytrf_rook_,:zsytri_rook_,:zsytrs_rook_,:zsyconvf_rook_,:ComplexF64, :Float64),
-     (:csysv_rook_,:csytrf_rook_,:csytri_rook_,:csytrs_rook_,:csyconvf_rook_,:ComplexF32, :Float32))
-    @eval begin
-        #       SUBROUTINE ZSYSV_ROOK(UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
-        #      $                      LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
-        function sysv_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, B)
-            chkstride1(A,B)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            ipiv  = similar(A, BlasInt, n)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sysv), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info, 1)
-                chkargsok(info[])
-                chknonsingular(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            B, A, ipiv
-        end
-
-        #       SUBROUTINE ZSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function sytrf_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            ipiv = similar(A, BlasInt, n)
-            if n == 0
-                return A, ipiv, zero(BlasInt)
-            end
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sytrf), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1)
-                chkargsok(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, ipiv, info[]
-        end
-
-        #       SUBROUTINE ZSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function sytri_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
-            chkstride1(A, ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            work = Vector{$elty}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
-            chklapackerror(info[])
-            A
-        end
-
-        #       SUBROUTINE ZSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LDB, N, NRHS
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IPIV( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * )
-        function sytrs_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty},
-                             ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty})
-            require_one_based_indexing(A, ipiv, B)
-            chkstride1(A,B,ipiv)
-            n = checksquare(A)
-            chkuplo(uplo)
-            if n != size(B,1)
-                throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
-            end
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
-            chklapackerror(info[])
-            B
-        end
-
-        # SUBROUTINE ZSYCONVF_ROOK( UPLO, WAY, N, A, LDA, IPIV, E, INFO )
-        #
-        # .. Scalar Arguments ..
-        # CHARACTER          UPLO, WAY
-        # INTEGER            INFO, LDA, N
-        # ..
-        # .. Array Arguments ..
-        # INTEGER            IPIV( * )
-        # COMPLEX*16         A( LDA, * ), E( * )
-        function syconvf_rook!(uplo::AbstractChar, way::AbstractChar,
-                                A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt},
-                                e::AbstractVector{$elty} = Vector{$elty}(undef, length(ipiv)))
-            require_one_based_indexing(A, ipiv, e)
-            chkstride1(A, ipiv, e)
-
-            # extract
-            n   = checksquare(A)
-            lda = stride(A, 2)
-
-            # check
-            chkuplo(uplo)
-            if way != 'C' && way != 'R'
-                throw(ArgumentError("way must be 'C' or 'R'"))
-            end
-            if length(ipiv) != n
-                throw(ArgumentError("length of pivot vector was $(length(ipiv)) but should have been $n"))
-            end
-            if length(e) != n
-                throw(ArgumentError("length of e vector was $(length(e)) but should have been $n"))
-            end
-
-            # allocate
-            info = Ref{BlasInt}()
-
-            ccall((@blasfunc($syconvf), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
-                 Clong, Clong),
-                uplo, way, n, A,
-                max(1, lda), e, ipiv, info,
-                1, 1)
-
-            chklapackerror(info[])
-            return A, e
-        end
-    end
-end
-
-"""
-    syconv!(uplo, A, ipiv) -> (A, work)
-
-Converts a symmetric matrix `A` (which has been factorized into a
-triangular matrix) into two matrices `L` and `D`. If `uplo = U`, `A`
-is upper triangular. If `uplo = L`, it is lower triangular. `ipiv` is
-the pivot vector from the triangular factorization. `A` is overwritten
-by `L` and `D`.
-"""
-syconv!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt})
-
-"""
-    sysv!(uplo, A, B) -> (B, A, ipiv)
-
-Finds the solution to `A * X = B` for symmetric matrix `A`. If `uplo = U`,
-the upper half of `A` is stored. If `uplo = L`, the lower half is stored.
-`B` is overwritten by the solution `X`. `A` is overwritten by its
-Bunch-Kaufman factorization. `ipiv` contains pivoting information about the
-factorization.
-"""
-sysv!(uplo::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat)
-
-"""
-    sytrf!(uplo, A) -> (A, ipiv, info)
-
-Computes the Bunch-Kaufman factorization of a symmetric matrix `A`. If
-`uplo = U`, the upper half of `A` is stored. If `uplo = L`, the lower
-half is stored.
-
-Returns `A`, overwritten by the factorization, a pivot vector `ipiv`, and
-the error code `info` which is a non-negative integer. If `info` is positive
-the matrix is singular and the diagonal part of the factorization is exactly
-zero at position `info`.
-"""
-sytrf!(uplo::AbstractChar, A::AbstractMatrix)
-
-"""
-    sytri!(uplo, A, ipiv)
-
-Computes the inverse of a symmetric matrix `A` using the results of
-`sytrf!`. If `uplo = U`, the upper half of `A` is stored. If `uplo = L`,
-the lower half is stored. `A` is overwritten by its inverse.
-"""
-sytri!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt})
-
-"""
-    sytrs!(uplo, A, ipiv, B)
-
-Solves the equation `A * X = B` for a symmetric matrix `A` using the
-results of `sytrf!`. If `uplo = U`, the upper half of `A` is stored.
-If `uplo = L`, the lower half is stored. `B` is overwritten by the
-solution `X`.
-"""
-sytrs!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat)
-
-
-"""
-    hesv!(uplo, A, B) -> (B, A, ipiv)
-
-Finds the solution to `A * X = B` for Hermitian matrix `A`. If `uplo = U`,
-the upper half of `A` is stored. If `uplo = L`, the lower half is stored.
-`B` is overwritten by the solution `X`. `A` is overwritten by its
-Bunch-Kaufman factorization. `ipiv` contains pivoting information about the
-factorization.
-"""
-hesv!(uplo::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat)
-
-"""
-    hetrf!(uplo, A) -> (A, ipiv, info)
-
-Computes the Bunch-Kaufman factorization of a Hermitian matrix `A`. If
-`uplo = U`, the upper half of `A` is stored. If `uplo = L`, the lower
-half is stored.
-
-Returns `A`, overwritten by the factorization, a pivot vector `ipiv`, and
-the error code `info` which is a non-negative integer. If `info` is positive
-the matrix is singular and the diagonal part of the factorization is exactly
-zero at position `info`.
-"""
-hetrf!(uplo::AbstractChar, A::AbstractMatrix)
-
-"""
-    hetri!(uplo, A, ipiv)
-
-Computes the inverse of a Hermitian matrix `A` using the results of
-`sytrf!`. If `uplo = U`, the upper half of `A` is stored. If `uplo = L`,
-the lower half is stored. `A` is overwritten by its inverse.
-"""
-hetri!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt})
-
-"""
-    hetrs!(uplo, A, ipiv, B)
-
-Solves the equation `A * X = B` for a Hermitian matrix `A` using the
-results of `sytrf!`. If `uplo = U`, the upper half of `A` is stored.
-If `uplo = L`, the lower half is stored. `B` is overwritten by the
-solution `X`.
-"""
-hetrs!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat)
-
-# Symmetric (real) eigensolvers
-for (syev, syevr, syevd, sygvd, elty) in
-    ((:dsyev_,:dsyevr_,:dsyevd_,:dsygvd_,:Float64),
-     (:ssyev_,:ssyevr_,:ssyevd_,:ssygvd_,:Float32))
-    @eval begin
-        #       SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
-        function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            W     = similar(A, $elty, n)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($syev), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                      jobz, uplo, n, A, max(1,stride(A,2)), W, work, lwork, info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            jobz == 'V' ? (W, A) : W
-        end
-
-        #       SUBROUTINE DSYEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
-        #      $                   ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK,
-        #      $                   IWORK, LIWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, RANGE, UPLO
-        #       INTEGER            IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N
-        #       DOUBLE PRECISION   ABSTOL, VL, VU
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            ISUPPZ( * ), IWORK( * )
-        #       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * )
-        function syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty},
-                        vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat)
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplofinite(A, uplo)
-            if range == 'I' && !(1 <= il <= iu <= n)
-                throw(ArgumentError("illegal choice of eigenvalue indices (il = $il, iu = $iu), which must be between 1 and n = $n"))
-            end
-            if range == 'V' && vl >= vu
-                throw(ArgumentError("lower boundary, $vl, must be less than upper boundary, $vu"))
-            end
-            lda = stride(A,2)
-            m = Ref{BlasInt}()
-            W = similar(A, $elty, n)
-            ldz = n
-            if jobz == 'N'
-                Z = similar(A, $elty, ldz, 0)
-            elseif jobz == 'V'
-                Z = similar(A, $elty, ldz, n)
-            end
-            isuppz = similar(A, BlasInt, 2*n)
-            work   = Vector{$elty}(undef, 1)
-            lwork  = BlasInt(-1)
-            iwork  = Vector{BlasInt}(undef, 1)
-            liwork = BlasInt(-1)
-            info   = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($syevr), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                        Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ref{$elty},
-                        Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{BlasInt},
-                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
-                        Ptr{BlasInt}, Clong, Clong, Clong),
-                    jobz, range, uplo, n,
-                    A, max(1,lda), vl, vu,
-                    il, iu, abstol, m,
-                    W, Z, max(1,ldz), isuppz,
-                    work, lwork, iwork, liwork,
-                    info, 1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    liwork = iwork[1]
-                    resize!(iwork, liwork)
-                end
-            end
-            W[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)]
-        end
-        syevr!(jobz::AbstractChar, A::AbstractMatrix{$elty}) =
-            syevr!(jobz, 'A', 'U', A, 0.0, 0.0, 0, 0, -1.0)
-
-        #       SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
-        #      $                   IWORK, LIWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, UPLO
-        #       INTEGER            INFO, LDA, LIWORK, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
-        function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplofinite(A, uplo)
-            lda = stride(A,2)
-            m = Ref{BlasInt}()
-            W = similar(A, $elty, n)
-            work   = Vector{$elty}(undef, 1)
-            lwork  = BlasInt(-1)
-            iwork  = Vector{BlasInt}(undef, 1)
-            liwork = BlasInt(-1)
-            info   = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($syevd), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
-                        Ptr{BlasInt}, Clong, Clong),
-                    jobz, uplo, n, A, max(1,lda),
-                    W, work, lwork, iwork, liwork,
-                    info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    liwork = iwork[1]
-                    resize!(iwork, liwork)
-                end
-            end
-            jobz == 'V' ? (W, A) : W
-        end
-
-        # Generalized eigenproblem
-        #           SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
-        #      $                   LWORK, IWORK, LIWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, UPLO
-        #       INTEGER            INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), W( * ), WORK( * )
-        function sygvd!(itype::Integer, jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            chkstride1(A, B)
-            n, m = checksquare(A, B)
-            if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
-            end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            w = similar(A, $elty, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            iwork = Vector{BlasInt}(undef, 1)
-            liwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($sygvd), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                     Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                    itype, jobz, uplo, n,
-                    A, lda, B, ldb,
-                    w, work, lwork, iwork,
-                    liwork, info, 1, 1)
-                chkargsok(info[])
-                if i == 1
-                    lwork = BlasInt(work[1])
-                    resize!(work, lwork)
-                    liwork = iwork[1]
-                    resize!(iwork, liwork)
-                end
-            end
-            chkposdef(info[])
-            w, A, B
-        end
-    end
-end
-# Hermitian eigensolvers
-for (syev, syevr, syevd, sygvd, elty, relty) in
-    ((:zheev_,:zheevr_,:zheevd_,:zhegvd_,:ComplexF64,:Float64),
-     (:cheev_,:cheevr_,:cheevd_,:chegvd_,:ComplexF32,:Float32))
-    @eval begin
-        # SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   RWORK( * ), W( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            chkuplofinite(A, uplo)
-            n = checksquare(A)
-            W     = similar(A, $relty, n)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, max(1, 3n-2))
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($syev), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                      Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt},
-                      Clong, Clong),
-                      jobz, uplo, n, A, stride(A,2), W, work, lwork, rwork, info,
-                      1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            jobz == 'V' ? (W, A) : W
-        end
-
-        #       SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
-        #      $                   ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK,
-        #      $                   RWORK, LRWORK, IWORK, LIWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, RANGE, UPLO
-        #       INTEGER            IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK,
-        #      $                   M, N
-        #       DOUBLE PRECISION   ABSTOL, VL, VU
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            ISUPPZ( * ), IWORK( * )
-        #       DOUBLE PRECISION   RWORK( * ), W( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * ), Z( LDZ, * )
-        function syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty},
-                        vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat)
-            chkstride1(A)
-            chkuplofinite(A, uplo)
-            n = checksquare(A)
-            if range == 'I' && !(1 <= il <= iu <= n)
-                throw(ArgumentError("illegal choice of eigenvalue indices (il = $il, iu=$iu), which must be between 1 and n = $n"))
-            end
-            if range == 'V' && vl >= vu
-                throw(ArgumentError("lower boundary, $vl, must be less than upper boundary, $vu"))
-            end
-            lda = max(1,stride(A,2))
-            m = Ref{BlasInt}()
-            W = similar(A, $relty, n)
-            if jobz == 'N'
-                ldz = 1
-                Z = similar(A, $elty, ldz, 0)
-            elseif jobz == 'V'
-                ldz = n
-                Z = similar(A, $elty, ldz, n)
-            end
-            isuppz = similar(A, BlasInt, 2*n)
-            work   = Vector{$elty}(undef, 1)
-            lwork  = BlasInt(-1)
-            rwork  = Vector{$relty}(undef, 1)
-            lrwork = BlasInt(-1)
-            iwork  = Vector{BlasInt}(undef, 1)
-            liwork = BlasInt(-1)
-            info   = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1], lrwork as rwork[1] and liwork as iwork[1]
-                ccall((@blasfunc($syevr), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ref{$elty},
-                       Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{BlasInt},
-                       Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
-                       Clong, Clong, Clong),
-                      jobz, range, uplo, n,
-                      A, lda, vl, vu,
-                      il, iu, abstol, m,
-                      W, Z, ldz, isuppz,
-                      work, lwork, rwork, lrwork,
-                      iwork, liwork, info,
-                      1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    lrwork = BlasInt(rwork[1])
-                    resize!(rwork, lrwork)
-                    liwork = iwork[1]
-                    resize!(iwork, liwork)
-                end
-            end
-            W[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)]
-        end
-        syevr!(jobz::AbstractChar, A::AbstractMatrix{$elty}) =
-            syevr!(jobz, 'A', 'U', A, 0.0, 0.0, 0, 0, -1.0)
-
-        #       SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,
-        #      $                   LRWORK, IWORK, LIWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, UPLO
-        #       INTEGER            INFO, LDA, LIWORK, LRWORK, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   RWORK( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            chkuplofinite(A, uplo)
-            n = checksquare(A)
-            lda = max(1, stride(A,2))
-            m = Ref{BlasInt}()
-            W = similar(A, $relty, n)
-            work   = Vector{$elty}(undef, 1)
-            lwork  = BlasInt(-1)
-            rwork  = Vector{$relty}(undef, 1)
-            lrwork = BlasInt(-1)
-            iwork  = Vector{BlasInt}(undef, 1)
-            liwork = BlasInt(-1)
-            info   = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1], lrwork as rwork[1] and liwork as iwork[1]
-                ccall((@blasfunc($syevd), liblapack), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ref{BlasInt},
-                    Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                    jobz, uplo, n, A, stride(A,2),
-                    W, work, lwork, rwork, lrwork,
-                    iwork, liwork, info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    lrwork = BlasInt(rwork[1])
-                    resize!(rwork, lrwork)
-                    liwork = iwork[1]
-                    resize!(iwork, liwork)
-                end
-            end
-            jobz == 'V' ? (W, A) : W
-        end
-
-        #       SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
-        #      $                   LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, UPLO
-        #       INTEGER            INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   RWORK( * ), W( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
-        function sygvd!(itype::Integer, jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            chkstride1(A, B)
-            chkuplofinite(A, uplo)
-            chkuplofinite(B, uplo)
-            n, m = checksquare(A, B)
-            if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
-            end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            w = similar(A, $relty, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            iwork = Vector{BlasInt}(undef, 1)
-            liwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, 1)
-            lrwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1], lrwork as rwork[1] and liwork as iwork[1]
-                ccall((@blasfunc($sygvd), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty},
-                     Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
-                     Clong, Clong),
-                    itype, jobz, uplo, n,
-                    A, lda, B, ldb,
-                    w, work, lwork, rwork,
-                    lrwork, iwork, liwork, info,
-                    1, 1)
-                chkargsok(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    liwork = iwork[1]
-                    resize!(iwork, liwork)
-                    lrwork = BlasInt(rwork[1])
-                    resize!(rwork, lrwork)
-                end
-            end
-            chkposdef(info[])
-            w, A, B
-        end
-    end
-end
-
-"""
-    syev!(jobz, uplo, A)
-
-Finds the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors
-(`jobz = V`) of a symmetric matrix `A`. If `uplo = U`, the upper triangle
-of `A` is used. If `uplo = L`, the lower triangle of `A` is used.
-"""
-syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix)
-
-"""
-    syevr!(jobz, range, uplo, A, vl, vu, il, iu, abstol) -> (W, Z)
-
-Finds the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors
-(`jobz = V`) of a symmetric matrix `A`. If `uplo = U`, the upper triangle
-of `A` is used. If `uplo = L`, the lower triangle of `A` is used. If
-`range = A`, all the eigenvalues are found. If `range = V`, the
-eigenvalues in the half-open interval `(vl, vu]` are found.
-If `range = I`, the eigenvalues with indices between `il` and `iu` are
-found. `abstol` can be set as a tolerance for convergence.
-
-The eigenvalues are returned in `W` and the eigenvectors in `Z`.
-"""
-syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix,
-       vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat)
-
-"""
-    syevd!(jobz, uplo, A)
-
-Finds the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors
-(`jobz = V`) of a symmetric matrix `A`. If `uplo = U`, the upper triangle
-of `A` is used. If `uplo = L`, the lower triangle of `A` is used.
-
-Use the divide-and-conquer method, instead of the QR iteration used by
-`syev!` or multiple relatively robust representations used by `syevr!`.
-See James W. Demmel et al, SIAM J. Sci. Comput. 30, 3, 1508 (2008) for
-a comparison of the accuracy and performatce of different methods.
-"""
-syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix)
-
-"""
-    sygvd!(itype, jobz, uplo, A, B) -> (w, A, B)
-
-Finds the generalized eigenvalues (`jobz = N`) or eigenvalues and
-eigenvectors (`jobz = V`) of a symmetric matrix `A` and symmetric
-positive-definite matrix `B`. If `uplo = U`, the upper triangles
-of `A` and `B` are used. If `uplo = L`, the lower triangles of `A` and
-`B` are used. If `itype = 1`, the problem to solve is
-`A * x = lambda * B * x`. If `itype = 2`, the problem to solve is
-`A * B * x = lambda * x`. If `itype = 3`, the problem to solve is
-`B * A * x = lambda * x`.
-"""
-sygvd!(itype::Integer, jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix, B::AbstractMatrix)
-
-## (BD) Bidiagonal matrices - singular value decomposition
-for (bdsqr, relty, elty) in
-    ((:dbdsqr_,:Float64,:Float64),
-     (:sbdsqr_,:Float32,:Float32),
-     (:zbdsqr_,:Float64,:ComplexF64),
-     (:cbdsqr_,:Float32,:ComplexF32))
-    @eval begin
-        function bdsqr!(uplo::AbstractChar, d::AbstractVector{$relty}, e_::AbstractVector{$relty},
-                        Vt::AbstractMatrix{$elty}, U::AbstractMatrix{$elty}, C::AbstractMatrix{$elty})
-            require_one_based_indexing(d, e_, Vt, U, C)
-            chkstride1(d, e_, Vt, U, C)
-            # Extract number
-            n = length(d)
-            ncvt, nru, ncc = size(Vt, 2), size(U, 1), size(C, 2)
-            ldvt, ldu, ldc = max(1, stride(Vt,2)), max(1, stride(U, 2)), max(1, stride(C,2))
-            # Do checks
-            chkuplo(uplo)
-            if length(e_) != n - 1
-                throw(DimensionMismatch("off-diagonal has length $(length(e_)) but should have length $(n - 1)"))
-            end
-            if ncvt > 0 && ldvt < n
-                throw(DimensionMismatch("leading dimension of Vt, $ldvt, must be at least $n"))
-            end
-            if ldu < nru
-                throw(DimensionMismatch("leading dimension of U, $ldu, must be at least $nru"))
-            end
-            if size(U, 2) != n
-                throw(DimensionMismatch("U must have $n columns but has $(size(U, 2))"))
-            end
-            if ncc > 0 && ldc < n
-                throw(DimensionMismatch("leading dimension of C, $ldc, must be at least $n"))
-            end
-            # Allocate
-            work = Vector{$relty}(undef, 4n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($bdsqr), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                 Ref{BlasInt}, Ptr{$relty}, Ptr{$relty}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, Clong),
-                uplo, n, ncvt, nru,
-                ncc, d, e_, Vt,
-                ldvt, U, ldu, C,
-                ldc, work, info, 1)
-            chklapackerror(info[])
-            d, Vt, U, C #singular values in descending order, P**T * VT, U * Q, Q**T * C
-        end
-    end
-end
-
-"""
-    bdsqr!(uplo, d, e_, Vt, U, C) -> (d, Vt, U, C)
-
-Computes the singular value decomposition of a bidiagonal matrix with
-`d` on the diagonal and `e_` on the off-diagonal. If `uplo = U`, `e_` is
-the superdiagonal. If `uplo = L`, `e_` is the subdiagonal. Can optionally also
-compute the product `Q' * C`.
-
-Returns the singular values in `d`, and the matrix `C` overwritten with `Q' * C`.
-"""
-bdsqr!(uplo::AbstractChar, d::AbstractVector, e_::AbstractVector, Vt::AbstractMatrix, U::AbstractMatrix, C::AbstractMatrix)
-
-#Defined only for real types
-for (bdsdc, elty) in
-    ((:dbdsdc_,:Float64),
-     (:sbdsdc_,:Float32))
-    @eval begin
-        #*  DBDSDC computes the singular value decomposition (SVD) of a real
-        #*  N-by-N (upper or lower) bidiagonal matrix B:  B = U * S * VT,
-        #*  using a divide and conquer method
-        #*     .. Scalar Arguments ..
-        #      CHARACTER          COMPQ, UPLO
-        #      INTEGER            INFO, LDU, LDVT, N
-        #*     ..
-        #*     .. Array Arguments ..
-        #      INTEGER            IQ( * ), IWORK( * )
-        #      DOUBLE PRECISION   D( * ), E( * ), Q( * ), U( LDU, * ),
-        #     $                   VT( LDVT, * ), WORK( * )
-        function bdsdc!(uplo::AbstractChar, compq::AbstractChar, d::AbstractVector{$elty}, e_::AbstractVector{$elty})
-            require_one_based_indexing(d, e_)
-            chkstride1(d, e_)
-            n, ldiq, ldq, ldu, ldvt = length(d), 1, 1, 1, 1
-            chkuplo(uplo)
-            if compq == 'N'
-                lwork = 6*n
-            elseif compq == 'P'
-                @warn "COMPQ='P' is not tested"
-                #TODO turn this into an actual LAPACK call
-                #smlsiz=ilaenv(9, $elty === :Float64 ? 'dbdsqr' : 'sbdsqr', string(uplo, compq), n,n,n,n)
-                smlsiz=100 #For now, completely overkill
-                ldq = n*(11+2*smlsiz+8*round(Int,log((n/(smlsiz+1)))/log(2)))
-                ldiq = n*(3+3*round(Int,log(n/(smlsiz+1))/log(2)))
-                lwork = 6*n
-            elseif compq == 'I'
-                ldvt=ldu=max(1, n)
-                lwork=3*n^2 + 4*n
-            else
-                throw(ArgumentError("COMPQ argument must be 'N', 'P' or 'I', got $(repr(compq))"))
-            end
-            u  = similar(d, $elty, (ldu,  n))
-            vt = similar(d, $elty, (ldvt, n))
-            q  = similar(d, $elty, ldq)
-            iq = similar(d, BlasInt, ldiq)
-            work  = Vector{$elty}(undef, lwork)
-            iwork = Vector{BlasInt}(undef, 8n)
-            info  = Ref{BlasInt}()
-            ccall((@blasfunc($bdsdc), libblastrampoline), Cvoid,
-               (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
-                Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                Ptr{$elty}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
-                Clong, Clong),
-                uplo, compq, n, d, e_,
-                u, ldu, vt, ldvt,
-                q, iq, work, iwork, info,
-                1, 1)
-            chklapackerror(info[])
-            d, e_, u, vt, q, iq
-        end
-    end
-end
-
-"""
-    bdsdc!(uplo, compq, d, e_) -> (d, e, u, vt, q, iq)
-
-Computes the singular value decomposition of a bidiagonal matrix with `d` on the
-diagonal and `e_` on the off-diagonal using a divide and conqueq method.
-If `uplo = U`, `e_` is the superdiagonal. If `uplo = L`, `e_` is the subdiagonal.
-If `compq = N`, only the singular values are found. If `compq = I`, the singular
-values and vectors are found. If `compq = P`, the singular values
-and vectors are found in compact form. Only works for real types.
-
-Returns the singular values in `d`, and if `compq = P`, the compact singular
-vectors in `iq`.
-"""
-bdsdc!(uplo::AbstractChar, compq::AbstractChar, d::AbstractVector, e_::AbstractVector)
-
-for (gecon, elty) in
-    ((:dgecon_,:Float64),
-     (:sgecon_,:Float32))
-    @eval begin
-        #  SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
-        #      $                   INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          NORM
-        #       INTEGER            INFO, LDA, N
-        #       DOUBLE PRECISION   ANORM, RCOND
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   A( LDA, * ), WORK( * )
-        function gecon!(normtype::AbstractChar, A::AbstractMatrix{$elty}, anorm::$elty)
-            chkstride1(A)
-            n = checksquare(A)
-            lda = max(1, stride(A, 2))
-            rcond = Ref{$elty}()
-            work = Vector{$elty}(undef, 4n)
-            iwork = Vector{BlasInt}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($gecon), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ref{$elty}, Ref{$elty}, Ptr{$elty}, Ptr{BlasInt},
-                   Ptr{BlasInt}, Clong),
-                  normtype, n, A, lda, anorm, rcond, work, iwork,
-                  info, 1)
-            chklapackerror(info[])
-            rcond[]
-        end
-    end
-end
-
-for (gecon, elty, relty) in
-    ((:zgecon_,:ComplexF64,:Float64),
-     (:cgecon_,:ComplexF32,:Float32))
-    @eval begin
-        #       SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK,
-        #      $                   INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          NORM
-        #       INTEGER            INFO, LDA, N
-        #       DOUBLE PRECISION   ANORM, RCOND
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   RWORK( * )
-        #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function gecon!(normtype::AbstractChar, A::AbstractMatrix{$elty}, anorm::$relty)
-            chkstride1(A)
-            n = checksquare(A)
-            lda = max(1, stride(A, 2))
-            rcond = Ref{$relty}()
-            work = Vector{$elty}(undef, 2n)
-            rwork = Vector{$relty}(undef, 2n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($gecon), libblastrampoline), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ref{$relty}, Ref{$relty}, Ptr{$elty}, Ptr{$relty},
-                   Ptr{BlasInt}, Clong),
-                  normtype, n, A, lda, anorm, rcond, work, rwork,
-                  info, 1)
-            chklapackerror(info[])
-            rcond[]
-        end
-    end
-end
-
-"""
-    gecon!(normtype, A, anorm)
-
-Finds the reciprocal condition number of matrix `A`. If `normtype = I`,
-the condition number is found in the infinity norm. If `normtype = O` or
-`1`, the condition number is found in the one norm. `A` must be the
-result of `getrf!` and `anorm` is the norm of `A` in the relevant norm.
-"""
-gecon!(normtype::AbstractChar, A::AbstractMatrix, anorm)
-
-for (gehrd, elty) in
-    ((:dgehrd_,:Float64),
-     (:sgehrd_,:Float32),
-     (:zgehrd_,:ComplexF64),
-     (:cgehrd_,:ComplexF32))
-    @eval begin
-
-        #                 .. Scalar Arguments ..
-        #       INTEGER            IHI, ILO, INFO, LDA, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION  A( LDA, * ), TAU( * ), WORK( * )
-        function gehrd!(ilo::Integer, ihi::Integer, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkfinite(A) # balancing routines don't support NaNs and Infs
-            tau = similar(A, $elty, max(0,n - 1))
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gehrd), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{BlasInt}),
-                    n, ilo, ihi, A,
-                    max(1, stride(A, 2)), tau, work, lwork,
-                    info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, tau
-        end
-    end
-end
-gehrd!(A::AbstractMatrix) = gehrd!(1, size(A, 1), A)
-
-"""
-    gehrd!(ilo, ihi, A) -> (A, tau)
-
-Converts a matrix `A` to Hessenberg form. If `A` is balanced with `gebal!`
-then `ilo` and `ihi` are the outputs of `gebal!`. Otherwise they should be
-`ilo = 1` and `ihi = size(A,2)`. `tau` contains the elementary reflectors of
-the factorization.
-"""
-gehrd!(ilo::Integer, ihi::Integer, A::AbstractMatrix)
-
-for (orghr, elty) in
-    ((:dorghr_,:Float64),
-     (:sorghr_,:Float32),
-     (:zunghr_,:ComplexF64),
-     (:cunghr_,:ComplexF32))
-    @eval begin
-        # *     .. Scalar Arguments ..
-        #       INTEGER            IHI, ILO, INFO, LDA, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function orghr!(ilo::Integer, ihi::Integer, A::AbstractMatrix{$elty}, tau::AbstractVector{$elty})
-            require_one_based_indexing(A, tau)
-            chkstride1(A, tau)
-            n = checksquare(A)
-            if n - length(tau) != 1
-                throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)"))
-            end
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orghr), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{BlasInt}),
-                    n, ilo, ihi, A,
-                    max(1, stride(A, 2)), tau, work, lwork,
-                    info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A
-        end
-    end
-end
-
-"""
-    orghr!(ilo, ihi, A, tau)
-
-Explicitly finds `Q`, the orthogonal/unitary matrix from `gehrd!`. `ilo`,
-`ihi`, `A`, and `tau` must correspond to the input/output to `gehrd!`.
-"""
-orghr!(ilo::Integer, ihi::Integer, A::AbstractMatrix, tau::AbstractVector)
-
-for (ormhr, elty) in
-    ((:dormhr_,:Float64),
-     (:sormhr_,:Float32),
-     (:zunmhr_,:ComplexF64),
-     (:cunmhr_,:ComplexF32))
-    @eval begin
-        # .. Scalar Arguments ..
-        # CHARACTER          side, trans
-        # INTEGER            ihi, ilo, info, lda, ldc, lwork, m, n
-        # ..
-        # .. Array Arguments ..
-        # DOUBLE PRECISION   a( lda, * ), c( ldc, * ), tau( * ), work( * )
-        function ormhr!(side::AbstractChar, trans::AbstractChar, ilo::Integer, ihi::Integer, A::AbstractMatrix{$elty},
-            tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty})
-
-            require_one_based_indexing(A, tau, C)
-            chkstride1(A, tau, C)
-            n = checksquare(A)
-            mC, nC = size(C, 1), size(C, 2)
-
-            if n - length(tau) != 1
-                throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)"))
-            end
-            if (side == 'L' && mC != n) || (side == 'R' && nC != n)
-                throw(DimensionMismatch("A and C matrices are not conformable"))
-            end
-
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormhr), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                     Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                    side, trans, mC, nC,
-                    ilo, ihi, A, max(1, stride(A, 2)),
-                    tau, C, max(1, stride(C, 2)), work,
-                    lwork, info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            C
-        end
-    end
-end
-
-for (hseqr, elty) in
-    ((:zhseqr_,:ComplexF64),
-     (:chseqr_,:ComplexF32))
-    @eval begin
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOB, COMPZ
-        #       INTEGER            N, ILO, IHI, LWORK, LDH, LDZ, INFO
-        # *     ..
-        # *     .. Array Arguments ..
-        #       COMPLEX*16         H( LDH, * ), Z( LDZ, * ), WORK( * )
-        function hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer,
-                        H::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty})
-            require_one_based_indexing(H, Z)
-            chkstride1(H)
-            n = checksquare(H)
-            checksquare(Z) == n || throw(DimensionMismatch())
-            ldh = max(1, stride(H, 2))
-            ldz = max(1, stride(Z, 2))
-            w = similar(H, $elty, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hseqr), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                    Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{BlasInt}),
-                    job, compz, n, ilo, ihi,
-                    H, ldh, w, Z, ldz, work,
-                    lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            H, Z, w
-        end
-    end
-end
-
-for (hseqr, elty) in
-    ((:dhseqr_,:Float64),
-     (:shseqr_,:Float32))
-    @eval begin
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOB, COMPZ
-        #       INTEGER            N, ILO, IHI, LWORK, LDH, LDZ, INFO
-        # *     ..
-        # *     .. Array Arguments ..
-        #       COMPLEX*16         H( LDH, * ), Z( LDZ, * ), WORK( * )
-        function hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer,
-                        H::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty})
-            require_one_based_indexing(H, Z)
-            chkstride1(H)
-            n = checksquare(H)
-            checksquare(Z) == n || throw(DimensionMismatch())
-            ldh = max(1, stride(H, 2))
-            ldz = max(1, stride(Z, 2))
-            wr = similar(H, $elty, n)
-            wi = similar(H, $elty, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hseqr), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                    Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{BlasInt}),
-                    job, compz, n, ilo, ihi,
-                    H, ldh, wr, wi, Z, ldz, work,
-                    lwork, info)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            H, Z, complex.(wr, wi)
-        end
-    end
-end
-hseqr!(H::StridedMatrix{T}, Z::StridedMatrix{T}) where {T<:BlasFloat} = hseqr!('S', 'V', 1, size(H, 1), H, Z)
-hseqr!(H::StridedMatrix{T}) where {T<:BlasFloat} = hseqr!('S', 'I', 1, size(H, 1), H, similar(H))
-
-"""
-    hseqr!(job, compz, ilo, ihi, H, Z) -> (H, Z, w)
-
-Computes all eigenvalues and (optionally) the Schur factorization of a matrix
-reduced to Hessenberg form. If `H` is balanced with `gebal!`
-then `ilo` and `ihi` are the outputs of `gebal!`. Otherwise they should be
-`ilo = 1` and `ihi = size(H,2)`. `tau` contains the elementary reflectors of
-the factorization.
-"""
-hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer, H::AbstractMatrix, Z::AbstractMatrix)
-
-for (hetrd, elty) in
-    ((:dsytrd_,Float64),
-     (:ssytrd_,Float32),
-     (:zhetrd_,ComplexF64),
-     (:chetrd_,ComplexF32))
-    relty = real(elty)
-    @eval begin
-
-        #                 .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION  A( LDA, * ), D( * ), E( * ), TAU( * ), WORK( * )
-        function hetrd!(uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            chkuplo(uplo)
-            chkfinite(A) # balancing routines don't support NaNs and Infs
-            tau = similar(A, $elty, max(0,n - 1))
-            d = Vector{$relty}(undef, n)
-            e = Vector{$relty}(undef, max(0,n - 1))
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hetrd), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$relty}, Ptr{$relty},
-                    Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
-                    uplo, n, A, max(1, stride(A, 2)), d, e, tau, work, lwork, info, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, tau, d, e
-        end
-    end
-end
-
-"""
-    hetrd!(uplo, A) -> (A, tau, d, e)
-
-Converts a Hermitian matrix `A` to real-symmetric tridiagonal Hessenberg form.
-If `uplo = U`, the upper half of `A` is stored; if `uplo = L`, the lower half is stored.
-`tau` contains the elementary reflectors of the factorization, `d` contains the
-diagonal and `e` contains the upper/lower diagonal.
-"""
-hetrd!(uplo::AbstractChar, A::AbstractMatrix)
-
-for (orgtr, elty) in
-    ((:dorgtr_,:Float64),
-     (:sorgtr_,:Float32),
-     (:zungtr_,:ComplexF64),
-     (:cungtr_,:ComplexF32))
-    @eval begin
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
-        function orgtr!(uplo::AbstractChar, A::AbstractMatrix{$elty}, tau::AbstractVector{$elty})
-            require_one_based_indexing(A, tau)
-            chkstride1(A, tau)
-            n = checksquare(A)
-            if n - length(tau) != 1
-                throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)"))
-            end
-            chkuplo(uplo)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orgtr), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{BlasInt}, Clong),
-                    uplo, n, A,
-                    max(1, stride(A, 2)), tau, work, lwork,
-                    info, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A
-        end
-    end
-end
-
-"""
-    orgtr!(uplo, A, tau)
-
-Explicitly finds `Q`, the orthogonal/unitary matrix from `hetrd!`. `uplo`,
-`A`, and `tau` must correspond to the input/output to `hetrd!`.
-"""
-orgtr!(uplo::AbstractChar, A::AbstractMatrix, tau::AbstractVector)
-
-for (ormtr, elty) in
-    ((:dormtr_,:Float64),
-     (:sormtr_,:Float32),
-     (:zunmtr_,:ComplexF64),
-     (:cunmtr_,:ComplexF32))
-    @eval begin
-        # .. Scalar Arguments ..
-        # CHARACTER          side, trans, uplo
-        # INTEGER            info, lda, ldc, lwork, m, n
-        # ..
-        # .. Array Arguments ..
-        # DOUBLE PRECISION   a( lda, * ), c( ldc, * ), tau( * ), work( * )
-        function ormtr!(side::AbstractChar, uplo::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty},
-                        tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty})
-
-            require_one_based_indexing(A, tau, C)
-            chkstride1(A, tau, C)
-            n = checksquare(A)
-            chkuplo(uplo)
-            mC, nC = size(C, 1), size(C, 2)
-
-            if n - length(tau) != 1
-                throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)"))
-            end
-            if (side == 'L' && mC != n) || (side == 'R' && nC != n)
-                throw(DimensionMismatch("A and C matrices are not conformable"))
-            end
-
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormtr), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                     Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong, Clong),
-                    side, uplo, trans, mC, nC,
-                    A, max(1, stride(A, 2)),
-                    tau, C, max(1, stride(C, 2)), work,
-                    lwork, info, 1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            C
-        end
-    end
-end
-
-for (gees, gges, gges3, elty) in
-    ((:dgees_,:dgges_,:dgges3_,:Float64),
-     (:sgees_,:sgges_,:sgges3_,:Float32))
-    @eval begin
-        #     .. Scalar Arguments ..
-        #     CHARACTER          JOBVS, SORT
-        #     INTEGER            INFO, LDA, LDVS, LWORK, N, SDIM
-        #     ..
-        #     .. Array Arguments ..
-        #     LOGICAL            BWORK( * )
-        #     DOUBLE PRECISION   A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ),
-        #    $                   WR( * )
-        function gees!(jobvs::AbstractChar, A::AbstractMatrix{$elty})
-            require_one_based_indexing(A)
-            chkstride1(A)
-            n     = checksquare(A)
-            sdim  = Vector{BlasInt}(undef, 1)
-            wr    = similar(A, $elty, n)
-            wi    = similar(A, $elty, n)
-            vs    = similar(A, $elty, jobvs == 'V' ? n : 0, n)
-            ldvs  = max(size(vs, 1), 1)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gees), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, Ref{BlasInt},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
-                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ptr{Cvoid}, Ref{BlasInt}, Clong, Clong),
-                    jobvs, 'N', C_NULL, n,
-                        A, max(1, stride(A, 2)), sdim, wr,
-                        wi, vs, ldvs, work,
-                        lwork, C_NULL, info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, vs, iszero(wi) ? wr : complex.(wr, wi)
-        end
-
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBVSL, JOBVSR, SORT
-        #       INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM
-        # *     ..
-        # *     .. Array Arguments ..
-        #       LOGICAL            BWORK( * )
-        #       DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
-        #      $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ),
-        #      $                   VSR( LDVSR, * ), WORK( * )
-        function gges!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            chkstride1(A, B)
-            n, m = checksquare(A, B)
-            if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
-            end
-            sdim = BlasInt(0)
-            alphar = similar(A, $elty, n)
-            alphai = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
-            ldvsl = jobvsl == 'V' ? max(1, n) : 1
-            vsl = similar(A, $elty, ldvsl, n)
-            ldvsr = jobvsr == 'V' ? max(1, n) : 1
-            vsr = similar(A, $elty, ldvsr, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gges), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid},
-                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
-                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{Cvoid},
-                        Ref{BlasInt}, Clong, Clong, Clong),
-                    jobvsl, jobvsr, 'N', C_NULL,
-                    n, A, max(1,stride(A, 2)), B,
-                    max(1,stride(B, 2)), sdim, alphar, alphai,
-                    beta, vsl, ldvsl, vsr,
-                    ldvsr, work, lwork, C_NULL,
-                    info, 1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, B, complex.(alphar, alphai), beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:]
-        end
-
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBVSL, JOBVSR, SORT
-        #       INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM
-        # *     ..
-        # *     .. Array Arguments ..
-        #       LOGICAL            BWORK( * )
-        #       DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
-        #      $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ),
-        #      $                   VSR( LDVSR, * ), WORK( * )
-        function gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            chkstride1(A, B)
-            n, m = checksquare(A, B)
-            if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
-            end
-            sdim = BlasInt(0)
-            alphar = similar(A, $elty, n)
-            alphai = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
-            ldvsl = jobvsl == 'V' ? max(1, n) : 1
-            vsl = similar(A, $elty, ldvsl, n)
-            ldvsr = jobvsr == 'V' ? max(1, n) : 1
-            vsr = similar(A, $elty, ldvsr, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gges3), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid},
-                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
-                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{Cvoid},
-                        Ref{BlasInt}, Clong, Clong, Clong),
-                    jobvsl, jobvsr, 'N', C_NULL,
-                    n, A, max(1,stride(A, 2)), B,
-                    max(1,stride(B, 2)), sdim, alphar, alphai,
-                    beta, vsl, ldvsl, vsr,
-                    ldvsr, work, lwork, C_NULL,
-                    info, 1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, B, complex.(alphar, alphai), beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:]
-        end
-    end
-end
-
-for (gees, gges, gges3, elty, relty) in
-    ((:zgees_,:zgges_,:zgges3_,:ComplexF64,:Float64),
-     (:cgees_,:cgges_,:cgges3_,:ComplexF32,:Float32))
-    @eval begin
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBVS, SORT
-        #       INTEGER            INFO, LDA, LDVS, LWORK, N, SDIM
-        # *     ..
-        # *     .. Array Arguments ..
-        #       LOGICAL            BWORK( * )
-        #       DOUBLE PRECISION   RWORK( * )
-        #       COMPLEX*16         A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * )
-        function gees!(jobvs::AbstractChar, A::AbstractMatrix{$elty})
-            require_one_based_indexing(A)
-            chkstride1(A)
-            n     = checksquare(A)
-            sort  = 'N'
-            sdim  = BlasInt(0)
-            w     = similar(A, $elty, n)
-            vs    = similar(A, $elty, jobvs == 'V' ? n : 1, n)
-            ldvs  = max(size(vs, 1), 1)
-            work  = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, n)
-            info  = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gees), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, Ref{BlasInt},
-                        Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                        Ptr{$relty}, Ptr{Cvoid}, Ref{BlasInt}, Clong, Clong),
-                    jobvs, sort, C_NULL, n,
-                        A, max(1, stride(A, 2)), sdim, w,
-                        vs, ldvs, work, lwork,
-                        rwork, C_NULL, info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, vs, w
-        end
-
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBVSL, JOBVSR, SORT
-        #       INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM
-        # *     ..
-        # *     .. Array Arguments ..
-        #       LOGICAL            BWORK( * )
-        #       DOUBLE PRECISION   RWORK( * )
-        #       COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
-        #      $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
-        #      $                   WORK( * )
-        function gges!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            chkstride1(A, B)
-            n, m = checksquare(A, B)
-            if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
-            end
-            sdim = BlasInt(0)
-            alpha = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
-            ldvsl = jobvsl == 'V' ? max(1, n) : 1
-            vsl = similar(A, $elty, ldvsl, n)
-            ldvsr = jobvsr == 'V' ? max(1, n) : 1
-            vsr = similar(A, $elty, ldvsr, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, 8n)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gges), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid},
-                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{Cvoid},
-                        Ref{BlasInt}, Clong, Clong, Clong),
-                    jobvsl, jobvsr, 'N', C_NULL,
-                    n, A, max(1, stride(A, 2)), B,
-                    max(1, stride(B, 2)), sdim, alpha, beta,
-                    vsl, ldvsl, vsr, ldvsr,
-                    work, lwork, rwork, C_NULL,
-                    info, 1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, B, alpha, beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:]
-        end
-
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBVSL, JOBVSR, SORT
-        #       INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM
-        # *     ..
-        # *     .. Array Arguments ..
-        #       LOGICAL            BWORK( * )
-        #       DOUBLE PRECISION   RWORK( * )
-        #       COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
-        #      $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
-        #      $                   WORK( * )
-        function gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            chkstride1(A, B)
-            n, m = checksquare(A, B)
-            if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
-            end
-            sdim = BlasInt(0)
-            alpha = similar(A, $elty, n)
-            beta = similar(A, $elty, n)
-            ldvsl = jobvsl == 'V' ? max(1, n) : 1
-            vsl = similar(A, $elty, ldvsl, n)
-            ldvsr = jobvsr == 'V' ? max(1, n) : 1
-            vsr = similar(A, $elty, ldvsr, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, 8n)
-            info = Ref{BlasInt}()
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gges3), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid},
-                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{Cvoid},
-                        Ref{BlasInt}, Clong, Clong, Clong),
-                    jobvsl, jobvsr, 'N', C_NULL,
-                    n, A, max(1, stride(A, 2)), B,
-                    max(1, stride(B, 2)), sdim, alpha, beta,
-                    vsl, ldvsl, vsr, ldvsr,
-                    work, lwork, rwork, C_NULL,
-                    info, 1, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            A, B, alpha, beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:]
-        end
-    end
-end
-
-"""
-    gees!(jobvs, A) -> (A, vs, w)
-
-Computes the eigenvalues (`jobvs = N`) or the eigenvalues and Schur
-vectors (`jobvs = V`) of matrix `A`. `A` is overwritten by its Schur form.
-
-Returns `A`, `vs` containing the Schur vectors, and `w`, containing the
-eigenvalues.
-"""
-gees!(jobvs::AbstractChar, A::AbstractMatrix)
-
-
-"""
-    gges!(jobvsl, jobvsr, A, B) -> (A, B, alpha, beta, vsl, vsr)
-
-Computes the generalized eigenvalues, generalized Schur form, left Schur
-vectors (`jobsvl = V`), or right Schur vectors (`jobvsr = V`) of `A` and
-`B`.
-
-The generalized eigenvalues are returned in `alpha` and `beta`. The left Schur
-vectors are returned in `vsl` and the right Schur vectors are returned in `vsr`.
-"""
-gges!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix)
-
-"""
-    gges3!(jobvsl, jobvsr, A, B) -> (A, B, alpha, beta, vsl, vsr)
-
-Computes the generalized eigenvalues, generalized Schur form, left Schur
-vectors (`jobsvl = V`), or right Schur vectors (`jobvsr = V`) of `A` and
-`B` using a blocked algorithm. This function requires LAPACK 3.6.0.
-
-The generalized eigenvalues are returned in `alpha` and `beta`. The left Schur
-vectors are returned in `vsl` and the right Schur vectors are returned in `vsr`.
-"""
-gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix)
-
-for (trexc, trsen, tgsen, elty) in
-    ((:dtrexc_, :dtrsen_, :dtgsen_, :Float64),
-     (:strexc_, :strsen_, :stgsen_, :Float32))
-    @eval begin
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          COMPQ
-        #       INTEGER            IFST, ILST, INFO, LDQ, LDT, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   Q( LDQ, * ), T( LDT, * ), WORK( * )
-        function trexc!(compq::AbstractChar, ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty})
-            chkstride1(T, Q)
-            n = checksquare(T)
-            ldt = max(1, stride(T, 2))
-            ldq = max(1, stride(Q, 2))
-            work = Vector{$elty}(undef, n)
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($trexc), libblastrampoline), Cvoid,
-                  (Ref{UInt8},  Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ref{BlasInt}, Ref{BlasInt},
-                   Ptr{$elty}, Ptr{BlasInt}, Clong),
-                  compq, n,
-                  T, ldt, Q, ldq,
-                  ifst, ilst,
-                  work, info, 1)
-            chklapackerror(info[])
-            T, Q
-        end
-        trexc!(ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) =
-            trexc!('V', ifst, ilst, T, Q)
-
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          COMPQ, JOB
-        #       INTEGER            INFO, LDQ, LDT, LIWORK, LWORK, M, N
-        #       DOUBLE PRECISION   S, SEP
-        # *     ..
-        # *     .. Array Arguments ..
-        #       LOGICAL            SELECT( * )
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ), WR( * )
-        function trsen!(job::AbstractChar, compq::AbstractChar, select::AbstractVector{BlasInt},
-                        T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty})
-            chkstride1(T, Q, select)
-            n = checksquare(T)
-            ldt = max(1, stride(T, 2))
-            ldq = max(1, stride(Q, 2))
-            wr = similar(T, $elty, n)
-            wi = similar(T, $elty, n)
-            m = sum(select)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            iwork = Vector{BlasInt}(undef, 1)
-            liwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            select = convert(Array{BlasInt}, select)
-            s = Ref{$elty}(zero($elty))
-            sep = Ref{$elty}(zero($elty))
-            for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($trsen), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ref{$elty},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
-                    Ptr{BlasInt}, Clong, Clong),
-                    job, compq, select, n,
-                    T, ldt, Q, ldq,
-                    wr, wi, m, s, sep,
-                    work, lwork, iwork, liwork,
-                    info, 1, 1)
-                chklapackerror(info[])
-                if i == 1 # only estimated optimal lwork, liwork
-                    lwork  = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    liwork = BlasInt(real(iwork[1]))
-                    resize!(iwork, liwork)
-                end
-            end
-            T, Q, iszero(wi) ? wr : complex.(wr, wi), s[], sep[]
-        end
-        trsen!(select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) =
-            trsen!('N', 'V', select, T, Q)
-
-        #        .. Scalar Arguments ..
-        #        LOGICAL            WANTQ, WANTZ
-        #        INTEGER            IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK,
-        #       $                   M, N
-        #        DOUBLE PRECISION   PL, PR
-        #        ..
-        #        .. Array Arguments ..
-        #        LOGICAL            SELECT( * )
-        #        INTEGER            IWORK( * )
-        #        DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
-        #       $                   B( LDB, * ), BETA( * ), DIF( * ), Q( LDQ, * ),
-        #       $                   WORK( * ), Z( LDZ, * )
-        #        ..
-        function tgsen!(select::AbstractVector{BlasInt}, S::AbstractMatrix{$elty}, T::AbstractMatrix{$elty},
-                        Q::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty})
-            chkstride1(select, S, T, Q, Z)
-            n, nt, nq, nz = checksquare(S, T, Q, Z)
-            if n != nt
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and T, ($nt,$nt), must match"))
-            end
-            if n != nq
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and Q, ($nq,$nq), must match"))
-            end
-            if n != nz
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and Z, ($nz,$nz), must match"))
-            end
-            lds = max(1, stride(S, 2))
-            ldt = max(1, stride(T, 2))
-            ldq = max(1, stride(Q, 2))
-            ldz = max(1, stride(Z, 2))
-            m = sum(select)
-            alphai = similar(T, $elty, n)
-            alphar = similar(T, $elty, n)
-            beta = similar(T, $elty, n)
-            lwork = BlasInt(-1)
-            work = Vector{$elty}(undef, 1)
-            liwork = BlasInt(-1)
-            iwork = Vector{BlasInt}(undef, 1)
-            info = Ref{BlasInt}()
-            select = convert(Array{BlasInt}, select)
-            for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($tgsen), libblastrampoline), Cvoid,
-                       (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
-                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                        Ref{BlasInt}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
-                        Ptr{BlasInt}),
-                    0, 1, 1, select,
-                    n, S, lds, T,
-                    ldt, alphar, alphai, beta,
-                    Q, ldq, Z, ldz,
-                    m, C_NULL, C_NULL, C_NULL,
-                    work, lwork, iwork, liwork,
-                    info)
-                chklapackerror(info[])
-                if i == 1 # only estimated optimal lwork, liwork
-                    lwork  = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    liwork = BlasInt(real(iwork[1]))
-                    resize!(iwork, liwork)
-                end
-            end
-            S, T, complex.(alphar, alphai), beta, Q, Z
-        end
-    end
-end
-
-for (trexc, trsen, tgsen, elty, relty) in
-    ((:ztrexc_, :ztrsen_, :ztgsen_, :ComplexF64, :Float64),
-     (:ctrexc_, :ctrsen_, :ctgsen_, :ComplexF32, :Float32))
-    @eval begin
-        #      .. Scalar Arguments ..
-        #      CHARACTER          COMPQ
-        #      INTEGER            IFST, ILST, INFO, LDQ, LDT, N
-        #      ..
-        #      .. Array Arguments ..
-        #      DOUBLE PRECISION   Q( LDQ, * ), T( LDT, * ), WORK( * )
-        function trexc!(compq::AbstractChar, ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty})
-            chkstride1(T, Q)
-            n = checksquare(T)
-            ldt = max(1, stride(T, 2))
-            ldq = max(1, stride(Q, 2))
-            info = Ref{BlasInt}()
-            ccall((@blasfunc($trexc), libblastrampoline), Cvoid,
-                  (Ref{UInt8},  Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ref{BlasInt}, Ref{BlasInt},
-                   Ptr{BlasInt}, Clong),
-                  compq, n,
-                  T, ldt, Q, ldq,
-                  ifst, ilst,
-                  info,  1)
-            chklapackerror(info[])
-            T, Q
-        end
-        trexc!(ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) =
-            trexc!('V', ifst, ilst, T, Q)
-
-        #      .. Scalar Arguments ..
-        #      CHARACTER          COMPQ, JOB
-        #      INTEGER            INFO, LDQ, LDT, LWORK, M, N
-        #      DOUBLE PRECISION   S, SEP
-        #      ..
-        #      .. Array Arguments ..
-        #      LOGICAL            SELECT( * )
-        #      COMPLEX            Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * )
-        function trsen!(job::AbstractChar, compq::AbstractChar, select::AbstractVector{BlasInt},
-                        T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty})
-            chkstride1(select, T, Q)
-            n = checksquare(T)
-            ldt = max(1, stride(T, 2))
-            ldq = max(1, stride(Q, 2))
-            w = similar(T, $elty, n)
-            m = sum(select)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            select = convert(Array{BlasInt}, select)
-            s = Ref{$relty}(zero($relty))
-            sep = Ref{$relty}(zero($relty))
-            for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($trsen), libblastrampoline), Cvoid,
-                    (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ref{$relty}, Ref{$relty},
-                    Ptr{$elty}, Ref{BlasInt},
-                    Ptr{BlasInt}, Clong, Clong),
-                    job, compq, select, n,
-                    T, ldt, Q, ldq,
-                    w, m, s, sep,
-                    work, lwork,
-                    info, 1, 1)
-                chklapackerror(info[])
-                if i == 1 # only estimated optimal lwork, liwork
-                    lwork  = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            T, Q, w, s[], sep[]
-        end
-        trsen!(select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) =
-            trsen!('N', 'V', select, T, Q)
-
-        #        .. Scalar Arguments ..
-        #        LOGICAL            WANTQ, WANTZ
-        #        INTEGER            IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK,
-        #       $                   M, N
-        #        DOUBLE PRECISION   PL, PR
-        #        ..
-        #        .. Array Arguments ..
-        #        LOGICAL            SELECT( * )
-        #        INTEGER            IWORK( * )
-        #        DOUBLE PRECISION   DIF( * )
-        #        COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
-        #       $                   BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * )
-        #        ..
-        function tgsen!(select::AbstractVector{BlasInt}, S::AbstractMatrix{$elty}, T::AbstractMatrix{$elty},
-                        Q::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty})
-            chkstride1(select, S, T, Q, Z)
-            n, nt, nq, nz = checksquare(S, T, Q, Z)
-            if n != nt
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and T, ($nt,$nt), must match"))
-            end
-            if n != nq
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and Q, ($nq,$nq), must match"))
-            end
-            if n != nz
-                throw(DimensionMismatch("dimensions of S, ($n,$n), and Z, ($nz,$nz), must match"))
-            end
-            lds = max(1, stride(S, 2))
-            ldt = max(1, stride(T, 2))
-            ldq = max(1, stride(Q, 2))
-            ldz = max(1, stride(Z, 2))
-            m = sum(select)
-            alpha = similar(T, $elty, n)
-            beta = similar(T, $elty, n)
-            lwork = BlasInt(-1)
-            work = Vector{$elty}(undef, 1)
-            liwork = BlasInt(-1)
-            iwork = Vector{BlasInt}(undef, 1)
-            info = Ref{BlasInt}()
-            select = convert(Array{BlasInt}, select)
-            for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($tgsen), libblastrampoline), Cvoid,
-                       (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
-                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                        Ref{BlasInt}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid},
-                        Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
-                        Ptr{BlasInt}),
-                    0, 1, 1, select,
-                    n, S, lds, T,
-                    ldt, alpha, beta,
-                    Q, ldq, Z, ldz,
-                    m, C_NULL, C_NULL, C_NULL,
-                    work, lwork, iwork, liwork,
-                    info)
-                chklapackerror(info[])
-                if i == 1 # only estimated optimal lwork, liwork
-                    lwork  = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    liwork = BlasInt(real(iwork[1]))
-                    resize!(iwork, liwork)
-                end
-            end
-            S, T, alpha, beta, Q, Z
-        end
-    end
-end
-
-"""
-    trexc!(compq, ifst, ilst, T, Q) -> (T, Q)
-    trexc!(ifst, ilst, T, Q) -> (T, Q)
-
-Reorder the Schur factorization `T` of a matrix, such that the diagonal block
-of `T` with row index `ifst` is moved to row index `ilst`. If `compq = V`, the Schur
-vectors `Q` are reordered. If `compq = N` they are not modified. The 4-arg method
-calls the 5-arg method with `compq = V`.
-"""
-trexc!(compq::AbstractChar, ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix, Q::AbstractMatrix)
-
-"""
-    trsen!(job, compq, select, T, Q) -> (T, Q, w, s, sep)
-    trsen!(select, T, Q) -> (T, Q, w, s, sep)
-
-Reorder the Schur factorization of a matrix and optionally finds reciprocal
-condition numbers. If `job = N`, no condition numbers are found. If `job = E`,
-only the condition number for this cluster of eigenvalues is found. If
-`job = V`, only the condition number for the invariant subspace is found.
-If `job = B` then the condition numbers for the cluster and subspace are
-found. If `compq = V` the Schur vectors `Q` are updated. If `compq = N`
-the Schur vectors are not modified. `select` determines which
-eigenvalues are in the cluster. The 3-arg method calls the 5-arg method
-with `job = N` and `compq = V`.
-
-Returns `T`, `Q`, reordered eigenvalues in `w`, the condition number of the
-cluster of eigenvalues `s`, and the condition number of the invariant subspace
-`sep`.
-"""
-trsen!(compq::AbstractChar, job::AbstractChar, select::AbstractVector{BlasInt}, T::AbstractMatrix, Q::AbstractMatrix)
-
-"""
-    tgsen!(select, S, T, Q, Z) -> (S, T, alpha, beta, Q, Z)
-
-Reorders the vectors of a generalized Schur decomposition. `select` specifies
-the eigenvalues in each cluster.
-"""
-tgsen!(select::AbstractVector{BlasInt}, S::AbstractMatrix, T::AbstractMatrix, Q::AbstractMatrix, Z::AbstractMatrix)
-
-for (fn, elty, relty) in ((:dtrsyl_, :Float64, :Float64),
-                   (:strsyl_, :Float32, :Float32),
-                   (:ztrsyl_, :ComplexF64, :Float64),
-                   (:ctrsyl_, :ComplexF32, :Float32))
-    @eval begin
-        function trsyl!(transa::AbstractChar, transb::AbstractChar, A::AbstractMatrix{$elty},
-                        B::AbstractMatrix{$elty}, C::AbstractMatrix{$elty}, isgn::Int=1)
-            require_one_based_indexing(A, B, C)
-            chkstride1(A, B, C)
-            m, n = checksquare(A), checksquare(B)
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            m1, n1 = size(C)
-            if m != m1 || n != n1
-                throw(DimensionMismatch("dimensions of A, ($m,$n), and C, ($m1,$n1), must match"))
-            end
-            ldc = max(1, stride(C, 2))
-            scale = Ref{$relty}()
-            info  = Ref{BlasInt}()
-            ccall((@blasfunc($fn), libblastrampoline), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$relty}, Ptr{BlasInt}, Clong, Clong),
-                transa, transb, isgn, m, n,
-                A, lda, B, ldb, C, ldc,
-                scale, info, 1, 1)
-            chklapackerror(info[])
-            C, scale[]
-        end
-    end
-end
-
-"""
-    trsyl!(transa, transb, A, B, C, isgn=1) -> (C, scale)
-
-Solves the Sylvester matrix equation `A * X +/- X * B = scale*C` where `A` and
-`B` are both quasi-upper triangular. If `transa = N`, `A` is not modified.
-If `transa = T`, `A` is transposed. If `transa = C`, `A` is conjugate
-transposed. Similarly for `transb` and `B`. If `isgn = 1`, the equation
-`A * X + X * B = scale * C` is solved. If `isgn = -1`, the equation
-`A * X - X * B = scale * C` is solved.
-
-Returns `X` (overwriting `C`) and `scale`.
-"""
-trsyl!(transa::AbstractChar, transb::AbstractChar, A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, isgn::Int=1)
-
-end # module
diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl
deleted file mode 100644
index b133741611adc..0000000000000
--- a/stdlib/LinearAlgebra/src/lbt.jl
+++ /dev/null
@@ -1,314 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## This file contains libblastrampoline-specific APIs
-
-# Keep these in sync with `src/libblastrampoline_internal.h`
-struct lbt_library_info_t
-    libname::Cstring
-    handle::Ptr{Cvoid}
-    suffix::Cstring
-    active_forwards::Ptr{UInt8}
-    interface::Int32
-    complex_retstyle::Int32
-    f2c::Int32
-    cblas::Int32
-end
-const LBT_INTERFACE_LP64    = 32
-const LBT_INTERFACE_ILP64   = 64
-const LBT_INTERFACE_UNKNOWN = -1
-const LBT_INTERFACE_MAP = Dict(
-    LBT_INTERFACE_LP64    => :lp64,
-    LBT_INTERFACE_ILP64   => :ilp64,
-    LBT_INTERFACE_UNKNOWN => :unknown,
-)
-const LBT_INV_INTERFACE_MAP = Dict(v => k for (k, v) in LBT_INTERFACE_MAP)
-
-const LBT_F2C_PLAIN         =  0
-const LBT_F2C_REQUIRED      =  1
-const LBT_F2C_UNKNOWN       = -1
-const LBT_F2C_MAP = Dict(
-    LBT_F2C_PLAIN    => :plain,
-    LBT_F2C_REQUIRED => :required,
-    LBT_F2C_UNKNOWN  => :unknown,
-)
-const LBT_INV_F2C_MAP = Dict(v => k for (k, v) in LBT_F2C_MAP)
-
-const LBT_COMPLEX_RETSTYLE_NORMAL   =  0
-const LBT_COMPLEX_RETSTYLE_ARGUMENT =  1
-const LBT_COMPLEX_RETSTYLE_UNKNOWN  = -1
-const LBT_COMPLEX_RETSTYLE_MAP = Dict(
-    LBT_COMPLEX_RETSTYLE_NORMAL   => :normal,
-    LBT_COMPLEX_RETSTYLE_ARGUMENT => :argument,
-    LBT_COMPLEX_RETSTYLE_UNKNOWN  => :unknown,
-)
-const LBT_INV_COMPLEX_RETSTYLE_MAP = Dict(v => k for (k, v) in LBT_COMPLEX_RETSTYLE_MAP)
-
-const LBT_CBLAS_CONFORMANT =  0
-const LBT_CBLAS_DIVERGENT  =  1
-const LBT_CBLAS_UNKNOWN    = -1
-const LBT_CBLAS_MAP = Dict(
-    LBT_CBLAS_CONFORMANT => :conformant,
-    LBT_CBLAS_DIVERGENT  => :divergent,
-    LBT_CBLAS_UNKNOWN    => :unknown,
-)
-const LBT_INV_CBLAS_MAP = Dict(v => k for (k, v) in LBT_CBLAS_MAP)
-
-struct LBTLibraryInfo
-    libname::String
-    handle::Ptr{Cvoid}
-    suffix::String
-    active_forwards::Vector{UInt8}
-    interface::Symbol
-    complex_retstyle::Symbol
-    f2c::Symbol
-    cblas::Symbol
-
-    function LBTLibraryInfo(lib_info::lbt_library_info_t, num_exported_symbols::UInt32)
-        return new(
-            unsafe_string(lib_info.libname),
-            lib_info.handle,
-            unsafe_string(lib_info.suffix),
-            unsafe_wrap(Vector{UInt8}, lib_info.active_forwards, div(num_exported_symbols,8)+1),
-            LBT_INTERFACE_MAP[lib_info.interface],
-            LBT_COMPLEX_RETSTYLE_MAP[lib_info.complex_retstyle],
-            LBT_F2C_MAP[lib_info.f2c],
-            LBT_CBLAS_MAP[lib_info.cblas],
-        )
-    end
-end
-
-struct lbt_config_t
-    loaded_libs::Ptr{Ptr{lbt_library_info_t}}
-    build_flags::UInt32
-    exported_symbols::Ptr{Cstring}
-    num_exported_symbols::UInt32
-end
-const LBT_BUILDFLAGS_DEEPBINDLESS     = 0x01
-const LBT_BUILDFLAGS_F2C_CAPABLE      = 0x02
-const LBT_BUILDFLAGS_CBLAS_DIVERGENCE = 0x04
-const LBT_BUILDFLAGS_COMPLEX_RETSTYLE = 0x08
-const LBT_BUILDFLAGS_SYMBOL_TRIMMING  = 0x10
-const LBT_BUILDFLAGS_MAP = Dict(
-    LBT_BUILDFLAGS_DEEPBINDLESS => :deepbindless,
-    LBT_BUILDFLAGS_F2C_CAPABLE => :f2c_capable,
-    LBT_BUILDFLAGS_CBLAS_DIVERGENCE => :cblas_divergence,
-    LBT_BUILDFLAGS_COMPLEX_RETSTYLE => :complex_retstyle,
-    LBT_BUILDFLAGS_SYMBOL_TRIMMING  => :symbol_trimming,
-)
-
-struct LBTConfig
-    loaded_libs::Vector{LBTLibraryInfo}
-    build_flags::Vector{Symbol}
-    exported_symbols::Vector{String}
-
-    function LBTConfig(config::lbt_config_t)
-        # Decode OR'ed flags into a list of names
-        build_flag_names = Symbol[]
-        for (flag, name) in LBT_BUILDFLAGS_MAP
-            if config.build_flags & flag != 0x00
-                push!(build_flag_names, name)
-            end
-        end
-
-        # Load all exported symbol names
-        exported_symbols = String[]
-        for sym_idx in 1:config.num_exported_symbols
-            str_ptr = unsafe_load(config.exported_symbols, sym_idx)
-            if str_ptr != C_NULL
-                push!(exported_symbols, unsafe_string(str_ptr))
-            else
-                @error("NULL string in lbt_config.exported_symbols[$(sym_idx)]")
-            end
-        end
-
-        # Unpack library info structures
-        libs = LBTLibraryInfo[]
-        idx = 1
-        lib_ptr = unsafe_load(config.loaded_libs, idx)
-        while lib_ptr != C_NULL
-            push!(libs, LBTLibraryInfo(unsafe_load(lib_ptr), config.num_exported_symbols))
-
-            idx += 1
-            lib_ptr = unsafe_load(config.loaded_libs, idx)
-        end
-        return new(
-            libs,
-            build_flag_names,
-            exported_symbols,
-        )
-    end
-end
-
-Base.show(io::IO, lbt::LBTLibraryInfo) = print(io, "LBTLibraryInfo(", basename(lbt.libname), ", ", lbt.interface, ")")
-function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, lbt::LBTLibraryInfo)
-    summary(io, lbt); println(io)
-    println(io, "├ Library: ", basename(lbt.libname))
-    println(io, "├ Interface: ", lbt.interface)
-    println(io, "├ Complex return style: ", lbt.complex_retstyle)
-    println(io, "├ F2C: ", lbt.f2c)
-      print(io, "└ CBLAS: ", lbt.cblas)
-end
-
-function Base.show(io::IO, lbt::LBTConfig)
-    if length(lbt.loaded_libs) <= 3
-        print(io, "LBTConfig(")
-        gen = (string("[", uppercase(string(l.interface)), "] ",
-            basename(l.libname)) for l in lbt.loaded_libs)
-        print(io, join(gen, ", "))
-        print(io, ")")
-    else
-        print(io, "LBTConfig(...)")
-    end
-end
-function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, lbt::LBTConfig)
-    summary(io, lbt); println(io)
-    println(io, "Libraries: ")
-    for (i,l) in enumerate(lbt.loaded_libs)
-        char = i == length(lbt.loaded_libs) ? "└" : "├"
-        interface_str = if l.interface === :ilp64
-            "ILP64"
-        elseif l.interface === :lp64
-            " LP64"
-        else
-            "UNKWN"
-        end
-        print(io, char, " [", interface_str,"] ", basename(l.libname))
-        i !== length(lbt.loaded_libs) && println()
-    end
-end
-
-mutable struct ConfigCache
-    @atomic config::Union{Nothing,LBTConfig}
-    lock::ReentrantLock
-end
-
-# In the event that users want to call `lbt_get_config()` multiple times (e.g. for
-# runtime checks of which BLAS vendor is providing a symbol), let's cache the value
-# and clear it only when someone calls something that would cause it to change.
-const _CACHED_CONFIG = ConfigCache(nothing, ReentrantLock())
-
-function lbt_get_config()
-    config = @atomic :acquire _CACHED_CONFIG.config
-    config === nothing || return config
-    return lock(_CACHED_CONFIG.lock) do
-        local config = @atomic :monotonic _CACHED_CONFIG.config
-        config === nothing || return config
-        config_ptr = ccall((:lbt_get_config, libblastrampoline), Ptr{lbt_config_t}, ())
-        @atomic :release _CACHED_CONFIG.config = LBTConfig(unsafe_load(config_ptr))
-    end
-end
-
-function _clear_config_with(f)
-    lock(_CACHED_CONFIG.lock) do
-        @atomic :release _CACHED_CONFIG.config = nothing
-        f()
-    end
-end
-
-function lbt_get_num_threads()
-    return ccall((:lbt_get_num_threads, libblastrampoline), Int32, ())
-end
-
-function lbt_set_num_threads(nthreads)
-    return ccall((:lbt_set_num_threads, libblastrampoline), Cvoid, (Int32,), nthreads)
-end
-
-function lbt_forward(path::AbstractString; clear::Bool = false, verbose::Bool = false, suffix_hint::Union{String,Nothing} = nothing)
-    _clear_config_with() do
-        return ccall((:lbt_forward, libblastrampoline), Int32, (Cstring, Int32, Int32, Cstring),
-                     path, clear ? 1 : 0, verbose ? 1 : 0, something(suffix_hint, C_NULL))
-    end
-end
-
-function lbt_set_default_func(addr)
-    _clear_config_with() do
-        return ccall((:lbt_set_default_func, libblastrampoline), Cvoid, (Ptr{Cvoid},), addr)
-    end
-end
-
-function lbt_get_default_func()
-    return ccall((:lbt_get_default_func, libblastrampoline), Ptr{Cvoid}, ())
-end
-
-"""
-    lbt_find_backing_library(symbol_name, interface; config::LBTConfig = lbt_get_config())
-
-Return the `LBTLibraryInfo` that represents the backing library for the given symbol
-exported from libblastrampoline.  This allows us to discover which library will service
-a particular BLAS call from Julia code.  This method returns `nothing` if either of the
-following conditions are met:
-
- * No loaded library exports the desired symbol (the default function will be called)
- * The symbol was set via `lbt_set_forward()`, which does not track library provenance.
-
-If the given `symbol_name` is not contained within the list of exported symbols, an
-`ArgumentError` will be thrown.
-"""
-function lbt_find_backing_library(symbol_name, interface::Symbol;
-                                  config::LBTConfig = lbt_get_config())
-    if interface ∉ (:ilp64, :lp64)
-        throw(ArgumentError("Invalid interface specification: '$(interface)'"))
-    end
-    symbol_idx = findfirst(s -> s == symbol_name, config.exported_symbols)
-    if symbol_idx === nothing
-        throw(ArgumentError("Invalid exported symbol name '$(symbol_name)'"))
-    end
-    # Convert to zero-indexed
-    symbol_idx -= 1
-
-    forward_byte_offset = div(symbol_idx, 8)
-    forward_byte_mask = 1 << mod(symbol_idx, 8)
-    for lib in filter(l -> l.interface == interface, config.loaded_libs)
-        if lib.active_forwards[forward_byte_offset+1] & forward_byte_mask != 0x00
-            return lib
-        end
-    end
-
-    # No backing library was found
-    return nothing
-end
-
-
-## NOTE: Manually setting forwards is referred to as the 'footgun API'.  It allows truly
-## bizarre and complex setups to be created.  If you run into strange errors while using
-## it, the first thing you should ask yourself is whether you've set things up properly.
-function lbt_set_forward(symbol_name, addr, interface,
-                         complex_retstyle = LBT_COMPLEX_RETSTYLE_NORMAL,
-                         f2c = LBT_F2C_PLAIN; verbose::Bool = false)
-    _clear_config_with() do
-        return ccall(
-            (:lbt_set_forward, libblastrampoline),
-            Int32,
-            (Cstring, Ptr{Cvoid}, Int32, Int32, Int32, Int32),
-            string(symbol_name),
-            addr,
-            Int32(interface),
-            Int32(complex_retstyle),
-            Int32(f2c),
-            verbose ? Int32(1) : Int32(0),
-        )
-    end
-end
-function lbt_set_forward(symbol_name, addr, interface::Symbol,
-                         complex_retstyle::Symbol = :normal,
-                         f2c::Symbol = :plain; kwargs...)
-    return lbt_set_forward(symbol_name, addr,
-                           LBT_INV_INTERFACE_MAP[interface],
-                           LBT_INV_COMPLEX_RETSTYLE_MAP[complex_retstyle],
-                           LBT_INV_F2C_MAP[f2c];
-                           kwargs...)
-end
-
-function lbt_get_forward(symbol_name, interface, f2c = LBT_F2C_PLAIN)
-    return ccall(
-        (:lbt_get_forward, libblastrampoline),
-        Ptr{Cvoid},
-        (Cstring, Int32, Int32),
-        string(symbol_name),
-        Int32(interface),
-        Int32(f2c),
-    )
-end
-function lbt_get_forward(symbol_name, interface::Symbol, f2c::Symbol = :plain)
-    return lbt_get_forward(symbol_name, LBT_INV_INTERFACE_MAP[interface], LBT_INV_F2C_MAP[f2c])
-end
diff --git a/stdlib/LinearAlgebra/src/ldlt.jl b/stdlib/LinearAlgebra/src/ldlt.jl
deleted file mode 100644
index d3d6234961c44..0000000000000
--- a/stdlib/LinearAlgebra/src/ldlt.jl
+++ /dev/null
@@ -1,224 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-    LDLt <: Factorization
-
-Matrix factorization type of the `LDLt` factorization of a real [`SymTridiagonal`](@ref)
-matrix `S` such that `S = L*Diagonal(d)*L'`, where `L` is a [`UnitLowerTriangular`](@ref)
-matrix and `d` is a vector. The main use of an `LDLt` factorization `F = ldlt(S)`
-is to solve the linear system of equations `Sx = b` with `F\\b`. This is the
-return type of [`ldlt`](@ref), the corresponding matrix factorization function.
-
-The individual components of the factorization `F::LDLt` can be accessed via `getproperty`:
-
-| Component | Description                                 |
-|:---------:|:--------------------------------------------|
-| `F.L`     | `L` (unit lower triangular) part of `LDLt`  |
-| `F.D`     | `D` (diagonal) part of `LDLt`               |
-| `F.Lt`    | `Lt` (unit upper triangular) part of `LDLt` |
-| `F.d`     | diagonal values of `D` as a `Vector`        |
-
-# Examples
-```jldoctest
-julia> S = SymTridiagonal([3., 4., 5.], [1., 2.])
-3×3 SymTridiagonal{Float64, Vector{Float64}}:
- 3.0  1.0   ⋅
- 1.0  4.0  2.0
-  ⋅   2.0  5.0
-
-julia> F = ldlt(S)
-LDLt{Float64, SymTridiagonal{Float64, Vector{Float64}}}
-L factor:
-3×3 UnitLowerTriangular{Float64, SymTridiagonal{Float64, Vector{Float64}}}:
- 1.0        ⋅         ⋅
- 0.333333  1.0        ⋅
- 0.0       0.545455  1.0
-D factor:
-3×3 Diagonal{Float64, Vector{Float64}}:
- 3.0   ⋅        ⋅
-  ⋅   3.66667   ⋅
-  ⋅    ⋅       3.90909
-```
-"""
-struct LDLt{T,S<:AbstractMatrix{T}} <: Factorization{T}
-    data::S
-
-    function LDLt{T,S}(data) where {T,S<:AbstractMatrix{T}}
-        require_one_based_indexing(data)
-        new{T,S}(data)
-    end
-end
-LDLt(data::AbstractMatrix{T}) where {T} = LDLt{T,typeof(data)}(data)
-LDLt{T}(data::AbstractMatrix) where {T} = LDLt(convert(AbstractMatrix{T}, data)::AbstractMatrix{T})
-
-size(S::LDLt) = size(S.data)
-size(S::LDLt, i::Integer) = size(S.data, i)
-
-LDLt{T,S}(F::LDLt{T,S}) where {T,S<:AbstractMatrix{T}} = F
-LDLt{T,S}(F::LDLt) where {T,S<:AbstractMatrix{T}} = LDLt{T,S}(convert(S, F.data)::S)
-LDLt{T}(F::LDLt{T}) where {T} = F
-LDLt{T}(F::LDLt) where {T} = LDLt(convert(AbstractMatrix{T}, F.data)::AbstractMatrix{T})
-
-Factorization{T}(F::LDLt{T}) where {T} = F
-Factorization{T}(F::LDLt) where {T} = LDLt{T}(F)
-
-function getproperty(F::LDLt{<:Any, <:SymTridiagonal}, d::Symbol)
-    Fdata = getfield(F, :data)
-    if d === :d
-        return Fdata.dv
-    elseif d === :D
-        return Diagonal(Fdata.dv)
-    elseif d === :L
-        return UnitLowerTriangular(Fdata)
-    elseif d === :Lt
-        return UnitUpperTriangular(Fdata)
-    else
-        return getfield(F, d)
-    end
-end
-
-adjoint(F::LDLt{<:Real,<:SymTridiagonal}) = F
-adjoint(F::LDLt) = LDLt(copy(adjoint(F.data)))
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LDLt)
-    summary(io, F); println(io)
-    println(io, "L factor:")
-    show(io, mime, F.L)
-    println(io, "\nD factor:")
-    show(io, mime, F.D)
-end
-
-# SymTridiagonal
-"""
-    ldlt!(S::SymTridiagonal) -> LDLt
-
-Same as [`ldlt`](@ref), but saves space by overwriting the input `S`, instead of creating a copy.
-
-# Examples
-```jldoctest
-julia> S = SymTridiagonal([3., 4., 5.], [1., 2.])
-3×3 SymTridiagonal{Float64, Vector{Float64}}:
- 3.0  1.0   ⋅
- 1.0  4.0  2.0
-  ⋅   2.0  5.0
-
-julia> ldltS = ldlt!(S);
-
-julia> ldltS === S
-false
-
-julia> S
-3×3 SymTridiagonal{Float64, Vector{Float64}}:
- 3.0       0.333333   ⋅
- 0.333333  3.66667   0.545455
-  ⋅        0.545455  3.90909
-```
-"""
-function ldlt!(S::SymTridiagonal{T,V}) where {T,V}
-    n = size(S,1)
-    d = S.dv
-    e = S.ev
-    @inbounds for i in 1:n-1
-        iszero(d[i]) && throw(ZeroPivotException(i))
-        e[i] /= d[i]
-        d[i+1] -= e[i]^2*d[i]
-    end
-    return LDLt{T,SymTridiagonal{T,V}}(S)
-end
-
-"""
-    ldlt(S::SymTridiagonal) -> LDLt
-
-Compute an `LDLt` (i.e., ``LDL^T``) factorization of the real symmetric tridiagonal matrix `S` such that `S = L*Diagonal(d)*L'`
-where `L` is a unit lower triangular matrix and `d` is a vector. The main use of an `LDLt`
-factorization `F = ldlt(S)` is to solve the linear system of equations `Sx = b` with `F\\b`.
-
-See also [`bunchkaufman`](@ref) for a similar, but pivoted, factorization of arbitrary symmetric or Hermitian matrices.
-
-# Examples
-```jldoctest
-julia> S = SymTridiagonal([3., 4., 5.], [1., 2.])
-3×3 SymTridiagonal{Float64, Vector{Float64}}:
- 3.0  1.0   ⋅
- 1.0  4.0  2.0
-  ⋅   2.0  5.0
-
-julia> ldltS = ldlt(S);
-
-julia> b = [6., 7., 8.];
-
-julia> ldltS \\ b
-3-element Vector{Float64}:
- 1.7906976744186047
- 0.627906976744186
- 1.3488372093023255
-
-julia> S \\ b
-3-element Vector{Float64}:
- 1.7906976744186047
- 0.627906976744186
- 1.3488372093023255
-```
-"""
-function ldlt(M::SymTridiagonal{T}; shift::Number=false) where T
-    S = typeof((zero(T)+shift)/one(T))
-    Mₛ = SymTridiagonal{S}(copymutable_oftype(M.dv, S), copymutable_oftype(M.ev, S))
-    if !iszero(shift)
-        Mₛ.dv .+= shift
-    end
-    return ldlt!(Mₛ)
-end
-
-factorize(S::SymTridiagonal) = ldlt(S)
-
-function ldiv!(S::LDLt{<:Any,<:SymTridiagonal}, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    n, nrhs = size(B, 1), size(B, 2)
-    if size(S,1) != n
-        throw(DimensionMismatch("Matrix has dimensions $(size(S)) but right hand side has first dimension $n"))
-    end
-    d = S.data.dv
-    l = S.data.ev
-    @inbounds begin
-        for i = 2:n
-            li1 = l[i-1]
-            @simd for j = 1:nrhs
-                B[i,j] -= li1*B[i-1,j]
-            end
-        end
-        dn = d[n]
-        @simd for j = 1:nrhs
-            B[n,j] /= dn
-        end
-        for i = n-1:-1:1
-            di = d[i]
-            li = l[i]
-            @simd for j = 1:nrhs
-                B[i,j] /= di
-                B[i,j] -= li*B[i+1,j]
-            end
-        end
-    end
-    return B
-end
-
-rdiv!(B::AbstractVecOrMat, S::LDLt{<:Any,<:SymTridiagonal}) =
-    transpose(ldiv!(S, transpose(B)))
-
-function logabsdet(F::LDLt{<:Any,<:SymTridiagonal})
-    it = (F.data[i,i] for i in 1:size(F, 1))
-    return sum(log∘abs, it), prod(sign, it)
-end
-
-# Conversion methods
-function SymTridiagonal(F::LDLt{<:Any, <:SymTridiagonal})
-    e = copy(F.data.ev)
-    d = copy(F.data.dv)
-    e .*= d[1:end-1]
-    d[2:end] += e .* F.data.ev
-    SymTridiagonal(d, e)
-end
-AbstractMatrix(F::LDLt) = SymTridiagonal(F)
-AbstractArray(F::LDLt) = AbstractMatrix(F)
-Matrix(F::LDLt) = Array(AbstractArray(F))
-Array(F::LDLt) = Matrix(F)
diff --git a/stdlib/LinearAlgebra/src/lq.jl b/stdlib/LinearAlgebra/src/lq.jl
deleted file mode 100644
index 07d918c4374a5..0000000000000
--- a/stdlib/LinearAlgebra/src/lq.jl
+++ /dev/null
@@ -1,203 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# LQ Factorizations
-"""
-    LQ <: Factorization
-
-Matrix factorization type of the `LQ` factorization of a matrix `A`. The `LQ`
-decomposition is the [`QR`](@ref) decomposition of `transpose(A)`. This is the return
-type of [`lq`](@ref), the corresponding matrix factorization function.
-
-If `S::LQ` is the factorization object, the lower triangular component can be
-obtained via `S.L`, and the orthogonal/unitary component via `S.Q`, such that
-`A ≈ S.L*S.Q`.
-
-Iterating the decomposition produces the components `S.L` and `S.Q`.
-
-# Examples
-```jldoctest
-julia> A = [5. 7.; -2. -4.]
-2×2 Matrix{Float64}:
-  5.0   7.0
- -2.0  -4.0
-
-julia> S = lq(A)
-LQ{Float64, Matrix{Float64}, Vector{Float64}}
-L factor:
-2×2 Matrix{Float64}:
- -8.60233   0.0
-  4.41741  -0.697486
-Q factor: 2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}
-
-julia> S.L * S.Q
-2×2 Matrix{Float64}:
-  5.0   7.0
- -2.0  -4.0
-
-julia> l, q = S; # destructuring via iteration
-
-julia> l == S.L &&  q == S.Q
-true
-```
-"""
-struct LQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: Factorization{T}
-    factors::S
-    τ::C
-
-    function LQ{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}}
-        require_one_based_indexing(factors)
-        new{T,S,C}(factors, τ)
-    end
-end
-LQ(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} =
-    LQ{T,typeof(factors),typeof(τ)}(factors, τ)
-LQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} =
-    LQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ))
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(LQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S},
-           LQ{T,S,typeof(τ)}(factors, τ), false)
-
-# iteration for destructuring into components
-Base.iterate(S::LQ) = (S.L, Val(:Q))
-Base.iterate(S::LQ, ::Val{:Q}) = (S.Q, Val(:done))
-Base.iterate(S::LQ, ::Val{:done}) = nothing
-
-"""
-    lq!(A) -> LQ
-
-Compute the [`LQ`](@ref) factorization of `A`, using the input
-matrix as a workspace. See also [`lq`](@ref).
-"""
-lq!(A::StridedMatrix{<:BlasFloat}) = LQ(LAPACK.gelqf!(A)...)
-
-"""
-    lq(A) -> S::LQ
-
-Compute the LQ decomposition of `A`. The decomposition's lower triangular
-component can be obtained from the [`LQ`](@ref) object `S` via `S.L`, and the
-orthogonal/unitary component via `S.Q`, such that `A ≈ S.L*S.Q`.
-
-Iterating the decomposition produces the components `S.L` and `S.Q`.
-
-The LQ decomposition is the QR decomposition of `transpose(A)`, and it is useful
-in order to compute the minimum-norm solution `lq(A) \\ b` to an underdetermined
-system of equations (`A` has more columns than rows, but has full row rank).
-
-# Examples
-```jldoctest
-julia> A = [5. 7.; -2. -4.]
-2×2 Matrix{Float64}:
-  5.0   7.0
- -2.0  -4.0
-
-julia> S = lq(A)
-LQ{Float64, Matrix{Float64}, Vector{Float64}}
-L factor:
-2×2 Matrix{Float64}:
- -8.60233   0.0
-  4.41741  -0.697486
-Q factor: 2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}
-
-julia> S.L * S.Q
-2×2 Matrix{Float64}:
-  5.0   7.0
- -2.0  -4.0
-
-julia> l, q = S; # destructuring via iteration
-
-julia> l == S.L &&  q == S.Q
-true
-```
-"""
-lq(A::AbstractMatrix{T}) where {T} = lq!(copy_similar(A, lq_eltype(T)))
-lq(x::Number) = lq!(fill(convert(lq_eltype(typeof(x)), x), 1, 1))
-
-lq_eltype(::Type{T}) where {T} = typeof(zero(T) / sqrt(abs2(one(T))))
-
-copy(A::LQ) = LQ(copy(A.factors), copy(A.τ))
-
-LQ{T}(A::LQ) where {T} = LQ(convert(AbstractMatrix{T}, A.factors), convert(Vector{T}, A.τ))
-Factorization{T}(A::LQ) where {T} = LQ{T}(A)
-
-AbstractMatrix(A::LQ) = A.L*A.Q
-AbstractArray(A::LQ) = AbstractMatrix(A)
-Matrix(A::LQ) = Array(AbstractArray(A))
-Array(A::LQ) = Matrix(A)
-
-transpose(F::LQ{<:Real}) = F'
-transpose(::LQ) =
-    throw(ArgumentError("transpose of LQ decomposition is not supported, consider using adjoint"))
-
-Base.copy(F::AdjointFactorization{T,<:LQ{T}}) where {T} =
-    QR{T,typeof(F.parent.factors),typeof(F.parent.τ)}(copy(adjoint(F.parent.factors)), copy(F.parent.τ))
-
-function getproperty(F::LQ, d::Symbol)
-    m, n = size(F)
-    if d === :L
-        return tril!(getfield(F, :factors)[1:m, 1:min(m,n)])
-    elseif d === :Q
-        return LQPackedQ(getfield(F, :factors), getfield(F, :τ))
-    else
-        return getfield(F, d)
-    end
-end
-
-Base.propertynames(F::LQ, private::Bool=false) =
-    (:L, :Q, (private ? fieldnames(typeof(F)) : ())...)
-
-# getindex(A::LQPackedQ, i::Integer, j::Integer) =
-#     lmul!(A, setindex!(zeros(eltype(A), size(A, 2)), 1, j))[i]
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LQ)
-    summary(io, F); println(io)
-    println(io, "L factor:")
-    show(io, mime, F.L)
-    print(io, "\nQ factor: ")
-    show(io, mime, F.Q)
-end
-
-size(F::LQ, dim::Integer) = size(getfield(F, :factors), dim)
-size(F::LQ)               = size(getfield(F, :factors))
-
-## Multiplication by LQ
-function lmul!(A::LQ, B::AbstractVecOrMat)
-    lmul!(LowerTriangular(A.L), view(lmul!(A.Q, B), 1:size(A,1), axes(B,2)))
-    return B
-end
-function *(A::LQ{TA}, B::AbstractVecOrMat{TB}) where {TA,TB}
-    TAB = promote_type(TA, TB)
-    _cut_B(lmul!(convert(Factorization{TAB}, A), copy_similar(B, TAB)), 1:size(A,1))
-end
-
-# With a real lhs and complex rhs with the same precision, we can reinterpret
-# the complex rhs as a real rhs with twice the number of columns
-function (\)(F::LQ{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal
-    require_one_based_indexing(B)
-    X = zeros(T, size(F,2), 2*size(B,2))
-    X[1:size(B,1), 1:size(B,2)] .= real.(B)
-    X[1:size(B,1), size(B,2)+1:size(X,2)] .= imag.(B)
-    ldiv!(F, X)
-    return reshape(copy(reinterpret(Complex{T}, copy(transpose(reshape(X, div(length(X), 2), 2))))),
-                           isa(B, AbstractVector) ? (size(F,2),) : (size(F,2), size(B,2)))
-end
-
-
-function ldiv!(A::LQ, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    m, n = size(A)
-    m ≤ n || throw(DimensionMismatch("LQ solver does not support overdetermined systems (more rows than columns)"))
-
-    ldiv!(LowerTriangular(A.L), view(B, 1:size(A,1), axes(B,2)))
-    return lmul!(adjoint(A.Q), B)
-end
-
-function ldiv!(Fadj::AdjointFactorization{<:Any,<:LQ}, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    m, n = size(Fadj)
-    m >= n || throw(DimensionMismatch("solver does not support underdetermined systems (more columns than rows)"))
-
-    F = parent(Fadj)
-    lmul!(F.Q, B)
-    ldiv!(UpperTriangular(adjoint(F.L)), view(B, 1:size(F,1), axes(B,2)))
-    return B
-end
diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl
deleted file mode 100644
index 5d69090f27e44..0000000000000
--- a/stdlib/LinearAlgebra/src/lu.jl
+++ /dev/null
@@ -1,758 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-####################
-# LU Factorization #
-####################
-"""
-    LU <: Factorization
-
-Matrix factorization type of the `LU` factorization of a square matrix `A`. This
-is the return type of [`lu`](@ref), the corresponding matrix factorization function.
-
-The individual components of the factorization `F::LU` can be accessed via [`getproperty`](@ref):
-
-| Component | Description                              |
-|:----------|:-----------------------------------------|
-| `F.L`     | `L` (unit lower triangular) part of `LU` |
-| `F.U`     | `U` (upper triangular) part of `LU`      |
-| `F.p`     | (right) permutation `Vector`             |
-| `F.P`     | (right) permutation `Matrix`             |
-
-Iterating the factorization produces the components `F.L`, `F.U`, and `F.p`.
-
-# Examples
-```jldoctest
-julia> A = [4 3; 6 3]
-2×2 Matrix{Int64}:
- 4  3
- 6  3
-
-julia> F = lu(A)
-LU{Float64, Matrix{Float64}, Vector{Int64}}
-L factor:
-2×2 Matrix{Float64}:
- 1.0       0.0
- 0.666667  1.0
-U factor:
-2×2 Matrix{Float64}:
- 6.0  3.0
- 0.0  1.0
-
-julia> F.L * F.U == A[F.p, :]
-true
-
-julia> l, u, p = lu(A); # destructuring via iteration
-
-julia> l == F.L && u == F.U && p == F.p
-true
-```
-"""
-struct LU{T,S<:AbstractMatrix{T},P<:AbstractVector{<:Integer}} <: Factorization{T}
-    factors::S
-    ipiv::P
-    info::BlasInt
-
-    function LU{T,S,P}(factors, ipiv, info) where {T, S<:AbstractMatrix{T}, P<:AbstractVector{<:Integer}}
-        require_one_based_indexing(factors)
-        new{T,S,P}(factors, ipiv, info)
-    end
-end
-LU(factors::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer}, info::BlasInt) where {T} =
-    LU{T,typeof(factors),typeof(ipiv)}(factors, ipiv, info)
-LU{T}(factors::AbstractMatrix, ipiv::AbstractVector{<:Integer}, info::Integer) where {T} =
-    LU(convert(AbstractMatrix{T}, factors), ipiv, BlasInt(info))
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(LU{T,S}(factors::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer},
-                   info::BlasInt) where {T,S},
-           LU{T,S,typeof(ipiv)}(factors, ipiv, info), false)
-
-# iteration for destructuring into components
-Base.iterate(S::LU) = (S.L, Val(:U))
-Base.iterate(S::LU, ::Val{:U}) = (S.U, Val(:p))
-Base.iterate(S::LU, ::Val{:p}) = (S.p, Val(:done))
-Base.iterate(S::LU, ::Val{:done}) = nothing
-
-# LU prefers transpose over adjoint in the real case, override the generic fallback
-adjoint(F::LU{<:Real}) = TransposeFactorization(F)
-transpose(F::LU{<:Real}) = TransposeFactorization(F)
-
-# the following method is meant to catch calls to lu!(A::LAPACKArray) without a pivoting stategy
-lu!(A::StridedMatrix{<:BlasFloat}; check::Bool = true) = lu!(A, RowMaximum(); check=check)
-function lu!(A::StridedMatrix{T}, ::RowMaximum; check::Bool = true) where {T<:BlasFloat}
-    lpt = LAPACK.getrf!(A; check)
-    check && checknonsingular(lpt[3])
-    return LU{T,typeof(lpt[1]),typeof(lpt[2])}(lpt[1], lpt[2], lpt[3])
-end
-function lu!(A::HermOrSym{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); check::Bool = true) where {T}
-    copytri!(A.data, A.uplo, isa(A, Hermitian))
-    lu!(A.data, pivot; check = check)
-end
-# for backward compatibility
-# TODO: remove towards Julia v2
-@deprecate lu!(A::Union{StridedMatrix,HermOrSym,Tridiagonal}, ::Val{true}; check::Bool = true) lu!(A, RowMaximum(); check=check)
-@deprecate lu!(A::Union{StridedMatrix,HermOrSym,Tridiagonal}, ::Val{false}; check::Bool = true) lu!(A, NoPivot(); check=check)
-
-"""
-    lu!(A, pivot = RowMaximum(); check = true) -> LU
-
-`lu!` is the same as [`lu`](@ref), but saves space by overwriting the
-input `A`, instead of creating a copy. An [`InexactError`](@ref)
-exception is thrown if the factorization produces a number not representable by the
-element type of `A`, e.g. for integer types.
-
-# Examples
-```jldoctest
-julia> A = [4. 3.; 6. 3.]
-2×2 Matrix{Float64}:
- 4.0  3.0
- 6.0  3.0
-
-julia> F = lu!(A)
-LU{Float64, Matrix{Float64}, Vector{Int64}}
-L factor:
-2×2 Matrix{Float64}:
- 1.0       0.0
- 0.666667  1.0
-U factor:
-2×2 Matrix{Float64}:
- 6.0  3.0
- 0.0  1.0
-
-julia> iA = [4 3; 6 3]
-2×2 Matrix{Int64}:
- 4  3
- 6  3
-
-julia> lu!(iA)
-ERROR: InexactError: Int64(0.6666666666666666)
-Stacktrace:
-[...]
-```
-"""
-lu!(A::AbstractMatrix, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(eltype(A)); check::Bool = true) =
-    generic_lufact!(A, pivot; check = check)
-function generic_lufact!(A::AbstractMatrix{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T);
-                         check::Bool = true) where {T}
-    LAPACK.chkfinite(A)
-    # Extract values
-    m, n = size(A)
-    minmn = min(m,n)
-
-    # Initialize variables
-    info = 0
-    ipiv = Vector{BlasInt}(undef, minmn)
-    @inbounds begin
-        for k = 1:minmn
-            # find index max
-            kp = k
-            if pivot === RowMaximum() && k < m
-                amax = abs(A[k, k])
-                for i = k+1:m
-                    absi = abs(A[i,k])
-                    if absi > amax
-                        kp = i
-                        amax = absi
-                    end
-                end
-            elseif pivot === RowNonZero()
-                for i = k:m
-                    if !iszero(A[i,k])
-                        kp = i
-                        break
-                    end
-                end
-            end
-            ipiv[k] = kp
-            if !iszero(A[kp,k])
-                if k != kp
-                    # Interchange
-                    for i = 1:n
-                        tmp = A[k,i]
-                        A[k,i] = A[kp,i]
-                        A[kp,i] = tmp
-                    end
-                end
-                # Scale first column
-                Akkinv = inv(A[k,k])
-                for i = k+1:m
-                    A[i,k] *= Akkinv
-                end
-            elseif info == 0
-                info = k
-            end
-            # Update the rest
-            for j = k+1:n
-                for i = k+1:m
-                    A[i,j] -= A[i,k]*A[k,j]
-                end
-            end
-        end
-    end
-    check && checknonsingular(info, pivot)
-    return LU{T,typeof(A),typeof(ipiv)}(A, ipiv, convert(BlasInt, info))
-end
-
-function lutype(T::Type)
-    # In generic_lufact!, the elements of the lower part of the matrix are
-    # obtained using the division of two matrix elements. Hence their type can
-    # be different (e.g. the division of two types with the same unit is a type
-    # without unit).
-    # The elements of the upper part are obtained by U - U * L
-    # where U is an upper part element and L is a lower part element.
-    # Therefore, the types LT, UT should be invariant under the map:
-    # (LT, UT) -> begin
-    #     L = oneunit(UT) / oneunit(UT)
-    #     U = oneunit(UT) - oneunit(UT) * L
-    #     typeof(L), typeof(U)
-    # end
-    # The following should handle most cases
-    UT = typeof(oneunit(T) - oneunit(T) * (oneunit(T) / (oneunit(T) + zero(T))))
-    LT = typeof(oneunit(UT) / oneunit(UT))
-    S = promote_type(T, LT, UT)
-end
-
-lupivottype(::Type{T}) where {T} = RowMaximum()
-
-# for all other types we must promote to a type which is stable under division
-"""
-    lu(A, pivot = RowMaximum(); check = true) -> F::LU
-
-Compute the LU factorization of `A`.
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-In most cases, if `A` is a subtype `S` of `AbstractMatrix{T}` with an element
-type `T` supporting `+`, `-`, `*` and `/`, the return type is `LU{T,S{T}}`.
-
-In general, LU factorization involves a permutation of the rows of the matrix
-(corresponding to the `F.p` output described below), known as "pivoting" (because it
-corresponds to choosing which row contains the "pivot", the diagonal entry of `F.U`).
-One of the following pivoting strategies can be selected via the optional `pivot` argument:
-
-* `RowMaximum()` (default): the standard pivoting strategy; the pivot corresponds
-  to the element of maximum absolute value among the remaining, to be factorized rows.
-  This pivoting strategy requires the element type to also support [`abs`](@ref) and
-  [`<`](@ref). (This is generally the only numerically stable option for floating-point
-  matrices.)
-* `RowNonZero()`: the pivot corresponds to the first non-zero element among the remaining,
-  to be factorized rows.  (This corresponds to the typical choice in hand calculations, and
-  is also useful for more general algebraic number types that support [`iszero`](@ref) but
-  not `abs` or `<`.)
-* `NoPivot()`: pivoting turned off (may fail if a zero entry is encountered).
-
-The individual components of the factorization `F` can be accessed via [`getproperty`](@ref):
-
-| Component | Description                         |
-|:----------|:------------------------------------|
-| `F.L`     | `L` (lower triangular) part of `LU` |
-| `F.U`     | `U` (upper triangular) part of `LU` |
-| `F.p`     | (right) permutation `Vector`        |
-| `F.P`     | (right) permutation `Matrix`        |
-
-Iterating the factorization produces the components `F.L`, `F.U`, and `F.p`.
-
-The relationship between `F` and `A` is
-
-`F.L*F.U == A[F.p, :]`
-
-`F` further supports the following functions:
-
-| Supported function               | `LU` | `LU{T,Tridiagonal{T}}` |
-|:---------------------------------|:-----|:-----------------------|
-| [`/`](@ref)                      | ✓    |                        |
-| [`\\`](@ref)                     | ✓    | ✓                      |
-| [`inv`](@ref)                    | ✓    | ✓                      |
-| [`det`](@ref)                    | ✓    | ✓                      |
-| [`logdet`](@ref)                 | ✓    | ✓                      |
-| [`logabsdet`](@ref)              | ✓    | ✓                      |
-| [`size`](@ref)                   | ✓    | ✓                      |
-
-# Examples
-```jldoctest
-julia> A = [4 3; 6 3]
-2×2 Matrix{Int64}:
- 4  3
- 6  3
-
-julia> F = lu(A)
-LU{Float64, Matrix{Float64}, Vector{Int64}}
-L factor:
-2×2 Matrix{Float64}:
- 1.0       0.0
- 0.666667  1.0
-U factor:
-2×2 Matrix{Float64}:
- 6.0  3.0
- 0.0  1.0
-
-julia> F.L * F.U == A[F.p, :]
-true
-
-julia> l, u, p = lu(A); # destructuring via iteration
-
-julia> l == F.L && u == F.U && p == F.p
-true
-```
-"""
-function lu(A::AbstractMatrix{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); check::Bool = true) where {T}
-    lu!(_lucopy(A, lutype(T)), pivot; check = check)
-end
-# TODO: remove for Julia v2.0
-@deprecate lu(A::AbstractMatrix, ::Val{true}; check::Bool = true) lu(A, RowMaximum(); check=check)
-@deprecate lu(A::AbstractMatrix, ::Val{false}; check::Bool = true) lu(A, NoPivot(); check=check)
-
-_lucopy(A::AbstractMatrix, T) = copy_similar(A, T)
-_lucopy(A::HermOrSym, T)      = copymutable_oftype(A, T)
-_lucopy(A::Tridiagonal, T)    = copymutable_oftype(A, T)
-
-lu(S::LU) = S
-function lu(x::Number; check::Bool=true)
-    info = x == 0 ? one(BlasInt) : zero(BlasInt)
-    check && checknonsingular(info)
-    return LU(fill(x, 1, 1), BlasInt[1], info)
-end
-
-function LU{T}(F::LU) where T
-    M = convert(AbstractMatrix{T}, F.factors)
-    LU{T,typeof(M),typeof(F.ipiv)}(M, F.ipiv, F.info)
-end
-LU{T,S,P}(F::LU) where {T,S,P} = LU{T,S,P}(convert(S, F.factors), convert(P, F.ipiv), F.info)
-Factorization{T}(F::LU{T}) where {T} = F
-Factorization{T}(F::LU) where {T} = LU{T}(F)
-
-copy(A::LU{T,S,P}) where {T,S,P} = LU{T,S,P}(copy(A.factors), copy(A.ipiv), A.info)
-
-size(A::LU)    = size(getfield(A, :factors))
-size(A::LU, i::Integer) = size(getfield(A, :factors), i)
-
-function ipiv2perm(v::AbstractVector{T}, maxi::Integer) where T
-    require_one_based_indexing(v)
-    p = T[1:maxi;]
-    @inbounds for i in 1:length(v)
-        p[i], p[v[i]] = p[v[i]], p[i]
-    end
-    return p
-end
-
-function getproperty(F::LU{T}, d::Symbol) where T
-    m, n = size(F)
-    if d === :L
-        L = tril!(getfield(F, :factors)[1:m, 1:min(m,n)])
-        for i = 1:min(m,n); L[i,i] = one(T); end
-        return L
-    elseif d === :U
-        return triu!(getfield(F, :factors)[1:min(m,n), 1:n])
-    elseif d === :p
-        return ipiv2perm(getfield(F, :ipiv), m)
-    elseif d === :P
-        return Matrix{T}(I, m, m)[:,invperm(F.p)]
-    else
-        getfield(F, d)
-    end
-end
-
-Base.propertynames(F::LU, private::Bool=false) =
-    (:L, :U, :p, :P, (private ? fieldnames(typeof(F)) : ())...)
-
-issuccess(F::LU) = F.info == 0
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LU)
-    if issuccess(F)
-        summary(io, F); println(io)
-        println(io, "L factor:")
-        show(io, mime, F.L)
-        println(io, "\nU factor:")
-        show(io, mime, F.U)
-    else
-        print(io, "Failed factorization of type $(typeof(F))")
-    end
-end
-
-_apply_ipiv_rows!(A::LU, B::AbstractVecOrMat) = _ipiv_rows!(A, 1 : length(A.ipiv), B)
-_apply_inverse_ipiv_rows!(A::LU, B::AbstractVecOrMat) = _ipiv_rows!(A, length(A.ipiv) : -1 : 1, B)
-
-function _ipiv_rows!(A::LU, order::OrdinalRange, B::AbstractVecOrMat)
-    for i = order
-        if i != A.ipiv[i]
-            _swap_rows!(B, i, A.ipiv[i])
-        end
-    end
-    B
-end
-
-function _swap_rows!(B::AbstractVector, i::Integer, j::Integer)
-    B[i], B[j] = B[j], B[i]
-    B
-end
-
-function _swap_rows!(B::AbstractMatrix, i::Integer, j::Integer)
-    for col = 1 : size(B, 2)
-        B[i,col], B[j,col] = B[j,col], B[i,col]
-    end
-    B
-end
-
-_apply_ipiv_cols!(A::LU, B::AbstractVecOrMat) = _ipiv_cols!(A, 1 : length(A.ipiv), B)
-_apply_inverse_ipiv_cols!(A::LU, B::AbstractVecOrMat) = _ipiv_cols!(A, length(A.ipiv) : -1 : 1, B)
-
-function _ipiv_cols!(A::LU, order::OrdinalRange, B::AbstractVecOrMat)
-    for i = order
-        if i != A.ipiv[i]
-            _swap_cols!(B, i, A.ipiv[i])
-        end
-    end
-    B
-end
-
-function _swap_cols!(B::AbstractVector, i::Integer, j::Integer)
-    _swap_rows!(B, i, j)
-end
-
-function _swap_cols!(B::AbstractMatrix, i::Integer, j::Integer)
-    for row = 1 : size(B, 1)
-        B[row,i], B[row,j] = B[row,j], B[row,i]
-    end
-    B
-end
-
-function rdiv!(A::AbstractVecOrMat, B::LU)
-    rdiv!(rdiv!(A, UpperTriangular(B.factors)), UnitLowerTriangular(B.factors))
-    _apply_inverse_ipiv_cols!(B, A)
-end
-
-ldiv!(A::LU{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    LAPACK.getrs!('N', A.factors, A.ipiv, B)
-
-function ldiv!(A::LU, B::AbstractVecOrMat)
-    _apply_ipiv_rows!(A, B)
-    ldiv!(UpperTriangular(A.factors), ldiv!(UnitLowerTriangular(A.factors), B))
-end
-
-ldiv!(transA::TransposeFactorization{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    (A = transA.parent; LAPACK.getrs!('T', A.factors, A.ipiv, B))
-
-function ldiv!(transA::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat)
-    A = transA.parent
-    ldiv!(transpose(UnitLowerTriangular(A.factors)), ldiv!(transpose(UpperTriangular(A.factors)), B))
-    _apply_inverse_ipiv_rows!(A, B)
-end
-
-ldiv!(adjA::AdjointFactorization{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
-    (A = adjA.parent; LAPACK.getrs!('C', A.factors, A.ipiv, B))
-
-function ldiv!(adjA::AdjointFactorization{<:Any,<:LU}, B::AbstractVecOrMat)
-    A = adjA.parent
-    ldiv!(adjoint(UnitLowerTriangular(A.factors)), ldiv!(adjoint(UpperTriangular(A.factors)), B))
-    _apply_inverse_ipiv_rows!(A, B)
-end
-
-(\)(A::AdjointFactorization{T,<:LU{T,<:StridedMatrix}}, B::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasComplex} =
-    LAPACK.getrs!('C', A.parent.factors, A.parent.ipiv, copy(B))
-(\)(A::TransposeFactorization{T,<:LU{T,<:StridedMatrix}}, B::Transpose{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} =
-    LAPACK.getrs!('T', A.parent.factors, A.parent.ipiv, copy(B))
-
-function det(F::LU{T}) where T
-    n = checksquare(F)
-    issuccess(F) || return zero(T)
-    P = one(T)
-    c = 0
-    @inbounds for i = 1:n
-        P *= F.factors[i,i]
-        if F.ipiv[i] != i
-            c += 1
-        end
-    end
-    s = (isodd(c) ? -one(T) : one(T))
-    return P * s
-end
-
-function logabsdet(F::LU{T}) where T  # return log(abs(det)) and sign(det)
-    n = checksquare(F)
-    issuccess(F) || return log(zero(real(T))), log(one(T))
-    c = 0
-    P = one(T)
-    abs_det = zero(real(T))
-    @inbounds for i = 1:n
-        dg_ii = F.factors[i,i]
-        P *= sign(dg_ii)
-        if F.ipiv[i] != i
-            c += 1
-        end
-        abs_det += log(abs(dg_ii))
-    end
-    s = ifelse(isodd(c), -one(real(T)), one(real(T))) * P
-    abs_det, s
-end
-
-inv!(A::LU{<:BlasFloat,<:StridedMatrix}) =
-    LAPACK.getri!(A.factors, A.ipiv)
-inv!(A::LU{T,<:StridedMatrix}) where {T} =
-    ldiv!(A.factors, copy(A), Matrix{T}(I, size(A, 1), size(A, 1)))
-inv(A::LU{<:BlasFloat,<:StridedMatrix}) = inv!(copy(A))
-
-# Tridiagonal
-
-# See dgttrf.f
-function lu!(A::Tridiagonal{T,V}, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) where {T,V}
-    # Extract values
-    n = size(A, 1)
-
-    # Initialize variables
-    info = 0
-    ipiv = Vector{BlasInt}(undef, n)
-    dl = A.dl
-    d = A.d
-    du = A.du
-    if dl === du
-        throw(ArgumentError("off-diagonals of `A` must not alias"))
-    end
-    # Check if Tridiagonal matrix already has du2 for pivoting
-    has_du2_defined = isdefined(A, :du2) && length(A.du2) == max(0, n-2)
-    if has_du2_defined
-        du2 = A.du2::V
-    else
-        du2 = similar(d, max(0, n-2))::V
-    end
-    fill!(du2, 0)
-
-    @inbounds begin
-        for i = 1:n
-            ipiv[i] = i
-        end
-        for i = 1:n-2
-            # pivot or not?
-            if pivot === NoPivot() || abs(d[i]) >= abs(dl[i])
-                # No interchange
-                if d[i] != 0
-                    fact = dl[i]/d[i]
-                    dl[i] = fact
-                    d[i+1] -= fact*du[i]
-                    du2[i] = 0
-                end
-            else
-                # Interchange
-                fact = d[i]/dl[i]
-                d[i] = dl[i]
-                dl[i] = fact
-                tmp = du[i]
-                du[i] = d[i+1]
-                d[i+1] = tmp - fact*d[i+1]
-                du2[i] = du[i+1]
-                du[i+1] = -fact*du[i+1]
-                ipiv[i] = i+1
-            end
-        end
-        if n > 1
-            i = n-1
-            if pivot === NoPivot() || abs(d[i]) >= abs(dl[i])
-                if d[i] != 0
-                    fact = dl[i]/d[i]
-                    dl[i] = fact
-                    d[i+1] -= fact*du[i]
-                end
-            else
-                fact = d[i]/dl[i]
-                d[i] = dl[i]
-                dl[i] = fact
-                tmp = du[i]
-                du[i] = d[i+1]
-                d[i+1] = tmp - fact*d[i+1]
-                ipiv[i] = i+1
-            end
-        end
-        # check for a zero on the diagonal of U
-        for i = 1:n
-            if d[i] == 0
-                info = i
-                break
-            end
-        end
-    end
-    B = has_du2_defined ? A : Tridiagonal{T,V}(dl, d, du, du2)
-    check && checknonsingular(info, pivot)
-    return LU{T,Tridiagonal{T,V},typeof(ipiv)}(B, ipiv, convert(BlasInt, info))
-end
-
-factorize(A::Tridiagonal) = lu(A)
-
-function getproperty(F::LU{T,Tridiagonal{T,V}}, d::Symbol) where {T,V}
-    m, n = size(F)
-    if d === :L
-        dl = getfield(getfield(F, :factors), :dl)
-        L = Array(Bidiagonal(fill!(similar(dl, n), one(T)), dl, d))
-        for i = 2:n
-            tmp = L[getfield(F, :ipiv)[i], 1:i - 1]
-            L[getfield(F, :ipiv)[i], 1:i - 1] = L[i, 1:i - 1]
-            L[i, 1:i - 1] = tmp
-        end
-        return L
-    elseif d === :U
-        U = Array(Bidiagonal(getfield(getfield(F, :factors), :d), getfield(getfield(F, :factors), :du), d))
-        for i = 1:n - 2
-            U[i,i + 2] = getfield(getfield(F, :factors), :du2)[i]
-        end
-        return U
-    elseif d === :p
-        return ipiv2perm(getfield(F, :ipiv), m)
-    elseif d === :P
-        return Matrix{T}(I, m, m)[:,invperm(F.p)]
-    end
-    return getfield(F, d)
-end
-
-# See dgtts2.f
-function ldiv!(A::LU{T,Tridiagonal{T,V}}, B::AbstractVecOrMat) where {T,V}
-    require_one_based_indexing(B)
-    n = size(A,1)
-    if n != size(B,1)
-        throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
-    end
-    nrhs = size(B,2)
-    dl = A.factors.dl
-    d = A.factors.d
-    du = A.factors.du
-    du2 = A.factors.du2
-    ipiv = A.ipiv
-    @inbounds begin
-        for j = 1:nrhs
-            for i = 1:n-1
-                ip = ipiv[i]
-                tmp = B[i+1-ip+i,j] - dl[i]*B[ip,j]
-                B[i,j] = B[ip,j]
-                B[i+1,j] = tmp
-            end
-            B[n,j] /= d[n]
-            if n > 1
-                B[n-1,j] = (B[n-1,j] - du[n-1]*B[n,j])/d[n-1]
-            end
-            for i = n-2:-1:1
-                B[i,j] = (B[i,j] - du[i]*B[i+1,j] - du2[i]*B[i+2,j])/d[i]
-            end
-        end
-    end
-    return B
-end
-
-function ldiv!(transA::TransposeFactorization{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V}
-    require_one_based_indexing(B)
-    A = transA.parent
-    n = size(A,1)
-    if n != size(B,1)
-        throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
-    end
-    nrhs = size(B,2)
-    dl = A.factors.dl
-    d = A.factors.d
-    du = A.factors.du
-    du2 = A.factors.du2
-    ipiv = A.ipiv
-    @inbounds begin
-        for j = 1:nrhs
-            B[1,j] /= d[1]
-            if n > 1
-                B[2,j] = (B[2,j] - du[1]*B[1,j])/d[2]
-            end
-            for i = 3:n
-                B[i,j] = (B[i,j] - du[i-1]*B[i-1,j] - du2[i-2]*B[i-2,j])/d[i]
-            end
-            for i = n-1:-1:1
-                if ipiv[i] == i
-                    B[i,j] = B[i,j] - dl[i]*B[i+1,j]
-                else
-                    tmp = B[i+1,j]
-                    B[i+1,j] = B[i,j] - dl[i]*tmp
-                    B[i,j] = tmp
-                end
-            end
-        end
-    end
-    return B
-end
-
-# Ac_ldiv_B!(A::LU{T,Tridiagonal{T}}, B::AbstractVecOrMat) where {T<:Real} = At_ldiv_B!(A,B)
-function ldiv!(adjA::AdjointFactorization{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V}
-    require_one_based_indexing(B)
-    A = adjA.parent
-    n = size(A,1)
-    if n != size(B,1)
-        throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows"))
-    end
-    nrhs = size(B,2)
-    dl = A.factors.dl
-    d = A.factors.d
-    du = A.factors.du
-    du2 = A.factors.du2
-    ipiv = A.ipiv
-    @inbounds begin
-        for j = 1:nrhs
-            B[1,j] /= conj(d[1])
-            if n > 1
-                B[2,j] = (B[2,j] - conj(du[1])*B[1,j])/conj(d[2])
-            end
-            for i = 3:n
-                B[i,j] = (B[i,j] - conj(du[i-1])*B[i-1,j] - conj(du2[i-2])*B[i-2,j])/conj(d[i])
-            end
-            for i = n-1:-1:1
-                if ipiv[i] == i
-                    B[i,j] = B[i,j] - conj(dl[i])*B[i+1,j]
-                else
-                    tmp = B[i+1,j]
-                    B[i+1,j] = B[i,j] - conj(dl[i])*tmp
-                    B[i,j] = tmp
-                end
-            end
-        end
-    end
-    return B
-end
-
-rdiv!(B::AbstractMatrix, A::LU) = transpose(ldiv!(transpose(A), transpose(B)))
-rdiv!(B::AbstractMatrix, A::TransposeFactorization{<:Any,<:LU}) = transpose(ldiv!(A.parent, transpose(B)))
-rdiv!(B::AbstractMatrix, A::AdjointFactorization{<:Any,<:LU}) = adjoint(ldiv!(A.parent, adjoint(B)))
-
-# Conversions
-AbstractMatrix(F::LU) = (F.L * F.U)[invperm(F.p),:]
-AbstractArray(F::LU) = AbstractMatrix(F)
-Matrix(F::LU) = Array(AbstractArray(F))
-Array(F::LU) = Matrix(F)
-
-function Tridiagonal(F::LU{T,Tridiagonal{T,V}}) where {T,V}
-    n = size(F, 1)
-
-    dl  = copy(F.factors.dl)
-    d   = copy(F.factors.d)
-    du  = copy(F.factors.du)
-    du2 = copy(F.factors.du2)
-
-    for i = n - 1:-1:1
-        li         = dl[i]
-        dl[i]      = li*d[i]
-        d[i + 1]  += li*du[i]
-        if i < n - 1
-            du[i + 1] += li*du2[i]
-        end
-
-        if F.ipiv[i] != i
-            tmp   = dl[i]
-            dl[i] = d[i]
-            d[i]  = tmp
-
-            tmp      = d[i + 1]
-            d[i + 1] = du[i]
-            du[i]    = tmp
-
-            if i < n - 1
-                tmp       = du[i + 1]
-                du[i + 1] = du2[i]
-                du2[i]    = tmp
-            end
-        end
-    end
-    return Tridiagonal(dl, d, du)
-end
-AbstractMatrix(F::LU{T,Tridiagonal{T,V}}) where {T,V} = Tridiagonal(F)
-AbstractArray(F::LU{T,Tridiagonal{T,V}}) where {T,V} = AbstractMatrix(F)
-Matrix(F::LU{T,Tridiagonal{T,V}}) where {T,V} = Array(AbstractArray(F))
-Array(F::LU{T,Tridiagonal{T,V}}) where {T,V} = Matrix(F)
diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl
deleted file mode 100644
index e375108f6a831..0000000000000
--- a/stdlib/LinearAlgebra/src/matmul.jl
+++ /dev/null
@@ -1,1259 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# matmul.jl: Everything to do with dense matrix multiplication
-
-# Matrix-matrix multiplication
-
-AdjOrTransStridedMat{T} = Union{Adjoint{<:Any, <:StridedMatrix{T}}, Transpose{<:Any, <:StridedMatrix{T}}}
-StridedMaybeAdjOrTransMat{T} = Union{StridedMatrix{T}, Adjoint{<:Any, <:StridedMatrix{T}}, Transpose{<:Any, <:StridedMatrix{T}}}
-StridedMaybeAdjOrTransVecOrMat{T} = Union{StridedVecOrMat{T}, AdjOrTrans{<:Any, <:StridedVecOrMat{T}}}
-
-_parent(A) = A
-_parent(A::Adjoint) = parent(A)
-_parent(A::Transpose) = parent(A)
-
-matprod(x, y) = x*y + x*y
-
-# dot products
-
-dot(x::StridedVecLike{T}, y::StridedVecLike{T}) where {T<:BlasReal} = BLAS.dot(x, y)
-dot(x::StridedVecLike{T}, y::StridedVecLike{T}) where {T<:BlasComplex} = BLAS.dotc(x, y)
-
-function dot(x::Vector{T}, rx::AbstractRange{TI}, y::Vector{T}, ry::AbstractRange{TI}) where {T<:BlasReal,TI<:Integer}
-    if length(rx) != length(ry)
-        throw(DimensionMismatch(lazy"length of rx, $(length(rx)), does not equal length of ry, $(length(ry))"))
-    end
-    if minimum(rx) < 1 || maximum(rx) > length(x)
-        throw(BoundsError(x, rx))
-    end
-    if minimum(ry) < 1 || maximum(ry) > length(y)
-        throw(BoundsError(y, ry))
-    end
-    GC.@preserve x y BLAS.dot(length(rx), pointer(x)+(first(rx)-1)*sizeof(T), step(rx), pointer(y)+(first(ry)-1)*sizeof(T), step(ry))
-end
-
-function dot(x::Vector{T}, rx::AbstractRange{TI}, y::Vector{T}, ry::AbstractRange{TI}) where {T<:BlasComplex,TI<:Integer}
-    if length(rx) != length(ry)
-        throw(DimensionMismatch(lazy"length of rx, $(length(rx)), does not equal length of ry, $(length(ry))"))
-    end
-    if minimum(rx) < 1 || maximum(rx) > length(x)
-        throw(BoundsError(x, rx))
-    end
-    if minimum(ry) < 1 || maximum(ry) > length(y)
-        throw(BoundsError(y, ry))
-    end
-    GC.@preserve x y BLAS.dotc(length(rx), pointer(x)+(first(rx)-1)*sizeof(T), step(rx), pointer(y)+(first(ry)-1)*sizeof(T), step(ry))
-end
-
-function *(transx::Transpose{<:Any,<:StridedVector{T}}, y::StridedVector{T}) where {T<:BlasComplex}
-    x = transx.parent
-    return BLAS.dotu(x, y)
-end
-
-# Matrix-vector multiplication
-function (*)(A::StridedMaybeAdjOrTransMat{T}, x::StridedVector{S}) where {T<:BlasFloat,S<:Real}
-    TS = promote_op(matprod, T, S)
-    y = isconcretetype(TS) ? convert(AbstractVector{TS}, x) : x
-    mul!(similar(x, TS, size(A,1)), A, y)
-end
-function (*)(A::AbstractMatrix{T}, x::AbstractVector{S}) where {T,S}
-    TS = promote_op(matprod, T, S)
-    mul!(similar(x, TS, axes(A,1)), A, x)
-end
-
-# these will throw a DimensionMismatch unless B has 1 row (or 1 col for transposed case):
-(*)(a::AbstractVector, tB::TransposeAbsMat) = reshape(a, length(a), 1) * tB
-(*)(a::AbstractVector, adjB::AdjointAbsMat) = reshape(a, length(a), 1) * adjB
-(*)(a::AbstractVector, B::AbstractMatrix) = reshape(a, length(a), 1) * B
-
-@inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector,
-                alpha::Number, beta::Number) =
-    generic_matvecmul!(y, wrapper_char(A), _unwrap(A), x, MulAddMul(alpha, beta))
-# BLAS cases
-# equal eltypes
-@inline generic_matvecmul!(y::StridedVector{T}, tA, A::StridedVecOrMat{T}, x::StridedVector{T},
-                _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} =
-    gemv!(y, tA, A, x, _add.alpha, _add.beta)
-# Real (possibly transposed) matrix times complex vector.
-# Multiply the matrix with the real and imaginary parts separately
-@inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}},
-                _add::MulAddMul=MulAddMul()) where {T<:BlasReal} =
-    gemv!(y, tA, A, x, _add.alpha, _add.beta)
-# Complex matrix times real vector.
-# Reinterpret the matrix as a real matrix and do real matvec computation.
-# works only in cooperation with BLAS when A is untransposed (tA == 'N')
-# but that check is included in gemv! anyway
-@inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T},
-                _add::MulAddMul=MulAddMul()) where {T<:BlasReal} =
-    gemv!(y, tA, A, x, _add.alpha, _add.beta)
-
-# Vector-Matrix multiplication
-(*)(x::AdjointAbsVec,   A::AbstractMatrix) = (A'*x')'
-(*)(x::TransposeAbsVec, A::AbstractMatrix) = transpose(transpose(A)*transpose(x))
-
-# Matrix-matrix multiplication
-"""
-    *(A::AbstractMatrix, B::AbstractMatrix)
-
-Matrix multiplication.
-
-# Examples
-```jldoctest
-julia> [1 1; 0 1] * [1 0; 1 1]
-2×2 Matrix{Int64}:
- 2  1
- 1  1
-```
-"""
-function (*)(A::AbstractMatrix, B::AbstractMatrix)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    mul!(similar(B, TS, (size(A, 1), size(B, 2))), A, B)
-end
-# optimization for dispatching to BLAS, e.g. *(::Matrix{Float32}, ::Matrix{Float64})
-# but avoiding the case *(::Matrix{<:BlasComplex}, ::Matrix{<:BlasReal})
-# which is better handled by reinterpreting rather than promotion
-function (*)(A::StridedMaybeAdjOrTransMat{<:BlasReal}, B::StridedMaybeAdjOrTransMat{<:BlasReal})
-    TS = promote_type(eltype(A), eltype(B))
-    mul!(similar(B, TS, (size(A, 1), size(B, 2))),
-         wrapperop(A)(convert(AbstractArray{TS}, _parent(A))),
-         wrapperop(B)(convert(AbstractArray{TS}, _parent(B))))
-end
-function (*)(A::StridedMaybeAdjOrTransMat{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasComplex})
-    TS = promote_type(eltype(A), eltype(B))
-    mul!(similar(B, TS, (size(A, 1), size(B, 2))),
-         wrapperop(A)(convert(AbstractArray{TS}, _parent(A))),
-         wrapperop(B)(convert(AbstractArray{TS}, _parent(B))))
-end
-
-# Complex Matrix times real matrix: We use that it is generally faster to reinterpret the
-# first matrix as a real matrix and carry out real matrix matrix multiply
-function (*)(A::StridedMatrix{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasReal})
-    TS = promote_type(eltype(A), eltype(B))
-    mul!(similar(B, TS, (size(A, 1), size(B, 2))),
-         convert(AbstractArray{TS}, A),
-         wrapperop(B)(convert(AbstractArray{real(TS)}, _parent(B))))
-end
-function (*)(A::AdjOrTransStridedMat{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasReal})
-    TS = promote_type(eltype(A), eltype(B))
-    mul!(similar(B, TS, (size(A, 1), size(B, 2))),
-         copymutable_oftype(A, TS), # remove AdjOrTrans to use reinterpret trick below
-         wrapperop(B)(convert(AbstractArray{real(TS)}, _parent(B))))
-end
-# the following case doesn't seem to benefit from the translation A*B = (B' * A')'
-function (*)(A::StridedMatrix{<:BlasReal}, B::StridedMatrix{<:BlasComplex})
-    temp = real(B)
-    R = A * temp
-    temp .= imag.(B)
-    I = A * temp
-    Complex.(R, I)
-end
-(*)(A::AdjOrTransStridedMat{<:BlasReal}, B::StridedMatrix{<:BlasComplex}) = copy(transpose(transpose(B) * parent(A)))
-(*)(A::StridedMaybeAdjOrTransMat{<:BlasReal}, B::AdjOrTransStridedMat{<:BlasComplex}) = copy(wrapperop(B)(parent(B) * transpose(A)))
-
-"""
-    muladd(A, y, z)
-
-Combined multiply-add, `A*y .+ z`, for matrix-matrix or matrix-vector multiplication.
-The result is always the same size as `A*y`, but `z` may be smaller, or a scalar.
-
-!!! compat "Julia 1.6"
-     These methods require Julia 1.6 or later.
-
-# Examples
-```jldoctest
-julia> A=[1.0 2.0; 3.0 4.0]; B=[1.0 1.0; 1.0 1.0]; z=[0, 100];
-
-julia> muladd(A, B, z)
-2×2 Matrix{Float64}:
-   3.0    3.0
- 107.0  107.0
-```
-"""
-function Base.muladd(A::AbstractMatrix, y::AbstractVecOrMat, z::Union{Number, AbstractArray})
-    Ay = A * y
-    for d in 1:ndims(Ay)
-        # Same error as Ay .+= z would give, to match StridedMatrix method:
-        size(z,d) > size(Ay,d) && throw(DimensionMismatch("array could not be broadcast to match destination"))
-    end
-    for d in ndims(Ay)+1:ndims(z)
-        # Similar error to what Ay + z would give, to match (Any,Any,Any) method:
-        size(z,d) > 1 && throw(DimensionMismatch(string("dimensions must match: z has dims ",
-            axes(z), ", must have singleton at dim ", d)))
-    end
-    Ay .+ z
-end
-
-function Base.muladd(u::AbstractVector, v::AdjOrTransAbsVec, z::Union{Number, AbstractArray})
-    if size(z,1) > length(u) || size(z,2) > length(v)
-        # Same error as (u*v) .+= z:
-        throw(DimensionMismatch("array could not be broadcast to match destination"))
-    end
-    for d in 3:ndims(z)
-        # Similar error to (u*v) + z:
-        size(z,d) > 1 && throw(DimensionMismatch(string("dimensions must match: z has dims ",
-            axes(z), ", must have singleton at dim ", d)))
-    end
-    (u .* v) .+ z
-end
-
-Base.muladd(x::AdjointAbsVec, A::AbstractMatrix, z::Union{Number, AbstractVecOrMat}) =
-    muladd(A', x', z')'
-Base.muladd(x::TransposeAbsVec, A::AbstractMatrix, z::Union{Number, AbstractVecOrMat}) =
-    transpose(muladd(transpose(A), transpose(x), transpose(z)))
-
-function Base.muladd(A::StridedMaybeAdjOrTransMat{<:Number}, y::AbstractVector{<:Number}, z::Union{Number, AbstractVector})
-    T = promote_type(eltype(A), eltype(y), eltype(z))
-    C = similar(A, T, axes(A,1))
-    C .= z
-    mul!(C, A, y, true, true)
-end
-
-function Base.muladd(A::StridedMaybeAdjOrTransMat{<:Number}, B::StridedMaybeAdjOrTransMat{<:Number}, z::Union{Number, AbstractVecOrMat})
-    T = promote_type(eltype(A), eltype(B), eltype(z))
-    C = similar(A, T, axes(A,1), axes(B,2))
-    C .= z
-    mul!(C, A, B, true, true)
-end
-
-"""
-    mul!(Y, A, B) -> Y
-
-Calculates the matrix-matrix or matrix-vector product ``AB`` and stores the result in `Y`,
-overwriting the existing value of `Y`. Note that `Y` must not be aliased with either `A` or
-`B`.
-
-# Examples
-```jldoctest
-julia> A=[1.0 2.0; 3.0 4.0]; B=[1.0 1.0; 1.0 1.0]; Y = similar(B); mul!(Y, A, B);
-
-julia> Y
-2×2 Matrix{Float64}:
- 3.0  3.0
- 7.0  7.0
-```
-
-# Implementation
-For custom matrix and vector types, it is recommended to implement
-5-argument `mul!` rather than implementing 3-argument `mul!` directly
-if possible.
-"""
-@inline function mul!(C, A, B)
-    return mul!(C, A, B, true, false)
-end
-
-"""
-    mul!(C, A, B, α, β) -> C
-
-Combined inplace matrix-matrix or matrix-vector multiply-add ``A B α + C β``.
-The result is stored in `C` by overwriting it.  Note that `C` must not be
-aliased with either `A` or `B`.
-
-!!! compat "Julia 1.3"
-    Five-argument `mul!` requires at least Julia 1.3.
-
-# Examples
-```jldoctest
-julia> A=[1.0 2.0; 3.0 4.0]; B=[1.0 1.0; 1.0 1.0]; C=[1.0 2.0; 3.0 4.0];
-
-julia> mul!(C, A, B, 100.0, 10.0) === C
-true
-
-julia> C
-2×2 Matrix{Float64}:
- 310.0  320.0
- 730.0  740.0
-```
-"""
-@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat, α::Number, β::Number) =
-    generic_matmatmul!(
-        C,
-        wrapper_char(A),
-        wrapper_char(B),
-        _unwrap(A),
-        _unwrap(B),
-        MulAddMul(α, β)
-    )
-
-"""
-    rmul!(A, B)
-
-Calculate the matrix-matrix product ``AB``, overwriting `A`, and return the result.
-Here, `B` must be of special matrix type, like, e.g., [`Diagonal`](@ref),
-[`UpperTriangular`](@ref) or [`LowerTriangular`](@ref), or of some orthogonal type,
-see [`QR`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [0 1; 1 0];
-
-julia> B = UpperTriangular([1 2; 0 3]);
-
-julia> rmul!(A, B);
-
-julia> A
-2×2 Matrix{Int64}:
- 0  3
- 1  2
-
-julia> A = [1.0 2.0; 3.0 4.0];
-
-julia> F = qr([0 1; -1 0]);
-
-julia> rmul!(A, F.Q)
-2×2 Matrix{Float64}:
- 2.0  1.0
- 4.0  3.0
-```
-"""
-rmul!(A, B)
-
-"""
-    lmul!(A, B)
-
-Calculate the matrix-matrix product ``AB``, overwriting `B`, and return the result.
-Here, `A` must be of special matrix type, like, e.g., [`Diagonal`](@ref),
-[`UpperTriangular`](@ref) or [`LowerTriangular`](@ref), or of some orthogonal type,
-see [`QR`](@ref).
-
-# Examples
-```jldoctest
-julia> B = [0 1; 1 0];
-
-julia> A = UpperTriangular([1 2; 0 3]);
-
-julia> lmul!(A, B);
-
-julia> B
-2×2 Matrix{Int64}:
- 2  1
- 3  0
-
-julia> B = [1.0 2.0; 3.0 4.0];
-
-julia> F = qr([0 1; -1 0]);
-
-julia> lmul!(F.Q, B)
-2×2 Matrix{Float64}:
- 3.0  4.0
- 1.0  2.0
-```
-"""
-lmul!(A, B)
-
-# THE one big BLAS dispatch
-@inline function generic_matmatmul!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
-                                    _add::MulAddMul=MulAddMul()) where {T<:BlasFloat}
-    if all(in(('N', 'T', 'C')), (tA, tB))
-        if tA == 'T' && tB == 'N' && A === B
-            return syrk_wrapper!(C, 'T', A, _add)
-        elseif tA == 'N' && tB == 'T' && A === B
-            return syrk_wrapper!(C, 'N', A, _add)
-        elseif tA == 'C' && tB == 'N' && A === B
-            return herk_wrapper!(C, 'C', A, _add)
-        elseif tA == 'N' && tB == 'C' && A === B
-            return herk_wrapper!(C, 'N', A, _add)
-        else
-            return gemm_wrapper!(C, tA, tB, A, B, _add)
-        end
-    end
-    alpha, beta = promote(_add.alpha, _add.beta, zero(T))
-    if alpha isa Union{Bool,T} && beta isa Union{Bool,T}
-        if (tA == 'S' || tA == 's') && tB == 'N'
-            return BLAS.symm!('L', tA == 'S' ? 'U' : 'L', alpha, A, B, beta, C)
-        elseif (tB == 'S' || tB == 's') && tA == 'N'
-            return BLAS.symm!('R', tB == 'S' ? 'U' : 'L', alpha, B, A, beta, C)
-        elseif (tA == 'H' || tA == 'h') && tB == 'N'
-            return BLAS.hemm!('L', tA == 'H' ? 'U' : 'L', alpha, A, B, beta, C)
-        elseif (tB == 'H' || tB == 'h') && tA == 'N'
-            return BLAS.hemm!('R', tB == 'H' ? 'U' : 'L', alpha, B, A, beta, C)
-        end
-    end
-    return _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add)
-end
-
-# Complex matrix times (transposed) real matrix. Reinterpret the first matrix to real for efficiency.
-@inline function generic_matmatmul!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
-                    _add::MulAddMul=MulAddMul()) where {T<:BlasReal}
-    if all(in(('N', 'T', 'C')), (tA, tB))
-        gemm_wrapper!(C, tA, tB, A, B, _add)
-    else
-        _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add)
-    end
-end
-
-
-# Supporting functions for matrix multiplication
-
-# copy transposed(adjoint) of upper(lower) side-diagonals. Optionally include diagonal.
-@inline function copytri!(A::AbstractMatrix, uplo::AbstractChar, conjugate::Bool=false, diag::Bool=false)
-    n = checksquare(A)
-    off = diag ? 0 : 1
-    if uplo == 'U'
-        for i = 1:n, j = (i+off):n
-            A[j,i] = conjugate ? adjoint(A[i,j]) : transpose(A[i,j])
-        end
-    elseif uplo == 'L'
-        for i = 1:n, j = (i+off):n
-            A[i,j] = conjugate ? adjoint(A[j,i]) : transpose(A[j,i])
-        end
-    else
-        throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
-    end
-    A
-end
-
-function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x::StridedVector{T},
-               α::Number=true, β::Number=false) where {T<:BlasFloat}
-    mA, nA = lapack_size(tA, A)
-    nA != length(x) &&
-        throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match length of x, $(length(x))"))
-    mA != length(y) &&
-        throw(DimensionMismatch(lazy"first dimension of A, $mA, does not match length of y, $(length(y))"))
-    mA == 0 && return y
-    nA == 0 && return _rmul_or_fill!(y, β)
-    alpha, beta = promote(α, β, zero(T))
-    if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
-        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
-        !iszero(stride(x, 1)) && # We only check input's stride here.
-        if tA in ('N', 'T', 'C')
-            return BLAS.gemv!(tA, alpha, A, x, beta, y)
-        elseif tA in ('S', 's')
-            return BLAS.symv!(tA == 'S' ? 'U' : 'L', alpha, A, x, beta, y)
-        elseif tA in ('H', 'h')
-            return BLAS.hemv!(tA == 'H' ? 'U' : 'L', alpha, A, x, beta, y)
-        end
-    end
-    if tA in ('S', 's', 'H', 'h')
-        # re-wrap again and use plain ('N') matvec mul algorithm,
-        # because _generic_matvecmul! can't handle the HermOrSym cases specifically
-        return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β))
-    else
-        return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
-    end
-end
-
-function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T},
-    α::Number = true, β::Number = false) where {T<:BlasReal}
-    mA, nA = lapack_size(tA, A)
-    nA != length(x) &&
-        throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match length of x, $(length(x))"))
-    mA != length(y) &&
-        throw(DimensionMismatch(lazy"first dimension of A, $mA, does not match length of y, $(length(y))"))
-    mA == 0 && return y
-    nA == 0 && return _rmul_or_fill!(y, β)
-    alpha, beta = promote(α, β, zero(T))
-    if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
-        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
-        stride(y, 1) == 1 && tA == 'N' && # reinterpret-based optimization is valid only for contiguous `y`
-        !iszero(stride(x, 1))
-        BLAS.gemv!(tA, alpha, reinterpret(T, A), x, beta, reinterpret(T, y))
-        return y
-    else
-        Anew, ta = tA in ('S', 's', 'H', 'h') ? (wrap(A, tA), 'N') : (A, tA)
-        return _generic_matvecmul!(y, ta, Anew, x, MulAddMul(α, β))
-    end
-end
-
-function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}},
-    α::Number = true, β::Number = false) where {T<:BlasFloat}
-    mA, nA = lapack_size(tA, A)
-    nA != length(x) &&
-        throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match length of x, $(length(x))"))
-    mA != length(y) &&
-        throw(DimensionMismatch(lazy"first dimension of A, $mA, does not match length of y, $(length(y))"))
-    mA == 0 && return y
-    nA == 0 && return _rmul_or_fill!(y, β)
-    alpha, beta = promote(α, β, zero(T))
-    @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
-        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
-        !iszero(stride(x, 1)) && tA in ('N', 'T', 'C')
-        xfl = reinterpret(reshape, T, x) # Use reshape here.
-        yfl = reinterpret(reshape, T, y)
-        BLAS.gemv!(tA, alpha, A, xfl[1, :], beta, yfl[1, :])
-        BLAS.gemv!(tA, alpha, A, xfl[2, :], beta, yfl[2, :])
-        return y
-    elseif tA in ('S', 's', 'H', 'h')
-        # re-wrap again and use plain ('N') matvec mul algorithm,
-        # because _generic_matvecmul! can't handle the HermOrSym cases specifically
-        return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β))
-    else
-        return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
-    end
-end
-
-function syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat{T},
-        _add = MulAddMul()) where {T<:BlasFloat}
-    nC = checksquare(C)
-    if tA == 'T'
-        (nA, mA) = size(A,1), size(A,2)
-        tAt = 'N'
-    else
-        (mA, nA) = size(A,1), size(A,2)
-        tAt = 'T'
-    end
-    if nC != mA
-        throw(DimensionMismatch(lazy"output matrix has size: $(nC), but should have size $(mA)"))
-    end
-    if mA == 0 || nA == 0 || iszero(_add.alpha)
-        return _rmul_or_fill!(C, _add.beta)
-    end
-    if mA == 2 && nA == 2
-        return matmul2x2!(C, tA, tAt, A, A, _add)
-    end
-    if mA == 3 && nA == 3
-        return matmul3x3!(C, tA, tAt, A, A, _add)
-    end
-
-    # BLAS.syrk! only updates symmetric C
-    # alternatively, make non-zero β a show-stopper for BLAS.syrk!
-    if iszero(_add.beta) || issymmetric(C)
-        alpha, beta = promote(_add.alpha, _add.beta, zero(T))
-        if (alpha isa Union{Bool,T} &&
-            beta isa Union{Bool,T} &&
-            stride(A, 1) == stride(C, 1) == 1 &&
-            stride(A, 2) >= size(A, 1) &&
-            stride(C, 2) >= size(C, 1))
-            return copytri!(BLAS.syrk!('U', tA, alpha, A, beta, C), 'U')
-        end
-    end
-    return gemm_wrapper!(C, tA, tAt, A, A, _add)
-end
-
-function herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA::AbstractChar, A::Union{StridedVecOrMat{T}, StridedVecOrMat{Complex{T}}},
-        _add = MulAddMul()) where {T<:BlasReal}
-    nC = checksquare(C)
-    if tA == 'C'
-        (nA, mA) = size(A,1), size(A,2)
-        tAt = 'N'
-    else
-        (mA, nA) = size(A,1), size(A,2)
-        tAt = 'C'
-    end
-    if nC != mA
-        throw(DimensionMismatch(lazy"output matrix has size: $(nC), but should have size $(mA)"))
-    end
-    if mA == 0 || nA == 0 || iszero(_add.alpha)
-        return _rmul_or_fill!(C, _add.beta)
-    end
-    if mA == 2 && nA == 2
-        return matmul2x2!(C, tA, tAt, A, A, _add)
-    end
-    if mA == 3 && nA == 3
-        return matmul3x3!(C, tA, tAt, A, A, _add)
-    end
-
-    # Result array does not need to be initialized as long as beta==0
-    #    C = Matrix{T}(undef, mA, mA)
-
-    if iszero(_add.beta) || issymmetric(C)
-        alpha, beta = promote(_add.alpha, _add.beta, zero(T))
-        if (alpha isa Union{Bool,T} &&
-            beta isa Union{Bool,T} &&
-            stride(A, 1) == stride(C, 1) == 1 &&
-            stride(A, 2) >= size(A, 1) &&
-            stride(C, 2) >= size(C, 1))
-            return copytri!(BLAS.herk!('U', tA, alpha, A, beta, C), 'U', true)
-        end
-    end
-    return gemm_wrapper!(C, tA, tAt, A, A, _add)
-end
-
-function gemm_wrapper(tA::AbstractChar, tB::AbstractChar,
-                      A::StridedVecOrMat{T},
-                      B::StridedVecOrMat{T}) where {T<:BlasFloat}
-    mA, nA = lapack_size(tA, A)
-    mB, nB = lapack_size(tB, B)
-    C = similar(B, T, mA, nB)
-    if all(in(('N', 'T', 'C')), (tA, tB))
-        gemm_wrapper!(C, tA, tB, A, B)
-    else
-        _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add)
-    end
-end
-
-function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar,
-                       A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
-                       _add = MulAddMul()) where {T<:BlasFloat}
-    mA, nA = lapack_size(tA, A)
-    mB, nB = lapack_size(tB, B)
-
-    if nA != mB
-        throw(DimensionMismatch(lazy"A has dimensions ($mA,$nA) but B has dimensions ($mB,$nB)"))
-    end
-
-    if C === A || B === C
-        throw(ArgumentError("output matrix must not be aliased with input matrix"))
-    end
-
-    if mA == 0 || nA == 0 || nB == 0 || iszero(_add.alpha)
-        if size(C) != (mA, nB)
-            throw(DimensionMismatch(lazy"C has dimensions $(size(C)), should have ($mA,$nB)"))
-        end
-        return _rmul_or_fill!(C, _add.beta)
-    end
-
-    if mA == 2 && nA == 2 && nB == 2
-        return matmul2x2!(C, tA, tB, A, B, _add)
-    end
-    if mA == 3 && nA == 3 && nB == 3
-        return matmul3x3!(C, tA, tB, A, B, _add)
-    end
-
-    alpha, beta = promote(_add.alpha, _add.beta, zero(T))
-    if (alpha isa Union{Bool,T} &&
-        beta isa Union{Bool,T} &&
-        stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 &&
-        stride(A, 2) >= size(A, 1) &&
-        stride(B, 2) >= size(B, 1) &&
-        stride(C, 2) >= size(C, 1))
-        return BLAS.gemm!(tA, tB, alpha, A, B, beta, C)
-    end
-    _generic_matmatmul!(C, tA, tB, A, B, _add)
-end
-
-function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::AbstractChar,
-                       A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
-                       _add = MulAddMul()) where {T<:BlasReal}
-    mA, nA = lapack_size(tA, A)
-    mB, nB = lapack_size(tB, B)
-
-    if nA != mB
-        throw(DimensionMismatch(lazy"A has dimensions ($mA,$nA) but B has dimensions ($mB,$nB)"))
-    end
-
-    if C === A || B === C
-        throw(ArgumentError("output matrix must not be aliased with input matrix"))
-    end
-
-    if mA == 0 || nA == 0 || nB == 0 || iszero(_add.alpha)
-        if size(C) != (mA, nB)
-            throw(DimensionMismatch(lazy"C has dimensions $(size(C)), should have ($mA,$nB)"))
-        end
-        return _rmul_or_fill!(C, _add.beta)
-    end
-
-    if mA == 2 && nA == 2 && nB == 2
-        return matmul2x2!(C, tA, tB, A, B, _add)
-    end
-    if mA == 3 && nA == 3 && nB == 3
-        return matmul3x3!(C, tA, tB, A, B, _add)
-    end
-
-    alpha, beta = promote(_add.alpha, _add.beta, zero(T))
-
-    # Make-sure reinterpret-based optimization is BLAS-compatible.
-    if (alpha isa Union{Bool,T} &&
-        beta isa Union{Bool,T} &&
-        stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 &&
-        stride(A, 2) >= size(A, 1) &&
-        stride(B, 2) >= size(B, 1) &&
-        stride(C, 2) >= size(C, 1) && tA == 'N')
-        BLAS.gemm!(tA, tB, alpha, reinterpret(T, A), B, beta, reinterpret(T, C))
-        return C
-    end
-    _generic_matmatmul!(C, tA, tB, A, B, _add)
-end
-
-# blas.jl defines matmul for floats; other integer and mixed precision
-# cases are handled here
-
-lapack_size(t::AbstractChar, M::AbstractVecOrMat) = (size(M, t=='N' ? 1 : 2), size(M, t=='N' ? 2 : 1))
-
-function copyto!(B::AbstractVecOrMat, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int})
-    if tM == 'N'
-        copyto!(B, ir_dest, jr_dest, M, ir_src, jr_src)
-    else
-        LinearAlgebra.copy_transpose!(B, ir_dest, jr_dest, M, jr_src, ir_src)
-        tM == 'C' && conj!(@view B[ir_dest, jr_dest])
-    end
-    B
-end
-
-function copy_transpose!(B::AbstractMatrix, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int})
-    if tM == 'N'
-        LinearAlgebra.copy_transpose!(B, ir_dest, jr_dest, M, ir_src, jr_src)
-    else
-        copyto!(B, ir_dest, jr_dest, M, jr_src, ir_src)
-        tM == 'C' && conj!(@view B[ir_dest, jr_dest])
-    end
-    B
-end
-
-# TODO: It will be faster for large matrices to convert to float,
-# call BLAS, and convert back to required type.
-
-# NOTE: the generic version is also called as fallback for
-#       strides != 1 cases
-
-@inline function generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector,
-                                    _add::MulAddMul = MulAddMul())
-    Anew, ta = tA in ('S', 's', 'H', 'h') ? (wrap(A, tA), 'N') : (A, tA)
-    return _generic_matvecmul!(C, ta, Anew, B, _add)
-end
-
-function _generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector,
-                            _add::MulAddMul = MulAddMul())
-    require_one_based_indexing(C, A, B)
-    @assert tA in ('N', 'T', 'C')
-    mB = length(B)
-    mA, nA = lapack_size(tA, A)
-    if mB != nA
-        throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA), vector B has length $mB"))
-    end
-    if mA != length(C)
-        throw(DimensionMismatch(lazy"result C has length $(length(C)), needs length $mA"))
-    end
-
-    Astride = size(A, 1)
-
-    @inbounds begin
-    if tA == 'T'  # fastest case
-        if nA == 0
-            for k = 1:mA
-                _modify!(_add, false, C, k)
-            end
-        else
-            for k = 1:mA
-                aoffs = (k-1)*Astride
-                s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1])
-                for i = 1:nA
-                    s += transpose(A[aoffs+i]) * B[i]
-                end
-                _modify!(_add, s, C, k)
-            end
-        end
-    elseif tA == 'C'
-        if nA == 0
-            for k = 1:mA
-                _modify!(_add, false, C, k)
-            end
-        else
-            for k = 1:mA
-                aoffs = (k-1)*Astride
-                s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1])
-                for i = 1:nA
-                    s += A[aoffs + i]'B[i]
-                end
-                _modify!(_add, s, C, k)
-            end
-        end
-    else # tA == 'N'
-        for i = 1:mA
-            if !iszero(_add.beta)
-                C[i] *= _add.beta
-            elseif mB == 0
-                C[i] = false
-            else
-                C[i] = zero(A[i]*B[1] + A[i]*B[1])
-            end
-        end
-        for k = 1:mB
-            aoffs = (k-1)*Astride
-            b = _add(B[k])
-            for i = 1:mA
-                C[i] += A[aoffs + i] * b
-            end
-        end
-    end
-    end # @inbounds
-    C
-end
-
-function generic_matmatmul(tA, tB, A::AbstractVecOrMat{T}, B::AbstractMatrix{S}) where {T,S}
-    mA, nA = lapack_size(tA, A)
-    mB, nB = lapack_size(tB, B)
-    C = similar(B, promote_op(matprod, T, S), mA, nB)
-    generic_matmatmul!(C, tA, tB, A, B)
-end
-
-const tilebufsize = 10800  # Approximately 32k/3
-
-function generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul)
-    mA, nA = lapack_size(tA, A)
-    mB, nB = lapack_size(tB, B)
-    mC, nC = size(C)
-
-    if iszero(_add.alpha)
-        return _rmul_or_fill!(C, _add.beta)
-    end
-    if mA == nA == mB == nB == mC == nC == 2
-        return matmul2x2!(C, tA, tB, A, B, _add)
-    end
-    if mA == nA == mB == nB == mC == nC == 3
-        return matmul3x3!(C, tA, tB, A, B, _add)
-    end
-    A, tA = tA in ('H', 'h', 'S', 's') ? (wrap(A, tA), 'N') : (A, tA)
-    B, tB = tB in ('H', 'h', 'S', 's') ? (wrap(B, tB), 'N') : (B, tB)
-    _generic_matmatmul!(C, tA, tB, A, B, _add)
-end
-
-function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S},
-                             _add::MulAddMul) where {T,S,R}
-    @assert tA in ('N', 'T', 'C') && tB in ('N', 'T', 'C')
-    require_one_based_indexing(C, A, B)
-
-    mA, nA = lapack_size(tA, A)
-    mB, nB = lapack_size(tB, B)
-    if mB != nA
-        throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA), matrix B has dimensions ($mB,$nB)"))
-    end
-    if size(C,1) != mA || size(C,2) != nB
-        throw(DimensionMismatch(lazy"result C has dimensions $(size(C)), needs ($mA,$nB)"))
-    end
-
-    if iszero(_add.alpha) || isempty(A) || isempty(B)
-        return _rmul_or_fill!(C, _add.beta)
-    end
-
-    tile_size = 0
-    if isbitstype(R) && isbitstype(T) && isbitstype(S) && (tA == 'N' || tB != 'N')
-        tile_size = floor(Int, sqrt(tilebufsize / max(sizeof(R), sizeof(S), sizeof(T), 1)))
-    end
-    @inbounds begin
-    if tile_size > 0
-        sz = (tile_size, tile_size)
-        Atile = Array{T}(undef, sz)
-        Btile = Array{S}(undef, sz)
-
-        z1 = zero(A[1, 1]*B[1, 1] + A[1, 1]*B[1, 1])
-        z = convert(promote_type(typeof(z1), R), z1)
-
-        if mA < tile_size && nA < tile_size && nB < tile_size
-            copy_transpose!(Atile, 1:nA, 1:mA, tA, A, 1:mA, 1:nA)
-            copyto!(Btile, 1:mB, 1:nB, tB, B, 1:mB, 1:nB)
-            for j = 1:nB
-                boff = (j-1)*tile_size
-                for i = 1:mA
-                    aoff = (i-1)*tile_size
-                    s = z
-                    for k = 1:nA
-                        s += Atile[aoff+k] * Btile[boff+k]
-                    end
-                    _modify!(_add, s, C, (i,j))
-                end
-            end
-        else
-            Ctile = Array{R}(undef, sz)
-            for jb = 1:tile_size:nB
-                jlim = min(jb+tile_size-1,nB)
-                jlen = jlim-jb+1
-                for ib = 1:tile_size:mA
-                    ilim = min(ib+tile_size-1,mA)
-                    ilen = ilim-ib+1
-                    fill!(Ctile, z)
-                    for kb = 1:tile_size:nA
-                        klim = min(kb+tile_size-1,mB)
-                        klen = klim-kb+1
-                        copy_transpose!(Atile, 1:klen, 1:ilen, tA, A, ib:ilim, kb:klim)
-                        copyto!(Btile, 1:klen, 1:jlen, tB, B, kb:klim, jb:jlim)
-                        for j=1:jlen
-                            bcoff = (j-1)*tile_size
-                            for i = 1:ilen
-                                aoff = (i-1)*tile_size
-                                s = z
-                                for k = 1:klen
-                                    s += Atile[aoff+k] * Btile[bcoff+k]
-                                end
-                                Ctile[bcoff+i] += s
-                            end
-                        end
-                    end
-                    if isone(_add.alpha) && iszero(_add.beta)
-                        copyto!(C, ib:ilim, jb:jlim, Ctile, 1:ilen, 1:jlen)
-                    else
-                        C[ib:ilim, jb:jlim] .= @views _add.(Ctile[1:ilen, 1:jlen], C[ib:ilim, jb:jlim])
-                    end
-                end
-            end
-        end
-    else
-        # Multiplication for non-plain-data uses the naive algorithm
-        if tA == 'N'
-            if tB == 'N'
-                for i = 1:mA, j = 1:nB
-                    z2 = zero(A[i, 1]*B[1, j] + A[i, 1]*B[1, j])
-                    Ctmp = convert(promote_type(R, typeof(z2)), z2)
-                    for k = 1:nA
-                        Ctmp += A[i, k]*B[k, j]
-                    end
-                    _modify!(_add, Ctmp, C, (i,j))
-                end
-            elseif tB == 'T'
-                for i = 1:mA, j = 1:nB
-                    z2 = zero(A[i, 1]*transpose(B[j, 1]) + A[i, 1]*transpose(B[j, 1]))
-                    Ctmp = convert(promote_type(R, typeof(z2)), z2)
-                    for k = 1:nA
-                        Ctmp += A[i, k] * transpose(B[j, k])
-                    end
-                    _modify!(_add, Ctmp, C, (i,j))
-                end
-            else
-                for i = 1:mA, j = 1:nB
-                    z2 = zero(A[i, 1]*B[j, 1]' + A[i, 1]*B[j, 1]')
-                    Ctmp = convert(promote_type(R, typeof(z2)), z2)
-                    for k = 1:nA
-                        Ctmp += A[i, k]*B[j, k]'
-                    end
-                    _modify!(_add, Ctmp, C, (i,j))
-                end
-            end
-        elseif tA == 'T'
-            if tB == 'N'
-                for i = 1:mA, j = 1:nB
-                    z2 = zero(transpose(A[1, i])*B[1, j] + transpose(A[1, i])*B[1, j])
-                    Ctmp = convert(promote_type(R, typeof(z2)), z2)
-                    for k = 1:nA
-                        Ctmp += transpose(A[k, i]) * B[k, j]
-                    end
-                    _modify!(_add, Ctmp, C, (i,j))
-                end
-            elseif tB == 'T'
-                for i = 1:mA, j = 1:nB
-                    z2 = zero(transpose(A[1, i])*transpose(B[j, 1]) + transpose(A[1, i])*transpose(B[j, 1]))
-                    Ctmp = convert(promote_type(R, typeof(z2)), z2)
-                    for k = 1:nA
-                        Ctmp += transpose(A[k, i]) * transpose(B[j, k])
-                    end
-                    _modify!(_add, Ctmp, C, (i,j))
-                end
-            else
-                for i = 1:mA, j = 1:nB
-                    z2 = zero(transpose(A[1, i])*B[j, 1]' + transpose(A[1, i])*B[j, 1]')
-                    Ctmp = convert(promote_type(R, typeof(z2)), z2)
-                    for k = 1:nA
-                        Ctmp += transpose(A[k, i]) * adjoint(B[j, k])
-                    end
-                    _modify!(_add, Ctmp, C, (i,j))
-                end
-            end
-        else
-            if tB == 'N'
-                for i = 1:mA, j = 1:nB
-                    z2 = zero(A[1, i]'*B[1, j] + A[1, i]'*B[1, j])
-                    Ctmp = convert(promote_type(R, typeof(z2)), z2)
-                    for k = 1:nA
-                        Ctmp += A[k, i]'B[k, j]
-                    end
-                    _modify!(_add, Ctmp, C, (i,j))
-                end
-            elseif tB == 'T'
-                for i = 1:mA, j = 1:nB
-                    z2 = zero(A[1, i]'*transpose(B[j, 1]) + A[1, i]'*transpose(B[j, 1]))
-                    Ctmp = convert(promote_type(R, typeof(z2)), z2)
-                    for k = 1:nA
-                        Ctmp += adjoint(A[k, i]) * transpose(B[j, k])
-                    end
-                    _modify!(_add, Ctmp, C, (i,j))
-                end
-            else
-                for i = 1:mA, j = 1:nB
-                    z2 = zero(A[1, i]'*B[j, 1]' + A[1, i]'*B[j, 1]')
-                    Ctmp = convert(promote_type(R, typeof(z2)), z2)
-                    for k = 1:nA
-                        Ctmp += A[k, i]'B[j, k]'
-                    end
-                    _modify!(_add, Ctmp, C, (i,j))
-                end
-            end
-        end
-    end
-    end # @inbounds
-    C
-end
-
-
-# multiply 2x2 matrices
-function matmul2x2(tA, tB, A::AbstractMatrix{T}, B::AbstractMatrix{S}) where {T,S}
-    matmul2x2!(similar(B, promote_op(matprod, T, S), 2, 2), tA, tB, A, B)
-end
-
-function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix,
-                    _add::MulAddMul = MulAddMul())
-    require_one_based_indexing(C, A, B)
-    if !(size(A) == size(B) == size(C) == (2,2))
-        throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))"))
-    end
-    @inbounds begin
-    if tA == 'N'
-        A11 = A[1,1]; A12 = A[1,2]; A21 = A[2,1]; A22 = A[2,2]
-    elseif tA == 'T'
-        # TODO making these lazy could improve perf
-        A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1]))
-        A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2]))
-    elseif tA == 'C'
-        # TODO making these lazy could improve perf
-        A11 = copy(A[1,1]'); A12 = copy(A[2,1]')
-        A21 = copy(A[1,2]'); A22 = copy(A[2,2]')
-    elseif tA == 'S'
-        A11 = symmetric(A[1,1], :U); A12 = A[1,2]
-        A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U)
-    elseif tA == 's'
-        A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1]))
-        A21 = A[2,1]; A22 = symmetric(A[2,2], :L)
-    elseif tA == 'H'
-        A11 = hermitian(A[1,1], :U); A12 = A[1,2]
-        A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U)
-    else # if tA == 'h'
-        A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1]))
-        A21 = A[2,1]; A22 = hermitian(A[2,2], :L)
-    end
-    if tB == 'N'
-        B11 = B[1,1]; B12 = B[1,2];
-        B21 = B[2,1]; B22 = B[2,2]
-    elseif tB == 'T'
-        # TODO making these lazy could improve perf
-        B11 = copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1]))
-        B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2]))
-    elseif tB == 'C'
-        # TODO making these lazy could improve perf
-        B11 = copy(B[1,1]'); B12 = copy(B[2,1]')
-        B21 = copy(B[1,2]'); B22 = copy(B[2,2]')
-    elseif tB == 'S'
-        B11 = symmetric(B[1,1], :U); B12 = B[1,2]
-        B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U)
-    elseif tB == 's'
-        B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1]))
-        B21 = B[2,1]; B22 = symmetric(B[2,2], :L)
-    elseif tB == 'H'
-        B11 = hermitian(B[1,1], :U); B12 = B[1,2]
-        B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U)
-    else # if tB == 'h'
-        B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1]))
-        B21 = B[2,1]; B22 = hermitian(B[2,2], :L)
-    end
-    _modify!(_add, A11*B11 + A12*B21, C, (1,1))
-    _modify!(_add, A11*B12 + A12*B22, C, (1,2))
-    _modify!(_add, A21*B11 + A22*B21, C, (2,1))
-    _modify!(_add, A21*B12 + A22*B22, C, (2,2))
-    end # inbounds
-    C
-end
-
-# Multiply 3x3 matrices
-function matmul3x3(tA, tB, A::AbstractMatrix{T}, B::AbstractMatrix{S}) where {T,S}
-    matmul3x3!(similar(B, promote_op(matprod, T, S), 3, 3), tA, tB, A, B)
-end
-
-function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix,
-                    _add::MulAddMul = MulAddMul())
-    require_one_based_indexing(C, A, B)
-    if !(size(A) == size(B) == size(C) == (3,3))
-        throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))"))
-    end
-    @inbounds begin
-    if tA == 'N'
-        A11 = A[1,1]; A12 = A[1,2]; A13 = A[1,3]
-        A21 = A[2,1]; A22 = A[2,2]; A23 = A[2,3]
-        A31 = A[3,1]; A32 = A[3,2]; A33 = A[3,3]
-    elseif tA == 'T'
-        # TODO making these lazy could improve perf
-        A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1]))
-        A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2])); A23 = copy(transpose(A[3,2]))
-        A31 = copy(transpose(A[1,3])); A32 = copy(transpose(A[2,3])); A33 = copy(transpose(A[3,3]))
-    elseif tA == 'C'
-        # TODO making these lazy could improve perf
-        A11 = copy(A[1,1]'); A12 = copy(A[2,1]'); A13 = copy(A[3,1]')
-        A21 = copy(A[1,2]'); A22 = copy(A[2,2]'); A23 = copy(A[3,2]')
-        A31 = copy(A[1,3]'); A32 = copy(A[2,3]'); A33 = copy(A[3,3]')
-    elseif tA == 'S'
-        A11 = symmetric(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3]
-        A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U); A23 = A[2,3]
-        A31 = copy(transpose(A[1,3])); A32 = copy(transpose(A[2,3])); A33 = symmetric(A[3,3], :U)
-    elseif tA == 's'
-        A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1]))
-        A21 = A[2,1]; A22 = symmetric(A[2,2], :L); A23 = copy(transpose(A[3,2]))
-        A31 = A[3,1]; A32 = A[3,2]; A33 = symmetric(A[3,3], :L)
-    elseif tA == 'H'
-        A11 = hermitian(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3]
-        A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U); A23 = A[2,3]
-        A31 = copy(adjoint(A[1,3])); A32 = copy(adjoint(A[2,3])); A33 = hermitian(A[3,3], :U)
-    else # if tA == 'h'
-        A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])); A13 = copy(adjoint(A[3,1]))
-        A21 = A[2,1]; A22 = hermitian(A[2,2], :L); A23 = copy(adjoint(A[3,2]))
-        A31 = A[3,1]; A32 = A[3,2]; A33 = hermitian(A[3,3], :L)
-    end
-
-    if tB == 'N'
-        B11 = B[1,1]; B12 = B[1,2]; B13 = B[1,3]
-        B21 = B[2,1]; B22 = B[2,2]; B23 = B[2,3]
-        B31 = B[3,1]; B32 = B[3,2]; B33 = B[3,3]
-    elseif tB == 'T'
-        # TODO making these lazy could improve perf
-        B11 = copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1]))
-        B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2])); B23 = copy(transpose(B[3,2]))
-        B31 = copy(transpose(B[1,3])); B32 = copy(transpose(B[2,3])); B33 = copy(transpose(B[3,3]))
-    elseif tB == 'C'
-        # TODO making these lazy could improve perf
-        B11 = copy(B[1,1]'); B12 = copy(B[2,1]'); B13 = copy(B[3,1]')
-        B21 = copy(B[1,2]'); B22 = copy(B[2,2]'); B23 = copy(B[3,2]')
-        B31 = copy(B[1,3]'); B32 = copy(B[2,3]'); B33 = copy(B[3,3]')
-    elseif tB == 'S'
-        B11 = symmetric(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3]
-        B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U); B23 = B[2,3]
-        B31 = copy(transpose(B[1,3])); B32 = copy(transpose(B[2,3])); B33 = symmetric(B[3,3], :U)
-    elseif tB == 's'
-        B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1]))
-        B21 = B[2,1]; B22 = symmetric(B[2,2], :L); B23 = copy(transpose(B[3,2]))
-        B31 = B[3,1]; B32 = B[3,2]; B33 = symmetric(B[3,3], :L)
-    elseif tB == 'H'
-        B11 = hermitian(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3]
-        B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U); B23 = B[2,3]
-        B31 = copy(adjoint(B[1,3])); B32 = copy(adjoint(B[2,3])); B33 = hermitian(B[3,3], :U)
-    else # if tB == 'h'
-        B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1])); B13 = copy(adjoint(B[3,1]))
-        B21 = B[2,1]; B22 = hermitian(B[2,2], :L); B23 = copy(adjoint(B[3,2]))
-        B31 = B[3,1]; B32 = B[3,2]; B33 = hermitian(B[3,3], :L)
-    end
-
-    _modify!(_add, A11*B11 + A12*B21 + A13*B31, C, (1,1))
-    _modify!(_add, A11*B12 + A12*B22 + A13*B32, C, (1,2))
-    _modify!(_add, A11*B13 + A12*B23 + A13*B33, C, (1,3))
-
-    _modify!(_add, A21*B11 + A22*B21 + A23*B31, C, (2,1))
-    _modify!(_add, A21*B12 + A22*B22 + A23*B32, C, (2,2))
-    _modify!(_add, A21*B13 + A22*B23 + A23*B33, C, (2,3))
-
-    _modify!(_add, A31*B11 + A32*B21 + A33*B31, C, (3,1))
-    _modify!(_add, A31*B12 + A32*B22 + A33*B32, C, (3,2))
-    _modify!(_add, A31*B13 + A32*B23 + A33*B33, C, (3,3))
-    end # inbounds
-    C
-end
-
-const RealOrComplex = Union{Real,Complex}
-
-# Three-argument *
-"""
-    *(A, B::AbstractMatrix, C)
-    A * B * C * D
-
-Chained multiplication of 3 or 4 matrices is done in the most efficient sequence,
-based on the sizes of the arrays. That is, the number of scalar multiplications needed
-for `(A * B) * C` (with 3 dense matrices) is compared to that for `A * (B * C)`
-to choose which of these to execute.
-
-If the last factor is a vector, or the first a transposed vector, then it is efficient
-to deal with these first. In particular `x' * B * y` means `(x' * B) * y`
-for an ordinary column-major `B::Matrix`. Unlike `dot(x, B, y)`, this
-allocates an intermediate array.
-
-If the first or last factor is a number, this will be fused with the matrix
-multiplication, using 5-arg [`mul!`](@ref).
-
-See also [`muladd`](@ref), [`dot`](@ref).
-
-!!! compat "Julia 1.7"
-    These optimisations require at least Julia 1.7.
-"""
-*(A::AbstractMatrix, B::AbstractMatrix, x::AbstractVector) = A * (B*x)
-
-*(tu::AdjOrTransAbsVec, B::AbstractMatrix, v::AbstractVector) = (tu*B) * v
-*(tu::AdjOrTransAbsVec, B::AdjOrTransAbsMat, v::AbstractVector) = tu * (B*v)
-
-*(A::AbstractMatrix, x::AbstractVector, γ::Number) = mat_vec_scalar(A,x,γ)
-*(A::AbstractMatrix, B::AbstractMatrix, γ::Number) = mat_mat_scalar(A,B,γ)
-*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractVector{<:RealOrComplex}) =
-    mat_vec_scalar(B,C,α)
-*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}) =
-    mat_mat_scalar(B,C,α)
-
-*(α::Number, u::AbstractVector, tv::AdjOrTransAbsVec) = broadcast(*, α, u, tv)
-*(u::AbstractVector, tv::AdjOrTransAbsVec, γ::Number) = broadcast(*, u, tv, γ)
-*(u::AbstractVector, tv::AdjOrTransAbsVec, C::AbstractMatrix) = u * (tv*C)
-
-*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix) = _tri_matmul(A,B,C)
-*(tv::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix) = (tv*B) * C
-
-function _tri_matmul(A,B,C,δ=nothing)
-    n,m = size(A)
-    # m,k == size(B)
-    k,l = size(C)
-    costAB_C = n*m*k + n*k*l  # multiplications, allocations n*k + n*l
-    costA_BC = m*k*l + n*m*l  #                              m*l + n*l
-    if costA_BC < costAB_C
-        isnothing(δ) ? A * (B*C) : A * mat_mat_scalar(B,C,δ)
-    else
-        isnothing(δ) ? (A*B) * C : mat_mat_scalar(A*B, C, δ)
-    end
-end
-
-# Fast path for two arrays * one scalar is opt-in, via mat_vec_scalar and mat_mat_scalar.
-
-mat_vec_scalar(A, x, γ) = A * (x * γ)  # fallback
-mat_vec_scalar(A::StridedMaybeAdjOrTransMat, x::StridedVector, γ) = _mat_vec_scalar(A, x, γ)
-mat_vec_scalar(A::AdjOrTransAbsVec, x::StridedVector, γ) = (A * x) * γ
-
-function _mat_vec_scalar(A, x, γ)
-    T = promote_type(eltype(A), eltype(x), typeof(γ))
-    C = similar(A, T, axes(A,1))
-    mul!(C, A, x, γ, false)
-end
-
-mat_mat_scalar(A, B, γ) = (A*B) * γ # fallback
-mat_mat_scalar(A::StridedMaybeAdjOrTransMat, B::StridedMaybeAdjOrTransMat, γ) =
-    _mat_mat_scalar(A, B, γ)
-
-function _mat_mat_scalar(A, B, γ)
-    T = promote_type(eltype(A), eltype(B), typeof(γ))
-    C = similar(A, T, axes(A,1), axes(B,2))
-    mul!(C, A, B, γ, false)
-end
-
-mat_mat_scalar(A::AdjointAbsVec, B, γ) = (γ' * (A * B)')' # preserving order, adjoint reverses
-mat_mat_scalar(A::AdjointAbsVec{<:RealOrComplex}, B::StridedMaybeAdjOrTransMat{<:RealOrComplex}, γ::RealOrComplex) =
-    mat_vec_scalar(B', A', γ')'
-
-mat_mat_scalar(A::TransposeAbsVec, B, γ) = transpose(γ * transpose(A * B))
-mat_mat_scalar(A::TransposeAbsVec{<:RealOrComplex}, B::StridedMaybeAdjOrTransMat{<:RealOrComplex}, γ::RealOrComplex) =
-    transpose(mat_vec_scalar(transpose(B), transpose(A), γ))
-
-
-# Four-argument *, by type
-*(α::Number, β::Number, C::AbstractMatrix, x::AbstractVector) = (α*β) * C * x
-*(α::Number, β::Number, C::AbstractMatrix, D::AbstractMatrix) = (α*β) * C * D
-*(α::Number, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = α * B * (C*x)
-*(α::Number, vt::AdjOrTransAbsVec, C::AbstractMatrix, x::AbstractVector) = α * (vt*C*x)
-*(α::RealOrComplex, vt::AdjOrTransAbsVec{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}, D::AbstractMatrix{<:RealOrComplex}) =
-    (α*vt*C) * D # solves an ambiguity
-
-*(A::AbstractMatrix, x::AbstractVector, γ::Number, δ::Number) = A * x * (γ*δ)
-*(A::AbstractMatrix, B::AbstractMatrix, γ::Number, δ::Number) = A * B * (γ*δ)
-*(A::AbstractMatrix, B::AbstractMatrix, x::AbstractVector, δ::Number, ) = A * (B*x*δ)
-*(vt::AdjOrTransAbsVec, B::AbstractMatrix, x::AbstractVector, δ::Number) = (vt*B*x) * δ
-*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, δ::Number) = (vt*B) * C * δ
-
-*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = A * B * (C*x)
-*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, D::AbstractMatrix) = (vt*B) * C * D
-*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = vt * B * (C*x)
-
-# Four-argument *, by size
-*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, δ::Number) = _tri_matmul(A,B,C,δ)
-*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}, D::AbstractMatrix{<:RealOrComplex}) =
-    _tri_matmul(B,C,D,α)
-*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, D::AbstractMatrix) =
-    _quad_matmul(A,B,C,D)
-
-function _quad_matmul(A,B,C,D)
-    c1 = _mul_cost((A,B),(C,D))
-    c2 = _mul_cost(((A,B),C),D)
-    c3 = _mul_cost(A,(B,(C,D)))
-    c4 = _mul_cost((A,(B,C)),D)
-    c5 = _mul_cost(A,((B,C),D))
-    cmin = min(c1,c2,c3,c4,c5)
-    if c1 == cmin
-        (A*B) * (C*D)
-    elseif c2 == cmin
-        ((A*B) * C) * D
-    elseif c3 == cmin
-        A * (B * (C*D))
-    elseif c4 == cmin
-        (A * (B*C)) * D
-    else
-        A * ((B*C) * D)
-    end
-end
-@inline _mul_cost(A::AbstractMatrix) = 0
-@inline _mul_cost((A,B)::Tuple) = _mul_cost(A,B)
-@inline _mul_cost(A,B) = _mul_cost(A) + _mul_cost(B) + *(_mul_sizes(A)..., last(_mul_sizes(B)))
-@inline _mul_sizes(A::AbstractMatrix) = size(A)
-@inline _mul_sizes((A,B)::Tuple) = first(_mul_sizes(A)), last(_mul_sizes(B))
diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl
deleted file mode 100644
index fe40fec78e801..0000000000000
--- a/stdlib/LinearAlgebra/src/qr.jl
+++ /dev/null
@@ -1,754 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# QR Factorization
-"""
-    QR <: Factorization
-
-A QR matrix factorization stored in a packed format, typically obtained from
-[`qr`](@ref). If ``A`` is an `m`×`n` matrix, then
-
-```math
-A = Q R
-```
-
-where ``Q`` is an orthogonal/unitary matrix and ``R`` is upper triangular.
-The matrix ``Q`` is stored as a sequence of Householder reflectors ``v_i``
-and coefficients ``\\tau_i`` where:
-
-```math
-Q = \\prod_{i=1}^{\\min(m,n)} (I - \\tau_i v_i v_i^T).
-```
-
-Iterating the decomposition produces the components `Q` and `R`.
-
-The object has two fields:
-
-* `factors` is an `m`×`n` matrix.
-
-  - The upper triangular part contains the elements of ``R``, that is `R =
-    triu(F.factors)` for a `QR` object `F`.
-
-  - The subdiagonal part contains the reflectors ``v_i`` stored in a packed format where
-    ``v_i`` is the ``i``th column of the matrix `V = I + tril(F.factors, -1)`.
-
-* `τ` is a vector  of length `min(m,n)` containing the coefficients ``\tau_i``.
-"""
-struct QR{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: Factorization{T}
-    factors::S
-    τ::C
-
-    function QR{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}}
-        require_one_based_indexing(factors)
-        new{T,S,C}(factors, τ)
-    end
-end
-QR(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} =
-    QR{T,typeof(factors),typeof(τ)}(factors, τ)
-QR{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} =
-    QR(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ))
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(QR{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S},
-           QR{T,S,typeof(τ)}(factors, τ), false)
-
-# iteration for destructuring into components
-Base.iterate(S::QR) = (S.Q, Val(:R))
-Base.iterate(S::QR, ::Val{:R}) = (S.R, Val(:done))
-Base.iterate(S::QR, ::Val{:done}) = nothing
-
-# Note. For QRCompactWY factorization without pivoting, the WY representation based method introduced in LAPACK 3.4
-"""
-    QRCompactWY <: Factorization
-
-A QR matrix factorization stored in a compact blocked format, typically obtained from
-[`qr`](@ref). If ``A`` is an `m`×`n` matrix, then
-
-```math
-A = Q R
-```
-
-where ``Q`` is an orthogonal/unitary matrix and ``R`` is upper triangular. It is similar
-to the [`QR`](@ref) format except that the orthogonal/unitary matrix ``Q`` is stored in
-*Compact WY* format [^Schreiber1989].  For the block size ``n_b``, it is stored as
-a `m`×`n` lower trapezoidal matrix ``V`` and a matrix ``T = (T_1 \\; T_2 \\; ... \\;
-T_{b-1} \\; T_b')`` composed of ``b = \\lceil \\min(m,n) / n_b \\rceil`` upper triangular
-matrices ``T_j`` of size ``n_b``×``n_b`` (``j = 1, ..., b-1``) and an upper trapezoidal
-``n_b``×``\\min(m,n) - (b-1) n_b`` matrix ``T_b'`` (``j=b``) whose upper square part
-denoted with ``T_b`` satisfying
-
-```math
-Q = \\prod_{i=1}^{\\min(m,n)} (I - \\tau_i v_i v_i^T)
-= \\prod_{j=1}^{b} (I - V_j T_j V_j^T)
-```
-
-such that ``v_i`` is the ``i``th column of ``V``, ``\\tau_i`` is the ``i``th element
-of `[diag(T_1); diag(T_2); …; diag(T_b)]`, and ``(V_1 \\; V_2 \\; ... \\; V_b)``
-is the left `m`×`min(m, n)` block of ``V``.  When constructed using [`qr`](@ref),
-the block size is given by ``n_b = \\min(m, n, 36)``.
-
-Iterating the decomposition produces the components `Q` and `R`.
-
-The object has two fields:
-
-* `factors`, as in the [`QR`](@ref) type, is an `m`×`n` matrix.
-
-  - The upper triangular part contains the elements of ``R``, that is `R =
-    triu(F.factors)` for a `QR` object `F`.
-
-  - The subdiagonal part contains the reflectors ``v_i`` stored in a packed format such
-    that `V = I + tril(F.factors, -1)`.
-
-* `T` is a ``n_b``-by-``\\min(m,n)`` matrix as described above. The subdiagonal elements
-  for each triangular matrix ``T_j`` are ignored.
-
-!!! note
-
-    This format should not to be confused with the older *WY* representation
-    [^Bischof1987].
-
-
-[^Bischof1987]: C Bischof and C Van Loan, "The WY representation for products of Householder matrices", SIAM J Sci Stat Comput 8 (1987), s2-s13. [doi:10.1137/0908009](https://doi.org/10.1137/0908009)
-
-[^Schreiber1989]: R Schreiber and C Van Loan, "A storage-efficient WY representation for products of Householder transformations", SIAM J Sci Stat Comput 10 (1989), 53-57. [doi:10.1137/0910005](https://doi.org/10.1137/0910005)
-"""
-struct QRCompactWY{S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}} <: Factorization{S}
-    factors::M
-    T::C
-
-    function QRCompactWY{S,M,C}(factors, T) where {S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}}
-        require_one_based_indexing(factors)
-        new{S,M,C}(factors, T)
-    end
-end
-QRCompactWY(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S} =
-    QRCompactWY{S,typeof(factors),typeof(T)}(factors, T)
-QRCompactWY{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} =
-    QRCompactWY(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T))
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(QRCompactWY{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M},
-           QRCompactWY{S,M,typeof(T)}(factors, T), false)
-
-# iteration for destructuring into components
-Base.iterate(S::QRCompactWY) = (S.Q, Val(:R))
-Base.iterate(S::QRCompactWY, ::Val{:R}) = (S.R, Val(:done))
-Base.iterate(S::QRCompactWY, ::Val{:done}) = nothing
-
-# returns upper triangular views of all non-undef values of `qr(A).T`:
-#
-# julia> sparse(qr(A).T .== qr(A).T)
-# 36×100 SparseMatrixCSC{Bool, Int64} with 1767 stored entries:
-# ⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿
-# ⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿
-# ⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿
-# ⠀⠀⠀⠀⠀⠂⠛⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿
-# ⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⢀⠐⠙⢿⣿⣿⣿⣿
-# ⠀⠀⠐⠀⠀⠀⠀⠀⠀⢀⢙⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠁⠀⡀⠀⠙⢿⣿⣿
-# ⠀⠀⠐⠀⠀⠀⠀⠀⠀⠀⠄⠀⠙⢿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⡀⠀⠀⢀⠀⠀⠙⢿
-# ⠀⡀⠀⠀⠀⠀⠀⠀⠂⠒⠒⠀⠀⠀⠙⢿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⠀⠀⠀⠀⠀⠀⠀⢀⠀⠀⠀⡀⠀⠀
-# ⠀⠀⠀⠀⠀⠀⠀⠀⣈⡀⠀⠀⠀⠀⠀⠀⠙⢿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠂⠀⢀⠀
-#
-function _triuppers_qr(T)
-    blocksize, cols = size(T)
-    return Iterators.map(0:div(cols - 1, blocksize)) do i
-        n = min(blocksize, cols - i * blocksize)
-        return UpperTriangular(view(T, 1:n, (1:n) .+ i * blocksize))
-    end
-end
-
-function Base.hash(F::QRCompactWY, h::UInt)
-    return hash(F.factors, foldr(hash, _triuppers_qr(F.T); init=hash(QRCompactWY, h)))
-end
-function Base.:(==)(A::QRCompactWY, B::QRCompactWY)
-    return A.factors == B.factors && all(splat(==), zip(_triuppers_qr.((A.T, B.T))...))
-end
-function Base.isequal(A::QRCompactWY, B::QRCompactWY)
-    return isequal(A.factors, B.factors) && all(zip(_triuppers_qr.((A.T, B.T))...)) do (a, b)
-        isequal(a, b)::Bool
-    end
-end
-
-"""
-    QRPivoted <: Factorization
-
-A QR matrix factorization with column pivoting in a packed format, typically obtained from
-[`qr`](@ref). If ``A`` is an `m`×`n` matrix, then
-
-```math
-A P = Q R
-```
-
-where ``P`` is a permutation matrix, ``Q`` is an orthogonal/unitary matrix and ``R`` is
-upper triangular. The matrix ``Q`` is stored as a sequence of Householder reflectors:
-
-```math
-Q = \\prod_{i=1}^{\\min(m,n)} (I - \\tau_i v_i v_i^T).
-```
-
-Iterating the decomposition produces the components `Q`, `R`, and `p`.
-
-The object has three fields:
-
-* `factors` is an `m`×`n` matrix.
-
-  - The upper triangular part contains the elements of ``R``, that is `R =
-    triu(F.factors)` for a `QR` object `F`.
-
-  - The subdiagonal part contains the reflectors ``v_i`` stored in a packed format where
-    ``v_i`` is the ``i``th column of the matrix `V = I + tril(F.factors, -1)`.
-
-* `τ` is a vector of length `min(m,n)` containing the coefficients ``\tau_i``.
-
-* `jpvt` is an integer vector of length `n` corresponding to the permutation ``P``.
-"""
-struct QRPivoted{T,S<:AbstractMatrix{T},C<:AbstractVector{T},P<:AbstractVector{<:Integer}} <: Factorization{T}
-    factors::S
-    τ::C
-    jpvt::P
-
-    function QRPivoted{T,S,C,P}(factors, τ, jpvt) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T},P<:AbstractVector{<:Integer}}
-        require_one_based_indexing(factors, τ, jpvt)
-        new{T,S,C,P}(factors, τ, jpvt)
-    end
-end
-QRPivoted(factors::AbstractMatrix{T}, τ::AbstractVector{T},
-          jpvt::AbstractVector{<:Integer}) where {T} =
-    QRPivoted{T,typeof(factors),typeof(τ),typeof(jpvt)}(factors, τ, jpvt)
-QRPivoted{T}(factors::AbstractMatrix, τ::AbstractVector,
-             jpvt::AbstractVector{<:Integer}) where {T} =
-    QRPivoted(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ), jpvt)
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(QRPivoted{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T},
-                          jpvt::AbstractVector{<:Integer}) where {T,S},
-           QRPivoted{T,S,typeof(τ),typeof(jpvt)}(factors, τ, jpvt), false)
-
-# iteration for destructuring into components
-Base.iterate(S::QRPivoted) = (S.Q, Val(:R))
-Base.iterate(S::QRPivoted, ::Val{:R}) = (S.R, Val(:p))
-Base.iterate(S::QRPivoted, ::Val{:p}) = (S.p, Val(:done))
-Base.iterate(S::QRPivoted, ::Val{:done}) = nothing
-
-function qrfactUnblocked!(A::AbstractMatrix{T}) where {T}
-    require_one_based_indexing(A)
-    m, n = size(A)
-    τ = zeros(T, min(m,n))
-    for k = 1:min(m - 1 + !(T<:Real), n)
-        x = view(A, k:m, k)
-        τk = reflector!(x)
-        τ[k] = τk
-        reflectorApply!(x, τk, view(A, k:m, k + 1:n))
-    end
-    QR(A, τ)
-end
-
-# Find index for columns with largest two norm
-function indmaxcolumn(A::AbstractMatrix)
-    mm = norm(view(A, :, 1))
-    ii = 1
-    for i = 2:size(A, 2)
-        mi = norm(view(A, :, i))
-        if abs(mi) > mm
-            mm = mi
-            ii = i
-        end
-    end
-    return ii
-end
-
-function qrfactPivotedUnblocked!(A::AbstractMatrix)
-    m, n = size(A)
-    piv = Vector(UnitRange{BlasInt}(1,n))
-    τ = Vector{eltype(A)}(undef, min(m,n))
-    for j = 1:min(m,n)
-
-        # Find column with maximum norm in trailing submatrix
-        jm = indmaxcolumn(view(A, j:m, j:n)) + j - 1
-
-        if jm != j
-            # Flip elements in pivoting vector
-            tmpp = piv[jm]
-            piv[jm] = piv[j]
-            piv[j] = tmpp
-
-            # Update matrix with
-            for i = 1:m
-                tmp = A[i,jm]
-                A[i,jm] = A[i,j]
-                A[i,j] = tmp
-            end
-        end
-
-        # Compute reflector of columns j
-        x = view(A, j:m, j)
-        τj = reflector!(x)
-        τ[j] = τj
-
-        # Update trailing submatrix with reflector
-        reflectorApply!(x, τj, view(A, j:m, j+1:n))
-    end
-    return QRPivoted{eltype(A), typeof(A), typeof(τ), typeof(piv)}(A, τ, piv)
-end
-
-# LAPACK version
-qr!(A::StridedMatrix{<:BlasFloat}, ::NoPivot; blocksize=36) =
-    QRCompactWY(LAPACK.geqrt!(A, min(min(size(A)...), blocksize))...)
-qr!(A::StridedMatrix{<:BlasFloat}, ::ColumnNorm) = QRPivoted(LAPACK.geqp3!(A)...)
-
-# Generic fallbacks
-
-"""
-    qr!(A, pivot = NoPivot(); blocksize)
-
-`qr!` is the same as [`qr`](@ref) when `A` is a subtype of [`AbstractMatrix`](@ref),
-but saves space by overwriting the input `A`, instead of creating a copy.
-An [`InexactError`](@ref) exception is thrown if the factorization produces a number not
-representable by the element type of `A`, e.g. for integer types.
-
-!!! compat "Julia 1.4"
-    The `blocksize` keyword argument requires Julia 1.4 or later.
-
-# Examples
-```jldoctest
-julia> a = [1. 2.; 3. 4.]
-2×2 Matrix{Float64}:
- 1.0  2.0
- 3.0  4.0
-
-julia> qr!(a)
-LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}}
-Q factor: 2×2 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}
-R factor:
-2×2 Matrix{Float64}:
- -3.16228  -4.42719
-  0.0      -0.632456
-
-julia> a = [1 2; 3 4]
-2×2 Matrix{Int64}:
- 1  2
- 3  4
-
-julia> qr!(a)
-ERROR: InexactError: Int64(3.1622776601683795)
-Stacktrace:
-[...]
-```
-"""
-qr!(A::AbstractMatrix, ::NoPivot) = qrfactUnblocked!(A)
-qr!(A::AbstractMatrix, ::ColumnNorm) = qrfactPivotedUnblocked!(A)
-qr!(A::AbstractMatrix) = qr!(A, NoPivot())
-# TODO: Remove in Julia v2.0
-@deprecate qr!(A::AbstractMatrix, ::Val{true})  qr!(A, ColumnNorm())
-@deprecate qr!(A::AbstractMatrix, ::Val{false}) qr!(A, NoPivot())
-
-_qreltype(::Type{T}) where T = typeof(zero(T)/sqrt(abs2(one(T))))
-
-"""
-    qr(A, pivot = NoPivot(); blocksize) -> F
-
-Compute the QR factorization of the matrix `A`: an orthogonal (or unitary if `A` is
-complex-valued) matrix `Q`, and an upper triangular matrix `R` such that
-
-```math
-A = Q R
-```
-
-The returned object `F` stores the factorization in a packed format:
-
- - if `pivot == ColumnNorm()` then `F` is a [`QRPivoted`](@ref) object,
-
- - otherwise if the element type of `A` is a BLAS type ([`Float32`](@ref), [`Float64`](@ref),
-   `ComplexF32` or `ComplexF64`), then `F` is a [`QRCompactWY`](@ref) object,
-
- - otherwise `F` is a [`QR`](@ref) object.
-
-The individual components of the decomposition `F` can be retrieved via property accessors:
-
- - `F.Q`: the orthogonal/unitary matrix `Q`
- - `F.R`: the upper triangular matrix `R`
- - `F.p`: the permutation vector of the pivot ([`QRPivoted`](@ref) only)
- - `F.P`: the permutation matrix of the pivot ([`QRPivoted`](@ref) only)
-
-Iterating the decomposition produces the components `Q`, `R`, and if extant `p`.
-
-The following functions are available for the `QR` objects: [`inv`](@ref), [`size`](@ref),
-and [`\\`](@ref). When `A` is rectangular, `\\` will return a least squares
-solution and if the solution is not unique, the one with smallest norm is returned. When
-`A` is not full rank, factorization with (column) pivoting is required to obtain a minimum
-norm solution.
-
-Multiplication with respect to either full/square or non-full/square `Q` is allowed, i.e. both `F.Q*F.R`
-and `F.Q*A` are supported. A `Q` matrix can be converted into a regular matrix with
-[`Matrix`](@ref). This operation returns the "thin" Q factor, i.e., if `A` is `m`×`n` with `m>=n`, then
-`Matrix(F.Q)` yields an `m`×`n` matrix with orthonormal columns.  To retrieve the "full" Q factor, an
-`m`×`m` orthogonal matrix, use `F.Q*I` or `collect(F.Q)`. If `m<=n`, then `Matrix(F.Q)` yields an `m`×`m`
-orthogonal matrix.
-
-The block size for QR decomposition can be specified by keyword argument
-`blocksize :: Integer` when `pivot == NoPivot()` and `A isa StridedMatrix{<:BlasFloat}`.
-It is ignored when `blocksize > minimum(size(A))`. See [`QRCompactWY`](@ref).
-
-!!! compat "Julia 1.4"
-    The `blocksize` keyword argument requires Julia 1.4 or later.
-
-# Examples
-```jldoctest
-julia> A = [3.0 -6.0; 4.0 -8.0; 0.0 1.0]
-3×2 Matrix{Float64}:
- 3.0  -6.0
- 4.0  -8.0
- 0.0   1.0
-
-julia> F = qr(A)
-LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}}
-Q factor: 3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}
-R factor:
-2×2 Matrix{Float64}:
- -5.0  10.0
-  0.0  -1.0
-
-julia> F.Q * F.R == A
-true
-```
-
-!!! note
-    `qr` returns multiple types because LAPACK uses several representations
-    that minimize the memory storage requirements of products of Householder
-    elementary reflectors, so that the `Q` and `R` matrices can be stored
-    compactly rather as two separate dense matrices.
-"""
-function qr(A::AbstractMatrix{T}, arg...; kwargs...) where T
-    require_one_based_indexing(A)
-    AA = copy_similar(A, _qreltype(T))
-    return qr!(AA, arg...; kwargs...)
-end
-# TODO: remove in Julia v2.0
-@deprecate qr(A::AbstractMatrix, ::Val{false}; kwargs...) qr(A, NoPivot(); kwargs...)
-@deprecate qr(A::AbstractMatrix, ::Val{true}; kwargs...)  qr(A, ColumnNorm(); kwargs...)
-
-qr(x::Number) = qr(fill(x,1,1))
-function qr(v::AbstractVector)
-    require_one_based_indexing(v)
-    qr(reshape(v, (length(v), 1)))
-end
-
-# Conversions
-QR{T}(A::QR) where {T} = QR(convert(AbstractMatrix{T}, A.factors), convert(Vector{T}, A.τ))
-Factorization{T}(A::QR{T}) where {T} = A
-Factorization{T}(A::QR) where {T} = QR{T}(A)
-QRCompactWY{T}(A::QRCompactWY) where {T} = QRCompactWY(convert(AbstractMatrix{T}, A.factors), convert(AbstractMatrix{T}, A.T))
-Factorization{T}(A::QRCompactWY{T}) where {T} = A
-Factorization{T}(A::QRCompactWY) where {T} = QRCompactWY{T}(A)
-AbstractMatrix(F::Union{QR,QRCompactWY}) = F.Q * F.R
-AbstractArray(F::Union{QR,QRCompactWY}) = AbstractMatrix(F)
-Matrix(F::Union{QR,QRCompactWY}) = Array(AbstractArray(F))
-Array(F::Union{QR,QRCompactWY}) = Matrix(F)
-QRPivoted{T}(A::QRPivoted) where {T} = QRPivoted(convert(AbstractMatrix{T}, A.factors), convert(Vector{T}, A.τ), A.jpvt)
-Factorization{T}(A::QRPivoted{T}) where {T} = A
-Factorization{T}(A::QRPivoted) where {T} = QRPivoted{T}(A)
-AbstractMatrix(F::QRPivoted) = (F.Q * F.R)[:,invperm(F.p)]
-AbstractArray(F::QRPivoted) = AbstractMatrix(F)
-Matrix(F::QRPivoted) = Array(AbstractArray(F))
-Array(F::QRPivoted) = Matrix(F)
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Union{QR, QRCompactWY, QRPivoted})
-    summary(io, F); println(io)
-    print(io, "Q factor: ")
-    show(io, mime, F.Q)
-    println(io, "\nR factor:")
-    show(io, mime, F.R)
-    if F isa QRPivoted
-        println(io, "\npermutation:")
-        show(io, mime, F.p)
-    end
-end
-
-function getproperty(F::QR, d::Symbol)
-    m, n = size(F)
-    if d === :R
-        return triu!(getfield(F, :factors)[1:min(m,n), 1:n])
-    elseif d === :Q
-        return QRPackedQ(getfield(F, :factors), F.τ)
-    else
-        getfield(F, d)
-    end
-end
-function getproperty(F::QRCompactWY, d::Symbol)
-    m, n = size(F)
-    if d === :R
-        return triu!(getfield(F, :factors)[1:min(m,n), 1:n])
-    elseif d === :Q
-        return QRCompactWYQ(getfield(F, :factors), F.T)
-    else
-        getfield(F, d)
-    end
-end
-Base.propertynames(F::Union{QR,QRCompactWY}, private::Bool=false) =
-    (:R, :Q, (private ? fieldnames(typeof(F)) : ())...)
-
-function getproperty(F::QRPivoted{T}, d::Symbol) where T
-    m, n = size(F)
-    if d === :R
-        return triu!(getfield(F, :factors)[1:min(m,n), 1:n])
-    elseif d === :Q
-        return QRPackedQ(getfield(F, :factors), F.τ)
-    elseif d === :p
-        return getfield(F, :jpvt)
-    elseif d === :P
-        p = F.p
-        n = length(p)
-        P = zeros(T, n, n)
-        for i in 1:n
-            P[p[i],i] = one(T)
-        end
-        return P
-    else
-        getfield(F, d)
-    end
-end
-Base.propertynames(F::QRPivoted, private::Bool=false) =
-    (:R, :Q, :p, :P, (private ? fieldnames(typeof(F)) : ())...)
-
-transpose(F::Union{QR{<:Real},QRPivoted{<:Real},QRCompactWY{<:Real}}) = F'
-transpose(::Union{QR,QRPivoted,QRCompactWY}) =
-    throw(ArgumentError("transpose of QR decomposition is not supported, consider using adjoint"))
-
-size(F::Union{QR,QRCompactWY,QRPivoted}) = size(getfield(F, :factors))
-size(F::Union{QR,QRCompactWY,QRPivoted}, dim::Integer) = size(getfield(F, :factors), dim)
-
-
-function ldiv!(A::QRCompactWY{T}, b::AbstractVector{T}) where {T}
-    require_one_based_indexing(b)
-    m, n = size(A)
-    ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), b), 1:size(A, 2)))
-    return b
-end
-function ldiv!(A::QRCompactWY{T}, B::AbstractMatrix{T}) where {T}
-    require_one_based_indexing(B)
-    m, n = size(A)
-    ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), B), 1:size(A, 2), 1:size(B, 2)))
-    return B
-end
-
-# Julia implementation similar to xgelsy
-function ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractMatrix{T}, rcond::Real) where {T<:BlasFloat}
-    require_one_based_indexing(B)
-    m, n = size(A)
-
-    if m > size(B, 1) || n > size(B, 1)
-        throw(DimensionMismatch("B has leading dimension $(size(B, 1)) but needs at least $(max(m, n))"))
-    end
-
-    if length(A.factors) == 0 || length(B) == 0
-        return B, 0
-    end
-
-    @inbounds begin
-        smin = smax = abs(A.factors[1])
-
-        if smax == 0
-            return fill!(B, 0), 0
-        end
-
-        mn = min(m, n)
-
-        # allocate temporary work space
-        tmp  = Vector{T}(undef, 2mn)
-        wmin = view(tmp, 1:mn)
-        wmax = view(tmp, mn+1:2mn)
-
-        rnk = 1
-        wmin[1] = 1
-        wmax[1] = 1
-
-        while rnk < mn
-            i = rnk + 1
-
-            smin, s1, c1 = LAPACK.laic1!(2, view(wmin, 1:rnk), smin, view(A.factors, 1:rnk, i), A.factors[i,i])
-            smax, s2, c2 = LAPACK.laic1!(1, view(wmax, 1:rnk), smax, view(A.factors, 1:rnk, i), A.factors[i,i])
-
-            if smax*rcond > smin
-                break
-            end
-
-            for j in 1:rnk
-                wmin[j] *= s1
-                wmax[j] *= s2
-            end
-            wmin[i] = c1
-            wmax[i] = c2
-
-            rnk += 1
-        end
-
-        if rnk < n
-            C, τ = LAPACK.tzrzf!(A.factors[1:rnk, :])
-            work = vec(C)
-        else
-            C, τ = A.factors, A.τ
-            work = resize!(tmp, n)
-        end
-
-        lmul!(adjoint(A.Q), view(B, 1:m, :))
-        ldiv!(UpperTriangular(view(C, 1:rnk, 1:rnk)), view(B, 1:rnk, :))
-
-        if rnk < n
-            B[rnk+1:n,:] .= zero(T)
-            LAPACK.ormrz!('L', T <: Complex ? 'C' : 'T', C, τ, view(B, 1:n, :))
-        end
-
-        for j in axes(B, 2)
-            for i in 1:n
-                work[A.p[i]] = B[i,j]
-            end
-            for i in 1:n
-                B[i,j] = work[i]
-            end
-        end
-    end
-
-    return B, rnk
-end
-
-ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractVector{T}) where {T<:BlasFloat} =
-    vec(ldiv!(A, reshape(B, length(B), 1)))
-ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractMatrix{T}) where {T<:BlasFloat} =
-    ldiv!(A, B, min(size(A)...)*eps(real(T)))[1]
-
-function _wide_qr_ldiv!(A::QR{T}, B::AbstractMatrix{T}) where T
-    m, n = size(A)
-    minmn = min(m,n)
-    mB, nB = size(B)
-    lmul!(adjoint(A.Q), view(B, 1:m, :))
-    R = A.R # makes a copy, used as a buffer below
-    @inbounds begin
-        if n > m # minimum norm solution
-            τ = zeros(T,m)
-            for k = m:-1:1 # Trapezoid to triangular by elementary operation
-                x = view(R, k, [k; m + 1:n])
-                τk = reflector!(x)
-                τ[k] = conj(τk)
-                for i = 1:k - 1
-                    vRi = R[i,k]
-                    for j = m + 1:n
-                        vRi += R[i,j]*x[j - m + 1]'
-                    end
-                    vRi *= τk
-                    R[i,k] -= vRi
-                    for j = m + 1:n
-                        R[i,j] -= vRi*x[j - m + 1]
-                    end
-                end
-            end
-        end
-        ldiv!(UpperTriangular(view(R, :, 1:minmn)), view(B, 1:minmn, :))
-        if n > m # Apply elementary transformation to solution
-            B[m + 1:mB,1:nB] .= zero(T)
-            for j = 1:nB
-                for k = 1:m
-                    vBj = B[k,j]'
-                    for i = m + 1:n
-                        vBj += B[i,j]'*R[k,i]'
-                    end
-                    vBj *= τ[k]
-                    B[k,j] -= vBj'
-                    for i = m + 1:n
-                        B[i,j] -= R[k,i]'*vBj'
-                    end
-                end
-            end
-        end
-    end
-    return B
-end
-
-
-function ldiv!(A::QR{T}, B::AbstractMatrix{T}) where T
-    m, n = size(A)
-    m < n && return _wide_qr_ldiv!(A, B)
-
-    lmul!(adjoint(A.Q), view(B, 1:m, :))
-    R = A.factors
-    ldiv!(UpperTriangular(view(R,1:n,:)), view(B, 1:n, :))
-    return B
-end
-function ldiv!(A::QR, B::AbstractVector)
-    ldiv!(A, reshape(B, length(B), 1))
-    return B
-end
-
-function ldiv!(A::QRPivoted, b::AbstractVector)
-    ldiv!(QR(A.factors,A.τ), b)
-    b[1:size(A.factors, 2)] = view(b, 1:size(A.factors, 2))[invperm(A.jpvt)]
-    b
-end
-function ldiv!(A::QRPivoted, B::AbstractMatrix)
-    ldiv!(QR(A.factors, A.τ), B)
-    B[1:size(A.factors, 2),:] = view(B, 1:size(A.factors, 2), :)[invperm(A.jpvt),:]
-    B
-end
-
-function _apply_permutation!(F::QRPivoted, B::AbstractVecOrMat)
-    # Apply permutation but only to the top part of the solution vector since
-    # it's padded with zeros for underdetermined problems
-    B[1:length(F.p), :] = B[F.p, :]
-    return B
-end
-_apply_permutation!(::Factorization, B::AbstractVecOrMat) = B
-
-function ldiv!(Fadj::AdjointFactorization{<:Any,<:Union{QR,QRCompactWY,QRPivoted}}, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    m, n = size(Fadj)
-
-    # We don't allow solutions overdetermined systems
-    if m > n
-        throw(DimensionMismatch("overdetermined systems are not supported"))
-    end
-    if n != size(B, 1)
-        throw(DimensionMismatch("inputs should have the same number of rows"))
-    end
-    F = parent(Fadj)
-
-    B = _apply_permutation!(F, B)
-
-    # For underdetermined system, the triangular solve should only be applied to the top
-    # part of B that contains the rhs. For square problems, the view corresponds to B itself
-    ldiv!(LowerTriangular(adjoint(F.R)), view(B, 1:size(F.R, 2), :))
-    lmul!(F.Q, B)
-
-    return B
-end
-
-# With a real lhs and complex rhs with the same precision, we can reinterpret the complex
-# rhs as a real rhs with twice the number of columns.
-
-# convenience methods to compute the return size correctly for vectors and matrices
-_ret_size(A::Factorization, b::AbstractVector) = (max(size(A, 2), length(b)),)
-_ret_size(A::Factorization, B::AbstractMatrix) = (max(size(A, 2), size(B, 1)), size(B, 2))
-
-function (\)(A::Union{QR{T},QRCompactWY{T},QRPivoted{T}}, BIn::VecOrMat{Complex{T}}) where T<:BlasReal
-    require_one_based_indexing(BIn)
-    m, n = size(A)
-    m == size(BIn, 1) || throw(DimensionMismatch("left hand side has $m rows, but right hand side has $(size(BIn,1)) rows"))
-
-# |z1|z3|  reinterpret  |x1|x2|x3|x4|  transpose  |x1|y1|  reshape  |x1|y1|x3|y3|
-# |z2|z4|      ->       |y1|y2|y3|y4|     ->      |x2|y2|     ->    |x2|y2|x4|y4|
-#                                                 |x3|y3|
-#                                                 |x4|y4|
-    B = reshape(copy(transpose(reinterpret(T, reshape(BIn, (1, length(BIn)))))), size(BIn, 1), 2*size(BIn, 2))
-
-    X = _zeros(T, B, n)
-    X[1:size(B, 1), :] = B
-
-    ldiv!(A, X)
-
-# |z1|z3|  reinterpret  |x1|x2|x3|x4|  transpose  |x1|y1|  reshape  |x1|y1|x3|y3|
-# |z2|z4|      <-       |y1|y2|y3|y4|     <-      |x2|y2|     <-    |x2|y2|x4|y4|
-#                                                 |x3|y3|
-#                                                 |x4|y4|
-    XX = reshape(collect(reinterpret(Complex{T}, copy(transpose(reshape(X, div(length(X), 2), 2))))), _ret_size(A, BIn))
-    return _cut_B(XX, 1:n)
-end
-
-##TODO:  Add methods for rank(A::QRP{T}) and adjust the (\) method accordingly
-##       Add rcond methods for Cholesky, LU, QR and QRP types
-## Lower priority: Add LQ, QL and RQ factorizations
-
-# FIXME! Should add balancing option through xgebal
diff --git a/stdlib/LinearAlgebra/src/schur.jl b/stdlib/LinearAlgebra/src/schur.jl
deleted file mode 100644
index 7257544ff872e..0000000000000
--- a/stdlib/LinearAlgebra/src/schur.jl
+++ /dev/null
@@ -1,449 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Schur decomposition
-"""
-    Schur <: Factorization
-
-Matrix factorization type of the Schur factorization of a matrix `A`. This is the
-return type of [`schur(_)`](@ref), the corresponding matrix factorization function.
-
-If `F::Schur` is the factorization object, the (quasi) triangular Schur factor can
-be obtained via either `F.Schur` or `F.T` and the orthogonal/unitary Schur vectors
-via `F.vectors` or `F.Z` such that `A = F.vectors * F.Schur * F.vectors'`. The
-eigenvalues of `A` can be obtained with `F.values`.
-
-Iterating the decomposition produces the components `F.T`, `F.Z`, and `F.values`.
-
-# Examples
-```jldoctest
-julia> A = [5. 7.; -2. -4.]
-2×2 Matrix{Float64}:
-  5.0   7.0
- -2.0  -4.0
-
-julia> F = schur(A)
-Schur{Float64, Matrix{Float64}, Vector{Float64}}
-T factor:
-2×2 Matrix{Float64}:
- 3.0   9.0
- 0.0  -2.0
-Z factor:
-2×2 Matrix{Float64}:
-  0.961524  0.274721
- -0.274721  0.961524
-eigenvalues:
-2-element Vector{Float64}:
-  3.0
- -2.0
-
-julia> F.vectors * F.Schur * F.vectors'
-2×2 Matrix{Float64}:
-  5.0   7.0
- -2.0  -4.0
-
-julia> t, z, vals = F; # destructuring via iteration
-
-julia> t == F.T && z == F.Z && vals == F.values
-true
-```
-"""
-struct Schur{Ty,S<:AbstractMatrix,C<:AbstractVector} <: Factorization{Ty}
-    T::S
-    Z::S
-    values::C
-    Schur{Ty,S,C}(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty},
-                  values::AbstractVector) where {Ty,S,C} = new(T, Z, values)
-end
-Schur(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}, values::AbstractVector) where {Ty} =
-    Schur{Ty, typeof(T), typeof(values)}(T, Z, values)
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(Schur{Ty,S}(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty},
-                       values::AbstractVector) where {Ty,S},
-           Schur{Ty,S,typeof(values)}(T, Z, values))
-
-# iteration for destructuring into components
-Base.iterate(S::Schur) = (S.T, Val(:Z))
-Base.iterate(S::Schur, ::Val{:Z}) = (S.Z, Val(:values))
-Base.iterate(S::Schur, ::Val{:values}) = (S.values, Val(:done))
-Base.iterate(S::Schur, ::Val{:done}) = nothing
-
-"""
-    schur!(A) -> F::Schur
-
-Same as [`schur`](@ref) but uses the input argument `A` as workspace.
-
-# Examples
-```jldoctest
-julia> A = [5. 7.; -2. -4.]
-2×2 Matrix{Float64}:
-  5.0   7.0
- -2.0  -4.0
-
-julia> F = schur!(A)
-Schur{Float64, Matrix{Float64}, Vector{Float64}}
-T factor:
-2×2 Matrix{Float64}:
- 3.0   9.0
- 0.0  -2.0
-Z factor:
-2×2 Matrix{Float64}:
-  0.961524  0.274721
- -0.274721  0.961524
-eigenvalues:
-2-element Vector{Float64}:
-  3.0
- -2.0
-
-julia> A
-2×2 Matrix{Float64}:
- 3.0   9.0
- 0.0  -2.0
-```
-"""
-schur!(A::StridedMatrix{<:BlasFloat}) = Schur(LinearAlgebra.LAPACK.gees!('V', A)...)
-
-schur!(A::UpperHessenberg{T}) where {T<:BlasFloat} = Schur(LinearAlgebra.LAPACK.hseqr!(parent(A))...)
-
-"""
-    schur(A) -> F::Schur
-
-Computes the Schur factorization of the matrix `A`. The (quasi) triangular Schur factor can
-be obtained from the `Schur` object `F` with either `F.Schur` or `F.T` and the
-orthogonal/unitary Schur vectors can be obtained with `F.vectors` or `F.Z` such that
-`A = F.vectors * F.Schur * F.vectors'`. The eigenvalues of `A` can be obtained with `F.values`.
-
-For real `A`, the Schur factorization is "quasitriangular", which means that it
-is upper-triangular except with 2×2 diagonal blocks for any conjugate pair
-of complex eigenvalues; this allows the factorization to be purely real even
-when there are complex eigenvalues.  To obtain the (complex) purely upper-triangular
-Schur factorization from a real quasitriangular factorization, you can use
-`Schur{Complex}(schur(A))`.
-
-Iterating the decomposition produces the components `F.T`, `F.Z`, and `F.values`.
-
-# Examples
-```jldoctest
-julia> A = [5. 7.; -2. -4.]
-2×2 Matrix{Float64}:
-  5.0   7.0
- -2.0  -4.0
-
-julia> F = schur(A)
-Schur{Float64, Matrix{Float64}, Vector{Float64}}
-T factor:
-2×2 Matrix{Float64}:
- 3.0   9.0
- 0.0  -2.0
-Z factor:
-2×2 Matrix{Float64}:
-  0.961524  0.274721
- -0.274721  0.961524
-eigenvalues:
-2-element Vector{Float64}:
-  3.0
- -2.0
-
-julia> F.vectors * F.Schur * F.vectors'
-2×2 Matrix{Float64}:
-  5.0   7.0
- -2.0  -4.0
-
-julia> t, z, vals = F; # destructuring via iteration
-
-julia> t == F.T && z == F.Z && vals == F.values
-true
-```
-"""
-schur(A::AbstractMatrix{T}) where {T} = schur!(copy_similar(A, eigtype(T)))
-schur(A::UpperHessenberg{T}) where {T} = schur!(copy_similar(A, eigtype(T)))
-function schur(A::RealHermSymComplexHerm)
-    F = eigen(A; sortby=nothing)
-    return Schur(typeof(F.vectors)(Diagonal(F.values)), F.vectors, F.values)
-end
-function schur(A::Union{UnitUpperTriangular{T},UpperTriangular{T}}) where {T}
-    t = eigtype(T)
-    Z = copy_similar(A, t)
-    return Schur(Z, Matrix{t}(I, size(A)), convert(Vector{t}, diag(A)))
-end
-function schur(A::Union{UnitLowerTriangular{T},LowerTriangular{T}}) where {T}
-    t = eigtype(T)
-    # double flip the matrix A
-    Z = copy_similar(A, t)
-    reverse!(reshape(Z, :))
-    # construct "reverse" identity
-    n = size(A, 1)
-    J = zeros(t, n, n)
-    for i in axes(J, 2)
-       J[n+1-i, i] = oneunit(t)
-    end
-    return Schur(Z, J, convert(Vector{t}, diag(A)))
-end
-function schur(A::Bidiagonal{T}) where {T}
-    t = eigtype(T)
-    if A.uplo == 'U'
-        return Schur(Matrix{t}(A), Matrix{t}(I, size(A)), Vector{t}(A.dv))
-    else # A.uplo == 'L'
-        # construct "reverse" identity
-        n = size(A, 1)
-        J = zeros(t, n, n)
-        for i in axes(J, 2)
-            J[n+1-i, i] = oneunit(t)
-        end
-        dv = reverse!(Vector{t}(A.dv))
-        ev = reverse!(Vector{t}(A.ev))
-        return Schur(Matrix{t}(Bidiagonal(dv, ev, 'U')), J, dv)
-    end
-end
-
-function getproperty(F::Schur, d::Symbol)
-    if d === :Schur
-        return getfield(F, :T)
-    elseif d === :vectors
-        return getfield(F, :Z)
-    else
-        getfield(F, d)
-    end
-end
-
-Base.propertynames(F::Schur) =
-    (:Schur, :vectors, fieldnames(typeof(F))...)
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Schur)
-    summary(io, F); println(io)
-    println(io, "T factor:")
-    show(io, mime, F.T)
-    println(io, "\nZ factor:")
-    show(io, mime, F.Z)
-    println(io, "\neigenvalues:")
-    show(io, mime, F.values)
-end
-
-# convert a (standard-form) quasi-triangular real Schur factorization into a
-# triangular complex Schur factorization.
-#
-# Based on the "triangularize" function from GenericSchur.jl,
-# released under the MIT "Expat" license by @RalphAS
-function Schur{CT}(S::Schur{<:Real}) where {CT<:Complex}
-    Tr = S.T
-    T = CT.(Tr)
-    Z = CT.(S.Z)
-    n = size(T,1)
-    for j=n:-1:2
-        if !iszero(Tr[j,j-1])
-            # We want a unitary similarity transform from
-            # ┌   ┐      ┌     ┐
-            # │a b│      │w₁  x│
-            # │c a│ into │0  w₂│ where bc < 0 (a,b,c real)
-            # └   ┘      └     ┘
-            # If we write it as
-            # ┌     ┐
-            # │u  v'│
-            # │-v u'│
-            # └     ┘
-            # and make the Ansatz that u is real (so v is imaginary),
-            # we arrive at a Givens rotation:
-            # θ = atan(sqrt(-Tr[j,j-1]/Tr[j-1,j]))
-            # s,c = sin(θ), cos(θ)
-            s = sqrt(abs(Tr[j,j-1]))
-            c = sqrt(abs(Tr[j-1,j]))
-            r = hypot(s,c)
-            G = Givens(j-1,j,complex(c/r),im*(-s/r))
-            lmul!(G,T)
-            rmul!(T,G')
-            rmul!(Z,G')
-        end
-    end
-    return Schur(triu!(T),Z,diag(T))
-end
-
-Schur{Complex}(S::Schur{<:Complex}) = S
-Schur{T}(S::Schur{T}) where {T} = S
-Schur{T}(S::Schur) where {T} = Schur(T.(S.T), T.(S.Z), T <: Real && !(eltype(S.values) <: Real) ? complex(T).(S.values) : T.(S.values))
-
-"""
-    ordschur!(F::Schur, select::Union{Vector{Bool},BitVector}) -> F::Schur
-
-Same as [`ordschur`](@ref) but overwrites the factorization `F`.
-"""
-function ordschur!(schur::Schur, select::Union{Vector{Bool},BitVector})
-    _, _, vals = _ordschur!(schur.T, schur.Z, select)
-    schur.values[:] = vals
-    return schur
-end
-
-_ordschur(T::StridedMatrix{Ty}, Z::StridedMatrix{Ty}, select::Union{Vector{Bool},BitVector}) where {Ty<:BlasFloat} =
-    _ordschur!(copy(T), copy(Z), select)
-
-_ordschur!(T::StridedMatrix{Ty}, Z::StridedMatrix{Ty}, select::Union{Vector{Bool},BitVector}) where {Ty<:BlasFloat} =
-    LinearAlgebra.LAPACK.trsen!(convert(Vector{BlasInt}, select), T, Z)[1:3]
-
-"""
-    ordschur(F::Schur, select::Union{Vector{Bool},BitVector}) -> F::Schur
-
-Reorders the Schur factorization `F` of a matrix `A = Z*T*Z'` according to the logical array
-`select` returning the reordered factorization `F` object. The selected eigenvalues appear
-in the leading diagonal of `F.Schur` and the corresponding leading columns of
-`F.vectors` form an orthogonal/unitary basis of the corresponding right invariant
-subspace. In the real case, a complex conjugate pair of eigenvalues must be either both
-included or both excluded via `select`.
-"""
-ordschur(schur::Schur, select::Union{Vector{Bool},BitVector}) =
-    Schur(_ordschur(schur.T, schur.Z, select)...)
-
-"""
-    GeneralizedSchur <: Factorization
-
-Matrix factorization type of the generalized Schur factorization of two matrices
-`A` and `B`. This is the return type of [`schur(_, _)`](@ref), the corresponding
-matrix factorization function.
-
-If `F::GeneralizedSchur` is the factorization object, the (quasi) triangular Schur
-factors can be obtained via `F.S` and `F.T`, the left unitary/orthogonal Schur
-vectors via `F.left` or `F.Q`, and the right unitary/orthogonal Schur vectors can
-be obtained with `F.right` or `F.Z` such that `A=F.left*F.S*F.right'` and
-`B=F.left*F.T*F.right'`. The generalized eigenvalues of `A` and `B` can be obtained
-with `F.α./F.β`.
-
-Iterating the decomposition produces the components `F.S`, `F.T`, `F.Q`, `F.Z`,
-`F.α`, and `F.β`.
-"""
-struct GeneralizedSchur{Ty,M<:AbstractMatrix,A<:AbstractVector,B<:AbstractVector{Ty}} <: Factorization{Ty}
-    S::M
-    T::M
-    α::A
-    β::B
-    Q::M
-    Z::M
-    function GeneralizedSchur{Ty,M,A,B}(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty},
-                                        alpha::AbstractVector, beta::AbstractVector{Ty},
-                                        Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where {Ty,M,A,B}
-        new{Ty,M,A,B}(S, T, alpha, beta, Q, Z)
-    end
-end
-function GeneralizedSchur(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty},
-                          alpha::AbstractVector, beta::AbstractVector{Ty},
-                          Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where Ty
-    GeneralizedSchur{Ty, typeof(S), typeof(alpha), typeof(beta)}(S, T, alpha, beta, Q, Z)
-end
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(GeneralizedSchur{Ty,M}(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty},
-                                 alpha::AbstractVector, beta::AbstractVector{Ty},
-                                 Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where {Ty,M},
-           GeneralizedSchur{Ty,M,typeof(alpha),typeof(beta)}(S, T, alpha, beta, Q, Z))
-
-# iteration for destructuring into components
-Base.iterate(S::GeneralizedSchur) = (S.S, Val(:T))
-Base.iterate(S::GeneralizedSchur, ::Val{:T}) = (S.T, Val(:Q))
-Base.iterate(S::GeneralizedSchur, ::Val{:Q}) = (S.Q, Val(:Z))
-Base.iterate(S::GeneralizedSchur, ::Val{:Z}) = (S.Z, Val(:α))
-Base.iterate(S::GeneralizedSchur, ::Val{:α}) = (S.α, Val(:β))
-Base.iterate(S::GeneralizedSchur, ::Val{:β}) = (S.β, Val(:done))
-Base.iterate(S::GeneralizedSchur, ::Val{:done}) = nothing
-
-"""
-    schur!(A::StridedMatrix, B::StridedMatrix) -> F::GeneralizedSchur
-
-Same as [`schur`](@ref) but uses the input matrices `A` and `B` as workspace.
-"""
-function schur!(A::StridedMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat}
-    if LAPACK.version() < v"3.6.0"
-        GeneralizedSchur(LinearAlgebra.LAPACK.gges!('V', 'V', A, B)...)
-    else
-        GeneralizedSchur(LinearAlgebra.LAPACK.gges3!('V', 'V', A, B)...)
-    end
-end
-
-"""
-    schur(A, B) -> F::GeneralizedSchur
-
-Computes the Generalized Schur (or QZ) factorization of the matrices `A` and `B`. The
-(quasi) triangular Schur factors can be obtained from the `Schur` object `F` with `F.S`
-and `F.T`, the left unitary/orthogonal Schur vectors can be obtained with `F.left` or
-`F.Q` and the right unitary/orthogonal Schur vectors can be obtained with `F.right` or
-`F.Z` such that `A=F.left*F.S*F.right'` and `B=F.left*F.T*F.right'`. The
-generalized eigenvalues of `A` and `B` can be obtained with `F.α./F.β`.
-
-Iterating the decomposition produces the components `F.S`, `F.T`, `F.Q`, `F.Z`,
-`F.α`, and `F.β`.
-"""
-function schur(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB}
-    S = promote_type(eigtype(TA), TB)
-    return schur!(copy_similar(A, S), copy_similar(B, S))
-end
-
-"""
-    ordschur!(F::GeneralizedSchur, select::Union{Vector{Bool},BitVector}) -> F::GeneralizedSchur
-
-Same as `ordschur` but overwrites the factorization `F`.
-"""
-function ordschur!(gschur::GeneralizedSchur, select::Union{Vector{Bool},BitVector})
-    _, _, α, β, _, _ = _ordschur!(gschur.S, gschur.T, gschur.Q, gschur.Z, select)
-    gschur.α[:] = α
-    gschur.β[:] = β
-    return gschur
-end
-
-_ordschur(S::StridedMatrix{Ty}, T::StridedMatrix{Ty}, Q::StridedMatrix{Ty},
-    Z::StridedMatrix{Ty}, select::Union{Vector{Bool},BitVector}) where {Ty<:BlasFloat} =
-        _ordschur!(copy(S), copy(T), copy(Q), copy(Z), select)
-
-_ordschur!(S::StridedMatrix{Ty}, T::StridedMatrix{Ty}, Q::StridedMatrix{Ty},
-    Z::StridedMatrix{Ty}, select::Union{Vector{Bool},BitVector}) where {Ty<:BlasFloat} =
-        LinearAlgebra.LAPACK.tgsen!(convert(Vector{BlasInt}, select), S, T, Q, Z)
-
-"""
-    ordschur(F::GeneralizedSchur, select::Union{Vector{Bool},BitVector}) -> F::GeneralizedSchur
-
-Reorders the Generalized Schur factorization `F` of a matrix pair `(A, B) = (Q*S*Z', Q*T*Z')`
-according to the logical array `select` and returns a GeneralizedSchur object `F`. The
-selected eigenvalues appear in the leading diagonal of both `F.S` and `F.T`, and the
-left and right orthogonal/unitary Schur vectors are also reordered such that
-`(A, B) = F.Q*(F.S, F.T)*F.Z'` still holds and the generalized eigenvalues of `A`
-and `B` can still be obtained with `F.α./F.β`.
-"""
-ordschur(gschur::GeneralizedSchur, select::Union{Vector{Bool},BitVector}) =
-    GeneralizedSchur(_ordschur(gschur.S, gschur.T, gschur.Q, gschur.Z, select)...)
-
-function getproperty(F::GeneralizedSchur, d::Symbol)
-    if d === :values
-        return getfield(F, :α) ./ getfield(F, :β)
-    elseif d === :alpha
-        return getfield(F, :α)
-    elseif d === :beta
-        return getfield(F, :β)
-    elseif d === :left
-        return getfield(F, :Q)
-    elseif d === :right
-        return getfield(F, :Z)
-    else
-        getfield(F, d)
-    end
-end
-
-Base.propertynames(F::GeneralizedSchur) =
-    (:values, :left, :right, fieldnames(typeof(F))...)
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::GeneralizedSchur)
-    summary(io, F); println(io)
-    println(io, "S factor:")
-    show(io, mime, F.S)
-    println(io, "\nT factor:")
-    show(io, mime, F.T)
-    println(io, "\nQ factor:")
-    show(io, mime, F.Q)
-    println(io, "\nZ factor:")
-    show(io, mime, F.Z)
-    println(io, "\nα:")
-    show(io, mime, F.α)
-    println(io, "\nβ:")
-    show(io, mime, F.β)
-end
-
-# Conversion
-AbstractMatrix(F::Schur) = (F.Z * F.T) * F.Z'
-AbstractArray(F::Schur) = AbstractMatrix(F)
-Matrix(F::Schur) = Array(AbstractArray(F))
-Array(F::Schur) = Matrix(F)
-
-copy(F::Schur) = Schur(copy(F.T), copy(F.Z), copy(F.values))
-copy(F::GeneralizedSchur) = GeneralizedSchur(copy(F.S), copy(F.T), copy(F.α), copy(F.β), copy(F.Q), copy(F.Z))
diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl
deleted file mode 100644
index 22567c2a8ef96..0000000000000
--- a/stdlib/LinearAlgebra/src/special.jl
+++ /dev/null
@@ -1,360 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Methods operating on different special matrix types
-
-# Interconversion between special matrix types
-
-# conversions from Diagonal to other special matrix types
-Bidiagonal(A::Diagonal) = Bidiagonal(A.diag, fill!(similar(A.diag, length(A.diag)-1), 0), :U)
-SymTridiagonal(A::Diagonal) = SymTridiagonal(A.diag, fill!(similar(A.diag, length(A.diag)-1), 0))
-Tridiagonal(A::Diagonal) = Tridiagonal(fill!(similar(A.diag, length(A.diag)-1), 0), A.diag,
-                                       fill!(similar(A.diag, length(A.diag)-1), 0))
-
-# conversions from Bidiagonal to other special matrix types
-Diagonal(A::Bidiagonal) = Diagonal(A.dv)
-SymTridiagonal(A::Bidiagonal) =
-    iszero(A.ev) ? SymTridiagonal(A.dv, A.ev) :
-        throw(ArgumentError("matrix cannot be represented as SymTridiagonal"))
-Tridiagonal(A::Bidiagonal) =
-    Tridiagonal(A.uplo == 'U' ? fill!(similar(A.ev), 0) : A.ev, A.dv,
-                A.uplo == 'U' ? A.ev : fill!(similar(A.ev), 0))
-
-# conversions from SymTridiagonal to other special matrix types
-Diagonal(A::SymTridiagonal) = Diagonal(A.dv)
-
-# These can fail when ev has the same length as dv
-# TODO: Revisit when a good solution for #42477 is found
-Bidiagonal(A::SymTridiagonal) =
-    iszero(A.ev) ? Bidiagonal(A.dv, A.ev, :U) :
-        throw(ArgumentError("matrix cannot be represented as Bidiagonal"))
-Tridiagonal(A::SymTridiagonal) =
-    Tridiagonal(copy(A.ev), A.dv, A.ev)
-
-# conversions from Tridiagonal to other special matrix types
-Diagonal(A::Tridiagonal) = Diagonal(A.d)
-Bidiagonal(A::Tridiagonal) =
-    iszero(A.dl) ? Bidiagonal(A.d, A.du, :U) :
-    iszero(A.du) ? Bidiagonal(A.d, A.dl, :L) :
-        throw(ArgumentError("matrix cannot be represented as Bidiagonal"))
-
-# conversions from AbstractTriangular to special matrix types
-Bidiagonal(A::AbstractTriangular) =
-    isbanded(A, 0, 1) ? Bidiagonal(diag(A, 0), diag(A,  1), :U) : # is upper bidiagonal
-    isbanded(A, -1, 0) ? Bidiagonal(diag(A, 0), diag(A, -1), :L) : # is lower bidiagonal
-        throw(ArgumentError("matrix cannot be represented as Bidiagonal"))
-
-_lucopy(A::Bidiagonal, T) = copymutable_oftype(Tridiagonal(A), T)
-_lucopy(A::Diagonal, T)   = copymutable_oftype(Tridiagonal(A), T)
-function _lucopy(A::SymTridiagonal, T)
-    du = copy_similar(_evview(A), T)
-    dl = copy.(transpose.(du))
-    d  = copy_similar(A.dv, T)
-    return Tridiagonal(dl, d, du)
-end
-
-const ConvertibleSpecialMatrix = Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal,AbstractTriangular}
-const PossibleTriangularMatrix = Union{Diagonal, Bidiagonal, AbstractTriangular}
-
-convert(::Type{T}, m::ConvertibleSpecialMatrix) where {T<:Diagonal}       = m isa T ? m :
-    isdiag(m) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as Diagonal"))
-convert(::Type{T}, m::ConvertibleSpecialMatrix) where {T<:SymTridiagonal} = m isa T ? m :
-    issymmetric(m) && isbanded(m, -1, 1) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as SymTridiagonal"))
-convert(::Type{T}, m::ConvertibleSpecialMatrix) where {T<:Tridiagonal}    = m isa T ? m :
-    isbanded(m, -1, 1) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as Tridiagonal"))
-
-convert(::Type{T}, m::Union{LowerTriangular,UnitLowerTriangular}) where {T<:LowerTriangular} = m isa T ? m : T(m)::T
-convert(::Type{T}, m::Union{UpperTriangular,UnitUpperTriangular}) where {T<:UpperTriangular} = m isa T ? m : T(m)::T
-
-convert(::Type{T}, m::PossibleTriangularMatrix) where {T<:LowerTriangular} = m isa T ? m :
-    istril(m) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as LowerTriangular"))
-convert(::Type{T}, m::PossibleTriangularMatrix) where {T<:UpperTriangular} = m isa T ? m :
-    istriu(m) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as UpperTriangular"))
-
-# Constructs two method definitions taking into account (assumed) commutativity
-# e.g. @commutative f(x::S, y::T) where {S,T} = x+y is the same is defining
-#     f(x::S, y::T) where {S,T} = x+y
-#     f(y::T, x::S) where {S,T} = f(x, y)
-macro commutative(myexpr)
-    @assert Base.is_function_def(myexpr) # Make sure it is a function definition
-    y = copy(myexpr.args[1].args[2:end])
-    reverse!(y)
-    reversed_call = Expr(:(=), Expr(:call,myexpr.args[1].args[1],y...), myexpr.args[1])
-    esc(Expr(:block, myexpr, reversed_call))
-end
-
-for op in (:+, :-)
-    for (matrixtype, uplo, converttype) in ((:UpperTriangular, 'U', :UpperTriangular),
-                                            (:UnitUpperTriangular, 'U', :UpperTriangular),
-                                            (:LowerTriangular, 'L', :LowerTriangular),
-                                            (:UnitLowerTriangular, 'L', :LowerTriangular))
-        @eval begin
-            function ($op)(A::$matrixtype, B::Bidiagonal)
-                if B.uplo == $uplo
-                    ($op)(A, convert($converttype, B))
-                else
-                    ($op).(A, B)
-                end
-            end
-
-            function ($op)(A::Bidiagonal, B::$matrixtype)
-                if A.uplo == $uplo
-                    ($op)(convert($converttype, A), B)
-                else
-                    ($op).(A, B)
-                end
-            end
-        end
-    end
-end
-
-# disambiguation between triangular and banded matrices, banded ones "dominate"
-mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix) = _mul!(C, A, B, MulAddMul())
-mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractTriangular) = _mul!(C, A, B, MulAddMul())
-mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix, alpha::Number, beta::Number) =
-    _mul!(C, A, B, MulAddMul(alpha, beta))
-mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractTriangular, alpha::Number, beta::Number) =
-    _mul!(C, A, B, MulAddMul(alpha, beta))
-
-function *(H::UpperHessenberg, B::Bidiagonal)
-    T = promote_op(matprod, eltype(H), eltype(B))
-    A = mul!(similar(H, T, size(H)), H, B)
-    return B.uplo == 'U' ? UpperHessenberg(A) : A
-end
-function *(B::Bidiagonal, H::UpperHessenberg)
-    T = promote_op(matprod, eltype(B), eltype(H))
-    A = mul!(similar(H, T, size(H)), B, H)
-    return B.uplo == 'U' ? UpperHessenberg(A) : A
-end
-
-function /(H::UpperHessenberg, B::Bidiagonal)
-    T = typeof(oneunit(eltype(H))/oneunit(eltype(B)))
-    A = _rdiv!(similar(H, T, size(H)), H, B)
-    return B.uplo == 'U' ? UpperHessenberg(A) : A
-end
-
-function \(B::Bidiagonal, H::UpperHessenberg)
-    T = typeof(oneunit(eltype(B))\oneunit(eltype(H)))
-    A = ldiv!(similar(H, T, size(H)), B, H)
-    return B.uplo == 'U' ? UpperHessenberg(A) : A
-end
-
-# specialized +/- for structured matrices. If these are removed, it falls
-# back to broadcasting which has ~2-10x speed regressions.
-# For the other structure matrix pairs, broadcasting works well.
-
-# For structured matrix types with different non-zero diagonals the underlying
-# representations must be promoted to the same type.
-# For example, in Diagonal + Bidiagonal only the main diagonal is touched so
-# the off diagonal could be a different type after the operation resulting in
-# an error. See issue #28994
-
-@commutative function (+)(A::Bidiagonal, B::Diagonal)
-    newdv = A.dv + B.diag
-    Bidiagonal(newdv, typeof(newdv)(A.ev), A.uplo)
-end
-
-function (-)(A::Bidiagonal, B::Diagonal)
-    newdv = A.dv - B.diag
-    Bidiagonal(newdv, typeof(newdv)(A.ev), A.uplo)
-end
-
-function (-)(A::Diagonal, B::Bidiagonal)
-    newdv = A.diag - B.dv
-    Bidiagonal(newdv, typeof(newdv)(-B.ev), B.uplo)
-end
-
-@commutative function (+)(A::Diagonal, B::SymTridiagonal)
-    newdv = A.diag + B.dv
-    SymTridiagonal(A.diag + B.dv, typeof(newdv)(B.ev))
-end
-
-function (-)(A::Diagonal, B::SymTridiagonal)
-    newdv = A.diag - B.dv
-    SymTridiagonal(newdv, typeof(newdv)(-B.ev))
-end
-
-function (-)(A::SymTridiagonal, B::Diagonal)
-    newdv = A.dv - B.diag
-    SymTridiagonal(newdv, typeof(newdv)(A.ev))
-end
-
-# this set doesn't have the aforementioned problem
-
-@commutative (+)(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl+_evview(B), A.d+B.dv, A.du+_evview(B))
--(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl-_evview(B), A.d-B.dv, A.du-_evview(B))
--(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(_evview(A)-B.dl, A.dv-B.d, _evview(A)-B.du)
-
-@commutative function (+)(A::Diagonal, B::Tridiagonal)
-    newdv = A.diag + B.d
-    Tridiagonal(typeof(newdv)(B.dl), newdv, typeof(newdv)(B.du))
-end
-
-function (-)(A::Diagonal, B::Tridiagonal)
-    newdv = A.diag - B.d
-    Tridiagonal(typeof(newdv)(-B.dl), newdv, typeof(newdv)(-B.du))
-end
-
-function (-)(A::Tridiagonal, B::Diagonal)
-    newdv = A.d - B.diag
-    Tridiagonal(typeof(newdv)(A.dl), newdv, typeof(newdv)(A.du))
-end
-
-@commutative function (+)(A::Bidiagonal, B::Tridiagonal)
-    newdv = A.dv + B.d
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(B.dl), newdv, A.ev+B.du) : (A.ev+B.dl, newdv, typeof(newdv)(B.du)))...)
-end
-
-function (-)(A::Bidiagonal, B::Tridiagonal)
-    newdv = A.dv - B.d
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-B.dl), newdv, A.ev-B.du) : (A.ev-B.dl, newdv, typeof(newdv)(-B.du)))...)
-end
-
-function (-)(A::Tridiagonal, B::Bidiagonal)
-    newdv = A.d - B.dv
-    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(A.dl), newdv, A.du-B.ev) : (A.dl-B.ev, newdv, typeof(newdv)(A.du)))...)
-end
-
-@commutative function (+)(A::Bidiagonal, B::SymTridiagonal)
-    newdv = A.dv + B.dv
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(_evview(B)), A.dv+B.dv, A.ev+_evview(B)) : (A.ev+_evview(B), A.dv+B.dv, typeof(newdv)(_evview(B))))...)
-end
-
-function (-)(A::Bidiagonal, B::SymTridiagonal)
-    newdv = A.dv - B.dv
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-_evview(B)), newdv, A.ev-_evview(B)) : (A.ev-_evview(B), newdv, typeof(newdv)(-_evview(B))))...)
-end
-
-function (-)(A::SymTridiagonal, B::Bidiagonal)
-    newdv = A.dv - B.dv
-    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview(A)), newdv, _evview(A)-B.ev) : (_evview(A)-B.ev, newdv, typeof(newdv)(_evview(A))))...)
-end
-
-@commutative function (+)(A::Tridiagonal, B::UniformScaling)
-    newd = A.d .+ Ref(B)
-    Tridiagonal(typeof(newd)(A.dl), newd, typeof(newd)(A.du))
-end
-
-@commutative function (+)(A::SymTridiagonal, B::UniformScaling)
-    newdv = A.dv .+ Ref(B)
-    SymTridiagonal(newdv, typeof(newdv)(A.ev))
-end
-
-@commutative function (+)(A::Bidiagonal, B::UniformScaling)
-    newdv = A.dv .+ Ref(B)
-    Bidiagonal(newdv, typeof(newdv)(A.ev), A.uplo)
-end
-
-@commutative function (+)(A::Diagonal, B::UniformScaling)
-    Diagonal(A.diag .+ Ref(B))
-end
-
-# StructuredMatrix - UniformScaling = StructuredMatrix + (-UniformScaling) =>
-# no need to define reversed order
-function (-)(A::UniformScaling, B::Tridiagonal)
-    d = Ref(A) .- B.d
-    Tridiagonal(convert(typeof(d), -B.dl), d, convert(typeof(d), -B.du))
-end
-function (-)(A::UniformScaling, B::SymTridiagonal)
-    dv = Ref(A) .- B.dv
-    SymTridiagonal(dv, convert(typeof(dv), -B.ev))
-end
-function (-)(A::UniformScaling, B::Bidiagonal)
-    dv = Ref(A) .- B.dv
-    Bidiagonal(dv, convert(typeof(dv), -B.ev), B.uplo)
-end
-function (-)(A::UniformScaling, B::Diagonal)
-    Diagonal(Ref(A) .- B.diag)
-end
-
-## Diagonal construction from UniformScaling
-Diagonal{T}(s::UniformScaling, m::Integer) where {T} = Diagonal{T}(fill(T(s.λ), m))
-Diagonal(s::UniformScaling, m::Integer) = Diagonal{eltype(s)}(s, m)
-
-Base.muladd(A::Union{Diagonal, UniformScaling}, B::Union{Diagonal, UniformScaling}, z::Union{Diagonal, UniformScaling}) =
-    Diagonal(_diag_or_value(A) .* _diag_or_value(B) .+ _diag_or_value(z))
-
-_diag_or_value(A::Diagonal) = A.diag
-_diag_or_value(A::UniformScaling) = A.λ
-
-# fill[stored]! methods
-fillstored!(A::Diagonal, x) = (fill!(A.diag, x); A)
-fillstored!(A::Bidiagonal, x) = (fill!(A.dv, x); fill!(A.ev, x); A)
-fillstored!(A::Tridiagonal, x) = (fill!(A.dl, x); fill!(A.d, x); fill!(A.du, x); A)
-fillstored!(A::SymTridiagonal, x) = (fill!(A.dv, x); fill!(A.ev, x); A)
-
-_small_enough(A::Union{Diagonal, Bidiagonal}) = size(A, 1) <= 1
-_small_enough(A::Tridiagonal) = size(A, 1) <= 2
-_small_enough(A::SymTridiagonal) = size(A, 1) <= 2
-
-function fill!(A::Union{Diagonal,Bidiagonal,Tridiagonal,SymTridiagonal}, x)
-    xT = convert(eltype(A), x)
-    (iszero(xT) || _small_enough(A)) && return fillstored!(A, xT)
-    throw(ArgumentError("array of type $(typeof(A)) and size $(size(A)) can
-    not be filled with $x, since some of its entries are constrained."))
-end
-
-one(D::Diagonal) = Diagonal(one.(D.diag))
-one(A::Bidiagonal{T}) where T = Bidiagonal(fill!(similar(A.dv, typeof(one(T))), one(T)), fill!(similar(A.ev, typeof(one(T))), zero(one(T))), A.uplo)
-one(A::Tridiagonal{T}) where T = Tridiagonal(fill!(similar(A.du, typeof(one(T))), zero(one(T))), fill!(similar(A.d, typeof(one(T))), one(T)), fill!(similar(A.dl, typeof(one(T))), zero(one(T))))
-one(A::SymTridiagonal{T}) where T = SymTridiagonal(fill!(similar(A.dv, typeof(one(T))), one(T)), fill!(similar(A.ev, typeof(one(T))), zero(one(T))))
-for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTriangular)
-    @eval one(A::$t) = $t(one(parent(A)))
-    @eval oneunit(A::$t) = $t(oneunit(parent(A)))
-end
-
-zero(D::Diagonal) = Diagonal(zero.(D.diag))
-oneunit(D::Diagonal) = Diagonal(oneunit.(D.diag))
-
-isdiag(A::HermOrSym{<:Any,<:Diagonal}) = isdiag(parent(A))
-dot(x::AbstractVector, A::RealHermSymComplexSym{<:Real,<:Diagonal}, y::AbstractVector) =
-    dot(x, A.data, y)
-
-# equals and approx equals methods for structured matrices
-# SymTridiagonal == Tridiagonal is already defined in tridiag.jl
-
-==(A::Diagonal, B::Bidiagonal) = iszero(B.ev) && A.diag == B.dv
-==(A::Diagonal, B::SymTridiagonal) = iszero(_evview(B)) && A.diag == B.dv
-==(B::Bidiagonal, A::Diagonal) = A == B
-==(A::Diagonal, B::Tridiagonal) = iszero(B.dl) && iszero(B.du) && A.diag == B.d
-==(B::Tridiagonal, A::Diagonal) = A == B
-
-function ==(A::Bidiagonal, B::Tridiagonal)
-    if A.uplo == 'U'
-        return iszero(B.dl) && A.dv == B.d && A.ev == B.du
-    else
-        return iszero(B.du) && A.dv == B.d && A.ev == B.dl
-    end
-end
-==(B::Tridiagonal, A::Bidiagonal) = A == B
-
-==(A::Bidiagonal, B::SymTridiagonal) = iszero(_evview(B)) && iszero(A.ev) && A.dv == B.dv
-==(B::SymTridiagonal, A::Bidiagonal) = A == B
-
-# concatenation
-const _SpecialArrays = Union{Diagonal, Bidiagonal, Tridiagonal, SymTridiagonal}
-const _Symmetric_DenseArrays{T,A<:Matrix} = Symmetric{T,A}
-const _Hermitian_DenseArrays{T,A<:Matrix} = Hermitian{T,A}
-const _Triangular_DenseArrays{T,A<:Matrix} = UpperOrLowerTriangular{T,A}
-const _Annotated_DenseArrays = Union{_SpecialArrays, _Triangular_DenseArrays, _Symmetric_DenseArrays, _Hermitian_DenseArrays}
-const _Annotated_Typed_DenseArrays{T} = Union{_Triangular_DenseArrays{T}, _Symmetric_DenseArrays{T}, _Hermitian_DenseArrays{T}}
-const _DenseConcatGroup = Union{Number, Vector, Adjoint{<:Any,<:Vector}, Transpose{<:Any,<:Vector}, Matrix, _Annotated_DenseArrays}
-const _TypedDenseConcatGroup{T} = Union{Vector{T}, Adjoint{T,Vector{T}}, Transpose{T,Vector{T}}, Matrix{T}, _Annotated_Typed_DenseArrays{T}}
-
-promote_to_array_type(::Tuple{Vararg{Union{_DenseConcatGroup,UniformScaling}}}) = Matrix
-
-Base._cat(dims, xs::_DenseConcatGroup...) = Base._cat_t(dims, promote_eltype(xs...), xs...)
-vcat(A::_DenseConcatGroup...) = Base.typed_vcat(promote_eltype(A...), A...)
-hcat(A::_DenseConcatGroup...) = Base.typed_hcat(promote_eltype(A...), A...)
-hvcat(rows::Tuple{Vararg{Int}}, xs::_DenseConcatGroup...) = Base.typed_hvcat(promote_eltype(xs...), rows, xs...)
-# For performance, specially handle the case where the matrices/vectors have homogeneous eltype
-Base._cat(dims, xs::_TypedDenseConcatGroup{T}...) where {T} = Base._cat_t(dims, T, xs...)
-vcat(A::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_vcat(T, A...)
-hcat(A::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_hcat(T, A...)
-hvcat(rows::Tuple{Vararg{Int}}, xs::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_hvcat(T, rows, xs...)
-
-# factorizations
-function cholesky(S::RealHermSymComplexHerm{<:Real,<:SymTridiagonal}, ::NoPivot = NoPivot(); check::Bool = true)
-    T = choltype(eltype(S))
-    B = Bidiagonal{T}(diag(S, 0), diag(S, S.uplo == 'U' ? 1 : -1), sym_uplo(S.uplo))
-    cholesky!(Hermitian(B, sym_uplo(S.uplo)), NoPivot(); check = check)
-end
diff --git a/stdlib/LinearAlgebra/src/structuredbroadcast.jl b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
deleted file mode 100644
index 02e39b199679b..0000000000000
--- a/stdlib/LinearAlgebra/src/structuredbroadcast.jl
+++ /dev/null
@@ -1,256 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## Broadcast styles
-import Base.Broadcast
-using Base.Broadcast: DefaultArrayStyle, Broadcasted, tail
-
-struct StructuredMatrixStyle{T} <: Broadcast.AbstractArrayStyle{2} end
-StructuredMatrixStyle{T}(::Val{2}) where {T} = StructuredMatrixStyle{T}()
-StructuredMatrixStyle{T}(::Val{N}) where {T,N} = Broadcast.DefaultArrayStyle{N}()
-
-const StructuredMatrix = Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal,LowerTriangular,UnitLowerTriangular,UpperTriangular,UnitUpperTriangular}
-for ST in Base.uniontypes(StructuredMatrix)
-    @eval Broadcast.BroadcastStyle(::Type{<:$ST}) = $(StructuredMatrixStyle{ST}())
-end
-
-# Promotion of broadcasts between structured matrices. This is slightly unusual
-# as we define them symmetrically. This allows us to have a fallback to DefaultArrayStyle{2}().
-# Diagonal can cavort with all the other structured matrix types.
-# Bidiagonal doesn't know if it's upper or lower, so it becomes Tridiagonal
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{Diagonal}) =
-    StructuredMatrixStyle{Diagonal}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{Bidiagonal}) =
-    StructuredMatrixStyle{Bidiagonal}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{SymTridiagonal,Tridiagonal}}) =
-    StructuredMatrixStyle{Tridiagonal}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}) =
-    StructuredMatrixStyle{LowerTriangular}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}) =
-    StructuredMatrixStyle{UpperTriangular}()
-
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{Bidiagonal}, ::StructuredMatrixStyle{Diagonal}) =
-    StructuredMatrixStyle{Bidiagonal}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{Bidiagonal}, ::StructuredMatrixStyle{<:Union{Bidiagonal,SymTridiagonal,Tridiagonal}}) =
-    StructuredMatrixStyle{Tridiagonal}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{SymTridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) =
-    StructuredMatrixStyle{Tridiagonal}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{Tridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) =
-    StructuredMatrixStyle{Tridiagonal}()
-
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{LowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) =
-    StructuredMatrixStyle{LowerTriangular}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{UpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) =
-    StructuredMatrixStyle{UpperTriangular}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{UnitLowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) =
-    StructuredMatrixStyle{LowerTriangular}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{UnitUpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) =
-    StructuredMatrixStyle{UpperTriangular}()
-
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}, ::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}) =
-    StructuredMatrixStyle{Matrix}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}, ::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}) =
-    StructuredMatrixStyle{Matrix}()
-
-# Make sure that `StructuredMatrixStyle{Matrix}` doesn't ever end up falling
-# through and give back `DefaultArrayStyle{2}`
-Broadcast.BroadcastStyle(T::StructuredMatrixStyle{Matrix}, ::StructuredMatrixStyle) = T
-Broadcast.BroadcastStyle(::StructuredMatrixStyle, T::StructuredMatrixStyle{Matrix}) = T
-Broadcast.BroadcastStyle(T::StructuredMatrixStyle{Matrix}, ::StructuredMatrixStyle{Matrix}) = T
-
-# All other combinations fall back to the default style
-Broadcast.BroadcastStyle(::StructuredMatrixStyle, ::StructuredMatrixStyle) = DefaultArrayStyle{2}()
-
-# And a definition akin to similar using the structured type:
-structured_broadcast_alloc(bc, ::Type{Diagonal}, ::Type{ElType}, n) where {ElType} =
-    Diagonal(Array{ElType}(undef, n))
-# Bidiagonal is tricky as we need to know if it's upper or lower. The promotion
-# system will return Tridiagonal when there's more than one Bidiagonal, but when
-# there's only one, we need to make figure out upper or lower
-merge_uplos(::Nothing, ::Nothing) = nothing
-merge_uplos(a, ::Nothing) = a
-merge_uplos(::Nothing, b) = b
-merge_uplos(a, b) = a == b ? a : 'T'
-
-find_uplo(a::Bidiagonal) = a.uplo
-find_uplo(a) = nothing
-find_uplo(bc::Broadcasted) = mapfoldl(find_uplo, merge_uplos, Broadcast.cat_nested(bc), init=nothing)
-
-function structured_broadcast_alloc(bc, ::Type{Bidiagonal}, ::Type{ElType}, n) where {ElType}
-    uplo = n > 0 ? find_uplo(bc) : 'U'
-    n1 = max(n - 1, 0)
-    if uplo == 'T'
-        return Tridiagonal(Array{ElType}(undef, n1), Array{ElType}(undef, n), Array{ElType}(undef, n1))
-    end
-    return Bidiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n1), uplo)
-end
-structured_broadcast_alloc(bc, ::Type{SymTridiagonal}, ::Type{ElType}, n) where {ElType} =
-    SymTridiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n-1))
-structured_broadcast_alloc(bc, ::Type{Tridiagonal}, ::Type{ElType}, n) where {ElType} =
-    Tridiagonal(Array{ElType}(undef, n-1),Array{ElType}(undef, n),Array{ElType}(undef, n-1))
-structured_broadcast_alloc(bc, ::Type{LowerTriangular}, ::Type{ElType}, n) where {ElType} =
-    LowerTriangular(Array{ElType}(undef, n, n))
-structured_broadcast_alloc(bc, ::Type{UpperTriangular}, ::Type{ElType}, n) where {ElType} =
-    UpperTriangular(Array{ElType}(undef, n, n))
-structured_broadcast_alloc(bc, ::Type{UnitLowerTriangular}, ::Type{ElType}, n) where {ElType} =
-    UnitLowerTriangular(Array{ElType}(undef, n, n))
-structured_broadcast_alloc(bc, ::Type{UnitUpperTriangular}, ::Type{ElType}, n) where {ElType} =
-    UnitUpperTriangular(Array{ElType}(undef, n, n))
-structured_broadcast_alloc(bc, ::Type{Matrix}, ::Type{ElType}, n) where {ElType} =
-    Matrix(Array{ElType}(undef, n, n))
-
-# A _very_ limited list of structure-preserving functions known at compile-time. This list is
-# derived from the formerly-implemented `broadcast` methods in 0.6. Note that this must
-# preserve both zeros and ones (for Unit***erTriangular) and symmetry (for SymTridiagonal)
-const TypeFuncs = Union{typeof(round),typeof(trunc),typeof(floor),typeof(ceil)}
-isstructurepreserving(bc::Broadcasted) = isstructurepreserving(bc.f, bc.args...)
-isstructurepreserving(::Union{typeof(abs),typeof(big)}, ::StructuredMatrix) = true
-isstructurepreserving(::TypeFuncs, ::StructuredMatrix) = true
-isstructurepreserving(::TypeFuncs, ::Ref{<:Type}, ::StructuredMatrix) = true
-function isstructurepreserving(::typeof(Base.literal_pow), ::Ref{typeof(^)}, ::StructuredMatrix, ::Ref{Val{N}}) where N
-    return N isa Integer && N > 0
-end
-isstructurepreserving(f, args...) = false
-
-"""
-    iszerodefined(T::Type)
-
-Return a `Bool` indicating whether `iszero` is well-defined for objects of type
-`T`. By default, this function returns `false` unless `T <: Number`. Note that
-this function may return `true` even if `zero(::T)` is not defined as long as
-`iszero(::T)` has a method that does not requires `zero(::T)`.
-
-This function is used to determine if mapping the elements of an array with
-a specific structure of nonzero elements preserve this structure.
-For instance, it is used to determine whether the output of
-`tuple.(Diagonal([1, 2]))` is `Diagonal([(1,), (2,)])` or
-`[(1,) (0,); (0,) (2,)]`. For this, we need to determine whether `(0,)` is
-considered to be zero. `iszero((0,))` falls back to `(0,) == zero((0,))` which
-fails as `zero(::Tuple{Int})` is not defined. However,
-`iszerodefined(::Tuple{Int})` is `false` hence we falls back to the comparison
-`(0,) == 0` which returns `false` and decides that the correct output is
-`[(1,) (0,); (0,) (2,)]`.
-"""
-iszerodefined(::Type) = false
-iszerodefined(::Type{<:Number}) = true
-iszerodefined(::Type{<:AbstractArray{T}}) where T = iszerodefined(T)
-
-fzeropreserving(bc) = (v = fzero(bc); !ismissing(v) && (iszerodefined(typeof(v)) ? iszero(v) : v == 0))
-# Like sparse matrices, we assume that the zero-preservation property of a broadcasted
-# expression is stable.  We can test the zero-preservability by applying the function
-# in cases where all other arguments are known scalars against a zero from the structured
-# matrix. If any non-structured matrix argument is not a known scalar, we give up.
-fzero(x::Number) = x
-fzero(::Type{T}) where T = T
-fzero(r::Ref) = r[]
-fzero(t::Tuple{Any}) = t[1]
-fzero(S::StructuredMatrix) = zero(eltype(S))
-fzero(x) = missing
-function fzero(bc::Broadcast.Broadcasted)
-    args = map(fzero, bc.args)
-    return any(ismissing, args) ? missing : bc.f(args...)
-end
-
-function Base.similar(bc::Broadcasted{StructuredMatrixStyle{T}}, ::Type{ElType}) where {T,ElType}
-    inds = axes(bc)
-    if isstructurepreserving(bc) || (fzeropreserving(bc) && !(T <: Union{SymTridiagonal,UnitLowerTriangular,UnitUpperTriangular}))
-        return structured_broadcast_alloc(bc, T, ElType, length(inds[1]))
-    end
-    return similar(convert(Broadcasted{DefaultArrayStyle{ndims(bc)}}, bc), ElType)
-end
-
-isvalidstructbc(dest, bc::Broadcasted{T}) where {T<:StructuredMatrixStyle} =
-    Broadcast.combine_styles(dest, bc) === Broadcast.combine_styles(dest) &&
-    (isstructurepreserving(bc) || fzeropreserving(bc))
-
-isvalidstructbc(dest::Bidiagonal, bc::Broadcasted{StructuredMatrixStyle{Bidiagonal}}) =
-    (size(dest, 1) < 2 || find_uplo(bc) == dest.uplo) &&
-    (isstructurepreserving(bc) || fzeropreserving(bc))
-
-function copyto!(dest::Diagonal, bc::Broadcasted{<:StructuredMatrixStyle})
-    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
-    axs = axes(dest)
-    axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
-    for i in axs[1]
-        dest.diag[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
-    end
-    return dest
-end
-
-function copyto!(dest::Bidiagonal, bc::Broadcasted{<:StructuredMatrixStyle})
-    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
-    axs = axes(dest)
-    axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
-    for i in axs[1]
-        dest.dv[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
-    end
-    if dest.uplo == 'U'
-        for i = 1:size(dest, 1)-1
-            dest.ev[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
-        end
-    else
-        for i = 1:size(dest, 1)-1
-            dest.ev[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))
-        end
-    end
-    return dest
-end
-
-function copyto!(dest::SymTridiagonal, bc::Broadcasted{<:StructuredMatrixStyle})
-    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
-    axs = axes(dest)
-    axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
-    for i in axs[1]
-        dest.dv[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
-    end
-    for i = 1:size(dest, 1)-1
-        v = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
-        v == (@inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))) || throw(ArgumentError("broadcasted assignment breaks symmetry between locations ($i, $(i+1)) and ($(i+1), $i)"))
-        dest.ev[i] = v
-    end
-    return dest
-end
-
-function copyto!(dest::Tridiagonal, bc::Broadcasted{<:StructuredMatrixStyle})
-    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
-    axs = axes(dest)
-    axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
-    for i in axs[1]
-        dest.d[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
-    end
-    for i = 1:size(dest, 1)-1
-        dest.du[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
-        dest.dl[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))
-    end
-    return dest
-end
-
-function copyto!(dest::LowerTriangular, bc::Broadcasted{<:StructuredMatrixStyle})
-    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
-    axs = axes(dest)
-    axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
-    for j in axs[2]
-        for i in j:axs[1][end]
-            @inbounds dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j))
-        end
-    end
-    return dest
-end
-
-function copyto!(dest::UpperTriangular, bc::Broadcasted{<:StructuredMatrixStyle})
-    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
-    axs = axes(dest)
-    axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
-    for j in axs[2]
-        for i in 1:j
-            @inbounds dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j))
-        end
-    end
-    return dest
-end
-
-# We can also implement `map` and its promotion in terms of broadcast with a stricter dimension check
-function map(f, A::StructuredMatrix, Bs::StructuredMatrix...)
-    sz = size(A)
-    all(map(B->size(B)==sz, Bs)) || throw(DimensionMismatch("dimensions must match"))
-    return f.(A, Bs...)
-end
diff --git a/stdlib/LinearAlgebra/src/svd.jl b/stdlib/LinearAlgebra/src/svd.jl
deleted file mode 100644
index c1b886f616f02..0000000000000
--- a/stdlib/LinearAlgebra/src/svd.jl
+++ /dev/null
@@ -1,577 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Singular Value Decomposition
-"""
-    SVD <: Factorization
-
-Matrix factorization type of the singular value decomposition (SVD) of a matrix `A`.
-This is the return type of [`svd(_)`](@ref), the corresponding matrix factorization function.
-
-If `F::SVD` is the factorization object, `U`, `S`, `V` and `Vt` can be obtained
-via `F.U`, `F.S`, `F.V` and `F.Vt`, such that `A = U * Diagonal(S) * Vt`.
-The singular values in `S` are sorted in descending order.
-
-Iterating the decomposition produces the components `U`, `S`, and `V`.
-
-# Examples
-```jldoctest
-julia> A = [1. 0. 0. 0. 2.; 0. 0. 3. 0. 0.; 0. 0. 0. 0. 0.; 0. 2. 0. 0. 0.]
-4×5 Matrix{Float64}:
- 1.0  0.0  0.0  0.0  2.0
- 0.0  0.0  3.0  0.0  0.0
- 0.0  0.0  0.0  0.0  0.0
- 0.0  2.0  0.0  0.0  0.0
-
-julia> F = svd(A)
-SVD{Float64, Float64, Matrix{Float64}, Vector{Float64}}
-U factor:
-4×4 Matrix{Float64}:
- 0.0  1.0   0.0  0.0
- 1.0  0.0   0.0  0.0
- 0.0  0.0   0.0  1.0
- 0.0  0.0  -1.0  0.0
-singular values:
-4-element Vector{Float64}:
- 3.0
- 2.23606797749979
- 2.0
- 0.0
-Vt factor:
-4×5 Matrix{Float64}:
- -0.0        0.0  1.0  -0.0  0.0
-  0.447214   0.0  0.0   0.0  0.894427
-  0.0       -1.0  0.0   0.0  0.0
-  0.0        0.0  0.0   1.0  0.0
-
-julia> F.U * Diagonal(F.S) * F.Vt
-4×5 Matrix{Float64}:
- 1.0  0.0  0.0  0.0  2.0
- 0.0  0.0  3.0  0.0  0.0
- 0.0  0.0  0.0  0.0  0.0
- 0.0  2.0  0.0  0.0  0.0
-
-julia> u, s, v = F; # destructuring via iteration
-
-julia> u == F.U && s == F.S && v == F.V
-true
-```
-"""
-struct SVD{T,Tr,M<:AbstractArray{T},C<:AbstractVector{Tr}} <: Factorization{T}
-    U::M
-    S::C
-    Vt::M
-    function SVD{T,Tr,M,C}(U, S, Vt) where {T,Tr,M<:AbstractArray{T},C<:AbstractVector{Tr}}
-        require_one_based_indexing(U, S, Vt)
-        new{T,Tr,M,C}(U, S, Vt)
-    end
-end
-SVD(U::AbstractArray{T}, S::AbstractVector{Tr}, Vt::AbstractArray{T}) where {T,Tr} =
-    SVD{T,Tr,typeof(U),typeof(S)}(U, S, Vt)
-SVD{T}(U::AbstractArray, S::AbstractVector{Tr}, Vt::AbstractArray) where {T,Tr} =
-    SVD(convert(AbstractArray{T}, U),
-        convert(AbstractVector{Tr}, S),
-        convert(AbstractArray{T}, Vt))
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(SVD{T,Tr,M}(U::AbstractArray{T}, S::AbstractVector{Tr}, Vt::AbstractArray{T}) where {T,Tr,M},
-           SVD{T,Tr,M,typeof(S)}(U, S, Vt))
-
-SVD{T}(F::SVD) where {T} = SVD(
-    convert(AbstractMatrix{T}, F.U),
-    convert(AbstractVector{real(T)}, F.S),
-    convert(AbstractMatrix{T}, F.Vt))
-Factorization{T}(F::SVD) where {T} = SVD{T}(F)
-
-# iteration for destructuring into components
-Base.iterate(S::SVD) = (S.U, Val(:S))
-Base.iterate(S::SVD, ::Val{:S}) = (S.S, Val(:V))
-Base.iterate(S::SVD, ::Val{:V}) = (S.V, Val(:done))
-Base.iterate(S::SVD, ::Val{:done}) = nothing
-
-
-default_svd_alg(A) = DivideAndConquer()
-
-
-"""
-    svd!(A; full::Bool = false, alg::Algorithm = default_svd_alg(A)) -> SVD
-
-`svd!` is the same as [`svd`](@ref), but saves space by
-overwriting the input `A`, instead of creating a copy. See documentation of [`svd`](@ref) for details.
-"""
-function svd!(A::StridedMatrix{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T<:BlasFloat}
-    m, n = size(A)
-    if m == 0 || n == 0
-        u, s, vt = (Matrix{T}(I, m, full ? m : n), real(zeros(T,0)), Matrix{T}(I, n, n))
-    else
-        u, s, vt = _svd!(A, full, alg)
-    end
-    SVD(u, s, vt)
-end
-function svd!(A::StridedVector{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T<:BlasFloat}
-    m = length(A)
-    normA = norm(A)
-    if iszero(normA)
-        return SVD(Matrix{T}(I, m, full ? m : 1), [normA], ones(T, 1, 1))
-    elseif !full
-        normalize!(A)
-        return SVD(reshape(A, (m, 1)), [normA], ones(T, 1, 1))
-    else
-        u, s, vt = _svd!(reshape(A, (m, 1)), full, alg)
-        return SVD(u, s, vt)
-    end
-end
-
-_svd!(A::StridedMatrix{T}, full::Bool, alg::Algorithm) where {T<:BlasFloat} =
-    throw(ArgumentError("Unsupported value for `alg` keyword."))
-_svd!(A::StridedMatrix{T}, full::Bool, alg::DivideAndConquer) where {T<:BlasFloat} =
-    LAPACK.gesdd!(full ? 'A' : 'S', A)
-function _svd!(A::StridedMatrix{T}, full::Bool, alg::QRIteration) where {T<:BlasFloat}
-    c = full ? 'A' : 'S'
-    u, s, vt = LAPACK.gesvd!(c, c, A)
-end
-
-
-
-"""
-    svd(A; full::Bool = false, alg::Algorithm = default_svd_alg(A)) -> SVD
-
-Compute the singular value decomposition (SVD) of `A` and return an `SVD` object.
-
-`U`, `S`, `V` and `Vt` can be obtained from the factorization `F` with `F.U`,
-`F.S`, `F.V` and `F.Vt`, such that `A = U * Diagonal(S) * Vt`.
-The algorithm produces `Vt` and hence `Vt` is more efficient to extract than `V`.
-The singular values in `S` are sorted in descending order.
-
-Iterating the decomposition produces the components `U`, `S`, and `V`.
-
-If `full = false` (default), a "thin" SVD is returned. For an ``M
-\\times N`` matrix `A`, in the full factorization `U` is ``M \\times M``
-and `V` is ``N \\times N``, while in the thin factorization `U` is ``M
-\\times K`` and `V` is ``N \\times K``, where ``K = \\min(M,N)`` is the
-number of singular values.
-
-If `alg = DivideAndConquer()` a divide-and-conquer algorithm is used to calculate the SVD.
-Another (typically slower but more accurate) option is `alg = QRIteration()`.
-
-!!! compat "Julia 1.3"
-    The `alg` keyword argument requires Julia 1.3 or later.
-
-# Examples
-```jldoctest
-julia> A = rand(4,3);
-
-julia> F = svd(A); # Store the Factorization Object
-
-julia> A ≈ F.U * Diagonal(F.S) * F.Vt
-true
-
-julia> U, S, V = F; # destructuring via iteration
-
-julia> A ≈ U * Diagonal(S) * V'
-true
-
-julia> Uonly, = svd(A); # Store U only
-
-julia> Uonly == U
-true
-```
-"""
-function svd(A::AbstractVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T}
-    svd!(eigencopy_oftype(A, eigtype(T)), full = full, alg = alg)
-end
-function svd(A::AbstractVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T <: Union{Float16,Complex{Float16}}}
-    A = svd!(eigencopy_oftype(A, eigtype(T)), full = full, alg = alg)
-    return SVD{T}(A)
-end
-function svd(x::Number; full::Bool = false, alg::Algorithm = default_svd_alg(x))
-    SVD(x == 0 ? fill(one(x), 1, 1) : fill(x/abs(x), 1, 1), [abs(x)], fill(one(x), 1, 1))
-end
-function svd(x::Integer; full::Bool = false, alg::Algorithm = default_svd_alg(x))
-    svd(float(x), full = full, alg = alg)
-end
-function svd(A::Adjoint; full::Bool = false, alg::Algorithm = default_svd_alg(A))
-    s = svd(A.parent, full = full, alg = alg)
-    return SVD(s.Vt', s.S, s.U')
-end
-function svd(A::Transpose; full::Bool = false, alg::Algorithm = default_svd_alg(A))
-    s = svd(A.parent, full = full, alg = alg)
-    return SVD(transpose(s.Vt), s.S, transpose(s.U))
-end
-
-function getproperty(F::SVD, d::Symbol)
-    if d === :V
-        return getfield(F, :Vt)'
-    else
-        return getfield(F, d)
-    end
-end
-
-Base.propertynames(F::SVD, private::Bool=false) =
-    private ? (:V, fieldnames(typeof(F))...) : (:U, :S, :V, :Vt)
-
-"""
-    svdvals!(A)
-
-Return the singular values of `A`, saving space by overwriting the input.
-See also [`svdvals`](@ref) and [`svd`](@ref).
-"""
-svdvals!(A::StridedMatrix{T}) where {T<:BlasFloat} = isempty(A) ? zeros(real(T), 0) : LAPACK.gesdd!('N', A)[2]
-svdvals!(A::StridedVector{T}) where {T<:BlasFloat} = svdvals!(reshape(A, (length(A), 1)))
-
-"""
-    svdvals(A)
-
-Return the singular values of `A` in descending order.
-
-# Examples
-```jldoctest
-julia> A = [1. 0. 0. 0. 2.; 0. 0. 3. 0. 0.; 0. 0. 0. 0. 0.; 0. 2. 0. 0. 0.]
-4×5 Matrix{Float64}:
- 1.0  0.0  0.0  0.0  2.0
- 0.0  0.0  3.0  0.0  0.0
- 0.0  0.0  0.0  0.0  0.0
- 0.0  2.0  0.0  0.0  0.0
-
-julia> svdvals(A)
-4-element Vector{Float64}:
- 3.0
- 2.23606797749979
- 2.0
- 0.0
-```
-"""
-svdvals(A::AbstractMatrix{T}) where {T} = svdvals!(eigencopy_oftype(A, eigtype(T)))
-svdvals(A::AbstractVector{T}) where {T} = [convert(eigtype(T), norm(A))]
-svdvals(x::Number) = abs(x)
-svdvals(S::SVD{<:Any,T}) where {T} = (S.S)::Vector{T}
-
-### SVD least squares ###
-function ldiv!(A::SVD{T}, B::AbstractVecOrMat) where T
-    m, n = size(A)
-    k = searchsortedlast(A.S, eps(real(T))*A.S[1], rev=true)
-    mul!(view(B, 1:n, :), view(A.Vt, 1:k, :)', view(A.S, 1:k) .\ (view(A.U, :, 1:k)' * _cut_B(B, 1:m)))
-    return B
-end
-
-function inv(F::SVD{T}) where T
-    @inbounds for i in eachindex(F.S)
-        iszero(F.S[i]) && throw(SingularException(i))
-    end
-    k = searchsortedlast(F.S, eps(real(T))*F.S[1], rev=true)
-    @views (F.S[1:k] .\ F.Vt[1:k, :])' * F.U[:,1:k]'
-end
-
-size(A::SVD, dim::Integer) = dim == 1 ? size(A.U, dim) : size(A.Vt, dim)
-size(A::SVD) = (size(A, 1), size(A, 2))
-
-function adjoint(F::SVD)
-    return SVD(F.Vt', F.S, F.U')
-end
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::SVD{<:Any,<:Any,<:AbstractArray,<:AbstractVector})
-    summary(io, F); println(io)
-    println(io, "U factor:")
-    show(io, mime, F.U)
-    println(io, "\nsingular values:")
-    show(io, mime, F.S)
-    println(io, "\nVt factor:")
-    show(io, mime, F.Vt)
-end
-
-# Generalized svd
-"""
-    GeneralizedSVD <: Factorization
-
-Matrix factorization type of the generalized singular value decomposition (SVD)
-of two matrices `A` and `B`, such that `A = F.U*F.D1*F.R0*F.Q'` and
-`B = F.V*F.D2*F.R0*F.Q'`. This is the return type of [`svd(_, _)`](@ref), the
-corresponding matrix factorization function.
-
-For an M-by-N matrix `A` and P-by-N matrix `B`,
-
-- `U` is a M-by-M orthogonal matrix,
-- `V` is a P-by-P orthogonal matrix,
-- `Q` is a N-by-N orthogonal matrix,
-- `D1` is a M-by-(K+L) diagonal matrix with 1s in the first K entries,
-- `D2` is a P-by-(K+L) matrix whose top right L-by-L block is diagonal,
-- `R0` is a (K+L)-by-N matrix whose rightmost (K+L)-by-(K+L) block is
-           nonsingular upper block triangular,
-
-`K+L` is the effective numerical rank of the matrix `[A; B]`.
-
-Iterating the decomposition produces the components `U`, `V`, `Q`, `D1`, `D2`, and `R0`.
-
-The entries of `F.D1` and `F.D2` are related, as explained in the LAPACK
-documentation for the
-[generalized SVD](http://www.netlib.org/lapack/lug/node36.html) and the
-[xGGSVD3](http://www.netlib.org/lapack/explore-html/d6/db3/dggsvd3_8f.html)
-routine which is called underneath (in LAPACK 3.6.0 and newer).
-
-# Examples
-```jldoctest
-julia> A = [1. 0.; 0. -1.]
-2×2 Matrix{Float64}:
- 1.0   0.0
- 0.0  -1.0
-
-julia> B = [0. 1.; 1. 0.]
-2×2 Matrix{Float64}:
- 0.0  1.0
- 1.0  0.0
-
-julia> F = svd(A, B)
-GeneralizedSVD{Float64, Matrix{Float64}, Float64, Vector{Float64}}
-U factor:
-2×2 Matrix{Float64}:
- 1.0  0.0
- 0.0  1.0
-V factor:
-2×2 Matrix{Float64}:
- -0.0  -1.0
-  1.0   0.0
-Q factor:
-2×2 Matrix{Float64}:
- 1.0  0.0
- 0.0  1.0
-D1 factor:
-2×2 Matrix{Float64}:
- 0.707107  0.0
- 0.0       0.707107
-D2 factor:
-2×2 Matrix{Float64}:
- 0.707107  0.0
- 0.0       0.707107
-R0 factor:
-2×2 Matrix{Float64}:
- 1.41421   0.0
- 0.0      -1.41421
-
-julia> F.U*F.D1*F.R0*F.Q'
-2×2 Matrix{Float64}:
- 1.0   0.0
- 0.0  -1.0
-
-julia> F.V*F.D2*F.R0*F.Q'
-2×2 Matrix{Float64}:
- -0.0  1.0
-  1.0  0.0
-```
-"""
-struct GeneralizedSVD{T,S<:AbstractMatrix,Tr,C<:AbstractVector{Tr}} <: Factorization{T}
-    U::S
-    V::S
-    Q::S
-    a::C
-    b::C
-    k::Int
-    l::Int
-    R::S
-    function GeneralizedSVD{T,S,Tr,C}(U, V, Q, a, b, k, l, R) where {T,S<:AbstractMatrix{T},Tr,C<:AbstractVector{Tr}}
-        new{T,S,Tr,C}(U, V, Q, a, b, k, l, R)
-    end
-end
-GeneralizedSVD(U::AbstractMatrix{T}, V::AbstractMatrix{T}, Q::AbstractMatrix{T},
-              a::AbstractVector{Tr}, b::AbstractVector{Tr}, k::Int, l::Int,
-              R::AbstractMatrix{T}) where {T, Tr} =
-    GeneralizedSVD{T,typeof(U),Tr,typeof(a)}(U, V, Q, a, b, k, l, R)
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(GeneralizedSVD{T,S}(U, V, Q, a, b, k, l, R) where {T, S},
-           GeneralizedSVD{T,S,real(T),typeof(a)}(U, V, Q, a, b, k, l, R))
-
-# iteration for destructuring into components
-Base.iterate(S::GeneralizedSVD) = (S.U, Val(:V))
-Base.iterate(S::GeneralizedSVD, ::Val{:V}) = (S.V, Val(:Q))
-Base.iterate(S::GeneralizedSVD, ::Val{:Q}) = (S.Q, Val(:D1))
-Base.iterate(S::GeneralizedSVD, ::Val{:D1}) = (S.D1, Val(:D2))
-Base.iterate(S::GeneralizedSVD, ::Val{:D2}) = (S.D2, Val(:R0))
-Base.iterate(S::GeneralizedSVD, ::Val{:R0}) = (S.R0, Val(:done))
-Base.iterate(S::GeneralizedSVD, ::Val{:done}) = nothing
-
-"""
-    svd!(A, B) -> GeneralizedSVD
-
-`svd!` is the same as [`svd`](@ref), but modifies the arguments
-`A` and `B` in-place, instead of making copies. See documentation of [`svd`](@ref) for details.
-"""
-function svd!(A::StridedMatrix{T}, B::StridedMatrix{T}) where T<:BlasFloat
-    # xggsvd3 replaced xggsvd in LAPACK 3.6.0
-    if LAPACK.version() < v"3.6.0"
-        U, V, Q, a, b, k, l, R = LAPACK.ggsvd!('U', 'V', 'Q', A, B)
-    else
-        U, V, Q, a, b, k, l, R = LAPACK.ggsvd3!('U', 'V', 'Q', A, B)
-    end
-    GeneralizedSVD(U, V, Q, a, b, Int(k), Int(l), R)
-end
-svd(A::AbstractMatrix{T}, B::AbstractMatrix{T}) where {T<:BlasFloat} =
-    svd!(copy_similar(A, T), copy_similar(B, T))
-
-"""
-
-    svd(A, B) -> GeneralizedSVD
-
-Compute the generalized SVD of `A` and `B`, returning a `GeneralizedSVD` factorization
-object `F` such that `[A;B] = [F.U * F.D1; F.V * F.D2] * F.R0 * F.Q'`
-
-- `U` is a M-by-M orthogonal matrix,
-- `V` is a P-by-P orthogonal matrix,
-- `Q` is a N-by-N orthogonal matrix,
-- `D1` is a M-by-(K+L) diagonal matrix with 1s in the first K entries,
-- `D2` is a P-by-(K+L) matrix whose top right L-by-L block is diagonal,
-- `R0` is a (K+L)-by-N matrix whose rightmost (K+L)-by-(K+L) block is
-           nonsingular upper block triangular,
-
-`K+L` is the effective numerical rank of the matrix `[A; B]`.
-
-Iterating the decomposition produces the components `U`, `V`, `Q`, `D1`, `D2`, and `R0`.
-
-The generalized SVD is used in applications such as when one wants to compare how much belongs
-to `A` vs. how much belongs to `B`, as in human vs yeast genome, or signal vs noise, or between
-clusters vs within clusters. (See Edelman and Wang for discussion: https://arxiv.org/abs/1901.00485)
-
-It decomposes `[A; B]` into `[UC; VS]H`, where `[UC; VS]` is a natural orthogonal basis for the
-column space of `[A; B]`, and `H = RQ'` is a natural non-orthogonal basis for the rowspace of `[A;B]`,
-where the top rows are most closely attributed to the `A` matrix, and the bottom to the `B` matrix.
-The multi-cosine/sine matrices `C` and `S` provide a multi-measure of how much `A` vs how much `B`,
-and `U` and `V` provide directions in which these are measured.
-
-# Examples
-```jldoctest
-julia> A = randn(3,2); B=randn(4,2);
-
-julia> F = svd(A, B);
-
-julia> U,V,Q,C,S,R = F;
-
-julia> H = R*Q';
-
-julia> [A; B] ≈ [U*C; V*S]*H
-true
-
-julia> [A; B] ≈ [F.U*F.D1; F.V*F.D2]*F.R0*F.Q'
-true
-
-julia> Uonly, = svd(A,B);
-
-julia> U == Uonly
-true
-```
-"""
-function svd(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB}
-    S = promote_type(eigtype(TA),TB)
-    return svd!(copy_similar(A, S), copy_similar(B, S))
-end
-# This method can be heavily optimized but it is probably not critical
-# and might introduce bugs or inconsistencies relative to the 1x1 matrix
-# version
-svd(x::Number, y::Number) = svd(fill(x, 1, 1), fill(y, 1, 1))
-
-@inline function getproperty(F::GeneralizedSVD{T}, d::Symbol) where T
-    Fa = getfield(F, :a)
-    Fb = getfield(F, :b)
-    Fk = getfield(F, :k)
-    Fl = getfield(F, :l)
-    FU = getfield(F, :U)
-    FV = getfield(F, :V)
-    FQ = getfield(F, :Q)
-    FR = getfield(F, :R)
-    if d === :alpha
-        return Fa
-    elseif d === :beta
-        return Fb
-    elseif d === :vals || d === :S
-        return Fa[1:Fk + Fl] ./ Fb[1:Fk + Fl]
-    elseif d === :D1
-        m = size(FU, 1)
-        if m - Fk - Fl >= 0
-            return [Matrix{T}(I, Fk, Fk)  zeros(T, Fk, Fl)            ;
-                    zeros(T, Fl, Fk)      Diagonal(Fa[Fk + 1:Fk + Fl]);
-                    zeros(T, m - Fk - Fl, Fk + Fl)                    ]
-        else
-            return [Matrix{T}(I, m, Fk) [zeros(T, Fk, m - Fk); Diagonal(Fa[Fk + 1:m])] zeros(T, m, Fk + Fl - m)]
-        end
-    elseif d === :D2
-        m = size(FU, 1)
-        p = size(FV, 1)
-        if m - Fk - Fl >= 0
-            return [zeros(T, Fl, Fk) Diagonal(Fb[Fk + 1:Fk + Fl]); zeros(T, p - Fl, Fk + Fl)]
-        else
-            return [zeros(T, p, Fk) [Diagonal(Fb[Fk + 1:m]); zeros(T, Fk + p - m, m - Fk)] [zeros(T, m - Fk, Fk + Fl - m); Matrix{T}(I, Fk + p - m, Fk + Fl - m)]]
-        end
-    elseif d === :R0
-        n = size(FQ, 1)
-        return [zeros(T, Fk + Fl, n - Fk - Fl) FR]
-    else
-        getfield(F, d)
-    end
-end
-
-Base.propertynames(F::GeneralizedSVD) =
-    (:alpha, :beta, :vals, :S, :D1, :D2, :R0, fieldnames(typeof(F))...)
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::GeneralizedSVD{<:Any,<:AbstractArray})
-    summary(io, F); println(io)
-    println(io, "U factor:")
-    show(io, mime, F.U)
-    println(io, "\nV factor:")
-    show(io, mime, F.V)
-    println(io, "\nQ factor:")
-    show(io, mime, F.Q)
-    println(io, "\nD1 factor:")
-    show(io, mime, F.D1)
-    println(io, "\nD2 factor:")
-    show(io, mime, F.D2)
-    println(io, "\nR0 factor:")
-    show(io, mime, F.R0)
-end
-
-"""
-    svdvals!(A, B)
-
-Return the generalized singular values from the generalized singular value
-decomposition of `A` and `B`, saving space by overwriting `A` and `B`.
-See also [`svd`](@ref) and [`svdvals`](@ref).
-"""
-function svdvals!(A::StridedMatrix{T}, B::StridedMatrix{T}) where T<:BlasFloat
-    # xggsvd3 replaced xggsvd in LAPACK 3.6.0
-    if LAPACK.version() < v"3.6.0"
-        _, _, _, a, b, k, l, _ = LAPACK.ggsvd!('N', 'N', 'N', A, B)
-    else
-        _, _, _, a, b, k, l, _ = LAPACK.ggsvd3!('N', 'N', 'N', A, B)
-    end
-    a[1:k + l] ./ b[1:k + l]
-end
-
-"""
-    svdvals(A, B)
-
-Return the generalized singular values from the generalized singular value
-decomposition of `A` and `B`. See also [`svd`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1. 0.; 0. -1.]
-2×2 Matrix{Float64}:
- 1.0   0.0
- 0.0  -1.0
-
-julia> B = [0. 1.; 1. 0.]
-2×2 Matrix{Float64}:
- 0.0  1.0
- 1.0  0.0
-
-julia> svdvals(A, B)
-2-element Vector{Float64}:
- 1.0
- 1.0
-```
-"""
-function svdvals(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB}
-    S = promote_type(eigtype(TA), TB)
-    return svdvals!(copy_similar(A, S), copy_similar(B, S))
-end
-svdvals(x::Number, y::Number) = abs(x/y)
-
-# Conversion
-AbstractMatrix(F::SVD) = (F.U * Diagonal(F.S)) * F.Vt
-AbstractArray(F::SVD) = AbstractMatrix(F)
-Matrix(F::SVD) = Array(AbstractArray(F))
-Array(F::SVD) = Matrix(F)
diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl
deleted file mode 100644
index fa3464e93230b..0000000000000
--- a/stdlib/LinearAlgebra/src/symmetric.jl
+++ /dev/null
@@ -1,865 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Symmetric and Hermitian matrices
-struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
-    data::S
-    uplo::Char
-
-    function Symmetric{T,S}(data, uplo::Char) where {T,S<:AbstractMatrix{<:T}}
-        require_one_based_indexing(data)
-        (uplo != 'U' && uplo != 'L') && throw_uplo()
-        new{T,S}(data, uplo)
-    end
-end
-"""
-    Symmetric(A, uplo=:U)
-
-Construct a `Symmetric` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`)
-triangle of the matrix `A`.
-
-`Symmetric` views are mainly useful for real-symmetric matrices, for which
-specialized algorithms (e.g. for eigenproblems) are enabled for `Symmetric` types.
-More generally, see also [`Hermitian(A)`](@ref) for Hermitian matrices `A == A'`, which
-is effectively equivalent to `Symmetric` for real matrices but is also useful for
-complex matrices.  (Whereas complex `Symmetric` matrices are supported but have few
-if any specialized algorithms.)
-
-To compute the symmetric part of a real matrix, or more generally the Hermitian part `(A + A') / 2` of
-a real or complex matrix `A`, use [`hermitianpart`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1 2 3; 4 5 6; 7 8 9]
-3×3 Matrix{Int64}:
- 1  2  3
- 4  5  6
- 7  8  9
-
-julia> Supper = Symmetric(A)
-3×3 Symmetric{Int64, Matrix{Int64}}:
- 1  2  3
- 2  5  6
- 3  6  9
-
-julia> Slower = Symmetric(A, :L)
-3×3 Symmetric{Int64, Matrix{Int64}}:
- 1  4  7
- 4  5  8
- 7  8  9
-
-julia> hermitianpart(A)
-3×3 Hermitian{Float64, Matrix{Float64}}:
- 1.0  3.0  5.0
- 3.0  5.0  7.0
- 5.0  7.0  9.0
-```
-
-Note that `Supper` will not be equal to `Slower` unless `A` is itself symmetric (e.g. if
-`A == transpose(A)`).
-"""
-function Symmetric(A::AbstractMatrix, uplo::Symbol=:U)
-    checksquare(A)
-    return symmetric_type(typeof(A))(A, char_uplo(uplo))
-end
-
-"""
-    symmetric(A, uplo=:U)
-
-Construct a symmetric view of `A`. If `A` is a matrix, `uplo` controls whether the upper
-(if `uplo = :U`) or lower (if `uplo = :L`) triangle of `A` is used to implicitly fill the
-other one. If `A` is a `Number`, it is returned as is.
-
-If a symmetric view of a matrix is to be constructed of which the elements are neither
-matrices nor numbers, an appropriate method of `symmetric` has to be implemented. In that
-case, `symmetric_type` has to be implemented, too.
-"""
-symmetric(A::AbstractMatrix, uplo::Symbol) = Symmetric(A, uplo)
-symmetric(A::Number, ::Symbol) = A
-
-"""
-    symmetric_type(T::Type)
-
-The type of the object returned by `symmetric(::T, ::Symbol)`. For matrices, this is an
-appropriately typed `Symmetric`, for `Number`s, it is the original type. If `symmetric` is
-implemented for a custom type, so should be `symmetric_type`, and vice versa.
-"""
-function symmetric_type(::Type{T}) where {S, T<:AbstractMatrix{S}}
-    return Symmetric{Union{S, promote_op(transpose, S), symmetric_type(S)}, T}
-end
-function symmetric_type(::Type{T}) where {S<:Number, T<:AbstractMatrix{S}}
-    return Symmetric{S, T}
-end
-function symmetric_type(::Type{T}) where {S<:AbstractMatrix, T<:AbstractMatrix{S}}
-    return Symmetric{AbstractMatrix, T}
-end
-symmetric_type(::Type{T}) where {T<:Number} = T
-
-struct Hermitian{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
-    data::S
-    uplo::Char
-
-    function Hermitian{T,S}(data, uplo::Char) where {T,S<:AbstractMatrix{<:T}}
-        require_one_based_indexing(data)
-        (uplo != 'U' && uplo != 'L') && throw_uplo()
-        new{T,S}(data, uplo)
-    end
-end
-"""
-    Hermitian(A, uplo=:U)
-
-Construct a `Hermitian` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`)
-triangle of the matrix `A`.
-
-To compute the Hermitian part of `A`, use [`hermitianpart`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1 2+2im 3-3im; 4 5 6-6im; 7 8+8im 9]
-3×3 Matrix{Complex{Int64}}:
- 1+0im  2+2im  3-3im
- 4+0im  5+0im  6-6im
- 7+0im  8+8im  9+0im
-
-julia> Hupper = Hermitian(A)
-3×3 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}:
- 1+0im  2+2im  3-3im
- 2-2im  5+0im  6-6im
- 3+3im  6+6im  9+0im
-
-julia> Hlower = Hermitian(A, :L)
-3×3 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}:
- 1+0im  4+0im  7+0im
- 4+0im  5+0im  8-8im
- 7+0im  8+8im  9+0im
-
-julia> hermitianpart(A)
-3×3 Hermitian{ComplexF64, Matrix{ComplexF64}}:
- 1.0+0.0im  3.0+1.0im  5.0-1.5im
- 3.0-1.0im  5.0+0.0im  7.0-7.0im
- 5.0+1.5im  7.0+7.0im  9.0+0.0im
-```
-
-Note that `Hupper` will not be equal to `Hlower` unless `A` is itself Hermitian (e.g. if `A == adjoint(A)`).
-
-All non-real parts of the diagonal will be ignored.
-
-```julia
-Hermitian(fill(complex(1,1), 1, 1)) == fill(1, 1, 1)
-```
-"""
-function Hermitian(A::AbstractMatrix, uplo::Symbol=:U)
-    n = checksquare(A)
-    return hermitian_type(typeof(A))(A, char_uplo(uplo))
-end
-
-"""
-    hermitian(A, uplo=:U)
-
-Construct a hermitian view of `A`. If `A` is a matrix, `uplo` controls whether the upper
-(if `uplo = :U`) or lower (if `uplo = :L`) triangle of `A` is used to implicitly fill the
-other one. If `A` is a `Number`, its real part is returned converted back to the input
-type.
-
-If a hermitian view of a matrix is to be constructed of which the elements are neither
-matrices nor numbers, an appropriate method of `hermitian` has to be implemented. In that
-case, `hermitian_type` has to be implemented, too.
-"""
-hermitian(A::AbstractMatrix, uplo::Symbol) = Hermitian(A, uplo)
-hermitian(A::Number, ::Symbol) = convert(typeof(A), real(A))
-
-"""
-    hermitian_type(T::Type)
-
-The type of the object returned by `hermitian(::T, ::Symbol)`. For matrices, this is an
-appropriately typed `Hermitian`, for `Number`s, it is the original type. If `hermitian` is
-implemented for a custom type, so should be `hermitian_type`, and vice versa.
-"""
-function hermitian_type(::Type{T}) where {S, T<:AbstractMatrix{S}}
-    return Hermitian{Union{S, promote_op(adjoint, S), hermitian_type(S)}, T}
-end
-function hermitian_type(::Type{T}) where {S<:Number, T<:AbstractMatrix{S}}
-    return Hermitian{S, T}
-end
-function hermitian_type(::Type{T}) where {S<:AbstractMatrix, T<:AbstractMatrix{S}}
-    return Hermitian{AbstractMatrix, T}
-end
-hermitian_type(::Type{T}) where {T<:Number} = T
-
-_unwrap(A::Hermitian) = parent(A)
-_unwrap(A::Symmetric) = parent(A)
-
-for (S, H) in ((:Symmetric, :Hermitian), (:Hermitian, :Symmetric))
-    @eval begin
-        $S(A::$S) = A
-        function $S(A::$S, uplo::Symbol)
-            if A.uplo == char_uplo(uplo)
-                return A
-            else
-                throw(ArgumentError("Cannot construct $($S); uplo doesn't match"))
-            end
-        end
-        $S(A::$H) = $S(A, sym_uplo(A.uplo))
-        function $S(A::$H, uplo::Symbol)
-            if A.uplo == char_uplo(uplo)
-                if $H === Hermitian && !(eltype(A) <: Real) &&
-                    any(!isreal, A.data[i] for i in diagind(A.data))
-
-                    throw(ArgumentError("Cannot construct $($S)($($H))); diagonal contains complex values"))
-                end
-                return $S(A.data, sym_uplo(A.uplo))
-            else
-                throw(ArgumentError("Cannot construct $($S); uplo doesn't match"))
-            end
-        end
-    end
-end
-
-convert(::Type{T}, m::Union{Symmetric,Hermitian}) where {T<:Symmetric} = m isa T ? m : T(m)::T
-convert(::Type{T}, m::Union{Symmetric,Hermitian}) where {T<:Hermitian} = m isa T ? m : T(m)::T
-
-const HermOrSym{T,        S} = Union{Hermitian{T,S}, Symmetric{T,S}}
-const RealHermSym{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}}
-const RealHermSymComplexHerm{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}, Hermitian{Complex{T},S}}
-const RealHermSymComplexSym{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}, Symmetric{Complex{T},S}}
-
-size(A::HermOrSym, d) = size(A.data, d)
-size(A::HermOrSym) = size(A.data)
-@inline function Base.isassigned(A::HermOrSym, i::Int, j::Int)
-    @boundscheck checkbounds(Bool, A, i, j) || return false
-    @inbounds if i == j || ((A.uplo == 'U') == (i < j))
-        return isassigned(A.data, i, j)
-    else
-        return isassigned(A.data, j, i)
-    end
-end
-
-@inline function getindex(A::Symmetric, i::Integer, j::Integer)
-    @boundscheck checkbounds(A, i, j)
-    @inbounds if i == j
-        return symmetric(A.data[i, j], sym_uplo(A.uplo))::symmetric_type(eltype(A.data))
-    elseif (A.uplo == 'U') == (i < j)
-        return A.data[i, j]
-    else
-        return transpose(A.data[j, i])
-    end
-end
-@inline function getindex(A::Hermitian, i::Integer, j::Integer)
-    @boundscheck checkbounds(A, i, j)
-    @inbounds if i == j
-        return hermitian(A.data[i, j], sym_uplo(A.uplo))::hermitian_type(eltype(A.data))
-    elseif (A.uplo == 'U') == (i < j)
-        return A.data[i, j]
-    else
-        return adjoint(A.data[j, i])
-    end
-end
-
-function setindex!(A::Symmetric, v, i::Integer, j::Integer)
-    i == j || throw(ArgumentError("Cannot set a non-diagonal index in a symmetric matrix"))
-    setindex!(A.data, v, i, j)
-end
-
-function setindex!(A::Hermitian, v, i::Integer, j::Integer)
-    if i != j
-        throw(ArgumentError("Cannot set a non-diagonal index in a Hermitian matrix"))
-    elseif !isreal(v)
-        throw(ArgumentError("Cannot set a diagonal entry in a Hermitian matrix to a nonreal value"))
-    else
-        setindex!(A.data, v, i, j)
-    end
-end
-
-diag(A::Symmetric) = symmetric.(diag(parent(A)), sym_uplo(A.uplo))
-diag(A::Hermitian) = hermitian.(diag(parent(A)), sym_uplo(A.uplo))
-
-isdiag(A::HermOrSym) = isdiag(A.uplo == 'U' ? UpperTriangular(A.data) : LowerTriangular(A.data))
-
-# For A<:Union{Symmetric,Hermitian}, similar(A[, neweltype]) should yield a matrix with the same
-# symmetry type, uplo flag, and underlying storage type as A. The following methods cover these cases.
-similar(A::Symmetric, ::Type{T}) where {T} = Symmetric(similar(parent(A), T), ifelse(A.uplo == 'U', :U, :L))
-# If the Hermitian constructor's check ascertaining that the wrapped matrix's
-# diagonal is strictly real is removed, the following method can be simplified.
-function similar(A::Hermitian, ::Type{T}) where T
-    B = similar(parent(A), T)
-    for i in 1:size(B, 1) B[i, i] = 0 end
-    return Hermitian(B, ifelse(A.uplo == 'U', :U, :L))
-end
-# On the other hand, similar(A, [neweltype,] shape...) should yield a matrix of the underlying
-# storage type of A (not wrapped in a symmetry type). The following method covers these cases.
-similar(A::Union{Symmetric,Hermitian}, ::Type{T}, dims::Dims{N}) where {T,N} = similar(parent(A), T, dims)
-
-# Conversion
-function Matrix(A::Symmetric)
-    B = copytri!(convert(Matrix, copy(A.data)), A.uplo)
-    for i = 1:size(A, 1)
-        B[i,i] = symmetric(A[i,i], sym_uplo(A.uplo))::symmetric_type(eltype(A.data))
-    end
-    return B
-end
-function Matrix(A::Hermitian)
-    B = copytri!(convert(Matrix, copy(A.data)), A.uplo, true)
-    for i = 1:size(A, 1)
-        B[i,i] = hermitian(A[i,i], sym_uplo(A.uplo))::hermitian_type(eltype(A.data))
-    end
-    return B
-end
-Array(A::Union{Symmetric,Hermitian}) = convert(Matrix, A)
-
-parent(A::HermOrSym) = A.data
-Symmetric{T,S}(A::Symmetric{T,S}) where {T,S<:AbstractMatrix{T}} = A
-Symmetric{T,S}(A::Symmetric) where {T,S<:AbstractMatrix{T}} = Symmetric{T,S}(convert(S,A.data),A.uplo)
-AbstractMatrix{T}(A::Symmetric) where {T} = Symmetric(convert(AbstractMatrix{T}, A.data), sym_uplo(A.uplo))
-Hermitian{T,S}(A::Hermitian{T,S}) where {T,S<:AbstractMatrix{T}} = A
-Hermitian{T,S}(A::Hermitian) where {T,S<:AbstractMatrix{T}} = Hermitian{T,S}(convert(S,A.data),A.uplo)
-AbstractMatrix{T}(A::Hermitian) where {T} = Hermitian(convert(AbstractMatrix{T}, A.data), sym_uplo(A.uplo))
-
-copy(A::Symmetric{T,S}) where {T,S} = (B = copy(A.data); Symmetric{T,typeof(B)}(B,A.uplo))
-copy(A::Hermitian{T,S}) where {T,S} = (B = copy(A.data); Hermitian{T,typeof(B)}(B,A.uplo))
-
-function copyto!(dest::Symmetric, src::Symmetric)
-    if src.uplo == dest.uplo
-        copyto!(dest.data, src.data)
-    else
-        transpose!(dest.data, src.data)
-    end
-    return dest
-end
-
-function copyto!(dest::Hermitian, src::Hermitian)
-    if src.uplo == dest.uplo
-        copyto!(dest.data, src.data)
-    else
-        adjoint!(dest.data, src.data)
-    end
-    return dest
-end
-
-# fill[stored]!
-fill!(A::HermOrSym, x) = fillstored!(A, x)
-function fillstored!(A::HermOrSym{T}, x) where T
-    xT = convert(T, x)
-    if isa(A, Hermitian)
-        isreal(xT) || throw(ArgumentError("cannot fill Hermitian matrix with a nonreal value"))
-    end
-    if A.uplo == 'U'
-        fillband!(A.data, xT, 0, size(A,2)-1)
-    else # A.uplo == 'L'
-        fillband!(A.data, xT, 1-size(A,1), 0)
-    end
-    return A
-end
-
-Base.isreal(A::HermOrSym{<:Real}) = true
-function Base.isreal(A::HermOrSym)
-    n = size(A, 1)
-    @inbounds if A.uplo == 'U'
-        for j in 1:n
-            for i in 1:(j - (A isa Hermitian))
-                if !isreal(A.data[i,j])
-                    return false
-                end
-            end
-        end
-    else
-        for j in 1:n
-            for i in (j + (A isa Hermitian)):n
-                if !isreal(A.data[i,j])
-                    return false
-                end
-            end
-        end
-    end
-    return true
-end
-
-ishermitian(A::Hermitian) = true
-ishermitian(A::Symmetric{<:Real}) = true
-ishermitian(A::Symmetric{<:Complex}) = isreal(A)
-issymmetric(A::Hermitian{<:Real}) = true
-issymmetric(A::Hermitian{<:Complex}) = isreal(A)
-issymmetric(A::Symmetric) = true
-
-adjoint(A::Hermitian) = A
-transpose(A::Symmetric) = A
-adjoint(A::Symmetric{<:Real}) = A
-transpose(A::Hermitian{<:Real}) = A
-adjoint(A::Symmetric) = Adjoint(A)
-transpose(A::Hermitian) = Transpose(A)
-
-real(A::Symmetric{<:Real}) = A
-real(A::Hermitian{<:Real}) = A
-real(A::Symmetric) = Symmetric(real(A.data), sym_uplo(A.uplo))
-real(A::Hermitian) = Hermitian(real(A.data), sym_uplo(A.uplo))
-imag(A::Symmetric) = Symmetric(imag(A.data), sym_uplo(A.uplo))
-
-Base.copy(A::Adjoint{<:Any,<:Symmetric}) =
-    Symmetric(copy(adjoint(A.parent.data)), ifelse(A.parent.uplo == 'U', :L, :U))
-Base.copy(A::Transpose{<:Any,<:Hermitian}) =
-    Hermitian(copy(transpose(A.parent.data)), ifelse(A.parent.uplo == 'U', :L, :U))
-
-tr(A::Symmetric) = tr(A.data) # to avoid AbstractMatrix fallback (incl. allocations)
-tr(A::Hermitian) = real(tr(A.data))
-
-Base.conj(A::HermOrSym) = typeof(A)(conj(A.data), A.uplo)
-Base.conj!(A::HermOrSym) = typeof(A)(conj!(A.data), A.uplo)
-
-# tril/triu
-function tril(A::Hermitian, k::Integer=0)
-    if A.uplo == 'U' && k <= 0
-        return tril!(copy(A.data'),k)
-    elseif A.uplo == 'U' && k > 0
-        return tril!(copy(A.data'),-1) + tril!(triu(A.data),k)
-    elseif A.uplo == 'L' && k <= 0
-        return tril(A.data,k)
-    else
-        return tril(A.data,-1) + tril!(triu!(copy(A.data')),k)
-    end
-end
-
-function tril(A::Symmetric, k::Integer=0)
-    if A.uplo == 'U' && k <= 0
-        return tril!(copy(transpose(A.data)),k)
-    elseif A.uplo == 'U' && k > 0
-        return tril!(copy(transpose(A.data)),-1) + tril!(triu(A.data),k)
-    elseif A.uplo == 'L' && k <= 0
-        return tril(A.data,k)
-    else
-        return tril(A.data,-1) + tril!(triu!(copy(transpose(A.data))),k)
-    end
-end
-
-function triu(A::Hermitian, k::Integer=0)
-    if A.uplo == 'U' && k >= 0
-        return triu(A.data,k)
-    elseif A.uplo == 'U' && k < 0
-        return triu(A.data,1) + triu!(tril!(copy(A.data')),k)
-    elseif A.uplo == 'L' && k >= 0
-        return triu!(copy(A.data'),k)
-    else
-        return triu!(copy(A.data'),1) + triu!(tril(A.data),k)
-    end
-end
-
-function triu(A::Symmetric, k::Integer=0)
-    if A.uplo == 'U' && k >= 0
-        return triu(A.data,k)
-    elseif A.uplo == 'U' && k < 0
-        return triu(A.data,1) + triu!(tril!(copy(transpose(A.data))),k)
-    elseif A.uplo == 'L' && k >= 0
-        return triu!(copy(transpose(A.data)),k)
-    else
-        return triu!(copy(transpose(A.data)),1) + triu!(tril(A.data),k)
-    end
-end
-
-for (T, trans, real) in [(:Symmetric, :transpose, :identity), (:Hermitian, :adjoint, :real)]
-    @eval begin
-        function dot(A::$T, B::$T)
-            n = size(A, 2)
-            if n != size(B, 2)
-                throw(DimensionMismatch("A has dimensions $(size(A)) but B has dimensions $(size(B))"))
-            end
-
-            dotprod = zero(dot(first(A), first(B)))
-            @inbounds if A.uplo == 'U' && B.uplo == 'U'
-                for j in 1:n
-                    for i in 1:(j - 1)
-                        dotprod += 2 * $real(dot(A.data[i, j], B.data[i, j]))
-                    end
-                    dotprod += dot(A[j, j], B[j, j])
-                end
-            elseif A.uplo == 'L' && B.uplo == 'L'
-                for j in 1:n
-                    dotprod += dot(A[j, j], B[j, j])
-                    for i in (j + 1):n
-                        dotprod += 2 * $real(dot(A.data[i, j], B.data[i, j]))
-                    end
-                end
-            elseif A.uplo == 'U' && B.uplo == 'L'
-                for j in 1:n
-                    for i in 1:(j - 1)
-                        dotprod += 2 * $real(dot(A.data[i, j], $trans(B.data[j, i])))
-                    end
-                    dotprod += dot(A[j, j], B[j, j])
-                end
-            else
-                for j in 1:n
-                    dotprod += dot(A[j, j], B[j, j])
-                    for i in (j + 1):n
-                        dotprod += 2 * $real(dot(A.data[i, j], $trans(B.data[j, i])))
-                    end
-                end
-            end
-            return dotprod
-        end
-    end
-end
-
-(-)(A::Symmetric) = Symmetric(-A.data, sym_uplo(A.uplo))
-(-)(A::Hermitian) = Hermitian(-A.data, sym_uplo(A.uplo))
-
-## Addition/subtraction
-for f ∈ (:+, :-), (Wrapper, conjugation) ∈ ((:Hermitian, :adjoint), (:Symmetric, :transpose))
-    @eval begin
-        function $f(A::$Wrapper, B::$Wrapper)
-            if A.uplo == B.uplo
-                return $Wrapper($f(parent(A), parent(B)), sym_uplo(A.uplo))
-            elseif A.uplo == 'U'
-                return $Wrapper($f(parent(A), $conjugation(parent(B))), :U)
-            else
-                return $Wrapper($f($conjugation(parent(A)), parent(B)), :U)
-            end
-        end
-    end
-end
-
-for f in (:+, :-)
-    @eval begin
-        $f(A::Hermitian, B::Symmetric{<:Real}) = $f(A, Hermitian(parent(B), sym_uplo(B.uplo)))
-        $f(A::Symmetric{<:Real}, B::Hermitian) = $f(Hermitian(parent(A), sym_uplo(A.uplo)), B)
-        $f(A::SymTridiagonal, B::Symmetric) = Symmetric($f(A, B.data), sym_uplo(B.uplo))
-        $f(A::Symmetric, B::SymTridiagonal) = Symmetric($f(A.data, B), sym_uplo(A.uplo))
-        $f(A::SymTridiagonal{<:Real}, B::Hermitian) = Hermitian($f(A, B.data), sym_uplo(B.uplo))
-        $f(A::Hermitian, B::SymTridiagonal{<:Real}) = Hermitian($f(A.data, B), sym_uplo(A.uplo))
-    end
-end
-
-*(A::HermOrSym, B::HermOrSym) = A * copyto!(similar(parent(B)), B)
-
-function dot(x::AbstractVector, A::RealHermSymComplexHerm, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    n = length(x)
-    (n == length(y) == size(A, 1)) || throw(DimensionMismatch())
-    data = A.data
-    r = dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
-    iszero(n) && return r
-    if A.uplo == 'U'
-        @inbounds for j = 1:length(y)
-            r += dot(x[j], real(data[j,j]), y[j])
-            @simd for i = 1:j-1
-                Aij = data[i,j]
-                r += dot(x[i], Aij, y[j]) + dot(x[j], adjoint(Aij), y[i])
-            end
-        end
-    else # A.uplo == 'L'
-        @inbounds for j = 1:length(y)
-            r += dot(x[j], real(data[j,j]), y[j])
-            @simd for i = j+1:length(y)
-                Aij = data[i,j]
-                r += dot(x[i], Aij, y[j]) + dot(x[j], adjoint(Aij), y[i])
-            end
-        end
-    end
-    return r
-end
-
-# Scaling with Number
-*(A::Symmetric, x::Number) = Symmetric(A.data*x, sym_uplo(A.uplo))
-*(x::Number, A::Symmetric) = Symmetric(x*A.data, sym_uplo(A.uplo))
-*(A::Hermitian, x::Real) = Hermitian(A.data*x, sym_uplo(A.uplo))
-*(x::Real, A::Hermitian) = Hermitian(x*A.data, sym_uplo(A.uplo))
-/(A::Symmetric, x::Number) = Symmetric(A.data/x, sym_uplo(A.uplo))
-/(A::Hermitian, x::Real) = Hermitian(A.data/x, sym_uplo(A.uplo))
-
-factorize(A::HermOrSym) = _factorize(A)
-function _factorize(A::HermOrSym{T}; check::Bool=true) where T
-    TT = typeof(sqrt(oneunit(T)))
-    if isdiag(A)
-        return Diagonal(A)
-    elseif TT <: BlasFloat
-        return bunchkaufman(A; check=check)
-    else # fallback
-        return lu(A; check=check)
-    end
-end
-
-det(A::RealHermSymComplexHerm) = real(det(_factorize(A; check=false)))
-det(A::Symmetric{<:Real}) = det(_factorize(A; check=false))
-det(A::Symmetric) = det(_factorize(A; check=false))
-
-\(A::HermOrSym, B::AbstractVector) = \(factorize(A), B)
-# Bunch-Kaufman solves can not utilize BLAS-3 for multiple right hand sides
-# so using LU is faster for AbstractMatrix right hand side
-\(A::HermOrSym, B::AbstractMatrix) = \(isdiag(A) ? Diagonal(A) : lu(A), B)
-
-function _inv(A::HermOrSym)
-    n = checksquare(A)
-    B = inv!(lu(A))
-    conjugate = isa(A, Hermitian)
-    # symmetrize
-    if A.uplo == 'U' # add to upper triangle
-        @inbounds for i = 1:n, j = i:n
-            B[i,j] = conjugate ? (B[i,j] + conj(B[j,i])) / 2 : (B[i,j] + B[j,i]) / 2
-        end
-    else # A.uplo == 'L', add to lower triangle
-        @inbounds for i = 1:n, j = i:n
-            B[j,i] = conjugate ? (B[j,i] + conj(B[i,j])) / 2 : (B[j,i] + B[i,j]) / 2
-        end
-    end
-    B
-end
-# StridedMatrix restriction seems necessary due to inv! call in _inv above
-inv(A::Hermitian{<:Any,<:StridedMatrix}) = Hermitian(_inv(A), sym_uplo(A.uplo))
-inv(A::Symmetric{<:Any,<:StridedMatrix}) = Symmetric(_inv(A), sym_uplo(A.uplo))
-
-function svd(A::RealHermSymComplexHerm; full::Bool=false)
-    vals, vecs = eigen(A)
-    I = sortperm(vals; by=abs, rev=true)
-    permute!(vals, I)
-    Base.permutecols!!(vecs, I)         # left-singular vectors
-    V = copy(vecs)                      # right-singular vectors
-    # shifting -1 from singular values to right-singular vectors
-    @inbounds for i = 1:length(vals)
-        if vals[i] < 0
-            vals[i] = -vals[i]
-            for j = 1:size(V,1); V[j,i] = -V[j,i]; end
-        end
-    end
-    return SVD(vecs, vals, V')
-end
-
-function svdvals!(A::RealHermSymComplexHerm)
-    vals = eigvals!(A)
-    for i = 1:length(vals)
-        vals[i] = abs(vals[i])
-    end
-    return sort!(vals, rev = true)
-end
-
-# Matrix functions
-^(A::Symmetric{<:Real}, p::Integer) = sympow(A, p)
-^(A::Symmetric{<:Complex}, p::Integer) = sympow(A, p)
-function sympow(A::Symmetric, p::Integer)
-    if p < 0
-        return Symmetric(Base.power_by_squaring(inv(A), -p))
-    else
-        return Symmetric(Base.power_by_squaring(A, p))
-    end
-end
-function ^(A::Symmetric{<:Real}, p::Real)
-    isinteger(p) && return integerpow(A, p)
-    F = eigen(A)
-    if all(λ -> λ ≥ 0, F.values)
-        return Symmetric((F.vectors * Diagonal((F.values).^p)) * F.vectors')
-    else
-        return Symmetric((F.vectors * Diagonal((complex(F.values)).^p)) * F.vectors')
-    end
-end
-function ^(A::Symmetric{<:Complex}, p::Real)
-    isinteger(p) && return integerpow(A, p)
-    return Symmetric(schurpow(A, p))
-end
-function ^(A::Hermitian, p::Integer)
-    if p < 0
-        retmat = Base.power_by_squaring(inv(A), -p)
-    else
-        retmat = Base.power_by_squaring(A, p)
-    end
-    for i = 1:size(A,1)
-        retmat[i,i] = real(retmat[i,i])
-    end
-    return Hermitian(retmat)
-end
-function ^(A::Hermitian{T}, p::Real) where T
-    isinteger(p) && return integerpow(A, p)
-    F = eigen(A)
-    if all(λ -> λ ≥ 0, F.values)
-        retmat = (F.vectors * Diagonal((F.values).^p)) * F.vectors'
-        if T <: Real
-            return Hermitian(retmat)
-        else
-            for i = 1:size(A,1)
-                retmat[i,i] = real(retmat[i,i])
-            end
-            return Hermitian(retmat)
-        end
-    else
-        return (F.vectors * Diagonal((complex(F.values).^p))) * F.vectors'
-    end
-end
-
-for func in (:exp, :cos, :sin, :tan, :cosh, :sinh, :tanh, :atan, :asinh, :atanh)
-    @eval begin
-        function ($func)(A::HermOrSym{<:Real})
-            F = eigen(A)
-            return Symmetric((F.vectors * Diagonal(($func).(F.values))) * F.vectors')
-        end
-        function ($func)(A::Hermitian{<:Complex})
-            n = checksquare(A)
-            F = eigen(A)
-            retmat = (F.vectors * Diagonal(($func).(F.values))) * F.vectors'
-            for i = 1:n
-                retmat[i,i] = real(retmat[i,i])
-            end
-            return Hermitian(retmat)
-        end
-    end
-end
-
-function cis(A::Union{RealHermSymComplexHerm,SymTridiagonal{<:Real}})
-    F = eigen(A)
-    # The returned matrix is unitary, and is complex-symmetric for real A
-    return F.vectors .* cis.(F.values') * F.vectors'
-end
-
-for func in (:acos, :asin)
-    @eval begin
-        function ($func)(A::HermOrSym{<:Real})
-            F = eigen(A)
-            if all(λ -> -1 ≤ λ ≤ 1, F.values)
-                retmat = (F.vectors * Diagonal(($func).(F.values))) * F.vectors'
-            else
-                retmat = (F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors'
-            end
-            return Symmetric(retmat)
-        end
-        function ($func)(A::Hermitian{<:Complex})
-            n = checksquare(A)
-            F = eigen(A)
-            if all(λ -> -1 ≤ λ ≤ 1, F.values)
-                retmat = (F.vectors * Diagonal(($func).(F.values))) * F.vectors'
-                for i = 1:n
-                    retmat[i,i] = real(retmat[i,i])
-                end
-                return Hermitian(retmat)
-            else
-                return (F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors'
-            end
-        end
-    end
-end
-
-function acosh(A::HermOrSym{<:Real})
-    F = eigen(A)
-    if all(λ -> λ ≥ 1, F.values)
-        retmat = (F.vectors * Diagonal(acosh.(F.values))) * F.vectors'
-    else
-        retmat = (F.vectors * Diagonal(acosh.(complex.(F.values)))) * F.vectors'
-    end
-    return Symmetric(retmat)
-end
-function acosh(A::Hermitian{<:Complex})
-    n = checksquare(A)
-    F = eigen(A)
-    if all(λ -> λ ≥ 1, F.values)
-        retmat = (F.vectors * Diagonal(acosh.(F.values))) * F.vectors'
-        for i = 1:n
-            retmat[i,i] = real(retmat[i,i])
-        end
-        return Hermitian(retmat)
-    else
-        return (F.vectors * Diagonal(acosh.(complex.(F.values)))) * F.vectors'
-    end
-end
-
-function sincos(A::HermOrSym{<:Real})
-    n = checksquare(A)
-    F = eigen(A)
-    S, C = Diagonal(similar(A, (n,))), Diagonal(similar(A, (n,)))
-    for i in 1:n
-        S.diag[i], C.diag[i] = sincos(F.values[i])
-    end
-    return Symmetric((F.vectors * S) * F.vectors'), Symmetric((F.vectors * C) * F.vectors')
-end
-function sincos(A::Hermitian{<:Complex})
-    n = checksquare(A)
-    F = eigen(A)
-    S, C = Diagonal(similar(A, (n,))), Diagonal(similar(A, (n,)))
-    for i in 1:n
-        S.diag[i], C.diag[i] = sincos(F.values[i])
-    end
-    retmatS, retmatC = (F.vectors * S) * F.vectors', (F.vectors * C) * F.vectors'
-    for i = 1:n
-        retmatS[i,i] = real(retmatS[i,i])
-        retmatC[i,i] = real(retmatC[i,i])
-    end
-    return Hermitian(retmatS), Hermitian(retmatC)
-end
-
-
-for func in (:log, :sqrt)
-    # sqrt has rtol arg to handle matrices that are semidefinite up to roundoff errors
-    rtolarg = func === :sqrt ? Any[Expr(:kw, :(rtol::Real), :(eps(real(float(one(T))))*size(A,1)))] : Any[]
-    rtolval = func === :sqrt ? :(-maximum(abs, F.values) * rtol) : 0
-    @eval begin
-        function ($func)(A::HermOrSym{T}; $(rtolarg...)) where {T<:Real}
-            F = eigen(A)
-            λ₀ = $rtolval # treat λ ≥ λ₀ as "zero" eigenvalues up to roundoff
-            if all(λ -> λ ≥ λ₀, F.values)
-                retmat = (F.vectors * Diagonal(($func).(max.(0, F.values)))) * F.vectors'
-            else
-                retmat = (F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors'
-            end
-            return Symmetric(retmat)
-        end
-
-        function ($func)(A::Hermitian{T}; $(rtolarg...)) where {T<:Complex}
-            n = checksquare(A)
-            F = eigen(A)
-            λ₀ = $rtolval # treat λ ≥ λ₀ as "zero" eigenvalues up to roundoff
-            if all(λ -> λ ≥ λ₀, F.values)
-                retmat = (F.vectors * Diagonal(($func).(max.(0, F.values)))) * F.vectors'
-                for i = 1:n
-                    retmat[i,i] = real(retmat[i,i])
-                end
-                return Hermitian(retmat)
-            else
-                retmat = (F.vectors * Diagonal(($func).(complex(F.values)))) * F.vectors'
-                return retmat
-            end
-        end
-    end
-end
-
-"""
-    hermitianpart(A, uplo=:U) -> Hermitian
-
-Return the Hermitian part of the square matrix `A`, defined as `(A + A') / 2`, as a
-[`Hermitian`](@ref) matrix. For real matrices `A`, this is also known as the symmetric part
-of `A`; it is also sometimes called the "operator real part". The optional argument `uplo` controls the corresponding argument of the
-[`Hermitian`](@ref) view. For real matrices, the latter is equivalent to a
-[`Symmetric`](@ref) view.
-
-See also [`hermitianpart!`](@ref) for the corresponding in-place operation.
-
-!!! compat "Julia 1.10"
-    This function requires Julia 1.10 or later.
-"""
-hermitianpart(A::AbstractMatrix, uplo::Symbol=:U) = Hermitian(_hermitianpart(A), uplo)
-
-"""
-    hermitianpart!(A, uplo=:U) -> Hermitian
-
-Overwrite the square matrix `A` in-place with its Hermitian part `(A + A') / 2`, and return
-[`Hermitian(A, uplo)`](@ref). For real matrices `A`, this is also known as the symmetric
-part of `A`.
-
-See also [`hermitianpart`](@ref) for the corresponding out-of-place operation.
-
-!!! compat "Julia 1.10"
-    This function requires Julia 1.10 or later.
-"""
-hermitianpart!(A::AbstractMatrix, uplo::Symbol=:U) = Hermitian(_hermitianpart!(A), uplo)
-
-_hermitianpart(A::AbstractMatrix) = _hermitianpart!(copy_similar(A, Base.promote_op(/, eltype(A), Int)))
-_hermitianpart(a::Number) = real(a)
-
-function _hermitianpart!(A::AbstractMatrix)
-    require_one_based_indexing(A)
-    n = checksquare(A)
-    @inbounds for j in 1:n
-        A[j, j] = _hermitianpart(A[j, j])
-        for i in 1:j-1
-            A[i, j] = val = (A[i, j] + adjoint(A[j, i])) / 2
-            A[j, i] = adjoint(val)
-        end
-    end
-    return A
-end
-
-## structured matrix printing ##
-function Base.replace_in_print_matrix(A::HermOrSym,i::Integer,j::Integer,s::AbstractString)
-    ijminmax = minmax(i, j)
-    inds = A.uplo == 'U' ? ijminmax : reverse(ijminmax)
-    Base.replace_in_print_matrix(parent(A), inds..., s)
-end
diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl
deleted file mode 100644
index 279577c31d664..0000000000000
--- a/stdlib/LinearAlgebra/src/symmetriceigen.jl
+++ /dev/null
@@ -1,220 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# preserve HermOrSym wrapper
-eigencopy_oftype(A::Hermitian, S) = Hermitian(copy_similar(A, S), sym_uplo(A.uplo))
-eigencopy_oftype(A::Symmetric, S) = Symmetric(copy_similar(A, S), sym_uplo(A.uplo))
-
-# Eigensolvers for symmetric and Hermitian matrices
-eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) =
-    Eigen(sorteig!(LAPACK.syevr!('V', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)..., sortby)...)
-
-function eigen(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing)
-    S = eigtype(eltype(A))
-    eigen!(eigencopy_oftype(A, S), sortby=sortby)
-end
-
-eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) =
-    Eigen(LAPACK.syevr!('V', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)...)
-
-"""
-    eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> Eigen
-
-Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
-which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the
-matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
-
-Iterating the decomposition produces the components `F.values` and `F.vectors`.
-
-The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).
-
-The [`UnitRange`](@ref) `irange` specifies indices of the sorted eigenvalues to search for.
-
-!!! note
-    If `irange` is not `1:n`, where `n` is the dimension of `A`, then the returned factorization
-    will be a *truncated* factorization.
-"""
-function eigen(A::RealHermSymComplexHerm, irange::UnitRange)
-    S = eigtype(eltype(A))
-    eigen!(eigencopy_oftype(A, S), irange)
-end
-
-eigen!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, vl::Real, vh::Real) where {T<:BlasReal} =
-    Eigen(LAPACK.syevr!('V', 'V', A.uplo, A.data, convert(T, vl), convert(T, vh), 0, 0, -1.0)...)
-
-"""
-    eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> Eigen
-
-Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
-which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the
-matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
-
-Iterating the decomposition produces the components `F.values` and `F.vectors`.
-
-The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).
-
-`vl` is the lower bound of the window of eigenvalues to search for, and `vu` is the upper bound.
-
-!!! note
-    If [`vl`, `vu`] does not contain all eigenvalues of `A`, then the returned factorization
-    will be a *truncated* factorization.
-"""
-function eigen(A::RealHermSymComplexHerm, vl::Real, vh::Real)
-    S = eigtype(eltype(A))
-    eigen!(eigencopy_oftype(A, S), vl, vh)
-end
-
-function eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing)
-    vals = LAPACK.syevr!('N', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)[1]
-    !isnothing(sortby) && sort!(vals, by=sortby)
-    return vals
-end
-
-function eigvals(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing)
-    S = eigtype(eltype(A))
-    eigvals!(eigencopy_oftype(A, S), sortby=sortby)
-end
-
-"""
-    eigvals!(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> values
-
-Same as [`eigvals`](@ref), but saves space by overwriting the input `A`, instead of creating a copy.
-`irange` is a range of eigenvalue *indices* to search for - for instance, the 2nd to 8th eigenvalues.
-"""
-eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) =
-    LAPACK.syevr!('N', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)[1]
-
-"""
-    eigvals(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> values
-
-Return the eigenvalues of `A`. It is possible to calculate only a subset of the
-eigenvalues by specifying a [`UnitRange`](@ref) `irange` covering indices of the sorted eigenvalues,
-e.g. the 2nd to 8th eigenvalues.
-
-# Examples
-```jldoctest
-julia> A = SymTridiagonal([1.; 2.; 1.], [2.; 3.])
-3×3 SymTridiagonal{Float64, Vector{Float64}}:
- 1.0  2.0   ⋅
- 2.0  2.0  3.0
-  ⋅   3.0  1.0
-
-julia> eigvals(A, 2:2)
-1-element Vector{Float64}:
- 0.9999999999999996
-
-julia> eigvals(A)
-3-element Vector{Float64}:
- -2.1400549446402604
-  1.0000000000000002
-  5.140054944640259
-```
-"""
-function eigvals(A::RealHermSymComplexHerm, irange::UnitRange)
-    S = eigtype(eltype(A))
-    eigvals!(eigencopy_oftype(A, S), irange)
-end
-
-"""
-    eigvals!(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> values
-
-Same as [`eigvals`](@ref), but saves space by overwriting the input `A`, instead of creating a copy.
-`vl` is the lower bound of the interval to search for eigenvalues, and `vu` is the upper bound.
-"""
-eigvals!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, vl::Real, vh::Real) where {T<:BlasReal} =
-    LAPACK.syevr!('N', 'V', A.uplo, A.data, convert(T, vl), convert(T, vh), 0, 0, -1.0)[1]
-
-"""
-    eigvals(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> values
-
-Return the eigenvalues of `A`. It is possible to calculate only a subset of the eigenvalues
-by specifying a pair `vl` and `vu` for the lower and upper boundaries of the eigenvalues.
-
-# Examples
-```jldoctest
-julia> A = SymTridiagonal([1.; 2.; 1.], [2.; 3.])
-3×3 SymTridiagonal{Float64, Vector{Float64}}:
- 1.0  2.0   ⋅
- 2.0  2.0  3.0
-  ⋅   3.0  1.0
-
-julia> eigvals(A, -1, 2)
-1-element Vector{Float64}:
- 1.0000000000000009
-
-julia> eigvals(A)
-3-element Vector{Float64}:
- -2.1400549446402604
-  1.0000000000000002
-  5.140054944640259
-```
-"""
-function eigvals(A::RealHermSymComplexHerm, vl::Real, vh::Real)
-    S = eigtype(eltype(A))
-    eigvals!(eigencopy_oftype(A, S), vl, vh)
-end
-
-eigmax(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, size(A, 1):size(A, 1))[1]
-eigmin(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, 1:1)[1]
-
-function eigen(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB}
-    S = promote_type(eigtype(TA), TB)
-    return eigen!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...)
-end
-
-function eigen!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix}
-    vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))
-    GeneralizedEigen(sorteig!(vals, vecs, sortby)...)
-end
-function eigen!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasComplex,S<:StridedMatrix}
-    vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))
-    GeneralizedEigen(sorteig!(vals, vecs, sortby)...)
-end
-
-function eigen(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing)
-    if ishermitian(A)
-        eigen!(eigencopy_oftype(Hermitian(A), eigtype(eltype(A))), C; sortby)
-    else
-        eigen!(copy_similar(A, eigtype(eltype(A))), C; sortby)
-    end
-end
-function eigen!(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing)
-    # Cholesky decomposition based eigenvalues and eigenvectors
-    vals, w = eigen!(UtiAUi!(A, C.U))
-    vecs = C.U \ w
-    GeneralizedEigen(sorteig!(vals, vecs, sortby)...)
-end
-
-# Perform U' \ A / U in-place, where U::Union{UpperTriangular,Diagonal}
-UtiAUi!(A, U) = _UtiAUi!(A, U)
-UtiAUi!(A::Symmetric, U) = Symmetric(_UtiAUi!(copytri!(parent(A), A.uplo), U), sym_uplo(A.uplo))
-UtiAUi!(A::Hermitian, U) = Hermitian(_UtiAUi!(copytri!(parent(A), A.uplo, true), U), sym_uplo(A.uplo))
-_UtiAUi!(A, U) = rdiv!(ldiv!(U', A), U)
-
-function eigvals(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB}
-    S = promote_type(eigtype(TA), TB)
-    return eigvals!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...)
-end
-
-function eigvals!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix}
-    vals = LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1]
-    isnothing(sortby) || sort!(vals, by=sortby)
-    return vals
-end
-function eigvals!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasComplex,S<:StridedMatrix}
-    vals = LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1]
-    isnothing(sortby) || sort!(vals, by=sortby)
-    return vals
-end
-eigvecs(A::HermOrSym) = eigvecs(eigen(A))
-
-function eigvals(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing)
-    if ishermitian(A)
-        eigvals!(eigencopy_oftype(Hermitian(A), eigtype(eltype(A))), C; sortby)
-    else
-        eigvals!(copy_similar(A, eigtype(eltype(A))), C; sortby)
-    end
-end
-function eigvals!(A::AbstractMatrix{T}, C::Cholesky{T, <:AbstractMatrix}; sortby::Union{Function,Nothing}=nothing) where {T<:Number}
-    # Cholesky decomposition based eigenvalues
-    return eigvals!(UtiAUi!(A, C.U); sortby)
-end
diff --git a/stdlib/LinearAlgebra/src/transpose.jl b/stdlib/LinearAlgebra/src/transpose.jl
deleted file mode 100644
index 9d70ac3add34b..0000000000000
--- a/stdlib/LinearAlgebra/src/transpose.jl
+++ /dev/null
@@ -1,212 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-adjoint(a::AbstractArray) = error("adjoint not defined for $(typeof(a)). Consider using `permutedims` for higher-dimensional arrays.")
-transpose(a::AbstractArray) = error("transpose not defined for $(typeof(a)). Consider using `permutedims` for higher-dimensional arrays.")
-
-## Matrix transposition ##
-
-"""
-    transpose!(dest,src)
-
-Transpose array `src` and store the result in the preallocated array `dest`, which should
-have a size corresponding to `(size(src,2),size(src,1))`. No in-place transposition is
-supported and unexpected results will happen if `src` and `dest` have overlapping memory
-regions.
-
-# Examples
-```jldoctest
-julia> A = [3+2im 9+2im; 8+7im  4+6im]
-2×2 Matrix{Complex{Int64}}:
- 3+2im  9+2im
- 8+7im  4+6im
-
-julia> B = zeros(Complex{Int64}, 2, 2)
-2×2 Matrix{Complex{Int64}}:
- 0+0im  0+0im
- 0+0im  0+0im
-
-julia> transpose!(B, A);
-
-julia> B
-2×2 Matrix{Complex{Int64}}:
- 3+2im  8+7im
- 9+2im  4+6im
-
-julia> A
-2×2 Matrix{Complex{Int64}}:
- 3+2im  9+2im
- 8+7im  4+6im
-```
-"""
-transpose!(B::AbstractMatrix, A::AbstractMatrix) = transpose_f!(transpose, B, A)
-
-"""
-    adjoint!(dest,src)
-
-Conjugate transpose array `src` and store the result in the preallocated array `dest`, which
-should have a size corresponding to `(size(src,2),size(src,1))`. No in-place transposition
-is supported and unexpected results will happen if `src` and `dest` have overlapping memory
-regions.
-
-# Examples
-```jldoctest
-julia> A = [3+2im 9+2im; 8+7im  4+6im]
-2×2 Matrix{Complex{Int64}}:
- 3+2im  9+2im
- 8+7im  4+6im
-
-julia> B = zeros(Complex{Int64}, 2, 2)
-2×2 Matrix{Complex{Int64}}:
- 0+0im  0+0im
- 0+0im  0+0im
-
-julia> adjoint!(B, A);
-
-julia> B
-2×2 Matrix{Complex{Int64}}:
- 3-2im  8-7im
- 9-2im  4-6im
-
-julia> A
-2×2 Matrix{Complex{Int64}}:
- 3+2im  9+2im
- 8+7im  4+6im
-```
-"""
-adjoint!(B::AbstractMatrix, A::AbstractMatrix) = transpose_f!(adjoint, B, A)
-function transpose!(B::AbstractVector, A::AbstractMatrix)
-    axes(B,1) == axes(A,2) && axes(A,1) == 1:1 || throw(DimensionMismatch("transpose"))
-    copyto!(B, A)
-end
-function transpose!(B::AbstractMatrix, A::AbstractVector)
-    axes(B,2) == axes(A,1) && axes(B,1) == 1:1 || throw(DimensionMismatch("transpose"))
-    copyto!(B, A)
-end
-function adjoint!(B::AbstractVector, A::AbstractMatrix)
-    axes(B,1) == axes(A,2) && axes(A,1) == 1:1 || throw(DimensionMismatch("transpose"))
-    ccopy!(B, A)
-end
-function adjoint!(B::AbstractMatrix, A::AbstractVector)
-    axes(B,2) == axes(A,1) && axes(B,1) == 1:1 || throw(DimensionMismatch("transpose"))
-    ccopy!(B, A)
-end
-
-const transposebaselength=64
-function transpose_f!(f, B::AbstractMatrix, A::AbstractMatrix)
-    inds = axes(A)
-    axes(B,1) == inds[2] && axes(B,2) == inds[1] || throw(DimensionMismatch(string(f)))
-
-    m, n = length(inds[1]), length(inds[2])
-    if m*n<=4*transposebaselength
-        @inbounds begin
-            for j = inds[2]
-                for i = inds[1]
-                    B[j,i] = f(A[i,j])
-                end
-            end
-        end
-    else
-        transposeblock!(f,B,A,m,n,first(inds[1])-1,first(inds[2])-1)
-    end
-    return B
-end
-function transposeblock!(f, B::AbstractMatrix, A::AbstractMatrix, m::Int, n::Int, offseti::Int, offsetj::Int)
-    if m*n<=transposebaselength
-        @inbounds begin
-            for j = offsetj .+ (1:n)
-                for i = offseti .+ (1:m)
-                    B[j,i] = f(A[i,j])
-                end
-            end
-        end
-    elseif m>n
-        newm=m>>1
-        transposeblock!(f,B,A,newm,n,offseti,offsetj)
-        transposeblock!(f,B,A,m-newm,n,offseti+newm,offsetj)
-    else
-        newn=n>>1
-        transposeblock!(f,B,A,m,newn,offseti,offsetj)
-        transposeblock!(f,B,A,m,n-newn,offseti,offsetj+newn)
-    end
-    return B
-end
-
-function ccopy!(B, A)
-    RB, RA = eachindex(B), eachindex(A)
-    if RB == RA
-        for i = RB
-            B[i] = adjoint(A[i])
-        end
-    else
-        for (i,j) = zip(RB, RA)
-            B[i] = adjoint(A[j])
-        end
-    end
-    return B
-end
-
-"""
-    copy(A::Transpose)
-    copy(A::Adjoint)
-
-Eagerly evaluate the lazy matrix transpose/adjoint.
-Note that the transposition is applied recursively to elements.
-
-This operation is intended for linear algebra usage - for general data manipulation see
-[`permutedims`](@ref Base.permutedims), which is non-recursive.
-
-# Examples
-```jldoctest
-julia> A = [1 2im; -3im 4]
-2×2 Matrix{Complex{Int64}}:
- 1+0im  0+2im
- 0-3im  4+0im
-
-julia> T = transpose(A)
-2×2 transpose(::Matrix{Complex{Int64}}) with eltype Complex{Int64}:
- 1+0im  0-3im
- 0+2im  4+0im
-
-julia> copy(T)
-2×2 Matrix{Complex{Int64}}:
- 1+0im  0-3im
- 0+2im  4+0im
-```
-"""
-copy(::Union{Transpose,Adjoint})
-
-Base.copy(A::TransposeAbsMat) = transpose!(similar(A.parent, reverse(axes(A.parent))), A.parent)
-Base.copy(A::AdjointAbsMat) = adjoint!(similar(A.parent, reverse(axes(A.parent))), A.parent)
-
-function copy_transpose!(B::AbstractVecOrMat, ir_dest::AbstractRange{Int}, jr_dest::AbstractRange{Int},
-                         A::AbstractVecOrMat, ir_src::AbstractRange{Int}, jr_src::AbstractRange{Int})
-    if length(ir_dest) != length(jr_src)
-        throw(ArgumentError(LazyString("source and destination must have same size (got ",
-                                   length(jr_src)," and ",length(ir_dest),")")))
-    end
-    if length(jr_dest) != length(ir_src)
-        throw(ArgumentError(LazyString("source and destination must have same size (got ",
-                                   length(ir_src)," and ",length(jr_dest),")")))
-    end
-    @boundscheck checkbounds(B, ir_dest, jr_dest)
-    @boundscheck checkbounds(A, ir_src, jr_src)
-    idest = first(ir_dest)
-    for jsrc in jr_src
-        jdest = first(jr_dest)
-        for isrc in ir_src
-            B[idest,jdest] = A[isrc,jsrc]
-            jdest += step(jr_dest)
-        end
-        idest += step(ir_dest)
-    end
-    return B
-end
-
-function copy_similar(A::AdjointAbsMat, ::Type{T}) where {T}
-    C = similar(A, T, size(A))
-    adjoint!(C, parent(A))
-end
-function copy_similar(A::TransposeAbsMat, ::Type{T}) where {T}
-    C = similar(A, T, size(A))
-    transpose!(C, parent(A))
-end
diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl
deleted file mode 100644
index 807ba5619f7c8..0000000000000
--- a/stdlib/LinearAlgebra/src/triangular.jl
+++ /dev/null
@@ -1,2501 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## Triangular
-
-# could be renamed to Triangular when that name has been fully deprecated
-abstract type AbstractTriangular{T} <: AbstractMatrix{T} end
-
-# First loop through all methods that don't need special care for upper/lower and unit diagonal
-for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTriangular)
-    @eval begin
-        struct $t{T,S<:AbstractMatrix{T}} <: AbstractTriangular{T}
-            data::S
-
-            function $t{T,S}(data) where {T,S<:AbstractMatrix{T}}
-                require_one_based_indexing(data)
-                checksquare(data)
-                new{T,S}(data)
-            end
-        end
-        $t(A::$t) = A
-        $t{T}(A::$t{T}) where {T} = A
-        function $t(A::AbstractMatrix)
-            return $t{eltype(A), typeof(A)}(A)
-        end
-        function $t{T}(A::AbstractMatrix) where T
-            $t(convert(AbstractMatrix{T}, A))
-        end
-
-        function $t{T}(A::$t) where T
-            Anew = convert(AbstractMatrix{T}, A.data)
-            $t(Anew)
-        end
-        Matrix(A::$t{T}) where {T} = Matrix{T}(A)
-
-        AbstractMatrix{T}(A::$t) where {T} = $t{T}(A)
-
-        size(A::$t, d) = size(A.data, d)
-        size(A::$t) = size(A.data)
-
-        # For A<:AbstractTriangular, similar(A[, neweltype]) should yield a matrix with the same
-        # triangular type and underlying storage type as A. The following method covers these cases.
-        similar(A::$t, ::Type{T}) where {T} = $t(similar(parent(A), T))
-        # On the other hand, similar(A, [neweltype,] shape...) should yield a matrix of the underlying
-        # storage type of A (not wrapped in a triangular type). The following method covers these cases.
-        similar(A::$t, ::Type{T}, dims::Dims{N}) where {T,N} = similar(parent(A), T, dims)
-
-        copy(A::$t) = $t(copy(A.data))
-
-        real(A::$t{<:Real}) = A
-        real(A::$t{<:Complex}) = (B = real(A.data); $t(B))
-    end
-end
-
-similar(A::UpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
-    UpperTriangular(similar(parent(parent(A)), T))
-similar(A::UnitUpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
-    UnitUpperTriangular(similar(parent(parent(A)), T))
-similar(A::LowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
-    LowerTriangular(similar(parent(parent(A)), T))
-similar(A::UnitLowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
-    UnitLowerTriangular(similar(parent(parent(A)), T))
-
-
-"""
-    LowerTriangular(A::AbstractMatrix)
-
-Construct a `LowerTriangular` view of the matrix `A`.
-
-# Examples
-```jldoctest
-julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
-3×3 Matrix{Float64}:
- 1.0  2.0  3.0
- 4.0  5.0  6.0
- 7.0  8.0  9.0
-
-julia> LowerTriangular(A)
-3×3 LowerTriangular{Float64, Matrix{Float64}}:
- 1.0   ⋅    ⋅
- 4.0  5.0   ⋅
- 7.0  8.0  9.0
-```
-"""
-LowerTriangular
-"""
-    UpperTriangular(A::AbstractMatrix)
-
-Construct an `UpperTriangular` view of the matrix `A`.
-
-# Examples
-```jldoctest
-julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
-3×3 Matrix{Float64}:
- 1.0  2.0  3.0
- 4.0  5.0  6.0
- 7.0  8.0  9.0
-
-julia> UpperTriangular(A)
-3×3 UpperTriangular{Float64, Matrix{Float64}}:
- 1.0  2.0  3.0
-  ⋅   5.0  6.0
-  ⋅    ⋅   9.0
-```
-"""
-UpperTriangular
-"""
-    UnitLowerTriangular(A::AbstractMatrix)
-
-Construct a `UnitLowerTriangular` view of the matrix `A`.
-Such a view has the [`oneunit`](@ref) of the [`eltype`](@ref)
-of `A` on its diagonal.
-
-# Examples
-```jldoctest
-julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
-3×3 Matrix{Float64}:
- 1.0  2.0  3.0
- 4.0  5.0  6.0
- 7.0  8.0  9.0
-
-julia> UnitLowerTriangular(A)
-3×3 UnitLowerTriangular{Float64, Matrix{Float64}}:
- 1.0   ⋅    ⋅
- 4.0  1.0   ⋅
- 7.0  8.0  1.0
-```
-"""
-UnitLowerTriangular
-"""
-    UnitUpperTriangular(A::AbstractMatrix)
-
-Construct an `UnitUpperTriangular` view of the matrix `A`.
-Such a view has the [`oneunit`](@ref) of the [`eltype`](@ref)
-of `A` on its diagonal.
-
-# Examples
-```jldoctest
-julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
-3×3 Matrix{Float64}:
- 1.0  2.0  3.0
- 4.0  5.0  6.0
- 7.0  8.0  9.0
-
-julia> UnitUpperTriangular(A)
-3×3 UnitUpperTriangular{Float64, Matrix{Float64}}:
- 1.0  2.0  3.0
-  ⋅   1.0  6.0
-  ⋅    ⋅   1.0
-```
-"""
-UnitUpperTriangular
-
-const UpperOrUnitUpperTriangular{T,S} = Union{UpperTriangular{T,S}, UnitUpperTriangular{T,S}}
-const LowerOrUnitLowerTriangular{T,S} = Union{LowerTriangular{T,S}, UnitLowerTriangular{T,S}}
-const UpperOrLowerTriangular{T,S} = Union{UpperOrUnitUpperTriangular{T,S}, LowerOrUnitLowerTriangular{T,S}}
-
-imag(A::UpperTriangular) = UpperTriangular(imag(A.data))
-imag(A::LowerTriangular) = LowerTriangular(imag(A.data))
-imag(A::UnitLowerTriangular) = LowerTriangular(tril!(imag(A.data),-1))
-imag(A::UnitUpperTriangular) = UpperTriangular(triu!(imag(A.data),1))
-
-Array(A::AbstractTriangular) = Matrix(A)
-parent(A::UpperOrLowerTriangular) = A.data
-
-# then handle all methods that requires specific handling of upper/lower and unit diagonal
-
-function Matrix{T}(A::LowerTriangular) where T
-    B = Matrix{T}(undef, size(A, 1), size(A, 1))
-    copyto!(B, A.data)
-    tril!(B)
-    B
-end
-function Matrix{T}(A::UnitLowerTriangular) where T
-    B = Matrix{T}(undef, size(A, 1), size(A, 1))
-    copyto!(B, A.data)
-    tril!(B)
-    for i = 1:size(B,1)
-        B[i,i] = oneunit(T)
-    end
-    B
-end
-function Matrix{T}(A::UpperTriangular) where T
-    B = Matrix{T}(undef, size(A, 1), size(A, 1))
-    copyto!(B, A.data)
-    triu!(B)
-    B
-end
-function Matrix{T}(A::UnitUpperTriangular) where T
-    B = Matrix{T}(undef, size(A, 1), size(A, 1))
-    copyto!(B, A.data)
-    triu!(B)
-    for i = 1:size(B,1)
-        B[i,i] = oneunit(T)
-    end
-    B
-end
-
-function full!(A::LowerTriangular)
-    B = A.data
-    tril!(B)
-    B
-end
-function full!(A::UnitLowerTriangular)
-    B = A.data
-    tril!(B)
-    for i = 1:size(A,1)
-        B[i,i] = oneunit(eltype(B))
-    end
-    B
-end
-function full!(A::UpperTriangular)
-    B = A.data
-    triu!(B)
-    B
-end
-function full!(A::UnitUpperTriangular)
-    B = A.data
-    triu!(B)
-    for i = 1:size(A,1)
-        B[i,i] = oneunit(eltype(B))
-    end
-    B
-end
-
-Base.isassigned(A::UnitLowerTriangular, i::Int, j::Int) =
-    i > j ? isassigned(A.data, i, j) : true
-Base.isassigned(A::LowerTriangular, i::Int, j::Int) =
-    i >= j ? isassigned(A.data, i, j) : true
-Base.isassigned(A::UnitUpperTriangular, i::Int, j::Int) =
-    i < j ? isassigned(A.data, i, j) : true
-Base.isassigned(A::UpperTriangular, i::Int, j::Int) =
-    i <= j ? isassigned(A.data, i, j) : true
-
-Base.isstored(A::UnitLowerTriangular, i::Int, j::Int) =
-    i > j ? Base.isstored(A.data, i, j) : false
-Base.isstored(A::LowerTriangular, i::Int, j::Int) =
-    i >= j ? Base.isstored(A.data, i, j) : false
-Base.isstored(A::UnitUpperTriangular, i::Int, j::Int) =
-    i < j ? Base.isstored(A.data, i, j) : false
-Base.isstored(A::UpperTriangular, i::Int, j::Int) =
-    i <= j ? Base.isstored(A.data, i, j) : false
-
-getindex(A::UnitLowerTriangular{T}, i::Integer, j::Integer) where {T} =
-    i > j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T))
-getindex(A::LowerTriangular, i::Integer, j::Integer) =
-    i >= j ? A.data[i,j] : zero(A.data[j,i])
-getindex(A::UnitUpperTriangular{T}, i::Integer, j::Integer) where {T} =
-    i < j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T))
-getindex(A::UpperTriangular, i::Integer, j::Integer) =
-    i <= j ? A.data[i,j] : zero(A.data[j,i])
-
-function setindex!(A::UpperTriangular, x, i::Integer, j::Integer)
-    if i > j
-        iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " *
-            "($i, $j) of an UpperTriangular matrix to a nonzero value ($x)"))
-    else
-        A.data[i,j] = x
-    end
-    return A
-end
-
-function setindex!(A::UnitUpperTriangular, x, i::Integer, j::Integer)
-    if i > j
-        iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " *
-            "($i, $j) of a UnitUpperTriangular matrix to a nonzero value ($x)"))
-    elseif i == j
-        x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
-            "of a UnitUpperTriangular matrix to a non-unit value ($x)"))
-    else
-        A.data[i,j] = x
-    end
-    return A
-end
-
-function setindex!(A::LowerTriangular, x, i::Integer, j::Integer)
-    if i < j
-        iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " *
-            "($i, $j) of a LowerTriangular matrix to a nonzero value ($x)"))
-    else
-        A.data[i,j] = x
-    end
-    return A
-end
-
-function setindex!(A::UnitLowerTriangular, x, i::Integer, j::Integer)
-    if i < j
-        iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " *
-            "($i, $j) of a UnitLowerTriangular matrix to a nonzero value ($x)"))
-    elseif i == j
-        x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
-            "of a UnitLowerTriangular matrix to a non-unit value ($x)"))
-    else
-        A.data[i,j] = x
-    end
-    return A
-end
-
-
-## structured matrix methods ##
-function Base.replace_in_print_matrix(A::Union{UpperTriangular,UnitUpperTriangular},
-                                      i::Integer, j::Integer, s::AbstractString)
-    return i <= j ? s : Base.replace_with_centered_mark(s)
-end
-function Base.replace_in_print_matrix(A::Union{LowerTriangular,UnitLowerTriangular},
-                                      i::Integer, j::Integer, s::AbstractString)
-    return i >= j ? s : Base.replace_with_centered_mark(s)
-end
-
-function istril(A::Union{LowerTriangular,UnitLowerTriangular}, k::Integer=0)
-    k >= 0 && return true
-    return _istril(A, k)
-end
-function istriu(A::Union{UpperTriangular,UnitUpperTriangular}, k::Integer=0)
-    k <= 0 && return true
-    return _istriu(A, k)
-end
-istril(A::Adjoint, k::Integer=0) = istriu(A.parent, -k)
-istril(A::Transpose, k::Integer=0) = istriu(A.parent, -k)
-istriu(A::Adjoint, k::Integer=0) = istril(A.parent, -k)
-istriu(A::Transpose, k::Integer=0) = istril(A.parent, -k)
-
-function tril!(A::UpperTriangular{T}, k::Integer=0) where {T}
-    n = size(A,1)
-    if k < 0
-        fill!(A.data, zero(T))
-        return A
-    elseif k == 0
-        for j in 1:n, i in 1:j-1
-            A.data[i,j] = zero(T)
-        end
-        return A
-    else
-        return UpperTriangular(tril!(A.data,k))
-    end
-end
-triu!(A::UpperTriangular, k::Integer=0) = UpperTriangular(triu!(A.data, k))
-
-function tril!(A::UnitUpperTriangular{T}, k::Integer=0) where {T}
-    n = size(A,1)
-    if k < 0
-        fill!(A.data, zero(T))
-        return UpperTriangular(A.data)
-    elseif k == 0
-        fill!(A.data, zero(T))
-        for i in diagind(A)
-            A.data[i] = oneunit(T)
-        end
-        return UpperTriangular(A.data)
-    else
-        for i in diagind(A)
-            A.data[i] = oneunit(T)
-        end
-        return UpperTriangular(tril!(A.data,k))
-    end
-end
-
-function triu!(A::UnitUpperTriangular, k::Integer=0)
-    for i in diagind(A)
-        A.data[i] = oneunit(eltype(A))
-    end
-    return triu!(UpperTriangular(A.data), k)
-end
-
-function triu!(A::LowerTriangular{T}, k::Integer=0) where {T}
-    n = size(A,1)
-    if k > 0
-        fill!(A.data, zero(T))
-        return A
-    elseif k == 0
-        for j in 1:n, i in j+1:n
-            A.data[i,j] = zero(T)
-        end
-        return A
-    else
-        return LowerTriangular(triu!(A.data, k))
-    end
-end
-
-tril!(A::LowerTriangular, k::Integer=0) = LowerTriangular(tril!(A.data, k))
-
-function triu!(A::UnitLowerTriangular{T}, k::Integer=0) where T
-    n = size(A,1)
-    if k > 0
-        fill!(A.data, zero(T))
-        return LowerTriangular(A.data)
-    elseif k == 0
-        fill!(A.data, zero(T))
-        for i in diagind(A)
-            A.data[i] = oneunit(T)
-        end
-        return LowerTriangular(A.data)
-    else
-        for i in diagind(A)
-            A.data[i] = oneunit(T)
-        end
-        return LowerTriangular(triu!(A.data, k))
-    end
-end
-
-function tril!(A::UnitLowerTriangular, k::Integer=0)
-    for i in diagind(A)
-        A.data[i] = oneunit(eltype(A))
-    end
-    return tril!(LowerTriangular(A.data), k)
-end
-
-adjoint(A::LowerTriangular) = UpperTriangular(adjoint(A.data))
-adjoint(A::UpperTriangular) = LowerTriangular(adjoint(A.data))
-adjoint(A::UnitLowerTriangular) = UnitUpperTriangular(adjoint(A.data))
-adjoint(A::UnitUpperTriangular) = UnitLowerTriangular(adjoint(A.data))
-transpose(A::LowerTriangular) = UpperTriangular(transpose(A.data))
-transpose(A::UpperTriangular) = LowerTriangular(transpose(A.data))
-transpose(A::UnitLowerTriangular) = UnitUpperTriangular(transpose(A.data))
-transpose(A::UnitUpperTriangular) = UnitLowerTriangular(transpose(A.data))
-
-transpose!(A::LowerTriangular) = UpperTriangular(copytri!(A.data, 'L', false, true))
-transpose!(A::UnitLowerTriangular) = UnitUpperTriangular(copytri!(A.data, 'L', false, true))
-transpose!(A::UpperTriangular) = LowerTriangular(copytri!(A.data, 'U', false, true))
-transpose!(A::UnitUpperTriangular) = UnitLowerTriangular(copytri!(A.data, 'U', false, true))
-adjoint!(A::LowerTriangular) = UpperTriangular(copytri!(A.data, 'L' , true, true))
-adjoint!(A::UnitLowerTriangular) = UnitUpperTriangular(copytri!(A.data, 'L' , true, true))
-adjoint!(A::UpperTriangular) = LowerTriangular(copytri!(A.data, 'U' , true, true))
-adjoint!(A::UnitUpperTriangular) = UnitLowerTriangular(copytri!(A.data, 'U' , true, true))
-
-diag(A::LowerTriangular) = diag(A.data)
-diag(A::UnitLowerTriangular) = fill(oneunit(eltype(A)), size(A,1))
-diag(A::UpperTriangular) = diag(A.data)
-diag(A::UnitUpperTriangular) = fill(oneunit(eltype(A)), size(A,1))
-
-# Unary operations
--(A::LowerTriangular) = LowerTriangular(-A.data)
--(A::UpperTriangular) = UpperTriangular(-A.data)
-function -(A::UnitLowerTriangular)
-    Anew = -A.data
-    for i = 1:size(A, 1)
-        Anew[i, i] = -A[i, i]
-    end
-    LowerTriangular(Anew)
-end
-function -(A::UnitUpperTriangular)
-    Anew = -A.data
-    for i = 1:size(A, 1)
-        Anew[i, i] = -A[i, i]
-    end
-    UpperTriangular(Anew)
-end
-
-tr(A::LowerTriangular) = tr(A.data)
-tr(A::UnitLowerTriangular) = size(A, 1) * oneunit(eltype(A))
-tr(A::UpperTriangular) = tr(A.data)
-tr(A::UnitUpperTriangular) = size(A, 1) * oneunit(eltype(A))
-
-# copy and scale
-function copyto!(A::T, B::T) where {T<:Union{UpperTriangular,UnitUpperTriangular}}
-    n = size(B,1)
-    for j = 1:n
-        for i = 1:(isa(B, UnitUpperTriangular) ? j-1 : j)
-            @inbounds A[i,j] = B[i,j]
-        end
-    end
-    return A
-end
-function copyto!(A::T, B::T) where {T<:Union{LowerTriangular,UnitLowerTriangular}}
-    n = size(B,1)
-    for j = 1:n
-        for i = (isa(B, UnitLowerTriangular) ? j+1 : j):n
-            @inbounds A[i,j] = B[i,j]
-        end
-    end
-    return A
-end
-
-# Define `mul!` for (Unit){Upper,Lower}Triangular matrices times a number.
-# be permissive here and require compatibility later in _triscale!
-@inline mul!(A::UpperOrLowerTriangular, B::UpperOrLowerTriangular, C::Number, alpha::Number, beta::Number) =
-    _triscale!(A, B, C, MulAddMul(alpha, beta))
-@inline mul!(A::UpperOrLowerTriangular, B::Number, C::UpperOrLowerTriangular, alpha::Number, beta::Number) =
-    _triscale!(A, B, C, MulAddMul(alpha, beta))
-
-function _triscale!(A::UpperTriangular, B::UpperTriangular, c::Number, _add)
-    n = checksquare(B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    for j = 1:n
-        for i = 1:j
-            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
-        end
-    end
-    return A
-end
-function _triscale!(A::UpperTriangular, c::Number, B::UpperTriangular, _add)
-    n = checksquare(B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    for j = 1:n
-        for i = 1:j
-            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
-        end
-    end
-    return A
-end
-function _triscale!(A::UpperOrUnitUpperTriangular, B::UnitUpperTriangular, c::Number, _add)
-    n = checksquare(B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    for j = 1:n
-        @inbounds _modify!(_add, c, A, (j,j))
-        for i = 1:(j - 1)
-            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
-        end
-    end
-    return A
-end
-function _triscale!(A::UpperOrUnitUpperTriangular, c::Number, B::UnitUpperTriangular, _add)
-    n = checksquare(B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    for j = 1:n
-        @inbounds _modify!(_add, c, A, (j,j))
-        for i = 1:(j - 1)
-            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
-        end
-    end
-    return A
-end
-function _triscale!(A::LowerTriangular, B::LowerTriangular, c::Number, _add)
-    n = checksquare(B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    for j = 1:n
-        for i = j:n
-            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
-        end
-    end
-    return A
-end
-function _triscale!(A::LowerTriangular, c::Number, B::LowerTriangular, _add)
-    n = checksquare(B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    for j = 1:n
-        for i = j:n
-            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
-        end
-    end
-    return A
-end
-function _triscale!(A::LowerOrUnitLowerTriangular, B::UnitLowerTriangular, c::Number, _add)
-    n = checksquare(B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    for j = 1:n
-        @inbounds _modify!(_add, c, A, (j,j))
-        for i = (j + 1):n
-            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
-        end
-    end
-    return A
-end
-function _triscale!(A::LowerOrUnitLowerTriangular, c::Number, B::UnitLowerTriangular, _add)
-    n = checksquare(B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    for j = 1:n
-        @inbounds _modify!(_add, c, A, (j,j))
-        for i = (j + 1):n
-            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
-        end
-    end
-    return A
-end
-
-rmul!(A::UpperOrLowerTriangular, c::Number) = @inline _triscale!(A, A, c, MulAddMul())
-lmul!(c::Number, A::UpperOrLowerTriangular) = @inline _triscale!(A, c, A, MulAddMul())
-
-function dot(x::AbstractVector, A::UpperTriangular, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    m = size(A, 1)
-    (length(x) == m == length(y)) || throw(DimensionMismatch())
-    if iszero(m)
-        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
-    end
-    x₁ = x[1]
-    r = dot(x₁, A[1,1], y[1])
-    @inbounds for j in 2:m
-        yj = y[j]
-        if !iszero(yj)
-            temp = adjoint(A[1,j]) * x₁
-            @simd for i in 2:j
-                temp += adjoint(A[i,j]) * x[i]
-            end
-            r += dot(temp, yj)
-        end
-    end
-    return r
-end
-function dot(x::AbstractVector, A::UnitUpperTriangular, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    m = size(A, 1)
-    (length(x) == m == length(y)) || throw(DimensionMismatch())
-    if iszero(m)
-        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
-    end
-    x₁ = first(x)
-    r = dot(x₁, y[1])
-    @inbounds for j in 2:m
-        yj = y[j]
-        if !iszero(yj)
-            temp = adjoint(A[1,j]) * x₁
-            @simd for i in 2:j-1
-                temp += adjoint(A[i,j]) * x[i]
-            end
-            r += dot(temp, yj)
-            r += dot(x[j], yj)
-        end
-    end
-    return r
-end
-function dot(x::AbstractVector, A::LowerTriangular, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    m = size(A, 1)
-    (length(x) == m == length(y)) || throw(DimensionMismatch())
-    if iszero(m)
-        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
-    end
-    r = zero(typeof(dot(first(x), first(A), first(y))))
-    @inbounds for j in 1:m
-        yj = y[j]
-        if !iszero(yj)
-            temp = adjoint(A[j,j]) * x[j]
-            @simd for i in j+1:m
-                temp += adjoint(A[i,j]) * x[i]
-            end
-            r += dot(temp, yj)
-        end
-    end
-    return r
-end
-function dot(x::AbstractVector, A::UnitLowerTriangular, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    m = size(A, 1)
-    (length(x) == m == length(y)) || throw(DimensionMismatch())
-    if iszero(m)
-        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
-    end
-    r = zero(typeof(dot(first(x), first(y))))
-    @inbounds for j in 1:m
-        yj = y[j]
-        if !iszero(yj)
-            temp = x[j]
-            @simd for i in j+1:m
-                temp += adjoint(A[i,j]) * x[i]
-            end
-            r += dot(temp, yj)
-        end
-    end
-    return r
-end
-
-fillstored!(A::LowerTriangular, x)     = (fillband!(A.data, x, 1-size(A,1), 0); A)
-fillstored!(A::UnitLowerTriangular, x) = (fillband!(A.data, x, 1-size(A,1), -1); A)
-fillstored!(A::UpperTriangular, x)     = (fillband!(A.data, x, 0, size(A,2)-1); A)
-fillstored!(A::UnitUpperTriangular, x) = (fillband!(A.data, x, 1, size(A,2)-1); A)
-
-# Binary operations
-+(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(A.data + B.data)
-+(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(A.data + B.data)
-+(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(A.data + triu(B.data, 1) + I)
-+(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(A.data + tril(B.data, -1) + I)
-+(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(triu(A.data, 1) + B.data + I)
-+(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(tril(A.data, -1) + B.data + I)
-+(A::UnitUpperTriangular, B::UnitUpperTriangular) = UpperTriangular(triu(A.data, 1) + triu(B.data, 1) + 2I)
-+(A::UnitLowerTriangular, B::UnitLowerTriangular) = LowerTriangular(tril(A.data, -1) + tril(B.data, -1) + 2I)
-+(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) + copyto!(similar(parent(B)), B)
-
--(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(A.data - B.data)
--(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(A.data - B.data)
--(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(A.data - triu(B.data, 1) - I)
--(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(A.data - tril(B.data, -1) - I)
--(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(triu(A.data, 1) - B.data + I)
--(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(tril(A.data, -1) - B.data + I)
--(A::UnitUpperTriangular, B::UnitUpperTriangular) = UpperTriangular(triu(A.data, 1) - triu(B.data, 1))
--(A::UnitLowerTriangular, B::UnitLowerTriangular) = LowerTriangular(tril(A.data, -1) - tril(B.data, -1))
--(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) - copyto!(similar(parent(B)), B)
-
-######################
-# BlasFloat routines #
-######################
-
-lmul!(A::Tridiagonal, B::AbstractTriangular) = A*full!(B)
-mul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) = _multrimat!(C, A, B)
-mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) = _multrimat!(C, A, B)
-mul!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = _mulmattri!(C, A, B)
-mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractTriangular) = _multrimat!(C, A, B)
-
-for TC in (:AbstractVector, :AbstractMatrix)
-    @eval @inline function mul!(C::$TC, A::AbstractTriangular, B::AbstractVector, alpha::Number, beta::Number)
-        if isone(alpha) && iszero(beta)
-            return mul!(C, A, B)
-        else
-            return generic_matvecmul!(C, 'N', A, B, MulAddMul(alpha, beta))
-        end
-    end
-end
-for (TA, TB) in ((:AbstractTriangular, :AbstractMatrix),
-                    (:AbstractMatrix, :AbstractTriangular),
-                    (:AbstractTriangular, :AbstractTriangular)
-                )
-    @eval @inline function mul!(C::AbstractMatrix, A::$TA, B::$TB, alpha::Number, beta::Number)
-        if isone(alpha) && iszero(beta)
-            return mul!(C, A, B)
-        else
-            return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
-        end
-    end
-end
-
-
-# generic fallback for AbstractTriangular matrices outside of the four subtypes provided here
-_multrimat!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) =
-    lmul!(A, inplace_adj_or_trans(B)(C, _parent(B)))
-_mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = rmul!(copyto!(C, A), B)
-
-# preserve triangular structure in in-place multiplication
-for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular),
-                        (:UpperTriangular, :UpperTriangular, :UnitUpperTriangular),
-                        (:UpperTriangular, :UnitUpperTriangular, :UpperTriangular),
-                        (:UnitUpperTriangular, :UnitUpperTriangular, :UnitUpperTriangular),
-                        (:LowerTriangular, :LowerTriangular, :LowerTriangular),
-                        (:LowerTriangular, :LowerTriangular, :UnitLowerTriangular),
-                        (:LowerTriangular, :UnitLowerTriangular, :LowerTriangular),
-                        (:UnitLowerTriangular, :UnitLowerTriangular, :UnitLowerTriangular))
-    @eval function _multrimat!(C::$cty, A::$aty, B::$bty)
-        _multrimat!(parent(C), A, B)
-        return C
-    end
-end
-
-# direct multiplication/division
-for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
-                            (:UnitLowerTriangular, 'L', 'U'),
-                            (:UpperTriangular, 'U', 'N'),
-                            (:UnitUpperTriangular, 'U', 'U'))
-    @eval begin
-        # Vector multiplication
-        lmul!(A::$t{T,<:StridedMatrix}, b::StridedVector{T}) where {T<:BlasFloat} =
-            BLAS.trmv!($uploc, 'N', $isunitc, A.data, b)
-
-        # Matrix multiplication
-        lmul!(A::$t{T,<:StridedMatrix}, B::StridedMatrix{T}) where {T<:BlasFloat} =
-            BLAS.trmm!('L', $uploc, 'N', $isunitc, one(T), A.data, B)
-        rmul!(A::StridedMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} =
-            BLAS.trmm!('R', $uploc, 'N', $isunitc, one(T), B.data, A)
-
-        # Left division
-        ldiv!(A::$t{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
-            LAPACK.trtrs!($uploc, 'N', $isunitc, A.data, B)
-
-        # Right division
-        rdiv!(A::StridedMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} =
-            BLAS.trsm!('R', $uploc, 'N', $isunitc, one(T), B.data, A)
-
-        # Matrix inverse
-        inv!(A::$t{T,S}) where {T<:BlasFloat,S<:StridedMatrix} =
-            $t{T,S}(LAPACK.trtri!($uploc, $isunitc, A.data))
-
-        # Error bounds for triangular solve
-        errorbounds(A::$t{T,<:StridedMatrix}, X::StridedVecOrMat{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
-            LAPACK.trrfs!($uploc, 'N', $isunitc, A.data, B, X)
-
-        # Condition numbers
-        function cond(A::$t{<:BlasFloat,<:StridedMatrix}, p::Real=2)
-            checksquare(A)
-            if p == 1
-                return inv(LAPACK.trcon!('O', $uploc, $isunitc, A.data))
-            elseif p == Inf
-                return inv(LAPACK.trcon!('I', $uploc, $isunitc, A.data))
-            else # use fallback
-                return cond(copyto!(similar(parent(A)), A), p)
-            end
-        end
-    end
-end
-
-# adjoint/transpose multiplication ('uploc' reversed)
-for (t, uploc, isunitc) in ((:LowerTriangular, 'U', 'N'),
-                            (:UnitLowerTriangular, 'U', 'U'),
-                            (:UpperTriangular, 'L', 'N'),
-                            (:UnitUpperTriangular, 'L', 'U'))
-    @eval begin
-        # Vector multiplication
-        lmul!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasFloat} =
-            BLAS.trmv!($uploc, 'T', $isunitc, parent(parent(A)), b)
-        lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasReal} =
-            BLAS.trmv!($uploc, 'T', $isunitc, parent(parent(A)), b)
-        lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasComplex} =
-            BLAS.trmv!($uploc, 'C', $isunitc, parent(parent(A)), b)
-
-        # Matrix multiplication
-        lmul!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasFloat} =
-            BLAS.trmm!('L', $uploc, 'T', $isunitc, one(T), parent(parent(A)), B)
-        lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasComplex} =
-            BLAS.trmm!('L', $uploc, 'C', $isunitc, one(T), parent(parent(A)), B)
-        lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasReal} =
-            BLAS.trmm!('L', $uploc, 'T', $isunitc, one(T), parent(parent(A)), B)
-
-        rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} =
-            BLAS.trmm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A)
-        rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasComplex} =
-            BLAS.trmm!('R', $uploc, 'C', $isunitc, one(T), parent(parent(B)), A)
-        rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasReal} =
-            BLAS.trmm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A)
-
-        # Left division
-        ldiv!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
-            LAPACK.trtrs!($uploc, 'T', $isunitc, parent(parent(A)), B)
-        ldiv!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
-            LAPACK.trtrs!($uploc, 'T', $isunitc, parent(parent(A)), B)
-        ldiv!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
-            LAPACK.trtrs!($uploc, 'C', $isunitc, parent(parent(A)), B)
-
-        # Right division
-        rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} =
-            BLAS.trsm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A)
-        rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasReal} =
-            BLAS.trsm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A)
-        rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasComplex} =
-            BLAS.trsm!('R', $uploc, 'C', $isunitc, one(T), parent(parent(B)), A)
-    end
-end
-
-# redirect back to BLAS
-for t in (:UpperTriangular, :UnitUpperTriangular, :LowerTriangular, :UnitLowerTriangular)
-    @eval _multrimat!(C::StridedVecOrMat{T}, A::$t{T,<:StridedMatrix}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
-        lmul!(A, copyto!(C, B))
-    @eval _multrimat!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
-        lmul!(A, copyto!(C, B))
-    @eval _multrimat!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
-        lmul!(A, copyto!(C, B))
-    @eval _mulmattri!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} =
-        rmul!(copyto!(C, A), B)
-    @eval _mulmattri!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasFloat} =
-        rmul!(copyto!(C, A), B)
-    @eval _mulmattri!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} =
-        rmul!(copyto!(C, A), B)
-
-    @eval ldiv!(C::StridedVecOrMat{T}, A::$t{T,<:StridedMatrix}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
-        ldiv!(A, copyto!(C, B))
-    @eval ldiv!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
-        ldiv!(A, copyto!(C, B))
-    @eval ldiv!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
-        ldiv!(A, copyto!(C, B))
-    @eval _rdiv!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} =
-        rdiv!(copyto!(C, A), B)
-    @eval _rdiv!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasFloat} =
-        rdiv!(copyto!(C, A), B)
-    @eval _rdiv!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} =
-        rdiv!(copyto!(C, A), B)
-end
-
-for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTriangular)
-    @eval function inv(A::$t{T}) where {T}
-        S = typeof(inv(oneunit(T)))
-        if S <: BlasFloat || S === T # i.e. A is unitless
-            $t(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A))))
-        else
-            J = (one(T)*I)(size(A, 1))
-            $t(ldiv!(similar(A, S, size(A)), A, J))
-        end
-    end
-end
-
-errorbounds(A::AbstractTriangular{T}, X::AbstractVecOrMat{T}, B::AbstractVecOrMat{T}) where {T<:Union{BigFloat,Complex{BigFloat}}} =
-    error("not implemented yet! Please submit a pull request.")
-function errorbounds(A::AbstractTriangular{TA}, X::AbstractVecOrMat{TX}, B::AbstractVecOrMat{TB}) where {TA<:Number,TX<:Number,TB<:Number}
-    TAXB = promote_type(TA, TB, TX, Float32)
-    errorbounds(convert(AbstractMatrix{TAXB}, A), convert(AbstractArray{TAXB}, X), convert(AbstractArray{TAXB}, B))
-end
-
-# Eigensystems
-## Notice that trecv works for quasi-triangular matrices and therefore the lower sub diagonal must be zeroed before calling the subroutine
-function eigvecs(A::UpperTriangular{<:BlasFloat,<:StridedMatrix})
-    LAPACK.trevc!('R', 'A', BlasInt[], triu!(A.data))
-end
-function eigvecs(A::UnitUpperTriangular{<:BlasFloat,<:StridedMatrix})
-    for i = 1:size(A, 1)
-        A.data[i,i] = 1
-    end
-    LAPACK.trevc!('R', 'A', BlasInt[], triu!(A.data))
-end
-function eigvecs(A::LowerTriangular{<:BlasFloat,<:StridedMatrix})
-    LAPACK.trevc!('L', 'A', BlasInt[], copy(tril!(A.data)'))
-end
-function eigvecs(A::UnitLowerTriangular{<:BlasFloat,<:StridedMatrix})
-    for i = 1:size(A, 1)
-        A.data[i,i] = 1
-    end
-    LAPACK.trevc!('L', 'A', BlasInt[], copy(tril!(A.data)'))
-end
-
-####################
-# Generic routines #
-####################
-
-for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
-                   (LowerTriangular, UnitLowerTriangular))
-    @eval begin
-        (*)(A::$t, x::Number) = $t(A.data*x)
-
-        function (*)(A::$unitt, x::Number)
-            B = A.data*x
-            for i = 1:size(A, 1)
-                B[i,i] = x
-            end
-            $t(B)
-        end
-
-        (*)(x::Number, A::$t) = $t(x*A.data)
-
-        function (*)(x::Number, A::$unitt)
-            B = x*A.data
-            for i = 1:size(A, 1)
-                B[i,i] = x
-            end
-            $t(B)
-        end
-
-        (/)(A::$t, x::Number) = $t(A.data/x)
-
-        function (/)(A::$unitt, x::Number)
-            B = A.data/x
-            invx = inv(x)
-            for i = 1:size(A, 1)
-                B[i,i] = invx
-            end
-            $t(B)
-        end
-
-        (\)(x::Number, A::$t) = $t(x\A.data)
-
-        function (\)(x::Number, A::$unitt)
-            B = x\A.data
-            invx = inv(x)
-            for i = 1:size(A, 1)
-                B[i,i] = invx
-            end
-            $t(B)
-        end
-
-        lmul!(A::$t, B::AbstractVecOrMat)     = @inline _multrimat!(B, A, B)
-        lmul!(A::$unitt, B::AbstractVecOrMat) = @inline _multrimat!(B, A, B)
-
-        rmul!(A::AbstractMatrix, B::$t)     = @inline _mulmattri!(A, A, B)
-        rmul!(A::AbstractMatrix, B::$unitt) = @inline _mulmattri!(A, A, B)
-    end
-end
-
-## Generic triangular multiplication
-function _multrimat!(C::AbstractVecOrMat, A::UpperTriangular, B::AbstractVecOrMat)
-    require_one_based_indexing(C, A, B)
-    m, n = size(B, 1), size(B, 2)
-    N = size(A, 1)
-    if m != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    mc, nc = size(C, 1), size(C, 2)
-    if mc != N || nc != n
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
-    end
-    @inbounds for j in 1:n
-        for i in 1:m
-            Cij = A.data[i,i] * B[i,j]
-            for k in i + 1:m
-                Cij += A.data[i,k] * B[k,j]
-            end
-            C[i,j] = Cij
-        end
-    end
-    return C
-end
-function _multrimat!(C::AbstractVecOrMat, A::UnitUpperTriangular, B::AbstractVecOrMat)
-    require_one_based_indexing(C, A, B)
-    m, n = size(B, 1), size(B, 2)
-    N = size(A, 1)
-    if m != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-
-    mc, nc = size(C, 1), size(C, 2)
-    if mc != N || nc != n
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
-    end
-    @inbounds for j in 1:n
-        for i in 1:m
-            Cij = oneunit(eltype(A)) * B[i,j]
-            for k in i + 1:m
-                Cij += A.data[i,k] * B[k,j]
-            end
-            C[i,j] = Cij
-        end
-    end
-    return C
-end
-function _multrimat!(C::AbstractVecOrMat, A::LowerTriangular, B::AbstractVecOrMat)
-    require_one_based_indexing(C, A, B)
-    m, n = size(B, 1), size(B, 2)
-    N = size(A, 1)
-    if m != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    mc, nc = size(C, 1), size(C, 2)
-    if mc != N || nc != n
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
-    end
-    @inbounds for j in 1:n
-        for i in m:-1:1
-            Cij = A.data[i,i] * B[i,j]
-            for k in 1:i - 1
-                Cij += A.data[i,k] * B[k,j]
-            end
-            C[i,j] = Cij
-        end
-    end
-    return C
-end
-function _multrimat!(C::AbstractVecOrMat, A::UnitLowerTriangular, B::AbstractVecOrMat)
-    require_one_based_indexing(C, A, B)
-    m, n = size(B, 1), size(B, 2)
-    N = size(A, 1)
-    if m != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    mc, nc = size(C, 1), size(C, 2)
-    if mc != N || nc != n
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
-    end
-    @inbounds for j in 1:n
-        for i in m:-1:1
-            Cij = oneunit(eltype(A)) * B[i,j]
-            for k in 1:i - 1
-                Cij += A.data[i,k] * B[k,j]
-            end
-            C[i,j] = Cij
-        end
-    end
-    return C
-end
-
-function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::UpperTriangular)
-    require_one_based_indexing(C, A, B)
-    m, n = size(A, 1), size(A, 2)
-    N = size(B, 1)
-    if n != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
-    end
-    mc, nc = size(C, 1), size(C, 2)
-    if mc != m || nc != N
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
-    end
-    @inbounds for i in 1:m
-        for j in n:-1:1
-            Cij = A[i,j] * B.data[j,j]
-            for k in 1:j - 1
-                Cij += A[i,k] * B.data[k,j]
-            end
-            C[i,j] = Cij
-        end
-    end
-    return C
-end
-function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::UnitUpperTriangular)
-    require_one_based_indexing(C, A, B)
-    m, n = size(A, 1), size(A, 2)
-    N = size(B, 1)
-    if n != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
-    end
-    mc, nc = size(C, 1), size(C, 2)
-    if mc != m || nc != N
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
-    end
-    @inbounds for i in 1:m
-        for j in n:-1:1
-            Cij = A[i,j] * oneunit(eltype(B))
-            for k in 1:j - 1
-                Cij += A[i,k] * B.data[k,j]
-            end
-            C[i,j] = Cij
-        end
-    end
-    return C
-end
-function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::LowerTriangular)
-    require_one_based_indexing(C, A, B)
-    m, n = size(A, 1), size(A, 2)
-    N = size(B, 1)
-    if n != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
-    end
-    mc, nc = size(C, 1), size(C, 2)
-    if mc != m || nc != N
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
-    end
-    @inbounds for i in 1:m
-        for j in 1:n
-            Cij = A[i,j] * B.data[j,j]
-            for k in j + 1:n
-                Cij += A[i,k] * B.data[k,j]
-            end
-            C[i,j] = Cij
-        end
-    end
-    return C
-end
-function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::UnitLowerTriangular)
-    require_one_based_indexing(C, A, B)
-    m, n = size(A, 1), size(A, 2)
-    N = size(B, 1)
-    if n != N
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
-    end
-    mc, nc = size(C, 1), size(C, 2)
-    if mc != m || nc != N
-        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
-    end
-    @inbounds for i in 1:m
-        for j in 1:n
-            Cij = A[i,j] * oneunit(eltype(B))
-            for k in j + 1:n
-                Cij += A[i,k] * B.data[k,j]
-            end
-            C[i,j] = Cij
-        end
-    end
-    return C
-end
-
-#Generic solver using naive substitution
-# manually hoisting b[j] significantly improves performance as of Dec 2015
-# manually eliding bounds checking significantly improves performance as of Dec 2015
-# directly indexing A.data rather than A significantly improves performance as of Dec 2015
-# replacing repeated references to A.data with [Adata = A.data and references to Adata]
-# does not significantly impact performance as of Dec 2015
-# replacing repeated references to A.data[j,j] with [Ajj = A.data[j,j] and references to Ajj]
-# does not significantly impact performance as of Dec 2015
-ldiv!(A::AbstractTriangular, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
-function ldiv!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix)
-    require_one_based_indexing(C, A, B)
-    nA, mA = size(A)
-    n = size(B, 1)
-    if nA != n
-        throw(DimensionMismatch("second dimension of left hand side A, $mA, and first dimension of right hand side B, $n, must be equal"))
-    end
-    if size(C) != size(B)
-        throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))"))
-    end
-    @inbounds for (c, b) in zip(eachcol(C), eachcol(B))
-        ldiv!(c, A, b)
-    end
-    C
-end
-@inline function ldiv!(c::AbstractVector, A::AbstractTriangular, b::AbstractVector)
-    @boundscheck begin
-        require_one_based_indexing(c, A, b)
-        n = size(A, 2)
-        if !(n == length(b))
-            throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
-        end
-        if !(n == length(c))
-            throw(DimensionMismatch("length of output c, $(length(c)), does not match length of right hand side b, $(length(b))"))
-        end
-    end
-    return _ldiv!(c, A, b)
-end
-
-_uconvert_copyto!(c, b, oA) = (c .= Ref(oA) .\ b)
-_uconvert_copyto!(c::AbstractArray{T}, b::AbstractArray{T}, _) where {T} = copyto!(c, b)
-
-@inline _ustrip(a) = oneunit(a) \ a
-@inline _ustrip(a::Union{AbstractFloat,Integer,Complex,Rational}) = a
-
-# all of the following _ldiv! methods are "unsafe" in that they assume one-based indexing
-# and compatible sizes
-function _ldiv!(c::AbstractVector, A::UpperTriangular, b::AbstractVector)
-    n = size(A, 2)
-    c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A)))
-    @inbounds for j in n:-1:1
-        ajj = A.data[j,j]
-        iszero(ajj) && throw(SingularException(j))
-        cj = c[j] = _ustrip(ajj) \ c[j]
-        for i in j-1:-1:1
-            c[i] -= _ustrip(A.data[i,j]) * cj
-        end
-    end
-    return c
-end
-function _ldiv!(c::AbstractVector, A::UnitUpperTriangular, b::AbstractVector)
-    n = size(A, 2)
-    c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A)))
-    @inbounds for j in n:-1:1
-        cj = c[j]
-        for i in 1:j-1
-            c[i] -= _ustrip(A.data[i,j]) * cj
-        end
-    end
-    return c
-end
-function _ldiv!(c::AbstractVector, A::LowerTriangular, b::AbstractVector)
-    n = size(A, 2)
-    c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A)))
-    @inbounds for j in 1:n
-        ajj = A.data[j,j]
-        iszero(ajj) && throw(SingularException(j))
-        cj = c[j] = _ustrip(ajj) \ c[j]
-        for i in j+1:n
-            c[i] -= _ustrip(A.data[i,j]) * cj
-        end
-    end
-    return c
-end
-function _ldiv!(c::AbstractVector, A::UnitLowerTriangular, b::AbstractVector)
-    n = size(A, 2)
-    c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A)))
-    @inbounds for j in 1:n
-        cj = c[j]
-        for i in j+1:n
-            c[i] -= _ustrip(A.data[i,j]) * cj
-        end
-    end
-    return c
-end
-
-
-# in the following transpose and conjugate transpose naive substitution variants,
-# accumulating in z rather than b[j,k] significantly improves performance as of Dec 2015
-function _ldiv!(c::AbstractVector, xA::UpperTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector)
-    tfun = adj_or_trans(parent(xA))
-    A = parent(parent(xA))
-    n = size(A, 2)
-    @inbounds for j in n:-1:1
-        ajj = A[j,j]
-        iszero(ajj) && throw(SingularException(j))
-        bj = b[j]
-        for i in j+1:n
-            bj -= tfun(A[i,j]) * c[i]
-        end
-        c[j] = tfun(ajj) \ bj
-    end
-    return c
-end
-function _ldiv!(c::AbstractVector, xA::UnitUpperTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector)
-    tfun = adj_or_trans(parent(xA))
-    A = parent(parent(xA))
-    oA = oneunit(eltype(A))
-    n = size(A, 2)
-    @inbounds for j in n:-1:1
-        bj = b[j]
-        for i in j+1:n
-            bj -= tfun(A[i,j]) * c[i]
-        end
-        c[j] = oA \ bj
-    end
-    return c
-end
-function _ldiv!(c::AbstractVector, xA::LowerTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector)
-    tfun = adj_or_trans(parent(xA))
-    A = parent(parent(xA))
-    n = size(A, 2)
-    @inbounds for j in 1:n
-        ajj = A[j,j]
-        iszero(ajj) && throw(SingularException(j))
-        bj = b[j]
-        for i in 1:j-1
-            bj -= tfun(A[i,j]) * c[i]
-        end
-        c[j] = tfun(ajj) \ bj
-    end
-    return c
-end
-function _ldiv!(c::AbstractVector, xA::UnitLowerTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector)
-    tfun = adj_or_trans(parent(xA))
-    A = parent(parent(xA))
-    oA = oneunit(eltype(A))
-    n = size(A, 2)
-    @inbounds for j in 1:n
-        bj = b[j]
-        for i in 1:j-1
-            bj -= tfun(A[i,j]) * c[i]
-        end
-        c[j] = oA \ bj
-    end
-    return c
-end
-
-rdiv!(A::AbstractMatrix, B::AbstractTriangular) = @inline _rdiv!(A, A, B)
-function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UpperTriangular)
-    require_one_based_indexing(C, A, B)
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    if size(C) != size(A)
-        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
-    end
-    @inbounds for i in 1:m
-        for j in 1:n
-            Aij = A[i,j]
-            for k in 1:j - 1
-                Aij -= C[i,k]*B.data[k,j]
-            end
-            iszero(B.data[j,j]) && throw(SingularException(j))
-            C[i,j] = Aij / B.data[j,j]
-        end
-    end
-    C
-end
-function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UnitUpperTriangular)
-    require_one_based_indexing(C, A, B)
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    if size(C) != size(A)
-        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
-    end
-    @inbounds for i in 1:m
-        for j in 1:n
-            Aij = A[i,j]
-            for k in 1:j - 1
-                Aij -= C[i,k]*B.data[k,j]
-            end
-            C[i,j] = Aij / oneunit(eltype(B))
-        end
-    end
-    C
-end
-function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::LowerTriangular)
-    require_one_based_indexing(C, A, B)
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    if size(C) != size(A)
-        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
-    end
-    @inbounds for i in 1:m
-        for j in n:-1:1
-            Aij = A[i,j]
-            for k in j + 1:n
-                Aij -= C[i,k]*B.data[k,j]
-            end
-            iszero(B.data[j,j]) && throw(SingularException(j))
-            C[i,j] = Aij / B.data[j,j]
-        end
-    end
-    C
-end
-function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UnitLowerTriangular)
-    require_one_based_indexing(C, A, B)
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    if size(C) != size(A)
-        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
-    end
-    @inbounds for i in 1:m
-        for j in n:-1:1
-            Aij = A[i,j]
-            for k in j + 1:n
-                Aij -= C[i,k]*B.data[k,j]
-            end
-            C[i,j] = Aij / oneunit(eltype(B))
-        end
-    end
-    C
-end
-
-lmul!(A::UpperTriangular,     B::UpperTriangular) = UpperTriangular(lmul!(A, triu!(B.data)))
-lmul!(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(lmul!(A, triu!(B.data)))
-lmul!(A::LowerTriangular,     B::LowerTriangular) = LowerTriangular(lmul!(A, tril!(B.data)))
-lmul!(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(lmul!(A, tril!(B.data)))
-
-ldiv!(A::UpperTriangular,     B::UpperTriangular) = UpperTriangular(ldiv!(A, triu!(B.data)))
-ldiv!(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(ldiv!(A, triu!(B.data)))
-ldiv!(A::LowerTriangular,     B::LowerTriangular) = LowerTriangular(ldiv!(A, tril!(B.data)))
-ldiv!(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(ldiv!(A, tril!(B.data)))
-
-rdiv!(A::UpperTriangular, B::UpperTriangular)     = UpperTriangular(rdiv!(triu!(A.data), B))
-rdiv!(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(rdiv!(triu!(A.data), B))
-rdiv!(A::LowerTriangular, B::LowerTriangular)     = LowerTriangular(rdiv!(tril!(A.data), B))
-rdiv!(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(rdiv!(tril!(A.data), B))
-
-rmul!(A::UpperTriangular, B::UpperTriangular)     = UpperTriangular(rmul!(triu!(A.data), B))
-rmul!(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(rmul!(triu!(A.data), B))
-rmul!(A::LowerTriangular, B::LowerTriangular)     = LowerTriangular(rmul!(tril!(A.data), B))
-rmul!(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(rmul!(tril!(A.data), B))
-
-# Promotion
-## Promotion methods in matmul don't apply to triangular multiplication since
-## it is inplace. Hence we have to make very similar definitions, but without
-## allocation of a result array. For multiplication and unit diagonal division
-## the element type doesn't have to be stable under division whereas that is
-## necessary in the general triangular solve problem.
-
-_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA<:Integer,TB<:Integer} =
-    _init_eltype(*, TA, TB)
-_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA,TB} =
-    _init_eltype(op, TA, TB)
-## The general promotion methods
-function *(A::AbstractTriangular, B::AbstractTriangular)
-    TAB = _init_eltype(*, eltype(A), eltype(B))
-    if TAB <: BlasFloat
-        lmul!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB))
-    else
-        mul!(similar(B, TAB, size(B)), A, B)
-    end
-end
-
-for mat in (:AbstractVector, :AbstractMatrix)
-    ### Multiplication with triangle to the left and hence rhs cannot be transposed.
-    @eval function *(A::AbstractTriangular, B::$mat)
-        require_one_based_indexing(B)
-        TAB = _init_eltype(*, eltype(A), eltype(B))
-        if TAB <: BlasFloat
-            lmul!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB))
-        else
-            mul!(similar(B, TAB, size(B)), A, B)
-        end
-    end
-    ### Left division with triangle to the left hence rhs cannot be transposed. No quotients.
-    @eval function \(A::Union{UnitUpperTriangular,UnitLowerTriangular}, B::$mat)
-        require_one_based_indexing(B)
-        TAB = _inner_type_promotion(\, eltype(A), eltype(B))
-        if TAB <: BlasFloat
-            ldiv!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB))
-        else
-            ldiv!(similar(B, TAB, size(B)), A, B)
-        end
-    end
-    ### Left division with triangle to the left hence rhs cannot be transposed. Quotients.
-    @eval function \(A::Union{UpperTriangular,LowerTriangular}, B::$mat)
-        require_one_based_indexing(B)
-        TAB = _init_eltype(\, eltype(A), eltype(B))
-        if TAB <: BlasFloat
-            ldiv!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB))
-        else
-            ldiv!(similar(B, TAB, size(B)), A, B)
-        end
-    end
-    ### Right division with triangle to the right hence lhs cannot be transposed. No quotients.
-    @eval function /(A::$mat, B::Union{UnitUpperTriangular, UnitLowerTriangular})
-        require_one_based_indexing(A)
-        TAB = _inner_type_promotion(/, eltype(A), eltype(B))
-        if TAB <: BlasFloat
-            rdiv!(copy_similar(A, TAB), convert(AbstractArray{TAB}, B))
-        else
-            _rdiv!(similar(A, TAB, size(A)), A, B)
-        end
-    end
-    ### Right division with triangle to the right hence lhs cannot be transposed. Quotients.
-    @eval function /(A::$mat, B::Union{UpperTriangular,LowerTriangular})
-        require_one_based_indexing(A)
-        TAB = _init_eltype(/, eltype(A), eltype(B))
-        if TAB <: BlasFloat
-            rdiv!(copy_similar(A, TAB), convert(AbstractArray{TAB}, B))
-        else
-            _rdiv!(similar(A, TAB, size(A)), A, B)
-        end
-    end
-end
-### Multiplication with triangle to the right and hence lhs cannot be transposed.
-# Only for AbstractMatrix, hence outside the above loop.
-function *(A::AbstractMatrix, B::AbstractTriangular)
-    require_one_based_indexing(A)
-    TAB = _init_eltype(*, eltype(A), eltype(B))
-    if TAB <: BlasFloat
-        rmul!(copy_similar(A, TAB), convert(AbstractArray{TAB}, B))
-    else
-        mul!(similar(A, TAB, size(A)), A, B)
-    end
-end
-# ambiguity resolution with definitions in matmul.jl
-*(v::AdjointAbsVec, A::AbstractTriangular) = adjoint(adjoint(A) * v.parent)
-*(v::TransposeAbsVec, A::AbstractTriangular) = transpose(transpose(A) * v.parent)
-
-## Some Triangular-Triangular cases. We might want to write tailored methods
-## for these cases, but I'm not sure it is worth it.
-for f in (:*, :\)
-    @eval begin
-        ($f)(A::LowerTriangular, B::LowerTriangular) =
-            LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix))
-        ($f)(A::LowerTriangular, B::UnitLowerTriangular) =
-            LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix))
-        ($f)(A::UnitLowerTriangular, B::LowerTriangular) =
-            LowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix))
-        ($f)(A::UnitLowerTriangular, B::UnitLowerTriangular) =
-            UnitLowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix))
-        ($f)(A::UpperTriangular, B::UpperTriangular) =
-            UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix))
-        ($f)(A::UpperTriangular, B::UnitUpperTriangular) =
-            UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix))
-        ($f)(A::UnitUpperTriangular, B::UpperTriangular) =
-            UpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix))
-        ($f)(A::UnitUpperTriangular, B::UnitUpperTriangular) =
-            UnitUpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix))
-    end
-end
-(/)(A::LowerTriangular, B::LowerTriangular) =
-    LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular))
-(/)(A::LowerTriangular, B::UnitLowerTriangular) =
-    LowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular))
-(/)(A::UnitLowerTriangular, B::LowerTriangular) =
-    LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular))
-(/)(A::UnitLowerTriangular, B::UnitLowerTriangular) =
-    UnitLowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular))
-(/)(A::UpperTriangular, B::UpperTriangular) =
-    UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular))
-(/)(A::UpperTriangular, B::UnitUpperTriangular) =
-    UpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular))
-(/)(A::UnitUpperTriangular, B::UpperTriangular) =
-    UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular))
-(/)(A::UnitUpperTriangular, B::UnitUpperTriangular) =
-    UnitUpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular))
-
-# Complex matrix power for upper triangular factor, see:
-#   Higham and Lin, "A Schur-Padé algorithm for fractional powers of a Matrix",
-#     SIAM J. Matrix Anal. & Appl., 32 (3), (2011) 1056–1078.
-#   Higham and Lin, "An improved Schur-Padé algorithm for fractional powers of
-#     a matrix and their Fréchet derivatives", SIAM. J. Matrix Anal. & Appl.,
-#     34(3), (2013) 1341–1360.
-function powm!(A0::UpperTriangular{<:BlasFloat}, p::Real)
-    if abs(p) >= 1
-        throw(ArgumentError("p must be a real number in (-1,1), got $p"))
-    end
-
-    normA0 = opnorm(A0, 1)
-    rmul!(A0, 1/normA0)
-
-    theta = [1.53e-5, 2.25e-3, 1.92e-2, 6.08e-2, 1.25e-1, 2.03e-1, 2.84e-1]
-    n = checksquare(A0)
-
-    A, m, s = invsquaring(A0, theta)
-    A = I - A
-
-    # Compute accurate diagonal of I - T
-    sqrt_diag!(A0, A, s)
-    for i = 1:n
-        A[i, i] = -A[i, i]
-    end
-    # Compute the Padé approximant
-    c = 0.5 * (p - m) / (2 * m - 1)
-    triu!(A)
-    S = c * A
-    Stmp = similar(S)
-    for j = m-1:-1:1
-        j4 = 4 * j
-        c = (-p - j) / (j4 + 2)
-        for i = 1:n
-            @inbounds S[i, i] = S[i, i] + 1
-        end
-        copyto!(Stmp, S)
-        mul!(S, A, c)
-        ldiv!(Stmp, S.data)
-
-        c = (p - j) / (j4 - 2)
-        for i = 1:n
-            @inbounds S[i, i] = S[i, i] + 1
-        end
-        copyto!(Stmp, S)
-        mul!(S, A, c)
-        ldiv!(Stmp, S.data)
-    end
-    for i = 1:n
-        S[i, i] = S[i, i] + 1
-    end
-    copyto!(Stmp, S)
-    mul!(S, A, -p)
-    ldiv!(Stmp, S.data)
-    for i = 1:n
-        @inbounds S[i, i] = S[i, i] + 1
-    end
-
-    blockpower!(A0, S, p/(2^s))
-    for m = 1:s
-        mul!(Stmp.data, S, S)
-        copyto!(S, Stmp)
-        blockpower!(A0, S, p/(2^(s-m)))
-    end
-    rmul!(S, normA0^p)
-    return S
-end
-powm(A::LowerTriangular, p::Real) = copy(transpose(powm!(copy(transpose(A)), p::Real)))
-
-# Complex matrix logarithm for the upper triangular factor, see:
-#   Al-Mohy and Higham, "Improved inverse scaling and squaring algorithms for
-#     the matrix logarithm", SIAM J. Sci. Comput., 34(4), (2012), pp. C153–C169.
-#   Al-Mohy, Higham and Relton, "Computing the Frechet derivative of the matrix
-#     logarithm and estimating the condition number", SIAM J. Sci. Comput.,
-#     35(4), (2013), C394–C410.
-#
-# Based on the code available at http://eprints.ma.man.ac.uk/1851/02/logm.zip,
-# Copyright (c) 2011, Awad H. Al-Mohy and Nicholas J. Higham
-# Julia version relicensed with permission from original authors
-log(A::UpperTriangular{T}) where {T<:BlasFloat} = log_quasitriu(A)
-log(A::UnitUpperTriangular{T}) where {T<:BlasFloat} = log_quasitriu(A)
-log(A::LowerTriangular) = copy(transpose(log(copy(transpose(A)))))
-log(A::UnitLowerTriangular) = copy(transpose(log(copy(transpose(A)))))
-
-function log_quasitriu(A0::AbstractMatrix{T}) where T<:BlasFloat
-    # allocate real A if log(A) will be real and complex A otherwise
-    n = checksquare(A0)
-    if isreal(A0) && (!istriu(A0) || !any(x -> real(x) < zero(real(T)), diag(A0)))
-        A = T <: Complex ? real(A0) : copy(A0)
-    else
-        A = T <: Complex ? copy(A0) : complex(A0)
-    end
-    if A0 isa UnitUpperTriangular
-        A = UpperTriangular(parent(A))
-        @inbounds for i in 1:n
-            A[i,i] = 1
-        end
-    end
-    Y0 = _log_quasitriu!(A0, A)
-    # return complex result for complex input
-    Y = T <: Complex ? complex(Y0) : Y0
-
-    if A0 isa UpperTriangular || A0 isa UnitUpperTriangular
-        return UpperTriangular(Y)
-    else
-        return Y
-    end
-end
-# type-stable implementation of log_quasitriu
-# A is a copy of A0 that is overwritten while computing the result. It has the same eltype
-# as the result.
-function _log_quasitriu!(A0, A)
-    # Find Padé degree m and s while replacing A with A^(1/2^s)
-    m, s = _find_params_log_quasitriu!(A)
-
-    # Compute accurate superdiagonal of A
-    _pow_superdiag_quasitriu!(A, A0, 0.5^s)
-
-    # Compute accurate block diagonal of A
-    _sqrt_pow_diag_quasitriu!(A, A0, s)
-
-    # Get the Gauss-Legendre quadrature points and weights
-    R = zeros(Float64, m, m)
-    for i = 1:m - 1
-        R[i,i+1] = i / sqrt((2 * i)^2 - 1)
-        R[i+1,i] = R[i,i+1]
-    end
-    x,V = eigen(R)
-    w = Vector{Float64}(undef, m)
-    for i = 1:m
-        x[i] = (x[i] + 1) / 2
-        w[i] = V[1,i]^2
-    end
-
-    # Compute the Padé approximation
-    t = eltype(A)
-    n = size(A, 1)
-    Y = zeros(t, n, n)
-    B = similar(A)
-    for k = 1:m
-        B .= t(x[k]) .* A
-        @inbounds for i in 1:n
-            B[i,i] += 1
-        end
-        Y .+= t(w[k]) .* rdiv_quasitriu!(A, B)
-    end
-
-    # Scale back
-    lmul!(2.0^s, Y)
-
-    # Compute accurate diagonal and superdiagonal of log(A)
-    _log_diag_quasitriu!(Y, A0)
-
-    return Y
-end
-
-# Auxiliary functions for matrix logarithm and matrix power
-
-# Find Padé degree m and s while replacing A with A^(1/2^s)
-#   Al-Mohy and Higham, "Improved inverse scaling and squaring algorithms for
-#     the matrix logarithm", SIAM J. Sci. Comput., 34(4), (2012), pp. C153–C169.
-#   from Algorithm 4.1
-function _find_params_log_quasitriu!(A)
-    maxsqrt = 100
-    theta = [1.586970738772063e-005,
-         2.313807884242979e-003,
-         1.938179313533253e-002,
-         6.209171588994762e-002,
-         1.276404810806775e-001,
-         2.060962623452836e-001,
-         2.879093714241194e-001]
-    tmax = size(theta, 1)
-    n = size(A, 1)
-    p = 0
-    m = 0
-
-    # Find s0, the smallest s such that the ρ(triu(A)^(1/2^s) - I) ≤ theta[tmax], where ρ(X)
-    # is the spectral radius of X
-    d = complex.(@view(A[diagind(A)]))
-    dm1 = d .- 1
-    s = 0
-    while norm(dm1, Inf) > theta[tmax] && s < maxsqrt
-        d .= sqrt.(d)
-        dm1 .= d .- 1
-        s = s + 1
-    end
-    s0 = s
-
-    # Compute repeated roots
-    for k = 1:min(s, maxsqrt)
-        _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A)
-    end
-
-    # these three never needed at the same time, so reuse the same temporary
-    AmI = AmI4 = AmI5 = A - I
-    AmI2 = AmI * AmI
-    AmI3 = AmI2 * AmI
-    d2 = sqrt(opnorm(AmI2, 1))
-    d3 = cbrt(opnorm(AmI3, 1))
-    alpha2 = max(d2, d3)
-    foundm = false
-    if alpha2 <= theta[2]
-        m = alpha2 <= theta[1] ? 1 : 2
-        foundm = true
-    end
-
-    while !foundm
-        more_sqrt = false
-        mul!(AmI4, AmI2, AmI2)
-        d4 = opnorm(AmI4, 1)^(1/4)
-        alpha3 = max(d3, d4)
-        if alpha3 <= theta[tmax]
-            local j
-            for outer j = 3:tmax
-                if alpha3 <= theta[j]
-                    break
-                end
-            end
-            if j <= 6
-                m = j
-                break
-            elseif alpha3 / 2 <= theta[5] && p < 2
-                more_sqrt = true
-                p = p + 1
-           end
-        end
-
-        if !more_sqrt
-            mul!(AmI5, AmI3, AmI2)
-            d5 = opnorm(AmI5, 1)^(1/5)
-            alpha4 = max(d4, d5)
-            eta = min(alpha3, alpha4)
-            if eta <= theta[tmax]
-                j = 0
-                for outer j = 6:tmax
-                    if eta <= theta[j]
-                        m = j
-                        break
-                    end
-                end
-                break
-            end
-        end
-
-        if s == maxsqrt
-            m = tmax
-            break
-        end
-        _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A)
-        copyto!(AmI, A)
-        for i in 1:n
-            @inbounds AmI[i,i] -= 1
-        end
-        mul!(AmI2, AmI, AmI)
-        mul!(AmI3, AmI2, AmI)
-        d3 = cbrt(opnorm(AmI3, 1))
-        s = s + 1
-    end
-    return m, s
-end
-
-# Compute accurate diagonal of A = A0^s - I
-function sqrt_diag!(A0::UpperTriangular, A::UpperTriangular, s)
-    n = checksquare(A0)
-    T = eltype(A)
-    @inbounds for i = 1:n
-        a = complex(A0[i,i])
-        A[i,i] = _sqrt_pow(a, s)
-    end
-end
-# Compute accurate block diagonal of A = A0^s - I for upper quasi-triangular A0 produced
-# by the Schur decomposition. Diagonal is made of 1x1 and 2x2 blocks.
-# 2x2 blocks are real with non-negative conjugate pair eigenvalues
-function _sqrt_pow_diag_quasitriu!(A, A0, s)
-    n = checksquare(A0)
-    t = typeof(sqrt(zero(eltype(A))))
-    i = 1
-    @inbounds while i < n
-        if iszero(A0[i+1,i])  # 1x1 block
-            A[i,i] = _sqrt_pow(t(A0[i,i]), s)
-            i += 1
-        else  # real 2x2 block
-            @views _sqrt_pow_diag_block_2x2!(A[i:i+1,i:i+1], A0[i:i+1,i:i+1], s)
-            i += 2
-        end
-    end
-    if i == n  # last block is 1x1
-        @inbounds A[n,n] = _sqrt_pow(t(A0[n,n]), s)
-    end
-    return A
-end
-# compute a^(1/2^s)-1
-#   Al-Mohy, "A more accurate Briggs method for the logarithm",
-#      Numer. Algorithms, 59, (2012), 393–402.
-#   Algorithm 2
-function _sqrt_pow(a::Number, s)
-    T = typeof(sqrt(zero(a)))
-    s == 0 && return T(a) - 1
-    s0 = s
-    if imag(a) >= 0 && real(a) <= 0 && !iszero(a)  # angle(a) ≥ π / 2
-        a = sqrt(a)
-        s0 = s - 1
-    end
-    z0 = a - 1
-    a = sqrt(a)
-    r = 1 + a
-    for j = 1:s0-1
-        a = sqrt(a)
-        r = r * (1 + a)
-    end
-    return z0 / r
-end
-# compute A0 = A^(1/2^s)-I for 2x2 real matrices A and A0
-# A has non-negative conjugate pair eigenvalues
-# "Improved Inverse Scaling and Squaring Algorithms for the Matrix Logarithm"
-# SIAM J. Sci. Comput., 34(4), (2012) C153–C169. doi: 10.1137/110852553
-# Algorithm 5.1
-Base.@propagate_inbounds function _sqrt_pow_diag_block_2x2!(A, A0, s)
-    _sqrt_real_2x2!(A, A0)
-    if isone(s)
-        A[1,1] -= 1
-        A[2,2] -= 1
-    else
-        # Z = A - I
-        z11, z21, z12, z22 = A[1,1] - 1, A[2,1], A[1,2], A[2,2] - 1
-        # A = sqrt(A)
-        _sqrt_real_2x2!(A, A)
-        # P = A + I
-        p11, p21, p12, p22 = A[1,1] + 1, A[2,1], A[1,2], A[2,2] + 1
-        for i in 1:(s - 2)
-            # A = sqrt(A)
-            _sqrt_real_2x2!(A, A)
-            a11, a21, a12, a22 = A[1,1], A[2,1], A[1,2], A[2,2]
-            # P += P * A
-            r11 = p11*(1 + a11) + p12*a21
-            r22 = p21*a12 + p22*(1 + a22)
-            p21 = p21*(1 + a11) + p22*a21
-            p12 = p11*a12 + p12*(1 + a22)
-            p11 = r11
-            p22 = r22
-        end
-        # A = Z / P
-        c = inv(p11*p22 - p21*p12)
-        A[1,1] = (p22*z11 - p21*z12) * c
-        A[2,1] = (p22*z21 - p21*z22) * c
-        A[1,2] = (p11*z12 - p12*z11) * c
-        A[2,2] = (p11*z22 - p12*z21) * c
-    end
-    return A
-end
-# Compute accurate superdiagonal of A = A0^s - I for upper quasi-triangular A0 produced
-# by a Schur decomposition.
-# Higham and Lin, "A Schur–Padé Algorithm for Fractional Powers of a Matrix"
-# SIAM J. Matrix Anal. Appl., 32(3), (2011), 1056–1078.
-# Equation 5.6
-# see also blockpower for when A0 is upper triangular
-function _pow_superdiag_quasitriu!(A, A0, p)
-    n = checksquare(A0)
-    t = eltype(A)
-    k = 1
-    @inbounds while k < n
-        if !iszero(A[k+1,k])
-            k += 2
-            continue
-        end
-        if !(k == n - 1 || iszero(A[k+2,k+1]))
-            k += 3
-            continue
-        end
-        Ak = t(A0[k,k])
-        Akp1 = t(A0[k+1,k+1])
-
-        Akp = Ak^p
-        Akp1p = Akp1^p
-
-        if Ak == Akp1
-            A[k,k+1] = p * A0[k,k+1] * Ak^(p-1)
-        elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
-            A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
-        else
-            logAk = log(Ak)
-            logAkp1 = log(Akp1)
-            z = (Akp1 - Ak)/(Akp1 + Ak)
-            if abs(z) > 1
-                A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
-            else
-                w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
-                dd = 2 * exp(p*(logAk+logAkp1)/2) * sinh(p*w) / (Akp1 - Ak);
-                A[k,k+1] = A0[k,k+1] * dd
-            end
-        end
-        k += 1
-    end
-end
-
-# Compute accurate block diagonal and superdiagonal of A = log(A0) for upper
-# quasi-triangular A0 produced by the Schur decomposition.
-function _log_diag_quasitriu!(A, A0)
-    n = checksquare(A0)
-    t = eltype(A)
-    k = 1
-    @inbounds while k < n
-        if iszero(A0[k+1,k])  # 1x1 block
-            Ak = t(A0[k,k])
-            logAk = log(Ak)
-            A[k,k] = logAk
-            if k < n - 2 && iszero(A0[k+2,k+1])
-                Akp1 = t(A0[k+1,k+1])
-                logAkp1 = log(Akp1)
-                A[k+1,k+1] = logAkp1
-                if Ak == Akp1
-                    A[k,k+1] = A0[k,k+1] / Ak
-                elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
-                    A[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak)
-                else
-                    z = (Akp1 - Ak)/(Akp1 + Ak)
-                    if abs(z) > 1
-                        A[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak)
-                    else
-                        w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
-                        A[k,k+1] = 2 * A0[k,k+1] * w / (Akp1 - Ak)
-                    end
-                end
-                k += 2
-            else
-                k += 1
-            end
-        else  # real 2x2 block
-            @views _log_diag_block_2x2!(A[k:k+1,k:k+1], A0[k:k+1,k:k+1])
-            k += 2
-        end
-    end
-    if k == n  # last 1x1 block
-        @inbounds A[n,n] = log(t(A0[n,n]))
-    end
-    return A
-end
-# compute A0 = log(A) for 2x2 real matrices A and A0, where A0 is a diagonal 2x2 block
-# produced by real Schur decomposition.
-# Al-Mohy, Higham and Relton, "Computing the Frechet derivative of the matrix
-# logarithm and estimating the condition number", SIAM J. Sci. Comput.,
-# 35(4), (2013), C394–C410.
-# Eq. 6.1
-Base.@propagate_inbounds function _log_diag_block_2x2!(A, A0)
-    a, b, c = A0[1,1], A0[1,2], A0[2,1]
-    # avoid underflow/overflow for large/small b and c
-    s = sqrt(abs(b)) * sqrt(abs(c))
-    θ = atan(s, a)
-    t = θ / s
-    au = abs(a)
-    if au > s
-        a1 = log1p((s / au)^2) / 2 + log(au)
-    else
-        a1 = log1p((au / s)^2) / 2 + log(s)
-    end
-    A[1,1] = a1
-    A[2,1] = c*t
-    A[1,2] = b*t
-    A[2,2] = a1
-    return A
-end
-
-# Used only by powm at the moment
-# Repeatedly compute the square roots of A so that in the end its
-# eigenvalues are close enough to the positive real line
-function invsquaring(A0::UpperTriangular, theta)
-    require_one_based_indexing(theta)
-    # assumes theta is in ascending order
-    maxsqrt = 100
-    tmax = size(theta, 1)
-    n = checksquare(A0)
-    A = complex(copy(A0))
-    p = 0
-    m = 0
-
-    # Compute repeated roots
-    d = complex(diag(A))
-    dm1 = d .- 1
-    s = 0
-    while norm(dm1, Inf) > theta[tmax] && s < maxsqrt
-        d .= sqrt.(d)
-        dm1 .= d .- 1
-        s = s + 1
-    end
-    s0 = s
-    for k = 1:min(s, maxsqrt)
-        A = sqrt(A)
-    end
-
-    AmI = A - I
-    d2 = sqrt(opnorm(AmI^2, 1))
-    d3 = cbrt(opnorm(AmI^3, 1))
-    alpha2 = max(d2, d3)
-    foundm = false
-    if alpha2 <= theta[2]
-        m = alpha2 <= theta[1] ? 1 : 2
-        foundm = true
-    end
-
-    while !foundm
-        more = false
-        if s > s0
-            d3 = cbrt(opnorm(AmI^3, 1))
-        end
-        d4 = opnorm(AmI^4, 1)^(1/4)
-        alpha3 = max(d3, d4)
-        if alpha3 <= theta[tmax]
-            local j
-            for outer j = 3:tmax
-                if alpha3 <= theta[j]
-                    break
-                elseif alpha3 / 2 <= theta[5] && p < 2
-                    more = true
-                    p = p + 1
-                end
-            end
-            if j <= 6
-                m = j
-                foundm = true
-                break
-            elseif alpha3 / 2 <= theta[5] && p < 2
-                more = true
-                p = p + 1
-           end
-        end
-
-        if !more
-            d5 = opnorm(AmI^5, 1)^(1/5)
-            alpha4 = max(d4, d5)
-            eta = min(alpha3, alpha4)
-            if eta <= theta[tmax]
-                j = 0
-                for outer j = 6:tmax
-                    if eta <= theta[j]
-                        m = j
-                        break
-                    end
-                    break
-                end
-            end
-            if s == maxsqrt
-                m = tmax
-                break
-            end
-            A = sqrt(A)
-            AmI = A - I
-            s = s + 1
-        end
-    end
-
-    # Compute accurate superdiagonal of T
-    p = 1 / 2^s
-    A = complex(A)
-    blockpower!(A, A0, p)
-    return A,m,s
-end
-
-# Compute accurate diagonal and superdiagonal of A = A0^p
-function blockpower!(A::UpperTriangular, A0::UpperTriangular, p)
-    n = checksquare(A0)
-    @inbounds for k = 1:n-1
-        Ak = complex(A0[k,k])
-        Akp1 = complex(A0[k+1,k+1])
-
-        Akp = Ak^p
-        Akp1p = Akp1^p
-
-        A[k,k] = Akp
-        A[k+1,k+1] = Akp1p
-
-        if Ak == Akp1
-            A[k,k+1] = p * A0[k,k+1] * Ak^(p-1)
-        elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
-            A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
-        else
-            logAk = log(Ak)
-            logAkp1 = log(Akp1)
-            z = (Akp1 - Ak)/(Akp1 + Ak)
-            if abs(z) > 1
-                A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
-            else
-                w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
-                dd = 2 * exp(p*(logAk+logAkp1)/2) * sinh(p*w) / (Akp1 - Ak);
-                A[k,k+1] = A0[k,k+1] * dd
-            end
-        end
-    end
-end
-
-# Unwinding number
-unw(x::Real) = 0
-unw(x::Number) = ceil((imag(x) - pi) / (2 * pi))
-
-# compute A / B for upper quasi-triangular B, possibly overwriting B
-function rdiv_quasitriu!(A, B)
-    n = checksquare(A)
-    AG = copy(A)
-    # use Givens rotations to annihilate 2x2 blocks
-    @inbounds for k in 1:(n-1)
-        s = B[k+1,k]
-        iszero(s) && continue  # 1x1 block
-        G = first(givens(B[k+1,k+1], s, k, k+1))
-        rmul!(B, G)
-        rmul!(AG, G)
-    end
-    return rdiv!(AG, UpperTriangular(B))
-end
-
-# End of auxiliary functions for matrix logarithm and matrix power
-
-sqrt(A::UpperTriangular) = sqrt_quasitriu(A)
-function sqrt(A::UnitUpperTriangular{T}) where T
-    B = A.data
-    n = checksquare(B)
-    t = typeof(sqrt(zero(T)))
-    R = Matrix{t}(I, n, n)
-    tt = typeof(oneunit(t)*oneunit(t))
-    half = inv(R[1,1]+R[1,1]) # for general, algebraic cases. PR#20214
-    @inbounds for j = 1:n
-        for i = j-1:-1:1
-            r::tt = B[i,j]
-            @simd for k = i+1:j-1
-                r -= R[i,k]*R[k,j]
-            end
-            iszero(r) || (R[i,j] = half*r)
-        end
-    end
-    return UnitUpperTriangular(R)
-end
-sqrt(A::LowerTriangular) = copy(transpose(sqrt(copy(transpose(A)))))
-sqrt(A::UnitLowerTriangular) = copy(transpose(sqrt(copy(transpose(A)))))
-
-# Auxiliary functions for matrix square root
-
-# square root of upper triangular or real upper quasitriangular matrix
-function sqrt_quasitriu(A0; blockwidth = eltype(A0) <: Complex ? 512 : 256)
-    n = checksquare(A0)
-    T = eltype(A0)
-    Tr = typeof(sqrt(real(zero(T))))
-    Tc = typeof(sqrt(complex(zero(T))))
-    if isreal(A0)
-        is_sqrt_real = true
-        if istriu(A0)
-            for i in 1:n
-                Aii = real(A0[i,i])
-                if Aii < zero(Aii)
-                    is_sqrt_real = false
-                    break
-                end
-            end
-        end
-        if is_sqrt_real
-            R = zeros(Tr, n, n)
-            A = real(A0)
-        else
-            R = zeros(Tc, n, n)
-            A = A0
-        end
-    else
-        A = A0
-        R = zeros(Tc, n, n)
-    end
-    _sqrt_quasitriu!(R, A; blockwidth=blockwidth, n=n)
-    Rc = eltype(A0) <: Real ? R : complex(R)
-    if A0 isa UpperTriangular
-        return UpperTriangular(Rc)
-    elseif A0 isa UnitUpperTriangular
-        return UnitUpperTriangular(Rc)
-    else
-        return Rc
-    end
-end
-
-# in-place recursive sqrt of upper quasi-triangular matrix A from
-# Deadman E., Higham N.J., Ralha R. (2013) Blocked Schur Algorithms for Computing the Matrix
-# Square Root. Applied Parallel and Scientific Computing. PARA 2012. Lecture Notes in
-# Computer Science, vol 7782. https://doi.org/10.1007/978-3-642-36803-5_12
-function _sqrt_quasitriu!(R, A; blockwidth=64, n=checksquare(A))
-    if n ≤ blockwidth || !(eltype(R) <: BlasFloat) # base case, perform "point" algorithm
-        _sqrt_quasitriu_block!(R, A)
-    else  # compute blockwise recursion
-        split = div(n, 2)
-        iszero(A[split+1, split]) || (split += 1) # don't split 2x2 diagonal block
-        r1 = 1:split
-        r2 = (split + 1):n
-        n1, n2 = split, n - split
-        A11, A12, A22 = @views A[r1,r1], A[r1,r2], A[r2,r2]
-        R11, R12, R22 = @views R[r1,r1], R[r1,r2], R[r2,r2]
-        # solve diagonal blocks recursively
-        _sqrt_quasitriu!(R11, A11; blockwidth=blockwidth, n=n1)
-        _sqrt_quasitriu!(R22, A22; blockwidth=blockwidth, n=n2)
-        # solve off-diagonal block
-        R12 .= .- A12
-        _sylvester_quasitriu!(R11, R22, R12; blockwidth=blockwidth, nA=n1, nB=n2, raise=false)
-    end
-    return R
-end
-
-function _sqrt_quasitriu_block!(R, A)
-    _sqrt_quasitriu_diag_block!(R, A)
-    _sqrt_quasitriu_offdiag_block!(R, A)
-    return R
-end
-
-function _sqrt_quasitriu_diag_block!(R, A)
-    n = size(R, 1)
-    ta = eltype(R) <: Complex ? complex(eltype(A)) : eltype(A)
-    i = 1
-    @inbounds while i < n
-        if iszero(A[i + 1, i])
-            R[i, i] = sqrt(ta(A[i, i]))
-            i += 1
-        else
-            # this branch is never reached when A is complex triangular
-            @views _sqrt_real_2x2!(R[i:(i + 1), i:(i + 1)], A[i:(i + 1), i:(i + 1)])
-            i += 2
-        end
-    end
-    if i == n
-        R[n, n] = sqrt(ta(A[n, n]))
-    end
-    return R
-end
-
-function _sqrt_quasitriu_offdiag_block!(R, A)
-    n = size(R, 1)
-    j = 1
-    @inbounds while j ≤ n
-        jsize_is_2 = j < n && !iszero(A[j + 1, j])
-        i = j - 1
-        while i > 0
-            isize_is_2 = i > 1 && !iszero(A[i, i - 1])
-            if isize_is_2
-                if jsize_is_2
-                    _sqrt_quasitriu_offdiag_block_2x2!(R, A, i - 1, j)
-                else
-                    _sqrt_quasitriu_offdiag_block_2x1!(R, A, i - 1, j)
-                end
-                i -= 2
-            else
-                if jsize_is_2
-                    _sqrt_quasitriu_offdiag_block_1x2!(R, A, i, j)
-                else
-                    _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j)
-                end
-                i -= 1
-            end
-        end
-        j += 2 - !jsize_is_2
-    end
-    return R
-end
-
-# real square root of 2x2 diagonal block of quasi-triangular matrix from real Schur
-# decomposition. Eqs 6.8-6.9 and Algorithm 6.5 of
-# Higham, 2008, "Functions of Matrices: Theory and Computation", SIAM.
-Base.@propagate_inbounds function _sqrt_real_2x2!(R, A)
-    # in the real Schur form, A[1, 1] == A[2, 2], and A[2, 1] * A[1, 2] < 0
-    θ, a21, a12 = A[1, 1], A[2, 1], A[1, 2]
-    # avoid overflow/underflow of μ
-    # for real sqrt, |d| ≤ 2 max(|a12|,|a21|)
-    μ = sqrt(abs(a12)) * sqrt(abs(a21))
-    α = _real_sqrt(θ, μ)
-    c = 2α
-    R[1, 1] = α
-    R[2, 1] = a21 / c
-    R[1, 2] = a12 / c
-    R[2, 2] = α
-    return R
-end
-
-# real part of square root of θ+im*μ
-@inline function _real_sqrt(θ, μ)
-    t = sqrt((abs(θ) + hypot(θ, μ)) / 2)
-    return θ ≥ 0 ? t : μ / 2t
-end
-
-Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j)
-    Rii = R[i, i]
-    Rjj = R[j, j]
-    iszero(Rii) && iszero(Rjj) && return R
-    t = eltype(R)
-    tt = typeof(zero(t)*zero(t))
-    r = tt(-A[i, j])
-    @simd for k in (i + 1):(j - 1)
-        r += R[i, k] * R[k, j]
-    end
-    iszero(r) && return R
-    R[i, j] = sylvester(Rii, Rjj, r)
-    return R
-end
-
-Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x2!(R, A, i, j)
-    jrange = j:(j + 1)
-    t = eltype(R)
-    tt = typeof(zero(t)*zero(t))
-    r1 = tt(-A[i, j])
-    r2 = tt(-A[i, j + 1])
-    @simd for k in (i + 1):(j - 1)
-        rik = R[i, k]
-        r1 += rik * R[k, j]
-        r2 += rik * R[k, j + 1]
-    end
-    Rjj = @view R[jrange, jrange]
-    Rij = @view R[i, jrange]
-    Rij[1] = r1
-    Rij[2] = r2
-    _sylvester_1x2!(R[i, i], Rjj, Rij)
-    return R
-end
-
-Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x1!(R, A, i, j)
-    irange = i:(i + 1)
-    t = eltype(R)
-    tt = typeof(zero(t)*zero(t))
-    r1 = tt(-A[i, j])
-    r2 = tt(-A[i + 1, j])
-    @simd for k in (i + 2):(j - 1)
-        rkj = R[k, j]
-        r1 += R[i, k] * rkj
-        r2 += R[i + 1, k] * rkj
-    end
-    Rii = @view R[irange, irange]
-    Rij = @view R[irange, j]
-    Rij[1] = r1
-    Rij[2] = r2
-    @views _sylvester_2x1!(Rii, R[j, j], Rij)
-    return R
-end
-
-Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x2!(R, A, i, j)
-    irange = i:(i + 1)
-    jrange = j:(j + 1)
-    t = eltype(R)
-    tt = typeof(zero(t)*zero(t))
-    for i′ in irange, j′ in jrange
-        Cij = tt(-A[i′, j′])
-        @simd for k in (i + 2):(j - 1)
-            Cij += R[i′, k] * R[k, j′]
-        end
-        R[i′, j′] = Cij
-    end
-    Rii = @view R[irange, irange]
-    Rjj = @view R[jrange, jrange]
-    Rij = @view R[irange, jrange]
-    if !iszero(Rij) && !all(isnan, Rij)
-        _sylvester_2x2!(Rii, Rjj, Rij)
-    end
-    return R
-end
-
-# solve Sylvester's equation AX + XB = -C using blockwise recursion until the dimension of
-# A and B are no greater than blockwidth, based on Algorithm 1 from
-# Jonsson I, Kågström B. Recursive blocked algorithms for solving triangular systems—
-# Part I: one-sided and coupled Sylvester-type matrix equations. (2002) ACM Trans Math Softw.
-# 28(4), https://doi.org/10.1145/592843.592845.
-# specify raise=false to avoid breaking the recursion if a LAPACKException is thrown when
-# computing one of the blocks.
-function _sylvester_quasitriu!(A, B, C; blockwidth=64, nA=checksquare(A), nB=checksquare(B), raise=true)
-    if 1 ≤ nA ≤ blockwidth && 1 ≤ nB ≤ blockwidth
-        _sylvester_quasitriu_base!(A, B, C; raise=raise)
-    elseif nA ≥ 2nB ≥ 2
-        _sylvester_quasitriu_split1!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
-    elseif nB ≥ 2nA ≥ 2
-        _sylvester_quasitriu_split2!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
-    else
-        _sylvester_quasitriu_splitall!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
-    end
-    return C
-end
-function _sylvester_quasitriu_base!(A, B, C; raise=true)
-    try
-        _, scale = LAPACK.trsyl!('N', 'N', A, B, C)
-        rmul!(C, -inv(scale))
-    catch e
-        if !(e isa LAPACKException) || raise
-            throw(e)
-        end
-    end
-    return C
-end
-function _sylvester_quasitriu_split1!(A, B, C; nA=checksquare(A), kwargs...)
-    iA = div(nA, 2)
-    iszero(A[iA + 1, iA]) || (iA += 1)  # don't split 2x2 diagonal block
-    rA1, rA2 = 1:iA, (iA + 1):nA
-    nA1, nA2 = iA, nA-iA
-    A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2]
-    C1, C2 = @views C[rA1,:], C[rA2,:]
-    _sylvester_quasitriu!(A22, B, C2; nA=nA2, kwargs...)
-    mul!(C1, A12, C2, true, true)
-    _sylvester_quasitriu!(A11, B, C1; nA=nA1, kwargs...)
-    return C
-end
-function _sylvester_quasitriu_split2!(A, B, C; nB=checksquare(B), kwargs...)
-    iB = div(nB, 2)
-    iszero(B[iB + 1, iB]) || (iB += 1)  # don't split 2x2 diagonal block
-    rB1, rB2 = 1:iB, (iB + 1):nB
-    nB1, nB2 = iB, nB-iB
-    B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2]
-    C1, C2 = @views C[:,rB1], C[:,rB2]
-    _sylvester_quasitriu!(A, B11, C1; nB=nB1, kwargs...)
-    mul!(C2, C1, B12, true, true)
-    _sylvester_quasitriu!(A, B22, C2; nB=nB2, kwargs...)
-    return C
-end
-function _sylvester_quasitriu_splitall!(A, B, C; nA=checksquare(A), nB=checksquare(B), kwargs...)
-    iA = div(nA, 2)
-    iszero(A[iA + 1, iA]) || (iA += 1)  # don't split 2x2 diagonal block
-    iB = div(nB, 2)
-    iszero(B[iB + 1, iB]) || (iB += 1)  # don't split 2x2 diagonal block
-    rA1, rA2 = 1:iA, (iA + 1):nA
-    nA1, nA2 = iA, nA-iA
-    rB1, rB2 = 1:iB, (iB + 1):nB
-    nB1, nB2 = iB, nB-iB
-    A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2]
-    B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2]
-    C11, C21, C12, C22 = @views C[rA1,rB1], C[rA2,rB1], C[rA1,rB2], C[rA2,rB2]
-    _sylvester_quasitriu!(A22, B11, C21; nA=nA2, nB=nB1, kwargs...)
-    mul!(C11, A12, C21, true, true)
-    _sylvester_quasitriu!(A11, B11, C11; nA=nA1, nB=nB1, kwargs...)
-    mul!(C22, C21, B12, true, true)
-    _sylvester_quasitriu!(A22, B22, C22; nA=nA2, nB=nB2, kwargs...)
-    mul!(C12, A12, C22, true, true)
-    mul!(C12, C11, B12, true, true)
-    _sylvester_quasitriu!(A11, B22, C12; nA=nA1, nB=nB2, kwargs...)
-    return C
-end
-
-# End of auxiliary functions for matrix square root
-
-# Generic eigensystems
-eigvals(A::AbstractTriangular) = diag(A)
-function eigvecs(A::AbstractTriangular{T}) where T
-    TT = promote_type(T, Float32)
-    if TT <: BlasFloat
-        return eigvecs(convert(AbstractMatrix{TT}, A))
-    else
-        throw(ArgumentError("eigvecs type $(typeof(A)) not supported. Please submit a pull request."))
-    end
-end
-det(A::UnitUpperTriangular{T}) where {T} = one(T)
-det(A::UnitLowerTriangular{T}) where {T} = one(T)
-logdet(A::UnitUpperTriangular{T}) where {T} = zero(T)
-logdet(A::UnitLowerTriangular{T}) where {T} = zero(T)
-logabsdet(A::UnitUpperTriangular{T}) where {T} = zero(T), one(T)
-logabsdet(A::UnitLowerTriangular{T}) where {T} = zero(T), one(T)
-det(A::UpperTriangular) = prod(diag(A.data))
-det(A::LowerTriangular) = prod(diag(A.data))
-function logabsdet(A::Union{UpperTriangular{T},LowerTriangular{T}}) where T
-    sgn = one(T)
-    abs_det = zero(real(T))
-    @inbounds for i in 1:size(A,1)
-        diag_i = A.data[i,i]
-        sgn *= sign(diag_i)
-        abs_det += log(abs(diag_i))
-    end
-    return abs_det, sgn
-end
-
-eigen(A::AbstractTriangular) = Eigen(eigvals(A), eigvecs(A))
-
-# Generic singular systems
-for func in (:svd, :svd!, :svdvals)
-    @eval begin
-        ($func)(A::AbstractTriangular; kwargs...) = ($func)(copyto!(similar(parent(A)), A); kwargs...)
-    end
-end
-
-factorize(A::AbstractTriangular) = A
-
-# disambiguation methods: /(Adjoint of AbsVec, <:AbstractTriangular)
-/(u::AdjointAbsVec, A::Union{LowerTriangular,UpperTriangular}) = adjoint(adjoint(A) \ u.parent)
-/(u::AdjointAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = adjoint(adjoint(A) \ u.parent)
-# disambiguation methods: /(Transpose of AbsVec, <:AbstractTriangular)
-/(u::TransposeAbsVec, A::Union{LowerTriangular,UpperTriangular}) = transpose(transpose(A) \ u.parent)
-/(u::TransposeAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = transpose(transpose(A) \ u.parent)
-# disambiguation methods: /(Transpose of AbsVec, Adj/Trans of <:AbstractTriangular)
-for (tritype, comptritype) in ((:LowerTriangular, :UpperTriangular),
-                               (:UnitLowerTriangular, :UnitUpperTriangular),
-                               (:UpperTriangular, :LowerTriangular),
-                               (:UnitUpperTriangular, :UnitLowerTriangular))
-    @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Adjoint}) = transpose($comptritype(conj(parent(parent(A)))) \ u.parent)
-    @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Transpose}) = transpose(transpose(A) \ u.parent)
-end
diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl
deleted file mode 100644
index a53bb3815a481..0000000000000
--- a/stdlib/LinearAlgebra/src/tridiag.jl
+++ /dev/null
@@ -1,896 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#### Specialized matrix types ####
-
-## (complex) symmetric tridiagonal matrices
-struct SymTridiagonal{T, V<:AbstractVector{T}} <: AbstractMatrix{T}
-    dv::V                        # diagonal
-    ev::V                        # superdiagonal
-    function SymTridiagonal{T, V}(dv, ev) where {T, V<:AbstractVector{T}}
-        require_one_based_indexing(dv, ev)
-        if !(length(dv) - 1 <= length(ev) <= length(dv))
-            throw(DimensionMismatch("subdiagonal has wrong length. Has length $(length(ev)), but should be either $(length(dv) - 1) or $(length(dv))."))
-        end
-        new{T, V}(dv, ev)
-    end
-end
-
-"""
-    SymTridiagonal(dv::V, ev::V) where V <: AbstractVector
-
-Construct a symmetric tridiagonal matrix from the diagonal (`dv`) and first
-sub/super-diagonal (`ev`), respectively. The result is of type `SymTridiagonal`
-and provides efficient specialized eigensolvers, but may be converted into a
-regular matrix with [`convert(Array, _)`](@ref) (or `Array(_)` for short).
-
-For `SymTridiagonal` block matrices, the elements of `dv` are symmetrized.
-The argument `ev` is interpreted as the superdiagonal. Blocks from the
-subdiagonal are (materialized) transpose of the corresponding superdiagonal blocks.
-
-# Examples
-```jldoctest
-julia> dv = [1, 2, 3, 4]
-4-element Vector{Int64}:
- 1
- 2
- 3
- 4
-
-julia> ev = [7, 8, 9]
-3-element Vector{Int64}:
- 7
- 8
- 9
-
-julia> SymTridiagonal(dv, ev)
-4×4 SymTridiagonal{Int64, Vector{Int64}}:
- 1  7  ⋅  ⋅
- 7  2  8  ⋅
- ⋅  8  3  9
- ⋅  ⋅  9  4
-
-julia> A = SymTridiagonal(fill([1 2; 3 4], 3), fill([1 2; 3 4], 2));
-
-julia> A[1,1]
-2×2 Symmetric{Int64, Matrix{Int64}}:
- 1  2
- 2  4
-
-julia> A[1,2]
-2×2 Matrix{Int64}:
- 1  2
- 3  4
-
-julia> A[2,1]
-2×2 Matrix{Int64}:
- 1  3
- 2  4
-```
-"""
-SymTridiagonal(dv::V, ev::V) where {T,V<:AbstractVector{T}} = SymTridiagonal{T}(dv, ev)
-SymTridiagonal{T}(dv::V, ev::V) where {T,V<:AbstractVector{T}} = SymTridiagonal{T,V}(dv, ev)
-function SymTridiagonal{T}(dv::AbstractVector, ev::AbstractVector) where {T}
-    SymTridiagonal(convert(AbstractVector{T}, dv)::AbstractVector{T},
-                   convert(AbstractVector{T}, ev)::AbstractVector{T})
-end
-
-"""
-    SymTridiagonal(A::AbstractMatrix)
-
-Construct a symmetric tridiagonal matrix from the diagonal and first superdiagonal
-of the symmetric matrix `A`.
-
-# Examples
-```jldoctest
-julia> A = [1 2 3; 2 4 5; 3 5 6]
-3×3 Matrix{Int64}:
- 1  2  3
- 2  4  5
- 3  5  6
-
-julia> SymTridiagonal(A)
-3×3 SymTridiagonal{Int64, Vector{Int64}}:
- 1  2  ⋅
- 2  4  5
- ⋅  5  6
-
-julia> B = reshape([[1 2; 2 3], [1 2; 3 4], [1 3; 2 4], [1 2; 2 3]], 2, 2);
-
-julia> SymTridiagonal(B)
-2×2 SymTridiagonal{Matrix{Int64}, Vector{Matrix{Int64}}}:
- [1 2; 2 3]  [1 3; 2 4]
- [1 2; 3 4]  [1 2; 2 3]
-```
-"""
-function SymTridiagonal(A::AbstractMatrix)
-    if (diag(A, 1) == transpose.(diag(A, -1))) && all(issymmetric.(diag(A, 0)))
-        SymTridiagonal(diag(A, 0), diag(A, 1))
-    else
-        throw(ArgumentError("matrix is not symmetric; cannot convert to SymTridiagonal"))
-    end
-end
-
-SymTridiagonal{T,V}(S::SymTridiagonal{T,V}) where {T,V<:AbstractVector{T}} = S
-SymTridiagonal{T,V}(S::SymTridiagonal) where {T,V<:AbstractVector{T}} =
-    SymTridiagonal(convert(V, S.dv)::V, convert(V, S.ev)::V)
-SymTridiagonal{T}(S::SymTridiagonal{T}) where {T} = S
-SymTridiagonal{T}(S::SymTridiagonal) where {T} =
-    SymTridiagonal(convert(AbstractVector{T}, S.dv)::AbstractVector{T},
-                   convert(AbstractVector{T}, S.ev)::AbstractVector{T})
-SymTridiagonal(S::SymTridiagonal) = S
-
-AbstractMatrix{T}(S::SymTridiagonal) where {T} =
-    SymTridiagonal(convert(AbstractVector{T}, S.dv)::AbstractVector{T},
-                   convert(AbstractVector{T}, S.ev)::AbstractVector{T})
-function Matrix{T}(M::SymTridiagonal) where T
-    n = size(M, 1)
-    Mf = Matrix{T}(undef, n, n)
-    n == 0 && return Mf
-    n > 2 && fill!(Mf, zero(T))
-    @inbounds for i = 1:n-1
-        Mf[i,i] = symmetric(M.dv[i], :U)
-        Mf[i+1,i] = transpose(M.ev[i])
-        Mf[i,i+1] = M.ev[i]
-    end
-    Mf[n,n] = symmetric(M.dv[n], :U)
-    return Mf
-end
-Matrix(M::SymTridiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(M)
-Array(M::SymTridiagonal) = Matrix(M)
-
-size(A::SymTridiagonal) = (length(A.dv), length(A.dv))
-function size(A::SymTridiagonal, d::Integer)
-    if d < 1
-        throw(ArgumentError("dimension must be ≥ 1, got $d"))
-    elseif d<=2
-        return length(A.dv)
-    else
-        return 1
-    end
-end
-
-similar(S::SymTridiagonal, ::Type{T}) where {T} = SymTridiagonal(similar(S.dv, T), similar(S.ev, T))
-similar(S::SymTridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(S.dv, T, dims)
-
-copyto!(dest::SymTridiagonal, src::SymTridiagonal) =
-    (copyto!(dest.dv, src.dv); copyto!(dest.ev, _evview(src)); dest)
-
-#Elementary operations
-for func in (:conj, :copy, :real, :imag)
-    @eval ($func)(M::SymTridiagonal) = SymTridiagonal(($func)(M.dv), ($func)(M.ev))
-end
-
-transpose(S::SymTridiagonal) = S
-adjoint(S::SymTridiagonal{<:Real}) = S
-adjoint(S::SymTridiagonal) = Adjoint(S)
-permutedims(S::SymTridiagonal) = S
-function permutedims(S::SymTridiagonal, perm)
-    Base.checkdims_perm(S, S, perm)
-    NTuple{2}(perm) == (2, 1) ? permutedims(S) : S
-end
-Base.copy(S::Adjoint{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(adjoint.(x)), (S.parent.dv, S.parent.ev))...)
-
-ishermitian(S::SymTridiagonal) = isreal(S.dv) && isreal(_evview(S))
-issymmetric(S::SymTridiagonal) = true
-
-tr(S::SymTridiagonal) = sum(S.dv)
-
-function diag(M::SymTridiagonal{T}, n::Integer=0) where T<:Number
-    # every branch call similar(..., ::Int) to make sure the
-    # same vector type is returned independent of n
-    absn = abs(n)
-    if absn == 0
-        return copyto!(similar(M.dv, length(M.dv)), M.dv)
-    elseif absn == 1
-        return copyto!(similar(M.ev, length(M.dv)-1), _evview(M))
-    elseif absn <= size(M,1)
-        return fill!(similar(M.dv, size(M,1)-absn), zero(T))
-    else
-        throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
-            "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
-    end
-end
-function diag(M::SymTridiagonal, n::Integer=0)
-    # every branch call similar(..., ::Int) to make sure the
-    # same vector type is returned independent of n
-    if n == 0
-        return copyto!(similar(M.dv, length(M.dv)), symmetric.(M.dv, :U))
-    elseif n == 1
-        return copyto!(similar(M.ev, length(M.dv)-1), _evview(M))
-    elseif n == -1
-        return copyto!(similar(M.ev, length(M.dv)-1), transpose.(_evview(M)))
-    elseif n <= size(M,1)
-        throw(ArgumentError("requested diagonal contains undefined zeros of an array type"))
-    else
-        throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
-            "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
-    end
-end
-
-+(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv+B.dv, _evview(A)+_evview(B))
--(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv-B.dv, _evview(A)-_evview(B))
--(A::SymTridiagonal) = SymTridiagonal(-A.dv, -A.ev)
-*(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv*B, A.ev*B)
-*(B::Number, A::SymTridiagonal) = SymTridiagonal(B*A.dv, B*A.ev)
-/(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv/B, A.ev/B)
-\(B::Number, A::SymTridiagonal) = SymTridiagonal(B\A.dv, B\A.ev)
-==(A::SymTridiagonal{<:Number}, B::SymTridiagonal{<:Number}) =
-    (A.dv == B.dv) && (_evview(A) == _evview(B))
-==(A::SymTridiagonal, B::SymTridiagonal) =
-    size(A) == size(B) && all(i -> A[i,i] == B[i,i], axes(A, 1)) && (_evview(A) == _evview(B))
-
-function dot(x::AbstractVector, S::SymTridiagonal, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    nx, ny = length(x), length(y)
-    (nx == size(S, 1) == ny) || throw(DimensionMismatch("dot"))
-    if nx ≤ 1
-        nx == 0 && return dot(zero(eltype(x)), zero(eltype(S)), zero(eltype(y)))
-        return dot(x[1], S.dv[1], y[1])
-    end
-    dv, ev = S.dv, S.ev
-    @inbounds begin
-        x₀ = x[1]
-        x₊ = x[2]
-        sub = transpose(ev[1])
-        r = dot(adjoint(dv[1])*x₀ + adjoint(sub)*x₊, y[1])
-        for j in 2:nx-1
-            x₋, x₀, x₊ = x₀, x₊, x[j+1]
-            sup, sub = transpose(sub), transpose(ev[j])
-            r += dot(adjoint(sup)*x₋ + adjoint(dv[j])*x₀ + adjoint(sub)*x₊, y[j])
-        end
-        r += dot(adjoint(transpose(sub))*x₀ + adjoint(dv[nx])*x₊, y[nx])
-    end
-    return r
-end
-
-(\)(T::SymTridiagonal, B::AbstractVecOrMat) = ldlt(T)\B
-
-# division with optional shift for use in shifted-Hessenberg solvers (hessenberg.jl):
-ldiv!(A::SymTridiagonal, B::AbstractVecOrMat; shift::Number=false) = ldiv!(ldlt(A, shift=shift), B)
-rdiv!(B::AbstractVecOrMat, A::SymTridiagonal; shift::Number=false) = rdiv!(B, ldlt(A, shift=shift))
-
-eigen!(A::SymTridiagonal{<:BlasReal,<:StridedVector}) = Eigen(LAPACK.stegr!('V', A.dv, A.ev)...)
-eigen(A::SymTridiagonal{T}) where T = eigen!(copymutable_oftype(A, eigtype(T)))
-
-eigen!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, irange::UnitRange) =
-    Eigen(LAPACK.stegr!('V', 'I', A.dv, A.ev, 0.0, 0.0, irange.start, irange.stop)...)
-eigen(A::SymTridiagonal{T}, irange::UnitRange) where T =
-    eigen!(copymutable_oftype(A, eigtype(T)), irange)
-
-eigen!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, vl::Real, vu::Real) =
-    Eigen(LAPACK.stegr!('V', 'V', A.dv, A.ev, vl, vu, 0, 0)...)
-eigen(A::SymTridiagonal{T}, vl::Real, vu::Real) where T =
-    eigen!(copymutable_oftype(A, eigtype(T)), vl, vu)
-
-eigvals!(A::SymTridiagonal{<:BlasReal,<:StridedVector}) = LAPACK.stev!('N', A.dv, A.ev)[1]
-eigvals(A::SymTridiagonal{T}) where T = eigvals!(copymutable_oftype(A, eigtype(T)))
-
-eigvals!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, irange::UnitRange) =
-    LAPACK.stegr!('N', 'I', A.dv, A.ev, 0.0, 0.0, irange.start, irange.stop)[1]
-eigvals(A::SymTridiagonal{T}, irange::UnitRange) where T =
-    eigvals!(copymutable_oftype(A, eigtype(T)), irange)
-
-eigvals!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, vl::Real, vu::Real) =
-    LAPACK.stegr!('N', 'V', A.dv, A.ev, vl, vu, 0, 0)[1]
-eigvals(A::SymTridiagonal{T}, vl::Real, vu::Real) where T =
-    eigvals!(copymutable_oftype(A, eigtype(T)), vl, vu)
-
-#Computes largest and smallest eigenvalue
-eigmax(A::SymTridiagonal) = eigvals(A, size(A, 1):size(A, 1))[1]
-eigmin(A::SymTridiagonal) = eigvals(A, 1:1)[1]
-
-#Compute selected eigenvectors only corresponding to particular eigenvalues
-eigvecs(A::SymTridiagonal) = eigen(A).vectors
-
-"""
-    eigvecs(A::SymTridiagonal[, eigvals]) -> Matrix
-
-Return a matrix `M` whose columns are the eigenvectors of `A`. (The `k`th eigenvector can
-be obtained from the slice `M[:, k]`.)
-
-If the optional vector of eigenvalues `eigvals` is specified, `eigvecs`
-returns the specific corresponding eigenvectors.
-
-# Examples
-```jldoctest
-julia> A = SymTridiagonal([1.; 2.; 1.], [2.; 3.])
-3×3 SymTridiagonal{Float64, Vector{Float64}}:
- 1.0  2.0   ⋅
- 2.0  2.0  3.0
-  ⋅   3.0  1.0
-
-julia> eigvals(A)
-3-element Vector{Float64}:
- -2.1400549446402604
-  1.0000000000000002
-  5.140054944640259
-
-julia> eigvecs(A)
-3×3 Matrix{Float64}:
-  0.418304  -0.83205      0.364299
- -0.656749  -7.39009e-16  0.754109
-  0.627457   0.5547       0.546448
-
-julia> eigvecs(A, [1.])
-3×1 Matrix{Float64}:
-  0.8320502943378438
-  4.263514128092366e-17
- -0.5547001962252291
-```
-"""
-eigvecs(A::SymTridiagonal{<:BlasFloat,<:StridedVector}, eigvals::Vector{<:Real}) = LAPACK.stein!(A.dv, A.ev, eigvals)
-
-function svdvals!(A::SymTridiagonal)
-    vals = eigvals!(A)
-    return sort!(map!(abs, vals, vals); rev=true)
-end
-
-# tril and triu
-
-function istriu(M::SymTridiagonal, k::Integer=0)
-    if k <= -1
-        return true
-    elseif k == 0
-        return iszero(_evview(M))
-    else # k >= 1
-        return iszero(_evview(M)) && iszero(M.dv)
-    end
-end
-istril(M::SymTridiagonal, k::Integer) = istriu(M, -k)
-iszero(M::SymTridiagonal) =  iszero(_evview(M)) && iszero(M.dv)
-isone(M::SymTridiagonal) =  iszero(_evview(M)) && all(isone, M.dv)
-isdiag(M::SymTridiagonal) =  iszero(_evview(M))
-
-
-function tril!(M::SymTridiagonal{T}, k::Integer=0) where T
-    n = length(M.dv)
-    if !(-n - 1 <= k <= n - 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
-    elseif k < -1
-        fill!(M.ev, zero(T))
-        fill!(M.dv, zero(T))
-        return Tridiagonal(M.ev,M.dv,copy(M.ev))
-    elseif k == -1
-        fill!(M.dv, zero(T))
-        return Tridiagonal(M.ev,M.dv,zero(M.ev))
-    elseif k == 0
-        return Tridiagonal(M.ev,M.dv,zero(M.ev))
-    elseif k >= 1
-        return Tridiagonal(M.ev,M.dv,copy(M.ev))
-    end
-end
-
-function triu!(M::SymTridiagonal{T}, k::Integer=0) where T
-    n = length(M.dv)
-    if !(-n + 1 <= k <= n + 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
-    elseif k > 1
-        fill!(M.ev, zero(T))
-        fill!(M.dv, zero(T))
-        return Tridiagonal(M.ev,M.dv,copy(M.ev))
-    elseif k == 1
-        fill!(M.dv, zero(T))
-        return Tridiagonal(zero(M.ev),M.dv,M.ev)
-    elseif k == 0
-        return Tridiagonal(zero(M.ev),M.dv,M.ev)
-    elseif k <= -1
-        return Tridiagonal(M.ev,M.dv,copy(M.ev))
-    end
-end
-
-###################
-# Generic methods #
-###################
-
-## structured matrix methods ##
-function Base.replace_in_print_matrix(A::SymTridiagonal, i::Integer, j::Integer, s::AbstractString)
-    i==j-1||i==j||i==j+1 ? s : Base.replace_with_centered_mark(s)
-end
-
-# Implements the determinant using principal minors
-# a, b, c are assumed to be the subdiagonal, diagonal, and superdiagonal of
-# a tridiagonal matrix.
-#Reference:
-#    R. Usmani, "Inversion of a tridiagonal Jacobi matrix",
-#    Linear Algebra and its Applications 212-213 (1994), pp.413-414
-#    doi:10.1016/0024-3795(94)90414-6
-function det_usmani(a::V, b::V, c::V, shift::Number=0) where {T,V<:AbstractVector{T}}
-    require_one_based_indexing(a, b, c)
-    n = length(b)
-    θa = oneunit(T)+zero(shift)
-    if n == 0
-        return θa
-    end
-    θb = b[1]+shift
-    for i in 2:n
-        θb, θa = (b[i]+shift)*θb - a[i-1]*c[i-1]*θa, θb
-    end
-    return θb
-end
-
-# det with optional diagonal shift for use with shifted Hessenberg factorizations
-det(A::SymTridiagonal; shift::Number=false) = det_usmani(A.ev, A.dv, A.ev, shift)
-logabsdet(A::SymTridiagonal; shift::Number=false) = logabsdet(ldlt(A; shift=shift))
-
-@inline function Base.isassigned(A::SymTridiagonal, i::Int, j::Int)
-    @boundscheck checkbounds(Bool, A, i, j) || return false
-    if i == j
-        return @inbounds isassigned(A.dv, i)
-    elseif i == j + 1
-        return @inbounds isassigned(A.ev, j)
-    elseif i + 1 == j
-        return @inbounds isassigned(A.ev, i)
-    else
-        return true
-    end
-end
-
-@inline function Base.isstored(A::SymTridiagonal, i::Int, j::Int)
-    @boundscheck checkbounds(A, i, j)
-    if i == j
-        return @inbounds Base.isstored(A.dv, i)
-    elseif i == j + 1
-        return @inbounds Base.isstored(A.ev, j)
-    elseif i + 1 == j
-        return @inbounds Base.isstored(A.ev, i)
-    else
-        return false
-    end
-end
-
-@inline function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T
-    @boundscheck checkbounds(A, i, j)
-    if i == j
-        return symmetric((@inbounds A.dv[i]), :U)::symmetric_type(eltype(A.dv))
-    elseif i == j + 1
-        return copy(transpose(@inbounds A.ev[j])) # materialized for type stability
-    elseif i + 1 == j
-        return @inbounds A.ev[i]
-    else
-        return zero(T)
-    end
-end
-
-@inline function setindex!(A::SymTridiagonal, x, i::Integer, j::Integer)
-    @boundscheck checkbounds(A, i, j)
-    if i == j
-        @inbounds A.dv[i] = x
-    else
-        throw(ArgumentError("cannot set off-diagonal entry ($i, $j)"))
-    end
-    return x
-end
-
-## Tridiagonal matrices ##
-struct Tridiagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T}
-    dl::V    # sub-diagonal
-    d::V     # diagonal
-    du::V    # sup-diagonal
-    du2::V   # supsup-diagonal for pivoting in LU
-    function Tridiagonal{T,V}(dl, d, du) where {T,V<:AbstractVector{T}}
-        require_one_based_indexing(dl, d, du)
-        n = length(d)
-        if (length(dl) != n-1 || length(du) != n-1) && !(length(d) == 0 && length(dl) == 0 && length(du) == 0)
-            throw(ArgumentError(string("cannot construct Tridiagonal from incompatible ",
-                "lengths of subdiagonal, diagonal and superdiagonal: ",
-                "($(length(dl)), $(length(d)), $(length(du)))")))
-        end
-        new{T,V}(dl, d, du)
-    end
-    # constructor used in lu!
-    function Tridiagonal{T,V}(dl, d, du, du2) where {T,V<:AbstractVector{T}}
-        require_one_based_indexing(dl, d, du, du2)
-        # length checks?
-        new{T,V}(dl, d, du, du2)
-    end
-end
-
-"""
-    Tridiagonal(dl::V, d::V, du::V) where V <: AbstractVector
-
-Construct a tridiagonal matrix from the first subdiagonal, diagonal, and first superdiagonal,
-respectively. The result is of type `Tridiagonal` and provides efficient specialized linear
-solvers, but may be converted into a regular matrix with
-[`convert(Array, _)`](@ref) (or `Array(_)` for short).
-The lengths of `dl` and `du` must be one less than the length of `d`.
-
-# Examples
-```jldoctest
-julia> dl = [1, 2, 3];
-
-julia> du = [4, 5, 6];
-
-julia> d = [7, 8, 9, 0];
-
-julia> Tridiagonal(dl, d, du)
-4×4 Tridiagonal{Int64, Vector{Int64}}:
- 7  4  ⋅  ⋅
- 1  8  5  ⋅
- ⋅  2  9  6
- ⋅  ⋅  3  0
-```
-"""
-Tridiagonal(dl::V, d::V, du::V) where {T,V<:AbstractVector{T}} = Tridiagonal{T,V}(dl, d, du)
-Tridiagonal(dl::V, d::V, du::V, du2::V) where {T,V<:AbstractVector{T}} = Tridiagonal{T,V}(dl, d, du, du2)
-function Tridiagonal{T}(dl::AbstractVector, d::AbstractVector, du::AbstractVector) where {T}
-    Tridiagonal(map(x->convert(AbstractVector{T}, x), (dl, d, du))...)
-end
-function Tridiagonal{T,V}(A::Tridiagonal) where {T,V<:AbstractVector{T}}
-    Tridiagonal{T,V}(A.dl, A.d, A.du)
-end
-
-"""
-    Tridiagonal(A)
-
-Construct a tridiagonal matrix from the first sub-diagonal,
-diagonal and first super-diagonal of the matrix `A`.
-
-# Examples
-```jldoctest
-julia> A = [1 2 3 4; 1 2 3 4; 1 2 3 4; 1 2 3 4]
-4×4 Matrix{Int64}:
- 1  2  3  4
- 1  2  3  4
- 1  2  3  4
- 1  2  3  4
-
-julia> Tridiagonal(A)
-4×4 Tridiagonal{Int64, Vector{Int64}}:
- 1  2  ⋅  ⋅
- 1  2  3  ⋅
- ⋅  2  3  4
- ⋅  ⋅  3  4
-```
-"""
-Tridiagonal(A::AbstractMatrix) = Tridiagonal(diag(A,-1), diag(A,0), diag(A,1))
-
-Tridiagonal(A::Tridiagonal) = A
-Tridiagonal{T}(A::Tridiagonal{T}) where {T} = A
-function Tridiagonal{T}(A::Tridiagonal) where {T}
-    dl, d, du = map(x->convert(AbstractVector{T}, x)::AbstractVector{T},
-                    (A.dl, A.d, A.du))
-    if isdefined(A, :du2)
-        Tridiagonal(dl, d, du, convert(AbstractVector{T}, A.du2)::AbstractVector{T})
-    else
-        Tridiagonal(dl, d, du)
-    end
-end
-
-size(M::Tridiagonal) = (length(M.d), length(M.d))
-function size(M::Tridiagonal, d::Integer)
-    if d < 1
-        throw(ArgumentError("dimension d must be ≥ 1, got $d"))
-    elseif d <= 2
-        return length(M.d)
-    else
-        return 1
-    end
-end
-
-function Matrix{T}(M::Tridiagonal) where {T}
-    A = Matrix{T}(undef, size(M))
-    n = length(M.d)
-    n == 0 && return A
-    n > 2 && fill!(A, zero(T))
-    for i in 1:n-1
-        A[i,i] = M.d[i]
-        A[i+1,i] = M.dl[i]
-        A[i,i+1] = M.du[i]
-    end
-    A[n,n] = M.d[n]
-    A
-end
-Matrix(M::Tridiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(M)
-Array(M::Tridiagonal) = Matrix(M)
-
-similar(M::Tridiagonal, ::Type{T}) where {T} = Tridiagonal(similar(M.dl, T), similar(M.d, T), similar(M.du, T))
-similar(M::Tridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(M.d, T, dims)
-
-# Operations on Tridiagonal matrices
-copyto!(dest::Tridiagonal, src::Tridiagonal) = (copyto!(dest.dl, src.dl); copyto!(dest.d, src.d); copyto!(dest.du, src.du); dest)
-
-#Elementary operations
-for func in (:conj, :copy, :real, :imag)
-    @eval function ($func)(M::Tridiagonal)
-        Tridiagonal(($func)(M.dl), ($func)(M.d), ($func)(M.du))
-    end
-end
-
-adjoint(S::Tridiagonal) = Adjoint(S)
-transpose(S::Tridiagonal) = Transpose(S)
-adjoint(S::Tridiagonal{<:Real}) = Tridiagonal(S.du, S.d, S.dl)
-transpose(S::Tridiagonal{<:Number}) = Tridiagonal(S.du, S.d, S.dl)
-permutedims(T::Tridiagonal) = Tridiagonal(T.du, T.d, T.dl)
-function permutedims(T::Tridiagonal, perm)
-    Base.checkdims_perm(T, T, perm)
-    NTuple{2}(perm) == (2, 1) ? permutedims(T) : T
-end
-Base.copy(aS::Adjoint{<:Any,<:Tridiagonal}) = (S = aS.parent; Tridiagonal(map(x -> copy.(adjoint.(x)), (S.du, S.d, S.dl))...))
-Base.copy(tS::Transpose{<:Any,<:Tridiagonal}) = (S = tS.parent; Tridiagonal(map(x -> copy.(transpose.(x)), (S.du, S.d, S.dl))...))
-
-ishermitian(S::Tridiagonal) = all(ishermitian, S.d) && all(Iterators.map((x, y) -> x == y', S.du, S.dl))
-issymmetric(S::Tridiagonal) = all(issymmetric, S.d) && all(Iterators.map((x, y) -> x == transpose(y), S.du, S.dl))
-
-\(A::Adjoint{<:Any,<:Tridiagonal}, B::Adjoint{<:Any,<:AbstractVecOrMat}) = copy(A) \ B
-
-function diag(M::Tridiagonal{T}, n::Integer=0) where T
-    # every branch call similar(..., ::Int) to make sure the
-    # same vector type is returned independent of n
-    if n == 0
-        return copyto!(similar(M.d, length(M.d)), M.d)
-    elseif n == -1
-        return copyto!(similar(M.dl, length(M.dl)), M.dl)
-    elseif n == 1
-        return copyto!(similar(M.du, length(M.du)), M.du)
-    elseif abs(n) <= size(M,1)
-        return fill!(similar(M.d, size(M,1)-abs(n)), zero(T))
-    else
-        throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
-            "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
-    end
-end
-
-@inline function Base.isassigned(A::Tridiagonal, i::Int, j::Int)
-    @boundscheck checkbounds(A, i, j)
-    if i == j
-        return @inbounds isassigned(A.d, i)
-    elseif i == j + 1
-        return @inbounds isassigned(A.dl, j)
-    elseif i + 1 == j
-        return @inbounds isassigned(A.du, i)
-    else
-        return true
-    end
-end
-
-@inline function Base.isstored(A::Tridiagonal, i::Int, j::Int)
-    @boundscheck checkbounds(A, i, j)
-    if i == j
-        return @inbounds Base.isstored(A.d, i)
-    elseif i == j + 1
-        return @inbounds Base.isstored(A.dl, j)
-    elseif i + 1 == j
-        return @inbounds Base.isstored(A.du, i)
-    else
-        return false
-    end
-end
-
-@inline function getindex(A::Tridiagonal{T}, i::Integer, j::Integer) where T
-    @boundscheck checkbounds(A, i, j)
-    if i == j
-        return @inbounds A.d[i]
-    elseif i == j + 1
-        return @inbounds A.dl[j]
-    elseif i + 1 == j
-        return @inbounds A.du[i]
-    else
-        return zero(T)
-    end
-end
-
-@inline function setindex!(A::Tridiagonal, x, i::Integer, j::Integer)
-    @boundscheck checkbounds(A, i, j)
-    if i == j
-        @inbounds A.d[i] = x
-    elseif i - j == 1
-        @inbounds A.dl[j] = x
-    elseif j - i == 1
-        @inbounds A.du[i] = x
-    elseif !iszero(x)
-        throw(ArgumentError(string("cannot set entry ($i, $j) off ",
-            "the tridiagonal band to a nonzero value ($x)")))
-    end
-    return x
-end
-
-## structured matrix methods ##
-function Base.replace_in_print_matrix(A::Tridiagonal,i::Integer,j::Integer,s::AbstractString)
-    i==j-1||i==j||i==j+1 ? s : Base.replace_with_centered_mark(s)
-end
-
-
-#tril and triu
-
-iszero(M::Tridiagonal) = iszero(M.dl) && iszero(M.d) && iszero(M.du)
-isone(M::Tridiagonal) = iszero(M.dl) && all(isone, M.d) && iszero(M.du)
-function istriu(M::Tridiagonal, k::Integer=0)
-    if k <= -1
-        return true
-    elseif k == 0
-        return iszero(M.dl)
-    elseif k == 1
-        return iszero(M.dl) && iszero(M.d)
-    else # k >= 2
-        return iszero(M.dl) && iszero(M.d) && iszero(M.du)
-    end
-end
-function istril(M::Tridiagonal, k::Integer=0)
-    if k >= 1
-        return true
-    elseif k == 0
-        return iszero(M.du)
-    elseif k == -1
-        return iszero(M.du) && iszero(M.d)
-    else # k <= -2
-        return iszero(M.du) && iszero(M.d) && iszero(M.dl)
-    end
-end
-isdiag(M::Tridiagonal) = iszero(M.dl) && iszero(M.du)
-
-function tril!(M::Tridiagonal{T}, k::Integer=0) where T
-    n = length(M.d)
-    if !(-n - 1 <= k <= n - 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
-    elseif k < -1
-        fill!(M.dl, zero(T))
-        fill!(M.d, zero(T))
-        fill!(M.du, zero(T))
-    elseif k == -1
-        fill!(M.d, zero(T))
-        fill!(M.du, zero(T))
-    elseif k == 0
-        fill!(M.du, zero(T))
-    end
-    return M
-end
-
-function triu!(M::Tridiagonal{T}, k::Integer=0) where T
-    n = length(M.d)
-    if !(-n + 1 <= k <= n + 1)
-        throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
-            "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
-    elseif k > 1
-        fill!(M.dl, zero(T))
-        fill!(M.d, zero(T))
-        fill!(M.du, zero(T))
-    elseif k == 1
-        fill!(M.dl, zero(T))
-        fill!(M.d, zero(T))
-    elseif k == 0
-        fill!(M.dl, zero(T))
-    end
-    return M
-end
-
-tr(M::Tridiagonal) = sum(M.d)
-
-###################
-# Generic methods #
-###################
-
-+(A::Tridiagonal, B::Tridiagonal) = Tridiagonal(A.dl+B.dl, A.d+B.d, A.du+B.du)
--(A::Tridiagonal, B::Tridiagonal) = Tridiagonal(A.dl-B.dl, A.d-B.d, A.du-B.du)
--(A::Tridiagonal) = Tridiagonal(-A.dl, -A.d, -A.du)
-*(A::Tridiagonal, B::Number) = Tridiagonal(A.dl*B, A.d*B, A.du*B)
-*(B::Number, A::Tridiagonal) = Tridiagonal(B*A.dl, B*A.d, B*A.du)
-/(A::Tridiagonal, B::Number) = Tridiagonal(A.dl/B, A.d/B, A.du/B)
-\(B::Number, A::Tridiagonal) = Tridiagonal(B\A.dl, B\A.d, B\A.du)
-
-==(A::Tridiagonal, B::Tridiagonal) = (A.dl==B.dl) && (A.d==B.d) && (A.du==B.du)
-function ==(A::Tridiagonal, B::SymTridiagonal)
-    iseq = all(Iterators.map((x, y) -> x == transpose(y), A.du, A.dl))
-    iseq = iseq && A.du == _evview(B)
-    iseq && all(Iterators.map((x, y) -> x == symmetric(y, :U), A.d, B.dv))
-end
-==(A::SymTridiagonal, B::Tridiagonal) = B == A
-
-det(A::Tridiagonal) = det_usmani(A.dl, A.d, A.du)
-
-AbstractMatrix{T}(M::Tridiagonal) where {T} = Tridiagonal{T}(M)
-Tridiagonal{T}(M::SymTridiagonal{T}) where {T} = Tridiagonal(M)
-function SymTridiagonal{T}(M::Tridiagonal) where T
-    if issymmetric(M)
-        return SymTridiagonal{T}(convert(AbstractVector{T},M.d), convert(AbstractVector{T},M.dl))
-    else
-        throw(ArgumentError("Tridiagonal is not symmetric, cannot convert to SymTridiagonal"))
-    end
-end
-
-Base._sum(A::Tridiagonal, ::Colon) = sum(A.d) + sum(A.dl) + sum(A.du)
-function Base._sum(A::SymTridiagonal, ::Colon)
-    se = sum(_evview(A))
-    symmetric(sum(A.dv), :U) + se + transpose(se)
-end
-
-function Base._sum(A::Tridiagonal, dims::Integer)
-    res = Base.reducedim_initarray(A, dims, zero(eltype(A)))
-    n = length(A.d)
-    if n == 0
-        return res
-    elseif n == 1
-        res[1] = A.d[1]
-        return res
-    end
-    @inbounds begin
-        if dims == 1
-            res[1] = A.dl[1] + A.d[1]
-            for i = 2:n-1
-                res[i] = A.dl[i] + A.d[i] + A.du[i-1]
-            end
-            res[n] = A.d[n] + A.du[n-1]
-        elseif dims == 2
-            res[1] = A.d[1] + A.du[1]
-            for i = 2:n-1
-                res[i] = A.dl[i-1] + A.d[i] + A.du[i]
-            end
-            res[n] = A.dl[n-1] + A.d[n]
-        elseif dims >= 3
-            for i = 1:n-1
-                res[i,i+1] = A.du[i]
-                res[i,i]   = A.d[i]
-                res[i+1,i] = A.dl[i]
-            end
-            res[n,n] = A.d[n]
-        end
-    end
-    res
-end
-
-function Base._sum(A::SymTridiagonal, dims::Integer)
-    res = Base.reducedim_initarray(A, dims, zero(eltype(A)))
-    n = length(A.dv)
-    if n == 0
-        return res
-    elseif n == 1
-        res[1] = A.dv[1]
-        return res
-    end
-    @inbounds begin
-        if dims == 1
-            res[1] = transpose(A.ev[1]) + symmetric(A.dv[1], :U)
-            for i = 2:n-1
-                res[i] = transpose(A.ev[i]) + symmetric(A.dv[i], :U) + A.ev[i-1]
-            end
-            res[n] = symmetric(A.dv[n], :U) + A.ev[n-1]
-        elseif dims == 2
-            res[1] = symmetric(A.dv[1], :U) + A.ev[1]
-            for i = 2:n-1
-                res[i] = transpose(A.ev[i-1]) + symmetric(A.dv[i], :U) + A.ev[i]
-            end
-            res[n] = transpose(A.ev[n-1]) + symmetric(A.dv[n], :U)
-        elseif dims >= 3
-            for i = 1:n-1
-                res[i,i+1] = A.ev[i]
-                res[i,i]   = symmetric(A.dv[i], :U)
-                res[i+1,i] = transpose(A.ev[i])
-            end
-            res[n,n] = symmetric(A.dv[n], :U)
-        end
-    end
-    res
-end
-
-function dot(x::AbstractVector, A::Tridiagonal, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    nx, ny = length(x), length(y)
-    (nx == size(A, 1) == ny) || throw(DimensionMismatch())
-    if nx ≤ 1
-        nx == 0 && return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
-        return dot(x[1], A.d[1], y[1])
-    end
-    @inbounds begin
-        x₀ = x[1]
-        x₊ = x[2]
-        dl, d, du = A.dl, A.d, A.du
-        r = dot(adjoint(d[1])*x₀ + adjoint(dl[1])*x₊, y[1])
-        for j in 2:nx-1
-            x₋, x₀, x₊ = x₀, x₊, x[j+1]
-            r += dot(adjoint(du[j-1])*x₋ + adjoint(d[j])*x₀ + adjoint(dl[j])*x₊, y[j])
-        end
-        r += dot(adjoint(du[nx-1])*x₀ + adjoint(d[nx])*x₊, y[nx])
-    end
-    return r
-end
-
-function cholesky(S::SymTridiagonal, ::NoPivot = NoPivot(); check::Bool = true)
-    if !ishermitian(S)
-        check && checkpositivedefinite(-1)
-        return Cholesky(S, 'U', convert(BlasInt, -1))
-    end
-    T = choltype(eltype(S))
-    cholesky!(Hermitian(Bidiagonal{T}(diag(S, 0), diag(S, 1), :U)), NoPivot(); check = check)
-end
diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl
deleted file mode 100644
index 21ae8a1bb913a..0000000000000
--- a/stdlib/LinearAlgebra/src/uniformscaling.jl
+++ /dev/null
@@ -1,541 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-import Base: copy, adjoint, getindex, show, transpose, one, zero, inv,
-             hcat, vcat, hvcat, ^
-
-"""
-    UniformScaling{T<:Number}
-
-Generically sized uniform scaling operator defined as a scalar times
-the identity operator, `λ*I`. Although without an explicit `size`, it
-acts similarly to a matrix in many cases and includes support for some
-indexing. See also [`I`](@ref).
-
-!!! compat "Julia 1.6"
-     Indexing using ranges is available as of Julia 1.6.
-
-# Examples
-```jldoctest
-julia> J = UniformScaling(2.)
-UniformScaling{Float64}
-2.0*I
-
-julia> A = [1. 2.; 3. 4.]
-2×2 Matrix{Float64}:
- 1.0  2.0
- 3.0  4.0
-
-julia> J*A
-2×2 Matrix{Float64}:
- 2.0  4.0
- 6.0  8.0
-
-julia> J[1:2, 1:2]
-2×2 Matrix{Float64}:
- 2.0  0.0
- 0.0  2.0
-```
-"""
-struct UniformScaling{T<:Number}
-    λ::T
-end
-
-"""
-    I
-
-An object of type [`UniformScaling`](@ref), representing an identity matrix of any size.
-
-# Examples
-```jldoctest
-julia> fill(1, (5,6)) * I == fill(1, (5,6))
-true
-
-julia> [1 2im 3; 1im 2 3] * I
-2×3 Matrix{Complex{Int64}}:
- 1+0im  0+2im  3+0im
- 0+1im  2+0im  3+0im
-```
-"""
-const I = UniformScaling(true)
-
-"""
-    (I::UniformScaling)(n::Integer)
-
-Construct a `Diagonal` matrix from a `UniformScaling`.
-
-!!! compat "Julia 1.2"
-     This method is available as of Julia 1.2.
-
-# Examples
-```jldoctest
-julia> I(3)
-3×3 Diagonal{Bool, Vector{Bool}}:
- 1  ⋅  ⋅
- ⋅  1  ⋅
- ⋅  ⋅  1
-
-julia> (0.7*I)(3)
-3×3 Diagonal{Float64, Vector{Float64}}:
- 0.7   ⋅    ⋅
-  ⋅   0.7   ⋅
-  ⋅    ⋅   0.7
-```
-"""
-(I::UniformScaling)(n::Integer) = Diagonal(fill(I.λ, n))
-
-eltype(::Type{UniformScaling{T}}) where {T} = T
-ndims(J::UniformScaling) = 2
-Base.has_offset_axes(::UniformScaling) = false
-getindex(J::UniformScaling, i::Integer,j::Integer) = ifelse(i==j,J.λ,zero(J.λ))
-
-getindex(J::UniformScaling, n::Integer, m::AbstractVector{<:Integer}) = getindex(J, m, n)
-function getindex(J::UniformScaling{T}, n::AbstractVector{<:Integer}, m::Integer) where T
-    v = zeros(T, axes(n))
-    @inbounds for (i,ii) in pairs(n)
-        if ii == m
-            v[i] = J.λ
-        end
-    end
-    return v
-end
-
-function getindex(J::UniformScaling{T}, n::AbstractVector{<:Integer}, m::AbstractVector{<:Integer}) where T
-    A = zeros(T, axes(n)..., axes(m)...)
-    @inbounds for (j,jj) in pairs(m), (i,ii) in pairs(n)
-        if ii == jj
-            A[i,j] = J.λ
-        end
-    end
-    return A
-end
-
-function show(io::IO, ::MIME"text/plain", J::UniformScaling)
-    s = "$(J.λ)"
-    if occursin(r"\w+\s*[\+\-]\s*\w+", s)
-        s = "($s)"
-    end
-    print(io, typeof(J), "\n$s*I")
-end
-copy(J::UniformScaling) = UniformScaling(J.λ)
-
-Base.convert(::Type{UniformScaling{T}}, J::UniformScaling) where {T} = UniformScaling(convert(T, J.λ))::UniformScaling{T}
-
-conj(J::UniformScaling) = UniformScaling(conj(J.λ))
-real(J::UniformScaling) = UniformScaling(real(J.λ))
-imag(J::UniformScaling) = UniformScaling(imag(J.λ))
-
-transpose(J::UniformScaling) = J
-adjoint(J::UniformScaling) = UniformScaling(conj(J.λ))
-
-one(::Type{UniformScaling{T}}) where {T} = UniformScaling(one(T))
-one(J::UniformScaling{T}) where {T} = one(UniformScaling{T})
-oneunit(::Type{UniformScaling{T}}) where {T} = UniformScaling(oneunit(T))
-oneunit(J::UniformScaling{T}) where {T} = oneunit(UniformScaling{T})
-zero(::Type{UniformScaling{T}}) where {T} = UniformScaling(zero(T))
-zero(J::UniformScaling{T}) where {T} = zero(UniformScaling{T})
-
-isdiag(::UniformScaling) = true
-istriu(::UniformScaling) = true
-istril(::UniformScaling) = true
-issymmetric(::UniformScaling) = true
-ishermitian(J::UniformScaling) = isreal(J.λ)
-isposdef(J::UniformScaling) = isposdef(J.λ)
-
-(+)(J::UniformScaling, x::Number) = J.λ + x
-(+)(x::Number, J::UniformScaling) = x + J.λ
-(-)(J::UniformScaling, x::Number) = J.λ - x
-(-)(x::Number, J::UniformScaling) = x - J.λ
-
-(+)(J::UniformScaling)                      = UniformScaling(+J.λ)
-(+)(J1::UniformScaling, J2::UniformScaling) = UniformScaling(J1.λ+J2.λ)
-(+)(B::BitArray{2}, J::UniformScaling)      = Array(B) + J
-(+)(J::UniformScaling, B::BitArray{2})      = J + Array(B)
-(+)(J::UniformScaling, A::AbstractMatrix)   = A + J
-
-(-)(J::UniformScaling)                      = UniformScaling(-J.λ)
-(-)(J1::UniformScaling, J2::UniformScaling) = UniformScaling(J1.λ-J2.λ)
-(-)(B::BitArray{2}, J::UniformScaling)      = Array(B) - J
-(-)(J::UniformScaling, B::BitArray{2})      = J - Array(B)
-(-)(A::AbstractMatrix, J::UniformScaling)   = A + (-J)
-
-# matrix functions
-for f in ( :exp,   :log,
-           :expm1, :log1p,
-           :sqrt,  :cbrt,
-           :sin,   :cos,   :tan,
-           :asin,  :acos,  :atan,
-           :csc,   :sec,   :cot,
-           :acsc,  :asec,  :acot,
-           :sinh,  :cosh,  :tanh,
-           :asinh, :acosh, :atanh,
-           :csch,  :sech,  :coth,
-           :acsch, :asech, :acoth )
-    @eval Base.$f(J::UniformScaling) = UniformScaling($f(J.λ))
-end
-
-# Unit{Lower/Upper}Triangular matrices become {Lower/Upper}Triangular under
-# addition with a UniformScaling
-for (t1, t2) in ((:UnitUpperTriangular, :UpperTriangular),
-                 (:UnitLowerTriangular, :LowerTriangular))
-    @eval begin
-        function (+)(UL::$t1, J::UniformScaling)
-            ULnew = copymutable_oftype(UL.data, Base.promote_op(+, eltype(UL), typeof(J)))
-            for i in axes(ULnew, 1)
-                ULnew[i,i] = one(ULnew[i,i]) + J
-            end
-            return ($t2)(ULnew)
-        end
-    end
-end
-
-# Adding a complex UniformScaling to the diagonal of a Hermitian
-# matrix breaks the hermiticity, if the UniformScaling is non-real.
-# However, to preserve type stability, we do not special-case a
-# UniformScaling{<:Complex} that happens to be real.
-function (+)(A::Hermitian, J::UniformScaling{<:Complex})
-    TS = Base.promote_op(+, eltype(A), typeof(J))
-    B = copytri!(copymutable_oftype(parent(A), TS), A.uplo, true)
-    for i in diagind(B)
-        B[i] = A[i] + J
-    end
-    return B
-end
-
-function (-)(J::UniformScaling{<:Complex}, A::Hermitian)
-    TS = Base.promote_op(+, eltype(A), typeof(J))
-    B = copytri!(copymutable_oftype(parent(A), TS), A.uplo, true)
-    B .= .-B
-    for i in diagind(B)
-        B[i] = J - A[i]
-    end
-    return B
-end
-
-function (+)(A::AbstractMatrix, J::UniformScaling)
-    checksquare(A)
-    B = copymutable_oftype(A, Base.promote_op(+, eltype(A), typeof(J)))
-    for i in intersect(axes(A,1), axes(A,2))
-        @inbounds B[i,i] += J
-    end
-    return B
-end
-
-function (-)(J::UniformScaling, A::AbstractMatrix)
-    checksquare(A)
-    B = convert(AbstractMatrix{Base.promote_op(+, eltype(A), typeof(J))}, -A)
-    for i in intersect(axes(A,1), axes(A,2))
-        @inbounds B[i,i] += J
-    end
-    return B
-end
-
-inv(J::UniformScaling) = UniformScaling(inv(J.λ))
-opnorm(J::UniformScaling, p::Real=2) = opnorm(J.λ, p)
-
-pinv(J::UniformScaling) = ifelse(iszero(J.λ),
-                          UniformScaling(zero(inv(J.λ))),  # type stability
-                          UniformScaling(inv(J.λ)))
-
-function det(J::UniformScaling{T}) where T
-    if isone(J.λ)
-        one(T)
-    elseif iszero(J.λ)
-        zero(T)
-    else
-        throw(ArgumentError("Determinant of UniformScaling is only well-defined when λ = 0 or 1."))
-    end
-end
-
-function tr(J::UniformScaling{T}) where T
-    if iszero(J.λ)
-        zero(T)
-    else
-        throw(ArgumentError("Trace of UniformScaling is only well-defined when λ = 0"))
-    end
-end
-
-*(J1::UniformScaling, J2::UniformScaling) = UniformScaling(J1.λ*J2.λ)
-*(B::BitArray{2}, J::UniformScaling) = *(Array(B), J::UniformScaling)
-*(J::UniformScaling, B::BitArray{2}) = *(J::UniformScaling, Array(B))
-*(A::AbstractMatrix, J::UniformScaling) = A*J.λ
-*(v::AbstractVector, J::UniformScaling) = reshape(v, length(v), 1) * J
-*(J::UniformScaling, A::AbstractVecOrMat) = J.λ*A
-*(x::Number, J::UniformScaling) = UniformScaling(x*J.λ)
-*(J::UniformScaling, x::Number) = UniformScaling(J.λ*x)
-
-/(J1::UniformScaling, J2::UniformScaling) = J2.λ == 0 ? throw(SingularException(1)) : UniformScaling(J1.λ/J2.λ)
-/(J::UniformScaling, A::AbstractMatrix) =
-    (invA = inv(A); lmul!(J.λ, convert(AbstractMatrix{promote_type(eltype(J),eltype(invA))}, invA)))
-/(A::AbstractMatrix, J::UniformScaling) = J.λ == 0 ? throw(SingularException(1)) : A/J.λ
-/(v::AbstractVector, J::UniformScaling) = reshape(v, length(v), 1) / J
-
-/(J::UniformScaling, x::Number) = UniformScaling(J.λ/x)
-
-\(J1::UniformScaling, J2::UniformScaling) = J1.λ == 0 ? throw(SingularException(1)) : UniformScaling(J1.λ\J2.λ)
-\(J::UniformScaling, A::AbstractVecOrMat) = J.λ == 0 ? throw(SingularException(1)) : J.λ\A
-\(A::AbstractMatrix, J::UniformScaling) =
-    (invA = inv(A); rmul!(convert(AbstractMatrix{promote_type(eltype(invA),eltype(J))}, invA), J.λ))
-\(F::Factorization, J::UniformScaling) = F \ J(size(F,1))
-
-\(x::Number, J::UniformScaling) = UniformScaling(x\J.λ)
-
-@inline mul!(C::AbstractMatrix, A::AbstractMatrix, J::UniformScaling, alpha::Number, beta::Number) =
-    mul!(C, A, J.λ, alpha, beta)
-@inline mul!(C::AbstractVecOrMat, J::UniformScaling, B::AbstractVecOrMat, alpha::Number, beta::Number) =
-    mul!(C, J.λ, B, alpha, beta)
-
-function mul!(out::AbstractMatrix{T}, a::Number, B::UniformScaling, α::Number, β::Number) where {T}
-    checksquare(out)
-    if iszero(β)  # zero contribution of the out matrix
-        fill!(out, zero(T))
-    elseif !isone(β)
-        rmul!(out, β)
-    end
-    s = convert(T, a*B.λ*α)
-    if !iszero(s)
-        @inbounds for i in diagind(out)
-            out[i] += s
-        end
-    end
-    return out
-end
-@inline mul!(out::AbstractMatrix, A::UniformScaling, b::Number, α::Number, β::Number)=
-    mul!(out, A.λ, UniformScaling(b), α, β)
-rmul!(A::AbstractMatrix, J::UniformScaling) = rmul!(A, J.λ)
-lmul!(J::UniformScaling, B::AbstractVecOrMat) = lmul!(J.λ, B)
-rdiv!(A::AbstractMatrix, J::UniformScaling) = rdiv!(A, J.λ)
-ldiv!(J::UniformScaling, B::AbstractVecOrMat) = ldiv!(J.λ, B)
-ldiv!(Y::AbstractVecOrMat, J::UniformScaling, B::AbstractVecOrMat) = (Y .= J.λ .\ B)
-
-Broadcast.broadcasted(::typeof(*), x::Number,J::UniformScaling) = UniformScaling(x*J.λ)
-Broadcast.broadcasted(::typeof(*), J::UniformScaling,x::Number) = UniformScaling(J.λ*x)
-
-Broadcast.broadcasted(::typeof(/), J::UniformScaling,x::Number) = UniformScaling(J.λ/x)
-
-Broadcast.broadcasted(::typeof(\), x::Number,J::UniformScaling) = UniformScaling(x\J.λ)
-
-(^)(J::UniformScaling, x::Number) = UniformScaling((J.λ)^x)
-Base.literal_pow(::typeof(^), J::UniformScaling, x::Val) = UniformScaling(Base.literal_pow(^, J.λ, x))
-
-Broadcast.broadcasted(::typeof(^), J::UniformScaling, x::Number) = UniformScaling(J.λ^x)
-function Broadcast.broadcasted(::typeof(Base.literal_pow), ::typeof(^), J::UniformScaling, x::Val)
-    UniformScaling(Base.literal_pow(^, J.λ, x))
-end
-
-==(J1::UniformScaling,J2::UniformScaling) = (J1.λ == J2.λ)
-
-## equality comparison with UniformScaling
-==(J::UniformScaling, A::AbstractMatrix) = A == J
-function ==(A::AbstractMatrix, J::UniformScaling)
-    require_one_based_indexing(A)
-    size(A, 1) == size(A, 2) || return false
-    iszero(J.λ) && return iszero(A)
-    isone(J.λ) && return isone(A)
-    return A == J.λ*one(A)
-end
-function ==(A::StridedMatrix, J::UniformScaling)
-    size(A, 1) == size(A, 2) || return false
-    iszero(J.λ) && return iszero(A)
-    isone(J.λ) && return isone(A)
-    for j in axes(A, 2), i in axes(A, 1)
-        ifelse(i == j, A[i, j] == J.λ, iszero(A[i, j])) || return false
-    end
-    return true
-end
-
-isequal(A::AbstractMatrix, J::UniformScaling) = false
-isequal(J::UniformScaling, A::AbstractMatrix) = false
-
-function isapprox(J1::UniformScaling{T}, J2::UniformScaling{S};
-            atol::Real=0, rtol::Real=Base.rtoldefault(T,S,atol), nans::Bool=false) where {T<:Number,S<:Number}
-    isapprox(J1.λ, J2.λ, rtol=rtol, atol=atol, nans=nans)
-end
-function isapprox(J::UniformScaling, A::AbstractMatrix;
-                  atol::Real = 0,
-                  rtol::Real = Base.rtoldefault(promote_leaf_eltypes(A), eltype(J), atol),
-                  nans::Bool = false, norm::Function = norm)
-    n = checksquare(A)
-    normJ = norm === opnorm             ? abs(J.λ) :
-            norm === LinearAlgebra.norm ? abs(J.λ) * sqrt(n) :
-                                          norm(Diagonal(fill(J.λ, n)))
-    return norm(A - J) <= max(atol, rtol * max(norm(A), normJ))
-end
-isapprox(A::AbstractMatrix, J::UniformScaling; kwargs...) = isapprox(J, A; kwargs...)
-
-"""
-    copyto!(dest::AbstractMatrix, src::UniformScaling)
-
-Copies a [`UniformScaling`](@ref) onto a matrix.
-
-!!! compat "Julia 1.1"
-    In Julia 1.0 this method only supported a square destination matrix. Julia 1.1. added
-    support for a rectangular matrix.
-"""
-function copyto!(A::AbstractMatrix, J::UniformScaling)
-    require_one_based_indexing(A)
-    fill!(A, 0)
-    λ = J.λ
-    for i = 1:min(size(A,1),size(A,2))
-        @inbounds A[i,i] = λ
-    end
-    return A
-end
-
-function copyto!(A::Diagonal, J::UniformScaling)
-    A.diag .= J.λ
-    return A
-end
-function copyto!(A::Union{Bidiagonal, SymTridiagonal}, J::UniformScaling)
-    A.ev .= 0
-    A.dv .= J.λ
-    return A
-end
-function copyto!(A::Tridiagonal, J::UniformScaling)
-    A.dl .= 0
-    A.du .= 0
-    A.d .= J.λ
-    return A
-end
-
-function cond(J::UniformScaling{T}) where T
-    onereal = inv(one(real(J.λ)))
-    return J.λ ≠ zero(T) ? onereal : oftype(onereal, Inf)
-end
-
-# promote_to_arrays(n,k, T, A...) promotes any UniformScaling matrices
-# in A to matrices of type T and sizes given by n[k:end].  n is an array
-# so that the same promotion code can be used for hvcat.  We pass the type T
-# so that we can re-use this code for sparse-matrix hcat etcetera.
-promote_to_arrays_(n::Int, ::Type, a::Number) = a
-promote_to_arrays_(n::Int, ::Type{Matrix}, J::UniformScaling{T}) where {T} = Matrix(J, n, n)
-promote_to_arrays_(n::Int, ::Type, A::AbstractVecOrMat) = A
-promote_to_arrays(n,k, ::Type) = ()
-promote_to_arrays(n,k, ::Type{T}, A) where {T} = (promote_to_arrays_(n[k], T, A),)
-promote_to_arrays(n,k, ::Type{T}, A, B) where {T} =
-    (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B))
-promote_to_arrays(n,k, ::Type{T}, A, B, C) where {T} =
-    (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays_(n[k+2], T, C))
-promote_to_arrays(n,k, ::Type{T}, A, B, Cs...) where {T} =
-    (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays(n,k+2, T, Cs...)...)
-promote_to_array_type(A::Tuple{Vararg{Union{AbstractVecOrMat,UniformScaling,Number}}}) = Matrix
-
-_us2number(A) = A
-_us2number(J::UniformScaling) = J.λ
-
-for (f, _f, dim, name) in ((:hcat, :_hcat, 1, "rows"), (:vcat, :_vcat, 2, "cols"))
-    @eval begin
-        @inline $f(A::Union{AbstractVecOrMat,UniformScaling}...) = $_f(A...)
-        # if there's a Number present, J::UniformScaling must be 1x1-dimensional
-        @inline $f(A::Union{AbstractVecOrMat,UniformScaling,Number}...) = $f(map(_us2number, A)...)
-        function $_f(A::Union{AbstractVecOrMat,UniformScaling,Number}...; array_type = promote_to_array_type(A))
-            n = -1
-            for a in A
-                if !isa(a, UniformScaling)
-                    require_one_based_indexing(a)
-                    na = size(a,$dim)
-                    n >= 0 && n != na &&
-                        throw(DimensionMismatch(string("number of ", $name,
-                            " of each array must match (got ", n, " and ", na, ")")))
-                    n = na
-                end
-            end
-            n == -1 && throw(ArgumentError($("$f of only UniformScaling objects cannot determine the matrix size")))
-            return cat(promote_to_arrays(fill(n, length(A)), 1, array_type, A...)..., dims=Val(3-$dim))
-        end
-    end
-end
-
-hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling}...) = _hvcat(rows, A...)
-hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...) = _hvcat(rows, A...)
-function _hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...; array_type = promote_to_array_type(A))
-    require_one_based_indexing(A...)
-    nr = length(rows)
-    sum(rows) == length(A) || throw(ArgumentError("mismatch between row sizes and number of arguments"))
-    n = fill(-1, length(A))
-    needcols = false # whether we also need to infer some sizes from the column count
-    j = 0
-    for i = 1:nr # infer UniformScaling sizes from row counts, if possible:
-        ni = -1 # number of rows in this block-row, -1 indicates unknown
-        for k = 1:rows[i]
-            if !isa(A[j+k], UniformScaling)
-                na = size(A[j+k], 1)
-                ni >= 0 && ni != na &&
-                    throw(DimensionMismatch("mismatch in number of rows"))
-                ni = na
-            end
-        end
-        if ni >= 0
-            for k = 1:rows[i]
-                n[j+k] = ni
-            end
-        else # row consisted only of UniformScaling objects
-            needcols = true
-        end
-        j += rows[i]
-    end
-    if needcols # some sizes still unknown, try to infer from column count
-        nc = -1
-        j = 0
-        for i = 1:nr
-            nci = 0
-            rows[i] > 0 && n[j+1] == -1 && (j += rows[i]; continue)
-            for k = 1:rows[i]
-                nci += isa(A[j+k], UniformScaling) ? n[j+k] : size(A[j+k], 2)
-            end
-            nc >= 0 && nc != nci && throw(DimensionMismatch("mismatch in number of columns"))
-            nc = nci
-            j += rows[i]
-        end
-        nc == -1 && throw(ArgumentError("sizes of UniformScalings could not be inferred"))
-        j = 0
-        for i = 1:nr
-            if rows[i] > 0 && n[j+1] == -1 # this row consists entirely of UniformScalings
-                nci, r = divrem(nc, rows[i])
-                r != 0 && throw(DimensionMismatch("indivisible UniformScaling sizes"))
-                for k = 1:rows[i]
-                    n[j+k] = nci
-                end
-            end
-            j += rows[i]
-        end
-    end
-    Amat = promote_to_arrays(n, 1, array_type, A...)
-    # We have two methods for promote_to_array_type, one returning Matrix and
-    # another one returning SparseMatrixCSC (in SparseArrays.jl). In the dense
-    # case, we cannot call hvcat for the promoted UniformScalings because this
-    # causes a stack overflow. In the sparse case, however, we cannot call
-    # typed_hvcat because we need a sparse output.
-    if array_type == Matrix
-        return typed_hvcat(promote_eltype(Amat...), rows, Amat...)
-    else
-        return hvcat(rows, Amat...)
-    end
-end
-
-## Matrix construction from UniformScaling
-function Matrix{T}(s::UniformScaling, dims::Dims{2}) where {T}
-    A = zeros(T, dims)
-    v = T(s.λ)
-    for i in diagind(dims...)
-        @inbounds A[i] = v
-    end
-    return A
-end
-Matrix{T}(s::UniformScaling, m::Integer, n::Integer) where {T} = Matrix{T}(s, Dims((m, n)))
-Matrix(s::UniformScaling, m::Integer, n::Integer) = Matrix(s, Dims((m, n)))
-Matrix(s::UniformScaling, dims::Dims{2}) = Matrix{eltype(s)}(s, dims)
-Array{T}(s::UniformScaling, dims::Dims{2}) where {T} = Matrix{T}(s, dims)
-Array{T}(s::UniformScaling, m::Integer, n::Integer) where {T} = Matrix{T}(s, m, n)
-Array(s::UniformScaling, m::Integer, n::Integer) = Matrix(s, m, n)
-Array(s::UniformScaling, dims::Dims{2}) = Matrix(s, dims)
-
-dot(A::AbstractMatrix, J::UniformScaling) = dot(tr(A), J.λ)
-dot(J::UniformScaling, A::AbstractMatrix) = dot(J.λ, tr(A))
-
-dot(x::AbstractVector, J::UniformScaling, y::AbstractVector) = dot(x, J.λ, y)
-dot(x::AbstractVector, a::Number, y::AbstractVector) = sum(t -> dot(t[1], a, t[2]), zip(x, y))
-dot(x::AbstractVector, a::Union{Real,Complex}, y::AbstractVector) = a*dot(x, y)
-
-# muladd
-Base.muladd(A::UniformScaling, B::UniformScaling, z::UniformScaling) =
-    UniformScaling(A.λ * B.λ + z.λ)
diff --git a/stdlib/LinearAlgebra/test/abstractq.jl b/stdlib/LinearAlgebra/test/abstractq.jl
deleted file mode 100644
index 83a26c6050484..0000000000000
--- a/stdlib/LinearAlgebra/test/abstractq.jl
+++ /dev/null
@@ -1,100 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestAbstractQ
-
-using Test
-using LinearAlgebra
-using LinearAlgebra: AbstractQ, AdjointQ
-import LinearAlgebra: lmul!, rmul!
-import Base: size, convert
-
-n = 5
-
-@testset "custom AbstractQ type" begin
-    struct MyQ{T,S<:AbstractQ{T}} <: AbstractQ{T}
-        Q::S
-    end
-    MyQ{T}(Q::AbstractQ) where {T} = (P = convert(AbstractQ{T}, Q); MyQ{T,typeof(P)}(P))
-    MyQ(Q::MyQ) = Q
-
-    Base.size(Q::MyQ) = size(Q.Q)
-    LinearAlgebra.lmul!(Q::MyQ, B::AbstractVecOrMat) = lmul!(Q.Q, B)
-    LinearAlgebra.lmul!(adjQ::AdjointQ{<:Any,<:MyQ}, B::AbstractVecOrMat) = lmul!(parent(adjQ).Q', B)
-    LinearAlgebra.rmul!(A::AbstractVecOrMat, Q::MyQ) = rmul!(A, Q.Q)
-    LinearAlgebra.rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:MyQ}) = rmul!(A, parent(adjQ).Q')
-    Base.convert(::Type{AbstractQ{T}}, Q::MyQ) where {T} = MyQ{T}(Q.Q)
-    LinearAlgebra.det(Q::MyQ) = det(Q.Q)
-
-    for T in (Float64, ComplexF64)
-        A = rand(T, n, n)
-        F = qr(A)
-        Q = MyQ(F.Q)
-        @test ndims(Q) == 2
-        T <: Real && @test transpose(Q) == adjoint(Q)
-        T <: Complex && @test_throws ErrorException transpose(Q)
-        @test convert(AbstractQ{complex(T)}, Q) isa MyQ{complex(T)}
-        @test convert(AbstractQ{complex(T)}, Q') isa AdjointQ{<:complex(T),<:MyQ{complex(T)}}
-        @test Q*I ≈ Q.Q*I rtol=2eps(real(T))
-        @test Q'*I ≈ Q.Q'*I rtol=2eps(real(T))
-        @test I*Q ≈ Q.Q*I rtol=2eps(real(T))
-        @test I*Q' ≈ I*Q.Q' rtol=2eps(real(T))
-        @test abs(det(Q)) ≈ 1
-        @test logabsdet(Q)[1] ≈ 0 atol=2n*eps(real(T))
-        y = rand(T, n)
-        @test Q * y ≈ Q.Q * y ≈ Q' \ y ≈ ldiv!(Q', copy(y)) ≈ ldiv!(zero(y), Q', y)
-        @test Q'y ≈ Q.Q' * y ≈ Q \ y ≈ ldiv!(Q, copy(y)) ≈ ldiv!(zero(y), Q, y)
-        @test y'Q ≈ y'Q.Q ≈ y' / Q'
-        @test y'Q' ≈ y'Q.Q' ≈ y' / Q
-        y = Matrix(y')
-        @test y*Q ≈ y*Q.Q ≈ y / Q' ≈ rdiv!(copy(y), Q')
-        @test y*Q' ≈ y*Q.Q' ≈ y / Q ≈ rdiv!(copy(y), Q)
-        Y = rand(T, n, n); X = similar(Y)
-        for transQ in (identity, adjoint), transY in (identity, adjoint), Y in (Y, Y')
-            @test mul!(X, transQ(Q), transY(Y)) ≈ transQ(Q) * transY(Y) ≈ transQ(Q.Q) * transY(Y)
-            @test mul!(X, transY(Y), transQ(Q)) ≈ transY(Y) * transQ(Q) ≈ transY(Y) * transQ(Q.Q)
-        end
-        @test convert(Matrix, Q) ≈ Matrix(Q) ≈ Q[:,:] ≈ copyto!(zeros(T, size(Q)), Q) ≈ Q.Q*I
-        @test convert(Matrix, Q') ≈ Matrix(Q') ≈ (Q')[:,:] ≈ copyto!(zeros(T, size(Q)), Q') ≈ Q.Q'*I
-        @test Q[1,:] == Q.Q[1,:] == view(Q, 1, :)
-        @test Q[:,1] == Q.Q[:,1] == view(Q, :, 1)
-        @test Q[1,1] == Q.Q[1,1]
-        @test Q[:] == Q.Q[:]
-        @test Q[:,1:3] == Q.Q[:,1:3] == view(Q, :, 1:3)
-        @test Q[:,1:3] ≈ Matrix(Q)[:,1:3]
-        @test Q[2:3,2:3] == view(Q, 2:3, 2:3) ≈ Matrix(Q)[2:3,2:3]
-        @test_throws BoundsError Q[0,1]
-        @test_throws BoundsError Q[n+1,1]
-        @test_throws BoundsError Q[1,0]
-        @test_throws BoundsError Q[1,n+1]
-        @test_throws BoundsError Q[:,1:n+1]
-        @test_throws BoundsError Q[:,0:n]
-        for perm in ((1, 2), (2, 1))
-            P = PermutedDimsArray(zeros(T, size(Q)), perm)
-            @test copyto!(P, Q) ≈ Matrix(Q)
-        end
-        x = randn(T)
-        @test x * Q ≈ (x*I)*Q ≈ x * Q.Q
-        @test Q * x ≈ Q*(x*I) ≈ Q.Q * x
-        @test x * Q' ≈ (x*I)* Q' ≈ x * Q.Q'
-        @test Q' * x ≈ Q'*(x*I) ≈ Q.Q' * x
-        x = rand(T, 1)
-        Q = MyQ(qr(rand(T, 1, 1)).Q)
-        @test x * Q ≈ x * Q.Q
-        @test x * Q' ≈ x * Q.Q'
-        @test Q * x ≈ Q.Q * x
-        @test Q' * x ≈ Q.Q' * x
-    end
-    A = rand(Float64, 5, 3)
-    F = qr(A)
-    Q = MyQ(F.Q)
-    Prect = Matrix(F.Q)
-    Psquare = collect(F.Q)
-    @test Q == Prect
-    @test Q == Psquare
-    @test Q == F.Q*I
-    @test Q ≈ Prect
-    @test Q ≈ Psquare
-    @test Q ≈ F.Q*I
-end
-
-end # module
diff --git a/stdlib/LinearAlgebra/test/addmul.jl b/stdlib/LinearAlgebra/test/addmul.jl
deleted file mode 100644
index 3fff8289242f7..0000000000000
--- a/stdlib/LinearAlgebra/test/addmul.jl
+++ /dev/null
@@ -1,223 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestAddmul
-
-using Base: rtoldefault
-using Test
-using LinearAlgebra
-using LinearAlgebra: AbstractTriangular
-using Random
-
-_rand(::Type{T}) where {T <: AbstractFloat} = T(randn())
-_rand(::Type{T}) where {F, T <: Complex{F}} = T(_rand(F), _rand(F))
-_rand(::Type{T}) where {T <: Integer} =
-    T(rand(max(typemin(T), -10):min(typemax(T), 10)))
-_rand(::Type{BigInt}) = BigInt(_rand(Int))
-
-function _rand(A::Type{<:Array}, shape)
-    T = eltype(A)
-    data = T[_rand(T) for _ in 1:prod(shape)]
-    return copy(reshape(data, shape))
-end
-
-constructor_of(::Type{T}) where T = getfield(parentmodule(T), nameof(T))
-
-function _rand(A::Type{<: AbstractArray}, shape)
-    data = _rand(Array{eltype(A)}, shape)
-    T = constructor_of(A)
-    if A <: Union{Bidiagonal, Hermitian, Symmetric}
-        return T(data, rand([:U, :L]))
-        # Maybe test with both :U and :L?
-    end
-    return T(data)
-end
-
-_rand(A::Type{<: SymTridiagonal{T}}, shape) where {T} =
-    SymTridiagonal(_rand(Symmetric{T}, shape))
-
-const FloatOrC = Union{AbstractFloat, Complex{<: AbstractFloat}}
-const IntegerOrC = Union{Integer, Complex{<: Integer}}
-const LTri = Union{LowerTriangular, UnitLowerTriangular, Diagonal}
-const UTri = Union{UpperTriangular, UnitUpperTriangular, Diagonal}
-
-needsquare(::Type{<:Matrix}) = false
-needsquare(::Type) = true
-
-testdata = []
-
-sizecandidates = 1:4
-floattypes = [
-    Float64, Float32, ComplexF64, ComplexF32,  # BlasFloat
-    BigFloat,
-]
-inttypes = [
-    Int,
-    BigInt,
-]
-# `Bool` can be added to `inttypes` but it's hard to handle
-# `InexactError` bug that is mentioned in:
-# https://github.com/JuliaLang/julia/issues/30094#issuecomment-440175887
-alleltypes = [floattypes; inttypes]
-celtypes = [Float64, ComplexF64, BigFloat, Int]
-
-mattypes = [
-    Matrix,
-    Bidiagonal,
-    Diagonal,
-    Hermitian,
-    LowerTriangular,
-    SymTridiagonal,
-    Symmetric,
-    Tridiagonal,
-    UnitLowerTriangular,
-    UnitUpperTriangular,
-    UpperTriangular,
-]
-
-isnanfillable(::AbstractArray) = false
-isnanfillable(::Array{<:AbstractFloat}) = true
-isnanfillable(A::AbstractArray{<:AbstractFloat}) = parent(A) isa Array
-
-"""
-Sample `n` elements from `S` on average but make sure at least one
-element is sampled.
-"""
-function sample(S, n::Real)
-    length(S) <= n && return S
-    xs = randsubseq(S, n / length(S))
-    return length(xs) > 0 ? xs : rand(S, 1)  # sample at least one
-end
-
-function inputeltypes(celt, alleltypes = alleltypes)
-    # Skip if destination type is "too small"
-    celt <: Bool && return []
-    filter(alleltypes) do aelt
-        celt <: Real && aelt <: Complex && return false
-        !(celt <: BigFloat) && aelt <: BigFloat && return false
-        !(celt <: BigInt) && aelt <: BigInt && return false
-        celt <: IntegerOrC && aelt <: FloatOrC && return false
-        if celt <: IntegerOrC && !(celt <: BigInt)
-            typemin(celt) > typemin(aelt) && return false
-            typemax(celt) < typemax(aelt) && return false
-        end
-        return true
-    end
-end
-# Note: using `randsubseq` instead of `rand` to avoid repetition.
-
-function inputmattypes(cmat, mattypes = mattypes)
-    # Skip if destination type is "too small"
-    cmat <: Union{Bidiagonal, Tridiagonal, SymTridiagonal,
-                  UnitLowerTriangular, UnitUpperTriangular,
-                  Hermitian, Symmetric} && return []
-    filter(mattypes) do amat
-        cmat <: Diagonal && (amat <: Diagonal || return false)
-        cmat <: LowerTriangular && (amat <: LTri || return false)
-        cmat <: UpperTriangular && (amat <: UTri || return false)
-        return true
-    end
-end
-
-n_samples = 1.5
-# n_samples = Inf  # to try all combinations
-for cmat in mattypes,
-    amat in sample(inputmattypes(cmat), n_samples),
-    bmat in sample(inputmattypes(cmat), n_samples),
-    celt in celtypes,
-    aelt in sample(inputeltypes(celt), n_samples),
-    belt in sample(inputeltypes(celt), n_samples)
-
-    push!(testdata, (cmat{celt}, amat{aelt}, bmat{belt}))
-end
-
-@testset "mul!(::$TC, ::$TA, ::$TB, α, β)" for (TC, TA, TB) in testdata
-    if needsquare(TA)
-        na1 = na2 = rand(sizecandidates)
-    else
-        na1, na2 = rand(sizecandidates, 2)
-    end
-    if needsquare(TB)
-        nb2 = na2
-    elseif needsquare(TC)
-        nb2 = na1
-    else
-        nb2 = rand(sizecandidates)
-    end
-    asize = (na1, na2)
-    bsize = (na2, nb2)
-    csize = (na1, nb2)
-
-    @testset for α in Any[true, eltype(TC)(1), _rand(eltype(TC))],
-                 β in Any[false, eltype(TC)(0), _rand(eltype(TC))]
-
-        C = _rand(TC, csize)
-        A = _rand(TA, asize)
-        B = _rand(TB, bsize)
-
-        # This is similar to how `isapprox` choose `rtol` (when
-        # `atol=0`) but consider all number types involved:
-        rtol = max(rtoldefault.(real.(eltype.((C, A, B))))...,
-                   rtoldefault.(real.(typeof.((α, β))))...)
-
-        Cc = copy(C)
-        Ac = Matrix(A)
-        Bc = Matrix(B)
-        returned_mat = mul!(C, A, B, α, β)
-        @test returned_mat === C
-        # This test is skipped because it is flakey, but should be fixed and put back (see #49966)
-        @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc  rtol=rtol
-
-        y = C[:, 1]
-        x = B[:, 1]
-        yc = Vector(y)
-        xc = Vector(x)
-        returned_vec = mul!(y, A, x, α, β)
-        @test returned_vec === y
-        @test collect(returned_vec) ≈ α * Ac * xc + β * yc  rtol=rtol
-
-        if TC <: Matrix
-            @testset "adjoint and transpose" begin
-                @testset for fa in [identity, adjoint, transpose],
-                             fb in [identity, adjoint, transpose]
-                    fa === fb === identity && continue
-
-                    Af = fa === identity ? A : fa(_rand(TA, reverse(asize)))
-                    Bf = fb === identity ? B : fb(_rand(TB, reverse(bsize)))
-
-                    Ac = collect(Af)
-                    Bc = collect(Bf)
-                    Cc = collect(C)
-
-                    returned_mat = mul!(C, Af, Bf, α, β)
-                    @test returned_mat === C
-                    # This test is skipped because it is flakey, but should be fixed and put back (see #49966)
-                    @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc  rtol=rtol
-                end
-            end
-        end
-
-        if isnanfillable(C)
-            @testset "β = 0 ignores C .= NaN" begin
-                parent(C) .= NaN
-                Ac = Matrix(A)
-                Bc = Matrix(B)
-                returned_mat = mul!(C, A, B, α, zero(eltype(C)))
-                @test returned_mat === C
-                # This test is skipped because it is flakey, but should be fixed and put back (see #49966)
-                @test_skip collect(returned_mat) ≈ α * Ac * Bc  rtol=rtol
-            end
-        end
-
-        if isnanfillable(A)
-            @testset "α = 0 ignores A .= NaN" begin
-                parent(A) .= NaN
-                Cc = copy(C)
-                returned_mat = mul!(C, A, B, zero(eltype(A)), β)
-                @test returned_mat === C
-                @test collect(returned_mat) ≈ β * Cc  rtol=rtol
-            end
-        end
-    end
-end
-
-end  # module
diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl
deleted file mode 100644
index 2362ec7fb28f2..0000000000000
--- a/stdlib/LinearAlgebra/test/adjtrans.jl
+++ /dev/null
@@ -1,674 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestAdjointTranspose
-
-using Test, LinearAlgebra
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-
-@testset "Adjoint and Transpose inner constructor basics" begin
-    intvec, intmat = [1, 2], [1 2; 3 4]
-    # Adjoint/Transpose eltype must match the type of the Adjoint/Transpose of the input eltype
-    @test_throws TypeError Adjoint{Float64,Vector{Int}}(intvec)[1,1]
-    @test_throws TypeError Adjoint{Float64,Matrix{Int}}(intmat)[1,1]
-    @test_throws TypeError Transpose{Float64,Vector{Int}}(intvec)[1,1]
-    @test_throws TypeError Transpose{Float64,Matrix{Int}}(intmat)[1,1]
-    # Adjoint/Transpose wrapped array type must match the input array type
-    @test_throws TypeError Adjoint{Int,Vector{Float64}}(intvec)[1,1]
-    @test_throws TypeError Adjoint{Int,Matrix{Float64}}(intmat)[1,1]
-    @test_throws TypeError Transpose{Int,Vector{Float64}}(intvec)[1,1]
-    @test_throws TypeError Transpose{Int,Matrix{Float64}}(intmat)[1,1]
-    # Adjoint/Transpose inner constructor basic functionality, concrete scalar eltype
-    @test (Adjoint{Int,Vector{Int}}(intvec)::Adjoint{Int,Vector{Int}}).parent === intvec
-    @test (Adjoint{Int,Matrix{Int}}(intmat)::Adjoint{Int,Matrix{Int}}).parent === intmat
-    @test (Transpose{Int,Vector{Int}}(intvec)::Transpose{Int,Vector{Int}}).parent === intvec
-    @test (Transpose{Int,Matrix{Int}}(intmat)::Transpose{Int,Matrix{Int}}).parent === intmat
-    # Adjoint/Transpose inner constructor basic functionality, abstract scalar eltype
-    anyvec, anymat = Any[1, 2], Any[1 2; 3 4]
-    @test (Adjoint{Any,Vector{Any}}(anyvec)::Adjoint{Any,Vector{Any}}).parent === anyvec
-    @test (Adjoint{Any,Matrix{Any}}(anymat)::Adjoint{Any,Matrix{Any}}).parent === anymat
-    @test (Transpose{Any,Vector{Any}}(anyvec)::Transpose{Any,Vector{Any}}).parent === anyvec
-    @test (Transpose{Any,Matrix{Any}}(anymat)::Transpose{Any,Matrix{Any}}).parent === anymat
-    # Adjoint/Transpose inner constructor basic functionality, concrete array eltype
-    intvecvec = [[1, 2], [3, 4]]
-    intmatmat = [[[1 2]] [[3 4]] [[5 6]]; [[7 8]] [[9 10]] [[11 12]]]
-    @test (X = Adjoint{Adjoint{Int,Vector{Int}},Vector{Vector{Int}}}(intvecvec);
-            isa(X, Adjoint{Adjoint{Int,Vector{Int}},Vector{Vector{Int}}}) && X.parent === intvecvec)
-    @test (X = Adjoint{Adjoint{Int,Matrix{Int}},Matrix{Matrix{Int}}}(intmatmat);
-            isa(X, Adjoint{Adjoint{Int,Matrix{Int}},Matrix{Matrix{Int}}}) && X.parent === intmatmat)
-    @test (X = Transpose{Transpose{Int,Vector{Int}},Vector{Vector{Int}}}(intvecvec);
-            isa(X, Transpose{Transpose{Int,Vector{Int}},Vector{Vector{Int}}}) && X.parent === intvecvec)
-    @test (X = Transpose{Transpose{Int,Matrix{Int}},Matrix{Matrix{Int}}}(intmatmat);
-            isa(X, Transpose{Transpose{Int,Matrix{Int}},Matrix{Matrix{Int}}}) && X.parent === intmatmat)
-end
-
-@testset "Adjoint and Transpose outer constructor basics" begin
-    intvec, intmat = [1, 2], [1 2; 3 4]
-    # the wrapped array's eltype strictly determines the Adjoint/Transpose eltype
-    # so Adjoint{T}/Transpose{T} constructors are somewhat unnecessary and error-prone
-    # so ascertain that such calls throw whether or not T and the input eltype are compatible
-    @test_throws MethodError Adjoint{Int}(intvec)
-    @test_throws MethodError Adjoint{Int}(intmat)
-    @test_throws MethodError Adjoint{Float64}(intvec)
-    @test_throws MethodError Adjoint{Float64}(intmat)
-    @test_throws MethodError Transpose{Int}(intvec)
-    @test_throws MethodError Transpose{Int}(intmat)
-    @test_throws MethodError Transpose{Float64}(intvec)
-    @test_throws MethodError Transpose{Float64}(intmat)
-    # Adjoint/Transpose outer constructor basic functionality, concrete scalar eltype
-    @test (Adjoint(intvec)::Adjoint{Int,Vector{Int}}).parent === intvec
-    @test (Adjoint(intmat)::Adjoint{Int,Matrix{Int}}).parent === intmat
-    @test (Transpose(intvec)::Transpose{Int,Vector{Int}}).parent === intvec
-    @test (Transpose(intmat)::Transpose{Int,Matrix{Int}}).parent === intmat
-    # the tests for the inner constructors exercise abstract scalar and concrete array eltype, forgoing here
-end
-
-@testset "Adjoint and Transpose add additional layers to already-wrapped objects" begin
-    intvec, intmat = [1, 2], [1 2; 3 4]
-    @test (A = Adjoint(Adjoint(intvec))::Adjoint{Int,Adjoint{Int,Vector{Int}}}; A.parent.parent === intvec)
-    @test (A = Adjoint(Adjoint(intmat))::Adjoint{Int,Adjoint{Int,Matrix{Int}}}; A.parent.parent === intmat)
-    @test (A = Transpose(Transpose(intvec))::Transpose{Int,Transpose{Int,Vector{Int}}}; A.parent.parent === intvec)
-    @test (A = Transpose(Transpose(intmat))::Transpose{Int,Transpose{Int,Matrix{Int}}}; A.parent.parent === intmat)
-end
-
-@testset "Adjoint and Transpose basic AbstractArray functionality" begin
-    # vectors and matrices with real scalar eltype, and their adjoints/transposes
-    intvec, intmat = [1, 2], [1 2 3; 4 5 6]
-    tintvec, tintmat = [1 2], [1 4; 2 5; 3 6]
-    @testset "length methods" begin
-        @test length(Adjoint(intvec)) == length(intvec)
-        @test length(Adjoint(intmat)) == length(intmat)
-        @test length(Transpose(intvec)) == length(intvec)
-        @test length(Transpose(intmat)) == length(intmat)
-    end
-    @testset "size methods" begin
-        @test size(Adjoint(intvec)) == (1, length(intvec))
-        @test size(Adjoint(intmat)) == reverse(size(intmat))
-        @test size(Transpose(intvec)) == (1, length(intvec))
-        @test size(Transpose(intmat)) == reverse(size(intmat))
-    end
-    @testset "indices methods" begin
-        @test axes(Adjoint(intvec)) == (Base.OneTo(1), Base.OneTo(length(intvec)))
-        @test axes(Adjoint(intmat)) == reverse(axes(intmat))
-        @test axes(Transpose(intvec)) == (Base.OneTo(1), Base.OneTo(length(intvec)))
-        @test axes(Transpose(intmat)) == reverse(axes(intmat))
-    end
-    @testset "IndexStyle methods" begin
-        @test IndexStyle(Adjoint(intvec)) == IndexLinear()
-        @test IndexStyle(Adjoint(intmat)) == IndexCartesian()
-        @test IndexStyle(Transpose(intvec)) == IndexLinear()
-        @test IndexStyle(Transpose(intmat)) == IndexCartesian()
-    end
-    # vectors and matrices with complex scalar eltype, and their adjoints/transposes
-    complexintvec, complexintmat = [1im, 2im], [1im 2im 3im; 4im 5im 6im]
-    tcomplexintvec, tcomplexintmat = [1im 2im], [1im 4im; 2im 5im; 3im 6im]
-    acomplexintvec, acomplexintmat = conj.(tcomplexintvec), conj.(tcomplexintmat)
-    # vectors and matrices with real-vector and real-matrix eltype, and their adjoints/transposes
-    intvecvec = [[1, 2], [3, 4]]
-    tintvecvec = [[[1 2]] [[3 4]]]
-    intmatmat = [[[1 2]] [[3  4]] [[ 5  6]];
-                 [[7 8]] [[9 10]] [[11 12]]]
-    tintmatmat = [[hcat([1, 2])] [hcat([7, 8])];
-                  [hcat([3, 4])] [hcat([9, 10])];
-                  [hcat([5, 6])] [hcat([11, 12])]]
-    # vectors and matrices with complex-vector and complex-matrix eltype, and their adjoints/transposes
-    complexintvecvec, complexintmatmat = im .* (intvecvec, intmatmat)
-    tcomplexintvecvec, tcomplexintmatmat = im .* (tintvecvec, tintmatmat)
-    acomplexintvecvec, acomplexintmatmat = conj.(tcomplexintvecvec), conj.(tcomplexintmatmat)
-    @testset "getindex methods, elementary" begin
-        # implicitly test elementary definitions, for arrays with concrete real scalar eltype
-        @test Adjoint(intvec) == tintvec
-        @test Adjoint(intmat) == tintmat
-        @test Transpose(intvec) == tintvec
-        @test Transpose(intmat) == tintmat
-        # implicitly test elementary definitions, for arrays with concrete complex scalar eltype
-        @test Adjoint(complexintvec) == acomplexintvec
-        @test Adjoint(complexintmat) == acomplexintmat
-        @test Transpose(complexintvec) == tcomplexintvec
-        @test Transpose(complexintmat) == tcomplexintmat
-        # implicitly test elementary definitions, for arrays with concrete real-array eltype
-        @test Adjoint(intvecvec) == tintvecvec
-        @test Adjoint(intmatmat) == tintmatmat
-        @test Transpose(intvecvec) == tintvecvec
-        @test Transpose(intmatmat) == tintmatmat
-        # implicitly test elementary definitions, for arrays with concrete complex-array type
-        @test Adjoint(complexintvecvec) == acomplexintvecvec
-        @test Adjoint(complexintmatmat) == acomplexintmatmat
-        @test Transpose(complexintvecvec) == tcomplexintvecvec
-        @test Transpose(complexintmatmat) == tcomplexintmatmat
-    end
-    @testset "getindex(::AdjOrTransVec, ::Colon, ::AbstractArray{Int}) methods that preserve wrapper type" begin
-        # for arrays with concrete scalar eltype
-        @test Adjoint(intvec)[:, [1, 2]] == Adjoint(intvec)
-        @test Transpose(intvec)[:, [1, 2]] == Transpose(intvec)
-        @test Adjoint(complexintvec)[:, [1, 2]] == Adjoint(complexintvec)
-        @test Transpose(complexintvec)[:, [1, 2]] == Transpose(complexintvec)
-        # for arrays with concrete array eltype
-        @test Adjoint(intvecvec)[:, [1, 2]] == Adjoint(intvecvec)
-        @test Transpose(intvecvec)[:, [1, 2]] == Transpose(intvecvec)
-        @test Adjoint(complexintvecvec)[:, [1, 2]] == Adjoint(complexintvecvec)
-        @test Transpose(complexintvecvec)[:, [1, 2]] == Transpose(complexintvecvec)
-    end
-    @testset "getindex(::AdjOrTransVec, ::Colon, ::Colon) methods that preserve wrapper type" begin
-        # for arrays with concrete scalar eltype
-        @test Adjoint(intvec)[:, :] == Adjoint(intvec)
-        @test Transpose(intvec)[:, :] == Transpose(intvec)
-        @test Adjoint(complexintvec)[:, :] == Adjoint(complexintvec)
-        @test Transpose(complexintvec)[:, :] == Transpose(complexintvec)
-        # for arrays with concrete array elype
-        @test Adjoint(intvecvec)[:, :] == Adjoint(intvecvec)
-        @test Transpose(intvecvec)[:, :] == Transpose(intvecvec)
-        @test Adjoint(complexintvecvec)[:, :] == Adjoint(complexintvecvec)
-        @test Transpose(complexintvecvec)[:, :] == Transpose(complexintvecvec)
-    end
-    @testset "getindex(::AdjOrTransVec, ::Colon, ::Int) should preserve wrapper type on result entries" begin
-        # for arrays with concrete scalar eltype
-        @test Adjoint(intvec)[:, 2] == intvec[2:2]
-        @test Transpose(intvec)[:, 2] == intvec[2:2]
-        @test Adjoint(complexintvec)[:, 2] == conj.(complexintvec[2:2])
-        @test Transpose(complexintvec)[:, 2] == complexintvec[2:2]
-        # for arrays with concrete array eltype
-        @test Adjoint(intvecvec)[:, 2] == Adjoint.(intvecvec[2:2])
-        @test Transpose(intvecvec)[:, 2] == Transpose.(intvecvec[2:2])
-        @test Adjoint(complexintvecvec)[:, 2] == Adjoint.(complexintvecvec[2:2])
-        @test Transpose(complexintvecvec)[:, 2] == Transpose.(complexintvecvec[2:2])
-    end
-    @testset "setindex! methods" begin
-        # for vectors with real scalar eltype
-        @test (wv = Adjoint(copy(intvec));
-                wv === setindex!(wv, 3, 2) &&
-                 wv == setindex!(copy(tintvec), 3, 1, 2)    )
-        @test (wv = Transpose(copy(intvec));
-                wv === setindex!(wv, 4, 2) &&
-                 wv == setindex!(copy(tintvec), 4, 1, 2)    )
-        # for matrices with real scalar eltype
-        @test (wA = Adjoint(copy(intmat));
-                wA === setindex!(wA, 7, 3, 1) &&
-                 wA == setindex!(copy(tintmat), 7, 3, 1)    )
-        @test (wA = Transpose(copy(intmat));
-                wA === setindex!(wA, 7, 3, 1) &&
-                 wA == setindex!(copy(tintmat), 7, 3, 1)    )
-        # for vectors with complex scalar eltype
-        @test (wz = Adjoint(copy(complexintvec));
-                wz === setindex!(wz, 3im, 2) &&
-                 wz == setindex!(copy(acomplexintvec), 3im, 1, 2)   )
-        @test (wz = Transpose(copy(complexintvec));
-                wz === setindex!(wz, 4im, 2) &&
-                 wz == setindex!(copy(tcomplexintvec), 4im, 1, 2)   )
-        # for  matrices with complex scalar eltype
-        @test (wZ = Adjoint(copy(complexintmat));
-                wZ === setindex!(wZ, 7im, 3, 1) &&
-                 wZ == setindex!(copy(acomplexintmat), 7im, 3, 1)   )
-        @test (wZ = Transpose(copy(complexintmat));
-                wZ === setindex!(wZ, 7im, 3, 1) &&
-                 wZ == setindex!(copy(tcomplexintmat), 7im, 3, 1)   )
-        # for vectors with concrete real-vector eltype
-        @test (wv = Adjoint(copy(intvecvec));
-                wv === setindex!(wv, Adjoint([5, 6]), 2) &&
-                 wv == setindex!(copy(tintvecvec), [5 6], 2))
-        @test (wv = Transpose(copy(intvecvec));
-                wv === setindex!(wv, Transpose([5, 6]), 2) &&
-                 wv == setindex!(copy(tintvecvec), [5 6], 2))
-        # for matrices with concrete real-matrix eltype
-        @test (wA = Adjoint(copy(intmatmat));
-                wA === setindex!(wA, Adjoint([13 14]), 3, 1) &&
-                 wA == setindex!(copy(tintmatmat), hcat([13, 14]), 3, 1))
-        @test (wA = Transpose(copy(intmatmat));
-                wA === setindex!(wA, Transpose([13 14]), 3, 1) &&
-                 wA == setindex!(copy(tintmatmat), hcat([13, 14]), 3, 1))
-        # for vectors with concrete complex-vector eltype
-        @test (wz = Adjoint(copy(complexintvecvec));
-                wz === setindex!(wz, Adjoint([5im, 6im]), 2) &&
-                 wz == setindex!(copy(acomplexintvecvec), [-5im -6im], 2))
-        @test (wz = Transpose(copy(complexintvecvec));
-                wz === setindex!(wz, Transpose([5im, 6im]), 2) &&
-                 wz == setindex!(copy(tcomplexintvecvec), [5im 6im], 2))
-        # for matrices with concrete complex-matrix eltype
-        @test (wZ = Adjoint(copy(complexintmatmat));
-                wZ === setindex!(wZ, Adjoint([13im 14im]), 3, 1) &&
-                 wZ == setindex!(copy(acomplexintmatmat), hcat([-13im, -14im]), 3, 1))
-        @test (wZ = Transpose(copy(complexintmatmat));
-                wZ === setindex!(wZ, Transpose([13im 14im]), 3, 1) &&
-                 wZ == setindex!(copy(tcomplexintmatmat), hcat([13im, 14im]), 3, 1))
-    end
-end
-
-@testset "Adjoint and Transpose convert methods that convert underlying storage" begin
-    intvec, intmat = [1, 2], [1 2 3; 4 5 6]
-    @test convert(Adjoint{Float64,Vector{Float64}}, Adjoint(intvec))::Adjoint{Float64,Vector{Float64}} == Adjoint(intvec)
-    @test convert(Adjoint{Float64,Matrix{Float64}}, Adjoint(intmat))::Adjoint{Float64,Matrix{Float64}} == Adjoint(intmat)
-    @test convert(Transpose{Float64,Vector{Float64}}, Transpose(intvec))::Transpose{Float64,Vector{Float64}} == Transpose(intvec)
-    @test convert(Transpose{Float64,Matrix{Float64}}, Transpose(intmat))::Transpose{Float64,Matrix{Float64}} == Transpose(intmat)
-end
-
-isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
-using .Main.ImmutableArrays
-
-@testset "Adjoint and Transpose convert methods to AbstractArray" begin
-    # tests corresponding to #34995
-    intvec, intmat = [1, 2], [1 2 3; 4 5 6]
-    statvec = ImmutableArray(intvec)
-    statmat = ImmutableArray(intmat)
-
-    @test convert(AbstractArray{Float64}, Adjoint(statvec))::Adjoint{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Adjoint(statvec)
-    @test convert(AbstractArray{Float64}, Adjoint(statmat))::Array{Float64,2} == Adjoint(statmat)
-    @test convert(AbstractArray{Float64}, Transpose(statvec))::Transpose{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Transpose(statvec)
-    @test convert(AbstractArray{Float64}, Transpose(statmat))::Array{Float64,2} == Transpose(statmat)
-    @test convert(AbstractMatrix{Float64}, Adjoint(statvec))::Adjoint{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Adjoint(statvec)
-    @test convert(AbstractMatrix{Float64}, Adjoint(statmat))::Array{Float64,2} == Adjoint(statmat)
-    @test convert(AbstractMatrix{Float64}, Transpose(statvec))::Transpose{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Transpose(statvec)
-    @test convert(AbstractMatrix{Float64}, Transpose(statmat))::Array{Float64,2} == Transpose(statmat)
-end
-
-@testset "Adjoint and Transpose similar methods" begin
-    intvec, intmat = [1, 2], [1 2 3; 4 5 6]
-    # similar with no additional specifications, vector (rewrapping) semantics
-    @test size(similar(Adjoint(intvec))::Adjoint{Int,Vector{Int}}) == size(Adjoint(intvec))
-    @test size(similar(Transpose(intvec))::Transpose{Int,Vector{Int}}) == size(Transpose(intvec))
-    # similar with no additional specifications, matrix (no-rewrapping) semantics
-    @test size(similar(Adjoint(intmat))::Matrix{Int}) == size(Adjoint(intmat))
-    @test size(similar(Transpose(intmat))::Matrix{Int}) == size(Transpose(intmat))
-    # similar with element type specification, vector (rewrapping) semantics
-    @test size(similar(Adjoint(intvec), Float64)::Adjoint{Float64,Vector{Float64}}) == size(Adjoint(intvec))
-    @test size(similar(Transpose(intvec), Float64)::Transpose{Float64,Vector{Float64}}) == size(Transpose(intvec))
-    # similar with element type specification, matrix (no-rewrapping) semantics
-    @test size(similar(Adjoint(intmat), Float64)::Matrix{Float64}) == size(Adjoint(intmat))
-    @test size(similar(Transpose(intmat), Float64)::Matrix{Float64}) == size(Transpose(intmat))
-    # similar with element type and arbitrary dims specifications
-    shape = (2, 2, 2)
-    @test size(similar(Adjoint(intvec), Float64, shape)::Array{Float64,3}) == shape
-    @test size(similar(Adjoint(intmat), Float64, shape)::Array{Float64,3}) == shape
-    @test size(similar(Transpose(intvec), Float64, shape)::Array{Float64,3}) == shape
-    @test size(similar(Transpose(intmat), Float64, shape)::Array{Float64,3}) == shape
-end
-
-@testset "Adjoint and Transpose parent methods" begin
-    intvec, intmat = [1, 2], [1 2 3; 4 5 6]
-    @test parent(Adjoint(intvec)) === intvec
-    @test parent(Adjoint(intmat)) === intmat
-    @test parent(Transpose(intvec)) === intvec
-    @test parent(Transpose(intmat)) === intmat
-end
-
-@testset "Adjoint and Transpose vector vec methods" begin
-    intvec = [1, 2]
-    @test vec(Adjoint(intvec)) === intvec
-    @test vec(Transpose(intvec)) === intvec
-    cvec = [1 + 1im]
-    @test vec(cvec')[1] == cvec[1]'
-    mvec = [[1 2; 3 4+5im]];
-    @test vec(transpose(mvec))[1] == transpose(mvec[1])
-    @test vec(adjoint(mvec))[1] == adjoint(mvec[1])
-end
-
-@testset "horizontal concatenation of Adjoint/Transpose-wrapped vectors and Numbers" begin
-    # horizontal concatenation of Adjoint/Transpose-wrapped vectors and Numbers
-    # should preserve the Adjoint/Transpose-wrapper to preserve semantics downstream
-    vec, tvec, avec = [1im, 2im], [1im 2im], [-1im -2im]
-    vecvec = [[1im, 2im], [3im, 4im]]
-    tvecvec = [[[1im 2im]] [[3im 4im]]]
-    avecvec = [[[-1im -2im]] [[-3im -4im]]]
-    # for arrays with concrete scalar eltype
-    @test hcat(Adjoint(vec), Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == hcat(avec, avec)
-    @test hcat(Adjoint(vec), 1, Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == hcat(avec, 1, avec)
-    @test hcat(Transpose(vec), Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == hcat(tvec, tvec)
-    @test hcat(Transpose(vec), 1, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == hcat(tvec, 1, tvec)
-    # for arrays with concrete array eltype
-    @test hcat(Adjoint(vecvec), Adjoint(vecvec))::Adjoint{Adjoint{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == hcat(avecvec, avecvec)
-    @test hcat(Transpose(vecvec), Transpose(vecvec))::Transpose{Transpose{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == hcat(tvecvec, tvecvec)
-end
-
-@testset "map/broadcast over Adjoint/Transpose-wrapped vectors and Numbers" begin
-    # map and broadcast over Adjoint/Transpose-wrapped vectors and Numbers
-    # should preserve the Adjoint/Transpose-wrapper to preserve semantics downstream
-    vec, tvec, avec = [1im, 2im], [1im 2im], [-1im -2im]
-    vecvec = [[1im, 2im], [3im, 4im]]
-    tvecvec = [[[1im 2im]] [[3im 4im]]]
-    avecvec = [[[-1im -2im]] [[-3im -4im]]]
-    # unary map over wrapped vectors with concrete scalar eltype
-    @test map(-, Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == -avec
-    @test map(-, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == -tvec
-    # unary map over wrapped vectors with concrete array eltype
-    @test map(-, Adjoint(vecvec))::Adjoint{Adjoint{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == -avecvec
-    @test map(-, Transpose(vecvec))::Transpose{Transpose{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == -tvecvec
-    # binary map over wrapped vectors with concrete scalar eltype
-    @test map(+, Adjoint(vec), Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == avec + avec
-    @test map(+, Transpose(vec), Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == tvec + tvec
-    # binary map over wrapped vectors with concrete array eltype
-    @test map(+, Adjoint(vecvec), Adjoint(vecvec))::Adjoint{Adjoint{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == avecvec + avecvec
-    @test map(+, Transpose(vecvec), Transpose(vecvec))::Transpose{Transpose{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == tvecvec + tvecvec
-    # unary broadcast over wrapped vectors with concrete scalar eltype
-    @test broadcast(-, Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == -avec
-    @test broadcast(-, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == -tvec
-    # unary broadcast over wrapped vectors with concrete array eltype
-    @test broadcast(-, Adjoint(vecvec))::Adjoint{Adjoint{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == -avecvec
-    @test broadcast(-, Transpose(vecvec))::Transpose{Transpose{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == -tvecvec
-    # binary broadcast over wrapped vectors with concrete scalar eltype
-    @test broadcast(+, Adjoint(vec), Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == avec + avec
-    @test broadcast(+, Transpose(vec), Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == tvec + tvec
-    # binary broadcast over wrapped vectors with concrete array eltype
-    @test broadcast(+, Adjoint(vecvec), Adjoint(vecvec))::Adjoint{Adjoint{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == avecvec + avecvec
-    @test broadcast(+, Transpose(vecvec), Transpose(vecvec))::Transpose{Transpose{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == tvecvec + tvecvec
-    # trinary broadcast over wrapped vectors with concrete scalar eltype and numbers
-    @test broadcast(+, Adjoint(vec), 1, Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == avec + avec .+ 1
-    @test broadcast(+, Transpose(vec), 1, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == tvec + tvec .+ 1
-    @test broadcast(+, Adjoint(vec), 1im, Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == avec + avec .+ 1im
-    @test broadcast(+, Transpose(vec), 1im, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == tvec + tvec .+ 1im
-end
-
-@testset "Adjoint/Transpose-wrapped vector multiplication" begin
-    realvec, realmat = [1, 2, 3], [1 2 3; 4 5 6; 7 8 9]
-    complexvec, complexmat = [1im, 2, -3im], [1im 2 3; 4 5 -6im; 7im 8 9]
-    # Adjoint/Transpose-vector * vector
-    @test Adjoint(realvec) * realvec == dot(realvec, realvec)
-    @test Transpose(realvec) * realvec == dot(realvec, realvec)
-    @test Adjoint(complexvec) * complexvec == dot(complexvec, complexvec)
-    @test Transpose(complexvec) * complexvec == dot(conj(complexvec), complexvec)
-    # vector * Adjoint/Transpose-vector
-    @test realvec * Adjoint(realvec) == broadcast(*, realvec, reshape(realvec, (1, 3)))
-    @test realvec * Transpose(realvec) == broadcast(*, realvec, reshape(realvec, (1, 3)))
-    @test complexvec * Adjoint(complexvec) == broadcast(*, complexvec, reshape(conj(complexvec), (1, 3)))
-    @test complexvec * Transpose(complexvec) == broadcast(*, complexvec, reshape(complexvec, (1, 3)))
-    # Adjoint/Transpose-vector * matrix
-    @test (Adjoint(realvec) * realmat)::Adjoint{Int,Vector{Int}} ==
-        reshape(copy(Adjoint(realmat)) * realvec, (1, 3))
-    @test (Transpose(realvec) * realmat)::Transpose{Int,Vector{Int}} ==
-        reshape(copy(Transpose(realmat)) * realvec, (1, 3))
-    @test (Adjoint(complexvec) * complexmat)::Adjoint{Complex{Int},Vector{Complex{Int}}} ==
-        reshape(conj(copy(Adjoint(complexmat)) * complexvec), (1, 3))
-    @test (Transpose(complexvec) * complexmat)::Transpose{Complex{Int},Vector{Complex{Int}}} ==
-        reshape(copy(Transpose(complexmat)) * complexvec, (1, 3))
-    # Adjoint/Transpose-vector * Adjoint/Transpose-matrix
-    @test (Adjoint(realvec) * Adjoint(realmat))::Adjoint{Int,Vector{Int}} ==
-        reshape(realmat * realvec, (1, 3))
-    @test (Transpose(realvec) * Transpose(realmat))::Transpose{Int,Vector{Int}} ==
-        reshape(realmat * realvec, (1, 3))
-    @test (Adjoint(complexvec) * Adjoint(complexmat))::Adjoint{Complex{Int},Vector{Complex{Int}}} ==
-        reshape(conj(complexmat * complexvec), (1, 3))
-    @test (Transpose(complexvec) * Transpose(complexmat))::Transpose{Complex{Int},Vector{Complex{Int}}} ==
-        reshape(complexmat * complexvec, (1, 3))
-end
-
-@testset "Adjoint/Transpose-wrapped vector pseudoinversion" begin
-    realvec, complexvec = [1, 2, 3, 4], [1im, 2, 3im, 4]
-    rowrealvec, rowcomplexvec = reshape(realvec, (1, 4)), reshape(complexvec, (1, 4))
-    # pinv(Adjoint/Transpose-vector) should match matrix equivalents
-    # TODO tighten type asserts once pinv yields Transpose/Adjoint
-    @test pinv(Adjoint(realvec))::Vector{Float64} ≈ pinv(rowrealvec)
-    @test pinv(Transpose(realvec))::Vector{Float64} ≈ pinv(rowrealvec)
-    @test pinv(Adjoint(complexvec))::Vector{ComplexF64} ≈ pinv(conj(rowcomplexvec))
-    @test pinv(Transpose(complexvec))::Vector{ComplexF64} ≈ pinv(rowcomplexvec)
-end
-
-@testset "Adjoint/Transpose-wrapped vector left-division" begin
-    realvec, complexvec = [1., 2., 3., 4.,], [1.0im, 2., 3.0im, 4.]
-    rowrealvec, rowcomplexvec = reshape(realvec, (1, 4)), reshape(complexvec, (1, 4))
-    # \(Adjoint/Transpose-vector, Adjoint/Transpose-vector) should mat matrix equivalents
-    @test Adjoint(realvec)\Adjoint(realvec) ≈ rowrealvec\rowrealvec
-    @test Transpose(realvec)\Transpose(realvec) ≈ rowrealvec\rowrealvec
-    @test Adjoint(complexvec)\Adjoint(complexvec) ≈ conj(rowcomplexvec)\conj(rowcomplexvec)
-    @test Transpose(complexvec)\Transpose(complexvec) ≈ rowcomplexvec\rowcomplexvec
-end
-
-@testset "Adjoint/Transpose-wrapped vector right-division" begin
-    realvec, realmat = [1, 2, 3], [1 0 0; 0 2 0; 0 0 3]
-    complexvec, complexmat = [1im, 2, -3im], [2im 0 0; 0 3 0; 0 0 -5im]
-    rowrealvec, rowcomplexvec = reshape(realvec, (1, 3)), reshape(complexvec, (1, 3))
-    # /(Adjoint/Transpose-vector, matrix)
-    @test (Adjoint(realvec) / realmat)::Adjoint ≈ rowrealvec / realmat
-    @test (Adjoint(complexvec) / complexmat)::Adjoint ≈ conj(rowcomplexvec) / complexmat
-    @test (Transpose(realvec) / realmat)::Transpose ≈ rowrealvec / realmat
-    @test (Transpose(complexvec) / complexmat)::Transpose ≈ rowcomplexvec / complexmat
-    # /(Adjoint/Transpose-vector, Adjoint matrix)
-    @test (Adjoint(realvec) / Adjoint(realmat))::Adjoint ≈ rowrealvec / copy(Adjoint(realmat))
-    @test (Adjoint(complexvec) / Adjoint(complexmat))::Adjoint ≈ conj(rowcomplexvec) / copy(Adjoint(complexmat))
-    @test (Transpose(realvec) / Adjoint(realmat))::Transpose ≈ rowrealvec / copy(Adjoint(realmat))
-    @test (Transpose(complexvec) / Adjoint(complexmat))::Transpose ≈ rowcomplexvec / copy(Adjoint(complexmat))
-    # /(Adjoint/Transpose-vector, Transpose matrix)
-    @test (Adjoint(realvec) / Transpose(realmat))::Adjoint ≈ rowrealvec / copy(Transpose(realmat))
-    @test (Adjoint(complexvec) / Transpose(complexmat))::Adjoint ≈ conj(rowcomplexvec) / copy(Transpose(complexmat))
-    @test (Transpose(realvec) / Transpose(realmat))::Transpose ≈ rowrealvec / copy(Transpose(realmat))
-    @test (Transpose(complexvec) / Transpose(complexmat))::Transpose ≈ rowcomplexvec / copy(Transpose(complexmat))
-end
-
-@testset "norm and opnorm of Adjoint/Transpose-wrapped vectors" begin
-    # definitions are in base/linalg/generic.jl
-    realvec, complexvec = [3, -4], [3im, -4im]
-    # one norm result should be sum(abs.(realvec)) == 7
-    # two norm result should be sqrt(sum(abs.(realvec))) == 5
-    # inf norm result should be maximum(abs.(realvec)) == 4
-    for v in (realvec, complexvec)
-        @test norm(Adjoint(v)) ≈ 5
-        @test norm(Adjoint(v), 1) ≈ 7
-        @test norm(Adjoint(v), Inf) ≈ 4
-        @test norm(Transpose(v)) ≈ 5
-        @test norm(Transpose(v), 1) ≈ 7
-        @test norm(Transpose(v), Inf) ≈ 4
-    end
-    # one opnorm result should be maximum(abs.(realvec)) == 4
-    # two opnorm result should be sqrt(sum(abs.(realvec))) == 5
-    # inf opnorm result should be sum(abs.(realvec)) == 7
-    for v in (realvec, complexvec)
-        @test opnorm(Adjoint(v)) ≈ 5
-        @test opnorm(Adjoint(v), 1) ≈ 4
-        @test opnorm(Adjoint(v), Inf) ≈ 7
-        @test opnorm(Transpose(v)) ≈ 5
-        @test opnorm(Transpose(v), 1) ≈ 4
-        @test opnorm(Transpose(v), Inf) ≈ 7
-    end
-end
-
-@testset "adjoint and transpose of Numbers" begin
-    @test adjoint(1) == 1
-    @test adjoint(1.0) == 1.0
-    @test adjoint(1im) == -1im
-    @test adjoint(1.0im) == -1.0im
-    @test transpose(1) == 1
-    @test transpose(1.0) == 1.0
-    @test transpose(1im) == 1im
-    @test transpose(1.0im) == 1.0im
-end
-
-@testset "adjoint!(a, b) return a" begin
-    a = fill(1.0+im, 5)
-    b = fill(1.0+im, 1, 5)
-    @test adjoint!(a, b) === a
-    @test adjoint!(b, a) === b
-end
-
-@testset "aliasing with adjoint and transpose" begin
-    A = collect(reshape(1:25, 5, 5)) .+ rand.().*im
-    B = copy(A)
-    B .= B'
-    @test B == A'
-    B = copy(A)
-    B .= transpose(B)
-    @test B == transpose(A)
-    B = copy(A)
-    B .= B .* B'
-    @test B == A .* A'
-end
-
-@testset "test show methods for $t of Factorizations" for t in (adjoint, transpose)
-    A = randn(ComplexF64, 4, 4)
-    F = lu(A)
-    Fop = t(F)
-    @test sprint(show, Fop) ==
-                  "$t of "*sprint(show, parent(Fop))
-    @test sprint((io, t) -> show(io, MIME"text/plain"(), t), Fop) ==
-                  "$t of "*sprint((io, t) -> show(io, MIME"text/plain"(), t), parent(Fop))
-end
-
-@testset "showarg" begin
-    io = IOBuffer()
-
-    A = ones(Float64, 3,3)
-
-    B = Adjoint(A)
-    @test summary(B) == "3×3 adjoint(::Matrix{Float64}) with eltype Float64"
-    @test Base.showarg(io, B, false) === nothing
-    @test String(take!(io)) == "adjoint(::Matrix{Float64})"
-
-    B = Transpose(A)
-    @test summary(B) == "3×3 transpose(::Matrix{Float64}) with eltype Float64"
-    @test Base.showarg(io, B, false) === nothing
-    @test String(take!(io)) == "transpose(::Matrix{Float64})"
-end
-
-@testset "strided transposes" begin
-    for t in (Adjoint, Transpose)
-        @test strides(t(rand(3))) == (3, 1)
-        @test strides(t(rand(3,2))) == (3, 1)
-        @test strides(t(view(rand(3, 2), :))) == (6, 1)
-        @test strides(t(view(rand(3, 2), :, 1:2))) == (3, 1)
-
-        A = rand(3)
-        @test pointer(t(A)) === pointer(A)
-        B = rand(3,1)
-        @test pointer(t(B)) === pointer(B)
-    end
-    @test_throws MethodError strides(Adjoint(rand(3) .+ rand(3).*im))
-    @test_throws MethodError strides(Adjoint(rand(3, 2) .+ rand(3, 2).*im))
-    @test strides(Transpose(rand(3) .+ rand(3).*im)) == (3, 1)
-    @test strides(Transpose(rand(3, 2) .+ rand(3, 2).*im)) == (3, 1)
-
-    C = rand(3) .+ rand(3).*im
-    @test_throws ErrorException pointer(Adjoint(C))
-    @test pointer(Transpose(C)) === pointer(C)
-    D = rand(3,2) .+ rand(3,2).*im
-    @test_throws ErrorException pointer(Adjoint(D))
-    @test pointer(Transpose(D)) === pointer(D)
-end
-
-isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
-using .Main.OffsetArrays
-
-@testset "offset axes" begin
-    s = Base.Slice(-3:3)'
-    @test axes(s) === (Base.OneTo(1), Base.IdentityUnitRange(-3:3))
-    @test collect(LinearIndices(s)) == reshape(1:7, 1, 7)
-    @test collect(CartesianIndices(s)) == reshape([CartesianIndex(1,i) for i = -3:3], 1, 7)
-    @test s[1] == -3
-    @test s[7] ==  3
-    @test s[4] ==  0
-    @test_throws BoundsError s[0]
-    @test_throws BoundsError s[8]
-    @test s[1,-3] == -3
-    @test s[1, 3] ==  3
-    @test s[1, 0] ==  0
-    @test_throws BoundsError s[1,-4]
-    @test_throws BoundsError s[1, 4]
-end
-
-@testset "specialized conj of Adjoint/Transpose" begin
-    realmat = [1 2; 3 4]
-    complexmat = ComplexF64[1+im 2; 3 4-im]
-    nested = [[complexmat] [-complexmat]; [0complexmat] [3complexmat]]
-    @testset "AdjOrTrans{...,$(typeof(i))}" for i in (
-                                                      realmat, vec(realmat),
-                                                      complexmat, vec(complexmat),
-                                                      nested, vec(nested),
-                                                     )
-        for (t,type) in ((transpose, Adjoint), (adjoint, Transpose))
-            M = t(i)
-            @test conj(M) isa type
-            @test conj(M) == conj(collect(M))
-            @test conj(conj(M)) === M
-        end
-    end
-    # test if `conj(transpose(::Hermitian))` is a no-op
-    hermitian = Hermitian([1 2+im; 2-im 3])
-    @test conj(transpose(hermitian)) === hermitian
-end
-
-@testset "empty and mismatched lengths" begin
-    # issue 36678
-    @test_throws DimensionMismatch [1, 2]' * [1,2,3]
-    @test Int[]' * Int[] == 0
-    @test transpose(Int[]) * Int[] == 0
-end
-
-@testset "reductions: $adjtrans" for adjtrans in (transpose, adjoint)
-    for (reduction, reduction!, op) in ((sum, sum!, +), (prod, prod!, *), (minimum, minimum!, min), (maximum, maximum!, max))
-        T = op in (max, min) ? Float64 : ComplexF64
-        mat = rand(T, 3,5)
-        rd1 = zeros(T, 1, 3)
-        rd2 = zeros(T, 5, 1)
-        rd3 = zeros(T, 1, 1)
-        @test reduction(adjtrans(mat)) ≈ reduction(copy(adjtrans(mat)))
-        @test reduction(adjtrans(mat), dims=1) ≈ reduction(copy(adjtrans(mat)), dims=1)
-        @test reduction(adjtrans(mat), dims=2) ≈ reduction(copy(adjtrans(mat)), dims=2)
-        @test reduction(adjtrans(mat), dims=(1,2)) ≈ reduction(copy(adjtrans(mat)), dims=(1,2))
-
-        @test reduction!(rd1, adjtrans(mat)) ≈ reduction!(rd1, copy(adjtrans(mat)))
-        @test reduction!(rd2, adjtrans(mat)) ≈ reduction!(rd2, copy(adjtrans(mat)))
-        @test reduction!(rd3, adjtrans(mat)) ≈ reduction!(rd3, copy(adjtrans(mat)))
-
-        @test reduction(imag, adjtrans(mat)) ≈ reduction(imag, copy(adjtrans(mat)))
-        @test reduction(imag, adjtrans(mat), dims=1) ≈ reduction(imag, copy(adjtrans(mat)), dims=1)
-        @test reduction(imag, adjtrans(mat), dims=2) ≈ reduction(imag, copy(adjtrans(mat)), dims=2)
-        @test reduction(imag, adjtrans(mat), dims=(1,2)) ≈ reduction(imag, copy(adjtrans(mat)), dims=(1,2))
-
-        @test Base.mapreducedim!(imag, op, rd1, adjtrans(mat)) ≈ Base.mapreducedim!(imag, op, rd1, copy(adjtrans(mat)))
-        @test Base.mapreducedim!(imag, op, rd2, adjtrans(mat)) ≈ Base.mapreducedim!(imag, op, rd2, copy(adjtrans(mat)))
-        @test Base.mapreducedim!(imag, op, rd3, adjtrans(mat)) ≈ Base.mapreducedim!(imag, op, rd3, copy(adjtrans(mat)))
-
-        op in (max, min) && continue
-        mat = [rand(T,2,2) for _ in 1:3, _ in 1:5]
-        rd1 = fill(zeros(T, 2, 2), 1, 3)
-        rd2 = fill(zeros(T, 2, 2), 5, 1)
-        rd3 = fill(zeros(T, 2, 2), 1, 1)
-        @test reduction(adjtrans(mat)) ≈ reduction(copy(adjtrans(mat)))
-        @test reduction(adjtrans(mat), dims=1) ≈ reduction(copy(adjtrans(mat)), dims=1)
-        @test reduction(adjtrans(mat), dims=2) ≈ reduction(copy(adjtrans(mat)), dims=2)
-        @test reduction(adjtrans(mat), dims=(1,2)) ≈ reduction(copy(adjtrans(mat)), dims=(1,2))
-
-        @test reduction(imag, adjtrans(mat)) ≈ reduction(imag, copy(adjtrans(mat)))
-        @test reduction(x -> x[1,2], adjtrans(mat)) ≈ reduction(x -> x[1,2], copy(adjtrans(mat)))
-        @test reduction(imag, adjtrans(mat), dims=1) ≈ reduction(imag, copy(adjtrans(mat)), dims=1)
-        @test reduction(x -> x[1,2], adjtrans(mat), dims=1) ≈ reduction(x -> x[1,2], copy(adjtrans(mat)), dims=1)
-    end
-    # see #46605
-    Ac = [1 2; 3 4]'
-    @test mapreduce(identity, (x, y) -> 10x+y, copy(Ac)) == mapreduce(identity, (x, y) -> 10x+y, Ac) == 1234
-    @test extrema([3,7,4]') == (3, 7)
-    @test mapreduce(x -> [x;;;], +, [1, 2, 3]') == sum(x -> [x;;;], [1, 2, 3]') == [6;;;]
-    @test mapreduce(string, *, [1 2; 3 4]') == mapreduce(string, *, copy([1 2; 3 4]')) == "1234"
-end
-
-@testset "trace" begin
-    for T in (Float64, ComplexF64), t in (adjoint, transpose)
-        A = randn(T, 10, 10)
-        @test tr(t(A)) == tr(copy(t(A))) == t(tr(A))
-    end
-end
-
-@testset "structured printing" begin
-    D = Diagonal(1:3)
-    @test sprint(Base.print_matrix, Adjoint(D)) == sprint(Base.print_matrix, D)
-    @test sprint(Base.print_matrix, Transpose(D)) == sprint(Base.print_matrix, D)
-    D = Diagonal((1:3)*im)
-    D2 = Diagonal((1:3)*(-im))
-    @test sprint(Base.print_matrix, Transpose(D)) == sprint(Base.print_matrix, D)
-    @test sprint(Base.print_matrix, Adjoint(D)) == sprint(Base.print_matrix, D2)
-
-    struct OneHotVecOrMat{N} <: AbstractArray{Bool,N}
-        inds::NTuple{N,Int}
-        sz::NTuple{N,Int}
-    end
-    Base.size(x::OneHotVecOrMat) = x.sz
-    function Base.getindex(x::OneHotVecOrMat{N}, inds::Vararg{Int,N}) where {N}
-        checkbounds(x, inds...)
-        inds == x.inds
-    end
-    Base.replace_in_print_matrix(o::OneHotVecOrMat{1}, i::Integer, j::Integer, s::AbstractString) =
-        o.inds == (i,) ? s : Base.replace_with_centered_mark(s)
-    Base.replace_in_print_matrix(o::OneHotVecOrMat{2}, i::Integer, j::Integer, s::AbstractString) =
-        o.inds == (i,j) ? s : Base.replace_with_centered_mark(s)
-
-    o = OneHotVecOrMat((2,), (4,))
-    @test sprint(Base.print_matrix, Transpose(o)) == sprint(Base.print_matrix, OneHotVecOrMat((1,2), (1,4)))
-    @test sprint(Base.print_matrix, Adjoint(o)) == sprint(Base.print_matrix, OneHotVecOrMat((1,2), (1,4)))
-end
-
-end # module TestAdjointTranspose
diff --git a/stdlib/LinearAlgebra/test/ambiguous_exec.jl b/stdlib/LinearAlgebra/test/ambiguous_exec.jl
deleted file mode 100644
index 7b89c0a457afb..0000000000000
--- a/stdlib/LinearAlgebra/test/ambiguous_exec.jl
+++ /dev/null
@@ -1,21 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test, LinearAlgebra
-let ambig = detect_ambiguities(LinearAlgebra; recursive=true)
-    @test isempty(ambig)
-    ambig = Set{Any}(((m1.sig, m2.sig) for (m1, m2) in ambig))
-    expect = []
-    good = true
-    while !isempty(ambig)
-        sigs = pop!(ambig)
-        i = findfirst(==(sigs), expect)
-        if i === nothing
-            println(stderr, "push!(expect, (", sigs[1], ", ", sigs[2], "))")
-            good = false
-            continue
-        end
-        deleteat!(expect, i)
-    end
-    @test isempty(expect)
-    @test good
-end
diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl
deleted file mode 100644
index 2306b46b1315e..0000000000000
--- a/stdlib/LinearAlgebra/test/bidiag.jl
+++ /dev/null
@@ -1,840 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestBidiagonal
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasReal, BlasFloat
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-
-isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
-using .Main.Furlongs
-
-isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
-using .Main.Quaternions
-
-isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl"))
-using .Main.InfiniteArrays
-
-isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl"))
-using .Main.FillArrays
-
-include("testutils.jl") # test_approx_eq_modphase
-
-n = 10 #Size of test matrix
-Random.seed!(1)
-
-@testset for relty in (Int, Float32, Float64, BigFloat), elty in (relty, Complex{relty})
-    if relty <: AbstractFloat
-        dv = convert(Vector{elty}, randn(n))
-        ev = convert(Vector{elty}, randn(n-1))
-        if (elty <: Complex)
-            dv += im*convert(Vector{elty}, randn(n))
-            ev += im*convert(Vector{elty}, randn(n-1))
-        end
-    elseif relty <: Integer
-        dv = convert(Vector{elty}, rand(1:10, n))
-        ev = convert(Vector{elty}, rand(1:10, n-1))
-        if (elty <: Complex)
-            dv += im*convert(Vector{elty}, rand(1:10, n))
-            ev += im*convert(Vector{elty}, rand(1:10, n-1))
-        end
-    end
-    dv0 = zeros(elty, 0)
-    ev0 = zeros(elty, 0)
-
-    @testset "Constructors" begin
-        for (x, y) in ((dv0, ev0), (dv, ev), (GenericArray(dv), GenericArray(ev)))
-            # from vectors
-            ubd = Bidiagonal(x, y, :U)
-            lbd = Bidiagonal(x, y, :L)
-            @test ubd != lbd || x === dv0
-            @test ubd.dv === x
-            @test lbd.ev === y
-            @test_throws ArgumentError Bidiagonal(x, y, :R)
-            @test_throws ArgumentError Bidiagonal(x, y, 'R')
-            x == dv0 || @test_throws DimensionMismatch Bidiagonal(x, x, :U)
-            @test_throws MethodError Bidiagonal(x, y)
-            # from matrix
-            @test Bidiagonal(ubd, :U) == Bidiagonal(Matrix(ubd), :U) == ubd
-            @test Bidiagonal(lbd, :L) == Bidiagonal(Matrix(lbd), :L) == lbd
-            # from its own type
-            @test typeof(ubd)(ubd) === ubd
-            @test typeof(lbd)(lbd) === lbd
-        end
-        @test eltype(Bidiagonal{elty}([1,2,3,4], [1.0f0,2.0f0,3.0f0], :U)) == elty
-        @test eltype(Bidiagonal([1,2,3,4], [1.0f0,2.0f0,3.0f0], :U)) == Float32 # promotion test
-        @test isa(Bidiagonal{elty,Vector{elty}}(GenericArray(dv), ev, :U), Bidiagonal{elty,Vector{elty}})
-        @test_throws MethodError Bidiagonal(dv, GenericArray(ev), :U)
-        @test_throws MethodError Bidiagonal(GenericArray(dv), ev, :U)
-        BI = Bidiagonal([1,2,3,4], [1,2,3], :U)
-        @test Bidiagonal(BI) === BI
-        @test isa(Bidiagonal{elty}(BI), Bidiagonal{elty})
-    end
-
-    @testset "getindex, setindex!, size, and similar" begin
-        ubd = Bidiagonal(dv, ev, :U)
-        lbd = Bidiagonal(dv, ev, :L)
-        # bidiagonal getindex / upper & lower
-        @test_throws BoundsError ubd[n + 1, 1]
-        @test_throws BoundsError ubd[1, n + 1]
-        @test ubd[2, 2] == dv[2]
-        # bidiagonal getindex / upper
-        @test ubd[2, 3] == ev[2]
-        @test iszero(ubd[3, 2])
-        # bidiagonal getindex / lower
-        @test lbd[3, 2] == ev[2]
-        @test iszero(lbd[2, 3])
-        # bidiagonal setindex! / upper
-        cubd = copy(ubd)
-        @test_throws ArgumentError ubd[2, 1] = 1
-        @test_throws ArgumentError ubd[3, 1] = 1
-        @test (cubd[2, 1] = 0; cubd == ubd)
-        @test ((cubd[1, 2] = 10) == 10; cubd[1, 2] == 10)
-        # bidiagonal setindex! / lower
-        clbd = copy(lbd)
-        @test_throws ArgumentError lbd[1, 2] = 1
-        @test_throws ArgumentError lbd[1, 3] = 1
-        @test (clbd[1, 2] = 0; clbd == lbd)
-        @test ((clbd[2, 1] = 10) == 10; clbd[2, 1] == 10)
-        # bidiagonal setindex! / upper & lower
-        @test_throws BoundsError ubd[n + 1, 1] = 1
-        @test_throws BoundsError ubd[1, n + 1] = 1
-        @test ((cubd[2, 2] = 10) == 10; cubd[2, 2] == 10)
-        # bidiagonal size
-        @test_throws ArgumentError size(ubd, 0)
-        @test size(ubd, 1) == size(ubd, 2) == n
-        @test size(ubd, 3) == 1
-        # bidiagonal similar
-        @test isa(similar(ubd), Bidiagonal{elty})
-        @test similar(ubd).uplo == ubd.uplo
-        @test isa(similar(ubd, Int), Bidiagonal{Int})
-        @test similar(ubd, Int).uplo == ubd.uplo
-        @test isa(similar(ubd, (3, 2)), Matrix)
-        @test isa(similar(ubd, Int, (3, 2)), Matrix{Int})
-
-        # setindex! when off diagonal is zero bug
-        Bu = Bidiagonal(rand(elty, 10), zeros(elty, 9), 'U')
-        Bl = Bidiagonal(rand(elty, 10), zeros(elty, 9), 'L')
-        @test_throws ArgumentError Bu[5, 4] = 1
-        @test_throws ArgumentError Bl[4, 5] = 1
-    end
-
-    @testset "isstored" begin
-        ubd = Bidiagonal(dv, ev, :U)
-        lbd = Bidiagonal(dv, ev, :L)
-        # bidiagonal isstored / upper & lower
-        @test_throws BoundsError Base.isstored(ubd, n + 1, 1)
-        @test_throws BoundsError Base.isstored(ubd, 1, n + 1)
-        @test Base.isstored(ubd, 2, 2)
-        # bidiagonal isstored / upper
-        @test Base.isstored(ubd, 2, 3)
-        @test !Base.isstored(ubd, 3, 2)
-        # bidiagonal isstored / lower
-        @test Base.isstored(lbd, 3, 2)
-        @test !Base.isstored(lbd, 2, 3)
-    end
-
-    @testset "show" begin
-        BD = Bidiagonal(dv, ev, :U)
-        dstring = sprint(Base.print_matrix,BD.dv')
-        estring = sprint(Base.print_matrix,BD.ev')
-        @test sprint(show,BD) == "$(summary(BD)):\n diag:$dstring\n super:$estring"
-        BD = Bidiagonal(dv,ev,:L)
-        @test sprint(show,BD) == "$(summary(BD)):\n diag:$dstring\n sub:$estring"
-    end
-
-    @testset for uplo in (:U, :L)
-        T = Bidiagonal(dv, ev, uplo)
-
-        @testset "Constructor and basic properties" begin
-            @test size(T, 1) == size(T, 2) == n
-            @test size(T) == (n, n)
-            @test Array(T) == diagm(0 => dv, (uplo === :U ? 1 : -1) => ev)
-            @test Bidiagonal(Array(T), uplo) == T
-            @test big.(T) == T
-            @test Array(abs.(T)) == abs.(diagm(0 => dv, (uplo === :U ? 1 : -1) => ev))
-            @test Array(real(T)) == real(diagm(0 => dv, (uplo === :U ? 1 : -1) => ev))
-            @test Array(imag(T)) == imag(diagm(0 => dv, (uplo === :U ? 1 : -1) => ev))
-        end
-
-        @testset for func in (conj, transpose, adjoint)
-            @test func(func(T)) == T
-        end
-
-        @testset "permutedims(::Bidiagonal)" begin
-            @test permutedims(permutedims(T)) === T
-            @test permutedims(T) == transpose.(transpose(T))
-            @test permutedims(T, [1, 2]) === T
-            @test permutedims(T, (2, 1)) == permutedims(T)
-        end
-
-        @testset "triu and tril" begin
-            zerosdv = zeros(elty, length(dv))
-            zerosev = zeros(elty, length(ev))
-            bidiagcopy(dv, ev, uplo) = Bidiagonal(copy(dv), copy(ev), uplo)
-
-            @test istril(Bidiagonal(dv,ev,:L))
-            @test istril(Bidiagonal(dv,ev,:L), 1)
-            @test !istril(Bidiagonal(dv,ev,:L), -1)
-            @test istril(Bidiagonal(zerosdv,ev,:L), -1)
-            @test !istril(Bidiagonal(zerosdv,ev,:L), -2)
-            @test istril(Bidiagonal(zerosdv,zerosev,:L), -2)
-            @test !istril(Bidiagonal(dv,ev,:U))
-            @test istril(Bidiagonal(dv,ev,:U), 1)
-            @test !istril(Bidiagonal(dv,ev,:U), -1)
-            @test !istril(Bidiagonal(zerosdv,ev,:U), -1)
-            @test istril(Bidiagonal(zerosdv,zerosev,:U), -1)
-            @test tril!(bidiagcopy(dv,ev,:U),-1) == Bidiagonal(zerosdv,zerosev,:U)
-            @test tril!(bidiagcopy(dv,ev,:L),-1) == Bidiagonal(zerosdv,ev,:L)
-            @test tril!(bidiagcopy(dv,ev,:U),-2) == Bidiagonal(zerosdv,zerosev,:U)
-            @test tril!(bidiagcopy(dv,ev,:L),-2) == Bidiagonal(zerosdv,zerosev,:L)
-            @test tril!(bidiagcopy(dv,ev,:U),1)  == Bidiagonal(dv,ev,:U)
-            @test tril!(bidiagcopy(dv,ev,:L),1)  == Bidiagonal(dv,ev,:L)
-            @test tril!(bidiagcopy(dv,ev,:U))    == Bidiagonal(dv,zerosev,:U)
-            @test tril!(bidiagcopy(dv,ev,:L))    == Bidiagonal(dv,ev,:L)
-            @test_throws ArgumentError tril!(bidiagcopy(dv, ev, :U), -n - 2)
-            @test_throws ArgumentError tril!(bidiagcopy(dv, ev, :U), n)
-
-            @test istriu(Bidiagonal(dv,ev,:U))
-            @test istriu(Bidiagonal(dv,ev,:U), -1)
-            @test !istriu(Bidiagonal(dv,ev,:U), 1)
-            @test istriu(Bidiagonal(zerosdv,ev,:U), 1)
-            @test !istriu(Bidiagonal(zerosdv,ev,:U), 2)
-            @test istriu(Bidiagonal(zerosdv,zerosev,:U), 2)
-            @test !istriu(Bidiagonal(dv,ev,:L))
-            @test istriu(Bidiagonal(dv,ev,:L), -1)
-            @test !istriu(Bidiagonal(dv,ev,:L), 1)
-            @test !istriu(Bidiagonal(zerosdv,ev,:L), 1)
-            @test istriu(Bidiagonal(zerosdv,zerosev,:L), 1)
-            @test triu!(bidiagcopy(dv,ev,:L),1)  == Bidiagonal(zerosdv,zerosev,:L)
-            @test triu!(bidiagcopy(dv,ev,:U),1)  == Bidiagonal(zerosdv,ev,:U)
-            @test triu!(bidiagcopy(dv,ev,:U),2)  == Bidiagonal(zerosdv,zerosev,:U)
-            @test triu!(bidiagcopy(dv,ev,:L),2)  == Bidiagonal(zerosdv,zerosev,:L)
-            @test triu!(bidiagcopy(dv,ev,:U),-1) == Bidiagonal(dv,ev,:U)
-            @test triu!(bidiagcopy(dv,ev,:L),-1) == Bidiagonal(dv,ev,:L)
-            @test triu!(bidiagcopy(dv,ev,:L))    == Bidiagonal(dv,zerosev,:L)
-            @test triu!(bidiagcopy(dv,ev,:U))    == Bidiagonal(dv,ev,:U)
-            @test_throws ArgumentError triu!(bidiagcopy(dv, ev, :U), -n)
-            @test_throws ArgumentError triu!(bidiagcopy(dv, ev, :U), n + 2)
-            @test !isdiag(Bidiagonal(dv,ev,:U))
-            @test !isdiag(Bidiagonal(dv,ev,:L))
-            @test isdiag(Bidiagonal(dv,zerosev,:U))
-            @test isdiag(Bidiagonal(dv,zerosev,:L))
-        end
-
-        @testset "iszero and isone" begin
-            for uplo in (:U, :L)
-                BDzero = Bidiagonal(zeros(elty, 10), zeros(elty, 9), uplo)
-                BDone = Bidiagonal(ones(elty, 10), zeros(elty, 9), uplo)
-                BDmix = Bidiagonal(zeros(elty, 10), zeros(elty, 9), uplo)
-                BDmix[end,end] = one(elty)
-
-                @test iszero(BDzero)
-                @test !isone(BDzero)
-                @test !iszero(BDone)
-                @test isone(BDone)
-                @test !iszero(BDmix)
-                @test !isone(BDmix)
-            end
-        end
-
-        @testset "trace" begin
-            for uplo in (:U, :L)
-                B = Bidiagonal(dv, ev, uplo)
-                if relty <: Integer
-                    @test tr(B) == tr(Matrix(B))
-                else
-                    @test tr(B) ≈ tr(Matrix(B)) rtol=2eps(relty)
-                end
-            end
-        end
-
-        Tfull = Array(T)
-        @testset "Linear solves" begin
-            if relty <: AbstractFloat
-                c = convert(Matrix{elty}, randn(n,n))
-                b = convert(Matrix{elty}, randn(n, 2))
-                if (elty <: Complex)
-                    b += im*convert(Matrix{elty}, randn(n, 2))
-                end
-            elseif relty <: Integer
-                c = convert(Matrix{elty}, rand(1:10, n, n))
-                b = convert(Matrix{elty}, rand(1:10, n, 2))
-                if (elty <: Complex)
-                    b += im*convert(Matrix{elty}, rand(1:10, n, 2))
-                end
-            end
-            condT = cond(map(ComplexF64,Tfull))
-            promty = typeof((zero(relty)*zero(relty) + zero(relty)*zero(relty))/one(relty))
-            if relty != BigFloat
-                x = transpose(T)\transpose(c)
-                tx = transpose(Tfull) \ transpose(c)
-                elty <: AbstractFloat && @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-                @test_throws DimensionMismatch transpose(T)\transpose(b)
-                x = T'\copy(transpose(c))
-                tx = Tfull'\copy(transpose(c))
-                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-                @test_throws DimensionMismatch T'\copy(transpose(b))
-                x = T\transpose(c)
-                tx = Tfull\transpose(c)
-                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-                @test_throws DimensionMismatch T\transpose(b)
-            end
-            offsizemat = Matrix{elty}(undef, n+1, 2)
-            @test_throws DimensionMismatch T \ offsizemat
-            @test_throws DimensionMismatch transpose(T) \ offsizemat
-            @test_throws DimensionMismatch T' \ offsizemat
-
-            if elty <: BigFloat
-                @test_throws SingularException ldiv!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :U), rand(elty, n))
-                @test_throws SingularException ldiv!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :L), rand(elty, n))
-            end
-            let bb = b, cc = c
-                for atype in ("Array", "SubArray")
-                    if atype == "Array"
-                        b = bb
-                        c = cc
-                    else
-                        b = view(bb, 1:n)
-                        c = view(cc, 1:n, 1:2)
-                    end
-                end
-                x = T \ b
-                tx = Tfull \ b
-                @test_throws DimensionMismatch ldiv!(T, Vector{elty}(undef, n+1))
-                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-                x = transpose(T) \ b
-                tx = transpose(Tfull) \ b
-                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-                x = copy(transpose(b)) / T
-                tx = copy(transpose(b)) / Tfull
-                @test_throws DimensionMismatch rdiv!(Matrix{elty}(undef, 1, n+1), T)
-                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-                x = copy(transpose(b)) / transpose(T)
-                tx = copy(transpose(b)) / transpose(Tfull)
-                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-                @testset "Generic Mat-vec ops" begin
-                    @test T*b ≈ Tfull*b
-                    @test T'*b ≈ Tfull'*b
-                    if relty != BigFloat # not supported by pivoted QR
-                        @test T/b' ≈ Tfull/b'
-                    end
-                end
-            end
-            zdv = Vector{elty}(undef, 0)
-            zev = Vector{elty}(undef, 0)
-            zA  = Bidiagonal(zdv, zev, :U)
-            zb  = Vector{elty}(undef, 0)
-            @test ldiv!(zA, zb) === zb
-            @testset "linear solves with abstract matrices" begin
-                diag = b[:,1]
-                D = Diagonal(diag)
-                x = T \ D
-                tx = Tfull \ D
-                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-                x = D / T
-                tx = D / Tfull
-                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-                x = transpose(T) \ D
-                tx = transpose(Tfull) \ D
-                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-                x = D / transpose(T)
-                tx = D / transpose(Tfull)
-                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
-            end
-            @testset "Specialized multiplication/division" begin
-                getval(x) = x
-                getval(x::Furlong) = x.val
-                function _bidiagdivmultest(T,
-                        x,
-                        typemul=T.uplo == 'U' ? UpperTriangular : Matrix,
-                        typediv=T.uplo == 'U' ? UpperTriangular : Matrix,
-                        typediv2=T.uplo == 'U' ? UpperTriangular : Matrix)
-                    TM = Matrix(T)
-                    @test map(getval, (T*x)::typemul) ≈ map(getval, TM*x)
-                    @test map(getval, (x*T)::typemul) ≈ map(getval, x*TM)
-                    @test map(getval, (x\T)::typediv) ≈ map(getval, x\TM)
-                    @test map(getval, (T/x)::typediv) ≈ map(getval, TM/x)
-                    if !isa(x, Number)
-                        @test map(getval, Array((T\x)::typediv2)) ≈ map(getval, Array(TM\x))
-                        @test map(getval, Array((x/T)::typediv2)) ≈ map(getval, Array(x/TM))
-                    end
-                    return nothing
-                end
-                A = Matrix(T)
-                for t in (T, Furlong.(T)), (A, dv, ev) in ((A, dv, ev), (Furlong.(A), Furlong.(dv), Furlong.(ev)))
-                    _bidiagdivmultest(t, 5, Bidiagonal, Bidiagonal)
-                    _bidiagdivmultest(t, 5I, Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular)
-                    _bidiagdivmultest(t, Diagonal(dv), Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular)
-                    _bidiagdivmultest(t, UpperTriangular(A))
-                    _bidiagdivmultest(t, UnitUpperTriangular(A))
-                    _bidiagdivmultest(t, LowerTriangular(A), t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix)
-                    _bidiagdivmultest(t, UnitLowerTriangular(A), t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix)
-                    _bidiagdivmultest(t, Bidiagonal(dv, ev, :U), Matrix, Matrix, Matrix)
-                    _bidiagdivmultest(t, Bidiagonal(dv, ev, :L), Matrix, Matrix, Matrix)
-                end
-            end
-        end
-
-        if elty <: BlasReal
-            @testset "$f" for f in (floor, trunc, round, ceil)
-                @test (f.(Int, T))::Bidiagonal == Bidiagonal(f.(Int, T.dv), f.(Int, T.ev), T.uplo)
-                @test (f.(T))::Bidiagonal == Bidiagonal(f.(T.dv), f.(T.ev), T.uplo)
-            end
-        end
-
-        @testset "diag" begin
-            @test (@inferred diag(T))::typeof(dv) == dv
-            @test (@inferred diag(T, uplo === :U ? 1 : -1))::typeof(dv) == ev
-            @test (@inferred diag(T,2))::typeof(dv) == zeros(elty, n-2)
-            @test_throws ArgumentError diag(T, -n - 1)
-            @test_throws ArgumentError diag(T,  n + 1)
-            # test diag with another wrapped vector type
-            gdv, gev = GenericArray(dv), GenericArray(ev)
-            G = Bidiagonal(gdv, gev, uplo)
-            @test (@inferred diag(G))::typeof(gdv) == gdv
-            @test (@inferred diag(G, uplo === :U ? 1 : -1))::typeof(gdv) == gev
-            @test (@inferred diag(G,2))::typeof(gdv) == GenericArray(zeros(elty, n-2))
-        end
-
-        @testset "Eigensystems" begin
-            if relty <: AbstractFloat
-                d1, v1 = eigen(T)
-                d2, v2 = eigen(map(elty<:Complex ? ComplexF64 : Float64,Tfull), sortby=nothing)
-                @test (uplo === :U ? d1 : reverse(d1)) ≈ d2
-                if elty <: Real
-                    test_approx_eq_modphase(v1, uplo === :U ? v2 : v2[:,n:-1:1])
-                end
-            end
-        end
-
-        @testset "Singular systems" begin
-            if (elty <: BlasReal)
-                @test AbstractArray(svd(T)) ≈ AbstractArray(svd!(copy(Tfull)))
-                @test svdvals(Tfull) ≈ svdvals(T)
-                u1, d1, v1 = svd(Tfull)
-                u2, d2, v2 = svd(T)
-                @test d1 ≈ d2
-                if elty <: Real
-                    test_approx_eq_modphase(u1, u2)
-                    test_approx_eq_modphase(copy(v1), copy(v2))
-                end
-                @test 0 ≈ norm(u2*Diagonal(d2)*v2'-Tfull) atol=n*max(n^2*eps(relty),norm(u1*Diagonal(d1)*v1'-Tfull))
-                @inferred svdvals(T)
-                @inferred svd(T)
-            end
-        end
-
-        @testset "Binary operations" begin
-            @test -T == Bidiagonal(-T.dv,-T.ev,T.uplo)
-            @test convert(elty,-1.0) * T == Bidiagonal(-T.dv,-T.ev,T.uplo)
-            @test T / convert(elty,-1.0) == Bidiagonal(-T.dv,-T.ev,T.uplo)
-            @test T * convert(elty,-1.0) == Bidiagonal(-T.dv,-T.ev,T.uplo)
-            @testset for uplo2 in (:U, :L)
-                dv = convert(Vector{elty}, relty <: AbstractFloat ? randn(n) : rand(1:10, n))
-                ev = convert(Vector{elty}, relty <: AbstractFloat ? randn(n-1) : rand(1:10, n-1))
-                T2 = Bidiagonal(dv, ev, uplo2)
-                Tfull2 = Array(T2)
-                for op in (+, -, *)
-                    @test Array(op(T, T2)) ≈ op(Tfull, Tfull2)
-                end
-            end
-            # test pass-through of mul! for SymTridiagonal*Bidiagonal
-            TriSym = SymTridiagonal(T.dv, T.ev)
-            @test Array(TriSym*T) ≈ Array(TriSym)*Array(T)
-            # test pass-through of mul! for AbstractTriangular*Bidiagonal
-            Tri = UpperTriangular(diagm(1 => T.ev))
-            Dia = Diagonal(T.dv)
-            @test Array(Tri*T) ≈ Array(Tri)*Array(T)
-            # test mul! itself for these types
-            for AA in (Tri, Dia)
-                for f in (identity, transpose, adjoint)
-                    C = rand(elty, n, n)
-                    D = copy(C) + 2.0 * Array(f(AA) * T)
-                    mul!(C, f(AA), T, 2.0, 1.0) ≈ D
-                end
-            end
-            # test mul! for BiTrySym * adjoint/transpose AbstractMat
-            for f in (identity, transpose, adjoint)
-                C = relty == Int ? rand(float(elty), n, n) : rand(elty, n, n)
-                B = rand(elty, n, n)
-                D = copy(C) + 2.0 * Array(T*f(B))
-                mul!(C, T, f(B), 2.0, 1.0) ≈ D
-            end
-
-            # Issue #31870
-            # Bi/Tri/Sym times Diagonal
-            Diag = Diagonal(rand(elty, 10))
-            BidiagU = Bidiagonal(rand(elty, 10), rand(elty, 9), 'U')
-            BidiagL = Bidiagonal(rand(elty, 10), rand(elty, 9), 'L')
-            Tridiag = Tridiagonal(rand(elty, 9), rand(elty, 10), rand(elty, 9))
-            SymTri = SymTridiagonal(rand(elty, 10), rand(elty, 9))
-
-            mats = Any[Diag, BidiagU, BidiagL, Tridiag, SymTri]
-            for a in mats
-                for b in mats
-                    @test a*b ≈ Matrix(a)*Matrix(b)
-                end
-            end
-
-            @test typeof(BidiagU*Diag) <: Bidiagonal
-            @test typeof(BidiagL*Diag) <: Bidiagonal
-            @test typeof(Tridiag*Diag) <: Tridiagonal
-            @test typeof(SymTri*Diag)  <: Tridiagonal
-
-            @test typeof(BidiagU*Diag) <: Bidiagonal
-            @test typeof(Diag*BidiagL) <: Bidiagonal
-            @test typeof(Diag*Tridiag) <: Tridiagonal
-            @test typeof(Diag*SymTri)  <: Tridiagonal
-        end
-
-        @test inv(T)*Tfull ≈ Matrix(I, n, n)
-        @test factorize(T) === T
-    end
-    BD = Bidiagonal(dv, ev, :U)
-    @test Matrix{ComplexF64}(BD) == BD
-end
-
-# Issue 10742 and similar
-let A = Bidiagonal([1,2,3], [0,0], :U)
-    @test istril(A)
-    @test isdiag(A)
-end
-
-# test construct from range
-@test Bidiagonal(1:3, 1:2, :U) == [1 1 0; 0 2 2; 0 0 3]
-
-@testset "promote_rule" begin
-    A = Bidiagonal(fill(1f0,10),fill(1f0,9),:U)
-    B = rand(Float64,10,10)
-    C = Tridiagonal(rand(Float64,9),rand(Float64,10),rand(Float64,9))
-    @test promote_rule(Matrix{Float64}, Bidiagonal{Float64}) == Matrix{Float64}
-    @test promote(B,A) == (B, convert(Matrix{Float64}, A))
-    @test promote(B,A) isa Tuple{Matrix{Float64}, Matrix{Float64}}
-    @test promote(C,A) == (C,Tridiagonal(zeros(Float64,9),convert(Vector{Float64},A.dv),convert(Vector{Float64},A.ev)))
-    @test promote(C,A) isa Tuple{Tridiagonal, Tridiagonal}
-end
-
-using LinearAlgebra: fillstored!, UnitLowerTriangular
-@testset "fill! and fillstored!" begin
-    let # fillstored!
-        A = Tridiagonal(randn(2), randn(3), randn(2))
-        @test fillstored!(A, 3) == Tridiagonal([3, 3], [3, 3, 3], [3, 3])
-        B = Bidiagonal(randn(3), randn(2), :U)
-        @test fillstored!(B, 2) == Bidiagonal([2,2,2], [2,2], :U)
-        S = SymTridiagonal(randn(3), randn(2))
-        @test fillstored!(S, 1) == SymTridiagonal([1,1,1], [1,1])
-        Ult = UnitLowerTriangular(randn(3,3))
-        @test fillstored!(Ult, 3) == UnitLowerTriangular([1 0 0; 3 1 0; 3 3 1])
-    end
-    let # fill!(exotic, 0)
-        exotic_arrays = Any[Tridiagonal(randn(3), randn(4), randn(3)),
-        Bidiagonal(randn(3), randn(2), rand([:U,:L])),
-        SymTridiagonal(randn(3), randn(2)),
-        Diagonal(randn(5)),
-        # LowerTriangular(randn(3,3)), # AbstractTriangular fill! deprecated, see below
-        # UpperTriangular(randn(3,3)) # AbstractTriangular fill! deprecated, see below
-        ]
-        for A in exotic_arrays
-            @test iszero(fill!(A, 0))
-        end
-
-        # Diagonal fill! is no longer deprecated. See #29780
-        # AbstractTriangular fill! was defined as fillstored!,
-        # not matching the general behavior of fill!, and so it has been deprecated.
-        # In a future dev cycle, this fill! methods should probably be reintroduced
-        # with behavior matching that of fill! for other structured matrix types.
-        # In the interim, equivalently test fillstored! below
-        @test iszero(fillstored!(Diagonal(fill(1, 3)), 0))
-        @test iszero(fillstored!(LowerTriangular(fill(1, 3, 3)), 0))
-        @test iszero(fillstored!(UpperTriangular(fill(1, 3, 3)), 0))
-    end
-    let # fill!(small, x)
-        val = randn()
-        b = Bidiagonal(randn(1,1), :U)
-        st = SymTridiagonal(randn(1,1))
-        d = Diagonal(rand(1))
-        for x in (b, st, d)
-            @test Array(fill!(x, val)) == fill!(Array(x), val)
-        end
-        b = Bidiagonal(randn(2,2), :U)
-        st = SymTridiagonal(randn(3), randn(2))
-        t = Tridiagonal(randn(3,3))
-        d = Diagonal(rand(3))
-        for x in (b, t, st, d)
-            @test_throws ArgumentError fill!(x, val)
-            @test Array(fill!(x, 0)) == fill!(Array(x), 0)
-        end
-    end
-end
-
-@testset "pathological promotion (#24707)" begin
-    @test promote_type(Matrix{Int}, Bidiagonal{Tuple{S}} where S<:Integer) <: Matrix
-    @test promote_type(Matrix{Tuple{T}} where T<:Integer, Bidiagonal{Tuple{S}} where S<:Integer) <: Matrix
-    @test promote_type(Matrix{Tuple{T}} where T<:Integer, Bidiagonal{Int}) <: Matrix
-    @test promote_type(Tridiagonal{Int}, Bidiagonal{Tuple{S}} where S<:Integer) <: Tridiagonal
-    @test promote_type(Tridiagonal{Tuple{T}} where T<:Integer, Bidiagonal{Tuple{S}} where S<:Integer) <: Tridiagonal
-    @test promote_type(Tridiagonal{Tuple{T}} where T<:Integer, Bidiagonal{Int}) <: Tridiagonal
-end
-
-@testset "solve with matrix elements" begin
-    A = triu(tril(randn(9, 9), 3), -3)
-    b = randn(9)
-    Alb = Bidiagonal(Any[tril(A[1:3,1:3]), tril(A[4:6,4:6]), tril(A[7:9,7:9])],
-                     Any[triu(A[4:6,1:3]), triu(A[7:9,4:6])], 'L')
-    Aub = Bidiagonal(Any[triu(A[1:3,1:3]), triu(A[4:6,4:6]), triu(A[7:9,7:9])],
-                     Any[tril(A[1:3,4:6]), tril(A[4:6,7:9])], 'U')
-    bb = Any[b[1:3], b[4:6], b[7:9]]
-    @test vcat((Alb\bb)...) ≈ LowerTriangular(A)\b
-    @test vcat((Aub\bb)...) ≈ UpperTriangular(A)\b
-    Alb = Bidiagonal([tril(A[1:3,1:3]), tril(A[4:6,4:6]), tril(A[7:9,7:9])],
-                     [triu(A[4:6,1:3]), triu(A[7:9,4:6])], 'L')
-    Aub = Bidiagonal([triu(A[1:3,1:3]), triu(A[4:6,4:6]), triu(A[7:9,7:9])],
-                     [tril(A[1:3,4:6]), tril(A[4:6,7:9])], 'U')
-    d = [randn(3,3) for _ in 1:3]
-    dl = [randn(3,3) for _ in 1:2]
-    B = [randn(3,3) for _ in 1:3, _ in 1:3]
-    for W in (UpperTriangular, LowerTriangular), t in (identity, adjoint, transpose)
-        @test Matrix(t(Alb) \ W(B)) ≈ t(Alb) \ Matrix(W(B))
-        @test Matrix(t(Aub) \ W(B)) ≈ t(Aub) \ Matrix(W(B))
-        @test Matrix(W(B) / t(Alb)) ≈ Matrix(W(B)) / t(Alb)
-        @test Matrix(W(B) / t(Aub)) ≈ Matrix(W(B)) / t(Aub)
-    end
-end
-
-@testset "sum, mapreduce" begin
-    Bu = Bidiagonal([1,2,3], [1,2], :U)
-    Budense = Matrix(Bu)
-    Bl = Bidiagonal([1,2,3], [1,2], :L)
-    Bldense = Matrix(Bl)
-    @test sum(Bu) == 9
-    @test sum(Bl) == 9
-    @test_throws ArgumentError sum(Bu, dims=0)
-    @test sum(Bu, dims=1) == sum(Budense, dims=1)
-    @test sum(Bu, dims=2) == sum(Budense, dims=2)
-    @test sum(Bu, dims=3) == sum(Budense, dims=3)
-    @test typeof(sum(Bu, dims=1)) == typeof(sum(Budense, dims=1))
-    @test mapreduce(one, min, Bu, dims=1) == mapreduce(one, min, Budense, dims=1)
-    @test mapreduce(one, min, Bu, dims=2) == mapreduce(one, min, Budense, dims=2)
-    @test mapreduce(one, min, Bu, dims=3) == mapreduce(one, min, Budense, dims=3)
-    @test typeof(mapreduce(one, min, Bu, dims=1)) == typeof(mapreduce(one, min, Budense, dims=1))
-    @test mapreduce(zero, max, Bu, dims=1) == mapreduce(zero, max, Budense, dims=1)
-    @test mapreduce(zero, max, Bu, dims=2) == mapreduce(zero, max, Budense, dims=2)
-    @test mapreduce(zero, max, Bu, dims=3) == mapreduce(zero, max, Budense, dims=3)
-    @test typeof(mapreduce(zero, max, Bu, dims=1)) == typeof(mapreduce(zero, max, Budense, dims=1))
-    @test_throws ArgumentError sum(Bl, dims=0)
-    @test sum(Bl, dims=1) == sum(Bldense, dims=1)
-    @test sum(Bl, dims=2) == sum(Bldense, dims=2)
-    @test sum(Bl, dims=3) == sum(Bldense, dims=3)
-    @test typeof(sum(Bl, dims=1)) == typeof(sum(Bldense, dims=1))
-    @test mapreduce(one, min, Bl, dims=1) == mapreduce(one, min, Bldense, dims=1)
-    @test mapreduce(one, min, Bl, dims=2) == mapreduce(one, min, Bldense, dims=2)
-    @test mapreduce(one, min, Bl, dims=3) == mapreduce(one, min, Bldense, dims=3)
-    @test typeof(mapreduce(one, min, Bl, dims=1)) == typeof(mapreduce(one, min, Bldense, dims=1))
-    @test mapreduce(zero, max, Bl, dims=1) == mapreduce(zero, max, Bldense, dims=1)
-    @test mapreduce(zero, max, Bl, dims=2) == mapreduce(zero, max, Bldense, dims=2)
-    @test mapreduce(zero, max, Bl, dims=3) == mapreduce(zero, max, Bldense, dims=3)
-    @test typeof(mapreduce(zero, max, Bl, dims=1)) == typeof(mapreduce(zero, max, Bldense, dims=1))
-
-    Bu = Bidiagonal([2], Int[], :U)
-    Budense = Matrix(Bu)
-    Bl = Bidiagonal([2], Int[], :L)
-    Bldense = Matrix(Bl)
-    @test sum(Bu) == 2
-    @test sum(Bl) == 2
-    @test_throws ArgumentError sum(Bu, dims=0)
-    @test sum(Bu, dims=1) == sum(Budense, dims=1)
-    @test sum(Bu, dims=2) == sum(Budense, dims=2)
-    @test sum(Bu, dims=3) == sum(Budense, dims=3)
-    @test typeof(sum(Bu, dims=1)) == typeof(sum(Budense, dims=1))
-end
-
-@testset "empty sub-diagonal" begin
-    # `mul!` must use non-specialized method when sub-diagonal is empty
-    A = [1 2 3 4]'
-    @test A * Tridiagonal(ones(1, 1)) == A
-end
-
-@testset "generalized dot" begin
-    for elty in (Float64, ComplexF64), n in (5, 1)
-        dv = randn(elty, n)
-        ev = randn(elty, n-1)
-        x = randn(elty, n)
-        y = randn(elty, n)
-        for uplo in (:U, :L)
-            B = Bidiagonal(dv, ev, uplo)
-            @test dot(x, B, y) ≈ dot(B'x, y) ≈ dot(x, B*y) ≈ dot(x, Matrix(B), y)
-        end
-        dv = Vector{elty}(undef, 0)
-        ev = Vector{elty}(undef, 0)
-        x = Vector{elty}(undef, 0)
-        y = Vector{elty}(undef, 0)
-        for uplo in (:U, :L)
-            B = Bidiagonal(dv, ev, uplo)
-            @test dot(x, B, y) === zero(elty)
-        end
-    end
-end
-
-@testset "multiplication of bidiagonal and triangular matrix" begin
-    n = 5
-    for eltyB in (Int, ComplexF64)
-        if eltyB == Int
-            BU = Bidiagonal(rand(1:7, n), rand(1:7, n - 1), :U)
-            BL = Bidiagonal(rand(1:7, n), rand(1:7, n - 1), :L)
-        else
-            BU = Bidiagonal(randn(eltyB, n), randn(eltyB, n - 1), :U)
-            BL = Bidiagonal(randn(eltyB, n), randn(eltyB, n - 1), :L)
-        end
-        for eltyT in (Int, ComplexF64)
-            for TriT in (LowerTriangular, UnitLowerTriangular, UpperTriangular, UnitUpperTriangular)
-                if eltyT == Int
-                    T = TriT(rand(1:7, n, n))
-                else
-                    T = TriT(randn(eltyT, n, n))
-                end
-                for B in (BU, BL)
-                    MB = Matrix(B)
-                    MT = Matrix(T)
-                    for transB in (identity, adjoint, transpose), transT in (identity, adjoint, transpose)
-                        @test transB(B) * transT(T) ≈ transB(MB) * transT(MT)
-                        @test transT(T) * transB(B) ≈ transT(MT) * transB(MB)
-                    end
-                end
-            end
-        end
-    end
-end
-
-struct MyNotANumberType
-    n::Float64
-end
-Base.zero(n::MyNotANumberType)      = MyNotANumberType(zero(Float64))
-Base.zero(T::Type{MyNotANumberType}) = MyNotANumberType(zero(Float64))
-Base.copy(n::MyNotANumberType)      = MyNotANumberType(copy(n.n))
-Base.transpose(n::MyNotANumberType) = n
-
-@testset "transpose for a non-numeric eltype" begin
-    @test !(MyNotANumberType(1.0) isa Number)
-    a = [MyNotANumberType(1.0), MyNotANumberType(2.0), MyNotANumberType(3.0)]
-    b = [MyNotANumberType(5.0), MyNotANumberType(6.0)]
-    B = Bidiagonal(a, b, :U)
-    tB = transpose(B)
-    @test tB == Bidiagonal(a, b, :L)
-    @test transpose(copy(tB)) == B
-end
-
-@testset "empty bidiagonal matrices" begin
-    dv0 = zeros(0)
-    ev0 = zeros(0)
-    zm = zeros(0, 0)
-    ubd = Bidiagonal(dv0, ev0, :U)
-    lbd = Bidiagonal(dv0, ev0, :L)
-    @test size(ubd) == (0, 0)
-    @test_throws BoundsError getindex(ubd, 1, 1)
-    @test_throws BoundsError setindex!(ubd, 0.0, 1, 1)
-    @test similar(ubd) == ubd
-    @test similar(lbd, Int) == zeros(Int, 0, 0)
-    @test ubd == zm
-    @test lbd == zm
-    @test ubd == lbd
-    @test ubd * ubd == ubd
-    @test lbd + lbd == lbd
-    @test lbd' == ubd
-    @test ubd' == lbd
-    @test triu(ubd, 1) == ubd
-    @test triu(lbd, 1) == ubd
-    @test tril(ubd, -1) == ubd
-    @test tril(lbd, -1) == ubd
-    @test_throws ArgumentError triu(ubd)
-    @test_throws ArgumentError tril(ubd)
-    @test sum(ubd) == 0.0
-    @test reduce(+, ubd) == 0.0
-    @test reduce(+, ubd, dims=1) == zeros(1, 0)
-    @test reduce(+, ubd, dims=2) == zeros(0, 1)
-    @test hcat(ubd, ubd) == zm
-    @test vcat(ubd, lbd) == zm
-    @test hcat(lbd, ones(0, 3)) == ones(0, 3)
-    @test fill!(copy(ubd), 1.0) == ubd
-    @test map(abs, ubd) == zm
-    @test lbd .+ 1 == zm
-    @test lbd + ubd isa Bidiagonal
-    @test lbd .+ ubd isa Bidiagonal
-    @test ubd * 5 == ubd
-    @test ubd .* 3 == ubd
-end
-
-@testset "non-commutative algebra (#39701)" begin
-    A = Bidiagonal(Quaternion.(randn(5), randn(5), randn(5), randn(5)), Quaternion.(randn(4), randn(4), randn(4), randn(4)), :U)
-    c = Quaternion(1,2,3,4)
-    @test A * c ≈ Matrix(A) * c
-    @test A / c ≈ Matrix(A) / c
-    @test c * A ≈ c * Matrix(A)
-    @test c \ A ≈ c \ Matrix(A)
-end
-
-isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
-using .Main.ImmutableArrays
-
-@testset "Conversion to AbstractArray" begin
-    # tests corresponding to #34995
-    dv = ImmutableArray([1, 2, 3, 4])
-    ev = ImmutableArray([7, 8, 9])
-    Bu = Bidiagonal(dv, ev, :U)
-    Bl = Bidiagonal(dv, ev, :L)
-
-    @test convert(AbstractArray{Float64}, Bu)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bu
-    @test convert(AbstractMatrix{Float64}, Bu)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bu
-    @test convert(AbstractArray{Float64}, Bl)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bl
-    @test convert(AbstractMatrix{Float64}, Bl)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bl
-end
-
-@testset "block-bidiagonal matrix indexing" begin
-    dv = [ones(4,3), ones(2,2).*2, ones(2,3).*3, ones(4,4).*4]
-    evu = [ones(4,2), ones(2,3).*2, ones(2,4).*3]
-    evl = [ones(2,3), ones(2,2).*2, ones(4,3).*3]
-    BU = Bidiagonal(dv, evu, :U)
-    BL = Bidiagonal(dv, evl, :L)
-    # check that all the matrices along a column have the same number of columns,
-    # and the matrices along a row have the same number of rows
-    for j in axes(BU, 2), i in 2:size(BU, 1)
-        @test size(BU[i,j], 2) == size(BU[1,j], 2)
-        @test size(BU[i,j], 1) == size(BU[i,1], 1)
-        if j < i || j > i + 1
-            @test iszero(BU[i,j])
-        end
-    end
-    for j in axes(BL, 2), i in 2:size(BL, 1)
-        @test size(BL[i,j], 2) == size(BL[1,j], 2)
-        @test size(BL[i,j], 1) == size(BL[i,1], 1)
-        if j < i-1 || j > i
-            @test iszero(BL[i,j])
-        end
-    end
-
-    M = ones(2,2)
-    for n in 0:1
-        dv = fill(M, n)
-        ev = fill(M, 0)
-        B = Bidiagonal(dv, ev, :U)
-        @test B == Matrix{eltype(B)}(B)
-    end
-end
-
-@testset "copyto! with UniformScaling" begin
-    @testset "Fill" begin
-        for len in (4, InfiniteArrays.Infinity())
-            d = FillArrays.Fill(1, len)
-            ud = FillArrays.Fill(0, len-1)
-            B = Bidiagonal(d, ud, :U)
-            @test copyto!(B, I) === B
-        end
-    end
-    B = Bidiagonal(fill(2, 4), fill(3, 3), :U)
-    copyto!(B, I)
-    @test all(isone, diag(B))
-    @test all(iszero, diag(B, 1))
-end
-
-end # module TestBidiagonal
diff --git a/stdlib/LinearAlgebra/test/blas.jl b/stdlib/LinearAlgebra/test/blas.jl
deleted file mode 100644
index 4252d9ee7938b..0000000000000
--- a/stdlib/LinearAlgebra/test/blas.jl
+++ /dev/null
@@ -1,724 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestBLAS
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasReal, BlasComplex
-using Libdl: dlsym, dlopen
-fabs(x::Real) = abs(x)
-fabs(x::Complex) = abs(real(x)) + abs(imag(x))
-
-# help function to build packed storage
-function pack(A, uplo)
-    AP = eltype(A)[]
-    n = size(A, 1)
-    for j in 1:n, i in (uplo === :L ? (j:n) : (1:j))
-        push!(AP, A[i,j])
-    end
-    return AP
-end
-
-@testset "vec_pointer_stride" begin
-    a = float(rand(1:20,4,4,4))
-    @test BLAS.asum(a) == sum(a) # dense case
-    @test BLAS.asum(view(a,1:2:4,:,:)) == sum(view(a,1:2:4,:,:)) # vector like
-    @test BLAS.asum(view(a,1:3,2:2,3:3)) == sum(view(a,1:3,2:2,3:3))
-    @test BLAS.asum(view(a,1:1,1:3,1:1)) == sum(view(a,1:1,1:3,1:1))
-    @test BLAS.asum(view(a,1:1,1:1,1:3)) == sum(view(a,1:1,1:1,1:3))
-    @test_throws ArgumentError BLAS.asum(view(a,1:3:4,:,:)) # non-vector like
-    @test_throws ArgumentError BLAS.asum(view(a,1:2,1:1,1:3))
-end
-Random.seed!(100)
-## BLAS tests - testing the interface code to BLAS routines
-@testset for elty in [Float32, Float64, ComplexF32, ComplexF64]
-
-    @testset "syr2k!" begin
-        U = randn(elty, 5, 2)
-        V = randn(elty, 5, 2)
-        @test tril(LinearAlgebra.BLAS.syr2k('L','N',U,V)) ≈ tril(U*transpose(V) + V*transpose(U))
-        @test triu(LinearAlgebra.BLAS.syr2k('U','N',U,V)) ≈ triu(U*transpose(V) + V*transpose(U))
-        @test tril(LinearAlgebra.BLAS.syr2k('L','T',U,V)) ≈ tril(transpose(U)*V + transpose(V)*U)
-        @test triu(LinearAlgebra.BLAS.syr2k('U','T',U,V)) ≈ triu(transpose(U)*V + transpose(V)*U)
-    end
-
-    if elty in (ComplexF32, ComplexF64)
-        @testset "her2k!" begin
-            U = randn(elty, 5, 2)
-            V = randn(elty, 5, 2)
-            @test tril(LinearAlgebra.BLAS.her2k('L','N',U,V)) ≈ tril(U*V' + V*U')
-            @test triu(LinearAlgebra.BLAS.her2k('U','N',U,V)) ≈ triu(U*V' + V*U')
-            @test tril(LinearAlgebra.BLAS.her2k('L','C',U,V)) ≈ tril(U'*V + V'*U)
-            @test triu(LinearAlgebra.BLAS.her2k('U','C',U,V)) ≈ triu(U'*V + V'*U)
-        end
-    end
-
-    o4 = fill(elty(1), 4)
-    z4 = zeros(elty, 4)
-
-    I4 = Matrix{elty}(I, 4, 4)
-    I43 = Matrix{elty}(I, 4, 3)
-    L4 = tril(fill(elty(1), 4,4))
-    U4 = triu(fill(elty(1), 4,4))
-    Z4 = zeros(elty, (4,4))
-
-    elm1 = elty(-1)
-    el2 = elty(2)
-    v14 = elty[1:4;]
-    v41 = elty[4:-1:1;]
-
-    let n = 10
-        @testset "dot products" begin
-            if elty <: Real
-                x1 = randn(elty, n)
-                x2 = randn(elty, n)
-                @test BLAS.dot(x1,x2) ≈ sum(x1.*x2)
-                @test_throws DimensionMismatch BLAS.dot(x1,rand(elty, n + 1))
-            else
-                z1 = randn(elty, n)
-                z2 = randn(elty, n)
-                @test BLAS.dotc(z1,z2) ≈ sum(conj(z1).*z2)
-                @test BLAS.dotu(z1,z2) ≈ sum(z1.*z2)
-                @test_throws DimensionMismatch BLAS.dotc(z1,rand(elty, n + 1))
-                @test_throws DimensionMismatch BLAS.dotu(z1,rand(elty, n + 1))
-            end
-        end
-        @testset "iamax" begin
-            x = randn(elty, n)
-            @test BLAS.iamax(x) == findmax(fabs, x)[2]
-        end
-        @testset "rot!" begin
-            x = randn(elty, n)
-            y = randn(elty, n)
-            c = rand(real(elty))
-            for sty in unique!([real(elty), elty])
-                s = rand(sty)
-                x2 = copy(x)
-                y2 = copy(y)
-                BLAS.rot!(n, x, 1, y, 1, c, s)
-                @test x ≈ c*x2 + s*y2
-                @test y ≈ -conj(s)*x2 + c*y2
-            end
-        end
-        @testset "axp(b)y" begin
-            x1 = randn(elty, n)
-            x2 = randn(elty, n)
-            α  = rand(elty)
-            β  = rand(elty)
-            for X1 in (x1, view(x1,n:-1:1)), X2 in (x2, view(x2, n:-1:1))
-                @test BLAS.axpy!(α,deepcopy(X1),deepcopy(X2)) ≈ α*X1 + X2
-                @test BLAS.axpby!(α,deepcopy(X1),β,deepcopy(X2)) ≈ α*X1 + β*X2
-            end
-            for ind1 in (1:n, n:-1:1), ind2 in (1:n, n:-1:1)
-                @test BLAS.axpy!(α,copy(x1),ind1,copy(x2),ind2) ≈ x2 + α*(ind1 == ind2 ? x1 : reverse(x1))
-            end
-            @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), rand(elty, n + 1))
-            @test_throws DimensionMismatch BLAS.axpby!(α, copy(x1), β, rand(elty, n + 1))
-            @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 1:n)
-            @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 0:div(n,2), copy(x2), 1:(div(n, 2) + 1))
-            @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 0:(div(n, 2) - 1))
-        end
-        @testset "nrm2, iamax, and asum for StridedVectors" begin
-            a = rand(elty,n)
-            for ind in (2:2:n, n:-2:2)
-                b = view(a, ind, 1)
-                @test BLAS.nrm2(b) ≈ sqrt(sum(abs2, b))
-                @test BLAS.asum(b) ≈ sum(fabs, b)
-                @test BLAS.iamax(b) == findmax(fabs, b)[2] * (step(ind) >= 0)
-            end
-        end
-        @testset "scal" begin
-            α = rand(elty)
-            a = rand(elty,n)
-            @test BLAS.scal(n,α,a,1) ≈ α * a
-            for v in (a, view(a, n:-1:1))
-                @test BLAS.scal!(α, deepcopy(v)) ≈ α * v
-            end
-        end
-
-        @testset "ger, her, syr" for x in (rand(elty, n), view(rand(elty,2n), 1:2:2n), view(rand(elty,n), n:-1:1)),
-            y in (rand(elty,n), view(rand(elty,3n), 1:3:3n), view(rand(elty,2n), 2n:-2:2))
-
-            A = rand(elty,n,n)
-            α = rand(elty)
-
-            @test BLAS.ger!(α,x,y,copy(A)) ≈ A + α*x*y'
-            @test_throws DimensionMismatch BLAS.ger!(α,Vector{elty}(undef,n+1),y,copy(A))
-
-            A = rand(elty,n,n)
-            A = A + transpose(A)
-            @test issymmetric(A)
-            @test triu(BLAS.syr!('U',α,x,copy(A))) ≈ triu(A + α*x*transpose(x))
-            @test_throws DimensionMismatch BLAS.syr!('U',α,Vector{elty}(undef,n+1),copy(A))
-
-            if elty <: Complex
-                A = rand(elty,n,n)
-                A = A + A'
-                α = real(α)
-                @test triu(BLAS.her!('U',α,x,copy(A))) ≈ triu(A + α*x*x')
-                @test_throws DimensionMismatch BLAS.her!('U',α,Vector{elty}(undef,n+1),copy(A))
-            end
-        end
-        @testset "copy" begin
-            x1 = randn(elty, n)
-            x2 = randn(elty, n)
-            for ind1 in (1:n, n:-1:1), ind2 in (1:n, n:-1:1)
-                @test x2 === BLAS.copyto!(x2, ind1, x1, ind2) == (ind1 == ind2 ? x1 : reverse(x1))
-            end
-            @test_throws DimensionMismatch BLAS.copyto!(x2, 1:n, x1, 1:(n - 1))
-            @test_throws ArgumentError BLAS.copyto!(x1, 0:div(n, 2), x2, 1:(div(n, 2) + 1))
-            @test_throws ArgumentError BLAS.copyto!(x1, 1:(div(n, 2) + 1), x2, 0:div(n, 2))
-        end
-        @testset "trmv and trsv" begin
-            A = rand(elty,n,n)
-            x = rand(elty,n)
-            xerr = Vector{elty}(undef,n+1)
-            for uplo in ('U', 'L'), diag in ('U','N'), trans in ('N', 'T', 'C')
-                Wrapper = if uplo == 'U'
-                    diag == 'U' ? UnitUpperTriangular : UpperTriangular
-                else
-                    diag == 'U' ? UnitLowerTriangular : LowerTriangular
-                end
-                fun = trans == 'N' ? identity : trans == 'T' ? transpose : adjoint
-                fullA = collect(fun(Wrapper(A)))
-                @testset "trmv" begin
-                    @test BLAS.trmv(uplo,trans,diag,A,x) ≈ fullA * x
-                    @test_throws DimensionMismatch BLAS.trmv(uplo,trans,diag,A,xerr)
-                    for xx in (x, view(x, n:-1:1))
-                        @test BLAS.trmv!(uplo,trans,diag,A,deepcopy(xx)) ≈ fullA * xx
-                    end
-                end
-                @testset "trsv" begin
-                    @test BLAS.trsv(uplo,trans,diag,A,x) ≈ fullA \ x
-                    @test_throws DimensionMismatch BLAS.trsv(uplo,trans,diag,A,xerr)
-                    for xx in (x, view(x, n:-1:1))
-                        @test BLAS.trsv!(uplo,trans,diag,A,deepcopy(xx)) ≈ fullA \ xx
-                    end
-                end
-            end
-        end
-        @testset "symmetric/Hermitian multiplication" begin
-            x = rand(elty,n)
-            A = rand(elty,n,n)
-            y = rand(elty, n)
-            α = randn(elty)
-            β = randn(elty)
-            Aherm = A + A'
-            Asymm = A + transpose(A)
-            offsizevec, offsizemat = Array{elty}.(undef,(n+1, (n,n+1)))
-            @testset "symv and hemv" for uplo in ('U', 'L')
-                @test BLAS.symv(uplo,Asymm,x) ≈ Asymm*x
-                for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
-                    @test BLAS.symv!(uplo,α,Asymm,xx,β,deepcopy(yy)) ≈ α * Asymm * xx + β * yy
-                end
-                @test_throws DimensionMismatch BLAS.symv!(uplo,α,Asymm,x,β,offsizevec)
-                @test_throws DimensionMismatch BLAS.symv(uplo,offsizemat,x)
-                if elty <: BlasComplex
-                    @test BLAS.hemv(uplo,Aherm,x) ≈ Aherm*x
-                    for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
-                        @test BLAS.hemv!(uplo,α,Aherm,xx,β,deepcopy(yy)) ≈ α * Aherm * xx + β * yy
-                    end
-                    @test_throws DimensionMismatch BLAS.hemv(uplo,offsizemat,x)
-                    @test_throws DimensionMismatch BLAS.hemv!(uplo,one(elty),Aherm,x,one(elty),offsizevec)
-                end
-            end
-
-            @testset "symm error throwing" begin
-                Cnn, Cnm, Cmn = Matrix{elty}.(undef,((n,n), (n,n-1), (n-1,n)))
-                @test_throws DimensionMismatch BLAS.symm('L','U',Cnm,Cnn)
-                @test_throws DimensionMismatch BLAS.symm('R','U',Cmn,Cnn)
-                @test_throws DimensionMismatch BLAS.symm!('L','U',one(elty),Asymm,Cnn,one(elty),Cmn)
-                @test_throws DimensionMismatch BLAS.symm!('L','U',one(elty),Asymm,Cnn,one(elty),Cnm)
-                @test_throws DimensionMismatch BLAS.symm!('L','U',one(elty),Asymm,Cmn,one(elty),Cnn)
-                @test_throws DimensionMismatch BLAS.symm!('R','U',one(elty),Asymm,Cnm,one(elty),Cmn)
-                @test_throws DimensionMismatch BLAS.symm!('R','U',one(elty),Asymm,Cnn,one(elty),Cnm)
-                @test_throws DimensionMismatch BLAS.symm!('R','U',one(elty),Asymm,Cmn,one(elty),Cnn)
-                if elty <: BlasComplex
-                    @test_throws DimensionMismatch BLAS.hemm('L','U',Cnm,Cnn)
-                    @test_throws DimensionMismatch BLAS.hemm('R','U',Cmn,Cnn)
-                    @test_throws DimensionMismatch BLAS.hemm!('L','U',one(elty),Aherm,Cnn,one(elty),Cmn)
-                    @test_throws DimensionMismatch BLAS.hemm!('L','U',one(elty),Aherm,Cnn,one(elty),Cnm)
-                    @test_throws DimensionMismatch BLAS.hemm!('L','U',one(elty),Aherm,Cmn,one(elty),Cnn)
-                    @test_throws DimensionMismatch BLAS.hemm!('R','U',one(elty),Aherm,Cnm,one(elty),Cmn)
-                    @test_throws DimensionMismatch BLAS.hemm!('R','U',one(elty),Aherm,Cnn,one(elty),Cnm)
-                    @test_throws DimensionMismatch BLAS.hemm!('R','U',one(elty),Aherm,Cmn,one(elty),Cnn)
-                end
-            end
-        end
-        @testset "trmm error throwing" begin
-            Cnn, Cmn, Cnm = Matrix{elty}.(undef,((n,n), (n+1,n), (n,n+1)))
-            @test_throws DimensionMismatch BLAS.trmm('L','U','N','N',one(elty),triu(Cnn),Cmn)
-            @test_throws DimensionMismatch BLAS.trmm('R','U','N','N',one(elty),triu(Cnn),Cnm)
-        end
-
-        # hpmv!
-        if elty in (ComplexF32, ComplexF64)
-            @testset "hpmv!" begin
-                # Both matrix dimensions n coincide, as we have Hermitian matrices.
-                # Define the inputs and outputs of hpmv!, y = α*A*x+β*y
-                α = rand(elty)
-                A = rand(elty, n, n)
-                x = rand(elty, n)
-                β = rand(elty)
-                y = rand(elty, n)
-                for uplo in (:L, :U)
-                    Cuplo = String(uplo)[1]
-                    AH = Hermitian(A, uplo)
-                    # Create lower/upper triangular packing of AL
-                    AP = pack(AH, uplo)
-                    for xx in (x, view(x,n:-1:1)), yy in (y, view(y,n:-1:1))
-                        @test BLAS.hpmv!(Cuplo, α, AP, xx, β, deepcopy(yy)) ≈ α*AH*xx + β*yy
-                    end
-                    AP′ = view(zeros(elty, n*(n+1)),1:2:n*(n+1))
-                    @test_throws ErrorException BLAS.hpmv!(Cuplo, α, AP′, x, β, y)
-                    AP′ = view(AP, 1:length(AP′) - 1)
-                    @test_throws DimensionMismatch BLAS.hpmv!(Cuplo, α, AP′, x, β, y)
-                    @test_throws DimensionMismatch BLAS.hpmv!(Cuplo, α, AP′, x, β, view(y,1:n-1))
-                end
-            end
-        end
-
-        # spmv!
-        if elty in (Float32, Float64)
-            @testset "spmv!" begin
-                # Both matrix dimensions n coincide, as we have symmetric matrices.
-                # Define the inputs and outputs of spmv!, y = α*A*x+β*y
-                α = rand(elty)
-                A = rand(elty, n, n)
-                x = rand(elty, n)
-                β = rand(elty)
-                y = rand(elty, n)
-                for uplo in (:L, :U)
-                    Cuplo = String(uplo)[1]
-                    AS = Symmetric(A, uplo)
-                    # Create lower/upper triangular packing of AL
-                    AP = pack(AS, uplo)
-                    for xx in (x, view(x,n:-1:1)), yy in (y, view(y,n:-1:1))
-                        @test BLAS.spmv!(Cuplo, α, AP, xx, β, deepcopy(yy)) ≈ α*AS*xx + β*yy
-                    end
-                    AP′ = view(zeros(elty, n*(n+1)),1:2:n*(n+1))
-                    @test_throws ErrorException BLAS.spmv!(Cuplo, α, AP′, x, β, y)
-                    AP′ = view(AP, 1:length(AP′) - 1)
-                    @test_throws DimensionMismatch BLAS.spmv!(Cuplo, α, AP′, x, β, y)
-                    @test_throws DimensionMismatch BLAS.spmv!(Cuplo, α, AP′, x, β, view(y,1:n-1))
-                end
-            end
-        end
-
-        # spr!
-        if elty in (Float32, Float64)
-            @testset "spr! $elty" begin
-                α = rand(elty)
-                M = rand(elty, n, n)
-                AL = Symmetric(M, :L)
-                AU = Symmetric(M, :U)
-                for x in (rand(elty, n), view(rand(elty, n), n:-1:1))
-                    ALP_result_julia_lower = pack(α*x*x' + AL, :L)
-                    ALP_result_blas_lower = pack(AL, :L)
-                    BLAS.spr!('L', α, x, ALP_result_blas_lower)
-                    @test ALP_result_julia_lower ≈ ALP_result_blas_lower
-                    ALP_result_blas_lower = append!(pack(AL, :L), ones(elty, 10))
-                    BLAS.spr!('L', α, x, ALP_result_blas_lower)
-                    @test ALP_result_julia_lower ≈ ALP_result_blas_lower[1:end-10]
-                    ALP_result_blas_lower = reshape(pack(AL, :L), 1, length(ALP_result_julia_lower), 1)
-                    BLAS.spr!('L', α, x, ALP_result_blas_lower)
-                    @test ALP_result_julia_lower ≈ vec(ALP_result_blas_lower)
-
-                    AUP_result_julia_upper = pack(α*x*x' + AU, :U)
-                    AUP_result_blas_upper = pack(AU, :U)
-                    BLAS.spr!('U', α, x, AUP_result_blas_upper)
-                    @test AUP_result_julia_upper ≈ AUP_result_blas_upper
-                    AUP_result_blas_upper = append!(pack(AU, :U), ones(elty, 10))
-                    BLAS.spr!('U', α, x, AUP_result_blas_upper)
-                    @test AUP_result_julia_upper ≈ AUP_result_blas_upper[1:end-10]
-                    AUP_result_blas_upper = reshape(pack(AU, :U), 1, length(AUP_result_julia_upper), 1)
-                    BLAS.spr!('U', α, x, AUP_result_blas_upper)
-                    @test AUP_result_julia_upper ≈ vec(AUP_result_blas_upper)
-                end
-            end
-        end
-
-        #trsm
-        A = triu(rand(elty,n,n))
-        B = rand(elty,(n,n))
-        @test BLAS.trsm('L','U','N','N',one(elty),A,B) ≈ A\B
-
-        #will work for SymTridiagonal,Tridiagonal,Bidiagonal!
-        @testset "banded matrix mv" begin
-            @testset "gbmv" begin
-                TD = Tridiagonal(rand(elty,n-1),rand(elty,n),rand(elty,n-1))
-                x  = rand(elty, n)
-                #put TD into the BLAS format!
-                fTD = zeros(elty,3,n)
-                fTD[1,2:n] = TD.du
-                fTD[2,:] = TD.d
-                fTD[3,1:n-1] = TD.dl
-                @test BLAS.gbmv('N',n,1,1,fTD,x) ≈ TD*x
-                y = rand(elty, n)
-                α = randn(elty)
-                β = randn(elty)
-                for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
-                    @test BLAS.gbmv!('N',n,1,1,α,fTD,xx,β,deepcopy(yy)) ≈ α * TD * xx + β * yy
-                end
-            end
-            #will work for SymTridiagonal only!
-            @testset "sbmv and hbmv" begin
-                x = rand(elty,n)
-                if elty <: BlasReal
-                    ST  = SymTridiagonal(rand(elty,n),rand(elty,n-1))
-                    #put TD into the BLAS format!
-                    fST = zeros(elty,2,n)
-                    fST[1,2:n] = ST.ev
-                    fST[2,:] = ST.dv
-                    @test BLAS.sbmv('U',1,fST,x) ≈ ST*x
-                    y = rand(elty, n)
-                    α = randn(elty)
-                    β = randn(elty)
-                    for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
-                        @test BLAS.sbmv!('U',1,α,fST,xx,β,deepcopy(yy)) ≈ α * ST * xx + β * yy
-                    end
-                else
-                    dv = rand(real(elty),n)
-                    ev = rand(elty,n-1)
-                    bH = zeros(elty,2,n)
-                    bH[1,2:n] = ev
-                    bH[2,:] = dv
-                    fullH = diagm(0 => dv, -1 => conj(ev), 1 => ev)
-                    @test BLAS.hbmv('U',1,bH,x) ≈ fullH*x
-                    y = rand(elty, n)
-                    α = randn(elty)
-                    β = randn(elty)
-                    for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
-                        @test BLAS.hbmv!('U',1,α,bH,xx,β,deepcopy(yy)) ≈ α * fullH * xx + β * yy
-                    end
-                end
-            end
-        end
-    end
-
-    @testset "gemv" begin
-        @test all(BLAS.gemv('N', I4, o4) .== o4)
-        @test all(BLAS.gemv('T', I4, o4) .== o4)
-        @test all(BLAS.gemv('N', el2, I4, o4) .== el2 * o4)
-        @test all(BLAS.gemv('T', el2, I4, o4) .== el2 * o4)
-        @test_throws DimensionMismatch BLAS.gemv('N',I43,o4)
-        o4cp = copy(o4)
-        @test_throws DimensionMismatch BLAS.gemv!('T',one(elty),I43,o4,one(elty),o4cp)
-        @test_throws DimensionMismatch BLAS.gemv!('C',one(elty),I43,o4,one(elty),o4cp)
-        @test all(BLAS.gemv!('N', one(elty), I4, o4, elm1, o4cp) .== z4)
-        @test all(o4cp .== z4)
-        o4cp[:] = o4
-        @test all(BLAS.gemv!('T', one(elty), I4, o4, elm1, o4cp) .== z4)
-        @test all(o4cp .== z4)
-        @test all(BLAS.gemv('N', U4, o4) .== v41)
-        @test all(BLAS.gemv('N', U4, o4) .== v41)
-        @testset "non-standard strides" begin
-            A = rand(elty, 3, 4)
-            x = rand(elty, 5)
-            for y = (view(ones(elty, 5), 1:2:5), view(ones(elty, 7), 6:-2:2))
-                ycopy = copy(y)
-                @test BLAS.gemv!('N', elty(2), view(A, :, 2:2:4), view(x, 1:3:4), elty(3), y) ≈ 2*A[:,2:2:4]*x[1:3:4] + 3*ycopy
-                ycopy = copy(y)
-                @test BLAS.gemv!('N', elty(2), view(A, :, 4:-2:2), view(x, 1:3:4), elty(3), y) ≈ 2*A[:,4:-2:2]*x[1:3:4] + 3*ycopy
-                ycopy = copy(y)
-                @test BLAS.gemv!('N', elty(2), view(A, :, 2:2:4), view(x, 4:-3:1), elty(3), y) ≈ 2*A[:,2:2:4]*x[4:-3:1] + 3*ycopy
-                ycopy = copy(y)
-                @test BLAS.gemv!('N', elty(2), view(A, :, 4:-2:2), view(x, 4:-3:1), elty(3), y) ≈ 2*A[:,4:-2:2]*x[4:-3:1] + 3*ycopy
-                ycopy = copy(y)
-                @test BLAS.gemv!('N', elty(2), view(A, :, StepRangeLen(1,0,1)), view(x, 1:1), elty(3), y) ≈ 2*A[:,1:1]*x[1:1] + 3*ycopy # stride(A,2) == 0
-            end
-            @test BLAS.gemv!('N', elty(1), zeros(elty, 0, 5), zeros(elty, 5), elty(1), zeros(elty, 0)) == elty[] # empty matrix, stride(A,2) == 0
-            @test BLAS.gemv('N', elty(-1), view(A, 2:3, 1:2:3), view(x, 2:-1:1)) ≈ -1*A[2:3,1:2:3]*x[2:-1:1]
-            @test BLAS.gemv('N', view(A, 2:3, 3:-2:1), view(x, 1:2:3)) ≈ A[2:3,3:-2:1]*x[1:2:3]
-            for (trans, f) = (('T',transpose), ('C',adjoint))
-                for y = (view(ones(elty, 3), 1:2:3), view(ones(elty, 5), 4:-2:2))
-                    ycopy = copy(y)
-                    @test BLAS.gemv!(trans, elty(2), view(A, :, 2:2:4), view(x, 1:2:5), elty(3), y) ≈ 2*f(A[:,2:2:4])*x[1:2:5] + 3*ycopy
-                    ycopy = copy(y)
-                    @test BLAS.gemv!(trans, elty(2), view(A, :, 4:-2:2), view(x, 1:2:5), elty(3), y) ≈ 2*f(A[:,4:-2:2])*x[1:2:5] + 3*ycopy
-                    ycopy = copy(y)
-                    @test BLAS.gemv!(trans, elty(2), view(A, :, 2:2:4), view(x, 5:-2:1), elty(3), y) ≈ 2*f(A[:,2:2:4])*x[5:-2:1] + 3*ycopy
-                    ycopy = copy(y)
-                    @test BLAS.gemv!(trans, elty(2), view(A, :, 4:-2:2), view(x, 5:-2:1), elty(3), y) ≈ 2*f(A[:,4:-2:2])*x[5:-2:1] + 3*ycopy
-                end
-                @test BLAS.gemv!(trans, elty(2), view(A, :, StepRangeLen(1,0,1)), view(x, 1:2:5), elty(3), elty[1]) ≈ 2*f(A[:,1:1])*x[1:2:5] + elty[3] # stride(A,2) == 0
-            end
-            for trans = ('N', 'T', 'C')
-                @test_throws ErrorException BLAS.gemv(trans, view(A, 1:2:3, 1:2), view(x, 1:2)) # stride(A,1) must be 1
-            end
-        end
-    end
-    @testset "gemm" begin
-        @test all(BLAS.gemm('N', 'N', I4, I4) .== I4)
-        @test all(BLAS.gemm('N', 'T', I4, I4) .== I4)
-        @test all(BLAS.gemm('T', 'N', I4, I4) .== I4)
-        @test all(BLAS.gemm('T', 'T', I4, I4) .== I4)
-        @test all(BLAS.gemm('N', 'N', el2, I4, I4) .== el2 * I4)
-        @test all(BLAS.gemm('N', 'T', el2, I4, I4) .== el2 * I4)
-        @test all(BLAS.gemm('T', 'N', el2, I4, I4) .== el2 * I4)
-        @test all(LinearAlgebra.BLAS.gemm('T', 'T', el2, I4, I4) .== el2 * I4)
-        I4cp = copy(I4)
-        @test all(BLAS.gemm!('N', 'N', one(elty), I4, I4, elm1, I4cp) .== Z4)
-        @test all(I4cp .== Z4)
-        I4cp[:] = I4
-        @test all(BLAS.gemm!('N', 'T', one(elty), I4, I4, elm1, I4cp) .== Z4)
-        @test all(I4cp .== Z4)
-        I4cp[:] = I4
-        @test all(BLAS.gemm!('T', 'N', one(elty), I4, I4, elm1, I4cp) .== Z4)
-        @test all(I4cp .== Z4)
-        I4cp[:] = I4
-        @test all(BLAS.gemm!('T', 'T', one(elty), I4, I4, elm1, I4cp) .== Z4)
-        @test all(I4cp .== Z4)
-        @test all(BLAS.gemm('N', 'N', I4, U4) .== U4)
-        @test all(BLAS.gemm('N', 'T', I4, U4) .== L4)
-        @test_throws DimensionMismatch BLAS.gemm!('N','N', one(elty), I4, I4, elm1, Matrix{elty}(I, 5, 5))
-        @test_throws DimensionMismatch BLAS.gemm!('N','N', one(elty), I43, I4, elm1, I4)
-        @test_throws DimensionMismatch BLAS.gemm!('T','N', one(elty), I43, I4, elm1, I43)
-        @test_throws DimensionMismatch BLAS.gemm!('N','T', one(elty), I43, I43, elm1, I43)
-        @test_throws DimensionMismatch BLAS.gemm!('T','T', one(elty), I43, I43, elm1, Matrix{elty}(I, 3, 4))
-    end
-    @testset "gemm compared to (sy)(he)rk" begin
-        if eltype(elm1) <: Complex
-            @test all(triu(BLAS.herk('U', 'N', U4)) .== triu(BLAS.gemm('N', 'T', U4, U4)))
-            @test all(tril(BLAS.herk('L', 'N', U4)) .== tril(BLAS.gemm('N', 'T', U4, U4)))
-            @test all(triu(BLAS.herk('U', 'N', L4)) .== triu(BLAS.gemm('N', 'T', L4, L4)))
-            @test all(tril(BLAS.herk('L', 'N', L4)) .== tril(BLAS.gemm('N', 'T', L4, L4)))
-            @test all(triu(BLAS.herk('U', 'C', U4)) .== triu(BLAS.gemm('T', 'N', U4, U4)))
-            @test all(tril(BLAS.herk('L', 'C', U4)) .== tril(BLAS.gemm('T', 'N', U4, U4)))
-            @test all(triu(BLAS.herk('U', 'C', L4)) .== triu(BLAS.gemm('T', 'N', L4, L4)))
-            @test all(tril(BLAS.herk('L', 'C', L4)) .== tril(BLAS.gemm('T', 'N', L4, L4)))
-            ans = similar(L4)
-            @test all(tril(BLAS.herk('L','C', L4)) .== tril(BLAS.herk!('L', 'C', real(one(elty)), L4, real(zero(elty)), ans)))
-            @test all(LinearAlgebra.copytri!(ans, 'L') .== LinearAlgebra.BLAS.gemm('T', 'N', L4, L4))
-            @test_throws DimensionMismatch BLAS.herk!('L','N',real(one(elty)),Matrix{elty}(I, 5, 5),real(one(elty)), Matrix{elty}(I, 6, 6))
-        else
-            @test all(triu(BLAS.syrk('U', 'N', U4)) .== triu(BLAS.gemm('N', 'T', U4, U4)))
-            @test all(tril(BLAS.syrk('L', 'N', U4)) .== tril(BLAS.gemm('N', 'T', U4, U4)))
-            @test all(triu(BLAS.syrk('U', 'N', L4)) .== triu(BLAS.gemm('N', 'T', L4, L4)))
-            @test all(tril(BLAS.syrk('L', 'N', L4)) .== tril(BLAS.gemm('N', 'T', L4, L4)))
-            @test all(triu(BLAS.syrk('U', 'T', U4)) .== triu(BLAS.gemm('T', 'N', U4, U4)))
-            @test all(tril(BLAS.syrk('L', 'T', U4)) .== tril(BLAS.gemm('T', 'N', U4, U4)))
-            @test all(triu(BLAS.syrk('U', 'T', L4)) .== triu(BLAS.gemm('T', 'N', L4, L4)))
-            @test all(tril(BLAS.syrk('L', 'T', L4)) .== tril(BLAS.gemm('T', 'N', L4, L4)))
-            ans = similar(L4)
-            @test all(tril(BLAS.syrk('L','T', L4)) .== tril(BLAS.syrk!('L', 'T', one(elty), L4, zero(elty), ans)))
-            @test all(LinearAlgebra.copytri!(ans, 'L') .== BLAS.gemm('T', 'N', L4, L4))
-            @test_throws DimensionMismatch BLAS.syrk!('L','N',one(elty), Matrix{elty}(I, 5, 5),one(elty), Matrix{elty}(I, 6, 6))
-        end
-    end
-end
-
-@testset "syr for eltype $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64)
-    A = rand(elty, 5, 5)
-    @test triu(A[1,:] * transpose(A[1,:])) ≈ BLAS.syr!('U', one(elty), A[1,:], zeros(elty, 5, 5))
-    @test tril(A[1,:] * transpose(A[1,:])) ≈ BLAS.syr!('L', one(elty), A[1,:], zeros(elty, 5, 5))
-    @test triu(A[1,:] * transpose(A[1,:])) ≈ BLAS.syr!('U', one(elty), view(A, 1, :), zeros(elty, 5, 5))
-    @test tril(A[1,:] * transpose(A[1,:])) ≈ BLAS.syr!('L', one(elty), view(A, 1, :), zeros(elty, 5, 5))
-end
-
-@testset "her for eltype $elty" for elty in (ComplexF32, ComplexF64)
-    A = rand(elty, 5, 5)
-    @test triu(A[1,:] * A[1,:]') ≈ BLAS.her!('U', one(real(elty)), A[1,:], zeros(elty, 5, 5))
-    @test tril(A[1,:] * A[1,:]') ≈ BLAS.her!('L', one(real(elty)), A[1,:], zeros(elty, 5, 5))
-    @test triu(A[1,:] * A[1,:]') ≈ BLAS.her!('U', one(real(elty)), view(A, 1, :), zeros(elty, 5, 5))
-    @test tril(A[1,:] * A[1,:]') ≈ BLAS.her!('L', one(real(elty)), view(A, 1, :), zeros(elty, 5, 5))
-end
-
-struct WrappedArray{T,N} <: AbstractArray{T,N}
-    A::Array{T,N}
-end
-
-Base.size(A::WrappedArray) = size(A.A)
-Base.getindex(A::WrappedArray, i::Int) = A.A[i]
-Base.getindex(A::WrappedArray{T, N}, I::Vararg{Int, N}) where {T, N} = A.A[I...]
-Base.setindex!(A::WrappedArray, v, i::Int) = setindex!(A.A, v, i)
-Base.setindex!(A::WrappedArray{T, N}, v, I::Vararg{Int, N}) where {T, N} = setindex!(A.A, v, I...)
-Base.unsafe_convert(::Type{Ptr{T}}, A::WrappedArray{T}) where T = Base.unsafe_convert(Ptr{T}, A.A)
-
-Base.strides(A::WrappedArray) = strides(A.A)
-Base.elsize(::Type{WrappedArray{T,N}}) where {T,N} = Base.elsize(Array{T,N})
-
-@testset "strided interface adjtrans" begin
-    x = WrappedArray([1, 2, 3, 4])
-    @test stride(x,1) == 1
-    @test stride(x,2) == stride(x,3) == 4
-    @test strides(x') == strides(transpose(x)) == (4,1)
-    @test pointer(x') == pointer(transpose(x)) == pointer(x)
-    @test_throws BoundsError stride(x,0)
-
-    A = WrappedArray([1 2; 3 4; 5 6])
-    @test stride(A,1) == 1
-    @test stride(A,2) == 3
-    @test stride(A,3) == stride(A,4) >= 6
-    @test strides(A') == strides(transpose(A)) == (3,1)
-    @test pointer(A') == pointer(transpose(A)) == pointer(A)
-    @test_throws BoundsError stride(A,0)
-
-    y = WrappedArray([1+im, 2, 3, 4])
-    @test strides(transpose(y)) == (4,1)
-    @test pointer(transpose(y)) == pointer(y)
-    @test_throws MethodError strides(y')
-    @test_throws ErrorException pointer(y')
-
-    B = WrappedArray([1+im 2; 3 4; 5 6])
-    @test strides(transpose(B)) == (3,1)
-    @test pointer(transpose(B)) == pointer(B)
-    @test_throws MethodError strides(B')
-    @test_throws ErrorException pointer(B')
-
-    @test_throws MethodError stride(1:5,0)
-    @test_throws MethodError stride(1:5,1)
-    @test_throws MethodError stride(1:5,2)
-    @test_throws MethodError strides(transpose(1:5))
-    @test_throws MethodError strides((1:5)')
-    @test_throws ErrorException pointer(transpose(1:5))
-    @test_throws ErrorException pointer((1:5)')
-end
-
-@testset "strided interface blas" begin
-    for elty in (Float32, Float64, ComplexF32, ComplexF64)
-    # Level 1
-        x = WrappedArray(elty[1, 2, 3, 4])
-        y = WrappedArray(elty[5, 6, 7, 8])
-        BLAS.blascopy!(2, x, 1, y, 2)
-        @test y == WrappedArray(elty[1, 6, 2, 8])
-        BLAS.scal!(2, elty(2), x, 1)
-        @test x == WrappedArray(elty[2, 4, 3, 4])
-        @test BLAS.nrm2(1, x, 2) == elty(2)
-        @test BLAS.nrm2(x) == BLAS.nrm2(x.A)
-        BLAS.asum(x) == elty(13)
-        BLAS.axpy!(4, elty(2), x, 1, y, 1)
-        @test y == WrappedArray(elty[5, 14, 8, 16])
-        BLAS.axpby!(elty(2), x, elty(3), y)
-        @test y == WrappedArray(elty[19, 50, 30, 56])
-        @test BLAS.iamax(x) == 2
-
-        M = fill(elty(1.0), 3, 3)
-        @test BLAS.scal!(elty(2), view(M,:,2)) === view(M,:,2)
-        @test BLAS.scal!(elty(3), view(M,3,:)) === view(M,3,:)
-        @test M == elty[1. 2. 1.; 1. 2. 1.; 3. 6. 3.]
-    # Level 2
-        A = WrappedArray(elty[1 2; 3 4])
-        x = WrappedArray(elty[1, 2])
-        y = WrappedArray(elty[3, 4])
-        @test BLAS.gemv!('N', elty(2), A, x, elty(1), y) isa WrappedArray{elty,1}
-        @test y == WrappedArray(elty[13, 26])
-        @test BLAS.gbmv!('N', 2, 1, 0, elty(2), A, x, elty(1), y) isa WrappedArray{elty,1}
-        @test y == WrappedArray(elty[15, 40])
-        @test BLAS.symv!('U', elty(2), A, x, elty(1), y) isa WrappedArray{elty,1}
-        @test y == WrappedArray(elty[25, 60])
-        @test BLAS.trmv!('U', 'N', 'N', A, y) isa WrappedArray{elty,1}
-        @test y == WrappedArray(elty[145, 240])
-        @test BLAS.trsv!('U', 'N', 'N', A, y) isa WrappedArray{elty,1}
-        @test y == WrappedArray(elty[25,60])
-        @test BLAS.ger!(elty(2), x, y, A) isa WrappedArray{elty,2}
-        @test A == WrappedArray(elty[51 122; 103 244])
-        @test BLAS.syr!('L', elty(2), x, A) isa WrappedArray{elty,2}
-        @test A == WrappedArray(elty[53 122; 107 252])
-    # Level 3
-        A = WrappedArray(elty[1 2; 3 4])
-        B = WrappedArray(elty[5 6; 7 8])
-        C = WrappedArray(elty[9 10; 11 12])
-        BLAS.gemm!('N', 'N', elty(2), A, B, elty(1), C) isa WrappedArray{elty,2}
-        @test C == WrappedArray([47 54; 97 112])
-        BLAS.symm!('L', 'U', elty(2), A, B, elty(1), C) isa WrappedArray{elty,2}
-        @test C == WrappedArray([85 98; 173 200])
-        BLAS.syrk!('U', 'N', elty(2), A, elty(1), C) isa WrappedArray{elty,2}
-        @test C == WrappedArray([95 120; 173 250])
-        BLAS.syr2k!('U', 'N', elty(2), A, B, elty(1), C) isa WrappedArray{elty,2}
-        @test C == WrappedArray([163 244; 173 462])
-        BLAS.trmm!('L', 'U', 'N', 'N', elty(2), A, B) isa WrappedArray{elty,2}
-        @test B == WrappedArray([38 44; 56 64])
-        BLAS.trsm!('L', 'U', 'N', 'N', elty(2), A, B) isa WrappedArray{elty,2}
-        @test B == WrappedArray([20 24; 28 32])
-    end
-    for elty in (Float32, Float64)
-    # Level 1
-        x = WrappedArray(elty[1, 2, 3, 4])
-        y = WrappedArray(elty[5, 6, 7, 8])
-        @test BLAS.dot(2, x, 1, y, 2) == elty(19)
-    # Level 2
-        A = WrappedArray(elty[1 2; 3 4])
-        x = WrappedArray(elty[1, 2])
-        y = WrappedArray(elty[3, 4])
-        BLAS.sbmv!('U', 1, elty(2), A, x, elty(1), y) isa WrappedArray{elty,1}
-        @test y == WrappedArray(elty[17,24])
-    end
-    for elty in (ComplexF32, ComplexF64)
-    # Level 1
-        x = WrappedArray(elty[1+im, 2+2im, 3+3im, 4+im])
-        y = WrappedArray(elty[5-im, 6-2im, 7-3im, 8-im])
-        @test BLAS.dotc(2, x, 1, y, 2) == elty(12-26im)
-        @test BLAS.dotu(2, x, 1, y, 2) == elty(26+12im)
-    # Level 2
-        A = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im])
-        x = WrappedArray(elty[1+im, 2+2im])
-        y = WrappedArray(elty[5-im, 6-2im])
-        @test BLAS.hemv!('U', elty(2), A, x, elty(1), y) isa WrappedArray{elty,1}
-        @test y == WrappedArray(elty[7+17im, 30+14im])
-        BLAS.hbmv!('U', 1, elty(2), A, x, elty(1), y) isa WrappedArray{elty,1}
-        @test y == WrappedArray(elty[13+39im, 54+30im])
-        @test BLAS.her!('L', real(elty(2)), x, A) isa WrappedArray{elty,2}
-        @test A == WrappedArray(elty[5 2+2im; 11+3im 20])
-    # Level 3
-        A = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im])
-        B = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im])
-        C = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im])
-        @test BLAS.hemm!('L', 'U', elty(2), A, B, elty(1), C) isa WrappedArray{elty,2}
-        @test C == WrappedArray([3+27im 6+38im; 35+27im 52+36im])
-        @test BLAS.herk!('U', 'N', real(elty(2)), A, real(elty(1)), C) isa WrappedArray{elty,2}
-        @test C == WrappedArray([23 50+38im; 35+27im 152])
-        @test BLAS.her2k!('U', 'N', elty(2), A, B, real(elty(1)), C) isa WrappedArray{elty,2}
-        @test C == WrappedArray([63 138+38im; 35+27im 352])
-    end
-end
-
-@testset "get_set_num_threads" begin
-    default = BLAS.get_num_threads()
-    @test default isa Int
-    @test default > 0
-    BLAS.set_num_threads(1)
-    @test BLAS.get_num_threads() === 1
-    BLAS.set_num_threads(default)
-    @test BLAS.get_num_threads() === default
-end
-
-@testset "test for 0-strides" for elty in (Float32, Float64, ComplexF32, ComplexF64)
-    A = randn(elty, 10, 10);
-    a = view([randn(elty)], 1 .+ 0(1:10))
-    b = view([randn(elty)], 1 .+ 0(1:10))
-    α, β = randn(elty), randn(elty)
-    @testset "dot/dotc/dotu" begin
-        if elty <: Real
-            @test BLAS.dot(a,b) ≈ sum(a.*b)
-        else
-            @test BLAS.dotc(a,b) ≈ sum(conj(a).*b)
-            @test BLAS.dotu(a,b) ≈ sum(a.*b)
-        end
-    end
-    @testset "axp(b)y!" begin
-        @test BLAS.axpy!(α,a,copy(b)) ≈ α*a + b
-        @test BLAS.axpby!(α,a,β,copy(b)) ≈ α*a + β*b
-        @test_throws "dest" BLAS.axpy!(α,a,b)
-        @test_throws "dest" BLAS.axpby!(α,a,β,b)
-    end
-    @test BLAS.iamax(a) == 0
-    @test_throws "dest" BLAS.scal!(b[1], a)
-    @testset "nrm2/asum" begin # OpenBLAS always return 0.0
-        @test_throws "input" BLAS.nrm2(a)
-        @test_throws "input" BLAS.asum(a)
-    end
-    # All level2 reject 0-stride array.
-    @testset "gemv!" begin
-        @test_throws "input" BLAS.gemv!('N', true, A, a, false, copy(b))
-        @test_throws "dest" BLAS.gemv!('N', true, A, copy(a), false, b)
-    end
-end
-
-# Make sure we can use `Base.libblas_name`.  Avoid causing
-# https://github.com/JuliaLang/julia/issues/48427 again.
-@testset "libblas_name" begin
-    dot_sym = dlsym(dlopen(Base.libblas_name), "cblas_ddot" * (Sys.WORD_SIZE == 64 ? "64_" : ""))
-    @test 23.0 === @ccall $(dot_sym)(2::Int, [2.0, 3.0]::Ref{Cdouble}, 1::Int, [4.0, 5.0]::Ref{Cdouble}, 1::Int)::Cdouble
-end
-
-end # module TestBLAS
diff --git a/stdlib/LinearAlgebra/test/bunchkaufman.jl b/stdlib/LinearAlgebra/test/bunchkaufman.jl
deleted file mode 100644
index 613e4d09a3cc6..0000000000000
--- a/stdlib/LinearAlgebra/test/bunchkaufman.jl
+++ /dev/null
@@ -1,199 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestBunchKaufman
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted
-using Base: getproperty
-
-n = 10
-
-# Split n into 2 parts for tests needing two matrices
-n1 = div(n, 2)
-n2 = 2*n1
-
-Random.seed!(12343212)
-
-areal = randn(n,n)/2
-aimg  = randn(n,n)/2
-a2real = randn(n,n)/2
-a2img  = randn(n,n)/2
-breal = randn(n,2)/2
-bimg  = randn(n,2)/2
-
-@testset "$eltya argument A" for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int)
-    a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
-    a2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(a2real, a2img) : a2real)
-    asym = transpose(a) + a                  # symmetric indefinite
-    aher = a' + a                  # Hermitian indefinite
-    apd  = a' * a                  # Positive-definite
-    for (a, a2, aher, apd) in ((a, a2, aher, apd),
-                               (view(a, 1:n, 1:n),
-                                view(a2, 1:n, 1:n),
-                                view(aher, 1:n, 1:n),
-                                view(apd , 1:n, 1:n)))
-        ε = εa = eps(abs(float(one(eltya))))
-
-        # check that factorize gives a Bunch-Kaufman
-        @test isa(factorize(asym), LinearAlgebra.BunchKaufman)
-        @test isa(factorize(aher), LinearAlgebra.BunchKaufman)
-        @testset "$uplo Bunch-Kaufman factor of indefinite matrix" for uplo in (:L, :U)
-            bc1 = bunchkaufman(Hermitian(aher, uplo))
-            @test LinearAlgebra.issuccess(bc1)
-            @test logabsdet(bc1)[1] ≈ log(abs(det(bc1)))
-            if eltya <: Real
-                @test logabsdet(bc1)[2] == sign(det(bc1))
-            else
-                @test logabsdet(bc1)[2] ≈ sign(det(bc1))
-            end
-            @test inv(bc1)*aher ≈ Matrix(I, n, n)
-            @testset for rook in (false, true)
-                @test inv(bunchkaufman(Symmetric(transpose(a) + a, uplo), rook))*(transpose(a) + a) ≈ Matrix(I, n, n)
-                if eltya <: BlasFloat
-                    # test also bunchkaufman! without explicit type tag
-                    # no bunchkaufman! method for Int ... yet
-                    @test inv(bunchkaufman!(transpose(a) + a, rook))*(transpose(a) + a) ≈ Matrix(I, n, n)
-                end
-                @test size(bc1) == size(bc1.LD)
-                @test size(bc1, 1) == size(bc1.LD, 1)
-                @test size(bc1, 2) == size(bc1.LD, 2)
-                if eltya <: BlasReal
-                    @test_throws ArgumentError bunchkaufman(a)
-                end
-                # Test extraction of factors
-                if eltya <: Real
-                    @test getproperty(bc1, uplo)*bc1.D*getproperty(bc1, uplo)' ≈ aher[bc1.p, bc1.p]
-                    @test getproperty(bc1, uplo)*bc1.D*getproperty(bc1, uplo)' ≈ bc1.P*aher*bc1.P'
-                end
-
-                bc1 = bunchkaufman(Symmetric(asym, uplo))
-                @test getproperty(bc1, uplo)*bc1.D*transpose(getproperty(bc1, uplo)) ≈ asym[bc1.p, bc1.p]
-                @test getproperty(bc1, uplo)*bc1.D*transpose(getproperty(bc1, uplo)) ≈ bc1.P*asym*transpose(bc1.P)
-                @test_throws ErrorException bc1.Z
-                @test_throws ArgumentError uplo === :L ? bc1.U : bc1.L
-            end
-            # test Base.iterate
-            ref_objs = (bc1.D, uplo === :L ? bc1.L : bc1.U, bc1.p)
-            for (bki, bkobj) in enumerate(bc1)
-                @test bkobj == ref_objs[bki]
-            end
-            if eltya <: BlasFloat
-                @test convert(LinearAlgebra.BunchKaufman{eltya}, bc1) === bc1
-                @test convert(LinearAlgebra.Factorization{eltya}, bc1) === bc1
-                if eltya <: BlasReal
-                    @test convert(LinearAlgebra.Factorization{Float16}, bc1) == convert(LinearAlgebra.BunchKaufman{Float16}, bc1)
-                elseif eltya <: BlasComplex
-                    @test convert(LinearAlgebra.Factorization{ComplexF16}, bc1) == convert(LinearAlgebra.BunchKaufman{ComplexF16}, bc1)
-                end
-            end
-            @test Base.propertynames(bc1) == (:p, :P, :L, :U, :D)
-        end
-
-        @testset "$eltyb argument B" for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int)
-            b = eltyb == Int ? rand(1:5, n, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? complex.(breal, bimg) : breal)
-            for b in (b, view(b, 1:n, 1:2))
-                εb = eps(abs(float(one(eltyb))))
-                ε = max(εa,εb)
-
-                @testset "$uplo Bunch-Kaufman factor of indefinite matrix" for uplo in (:L, :U)
-                    bc1 = bunchkaufman(Hermitian(aher, uplo))
-                    @test aher*(bc1\b) ≈ b atol=1000ε
-                end
-
-                @testset "$uplo Bunch-Kaufman factors of a pos-def matrix" for uplo in (:U, :L)
-                    @testset "rook pivoting: $rook" for rook in (false, true)
-                        bc2 = bunchkaufman(Hermitian(apd, uplo), rook)
-                        @test LinearAlgebra.issuccess(bc2)
-                        bks = split(sprint(show, "text/plain", bc2), "\n")
-                        @test bks[1] == summary(bc2)
-                        @test bks[2] == "D factor:"
-                        @test bks[4+n] == "$uplo factor:"
-                        @test bks[6+2n] == "permutation:"
-                        @test logdet(bc2) ≈ log(det(bc2))
-                        @test logabsdet(bc2)[1] ≈ log(abs(det(bc2)))
-                        @test logabsdet(bc2)[2] == sign(det(bc2))
-                        @test inv(bc2)*apd ≈ Matrix(I, n, n)
-                        @test apd*(bc2\b) ≈ b rtol=eps(cond(apd))
-                        @test ishermitian(bc2)
-                        @test !issymmetric(bc2) || eltya <: Real
-                    end
-                end
-            end
-        end
-    end
-end
-
-@testset "Singular matrices" begin
-    R = Float64[1 0; 0 0]
-    C = ComplexF64[1 0; 0 0]
-    for A in (R, Symmetric(R), C, Hermitian(C))
-        @test_throws SingularException bunchkaufman(A)
-        @test_throws SingularException bunchkaufman!(copy(A))
-        @test_throws SingularException bunchkaufman(A; check = true)
-        @test_throws SingularException bunchkaufman!(copy(A); check = true)
-        @test !issuccess(bunchkaufman(A; check = false))
-        @test !issuccess(bunchkaufman!(copy(A); check = false))
-    end
-    F = bunchkaufman(R; check = false)
-    @test sprint(show, "text/plain", F) == "Failed factorization of type $(typeof(F))"
-end
-
-@testset "test example due to @timholy in PR 15354" begin
-    A = rand(6,5); A = complex(A'*A) # to avoid calling the real-lhs-complex-rhs method
-    F = cholesky(A);
-    v6 = rand(ComplexF64, 6)
-    v5 = view(v6, 1:5)
-    @test F\v5 == F\v6[1:5]
-end
-
-@testset "issue #32080" begin
-    A = Symmetric([-5 -9 9; -9 4 1; 9 1 2])
-    B = bunchkaufman(A, true)
-    @test B.U * B.D * B.U' ≈ A[B.p, B.p]
-end
-
-@test_throws DomainError logdet(bunchkaufman([-1 -1; -1 1]))
-@test logabsdet(bunchkaufman([8 4; 4 2]; check = false))[1] == -Inf
-
-@testset "0x0 matrix" begin
-    for ul in (:U, :L)
-        B = bunchkaufman(Symmetric(ones(0, 0), ul))
-        @test isa(B, BunchKaufman)
-        @test B.D == Tridiagonal([], [], [])
-        @test B.P == ones(0, 0)
-        @test B.p == []
-        if ul === :U
-            @test B.U == UnitUpperTriangular(ones(0, 0))
-            @test_throws ArgumentError B.L
-        else
-            @test B.L == UnitLowerTriangular(ones(0, 0))
-            @test_throws ArgumentError B.U
-        end
-    end
-end
-
-@testset "adjoint of BunchKaufman" begin
-    Ar = randn(5, 5)
-    Ar = Ar + Ar'
-    Actmp = complex.(randn(5, 5), randn(5, 5))
-    Ac1 = Actmp + Actmp'
-    Ac2 = Actmp + transpose(Actmp)
-    b = ones(size(Ar, 1))
-
-    F = bunchkaufman(Ar)
-    @test F\b == F'\b
-
-    F = bunchkaufman(Ac1)
-    @test F\b == F'\b
-
-    F = bunchkaufman(Ac2)
-    @test_throws ArgumentError("adjoint not implemented for complex symmetric matrices") F'
-end
-
-@testset "BunchKaufman for AbstractMatrix" begin
-    S = SymTridiagonal(fill(2.0, 4), ones(3))
-    B = bunchkaufman(S)
-    @test B.U * B.D * B.U' ≈ S
-end
-
-end # module TestBunchKaufman
diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl
deleted file mode 100644
index a795eb8d44a03..0000000000000
--- a/stdlib/LinearAlgebra/test/cholesky.jl
+++ /dev/null
@@ -1,551 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestCholesky
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted,
-    PosDefException, RankDeficientException, chkfullrank
-
-function unary_ops_tests(a, ca, tol; n=size(a, 1))
-    @test inv(ca)*a ≈ Matrix(I, n, n)
-    @test a*inv(ca) ≈ Matrix(I, n, n)
-    @test abs((det(ca) - det(a))/det(ca)) <= tol # Ad hoc, but statistically verified, revisit
-    @test logdet(ca) ≈ logdet(a)
-    @test logdet(ca) ≈ log(det(ca))  # logdet is less likely to overflow
-    logabsdet_ca = logabsdet(ca)
-    logabsdet_a = logabsdet(a)
-    @test logabsdet_ca[1] ≈ logabsdet_a[1]
-    @test logabsdet_ca[2] ≈ logabsdet_a[2]
-    @test isposdef(ca)
-    @test_throws ErrorException ca.Z
-    @test size(ca) == size(a)
-    @test Array(copy(ca)) ≈ a
-end
-
-function factor_recreation_tests(a_U, a_L)
-    c_U = cholesky(a_U)
-    c_L = cholesky(a_L)
-    cl  = c_L.U
-    ls = c_L.L
-    @test Array(c_U) ≈ Array(c_L) ≈ a_U
-    @test ls*ls' ≈ a_U
-    @test triu(c_U.factors) ≈ c_U.U
-    @test tril(c_L.factors) ≈ c_L.L
-    @test istriu(cl)
-    @test cl'cl ≈ a_U
-    @test cl'cl ≈ a_L
-end
-
-@testset "core functionality" begin
-    n = 10
-
-    # Split n into 2 parts for tests needing two matrices
-    n1 = div(n, 2)
-    n2 = 2*n1
-
-    Random.seed!(12344)
-
-    areal = randn(n,n)/2
-    aimg  = randn(n,n)/2
-    a2real = randn(n,n)/2
-    a2img  = randn(n,n)/2
-    breal = randn(n,2)/2
-    bimg  = randn(n,2)/2
-
-    for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int)
-        a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
-        a2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(a2real, a2img) : a2real)
-
-        ε = εa = eps(abs(float(one(eltya))))
-
-        # Test of symmetric pos. def. strided matrix
-        apd  = a'*a
-        @inferred cholesky(apd)
-        capd  = factorize(apd)
-        r     = capd.U
-        κ     = cond(apd, 1) #condition number
-
-        unary_ops_tests(apd, capd, ε*κ*n)
-        if eltya != Int
-            @test Factorization{eltya}(capd) === capd
-            if eltya <: Real
-                @test Array(Factorization{complex(eltya)}(capd)) ≈ Array(factorize(complex(apd)))
-                @test eltype(Factorization{complex(eltya)}(capd)) == complex(eltya)
-            end
-        end
-        @testset "throw for non-square input" begin
-            A = rand(eltya, 2, 3)
-            @test_throws DimensionMismatch cholesky(A)
-            @test_throws DimensionMismatch cholesky!(A)
-        end
-
-        #Test error bound on reconstruction of matrix: LAWNS 14, Lemma 2.1
-
-        #these tests were failing on 64-bit linux when inside the inner loop
-        #for eltya = ComplexF32 and eltyb = Int. The E[i,j] had NaN32 elements
-        #but only with Random.seed!(1234321) set before the loops.
-        E = abs.(apd - r'*r)
-        for i=1:n, j=1:n
-            @test E[i,j] <= (n+1)ε/(1-(n+1)ε)*real(sqrt(apd[i,i]*apd[j,j]))
-        end
-        E = abs.(apd - Matrix(capd))
-        for i=1:n, j=1:n
-            @test E[i,j] <= (n+1)ε/(1-(n+1)ε)*real(sqrt(apd[i,i]*apd[j,j]))
-        end
-        @test LinearAlgebra.issuccess(capd)
-        @inferred(logdet(capd))
-
-        apos = apd[1,1]
-        @test all(x -> x ≈ √apos, cholesky(apos).factors)
-
-        # Test cholesky with Symmetric/Hermitian upper/lower
-        apds  = Symmetric(apd)
-        apdsL = Symmetric(apd, :L)
-        apdh  = Hermitian(apd)
-        apdhL = Hermitian(apd, :L)
-        if eltya <: Real
-            capds = cholesky(apds)
-            unary_ops_tests(apds, capds, ε*κ*n)
-            if eltya <: BlasReal
-                capds = cholesky!(copy(apds))
-                unary_ops_tests(apds, capds, ε*κ*n)
-            end
-            ulstring = sprint((t, s) -> show(t, "text/plain", s), capds.UL)
-            @test sprint((t, s) -> show(t, "text/plain", s), capds) == "$(typeof(capds))\nU factor:\n$ulstring"
-        else
-            capdh = cholesky(apdh)
-            unary_ops_tests(apdh, capdh, ε*κ*n)
-            capdh = cholesky!(copy(apdh))
-            unary_ops_tests(apdh, capdh, ε*κ*n)
-            capdh = cholesky!(copy(apd))
-            unary_ops_tests(apd, capdh, ε*κ*n)
-            ulstring = sprint((t, s) -> show(t, "text/plain", s), capdh.UL)
-            @test sprint((t, s) -> show(t, "text/plain", s), capdh) == "$(typeof(capdh))\nU factor:\n$ulstring"
-        end
-
-        # test cholesky of 2x2 Strang matrix
-        S = SymTridiagonal{eltya}([2, 2], [-1])
-        for uplo in (:U, :L)
-            @test Matrix(@inferred cholesky(Hermitian(S, uplo))) ≈ S
-            if eltya <: Real
-                @test Matrix(@inferred cholesky(Symmetric(S, uplo))) ≈ S
-            end
-        end
-        @test Matrix(cholesky(S).U) ≈ [2 -1; 0 sqrt(eltya(3))] / sqrt(eltya(2))
-        @test Matrix(cholesky(S)) ≈ S
-
-        # test extraction of factor and re-creating original matrix
-        if eltya <: Real
-            factor_recreation_tests(apds, apdsL)
-        else
-            factor_recreation_tests(apdh, apdhL)
-        end
-
-        #pivoted upper Cholesky
-        if eltya != BigFloat
-            cpapd = cholesky(apdh, RowMaximum())
-            unary_ops_tests(apdh, cpapd, ε*κ*n)
-            @test rank(cpapd) == n
-            @test all(diff(diag(real(cpapd.factors))).<=0.) # diagonal should be non-increasing
-
-            @test cpapd.P*cpapd.L*cpapd.U*cpapd.P' ≈ apd
-        end
-
-        for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int)
-            b = eltyb == Int ? rand(1:5, n, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? complex.(breal, bimg) : breal)
-            εb = eps(abs(float(one(eltyb))))
-            ε = max(εa,εb)
-
-            for b in (b, view(b, 1:n, 1)) # Array and SubArray
-
-                # Test error bound on linear solver: LAWNS 14, Theorem 2.1
-                # This is a surprisingly loose bound
-                x = capd\b
-                @test norm(x-apd\b,1)/norm(x,1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                @test norm(apd*x-b,1)/norm(b,1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-
-                @test norm(a*(capd\(a'*b)) - b,1)/norm(b,1) <= ε*κ*n # Ad hoc, revisit
-
-                if eltya != BigFloat && eltyb != BigFloat
-                    lapd = cholesky(apdhL)
-                    @test norm(apd * (lapd\b) - b)/norm(b) <= ε*κ*n
-                    @test norm(apd * (lapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
-                end
-
-                if eltya != BigFloat && eltyb != BigFloat # Note! Need to implement pivoted Cholesky decomposition in julia
-
-                    cpapd = cholesky(apdh, RowMaximum())
-                    @test norm(apd * (cpapd\b) - b)/norm(b) <= ε*κ*n # Ad hoc, revisit
-                    @test norm(apd * (cpapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
-
-                    lpapd = cholesky(apdhL, RowMaximum())
-                    @test norm(apd * (lpapd\b) - b)/norm(b) <= ε*κ*n # Ad hoc, revisit
-                    @test norm(apd * (lpapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
-                end
-            end
-        end
-
-        for eltyb in (Float64, ComplexF64)
-            Breal = convert(Matrix{BigFloat}, randn(n,n)/2)
-            Bimg  = convert(Matrix{BigFloat}, randn(n,n)/2)
-            B = (eltya <: Complex || eltyb <: Complex) ? complex.(Breal, Bimg) : Breal
-            εb = eps(abs(float(one(eltyb))))
-            ε = max(εa,εb)
-
-            for B in (B, view(B, 1:n, 1:n)) # Array and SubArray
-
-                # Test error bound on linear solver: LAWNS 14, Theorem 2.1
-                # This is a surprisingly loose bound
-                BB = copy(B)
-                ldiv!(capd, BB)
-                @test norm(apd \ B - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                @test norm(apd * BB - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                if eltya != BigFloat
-                    cpapd = cholesky(apdh, RowMaximum())
-                    BB = copy(B)
-                    ldiv!(cpapd, BB)
-                    @test norm(apd \ B - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                    @test norm(apd * BB - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                end
-            end
-        end
-
-        @testset "solve with generic Cholesky" begin
-            Breal = convert(Matrix{BigFloat}, randn(n,n)/2)
-            Bimg  = convert(Matrix{BigFloat}, randn(n,n)/2)
-            B = eltya <: Complex ? complex.(Breal, Bimg) : Breal
-            εb = eps(abs(float(one(eltype(B)))))
-            ε = max(εa,εb)
-
-            for B in (B, view(B, 1:n, 1:n)) # Array and SubArray
-
-                # Test error bound on linear solver: LAWNS 14, Theorem 2.1
-                # This is a surprisingly loose bound
-                cpapd = cholesky(eltya <: Complex ? apdh : apds)
-                BB = copy(B)
-                rdiv!(BB, cpapd)
-                @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                cpapd = cholesky(eltya <: Complex ? apdhL : apdsL)
-                BB = copy(B)
-                rdiv!(BB, cpapd)
-                @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                if eltya != BigFloat
-                    cpapd = cholesky(eltya <: Complex ? apdh : apds, RowMaximum())
-                    BB = copy(B)
-                    rdiv!(BB, cpapd)
-                    @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                    @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                    cpapd = cholesky(eltya <: Complex ? apdhL : apdsL, RowMaximum())
-                    BB = copy(B)
-                    rdiv!(BB, cpapd)
-                    @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                    @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                end
-            end
-        end
-        if eltya <: BlasFloat
-            @testset "generic cholesky!" begin
-                if eltya <: Complex
-                    A = complex.(randn(5,5), randn(5,5))
-                else
-                    A = randn(5,5)
-                end
-                A = convert(Matrix{eltya}, A'A)
-                @test Matrix(cholesky(A).L) ≈ Matrix(invoke(LinearAlgebra._chol!, Tuple{AbstractMatrix, Type{LowerTriangular}}, copy(A), LowerTriangular)[1])
-                @test Matrix(cholesky(A).U) ≈ Matrix(invoke(LinearAlgebra._chol!, Tuple{AbstractMatrix, Type{UpperTriangular}}, copy(A), UpperTriangular)[1])
-            end
-        end
-    end
-end
-
-@testset "behavior for non-positive definite matrices" for T in (Float64, ComplexF64, BigFloat)
-    A = T[1 2; 2 1]
-    B = T[1 2; 0 1]
-    C = T[2 0; 0 0]
-    # check = (true|false)
-    for M in (A, Hermitian(A), B, C)
-        @test_throws PosDefException cholesky(M)
-        @test_throws PosDefException cholesky!(copy(M))
-        @test_throws PosDefException cholesky(M; check = true)
-        @test_throws PosDefException cholesky!(copy(M); check = true)
-        @test !LinearAlgebra.issuccess(cholesky(M; check = false))
-        @test !LinearAlgebra.issuccess(cholesky!(copy(M); check = false))
-    end
-    if T !== BigFloat # generic pivoted cholesky is not implemented
-        for M in (A, Hermitian(A), B)
-            @test_throws RankDeficientException cholesky(M, RowMaximum())
-            @test_throws RankDeficientException cholesky!(copy(M), RowMaximum())
-            @test_throws RankDeficientException cholesky(M, RowMaximum(); check = true)
-            @test_throws RankDeficientException cholesky!(copy(M), RowMaximum(); check = true)
-            @test !LinearAlgebra.issuccess(cholesky(M, RowMaximum(); check = false))
-            @test !LinearAlgebra.issuccess(cholesky!(copy(M), RowMaximum(); check = false))
-            C = cholesky(M, RowMaximum(); check = false)
-            @test_throws RankDeficientException chkfullrank(C)
-            C = cholesky!(copy(M), RowMaximum(); check = false)
-            @test_throws RankDeficientException chkfullrank(C)
-        end
-    end
-    @test !isposdef(A)
-    str = sprint((io, x) -> show(io, "text/plain", x), cholesky(A; check = false))
-end
-
-@testset "Cholesky factor of Matrix with non-commutative elements, here 2x2-matrices" begin
-    X = Matrix{Float64}[0.1*rand(2,2) for i in 1:3, j = 1:3]
-    L = Matrix(LinearAlgebra._chol!(X*X', LowerTriangular)[1])
-    U = Matrix(LinearAlgebra._chol!(X*X', UpperTriangular)[1])
-    XX = Matrix(X*X')
-
-    @test sum(sum(norm, L*L' - XX)) < eps()
-    @test sum(sum(norm, U'*U - XX)) < eps()
-end
-
-@testset "Non-strided Cholesky solves" begin
-    B = randn(5, 5)
-    v = rand(5)
-    @test cholesky(Diagonal(v)) \ B ≈ Diagonal(v) \ B
-    @test B / cholesky(Diagonal(v)) ≈ B / Diagonal(v)
-    @test inv(cholesky(Diagonal(v)))::Diagonal ≈ Diagonal(1 ./ v)
-end
-
-struct WrappedVector{T} <: AbstractVector{T}
-    data::Vector{T}
-end
-Base.copy(v::WrappedVector) = WrappedVector(copy(v.data))
-Base.size(v::WrappedVector) = size(v.data)
-Base.getindex(v::WrappedVector, i::Integer) = getindex(v.data, i)
-Base.setindex!(v::WrappedVector, val, i::Integer) = setindex!(v.data, val, i)
-
-@testset "cholesky up- and downdates" begin
-    A = complex.(randn(10,5), randn(10, 5))
-    v = complex.(randn(5), randn(5))
-    w = WrappedVector(v)
-    for uplo in (:U, :L)
-        AcA = A'*A
-        BcB = AcA + v*v'
-        BcB = (BcB + BcB')/2
-        F = cholesky(Hermitian(AcA, uplo))
-        G = cholesky(Hermitian(BcB, uplo))
-        @test getproperty(lowrankupdate(F, v), uplo) ≈ getproperty(G, uplo)
-        @test getproperty(lowrankupdate(F, w), uplo) ≈ getproperty(G, uplo)
-        @test_throws DimensionMismatch lowrankupdate(F, Vector{eltype(v)}(undef,length(v)+1))
-        @test getproperty(lowrankdowndate(G, v), uplo) ≈ getproperty(F, uplo)
-        @test getproperty(lowrankdowndate(G, w), uplo) ≈ getproperty(F, uplo)
-        @test_throws DimensionMismatch lowrankdowndate(G, Vector{eltype(v)}(undef,length(v)+1))
-    end
-end
-
-@testset "issue #13243, unexpected nans in complex cholesky" begin
-    apd = [5.8525753f0 + 0.0f0im -0.79540455f0 + 0.7066077f0im 0.98274714f0 + 1.3824869f0im 2.619998f0 + 1.8532984f0im -1.8306153f0 - 1.2336911f0im 0.32275113f0 + 0.015575029f0im 2.1968813f0 + 1.0640624f0im 0.27894387f0 + 0.97911835f0im 3.0476584f0 + 0.18548489f0im 0.3842994f0 + 0.7050991f0im
-        -0.79540455f0 - 0.7066077f0im 8.313246f0 + 0.0f0im -1.8076122f0 - 0.8882447f0im 0.47806996f0 + 0.48494184f0im 0.5096429f0 - 0.5395974f0im -0.7285097f0 - 0.10360408f0im -1.1760061f0 - 2.7146957f0im -0.4271084f0 + 0.042899966f0im -1.7228563f0 + 2.8335886f0im 1.8942566f0 + 0.6389735f0im
-        0.98274714f0 - 1.3824869f0im -1.8076122f0 + 0.8882447f0im 9.367975f0 + 0.0f0im -0.1838578f0 + 0.6468568f0im -1.8338387f0 + 0.7064959f0im 0.041852742f0 - 0.6556877f0im 2.5673025f0 + 1.9732997f0im -1.1148382f0 - 0.15693812f0im 2.4704504f0 - 1.0389464f0im 1.0858271f0 - 1.298006f0im
-        2.619998f0 - 1.8532984f0im 0.47806996f0 - 0.48494184f0im -0.1838578f0 - 0.6468568f0im 3.1117508f0 + 0.0f0im -1.956626f0 + 0.22825956f0im 0.07081801f0 - 0.31801307f0im 0.3698375f0 - 0.5400855f0im 0.80686307f0 + 1.5315914f0im 1.5649154f0 - 1.6229297f0im -0.112077385f0 + 1.2014246f0im
-        -1.8306153f0 + 1.2336911f0im 0.5096429f0 + 0.5395974f0im -1.8338387f0 - 0.7064959f0im -1.956626f0 - 0.22825956f0im 3.6439795f0 + 0.0f0im -0.2594722f0 + 0.48786148f0im -0.47636223f0 - 0.27821827f0im -0.61608654f0 - 2.01858f0im -2.7767487f0 + 1.7693765f0im 0.048102796f0 - 0.9741874f0im
-        0.32275113f0 - 0.015575029f0im -0.7285097f0 + 0.10360408f0im 0.041852742f0 + 0.6556877f0im 0.07081801f0 + 0.31801307f0im -0.2594722f0 - 0.48786148f0im 3.624376f0 + 0.0f0im -1.6697118f0 + 0.4017511f0im -1.4397877f0 - 0.7550918f0im -0.31456697f0 - 1.0403451f0im -0.31978557f0 + 0.13701046f0im
-        2.1968813f0 - 1.0640624f0im -1.1760061f0 + 2.7146957f0im 2.5673025f0 - 1.9732997f0im 0.3698375f0 + 0.5400855f0im -0.47636223f0 + 0.27821827f0im -1.6697118f0 - 0.4017511f0im 6.8273163f0 + 0.0f0im -0.10051322f0 + 0.24303961f0im 1.4415971f0 + 0.29750675f0im 1.221786f0 - 0.85654986f0im
-        0.27894387f0 - 0.97911835f0im -0.4271084f0 - 0.042899966f0im -1.1148382f0 + 0.15693812f0im 0.80686307f0 - 1.5315914f0im -0.61608654f0 + 2.01858f0im -1.4397877f0 + 0.7550918f0im -0.10051322f0 - 0.24303961f0im 3.4057708f0 + 0.0f0im -0.5856801f0 - 1.0203559f0im 0.7103452f0 + 0.8422135f0im
-        3.0476584f0 - 0.18548489f0im -1.7228563f0 - 2.8335886f0im 2.4704504f0 + 1.0389464f0im 1.5649154f0 + 1.6229297f0im -2.7767487f0 - 1.7693765f0im -0.31456697f0 + 1.0403451f0im 1.4415971f0 - 0.29750675f0im -0.5856801f0 + 1.0203559f0im 7.005772f0 + 0.0f0im -0.9617417f0 - 1.2486815f0im
-        0.3842994f0 - 0.7050991f0im 1.8942566f0 - 0.6389735f0im 1.0858271f0 + 1.298006f0im -0.112077385f0 - 1.2014246f0im 0.048102796f0 + 0.9741874f0im -0.31978557f0 - 0.13701046f0im 1.221786f0 + 0.85654986f0im 0.7103452f0 - 0.8422135f0im -0.9617417f0 + 1.2486815f0im 3.4629636f0 + 0.0f0im]
-    b = [-0.905011814118756 + 0.2847570854574069im -0.7122162951294634 - 0.630289556702497im
-        -0.7620356655676837 + 0.15533508334193666im 0.39947219167701153 - 0.4576746001199889im
-        -0.21782716937787788 - 0.9222220085490986im -0.727775859267237 + 0.50638268521728im
-        -1.0509472322215125 + 0.5022165705328413im -0.7264975746431271 + 0.31670415674097235im
-        -0.6650468984506477 - 0.5000967284800251im -0.023682508769195098 + 0.18093440285319276im
-        -0.20604111555491242 + 0.10570814584017311im 0.562377322638969 - 0.2578030745663871im
-        -0.3451346708401685 + 1.076948486041297im 0.9870834574024372 - 0.2825689605519449im
-        0.25336108035924787 + 0.975317836492159im 0.0628393808469436 - 0.1253397353973715im
-        0.11192755545114 - 0.1603741874112385im 0.8439562576196216 + 1.0850814110398734im
-        -1.0568488936791578 - 0.06025820467086475im 0.12696236014017806 - 0.09853584666755086im]
-    cholesky(Hermitian(apd, :L), RowMaximum()) \ b
-    r = factorize(apd).U
-    E = abs.(apd - r'*r)
-    ε = eps(abs(float(one(ComplexF32))))
-    n = 10
-    for i=1:n, j=1:n
-        @test E[i,j] <= (n+1)ε/(1-(n+1)ε)*real(sqrt(apd[i,i]*apd[j,j]))
-    end
-end
-
-@testset "fail for non-BLAS element types" begin
-    @test_throws ArgumentError cholesky!(Hermitian(rand(Float16, 5,5)), RowMaximum())
-end
-
-@testset "cholesky Diagonal" begin
-    # real
-    d = abs.(randn(3)) .+ 0.1
-    D = Diagonal(d)
-    CD = cholesky(D)
-    CM = cholesky(Matrix(D))
-    @test CD isa Cholesky{Float64}
-    @test CD.U ≈ Diagonal(.√d) ≈ CM.U
-    @test D ≈ CD.L * CD.U
-    @test CD.info == 0
-
-    F = cholesky(Hermitian(I(3)))
-    @test F isa Cholesky{Float64,<:Diagonal}
-    @test Matrix(F) ≈ I(3)
-
-    # real, failing
-    @test_throws PosDefException cholesky(Diagonal([1.0, -2.0]))
-    Dnpd = cholesky(Diagonal([1.0, -2.0]); check = false)
-    @test Dnpd.info == 2
-
-    # complex
-    D = complex(D)
-    CD = cholesky(Hermitian(D))
-    CM = cholesky(Matrix(Hermitian(D)))
-    @test CD isa Cholesky{ComplexF64,<:Diagonal}
-    @test CD.U ≈ Diagonal(.√d) ≈ CM.U
-    @test D ≈ CD.L * CD.U
-    @test CD.info == 0
-
-    # complex, failing
-    D[2, 2] = 0.0 + 0im
-    @test_throws PosDefException cholesky(D)
-    Dnpd = cholesky(D; check = false)
-    @test Dnpd.info == 2
-
-    # InexactError for Int
-    @test_throws InexactError cholesky!(Diagonal([2, 1]))
-end
-
-@testset "Cholesky for AbstractMatrix" begin
-    S = SymTridiagonal(fill(2.0, 4), ones(3))
-    C = cholesky(S)
-    @test C.L * C.U ≈ S
-end
-
-@testset "constructor with non-BlasInt arguments" begin
-
-    x = rand(5,5)
-    chol = cholesky(x'x)
-
-    factors, uplo, info = chol.factors, chol.uplo, chol.info
-
-    @test Cholesky(factors, uplo, Int32(info)) == chol
-    @test Cholesky(factors, uplo, Int64(info)) == chol
-
-    cholp = cholesky(x'x, RowMaximum())
-
-    factors, uplo, piv, rank, tol, info =
-        cholp.factors, cholp.uplo, cholp.piv, cholp.rank, cholp.tol, cholp.info
-
-    @test CholeskyPivoted(factors, uplo, piv, Int32(rank), tol, info) == cholp
-    @test CholeskyPivoted(factors, uplo, piv, Int64(rank), tol, info) == cholp
-
-    @test CholeskyPivoted(factors, uplo, piv, rank, tol, Int32(info)) == cholp
-    @test CholeskyPivoted(factors, uplo, piv, rank, tol, Int64(info)) == cholp
-
-end
-
-@testset "issue #33704, casting low-rank CholeskyPivoted to Matrix" begin
-    A = randn(1,8)
-    B = A'A
-    C = cholesky(B, RowMaximum(), check=false)
-    @test B ≈ Matrix(C)
-end
-
-@testset "CholeskyPivoted and Factorization" begin
-    A = randn(8,8)
-    B = A'A
-    C = cholesky(B, RowMaximum(), check=false)
-    @test CholeskyPivoted{eltype(C)}(C) === C
-    @test Factorization{eltype(C)}(C) === C
-    @test Array(CholeskyPivoted{complex(eltype(C))}(C)) ≈ Array(cholesky(complex(B), RowMaximum(), check=false))
-    @test Array(Factorization{complex(eltype(C))}(C)) ≈ Array(cholesky(complex(B), RowMaximum(), check=false))
-    @test eltype(Factorization{complex(eltype(C))}(C)) == complex(eltype(C))
-end
-
-@testset "REPL printing of CholeskyPivoted" begin
-    A = randn(8,8)
-    B = A'A
-    C = cholesky(B, RowMaximum(), check=false)
-    cholstring = sprint((t, s) -> show(t, "text/plain", s), C)
-    rankstring = "$(C.uplo) factor with rank $(rank(C)):"
-    factorstring = sprint((t, s) -> show(t, "text/plain", s), C.uplo == 'U' ? C.U : C.L)
-    permstring   = sprint((t, s) -> show(t, "text/plain", s), C.p)
-    @test cholstring == "$(summary(C))\n$rankstring\n$factorstring\npermutation:\n$permstring"
-end
-
-@testset "destructuring for Cholesky[Pivoted]" begin
-    for val in (NoPivot(), RowMaximum())
-        A = rand(8, 8)
-        B = A'A
-        C = cholesky(B, val, check=false)
-        l, u = C
-        @test l == C.L
-        @test u == C.U
-    end
-end
-
-@testset "issue #37356, diagonal elements of hermitian generic matrix" begin
-    B = Hermitian(hcat([one(BigFloat) + im]))
-    @test Matrix(cholesky(B)) ≈ B
-    C = Hermitian(hcat([one(BigFloat) + im]), :L)
-    @test Matrix(cholesky(C)) ≈ C
-end
-
-@testset "constructing a Cholesky factor from a triangular matrix" begin
-    A = [1.0 2.0; 3.0 4.0]
-    let
-        U = UpperTriangular(A)
-        C = Cholesky(U)
-        @test C isa Cholesky{Float64}
-        @test C.U == U
-        @test C.L == U'
-    end
-    let
-        L = LowerTriangular(A)
-        C = Cholesky(L)
-        @test C isa Cholesky{Float64}
-        @test C.L == L
-        @test C.U == L'
-    end
-end
-
-@testset "adjoint of Cholesky" begin
-    A = randn(5, 5)
-    A = A'A
-    F = cholesky(A)
-    b = ones(size(A, 1))
-    @test F\b == F'\b
-end
-
-@testset "Float16" begin
-    A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
-    B = cholesky(A)
-    B32 = cholesky(Float32.(A))
-    @test B isa Cholesky{Float16, Matrix{Float16}}
-    @test B.U isa UpperTriangular{Float16, Matrix{Float16}}
-    @test B.L isa LowerTriangular{Float16, Matrix{Float16}}
-    @test B.UL isa UpperTriangular{Float16, Matrix{Float16}}
-    @test B.U ≈ B32.U
-    @test B.L ≈ B32.L
-    @test B.UL ≈ B32.UL
-    @test Matrix(B) ≈ A
-    B = cholesky(A, RowMaximum())
-    B32 = cholesky(Float32.(A), RowMaximum())
-    @test B isa CholeskyPivoted{Float16,Matrix{Float16}}
-    @test B.U isa UpperTriangular{Float16, Matrix{Float16}}
-    @test B.L isa LowerTriangular{Float16, Matrix{Float16}}
-    @test B.U ≈ B32.U
-    @test B.L ≈ B32.L
-    @test Matrix(B) ≈ A
-end
-
-@testset "det and logdet" begin
-    A = [4083 3825 5876 2048 4470 5490;
-         3825 3575 5520 1920 4200 5140;
-         5876 5520 8427 2940 6410 7903;
-         2048 1920 2940 1008 2240 2740;
-         4470 4200 6410 2240 4875 6015;
-         5490 5140 7903 2740 6015 7370]
-    B = cholesky(A, RowMaximum(), check=false)
-    @test det(B)  ==  0.0
-    @test det(B)  ≈  det(A) atol=eps()
-    @test logdet(B)  ==  -Inf
-    @test logabsdet(B)[1] == -Inf
- end
-
-end # module TestCholesky
diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl
deleted file mode 100644
index efeedf93ebd1f..0000000000000
--- a/stdlib/LinearAlgebra/test/dense.jl
+++ /dev/null
@@ -1,1232 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestDense
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasComplex, BlasFloat, BlasReal
-
-@testset "Check that non-floats are correctly promoted" begin
-    @test [1 0 0; 0 1 0]\[1,1] ≈ [1;1;0]
-end
-
-n = 10
-
-# Split n into 2 parts for tests needing two matrices
-n1 = div(n, 2)
-n2 = 2*n1
-
-Random.seed!(1234323)
-
-@testset "Matrix condition number" begin
-    ainit = rand(n, n)
-    @testset "for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        ainit = convert(Matrix{elty}, ainit)
-        for a in (copy(ainit), view(ainit, 1:n, 1:n))
-            ainv = inv(a)
-            @test cond(a, 1)   == opnorm(a, 1)  *opnorm(ainv, 1)
-            @test cond(a, Inf) == opnorm(a, Inf)*opnorm(ainv, Inf)
-            @test cond(a[:, 1:5]) == (\)(extrema(svdvals(a[:, 1:5]))...)
-            @test_throws ArgumentError cond(a,3)
-        end
-    end
-    @testset "Singular matrices" for p in (1, 2, Inf)
-        @test cond(zeros(Int, 2, 2), p) == Inf
-        @test cond(zeros(2, 2), p)      == Inf
-        @test cond([0 0; 1 1], p)       == Inf
-        @test cond([0. 0.; 1. 1.], p)   == Inf
-    end
-    @testset "Issue #33547, condition number of 2x2 matrix" begin
-        M = [1.0 -2.0
-            -2.0 -1.5]
-        @test cond(M, 1) ≈ 2.227272727272727
-    end
-    @testset "Condition numbers of a non-random matrix" begin
-        # To ensure that we detect any regressions in the underlying functions
-        Mars= [11  24   7  20   3
-                4  12  25   8  16
-               17   5  13  21   9
-               10  18   1  14  22
-               23   6  19   2  15]
-        @test cond(Mars, 1)   ≈ 7.1
-        @test cond(Mars, 2)   ≈ 6.181867355918493
-        @test cond(Mars, Inf) ≈ 7.1
-    end
-end
-
-areal = randn(n,n)/2
-aimg  = randn(n,n)/2
-a2real = randn(n,n)/2
-a2img  = randn(n,n)/2
-breal = randn(n,2)/2
-bimg  = randn(n,2)/2
-
-@testset "For A containing $eltya" for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int)
-    ainit = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
-    ainit2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(a2real, a2img) : a2real)
-    ε = εa = eps(abs(float(one(eltya))))
-
-    apd  = ainit'*ainit # symmetric positive-definite
-    @testset "Positive definiteness" begin
-        @test !isposdef(ainit)
-        @test isposdef(apd)
-        if eltya != Int # cannot perform cholesky! for Matrix{Int}
-            @test !isposdef!(copy(ainit))
-            @test isposdef!(copy(apd))
-        end
-    end
-    @testset "For b containing $eltyb" for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int)
-        binit = eltyb == Int ? rand(1:5, n, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? complex.(breal, bimg) : breal)
-        εb = eps(abs(float(one(eltyb))))
-        ε = max(εa,εb)
-        for (a, b) in ((copy(ainit), copy(binit)), (view(ainit, 1:n, 1:n), view(binit, 1:n, 1:2)))
-            @testset "Solve square general system of equations" begin
-                κ = cond(a,1)
-                x = a \ b
-                @test_throws DimensionMismatch b'\b
-                @test_throws DimensionMismatch b\b'
-                @test norm(a*x - b, 1)/norm(b) < ε*κ*n*2 # Ad hoc, revisit!
-                @test zeros(eltya,n)\fill(eltya(1),n) ≈ (zeros(eltya,n,1)\fill(eltya(1),n,1))[1,1]
-            end
-
-            @testset "Test nullspace" begin
-                a15null = nullspace(a[:,1:n1]')
-                @test rank([a[:,1:n1] a15null]) == 10
-                @test norm(a[:,1:n1]'a15null,Inf) ≈ zero(eltya) atol=300ε
-                @test norm(a15null'a[:,1:n1],Inf) ≈ zero(eltya) atol=400ε
-                @test size(nullspace(b), 2) == 0
-                @test size(nullspace(b, rtol=0.001), 2) == 0
-                @test size(nullspace(b, atol=100*εb), 2) == 0
-                @test size(nullspace(b, 100*εb), 2) == 0
-                @test nullspace(zeros(eltya,n)) == Matrix(I, 1, 1)
-                @test nullspace(zeros(eltya,n), 0.1) == Matrix(I, 1, 1)
-                # test empty cases
-                @test @inferred(nullspace(zeros(n, 0))) == Matrix(I, 0, 0)
-                @test @inferred(nullspace(zeros(0, n))) == Matrix(I, n, n)
-                # test vector cases
-                @test size(@inferred nullspace(a[:, 1])) == (1, 0)
-                @test size(@inferred nullspace(zero(a[:, 1]))) == (1, 1)
-                @test nullspace(zero(a[:, 1]))[1,1] == 1
-                # test adjortrans vectors, including empty ones
-                @test size(@inferred nullspace(a[:, 1]')) == (n, n - 1)
-                @test @inferred(nullspace(a[1:0, 1]')) == Matrix(I, 0, 0)
-                @test size(@inferred nullspace(b[1, :]')) == (2, 1)
-                @test @inferred(nullspace(b[1, 1:0]')) == Matrix(I, 0, 0)
-                @test size(@inferred nullspace(transpose(a[:, 1]))) == (n, n - 1)
-                @test size(@inferred nullspace(transpose(b[1, :]))) == (2, 1)
-            end
-        end
-    end # for eltyb
-
-    for (a, a2) in ((copy(ainit), copy(ainit2)), (view(ainit, 1:n, 1:n), view(ainit2, 1:n, 1:n)))
-        @testset "Test pinv" begin
-            pinva15 = pinv(a[:,1:n1])
-            @test a[:,1:n1]*pinva15*a[:,1:n1] ≈ a[:,1:n1]
-            @test pinva15*a[:,1:n1]*pinva15 ≈ pinva15
-            pinva15 = pinv(a[:,1:n1]') # the Adjoint case
-            @test a[:,1:n1]'*pinva15*a[:,1:n1]' ≈ a[:,1:n1]'
-            @test pinva15*a[:,1:n1]'*pinva15 ≈ pinva15
-
-            @test size(pinv(Matrix{eltya}(undef,0,0))) == (0,0)
-        end
-
-        @testset "Lyapunov/Sylvester" begin
-            x = lyap(a, a2)
-            @test -a2 ≈ a*x + x*a'
-            y = lyap(a', a2')
-            @test y ≈ lyap(Array(a'), Array(a2'))
-            @test -a2' ≈ a'y + y*a
-            z = lyap(Tridiagonal(a)', Diagonal(a2))
-            @test z ≈ lyap(Array(Tridiagonal(a)'), Array(Diagonal(a2)))
-            @test -Diagonal(a2) ≈ Tridiagonal(a)'*z + z*Tridiagonal(a)
-            x2 = sylvester(a[1:3, 1:3], a[4:n, 4:n], a2[1:3,4:n])
-            @test -a2[1:3, 4:n] ≈ a[1:3, 1:3]*x2 + x2*a[4:n, 4:n]
-            y2 = sylvester(a[1:3, 1:3]', a[4:n, 4:n]', a2[4:n,1:3]')
-            @test y2 ≈ sylvester(Array(a[1:3, 1:3]'), Array(a[4:n, 4:n]'), Array(a2[4:n,1:3]'))
-            @test -a2[4:n, 1:3]' ≈ a[1:3, 1:3]'*y2 + y2*a[4:n, 4:n]'
-            z2 = sylvester(Tridiagonal(a[1:3, 1:3]), Diagonal(a[4:n, 4:n]), a2[1:3,4:n])
-            @test z2 ≈ sylvester(Array(Tridiagonal(a[1:3, 1:3])), Array(Diagonal(a[4:n, 4:n])), Array(a2[1:3,4:n]))
-            @test -a2[1:3, 4:n] ≈ Tridiagonal(a[1:3, 1:3])*z2 + z2*Diagonal(a[4:n, 4:n])
-        end
-
-        @testset "Matrix square root" begin
-            asq = sqrt(a)
-            @test asq*asq ≈ a
-            @test sqrt(transpose(a))*sqrt(transpose(a)) ≈ transpose(a)
-            @test sqrt(adjoint(a))*sqrt(adjoint(a)) ≈ adjoint(a)
-            asym = a + a' # symmetric indefinite
-            asymsq = sqrt(asym)
-            @test asymsq*asymsq ≈ asym
-            @test sqrt(transpose(asym))*sqrt(transpose(asym)) ≈ transpose(asym)
-            @test sqrt(adjoint(asym))*sqrt(adjoint(asym)) ≈ adjoint(asym)
-            if eltype(a) <: Real  # real square root
-                apos = a * a
-                @test sqrt(apos)^2 ≈ apos
-                @test eltype(sqrt(apos)) <: Real
-                # test that real but Complex input produces Complex output
-                @test sqrt(complex(apos)) ≈ sqrt(apos)
-                @test eltype(sqrt(complex(apos))) <: Complex
-            end
-        end
-
-        @testset "Powers" begin
-            if eltya <: AbstractFloat
-                z = zero(eltya)
-                t = convert(eltya,2)
-                r = convert(eltya,2.5)
-                @test a^z ≈ Matrix(I, size(a))
-                @test a^t ≈ a^2
-                @test Matrix{eltya}(I, n, n)^r ≈ Matrix(I, size(a))
-            end
-        end
-    end # end for loop over arraytype
-
-    @testset "Factorize" begin
-        d = rand(eltya,n)
-        e = rand(eltya,n-1)
-        e2 = rand(eltya,n-1)
-        f = rand(eltya,n-2)
-        A = diagm(0 => d)
-        @test factorize(A) == Diagonal(d)
-        A += diagm(-1 => e)
-        @test factorize(A) == Bidiagonal(d,e,:L)
-        A += diagm(-2 => f)
-        @test factorize(A) == LowerTriangular(A)
-        A = diagm(0 => d, 1 => e)
-        @test factorize(A) == Bidiagonal(d,e,:U)
-        if eltya <: Real
-            A = diagm(0 => d, 1 => e, -1 => e)
-            @test Matrix(factorize(A)) ≈ Matrix(factorize(SymTridiagonal(d,e)))
-            A = diagm(0 => d, 1 => e, -1 => e, 2 => f, -2 => f)
-            @test inv(factorize(A)) ≈ inv(factorize(Symmetric(A)))
-        end
-        A = diagm(0 => d, 1 => e, -1 => e2)
-        @test Matrix(factorize(A)) ≈ Matrix(factorize(Tridiagonal(e2,d,e)))
-        A = diagm(0 => d, 1 => e, 2 => f)
-        @test factorize(A) == UpperTriangular(A)
-
-        x = rand(eltya)
-        @test factorize(x) == x
-    end
-end # for eltya
-
-@testset "Test diagm for vectors" begin
-    @test diagm(zeros(50)) == diagm(0 => zeros(50))
-    @test diagm(ones(50)) == diagm(0 => ones(50))
-    v = randn(500)
-    @test diagm(v) == diagm(0 => v)
-    @test diagm(500, 501, v) == diagm(500, 501, 0 => v)
-end
-
-@testset "Non-square diagm" begin
-    x = [7, 8]
-    for m=1:4, n=2:4
-        if m < 2 || n < 3
-            @test_throws DimensionMismatch diagm(m,n, 0 => x,  1 => x)
-            @test_throws DimensionMismatch diagm(n,m, 0 => x,  -1 => x)
-        else
-            M = zeros(m,n)
-            M[1:2,1:3] = [7 7 0; 0 8 8]
-            @test diagm(m,n, 0 => x,  1 => x) == M
-            @test diagm(n,m, 0 => x,  -1 => x) == M'
-        end
-    end
-end
-
-@testset "Test pinv (rtol, atol)" begin
-    M = [1 0 0; 0 1 0; 0 0 0]
-    @test pinv(M,atol=1)== zeros(3,3)
-    @test pinv(M,rtol=0.5)== M
-end
-
-@testset "Test inv of matrix of NaNs" begin
-    for eltya in (NaN16, NaN32, NaN32)
-        r = fill(eltya, 2, 2)
-        @test_throws ArgumentError inv(r)
-        c = fill(complex(eltya, eltya), 2, 2)
-        @test_throws ArgumentError inv(c)
-    end
-end
-
-@testset "test out of bounds triu/tril" begin
-    local m, n = 5, 7
-    ainit = rand(m, n)
-    for a in (copy(ainit), view(ainit, 1:m, 1:n))
-        @test triu(a, -m) == a
-        @test triu(a, n + 2) == zero(a)
-        @test tril(a, -m - 2) == zero(a)
-        @test tril(a, n) == a
-    end
-end
-
-@testset "triu M > N case bug fix" begin
-    mat=[1 2;
-         3 4;
-         5 6;
-         7 8]
-    res=[1 2;
-         3 4;
-         0 6;
-         0 0]
-    @test triu(mat, -1) == res
-end
-
-@testset "Tests norms" begin
-    nnorm = 10
-    mmat = 10
-    nmat = 8
-    @testset "For $elty" for elty in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int32, Int64, BigInt)
-        x = fill(elty(1),10)
-        @testset "Vector" begin
-            xs = view(x,1:2:10)
-            @test norm(x, -Inf) ≈ 1
-            @test norm(x, -1) ≈ 1/10
-            @test norm(x, 0) ≈ 10
-            @test norm(x, 1) ≈ 10
-            @test norm(x, 2) ≈ sqrt(10)
-            @test norm(x, 3) ≈ cbrt(10)
-            @test norm(x, Inf) ≈ 1
-            if elty <: LinearAlgebra.BlasFloat
-                @test norm(x, 1:4) ≈ 2
-                @test_throws BoundsError norm(x,-1:4)
-                @test_throws BoundsError norm(x,1:11)
-            end
-            @test norm(xs, -Inf) ≈ 1
-            @test norm(xs, -1) ≈ 1/5
-            @test norm(xs, 0) ≈ 5
-            @test norm(xs, 1) ≈ 5
-            @test norm(xs, 2) ≈ sqrt(5)
-            @test norm(xs, 3) ≈ cbrt(5)
-            @test norm(xs, Inf) ≈ 1
-        end
-
-        @testset "Issue #12552:" begin
-            if real(elty) <: AbstractFloat
-                for p in [-Inf,-1,1,2,3,Inf]
-                    @test isnan(norm(elty[0,NaN],p))
-                    @test isnan(norm(elty[NaN,0],p))
-                end
-            end
-        end
-
-        @testset "Number" begin
-            norm(x[1:1]) === norm(x[1], -Inf)
-            norm(x[1:1]) === norm(x[1], 0)
-            norm(x[1:1]) === norm(x[1], 1)
-            norm(x[1:1]) === norm(x[1], 2)
-            norm(x[1:1]) === norm(x[1], Inf)
-        end
-
-        @testset "Absolute homogeneity, triangle inequality, & vectorized versions" begin
-            for i = 1:10
-                xinit = elty <: Integer ? convert(Vector{elty}, rand(1:10, nnorm)) :
-                        elty <: Complex ? convert(Vector{elty}, complex.(randn(nnorm), randn(nnorm))) :
-                        convert(Vector{elty}, randn(nnorm))
-                yinit = elty <: Integer ? convert(Vector{elty}, rand(1:10, nnorm)) :
-                        elty <: Complex ? convert(Vector{elty}, complex.(randn(nnorm), randn(nnorm))) :
-                        convert(Vector{elty}, randn(nnorm))
-                α = elty <: Integer ? randn() :
-                    elty <: Complex ? convert(elty, complex(randn(),randn())) :
-                    convert(elty, randn())
-                for (x, y) in ((copy(xinit), copy(yinit)), (view(xinit,1:2:nnorm), view(yinit,1:2:nnorm)))
-                    # Absolute homogeneity
-                    @test norm(α*x,-Inf) ≈ abs(α)*norm(x,-Inf)
-                    @test norm(α*x,-1) ≈ abs(α)*norm(x,-1)
-                    @test norm(α*x,1) ≈ abs(α)*norm(x,1)
-                    @test norm(α*x) ≈ abs(α)*norm(x) # two is default
-                    @test norm(α*x,3) ≈ abs(α)*norm(x,3)
-                    @test norm(α*x,Inf) ≈ abs(α)*norm(x,Inf)
-
-                    # Triangle inequality
-                    @test norm(x + y,1) <= norm(x,1) + norm(y,1)
-                    @test norm(x + y) <= norm(x) + norm(y) # two is default
-                    @test norm(x + y,3) <= norm(x,3) + norm(y,3)
-                    @test norm(x + y,Inf) <= norm(x,Inf) + norm(y,Inf)
-
-                    # Against vectorized versions
-                    @test norm(x,-Inf) ≈ minimum(abs.(x))
-                    @test norm(x,-1) ≈ inv(sum(1 ./ abs.(x)))
-                    @test norm(x,0) ≈ sum(x .!= 0)
-                    @test norm(x,1) ≈ sum(abs.(x))
-                    @test norm(x) ≈ sqrt(sum(abs2.(x)))
-                    @test norm(x,3) ≈ cbrt(sum(abs.(x).^3.))
-                    @test norm(x,Inf) ≈ maximum(abs.(x))
-                end
-            end
-        end
-
-        @testset "Matrix (Operator) opnorm" begin
-            A = fill(elty(1),10,10)
-            As = view(A,1:5,1:5)
-            @test opnorm(A, 1) ≈ 10
-            elty <: Union{BigFloat,Complex{BigFloat},BigInt} || @test opnorm(A, 2) ≈ 10
-            @test opnorm(A, Inf) ≈ 10
-            @test opnorm(As, 1) ≈ 5
-            elty <: Union{BigFloat,Complex{BigFloat},BigInt} || @test opnorm(As, 2) ≈ 5
-            @test opnorm(As, Inf) ≈ 5
-        end
-
-        @testset "Absolute homogeneity, triangle inequality, & norm" begin
-            for i = 1:10
-                Ainit = elty <: Integer ? convert(Matrix{elty}, rand(1:10, mmat, nmat)) :
-                        elty <: Complex ? convert(Matrix{elty}, complex.(randn(mmat, nmat), randn(mmat, nmat))) :
-                        convert(Matrix{elty}, randn(mmat, nmat))
-                Binit = elty <: Integer ? convert(Matrix{elty}, rand(1:10, mmat, nmat)) :
-                        elty <: Complex ? convert(Matrix{elty}, complex.(randn(mmat, nmat), randn(mmat, nmat))) :
-                        convert(Matrix{elty}, randn(mmat, nmat))
-                α = elty <: Integer ? randn() :
-                    elty <: Complex ? convert(elty, complex(randn(),randn())) :
-                    convert(elty, randn())
-                for (A, B) in ((copy(Ainit), copy(Binit)), (view(Ainit,1:nmat,1:nmat), view(Binit,1:nmat,1:nmat)))
-                    # Absolute homogeneity
-                    @test norm(α*A,1) ≈ abs(α)*norm(A,1)
-                    elty <: Union{BigFloat,Complex{BigFloat},BigInt} || @test norm(α*A) ≈ abs(α)*norm(A) # two is default
-                    @test norm(α*A,Inf) ≈ abs(α)*norm(A,Inf)
-
-                    # Triangle inequality
-                    @test norm(A + B,1) <= norm(A,1) + norm(B,1)
-                    elty <: Union{BigFloat,Complex{BigFloat},BigInt} || @test norm(A + B) <= norm(A) + norm(B) # two is default
-                    @test norm(A + B,Inf) <= norm(A,Inf) + norm(B,Inf)
-
-                    # norm
-                    for p in (-Inf, Inf, (-2:3)...)
-                        @test norm(A, p) == norm(vec(A), p)
-                    end
-                end
-            end
-
-            @testset "issue #10234" begin
-                if elty <: AbstractFloat || elty <: Complex
-                    z = zeros(elty, 100)
-                    z[1] = -Inf
-                    for p in [-2,-1.5,-1,-0.5,0.5,1,1.5,2,Inf]
-                        @test norm(z, p) == (p < 0 ? 0 : Inf)
-                        @test norm(elty[Inf],p) == Inf
-                    end
-                end
-            end
-        end
-    end
-
-    @testset "issue #10234" begin
-        @test norm(Any[Inf],-2) == norm(Any[Inf],-1) == norm(Any[Inf],1) == norm(Any[Inf],1.5) == norm(Any[Inf],2) == norm(Any[Inf],Inf) == Inf
-    end
-
-    @testset "overflow/underflow in norms" begin
-        @test norm(Float64[1e-300, 1], -3)*1e300 ≈ 1
-        @test norm(Float64[1e300, 1], 3)*1e-300 ≈ 1
-    end
-end
-
-## Issue related tests
-@testset "issue #1447" begin
-    A = [1.0+0.0im 0; 0 1]
-    B = pinv(A)
-    for i = 1:4
-        @test A[i] ≈ B[i]
-    end
-end
-
-@testset "issue #2246" begin
-    A = [1 2 0 0; 0 1 0 0; 0 0 0 0; 0 0 0 0]
-    Asq = sqrt(A)
-    @test Asq*Asq ≈ A
-    A2 = view(A, 1:2, 1:2)
-    A2sq = sqrt(A2)
-    @test A2sq*A2sq ≈ A2
-
-    N = 3
-    @test log(det(Matrix(1.0I, N, N))) ≈ logdet(Matrix(1.0I, N, N))
-end
-
-@testset "issue #2637" begin
-    a = [1, 2, 3]
-    b = [4, 5, 6]
-    @test kron(Matrix(I, 2, 2), Matrix(I, 2, 2)) == Matrix(I, 4, 4)
-    @test kron(a,b) == [4,5,6,8,10,12,12,15,18]
-    @test kron(a',b') == [4 5 6 8 10 12 12 15 18]
-    @test kron(a,b')  == [4 5 6; 8 10 12; 12 15 18]
-    @test kron(a',b)  == [4 8 12; 5 10 15; 6 12 18]
-    @test kron(a, Matrix(1I, 2, 2)) == [1 0; 0 1; 2 0; 0 2; 3 0; 0 3]
-    @test kron(Matrix(1I, 2, 2), a) == [ 1 0; 2 0; 3 0; 0 1; 0 2; 0 3]
-    @test kron(Matrix(1I, 2, 2), 2) == Matrix(2I, 2, 2)
-    @test kron(3, Matrix(1I, 3, 3)) == Matrix(3I, 3, 3)
-    @test kron(a,2) == [2, 4, 6]
-    @test kron(b',2) == [8 10 12]
-end
-
-@testset "kron!" begin
-    a = [1.0, 0.0]
-    b = [0.0, 1.0]
-    @test kron!([1.0, 0.0], b, 0.5) == [0.0; 0.5]
-    @test kron!([1.0, 0.0], 0.5, b) == [0.0; 0.5]
-    c = Vector{Float64}(undef, 4)
-    kron!(c, a, b)
-    @test c == [0.0; 1.0; 0.0; 0.0]
-    c = Matrix{Float64}(undef, 2, 2)
-    kron!(c, a, b')
-    @test c == [0.0 1.0; 0.0 0.0]
-end
-
-@testset "kron adjoint" begin
-    a = [1+im, 2, 3]
-    b = [4, 5, 6+7im]
-    @test kron(a', b') isa Adjoint
-    @test kron(a', b') == kron(a, b)'
-    @test kron(transpose(a), b') isa Transpose
-    @test kron(transpose(a), b') == kron(permutedims(a), collect(b'))
-    @test kron(transpose(a), transpose(b)) isa Transpose
-    @test kron(transpose(a), transpose(b)) == transpose(kron(a, b))
-end
-
-@testset "issue #4796" begin
-    dim=2
-    S=zeros(Complex,dim,dim)
-    T=zeros(Complex,dim,dim)
-    fill!(T, 1)
-    z = 2.5 + 1.5im
-    S[1] = z
-    @test S*T == [z z; 0 0]
-
-    # similar issue for Array{Real}
-    @test Real[1 2] * Real[1.5; 2.0] == Real[5.5]
-end
-
-@testset "Matrix exponential" begin
-    @testset "Tests for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A1  = convert(Matrix{elty}, [4 2 0; 1 4 1; 1 1 4])
-        eA1 = convert(Matrix{elty}, [147.866622446369 127.781085523181  127.781085523182;
-                                     183.765138646367 183.765138646366  163.679601723179;
-                                      71.797032399996  91.8825693231832 111.968106246371]')
-        @test exp(A1) ≈ eA1
-        @test exp(adjoint(A1)) ≈ adjoint(eA1)
-        @test exp(transpose(A1)) ≈ transpose(eA1)
-        for f in (sin, cos, sinh, cosh, tanh, tan)
-            @test f(adjoint(A1)) ≈ f(copy(adjoint(A1)))
-        end
-
-        A2  = convert(Matrix{elty},
-                      [29.87942128909879    0.7815750847907159 -2.289519314033932;
-                       0.7815750847907159 25.72656945571064    8.680737820540137;
-                       -2.289519314033932   8.680737820540137  34.39400925519054])
-        eA2 = convert(Matrix{elty},
-                      [  5496313853692458.0 -18231880972009236.0 -30475770808580460.0;
-                       -18231880972009252.0  60605228702221920.0 101291842930249760.0;
-                       -30475770808580480.0 101291842930249728.0 169294411240851968.0])
-        @test exp(A2) ≈ eA2
-        @test exp(adjoint(A2)) ≈ adjoint(eA2)
-        @test exp(transpose(A2)) ≈ transpose(eA2)
-
-        A3  = convert(Matrix{elty}, [-131 19 18;-390 56 54;-387 57 52])
-        eA3 = convert(Matrix{elty}, [-1.50964415879218 -5.6325707998812  -4.934938326092;
-                                      0.367879439109187 1.47151775849686  1.10363831732856;
-                                      0.135335281175235 0.406005843524598 0.541341126763207]')
-        @test exp(A3) ≈ eA3
-        @test exp(adjoint(A3)) ≈ adjoint(eA3)
-        @test exp(transpose(A3)) ≈ transpose(eA3)
-
-        A4 = convert(Matrix{elty}, [0.25 0.25; 0 0])
-        eA4 = convert(Matrix{elty}, [1.2840254166877416 0.2840254166877415; 0 1])
-        @test exp(A4) ≈ eA4
-        @test exp(adjoint(A4)) ≈ adjoint(eA4)
-        @test exp(transpose(A4)) ≈ transpose(eA4)
-
-        A5 = convert(Matrix{elty}, [0 0.02; 0 0])
-        eA5 = convert(Matrix{elty}, [1 0.02; 0 1])
-        @test exp(A5) ≈ eA5
-        @test exp(adjoint(A5)) ≈ adjoint(eA5)
-        @test exp(transpose(A5)) ≈ transpose(eA5)
-
-        # Hessenberg
-        @test hessenberg(A1).H ≈ convert(Matrix{elty},
-                                                 [4.000000000000000  -1.414213562373094  -1.414213562373095
-                                                  -1.414213562373095   4.999999999999996  -0.000000000000000
-                                                  0  -0.000000000000002   3.000000000000000])
-
-        # cis always returns a complex matrix
-        if elty <: Real
-            eltyim = Complex{elty}
-        else
-            eltyim = elty
-        end
-
-        @test cis(A1) ≈ convert(Matrix{eltyim}, [-0.339938 + 0.000941506im   0.772659  - 0.8469im     0.52745  + 0.566543im;
-                                                  0.650054 - 0.140179im     -0.0762135 + 0.284213im   0.38633  - 0.42345im ;
-                                                  0.650054 - 0.140179im      0.913779  + 0.143093im  -0.603663 - 0.28233im ]) rtol=7e-7
-    end
-
-    @testset "Additional tests for $elty" for elty in (Float64, ComplexF64)
-        A4  = convert(Matrix{elty}, [1/2 1/3 1/4 1/5+eps();
-                                     1/3 1/4 1/5 1/6;
-                                     1/4 1/5 1/6 1/7;
-                                     1/5 1/6 1/7 1/8])
-        @test exp(log(A4)) ≈ A4
-        @test exp(log(transpose(A4))) ≈ transpose(A4)
-        @test exp(log(adjoint(A4))) ≈ adjoint(A4)
-
-        A5  = convert(Matrix{elty}, [1 1 0 1; 0 1 1 0; 0 0 1 1; 1 0 0 1])
-        @test exp(log(A5)) ≈ A5
-        @test exp(log(transpose(A5))) ≈ transpose(A5)
-        @test exp(log(adjoint(A5))) ≈ adjoint(A5)
-
-        A6  = convert(Matrix{elty}, [-5 2 0 0 ; 1/2 -7 3 0; 0 1/3 -9 4; 0 0 1/4 -11])
-        @test exp(log(A6)) ≈ A6
-        @test exp(log(transpose(A6))) ≈ transpose(A6)
-        @test exp(log(adjoint(A6))) ≈ adjoint(A6)
-
-        A7  = convert(Matrix{elty}, [1 0 0 1e-8; 0 1 0 0; 0 0 1 0; 0 0 0 1])
-        @test exp(log(A7)) ≈ A7
-        @test exp(log(transpose(A7))) ≈ transpose(A7)
-        @test exp(log(adjoint(A7))) ≈ adjoint(A7)
-    end
-
-    @testset "Integer promotion tests" begin
-        for (elty1, elty2) in ((Int64, Float64), (Complex{Int64}, ComplexF64))
-            A4int  = convert(Matrix{elty1}, [1 2; 3 4])
-            A4float  = convert(Matrix{elty2}, A4int)
-            @test exp(A4int) == exp(A4float)
-        end
-    end
-
-    @testset "^ tests" for elty in (Float32, Float64, ComplexF32, ComplexF64, Int32, Int64)
-        # should all be exact as the lhs functions are simple aliases
-        @test ℯ^(fill(elty(2), (4,4))) == exp(fill(elty(2), (4,4)))
-        @test 2^(fill(elty(2), (4,4))) == exp(log(2)*fill(elty(2), (4,4)))
-        @test 2.0^(fill(elty(2), (4,4))) == exp(log(2.0)*fill(elty(2), (4,4)))
-    end
-
-    A8 = 100 * [-1+1im 0 0 1e-8; 0 1 0 0; 0 0 1 0; 0 0 0 1]
-    @test exp(log(A8)) ≈ A8
-end
-
-@testset "Matrix trigonometry" begin
-    @testset "Tests for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A1  = convert(Matrix{elty}, [3 2 0; 1 3 1; 1 1 3])
-        A2  = convert(Matrix{elty},
-                      [3.975884257819758 0.15631501695814318 -0.4579038628067864;
-                       0.15631501695814318 4.545313891142127 1.7361475641080275;
-                       -0.4579038628067864 1.7361475641080275 6.478801851038108])
-        A3 = convert(Matrix{elty}, [0.25 0.25; 0 0])
-        A4 = convert(Matrix{elty}, [0 0.02; 0 0])
-
-        cosA1 = convert(Matrix{elty},[-0.18287716254368605 -0.29517205254584633 0.761711400552759;
-                                      0.23326967400345625 0.19797853773269333 -0.14758602627292305;
-                                      0.23326967400345636 0.6141253742798355 -0.5637328628200653])
-        sinA1 = convert(Matrix{elty}, [0.2865568596627417 -1.107751980582015 -0.13772915374386513;
-                                       -0.6227405671629401 0.2176922827908092 -0.5538759902910078;
-                                       -0.6227405671629398 -0.6916051440348725 0.3554214365346742])
-        @test cos(A1) ≈ cosA1
-        @test sin(A1) ≈ sinA1
-
-        cosA2 = convert(Matrix{elty}, [-0.6331745163802187 0.12878366262380136 -0.17304181968301532;
-                                       0.12878366262380136 -0.5596234510748788 0.5210483146041339;
-                                       -0.17304181968301532 0.5210483146041339 0.002263776356015268])
-        sinA2 = convert(Matrix{elty},[-0.6677253518411841 -0.32599318928375437 0.020799609079003523;
-                                      -0.32599318928375437 -0.04568726058081066 0.5388748740270427;
-                                      0.020799609079003523 0.5388748740270427 0.6385462428126032])
-        @test cos(A2) ≈ cosA2
-        @test sin(A2) ≈ sinA2
-
-        cosA3 = convert(Matrix{elty}, [0.9689124217106446 -0.031087578289355197; 0.0 1.0])
-        sinA3 = convert(Matrix{elty}, [0.24740395925452285 0.24740395925452285; 0.0 0.0])
-        @test cos(A3) ≈ cosA3
-        @test sin(A3) ≈ sinA3
-
-        cosA4 = convert(Matrix{elty}, [1.0 0.0; 0.0 1.0])
-        sinA4 = convert(Matrix{elty}, [0.0 0.02; 0.0 0.0])
-        @test cos(A4) ≈ cosA4
-        @test sin(A4) ≈ sinA4
-
-        # Identities
-        for (i, A) in enumerate((A1, A2, A3, A4))
-            @test sincos(A) == (sin(A), cos(A))
-            @test cos(A)^2 + sin(A)^2 ≈ Matrix(I, size(A))
-            @test cos(A) ≈ cos(-A)
-            @test sin(A) ≈ -sin(-A)
-            @test tan(A) ≈ sin(A) / cos(A)
-
-            @test cos(A) ≈ real(exp(im*A))
-            @test sin(A) ≈ imag(exp(im*A))
-            @test cos(A) ≈ real(cis(A))
-            @test sin(A) ≈ imag(cis(A))
-            @test cis(A) ≈ cos(A) + im * sin(A)
-
-            @test cosh(A) ≈ 0.5 * (exp(A) + exp(-A))
-            @test sinh(A) ≈ 0.5 * (exp(A) - exp(-A))
-            @test cosh(A) ≈ cosh(-A)
-            @test sinh(A) ≈ -sinh(-A)
-
-            # Some of the following identities fail for A3, A4 because the matrices are singular
-            if i in (1, 2)
-                @test sec(A) ≈ inv(cos(A))
-                @test csc(A) ≈ inv(sin(A))
-                @test cot(A) ≈ inv(tan(A))
-                @test sech(A) ≈ inv(cosh(A))
-                @test csch(A) ≈ inv(sinh(A))
-                @test coth(A) ≈ inv(tanh(A))
-            end
-            # The following identities fail for A1, A2 due to rounding errors;
-            # probably needs better algorithm for the general case
-            if i in (3, 4)
-                @test cosh(A)^2 - sinh(A)^2 ≈ Matrix(I, size(A))
-                @test tanh(A) ≈ sinh(A) / cosh(A)
-            end
-        end
-    end
-
-    @testset "Additional tests for $elty" for elty in (ComplexF32, ComplexF64)
-        A5 = convert(Matrix{elty}, [1im 2; 0.02+0.5im 3])
-
-        @test sincos(A5) == (sin(A5), cos(A5))
-
-        @test cos(A5)^2 + sin(A5)^2 ≈ Matrix(I, size(A5))
-        @test cosh(A5)^2 - sinh(A5)^2 ≈ Matrix(I, size(A5))
-        @test cos(A5)^2 + sin(A5)^2 ≈ Matrix(I, size(A5))
-        @test tan(A5) ≈ sin(A5) / cos(A5)
-        @test tanh(A5) ≈ sinh(A5) / cosh(A5)
-
-        @test sec(A5) ≈ inv(cos(A5))
-        @test csc(A5) ≈ inv(sin(A5))
-        @test cot(A5) ≈ inv(tan(A5))
-        @test sech(A5) ≈ inv(cosh(A5))
-        @test csch(A5) ≈ inv(sinh(A5))
-        @test coth(A5) ≈ inv(tanh(A5))
-
-        @test cos(A5) ≈ 0.5 * (exp(im*A5) + exp(-im*A5))
-        @test sin(A5) ≈ -0.5im * (exp(im*A5) - exp(-im*A5))
-        @test cos(A5) ≈ 0.5 * (cis(A5) + cis(-A5))
-        @test sin(A5) ≈ -0.5im * (cis(A5) - cis(-A5))
-
-        @test cosh(A5) ≈ 0.5 * (exp(A5) + exp(-A5))
-        @test sinh(A5) ≈ 0.5 * (exp(A5) - exp(-A5))
-    end
-
-    @testset "Additional tests for $elty" for elty in (Int32, Int64, Complex{Int32}, Complex{Int64})
-        A1 = convert(Matrix{elty}, [1 2; 3 4])
-        A2 = convert(Matrix{elty}, [1 2; 2 1])
-
-        cosA1 = convert(Matrix{float(elty)}, [0.855423165077998 -0.11087638101074865;
-                                              -0.16631457151612294 0.689108593561875])
-        cosA2 = convert(Matrix{float(elty)}, [-0.22484509536615283 -0.7651474012342925;
-                                              -0.7651474012342925 -0.22484509536615283])
-
-        @test cos(A1) ≈ cosA1
-        @test cos(A2) ≈ cosA2
-
-        sinA1 = convert(Matrix{float(elty)}, [-0.46558148631373036 -0.14842445991317652;
-                                              -0.22263668986976476 -0.6882181761834951])
-        sinA2 = convert(Matrix{float(elty)}, [-0.3501754883740146 0.4912954964338818;
-                                              0.4912954964338818 -0.3501754883740146])
-
-        @test sin(A1) ≈ sinA1
-        @test sin(A2) ≈ sinA2
-    end
-
-    @testset "Inverse functions for $elty" for elty in (Float32, Float64)
-        A1 = convert(Matrix{elty}, [0.244637  -0.63578;
-                                    0.22002    0.189026])
-        A2 = convert(Matrix{elty}, [1.11656   -0.098672   0.158485;
-                                    -0.098672   0.100933  -0.107107;
-                                    0.158485  -0.107107   0.612404])
-
-        for A in (A1, A2)
-            @test cos(acos(cos(A))) ≈ cos(A)
-            @test sin(asin(sin(A))) ≈ sin(A)
-            @test tan(atan(tan(A))) ≈ tan(A)
-            @test cosh(acosh(cosh(A))) ≈ cosh(A)
-            @test sinh(asinh(sinh(A))) ≈ sinh(A)
-            @test tanh(atanh(tanh(A))) ≈ tanh(A)
-            @test sec(asec(sec(A))) ≈ sec(A)
-            @test csc(acsc(csc(A))) ≈ csc(A)
-            @test cot(acot(cot(A))) ≈ cot(A)
-            @test sech(asech(sech(A))) ≈ sech(A)
-            @test csch(acsch(csch(A))) ≈ csch(A)
-            @test coth(acoth(coth(A))) ≈ coth(A)
-        end
-    end
-
-    @testset "Inverse functions for $elty" for elty in (ComplexF32, ComplexF64)
-        A1 = convert(Matrix{elty}, [ 0.143721-0.0im       -0.138386-0.106905im;
-                                     -0.138386+0.106905im   0.306224-0.0im])
-        A2 = convert(Matrix{elty}, [1im 2; 0.02+0.5im 3])
-        A3 = convert(Matrix{elty}, [0.138721-0.266836im 0.0971722-0.13715im 0.205046-0.137136im;
-                                    -0.0154974-0.00358254im 0.152163-0.445452im 0.0314575-0.536521im;
-                                    -0.387488+0.0294059im -0.0448773+0.114305im 0.230684-0.275894im])
-        for A in (A1, A2, A3)
-            @test cos(acos(cos(A))) ≈ cos(A)
-            @test sin(asin(sin(A))) ≈ sin(A)
-            @test tan(atan(tan(A))) ≈ tan(A)
-            @test cosh(acosh(cosh(A))) ≈ cosh(A)
-            @test sinh(asinh(sinh(A))) ≈ sinh(A)
-            @test tanh(atanh(tanh(A))) ≈ tanh(A)
-            @test sec(asec(sec(A))) ≈ sec(A)
-            @test csc(acsc(csc(A))) ≈ csc(A)
-            @test cot(acot(cot(A))) ≈ cot(A)
-            @test sech(asech(sech(A))) ≈ sech(A)
-            @test csch(acsch(csch(A))) ≈ csch(A)
-            @test coth(acoth(coth(A))) ≈ coth(A)
-
-            # Definition of principal values (Aprahamian & Higham, 2016, pp. 4-5)
-            abstol = sqrt(eps(real(elty))) * norm(acosh(A))
-            @test all(z -> (0 < real(z) < π ||
-                            abs(real(z)) < abstol && imag(z) >= 0 ||
-                            abs(real(z) - π) < abstol && imag(z) <= 0),
-                      eigen(acos(A)).values)
-            @test all(z -> (-π/2 < real(z) < π/2 ||
-                            abs(real(z) + π/2) < abstol && imag(z) >= 0 ||
-                            abs(real(z) - π/2) < abstol && imag(z) <= 0),
-                      eigen(asin(A)).values)
-            @test all(z -> (-π < imag(z) < π && real(z) > 0 ||
-                            0 <= imag(z) < π && abs(real(z)) < abstol ||
-                            abs(imag(z) - π) < abstol && real(z) >= 0),
-                      eigen(acosh(A)).values)
-            @test all(z -> (-π/2 < imag(z) < π/2 ||
-                            abs(imag(z) + π/2) < abstol && real(z) <= 0 ||
-                            abs(imag(z) - π/2) < abstol && real(z) <= 0),
-                      eigen(asinh(A)).values)
-        end
-    end
-end
-
-@testset "issue 5116" begin
-    A9  = [0 10 0 0; -1 0 0 0; 0 0 0 0; -2 0 0 0]
-    eA9 = [-0.999786072879326  -0.065407069689389   0.0   0.0
-           0.006540706968939  -0.999786072879326   0.0   0.0
-           0.0                 0.0                 1.0   0.0
-           0.013081413937878  -3.999572145758650   0.0   1.0]
-    @test exp(A9) ≈ eA9
-
-    A10  = [ 0. 0. 0. 0. ; 0. 0. -im 0.; 0. im 0. 0.; 0. 0. 0. 0.]
-    eA10 = [ 1.0+0.0im   0.0+0.0im                 0.0+0.0im                0.0+0.0im
-            0.0+0.0im   1.543080634815244+0.0im   0.0-1.175201193643801im  0.0+0.0im
-            0.0+0.0im   0.0+1.175201193643801im   1.543080634815243+0.0im  0.0+0.0im
-            0.0+0.0im   0.0+0.0im                 0.0+0.0im                1.0+0.0im]
-    @test exp(A10) ≈ eA10
-end
-
-@testset "Additional matrix logarithm tests" for elty in (Float64, ComplexF64)
-    A11 = convert(Matrix{elty}, [3 2; -5 -3])
-    @test exp(log(A11)) ≈ A11
-
-    A13 = convert(Matrix{elty}, [2 0; 0 2])
-    @test typeof(log(A13)) == Array{elty, 2}
-
-    T = elty == Float64 ? Symmetric : Hermitian
-    @test typeof(log(T(A13))) == T{elty, Array{elty, 2}}
-
-    A1  = convert(Matrix{elty}, [4 2 0; 1 4 1; 1 1 4])
-    logA1 = convert(Matrix{elty}, [1.329661349 0.5302876358 -0.06818951543;
-                                    0.2310490602 1.295566591 0.2651438179;
-                                    0.2310490602 0.1969543025 1.363756107])
-    @test log(A1) ≈ logA1
-    @test exp(log(A1)) ≈ A1
-    @test typeof(log(A1)) == Matrix{elty}
-
-    A4  = convert(Matrix{elty}, [1/2 1/3 1/4 1/5+eps();
-                                 1/3 1/4 1/5 1/6;
-                                 1/4 1/5 1/6 1/7;
-                                 1/5 1/6 1/7 1/8])
-    logA4 = convert(Matrix{elty}, [-1.73297159 1.857349738 0.4462766564 0.2414170219;
-                                    1.857349738 -5.335033737 2.994142974 0.5865285289;
-                                    0.4462766564 2.994142974 -7.351095988 3.318413247;
-                                    0.2414170219 0.5865285289 3.318413247 -5.444632124])
-    @test log(A4) ≈ logA4
-    @test exp(log(A4)) ≈ A4
-    @test typeof(log(A4)) == Matrix{elty}
-
-    # real triu matrix
-    A5  = convert(Matrix{elty}, [1 2 3; 0 4 5; 0 0 6])  # triu
-    logA5 = convert(Matrix{elty}, [0.0 0.9241962407465937 0.5563245488984037;
-                                   0.0 1.3862943611198906 1.0136627702704109;
-                                   0.0 0.0 1.791759469228055])
-    @test log(A5) ≈ logA5
-    @test exp(log(A5)) ≈ A5
-    @test typeof(log(A5)) == Matrix{elty}
-
-    # real quasitriangular schur form with 2 2x2 blocks, 2 1x1 blocks, and all positive eigenvalues
-    A6 = convert(Matrix{elty}, [2 3 2 2 3 1;
-                                1 3 3 2 3 1;
-                                3 3 3 1 1 2;
-                                2 1 2 2 2 2;
-                                1 1 2 2 3 1;
-                                2 2 2 2 1 3])
-    @test exp(log(A6)) ≈ A6
-    @test typeof(log(A6)) == Matrix{elty}
-
-    # real quasitriangular schur form with a negative eigenvalue
-    A7 = convert(Matrix{elty}, [1 3 3 2 2 2;
-                                1 2 1 3 1 2;
-                                3 1 2 3 2 1;
-                                3 1 2 2 2 1;
-                                3 1 3 1 2 1;
-                                1 1 3 1 1 3])
-    @test exp(log(A7)) ≈ A7
-    @test typeof(log(A7)) == Matrix{complex(elty)}
-
-    if elty <: Complex
-        A8 = convert(Matrix{elty}, [1 + 1im 1 + 1im 1 - 1im;
-                                    1 + 1im -1 + 1im 1 + 1im;
-                                    1 - 1im 1 + 1im -1 - 1im])
-        logA8 = convert(
-            Matrix{elty},
-            [0.9478628953131517 + 1.3725201223387407im -0.2547157147532057 + 0.06352318334299434im 0.8560050197863862 - 1.0471975511965979im;
-             -0.2547157147532066 + 0.06352318334299467im -0.16285783922644065 + 0.2617993877991496im 0.2547157147532063 + 2.1579182857361894im;
-             0.8560050197863851 - 1.0471975511965974im 0.25471571475320665 + 2.1579182857361903im 0.9478628953131519 - 0.8489213467404436im],
-        )
-        @test log(A8) ≈ logA8
-        @test exp(log(A8)) ≈ A8
-        @test typeof(log(A8)) == Matrix{elty}
-    end
-end
-
-@testset "matrix logarithm is type-inferrable" for elty in (Float32,Float64,ComplexF32,ComplexF64)
-    A1 = randn(elty, 4, 4)
-    @inferred Union{Matrix{elty},Matrix{complex(elty)}} log(A1)
-end
-
-@testset "Additional matrix square root tests" for elty in (Float64, ComplexF64)
-    A11 = convert(Matrix{elty}, [3 2; -5 -3])
-    @test sqrt(A11)^2 ≈ A11
-
-    A13 = convert(Matrix{elty}, [2 0; 0 2])
-    @test typeof(sqrt(A13)) == Array{elty, 2}
-
-    T = elty == Float64 ? Symmetric : Hermitian
-    @test typeof(sqrt(T(A13))) == T{elty, Array{elty, 2}}
-
-    A1  = convert(Matrix{elty}, [4 2 0; 1 4 1; 1 1 4])
-    sqrtA1 = convert(Matrix{elty}, [1.971197119306979 0.5113118387140085 -0.03301921523780871;
-                                   0.23914631173809942 1.9546875116880718 0.2556559193570036;
-                                   0.23914631173810008 0.22263670411919556 1.9877067269258815])
-    @test sqrt(A1) ≈ sqrtA1
-    @test sqrt(A1)^2 ≈ A1
-    @test typeof(sqrt(A1)) == Matrix{elty}
-
-    A4  = convert(Matrix{elty}, [1/2 1/3 1/4 1/5+eps();
-                                 1/3 1/4 1/5 1/6;
-                                 1/4 1/5 1/6 1/7;
-                                 1/5 1/6 1/7 1/8])
-                                 sqrtA4 = convert(
-        Matrix{elty},
-        [0.590697761556362 0.3055006800405779 0.19525404749300546 0.14007621469988107;
-         0.30550068004057784 0.2825388389385975 0.21857572599211642 0.17048692323164674;
-         0.19525404749300565 0.21857572599211622 0.21155429252242863 0.18976816626246887;
-         0.14007621469988046 0.17048692323164724 0.1897681662624689 0.20075085592778794],
-    )
-    @test sqrt(A4) ≈ sqrtA4
-    @test sqrt(A4)^2 ≈ A4
-    @test typeof(sqrt(A4)) == Matrix{elty}
-
-    # real triu matrix
-    A5  = convert(Matrix{elty}, [1 2 3; 0 4 5; 0 0 6])  # triu
-    sqrtA5 = convert(Matrix{elty}, [1.0 0.6666666666666666 0.6525169217864183;
-                                   0.0 2.0 1.1237243569579454;
-                                   0.0 0.0 2.449489742783178])
-    @test sqrt(A5) ≈ sqrtA5
-    @test sqrt(A5)^2 ≈ A5
-    @test typeof(sqrt(A5)) == Matrix{elty}
-
-    # real quasitriangular schur form with 2 2x2 blocks, 2 1x1 blocks, and all positive eigenvalues
-    A6 = convert(Matrix{elty}, [2 3 2 2 3 1;
-                                1 3 3 2 3 1;
-                                3 3 3 1 1 2;
-                                2 1 2 2 2 2;
-                                1 1 2 2 3 1;
-                                2 2 2 2 1 3])
-    @test sqrt(A6)^2 ≈ A6
-    @test typeof(sqrt(A6)) == Matrix{elty}
-
-    # real quasitriangular schur form with a negative eigenvalue
-    A7 = convert(Matrix{elty}, [1 3 3 2 2 2;
-                                1 2 1 3 1 2;
-                                3 1 2 3 2 1;
-                                3 1 2 2 2 1;
-                                3 1 3 1 2 1;
-                                1 1 3 1 1 3])
-    @test sqrt(A7)^2 ≈ A7
-    @test typeof(sqrt(A7)) == Matrix{complex(elty)}
-
-    if elty <: Complex
-        A8 = convert(Matrix{elty}, [1 + 1im 1 + 1im 1 - 1im;
-                                    1 + 1im -1 + 1im 1 + 1im;
-                                    1 - 1im 1 + 1im -1 - 1im])
-        sqrtA8 = convert(
-            Matrix{elty},
-            [1.2559748527474284 + 0.6741878819930323im 0.20910077991005582 + 0.24969165051825476im 0.591784212275146 - 0.6741878819930327im;
-             0.2091007799100553 + 0.24969165051825515im 0.3320953202361413 + 0.2915044496279425im 0.33209532023614136 + 1.0568713143581219im;
-             0.5917842122751455 - 0.674187881993032im 0.33209532023614147 + 1.0568713143581223im 0.7147787526012315 - 0.6323750828833452im],
-        )
-        @test sqrt(A8) ≈ sqrtA8
-        @test sqrt(A8)^2 ≈ A8
-        @test typeof(sqrt(A8)) == Matrix{elty}
-    end
-end
-
-@testset "issue #40141" begin
-    x = [-1 -eps() 0 0; eps() -1 0 0; 0 0 -1 -eps(); 0 0 eps() -1]
-    @test sqrt(x)^2 ≈ x
-
-    x2 =  [-1 -eps() 0 0; 3eps() -1 0 0; 0 0 -1 -3eps(); 0 0 eps() -1]
-    @test sqrt(x2)^2 ≈ x2
-
-    x3 = [-1 -eps() 0 0; eps() -1 0 0; 0 0 -1 -eps(); 0 0 eps() Inf]
-    @test all(isnan, sqrt(x3))
-
-    # test overflow/underflow handled
-    x4 = [0 -1e200; 1e200 0]
-    @test sqrt(x4)^2 ≈ x4
-
-    x5 = [0 -1e-200; 1e-200 0]
-    @test sqrt(x5)^2 ≈ x5
-
-    x6 = [1.0 1e200; -1e-200 1.0]
-    @test sqrt(x6)^2 ≈ x6
-end
-
-@testset "matrix logarithm block diagonal underflow/overflow" begin
-    x1 = [0 -1e200; 1e200 0]
-    @test exp(log(x1)) ≈ x1
-
-    x2 = [0 -1e-200; 1e-200 0]
-    @test exp(log(x2)) ≈ x2
-
-    x3 = [1.0 1e200; -1e-200 1.0]
-    @test exp(log(x3)) ≈ x3
-end
-
-@testset "issue #7181" begin
-    A = [ 1  5  9
-          2  6 10
-          3  7 11
-          4  8 12 ]
-    @test diag(A,-5) == []
-    @test diag(A,-4) == []
-    @test diag(A,-3) == [4]
-    @test diag(A,-2) == [3,8]
-    @test diag(A,-1) == [2,7,12]
-    @test diag(A, 0) == [1,6,11]
-    @test diag(A, 1) == [5,10]
-    @test diag(A, 2) == [9]
-    @test diag(A, 3) == []
-    @test diag(A, 4) == []
-
-    @test diag(zeros(0,0)) == []
-    @test diag(zeros(0,0),1) == []
-    @test diag(zeros(0,0),-1) == []
-
-    @test diag(zeros(1,0)) == []
-    @test diag(zeros(1,0),-1) == []
-    @test diag(zeros(1,0),1) == []
-    @test diag(zeros(1,0),-2) == []
-
-    @test diag(zeros(0,1)) == []
-    @test diag(zeros(0,1),1) == []
-    @test diag(zeros(0,1),-1) == []
-    @test diag(zeros(0,1),2) == []
-end
-
-@testset "issue #39857" begin
-    @test lyap(1.0+2.0im, 3.0+4.0im) == -1.5 - 2.0im
-end
-
-@testset "Matrix to real power" for elty in (Float64, ComplexF64)
-# Tests proposed at Higham, Deadman: Testing Matrix Function Algorithms Using Identities, March 2014
-    #Aa : only positive real eigenvalues
-    Aa = convert(Matrix{elty}, [5 4 2 1; 0 1 -1 -1; -1 -1 3 0; 1 1 -1 2])
-
-    #Ab : both positive and negative real eigenvalues
-    Ab = convert(Matrix{elty}, [1 2 3; 4 7 1; 2 1 4])
-
-    #Ac : complex eigenvalues
-    Ac = convert(Matrix{elty}, [5 4 2 1;0 1 -1 -1;-1 -1 3 6;1 1 -1 5])
-
-    #Ad : defective Matrix
-    Ad = convert(Matrix{elty}, [3 1; 0 3])
-
-    #Ah : Hermitian Matrix
-    Ah = convert(Matrix{elty}, [3 1; 1 3])
-    if elty <: LinearAlgebra.BlasComplex
-        Ah += [0 im; -im 0]
-    end
-
-    #ADi : Diagonal Matrix
-    ADi = convert(Matrix{elty}, [3 0; 0 3])
-    if elty <: LinearAlgebra.BlasComplex
-        ADi += [im 0; 0 im]
-    end
-
-    for A in (Aa, Ab, Ac, Ad, Ah, ADi)
-        @test A^(1/2) ≈ sqrt(A)
-        @test A^(-1/2) ≈ inv(sqrt(A))
-        @test A^(3/4) ≈ sqrt(A) * sqrt(sqrt(A))
-        @test A^(-3/4) ≈ inv(A) * sqrt(sqrt(A))
-        @test A^(17/8) ≈ A^2 * sqrt(sqrt(sqrt(A)))
-        @test A^(-17/8) ≈ inv(A^2 * sqrt(sqrt(sqrt(A))))
-        @test (A^0.2)^5 ≈ A
-        @test (A^(2/3))*(A^(1/3)) ≈ A
-        @test (A^im)^(-im) ≈ A
-    end
-end
-
-@testset "diagonal integer matrix to real power" begin
-    A = Matrix(Diagonal([1, 2, 3]))
-    @test A^2.3 ≈ float(A)^2.3
-end
-
-@testset "issue #23366 (Int Matrix to Int power)" begin
-    @testset "Tests for $elty" for elty in (Int128, Int16, Int32, Int64, Int8,
-                                            UInt128, UInt16, UInt32, UInt64, UInt8,
-                                            BigInt)
-        #@info "Testing $elty"
-        @test elty[1 1;1 0]^-1 == [0  1;  1 -1]
-        @test elty[1 1;1 0]^-2 == [1 -1; -1  2]
-        @test (@inferred elty[1 1;1 0]^2) == elty[2 1;1 1]
-        I_ = elty[1 0;0 1]
-        @test I_^-1 == I_
-        if !(elty<:Unsigned)
-            @test (@inferred (-I_)^-1) == -I_
-            @test (@inferred (-I_)^-2) == I_
-        end
-        # make sure that type promotion for ^(::Matrix{<:Integer}, ::Integer)
-        # is analogous to type promotion for ^(::Integer, ::Integer)
-        # e.g. [1 1;1 0]^big(10000) should return Matrix{BigInt}, the same
-        # way as 2^big(10000) returns BigInt
-        for elty2 = (Int64, BigInt)
-            TT = Base.promote_op(^, elty, elty2)
-            @test (@inferred elty[1 1;1 0]^elty2(1))::Matrix{TT} == [1 1;1 0]
-        end
-    end
-end
-
-@testset "Least squares solutions" begin
-    a = [fill(1, 20) 1:20 1:20]
-    b = reshape(Matrix(1.0I, 8, 5), 20, 2)
-    @testset "Tests for type $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        a = convert(Matrix{elty}, a)
-        b = convert(Matrix{elty}, b)
-
-        # Vector rhs
-        x = a[:,1:2]\b[:,1]
-        @test ((a[:,1:2]*x-b[:,1])'*(a[:,1:2]*x-b[:,1]))[1] ≈ convert(elty, 2.546616541353384)
-
-        # Matrix rhs
-        x = a[:,1:2]\b
-        @test det((a[:,1:2]*x-b)'*(a[:,1:2]*x-b)) ≈ convert(elty, 4.437969924812031)
-
-        # Rank deficient
-        x = a\b
-        @test det((a*x-b)'*(a*x-b)) ≈ convert(elty, 4.437969924812031)
-
-        # Underdetermined minimum norm
-        x = convert(Matrix{elty}, [1 0 0; 0 1 -1]) \ convert(Vector{elty}, [1,1])
-        @test x ≈ convert(Vector{elty}, [1, 0.5, -0.5])
-
-        # symmetric, positive definite
-        @test inv(convert(Matrix{elty}, [6. 2; 2 1])) ≈ convert(Matrix{elty}, [0.5 -1; -1 3])
-
-        # symmetric, indefinite
-        @test inv(convert(Matrix{elty}, [1. 2; 2 1])) ≈ convert(Matrix{elty}, [-1. 2; 2 -1]/3)
-    end
-end
-
-function test_rdiv_pinv_consistency(a, b)
-    @test a*(b/b) ≈ (a*b)*pinv(b) ≈ a*(b*pinv(b))
-    @test typeof(a*(b/b)) == typeof((a*b)*pinv(b)) == typeof(a*(b*pinv(b)))
-end
-function test_ldiv_pinv_consistency(a, b)
-    @test (a\a)*b ≈ (pinv(a)*a)*b ≈ pinv(a)*(a*b)
-    @test typeof((a\a)*b) == typeof((pinv(a)*a)*b) == typeof(pinv(a)*(a*b))
-end
-function test_div_pinv_consistency(a, b)
-    test_rdiv_pinv_consistency(a, b)
-    test_ldiv_pinv_consistency(a, b)
-end
-
-@testset "/ and \\ consistency with pinv for vectors" begin
-    @testset "Tests for type $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        c = rand(elty, 5)
-        r = (elty <: Complex ? adjoint : transpose)(rand(elty, 5))
-        cm = rand(elty, 5, 1)
-        rm = rand(elty, 1, 5)
-        @testset "dot products" begin
-            test_div_pinv_consistency(r, c)
-            test_div_pinv_consistency(rm, c)
-            test_div_pinv_consistency(r, cm)
-            test_div_pinv_consistency(rm, cm)
-        end
-        @testset "outer products" begin
-            test_div_pinv_consistency(c, r)
-            test_div_pinv_consistency(cm, rm)
-        end
-        @testset "matrix/vector" begin
-            m = rand(5, 5)
-            test_ldiv_pinv_consistency(m, c)
-            test_rdiv_pinv_consistency(r, m)
-        end
-    end
-end
-
-@testset "test ops on Numbers for $elty" for elty in [Float32,Float64,ComplexF32,ComplexF64]
-    a = rand(elty)
-    @test isposdef(one(elty))
-    @test lyap(one(elty),a) == -a/2
-end
-
-@testset "strides" begin
-    a = rand(10)
-    b = view(a,2:2:10)
-    @test LinearAlgebra.stride1(a) == 1
-    @test LinearAlgebra.stride1(b) == 2
-end
-
-@testset "inverse of Adjoint" begin
-    A = randn(n, n)
-
-    @test @inferred(inv(A'))*A'                     ≈ I
-    @test @inferred(inv(transpose(A)))*transpose(A) ≈ I
-
-    B = complex.(A, randn(n, n))
-
-    @test @inferred(inv(B'))*B'                     ≈ I
-    @test @inferred(inv(transpose(B)))*transpose(B) ≈ I
-end
-
-@testset "Factorize fallback for Adjoint/Transpose" begin
-    a = rand(Complex{Int8}, n, n)
-    @test Array(transpose(factorize(Transpose(a)))) ≈ Array(factorize(a))
-    @test transpose(factorize(transpose(a))) == factorize(a)
-    @test Array(adjoint(factorize(Adjoint(a)))) ≈ Array(factorize(a))
-    @test adjoint(factorize(adjoint(a))) == factorize(a)
-end
-
-@testset "Matrix log issue #32313" begin
-    for A in ([30 20; -50 -30], [10.0im 0; 0 -10.0im], randn(6,6))
-        @test exp(log(A)) ≈ A
-    end
-end
-
-@testset "Matrix log PR #33245" begin
-    # edge case for divided difference
-    A1 = triu(ones(3,3),1) + diagm([1.0, -2eps()-1im, -eps()+0.75im])
-    @test exp(log(A1)) ≈ A1
-    # case where no sqrt is needed (s=0)
-    A2 = [1.01 0.01 0.01; 0 1.01 0.01; 0 0 1.01]
-    @test exp(log(A2)) ≈ A2
-end
-
-@testset "sqrt of empty Matrix of type $T" for T in [Int,Float32,Float64,ComplexF32,ComplexF64]
-    @test sqrt(Matrix{T}(undef, 0, 0)) == Matrix{T}(undef, 0, 0)
-    @test_throws DimensionMismatch sqrt(Matrix{T}(undef, 0, 3))
-end
-
-struct TypeWithoutZero end
-Base.zero(::Type{TypeWithoutZero}) = TypeWithZero()
-struct TypeWithZero end
-Base.promote_rule(::Type{TypeWithoutZero}, ::Type{TypeWithZero}) = TypeWithZero
-Base.zero(::Type{<:Union{TypeWithoutZero, TypeWithZero}}) = TypeWithZero()
-Base.:+(x::TypeWithZero, ::TypeWithoutZero) = x
-
-@testset "diagm for type with no zero" begin
-    @test diagm(0 => [TypeWithoutZero()]) isa Matrix{TypeWithZero}
-end
-
-end # module TestDense
diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl
deleted file mode 100644
index 61045a957cfed..0000000000000
--- a/stdlib/LinearAlgebra/test/diagonal.jl
+++ /dev/null
@@ -1,1186 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestDiagonal
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasFloat, BlasComplex
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
-using .Main.Furlongs
-
-isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
-using .Main.OffsetArrays
-
-isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl"))
-using .Main.InfiniteArrays
-
-isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl"))
-using .Main.FillArrays
-
-const n=12 # Size of matrix problem to test
-Random.seed!(1)
-
-@testset for relty in (Float32, Float64, BigFloat), elty in (relty, Complex{relty})
-    dd=convert(Vector{elty}, randn(n))
-    vv=convert(Vector{elty}, randn(n))
-    UU=convert(Matrix{elty}, randn(n,n))
-    if elty <: Complex
-        dd+=im*convert(Vector{elty}, randn(n))
-        vv+=im*convert(Vector{elty}, randn(n))
-        UU+=im*convert(Matrix{elty}, randn(n,n))
-    end
-    D = Diagonal(dd)
-    DM = Matrix(Diagonal(dd))
-
-    @testset "constructor" begin
-        for x in (dd, GenericArray(dd))
-            @test Diagonal(x)::Diagonal{elty,typeof(x)} == DM
-            @test Diagonal(x).diag === x
-            @test Diagonal{elty}(x)::Diagonal{elty,typeof(x)} == DM
-            @test Diagonal{elty}(x).diag === x
-            @test Diagonal{elty}(D) === D
-        end
-        @test eltype(Diagonal{elty}([1,2,3,4])) == elty
-        @test isa(Diagonal{elty,Vector{elty}}(GenericArray([1,2,3,4])), Diagonal{elty,Vector{elty}})
-        @test isa(Diagonal{elty}(rand(Int,n,n)), Diagonal{elty,Vector{elty}})
-        DI = Diagonal([1,2,3,4])
-        @test Diagonal(DI) === DI
-        @test isa(Diagonal{elty}(DI), Diagonal{elty})
-        # issue #26178
-        @test_throws MethodError convert(Diagonal, [1,2,3,4])
-        @test_throws DimensionMismatch convert(Diagonal, [1 2 3 4])
-        @test_throws InexactError convert(Diagonal, ones(2,2))
-    end
-
-    @testset "Basic properties" begin
-        @test_throws ArgumentError size(D,0)
-        @test typeof(convert(Diagonal{ComplexF32},D)) <: Diagonal{ComplexF32}
-        @test typeof(convert(AbstractMatrix{ComplexF32},D)) <: Diagonal{ComplexF32}
-
-        @test Array(real(D)) == real(DM)
-        @test Array(abs.(D)) == abs.(DM)
-        @test Array(imag(D)) == imag(DM)
-
-        @test parent(D) == dd
-        @test D[1,1] == dd[1]
-        @test D[1,2] == 0
-
-        @test issymmetric(D)
-        @test isdiag(D)
-        @test isdiag(Diagonal([[1 0; 0 1], [1 0; 0 1]]))
-        @test !isdiag(Diagonal([[1 0; 0 1], [1 0; 1 1]]))
-        @test istriu(D)
-        @test istriu(D, -1)
-        @test !istriu(D, 1)
-        @test istriu(Diagonal(zero(diag(D))), 1)
-        @test istril(D)
-        @test !istril(D, -1)
-        @test istril(D, 1)
-        @test istril(Diagonal(zero(diag(D))), -1)
-        @test Base.isstored(D,1,1)
-        @test !Base.isstored(D,1,2)
-        @test_throws BoundsError Base.isstored(D, n + 1, 1)
-        if elty <: Real
-            @test ishermitian(D)
-        end
-    end
-
-    @testset "diag" begin
-        @test_throws ArgumentError diag(D,  n+1)
-        @test_throws ArgumentError diag(D, -n-1)
-        @test (@inferred diag(D))::typeof(dd) == dd
-        @test (@inferred diag(D, 0))::typeof(dd) == dd
-        @test (@inferred diag(D, 1))::typeof(dd) == zeros(elty, n-1)
-        DG = Diagonal(GenericArray(dd))
-        @test (@inferred diag(DG))::typeof(GenericArray(dd)) == GenericArray(dd)
-        @test (@inferred diag(DG, 1))::typeof(GenericArray(dd)) == GenericArray(zeros(elty, n-1))
-    end
-
-
-    @testset "Simple unary functions" begin
-        for op in (-,)
-            @test op(D)==op(DM)
-        end
-
-        for func in (det, tr)
-            @test func(D) ≈ func(DM) atol=n^2*eps(relty)*(1+(elty<:Complex))
-        end
-        if relty <: BlasFloat
-            for func in (exp, cis, sinh, cosh, tanh, sech, csch, coth)
-                @test func(D) ≈ func(DM) atol=n^3*eps(relty)
-            end
-            @test log(Diagonal(abs.(D.diag))) ≈ log(abs.(DM)) atol=n^3*eps(relty)
-        end
-        if elty <: BlasComplex
-            for func in (logdet, sqrt, sin, cos, tan, sec, csc, cot,
-                         asin, acos, atan, asec, acsc, acot,
-                         asinh, acosh, atanh, asech, acsch, acoth)
-                @test func(D) ≈ func(DM) atol=n^2*eps(relty)*2
-            end
-        end
-    end
-
-    @testset "Two-dimensional Euler formula for Diagonal" begin
-        @test cis(Diagonal([π, π])) ≈ -I
-    end
-
-    @testset "Linear solve" begin
-        for (v, U) in ((vv, UU), (view(vv, 1:n), view(UU, 1:n, 1:2)))
-            @test D*v ≈ DM*v atol=n*eps(relty)*(1+(elty<:Complex))
-            @test D*U ≈ DM*U atol=n^2*eps(relty)*(1+(elty<:Complex))
-
-            @test transpose(U)*D ≈ transpose(U)*Array(D)
-            @test U'*D ≈ U'*Array(D)
-
-            if relty != BigFloat
-                atol_two = 2n^2 * eps(relty) * (1 + (elty <: Complex))
-                atol_three = 2n^3 * eps(relty) * (1 + (elty <: Complex))
-                @test D\v ≈ DM\v atol=atol_two
-                @test D\U ≈ DM\U atol=atol_three
-                @test ldiv!(D, copy(v)) ≈ DM\v atol=atol_two
-                @test ldiv!(transpose(D), copy(v)) ≈ DM\v atol=atol_two
-                @test ldiv!(adjoint(conj(D)), copy(v)) ≈ DM\v atol=atol_two
-                @test ldiv!(D, copy(U)) ≈ DM\U atol=atol_three
-                @test ldiv!(transpose(D), copy(U)) ≈ DM\U atol=atol_three
-                @test ldiv!(adjoint(conj(D)), copy(U)) ≈ DM\U atol=atol_three
-                # this method tests AbstractMatrix/AbstractVec for second arg
-                Usym_bad = Symmetric(ones(elty, n+1, n+1))
-                @test_throws DimensionMismatch ldiv!(D, copy(Usym_bad))
-
-                @test ldiv!(zero(v), D, copy(v)) ≈ DM\v atol=atol_two
-                @test ldiv!(zero(v), transpose(D), copy(v)) ≈ DM\v atol=atol_two
-                @test ldiv!(zero(v), adjoint(conj(D)), copy(v)) ≈ DM\v atol=atol_two
-                @test ldiv!(zero(U), D, copy(U)) ≈ DM\U atol=atol_three
-                @test ldiv!(zero(U), transpose(D), copy(U)) ≈ DM\U atol=atol_three
-                @test ldiv!(zero(U), adjoint(conj(D)), copy(U)) ≈ DM\U atol=atol_three
-
-                Uc = copy(U')
-                target = rmul!(Uc, Diagonal(inv.(D.diag)))
-                @test rdiv!(Uc, D) ≈ target atol=atol_three
-                @test_throws DimensionMismatch rdiv!(Matrix{elty}(I, n-1, n-1), D)
-                @test_throws SingularException rdiv!(Uc, Diagonal(fill!(similar(D.diag), 0)))
-                @test rdiv!(Uc, transpose(D)) ≈ target atol=atol_three
-                @test rdiv!(Uc, adjoint(conj(D))) ≈ target atol=atol_three
-                @test ldiv!(D, Matrix{eltype(D)}(I, size(D))) ≈ D \ Matrix{eltype(D)}(I, size(D)) atol=atol_three
-                @test_throws DimensionMismatch ldiv!(D, fill(elty(1), n + 1))
-                @test_throws SingularException ldiv!(Diagonal(zeros(relty, n)), copy(v))
-                b = rand(elty, n, n)
-                @test ldiv!(D, copy(b)) ≈ Array(D)\Array(b)
-                @test_throws SingularException ldiv!(Diagonal(zeros(elty, n)), copy(b))
-                b = view(rand(elty, n), Vector(1:n))
-                b2 = copy(b)
-                c = ldiv!(D, b)
-                d = Array(D)\b2
-                @test c ≈ d
-                @test_throws SingularException ldiv!(Diagonal(zeros(elty, n)), b)
-                b = rand(elty, n+1, n+1)
-                @test_throws DimensionMismatch ldiv!(D, copy(b))
-                b = view(rand(elty, n+1), Vector(1:n+1))
-                @test_throws DimensionMismatch ldiv!(D, b)
-            end
-        end
-    end
-    d = convert(Vector{elty}, randn(n))
-    D2 = Diagonal(d)
-    DM2= Matrix(Diagonal(d))
-    @testset "Binary operations" begin
-        for op in (+, -, *)
-            @test Array(op(D, D2)) ≈ op(DM, DM2)
-        end
-        @testset "with plain numbers" begin
-            a = rand()
-            @test Array(a*D) ≈ a*DM
-            @test Array(D*a) ≈ DM*a
-            @test Array(D/a) ≈ DM/a
-            if elty <: Real
-                @test Array(abs.(D)^a) ≈ abs.(DM)^a
-            else
-                @test Array(D^a) ≈ DM^a
-            end
-            @test Diagonal(1:100)^2 == Diagonal((1:100).^2)
-            p = 3
-            @test Diagonal(1:100)^p == Diagonal((1:100).^p)
-            @test Diagonal(1:100)^(-1) == Diagonal(inv.(1:100))
-            @test Diagonal(1:100)^2.0 == Diagonal((1:100).^2.0)
-            @test Diagonal(1:100)^(2.0+0im) == Diagonal((1:100).^(2.0+0im))
-        end
-
-        if relty <: BlasFloat
-            for b in (rand(elty,n,n), rand(elty,n))
-                @test lmul!(copy(D), copy(b)) ≈ Array(D)*Array(b)
-                @test lmul!(transpose(copy(D)), copy(b)) ≈ transpose(Array(D))*Array(b)
-                @test lmul!(adjoint(copy(D)), copy(b)) ≈ Array(D)'*Array(b)
-            end
-        end
-
-        #a few missing mults
-        bd = Bidiagonal(D2)
-        @test D*transpose(D2) ≈ Array(D)*transpose(Array(D2))
-        @test D2*transpose(D) ≈ Array(D2)*transpose(Array(D))
-        @test D2*D' ≈ Array(D2)*Array(D)'
-
-        #division of two Diagonals
-        @test D/D2 ≈ Diagonal(D.diag./D2.diag)
-        @test D\D2 ≈ Diagonal(D2.diag./D.diag)
-
-        # QR \ Diagonal
-        A = rand(elty, n, n)
-        qrA = qr(A)
-        @test qrA \ D ≈ A \ D
-
-        # HermOrSym
-        A     = rand(elty, n, n)
-        Asym  = Symmetric(A + transpose(A), :U)
-        Aherm = Hermitian(A + adjoint(A), :U)
-        for op in (+, -)
-            @test op(Asym, D) isa Symmetric
-            @test Array(op(Asym, D)) ≈ Array(Symmetric(op(Array(Asym), Array(D))))
-            @test op(D, Asym) isa Symmetric
-            @test Array(op(D, Asym)) ≈ Array(Symmetric(op(Array(D), Array(Asym))))
-            if !(elty <: Real)
-                Dr = real(D)
-                @test op(Aherm, Dr) isa Hermitian
-                @test Array(op(Aherm, Dr)) ≈ Array(Hermitian(op(Array(Aherm), Array(Dr))))
-                @test op(Dr, Aherm) isa Hermitian
-                @test Array(op(Dr, Aherm)) ≈ Array(Hermitian(op(Array(Dr), Array(Aherm))))
-            end
-        end
-        @test Array(D*transpose(Asym)) ≈ Array(D) * Array(transpose(Asym))
-        @test Array(D*adjoint(Asym)) ≈ Array(D) * Array(adjoint(Asym))
-        @test Array(D*transpose(Aherm)) ≈ Array(D) * Array(transpose(Aherm))
-        @test Array(D*adjoint(Aherm)) ≈ Array(D) * Array(adjoint(Aherm))
-        @test Array(transpose(Asym)*transpose(D)) ≈ Array(transpose(Asym)) * Array(transpose(D))
-        @test Array(transpose(D)*transpose(Asym)) ≈ Array(transpose(D)) * Array(transpose(Asym))
-        @test Array(adjoint(Aherm)*adjoint(D)) ≈ Array(adjoint(Aherm)) * Array(adjoint(D))
-        @test Array(adjoint(D)*adjoint(Aherm)) ≈ Array(adjoint(D)) * Array(adjoint(Aherm))
-
-        # Performance specialisations for A*_mul_B!
-        vvv = similar(vv)
-        @test (r = Matrix(D) * vv   ; mul!(vvv, D, vv)  ≈ r ≈ vvv)
-        @test (r = Matrix(D)' * vv  ; mul!(vvv, adjoint(D), vv) ≈ r ≈ vvv)
-        @test (r = transpose(Matrix(D)) * vv ; mul!(vvv, transpose(D), vv) ≈ r ≈ vvv)
-
-        UUU = similar(UU)
-        for transformA in (identity, adjoint, transpose)
-            for transformD in (identity, adjoint, transpose)
-                @test mul!(UUU, transformA(UU), transformD(D)) ≈  transformA(UU) * Matrix(transformD(D))
-                @test mul!(UUU, transformD(D), transformA(UU)) ≈  Matrix(transformD(D)) * transformA(UU)
-            end
-        end
-
-        alpha = elty(randn())  # randn(elty) does not work with BigFloat
-        beta = elty(randn())
-        @test begin
-            vvv = similar(vv)
-            vvv .= randn(size(vvv))  # randn!(vvv) does not work with BigFloat
-            r = alpha * Matrix(D) * vv + beta * vvv
-            mul!(vvv, D, vv, alpha, beta)  ≈ r ≈ vvv
-        end
-        @test begin
-            vvv = similar(vv)
-            vvv .= randn(size(vvv))  # randn!(vvv) does not work with BigFloat
-            r = alpha * Matrix(D)' * vv + beta * vvv
-            mul!(vvv, adjoint(D), vv, alpha, beta) ≈ r ≈ vvv
-        end
-        @test begin
-            vvv = similar(vv)
-            vvv .= randn(size(vvv))  # randn!(vvv) does not work with BigFloat
-            r = alpha * transpose(Matrix(D)) * vv + beta * vvv
-            mul!(vvv, transpose(D), vv, alpha, beta) ≈ r ≈ vvv
-        end
-
-        @test begin
-            UUU = similar(UU)
-            UUU .= randn(size(UUU))  # randn!(UUU) does not work with BigFloat
-            r = alpha * Matrix(D) * UU + beta * UUU
-            mul!(UUU, D, UU, alpha, beta) ≈ r ≈ UUU
-        end
-        @test begin
-            UUU = similar(UU)
-            UUU .= randn(size(UUU))  # randn!(UUU) does not work with BigFloat
-            r = alpha * Matrix(D)' * UU + beta * UUU
-            mul!(UUU, adjoint(D), UU, alpha, beta) ≈ r ≈ UUU
-        end
-        @test begin
-            UUU = similar(UU)
-            UUU .= randn(size(UUU))  # randn!(UUU) does not work with BigFloat
-            r = alpha * transpose(Matrix(D)) * UU + beta * UUU
-            mul!(UUU, transpose(D), UU, alpha, beta) ≈ r ≈ UUU
-        end
-
-        # make sure that mul!(A, {Adj|Trans}(B)) works with B as a Diagonal
-        VV = Array(D)
-        DD = copy(D)
-        r  = VV * Matrix(D)
-        @test Array(rmul!(VV, DD)) ≈ r ≈ Array(D)*Array(D)
-        DD = copy(D)
-        r  = VV * transpose(Array(D))
-        @test Array(rmul!(VV, transpose(DD))) ≈ r
-        DD = copy(D)
-        r  = VV * Array(D)'
-        @test Array(rmul!(VV, adjoint(DD))) ≈ r
-
-        # kron
-        D3 = Diagonal(convert(Vector{elty}, rand(n÷2)))
-        DM3= Matrix(D3)
-        @test Matrix(kron(D, D3)) ≈ kron(DM, DM3)
-        M4 = rand(elty, n÷2, n÷2)
-        @test kron(D3, M4) ≈ kron(DM3, M4)
-        @test kron(M4, D3) ≈ kron(M4, DM3)
-        X = [ones(1,1) for i in 1:2, j in 1:2]
-        @test kron(I(2), X)[1,3] == zeros(1,1)
-        X = [ones(2,2) for i in 1:2, j in 1:2]
-        @test kron(I(2), X)[1,3] == zeros(2,2)
-    end
-    @testset "iszero, isone, triu, tril" begin
-        Dzero = Diagonal(zeros(elty, 10))
-        Done = Diagonal(ones(elty, 10))
-        Dmix = Diagonal(zeros(elty, 10))
-        Dmix[end,end] = one(elty)
-        @test iszero(Dzero)
-        @test !isone(Dzero)
-        @test !iszero(Done)
-        @test isone(Done)
-        @test !iszero(Dmix)
-        @test !isone(Dmix)
-        @test istriu(D)
-        @test istril(D)
-        @test iszero(triu(D,1))
-        @test triu(D,0)  == D
-        @test triu(D,-1) == D
-        @test tril(D,1)  == D
-        @test iszero(tril(D,-1))
-        @test tril(D,0)  == D
-        @test_throws ArgumentError tril(D, -n - 2)
-        @test_throws ArgumentError tril(D, n)
-        @test_throws ArgumentError triu(D, -n)
-        @test_throws ArgumentError triu(D, n + 2)
-    end
-
-    # factorize
-    @test factorize(D) == D
-
-    @testset "Eigensystem" begin
-        eigD = eigen(D)
-        @test Diagonal(eigD.values) == D
-        @test eigD.vectors == Matrix(I, size(D))
-        eigsortD = eigen(D, sortby=LinearAlgebra.eigsortby)
-        @test eigsortD.values !== D.diag
-        @test eigsortD.values == sort(D.diag, by=LinearAlgebra.eigsortby)
-        @test Matrix(eigsortD) == D
-    end
-
-    @testset "ldiv" begin
-        v = rand(n + 1)
-        @test_throws DimensionMismatch D\v
-        v = rand(n)
-        @test D\v ≈ DM\v
-        V = rand(n + 1, n)
-        @test_throws DimensionMismatch D\V
-        V = rand(n, n)
-        @test D\V ≈ DM\V
-    end
-
-    @testset "conj and transpose" begin
-        @test transpose(D) == D
-        if elty <: Real
-            @test transpose(D) === D
-            @test adjoint(D) === D
-        elseif elty <: BlasComplex
-            @test Array(conj(D)) ≈ conj(DM)
-            @test adjoint(D) == conj(D)
-            local D2 = copy(D)
-            local D2adj = adjoint(D2)
-            D2adj[1,1] = rand(eltype(D2adj))
-            @test D2[1,1] == adjoint(D2adj[1,1])
-            @test D2adj' === D2
-        end
-        # Translates to Ac/t_mul_B, which is specialized after issue 21286
-        @test(D' * vv == conj(D) * vv)
-        @test(transpose(D) * vv == D * vv)
-    end
-
-    # logdet and logabsdet
-    if relty <: Real
-        lD = Diagonal(convert(Vector{relty}, rand(n)))
-        lM = Matrix(lD)
-        @test logdet(lD) ≈ logdet(lM)
-        d1, s1 = @inferred logabsdet(lD)
-        d2, s2 = logabsdet(lM)
-        @test d1 ≈ d2
-        @test s1 == s2
-        @test logdet(Diagonal(relty[-1,-2])) ≈ log(2)
-        @test_throws DomainError logdet(Diagonal(relty[-1,-2,-3]))
-    end
-
-    @testset "similar" begin
-        @test isa(similar(D), Diagonal{elty})
-        @test isa(similar(D, Int), Diagonal{Int})
-        @test isa(similar(D, (3,2)), Matrix{elty})
-        @test isa(similar(D, Int, (3,2)), Matrix{Int})
-    end
-
-    # Issue number 10036
-    # make sure issymmetric/ishermitian work for
-    # non-real diagonal matrices
-    @testset "issymmetric/hermitian for complex Diagonal" begin
-        @test issymmetric(D2)
-        @test ishermitian(D2)
-        if elty <: Complex
-            dc = d .+ elty(1im)
-            D3 = Diagonal(dc)
-            @test issymmetric(D3)
-            @test !ishermitian(D3)
-        end
-    end
-
-    @testset "svd (#11120/#11247)" begin
-        U, s, V = svd(D)
-        @test (U*Diagonal(s))*V' ≈ D
-        @test svdvals(D) == s
-        @test svd(D).V == V
-    end
-
-    @testset "svd/eigen with Diagonal{Furlong}" begin
-        Du = Furlong.(D)
-        @test Du isa Diagonal{<:Furlong{1}}
-        F = svd(Du)
-        U, s, V = F
-        @test map(x -> x.val, Matrix(F)) ≈ map(x -> x.val, Du)
-        @test svdvals(Du) == s
-        @test U isa AbstractMatrix{<:Furlong{0}}
-        @test V isa AbstractMatrix{<:Furlong{0}}
-        @test s isa AbstractVector{<:Furlong{1}}
-        E = eigen(Du)
-        vals, vecs = E
-        @test Matrix(E) == Du
-        @test vals isa AbstractVector{<:Furlong{1}}
-        @test vecs isa AbstractMatrix{<:Furlong{0}}
-    end
-end
-
-@testset "rdiv! (#40887)" begin
-    @test rdiv!(Matrix(Diagonal([2.0, 3.0])), Diagonal(2:3)) == Diagonal([1.0, 1.0])
-    @test rdiv!(fill(3.0, 3, 3), 3.0I(3)) == ones(3,3)
-end
-
-@testset "kron (issue #40595)" begin
-    # custom array type to test that kron on Diagonal matrices preserves types of the parents if possible
-    struct KronTestArray{T, N, AT} <: AbstractArray{T, N}
-        data::AT
-    end
-    KronTestArray(data::AbstractArray) = KronTestArray{eltype(data), ndims(data), typeof(data)}(data)
-    Base.size(A::KronTestArray) = size(A.data)
-    LinearAlgebra.kron(A::KronTestArray, B::KronTestArray) = KronTestArray(kron(A.data, B.data))
-    Base.getindex(K::KronTestArray{<:Any,N}, i::Vararg{Int,N}) where {N} = K.data[i...]
-
-    A = KronTestArray([1, 2, 3]);
-    @test kron(A, A) isa KronTestArray
-    Ad = Diagonal(A);
-    @test kron(Ad, Ad).diag isa KronTestArray
-    @test kron(Ad, Ad).diag == kron([1, 2, 3], [1, 2, 3])
-end
-
-# Define a vector type that does not support `deleteat!`, to ensure that `kron` handles this
-struct SimpleVector{T} <: AbstractVector{T}
-    vec::Vector{T}
-end
-SimpleVector(x::SimpleVector) = SimpleVector(Vector(x.vec))
-SimpleVector{T}(::UndefInitializer, n::Integer) where {T} = SimpleVector(Vector{T}(undef, n))
-Base.:(==)(x::SimpleVector, y::SimpleVector) = x == y
-Base.axes(x::SimpleVector) = axes(x.vec)
-Base.convert(::Type{Vector{T}}, x::SimpleVector) where {T} = convert(Vector{T}, x.vec)
-Base.convert(::Type{Vector}, x::SimpleVector{T}) where {T} = convert(Vector{T}, x)
-Base.convert(::Type{Array{T}}, x::SimpleVector) where {T} = convert(Vector{T}, x)
-Base.convert(::Type{Array}, x::SimpleVector) = convert(Vector, x)
-Base.copyto!(x::SimpleVector, y::SimpleVector) = (copyto!(x.vec, y.vec); x)
-Base.eltype(::Type{SimpleVector{T}}) where {T} = T
-Base.getindex(x::SimpleVector, ind...) = getindex(x.vec, ind...)
-Base.kron(x::SimpleVector, y::SimpleVector) = SimpleVector(kron(x.vec, y.vec))
-Base.promote_rule(::Type{<:AbstractVector{T}}, ::Type{SimpleVector{U}}) where {T,U} = Vector{promote_type(T, U)}
-Base.promote_rule(::Type{SimpleVector{T}}, ::Type{SimpleVector{U}}) where {T,U} = SimpleVector{promote_type(T, U)}
-Base.setindex!(x::SimpleVector, val, ind...) = (setindex!(x.vec, val, ind...), x)
-Base.similar(x::SimpleVector, ::Type{T}) where {T} = SimpleVector(similar(x.vec, T))
-Base.similar(x::SimpleVector, ::Type{T}, dims::Dims{1}) where {T} = SimpleVector(similar(x.vec, T, dims))
-Base.size(x::SimpleVector) = size(x.vec)
-
-@testset "kron (issue #46456)" for repr in Any[identity, SimpleVector]
-    A = Diagonal(repr(randn(10)))
-    BL = Bidiagonal(repr(randn(10)), repr(randn(9)), :L)
-    BU = Bidiagonal(repr(randn(10)), repr(randn(9)), :U)
-    C = SymTridiagonal(repr(randn(10)), repr(randn(9)))
-    Cl = SymTridiagonal(repr(randn(10)), repr(randn(10)))
-    D = Tridiagonal(repr(randn(9)), repr(randn(10)), repr(randn(9)))
-    @test kron(A, BL)::Bidiagonal == kron(Array(A), Array(BL))
-    @test kron(A, BU)::Bidiagonal == kron(Array(A), Array(BU))
-    @test kron(A, C)::SymTridiagonal == kron(Array(A), Array(C))
-    @test kron(A, Cl)::SymTridiagonal == kron(Array(A), Array(Cl))
-    @test kron(A, D)::Tridiagonal == kron(Array(A), Array(D))
-end
-
-@testset "svdvals and eigvals (#11120/#11247)" begin
-    D = Diagonal(Matrix{Float64}[randn(3,3), randn(2,2)])
-    @test sort([svdvals(D)...;], rev = true) ≈ svdvals([D.diag[1] zeros(3,2); zeros(2,3) D.diag[2]])
-    @test sort([eigvals(D)...;], by=LinearAlgebra.eigsortby) ≈ eigvals([D.diag[1] zeros(3,2); zeros(2,3) D.diag[2]])
-end
-
-@testset "eigvals should return a copy of the diagonal" begin
-    D = Diagonal([1, 2, 3])
-    lam = eigvals(D)
-    D[3,3] = 4 # should not affect lam
-    @test lam == [1, 2, 3]
-end
-
-@testset "eigmin (#27847)" begin
-    for _ in 1:100
-        d = randn(rand(1:10))
-        D = Diagonal(d)
-        @test eigmin(D) == minimum(d)
-    end
-end
-
-@testset "isposdef" begin
-    @test isposdef(Diagonal(1.0 .+ rand(n)))
-    @test !isposdef(Diagonal(-1.0 * rand(n)))
-    @test isposdef(Diagonal(complex(1.0, 0.0) .+ rand(n)))
-    @test !isposdef(Diagonal(complex(1.0, 1.0) .+ rand(n)))
-    @test isposdef(Diagonal([[1 0; 0 1], [1 0; 0 1]]))
-    @test !isposdef(Diagonal([[1 0; 0 1], [1 0; 1 1]]))
-end
-
-@testset "getindex" begin
-    d = randn(n)
-    D = Diagonal(d)
-    # getindex bounds checking
-    @test_throws BoundsError D[0, 0]
-    @test_throws BoundsError D[-1, -2]
-    @test_throws BoundsError D[n, n + 1]
-    @test_throws BoundsError D[n + 1, n]
-    @test_throws BoundsError D[n + 1, n + 1]
-    # getindex on and off the diagonal
-    for i in 1:n, j in 1:n
-        @test D[i, j] == (i == j ? d[i] : 0)
-    end
-end
-
-@testset "setindex!" begin
-    d = randn(n)
-    D = Diagonal(d)
-    # setindex! bounds checking
-    @test_throws BoundsError D[0, 0] = 0
-    @test_throws BoundsError D[-1 , -2] = 0
-    @test_throws BoundsError D[n, n + 1] = 0
-    @test_throws BoundsError D[n + 1, n] = 0
-    @test_throws BoundsError D[n + 1, n + 1] = 0
-    for i in 1:n, j in 1:n
-        if i == j
-            # setindex on! the diagonal
-            @test ((D[i, j] = i) == i; D[i, j] == i)
-        else
-            # setindex! off the diagonal
-            @test ((D[i, j] = 0) == 0; iszero(D[i, j]))
-            @test_throws ArgumentError D[i, j] = 1
-        end
-    end
-end
-
-@testset "inverse" begin
-    for d in Any[randn(n), Int[], [1, 2, 3], [1im, 2im, 3im], [1//1, 2//1, 3//1], [1+1im//1, 2//1, 3im//1]]
-        D = Diagonal(d)
-        @test inv(D) ≈ inv(Array(D))
-    end
-    @test_throws SingularException inv(Diagonal(zeros(n)))
-    @test_throws SingularException inv(Diagonal([0, 1, 2]))
-    @test_throws SingularException inv(Diagonal([0im, 1im, 2im]))
-end
-
-@testset "pseudoinverse" begin
-    for d in Any[randn(n), zeros(n), Int[], [0, 2, 0.003], [0im, 1+2im, 0.003im], [0//1, 2//1, 3//100], [0//1, 1//1+2im, 3im//100]]
-        D = Diagonal(d)
-        @test pinv(D) ≈ pinv(Array(D))
-        @test pinv(D, 1.0e-2) ≈ pinv(Array(D), 1.0e-2)
-    end
-end
-
-# allow construct from range
-@test all(Diagonal(range(1, stop=3, length=3)) .== Diagonal([1.0,2.0,3.0]))
-
-# Issue 12803
-for t in (Float32, Float64, Int, ComplexF64, Rational{Int})
-    @test Diagonal(Matrix{t}[fill(t(1), 2, 2), fill(t(1), 3, 3)])[2,1] == zeros(t, 3, 2)
-end
-
-# Issue 15401
-@test Matrix(1.0I, 5, 5) \ Diagonal(fill(1.,5)) == Matrix(I, 5, 5)
-
-@testset "Triangular and Diagonal" begin
-    function _test_matrix(type)
-        if type == Int
-            return rand(1:9, 5, 5)
-        else
-            return randn(type, 5, 5)
-        end
-    end
-    types = (Float64, Int, ComplexF64)
-    for ta in types
-        D = Diagonal(_test_matrix(ta))
-        for tb in types
-            B = _test_matrix(tb)
-            Tmats = (LowerTriangular(B), UnitLowerTriangular(B), UpperTriangular(B), UnitUpperTriangular(B))
-            restypes = (LowerTriangular, LowerTriangular, UpperTriangular, UpperTriangular)
-            for (T, rtype) in zip(Tmats, restypes)
-                adjtype = (rtype == LowerTriangular) ? UpperTriangular : LowerTriangular
-
-                # Triangular * Diagonal
-                R = T * D
-                @test R ≈ Array(T) * Array(D)
-                @test isa(R, rtype)
-
-                # Diagonal * Triangular
-                R = D * T
-                @test R ≈ Array(D) * Array(T)
-                @test isa(R, rtype)
-
-                # Adjoint of Triangular * Diagonal
-                R = T' * D
-                @test R ≈ Array(T)' * Array(D)
-                @test isa(R, adjtype)
-
-                # Diagonal * Adjoint of Triangular
-                R = D * T'
-                @test R ≈ Array(D) * Array(T)'
-                @test isa(R, adjtype)
-
-                # Transpose of Triangular * Diagonal
-                R = transpose(T) * D
-                @test R ≈ transpose(Array(T)) * Array(D)
-                @test isa(R, adjtype)
-
-                # Diagonal * Transpose of Triangular
-                R = D * transpose(T)
-                @test R ≈ Array(D) * transpose(Array(T))
-                @test isa(R, adjtype)
-            end
-        end
-    end
-end
-
-let D1 = Diagonal(rand(5)), D2 = Diagonal(rand(5))
-    @test LinearAlgebra.rmul!(copy(D1),D2) == D1*D2
-    @test LinearAlgebra.lmul!(D1,copy(D2)) == D1*D2
-    @test LinearAlgebra.rmul!(copy(D1),transpose(D2)) == D1*transpose(D2)
-    @test LinearAlgebra.lmul!(transpose(D1),copy(D2)) == transpose(D1)*D2
-    @test LinearAlgebra.rmul!(copy(D1),adjoint(D2)) == D1*adjoint(D2)
-    @test LinearAlgebra.lmul!(adjoint(D1),copy(D2)) == adjoint(D1)*D2
-end
-
-@testset "multiplication of a Diagonal with a Matrix" begin
-    A = collect(reshape(1:8, 4, 2));
-    B = BigFloat.(A);
-    DL = Diagonal(collect(axes(A, 1)));
-    DR = Diagonal(Float16.(collect(axes(A, 2))));
-
-    @test DL * A == collect(DL) * A
-    @test A * DR == A * collect(DR)
-    @test DL * B == collect(DL) * B
-    @test B * DR == B * collect(DR)
-
-    A = reshape([ones(2,2), ones(2,2)*2, ones(2,2)*3, ones(2,2)*4], 2, 2)
-    Ac = collect(A)
-    D = Diagonal([collect(reshape(1:4, 2, 2)), collect(reshape(5:8, 2, 2))])
-    Dc = collect(D)
-    @test A * D == Ac * Dc
-    @test D * A == Dc * Ac
-    @test D * D == Dc * Dc
-
-    AS = similar(A)
-    mul!(AS, A, D, true, false)
-    @test AS == A * D
-
-    D2 = similar(D)
-    mul!(D2, D, D)
-    @test D2 == D * D
-
-    copyto!(D2, D)
-    lmul!(D, D2)
-    @test D2 == D * D
-    copyto!(D2, D)
-    rmul!(D2, D)
-    @test D2 == D * D
-end
-
-@testset "multiplication of 2 Diagonal and a Matrix (#46400)" begin
-    A = randn(10, 10)
-    D = Diagonal(randn(10))
-    D2 = Diagonal(randn(10))
-    @test D * A * D2 ≈ D * (A * D2)
-    @test D * A * D2 ≈ (D * A) * D2
-    @test_throws DimensionMismatch Diagonal(ones(9)) * A * D2
-    @test_throws DimensionMismatch D * A * Diagonal(ones(9))
-end
-
-@testset "multiplication of QR Q-factor and Diagonal (#16615 spot test)" begin
-    D = Diagonal(randn(5))
-    Q = qr(randn(5, 5)).Q
-    @test D * Q' == Array(D) * Q'
-    Q = qr(randn(5, 5), ColumnNorm()).Q
-    @test_throws ArgumentError lmul!(Q, D)
-end
-
-@testset "block diagonal matrices" begin
-    D = Diagonal([[1 2; 3 4], [1 2; 3 4]])
-    Dherm = Diagonal([[1 1+im; 1-im 1], [1 1+im; 1-im 1]])
-    Dsym = Diagonal([[1 1+im; 1+im 1], [1 1+im; 1+im 1]])
-    @test adjoint(D) == Diagonal([[1 3; 2 4], [1 3; 2 4]])
-    @test transpose(D) == Diagonal([[1 3; 2 4], [1 3; 2 4]])
-    @test adjoint(Dherm) == Dherm
-    @test transpose(Dherm) == Diagonal([[1 1-im; 1+im 1], [1 1-im; 1+im 1]])
-    @test adjoint(Dsym) == Diagonal([[1 1-im; 1-im 1], [1 1-im; 1-im 1]])
-    @test transpose(Dsym) == Dsym
-
-    v = [[1, 2], [3, 4]]
-    @test Dherm' * v == Dherm * v
-    @test transpose(D) * v == [[7, 10], [15, 22]]
-
-    @test issymmetric(D) == false
-    @test issymmetric(Dherm) == false
-    @test issymmetric(Dsym) == true
-
-    @test ishermitian(D) == false
-    @test ishermitian(Dherm) == true
-    @test ishermitian(Dsym) == false
-
-    @test exp(D) == Diagonal([exp([1 2; 3 4]), exp([1 2; 3 4])])
-    @test cis(D) == Diagonal([cis([1 2; 3 4]), cis([1 2; 3 4])])
-    @test log(D) == Diagonal([log([1 2; 3 4]), log([1 2; 3 4])])
-    @test sqrt(D) == Diagonal([sqrt([1 2; 3 4]), sqrt([1 2; 3 4])])
-
-    @test tr(D) == 10
-    @test det(D) == 4
-
-    M = [1 2; 3 4]
-    for n in 0:1
-        D = Diagonal(fill(M, n))
-        @test D == Matrix{eltype(D)}(D)
-    end
-end
-
-@testset "linear solve for block diagonal matrices" begin
-    D = Diagonal([rand(2,2) for _ in 1:5])
-    b = [rand(2,2) for _ in 1:5]
-    B = [rand(2,2) for _ in 1:5, _ in 1:5]
-    @test ldiv!(D, copy(b)) ≈ Diagonal(inv.(D.diag)) * b
-    @test ldiv!(D, copy(B)) ≈ Diagonal(inv.(D.diag)) * B
-    @test rdiv!(copy(B), D) ≈ B * Diagonal(inv.(D.diag))
-end
-
-@testset "multiplication with Symmetric/Hermitian" begin
-    for T in (Float64, ComplexF64)
-        D = Diagonal(randn(T, n))
-        A = randn(T, n, n); A = A'A
-        S = Symmetric(A)
-        H = Hermitian(A)
-        for (transform1, transform2) in ((identity,  identity),
-                (identity,  adjoint  ), (adjoint,   identity ), (adjoint,   adjoint  ),
-                (identity,  transpose), (transpose, identity ), (transpose, transpose) )
-            @test *(transform1(D), transform2(S)) ≈ *(transform1(Matrix(D)), transform2(Matrix(S)))
-            @test *(transform1(D), transform2(H)) ≈ *(transform1(Matrix(D)), transform2(Matrix(H)))
-            @test *(transform1(S), transform2(D)) ≈ *(transform1(Matrix(S)), transform2(Matrix(D)))
-            @test *(transform1(S), transform2(H)) ≈ *(transform1(Matrix(S)), transform2(Matrix(H)))
-        end
-    end
-end
-
-@testset "multiplication of transposes of Diagonal (#22428)" begin
-    for T in (Float64, ComplexF64)
-        D = Diagonal(randn(T, 5, 5))
-        B = Diagonal(randn(T, 5, 5))
-        DD = Diagonal([randn(T, 2, 2), rand(T, 2, 2)])
-        BB = Diagonal([randn(T, 2, 2), rand(T, 2, 2)])
-        fullDD = copyto!(Matrix{Matrix{T}}(undef, 2, 2), DD)
-        fullBB = copyto!(Matrix{Matrix{T}}(undef, 2, 2), BB)
-        for (transform1, transform2) in ((identity,  identity),
-                (identity,  adjoint  ), (adjoint,   identity ), (adjoint,   adjoint  ),
-                (identity,  transpose), (transpose, identity ), (transpose, transpose))
-            @test *(transform1(D), transform2(B))::typeof(D) ≈ *(transform1(Matrix(D)), transform2(Matrix(B))) atol=2 * eps()
-            @test *(transform1(DD), transform2(BB))::typeof(DD) == *(transform1(fullDD), transform2(fullBB))
-        end
-        M = randn(T, 5, 5)
-        MM = [randn(T, 2, 2) for _ in 1:2, _ in 1:2]
-        for transform in (identity, adjoint, transpose)
-            @test lmul!(transform(D), copy(M)) ≈ *(transform(Matrix(D)), M)
-            @test rmul!(copy(M), transform(D)) ≈ *(M, transform(Matrix(D)))
-            @test lmul!(transform(DD), copy(MM)) ≈ *(transform(fullDD), MM)
-            @test rmul!(copy(MM), transform(DD)) ≈ *(MM, transform(fullDD))
-        end
-    end
-end
-
-@testset "Diagonal of adjoint/transpose vectors (#23649)" begin
-    @test Diagonal(adjoint([1, 2, 3])) == Diagonal([1 2 3])
-    @test Diagonal(transpose([1, 2, 3])) == Diagonal([1 2 3])
-end
-
-@testset "Multiplication with adjoint and transpose vectors (#26863)" begin
-    x = collect(1:2)
-    xt = transpose(x)
-    A = reshape([[1 2; 3 4], zeros(Int,2,2), zeros(Int, 2, 2), [5 6; 7 8]], 2, 2)
-    D = Diagonal(A)
-    @test x'*D == x'*A == collect(x')*D == collect(x')*A
-    @test xt*D == xt*A == collect(xt)*D == collect(xt)*A
-    outadjxD = similar(x'*D); outtrxD = similar(xt*D);
-    mul!(outadjxD, x', D)
-    @test outadjxD == x'*D
-    mul!(outtrxD, xt, D)
-    @test outtrxD == xt*D
-
-    D1 = Diagonal([[1 2; 3 4]])
-    @test D1 * x' == D1 * collect(x') == collect(D1) * collect(x')
-    @test D1 * xt == D1 * collect(xt) == collect(D1) * collect(xt)
-    outD1adjx = similar(D1 * x'); outD1trx = similar(D1 * xt);
-    mul!(outadjxD, D1, x')
-    @test outadjxD == D1*x'
-    mul!(outtrxD, D1, xt)
-    @test outtrxD == D1*xt
-
-    y = [x, x]
-    yt = transpose(y)
-    @test y'*D*y == (y'*D)*y == (y'*A)*y
-    @test yt*D*y == (yt*D)*y == (yt*A)*y
-    outadjyD = similar(y'*D); outtryD = similar(yt*D);
-    outadjyD2 = similar(collect(y'*D)); outtryD2 = similar(collect(yt*D));
-    mul!(outadjyD, y', D)
-    mul!(outadjyD2, y', D)
-    @test outadjyD == outadjyD2 == y'*D
-    mul!(outtryD, yt, D)
-    mul!(outtryD2, yt, D)
-    @test outtryD == outtryD2 == yt*D
-end
-
-@testset "Multiplication of single element Diagonal (#36746, #40726)" begin
-    @test_throws DimensionMismatch Diagonal(randn(1)) * randn(5)
-    @test_throws DimensionMismatch Diagonal(randn(1)) * Diagonal(randn(3, 3))
-    A = [1 0; 0 2]
-    v = [3, 4]
-    @test Diagonal(A) * v == A * v
-    @test Diagonal(A) * Diagonal(A) == A * A
-    @test_throws DimensionMismatch [1 0;0 1] * Diagonal([2 3])   # Issue #40726
-    @test_throws DimensionMismatch lmul!(Diagonal([1]), [1,2,3]) # nearby
-end
-
-@testset "Multiplication of a Diagonal with an OffsetArray" begin
-    # Offset indices should throw
-    D = Diagonal(1:4)
-    A = OffsetArray(rand(4,4), 2, 2)
-    @test_throws ArgumentError D * A
-    @test_throws ArgumentError A * D
-    @test_throws ArgumentError mul!(similar(A, size(A)), A, D)
-    @test_throws ArgumentError mul!(similar(A, size(A)), D, A)
-end
-
-@testset "Triangular division by Diagonal #27989" begin
-    K = 5
-    for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        U = UpperTriangular(randn(elty, K, K))
-        L = LowerTriangular(randn(elty, K, K))
-        D = Diagonal(randn(elty, K))
-        @test (U / D)::UpperTriangular{elty} == UpperTriangular(Matrix(U) / Matrix(D))
-        @test (L / D)::LowerTriangular{elty} == LowerTriangular(Matrix(L) / Matrix(D))
-        @test (D \ U)::UpperTriangular{elty} == UpperTriangular(Matrix(D) \ Matrix(U))
-        @test (D \ L)::LowerTriangular{elty} == LowerTriangular(Matrix(D) \ Matrix(L))
-    end
-end
-
-@testset "(Sym)Tridiagonal division by Diagonal" begin
-    for K in (5, 1), elty in (Float64, ComplexF32), overlength in (1, 0)
-        S = SymTridiagonal(randn(elty, K), randn(elty, K-overlength))
-        T = Tridiagonal(randn(elty, K-1), randn(elty, K), randn(elty, K-1))
-        D = Diagonal(randn(elty, K))
-        D0 = Diagonal(zeros(elty, K))
-        @test (D \ S)::Tridiagonal{elty} == Tridiagonal(Matrix(D) \ Matrix(S))
-        @test (D \ T)::Tridiagonal{elty} == Tridiagonal(Matrix(D) \ Matrix(T))
-        @test (S / D)::Tridiagonal{elty} == Tridiagonal(Matrix(S) / Matrix(D))
-        @test (T / D)::Tridiagonal{elty} == Tridiagonal(Matrix(T) / Matrix(D))
-        @test_throws SingularException D0 \ S
-        @test_throws SingularException D0 \ T
-        @test_throws SingularException S / D0
-        @test_throws SingularException T / D0
-    end
-    # 0-length case
-    S = SymTridiagonal(Float64[], Float64[])
-    T = Tridiagonal(Float64[], Float64[], Float64[])
-    D = Diagonal(Float64[])
-    @test (D \ S)::Tridiagonal{Float64} == T
-    @test (D \ T)::Tridiagonal{Float64} == T
-    @test (S / D)::Tridiagonal{Float64} == T
-    @test (T / D)::Tridiagonal{Float64} == T
-    # matrix eltype case
-    K = 5
-    for elty in (Float64, ComplexF32), overlength in (1, 0)
-        S = SymTridiagonal([rand(elty, 2, 2) for _ in 1:K], [rand(elty, 2, 2) for _ in 1:K-overlength])
-        T = Tridiagonal([rand(elty, 2, 2) for _ in 1:K-1], [rand(elty, 2, 2) for _ in 1:K], [rand(elty, 2, 2) for _ in 1:K-1])
-        D = Diagonal(randn(elty, K))
-        SM = fill(zeros(elty, 2, 2), K, K)
-        TM = copy(SM)
-        SM[1,1] = S[1,1]; TM[1,1] = T[1,1]
-        for j in 2:K
-            SM[j,j-1] = S[j,j-1]; SM[j,j] = S[j,j]; SM[j-1,j] = S[j-1,j]
-            TM[j,j-1] = T[j,j-1]; TM[j,j] = T[j,j]; TM[j-1,j] = T[j-1,j]
-        end
-        for (M, Mm) in ((S, SM), (T, TM))
-            DS = D \ M
-            @test DS isa Tridiagonal
-            DM = D \ Mm
-            for i in -1:1; @test diag(DS, i) ≈ diag(DM, i) end
-            DS = M / D
-            @test DS isa Tridiagonal
-            DM = Mm / D
-            for i in -1:1; @test diag(DS, i) ≈ diag(DM, i) end
-        end
-    end
-    # eltype promotion case
-    S = SymTridiagonal(rand(-20:20, K), rand(-20:20, K-1))
-    T = Tridiagonal(rand(-20:20, K-1), rand(-20:20, K), rand(-20:20, K-1))
-    D = Diagonal(rand(1:20, K))
-    @test (D \ S)::Tridiagonal{Float64} == Tridiagonal(Matrix(D) \ Matrix(S))
-    @test (D \ T)::Tridiagonal{Float64} == Tridiagonal(Matrix(D) \ Matrix(T))
-    @test (S / D)::Tridiagonal{Float64} == Tridiagonal(Matrix(S) / Matrix(D))
-    @test (T / D)::Tridiagonal{Float64} == Tridiagonal(Matrix(T) / Matrix(D))
-end
-
-@testset "eigenvalue sorting" begin
-    D = Diagonal([0.4, 0.2, -1.3])
-    @test eigvals(D) == eigen(D).values == [0.4, 0.2, -1.3] # not sorted by default
-    @test eigvals(Matrix(D)) == eigen(Matrix(D)).values == [-1.3, 0.2, 0.4] # sorted even if diagonal special case is detected
-    E = eigen(D, sortby=abs) # sortby keyword supported for eigen(::Diagonal)
-    @test E.values == [0.2, 0.4, -1.3]
-    @test E.vectors == [0 1 0; 1 0 0; 0 0 1]
-end
-
-@testset "sum, mapreduce" begin
-    D = Diagonal([1,2,3])
-    Ddense = Matrix(D)
-    @test sum(D) == 6
-    @test_throws ArgumentError sum(D, dims=0)
-    @test sum(D, dims=1) == sum(Ddense, dims=1)
-    @test sum(D, dims=2) == sum(Ddense, dims=2)
-    @test sum(D, dims=3) == sum(Ddense, dims=3)
-    @test typeof(sum(D, dims=1)) == typeof(sum(Ddense, dims=1))
-    @test mapreduce(one, min, D, dims=1) == mapreduce(one, min, Ddense, dims=1)
-    @test mapreduce(one, min, D, dims=2) == mapreduce(one, min, Ddense, dims=2)
-    @test mapreduce(one, min, D, dims=3) == mapreduce(one, min, Ddense, dims=3)
-    @test typeof(mapreduce(one, min, D, dims=1)) == typeof(mapreduce(one, min, Ddense, dims=1))
-    @test mapreduce(zero, max, D, dims=1) == mapreduce(zero, max, Ddense, dims=1)
-    @test mapreduce(zero, max, D, dims=2) == mapreduce(zero, max, Ddense, dims=2)
-    @test mapreduce(zero, max, D, dims=3) == mapreduce(zero, max, Ddense, dims=3)
-    @test typeof(mapreduce(zero, max, D, dims=1)) == typeof(mapreduce(zero, max, Ddense, dims=1))
-
-    D = Diagonal(Int[])
-    Ddense = Matrix(D)
-    @test sum(D) == 0
-    @test_throws ArgumentError sum(D, dims=0)
-    @test sum(D, dims=1) == sum(Ddense, dims=1)
-    @test sum(D, dims=2) == sum(Ddense, dims=2)
-    @test sum(D, dims=3) == sum(Ddense, dims=3)
-    @test typeof(sum(D, dims=1)) == typeof(sum(Ddense, dims=1))
-
-    D = Diagonal(Int[2])
-    Ddense = Matrix(D)
-    @test sum(D) == 2
-    @test_throws ArgumentError sum(D, dims=0)
-    @test sum(D, dims=1) == sum(Ddense, dims=1)
-    @test sum(D, dims=2) == sum(Ddense, dims=2)
-    @test sum(D, dims=3) == sum(Ddense, dims=3)
-    @test typeof(sum(D, dims=1)) == typeof(sum(Ddense, dims=1))
-end
-
-@testset "logabsdet for generic eltype" begin
-    d = Any[1, -2.0, -3.0]
-    D = Diagonal(d)
-    d1, s1 = logabsdet(D)
-    @test d1 ≈ sum(log ∘ abs, d)
-    @test s1 == prod(sign, d)
-end
-
-@testset "Empty (#35424) & size checks (#47060)" begin
-    @test zeros(0)'*Diagonal(zeros(0))*zeros(0) === 0.0
-    @test transpose(zeros(0))*Diagonal(zeros(Complex{Int}, 0))*zeros(0) === 0.0 + 0.0im
-    @test dot(zeros(Int32, 0), Diagonal(zeros(Int, 0)), zeros(Int16, 0)) === 0
-    @test_throws DimensionMismatch zeros(2)' * Diagonal(zeros(2)) * zeros(3)
-    @test_throws DimensionMismatch zeros(3)' * Diagonal(zeros(2)) * zeros(2)
-    @test_throws DimensionMismatch dot(zeros(2), Diagonal(zeros(2)), zeros(3))
-    @test_throws DimensionMismatch dot(zeros(3), Diagonal(zeros(2)), zeros(2))
-end
-
-@testset "Diagonal(undef)" begin
-    d = Diagonal{Float32}(undef, 2)
-    @test length(d.diag) == 2
-end
-
-@testset "permutedims (#39447)" begin
-    for D in (Diagonal(zeros(5)), Diagonal(zeros(5) .+ 1im), Diagonal([[1,2],[3,4]]))
-        @test permutedims(D) === permutedims(D,(1,2)) === permutedims(D,(2,1)) === D
-        @test_throws ArgumentError permutedims(D,(1,3))
-    end
-end
-
-@testset "Inner product" begin
-    A = Diagonal(rand(10) .+ im)
-    B = Diagonal(rand(10) .+ im)
-    @test dot(A, B) ≈ dot(Matrix(A), B)
-    @test dot(A, B) ≈ dot(A, Matrix(B))
-    @test dot(A, B) ≈ dot(Matrix(A), Matrix(B))
-    @test dot(A, B) ≈ conj(dot(B, A))
-end
-
-@testset "eltype relaxation(#41015)" begin
-    A = rand(3,3)
-    for trans in (identity, adjoint, transpose)
-        @test ldiv!(trans(I(3)), A) == A
-        @test rdiv!(A, trans(I(3))) == A
-    end
-end
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
-using .Main.ImmutableArrays
-
-@testset "Conversion to AbstractArray" begin
-    # tests corresponding to #34995
-    d = ImmutableArray([1, 2, 3, 4])
-    D = Diagonal(d)
-
-    @test convert(AbstractArray{Float64}, D)::Diagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == D
-    @test convert(AbstractMatrix{Float64}, D)::Diagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == D
-end
-
-@testset "divisions functionality" for elty in (Int, Float64, ComplexF64)
-    B = Diagonal(rand(elty,5,5))
-    x = rand(elty)
-    @test \(x, B) == /(B, x)
-end
-
-@testset "promotion" begin
-    for (v1, v2) in (([true], [1]), ([zeros(2,2)], [zeros(Int, 2,2)]))
-        T = promote_type(eltype(v1), eltype(v2))
-        V = promote_type(typeof(v1), typeof(v2))
-        d1 = Diagonal(v1)
-        d2 = Diagonal(v2)
-        v = [d1, d2]
-        @test (@inferred eltype(v)) == Diagonal{T, V}
-    end
-    # test for a type for which promote_type doesn't lead to a concrete eltype
-    struct MyArrayWrapper{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
-       a :: A
-    end
-    Base.size(M::MyArrayWrapper) = size(M.a)
-    Base.axes(M::MyArrayWrapper) = axes(M.a)
-    Base.length(M::MyArrayWrapper) = length(M.a)
-    Base.getindex(M::MyArrayWrapper, i::Int...) = M.a[i...]
-    Base.setindex!(M::MyArrayWrapper, v, i::Int...) = M.a[i...] = v
-    d1 = Diagonal(MyArrayWrapper(1:3))
-    d2 = Diagonal(MyArrayWrapper(1.0:3.0))
-    c = [d1, d2]
-    @test c[1] == d1
-    @test c[2] == d2
-end
-
-@testset "zero and one" begin
-    D1 = Diagonal(rand(3))
-    @test D1 + zero(D1) == D1
-    @test D1 * one(D1) == D1
-    @test D1 * oneunit(D1) == D1
-    @test oneunit(D1) isa typeof(D1)
-    D2 = Diagonal([collect(reshape(1:4, 2, 2)), collect(reshape(5:8, 2, 2))])
-    @test D2 + zero(D2) == D2
-    @test D2 * one(D2) == D2
-    @test D2 * oneunit(D2) == D2
-    @test oneunit(D2) isa typeof(D2)
-    D3 = Diagonal([D2, D2]);
-    @test D3 + zero(D3) == D3
-    @test D3 * one(D3) == D3
-    @test D3 * oneunit(D3) == D3
-    @test oneunit(D3) isa typeof(D3)
-end
-
-@testset "AbstractTriangular" for (Tri, UTri) in ((UpperTriangular, UnitUpperTriangular), (LowerTriangular, UnitLowerTriangular))
-    A = randn(4, 4)
-    TriA = Tri(A)
-    UTriA = UTri(A)
-    D = Diagonal(1.0:4.0)
-    DM = Matrix(D)
-    DMF = factorize(DM)
-    outTri = similar(TriA)
-    out = similar(A)
-    # 2 args
-    for fun in (*, rmul!, rdiv!, /)
-        @test fun(copy(TriA), D)::Tri == fun(Matrix(TriA), D)
-        @test fun(copy(UTriA), D)::Tri == fun(Matrix(UTriA), D)
-    end
-    for fun in (*, lmul!, ldiv!, \)
-        @test fun(D, copy(TriA))::Tri == fun(D, Matrix(TriA))
-        @test fun(D, copy(UTriA))::Tri == fun(D, Matrix(UTriA))
-    end
-    # 3 args
-    @test outTri === ldiv!(outTri, D, TriA)::Tri == ldiv!(out, D, Matrix(TriA))
-    @test outTri === ldiv!(outTri, D, UTriA)::Tri == ldiv!(out, D, Matrix(UTriA))
-    @test outTri === mul!(outTri, D, TriA)::Tri == mul!(out, D, Matrix(TriA))
-    @test outTri === mul!(outTri, D, UTriA)::Tri == mul!(out, D, Matrix(UTriA))
-    @test outTri === mul!(outTri, TriA, D)::Tri == mul!(out, Matrix(TriA), D)
-    @test outTri === mul!(outTri, UTriA, D)::Tri == mul!(out, Matrix(UTriA), D)
-    # 5 args
-    @test outTri === mul!(outTri, D, TriA, 2, 1)::Tri == mul!(out, D, Matrix(TriA), 2, 1)
-    @test outTri === mul!(outTri, D, UTriA, 2, 1)::Tri == mul!(out, D, Matrix(UTriA), 2, 1)
-    @test outTri === mul!(outTri, TriA, D, 2, 1)::Tri == mul!(out, Matrix(TriA), D, 2, 1)
-    @test outTri === mul!(outTri, UTriA, D, 2, 1)::Tri == mul!(out, Matrix(UTriA), D, 2, 1)
-end
-
-struct SMatrix1{T} <: AbstractArray{T,2}
-    elt::T
-end
-Base.:(==)(A::SMatrix1, B::SMatrix1) = A.elt == B.elt
-Base.zero(::Type{SMatrix1{T}}) where {T} = SMatrix1(zero(T))
-Base.iszero(A::SMatrix1) = iszero(A.elt)
-Base.getindex(A::SMatrix1, inds...) = A.elt
-Base.size(::SMatrix1) = (1, 1)
-@testset "map for Diagonal matrices (#46292)" begin
-    A = Diagonal([1])
-    @test A isa Diagonal{Int,Vector{Int}}
-    @test 2*A isa Diagonal{Int,Vector{Int}}
-    @test A.+1 isa Matrix{Int}
-    # Numeric element types remain diagonal
-    B = map(SMatrix1, A)
-    @test B == fill(SMatrix1(1), 1, 1)
-    @test B isa Diagonal{SMatrix1{Int},Vector{SMatrix1{Int}}}
-    # Non-numeric element types become dense
-    C = map(a -> SMatrix1(string(a)), A)
-    @test C == fill(SMatrix1(string(1)), 1, 1)
-    @test C isa Matrix{SMatrix1{String}}
-end
-
-@testset "copyto! with UniformScaling" begin
-    @testset "Fill" begin
-        for len in (4, InfiniteArrays.Infinity())
-            d = FillArrays.Fill(1, len)
-            D = Diagonal(d)
-            @test copyto!(D, I) === D
-        end
-    end
-    D = Diagonal(fill(2, 2))
-    copyto!(D, I)
-    @test all(isone, diag(D))
-end
-
-@testset "diagonal triple multiplication (#49005)" begin
-    n = 10
-    @test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n))) isa Diagonal
-    @test_throws DimensionMismatch (*(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n+1))))
-    @test_throws DimensionMismatch (*(Diagonal(ones(n)), Diagonal(1:n+1), Diagonal(ones(n+1))))
-    @test_throws DimensionMismatch (*(Diagonal(ones(n+1)), Diagonal(1:n), Diagonal(ones(n))))
-
-    # currently falls back to two-term *
-    @test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n)), Diagonal(1:n)) isa Diagonal
-end
-
-end # module TestDiagonal
diff --git a/stdlib/LinearAlgebra/test/eigen.jl b/stdlib/LinearAlgebra/test/eigen.jl
deleted file mode 100644
index 413a8df0474fa..0000000000000
--- a/stdlib/LinearAlgebra/test/eigen.jl
+++ /dev/null
@@ -1,246 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestEigen
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted, UtiAUi!
-
-n = 10
-
-# Split n into 2 parts for tests needing two matrices
-n1 = div(n, 2)
-n2 = 2*n1
-
-Random.seed!(12343219)
-
-areal = randn(n,n)/2
-aimg  = randn(n,n)/2
-
-@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int)
-    aa = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
-    asym = aa' + aa                  # symmetric indefinite
-    apd  = aa' * aa                 # symmetric positive-definite
-    for (a, asym, apd) in ((aa, asym, apd),
-                           (view(aa, 1:n, 1:n),
-                            view(asym, 1:n, 1:n),
-                            view(apd, 1:n, 1:n)))
-        ε = εa = eps(abs(float(one(eltya))))
-
-        α = rand(eltya)
-        β = rand(eltya)
-        eab = eigen(α,β)
-        @test eab.values == eigvals(fill(α,1,1),fill(β,1,1))
-        @test eab.vectors == eigvecs(fill(α,1,1),fill(β,1,1))
-
-        @testset "non-symmetric eigen decomposition" begin
-            d, v = eigen(a)
-            for i in 1:size(a,2)
-                @test a*v[:,i] ≈ d[i]*v[:,i]
-            end
-            f = eigen(a)
-            @test det(a) ≈ det(f)
-            @test inv(a) ≈ inv(f)
-            @test isposdef(a) == isposdef(f)
-            @test eigvals(f) === f.values
-            @test eigvecs(f) === f.vectors
-            @test Array(f) ≈ a
-
-            for T in (Tridiagonal(a), Hermitian(Tridiagonal(a)))
-                f = eigen(T)
-                d, v = f
-                for i in 1:size(a,2)
-                    @test T*v[:,i] ≈ d[i]*v[:,i]
-                end
-                @test det(T) ≈ det(f)
-                @test inv(T) ≈ inv(f)
-            end
-
-            num_fact = eigen(one(eltya))
-            @test num_fact.values[1] == one(eltya)
-            h = asym
-            @test minimum(eigvals(h)) ≈ eigmin(h)
-            @test maximum(eigvals(h)) ≈ eigmax(h)
-            @test_throws DomainError eigmin(a - a')
-            @test_throws DomainError eigmax(a - a')
-        end
-        @testset "symmetric generalized eigenproblem" begin
-            if isa(a, Array)
-                asym_sg = asym[1:n1, 1:n1]
-                a_sg = a[:,n1+1:n2]
-            else
-                asym_sg = view(asym, 1:n1, 1:n1)
-                a_sg = view(a, 1:n, n1+1:n2)
-            end
-            ASG2 = a_sg'a_sg
-            f = eigen(asym_sg, ASG2)
-            @test asym_sg*f.vectors ≈ (ASG2*f.vectors) * Diagonal(f.values)
-            @test f.values ≈ eigvals(asym_sg, ASG2)
-            @test prod(f.values) ≈ prod(eigvals(asym_sg/(ASG2))) atol=200ε
-            @test eigvecs(asym_sg, ASG2) == f.vectors
-            @test eigvals(f) === f.values
-            @test eigvecs(f) === f.vectors
-            @test_throws ErrorException f.Z
-
-            d,v = eigen(asym_sg, ASG2)
-            @test d == f.values
-            @test v == f.vectors
-
-            # solver for in-place U' \ A / U (#14896)
-            if !(eltya <: Integer)
-                for atyp in (eltya <: Real ? (Symmetric, Hermitian) : (Hermitian,))
-                    for utyp in (UpperTriangular, Diagonal), uplo in (:L, :U)
-                        A = atyp(asym_sg, uplo)
-                        U = utyp(ASG2)
-                        @test UtiAUi!(copy(A), U) ≈ U' \ A / U
-                    end
-                end
-            end
-
-            # matrices of different types (#14896)
-            D = Diagonal(ASG2)
-            for uplo in (:L, :U)
-                if eltya <: Real
-                    fs = eigen(Symmetric(asym_sg, uplo), ASG2)
-                    @test fs.values ≈ f.values
-                    @test abs.(fs.vectors) ≈ abs.(f.vectors)  # may change sign
-                    gs = eigen(Symmetric(asym_sg, uplo), D)
-                    @test Symmetric(asym_sg, uplo)*gs.vectors ≈ (D*gs.vectors) * Diagonal(gs.values)
-                end
-                fh = eigen(Hermitian(asym_sg, uplo), ASG2)
-                @test fh.values ≈ f.values
-                @test abs.(fh.vectors) ≈ abs.(f.vectors)  # may change sign
-                gh = eigen(Hermitian(asym_sg, uplo), D)
-                @test Hermitian(asym_sg, uplo)*gh.vectors ≈ (D*gh.vectors) * Diagonal(gh.values)
-                gd = eigen(Matrix(Hermitian(ASG2, uplo)), D)
-                @test Hermitian(ASG2, uplo) * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values)
-                gd = eigen(Hermitian(Tridiagonal(ASG2), uplo), D)
-                @test Hermitian(Tridiagonal(ASG2), uplo) * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values)
-            end
-            gd = eigen(D, D)
-            @test all(≈(1), gd.values)
-            @test D * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values)
-            gd = eigen(Matrix(D), D)
-            @test D * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values)
-            gd = eigen(D, Matrix(D))
-            @test D * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values)
-            gd = eigen(Tridiagonal(ASG2), Matrix(D))
-            @test Tridiagonal(ASG2) * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values)
-        end
-        @testset "Non-symmetric generalized eigenproblem" begin
-            if isa(a, Array)
-                a1_nsg = a[1:n1, 1:n1]
-                a2_nsg = a[n1+1:n2, n1+1:n2]
-            else
-                a1_nsg = view(a, 1:n1, 1:n1)
-                a2_nsg = view(a, n1+1:n2, n1+1:n2)
-            end
-            sortfunc = x -> real(x) + imag(x)
-            f = eigen(a1_nsg, a2_nsg; sortby = sortfunc)
-            @test a1_nsg*f.vectors ≈ (a2_nsg*f.vectors) * Diagonal(f.values)
-            @test f.values ≈ eigvals(a1_nsg, a2_nsg; sortby = sortfunc)
-            @test prod(f.values) ≈ prod(eigvals(a1_nsg/a2_nsg, sortby = sortfunc)) atol=50000ε
-            @test eigvecs(a1_nsg, a2_nsg; sortby = sortfunc) == f.vectors
-            @test_throws ErrorException f.Z
-
-            g = eigen(a1_nsg, Diagonal(1:n1))
-            @test a1_nsg*g.vectors ≈ (Diagonal(1:n1)*g.vectors) * Diagonal(g.values)
-
-            d,v = eigen(a1_nsg, a2_nsg; sortby = sortfunc)
-            @test d == f.values
-            @test v == f.vectors
-        end
-    end
-end
-
-@testset "eigenvalue computations with NaNs" begin
-    for eltya in (NaN16, NaN32, NaN)
-        @test_throws(ArgumentError, eigen(fill(eltya, 1, 1)))
-        @test_throws(ArgumentError, eigen(fill(eltya, 2, 2)))
-        test_matrix = rand(typeof(eltya),3,3)
-        test_matrix[1,3] = eltya
-        @test_throws(ArgumentError, eigen(test_matrix))
-        @test_throws(ArgumentError, eigvals(test_matrix))
-        @test_throws(ArgumentError, eigvecs(test_matrix))
-        @test_throws(ArgumentError, eigen(Symmetric(test_matrix)))
-        @test_throws(ArgumentError, eigvals(Symmetric(test_matrix)))
-        @test_throws(ArgumentError, eigvecs(Symmetric(test_matrix)))
-        @test_throws(ArgumentError, eigen(Hermitian(test_matrix)))
-        @test_throws(ArgumentError, eigvals(Hermitian(test_matrix)))
-        @test_throws(ArgumentError, eigvecs(Hermitian(test_matrix)))
-        @test_throws(ArgumentError, eigen(Hermitian(complex.(test_matrix))))
-        @test_throws(ArgumentError, eigvals(Hermitian(complex.(test_matrix))))
-        @test_throws(ArgumentError, eigvecs(Hermitian(complex.(test_matrix))))
-        @test eigen(Symmetric(test_matrix, :L)) isa Eigen
-        @test eigen(Hermitian(test_matrix, :L)) isa Eigen
-    end
-end
-
-# test a matrix larger than 140-by-140 for #14174
-let aa = rand(200, 200)
-    for a in (aa, view(aa, 1:n, 1:n))
-        f = eigen(a)
-        @test a ≈ f.vectors * Diagonal(f.values) / f.vectors
-    end
-end
-
-@testset "rational promotion: issue #24935" begin
-    A = [1//2 0//1; 0//1 2//3]
-    for λ in (eigvals(A), @inferred(eigvals(Symmetric(A))))
-        @test λ isa Vector{Float64}
-        @test λ ≈ [0.5, 2/3]
-    end
-end
-
-@testset "text/plain (REPL) printing of Eigen and GeneralizedEigen" begin
-    A, B = randn(5,5), randn(5,5)
-    e    = eigen(A)
-    ge   = eigen(A, B)
-    valsstring = sprint((t, s) -> show(t, "text/plain", s), e.values)
-    vecsstring = sprint((t, s) -> show(t, "text/plain", s), e.vectors)
-    factstring = sprint((t, s) -> show(t, "text/plain", s), e)
-    @test factstring == "$(summary(e))\nvalues:\n$valsstring\nvectors:\n$vecsstring"
-end
-
-@testset "eigen of an Adjoint" begin
-    Random.seed!(4)
-    A = randn(3,3)
-    @test eigvals(A') == eigvals(copy(A'))
-    @test eigen(A')   == eigen(copy(A'))
-    @test eigmin(A') == eigmin(copy(A'))
-    @test eigmax(A') == eigmax(copy(A'))
-end
-
-@testset "equality of eigen factorizations" begin
-    A = randn(3, 3)
-    @test eigen(A) == eigen(A)
-    @test hash(eigen(A)) == hash(eigen(A))
-    @test isequal(eigen(A), eigen(A))
-end
-
-@testset "Float16" begin
-    A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
-    B = eigen(A)
-    B32 = eigen(Float32.(A))
-    C = Float16[3 -2; 4 -1]
-    D = eigen(C)
-    D32 = eigen(Float32.(C))
-    F = eigen(complex(C))
-    F32 = eigen(complex(Float32.(C)))
-    @test B isa Eigen{Float16, Float16, Matrix{Float16}, Vector{Float16}}
-    @test B.values isa Vector{Float16}
-    @test B.vectors isa Matrix{Float16}
-    @test B.values ≈ B32.values
-    @test B.vectors ≈ B32.vectors
-    @test D isa Eigen{ComplexF16, ComplexF16, Matrix{ComplexF16}, Vector{ComplexF16}}
-    @test D.values isa Vector{ComplexF16}
-    @test D.vectors isa Matrix{ComplexF16}
-    @test D.values ≈ D32.values
-    @test D.vectors ≈ D32.vectors
-    @test F isa Eigen{ComplexF16, ComplexF16, Matrix{ComplexF16}, Vector{ComplexF16}}
-    @test F.values isa Vector{ComplexF16}
-    @test F.vectors isa Matrix{ComplexF16}
-    @test F.values ≈ F32.values
-    @test F.vectors ≈ F32.vectors
-end
-
-end # module TestEigen
diff --git a/stdlib/LinearAlgebra/test/factorization.jl b/stdlib/LinearAlgebra/test/factorization.jl
deleted file mode 100644
index 72233293ff515..0000000000000
--- a/stdlib/LinearAlgebra/test/factorization.jl
+++ /dev/null
@@ -1,94 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestFactorization
-using Test, LinearAlgebra
-
-@testset "equality for factorizations - $f" for f in Any[
-    bunchkaufman,
-    cholesky,
-    x -> cholesky(x, RowMaximum()),
-    eigen,
-    hessenberg,
-    lq,
-    lu,
-    qr,
-    x -> qr(x, ColumnNorm()),
-    svd,
-    schur,
-]
-    A = randn(3, 3)
-    A = A * A' # ensure A is pos. def. and symmetric
-    F, G = f(A), f(A)
-
-    @test F == G
-    @test isequal(F, G)
-    @test hash(F) == hash(G)
-
-    f === hessenberg && continue
-
-    # change all arrays in F to have eltype Float32
-    F = typeof(F).name.wrapper(Base.mapany(1:nfields(F)) do i
-        x = getfield(F, i)
-        return x isa AbstractArray{Float64} ? Float32.(x) : x
-    end...)
-    # round all arrays in G to the nearest Float64 representable as Float32
-    G = typeof(G).name.wrapper(Base.mapany(1:nfields(G)) do i
-        x = getfield(G, i)
-        return x isa AbstractArray{Float64} ? Float64.(Float32.(x)) : x
-    end...)
-
-    @test F == G broken=!(f === eigen || f === qr)
-    @test isequal(F, G) broken=!(f === eigen || f === qr)
-    @test hash(F) == hash(G)
-end
-
-@testset "size for factorizations - $f" for f in Any[
-    bunchkaufman,
-    cholesky,
-    x -> cholesky(x, RowMaximum()),
-    hessenberg,
-    lq,
-    lu,
-    qr,
-    x -> qr(x, ColumnNorm()),
-    svd,
-]
-    A = randn(3, 3)
-    A = A * A' # ensure A is pos. def. and symmetric
-    F = f(A)
-    @test size(F) == size(A)
-    @test size(F') == size(A')
-end
-
-@testset "size for transpose factorizations - $f" for f in Any[
-    bunchkaufman,
-    cholesky,
-    x -> cholesky(x, RowMaximum()),
-    hessenberg,
-    lq,
-    lu,
-    svd,
-]
-    A = randn(3, 3)
-    A = A * A' # ensure A is pos. def. and symmetric
-    F = f(A)
-    @test size(F) == size(A)
-    @test size(transpose(F)) == size(transpose(A))
-end
-
-@testset "equality of QRCompactWY" begin
-    A = rand(100, 100)
-    F, G = qr(A), qr(A)
-
-    @test F == G
-    @test isequal(F, G)
-    @test hash(F) == hash(G)
-
-    G.T[28, 100] = 42
-
-    @test F != G
-    @test !isequal(F, G)
-    @test hash(F) != hash(G)
-end
-
-end
diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl
deleted file mode 100644
index 33eb50d58836a..0000000000000
--- a/stdlib/LinearAlgebra/test/generic.jl
+++ /dev/null
@@ -1,628 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestGeneric
-
-using Test, LinearAlgebra, Random
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-
-isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
-using .Main.Quaternions
-
-isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
-using .Main.OffsetArrays
-
-isdefined(Main, :DualNumbers) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "DualNumbers.jl"))
-using .Main.DualNumbers
-
-Random.seed!(123)
-
-n = 5 # should be odd
-
-@testset for elty in (Int, Rational{BigInt}, Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat})
-    # In the long run, these tests should step through Strang's
-    #  axiomatic definition of determinants.
-    # If all axioms are satisfied and all the composition rules work,
-    #  all determinants will be correct except for floating point errors.
-    if elty != Rational{BigInt}
-        @testset "det(A::Matrix)" begin
-            # The determinant of the identity matrix should always be 1.
-            for i = 1:10
-                A = Matrix{elty}(I, i, i)
-                @test det(A) ≈ one(elty)
-            end
-
-            # The determinant of a Householder reflection matrix should always be -1.
-            for i = 1:10
-                A = Matrix{elty}(I, 10, 10)
-                A[i, i] = -one(elty)
-                @test det(A) ≈ -one(elty)
-            end
-
-            # The determinant of a rotation matrix should always be 1.
-            if elty != Int
-                for theta = convert(Vector{elty}, pi ./ [1:4;])
-                    R = [cos(theta) -sin(theta);
-                         sin(theta) cos(theta)]
-                    @test convert(elty, det(R)) ≈ one(elty)
-                end
-            end
-        end
-    end
-    if elty <: Int
-        A = rand(-n:n, n, n) + 10I
-    elseif elty <: Rational
-        A = Rational{BigInt}[rand(-n:n)/rand(1:n) for i = 1:n, j = 1:n] + 10I
-    elseif elty <: Real
-        A = convert(Matrix{elty}, randn(n,n)) + 10I
-    else
-        A = convert(Matrix{elty}, complex.(randn(n,n), randn(n,n)))
-    end
-
-    @testset "logdet and logabsdet" begin
-        @test logdet(A[1,1]) == log(det(A[1,1]))
-        @test logdet(A) ≈ log(det(A))
-        @test logabsdet(A)[1] ≈ log(abs(det(A)))
-        @test logabsdet(Matrix{elty}(-I, n, n))[2] == -1
-        infinity = convert(float(elty), Inf)
-        @test logabsdet(zeros(elty, n, n)) == (-infinity, zero(elty))
-        if elty <: Real
-            @test logabsdet(A)[2] == sign(det(A))
-            @test_throws DomainError logdet(Matrix{elty}(-I, n, n))
-        else
-            @test logabsdet(A)[2] ≈ sign(det(A))
-        end
-        # logabsdet for Number"
-        x = A[1, 1] # getting a number of type elty
-        X = fill(x, 1, 1)
-        @test logabsdet(x)[1] ≈ logabsdet(X)[1]
-        @test logabsdet(x)[2] ≈ logabsdet(X)[2]
-    end
-
-    @testset "det with nonstandard Number type" begin
-        elty <: Real && @test det(Dual.(triu(A), zero(A))) isa Dual
-    end
-end
-
-@testset "diag" begin
-    A = Matrix(1.0I, 4, 4)
-    @test diag(A) == fill(1, 4)
-    @test diag(view(A, 1:3, 1:3)) == fill(1, 3)
-    @test diag(view(A, 1:2, 1:2)) == fill(1, 2)
-    @test_throws ArgumentError diag(rand(10))
-end
-
-@testset "generic axpy" begin
-    x = ['a','b','c','d','e']
-    y = ['a','b','c','d','e']
-    α, β = 'f', 'g'
-    @test_throws DimensionMismatch axpy!(α, x, ['g'])
-    @test_throws DimensionMismatch axpby!(α, x, β, ['g'])
-    @test_throws BoundsError axpy!(α, x, Vector(-1:5), y, Vector(1:7))
-    @test_throws BoundsError axpy!(α, x, Vector(1:7), y, Vector(-1:5))
-    @test_throws BoundsError axpy!(α, x, Vector(1:7), y, Vector(1:7))
-    @test_throws DimensionMismatch axpy!(α, x, Vector(1:3), y, Vector(1:5))
-end
-
-@test !issymmetric(fill(1,5,3))
-@test !ishermitian(fill(1,5,3))
-@test (x = fill(1,3); cross(x,x) == zeros(3))
-@test_throws DimensionMismatch cross(fill(1,3), fill(1,4))
-@test_throws DimensionMismatch cross(fill(1,2), fill(1,3))
-
-@test tr(Bidiagonal(fill(1,5),fill(0,4),:U)) == 5
-
-
-@testset "array and subarray" begin
-    aa = reshape([1.:6;], (2,3))
-    for a in (aa, view(aa, 1:2, 1:2))
-        am, an = size(a)
-        @testset "Scaling with rmul! and lmul" begin
-            @test rmul!(copy(a), 5.) == a*5
-            @test lmul!(5., copy(a)) == a*5
-            b = randn(2048)
-            subB = view(b, :, :)
-            @test rmul!(copy(b), 5.) == b*5
-            @test rmul!(copy(subB), 5.) == subB*5
-            @test lmul!(Diagonal([1.; 2.]), copy(a)) == a.*[1; 2]
-            @test lmul!(Diagonal([1; 2]), copy(a)) == a.*[1; 2]
-            @test rmul!(copy(a), Diagonal(1.:an)) == a.*Vector(1:an)'
-            @test rmul!(copy(a), Diagonal(1:an)) == a.*Vector(1:an)'
-            @test_throws DimensionMismatch lmul!(Diagonal(Vector{Float64}(undef,am+1)), a)
-            @test_throws DimensionMismatch rmul!(a, Diagonal(Vector{Float64}(undef,an+1)))
-        end
-
-        @testset "Scaling with rdiv! and ldiv!" begin
-            @test rdiv!(copy(a), 5.) == a/5
-            @test ldiv!(5., copy(a)) == a/5
-            @test ldiv!(zero(a), 5., copy(a)) == a/5
-        end
-
-        @testset "Scaling with 3-argument mul!" begin
-            @test mul!(similar(a), 5., a) == a*5
-            @test mul!(similar(a), a, 5.) == a*5
-            @test mul!(similar(a), Diagonal([1.; 2.]), a) == a.*[1; 2]
-            @test mul!(similar(a), Diagonal([1; 2]), a)   == a.*[1; 2]
-            @test_throws DimensionMismatch mul!(similar(a), Diagonal(Vector{Float64}(undef, am+1)), a)
-            @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 3, 2), a, Diagonal(Vector{Float64}(undef, an+1)))
-            @test_throws DimensionMismatch mul!(similar(a), a, Diagonal(Vector{Float64}(undef, an+1)))
-            @test mul!(similar(a), a, Diagonal(1.:an)) == a.*Vector(1:an)'
-            @test mul!(similar(a), a, Diagonal(1:an))  == a.*Vector(1:an)'
-        end
-
-        @testset "Scaling with 5-argument mul!" begin
-            @test mul!(copy(a), 5., a, 10, 100) == a*150
-            @test mul!(copy(a), a, 5., 10, 100) == a*150
-            @test mul!(vec(copy(a)), 5., a, 10, 100) == vec(a*150)
-            @test mul!(vec(copy(a)), a, 5., 10, 100) == vec(a*150)
-            @test_throws DimensionMismatch mul!([vec(copy(a)); 0], 5., a, 10, 100)
-            @test_throws DimensionMismatch mul!([vec(copy(a)); 0], a, 5., 10, 100)
-            @test mul!(copy(a), Diagonal([1.; 2.]), a, 10, 100) == 10a.*[1; 2] .+ 100a
-            @test mul!(copy(a), Diagonal([1; 2]), a, 10, 100)   == 10a.*[1; 2] .+ 100a
-            @test mul!(copy(a), a, Diagonal(1.:an), 10, 100) == 10a.*Vector(1:an)' .+ 100a
-            @test mul!(copy(a), a, Diagonal(1:an), 10, 100)  == 10a.*Vector(1:an)' .+ 100a
-        end
-    end
-end
-
-@testset "scale real matrix by complex type" begin
-    @test_throws InexactError rmul!([1.0], 2.0im)
-    @test isequal([1.0] * 2.0im,             ComplexF64[2.0im])
-    @test isequal(2.0im * [1.0],             ComplexF64[2.0im])
-    @test isequal(Float32[1.0] * 2.0f0im,    ComplexF32[2.0im])
-    @test isequal(Float32[1.0] * 2.0im,      ComplexF64[2.0im])
-    @test isequal(Float64[1.0] * 2.0f0im,    ComplexF64[2.0im])
-    @test isequal(Float32[1.0] * big(2.0)im, Complex{BigFloat}[2.0im])
-    @test isequal(Float64[1.0] * big(2.0)im, Complex{BigFloat}[2.0im])
-    @test isequal(BigFloat[1.0] * 2.0im,     Complex{BigFloat}[2.0im])
-    @test isequal(BigFloat[1.0] * 2.0f0im,   Complex{BigFloat}[2.0im])
-end
-@testset "* and mul! for non-commutative scaling" begin
-    q = Quaternion(0.44567, 0.755871, 0.882548, 0.423612)
-    qmat = [Quaternion(0.015007, 0.355067, 0.418645, 0.318373)]
-    @test lmul!(q, copy(qmat)) != rmul!(copy(qmat), q)
-    @test q*qmat ≉ qmat*q
-    @test conj(q*qmat) ≈ conj(qmat)*conj(q)
-    @test q * (q \ qmat) ≈ qmat ≈ (qmat / q) * q
-    @test q\qmat ≉ qmat/q
-    alpha = Quaternion(rand(4)...)
-    beta = Quaternion(0, 0, 0, 0)
-    @test mul!(copy(qmat), qmat, q, alpha, beta) ≈ qmat * q * alpha
-    @test mul!(copy(qmat), q, qmat, alpha, beta) ≈ q * qmat * alpha
-end
-@testset "ops on Numbers" begin
-    @testset for elty in [Float32,Float64,ComplexF32,ComplexF64]
-        a = rand(elty)
-        @test tr(a)            == a
-        @test rank(zero(elty)) == 0
-        @test rank(one(elty))  == 1
-        @test !isfinite(cond(zero(elty)))
-        @test cond(a)          == one(elty)
-        @test cond(a,1)        == one(elty)
-        @test issymmetric(a)
-        @test ishermitian(one(elty))
-        @test det(a) == a
-        @test norm(a) == abs(a)
-        @test norm(a, 0) == 1
-        @test norm(0, 0) == 0
-    end
-
-    @test !issymmetric(NaN16)
-    @test !issymmetric(NaN32)
-    @test !issymmetric(NaN)
-    @test norm(NaN)    === NaN
-    @test norm(NaN, 0) === NaN
-end
-
-@test rank(zeros(4)) == 0
-@test rank(1:10) == 1
-@test rank(fill(0, 0, 0)) == 0
-@test rank([1.0 0.0; 0.0 0.9],0.95) == 1
-@test rank([1.0 0.0; 0.0 0.9],rtol=0.95) == 1
-@test rank([1.0 0.0; 0.0 0.9],atol=0.95) == 1
-@test rank([1.0 0.0; 0.0 0.9],atol=0.95,rtol=0.95)==1
-@test qr(big.([0 1; 0 0])).R == [0 1; 0 0]
-
-@test norm([2.4e-322, 4.4e-323]) ≈ 2.47e-322
-@test norm([2.4e-322, 4.4e-323], 3) ≈ 2.4e-322
-@test_throws ArgumentError opnorm(Matrix{Float64}(undef,5,5),5)
-
-# operator norm for zero-dimensional domain is zero (see #40370)
-@testset "opnorm" begin
-    for m in (0, 1, 2)
-        @test @inferred(opnorm(fill(1,0,m))) == 0.0
-        @test @inferred(opnorm(fill(1,m,0))) == 0.0
-    end
-    for m in (1, 2)
-        @test @inferred(opnorm(fill(1im,1,m))) ≈ sqrt(m)
-        @test @inferred(opnorm(fill(1im,m,1))) ≈ sqrt(m)
-    end
-    @test @inferred(opnorm(fill(1,2,2))) ≈ 2
-end
-
-@testset "generic norm for arrays of arrays" begin
-    x = Vector{Int}[[1,2], [3,4]]
-    @test @inferred(norm(x)) ≈ sqrt(30)
-    @test norm(x, 0) == length(x)
-    @test norm(x, 1) ≈ 5+sqrt(5)
-    @test norm(x, 3) ≈ cbrt(5^3  +sqrt(5)^3)
-end
-
-@testset "norm of transpose/adjoint equals norm of parent #32739" begin
-    for t in (transpose, adjoint), elt in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat})
-        # Vector/matrix of scalars
-        for sz in ((2,), (2, 3))
-            A = rand(elt, sz...)
-            Aᵀ = t(A)
-            @test norm(Aᵀ) ≈ norm(Matrix(Aᵀ))
-        end
-
-        # Vector/matrix of vectors/matrices
-        for sz_outer in ((2,), (2, 3)), sz_inner in ((3,), (1, 2))
-            A = [rand(elt, sz_inner...) for _ in CartesianIndices(sz_outer)]
-            Aᵀ = t(A)
-            @test norm(Aᵀ) ≈ norm(Matrix(Matrix.(Aᵀ)))
-        end
-    end
-end
-
-@testset "rotate! and reflect!" begin
-    x = rand(ComplexF64, 10)
-    y = rand(ComplexF64, 10)
-    c = rand(Float64)
-    s = rand(ComplexF64)
-
-    x2 = copy(x)
-    y2 = copy(y)
-    rotate!(x, y, c, s)
-    @test x ≈ c*x2 + s*y2
-    @test y ≈ -conj(s)*x2 + c*y2
-    @test_throws DimensionMismatch rotate!([x; x], y, c, s)
-
-    x3 = copy(x)
-    y3 = copy(y)
-    reflect!(x, y, c, s)
-    @test x ≈ c*x3 + s*y3
-    @test y ≈ conj(s)*x3 - c*y3
-    @test_throws DimensionMismatch reflect!([x; x], y, c, s)
-end
-
-@testset "LinearAlgebra.reflectorApply!" begin
-    for T in (Float64, ComplexF64)
-        x = rand(T, 6)
-        τ = rand(T)
-        A = rand(T, 6)
-        B = LinearAlgebra.reflectorApply!(x, τ, copy(A))
-        C = LinearAlgebra.reflectorApply!(x, τ, reshape(copy(A), (length(A), 1)))
-        @test B[1] ≈ C[1] ≈ A[1] - conj(τ)*(A[1] + dot(x[2:end], A[2:end]))
-        @test B[2:end] ≈ C[2:end] ≈ A[2:end] - conj(τ)*(A[1] + dot(x[2:end], A[2:end]))*x[2:end]
-    end
-end
-
-@testset "axp(b)y! for element type without commutative multiplication" begin
-    α = [1 2; 3 4]
-    β = [5 6; 7 8]
-    x = fill([ 9 10; 11 12], 3)
-    y = fill([13 14; 15 16], 3)
-    axpy = axpy!(α, x, deepcopy(y))
-    axpby = axpby!(α, x, β, deepcopy(y))
-    @test axpy == x .* [α] .+ y
-    @test axpy != [α] .* x .+ y
-    @test axpby == x .* [α] .+ y .* [β]
-    @test axpby != [α] .* x .+ [β] .* y
-    axpy = axpy!(zero(α), x, deepcopy(y))
-    axpby = axpby!(zero(α), x, one(β), deepcopy(y))
-    @test axpy == y
-    @test axpy == y
-    @test axpby == y
-    @test axpby == y
-end
-
-@testset "axpy! for x and y of different dimensions" begin
-    α = 5
-    x = 2:5
-    y = fill(1, 2, 4)
-    rx = [1 4]
-    ry = [2 8]
-    @test axpy!(α, x, rx, y, ry) == [1 1 1 1; 11 1 1 26]
-end
-
-@testset "axp(b)y! for non strides input" begin
-    a = rand(5, 5)
-    @test axpby!(1, Hermitian(a), 1, zeros(size(a))) == Hermitian(a)
-    @test axpby!(1, 1.:5, 1, zeros(5)) == 1.:5
-    @test axpy!(1, Hermitian(a), zeros(size(a))) == Hermitian(a)
-    @test axpy!(1, 1.:5, zeros(5)) == 1.:5
-end
-
-@testset "LinearAlgebra.axp(b)y! for stride-vector like input" begin
-    for T in (Float32, Float64, ComplexF32, ComplexF64)
-        a = rand(T, 5, 5)
-        @test axpby!(1, view(a, :, 1:5), 1, zeros(T, size(a))) == a
-        @test axpy!(1, view(a, :, 1:5), zeros(T, size(a))) == a
-        b = view(a, 25:-2:1)
-        @test axpby!(1, b, 1, zeros(T, size(b))) == b
-        @test axpy!(1, b, zeros(T, size(b))) == b
-    end
-end
-
-@testset "norm and normalize!" begin
-    vr = [3.0, 4.0]
-    for Tr in (Float32, Float64)
-        for T in (Tr, Complex{Tr})
-            v = convert(Vector{T}, vr)
-            @test norm(v) == 5.0
-            w = normalize(v)
-            @test norm(w - [0.6, 0.8], Inf) < eps(Tr)
-            @test norm(w) == 1.0
-            @test norm(normalize!(copy(v)) - w, Inf) < eps(Tr)
-            @test isempty(normalize!(T[]))
-        end
-    end
-end
-
-@testset "normalize for multidimensional arrays" begin
-
-    for arr in (
-        fill(10.0, ()),  # 0 dim
-        [1.0],           # 1 dim
-        [1.0 2.0 3.0; 4.0 5.0 6.0], # 2-dim
-        rand(1,2,3),                # higher dims
-        rand(1,2,3,4),
-        Dual.(randn(2,3), randn(2,3)),
-        OffsetArray([-1,0], (-2,))  # no index 1
-    )
-        @test normalize(arr) == normalize!(copy(arr))
-        @test size(normalize(arr)) == size(arr)
-        @test axes(normalize(arr)) == axes(arr)
-        @test vec(normalize(arr)) == normalize(vec(arr))
-    end
-
-    @test typeof(normalize([1 2 3; 4 5 6])) == Array{Float64,2}
-end
-
-@testset "normalize for scalars" begin
-    @test normalize(8.0) == 1.0
-    @test normalize(-3.0) == -1.0
-    @test normalize(-3.0, 1) == -1.0
-    @test isnan(normalize(0.0))
-end
-
-@testset "Issue #30466" begin
-    @test norm([typemin(Int), typemin(Int)], Inf) == -float(typemin(Int))
-    @test norm([typemin(Int), typemin(Int)], 1) == -2float(typemin(Int))
-end
-
-@testset "potential overflow in normalize!" begin
-    δ = inv(prevfloat(typemax(Float64)))
-    v = [δ, -δ]
-
-    @test norm(v) === 7.866824069956793e-309
-    w = normalize(v)
-    @test w ≈ [1/√2, -1/√2]
-    @test norm(w) === 1.0
-    @test norm(normalize!(v) - w, Inf) < eps()
-end
-
-@testset "normalize with Infs. Issue 29681." begin
-    @test all(isequal.(normalize([1, -1, Inf]),
-                       [0.0, -0.0, NaN]))
-    @test all(isequal.(normalize([complex(1), complex(0, -1), complex(Inf, -Inf)]),
-                       [0.0 + 0.0im, 0.0 - 0.0im, NaN + NaN*im]))
-end
-
-@testset "Issue 14657" begin
-    @test det([true false; false true]) == det(Matrix(1I, 2, 2))
-end
-
-@test_throws ArgumentError LinearAlgebra.char_uplo(:Z)
-
-@testset "Issue 17650" begin
-    @test [0.01311489462160816, Inf] ≈ [0.013114894621608135, Inf]
-end
-
-@testset "Issue 19035" begin
-    @test LinearAlgebra.promote_leaf_eltypes([1, 2, [3.0, 4.0]]) == Float64
-    @test LinearAlgebra.promote_leaf_eltypes([[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]]) == ComplexF64
-    @test [1, 2, 3] ≈ [1, 2, 3]
-    @test [[1, 2], [3, 4]] ≈ [[1, 2], [3, 4]]
-    @test [[1, 2], [3, 4]] ≈ [[1.0-eps(), 2.0+eps()], [3.0+2eps(), 4.0-1e8eps()]]
-    @test [[1, 2], [3, 4]] ≉ [[1.0-eps(), 2.0+eps()], [3.0+2eps(), 4.0-1e9eps()]]
-    @test [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]] ≈ [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]]
-end
-
-@testset "Issue 40128" begin
-    @test det(BigInt[9 1 8 0; 0 0 8 7; 7 6 8 3; 2 9 7 7])::BigInt == -1
-    @test det(BigInt[1 big(2)^65+1; 3 4])::BigInt == (4 - 3*(big(2)^65+1))
-end
-
-# Minimal modulo number type - but not subtyping Number
-struct ModInt{n}
-    k
-    ModInt{n}(k) where {n} = new(mod(k,n))
-    ModInt{n}(k::ModInt{n}) where {n} = k
-end
-Base.:+(a::ModInt{n}, b::ModInt{n}) where {n} = ModInt{n}(a.k + b.k)
-Base.:-(a::ModInt{n}, b::ModInt{n}) where {n} = ModInt{n}(a.k - b.k)
-Base.:*(a::ModInt{n}, b::ModInt{n}) where {n} = ModInt{n}(a.k * b.k)
-Base.:-(a::ModInt{n}) where {n} = ModInt{n}(-a.k)
-Base.inv(a::ModInt{n}) where {n} = ModInt{n}(invmod(a.k, n))
-Base.:/(a::ModInt{n}, b::ModInt{n}) where {n} = a*inv(b)
-
-Base.isfinite(a::ModInt{n}) where {n} = isfinite(a.k)
-Base.zero(::Type{ModInt{n}}) where {n} = ModInt{n}(0)
-Base.zero(::ModInt{n}) where {n} = ModInt{n}(0)
-Base.one(::Type{ModInt{n}}) where {n} = ModInt{n}(1)
-Base.one(::ModInt{n}) where {n} = ModInt{n}(1)
-Base.conj(a::ModInt{n}) where {n} = a
-LinearAlgebra.lupivottype(::Type{ModInt{n}}) where {n} = RowNonZero()
-Base.adjoint(a::ModInt{n}) where {n} = ModInt{n}(conj(a))
-Base.transpose(a::ModInt{n}) where {n} = a  # see Issue 20978
-LinearAlgebra.Adjoint(a::ModInt{n}) where {n} = adjoint(a)
-LinearAlgebra.Transpose(a::ModInt{n}) where {n} = transpose(a)
-
-@testset "Issue 22042" begin
-    A = [ModInt{2}(1) ModInt{2}(0); ModInt{2}(1) ModInt{2}(1)]
-    b = [ModInt{2}(1), ModInt{2}(0)]
-
-    @test A*(A\b) == b
-    @test A*(lu(A)\b) == b
-    @test A*(lu(A, NoPivot())\b) == b
-    @test A*(lu(A, RowNonZero())\b) == b
-    @test_throws MethodError lu(A, RowMaximum())
-
-    # Needed for pivoting:
-    Base.abs(a::ModInt{n}) where {n} = a
-    Base.:<(a::ModInt{n}, b::ModInt{n}) where {n} = a.k < b.k
-    @test A*(lu(A, RowMaximum())\b) == b
-
-    A = [ModInt{2}(0) ModInt{2}(1); ModInt{2}(1) ModInt{2}(1)]
-    @test A*(A\b) == b
-    @test A*(lu(A)\b) == b
-    @test A*(lu(A, RowMaximum())\b) == b
-    @test A*(lu(A, RowNonZero())\b) == b
-end
-
-@testset "Issue 18742" begin
-    @test_throws DimensionMismatch ones(4,5)/zeros(3,6)
-    @test_throws DimensionMismatch ones(4,5)\zeros(3,6)
-end
-@testset "fallback throws properly for AbstractArrays with dimension > 2" begin
-    @test_throws ErrorException adjoint(rand(2,2,2,2))
-    @test_throws ErrorException transpose(rand(2,2,2,2))
-end
-
-@testset "generic functions for checking whether matrices have banded structure" begin
-    using LinearAlgebra: isbanded
-    pentadiag = [1 2 3; 4 5 6; 7 8 9]
-    tridiag   = [1 2 0; 4 5 6; 0 8 9]
-    ubidiag   = [1 2 0; 0 5 6; 0 0 9]
-    lbidiag   = [1 0 0; 4 5 0; 0 8 9]
-    adiag     = [1 0 0; 0 5 0; 0 0 9]
-    @testset "istriu" begin
-        @test !istriu(pentadiag)
-        @test istriu(pentadiag, -2)
-        @test !istriu(tridiag)
-        @test istriu(tridiag, -1)
-        @test istriu(ubidiag)
-        @test !istriu(ubidiag, 1)
-        @test !istriu(lbidiag)
-        @test istriu(lbidiag, -1)
-        @test istriu(adiag)
-    end
-    @testset "istril" begin
-        @test !istril(pentadiag)
-        @test istril(pentadiag, 2)
-        @test !istril(tridiag)
-        @test istril(tridiag, 1)
-        @test !istril(ubidiag)
-        @test istril(ubidiag, 1)
-        @test istril(lbidiag)
-        @test !istril(lbidiag, -1)
-        @test istril(adiag)
-    end
-    @testset "isbanded" begin
-        @test isbanded(pentadiag, -2, 2)
-        @test !isbanded(pentadiag, -1, 2)
-        @test !isbanded(pentadiag, -2, 1)
-        @test isbanded(tridiag, -1, 1)
-        @test !isbanded(tridiag, 0, 1)
-        @test !isbanded(tridiag, -1, 0)
-        @test isbanded(ubidiag, 0, 1)
-        @test !isbanded(ubidiag, 1, 1)
-        @test !isbanded(ubidiag, 0, 0)
-        @test isbanded(lbidiag, -1, 0)
-        @test !isbanded(lbidiag, 0, 0)
-        @test !isbanded(lbidiag, -1, -1)
-        @test isbanded(adiag, 0, 0)
-        @test !isbanded(adiag, -1, -1)
-        @test !isbanded(adiag, 1, 1)
-    end
-    @testset "isdiag" begin
-        @test !isdiag(tridiag)
-        @test !isdiag(ubidiag)
-        @test !isdiag(lbidiag)
-        @test isdiag(adiag)
-    end
-end
-
-@testset "missing values" begin
-    @test ismissing(norm(missing))
-    x = [5, 6, missing]
-    y = [missing, 5, 6]
-    for p in (-Inf, -1, 1, 2, 3, Inf)
-        @test ismissing(norm(x, p))
-        @test ismissing(norm(y, p))
-    end
-    @test_broken ismissing(norm(x, 0))
-end
-
-@testset "peakflops" begin
-    @test LinearAlgebra.peakflops(1024, eltype=Float32, ntrials=2) > 0
-end
-
-@testset "NaN handling: Issue 28972" begin
-    @test all(isnan, rmul!([NaN], 0.0))
-    @test all(isnan, rmul!(Any[NaN], 0.0))
-    @test all(isnan, lmul!(0.0, [NaN]))
-    @test all(isnan, lmul!(0.0, Any[NaN]))
-
-    @test all(!isnan, rmul!([NaN], false))
-    @test all(!isnan, rmul!(Any[NaN], false))
-    @test all(!isnan, lmul!(false, [NaN]))
-    @test all(!isnan, lmul!(false, Any[NaN]))
-end
-
-@testset "adjtrans dot" begin
-    for t in (transpose, adjoint), T in (ComplexF64, Quaternion{Float64})
-        x, y = t(rand(T, 10)), t(rand(T, 10))
-        X, Y = copy(x), copy(y)
-        @test dot(x, y) ≈ dot(X, Y)
-        x, y = t([rand(T, 2, 2) for _ in 1:5]), t([rand(T, 2, 2) for _ in 1:5])
-        X, Y = copy(x), copy(y)
-        @test dot(x, y) ≈ dot(X, Y)
-        x, y = t(rand(T, 10, 5)), t(rand(T, 10, 5))
-        X, Y = copy(x), copy(y)
-        @test dot(x, y) ≈ dot(X, Y)
-        x = t([rand(T, 2, 2) for _ in 1:5, _ in 1:5])
-        y = t([rand(T, 2, 2) for _ in 1:5, _ in 1:5])
-        X, Y = copy(x), copy(y)
-        @test dot(x, y) ≈ dot(X, Y)
-        x, y = t([rand(T, 2, 2) for _ in 1:5]), t([rand(T, 2, 2) for _ in 1:5])
-    end
-end
-
-@testset "generalized dot #32739" begin
-    for elty in (Int, Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat})
-        n = 10
-        if elty <: Int
-            A = rand(-n:n, n, n)
-            x = rand(-n:n, n)
-            y = rand(-n:n, n)
-        elseif elty <: Real
-            A = convert(Matrix{elty}, randn(n,n))
-            x = rand(elty, n)
-            y = rand(elty, n)
-        else
-            A = convert(Matrix{elty}, complex.(randn(n,n), randn(n,n)))
-            x = rand(elty, n)
-            y = rand(elty, n)
-        end
-        @test dot(x, A, y) ≈ dot(A'x, y) ≈ *(x', A, y) ≈ (x'A)*y
-        @test dot(x, A', y) ≈ dot(A*x, y) ≈ *(x', A', y) ≈ (x'A')*y
-        elty <: Real && @test dot(x, transpose(A), y) ≈ dot(x, transpose(A)*y) ≈ *(x', transpose(A), y) ≈ (x'*transpose(A))*y
-        B = reshape([A], 1, 1)
-        x = [x]
-        y = [y]
-        @test dot(x, B, y) ≈ dot(B'x, y)
-        @test dot(x, B', y) ≈ dot(B*x, y)
-        elty <: Real && @test dot(x, transpose(B), y) ≈ dot(x, transpose(B)*y)
-    end
-end
-
-@testset "condskeel #34512" begin
-    A = rand(3, 3)
-    @test condskeel(A) ≈ condskeel(A, [8,8,8])
-end
-
-end # module TestGeneric
diff --git a/stdlib/LinearAlgebra/test/givens.jl b/stdlib/LinearAlgebra/test/givens.jl
deleted file mode 100644
index a2556b45d1280..0000000000000
--- a/stdlib/LinearAlgebra/test/givens.jl
+++ /dev/null
@@ -1,115 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestGivens
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: Givens, Rotation
-
-# Test givens rotations
-@testset "Test Givens for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64)
-    if elty <: Real
-        raw_A = convert(Matrix{elty}, randn(10,10))
-    else
-        raw_A = convert(Matrix{elty}, complex.(randn(10,10),randn(10,10)))
-    end
-    @testset for A in (raw_A, view(raw_A, 1:10, 1:10))
-        Ac = copy(A)
-        R = Rotation(Givens{elty}[])
-        T = Rotation(Givens{elty}[])
-        for j = 1:8
-            for i = j+2:10
-                G, _ = givens(A, j+1, i, j)
-                lmul!(G, A)
-                rmul!(A, adjoint(G))
-                lmul!(G, R)
-                rmul!(T, G)
-
-                @test lmul!(G, Matrix{elty}(I, 10, 10)) == [G[i,j] for i=1:10,j=1:10]
-
-                @testset "transposes" begin
-                    @test (@inferred G'*G)*Matrix(elty(1)I, 10, 10) ≈ Matrix(I, 10, 10)
-                    @test (G*Matrix(elty(1)I, 10, 10))*G' ≈ Matrix(I, 10, 10)
-                    @test (@inferred copy(R'))*(R*Matrix(elty(1)I, 10, 10)) ≈ Matrix(I, 10, 10)
-                    @test_throws ErrorException transpose(G)
-                    @test_throws ErrorException transpose(R)
-                end
-            end
-        end
-        @test (R')' === R
-        # test products of Givens and Rotations
-        for r in (R, T, *(R.rotations...), *(R.rotations[1], *(R.rotations[2:end]...)))
-            @test r * A ≈ (A' * r')' ≈ lmul!(r, copy(A))
-            @test A * r ≈ (r' * A')' ≈ rmul!(copy(A), r)
-            @test r' * A ≈ lmul!(r', copy(A))
-            @test A * r' ≈ rmul!(copy(A), r')
-        end
-        @test_throws ArgumentError givens(A, 3, 3, 2)
-        @test_throws ArgumentError givens(one(elty),zero(elty),2,2)
-        G, _ = givens(one(elty),zero(elty),11,12)
-        @test_throws DimensionMismatch lmul!(G, A)
-        @test_throws DimensionMismatch rmul!(A, adjoint(G))
-        @test abs.(A) ≈ abs.(hessenberg(Ac).H)
-        @test opnorm(R*Matrix{elty}(I, 10, 10)) ≈ one(elty)
-
-        I10 = Matrix{elty}(I, 10, 10)
-        G, _ = givens(one(elty),zero(elty),9,10)
-        @test (G*I10)' * (G*I10) ≈ I10
-        K, _ = givens(zero(elty),one(elty),9,10)
-        @test (K*I10)' * (K*I10) ≈ I10
-    end
-
-    @testset "Givens * vectors" begin
-        for x in (raw_A[:,1], view(raw_A, :, 1))
-            G, r = @inferred  givens(x[2], x[4], 2, 4)
-            @test (G*x)[2] ≈ r
-            @test abs((G*x)[4]) < eps(real(elty))
-
-            G, r = @inferred givens(x, 2, 4)
-            @test (G*x)[2] ≈ r
-            @test abs((G*x)[4]) < eps(real(elty))
-
-            G, r = givens(x, 4, 2)
-            @test (G*x)[4] ≈ r
-            @test abs((G*x)[2]) < eps(real(elty))
-        end
-        d = rand(4)
-        l = d[1]
-        g2, l = givens(l, d[2], 1, 2)
-        g3, l = givens(l, d[3], 1, 3)
-        g4, l = givens(l, d[4], 1, 4)
-        @test g2*(g3*d) ≈ g2*g3*d ≈ (g2*g3)*d
-        @test g2*g3*g4 isa Rotation
-    end
-end
-
-# 36430
-# dimensional correctness:
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
-using .Main.Furlongs
-
-@testset "testing dimensions with Furlongs" begin
-    @test_throws MethodError givens(Furlong(1.0), Furlong(2.0), 1, 2)
-end
-
-const TNumber = Union{Float64,ComplexF64}
-struct MockUnitful{T<:TNumber} <: Number
-    data::T
-    MockUnitful(data::T) where T<:TNumber = new{T}(data)
-end
-import Base: *, /, one, oneunit
-*(a::MockUnitful{T}, b::T) where T<:TNumber = MockUnitful(a.data * b)
-*(a::T, b::MockUnitful{T}) where T<:TNumber = MockUnitful(a * b.data)
-*(a::MockUnitful{T}, b::MockUnitful{T}) where T<:TNumber = MockUnitful(a.data * b.data)
-/(a::MockUnitful{T}, b::MockUnitful{T}) where T<:TNumber = a.data / b.data
-one(::Type{<:MockUnitful{T}}) where T = one(T)
-oneunit(::Type{<:MockUnitful{T}}) where T = MockUnitful(one(T))
-
-@testset "unitful givens rotation unitful $T " for T in (Float64, ComplexF64)
-    g, r = givens(MockUnitful(T(3)), MockUnitful(T(4)), 1, 2)
-    @test g.c ≈ 3/5
-    @test g.s ≈ 4/5
-    @test r.data ≈ 5.0
-end
-
-end # module TestGivens
diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl
deleted file mode 100644
index 61e498211ca7b..0000000000000
--- a/stdlib/LinearAlgebra/test/hessenberg.jl
+++ /dev/null
@@ -1,241 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestHessenberg
-
-using Test, LinearAlgebra, Random
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
-using .Main.Furlongs
-
-# for tuple tests below
-≅(x,y) = all(p -> p[1] ≈ p[2], zip(x,y))
-
-let n = 10
-    Random.seed!(1234321)
-
-    Areal  = randn(n,n)/2
-    Aimg   = randn(n,n)/2
-    b_ = randn(n)
-    B_ = randn(n,3)
-
-    # UpperHessenberg methods not covered by the tests below
-    @testset "UpperHessenberg" begin
-        A = Areal
-        H = UpperHessenberg(A)
-        AH = triu(A,-1)
-        for k in -2:2
-            @test istril(H, k) == istril(AH, k)
-            @test istriu(H, k) == istriu(AH, k)
-            @test (k <= -1 ? istriu(H, k) : !istriu(H, k))
-        end
-        @test UpperHessenberg(H) === H
-        @test parent(H) === A
-        @test Matrix(H) == Array(H) == H == AH
-        @test real(H) == real(AH)
-        @test real(UpperHessenberg{ComplexF64}(A)) == H
-        @test real(UpperHessenberg{ComplexF64}(H)) == H
-        sim = similar(H, ComplexF64)
-        @test sim isa UpperHessenberg{ComplexF64}
-        @test size(sim) == size(H)
-        for x in (2,2+3im)
-            @test x*H == H*x == x*AH
-            for op in (+,-)
-                @test op(H,x*I) == op(AH,x*I) == op(op(x*I,H))
-                @test op(H,x*I)*x == op(AH,x*I)*x == x*op(H,x*I)
-            end
-        end
-        @test [H[i,j] for i=1:size(H,1), j=1:size(H,2)] == triu(A,-1)
-        H1 = LinearAlgebra.fillstored!(copy(H), 1)
-        @test H1 == triu(fill(1, n,n), -1)
-        @test tril(H1.data,-2) == tril(H.data,-2)
-        A2, H2 = copy(A), copy(H)
-        A2[1:4,3]=H2[1:4,3]=1:4
-        H2[5,3]=0
-        @test H2 == triu(A2,-1)
-        @test_throws ArgumentError H[5,3]=1
-        Hc = UpperHessenberg(Areal + im .* Aimg)
-        AHc = triu(Areal + im .* Aimg,-1)
-        @test real(Hc) == real(AHc)
-        @test imag(Hc) == imag(AHc)
-        @test Array(copy(adjoint(Hc))) == adjoint(Array(Hc))
-        @test Array(copy(transpose(Hc))) == transpose(Array(Hc))
-        @test rmul!(copy(Hc), 2.0) == lmul!(2.0, copy(Hc))
-        H = UpperHessenberg(Areal)
-        @test Array(Hc + H) == Array(Hc) + Array(H)
-        @test Array(Hc - H) == Array(Hc) - Array(H)
-        @testset "Preserve UpperHessenberg shape (issue #39388)" begin
-            for H = (UpperHessenberg(Areal), UpperHessenberg(Furlong.(Areal)))
-                if eltype(H) <: Furlong
-                    A = Furlong.(rand(n,n))
-                    d = Furlong.(rand(n))
-                    dl = Furlong.(rand(n-1))
-                    du = Furlong.(rand(n-1))
-                    us = Furlong(1)*I
-                else
-                    A = rand(n,n)
-                    d = rand(n)
-                    dl = rand(n-1)
-                    du = rand(n-1)
-                    us = 1*I
-                end
-                @testset "$op" for op = (+,-)
-                    for x = (us, Diagonal(d), Bidiagonal(d,dl,:U), Bidiagonal(d,dl,:L),
-                             Tridiagonal(dl,d,du), SymTridiagonal(d,dl),
-                             UpperTriangular(A), UnitUpperTriangular(A))
-                        @test op(H,x) == op(Array(H),x)
-                        @test op(x,H) == op(x,Array(H))
-                        @test op(H,x) isa UpperHessenberg
-                        @test op(x,H) isa UpperHessenberg
-                    end
-                end
-            end
-            H = UpperHessenberg(Areal)
-            A = randn(n,n)
-            d = randn(n)
-            dl = randn(n-1)
-            @testset "Multiplication/division" begin
-                for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U),
-                            UpperTriangular(A), UnitUpperTriangular(A))
-                    @test (H*x)::UpperHessenberg ≈ Array(H)*x
-                    @test (x*H)::UpperHessenberg ≈ x*Array(H)
-                    @test H/x ≈ Array(H)/x# broken = eltype(H) <: Furlong && x isa UpperTriangular
-                    @test x\H ≈ x\Array(H)# broken = eltype(H) <: Furlong && x isa UpperTriangular
-                    @test H/x isa UpperHessenberg
-                    @test x\H isa UpperHessenberg
-                end
-                x = Bidiagonal(d, dl, :L)
-                @test H*x == Array(H)*x
-                @test x*H == x*Array(H)
-                @test H/x == Array(H)/x
-                @test x\H == x\Array(H)
-            end
-            H = UpperHessenberg(Furlong.(Areal))
-            for A in (A, Furlong.(A))
-                @testset "Multiplication/division Furlong" begin
-                    for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U),
-                                UpperTriangular(A), UnitUpperTriangular(A))
-                        @test map(x -> x.val, (H*x)::UpperHessenberg) ≈ map(x -> x.val, Array(H)*x)
-                        @test map(x -> x.val, (x*H)::UpperHessenberg) ≈ map(x -> x.val, x*Array(H))
-                        @test map(x -> x.val, (H/x)::UpperHessenberg) ≈ map(x -> x.val, Array(H)/x)
-                        @test map(x -> x.val, (x\H)::UpperHessenberg) ≈ map(x -> x.val, x\Array(H))
-                    end
-                    x = Bidiagonal(d, dl, :L)
-                    @test H*x == Array(H)*x
-                    @test x*H == x*Array(H)
-                    @test H/x == Array(H)/x
-                    @test x\H == x\Array(H)
-                end
-            end
-        end
-    end
-
-    @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int), herm in (false, true)
-        A_ = eltya == Int ?
-                rand(1:7, n, n) :
-                convert(Matrix{eltya}, eltya <: Complex ?
-                    complex.(Areal, Aimg) :
-                    Areal)
-        A = herm ? Hermitian(A_ + A_') : A_
-
-        H = hessenberg(A)
-        @test Hessenberg(H) === H
-        eltyh = eltype(H)
-        @test size(H.Q, 1) == size(A, 1)
-        @test size(H.Q, 2) == size(A, 2)
-        @test size(H.Q) == size(A)
-        @test size(H) == size(A)
-        @test_throws ErrorException H.Z
-        @test convert(Array, H) ≈ A
-        @test (H.Q * H.H) * H.Q' ≈ A ≈ (Matrix(H.Q) * Matrix(H.H)) * Matrix(H.Q)'
-        @test (H.Q' * A) * H.Q ≈ H.H
-        #getindex for HessenbergQ
-        @test H.Q[1,1] ≈ Array(H.Q)[1,1]
-        @test det(H.Q) ≈ det(Matrix(H.Q))
-        @test logabsdet(H.Q)[1] ≈ logabsdet(Matrix(H.Q))[1] atol=2n*eps(float(real(eltya)))
-
-        # REPL show
-        hessstring = sprint((t, s) -> show(t, "text/plain", s), H)
-        qstring = sprint((t, s) -> show(t, "text/plain", s), H.Q)
-        hstring = sprint((t, s) -> show(t, "text/plain", s), H.H)
-        @test hessstring == "$(summary(H))\nQ factor: $qstring\nH factor:\n$hstring"
-
-        #iterate
-        q,h = H
-        @test q == H.Q
-        @test h == H.H
-
-        @test convert(Array, 2 * H) ≈ 2 * A ≈ convert(Array, H * 2)
-        @test convert(Array, H + 2I) ≈ A + 2I ≈ convert(Array, 2I + H)
-        @test convert(Array, H + (2+4im)I) ≈ A + (2+4im)I ≈ convert(Array, (2+4im)I + H)
-        @test convert(Array, H - 2I) ≈ A - 2I ≈ -convert(Array, 2I - H)
-        @test convert(Array, -H) == -convert(Array, H)
-        @test convert(Array, 2*(H + (2+4im)I)) ≈ 2A + (4+8im)I
-
-        b = convert(Vector{eltype(H)}, b_)
-        B = convert(Matrix{eltype(H)}, B_)
-        @test H \ b ≈ A \ b ≈ H \ complex(b)
-        @test H \ B ≈ A \ B ≈ H \ complex(B)
-        @test (H - I) \ B ≈ (A - I) \ B
-        @test (H - (3+4im)I) \ B ≈ (A - (3+4im)I) \ B
-        @test b' / H ≈ b' / A ≈ complex.(b') / H
-        @test B' / H ≈ B' / A ≈ complex(B') / H
-        @test B' / (H - I) ≈ B' / (A - I)
-        @test B' / (H - (3+4im)I) ≈ B' / (A - (3+4im)I)
-        @test (H - (3+4im)I)' \ B ≈ (A - (3+4im)I)' \ B
-        @test B' / (H - (3+4im)I)' ≈ B' / (A - (3+4im)I)'
-
-        for shift in (0,1,3+4im)
-            @test det(H + shift*I) ≈ det(A + shift*I)
-            @test logabsdet(H + shift*I) ≅ logabsdet(A + shift*I)
-        end
-
-        HM = Matrix(h)
-        @test dot(b, h, b) ≈ dot(h'b, b) ≈ dot(b, HM, b) ≈ dot(HM'b, b)
-        c = b .+ 1
-        @test dot(b, h, c) ≈ dot(h'b, c) ≈ dot(b, HM, c) ≈ dot(HM'b, c)
-    end
-end
-
-@testset "hessenberg(::AbstractMatrix)" begin
-    n = 10
-    A = Tridiagonal(rand(n-1), rand(n), rand(n-1))
-    H = hessenberg(A)
-    @test convert(Array, H) ≈ A
-end
-
-# check logdet on a matrix that has a positive determinant
-let A = [0.5 0.1 0.9 0.4; 0.9 0.7 0.5 0.4; 0.3 0.4 0.9 0.0; 0.4 0.0 0.0 0.5]
-    @test logdet(hessenberg(A)) ≈ logdet(A) ≈ -3.5065578973199822
-end
-
-@testset "Base.propertynames" begin
-    F =  hessenberg([4. 9. 7.; 4. 4. 1.; 4. 3. 2.])
-    @test Base.propertynames(F) == (:Q, :H, :μ)
-    @test Base.propertynames(F, true) == (:Q, :H, :μ, :τ, :factors, :uplo)
-end
-
-@testset "adjoint of Hessenberg" begin
-    Ar = randn(5, 5)
-    Ac = complex.(randn(5, 5), randn(5, 5))
-    b = ones(size(Ar, 1))
-
-    for A in (Ar, Ac)
-        F = hessenberg(A)
-        @test A'\b ≈ F'\b
-    end
-end
-
-isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
-using .Main.ImmutableArrays
-
-@testset "Conversion to AbstractArray" begin
-    # tests corresponding to #34995
-    A = ImmutableArray([1 2 3; 4 5 6; 7 8 9])
-    H = UpperHessenberg(A)
-
-    @test convert(AbstractArray{Float64}, H)::UpperHessenberg{Float64,ImmutableArray{Float64,2,Array{Float64,2}}} == H
-    @test convert(AbstractMatrix{Float64}, H)::UpperHessenberg{Float64,ImmutableArray{Float64,2,Array{Float64,2}}} == H
-end
-
-end # module TestHessenberg
diff --git a/stdlib/LinearAlgebra/test/lapack.jl b/stdlib/LinearAlgebra/test/lapack.jl
deleted file mode 100644
index 2c5d92541af93..0000000000000
--- a/stdlib/LinearAlgebra/test/lapack.jl
+++ /dev/null
@@ -1,732 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestLAPACK
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasInt
-
-@test_throws ArgumentError LinearAlgebra.LAPACK.chkuplo('Z')
-@test_throws ArgumentError LinearAlgebra.LAPACK.chkside('Z')
-@test_throws ArgumentError LinearAlgebra.LAPACK.chkdiag('Z')
-@test_throws ArgumentError LinearAlgebra.LAPACK.chktrans('Z')
-
-@testset "syevr" begin
-    Random.seed!(123)
-    Ainit = randn(5,5)
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        if elty == ComplexF32 || elty == ComplexF64
-            A = complex.(Ainit, Ainit)
-        else
-            A = Ainit
-        end
-        A = convert(Array{elty, 2}, A)
-        Asym = A'A
-        vals, Z = LAPACK.syevr!('V', copy(Asym))
-        @test Z*(Diagonal(vals)*Z') ≈ Asym
-        @test all(vals .> 0.0)
-        @test LAPACK.syevr!('N', 'V', 'U', copy(Asym), 0.0, 1.0, 4, 5, -1.0)[1] ≈ vals[vals .< 1.0]
-        @test LAPACK.syevr!('N', 'I', 'U', copy(Asym), 0.0, 1.0, 4, 5, -1.0)[1] ≈ vals[4:5]
-        @test vals ≈ LAPACK.syev!('N', 'U', copy(Asym))
-        @test vals ≈ LAPACK.syevd!('N', 'U', copy(Asym))
-        vals_test, Z_test = LAPACK.syev!('V', 'U', copy(Asym))
-        @test vals_test ≈ vals
-        @test Z_test*(Diagonal(vals)*Z_test') ≈ Asym
-        vals_test, Z_test = LAPACK.syevd!('V', 'U', copy(Asym))
-        @test vals_test ≈ vals
-        @test Z_test*(Diagonal(vals)*Z_test') ≈ Asym
-        @test_throws DimensionMismatch LAPACK.sygvd!(1, 'V', 'U', copy(Asym), zeros(elty, 6, 6))
-    end
-end
-
-@testset "gglse" begin
-    let
-        @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-            A = convert(Array{elty, 2}, [1 1 1 1; 1 3 1 1; 1 -1 3 1; 1 1 1 3; 1 1 1 -1])
-            c = convert(Array{elty, 1}, [2, 1, 6, 3, 1])
-            B = convert(Array{elty, 2}, [1 1 1 -1; 1 -1 1 1; 1 1 -1 1])
-            d = convert(Array{elty, 1}, [1, 3, -1])
-            @test LAPACK.gglse!(A, c, B, d)[1] ≈ convert(Array{elty}, [0.5, -0.5, 1.5, 0.5])
-        end
-    end
-end
-
-@testset "gebrd, bdsqr, throw for bdsdc" begin
-    let
-        n = 10
-        @testset for elty in (Float32, Float64)
-            d, e = convert(Vector{elty}, randn(n)), convert(Vector{elty}, randn(n - 1))
-            U, Vt, C = Matrix{elty}(I, n, n), Matrix{elty}(I, n, n), Matrix{elty}(I, n, n)
-            s, _ = LAPACK.bdsqr!('U', copy(d), copy(e), Vt, U, C)
-            @test Array(Bidiagonal(d, e, :U)) ≈ U*Diagonal(s)*Vt
-
-            @test_throws ArgumentError LAPACK.bdsqr!('A', d, e, Vt, U, C)
-            @test_throws DimensionMismatch LAPACK.bdsqr!('U', d, [e; 1], Vt, U, C)
-            @test_throws DimensionMismatch LAPACK.bdsqr!('U', d, e, Vt[1:end - 1, :], U, C)
-            @test_throws DimensionMismatch LAPACK.bdsqr!('U', d, e, Vt, U[:,1:end - 1], C)
-            @test_throws DimensionMismatch LAPACK.bdsqr!('U', d, e, Vt, U, C[1:end - 1, :])
-
-            @test_throws ArgumentError LAPACK.bdsdc!('U','Z',d,e)
-
-            A = rand(elty,n,n)
-            B = copy(A)
-            B, d, e, tauq, taup = LAPACK.gebrd!(B)
-            U, Vt, C = Matrix{elty}(I, n, n), Matrix{elty}(I, n, n), Matrix{elty}(I, n, n)
-            s, _ = LAPACK.bdsqr!('U',d,e[1:n-1],Vt, U, C)
-            @test s ≈ svdvals(A)
-        end
-    end
-end
-
-@testset "Issue #7886" begin
-    let
-        x, r = LAPACK.gelsy!([0 1; 0 2; 0 3.], [2, 4, 6.])
-        @test x ≈ [0,2]
-        @test r == 1
-    end
-end
-
-@testset "geqrt(3)" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        B = copy(A)
-        C,T = LAPACK.geqrt!(A,zeros(elty,10,10))
-        D,S = LAPACK.geqrt3!(A,zeros(elty,10,10))
-        @test C ≈ D
-    end
-end
-
-@testset "gbtrf and gbtrs" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        d = rand(elty,6)
-        dl = rand(elty,5)
-        du = rand(elty,5)
-        dl2 = rand(elty,4)
-        AB = zeros(elty,6,6)
-        AB[6,1:4] = dl2
-        AB[5,1:5] = dl
-        AB[4,:] = d
-        AB[3,2:6] = du
-        AB,ipiv = LAPACK.gbtrf!(2,1,6,AB)
-        C = rand(elty,6,6)
-        D = copy(C)
-        D = LAPACK.gbtrs!('N',2,1,6,AB,ipiv,D)
-        A = diagm(-2 => dl2, -1 => dl, 0 => d, 1 => du)
-        @test A\C ≈ D
-        @test_throws DimensionMismatch LAPACK.gbtrs!('N',2,1,6,AB,ipiv,Matrix{elty}(undef,7,6))
-        @test_throws LinearAlgebra.LAPACKException LAPACK.gbtrf!(2,1,6,zeros(elty,6,6))
-    end
-end
-
-
-@testset "geqp3, geqrt error handling" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        x10, x11 = Vector{elty}.(undef, (10, 11))
-        y10, y11 = Vector{LinearAlgebra.BlasInt}.(undef, (10, 11))
-        A10x10, A11x10, A10x11, A11x11 = Matrix{elty}.(undef, ((10,10), (11,10), (10,11), (11,11)))
-        @test_throws DimensionMismatch LAPACK.geqlf!(A10x10, x11)
-        @test_throws DimensionMismatch LAPACK.gelqf!(A10x10, x11)
-        @test_throws DimensionMismatch LAPACK.geqp3!(A10x10, y11, x10)
-        @test_throws DimensionMismatch LAPACK.geqp3!(A10x10, y10, x11)
-        @test_throws ArgumentError LAPACK.geqrt!(A10x10, A11x10)
-        @test_throws DimensionMismatch LAPACK.geqrt3!(A10x10, A11x10)
-        @test_throws DimensionMismatch LAPACK.geqrt3!(A10x11, A11x11)
-        @test_throws DimensionMismatch LAPACK.geqrf!(A10x10, x11)
-        @test_throws DimensionMismatch LAPACK.gerqf!(A10x10, x11)
-    end
-end
-
-@testset "gels, gesv, getrs, getri error handling" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A10x10, B11x11 = Matrix{elty}.(undef, ((10,10), (11,11)))
-        x10, x11 = Vector{LinearAlgebra.BlasInt}.(undef, (10, 11))
-        @test_throws DimensionMismatch LAPACK.gels!('N',A10x10,B11x11)
-        @test_throws DimensionMismatch LAPACK.gels!('T',A10x10,B11x11)
-        @test_throws DimensionMismatch LAPACK.gesv!(A10x10,B11x11)
-        @test_throws DimensionMismatch LAPACK.getrs!('N',A10x10,x10,B11x11)
-        @test_throws DimensionMismatch LAPACK.getrs!('T',A10x10,x10,B11x11)
-        @test_throws DimensionMismatch LAPACK.getri!(A10x10,x11)
-    end
-end
-
-@testset "gelsy, gelsd" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty, 10, 10)
-        B = rand(elty, 10, 10)
-        C, j = LAPACK.gelsd!(copy(A),copy(B))
-        D, k = LAPACK.gelsy!(copy(A),copy(B))
-        @test C ≈ D rtol=4*eps(cond(A))
-        @test_throws DimensionMismatch LAPACK.gelsd!(A,rand(elty,12,10))
-        @test_throws DimensionMismatch LAPACK.gelsy!(A,rand(elty,12,10))
-    end
-end
-
-@testset "gglse errors" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        @test_throws DimensionMismatch LAPACK.gglse!(A,zeros(elty,10),rand(elty,12,11),zeros(elty,12))
-        @test_throws DimensionMismatch LAPACK.gglse!(A,zeros(elty,11),rand(elty,10,10),zeros(elty,10))
-        @test_throws DimensionMismatch LAPACK.gglse!(A,zeros(elty,10),rand(elty,10,10),zeros(elty,11))
-    end
-end
-
-@testset "gesvd, ggsvd" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty,10,5)
-        U,S,V = svd(A)
-        lU,lS,lVt = LAPACK.gesvd!('S','S',A)
-        @test U ≈ lU
-        @test S ≈ lS
-        @test V' ≈ lVt
-        B = rand(elty,10,10)
-        # xggsvd3 replaced xggsvd in LAPACK 3.6.0
-        if LAPACK.version() < v"3.6.0"
-            @test_throws DimensionMismatch LAPACK.ggsvd!('S','S','S',A,B)
-        else
-            @test_throws DimensionMismatch LAPACK.ggsvd3!('S','S','S',A,B)
-        end
-    end
-end
-
-@testset "geevx, ggev, ggev3 errors" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        B = rand(elty,10,10)
-        @test_throws ArgumentError LAPACK.geevx!('M','N','N','N',A)
-        @test_throws ArgumentError LAPACK.geevx!('N','Z','N','N',A)
-        @test_throws ArgumentError LAPACK.geevx!('N','N','Z','N',A)
-        @test_throws ArgumentError LAPACK.geevx!('N','N','N','Z',A)
-        @test_throws ArgumentError LAPACK.ggev!('N','B',A,B)
-        @test_throws ArgumentError LAPACK.ggev!('B','N',A,B)
-        @test_throws DimensionMismatch LAPACK.ggev!('N','N',A,zeros(elty,12,12))
-        @test_throws ArgumentError LAPACK.ggev3!('N','B',A,B)
-        @test_throws ArgumentError LAPACK.ggev3!('B','N',A,B)
-        @test_throws DimensionMismatch LAPACK.ggev3!('N','N',A,zeros(elty,12,12))
-    end
-end
-
-@testset "gebal/gebak" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        typescale = log10(eps(real(elty))) / 3 * 2
-        A = rand(elty,10,10) * Diagonal(exp10.(range(typescale, stop=-typescale, length=10)))
-        B = copy(A)
-        ilo, ihi, scale = LAPACK.gebal!('S',B)
-        Bvs = eigvecs(B)
-        Avs = eigvecs(A)
-        Bvs = LAPACK.gebak!('S','R',ilo,ihi,scale,Bvs)
-        @test norm(diff(Avs ./ Bvs, dims=1)) < 100 * eps(abs(float(one(elty))))
-    end
-end
-
-@testset "gels" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        Random.seed!(913)
-        A = rand(elty,10,10)
-        X = rand(elty,10)
-        B,Y,z = LAPACK.gels!('N',copy(A),copy(X))
-        @test A\X ≈ Y
-    end
-end
-
-@testset "getrf/getri" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        iA = inv(A)
-        A, ipiv = LAPACK.getrf!(A)
-        A = LAPACK.getri!(A, ipiv)
-        @test A ≈ iA
-    end
-end
-
-@testset "geev" begin
-    # complex is easier for now
-    @testset for elty in (ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        Aw, Avl, Avr = LAPACK.geev!('N','V',copy(A))
-        fA = eigen(A, sortby=nothing)
-        @test fA.values  ≈ Aw
-        @test fA.vectors ≈ Avr
-    end
-end
-
-@testset "gtsv" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        du = rand(elty,9)
-        d  = rand(elty,10)
-        dl = rand(elty,9)
-        b  = rand(elty,10)
-        c = Tridiagonal(dl,d,du) \ b
-        b = LAPACK.gtsv!(dl,d,du,b)
-        @test b ≈ c
-        @test_throws DimensionMismatch LAPACK.gtsv!(zeros(elty,11),d,du,b)
-        @test_throws DimensionMismatch LAPACK.gtsv!(dl,d,zeros(elty,11),b)
-        @test_throws DimensionMismatch LAPACK.gtsv!(dl,d,du,zeros(elty,11))
-        @test LAPACK.gtsv!(elty[],elty[],elty[],elty[]) == elty[]
-    end
-end
-
-@testset "gttrs,gttrf errors" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        du = rand(elty,9)
-        d  = rand(elty,10)
-        dl = rand(elty,9)
-        b  = rand(elty,10)
-        y10 = Vector{BlasInt}(undef, 10)
-        x9, x11 = Vector{elty}.(undef, (9, 11))
-        @test_throws DimensionMismatch LAPACK.gttrf!(x11, d, du)
-        @test_throws DimensionMismatch LAPACK.gttrf!(dl, d, x11)
-        @test_throws DimensionMismatch LAPACK.gttrs!('N', x11, d, du, x9, y10, b)
-        @test_throws DimensionMismatch LAPACK.gttrs!('N', dl, d, x11, x9, y10, b)
-        @test_throws DimensionMismatch LAPACK.gttrs!('N', dl, d, du, x9, y10, x11)
-        A = lu(Tridiagonal(dl,d,du))
-        b  = rand(elty,10,5)
-        c = copy(b)
-        dl,d,du,du2,ipiv = LAPACK.gttrf!(dl,d,du)
-        c = LAPACK.gttrs!('N',dl,d,du,du2,ipiv,c)
-        @test A\b ≈ c
-    end
-end
-
-@testset "orglq and friends errors" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        A,tau = LAPACK.gelqf!(A)
-        @test_throws DimensionMismatch LAPACK.orglq!(A,tau,11)
-        @test_throws DimensionMismatch LAPACK.ormlq!('R','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormlq!('L','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormlq!('R','N',A,zeros(elty,11),rand(elty,10,10))
-        @test_throws DimensionMismatch LAPACK.ormlq!('L','N',A,zeros(elty,11),rand(elty,10,10))
-
-        B = copy(A)
-        C = LAPACK.orglq!(B,tau)
-        @test LAPACK.ormlq!('R','N',A,tau, Matrix{elty}(I, 10, 10)) ≈ C
-
-        A = rand(elty,10,10)
-        A,tau = LAPACK.geqrf!(A)
-        @test_throws DimensionMismatch LAPACK.orgqr!(A,tau,11)
-        B = copy(A)
-        @test LAPACK.orgqr!(B,tau) ≈ LAPACK.ormqr!('R','N',A,tau,Matrix{elty}(I, 10, 10))
-        @test_throws DimensionMismatch LAPACK.ormqr!('R','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormqr!('L','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormqr!('R','N',A,zeros(elty,11),rand(elty,10,10))
-        @test_throws DimensionMismatch LAPACK.ormqr!('L','N',A,zeros(elty,11),rand(elty,10,10))
-
-        A = rand(elty,10,10)
-        A,tau = LAPACK.geqlf!(A)
-        @test_throws DimensionMismatch LAPACK.orgql!(A,tau,11)
-        B = copy(A)
-        @test LAPACK.orgql!(B,tau) ≈ LAPACK.ormql!('R','N',A,tau,Matrix{elty}(I, 10, 10))
-        @test_throws DimensionMismatch LAPACK.ormql!('R','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormql!('L','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormql!('R','N',A,zeros(elty,11),rand(elty,10,10))
-        @test_throws DimensionMismatch LAPACK.ormql!('L','N',A,zeros(elty,11),rand(elty,10,10))
-
-        A = rand(elty,10,10)
-        A,tau = LAPACK.gerqf!(A)
-        @test_throws DimensionMismatch LAPACK.orgrq!(A,tau,11)
-        B = copy(A)
-        @test LAPACK.orgrq!(B,tau) ≈ LAPACK.ormrq!('R','N',A,tau,Matrix{elty}(I, 10, 10))
-        @test_throws DimensionMismatch LAPACK.ormrq!('R','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormrq!('L','N',A,tau,rand(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.ormrq!('R','N',A,zeros(elty,11),rand(elty,10,10))
-        @test_throws DimensionMismatch LAPACK.ormrq!('L','N',A,zeros(elty,11),rand(elty,10,10))
-
-        A = rand(elty,10,11)
-        Q = copy(A)
-        Q,tau = LAPACK.gerqf!(Q)
-        R = triu(Q[:,2:11])
-        LAPACK.orgrq!(Q,tau)
-        @test Q*Q' ≈ Matrix(I, 10, 10)
-        @test R*Q ≈ A
-        @test_throws DimensionMismatch LAPACK.orgrq!(zeros(elty,11,10),zeros(elty,10))
-
-        C = rand(elty,10,10)
-        V = rand(elty,10,10)
-        T = zeros(elty,10,11)
-        @test_throws DimensionMismatch LAPACK.gemqrt!('L','N',V,T,C)
-        @test_throws DimensionMismatch LAPACK.gemqrt!('R','N',V,T,C)
-
-        C = rand(elty,10,10)
-        V = rand(elty,11,10)
-        T = zeros(elty,10,10)
-        @test_throws DimensionMismatch LAPACK.gemqrt!('R','N',V,T,C)
-        @test_throws DimensionMismatch LAPACK.gemqrt!('L','N',V,T,C)
-
-        # test size(T) = (nb,k) ensures 1 <= nb <= k
-        T = zeros(elty,10,10)
-        V = rand(elty,5,10)
-        @test_throws DimensionMismatch LAPACK.gemqrt!('L','N',V,T,C)
-        C = rand(elty,10,10)
-        V = rand(elty,10,10)
-        T = zeros(elty,11,10)
-        @test_throws DimensionMismatch LAPACK.gemqrt!('R','N',V,T,C)
-
-        @test_throws DimensionMismatch LAPACK.orghr!(1, 10, C, zeros(elty,11))
-    end
-end
-
-@testset "sytri, sytrs, and sytrf" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        A = A + transpose(A) #symmetric!
-        B = copy(A)
-        B,ipiv = LAPACK.sytrf!('U',B)
-        @test triu(inv(A)) ≈ triu(LAPACK.sytri!('U',B,ipiv)) rtol=eps(cond(A))
-        @test_throws DimensionMismatch LAPACK.sytrs!('U',B,ipiv,rand(elty,11,5))
-        @test LAPACK.sytrf!('U',zeros(elty,0,0)) == (zeros(elty,0,0),zeros(BlasInt,0),zero(BlasInt))
-    end
-
-    # Rook-pivoting variants
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty, 10, 10)
-        A = A + transpose(A) #symmetric!
-        B = copy(A)
-        B,ipiv = LAPACK.sytrf_rook!('U', B)
-        @test triu(inv(A)) ≈ triu(LAPACK.sytri_rook!('U', B, ipiv)) rtol=eps(cond(A))
-        @test_throws DimensionMismatch LAPACK.sytrs_rook!('U', B, ipiv, rand(elty, 11, 5))
-        @test LAPACK.sytrf_rook!('U',zeros(elty, 0, 0)) == (zeros(elty, 0, 0),zeros(BlasInt, 0),zero(BlasInt))
-        A = rand(elty, 10, 10)
-        A = A + transpose(A) #symmetric!
-        b = rand(elty, 10)
-        c = A \ b
-        cnd = cond(A)
-        b,A = LAPACK.sysv_rook!('U', A, b)
-        @test b ≈ c rtol=eps(cnd)
-        @test_throws DimensionMismatch LAPACK.sysv_rook!('U',A,rand(elty,11))
-
-        # syconvf_rook error handling
-        # way argument is wrong
-        @test_throws ArgumentError LAPACK.syconvf_rook!('U', 'U', A, rand(BlasInt, 10))
-        # ipiv has wrong length
-        @test_throws ArgumentError LAPACK.syconvf_rook!('U', 'R', A, rand(BlasInt, 9))
-        # e has wrong length
-        @test_throws ArgumentError LAPACK.syconvf_rook!('U', 'R', A, rand(BlasInt, 10), rand(elty, 9))
-    end
-end
-
-@testset "hetrf, hetrs" begin
-    @testset for elty in (ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        A = A + A' #hermitian!
-        B = copy(A)
-        B,ipiv = LAPACK.hetrf!('U',B)
-        @test_throws DimensionMismatch LAPACK.hetrs!('U',B,ipiv,rand(elty,11,5))
-        @test_throws DimensionMismatch LAPACK.hetrs_rook!('U',B,ipiv,rand(elty,11,5))
-    end
-end
-
-@testset "stev, stebz, stein, stegr" begin
-    @testset for elty in (Float32, Float64)
-        d = rand(elty,10)
-        e = rand(elty,9)
-        @test_throws DimensionMismatch LAPACK.stev!('U',d,rand(elty,11))
-        @test_throws DimensionMismatch LAPACK.stebz!('A','B',zero(elty),zero(elty),0,0,-1.,d,rand(elty,10))
-        @test_throws DimensionMismatch LAPACK.stegr!('N','A',d,rand(elty,11),zero(elty),zero(elty),0,0)
-        @test_throws DimensionMismatch LAPACK.stein!(d,zeros(elty,11),zeros(elty,10),zeros(BlasInt,10),zeros(BlasInt,10))
-        @test_throws DimensionMismatch LAPACK.stein!(d,e,zeros(elty,11),zeros(BlasInt,10),zeros(BlasInt,10))
-    end
-end
-
-@testset "trtri & trtrs" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        A = triu(A)
-        B = copy(A)
-        @test inv(A) ≈ LAPACK.trtri!('U','N',B)
-        @test_throws DimensionMismatch LAPACK.trtrs!('U','N','N',B,zeros(elty,11,10))
-    end
-end
-
-@testset "larfg & larf" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        ## larfg
-        Random.seed!(0)
-        x  = rand(elty, 5)
-        v  = copy(x)
-        τ = LinearAlgebra.LAPACK.larfg!(v)
-        H = (I - τ*v*v')
-        # for complex input, LAPACK wants a conjugate transpose of H (check clarfg docs)
-        y = elty <: Complex ? H'*x : H*x
-        # we have rotated a vector
-        @test norm(y) ≈ norm(x)
-        # an annihilated almost all the first column
-        @test norm(y[2:end], Inf) < 10*eps(real(one(elty)))
-
-        ## larf
-        C = rand(elty, 5, 5)
-        C_norm = norm(C, 2)
-        v = C[1:end, 1]
-        τ = LinearAlgebra.LAPACK.larfg!(v)
-        LinearAlgebra.LAPACK.larf!('L', v, conj(τ), C)
-        # we have applied a unitary transformation
-        @test norm(C, 2) ≈ C_norm
-        # an annihilated almost all the first column
-        @test norm(C[2:end, 1], Inf) < 10*eps(real(one(elty)))
-
-        # apply left and right
-        C1 = rand(elty, 5, 5)
-        C2 = rand(elty, 5, 5)
-        C = C2*C1
-
-        v = C1[1:end, 1]
-        τ = LinearAlgebra.LAPACK.larfg!(v)
-        LinearAlgebra.LAPACK.larf!('L', v,      τ, C1)
-        LinearAlgebra.LAPACK.larf!('R', v, conj(τ), C2)
-        @test C ≈ C2*C1
-    end
-end
-
-@testset "tgsen, tzrzf, & trsyl" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        Z = zeros(elty,10,10)
-        @test_throws DimensionMismatch LAPACK.tgsen!(zeros(BlasInt,10),Z,zeros(elty,11,11),Z,Z)
-        @test_throws DimensionMismatch LAPACK.tgsen!(zeros(BlasInt,10),Z,Z,zeros(elty,11,11),Z)
-        @test_throws DimensionMismatch LAPACK.tgsen!(zeros(BlasInt,10),Z,Z,Z,zeros(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.trsyl!('N','N',Z,Z,zeros(elty,11,11))
-        @test_throws DimensionMismatch LAPACK.tzrzf!(zeros(elty,10,5))
-
-        A = triu(rand(elty,4,4))
-        V = view(A, 1:2, :)
-        M = Matrix(V)
-        @test LAPACK.tzrzf!(V) == LAPACK.tzrzf!(M)
-    end
-end
-
-@testset "sysv" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        Random.seed!(123)
-        A = rand(elty,10,10)
-        A = A + transpose(A) #symmetric!
-        b = rand(elty,10)
-        c = A \ b
-        b,A = LAPACK.sysv!('U',A,b)
-        @test b ≈ c
-        @test_throws DimensionMismatch LAPACK.sysv!('U',A,rand(elty,11))
-    end
-end
-
-@testset "hesv" begin
-    @testset for elty in (ComplexF32, ComplexF64)
-        Random.seed!(935)
-        A = rand(elty,10,10)
-        A = A + A' #hermitian!
-        b = rand(elty,10)
-        c = A \ b
-        b,A = LAPACK.hesv!('U',A,b)
-        @test b ≈ c
-        @test_throws DimensionMismatch LAPACK.hesv!('U',A,rand(elty,11))
-        A = rand(elty,10,10)
-        A = A + A' #hermitian!
-        b = rand(elty,10)
-        c = A \ b
-        b,A = LAPACK.hesv_rook!('U',A,b)
-        @test b ≈ c
-        @test_throws DimensionMismatch LAPACK.hesv_rook!('U',A,rand(elty,11))
-    end
-end
-
-@testset "ptsv" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        dv = fill(elty(1),10)
-        ev = zeros(elty,9)
-        rdv = real(dv)
-        A = SymTridiagonal(dv,ev)
-        if elty <: Complex
-            A = Tridiagonal(conj(ev),dv,ev)
-        end
-        B = rand(elty,10,10)
-        C = copy(B)
-        @test A\B ≈ LAPACK.ptsv!(rdv,ev,C)
-        @test_throws DimensionMismatch LAPACK.ptsv!(rdv,Vector{elty}(undef,10),C)
-        @test_throws DimensionMismatch LAPACK.ptsv!(rdv,ev,Matrix{elty}(undef,11,11))
-    end
-end
-
-@testset "pttrf and pttrs" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        dv = fill(elty(1),10)
-        ev = zeros(elty,9)
-        rdv = real(dv)
-        A = SymTridiagonal(dv,ev)
-        if elty <: Complex
-            A = Tridiagonal(conj(ev),dv,ev)
-        end
-        rdv,ev = LAPACK.pttrf!(rdv,ev)
-        @test_throws DimensionMismatch LAPACK.pttrf!(rdv,dv)
-        B = rand(elty,10,10)
-        C = copy(B)
-        if elty <: Complex
-            @test A\B ≈ LAPACK.pttrs!('U',rdv,ev,C)
-            @test_throws DimensionMismatch LAPACK.pttrs!('U',rdv,Vector{elty}(undef,10),C)
-            @test_throws DimensionMismatch LAPACK.pttrs!('U',rdv,ev,Matrix{elty}(undef,11,11))
-        else
-            @test A\B ≈ LAPACK.pttrs!(rdv,ev,C)
-            @test_throws DimensionMismatch LAPACK.pttrs!(rdv,Vector{elty}(undef,10),C)
-            @test_throws DimensionMismatch LAPACK.pttrs!(rdv,ev,Matrix{elty}(undef,11,11))
-        end
-    end
-end
-
-@testset "posv and some errors for friends" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        local n = 10
-        A = rand(elty,n,n)/100
-        A += real(diagm(0 => n*real(rand(elty,n))))
-        if elty <: Complex
-            A = A + A'
-        else
-            A = A + transpose(A)
-        end
-        B = rand(elty,n,n)
-        D = copy(A)
-        C = copy(B)
-        D,C = LAPACK.posv!('U',D,C)
-        @test A\B ≈ C
-        offsizemat = Matrix{elty}(undef, n+1, n+1)
-        @test_throws DimensionMismatch LAPACK.posv!('U', D, offsizemat)
-        @test_throws DimensionMismatch LAPACK.potrs!('U', D, offsizemat)
-
-        @test LAPACK.potrs!('U',Matrix{elty}(undef,0,0),elty[]) == elty[]
-    end
-end
-
-@testset "gesvx" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        B = rand(elty,10,5)
-        C = copy(A)
-        D = copy(B)
-        X, rcond, f, b, r = LAPACK.gesvx!(C,D)
-        @test X ≈ A\B rtol=inv(rcond)*eps(real(elty))
-    end
-end
-
-@testset "gees, gges, gges3 error throwing" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        A = rand(elty,10,10)
-        B = rand(elty,11,11)
-        @test_throws DimensionMismatch LAPACK.gges!('V','V',A,B)
-        @test_throws DimensionMismatch LAPACK.gges3!('V','V',A,B)
-    end
-end
-
-@testset "trrfs & trevc" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        T = triu(rand(elty,10,10))
-        v = eigvecs(T, sortby=nothing)[:,1]
-        select = zeros(LinearAlgebra.BlasInt,10)
-        select[1] = 1
-        select,Vr = LAPACK.trevc!('R','S',select,copy(T))
-        @test Vr ≈ v
-        select = zeros(LinearAlgebra.BlasInt,10)
-        select[1] = 1
-        select,Vl = LAPACK.trevc!('L','S',select,copy(T))
-        select = zeros(LinearAlgebra.BlasInt,10)
-        select[1] = 1
-        select,Vln,Vrn = LAPACK.trevc!('B','S',select,copy(T))
-        @test Vrn ≈ v
-        @test Vln ≈ Vl
-        @test_throws ArgumentError LAPACK.trevc!('V','S',select,copy(T))
-        @test_throws DimensionMismatch LAPACK.trrfs!('U','N','N',T,rand(elty,10,10),rand(elty,10,11))
-    end
-end
-
-@testset "laic1" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        @test_throws DimensionMismatch LAPACK.laic1!(1,rand(elty,10),real(rand(elty)),rand(elty,11),rand(elty))
-    end
-end
-
-@testset "trsen" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        for job in ('N', 'E', 'V', 'B')
-            for c in ('V', 'N')
-                A = convert(Matrix{elty}, [7 2 2 1; 1 5 2 0; 0 3 9 4; 1 1 1 4])
-                T,Q,d = schur(A)
-                s, sep = LinearAlgebra.LAPACK.trsen!(job,c,Array{LinearAlgebra.BlasInt}([0,1,0,0]),T,Q)[4:5]
-                @test d[1] ≈ T[2,2]
-                @test d[2] ≈ T[1,1]
-                if c == 'V'
-                    @test  Q*T*Q' ≈ A
-                end
-                if job == 'N' || job == 'V'
-                    @test iszero(s)
-                else
-                    @test s ≈ 0.8080423 atol=1e-6
-                end
-                if job == 'N' || job == 'E'
-                    @test iszero(sep)
-                else
-                    @test sep ≈ 2. atol=3e-1
-                end
-            end
-        end
-    end
-end
-
-@testset "trexc" begin
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        for c in ('V', 'N')
-            A = convert(Matrix{elty}, [7 2 2 1; 1 5 2 0; 0 3 9 4; 1 1 1 4])
-            T,Q,d = schur(A)
-            LinearAlgebra.LAPACK.trexc!(c,LinearAlgebra.BlasInt(1),LinearAlgebra.BlasInt(2),T,Q)
-            @test d[1] ≈ T[2,2]
-            @test d[2] ≈ T[1,1]
-            if c == 'V'
-                @test Q*T*Q' ≈ A
-            end
-        end
-    end
-end
-
-@testset "Julia vs LAPACK" begin
-    # Test our own linear algebra functionality against LAPACK
-    @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
-        for nn in (5,10,15)
-            if elty <: Real
-                A = convert(Matrix{elty}, randn(10,nn))
-            else
-                A = convert(Matrix{elty}, complex.(randn(10,nn),randn(10,nn)))
-            end    ## LU (only equal for real because LAPACK uses different absolute value when choosing permutations)
-            if elty <: Real
-                FJulia  = LinearAlgebra.generic_lufact!(copy(A))
-                FLAPACK = LinearAlgebra.LAPACK.getrf!(copy(A))
-                @test FJulia.factors ≈ FLAPACK[1]
-                @test FJulia.ipiv ≈ FLAPACK[2]
-                @test FJulia.info ≈ FLAPACK[3]
-            end
-
-            ## QR
-            FJulia  = LinearAlgebra.qrfactUnblocked!(copy(A))
-            FLAPACK = LinearAlgebra.LAPACK.geqrf!(copy(A))
-            @test FJulia.factors ≈ FLAPACK[1]
-            @test FJulia.τ ≈ FLAPACK[2]
-        end
-    end
-end
-
-# Issue 13976
-let A = [NaN 0.0 NaN; 0 0 0; NaN 0 NaN]
-    @test_throws ArgumentError exp(A)
-end
-
-# Issue 14065 (and 14220)
-let A = [NaN NaN; NaN NaN]
-    @test_throws ArgumentError eigen(A)
-end
-
-# Issue #42762 https://github.com/JuliaLang/julia/issues/42762
-# Tests geqrf! and gerqf! with null column dimensions
-a = zeros(2,0), zeros(0)
-@test LinearAlgebra.LAPACK.geqrf!(a...) === a
-@test LinearAlgebra.LAPACK.gerqf!(a...) === a
-
-# Issue #49489: https://github.com/JuliaLang/julia/issues/49489
-# Dimension mismatch between A and ipiv causes segfaults
-@testset "issue #49489" begin
-    A = randn(23,23)
-    b = randn(23)
-    ipiv = collect(1:20)
-    @test_throws DimensionMismatch LinearAlgebra.LAPACK.getrs!('N', A, ipiv, b)
-end
-
-end # module TestLAPACK
diff --git a/stdlib/LinearAlgebra/test/ldlt.jl b/stdlib/LinearAlgebra/test/ldlt.jl
deleted file mode 100644
index 51abf31086091..0000000000000
--- a/stdlib/LinearAlgebra/test/ldlt.jl
+++ /dev/null
@@ -1,41 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestLDLT
-
-using Test, LinearAlgebra, Random
-
-Random.seed!(123)
-
-@testset "Factorization conversions of LDLT" begin
-    S = SymTridiagonal(randn(5), randn(4))
-    F = ldlt(S)
-    @test Factorization{eltype(S)}(F) === F
-    @test Array(Factorization{complex(eltype(S))}(F)) ≈ Array(ldlt(complex(S)))
-    @test eltype(Factorization{complex(eltype(S))}) == complex(eltype(S))
-end
-
-@testset "eltype conversions of LDLT" begin
-    S = SymTridiagonal(randn(5), randn(4))
-    F = ldlt(S)
-    Fc = LDLt{ComplexF32}(F.data)
-    @test Fc isa LDLt{ComplexF32}
-    @test Array(Fc) ≈ ComplexF32.(Array(S))
-end
-
-@testset "Accessing fields of LDLT" begin
-    S = SymTridiagonal(randn(5), randn(4))
-    F = ldlt(S)
-    @test getproperty(F, :L) == transpose(getproperty(F, :Lt))
-    @test getproperty(F, :d) == diag(getproperty(F, :D), 0)
-end
-
-@testset "REPL printing of LDLT" begin
-    S = SymTridiagonal(randn(5), randn(4))
-    F = ldlt(S)
-    ldltstring = sprint((t, s) -> show(t, "text/plain", s), F)
-    lstring = sprint((t, s) -> show(t, "text/plain", s), F.L)
-    dstring = sprint((t, s) -> show(t, "text/plain", s), F.D)
-    @test ldltstring == "$(summary(F))\nL factor:\n$lstring\nD factor:\n$dstring"
-end
-
-end # module TestLDLT
diff --git a/stdlib/LinearAlgebra/test/lq.jl b/stdlib/LinearAlgebra/test/lq.jl
deleted file mode 100644
index 6bdc4efa5d6dd..0000000000000
--- a/stdlib/LinearAlgebra/test/lq.jl
+++ /dev/null
@@ -1,237 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestLQ
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, rmul!, lmul!
-
-m = 10
-
-Random.seed!(1234321)
-
-asquare = randn(ComplexF64, m, m) / 2
-awide = randn(ComplexF64, m, m+3) / 2
-bcomplex = randn(ComplexF64, m, 2) / 2
-
-# helper functions to unambiguously recover explicit forms of an LQPackedQ
-squareQ(Q::LinearAlgebra.LQPackedQ) = (n = size(Q.factors, 2); lmul!(Q, Matrix{eltype(Q)}(I, n, n)))
-rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q)
-
-@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64), n in (m, size(awide, 2))
-    adata = m == n ? asquare : awide
-    a = convert(Matrix{eltya}, eltya <: Complex ? adata : real(adata))
-    ε = εa = eps(abs(float(one(eltya))))
-    n1 = n ÷ 2
-
-    α = rand(eltya)
-    aα = fill(α,1,1)
-    @test lq(α).L*lq(α).Q ≈ lq(aα).L*lq(aα).Q
-    @test abs(lq(α).Q[1,1]) ≈ one(eltya)
-
-    @testset for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int)
-        b = eltyb == Int ? rand(1:5, m, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? bcomplex : real(bcomplex))
-        εb = eps(abs(float(one(eltyb))))
-        ε = max(εa,εb)
-
-        tab = promote_type(eltya,eltyb)
-
-        @testset for isview in (false,true)
-            let a = isview ? view(a, 1:m - 1, 1:n - 1) : a, b = isview ? view(b, 1:m - 1) : b, m = m - isview, n = n - isview
-                lqa = lq(a)
-                x = lqa\b
-                l, q = lqa.L, lqa.Q
-                qra = qr(a, ColumnNorm())
-                @testset "Basic ops" begin
-                    @test size(lqa,1) == size(a,1)
-                    @test size(lqa,3) == 1
-                    @test size(lqa.Q,3) == 1
-                    @test Base.propertynames(lqa) == (:L, :Q)
-                    ref_obs = (l, q)
-                    for (ii, lq_obj) in enumerate(lqa)
-                        @test ref_obs[ii] == lq_obj
-                    end
-                    @test_throws ErrorException lqa.Z
-                    @test Array(copy(adjoint(lqa))) ≈ a'
-                    @test q*squareQ(q)' ≈ Matrix(I, n, n)
-                    @test l*q ≈ a
-                    @test Array(lqa) ≈ a
-                    @test Array(copy(lqa)) ≈ a
-                    @test LinearAlgebra.Factorization{eltya}(lqa) === lqa
-                    @test Matrix{eltya}(q) isa Matrix{eltya}
-                    # test Array{T}(LQPackedQ{T})
-                    @test Array{eltya}(q) ≈ Matrix(q)
-                end
-                @testset "Binary ops" begin
-                    k = size(a, 2)
-                    T = Tridiagonal(rand(eltya, k-1), rand(eltya, k), rand(eltya, k-1))
-                    @test lq(T) * T ≈ T * T rtol=3000ε
-                    @test lqa * T ≈ a * T rtol=3000ε
-                    @test a*x ≈ b rtol=3000ε
-                    @test x ≈ qra \ b rtol=3000ε
-                    @test lqa*x ≈ a*x rtol=3000ε
-                    @test (sq = size(q.factors, 2); *(Matrix{eltyb}(I, sq, sq), adjoint(q))*squareQ(q)) ≈ Matrix(I, n, n) rtol=5000ε
-                    if eltya != Int
-                        @test Matrix{eltyb}(I, n, n)*q ≈ Matrix(I, n, n) * convert(LinearAlgebra.AbstractQ{tab}, q)
-                    end
-                    @test q*x ≈ squareQ(q)*x rtol=100ε
-                    @test q'*x ≈ squareQ(q)'*x rtol=100ε
-                    @test a*q ≈ a*squareQ(q) rtol=100ε
-                    @test a*q' ≈ a*squareQ(q)' rtol=100ε
-                    @test q*a'≈ squareQ(q)*a' rtol=100ε
-                    @test q'*a' ≈ squareQ(q)'*a' rtol=100ε
-                    @test_throws DimensionMismatch q*x[1:n1 + 1]
-                    @test_throws DimensionMismatch adjoint(q) * Matrix{eltya}(undef,m+2,m+2)
-                    @test_throws DimensionMismatch Matrix{eltyb}(undef,m+2,m+2)*q
-                    if isa(a, DenseArray) && isa(b, DenseArray)
-                        # use this to test 2nd branch in mult code
-                        pad_a = vcat(I, a)
-                        pad_x = hcat(I, x)
-                        @test pad_a*q ≈ pad_a*squareQ(q) rtol=100ε
-                        @test q'*pad_x ≈ squareQ(q)'*pad_x rtol=100ε
-                    end
-                end
-            end
-        end
-
-        @testset "Matmul with LQ factorizations" begin
-            lqa = lq(a[:,1:n1])
-            l,q = lqa.L, lqa.Q
-            @test rectangularQ(q)*rectangularQ(q)' ≈ Matrix(I, n1, n1)
-            @test squareQ(q)'*squareQ(q) ≈ Matrix(I, n1, n1)
-            @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1),q)
-            @test lmul!(adjoint(q), rectangularQ(q)) ≈ Matrix(I, n1, n1)
-            @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1), adjoint(q))
-            @test_throws BoundsError size(q,-1)
-        end
-    end
-end
-
-@testset "getindex on LQPackedQ (#23733)" begin
-    local m, n
-    function getqs(F::LinearAlgebra.LQ)
-        implicitQ = F.Q
-        sq = size(implicitQ.factors, 2)
-        explicitQ = lmul!(implicitQ, Matrix{eltype(implicitQ)}(I, sq, sq))
-        return implicitQ, explicitQ
-    end
-
-    m, n = 3, 3 # reduced Q 3-by-3, full Q 3-by-3
-    implicitQ, explicitQ = getqs(lq(randn(m, n)))
-    @test implicitQ[1, 1] == explicitQ[1, 1]
-    @test implicitQ[m, 1] == explicitQ[m, 1]
-    @test implicitQ[1, n] == explicitQ[1, n]
-    @test implicitQ[m, n] == explicitQ[m, n]
-
-    m, n = 3, 4 # reduced Q 3-by-4, full Q 4-by-4
-    implicitQ, explicitQ = getqs(lq(randn(m, n)))
-    @test implicitQ[1, 1] == explicitQ[1, 1]
-    @test implicitQ[m, 1] == explicitQ[m, 1]
-    @test implicitQ[1, n] == explicitQ[1, n]
-    @test implicitQ[m, n] == explicitQ[m, n]
-    @test implicitQ[m+1, 1] == explicitQ[m+1, 1]
-    @test implicitQ[m+1, n] == explicitQ[m+1, n]
-
-    m, n = 4, 3 # reduced Q 3-by-3, full Q 3-by-3
-    implicitQ, explicitQ = getqs(lq(randn(m, n)))
-    @test implicitQ[1, 1] == explicitQ[1, 1]
-    @test implicitQ[n, 1] == explicitQ[n, 1]
-    @test implicitQ[1, n] == explicitQ[1, n]
-    @test implicitQ[n, n] == explicitQ[n, n]
-end
-
-@testset "size on LQPackedQ (#23780)" begin
-    # size(Q::LQPackedQ) yields the shape of Q's full/square form
-    for ((mA, nA), nQ) in (
-        ((3, 3), 3), # A 3-by-3 => full/square Q 3-by-3
-        ((3, 4), 4), # A 3-by-4 => full/square Q 4-by-4
-        ((4, 3), 3) )# A 4-by-3 => full/square Q 3-by-3
-        @test size(lq(randn(mA, nA)).Q) == (nQ, nQ)
-    end
-end
-
-@testset "postmultiplication with / right-application of LQPackedQ (#23779)" begin
-    function getqs(F::LinearAlgebra.LQ)
-        implicitQ = F.Q
-        explicitQ = lmul!(implicitQ, Matrix{eltype(implicitQ)}(I, size(implicitQ)...))
-        return implicitQ, explicitQ
-    end
-    # for any shape m-by-n of LQ-factored matrix, where Q is an LQPackedQ
-    # A_mul_B*(C, Q) (Ac_mul_B*(C, Q)) operations should work for
-    # *-by-n (n-by-*) C, which we test below via n-by-n C
-    for (mA, nA) in ((3, 3), (3, 4), (4, 3))
-        implicitQ, explicitQ = getqs(lq(randn(mA, nA)))
-        C = randn(nA, nA)
-        @test *(C, implicitQ) ≈ *(C, explicitQ)
-        @test *(C, adjoint(implicitQ)) ≈ *(C, adjoint(explicitQ))
-        @test *(adjoint(C), implicitQ) ≈ *(adjoint(C), explicitQ)
-        @test *(adjoint(C), adjoint(implicitQ)) ≈ *(adjoint(C), adjoint(explicitQ))
-    end
-    # where the LQ-factored matrix has at least as many rows m as columns n,
-    # Q's full/square and reduced/rectangular forms have the same shape (n-by-n). hence we expect
-    # _only_ *-by-n (n-by-*) C to work in A_mul_B*(C, Q) (Ac_mul_B*(C, Q)) ops.
-    # and hence the n-by-n C tests above suffice.
-    #
-    # where the LQ-factored matrix has more columns n than rows m,
-    # Q's full/square form is n-by-n whereas its reduced/rectangular form is m-by-n.
-    # hence we need also test *-by-m C with
-    # A*_mul_B(C, Q) ops, as below via m-by-m C.
-    mA, nA = 3, 4
-    implicitQ, explicitQ = getqs(lq(randn(mA, nA)))
-    C = randn(mA, mA)
-    zeroextCright = hcat(C, zeros(eltype(C), mA))
-    zeroextCdown = vcat(C, zeros(eltype(C), (1, mA)))
-    @test *(C, implicitQ) ≈ *(zeroextCright, explicitQ)
-    @test *(adjoint(C), implicitQ) ≈ *(adjoint(zeroextCdown), explicitQ)
-    @test_throws DimensionMismatch C * adjoint(implicitQ)
-    @test_throws DimensionMismatch adjoint(C) * adjoint(implicitQ)
-end
-
-@testset "det(Q::LQPackedQ)" begin
-    @testset for n in 1:3, m in 1:3
-        @testset "real" begin
-            _, Q = lq(randn(n, m))
-            @test det(Q) ≈ det(Q*I)
-            @test abs(det(Q)) ≈ 1
-        end
-        @testset "complex" begin
-            _, Q = lq(randn(ComplexF64, n, m))
-            @test det(Q) ≈ det(Q*I)
-            @test abs(det(Q)) ≈ 1
-        end
-    end
-end
-
-@testset "REPL printing" begin
-    bf = IOBuffer()
-    show(bf, "text/plain", lq(Matrix(I, 4, 4)))
-    seekstart(bf)
-    @test String(take!(bf)) == """
-LinearAlgebra.LQ{Float64, Matrix{Float64}, Vector{Float64}}
-L factor:
-4×4 Matrix{Float64}:
- 1.0  0.0  0.0  0.0
- 0.0  1.0  0.0  0.0
- 0.0  0.0  1.0  0.0
- 0.0  0.0  0.0  1.0
-Q factor: 4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}"""
-end
-
-@testset "adjoint of LQ" begin
-    n = 5
-
-    for b in (ones(n), ones(n, 2), ones(Complex{Float64}, n, 2))
-        for A in (
-            randn(n, n),
-            # Tall problems become least squares problems similarly to QR
-            randn(n - 2, n),
-            complex.(randn(n, n), randn(n, n)))
-
-            F = lq(A)
-            @test A'\b ≈ F'\b
-        end
-        @test_throws DimensionMismatch lq(randn(n, n + 2))'\b
-    end
-
-end
-
-end # module TestLQ
diff --git a/stdlib/LinearAlgebra/test/lu.jl b/stdlib/LinearAlgebra/test/lu.jl
deleted file mode 100644
index aa73bee6ddc38..0000000000000
--- a/stdlib/LinearAlgebra/test/lu.jl
+++ /dev/null
@@ -1,467 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestLU
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: ldiv!, BlasReal, BlasInt, BlasFloat, rdiv!
-
-n = 10
-
-# Split n into 2 parts for tests needing two matrices
-n1 = div(n, 2)
-n2 = 2*n1
-
-Random.seed!(1234324)
-
-areal = randn(n,n)/2
-aimg  = randn(n,n)/2
-breal = randn(n,2)/2
-bimg  = randn(n,2)/2
-creal = randn(n)/2
-cimg  = randn(n)/2
-dureal = randn(n-1)/2
-duimg  = randn(n-1)/2
-dlreal = randn(n-1)/2
-dlimg  = randn(n-1)/2
-dreal = randn(n)/2
-dimg  = randn(n)/2
-
-@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int)
-    a = eltya == Int ? rand(1:7, n, n) :
-        convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
-    d = if eltya == Int
-        Tridiagonal(rand(1:7, n-1), rand(1:7, n), rand(1:7, n-1))
-    elseif eltya <: Complex
-        convert(Tridiagonal{eltya}, Tridiagonal(
-            complex.(dlreal, dlimg), complex.(dreal, dimg), complex.(dureal, duimg)))
-    else
-        convert(Tridiagonal{eltya}, Tridiagonal(dlreal, dreal, dureal))
-    end
-    εa = eps(abs(float(one(eltya))))
-
-    if eltya <: BlasFloat
-        @testset "LU factorization for Number" begin
-            num = rand(eltya)
-            @test (lu(num)...,) == (hcat(one(eltya)), hcat(num), [1])
-            @test convert(Array, lu(num)) ≈ eltya[num]
-        end
-        @testset "Balancing in eigenvector calculations" begin
-            A = convert(Matrix{eltya}, [ 3.0     -2.0      -0.9     2*eps(real(one(eltya)));
-                                       -2.0      4.0       1.0    -eps(real(one(eltya)));
-                                       -eps(real(one(eltya)))/4  eps(real(one(eltya)))/2  -1.0     0;
-                                       -0.5     -0.5       0.1     1.0])
-            F = eigen(A, permute=false, scale=false)
-            @test F.vectors*Diagonal(F.values)/F.vectors ≈ A
-            F = eigen(A)
-            # @test norm(F.vectors*Diagonal(F.values)/F.vectors - A) > 0.01
-        end
-    end
-    κ  = cond(a,1)
-    @testset "(Automatic) Square LU decomposition" begin
-        lua   = factorize(a)
-        @test_throws ErrorException lua.Z
-        l,u,p = lua.L, lua.U, lua.p
-        ll,ul,pl = @inferred lu(a)
-        @test ll * ul ≈ a[pl,:]
-        @test l*u ≈ a[p,:]
-        @test (l*u)[invperm(p),:] ≈ a
-        @test a * inv(lua) ≈ Matrix(I, n, n)
-        @test copy(lua) == lua
-        if eltya <: BlasFloat
-            # test conversion of LU factorization's numerical type
-            bft = eltya <: Real ? LinearAlgebra.LU{BigFloat} : LinearAlgebra.LU{Complex{BigFloat}}
-            bflua = convert(bft, lua)
-            @test bflua.L*bflua.U ≈ big.(a)[p,:] rtol=εa*norm(a)
-            @test Factorization{eltya}(lua) === lua
-            # test Factorization with different eltype
-            if eltya <: BlasReal
-                @test Array(Factorization{Float16}(lua)) ≈ Array(lu(convert(Matrix{Float16}, a)))
-                @test eltype(Factorization{Float16}(lua)) == Float16
-            end
-        end
-        # compact printing
-        lstring = sprint(show,l)
-        ustring = sprint(show,u)
-    end
-    κd    = cond(Array(d),1)
-    @testset "Tridiagonal LU" begin
-        lud = @inferred lu(d)
-        @test LinearAlgebra.issuccess(lud)
-        @test @inferred(lu(lud)) == lud
-        @test_throws ErrorException lud.Z
-        @test lud.L*lud.U ≈ lud.P*Array(d)
-        @test lud.L*lud.U ≈ Array(d)[lud.p,:]
-        @test AbstractArray(lud) ≈ d
-        @test Array(lud) ≈ d
-        if eltya != Int
-            dlu = convert.(eltya, [1, 1])
-            dia = convert.(eltya, [-2, -2, -2])
-            tri = Tridiagonal(dlu, dia, dlu)
-            @test_throws ArgumentError lu!(tri)
-        end
-    end
-    @testset for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int)
-        b  = eltyb == Int ? rand(1:5, n, 2) :
-            convert(Matrix{eltyb}, eltyb <: Complex ? complex.(breal, bimg) : breal)
-        c  = eltyb == Int ? rand(1:5, n) :
-            convert(Vector{eltyb}, eltyb <: Complex ? complex.(creal, cimg) : creal)
-        εb = eps(abs(float(one(eltyb))))
-        ε  = max(εa,εb)
-        @testset "(Automatic) Square LU decomposition" begin
-            lua   = factorize(a)
-            let Bs = copy(b), Cs = copy(c)
-                for (bb, cc) in ((Bs, Cs), (view(Bs, 1:n, 1), view(Cs, 1:n)))
-                    @test norm(a*(lua\bb) - bb, 1) < ε*κ*n*2 # Two because the right hand side has two columns
-                    @test norm(a'*(lua'\bb) - bb, 1) < ε*κ*n*2 # Two because the right hand side has two columns
-                    @test norm(a'*(lua'\a') - a', 1) < ε*κ*n^2
-                    @test norm(a*(lua\cc) - cc, 1) < ε*κ*n # cc is a vector
-                    @test norm(a'*(lua'\cc) - cc, 1) < ε*κ*n # cc is a vector
-                    @test AbstractArray(lua) ≈ a
-                    @test norm(transpose(a)*(transpose(lua)\bb) - bb,1) < ε*κ*n*2 # Two because the right hand side has two columns
-                    @test norm(transpose(a)*(transpose(lua)\cc) - cc,1) < ε*κ*n
-                end
-
-                # Test whether Ax_ldiv_B!(y, LU, x) indeed overwrites y
-                resultT = typeof(oneunit(eltyb) / oneunit(eltya))
-
-                b_dest = similar(b, resultT)
-                c_dest = similar(c, resultT)
-
-                ldiv!(b_dest, lua, b)
-                ldiv!(c_dest, lua, c)
-                @test norm(b_dest - lua \ b, 1) < ε*κ*2n
-                @test norm(c_dest - lua \ c, 1) < ε*κ*n
-
-                ldiv!(b_dest, transpose(lua), b)
-                ldiv!(c_dest, transpose(lua), c)
-                @test norm(b_dest - transpose(lua) \ b, 1) < ε*κ*2n
-                @test norm(c_dest - transpose(lua) \ c, 1) < ε*κ*n
-
-                ldiv!(b_dest, adjoint(lua), b)
-                ldiv!(c_dest, adjoint(lua), c)
-                @test norm(b_dest - lua' \ b, 1) < ε*κ*2n
-                @test norm(c_dest - lua' \ c, 1) < ε*κ*n
-
-                if eltyb != Int && !(eltya <: Complex) || eltya <: Complex && eltyb <: Complex
-                    p = Matrix(b')
-                    q = Matrix(c')
-                    p_dest = copy(p)
-                    q_dest = copy(q)
-                    rdiv!(p_dest, lua)
-                    rdiv!(q_dest, lua)
-                    @test norm(p_dest - p / lua, 1) < ε*κ*2n
-                    @test norm(q_dest - q / lua, 1) < ε*κ*n
-                end
-            end
-            if eltya <: BlasFloat && eltyb <: BlasFloat
-                e = rand(eltyb,n,n)
-                @test norm(e/lua - e/a,1) < ε*κ*n^2
-            end
-        end
-        @testset "Tridiagonal LU" begin
-            lud   = factorize(d)
-            f = zeros(eltyb, n+1)
-            @test_throws DimensionMismatch lud\f
-            @test_throws DimensionMismatch transpose(lud)\f
-            @test_throws DimensionMismatch lud'\f
-            @test_throws DimensionMismatch LinearAlgebra.ldiv!(transpose(lud), f)
-            let Bs = copy(b)
-                for bb in (Bs, view(Bs, 1:n, 1))
-                    @test norm(d*(lud\bb) - bb, 1) < ε*κd*n*2 # Two because the right hand side has two columns
-                    if eltya <: Real
-                        @test norm((transpose(lud)\bb) - Array(transpose(d))\bb, 1) < ε*κd*n*2 # Two because the right hand side has two columns
-                        if eltya != Int && eltyb != Int
-                            @test norm(LinearAlgebra.ldiv!(transpose(lud), copy(bb)) - Array(transpose(d))\bb, 1) < ε*κd*n*2
-                        end
-                    end
-                    if eltya <: Complex
-                        dummy_factor = 2.5
-                        # TODO: Remove dummy_factor, this test started failing when the RNG stream changed
-                        # so the factor was added.
-                        @test norm((lud'\bb) - Array(d')\bb, 1) < ε*κd*n*2*dummy_factor # Two because the right hand side has two columns
-                    end
-                end
-            end
-            if eltya <: BlasFloat && eltyb <: BlasFloat
-                e = rand(eltyb,n,n)
-                @test norm(e/lud - e/d,1) < ε*κ*n^2
-                @test norm((transpose(lud)\e') - Array(transpose(d))\e',1) < ε*κd*n^2
-                #test singular
-                du = rand(eltya,n-1)
-                dl = rand(eltya,n-1)
-                dd = rand(eltya,n)
-                dd[1] = zero(eltya)
-                du[1] = zero(eltya)
-                dl[1] = zero(eltya)
-                zT = Tridiagonal(dl,dd,du)
-                @test !LinearAlgebra.issuccess(lu(zT; check = false))
-            end
-        end
-        @testset "Thin LU" begin
-            lua   = @inferred lu(a[:,1:n1])
-            @test lua.L*lua.U ≈ lua.P*a[:,1:n1]
-        end
-        @testset "Fat LU" begin
-            lua   = @inferred lu(a[1:n1,:])
-            @test lua.L*lua.U ≈ lua.P*a[1:n1,:]
-        end
-    end
-
-    @testset "LU of Symmetric/Hermitian" begin
-        for HS in (Hermitian(a'a), Symmetric(a'a))
-            luhs = @inferred lu(HS)
-            @test luhs.L*luhs.U ≈ luhs.P*Matrix(HS)
-        end
-    end
-
-    @testset "Factorization of symtridiagonal dense matrix with zero ldlt-pivot (#38026)" begin
-        A = [0.0 -1.0 0.0 0.0
-            -1.0 0.0 0.0 0.0
-            0.0 0.0 0.0 -1.0
-            0.0 0.0 -1.0 0.0]
-        F = factorize(A)
-        @test all((!isnan).(Matrix(F)))
-    end
-end
-
-@testset "Small tridiagonal matrices" for T in (Float64, ComplexF64)
-    A = Tridiagonal(T[], T[1], T[])
-    @test inv(A) == A
-end
-
-@testset "Singular matrices" for T in (Float64, ComplexF64)
-    A = T[1 2; 0 0]
-    @test_throws SingularException lu(A)
-    @test_throws SingularException lu!(copy(A))
-    @test_throws SingularException lu(A; check = true)
-    @test_throws SingularException lu!(copy(A); check = true)
-    @test !issuccess(lu(A; check = false))
-    @test !issuccess(lu!(copy(A); check = false))
-    @test_throws ZeroPivotException lu(A, NoPivot())
-    @test_throws ZeroPivotException lu!(copy(A), NoPivot())
-    @test_throws ZeroPivotException lu(A, NoPivot(); check = true)
-    @test_throws ZeroPivotException lu!(copy(A), NoPivot(); check = true)
-    @test !issuccess(lu(A, NoPivot(); check = false))
-    @test !issuccess(lu!(copy(A), NoPivot(); check = false))
-    F = lu(A; check = false)
-    @test sprint((io, x) -> show(io, "text/plain", x), F) ==
-        "Failed factorization of type $(typeof(F))"
-end
-
-@testset "conversion" begin
-    Random.seed!(4)
-    a = Tridiagonal(rand(9),rand(10),rand(9))
-    fa = Array(a)
-    falu = lu(fa)
-    alu = lu(a)
-    falu = convert(typeof(falu),alu)
-    @test Array(alu) == fa
-    @test AbstractArray(alu) == fa
-end
-
-@testset "Rational Matrices" begin
-    ## Integrate in general tests when more linear algebra is implemented in julia
-    a = convert(Matrix{Rational{BigInt}}, rand(1:10//1,n,n))/n
-    b = rand(1:10,n,2)
-    @inferred lu(a)
-    lua   = factorize(a)
-    l,u,p = lua.L, lua.U, lua.p
-    @test l*u ≈ a[p,:]
-    @test l[invperm(p),:]*u ≈ a
-    @test a*inv(lua) ≈ Matrix(I, n, n)
-    let Bs = b
-        for b in (Bs, view(Bs, 1:n, 1))
-            @test a*(lua\b) ≈ b
-        end
-    end
-    @test @inferred(det(a)) ≈ det(Array{Float64}(a))
-end
-
-@testset "Rational{BigInt} and BigFloat Hilbert Matrix" begin
-    ## Hilbert Matrix (very ill conditioned)
-    ## Testing Rational{BigInt} and BigFloat version
-    nHilbert = 50
-    H = Rational{BigInt}[1//(i+j-1) for i = 1:nHilbert,j = 1:nHilbert]
-    Hinv = Rational{BigInt}[(-1)^(i+j)*(i+j-1)*binomial(nHilbert+i-1,nHilbert-j)*
-        binomial(nHilbert+j-1,nHilbert-i)*binomial(i+j-2,i-1)^2
-        for i = big(1):nHilbert,j=big(1):nHilbert]
-    @test inv(H) == Hinv
-    setprecision(2^10) do
-        @test norm(Array{Float64}(inv(float(H)) - float(Hinv))) < 1e-100
-    end
-end
-
-@testset "logdet" begin
-    @test @inferred(logdet(ComplexF32[1.0f0 0.5f0; 0.5f0 -1.0f0])) === 0.22314355f0 + 3.1415927f0im
-    @test_throws DomainError logdet([1 1; 1 -1])
-end
-
-@testset "REPL printing" begin
-        bf = IOBuffer()
-        show(bf, "text/plain", lu(Matrix(I, 4, 4)))
-        seekstart(bf)
-        @test String(take!(bf)) == """
-LinearAlgebra.LU{Float64, Matrix{Float64}, Vector{$Int}}
-L factor:
-4×4 Matrix{Float64}:
- 1.0  0.0  0.0  0.0
- 0.0  1.0  0.0  0.0
- 0.0  0.0  1.0  0.0
- 0.0  0.0  0.0  1.0
-U factor:
-4×4 Matrix{Float64}:
- 1.0  0.0  0.0  0.0
- 0.0  1.0  0.0  0.0
- 0.0  0.0  1.0  0.0
- 0.0  0.0  0.0  1.0"""
-end
-
-@testset "propertynames" begin
-    names = sort!(collect(string.(Base.propertynames(lu(rand(3,3))))))
-    @test names == ["L", "P", "U", "p"]
-    allnames = sort!(collect(string.(Base.propertynames(lu(rand(3,3)), true))))
-    @test allnames == ["L", "P", "U", "factors", "info", "ipiv", "p"]
-end
-
-include("trickyarithmetic.jl")
-
-@testset "lu with type whose sum is another type" begin
-    A = TrickyArithmetic.A[1 2; 3 4]
-    ElT = TrickyArithmetic.D{TrickyArithmetic.C,TrickyArithmetic.C}
-    B = lu(A, NoPivot())
-    @test B isa LinearAlgebra.LU{ElT,Matrix{ElT}}
-end
-
-# dimensional correctness:
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
-using .Main.Furlongs
-
-@testset "lu factorization with dimension type" begin
-    n = 4
-    A = Matrix(Furlong(1.0) * I, n, n)
-    F = lu(A).factors
-    @test Diagonal(F) == Diagonal(A)
-    # upper triangular part has a unit Furlong{1}
-    @test all(x -> typeof(x) == Furlong{1, Float64}, F[i,j] for j=1:n for i=1:j)
-    # lower triangular part is unitless Furlong{0}
-    @test all(x -> typeof(x) == Furlong{0, Float64}, F[i,j] for j=1:n for i=j+1:n)
-end
-
-@testset "Issue #30917. Determinant of integer matrix" begin
-    @test det([1 1 0 0 1 0 0 0
-               1 0 1 0 0 1 0 0
-               1 0 0 1 0 0 1 0
-               0 1 1 1 0 0 0 0
-               0 1 0 0 0 0 1 1
-               0 0 1 0 1 0 0 1
-               0 0 0 1 1 1 0 0
-               0 0 0 0 1 1 0 1]) ≈ 6
-end
-
-@testset "Issue #33177. No ldiv!(LU, Adjoint)" begin
-    A = [1 0; 1 1]
-    B = [1 2; 2 8]
-    F = lu(B)
-    @test (A  / F') * B == A
-    @test (A' / F') * B == A'
-
-    a = complex.(randn(2), randn(2))
-    @test (a' / F') * B ≈ a'
-    @test (transpose(a) / F') * B ≈ transpose(a)
-
-    A = complex.(randn(2, 2), randn(2, 2))
-    @test (A' / F') * B ≈ A'
-    @test (transpose(A) / F') * B ≈ transpose(A)
-end
-
-@testset "0x0 matrix" begin
-    A = ones(0, 0)
-    F = lu(A)
-    @test F.U == ones(0, 0)
-    @test F.L == ones(0, 0)
-    @test F.P == ones(0, 0)
-    @test F.p == []
-end
-
-@testset "more rdiv! methods" begin
-    for elty in (Float16, Float64, ComplexF64), transform in (transpose, adjoint)
-        A = randn(elty, 5, 5)
-        C = copy(A)
-        B = randn(elty, 5, 5)
-        @test rdiv!(transform(A), transform(lu(B))) ≈ transform(C) / transform(B)
-    end
-end
-
-@testset "transpose(A) / lu(B)' should not overwrite A (#36657)" begin
-    for elty in (Float16, Float64, ComplexF64)
-        A = randn(elty, 5, 5)
-        B = randn(elty, 5, 5)
-        C = copy(A)
-        a = randn(elty, 5)
-        c = copy(a)
-        @test transpose(A) / lu(B)' ≈ transpose(A) / B'
-        @test transpose(a) / lu(B)' ≈ transpose(a) / B'
-        @test A == C
-        @test a == c
-    end
-end
-
-@testset "lu on *diagonal matrices" begin
-    dl = rand(3)
-    d = rand(4)
-    Bl = Bidiagonal(d, dl, :L)
-    Bu = Bidiagonal(d, dl, :U)
-    Tri = Tridiagonal(dl, d, dl)
-    Sym = SymTridiagonal(d, dl)
-    D = Diagonal(d)
-    b = ones(4)
-    B = rand(4,4)
-    for A in (Bl, Bu, Tri, Sym, D), pivot in (NoPivot(), RowMaximum())
-        @test A\b ≈ lu(A, pivot)\b
-        @test B/A ≈ B/lu(A, pivot)
-        @test B/A ≈ B/Matrix(A)
-        @test Matrix(lu(A, pivot)) ≈ A
-        @test @inferred(lu(A)) isa LU
-        if A isa Union{Bidiagonal, Diagonal, Tridiagonal, SymTridiagonal}
-            @test lu(A) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}}
-            @test lu(A, pivot) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}}
-            @test lu(A, pivot; check = false) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}}
-        end
-    end
-end
-
-@testset "can push to vector after 3-arg ldiv! (#43507)" begin
-    u = rand(3)
-    A = rand(3,3)
-    b = rand(3)
-    ldiv!(u,lu(A),b)
-    push!(b,4.0)
-    @test length(b) == 4
-end
-
-@testset "NaN matrix should throw error" begin
-    for eltya in (NaN16, NaN32, NaN64, BigFloat(NaN))
-        r = fill(eltya, 2, 3)
-        c = fill(complex(eltya, eltya), 2, 3)
-        @test_throws ArgumentError lu(r)
-        @test_throws ArgumentError lu(c)
-    end
-end
-
-@testset "more generic ldiv! #35419" begin
-    A = rand(3, 3)
-    b = rand(3)
-    @test A * ldiv!(lu(A), Base.ReshapedArray(copy(b)', (3,), ())) ≈ b
-end
-
-@testset "generic lu!" begin
-    A = rand(3,3); B = deepcopy(A); C = A[2:3,2:3]
-    Asub1 = @view(A[2:3,2:3])
-    F1 = lu!(Asub1)
-    Asub2 = @view(B[[2,3],[2,3]])
-    F2 = lu!(Asub2)
-    @test Matrix(F1) ≈ Matrix(F2) ≈ C
-end
-
-end # module TestLU
diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl
deleted file mode 100644
index e6000a4b24e2d..0000000000000
--- a/stdlib/LinearAlgebra/test/matmul.jl
+++ /dev/null
@@ -1,1010 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestMatmul
-
-using Base: rtoldefault
-using Test, LinearAlgebra, Random
-using LinearAlgebra: mul!, Symmetric, Hermitian
-
-## Test Julia fallbacks to BLAS routines
-
-mul_wrappers = [
-    m -> m,
-    m -> Symmetric(m, :U),
-    m -> Symmetric(m, :L),
-    m -> Hermitian(m, :U),
-    m -> Hermitian(m, :L),
-    m -> adjoint(m),
-    m -> transpose(m)]
-
-@testset "matrices with zero dimensions" begin
-    for (dimsA, dimsB, dimsC) in (
-        ((0, 5), (5, 3), (0, 3)),
-        ((3, 5), (5, 0), (3, 0)),
-        ((3, 0), (0, 4), (3, 4)),
-        ((0, 5), (5, 0), (0, 0)),
-        ((0, 0), (0, 4), (0, 4)),
-        ((3, 0), (0, 0), (3, 0)),
-        ((0, 0), (0, 0), (0, 0)))
-        @test Matrix{Float64}(undef, dimsA) * Matrix{Float64}(undef, dimsB) == zeros(dimsC)
-    end
-    @test Matrix{Float64}(undef, 5, 0) |> t -> t't == zeros(0, 0)
-    @test Matrix{Float64}(undef, 5, 0) |> t -> t * t' == zeros(5, 5)
-    @test Matrix{ComplexF64}(undef, 5, 0) |> t -> t't == zeros(0, 0)
-    @test Matrix{ComplexF64}(undef, 5, 0) |> t -> t * t' == zeros(5, 5)
-end
-@testset "2x2 matmul" begin
-    AA = [1 2; 3 4]
-    BB = [5 6; 7 8]
-    AAi = AA + (0.5 * im) .* BB
-    BBi = BB + (2.5 * im) .* AA[[2, 1], [2, 1]]
-    for A in (copy(AA), view(AA, 1:2, 1:2)), B in (copy(BB), view(BB, 1:2, 1:2))
-        @test A * B == [19 22; 43 50]
-        @test *(transpose(A), B) == [26 30; 38 44]
-        @test *(A, transpose(B)) == [17 23; 39 53]
-        @test *(transpose(A), transpose(B)) == [23 31; 34 46]
-    end
-    for Ai in (copy(AAi), view(AAi, 1:2, 1:2)), Bi in (copy(BBi), view(BBi, 1:2, 1:2))
-        @test Ai * Bi == [-21+53.5im -4.25+51.5im; -12+95.5im 13.75+85.5im]
-        @test *(adjoint(Ai), Bi) == [68.5-12im 57.5-28im; 88-3im 76.5-25im]
-        @test *(Ai, adjoint(Bi)) == [64.5+5.5im 43+31.5im; 104-18.5im 80.5+31.5im]
-        @test *(adjoint(Ai), adjoint(Bi)) == [-28.25-66im 9.75-58im; -26-89im 21-73im]
-        @test_throws DimensionMismatch [1 2; 0 0; 0 0] * [1 2]
-    end
-    for wrapper_a in mul_wrappers, wrapper_b in mul_wrappers
-        @test wrapper_a(AA) * wrapper_b(BB) == Array(wrapper_a(AA)) * Array(wrapper_b(BB))
-    end
-    @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 3, 3), AA, BB)
-end
-@testset "3x3 matmul" begin
-    AA = [1 2 3; 4 5 6; 7 8 9] .- 5
-    BB = [1 0 5; 6 -10 3; 2 -4 -1]
-    AAi = AA + (0.5 * im) .* BB
-    BBi = BB + (2.5 * im) .* AA[[2, 1, 3], [2, 3, 1]]
-    for A in (copy(AA), view(AA, 1:3, 1:3)), B in (copy(BB), view(BB, 1:3, 1:3))
-        @test A * B == [-26 38 -27; 1 -4 -6; 28 -46 15]
-        @test *(adjoint(A), B) == [-6 2 -25; 3 -12 -18; 12 -26 -11]
-        @test *(A, adjoint(B)) == [-14 0 6; 4 -3 -3; 22 -6 -12]
-        @test *(adjoint(A), adjoint(B)) == [6 -8 -6; 12 -9 -9; 18 -10 -12]
-    end
-    for Ai in (copy(AAi), view(AAi, 1:3, 1:3)), Bi in (copy(BBi), view(BBi, 1:3, 1:3))
-        @test Ai * Bi == [-44.75+13im 11.75-25im -38.25+30im; -47.75-16.5im -51.5+51.5im -56+6im; 16.75-4.5im -53.5+52im -15.5im]
-        @test *(adjoint(Ai), Bi) == [-21+2im -1.75+49im -51.25+19.5im; 25.5+56.5im -7-35.5im 22+35.5im; -3+12im -32.25+43im -34.75-2.5im]
-        @test *(Ai, adjoint(Bi)) == [-20.25+15.5im -28.75-54.5im 22.25+68.5im; -12.25+13im -15.5+75im -23+27im; 18.25+im 1.5+94.5im -27-54.5im]
-        @test *(adjoint(Ai), adjoint(Bi)) == [1+2im 20.75+9im -44.75+42im; 19.5+17.5im -54-36.5im 51-14.5im; 13+7.5im 11.25+31.5im -43.25-14.5im]
-        @test_throws DimensionMismatch [1 2 3; 0 0 0; 0 0 0] * [1 2 3]
-    end
-    for wrapper_a in mul_wrappers, wrapper_b in mul_wrappers
-        @test wrapper_a(AA) * wrapper_b(BB) == Array(wrapper_a(AA)) * Array(wrapper_b(BB))
-    end
-    @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 4, 4), AA, BB)
-end
-
-# Generic AbstractArrays
-module MyArray15367
-using Test, Random
-
-struct MyArray{T,N} <: AbstractArray{T,N}
-    data::Array{T,N}
-end
-
-Base.size(A::MyArray) = size(A.data)
-Base.getindex(A::MyArray, indices...) = A.data[indices...]
-
-A = MyArray(rand(4, 5))
-b = rand(5)
-@test A * b ≈ A.data * b
-end
-
-@testset "Generic integer matrix multiplication" begin
-    AA = [1 2 3; 4 5 6] .- 3
-    BB = [2 -2; 3 -5; -4 7]
-    for A in (copy(AA), view(AA, 1:2, 1:3)), B in (copy(BB), view(BB, 1:3, 1:2))
-        @test A * B == [-7 9; -4 9]
-        @test *(transpose(A), transpose(B)) == [-6 -11 15; -6 -13 18; -6 -15 21]
-    end
-    AA = fill(1, 2, 100)
-    BB = fill(1, 100, 3)
-    for A in (copy(AA), view(AA, 1:2, 1:100)), B in (copy(BB), view(BB, 1:100, 1:3))
-        @test A * B == [100 100 100; 100 100 100]
-    end
-    AA = rand(1:20, 5, 5) .- 10
-    BB = rand(1:20, 5, 5) .- 10
-    CC = Matrix{Int}(undef, size(AA, 1), size(BB, 2))
-    for A in (copy(AA), view(AA, 1:5, 1:5)), B in (copy(BB), view(BB, 1:5, 1:5)), C in (copy(CC), view(CC, 1:5, 1:5))
-        @test *(transpose(A), B) == A' * B
-        @test *(A, transpose(B)) == A * B'
-        # Preallocated
-        @test mul!(C, A, B) == A * B
-        @test mul!(C, transpose(A), B) == A' * B
-        @test mul!(C, A, transpose(B)) == A * B'
-        @test mul!(C, transpose(A), transpose(B)) == A' * B'
-        @test LinearAlgebra.mul!(C, adjoint(A), transpose(B)) == A' * transpose(B)
-
-        # Inplace multiply-add
-        α = rand(-10:10)
-        β = rand(-10:10)
-        rand!(C, -10:10)
-        βC = β * C
-        _C0 = copy(C)
-        C0() = (C .= _C0; C)  # reset C but don't change the container type
-        @test mul!(C0(), A, B, α, β) == α * A * B .+ βC
-        @test mul!(C0(), transpose(A), B, α, β) == α * A' * B .+ βC
-        @test mul!(C0(), A, transpose(B), α, β) == α * A * B' .+ βC
-        @test mul!(C0(), transpose(A), transpose(B), α, β) == α * A' * B' .+ βC
-        @test mul!(C0(), adjoint(A), transpose(B), α, β) == α * A' * transpose(B) .+ βC
-
-        #test DimensionMismatch for generic_matmatmul
-        @test_throws DimensionMismatch LinearAlgebra.mul!(C, adjoint(A), transpose(fill(1, 4, 4)))
-        @test_throws DimensionMismatch LinearAlgebra.mul!(C, adjoint(fill(1, 4, 4)), transpose(B))
-    end
-    vv = [1, 2]
-    CC = Matrix{Int}(undef, 2, 2)
-    for v in (copy(vv), view(vv, 1:2)), C in (copy(CC), view(CC, 1:2, 1:2))
-        @test @inferred(mul!(C, v, adjoint(v))) == [1 2; 2 4]
-
-        C .= [1 0; 0 1]
-        @test @inferred(mul!(C, v, adjoint(v), 2, 3)) == [5 4; 4 11]
-    end
-end
-
-@testset "generic_matvecmul" begin
-    AA = rand(5, 5)
-    BB = rand(5)
-    for A in (copy(AA), view(AA, 1:5, 1:5)), B in (copy(BB), view(BB, 1:5))
-        @test_throws DimensionMismatch LinearAlgebra.generic_matvecmul!(zeros(6), 'N', A, B)
-        @test_throws DimensionMismatch LinearAlgebra.generic_matvecmul!(B, 'N', A, zeros(6))
-    end
-    vv = [1, 2, 3]
-    CC = Matrix{Int}(undef, 3, 3)
-    for v in (copy(vv), view(vv, 1:3)), C in (copy(CC), view(CC, 1:3, 1:3))
-        @test mul!(C, v, transpose(v)) == v * v'
-        C .= C0 = rand(-10:10, size(C))
-        @test mul!(C, v, transpose(v), 2, 3) == 2v * v' .+ 3C0
-    end
-    vvf = map(Float64, vv)
-    CC = Matrix{Float64}(undef, 3, 3)
-    for vf in (copy(vvf), view(vvf, 1:3)), C in (copy(CC), view(CC, 1:3, 1:3))
-        @test mul!(C, vf, transpose(vf)) == vf * vf'
-        C .= C0 = rand(eltype(C), size(C))
-        @test mul!(C, vf, transpose(vf), 2, 3) ≈ 2vf * vf' .+ 3C0
-    end
-end
-
-@testset "generic_matvecmul for vectors of vectors" begin
-    @testset "matrix of scalars" begin
-        u = [[1, 2], [3, 4]]
-        A = [1 2; 3 4]
-        v = [[0, 0], [0, 0]]
-        Au = [[7, 10], [15, 22]]
-        @test A * u == Au
-        mul!(v, A, u)
-        @test v == Au
-        mul!(v, A, u, 2, -1)
-        @test v == Au
-    end
-
-    @testset "matrix of matrices" begin
-        u = [[1, 2], [3, 4]]
-        A = Matrix{Matrix{Int}}(undef, 2, 2)
-        A[1, 1] = [1 2; 3 4]
-        A[1, 2] = [5 6; 7 8]
-        A[2, 1] = [9 10; 11 12]
-        A[2, 2] = [13 14; 15 16]
-        v = [[0, 0], [0, 0]]
-        Au = [[44, 64], [124, 144]]
-        @test A * u == Au
-        mul!(v, A, u)
-        @test v == Au
-        mul!(v, A, u, 2, -1)
-        @test v == Au
-    end
-end
-
-@testset "generic_matmatmul for matrices of vectors" begin
-    B = Matrix{Vector{Int}}(undef, 2, 2)
-    B[1, 1] = [1, 2]
-    B[2, 1] = [3, 4]
-    B[1, 2] = [5, 6]
-    B[2, 2] = [7, 8]
-    A = [1 2; 3 4]
-    C = Matrix{Vector{Int}}(undef, 2, 2)
-    AB = Matrix{Vector{Int}}(undef, 2, 2)
-    AB[1, 1] = [7, 10]
-    AB[2, 1] = [15, 22]
-    AB[1, 2] = [19, 22]
-    AB[2, 2] = [43, 50]
-    @test A * B == AB
-    mul!(C, A, B)
-    @test C == AB
-    mul!(C, A, B, 2, -1)
-    @test C == AB
-    LinearAlgebra._generic_matmatmul!(C, 'N', 'N', A, B, LinearAlgebra.MulAddMul(2, -1))
-    @test C == AB
-end
-
-@testset "fallbacks & such for BlasFloats" begin
-    AA = rand(Float64, 6, 6)
-    BB = rand(Float64, 6, 6)
-    CC = zeros(Float64, 6, 6)
-    for A in (copy(AA), view(AA, 1:6, 1:6)), B in (copy(BB), view(BB, 1:6, 1:6)), C in (copy(CC), view(CC, 1:6, 1:6))
-        @test LinearAlgebra.mul!(C, transpose(A), transpose(B)) == transpose(A) * transpose(B)
-        @test LinearAlgebra.mul!(C, A, adjoint(B)) == A * transpose(B)
-        @test LinearAlgebra.mul!(C, adjoint(A), B) == transpose(A) * B
-
-        # Inplace multiply-add
-        α = rand(Float64)
-        β = rand(Float64)
-        rand!(C)
-        βC = β * C
-        _C0 = copy(C)
-        C0() = (C .= _C0; C)  # reset C but don't change the container type
-        @test mul!(C0(), transpose(A), transpose(B), α, β) ≈ α * transpose(A) * transpose(B) .+ βC
-        @test mul!(C0(), A, adjoint(B), α, β) ≈ α * A * transpose(B) .+ βC
-        @test mul!(C0(), adjoint(A), B, α, β) ≈ α * transpose(A) * B .+ βC
-    end
-end
-
-@testset "mixed Blas-non-Blas matmul" begin
-    AA = rand(-10:10, 6, 6)
-    BB = rand(Float64, 6, 6)
-    CC = zeros(Float64, 6, 6)
-    for A in (copy(AA), view(AA, 1:6, 1:6)), B in (copy(BB), view(BB, 1:6, 1:6)), C in (copy(CC), view(CC, 1:6, 1:6))
-        @test LinearAlgebra.mul!(C, A, B) == A * B
-        @test LinearAlgebra.mul!(C, transpose(A), transpose(B)) == transpose(A) * transpose(B)
-        @test LinearAlgebra.mul!(C, A, adjoint(B)) == A * transpose(B)
-        @test LinearAlgebra.mul!(C, adjoint(A), B) == transpose(A) * B
-    end
-end
-
-@testset "matrix algebra with subarrays of floats (stride != 1)" begin
-    A = reshape(map(Float64, 1:20), 5, 4)
-    Aref = A[1:2:end, 1:2:end]
-    Asub = view(A, 1:2:5, 1:2:4)
-    b = [1.2, -2.5]
-    @test (Aref * b) == (Asub * b)
-    @test *(transpose(Asub), Asub) == *(transpose(Aref), Aref)
-    @test *(Asub, transpose(Asub)) == *(Aref, transpose(Aref))
-    Ai = A .+ im
-    Aref = Ai[1:2:end, 1:2:end]
-    Asub = view(Ai, 1:2:5, 1:2:4)
-    @test *(adjoint(Asub), Asub) == *(adjoint(Aref), Aref)
-    @test *(Asub, adjoint(Asub)) == *(Aref, adjoint(Aref))
-end
-
-@testset "matrix x matrix with negative stride" begin
-    M = reshape(map(Float64, 1:77), 7, 11)
-    N = reshape(map(Float64, 1:63), 9, 7)
-    U = view(M, 7:-1:1, 11:-2:1)
-    V = view(N, 7:-1:2, 7:-1:1)
-    @test U * V ≈ Matrix(U) * Matrix(V)
-end
-
-@testset "dot product of subarrays of vectors (floats, negative stride, issue #37767)" begin
-    for T in (Float32, Float64, ComplexF32, ComplexF64)
-        a = Vector{T}(3:2:7)
-        b = Vector{T}(1:10)
-        v = view(b, 7:-2:3)
-        @test dot(a, Vector(v)) ≈ 67.0
-        @test dot(a, v) ≈ 67.0
-        @test dot(v, a) ≈ 67.0
-        @test dot(Vector(v), Vector(v)) ≈ 83.0
-        @test dot(v, v) ≈ 83.0
-    end
-end
-
-@testset "dot product of stride-vector like input" begin
-    for T in (Float32, Float64, ComplexF32, ComplexF64)
-        a = randn(T, 10)
-        b = view(a, 1:10)
-        c = reshape(b, 5, 2)
-        d = view(c, :, 1:2)
-        r = sum(abs2, a)
-        for x in (a,b,c,d), y in (a,b,c,d)
-            @test dot(x, y) ≈ r
-        end
-    end
-end
-
-@testset "Complex matrix x real MatOrVec etc (issue #29224)" for T in (Float32, Float64)
-    A0 = randn(complex(T), 10, 10)
-    B0 = randn(T, 10, 10)
-    @testset "Combination Mat{$(complex(T))} Mat{$T}" for Bax1 in (1:5, 2:2:10), Bax2 in (1:5, 2:2:10)
-        B = view(A0, Bax1, Bax2)
-        tB = transpose(B)
-        Bd, tBd = copy(B), copy(tB)
-        for Aax1 in (1:5, 2:2:10, (:)), Aax2 in (1:5, 2:2:10)
-            A = view(A0, Aax1, Aax2)
-            AB_correct = copy(A) * Bd
-            AtB_correct = copy(A) * tBd
-            @test A*Bd ≈ AB_correct # view times matrix
-            @test A*B ≈ AB_correct # view times view
-            @test A*tBd ≈ AtB_correct # view times transposed matrix
-            @test A*tB ≈ AtB_correct # view times transposed view
-        end
-    end
-    x = randn(T, 10)
-    y0 = similar(A0, 20)
-    @testset "Combination Mat{$(complex(T))} Vec{$T}" for Aax1 in (1:5, 2:2:10, (:)), Aax2 in (1:5, 2:2:10)
-        A = view(A0, Aax1, Aax2)
-        Ad = copy(A)
-        for indx in (1:5, 1:2:10, 6:-1:2)
-            vx = view(x, indx)
-            dx = x[indx]
-            Ax_correct = Ad*dx
-            @test A*vx ≈ A*dx ≈ Ad*vx ≈ Ax_correct # view/matrix times view/vector
-            for indy in (1:2:2size(A,1), size(A,1):-1:1)
-                y = view(y0, indy)
-                @test mul!(y, A, vx) ≈ mul!(y, A, dx) ≈ mul!(y, Ad, vx) ≈
-                    mul!(y, Ad, dx) ≈ Ax_correct   # test for uncontiguous dest
-            end
-        end
-    end
-end
-
-@testset "real matrix x complex vec" begin
-    _matmulres(M, v) = [mapreduce(*, +, row, v) for row in eachrow(M)]
-    testmatmul(M, v) = @test M * v ≈ _matmulres(M, v)
-
-    @testset for T in (Float32, Float64), n = (4, 5)
-        M1 = reshape(Vector{T}(1:n^2), n, n)
-        M2 = reinterpret(reshape, T, [Tuple(T(i + j) for j in 1:n) for i in 1:n])
-        v = convert(Vector{Complex{T}}, (1:n) .+ im .* (4 .+ (1:n)))
-
-        for M in (M1, M2)
-            M_view_cont = @view M[:, :]
-            v_view_cont = @view v[:]
-            for _M in (M, M_view_cont), _v in (v, v_view_cont)
-                testmatmul(_M, _v)
-            end
-
-            # construct a view with strides(M, 1) == 1 and strides(M, 2) != 1
-            ax_noncont = 1:2:n
-            n1 = length(ax_noncont)
-            M_view_noncont = @view M[1:n1, ax_noncont]
-            v_view_noncont = @view v[ax_noncont]
-            testmatmul(M_view_noncont, v_view_noncont)
-
-            @testset for op in (transpose, adjoint)
-                for _M in (M, M_view_cont), _v in (v, v_view_cont)
-                    _M2 = op(_M)
-                    testmatmul(_M2, _v)
-                end
-                _M2 = op(M_view_noncont)
-                testmatmul(_M2, v_view_noncont)
-            end
-        end
-    end
-end
-
-@testset "matrix x vector with negative lda or 0 stride" for T in (Float32, Float64)
-    for TA in (T, complex(T)), TB in (T, complex(T))
-        A = view(randn(TA, 10, 10), 1:10, 10:-1:1) # negative lda
-        v = view([randn(TB)], 1 .+ 0(1:10)) # 0 stride
-        Ad, vd = copy(A), copy(v)
-        @test Ad * vd ≈ A * vd ≈ Ad * v ≈ A * v
-    end
-end
-
-@testset "issue #15286" begin
-    A = reshape(map(Float64, 1:20), 5, 4)
-    C = zeros(8, 8)
-    sC = view(C, 1:2:8, 1:2:8)
-    B = reshape(map(Float64, -9:10), 5, 4)
-    @test mul!(sC, transpose(A), A) == A' * A
-    @test mul!(sC, transpose(A), B) == A' * B
-
-    Aim = A .- im
-    C = zeros(ComplexF64, 8, 8)
-    sC = view(C, 1:2:8, 1:2:8)
-    B = reshape(map(Float64, -9:10), 5, 4) .+ im
-    @test mul!(sC, adjoint(Aim), Aim) == Aim' * Aim
-    @test mul!(sC, adjoint(Aim), B) == Aim' * B
-end
-
-@testset "syrk & herk" begin
-    AA = reshape(1:1503, 501, 3) .- 750.0
-    res = Float64[135228751 9979252 -115270247; 9979252 10481254 10983256; -115270247 10983256 137236759]
-    for A in (copy(AA), view(AA, 1:501, 1:3))
-        @test *(transpose(A), A) == res
-        @test *(adjoint(A), transpose(copy(A'))) == res
-    end
-    cutoff = 501
-    A = reshape(1:6*cutoff, 2 * cutoff, 3) .- (6 * cutoff) / 2
-    Asub = view(A, 1:2:2*cutoff, 1:3)
-    Aref = A[1:2:2*cutoff, 1:3]
-    @test *(transpose(Asub), Asub) == *(transpose(Aref), Aref)
-    Ai = A .- im
-    Asub = view(Ai, 1:2:2*cutoff, 1:3)
-    Aref = Ai[1:2:2*cutoff, 1:3]
-    @test *(adjoint(Asub), Asub) == *(adjoint(Aref), Aref)
-
-    A5x5, A6x5 = Matrix{Float64}.(undef, ((5, 5), (6, 5)))
-    @test_throws DimensionMismatch LinearAlgebra.syrk_wrapper!(A5x5, 'N', A6x5)
-    @test_throws DimensionMismatch LinearAlgebra.herk_wrapper!(A5x5, 'N', A6x5)
-end
-
-@testset "matmul for types w/o sizeof (issue #1282)" begin
-    AA = fill(complex(1, 1), 10, 10)
-    for A in (copy(AA), view(AA, 1:10, 1:10))
-        A2 = A^2
-        @test A2[1, 1] == 20im
-    end
-end
-
-@testset "mul! (scaling)" begin
-    A5x5, b5, C5x6 = Array{Float64}.(undef, ((5, 5), 5, (5, 6)))
-    for A in (A5x5, view(A5x5, :, :)), b in (b5, view(b5, :)), C in (C5x6, view(C5x6, :, :))
-        @test_throws DimensionMismatch mul!(A, Diagonal(b), C)
-    end
-end
-
-@testset "muladd" begin
-    A23 = reshape(1:6, 2, 3) .+ 0
-    B34 = reshape(1:12, 3, 4) .+ im
-    u2 = [10, 20]
-    v3 = [3, 5, 7] .+ im
-    w4 = [11, 13, 17, 19im]
-
-    @testset "matrix-matrix" begin
-        @test muladd(A23, B34, 0) == A23 * B34
-        @test muladd(A23, B34, 100) == A23 * B34 .+ 100
-        @test muladd(A23, B34, u2) == A23 * B34 .+ u2
-        @test muladd(A23, B34, w4') == A23 * B34 .+ w4'
-        @test_throws DimensionMismatch muladd(B34, A23, 1)
-        @test muladd(ones(1, 3), ones(3, 4), ones(1, 4)) == fill(4.0, 1, 4)
-        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3, 4), ones(9, 4))
-
-        # broadcasting fallback method allows trailing dims
-        @test muladd(A23, B34, ones(2, 4, 1)) == A23 * B34 + ones(2, 4, 1)
-        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3, 4), ones(9, 4, 1))
-        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3, 4), ones(1, 4, 9))
-        # and catches z::Array{T,0}
-        @test muladd(A23, B34, fill(0)) == A23 * B34
-    end
-    @testset "matrix-vector" begin
-        @test muladd(A23, v3, 0) == A23 * v3
-        @test muladd(A23, v3, 100) == A23 * v3 .+ 100
-        @test muladd(A23, v3, u2) == A23 * v3 .+ u2
-        @test muladd(A23, v3, im) isa Vector{Complex{Int}}
-        @test muladd(ones(1, 3), ones(3), ones(1)) == [4]
-        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3), ones(7))
-
-        # fallback
-        @test muladd(A23, v3, ones(2, 1, 1)) == A23 * v3 + ones(2, 1, 1)
-        @test_throws DimensionMismatch muladd(A23, v3, ones(2, 2))
-        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3), ones(7, 1))
-        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3), ones(1, 7))
-        @test muladd(A23, v3, fill(0)) == A23 * v3
-    end
-    @testset "adjoint-matrix" begin
-        @test muladd(v3', B34, 0) isa Adjoint
-        @test muladd(v3', B34, 2im) == v3' * B34 .+ 2im
-        @test muladd(v3', B34, w4') == v3' * B34 .+ w4'
-
-        # via fallback
-        @test muladd(v3', B34, ones(1, 4)) == (B34' * v3 + ones(4, 1))'
-        @test_throws DimensionMismatch muladd(v3', B34, ones(7, 4))
-        @test_throws DimensionMismatch muladd(v3', B34, ones(1, 4, 7))
-        @test muladd(v3', B34, fill(0)) == v3' * B34 # does not make an Adjoint
-    end
-    @testset "vector-adjoint" begin
-        @test muladd(u2, v3', 0) isa Matrix
-        @test muladd(u2, v3', 99) == u2 * v3' .+ 99
-        @test muladd(u2, v3', A23) == u2 * v3' .+ A23
-
-        @test muladd(u2, v3', ones(2, 3, 1)) == u2 * v3' + ones(2, 3, 1)
-        @test_throws DimensionMismatch muladd(u2, v3', ones(2, 3, 4))
-        @test_throws DimensionMismatch muladd([1], v3', ones(7, 3))
-        @test muladd(u2, v3', fill(0)) == u2 * v3'
-    end
-    @testset "dot" begin # all use muladd(::Any, ::Any, ::Any)
-        @test muladd(u2', u2, 0) isa Number
-        @test muladd(v3', v3, im) == dot(v3, v3) + im
-        @test muladd(u2', u2, [1]) == [dot(u2, u2) + 1]
-        @test_throws DimensionMismatch muladd(u2', u2, [1, 1]) == [dot(u2, u2) + 1]
-        @test muladd(u2', u2, fill(0)) == dot(u2, u2)
-    end
-    @testset "arrays of arrays" begin
-        vofm = [rand(1:9, 2, 2) for _ in 1:3]
-        Mofm = [rand(1:9, 2, 2) for _ in 1:3, _ in 1:3]
-
-        @test muladd(vofm', vofm, vofm[1]) == vofm' * vofm .+ vofm[1] # inner
-        @test muladd(vofm, vofm', Mofm) == vofm * vofm' .+ Mofm       # outer
-        @test muladd(vofm', Mofm, vofm') == vofm' * Mofm .+ vofm'     # bra-mat
-        @test muladd(Mofm, Mofm, vofm) == Mofm * Mofm .+ vofm         # mat-mat
-        @test muladd(Mofm, vofm, vofm) == Mofm * vofm .+ vofm         # mat-vec
-    end
-end
-
-@testset "muladd & structured matrices" begin
-    A33 = reshape(1:9, 3, 3) .+ im
-    v3 = [3, 5, 7im]
-
-    # no special treatment
-    @test muladd(Symmetric(A33), Symmetric(A33), 1) == Symmetric(A33) * Symmetric(A33) .+ 1
-    @test muladd(Hermitian(A33), Hermitian(A33), v3) == Hermitian(A33) * Hermitian(A33) .+ v3
-    @test muladd(adjoint(A33), transpose(A33), A33) == A33' * transpose(A33) .+ A33
-
-    u1 = muladd(UpperTriangular(A33), UpperTriangular(A33), Diagonal(v3))
-    @test u1 isa UpperTriangular
-    @test u1 == UpperTriangular(A33) * UpperTriangular(A33) + Diagonal(v3)
-
-    # diagonal
-    @test muladd(Diagonal(v3), Diagonal(A33), Diagonal(v3)).diag == ([1, 5, 9] .+ im .+ 1) .* v3
-
-    # uniformscaling
-    @test muladd(Diagonal(v3), I, I).diag == v3 .+ 1
-    @test muladd(2 * I, 3 * I, I).λ == 7
-    @test muladd(A33, A33', I) == A33 * A33' + I
-
-    # https://github.com/JuliaLang/julia/issues/38426
-    @test @evalpoly(A33, 1.0 * I, 1.0 * I) == I + A33
-    @test @evalpoly(A33, 1.0 * I, 1.0 * I, 1.0 * I) == I + A33 + A33^2
-end
-
-# issue #6450
-@test dot(Any[1.0, 2.0], Any[3.5, 4.5]) === 12.5
-
-@testset "dot" for elty in (Float32, Float64, ComplexF32, ComplexF64)
-    x = convert(Vector{elty}, [1.0, 2.0, 3.0])
-    y = convert(Vector{elty}, [3.5, 4.5, 5.5])
-    @test_throws DimensionMismatch dot(x, 1:2, y, 1:3)
-    @test_throws BoundsError dot(x, 1:4, y, 1:4)
-    @test_throws BoundsError dot(x, 1:3, y, 2:4)
-    @test dot(x, 1:2, y, 1:2) == convert(elty, 12.5)
-    @test transpose(x) * y == convert(elty, 29.0)
-    X = convert(Matrix{elty}, [1.0 2.0; 3.0 4.0])
-    Y = convert(Matrix{elty}, [1.5 2.5; 3.5 4.5])
-    @test dot(X, Y) == convert(elty, 35.0)
-    Z = Matrix{elty}[reshape(1:4, 2, 2), fill(1, 2, 2)]
-    @test dot(Z, Z) == convert(elty, 34.0)
-end
-
-dot1(x, y) = invoke(dot, Tuple{Any,Any}, x, y)
-dot2(x, y) = invoke(dot, Tuple{AbstractArray,AbstractArray}, x, y)
-@testset "generic dot" begin
-    AA = [1+2im 3+4im; 5+6im 7+8im]
-    BB = [2+7im 4+1im; 3+8im 6+5im]
-    for A in (copy(AA), view(AA, 1:2, 1:2)), B in (copy(BB), view(BB, 1:2, 1:2))
-        @test dot(A, B) == dot(vec(A), vec(B)) == dot1(A, B) == dot2(A, B) == dot(float.(A), float.(B))
-        @test dot(Int[], Int[]) == 0 == dot1(Int[], Int[]) == dot2(Int[], Int[])
-        @test_throws MethodError dot(Any[], Any[])
-        @test_throws MethodError dot1(Any[], Any[])
-        @test_throws MethodError dot2(Any[], Any[])
-        for n1 = 0:2, n2 = 0:2, d in (dot, dot1, dot2)
-            if n1 != n2
-                @test_throws DimensionMismatch d(1:n1, 1:n2)
-            else
-                @test d(1:n1, 1:n2) ≈ norm(1:n1)^2
-            end
-        end
-    end
-end
-
-@testset "Issue 11978" begin
-    A = Matrix{Matrix{Float64}}(undef, 2, 2)
-    A[1, 1] = Matrix(1.0I, 3, 3)
-    A[2, 2] = Matrix(1.0I, 2, 2)
-    A[1, 2] = Matrix(1.0I, 3, 2)
-    A[2, 1] = Matrix(1.0I, 2, 3)
-    b = Vector{Vector{Float64}}(undef, 2)
-    b[1] = fill(1.0, 3)
-    b[2] = fill(1.0, 2)
-    @test A * b == Vector{Float64}[[2, 2, 1], [2, 2]]
-end
-
-@test_throws ArgumentError LinearAlgebra.copytri!(Matrix{Float64}(undef, 10, 10), 'Z')
-
-@testset "Issue 30055" begin
-    B = [1+im 2+im 3+im; 4+im 5+im 6+im; 7+im 9+im im]
-    A = UpperTriangular(B)
-    @test copy(transpose(A)) == transpose(A)
-    @test copy(A') == A'
-    A = LowerTriangular(B)
-    @test copy(transpose(A)) == transpose(A)
-    @test copy(A') == A'
-    B = Matrix{Matrix{Complex{Int}}}(undef, 2, 2)
-    B[1, 1] = [1+im 2+im; 3+im 4+im]
-    B[2, 1] = [1+2im 1+3im; 1+3im 1+4im]
-    B[1, 2] = [7+im 8+2im; 9+3im 4im]
-    B[2, 2] = [9+im 8+im; 7+im 6+im]
-    A = UpperTriangular(B)
-    @test copy(transpose(A)) == transpose(A)
-    @test copy(A') == A'
-    A = LowerTriangular(B)
-    @test copy(transpose(A)) == transpose(A)
-    @test copy(A') == A'
-end
-
-@testset "gemv! and gemm_wrapper for $elty" for elty in [Float32, Float64, ComplexF64, ComplexF32]
-    A10x10, x10, x11 = Array{elty}.(undef, ((10, 10), 10, 11))
-    @test_throws DimensionMismatch LinearAlgebra.gemv!(x10, 'N', A10x10, x11)
-    @test_throws DimensionMismatch LinearAlgebra.gemv!(x11, 'N', A10x10, x10)
-    @test LinearAlgebra.gemv!(elty[], 'N', Matrix{elty}(undef, 0, 0), elty[]) == elty[]
-    @test LinearAlgebra.gemv!(x10, 'N', Matrix{elty}(undef, 10, 0), elty[]) == zeros(elty, 10)
-
-    I0x0 = Matrix{elty}(I, 0, 0)
-    I10x10 = Matrix{elty}(I, 10, 10)
-    I10x11 = Matrix{elty}(I, 10, 11)
-    @test LinearAlgebra.gemm_wrapper('N', 'N', I10x10, I10x10) == I10x10
-    @test_throws DimensionMismatch LinearAlgebra.gemm_wrapper!(I10x10, 'N', 'N', I10x11, I10x10)
-    @test_throws DimensionMismatch LinearAlgebra.gemm_wrapper!(I10x10, 'N', 'N', I0x0, I0x0)
-
-    A = rand(elty, 3, 3)
-    @test LinearAlgebra.matmul3x3('T', 'N', A, Matrix{elty}(I, 3, 3)) == transpose(A)
-end
-
-@testset "#13593, #13488" begin
-    aa = rand(3, 3)
-    bb = rand(3, 3)
-    for a in (copy(aa), view(aa, 1:3, 1:3)), b in (copy(bb), view(bb, 1:3, 1:3))
-        @test_throws ArgumentError mul!(a, a, b)
-        @test_throws ArgumentError mul!(a, b, a)
-        @test_throws ArgumentError mul!(a, a, a)
-    end
-end
-
-@testset "#35163" begin
-    # typemax(Int32) * Int32(1) + Int32(1) * Int32(1) should wrap around
-    # not promote to Int64, convert to Int32 and throw inexacterror
-    val = mul!(Int32[1], fill(typemax(Int32), 1, 1), Int32[1], Int32(1), Int32(1))
-    @test val[1] == typemin(Int32)
-end
-
-# Number types that lack conversion to the destination type
-struct RootInt
-    i::Int
-end
-import Base: *, adjoint, transpose
-import LinearAlgebra: Adjoint, Transpose
-(*)(x::RootInt, y::RootInt) = x.i * y.i
-adjoint(x::RootInt) = x
-transpose(x::RootInt) = x
-Adjoint(x::RootInt) = x
-Transpose(x::RootInt) = x
-# TODO once Adjoint/Transpose constructors call adjoint/transpose recursively
-# rather than Adjoint/Transpose, the additional definitions should become unnecessary
-
-@test Base.promote_op(*, RootInt, RootInt) === Int
-
-@testset "#14293" begin
-    a = [RootInt(3)]
-    C = [0;;]
-    mul!(C, a, transpose(a))
-    @test C[1] == 9
-    C = [1;;]
-    mul!(C, a, transpose(a), 2, 3)
-    @test C[1] == 21
-    a = [RootInt(2), RootInt(10)]
-    @test a * adjoint(a) == [4 20; 20 100]
-    A = [RootInt(3) RootInt(5)]
-    @test A * a == [56]
-end
-
-function test_mul(C, A, B)
-    mul!(C, A, B)
-    @test Array(A) * Array(B) ≈ C
-    @test A * B ≈ C
-
-    # This is similar to how `isapprox` choose `rtol` (when `atol=0`)
-    # but consider all number types involved:
-    rtol = max(rtoldefault.(real.(eltype.((C, A, B))))...)
-
-    rand!(C)
-    T = promote_type(eltype.((A, B))...)
-    α = rand(T)
-    β = rand(T)
-    βArrayC = β * Array(C)
-    βC = β * C
-    mul!(C, A, B, α, β)
-    @test α * Array(A) * Array(B) .+ βArrayC ≈ C rtol = rtol
-    @test α * A * B .+ βC ≈ C rtol = rtol
-end
-
-@testset "mul! vs * for special types" begin
-    eltypes = [Float32, Float64, Int64]
-    for k in [3, 4, 10]
-        T = rand(eltypes)
-        bi1 = Bidiagonal(rand(T, k), rand(T, k - 1), rand([:U, :L]))
-        bi2 = Bidiagonal(rand(T, k), rand(T, k - 1), rand([:U, :L]))
-        tri1 = Tridiagonal(rand(T, k - 1), rand(T, k), rand(T, k - 1))
-        tri2 = Tridiagonal(rand(T, k - 1), rand(T, k), rand(T, k - 1))
-        stri1 = SymTridiagonal(rand(T, k), rand(T, k - 1))
-        stri2 = SymTridiagonal(rand(T, k), rand(T, k - 1))
-        C = rand(T, k, k)
-        specialmatrices = (bi1, bi2, tri1, tri2, stri1, stri2)
-        for A in specialmatrices
-            B = specialmatrices[rand(1:length(specialmatrices))]
-            test_mul(C, A, B)
-        end
-        for S in specialmatrices
-            l = rand(1:6)
-            B = randn(k, l)
-            C = randn(k, l)
-            test_mul(C, S, B)
-            A = randn(l, k)
-            C = randn(l, k)
-            test_mul(C, A, S)
-        end
-    end
-    for T in eltypes
-        A = Bidiagonal(rand(T, 2), rand(T, 1), rand([:U, :L]))
-        B = Bidiagonal(rand(T, 2), rand(T, 1), rand([:U, :L]))
-        C = randn(2, 2)
-        test_mul(C, A, B)
-        B = randn(2, 9)
-        C = randn(2, 9)
-        test_mul(C, A, B)
-    end
-    let
-        tri44 = Tridiagonal(randn(3), randn(4), randn(3))
-        tri33 = Tridiagonal(randn(2), randn(3), randn(2))
-        full43 = randn(4, 3)
-        full24 = randn(2, 4)
-        full33 = randn(3, 3)
-        full44 = randn(4, 4)
-        @test_throws DimensionMismatch mul!(full43, tri44, tri33)
-        @test_throws DimensionMismatch mul!(full44, tri44, tri33)
-        @test_throws DimensionMismatch mul!(full44, tri44, full43)
-        @test_throws DimensionMismatch mul!(full43, tri33, full43)
-        @test_throws DimensionMismatch mul!(full43, full43, tri44)
-    end
-end
-
-# #18218
-module TestPR18218
-using Test
-import Base.*, Base.+, Base.zero
-struct TypeA
-    x::Int
-end
-Base.convert(::Type{TypeA}, x::Int) = TypeA(x)
-struct TypeB
-    x::Int
-end
-struct TypeC
-    x::Int
-end
-Base.convert(::Type{TypeC}, x::Int) = TypeC(x)
-zero(c::TypeC) = TypeC(0)
-zero(::Type{TypeC}) = TypeC(0)
-(*)(x::Int, a::TypeA) = TypeB(x * a.x)
-(*)(a::TypeA, x::Int) = TypeB(a.x * x)
-(+)(a::Union{TypeB,TypeC}, b::Union{TypeB,TypeC}) = TypeC(a.x + b.x)
-A = TypeA[1 2; 3 4]
-b = [1, 2]
-d = A * b
-@test typeof(d) == Vector{TypeC}
-@test d == TypeC[5, 11]
-end
-
-@testset "VecOrMat of Vectors" begin
-    X = rand(ComplexF64, 3, 3)
-    Xv1 = [X[:, j] for i in 1:1, j in 1:3]
-    Xv2 = [transpose(X[i, :]) for i in 1:3]
-    Xv3 = [transpose(X[i, :]) for i in 1:3, j in 1:1]
-
-    XX = X * X
-    XtX = transpose(X) * X
-    XcX = X' * X
-    XXt = X * transpose(X)
-    XtXt = transpose(XX)
-    XcXt = X' * transpose(X)
-    XXc = X * X'
-    XtXc = transpose(X) * X'
-    XcXc = X' * X'
-
-    @test (Xv1*Xv2)[1] ≈ XX
-    @test (Xv1*Xv3)[1] ≈ XX
-    @test transpose(Xv1) * Xv1 ≈ XtX
-    @test transpose(Xv2) * Xv2 ≈ XtX
-    @test (transpose(Xv3)*Xv3)[1] ≈ XtX
-    @test Xv1' * Xv1 ≈ XcX
-    @test Xv2' * Xv2 ≈ XcX
-    @test (Xv3'*Xv3)[1] ≈ XcX
-    @test (Xv1*transpose(Xv1))[1] ≈ XXt
-    @test Xv2 * transpose(Xv2) ≈ XXt
-    @test Xv3 * transpose(Xv3) ≈ XXt
-    @test transpose(Xv1) * transpose(Xv2) ≈ XtXt
-    @test transpose(Xv1) * transpose(Xv3) ≈ XtXt
-    @test Xv1' * transpose(Xv2) ≈ XcXt
-    @test Xv1' * transpose(Xv3) ≈ XcXt
-    @test (Xv1*Xv1')[1] ≈ XXc
-    @test Xv2 * Xv2' ≈ XXc
-    @test Xv3 * Xv3' ≈ XXc
-    @test transpose(Xv1) * Xv2' ≈ XtXc
-    @test transpose(Xv1) * Xv3' ≈ XtXc
-    @test Xv1' * Xv2' ≈ XcXc
-    @test Xv1' * Xv3' ≈ XcXc
-end
-
-@testset "method ambiguity" begin
-    # Ambiguity test is run inside a clean process.
-    # https://github.com/JuliaLang/julia/issues/28804
-    script = joinpath(@__DIR__, "ambiguous_exec.jl")
-    cmd = `$(Base.julia_cmd()) --startup-file=no $script`
-    @test success(pipeline(cmd; stdout = stdout, stderr = stderr))
-end
-
-struct A32092
-    x::Float64
-end
-Base.:+(x::Float64, a::A32092) = x + a.x
-Base.:*(x::Float64, a::A32092) = x * a.x
-@testset "Issue #32092" begin
-    @test ones(2, 2) * [A32092(1.0), A32092(2.0)] == fill(3.0, (2,))
-end
-
-@testset "strong zero" begin
-    @testset for α in Any[false, 0.0, 0], n in 1:4
-        C = ones(n, n)
-        A = fill!(zeros(n, n), NaN)
-        B = ones(n, n)
-        @test mul!(copy(C), A, B, α, 1.0) == C
-    end
-end
-
-@testset "CartesianIndex handling in _modify!" begin
-    C = rand(10, 10)
-    A = rand(10, 10)
-    @test mul!(view(C, 1:10, 1:10), A, 0.5) == A * 0.5
-end
-
-@testset "Issue #33214: tiled generic mul!" begin
-    n = 100
-    A = rand(n, n)
-    B = rand(n, n)
-    C = zeros(n, n)
-    mul!(C, A, B, -1 + 0im, 0)
-    D = -A * B
-    @test D ≈ C
-
-    # Just in case dispatching on the surface API `mul!` is changed in the future,
-    # let's test the function where the tiled multiplication is defined.
-    fill!(C, 0)
-    LinearAlgebra._generic_matmatmul!(C, 'N', 'N', A, B, LinearAlgebra.MulAddMul(-1, 0))
-    @test D ≈ C
-end
-
-@testset "size zero types in matrix mult (see issue 39362)" begin
-    A = [missing missing; missing missing]
-    v = [missing, missing]
-    @test (A * v == v) === missing
-    M = fill(1.0, 2, 2)
-    a = fill(missing, 2, 1)
-    @test (a' * M * a == fill(missing, 1, 1)) === missing
-end
-
-
-@testset "multiplication of empty matrices without calling zero" begin
-    r, c = rand(0:9, 2)
-    A = collect(Number, rand(r, c))
-    B = rand(c, 0)
-    C = A * B
-    @test size(C) == (r, 0)
-    @test_throws MethodError zero(eltype(C))
-end
-
-@testset "Issue #33873: genmatmul! with empty operands" begin
-    @test Matrix{Any}(undef, 0, 2) * Matrix{Any}(undef, 2, 3) == Matrix{Any}(undef, 0, 3)
-    @test_throws MethodError Matrix{Any}(undef, 2, 0) * Matrix{Any}(undef, 0, 3)
-    @test Matrix{Int}(undef, 2, 0) * Matrix{Int}(undef, 0, 3) == zeros(Int, 2, 3)
-end
-
-@testset "3-arg *, order by type" begin
-    x = [1, 2im]
-    y = [im, 20, 30 + 40im]
-    z = [-1, 200 + im, -3]
-    A = [1 2 3im; 4 5 6+im]
-    B = [-10 -20; -30 -40]
-    a = 3 + im * round(Int, 10^6 * (pi - 3))
-    b = 123
-
-    @test x' * A * y == (x' * A) * y == x' * (A * y)
-    @test y' * A' * x == (y' * A') * x == y' * (A' * x)
-    @test y' * transpose(A) * x == (y' * transpose(A)) * x == y' * (transpose(A) * x)
-
-    @test B * A * y == (B * A) * y == B * (A * y)
-
-    @test a * A * y == (a * A) * y == a * (A * y)
-    @test A * y * a == (A * y) * a == A * (y * a)
-
-    @test a * B * A == (a * B) * A == a * (B * A)
-    @test B * A * a == (B * A) * a == B * (A * a)
-
-    @test a * y' * z == (a * y') * z == a * (y' * z)
-    @test y' * z * a == (y' * z) * a == y' * (z * a)
-
-    @test a * y * z' == (a * y) * z' == a * (y * z')
-    @test y * z' * a == (y * z') * a == y * (z' * a)
-
-    @test a * x' * A == (a * x') * A == a * (x' * A)
-    @test x' * A * a == (x' * A) * a == x' * (A * a)
-    @test a * x' * A isa Adjoint{<:Any,<:Vector}
-
-    @test a * transpose(x) * A == (a * transpose(x)) * A == a * (transpose(x) * A)
-    @test transpose(x) * A * a == (transpose(x) * A) * a == transpose(x) * (A * a)
-    @test a * transpose(x) * A isa Transpose{<:Any,<:Vector}
-
-    @test x' * B * A == (x' * B) * A == x' * (B * A)
-    @test x' * B * A isa Adjoint{<:Any,<:Vector}
-
-    @test y * x' * A == (y * x') * A == y * (x' * A)
-    y31 = reshape(y, 3, 1)
-    @test y31 * x' * A == (y31 * x') * A == y31 * (x' * A)
-
-    vm = [rand(1:9, 2, 2) for _ in 1:3]
-    Mm = [rand(1:9, 2, 2) for _ in 1:3, _ in 1:3]
-
-    @test vm' * Mm * vm == (vm' * Mm) * vm == vm' * (Mm * vm)
-    @test Mm * Mm' * vm == (Mm * Mm') * vm == Mm * (Mm' * vm)
-    @test vm' * Mm * Mm == (vm' * Mm) * Mm == vm' * (Mm * Mm)
-    @test Mm * Mm' * Mm == (Mm * Mm') * Mm == Mm * (Mm' * Mm)
-end
-
-@testset "3-arg *, order by size" begin
-    M44 = randn(4, 4)
-    M24 = randn(2, 4)
-    M42 = randn(4, 2)
-    @test M44 * M44 * M44 ≈ (M44 * M44) * M44 ≈ M44 * (M44 * M44)
-    @test M42 * M24 * M44 ≈ (M42 * M24) * M44 ≈ M42 * (M24 * M44)
-    @test M44 * M42 * M24 ≈ (M44 * M42) * M24 ≈ M44 * (M42 * M24)
-end
-
-@testset "4-arg *, by type" begin
-    y = [im, 20, 30 + 40im]
-    z = [-1, 200 + im, -3]
-    a = 3 + im * round(Int, 10^6 * (pi - 3))
-    b = 123
-    M = rand(vcat(1:9, im .* [1, 2, 3]), 3, 3)
-    N = rand(vcat(1:9, im .* [1, 2, 3]), 3, 3)
-
-    @test a * b * M * y == (a * b) * (M * y)
-    @test a * b * M * N == (a * b) * (M * N)
-    @test a * M * N * y == (a * M) * (N * y)
-    @test a * y' * M * z == (a * y') * (M * z)
-    @test a * y' * M * N == (a * y') * (M * N)
-
-    @test M * y * a * b == (M * y) * (a * b)
-    @test M * N * a * b == (M * N) * (a * b)
-    @test M * N * y * a == (a * M) * (N * y)
-    @test y' * M * z * a == (a * y') * (M * z)
-    @test y' * M * N * a == (a * y') * (M * N)
-
-    @test M * N * conj(M) * y == (M * N) * (conj(M) * y)
-    @test y' * M * N * conj(M) == (y' * M) * (N * conj(M))
-    @test y' * M * N * z == (y' * M) * (N * z)
-end
-
-@testset "4-arg *, by size" begin
-    for shift in 1:5
-        s1, s2, s3, s4, s5 = circshift(3:7, shift)
-        a = randn(s1, s2)
-        b = randn(s2, s3)
-        c = randn(s3, s4)
-        d = randn(s4, s5)
-
-        # _quad_matmul
-        @test *(a, b, c, d) ≈ (a * b) * (c * d)
-
-        # _tri_matmul(A,B,B,δ)
-        @test *(11.1, b, c, d) ≈ (11.1 * b) * (c * d)
-        @test *(a, b, c, 99.9) ≈ (a * b) * (c * 99.9)
-    end
-end
-
-@testset "Issue #46865: mul!() with non-const alpha, beta" begin
-    f!(C,A,B,alphas,betas) = mul!(C, A, B, alphas[1], betas[1])
-    alphas = [1.0]
-    betas = [0.5]
-    for d in [2,3,4]  # test native small-matrix cases as well as BLAS
-        A = rand(d,d)
-        B = copy(A)
-        C = copy(A)
-        f!(C, A, B, alphas, betas)
-        @test_broken (@allocated f!(C, A, B, alphas, betas)) == 0
-    end
-end
-
-end # module TestMatmul
diff --git a/stdlib/LinearAlgebra/test/pinv.jl b/stdlib/LinearAlgebra/test/pinv.jl
deleted file mode 100644
index c7268865a0505..0000000000000
--- a/stdlib/LinearAlgebra/test/pinv.jl
+++ /dev/null
@@ -1,186 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestPinv
-
-using Test, LinearAlgebra, Random
-
-Random.seed!(12345)
-
-function hilb(T::Type, n::Integer)
-    a = Matrix{T}(undef, n, n)
-    for i=1:n
-        for j=1:n
-            a[j,i]=one(T)/(i+j-one(T))
-        end
-    end
-    return a
-end
-hilb(n::Integer) = hilb(Float64,n)
-
-function hilb(T::Type, m::Integer, n::Integer)
-    a = Matrix{T}(undef, m, n)
-    for i=1:n
-        for j=1:m
-            a[j,i]=one(T)/(i+j-one(T))
-        end
-    end
-    return a
-end
-hilb(m::Integer, n::Integer) = hilb(Float64,m,n)
-
-function onediag(T::Type, m::Integer, n::Integer)
-    a=zeros(T,m,n)
-    for i=1:min(n,m)
-        a[i,i]=one(T)/(float(i)^5)
-    end
-    a[1,1] = 0
-    a[min(m,n),min(m,n)] = 0
-    return a
-end
-onediag(m::Integer, n::Integer) = onediag(Float64, m::Integer, n::Integer)
-
-function onediag_sparse(T::Type, n::Integer)
-    a=zeros(T,n)
-    for i=1:n
-        a[i]=one(T)/(float(i)^5)
-    end
-    a[1] = 0
-    a[n] = 0
-    return Diagonal(a)
-end
-onediag_sparse(n::Integer) = onediag_sparse(Float64, n::Integer)
-
-function tridiag(T::Type, m::Integer, n::Integer)
-    a=zeros(T,m,n)
-    for i=1:min(n,m)
-        a[i,i]=one(T)/(float(i)^5)
-    end
-    for i=1:min(n,m)-1
-        a[i+1,i]=2*one(T)/(float(i)^5)
-        a[1,i+1]=2*one(T)/(float(i)^5)
-    end
-    return a
-end
-tridiag(m::Integer, n::Integer) = tridiag(Float64, m::Integer, n::Integer)
-
-function test_pinv(a,tol1,tol2)
-    m,n = size(a)
-
-    apinv = @inferred pinv(a)
-    @test size(apinv) == (n,m)
-    @test norm(a*apinv*a-a)/norm(a) ≈ 0 atol=tol1
-    @test norm(apinv*a*apinv-apinv)/norm(apinv) ≈ 0 atol=tol1
-    b = a*randn(n)
-    x = apinv*b
-    @test norm(a*x-b)/norm(b) ≈ 0 atol=tol1
-
-    apinv = @inferred pinv(a,sqrt(eps(real(one(eltype(a))))))
-    @test size(apinv) == (n,m)
-    @test norm(a*apinv*a-a)/norm(a) ≈ 0 atol=tol2
-    @test norm(apinv*a*apinv-apinv)/norm(apinv) ≈ 0 atol=tol2
-    b = a*randn(n)
-    x = apinv*b
-    @test norm(a*x-b)/norm(b) ≈ 0 atol=tol2
-end
-
-@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64)
-    @testset for (m, n) in [(1000, 100), (100, 100), (100, 1000)]
-        default_tol = (real(one(eltya))) * max(m,n) * 10
-        tol1 = 1e-2
-        tol2 = 1e-5
-        if real(eltya) == Float32
-            tol1 = 1e0
-            tol2 = 1e-2
-        end
-        @testset "dense/ill-conditioned matrix" begin
-            a = hilb(eltya, m, n)
-            test_pinv(a, tol1, tol2)
-        end
-        @testset "dense/diagonal matrix" begin
-            a = onediag(eltya, m, n)
-            test_pinv(a, default_tol, default_tol)
-        end
-        @testset "dense/tri-diagonal matrix" begin
-            a = tridiag(eltya, m, n)
-            test_pinv(a, default_tol, tol2)
-        end
-        @testset "Diagonal matrix" begin
-            a = onediag_sparse(eltya, m)
-            test_pinv(a, default_tol, default_tol)
-        end
-        @testset "Vector" begin
-            a = rand(eltya, m)
-            apinv = @inferred pinv(a)
-            @test pinv(hcat(a)) ≈ apinv
-            @test isa(apinv, eltya <: Complex ? Adjoint{eltya} : Transpose{eltya})
-        end
-        @testset "Adjoint/Transpose vector" begin
-            a = rand(eltya, m)'
-            apinv = @inferred pinv(a)
-            @test pinv(vcat(a)) ≈ apinv
-            @test apinv isa Vector{eltya}
-        end
-    end
-
-    @testset "zero valued numbers/vectors/matrices" begin
-        a = pinv(zero(eltya))
-        @test a ≈ 0.0
-
-        a = pinv([zero(eltya); zero(eltya)])
-        @test a[1] ≈ 0.0
-        @test a[2] ≈ 0.0
-
-        a = pinv([zero(eltya); zero(eltya)]')
-        @test a[1] ≈ 0.0
-        @test a[2] ≈ 0.0
-
-        a = pinv(Diagonal([zero(eltya); zero(eltya)]))
-        @test a.diag[1] ≈ 0.0
-        @test a.diag[2] ≈ 0.0
-    end
-
-    @testset "hermitian matrices" begin
-        Q = ones(2,2)
-        C = pinv(Hermitian(Q))/0.25
-        @test C ≈ ones(2,2)
-    end
-
-    @testset "non-square diagonal matrices" begin
-        A = eltya[1 0 ; 0 1 ; 0 0]
-        B = pinv(A)
-        @test A*B*A ≈ A
-        @test B*A*B ≈ B
-
-        A = eltya[1 0 0 ; 0 1 0]
-        B = pinv(A)
-        @test A*B*A ≈ A
-        @test B*A*B ≈ B
-    end
-
-    if eltya <: LinearAlgebra.BlasReal
-        @testset "sub-normal numbers/vectors/matrices" begin
-            a = pinv(floatmin(eltya)/100)
-            @test a ≈ 0.0
-            # Complex subnormal
-            a = pinv(floatmin(eltya)/100*(1+1im))
-            @test a ≈ 0.0
-
-            a = pinv([floatmin(eltya); floatmin(eltya)]/100)
-            @test a[1] ≈ 0.0
-            @test a[2] ≈ 0.0
-            # Complex subnormal
-            a = pinv([floatmin(eltya); floatmin(eltya)]/100*(1+1im))
-            @test a[1] ≈ 0.0
-            @test a[2] ≈ 0.0
-            a = pinv(Diagonal([floatmin(eltya); floatmin(eltya)]/100))
-            @test a.diag[1] ≈ 0.0
-            @test a.diag[2] ≈ 0.0
-            # Complex subnormal
-            a = pinv(Diagonal([floatmin(eltya); floatmin(eltya)]/100*(1+1im)))
-            @test a.diag[1] ≈ 0.0
-            @test a.diag[2] ≈ 0.0
-        end
-    end
-end
-
-end # module TestPinv
diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl
deleted file mode 100644
index 184971da304f7..0000000000000
--- a/stdlib/LinearAlgebra/test/qr.jl
+++ /dev/null
@@ -1,507 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestQR
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted, rmul!, lmul!
-
-n = 10
-
-# Split n into 2 parts for tests needing two matrices
-n1 = div(n, 2)
-n2 = 2*n1
-
-Random.seed!(1234325)
-
-areal = randn(n,n)/2
-aimg  = randn(n,n)/2
-a2real = randn(n,n)/2
-a2img  = randn(n,n)/2
-breal = randn(n,2)/2
-bimg  = randn(n,2)/2
-
-# helper functions to unambiguously recover explicit forms of an implicit QR Q
-squareQ(Q::LinearAlgebra.AbstractQ) = Q*I
-rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q)
-
-@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int)
-    raw_a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
-    raw_a2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(a2real, a2img) : a2real)
-    asym = raw_a' + raw_a                  # symmetric indefinite
-    apd  = raw_a' * raw_a                 # symmetric positive-definite
-    ε = εa = eps(abs(float(one(eltya))))
-
-    @testset for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int)
-        raw_b = eltyb == Int ? rand(1:5, n, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? complex.(breal, bimg) : breal)
-        εb = eps(abs(float(one(eltyb))))
-        ε = max(εa, εb)
-        tab = promote_type(eltya, eltyb)
-
-        @testset "QR decomposition of a Number" begin
-            α = rand(eltyb)
-            aα = fill(α, 1, 1)
-            @test qr(α).Q * qr(α).R ≈ qr(aα).Q * qr(aα).R
-            @test abs(qr(α).Q[1,1]) ≈ one(eltyb)
-        end
-
-        for (a, b) in ((raw_a, raw_b),
-               (view(raw_a, 1:n-1, 1:n-1), view(raw_b, 1:n-1, 1)))
-            a_1 = size(a, 1)
-            @testset "QR decomposition (without pivoting)" begin
-                qra   = @inferred qr(a)
-                q, r  = qra.Q, qra.R
-                @test_throws ErrorException qra.Z
-                @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1)
-                @test q*squareQ(q)' ≈ Matrix(I, a_1, a_1)
-                @test q'*Matrix(1.0I, a_1, a_1)' ≈ squareQ(q)'
-                @test squareQ(q)'q ≈ Matrix(I, a_1, a_1)
-                @test Matrix(1.0I, a_1, a_1)'q' ≈ squareQ(q)'
-                @test q*r ≈ a
-                @test a*(qra\b) ≈ b atol=3000ε
-                @test Array(qra) ≈ a
-                sq = size(q.factors, 2)
-                @test *(Matrix{eltyb}(I, sq, sq), adjoint(q)) * squareQ(q) ≈ Matrix(I, sq, sq) atol=5000ε
-                if eltya != Int
-                    @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab}, q))
-                    ac = copy(a)
-                    @test qr!(a[:, 1:5])\b == qr!(view(ac, :, 1:5))\b
-                end
-                qrstring = sprint((t, s) -> show(t, "text/plain", s), qra)
-                rstring  = sprint((t, s) -> show(t, "text/plain", s), r)
-                qstring  = sprint((t, s) -> show(t, "text/plain", s), q)
-                @test qrstring == "$(summary(qra))\nQ factor: $qstring\nR factor:\n$rstring"
-                # iterate
-                q, r = qra
-                @test q*r ≈ a
-                # property names
-                @test Base.propertynames(qra)       == (:R, :Q)
-            end
-            @testset "Thin QR decomposition (without pivoting)" begin
-                qra   = @inferred qr(a[:, 1:n1], NoPivot())
-                q,r   = qra.Q, qra.R
-                @test_throws ErrorException qra.Z
-                @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1)
-                @test q'*rectangularQ(q) ≈ Matrix(I, a_1, n1)
-                @test q*r ≈ a[:, 1:n1]
-                @test q*b[1:n1] ≈ rectangularQ(q)*b[1:n1] atol=100ε
-                @test q*b ≈ squareQ(q)*b atol=100ε
-                if eltya != Int
-                    @test Array{eltya}(q) ≈ rectangularQ(q)
-                end
-                @test_throws DimensionMismatch q*b[1:n1 + 1]
-                @test_throws DimensionMismatch b[1:n1 + 1]*q'
-                sq = size(q.factors, 2)
-                @test *(UpperTriangular(Matrix{eltyb}(I, sq, sq)), adjoint(q))*squareQ(q) ≈ Matrix(I, n1, a_1) atol=5000ε
-                if eltya != Int
-                    @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q))
-                end
-                # iterate
-                q, r = qra
-                @test q*r ≈ a[:, 1:n1]
-                # property names
-                @test Base.propertynames(qra)       == (:R, :Q)
-            end
-            @testset "(Automatic) Fat (pivoted) QR decomposition" begin
-                @inferred qr(a, ColumnNorm())
-
-                qrpa  = factorize(a[1:n1,:])
-                q,r = qrpa.Q, qrpa.R
-                @test_throws ErrorException qrpa.Z
-                p = qrpa.p
-                @test q'*squareQ(q) ≈ Matrix(I, n1, n1)
-                @test q*squareQ(q)' ≈ Matrix(I, n1, n1)
-                sq = size(q, 2);
-                @test (UpperTriangular(Matrix{eltya}(I, sq, sq))*q')*squareQ(q) ≈ Matrix(I, n1, n1)
-                @test q*r ≈ (isa(qrpa,QRPivoted) ? a[1:n1,p] : a[1:n1,:])
-                @test q*r[:,invperm(p)] ≈ a[1:n1,:]
-                @test q*r*transpose(qrpa.P) ≈ a[1:n1,:]
-                @test a[1:n1,:]*(qrpa\b[1:n1]) ≈ b[1:n1] atol=5000ε
-                @test Array(qrpa) ≈ a[1:5,:]
-                if eltya != Int
-                    @test Array{eltya}(q) ≈ Matrix(q)
-                end
-                @test_throws DimensionMismatch q*b[1:n1+1]
-                @test_throws DimensionMismatch b[1:n1+1]*q'
-                if eltya != Int
-                    @test Matrix{eltyb}(I, n1, n1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q))
-                end
-                # iterate
-                q, r, p = qrpa
-                @test q*r[:,invperm(p)] ≈ a[1:n1,:]
-                # property names
-                @test Base.propertynames(qrpa)       == (:R, :Q, :p, :P)
-            end
-            @testset "(Automatic) Thin (pivoted) QR decomposition" begin
-                qrpa  = factorize(a[:,1:n1])
-                q,r = qrpa.Q, qrpa.R
-                @test_throws ErrorException qrpa.Z
-                p = qrpa.p
-                @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1)
-                @test q*squareQ(q)' ≈ Matrix(I, a_1, a_1)
-                @test q*r ≈ a[:,p]
-                @test q*r[:,invperm(p)] ≈ a[:,1:n1]
-                @test Array(qrpa) ≈ a[:,1:5]
-                if eltya != Int
-                    @test Array{eltya}(q) ≈ Matrix(q)
-                end
-                @test_throws DimensionMismatch q*b[1:n1+1]
-                @test_throws DimensionMismatch b[1:n1+1]*q'
-                sq = size(q.factors, 2)
-                @test *(UpperTriangular(Matrix{eltyb}(I, sq, sq)), adjoint(q))*squareQ(q) ≈ Matrix(I, n1, a_1) atol=5000ε
-                if eltya != Int
-                    @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q))
-                end
-                qrstring = sprint((t, s) -> show(t, "text/plain", s), qrpa)
-                rstring  = sprint((t, s) -> show(t, "text/plain", s), r)
-                qstring  = sprint((t, s) -> show(t, "text/plain", s), q)
-                pstring  = sprint((t, s) -> show(t, "text/plain", s), p)
-                @test qrstring == "$(summary(qrpa))\nQ factor: $qstring\nR factor:\n$rstring\npermutation:\n$pstring"
-                # iterate
-                q, r, p = qrpa
-                @test q*r[:,invperm(p)] ≈ a[:,1:n1]
-                # property names
-                @test Base.propertynames(qrpa)       == (:R, :Q, :p, :P)
-            end
-        end
-        if eltya != Int
-            @testset "Matmul with QR factorizations" begin
-                a = raw_a
-                qrpa = factorize(a[:,1:n1])
-                q, r = qrpa.Q, qrpa.R
-                @test rmul!(copy(squareQ(q)'), q) ≈ Matrix(I, n, n)
-                @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1),q)
-                @test rmul!(squareQ(q), adjoint(q)) ≈ Matrix(I, n, n)
-                @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1), adjoint(q))
-                @test_throws ErrorException size(q,-1)
-                @test_throws DimensionMismatch LinearAlgebra.lmul!(q,zeros(eltya,n1+1))
-                @test_throws DimensionMismatch LinearAlgebra.lmul!(adjoint(q), zeros(eltya,n1+1))
-
-                b = similar(a); rand!(b)
-                c = similar(a)
-                d = similar(a[:,1:n1])
-                @test mul!(c, q, b) ≈ q*b
-                @test mul!(d, q, r) ≈ q*r ≈ a[:,qrpa.p]
-                @test mul!(c, q', b) ≈ q'*b
-                @test mul!(d, q', a[:,qrpa.p])[1:n1,:] ≈ r
-                @test all(x -> abs(x) < ε*norm(a), d[n1+1:end,:])
-                @test mul!(c, b, q) ≈ b*q
-                @test mul!(c, b, q') ≈ b*q'
-                @test_throws DimensionMismatch mul!(Matrix{eltya}(I, n+1, n), q, b)
-
-                qra = qr(a[:,1:n1], NoPivot())
-                q, r = qra.Q, qra.R
-                @test rmul!(copy(squareQ(q)'), q) ≈ Matrix(I, n, n)
-                @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1),q)
-                @test rmul!(squareQ(q), adjoint(q)) ≈ Matrix(I, n, n)
-                @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1),adjoint(q))
-                @test_throws ErrorException size(q,-1)
-                @test_throws DimensionMismatch q * Matrix{Int8}(I, n+4, n+4)
-
-                @test mul!(c, q, b) ≈ q*b
-                @test mul!(d, q, r) ≈ a[:,1:n1]
-                @test mul!(c, q', b) ≈ q'*b
-                @test mul!(d, q', a[:,1:n1])[1:n1,:] ≈ r
-                @test all(x -> abs(x) < ε*norm(a), d[n1+1:end,:])
-                @test mul!(c, b, q) ≈ b*q
-                @test mul!(c, b, q') ≈ b*q'
-                @test_throws DimensionMismatch mul!(Matrix{eltya}(I, n+1, n), q, b)
-
-                b = similar(a[:,1]); rand!(b)
-                c = similar(a[:,1])
-                d = similar(a[:,1])
-                @test mul!(c, q, b) ≈ q*b
-                @test mul!(c, q', b) ≈ q'*b
-                @test_throws DimensionMismatch mul!(Vector{eltya}(undef, n+1), q, b)
-            end
-        end
-    end
-end
-
-@testset "transpose errors" begin
-    @test_throws ArgumentError transpose(qr(randn(ComplexF64,3,3)))
-    @test_throws ArgumentError transpose(qr(randn(ComplexF64,3,3), NoPivot()))
-    @test_throws ArgumentError transpose(qr(big.(randn(ComplexF64,3,3))))
-end
-
-@testset "Issue 7304" begin
-    A = [-√.5 -√.5; -√.5 √.5]
-    Q = rectangularQ(qr(A).Q)
-    @test norm(A-Q) < eps()
-end
-
-@testset "qr on AbstractVector" begin
-    vr = [3.0, 4.0]
-    for Tr in (Float32, Float64)
-        for T in (Tr, Complex{Tr})
-            v = convert(Vector{T}, vr)
-            nv, nm = qr(v)
-            @test norm(nv*Matrix(I, (2,2)) - [-0.6 -0.8; -0.8 0.6], Inf) < eps(Tr)
-            @test nm == fill(-5.0, 1, 1)
-        end
-    end
-end
-
-@testset "QR on Ints" begin
-    # not sure what to do about this edge case now that we build decompositions
-    # for qr(...), so for now just commenting this out
-    # @test qr(Int[]) == (Int[],1)
-
-    B = rand(7,2)
-    @test (1:7)\B ≈ Vector(1:7)\B
-end
-
-@testset "Issue 16520" begin
-    @test_throws DimensionMismatch rand(3,2)\(1:5)
-end
-
-@testset "Issue 22810" begin
-    A = zeros(1, 2)
-    B = zeros(1, 1)
-    @test A \ B == zeros(2, 1)
-    @test qr(A, ColumnNorm()) \ B == zeros(2, 1)
-end
-
-@testset "Issue 24107" begin
-    A = rand(200,2)
-    @test A \ range(0, stop=1, length=200) == A \ Vector(range(0, stop=1, length=200))
-end
-
-@testset "Issue 24589. Promotion of rational matrices" begin
-    A = rand(1//1:5//5, 4,3)
-    @test Matrix(first(qr(A))) == Matrix(first(qr(float(A))))
-end
-
-@testset "Issue Test Factorization fallbacks for rectangular problems" begin
-    A  = randn(3,2)
-    Ac = copy(A')
-    b  = randn(3)
-    b0 = copy(b)
-    c  = randn(2)
-    B  = randn(3,3)
-    B0 = copy(B)
-    C  = randn(2,3)
-    @test A \b ≈ ldiv!(c, qr(A ), b)
-    @test b == b0
-    @test A \B ≈ ldiv!(C, qr(A ), B)
-    @test B == B0
-    c0 = copy(c)
-    C0 = copy(C)
-    @test Ac\c ≈ ldiv!(b, qr(Ac, ColumnNorm()), c)
-    @test c0 == c
-    @test Ac\C ≈ ldiv!(B, qr(Ac, ColumnNorm()), C)
-    @test C0 == C
-end
-
-@testset "Issue reflector of zero-length vector" begin
-    a = [2.0]
-    x = view(a,1:0)
-    τ = LinearAlgebra.reflector!(view(x,1:0))
-    @test τ == 0.0
-
-    b = reshape([3.0],1,1)
-    @test isempty(LinearAlgebra.reflectorApply!(x, τ, view(b,1:0,:)))
-    @test b[1] == 3.0
-end
-
-@testset "det(Q::Union{QRCompactWYQ, QRPackedQ})" begin
-    # 40 is the number larger than the default block size 36 of QRCompactWY
-    @testset for n in [1:3; 40], m in [1:3; 40], pivot in (NoPivot(), ColumnNorm())
-        @testset "real" begin
-            @testset for k in 0:min(n, m, 5)
-                A = cat(Array(I(k)), randn(n - k, m - k); dims=(1, 2))
-                Q, = qr(A, pivot)
-                @test det(Q) ≈ det(Q*Matrix(I, size(Q, 1), size(Q, 1)))
-                @test abs(det(Q)) ≈ 1
-            end
-        end
-        @testset "complex" begin
-            @testset for k in 0:min(n, m, 5)
-                A = cat(Array(I(k)), randn(ComplexF64, n - k, m - k); dims=(1, 2))
-                Q, = qr(A, pivot)
-                @test det(Q) ≈ det(Q*Matrix(I, size(Q, 1), size(Q, 1)))
-                @test abs(det(Q)) ≈ 1
-            end
-        end
-    end
-end
-
-@testset "inv(::AbstractQ)" begin
-    for T in (Float64, ComplexF64)
-        Q = qr(randn(T,5,5)).Q
-        @test inv(Q) === Q'
-        @test inv(Q)' === inv(Q') === Q
-    end
-end
-
-@testset "QR factorization of Q" begin
-    for T in (Float32, Float64, ComplexF32, ComplexF64)
-        Q1, R1 = qr(randn(T,5,5))
-        Q2, R2 = qr(Q1)
-        @test Matrix(Q1) ≈ Matrix(Q2)
-        @test R2 ≈ I
-    end
-end
-
-@testset "Generation of orthogonal matrices" begin
-    for T in (Float32, Float64)
-        n = 5
-        Q, R = qr(randn(T,n,n))
-        O = Q * Diagonal(sign.(diag(R)))
-        @test O' * O ≈ I
-    end
-end
-
-@testset "Multiplication of Q by special matrices" begin
-    for T in (Float32, Float64, ComplexF32, ComplexF64)
-        n = 5
-        Q, R = qr(randn(T,n,n))
-        Qmat = Matrix(Q)
-        D = Diagonal(randn(T,n))
-        @test Q * D ≈ Qmat * D
-        @test D * Q ≈ D * Qmat
-        J = 2*I
-        @test Q * J ≈ Qmat * J
-        @test J * Q ≈ J * Qmat
-    end
-end
-
-@testset "copyto! for Q" begin
-    for T in (Float32, Float64, ComplexF32, ComplexF64)
-        n = 5
-        Q, R = qr(randn(T,n,n))
-        Qmat = Matrix(Q)
-        dest1 = Matrix{T}(undef, size(Q))
-        copyto!(dest1, Q)
-        @test dest1 ≈ Qmat
-        dest2 = PermutedDimsArray(Matrix{T}(undef, size(Q)), (1, 2))
-        copyto!(dest2, Q)
-        @test dest2 ≈ Qmat
-        dest3 = PermutedDimsArray(Matrix{T}(undef, size(Q)), (2, 1))
-        copyto!(dest3, Q)
-        @test dest3 ≈ Qmat
-    end
-end
-
-@testset "adjoint of QR" begin
-    n = 5
-    B = randn(5, 2)
-
-    @testset "size(b)=$(size(b))" for b in (B[:, 1], B)
-        @testset "size(A)=$(size(A))" for A in (
-            randn(n, n),
-            # Wide problems become minimum norm (in x) problems similarly to LQ
-            randn(n + 2, n),
-            complex.(randn(n, n), randn(n, n)))
-
-            @testset "QRCompactWY" begin
-                F = qr(A)
-                x = F'\b
-                @test x ≈ A'\b
-                @test length(size(x)) == length(size(b))
-            end
-
-            @testset "QR" begin
-                F = LinearAlgebra.qrfactUnblocked!(copy(A))
-                x = F'\b
-                @test x ≈ A'\b
-                @test length(size(x)) == length(size(b))
-            end
-
-            @testset "QRPivoted" begin
-                F = LinearAlgebra.qr(A, ColumnNorm())
-                x = F'\b
-                @test x ≈ A'\b
-                @test length(size(x)) == length(size(b))
-            end
-        end
-        @test_throws DimensionMismatch("overdetermined systems are not supported")    qr(randn(n - 2, n))'\b
-        @test_throws DimensionMismatch("arguments must have the same number of rows") qr(randn(n, n + 1))'\b
-        @test_throws DimensionMismatch("overdetermined systems are not supported")    LinearAlgebra.qrfactUnblocked!(randn(n - 2, n))'\b
-        @test_throws DimensionMismatch("arguments must have the same number of rows") LinearAlgebra.qrfactUnblocked!(randn(n, n + 1))'\b
-        @test_throws DimensionMismatch("overdetermined systems are not supported")    qr(randn(n - 2, n), ColumnNorm())'\b
-        @test_throws DimensionMismatch("arguments must have the same number of rows") qr(randn(n, n + 1), ColumnNorm())'\b
-    end
-end
-
-@testset "issue #38974" begin
-    A = qr(ones(3, 1))
-    B = I(3)
-    C = B*A.Q'
-    @test C ≈ A.Q * Matrix(I, 3, 3)
-    @test A.Q' * B ≈ A.Q * Matrix(I, 3, 3)
-end
-
-@testset "convert between eltypes" begin
-    a = rand(Float64, 10, 5)
-    qra = qr(a)
-    qrwy = LinearAlgebra.QRCompactWY{Float32}(qra.factors, qra.T)
-    @test Array(qrwy) ≈ Array(qr(Float32.(a)))
-    @test eltype(qrwy.factors) == eltype(qrwy.T) == Float32
-    qra = qr(a, ColumnNorm())
-    qrp = QRPivoted{Float32}(qra.factors, qra.τ, qra.jpvt)
-    @test Array(qrp) ≈ Array(qr(Float32.(a), ColumnNorm()))
-    @test eltype(qrp.factors) == eltype(qrp.τ) == Float32
-    a = rand(Float16, 10, 5)
-    qra = qr(a)
-    qrnonblas = QR{ComplexF16}(qra.factors, qra.τ)
-    @test Array(qrnonblas) ≈ Array(qr(ComplexF16.(a)))
-    @test eltype(qrnonblas.factors) == eltype(qrnonblas.τ) == ComplexF16
-end
-
-# We use approximate equals to get MKL.jl tests to pass.
-@testset "optimized getindex for an AbstractQ" begin
-    for T in [Float64, ComplexF64]
-        Q = qr(rand(T, 4, 4))
-        Q2 = Q.Q
-        M = Matrix(Q2)
-        for j in axes(M, 2)
-            @test Q2[:, j] ≈ M[:, j]
-            for i in axes(M, 1)
-                @test Q2[i, :] ≈ M[i, :]
-                @test Q2[i, j] ≈ M[i, j]
-            end
-        end
-        @test Q2[:] ≈ M[:]
-        @test Q2[:, :] ≈ M[:, :]
-        @test Q2[:, :, :] ≈ M[:, :, :]
-    end
-    # Check that getindex works if copy returns itself (#44729)
-    struct MyIdentity{T} <: LinearAlgebra.AbstractQ{T} end
-    Base.size(::MyIdentity, dim::Integer) = dim in (1,2) ? 2 : 1
-    Base.size(::MyIdentity) = (2, 2)
-    Base.copy(J::MyIdentity) = J
-    LinearAlgebra.lmul!(::MyIdentity{T}, M::Array{T}) where {T} = M
-    @test MyIdentity{Float64}()[1,:] == [1.0, 0.0]
-end
-
-@testset "issue #48911" begin
-    # testcase in the original issue
-    # test ldiv!(::QRPivoted, ::AbstractVector)
-    A = Complex{BigFloat}[1+im 1-im]
-    b = Complex{BigFloat}[3+im]
-    x = A\b
-    AF = Complex{Float64}[1+im 1-im]
-    bf = Complex{Float64}[3+im]
-    xf = AF\bf
-    @test x ≈ xf
-
-    # test ldiv!(::QRPivoted, ::AbstractVector)
-    A = Complex{BigFloat}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im]
-    b = Complex{BigFloat}[1+im; 0]
-    x = A\b
-    AF = Complex{Float64}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im]
-    bf = Complex{Float64}[1+im; 0]
-    xf = AF\bf
-    @test x ≈ xf
-
-    # test ldiv!(::QRPivoted, ::AbstractMatrix)
-    C = Complex{BigFloat}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im]
-    D = Complex{BigFloat}[1+im 1-im; 0 0]
-    x = C\D
-    CF = Complex{Float64}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im]
-    DF = Complex{Float64}[1+im 1-im; 0 0]
-    xf = CF\DF
-    @test x ≈ xf
-end
-
-end # module TestQR
diff --git a/stdlib/LinearAlgebra/test/schur.jl b/stdlib/LinearAlgebra/test/schur.jl
deleted file mode 100644
index c9a5d92dbdae8..0000000000000
--- a/stdlib/LinearAlgebra/test/schur.jl
+++ /dev/null
@@ -1,221 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestSchur
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted
-
-n = 10
-
-# Split n into 2 parts for tests needing two matrices
-n1 = div(n, 2)
-n2 = 2*n1
-
-Random.seed!(1234321)
-
-areal = randn(n,n)/2
-aimg  = randn(n,n)/2
-
-@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int)
-    a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
-    asym = a' + a                 # symmetric indefinite
-    apd  = a' * a                 # symmetric positive-definite
-    for (a, asym, apd) in ((a, asym, apd),
-                           (view(a, 1:n, 1:n),
-                            view(asym, 1:n, 1:n),
-                            view(apd, 1:n, 1:n)))
-        ε = εa = eps(abs(float(one(eltya))))
-
-        d,v = eigen(a)
-        f   = schur(a)
-        @test f.vectors*f.Schur*f.vectors' ≈ a
-        @test sort(real(f.values)) ≈ sort(real(d))
-        @test sort(imag(f.values)) ≈ sort(imag(d))
-        @test istriu(f.Schur) || eltype(a)<:Real
-        @test convert(Array, f) ≈ a
-        @test_throws ErrorException f.A
-
-        sch, vecs, vals = schur(UpperTriangular(triu(a)))
-        @test vecs*sch*vecs' ≈ triu(a)
-        sch, vecs, vals = schur(UnitUpperTriangular(triu(a)))
-        @test vecs*sch*vecs' ≈ UnitUpperTriangular(triu(a))
-        sch, vecs, vals = schur(LowerTriangular(tril(a)))
-        @test vecs*sch*vecs' ≈ tril(a)
-        sch, vecs, vals = schur(UnitLowerTriangular(tril(a)))
-        @test vecs*sch*vecs' ≈ UnitLowerTriangular(tril(a))
-        sch, vecs, vals = schur(Hermitian(asym))
-        @test vecs*sch*vecs' ≈ asym
-        sch, vecs, vals = schur(Symmetric(a + transpose(a)))
-        @test vecs*sch*vecs' ≈ a + transpose(a)
-        sch, vecs, vals = schur(Tridiagonal(a + transpose(a)))
-        @test vecs*sch*vecs' ≈ Tridiagonal(a + transpose(a))
-        sch, vecs, vals = schur(Bidiagonal(a, :U))
-        @test vecs*sch*vecs' ≈ Bidiagonal(a, :U)
-        sch, vecs, vals = schur(Bidiagonal(a, :L))
-        @test vecs*sch*vecs' ≈ Bidiagonal(a, :L)
-
-        tstring = sprint((t, s) -> show(t, "text/plain", s), f.T)
-        zstring = sprint((t, s) -> show(t, "text/plain", s), f.Z)
-        vstring = sprint((t, s) -> show(t, "text/plain", s), f.values)
-        fstring = sprint((t, s) -> show(t, "text/plain", s), f)
-        @test fstring == "$(summary(f))\nT factor:\n$tstring\nZ factor:\n$(zstring)\neigenvalues:\n$vstring"
-        @testset "Reorder Schur" begin
-            # use asym for real schur to enforce tridiag structure
-            # avoiding partly selection of conj. eigenvalues
-            ordschura = eltya <: Complex ? a : asym
-            S = schur(ordschura)
-            select = bitrand(n)
-            O = ordschur(S, select)
-            sum(select) != 0 && @test S.values[findall(select)] ≈ O.values[1:sum(select)]
-            @test O.vectors*O.Schur*O.vectors' ≈ ordschura
-            @test_throws ErrorException f.A
-            Snew = LinearAlgebra.Schur(S.T, S.Z, S.values)
-            SchurNew = ordschur!(copy(Snew), select)
-            @test O.vectors ≈ SchurNew.vectors
-            @test O.Schur ≈ SchurNew.Schur
-        end
-
-        if isa(a, Array)
-            a1_sf = a[1:n1, 1:n1]
-            a2_sf = a[n1+1:n2, n1+1:n2]
-        else
-            a1_sf = view(a, 1:n1, 1:n1)
-            a2_sf = view(a, n1+1:n2, n1+1:n2)
-        end
-        @testset "Generalized Schur" begin
-            f = schur(a1_sf, a2_sf)
-            @test f.Q*f.S*f.Z' ≈ a1_sf
-            @test f.Q*f.T*f.Z' ≈ a2_sf
-            @test istriu(f.S) || eltype(a)<:Real
-            @test istriu(f.T) || eltype(a)<:Real
-            @test_throws ErrorException f.A
-
-            sstring = sprint((t, s) -> show(t, "text/plain", s), f.S)
-            tstring = sprint((t, s) -> show(t, "text/plain", s), f.T)
-            qstring = sprint((t, s) -> show(t, "text/plain", s), f.Q)
-            zstring = sprint((t, s) -> show(t, "text/plain", s), f.Z)
-            αstring = sprint((t, s) -> show(t, "text/plain", s), f.α)
-            βstring = sprint((t, s) -> show(t, "text/plain", s), f.β)
-            fstring = sprint((t, s) -> show(t, "text/plain", s), f)
-            @test fstring == "$(summary(f))\nS factor:\n$sstring\nT factor:\n$(tstring)\nQ factor:\n$(qstring)\nZ factor:\n$(zstring)\nα:\n$αstring\nβ:\n$βstring"
-        end
-        @testset "Reorder Generalized Schur" begin
-            NS = schur(a1_sf, a2_sf)
-            # Currently just testing with selecting gen eig values < 1
-            select = abs2.(NS.values) .< 1
-            m = sum(select)
-            S = ordschur(NS, select)
-            # Make sure that the new factorization still factors matrix
-            @test S.Q*S.S*S.Z' ≈ a1_sf
-            @test S.Q*S.T*S.Z' ≈ a2_sf
-            # Make sure that we have sorted it correctly
-            @test NS.values[findall(select)] ≈ S.values[1:m]
-
-            Snew = LinearAlgebra.GeneralizedSchur(NS.S, NS.T, NS.alpha, NS.beta, NS.Q, NS.Z)
-            SchurNew = ordschur!(copy(Snew), select)
-            @test S.Q ≈ SchurNew.Q
-            @test S.S ≈ SchurNew.S
-            @test S.T ≈ SchurNew.T
-            @test S.Z ≈ SchurNew.Z
-            @test S.alpha ≈ SchurNew.alpha
-            @test S.beta  ≈ SchurNew.beta
-            sS,sT,sQ,sZ = schur(a1_sf,a2_sf)
-            @test NS.Q ≈ sQ
-            @test NS.T ≈ sT
-            @test NS.S ≈ sS
-            @test NS.Z ≈ sZ
-        end
-    end
-    @testset "0x0 matrix" for A in (zeros(eltya, 0, 0), view(rand(eltya, 2, 2), 1:0, 1:0))
-        T, Z, λ = LinearAlgebra.schur(A)
-        @test T == A
-        @test Z == A
-        @test λ == zeros(0)
-    end
-
-    if eltya <: Real
-        @testset "quasitriangular to triangular" begin
-            S = schur(a)
-            SC = Schur{Complex}(S)
-            @test eltype(SC) == complex(eltype(S))
-            @test istriu(SC.T)
-            @test SC.Z*SC.Z' ≈ I
-            @test SC.Z*SC.T*SC.Z' ≈ a
-            @test sort(SC.values,by=LinearAlgebra.eigsortby) ≈ sort(S.values,by=LinearAlgebra.eigsortby)
-            @test Schur{Complex}(SC) === SC === Schur{eltype(SC)}(SC)
-            @test Schur{eltype(S)}(S) === S
-            if eltype(S) === Float32
-                S64 = Schur{Float64}(S)
-                @test eltype(S64) == Float64
-                @test S64.Z == S.Z
-                @test S64.T == S.T
-                @test S64.values == S.values
-            end
-        end
-    end
-
-    @testset "0x0 $eltya matrices" begin
-        A = zeros(eltya, 0, 0)
-        B = zeros(eltya, 0, 0)
-        S = LinearAlgebra.schur(A, B)
-        @test S.S == A
-        @test S.T == A
-        @test S.Q == A
-        @test S.Z == A
-        @test S.alpha == zeros(0)
-        @test S.beta == zeros(0)
-    end
-end
-
-@testset "Generalized Schur convergence" begin
-    # Check for convergence issues, #40279
-    problematic_pencils = [
-        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 3.7796350217469814 -3.3125635598133054 0.0 0.0 0.0 0.0 0.0 0.0 6.418270043493963 -6.625127119626611 0.0 0.0 0.0 0.0 0.0 -1.0; -3.312563559813306 3.779635021746982 0.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626612 6.418270043493964 -1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 3.7796350217469814 0.0 0.0 -3.3125635598133054 0.0 0.0 0.0 -1.0 6.418270043493963 0.0 0.0 -6.625127119626611 0.0 0.0; 0.0 0.0 0.0 3.779635021746982 -3.312563559813306 0.0 0.0 0.0 0.0 0.0 0.0 6.418270043493964 -6.625127119626612 0.0 -1.0 0.0; 0.0 0.0 0.0 -3.3125635598133054 3.7796350217469814 0.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626611 6.418270043493963 -1.0 0.0 0.0; 0.0 0.0 -3.312563559813306 0.0 0.0 3.779635021746982 0.0 0.0 0.0 0.0 -6.625127119626612 0.0 -1.0 6.418270043493964 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 3.7796350217469814 -3.3125635598133054 0.0 0.0 0.0 -1.0 0.0 0.0 6.418270043493963 -6.625127119626611; 0.0 0.0 0.0 0.0 0.0 0.0 -3.312563559813306 3.779635021746982 -1.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626612 6.418270043493964],
-            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 3.312563559813306 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 -3.779635021746982 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 0.0 0.0 3.312563559813306 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.779635021746982 3.3125635598133054 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.312563559813306 -3.7796350217469814 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 0.0 0.0 -3.779635021746982 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 3.312563559813306; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 -3.779635021746982]
-        ),
-        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 -1.0 0.0 0.0 0.0 0.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 -1.0 -2.62 0.0 0.0 0.0 0.0 0.0; 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0; 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0; 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62],
-            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]
-        ),
-        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.33748484079831426 -0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853 0.0 0.0 0.0 0.0 0.0 -1.0; -0.10323794456968927 0.3374848407983142 0.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713 -1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.33748484079831426 0.0 0.0 -0.10323794456968927 0.0 0.0 0.0 -1.0 -2.5940303184033713 0.0 0.0 -0.20647588913937853 0.0 0.0; 0.0 0.0 0.0 0.3374848407983142 -0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853 0.0 -1.0 0.0; 0.0 0.0 0.0 -0.10323794456968927 0.33748484079831426 0.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713 -1.0 0.0 0.0; 0.0 0.0 -0.10323794456968927 0.0 0.0 0.3374848407983142 0.0 0.0 0.0 0.0 -0.20647588913937853 0.0 -1.0 -2.5940303184033713 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.33748484079831426 -0.10323794456968927 0.0 0.0 0.0 -1.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853; 0.0 0.0 0.0 0.0 0.0 0.0 -0.10323794456968927 0.3374848407983142 -1.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713],
-            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.3374848407983142 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.0 0.0 0.10323794456968927 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.3374848407983142 0.10323794456968927 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.33748484079831426 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 0.0 0.0 -0.3374848407983142 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.10323794456968927; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.3374848407983142]
-        ),
-        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 1.7391668762048442 -1.309613611600033 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.150333752409688 -2.619227223200066 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0; -1.3096136116000332 1.739166876204844 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.6192272232000664 2.150333752409688 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.739166876204844 0.0 0.0 -1.3096136116000332 0.0 0.0 0.0 0.0 0.0 -1.0 2.150333752409688 0.0 0.0 -2.6192272232000664 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.739166876204844 0.0 0.0 0.0 0.0 -1.3096136116000332 0.0 -1.0 0.0 0.0 2.150333752409688 0.0 0.0 0.0 0.0 -2.6192272232000664 0.0; 0.0 0.0 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 -1.309613611600033 0.0 0.0 0.0 0.0 2.150333752409688 -1.0 0.0 0.0 0.0 -2.619227223200066; 0.0 0.0 -1.309613611600033 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 0.0 0.0 -2.619227223200066 0.0 -1.0 2.150333752409688 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.739166876204844 -1.3096136116000332 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.150333752409688 -2.6192272232000664 0.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 -1.309613611600033 1.7391668762048442 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.619227223200066 2.150333752409688 -1.0 0.0; 0.0 0.0 0.0 -1.309613611600033 0.0 0.0 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 -2.619227223200066 0.0 0.0 0.0 -1.0 2.150333752409688 0.0; 0.0 0.0 0.0 0.0 -1.3096136116000332 0.0 0.0 0.0 0.0 1.739166876204844 0.0 0.0 0.0 0.0 -2.6192272232000664 0.0 -1.0 0.0 0.0 2.150333752409688],
-            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.7391668762048442 1.3096136116000332 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.309613611600033 -1.739166876204844 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 0.0 0.0 1.309613611600033 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 0.0 0.0 0.0 0.0 1.309613611600033 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.7391668762048442 0.0 0.0 0.0 0.0 1.3096136116000332; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 0.0 0.0 -1.7391668762048442 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 1.309613611600033 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 -1.7391668762048442 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 0.0 0.0 0.0 0.0 -1.7391668762048442 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.309613611600033 0.0 0.0 0.0 0.0 -1.739166876204844]
-        ),
-        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007; 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769246 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230784 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769246 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230784 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788; -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 -6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769244 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769244 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248],
-            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615393 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384622 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624]
-        )]
-
-    for (A, B) in problematic_pencils
-        f = schur(A, B)
-        @test f.Q*f.S*f.Z' ≈ A
-        @test f.Q*f.T*f.Z' ≈ B
-    end
-end
-
-@testset "adjoint and transpose for schur (#40941)" begin
-    A = rand(3, 3)
-    B = schur(A', A)
-    C = B.left*B.S*B.right'
-    D = schur(transpose(A), A)
-    E = D.left*D.S*D.right'
-    @test A' ≈ C ≈ E
-end
-
-@testset "UpperHessenberg schur" begin
-    A = UpperHessenberg(rand(ComplexF64, 100, 100))
-    B = Array(A)
-    fact1 = schur(A)
-    fact2 = schur(B)
-    @test fact1.values ≈ fact2.values
-    @test fact1.Z * fact1.T * fact1.Z' ≈ B
-
-    A = UpperHessenberg(rand(Int32, 50, 50))
-    B = Array(A)
-    fact1 = schur(A)
-    fact2 = schur(B)
-    @test fact1.values ≈ fact2.values
-    @test fact1.Z * fact1.T * fact1.Z' ≈ B
-end
-
-end # module TestSchur
diff --git a/stdlib/LinearAlgebra/test/special.jl b/stdlib/LinearAlgebra/test/special.jl
deleted file mode 100644
index eaa297e05d957..0000000000000
--- a/stdlib/LinearAlgebra/test/special.jl
+++ /dev/null
@@ -1,538 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestSpecial
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: rmul!
-
-n= 10 #Size of matrix to test
-Random.seed!(1)
-
-@testset "Interconversion between special matrix types" begin
-    a = [1.0:n;]
-    A = Diagonal(a)
-    @testset for newtype in [Diagonal, Bidiagonal, SymTridiagonal, Tridiagonal, Matrix]
-       @test Matrix(convert(newtype, A)) == Matrix(A)
-       @test Matrix(convert(newtype, Diagonal(GenericArray(a)))) == Matrix(A)
-    end
-
-    @testset for isupper in (true, false)
-        A = Bidiagonal(a, [1.0:n-1;], ifelse(isupper, :U, :L))
-        for newtype in [Bidiagonal, Tridiagonal, Matrix]
-           @test Matrix(convert(newtype, A)) == Matrix(A)
-           @test Matrix(newtype(A)) == Matrix(A)
-        end
-        @test_throws ArgumentError convert(SymTridiagonal, A)
-        tritype = isupper ? UpperTriangular : LowerTriangular
-        @test Matrix(tritype(A)) == Matrix(A)
-
-        A = Bidiagonal(a, zeros(n-1), ifelse(isupper, :U, :L)) #morally Diagonal
-        for newtype in [Diagonal, Bidiagonal, SymTridiagonal, Tridiagonal, Matrix]
-           @test Matrix(convert(newtype, A)) == Matrix(A)
-           @test Matrix(newtype(A)) == Matrix(A)
-        end
-        @test Matrix(tritype(A)) == Matrix(A)
-    end
-
-    A = SymTridiagonal(a, [1.0:n-1;])
-    for newtype in [Tridiagonal, Matrix]
-       @test Matrix(convert(newtype, A)) == Matrix(A)
-    end
-    for newtype in [Diagonal, Bidiagonal]
-       @test_throws ArgumentError convert(newtype,A)
-    end
-    A = SymTridiagonal(a, zeros(n-1))
-    @test Matrix(convert(Bidiagonal,A)) == Matrix(A)
-
-    A = Tridiagonal(zeros(n-1), [1.0:n;], zeros(n-1)) #morally Diagonal
-    for newtype in [Diagonal, Bidiagonal, SymTridiagonal, Matrix]
-       @test Matrix(convert(newtype, A)) == Matrix(A)
-    end
-    A = Tridiagonal(fill(1., n-1), [1.0:n;], fill(1., n-1)) #not morally Diagonal
-    for newtype in [SymTridiagonal, Matrix]
-       @test Matrix(convert(newtype, A)) == Matrix(A)
-    end
-    for newtype in [Diagonal, Bidiagonal]
-        @test_throws ArgumentError convert(newtype,A)
-    end
-    A = Tridiagonal(zeros(n-1), [1.0:n;], fill(1., n-1)) #not morally Diagonal
-    @test Matrix(convert(Bidiagonal, A)) == Matrix(A)
-    A = UpperTriangular(Tridiagonal(zeros(n-1), [1.0:n;], fill(1., n-1)))
-    @test Matrix(convert(Bidiagonal, A)) == Matrix(A)
-    A = Tridiagonal(fill(1., n-1), [1.0:n;], zeros(n-1)) #not morally Diagonal
-    @test Matrix(convert(Bidiagonal, A)) == Matrix(A)
-    A = LowerTriangular(Tridiagonal(fill(1., n-1), [1.0:n;], zeros(n-1)))
-    @test Matrix(convert(Bidiagonal, A)) == Matrix(A)
-    @test_throws ArgumentError convert(SymTridiagonal,A)
-
-    A = LowerTriangular(Matrix(Diagonal(a))) #morally Diagonal
-    for newtype in [Diagonal, Bidiagonal, SymTridiagonal, LowerTriangular, Matrix]
-        @test Matrix(convert(newtype, A)) == Matrix(A)
-    end
-    A = UpperTriangular(Matrix(Diagonal(a))) #morally Diagonal
-    for newtype in [Diagonal, Bidiagonal, SymTridiagonal, UpperTriangular, Matrix]
-        @test Matrix(convert(newtype, A)) == Matrix(A)
-    end
-    A = UpperTriangular(triu(rand(n,n)))
-    for newtype in [Diagonal, Bidiagonal, Tridiagonal, SymTridiagonal]
-        @test_throws ArgumentError convert(newtype,A)
-    end
-
-
-    # test operations/constructors (not conversions) permitted in the docs
-    dl = [1., 1.]
-    d = [-2., -2., -2.]
-    T = Tridiagonal(dl, d, -dl)
-    S = SymTridiagonal(d, dl)
-    Bu = Bidiagonal(d, dl, :U)
-    Bl = Bidiagonal(d, dl, :L)
-    D = Diagonal(d)
-    M = [-2. 0. 0.; 1. -2. 0.; -1. 1. -2.]
-    U = UpperTriangular(M)
-    L = LowerTriangular(Matrix(M'))
-
-    for A in (T, S, Bu, Bl, D, U, L, M)
-        Adense = Matrix(A)
-        B = Symmetric(A)
-        Bdense = Matrix(B)
-        for (C,Cdense) in ((A,Adense), (B,Bdense))
-            @test Diagonal(C) == Diagonal(Cdense)
-            @test Bidiagonal(C, :U) == Bidiagonal(Cdense, :U)
-            @test Bidiagonal(C, :L) == Bidiagonal(Cdense, :L)
-            @test Tridiagonal(C) == Tridiagonal(Cdense)
-            @test UpperTriangular(C) == UpperTriangular(Cdense)
-            @test LowerTriangular(C) == LowerTriangular(Cdense)
-        end
-    end
-
-    @testset "Matrix constructor for !isa(zero(T), T)" begin
-        # the following models JuMP.jl's VariableRef and AffExpr, resp.
-        struct TypeWithoutZero end
-        struct TypeWithZero end
-        Base.promote_rule(::Type{TypeWithoutZero}, ::Type{TypeWithZero}) = TypeWithZero
-        Base.convert(::Type{TypeWithZero}, ::TypeWithoutZero) = TypeWithZero()
-        Base.zero(::Type{<:Union{TypeWithoutZero, TypeWithZero}}) = TypeWithZero()
-        LinearAlgebra.symmetric(::TypeWithoutZero, ::Symbol) = TypeWithoutZero()
-        Base.transpose(::TypeWithoutZero) = TypeWithoutZero()
-        d  = fill(TypeWithoutZero(), 3)
-        du = fill(TypeWithoutZero(), 2)
-        dl = fill(TypeWithoutZero(), 2)
-        D  = Diagonal(d)
-        Bu = Bidiagonal(d, du, :U)
-        Bl = Bidiagonal(d, dl, :L)
-        Tri = Tridiagonal(dl, d, du)
-        Sym = SymTridiagonal(d, dl)
-        for M in (D, Bu, Bl, Tri, Sym)
-            @test Matrix(M) == zeros(TypeWithZero, 3, 3)
-        end
-    end
-end
-
-@testset "Binary ops among special types" begin
-    a=[1.0:n;]
-    A=Diagonal(a)
-    Spectypes = [Diagonal, Bidiagonal, Tridiagonal, Matrix]
-    for (idx, type1) in enumerate(Spectypes)
-        for type2 in Spectypes
-           B = convert(type1,A)
-           C = convert(type2,A)
-           @test Matrix(B + C) ≈ Matrix(A + A)
-           @test Matrix(B - C) ≈ Matrix(A - A)
-       end
-    end
-    B = SymTridiagonal(a, fill(1., n-1))
-    for Spectype in [Diagonal, Bidiagonal, Tridiagonal, Matrix]
-        @test Matrix(B + convert(Spectype,A)) ≈ Matrix(B + A)
-        @test Matrix(convert(Spectype,A) + B) ≈ Matrix(B + A)
-        @test Matrix(B - convert(Spectype,A)) ≈ Matrix(B - A)
-        @test Matrix(convert(Spectype,A) - B) ≈ Matrix(A - B)
-    end
-
-    C = rand(n,n)
-    for TriType in [LinearAlgebra.UnitLowerTriangular, LinearAlgebra.UnitUpperTriangular, UpperTriangular, LowerTriangular]
-        D = TriType(C)
-        for Spectype in [Diagonal, Bidiagonal, Tridiagonal, Matrix]
-            @test Matrix(D + convert(Spectype,A)) ≈ Matrix(D + A)
-            @test Matrix(convert(Spectype,A) + D) ≈ Matrix(A + D)
-            @test Matrix(D - convert(Spectype,A)) ≈ Matrix(D - A)
-            @test Matrix(convert(Spectype,A) - D) ≈ Matrix(A - D)
-        end
-    end
-
-    UpTri = UpperTriangular(rand(20,20))
-    LoTri = LowerTriangular(rand(20,20))
-    Diag = Diagonal(rand(20,20))
-    Tridiag = Tridiagonal(rand(20, 20))
-    UpBi = Bidiagonal(rand(20,20), :U)
-    LoBi = Bidiagonal(rand(20,20), :L)
-    Sym = SymTridiagonal(rand(20), rand(19))
-    Dense = rand(20, 20)
-    mats = Any[UpTri, LoTri, Diag, Tridiag, UpBi, LoBi, Sym, Dense]
-
-    for op in (+,-,*)
-        for A in mats
-            for B in mats
-                @test (op)(A, B) ≈ (op)(Matrix(A), Matrix(B)) ≈ Matrix((op)(A, B))
-            end
-        end
-    end
-end
-
-@testset "+ and - among structured matrices with different container types" begin
-    diag = 1:5
-    offdiag = 1:4
-    uniformscalingmats = [UniformScaling(3), UniformScaling(1.0), UniformScaling(3//5), UniformScaling(ComplexF64(1.3, 3.5))]
-    mats = Any[Diagonal(diag), Bidiagonal(diag, offdiag, 'U'), Bidiagonal(diag, offdiag, 'L'), Tridiagonal(offdiag, diag, offdiag), SymTridiagonal(diag, offdiag)]
-    for T in [ComplexF64, Int64, Rational{Int64}, Float64]
-        push!(mats, Diagonal(Vector{T}(diag)))
-        push!(mats, Bidiagonal(Vector{T}(diag), Vector{T}(offdiag), 'U'))
-        push!(mats, Bidiagonal(Vector{T}(diag), Vector{T}(offdiag), 'L'))
-        push!(mats, Tridiagonal(Vector{T}(offdiag), Vector{T}(diag), Vector{T}(offdiag)))
-        push!(mats, SymTridiagonal(Vector{T}(diag), Vector{T}(offdiag)))
-    end
-
-    for op in (+,-,*)
-        for A in mats
-            for B in mats
-                @test (op)(A, B) ≈ (op)(Matrix(A), Matrix(B)) ≈ Matrix((op)(A, B))
-            end
-        end
-    end
-    for op in (+,-)
-        for A in mats
-            for B in uniformscalingmats
-                @test (op)(A, B) ≈ (op)(Matrix(A), B) ≈ Matrix((op)(A, B))
-                @test (op)(B, A) ≈ (op)(B, Matrix(A)) ≈ Matrix((op)(B, A))
-            end
-        end
-    end
-    diag = [randn(ComplexF64, 2, 2) for _ in 1:3]
-    odiag = [randn(ComplexF64, 2, 2) for _ in 1:2]
-    for A in (Diagonal(diag),
-                Bidiagonal(diag, odiag, :U),
-                Bidiagonal(diag, odiag, :L),
-                Tridiagonal(odiag, diag, odiag),
-                SymTridiagonal(diag, odiag)), B in uniformscalingmats
-        @test (A + B)::typeof(A) == (B + A)::typeof(A)
-        @test (A - B)::typeof(A) == ((A + (-B))::typeof(A))
-        @test (B - A)::typeof(A) == ((B + (-A))::typeof(A))
-    end
-end
-
-
-@testset "Triangular Types and QR" begin
-    for typ in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
-        a = rand(n,n)
-        atri = typ(a)
-        matri = Matrix(atri)
-        b = rand(n,n)
-        for pivot in (ColumnNorm(), NoPivot())
-            qrb = qr(b, pivot)
-            @test atri * qrb.Q ≈ matri * qrb.Q
-            @test atri * qrb.Q' ≈ matri * qrb.Q'
-            @test qrb.Q * atri ≈ qrb.Q * matri
-            @test qrb.Q' * atri ≈ qrb.Q' * matri
-        end
-    end
-end
-
-@testset "Multiplication of Qs" begin
-    for pivot in (ColumnNorm(), NoPivot()), A in (rand(5, 3), rand(5, 5), rand(3, 5))
-        Q = qr(A, pivot).Q
-        m = size(A, 1)
-        C = Matrix{Float64}(undef, (m, m))
-        @test Q*Q ≈ (Q*I) * (Q*I) ≈ mul!(C, Q, Q)
-        @test size(Q*Q) == (m, m)
-        @test Q'Q ≈ (Q'*I) * (Q*I) ≈ mul!(C, Q', Q)
-        @test size(Q'Q) == (m, m)
-        @test Q*Q' ≈ (Q*I) * (Q'*I) ≈ mul!(C, Q, Q')
-        @test size(Q*Q') == (m, m)
-        @test Q'Q' ≈ (Q'*I) * (Q'*I) ≈ mul!(C, Q', Q')
-        @test size(Q'Q') == (m, m)
-    end
-end
-
-@testset "concatenations of combinations of special and other matrix types" begin
-    N = 4
-    # Test concatenating pairwise combinations of special matrices
-    diagmat = Diagonal(1:N)
-    bidiagmat = Bidiagonal(1:N, 1:(N-1), :U)
-    tridiagmat = Tridiagonal(1:(N-1), 1:N, 1:(N-1))
-    symtridiagmat = SymTridiagonal(1:N, 1:(N-1))
-    specialmats = (diagmat, bidiagmat, tridiagmat, symtridiagmat)
-    for specialmata in specialmats, specialmatb in specialmats
-        MA = Matrix(specialmata); MB = Matrix(specialmatb)
-        @test hcat(specialmata, specialmatb) == hcat(MA, MB)
-        @test vcat(specialmata, specialmatb) == vcat(MA, MB)
-        @test hvcat((1,1), specialmata, specialmatb) == hvcat((1,1), MA, MB)
-        @test cat(specialmata, specialmatb; dims=(1,2)) == cat(MA, MB; dims=(1,2))
-    end
-    # Test concatenating pairwise combinations of special matrices with dense matrices or dense vectors
-    densevec = fill(1., N)
-    densemat = diagm(0 => densevec)
-    for specialmat in specialmats
-        SM = Matrix(specialmat)
-        # --> Tests applicable only to pairs of matrices
-        @test vcat(specialmat, densemat) == vcat(SM, densemat)
-        @test vcat(densemat, specialmat) == vcat(densemat, SM)
-        # --> Tests applicable also to pairs including vectors
-        for specialmat in specialmats, othermatorvec in (densemat, densevec)
-            SM = Matrix(specialmat); OM = Array(othermatorvec)
-            @test hcat(specialmat, othermatorvec) == hcat(SM, OM)
-            @test hcat(othermatorvec, specialmat) == hcat(OM, SM)
-            @test hvcat((2,), specialmat, othermatorvec) == hvcat((2,), SM, OM)
-            @test hvcat((2,), othermatorvec, specialmat) == hvcat((2,), OM, SM)
-            @test cat(specialmat, othermatorvec; dims=(1,2)) == cat(SM, OM; dims=(1,2))
-            @test cat(othermatorvec, specialmat; dims=(1,2)) == cat(OM, SM; dims=(1,2))
-        end
-    end
-end
-
-@testset "concatenations of annotated types" begin
-    N = 4
-    # The tested annotation types
-    testfull = Base.get_bool_env("JULIA_TESTFULL", false)
-    utriannotations = (UpperTriangular, UnitUpperTriangular)
-    ltriannotations = (LowerTriangular, UnitLowerTriangular)
-    triannotations = (utriannotations..., ltriannotations...)
-    symannotations = (Symmetric, Hermitian)
-    annotations = testfull ? (triannotations..., symannotations...) : (LowerTriangular, Symmetric)
-    # Concatenations involving these types, un/annotated
-    diagmat = Diagonal(1:N)
-    bidiagmat = Bidiagonal(1:N, 1:(N-1), :U)
-    tridiagmat = Tridiagonal(1:(N-1), 1:N, 1:(N-1))
-    symtridiagmat = SymTridiagonal(1:N, 1:(N-1))
-    specialconcatmats = testfull ? (diagmat, bidiagmat, tridiagmat, symtridiagmat) : (diagmat,)
-    # Concatenations involving strictly these types, un/annotated
-    densevec = fill(1., N)
-    densemat = fill(1., N, N)
-    # Annotated collections
-    annodmats = [annot(densemat) for annot in annotations]
-    annospcmats = [annot(spcmat) for annot in annotations, spcmat in specialconcatmats]
-    # Test concatenations of pairwise combinations of annotated special matrices
-    for annospcmata in annospcmats, annospcmatb in annospcmats
-        AM = Array(annospcmata); BM = Array(annospcmatb)
-        @test vcat(annospcmata, annospcmatb) == vcat(AM, BM)
-        @test hcat(annospcmata, annospcmatb) == hcat(AM, BM)
-        @test hvcat((2,), annospcmata, annospcmatb) == hvcat((2,), AM, BM)
-        @test cat(annospcmata, annospcmatb; dims=(1,2)) == cat(AM, BM; dims=(1,2))
-    end
-    # Test concatenations of pairwise combinations of annotated special matrices and other matrix/vector types
-    for annospcmat in annospcmats
-        AM = Array(annospcmat)
-        # --> Tests applicable to pairs including only matrices
-        for othermat in (densemat, annodmats..., specialconcatmats...)
-            OM = Array(othermat)
-            @test vcat(annospcmat, othermat) == vcat(AM, OM)
-            @test vcat(othermat, annospcmat) == vcat(OM, AM)
-        end
-        # --> Tests applicable to pairs including other vectors or matrices
-        for other in (densevec, densemat, annodmats..., specialconcatmats...)
-            OM = Array(other)
-            @test hcat(annospcmat, other) == hcat(AM, OM)
-            @test hcat(other, annospcmat) == hcat(OM, AM)
-            @test hvcat((2,), annospcmat, other) == hvcat((2,), AM, OM)
-            @test hvcat((2,), other, annospcmat) == hvcat((2,), OM, AM)
-            @test cat(annospcmat, other; dims=(1,2)) == cat(AM, OM; dims=(1,2))
-            @test cat(other, annospcmat; dims=(1,2)) == cat(OM, AM; dims=(1,2))
-        end
-    end
-    # Test concatenations strictly involving un/annotated dense matrices/vectors
-    for densemata in (densemat, annodmats...)
-        AM = Array(densemata)
-        # --> Tests applicable to pairs including only matrices
-        for densematb in (densemat, annodmats...)
-            BM = Array(densematb)
-            @test vcat(densemata, densematb) == vcat(AM, BM)
-            @test vcat(densematb, densemata) == vcat(BM, AM)
-        end
-        # --> Tests applicable to pairs including vectors or matrices
-        for otherdense in (densevec, densemat, annodmats...)
-            OM = Array(otherdense)
-            @test hcat(densemata, otherdense) == hcat(AM, OM)
-            @test hcat(otherdense, densemata) == hcat(OM, AM)
-            @test hvcat((2,), densemata, otherdense) == hvcat((2,), AM, OM)
-            @test hvcat((2,), otherdense, densemata) == hvcat((2,), OM, AM)
-            @test cat(densemata, otherdense; dims=(1,2)) == cat(AM, OM; dims=(1,2))
-            @test cat(otherdense, densemata; dims=(1,2)) == cat(OM, AM; dims=(1,2))
-        end
-    end
-end
-
-# for testing types with a dimension
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
-using .Main.Furlongs
-
-@testset "zero and one for structured matrices" begin
-    for elty in (Int64, Float64, ComplexF64)
-        D = Diagonal(rand(elty, 10))
-        Bu = Bidiagonal(rand(elty, 10), rand(elty, 9), 'U')
-        Bl = Bidiagonal(rand(elty, 10), rand(elty, 9), 'L')
-        T = Tridiagonal(rand(elty, 9),rand(elty, 10), rand(elty, 9))
-        S = SymTridiagonal(rand(elty, 10), rand(elty, 9))
-        mats = Any[D, Bu, Bl, T, S]
-        for A in mats
-            @test iszero(zero(A))
-            @test isone(one(A))
-            @test zero(A) == zero(Matrix(A))
-            @test one(A) == one(Matrix(A))
-        end
-
-        @test zero(D) isa Diagonal
-        @test one(D) isa Diagonal
-
-        @test zero(Bu) isa Bidiagonal
-        @test one(Bu) isa Bidiagonal
-        @test zero(Bl) isa Bidiagonal
-        @test one(Bl) isa Bidiagonal
-        @test zero(Bu).uplo == one(Bu).uplo == Bu.uplo
-        @test zero(Bl).uplo == one(Bl).uplo == Bl.uplo
-
-        @test zero(T) isa Tridiagonal
-        @test one(T) isa Tridiagonal
-        @test zero(S) isa SymTridiagonal
-        @test one(S) isa SymTridiagonal
-    end
-
-    # ranges
-    D = Diagonal(1:10)
-    Bu = Bidiagonal(1:10, 1:9, 'U')
-    Bl = Bidiagonal(1:10, 1:9, 'L')
-    T = Tridiagonal(1:9, 1:10, 1:9)
-    S = SymTridiagonal(1:10, 1:9)
-    mats = [D, Bu, Bl, T, S]
-    for A in mats
-        @test iszero(zero(A))
-        @test isone(one(A))
-        @test zero(A) == zero(Matrix(A))
-        @test one(A) == one(Matrix(A))
-    end
-
-    @test zero(D) isa Diagonal
-    @test one(D) isa Diagonal
-
-    @test zero(Bu) isa Bidiagonal
-    @test one(Bu) isa Bidiagonal
-    @test zero(Bl) isa Bidiagonal
-    @test one(Bl) isa Bidiagonal
-    @test zero(Bu).uplo == one(Bu).uplo == Bu.uplo
-    @test zero(Bl).uplo == one(Bl).uplo == Bl.uplo
-
-    @test zero(T) isa Tridiagonal
-    @test one(T) isa Tridiagonal
-    @test zero(S) isa SymTridiagonal
-    @test one(S) isa SymTridiagonal
-
-    # eltype with dimensions
-    D0 = Diagonal{Furlong{0, Int64}}([1, 2, 3, 4])
-    Bu0 = Bidiagonal{Furlong{0, Int64}}([1, 2, 3, 4], [1, 2, 3], 'U')
-    Bl0 =  Bidiagonal{Furlong{0, Int64}}([1, 2, 3, 4], [1, 2, 3], 'L')
-    T0 = Tridiagonal{Furlong{0, Int64}}([1, 2, 3], [1, 2, 3, 4], [1, 2, 3])
-    S0 = SymTridiagonal{Furlong{0, Int64}}([1, 2, 3, 4], [1, 2, 3])
-    F2 = Furlongs.Furlong{2}(1)
-    D2 = Diagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2)
-    Bu2 = Bidiagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2, [1, 2, 3].*F2, 'U')
-    Bl2 =  Bidiagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2, [1, 2, 3].*F2, 'L')
-    T2 = Tridiagonal{Furlong{2, Int64}}([1, 2, 3].*F2, [1, 2, 3, 4].*F2, [1, 2, 3].*F2)
-    S2 = SymTridiagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2, [1, 2, 3].*F2)
-    mats = Any[D0, Bu0, Bl0, T0, S0, D2, Bu2, Bl2, T2, S2]
-    for A in mats
-        @test iszero(zero(A))
-        @test isone(one(A))
-        @test zero(A) == zero(Matrix(A))
-        @test one(A) == one(Matrix(A))
-        @test eltype(one(A)) == typeof(one(eltype(A)))
-    end
-end
-
-@testset "== for structured matrices" begin
-    diag = rand(10)
-    offdiag = rand(9)
-    D = Diagonal(rand(10))
-    Bup = Bidiagonal(diag, offdiag, 'U')
-    Blo = Bidiagonal(diag, offdiag, 'L')
-    Bupd = Bidiagonal(diag, zeros(9), 'U')
-    Blod = Bidiagonal(diag, zeros(9), 'L')
-    T = Tridiagonal(offdiag, diag, offdiag)
-    Td = Tridiagonal(zeros(9), diag, zeros(9))
-    Tu = Tridiagonal(zeros(9), diag, offdiag)
-    Tl = Tridiagonal(offdiag, diag, zeros(9))
-    S = SymTridiagonal(diag, offdiag)
-    Sd = SymTridiagonal(diag, zeros(9))
-
-    mats = [D, Bup, Blo, Bupd, Blod, T, Td, Tu, Tl, S, Sd]
-
-    for a in mats
-        for b in mats
-            @test (a == b) == (Matrix(a) == Matrix(b)) == (b == a) == (Matrix(b) == Matrix(a))
-        end
-    end
-end
-
-@testset "BiTriSym*Q' and Q'*BiTriSym" begin
-    dl = [1, 1, 1]
-    d = [1, 1, 1, 1]
-    D = Diagonal(d)
-    Bi = Bidiagonal(d, dl, :L)
-    Tri = Tridiagonal(dl, d, dl)
-    Sym = SymTridiagonal(d, dl)
-    F = qr(ones(4, 1))
-    A = F.Q'
-    for A in (F.Q, F.Q'), B in (D, Bi, Tri, Sym)
-        @test B*A ≈ Matrix(B)*A
-        @test A*B ≈ A*Matrix(B)
-    end
-end
-
-@testset "Ops on SymTridiagonal ev has the same length as dv" begin
-    x = rand(3)
-    y = rand(3)
-    z = rand(2)
-
-    S = SymTridiagonal(x, y)
-    T = Tridiagonal(z, x, z)
-    Bu = Bidiagonal(x, z, :U)
-    Bl = Bidiagonal(x, z, :L)
-
-    Ms = Matrix(S)
-    Mt = Matrix(T)
-    Mbu = Matrix(Bu)
-    Mbl = Matrix(Bl)
-
-    @test S + T ≈ Ms + Mt
-    @test T + S ≈ Mt + Ms
-    @test S + Bu ≈ Ms + Mbu
-    @test Bu + S ≈ Mbu + Ms
-    @test S + Bl ≈ Ms + Mbl
-    @test Bl + S ≈ Mbl + Ms
-end
-
-@testset "Ensure Strided * (Sym)Tridiagonal is Dense" begin
-    x = rand(3)
-    y = rand(3)
-    z = rand(2)
-
-    l = rand(12, 12)
-    # strided but not a Matrix
-    v = @view l[1:4:end, 1:4:end]
-    M_v = Matrix(v)
-    m = rand(3, 3)
-
-    S = SymTridiagonal(x, y)
-    T = Tridiagonal(z, x, z)
-    M_S = Matrix(S)
-    M_T = Matrix(T)
-
-    @test m * T ≈ m * M_T
-    @test m * S ≈ m * M_S
-    @test v * T ≈ M_v * T
-    @test v * S ≈ M_v * S
-
-    @test m * T isa Matrix
-    @test m * S isa Matrix
-    @test v * T isa Matrix
-    @test v * S isa Matrix
-end
-
-end # module TestSpecial
diff --git a/stdlib/LinearAlgebra/test/structuredbroadcast.jl b/stdlib/LinearAlgebra/test/structuredbroadcast.jl
deleted file mode 100644
index 2ca1904b2ff2d..0000000000000
--- a/stdlib/LinearAlgebra/test/structuredbroadcast.jl
+++ /dev/null
@@ -1,241 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestStructuredBroadcast
-using Test, LinearAlgebra
-
-@testset "broadcast[!] over combinations of scalars, structured matrices, and dense vectors/matrices" begin
-    N = 10
-    s = rand()
-    fV = rand(N)
-    fA = rand(N, N)
-    Z = copy(fA)
-    D = Diagonal(rand(N))
-    B = Bidiagonal(rand(N), rand(N - 1), :U)
-    T = Tridiagonal(rand(N - 1), rand(N), rand(N - 1))
-    U = UpperTriangular(rand(N,N))
-    L = LowerTriangular(rand(N,N))
-    M = Matrix(rand(N,N))
-    structuredarrays = (D, B, T, U, L, M)
-    fstructuredarrays = map(Array, structuredarrays)
-    for (X, fX) in zip(structuredarrays, fstructuredarrays)
-        @test (Q = broadcast(sin, X); typeof(Q) == typeof(X) && Q == broadcast(sin, fX))
-        @test broadcast!(sin, Z, X) == broadcast(sin, fX)
-        @test (Q = broadcast(cos, X); Q isa Matrix && Q == broadcast(cos, fX))
-        @test broadcast!(cos, Z, X) == broadcast(cos, fX)
-        @test (Q = broadcast(*, s, X); typeof(Q) == typeof(X) && Q == broadcast(*, s, fX))
-        @test broadcast!(*, Z, s, X) == broadcast(*, s, fX)
-        @test (Q = broadcast(+, fV, fA, X); Q isa Matrix && Q == broadcast(+, fV, fA, fX))
-        @test broadcast!(+, Z, fV, fA, X) == broadcast(+, fV, fA, fX)
-        @test (Q = broadcast(*, s, fV, fA, X); Q isa Matrix && Q == broadcast(*, s, fV, fA, fX))
-        @test broadcast!(*, Z, s, fV, fA, X) == broadcast(*, s, fV, fA, fX)
-
-        @test X .* 2.0 == X .* (2.0,) == fX .* 2.0
-        @test X .* 2.0 isa typeof(X)
-        @test X .* (2.0,) isa typeof(X)
-        @test isequal(X .* Inf, fX .* Inf)
-
-        two = 2
-        @test X .^ 2 ==  X .^ (2,) == fX .^ 2 == X .^ two
-        @test X .^ 2 isa typeof(X)
-        @test X .^ (2,) isa typeof(X)
-        @test X .^ two isa typeof(X)
-        @test X .^ 0 == fX .^ 0
-        @test X .^ -1 == fX .^ -1
-
-        for (Y, fY) in zip(structuredarrays, fstructuredarrays)
-            @test broadcast(+, X, Y) == broadcast(+, fX, fY)
-            @test broadcast!(+, Z, X, Y) == broadcast(+, fX, fY)
-            @test broadcast(*, X, Y) == broadcast(*, fX, fY)
-            @test broadcast!(*, Z, X, Y) == broadcast(*, fX, fY)
-        end
-    end
-    diagonals = (D, B, T)
-    fdiagonals = map(Array, diagonals)
-    for (X, fX) in zip(diagonals, fdiagonals)
-        for (Y, fY) in zip(diagonals, fdiagonals)
-            @test broadcast(+, X, Y)::Union{Diagonal,Bidiagonal,Tridiagonal} == broadcast(+, fX, fY)
-            @test broadcast!(+, Z, X, Y) == broadcast(+, fX, fY)
-            @test broadcast(*, X, Y)::Union{Diagonal,Bidiagonal,Tridiagonal} == broadcast(*, fX, fY)
-            @test broadcast!(*, Z, X, Y) == broadcast(*, fX, fY)
-        end
-    end
-end
-
-@testset "broadcast! where the destination is a structured matrix" begin
-    N = 5
-    A = rand(N, N)
-    sA = A + copy(A')
-    D = Diagonal(rand(N))
-    Bu = Bidiagonal(rand(N), rand(N - 1), :U)
-    Bl = Bidiagonal(rand(N), rand(N - 1), :L)
-    T = Tridiagonal(rand(N - 1), rand(N), rand(N - 1))
-    ◣ = LowerTriangular(rand(N,N))
-    ◥ = UpperTriangular(rand(N,N))
-    M = Matrix(rand(N,N))
-
-    @test broadcast!(sin, copy(D), D) == Diagonal(sin.(D))
-    @test broadcast!(sin, copy(Bu), Bu) == Bidiagonal(sin.(Bu), :U)
-    @test broadcast!(sin, copy(Bl), Bl) == Bidiagonal(sin.(Bl), :L)
-    @test broadcast!(sin, copy(T), T) == Tridiagonal(sin.(T))
-    @test broadcast!(sin, copy(◣), ◣) == LowerTriangular(sin.(◣))
-    @test broadcast!(sin, copy(◥), ◥) == UpperTriangular(sin.(◥))
-    @test broadcast!(sin, copy(M), M) == Matrix(sin.(M))
-    @test broadcast!(*, copy(D), D, A) == Diagonal(broadcast(*, D, A))
-    @test broadcast!(*, copy(Bu), Bu, A) == Bidiagonal(broadcast(*, Bu, A), :U)
-    @test broadcast!(*, copy(Bl), Bl, A) == Bidiagonal(broadcast(*, Bl, A), :L)
-    @test broadcast!(*, copy(T), T, A) == Tridiagonal(broadcast(*, T, A))
-    @test broadcast!(*, copy(◣), ◣, A) == LowerTriangular(broadcast(*, ◣, A))
-    @test broadcast!(*, copy(◥), ◥, A) == UpperTriangular(broadcast(*, ◥, A))
-    @test broadcast!(*, copy(M), M, A) == Matrix(broadcast(*, M, A))
-
-    @test_throws ArgumentError broadcast!(cos, copy(D), D) == Diagonal(sin.(D))
-    @test_throws ArgumentError broadcast!(cos, copy(Bu), Bu) == Bidiagonal(sin.(Bu), :U)
-    @test_throws ArgumentError broadcast!(cos, copy(Bl), Bl) == Bidiagonal(sin.(Bl), :L)
-    @test_throws ArgumentError broadcast!(cos, copy(T), T) == Tridiagonal(sin.(T))
-    @test_throws ArgumentError broadcast!(cos, copy(◣), ◣) == LowerTriangular(sin.(◣))
-    @test_throws ArgumentError broadcast!(cos, copy(◥), ◥) == UpperTriangular(sin.(◥))
-    @test_throws ArgumentError broadcast!(+, copy(D), D, A) == Diagonal(broadcast(*, D, A))
-    @test_throws ArgumentError broadcast!(+, copy(Bu), Bu, A) == Bidiagonal(broadcast(*, Bu, A), :U)
-    @test_throws ArgumentError broadcast!(+, copy(Bl), Bl, A) == Bidiagonal(broadcast(*, Bl, A), :L)
-    @test_throws ArgumentError broadcast!(+, copy(T), T, A) == Tridiagonal(broadcast(*, T, A))
-    @test_throws ArgumentError broadcast!(+, copy(◣), ◣, A) == LowerTriangular(broadcast(*, ◣, A))
-    @test_throws ArgumentError broadcast!(+, copy(◥), ◥, A) == UpperTriangular(broadcast(*, ◥, A))
-    @test_throws ArgumentError broadcast!(*, copy(◥), ◣, 2)
-    @test_throws ArgumentError broadcast!(*, copy(Bu), Bl, 2)
-end
-
-@testset "map[!] over combinations of structured matrices" begin
-    N = 10
-    fA = rand(N, N)
-    Z = copy(fA)
-    D = Diagonal(rand(N))
-    B = Bidiagonal(rand(N), rand(N - 1), :U)
-    T = Tridiagonal(rand(N - 1), rand(N), rand(N - 1))
-    U = UpperTriangular(rand(N,N))
-    L = LowerTriangular(rand(N,N))
-    M = Matrix(rand(N,N))
-    structuredarrays = (M, D, B, T, U, L)
-    fstructuredarrays = map(Array, structuredarrays)
-    for (X, fX) in zip(structuredarrays, fstructuredarrays)
-        @test (Q = map(sin, X); typeof(Q) == typeof(X) && Q == map(sin, fX))
-        @test map!(sin, Z, X) == map(sin, fX)
-        @test (Q = map(cos, X); Q isa Matrix && Q == map(cos, fX))
-        @test map!(cos, Z, X) == map(cos, fX)
-        @test (Q = map(+, fA, X); Q isa Matrix && Q == map(+, fA, fX))
-        @test map!(+, Z, fA, X) == map(+, fA, fX)
-        for (Y, fY) in zip(structuredarrays, fstructuredarrays)
-            @test map(+, X, Y) == map(+, fX, fY)
-            @test map!(+, Z, X, Y) == map(+, fX, fY)
-            @test map(*, X, Y) == map(*, fX, fY)
-            @test map!(*, Z, X, Y) == map(*, fX, fY)
-            @test map(+, X, fA, Y) == map(+, fX, fA, fY)
-            @test map!(+, Z, X, fA, Y) == map(+, fX, fA, fY)
-        end
-    end
-    diagonals = (D, B, T)
-    fdiagonals = map(Array, diagonals)
-    for (X, fX) in zip(diagonals, fdiagonals)
-        for (Y, fY) in zip(diagonals, fdiagonals)
-            @test map(+, X, Y)::Union{Diagonal,Bidiagonal,Tridiagonal} == broadcast(+, fX, fY)
-            @test map!(+, Z, X, Y) == broadcast(+, fX, fY)
-            @test map(*, X, Y)::Union{Diagonal,Bidiagonal,Tridiagonal} == broadcast(*, fX, fY)
-            @test map!(*, Z, X, Y) == broadcast(*, fX, fY)
-        end
-    end
-end
-
-@testset "Issue #33397" begin
-    N = 5
-    U = UpperTriangular(rand(N, N))
-    L = LowerTriangular(rand(N, N))
-    UnitU = UnitUpperTriangular(rand(N, N))
-    UnitL = UnitLowerTriangular(rand(N, N))
-    D = Diagonal(rand(N))
-    @test U .+ L .+ D == U + L + D
-    @test L .+ U .+ D == L + U + D
-    @test UnitU .+ UnitL .+ D == UnitU + UnitL + D
-    @test UnitL .+ UnitU .+ D == UnitL + UnitU + D
-    @test U .+ UnitL .+ D == U + UnitL + D
-    @test L .+ UnitU .+ D == L + UnitU + D
-    @test L .+ U .+ L .+ U == L + U + L + U
-    @test U .+ L .+ U .+ L == U + L + U + L
-    @test L .+ UnitL .+ UnitU .+ U .+ D == L + UnitL + UnitU + U + D
-    @test L .+ U .+ D .+ D .+ D .+ D == L + U + D + D + D + D
-end
-@testset "Broadcast Returned Types" begin
-    # Issue 35245
-    N = 3
-    dV = rand(N)
-    evu = rand(N-1)
-    evl = rand(N-1)
-
-    Bu = Bidiagonal(dV, evu, :U)
-    Bl = Bidiagonal(dV, evl, :L)
-    T = Tridiagonal(evl, dV * 2, evu)
-
-    @test typeof(Bu .+ Bl) <: Tridiagonal
-    @test typeof(Bl .+ Bu) <: Tridiagonal
-    @test typeof(Bu .+ Bu) <: Bidiagonal
-    @test typeof(Bl .+ Bl) <: Bidiagonal
-    @test Bu .+ Bl == T
-    @test Bl .+ Bu == T
-    @test Bu .+ Bu == Bidiagonal(dV * 2, evu * 2, :U)
-    @test Bl .+ Bl == Bidiagonal(dV * 2, evl * 2, :L)
-
-
-    @test typeof(Bu .* Bl) <: Tridiagonal
-    @test typeof(Bl .* Bu) <: Tridiagonal
-    @test typeof(Bu .* Bu) <: Bidiagonal
-    @test typeof(Bl .* Bl) <: Bidiagonal
-
-    @test Bu .* Bl == Tridiagonal(zeros(N-1), dV .* dV, zeros(N-1))
-    @test Bl .* Bu == Tridiagonal(zeros(N-1), dV .* dV, zeros(N-1))
-    @test Bu .* Bu == Bidiagonal(dV .* dV, evu .* evu, :U)
-    @test Bl .* Bl == Bidiagonal(dV .* dV, evl .* evl, :L)
-
-    Bu2 =  Bu .* 2
-    @test typeof(Bu2) <: Bidiagonal && Bu2.uplo == 'U'
-    Bu2 = 2 .* Bu
-    @test typeof(Bu2) <: Bidiagonal && Bu2.uplo == 'U'
-    Bl2 =  Bl .* 2
-    @test typeof(Bl2) <: Bidiagonal && Bl2.uplo == 'L'
-    Bu2 = 2 .* Bl
-    @test typeof(Bl2) <: Bidiagonal && Bl2.uplo == 'L'
-
-    # Example of Nested Broadcasts
-    tmp = (1 .* 2) .* (Bidiagonal(1:3, 1:2, 'U') .* (3 .* 4)) .* (5 .* Bidiagonal(1:3, 1:2, 'L'))
-    @test typeof(tmp) <: Tridiagonal
-
-end
-
-struct Zero36193 end
-Base.iszero(::Zero36193) = true
-LinearAlgebra.iszerodefined(::Type{Zero36193}) = true
-@testset "PR #36193" begin
-    f(::Union{Int, Zero36193}) = Zero36193()
-    function test(el)
-        M = [el el
-             el el]
-        v = [el, el]
-        U = UpperTriangular(M)
-        L = LowerTriangular(M)
-        D = Diagonal(v)
-        for (T, A) in [(UpperTriangular, U), (LowerTriangular, L), (Diagonal, D)]
-            @test identity.(A) isa typeof(A)
-            @test map(identity, A) isa typeof(A)
-            @test f.(A) isa T{Zero36193}
-            @test map(f, A) isa T{Zero36193}
-        end
-    end
-    # This should not need `zero(::Type{Zero36193})` to be defined
-    test(1)
-    Base.zero(::Type{Zero36193}) = Zero36193()
-    # This should not need `==(::Zero36193, ::Int)` to be defined as `iszerodefined`
-    # returns true.
-    test(Zero36193())
-end
-
-# structured broadcast with function returning non-number type
-@test tuple.(Diagonal([1, 2])) == [(1,) (0,); (0,) (2,)]
-
-end
diff --git a/stdlib/LinearAlgebra/test/svd.jl b/stdlib/LinearAlgebra/test/svd.jl
deleted file mode 100644
index 7f2aad904a88f..0000000000000
--- a/stdlib/LinearAlgebra/test/svd.jl
+++ /dev/null
@@ -1,276 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestSVD
-
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted
-
-@testset "Simple svdvals / svd tests" begin
-    ≊(x,y) = isapprox(x,y,rtol=1e-15)
-
-    m = [2, 0]
-    @test @inferred(svdvals(m)) ≊ [2]
-    @test @inferred(svdvals!(float(m))) ≊ [2]
-    for sf in (@inferred(svd(m)), @inferred(svd!(float(m))))
-        @test sf.S ≊ [2]
-        @test sf.U'sf.U ≊ [1]
-        @test sf.Vt'sf.Vt ≊ [1]
-        @test sf.U*Diagonal(sf.S)*sf.Vt' ≊ m
-    end
-    F = @inferred svd(m, full=true)
-    @test size(F.U) == (2, 2)
-    @test F.S ≊ [2]
-    @test F.U'F.U ≊ Matrix(I, 2, 2)
-    @test F.Vt'*F.Vt ≊ [1]
-    @test @inferred(svdvals(3:4)) ≊ [5]
-    A = Matrix(1.0I, 2, 2)
-    Z = svd(Hermitian(A); full=true)
-    @test Z.S ≈ ones(2)
-    @test Z.U'Z.U ≈ I(2)
-
-    m1 = [2 0; 0 0]
-    m2 = [2 -2; 1 1]/sqrt(2)
-    m2c = Complex.([2 -2; 1 1]/sqrt(2))
-    @test @inferred(svdvals(m1))  ≊ [2, 0]
-    @test @inferred(svdvals(m2))  ≊ [2, 1]
-    @test @inferred(svdvals(m2c)) ≊ [2, 1]
-
-    sf1 = @inferred svd(m1)
-    sf2 = @inferred svd(m2)
-    @test sf1.S ≊ [2, 0]
-    @test sf2.S ≊ [2, 1]
-    # U & Vt are unitary
-    I22 = Matrix(I, 2, 2)
-    @test sf1.U*sf1.U'   ≊ I22
-    @test sf1.Vt*sf1.Vt' ≊ I22
-    @test sf2.U*sf2.U'   ≊ I22
-    @test sf2.Vt*sf2.Vt' ≊ I22
-    # SVD not uniquely determined, so just test we can reconstruct the
-    # matrices from the factorization as expected.
-    @test sf1.U*Diagonal(sf1.S)*sf1.Vt' ≊ m1
-    @test sf2.U*Diagonal(sf2.S)*sf2.Vt' ≊ m2
-
-    @test ldiv!([0., 0.], svd(Matrix(I, 2, 2)), [1., 1.]) ≊ [1., 1.]
-    @test inv(svd(Matrix(I, 2, 2))) ≈ I
-    @test inv(svd([1 2; 3 4])) ≈ [-2.0 1.0; 1.5 -0.5]
-    @test inv(svd([1 0 1; 0 1 0])) ≈ [0.5 0.0; 0.0 1.0; 0.5 0.0]
-    @test_throws SingularException inv(svd([0 0; 0 0]))
-    @test inv(svd([1+2im 3+4im; 5+6im 7+8im])) ≈ [-0.5 + 0.4375im 0.25 - 0.1875im; 0.375 - 0.3125im -0.125 + 0.0625im]
-end
-
-n = 10
-
-Random.seed!(1234321)
-
-areal = randn(n,n)/2
-aimg  = randn(n,n)/2
-
-@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int)
-    aa = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
-    asym = aa' + aa                 # symmetric indefinite
-    for a in (aa, view(aa, 1:n, 1:n))
-        usv = svd(a)
-        @testset "singular value decomposition" begin
-            @test usv.S === svdvals(usv)
-            @test usv.U * (Diagonal(usv.S) * usv.Vt) ≈ a
-            @test convert(Array, usv) ≈ a
-            @test usv.Vt' ≈ usv.V
-            @test_throws ErrorException usv.Z
-            b = rand(eltya,n)
-            @test usv\b ≈ a\b
-            @test Base.propertynames(usv) == (:U, :S, :V, :Vt)
-            @test size(usv) == size(a)
-            if eltya <: BlasFloat
-                svdz = svd!(Matrix{eltya}(undef,0,0))
-                @test svdz.U ≈ Matrix{eltya}(I, 0, 0)
-                @test svdz.S ≈ real(zeros(eltya,0))
-                @test svdz.Vt ≈ Matrix{eltya}(I, 0, 0)
-            end
-        end
-        @testset "singular value decomposition of adjoint/transpose" begin
-            for transform in (adjoint, transpose)
-                usv = svd(transform(a))
-                @test usv.S === svdvals(usv)
-                @test usv.U * (Diagonal(usv.S) * usv.Vt) ≈ transform(a)
-                @test convert(Array, usv) ≈ transform(a)
-                @test usv.Vt' ≈ usv.V
-                @test_throws ErrorException usv.Z
-                b = rand(eltya,n)
-                @test usv\b ≈ transform(a)\b
-            end
-        end
-        @testset "Generalized svd" begin
-            a_svd = a[1:div(n, 2), :]
-            gsvd = svd(a,a_svd)
-            @test Base.propertynames(gsvd) == (:alpha, :beta, :vals, :S, :D1, :D2, :R0, :U, :V, :Q, :a, :b, :k, :l, :R)
-            @test gsvd.U*gsvd.D1*gsvd.R*gsvd.Q' ≈ a
-            @test gsvd.V*gsvd.D2*gsvd.R*gsvd.Q' ≈ a_svd
-            @test usv.Vt' ≈ usv.V
-            @test_throws ErrorException usv.Z
-            @test_throws ErrorException gsvd.Z
-            @test gsvd.vals ≈ svdvals(a,a_svd)
-            α = eltya == Int ? -1 : rand(eltya)
-            β = svd(α)
-            @test β.S == [abs(α)]
-            @test svdvals(α) == abs(α)
-            u,v,q,d1,d2,r0 = svd(a,a_svd)
-            @test u ≈ gsvd.U
-            @test v ≈ gsvd.V
-            @test d1 ≈ gsvd.D1
-            @test d2 ≈ gsvd.D2
-            @test q ≈ gsvd.Q
-            @test gsvd.a.^2 + gsvd.b.^2 ≈ fill(1, length(gsvd.a))
-            @test gsvd.alpha.^2 + gsvd.beta.^2 ≈ ones(eltya, length(gsvd.a))
-            #testing the other layout for D1 & D2
-            b = rand(eltya,n,2*n)
-            c = rand(eltya,n,2*n)
-            gsvd = svd(b,c)
-            @test gsvd.U*gsvd.D1*gsvd.R*gsvd.Q' ≈ b
-            @test gsvd.V*gsvd.D2*gsvd.R*gsvd.Q' ≈ c
-            # AbstractMatrix svd
-            T = Tridiagonal(a)
-            asvd = svd(T, a)
-            @test asvd.U*asvd.D1*asvd.R*asvd.Q' ≈ T
-            @test asvd.V*asvd.D2*asvd.R*asvd.Q' ≈ a
-            @test all(≈(1), svdvals(T, T))
-        end
-    end
-    @testset "singular value decomposition of AbstractMatrix" begin
-        A = Tridiagonal(aa)
-        F = svd(A)
-        @test Matrix(F) ≈ A
-        @test svdvals(A) ≈ F.S
-    end
-    @testset "singular value decomposition of Hermitian/real-Symmetric" begin
-        for T in (eltya <: Real ? (Symmetric, Hermitian) : (Hermitian,))
-            usv = svd(T(asym))
-            @test usv.S === svdvals(usv)
-            @test usv.U * (Diagonal(usv.S) * usv.Vt) ≈ T(asym)
-            @test convert(Array, usv) ≈ T(asym)
-            @test usv.Vt' ≈ usv.V
-            @test_throws ErrorException usv.Z
-            b = rand(eltya,n)
-            @test usv\b ≈ T(asym)\b
-        end
-    end
-    if eltya <: LinearAlgebra.BlasReal
-        @testset "Number input" begin
-            x, y = randn(eltya, 2)
-            @test svd(x)    == svd(fill(x, 1, 1))
-            @test svdvals(x)    == first(svdvals(fill(x, 1, 1)))
-            @test svd(x, y) == svd(fill(x, 1, 1), fill(y, 1, 1))
-            @test svdvals(x, y) ≈  first(svdvals(fill(x, 1, 1), fill(y, 1, 1)))
-        end
-    end
-    if eltya != Int
-        @testset "isequal, ==, and hash" begin
-            x, y   = rand(eltya), convert(eltya, NaN)
-            Fx, Fy = svd(x), svd(y)
-            @test   Fx == Fx
-            @test !(Fy == Fy)
-            @test isequal(Fy, Fy)
-            @test hash(Fx)          == hash(Fx)
-            @test hash(Fx, UInt(1)) == hash(Fx, UInt(1))
-            @test hash(Fy)          == hash(Fy)
-            @test hash(Fy, UInt(1)) == hash(Fy, UInt(1))
-        end
-    end
-end
-
-
-
-@testset "SVD Algorithms" begin
-    ≊(x,y) = isapprox(x,y,rtol=1e-15)
-
-    x = [0.1 0.2; 0.3 0.4]
-
-    for alg in [LinearAlgebra.QRIteration(), LinearAlgebra.DivideAndConquer()]
-        sx1 = svd(x, alg = alg)
-        @test sx1.U * Diagonal(sx1.S) * sx1.Vt ≊ x
-        @test sx1.V * sx1.Vt ≊ I
-        @test sx1.U * sx1.U' ≊ I
-        @test all(sx1.S .≥ 0)
-
-        sx2 = svd!(copy(x), alg = alg)
-        @test sx2.U * Diagonal(sx2.S) * sx2.Vt ≊ x
-        @test sx2.V * sx2.Vt ≊ I
-        @test sx2.U * sx2.U' ≊ I
-        @test all(sx2.S .≥ 0)
-    end
-end
-
-@testset "REPL printing of SVD" begin
-    svdd = svd(randn(3, 3))
-    svdstring = sprint((t, s) -> show(t, "text/plain", s), svdd)
-    ustring = sprint((t, s) -> show(t, "text/plain", s), svdd.U)
-    sstring = sprint((t, s) -> show(t, "text/plain", s), svdd.S)
-    vtstring = sprint((t, s) -> show(t, "text/plain", s), svdd.Vt)
-    @test svdstring == "$(summary(svdd))\nU factor:\n$ustring\nsingular values:\n$sstring\nVt factor:\n$vtstring"
-end
-
-@testset "REPL printing of Generalized SVD" begin
-    a = randn(3, 3)
-    b = randn(3, 3)
-    svdd = svd(a, b)
-    svdstring = sprint((t, s) -> show(t, "text/plain", s), svdd)
-    ustring = sprint((t, s) -> show(t, "text/plain", s), svdd.U)
-    qstring = sprint((t, s) -> show(t, "text/plain", s), svdd.Q)
-    vstring = sprint((t, s) -> show(t, "text/plain", s), svdd.V)
-    d1string = sprint((t, s) -> show(t, "text/plain", s), svdd.D1)
-    d2string = sprint((t, s) -> show(t, "text/plain", s), svdd.D2)
-    r0string = sprint((t, s) -> show(t, "text/plain", s), svdd.R0)
-    @test svdstring == "$(summary(svdd))\nU factor:\n$ustring\nV factor:\n$vstring\nQ factor:\n$qstring\nD1 factor:\n$d1string\nD2 factor:\n$d2string\nR0 factor:\n$r0string"
-end
-
-@testset "c-tor with varying input eltypes" begin
-    A = randn(Float64, 10, 10)
-    U, S, V = svd(A)
-    Ut = convert.(Float16, U)
-    Vt = convert.(Float32, V)
-    svdc = SVD{ComplexF32}(Ut, S, Vt)
-    @test svdc isa SVD{ComplexF32}
-    Uc, Sc, Vc = svdc
-    @test Uc * diagm(0=>Sc) * transpose(V) ≈ complex.(A) rtol=1e-3
-end
-
-@testset "Issue 40944. ldiV!(SVD) should update rhs" begin
-    F = svd(randn(2, 2))
-    b = randn(2)
-    x = ldiv!(F, b)
-    @test x === b
-end
-
-@testset "adjoint of SVD" begin
-    n = 5
-    B = randn(5, 2)
-
-    @testset "size(b)=$(size(b))" for b in (B[:, 1], B)
-        @testset "size(A)=$(size(A))" for A in (
-            randn(n, n),
-            # Wide problems become minimum norm (in x) problems similarly to LQ
-            randn(n + 2, n),
-            randn(n - 2, n),
-            complex.(randn(n, n), randn(n, n)))
-
-            F = svd(A)
-            x = F'\b
-            @test x ≈ A'\b
-            @test length(size(x)) == length(size(b))
-        end
-    end
-end
-
-@testset "Float16" begin
-    A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
-    B = svd(A)
-    B32 = svd(Float32.(A))
-    @test B isa SVD{Float16, Float16, Matrix{Float16}}
-    @test B.U isa Matrix{Float16}
-    @test B.Vt isa Matrix{Float16}
-    @test B.S isa Vector{Float16}
-    @test B.U ≈ B32.U
-    @test B.Vt ≈ B32.Vt
-    @test B.S ≈ B32.S
-end
-
-end # module TestSVD
diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl
deleted file mode 100644
index 224b7b31a50df..0000000000000
--- a/stdlib/LinearAlgebra/test/symmetric.jl
+++ /dev/null
@@ -1,887 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestSymmetric
-
-using Test, LinearAlgebra, Random
-
-Random.seed!(1010)
-
-@testset "Pauli σ-matrices: $σ" for σ in map(Hermitian,
-        Any[ [1 0; 0 1], [0 1; 1 0], [0 -im; im 0], [1 0; 0 -1] ])
-    @test ishermitian(σ)
-end
-
-@testset "Two-dimensional Euler formula for Hermitian" begin
-    @test cis(Hermitian([π 0; 0 π])) ≈ -I
-end
-
-@testset "Hermitian matrix exponential/log" begin
-    A1 = randn(4,4) + im*randn(4,4)
-    A2 = A1 + A1'
-    @test exp(A2) ≈ exp(Hermitian(A2))
-    @test cis(A2) ≈ cis(Hermitian(A2))
-    @test log(A2) ≈ log(Hermitian(A2))
-    A3 = A1 * A1' # posdef
-    @test exp(A3) ≈ exp(Hermitian(A3))
-    @test cis(A3) ≈ cis(Hermitian(A3))
-    @test log(A3) ≈ log(Hermitian(A3))
-
-    A1 = randn(4,4)
-    A3 = A1 * A1'
-    A4 = A1 + transpose(A1)
-    @test exp(A4) ≈ exp(Symmetric(A4))
-    @test log(A3) ≈ log(Symmetric(A3))
-    @test log(A3) ≈ log(Hermitian(A3))
-end
-
-@testset "Core functionality" begin
-    n = 10
-    areal = randn(n,n)/2
-    aimg  = randn(n,n)/2
-    @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int)
-        a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
-        asym = transpose(a) + a                 # symmetric indefinite
-        aherm = a' + a                 # Hermitian indefinite
-        apos  = a' * a                 # Hermitian positive definite
-        aposs = apos + transpose(apos)        # Symmetric positive definite
-        ε = εa = eps(abs(float(one(eltya))))
-
-        x = randn(n)
-        y = randn(n)
-        b = randn(n,n)/2
-        x = eltya == Int ? rand(1:7, n) : convert(Vector{eltya}, eltya <: Complex ? complex.(x, zeros(n)) : x)
-        y = eltya == Int ? rand(1:7, n) : convert(Vector{eltya}, eltya <: Complex ? complex.(y, zeros(n)) : y)
-        b = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(b, zeros(n,n)) : b)
-        @testset "basic ops" begin
-            @testset "constructor" begin
-                @test Symmetric(Symmetric(asym, :U))     === Symmetric(asym, :U)
-                @test Hermitian(Hermitian(aherm, :U))    === Hermitian(aherm, :U)
-                @test Symmetric(Symmetric(asym, :U), :U) === Symmetric(asym, :U)
-                @test Hermitian(Hermitian(aherm, :U), :U) === Hermitian(aherm, :U)
-                @test_throws ArgumentError Symmetric(Symmetric(asym, :U), :L)
-                @test_throws ArgumentError Hermitian(Hermitian(aherm, :U), :L)
-
-                @test_throws ArgumentError Symmetric(asym, :R)
-                @test_throws ArgumentError Hermitian(asym, :R)
-
-                @test_throws MethodError Symmetric{eltya,typeof(asym)}(asym, :L)
-                @test_throws MethodError Hermitian{eltya,typeof(aherm)}(aherm, :L)
-
-                # mixed cases with Hermitian/Symmetric
-                if eltya <: Real
-                    @test Symmetric(Hermitian(aherm, :U))     === Symmetric(aherm, :U)
-                    @test Hermitian(Symmetric(asym, :U))     === Hermitian(asym, :U)
-                    @test Symmetric(Hermitian(aherm, :U), :U) === Symmetric(aherm, :U)
-                    @test Hermitian(Symmetric(asym, :U), :U) === Hermitian(asym, :U)
-                    @test_throws ArgumentError Symmetric(Hermitian(aherm, :U), :L)
-                    @test_throws ArgumentError Hermitian(Symmetric(aherm, :U), :L)
-                end
-            end
-            @testset "diag" begin
-                D = Diagonal(x)
-                DM = Matrix(D)
-                B = diagm(-1 => x, 1 => x)
-                for uplo in (:U, :L)
-                    @test diag(Symmetric(D, uplo))::Vector == x
-                    @test diag(Hermitian(D, uplo))::Vector == real(x)
-                    @test isdiag(Symmetric(DM, uplo))
-                    @test isdiag(Hermitian(DM, uplo))
-                    @test !isdiag(Symmetric(B, uplo))
-                    @test !isdiag(Hermitian(B, uplo))
-                end
-            end
-            @testset "similar" begin
-                @test isa(similar(Symmetric(asym)), Symmetric{eltya})
-                @test isa(similar(Hermitian(aherm)), Hermitian{eltya})
-                @test isa(similar(Symmetric(asym), Int), Symmetric{Int})
-                @test isa(similar(Hermitian(aherm), Int), Hermitian{Int})
-                @test isa(similar(Symmetric(asym), (3,2)), Matrix{eltya})
-                @test isa(similar(Hermitian(aherm), (3,2)), Matrix{eltya})
-                @test isa(similar(Symmetric(asym), Int, (3,2)), Matrix{Int})
-                @test isa(similar(Hermitian(aherm), Int, (3,2)), Matrix{Int})
-            end
-
-            @testset "Array/Matrix constructor from Symmetric/Hermitian" begin
-                @test asym  == Matrix(Symmetric(asym))  == Array(Symmetric(asym))
-                @test aherm == Matrix(Hermitian(aherm)) == Array(Hermitian(aherm))
-            end
-
-            @testset "parent" begin
-                @test asym === parent(Symmetric(asym))
-                @test aherm === parent(Hermitian(aherm))
-            end
-            # Unary minus for Symmetric/Hermitian matrices
-            @testset "Unary minus for Symmetric/Hermitian matrices" begin
-                @test (-Symmetric(asym))::typeof(Symmetric(asym)) == -asym
-                @test (-Hermitian(aherm))::typeof(Hermitian(aherm)) == -aherm
-                @test (-Symmetric([true true; false false]))::Symmetric{Int,Matrix{Int}} == [-1 -1; -1 0]
-                @test (-Hermitian([true false; true false]))::Hermitian{Int,Matrix{Int}} == [-1 0; 0 0]
-            end
-
-            @testset "Addition and subtraction for Symmetric/Hermitian matrices" begin
-                for f in (+, -)
-                    @test (f(Symmetric(asym), Symmetric(aposs)))::typeof(Symmetric(asym)) == f(asym, aposs)
-                    @test (f(Hermitian(aherm), Hermitian(apos)))::typeof(Hermitian(aherm)) == f(aherm, apos)
-                    @test (f(Symmetric(real(asym)), Hermitian(aherm)))::typeof(Hermitian(aherm)) == f(real(asym), aherm)
-                    @test (f(Hermitian(aherm), Symmetric(real(asym))))::typeof(Hermitian(aherm)) == f(aherm, real(asym))
-                    @test (f(Symmetric(asym), Hermitian(aherm))) == f(asym, aherm)
-                    @test (f(Hermitian(aherm), Symmetric(asym))) == f(aherm, asym)
-                end
-            end
-
-            @testset "getindex and unsafe_getindex" begin
-                @test aherm[1,1] == Hermitian(aherm)[1,1]
-                @test asym[1,1] == Symmetric(asym)[1,1]
-                @test Symmetric(asym)[1:2,1:2] == asym[1:2,1:2]
-                @test Hermitian(aherm)[1:2,1:2] == aherm[1:2,1:2]
-            end
-
-            @testset "conversion" begin
-                @test Symmetric(asym) == convert(Symmetric,Symmetric(asym))
-                if eltya <: Real
-                    typs = [Float16,Float32,Float64]
-                    for typ in typs
-                        @test Symmetric(convert(Matrix{typ},asym)) == convert(Symmetric{typ,Matrix{typ}},Symmetric(asym))
-                    end
-                end
-                if eltya <: Complex
-                    typs = [ComplexF32,ComplexF64]
-                    for typ in typs
-                        @test Symmetric(convert(Matrix{typ},asym)) == convert(Symmetric{typ,Matrix{typ}},Symmetric(asym))
-                        @test Hermitian(convert(Matrix{typ},aherm)) == convert(Hermitian{typ,Matrix{typ}},Hermitian(aherm))
-                    end
-                end
-                @test Symmetric{eltya, Matrix{eltya}}(Symmetric(asym, :U)) === Symmetric(asym, :U)
-                @test Hermitian{eltya, Matrix{eltya}}(Hermitian(aherm, :U)) === Hermitian(aherm, :U)
-            end
-
-            @testset "issymmetric, ishermitian" begin
-                @test issymmetric(Symmetric(asym))
-                @test ishermitian(Hermitian(aherm))
-                if eltya <: Real
-                    @test ishermitian(Symmetric(asym))
-                    @test issymmetric(Hermitian(asym))
-                elseif eltya <: Complex
-                    # test that zero imaginary component is
-                    # handled properly
-                    @test ishermitian(Symmetric(b + b'))
-                end
-            end
-
-            @testset "tril/triu" begin
-                for (op, validks) in (
-                        (triu, (-n + 1):(n + 1)),
-                        (tril, (-n - 1):(n - 1)) )
-                    for di in validks
-                        @test op(Symmetric(asym), di) == op(asym, di)
-                        @test op(Hermitian(aherm), di) == op(aherm, di)
-                        @test op(Symmetric(asym, :L), di) == op(asym, di)
-                        @test op(Hermitian(aherm, :L), di) == op(aherm, di)
-                    end
-                end
-            end
-
-            @testset "transpose, adjoint" begin
-                S = Symmetric(asym)
-                H = Hermitian(aherm)
-                @test transpose(S) === S == asym
-                @test adjoint(H) === H == aherm
-                if eltya <: Real
-                    @test adjoint(S) === S == asym
-                    @test  transpose(H) === H == aherm
-                else
-                    @test adjoint(S) ==  Symmetric(conj(asym))
-                    @test transpose(H) ==  Hermitian(copy(transpose(aherm)))
-                end
-                @test copy(adjoint(H)) == copy(aherm)
-                @test copy(transpose(S)) == copy(asym)
-            end
-
-            @testset "real, imag" begin
-                S = Symmetric(asym)
-                H = Hermitian(aherm)
-                @test issymmetric(real(S))
-                @test ishermitian(real(H))
-                if eltya <: Real
-                    @test real(S) === S == asym
-                    @test real(H) === H == aherm
-                elseif eltya <: Complex
-                    @test issymmetric(imag(S))
-                    @test !ishermitian(imag(H))
-                end
-            end
-
-        end
-
-        @testset "linalg unary ops" begin
-            @testset "tr" begin
-                @test tr(asym) == tr(Symmetric(asym))
-                @test tr(aherm) == tr(Hermitian(aherm))
-            end
-
-            @testset "isposdef[!]" begin
-                @test isposdef(Symmetric(asym))  == isposdef(asym)
-                @test isposdef(Symmetric(aposs)) == isposdef(aposs) == true
-                @test isposdef(Hermitian(aherm)) == isposdef(aherm)
-                @test isposdef(Hermitian(apos))  == isposdef(apos) == true
-                if eltya != Int #chol! won't work with Int
-                    @test isposdef!(Symmetric(copy(asym)))  == isposdef(asym)
-                    @test isposdef!(Symmetric(copy(aposs))) == isposdef(aposs) == true
-                    @test isposdef!(Hermitian(copy(aherm))) == isposdef(aherm)
-                    @test isposdef!(Hermitian(copy(apos)))  == isposdef(apos) == true
-                end
-            end
-
-            @testset "$f" for f in (det, logdet, logabsdet)
-                for uplo in (:U, :L)
-                    @test all(f(apos)  .≈ f(Hermitian(apos, uplo)))
-                    @test all(f(aposs) .≈ f(Symmetric(aposs, uplo)))
-                    if f != logdet
-                        @test all(f(aherm) .≈ f(Hermitian(aherm, uplo)))
-                        @test all(f(asym)  .≈ f(Symmetric(asym, uplo)))
-                    end
-                end
-            end
-
-            @testset "inversion" begin
-                for uplo in (:U, :L)
-                    @test inv(Symmetric(asym, uplo))::Symmetric ≈ inv(asym)
-                    @test inv(Hermitian(aherm, uplo))::Hermitian ≈ inv(aherm)
-                    @test inv(Symmetric(a, uplo))::Symmetric ≈ inv(Matrix(Symmetric(a, uplo)))
-                    if eltya <: Real
-                        @test inv(Hermitian(a, uplo))::Hermitian ≈ inv(Matrix(Hermitian(a, uplo)))
-                    end
-                end
-                if eltya <: LinearAlgebra.BlasComplex
-                    @testset "inverse edge case with complex Hermitian" begin
-                        # Hermitian matrix, where inv(lu(A)) generates non-real diagonal elements
-                        for T in (ComplexF32, ComplexF64)
-                            A = T[0.650488+0.0im 0.826686+0.667447im; 0.826686-0.667447im 1.81707+0.0im]
-                            H = Hermitian(A)
-                            @test inv(H) ≈ inv(A)
-                            @test ishermitian(Matrix(inv(H)))
-                        end
-                    end
-                end
-                if eltya <: AbstractFloat
-                @testset "inv should error with NaNs/Infs" begin
-                    h = Hermitian(fill(eltya(NaN), 2, 2))
-                    @test_throws ArgumentError inv(h)
-                    s = Symmetric(fill(eltya(NaN), 2, 2))
-                    @test_throws ArgumentError inv(s)
-                end
-                end
-            end
-
-            # Revisit when implemented in julia
-            if eltya != BigFloat
-                @testset "cond" begin
-                    if eltya <: Real #svdvals! has no method for Symmetric{Complex}
-                        @test cond(Symmetric(asym)) ≈ cond(asym)
-                    end
-                    @test cond(Hermitian(aherm)) ≈ cond(aherm)
-                end
-
-                @testset "symmetric eigendecomposition" begin
-                    if eltya <: Real # the eigenvalues are only real and ordered for Hermitian matrices
-                        d, v = eigen(asym)
-                        @test asym*v[:,1] ≈ d[1]*v[:,1]
-                        @test v*Diagonal(d)*transpose(v) ≈ asym
-                        @test isequal(eigvals(asym[1]), eigvals(asym[1:1,1:1])[1])
-                        @test abs.(eigen(Symmetric(asym), 1:2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
-                        @test abs.(eigen(Symmetric(asym), d[1] - 1, (d[2] + d[3])/2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
-                        @test eigvals(Symmetric(asym), 1:2) ≈ d[1:2]
-                        @test eigvals(Symmetric(asym), sortby= x -> -x) ≈ eigvals(eigen(Symmetric(asym), sortby = x -> -x))
-                        @test eigvals(Symmetric(asym), d[1] - 1, (d[2] + d[3])/2) ≈ d[1:2]
-                        # eigen doesn't support Symmetric{Complex}
-                        @test Matrix(eigen(asym)) ≈ asym
-                        @test eigvecs(Symmetric(asym)) ≈ eigvecs(asym)
-                    end
-
-                    d, v = eigen(aherm)
-                    @test aherm*v[:,1] ≈ d[1]*v[:,1]
-                    @test v*Diagonal(d)*v' ≈ aherm
-                    @test isequal(eigvals(aherm[1]), eigvals(aherm[1:1,1:1])[1])
-                    @test abs.(eigen(Hermitian(aherm), 1:2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
-                    @test abs.(eigen(Hermitian(aherm), d[1] - 1, (d[2] + d[3])/2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
-                    @test eigvals(Hermitian(aherm), 1:2) ≈ d[1:2]
-                    @test eigvals(Hermitian(aherm), sortby= x -> -x) ≈ eigvals(eigen(Hermitian(aherm), sortby = x -> -x))
-                    @test eigvals(Hermitian(aherm), d[1] - 1, (d[2] + d[3])/2) ≈ d[1:2]
-                    @test Matrix(eigen(aherm)) ≈ aherm
-                    @test eigvecs(Hermitian(aherm)) ≈ eigvecs(aherm)
-
-                    # relation to svdvals
-                    if eltya <: Real #svdvals! has no method for Symmetric{Complex}
-                        @test sum(sort(abs.(eigvals(Symmetric(asym))))) == sum(sort(svdvals(Symmetric(asym))))
-                    end
-                    @test sum(sort(abs.(eigvals(Hermitian(aherm))))) == sum(sort(svdvals(Hermitian(aherm))))
-                end
-
-                @testset "rank" begin
-                    let A = a[:,1:5]*a[:,1:5]'
-                        # Make sure A is Hermitian even in the presence of rounding error
-                        # xianyi/OpenBLAS#729
-                        A = (A + A') / 2
-                        @test rank(A) == rank(Hermitian(A))
-                    end
-                end
-
-                @testset "pow" begin
-                    # Integer power
-                    @test (asym)^2   ≈ (Symmetric(asym)^2)::Symmetric
-                    @test (asym)^-2  ≈ (Symmetric(asym)^-2)::Symmetric
-                    @test (aposs)^2  ≈ (Symmetric(aposs)^2)::Symmetric
-                    @test (aherm)^2  ≈ (Hermitian(aherm)^2)::Hermitian
-                    @test (aherm)^-2 ≈ (Hermitian(aherm)^-2)::Hermitian
-                    @test (apos)^2   ≈ (Hermitian(apos)^2)::Hermitian
-                    # integer floating point power
-                    @test (asym)^2.0   ≈ (Symmetric(asym)^2.0)::Symmetric
-                    @test (asym)^-2.0  ≈ (Symmetric(asym)^-2.0)::Symmetric
-                    @test (aposs)^2.0  ≈ (Symmetric(aposs)^2.0)::Symmetric
-                    @test (aherm)^2.0  ≈ (Hermitian(aherm)^2.0)::Hermitian
-                    @test (aherm)^-2.0 ≈ (Hermitian(aherm)^-2.0)::Hermitian
-                    @test (apos)^2.0   ≈ (Hermitian(apos)^2.0)::Hermitian
-                    # non-integer floating point power
-                    @test (asym)^2.5   ≈ (Symmetric(asym)^2.5)::Symmetric
-                    @test (asym)^-2.5  ≈ (Symmetric(asym)^-2.5)::Symmetric
-                    @test (aposs)^2.5  ≈ (Symmetric(aposs)^2.5)::Symmetric
-                    @test (aherm)^2.5  ≈ (Hermitian(aherm)^2.5)#::Hermitian
-                    @test (aherm)^-2.5 ≈ (Hermitian(aherm)^-2.5)#::Hermitian
-                    @test (apos)^2.5   ≈ (Hermitian(apos)^2.5)::Hermitian
-                end
-            end
-        end
-
-        @testset "linalg binary ops" begin
-            @testset "mat * vec" begin
-                @test Symmetric(asym)*x+y ≈ asym*x+y
-                # testing fallbacks for transpose-vector * transpose(SymHerm)
-                xadj = transpose(x)
-                @test xadj * transpose(Symmetric(asym)) ≈ xadj * asym
-                @test x' * Symmetric(asym) ≈ x' * asym
-
-                @test Hermitian(aherm)*x+y ≈ aherm*x+y
-                # testing fallbacks for adjoint-vector * SymHerm'
-                xadj = x'
-                @test x' * Hermitian(aherm) ≈ x' * aherm
-                @test xadj * Hermitian(aherm)' ≈ xadj * aherm
-            end
-
-            @testset "mat * mat" begin
-                C = zeros(eltya,n,n)
-                @test Hermitian(aherm) * a ≈ aherm * a
-                @test a * Hermitian(aherm) ≈ a * aherm
-                # rectangular multiplication
-                @test [a; a] * Hermitian(aherm) ≈ [a; a] * aherm
-                @test Hermitian(aherm) * [a a] ≈ aherm * [a a]
-                @test Hermitian(aherm) * Hermitian(aherm) ≈ aherm*aherm
-                @test_throws DimensionMismatch Hermitian(aherm) * Vector{eltya}(undef, n+1)
-                LinearAlgebra.mul!(C,a,Hermitian(aherm))
-                @test C ≈ a*aherm
-
-                @test Symmetric(asym) * Symmetric(asym) ≈ asym*asym
-                @test Symmetric(asym) * a ≈ asym * a
-                @test a * Symmetric(asym) ≈ a * asym
-                # rectangular multiplication
-                @test Symmetric(asym) * [a a] ≈ asym * [a a]
-                @test [a; a] * Symmetric(asym) ≈ [a; a] * asym
-                @test_throws DimensionMismatch Symmetric(asym) * Vector{eltya}(undef, n+1)
-                LinearAlgebra.mul!(C,a,Symmetric(asym))
-                @test C ≈ a*asym
-
-                tri_b = UpperTriangular(triu(b))
-                @test Array(transpose(Hermitian(aherm)) * tri_b) ≈ transpose(aherm) * Array(tri_b)
-                @test Array(tri_b * transpose(Hermitian(aherm))) ≈ Array(tri_b) * transpose(aherm)
-                @test Array(Hermitian(aherm)' * tri_b) ≈ aherm' * Array(tri_b)
-                @test Array(tri_b * Hermitian(aherm)') ≈ Array(tri_b) * aherm'
-
-                @test Array(transpose(Symmetric(asym)) * tri_b) ≈ transpose(asym) * Array(tri_b)
-                @test Array(tri_b * transpose(Symmetric(asym))) ≈ Array(tri_b) * transpose(asym)
-                @test Array(Symmetric(asym)' * tri_b) ≈ asym' * Array(tri_b)
-                @test Array(tri_b * Symmetric(asym)') ≈ Array(tri_b) * asym'
-            end
-            @testset "solver" begin
-                @test Hermitian(aherm)\x ≈ aherm\x
-                @test Hermitian(aherm)\b ≈ aherm\b
-                @test Symmetric(asym)\x  ≈ asym\x
-                @test Symmetric(asym)\b  ≈ asym\b
-                @test Hermitian(Diagonal(aherm))\x ≈ Diagonal(aherm)\x
-                @test Hermitian(Matrix(Diagonal(aherm)))\b ≈ Diagonal(aherm)\b
-                @test Symmetric(Diagonal(asym))\x  ≈ Diagonal(asym)\x
-                @test Symmetric(Matrix(Diagonal(asym)))\b  ≈ Diagonal(asym)\b
-            end
-        end
-        @testset "generalized dot product" begin
-            for uplo in (:U, :L)
-                @test dot(x, Hermitian(aherm, uplo), y) ≈ dot(x, Hermitian(aherm, uplo)*y) ≈ dot(x, Matrix(Hermitian(aherm, uplo)), y)
-                @test dot(x, Hermitian(aherm, uplo), x) ≈ dot(x, Hermitian(aherm, uplo)*x) ≈ dot(x, Matrix(Hermitian(aherm, uplo)), x)
-            end
-            @test dot(x, Hermitian(Diagonal(a)), y) ≈ dot(x, Hermitian(Diagonal(a))*y) ≈ dot(x, Matrix(Hermitian(Diagonal(a))), y)
-            @test dot(x, Hermitian(Diagonal(a)), x) ≈ dot(x, Hermitian(Diagonal(a))*x) ≈ dot(x, Matrix(Hermitian(Diagonal(a))), x)
-            if eltya <: Real
-                for uplo in (:U, :L)
-                    @test dot(x, Symmetric(aherm, uplo), y) ≈ dot(x, Symmetric(aherm, uplo)*y) ≈ dot(x, Matrix(Symmetric(aherm, uplo)), y)
-                    @test dot(x, Symmetric(aherm, uplo), x) ≈ dot(x, Symmetric(aherm, uplo)*x) ≈ dot(x, Matrix(Symmetric(aherm, uplo)), x)
-                end
-            end
-        end
-
-        @testset "dot product of symmetric and Hermitian matrices" begin
-            for mtype in (Symmetric, Hermitian)
-                symau = mtype(a, :U)
-                symal = mtype(a, :L)
-                msymau = Matrix(symau)
-                msymal = Matrix(symal)
-                @test_throws DimensionMismatch dot(symau, mtype(zeros(eltya, n-1, n-1)))
-                for eltyc in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int)
-                    creal = randn(n, n)/2
-                    cimag = randn(n, n)/2
-                    c = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(creal, cimag) : creal)
-                    symcu = mtype(c, :U)
-                    symcl = mtype(c, :L)
-                    msymcu = Matrix(symcu)
-                    msymcl = Matrix(symcl)
-                    @test dot(symau, symcu) ≈ dot(msymau, msymcu)
-                    @test dot(symau, symcl) ≈ dot(msymau, msymcl)
-                    @test dot(symal, symcu) ≈ dot(msymal, msymcu)
-                    @test dot(symal, symcl) ≈ dot(msymal, msymcl)
-                end
-
-                # block matrices
-                blockm = [eltya == Int ? rand(1:7, 3, 3) : convert(Matrix{eltya}, eltya <: Complex ? complex.(randn(3, 3)/2, randn(3, 3)/2) : randn(3, 3)/2) for _ in 1:3, _ in 1:3]
-                symblockmu = mtype(blockm, :U)
-                symblockml = mtype(blockm, :L)
-                msymblockmu = Matrix(symblockmu)
-                msymblockml = Matrix(symblockml)
-                @test dot(symblockmu, symblockmu) ≈ dot(msymblockmu, msymblockmu)
-                @test dot(symblockmu, symblockml) ≈ dot(msymblockmu, msymblockml)
-                @test dot(symblockml, symblockmu) ≈ dot(msymblockml, msymblockmu)
-                @test dot(symblockml, symblockml) ≈ dot(msymblockml, msymblockml)
-            end
-        end
-    end
-end
-
-#Issue #7647: test xsyevr, xheevr, xstevr drivers.
-@testset "Eigenvalues in interval for $(typeof(Mi7647))" for Mi7647 in
-        (Symmetric(diagm(0 => 1.0:3.0)),
-         Hermitian(diagm(0 => 1.0:3.0)),
-         Hermitian(diagm(0 => complex(1.0:3.0))),
-         SymTridiagonal([1.0:3.0;], zeros(2)))
-    @test eigmin(Mi7647)  == eigvals(Mi7647, 0.5, 1.5)[1] == 1.0
-    @test eigmax(Mi7647)  == eigvals(Mi7647, 2.5, 3.5)[1] == 3.0
-    @test eigvals(Mi7647) == eigvals(Mi7647, 0.5, 3.5) == [1.0:3.0;]
-end
-
-@testset "Hermitian wrapper ignores imaginary parts on diagonal" begin
-    A = [1.0+im 2.0; 2.0 0.0]
-    @test !ishermitian(A)
-    @test Hermitian(A)[1,1] == 1
-end
-
-@testset "Issue #7933" begin
-    A7933 = [1 2; 3 4]
-    B7933 = copy(A7933)
-    C7933 = Matrix(Symmetric(A7933))
-    @test A7933 == B7933
-end
-
-@testset "Issues #8057 and #8058. f=$f, A=$A" for f in
-        (eigen, eigvals),
-            A in (Symmetric([0 1; 1 0]), Hermitian([0 im; -im 0]))
-    @test_throws ArgumentError f(A, 3, 2)
-    @test_throws ArgumentError f(A, 1:4)
-end
-
-@testset "Ignore imaginary part of Hermitian diagonal" begin
-    A = [1.0+im 2.0; 2.0 0.0]
-    @test !ishermitian(A)
-    @test diag(Hermitian(A)) == real(diag(A))
-end
-
-@testset "Issue #17780" begin
-    a = randn(2,2)
-    a = a'a
-    b = complex.(a,a)
-    c = Symmetric(b)
-    @test conj(c) == conj(Array(c))
-    cc = copy(c)
-    @test conj!(c) == conj(Array(cc))
-    c = Hermitian(b + b')
-    @test conj(c) == conj(Array(c))
-    cc = copy(c)
-    @test conj!(c) == conj(Array(cc))
-end
-
-@testset "Issue # 19225" begin
-    X = [1 -1; -1 1]
-    for T in (Symmetric, Hermitian)
-        Y = T(copy(X))
-        _Y = similar(Y)
-        copyto!(_Y, Y)
-        @test _Y == Y
-
-        W = T(copy(X), :L)
-        copyto!(W, Y)
-        @test W.data == Y.data
-        @test W.uplo != Y.uplo
-
-        W[1,1] = 4
-        @test W == T([4 -1; -1 1])
-        @test_throws ArgumentError (W[1,2] = 2)
-        if T == Hermitian
-            @test_throws ArgumentError (W[2,2] = 3+4im)
-        end
-
-        @test Y + I == T([2 -1; -1 2])
-        @test Y - I == T([0 -1; -1 0])
-        @test Y * I == Y
-
-        @test Y .+ 1 == T([2 0; 0 2])
-        @test Y .- 1 == T([0 -2; -2 0])
-        @test Y * 2 == T([2 -2; -2 2])
-        @test Y / 1 == Y
-
-        @test T([true false; false true]) .+ true == T([2 1; 1 2])
-    end
-end
-
-@testset "Issue #21981" begin
-    B = complex(rand(4,4))
-    B[4,1] += 1im;
-    @test ishermitian(Symmetric(B, :U))
-    @test issymmetric(Hermitian(B, :U))
-    B[4,1]  = real(B[4,1])
-    B[1,4] += 1im
-    @test ishermitian(Symmetric(B, :L))
-    @test issymmetric(Hermitian(B, :L))
-end
-
-@testset "$HS solver with $RHS RHS - $T" for HS in (Hermitian, Symmetric),
-        RHS in (Hermitian, Symmetric, Diagonal, UpperTriangular, LowerTriangular),
-        T   in (Float64, ComplexF64)
-    D = rand(T, 10, 10); D = D'D
-    A = HS(D)
-    B = RHS(D)
-    @test A\B ≈ Matrix(A)\Matrix(B)
-end
-
-@testset "inversion of Hilbert matrix" begin
-    for T in (Float64, ComplexF64)
-        H = T[1/(i + j - 1) for i in 1:8, j in 1:8]
-        @test norm(inv(Symmetric(H))*(H*fill(1., 8)) .- 1) ≈ 0 atol = 1e-5
-        @test norm(inv(Hermitian(H))*(H*fill(1., 8)) .- 1) ≈ 0 atol = 1e-5
-    end
-end
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
-using .Main.ImmutableArrays
-
-@testset "Conversion to AbstractArray" begin
-    # tests corresponding to #34995
-    immutablemat = ImmutableArray([1 2 3; 4 5 6; 7 8 9])
-    for SymType in (Symmetric, Hermitian)
-        S = Float64
-        symmat = SymType(immutablemat)
-        @test convert(AbstractArray{S}, symmat).data isa ImmutableArray{S}
-        @test convert(AbstractMatrix{S}, symmat).data isa ImmutableArray{S}
-        @test AbstractArray{S}(symmat).data isa ImmutableArray{S}
-        @test AbstractMatrix{S}(symmat).data isa ImmutableArray{S}
-        @test convert(AbstractArray{S}, symmat) == symmat
-        @test convert(AbstractMatrix{S}, symmat) == symmat
-    end
-end
-
-
-@testset "#24572: eltype(A::HermOrSym) === eltype(parent(A))" begin
-    A = rand(Float32, 3, 3)
-    @test_throws TypeError Symmetric{Float64,Matrix{Float32}}(A, 'U')
-    @test_throws TypeError Hermitian{Float64,Matrix{Float32}}(A, 'U')
-end
-
-@testset "fill[stored]!" begin
-    for uplo in (:U, :L)
-        # Hermitian
-        A = Hermitian(fill(1.0+0im, 2, 2), uplo)
-        @test fill!(A, 2) == fill(2, 2, 2)
-        @test A.data == (uplo === :U ? [2 2; 1.0+0im 2] : [2 1.0+0im; 2 2])
-        @test_throws ArgumentError fill!(A, 2+im)
-
-        # Symmetric
-        A = Symmetric(fill(1.0+im, 2, 2), uplo)
-        @test fill!(A, 2) == fill(2, 2, 2)
-        @test A.data == (uplo === :U ? [2 2; 1.0+im 2] : [2 1.0+im; 2 2])
-    end
-end
-
-@testset "#25625 recursive transposition" begin
-    A = Matrix{Matrix{Int}}(undef, 2, 2)
-    A[1,1] = [1 2; 2 3]
-    A[1,2] = [4 5 6; 7 8 9]
-    A[2,1] = [4 7; 5 8; 6 9]
-    A[2,2] = [1 2; 3 4]
-    for uplo in (:U, :L)
-        S = Symmetric(A, uplo)
-        @test S[1,1] == A[1,1]
-        @test S[1,2] == transpose(S[2,1]) == A[1,2]
-        @test S[2,2] == Symmetric(A[2,2], uplo)
-        @test S == transpose(S) == Matrix(S) == Matrix(transpose(S)) == transpose(Matrix(S))
-    end
-
-    B = Matrix{Matrix{Complex{Int}}}(undef, 2, 2)
-    B[1,1] = [1 2+im; 2-im 3]
-    B[1,2] = [4 5+1im 6-2im; 7+3im 8-4im 9+5im]
-    B[2,1] = [4 7-3im; 5-1im 8+4im; 6+2im 9-5im]
-    B[2,2] = [1+1im 2+2im; 3-3im 4-2im]
-    for uplo in (:U, :L)
-        H = Hermitian(B, uplo)
-        @test H[1,1] == Hermitian(B[1,1], uplo)
-        @test H[1,2] == adjoint(H[2,1]) == B[1,2]
-        @test H[2,1] == adjoint(H[1,2]) == B[2,1]
-        @test H[2,2] == Hermitian(B[2,2], uplo)
-        @test H == adjoint(H) == Matrix(H) == Matrix(adjoint(H)) == adjoint(Matrix(H))
-    end
-end
-
-@testset "getindex of diagonal element (#25972)" begin
-    A = rand(ComplexF64, 2, 2)
-    @test Hermitian(A, :U)[1,1] == Hermitian(A, :L)[1,1] == real(A[1,1])
-end
-
-@testset "issue #29392: SymOrHerm scaled with Number" begin
-    R = rand(Float64, 2, 2); C = rand(ComplexF64, 2, 2)
-    # Symmetric * Real, Real * Symmetric
-    A = Symmetric(R); x = 2.0
-    @test (A * x)::Symmetric == (x * A)::Symmetric
-    A = Symmetric(C); x = 2.0
-    @test (A * x)::Symmetric == (x * A)::Symmetric
-    # Symmetric * Complex, Complex * Symmetrics
-    A = Symmetric(R); x = 2.0im
-    @test (A * x)::Symmetric == (x * A)::Symmetric
-    A = Symmetric(C); x = 2.0im
-    @test (A * x)::Symmetric == (x * A)::Symmetric
-    # Hermitian * Real, Real * Hermitian
-    A = Hermitian(R); x = 2.0
-    @test (A * x)::Hermitian == (x * A)::Hermitian
-    A = Hermitian(C); x = 2.0
-    @test (A * x)::Hermitian == (x * A)::Hermitian
-    # Hermitian * Complex, Complex * Hermitian
-    A = Hermitian(R); x = 2.0im
-    @test (A * x)::Matrix == (x * A)::Matrix
-    A = Hermitian(C); x = 2.0im
-    @test (A * x)::Matrix == (x * A)::Matrix
-    # Symmetric / Real
-    A = Symmetric(R); x = 2.0
-    @test (A / x)::Symmetric == Matrix(A) / x
-    A = Symmetric(C); x = 2.0
-    @test (A / x)::Symmetric == Matrix(A) / x
-    # Symmetric / Complex
-    A = Symmetric(R); x = 2.0im
-    @test (A / x)::Symmetric == Matrix(A) / x
-    A = Symmetric(C); x = 2.0im
-    @test (A / x)::Symmetric == Matrix(A) / x
-    # Hermitian / Real
-    A = Hermitian(R); x = 2.0
-    @test (A / x)::Hermitian == Matrix(A) / x
-    A = Hermitian(C); x = 2.0
-    @test (A / x)::Hermitian == Matrix(A) / x
-    # Hermitian / Complex
-    A = Hermitian(R); x = 2.0im
-    @test (A / x)::Matrix == Matrix(A) / x
-    A = Hermitian(C); x = 2.0im
-    @test (A / x)::Matrix == Matrix(A) / x
-end
-
-@testset "issue #30814: Symmetric of Hermitian if diag is not real" begin
-    A = [1 2; 3 4] * (1 + im)
-    B = Hermitian(A)
-    @test_throws ArgumentError Symmetric(B) == Symmetric(Matrix(B))
-    A[1,1] = 1; A[2,2] = 4
-    @test Symmetric(B) == Symmetric(Matrix(B))
-end
-
-@testset "issue #32079: det for singular Symmetric matrix" begin
-    A = ones(Float64, 3, 3)
-    @test det(Symmetric(A))::Float64 == det(A) == 0.0
-    @test det(Hermitian(A))::Float64 == det(A) == 0.0
-    A = ones(ComplexF64, 3, 3)
-    @test det(Symmetric(A))::ComplexF64 == det(A) == 0.0
-    @test det(Hermitian(A))::Float64 == det(A) == 0.0
-end
-
-@testset "symmetric()/hermitian() for Numbers" begin
-    @test LinearAlgebra.symmetric(1, :U) == 1
-    @test LinearAlgebra.symmetric_type(Int) == Int
-    @test LinearAlgebra.hermitian(1, :U) == 1
-    @test LinearAlgebra.hermitian_type(Int) == Int
-end
-
-@testset "sqrt(nearly semidefinite)" begin
-    let A = [0.9999999999999998 4.649058915617843e-16 -1.3149405273715513e-16 9.9959579317056e-17; -8.326672684688674e-16 1.0000000000000004 2.9280733590254494e-16 -2.9993900031619594e-16; 9.43689570931383e-16 -1.339206523454095e-15 1.0000000000000007 -8.550505126287743e-16; -6.245004513516506e-16 -2.0122792321330962e-16 1.183061278035052e-16 1.0000000000000002],
-        B = [0.09648289218436859 0.023497875751503007 0.0 0.0; 0.023497875751503007 0.045787575150300804 0.0 0.0; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0],
-        C = Symmetric(A*B*A'), # semidefinite up to roundoff
-        Csqrt = sqrt(C)
-        @test Csqrt isa Symmetric{Float64}
-        @test Csqrt*Csqrt ≈ C rtol=1e-14
-    end
-    let D = Symmetric(Matrix(Diagonal([1 0; 0 -1e-14])))
-        @test sqrt(D) ≈ [1 0; 0 1e-7im] rtol=1e-14
-        @test sqrt(D, rtol=1e-13) ≈ [1 0; 0 0] rtol=1e-14
-        @test sqrt(D, rtol=1e-13)^2 ≈ D rtol=1e-13
-    end
-end
-
-@testset "Multiplications symmetric/hermitian for $T and $S" for T in
-        (Float16, Float32, Float64, BigFloat), S in (ComplexF16, ComplexF32, ComplexF64)
-    let A = transpose(Symmetric(rand(S, 3, 3))), Bv = Vector(rand(T, 3)), Bm = Matrix(rand(T, 3,3))
-        @test A * Bv ≈ Matrix(A) * Bv
-        @test A * Bm ≈ Matrix(A) * Bm
-        @test Bm * A ≈ Bm * Matrix(A)
-    end
-    let A = adjoint(Hermitian(rand(S, 3,3))), Bv = Vector(rand(T, 3)), Bm = Matrix(rand(T, 3,3))
-        @test A * Bv ≈ Matrix(A) * Bv
-        @test A * Bm ≈ Matrix(A) * Bm
-        @test Bm * A ≈ Bm * Matrix(A)
-    end
-    let Ahrs = transpose(Hermitian(Symmetric(rand(T, 3, 3)))),
-        Acs = transpose(Symmetric(rand(S, 3, 3))),
-        Ahcs = transpose(Hermitian(Symmetric(rand(S, 3, 3))))
-
-        @test Ahrs * Ahrs ≈ Ahrs * Matrix(Ahrs)
-        @test Ahrs * Acs ≈ Ahrs * Matrix(Acs)
-        @test Acs * Acs ≈ Matrix(Acs) * Matrix(Acs)
-        @test Acs * Ahrs ≈ Matrix(Acs) * Ahrs
-        @test Ahrs * Ahcs ≈ Matrix(Ahrs) * Ahcs
-        @test Ahcs * Ahrs ≈ Ahcs * Matrix(Ahrs)
-    end
-    let Ahrs = adjoint(Hermitian(Symmetric(rand(T, 3, 3)))),
-        Acs = adjoint(Symmetric(rand(S, 3, 3))),
-        Ahcs = adjoint(Hermitian(Symmetric(rand(S, 3, 3))))
-
-        @test Ahrs * Ahrs ≈ Ahrs * Matrix(Ahrs)
-        @test Ahcs * Ahcs ≈ Matrix(Ahcs) * Matrix(Ahcs)
-        @test Ahrs * Ahcs ≈ Ahrs * Matrix(Ahcs)
-        @test Acs * Ahcs ≈ Acs * Matrix(Ahcs)
-        @test Ahcs * Ahrs ≈ Matrix(Ahcs) * Ahrs
-        @test Ahcs * Acs ≈ Matrix(Ahcs) * Acs
-    end
-end
-
-@testset "Addition/subtraction with SymTridiagonal" begin
-    TR = SymTridiagonal(randn(Float64,5), randn(Float64,4))
-    TC = SymTridiagonal(randn(ComplexF64,5), randn(ComplexF64,4))
-    SR = Symmetric(randn(Float64,5,5))
-    SC = Symmetric(randn(ComplexF64,5,5))
-    HR = Hermitian(randn(Float64,5,5))
-    HC = Hermitian(randn(ComplexF64,5,5))
-    for op = (+,-)
-        for T = (TR, TC), S = (SR, SC)
-            @test op(T, S) == op(Array(T), S)
-            @test op(S, T) == op(S, Array(T))
-            @test op(T, S) isa Symmetric
-            @test op(S, T) isa Symmetric
-        end
-        for H = (HR, HC)
-            for T = (TR, TC)
-                @test op(T, H) == op(Array(T), H)
-                @test op(H, T) == op(H, Array(T))
-            end
-            @test op(TR, H) isa Hermitian
-            @test op(H, TR) isa Hermitian
-        end
-    end
-end
-
-@testset "hermitian part" begin
-    for T in [Float32, Complex{Float32}, Int32, Rational{Int32},
-              Complex{Int32}, Complex{Rational{Int32}}]
-        f, f!, t = hermitianpart, hermitianpart!, T <: Real ? transpose : adjoint
-        X = T[1 2 3; 4 5 6; 7 8 9]
-        T <: Complex && (X .+= im .* X)
-        Xc = copy(X)
-        Y = (X + t(X)) / 2
-        U = f(X)
-        L = f(X, :L)
-        @test U isa Hermitian
-        @test L isa Hermitian
-        @test U.uplo == 'U'
-        @test L.uplo == 'L'
-        @test U == L == Y
-        if T <: AbstractFloat || real(T) <: AbstractFloat
-            HU = f!(X)
-            @test HU == Y
-            @test triu(X) == triu(Y)
-            HL = f!(Xc, :L)
-            @test HL == Y
-            @test tril(Xc) == tril(Y)
-        end
-    end
-    @test_throws DimensionMismatch hermitianpart(ones(1,2))
-    for T in (Float64, ComplexF64), uplo in (:U, :L)
-        A = [randn(T, 2, 2) for _ in 1:2, _ in 1:2]
-        Aherm = hermitianpart(A, uplo)
-        @test Aherm == Aherm.data == (A + A')/2
-        @test Aherm isa Hermitian
-        @test Aherm.uplo == LinearAlgebra.char_uplo(uplo)
-    end
-end
-
-@testset "Structured display" begin
-    @testset "Diagonal" begin
-        d = 10:13
-        D = Diagonal(d)
-        for uplo in (:L, :U), SymHerm in (Symmetric, Hermitian)
-            S = SymHerm(D, uplo)
-            @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, D)
-        end
-
-        d = (10:13) .+ 2im
-        D = Diagonal(d)
-        DR = Diagonal(complex.(real.(d)))
-        for uplo in (:L, :U)
-            H = Hermitian(D, uplo)
-            @test sprint(Base.print_matrix, H) == sprint(Base.print_matrix, DR)
-
-            S = Symmetric(D, uplo)
-            @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, D)
-        end
-    end
-    @testset "Bidiagonal" begin
-        dv, ev = 1:4, 1:3
-        ST = SymTridiagonal(dv, ev)
-        D = Diagonal(dv)
-        for B_uplo in (:L, :U)
-            B = Bidiagonal(dv, ev, B_uplo)
-            for Sym_uplo in (:L, :U), SymHerm in (Symmetric, Hermitian)
-                SB = SymHerm(B, Sym_uplo)
-                teststr = sprint(Base.print_matrix, Sym_uplo == B_uplo ? ST : D)
-                @test sprint(Base.print_matrix, SB) == teststr
-                SB = SymHerm(Transpose(B), Sym_uplo)
-                teststr = sprint(Base.print_matrix, Sym_uplo == B_uplo ? D : ST)
-                @test sprint(Base.print_matrix, SB) == teststr
-            end
-        end
-    end
-    @testset "Tridiagonal" begin
-        superd, d, subd = 3:5, 10:13, 1:3
-        for uplo in (:U, :L), SymHerm in (Symmetric, Hermitian)
-            S = SymHerm(Tridiagonal(subd, d, superd), uplo)
-            ST = SymTridiagonal(d, uplo == :U ? superd : subd)
-            @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, ST)
-        end
-
-        superd, d, subd = collect((3:5)*im), collect(Complex{Int}, 10:13), collect((1:3)*im)
-        for uplo in (:U, :L)
-            S = Symmetric(Tridiagonal(subd, d, superd), uplo)
-            ST = SymTridiagonal(d, uplo == :U ? superd : subd)
-            @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, ST)
-
-            H = Hermitian(Tridiagonal(subd, d, superd), uplo)
-            T = Tridiagonal(uplo == :L ? subd : conj(superd), d, uplo == :U ? superd : conj(subd))
-            @test sprint(Base.print_matrix, H) == sprint(Base.print_matrix, T)
-        end
-    end
-end
-
-end # module TestSymmetric
diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl
deleted file mode 100644
index c28c17255c222..0000000000000
--- a/stdlib/LinearAlgebra/test/symmetriceigen.jl
+++ /dev/null
@@ -1,78 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestSymmetricEigen
-
-using Test, LinearAlgebra
-
-@testset "chol-eigen-eigvals" begin
-    ## Cholesky decomposition based
-
-    # eigenvalue sorting
-    sf = x->(real(x),imag(x))
-
-    ## Real valued
-    A = Float64[1 1 0 0; 1 2 1 0; 0 1 3 1; 0 0 1 4]
-    H = (A+A')/2
-    B = Float64[2 1 4 3; 0 3 1 3; 3 1 0 0; 0 1 3 1]
-    BH = (B+B')/2
-    # PD matrix
-    BPD = B*B'
-    # eigen
-    C = cholesky(BPD)
-    e,v = eigen(A, C; sortby=sf)
-    @test A*v ≈ BPD*v*Diagonal(e)
-    # eigvals
-    @test eigvals(A, BPD; sortby=sf) ≈ eigvals(A, C; sortby=sf)
-
-    ## Complex valued
-    A =  [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im]
-    AH = (A+A')/2
-    B =  [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im]
-    BH = (B+B')/2
-    # PD matrix
-    BPD = B*B'
-    # eigen
-    C = cholesky(BPD)
-    e,v = eigen(A, C; sortby=sf)
-    @test A*v ≈ BPD*v*Diagonal(e)
-    # eigvals
-    @test eigvals(A, BPD; sortby=sf) ≈ eigvals(A, C; sortby=sf)
-end
-
-@testset "issue #49533" begin
-    ## Real valued
-    A = Float64[1 1 0 0; 1 2 1 0; 0 1 3 1; 0 0 1 4]
-    B = Matrix(Diagonal(Float64[1:4;]))
-    # eigen
-    e0,v0 = eigen(A, B)
-    e1,v1 = eigen(A, Symmetric(B))
-    e2,v2 = eigen(Symmetric(A), B)
-    e3,v3 = eigen(Symmetric(A), Symmetric(B))
-    @test e0 ≈ e1 && v0 ≈ v1
-    @test e0 ≈ e2 && v0 ≈ v2
-    @test e0 ≈ e3 && v0 ≈ v3
-    # eigvals
-    @test eigvals(A, B) ≈ eigvals(A, Symmetric(B))
-    @test eigvals(A, B) ≈ eigvals(Symmetric(A), B)
-    @test eigvals(A, B) ≈ eigvals(Symmetric(A), Symmetric(B))
-
-    ## Complex valued
-    A =  [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im]
-    AH = A'A
-    B =  [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im]
-    BH = B'B
-    # eigen
-    sf = x->(real(x),imag(x))
-    e1,v1 = eigen(A, Hermitian(BH))
-    @test A*v1 ≈ Hermitian(BH)*v1*Diagonal(e1)
-    e2,v2 = eigen(Hermitian(AH), B)
-    @test Hermitian(AH)*v2 ≈ B*v2*Diagonal(e2)
-    e3,v3 = eigen(Hermitian(AH), Hermitian(BH))
-    @test Hermitian(AH)*v3 ≈ Hermitian(BH)*v3*Diagonal(e3)
-    # eigvals
-    @test eigvals(A, BH; sortby=sf) ≈ eigvals(A, Hermitian(BH); sortby=sf)
-    @test eigvals(AH, B; sortby=sf) ≈ eigvals(Hermitian(AH), B; sortby=sf)
-    @test eigvals(AH, BH; sortby=sf) ≈ eigvals(Hermitian(AH), Hermitian(BH); sortby=sf)
-end
-
-end # module TestSymmetricEigen
diff --git a/stdlib/LinearAlgebra/test/testgroups b/stdlib/LinearAlgebra/test/testgroups
deleted file mode 100644
index 0f2f4f4af8708..0000000000000
--- a/stdlib/LinearAlgebra/test/testgroups
+++ /dev/null
@@ -1,30 +0,0 @@
-triangular
-addmul
-bidiag
-matmul
-dense
-symmetric
-diagonal
-special
-qr
-cholesky
-blas
-lu
-uniformscaling
-structuredbroadcast
-hessenberg
-svd
-eigen
-tridiag
-lapack
-lq
-adjtrans
-generic
-schur
-bunchkaufman
-givens
-pinv
-factorization
-abstractq
-ldlt
-symmetriceigen
diff --git a/stdlib/LinearAlgebra/test/testutils.jl b/stdlib/LinearAlgebra/test/testutils.jl
deleted file mode 100644
index 33eff29765c70..0000000000000
--- a/stdlib/LinearAlgebra/test/testutils.jl
+++ /dev/null
@@ -1,27 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Test approximate equality of vectors or columns of matrices modulo floating
-# point roundoff and phase (sign) differences.
-#
-# This function is designed to test for equality between vectors of floating point
-# numbers when the vectors are defined only up to a global phase or sign, such as
-# normalized eigenvectors or singular vectors. The global phase is usually
-# defined consistently, but may occasionally change due to small differences in
-# floating point rounding noise or rounding modes, or through the use of
-# different conventions in different algorithms. As a result, most tests checking
-# such vectors have to detect and discard such overall phase differences.
-#
-# Inputs:
-#     a, b:: StridedVecOrMat to be compared
-#     err :: Default: m^3*(eps(S)+eps(T)), where m is the number of rows
-#
-# Raises an error if any columnwise vector norm exceeds err. Otherwise, returns
-# nothing.
-function test_approx_eq_modphase(a::StridedVecOrMat{S}, b::StridedVecOrMat{T},
-                                 err = length(axes(a,1))^3*(eps(S)+eps(T))) where {S<:Real,T<:Real}
-    @test axes(a,1) == axes(b,1) && axes(a,2) == axes(b,2)
-    for i in axes(a,2)
-        v1, v2 = a[:, i], b[:, i]
-        @test min(abs(norm(v1-v2)),abs(norm(v1+v2))) ≈ 0.0 atol=err
-    end
-end
diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl
deleted file mode 100644
index 78fc2d5e0e74c..0000000000000
--- a/stdlib/LinearAlgebra/test/triangular.jl
+++ /dev/null
@@ -1,869 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestTriangular
-
-debug = false
-using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasFloat, errorbounds, full!, transpose!,
-    UnitUpperTriangular, UnitLowerTriangular,
-    mul!, rdiv!, rmul!, lmul!
-
-debug && println("Triangular matrices")
-
-n = 9
-Random.seed!(123)
-
-debug && println("Test basic type functionality")
-@test_throws DimensionMismatch LowerTriangular(randn(5, 4))
-@test LowerTriangular(randn(3, 3)) |> t -> [size(t, i) for i = 1:3] == [size(Matrix(t), i) for i = 1:3]
-
-# The following test block tries to call all methods in base/linalg/triangular.jl in order for a combination of input element types. Keep the ordering when adding code.
-for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int)
-    # Begin loop for first Triangular matrix
-    for (t1, uplo1) in ((UpperTriangular, :U),
-                        (UnitUpperTriangular, :U),
-                        (LowerTriangular, :L),
-                        (UnitLowerTriangular, :L))
-
-        # Construct test matrix
-        A1 = t1(elty1 == Int ? rand(1:7, n, n) : convert(Matrix{elty1}, (elty1 <: Complex ? complex.(randn(n, n), randn(n, n)) : randn(n, n)) |> t -> cholesky(t't).U |> t -> uplo1 === :U ? t : copy(t')))
-        @test t1(A1) === A1
-        @test t1{elty1}(A1) === A1
-        # test the ctor works for AbstractMatrix
-        symm = Symmetric(rand(Int8, n, n))
-        t1s = t1{elty1}(symm)
-        @test typeof(t1s) == t1{elty1, Symmetric{elty1, Matrix{elty1}}}
-        t1t = t1{elty1}(t1(rand(Int8, n, n)))
-        @test typeof(t1t) == t1{elty1, Matrix{elty1}}
-
-        debug && println("elty1: $elty1, A1: $t1")
-
-        # Convert
-        @test convert(AbstractMatrix{elty1}, A1) == A1
-        @test convert(Matrix, A1) == A1
-        @test t1{elty1}(convert(AbstractMatrix{elty1}, A1)) == A1
-
-        # full!
-        @test full!(copy(A1)) == A1
-
-        # similar
-        @test isa(similar(A1), t1)
-        @test eltype(similar(A1)) == elty1
-        @test isa(similar(A1, Int), t1)
-        @test eltype(similar(A1, Int)) == Int
-        @test isa(similar(A1, (3,2)), Matrix{elty1})
-        @test isa(similar(A1, Int, (3,2)), Matrix{Int})
-
-        #copyto!
-        simA1 = similar(A1)
-        copyto!(simA1, A1)
-        @test simA1 == A1
-
-        # getindex
-        let mA1 = Matrix(A1)
-            # linear indexing
-            for i in 1:length(A1)
-                @test A1[i] == mA1[i]
-            end
-            # cartesian indexing
-            for i in 1:size(A1, 1), j in 1:size(A1, 2)
-                @test A1[i,j] == mA1[i,j]
-            end
-        end
-        @test isa(A1[2:4,1], Vector)
-
-
-        # setindex! (and copy)
-        A1c = copy(A1)
-        for i = 1:size(A1, 1)
-            for j = 1:size(A1, 2)
-                if uplo1 === :U
-                    if i > j
-                        A1c[i,j] = 0
-                        @test_throws ArgumentError A1c[i,j] = 1
-                    elseif i == j && t1 == UnitUpperTriangular
-                        A1c[i,j] = 1
-                        @test_throws ArgumentError A1c[i,j] = 0
-                    else
-                        A1c[i,j] = 0
-                        @test A1c[i,j] == 0
-                    end
-                else
-                    if i < j
-                        A1c[i,j] = 0
-                        @test_throws ArgumentError A1c[i,j] = 1
-                    elseif i == j && t1 == UnitLowerTriangular
-                        A1c[i,j] = 1
-                        @test_throws ArgumentError A1c[i,j] = 0
-                    else
-                        A1c[i,j] = 0
-                        @test A1c[i,j] == 0
-                    end
-                end
-            end
-        end
-
-        # istril/istriu
-        if uplo1 === :L
-            @test istril(A1)
-            @test !istriu(A1)
-            @test istriu(A1')
-            @test istriu(transpose(A1))
-            @test !istril(A1')
-            @test !istril(transpose(A1))
-        else
-            @test istriu(A1)
-            @test !istril(A1)
-            @test istril(A1')
-            @test istril(transpose(A1))
-            @test !istriu(A1')
-            @test !istriu(transpose(A1))
-        end
-        M = copy(parent(A1))
-        for trans in (adjoint, transpose), k in -1:1
-            triu!(M, k)
-            @test istril(trans(M), -k) == istril(copy(trans(M)), -k) == true
-        end
-        M = copy(parent(A1))
-        for trans in (adjoint, transpose), k in 1:-1:-1
-            tril!(M, k)
-            @test istriu(trans(M), -k) == istriu(copy(trans(M)), -k) == true
-        end
-
-        #tril/triu
-        if uplo1 === :L
-            @test tril(A1,0)  == A1
-            @test tril(A1,-1) == LowerTriangular(tril(Matrix(A1), -1))
-            @test tril(A1,1)  == t1(tril(tril(Matrix(A1), 1)))
-            @test tril(A1, -n - 2) == zeros(size(A1))
-            @test tril(A1, n) == A1
-            @test triu(A1,0)  == t1(diagm(0 => diag(A1)))
-            @test triu(A1,-1) == t1(tril(triu(A1.data,-1)))
-            @test triu(A1,1)  == zeros(size(A1)) # or just @test iszero(triu(A1,1))?
-            @test triu(A1, -n) == A1
-            @test triu(A1, n + 2) == zeros(size(A1))
-        else
-            @test triu(A1,0)  == A1
-            @test triu(A1,1)  == UpperTriangular(triu(Matrix(A1), 1))
-            @test triu(A1,-1) == t1(triu(triu(Matrix(A1), -1)))
-            @test triu(A1, -n) == A1
-            @test triu(A1, n + 2) == zeros(size(A1))
-            @test tril(A1,0)  == t1(diagm(0 => diag(A1)))
-            @test tril(A1,1)  == t1(triu(tril(A1.data,1)))
-            @test tril(A1,-1) == zeros(size(A1)) # or just @test iszero(tril(A1,-1))?
-            @test tril(A1, -n - 2) == zeros(size(A1))
-            @test tril(A1, n) == A1
-        end
-
-        # factorize
-        @test factorize(A1) == A1
-
-        # [c]transpose[!] (test views as well, see issue #14317)
-        let vrange = 1:n-1, viewA1 = t1(view(A1.data, vrange, vrange))
-            # transpose
-            @test copy(transpose(A1)) == transpose(Matrix(A1))
-            @test copy(transpose(viewA1)) == transpose(Matrix(viewA1))
-            # adjoint
-            @test copy(A1') == Matrix(A1)'
-            @test copy(viewA1') == Matrix(viewA1)'
-            # transpose!
-            @test transpose!(copy(A1)) == transpose(A1)
-            @test typeof(transpose!(copy(A1))).name == typeof(transpose(A1)).name
-            @test transpose!(t1(view(copy(A1).data, vrange, vrange))) == transpose(viewA1)
-            # adjoint!
-            @test adjoint!(copy(A1)) == adjoint(A1)
-            @test typeof(adjoint!(copy(A1))).name == typeof(adjoint(A1)).name
-            @test adjoint!(t1(view(copy(A1).data, vrange, vrange))) == adjoint(viewA1)
-        end
-
-        # diag
-        @test diag(A1) == diag(Matrix(A1))
-
-        # tr
-        @test tr(A1)::elty1 == tr(Matrix(A1))
-
-        # real
-        @test real(A1) == real(Matrix(A1))
-        @test imag(A1) == imag(Matrix(A1))
-        @test abs.(A1) == abs.(Matrix(A1))
-
-        # Unary operations
-        @test -A1 == -Matrix(A1)
-
-        # copy and copyto! (test views as well, see issue #14317)
-        let vrange = 1:n-1, viewA1 = t1(view(A1.data, vrange, vrange))
-            # copy
-            @test copy(A1) == copy(Matrix(A1))
-            @test copy(viewA1) == copy(Matrix(viewA1))
-            # copyto!
-            B = similar(A1)
-            copyto!(B, A1)
-            @test B == A1
-            B = similar(copy(transpose(A1)))
-            copyto!(B, copy(transpose(A1)))
-            @test B == copy(transpose(A1))
-            B = similar(viewA1)
-            copyto!(B, viewA1)
-            @test B == viewA1
-            B = similar(copy(transpose(viewA1)))
-            copyto!(B, copy(transpose(viewA1)))
-            @test B == transpose(viewA1)
-        end
-
-        #exp/log
-        if elty1 ∈ (Float32,Float64,ComplexF32,ComplexF64)
-            @test exp(Matrix(log(A1))) ≈ A1
-        end
-
-        # scale
-        if (t1 == UpperTriangular || t1 == LowerTriangular)
-            unitt = istriu(A1) ? UnitUpperTriangular : UnitLowerTriangular
-            if elty1 == Int
-                cr = 2
-            else
-                cr = 0.5
-            end
-            ci = cr * im
-            if elty1 <: Real
-                A1tmp = copy(A1)
-                rmul!(A1tmp, cr)
-                @test A1tmp == cr*A1
-                A1tmp = copy(A1)
-                lmul!(cr, A1tmp)
-                @test A1tmp == cr*A1
-                A1tmp = copy(A1)
-                A2tmp = unitt(A1)
-                mul!(A1tmp, A2tmp, cr)
-                @test A1tmp == cr * A2tmp
-                A1tmp = copy(A1)
-                A2tmp = unitt(A1)
-                mul!(A1tmp, cr, A2tmp)
-                @test A1tmp == cr * A2tmp
-            else
-                A1tmp = copy(A1)
-                rmul!(A1tmp, ci)
-                @test A1tmp == ci*A1
-                A1tmp = copy(A1)
-                lmul!(ci, A1tmp)
-                @test A1tmp == ci*A1
-                A1tmp = copy(A1)
-                A2tmp = unitt(A1)
-                mul!(A1tmp, ci, A2tmp)
-                @test A1tmp == ci * A2tmp
-                A1tmp = copy(A1)
-                A2tmp = unitt(A1)
-                mul!(A1tmp, A2tmp, ci)
-                @test A1tmp == A2tmp*ci
-            end
-        end
-
-        # generalized dot
-        for eltyb in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat})
-            b1 = convert(Vector{eltyb}, (elty1 <: Complex ? real(A1) : A1)*fill(1., n))
-            b2 = convert(Vector{eltyb}, (elty1 <: Complex ? real(A1) : A1)*randn(n))
-            @test dot(b1, A1, b2) ≈ dot(A1'b1, b2)  atol=sqrt(max(eps(real(float(one(elty1)))),eps(real(float(one(eltyb))))))*n*n
-        end
-
-        # Binary operations
-        @test A1*0.5 == Matrix(A1)*0.5
-        @test 0.5*A1 == 0.5*Matrix(A1)
-        @test A1/0.5 == Matrix(A1)/0.5
-        @test 0.5\A1 == 0.5\Matrix(A1)
-
-        # inversion
-        @test inv(A1) ≈ inv(lu(Matrix(A1)))
-        inv(Matrix(A1)) # issue #11298
-        @test isa(inv(A1), t1)
-        # make sure the call to LAPACK works right
-        if elty1 <: BlasFloat
-            @test LinearAlgebra.inv!(copy(A1)) ≈ inv(lu(Matrix(A1)))
-        end
-
-        # Determinant
-        @test det(A1) ≈ det(lu(Matrix(A1))) atol=sqrt(eps(real(float(one(elty1)))))*n*n
-        @test logdet(A1) ≈ logdet(lu(Matrix(A1))) atol=sqrt(eps(real(float(one(elty1)))))*n*n
-        lada, ladb = logabsdet(A1)
-        flada, fladb = logabsdet(lu(Matrix(A1)))
-        @test lada ≈ flada atol=sqrt(eps(real(float(one(elty1)))))*n*n
-        @test ladb ≈ fladb atol=sqrt(eps(real(float(one(elty1)))))*n*n
-
-        # Matrix square root
-        @test sqrt(A1) |> (t -> (t*t)::typeof(t)) ≈ A1
-
-        # naivesub errors
-        @test_throws DimensionMismatch ldiv!(A1, Vector{elty1}(undef, n+1))
-
-        # eigenproblems
-        if !(elty1 in (BigFloat, Complex{BigFloat})) # Not handled yet
-            vals, vecs = eigen(A1)
-            if (t1 == UpperTriangular || t1 == LowerTriangular) && elty1 != Int # Cannot really handle degenerate eigen space and Int matrices will probably have repeated eigenvalues.
-                @test vecs*diagm(0 => vals)/vecs ≈ A1 atol=sqrt(eps(float(real(one(vals[1])))))*(opnorm(A1,Inf)*n)^2
-            end
-        end
-
-        # Condition number tests - can be VERY approximate
-        if elty1 <:BlasFloat
-            for p in (1.0, Inf)
-                @test cond(A1,p) ≈ cond(A1,p) atol=(cond(A1,p)+cond(A1,p))
-            end
-            @test cond(A1,2) == cond(Matrix(A1),2)
-        end
-
-        if !(elty1 in (BigFloat, Complex{BigFloat})) # Not implemented yet
-            svd(A1)
-            elty1 <: BlasFloat && svd!(copy(A1))
-            svdvals(A1)
-        end
-
-        @test ((A1*A1)::t1) ≈ Matrix(A1) * Matrix(A1)
-        @test ((A1/A1)::t1) ≈ Matrix(A1) / Matrix(A1)
-        @test ((A1\A1)::t1) ≈ Matrix(A1) \ Matrix(A1)
-
-        # Begin loop for second Triangular matrix
-        for elty2 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int)
-            for (t2, uplo2) in ((UpperTriangular, :U),
-                                (UnitUpperTriangular, :U),
-                                (LowerTriangular, :L),
-                                (UnitLowerTriangular, :L))
-
-                debug && println("elty1: $elty1, A1: $t1, elty2: $elty2")
-
-                A2 = t2(elty2 == Int ? rand(1:7, n, n) : convert(Matrix{elty2}, (elty2 <: Complex ? complex.(randn(n, n), randn(n, n)) : randn(n, n)) |> t -> cholesky(t't).U |> t -> uplo2 === :U ? t : copy(t')))
-
-                # Convert
-                if elty1 <: Real && !(elty2 <: Integer)
-                    @test convert(AbstractMatrix{elty2}, A1) == t1(convert(Matrix{elty2}, A1.data))
-                elseif elty2 <: Real && !(elty1 <: Integer)
-                    @test_throws InexactError convert(AbstractMatrix{elty2}, A1) == t1(convert(Matrix{elty2}, A1.data))
-                end
-
-                # Binary operations
-                @test A1 + A2 == Matrix(A1) + Matrix(A2)
-                @test A1 - A2 == Matrix(A1) - Matrix(A2)
-
-                # Triangular-Triangular multiplication and division
-                @test A1*A2 ≈ Matrix(A1)*Matrix(A2)
-                @test transpose(A1)*A2 ≈ transpose(Matrix(A1))*Matrix(A2)
-                @test transpose(A1)*adjoint(A2) ≈ transpose(Matrix(A1))*adjoint(Matrix(A2))
-                @test adjoint(A1)*transpose(A2) ≈ adjoint(Matrix(A1))*transpose(Matrix(A2))
-                @test A1'A2 ≈ Matrix(A1)'Matrix(A2)
-                @test A1*transpose(A2) ≈ Matrix(A1)*transpose(Matrix(A2))
-                @test A1*A2' ≈ Matrix(A1)*Matrix(A2)'
-                @test transpose(A1)*transpose(A2) ≈ transpose(Matrix(A1))*transpose(Matrix(A2))
-                @test A1'A2' ≈ Matrix(A1)'Matrix(A2)'
-                @test A1/A2 ≈ Matrix(A1)/Matrix(A2)
-                @test A1\A2 ≈ Matrix(A1)\Matrix(A2)
-                if uplo1 === :U && uplo2 === :U
-                    if t1 === UnitUpperTriangular && t2 === UnitUpperTriangular
-                        @test A1*A2 isa UnitUpperTriangular
-                        @test A1/A2 isa UnitUpperTriangular
-                        elty1 == Int && elty2 == Int && @test eltype(A1/A2) == Int
-                        @test A1\A2 isa UnitUpperTriangular
-                        elty1 == Int && elty2 == Int && @test eltype(A1\A2) == Int
-                    else
-                        @test A1*A2 isa UpperTriangular
-                        @test A1/A2 isa UpperTriangular
-                        elty1 == Int && elty2 == Int && t2 === UnitUpperTriangular && @test eltype(A1/A2) == Int
-                        @test A1\A2 isa UpperTriangular
-                        elty1 == Int && elty2 == Int && t1 === UnitUpperTriangular && @test eltype(A1\A2) == Int
-                    end
-                elseif uplo1 === :L && uplo2 === :L
-                    if t1 === UnitLowerTriangular && t2 === UnitLowerTriangular
-                        @test A1*A2 isa UnitLowerTriangular
-                        @test A1/A2 isa UnitLowerTriangular
-                        elty1 == Int && elty2 == Int && @test eltype(A1/A2) == Int
-                        @test A1\A2 isa UnitLowerTriangular
-                        elty1 == Int && elty2 == Int && @test eltype(A1\A2) == Int
-                    else
-                        @test A1*A2 isa LowerTriangular
-                        @test A1/A2 isa LowerTriangular
-                        elty1 == Int && elty2 == Int && t2 === UnitLowerTriangular && @test eltype(A1/A2) == Int
-                        @test A1\A2 isa LowerTriangular
-                        elty1 == Int && elty2 == Int && t1 === UnitLowerTriangular && @test eltype(A1\A2) == Int
-                    end
-                end
-                offsizeA = Matrix{Float64}(I, n+1, n+1)
-                @test_throws DimensionMismatch offsizeA / A2
-                @test_throws DimensionMismatch offsizeA / transpose(A2)
-                @test_throws DimensionMismatch offsizeA / A2'
-                @test_throws DimensionMismatch offsizeA * A2
-                @test_throws DimensionMismatch offsizeA * transpose(A2)
-                @test_throws DimensionMismatch offsizeA * A2'
-                @test_throws DimensionMismatch transpose(A2) * offsizeA
-                @test_throws DimensionMismatch A2'  * offsizeA
-                @test_throws DimensionMismatch A2   * offsizeA
-                if (uplo1 == uplo2 && elty1 == elty2 != Int && t1 != UnitLowerTriangular && t1 != UnitUpperTriangular)
-                    @test rdiv!(copy(A1), copy(A2))::t1 ≈ A1/A2 ≈ Matrix(A1)/Matrix(A2)
-                    @test ldiv!(copy(A2), copy(A1))::t1 ≈ A2\A1 ≈ Matrix(A2)\Matrix(A1)
-                end
-                if (uplo1 != uplo2 && elty1 == elty2 != Int && t2 != UnitLowerTriangular && t2 != UnitUpperTriangular)
-                    @test lmul!(adjoint(copy(A1)), copy(A2)) ≈ A1'*A2 ≈ Matrix(A1)'*Matrix(A2)
-                    @test lmul!(transpose(copy(A1)), copy(A2)) ≈ transpose(A1)*A2 ≈ transpose(Matrix(A1))*Matrix(A2)
-                    @test ldiv!(adjoint(copy(A1)), copy(A2)) ≈ A1'\A2 ≈ Matrix(A1)'\Matrix(A2)
-                    @test ldiv!(transpose(copy(A1)), copy(A2)) ≈ transpose(A1)\A2 ≈ transpose(Matrix(A1))\Matrix(A2)
-                end
-                if (uplo1 != uplo2 && elty1 == elty2 != Int && t1 != UnitLowerTriangular && t1 != UnitUpperTriangular)
-                    @test rmul!(copy(A1), adjoint(copy(A2))) ≈ A1*A2' ≈ Matrix(A1)*Matrix(A2)'
-                    @test rmul!(copy(A1), transpose(copy(A2))) ≈ A1*transpose(A2) ≈ Matrix(A1)*transpose(Matrix(A2))
-                    @test rdiv!(copy(A1), adjoint(copy(A2))) ≈ A1/A2' ≈ Matrix(A1)/Matrix(A2)'
-                    @test rdiv!(copy(A1), transpose(copy(A2))) ≈ A1/transpose(A2) ≈ Matrix(A1)/transpose(Matrix(A2))
-                end
-            end
-        end
-
-        for eltyB in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat})
-            B = convert(Matrix{eltyB}, (elty1 <: Complex ? real(A1) : A1)*fill(1., n, n))
-
-            debug && println("elty1: $elty1, A1: $t1, B: $eltyB")
-
-            Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
-            @test lmul!(Tri,copy(A1)) ≈ Tri*Matrix(A1)
-            Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
-            C = Matrix{promote_type(elty1,eltyB)}(undef, n, n)
-            mul!(C, Tri, copy(A1))
-            @test C ≈ Tri*Matrix(A1)
-            Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
-            mul!(C, copy(A1), Tri)
-            @test C ≈ Matrix(A1)*Tri
-
-            # Triangular-dense Matrix/vector multiplication
-            @test A1*B[:,1] ≈ Matrix(A1)*B[:,1]
-            @test A1*B ≈ Matrix(A1)*B
-            @test transpose(A1)*B[:,1] ≈ transpose(Matrix(A1))*B[:,1]
-            @test A1'B[:,1] ≈ Matrix(A1)'B[:,1]
-            @test transpose(A1)*B ≈ transpose(Matrix(A1))*B
-            @test A1'B ≈ Matrix(A1)'B
-            @test A1*transpose(B) ≈ Matrix(A1)*transpose(B)
-            @test adjoint(A1)*transpose(B) ≈ Matrix(A1)'*transpose(B)
-            @test transpose(A1)*adjoint(B) ≈ transpose(Matrix(A1))*adjoint(B)
-            @test A1*B' ≈ Matrix(A1)*B'
-            @test B*A1 ≈ B*Matrix(A1)
-            @test transpose(B[:,1])*A1 ≈ transpose(B[:,1])*Matrix(A1)
-            @test B[:,1]'A1 ≈ B[:,1]'Matrix(A1)
-            @test transpose(B)*A1 ≈ transpose(B)*Matrix(A1)
-            @test transpose(B)*adjoint(A1) ≈ transpose(B)*Matrix(A1)'
-            @test adjoint(B)*transpose(A1) ≈ adjoint(B)*transpose(Matrix(A1))
-            @test B'A1 ≈ B'Matrix(A1)
-            @test B*transpose(A1) ≈ B*transpose(Matrix(A1))
-            @test B*A1' ≈ B*Matrix(A1)'
-            @test transpose(B[:,1])*transpose(A1) ≈ transpose(B[:,1])*transpose(Matrix(A1))
-            @test B[:,1]'A1' ≈ B[:,1]'Matrix(A1)'
-            @test transpose(B)*transpose(A1) ≈ transpose(B)*transpose(Matrix(A1))
-            @test B'A1' ≈ B'Matrix(A1)'
-
-            if eltyB == elty1
-                @test mul!(similar(B), A1, B) ≈ Matrix(A1)*B
-                @test mul!(similar(B), A1, adjoint(B)) ≈ Matrix(A1)*B'
-                @test mul!(similar(B), A1, transpose(B)) ≈ Matrix(A1)*transpose(B)
-                @test mul!(similar(B), adjoint(A1), adjoint(B)) ≈ Matrix(A1)'*B'
-                @test mul!(similar(B), transpose(A1), transpose(B)) ≈ transpose(Matrix(A1))*transpose(B)
-                @test mul!(similar(B), transpose(A1), adjoint(B)) ≈ transpose(Matrix(A1))*B'
-                @test mul!(similar(B), adjoint(A1), transpose(B)) ≈ Matrix(A1)'*transpose(B)
-                @test mul!(similar(B), adjoint(A1), B) ≈ Matrix(A1)'*B
-                @test mul!(similar(B), transpose(A1), B) ≈ transpose(Matrix(A1))*B
-                # test also vector methods
-                B1 = vec(B[1,:])
-                @test mul!(similar(B1), A1, B1)  ≈ Matrix(A1)*B1
-                @test mul!(similar(B1), adjoint(A1), B1) ≈ Matrix(A1)'*B1
-                @test mul!(similar(B1), transpose(A1), B1) ≈ transpose(Matrix(A1))*B1
-            end
-            #error handling
-            Ann, Bmm, bm = A1, Matrix{eltyB}(undef, n+1, n+1), Vector{eltyB}(undef, n+1)
-            @test_throws DimensionMismatch lmul!(Ann, bm)
-            @test_throws DimensionMismatch rmul!(Bmm, Ann)
-            @test_throws DimensionMismatch lmul!(transpose(Ann), bm)
-            @test_throws DimensionMismatch lmul!(adjoint(Ann), bm)
-            @test_throws DimensionMismatch rmul!(Bmm, adjoint(Ann))
-            @test_throws DimensionMismatch rmul!(Bmm, transpose(Ann))
-
-            # ... and division
-            @test A1\B[:,1] ≈ Matrix(A1)\B[:,1]
-            @test A1\B ≈ Matrix(A1)\B
-            @test transpose(A1)\B[:,1] ≈ transpose(Matrix(A1))\B[:,1]
-            @test A1'\B[:,1] ≈ Matrix(A1)'\B[:,1]
-            @test transpose(A1)\B ≈ transpose(Matrix(A1))\B
-            @test A1'\B ≈ Matrix(A1)'\B
-            @test A1\transpose(B) ≈ Matrix(A1)\transpose(B)
-            @test A1\B' ≈ Matrix(A1)\B'
-            @test transpose(A1)\transpose(B) ≈ transpose(Matrix(A1))\transpose(B)
-            @test A1'\B' ≈ Matrix(A1)'\B'
-            Ann, bm = A1, Vector{elty1}(undef,n+1)
-            @test_throws DimensionMismatch Ann\bm
-            @test_throws DimensionMismatch Ann'\bm
-            @test_throws DimensionMismatch transpose(Ann)\bm
-            if t1 == UpperTriangular || t1 == LowerTriangular
-                if elty1 === eltyB <: BlasFloat
-                    @test_throws LAPACKException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n))
-                else
-                    @test_throws SingularException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n))
-                end
-            end
-            @test B/A1 ≈ B/Matrix(A1)
-            @test B/transpose(A1) ≈ B/transpose(Matrix(A1))
-            @test B/A1' ≈ B/Matrix(A1)'
-            @test transpose(B)/A1 ≈ transpose(B)/Matrix(A1)
-            @test B'/A1 ≈ B'/Matrix(A1)
-            @test transpose(B)/transpose(A1) ≈ transpose(B)/transpose(Matrix(A1))
-            @test B'/A1' ≈ B'/Matrix(A1)'
-
-            # Error bounds
-            !(elty1 in (BigFloat, Complex{BigFloat})) && !(eltyB in (BigFloat, Complex{BigFloat})) && errorbounds(A1, A1\B, B)
-
-        end
-    end
-end
-
-# Matrix square root
-Atn = UpperTriangular([-1 1 2; 0 -2 2; 0 0 -3])
-Atp = UpperTriangular([1 1 2; 0 2 2; 0 0 3])
-Atu = UnitUpperTriangular([1 1 2; 0 1 2; 0 0 1])
-@test sqrt(Atn) |> t->t*t ≈ Atn
-@test sqrt(Atn) isa UpperTriangular
-@test typeof(sqrt(Atn)[1,1]) <: Complex
-@test sqrt(Atp) |> t->t*t ≈ Atp
-@test sqrt(Atp) isa UpperTriangular
-@test typeof(sqrt(Atp)[1,1]) <: Real
-@test typeof(sqrt(complex(Atp))[1,1]) <: Complex
-@test sqrt(Atu) |> t->t*t ≈ Atu
-@test sqrt(Atu) isa UnitUpperTriangular
-@test typeof(sqrt(Atu)[1,1]) <: Real
-@test typeof(sqrt(complex(Atu))[1,1]) <: Complex
-
-@testset "matrix square root quasi-triangular blockwise" begin
-    @testset for T in (Float32, Float64, ComplexF32, ComplexF64)
-        A = schur(rand(T, 100, 100)^2).T
-        @test LinearAlgebra.sqrt_quasitriu(A; blockwidth=16)^2 ≈ A
-    end
-    n = 256
-    A = rand(ComplexF64, n, n)
-    U = schur(A).T
-    Ubig = Complex{BigFloat}.(U)
-    @test LinearAlgebra.sqrt_quasitriu(U; blockwidth=64) ≈ LinearAlgebra.sqrt_quasitriu(Ubig; blockwidth=64)
-end
-
-@testset "sylvester quasi-triangular blockwise" begin
-    @testset for T in (Float32, Float64, ComplexF32, ComplexF64), m in (15, 40), n in (15, 45)
-        A = schur(rand(T, m, m)).T
-        B = schur(rand(T, n, n)).T
-        C = randn(T, m, n)
-        Ccopy = copy(C)
-        X = LinearAlgebra._sylvester_quasitriu!(A, B, C; blockwidth=16)
-        @test X === C
-        @test A * X + X * B ≈ -Ccopy
-
-        @testset "test raise=false does not break recursion" begin
-            Az = zero(A)
-            Bz = zero(B)
-            C2 = copy(Ccopy)
-            @test_throws LAPACKException LinearAlgebra._sylvester_quasitriu!(Az, Bz, C2; blockwidth=16)
-            m == n || @test any(C2 .== Ccopy)  # recursion broken
-            C3 = copy(Ccopy)
-            X3 = LinearAlgebra._sylvester_quasitriu!(Az, Bz, C3; blockwidth=16, raise=false)
-            @test !any(X3 .== Ccopy)  # recursion not broken
-        end
-    end
-end
-
-@testset "check matrix logarithm type-inferrable" for elty in (Float32,Float64,ComplexF32,ComplexF64)
-    A = UpperTriangular(exp(triu(randn(elty, n, n))))
-    @inferred Union{typeof(A),typeof(complex(A))} log(A)
-    @test exp(Matrix(log(A))) ≈ A
-    if elty <: Real
-        @test typeof(log(A)) <: UpperTriangular{elty}
-        @test typeof(log(complex(A))) <: UpperTriangular{complex(elty)}
-        @test isreal(log(complex(A)))
-        @test log(complex(A)) ≈ log(A)
-    end
-
-    Au = UnitUpperTriangular(exp(triu(randn(elty, n, n), 1)))
-    @inferred Union{typeof(A),typeof(complex(A))} log(Au)
-    @test exp(Matrix(log(Au))) ≈ Au
-    if elty <: Real
-        @test typeof(log(Au)) <: UpperTriangular{elty}
-        @test typeof(log(complex(Au))) <: UpperTriangular{complex(elty)}
-        @test isreal(log(complex(Au)))
-        @test log(complex(Au)) ≈ log(Au)
-    end
-end
-
-Areal   = randn(n, n)/2
-Aimg    = randn(n, n)/2
-A2real  = randn(n, n)/2
-A2img   = randn(n, n)/2
-
-for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int)
-    A = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(Areal, Aimg) : Areal)
-    # a2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(a2real, a2img) : a2real)
-    εa = eps(abs(float(one(eltya))))
-
-    for eltyb in (Float32, Float64, ComplexF32, ComplexF64)
-        εb = eps(abs(float(one(eltyb))))
-        ε = max(εa,εb)
-
-        debug && println("\ntype of A: ", eltya, " type of b: ", eltyb, "\n")
-
-        debug && println("Solve upper triangular system")
-        Atri = UpperTriangular(lu(A).U) |> t -> eltya <: Complex && eltyb <: Real ? real(t) : t # Here the triangular matrix can't be too badly conditioned
-        b = convert(Matrix{eltyb}, Matrix(Atri)*fill(1., n, 2))
-        x = Matrix(Atri) \ b
-
-        debug && println("Test error estimates")
-        if eltya != BigFloat && eltyb != BigFloat
-            for i = 1:2
-                @test  norm(x[:,1] .- 1) <= errorbounds(UpperTriangular(A), x, b)[1][i]
-            end
-        end
-        debug && println("Test forward error [JIN 5705] if this is not a BigFloat")
-
-        x = Atri \ b
-        γ = n*ε/(1 - n*ε)
-        if eltya != BigFloat
-            bigA = big.(Atri)
-            x̂ = fill(1., n, 2)
-            for i = 1:size(b, 2)
-                @test norm(x̂[:,i] - x[:,i], Inf)/norm(x̂[:,i], Inf) <= condskeel(bigA, x̂[:,i])*γ/(1 - condskeel(bigA)*γ)
-            end
-        end
-
-        debug && println("Test backward error [JIN 5705]")
-        for i = 1:size(b, 2)
-            @test norm(abs.(b[:,i] - Atri*x[:,i]), Inf) <= γ * norm(Atri, Inf) * norm(x[:,i], Inf)
-        end
-
-        debug && println("Solve lower triangular system")
-        Atri = UpperTriangular(lu(A).U) |> t -> eltya <: Complex && eltyb <: Real ? real(t) : t # Here the triangular matrix can't be too badly conditioned
-        b = convert(Matrix{eltyb}, Matrix(Atri)*fill(1., n, 2))
-        x = Matrix(Atri)\b
-
-        debug && println("Test error estimates")
-        if eltya != BigFloat && eltyb != BigFloat
-            for i = 1:2
-                @test  norm(x[:,1] .- 1) <= errorbounds(UpperTriangular(A), x, b)[1][i]
-            end
-        end
-
-        debug && println("Test forward error [JIN 5705] if this is not a BigFloat")
-        b = (b0 = Atri*fill(1, n, 2); convert(Matrix{eltyb}, eltyb == Int ? trunc.(b0) : b0))
-        x = Atri \ b
-        γ = n*ε/(1 - n*ε)
-        if eltya != BigFloat
-            bigA = big.(Atri)
-            x̂ = fill(1., n, 2)
-            for i = 1:size(b, 2)
-                @test norm(x̂[:,i] - x[:,i], Inf)/norm(x̂[:,i], Inf) <= condskeel(bigA, x̂[:,i])*γ/(1 - condskeel(bigA)*γ)
-            end
-        end
-
-        debug && println("Test backward error [JIN 5705]")
-        for i = 1:size(b, 2)
-            @test norm(abs.(b[:,i] - Atri*x[:,i]), Inf) <= γ * norm(Atri, Inf) * norm(x[:,i], Inf)
-        end
-    end
-end
-
-# Issue 10742 and similar
-@test istril(UpperTriangular(diagm(0 => [1,2,3,4])))
-@test istriu(LowerTriangular(diagm(0 => [1,2,3,4])))
-@test isdiag(UpperTriangular(diagm(0 => [1,2,3,4])))
-@test isdiag(LowerTriangular(diagm(0 => [1,2,3,4])))
-@test !isdiag(UpperTriangular(rand(4, 4)))
-@test !isdiag(LowerTriangular(rand(4, 4)))
-
-# Test throwing in fallbacks for non BlasFloat/BlasComplex in A_rdiv_Bx!
-let n = 5
-    A = rand(Float16, n, n)
-    B = rand(Float16, n-1, n-1)
-    @test_throws DimensionMismatch rdiv!(A, LowerTriangular(B))
-    @test_throws DimensionMismatch rdiv!(A, UpperTriangular(B))
-    @test_throws DimensionMismatch rdiv!(A, UnitLowerTriangular(B))
-    @test_throws DimensionMismatch rdiv!(A, UnitUpperTriangular(B))
-
-    @test_throws DimensionMismatch rdiv!(A, adjoint(LowerTriangular(B)))
-    @test_throws DimensionMismatch rdiv!(A, adjoint(UpperTriangular(B)))
-    @test_throws DimensionMismatch rdiv!(A, adjoint(UnitLowerTriangular(B)))
-    @test_throws DimensionMismatch rdiv!(A, adjoint(UnitUpperTriangular(B)))
-
-    @test_throws DimensionMismatch rdiv!(A, transpose(LowerTriangular(B)))
-    @test_throws DimensionMismatch rdiv!(A, transpose(UpperTriangular(B)))
-    @test_throws DimensionMismatch rdiv!(A, transpose(UnitLowerTriangular(B)))
-    @test_throws DimensionMismatch rdiv!(A, transpose(UnitUpperTriangular(B)))
-end
-
-@test isdiag(LowerTriangular(UpperTriangular(randn(3,3))))
-@test isdiag(UpperTriangular(LowerTriangular(randn(3,3))))
-
-# Issue 16196
-@test UpperTriangular(Matrix(1.0I, 3, 3)) \ view(fill(1., 3), [1,2,3]) == fill(1., 3)
-
-# dimensional correctness:
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
-using .Main.Furlongs
-LinearAlgebra.sylvester(a::Furlong,b::Furlong,c::Furlong) = -c / (a + b)
-
-@testset "dimensional correctness" begin
-    A = UpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)])
-    @test sqrt(A)::UpperTriangular == Furlong{1//2}.(UpperTriangular([1 2; 0 1]))
-    @test inv(A)::UpperTriangular == Furlong{-1}.(UpperTriangular([1 -4; 0 1]))
-    B = UnitUpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)])
-    @test sqrt(B)::UnitUpperTriangular == Furlong{1//2}.(UpperTriangular([1 2; 0 1]))
-    @test inv(B)::UnitUpperTriangular == Furlong{-1}.(UpperTriangular([1 -4; 0 1]))
-    b = [Furlong(5), Furlong(8)]
-    @test (A \ b)::Vector{<:Furlong{0}} == (B \ b)::Vector{<:Furlong{0}} == Furlong{0}.([-27, 8])
-    C = LowerTriangular([Furlong(1) Furlong(0); Furlong(4) Furlong(1)])
-    @test sqrt(C)::LowerTriangular == Furlong{1//2}.(LowerTriangular([1 0; 2 1]))
-    @test inv(C)::LowerTriangular == Furlong{-1}.(LowerTriangular([1 0; -4 1]))
-    D = UnitLowerTriangular([Furlong(1) Furlong(0); Furlong(4) Furlong(1)])
-    @test sqrt(D)::UnitLowerTriangular == Furlong{1//2}.(UnitLowerTriangular([1 0; 2 1]))
-    @test inv(D)::UnitLowerTriangular == Furlong{-1}.(UnitLowerTriangular([1 0; -4 1]))
-    b = [Furlong(5), Furlong(8)]
-    @test (C \ b)::Vector{<:Furlong{0}} == (D \ b)::Vector{<:Furlong{0}} == Furlong{0}.([5, -12])
-end
-
-isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
-using .Main.ImmutableArrays
-
-@testset "AbstractArray constructor should preserve underlying storage type" begin
-    # tests corresponding to #34995
-    local m = 4
-    local T, S = Float32, Float64
-    immutablemat = ImmutableArray(randn(T,m,m))
-    for TriType in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
-        trimat = TriType(immutablemat)
-        @test convert(AbstractArray{S}, trimat).data isa ImmutableArray{S}
-        @test convert(AbstractMatrix{S}, trimat).data isa ImmutableArray{S}
-        @test AbstractArray{S}(trimat).data isa ImmutableArray{S}
-        @test AbstractMatrix{S}(trimat).data isa ImmutableArray{S}
-        @test convert(AbstractArray{S}, trimat) == trimat
-        @test convert(AbstractMatrix{S}, trimat) == trimat
-    end
-end
-
-@testset "inplace mul of appropriate types should preserve triagular structure" begin
-    for elty1 in (Float64, ComplexF32), elty2 in (Float64, ComplexF32)
-        T = promote_type(elty1, elty2)
-        M1 = rand(elty1, 5, 5)
-        M2 = rand(elty2, 5, 5)
-        A = UpperTriangular(M1)
-        A2 = UpperTriangular(M2)
-        Au = UnitUpperTriangular(M1)
-        Au2 = UnitUpperTriangular(M2)
-        B = LowerTriangular(M1)
-        B2 = LowerTriangular(M2)
-        Bu = UnitLowerTriangular(M1)
-        Bu2 = UnitLowerTriangular(M2)
-
-        @test mul!(similar(A), A, A)::typeof(A) == A*A
-        @test mul!(similar(A, T), A, A2) ≈ A*A2
-        @test mul!(similar(A, T), A2, A) ≈ A2*A
-        @test mul!(typeof(similar(A, T))(A), A, A2, 2.0, 3.0) ≈ 2.0*A*A2 + 3.0*A
-        @test mul!(typeof(similar(A2, T))(A2), A2, A, 2.0, 3.0) ≈ 2.0*A2*A + 3.0*A2
-
-        @test mul!(similar(A), A, Au)::typeof(A) == A*Au
-        @test mul!(similar(A), Au, A)::typeof(A) == Au*A
-        @test mul!(similar(Au), Au, Au)::typeof(Au) == Au*Au
-        @test mul!(similar(A, T), A, Au2) ≈ A*Au2
-        @test mul!(similar(A, T), Au2, A) ≈ Au2*A
-        @test mul!(similar(Au2), Au2, Au2) == Au2*Au2
-
-        @test mul!(similar(B), B, B)::typeof(B) == B*B
-        @test mul!(similar(B, T), B, B2) ≈ B*B2
-        @test mul!(similar(B, T), B2, B) ≈ B2*B
-        @test mul!(typeof(similar(B, T))(B), B, B2, 2.0, 3.0) ≈ 2.0*B*B2 + 3.0*B
-        @test mul!(typeof(similar(B2, T))(B2), B2, B, 2.0, 3.0) ≈ 2.0*B2*B + 3.0*B2
-
-        @test mul!(similar(B), B, Bu)::typeof(B) == B*Bu
-        @test mul!(similar(B), Bu, B)::typeof(B) == Bu*B
-        @test mul!(similar(Bu), Bu, Bu)::typeof(Bu) == Bu*Bu
-        @test mul!(similar(B, T), B, Bu2) ≈ B*Bu2
-        @test mul!(similar(B, T), Bu2, B) ≈ Bu2*B
-    end
-end
-
-@testset "special printing of Lower/UpperTriangular" begin
-    @test occursin(r"3×3 (LinearAlgebra\.)?LowerTriangular{Int64, Matrix{Int64}}:\n 2  ⋅  ⋅\n 2  2  ⋅\n 2  2  2",
-                   sprint(show, MIME"text/plain"(), LowerTriangular(2ones(Int64,3,3))))
-    @test occursin(r"3×3 (LinearAlgebra\.)?UnitLowerTriangular{Int64, Matrix{Int64}}:\n 1  ⋅  ⋅\n 2  1  ⋅\n 2  2  1",
-                   sprint(show, MIME"text/plain"(), UnitLowerTriangular(2ones(Int64,3,3))))
-    @test occursin(r"3×3 (LinearAlgebra\.)?UpperTriangular{Int64, Matrix{Int64}}:\n 2  2  2\n ⋅  2  2\n ⋅  ⋅  2",
-                   sprint(show, MIME"text/plain"(), UpperTriangular(2ones(Int64,3,3))))
-    @test occursin(r"3×3 (LinearAlgebra\.)?UnitUpperTriangular{Int64, Matrix{Int64}}:\n 1  2  2\n ⋅  1  2\n ⋅  ⋅  1",
-                   sprint(show, MIME"text/plain"(), UnitUpperTriangular(2ones(Int64,3,3))))
-end
-
-@testset "adjoint/transpose triangular/vector multiplication" begin
-    for elty in (Float64, ComplexF64), trity in (UpperTriangular, LowerTriangular)
-        A1 = trity(rand(elty, 1, 1))
-        b1 = rand(elty, 1)
-        A4 = trity(rand(elty, 4, 4))
-        b4 = rand(elty, 4)
-        @test A1 * b1' ≈ Matrix(A1) * b1'
-        @test_throws DimensionMismatch A4 * b4'
-        @test A1 * transpose(b1) ≈ Matrix(A1) * transpose(b1)
-        @test_throws DimensionMismatch A4 * transpose(b4)
-        @test A1' * b1' ≈ Matrix(A1') * b1'
-        @test_throws DimensionMismatch A4' * b4'
-        @test A1' * transpose(b1) ≈  Matrix(A1') * transpose(b1)
-        @test_throws DimensionMismatch A4' * transpose(b4)
-        @test transpose(A1) * transpose(b1) ≈  Matrix(transpose(A1)) * transpose(b1)
-        @test_throws DimensionMismatch transpose(A4) * transpose(b4)
-        @test transpose(A1) * b1' ≈ Matrix(transpose(A1)) * b1'
-        @test_throws DimensionMismatch transpose(A4) * b4'
-        @test b1' * transpose(A1) ≈ b1' * Matrix(transpose(A1))
-        @test b4' * transpose(A4) ≈ b4' * Matrix(transpose(A4))
-        @test transpose(b1) * A1' ≈ transpose(b1) * Matrix(A1')
-        @test transpose(b4) * A4' ≈ transpose(b4) * Matrix(A4')
-    end
-end
-
-@testset "Error condition for powm" begin
-    A = UpperTriangular(rand(ComplexF64, 10, 10))
-    @test_throws ArgumentError LinearAlgebra.powm!(A, 2.2)
-    A = LowerTriangular(rand(ComplexF64, 10, 10))
-    At = copy(transpose(A))
-    p = rand()
-    @test LinearAlgebra.powm(A, p) == transpose(LinearAlgebra.powm!(At, p))
-    @test_throws ArgumentError LinearAlgebra.powm(A, 2.2)
-end
-
-# Issue 35058
-let A = [0.9999999999999998 4.649058915617843e-16 -1.3149405273715513e-16 9.9959579317056e-17; -8.326672684688674e-16 1.0000000000000004 2.9280733590254494e-16 -2.9993900031619594e-16; 9.43689570931383e-16 -1.339206523454095e-15 1.0000000000000007 -8.550505126287743e-16; -6.245004513516506e-16 -2.0122792321330962e-16 1.183061278035052e-16 1.0000000000000002],
-    B = [0.09648289218436859 0.023497875751503007 0.0 0.0; 0.023497875751503007 0.045787575150300804 0.0 0.0; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]
-    @test sqrt(A*B*A')^2 ≈ A*B*A'
-end
-
-@testset "one and oneunit for triangular" begin
-    m = rand(4,4)
-    function test_one_oneunit_triangular(a)
-        b = Matrix(a)
-        @test (@inferred a^1) == b^1
-        @test (@inferred a^-1) == b^-1
-        @test one(a) == one(b)
-        @test one(a)*a == a
-        @test a*one(a) == a
-        @test oneunit(a) == oneunit(b)
-        @test oneunit(a) isa typeof(a)
-    end
-    for T in [UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular]
-        a = T(m)
-        test_one_oneunit_triangular(a)
-    end
-    # more complicated examples
-    b = UpperTriangular(LowerTriangular(m))
-    test_one_oneunit_triangular(b)
-    c = UpperTriangular(Diagonal(rand(2)))
-    test_one_oneunit_triangular(c)
-end
-
-@testset "LowerTriangular(Diagonal(...)) and friends (issue #28869)" begin
-    for elty in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int)
-        V = elty ≡ Int ? rand(1:10, 5) : elty.(randn(5))
-        D = Diagonal(V)
-        for dty in (UpperTriangular, LowerTriangular)
-            A = dty(D)
-            @test A * A' == D * D'
-        end
-    end
-end
-
-end # module TestTriangular
diff --git a/stdlib/LinearAlgebra/test/trickyarithmetic.jl b/stdlib/LinearAlgebra/test/trickyarithmetic.jl
deleted file mode 100644
index ad04ac89c2761..0000000000000
--- a/stdlib/LinearAlgebra/test/trickyarithmetic.jl
+++ /dev/null
@@ -1,66 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TrickyArithmetic
-    struct A
-        x::Int
-    end
-    A(a::A) = a
-    Base.convert(::Type{A}, i::Int) = A(i)
-    Base.zero(::Union{A, Type{A}}) = A(0)
-    Base.one(::Union{A, Type{A}}) = A(1)
-    Base.isfinite(a::A) = isfinite(a.x)
-    struct B
-        x::Int
-    end
-    struct C
-        x::Int
-    end
-    Base.isfinite(b::B) = isfinite(b.x)
-    Base.isfinite(c::C) = isfinite(c.x)
-    C(a::A) = C(a.x)
-    Base.zero(::Union{C, Type{C}}) = C(0)
-    Base.one(::Union{C, Type{C}}) = C(1)
-
-    Base.:(*)(x::Int, a::A) = B(x*a.x)
-    Base.:(*)(a::A, x::Int) = B(a.x*x)
-    Base.:(*)(a::Union{A,B}, b::Union{A,B}) = B(a.x*b.x)
-    Base.:(*)(a::Union{A,B,C}, b::Union{A,B,C}) = C(a.x*b.x)
-    Base.:(+)(a::Union{A,B,C}, b::Union{A,B,C}) = C(a.x+b.x)
-    Base.:(-)(a::Union{A,B,C}, b::Union{A,B,C}) = C(a.x-b.x)
-
-    struct D{NT, DT}
-        n::NT
-        d::DT
-    end
-    D{NT, DT}(d::D{NT, DT}) where {NT, DT} = d # called by oneunit
-    Base.zero(::Union{D{NT, DT}, Type{D{NT, DT}}}) where {NT, DT} = zero(NT) / one(DT)
-    Base.one(::Union{D{NT, DT}, Type{D{NT, DT}}}) where {NT, DT} = one(NT) / one(DT)
-    Base.convert(::Type{D{NT, DT}}, a::Union{A, B, C}) where {NT, DT} = NT(a) / one(DT)
-    #Base.convert(::Type{D{NT, DT}}, a::D) where {NT, DT} = NT(a.n) / DT(a.d)
-
-    Base.:(*)(a::D, b::D) = (a.n*b.n) / (a.d*b.d)
-    Base.:(*)(a::D, b::Union{A,B,C}) = (a.n * b) / a.d
-    Base.:(*)(a::Union{A,B,C}, b::D) = b * a
-    Base.inv(a::Union{A,B,C}) = A(1) / a
-    Base.inv(a::D) = a.d / a.n
-    Base.isfinite(a::D) = isfinite(a.n) && isfinite(a.d)
-    Base.:(/)(a::Union{A,B,C}, b::Union{A,B,C}) = D(a, b)
-    Base.:(/)(a::D, b::Union{A,B,C}) = a.n / (a.d*b)
-    Base.:(/)(a::Union{A,B,C,D}, b::D) = a * inv(b)
-    Base.:(+)(a::Union{A,B,C}, b::D) = (a*b.d+b.n) / b.d
-    Base.:(+)(a::D, b::Union{A,B,C}) = b + a
-    Base.:(+)(a::D, b::D) = (a.n*b.d+a.d*b.n) / (a.d*b.d)
-    Base.:(-)(a::Union{A,B,C}) = typeof(a)(a.x)
-    Base.:(-)(a::D) = (-a.n) / a.d
-    Base.:(-)(a::Union{A,B,C,D}, b::Union{A,B,C,D}) = a + (-b)
-
-    Base.promote_rule(::Type{A}, ::Type{B}) = B
-    Base.promote_rule(::Type{B}, ::Type{A}) = B
-    Base.promote_rule(::Type{A}, ::Type{C}) = C
-    Base.promote_rule(::Type{C}, ::Type{A}) = C
-    Base.promote_rule(::Type{B}, ::Type{C}) = C
-    Base.promote_rule(::Type{C}, ::Type{B}) = C
-    Base.promote_rule(::Type{D{NT,DT}}, T::Type{<:Union{A,B,C}}) where {NT,DT} = D{promote_type(NT,T),DT}
-    Base.promote_rule(T::Type{<:Union{A,B,C}}, ::Type{D{NT,DT}}) where {NT,DT} = D{promote_type(NT,T),DT}
-    Base.promote_rule(::Type{D{NS,DS}}, ::Type{D{NT,DT}}) where {NS,DS,NT,DT} = D{promote_type(NS,NT),promote_type(DS,DT)}
-end
diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl
deleted file mode 100644
index d4b2dd5e3f269..0000000000000
--- a/stdlib/LinearAlgebra/test/tridiag.jl
+++ /dev/null
@@ -1,802 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestTridiagonal
-
-using Test, LinearAlgebra, Random
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-
-isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
-using .Main.Quaternions
-
-isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl"))
-using .Main.InfiniteArrays
-
-isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl"))
-using .Main.FillArrays
-
-include("testutils.jl") # test_approx_eq_modphase
-
-#Test equivalence of eigenvectors/singular vectors taking into account possible phase (sign) differences
-function test_approx_eq_vecs(a::StridedVecOrMat{S}, b::StridedVecOrMat{T}, error=nothing) where {S<:Real,T<:Real}
-    n = size(a, 1)
-    @test n==size(b,1) && size(a,2)==size(b,2)
-    error===nothing && (error=n^3*(eps(S)+eps(T)))
-    for i=1:n
-        ev1, ev2 = a[:,i], b[:,i]
-        deviation = min(abs(norm(ev1-ev2)),abs(norm(ev1+ev2)))
-        if !isnan(deviation)
-            @test deviation ≈ 0.0 atol=error
-        end
-    end
-end
-
-@testset for elty in (Float32, Float64, ComplexF32, ComplexF64, Int)
-    n = 12 #Size of matrix problem to test
-    Random.seed!(123)
-    if elty == Int
-        Random.seed!(61516384)
-        d = rand(1:100, n)
-        dl = -rand(0:10, n-1)
-        du = -rand(0:10, n-1)
-        v = rand(1:100, n)
-        B = rand(1:100, n, 2)
-        a = rand(1:100, n-1)
-        b = rand(1:100, n)
-        c = rand(1:100, n-1)
-    else
-        d = convert(Vector{elty}, 1 .+ randn(n))
-        dl = convert(Vector{elty}, randn(n - 1))
-        du = convert(Vector{elty}, randn(n - 1))
-        v = convert(Vector{elty}, randn(n))
-        B = convert(Matrix{elty}, randn(n, 2))
-        a = convert(Vector{elty}, randn(n - 1))
-        b = convert(Vector{elty}, randn(n))
-        c = convert(Vector{elty}, randn(n - 1))
-        if elty <: Complex
-            a += im*convert(Vector{elty}, randn(n - 1))
-            b += im*convert(Vector{elty}, randn(n))
-            c += im*convert(Vector{elty}, randn(n - 1))
-        end
-    end
-    @test_throws DimensionMismatch SymTridiagonal(dl, fill(elty(1), n+1))
-    @test_throws ArgumentError SymTridiagonal(rand(n, n))
-    @test_throws ArgumentError Tridiagonal(dl, dl, dl)
-    @test_throws ArgumentError convert(SymTridiagonal{elty}, Tridiagonal(dl, d, du))
-
-    if elty != Int
-        @testset "issue #1490" begin
-            @test det(fill(elty(1),3,3)) ≈ zero(elty) atol=3*eps(real(one(elty)))
-            @test det(SymTridiagonal(elty[],elty[])) == one(elty)
-        end
-    end
-
-    @testset "constructor" begin
-        for (x, y) in ((d, dl), (GenericArray(d), GenericArray(dl)))
-            ST = (SymTridiagonal(x, y))::SymTridiagonal{elty, typeof(x)}
-            @test ST == Matrix(ST)
-            @test ST.dv === x
-            @test ST.ev === y
-            @test typeof(ST)(ST) === ST
-            TT = (Tridiagonal(y, x, y))::Tridiagonal{elty, typeof(x)}
-            @test TT == Matrix(TT)
-            @test TT.dl === y
-            @test TT.d  === x
-            @test TT.du === y
-            @test typeof(TT)(TT) === TT
-        end
-        ST = SymTridiagonal{elty}([1,2,3,4], [1,2,3])
-        @test eltype(ST) == elty
-        @test SymTridiagonal{elty, Vector{elty}}(ST) === ST
-        @test SymTridiagonal{Int64, Vector{Int64}}(ST) isa SymTridiagonal{Int64, Vector{Int64}}
-        TT = Tridiagonal{elty}([1,2,3], [1,2,3,4], [1,2,3])
-        @test eltype(TT) == elty
-        ST = SymTridiagonal{elty,Vector{elty}}(d, GenericArray(dl))
-        @test isa(ST, SymTridiagonal{elty,Vector{elty}})
-        TT = Tridiagonal{elty,Vector{elty}}(GenericArray(dl), d, GenericArray(dl))
-        @test isa(TT, Tridiagonal{elty,Vector{elty}})
-        @test_throws MethodError SymTridiagonal(d, GenericArray(dl))
-        @test_throws MethodError SymTridiagonal(GenericArray(d), dl)
-        @test_throws MethodError Tridiagonal(GenericArray(dl), d, GenericArray(dl))
-        @test_throws MethodError Tridiagonal(dl, GenericArray(d), dl)
-        @test_throws MethodError SymTridiagonal{elty}(d, GenericArray(dl))
-        @test_throws MethodError Tridiagonal{elty}(GenericArray(dl), d,GenericArray(dl))
-        STI = SymTridiagonal([1,2,3,4], [1,2,3])
-        TTI = Tridiagonal([1,2,3], [1,2,3,4], [1,2,3])
-        TTI2 = Tridiagonal([1,2,3], [1,2,3,4], [1,2,3], [1,2])
-        @test SymTridiagonal(STI) === STI
-        @test Tridiagonal(TTI)    === TTI
-        @test Tridiagonal(TTI2)   === TTI2
-        @test isa(SymTridiagonal{elty}(STI), SymTridiagonal{elty})
-        @test isa(Tridiagonal{elty}(TTI), Tridiagonal{elty})
-        TTI2y = Tridiagonal{elty}(TTI2)
-        @test isa(TTI2y, Tridiagonal{elty})
-        @test TTI2y.du2 == convert(Vector{elty}, [1,2])
-    end
-    @testset "interconversion of Tridiagonal and SymTridiagonal" begin
-        @test Tridiagonal(dl, d, dl) == SymTridiagonal(d, dl)
-        @test SymTridiagonal(d, dl) == Tridiagonal(dl, d, dl)
-        @test Tridiagonal(dl, d, du) + Tridiagonal(du, d, dl) == SymTridiagonal(2d, dl+du)
-        @test SymTridiagonal(d, dl) + Tridiagonal(dl, d, du) == Tridiagonal(dl + dl, d+d, dl+du)
-        @test convert(SymTridiagonal,Tridiagonal(SymTridiagonal(d, dl))) == SymTridiagonal(d, dl)
-        @test Array(convert(SymTridiagonal{ComplexF32},Tridiagonal(SymTridiagonal(d, dl)))) == convert(Matrix{ComplexF32}, SymTridiagonal(d, dl))
-    end
-    @testset "tril/triu" begin
-        zerosd = fill!(similar(d), 0)
-        zerosdl = fill!(similar(dl), 0)
-        zerosdu = fill!(similar(du), 0)
-        @test_throws ArgumentError tril!(SymTridiagonal(d, dl), -n - 2)
-        @test_throws ArgumentError tril!(SymTridiagonal(d, dl), n)
-        @test_throws ArgumentError tril!(Tridiagonal(dl, d, du), -n - 2)
-        @test_throws ArgumentError tril!(Tridiagonal(dl, d, du), n)
-        @test tril(SymTridiagonal(d,dl))    == Tridiagonal(dl,d,zerosdl)
-        @test tril(SymTridiagonal(d,dl),1)  == Tridiagonal(dl,d,dl)
-        @test tril(SymTridiagonal(d,dl),-1) == Tridiagonal(dl,zerosd,zerosdl)
-        @test tril(SymTridiagonal(d,dl),-2) == Tridiagonal(zerosdl,zerosd,zerosdl)
-        @test tril(Tridiagonal(dl,d,du))    == Tridiagonal(dl,d,zerosdu)
-        @test tril(Tridiagonal(dl,d,du),1)  == Tridiagonal(dl,d,du)
-        @test tril(Tridiagonal(dl,d,du),-1) == Tridiagonal(dl,zerosd,zerosdu)
-        @test tril(Tridiagonal(dl,d,du),-2) == Tridiagonal(zerosdl,zerosd,zerosdu)
-
-        @test_throws ArgumentError triu!(SymTridiagonal(d, dl), -n)
-        @test_throws ArgumentError triu!(SymTridiagonal(d, dl), n + 2)
-        @test_throws ArgumentError triu!(Tridiagonal(dl, d, du), -n)
-        @test_throws ArgumentError triu!(Tridiagonal(dl, d, du), n + 2)
-        @test triu(SymTridiagonal(d,dl))    == Tridiagonal(zerosdl,d,dl)
-        @test triu(SymTridiagonal(d,dl),-1) == Tridiagonal(dl,d,dl)
-        @test triu(SymTridiagonal(d,dl),1)  == Tridiagonal(zerosdl,zerosd,dl)
-        @test triu(SymTridiagonal(d,dl),2)  == Tridiagonal(zerosdl,zerosd,zerosdl)
-        @test triu(Tridiagonal(dl,d,du))    == Tridiagonal(zerosdl,d,du)
-        @test triu(Tridiagonal(dl,d,du),-1) == Tridiagonal(dl,d,du)
-        @test triu(Tridiagonal(dl,d,du),1)  == Tridiagonal(zerosdl,zerosd,du)
-        @test triu(Tridiagonal(dl,d,du),2)  == Tridiagonal(zerosdl,zerosd,zerosdu)
-
-        @test !istril(SymTridiagonal(d,dl))
-        @test istril(SymTridiagonal(d,zerosdl))
-        @test !istril(SymTridiagonal(d,dl),-2)
-        @test !istriu(SymTridiagonal(d,dl))
-        @test istriu(SymTridiagonal(d,zerosdl))
-        @test !istriu(SymTridiagonal(d,dl),2)
-        @test istriu(Tridiagonal(zerosdl,d,du))
-        @test !istriu(Tridiagonal(dl,d,zerosdu))
-        @test istriu(Tridiagonal(zerosdl,zerosd,du),1)
-        @test !istriu(Tridiagonal(dl,d,zerosdu),2)
-        @test istril(Tridiagonal(dl,d,zerosdu))
-        @test !istril(Tridiagonal(zerosdl,d,du))
-        @test istril(Tridiagonal(dl,zerosd,zerosdu),-1)
-        @test !istril(Tridiagonal(dl,d,zerosdu),-2)
-
-        @test isdiag(SymTridiagonal(d,zerosdl))
-        @test !isdiag(SymTridiagonal(d,dl))
-        @test isdiag(Tridiagonal(zerosdl,d,zerosdu))
-        @test !isdiag(Tridiagonal(dl,d,zerosdu))
-        @test !isdiag(Tridiagonal(zerosdl,d,du))
-        @test !isdiag(Tridiagonal(dl,d,du))
-
-        # Test methods that could fail due to dv and ev having the same length
-        # see #41089
-
-        badev = zero(d)
-        badev[end] = 1
-        S = SymTridiagonal(d, badev)
-
-        @test istriu(S, -2)
-        @test istriu(S, 0)
-        @test !istriu(S, 2)
-
-        @test isdiag(S)
-    end
-
-    @testset "iszero and isone" begin
-        Tzero = Tridiagonal(zeros(elty, 9), zeros(elty, 10), zeros(elty, 9))
-        Tone = Tridiagonal(zeros(elty, 9), ones(elty, 10), zeros(elty, 9))
-        Tmix = Tridiagonal(zeros(elty, 9), zeros(elty, 10), zeros(elty, 9))
-        Tmix[end, end] = one(elty)
-
-        Szero = SymTridiagonal(zeros(elty, 10), zeros(elty, 9))
-        Sone = SymTridiagonal(ones(elty, 10), zeros(elty, 9))
-        Smix = SymTridiagonal(zeros(elty, 10), zeros(elty, 9))
-        Smix[end, end] = one(elty)
-
-        @test iszero(Tzero)
-        @test !isone(Tzero)
-        @test !iszero(Tone)
-        @test isone(Tone)
-        @test !iszero(Tmix)
-        @test !isone(Tmix)
-
-        @test iszero(Szero)
-        @test !isone(Szero)
-        @test !iszero(Sone)
-        @test isone(Sone)
-        @test !iszero(Smix)
-        @test !isone(Smix)
-
-        badev = zeros(elty, 3)
-        badev[end] = 1
-
-        @test isone(SymTridiagonal(ones(elty, 3), badev))
-        @test iszero(SymTridiagonal(zeros(elty, 3), badev))
-    end
-
-    @testset for mat_type in (Tridiagonal, SymTridiagonal)
-        A = mat_type == Tridiagonal ? mat_type(dl, d, du) : mat_type(d, dl)
-        fA = map(elty <: Complex ? ComplexF64 : Float64, Array(A))
-        @testset "similar, size, and copyto!" begin
-            B = similar(A)
-            @test size(B) == size(A)
-            copyto!(B, A)
-            @test B == A
-            @test isa(similar(A), mat_type{elty})
-            @test isa(similar(A, Int), mat_type{Int})
-            @test isa(similar(A, (3, 2)), Matrix)
-            @test isa(similar(A, Int, (3, 2)), Matrix{Int})
-            @test size(A, 3) == 1
-            @test size(A, 1) == n
-            @test size(A) == (n, n)
-            @test_throws ArgumentError size(A, 0)
-        end
-        @testset "getindex" begin
-            @test_throws BoundsError A[n + 1, 1]
-            @test_throws BoundsError A[1, n + 1]
-            @test A[1, n] == convert(elty, 0.0)
-            @test A[1, 1] == d[1]
-        end
-        @testset "setindex!" begin
-            @test_throws BoundsError A[n + 1, 1] = 0 # test bounds check
-            @test_throws BoundsError A[1, n + 1] = 0 # test bounds check
-            @test_throws ArgumentError A[1, 3]   = 1 # test assignment off the main/sub/super diagonal
-            if mat_type == Tridiagonal
-                @test (A[3, 3] = A[3, 3]; A == fA) # test assignment on the main diagonal
-                @test (A[3, 2] = A[3, 2]; A == fA) # test assignment on the subdiagonal
-                @test (A[2, 3] = A[2, 3]; A == fA) # test assignment on the superdiagonal
-                @test ((A[1, 3] = 0) == 0; A == fA) # test zero assignment off the main/sub/super diagonal
-            else # mat_type is SymTridiagonal
-                @test ((A[3, 3] = A[3, 3]) == A[3, 3]; A == fA) # test assignment on the main diagonal
-                @test_throws ArgumentError A[3, 2] = 1 # test assignment on the subdiagonal
-                @test_throws ArgumentError A[2, 3] = 1 # test assignment on the superdiagonal
-            end
-        end
-        @testset "diag" begin
-            @test (@inferred diag(A))::typeof(d) == d
-            @test (@inferred diag(A, 0))::typeof(d) == d
-            @test (@inferred diag(A, 1))::typeof(d) == (mat_type == Tridiagonal ? du : dl)
-            @test (@inferred diag(A, -1))::typeof(d) == dl
-            @test (@inferred diag(A, n-1))::typeof(d) == zeros(elty, 1)
-            @test_throws ArgumentError diag(A, -n - 1)
-            @test_throws ArgumentError diag(A, n + 1)
-            GA = mat_type == Tridiagonal ? mat_type(GenericArray.((dl, d, du))...) : mat_type(GenericArray.((d, dl))...)
-            @test (@inferred diag(GA))::typeof(GenericArray(d)) == GenericArray(d)
-            @test (@inferred diag(GA, -1))::typeof(GenericArray(d)) == GenericArray(dl)
-        end
-        @testset "trace" begin
-            if real(elty) <: Integer
-                @test tr(A) == tr(fA)
-            else
-                @test tr(A) ≈ tr(fA) rtol=2eps(real(elty))
-            end
-        end
-        @testset "Idempotent tests" begin
-            for func in (conj, transpose, adjoint)
-                @test func(func(A)) == A
-            end
-        end
-        @testset "permutedims(::[Sym]Tridiagonal)" begin
-            @test permutedims(permutedims(A)) === A
-            @test permutedims(A) == transpose.(transpose(A))
-            @test permutedims(A, [1, 2]) === A
-            @test permutedims(A, (2, 1)) == permutedims(A)
-        end
-        if elty != Int
-            @testset "Simple unary functions" begin
-                for func in (det, inv)
-                    @test func(A) ≈ func(fA) atol=n^2*sqrt(eps(real(one(elty))))
-                end
-            end
-        end
-        ds = mat_type == Tridiagonal ? (dl, d, du) : (d, dl)
-        for f in (real, imag)
-            @test f(A)::mat_type == mat_type(map(f, ds)...)
-        end
-        if elty <: Real
-            for f in (round, trunc, floor, ceil)
-                fds = [f.(d) for d in ds]
-                @test f.(A)::mat_type == mat_type(fds...)
-                @test f.(Int, A)::mat_type == f.(Int, fA)
-            end
-        end
-        fds = [abs.(d) for d in ds]
-        @test abs.(A)::mat_type == mat_type(fds...)
-        @testset "Multiplication with strided matrix/vector" begin
-            @test (x = fill(1.,n); A*x ≈ Array(A)*x)
-            @test (X = fill(1.,n,2); A*X ≈ Array(A)*X)
-        end
-        @testset "Binary operations" begin
-            B = mat_type == Tridiagonal ? mat_type(a, b, c) : mat_type(b, a)
-            fB = map(elty <: Complex ? ComplexF64 : Float64, Array(B))
-            for op in (+, -, *)
-                @test Array(op(A, B)) ≈ op(fA, fB)
-            end
-            α = rand(elty)
-            @test Array(α*A) ≈ α*Array(A)
-            @test Array(A*α) ≈ Array(A)*α
-            @test Array(A/α) ≈ Array(A)/α
-
-            @testset "Matmul with Triangular types" begin
-                @test A*LinearAlgebra.UnitUpperTriangular(Matrix(1.0I, n, n)) ≈ fA
-                @test A*LinearAlgebra.UnitLowerTriangular(Matrix(1.0I, n, n)) ≈ fA
-                @test A*UpperTriangular(Matrix(1.0I, n, n)) ≈ fA
-                @test A*LowerTriangular(Matrix(1.0I, n, n)) ≈ fA
-            end
-            @testset "mul! errors" begin
-                Cnn, Cnm, Cmn = Matrix{elty}.(undef, ((n,n), (n,n+1), (n+1,n)))
-                @test_throws DimensionMismatch LinearAlgebra.mul!(Cnn,A,Cnm)
-                @test_throws DimensionMismatch LinearAlgebra.mul!(Cnn,A,Cmn)
-                @test_throws DimensionMismatch LinearAlgebra.mul!(Cnn,B,Cmn)
-                @test_throws DimensionMismatch LinearAlgebra.mul!(Cmn,B,Cnn)
-                @test_throws DimensionMismatch LinearAlgebra.mul!(Cnm,B,Cnn)
-            end
-        end
-        @testset "Negation" begin
-            mA = -A
-            @test mA isa mat_type
-            @test -mA == A
-        end
-        if mat_type == SymTridiagonal
-            @testset "Tridiagonal/SymTridiagonal mixing ops" begin
-                B = convert(Tridiagonal{elty}, A)
-                @test B == A
-                @test B + A == A + B
-                @test B - A == A - B
-            end
-            if elty <: LinearAlgebra.BlasReal
-                @testset "Eigensystems" begin
-                    zero, infinity = convert(elty, 0), convert(elty, Inf)
-                    @testset "stebz! and stein!" begin
-                        w, iblock, isplit = LAPACK.stebz!('V', 'B', -infinity, infinity, 0, 0, zero, b, a)
-                        evecs = LAPACK.stein!(b, a, w)
-
-                        (e, v) = eigen(SymTridiagonal(b, a))
-                        @test e ≈ w
-                        test_approx_eq_vecs(v, evecs)
-                    end
-                    @testset "stein! call using iblock and isplit" begin
-                        w, iblock, isplit = LAPACK.stebz!('V', 'B', -infinity, infinity, 0, 0, zero, b, a)
-                        evecs = LAPACK.stein!(b, a, w, iblock, isplit)
-                        test_approx_eq_vecs(v, evecs)
-                    end
-                    @testset "stegr! call with index range" begin
-                        F = eigen(SymTridiagonal(b, a),1:2)
-                        fF = eigen(Symmetric(Array(SymTridiagonal(b, a))),1:2)
-                        test_approx_eq_modphase(F.vectors, fF.vectors)
-                        @test F.values ≈ fF.values
-                    end
-                    @testset "stegr! call with value range" begin
-                        F = eigen(SymTridiagonal(b, a),0.0,1.0)
-                        fF = eigen(Symmetric(Array(SymTridiagonal(b, a))),0.0,1.0)
-                        test_approx_eq_modphase(F.vectors, fF.vectors)
-                        @test F.values ≈ fF.values
-                    end
-                    @testset "eigenvalues/eigenvectors of symmetric tridiagonal" begin
-                        if elty === Float32 || elty === Float64
-                            DT, VT = @inferred eigen(A)
-                            @inferred eigen(A, 2:4)
-                            @inferred eigen(A, 1.0, 2.0)
-                            D, Vecs = eigen(fA)
-                            @test DT ≈ D
-                            @test abs.(VT'Vecs) ≈ Matrix(elty(1)I, n, n)
-                            test_approx_eq_modphase(eigvecs(A), eigvecs(fA))
-                            #call to LAPACK.stein here
-                            test_approx_eq_modphase(eigvecs(A,eigvals(A)),eigvecs(A))
-                        elseif elty != Int
-                            # check that undef is determined accurately even if type inference
-                            # bails out due to the number of try/catch blocks in this code.
-                            @test_throws UndefVarError fA
-                        end
-                    end
-                end
-            end
-            if elty <: Real
-                Ts = SymTridiagonal(d, dl)
-                Fs = Array(Ts)
-                Tldlt = factorize(Ts)
-                @testset "symmetric tridiagonal" begin
-                    @test_throws DimensionMismatch Tldlt\rand(elty,n+1)
-                    @test size(Tldlt) == size(Ts)
-                    if elty <: AbstractFloat
-                        @test LinearAlgebra.LDLt{elty,SymTridiagonal{elty,Vector{elty}}}(Tldlt) === Tldlt
-                        @test LinearAlgebra.LDLt{elty}(Tldlt) === Tldlt
-                        @test typeof(convert(LinearAlgebra.LDLt{Float32,Matrix{Float32}},Tldlt)) ==
-                            LinearAlgebra.LDLt{Float32,Matrix{Float32}}
-                        @test typeof(convert(LinearAlgebra.LDLt{Float32},Tldlt)) ==
-                            LinearAlgebra.LDLt{Float32,SymTridiagonal{Float32,Vector{Float32}}}
-                    end
-                    for vv in (copy(v), view(v, 1:n))
-                        invFsv = Fs\vv
-                        x = Ts\vv
-                        @test x ≈ invFsv
-                        @test Array(Tldlt) ≈ Fs
-                    end
-
-                    @testset "similar" begin
-                        @test isa(similar(Ts), SymTridiagonal{elty})
-                        @test isa(similar(Ts, Int), SymTridiagonal{Int})
-                        @test isa(similar(Ts, (3, 2)), Matrix)
-                        @test isa(similar(Ts, Int, (3, 2)), Matrix{Int})
-                    end
-
-                    @test first(logabsdet(Tldlt)) ≈ first(logabsdet(Fs))
-                    @test last(logabsdet(Tldlt))  ≈ last(logabsdet(Fs))
-                    # just test that the det method exists. The numerical value of the
-                    # determinant is unreliable
-                    det(Tldlt)
-                end
-            end
-        else # mat_type is Tridiagonal
-            @testset "tridiagonal linear algebra" begin
-                for (BB, vv) in ((copy(B), copy(v)), (view(B, 1:n, 1), view(v, 1:n)))
-                    @test A*vv ≈ fA*vv
-                    invFv = fA\vv
-                    @test A\vv ≈ invFv
-                    # @test Base.solve(T,v) ≈ invFv
-                    # @test Base.solve(T, B) ≈ F\B
-                    Tlu = factorize(A)
-                    x = Tlu\vv
-                    @test x ≈ invFv
-                end
-            end
-        end
-        @testset "generalized dot" begin
-            x = fill(convert(elty, 1), n)
-            y = fill(convert(elty, 1), n)
-            @test dot(x, A, y) ≈ dot(A'x, y) ≈ dot(x, A*y)
-            @test dot([1], SymTridiagonal([1], Int[]), [1]) == 1
-            @test dot([1], Tridiagonal(Int[], [1], Int[]), [1]) == 1
-            @test dot(Int[], SymTridiagonal(Int[], Int[]), Int[]) === 0
-            @test dot(Int[], Tridiagonal(Int[], Int[], Int[]), Int[]) === 0
-        end
-    end
-end
-
-@testset "SymTridiagonal/Tridiagonal block matrix" begin
-    M = [1 2; 2 4]
-    n = 5
-    A = SymTridiagonal(fill(M, n), fill(M, n-1))
-    @test @inferred A[1,1] == Symmetric(M)
-    @test @inferred A[1,2] == M
-    @test @inferred A[2,1] == transpose(M)
-    @test @inferred diag(A, 1) == fill(M, n-1)
-    @test @inferred diag(A, 0) == fill(Symmetric(M), n)
-    @test @inferred diag(A, -1) == fill(transpose(M), n-1)
-    @test_throws ArgumentError diag(A, -2)
-    @test_throws ArgumentError diag(A, 2)
-    @test_throws ArgumentError diag(A, n+1)
-    @test_throws ArgumentError diag(A, -n-1)
-
-    A = Tridiagonal(fill(M, n-1), fill(M, n), fill(M, n-1))
-    @test @inferred A[1,1] == M
-    @test @inferred A[1,2] == M
-    @test @inferred A[2,1] == M
-    @test @inferred diag(A, 1) == fill(M, n-1)
-    @test @inferred diag(A, 0) == fill(M, n)
-    @test @inferred diag(A, -1) == fill(M, n-1)
-    @test_throws MethodError diag(A, -2)
-    @test_throws MethodError diag(A, 2)
-    @test_throws ArgumentError diag(A, n+1)
-    @test_throws ArgumentError diag(A, -n-1)
-
-    for n in 0:2
-        dv, ev = fill(M, n), fill(M, max(n-1,0))
-        A = SymTridiagonal(dv, ev)
-        @test A == Matrix{eltype(A)}(A)
-
-        A = Tridiagonal(ev, dv, ev)
-        @test A == Matrix{eltype(A)}(A)
-    end
-end
-
-@testset "Issue 12068" begin
-    @test SymTridiagonal([1, 2], [0])^3 == [1 0; 0 8]
-end
-
-@testset "convert for SymTridiagonal" begin
-    STF32 = SymTridiagonal{Float32}(fill(1f0, 5), fill(1f0, 4))
-    @test convert(SymTridiagonal{Float64}, STF32)::SymTridiagonal{Float64} == STF32
-    @test convert(AbstractMatrix{Float64}, STF32)::SymTridiagonal{Float64} == STF32
-end
-
-@testset "constructors from matrix" begin
-    @test SymTridiagonal([1 2 3; 2 5 6; 0 6 9]) == [1 2 0; 2 5 6; 0 6 9]
-    @test Tridiagonal([1 2 3; 4 5 6; 7 8 9]) == [1 2 0; 4 5 6; 0 8 9]
-end
-
-@testset "constructors with range and other abstract vectors" begin
-    @test SymTridiagonal(1:3, 1:2) == [1 1 0; 1 2 2; 0 2 3]
-    @test Tridiagonal(4:5, 1:3, 1:2) == [1 1 0; 4 2 2; 0 5 3]
-end
-
-@testset "Issue #26994 (and the empty case)" begin
-    T = SymTridiagonal([1.0],[3.0])
-    x = ones(1)
-    @test T*x == ones(1)
-    @test SymTridiagonal(ones(0), ones(0)) * ones(0, 2) == ones(0, 2)
-end
-
-@testset "Issue 29630" begin
-    function central_difference_discretization(N; dfunc = x -> 12x^2 - 2N^2,
-                                               dufunc = x -> N^2 + 4N*x,
-                                               dlfunc = x -> N^2 - 4N*x,
-                                               bfunc = x -> 114ℯ^-x * (1 + 3x),
-                                               b0 = 0, bf = 57/ℯ,
-                                               x0 = 0, xf = 1)
-        h = 1/N
-        d, du, dl, b = map(dfunc, (x0+h):h:(xf-h)), map(dufunc, (x0+h):h:(xf-2h)),
-                       map(dlfunc, (x0+2h):h:(xf-h)), map(bfunc, (x0+h):h:(xf-h))
-        b[1] -= dlfunc(x0)*b0     # subtract the boundary term
-        b[end] -= dufunc(xf)*bf   # subtract the boundary term
-        Tridiagonal(dl, d, du), b
-    end
-
-    A90, b90 = central_difference_discretization(90)
-
-    @test A90\b90 ≈ inv(A90)*b90
-end
-
-@testset "singular values of SymTridiag" begin
-    @test svdvals(SymTridiagonal([-4,2,3], [0,0])) ≈ [4,3,2]
-    @test svdvals(SymTridiagonal(collect(0.:10.), zeros(10))) ≈ reverse(0:10)
-    @test svdvals(SymTridiagonal([1,2,1], [1,1])) ≈ [3,1,0]
-    # test that dependent methods such as `cond` also work
-    @test cond(SymTridiagonal([1,2,3], [0,0])) ≈ 3
-end
-
-@testset "sum, mapreduce" begin
-    T = Tridiagonal([1,2], [1,2,3], [7,8])
-    Tdense = Matrix(T)
-    S = SymTridiagonal([1,2,3], [1,2])
-    Sdense = Matrix(S)
-    @test sum(T) == 24
-    @test sum(S) == 12
-    @test_throws ArgumentError sum(T, dims=0)
-    @test sum(T, dims=1) == sum(Tdense, dims=1)
-    @test sum(T, dims=2) == sum(Tdense, dims=2)
-    @test sum(T, dims=3) == sum(Tdense, dims=3)
-    @test typeof(sum(T, dims=1)) == typeof(sum(Tdense, dims=1))
-    @test mapreduce(one, min, T, dims=1) == mapreduce(one, min, Tdense, dims=1)
-    @test mapreduce(one, min, T, dims=2) == mapreduce(one, min, Tdense, dims=2)
-    @test mapreduce(one, min, T, dims=3) == mapreduce(one, min, Tdense, dims=3)
-    @test typeof(mapreduce(one, min, T, dims=1)) == typeof(mapreduce(one, min, Tdense, dims=1))
-    @test mapreduce(zero, max, T, dims=1) == mapreduce(zero, max, Tdense, dims=1)
-    @test mapreduce(zero, max, T, dims=2) == mapreduce(zero, max, Tdense, dims=2)
-    @test mapreduce(zero, max, T, dims=3) == mapreduce(zero, max, Tdense, dims=3)
-    @test typeof(mapreduce(zero, max, T, dims=1)) == typeof(mapreduce(zero, max, Tdense, dims=1))
-    @test_throws ArgumentError sum(S, dims=0)
-    @test sum(S, dims=1) == sum(Sdense, dims=1)
-    @test sum(S, dims=2) == sum(Sdense, dims=2)
-    @test sum(S, dims=3) == sum(Sdense, dims=3)
-    @test typeof(sum(S, dims=1)) == typeof(sum(Sdense, dims=1))
-    @test mapreduce(one, min, S, dims=1) == mapreduce(one, min, Sdense, dims=1)
-    @test mapreduce(one, min, S, dims=2) == mapreduce(one, min, Sdense, dims=2)
-    @test mapreduce(one, min, S, dims=3) == mapreduce(one, min, Sdense, dims=3)
-    @test typeof(mapreduce(one, min, S, dims=1)) == typeof(mapreduce(one, min, Sdense, dims=1))
-    @test mapreduce(zero, max, S, dims=1) == mapreduce(zero, max, Sdense, dims=1)
-    @test mapreduce(zero, max, S, dims=2) == mapreduce(zero, max, Sdense, dims=2)
-    @test mapreduce(zero, max, S, dims=3) == mapreduce(zero, max, Sdense, dims=3)
-    @test typeof(mapreduce(zero, max, S, dims=1)) == typeof(mapreduce(zero, max, Sdense, dims=1))
-
-    T = Tridiagonal(Int[], Int[], Int[])
-    Tdense = Matrix(T)
-    S = SymTridiagonal(Int[], Int[])
-    Sdense = Matrix(S)
-    @test sum(T) == 0
-    @test sum(S) == 0
-    @test_throws ArgumentError sum(T, dims=0)
-    @test sum(T, dims=1) == sum(Tdense, dims=1)
-    @test sum(T, dims=2) == sum(Tdense, dims=2)
-    @test sum(T, dims=3) == sum(Tdense, dims=3)
-    @test typeof(sum(T, dims=1)) == typeof(sum(Tdense, dims=1))
-    @test_throws ArgumentError sum(S, dims=0)
-    @test sum(S, dims=1) == sum(Sdense, dims=1)
-    @test sum(S, dims=2) == sum(Sdense, dims=2)
-    @test sum(S, dims=3) == sum(Sdense, dims=3)
-    @test typeof(sum(S, dims=1)) == typeof(sum(Sdense, dims=1))
-
-    T = Tridiagonal(Int[], Int[2], Int[])
-    Tdense = Matrix(T)
-    S = SymTridiagonal(Int[2], Int[])
-    Sdense = Matrix(S)
-    @test sum(T) == 2
-    @test sum(S) == 2
-    @test_throws ArgumentError sum(T, dims=0)
-    @test sum(T, dims=1) == sum(Tdense, dims=1)
-    @test sum(T, dims=2) == sum(Tdense, dims=2)
-    @test sum(T, dims=3) == sum(Tdense, dims=3)
-    @test typeof(sum(T, dims=1)) == typeof(sum(Tdense, dims=1))
-    @test_throws ArgumentError sum(S, dims=0)
-    @test sum(S, dims=1) == sum(Sdense, dims=1)
-    @test sum(S, dims=2) == sum(Sdense, dims=2)
-    @test sum(S, dims=3) == sum(Sdense, dims=3)
-    @test typeof(sum(S, dims=1)) == typeof(sum(Sdense, dims=1))
-end
-
-@testset "Issue #28994 (sum of Tridigonal and UniformScaling)" begin
-    dl = [1., 1.]
-    d = [-2., -2., -2.]
-    T = Tridiagonal(dl, d, dl)
-    S = SymTridiagonal(T)
-
-    @test diag(T + 2I) == zero(d)
-    @test diag(S + 2I) == zero(d)
-end
-
-@testset "convert Tridiagonal to SymTridiagonal error" begin
-    du = rand(Float64, 4)
-    d  = rand(Float64, 5)
-    dl = rand(Float64, 4)
-    T = Tridiagonal(dl, d, du)
-    @test_throws ArgumentError SymTridiagonal{Float32}(T)
-end
-
-# Issue #38765
-@testset "Eigendecomposition with different lengths" begin
-    # length(A.ev) can be either length(A.dv) or length(A.dv) - 1
-    A = SymTridiagonal(fill(1.0, 3), fill(-1.0, 3))
-    F = eigen(A)
-    A2 = SymTridiagonal(fill(1.0, 3), fill(-1.0, 2))
-    F2 = eigen(A2)
-    test_approx_eq_modphase(F.vectors, F2.vectors)
-    @test F.values ≈ F2.values ≈ eigvals(A) ≈ eigvals(A2)
-    @test eigvecs(A) ≈ eigvecs(A2)
-    @test eigvecs(A, eigvals(A)[1:1]) ≈ eigvecs(A2, eigvals(A2)[1:1])
-end
-
-@testset "non-commutative algebra (#39701)" begin
-    for A in (SymTridiagonal(Quaternion.(randn(5), randn(5), randn(5), randn(5)), Quaternion.(randn(4), randn(4), randn(4), randn(4))),
-              Tridiagonal(Quaternion.(randn(4), randn(4), randn(4), randn(4)), Quaternion.(randn(5), randn(5), randn(5), randn(5)), Quaternion.(randn(4), randn(4), randn(4), randn(4))))
-        c = Quaternion(1,2,3,4)
-        @test A * c ≈ Matrix(A) * c
-        @test A / c ≈ Matrix(A) / c
-        @test c * A ≈ c * Matrix(A)
-        @test c \ A ≈ c \ Matrix(A)
-    end
-end
-
-@testset "adjoint of LDLt" begin
-    Sr = SymTridiagonal(randn(5), randn(4))
-    Sc = SymTridiagonal(complex.(randn(5)) .+ 1im, complex.(randn(4), randn(4)))
-    b = ones(size(Sr, 1))
-
-    F = ldlt(Sr)
-    @test F\b == F'\b
-
-    F = ldlt(Sc)
-    @test copy(Sc')\b == F'\b
-end
-
-@testset "symmetric and hermitian tridiagonals" begin
-    A = [im 0; 0 -im]
-    @test issymmetric(A)
-    @test !ishermitian(A)
-
-    # real
-    A = SymTridiagonal(randn(5), randn(4))
-    @test issymmetric(A)
-    @test ishermitian(A)
-
-    A = Tridiagonal(A.ev, A.dv, A.ev .+ 1)
-    @test !issymmetric(A)
-    @test !ishermitian(A)
-
-    # complex
-    # https://github.com/JuliaLang/julia/pull/41037#discussion_r645524081
-    S = SymTridiagonal(randn(5) .+ 0im, randn(5) .+ 0im)
-    S.ev[end] = im
-    @test issymmetric(S)
-    @test ishermitian(S)
-
-    S = SymTridiagonal(randn(5) .+ 1im, randn(4) .+ 1im)
-    @test issymmetric(S)
-    @test !ishermitian(S)
-
-    S = Tridiagonal(S.ev, S.dv, adjoint.(S.ev))
-    @test !issymmetric(S)
-    @test !ishermitian(S)
-
-    S = Tridiagonal(S.dl, real.(S.d) .+ 0im, S.du)
-    @test !issymmetric(S)
-    @test ishermitian(S)
-end
-
-isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
-using .Main.ImmutableArrays
-
-@testset "Conversion to AbstractArray" begin
-    # tests corresponding to #34995
-    v1 = ImmutableArray([1, 2])
-    v2 = ImmutableArray([3, 4, 5])
-    v3 = ImmutableArray([6, 7])
-    T = Tridiagonal(v1, v2, v3)
-    Tsym = SymTridiagonal(v2, v1)
-
-    @test convert(AbstractArray{Float64}, T)::Tridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == T
-    @test convert(AbstractMatrix{Float64}, T)::Tridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == T
-    @test convert(AbstractArray{Float64}, Tsym)::SymTridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Tsym
-    @test convert(AbstractMatrix{Float64}, Tsym)::SymTridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Tsym
-end
-
-@testset "dot(x,A,y) for A::Tridiagonal or SymTridiagonal" begin
-    for elty in (Float32, Float64, ComplexF32, ComplexF64, Int)
-        x = fill(convert(elty, 1), 0)
-        T = Tridiagonal(x, x, x)
-        Tsym = SymTridiagonal(x, x)
-        @test dot(x, T, x) == 0.0
-        @test dot(x, Tsym, x) == 0.0
-    end
-end
-
-isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl"))
-using .Main.SizedArrays
-@testset "non-number eltype" begin
-    @testset "sum for SymTridiagonal" begin
-        dv = [SizedArray{(2,2)}(rand(1:2048,2,2)) for i in 1:10]
-        ev = [SizedArray{(2,2)}(rand(1:2048,2,2)) for i in 1:10]
-        S = SymTridiagonal(dv, ev)
-        Sdense = Matrix(S)
-        @test Sdense == collect(S)
-        @test sum(S) == sum(Sdense)
-        @test sum(S, dims = 1) == sum(Sdense, dims = 1)
-        @test sum(S, dims = 2) == sum(Sdense, dims = 2)
-    end
-    @testset "issymmetric/ishermitian for Tridiagonal" begin
-        @test !issymmetric(Tridiagonal([[1 2;3 4]], [[1 2;2 3], [1 2;2 3]], [[1 2;3 4]]))
-        @test !issymmetric(Tridiagonal([[1 3;2 4]], [[1 2;3 4], [1 2;3 4]], [[1 2;3 4]]))
-        @test issymmetric(Tridiagonal([[1 3;2 4]], [[1 2;2 3], [1 2;2 3]], [[1 2;3 4]]))
-
-        @test ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+0im, [1 2;2 3].+0im], [[1 2;3 4].-im]))
-        @test !ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+0im, [1 2;2 3].+0im], [[1 2;3 4].+im]))
-        @test !ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+im, [1 2;2 3].+0im], [[1 2;3 4].-im]))
-    end
-    @testset "== between Tridiagonal and SymTridiagonal" begin
-        dv = [SizedArray{(2,2)}([1 2;3 4]) for i in 1:4]
-        ev = [SizedArray{(2,2)}([3 4;1 2]) for i in 1:4]
-        S = SymTridiagonal(dv, ev)
-        Sdense = Matrix(S)
-        @test S == Tridiagonal(diag(Sdense, -1), diag(Sdense),  diag(Sdense, 1)) == S
-        @test S !== Tridiagonal(diag(Sdense, 1), diag(Sdense),  diag(Sdense, 1)) !== S
-    end
-end
-
-@testset "copyto! with UniformScaling" begin
-    @testset "Tridiagonal" begin
-        @testset "Fill" begin
-            for len in (4, InfiniteArrays.Infinity())
-                d = FillArrays.Fill(1, len)
-                ud = FillArrays.Fill(0, len-1)
-                T = Tridiagonal(ud, d, ud)
-                @test copyto!(T, I) === T
-            end
-        end
-        T = Tridiagonal(fill(3, 3), fill(2, 4), fill(3, 3))
-        copyto!(T, I)
-        @test all(isone, diag(T))
-        @test all(iszero, diag(T, 1))
-        @test all(iszero, diag(T, -1))
-    end
-    @testset "SymTridiagonal" begin
-        @testset "Fill" begin
-            for len in (4, InfiniteArrays.Infinity())
-                d = FillArrays.Fill(1, len)
-                ud = FillArrays.Fill(0, len-1)
-                ST = SymTridiagonal(d, ud)
-                @test copyto!(ST, I) === ST
-            end
-        end
-        ST = SymTridiagonal(fill(2, 4), fill(3, 3))
-        copyto!(ST, I)
-        @test all(isone, diag(ST))
-        @test all(iszero, diag(ST, 1))
-        @test all(iszero, diag(ST, -1))
-    end
-end
-
-end # module TestTridiagonal
diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl
deleted file mode 100644
index be1b9887d570f..0000000000000
--- a/stdlib/LinearAlgebra/test/uniformscaling.jl
+++ /dev/null
@@ -1,564 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module TestUniformscaling
-
-using Test, LinearAlgebra, Random
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
-using .Main.Quaternions
-isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
-using .Main.OffsetArrays
-
-Random.seed!(1234543)
-
-@testset "basic functions" begin
-    @test I === I' # transpose
-    @test ndims(I) == 2
-    @test one(UniformScaling{Float32}) == UniformScaling(one(Float32))
-    @test zero(UniformScaling{Float32}) == UniformScaling(zero(Float32))
-    @test eltype(one(UniformScaling{Float32})) == Float32
-    @test zero(UniformScaling(rand(ComplexF64))) == zero(UniformScaling{ComplexF64})
-    @test one(UniformScaling(rand(ComplexF64))) == one(UniformScaling{ComplexF64})
-    @test eltype(one(UniformScaling(rand(ComplexF64)))) == ComplexF64
-    @test -one(UniformScaling(2)) == UniformScaling(-1)
-    @test opnorm(UniformScaling(1+im)) ≈ sqrt(2)
-    @test convert(UniformScaling{Float64}, 2I) === 2.0I
-end
-
-@testset "getindex" begin
-    @test I[1,1] == 1
-    @test I[1,2] == 0
-
-    J = I(15)
-    for (a, b) in [
-        # indexing that returns a Vector
-        (1:10, 1),
-        (4, 1:10),
-        (11, 1:10),
-        # indexing that returns a Matrix
-        (1:2, 1:2),
-        (1:2:3, 1:2:3),
-        (1:2:8, 2:2:9),
-        (1:2:8, 9:-4:1),
-        (9:-4:1, 1:2:8),
-        (2:3, 1:2),
-        (2:-1:1, 1:2),
-        (1:2:9, 5:2:13),
-        (1, [1,2,5]),
-        (1, [1,10,5,2]),
-        (10, [10]),
-        ([1], 1),
-        ([15,1,5,2], 6),
-        ([2], [2]),
-        ([2,9,8,2,1], [2,8,4,3,1]),
-        ([8,3,5,3], 2:9),
-    ]
-        @test I[a,b] == J[a,b]
-        ndims(a) == 1 && @test I[OffsetArray(a,-10),b] == J[OffsetArray(a,-10),b]
-        ndims(b) == 1 && @test I[a,OffsetArray(b,-9)] == J[a,OffsetArray(b,-9)]
-        ndims(a) == ndims(b) == 1 && @test I[OffsetArray(a,-7),OffsetArray(b,-8)] == J[OffsetArray(a,-7),OffsetArray(b,-8)]
-    end
-end
-
-@testset "sqrt, exp, log, and trigonometric functions" begin
-    # convert to a dense matrix with random size
-    M(J) = (N = rand(1:10); Matrix(J, N, N))
-
-    # on complex plane
-    J = UniformScaling(randn(ComplexF64))
-    for f in ( exp,   log,
-               sqrt,
-               sin,   cos,   tan,
-               asin,  acos,  atan,
-               csc,   sec,   cot,
-               acsc,  asec,  acot,
-               sinh,  cosh,  tanh,
-               asinh, acosh, atanh,
-               csch,  sech,  coth,
-               acsch, asech, acoth )
-        @test f(J) ≈ f(M(J))
-    end
-
-    # on real axis
-    for (λ, fs) in (
-        # functions defined for x ∈ ℝ
-        (()->randn(),           (exp,
-                                 sin,   cos,   tan,
-                                 csc,   sec,   cot,
-                                 atan,  acot,
-                                 sinh,  cosh,  tanh,
-                                 csch,  sech,  coth,
-                                 asinh, acsch)),
-        # functions defined for x ≥ 0
-        (()->abs(randn()),      (log,   sqrt)),
-        # functions defined for -1 ≤ x ≤ 1
-        (()->2rand()-1,         (asin,  acos,  atanh)),
-        # functions defined for x ≤ -1 or x ≥ 1
-        (()->1/(2rand()-1),     (acsc,  asec,  acoth)),
-        # functions defined for 0 ≤ x ≤ 1
-        (()->rand(),            (asech,)),
-        # functions defined for x ≥ 1
-        (()->1/rand(),          (acosh,))
-    )
-        for f in fs
-            J = UniformScaling(λ())
-            @test f(J) ≈ f(M(J))
-        end
-    end
-end
-
-@testset "conjugation of UniformScaling" begin
-    @test conj(UniformScaling(1))::UniformScaling{Int} == UniformScaling(1)
-    @test conj(UniformScaling(1.0))::UniformScaling{Float64} == UniformScaling(1.0)
-    @test conj(UniformScaling(1+1im))::UniformScaling{Complex{Int}} == UniformScaling(1-1im)
-    @test conj(UniformScaling(1.0+1.0im))::UniformScaling{ComplexF64} == UniformScaling(1.0-1.0im)
-end
-
-@testset "isdiag, istriu, istril, issymmetric, ishermitian, isposdef, isapprox" begin
-    @test isdiag(I)
-    @test istriu(I)
-    @test istril(I)
-    @test issymmetric(I)
-    @test issymmetric(UniformScaling(complex(1.0,1.0)))
-    @test ishermitian(I)
-    @test !ishermitian(UniformScaling(complex(1.0,1.0)))
-    @test isposdef(UniformScaling(rand()))
-    @test !isposdef(UniformScaling(-rand()))
-    @test !isposdef(UniformScaling(randn(ComplexF64)))
-    @test !isposdef(UniformScaling(NaN))
-    @test isposdef(I)
-    @test !isposdef(-I)
-    @test isposdef(UniformScaling(complex(1.0, 0.0)))
-    @test !isposdef(UniformScaling(complex(1.0, 1.0)))
-    @test UniformScaling(4.00000000000001) ≈ UniformScaling(4.0)
-    @test UniformScaling(4.32) ≈ UniformScaling(4.3) rtol=0.1 atol=0.01
-    @test UniformScaling(4.32) ≈ 4.3 * [1 0; 0 1] rtol=0.1 atol=0.01
-    @test UniformScaling(4.32) ≈ 4.3 * [1 0; 0 1] rtol=0.1 atol=0.01 norm=norm
-    @test 4.3 * [1 0; 0 1] ≈ UniformScaling(4.32) rtol=0.1 atol=0.01
-    @test [4.3201 0.002;0.001 4.32009] ≈ UniformScaling(4.32) rtol=0.1 atol=0.
-    @test UniformScaling(4.32) ≉ fill(4.3,2,2) rtol=0.1 atol=0.01
-    @test UniformScaling(4.32) ≈ 4.32 * [1 0; 0 1]
-end
-
-@testset "arithmetic with Number" begin
-    α = rand()
-    @test α + I == α + 1
-    @test I + α == α + 1
-    @test α - I == α - 1
-    @test I - α == 1 - α
-    @test α .* UniformScaling(1.0) == UniformScaling(1.0) .* α
-    @test UniformScaling(α)./α == UniformScaling(1.0)
-    @test α.\UniformScaling(α) == UniformScaling(1.0)
-    @test α * UniformScaling(1.0) == UniformScaling(1.0) * α
-    @test UniformScaling(α)/α == UniformScaling(1.0)
-    @test (2I)^α == (2I).^α == (2^α)I
-
-    β = rand()
-    @test (α*I)^2    == UniformScaling(α^2)
-    @test (α*I)^(-2) == UniformScaling(α^(-2))
-    @test (α*I)^(.5) == UniformScaling(α^(.5))
-    @test (α*I)^β    == UniformScaling(α^β)
-
-    @test (α * I) .^ 2 == UniformScaling(α^2)
-    @test (α * I) .^ β == UniformScaling(α^β)
-end
-
-@testset "unary" begin
-    @test +I === +1*I
-    @test -I === -1*I
-end
-
-@testset "tr, det and logdet" begin
-    for T in (Int, Float64, ComplexF64, Bool)
-        @test tr(UniformScaling(zero(T))) === zero(T)
-    end
-    @test_throws ArgumentError tr(UniformScaling(1))
-    @test det(I) === true
-    @test det(1.0I) === 1.0
-    @test det(0I) === 0
-    @test det(0.0I) === 0.0
-    @test logdet(I) == 0
-    @test_throws ArgumentError det(2I)
-end
-
-@test copy(UniformScaling(one(Float64))) == UniformScaling(one(Float64))
-@test sprint(show,MIME"text/plain"(),UniformScaling(one(ComplexF64))) == "LinearAlgebra.UniformScaling{ComplexF64}\n(1.0 + 0.0im)*I"
-@test sprint(show,MIME"text/plain"(),UniformScaling(one(Float32))) == "LinearAlgebra.UniformScaling{Float32}\n1.0*I"
-@test sprint(show,UniformScaling(one(ComplexF64))) == "LinearAlgebra.UniformScaling{ComplexF64}(1.0 + 0.0im)"
-@test sprint(show,UniformScaling(one(Float32))) == "LinearAlgebra.UniformScaling{Float32}(1.0f0)"
-
-let
-    λ = complex(randn(),randn())
-    J = UniformScaling(λ)
-    @testset "transpose, conj, inv, pinv, cond" begin
-        @test ndims(J) == 2
-        @test transpose(J) == J
-        @test J * [1 0; 0 1] == conj(*(adjoint(J), [1 0; 0 1])) # ctranpose (and A(c)_mul_B)
-        @test I + I === UniformScaling(2) # +
-        @test inv(I) == I
-        @test inv(J) == UniformScaling(inv(λ))
-        @test pinv(J) == UniformScaling(inv(λ))
-        @test @inferred(pinv(0.0I)) == 0.0I
-        @test @inferred(pinv(0I)) == 0.0I
-        @test @inferred(pinv(false*I)) == 0.0I
-        @test @inferred(pinv(0im*I)) == 0im*I
-        @test cond(I) == 1
-        @test cond(J) == (λ ≠ zero(λ) ? one(real(λ)) : oftype(real(λ), Inf))
-    end
-
-    @testset "real, imag, reim" begin
-        @test real(J) == UniformScaling(real(λ))
-        @test imag(J) == UniformScaling(imag(λ))
-        @test reim(J) == (UniformScaling(real(λ)), UniformScaling(imag(λ)))
-    end
-
-    @testset "copyto!" begin
-        A = Matrix{Int}(undef, (3,3))
-        @test copyto!(A, I) == one(A)
-        B = Matrix{ComplexF64}(undef, (1,2))
-        @test copyto!(B, J) == [λ zero(λ)]
-    end
-
-    @testset "binary ops with vectors" begin
-        v = complex.(randn(3), randn(3))
-        # As shown in #20423@GitHub, vector acts like x1 matrix when participating in linear algebra
-        @test v  * J ≈ v  * λ
-        @test v' * J ≈ v' * λ
-        @test J * v  ≈ λ * v
-        @test J * v' ≈ λ * v'
-        @test v  / J ≈ v  / λ
-        @test v' / J ≈ v' / λ
-        @test J \ v  ≈ λ \ v
-        @test J \ v' ≈ λ \ v'
-    end
-
-    @testset "binary ops with matrices" begin
-        B = bitrand(2, 2)
-        @test B + I == B + Matrix(I, size(B))
-        @test I + B == B + Matrix(I, size(B))
-        AA = randn(2, 2)
-        for A in (AA, view(AA, 1:2, 1:2))
-            I22 = Matrix(I, size(A))
-            @test @inferred(A + I) == A + I22
-            @test @inferred(I + A) == A + I22
-            @test @inferred(I - I) === UniformScaling(0)
-            @test @inferred(B - I) == B - I22
-            @test @inferred(I - B) == I22 - B
-            @test @inferred(A - I) == A - I22
-            @test @inferred(I - A) == I22 - A
-            @test @inferred(I*J) === UniformScaling(λ)
-            @test @inferred(B*J) == B*λ
-            @test @inferred(J*B) == B*λ
-            @test @inferred(I*A) !== A # Don't alias
-            @test @inferred(A*I) !== A # Don't alias
-
-            @test @inferred(A*J) == A*λ
-            @test @inferred(J*A) == A*λ
-            @test @inferred(J*fill(1, 3)) == fill(λ, 3)
-            @test @inferred(λ*J) === UniformScaling(λ*J.λ)
-            @test @inferred(J*λ) === UniformScaling(λ*J.λ)
-            @test @inferred(J/I) === J
-            @test @inferred(I/A) == inv(A)
-            @test @inferred(A/I) == A
-            @test @inferred(I/λ) === UniformScaling(1/λ)
-            @test @inferred(I\J) === J
-
-            if isa(A, Array)
-                T = LowerTriangular(randn(3,3))
-            else
-                T = LowerTriangular(view(randn(3,3), 1:3, 1:3))
-            end
-            @test @inferred(T + J) == Array(T) + J
-            @test @inferred(J + T) == J + Array(T)
-            @test @inferred(T - J) == Array(T) - J
-            @test @inferred(J - T) == J - Array(T)
-            @test @inferred(T\I) == inv(T)
-
-            if isa(A, Array)
-                T = LinearAlgebra.UnitLowerTriangular(randn(3,3))
-            else
-                T = LinearAlgebra.UnitLowerTriangular(view(randn(3,3), 1:3, 1:3))
-            end
-            @test @inferred(T + J) == Array(T) + J
-            @test @inferred(J + T) == J + Array(T)
-            @test @inferred(T - J) == Array(T) - J
-            @test @inferred(J - T) == J - Array(T)
-            @test @inferred(T\I) == inv(T)
-
-            if isa(A, Array)
-                T = UpperTriangular(randn(3,3))
-            else
-                T = UpperTriangular(view(randn(3,3), 1:3, 1:3))
-            end
-            @test @inferred(T + J) == Array(T) + J
-            @test @inferred(J + T) == J + Array(T)
-            @test @inferred(T - J) == Array(T) - J
-            @test @inferred(J - T) == J - Array(T)
-            @test @inferred(T\I) == inv(T)
-
-            if isa(A, Array)
-                T = LinearAlgebra.UnitUpperTriangular(randn(3,3))
-            else
-                T = LinearAlgebra.UnitUpperTriangular(view(randn(3,3), 1:3, 1:3))
-            end
-            @test @inferred(T + J) == Array(T) + J
-            @test @inferred(J + T) == J + Array(T)
-            @test @inferred(T - J) == Array(T) - J
-            @test @inferred(J - T) == J - Array(T)
-            @test @inferred(T\I) == inv(T)
-
-            for elty in (Float64, ComplexF64)
-                if isa(A, Array)
-                    T = Hermitian(randn(elty, 3,3))
-                else
-                    T = Hermitian(view(randn(elty, 3,3), 1:3, 1:3))
-                end
-                @test @inferred(T + J) == Array(T) + J
-                @test @inferred(J + T) == J + Array(T)
-                @test @inferred(T - J) == Array(T) - J
-                @test @inferred(J - T) == J - Array(T)
-            end
-
-            @test @inferred(I\A) == A
-            @test @inferred(A\I) == inv(A)
-            @test @inferred(λ\I) === UniformScaling(1/λ)
-        end
-    end
-end
-
-@testset "hcat and vcat" begin
-    @test_throws ArgumentError hcat(I)
-    @test_throws ArgumentError [I I]
-    @test_throws ArgumentError vcat(I)
-    @test_throws ArgumentError [I; I]
-    @test_throws ArgumentError [I I; I]
-
-    A = rand(3,4)
-    B = rand(3,3)
-    C = rand(0,3)
-    D = rand(2,0)
-    E = rand(1,3)
-    F = rand(3,1)
-    α = rand()
-    @test (hcat(A, 2I))::Matrix == hcat(A, Matrix(2I, 3, 3))
-    @test (hcat(E, α))::Matrix == hcat(E, [α])
-    @test (hcat(E, α, 2I))::Matrix == hcat(E, [α], fill(2, 1, 1))
-    @test (vcat(A, 2I))::Matrix == vcat(A, Matrix(2I, 4, 4))
-    @test (vcat(F, α))::Matrix == vcat(F, [α])
-    @test (vcat(F, α, 2I))::Matrix == vcat(F, [α], fill(2, 1, 1))
-    @test (hcat(C, 2I))::Matrix == C
-    @test_throws DimensionMismatch hcat(C, α)
-    @test (vcat(D, 2I))::Matrix == D
-    @test_throws DimensionMismatch vcat(D, α)
-    @test (hcat(I, 3I, A, 2I))::Matrix == hcat(Matrix(I, 3, 3), Matrix(3I, 3, 3), A, Matrix(2I, 3, 3))
-    @test (vcat(I, 3I, A, 2I))::Matrix == vcat(Matrix(I, 4, 4), Matrix(3I, 4, 4), A, Matrix(2I, 4, 4))
-    @test (hvcat((2,1,2), B, 2I, I, 3I, 4I))::Matrix ==
-        hvcat((2,1,2), B, Matrix(2I, 3, 3), Matrix(I, 6, 6), Matrix(3I, 3, 3), Matrix(4I, 3, 3))
-    @test hvcat((3,1), C, C, I, 3I)::Matrix == hvcat((2,1), C, C, Matrix(3I, 6,6))
-    @test hvcat((2,2,2), I, 2I, 3I, 4I, C, C)::Matrix ==
-        hvcat((2,2,2), Matrix(I, 3, 3), Matrix(2I, 3,3 ), Matrix(3I, 3,3), Matrix(4I, 3,3), C, C)
-    @test hvcat((2,2,4), C, C, I, 2I, 3I, 4I, 5I, D)::Matrix ==
-        hvcat((2,2,4), C, C, Matrix(I, 3, 3), Matrix(2I,3,3),
-            Matrix(3I, 2, 2), Matrix(4I, 2, 2), Matrix(5I,2,2), D)
-    @test (hvcat((2,3,2), B, 2I, C, C, I, 3I, 4I))::Matrix ==
-        hvcat((2,2,2), B, Matrix(2I, 3, 3), C, C, Matrix(3I, 3, 3), Matrix(4I, 3, 3))
-    @test hvcat((3,2,1), C, C, I, B ,3I, 2I)::Matrix ==
-        hvcat((2,2,1), C, C, B, Matrix(3I,3,3), Matrix(2I,6,6))
-    @test (hvcat((1,2), A, E, α))::Matrix == hvcat((1,2), A, E, [α]) == hvcat((1,2), A, E, α*I)
-    @test (hvcat((2,2), α, E, F, 3I))::Matrix == hvcat((2,2), [α], E, F, Matrix(3I, 3, 3))
-    @test (hvcat((2,2), 3I, F, E, α))::Matrix == hvcat((2,2), Matrix(3I, 3, 3), F, E, [α])
-end
-
-@testset "Matrix/Array construction from UniformScaling" begin
-    I2_33 = [2 0 0; 0 2 0; 0 0 2]
-    I2_34 = [2 0 0 0; 0 2 0 0; 0 0 2 0]
-    I2_43 = [2 0 0; 0 2 0; 0 0 2; 0 0 0]
-    for ArrType in (Matrix, Array)
-        @test ArrType(2I, 3, 3)::Matrix{Int} == I2_33
-        @test ArrType(2I, 3, 4)::Matrix{Int} == I2_34
-        @test ArrType(2I, 4, 3)::Matrix{Int} == I2_43
-        @test ArrType(2.0I, 3, 3)::Matrix{Float64} == I2_33
-        @test ArrType{Real}(2I, 3, 3)::Matrix{Real} == I2_33
-        @test ArrType{Float64}(2I, 3, 3)::Matrix{Float64} == I2_33
-    end
-end
-
-@testset "Diagonal construction from UniformScaling" begin
-    @test Diagonal(2I, 3)::Diagonal{Int} == Matrix(2I, 3, 3)
-    @test Diagonal(2.0I, 3)::Diagonal{Float64} == Matrix(2I, 3, 3)
-    @test Diagonal{Real}(2I, 3)::Diagonal{Real} == Matrix(2I, 3, 3)
-    @test Diagonal{Float64}(2I, 3)::Diagonal{Float64} == Matrix(2I, 3, 3)
-end
-
-@testset "equality comparison of matrices with UniformScaling" begin
-    # AbstractMatrix methods
-    diagI = Diagonal(fill(1, 3))
-    rdiagI = view(diagI, 1:2, 1:3)
-    bidiag = Bidiagonal(fill(2, 3), fill(2, 2), :U)
-    @test diagI  ==  I == diagI  # test isone(I) path / equality
-    @test 2diagI !=  I != 2diagI # test isone(I) path / inequality
-    @test 0diagI == 0I == 0diagI # test iszero(I) path / equality
-    @test 2diagI != 0I != 2diagI # test iszero(I) path / inequality
-    @test 2diagI == 2I == 2diagI # test generic path / equality
-    @test 0diagI != 2I != 0diagI # test generic path / inequality on diag
-    @test bidiag != 2I != bidiag # test generic path / inequality off diag
-    @test rdiagI !=  I != rdiagI # test square matrix check
-    # StridedMatrix specialization
-    denseI = [1 0 0; 0 1 0; 0 0 1]
-    rdenseI = [1 0 0 0; 0 1 0 0; 0 0 1 0]
-    alltwos = fill(2, (3, 3))
-    @test denseI  ==  I == denseI  # test isone(I) path / equality
-    @test 2denseI !=  I != 2denseI # test isone(I) path / inequality
-    @test 0denseI == 0I == 0denseI # test iszero(I) path / equality
-    @test 2denseI != 0I != 2denseI # test iszero(I) path / inequality
-    @test 2denseI == 2I == 2denseI # test generic path / equality
-    @test 0denseI != 2I != 0denseI # test generic path / inequality on diag
-    @test alltwos != 2I != alltwos # test generic path / inequality off diag
-    @test rdenseI !=  I != rdenseI # test square matrix check
-
-    # isequal
-    @test !isequal(I, I(3))
-    @test !isequal(I(1), I)
-    @test !isequal([1], I)
-    @test isequal(I, 1I)
-    @test !isequal(2I, 3I)
-end
-
-@testset "operations involving I should preserve eltype" begin
-    @test isa(Int8(1) + I, Int8)
-    @test isa(Float16(1) + I, Float16)
-    @test eltype(Int8(1)I) == Int8
-    @test eltype(Float16(1)I) == Float16
-    @test eltype(fill(Int8(1), 2, 2)I) == Int8
-    @test eltype(fill(Float16(1), 2, 2)I) == Float16
-    @test eltype(fill(Int8(1), 2, 2) + I) == Int8
-    @test eltype(fill(Float16(1), 2, 2) + I) == Float16
-end
-
-@testset "test that UniformScaling is applied correctly for matrices of matrices" begin
-    LL = Bidiagonal(fill(0*I, 3), fill(1*I, 2), :L)
-    @test (I - LL')\[[0], [0], [1]] == (I - LL)'\[[0], [0], [1]] == fill([1], 3)
-end
-
-# Ensure broadcasting of I is an error (could be made to work in the future)
-@testset "broadcasting of I (#23197)" begin
-    @test_throws MethodError I .+ 1
-    @test_throws MethodError I .+ [1 1; 1 1]
-end
-
-@testset "in-place mul! and div! methods" begin
-    J = randn()*I
-    A = randn(4, 3)
-    C = similar(A)
-    target_mul = J * A
-    target_div = A / J
-    @test mul!(C, J, A) == target_mul
-    @test mul!(C, A, J) == target_mul
-    @test lmul!(J, copyto!(C, A)) == target_mul
-    @test rmul!(copyto!(C, A), J) == target_mul
-    @test ldiv!(J, copyto!(C, A)) == target_div
-    @test ldiv!(C, J, A) == target_div
-    @test rdiv!(copyto!(C, A), J) == target_div
-
-    A = randn(4, 3)
-    C = randn!(similar(A))
-    alpha = randn()
-    beta = randn()
-    target = J * A * alpha + C * beta
-    @test mul!(copy(C), J, A, alpha, beta) ≈ target
-    @test mul!(copy(C), A, J, alpha, beta) ≈ target
-
-    a = randn()
-    C = randn(3, 3)
-    target_5mul = a*alpha*J + beta*C
-    @test mul!(copy(C), a, J, alpha, beta) ≈ target_5mul
-    @test mul!(copy(C), J, a, alpha, beta) ≈ target_5mul
-    target_5mul = beta*C # alpha = 0
-    @test mul!(copy(C), a, J, 0, beta) ≈ target_5mul
-    target_5mul = a*alpha*Matrix(J, 3, 3) # beta = 0
-    @test mul!(copy(C), a, J, alpha, 0) ≈ target_5mul
-
-end
-
-@testset "Construct Diagonal from UniformScaling" begin
-    @test size(I(3)) === (3,3)
-    @test I(3) isa Diagonal
-    @test I(3) == [1 0 0; 0 1 0; 0 0 1]
-end
-
-@testset "dot" begin
-    A = randn(3, 3)
-    λ = randn()
-    J = UniformScaling(λ)
-    @test dot(A, J) ≈ dot(J, A)
-    @test dot(A, J) ≈ tr(A' * J)
-
-    A = rand(ComplexF64, 3, 3)
-    λ = randn() + im * randn()
-    J = UniformScaling(λ)
-    @test dot(A, J) ≈ conj(dot(J, A))
-    @test dot(A, J) ≈ tr(A' * J)
-end
-
-@testset "generalized dot" begin
-    x = rand(-10:10, 3)
-    y = rand(-10:10, 3)
-    λ = rand(-10:10)
-    J = UniformScaling(λ)
-    @test dot(x, J, y) == λ*dot(x, y)
-    λ = Quaternion(0.44567, 0.755871, 0.882548, 0.423612)
-    x, y = Quaternion(rand(4)...), Quaternion(rand(4)...)
-    @test dot([x], λ*I, [y]) ≈ dot(x, λ, y) ≈ dot(x, λ*y)
-end
-
-@testset "Factorization solutions" begin
-    J = complex(randn(),randn()) * I
-    qrp = A -> qr(A, ColumnNorm())
-
-    # thin matrices
-    X = randn(3,2)
-    Z = pinv(X)
-    for fac in (qr,qrp,svd)
-        F = fac(X)
-        @test @inferred(F \ I) ≈ Z
-        @test @inferred(F \ J) ≈ Z * J
-    end
-
-    # square matrices
-    X = randn(3,3)
-    X = X'X + rand()I # make positive definite for cholesky
-    Z = pinv(X)
-    for fac in (bunchkaufman,cholesky,lu,qr,qrp,svd)
-        F = fac(X)
-        @test @inferred(F \ I) ≈ Z
-        @test @inferred(F \ J) ≈ Z * J
-    end
-
-    # fat matrices - only rank-revealing variants
-    X = randn(2,3)
-    Z = pinv(X)
-    for fac in (qrp,svd)
-        F = fac(X)
-        @test @inferred(F \ I) ≈ Z
-        @test @inferred(F \ J) ≈ Z * J
-    end
-end
-
-@testset "offset arrays" begin
-    A = OffsetArray(zeros(4,4), -1:2, 0:3)
-    @test sum(I + A) ≈ 3.0
-    @test sum(A + I) ≈ 3.0
-    @test sum(I - A) ≈ 3.0
-    @test sum(A - I) ≈ -3.0
-end
-
-@testset "type promotion when dividing UniformScaling by matrix" begin
-    A = randn(5,5)
-    cA = complex(A)
-    J = (5+2im)*I
-    @test J/A ≈ J/cA
-    @test A\J ≈ cA\J
-end
-
-end # module TestUniformscaling
diff --git a/stdlib/Logging/Project.toml b/stdlib/Logging/Project.toml
index af931e68e07d1..ce69112733d5e 100644
--- a/stdlib/Logging/Project.toml
+++ b/stdlib/Logging/Project.toml
@@ -1,5 +1,6 @@
 name = "Logging"
 uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Logging/docs/src/index.md b/stdlib/Logging/docs/src/index.md
index 9a269ee54571b..a2bfd499e4586 100644
--- a/stdlib/Logging/docs/src/index.md
+++ b/stdlib/Logging/docs/src/index.md
@@ -1,7 +1,11 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Logging/docs/src/index.md"
+```
+
 # [Logging](@id man-logging)
 
 The [`Logging`](@ref Logging.Logging) module provides a way to record the history and progress of a
-computation as a log of events.  Events are created by inserting a logging
+computation as a log of events. Events are created by inserting a logging
 statement into the source code, for example:
 
 ```julia
@@ -11,17 +15,17 @@ statement into the source code, for example:
 ```
 
 The system provides several advantages over peppering your source code with
-calls to `println()`.  First, it allows you to control the visibility and
-presentation of messages without editing the source code.  For example, in
+calls to `println()`. First, it allows you to control the visibility and
+presentation of messages without editing the source code. For example, in
 contrast to the `@warn` above
 
 ```julia
 @debug "The sum of some values $(sum(rand(100)))"
 ```
 
-will produce no output by default.  Furthermore, it's very cheap to leave debug
+will produce no output by default. Furthermore, it's very cheap to leave debug
 statements like this in the source code because the system avoids evaluating
-the message if it would later be ignored.  In this case `sum(rand(100))` and
+the message if it would later be ignored. In this case `sum(rand(100))` and
 the associated string processing will never be executed unless debug logging is
 enabled.
 
@@ -88,7 +92,7 @@ The system also generates some standard information for each event:
   fairly stable even if the source code of the file changes, as long as the
   logging statement itself remains the same.
 * A `group` for the event, which is set to the base name of the file by default,
-  without extension.  This can be used to group messages into categories more
+  without extension. This can be used to group messages into categories more
   finely than the log level (for example, all deprecation warnings have group
   `:depwarn`), or into logical groupings across or within modules.
 
@@ -120,7 +124,7 @@ user configurable code to see the event. All loggers must be subtypes of
 [`AbstractLogger`](@ref).
 
 When an event is triggered, the appropriate logger is found by looking for a
-task-local logger with the global logger as fallback.  The idea here is that
+task-local logger with the global logger as fallback. The idea here is that
 the application code knows how log events should be processed and exists
 somewhere at the top of the call stack. So we should look up through the call
 stack to discover the logger — that is, the logger should be *dynamically
@@ -130,11 +134,11 @@ simple global variable. In such a system it's awkward to control logging while
 composing functionality from multiple modules.)
 
 The global logger may be set with [`global_logger`](@ref), and task-local
-loggers controlled using [`with_logger`](@ref).  Newly spawned tasks inherit
+loggers controlled using [`with_logger`](@ref). Newly spawned tasks inherit
 the logger of the parent task.
 
 There are three logger types provided by the library.  [`ConsoleLogger`](@ref)
-is the default logger you see when starting the REPL.  It displays events in a
+is the default logger you see when starting the REPL. It displays events in a
 readable text format and tries to give simple but user friendly control over
 formatting and filtering.  [`NullLogger`](@ref) is a convenient way to drop all
 messages where necessary; it is the logging equivalent of the [`devnull`](@ref)
@@ -150,14 +154,14 @@ When an event occurs, a few steps of early filtering occur to avoid generating
 messages that will be discarded:
 
 1. The message log level is checked against a global minimum level (set via
-   [`disable_logging`](@ref)).  This is a crude but extremely cheap global
+   [`disable_logging`](@ref)). This is a crude but extremely cheap global
    setting.
 2. The current logger state is looked up and the message level checked against the
    logger's cached minimum level, as found by calling [`Logging.min_enabled_level`](@ref).
    This behavior can be overridden via environment variables (more on this later).
 3. The [`Logging.shouldlog`](@ref) function is called with the current logger, taking
    some minimal information (level, module, group, id) which can be computed
-   statically.  Most usefully, `shouldlog` is passed an event `id` which can be
+   statically. Most usefully, `shouldlog` is passed an event `id` which can be
    used to discard events early based on a cached predicate.
 
 If all these checks pass, the message and key--value pairs are evaluated in full
@@ -166,9 +170,9 @@ and passed to the current logger via the [`Logging.handle_message`](@ref) functi
 event to the screen, save it to a file, etc.
 
 Exceptions that occur while generating the log event are captured and logged
-by default.  This prevents individual broken events from crashing the
+by default. This prevents individual broken events from crashing the
 application, which is helpful when enabling little-used debug events in a
-production system.  This behavior can be customized per logger type by
+production system. This behavior can be customized per logger type by
 extending [`Logging.catch_exceptions`](@ref).
 
 ## Testing log events
@@ -180,17 +184,17 @@ pattern match against the log event stream.
 
 ## Environment variables
 
-Message filtering can be influenced through the `JULIA_DEBUG` environment
+Message filtering can be influenced through the [`JULIA_DEBUG`](@ref JULIA_DEBUG) environment
 variable, and serves as an easy way to enable debug logging for a file or
 module. Loading julia with `JULIA_DEBUG=loading` will activate
 `@debug` log messages in `loading.jl`. For example, in Linux shells:
 
 ```
 $ JULIA_DEBUG=loading julia -e 'using OhMyREPL'
-┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji due to it containing an invalid cache header
+┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji due to it containing an incompatible cache header
 └ @ Base loading.jl:1328
 [ Info: Recompiling stale cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji for module OhMyREPL
-┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/Tokenize.ji due to it containing an invalid cache header
+┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/Tokenize.ji due to it containing an incompatible cache header
 └ @ Base loading.jl:1328
 ...
 ```
@@ -298,6 +302,8 @@ Logging.Debug
 Logging.Info
 Logging.Warn
 Logging.Error
+Logging.BelowMinLevel
+Logging.AboveMaxLevel
 ```
 
 ### [Processing events with AbstractLogger](@id AbstractLogger-interface)
diff --git a/stdlib/Logging/src/Logging.jl b/stdlib/Logging/src/Logging.jl
index 0743c650326cc..ab02073f7b40a 100644
--- a/stdlib/Logging/src/Logging.jl
+++ b/stdlib/Logging/src/Logging.jl
@@ -8,55 +8,33 @@ and available by default.
 """
 module Logging
 
-# Import the CoreLogging implementation into Logging as new const bindings.
-# Doing it this way (rather than with import) makes these symbols accessible to
-# tab completion.
-for sym in [
-    :LogLevel, :BelowMinLevel, :AboveMaxLevel,
-    :AbstractLogger,
-    :NullLogger,
-    :handle_message, :shouldlog, :min_enabled_level, :catch_exceptions,
-    Symbol("@debug"),
-    Symbol("@info"),
-    Symbol("@warn"),
-    Symbol("@error"),
-    Symbol("@logmsg"),
-    :with_logger,
-    :current_logger,
-    :global_logger,
-    :disable_logging,
-    :SimpleLogger]
-    @eval const $sym = Base.CoreLogging.$sym
-end
-
-# LogLevel aliases (re-)documented here (JuliaLang/julia#40978)
-"""
-    Debug
-
-Alias for [`LogLevel(-1000)`](@ref LogLevel).
-"""
-const Debug = Base.CoreLogging.Debug
-"""
-    Info
-
-Alias for [`LogLevel(0)`](@ref LogLevel).
-"""
-const Info = Base.CoreLogging.Info
-"""
-    Warn
-
-Alias for [`LogLevel(1000)`](@ref LogLevel).
-"""
-const Warn = Base.CoreLogging.Warn
-"""
-    Error
-
-Alias for [`LogLevel(2000)`](@ref LogLevel).
-"""
-const Error = Base.CoreLogging.Error
-
-using Base.CoreLogging:
-    closed_stream
+import Base.CoreLogging:
+    LogLevel,
+    AbstractLogger,
+    NullLogger,
+    handle_message, shouldlog, min_enabled_level, catch_exceptions,
+    var"@debug",
+    var"@info",
+    var"@warn",
+    var"@error",
+    var"@logmsg",
+    with_logger,
+    current_logger,
+    global_logger,
+    disable_logging,
+    SimpleLogger,
+    Debug,
+    Info,
+    Warn,
+    Error,
+    BelowMinLevel,
+    AboveMaxLevel,
+    default_logcolor,
+    closed_stream,
+    ConsoleLogger,
+    default_metafmt,
+    # Some packages use `Logging.default_logcolor`
+    default_logcolor
 
 export
     AbstractLogger,
@@ -80,18 +58,6 @@ export
     Error,
     AboveMaxLevel
 
-include("ConsoleLogger.jl")
-
-# The following are also part of the public API, but not exported:
-#
-# 1. Log levels:
-#   BelowMinLevel, Debug, Info, Warn, Error, AboveMaxLevel,
-#
-# 2. AbstractLogger message related functions:
-#  handle_message, shouldlog, min_enabled_level, catch_exceptions,
-
-function __init__()
-    global_logger(ConsoleLogger())
-end
+public handle_message, shouldlog, min_enabled_level, catch_exceptions
 
 end
diff --git a/stdlib/Logging/test/runtests.jl b/stdlib/Logging/test/runtests.jl
index b6b4813964536..6d926f4dd0340 100644
--- a/stdlib/Logging/test/runtests.jl
+++ b/stdlib/Logging/test/runtests.jl
@@ -6,6 +6,10 @@ import Logging: min_enabled_level, shouldlog, handle_message
 
 @noinline func1() = backtrace()
 
+# see "custom log macro" testset
+CustomLog = LogLevel(-500)
+macro customlog(exs...) Base.CoreLogging.logmsg_code((Base.CoreLogging.@_sourceinfo)..., esc(CustomLog), exs...) end
+
 @testset "Logging" begin
 
 @testset "Core" begin
@@ -48,6 +52,15 @@ end
     end
     @test String(take!(buf)) == ""
 
+    # Check that the AnnotatedString path works too
+    with_logger(logger) do
+        @info Base.AnnotatedString("test")
+    end
+    @test String(take!(buf)) ==
+    """
+    [ Info: test
+    """
+
     @testset "Default metadata formatting" begin
         @test Logging.default_metafmt(Logging.Debug, Base, :g, :i, expanduser("~/somefile.jl"), 42) ==
             (:blue,      "Debug:",   "@ Base ~/somefile.jl:42")
@@ -272,7 +285,68 @@ end
             AboveMaxLevel === Logging.AboveMaxLevel
         end
         """)
-    @test m.run()
+    @test invokelatest(m.run)
+end
+
+@testset "custom log macro" begin
+    @test_logs (CustomLog, "a") min_level=CustomLog @customlog "a"
+
+    buf = IOBuffer()
+    io = IOContext(buf, :displaysize=>(30,80), :color=>false)
+    logger = ConsoleLogger(io, CustomLog)
+
+    with_logger(logger) do
+        @customlog "a"
+    end
+    @test occursin("LogLevel(-500): a", String(take!(buf)))
+end
+
+@testset "Docstrings" begin
+    undoc = Docs.undocumented_names(Logging)
+    @test isempty(undoc)
+end
+
+@testset "Logging when multithreaded" begin
+    n = 10000
+    cmd = `$(Base.julia_cmd()) -t4 --color=no $(joinpath(@__DIR__, "threads_exec.jl")) $n`
+    fname = tempname()
+    @testset "Thread safety" begin
+        f = open(fname, "w")
+        @test success(run(pipeline(cmd, stderr=f)))
+        close(f)
+    end
+
+    @testset "No tearing in log printing" begin
+        # Check for print tearing by verifying that each log entry starts and ends correctly
+        f = open(fname, "r")
+        entry_start = r"┌ (Info|Warning|Error): iteration"
+        entry_end = r"└ "
+
+        open_entries = 0
+        total_entries = 0
+        for line in eachline(fname)
+            starts = count(entry_start, line)
+            starts > 1 && error("Interleaved logs: Multiple log entries started on one line")
+            if starts == 1
+                startswith(line, entry_start) || error("Interleaved logs: Log entry started in the middle of a line")
+                open_entries += 1
+                total_entries += 1
+            end
+
+            ends = count(entry_end, line)
+            starts == 1 && ends == 1 && error("Interleaved logs: Log entry started and another ended on one line")
+            ends > 1 && error("Interleaved logs: Multiple log entries ended on one line")
+            if ends == 1
+                startswith(line, entry_end) || error("Interleaved logs: Log entry ended in the middle of a line")
+                open_entries -= 1
+            end
+            # Ensure no mismatched log entries
+            open_entries >= 0 || error("Interleaved logs")
+        end
+
+        @test open_entries == 0  # Ensure all entries closed properly
+        @test total_entries == n * 3  # Ensure all logs were printed (3 because @debug is hidden)
+    end
 end
 
 end
diff --git a/stdlib/Logging/test/threads_exec.jl b/stdlib/Logging/test/threads_exec.jl
new file mode 100644
index 0000000000000..497a22b1c7b22
--- /dev/null
+++ b/stdlib/Logging/test/threads_exec.jl
@@ -0,0 +1,13 @@
+using Logging
+
+function test_threads_exec(n)
+    Threads.@threads for i in 1:n
+        @debug "iteration" maxlog=1 _id=Symbol("$(i)_debug") i Threads.threadid()
+        @info "iteration" maxlog=1 _id=Symbol("$(i)_info") i Threads.threadid()
+        @warn "iteration" maxlog=1 _id=Symbol("$(i)_warn") i Threads.threadid()
+        @error "iteration" maxlog=1 _id=Symbol("$(i)_error") i Threads.threadid()
+    end
+end
+
+n = parse(Int, ARGS[1])
+test_threads_exec(n)
diff --git a/stdlib/MPFR_jll/Project.toml b/stdlib/MPFR_jll/Project.toml
index 39f99815832eb..5d1524c1455b0 100644
--- a/stdlib/MPFR_jll/Project.toml
+++ b/stdlib/MPFR_jll/Project.toml
@@ -1,13 +1,15 @@
 name = "MPFR_jll"
 uuid = "3a97d323-0669-5f0c-9066-3539efd106a3"
-version = "4.2.0+0"
+version = "4.2.2+0"
 
 [deps]
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
 GMP_jll = "781609d7-10c4-51f6-84f2-b8444358ff6d"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
+CompilerSupportLibraries_jll = "1.3.0"
 julia = "1.6"
 
 [extras]
diff --git a/stdlib/MPFR_jll/src/MPFR_jll.jl b/stdlib/MPFR_jll/src/MPFR_jll.jl
index c184a9801102f..b7b379c543c7a 100644
--- a/stdlib/MPFR_jll/src/MPFR_jll.jl
+++ b/stdlib/MPFR_jll/src/MPFR_jll.jl
@@ -3,43 +3,51 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/MPFR_jll.jl
 baremodule MPFR_jll
 using Base, Libdl, GMP_jll
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
+if Sys.iswindows()
+    using CompilerSupportLibraries_jll
+end
 
 export libmpfr
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libmpfr_handle::Ptr{Cvoid} = C_NULL
-libmpfr_path::String = ""
 
-if Sys.iswindows()
-    const libmpfr = "libmpfr-6.dll"
-elseif Sys.isapple()
-    const libmpfr = "@rpath/libmpfr.6.dylib"
-else
-    const libmpfr = "libmpfr.so.6"
+libmpfr_path::String = ""
+const libmpfr = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libmpfr-6.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libmpfr.6.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libmpfr.so.6")
+    else
+        error("MPFR_jll: Library 'libmpfr' is not available for $(Sys.KERNEL)")
+    end,
+    dependencies = if Sys.iswindows()
+        LazyLibrary[libgmp, libgcc_s]
+    else
+        LazyLibrary[libgmp]
+    end
+)
+
+function eager_mode()
+    GMP_jll.eager_mode()
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
+    dlopen(libmpfr)
 end
+is_available() = true
 
 function __init__()
-    global libmpfr_handle = dlopen(libmpfr)
-    global libmpfr_path = dlpath(libmpfr_handle)
+    global libmpfr_path = string(libmpfr.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libmpfr_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libmpfr_path() = libmpfr_path
-
 end  # module MPFR_jll
diff --git a/stdlib/MPFR_jll/test/runtests.jl b/stdlib/MPFR_jll/test/runtests.jl
index 81b6e06ed7b49..1dbbbb298e737 100644
--- a/stdlib/MPFR_jll/test/runtests.jl
+++ b/stdlib/MPFR_jll/test/runtests.jl
@@ -4,5 +4,5 @@ using Test, Libdl, MPFR_jll
 
 @testset "MPFR_jll" begin
     vn = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Cstring, ())))
-    @test vn == v"4.2.0"
+    @test vn == v"4.2.2"
 end
diff --git a/stdlib/Makefile b/stdlib/Makefile
index e42061d593905..2ab2d79536529 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -15,11 +15,12 @@ include $(JULIAHOME)/deps/*.version
 
 VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION)
 DIRS := $(build_datarootdir)/julia/stdlib/$(VERSDIR) $(build_prefix)/manifest/$(VERSDIR)
+
 $(foreach dir,$(DIRS),$(eval $(call dir_target,$(dir))))
 
-JLLS = DSFMT GMP CURL LIBGIT2 LLVM LIBSSH2 LIBUV MBEDTLS MPFR NGHTTP2 \
+JLLS = DSFMT GMP CURL LIBGIT2 LLVM LIBSSH2 LIBUV OPENSSL MPFR NGHTTP2 \
        BLASTRAMPOLINE OPENBLAS OPENLIBM P7ZIP PCRE LIBSUITESPARSE ZLIB \
-       LLVMUNWIND CSL UNWIND LLD
+       ZSTD LLVMUNWIND CSL UNWIND LLD
 
 # Initialize this with JLLs that aren't in "deps/$(LibName).version"
 JLL_NAMES := MozillaCACerts_jll
@@ -39,14 +40,15 @@ install-$$($(1)_JLL_NAME)_jll: get-$$($(1)_JLL_NAME)_jll
 endef
 $(foreach jll,$(JLLS),$(eval $(call download-artifacts-toml,$(jll))))
 
-
-STDLIBS = Artifacts Base64 CRC32c Dates Distributed FileWatching \
-          Future InteractiveUtils LazyArtifacts Libdl LibGit2 LinearAlgebra Logging \
+STDLIBS = Artifacts Base64 CRC32c Dates FileWatching \
+          Future InteractiveUtils Libdl LibGit2 Logging \
           Markdown Mmap Printf Profile Random REPL Serialization \
           SharedArrays Sockets Test TOML Unicode UUIDs \
           $(JLL_NAMES)
 
-STDLIBS_EXT = Pkg Statistics LibCURL DelimitedFiles Downloads ArgTools Tar NetworkOptions SuiteSparse SparseArrays SHA
+STDLIBS_EXT = Pkg Statistics LazyArtifacts LibCURL DelimitedFiles Downloads ArgTools \
+              Tar NetworkOptions SuiteSparse SparseArrays StyledStrings SHA Distributed \
+              JuliaSyntaxHighlighting LinearAlgebra
 
 $(foreach module, $(STDLIBS_EXT), $(eval $(call stdlib-external,$(module),$(shell echo $(module) | tr a-z A-Z))))
 
@@ -54,14 +56,25 @@ ifneq ($(filter $(STDLIBS),$(STDLIBS_EXT)),)
 $(error ERROR duplicated STDLIBS in list)
 endif
 
-
 # Generate symlinks to all stdlibs at usr/share/julia/stdlib/vX.Y/
 $(foreach module, $(STDLIBS), $(eval $(call symlink_target,$$(JULIAHOME)/stdlib/$(module),$$(build_datarootdir)/julia/stdlib/$$(VERSDIR),$(module))))
 
 STDLIBS_LINK_TARGETS := $(addprefix $(build_datarootdir)/julia/stdlib/$(VERSDIR)/,$(STDLIBS))
 
+remove-gpl-libs:
+ifeq ($(USE_GPL_LIBS),0)
+	@echo Removing GPL libs...
+	-rm -f $(build_private_libdir)/libcholmod*
+	-rm -f $(build_private_libdir)/libklu_cholmod*
+	-rm -f $(build_private_libdir)/librbio*
+	-rm -f $(build_private_libdir)/libspqr*
+	-rm -f $(build_private_libdir)/libumfpack*
+endif
+
 getall get: $(addprefix get-, $(STDLIBS_EXT) $(JLL_NAMES))
-install: version-check $(addprefix install-, $(STDLIBS_EXT) $(JLL_NAMES)) $(STDLIBS_LINK_TARGETS)
+
+install: version-check $(addprefix install-, $(STDLIBS_EXT) $(JLL_NAMES)) $(STDLIBS_LINK_TARGETS) remove-gpl-libs
+.PHONY: version-check
 version-check: $(addprefix version-check-, $(STDLIBS_EXT))
 uninstall: $(addprefix uninstall-, $(STDLIBS_EXT))
 extstdlibclean:
diff --git a/stdlib/Manifest.toml b/stdlib/Manifest.toml
new file mode 100644
index 0000000000000..5cf24ed77b909
--- /dev/null
+++ b/stdlib/Manifest.toml
@@ -0,0 +1,305 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.13.0-DEV"
+manifest_format = "2.0"
+project_hash = "6826701002e0b87f8744b1c4bf97e2cff5fc1642"
+
+[[deps.ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+version = "1.1.2"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+version = "1.11.0"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+version = "1.11.0"
+
+[[deps.CRC32c]]
+uuid = "8bf52ea8-c179-5cab-976a-9e18b702a9bc"
+version = "1.11.0"
+
+[[deps.CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+version = "1.3.0+1"
+
+[[deps.Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+version = "1.11.0"
+
+[[deps.DelimitedFiles]]
+deps = ["Mmap"]
+git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae"
+uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+version = "1.9.1"
+
+[[deps.Distributed]]
+deps = ["Random", "Serialization", "Sockets"]
+uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+version = "1.11.0"
+
+[[deps.Downloads]]
+deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+version = "1.7.0"
+
+[[deps.FileWatching]]
+uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+version = "1.11.0"
+
+[[deps.Future]]
+deps = ["Random"]
+uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+version = "1.11.0"
+
+[[deps.GMP_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d"
+version = "6.3.0+2"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+version = "1.11.0"
+
+[[deps.JuliaSyntaxHighlighting]]
+deps = ["StyledStrings"]
+uuid = "ac6e5ff7-fb65-4e79-a425-ec3bc9c03011"
+version = "1.12.0"
+
+[[deps.LLD_jll]]
+deps = ["Artifacts", "Libdl", "Zlib_jll", "libLLVM_jll"]
+uuid = "d55e3150-da41-5e91-b323-ecfd1eec6109"
+version = "20.1.8+0"
+
+[[deps.LLVMLibUnwind_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9"
+version = "19.1.4+0"
+
+[[deps.LazyArtifacts]]
+deps = ["Artifacts", "Pkg"]
+uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+version = "1.11.0"
+
+[[deps.LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+version = "0.6.4"
+
+[[deps.LibCURL_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "LibSSH2_jll", "Libdl", "OpenSSL_jll", "Zlib_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+version = "8.16.0+0"
+
+[[deps.LibGit2]]
+deps = ["LibGit2_jll", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+version = "1.11.0"
+
+[[deps.LibGit2_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "LibSSH2_jll", "Libdl", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"]
+uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+version = "1.9.1+0"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl", "OpenSSL_jll", "Zlib_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.11.3+1"
+
+[[deps.LibUV_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "183b4373-6708-53ba-ad28-60e28bb38547"
+version = "2.0.1+21"
+
+[[deps.LibUnwind_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl", "Zlib_jll"]
+uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3"
+version = "1.8.2+0"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+version = "1.11.0"
+
+[[deps.LinearAlgebra]]
+deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"]
+uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+version = "1.12.0"
+
+[[deps.Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+version = "1.11.0"
+
+[[deps.MPFR_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "GMP_jll", "Libdl"]
+uuid = "3a97d323-0669-5f0c-9066-3539efd106a3"
+version = "4.2.2+0"
+
+[[deps.Markdown]]
+deps = ["Base64", "JuliaSyntaxHighlighting", "StyledStrings"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+version = "1.11.0"
+
+[[deps.Mmap]]
+uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+version = "1.11.0"
+
+[[deps.MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+version = "2025.5.20"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.3.0"
+
+[[deps.OpenBLAS_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
+version = "0.3.29+0"
+
+[[deps.OpenLibm_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
+version = "0.8.7+0"
+
+[[deps.OpenSSL_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
+version = "3.5.0+0"
+
+[[deps.PCRE2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
+version = "10.45.0+0"
+
+[[deps.Pkg]]
+deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+version = "1.13.0"
+weakdeps = ["REPL"]
+
+    [deps.Pkg.extensions]
+    REPLExt = "REPL"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+version = "1.11.0"
+
+[[deps.Profile]]
+deps = ["StyledStrings"]
+uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
+version = "1.11.0"
+
+[[deps.REPL]]
+deps = ["Dates", "FileWatching", "InteractiveUtils", "JuliaSyntaxHighlighting", "Markdown", "Sockets", "StyledStrings", "Unicode"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+version = "1.11.0"
+
+[[deps.Random]]
+deps = ["SHA"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+version = "1.11.0"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+version = "1.11.0"
+
+[[deps.SharedArrays]]
+deps = ["Distributed", "Mmap", "Random", "Serialization"]
+uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+version = "1.11.0"
+
+[[deps.Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+version = "1.11.0"
+
+[[deps.SparseArrays]]
+deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"]
+uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+version = "1.12.0"
+
+[[deps.Statistics]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0"
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+version = "1.11.1"
+weakdeps = ["SparseArrays"]
+
+    [deps.Statistics.extensions]
+    SparseArraysExt = ["SparseArrays"]
+
+[[deps.StyledStrings]]
+uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+version = "1.11.0"
+
+[[deps.SuiteSparse_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl", "libblastrampoline_jll"]
+uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
+version = "7.10.1+0"
+
+[[deps.TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+version = "1.0.3"
+
+[[deps.Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+version = "1.10.0"
+
+[[deps.Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+version = "1.11.0"
+
+[[deps.UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+version = "1.11.0"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+version = "1.11.0"
+
+[[deps.Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+version = "1.3.1+2"
+
+[[deps.Zstd_jll]]
+deps = ["CompilerSupportLibraries_jll", "Libdl"]
+uuid = "3161d3a3-bdf6-5164-811a-617609db77b4"
+version = "1.5.7+1"
+
+[[deps.dSFMT_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "05ff407c-b0c1-5878-9df8-858cc2e60c36"
+version = "2.2.5+2"
+
+[[deps.libLLVM_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl", "Zlib_jll", "Zstd_jll"]
+uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
+version = "20.1.8+0"
+
+[[deps.libblastrampoline_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
+version = "5.12.0+0"
+
+[[deps.nghttp2_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+version = "1.66.0+0"
+
+[[deps.p7zip_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+version = "17.7.0+0"
diff --git a/stdlib/Markdown/Project.toml b/stdlib/Markdown/Project.toml
index 229e58749d233..1d5962c80803d 100644
--- a/stdlib/Markdown/Project.toml
+++ b/stdlib/Markdown/Project.toml
@@ -1,8 +1,11 @@
 name = "Markdown"
 uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+version = "1.11.0"
 
 [deps]
 Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+JuliaSyntaxHighlighting = "ac6e5ff7-fb65-4e79-a425-ec3bc9c03011"
+StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Markdown/docs/src/index.md b/stdlib/Markdown/docs/src/index.md
index a107929d1e838..926e3921d339d 100644
--- a/stdlib/Markdown/docs/src/index.md
+++ b/stdlib/Markdown/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Markdown/docs/src/index.md"
+```
+
 # [Markdown](@id markdown_stdlib)
 
 This section describes Julia's markdown syntax, which is enabled by the
@@ -75,7 +79,7 @@ the text enclosed in square brackets, `[ ]`, is the name of the link and the tex
 parentheses, `( )`, is the URL.
 
 ```
-A paragraph containing a link to [Julia](http://www.julialang.org).
+A paragraph containing a link to [Julia](https://www.julialang.org).
 ```
 
 It's also possible to add cross-references to other documented functions/methods/variables within
@@ -153,8 +157,8 @@ A header line can contain any inline syntax in the same way as a paragraph can.
 
 ### Code blocks
 
-Source code can be displayed as a literal block using an indent of four spaces as shown in the
-following example.
+Source code can be displayed as a literal block using an indent of four spaces or one tab as shown
+in the following example.
 
 ```
 This is a paragraph.
@@ -298,7 +302,8 @@ aside from the `:` character that is appended to the footnote label.
 
 [^note]:
 
-    Named footnote text containing several toplevel elements.
+    Named footnote text containing several toplevel elements
+    indented by 4 spaces or one tab.
 
       * item one
       * item two
@@ -357,6 +362,7 @@ They can be defined using the following `!!!` syntax:
 !!! note
 
     This is the content of the note.
+    It is indented by 4 spaces. A tab would work as well.
 
 !!! warning "Beware!"
 
@@ -386,6 +392,16 @@ If no title text is specified after the admonition type, then the type name will
 
 Admonitions, like most other toplevel elements, can contain other toplevel elements (e.g. lists, images).
 
+## [Markdown String Literals](@id stdlib-markdown-literals)
+
+The `md""` macro allows you to embed Markdown strings directly into your Julia code.
+This macro is designed to simplify the inclusion of Markdown-formatted text within your Julia source files.
+
+### Usage
+
+```julia
+result = md"This is a **custom** Markdown string with [a link](http://example.com)."
+```
 ## Markdown Syntax Extensions
 
 Julia's markdown supports interpolation in a very similar way to basic string literals, with the
@@ -396,3 +412,15 @@ complex features (such as references) without cluttering the basic syntax.
 
 In principle, the Markdown parser itself can also be arbitrarily extended by packages, or an entirely
 custom flavour of Markdown can be used, but this should generally be unnecessary.
+
+
+## [API reference](@id stdlib-markdown-api)
+
+```@docs
+Markdown.MD
+Markdown.@md_str
+Markdown.@doc_str
+Markdown.parse
+Markdown.html
+Markdown.latex
+```
diff --git a/stdlib/Markdown/src/Common/Common.jl b/stdlib/Markdown/src/Common/Common.jl
index 3036f2b4b730b..4bd3e5b4af8d6 100644
--- a/stdlib/Markdown/src/Common/Common.jl
+++ b/stdlib/Markdown/src/Common/Common.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+abstract type MarkdownElement end
+
 include("block.jl")
 include("inline.jl")
 
diff --git a/stdlib/Markdown/src/Common/block.jl b/stdlib/Markdown/src/Common/block.jl
index bd184b60c40fa..59ab0e58cf65b 100644
--- a/stdlib/Markdown/src/Common/block.jl
+++ b/stdlib/Markdown/src/Common/block.jl
@@ -4,7 +4,7 @@
 # Paragraphs
 # ––––––––––
 
-mutable struct Paragraph
+mutable struct Paragraph <: MarkdownElement
     content
 end
 
@@ -21,7 +21,7 @@ function paragraph(stream::IO, md::MD)
             char == '\r' && !eof(stream) && peek(stream, Char) == '\n' && read(stream, Char)
             if prev_char == '\\'
                 write(buffer, '\n')
-            elseif blankline(stream) || parse(stream, md, breaking = true)
+            elseif blankline(stream) || _parse(stream, md, breaking = true)
                 break
             else
                 write(buffer, ' ')
@@ -39,7 +39,7 @@ end
 # Headers
 # –––––––
 
-mutable struct Header{level}
+mutable struct Header{level} <: MarkdownElement
     text
 end
 
@@ -95,7 +95,7 @@ end
 # Code
 # ––––
 
-mutable struct Code
+mutable struct Code <: MarkdownElement
     language::String
     code::String
 end
@@ -114,7 +114,7 @@ function indentcode(stream::IO, block::MD)
                 break
             end
         end
-        code = String(take!(buffer))
+        code = takestring!(buffer)
         !isempty(code) && (push!(block, Code(rstrip(code))); return true)
         return false
     end
@@ -124,7 +124,7 @@ end
 # Footnote
 # --------
 
-mutable struct Footnote
+mutable struct Footnote <: MarkdownElement
     id::String
     text
 end
@@ -159,7 +159,7 @@ end
 # Quotes
 # ––––––
 
-mutable struct BlockQuote
+mutable struct BlockQuote <: MarkdownElement
     content
 end
 
@@ -178,7 +178,7 @@ function blockquote(stream::IO, block::MD)
         end
         empty && return false
 
-        md = String(take!(buffer))
+        md = takestring!(buffer)
         push!(block, BlockQuote(parse(md, flavor = config(block)).content))
         return true
     end
@@ -188,7 +188,7 @@ end
 # Admonitions
 # -----------
 
-mutable struct Admonition
+mutable struct Admonition <: MarkdownElement
     category::String
     title::String
     content::Vector
@@ -236,7 +236,7 @@ function admonition(stream::IO, block::MD)
             end
         end
         # Parse the nested block as markdown and create a new Admonition block.
-        nested = parse(String(take!(buffer)), flavor = config(block))
+        nested = parse(takestring!(buffer), flavor = config(block))
         push!(block, Admonition(category, title, nested.content))
         return true
     end
@@ -246,7 +246,7 @@ end
 # Lists
 # –––––
 
-mutable struct List
+mutable struct List <: MarkdownElement
     items::Vector{Any}
     ordered::Int # `-1` is unordered, `>= 0` is ordered.
     loose::Bool # TODO: Renderers should use this field
@@ -326,13 +326,13 @@ function list(stream::IO, block::MD)
         return true
     end
 end
-pushitem!(list, buffer) = push!(list.items, parse(String(take!(buffer))).content)
+pushitem!(list, buffer) = push!(list.items, parse(takestring!(buffer)).content)
 
 # ––––––––––––––
 # HorizontalRule
 # ––––––––––––––
 
-mutable struct HorizontalRule
+mutable struct HorizontalRule <: MarkdownElement
 end
 
 function horizontalrule(stream::IO, block::MD)
diff --git a/stdlib/Markdown/src/Common/inline.jl b/stdlib/Markdown/src/Common/inline.jl
index fda716a10fae7..dc8c1e5e1dc9f 100644
--- a/stdlib/Markdown/src/Common/inline.jl
+++ b/stdlib/Markdown/src/Common/inline.jl
@@ -4,7 +4,7 @@
 # Emphasis
 # ––––––––
 
-mutable struct Italic
+mutable struct Italic <: MarkdownElement
     text
 end
 
@@ -20,7 +20,7 @@ function underscore_italic(stream::IO, md::MD)
     return result === nothing ? nothing : Italic(parseinline(result, md))
 end
 
-mutable struct Bold
+mutable struct Bold <: MarkdownElement
     text
 end
 
@@ -66,7 +66,7 @@ end
 # Images & Links
 # ––––––––––––––
 
-mutable struct Image
+mutable struct Image <: MarkdownElement
     url::String
     alt::String
 end
@@ -85,7 +85,7 @@ function image(stream::IO, md::MD)
     end
 end
 
-mutable struct Link
+mutable struct Link <: MarkdownElement
     text
     url::String
 end
@@ -96,7 +96,6 @@ function link(stream::IO, md::MD)
         startswith(stream, '[') || return
         text = readuntil(stream, ']', match = '[')
         text ≡ nothing && return
-        skipwhitespace(stream)
         startswith(stream, '(') || return
         url = readuntil(stream, ')', match = '(')
         url ≡ nothing && return
@@ -156,7 +155,7 @@ end
 # Punctuation
 # –––––––––––
 
-mutable struct LineBreak end
+mutable struct LineBreak <: MarkdownElement end
 
 @trigger '\\' ->
 function linebreak(stream::IO, md::MD)
@@ -166,7 +165,10 @@ function linebreak(stream::IO, md::MD)
 end
 
 @trigger '-' ->
-function en_dash(stream::IO, md::MD)
+function en_or_em_dash(stream::IO, md::MD)
+    if startswith(stream, "---")
+        return "—"
+    end
     if startswith(stream, "--")
         return "–"
     end
diff --git a/stdlib/Markdown/src/GitHub/GitHub.jl b/stdlib/Markdown/src/GitHub/GitHub.jl
index 61807d267511d..de18b367988d9 100644
--- a/stdlib/Markdown/src/GitHub/GitHub.jl
+++ b/stdlib/Markdown/src/GitHub/GitHub.jl
@@ -21,9 +21,9 @@ function fencedcode(stream::IO, block::MD)
             if startswith(stream, string(ch) ^ n)
                 if !startswith(stream, string(ch))
                     if flavor == "math"
-                        push!(block, LaTeX(String(take!(buffer)) |> chomp))
+                        push!(block, LaTeX(takestring!(buffer) |> chomp))
                     else
-                        push!(block, Code(flavor, String(take!(buffer)) |> chomp))
+                        push!(block, Code(flavor, takestring!(buffer) |> chomp))
                     end
                     return true
                 else
@@ -44,7 +44,7 @@ function github_paragraph(stream::IO, md::MD)
     for char in readeach(stream, Char)
         if char == '\n'
             eof(stream) && break
-            if blankline(stream) || parse(stream, md, breaking = true)
+            if blankline(stream) || _parse(stream, md, breaking = true)
                 break
             else
                 write(buffer, '\n')
@@ -60,5 +60,5 @@ end
 @flavor github [list, indentcode, blockquote, admonition, footnote, fencedcode, hashheader,
                 github_table, github_paragraph,
 
-                linebreak, escapes, en_dash, inline_code, asterisk_bold,
+                linebreak, escapes, en_or_em_dash, inline_code, asterisk_bold,
                 underscore_bold, asterisk_italic, underscore_italic, image, footnote_link, link, autolink]
diff --git a/stdlib/Markdown/src/GitHub/table.jl b/stdlib/Markdown/src/GitHub/table.jl
index 29f956e9a0710..fefa667fc7f93 100644
--- a/stdlib/Markdown/src/GitHub/table.jl
+++ b/stdlib/Markdown/src/GitHub/table.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-mutable struct Table
+mutable struct Table <: MarkdownElement
     rows::Vector{Vector{Any}}
     align::Vector{Symbol}
 end
@@ -140,15 +140,15 @@ end
 
 function term(io::IO, md::Table, columns)
     margin_str = " "^margin
-    cells = mapmap(x -> terminline_string(io, x), md.rows)
-    padcells!(cells, md.align, len = ansi_length)
+    cells = mapmap(x -> annotprint(terminline, x), md.rows)
+    padcells!(cells, md.align, len = textwidth)
     for i = 1:length(cells)
         print(io, margin_str)
         join(io, cells[i], " ")
         if i == 1
             println(io)
             print(io, margin_str)
-            join(io, ["–"^ansi_length(cells[i][j]) for j = 1:length(cells[1])], " ")
+            join(io, ["–"^textwidth(cells[i][j]) for j = 1:length(cells[1])], " ")
         end
         i < length(cells) && println(io)
     end
diff --git a/stdlib/Markdown/src/IPython/IPython.jl b/stdlib/Markdown/src/IPython/IPython.jl
index 54b628e768a48..cab4abbf65412 100644
--- a/stdlib/Markdown/src/IPython/IPython.jl
+++ b/stdlib/Markdown/src/IPython/IPython.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-mutable struct LaTeX
+mutable struct LaTeX <: MarkdownElement
     formula::String
 end
 
diff --git a/stdlib/Markdown/src/Julia/Julia.jl b/stdlib/Markdown/src/Julia/Julia.jl
index 3797c5a8a0f79..171fe4431a596 100644
--- a/stdlib/Markdown/src/Julia/Julia.jl
+++ b/stdlib/Markdown/src/Julia/Julia.jl
@@ -10,5 +10,5 @@ include("interp.jl")
 @flavor julia [blocktex, blockinterp, hashheader, list, indentcode, fencedcode,
                blockquote, admonition, footnote, github_table, horizontalrule, setextheader, paragraph,
 
-               linebreak, escapes, tex, interp, en_dash, inline_code,
+               linebreak, escapes, tex, interp, en_or_em_dash, inline_code,
                asterisk_bold, underscore_bold, asterisk_italic, underscore_italic, image, footnote_link, link, autolink]
diff --git a/stdlib/Markdown/src/Julia/interp.jl b/stdlib/Markdown/src/Julia/interp.jl
index ad91226e00956..10ef6889cd6a5 100644
--- a/stdlib/Markdown/src/Julia/interp.jl
+++ b/stdlib/Markdown/src/Julia/interp.jl
@@ -46,7 +46,7 @@ toexpr(x) = x
 toexpr(xs::Union{Vector{Any},Vector{Vector{Any}}}) =
     Expr(:call, GlobalRef(Base,:getindex), Any, mapany(toexpr, xs)...)
 
-for T in Any[MD, Paragraph, Header, Link, Bold, Italic]
+for T in Any[MD, Paragraph, Header, Link, Bold, Italic, Footnote, Admonition]
     @eval function toexpr(md::$T)
         Expr(:call, typeof(md), $(map(x->:(toexpr(md.$x)), fieldnames(Base.unwrap_unionall(T)))...))
     end
diff --git a/stdlib/Markdown/src/Markdown.jl b/stdlib/Markdown/src/Markdown.jl
index 781fcbdafddc8..83fb58e2849ee 100644
--- a/stdlib/Markdown/src/Markdown.jl
+++ b/stdlib/Markdown/src/Markdown.jl
@@ -1,13 +1,20 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 """
-Tools for working with the Markdown file format. Mainly for documentation.
+    Markdown
+
+Tools for working with the Markdown markup language for formatted text, used within Julia for documentation.
+The `Markdown` module provides the (internal) [`MD`](@ref) type as well as the string
+literals `md"..."` and `doc"..."`.
 """
 module Markdown
 
-import Base: show, ==, with_output_color, mapany
+import Base: AnnotatedString, AnnotatedIOBuffer, show, ==, with_output_color, mapany
 using Base64: stringmime
 
+using StyledStrings: StyledStrings, Face, addface!, @styled_str, styled
+using JuliaSyntaxHighlighting: highlight, highlight!
+
 # Margin for printing in terminal.
 const margin = 2
 
@@ -28,7 +35,40 @@ include("render/terminal/render.jl")
 
 export @md_str, @doc_str
 
-parse(markdown::AbstractString; flavor = julia) = parse(IOBuffer(markdown), flavor = flavor)
+public MD, parse
+
+const MARKDOWN_FACES = [
+    :markdown_header => Face(weight=:bold),
+    :markdown_h1 => Face(height=1.25, inherit=:markdown_header),
+    :markdown_h2 => Face(height=1.20, inherit=:markdown_header),
+    :markdown_h3 => Face(height=1.15, inherit=:markdown_header),
+    :markdown_h4 => Face(height=1.12, inherit=:markdown_header),
+    :markdown_h5 => Face(height=1.08, inherit=:markdown_header),
+    :markdown_h6 => Face(height=1.05, inherit=:markdown_header),
+    :markdown_admonition => Face(weight=:bold),
+    :markdown_code => Face(inherit=:code),
+    :markdown_julia_prompt => Face(inherit=:repl_prompt_julia),
+    :markdown_footnote => Face(inherit=:bright_yellow),
+    :markdown_hrule => Face(inherit=:shadow),
+    :markdown_inlinecode => Face(inherit=:markdown_code),
+    :markdown_latex => Face(inherit=:magenta),
+    :markdown_link => Face(underline=:bright_blue),
+    :markdown_list => Face(foreground=:blue),
+]
+
+__init__() = foreach(addface!, MARKDOWN_FACES)
+
+parse(markdown::String; flavor = julia) = parse(IOBuffer(markdown), flavor = flavor)
+
+"""
+    Markdown.parse(markdown::AbstractString)::MD
+
+Parse `markdown` as Julia-flavored Markdown text and return the corresponding `MD` object.
+
+See also [`@md_str`](@ref).
+"""
+parse(markdown::AbstractString; flavor = julia) = parse(String(markdown), flavor = flavor)
+
 parse_file(file::AbstractString; flavor = julia) = parse(read(file, String), flavor = flavor)
 
 function mdexpr(s, flavor = :julia)
@@ -40,6 +80,24 @@ function docexpr(source::LineNumberNode, mod::Module, s, flavor = :julia)
     :($doc_str($(mdexpr(s, flavor)), $(QuoteNode(source)), $mod))
 end
 
+"""
+    @md_str -> MD
+
+Parse the given string as Markdown text and return a corresponding [`MD`](@ref) object.
+
+See also [`Markdown.parse`](@ref Markdown.parse(::AbstractString)).
+
+# Examples
+```jldoctest
+julia> s = md"# Hello, world!"
+  Hello, world!
+  ≡≡≡≡≡≡≡≡≡≡≡≡≡
+
+julia> typeof(s)
+Markdown.MD
+
+```
+"""
 macro md_str(s, t...)
     mdexpr(s, t...)
 end
@@ -51,6 +109,25 @@ function doc_str(md, source::LineNumberNode, mod::Module)
 end
 doc_str(md::AbstractString, source::LineNumberNode, mod::Module) = doc_str(parse(md), source, mod)
 
+"""
+    @doc_str -> MD
+
+Parse the given string as Markdown text, add line and module information and return a
+corresponding [`MD`](@ref) object.
+
+`@doc_str` can be used in conjunction with the [`Base.Docs`](@ref) module. Please also refer to
+the manual section on [documentation](@ref man-documentation) for more information.
+
+# Examples
+```
+julia> s = doc"f(x) = 2*x"
+  f(x) = 2*x
+
+julia> typeof(s)
+Markdown.MD
+
+```
+"""
 macro doc_str(s::AbstractString, t...)
     docexpr(__source__, __module__, s, t...)
 end
@@ -59,4 +136,37 @@ import Base.Docs: catdoc
 
 catdoc(md::MD...) = MD(md...)
 
+if Base.generating_output()
+    # workload to reduce latency
+    show(devnull, MIME("text/plain"), md"""
+    # H1
+    ## H2
+    ### H3
+    #### H4
+    ##### H5
+    ###### H6
+    **bold text**
+    *italicized text*
+    ***bold and italicized text***
+    > blockquote
+    1. First item
+    2. Second item
+    3. Third item
+    - First item
+    - Second item
+    - Third item
+        - Indented item
+    `code`
+    Horizontal Rule
+    ---
+    **[Duck Duck Go](https://duckduckgo.com)**
+    <https://www.markdownguide.org>
+    <fake@example.com>
+    ![The San Juan Mountains are beautiful!](/assets/images/san-juan-mountains.jpg "San Juan Mountains")
+
+    H~2~O
+    X^2^
+    """)
+end
+
 end
diff --git a/stdlib/Markdown/src/parse/parse.jl b/stdlib/Markdown/src/parse/parse.jl
index 452d90d1176e1..8d691a281f218 100644
--- a/stdlib/Markdown/src/parse/parse.jl
+++ b/stdlib/Markdown/src/parse/parse.jl
@@ -1,5 +1,12 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+"""
+    MD
+
+`MD` represents a Markdown document. Note that the `MD` constructor should not generally be
+used directly, since it constructs the internal data structures. Instead, you can construct
+`MD` objects using the exported macros [`@md_str`](@ref) and [`@doc_str`](@ref).
+"""
 mutable struct MD
     content::Vector{Any}
     meta::Dict{Symbol, Any}
@@ -56,7 +63,7 @@ function parseinline(stream::IO, md::MD, config::Config)
         char = peek(stream, Char)
         if haskey(config.inner, char) &&
                 (inner = parseinline(stream, md, config.inner[char])) !== nothing
-            c = String(take!(buffer))
+            c = takestring!(buffer)
             !isempty(c) && push!(content, c)
             buffer = IOBuffer()
             push!(content, inner)
@@ -64,7 +71,7 @@ function parseinline(stream::IO, md::MD, config::Config)
             write(buffer, read(stream, Char))
         end
     end
-    c = String(take!(buffer))
+    c = takestring!(buffer)
     !isempty(c) && push!(content, c)
     return content
 end
@@ -76,7 +83,7 @@ parseinline(s, md::MD) = parseinline(s, md, config(md))
 
 # Block parsing
 
-function parse(stream::IO, block::MD, config::Config; breaking = false)
+function _parse(stream::IO, block::MD, config::Config; breaking = false)
     skipblank(stream)
     eof(stream) && return false
     for parser in (breaking ? config.breaking : [config.breaking; config.regular])
@@ -85,12 +92,17 @@ function parse(stream::IO, block::MD, config::Config; breaking = false)
     return false
 end
 
-parse(stream::IO, block::MD; breaking = false) =
-  parse(stream, block, config(block), breaking = breaking)
+_parse(stream::IO, block::MD; breaking = false) =
+    _parse(stream, block, config(block), breaking = breaking)
+
+"""
+    parse(stream::IO)::MD
 
+Parse the content of `stream` as Julia-flavored Markdown text and return the corresponding `MD` object.
+"""
 function parse(stream::IO; flavor = julia)
     isa(flavor, Symbol) && (flavor = flavors[flavor])
     markdown = MD(flavor)
-    while parse(stream, markdown, flavor) end
+    while _parse(stream, markdown, flavor) end
     return markdown
 end
diff --git a/stdlib/Markdown/src/parse/util.jl b/stdlib/Markdown/src/parse/util.jl
index aabfcbb3ddc62..cd8158780bd6d 100644
--- a/stdlib/Markdown/src/parse/util.jl
+++ b/stdlib/Markdown/src/parse/util.jl
@@ -141,7 +141,7 @@ function readuntil(stream::IO, delimiter; newlines = false, match = nothing)
         while !eof(stream)
             if startswith(stream, delimiter)
                 if count == 0
-                    return String(take!(buffer))
+                    return takestring!(buffer)
                 else
                     count -= 1
                     write(buffer, delimiter)
@@ -187,7 +187,7 @@ function parse_inline_wrapper(stream::IO, delimiter::AbstractString; rep = false
             if !(char in whitespace || char == '\n' || char in delimiter) && startswith(stream, delimiter^n)
                 trailing = 0
                 while startswith(stream, delimiter); trailing += 1; end
-                trailing == 0 && return String(take!(buffer))
+                trailing == 0 && return takestring!(buffer)
                 write(buffer, delimiter ^ (n + trailing))
             end
         end
diff --git a/stdlib/Markdown/src/render/html.jl b/stdlib/Markdown/src/render/html.jl
index a48180509400f..829fa6c7bf986 100644
--- a/stdlib/Markdown/src/render/html.jl
+++ b/stdlib/Markdown/src/render/html.jl
@@ -67,6 +67,9 @@ end
 
 function html(io::IO, code::Code)
     withtag(io, :pre) do
+        if code.language == "styled"
+            code = Code("", String(styled(code.code)))
+        end
         maybe_lang = !isempty(code.language) ? Any[:class=>"language-$(code.language)"] : []
         withtag(io, :code, maybe_lang...) do
             htmlesc(io, code.code)
@@ -134,6 +137,9 @@ function htmlinline(io::IO, content::Vector)
 end
 
 function htmlinline(io::IO, code::Code)
+    if code.language == "styled"
+        code = Code("", String(styled(code.code)))
+    end
     withtag(io, :code) do
         htmlesc(io, code.code)
     end
@@ -182,6 +188,21 @@ htmlinline(io::IO, x) = tohtml(io, x)
 
 export html
 
+"""
+    html([io::IO], md)
+
+Output the contents of the Markdown object `md` in HTML format, either
+writing to an (optional) `io` stream or returning a string.
+
+One can alternatively use `show(io, "text/html", md)` or `repr("text/html", md)`, which
+differ in that they wrap the output in a `<div class="markdown"> ... </div>` element.
+
+# Examples
+```jldoctest
+julia> html(md"hello _world_")
+"<p>hello <em>world</em></p>\\n"
+```
+"""
 html(md) = sprint(html, md)
 
 function show(io::IO, ::MIME"text/html", md::MD)
diff --git a/stdlib/Markdown/src/render/latex.jl b/stdlib/Markdown/src/render/latex.jl
index d18a2e760ef3d..fad0508ce0e59 100644
--- a/stdlib/Markdown/src/render/latex.jl
+++ b/stdlib/Markdown/src/render/latex.jl
@@ -33,6 +33,9 @@ function latex(io::IO, header::Header{l}) where l
 end
 
 function latex(io::IO, code::Code)
+    if code.language == "styled"
+        code = Code("", String(styled(code.code)))
+    end
     occursin("\\end{verbatim}", code.code) && error("Cannot include \"\\end{verbatim}\" in a latex code block")
     wrapblock(io, "verbatim") do
         println(io, code.code)
@@ -167,6 +170,20 @@ function latexesc(io, s::AbstractString)
     end
 end
 
+"""
+    latex([io::IO], md)
+
+Output the contents of the Markdown object `md` in LaTeX format, either
+writing to an (optional) `io` stream or returning a string.
+
+One can alternatively use `show(io, "text/latex", md)` or `repr("text/latex", md)`.
+
+# Examples
+```jldoctest
+julia> latex(md"hello _world_")
+"hello \\\\emph{world}\\n\\n"
+```
+"""
 latex(md) = sprint(latex, md)
 latexinline(md) = sprint(latexinline, md)
 latexesc(s) = sprint(latexesc, s)
diff --git a/stdlib/Markdown/src/render/rst.jl b/stdlib/Markdown/src/render/rst.jl
index 752916c581a07..e441ee0495da0 100644
--- a/stdlib/Markdown/src/render/rst.jl
+++ b/stdlib/Markdown/src/render/rst.jl
@@ -23,10 +23,16 @@ end
 function rst(io::IO, code::Code)
     if code.language == "jldoctest"
         println(io, ".. doctest::\n")
-    elseif code.language != "rst"
+    elseif code.language in ("", "julia", "julia-repl")
         println(io, ".. code-block:: julia\n")
+    elseif code.language == "rst"
+    elseif code.language == "styled"
+        code = Code("", String(styled(code.code)))
+        println(io, "::\n")
+    else
+        println(io, "::\n")
     end
-    for l in lines(code.code)
+    for l in eachsplit(code.code, '\n')
         println(io, "    ", l)
     end
 end
@@ -90,7 +96,7 @@ end
 
 function rst(io::IO, l::LaTeX)
     println(io, ".. math::\n")
-    for line in lines(l.formula)
+    for line in eachsplit(l.formula, '\n')
         println(io, "    ", line)
     end
 end
diff --git a/stdlib/Markdown/src/render/terminal/formatting.jl b/stdlib/Markdown/src/render/terminal/formatting.jl
index a031de4d9ad82..c9dadfb5f3d94 100644
--- a/stdlib/Markdown/src/render/terminal/formatting.jl
+++ b/stdlib/Markdown/src/render/terminal/formatting.jl
@@ -1,68 +1,82 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# Wrapping
+const AnnotIO = Union{AnnotatedIOBuffer, IOContext{AnnotatedIOBuffer}}
 
-function ansi_length(s)
-    replace(s, r"\e\[[0-9]+m" => "") |> textwidth
+function annotprint(f::Function, args...)
+    buf = AnnotatedIOBuffer()
+    f(buf, args...)
+    read(seekstart(buf), AnnotatedString)
 end
 
-words(s) = split(s, " ")
-lines(s) = split(s, "\n")
+"""
+    with_output_annotations(f::Function, io::AnnotIO, annots::Pair{Symbol, <:Any}...)
 
-function wrapped_line(io::IO, s::AbstractString, width, i)
-    ws = words(s)
-    lines = String[]
-    for word in ws
-        word_length = ansi_length(word)
-        word_length == 0 && continue
-        if isempty(lines) || i + word_length + 1 > width
-            i = word_length
-            if length(lines) > 0
-                last_line = lines[end]
-                maybe_underline = findlast(Base.text_colors[:underline], last_line)
-                if !isnothing(maybe_underline)
-                    # disable underline style at end of line if not already disabled.
-                    maybe_disable_underline = max(
-                        last(something(findlast(Base.disable_text_style[:underline], last_line), -1)),
-                        last(something(findlast(Base.text_colors[:normal], last_line), -1)),
-                    )
+Call `f(io)`, and apply `annots` to the output created by doing so.
+"""
+function with_output_annotations(f::Function, io::AnnotIO, annots::Pair{Symbol, <:Any}...)
+    @nospecialize annots
+    aio = if io isa AnnotatedIOBuffer io else io.io end
+    start = position(aio) + 1
+    f(io)
+    stop = position(aio)
+    sortedindex = searchsortedlast(aio.annotations, (region=start:stop,), by=a -> a.region)
+    for (i, annot) in enumerate(annots)
+        insert!(aio.annotations, sortedindex + i, (start:stop, annot...))
+    end
+end
 
-                    if maybe_disable_underline < 0 || maybe_disable_underline < last(maybe_underline)
+"""
+    wraplines(content::AnnotatedString, width::Integer = 80, column::Integer = 0)
 
-                        lines[end] = last_line * Base.disable_text_style[:underline]
-                        word = Base.text_colors[:underline] * word
-                    end
+Wrap `content` into a vector of lines of at most `width` (according to
+`textwidth`), with the first line starting at `column`.
+"""
+function wraplines(content::Union{Annot, SubString{<:Annot}}, width::Integer = 80, column::Integer = 0) where { Annot <: AnnotatedString}
+    s, lines = String(content), SubString{Annot}[]
+    i, lastwrap, slen = firstindex(s), 0, ncodeunits(s)
+    most_recent_break_opportunity = 1
+    while i < slen
+        if isspace(s[i]) && s[i] != '\n'
+            most_recent_break_opportunity = i
+        elseif s[i] == '\n'
+            push!(lines, content[nextind(s, lastwrap):prevind(s, i)])
+            lastwrap = i
+            column = 0
+        elseif column >= width && most_recent_break_opportunity > 1
+            if lastwrap == most_recent_break_opportunity
+                nextbreak = findfirst(isspace, @view s[nextind(s, lastwrap):end])
+                if isnothing(nextbreak)
+                    break
+                else
+                    most_recent_break_opportunity = lastwrap + nextbreak
                 end
+                i = most_recent_break_opportunity
+            else
+                i = nextind(s, most_recent_break_opportunity)
             end
-            push!(lines, word)
-        else
-            i += word_length + 1
-            lines[end] *= " " * word   # this could be more efficient
+            push!(lines, content[nextind(s, lastwrap):prevind(s, most_recent_break_opportunity)])
+            lastwrap = most_recent_break_opportunity
+            column = 0
         end
+        column += textwidth(s[i])
+        i = nextind(s, i)
     end
-    return i, lines
-end
-
-function wrapped_lines(io::IO, s::AbstractString; width = 80, i = 0)
-    ls = String[]
-    for ss in lines(s)
-        i, line = wrapped_line(io, ss, width, i)
-        append!(ls, line)
+    if lastwrap < slen
+        push!(lines, content[nextind(s, lastwrap):end])
     end
-    return ls
+    lines
 end
 
-wrapped_lines(io::IO, f::Function, args...; width = 80, i = 0) =
-    wrapped_lines(io, sprint(f, args...; context=io), width = width, i = 0)
-
-function print_wrapped(io::IO, s...; width = 80, pre = "", i = 0)
-    lines = wrapped_lines(io, s..., width = width, i = i)
-    isempty(lines) && return 0, 0
-    print(io, lines[1])
-    for line in lines[2:end]
-        print(io, '\n', pre, line)
+# Print horizontal lines between each docstring if there are multiple docs
+function insert_hlines(docs)
+    if !isa(docs, MD) || !haskey(docs.meta, :results) || isempty(docs.meta[:results])
+        return docs
+    end
+    docs = docs::MD
+    v = Any[]
+    for (n, doc) in enumerate(docs.content)
+        push!(v, doc)
+        n == length(docs.content) || push!(v, HorizontalRule())
     end
-    length(lines), length(pre) + ansi_length(lines[end])
+    return MD(v)
 end
-
-print_wrapped(f::Function, io::IO, args...; kws...) = print_wrapped(io, f, args...; kws...)
diff --git a/stdlib/Markdown/src/render/terminal/render.jl b/stdlib/Markdown/src/render/terminal/render.jl
index 20b1ef6d041fc..fc3fdac7c7aaf 100644
--- a/stdlib/Markdown/src/render/terminal/render.jl
+++ b/stdlib/Markdown/src/render/terminal/render.jl
@@ -13,121 +13,183 @@ function term(io::IO, content::Vector, cols)
     term(io, content[end], cols)
 end
 
-term(io::IO, md::MD, columns = cols(io)) = term(io, md.content, columns)
+function term(io::IO, md::MD, columns = cols(io))
+    md = insert_hlines(md)
+    return term(io, md.content, columns)
+end
 
 function term(io::IO, md::Paragraph, columns)
-    print(io, ' '^margin)
-    print_wrapped(io, width = columns-2margin, pre = ' '^margin) do io
-        terminline(io, md.content)
+    lines = wraplines(annotprint(terminline, md.content), columns-2margin)
+    for (i, line) in enumerate(lines)
+        print(io, ' '^margin, line)
+        i < length(lines) && println(io)
     end
 end
 
 function term(io::IO, md::BlockQuote, columns)
-    s = sprint(term, md.content, columns - 10; context=io)
-    lines = split(rstrip(s), '\n')
-    print(io, ' '^margin, '│', lines[1])
-    for i = 2:length(lines)
-        print(io, '\n', ' '^margin, '│', lines[i])
+    content = annotprint(term, md.content, columns - 10)
+    lines = wraplines(rstrip(content), columns - 10)
+    for (i, line) in enumerate(lines)
+        print(io, ' '^margin, '│', line)
+        i < length(lines) && println(io)
     end
 end
 
 function term(io::IO, md::Admonition, columns)
-    col = :default
-    # If the types below are modified, the page manual/documentation.md must be updated accordingly.
-    if md.category == "danger"
-        col = Base.error_color()
-    elseif md.category == "warning"
-        col = Base.warn_color()
-    elseif md.category in ("info", "note")
-        col = Base.info_color()
-    elseif md.category == "tip"
-        col = :green
+    accent = if md.category == "danger"
+        :error
+    elseif md.category in ("warning", "info", "note", "tip")
+        Symbol(md.category)
+    elseif md.category == "compat"
+        :bright_cyan
+    elseif md.category == "todo"
+        :magenta
+    else
+        :default
     end
-    printstyled(io, ' '^margin, "│ "; color=col, bold=true)
-    printstyled(io, isempty(md.title) ? md.category : md.title; color=col, bold=true)
-    printstyled(io, '\n', ' '^margin, '│', '\n'; color=col, bold=true)
-    s = sprint(term, md.content, columns - 10; context=io)
-    lines = split(rstrip(s), '\n')
-    for i in eachindex(lines)
-        printstyled(io, ' '^margin, '│'; color=col, bold=true)
-        print(io, lines[i])
-        i < lastindex(lines) && println(io)
+    title = if isempty(md.title) md.category else md.title end
+    print(io, ' '^margin, styled"{$accent,markdown_admonition:│ $title}",
+          '\n', ' '^margin, styled"{$accent,markdown_admonition:│}", '\n')
+    content = annotprint(term, md.content, columns - 10)
+    lines = split(rstrip(content), '\n')
+    for (i, line) in enumerate(lines)
+        print(io, ' '^margin, styled"{$accent,markdown_admonition:│}", line)
+        i < length(lines) && println(io)
     end
 end
 
 function term(io::IO, f::Footnote, columns)
     print(io, ' '^margin, "│ ")
-    printstyled(io, "[^$(f.id)]", bold=true)
+    print(io, styled"{markdown_footnote:[^$(f.id)]}")
     println(io, '\n', ' '^margin, '│')
-    s = sprint(term, f.text, columns - 10; context=io)
-    lines = split(rstrip(s), '\n')
-    for i in eachindex(lines)
-        print(io, ' '^margin, '│', lines[i])
-        i < lastindex(lines) && println(io)
+    content = annotprint(term, f.text, columns - 10)
+    lines = split(rstrip(content), '\n')
+    for (i, line) in enumerate(lines)
+        print(io, ' '^margin, '│', line)
+        i < length(lines) && println(io)
     end
 end
 
-function term(io::IO, md::List, columns)
+const _list_bullets = ("•  ", "–  ", "▪  ")
+
+function term(io::IO, md::List, columns, depth::Int = 1)
+    dterm(io, md, columns, _depth)      = term(io, md, columns)
+    dterm(io, md::List, columns, depth) = term(io, md, columns, depth)
     for (i, point) in enumerate(md.items)
-        print(io, ' '^2margin, isordered(md) ? "$(i + md.ordered - 1). " : "•  ")
-        print_wrapped(io, width = columns-(4margin+2), pre = ' '^(2margin+3),
-                          i = 2margin+2) do io
-            term(io, point, columns - 10)
+        bullet = if isordered(md)
+            string(lpad(i + md.ordered - 1, ndigits(length(md.items))), ". ")
+        elseif depth == 1
+            first(_list_bullets)
+        else
+            _list_bullets[2 + mod(depth, length(_list_bullets) - 1)]
         end
-        i < lastindex(md.items) && print(io, '\n', '\n')
-    end
-end
-
-function _term_header(io::IO, md, char, columns)
-    text = terminline_string(io, md.text)
-    with_output_color(:bold, io) do io
-        pre = ' '^margin
-        print(io, pre)
-        line_no, lastline_width = print_wrapped(io, text,
-                                                width=columns - 4margin; pre)
-        line_width = min(lastline_width, columns)
-        if line_no > 1
-            line_width = max(line_width, div(columns, 3)+length(pre))
+        print(io, ' '^ifelse(depth == 1, 2margin, 2*(depth-1)), styled"{markdown_list:$bullet}")
+        buf = AnnotatedIOBuffer()
+        if point isa Vector && !isempty(point)
+            for (i, elt) in enumerate(point[1:end-1])
+                dterm(buf, elt, columns - 10, depth + 1)
+                println(buf)
+                (!(point[i+1] isa List) || point[i+1].loose) && println(buf)
+            end
+            dterm(buf, point[end], columns - 10, depth + 1)
+        else
+            dterm(buf, point, columns - 10, depth + 1)
         end
-        header_width = max(0, line_width-length(pre))
-        char != ' ' && header_width > 0 && print(io, '\n', ' '^(margin), char^header_width)
+        content = read(seekstart(buf), AnnotatedString)
+        lines = split(rstrip(content), '\n')
+        common_indent = minimum(
+            (sum((1 for _ in Iterators.takewhile(isspace, line)), init=0)
+             for line in Iterators.filter(!isempty, lines)),
+            init=if isempty(lines) 0 else length(first(lines)) end)
+        for (l, line) in enumerate(lines)
+            l > 1 && print(io, ' '^ifelse(depth == 1, 2margin + 3, 3))
+            !isempty(line) && print(io, line[common_indent+1:end])
+            l < length(lines) && println(io)
+        end
+        i < length(md.items) && print(io, '\n'^(1 + md.loose))
     end
 end
 
 const _header_underlines = collect("≡=–-⋅ ")
 # TODO settle on another option with unicode e.g. "≡=≃–∼⋅" ?
 
-function term(io::IO, md::Header{l}, columns) where l
+function term(io::AnnotIO, md::Header{l}, columns) where l
+    face = Symbol("markdown_h$l")
     underline = _header_underlines[l]
-    _term_header(io, md, underline, columns)
+    pre = ' '^margin
+    local line_width
+    with_output_annotations(io, :face => face) do io
+        headline = annotprint(terminline, md.text)
+        lines = wraplines(headline, columns - 4margin)
+        for (i, line) in enumerate(lines)
+            print(io, pre, line)
+            i < length(lines) && println(io)
+        end
+        line_width = if length(lines) == 1
+            min(textwidth(lines[end]), columns)
+        elseif length(lines) > 1
+            max(textwidth(lines[end]), div(columns, 3)+length(pre))
+        else
+            0
+        end
+    end
+    header_width = max(0, line_width)
+    if underline != ' ' && header_width > 0
+        print(io, '\n', ' '^(margin))
+        with_output_annotations(io -> print(io, underline^header_width), io, :face => face)
+    end
 end
 
 function term(io::IO, md::Code, columns)
-    with_output_color(:cyan, io) do io
-        L = lines(md.code)
-        for i in eachindex(L)
-            print(io, ' '^margin, L[i])
-            i < lastindex(L) && println(io)
+    code = if md.language == "julia"
+        highlight(md.code)
+    elseif md.language == "julia-repl" || Base.startswith(md.language, "jldoctest")
+        hl = AnnotatedString(md.code)
+        for (; match) in eachmatch(r"(?:^|\n)julia>", hl)
+            StyledStrings.face!(match, :markdown_julia_prompt)
+            afterprompt = match.offset + ncodeunits(match) + 1
+            _, exprend = Meta.parse(md.code, afterprompt, raise = false)
+            highlight!(hl[afterprompt:prevind(md.code, exprend)])
+            if (nextspace = findnext(' ', md.code, exprend)) |> !isnothing
+                nextword = hl[exprend:prevind(hl, nextspace)]
+                if nextword == "ERROR:"
+                    StyledStrings.face!(nextword, :error)
+                end
+            end
         end
+        hl
+    elseif md.language == "styled"
+        styled(md.code)
+    else
+        styled"{markdown_code:$(md.code)}"
+    end
+    lines = split(code, '\n')
+    for (i, line) in enumerate(lines)
+        print(io, ' '^margin, line)
+        i < length(lines) && println(io)
     end
 end
 
 function term(io::IO, tex::LaTeX, columns)
-    printstyled(io, ' '^margin, tex.formula, color=:magenta)
+    print(io, ' '^margin, styled"{markdown_latex:$(tex.formula)}")
 end
 
 term(io::IO, br::LineBreak, columns) = nothing # line breaks already printed between subsequent elements
 
 function term(io::IO, br::HorizontalRule, columns)
-   print(io, ' '^margin, '─'^(columns - 2margin))
+    print(io, ' '^margin, styled"{markdown_hrule:$('─'^(columns - 2margin))}")
+end
+
+function term(io::IO, md::MarkdownElement, columns)
+    a = IOContext(AnnotatedIOBuffer(), io)
+    term(a, md, columns)
+    print(io, read(seekstart(a.io), AnnotatedString))
 end
 
 term(io::IO, x, _) = show(io, MIME"text/plain"(), x)
 
 # Inline Content
 
-terminline_string(io::IO, md) = sprint(terminline, md; context=io)
-
 terminline(io::IO, content...) = terminline(io, collect(content))
 
 function terminline(io::IO, content::Vector)
@@ -140,12 +202,12 @@ function terminline(io::IO, md::AbstractString)
     print(io, replace(md, r"[\s\t\n]+" => ' '))
 end
 
-function terminline(io::IO, md::Bold)
-    with_output_color(terminline, :bold, io, md.text)
+function terminline(io::AnnotIO, md::Bold)
+    with_output_annotations(io -> terminline(io, md.text), io, :face => :bold)
 end
 
-function terminline(io::IO, md::Italic)
-    with_output_color(terminline, :underline, io, md.text)
+function terminline(io::AnnotIO, md::Italic)
+    with_output_annotations(io -> terminline(io, md.text), io, :face => :italic)
 end
 
 function terminline(io::IO, md::LineBreak)
@@ -156,20 +218,36 @@ function terminline(io::IO, md::Image)
     terminline(io, "(Image: $(md.alt))")
 end
 
-terminline(io::IO, f::Footnote) = with_output_color(terminline, :bold, io, "[^$(f.id)]")
+function terminline(io::IO, f::Footnote)
+    print(io, styled"{markdown_footnote:[^$(f.id)]}")
+end
 
-function terminline(io::IO, md::Link)
-    url = !Base.startswith(md.url, "@ref") ? " ($(md.url))" : ""
-    text = terminline_string(io, md.text)
-    terminline(io, text, url)
+function terminline(io::AnnotIO, md::Link)
+    annots = if occursin(r"^(https?|file)://", md.url)
+        (:face => :markdown_link, :link => md.url)
+    else
+        (:face => :markdown_link,)
+    end
+    with_output_annotations(io -> terminline(io, md.text), io, annots...)
 end
 
 function terminline(io::IO, code::Code)
-    printstyled(io, code.code, color=:cyan)
+    body = if code.language == "styled"
+        styled(code.code)
+    else
+        code.code
+    end
+    print(io, styled"{markdown_inlinecode:$body}")
 end
 
 function terminline(io::IO, tex::LaTeX)
-    printstyled(io, tex.formula, color=:magenta)
+    print(io, styled"{markdown_latex:$(tex.formula)}")
+end
+
+function terminline(io::IO, md::MarkdownElement)
+    a = IOContext(AnnotatedIOBuffer(), io)
+    terminline(a, md)
+    print(io, read(seekstart(a.io), AnnotatedString))
 end
 
 terminline(io::IO, x) = show(io, MIME"text/plain"(), x)
diff --git a/stdlib/Markdown/test/runtests.jl b/stdlib/Markdown/test/runtests.jl
index 19d821a0254d7..6c5f6f12e771c 100644
--- a/stdlib/Markdown/test/runtests.jl
+++ b/stdlib/Markdown/test/runtests.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Test, Markdown
-import Markdown: MD, Paragraph, Header, Italic, Bold, LineBreak, plain, term, html, rst, Table, Code, LaTeX, Footnote
+using Test, Markdown, StyledStrings
+import Markdown: MD, Paragraph, Header, Italic, Bold, LineBreak, insert_hlines, plain, term, html, rst, Table, Code, LaTeX, Footnote
 import Base: show
 
 # Basics
@@ -233,7 +233,7 @@ World""" |> plain == "Hello\n\n---\n\nWorld\n"
 
 # multiple whitespace is ignored
 @test sprint(term, md"a  b") == "  a b"
-@test sprint(term, md"[x](https://julialang.org)") == "  x (https://julialang.org)"
+@test sprint(term, md"[x](https://julialang.org)") == "  x"
 @test sprint(term, md"[x](@ref)") == "  x"
 @test sprint(term, md"[x](@ref something)") == "  x"
 @test sprint(term, md"![x](https://julialang.org)") == "  (Image: x)"
@@ -298,6 +298,7 @@ end
 let doc =
     md"""
     1. a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij
+
     2. a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij
     """
     str = sprint(term, doc, 50)
@@ -376,8 +377,8 @@ table = md"""
 # mime output
 let out =
     @test sprint(show, "text/plain", book) ==
-        "  Title\n  ≡≡≡≡≡\n\n  Some discussion\n\n  │  A quote\n\n  Section important\n  =================\n\n  Some bolded\n\n    •  list1\n\n    •  list2"
-    @test sprint(show, "text/plain", md"#") == "  " # edge case of empty header
+        "  Title\n  ≡≡≡≡≡\n\n  Some discussion\n\n  │  A quote\n\n  Section important\n  =================\n\n  Some bolded\n\n    •  list1\n    •  list2"
+    @test sprint(show, "text/plain", md"#") == "" # edge case of empty header
     @test sprint(show, "text/markdown", book) ==
         """
         # Title
@@ -501,6 +502,30 @@ sum_ref = md"Behaves like $(ref(sum))"
 @test plain(sum_ref) == "Behaves like sum (see Julia docs)\n"
 @test html(sum_ref) == "<p>Behaves like sum &#40;see Julia docs&#41;</p>\n"
 
+# JuliaLang/julia#59783
+let x = 1,
+    result = md"""
+    $x
+
+    [^1]: $x
+
+    !!! note
+    $x
+    """,
+    expected = """
+    1
+
+    [^1]: 1
+
+    !!! note
+
+
+
+    1
+    """
+    @test plain(result) == expected
+end
+
 show(io::IO, m::MIME"text/html", r::Reference) =
     Markdown.withtag(io, :a, :href=>"test") do
         Markdown.htmlesc(io, Markdown.plaininline(r))
@@ -1157,7 +1182,7 @@ let buf = IOBuffer()
     show(buf, "text/markdown", md"*emph*")
     @test String(take!(buf)) == "*emph*\n"
     show(IOContext(buf, :color=>true), "text/plain", md"*emph*")
-    @test String(take!(buf)) == "  \e[4memph\e[24m"
+    @test String(take!(buf)) in ("  \e[3memph\e[23m", "  \e[4memph\e[24m")
 end
 
 let word = "Markdown" # disable underline when wrapping lines
@@ -1166,8 +1191,8 @@ let word = "Markdown" # disable underline when wrapping lines
     long_italic_text = Markdown.parse('_' * join(fill(word, 10), ' ') * '_')
     show(ctx, MIME("text/plain"), long_italic_text)
     lines = split(String(take!(buf)), '\n')
-    @test endswith(lines[begin], Base.disable_text_style[:underline])
-    @test startswith(lines[begin+1], ' '^Markdown.margin * Base.text_colors[:underline])
+    @test endswith(lines[begin], r"\e\[2[34]m")
+    @test startswith(lines[begin+1], Regex(' '^Markdown.margin * "\e\\[[34]m"))
 end
 
 let word = "Markdown" # pre is of size Markdown.margin when wrapping title
@@ -1176,7 +1201,9 @@ let word = "Markdown" # pre is of size Markdown.margin when wrapping title
     long_title = Markdown.parse("# " * join(fill(word, 3)))
     show(ctx, MIME("text/plain"), long_title)
     lines = split(String(take!(buf)), '\n')
-    @test all(startswith(Base.text_colors[:bold] * ' '^Markdown.margin), lines)
+    @test all(l -> startswith(l, ' '^Markdown.margin * StyledStrings.ANSI_STYLE_CODES.bold_weight) ||
+                   startswith(l, StyledStrings.ANSI_STYLE_CODES.bold_weight * ' '^Markdown.margin),
+              lines)
 end
 
 struct Struct49454 end
@@ -1185,7 +1212,7 @@ Base.show(io::IO, ::Struct49454) =
 
 let buf = IOBuffer()
     ctx = IOContext(buf, :color => true, :displaysize => (displaysize(buf)[1], 10))
-    show(stdout, MIME("text/plain"), md"""
+    show(ctx, MIME("text/plain"), md"""
     text without $(Struct49454()) underline.
     """)
     lines = split(String(take!(buf)), '\n')
@@ -1259,8 +1286,30 @@ end
     s = @md_str """
        Misc:\\
        - line\\
+         break
        """
-    @test sprint(show, MIME("text/plain"), s) == "  Misc:\n  - line"
+    @test sprint(show, MIME("text/plain"), s) == "  Misc:\n  - line\n   break"
+end
+
+@testset "pullrequest #57664: en_or_em_dash" begin
+    # Test that two hyphens (--) is parsed as en dash (–)
+    # and three hyphens (---) is parsed as em dash (—)
+    hyphen_text = md"foo - bar"
+    en_dash_text = md"foo -- bar"
+    em_dash_text = md"foo --- bar"
+
+    @test sprint(show, "text/markdown", hyphen_text) == "foo - bar\n"
+    @test sprint(show, "text/markdown", en_dash_text) == "foo – bar\n"
+    @test sprint(show, "text/markdown", em_dash_text) == "foo — bar\n"
+
+    # Test that parsing works for hyphen-minus (-), en dash (–) and em dash (—)
+    hyphen_text = md"foo - bar"
+    en_dash_text = md"foo – bar"
+    em_dash_text = md"foo — bar"
+
+    @test hyphen_text |> Markdown.plain == "foo - bar\n"
+    @test en_dash_text |> Markdown.plain == "foo – bar\n"
+    @test em_dash_text |> Markdown.plain == "foo — bar\n"
 end
 
 @testset "pullrequest #41552: a code block has \\end{verbatim}" begin
@@ -1293,3 +1342,18 @@ end
     # see issue #42139
     @test md"<一轮红日初升>" |> html == """<p>&lt;一轮红日初升&gt;</p>\n"""
 end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Markdown))
+end
+
+@testset "Non-Markdown" begin
+    # https://github.com/JuliaLang/julia/issues/37765
+    @test isa(insert_hlines(Text("foo")), Text)
+    # https://github.com/JuliaLang/julia/issues/37757
+    @test insert_hlines(nothing) === nothing
+end
+
+@testset "Lazy Strings" begin
+    @test Markdown.parse(lazy"foo") == Markdown.parse("foo")
+end
diff --git a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl b/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl
deleted file mode 100644
index e46da42a9a638..0000000000000
--- a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl
+++ /dev/null
@@ -1,62 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## dummy stub for https://github.com/JuliaBinaryWrappers/MbedTLS_jll.jl
-
-baremodule MbedTLS_jll
-using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
-
-export libmbedcrypto, libmbedtls, libmbedx509
-
-# These get calculated in __init__()
-const PATH = Ref("")
-const LIBPATH = Ref("")
-artifact_dir::String = ""
-libmbedcrypto_handle::Ptr{Cvoid} = C_NULL
-libmbedcrypto_path::String = ""
-libmbedtls_handle::Ptr{Cvoid} = C_NULL
-libmbedtls_path::String = ""
-libmbedx509_handle::Ptr{Cvoid} = C_NULL
-libmbedx509_path::String = ""
-
-if Sys.iswindows()
-    const libmbedcrypto = "libmbedcrypto.dll"
-    const libmbedtls = "libmbedtls.dll"
-    const libmbedx509 = "libmbedx509.dll"
-elseif Sys.isapple()
-    const libmbedcrypto = "@rpath/libmbedcrypto.7.dylib"
-    const libmbedtls = "@rpath/libmbedtls.14.dylib"
-    const libmbedx509 = "@rpath/libmbedx509.1.dylib"
-else
-    const libmbedcrypto = "libmbedcrypto.so.7"
-    const libmbedtls = "libmbedtls.so.14"
-    const libmbedx509 = "libmbedx509.so.1"
-end
-
-function __init__()
-    global libmbedcrypto_handle = dlopen(libmbedcrypto)
-    global libmbedcrypto_path = dlpath(libmbedcrypto_handle)
-    global libmbedtls_handle = dlopen(libmbedtls)
-    global libmbedtls_path = dlpath(libmbedtls_handle)
-    global libmbedx509_handle = dlopen(libmbedx509)
-    global libmbedx509_path = dlpath(libmbedx509_handle)
-    global artifact_dir = dirname(Sys.BINDIR)
-    LIBPATH[] = dirname(libmbedtls_path)
-    push!(LIBPATH_list, LIBPATH[])
-end
-
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libmbedcrypto_path() =libmbedcrypto_path
-get_libmbedtls_path() = libmbedtls_path
-get_libmbedx509_path() = libmbedx509_path
-
-end  # module MbedTLS_jll
diff --git a/stdlib/MbedTLS_jll/test/runtests.jl b/stdlib/MbedTLS_jll/test/runtests.jl
deleted file mode 100644
index 2d82fa564cd18..0000000000000
--- a/stdlib/MbedTLS_jll/test/runtests.jl
+++ /dev/null
@@ -1,10 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test, Libdl, MbedTLS_jll
-
-@testset "MbedTLS_jll" begin
-    vstr = zeros(UInt8, 32)
-    ccall((:mbedtls_version_get_string, libmbedcrypto), Cvoid, (Ref{UInt8},), vstr)
-    vn = VersionNumber(unsafe_string(pointer(vstr)))
-    @test vn == v"2.28.2"
-end
diff --git a/stdlib/Mmap/Project.toml b/stdlib/Mmap/Project.toml
index f3dab686d2eaa..ce4b65ccbb06a 100644
--- a/stdlib/Mmap/Project.toml
+++ b/stdlib/Mmap/Project.toml
@@ -1,5 +1,6 @@
 name = "Mmap"
 uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Mmap/docs/src/index.md b/stdlib/Mmap/docs/src/index.md
index 5c40f11db4a4c..5ec2d5064eaf0 100644
--- a/stdlib/Mmap/docs/src/index.md
+++ b/stdlib/Mmap/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Mmap/docs/src/index.md"
+```
+
 # Memory-mapped I/O
 
 Low level module for mmap (memory mapping of files).
diff --git a/stdlib/Mmap/src/Mmap.jl b/stdlib/Mmap/src/Mmap.jl
index 629f53e8371ed..806abc714d391 100644
--- a/stdlib/Mmap/src/Mmap.jl
+++ b/stdlib/Mmap/src/Mmap.jl
@@ -6,11 +6,10 @@ Low level module for mmap (memory mapping of files).
 module Mmap
 
 import Base: OS_HANDLE, INVALID_OS_HANDLE
+using Base.Sys: PAGESIZE
 
 export mmap
 
-const PAGESIZE = Int(Sys.isunix() ? ccall(:jl_getpagesize, Clong, ()) : ccall(:jl_getallocationgranularity, Clong, ()))
-
 # for mmaps not backed by files
 mutable struct Anonymous <: IO
     name::String
@@ -86,6 +85,8 @@ grow!(::Anonymous,o::Integer,l::Integer) = return
 function grow!(io::IO, offset::Integer, len::Integer)
     pos = position(io)
     filelen = filesize(io)
+    # If non-regular file skip trying to grow since we know that will fail the ftruncate syscall
+    filelen == 0 && !isfile(io) && return
     if filelen < offset + len
         failure = ccall(:jl_ftruncate, Cint, (Cint, Int64), fd(io), offset+len)
         Base.systemerror(:ftruncate, failure != 0)
@@ -185,19 +186,19 @@ like HDF5 (which can be used with memory-mapping).
 """
 function mmap(io::IO,
               ::Type{Array{T,N}}=Vector{UInt8},
-              dims::NTuple{N,Integer}=(div(filesize(io)-position(io),sizeof(T)),),
+              dims::NTuple{N,Integer}=(div(filesize(io)-position(io),Base.aligned_sizeof(T)),),
               offset::Integer=position(io); grow::Bool=true, shared::Bool=true) where {T,N}
     # check inputs
     isopen(io) || throw(ArgumentError("$io must be open to mmap"))
     isbitstype(T)  || throw(ArgumentError("unable to mmap $T; must satisfy isbitstype(T) == true"))
 
-    len = sizeof(T)
+    len = Base.aligned_sizeof(T)
     for l in dims
         len, overflow = Base.Checked.mul_with_overflow(promote(len, l)...)
-        overflow && throw(ArgumentError("requested size prod($((sizeof(T), dims...))) too large, would overflow typeof(size(T)) == $(typeof(len))"))
+        overflow && throw(ArgumentError("requested size prod($((len, dims...))) too large, would overflow typeof(size(T)) == $(typeof(len))"))
     end
     len >= 0 || throw(ArgumentError("requested size must be ≥ 0, got $len"))
-    len == 0 && return Array{T}(undef, ntuple(x->0,Val(N)))
+    len == 0 && return Array{T}(undef, dims)
     len < typemax(Int) - PAGESIZE || throw(ArgumentError("requested size must be < $(typemax(Int)-PAGESIZE), got $len"))
 
     offset >= 0 || throw(ArgumentError("requested offset must be ≥ 0, got $offset"))
@@ -208,18 +209,38 @@ function mmap(io::IO,
     mmaplen = (offset - offset_page) + len
 
     file_desc = gethandle(io)
+    szfile = convert(Csize_t, len + offset)
+    requestedSizeLarger = false
+    if !(io isa Mmap.Anonymous)
+        requestedSizeLarger = szfile > filesize(io)
+    end
     # platform-specific mmapping
     @static if Sys.isunix()
         prot, flags, iswrite = settings(file_desc, shared)
-        iswrite && grow && grow!(io, offset, len)
+        if requestedSizeLarger && isfile(io) # add a condition to this line to ensure it only checks files
+            if iswrite
+                if grow
+                    grow!(io, offset, len)
+                else
+                    throw(ArgumentError("requested size $szfile larger than file size $(filesize(io)), but requested not to grow"))
+                end
+            else
+                throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions"))
+            end
+        end
         # mmap the file
         ptr = ccall(:jl_mmap, Ptr{Cvoid}, (Ptr{Cvoid}, Csize_t, Cint, Cint, RawFD, Int64),
             C_NULL, mmaplen, prot, flags, file_desc, offset_page)
         systemerror("memory mapping failed", reinterpret(Int, ptr) == -1)
     else
         name, readonly, create = settings(io)
-        szfile = convert(Csize_t, len + offset)
-        readonly && szfile > filesize(io) && throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions"))
+        if requestedSizeLarger
+            if readonly
+                throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions"))
+            elseif !grow
+                throw(ArgumentError("requested size $szfile larger than file size $(filesize(io)), but requested not to grow"))
+            end
+        end
         handle = create ? ccall(:CreateFileMappingW, stdcall, Ptr{Cvoid}, (OS_HANDLE, Ptr{Cvoid}, DWORD, DWORD, DWORD, Cwstring),
                                 file_desc, C_NULL, readonly ? PAGE_READONLY : PAGE_READWRITE, szfile >> 32, szfile & typemax(UInt32), name) :
                           ccall(:OpenFileMappingW, stdcall, Ptr{Cvoid}, (DWORD, Cint, Cwstring),
@@ -231,7 +252,7 @@ function mmap(io::IO,
     end # os-test
     # convert mmapped region to Julia Array at `ptr + (offset - offset_page)` since file was mapped at offset_page
     A = unsafe_wrap(Array, convert(Ptr{T}, UInt(ptr) + UInt(offset - offset_page)), dims)
-    finalizer(A) do x
+    finalizer(A.ref.mem) do x
         @static if Sys.isunix()
             systemerror("munmap",  ccall(:munmap, Cint, (Ptr{Cvoid}, Int), ptr, mmaplen) != 0)
         else
@@ -245,7 +266,7 @@ end
 
 mmap(file::AbstractString,
      ::Type{T}=Vector{UInt8},
-     dims::NTuple{N,Integer}=(div(filesize(file),sizeof(eltype(T))),),
+     dims::NTuple{N,Integer}=(div(filesize(file),Base.aligned_sizeof(eltype(T))),),
      offset::Integer=Int64(0); grow::Bool=true, shared::Bool=true) where {T<:Array,N} =
     open(io->mmap(io, T, dims, offset; grow=grow, shared=shared), file, isfile(file) ? "r" : "w+")::Array{eltype(T),N}
 
@@ -342,8 +363,9 @@ Forces synchronization between the in-memory version of a memory-mapped `Array`
 [`BitArray`](@ref) and the on-disk version.
 """
 function sync!(m::Array, flags::Integer=MS_SYNC)
-    offset = rem(UInt(pointer(m)), PAGESIZE)
-    ptr = pointer(m) - offset
+    ptr = pointer(m)
+    offset = rem(UInt(ptr), PAGESIZE)
+    ptr = ptr - offset
     mmaplen = sizeof(m) + offset
     GC.@preserve m @static if Sys.isunix()
         systemerror("msync",
@@ -404,8 +426,9 @@ Advises the kernel on the intended usage of the memory-mapped `array`, with the
 `flag` being one of the available `MADV_*` constants.
 """
 function madvise!(m::Array, flag::Integer=MADV_NORMAL)
-    offset = rem(UInt(pointer(m)), PAGESIZE)
-    ptr = pointer(m) - offset
+    ptr = pointer(m)
+    offset = rem(UInt(ptr), PAGESIZE)
+    ptr = ptr - offset
     mmaplen = sizeof(m) + offset
     GC.@preserve m begin
         systemerror("madvise",
diff --git a/stdlib/Mmap/test/runtests.jl b/stdlib/Mmap/test/runtests.jl
index 0b3cb0b9f1a42..ec4c85c692549 100644
--- a/stdlib/Mmap/test/runtests.jl
+++ b/stdlib/Mmap/test/runtests.jl
@@ -11,12 +11,13 @@ GC.gc(); GC.gc()
 GC.gc(); GC.gc()
 @test mmap(file, Array{UInt8,3}, (1,1,11)) == reshape(t,(1,1,11))
 GC.gc(); GC.gc()
-@test mmap(file, Array{UInt8,3}, (11,0,1)) == Array{UInt8}(undef, (0,0,0))
+@test size(mmap(file, Array{UInt8,3}, (11,0,1))) == (11,0,1)
 @test mmap(file, Vector{UInt8}, (11,)) == t
 GC.gc(); GC.gc()
 @test mmap(file, Array{UInt8,2}, (1,11)) == t'
 GC.gc(); GC.gc()
-@test mmap(file, Array{UInt8,2}, (0,12)) == Array{UInt8}(undef, (0,0))
+@test size(mmap(file, Array{UInt8,2}, (0,12))) == (0,12)
+@test size(mmap(file, Matrix{Float32}, (10,0))) == (10,0)
 m = mmap(file, Array{UInt8,3}, (1,2,1))
 @test m == reshape(b"He",(1,2,1))
 finalize(m); m=nothing; GC.gc()
@@ -44,9 +45,10 @@ s = open(file)
 @test length(@inferred mmap(s, Vector{Int8}, 12, 0; grow=false)) == 12
 @test length(@inferred mmap(s, Vector{Int8}, 12, 0; shared=false)) == 12
 close(s)
-@test_throws ErrorException mmap(file, Vector{Ref}) # must be bit-type
+@test_throws ArgumentError mmap(file, Vector{Ref}) # must be bit-type
 GC.gc(); GC.gc()
 
+file = tempname() # new name to reduce chance of issues due slow windows fs
 s = open(f->f,file,"w")
 @test mmap(file) == Vector{UInt8}() # requested len=0 on empty file
 @test mmap(file,Vector{UInt8},0) == Vector{UInt8}()
@@ -100,9 +102,9 @@ if !(Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le)
     s = open(file, "r")
     m = mmap(s)
     @test_throws ReadOnlyMemoryError m[5] = UInt8('x') # tries to setindex! on read-only array
-    finalize(m); m=nothing; GC.gc()
+    finalize(m); m=nothing;
 end
-
+GC.gc()
 write(file, "Hello World\n")
 
 s = open(file, "r")
@@ -191,6 +193,7 @@ m = mmap(file,Vector{UInt8},2,6)
 @test_throws BoundsError m[3]
 finalize(m); m = nothing; GC.gc()
 
+file = tempname() # new name to reduce chance of issues due slow windows fs
 s = open(file, "w")
 write(s, [0xffffffffffffffff,
           0xffffffffffffffff,
@@ -266,6 +269,7 @@ A2 = mmap(s, Matrix{Int}, (m,n))
 seek(s, 0)
 A3 = mmap(s, Matrix{Int}, (m,n), convert(Int64, 2*sizeof(Int)))
 @test A == A3
+seek(s, 0)
 A4 = mmap(s, Matrix{Int}, (m,150), convert(Int64, (2+150*m)*sizeof(Int)))
 @test A[:, 151:end] == A4
 close(s)
@@ -336,6 +340,24 @@ open(file, "r+") do s
     finalize(A); A = nothing; GC.gc()
     A = mmap(s, Vector{UInt8}, (10,), 1)
     Mmap.sync!(A)
-    finalize(A); A = nothing; GC.gc()
+    finalize(A); A = nothing;
 end
+GC.gc()
+rm(file)
+
+# test for #58982 - mmap with primitive types
+file = tempname()
+primitive type PrimType9Bytes 9*8 end
+arr = Vector{PrimType9Bytes}(undef, 2)
+write(file, arr)
+m = mmap(file, Vector{PrimType9Bytes})
+@test length(m) == 2
+@test m[1] == arr[1]
+@test m[2] == arr[2]
+finalize(m); m = nothing; GC.gc()
 rm(file)
+
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Mmap))
+end
diff --git a/stdlib/MozillaCACerts_jll/Project.toml b/stdlib/MozillaCACerts_jll/Project.toml
index cef860fda4acd..d63251d59d58f 100644
--- a/stdlib/MozillaCACerts_jll/Project.toml
+++ b/stdlib/MozillaCACerts_jll/Project.toml
@@ -1,6 +1,7 @@
 name = "MozillaCACerts_jll"
 uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
-version = "2023.01.10"
+# Keep in sync with `deps/libgit2.version`.
+version = "2025.11.04"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl b/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl
index 244c1204563d5..1d5df0236ae9e 100644
--- a/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl
+++ b/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl
@@ -4,7 +4,6 @@
 
 baremodule MozillaCACerts_jll
 using Base
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
 const PATH_list = String[]
 const LIBPATH_list = String[]
diff --git a/stdlib/NetworkOptions.version b/stdlib/NetworkOptions.version
index 64d3fab9d7bf4..7a3c0ccd27d3f 100644
--- a/stdlib/NetworkOptions.version
+++ b/stdlib/NetworkOptions.version
@@ -1,4 +1,4 @@
 NETWORKOPTIONS_BRANCH = master
-NETWORKOPTIONS_SHA1 = f7bbeb66f05fc651adb12758b650e8630a998fbd
+NETWORKOPTIONS_SHA1 = 7034c55dbf52ee959cabd63bcbe656df658f5bda
 NETWORKOPTIONS_GIT_URL := https://github.com/JuliaLang/NetworkOptions.jl.git
 NETWORKOPTIONS_TAR_URL = https://api.github.com/repos/JuliaLang/NetworkOptions.jl/tarball/$1
diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml
index 529c9945e65f1..8eafa2f2365c1 100644
--- a/stdlib/OpenBLAS_jll/Project.toml
+++ b/stdlib/OpenBLAS_jll/Project.toml
@@ -1,15 +1,14 @@
 name = "OpenBLAS_jll"
 uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
-version = "0.3.23+0"
+version = "0.3.29+0"
 
 [deps]
-# See note in `src/OpenBLAS_jll.jl` about this dependency.
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
-julia = "1.9"
+julia = "1.11"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
index a0c11ab047142..238ad459a2dc9 100644
--- a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
+++ b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
@@ -2,30 +2,18 @@
 
 ## dummy stub for https://github.com/JuliaBinaryWrappers/OpenBLAS_jll.jl
 baremodule OpenBLAS_jll
-using Base, Libdl, Base.BinaryPlatforms
-
-# We are explicitly NOT loading this at runtime, as it contains `libgomp`
-# which conflicts with `libiomp5`, breaking things like MKL.  In the future,
-# we hope to transition to a JLL interface that provides a more granular
-# interface than eagerly dlopen'ing all libraries provided in the JLL
-# which will eliminate issues like this, where we avoid loading a JLL
-# because we don't want to load a library that we don't even use yet.
-# using CompilerSupportLibraries_jll
-# Because of this however, we have to manually load the libraries we
-# _do_ care about, namely libgfortran
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
+using Base, Libdl
+using CompilerSupportLibraries_jll
 
 export libopenblas
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libopenblas_handle::Ptr{Cvoid} = C_NULL
-libopenblas_path::String = ""
+
 
 if Base.USE_BLAS64
     const libsuffix = "64_"
@@ -33,20 +21,42 @@ else
     const libsuffix = ""
 end
 
-if Sys.iswindows()
-    const libopenblas = "libopenblas$(libsuffix).dll"
-    const _libgfortran = string("libgfortran-", libgfortran_version(HostPlatform()).major, ".dll")
-elseif Sys.isapple()
-    const libopenblas = "@rpath/libopenblas$(libsuffix).dylib"
-    const _libgfortran = string("@rpath/", "libgfortran.", libgfortran_version(HostPlatform()).major, ".dylib")
-else
-    const libopenblas = "libopenblas$(libsuffix).so"
-    const _libgfortran = string("libgfortran.so.", libgfortran_version(HostPlatform()).major)
+libopenblas_path::String = ""
+const libopenblas = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath(string("libopenblas", libsuffix, ".dll"))
+    elseif Sys.isapple()
+        BundledLazyLibraryPath(string("libopenblas", libsuffix, ".dylib"))
+    else
+        BundledLazyLibraryPath(string("libopenblas", libsuffix, ".so"))
+    end,
+    dependencies = if Sys.iswindows()
+        LazyLibrary[libgfortran, libgcc_s]
+    elseif Sys.isapple()
+        deps = LazyLibrary[libgfortran]
+        if isdefined(CompilerSupportLibraries_jll, :libquadmath)
+            push!(deps, CompilerSupportLibraries_jll.libquadmath)
+        end
+        if Sys.ARCH != :aarch64
+            push!(deps, CompilerSupportLibraries_jll.libgcc_s)
+        end
+        deps
+    else
+        LazyLibrary[libgfortran]
+    end
+)
+
+# Conform to LazyJLLWrappers API
+function eager_mode()
+    CompilerSupportLibraries_jll.eager_mode()
+    dlopen(libopenblas_path)
 end
+is_available() = true
 
 function __init__()
+    global libopenblas_path = string(libopenblas.path)
     # make sure OpenBLAS does not set CPU affinity (#1070, #9639)
-    if !haskey(ENV, "OPENBLAS_MAIN_FREE")
+    if !(haskey(ENV, "OPENBLAS_MAIN_FREE"))
         ENV["OPENBLAS_MAIN_FREE"] = "1"
     end
 
@@ -55,32 +65,16 @@ function __init__()
     # threads it thinks it needs to use.
     # X-ref: https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables
     # X-ref: https://github.com/JuliaLang/julia/issues/45434
-    if !haskey(ENV, "OPENBLAS_NUM_THREADS") &&
-       !haskey(ENV, "GOTO_NUM_THREADS") &&
-       !haskey(ENV, "OMP_NUM_THREADS")
+    if !(haskey(ENV, "OPENBLAS_NUM_THREADS")) && (!(haskey(ENV, "GOTO_NUM_THREADS")) && !(haskey(ENV, "OMP_NUM_THREADS")))
         # We set this to `1` here, and then LinearAlgebra will update
         # to the true value in its `__init__()` function.
         ENV["OPENBLAS_DEFAULT_NUM_THREADS"] = "1"
     end
 
-    # As mentioned above, we are sneaking this in here so that we don't have to
-    # depend on CSL_jll and load _all_ of its libraries.
-    dlopen(_libgfortran)
-
-    global libopenblas_handle = dlopen(libopenblas)
-    global libopenblas_path = dlpath(libopenblas_handle)
+    global libopenblas_path = string(libopenblas.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libopenblas_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libopenblas_path() = libopenblas_path
-
 end  # module OpenBLAS_jll
diff --git a/stdlib/OpenBLAS_jll/test/runtests.jl b/stdlib/OpenBLAS_jll/test/runtests.jl
index 1d944bab8cd67..9f101d4c498c9 100644
--- a/stdlib/OpenBLAS_jll/test/runtests.jl
+++ b/stdlib/OpenBLAS_jll/test/runtests.jl
@@ -13,5 +13,5 @@ else
 end
 
 @testset "OpenBLAS_jll" begin
-    @test dlsym(OpenBLAS_jll.libopenblas_handle, @blasfunc(openblas_set_num_threads); throw_error=false) != nothing
+    @test dlsym(OpenBLAS_jll.libopenblas, @blasfunc(openblas_set_num_threads); throw_error=false) !== nothing
 end
diff --git a/stdlib/OpenLibm_jll/Project.toml b/stdlib/OpenLibm_jll/Project.toml
index 7f02fbc81ce1b..750b6b77b5020 100644
--- a/stdlib/OpenLibm_jll/Project.toml
+++ b/stdlib/OpenLibm_jll/Project.toml
@@ -1,12 +1,14 @@
 name = "OpenLibm_jll"
 uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
-version = "0.8.1+0"
+version = "0.8.7+0"
 
 [deps]
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 
 [compat]
+CompilerSupportLibraries_jll = "1.3.0"
 julia = "1.0"
 
 [extras]
diff --git a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
index f2dee45a279cd..264dbbf9af8b9 100644
--- a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
+++ b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
@@ -3,43 +3,48 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/OpenLibm_jll.jl
 baremodule OpenLibm_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
+if Sys.iswindows()
+    using CompilerSupportLibraries_jll
+end
 
 export libopenlibm
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libopenlibm_handle::Ptr{Cvoid} = C_NULL
-libopenlibm_path::String = ""
 
-if Sys.iswindows()
-    const libopenlibm = "libopenlibm.dll"
-elseif Sys.isapple()
-    const libopenlibm = "@rpath/libopenlibm.4.dylib"
-else
-    const libopenlibm = "libopenlibm.so.4"
+libopenlibm_path::String = ""
+const libopenlibm = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libopenlibm.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libopenlibm.4.dylib")
+    else
+        BundledLazyLibraryPath("libopenlibm.so.4")
+    end,
+    dependencies = if Sys.iswindows()
+        LazyLibrary[libgcc_s]
+    else
+        LazyLibrary[]
+    end
+)
+
+function eager_mode()
+    dlopen(libopenlibm)
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
 end
+is_available() = true
 
 function __init__()
-    global libopenlibm_handle = dlopen(libopenlibm)
-    global libopenlibm_path = dlpath(libopenlibm_handle)
+    global libopenlibm_path = string(libopenlibm.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libopenlibm_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libopenlibm_path() = libopenlibm_path
-
 end  # module OpenLibm_jll
diff --git a/stdlib/MbedTLS_jll/Project.toml b/stdlib/OpenSSL_jll/Project.toml
similarity index 66%
rename from stdlib/MbedTLS_jll/Project.toml
rename to stdlib/OpenSSL_jll/Project.toml
index 2e8d0d384f88a..ac024da2f9a02 100644
--- a/stdlib/MbedTLS_jll/Project.toml
+++ b/stdlib/OpenSSL_jll/Project.toml
@@ -1,13 +1,13 @@
-name = "MbedTLS_jll"
-uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
-version = "2.28.2+0"
+name = "OpenSSL_jll"
+uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
+version = "3.5.4+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
-julia = "1.8"
+julia = "1.6"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/OpenSSL_jll/src/OpenSSL_jll.jl b/stdlib/OpenSSL_jll/src/OpenSSL_jll.jl
new file mode 100644
index 0000000000000..f4d11ee65b3bf
--- /dev/null
+++ b/stdlib/OpenSSL_jll/src/OpenSSL_jll.jl
@@ -0,0 +1,71 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/OpenSSL_jll.jl
+
+baremodule OpenSSL_jll
+using Base, Libdl, Base.BinaryPlatforms
+
+export libcrypto, libssl
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const PATH_list = String[]
+const LIBPATH = Ref("")
+const LIBPATH_list = String[]
+artifact_dir::String = ""
+
+libcrypto_path::String = ""
+const libcrypto = LazyLibrary(
+    if Sys.iswindows()
+        if arch(HostPlatform()) == "x86_64"
+            BundledLazyLibraryPath("libcrypto-3-x64.dll")
+        else
+            BundledLazyLibraryPath("libcrypto-3.dll")
+        end
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libcrypto.3.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libcrypto.so.3")
+    else
+        error("OpenSSL_jll: Library 'libcrypto' is not available for $(Sys.KERNEL)")
+    end
+)
+
+libssl_path::String = ""
+const libssl = LazyLibrary(
+    if Sys.iswindows()
+        if arch(HostPlatform()) == "x86_64"
+            BundledLazyLibraryPath("libssl-3-x64.dll")
+        else
+            BundledLazyLibraryPath("libssl-3.dll")
+        end
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libssl.3.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libssl.so.3")
+    else
+        error("OpenSSL_jll: Library 'libssl' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = LazyLibrary[libcrypto]
+)
+
+function eager_mode()
+    dlopen(libcrypto)
+    dlopen(libssl)
+end
+is_available() = true
+
+function __init__()
+    global libcrypto_path = string(libcrypto.path)
+    global libssl_path = string(libssl.path)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libssl_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
+
+end  # module OpenSSL_jll
diff --git a/stdlib/OpenSSL_jll/test/runtests.jl b/stdlib/OpenSSL_jll/test/runtests.jl
new file mode 100644
index 0000000000000..6c4cfc1184b87
--- /dev/null
+++ b/stdlib/OpenSSL_jll/test/runtests.jl
@@ -0,0 +1,10 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, OpenSSL_jll
+
+@testset "OpenSSL_jll" begin
+    major = ccall((:OPENSSL_version_major, libcrypto), Cuint, ())
+    minor = ccall((:OPENSSL_version_minor, libcrypto), Cuint, ())
+    patch = ccall((:OPENSSL_version_patch, libcrypto), Cuint, ())
+    @test VersionNumber(major, minor, patch) == v"3.5.4"
+end
diff --git a/stdlib/PCRE2_jll/Project.toml b/stdlib/PCRE2_jll/Project.toml
index d630c04383bfb..dd65bfadd1d84 100644
--- a/stdlib/PCRE2_jll/Project.toml
+++ b/stdlib/PCRE2_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "PCRE2_jll"
 uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
-version = "10.42.0+0"
+version = "10.47.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/PCRE2_jll/src/PCRE2_jll.jl b/stdlib/PCRE2_jll/src/PCRE2_jll.jl
index e7f685820830b..c6e32bf3e6672 100644
--- a/stdlib/PCRE2_jll/src/PCRE2_jll.jl
+++ b/stdlib/PCRE2_jll/src/PCRE2_jll.jl
@@ -3,43 +3,44 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/PCRE2_jll.jl
 baremodule PCRE2_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
 
 export libpcre2_8
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libpcre2_8_handle::Ptr{Cvoid} = C_NULL
-libpcre2_8_path::String = ""
 
-if Sys.iswindows()
-    const libpcre2_8 = "libpcre2-8-0.dll"
-elseif Sys.isapple()
-    const libpcre2_8 = "@rpath/libpcre2-8.0.dylib"
-else
-    const libpcre2_8 = "libpcre2-8.so.0"
+libpcre2_8_path::String = ""
+const libpcre2_8 = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libpcre2-8.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libpcre2-8.0.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libpcre2-8.so.0")
+    else
+        error("PCRE2_jll: Library 'libpcre2_8' is not available for $(Sys.KERNEL)")
+    end
+)
+
+function eager_mode()
+    dlopen(libpcre2_8)
 end
+is_available() = true
 
 function __init__()
-    global libpcre2_8_handle = dlopen(libpcre2_8)
-    global libpcre2_8_path = dlpath(libpcre2_8_handle)
+    global libpcre2_8_path = string(libpcre2_8.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libpcre2_8_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libpcre2_8_path() = libpcre2_8_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module PCRE2_jll
diff --git a/stdlib/PCRE2_jll/test/runtests.jl b/stdlib/PCRE2_jll/test/runtests.jl
index d593b07af31ce..1a483d582f766 100644
--- a/stdlib/PCRE2_jll/test/runtests.jl
+++ b/stdlib/PCRE2_jll/test/runtests.jl
@@ -6,5 +6,5 @@ using Test, Libdl, PCRE2_jll
     vstr = zeros(UInt8, 32)
     @test ccall((:pcre2_config_8, libpcre2_8), Cint, (UInt32, Ref{UInt8}), 11, vstr) > 0
     vn = VersionNumber(split(unsafe_string(pointer(vstr)), " ")[1])
-    @test vn == v"10.42.0"
+    @test vn == v"10.47.0"
 end
diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version
index 6551c7e24049f..058343acdd8ad 100644
--- a/stdlib/Pkg.version
+++ b/stdlib/Pkg.version
@@ -1,4 +1,4 @@
 PKG_BRANCH = master
-PKG_SHA1 = e8197dd0ed8132d4a7619f3657363c8415249c47
+PKG_SHA1 = 1e90f07f9f28e9cec60c5aea0e55302a02164b10
 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git
 PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1
diff --git a/stdlib/Printf/Project.toml b/stdlib/Printf/Project.toml
index 9fa4e3633cae1..019b7e94ef9bd 100644
--- a/stdlib/Printf/Project.toml
+++ b/stdlib/Printf/Project.toml
@@ -1,5 +1,6 @@
 name = "Printf"
 uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+version = "1.11.0"
 
 [deps]
 Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
diff --git a/stdlib/Printf/docs/src/index.md b/stdlib/Printf/docs/src/index.md
index 48e38e2b2ce5b..1c6f98ce22e58 100644
--- a/stdlib/Printf/docs/src/index.md
+++ b/stdlib/Printf/docs/src/index.md
@@ -1,6 +1,14 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Printf/docs/src/index.md"
+```
+
 # [Printf](@id man-printf)
 
+The `Printf` module provides formatted output functions similar to the C standard library's `printf`. It allows formatted printing to an output stream or to a string.
+
 ```@docs
 Printf.@printf
 Printf.@sprintf
+Printf.Format
+Printf.format
 ```
diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl
index cb336a8d9c18b..fd38b3ebd3573 100644
--- a/stdlib/Printf/src/Printf.jl
+++ b/stdlib/Printf/src/Printf.jl
@@ -1,11 +1,15 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-
+"""
+The `Printf` module provides formatted output functions similar to the C standard library's `printf`. It allows formatted printing to an output stream or to a string.
+"""
 module Printf
 
 using Base.Ryu
 
 export @printf, @sprintf
 
+public format, Format
+
 # format specifier categories
 const Ints = Union{Val{'d'}, Val{'i'}, Val{'u'}, Val{'x'}, Val{'X'}, Val{'o'}}
 const Floats = Union{Val{'e'}, Val{'E'}, Val{'f'}, Val{'F'}, Val{'g'}, Val{'G'}, Val{'a'}, Val{'A'}}
@@ -237,7 +241,7 @@ function Format(f::AbstractString)
         !(b in b"diouxXDOUeEfFgGaAcCsSpn") && throw(InvalidFormatStringError("'$(Char(b))' is not a valid type specifier", f, last_percent_pos, pos-1))
         type = Val{Char(b)}
         if type <: Ints && precision > 0
-            # note - we should also set zero to false if dynamic precison > 0
+            # note - we should also set zero to false if dynamic precision > 0
             # this is taken care of in fmt() for Ints
             zero = false
         elseif (type <: Strings || type <: Chars) && !parsedprecdigits
@@ -293,11 +297,11 @@ end
 @inline function rmdynamic(spec::Spec{T}, args, argp) where {T}
     zero, width, precision = spec.zero, spec.width, spec.precision
     if spec.dynamic_width
-        width = args[argp]
+        width = args[argp]::Integer
         argp += 1
     end
     if spec.dynamic_precision
-        precision = args[argp]
+        precision = args[argp]::Integer
         if zero && T <: Ints && precision > 0
             zero = false
         end
@@ -306,12 +310,12 @@ end
     (Spec{T}(spec.leftalign, spec.plus, spec.space, zero, spec.hash, width, precision, false, false), argp)
 end
 
-@inline function fmt(buf, pos, args, argp, spec::Spec{T}) where {T}
+Base.@constprop :aggressive function fmt(buf, pos, args, argp, spec::Spec{T}) where {T}
     spec, argp = rmdynamic(spec, args, argp)
     (fmt(buf, pos, args[argp], spec), argp+1)
 end
 
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars}
     leftalign, width = spec.leftalign, spec.width
     c = Char(first(arg))
     w = textwidth(c)
@@ -332,7 +336,7 @@ end
 end
 
 # strings
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings}
     leftalign, hash, width, prec = spec.leftalign, spec.hash, spec.width, spec.precision
     str = string(arg)
     slen = textwidth(str)::Int + (hash ? arg isa AbstractString ? 2 : 1 : 0)
@@ -379,7 +383,7 @@ toint(x::Rational) = Integer(x)
 fmt(buf, pos, arg::AbstractFloat, spec::Spec{T}) where {T <: Ints} =
     fmt(buf, pos, arg, floatfmt(spec))
 
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Ints}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Ints}
     leftalign, plus, space, zero, hash, width, prec =
         spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision
     bs = base(T)
@@ -493,7 +497,7 @@ _snprintf(ptr, siz, str, arg) =
 # seems like a dangerous thing to do.
 const __BIG_FLOAT_MAX__ = 8192
 
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats}
     leftalign, plus, space, zero, hash, width, prec =
         spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision
     x = tofloat(arg)
@@ -650,7 +654,7 @@ const __BIG_FLOAT_MAX__ = 8192
         else
             # right aligned
             n = width - (newpos - pos)
-            if zero
+            if zero && isfinite(x)
                 ex = (arg < 0 || (plus | space)) + (T <: Union{Val{'a'}, Val{'A'}} ? 2 : 0)
                 so = pos + ex
                 len = (newpos - pos) - ex
@@ -927,7 +931,8 @@ for more details on C `printf` support.
 """
 function format end
 
-function format(io::IO, f::Format, args...) # => Nothing
+# Since it will specialize on `f`, which has a Tuple-type often of length(args), we might as well specialize on `args` too.
+function format(io::IO, f::Format, args::Vararg{Any,N}) where N # => Nothing
     f.numarguments == length(args) || argmismatch(f.numarguments, length(args))
     buf = Base.StringVector(computelen(f.substringranges, f.formats, args))
     pos = format(buf, 1, f, args...)
@@ -935,7 +940,7 @@ function format(io::IO, f::Format, args...) # => Nothing
     return
 end
 
-function format(f::Format, args...) # => String
+function format(f::Format, args::Vararg{Any,N}) where N # => String
     f.numarguments == length(args) || argmismatch(f.numarguments, length(args))
     buf = Base.StringVector(computelen(f.substringranges, f.formats, args))
     pos = format(buf, 1, f, args...)
diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl
index 33970f78648e2..abe547c00ed0d 100644
--- a/stdlib/Printf/test/runtests.jl
+++ b/stdlib/Printf/test/runtests.jl
@@ -116,12 +116,15 @@ end
     @test (Printf.@sprintf "%+f" Inf) == "+Inf"
     @test (Printf.@sprintf "% f" Inf) == " Inf"
     @test (Printf.@sprintf "% #f" Inf) == " Inf"
+    @test (Printf.@sprintf "%07f" Inf) == "    Inf"
     @test (Printf.@sprintf "%f" -Inf) == "-Inf"
     @test (Printf.@sprintf "%+f" -Inf) == "-Inf"
+    @test (Printf.@sprintf "%07f" -Inf) == "   -Inf"
     @test (Printf.@sprintf "%f" NaN) == "NaN"
     @test (Printf.@sprintf "%+f" NaN) == "+NaN"
     @test (Printf.@sprintf "% f" NaN) == " NaN"
     @test (Printf.@sprintf "% #f" NaN) == " NaN"
+    @test (Printf.@sprintf "%07f" NaN) == "    NaN"
     @test (Printf.@sprintf "%e" big"Inf") == "Inf"
     @test (Printf.@sprintf "%e" big"NaN") == "NaN"
 
@@ -169,12 +172,15 @@ end
     @test (Printf.@sprintf "%+e" Inf) == "+Inf"
     @test (Printf.@sprintf "% e" Inf) == " Inf"
     @test (Printf.@sprintf "% #e" Inf) == " Inf"
+    @test (Printf.@sprintf "%07e" Inf) == "    Inf"
     @test (Printf.@sprintf "%e" -Inf) == "-Inf"
     @test (Printf.@sprintf "%+e" -Inf) == "-Inf"
+    @test (Printf.@sprintf "%07e" -Inf) == "   -Inf"
     @test (Printf.@sprintf "%e" NaN) == "NaN"
     @test (Printf.@sprintf "%+e" NaN) == "+NaN"
     @test (Printf.@sprintf "% e" NaN) == " NaN"
     @test (Printf.@sprintf "% #e" NaN) == " NaN"
+    @test (Printf.@sprintf "%07e" NaN) == "    NaN"
     @test (Printf.@sprintf "%e" big"Inf") == "Inf"
     @test (Printf.@sprintf "%e" big"NaN") == "NaN"
 
@@ -1145,4 +1151,11 @@ end
     @test_throws Printf.InvalidFormatStringError Printf.Format("%z")
 end
 
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Printf))
+end
+
+# issue #52749
+@test @sprintf("%.160g", 1.38e-23) == "1.380000000000000060010582465734078799297660966782642624395399644741944111814291318296454846858978271484375e-23"
+
 end # @testset "Printf"
diff --git a/stdlib/Profile/Project.toml b/stdlib/Profile/Project.toml
index 334d475832b6d..6b70f9c7cd19d 100644
--- a/stdlib/Profile/Project.toml
+++ b/stdlib/Profile/Project.toml
@@ -1,14 +1,19 @@
 name = "Profile"
 uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
+version = "1.11.0"
 
 [deps]
-Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+
+[compat]
+StyledStrings = "1.11.0"
 
 [extras]
 Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Base64", "Logging", "Serialization", "Test"]
+test = ["Base64", "InteractiveUtils", "Logging", "Serialization", "Test"]
diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md
index adb91cebb8c46..0b358e5decfa9 100644
--- a/stdlib/Profile/docs/src/index.md
+++ b/stdlib/Profile/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Profile/docs/src/index.md"
+```
+
 # [Profiling](@id lib-profiling)
 
 ## CPU Profiling
@@ -34,7 +38,7 @@ First, a single stack trace at the instant that the signal was thrown is shown,
 followed by the profile report at the next yield point, which may be at task completion for code without yield points
 e.g. tight loops.
 
-Optionally set environment variable `JULIA_PROFILE_PEEK_HEAP_SNAPSHOT` to `1` to also automatically collect a
+Optionally set environment variable [`JULIA_PROFILE_PEEK_HEAP_SNAPSHOT`](@ref JULIA_PROFILE_PEEK_HEAP_SNAPSHOT) to `1` to also automatically collect a
 [heap snapshot](@ref Heap-Snapshots).
 
 ```julia-repl
@@ -106,6 +110,7 @@ The methods in `Profile.Allocs` are not exported and need to be called e.g. as `
 
 ```@docs
 Profile.Allocs.clear
+Profile.Allocs.print
 Profile.Allocs.fetch
 Profile.Allocs.start
 Profile.Allocs.stop
@@ -129,5 +134,29 @@ Traces and records julia objects on the heap. This only records objects known to
 garbage collector. Memory allocated by external libraries not managed by the garbage
 collector will not show up in the snapshot.
 
+To avoid OOMing while recording the snapshot, we added a streaming option to stream out the heap snapshot
+into four files,
+
+```julia-repl
+julia> using Profile
+
+julia> Profile.take_heap_snapshot("snapshot"; streaming=true)
+```
+
+where "snapshot" is the filepath as the prefix for the generated files.
+
+Once the snapshot files are generated, they could be assembled offline with the following command:
+
+```julia-repl
+julia> using Profile
+
+julia> Profile.HeapSnapshot.assemble_snapshot("snapshot", "snapshot.heapsnapshot")
+```
+
 The resulting heap snapshot file can be uploaded to chrome devtools to be viewed.
 For more information, see the [chrome devtools docs](https://developer.chrome.com/docs/devtools/memory-problems/heap-snapshots/#view_snapshots).
+An alternative for analyzing Chromium heap snapshots is with the VS Code extension
+`ms-vscode.vscode-js-profile-flame`.
+
+The Firefox heap snapshots are of a different format, and Firefox currently may
+*not* be used for viewing the heap snapshots generated by Julia.
diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl
index e45f4dca9607f..93c9d3392626f 100644
--- a/stdlib/Profile/src/Allocs.jl
+++ b/stdlib/Profile/src/Allocs.jl
@@ -1,5 +1,12 @@
 module Allocs
 
+global print # Allocs.print is separate from both Base.print and Profile.print
+public @profile,
+    clear,
+    print,
+    fetch
+
+using ..Profile: Profile, ProfileFormat, StackFrameTree, print_flat, print_tree
 using Base.StackTraces: StackTrace, StackFrame, lookup
 using Base: InterpreterIP
 
@@ -33,7 +40,7 @@ end
     Profile.Allocs.@profile [sample_rate=0.1] expr
 
 Profile allocations that happen during `expr`, returning
-both the result and and AllocResults struct.
+both the result and AllocResults struct.
 
 A sample rate of 1.0 will record everything; 0.0 will record nothing.
 
@@ -47,18 +54,17 @@ julia> last(sort(results.allocs, by=x->x.size))
 Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at array.c:127, ...], 5576)
 ```
 
-The best way to visualize these is currently with the
-[PProf.jl](https://github.com/JuliaPerf/PProf.jl) package,
-by invoking `PProf.Allocs.pprof`.
+See the profiling tutorial in the Julia documentation for more information.
+
+!!! compat "Julia 1.11"
 
-!!! note
-    The current implementation of the Allocations Profiler does not
-    capture types for all allocations. Allocations for which the profiler
-    could not capture the type are represented as having type
-    `Profile.Allocs.UnknownType`.
+    Older versions of Julia could not capture types in all cases. In older versions of
+    Julia, if you see an allocation of type `Profile.Allocs.UnknownType`, it means that
+    the profiler doesn't know what type of object was allocated. This mainly happened when
+    the allocation was coming from generated code produced by the compiler. See
+    [issue #43688](https://github.com/JuliaLang/julia/issues/43688) for more info.
 
-    You can read more about the missing types and the plan to improve this, here:
-    <https://github.com/JuliaLang/julia/issues/43688>.
+    Since Julia 1.11, all allocations should have a type reported.
 
 !!! compat "Julia 1.8"
     The allocation profiler was added in Julia 1.8.
@@ -73,11 +79,11 @@ end
 function _prof_expr(expr, opts)
     quote
         $start(; $(esc(opts)))
-        try
+        Base.@__tryfinally(
             $(esc(expr))
-        finally
+            ,
             $stop()
-        end
+        )
     end
 end
 
@@ -138,7 +144,7 @@ end
 # Without this, the Alloc's stacktrace prints for lines and lines and lines...
 function Base.show(io::IO, a::Alloc)
     stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : ""
-    print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
+    Base.print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
 end
 
 const BacktraceCache = Dict{BTElement,Vector{StackFrame}}
@@ -216,4 +222,201 @@ function stacktrace_memoized(
     return stack
 end
 
+function warning_empty()
+    @warn """
+    There were no samples collected.
+    Run your program longer (perhaps by running it multiple times),
+    or adjust the frequency of samples to record every event with
+    the `sample_rate=1.0` kwarg."""
+end
+
+
+"""
+    Profile.Allocs.print([io::IO = stdout,] [data::AllocResults = fetch()]; kwargs...)
+
+Prints profiling results to `io` (by default, `stdout`). If you do not
+supply a `data` vector, the internal buffer of accumulated backtraces
+will be used.
+
+See `Profile.print` for an explanation of the valid keyword arguments.
+"""
+print(; kwargs...) =
+    Profile.print(stdout, fetch(); kwargs...)
+print(io::IO; kwargs...) =
+    Profile.print(io, fetch(); kwargs...)
+print(io::IO, data::AllocResults; kwargs...) =
+    Profile.print(io, data; kwargs...)
+Profile.print(data::AllocResults; kwargs...) =
+    Profile.print(stdout, data; kwargs...)
+
+function Profile.print(io::IO,
+        data::AllocResults,
+        ;
+        format = :tree,
+        C = false,
+        #combine = true,
+        maxdepth::Int = typemax(Int),
+        mincount::Int = 0,
+        noisefloor = 0,
+        sortedby::Symbol = :filefuncline,
+        groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
+        recur::Symbol = :off,
+        )
+    pf = ProfileFormat(;C, maxdepth, mincount, noisefloor, sortedby, recur)
+    Profile.print(io, data, pf, format)
+    return
+end
+
+function Profile.print(io::IO, data::AllocResults, fmt::ProfileFormat, format::Symbol)
+    cols::Int = Base.displaysize(io)[2]
+    fmt.recur ∈ (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
+    data = data.allocs
+    if format === :tree
+        tree(io, data, cols, fmt)
+    elseif format === :flat
+        fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
+        flat(io, data, cols, fmt)
+    else
+        throw(ArgumentError("output format $(repr(format)) not recognized"))
+    end
+    nothing
+end
+
+
+function parse_flat(::Type{T}, data::Vector{Alloc}, C::Bool) where T
+    lilist = StackFrame[]
+    n = Int[]
+    m = Int[]
+    lilist_idx = Dict{T, Int}()
+    recursive = Set{T}()
+    totalbytes = 0
+    for r in data
+        first = true
+        empty!(recursive)
+        nb = r.size # or 1 for counting
+        totalbytes += nb
+        for frame in r.stacktrace
+            !C && frame.from_c && continue
+            key = (T === UInt64 ? ip : frame)
+            idx = get!(lilist_idx, key, length(lilist) + 1)
+            if idx > length(lilist)
+                push!(recursive, key)
+                push!(lilist, frame)
+                push!(n, nb)
+                push!(m, 0)
+            elseif !(key in recursive)
+                push!(recursive, key)
+                n[idx] += nb
+            end
+            if first
+                m[idx] += nb
+                first = false
+            end
+        end
+    end
+    @assert length(lilist) == length(n) == length(m) == length(lilist_idx)
+    return (lilist, n, m, totalbytes)
+end
+
+function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
+    fmt.combine || error(ArgumentError("combine=false"))
+    lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C)
+    filenamemap = Profile.FileNameMap()
+    if isempty(lilist)
+        warning_empty()
+        return true
+    end
+    print_flat(io, lilist, n, m, cols, filenamemap, fmt)
+    Base.println(io, "Total snapshots: ", length(data))
+    Base.println(io, "Total bytes: ", totalbytes)
+    return false
+end
+
+function tree!(root::StackFrameTree{T}, all::Vector{Alloc}, C::Bool, recur::Symbol) where {T}
+    tops = Vector{StackFrameTree{T}}()
+    build = Dict{T, StackFrameTree{T}}()
+    for r in all
+        first = true
+        nb = r.size # or 1 for counting
+        root.recur = 0
+        root.count += nb
+        parent = root
+        for i in reverse(eachindex(r.stacktrace))
+            frame = r.stacktrace[i]
+            key = (T === UInt64 ? ip : frame)
+            if (recur === :flat && !frame.from_c) || recur === :flatc
+                # see if this frame already has a parent
+                this = get!(build, frame, parent)
+                if this !== parent
+                    # Rewind the `parent` tree back, if this exact ip (FIXME) was already present *higher* in the current tree
+                    push!(tops, parent)
+                    parent = this
+                end
+            end
+            !C && frame.from_c && continue
+            this = get!(StackFrameTree{T}, parent.down, key)
+            if recur === :off || this.recur == 0
+                this.frame = frame
+                this.up = parent
+                this.count += nb
+                this.recur = 1
+            else
+                this.count_recur += 1
+            end
+            parent = this
+        end
+        parent.overhead += nb
+        if recur !== :off
+            # We mark all visited nodes to so we'll only count those branches
+            # once for each backtrace. Reset that now for the next backtrace.
+            empty!(build)
+            push!(tops, parent)
+            for top in tops
+                while top.recur != 0
+                    top.max_recur < top.recur && (top.max_recur = top.recur)
+                    top.recur = 0
+                    top = top.up
+                end
+            end
+            empty!(tops)
+        end
+        let this = parent
+            while this !== root
+                this.flat_count += nb
+                this = this.up
+            end
+        end
+    end
+    function cleanup!(node::StackFrameTree)
+        stack = [node]
+        while !isempty(stack)
+            node = pop!(stack)
+            node.recur = 0
+            empty!(node.builder_key)
+            empty!(node.builder_value)
+            append!(stack, values(node.down))
+        end
+        nothing
+    end
+    cleanup!(root)
+    return root
+end
+
+function tree(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
+    fmt.combine || error(ArgumentError("combine=false"))
+    if fmt.combine
+        root = tree!(StackFrameTree{StackFrame}(), data, fmt.C, fmt.recur)
+    else
+        root = tree!(StackFrameTree{UInt64}(), data, fmt.C, fmt.recur)
+    end
+    print_tree(io, root, cols, fmt, false)
+    if isempty(root.down)
+        warning_empty()
+        return true
+    end
+    Base.println(io, "Total snapshots: ", length(data))
+    Base.println(io, "Total bytes: ", root.count)
+    return false
+end
+
 end
diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl
index f0323d0334b1a..7c7e220a6c5bc 100644
--- a/stdlib/Profile/src/Profile.jl
+++ b/stdlib/Profile/src/Profile.jl
@@ -1,19 +1,53 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 """
-Profiling support, main entry point is the [`@profile`](@ref) macro.
+    Profile
+
+Profiling support.
+
+## CPU profiling
+- `@profile foo()` to profile a specific call.
+- `Profile.print()` to print the report. Paths are clickable links in supported terminals and specialized for JULIA_EDITOR etc.
+- `Profile.clear()` to clear the buffer.
+- Send a SIGUSR1 (on linux) or SIGINFO (on macOS/BSD) signal to the process to automatically trigger a profile and print. i.e. `kill -s SIGUSR1/SIGINFO 1234`, where 1234 is the pid of the julia process. On macOS & BSD platforms `ctrl-t` can be used directly.
+
+## Memory profiling
+- `Profile.Allocs.@profile [sample_rate=0.1] foo()` to sample allocations within a specific call. A sample rate of 1.0 will record everything; 0.0 will record nothing.
+- `Profile.Allocs.print()` to print the report.
+- `Profile.Allocs.clear()` to clear the buffer.
+
+## Heap profiling
+- `Profile.take_heap_snapshot()` to record a `.heapsnapshot` record of the heap.
+- Set `JULIA_PROFILE_PEEK_HEAP_SNAPSHOT=true` to capture a heap snapshot when signal $(Sys.isbsd() ? "SIGINFO (ctrl-t)" : "SIGUSR1") is sent.
 """
 module Profile
 
+global print
+export @profile, @profile_walltime
+public clear,
+    print,
+    fetch,
+    retrieve,
+    add_fake_meta,
+    flatten,
+    callers,
+    init,
+    take_heap_snapshot,
+    take_page_profile,
+    clear_malloc_data,
+    Allocs
+
 import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
+import Base: AnnotatedString
+using StyledStrings: @styled_str
 
 const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
 
+const slash = Sys.iswindows() ? "\\" : "/"
+
 # deprecated functions: use `getdict` instead
 lookup(ip::UInt) = lookup(convert(Ptr{Cvoid}, ip))
 
-export @profile
-
 """
     @profile
 
@@ -22,21 +56,43 @@ appended to an internal buffer of backtraces.
 """
 macro profile(ex)
     return quote
-        try
-            start_timer()
+        start_timer()
+        Base.@__tryfinally(
             $(esc(ex))
-        finally
+            ,
             stop_timer()
-        end
+        )
+    end
+end
+
+"""
+    @profile_walltime
+
+`@profile_walltime <expression>` runs your expression while taking periodic backtraces of a sample of all live tasks (both running and not running).
+These are appended to an internal buffer of backtraces.
+
+It can be configured via `Profile.init`, same as the `Profile.@profile`, and that you can't use `@profile` simultaneously with `@profile_walltime`.
+
+As mentioned above, since this tool sample not only running tasks, but also sleeping tasks and tasks performing IO,
+it can be used to diagnose performance issues such as lock contention, IO bottlenecks, and other issues that are not visible in the CPU profile.
+"""
+macro profile_walltime(ex)
+    return quote
+        start_timer(true);
+        Base.@__tryfinally(
+            $(esc(ex))
+            ,
+            stop_timer()
+        )
     end
 end
 
 # An internal function called to show the report after an information request (SIGINFO or SIGUSR1).
 function _peek_report()
-    iob = IOBuffer()
+    iob = Base.AnnotatedIOBuffer()
     ioc = IOContext(IOContext(iob, stderr), :displaysize=>displaysize(stderr))
     print(ioc, groupby = [:thread, :task])
-    Base.print(stderr, String(take!(iob)))
+    Base.print(stderr, read(seekstart(iob), AnnotatedString))
 end
 # This is a ref so that it can be overridden by other profile info consumers.
 const peek_report = Ref{Function}(_peek_report)
@@ -163,10 +219,13 @@ const META_OFFSET_THREADID = 5
 
 """
     print([io::IO = stdout,] [data::Vector = fetch()], [lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)]; kwargs...)
+    print(path::String, [cols::Int = 1000], [data::Vector = fetch()], [lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)]; kwargs...)
 
 Prints profiling results to `io` (by default, `stdout`). If you do not
 supply a `data` vector, the internal buffer of accumulated backtraces
-will be used.
+will be used. Paths are clickable links in supported terminals and
+specialized for [`JULIA_EDITOR`](@ref) with line numbers, or just file
+links if no editor is set.
 
 The keyword arguments can be any combination of:
 
@@ -201,6 +260,13 @@ The keyword arguments can be any combination of:
 
  - `tasks::Union{Int,AbstractVector{Int}}` -- Specify which tasks to include snapshots from in the report. Note that this
     does not control which tasks samples are collected within.
+
+!!! compat "Julia 1.8"
+    The `groupby`, `threads`, and `tasks` keyword arguments were introduced in Julia 1.8.
+
+!!! note
+    Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report.
+
 """
 function print(io::IO,
         data::Vector{<:Unsigned} = fetch(),
@@ -220,7 +286,7 @@ function print(io::IO,
 
     pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
     if groupby === :none
-        print(io, data, lidict, pf, format, threads, tasks, false)
+        print_group(io, data, lidict, pf, format, threads, tasks, false)
     else
         if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
             error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
@@ -229,7 +295,7 @@ function print(io::IO,
         end
         any_nosamples = true
         if format === :tree
-            Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n")
+            Base.print(io, "Overhead ╎ [+additional indent] Count File:Line  Function\n")
             Base.print(io, "=========================================================\n")
         end
         if groupby == [:task, :thread]
@@ -243,8 +309,8 @@ function print(io::IO,
                     nl = length(threadids) > 1 ? "\n" : ""
                     printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
                     for threadid in threadids
-                        printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
-                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
+                        printstyled(io, " Thread $threadid ($(Threads.threadpooldescription(threadid))) "; bold=true, color=Base.info_color())
+                        nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
                         nosamples && (any_nosamples = true)
                         println(io)
                     end
@@ -259,10 +325,10 @@ function print(io::IO,
                     any_nosamples = true
                 else
                     nl = length(taskids) > 1 ? "\n" : ""
-                    printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
+                    printstyled(io, "Thread $threadid ($(Threads.threadpooldescription(threadid)))$nl"; bold=true, color=Base.info_color())
                     for taskid in taskids
                         printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
-                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
+                        nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
                         nosamples && (any_nosamples = true)
                         println(io)
                     end
@@ -274,7 +340,7 @@ function print(io::IO,
             isempty(taskids) && (any_nosamples = true)
             for taskid in taskids
                 printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
-                nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
+                nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true)
                 nosamples && (any_nosamples = true)
                 println(io)
             end
@@ -283,8 +349,8 @@ function print(io::IO,
             threadids = intersect(get_thread_ids(data), threads)
             isempty(threadids) && (any_nosamples = true)
             for threadid in threadids
-                printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
-                nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
+                printstyled(io, "Thread $threadid ($(Threads.threadpooldescription(threadid))) "; bold=true, color=Base.info_color())
+                nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true)
                 nosamples && (any_nosamples = true)
                 println(io)
             end
@@ -294,6 +360,13 @@ function print(io::IO,
     return
 end
 
+function print(path::String, cols::Int = 1000, args...; kwargs...)
+    open(path, "w") do io
+        ioc = IOContext(io, :displaysize=>(1000,cols))
+        print(ioc, args...; kwargs...)
+    end
+end
+
 """
     print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...)
 
@@ -306,7 +379,7 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument
 print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
     print(stdout, data, lidict; kwargs...)
 
-function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
+function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
                 format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
                 is_subsection::Bool = false)
     cols::Int = Base.displaysize(io)[2]
@@ -362,9 +435,10 @@ end
 
 function has_meta(data)
     for i in 6:length(data)
-        data[i] == 0 || continue            # first block end null
-        data[i - 1] == 0 || continue        # second block end null
-        data[i - META_OFFSET_SLEEPSTATE] in 1:2 || continue
+        data[i] == 0 || continue                            # first block end null
+        data[i - 1] == 0 || continue                        # second block end null
+        data[i - META_OFFSET_SLEEPSTATE] in 1:3 || continue # 1 for not sleeping, 2 for sleeping, 3 for task profiler fake state
+                                                            # See definition in `src/julia_internal.h`
         data[i - META_OFFSET_CPUCYCLECLOCK] != 0 || continue
         data[i - META_OFFSET_TASKID] != 0 || continue
         data[i - META_OFFSET_THREADID] != 0 || continue
@@ -464,12 +538,29 @@ function flatten(data::Vector, lidict::LineInfoDict)
     return (newdata, newdict)
 end
 
+const SRC_DIR = normpath(Base.SOURCEDIR, "src")
+
 # Take a file-system path and try to form a concise representation of it
 # based on the package ecosystem
-function short_path(spath::Symbol, filenamecache::Dict{Symbol, String})
+# filenamecache is a dict of spath -> (fullpath or "" if !isfile, modulename, shortpath)
+function short_path(spath::Symbol, filenamecache::Dict{Symbol, Tuple{String,String,String}})
     return get!(filenamecache, spath) do
-        path = string(spath)
-        if isabspath(path)
+        path = Base.fixup_stdlib_path(string(spath))
+        path_norm = normpath(path)
+        possible_base_path = normpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path)
+        lib_dir = abspath(Sys.BINDIR, Base.LIBDIR)
+        compiler_dir = normpath(Base.DATAROOT, "julia", "Compiler/")
+        if startswith(path_norm, SRC_DIR)
+            remainder = only(split(path_norm, SRC_DIR, keepempty=false))
+            return (isfile(path_norm) ? path_norm : ""), "@juliasrc", remainder
+        elseif startswith(path_norm, lib_dir)
+            remainder = only(split(path_norm, lib_dir, keepempty=false))
+            return (isfile(path_norm) ? path_norm : ""), "@julialib", remainder
+        elseif startswith(path_norm, compiler_dir)
+            remainder = split(path_norm, compiler_dir, keepempty=false)[end]
+            possible_compiler_path = normpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "Compiler", remainder)
+            return (isfile(possible_compiler_path) ? possible_compiler_path : ""), "@Compiler", remainder
+        elseif isabspath(path)
             if ispath(path)
                 # try to replace the file-system prefix with a short "@Module" one,
                 # assuming that profile came from the current machine
@@ -483,28 +574,28 @@ function short_path(spath::Symbol, filenamecache::Dict{Symbol, String})
                         project_file = joinpath(root, proj)
                         if Base.isfile_casesensitive(project_file)
                             pkgid = Base.project_file_name_uuid(project_file, "")
-                            isempty(pkgid.name) && return path # bad Project file
+                            isempty(pkgid.name) && return path, "", path # bad Project file
                             # return the joined the module name prefix and path suffix
-                            path = path[nextind(path, sizeof(root)):end]
-                            return string("@", pkgid.name, path)
+                            _short_path = path[nextind(path, sizeof(root)):end]
+                            return path, string("@", pkgid.name), _short_path
                         end
                     end
                 end
             end
-            return path
-        elseif isfile(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path))
+            return path, "", path
+        elseif isfile(possible_base_path)
             # do the same mechanic for Base (or Core/Compiler) files as above,
             # but they start from a relative path
-            return joinpath("@Base", normpath(path))
+            return possible_base_path, "@Base", path_norm
         else
             # for non-existent relative paths (such as "REPL[1]"), just consider simplifying them
-            return normpath(path) # drop leading "./"
+            return "", "", path_norm # drop leading "./"
         end
     end
 end
 
 """
-    callers(funcname, [data, lidict], [filename=<filename>], [linerange=<start:stop>]) -> Vector{Tuple{count, lineinfo}}
+    callers(funcname, [data, lidict], [filename=<filename>], [linerange=<start:stop>])::Vector{Tuple{count, lineinfo}}
 
 Given a previous profiling run, determine who called a particular function. Supplying the
 filename (and optionally, range of line numbers over which the function is defined) allows
@@ -555,9 +646,9 @@ Julia, and examine the resulting `*.mem` files.
 clear_malloc_data() = ccall(:jl_clear_malloc_data, Cvoid, ())
 
 # C wrappers
-function start_timer()
+function start_timer(all_tasks::Bool=false)
     check_init() # if the profile buffer hasn't been initialized, initialize with default size
-    status = ccall(:jl_profile_start_timer, Cint, ())
+    status = ccall(:jl_profile_start_timer, Cint, (Bool,), all_tasks)
     if status < 0
         error(error_codes[status])
     end
@@ -641,10 +732,10 @@ function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0)
     !isempty(data) && has_meta(data) && error("input already has metadata")
     cpu_clock_cycle = UInt64(99)
     data_with_meta = similar(data, 0)
-    for i = 1:length(data)
+    for i in eachindex(data)
         val = data[i]
         if iszero(val)
-            # (threadid, taskid, cpu_cycle_clock, thread_sleeping)
+            # META_OFFSET_THREADID, META_OFFSET_TASKID, META_OFFSET_CPUCYCLECLOCK, META_OFFSET_SLEEPSTATE
             push!(data_with_meta, threadid, taskid, cpu_clock_cycle+=1, false+1, 0, 0)
         else
             push!(data_with_meta, val)
@@ -669,12 +760,16 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
     startframe = length(data)
     skip = false
     nsleeping = 0
+    is_task_profile = false
     for i in startframe:-1:1
-        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (its read ahead below) and extra block end NULL IP
+        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
         ip = data[i]
         if is_block_end(data, i)
             # read metadata
-            thread_sleeping = data[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
+            thread_sleeping_state = data[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
+            if thread_sleeping_state == 2
+                is_task_profile = true
+            end
             # cpu_cycle_clock = data[i - META_OFFSET_CPUCYCLECLOCK]
             taskid = data[i - META_OFFSET_TASKID]
             threadid = data[i - META_OFFSET_THREADID]
@@ -682,7 +777,7 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
                 skip = true
                 continue
             end
-            if thread_sleeping == 1
+            if thread_sleeping_state == 1
                 nsleeping += 1
             end
             skip = false
@@ -716,12 +811,14 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
         end
     end
     @assert length(lilist) == length(n) == length(m) == length(lilist_idx)
-    return (lilist, n, m, totalshots, nsleeping)
+    return (lilist, n, m, totalshots, nsleeping, is_task_profile)
 end
 
+const FileNameMap = Dict{Symbol,Tuple{String,String,String}}
+
 function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat,
                 threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
-    lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
+    lilist, n, m, totalshots, nsleeping, is_task_profile = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
     if false # optional: drop the "non-interpretable" ones
         keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist)
         lilist = lilist[keep]
@@ -729,7 +826,7 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo
         m = m[keep]
     end
     util_perc = (1 - (nsleeping / totalshots)) * 100
-    filenamemap = Dict{Symbol,String}()
+    filenamemap = FileNameMap()
     if isempty(lilist)
         if is_subsection
             Base.print(io, "Total snapshots: ")
@@ -741,19 +838,57 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo
         return true
     end
     is_subsection || print_flat(io, lilist, n, m, cols, filenamemap, fmt)
-    Base.print(io, "Total snapshots: ", totalshots, ". Utilization: ", round(Int, util_perc), "%")
+    if is_task_profile
+        Base.print(io, "Total snapshots: ", totalshots, "\n")
+    else
+        Base.print(io, "Total snapshots: ", totalshots, ". Utilization: ", round(Int, util_perc), "%")
+    end
     if is_subsection
         println(io)
         print_flat(io, lilist, n, m, cols, filenamemap, fmt)
-    else
+    elseif !is_task_profile
         Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n")
     end
     return false
 end
 
+# make a terminal-clickable link to the file and linenum.
+# Similar to `define_default_editors` in `Base.Filesystem` but for creating URIs not commands
+function editor_link(path::String, linenum::Int)
+    # Note: the editor path can include spaces (if escaped) and flags.
+    editor = nothing
+    for var in ["JULIA_EDITOR", "VISUAL", "EDITOR"]
+        str = get(ENV, var, nothing)
+        str isa String || continue
+        editor = str
+        break
+    end
+    path_encoded = Base.Filesystem.encode_uri_component(path)
+    if editor !== nothing
+        if editor == "code"
+            return "vscode://file/$path_encoded:$linenum"
+        elseif editor == "subl" || editor == "sublime_text"
+            return "subl://open?url=file://$path_encoded&line=$linenum"
+        elseif editor == "idea" || occursin("idea", editor)
+            return "idea://open?file=$path_encoded&line=$linenum"
+        elseif editor == "pycharm"
+            return "pycharm://open?file=$path_encoded&line=$linenum"
+        elseif editor == "atom"
+            return "atom://core/open/file?filename=$path_encoded&line=$linenum"
+        elseif editor == "emacsclient" || editor == "emacs"
+            return "emacs://open?file=$path_encoded&line=$linenum"
+        elseif editor == "vim" || editor == "nvim"
+            # Note: Vim/Nvim may not support standard URI schemes without specific plugins
+            return "vim://open?file=$path_encoded&line=$linenum"
+        end
+    end
+    # fallback to generic URI, but line numbers are not supported by generic URI
+    return Base.Filesystem.uripath(path)
+end
+
 function print_flat(io::IO, lilist::Vector{StackFrame},
         n::Vector{Int}, m::Vector{Int},
-        cols::Int, filenamemap::Dict{Symbol,String},
+        cols::Int, filenamemap::FileNameMap,
         fmt::ProfileFormat)
     if fmt.sortedby === :count
         p = sortperm(n)
@@ -765,18 +900,18 @@ function print_flat(io::IO, lilist::Vector{StackFrame},
     lilist = lilist[p]
     n = n[p]
     m = m[p]
-    filenames = String[short_path(li.file, filenamemap) for li in lilist]
+    pkgnames_filenames = Tuple{String,String,String}[short_path(li.file, filenamemap) for li in lilist]
     funcnames = String[string(li.func) for li in lilist]
     wcounts = max(6, ndigits(maximum(n)))
     wself = max(9, ndigits(maximum(m)))
     maxline = 1
     maxfile = 6
     maxfunc = 10
-    for i in 1:length(lilist)
+    for i in eachindex(lilist)
         li = lilist[i]
         maxline = max(maxline, li.line)
-        maxfunc = max(maxfunc, length(funcnames[i]))
-        maxfile = max(maxfile, length(filenames[i]))
+        maxfunc = max(maxfunc, textwidth(funcnames[i]))
+        maxfile = max(maxfile, sum(textwidth, pkgnames_filenames[i][2:3]) + 1)
     end
     wline = max(5, ndigits(maxline))
     ntext = max(20, cols - wcounts - wself - wline - 3)
@@ -792,7 +927,7 @@ function print_flat(io::IO, lilist::Vector{StackFrame},
             rpad("File", wfile, " "), " ", lpad("Line", wline, " "), " Function")
     println(io, lpad("=====", wcounts, " "), " ", lpad("========", wself, " "), " ",
             rpad("====", wfile, " "), " ", lpad("====", wline, " "), " ========")
-    for i = 1:length(n)
+    for i in eachindex(n)
         n[i] < fmt.mincount && continue
         li = lilist[i]
         Base.print(io, lpad(string(n[i]), wcounts, " "), " ")
@@ -804,16 +939,29 @@ function print_flat(io::IO, lilist::Vector{StackFrame},
                 Base.print(io, "[any unknown stackframes]")
             end
         else
-            file = filenames[i]
+            path, pkgname, file = pkgnames_filenames[i]
             isempty(file) && (file = "[unknown file]")
-            Base.print(io, rpad(rtruncto(file, wfile), wfile, " "), " ")
+            pkgcolor = get!(() -> popfirst!(Base.STACKTRACE_MODULECOLORS), PACKAGE_FIXEDCOLORS, pkgname)
+            Base.printstyled(io, pkgname, color=pkgcolor)
+            file_trunc = ltruncate(file, max(1, wfile))
+            wpad = wfile - textwidth(pkgname)
+            if !isempty(pkgname) && !startswith(file_trunc, slash)
+                Base.print(io, slash)
+                wpad -= 1
+            end
+            if isempty(path)
+                Base.print(io, rpad(file_trunc, wpad, " "))
+            else
+                link = editor_link(path, li.line)
+                Base.print(io, rpad(styled"{link=$link:$file_trunc}", wpad, " "))
+            end
             Base.print(io, lpad(li.line > 0 ? string(li.line) : "?", wline, " "), " ")
             fname = funcnames[i]
             if !li.from_c && li.linfo !== nothing
                 fname = sprint(show_spec_linfo, li)
             end
             isempty(fname) && (fname = "[unknown function]")
-            Base.print(io, ltruncto(fname, wfunc))
+            Base.print(io, rtruncate(fname, wfunc))
         end
         println(io)
     end
@@ -831,13 +979,14 @@ mutable struct StackFrameTree{T} # where T <: Union{UInt64, StackFrame}
     flat_count::Int     # number of times this frame was in the flattened representation (unlike count, this'll sum to 100% of parent)
     max_recur::Int      # maximum number of times this frame was the *top* of the recursion in the stack
     count_recur::Int    # sum of the number of times this frame was the *top* of the recursion in a stack (divide by count to get an average)
+    sleeping::Bool      # whether this frame was in a sleeping state
     down::Dict{T, StackFrameTree{T}}
     # construction workers:
     recur::Int
     builder_key::Vector{UInt64}
     builder_value::Vector{StackFrameTree{T}}
     up::StackFrameTree{T}
-    StackFrameTree{T}() where {T} = new(UNKNOWN, 0, 0, 0, 0, 0, Dict{T, StackFrameTree{T}}(), 0, UInt64[], StackFrameTree{T}[])
+    StackFrameTree{T}() where {T} = new(UNKNOWN, 0, 0, 0, 0, 0, true, Dict{T, StackFrameTree{T}}(), 0, UInt64[], StackFrameTree{T}[])
 end
 
 
@@ -852,22 +1001,24 @@ function indent(depth::Int)
     return indent
 end
 
-function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::Dict{Symbol,String}, showpointer::Bool)
+# mimics Stacktraces
+const PACKAGE_FIXEDCOLORS = Dict{String, Any}("@Base" => :gray, "@Core" => :gray)
+
+function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::FileNameMap, showpointer::Bool)
     nindent = min(cols>>1, level)
     ndigoverhead = ndigits(maxes.overhead)
     ndigcounts = ndigits(maxes.count)
     ndigline = ndigits(maximum(frame.frame.line for frame in frames)) + 6
     ntext = max(30, cols - ndigoverhead - nindent - ndigcounts - ndigline - 6)
     widthfile = 2*ntext÷5 # min 12
-    widthfunc = 3*ntext÷5 # min 18
-    strs = Vector{String}(undef, length(frames))
+    strs = Vector{AnnotatedString{String}}(undef, length(frames))
     showextra = false
     if level > nindent
         nextra = level - nindent
         nindent -= ndigits(nextra) + 2
         showextra = true
     end
-    for i = 1:length(frames)
+    for i in eachindex(frames)
         frame = frames[i]
         li = frame.frame
         stroverhead = lpad(frame.overhead > 0 ? string(frame.overhead) : "", ndigoverhead, " ")
@@ -876,6 +1027,10 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma
             base = string(base, "+", nextra, " ")
         end
         strcount = rpad(string(frame.count), ndigcounts, " ")
+        if frame.sleeping
+            stroverhead = styled"{gray:$(stroverhead)}"
+            strcount = styled"{gray:$(strcount)}"
+        end
         if li != UNKNOWN
             if li.line == li.pointer
                 strs[i] = string(stroverhead, "╎", base, strcount, " ",
@@ -888,7 +1043,8 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma
                 else
                     fname = string(li.func)
                 end
-                filename = short_path(li.file, filenamemap)
+                frame.sleeping && (fname = styled"{gray:$(fname)}")
+                path, pkgname, filename = short_path(li.file, filenamemap)
                 if showpointer
                     fname = string(
                         "0x",
@@ -896,16 +1052,26 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma
                         " ",
                         fname)
                 end
-                strs[i] = string(stroverhead, "╎", base, strcount, " ",
-                    rtruncto(filename, widthfile),
-                    ":",
-                    li.line == -1 ? "?" : string(li.line),
-                    "; ",
-                    ltruncto(fname, widthfunc))
+                pkgcolor = get!(() -> popfirst!(Base.STACKTRACE_MODULECOLORS), PACKAGE_FIXEDCOLORS, pkgname)
+                remaining_path = ltruncate(filename, max(1, widthfile - textwidth(pkgname) - 1))
+                linenum = li.line == -1 ? "?" : string(li.line)
+                _slash = (!isempty(pkgname) && !startswith(remaining_path, slash)) ? slash : ""
+                styled_path = styled"{$pkgcolor:$pkgname}$(_slash)$remaining_path:$linenum"
+                rich_file = if isempty(path)
+                    styled_path
+                else
+                    link = editor_link(path, li.line)
+                    styled"{link=$link:$styled_path}"
+                end
+                strs[i] = Base.annotatedstring(stroverhead, "╎", base, strcount, " ", rich_file, "  ", fname)
+                if frame.overhead > 0
+                    strs[i] = styled"{bold:$(strs[i])}"
+                end
             end
         else
             strs[i] = string(stroverhead, "╎", base, strcount, " [unknown stackframe]")
         end
+        strs[i] = rtruncate(strs[i], cols)
     end
     return strs
 end
@@ -920,12 +1086,16 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
     startframe = length(all)
     skip = false
     nsleeping = 0
+    is_task_profile = false
+    is_sleeping = true
     for i in startframe:-1:1
         (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
         ip = all[i]
         if is_block_end(all, i)
             # read metadata
-            thread_sleeping = all[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
+            thread_sleeping_state = all[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
+            is_sleeping = thread_sleeping_state == 1
+            is_task_profile = thread_sleeping_state == 2
             # cpu_cycle_clock = all[i - META_OFFSET_CPUCYCLECLOCK]
             taskid = all[i - META_OFFSET_TASKID]
             threadid = all[i - META_OFFSET_THREADID]
@@ -934,7 +1104,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
                 skip = true
                 continue
             end
-            if thread_sleeping == 1
+            if thread_sleeping_state == 1
                 nsleeping += 1
             end
             skip = false
@@ -980,6 +1150,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
                         parent = build[j]
                         parent.recur += 1
                         parent.count_recur += 1
+                        parent.sleeping &= is_sleeping
                         found = true
                         break
                     end
@@ -999,6 +1170,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
                     while this !== parent && (recur === :off || this.recur == 0)
                         this.count += 1
                         this.recur = 1
+                        this.sleeping &= is_sleeping
                         this = this.up
                     end
                 end
@@ -1020,6 +1192,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
                     this.up = parent
                     this.count += 1
                     this.recur = 1
+                    this.sleeping &= is_sleeping
                 end
                 parent = this
             end
@@ -1040,7 +1213,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
         nothing
     end
     cleanup!(root)
-    return root, nsleeping
+    return root, nsleeping, is_task_profile
 end
 
 function maxstats(root::StackFrameTree)
@@ -1064,10 +1237,10 @@ end
 # avoid stack overflows.
 function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat, is_subsection::Bool) where T
     maxes = maxstats(bt)
-    filenamemap = Dict{Symbol,String}()
-    worklist = [(bt, 0, 0, "")]
+    filenamemap = FileNameMap()
+    worklist = [(bt, 0, 0, AnnotatedString(""))]
     if !is_subsection
-        Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n")
+        Base.print(io, "Overhead ╎ [+additional indent] Count File:Line  Function\n")
         Base.print(io, "=========================================================\n")
     end
     while !isempty(worklist)
@@ -1098,7 +1271,7 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat
             count = down.count
             count < fmt.mincount && continue
             count < noisefloor && continue
-            str = strs[i]
+            str = strs[i]::AnnotatedString
             noisefloor_down = fmt.noisefloor > 0 ? floor(Int, fmt.noisefloor * sqrt(count)) : 0
             pushfirst!(worklist, (down, level + 1, noisefloor_down, str))
         end
@@ -1109,9 +1282,9 @@ end
 function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat,
                 threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
     if fmt.combine
-        root, nsleeping = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
+        root, nsleeping, is_task_profile = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
     else
-        root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
+        root, nsleeping, is_task_profile = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
     end
     util_perc = (1 - (nsleeping / root.count)) * 100
     is_subsection || print_tree(io, root, cols, fmt, is_subsection)
@@ -1125,11 +1298,15 @@ function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, Line
         end
         return true
     end
-    Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%")
+    if is_task_profile
+        Base.print(io, "Total snapshots: ", root.count, "\n")
+    else
+        Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%")
+    end
     if is_subsection
         Base.println(io)
         print_tree(io, root, cols, fmt, is_subsection)
-    else
+    elseif !is_task_profile
         Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n")
     end
     return false
@@ -1159,24 +1336,7 @@ function callersf(matchfunc::Function, bt::Vector, lidict::LineInfoFlatDict)
     return [(v[i], k[i]) for i in p]
 end
 
-# Utilities
-function rtruncto(str::String, w::Int)
-    if length(str) <= w
-        return str
-    else
-        return string("...", str[prevind(str, end, w-4):end])
-    end
-end
-function ltruncto(str::String, w::Int)
-    if length(str) <= w
-        return str
-    else
-        return string(str[1:nextind(str, 1, w-4)], "...")
-    end
-end
-
-
-truncto(str::Symbol, w::Int) = truncto(string(str), w)
+## Utilities
 
 # Order alphabetically (file, function) and then by line number
 function liperm(lilist::Vector{StackFrame})
@@ -1213,33 +1373,119 @@ end
 
 
 """
-    Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false)
-    Profile.take_heap_snapshot(filepath::String, all_one::Bool=false)
-    Profile.take_heap_snapshot(all_one::Bool=false)
+    Profile.take_heap_snapshot(filepath::String, all_one::Bool=false;
+                               redact_data::Bool=true, streaming::Bool=false)
+    Profile.take_heap_snapshot(all_one::Bool=false; redact_data:Bool=true,
+                               dir::String=nothing, streaming::Bool=false)
 
 Write a snapshot of the heap, in the JSON format expected by the Chrome
-Devtools Heap Snapshot viewer (.heapsnapshot extension), to a file
-(`\$pid_\$timestamp.heapsnapshot`) in the current directory, or the given
-file path, or IO stream. If `all_one` is true, then report the size of
-every object as one so they can be easily counted. Otherwise, report the
-actual size.
+Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file
+(`\$pid_\$timestamp.heapsnapshot`) in the current directory by default (or tempdir if
+the current directory is unwritable), or in `dir` if given, or the given
+full file path, or IO stream.
+
+If `all_one` is true, then report the size of every object as one so they can be easily
+counted. Otherwise, report the actual size.
+
+If `redact_data` is true (default), then do not emit the contents of any object.
+
+If `streaming` is true, we will stream the snapshot data out into four files, using filepath
+as the prefix, to avoid having to hold the entire snapshot in memory. This option should be
+used for any setting where your memory is constrained. These files can then be reassembled
+by calling Profile.HeapSnapshot.assemble_snapshot(), which can
+be done offline.
+
+NOTE: We strongly recommend setting streaming=true for performance reasons. Reconstructing
+the snapshot from the parts requires holding the entire snapshot in memory, so if the
+snapshot is large, you can run out of memory while processing it. Streaming allows you to
+reconstruct the snapshot offline, after your workload is done running.
+If you do attempt to collect a snapshot with streaming=false (the default, for
+backwards-compatibility) and your process is killed, note that this will always save the
+parts in the same directory as your provided filepath, so you can still reconstruct the
+snapshot after the fact, via `assemble_snapshot()`.
 """
-function take_heap_snapshot(io::IOStream, all_one::Bool=false)
-    Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one)))
-end
-function take_heap_snapshot(filepath::String, all_one::Bool=false)
-    open(filepath, "w") do io
-        take_heap_snapshot(io, all_one)
+function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; redact_data::Bool=true, streaming::Bool=false)
+    if streaming
+        _stream_heap_snapshot(filepath, all_one, redact_data)
+    else
+        # Support the legacy, non-streaming mode, by first streaming the parts, then
+        # reassembling it after we're done.
+        prefix = filepath
+        _stream_heap_snapshot(prefix, all_one, redact_data)
+        Profile.HeapSnapshot.assemble_snapshot(prefix, filepath)
+        Profile.HeapSnapshot.cleanup_streamed_files(prefix)
     end
     return filepath
 end
-function take_heap_snapshot(all_one::Bool=false)
-    f = joinpath(tempdir(), "$(getpid())_$(time_ns()).heapsnapshot")
-    return take_heap_snapshot(f, all_one)
+function take_heap_snapshot(io::IO, all_one::Bool=false; redact_data::Bool=true)
+    # Support the legacy, non-streaming mode, by first streaming the parts to a tempdir,
+    # then reassembling it after we're done.
+    dir = tempdir()
+    prefix = joinpath(dir, "snapshot")
+    _stream_heap_snapshot(prefix, all_one, redact_data)
+    Profile.HeapSnapshot.assemble_snapshot(prefix, io)
+end
+function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool, redact_data::Bool)
+    # Nodes and edges are binary files
+    open("$prefix.nodes", "w") do nodes
+        open("$prefix.edges", "w") do edges
+            open("$prefix.strings", "w") do strings
+                # The following file is json data
+                open("$prefix.metadata.json", "w") do json
+                    Base.@_lock_ios(nodes,
+                    Base.@_lock_ios(edges,
+                    Base.@_lock_ios(strings,
+                    Base.@_lock_ios(json,
+                        ccall(:jl_gc_take_heap_snapshot,
+                            Cvoid,
+                            (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar, Cchar),
+                            nodes.handle, edges.handle, strings.handle, json.handle,
+                            Cchar(all_one), Cchar(redact_data))
+                    )
+                    )
+                    )
+                    )
+                end
+            end
+        end
+    end
 end
+function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing, kwargs...) where {S <: AbstractString}
+    fname = "$(getpid())_$(time_ns()).heapsnapshot"
+    if isnothing(dir)
+        wd = pwd()
+        fpath = joinpath(wd, fname)
+        try
+            touch(fpath)
+            rm(fpath; force=true)
+        catch
+            @warn "Cannot write to current directory `$(pwd())` so saving heap snapshot to `$(tempdir())`" maxlog=1 _id=Symbol(wd)
+            fpath = joinpath(tempdir(), fname)
+        end
+    else
+        fpath = joinpath(expanduser(dir), fname)
+    end
+    return take_heap_snapshot(fpath, all_one; kwargs...)
+end
+
+"""
+    Profile.take_page_profile(io::IOStream)
+    Profile.take_page_profile(filepath::String)
 
+Write a JSON snapshot of the pages from Julia's pool allocator, printing for every pool allocated object, whether it's garbage, or its type.
+"""
+function take_page_profile(io::IOStream)
+    Base.@_lock_ios(io, ccall(:jl_gc_take_page_profile, Cvoid, (Ptr{Cvoid},), io.handle))
+end
+function take_page_profile(filepath::String)
+    open(filepath, "w") do io
+        take_page_profile(io)
+    end
+    return filepath
+end
 
 include("Allocs.jl")
+include("heapsnapshot_reassemble.jl")
 include("precompile.jl")
 
 end # module
diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl
new file mode 100644
index 0000000000000..b2d86ee1f27b6
--- /dev/null
+++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl
@@ -0,0 +1,257 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module HeapSnapshot
+
+"""
+    assemble_snapshot(filepath::AbstractString, out_file::AbstractString)
+
+Assemble a .heapsnapshot file from the .json files produced by `Profile.take_snapshot`.
+"""
+
+# SoA layout to reduce padding
+struct Edges
+    type::Vector{Int8}       # index into `snapshot.meta.edge_types`
+    name_or_index::Vector{UInt} # Either an index into `snapshot.strings`, or the index in an array, depending on edge_type
+    to_pos::Vector{UInt}   # index into `snapshot.nodes`
+end
+function Edges(n::Int)
+    Edges(
+        Vector{Int8}(undef, n),
+        Vector{UInt}(undef, n),
+        Vector{UInt}(undef, n),
+    )
+end
+Base.length(n::Edges) = length(n.type)
+
+# trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them
+struct Nodes
+    type::Vector{Int8}         # index into `snapshot.meta.node_types`
+    name_idx::Vector{UInt32} # index into `snapshot.strings`
+    id::Vector{UInt}           # unique id, in julia it is the address of the object
+    self_size::Vector{Int}     # size of the object itself, not including the size of its fields
+    edge_count::Vector{UInt} # number of outgoing edges
+    edges::Edges               # outgoing edges
+    # This is the main complexity of the .heapsnapshot format, and it's the reason we need
+    # to read in all the data before writing it out. The edges vector contains all edges,
+    # but organized by which node they came from. First, it contains all the edges coming
+    # out of node 0, then all edges leaving node 1, etc. So we need to have visited all
+    # edges, and assigned them to their corresponding nodes, before we can emit the file.
+    edge_idxs::Vector{Vector{UInt}} # indexes into edges, keeping per-node outgoing edge ids
+end
+function Nodes(n::Int, e::Int)
+    Nodes(
+        Vector{Int8}(undef, n),
+        Vector{UInt32}(undef, n),
+        Vector{UInt}(undef, n),
+        Vector{Int}(undef, n),
+        Vector{UInt32}(undef, n),
+        Edges(e),
+        [Vector{UInt}() for _ in 1:n],  # Take care to construct n separate empty vectors
+    )
+end
+Base.length(n::Nodes) = length(n.type)
+
+const k_node_number_of_fields = 7
+
+# Like Base.dec, but doesn't allocate a string and writes directly to the io object
+# We know all of the numbers we're about to write fit into a UInt and are non-negative
+let _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
+    global _write_decimal_number
+    _write_decimal_number(io, x::Integer, buf) = _write_decimal_number(io, unsigned(x), buf)
+    function _write_decimal_number(io, x::Unsigned, digits_buf)
+        buf = digits_buf
+        n = ndigits(x)
+        i = n
+        @inbounds while i >= 2
+            d, r = divrem(x, 0x64)
+            d100 = _dec_d100[(r % Int)::Int + 1]
+            buf[i-1] = d100 % UInt8
+            buf[i] = (d100 >> 0x8) % UInt8
+            x = oftype(x, d)
+            i -= 2
+        end
+        if i > 0
+            @inbounds buf[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8
+        end
+        write(io, @view buf[max(i, 1):n])
+    end
+end
+
+function assemble_snapshot(in_prefix, out_file::AbstractString = in_prefix)
+    open(out_file, "w") do io
+        assemble_snapshot(in_prefix, io)
+    end
+end
+
+# Manually parse and write the .json files, given that we don't have JSON import/export in
+# julia's stdlibs.
+function assemble_snapshot(in_prefix, io::IO)
+    preamble = read(string(in_prefix, ".metadata.json"), String)
+    pos = last(findfirst("node_count\":", preamble)) + 1
+    endpos = findnext(==(','), preamble, pos) - 1
+    node_count = parse(Int, String(@view preamble[pos:endpos]))
+
+    pos = last(findnext("edge_count\":", preamble, endpos)) + 1
+    endpos = findnext(==(','), preamble, pos) - 1
+    edge_count = parse(Int, String(@view preamble[pos:endpos]))
+
+    nodes = Nodes(node_count, edge_count)
+
+    orphans = Set{UInt}() # nodes that have no incoming edges
+    # Parse nodes with empty edge counts that we need to fill later
+    open(string(in_prefix, ".nodes"), "r") do nodes_file
+        for i in 1:length(nodes)
+            node_type = read(nodes_file, Int8)
+            node_name_idx = read(nodes_file, UInt)
+            id = read(nodes_file, UInt)
+            self_size = read(nodes_file, Int)
+            @assert read(nodes_file, Int) == 0 # trace_node_id
+            @assert read(nodes_file, Int8) == 0 # detachedness
+
+            nodes.type[i] = node_type
+            nodes.name_idx[i] = node_name_idx
+            nodes.id[i] = id
+            nodes.self_size[i] = self_size
+            nodes.edge_count[i] = 0 # edge_count
+            # populate the orphans set with node index
+            push!(orphans, i-1)
+        end
+    end
+
+    # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets
+    open(string(in_prefix, ".edges"), "r") do edges_file
+        for i in 1:length(nodes.edges)
+            edge_type = read(edges_file, Int8)
+            edge_name_or_index = read(edges_file, UInt)
+            from_node = read(edges_file, UInt)
+            to_node = read(edges_file, UInt)
+
+            nodes.edges.type[i] = edge_type
+            nodes.edges.name_or_index[i] = edge_name_or_index
+            nodes.edges.to_pos[i] = to_node * k_node_number_of_fields # 7 fields per node, the streaming format doesn't multiply the offset by 7
+            nodes.edge_count[from_node + 1] += UInt32(1)  # C and JSON use 0-based indexing
+            push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges
+            # remove the node from the orphans if it has at least one incoming edge
+            if to_node in orphans
+                delete!(orphans, to_node)
+            end
+        end
+    end
+
+    _digits_buf = zeros(UInt8, ndigits(typemax(UInt)))
+    println(io, @view(preamble[1:end-1]), ",") # remove trailing "}" to reopen the object
+
+    println(io, "\"nodes\":[")
+    for i in 1:length(nodes)
+        i > 1 && println(io, ",")
+        _write_decimal_number(io, nodes.type[i], _digits_buf)
+        print(io, ",")
+        _write_decimal_number(io, nodes.name_idx[i], _digits_buf)
+        print(io, ",")
+        _write_decimal_number(io, nodes.id[i], _digits_buf)
+        print(io, ",")
+        _write_decimal_number(io, nodes.self_size[i], _digits_buf)
+        print(io, ",")
+        _write_decimal_number(io, nodes.edge_count[i], _digits_buf)
+        print(io, ",0,0")
+    end
+    print(io, "],\n")
+    print(io, "\"edges\":[")
+    e = 1
+    for n in 1:length(nodes)
+        count = nodes.edge_count[n]
+        len_edges = length(nodes.edge_idxs[n])
+        @assert count == len_edges "For node $n: $count != $len_edges"
+        for i in nodes.edge_idxs[n]
+            e > 1 && print(io, ",")
+            println(io)
+            _write_decimal_number(io, nodes.edges.type[i], _digits_buf)
+            print(io, ",")
+            _write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf)
+            print(io, ",")
+            _write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf)
+            if !(nodes.edges.to_pos[i] % k_node_number_of_fields == 0)
+                @warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])"
+            end
+            e += 1
+        end
+    end
+    println(io, "],")
+
+    # not used. Required by microsoft/vscode-v8-heap-tools
+    # This order of these fields is required by chrome dev tools otherwise loading fails
+    println(io, "\"trace_function_infos\":[],")
+    println(io, "\"trace_tree\":[],")
+    println(io, "\"samples\":[],")
+    println(io, "\"locations\":[],")
+
+    println(io, "\"strings\":[")
+    open(string(in_prefix, ".strings"), "r") do strings_io
+        first = true
+        while !eof(strings_io)
+            str_size = read(strings_io, UInt)
+            str_bytes = read(strings_io, str_size)
+            str = String(str_bytes)
+            if first
+                first = false
+            else
+                print(io, ",\n")
+            end
+            print_str_escape_json(io, str)
+        end
+    end
+    print(io, "]}")
+
+    # remove the uber node from the orphans
+    if 0 in orphans
+        delete!(orphans, 0)
+    end
+
+    @assert isempty(orphans) "Orphaned nodes: $(orphans), node count: $(length(nodes)), orphan node count: $(length(orphans))"
+
+    return nothing
+end
+
+"""
+    cleanup_streamed_files(prefix::AbstractString)
+
+Remove files streamed during `take_heap_snapshot` in streaming mode.
+"""
+function cleanup_streamed_files(prefix::AbstractString)
+    rm(string(prefix, ".metadata.json"))
+    rm(string(prefix, ".nodes"))
+    rm(string(prefix, ".edges"))
+    rm(string(prefix, ".strings"))
+    return nothing
+end
+
+function print_str_escape_json(stream::IO, s::AbstractString)
+    print(stream, '"')
+    for c in s
+        if c == '"'
+            print(stream, "\\\"")
+        elseif c == '\\'
+            print(stream, "\\\\")
+        elseif c == '\b'
+            print(stream, "\\b")
+        elseif c == '\f'
+            print(stream, "\\f")
+        elseif c == '\n'
+            print(stream, "\\n")
+        elseif c == '\r'
+            print(stream, "\\r")
+        elseif c == '\t'
+            print(stream, "\\t")
+        elseif '\x00' <= c <= '\x1f'
+            print(stream, "\\u", lpad(string(UInt16(c), base=16), 4, '0'))
+        elseif !isvalid(c)
+            # we have to do this because vscode's viewer doesn't like the replace character
+            print(stream, "[invalid unicode character]")
+        else
+            print(stream, c)
+        end
+    end
+    print(stream, '"')
+end
+
+end
diff --git a/stdlib/Profile/src/precompile.jl b/stdlib/Profile/src/precompile.jl
index 2d947429861a9..7b33e09941b28 100644
--- a/stdlib/Profile/src/precompile.jl
+++ b/stdlib/Profile/src/precompile.jl
@@ -1,4 +1,4 @@
-if ccall(:jl_generating_output, Cint, ()) == 1
+if Base.generating_output()
     precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt})
     precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}})
     precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt})
diff --git a/stdlib/Profile/test/allocs.jl b/stdlib/Profile/test/allocs.jl
index c2ec7d2f6cb54..8f6539e0baed6 100644
--- a/stdlib/Profile/test/allocs.jl
+++ b/stdlib/Profile/test/allocs.jl
@@ -1,6 +1,18 @@
 using Test
 using Profile: Allocs
 
+Allocs.clear()
+let iobuf = IOBuffer()
+    for format in (:tree, :flat)
+        Test.@test_logs (:warn, r"^There were no samples collected\.") Allocs.print(iobuf; format, C=true)
+    end
+end
+
+# Issue #57103: This test does not work with MMTk because of fastpath
+# allocation which never calls the allocation profiler.
+# TODO: We should port these observability tools (e.g. allocation
+# profiler and heap snapshot) to MMTk
+@static if Base.USING_STOCK_GC
 @testset "alloc profiler doesn't segfault" begin
     res = Allocs.@profile sample_rate=1.0 begin
         # test the allocations during compilation
@@ -13,6 +25,20 @@ using Profile: Allocs
     @test first_alloc.size > 0
     @test length(first_alloc.stacktrace) > 0
     @test length(string(first_alloc.type)) > 0
+
+    # test printing options
+    for options in ((format=:tree, C=true),
+                    (format=:tree, maxdepth=2),
+                    (format=:flat, C=true),
+                    (),
+                    (format=:flat, sortedby=:count),
+                    (format=:tree, recur=:flat),
+                   )
+        iobuf = IOBuffer()
+        Allocs.print(iobuf; options...)
+        str = String(take!(iobuf))
+        @test !isempty(str)
+    end
 end
 
 @testset "alloc profiler works when there are multiple tasks on multiple threads" begin
@@ -121,3 +147,40 @@ end
     @test length(prof.allocs) >= 1
     @test length([a for a in prof.allocs if a.type == String]) >= 1
 end
+
+@testset "alloc profiler catches allocs from codegen" begin
+    @eval begin
+        struct MyType x::Int; y::Int end
+        Base.:(+)(n::Number, x::MyType) = n + x.x + x.y
+        foo(a, x) = a[1] + x
+        wrapper(a) = foo(a, MyType(0,1))
+    end
+    a = Any[1,2,3]
+    # warmup
+    wrapper(a)
+
+    @eval Allocs.@profile sample_rate=1 wrapper($a)
+
+    prof = Allocs.fetch()
+    Allocs.clear()
+
+    @test length(prof.allocs) >= 1
+    @test length([a for a in prof.allocs if a.type == MyType]) >= 1
+end
+
+@testset "alloc profiler catches allocs from buffer resize" begin
+    f(a) = for _ in 1:100; push!(a, 1); end
+    f(Int[])
+    resize!(Int[], 1)
+    a = Int[]
+    Allocs.clear()
+    Allocs.@profile sample_rate=1 f(a)
+    Allocs.@profile sample_rate=1 resize!(a, 1_000_000) # 4MB
+    prof = Allocs.fetch()
+    Allocs.clear()
+
+    @test 3 <= length(prof.allocs) <= 10
+    @test length([a for a in prof.allocs if a.type === Allocs.BufferType]) == 1
+    @test length([a for a in prof.allocs if a.type === Memory{Int}]) >= 2
+end
+end
diff --git a/stdlib/Profile/test/heapsnapshot_reassemble.jl b/stdlib/Profile/test/heapsnapshot_reassemble.jl
new file mode 100644
index 0000000000000..e1d6621647671
--- /dev/null
+++ b/stdlib/Profile/test/heapsnapshot_reassemble.jl
@@ -0,0 +1,54 @@
+using Test
+
+@testset "_write_decimal_number" begin
+    _digits_buf = zeros(UInt8, ndigits(typemax(UInt)))
+    io = IOBuffer()
+
+    test_write(d) = begin
+        Profile.HeapSnapshot._write_decimal_number(io, d, _digits_buf)
+        s = String(take!(io))
+        seekstart(io)
+        return s
+    end
+    @test test_write(0) == "0"
+    @test test_write(99) == "99"
+
+    @test test_write(UInt8(0)) == "0"
+    @test test_write(UInt32(0)) == "0"
+    @test test_write(Int32(0)) == "0"
+
+    @test test_write(UInt8(99)) == "99"
+    @test test_write(UInt32(99)) == "99"
+    @test test_write(Int32(99)) == "99"
+
+    # Sample among possible UInts we might print
+    for x in typemin(UInt8):typemax(UInt8)
+        @test test_write(x) == string(x)
+    end
+    for x in typemin(UInt):typemax(UInt)÷10001:typemax(UInt)
+        @test test_write(x) == string(x)
+    end
+end
+
+function test_print_str_escape_json(input::AbstractString, expected::AbstractString)
+    output = IOBuffer()
+    Profile.HeapSnapshot.print_str_escape_json(output, input)
+    @test String(take!(output)) == expected
+end
+
+@testset "print_str_escape_json" begin
+    # Test basic string escaping
+    test_print_str_escape_json("\"hello\"", "\"\\\"hello\\\"\"")
+
+    # Test escaping of control characters
+    test_print_str_escape_json("\x01\x02\x03", "\"\\u0001\\u0002\\u0003\"")
+
+    # Test escaping of other special characters
+    test_print_str_escape_json("\b\f\n\r\t", "\"\\b\\f\\n\\r\\t\"")
+
+    # Test handling of mixed characters
+    test_print_str_escape_json("abc\ndef\"ghi", "\"abc\\ndef\\\"ghi\"")
+
+    # Test handling of empty string
+    test_print_str_escape_json("", "\"\"")
+end
diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl
index 2d6df81b1015d..17af484bda242 100644
--- a/stdlib/Profile/test/runtests.jl
+++ b/stdlib/Profile/test/runtests.jl
@@ -25,41 +25,79 @@ end
     end
 end
 
-busywait(0, 0) # compile
-@profile busywait(1, 20)
+@noinline function sleeping_tasks(ch::Channel)
+    for _ in 1:100
+        Threads.@spawn take!(ch)
+    end
+    sleep(10)
+end
 
-let r = Profile.retrieve()
-    mktemp() do path, io
-        serialize(io, r)
-        close(io)
-        open(path) do io
-            @test isa(deserialize(io), Tuple{Vector{UInt},Dict{UInt64,Vector{Base.StackTraces.StackFrame}}})
+function test_profile()
+    let r = Profile.retrieve()
+        mktemp() do path, io
+            serialize(io, r)
+            close(io)
+            open(path) do io
+                @test isa(deserialize(io), Tuple{Vector{UInt},Dict{UInt64,Vector{Base.StackTraces.StackFrame}}})
+            end
         end
     end
 end
 
-let iobuf = IOBuffer()
-    Profile.print(iobuf, format=:tree, C=true)
-    str = String(take!(iobuf))
-    @test !isempty(str)
-    truncate(iobuf, 0)
-    Profile.print(iobuf, format=:tree, maxdepth=2)
-    str = String(take!(iobuf))
-    @test !isempty(str)
-    truncate(iobuf, 0)
-    Profile.print(iobuf, format=:flat, C=true)
-    str = String(take!(iobuf))
-    @test !isempty(str)
-    truncate(iobuf, 0)
-    Profile.print(iobuf)
-    @test !isempty(String(take!(iobuf)))
-    truncate(iobuf, 0)
-    Profile.print(iobuf, format=:flat, sortedby=:count)
-    @test !isempty(String(take!(iobuf)))
-    Profile.print(iobuf, format=:tree, recur=:flat)
+function test_has_task_profiler_sample_in_buffer()
+    let r = Profile.retrieve()
+        mktemp() do path, io
+            serialize(io, r)
+            close(io)
+            open(path) do io
+                all = deserialize(io)
+                data = all[1]
+                startframe = length(data)
+                for i in startframe:-1:1
+                    (startframe - 1) >= i >= (startframe - (Profile.nmeta + 1)) && continue # skip metadata (its read ahead below) and extra block end NULL IP
+                    if Profile.is_block_end(data, i)
+                        thread_sleeping_state = data[i - Profile.META_OFFSET_SLEEPSTATE]
+                        @test thread_sleeping_state == 0x3
+                    end
+                end
+            end
+        end
+    end
+end
+
+busywait(0, 0) # compile
+
+@profile_walltime busywait(1, 20)
+test_profile()
+
+Profile.clear()
+
+ch = Channel(1)
+@profile_walltime sleeping_tasks(ch)
+test_profile()
+close(ch)
+test_has_task_profiler_sample_in_buffer()
+
+Profile.clear()
+
+@profile busywait(1, 20)
+test_profile()
+
+# test printing options
+for options in ((format=:tree, C=true),
+                (format=:tree, maxdepth=2),
+                (format=:flat, C=true),
+                (),
+                (format=:flat, sortedby=:count),
+                (format=:tree, recur=:flat),
+               )
+    iobuf = IOBuffer()
+    Profile.print(iobuf; options...)
     str = String(take!(iobuf))
     @test !isempty(str)
-    truncate(iobuf, 0)
+    file, _ = mktemp()
+    Profile.print(file; options...)
+    @test filesize(file) > 0
 end
 
 @testset "Profile.print() groupby options" begin
@@ -117,6 +155,20 @@ end
 @test z == 10
 end
 
+@testset "@profile no scope" begin
+    @profile no_scope_57858_1 = 1
+    @test @isdefined no_scope_57858_1
+    Profile.clear()
+
+    @profile_walltime no_scope_57858_1 = 1
+    @test @isdefined no_scope_57858_1
+    Profile.clear()
+
+    Profile.Allocs.@profile no_scope_57858_2 = 1
+    @test @isdefined no_scope_57858_2
+    Profile.Allocs.clear()
+end
+
 @testset "setting sample count and delay in init" begin
     n_, delay_ = Profile.init()
     n_original = n_
@@ -166,6 +218,51 @@ end
     @test getline(values(fdictc)) == getline(values(fdict0)) + 2
 end
 
+import InteractiveUtils
+
+@generated function compile_takes_1_second(x)
+    t = time_ns()
+    while time_ns() < t + 1e9
+        # busy wait for 1 second
+    end
+    return :(x)
+end
+@testset "Module short names" begin
+    Profile.clear()
+    @profile begin
+        @eval compile_takes_1_second(1) # to increase chance of profiling hitting compilation code
+        InteractiveUtils.peakflops()
+    end
+    io = IOBuffer()
+    ioc = IOContext(io, :displaysize=>(1000,1000))
+    Profile.print(ioc, C=true)
+    str = String(take!(io))
+    slash = Sys.iswindows() ? "\\" : "/"
+    @test occursin("@Compiler" * slash, str)
+    @test occursin("@Base" * slash, str)
+    @test occursin("@InteractiveUtils" * slash, str)
+    @test occursin("@LinearAlgebra" * slash, str)
+    @test occursin("@juliasrc" * slash, str)
+    @test occursin("@julialib" * slash, str)
+end
+
+function run_with_watchdog(cmd, timeout=120)
+    p = open(cmd)
+    t = Timer(timeout) do t
+        # should be under 10 seconds, so give it 2 minutes then report failure
+        println("KILLING debuginfo registration test BY PROFILE TEST WATCHDOG\n")
+        kill(p, Base.SIGQUIT)
+        sleep(30)
+        kill(p, Base.SIGQUIT)
+        sleep(30)
+        kill(p, Base.SIGKILL)
+    end
+    s = read(p, String)
+    close(t)
+    close(p)
+    success(p) ? s : ""
+end
+
 # Profile deadlocking in compilation (debuginfo registration)
 let cmd = Base.julia_cmd()
     script = """
@@ -178,40 +275,51 @@ let cmd = Base.julia_cmd()
         println("done")
         print(Profile.len_data())
         """
-    p = open(`$cmd -e $script`)
-    t = Timer(120) do t
-        # should be under 10 seconds, so give it 2 minutes then report failure
-        println("KILLING BY PROFILE TEST WATCHDOG\n")
-        kill(p, Base.SIGTERM)
-        sleep(10)
-        kill(p, Base.SIGKILL)
-    end
-    s = read(p, String)
-    close(t)
-    @test success(p)
+    # use multiple threads here to ensure that profiling works with threading
+    s = run_with_watchdog(`$cmd -t2 -e $script`)
     @test !isempty(s)
     @test occursin("done", s)
     @test parse(Int, split(s, '\n')[end]) > 100
 end
 
+# Thread suspend deadlock - run many times (#60042)
+@test_skip let cmd = Base.julia_cmd()
+    script = """
+        using Profile
+        @profile println("done")
+        """
+    good = true
+    for i=1:100
+        s = run_with_watchdog(`$cmd -t2 -e $script`, 5)
+        good &= occursin("done", s)
+    end
+    good
+end
+
 if Sys.isbsd() || Sys.islinux()
     @testset "SIGINFO/SIGUSR1 profile triggering" begin
         let cmd = Base.julia_cmd()
             script = """
                 print(stderr, "started\n")
                 eof(stdin)
-                close(t)
                 """
-            iob = Base.BufferStream()
+            iob = Base.BufferStream() # make an unbounded buffer, so we can just read after waiting for exit
             notify_exit = Base.PipeEndpoint()
-            p = run(pipeline(`$cmd -e $script`, stdin=notify_exit, stderr=iob, stdout=devnull), wait=false)
+            p = run(`$cmd -e $script`, notify_exit, devnull, iob, wait=false)
+            eof = @async try # set up a monitor task to set EOF on iob after p exits
+                wait(p)
+            finally
+                closewrite(iob)
+            end
             t = Timer(120) do t
                 # should be under 10 seconds, so give it 2 minutes then report failure
-                println("KILLING BY PROFILE TEST WATCHDOG\n")
-                kill(p, Base.SIGTERM)
-                sleep(10)
+                println("KILLING siginfo/sigusr1 test BY PROFILE TEST WATCHDOG\n")
+                kill(p, Base.SIGQUIT)
+                sleep(30)
+                kill(p, Base.SIGQUIT)
+                sleep(30)
                 kill(p, Base.SIGKILL)
-                close(p)
+                close(notify_exit)
             end
             try
                 s = readuntil(iob, "started", keep=true)
@@ -230,17 +338,18 @@ if Sys.isbsd() || Sys.islinux()
                     @test occursin("Overhead ╎", s)
                 end
                 close(notify_exit) # notify test finished
-                s = read(iob, String) # consume test output
-                wait(p) # wait for test completion
+                wait(eof) # wait for test completion
+                s = read(iob, String) # consume test output from buffer
                 close(t)
             catch
                 close(notify_exit)
+                wait(eof) # wait for test completion
                 errs = read(iob, String) # consume test output
                 isempty(errs) || println("CHILD STDERR after test failure: ", errs)
-                wait(p) # wait for test completion
                 close(t)
                 rethrow()
             end
+            @test success(p)
         end
     end
 end
@@ -278,20 +387,55 @@ end
     @test only(node.down).first == lidict[8]
 end
 
+# FIXME: Issue #57103: heap snapshots are currently not supported in MMTk
+@static if Base.USING_STOCK_GC
 @testset "HeapSnapshot" begin
     tmpdir = mktempdir()
+
+    # ensure that we can prevent redacting data
     fname = cd(tmpdir) do
-        read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; print(Profile.take_heap_snapshot())"`, String)
+        read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot(; redact_data=false))"`, String)
     end
 
     @test isfile(fname)
 
-    open(fname) do fs
-        @test readline(fs) != ""
+    sshot = read(fname, String)
+    @test sshot != ""
+    @test contains(sshot, "redact_this")
+
+    rm(fname)
+
+    # ensure that string data is redacted by default
+    fname = cd(tmpdir) do
+        read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot())"`, String)
     end
 
+    @test isfile(fname)
+
+    sshot = read(fname, String)
+    @test sshot != ""
+    @test !contains(sshot, "redact_this")
+
     rm(fname)
     rm(tmpdir, force = true, recursive = true)
 end
+end
+
+@testset "PageProfile" begin
+    fname = "$(getpid())_$(time_ns())"
+    fpath = joinpath(tempdir(), fname)
+    Profile.take_page_profile(fpath)
+    open(fpath) do fs
+        @test readline(fs) != ""
+    end
+    rm(fpath)
+end
 
 include("allocs.jl")
+
+@testset "Docstrings" begin
+    undoc = Docs.undocumented_names(Profile)
+    @test_broken isempty(undoc)
+    @test undoc == [:Allocs]
+end
+include("heapsnapshot_reassemble.jl")
diff --git a/stdlib/Project.toml b/stdlib/Project.toml
new file mode 100644
index 0000000000000..2051377a6967c
--- /dev/null
+++ b/stdlib/Project.toml
@@ -0,0 +1,62 @@
+[deps]
+ArgTools = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+FileWatching = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+GMP_jll = "781609d7-10c4-51f6-84f2-b8444358ff6d"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+JuliaSyntaxHighlighting = "ac6e5ff7-fb65-4e79-a425-ec3bc9c03011"
+LLD_jll = "d55e3150-da41-5e91-b323-ecfd1eec6109"
+LLVMLibUnwind_jll = "47c5dbc3-30ba-59ef-96a6-123e260183d9"
+LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+LibCURL = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+LibCURL_jll = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
+LibGit2_jll = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+LibUV_jll = "183b4373-6708-53ba-ad28-60e28bb38547"
+LibUnwind_jll = "745a5e78-f969-53e9-954f-d19f2f74f4e3"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+MPFR_jll = "3a97d323-0669-5f0c-9066-3539efd106a3"
+Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
+MozillaCACerts_jll = "14a3606d-f60d-562e-9121-12d972cd8159"
+NetworkOptions = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+OpenBLAS_jll = "4536629a-c528-5b80-bd46-f80d51c5b363"
+OpenLibm_jll = "05823500-19ac-5b8b-9628-191a04bc5112"
+OpenSSL_jll = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
+PCRE2_jll = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
+REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+SuiteSparse_jll = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
+TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
+Zstd_jll = "3161d3a3-bdf6-5164-811a-617609db77b4"
+dSFMT_jll = "05ff407c-b0c1-5878-9df8-858cc2e60c36"
+libLLVM_jll = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
+libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
+nghttp2_jll = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
diff --git a/stdlib/REPL/Project.toml b/stdlib/REPL/Project.toml
index 4f77157da0146..6b37c892f8aa3 100644
--- a/stdlib/REPL/Project.toml
+++ b/stdlib/REPL/Project.toml
@@ -1,15 +1,21 @@
 name = "REPL"
 uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+version = "1.11.0"
 
 [deps]
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+FileWatching = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+JuliaSyntaxHighlighting = "ac6e5ff7-fb65-4e79-a425-ec3bc9c03011"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
+StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
 Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 
 [extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Random"]
+test = ["Logging", "Test", "Random"]
diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md
index ce594d55863bc..867c965a87666 100644
--- a/stdlib/REPL/docs/src/index.md
+++ b/stdlib/REPL/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/REPL/docs/src/index.md"
+```
+
 # The Julia REPL
 
 Julia comes with a full-featured interactive command-line REPL (read-eval-print loop) built into
@@ -7,8 +11,9 @@ shell modes. The REPL can be started by simply calling `julia` with no arguments
 on the executable:
 
 ```@eval
+using REPL
 io = IOBuffer()
-Base.banner(io)
+REPL.banner(io)
 banner = String(take!(io))
 import Markdown
 Markdown.parse("```\n\$ julia\n\n$(banner)\njulia>\n```")
@@ -45,14 +50,16 @@ julia> ans
 
 In Julia mode, the REPL supports something called *prompt pasting*. This activates when pasting text
 that starts with `julia> ` into the REPL. In that case, only expressions starting with `julia> ` (as
-well as the other REPL mode prompts: `shell> `, `help?> `, `pkg>` ) are parsed, but others are
+well as the other REPL mode prompts: `shell> `, `help?> `, `pkg> ` ) are parsed, but others are
 removed. This makes it possible to paste a chunk of text that has been copied from a REPL session
 without having to scrub away prompts and outputs. This feature is enabled by default but can be
 disabled or enabled at will with `REPL.enable_promptpaste(::Bool)`. If it is enabled, you can try it
 out by pasting the code block above this paragraph straight into the REPL. This feature does not
 work on the standard Windows command prompt due to its limitation at detecting when a paste occurs.
 
-Objects are printed at the REPL using the [`show`](@ref) function with a specific [`IOContext`](@ref).
+A non-[`nothing`](@ref) result of executing an expression is displayed by the REPL using the [`show`](@ref) function
+with a specific [`IOContext`](@ref) (via [`display`](@ref), which defaults to calling
+`show(io, MIME("text/plain"), ans)`, which in turn defaults to `show(io, ans)`).
 In particular, the `:limit` attribute is set to `true`.
 Other attributes can receive in certain `show` methods a default value if it's not already set,
 like `:compact`.
@@ -61,7 +68,7 @@ It's possible, as an experimental feature, to specify the attributes used by the
 
 ```julia-repl
 julia> rand(2, 2)
-2×2 Array{Float64,2}:
+2×2 Matrix{Float64}:
  0.8833    0.329197
  0.719708  0.59114
 
@@ -71,7 +78,7 @@ julia> show(IOContext(stdout, :compact => false), "text/plain", rand(2, 2))
 julia> Base.active_repl.options.iocontext[:compact] = false;
 
 julia> rand(2, 2)
-2×2 Array{Float64,2}:
+2×2 Matrix{Float64}:
  0.2083967319174056  0.13330606013126012
  0.6244375177790158  0.9777957560761545
 ```
@@ -198,83 +205,92 @@ at the beginning of the line. The prompt for this mode is `pkg>`. It supports it
 entered by pressing `?` at the beginning  of the line of the `pkg>` prompt. The Package manager mode is
 documented in the Pkg manual, available at [https://julialang.github.io/Pkg.jl/v1/](https://julialang.github.io/Pkg.jl/v1/).
 
-### Search modes
+### History searching
 
 In all of the above modes, the executed lines get saved to a history file, which can be searched.
- To initiate an incremental search through the previous history, type `^R` -- the control key
-together with the `r` key. The prompt will change to ```(reverse-i-search)`':```, and as you
-type the search query will appear in the quotes. The most recent result that matches the query
-will dynamically update to the right of the colon as more is typed. To find an older result using
-the same query, simply type `^R` again.
+ To initiate an interactive search through the previous history, type `^R` -- the control key
+together with the `r` key.
 
-Just as `^R` is a reverse search, `^S` is a forward search, with the prompt ```(i-search)`':```.
- The two may be used in conjunction with each other to move through the previous or next matching
-results, respectively.
+You will be presented with an interactive history viewer. As you type your search history will be filtered;
+pressing enter will insert the selected history entry into the REPL. Detailed help for the history
+searcher is available within the REPL with the special queries `?`  and `??`.
 
 All executed commands in the Julia REPL are logged into `~/.julia/logs/repl_history.jl` along with a timestamp of when it was executed
-and the current REPL mode you were in. Search mode queries this log file in order to find the commands which you previously ran.
-This can be disabled at startup by passing the `--history-file=no` flag to Julia.
+and the current REPL mode you were in. The history searcher reads this log file in order to find the commands which you previously ran.
+Multiple REPLs can write to this file at once, and every time you begin a search the newest history is fetched.
+Use of this file can be disabled at startup by passing the `--history-file=no` flag to Julia.
 
 ## Key bindings
 
 The Julia REPL makes great use of key bindings. Several control-key bindings were already introduced
-above (`^D` to exit, `^R` and `^S` for searching), but there are many more. In addition to the
+above (`^D` to exit, `^R` for searching), but there are many more. In addition to the
 control-key, there are also meta-key bindings. These vary more by platform, but most terminals
 default to using alt- or option- held down with a key to send the meta-key (or can be configured
 to do so), or pressing Esc and then the key.
 
-| Keybinding          | Description                                                                                                |
-|:------------------- |:---------------------------------------------------------------------------------------------------------- |
-| **Program control** |                                                                                                            |
-| `^D`                | Exit (when buffer is empty)                                                                                |
-| `^C`                | Interrupt or cancel                                                                                        |
-| `^L`                | Clear console screen                                                                                       |
-| Return/Enter, `^J`  | New line, executing if it is complete                                                                      |
-| meta-Return/Enter   | Insert new line without executing it                                                                       |
-| `?` or `;`          | Enter help or shell mode (when at start of a line)                                                         |
-| `^R`, `^S`          | Incremental history search, described above                                                                |
-| **Cursor movement** |                                                                                                            |
-| Right arrow, `^F`   | Move right one character                                                                                   |
-| Left arrow, `^B`    | Move left one character                                                                                    |
-| ctrl-Right, `meta-F`| Move right one word                                                                                        |
-| ctrl-Left, `meta-B` | Move left one word                                                                                         |
-| Home, `^A`          | Move to beginning of line                                                                                  |
-| End, `^E`           | Move to end of line                                                                                        |
-| Up arrow, `^P`      | Move up one line (or change to the previous history entry that matches the text before the cursor)         |
-| Down arrow, `^N`    | Move down one line (or change to the next history entry that matches the text before the cursor)           |
-| Shift-Arrow Key     | Move cursor according to the direction of the Arrow key, while activating the region ("shift selection")   |
-| Page-up, `meta-P`   | Change to the previous history entry                                                                       |
-| Page-down, `meta-N` | Change to the next history entry                                                                           |
-| `meta-<`            | Change to the first history entry (of the current session if it is before the current position in history) |
-| `meta->`            | Change to the last history entry                                                                           |
-| `^-Space`           | Set the "mark" in the editing region (and de-activate the region if it's active)                           |
-| `^-Space ^-Space`   | Set the "mark" in the editing region and make the region "active", i.e. highlighted                        |
-| `^G`                | De-activate the region (i.e. make it not highlighted)                                                      |
-| `^X^X`              | Exchange the current position with the mark                                                                |
-| **Editing**         |                                                                                                            |
-| Backspace, `^H`     | Delete the previous character, or the whole region when it's active                                        |
-| Delete, `^D`        | Forward delete one character (when buffer has text)                                                        |
-| meta-Backspace      | Delete the previous word                                                                                   |
-| `meta-d`            | Forward delete the next word                                                                               |
-| `^W`                | Delete previous text up to the nearest whitespace                                                          |
-| `meta-w`            | Copy the current region in the kill ring                                                                   |
-| `meta-W`            | "Kill" the current region, placing the text in the kill ring                                               |
-| `^U`                | "Kill" to beginning of line, placing the text in the kill ring                                             |
-| `^K`                | "Kill" to end of line, placing the text in the kill ring                                                   |
-| `^Y`                | "Yank" insert the text from the kill ring                                                                  |
-| `meta-y`            | Replace a previously yanked text with an older entry from the kill ring                                    |
-| `^T`                | Transpose the characters about the cursor                                                                  |
-| `meta-Up arrow`     | Transpose current line with line above                                                                     |
-| `meta-Down arrow`   | Transpose current line with line below                                                                     |
-| `meta-u`            | Change the next word to uppercase                                                                          |
-| `meta-c`            | Change the next word to titlecase                                                                          |
-| `meta-l`            | Change the next word to lowercase                                                                          |
-| `^/`, `^_`          | Undo previous editing action                                                                               |
-| `^Q`                | Write a number in REPL and press `^Q` to open editor at corresponding stackframe or method                 |
-| `meta-Left Arrow`   | Indent the current line on the left                                                                        |
-| `meta-Right Arrow`  | Indent the current line on the right                                                                       |
-| `meta-.`            | Insert last word from previous history entry                                                               |
-| `meta-e`            | Edit the current input in an editor                                                                        |
+| Keybinding            | Description                                                                                                |
+|:----------------------|:-----------------------------------------------------------------------------------------------------------|
+| **Program control**   |                                                                                                            |
+| `^D`                  | Exit (when buffer is empty)                                                                                |
+| `^C`                  | Interrupt or cancel                                                                                        |
+| `^L`                  | Clear console screen                                                                                       |
+| Return/Enter, `^J`    | New line, executing if it is complete                                                                      |
+| meta-Return/Enter     | Insert new line without executing it                                                                       |
+| `?` or `;`            | Enter help or shell mode (when at start of a line)                                                         |
+| `^R`, `^S`            | Interactive history search, described above                                                                |
+| **Cursor movement**   |                                                                                                            |
+| Right arrow, `^F`     | Move right one character                                                                                   |
+| Left arrow, `^B`      | Move left one character                                                                                    |
+| ctrl-Right, `meta-F`  | Move right one word                                                                                        |
+| ctrl-Left, `meta-B`   | Move left one word                                                                                         |
+| Home, `^A`            | Move to beginning of line                                                                                  |
+| End, `^E`             | Move to end of line                                                                                        |
+| Up arrow, `^P`        | Move up one line (or change to the previous history entry that matches the text before the cursor)         |
+| Down arrow, `^N`      | Move down one line (or change to the next history entry that matches the text before the cursor)           |
+| Shift-Arrow Key       | Move cursor according to the direction of the Arrow key, while activating the region ("shift selection")   |
+| Page-up, `meta-P`     | Change to the previous history entry                                                                       |
+| Page-down, `meta-N`   | Change to the next history entry                                                                           |
+| `meta-<`              | Change to the first history entry (of the current session if it is before the current position in history) |
+| `meta->`              | Change to the last history entry                                                                           |
+| `^-Space`             | Set the "mark" in the editing region (and de-activate the region if it's active)                           |
+| `^-Space ^-Space`     | Set the "mark" in the editing region and make the region "active", i.e. highlighted                        |
+| `^G`                  | De-activate the region (i.e. make it not highlighted)                                                      |
+| `^X^X`                | Exchange the current position with the mark                                                                |
+| **Editing**           |                                                                                                            |
+| Backspace, `^H`       | Delete the previous character, or the whole region when it's active                                        |
+| Delete, `^D`          | Forward delete one character (when buffer has text)                                                        |
+| meta-Backspace        | Delete the previous word                                                                                   |
+| `meta-d`              | Forward delete the next word                                                                               |
+| `^W`                  | Delete previous text up to the nearest whitespace                                                          |
+| `meta-w`              | Copy the current region in the kill ring                                                                   |
+| `meta-W`              | "Kill" the current region, placing the text in the kill ring                                               |
+| `^U`                  | "Kill" to beginning of line, placing the text in the kill ring                                             |
+| `^K`                  | "Kill" to end of line, placing the text in the kill ring                                                   |
+| `^Y`                  | "Yank" insert the text from the kill ring                                                                  |
+| `meta-y`              | Replace a previously yanked text with an older entry from the kill ring                                    |
+| `^T`                  | Transpose the characters about the cursor                                                                  |
+| `meta-Up arrow`       | Transpose current line with line above                                                                     |
+| `meta-Down arrow`     | Transpose current line with line below                                                                     |
+| `meta-u`              | Change the next word to uppercase                                                                          |
+| `meta-c`              | Change the next word to titlecase                                                                          |
+| `meta-l`              | Change the next word to lowercase                                                                          |
+| `^/`, `^_`            | Undo previous editing action                                                                               |
+| `^Q`                  | Write a number in REPL and press `^Q` to open editor at corresponding stackframe or method                 |
+| `meta-Left Arrow`     | Indent the current line on the left                                                                        |
+| `meta-Right Arrow`    | Indent the current line on the right                                                                       |
+| `meta-.`              | Insert last word from previous history entry                                                               |
+| `meta-e`              | Edit the current input in an editor                                                                        |
+| **History search**    |                                                                                                            |
+| Up arrow, `^P`, `^K`  | Move the focus one entry up                                                                                |
+| Down arrow, `^P`, `^N`| Move the focus one entry down                                                                              |
+| Page up, `^B`         | Move the focus one page up                                                                                 |
+| Page down, `^F`       | Move the focus one page down                                                                               |
+| `meta-<`              | Focus on the first (oldest) history entry                                                                  |
+| `meta->`              | Focus on the last (most recent) history entry                                                              |
+| Tab                   | Toggle selection of the currently focused entry                                                            |
+| Enter                 | Accept the currently focused/selected entries                                                              |
+| `^S`                  | Save the focused/selected entries to the clipboard or a file                                               |
+| `^C`, `^D`, `^G`      | Abort the history search                                                                                   |
 
 ### Customizing keybindings
 
@@ -310,9 +326,31 @@ atreplinit(customize_keys)
 
 Users should refer to `LineEdit.jl` to discover the available actions on key input.
 
+### Automatic bracket insertion
+
+The Julia REPL supports automatically inserting closing brackets, parentheses, braces, and quotes
+when you type the opening character.
+
+When enabled, typing an opening bracket `(`, `{`, or `[` will automatically insert the matching
+closing bracket `)`, `}`, or `]` and position the cursor between them. The same behavior applies
+to quotes (`"`, `'`, and `` ` ``). If you then type the closing character, the REPL will skip over
+the auto-inserted character instead of inserting a duplicate. Additionally, pressing backspace
+immediately after auto-insertion will remove both the opening and closing characters.
+
+To disable this feature, add the following to your `~/.julia/config/startup.jl` file:
+
+```julia
+atreplinit() do repl
+    # Robust against older julia versions
+    if hasfield(typeof(repl.options), :auto_insert_closing_bracket)
+        repl.options.auto_insert_closing_bracket = false
+    end
+end
+```
+
 ## Tab completion
 
-In both the Julian and help modes of the REPL, one can enter the first few characters of a function
+In the Julian, pkg and help modes of the REPL, one can enter the first few characters of a function
 or type and then press the tab key to get a list all matches:
 
 ```julia-repl
@@ -334,6 +372,21 @@ julia> mapfold[TAB]
 mapfoldl mapfoldr
 ```
 
+When a single complete tab-complete result is available at the end of an input line and 2 or more characters
+have been typed, a hint of the completion will show in a lighter color.
+This can be disabled via `Base.active_repl.options.hint_tab_completes = false` or by adding
+```
+atreplinit() do repl
+    if VERSION >= v"1.11.0-0"
+        repl.options.hint_tab_completes = false
+    end
+end
+```
+to your `~/.julia/config/startup.jl`.
+
+!!! compat "Julia 1.11"
+    Tab-complete hinting was added in Julia 1.11
+
 Like other components of the REPL, the search is case-sensitive:
 
 ```julia-repl
@@ -354,13 +407,13 @@ julia> π
 
 julia> e\_1[TAB] = [1,0]
 julia> e₁ = [1,0]
-2-element Array{Int64,1}:
+2-element Vector{Int64}:
  1
  0
 
 julia> e\^1[TAB] = [1 0]
 julia> e¹ = [1 0]
-1×2 Array{Int64,2}:
+1×2 Matrix{Int64}:
  1  0
 
 julia> \sqrt[TAB]2     # √ is equivalent to the sqrt function
@@ -511,6 +564,195 @@ mmap(file::AbstractString, ::Type{T}, len::Integer) where T<:BitArray in Mmap at
 mmap(file::AbstractString, ::Type{T}, len::Integer, offset::Integer; grow, shared) where T<:BitArray in Mmap at Mmap/src/Mmap.jl:322
 ```
 
+## Syntax Highlighting
+
+The REPL provides syntax highlighting for input as you type.
+Syntax highlighting is enabled by default but can be disabled in your `~/.julia/config/startup.jl`:
+
+```julia
+atreplinit() do repl
+    repl.options.style_input = false
+end
+```
+
+### Customizing Syntax Highlighting Colors
+
+The default syntax highlighting theme is quite conservative but can be customized using a TOML file `faces.toml` (https://julialang.github.io/StyledStrings.jl/dev/#stdlib-styledstrings-face-toml) in `.julia/config` (or by explicitly loading the faces from a face toml file).
+
+
+!!! details "Example: Monokai color theme (click to expand)"
+    ```toml
+
+    # Monokai color theme for Julia syntax highlighting
+
+    [julia_macro]
+    foreground = "#A6E22E"
+
+    [julia_symbol]
+    foreground = "#AE81FF"
+
+    [julia_singleton_identifier]
+    inherit = "julia_symbol"
+
+    [julia_type]
+    foreground = "#66D9EF"
+
+    [julia_typedec]
+    foreground = "#66D9EF"
+    weight = "bold"
+
+    [julia_comment]
+    foreground = "#75715E"
+    italic = true
+
+    [julia_string]
+    foreground = "#E6DB74"
+
+    [julia_regex]
+    inherit = "julia_string"
+
+    [julia_backslash_literal]
+    foreground = "#FD971F"
+    inherit = "julia_string"
+
+    [julia_string_delim]
+    foreground = "#E6DB74"
+    weight = "bold"
+
+    [julia_cmdstring]
+    inherit = "julia_string"
+
+    [julia_char]
+    inherit = "julia_string"
+
+    [julia_char_delim]
+    inherit = "julia_string_delim"
+
+    [julia_number]
+    foreground = "#AE81FF"
+
+    [julia_bool]
+    foreground = "#AE81FF"
+    weight = "bold"
+
+    [julia_funcall]
+    foreground = "#A6E22E"
+
+    [julia_broadcast]
+    foreground = "#F92672"
+    weight = "bold"
+
+    [julia_builtin]
+    foreground = "#66D9EF"
+    weight = "bold"
+
+    [julia_operator]
+    foreground = "#F92672"
+
+    [julia_comparator]
+    inherit = "julia_operator"
+
+    [julia_assignment]
+    foreground = "#F92672"
+    weight = "bold"
+
+    [julia_keyword]
+    foreground = "#F92672"
+    weight = "bold"
+
+    [julia_parentheses]
+    foreground = "#F8F8F2"
+
+    [julia_unpaired_parentheses]
+    background = "#F92672"
+    foreground = "#F8F8F0"
+    weight = "bold"
+
+    [julia_error]
+    background = "#F92672"
+    foreground = "#F8F8F0"
+
+    [julia_rainbow_paren_1]
+    foreground = "#A6E22E"
+    inherit = "julia_parentheses"
+
+    [julia_rainbow_paren_2]
+    foreground = "#66D9EF"
+    inherit = "julia_parentheses"
+
+    [julia_rainbow_paren_3]
+    foreground = "#FD971F"
+    inherit = "julia_parentheses"
+
+    [julia_rainbow_paren_4]
+    inherit = "julia_rainbow_paren_1"
+
+    [julia_rainbow_paren_5]
+    inherit = "julia_rainbow_paren_2"
+
+    [julia_rainbow_paren_6]
+    inherit = "julia_rainbow_paren_3"
+
+    # Rainbow brackets
+    [julia_rainbow_bracket_1]
+    foreground = "#AE81FF"
+    inherit = "julia_parentheses"
+
+    [julia_rainbow_bracket_2]
+    foreground = "#E6DB74"
+    inherit = "julia_parentheses"
+
+    [julia_rainbow_bracket_3]
+    inherit = "julia_rainbow_bracket_1"
+
+    [julia_rainbow_bracket_4]
+    inherit = "julia_rainbow_bracket_2"
+
+    [julia_rainbow_bracket_5]
+    inherit = "julia_rainbow_bracket_1"
+
+    [julia_rainbow_bracket_6]
+    inherit = "julia_rainbow_bracket_2"
+
+    # Rainbow curlies
+    [julia_rainbow_curly_1]
+    foreground = "#F92672"
+    inherit = "julia_parentheses"
+
+    [julia_rainbow_curly_2]
+    foreground = "#A6E22E"
+    inherit = "julia_parentheses"
+
+    [julia_rainbow_curly_3]
+    inherit = "julia_rainbow_curly_1"
+
+    [julia_rainbow_curly_4]
+    inherit = "julia_rainbow_curly_2"
+
+    [julia_rainbow_curly_5]
+    inherit = "julia_rainbow_curly_1"
+
+    [julia_rainbow_curly_6]
+    inherit = "julia_rainbow_curly_2"
+    ```
+
+For a complete list of customizable faces, see the [JuliaSyntaxHighlighting package documentation](https://julialang.github.io/JuliaSyntaxHighlighting.jl/dev/).
+
+## Customising the history searcher
+
+The history searcher uses the following default faces, that can be customised:
+
+```toml
+[REPL.History.search]
+separator.fg  = "blue"
+prefix.fg = "magenta"
+selected.fg = "blue"
+unselected.fg = "grey"
+hint = { fg = "magenta", slant = "italic", weight ="light" }
+results.inherit = "shadow"
+match = { weight = "bold", underline = true }
+```
+
 ## Customizing Colors
 
 The colors used by Julia and the REPL can be customized, as well. To change the
@@ -570,8 +812,9 @@ Main
 
 It is possible to change this contextual module via the function
 `REPL.activate(m)` where `m` is a `Module` or by typing the module in the REPL
-and pressing the keybinding Alt-m (the cursor must be on the module name). The
-active module is shown in the prompt:
+and pressing the keybinding Alt-m with the cursor on the module name (Esc-m on MacOS).
+Pressing the keybinding on an empty prompt toggles the context between the previously active
+non-`Main` module and `Main`. The active module is shown in the prompt (unless it is `Main`):
 
 ```julia-repl
 julia> using REPL
@@ -591,9 +834,13 @@ julia> Core<Alt-m> # using the keybinding to change module
 
 (Core) julia>
 
-(Core) julia> Main<Alt-m> # going back to Main via keybinding
+(Core) julia> <Alt-m> # going back to Main via keybinding
 
 julia>
+
+julia> <Alt-m> # going back to previously-active Core via keybinding
+
+(Core) julia>
 ```
 
 Functions that take an optional module argument often defaults to the REPL
diff --git a/stdlib/REPL/src/History/History.jl b/stdlib/REPL/src/History/History.jl
new file mode 100644
index 0000000000000..3a7ff97543688
--- /dev/null
+++ b/stdlib/REPL/src/History/History.jl
@@ -0,0 +1,34 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module History
+
+using ..REPL: REPL
+
+using StyledStrings: @styled_str as @S_str, Face, addface!, face!, annotations, AnnotatedIOBuffer, AnnotatedString, AnnotatedChar
+using JuliaSyntaxHighlighting: highlight
+using Base.Threads
+using Dates
+using InteractiveUtils: clipboard
+
+export HistoryFile, HistEntry, update!, runsearch
+
+const FACES = (
+    :REPL_History_search_separator   => Face(foreground=:blue),
+    :REPL_History_search_prefix      => Face(foreground=:magenta),
+    :REPL_History_search_selected    => Face(foreground=:blue),
+    :REPL_History_search_unselected  => Face(foreground=:grey),
+    # :REPL_History_search_preview_box => Face(foreground=:grey),
+    :REPL_History_search_hint        => Face(foreground=:magenta, slant=:italic, weight=:light),
+    :REPL_History_search_results     => Face(inherit=:shadow),
+    :REPL_History_search_match       => Face(weight = :bold, underline = true),
+)
+
+include("histfile.jl")
+include("resumablefiltering.jl")
+include("prompt.jl")
+include("display.jl")
+include("search.jl")
+
+__init__() = foreach(addface!, FACES)
+
+end
diff --git a/stdlib/REPL/src/History/display.jl b/stdlib/REPL/src/History/display.jl
new file mode 100644
index 0000000000000..54397fa0e0545
--- /dev/null
+++ b/stdlib/REPL/src/History/display.jl
@@ -0,0 +1,812 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+struct SelectorState
+    area::@NamedTuple{height::Int, width::Int}
+    query::String
+    filter::FilterSpec
+    candidates::Vector{HistEntry}
+    scroll::Int
+    selection::@NamedTuple{active::Vector{Int}, gathered::Vector{HistEntry}}
+    hover::Int
+end
+
+SelectorState((height, width), query::String, filter::FilterSpec, candidates::Vector{HistEntry} = HistEntry[], gathered::Vector{HistEntry} = HistEntry[]) =
+    SelectorState((height, width), query, filter, candidates, -length(gathered), (; active = Int[], gathered), 1)
+
+const EMPTY_STATE = SelectorState((0, 0), "", FilterSpec(), [], 0, (active = Int[], gathered = HistEntry[]), 0)
+
+STATES = Pair{SelectorState, SelectorState}[]
+
+const LABELS = (
+    gatherdivider = S"{italic:carried over}",
+    preview_suggestion = S"Ctrl+S to save",
+    help_prompt = S"{REPL_History_search_hint,shadow:try {REPL_History_search_hint,(slant=normal):?} for help} ",
+)
+
+const SYNC_UPDATE_BEGIN = "\eP=1s\e\\"
+const SYNC_UPDATE_END = "\eP=2s\e\\"
+const CLEAR_BELOW = "\e[1G\e[J"
+
+"""
+    redisplay_all(io::IO, oldstate::SelectorState, newstate::SelectorState, pstate::PromptState; buf)
+
+Diff and redraw the entire UI (candidates, preview, prompt).
+
+Uses ANSI sync sequences to update only changed regions between
+`oldstate` and `newstate`, then reprints the prompt.
+"""
+function redisplay_all(io::IO, oldstate::SelectorState, newstate::SelectorState, pstate::REPL.LineEdit.PromptState;
+                       buf::IOContext{IOBuffer} = IOContext(IOBuffer(), io))
+    # Calculate dimensions
+    oldrows = componentrows(oldstate)
+    newrows = componentrows(newstate)
+    # Redisplay components
+    synccap = haskey(Base.current_terminfo(), :Sync)
+    synccap && print(buf, SYNC_UPDATE_BEGIN)
+    currentrow = 0
+    if newstate.query == FILTER_SHORTHELP_QUERY
+        print(buf, CLEAR_BELOW * '\n', FILTER_SHORTHELP)
+        currentrow += 1 + count('\n', String(FILTER_SHORTHELP))
+    elseif newstate.query == FILTER_LONGHELP_QUERY
+        print(buf, CLEAR_BELOW * '\n', FILTER_LONGHELP)
+        currentrow += 1 + count('\n', String(FILTER_LONGHELP))
+    else
+        println(buf) # Move to line under prompt
+        currentrow += 1
+        if oldstate.area.width > newstate.area.width || oldstate.query == FILTER_SHORTHELP_QUERY
+            print(buf, CLEAR_BELOW)
+            oldstate = EMPTY_STATE
+        end
+        refresh_cands = oldstate.query != newstate.query ||
+            length(oldstate.candidates) != length(newstate.candidates) ||
+            oldstate.area != newstate.area ||
+            oldstate.scroll != newstate.scroll ||
+            oldstate.selection.active != newstate.selection.active ||
+            oldstate.hover != newstate.hover ||
+            oldstate.filter != newstate.filter
+        refresh_preview = refresh_cands ||
+            oldstate.selection.gathered != newstate.selection.gathered ||
+            gethover(oldstate) != gethover(newstate)
+        if refresh_cands
+            redisplay_candidates(buf, oldstate, oldrows.candidates, newstate, newrows.candidates)
+            currentrow += newrows.candidates
+        end
+        if refresh_preview
+            if !refresh_cands
+                print(buf, '\n' ^ newrows.candidates)
+                currentrow += newrows.candidates
+            end
+            redisplay_preview(buf, oldstate, oldrows.preview, newstate, newrows.preview)
+            currentrow += max(0, newrows.preview - 1)
+        end
+    end
+    # Restore row pos
+    print(buf, "\e[", currentrow, "A\e[1G")
+    redisplay_prompt(buf, oldstate, newstate, pstate)
+    # Restore column pos
+    print(buf, "\e[", textwidth(PROMPT_TEXT) + position(pstate.input_buffer) + 1, 'G')
+    synccap && print(buf, SYNC_UPDATE_END)
+    if Base.generating_output()
+        # Write output in chunks seems to avoid a hang that happens here during precompilation
+        # of the history on mac (io gets full without anything draining it?)
+        seekstart(buf.io)
+        data = read(buf.io)
+        for chunk in Iterators.partition(data, 32)
+            write(io, chunk)
+            flush(io)
+        end
+    else
+        write(io, seekstart(buf.io))
+    end
+    truncate(buf.io, 0)
+    flush(io)
+end
+
+"""
+    componentrows(state::SelectorState) -> (; candidates::Int, preview::Int)
+
+Split available terminal rows into candidate list and preview panes.
+
+Clamps preview height between one-third and two-thirds of the usable area.
+"""
+function componentrows(state::SelectorState)
+    available_rows = 2 * (state.area.height - 1) ÷ 3 # REVIEW: maybe `min(height, ?)`
+    preview_min, preview_max = available_rows ÷ 3, 2 * available_rows ÷ 3
+    nlines_preview = countlines_selected(state)
+    # To prevent jittering when key-repeat is happening with TAB at
+    # the end of a list of multiple selected candidates, stop
+    # the final candidate from affecting the size of the preview pane.
+    if length(state.selection.active) > 2 &&
+        last(state.selection.active) == lastindex(state.candidates)
+        nlines_preview -= count('\n', state.candidates[end].content) + 1
+    end
+    preview_rows = clamp(nlines_preview, preview_min, preview_max)
+    if preview_min <= 2
+        preview_rows = 0 # Not worth just showing the frame
+    end
+    candidate_rows = available_rows - preview_rows
+    (; candidates = candidate_rows, preview = preview_rows)
+end
+
+"""
+    countlines_selected(state::SelectorState) -> Int
+
+Count display lines needed for active and gathered entries.
+
+Includes one line per entry plus extra lines for multi-line content
+and a divider if any gathered entries exist.
+"""
+function countlines_selected((; candidates, selection)::SelectorState)
+    (; active, gathered) = selection
+    nlines = 0
+    for idx in active
+        entry = candidates[idx]
+        nlines += 1 + count('\n', entry.content)
+    end
+    if !isempty(gathered)
+        nlines += 1 # The divider line
+        for entry in gathered
+            nlines += 1 + count('\n', entry.content)
+        end
+    end
+    nlines
+end
+
+const BASE_MODE = :julia
+
+const MODE_FACES = Dict(
+    :julia => :green,
+    :shell => :red,
+    :pkg => :blue,
+    :help => :yellow,
+)
+
+"""
+    redisplay_prompt(io::IO, oldstate::SelectorState, newstate::SelectorState, pstate::PromptState)
+
+Redraw just the prompt line with updated query, separators, and hints.
+
+Styles prefixes, match-type indicators, and result counts based on cursor position in `pstate`.
+"""
+function redisplay_prompt(io::IO, oldstate::SelectorState, newstate::SelectorState, pstate::REPL.LineEdit.PromptState)
+    # oldstate.query == newstate.query && return
+    hov = gethover(newstate)
+    query = newstate.query
+    styquery = S"$query"
+    styconds = ConditionSet(styquery)
+    qpos = position(pstate.input_buffer)
+    kindname = ""
+    patend = 0
+    for (name, substrs) in (("words", styconds.words),
+                            ("exact", styconds.exacts),
+                            ("negated", styconds.negatives),
+                            ("initialism", styconds.initialisms),
+                            ("regexp", styconds.regexps),
+                            ("fuzzy", styconds.fuzzy),
+                            ("mode", styconds.modes))
+        for substr in substrs
+            start, len = substr.offset, substr.ncodeunits
+            patend = max(patend, start + len)
+            if start > 1
+                if query[start] == FILTER_SEPARATOR
+                    face!(styquery[start:start], :REPL_History_search_separator)
+                else
+                    face!(styquery[start:start], :REPL_History_search_prefix)
+                    face!(styquery[start-1:start-1], :REPL_History_search_separator)
+                end
+            elseif start > 0
+                face!(styquery[start:start],
+                      if query[start] == FILTER_SEPARATOR
+                          :REPL_History_search_separator
+                      else
+                          :REPL_History_search_prefix
+                      end)
+            end
+            isempty(kindname) || continue
+            if start <= qpos <= start + len
+                kindname = name
+                break
+            end
+        end
+    end
+    if patend < ncodeunits(query)
+        if query[patend+1] == FILTER_SEPARATOR
+            face!(styquery[patend+1:patend+1], :REPL_History_search_separator)
+            if patend + 1 < ncodeunits(query) && query[patend+2] ∈ FILTER_PREFIXES
+                face!(styquery[patend+2:patend+2], :REPL_History_search_prefix)
+            elseif isempty(kindname)
+                kindname = "separator"
+            end
+        elseif ncodeunits(query) == 1 && query[1] ∈ FILTER_PREFIXES
+            face!(styquery[1:1], :REPL_History_search_prefix)
+        end
+    end
+    prefix = S"{bold:▪:} "
+    ncand = length(newstate.candidates)
+    resultnum = S"{REPL_History_search_results:[$(ncand - newstate.hover + 1)/$ncand]}"
+    padspaces = newstate.area.width - sum(textwidth, (prefix, styquery, resultnum))
+    suffix = if isempty(styquery)
+        LABELS.help_prompt
+    elseif newstate.query ∈ (FILTER_SHORTHELP_QUERY, FILTER_LONGHELP_QUERY)
+        S"{REPL_History_search_hint:help} "
+    elseif kindname != ""
+        S"{REPL_History_search_hint:$kindname} "
+    else
+        S""
+    end
+    if textwidth(suffix) < padspaces
+        padspaces -= textwidth(suffix)
+    else
+        suffix = S""
+    end
+    # TODO: Replace with a face-based approach when possible
+    print(io, pstate.p.prompt_prefix, prefix, "\e[0m",
+          styquery, ' ' ^ max(0, padspaces), suffix, resultnum)
+end
+
+# Unicode circles:
+# - large: ● ○
+# - medium: ⏺🞉🞈🞇🞆🞅⚬🞊⦿⦾
+# - small: •⋅∙∘◦
+# - dots: 🞄⁃·
+
+const LIST_MARKERS = if Sys.isapple()
+    # '🞇' is not available by default, and '⬤' is oversized, so we must compromise.
+    (selected = AnnotatedChar('⏺', [(:face, :REPL_History_search_selected)]),
+     hover = AnnotatedChar('⦿', [(:face, :REPL_History_search_selected)]),
+     unselected = AnnotatedChar('◦', [(:face, :REPL_History_search_unselected)]),
+     pending = AnnotatedChar('·', [(:face, :shadow)]))
+else
+    # Linux tends to have pretty fantastic OOTB Unicode support, with fonts
+    # like Symbola installed by default, so we can go for the best symbols.
+    (selected = AnnotatedChar('⬤', [(:face, :REPL_History_search_selected)]),
+     hover = AnnotatedChar('🞇', [(:face, :REPL_History_search_selected)]),
+     unselected = AnnotatedChar('◦', [(:face, :REPL_History_search_unselected)]),
+     pending = AnnotatedChar('🞄', [(:face, :shadow)]))
+end
+
+const NEWLINE_MARKER = S"{shadow:↩ }"
+const LINE_ELLIPSIS = S"{shadow:…}"
+
+"""
+    hoveridx(state::SelectorState) -> Int
+
+Compute the signed index into `candidates` or `gathered` for hover.
+
+Positive values index `candidates`, negative values index `gathered`, zero is
+invalid.
+"""
+function hoveridx(state::SelectorState)
+    if state.hover > 0
+        length(state.candidates) - state.hover + 1
+    else
+        state.hover
+    end
+end
+
+"""
+    ishover(state::SelectorState, idx::Int) -> Bool
+
+Return true if `idx` matches the current hover position.
+
+Used to highlight the hovered line in the UI.
+"""
+ishover(state::SelectorState, idx::Int) = idx == hoveridx(state)
+
+"""
+    gethover(state::SelectorState) -> Union{HistEntry, Nothing}
+
+Return the `HistEntry` under the cursor (hover position), or `nothing`.
+
+Handles positive hover for `candidates` and negative for `gathered`.
+"""
+function gethover(state::SelectorState)
+    idx = hoveridx(state)
+    if idx ∈ axes(state.candidates, 1)
+        state.candidates[idx]
+    elseif idx < 0 && -idx ∈ axes(state.selection.gathered, 1)
+        state.selection.gathered[-idx]
+    end
+end
+
+struct CandsState{V<:AbstractVector{HistEntry}}
+    search::FilterSpec
+    entries::V
+    selected::Vector{Int}
+    hover::Int
+    rows::Int
+    width::Int
+end
+
+
+"""
+    candidates(state::SelectorState, rows::Int) -> (; active::CandsState, gathered::CandsState)
+
+Compute visible slices of active and gathered entries for display.
+"""
+function candidates(state::SelectorState, rows::Int)
+    gathshift = 0
+    gathcount = clamp(-state.scroll, 0, length(state.selection.gathered))
+    if gathcount >= rows
+        gathshift = gathcount - rows + 1
+        gathcount = rows - 1
+    end
+    actcount = rows - gathcount - sign(gathcount)
+    offset = max(0, length(state.candidates) - actcount - max(0, state.scroll))
+    candend = offset + actcount
+    actcands = @view state.candidates[max(begin, begin+offset):min(end, candend)]
+    actempty = actcount - length(actcands)
+    actsel = Int[idx - offset for idx in state.selection.active]
+    if !isempty(state.selection.gathered)
+        append!(actsel, filter!(!isnothing, indexin(state.selection.gathered, actcands)))
+    end
+    active = CandsState(
+        state.filter,
+        actcands,
+        actsel,
+        rows + state.scroll - state.hover - actempty + gathshift + (state.scroll >= 0),
+        actcount,
+        state.area.width)
+    gathcands = @view state.selection.gathered[begin+gathshift:min(end, gathshift+gathcount)]
+    gathered = CandsState(
+        state.filter,
+        gathcands,
+        collect(axes(gathcands, 1)),
+        -state.hover - gathshift,
+        gathcount,
+        state.area.width)
+    (; active, gathered)
+end
+
+"""
+    redisplay_candidates(io::IO, oldstate::SelectorState, oldrows::Int, newstate::SelectorState, newrows::Int)
+
+Diff and redraw the candidate list pane between two states.
+
+Only lines that changed (entry text, selection, hover, width) are reprinted;
+unchanged lines remain.
+"""
+function redisplay_candidates(io::IO, oldstate::SelectorState, oldrows::Int, newstate::SelectorState, newrows::Int)
+    danglingdivider = false
+    if oldstate.scroll < 0 && newstate.scroll == 0
+        newrows -= 1
+        danglingdivider = true
+    end
+    oldcands = candidates(oldstate, oldrows)
+    newcands = candidates(newstate, newrows)
+    samefilter = oldstate.filter == newstate.filter
+    # Redisplay active candidates
+    update_candidates(io, oldcands.active, newcands.active,
+                      !samefilter || oldstate.scroll == 0 && !isempty(oldstate.selection.gathered))
+    # Redisplay gathered candidates
+    gathchange = oldrows != newrows || length(oldcands.gathered.entries) != length(newcands.gathered.entries)
+    if isempty(newcands.gathered.entries) && !danglingdivider
+    elseif gathchange || danglingdivider || oldstate.area != newstate.area
+        netlines = newstate.area.width - textwidth(LABELS.gatherdivider) - 6
+        leftlines = netlines ÷ 2
+        rightlines = netlines - leftlines
+        println(io, S" {shadow:╶$('─' ^ leftlines)╴$(LABELS.gatherdivider)╶$('─' ^ rightlines)╴} ")
+    else
+        println(io)
+    end
+    update_candidates(io, oldcands.gathered, newcands.gathered, gathchange != 0)
+end
+
+"""
+    update_candidates(io::IO, oldcands::CandsState, newcands::CandsState, force::Bool = false)
+
+Write an update to `io` that changes the display from `oldcands` to `newcands`.
+
+Only changes are printed, and exactly `length(newcands.entries)` lines are printed.
+"""
+function update_candidates(io::IO, oldcands::CandsState, newcands::CandsState, force::Bool = false)
+    thisline = 1
+    for (i, (old, new)) in enumerate(zip(oldcands.entries, newcands.entries))
+        oldsel, newsel = i ∈ oldcands.selected, i ∈ newcands.selected
+        oldhov, newhov = i == oldcands.hover, i == newcands.hover
+        if !force && old == new && oldsel == newsel && oldhov == newhov && oldcands.width == newcands.width
+            println(io)
+        else
+            print_candidate(io, newcands.search, new, newcands.width;
+                            selected = newsel, hover = newhov)
+        end
+        thisline = i + 1
+    end
+    for (i, new) in enumerate(newcands.entries)
+        i <= length(oldcands.entries) && continue
+        print_candidate(io, newcands.search, new, newcands.width;
+                        selected = i ∈ newcands.selected,
+                        hover = i == newcands.hover)
+        thisline = i + 1
+    end
+    for _ in thisline:newcands.rows
+        print(io, "\e[K ", LIST_MARKERS.pending, '\n')
+    end
+end
+
+const DURATIONS = (
+    m = 60,
+    h = 60 * 60,
+    d = 24 * 60 * 60,
+    w = 7 * 24 * 60 * 60,
+    y = 365 * 24 * 60 * 60,
+)
+
+"""
+    humanage(seconds::Integer) -> String
+
+Convert `seconds` into a compact age string with largest unit.
+
+```julia-repl
+julia> humanage(70)
+"1m"
+
+julia> humanage(4000)
+"1h"
+```
+"""
+function humanage(seconds::Integer)
+    unit, count = :s, seconds
+    for (dunit, dsecs) in pairs(DURATIONS)
+        n = seconds ÷ dsecs
+        n == 0 && break
+        unit, count = dunit, n
+    end
+    "$count$unit"
+end
+
+"""
+    print_candidate(io::IO, search::FilterSpec, cand::HistEntry, width::Int; selected::Bool, hover::Bool)
+
+Render one history entry line with markers, mode hint, age, and highlighted content.
+
+Truncates and focuses on matches to fit `width`.
+"""
+function print_candidate(io::IO, search::FilterSpec, cand::HistEntry, width::Int; selected::Bool, hover::Bool)
+    print(io, ' ', if selected
+              LIST_MARKERS.selected
+          elseif hover
+              LIST_MARKERS.hover
+          else
+              LIST_MARKERS.unselected
+          end, ' ')
+    age = humanage(floor(Int, ((now(UTC) - cand.date)::Millisecond).value ÷ 1000))
+    agedec = S" {shadow,light,italic:$age}"
+    modehint = if cand.mode == BASE_MODE
+        S""
+    else
+        modeface = get(MODE_FACES, cand.mode, :grey)
+        if hover
+            S"{region: {bold,inverse,$modeface: $(cand.mode) }}"
+        elseif ncodeunits(age) == 2
+            S" {$modeface:◼}  "
+        else
+            S" {$modeface:◼} "
+        end
+    end
+    decorationlen = 3 #= spc + marker + spc =# + textwidth(modehint) + textwidth(agedec) + 1 #= spc =#
+    flatcand = replace(highlightcand(cand), r"\r?\n\s*" => NEWLINE_MARKER)
+    candstr = focus_matches(search, flatcand, width - decorationlen)
+    if hover
+        face!(candstr, :region)
+        face!(agedec, :region)
+    end
+    println(io, candstr, modehint, agedec, ' ')
+end
+
+"""
+    highlightcand(cand::HistEntry) -> AnnotatedString
+
+Syntax-highlight Julia content or return raw content otherwise.
+"""
+function highlightcand(cand::HistEntry)
+    if cand.mode === :julia
+        highlight(cand.content)
+    else
+        S"$(cand.content)"
+    end
+end
+
+"""
+    focus_matches(search::FilterSpec, content::AnnotatedString, targetwidth::Int) -> AnnotatedString
+
+Center and trim `content` around matching regions, adding ellipses.
+
+To best display matches, this function operates in multiple stages:
+1. Find all matching character ranges in `content` via `matchregions(search, String(content))`.
+2. Choose a primary match region that can be fully shown within `targetwidth`,
+   preferring the first match.
+3. Starting from the end of that region, expand a window leftwards up to
+   `targetwidth`, accounting for character widths.
+4. If the left bound exceeds the start of `content`, reserve space for a leading
+   ellipsis (`LINE_ELLIPSIS`) and adjust the window.
+5. Expand the window rightwards similarly, inserting a trailing ellipsis if
+   there is remaining text.
+6. Slice out the computed substring from `content`, preserving existing annotations.
+7. Re-apply the match highlight face (`:REPL_History_search_match`) to any
+   regions within the window.
+8. Pad the result with spaces if its width is less than `targetwidth`.
+
+The returned `AnnotatedString` is exactly `targetwidth` columns wide,
+guaranteeing at least one full match is visible and highlighted.
+"""
+function focus_matches(search::FilterSpec, content::AnnotatedString{String}, targetwidth::Int)
+    cstr = String(content) # zero-cost
+    mregions = matchregions(search, cstr)
+    isempty(mregions) && return rpad(rtruncate(content, targetwidth, LINE_ELLIPSIS), targetwidth)
+    mstart = first(first(mregions))
+    mlast = first(mregions)
+    ellipwidth = textwidth(LINE_ELLIPSIS)
+    # Assume approximately one cell per character, and refine later
+    for (i, region) in Iterators.reverse(enumerate(mregions))
+        if first(region) - mstart <= targetwidth - 2 * ellipwidth
+            mlast = region
+            break
+        end
+    end
+    # Start at the end of the last region, and extend backwards `targetwidth` characters
+    left, right = let pos = thisind(cstr, last(mlast)); (pos, pos) end
+    width = textwidth(cstr[left])
+    while left > firstindex(cstr)
+        lnext = prevind(cstr, left)
+        lwidth = textwidth(cstr[lnext])
+        if width + lwidth > targetwidth - 2 * ellipwidth
+            break
+        end
+        width += lwidth
+        left = lnext
+    end
+    # Check to see if we have reached the beginning of the first match,
+    # if we haven't we want to shrink the region to the left until the
+    # beginning of the first match is reached.
+    if left > first(mstart)
+        while left > first(mstart)
+            left = prevind(cstr, left)
+            lwidth = textwidth(cstr[left])
+            width += lwidth
+            # We'll move according to the assumption that each character
+            # is one cell wide, but account for the width correctly and
+            # adjust for any underestimate later.
+            for _ in 1:lwidth
+                width -= textwidth(cstr[right])
+                right = prevind(cstr, right)
+                right == left && break
+            end
+        end
+    end
+    isltrunc, isrtrunc = left > firstindex(cstr), right < lastindex(cstr)
+    # Use any available space to extend to the left.
+    if width < targetwidth - (isltrunc + isrtrunc) * ellipwidth && left < firstindex(cstr)
+        while left < firstindex(cstr)
+            lnext = prevind(cstr, left)
+            lwidth = textwidth(cstr[lnext])
+            isnextltrunc = lnext > firstindex(cstr)
+            nellipsis = isnextltrunc + isrtrunc
+            if width + lwidth > targetwidth - nellipsis * ellipwidth
+                break
+            end
+            width += lwidth
+            left = lnext
+        end
+        isltrunc = left > firstindex(cstr)
+    end
+    # Use any available space to extend to the right.
+    if width < targetwidth - (isltrunc + isrtrunc) * ellipwidth && right < lastindex(cstr)
+        while right < lastindex(cstr)
+            rnext = nextind(cstr, right)
+            rwidth = textwidth(cstr[rnext])
+            isnextrtrunc = rnext < lastindex(cstr)
+            nellipsis = isltrunc + isnextrtrunc
+            if width + rwidth > targetwidth - nellipsis * ellipwidth
+                break
+            end
+            width += rwidth
+            right = rnext
+        end
+    end
+    # Construct the new region
+    regstr = AnnotatedString(content[left:right])
+    # Emphasise matches
+    for region in mregions
+        (last(region) < left || first(region) > right) && continue
+        adjregion = (max(left, first(region)) - left + 1):(min(right, last(region)) - left + 1)
+        face!(regstr, adjregion, :REPL_History_search_match)
+    end
+    # Add ellipses
+    ellipstr = if left > firstindex(cstr) && right < lastindex(cstr)
+        width += 2 * ellipwidth
+        LINE_ELLIPSIS * regstr * LINE_ELLIPSIS
+    elseif left > firstindex(cstr)
+        width += ellipwidth
+        LINE_ELLIPSIS * regstr
+    elseif right < lastindex(cstr)
+        width += ellipwidth
+        regstr * LINE_ELLIPSIS
+    else
+        regstr
+    end
+    # Pad (if necessary)
+    if width < targetwidth
+        rpad(ellipstr, targetwidth)
+    else
+        ellipstr
+    end
+end
+
+"""
+    redisplay_preview(io::IO, oldstate::SelectorState, oldrows::Int, newstate::SelectorState, newrows::Int)
+
+Diff and redraw the preview pane (right side) with boxed content.
+
+Shows hover or gathered entries in a box.
+"""
+function redisplay_preview(io::IO, oldstate::SelectorState, oldrows::Int, newstate::SelectorState, newrows::Int)
+    newrows == 0 && return
+    function getcand(state::SelectorState, idx::Int)
+        if idx ∈ axes(state.candidates, 1)
+            state.candidates[idx]
+        elseif -idx ∈ axes(state.selection.gathered, 1)
+            state.selection.gathered[-idx]
+        else
+            throw(ArgumentError("Invalid candidate index: $idx")) # Should never happen
+        end
+    end
+    function getselidxs(state::SelectorState)
+        idxs = collect(-1:-1:-length(state.selection.gathered))
+        append!(idxs, state.selection.active)
+        sort!(idxs, by = i -> getcand(state, i).index)
+    end
+    rtruncpad(s::AbstractString, width::Int) =
+        rpad(rtruncate(s, width, LINE_ELLIPSIS), width)
+    bar = S"{shadow:│}"
+    innerwidth = newstate.area.width - 2
+    if oldstate.area != newstate.area || (oldstate.area.height - oldrows) != (newstate.area.height - newrows)
+        println(io, S"{shadow:╭$('─' ^ innerwidth)╮}")
+    else
+        println(io)
+    end
+    if newrows - 2 < 1
+        # Well, this is awkward.
+    elseif isempty(newstate.selection.active) && isempty(newstate.selection.gathered)
+        linesprinted = if (gethover(newstate) != gethover(oldstate) ||
+            oldstate.area != newstate.area ||
+            oldrows != newrows ||
+            oldstate.filter != newstate.filter)
+            hovcand = gethover(newstate)
+            if !isnothing(hovcand)
+                hovcontent = highlightcand(hovcand)
+                for region in matchregions(newstate.filter, String(hovcontent))
+                    face!(hovcontent[region], :REPL_History_search_match)
+                end
+                if hovcand.mode !== BASE_MODE
+                    mcolor = get(MODE_FACES, hovcand.mode, :grey)
+                    hovcontent = S"{bold,$mcolor:$(hovcand.mode)>} " * hovcontent
+                end
+                boxedcontent(io, hovcontent, newstate.area.width, newrows - 2)
+            else
+                0
+            end
+        else
+            print(io, '\n' ^ (newrows - 2))
+            newrows - 2
+        end
+        for _ in (linesprinted + 1):(newrows - 2)
+            println(io, bar, ' '^innerwidth, bar)
+        end
+    else
+        linesprinted = 0
+        seltexts = AnnotatedString{String}[]
+        for idx in getselidxs(newstate)
+            entry = getcand(newstate, idx)
+            content = highlightcand(entry)
+            ishover(newstate, idx) && face!(content, :region)
+            push!(seltexts, content)
+        end
+        linecount = sum(t -> 1 + count('\n', String(t)), seltexts, init=0)
+        for (i, content) in enumerate(seltexts)
+            clines = 1 + count('\n', String(content))
+            if linesprinted + clines < newrows - 2 || (i == length(seltexts) && linesprinted + clines == newrows - 2)
+                for line in eachsplit(content, '\n')
+                    println(io, bar, ' ', rtruncpad(line, innerwidth - 2), ' ', bar)
+                end
+                linesprinted += clines
+            else
+                remaininglines = newrows - 2 - linesprinted
+                for (i, line) in enumerate(eachsplit(content, '\n'))
+                    i == remaininglines && break
+                    println(io, bar, ' ', rtruncpad(line, innerwidth - 2), ' ', bar)
+                end
+                msg = S"{julia_comment:⋮ {italic:$(linecount - newrows + 3) lines hidden}}"
+                println(io, bar, ' ', rtruncpad(msg, innerwidth - 2), ' ', bar)
+                linesprinted += remaininglines
+                break
+            end
+        end
+        for _ in (linesprinted + 1):(newrows - 2)
+            println(io, bar, ' ' ^ innerwidth, bar)
+        end
+    end
+    if oldstate.area != newstate.area || length(oldstate.selection.active) != length(newstate.selection.active)
+        if textwidth(LABELS.preview_suggestion) < innerwidth
+            line = '─' ^ (innerwidth - textwidth(LABELS.preview_suggestion) - 2)
+            print(io, S"{shadow:╰$(line)╴$(LABELS.preview_suggestion)╶╯}")
+        else
+            print(io, S"{shadow:╰$('─' ^ innerwidth)╯}")
+        end
+    end
+end
+
+"""
+    boxedcontent(io::IO, content::AnnotatedString, width::Int, maxlines::Int) -> Int
+
+Draw `content` inside a Unicode box, wrapping or truncating to `width` and `maxlines`.
+
+Returns the number of printed lines.
+"""
+function boxedcontent(io::IO, content::AnnotatedString{String}, width::Int, maxlines::Int)
+    function breaklines(content::AnnotatedString{String}, maxwidth::Int)
+        textwidth(content) <= maxwidth && return [content]
+        spans = AnnotatedString{String}[]
+        basestr = String(content) # Because of expensive char iteration
+        start, pos, linewidth = 1, 0, 0
+        for char in basestr
+            linewidth += textwidth(char)
+            pos = nextind(basestr, pos)
+            if linewidth > maxwidth
+                spans = push!(spans, AnnotatedString(content[start:prevind(basestr, pos)]))
+                start = pos
+                linewidth = textwidth(char)
+            end
+        end
+        if start <= length(basestr)
+            spans = push!(spans, AnnotatedString(content[start:end]))
+        end
+        spans
+    end
+    left, right = S"{shadow:│} ", S" {shadow:│}"
+    leftcont, rightcont = S"{shadow:┊▸}", S"{shadow:◂┊}"
+    if maxlines == 1
+        println(io, left,
+                rpad(rtruncate(content, width - 4, LINE_ELLIPSIS), width - 4),
+                right)
+        return 1
+    end
+    printedlines = 0
+    if ncodeunits(content) > (width * maxlines)
+        content = AnnotatedString(rtruncate(content, width * maxlines, ' '))
+    end
+    lines = split(content, '\n')
+    innerwidth = width - 4
+    for (i, line) in enumerate(lines)
+        printedlines >= maxlines && break
+        if textwidth(line) <= innerwidth
+            println(io, left, rpad(line, innerwidth), right)
+            printedlines += 1
+            continue
+        end
+        plainline = String(line)
+        indent, ichars = 0, 1
+        while isspace(plainline[ichars])
+            indent += textwidth(plainline[ichars])
+            ichars = nextind(plainline, ichars)
+        end
+        line = @view line[ichars:end]
+        spans = breaklines(AnnotatedString(line), innerwidth - 2 - indent)
+        for (i, span) in enumerate(spans)
+            prefix, suffix = if i == 1
+                S"", S"$LINE_ELLIPSIS "
+            elseif i == length(spans)
+                S"$LINE_ELLIPSIS", S" "
+            else
+                LINE_ELLIPSIS, LINE_ELLIPSIS
+            end
+            printedlines += 1
+            println(io, ifelse(i == 1, left, leftcont), ' ' ^ indent,
+                    prefix, rpad(span, innerwidth - 2 - indent), suffix,
+                    ifelse(i == length(spans) || printedlines == maxlines,
+                           right, rightcont))
+            printedlines >= maxlines && break
+        end
+    end
+    printedlines
+end
diff --git a/stdlib/REPL/src/History/histfile.jl b/stdlib/REPL/src/History/histfile.jl
new file mode 100644
index 0000000000000..b62dfeae504e2
--- /dev/null
+++ b/stdlib/REPL/src/History/histfile.jl
@@ -0,0 +1,288 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+"""
+    REPL_DATE_FORMAT
+
+The `DateFormat` used to parse and format timestamps in the REPL history file.
+"""
+const REPL_DATE_FORMAT = dateformat"yyyy-mm-dd HH:MM:SS"
+
+const HIST_OPEN_FLAGS =
+    Base.Filesystem.JL_O_APPEND |
+    Base.Filesystem.JL_O_RDWR |
+    Base.Filesystem.JL_O_CREAT |
+    Base.Filesystem.JL_O_CLOEXEC
+
+struct HistEntry
+    mode::Symbol
+    date::DateTime
+    # cwd::String
+    content::String
+    # resulttype::String
+    # session::UInt64
+    index::UInt32
+    # sindex::UInt16
+    # error::Bool
+end
+
+"""
+    HistoryFile(path::String) -> HistoryFile
+
+Create a handle to the history file at `path`, and store the `HistEntry` records.
+
+See also: `update!(::HistoryFile)`.
+"""
+struct HistoryFile <: AbstractVector{HistEntry}
+    path::String
+    file::Base.Filesystem.File
+    lock::ReentrantLock
+    records::Vector{HistEntry}
+end
+
+HistoryFile(path::String) = HistoryFile(
+    path, Base.Filesystem.open(path, HIST_OPEN_FLAGS, 0o640), ReentrantLock(), [])
+
+function HistoryFile()
+    nofile = Base.Filesystem.File(Base.Filesystem.INVALID_OS_HANDLE)
+    nofile.open = false
+    HistoryFile("", nofile, ReentrantLock(), [])
+end
+
+Base.lock(hist::HistoryFile) = lock(hist.lock)
+Base.trylock(hist::HistoryFile) = trylock(hist.lock)
+Base.unlock(hist::HistoryFile) = unlock(hist.lock)
+
+Base.size(hist::HistoryFile) = @lock hist (length(hist.records),)
+Base.getindex(hist::HistoryFile, i::Int) = hist.records[i]
+
+function ensureopen(hist::HistoryFile)
+    isopen(hist.file) && return true
+    isempty(hist.path) && return false
+    try
+        lock(hist)
+        newfile = Base.Filesystem.open(hist.path, HIST_OPEN_FLAGS, 0o640)
+        newfile.open || return false
+        hist.file.handle = newfile.handle
+        hist.file.open = true
+    finally
+        unlock(hist)
+    end
+end
+
+Base.close(hist::HistoryFile) = close(hist.file)
+
+"""
+    update!(hist::HistoryFile) -> HistoryFile
+
+Read any new entries from the history file and record them as `HistEntry`s.
+
+Malformed entries are skipped, and if the last entry is incomplete the IO
+position will be reset to the start of the entry.
+"""
+function update!(hist::HistoryFile)
+    (; file, records) = hist
+    # If the file has grown since the last read,
+    # we need to trigger a synchronisation of the
+    # stream state. This can be done with `fseek`,
+    # but that can't easily be called from Julia.
+    # Instead, we can use `filesize` to detect when
+    # we need to do this, and then use `peek` to
+    # trigger the synchronisation. This relies on
+    # undocumented implementation details, but
+    # there's not much to be done about that.
+    ensureopen(hist) || return hist
+    offset = position(file)
+    offset == filesize(file) && return hist
+    try
+        lock(hist)
+        bytes = read(file)
+        function findnext(data::Vector{UInt8}, index::Int, byte::UInt8, limit::Int = length(data))
+            for i in index:limit
+                data[i] == byte && return i
+            end
+            limit
+        end
+        function isstrmatch(data::Vector{UInt8}, at::Int, str::String)
+            at + ncodeunits(str) <= length(data) || return false
+            for (i, byte) in enumerate(codeunits(str))
+                data[at + i - 1] == byte || return false
+            end
+            true
+        end
+        histindex = if isempty(hist.records)
+            0
+        else
+            hist.records[end].index
+        end
+        pos = firstindex(bytes)
+        while true
+            pos >= length(bytes) && break
+            entrystart = pos
+            if bytes[pos] != UInt8('#')
+                @warn S"Malformed history entry: expected meta-line starting with {success:'#'} at byte {emphasis:$(offset + pos - 1)} in \
+                       {(underline=grey),link=$(Base.Filesystem.uripath(hist.path)):$(contractuser(hist.path))}, but found \
+                       {error:$(sprint(show, Char(bytes[pos])))} instead" _id=:invalid_history_entry maxlog=3 _file=nothing _line=nothing
+                pos = findnext(bytes, pos, UInt8('\n')) + 1
+                continue
+            end
+            time, mode = zero(DateTime), :julia
+            while pos < length(bytes) && bytes[pos] == UInt8('#')
+                pos += 1
+                while pos < length(bytes) && bytes[pos] == UInt8(' ')
+                    pos += 1
+                end
+                metastart = pos
+                metaend = findnext(bytes, pos, UInt8(':'))
+                pos = metaend + 1
+                while pos < length(bytes) && bytes[pos] == UInt8(' ')
+                    pos += 1
+                end
+                valstart = pos
+                valend = findnext(bytes, pos, UInt8('\n'))
+                pos = valend + 1
+                if isstrmatch(bytes, metastart, "mode:")
+                    mode = if isstrmatch(bytes, valstart, "julia") && bytes[valstart + ncodeunits("julia")] ∈ (UInt8('\n'), UInt8('\r'))
+                        :julia
+                    elseif isstrmatch(bytes, valstart, "help") && bytes[valstart + ncodeunits("help")] ∈ (UInt8('\n'), UInt8('\r'))
+                        :help
+                    elseif all(>(0x5a), view(bytes, valstart:valend-1))
+                        Symbol(bytes[valstart:valend-1])
+                    else
+                        Symbol(lowercase(String(bytes[valstart:valend-1])))
+                    end
+                elseif isstrmatch(bytes, metastart, "time:")
+                    valend = min(valend, valstart + ncodeunits("0000-00-00 00:00:00"))
+                    timestr = String(bytes[valstart:valend-1]) # It would be nice to avoid the string, but oh well
+                    timeval = tryparse(DateTime, timestr, REPL_DATE_FORMAT)
+                    if !isnothing(timeval)
+                        time = timeval
+                    end
+                end
+            end
+            if pos >= length(bytes)
+                # Potentially incomplete entry; roll back to start
+                seek(file, offset + entrystart - 1)
+                break
+            elseif bytes[pos] == UInt8(' ')
+                @warn S"Malformed history content: expected line to start with {success:'\\t'} at byte {emphasis:$(offset + pos - 1)} in \
+                        {(underline=grey),link=$(Base.Filesystem.uripath(hist.path)):$(contractuser(hist.path))}, but found \
+                        space ({error:' '}) instead. A text editor may have converted tabs to spaces in the \
+                        history file." _id=:invalid_history_content_spc maxlog=1 _file=nothing _line=nothing
+                continue
+            elseif bytes[pos] != UInt8('\t')
+                @warn S"Malformed history content: expected line to start with {success:'\\t'} at byte {emphasis:$(offset + pos - 1)} in \
+                        {(underline=grey),link=$(Base.Filesystem.uripath(hist.path)):$(contractuser(hist.path))}, but found \
+                        {error:$(sprint(show, Char(bytes[pos])))} instead" _id=:invalid_history_content maxlog=3 _file=nothing _line=nothing
+                continue
+            end
+            contentstart = pos
+            nlines = 0
+            while true
+                pos = findnext(bytes, pos, UInt8('\n'))
+                nlines += 1
+                if pos < length(bytes) && bytes[pos+1] == UInt8('\t')
+                    pos += 1
+                else
+                    break
+                end
+            end
+            contentend, pos = pos, contentstart
+            content = Vector{UInt8}(undef, contentend - contentstart - nlines)
+            bytescopied = 0
+            while pos < contentend
+                lineend = findnext(bytes, pos, UInt8('\n'))
+                nbytes = lineend - pos - (lineend == contentend)
+                copyto!(content, bytescopied + 1, bytes, pos + 1, nbytes)
+                bytescopied += nbytes
+                pos = lineend + 1
+            end
+            entry = HistEntry(mode, time, String(content), histindex += 1)
+            push!(records, entry)
+        end
+        seek(file, offset + pos - 1)
+    finally
+        unlock(hist)
+    end
+    hist
+end
+
+function Base.push!(hist::HistoryFile, entry::HistEntry)
+    try
+        lock(hist)
+        update!(hist)
+        entry = HistEntry(
+            if all(islowercase, String(entry.mode))
+                entry.mode
+            else
+                Symbol(lowercase(String(entry.mode)))
+            end,
+            round(entry.date, Dates.Second),
+            entry.content,
+            length(hist.records) + 1)
+        push!(hist.records, entry)
+        isopen(hist.file) || return hist
+        content = IOBuffer()
+        write(content, "# time: ",
+              Dates.format(entry.date, REPL_DATE_FORMAT), "Z\n",
+              "# mode: ", String(entry.mode), '\n')
+        replace(content, entry.content, r"^"ms => "\t")
+        write(content, '\n')
+        # Short version:
+        #
+        # Libuv supports opening files with an atomic append flag,
+        # and so if we pass the entire new entry to `uv_fs_write`
+        # with an offset of `-1`, the OS will ensure that the write
+        # is atomic. There are some caveats around this, but there's
+        # no silver bullet.
+        #
+        # Long version:
+        #
+        # Normally, we would need to make sure we've got unique access to the file,
+        # however because we opened it with `O_APPEND` the OS (as of POSIX.1-2017, and on:
+        # Linux/FreeBSD/Darwin/Windows) guarantees that concurrent writes will not tear.
+        #
+        # This requires that a single `write` call be used to write the entire new entry.
+        # This is not obvious, but if you look at `base/filesystem.jl` we can see that
+        # the `unsafe_write` call below is turned into a `uv_fs_write` call.
+        # Following this to `src/jl_uv.c` we can see this quickly turns into a `uv_fs_write`
+        # call, which will produce a `uv__fs_write_all` call, and then calls `uv__fs_write`
+        # in a loop until everything is written.
+        #
+        # This loop seems like it might allow writes to be interleaved, but since
+        # we know that `nbufs = 1` and `off = -1` (from the parameters set in `unsafe_write`
+        # and `jl_uv_write`), we can see that `uv__fs_write` will call the `write`
+        # syscall directly, and so we get the `O_APPEND` semantics guaranteed by the OS.
+        #
+        # POSIX does mention that `write` may write less bytes than it is asked to,
+        # but only when either:
+        # 1. There is insufficient space on the device, or
+        # 2. The size of the write exceeds `RLIMIT_FSIZE`, or
+        # 3. The call is interrupted by a signal handler.
+        #
+        # Any of these would cause issues regardless.
+        #
+        # Over in Windows-land, `FILE_APPEND_DATA` has been around for a while (and is used
+        # by libuv), and from reading `win/fs.c` we can see that a similar approach is taken
+        # using `WriteFile` calls. Before Windows 10 (on NTFS), v10.0.14393 update atomicity
+        # could be as small as 1 byte, but after that testing indicates that writes through
+        # to 1MB are written in a single operation. Given that this is not an upper limit,
+        # and it would be quite an extraordinary REPL entry, this seem safe enough.
+        #
+        # While in theory a split write may occur, in practice this seems exceptionally rare
+        # (near non-existent), and the previous pidfile locking approach is no silver bullet
+        # either, with its own set of "reasonable assumptions" like:
+        # 1. PIDs not being rapidly recycled
+        # 2. No process being able to delete and write a file faster than another
+        #    process can do the same
+        # 3. The PID number itself being written in one shot (see the above lack of
+        #    formal guarantees around `write`, which also applies here)
+        #
+        # All in all, relying on kernel inode locking with `O_APPEND` and whole writes
+        # seems like the sanest approach overall. Mutual exclusion isn't the priority
+        # here, safe appending is.
+        unsafe_write(hist.file, pointer(content.data), position(content) % UInt, Int64(-1))
+    finally
+        unlock(hist)
+    end
+    hist
+end
diff --git a/stdlib/REPL/src/History/prompt.jl b/stdlib/REPL/src/History/prompt.jl
new file mode 100644
index 0000000000000..78b87e7868344
--- /dev/null
+++ b/stdlib/REPL/src/History/prompt.jl
@@ -0,0 +1,165 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+const PROMPT_TEXT = "▪: "
+
+struct Event <: Function
+    info::Channel{Symbol}
+    name::Symbol
+end
+
+function (e::Event)(_...)
+    push!(e.info, e.name)
+    :ignore
+end
+
+"""
+    select_keymap(events::Channel{Symbol})
+
+Build a REPL.LineEdit keymap that pushes symbols into `events`.
+
+Binds arrows, page keys, Tab, Ctrl-C/D/S, and meta-< / > to
+`Event` or `Returns` actions for driving the prompt loop.
+"""
+function select_keymap(events::Channel{Symbol})
+    REPL.LineEdit.keymap([
+        Dict{Any, Any}(
+            # Up Arrow
+            "\e[A" => Event(events, :up),
+            "^P" => Event(events, :up),
+            # Down Arrow
+            "\e[B" => Event(events, :down),
+            "^N" => Event(events, :down),
+            # Tab
+            '\t' => Event(events, :tab),
+            # Page up
+            "\e[5~" => Event(events, :pageup),
+            "\ev" => Event(events, :pageup),
+            # Page down
+            "\e[6~" => Event(events, :pagedown),
+            "^V" => Event(events, :pagedown),
+            # Meta + < / >
+            "\e<" => Event(events, :jumpfirst),
+            "\e>" => Event(events, :jumplast),
+            #
+            "^L" => Event(events, :clear),
+            # Exits
+            "^C" => Returns(:abort),
+            "^D" => Returns(:abort),
+            "^G" => Returns(:abort),
+            "\e\e" => Returns(:abort),
+            "^S" => Returns(:save),
+            "^Y" => Returns(:copy),
+        ),
+        REPL.LineEdit.default_keymap,
+        REPL.LineEdit.escape_defaults])
+end
+
+"""
+    create_prompt(events::Channel{Symbol}, term)
+
+Initialize a custom REPL prompt tied to `events` using the existing `term`.
+
+Returns a tuple `(term, prompt, istate, pstate)` ready for
+input handling and display.
+"""
+function create_prompt(events::Channel{Symbol}, term, prefix::String = "\e[90m")
+    prompt = REPL.LineEdit.Prompt(
+        PROMPT_TEXT, # prompt
+        prefix, "\e[0m", # prompt_prefix, prompt_suffix
+        "", "", "", # output_prefix, output_prefix_prefix, output_prefix_suffix
+        select_keymap(events), # keymap_dict
+        nothing, # repl
+        REPL.LatexCompletions(), # complete
+        _ -> true, # on_enter
+        () -> nothing, # on_done
+        REPL.LineEdit.EmptyHistoryProvider(), # hist
+        false, # sticky
+        REPL.StylingPasses.StylingPass[]) # styling_passes
+    interface = REPL.LineEdit.ModalInterface([prompt])
+    istate = REPL.LineEdit.init_state(term, interface)
+    pstate = istate.mode_state[prompt]
+    (; term, prompt, istate, pstate)
+end
+
+"""
+    runprompt!((; term,prompt,pstate,istate), events::Channel{Symbol})
+
+Drive the prompt input loop until confirm, save, or abort.
+
+Emits `:edit`, `:confirm`, `:save`, or `:abort` into `events`,
+manages raw mode and bracketed paste, and cleans up on exit.
+"""
+function runprompt!((; term, prompt, pstate, istate), events::Channel{Symbol})
+    Base.reseteof(term)
+    REPL.LineEdit.raw!(term, true)
+    REPL.LineEdit.enable_bracketed_paste(term)
+    try
+        pstate.ias = REPL.LineEdit.InputAreaState(0, 0)
+        REPL.LineEdit.refresh_multi_line(term, pstate)
+        while true
+            kmap = REPL.LineEdit.keymap(pstate, prompt)
+            matchfn = REPL.LineEdit.match_input(kmap, istate)
+            kdata = REPL.LineEdit.keymap_data(pstate, prompt)
+            status = matchfn(istate, kdata)
+            if status === :ok
+                push!(events, :edit)
+            elseif status === :ignore
+                istate.last_action = istate.current_action
+            elseif status === :done
+                print("\e[F")
+                push!(events, :confirm)
+                break
+            elseif status === :copy
+                print("\e[1G\e[J")
+                push!(events, status)
+                break
+            elseif status === :save
+                print("\e[1G\e[J")
+                dest = savedest(term)
+                if isnothing(dest)
+                    push!(events, :redraw)
+                else
+                    push!(events, dest)
+                    break
+                end
+            else
+                push!(events, :abort)
+                break
+            end
+        end
+    finally
+        REPL.LineEdit.raw!(term, false) &&
+            REPL.LineEdit.disable_bracketed_paste(term)
+    end
+end
+
+function savedest(term::Base.Terminals.TTYTerminal)
+    out = term.out_stream
+    print(out, "\e[1G\e[J")
+    clipsave = true
+    try
+        print(out, get(Base.current_terminfo(), :cursor_invisible, ""))
+        fclip, ffile = [:emphasis, :bold], [:grey]
+        char = '\0'
+        while true
+            print(out, S"\e[1G\e[2K{bold,grey:history>} {bold,emphasis:save to} {$fclip,inverse: Clipboard } {$ffile,inverse: File }   {shadow:Tab to toggle ⋅ ⏎ to select}")
+            ichar = read(term.in_stream, Char)
+            if ichar ∈ ('\x03', '\x18', '\a') || char == ichar == '\e'
+                return
+            elseif ichar == '\r'
+                break
+            end
+            char = ichar
+            fclip, ffile = ffile, fclip
+            clipsave = !clipsave
+        end
+    finally
+        # NOTE: While it may look like `:cursor_visible` would be the
+        # appropriate choice to reverse `:cursor_invisible`, unfortunately
+        # tmux-256color declares a sequence that doesn't actually make
+        # the cursor become visible again 😑.
+        print(out, get(Base.current_terminfo(), :cursor_normal, ""))
+        print(out, "\e[1G\e[2K")
+    end
+    if clipsave; :copy else :filesave end
+end
diff --git a/stdlib/REPL/src/History/resumablefiltering.jl b/stdlib/REPL/src/History/resumablefiltering.jl
new file mode 100644
index 0000000000000..8d1f377266a2e
--- /dev/null
+++ b/stdlib/REPL/src/History/resumablefiltering.jl
@@ -0,0 +1,351 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+struct ConditionSet{S}
+    words::Vector{SubString{S}}
+    exacts::Vector{SubString{S}}
+    negatives::Vector{SubString{S}}
+    initialisms::Vector{SubString{S}}
+    fuzzy::Vector{SubString{S}}
+    regexps::Vector{SubString{S}}
+    modes::Vector{SubString{S}}
+end
+
+ConditionSet{S}() where {S} = ConditionSet{S}([], [], [], [], [], [], [])
+
+"""
+    FILTER_SEPARATOR
+
+Character used to separate multiple search conditions in a single query.
+"""
+const FILTER_SEPARATOR = ';'
+
+"""
+    FILTER_PREFIXES
+
+List of single-character prefixes that set search modes.
+"""
+const FILTER_PREFIXES = ('!', '`', '=', '/', '~')
+
+"""
+    FILTER_SHORTHELP_QUERY
+
+The special single-character query that triggers display of `FILTER_SHORTHELP`.
+"""
+const FILTER_SHORTHELP_QUERY = "?"
+
+"""
+    FILTER_LONGHELP_QUERY
+
+The special query that triggers display of `FILTER_LONGHELP`.
+"""
+const FILTER_LONGHELP_QUERY = "??"
+
+"""
+    FILTER_SHORTHELP
+
+Annotated help text displayed when the user enters the help query (`$FILTER_SHORTHELP_QUERY`).
+"""
+const FILTER_SHORTHELP = S"""
+ {bold,magenta:Interactive history search}
+
+ Enter a search term at the prompt, and see matching candidates.
+ A search term that is {italic:just} '{REPL_History_search_prefix:?}' brings up this help page.
+
+ See more information on behaviour and keybindings with '{REPL_History_search_prefix:??}'.
+
+ By default, each word in the search string is looked for in any order.
+ Should the search string start with {REPL_History_search_prefix:xyz>}, then only xyz-mode entries are considered.
+
+ Different search modes are available via prefixes, as follows:
+ {emphasis:•} {REPL_History_search_prefix:=} looks for exact matches
+ {emphasis:•} {REPL_History_search_prefix:!} {italic:excludes} exact matches
+ {emphasis:•} {REPL_History_search_prefix:/} performs a regexp search
+ {emphasis:•} {REPL_History_search_prefix:~} looks for fuzzy matches
+ {emphasis:•} {REPL_History_search_prefix:`} looks for an initialism (text with matching initials)
+
+ You can also apply multiple restrictions with the separator '{REPL_History_search_separator:$FILTER_SEPARATOR}'.
+
+ For example, {region:{REPL_History_search_prefix:/}^foo{REPL_History_search_separator:$FILTER_SEPARATOR}\
+{REPL_History_search_prefix:`}bar{REPL_History_search_separator:$FILTER_SEPARATOR}\
+{REPL_History_search_prefix:shell>}} will look for history entries that start with "{code:foo}",
+ contains "{code:b... a... r...}", {italic:and} are a shell history entry.
+"""
+
+const FILTER_LONGHELP = S"""
+ {bold,magenta:Interactive history search — behaviour and keybindings}
+
+ Search your REPL history interactively by constructing filters.
+
+ With no mode specified (see the basic help with '{REPL_History_search_prefix:?}'), entries are matched
+ if they contain all of the words in the search string, in any order.
+
+ If the entire search string is lowercase, the search is case-insensitive.
+
+ If you want to include the filter separator '{REPL_History_search_separator:$FILTER_SEPARATOR}' in a query, or start
+ a words filter with a prefix character, you may escape it with a backslash (e.g. {code:\\;}).
+
+ Search results can be navigated with:
+ {emphasis:•} {code:↑}, {code:Ctrl+P}, or {code:Ctrl+K} to move up
+ {emphasis:•} {code:↓}, {code:Ctrl+N}, or {code:Ctrl+J} to move down
+ {emphasis:•} {code:PageUp} or {code:Ctrl+B} to page up
+ {emphasis:•} {code:PageDown} or {code:Ctrl+F} to page down
+ {emphasis:•} {code:Alt+<} to jump to the first result
+ {emphasis:•} {code:Alt+>} to jump to the last result
+
+ Multiple search results can be selected with {code:Tab} and confirmed with {code:Enter}.
+ You may use {code:Ctrl+S} to save selected entries to a file or the clipboard.
+
+ To abort the search, use {code:Ctrl+C}, {code:Ctrl+D}, {code:Ctrl+G}, or {code:Esc Esc}.
+"""
+
+"""
+    ConditionSet(spec::AbstractString) -> ConditionSet
+
+Parse the raw search string `spec` into a `ConditionSet`.
+
+Parsing is performed by splitting on unescaped `FILTER_SEPARATOR` and
+dispatching each segment according to its leading prefix character.
+"""
+function ConditionSet(spec::S) where {S <: AbstractString}
+    function addcond!(condset::ConditionSet, cond::SubString)
+        isempty(cond) && return
+        kind = first(cond)
+        if kind ∈ ('!', '=', '`', '/', '~')
+            value = @view cond[2:end]
+            if kind ∈ ('`', '~')
+                value = strip(value)
+            elseif !all(isspace, value)
+                value = if kind == '/'
+                    rstrip(value)
+                else # kind ∈ ('!', '=')
+                    strip(value)
+                end
+            end
+            isempty(value) && return
+            if startswith(cond, '!')
+                push!(condset.negatives, value)
+            elseif startswith(cond, '=')
+                push!(condset.exacts, value)
+            elseif startswith(cond, '`')
+                push!(condset.initialisms, value)
+            elseif startswith(cond, '/')
+                push!(condset.regexps, value)
+            elseif startswith(cond, '~')
+                push!(condset.fuzzy, value)
+            end
+        else
+            if startswith(cond, '\\') && !(length(cond) > 1 && cond[2] == '\\')
+                cond = @view cond[2:end]
+            else
+                rang = something(findfirst('>', cond), typemax(Int))
+                if rang == something(findfirst(isspace, cond), ncodeunits(cond) + 1) - 1
+                    mode = @view cond[1:prevind(cond, rang)]
+                    push!(condset.modes, SubString(lowercase(mode)))
+                    cond = @view cond[rang + 1:end]
+                end
+            end
+            cond = strip(cond)
+            isempty(cond) && return
+            push!(condset.words, cond)
+        end
+        nothing
+    end
+    cset = ConditionSet{S}()
+    pos = firstindex(spec)
+    mark = pos
+    lastind = lastindex(spec)
+    escaped = false
+    dropbytes = Int[]
+    while pos <= lastind
+        chr = spec[pos]
+        if escaped
+            chr == FILTER_SEPARATOR && push!(dropbytes, pos - mark)
+            escaped = false
+        elseif chr == '\\'
+            escaped = true
+        elseif chr == FILTER_SEPARATOR
+            str = if isempty(dropbytes)
+                SubString(spec, mark:prevind(spec, pos))
+            else
+                subbytes = deleteat!(codeunits(spec)[mark:pos-1], dropbytes)
+                empty!(dropbytes)
+                SubString(convert(S, String(subbytes)))
+            end
+            addcond!(cset, lstrip(str))
+            mark = pos + 1
+        end
+        pos = nextind(spec, pos)
+    end
+    if mark <= lastind
+        str = if isempty(dropbytes)
+            SubString(spec, mark)
+        else
+            subbytes = deleteat!(codeunits(spec)[mark:end], dropbytes)
+            empty!(dropbytes)
+            SubString(convert(S, String(subbytes)))
+        end
+        addcond!(cset, lstrip(str))
+    end
+    cset
+end
+
+"""
+    ismorestrict(a::ConditionSet, b::ConditionSet) -> Bool
+
+Whether `a` is at least as restrictive as `b`, across all conditions.
+"""
+function ismorestrict(a::ConditionSet, b::ConditionSet)
+    length(a.fuzzy) == length(b.fuzzy) &&
+        all(splat(==), zip(a.fuzzy, b.fuzzy)) || return false
+    length(a.regexps) == length(b.regexps) &&
+        all(splat(==), zip(a.regexps, b.regexps)) || return false
+    length(a.modes) == length(b.modes) &&
+        all(splat(==), zip(a.modes, b.modes)) || return false
+    length(a.exacts) >= length(b.exacts) &&
+        all(splat(occursin), zip(b.exacts, a.exacts)) || return false
+    length(a.words) >= length(b.words) &&
+        all(splat(occursin), zip(b.words, a.words)) || return false
+    length(a.negatives) >= length(b.negatives) &&
+        all(splat(occursin), zip(a.negatives, b.negatives)) || return false
+    length(a.initialisms) >= length(b.initialisms) &&
+        all(splat(occursin), zip(b.initialisms, a.initialisms)) || return false
+    true
+end
+
+struct FilterSpec
+    exacts::Vector{String}
+    negatives::Vector{String}
+    regexps::Vector{Regex}
+    modes::Vector{Symbol}
+end
+
+FilterSpec() = FilterSpec([], [], [], [])
+
+function FilterSpec(cset::ConditionSet)
+    spec = FilterSpec([], [], [], [])
+    for term in cset.exacts
+        push!(spec.exacts, String(term))
+    end
+    for words in cset.words
+        casesensitive = any(isuppercase, words)
+        for word in eachsplit(words)
+            if casesensitive
+                push!(spec.exacts, String(word))
+            else
+                push!(spec.regexps, Regex(string("\\Q", word, "\\E"), "i"))
+            end
+        end
+    end
+    for term in cset.negatives
+        push!(spec.negatives, String(term))
+    end
+    for rx in cset.regexps
+        try
+            push!(spec.regexps, Regex(rx))
+        catch _
+            # Regex error, skip
+        end
+    end
+    for itlsm in cset.initialisms
+        rx = Regex(join((string("(?:(?:\\b|_+)(?:\\Q", ltr, "\\E|\\Q", uppercase(ltr),
+                                "\\E)\\w+|\\p{Ll}\\Q", uppercase(ltr), "\\E)")
+                         for ltr in itlsm), "[\\W_]*?"))
+        push!(spec.regexps, rx)
+    end
+    for fuzz in cset.fuzzy
+        for word in eachsplit(fuzz)
+            rx = Regex(join((string("\\Q", ltr, "\\E") for ltr in word), "[^\\s\"#%&()*+,\\-\\/:;<=>?@[\\]^`{|}~]*?"),
+                       ifelse(any(isuppercase, fuzz), "", "i"))
+            push!(spec.regexps, rx)
+        end
+    end
+    for mode in cset.modes
+        push!(spec.modes, Symbol(mode))
+    end
+    spec
+end
+
+
+"""
+    filterchunkrev!(out, candidates, spec, seen, idx; maxtime, maxresults) -> Int
+
+Incrementally filter `candidates[1:idx]` in reverse order.
+
+Pushes matches onto `out` until either `maxtime` is exceeded or `maxresults`
+collected, then returns the new resume index. Only unique entries (by mode and content)
+are added to avoid showing duplicate history items.
+"""
+function filterchunkrev!(out::Vector{HistEntry}, candidates::DenseVector{HistEntry},
+                         spec::FilterSpec, seen::Set{Tuple{Symbol,String}}, idx::Int = length(candidates);
+                         maxtime::Float64 = Inf, maxresults::Int = length(candidates))
+    batchsize = clamp(length(candidates) ÷ 512, 10, 1000)
+    for batch in Iterators.partition(idx:-1:1, batchsize)
+        time() > maxtime && break
+        for outer idx in batch
+            entry = candidates[idx]
+            if (entry.mode, entry.content) ∈ seen
+                continue
+            end
+            if !isempty(spec.modes)
+                entry.mode ∈ spec.modes || continue
+            end
+            matchfail = false
+            for text in spec.exacts
+                if !occursin(text, entry.content)
+                    matchfail = true
+                    break
+                end
+            end
+            matchfail && continue
+            for text in spec.negatives
+                if occursin(text, entry.content)
+                    matchfail = true
+                    break
+                end
+            end
+            matchfail && continue
+            for rx in spec.regexps
+                if !occursin(rx, entry.content)
+                    matchfail = true
+                    break
+                end
+            end
+            matchfail && continue
+            push!(seen, (entry.mode, entry.content))
+            pushfirst!(out, entry)
+            length(out) == maxresults && break
+        end
+    end
+    max(0, idx - 1)
+end
+
+"""
+    matchregions(spec::FilterSpec, candidate::AbstractString) -> Vector{UnitRange{Int}}
+
+Find all matching character ranges in `candidate` for `spec`.
+
+Collects exact-substring and regex matches, then returns them
+sorted by start index (and longer matches first).
+"""
+function matchregions(spec::FilterSpec, candidate::AbstractString)
+    matches = UnitRange{Int}[]
+    for text in spec.exacts
+        append!(matches, findall(text, candidate))
+    end
+    for rx in spec.regexps
+        for (; match) in eachmatch(rx, candidate)
+            push!(matches, 1+match.offset:thisind(candidate, match.offset + match.ncodeunits))
+        end
+    end
+    sort!(matches, by = m -> (first(m), -last(m)))
+    # Combine adjacent matches separated by a single space
+    for (i, match) in enumerate(matches)
+        i == length(matches) && break
+        nextmatch = matches[i + 1]
+        if last(match) + 1 == first(nextmatch) - 1 && candidate[last(match)+1] == ' '
+            matches[i] = first(match):last(nextmatch)
+            matches[i+1] = last(nextmatch)+1:last(nextmatch)
+        end
+    end
+    filter!(!isempty, matches)
+end
diff --git a/stdlib/REPL/src/History/search.jl b/stdlib/REPL/src/History/search.jl
new file mode 100644
index 0000000000000..a8351cc17157b
--- /dev/null
+++ b/stdlib/REPL/src/History/search.jl
@@ -0,0 +1,388 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+"""
+    runsearch() -> (; mode::Union{Symbol, Nothing}, text::String)
+
+Launch the interactive REPL history search interface.
+
+Spawns prompt and display tasks, waits for user confirm or abort,
+and returns the final selection (if any).
+"""
+function runsearch(histfile::HistoryFile, term, prefix::String = "\e[90m")
+    update!(histfile)
+    events = Channel{Symbol}(Inf)
+    pspec = create_prompt(events, term, prefix)
+    ptask = @spawn runprompt!(pspec, events)
+    dtask = @spawn run_display!(pspec, events, histfile.records)
+    wait(ptask)
+    fullselection(fetch(dtask))
+end
+
+"""
+    fullselection(state::SelectorState) -> (; mode::Symbol, text::String)
+
+Gather all selected and hovered entries and return them as joined text.
+"""
+function fullselection(state::SelectorState)
+    text = IOBuffer()
+    entries = copy(state.selection.gathered)
+    for act in state.selection.active
+        push!(entries, state.candidates[act])
+    end
+    if isempty(entries) && state.hover ∈ axes(state.candidates, 1)
+        push!(entries, state.candidates[end-state.hover+1])
+    end
+    sort!(entries, by = e -> e.index)
+    mainmode = if !isempty(entries) first(entries).mode end
+    join(text, Iterators.map(e -> e.content, entries), '\n')
+    (mode = mainmode, text = String(take!(text)))
+end
+
+"""
+    run_display!((; term,pstate), events::Channel{Symbol}, hist::Vector{HistEntry})
+
+Drive the display event loop until confirm or abort.
+
+Listens for navigation, edits, save, and abort events, re-filters history
+incrementally, and re-renders via `redisplay_all`.
+"""
+function run_display!((; term, pstate), events::Channel{Symbol}, hist::Vector{HistEntry})
+    # Output-related variables
+    out = term.out_stream
+    outsize = displaysize(out)
+    buf = IOContext(IOBuffer(), out)
+    # Main state variables
+    state = SelectorState(outsize, "", FilterSpec(), hist)
+    redisplay_all(out, EMPTY_STATE, state, pstate; buf)
+    # Candidate cache
+    cands_cache = Pair{ConditionSet{String}, Vector{HistEntry}}[]
+    cands_cachestate = zero(UInt8)
+    cands_current = HistEntry[]
+    cands_cond = ConditionSet{String}()
+    cands_temp = HistEntry[]
+    # Filter state
+    filter_idx = 0
+    filter_seen = Set{Tuple{Symbol,String}}()
+    # Event loop
+    while true
+        event = @lock events if !isempty(events) take!(events) end
+        if isnothing(event)
+        elseif event === :abort
+            print(out, "\e[1G\e[J")
+            return EMPTY_STATE
+        elseif event === :confirm
+            print(out, "\e[1G\e[J")
+            return state
+        elseif event === :clear
+            print(out, "\e[H\e[2J")
+            redisplay_all(out, EMPTY_STATE, state, pstate; buf)
+            continue
+        elseif event === :redraw
+            print(out, "\e[1G\e[J")
+            redisplay_all(out, EMPTY_STATE, state, pstate; buf)
+            continue
+        elseif event ∈ (:up, :down, :pageup, :pagedown)
+            prevstate, state = state, movehover(state, event ∈ (:up, :pageup), event ∈ (:pageup, :pagedown))
+            @lock events begin
+                nextevent = if !isempty(events) first(events.data) end
+                while nextevent ∈ (:up, :down, :pageup, :pagedown)
+                    take!(events)
+                    state = movehover(state, nextevent ∈ (:up, :pageup), event ∈ (:pageup, :pagedown))
+                    nextevent = if !isempty(events) first(events.data) end
+                end
+            end
+            redisplay_all(out, prevstate, state, pstate; buf)
+            continue
+        elseif event === :jumpfirst
+            prevstate = state
+            state = SelectorState(
+                state.area, state.query, state.filter, state.candidates,
+                length(state.candidates) - componentrows(state).candidates,
+                state.selection, length(state.candidates))
+            redisplay_all(out, prevstate, state, pstate; buf)
+            continue
+        elseif event === :jumplast
+            prevstate = state
+            state = SelectorState(
+                state.area, state.query, state.filter, state.candidates,
+                0, state.selection, 1)
+            redisplay_all(out, prevstate, state, pstate; buf)
+            continue
+        elseif event === :tab
+            prevstate, state = state, toggleselection(state)
+            redisplay_all(out, prevstate, state, pstate; buf)
+            continue
+        elseif event === :edit
+            @lock events begin
+                while !isempty(events) && first(events.data) === :edit
+                    take!(events)
+                end
+            end
+            query = REPL.LineEdit.input_string(pstate)
+            if query === state.query
+                redisplay_all(out, state, state, pstate; buf)
+                continue
+            end
+            # Determine the conditions/filter spec
+            cands_cond = ConditionSet(query)
+            filter_spec = FilterSpec(cands_cond)
+            # Construct a provisional new state
+            prevstate, state = state, SelectorState(
+                outsize, query, filter_spec, HistEntry[], state.selection.gathered)
+            # Gather selected candidates
+            if !isempty(prevstate.selection.active)
+                for act in prevstate.selection.active
+                    push!(state.selection.gathered, prevstate.candidates[act])
+                end
+                sort!(state.selection.gathered, by = e -> e.index)
+                state = SelectorState(
+                    state.area, state.query, state.filter, state.candidates,
+                    -min(length(state.selection.gathered), state.area.height ÷ 8),
+                    state.selection, 1)
+            end
+            # Show help?
+            if query ∈ (FILTER_SHORTHELP_QUERY,FILTER_LONGHELP_QUERY)
+                redisplay_all(out, prevstate, state, pstate; buf)
+                continue
+            end
+            # Parse the conditions and find a good candidate list
+            cands_current = hist
+            for (cond, cands) in Iterators.reverse(cands_cache)
+                if ismorestrict(cands_cond, cond)
+                    cands_current = cands
+                    break
+                end
+            end
+            # Start filtering candidates
+            # Only deduplicate when user has entered a search query. When browsing
+            # with no filter (empty query), show all history including duplicates.
+            if isempty(filter_spec.exacts) && isempty(filter_spec.negatives) &&
+               isempty(filter_spec.regexps) && isempty(filter_spec.modes)
+                # No filtering needed, just copy all candidates
+                append!(state.candidates, cands_current)
+                filter_idx = 0
+            else
+                # Filtering needed, deduplicate results
+                empty!(filter_seen)
+                filter_idx = filterchunkrev!(
+                    state, cands_current, filter_seen;
+                    maxtime = time() + 0.01,
+                    maxresults = outsize[1])
+            end
+            if filter_idx == 0
+                cands_cachestate = addcache!(
+                    cands_cache, cands_cachestate, cands_cond => state.candidates)
+            end
+            redisplay_all(out, prevstate, state, pstate; buf)
+            continue
+        elseif event === :copy
+            content = strip(fullselection(state).text)
+            isempty(content) || saveclipboard(term.out_stream, content)
+            return EMPTY_STATE
+        elseif event === :filesave
+            content = strip(fullselection(state).text)
+            isempty(content) || savefile(term, content)
+            return EMPTY_STATE
+        else
+            error("Unknown event: $event")
+        end
+        if displaysize(out) != outsize
+            outsize = displaysize(out)
+            prevstate, state = state, SelectorState(
+                outsize, state.query, state.filter, state.candidates,
+                state.scroll, state.selection, state.hover)
+            redisplay_all(out, prevstate, state, pstate; buf)
+        elseif filter_idx != 0
+            append!(empty!(cands_temp), state.candidates)
+            prevstate = SelectorState(
+                state.area, state.query, state.filter, cands_temp,
+                state.scroll, state.selection, state.hover)
+            filter_idx = filterchunkrev!(
+                state, cands_current, filter_seen, filter_idx;
+                maxtime = time() + 0.01)
+            if filter_idx == 0
+                cands_cachestate = addcache!(
+                    cands_cache, cands_cachestate, cands_cond => state.candidates)
+            end
+            # If there are now new candidates in the view, update
+            length(state.candidates) != length(prevstate.candidates) &&
+                length(prevstate.candidates) - state.hover < outsize[1] &&
+                redisplay_all(out, prevstate, state, pstate; buf)
+        elseif isnothing(event)
+            yield()
+            sleep(0.01)
+        end
+    end
+end
+
+function filterchunkrev!(state::SelectorState, candidates::DenseVector{HistEntry},
+                         seen::Set{Tuple{Symbol,String}}, idx::Int = length(candidates);
+                         maxtime::Float64 = Inf, maxresults::Int = length(candidates))
+    oldlen = length(state.candidates)
+    idx = filterchunkrev!(state.candidates, candidates, state.filter, seen, idx;
+                          maxtime = maxtime, maxresults = maxresults)
+    newlen = length(state.candidates)
+    newcands = view(state.candidates, (oldlen + 1):newlen)
+    gfound = Int[]
+    for (i, g) in enumerate(state.selection.gathered)
+        cind = searchsorted(newcands, g, by = e -> e.index)
+        isempty(cind) && continue
+        push!(state.selection.active, oldlen + first(cind))
+        push!(gfound, i)
+    end
+    isempty(gfound) || deleteat!(state.selection.gathered, gfound)
+    idx
+end
+
+"""
+    movehover(state::SelectorState, backwards::Bool, page::Bool)
+
+Move the hover cursor in `state` by one row or one page.
+
+The direction and size of the move is determined by `backwards` and `page`.
+"""
+function movehover(state::SelectorState, backwards::Bool, page::Bool)
+    candrows = componentrows(state).candidates
+    shift = ifelse(backwards, 1, -1) * ifelse(page, max(1, candrows - 1), 1)
+    # We need to adjust for the existence of the gathered selection,
+    # and the division line depending on whether it will still be
+    # visible after the move.
+    if !isempty(state.selection.gathered) && state.scroll < 0 &&
+        state.hover + shift + state.scroll <= candrows
+        candrows -= 1
+        shift -= page
+    end
+    ngathered = length(state.selection.gathered)
+    if page && state.scroll < 0 && state.hover < shift
+        shift -= min(-state.scroll, ngathered) - 2 * (state.hover == -1)
+    end
+    newhover = state.hover + shift
+    # This looks a little funky because we want to produce a particular
+    # behaviour when crossing between the active and gathered selection, namely
+    # we want to ensure it always takes an explicit step to go from one section
+    # to another and skip over 0 as an invalid position.
+    newhover = if sign(newhover) == sign(state.hover) || (abs(state.hover) == 1 && newhover != 0)
+        clamp(newhover, -ngathered + iszero(ngathered), max(1, length(state.candidates)))
+    elseif ngathered == 0
+        1
+    elseif newhover == 0
+        -sign(state.hover)
+    else
+        sign(state.hover)
+    end
+    newscroll = clamp(state.scroll,
+                      max(-ngathered, newhover - candrows + (ngathered >= candrows)),
+                      newhover - (newhover >= 0))
+    SelectorState(
+        state.area, state.query, state.filter, state.candidates,
+        newscroll, state.selection, newhover)
+end
+
+"""
+    toggleselection(state::SelectorState)
+
+Vary the selection of the current candidate (selected by hover) in `state`.
+"""
+function toggleselection(state::SelectorState)
+    newselection = if state.hover > 0
+        hoveridx = length(state.candidates) - state.hover + 1
+        hoveridx ∈ axes(state.candidates, 1) || return state
+        activecopy = copy(state.selection.active)
+        selsearch = searchsorted(activecopy, hoveridx)
+        if isempty(selsearch)
+            insert!(activecopy, first(selsearch), hoveridx)
+        else
+            elt = activecopy[selsearch]
+            gidx = findfirst(==(elt), state.selection.gathered)
+            isnothing(gidx) || deleteat!(state.selection.gathered, gidx)
+            deleteat!(activecopy, first(selsearch))
+        end
+        (active = activecopy, gathered = state.selection.gathered)
+    elseif state.hover < 0
+        -state.hover ∈ axes(state.selection.gathered, 1) || return state
+        gatheredcopy = copy(state.selection.gathered)
+        deleteat!(gatheredcopy, -state.hover)
+        (active = state.selection.active, gathered = gatheredcopy)
+    else
+        return state
+    end
+    newstate = SelectorState(
+        state.area, state.query, state.filter, state.candidates,
+        state.scroll, newselection, state.hover)
+    movehover(newstate, false, false)
+end
+
+"""
+    addcache!(cache::Vector{T}, state::Unsigned, new::T)
+
+Add `new` to the log-structured `cache` according to `state`.
+
+The lifetime of `new` is exponentially decaying, it has a `1` in `2^(k-1)`
+chance of reaching the `k`-th position in the cache.
+
+The cache can hold as many items as the number of bits in `state` (e.g. 8 for `UInt8`).
+"""
+function addcache!(cache::Vector{T}, state::Unsigned, new::T) where {T}
+    maxsize = sizeof(state) * 8
+    nextstate = state + one(state)
+    shift = state ⊻ nextstate
+    uninitialised = maxsize - length(cache)
+    if Base.leading_zeros(nextstate) < uninitialised
+        push!(cache, new)
+        return nextstate
+    end
+    for b in 1:(maxsize - 1)
+        iszero(shift & (0x1 << (maxsize - b))) && continue
+        cache[b - uninitialised] = cache[b - uninitialised + 1]
+    end
+    cache[end] = new
+    nextstate
+end
+
+"""
+    savefile(term::Base.Terminals.TTYTerminal, content::AbstractString)
+
+Prompt the user to save `content` to a file path, and record the action.
+"""
+function savefile(term::Base.Terminals.TTYTerminal, content::AbstractString)
+    out = term.out_stream
+    nlines = count('\n', content) + 1
+    print(out, S"\e[1G\e[2K{grey,bold:history>} {bold,emphasis:save file: }")
+    filename = try
+        readline(term.in_stream)
+    catch err
+        if err isa InterruptException
+            ""
+        else
+            rethrow()
+        end
+    end
+    if isempty(filename)
+        println(out, S"\e[F\e[2K{light,grey:{bold:history>} {red:×} History selection aborted}\n")
+        return
+    end
+    open(filename, "w") do io
+        seekend(io)
+        if !iszero(position(io))
+            seek(io, position(io) - 1)
+            lastchar = read(io, UInt8)
+            seekend(io)
+            lastchar == UInt8('\n') || write(io, '\n')
+        end
+        write(io, content, '\n')
+    end
+    println(out, S"\e[F\e[2K{grey,bold:history>} {shadow:Wrote $nlines selected \
+                   $(ifelse(nlines == 1, \"line\", \"lines\")) to {underline,link=$(abspath(filename)):$filename}}\n")
+end
+
+"""
+    saveclipboard(term::Base.Terminals.TTYTerminal, content::AbstractString)
+
+Save `content` to the clipboard and record the action.
+"""
+function saveclipboard(msgio::IO, content::AbstractString)
+    nlines = count('\n', content) + 1
+    clipboard(content)
+    println(msgio, S"\e[1G\e[2K{grey,bold:history>} {shadow:Copied $nlines \
+                     $(ifelse(nlines == 1, \"line\", \"lines\")) to clipboard}\n")
+end
diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl
index ff67e849fcc5a..1ea3044264a0e 100644
--- a/stdlib/REPL/src/LineEdit.jl
+++ b/stdlib/REPL/src/LineEdit.jl
@@ -3,11 +3,14 @@
 module LineEdit
 
 import ..REPL
-using REPL: AbstractREPL, Options
+using ..REPL: AbstractREPL, Options
+using ..REPL.StylingPasses: StylingPass, SyntaxHighlightPass, RegionHighlightPass, EnclosingParenHighlightPass, StylingContext, apply_styling_passes, merge_annotations
+using ..REPL: histsearch
 
 using ..Terminals
-import ..Terminals: raw!, width, height, cmove, getX,
-                       getY, clear_line, beep
+import ..Terminals: raw!, width, height, clear_line, beep
+
+using StyledStrings
 
 import Base: ensureroom, show, AnyDict, position
 using Base: something
@@ -59,6 +62,7 @@ mutable struct Prompt <: TextInterface
     on_done::Function
     hist::HistoryProvider  # TODO?: rename this `hp` (consistency with other TextInterfaces), or is the type-assert useful for mode(s)?
     sticky::Bool
+    styling_passes::Vector{StylingPass}  # Styling passes to apply to input
 end
 
 show(io::IO, x::Prompt) = show(io, string("Prompt(\"", prompt_string(x.prompt), "\",...)"))
@@ -67,6 +71,7 @@ show(io::IO, x::Prompt) = show(io, string("Prompt(\"", prompt_string(x.prompt),
 mutable struct MIState
     interface::ModalInterface
     active_module::Module
+    previous_active_module::Module
     current_mode::TextInterface
     aborted::Bool
     mode_state::IdDict{TextInterface,ModeState}
@@ -76,9 +81,13 @@ mutable struct MIState
     key_repeats::Int
     last_action::Symbol
     current_action::Symbol
+    async_channel::Channel{Function}
+    line_modify_lock::Base.ReentrantLock
+    hint_generation_lock::Base.ReentrantLock
+    n_keys_pressed::Int
 end
 
-MIState(i, mod, c, a, m) = MIState(i, mod, c, a, m, String[], 0, Char[], 0, :none, :none)
+MIState(i, mod, c, a, m) = MIState(i, mod, mod, c, a, m, String[], 0, Char[], 0, :none, :none, Channel{Function}(), Base.ReentrantLock(), Base.ReentrantLock(), 0)
 
 const BufferLike = Union{MIState,ModeState,IOBuffer}
 const State = Union{MIState,ModeState}
@@ -97,6 +106,7 @@ mutable struct PromptState <: ModeState
     p::Prompt
     input_buffer::IOBuffer
     region_active::Symbol # :shift or :mark or :off
+    hint::Union{String,Nothing}
     undo_buffers::Vector{IOBuffer}
     undo_idx::Int
     ias::InputAreaState
@@ -164,7 +174,7 @@ region_active(s::PromptState) = s.region_active
 region_active(s::ModeState) = :off
 
 
-input_string(s::PromptState) = String(take!(copy(s.input_buffer)))
+input_string(s::PromptState) = takestring!(copy(s.input_buffer))::String
 
 input_string_newlines(s::PromptState) = count(c->(c == '\n'), input_string(s))
 function input_string_newlines_aftercursor(s::PromptState)
@@ -179,11 +189,22 @@ struct EmptyHistoryProvider <: HistoryProvider end
 
 reset_state(::EmptyHistoryProvider) = nothing
 
-complete_line(c::EmptyCompletionProvider, s) = String[], "", true
+# Before, completions were always given as strings. But at least for backslash
+# completions, it's nice to see what glyphs are available in the completion preview.
+# To separate between what's shown in the preview list of possible matches, and what's
+# actually completed, we introduce this struct.
+struct NamedCompletion
+    completion::String # what is actually completed, for example "\trianglecdot"
+    name::String # what is displayed in lists of possible completions, for example "◬ \trianglecdot"
+end
+
+NamedCompletion(completion::String) = NamedCompletion(completion, completion)
+
+complete_line(c::EmptyCompletionProvider, s; hint::Bool=false) = NamedCompletion[], "", true
 
 # complete_line can be specialized for only two arguments, when the active module
 # doesn't matter (e.g. Pkg does this)
-complete_line(c::CompletionProvider, s, ::Module) = complete_line(c, s)
+complete_line(c::CompletionProvider, s, ::Module; hint::Bool=false) = complete_line(c, s; hint)
 
 terminal(s::IO) = s
 terminal(s::PromptState) = s.terminal
@@ -306,6 +327,7 @@ end
 
 set_action!(s, command::Symbol) = nothing
 
+common_prefix(completions::Vector{NamedCompletion}) = common_prefix(map(x -> x.completion, completions))
 function common_prefix(completions::Vector{String})
     ret = ""
     c1 = completions[1]
@@ -328,6 +350,8 @@ end
 # does not restrict column length when multiple columns are used.
 const MULTICOLUMN_THRESHOLD = 5
 
+show_completions(s::PromptState, completions::Vector{NamedCompletion}) = show_completions(s, map(x -> x.name, completions))
+
 # Show available completions
 function show_completions(s::PromptState, completions::Vector{String})
     # skip any lines of input after the cursor
@@ -361,7 +385,7 @@ function show_completions(s::PromptState, completions::Vector{String})
     end
 end
 
-# Prompt Completions
+# Prompt Completions & Hints
 function complete_line(s::MIState)
     set_action!(s, :complete_line)
     if complete_line(state(s), s.key_repeats, s.active_module)
@@ -372,8 +396,110 @@ function complete_line(s::MIState)
     end
 end
 
-function complete_line(s::PromptState, repeats::Int, mod::Module)
-    completions, partial, should_complete = complete_line(s.p.complete, s, mod)::Tuple{Vector{String},String,Bool}
+# Old complete_line return type: Vector{String},          String, Bool
+# New complete_line return type: NamedCompletion{String}, String, Bool
+#                            OR  NamedCompletion{String}, Region, Bool
+#
+# due to close coupling of the Pkg ReplExt `complete_line` can still return a vector of strings,
+# so we convert those in this helper
+function complete_line_named(c, s, args...; kwargs...)::Tuple{Vector{NamedCompletion},Region,Bool}
+    r1, r2, should_complete = complete_line(c, s, args...; kwargs...)::Union{
+        Tuple{Vector{String}, String, Bool},
+        Tuple{Vector{NamedCompletion}, String, Bool},
+        Tuple{Vector{NamedCompletion}, Region, Bool},
+    }
+    completions = (r1 isa Vector{String} ? map(NamedCompletion, r1) : r1)
+    r = (r2 isa String ? (position(s)-sizeof(r2) => position(s)) : r2)
+    completions, r, should_complete
+end
+
+# checks for a hint and shows it if appropriate.
+# to allow the user to type even if hint generation is slow, the
+# hint is generated on a worker thread, and only shown if the user hasn't
+# pressed a key since the hint generation was requested
+function check_show_hint(s::MIState)
+    st = state(s)
+
+    this_key_i = s.n_keys_pressed
+    next_key_pressed() = @lock s.line_modify_lock s.n_keys_pressed > this_key_i
+    function lock_clear_hint()
+        @lock s.line_modify_lock begin
+            next_key_pressed() || s.aborted || clear_hint(st) && refresh_line(s)
+        end
+    end
+
+    if !options(st).hint_tab_completes || !eof(buffer(st))
+        # only generate hints if enabled and at the end of the line
+        # TODO: maybe show hints for insertions at other positions
+        # Requires making space for them earlier in refresh_multi_line
+        lock_clear_hint()
+        return
+    end
+    t_completion = Threads.@spawn :default begin
+        named_completions, reg, should_complete = nothing, nothing, nothing
+
+        # only allow one task to generate hints at a time and check around lock
+        # if the user has pressed a key since the hint was requested, to skip old completions
+        next_key_pressed() && return
+        @lock s.hint_generation_lock begin
+            next_key_pressed() && return
+            named_completions, reg, should_complete = try
+                complete_line_named(st.p.complete, st, s.active_module; hint = true)
+            catch
+                lock_clear_hint()
+                return
+            end
+        end
+        next_key_pressed() && return
+
+        completions = map(x -> x.completion, named_completions)
+        if isempty(completions)
+            lock_clear_hint()
+            return
+        end
+        # Don't complete for single chars, given e.g. `x` completes to `xor`
+        if reg.second - reg.first > 1 && should_complete
+            singlecompletion = length(completions) == 1
+            p = singlecompletion ? completions[1] : common_prefix(completions)
+            if singlecompletion || p in completions # i.e. complete `@time` even though `@time_imports` etc. exists
+                # The completion `p` and the region `reg` may not share the same initial
+                # characters, for instance when completing to subscripts or superscripts.
+                # So, in general, make sure that the hint starts at the correct position by
+                # incrementing its starting position by as many characters as the input.
+                maxind = lastindex(p)
+                startind = sizeof(content(s, reg))
+                if startind ≤ maxind # completion on a complete name returns itself so check that there's something to hint
+                    # index of p from which to start providing the hint
+                    startind = nextind(p, startind)
+                    hint = p[startind:end]
+                    next_key_pressed() && return
+                    @lock s.line_modify_lock begin
+                        if !s.aborted
+                            state(s).hint = hint
+                            refresh_line(s)
+                        end
+                    end
+                    return
+                end
+            end
+        end
+        lock_clear_hint()
+    end
+    Base.errormonitor(t_completion)
+    return
+end
+
+function clear_hint(s::ModeState)
+    if !isnothing(s.hint)
+        s.hint = "" # don't set to nothing here. That will be done in `maybe_show_hint`
+        return true # indicate maybe_show_hint has work to do
+    else
+        return false
+    end
+end
+
+function complete_line(s::PromptState, repeats::Int, mod::Module; hint::Bool=false)
+    completions, reg, should_complete = complete_line_named(s.p.complete, s, mod; hint)
     isempty(completions) && return false
     if !should_complete
         # should_complete is false for cases where we only want to show
@@ -381,17 +507,16 @@ function complete_line(s::PromptState, repeats::Int, mod::Module)
         show_completions(s, completions)
     elseif length(completions) == 1
         # Replace word by completion
-        prev_pos = position(s)
         push_undo(s)
-        edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1])
+        edit_splice!(s, reg, completions[1].completion)
     else
         p = common_prefix(completions)
+        partial = content(s, reg.first => min(bufend(s), reg.first + sizeof(p)))
         if !isempty(p) && p != partial
             # All possible completions share the same prefix, so we might as
-            # well complete that
-            prev_pos = position(s)
+            # well complete that.
             push_undo(s)
-            edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, p)
+            edit_splice!(s, reg, p)
         elseif repeats > 0
             show_completions(s, completions)
         end
@@ -432,12 +557,34 @@ prompt_string(p::Prompt) = prompt_string(p.prompt)
 prompt_string(s::AbstractString) = s
 prompt_string(f::Function) = Base.invokelatest(f)
 
+function maybe_show_hint(s::PromptState)
+    isa(s.hint, String) || return nothing
+    # The hint being "" then nothing is used to first clear a previous hint, then skip printing the hint
+    if isempty(s.hint)
+        s.hint = nothing
+    else
+        Base.printstyled(terminal(s), s.hint, color=:light_black)
+        cmove_left(terminal(s), textwidth(s.hint))
+        s.hint = "" # being "" signals to do one clear line remainder to clear the hint next time the screen is refreshed
+    end
+    return nothing
+end
+
+max_highlight_size::Int = 10000 # bytes
+
 function refresh_multi_line(s::PromptState; kw...)
     if s.refresh_wait !== nothing
         close(s.refresh_wait)
         s.refresh_wait = nothing
     end
-    refresh_multi_line(terminal(s), s; kw...)
+    if s.hint isa String
+        # clear remainder of line which is unknown here if it had a hint before unbeknownst to refresh_multi_line
+        # the clear line cannot be printed each time because it would break column movement
+        print(terminal(s), "\e[0K")
+    end
+    r = refresh_multi_line(terminal(s), s; kw...)
+    maybe_show_hint(s) # now maybe write the hint back to the screen
+    return r
 end
 refresh_multi_line(s::ModeState; kw...) = refresh_multi_line(terminal(s), s; kw...)
 refresh_multi_line(termbuf::TerminalBuffer, s::ModeState; kw...) = refresh_multi_line(termbuf, terminal(s), s; kw...)
@@ -457,10 +604,55 @@ function refresh_multi_line(termbuf::TerminalBuffer, terminal::UnixTerminal, buf
     line_pos = buf_pos
     regstart, regstop = region(buf)
     written = 0
+    @static if Sys.iswindows()
+        writer = Terminals.pipe_writer(terminal)
+        if writer isa Base.TTY && !Base.ispty(writer)::Bool
+            _reset_console_mode(writer.handle)
+        end
+    end
     # Write out the prompt string
     lindent = write_prompt(termbuf, prompt, hascolor(terminal))::Int
     # Count the '\n' at the end of the line if the terminal emulator does (specific to DOS cmd prompt)
-    miscountnl = @static Sys.iswindows() ? (isa(Terminals.pipe_reader(terminal), Base.TTY) && !(Base.ispty(Terminals.pipe_reader(terminal)))::Bool) : false
+    miscountnl = @static if Sys.iswindows()
+        reader = Terminals.pipe_reader(terminal)
+        reader isa Base.TTY && !Base.ispty(reader)::Bool
+    else false end
+
+    # Get the styling passes from the prompt
+    prompt_obj = nothing
+    if prompt isa PromptState
+        prompt_obj = prompt.p
+    elseif prompt isa PrefixSearchState
+        if isdefined(prompt, :parent) && prompt.parent isa Prompt
+            prompt_obj = prompt.parent
+        end
+    end
+
+    styled_buffer = AnnotatedString("")
+    if buf.size > 0 && buf.size <= max_highlight_size
+        full_input = String(buf.data[1:buf.size])
+        if !isempty(full_input)
+            passes = StylingPass[]
+            context = StylingContext(buf_pos, regstart, regstop)
+
+            # Add prompt-specific styling passes if the prompt has them and styling is enabled
+            enable_style_input = prompt_obj === nothing ? false :
+                (isdefined(prompt_obj, :repl) && prompt_obj.repl !== nothing ?
+                    prompt_obj.repl.options.style_input : false)
+
+            if enable_style_input && prompt_obj !== nothing
+                append!(passes, prompt_obj.styling_passes)
+            end
+
+            if region_active
+                push!(passes, RegionHighlightPass())
+            end
+
+            if !isempty(passes)
+                styled_buffer = apply_styling_passes(full_input, passes, context)
+            end
+        end
+    end
 
     # Now go through the buffer line by line
     seek(buf, 0)
@@ -487,12 +679,26 @@ function refresh_multi_line(termbuf::TerminalBuffer, terminal::UnixTerminal, buf
         llength = textwidth(line)
         slength = sizeof(line)
         cur_row += 1
-        # lwrite: what will be written to termbuf
-        lwrite = region_active ? highlight_region(line, regstart, regstop, written, slength) :
-                                 line
+
+        # Extract the portion of styled_buffer corresponding to this line.
+        if !isempty(styled_buffer)
+            # Calculate byte positions for this line in the buffer
+            line_start_byte = written + 1
+            line_end_byte = written + slength
+
+            # Convert to valid character indices (handles UTF-8 boundaries)
+            start_idx = thisind(styled_buffer, line_start_byte)
+            end_idx = thisind(styled_buffer, line_end_byte)
+
+            lwrite = @view styled_buffer[start_idx:end_idx]
+        else
+            lwrite = line
+        end
+
         written += slength
         cmove_col(termbuf, lindent + 1)
-        write(termbuf, lwrite)
+
+        write(IOContext(termbuf, :color => hascolor(terminal)), lwrite)
         # We expect to be line after the last valid output line (due to
         # the '\n' at the end of the previous line)
         if curs_row == -1
@@ -543,18 +749,6 @@ function refresh_multi_line(termbuf::TerminalBuffer, terminal::UnixTerminal, buf
     return InputAreaState(cur_row, curs_row)
 end
 
-function highlight_region(lwrite::Union{String,SubString{String}}, regstart::Int, regstop::Int, written::Int, slength::Int)
-    if written <= regstop <= written+slength
-        i = thisind(lwrite, regstop-written)
-        lwrite = lwrite[1:i] * Base.disable_text_style[:reverse] * lwrite[nextind(lwrite, i):end]
-    end
-    if written <= regstart <= written+slength
-        i = thisind(lwrite, regstart-written)
-        lwrite = lwrite[1:i] * Base.text_colors[:reverse] * lwrite[nextind(lwrite, i):end]
-    end
-    return lwrite
-end
-
 function refresh_multi_line(terminal::UnixTerminal, args...; kwargs...)
     outbuf = IOBuffer()
     termbuf = TerminalBuffer(outbuf)
@@ -676,7 +870,26 @@ function edit_move_right(buf::IOBuffer)
     end
     return false
 end
-edit_move_right(s::PromptState) = edit_move_right(s.input_buffer) ? refresh_line(s) : false
+function edit_move_right(m::MIState)
+    s = state(m)
+    buf = s.input_buffer
+    if edit_move_right(s.input_buffer)
+        refresh_line(s)
+        return true
+    else
+        completions, reg, should_complete = complete_line(s.p.complete, s, m.active_module)
+        if should_complete && eof(buf) && length(completions) == 1 && reg.second - reg.first > 1
+            # Replace word by completion
+            prev_pos = position(s)
+            push_undo(s)
+            edit_splice!(s, (prev_pos - reg.second + reg.first) => prev_pos, completions[1].completion)
+            refresh_line(state(s))
+            return true
+        else
+            return false
+        end
+    end
+end
 
 function edit_move_word_right(s::PromptState)
     if !eof(s.input_buffer)
@@ -751,9 +964,9 @@ end
 # returns the removed portion as a String
 function edit_splice!(s::BufferLike, r::Region=region(s), ins::String = ""; rigid_mark::Bool=true)
     A, B = first(r), last(r)
-    A >= B && isempty(ins) && return String(ins)
+    A >= B && isempty(ins) && return ins
     buf = buffer(s)
-    pos = position(buf)
+    pos = position(buf) # n.b. position(), etc, are 0-indexed
     adjust_pos = true
     if A <= pos < B
         seek(buf, A)
@@ -762,18 +975,29 @@ function edit_splice!(s::BufferLike, r::Region=region(s), ins::String = ""; rigi
     else
         adjust_pos = false
     end
-    if A < buf.mark  < B || A == buf.mark == B
-        # rigid_mark is used only if the mark is strictly "inside"
-        # the region, or the region is empty and the mark is at the boundary
-        buf.mark = rigid_mark ? A : A + sizeof(ins)
-    elseif buf.mark >= B
-        buf.mark += sizeof(ins) - B + A
+    mark = buf.mark
+    if mark != -1
+        if A < mark < B || A == mark == B
+            # rigid_mark is used only if the mark is strictly "inside"
+            # the region, or the region is empty and the mark is at the boundary
+            mark = rigid_mark ? A : A + sizeof(ins)
+        elseif mark >= B
+            mark += sizeof(ins) - B + A
+        end
+        buf.mark = -1
     end
-    ensureroom(buf, B) # handle !buf.reinit from take!
-    ret = splice!(buf.data, A+1:B, codeunits(String(ins))) # position(), etc, are 0-indexed
-    buf.size = buf.size + sizeof(ins) - B + A
-    adjust_pos && seek(buf, position(buf) + sizeof(ins))
-    return String(copy(ret))
+    # Implement ret = splice!(buf.data, A+1:B, codeunits(ins)) for a stream
+    pos = position(buf)
+    seek(buf, A)
+    ret = read(buf, A >= B ? 0 : B - A)
+    trail = read(buf)
+    seek(buf, A)
+    write(buf, ins)
+    write(buf, trail)
+    truncate(buf, position(buf))
+    seek(buf, pos + (adjust_pos ? sizeof(ins) : 0))
+    buf.mark = mark
+    return String(ret)
 end
 
 edit_splice!(s::MIState, ins::AbstractString) = edit_splice!(s, region(s), ins)
@@ -820,7 +1044,9 @@ function edit_insert(s::PromptState, c::StringLike)
         offset += position(buf) - beginofline(buf) # size of current line
         spinner = '\0'
         delayup = !eof(buf) || old_wait
-        if offset + textwidth(str) <= w && !(after == 0 && delayup)
+        # Disable fast path when syntax highlighting is enabled
+        use_fast_path = offset + textwidth(str) <= w && !(after == 0 && delayup) && !options(s).style_input
+        if use_fast_path
             # Avoid full update when appending characters to the end
             # and an update of curs_row isn't necessary (conservatively estimated)
             write(termbuf, str)
@@ -1347,7 +1573,7 @@ function edit_input(s, f = (filename, line, column) -> InteractiveUtils.edit(fil
     end
     buf = buffer(s)
     pos = position(buf)
-    str = String(take!(buf))
+    str = takestring!(buf)
     lines = readlines(IOBuffer(str); keep=true)
 
     # Compute line
@@ -1417,14 +1643,28 @@ current_word_with_dots(s::MIState) = current_word_with_dots(buffer(s))
 
 function activate_module(s::MIState)
     word = current_word_with_dots(s);
-    isempty(word) && return beep(s)
-    try
-        mod = Base.Core.eval(Base.active_module(), Base.Meta.parse(word))
-        REPL.activate(mod)
-        edit_clear(s)
-    catch
+    empty = isempty(word)
+    mod = if empty
+        s.previous_active_module
+    else
+        try
+            Base.Core.eval(Base.active_module(), Base.Meta.parse(word))
+        catch
+            nothing
+        end
+    end
+    if !(mod isa Module) || mod == Base.active_module()
         beep(s)
+        return
+    end
+    empty && edit_insert(s, ' ') # makes the `edit_clear` below actually update the prompt
+    if Base.active_module() == Main || mod == Main
+        # At least one needs to be Main. Disallows toggling between two non-Main modules because it's
+        # otherwise hard to get back to Main
+        s.previous_active_module = Base.active_module()
     end
+    REPL.activate(mod)
+    edit_clear(s)
 end
 
 history_prev(::EmptyHistoryProvider) = ("", false)
@@ -1494,34 +1734,39 @@ end
 # not leave the console mode in a corrupt state.
 # FIXME: remove when pseudo-tty are implemented for child processes
 if Sys.iswindows()
-function _console_mode()
-    hOutput = ccall(:GetStdHandle, stdcall, Ptr{Cvoid}, (UInt32,), -11 % UInt32) # STD_OUTPUT_HANDLE
-    dwMode = Ref{UInt32}()
-    ccall(:GetConsoleMode, stdcall, Int32, (Ref{Cvoid}, Ref{UInt32}), hOutput, dwMode)
-    return dwMode[]
-end
-const default_console_mode_ref = Ref{UInt32}()
-const default_console_mode_assigned = Ref(false)
-function get_default_console_mode()
-    if default_console_mode_assigned[] == false
-        default_console_mode_assigned[] = true
-        default_console_mode_ref[] = _console_mode()
-    end
-    return default_console_mode_ref[]
-end
-function _reset_console_mode()
-    mode = _console_mode()
-    if mode !== get_default_console_mode()
-        hOutput = ccall(:GetStdHandle, stdcall, Ptr{Cvoid}, (UInt32,), -11 % UInt32) # STD_OUTPUT_HANDLE
-        ccall(:SetConsoleMode, stdcall, Int32, (Ptr{Cvoid}, UInt32), hOutput, default_console_mode_ref[])
+
+#= Get/SetConsoleMode flags =#
+const ENABLE_PROCESSED_OUTPUT            = UInt32(0x0001)
+const ENABLE_WRAP_AT_EOL_OUTPUT          = UInt32(0x0002)
+const ENABLE_VIRTUAL_TERMINAL_PROCESSING = UInt32(0x0004)
+const DISABLE_NEWLINE_AUTO_RETURN        = UInt32(0x0008)
+const ENABLE_LVB_GRID_WORLDWIDE          = UInt32(0x0010)
+
+#= libuv flags =#
+const UV_TTY_SUPPORTED = 0
+const UV_TTY_UNSUPPORTED = 1
+
+function _reset_console_mode(handle::Ptr{Cvoid})
+    # Query libuv to see whether it expects the console to support virtual terminal sequences
+    vterm_state = Ref{Cint}()
+    ccall(:uv_tty_get_vterm_state, Cint, (Ref{Cint},), vterm_state)
+
+    mode::UInt32 = ENABLE_PROCESSED_OUTPUT | ENABLE_WRAP_AT_EOL_OUTPUT
+    if vterm_state[] == UV_TTY_SUPPORTED
+        mode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING
     end
-    nothing
+
+    # Expected to fail (benignly) with ERROR_INVALID_HANDLE if the provided handle does not
+    # allow setting the console mode
+    ccall(:SetConsoleMode, stdcall, Int32, (Ptr{Cvoid}, UInt32), handle, mode)
+
+    return nothing
 end
+
 end
 
 # returns the width of the written prompt
 function write_prompt(terminal::Union{IO, AbstractTerminal}, s::Union{AbstractString,Function}, color::Bool)
-    @static Sys.iswindows() && _reset_console_mode()
     promptstr = prompt_string(s)::String
     write(terminal, promptstr)
     return textwidth(promptstr)
@@ -1562,7 +1807,7 @@ function normalize_key(key::Union{String,SubString{String}})
             write(buf, c)
         end
     end
-    return String(take!(buf))
+    return takestring!(buf)
 end
 
 function normalize_keys(keymap::Union{Dict{Char,Any},AnyDict})
@@ -1857,65 +2102,148 @@ const escape_defaults = merge!(
     AnyDict("\e[$(c)l" => nothing for c in 1:20)
     )
 
-mutable struct HistoryPrompt <: TextInterface
-    hp::HistoryProvider
-    complete::CompletionProvider
-    keymap_dict::Dict{Char,Any}
-    HistoryPrompt(hp) = new(hp, EmptyCompletionProvider())
-end
 
-mutable struct SearchState <: ModeState
-    terminal::AbstractTerminal
-    histprompt::HistoryPrompt
-    #rsearch (true) or ssearch (false)
-    backward::Bool
-    query_buffer::IOBuffer
-    response_buffer::IOBuffer
-    failed::Bool
-    ias::InputAreaState
-    #The prompt whose input will be replaced by the matched history
-    parent::Prompt
-    SearchState(terminal, histprompt, backward, query_buffer, response_buffer) =
-        new(terminal, histprompt, backward, query_buffer, response_buffer, false, InputAreaState(0,0))
+# Helper function to check and remove paired brackets/quotes
+# Returns true if paired delimiters were removed, false otherwise
+function try_remove_paired_delimiter(buf::IOBuffer)
+    left_brackets = ('(', '{', '[', '"', '\'', '`')
+    right_brackets = (')', '}', ']', '"', '\'', '`')
+
+    if !eof(buf) && position(buf) > 0
+        # Peek at char to the left
+        p = position(buf)
+        left_char = char_move_left(buf)
+        seek(buf, p)
+
+        i = findfirst(isequal(left_char), left_brackets)
+        if i !== nothing && peek(buf, Char) == right_brackets[i]
+            # Remove both the left and right bracket/quote
+            edit_delete(buf)
+            edit_backspace(buf)
+            return true
+        end
+    end
+    return false
 end
 
-init_state(terminal, p::HistoryPrompt) = SearchState(terminal, p, true, IOBuffer(), IOBuffer())
+# Keymap for automatic bracket/quote insertion and completion
+const bracket_insert_keymap = AnyDict()
+let
+    # Determine when we should not close a bracket/quote
+    function should_skip_closing_bracket(left_peek, v)
+        # Don't close if we already have an open quote immediately before (triple quote case)
+        # For quotes, also check for transpose expressions: issue JuliaLang/OhMyREPL.jl#200
+        left_peek == v && return true
+        if v == '\''
+            tr_expr = isletter(left_peek) || isnumeric(left_peek) || left_peek == '_' || left_peek == ']'
+            return tr_expr
+        end
+        return false
+    end
 
-terminal(s::SearchState) = s.terminal
+    function peek_char_left(b::IOBuffer)
+        p = position(b)
+        c = char_move_left(b)
+        seek(b, p)
+        return c
+    end
 
-function update_display_buffer(s::SearchState, data::ModeState)
-    s.failed = !history_search(data.histprompt.hp, data.query_buffer, data.response_buffer, data.backward, false)
-    s.failed && beep(s)
-    refresh_line(s)
-    nothing
-end
+    # Check if we should auto-close a quote (insert paired quotes)
+    # auto-close when "transparent" chars on both sides
+    # Transparent chars: whitespace, opening brackets ([{, closing brackets )]}, or nothing
+    function should_auto_close_quote(buf::IOBuffer, quote_char::Char)
+        # Check left side: BOF, whitespace, or opening bracket
+        left_ok = if position(buf) == 0
+            true
+        else
+            left_char = peek_char_left(buf)
+            isspace(left_char) || left_char in ('(', '[', '{')
+        end
 
-function history_next_result(s::MIState, data::ModeState)
-    data.failed = !history_search(data.histprompt.hp, data.query_buffer, data.response_buffer, data.backward, true)
-    data.failed && beep(s)
-    refresh_line(data)
-    nothing
-end
+        # Check right side: EOF, whitespace, or closing bracket
+        right_ok = if eof(buf)
+            true
+        else
+            right_char = peek(buf, Char)
+            isspace(right_char) || right_char in (')', ']', '}')
+        end
 
-function history_set_backward(s::SearchState, backward::Bool)
-    s.backward = backward
-    nothing
-end
+        return left_ok && right_ok
+    end
+
+    # Left/right bracket pairs
+    bracket_pairs = (('(', ')'), ('{', '}'), ('[', ']'))
+    # Characters that are "transparent" for bracket auto-closing
+    right_brackets_ws = (')', '}', ']', ' ', '\t', '\n', '"', '\'', '`')
 
-input_string(s::SearchState) = String(take!(copy(s.query_buffer)))
+    for (left, right) in bracket_pairs
+        # Left bracket: insert both and move cursor between them
+        bracket_insert_keymap[left] = (s::MIState, o...) -> begin
+            buf = buffer(s)
+            edit_insert(buf, left)
+            if eof(buf) || peek(buf, Char) in right_brackets_ws
+                edit_insert(buf, right)
+                edit_move_left(buf)
+            end
+            refresh_line(s)
+        end
 
-function reset_state(s::SearchState)
-    if s.query_buffer.size != 0
-        s.query_buffer.size = 0
-        s.query_buffer.ptr = 1
+        # Right bracket: skip over if next char matches, otherwise insert
+        bracket_insert_keymap[right] = (s::MIState, o...) -> begin
+            buf = buffer(s)
+            if !eof(buf) && peek(buf, Char) == right
+                edit_move_right(buf)
+            else
+                edit_insert(buf, right)
+            end
+            refresh_line(s)
+        end
     end
-    if s.response_buffer.size != 0
-        s.response_buffer.size = 0
-        s.response_buffer.ptr = 1
+
+    # Quote characters (need special handling for transpose detection)
+    for quote_char in ('"', '\'', '`')
+        bracket_insert_keymap[quote_char] = (s::MIState, o...) -> begin
+            buf = buffer(s)
+            if !eof(buf) && peek(buf, Char) == quote_char
+                # Skip over closing quote
+                edit_move_right(buf)
+            elseif position(buf) > 0 && should_skip_closing_bracket(peek_char_left(buf), quote_char)
+                # Don't auto-close (e.g., for transpose or triple quotes)
+                edit_insert(buf, quote_char)
+            elseif should_auto_close_quote(buf, quote_char)
+                edit_insert(buf, quote_char)
+                edit_insert(buf, quote_char)
+                edit_move_left(buf)
+            else
+                # Just insert single quote
+                edit_insert(buf, quote_char)
+            end
+            refresh_line(s)
+        end
+    end
+
+    # Backspace - also remove matching closing bracket/quote
+    bracket_insert_keymap['\b'] = (s::MIState, o...) -> begin
+        if is_region_active(s)
+            return edit_kill_region(s)
+        elseif isempty(s) || position(buffer(s)) == 0
+            # Handle transitioning to main mode
+            repl = Base.active_repl
+            mirepl = isdefined(repl, :mi) ? repl.mi : repl
+            main_mode = mirepl.interface.modes[1]
+            buf = copy(buffer(s))
+            transition(s, main_mode) do
+                state(s, main_mode).input_buffer = buf
+            end
+            return
+        end
+
+        buf = buffer(s)
+        if try_remove_paired_delimiter(buf)
+            return refresh_line(s)
+        end
+        return edit_backspace(s)
     end
-    reset_state(s.histprompt.hp)
-    s.failed = false
-    nothing
 end
 
 # a meta-prompt that presents itself as parent_prompt, but which has an independent keymap
@@ -1951,7 +2279,7 @@ function Base.getproperty(s::ModeState, name::Symbol)
     elseif name === :prompt
         return getfield(s, :prompt)::Prompt
     elseif name === :histprompt
-        return getfield(s, :histprompt)::Union{HistoryPrompt,PrefixHistoryPrompt}
+        return getfield(s, :histprompt)::PrefixHistoryPrompt
     elseif name === :parent
         return getfield(s, :parent)::Prompt
     elseif name === :response_buffer
@@ -1990,7 +2318,7 @@ function refresh_multi_line(termbuf::TerminalBuffer, terminal::UnixTerminal,
     return ias
 end
 
-input_string(s::PrefixSearchState) = String(take!(copy(s.response_buffer)))
+input_string(s::PrefixSearchState) = takestring!(copy(s.response_buffer))
 
 write_prompt(terminal, s::PrefixSearchState, color::Bool) = write_prompt(terminal, s.histprompt.parent_prompt, color)
 prompt_string(s::PrefixSearchState) = prompt_string(s.histprompt.parent_prompt.prompt)
@@ -2028,48 +2356,17 @@ function replace_line(s::PrefixSearchState, l::Union{String,SubString{String}})
     nothing
 end
 
-function refresh_multi_line(termbuf::TerminalBuffer, s::SearchState)
-    buf = IOBuffer()
-    unsafe_write(buf, pointer(s.query_buffer.data), s.query_buffer.ptr-1)
-    write(buf, "': ")
-    offset = buf.ptr
-    ptr = s.response_buffer.ptr
-    seek(s.response_buffer, 0)
-    write(buf, read(s.response_buffer, String))
-    buf.ptr = offset + ptr - 1
-    s.response_buffer.ptr = ptr
-    failed = s.failed ? "failed " : ""
-    ias = refresh_multi_line(termbuf, s.terminal, buf, s.ias,
-                             s.backward ? "($(failed)reverse-i-search)`" : "($(failed)forward-i-search)`")
-    s.ias = ias
-    return ias
-end
-
 state(s::MIState, p::TextInterface=mode(s)) = s.mode_state[p]
 state(s::PromptState, p::Prompt=mode(s)) = (@assert s.p == p; s)
 
 mode(s::MIState) = s.current_mode   # ::TextInterface, and might be a Prompt
 mode(s::PromptState) = s.p          # ::Prompt
-mode(s::SearchState) = @assert false
 mode(s::PrefixSearchState) = s.histprompt.parent_prompt   # ::Prompt
 
 setmodifiers!(s::MIState, m::Modifiers) = setmodifiers!(mode(s), m)
 setmodifiers!(p::Prompt, m::Modifiers) = setmodifiers!(p.complete, m)
 setmodifiers!(c) = nothing
 
-# Search Mode completions
-function complete_line(s::SearchState, repeats, mod::Module)
-    completions, partial, should_complete = complete_line(s.histprompt.complete, s, mod)
-    # For now only allow exact completions in search mode
-    if length(completions) == 1
-        prev_pos = position(s)
-        push_undo(s)
-        edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1])
-        return true
-    end
-    return false
-end
-
 accept_result_newmode(hp::HistoryProvider) = nothing
 function accept_result(s::MIState, p::TextInterface)
     parent = something(accept_result_newmode(p.hp), state(s, p).parent)
@@ -2089,24 +2386,6 @@ function copybuf!(dst::IOBuffer, src::IOBuffer)
     nothing
 end
 
-function enter_search(s::MIState, p::HistoryPrompt, backward::Bool)
-    # a bit of hack to help fix #6325
-    buf = copy(buffer(s))
-    parent = mode(s)
-    p.hp.last_mode = mode(s)
-    p.hp.last_buffer = buf
-
-    transition(s, p) do
-        ss = state(s, p)
-        ss.parent = parent
-        ss.backward = backward
-        truncate(ss.query_buffer, 0)
-        ss.failed = false
-        copybuf!(ss.response_buffer, buf)
-    end
-    nothing
-end
-
 function enter_prefix_search(s::MIState, p::PrefixHistoryPrompt, backward::Bool)
     buf = copy(buffer(s))
     parent = mode(s)
@@ -2129,96 +2408,12 @@ function enter_prefix_search(s::MIState, p::PrefixHistoryPrompt, backward::Bool)
     nothing
 end
 
-function setup_search_keymap(hp)
-    p = HistoryPrompt(hp)
-    pkeymap = AnyDict(
-        "^R"      => (s::MIState,data::ModeState,c)->(history_set_backward(data, true); history_next_result(s, data)),
-        "^S"      => (s::MIState,data::ModeState,c)->(history_set_backward(data, false); history_next_result(s, data)),
-        '\r'      => (s::MIState,o...)->accept_result(s, p),
-        '\n'      => '\r',
-        # Limited form of tab completions
-        '\t'      => (s::MIState,data::ModeState,c)->(complete_line(s); update_display_buffer(s, data)),
-        "^L"      => (s::MIState,data::ModeState,c)->(Terminals.clear(terminal(s)); update_display_buffer(s, data)),
-
-        # Backspace/^H
-        '\b'      => (s::MIState,data::ModeState,c)->(edit_backspace(data.query_buffer) ?
-                        update_display_buffer(s, data) : beep(s)),
-        127       => KeyAlias('\b'),
-        # Meta Backspace
-        "\e\b"    => (s::MIState,data::ModeState,c)->(isempty(edit_delete_prev_word(data.query_buffer)) ?
-                                  beep(s) : update_display_buffer(s, data)),
-        "\e\x7f"  => "\e\b",
-        # Word erase to whitespace
-        "^W"      => (s::MIState,data::ModeState,c)->(isempty(edit_werase(data.query_buffer)) ?
-                                  beep(s) : update_display_buffer(s, data)),
-        # ^C and ^D
-        "^C"      => (s::MIState,data::ModeState,c)->(edit_clear(data.query_buffer);
-                       edit_clear(data.response_buffer);
-                       update_display_buffer(s, data);
-                       reset_state(data.histprompt.hp);
-                       transition(s, data.parent)),
-        "^D"      => "^C",
-        # Other ways to cancel search mode (it's difficult to bind \e itself)
-        "^G"      => "^C",
-        "\e\e"    => "^C",
-        "^K"      => (s::MIState,o...)->transition(s, state(s, p).parent),
-        "^Y"      => (s::MIState,data::ModeState,c)->(edit_yank(s); update_display_buffer(s, data)),
-        "^U"      => (s::MIState,data::ModeState,c)->(edit_clear(data.query_buffer);
-                     edit_clear(data.response_buffer);
-                     update_display_buffer(s, data)),
-        # Right Arrow
-        "\e[C"    => (s::MIState,o...)->(accept_result(s, p); edit_move_right(s)),
-        # Left Arrow
-        "\e[D"    => (s::MIState,o...)->(accept_result(s, p); edit_move_left(s)),
-        # Up Arrow
-        "\e[A"    => (s::MIState,o...)->(accept_result(s, p); edit_move_up(s)),
-        # Down Arrow
-        "\e[B"    => (s::MIState,o...)->(accept_result(s, p); edit_move_down(s)),
-        "^B"      => (s::MIState,o...)->(accept_result(s, p); edit_move_left(s)),
-        "^F"      => (s::MIState,o...)->(accept_result(s, p); edit_move_right(s)),
-        # Meta B
-        "\eb"     => (s::MIState,o...)->(accept_result(s, p); edit_move_word_left(s)),
-        # Meta F
-        "\ef"     => (s::MIState,o...)->(accept_result(s, p); edit_move_word_right(s)),
-        # Ctrl-Left Arrow
-        "\e[1;5D" => "\eb",
-        # Ctrl-Left Arrow on rxvt
-        "\eOd" => "\eb",
-        # Ctrl-Right Arrow
-        "\e[1;5C" => "\ef",
-        # Ctrl-Right Arrow on rxvt
-        "\eOc" => "\ef",
-        "^A"         => (s::MIState,o...)->(accept_result(s, p); move_line_start(s); refresh_line(s)),
-        "^E"         => (s::MIState,o...)->(accept_result(s, p); move_line_end(s); refresh_line(s)),
-        "^Z"      => (s::MIState,o...)->(return :suspend),
-        # Try to catch all Home/End keys
-        "\e[H"    => (s::MIState,o...)->(accept_result(s, p); move_input_start(s); refresh_line(s)),
-        "\e[F"    => (s::MIState,o...)->(accept_result(s, p); move_input_end(s); refresh_line(s)),
-        # Use ^N and ^P to change search directions and iterate through results
-        "^N"      => (s::MIState,data::ModeState,c)->(history_set_backward(data, false); history_next_result(s, data)),
-        "^P"      => (s::MIState,data::ModeState,c)->(history_set_backward(data, true); history_next_result(s, data)),
-        # Bracketed paste mode
-        "\e[200~" => (s::MIState,data::ModeState,c)-> begin
-            ps = state(s, mode(s))
-            input = readuntil(ps.terminal, "\e[201~", keep=false)
-            edit_insert(data.query_buffer, input); update_display_buffer(s, data)
-        end,
-        "*"       => (s::MIState,data::ModeState,c::StringLike)->(edit_insert(data.query_buffer, c); update_display_buffer(s, data))
-    )
-    p.keymap_dict = keymap([pkeymap, escape_defaults])
-    skeymap = AnyDict(
-        "^R"    => (s::MIState,o...)->(enter_search(s, p, true)),
-        "^S"    => (s::MIState,o...)->(enter_search(s, p, false)),
-    )
-    return (p, skeymap)
-end
-
-keymap(state, p::Union{HistoryPrompt,PrefixHistoryPrompt}) = p.keymap_dict
-keymap_data(state, ::Union{HistoryPrompt, PrefixHistoryPrompt}) = state
+keymap(state, p::PrefixHistoryPrompt) = p.keymap_dict
+keymap_data(state, ::PrefixHistoryPrompt) = state
 
 Base.isempty(s::PromptState) = s.input_buffer.size == 0
 
-on_enter(s::PromptState) = s.p.on_enter(s)
+on_enter(s::MIState) = state(s).p.on_enter(s)
 
 move_input_start(s::BufferLike) = (seek(buffer(s), 0); nothing)
 move_input_end(buf::IOBuffer) = (seekend(buf); nothing)
@@ -2256,8 +2451,12 @@ function move_line_end(buf::IOBuffer)
     nothing
 end
 
-edit_insert_last_word(s::MIState) =
-    edit_insert(s, get_last_word(IOBuffer(mode(s).hist.history[end])))
+function edit_insert_last_word(s::MIState)
+    hist = mode(s).hist.history
+    isempty(hist) && return 0
+    isempty(hist.records) && return 0
+    edit_insert(s, get_last_word(IOBuffer(hist[end].content)))
+end
 
 function get_last_word(buf::IOBuffer)
     move_line_end(buf)
@@ -2424,8 +2623,8 @@ AnyDict(
     "\e\n" => "\e\r",
     "^_" => (s::MIState,o...)->edit_undo!(s),
     "\e_" => (s::MIState,o...)->edit_redo!(s),
-    # Simply insert it into the buffer by default
-    "*" => (s::MIState,data,c::StringLike)->(edit_insert(s, c)),
+    # Show hints at what tab complete would do by default
+    "*" => (s::MIState,data,c::StringLike)->(edit_insert(s, c); check_show_hint(s)),
     "^U" => (s::MIState,o...)->edit_kill_line_backwards(s),
     "^K" => (s::MIState,o...)->edit_kill_line_forwards(s),
     "^Y" => (s::MIState,o...)->edit_yank(s),
@@ -2483,6 +2682,9 @@ AnyDict(
 )
 
 const history_keymap = AnyDict(
+    "^R" => (s::MIState,o...)->(history_search(s)),
+    "^S" => (s::MIState,o...)->(history_search(s)),
+    # C/M-n/p
     "^P" => (s::MIState,o...)->(edit_move_up(s) || history_prev(s, mode(s).hist)),
     "^N" => (s::MIState,o...)->(edit_move_down(s) || history_next(s, mode(s).hist)),
     "\ep" => (s::MIState,o...)->(history_prev(s, mode(s).hist)),
@@ -2499,6 +2701,38 @@ const history_keymap = AnyDict(
     "\e>" => (s::MIState,o...)->(history_last(s, mode(s).hist)),
 )
 
+function history_search(mistate::MIState)
+    cancel_beep(mistate)
+    termbuf = TerminalBuffer(IOBuffer())
+    term = terminal(mistate)
+    mimode = mode(mistate)
+    mimode.hist.last_mode = mimode
+    mimode.hist.last_buffer = copy(buffer(mistate))
+    mistate.mode_state[mimode] =
+        deactivate(mimode, state(mistate), termbuf, term)
+    prefix = if mimode.prompt_prefix isa Function
+        mimode.prompt_prefix()
+    else
+        mimode.prompt_prefix
+    end
+    result = histsearch(mimode.hist.history, term, prefix)
+    mimode = if isnothing(result.mode)
+        mistate.current_mode
+    else
+        get(mistate.interface.modes[1].hist.mode_mapping,
+            result.mode,
+            mistate.current_mode)
+    end
+    pstate = mistate.mode_state[mimode]
+    raw!(term, true)
+    mistate.current_mode = mimode
+    activate(mimode, state(mistate, mimode), termbuf, term)
+    commit_changes(term, termbuf)
+    edit_insert(pstate, result.text)
+    refresh_multi_line(mistate)
+    nothing
+end
+
 const prefix_history_keymap = merge!(
     AnyDict(
         "^P" => (s::MIState,data::ModeState,c)->history_prev_prefix(data, data.histprompt.hp, data.prefix),
@@ -2625,16 +2859,17 @@ function Prompt(prompt
     on_enter = default_enter_cb,
     on_done = ()->nothing,
     hist = EmptyHistoryProvider(),
-    sticky = false)
+    sticky = false,
+    styling_passes = StylingPass[])
 
     return Prompt(prompt, prompt_prefix, prompt_suffix, output_prefix, output_prefix_prefix, output_prefix_suffix,
-                   keymap_dict, repl, complete, on_enter, on_done, hist, sticky)
+                   keymap_dict, repl, complete, on_enter, on_done, hist, sticky, styling_passes)
 end
 
 run_interface(::Prompt) = nothing
 
 init_state(terminal, prompt::Prompt) =
-    PromptState(terminal, prompt, IOBuffer(), :off, IOBuffer[], 1, InputAreaState(1, 1),
+    PromptState(terminal, prompt, IOBuffer(), :off, nothing, IOBuffer[], 1, InputAreaState(1, 1),
                 #=indent(spaces)=# -1, Threads.SpinLock(), 0.0, -Inf, nothing)
 
 function init_state(terminal, m::ModalInterface)
@@ -2659,7 +2894,6 @@ end
 
 buffer(s) = _buffer(s)::IOBuffer
 _buffer(s::PromptState) = s.input_buffer
-_buffer(s::SearchState) = s.query_buffer
 _buffer(s::PrefixSearchState) = s.response_buffer
 _buffer(s::IOBuffer) = s
 
@@ -2731,44 +2965,61 @@ keymap_data(ms::MIState, m::ModalInterface) = keymap_data(state(ms), mode(ms))
 
 function prompt!(term::TextTerminal, prompt::ModalInterface, s::MIState = init_state(term, prompt))
     Base.reseteof(term)
+    t1 = Threads.@spawn :interactive while true
+        wait(s.async_channel)
+        status = @lock s.line_modify_lock begin
+            fcn = take!(s.async_channel)
+            fcn(s)
+        end
+        status ∈ (:ok, :ignore) || break
+    end
     raw!(term, true)
     enable_bracketed_paste(term)
     try
         activate(prompt, s, term, term)
         old_state = mode(s)
-        while true
-            kmap = keymap(s, prompt)
-            fcn = match_input(kmap, s)
-            kdata = keymap_data(s, prompt)
-            s.current_action = :unknown # if the to-be-run action doesn't update this field,
-                                        # :unknown will be recorded in the last_action field
-            local status
-            # errors in keymaps shouldn't cause the REPL to fail, so wrap in a
-            # try/catch block
-            try
-                status = fcn(s, kdata)
-            catch e
-                @error "Error in the keymap" exception=e,catch_backtrace()
-                # try to cleanup and get `s` back to its original state before returning
-                transition(s, :reset)
-                transition(s, old_state)
-                status = :done
-            end
-            status !== :ignore && (s.last_action = s.current_action)
-            if status === :abort
-                s.aborted = true
-                return buffer(s), false, false
-            elseif status === :done
-                return buffer(s), true, false
-            elseif status === :suspend
-                if Sys.isunix()
-                    return buffer(s), true, true
+        # spawn this because the main repl task is sticky (due to use of @async and _wait2)
+        # and we want to not block typing when the repl task thread is busy
+        t2 = Threads.@spawn :interactive while true
+            eof(term) || peek(term) # wait before locking but don't consume
+            @lock s.line_modify_lock begin
+                s.n_keys_pressed += 1
+                kmap = keymap(s, prompt)
+                fcn = match_input(kmap, s)
+                kdata = keymap_data(s, prompt)
+                s.current_action = :unknown # if the to-be-run action doesn't update this field,
+                                            # :unknown will be recorded in the last_action field
+                local status
+                # errors in keymaps shouldn't cause the REPL to fail, so wrap in a
+                # try/catch block
+                try
+                    status = fcn(s, kdata)
+                catch e
+                    @error "Error in the keymap" exception=e,catch_backtrace()
+                    # try to cleanup and get `s` back to its original state before returning
+                    transition(s, :reset)
+                    transition(s, old_state)
+                    status = :done
+                end
+                status !== :ignore && (s.last_action = s.current_action)
+                if status === :abort
+                    s.aborted = true
+                    return buffer(s), false, false
+                elseif status === :done
+                    return buffer(s), true, false
+                elseif status === :suspend
+                    if Sys.isunix()
+                        return buffer(s), true, true
+                    end
+                else
+                    @assert status ∈ (:ok, :ignore)
                 end
-            else
-                @assert status ∈ (:ok, :ignore)
             end
         end
+        return fetch(t2)
     finally
+        put!(s.async_channel, Returns(:done))
+        wait(t1)
         raw!(term, false) && disable_bracketed_paste(term)
     end
     # unreachable
diff --git a/stdlib/REPL/src/Pkg_beforeload.jl b/stdlib/REPL/src/Pkg_beforeload.jl
new file mode 100644
index 0000000000000..e51cf7550bce5
--- /dev/null
+++ b/stdlib/REPL/src/Pkg_beforeload.jl
@@ -0,0 +1,122 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## Pkg stuff needed before Pkg has loaded
+
+const Pkg_pkgid = Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg")
+load_pkg() = Base.require_stdlib(Pkg_pkgid, "REPLExt", REPL)
+
+## Below here copied/tweaked from Pkg Types.jl so that the dummy Pkg prompt
+# can populate the env correctly before Pkg loads
+
+function safe_realpath(path)
+    isempty(path) && return path
+    if ispath(path)
+        try
+            return realpath(path)
+        catch
+            return path
+        end
+    end
+    a, b = splitdir(path)
+    return joinpath(safe_realpath(a), b)
+end
+
+function find_project_file(env::Union{Nothing,String}=nothing)
+    project_file = nothing
+    if env isa Nothing
+        project_file = Base.active_project()
+        project_file === nothing && return nothing # in the Pkg version these are pkgerrors
+    elseif startswith(env, '@')
+        project_file = Base.load_path_expand(env)
+        project_file === nothing && return nothing
+    elseif env isa String
+        if isdir(env)
+            isempty(readdir(env)) || return nothing
+            project_file = joinpath(env, Base.project_names[end])
+        else
+            project_file = endswith(env, ".toml") ? abspath(env) :
+                abspath(env, Base.project_names[end])
+        end
+    end
+    @assert project_file isa String &&
+        (isfile(project_file) || !ispath(project_file) ||
+         isdir(project_file) && isempty(readdir(project_file)))
+    return safe_realpath(project_file)
+end
+
+function find_root_base_project(start_project::String)
+    project_file = start_project
+    while true
+        base_project_file = Base.base_project(project_file)
+        base_project_file === nothing && return project_file
+        project_file = base_project_file
+    end
+end
+
+function relative_project_path(project_file::String, path::String)
+    # compute path relative the project
+    # realpath needed to expand symlinks before taking the relative path
+    return relpath(safe_realpath(abspath(path)), safe_realpath(dirname(project_file)))
+end
+
+function projname(project_file::String)
+    if isfile(project_file)
+        name = try
+            # The `nothing` here means that this TOML parser does not return proper Dates.jl
+            # objects - but that's OK since we're just checking the name here.
+            p = Base.TOML.Parser{nothing}()
+            Base.TOML.reinit!(p, read(project_file, String); filepath=project_file)
+            proj = Base.TOML.parse(p)
+            get(proj, "name", nothing)
+        catch
+            nothing
+        end
+    else
+        name = nothing
+    end
+    if name === nothing
+        name = basename(dirname(project_file))
+    end
+    for depot in Base.DEPOT_PATH
+        envdir = joinpath(depot, "environments")
+        if startswith(safe_realpath(project_file), safe_realpath(envdir))
+            return "@" * name
+        end
+    end
+    return name
+end
+
+prev_project_file = nothing
+prev_project_timestamp = nothing
+prev_prefix = ""
+
+function Pkg_promptf()
+    global prev_project_timestamp, prev_prefix, prev_project_file
+    project_file = find_project_file()
+    prefix = ""
+    if project_file !== nothing
+        if prev_project_file == project_file && prev_project_timestamp == mtime(project_file)
+            prefix = prev_prefix
+        else
+            project_name = projname(project_file)
+            if project_name !== nothing
+                root = find_root_base_project(project_file)
+                rootname = projname(root)
+                if root !== project_file
+                    path_prefix = "/" * dirname(relative_project_path(root, project_file))
+                else
+                    path_prefix = ""
+                end
+                if textwidth(rootname) > 30
+                    rootname = first(rootname, 27) * "..."
+                end
+                prefix = "($(rootname)$(path_prefix)) "
+                prev_prefix = prefix
+                prev_project_timestamp = mtime(project_file)
+                prev_project_file = project_file
+            end
+        end
+    end
+    # Note no handling of Pkg.offline, as the Pkg version does here
+    return "$(prefix)$(PKG_PROMPT)"
+end
diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index 1328a87f1a77d..0c31315e9bea1 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -17,8 +17,33 @@ module REPL
 Base.Experimental.@optlevel 1
 Base.Experimental.@max_methods 1
 
-using Base.Meta, Sockets
+function UndefVarError_REPL_hint(io::IO, ex::UndefVarError)
+    var = ex.var
+    if var === :or
+        print(io, "\nSuggestion: Use `||` for short-circuiting boolean OR.")
+    elseif var === :and
+        print(io, "\nSuggestion: Use `&&` for short-circuiting boolean AND.")
+    elseif var === :help
+        println(io)
+        # Show friendly help message when user types help or help() and help is undefined
+        show(io, MIME("text/plain"), Base.Docs.parsedoc(Base.Docs.keywords[:help]))
+    elseif var === :quit
+        print(io, "\nSuggestion: To exit Julia, use Ctrl-D, or type exit() and press enter.")
+    end
+end
+
+function __init__()
+    Base.REPL_MODULE_REF[] = REPL
+    Base.Experimental.register_error_hint(UndefVarError_REPL_hint, UndefVarError)
+    return nothing
+end
+
+using Base.Meta, Sockets, StyledStrings
+using JuliaSyntaxHighlighting
+using Dates: now, UTC
 import InteractiveUtils
+import FileWatching
+import Base.JuliaSyntax: kind, @K_str, @KSet_str, Tokenize.tokenize
 
 export
     AbstractREPL,
@@ -26,6 +51,8 @@ export
     LineEditREPL,
     StreamREPL
 
+public TerminalMenus
+
 import Base:
     AbstractDisplay,
     display,
@@ -35,16 +62,19 @@ import Base:
 
 _displaysize(io::IO) = displaysize(io)::Tuple{Int,Int}
 
-include("Terminals.jl")
-using .Terminals
+using Base.Terminals
 
 abstract type AbstractREPL end
 
 include("options.jl")
+include("StylingPasses.jl")
+using .StylingPasses
+
+function histsearch end # To work around circular dependency
 
 include("LineEdit.jl")
 using .LineEdit
-import ..LineEdit:
+import .LineEdit:
     CompletionProvider,
     HistoryProvider,
     add_history,
@@ -56,20 +86,26 @@ import ..LineEdit:
     history_first,
     history_last,
     history_search,
-    accept_result,
     setmodifiers!,
     terminal,
     MIState,
     PromptState,
-    TextInterface,
     mode_idx
 
+include("SyntaxUtil.jl")
 include("REPLCompletions.jl")
 using .REPLCompletions
 
 include("TerminalMenus/TerminalMenus.jl")
 include("docview.jl")
 
+include("History/History.jl")
+using .History
+
+histsearch(args...) = runsearch(args...)
+
+include("Pkg_beforeload.jl")
+
 @nospecialize # use only declared type signatures
 
 answer_color(::AbstractREPL) = ""
@@ -96,6 +132,22 @@ mutable struct REPLBackend
 end
 REPLBackend() = REPLBackend(Channel(1), Channel(1), false)
 
+# A reference to a backend that is not mutable
+struct REPLBackendRef
+    repl_channel::Channel{Any}
+    response_channel::Channel{Any}
+end
+REPLBackendRef(backend::REPLBackend) = REPLBackendRef(backend.repl_channel, backend.response_channel)
+
+function destroy(ref::REPLBackendRef, state::Task)
+    if istaskfailed(state)
+        close(ref.repl_channel, TaskFailedException(state))
+        close(ref.response_channel, TaskFailedException(state))
+    end
+    close(ref.repl_channel)
+    close(ref.response_channel)
+end
+
 """
     softscope(ex)
 
@@ -123,7 +175,129 @@ end
 # Temporary alias until Documenter updates
 const softscope! = softscope
 
-const repl_ast_transforms = Any[softscope] # defaults for new REPL backends
+function print_qualified_access_warning(mod::Module, owner::Module, name::Symbol)
+    @warn string(name, " is defined in ", owner, " and is not public in ", mod) maxlog = 1 _id = string("repl-warning-", mod, "-", owner, "-", name) _line = nothing _file = nothing _module = nothing
+end
+
+function has_ancestor(query::Module, target::Module)
+    query == target && return true
+    while true
+        next = parentmodule(query)
+        next == target && return true
+        next == query && return false
+        query = next
+    end
+end
+
+retrieve_modules(::Module, ::Any) = (nothing,)
+function retrieve_modules(current_module::Module, mod_name::Symbol)
+    mod = try
+        getproperty(current_module, mod_name)
+    catch
+        return (nothing,)
+    end
+    return (mod isa Module ? mod : nothing,)
+end
+retrieve_modules(current_module::Module, mod_name::QuoteNode) = retrieve_modules(current_module, mod_name.value)
+function retrieve_modules(current_module::Module, mod_expr::Expr)
+    if Meta.isexpr(mod_expr, :., 2)
+        current_module = retrieve_modules(current_module, mod_expr.args[1])[1]
+        current_module === nothing && return (nothing,)
+        return (current_module, retrieve_modules(current_module, mod_expr.args[2])...)
+    else
+        return (nothing,)
+    end
+end
+
+add_locals!(locals, ast::Any) = nothing
+function add_locals!(locals, ast::Expr)
+    for arg in ast.args
+        add_locals!(locals, arg)
+    end
+    return nothing
+end
+function add_locals!(locals, ast::Symbol)
+    push!(locals, ast)
+    return nothing
+end
+
+function collect_names_to_warn!(warnings, locals, current_module::Module, ast)
+    ast isa Expr || return
+
+    # don't recurse through module definitions
+    ast.head === :module && return
+
+    if Meta.isexpr(ast, :., 2)
+        mod_name, name_being_accessed = ast.args
+        # retrieve the (possibly-nested) module being named here
+        mods = retrieve_modules(current_module, mod_name)
+        all(x -> x isa Module, mods) || return
+        outer_mod = first(mods)
+        mod = last(mods)
+        if name_being_accessed isa QuoteNode
+            name_being_accessed = name_being_accessed.value
+        end
+        name_being_accessed isa Symbol || return
+        owner = try
+            which(mod, name_being_accessed)
+        catch
+            return
+        end
+        # if `owner` is a submodule of `mod`, then don't warn. E.g. the name `parse` is present in the module `JSON`
+        # but is owned by `JSON.Parser`; we don't warn if it is accessed as `JSON.parse`.
+        has_ancestor(owner, mod) && return
+        # Don't warn if the name is public in the module we are accessing it
+        Base.ispublic(mod, name_being_accessed) && return
+        # Don't warn if accessing names defined in Core from Base if they are present in Base (e.g. `Base.throw`).
+        mod === Base && Base.ispublic(Core, name_being_accessed) && return
+        push!(warnings, (; outer_mod, mod, owner, name_being_accessed))
+        # no recursion
+        return
+    elseif Meta.isexpr(ast, :(=), 2)
+        lhs, rhs = ast.args
+        # any symbols we find on the LHS we will count as local. This can potentially be overzealous,
+        # but we want to avoid false positives (unnecessary warnings) more than false negatives.
+        add_locals!(locals, lhs)
+        # we'll recurse into the RHS only
+        return collect_names_to_warn!(warnings, locals, current_module, rhs)
+    elseif Meta.isexpr(ast, :function) && length(ast.args) >= 1
+
+        if Meta.isexpr(ast.args[1], :call, 2)
+            func_name, func_args = ast.args[1].args
+            # here we have a function definition and are inspecting it's arguments for local variables.
+            # we will error on the conservative side by adding all symbols we find (regardless if they are local variables or possibly-global default values)
+            add_locals!(locals, func_args)
+        end
+        # fall through to general recursion
+    end
+
+    for arg in ast.args
+        collect_names_to_warn!(warnings, locals, current_module, arg)
+    end
+
+    return nothing
+end
+
+function collect_qualified_access_warnings(current_mod, ast)
+    warnings = Set()
+    locals = Set{Symbol}()
+    collect_names_to_warn!(warnings, locals, current_mod, ast)
+    filter!(warnings) do (; outer_mod)
+        nameof(outer_mod) ∉ locals
+    end
+    return warnings
+end
+
+function warn_on_non_owning_accesses(current_mod, ast)
+    warnings = collect_qualified_access_warnings(current_mod, ast)
+    for (; outer_mod, mod, owner, name_being_accessed) in warnings
+        print_qualified_access_warning(mod, owner, name_being_accessed)
+    end
+    return ast
+end
+warn_on_non_owning_accesses(ast) = warn_on_non_owning_accesses(Base.active_module(), ast)
+
+const repl_ast_transforms = Any[softscope, warn_on_non_owning_accesses] # defaults for new REPL backends
 
 # Allows an external package to add hooks into the code loading.
 # The hook should take a Vector{Symbol} of package names and
@@ -131,6 +305,27 @@ const repl_ast_transforms = Any[softscope] # defaults for new REPL backends
 # to e.g. install packages on demand
 const install_packages_hooks = Any[]
 
+# N.B.: Any functions starting with __repl_entry cut off backtraces when printing in the REPL.
+# We need to do this for both the actual eval and macroexpand, since the latter can cause custom macro
+# code to run (and error).
+__repl_entry_lower_with_loc(mod::Module, @nospecialize(ast), toplevel_file::Ref{Ptr{UInt8}}, toplevel_line::Ref{Csize_t}) =
+    Core._lower(ast, mod, toplevel_file[], toplevel_line[])[1]
+__repl_entry_eval_expanded_with_loc(mod::Module, @nospecialize(ast), toplevel_file::Ref{Ptr{UInt8}}, toplevel_line::Ref{Csize_t}) =
+    ccall(:jl_toplevel_eval_flex, Any, (Any, Any, Cint, Cint, Ptr{Ptr{UInt8}}, Ptr{Csize_t}), mod, ast, 1, 1, toplevel_file, toplevel_line)
+
+function toplevel_eval_with_hooks(mod::Module, @nospecialize(ast), toplevel_file=Ref{Ptr{UInt8}}(Base.unsafe_convert(Ptr{UInt8}, :REPL)), toplevel_line=Ref{Csize_t}(1))
+    if !isexpr(ast, :toplevel)
+        ast = invokelatest(__repl_entry_lower_with_loc, mod, ast, toplevel_file, toplevel_line)
+        check_for_missing_packages_and_run_hooks(ast)
+        return invokelatest(__repl_entry_eval_expanded_with_loc, mod, ast, toplevel_file, toplevel_line)
+    end
+    local value=nothing
+    for i = 1:length(ast.args)
+        value = toplevel_eval_with_hooks(mod, ast.args[i], toplevel_file, toplevel_line)
+    end
+    return value
+end
+
 function eval_user_input(@nospecialize(ast), backend::REPLBackend, mod::Module)
     lasterr = nothing
     Base.sigatomic_begin()
@@ -141,13 +336,10 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend, mod::Module)
                 put!(backend.response_channel, Pair{Any, Bool}(lasterr, true))
             else
                 backend.in_eval = true
-                if !isempty(install_packages_hooks)
-                    check_for_missing_packages_and_run_hooks(ast)
-                end
                 for xf in backend.ast_transforms
                     ast = Base.invokelatest(xf, ast)
                 end
-                value = Core.eval(mod, ast)
+                value = toplevel_eval_with_hooks(mod, ast)
                 backend.in_eval = false
                 setglobal!(Base.MainInclude, :ans, value)
                 put!(backend.response_channel, Pair{Any, Bool}(value, false))
@@ -170,33 +362,54 @@ function check_for_missing_packages_and_run_hooks(ast)
     mods = modules_to_be_loaded(ast)
     filter!(mod -> isnothing(Base.identify_package(String(mod))), mods) # keep missing modules
     if !isempty(mods)
+        isempty(install_packages_hooks) && load_pkg()
         for f in install_packages_hooks
             Base.invokelatest(f, mods) && return
         end
     end
 end
 
-function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[])
+function _modules_to_be_loaded!(ast::Expr, mods::Vector{Symbol})
+    function add!(ctx)
+        if ctx.head == :as
+            ctx = ctx.args[1]
+        end
+        if ctx.args[1] != :. # don't include local import `import .Foo`
+            push!(mods, ctx.args[1])
+        end
+    end
     ast.head === :quote && return mods # don't search if it's not going to be run during this eval
-    if ast.head === :using || ast.head === :import
-        for arg in ast.args
-            arg = arg::Expr
-            arg1 = first(arg.args)
-            if arg1 isa Symbol # i.e. `Foo`
-                if arg1 != :. # don't include local imports
-                    push!(mods, arg1)
-                end
-            else # i.e. `Foo: bar`
-                push!(mods, first((arg1::Expr).args))
+    if ast.head == :call
+        if length(ast.args) == 5 && ast.args[1] === GlobalRef(Base, :_eval_import)
+            ctx = ast.args[4]
+            if ctx isa QuoteNode # i.e. `Foo: bar`
+                ctx = ctx.value
+            else
+                ctx = ast.args[5].value
             end
+            add!(ctx)
+        elseif length(ast.args) == 3 && ast.args[1] == GlobalRef(Base, :_eval_using)
+            add!(ast.args[3].value)
         end
     end
-    for arg in ast.args
-        if isexpr(arg, (:block, :if, :using, :import))
-            modules_to_be_loaded(arg, mods)
+    if ast.head !== :thunk
+        for arg in ast.args
+            if isexpr(arg, (:block, :if))
+                _modules_to_be_loaded!(arg, mods)
+            end
+        end
+    else
+        code = ast.args[1]
+        for arg in code.code
+            isa(arg, Expr) || continue
+            _modules_to_be_loaded!(arg, mods)
         end
     end
-    filter!(mod -> !in(String(mod), ["Base", "Main", "Core"]), mods) # Exclude special non-package modules
+end
+
+function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[])
+    _modules_to_be_loaded!(ast, mods)
+    filter!(mod::Symbol -> !in(mod, (:Base, :Main, :Core)), mods) # Exclude special non-package modules
     return unique(mods)
 end
 
@@ -238,26 +451,97 @@ function repl_backend_loop(backend::REPLBackend, get_module::Function)
     while true
         tls = task_local_storage()
         tls[:SOURCE_PATH] = nothing
-        ast, show_value = take!(backend.repl_channel)
+        ast_or_func, show_value = take!(backend.repl_channel)
         if show_value == -1
             # exit flag
             break
         end
-        eval_user_input(ast, backend, get_module())
+        if show_value == 2 # 2 indicates a function to be called
+            f = ast_or_func
+            try
+                ret = f()
+                put!(backend.response_channel, Pair{Any, Bool}(ret, false))
+            catch
+                put!(backend.response_channel, Pair{Any, Bool}(current_exceptions(), true))
+            end
+        else
+            ast = ast_or_func
+            eval_user_input(ast, backend, get_module())
+        end
     end
     return nothing
 end
 
-struct REPLDisplay{R<:AbstractREPL} <: AbstractDisplay
-    repl::R
+SHOW_MAXIMUM_BYTES::Int = 1_048_576
+
+# Limit printing during REPL display
+mutable struct LimitIO{IO_t <: IO} <: IO
+    io::IO_t
+    maxbytes::Int
+    n::Int # max bytes to write
 end
+LimitIO(io::IO, maxbytes) = LimitIO(io, maxbytes, 0)
 
-==(a::REPLDisplay, b::REPLDisplay) = a.repl === b.repl
+struct LimitIOException <: Exception
+    maxbytes::Int
+end
+
+function Base.showerror(io::IO, e::LimitIOException)
+    print(io, "$LimitIOException: aborted printing after attempting to print more than $(Base.format_bytes(e.maxbytes)) within a `LimitIO`.")
+end
+
+Base.displaysize(io::LimitIO) = _displaysize(io.io)
+
+function Base.write(io::LimitIO, v::UInt8)
+    io.n > io.maxbytes && throw(LimitIOException(io.maxbytes))
+    n_bytes = write(io.io, v)
+    io.n += n_bytes
+    return n_bytes
+end
+
+# Semantically, we only need to override `Base.write`, but we also
+# override `unsafe_write` for performance.
+function Base.unsafe_write(limiter::LimitIO, p::Ptr{UInt8}, nb::UInt)
+    # already exceeded? throw
+    limiter.n > limiter.maxbytes && throw(LimitIOException(limiter.maxbytes))
+    remaining = limiter.maxbytes - limiter.n # >= 0
+
+    # Not enough bytes left; we will print up to the limit, then throw
+    if remaining < nb
+        if remaining > 0
+            Base.unsafe_write(limiter.io, p, remaining)
+        end
+        throw(LimitIOException(limiter.maxbytes))
+    end
+
+    # We won't hit the limit so we'll write the full `nb` bytes
+    bytes_written = Base.unsafe_write(limiter.io, p, nb)::Union{Int,UInt}
+    limiter.n += bytes_written
+    return bytes_written
+end
+
+struct REPLDisplay{Repl<:AbstractREPL} <: AbstractDisplay
+    repl::Repl
+end
+
+function show_limited(io::IO, mime::MIME, x)
+    try
+        # We wrap in a LimitIO to limit the amount of printing.
+        # We unpack `IOContext`s, since we will pass the properties on the outside.
+        inner = io isa IOContext ? io.io : io
+        wrapped_limiter = IOContext(LimitIO(inner, SHOW_MAXIMUM_BYTES), io)
+        # `show_repl` to allow the hook with special syntax highlighting
+        show_repl(wrapped_limiter, mime, x)
+    catch e
+        e isa LimitIOException || rethrow()
+        printstyled(io, """…[printing stopped after displaying $(Base.format_bytes(e.maxbytes)); call `show(stdout, MIME"text/plain"(), ans)` to print without truncation]"""; color=:light_yellow, bold=true)
+    end
+end
 
 function display(d::REPLDisplay, mime::MIME"text/plain", x)
     x = Ref{Any}(x)
     with_repl_linfo(d.repl) do io
-        io = IOContext(io, :limit => true, :module => active_module(d)::Module)
+        io = IOContext(io, :limit => true, :module => Base.active_module(d)::Module)
         if d.repl isa LineEditREPL
             mistate = d.repl.mistate
             mode = LineEdit.mode(mistate)
@@ -270,86 +554,116 @@ function display(d::REPLDisplay, mime::MIME"text/plain", x)
             # this can override the :limit property set initially
             io = foldl(IOContext, d.repl.options.iocontext, init=io)
         end
-        show(io, mime, x[])
+        show_limited(io, mime, x[])
         println(io)
     end
     return nothing
 end
+
 display(d::REPLDisplay, x) = display(d, MIME("text/plain"), x)
 
+show_repl(io::IO, mime::MIME"text/plain", x) = show(io, mime, x)
+
+function show_repl(io::IO, mime::MIME"text/plain", c::AbstractChar)
+    show(io, mime, c) # Call the original Base.show
+    # Check for LaTeX/emoji alias and print if found and using symbol_latex which is used in help?> mode
+    latex = symbol_latex(string(c))
+    if !isempty(latex)
+        print(io, ", input as ")
+        printstyled(io, latex, "<tab>"; color=:cyan)
+    end
+end
+
+show_repl(io::IO, ::MIME"text/plain", ex::Expr) =
+    print(io, JuliaSyntaxHighlighting.highlight(
+        sprint(show, ex, context=IOContext(io, :color => false))))
+
 function print_response(repl::AbstractREPL, response, show_value::Bool, have_color::Bool)
     repl.waserror = response[2]
     with_repl_linfo(repl) do io
-        io = IOContext(io, :module => active_module(repl)::Module)
-        print_response(io, response, show_value, have_color, specialdisplay(repl))
+        io = IOContext(io, :module => Base.active_module(repl)::Module)
+        print_response(io, response, backend(repl), show_value, have_color, specialdisplay(repl))
     end
     return nothing
 end
-function print_response(errio::IO, response, show_value::Bool, have_color::Bool, specialdisplay::Union{AbstractDisplay,Nothing}=nothing)
+
+# N.B.: Any functions starting with __repl_entry cut off backtraces when printing in the REPL.
+__repl_entry_display(val) = Base.invokelatest(display, val)
+__repl_entry_display(specialdisplay::Union{AbstractDisplay,Nothing}, val) = Base.invokelatest(display, specialdisplay, val)
+
+function __repl_entry_display_error(errio::IO, @nospecialize errval)
+    # this will be set to true if types in the stacktrace are truncated
+    limitflag = Ref(false)
+    errio = IOContext(errio, :stacktrace_types_limited => limitflag)
+    Base.invokelatest(Base.display_error, errio, errval)
+    if limitflag[]
+        print(errio, "Some type information was truncated. Use `show(err)` to see complete types.")
+        println(errio)
+    end
+    return nothing
+end
+
+function print_response(errio::IO, response, backend::Union{REPLBackendRef,Nothing}, show_value::Bool, have_color::Bool, specialdisplay::Union{AbstractDisplay,Nothing}=nothing)
     Base.sigatomic_begin()
     val, iserr = response
-    while true
+    if !iserr
+        # display result
         try
-            Base.sigatomic_end()
-            if iserr
-                val = Base.scrub_repl_backtrace(val)
-                Base.istrivialerror(val) || setglobal!(Base.MainInclude, :err, val)
-                Base.invokelatest(Base.display_error, errio, val)
-            else
-                if val !== nothing && show_value
-                    try
-                        if specialdisplay === nothing
-                            Base.invokelatest(display, val)
-                        else
-                            Base.invokelatest(display, specialdisplay, val)
-                        end
-                    catch
-                        println(errio, "Error showing value of type ", typeof(val), ":")
-                        rethrow()
+            if val !== nothing && show_value
+                Base.sigatomic_end() # allow display to be interrupted
+                val2, iserr = if specialdisplay === nothing
+                    # display calls may require being run on the main thread
+                    call_on_backend(backend) do
+                        __repl_entry_display(val)
+                    end
+                else
+                    call_on_backend(backend) do
+                        __repl_entry_display(specialdisplay, val)
                     end
                 end
-            end
-            break
-        catch ex
-            if iserr
-                println(errio) # an error during printing is likely to leave us mid-line
-                println(errio, "SYSTEM (REPL): showing an error caused an error")
-                try
-                    excs = Base.scrub_repl_backtrace(current_exceptions())
-                    setglobal!(Base.MainInclude, :err, excs)
-                    Base.invokelatest(Base.display_error, errio, excs)
-                catch e
-                    # at this point, only print the name of the type as a Symbol to
-                    # minimize the possibility of further errors.
+                Base.sigatomic_begin()
+                if iserr
                     println(errio)
-                    println(errio, "SYSTEM (REPL): caught exception of type ", typeof(e).name.name,
-                            " while trying to handle a nested exception; giving up")
+                    println(errio, "Error showing value of type ", typeof(val), ":")
+                    val = val2
                 end
-                break
             end
+        catch ex
+            println(errio)
+            println(errio, "SYSTEM (REPL): showing a value caused an error")
             val = current_exceptions()
             iserr = true
         end
     end
+    if iserr
+        # print error
+        iserr = false
+        while true
+            try
+                Base.sigatomic_end() # allow stacktrace printing to be interrupted
+                val = Base.scrub_repl_backtrace(val)
+                Base.istrivialerror(val) || setglobal!(Base.MainInclude, :err, val)
+                __repl_entry_display_error(errio, val)
+                break
+            catch ex
+                println(errio) # an error during printing is likely to leave us mid-line
+                if !iserr
+                    println(errio, "SYSTEM (REPL): showing an error caused an error")
+                    val = current_exceptions()
+                    iserr = true
+                else
+                    println(errio, "SYSTEM (REPL): caught exception of type ", typeof(ex).name.name,
+                        " while trying to print an exception; giving up")
+                    break
+                end
+            end
+        end
+    end
     Base.sigatomic_end()
     nothing
 end
 
-# A reference to a backend that is not mutable
-struct REPLBackendRef
-    repl_channel::Channel{Any}
-    response_channel::Channel{Any}
-end
-REPLBackendRef(backend::REPLBackend) = REPLBackendRef(backend.repl_channel, backend.response_channel)
 
-function destroy(ref::REPLBackendRef, state::Task)
-    if istaskfailed(state)
-        close(ref.repl_channel, TaskFailedException(state))
-        close(ref.response_channel, TaskFailedException(state))
-    end
-    close(ref.repl_channel)
-    close(ref.response_channel)
-end
 
 """
     run_repl(repl::AbstractREPL)
@@ -368,7 +682,7 @@ function run_repl(repl::AbstractREPL, @nospecialize(consumer = x -> nothing); ba
             Core.println(Core.stderr, e)
             Core.println(Core.stderr, catch_backtrace())
         end
-    get_module = () -> active_module(repl)
+    get_module = () -> Base.active_module(repl)
     if backend_on_current_task
         t = @async run_frontend(repl, backend_ref)
         errormonitor(t)
@@ -426,11 +740,11 @@ function run_frontend(repl::BasicREPL, backend::REPLBackendRef)
                     rethrow()
                 end
             end
-            ast = Base.parse_input_line(line)
+            ast = Base.parse_input_line(line; mod=Base.active_module(repl))
             (isa(ast,Expr) && ast.head === :incomplete) || break
         end
         if !isempty(line)
-            response = eval_with_backend(ast, backend)
+            response = eval_on_backend(ast, backend)
             print_response(repl, response, !ends_with_semicolon(line), false)
         end
         write(repl.terminal, '\n')
@@ -452,6 +766,7 @@ mutable struct LineEditREPL <: AbstractREPL
     answer_color::String
     shell_color::String
     help_color::String
+    pkg_color::String
     history_file::Bool
     in_shell::Bool
     in_help::Bool
@@ -464,13 +779,13 @@ mutable struct LineEditREPL <: AbstractREPL
     interface::ModalInterface
     backendref::REPLBackendRef
     frontend_task::Task
-    function LineEditREPL(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,history_file,in_shell,in_help,envcolors)
+    function LineEditREPL(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,pkg_color,history_file,in_shell,in_help,envcolors)
         opts = Options()
         opts.hascolor = hascolor
         if !hascolor
             opts.beep_colors = [""]
         end
-        new(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,history_file,in_shell,
+        new(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,pkg_color,history_file,in_shell,
             in_help,envcolors,false,nothing, opts, nothing, Tuple{String,Int}[])
     end
 end
@@ -487,6 +802,7 @@ LineEditREPL(t::TextTerminal, hascolor::Bool, envcolors::Bool=false) =
         hascolor ? Base.answer_color() : "",
         hascolor ? Base.text_colors[:red] : "",
         hascolor ? Base.text_colors[:yellow] : "",
+        hascolor ? Base.text_colors[:blue] : "",
         false, false, false, envcolors
     )
 
@@ -498,13 +814,12 @@ REPLCompletionProvider() = REPLCompletionProvider(LineEdit.Modifiers())
 mutable struct ShellCompletionProvider <: CompletionProvider end
 struct LatexCompletions <: CompletionProvider end
 
-function active_module() # this method is also called from Base
-    isdefined(Base, :active_repl) || return Main
-    return active_module(Base.active_repl::AbstractREPL)
-end
-active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : mistate.active_module
-active_module(::AbstractREPL) = Main
-active_module(d::REPLDisplay) = active_module(d.repl)
+Base.active_module(mistate::MIState) = mistate.active_module
+Base.active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : Base.active_module(mistate)
+Base.active_module(::AbstractREPL) = Main
+Base.active_module(d::REPLDisplay) = Base.active_module(d.repl)
+
+setmodifiers!(c::CompletionProvider, m::LineEdit.Modifiers) = nothing
 
 setmodifiers!(c::REPLCompletionProvider, m::LineEdit.Modifiers) = c.modifiers = m
 
@@ -514,37 +829,39 @@ setmodifiers!(c::REPLCompletionProvider, m::LineEdit.Modifiers) = c.modifiers =
 Set `mod` as the default contextual module in the REPL,
 both for evaluating expressions and printing them.
 """
-function activate(mod::Module=Main)
+function activate(mod::Module=Main; interactive_utils::Bool=true)
     mistate = (Base.active_repl::LineEditREPL).mistate
     mistate === nothing && return nothing
     mistate.active_module = mod
-    Base.load_InteractiveUtils(mod)
+    interactive_utils && Base.load_InteractiveUtils(mod)
     return nothing
 end
 
 beforecursor(buf::IOBuffer) = String(buf.data[1:buf.ptr-1])
 
-function complete_line(c::REPLCompletionProvider, s::PromptState, mod::Module)
-    partial = beforecursor(s.input_buffer)
+# Convert inclusive-inclusive 1-based char indexing to inclusive-exclusive byte Region.
+to_region(s, r) = first(r)-1 => (length(r) > 0 ? nextind(s, last(r))-1 : first(r)-1)
+
+function complete_line(c::REPLCompletionProvider, s::PromptState, mod::Module; hint::Bool=false)
     full = LineEdit.input_string(s)
-    ret, range, should_complete = completions(full, lastindex(partial), mod, c.modifiers.shift)
+    ret, range, should_complete = completions(full, thisind(full, position(s)), mod, c.modifiers.shift, hint)
+    range = to_region(full, range)
     c.modifiers = LineEdit.Modifiers()
-    return unique!(map(completion_text, ret)), partial[range], should_complete
+    return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), range, should_complete
 end
 
-function complete_line(c::ShellCompletionProvider, s::PromptState)
-    # First parse everything up to the current position
-    partial = beforecursor(s.input_buffer)
+function complete_line(c::ShellCompletionProvider, s::PromptState; hint::Bool=false)
     full = LineEdit.input_string(s)
-    ret, range, should_complete = shell_completions(full, lastindex(partial))
-    return unique!(map(completion_text, ret)), partial[range], should_complete
+    ret, range, should_complete = shell_completions(full, thisind(full, position(s)), hint)
+    range = to_region(full, range)
+    return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), range, should_complete
 end
 
-function complete_line(c::LatexCompletions, s)
-    partial = beforecursor(LineEdit.buffer(s))
+function complete_line(c::LatexCompletions, s; hint::Bool=false)
     full = LineEdit.input_string(s)::String
-    ret, range, should_complete = bslash_completions(full, lastindex(partial))[2]
-    return unique!(map(completion_text, ret)), partial[range], should_complete
+    ret, range, should_complete = bslash_completions(full, thisind(full, position(s)), hint)[2]
+    range = to_region(full, range)
+    return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), range, should_complete
 end
 
 with_repl_linfo(f, repl) = f(outstream(repl))
@@ -559,107 +876,26 @@ function with_repl_linfo(f, repl::LineEditREPL)
 end
 
 mutable struct REPLHistoryProvider <: HistoryProvider
-    history::Vector{String}
-    file_path::String
-    history_file::Union{Nothing,IO}
+    history::HistoryFile
     start_idx::Int
     cur_idx::Int
     last_idx::Int
     last_buffer::IOBuffer
     last_mode::Union{Nothing,Prompt}
     mode_mapping::Dict{Symbol,Prompt}
-    modes::Vector{Symbol}
 end
 REPLHistoryProvider(mode_mapping::Dict{Symbol}) =
-    REPLHistoryProvider(String[], "", nothing, 0, 0, -1, IOBuffer(),
-                        nothing, mode_mapping, UInt8[])
-
-invalid_history_message(path::String) = """
-Invalid history file ($path) format:
-If you have a history file left over from an older version of Julia,
-try renaming or deleting it.
-Invalid character: """
-
-munged_history_message(path::String) = """
-Invalid history file ($path) format:
-An editor may have converted tabs to spaces at line """
-
-function hist_open_file(hp::REPLHistoryProvider)
-    f = open(hp.file_path, read=true, write=true, create=true)
-    hp.history_file = f
-    seekend(f)
-end
-
-function hist_from_file(hp::REPLHistoryProvider, path::String)
-    getline(lines, i) = i > length(lines) ? "" : lines[i]
-    file_lines = readlines(path)
-    countlines = 0
-    while true
-        # First parse the metadata that starts with '#' in particular the REPL mode
-        countlines += 1
-        line = getline(file_lines, countlines)
-        mode = :julia
-        isempty(line) && break
-        line[1] != '#' &&
-            error(invalid_history_message(path), repr(line[1]), " at line ", countlines)
-        while !isempty(line)
-            startswith(line, '#') || break
-            if startswith(line, "# mode: ")
-                mode = Symbol(SubString(line, 9))
-            end
-            countlines += 1
-            line = getline(file_lines, countlines)
-        end
-        isempty(line) && break
-
-        # Now parse the code for the current REPL mode
-        line[1] == ' '  &&
-            error(munged_history_message(path), countlines)
-        line[1] != '\t' &&
-            error(invalid_history_message(path), repr(line[1]), " at line ", countlines)
-        lines = String[]
-        while !isempty(line)
-            push!(lines, chomp(SubString(line, 2)))
-            next_line = getline(file_lines, countlines+1)
-            isempty(next_line) && break
-            first(next_line) == ' '  && error(munged_history_message(path), countlines)
-            # A line not starting with a tab means we are done with code for this entry
-            first(next_line) != '\t' && break
-            countlines += 1
-            line = getline(file_lines, countlines)
-        end
-        push!(hp.modes, mode)
-        push!(hp.history, join(lines, '\n'))
-    end
-    hp.start_idx = length(hp.history)
-    return hp
-end
+    REPLHistoryProvider(HistoryFile(), 0, 0, -1, IOBuffer(),
+                        nothing, mode_mapping)
 
 function add_history(hist::REPLHistoryProvider, s::PromptState)
-    str = rstrip(String(take!(copy(s.input_buffer))))
+    str = rstrip(takestring!(copy(s.input_buffer)))
     isempty(strip(str)) && return
     mode = mode_idx(hist, LineEdit.mode(s))
-    !isempty(hist.history) &&
-        isequal(mode, hist.modes[end]) && str == hist.history[end] && return
-    push!(hist.modes, mode)
-    push!(hist.history, str)
-    hist.history_file === nothing && return
-    entry = """
-    # time: $(Libc.strftime("%Y-%m-%d %H:%M:%S %Z", time()))
-    # mode: $mode
-    $(replace(str, r"^"ms => "\t"))
-    """
-    # TODO: write-lock history file
-    try
-        seekend(hist.history_file)
-    catch err
-        (err isa SystemError) || rethrow()
-        # File handle might get stale after a while, especially under network file systems
-        # If this doesn't fix it (e.g. when file is deleted), we'll end up rethrowing anyway
-        hist_open_file(hist)
-    end
-    print(hist.history_file, entry)
-    flush(hist.history_file)
+    !isempty(hist.history) && isequal(mode, hist.history[end].mode) &&
+        str == hist.history[end].content && return
+    entry = HistEntry(mode, now(UTC), str, 0)
+    push!(hist.history, entry)
     nothing
 end
 
@@ -674,8 +910,15 @@ function history_move(s::Union{LineEdit.MIState,LineEdit.PrefixSearchState}, his
         hist.last_mode = LineEdit.mode(s)
         hist.last_buffer = copy(LineEdit.buffer(s))
     else
-        hist.history[save_idx] = LineEdit.input_string(s)
-        hist.modes[save_idx] = mode_idx(hist, LineEdit.mode(s))
+        # NOTE: Modifying the history is a bit funky, so
+        # we reach into the internals of `HistoryFile`
+        # to do so rather than implementing `setindex!`.
+        oldrec = hist.history.records[save_idx]
+        hist.history.records[save_idx] = HistEntry(
+            mode_idx(hist, LineEdit.mode(s)),
+            oldrec.date,
+            LineEdit.input_string(s),
+            oldrec.index)
     end
 
     # load the saved line
@@ -687,9 +930,9 @@ function history_move(s::Union{LineEdit.MIState,LineEdit.PrefixSearchState}, his
         hist.last_mode = nothing
         hist.last_buffer = IOBuffer()
     else
-        if haskey(hist.mode_mapping, hist.modes[idx])
-            LineEdit.transition(s, hist.mode_mapping[hist.modes[idx]]) do
-                LineEdit.replace_line(s, hist.history[idx])
+        if haskey(hist.mode_mapping, hist.history[idx].mode)
+            LineEdit.transition(s, hist.mode_mapping[hist.history[idx].mode]) do
+                LineEdit.replace_line(s, hist.history[idx].content)
             end
         else
             return :skip
@@ -702,12 +945,21 @@ end
 
 # REPL History can also transitions modes
 function LineEdit.accept_result_newmode(hist::REPLHistoryProvider)
-    if 1 <= hist.cur_idx <= length(hist.modes)
-        return hist.mode_mapping[hist.modes[hist.cur_idx]]
+    if 1 <= hist.cur_idx <= length(hist.history)
+        return hist.mode_mapping[hist.history[hist.cur_idx].mode]
     end
     return nothing
 end
 
+function history_do_initialize(hist::REPLHistoryProvider)
+    isempty(hist.history) || return false
+    update!(hist.history)
+    hist.start_idx = length(hist.history) + 1
+    hist.cur_idx = hist.start_idx
+    hist.last_idx = -1
+    true
+end
+
 function history_prev(s::LineEdit.MIState, hist::REPLHistoryProvider,
                       num::Int=1, save_idx::Int = hist.cur_idx)
     num <= 0 && return history_next(s, hist, -num, save_idx)
@@ -733,6 +985,7 @@ function history_next(s::LineEdit.MIState, hist::REPLHistoryProvider,
         return
     end
     num < 0 && return history_prev(s, hist, -num, save_idx)
+    history_do_initialize(hist)
     cur_idx = hist.cur_idx
     max_idx = length(hist.history) + 1
     if cur_idx == max_idx && 0 < hist.last_idx
@@ -756,14 +1009,17 @@ history_first(s::LineEdit.MIState, hist::REPLHistoryProvider) =
                  (hist.cur_idx > hist.start_idx+1 ? hist.start_idx : 0))
 
 history_last(s::LineEdit.MIState, hist::REPLHistoryProvider) =
-    history_next(s, hist, length(hist.history) - hist.cur_idx + 1)
+    history_next(s, hist, length(update!(hist.history)) - hist.cur_idx + 1)
 
 function history_move_prefix(s::LineEdit.PrefixSearchState,
                              hist::REPLHistoryProvider,
                              prefix::AbstractString,
                              backwards::Bool,
                              cur_idx::Int = hist.cur_idx)
-    cur_response = String(take!(copy(LineEdit.buffer(s))))
+    if history_do_initialize(hist)
+        cur_idx = hist.cur_idx
+    end
+    cur_response = takestring!(copy(LineEdit.buffer(s)))
     # when searching forward, start at last_idx
     if !backwards && hist.last_idx > 0
         cur_idx = hist.last_idx
@@ -772,7 +1028,7 @@ function history_move_prefix(s::LineEdit.PrefixSearchState,
     max_idx = length(hist.history)+1
     idxs = backwards ? ((cur_idx-1):-1:1) : ((cur_idx+1):1:max_idx)
     for idx in idxs
-        if (idx == max_idx) || (startswith(hist.history[idx], prefix) && (hist.history[idx] != cur_response || get(hist.mode_mapping, hist.modes[idx], nothing) !== LineEdit.mode(s)))
+        if (idx == max_idx) || (startswith(hist.history[idx].content, prefix) && (hist.history[idx].content != cur_response || get(hist.mode_mapping, hist.history[idx].mode, nothing) !== LineEdit.mode(s)))
             m = history_move(s, hist, idx)
             if m === :ok
                 if idx == max_idx
@@ -805,7 +1061,7 @@ function history_search(hist::REPLHistoryProvider, query_buffer::IOBuffer, respo
     qpos = position(query_buffer)
     qpos > 0 || return true
     searchdata = beforecursor(query_buffer)
-    response_str = String(take!(copy(response_buffer)))
+    response_str = takestring!(copy(response_buffer))
 
     # Alright, first try to see if the current match still works
     a = position(response_buffer) + 1 # position is zero-indexed
@@ -838,9 +1094,9 @@ function history_search(hist::REPLHistoryProvider, query_buffer::IOBuffer, respo
     # Now search all the other buffers
     idxs = backwards ? ((hist.cur_idx-1):-1:1) : ((hist.cur_idx+1):1:length(hist.history))
     for idx in idxs
-        h = hist.history[idx]
+        h = hist.history[idx].content
         match = backwards ? findlast(searchdata, h) : findfirst(searchdata, h)
-        if match !== nothing && h != response_str && haskey(hist.mode_mapping, hist.modes[idx])
+        if match !== nothing && h != response_str && haskey(hist.mode_mapping, hist.history[idx].mode)
             truncate(response_buffer, 0)
             write(response_buffer, h)
             seek(response_buffer, first(match) - 1)
@@ -862,20 +1118,37 @@ end
 LineEdit.reset_state(hist::REPLHistoryProvider) = history_reset_state(hist)
 
 function return_callback(s)
-    ast = Base.parse_input_line(String(take!(copy(LineEdit.buffer(s)))), depwarn=false)
+    ast = Base.parse_input_line(takestring!(copy(LineEdit.buffer(s))); mod=Base.active_module(s), depwarn=false)
     return !(isa(ast, Expr) && ast.head === :incomplete)
 end
 
 find_hist_file() = get(ENV, "JULIA_HISTORY",
                        !isempty(DEPOT_PATH) ? joinpath(DEPOT_PATH[1], "logs", "repl_history.jl") :
-                       error("DEPOT_PATH is empty and and ENV[\"JULIA_HISTORY\"] not set."))
+                       error("DEPOT_PATH is empty and ENV[\"JULIA_HISTORY\"] not set."))
+
+backend(r::AbstractREPL) = hasproperty(r, :backendref) && isdefined(r, :backendref) ? r.backendref : nothing
 
-backend(r::AbstractREPL) = r.backendref
 
-function eval_with_backend(ast, backend::REPLBackendRef)
-    put!(backend.repl_channel, (ast, 1))
+function eval_on_backend(ast, backend::REPLBackendRef)
+    put!(backend.repl_channel, (ast, 1)) # (f, show_value)
+    return take!(backend.response_channel) # (val, iserr)
+end
+function call_on_backend(f, backend::REPLBackendRef)
+    applicable(f) || error("internal error: f is not callable")
+    put!(backend.repl_channel, (f, 2)) # (f, show_value) 2 indicates function (rather than ast)
     return take!(backend.response_channel) # (val, iserr)
 end
+# if no backend just eval (used by tests)
+eval_on_backend(ast, backend::Nothing) = error("no backend for eval ast")
+function call_on_backend(f, backend::Nothing)
+    try
+        ret = f()
+        return (ret, false) # (val, iserr)
+    catch
+        return (current_exceptions(), true)
+    end
+end
+
 
 function respond(f, repl, main; pass_empty::Bool = false, suppress_on_semicolon::Bool = true)
     return function do_respond(s::MIState, buf, ok::Bool)
@@ -888,7 +1161,7 @@ function respond(f, repl, main; pass_empty::Bool = false, suppress_on_semicolon:
             local response
             try
                 ast = Base.invokelatest(f, line)
-                response = eval_with_backend(ast, backend(repl))
+                response = eval_on_backend(ast, backend(repl))
             catch
                 response = Pair{Any, Bool}(current_exceptions(), true)
             end
@@ -920,6 +1193,10 @@ function mode_keymap(julia_prompt::Prompt)
                 LineEdit.state(s, julia_prompt).input_buffer = buf
             end
         else
+            buf = LineEdit.buffer(s)
+            if LineEdit.try_remove_paired_delimiter(buf)
+                return LineEdit.refresh_line(s)
+            end
             LineEdit.edit_backspace(s)
         end
     end,
@@ -941,7 +1218,7 @@ enable_promptpaste(v::Bool) = JL_PROMPT_PASTE[] = v
 
 function contextual_prompt(repl::LineEditREPL, prompt::Union{String,Function})
     function ()
-        mod = active_module(repl)
+        mod = Base.active_module(repl)
         prefix = mod == Main ? "" : string('(', mod, ") ")
         pr = prompt isa String ? prompt : prompt()
         prefix * pr
@@ -955,6 +1232,7 @@ setup_interface(
     extra_repl_keymap::Any = repl.options.extra_keymap
 ) = setup_interface(repl, hascolor, extra_repl_keymap)
 
+
 # This non keyword method can be precompiled which is important
 function setup_interface(
     repl::LineEditREPL,
@@ -999,17 +1277,21 @@ function setup_interface(
             (repl.envcolors ? Base.input_color : repl.input_color) : "",
         repl = repl,
         complete = replc,
-        on_enter = return_callback)
+        on_enter = return_callback,
+        styling_passes = StylingPasses.StylingPass[
+            StylingPasses.SyntaxHighlightPass(),
+            StylingPasses.EnclosingParenHighlightPass()
+        ])
 
     # Setup help mode
-    help_mode = Prompt(contextual_prompt(repl, "help?> "),
+    help_mode = Prompt(contextual_prompt(repl, HELP_PROMPT),
         prompt_prefix = hascolor ? repl.help_color : "",
         prompt_suffix = hascolor ?
             (repl.envcolors ? Base.input_color : repl.input_color) : "",
         repl = repl,
         complete = replc,
         # When we're done transform the entered line into a call to helpmode function
-        on_done = respond(line::String->helpmode(outstream(repl), line, repl.mistate.active_module),
+        on_done = respond(line::String->helpmode(outstream(repl), line, Base.active_module(repl)),
                           repl, julia_prompt, pass_empty=true, suppress_on_semicolon=false))
 
 
@@ -1030,6 +1312,34 @@ function setup_interface(
         end,
         sticky = true)
 
+    # Set up dummy Pkg mode that will be replaced once Pkg is loaded
+    # use 6 dots to occupy the same space as the most likely "@v1.xx" env name
+    dummy_pkg_mode = Prompt(Pkg_promptf,
+        prompt_prefix = hascolor ? repl.pkg_color : "",
+        prompt_suffix = hascolor ?
+        (repl.envcolors ? Base.input_color : repl.input_color) : "",
+        repl = repl,
+        complete = LineEdit.EmptyCompletionProvider(),
+        on_done = respond(line->nothing, repl, julia_prompt),
+        on_enter = function (s::MIState)
+                # This is hit when the user tries to execute a command before the real Pkg mode has been
+                # switched to. Ok to do this even if Pkg is loading on the other task because of the loading lock.
+                REPLExt = load_pkg()
+                if REPLExt isa Module && isdefined(REPLExt, :PkgCompletionProvider)
+                    for mode in repl.interface.modes
+                        if mode isa LineEdit.Prompt && mode.complete isa REPLExt.PkgCompletionProvider
+                            # pkg mode
+                            buf = copy(LineEdit.buffer(s))
+                            transition(s, mode) do
+                                LineEdit.state(s, mode).input_buffer = buf
+                            end
+                        end
+                    end
+                end
+                return true
+            end,
+        sticky = true)
+
 
     ################################# Stage II #############################
 
@@ -1037,17 +1347,17 @@ function setup_interface(
     # We will have a unified history for all REPL modes
     hp = REPLHistoryProvider(Dict{Symbol,Prompt}(:julia => julia_prompt,
                                                  :shell => shell_mode,
-                                                 :help  => help_mode))
+                                                 :help  => help_mode,
+                                                 :pkg  => dummy_pkg_mode))
     if repl.history_file
         try
-            hist_path = find_hist_file()
-            mkpath(dirname(hist_path))
-            hp.file_path = hist_path
-            hist_open_file(hp)
+            path = find_hist_file()
+            mkpath(dirname(path))
+            hp.history = HistoryFile(path)
+            errormonitor(@async history_do_initialize(hp))
             finalizer(replc) do replc
-                close(hp.history_file)
+                close(hp.history)
             end
-            hist_from_file(hp, hist_path)
         catch
             # use REPL.hascolor to avoid using the local variable with the same name
             print_response(repl, Pair{Any, Bool}(current_exceptions(), true), true, REPL.hascolor(repl))
@@ -1060,17 +1370,14 @@ function setup_interface(
     julia_prompt.hist = hp
     shell_mode.hist = hp
     help_mode.hist = hp
+    dummy_pkg_mode.hist = hp
 
-    julia_prompt.on_done = respond(x->Base.parse_input_line(x,filename=repl_filename(repl,hp)), repl, julia_prompt)
-
-
-    search_prompt, skeymap = LineEdit.setup_search_keymap(hp)
-    search_prompt.complete = LatexCompletions()
+    julia_prompt.on_done = respond(x->Base.parse_input_line(x; filename=repl_filename(repl,hp), mod=Base.active_module(repl)), repl, julia_prompt)
 
     shell_prompt_len = length(SHELL_PROMPT)
     help_prompt_len = length(HELP_PROMPT)
-    jl_prompt_regex = r"^In \[[0-9]+\]: |^(?:\(.+\) )?julia> "
-    pkg_prompt_regex = r"^(?:\(.+\) )?pkg> "
+    jl_prompt_regex = Regex("^In \\[[0-9]+\\]: |^(?:\\(.+\\) )?$JULIA_PROMPT")
+    pkg_prompt_regex = Regex("^(?:\\(.+\\) )?$PKG_PROMPT")
 
     # Canonicalize user keymap input
     if isa(extra_repl_keymap, Dict)
@@ -1086,6 +1393,7 @@ function setup_interface(
                 end
             else
                 edit_insert(s, ';')
+                LineEdit.check_show_hint(s)
             end
         end,
         '?' => function (s::MIState,o...)
@@ -1096,6 +1404,55 @@ function setup_interface(
                 end
             else
                 edit_insert(s, '?')
+                LineEdit.check_show_hint(s)
+            end
+        end,
+        ']' => function (s::MIState,o...)
+            if isempty(s) || position(LineEdit.buffer(s)) == 0
+                buf = copy(LineEdit.buffer(s))
+                transition(s, dummy_pkg_mode) do
+                    LineEdit.state(s, dummy_pkg_mode).input_buffer = buf
+                end
+                # load Pkg on another thread if available so that typing in the dummy Pkg prompt
+                # isn't blocked, but instruct the main REPL task to do the transition via s.async_channel
+                t_replswitch = Threads.@spawn begin
+                    REPLExt = load_pkg()
+                    if REPLExt isa Module && isdefined(REPLExt, :PkgCompletionProvider)
+                        put!(s.async_channel,
+                            function (s::MIState)
+                                LineEdit.mode(s) === dummy_pkg_mode || return :ok
+                                for mode in repl.interface.modes
+                                    if mode isa LineEdit.Prompt && mode.complete isa REPLExt.PkgCompletionProvider
+                                        buf = copy(LineEdit.buffer(s))
+                                        transition(s, mode) do
+                                            LineEdit.state(s, mode).input_buffer = buf
+                                        end
+                                        if !isempty(s)
+                                            @invokelatest(LineEdit.check_show_hint(s))
+                                        end
+                                        break
+                                    end
+                                end
+                                return :ok
+                            end
+                        )
+                    end
+                end
+                Base.errormonitor(t_replswitch)
+            else
+                # Use bracket insertion if enabled, otherwise just insert
+                if repl.options.auto_insert_closing_bracket
+                    buf = LineEdit.buffer(s)
+                    if !eof(buf) && LineEdit.peek(buf, Char) == ']'
+                        LineEdit.edit_move_right(buf)
+                    else
+                        edit_insert(buf, ']')
+                    end
+                    LineEdit.refresh_line(s)
+                else
+                    edit_insert(s, ']')
+                end
+                LineEdit.check_show_hint(s)
             end
         end,
 
@@ -1179,7 +1536,7 @@ function setup_interface(
                 dump_tail = false
                 nl_pos = findfirst('\n', input[oldpos:end])
                 if s.current_mode == julia_prompt
-                    ast, pos = Meta.parse(input, oldpos, raise=false, depwarn=false)
+                    ast, pos = Meta.parse(input, oldpos, raise=false, depwarn=false, mod=Base.active_module(s))
                     if (isa(ast, Expr) && (ast.head === :error || ast.head === :incomplete)) ||
                             (pos > ncodeunits(input) && !endswith(input, '\n'))
                         # remaining text is incomplete (an error, or parser ran to the end but didn't stop with a newline):
@@ -1230,7 +1587,7 @@ function setup_interface(
                     # execute the statement
                     terminal = LineEdit.terminal(s) # This is slightly ugly but ok for now
                     raw!(terminal, false) && disable_bracketed_paste(terminal)
-                    LineEdit.mode(s).on_done(s, LineEdit.buffer(s), true)
+                    @invokelatest LineEdit.mode(s).on_done(s, LineEdit.buffer(s), true)
                     raw!(terminal, true) && enable_bracketed_paste(terminal)
                     LineEdit.push_undo(s) # when the last line is incomplete
                 end
@@ -1266,19 +1623,33 @@ function setup_interface(
 
     prefix_prompt, prefix_keymap = LineEdit.setup_prefix_keymap(hp, julia_prompt)
 
-    a = Dict{Any,Any}[skeymap, repl_keymap, prefix_keymap, LineEdit.history_keymap, LineEdit.default_keymap, LineEdit.escape_defaults]
+    # Build keymap list - add bracket insertion if enabled
+    base_keymaps = Dict{Any,Any}[repl_keymap, prefix_keymap, LineEdit.history_keymap]
+    if repl.options.auto_insert_closing_bracket
+        push!(base_keymaps, LineEdit.bracket_insert_keymap)
+    end
+    push!(base_keymaps, LineEdit.default_keymap, LineEdit.escape_defaults)
+
+    a = base_keymaps
     prepend!(a, extra_repl_keymap)
 
     julia_prompt.keymap_dict = LineEdit.keymap(a)
 
     mk = mode_keymap(julia_prompt)
 
-    b = Dict{Any,Any}[skeymap, mk, prefix_keymap, LineEdit.history_keymap, LineEdit.default_keymap, LineEdit.escape_defaults]
+    # Build keymap list for other modes
+    mode_base_keymaps = Dict{Any,Any}[mk, prefix_keymap, LineEdit.history_keymap]
+    if repl.options.auto_insert_closing_bracket
+        push!(mode_base_keymaps, LineEdit.bracket_insert_keymap)
+    end
+    push!(mode_base_keymaps, LineEdit.default_keymap, LineEdit.escape_defaults)
+
+    b = mode_base_keymaps
     prepend!(b, extra_repl_keymap)
 
-    shell_mode.keymap_dict = help_mode.keymap_dict = LineEdit.keymap(b)
+    shell_mode.keymap_dict = help_mode.keymap_dict = dummy_pkg_mode.keymap_dict = LineEdit.keymap(b)
 
-    allprompts = LineEdit.TextInterface[julia_prompt, shell_mode, help_mode, search_prompt, prefix_prompt]
+    allprompts = LineEdit.TextInterface[julia_prompt, shell_mode, help_mode, dummy_pkg_mode, prefix_prompt]
     return ModalInterface(allprompts)
 end
 
@@ -1323,54 +1694,91 @@ answer_color(r::StreamREPL) = r.answer_color
 input_color(r::LineEditREPL) = r.envcolors ? Base.input_color() : r.input_color
 input_color(r::StreamREPL) = r.input_color
 
-let matchend = Dict("\"" => r"\"", "\"\"\"" => r"\"\"\"", "'" => r"'",
-    "`" => r"`", "```" => r"```", "#" => r"$"m, "#=" => r"=#|#=")
-    global _rm_strings_and_comments
-    function _rm_strings_and_comments(code::Union{String,SubString{String}})
-        buf = IOBuffer(sizehint = sizeof(code))
-        pos = 1
-        while true
-            i = findnext(r"\"(?!\"\")|\"\"\"|'|`(?!``)|```|#(?!=)|#=", code, pos)
-            isnothing(i) && break
-            match = SubString(code, i)
-            j = findnext(matchend[match]::Regex, code, nextind(code, last(i)))
-            if match == "#=" # possibly nested
-                nested = 1
-                while j !== nothing
-                    nested += SubString(code, j) == "#=" ? +1 : -1
-                    iszero(nested) && break
-                    j = findnext(r"=#|#=", code, nextind(code, last(j)))
-                end
-            elseif match[1] != '#' # quote match: check non-escaped
-                while j !== nothing
-                    notbackslash = findprev(!=('\\'), code, prevind(code, first(j)))::Int
-                    isodd(first(j) - notbackslash) && break # not escaped
-                    j = findnext(matchend[match]::Regex, code, nextind(code, first(j)))
-                end
-            end
-            isnothing(j) && break
-            if match[1] == '#'
-                print(buf, SubString(code, pos, prevind(code, first(i))))
-            else
-                print(buf, SubString(code, pos, last(i)), ' ', SubString(code, j))
-            end
-            pos = nextind(code, last(j))
-        end
-        print(buf, SubString(code, pos, lastindex(code)))
-        return String(take!(buf))
+# heuristic function to decide if the presence of a semicolon
+# at the end of the expression was intended for suppressing output
+function ends_with_semicolon(code)
+    semi = false
+    for tok in tokenize(code)
+        kind(tok) in KSet"Whitespace NewlineWs Comment EndMarker" && continue
+        semi = kind(tok) == K";"
     end
+    return semi
 end
 
-# heuristic function to decide if the presence of a semicolon
-# at the end of the expression was intended for suppressing output
-ends_with_semicolon(code::AbstractString) = ends_with_semicolon(String(code))
-ends_with_semicolon(code::Union{String,SubString{String}}) =
-    contains(_rm_strings_and_comments(code), r";\s*$")
+function banner(io::IO = stdout; short = false)
+    if Base.GIT_VERSION_INFO.tagged_commit
+        commit_string = Base.TAGGED_RELEASE_BANNER
+    elseif isempty(Base.GIT_VERSION_INFO.commit)
+        commit_string = ""
+    else
+        days = Int(floor((ccall(:jl_clock_now, Float64, ()) - Base.GIT_VERSION_INFO.fork_master_timestamp) / (60 * 60 * 24)))
+        days = max(0, days)
+        unit = days == 1 ? "day" : "days"
+        distance = Base.GIT_VERSION_INFO.fork_master_distance
+        commit = Base.GIT_VERSION_INFO.commit_short
+
+        if distance == 0
+            commit_string = "Commit $(commit) ($(days) $(unit) old master)"
+        else
+            branch = Base.GIT_VERSION_INFO.branch
+            commit_string = "$(branch)/$(commit) (fork: $(distance) commits, $(days) $(unit))"
+        end
+    end
+
+    commit_date = isempty(Base.GIT_VERSION_INFO.date_string) ? "" : " ($(split(Base.GIT_VERSION_INFO.date_string)[1]))"
+
+    if get(io, :color, false)::Bool
+        c = Base.text_colors
+        tx = c[:normal] # text
+        jl = c[:normal] # julia
+        d1 = c[:bold] * c[:blue]    # first dot
+        d2 = c[:bold] * c[:red]     # second dot
+        d3 = c[:bold] * c[:green]   # third dot
+        d4 = c[:bold] * c[:magenta] # fourth dot
+
+        if short
+            print(io,"""
+              $(d3)o$(tx)  | Version $(VERSION)$(commit_date)
+             $(d2)o$(tx) $(d4)o$(tx) | $(commit_string)
+            """)
+        else
+            print(io,"""               $(d3)_$(tx)
+               $(d1)_$(tx)       $(jl)_$(tx) $(d2)_$(d3)(_)$(d4)_$(tx)     |  Documentation: https://docs.julialang.org
+              $(d1)(_)$(jl)     | $(d2)(_)$(tx) $(d4)(_)$(tx)    |
+               $(jl)_ _   _| |_  __ _$(tx)   |  Type \"?\" for help, \"]?\" for Pkg help.
+              $(jl)| | | | | | |/ _` |$(tx)  |
+              $(jl)| | |_| | | | (_| |$(tx)  |  Version $(VERSION)$(commit_date)
+             $(jl)_/ |\\__'_|_|_|\\__'_|$(tx)  |  $(commit_string)
+            $(jl)|__/$(tx)                   |
+
+            """)
+        end
+    else
+        if short
+            print(io,"""
+              o  |  Version $(VERSION)$(commit_date)
+             o o |  $(commit_string)
+            """)
+        else
+            print(io,"""
+                           _
+               _       _ _(_)_     |  Documentation: https://docs.julialang.org
+              (_)     | (_) (_)    |
+               _ _   _| |_  __ _   |  Type \"?\" for help, \"]?\" for Pkg help.
+              | | | | | | |/ _` |  |
+              | | |_| | | | (_| |  |  Version $(VERSION)$(commit_date)
+             _/ |\\__'_|_|_|\\__'_|  |  $(commit_string)
+            |__/                   |
+
+            """)
+        end
+    end
+end
 
 function run_frontend(repl::StreamREPL, backend::REPLBackendRef)
     repl.frontend_task = current_task()
     have_color = hascolor(repl)
-    Base.banner(repl.stream)
+    banner(repl.stream)
     d = REPLDisplay(repl)
     dopushdisplay = !in(d,Base.Multimedia.displays)
     dopushdisplay && pushdisplay(d)
@@ -1378,17 +1786,17 @@ function run_frontend(repl::StreamREPL, backend::REPLBackendRef)
         if have_color
             print(repl.stream,repl.prompt_color)
         end
-        print(repl.stream, "julia> ")
+        print(repl.stream, JULIA_PROMPT)
         if have_color
             print(repl.stream, input_color(repl))
         end
         line = readline(repl.stream, keep=true)
         if !isempty(line)
-            ast = Base.parse_input_line(line)
+            ast = Base.parse_input_line(line; mod=Base.active_module(repl))
             if have_color
                 print(repl.stream, Base.color_normal)
             end
-            response = eval_with_backend(ast, backend)
+            response = eval_on_backend(ast, backend)
             print_response(repl, response, !ends_with_semicolon(line), have_color)
         end
     end
@@ -1402,54 +1810,49 @@ module Numbered
 
 using ..REPL
 
-__current_ast_transforms() = isdefined(Base, :active_repl_backend) ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
+__current_ast_transforms() = Base.active_repl_backend !== nothing ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
 
 function repl_eval_counter(hp)
     return length(hp.history) - hp.start_idx
 end
 
 function out_transform(@nospecialize(x), n::Ref{Int})
-    return Expr(:toplevel, get_usings!([], x)..., quote
-        let __temp_val_a72df459 = $x
-            $capture_result($n, __temp_val_a72df459)
-            __temp_val_a72df459
-        end
-    end)
+    return Expr(:block, # avoid line numbers or scope that would leak into the output and change the meaning of x
+        :(local __temp_val_a72df459 = $x),
+        Expr(:call, capture_result, n, :__temp_val_a72df459),
+        :__temp_val_a72df459)
 end
 
-function get_usings!(usings, ex)
-    ex isa Expr || return usings
-    # get all `using` and `import` statements which are at the top level
-    for (i, arg) in enumerate(ex.args)
-        if Base.isexpr(arg, :toplevel)
-            get_usings!(usings, arg)
-        elseif Base.isexpr(arg, [:using, :import])
-            push!(usings, popat!(ex.args, i))
+function create_global_out!(mod)
+    if !isdefinedglobal(mod, :Out)
+        out = Dict{Int, Any}()
+        @eval mod begin
+            const Out = $(out)
+            export Out
         end
+        return out
     end
-    return usings
+    return getglobal(mod, :Out)
 end
 
 function capture_result(n::Ref{Int}, @nospecialize(x))
     n = n[]
     mod = Base.MainInclude
-    if !isdefined(mod, :Out)
-        @eval mod global Out
-        @eval mod export Out
-        setglobal!(mod, :Out, Dict{Int, Any}())
-    end
-    if x !== getglobal(mod, :Out) && x !== nothing # remove this?
-        getglobal(mod, :Out)[n] = x
+    # TODO: This invokelatest is only required due to backdated constants
+    # and should be removed after
+    out = isdefinedglobal(mod, :Out) ? invokelatest(getglobal, mod, :Out) : invokelatest(create_global_out!, mod)
+    if x !== out && x !== nothing # remove this?
+        out[n] = x
     end
     nothing
 end
 
 function set_prompt(repl::LineEditREPL, n::Ref{Int})
     julia_prompt = repl.interface.modes[1]
-    julia_prompt.prompt = function()
+    julia_prompt.prompt = REPL.contextual_prompt(repl, function()
         n[] = repl_eval_counter(julia_prompt.hist)+1
         string("In [", n[], "]: ")
-    end
+    end)
     nothing
 end
 
@@ -1464,14 +1867,13 @@ end
 
 function __current_ast_transforms(backend)
     if backend === nothing
-        isdefined(Base, :active_repl_backend) ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
+        Base.active_repl_backend !== nothing ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
     else
         backend.ast_transforms
     end
 end
 
-
-function numbered_prompt!(repl::LineEditREPL=Base.active_repl, backend=nothing)
+function numbered_prompt!(repl::LineEditREPL=Base.active_repl::LineEditREPL, backend=nothing)
     n = Ref{Int}(0)
     set_prompt(repl, n)
     set_output_prefix(repl, n)
@@ -1493,4 +1895,13 @@ end
 
 import .Numbered.numbered_prompt!
 
+# this assignment won't survive precompilation,
+# but will stick if REPL is baked into a sysimg.
+# Needs to occur after this module is finished.
+Base.REPL_MODULE_REF[] = REPL
+
+if Base.generating_output()
+   include("precompile.jl")
+end
+
 end # module
diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl
index 20d26953eb22b..b2b510e8e1e10 100644
--- a/stdlib/REPL/src/REPLCompletions.jl
+++ b/stdlib/REPL/src/REPLCompletions.jl
@@ -2,12 +2,20 @@
 
 module REPLCompletions
 
-export completions, shell_completions, bslash_completions, completion_text
+export completions, shell_completions, bslash_completions, completion_text, named_completion
 
-using Core: CodeInfo, MethodInstance, CodeInstance, Const
-const CC = Core.Compiler
+using Core: Const
+# We want to insulate the REPLCompletion module from any changes the user may
+# make to the compiler, since it runs by default and the system becomes unusable
+# if it breaks.
+const CC = Base.Compiler
 using Base.Meta
-using Base: propertynames, something
+using Base: propertynames, something, IdSet
+using Base.Filesystem: _readdirx
+using Base.JuliaSyntax: @K_str, @KSet_str, parseall, byte_range, children, is_prefix_call, is_trivia, kind
+
+using ..REPL.LineEdit: NamedCompletion
+using ..REPL.SyntaxUtil: CursorNode, find_parent, seek_pos, char_range, char_first, char_last, children_nt, find_delim
 
 abstract type Completion end
 
@@ -19,6 +27,10 @@ struct KeywordCompletion <: Completion
     keyword::String
 end
 
+struct KeyvalCompletion <: Completion
+    keyval::String
+end
+
 struct PathCompletion <: Completion
     path::String
 end
@@ -49,8 +61,10 @@ struct MethodCompletion <: Completion
 end
 
 struct BslashCompletion <: Completion
-    bslash::String
+    completion::String # what is actually completed, for example "\trianglecdot"
+    name::String # what is displayed, for example "◬ \trianglecdot"
 end
+BslashCompletion(completion::String) = BslashCompletion(completion, completion)
 
 struct ShellCompletion <: Completion
     text::String
@@ -99,19 +113,28 @@ end
 
 _completion_text(c::TextCompletion) = c.text
 _completion_text(c::KeywordCompletion) = c.keyword
+_completion_text(c::KeyvalCompletion) = c.keyval
 _completion_text(c::PathCompletion) = c.path
 _completion_text(c::ModuleCompletion) = c.mod
 _completion_text(c::PackageCompletion) = c.package
 _completion_text(c::PropertyCompletion) = sprint(Base.show_sym, c.property)
 _completion_text(c::FieldCompletion) = sprint(Base.show_sym, c.field)
 _completion_text(c::MethodCompletion) = repr(c.method)
-_completion_text(c::BslashCompletion) = c.bslash
 _completion_text(c::ShellCompletion) = c.text
 _completion_text(c::DictCompletion) = c.key
 _completion_text(c::KeywordArgumentCompletion) = c.kwarg*'='
 
 completion_text(c) = _completion_text(c)::String
 
+named_completion(c::BslashCompletion) = NamedCompletion(c.completion, c.name)
+
+function named_completion(c)
+    text = completion_text(c)::String
+    return NamedCompletion(text, text)
+end
+
+named_completion_completion(c) = named_completion(c).completion::String
+
 const Completions = Tuple{Vector{Completion}, UnitRange{Int}, Bool}
 
 function completes_global(x, name)
@@ -129,335 +152,398 @@ function appendmacro!(syms, macros, needle, endchar)
     end
 end
 
-function filtered_mod_names(ffunc::Function, mod::Module, name::AbstractString, all::Bool = false, imported::Bool = false)
-    ssyms = names(mod, all = all, imported = imported)
+function append_filtered_mod_names!(ffunc::Function, suggestions::Vector{Completion},
+                                    mod::Module, name::String, complete_internal_only::Bool)
+    imported = usings = !complete_internal_only
+    ssyms = names(mod; all=true, imported, usings)
     filter!(ffunc, ssyms)
     macros = filter(x -> startswith(String(x), "@" * name), ssyms)
+
+    # don't complete string and command macros when the input matches the internal name like `r_` to `r"`
+    if !startswith(name, "@")
+        filter!(macros) do m
+            s = String(m)
+            if endswith(s, "_str") || endswith(s, "_cmd")
+                occursin(name, first(s, length(s)-4))
+            else
+                true
+            end
+        end
+    end
+
     syms = String[sprint((io,s)->Base.show_sym(io, s; allow_macroname=true), s) for s in ssyms if completes_global(String(s), name)]
     appendmacro!(syms, macros, "_str", "\"")
     appendmacro!(syms, macros, "_cmd", "`")
-    return [ModuleCompletion(mod, sym) for sym in syms]
+    for sym in syms
+        push!(suggestions, ModuleCompletion(mod, sym))
+    end
+    return suggestions
 end
 
 # REPL Symbol Completions
-function complete_symbol(@nospecialize(ex), name::String, @nospecialize(ffunc), context_module::Module=Main)
-    mod = context_module
-
-    lookup_module = true
-    t = Union{}
-    val = nothing
-    if ex !== nothing
-        res = repl_eval_ex(ex, context_module)
+function complete_symbol!(suggestions::Vector{Completion},
+                          @nospecialize(prefix), name::String, context_module::Module;
+                          complete_modules_only::Bool=false,
+                          shift::Bool=false)
+    local mod, t, val
+    complete_internal_only = isempty(name)
+    if prefix !== nothing
+        res = repl_eval_ex(prefix, context_module)
         res === nothing && return Completion[]
         if res isa Const
             val = res.val
             if isa(val, Module)
                 mod = val
-                lookup_module = true
+                if !shift
+                    # when module is explicitly accessed, show internal bindings that are
+                    # defined by the module, unless shift key is pressed
+                    complete_internal_only = true
+                end
             else
-                lookup_module = false
                 t = typeof(val)
             end
         else
-            lookup_module = false
             t = CC.widenconst(res)
         end
+    else
+        mod = context_module
     end
 
-    suggestions = Completion[]
-    if lookup_module
-        # We will exclude the results that the user does not want, as well
-        # as excluding Main.Main.Main, etc., because that's most likely not what
-        # the user wants
-        p = let mod=mod, modname=nameof(mod)
-            s->(!Base.isdeprecated(mod, s) && s != modname && ffunc(mod, s)::Bool)
-        end
-        # Looking for a binding in a module
-        if mod == context_module
-            # Also look in modules we got through `using`
-            mods = ccall(:jl_module_usings, Any, (Any,), context_module)::Vector
-            for m in mods
-                append!(suggestions, filtered_mod_names(p, m::Module, name))
+    if @isdefined(mod) # lookup names available within the module
+        let modname = nameof(mod),
+            is_main = mod===Main
+            append_filtered_mod_names!(suggestions, mod, name, complete_internal_only) do s::Symbol
+                if Base.isdeprecated(mod, s)
+                    return false
+                elseif s === modname
+                    return false # exclude `Main.Main.Main`, etc.
+                elseif complete_modules_only && !completes_module(mod, s)
+                    return false
+                elseif is_main && s === :MainInclude
+                    return false
+                end
+                return true
             end
-            append!(suggestions, filtered_mod_names(p, mod, name, true, true))
-        else
-            append!(suggestions, filtered_mod_names(p, mod, name, true, false))
         end
-    elseif val !== nothing # looking for a property of an instance
-        for property in propertynames(val, false)
-            # TODO: support integer arguments (#36872)
-            if property isa Symbol && startswith(string(property), name)
-                push!(suggestions, PropertyCompletion(val, property))
+    elseif @isdefined(val) # looking for a property of an instance
+        try
+            for property in propertynames(val, false)
+                # TODO: support integer arguments (#36872)
+                if property isa Symbol && startswith(string(property), name)
+                    push!(suggestions, PropertyCompletion(val, property))
+                end
             end
+        catch
         end
-    else
+    elseif @isdefined(t) && field_completion_eligible(t)
         # Looking for a member of a type
-        if t isa DataType && t != Any
-            # Check for cases like Type{typeof(+)}
-            if Base.isType(t)
-                t = typeof(t.parameters[1])
-            end
-            # Only look for fields if this is a concrete type
-            if isconcretetype(t)
-                fields = fieldnames(t)
-                for field in fields
-                    isa(field, Symbol) || continue # Tuple type has ::Int field name
-                    s = string(field)
-                    if startswith(s, name)
-                        push!(suggestions, FieldCompletion(t, field))
-                    end
-                end
+        add_field_completions!(suggestions, name, t)
+    end
+    return suggestions
+end
+
+completes_module(mod::Module, x::Symbol) = isdefined(mod, x) && isa(getglobal(mod, x), Module)
+
+function add_field_completions!(suggestions::Vector{Completion}, name::String, @nospecialize(t))
+    if isa(t, Union)
+        add_field_completions!(suggestions, name, t.a)
+        add_field_completions!(suggestions, name, t.b)
+    else
+        @assert isconcretetype(t)
+        fields = fieldnames(t)
+        for field in fields
+            isa(field, Symbol) || continue # Tuple type has ::Int field name
+            s = string(field)
+            if startswith(s, name)
+                push!(suggestions, FieldCompletion(t, field))
             end
         end
     end
-    suggestions
 end
 
-const sorted_keywords = [
-    "abstract type", "baremodule", "begin", "break", "catch", "ccall",
-    "const", "continue", "do", "else", "elseif", "end", "export", "false",
-    "finally", "for", "function", "global", "if", "import",
-    "let", "local", "macro", "module", "mutable struct",
-    "primitive type", "quote", "return", "struct",
-    "true", "try", "using", "while"]
+const GENERIC_PROPERTYNAMES_METHOD = which(propertynames, (Any,))
 
-function complete_keyword(s::Union{String,SubString{String}})
-    r = searchsorted(sorted_keywords, s)
+function field_completion_eligible(@nospecialize t)
+    if isa(t, Union)
+        return field_completion_eligible(t.a) && field_completion_eligible(t.b)
+    end
+    isconcretetype(t) || return false
+    # field completion is correct only when `getproperty` fallbacks to `getfield`
+    match = Base._which(Tuple{typeof(propertynames),t}; raise=false)
+    match === nothing && return false
+    return match.method === GENERIC_PROPERTYNAMES_METHOD
+end
+
+function complete_from_list!(suggestions::Vector{Completion}, T::Type, list::Vector{String}, s::String)
+    r = searchsorted(list, s)
     i = first(r)
-    n = length(sorted_keywords)
-    while i <= n && startswith(sorted_keywords[i],s)
+    n = length(list)
+    while i <= n && startswith(list[i],s)
         r = first(r):i
         i += 1
     end
-    Completion[KeywordCompletion(kw) for kw in sorted_keywords[r]]
+    for kw in list[r]
+        push!(suggestions, T(kw))
+    end
+    return suggestions
 end
 
-function complete_path(path::AbstractString, pos::Int;
-                       use_envpath=false, shell_escape=false,
-                       string_escape=false)
-    @assert !(shell_escape && string_escape)
-    if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path)
-        # if the path is just "~", don't consider the expanded username as a prefix
-        if path == "~"
-            dir, prefix = homedir(), ""
-        else
-            dir, prefix = splitdir(homedir() * path[2:end])
-        end
-    else
-        dir, prefix = splitdir(path)
-    end
-    local files
+const sorted_keywords = [
+    "abstract type", "baremodule", "begin", "break", "catch", "ccall",
+    "const", "continue", "do", "else", "elseif", "end", "export",
+    "finally", "for", "function", "global", "if", "import",
+    "let", "local", "macro", "module", "mutable struct",
+    "primitive type", "quote", "return", "struct",
+    "try", "using", "while"]
+
+complete_keyword!(suggestions::Vector{Completion}, s::String) =
+    complete_from_list!(suggestions, KeywordCompletion, sorted_keywords, s)
+
+const sorted_keyvals = ["false", "true"]
+
+complete_keyval!(suggestions::Vector{Completion}, s::String) =
+    complete_from_list!(suggestions, KeyvalCompletion, sorted_keyvals, s)
+
+function do_cmd_escape(s)
+    return Base.escape_raw_string(Base.shell_escape_posixly(s), '`')
+end
+function do_shell_escape(s)
+    return Base.shell_escape_posixly(s)
+end
+function do_string_escape(s)
+    return escape_string(s, ('\"','$'))
+end
+function do_string_unescape(s)
+    s = replace(s, "\\\$"=>"\$")
     try
-        if isempty(dir)
-            files = readdir()
-        elseif isdir(dir)
-            files = readdir(dir)
-        else
-            return Completion[], 0:-1, false
-        end
-    catch
-        return Completion[], 0:-1, false
+        unescape_string(s)
+    catch e
+        e isa ArgumentError || rethrow()
+        s # it is unlikely, but if it isn't a valid string, maybe it was a valid path, and just needs escape_string called?
     end
+end
 
-    matches = Set{String}()
-    for file in files
-        if startswith(file, prefix)
-            p = joinpath(dir, file)
-            is_dir = try isdir(p) catch; false end
-            push!(matches, is_dir ? joinpath(file, "") : file)
+function joinpath_withsep(dir, path; dirsep)
+    dir == "" && return path
+    dir[end] == dirsep ? dir * path : dir * dirsep * path
+end
+
+const PATH_cache_lock = Base.ReentrantLock()
+const PATH_cache = Set{String}()
+PATH_cache_task::Union{Task,Nothing} = nothing
+PATH_cache_condition::Union{Threads.Condition, Nothing} = nothing # used for sync in tests
+next_cache_update::Float64 = 0.0
+function maybe_spawn_cache_PATH()
+    global PATH_cache_task, PATH_cache_condition, next_cache_update
+    @lock PATH_cache_lock begin
+        # Extract to local variables to enable flow-sensitive type inference for these global variables
+        PATH_cache_task_local = PATH_cache_task
+        PATH_cache_task_local isa Task && !istaskdone(PATH_cache_task_local) && return
+        time() < next_cache_update && return
+        PATH_cache_task = PATH_cache_task_local = Threads.@spawn begin
+            try
+                REPLCompletions.cache_PATH()
+            finally
+                @lock PATH_cache_lock begin
+                    next_cache_update = time() + 10 # earliest next update can run is 10s after
+                    PATH_cache_task = nothing # release memory when done
+                    PATH_cache_condition_local = PATH_cache_condition
+                    PATH_cache_condition_local !== nothing && notify(PATH_cache_condition_local)
+                end
+            end
         end
+        Base.errormonitor(PATH_cache_task_local)
     end
+end
 
-    if use_envpath && length(dir) == 0
-        # Look for files in PATH as well
-        local pathdirs = split(ENV["PATH"], @static Sys.iswindows() ? ";" : ":")
+# caches all reachable files in PATH dirs
+function cache_PATH()
+    path = get(ENV, "PATH", nothing)
+    path isa String || return
 
-        for pathdir in pathdirs
-            local actualpath
-            try
-                actualpath = realpath(pathdir)
-            catch
-                # Bash doesn't expect every folder in PATH to exist, so neither shall we
-                continue
-            end
+    # Calling empty! on PATH_cache would be annoying for async typing hints as completions would temporarily disappear.
+    # So keep track of what's added this time and at the end remove any that didn't appear this time from the global cache.
+    this_PATH_cache = Set{String}()
+
+    @debug "caching PATH files" PATH=path
+    pathdirs = split(path, @static Sys.iswindows() ? ";" : ":")
+
+    next_yield_time = time() + 0.01
+
+    t = @elapsed for pathdir in pathdirs
+        actualpath = try
+            realpath(pathdir)
+        catch ex
+            ex isa Base.IOError || rethrow()
+            # Bash doesn't expect every folder in PATH to exist, so neither shall we
+            continue
+        end
+
+        if actualpath != pathdir && in(actualpath, pathdirs)
+            # Remove paths which (after resolving links) are in the env path twice.
+            # Many distros eg. point /bin to /usr/bin but have both in the env path.
+            continue
+        end
 
-            if actualpath != pathdir && in(actualpath,pathdirs)
-                # Remove paths which (after resolving links) are in the env path twice.
-                # Many distros eg. point /bin to /usr/bin but have both in the env path.
+        path_entries = try
+            _readdirx(pathdir)
+        catch e
+            # Bash allows dirs in PATH that can't be read, so we should as well.
+            if isa(e, Base.IOError) || isa(e, Base.ArgumentError)
                 continue
+            else
+                # We only handle IOError and ArgumentError here
+                rethrow()
             end
-
-            local filesinpath
+        end
+        for entry in path_entries
+            # In a perfect world, we would filter on whether the file is executable
+            # here, or even on whether the current user can execute the file in question.
             try
-                filesinpath = readdir(pathdir)
+                if isfile(entry)
+                    @lock PATH_cache_lock push!(PATH_cache, entry.name)
+                    push!(this_PATH_cache, entry.name)
+                end
             catch e
-                # Bash allows dirs in PATH that can't be read, so we should as well.
-                if isa(e, Base.IOError) || isa(e, Base.ArgumentError)
+                # `isfile()` can throw in rare cases such as when probing a
+                # symlink that points to a file within a directory we do not
+                # have read access to.
+                if isa(e, Base.IOError)
                     continue
                 else
-                    # We only handle IOError and ArgumentError here
                     rethrow()
                 end
             end
-
-            for file in filesinpath
-                # In a perfect world, we would filter on whether the file is executable
-                # here, or even on whether the current user can execute the file in question.
-                if startswith(file, prefix) && isfile(joinpath(pathdir, file))
-                    push!(matches, file)
-                end
+            if time() >= next_yield_time
+                yield() # to avoid blocking typing when -t1
+                next_yield_time = time() + 0.01
             end
         end
     end
 
-    function do_escape(s)
-        return shell_escape ? replace(s, r"(\s|\\)" => s"\\\0") :
-               string_escape ? escape_string(s, ('\"','$')) :
-               s
+    @lock PATH_cache_lock begin
+        intersect!(PATH_cache, this_PATH_cache) # remove entries from PATH_cache that weren't found this time
     end
 
-    matchList = Completion[PathCompletion(do_escape(s)) for s in matches]
-    startpos = pos - lastindex(do_escape(prefix)) + 1
-    # The pos - lastindex(prefix) + 1 is correct due to `lastindex(prefix)-lastindex(prefix)==0`,
-    # hence we need to add one to get the first index. This is also correct when considering
-    # pos, because pos is the `lastindex` a larger string which `endswith(path)==true`.
-    return matchList, startpos:pos, !isempty(matchList)
+    @debug "caching PATH files took $t seconds" length(pathdirs) length(PATH_cache)
+    return PATH_cache
 end
 
-function complete_expanduser(path::AbstractString, r)
-    expanded =
-        try expanduser(path)
-        catch e
-            e isa ArgumentError || rethrow()
-            path
+function complete_path(path::AbstractString;
+                       use_envpath=false,
+                       shell_escape=false,
+                       cmd_escape=false,
+                       string_escape=false,
+                       contract_user=false,
+                       dirsep=Sys.iswindows() ? '\\' : '/')
+    @assert !(shell_escape && string_escape)
+    if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path)
+        # if the path is just "~", don't consider the expanded username as a prefix
+        if path == "~"
+            dir, prefix = homedir(), ""
+        else
+            dir, prefix = splitdir(homedir() * path[2:end])
         end
-    return Completion[PathCompletion(expanded)], r, path != expanded
-end
-
-# Returns a range that includes the method name in front of the first non
-# closed start brace from the end of the string.
-function find_start_brace(s::AbstractString; c_start='(', c_end=')')
-    braces = 0
-    r = reverse(s)
-    i = firstindex(r)
-    in_single_quotes = false
-    in_double_quotes = false
-    in_back_ticks = false
-    in_comment = 0
-    while i <= ncodeunits(r)
-        c, i = iterate(r, i)
-        if c == '#' && i <= ncodeunits(r) && iterate(r, i)[1] == '='
-            c, i = iterate(r, i) # consume '='
-            new_comments = 1
-            # handle #=#=#=#, by counting =# pairs
-            while i <= ncodeunits(r) && iterate(r, i)[1] == '#'
-                c, i = iterate(r, i) # consume '#'
-                iterate(r, i)[1] == '=' || break
-                c, i = iterate(r, i) # consume '='
-                new_comments += 1
-            end
-            if c == '='
-                in_comment += new_comments
-            else
-                in_comment -= new_comments
-            end
-        elseif !in_single_quotes && !in_double_quotes && !in_back_ticks && in_comment == 0
-            if c == c_start
-                braces += 1
-            elseif c == c_end
-                braces -= 1
-            elseif c == '\''
-                in_single_quotes = true
-            elseif c == '"'
-                in_double_quotes = true
-            elseif c == '`'
-                in_back_ticks = true
-            end
+    else
+        dir, prefix = splitdir(path)
+    end
+    entries = try
+        if isempty(dir)
+            _readdirx()
+        elseif isdir(dir)
+            _readdirx(dir)
         else
-            if in_single_quotes &&
-                c == '\'' && i <= ncodeunits(r) && iterate(r, i)[1] != '\\'
-                in_single_quotes = false
-            elseif in_double_quotes &&
-                c == '"' && i <= ncodeunits(r) && iterate(r, i)[1] != '\\'
-                in_double_quotes = false
-            elseif in_back_ticks &&
-                c == '`' && i <= ncodeunits(r) && iterate(r, i)[1] != '\\'
-                in_back_ticks = false
-            elseif in_comment > 0 &&
-                c == '=' && i <= ncodeunits(r) && iterate(r, i)[1] == '#'
-                # handle =#=#=#=, by counting #= pairs
-                c, i = iterate(r, i) # consume '#'
-                old_comments = 1
-                while i <= ncodeunits(r) && iterate(r, i)[1] == '='
-                    c, i = iterate(r, i) # consume '='
-                    iterate(r, i)[1] == '#' || break
-                    c, i = iterate(r, i) # consume '#'
-                    old_comments += 1
-                end
-                if c == '#'
-                    in_comment -= old_comments
-                else
-                    in_comment += old_comments
-                end
+            return Completion[], dir, false
+        end
+    catch ex
+        ex isa Base.IOError || rethrow()
+        return Completion[], dir, false
+    end
+
+    matches = Set{String}()
+    for entry in entries
+        if startswith(entry.name, prefix)
+            is_dir = try isdir(entry) catch ex; ex isa Base.IOError ? false : rethrow() end
+            push!(matches, is_dir ? joinpath_withsep(entry.name, ""; dirsep) : entry.name)
+        end
+    end
+
+    if use_envpath && isempty(dir)
+        # Look for files in PATH as well. These are cached in `cache_PATH` in an async task to not block typing.
+        # If we cannot get lock because its still caching just pass over this so that typing isn't laggy.
+        maybe_spawn_cache_PATH() # only spawns if enough time has passed and the previous caching task has completed
+        @lock PATH_cache_lock begin
+            for file in PATH_cache
+                startswith(file, prefix) && push!(matches, file)
             end
         end
-        braces == 1 && break
     end
-    braces != 1 && return 0:-1, -1
-    method_name_end = reverseind(s, i)
-    startind = nextind(s, something(findprev(in(non_identifier_chars), s, method_name_end), 0))::Int
-    return (startind:lastindex(s), method_name_end)
-end
 
-struct REPLInterpreterCache
-    dict::IdDict{MethodInstance,CodeInstance}
+    matches = ((shell_escape ? do_shell_escape(s) : string_escape ? do_string_escape(s) : s) for s in matches)
+    matches = ((cmd_escape ? do_cmd_escape(s) : s) for s in matches)
+    matches = Completion[PathCompletion(contract_user ? contractuser(s) : s) for s in matches]
+    return matches, dir, !isempty(matches)
 end
-REPLInterpreterCache() = REPLInterpreterCache(IdDict{MethodInstance,CodeInstance}())
-const REPL_INTERPRETER_CACHE = REPLInterpreterCache()
 
-function get_code_cache()
-    # XXX Avoid storing analysis results into the cache that persists across precompilation,
-    #     as [sys|pkg]image currently doesn't support serializing externally created `CodeInstance`.
-    #     Otherwise, `CodeInstance`s created by `REPLInterpreter`, that are much less optimized
-    #     that those produced by `NativeInterpreter`, will leak into the native code cache,
-    #     potentially causing runtime slowdown.
-    #     (see https://github.com/JuliaLang/julia/issues/48453).
-    if (@ccall jl_generating_output()::Cint) == 1
-        return REPLInterpreterCache()
+function complete_path(path::AbstractString,
+                       pos::Int;
+                       use_envpath=false,
+                       shell_escape=false,
+                       string_escape=false,
+                       contract_user=false)
+    ## TODO: enable this depwarn once Pkg is fixed
+    #Base.depwarn("complete_path with pos argument is deprecated because the return value [2] is incorrect to use", :complete_path)
+    paths, dir, success = complete_path(path; use_envpath, shell_escape, string_escape, dirsep='/')
+
+    if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path)
+        # if the path is just "~", don't consider the expanded username as a prefix
+        if path == "~"
+            dir, prefix = homedir(), ""
+        else
+            dir, prefix = splitdir(homedir() * path[2:end])
+        end
     else
-        return REPL_INTERPRETER_CACHE
+        dir, prefix = splitdir(path)
     end
+    startpos = pos - lastindex(prefix) + 1
+    Sys.iswindows() && map!(paths, paths) do c::PathCompletion
+        # emulation for unnecessarily complicated return value, since / is a
+        # perfectly acceptable path character which does not require quoting
+        # but is required by Pkg's awkward parser handling
+        return endswith(c.path, "/") ? PathCompletion(chop(c.path) * "\\\\") : c
+    end
+    return paths, startpos:pos, success
 end
 
+struct REPLCacheToken end
+
 struct REPLInterpreter <: CC.AbstractInterpreter
-    repl_frame::CC.InferenceResult
+    limit_aggressive_inference::Bool
     world::UInt
     inf_params::CC.InferenceParams
     opt_params::CC.OptimizationParams
     inf_cache::Vector{CC.InferenceResult}
-    code_cache::REPLInterpreterCache
-    function REPLInterpreter(repl_frame::CC.InferenceResult;
+    function REPLInterpreter(limit_aggressive_inference::Bool=false;
                              world::UInt = Base.get_world_counter(),
-                             inf_params::CC.InferenceParams = CC.InferenceParams(),
+                             inf_params::CC.InferenceParams = CC.InferenceParams(;
+                                 aggressive_constant_propagation=true),
                              opt_params::CC.OptimizationParams = CC.OptimizationParams(),
-                             inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[],
-                             code_cache::REPLInterpreterCache = get_code_cache())
-        return new(repl_frame, world, inf_params, opt_params, inf_cache, code_cache)
+                             inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[])
+        return new(limit_aggressive_inference, world, inf_params, opt_params, inf_cache)
     end
 end
 CC.InferenceParams(interp::REPLInterpreter) = interp.inf_params
 CC.OptimizationParams(interp::REPLInterpreter) = interp.opt_params
-CC.get_world_counter(interp::REPLInterpreter) = interp.world
+CC.get_inference_world(interp::REPLInterpreter) = interp.world
 CC.get_inference_cache(interp::REPLInterpreter) = interp.inf_cache
-CC.code_cache(interp::REPLInterpreter) = CC.WorldView(interp.code_cache, CC.WorldRange(interp.world))
-CC.get(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default)
-CC.getindex(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance) = getindex(wvc.cache.dict, mi)
-CC.haskey(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance) = haskey(wvc.cache.dict, mi)
-CC.setindex!(wvc::CC.WorldView{REPLInterpreterCache}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi)
+CC.cache_owner(::REPLInterpreter) = REPLCacheToken()
 
 # REPLInterpreter is only used for type analysis, so it should disable optimization entirely
 CC.may_optimize(::REPLInterpreter) = false
 
+# REPLInterpreter doesn't need any sources to be cached, so discard them aggressively
+CC.transform_result_for_cache(::REPLInterpreter, ::CC.InferenceResult, edges::Core.SimpleVector) = nothing
+
 # REPLInterpreter analyzes a top-level frame, so better to not bail out from it
 CC.bail_out_toplevel_call(::REPLInterpreter, ::CC.InferenceLoopState, ::CC.InferenceState) = false
 
@@ -466,90 +552,105 @@ CC.bail_out_toplevel_call(::REPLInterpreter, ::CC.InferenceLoopState, ::CC.Infer
 # Aggressive binding resolution poses challenges for the inference cache validation
 # (until https://github.com/JuliaLang/julia/issues/40399 is implemented).
 # To avoid the cache validation issues, `REPLInterpreter` only allows aggressive binding
-# resolution for top-level frame representing REPL input code (`repl_frame`) and for child
-# `getproperty` frames that are constant propagated from the `repl_frame`. This works, since
-# a.) these frames are never cached, and
-# b.) their results are only observed by the non-cached `repl_frame`.
+# resolution for top-level frame representing REPL input code and for child uncached frames
+# that are constant propagated from the top-level frame ("repl-frame"s). This works, even if
+# those global bindings are not constant and may be mutated in the future, since:
+# a.) "repl-frame"s are never cached, and
+# b.) mutable values are never observed by any cached frames.
 #
 # `REPLInterpreter` also aggressively concrete evaluate `:inconsistent` calls within
-# `repl_frame` to provide reasonable completions for lines like `Ref(Some(42))[].|`.
+# "repl-frame" to provide reasonable completions for lines like `Ref(Some(42))[].|`.
 # Aggressive concrete evaluation allows us to get accurate type information about complex
 # expressions that otherwise can not be constant folded, in a safe way, i.e. it still
 # doesn't evaluate effectful expressions like `pop!(xs)`.
 # Similarly to the aggressive binding resolution, aggressive concrete evaluation doesn't
-# present any cache validation issues because `repl_frame` is never cached.
-
-is_repl_frame(interp::REPLInterpreter, sv::CC.InferenceState) = interp.repl_frame === sv.result
+# present any cache validation issues because "repl-frame" is never cached.
+
+# `REPLInterpreter` is specifically used by `repl_eval_ex`, where all top-level frames are
+# `repl_frame` always. However, this assumption wouldn't stand if `REPLInterpreter` were to
+# be employed, for instance, by `typeinf_ext_toplevel`.
+is_repl_frame(sv::CC.InferenceState) = sv.linfo.def isa Module && sv.cache_mode === CC.CACHE_MODE_NULL
+
+function is_call_stack_uncached(sv::CC.InferenceState)
+    CC.is_cached(sv) && return false
+    parent = CC.frame_parent(sv)
+    parent === nothing && return true
+    return is_call_stack_uncached(parent::CC.InferenceState)
+end
 
 # aggressive global binding resolution within `repl_frame`
-function CC.abstract_eval_globalref(interp::REPLInterpreter, g::GlobalRef,
+function CC.abstract_eval_globalref(interp::REPLInterpreter, g::GlobalRef, bailed::Bool,
                                     sv::CC.InferenceState)
-    if is_repl_frame(interp, sv)
-        if CC.isdefined_globalref(g)
-            return Const(ccall(:jl_get_globalref_value, Any, (Any,), g))
-        end
-        return Union{}
-    end
-    return @invoke CC.abstract_eval_globalref(interp::CC.AbstractInterpreter, g::GlobalRef,
-                                              sv::CC.InferenceState)
-end
-
-function is_repl_frame_getproperty(interp::REPLInterpreter, sv::CC.InferenceState)
-    def = sv.linfo.def
-    def isa Method || return false
-    def.name === :getproperty || return false
-    sv.cached && return false
-    return is_repl_frame(interp, sv.parent)
-end
-
-# aggressive global binding resolution for `getproperty(::Module, ::Symbol)` calls within `repl_frame`
-function CC.builtin_tfunction(interp::REPLInterpreter, @nospecialize(f),
-                              argtypes::Vector{Any}, sv::CC.InferenceState)
-    if f === Core.getglobal && is_repl_frame_getproperty(interp, sv)
-        if length(argtypes) == 2
-            a1, a2 = argtypes
-            if isa(a1, Const) && isa(a2, Const)
-                a1val, a2val = a1.val, a2.val
-                if isa(a1val, Module) && isa(a2val, Symbol)
-                    g = GlobalRef(a1val, a2val)
-                    if CC.isdefined_globalref(g)
-                        return Const(ccall(:jl_get_globalref_value, Any, (Any,), g))
-                    end
-                    return Union{}
-                end
+    # Ignore saw_latestworld
+    if (interp.limit_aggressive_inference ? is_repl_frame(sv) : is_call_stack_uncached(sv))
+        partition = CC.abstract_eval_binding_partition!(interp, g, sv)
+        if CC.is_defined_const_binding(CC.binding_kind(partition))
+            return CC.RTEffects(Const(CC.partition_restriction(partition)), Union{}, CC.EFFECTS_TOTAL)
+        else
+            b = convert(Core.Binding, g)
+            if CC.binding_kind(partition) == CC.PARTITION_KIND_GLOBAL && isdefined(b, :value)
+                return CC.RTEffects(Const(b.value), Union{}, CC.EFFECTS_TOTAL)
             end
         end
+        return CC.RTEffects(Union{}, UndefVarError, CC.EFFECTS_THROWS)
     end
-    return @invoke CC.builtin_tfunction(interp::CC.AbstractInterpreter, f::Any,
-                                        argtypes::Vector{Any}, sv::CC.InferenceState)
+    return @invoke CC.abstract_eval_globalref(interp::CC.AbstractInterpreter, g::GlobalRef, bailed::Bool,
+                                              sv::CC.InferenceState)
 end
 
 # aggressive concrete evaluation for `:inconsistent` frames within `repl_frame`
 function CC.concrete_eval_eligible(interp::REPLInterpreter, @nospecialize(f),
                                    result::CC.MethodCallResult, arginfo::CC.ArgInfo,
                                    sv::CC.InferenceState)
-    if is_repl_frame(interp, sv)
+    if (interp.limit_aggressive_inference ? is_repl_frame(sv) : is_call_stack_uncached(sv))
         neweffects = CC.Effects(result.effects; consistent=CC.ALWAYS_TRUE)
-        result = CC.MethodCallResult(result.rt, result.edgecycle, result.edgelimited,
-                                     result.edge, neweffects)
+        result = CC.MethodCallResult(result.rt, result.exct, neweffects, result.edge,
+                                     result.edgecycle, result.edgelimited, result.call_result)
+    end
+    ret = @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, f::Any,
+                                            result::CC.MethodCallResult, arginfo::CC.ArgInfo,
+                                            sv::CC.InferenceState)
+    if ret === :semi_concrete_eval
+        # while the base eligibility check probably won't permit semi-concrete evaluation
+        # for `REPLInterpreter` (given it completely turns off optimization),
+        # this ensures we don't inadvertently enter irinterp
+        ret = :none
     end
-    return @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, f::Any,
-                                             result::CC.MethodCallResult, arginfo::CC.ArgInfo,
-                                             sv::CC.InferenceState)
+    return ret
 end
 
-function resolve_toplevel_symbols!(mod::Module, src::Core.CodeInfo)
-    newsrc = copy(src)
-    @ccall jl_resolve_globals_in_ir(
-        #=jl_array_t *stmts=# newsrc.code::Any,
+# allow constant propagation for mutable constants
+function CC.const_prop_argument_heuristic(interp::REPLInterpreter, arginfo::CC.ArgInfo, sv::CC.InferenceState)
+    if !interp.limit_aggressive_inference
+        any(@nospecialize(a)->isa(a, Const), arginfo.argtypes) && return true # even if mutable
+    end
+    return @invoke CC.const_prop_argument_heuristic(interp::CC.AbstractInterpreter, arginfo::CC.ArgInfo, sv::CC.InferenceState)
+end
+
+# Perform some post-hoc mutation on lowered code, as expected by some abstract interpretation
+# routines, especially for `:foreigncall` and `:cglobal`.
+function resolve_toplevel_symbols!(src::Core.CodeInfo, mod::Module)
+    @ccall jl_resolve_definition_effects_in_ir(
+        #=jl_array_t *stmts=# src.code::Any,
         #=jl_module_t *m=# mod::Any,
         #=jl_svec_t *sparam_vals=# Core.svec()::Any,
+        #=jl_value_t *binding_edge=# C_NULL::Ptr{Cvoid},
         #=int binding_effects=# 0::Int)::Cvoid
-    return newsrc
+    return src
+end
+
+function construct_toplevel_mi(src::Core.CodeInfo, context_module::Module)
+    resolve_toplevel_symbols!(src, context_module)
+    return @ccall jl_method_instance_for_thunk(src::Any, context_module::Any)::Ref{Core.MethodInstance}
 end
 
 # lower `ex` and run type inference on the resulting top-level expression
-function repl_eval_ex(@nospecialize(ex), context_module::Module)
+function repl_eval_ex(@nospecialize(ex), context_module::Module; limit_aggressive_inference::Bool=false)
+    expr_has_error(ex) && return nothing
+    if (isexpr(ex, :toplevel) || isexpr(ex, :tuple)) && !isempty(ex.args)
+        # get the inference result for the last expression
+        ex = ex.args[end]
+    end
     lwr = try
         Meta.lower(context_module, ex)
     catch # macro expansion failed, etc.
@@ -562,17 +663,10 @@ function repl_eval_ex(@nospecialize(ex), context_module::Module)
     isexpr(lwr, :thunk) || return nothing # lowered to `Expr(:error, ...)` or similar
     src = lwr.args[1]::Core.CodeInfo
 
-    # construct top-level `MethodInstance`
-    mi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ());
-    mi.specTypes = Tuple{}
-
-    mi.def = context_module
-    src = resolve_toplevel_symbols!(context_module, src)
-    @atomic mi.uninferred = src
-
+    mi = construct_toplevel_mi(src, context_module)
+    interp = REPLInterpreter(limit_aggressive_inference)
     result = CC.InferenceResult(mi)
-    interp = REPLInterpreter(result)
-    frame = CC.InferenceState(result, src, #=cache=#:no, interp)::CC.InferenceState
+    frame = CC.InferenceState(result, src, #=cache=#:no, interp)
 
     # NOTE Use the fixed world here to make `REPLInterpreter` robust against
     #      potential invalidations of `Core.Compiler` methods.
@@ -599,6 +693,16 @@ code_typed(CC.typeinf, (REPLInterpreter, CC.InferenceState))
 # Method completion on function call expression that look like :(max(1))
 MAX_METHOD_COMPLETIONS::Int = 40
 function _complete_methods(ex_org::Expr, context_module::Module, shift::Bool)
+    isempty(ex_org.args) && return 2, nothing, [], Set{Symbol}()
+    # Desugar do block call into call with lambda
+    if ex_org.head === :do && length(ex_org.args) >= 2
+        ex_call = ex_org.args[1]
+        ex_args = [x for x in ex_call.args if !(x isa Expr && x.head === :parameters)]
+        ex_params = findfirst(x -> x isa Expr && x.head === :parameters, ex_call.args)
+        new_args = [ex_args[1], ex_org.args[end], ex_args[2:end]...]
+        ex_params !== nothing && push!(new_args, ex_call.args[ex_params])
+        ex_org = Expr(:call, new_args...)
+    end
     funct = repl_eval_ex(ex_org.args[1], context_module)
     funct === nothing && return 2, nothing, [], Set{Symbol}()
     funct = CC.widenconst(funct)
@@ -606,9 +710,12 @@ function _complete_methods(ex_org::Expr, context_module::Module, shift::Bool)
     return kwargs_flag, funct, args_ex, kwargs_ex
 end
 
-function complete_methods(ex_org::Expr, context_module::Module=Main, shift::Bool=false)
+# cursor_pos: either :positional (complete either kwargs or positional) or :kwargs (beyond semicolon)
+function complete_methods(ex_org::Expr, context_module::Module=Main, shift::Bool=false, cursor_pos::Symbol=:positional)
     kwargs_flag, funct, args_ex, kwargs_ex = _complete_methods(ex_org, context_module, shift)::Tuple{Int, Any, Vector{Any}, Set{Symbol}}
     out = Completion[]
+    # Allow more arguments when cursor before semicolon, even if kwargs are present
+    cursor_pos == :positional && kwargs_flag == 1 && (kwargs_flag = 0)
     kwargs_flag == 2 && return out # one of the kwargs is invalid
     kwargs_flag == 0 && push!(args_ex, Vararg{Any}) # allow more arguments if there is no semicolon
     complete_methods!(out, funct, args_ex, kwargs_ex, shift ? -2 : MAX_METHOD_COMPLETIONS, kwargs_flag == 1)
@@ -616,6 +723,14 @@ function complete_methods(ex_org::Expr, context_module::Module=Main, shift::Bool
 end
 
 MAX_ANY_METHOD_COMPLETIONS::Int = 10
+
+function accessible(mod::Module, private::Bool)
+    bindings = IdSet{Any}(Core.Typeof(getglobal(mod, s)) for s in names(mod; all=private, imported=private, usings=private)
+                   if !Base.isdeprecated(mod, s) && !startswith(string(s), '#') && !startswith(string(s), '@') && isdefined(mod, s))
+    delete!(bindings, Module)
+    return collect(bindings)
+end
+
 function complete_any_methods(ex_org::Expr, callee_module::Module, context_module::Module, moreargs::Bool, shift::Bool)
     out = Completion[]
     args_ex, kwargs_ex, kwargs_flag = try
@@ -631,32 +746,8 @@ function complete_any_methods(ex_org::Expr, callee_module::Module, context_modul
     # semicolon for the ".?(" syntax
     moreargs && push!(args_ex, Vararg{Any})
 
-    seen = Base.IdSet()
-    for name in names(callee_module; all=true)
-        if !Base.isdeprecated(callee_module, name) && isdefined(callee_module, name) && !startswith(string(name), '#')
-            func = getfield(callee_module, name)
-            if !isa(func, Module)
-                funct = Core.Typeof(func)
-                if !in(funct, seen)
-                    push!(seen, funct)
-                    complete_methods!(out, funct, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS, false)
-                end
-            elseif callee_module === Main && isa(func, Module)
-                callee_module2 = func
-                for name in names(callee_module2)
-                    if !Base.isdeprecated(callee_module2, name) && isdefined(callee_module2, name) && !startswith(string(name), '#')
-                        func = getfield(callee_module, name)
-                        if !isa(func, Module)
-                            funct = Core.Typeof(func)
-                            if !in(funct, seen)
-                                push!(seen, funct)
-                                complete_methods!(out, funct, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS, false)
-                            end
-                        end
-                    end
-                end
-            end
-        end
+    for seen_name in accessible(callee_module, callee_module === context_module)
+        complete_methods!(out, seen_name, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS, false)
     end
 
     if !shift
@@ -665,7 +756,7 @@ function complete_any_methods(ex_org::Expr, callee_module::Module, context_modul
             isa(c, TextCompletion) && return false
             isa(c, MethodCompletion) || return true
             sig = Base.unwrap_unionall(c.method.sig)::DataType
-            return !all(T -> T === Any || T === Vararg{Any}, sig.parameters[2:end])
+            return !all(@nospecialize(T) -> T === Any || T === Vararg{Any}, sig.parameters[2:end])
         end
     end
 
@@ -760,34 +851,7 @@ const subscript_regex = Regex("^\\\\_[" * join(isdigit(k) || isletter(k) ? "$k"
 const superscripts = Dict(k[3]=>v[1] for (k,v) in latex_symbols if startswith(k, "\\^") && length(k)==3)
 const superscript_regex = Regex("^\\\\\\^[" * join(isdigit(k) || isletter(k) ? "$k" : "\\$k" for k in keys(superscripts)) * "]+\\z")
 
-# Aux function to detect whether we're right after a
-# using or import keyword
-function afterusing(string::String, startpos::Int)
-    (isempty(string) || startpos == 0) && return false
-    str = string[1:prevind(string,startpos)]
-    isempty(str) && return false
-    rstr = reverse(str)
-    r = findfirst(r"\s(gnisu|tropmi)\b", rstr)
-    r === nothing && return false
-    fr = reverseind(str, last(r))
-    return occursin(r"^\b(using|import)\s*((\w+[.])*\w+\s*,\s*)*$", str[fr:end])
-end
-
-function close_path_completion(str, startpos, r, paths, pos)
-    length(paths) == 1 || return false  # Only close if there's a single choice...
-    _path = str[startpos:prevind(str, first(r))] * (paths[1]::PathCompletion).path
-    path = expanduser(unescape_string(replace(_path, "\\\$"=>"\$", "\\\""=>"\"")))
-    # ...except if it's a directory...
-    try
-        isdir(path)
-    catch e
-        e isa Base.IOError || rethrow() # `path` cannot be determined to be a file
-    end && return false
-    # ...and except if there's already a " at the cursor.
-    return lastindex(str) <= pos || str[nextind(str, pos)] != '"'
-end
-
-function bslash_completions(string::String, pos::Int)
+function bslash_completions(string::String, pos::Int, hint::Bool=false)
     slashpos = something(findprev(isequal('\\'), string, pos), 0)
     if (something(findprev(in(bslash_separators), string, pos), 0) < slashpos &&
         !(1 < slashpos && (string[prevind(string, slashpos)]=='\\')))
@@ -809,38 +873,12 @@ function bslash_completions(string::String, pos::Int)
         end
         # return possible matches; these cannot be mixed with regular
         # Julian completions as only latex / emoji symbols contain the leading \
-        if startswith(s, "\\:") # emoji
-            namelist = Iterators.filter(k -> startswith(k, s), keys(emoji_symbols))
-        else # latex
-            namelist = Iterators.filter(k -> startswith(k, s), keys(latex_symbols))
-        end
-        return (true, (Completion[BslashCompletion(name) for name in sort!(collect(namelist))], slashpos:pos, true))
-    end
-    return (false, (Completion[], 0:-1, false))
-end
-
-function dict_identifier_key(str::String, tag::Symbol, context_module::Module=Main)
-    if tag === :string
-        str_close = str*"\""
-    elseif tag === :cmd
-        str_close = str*"`"
-    else
-        str_close = str
-    end
-
-    frange, end_of_identifier = find_start_brace(str_close, c_start='[', c_end=']')
-    isempty(frange) && return (nothing, nothing, nothing)
-    obj = context_module
-    for name in split(str[frange[1]:end_of_identifier], '.')
-        Base.isidentifier(name) || return (nothing, nothing, nothing)
-        sym = Symbol(name)
-        isdefined(obj, sym) || return (nothing, nothing, nothing)
-        obj = getfield(obj, sym)
+        symbol_dict = startswith(s, "\\:") ? emoji_symbols : latex_symbols
+        namelist = Iterators.filter(k -> startswith(k, s), keys(symbol_dict))
+        completions = Completion[BslashCompletion(name, "$(symbol_dict[name]) $name") for name in sort!(collect(namelist))]
+        return (true, (completions, slashpos:pos, true))
     end
-    (isa(obj, AbstractDict) && length(obj)::Int < 1_000_000) || return (nothing, nothing, nothing)
-    begin_of_key = something(findnext(!isspace, str, nextind(str, end_of_identifier) + 1), # +1 for [
-                             lastindex(str)+1)
-    return (obj::AbstractDict, str[begin_of_key:end], begin_of_key)
+    return (false, (Completion[], 1:0, false))
 end
 
 # This needs to be a separate non-inlined function, see #19441
@@ -853,47 +891,17 @@ end
     return matches
 end
 
-# Identify an argument being completed in a method call. If the argument is empty, method
-# suggestions will be provided instead of argument completions.
-function identify_possible_method_completion(partial, last_idx)
-    fail = 0:-1, Expr(:nothing), 0:-1, 0
-
-    # First, check that the last punctuation is either ',', ';' or '('
-    idx_last_punct = something(findprev(x -> ispunct(x) && x != '_' && x != '!', partial, last_idx), 0)::Int
-    idx_last_punct == 0 && return fail
-    last_punct = partial[idx_last_punct]
-    last_punct == ',' || last_punct == ';' || last_punct == '(' || return fail
-
-    # Then, check that `last_punct` is only followed by an identifier or nothing
-    before_last_word_start = something(findprev(in(non_identifier_chars), partial, last_idx), 0)
-    before_last_word_start == 0 && return fail
-    all(isspace, @view partial[nextind(partial, idx_last_punct):before_last_word_start]) || return fail
-
-    # Check that `last_punct` is either the last '(' or placed after a previous '('
-    frange, method_name_end = find_start_brace(@view partial[1:idx_last_punct])
-    method_name_end ∈ frange || return fail
-
-    # Strip the preceding ! operators, if any, and close the expression with a ')'
-    s = replace(partial[frange], r"\G\!+([^=\(]+)" => s"\1"; count=1) * ')'
-    ex = Meta.parse(s, raise=false, depwarn=false)
-    isa(ex, Expr) || return fail
-
-    # `wordrange` is the position of the last argument to complete
-    wordrange = nextind(partial, before_last_word_start):last_idx
-    return frange, ex, wordrange, method_name_end
-end
-
 # Provide completion for keyword arguments in function calls
-function complete_keyword_argument(partial, last_idx, context_module)
-    frange, ex, wordrange, = identify_possible_method_completion(partial, last_idx)
-    fail = Completion[], 0:-1, frange
-    ex.head === :call || is_broadcasting_expr(ex) || return fail
-
+# Returns true if the current argument must be a keyword because the cursor is beyond the semicolon
+function complete_keyword_argument!(suggestions::Vector{Completion},
+                                    ex::Expr, last_word::String,
+                                    context_module::Module,
+                                    arg_pos::Symbol; shift::Bool=false)
     kwargs_flag, funct, args_ex, kwargs_ex = _complete_methods(ex, context_module, true)::Tuple{Int, Any, Vector{Any}, Set{Symbol}}
-    kwargs_flag == 2 && return fail # one of the previous kwargs is invalid
+    kwargs_flag == 2 && return false # one of the previous kwargs is invalid
 
     methods = Completion[]
-    complete_methods!(methods, funct, Any[Vararg{Any}], kwargs_ex, -1, kwargs_flag == 1)
+    complete_methods!(methods, funct, Any[Vararg{Any}], kwargs_ex, -1, arg_pos == :kwargs)
     # TODO: use args_ex instead of Any[Vararg{Any}] and only provide kwarg completion for
     # method calls compatible with the current arguments.
 
@@ -902,28 +910,31 @@ function complete_keyword_argument(partial, last_idx, context_module)
     # previously in the expression. The corresponding suggestion is "kwname=".
     # If the keyword corresponds to an existing name, also include "kwname" as a suggestion
     # since the syntax "foo(; kwname)" is equivalent to "foo(; kwname=kwname)".
-    last_word = partial[wordrange] # the word to complete
     kwargs = Set{String}()
     for m in methods
+        # if MAX_METHOD_COMPLETIONS is hit a single TextCompletion is return by complete_methods! with an explanation
+        # which can be ignored here
+        m isa TextCompletion && continue
         m::MethodCompletion
         possible_kwargs = Base.kwarg_decl(m.method)
         current_kwarg_candidates = String[]
         for _kw in possible_kwargs
             kw = String(_kw)
-            if !endswith(kw, "...") && startswith(kw, last_word) && _kw ∉ kwargs_ex
+            # HACK: Should consider removing current arg from AST.
+            if !endswith(kw, "...") && startswith(kw, last_word) && (_kw ∉ kwargs_ex || kw == last_word)
                 push!(current_kwarg_candidates, kw)
             end
         end
         union!(kwargs, current_kwarg_candidates)
     end
 
-    suggestions = Completion[KeywordArgumentCompletion(kwarg) for kwarg in kwargs]
-    append!(suggestions, complete_symbol(nothing, last_word, Returns(true), context_module))
-
-    return sort!(suggestions, by=completion_text), wordrange
+    for kwarg in kwargs
+        push!(suggestions, KeywordArgumentCompletion(kwarg))
+    end
+    return kwargs_flag != 0 && arg_pos == :kwargs
 end
 
-function project_deps_get_completion_candidates(pkgstarts::String, project_file::String)
+function get_loading_candidates(pkgstarts::String, project_file::String)
     loading_candidates = String[]
     d = Base.parsed_toml(project_file)
     pkg = get(d, "name", nothing)::Union{String, Nothing}
@@ -936,292 +947,450 @@ function project_deps_get_completion_candidates(pkgstarts::String, project_file:
             startswith(pkg, pkgstarts) && push!(loading_candidates, pkg)
         end
     end
-    return Completion[PackageCompletion(name) for name in loading_candidates]
-end
-
-function complete_identifiers!(suggestions::Vector{Completion}, @nospecialize(ffunc::Function), context_module::Module, string::String, name::String, pos::Int, dotpos::Int, startpos::Int, comp_keywords=false)
-    ex = nothing
-    comp_keywords && append!(suggestions, complete_keyword(name))
-    if dotpos > 1 && string[dotpos] == '.'
-        s = string[1:dotpos-1]
-        # First see if the whole string up to `pos` is a valid expression. If so, use it.
-        ex = Meta.parse(s, raise=false, depwarn=false)
-        if isexpr(ex, :incomplete)
-            s = string[startpos:pos]
-            # Heuristic to find the start of the expression. TODO: This would be better
-            # done with a proper error-recovering parser.
-            if 0 < startpos <= lastindex(string) && string[startpos] == '.'
-                i = prevind(string, startpos)
-                while 0 < i
-                    c = string[i]
-                    if c in (')', ']')
-                        if c == ')'
-                            c_start = '('
-                            c_end = ')'
-                        elseif c == ']'
-                            c_start = '['
-                            c_end = ']'
-                        end
-                        frange, end_of_identifier = find_start_brace(string[1:prevind(string, i)], c_start=c_start, c_end=c_end)
-                        isempty(frange) && break # unbalanced parens
-                        startpos = first(frange)
-                        i = prevind(string, startpos)
-                    elseif c in ('\'', '\"', '\`')
-                        s = "$c$c"*string[startpos:pos]
-                        break
+    return loading_candidates
+end
+
+function complete_loading_candidates!(suggestions::Vector{Completion}, s::String)
+    for name in ("Core", "Base")
+        startswith(name, s) && push!(suggestions, PackageCompletion(name))
+    end
+
+    # If there's no dot, we're in toplevel, so we should
+    # also search for packages
+    for dir in Base.load_path()
+        if basename(dir) in Base.project_names && isfile(dir)
+            for name in get_loading_candidates(s, dir)
+                push!(suggestions, PackageCompletion(name))
+            end
+        end
+        isdir(dir) || continue
+        for entry in _readdirx(dir)
+            pname = entry.name
+            if pname[1] != '.' && pname != "METADATA" &&
+                pname != "REQUIRE" && startswith(pname, s)
+                # Valid file paths are
+                #   <Mod>.jl
+                #   <Mod>/src/<Mod>.jl
+                #   <Mod>.jl/src/<Mod>.jl
+                if isfile(entry)
+                    endswith(pname, ".jl") && push!(suggestions,
+                                                    PackageCompletion(pname[1:prevind(pname, end-2)]))
+                else
+                    mod_name = if endswith(pname, ".jl")
+                        pname[1:prevind(pname, end-2)]
                     else
-                        break
+                        pname
+                    end
+                    if isfile(joinpath(entry, "src",
+                                       "$mod_name.jl"))
+                        push!(suggestions, PackageCompletion(mod_name))
                     end
-                    s = string[startpos:pos]
                 end
             end
-            if something(findlast(in(non_identifier_chars), s), 0) < something(findlast(isequal('.'), s), 0)
-                lookup_name, name = rsplit(s, ".", limit=2)
-                name = String(name)
+        end
+    end
+end
+
+function completions(string::String, pos::Int, context_module::Module=Main, shift::Bool=true, hint::Bool=false)
+    # filename needs to be string so macro can be evaluated
+    # TODO: JuliaSyntax version API here
+    node = parseall(CursorNode, string, ignore_errors=true, keep_parens=true, filename="none")
+    cur = @something seek_pos(node, pos) node
+
+    # Back up before whitespace to get a more useful AST node.
+    pos_not_ws = findprev(!isspace, string, pos)
+    cur_not_ws = something(seek_pos(node, pos_not_ws), node)
+
+    suggestions = Completion[]
+    sort_suggestions() = sort!(unique!(named_completion, suggestions), by=named_completion_completion)
+
+    # Search for methods (requires tab press):
+    #   ?(x, y)TAB           lists methods you can call with these objects
+    #   ?(x, y TAB           lists methods that take these objects as the first two arguments
+    #   MyModule.?(x, y)TAB  restricts the search to names in MyModule
+    if !hint
+        cs = method_search(view(string, 1:pos), context_module, shift)
+        cs !== nothing && return cs
+    end
 
-                ex = Meta.parse(lookup_name, raise=false, depwarn=false)
+    # Complete keys in a Dict:
+    #   my_dict[ TAB
+    n, key, closed = find_ref_key(cur_not_ws, pos)
+    if n !== nothing
+        key::UnitRange{Int}
+        obj = dict_eval(Expr(n), context_module)
+        if obj !== nothing
+            # Skip leading whitespace inside brackets.
+            i = @something findnext(!isspace, string, first(key)) nextind(string, last(key))
+            key = i:last(key)
+            s = string[intersect(key, 1:pos)]
+            matches = find_dict_matches(obj, s)
+            length(matches) == 1 && !closed && (matches[1] *= ']')
+            if length(matches) > 0
+                ret = Completion[DictCompletion(obj, match) for match in sort!(matches)]
+                return ret, key, true
             end
-            isexpr(ex, :incomplete) && (ex = nothing)
         end
     end
-    append!(suggestions, complete_symbol(ex, name, ffunc, context_module))
-    return sort!(unique(suggestions), by=completion_text), (dotpos+1):pos, true
-end
 
-function completions(string::String, pos::Int, context_module::Module=Main, shift::Bool=true)
-    # First parse everything up to the current position
-    partial = string[1:pos]
-    inc_tag = Base.incomplete_tag(Meta.parse(partial, raise=false, depwarn=false))
+    # Complete Cmd strings:
+    #   `fil TAB                 => `file
+    #   `file ~/exa TAB          => `file ~/example.txt
+    #   `file ~/example.txt TAB  => `file /home/user/example.txt
+    if (n = find_parent(cur, K"CmdString")) !== nothing
+        off = char_first(n) - 1
+        ret, r, success = shell_completions(string[char_range(n)], pos - off, hint, cmd_escape=true)
+        success && return ret, r .+ off, success
+    end
 
-    # ?(x, y)TAB lists methods you can call with these objects
-    # ?(x, y TAB lists methods that take these objects as the first two arguments
-    # MyModule.?(x, y)TAB restricts the search to names in MyModule
-    rexm = match(r"(\w+\.|)\?\((.*)$", partial)
-    if rexm !== nothing
-        # Get the module scope
-        if isempty(rexm.captures[1])
-            callee_module = context_module
+    # Complete ordinary strings:
+    #  "~/exa TAB         => "~/example.txt"
+    #  "~/example.txt TAB => "/home/user/example.txt"
+    r, closed = find_str(cur)
+    if r !== nothing
+        s = do_string_unescape(string[intersect(r, 1:pos)])
+        ret, success = complete_path_string(s, hint; string_escape=true,
+                                            dirsep=Sys.iswindows() ? '\\' : '/')
+        if length(ret) == 1 && !closed && close_path_completion(ret[1].path)
+            ret[1] = PathCompletion(ret[1].path * '"')
+        end
+        success && return ret, r, success
+    end
+
+    # Backlash symbols:
+    #   \pi => π
+    # Comes after string completion so backslash escapes are not misinterpreted.
+    ok, ret = bslash_completions(string, pos)
+    ok && return ret
+
+    # Don't fall back to symbol completion inside strings or comments.
+    inside_cmdstr = find_parent(cur, K"cmdstring") !== nothing
+    (kind(cur) in KSet"String Comment ErrorEofMultiComment" || inside_cmdstr) &&
+         return Completion[], 1:0, false
+
+    n, arg_pos = find_prefix_call(cur_not_ws)
+    if n !== nothing
+        func = first(children_nt(n))
+        e = Expr(n)
+        # Remove arguments past the first parse error (allows unclosed parens)
+        if is_broadcasting_expr(e)
+            i = findfirst(x -> x isa Expr && x.head == :error, e.args[2].args)
+            i !== nothing && deleteat!(e.args[2].args, i:lastindex(e.args[2].args))
         else
-            modname = Symbol(rexm.captures[1][1:end-1])
-            if isdefined(context_module, modname)
-                callee_module = getfield(context_module, modname)
-                if !isa(callee_module, Module)
-                    callee_module = context_module
-                end
-            else
-                callee_module = context_module
-            end
+            i = findfirst(x -> x isa Expr && x.head == :error, e.args)
+            i !== nothing && deleteat!(e.args, i:lastindex(e.args))
         end
-        moreargs = !endswith(rexm.captures[2], ')')
-        callstr = "_(" * rexm.captures[2]
-        if moreargs
-            callstr *= ')'
+
+        # Method completion:
+        #   foo( TAB     => list of method signatures for foo
+        #   foo(x, TAB   => list of methods signatures for foo with x as first argument
+        if kind(cur_not_ws) in KSet"( , ;"
+            # Don't provide method completions unless the cursor is after: '(' ',' ';'
+            return complete_methods(e, context_module, shift, arg_pos), char_range(func), false
+
+        # Keyword argument completion:
+        #   foo(ar TAB   => keyword arguments like `arg1=`
+        elseif kind(cur) == K"Identifier"
+            r = char_range(cur)
+            s = string[intersect(r, 1:pos)]
+            # Return without adding more suggestions if kwargs only
+            complete_keyword_argument!(suggestions, e, s, context_module, arg_pos; shift) &&
+                return sort_suggestions(), r, true
         end
-        ex_org = Meta.parse(callstr, raise=false, depwarn=false)
-        if isa(ex_org, Expr)
-            return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs, shift), (0:length(rexm.captures[1])+1) .+ rexm.offset, false
+    end
+
+    # Symbol completion
+    # TODO: Should completions replace the identifier at the cursor?
+    looks_like_ident = Base.isidentifier(@view string[intersect(char_range(cur), 1:pos)])
+    if cur.parent !== nothing && kind(cur.parent) === K"var"
+        # Replace the entire var"foo", but search using only "foo".
+        r = intersect(char_range(cur.parent), 1:pos)
+        r2 = char_range(children_nt(cur.parent)[1])
+        s = string[intersect(r2, 1:pos)]
+    elseif cur.parent !== nothing && kind(cur.parent) === K"macro_name"
+        # Include the `@`
+        r = intersect(prevind(string, char_first(cur)):char_last(cur), 1:pos)
+        s = string[r]
+    elseif looks_like_ident || kind(cur) in KSet"Bool Identifier @"
+        r = intersect(char_range(cur), 1:pos)
+        s = string[r]
+    else
+        r = nextind(string, pos):pos
+        s = ""
+    end
+
+    complete_modules_only = false
+    prefix = node_prefix(cur, context_module)
+    comp_keywords = prefix === nothing && !isempty(s)
+
+    # Complete loadable module names:
+    #   import Mod TAB
+    #   import Mod1, Mod2 TAB
+    #   using Mod TAB
+    if (n = find_parent(cur, K"importpath")) !== nothing
+        # Given input lines like `using Foo|`, `import Foo, Bar|` and `using Foo.Bar, Baz, |`:
+        # Let's look only for packages and modules we can reach from here
+        if prefix == nothing
+            complete_loading_candidates!(suggestions, s)
+            return sort_suggestions(), r, true
         end
+
+        # Allow completion for `import Mod.name` (where `name` is not a module)
+        complete_modules_only = prefix === nothing || kind(n.parent) === K"using"
+        comp_keywords = false
     end
 
-    # if completing a key in a Dict
-    identifier, partial_key, loc = dict_identifier_key(partial, inc_tag, context_module)
-    if identifier !== nothing
-        matches = find_dict_matches(identifier, partial_key)
-        length(matches)==1 && (lastindex(string) <= pos || string[nextind(string,pos)] != ']') && (matches[1]*=']')
-        length(matches)>0 && return Completion[DictCompletion(identifier, match) for match in sort!(matches)], loc::Int:pos, true
+    if comp_keywords
+        complete_keyword!(suggestions, s)
+        complete_keyval!(suggestions, s)
     end
 
-    ffunc = Returns(true)
-    suggestions = Completion[]
+    complete_symbol!(suggestions, prefix, s, context_module; complete_modules_only, shift)
+    return sort_suggestions(), r, true
+end
 
-    # Check if this is a var"" string macro that should be completed like
-    # an identifier rather than a string.
-    # TODO: It would be nice for the parser to give us more information here
-    # so that we can lookup the macro by identity rather than pattern matching
-    # its invocation.
-    varrange = findprev("var\"", string, pos)
-
-    if varrange !== nothing
-        ok, ret = bslash_completions(string, pos)
-        ok && return ret
-        startpos = first(varrange) + 4
-        dotpos = something(findprev(isequal('.'), string, first(varrange)-1), 0)
-        return complete_identifiers!(Completion[], ffunc, context_module, string,
-            string[startpos:pos], pos, dotpos, startpos)
-    elseif inc_tag === :cmd
-        m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial))
-        startpos = nextind(partial, reverseind(partial, m.offset))
-        r = startpos:pos
-
-        # This expansion with "\\ "=>' ' replacement and shell_escape=true
-        # assumes the path isn't further quoted within the cmd backticks.
-        expanded = complete_expanduser(replace(string[r], r"\\ " => " "), r)
-        expanded[3] && return expanded  # If user expansion available, return it
-
-        paths, r, success = complete_path(replace(string[r], r"\\ " => " "), pos,
-                                          shell_escape=true)
-
-        return sort!(paths, by=p->p.path), r, success
-    elseif inc_tag === :string
-        # Find first non-escaped quote
-        m = match(r"\"(?!\\)", reverse(partial))
-        startpos = nextind(partial, reverseind(partial, m.offset))
-        r = startpos:pos
-
-        expanded = complete_expanduser(string[r], r)
-        expanded[3] && return expanded  # If user expansion available, return it
-
-        path_prefix = try
-            unescape_string(replace(string[r], "\\\$"=>"\$", "\\\""=>"\""))
-        catch
-            nothing
-        end
-        if !isnothing(path_prefix)
-            paths, r, success = complete_path(path_prefix, pos, string_escape=true)
+function close_path_completion(path)
+    path = expanduser(path)
+    path = do_string_unescape(path)
+    !Base.isaccessibledir(path)
+end
 
-            if close_path_completion(string, startpos, r, paths, pos)
-                paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"")
-            end
+# Lowering can misbehave with nested error expressions.
+function expr_has_error(@nospecialize(e))
+    e isa Expr || return false
+    e.head === :error &&  return true
+    any(expr_has_error, e.args)
+end
 
-            # Fallthrough allowed so that Latex symbols can be completed in strings
-            success && return sort!(paths, by=p->p.path), r, success
-        end
+# Is the cursor inside the square brackets of a ref expression?  If so, returns:
+# - The ref node
+# - The range of characters for the brackets
+# - A flag indicating if the closing bracket is present
+function find_ref_key(cur::CursorNode, pos::Int)
+    n = find_parent(cur, K"ref")
+    n !== nothing || return nothing, nothing, nothing
+    key, closed = find_delim(n, K"[", K"]")
+    if key === nothing || !(first(key) - 1 <= pos <= last(key))
+        return nothing, nothing, nothing
     end
+    return n, key, closed
+end
 
-    ok, ret = bslash_completions(string, pos)
-    ok && return ret
+# If the cursor is in a literal string, return the contents and char range
+# inside the quotes.  Ignores triple strings.
+function find_str(cur::CursorNode)
+    n = find_parent(cur, K"string")
+    n !== nothing || return nothing, nothing
+    find_delim(n, K"\"", K"\"")
+end
+
+# Is the cursor directly inside of the arguments of a prefix call (no nested
+# expressions)?  If so, return:
+#   - The call node
+#   - Either :positional or :kwargs, if the cursor is before or after the `;`
+function find_prefix_call(cur::CursorNode)
+    n = cur.parent
+    n !== nothing || return nothing, nothing
+    is_call(n) = kind(n) in KSet"call dotcall" && is_prefix_call(n)
+    if kind(n) == K"parameters"
+        is_call(n.parent) || return nothing, nothing
+        n.parent, :kwargs
+    else
+        # Check that we are beyond the function name.
+        is_call(n) && cur.index > children_nt(n)[1].index || return nothing, nothing
+        n, :positional
+    end
+end
+
+# If node is the field in a getfield-like expression, return the value
+# complete_symbol! should use as the prefix.
+function node_prefix(node::CursorNode, context_module::Module)
+    node.parent !== nothing || return nothing
+    p = node.parent
+    # In x.var"y", the parent is the "var" when the cursor is on "y".
+    kind(p) == K"var" && (p = p.parent)
+    kind(p) == K"macro_name" && (p = p.parent)
+
+    # expr.node => expr
+    if kind(p) == K"."
+        n = children_nt(p)[1]
+        # Don't use prefix if we are the value
+        n !== node || return nothing
+        return Expr(n)
+    end
 
-    # Make sure that only bslash_completions is working on strings
-    inc_tag === :string && return Completion[], 0:-1, false
-    if inc_tag === :other
-        frange, ex, wordrange, method_name_end = identify_possible_method_completion(partial, pos)
-        if last(frange) != -1 && all(isspace, @view partial[wordrange]) # no last argument to complete
-            if ex.head === :call
-                return complete_methods(ex, context_module, shift), first(frange):method_name_end, false
-            elseif is_broadcasting_expr(ex)
-                return complete_methods(ex, context_module, shift), first(frange):(method_name_end - 1), false
+    if kind(p) == K"importpath"
+        if p.parent !== nothing && kind(p.parent) == K":" && p.index_nt > 1
+            # import A.B: C.node
+            chain = children_nt(children_nt(p.parent)[1])
+            append!(chain, children_nt(p)[1:end-1])
+        else
+            # import A.node
+            # import A.node: ...
+            chain = children_nt(p)[1:node.index_nt]
+            # Don't include the node under cursor in prefix unless it is `.`
+            kind(chain[end]) != K"." && deleteat!(chain, lastindex(chain))
+        end
+        length(chain) > 0 || return nothing
+
+        # (:importpath :x :y :z) => (:. (:. :x :y) :z)
+        # (:importpath :. :. :z) => (:. (parentmodule context_module) :z)
+        if (i = findlast(x -> kind(x) == K".", chain)) !== nothing
+            init = context_module
+            for j in 2:i
+                init = parentmodule(init)
             end
+            deleteat!(chain, 1:i)
+        else
+            # No leading `.`, init is the first element of the path
+            init = chain[1].val
+            deleteat!(chain, 1)
         end
-    elseif inc_tag === :comment
-        return Completion[], 0:-1, false
-    end
-
-    # Check whether we can complete a keyword argument in a function call
-    kwarg_completion, wordrange = complete_keyword_argument(partial, pos, context_module)
-    isempty(wordrange) || return kwarg_completion, wordrange, !isempty(kwarg_completion)
-
-    dotpos = something(findprev(isequal('.'), string, pos), 0)
-    startpos = nextind(string, something(findprev(in(non_identifier_chars), string, pos), 0))
-    # strip preceding ! operator
-    if (m = match(r"\G\!+", partial, startpos)) isa RegexMatch
-        startpos += length(m.match)
-    end
-
-    name = string[max(startpos, dotpos+1):pos]
-    comp_keywords = !isempty(name) && startpos > dotpos
-    if afterusing(string, startpos)
-        # We're right after using or import. Let's look only for packages
-        # and modules we can reach from here
-
-        # If there's no dot, we're in toplevel, so we should
-        # also search for packages
-        s = string[startpos:pos]
-        if dotpos <= startpos
-            for dir in Base.load_path()
-                if basename(dir) in Base.project_names && isfile(dir)
-                    append!(suggestions, project_deps_get_completion_candidates(s, dir))
-                end
-                isdir(dir) || continue
-                for pname in readdir(dir)
-                    if pname[1] != '.' && pname != "METADATA" &&
-                        pname != "REQUIRE" && startswith(pname, s)
-                        # Valid file paths are
-                        #   <Mod>.jl
-                        #   <Mod>/src/<Mod>.jl
-                        #   <Mod>.jl/src/<Mod>.jl
-                        if isfile(joinpath(dir, pname))
-                            endswith(pname, ".jl") && push!(suggestions,
-                                                            PackageCompletion(pname[1:prevind(pname, end-2)]))
-                        else
-                            mod_name = if endswith(pname, ".jl")
-                                pname[1:prevind(pname, end-2)]
-                            else
-                                pname
-                            end
-                            if isfile(joinpath(dir, pname, "src",
-                                               "$mod_name.jl"))
-                                push!(suggestions, PackageCompletion(mod_name))
-                            end
-                        end
+
+        # Convert the "chain" into nested (. a b) expressions.
+        all(x -> kind(x) == K"Identifier", chain) || return nothing
+        return foldl((x, y) -> Expr(:., x, Expr(:quote, y.val)), chain; init)
+    end
+
+    nothing
+end
+
+function dict_eval(@nospecialize(e), context_module::Module=Main)
+    objt = repl_eval_ex(e.args[1], context_module)
+    isa(objt, Core.Const) || return nothing
+    obj = objt.val
+    isa(obj, AbstractDict) || return nothing
+    (Base.haslength(obj) && length(obj)::Int < 1_000_000) || return nothing
+    return obj
+end
+
+function method_search(partial::AbstractString, context_module::Module, shift::Bool)
+    rexm = match(r"([\w.]+.)?\?\((.*)$", partial)
+    if rexm !== nothing
+        # Get the module scope
+        callee_module = context_module
+        if !isnothing(rexm.captures[1])
+            modnames = map(Symbol, split(something(rexm.captures[1]), '.'))
+            for m in modnames
+                if isdefined(callee_module, m)
+                    callee_module = getfield(callee_module, m)
+                    if !isa(callee_module, Module)
+                        callee_module = context_module
+                        break
                     end
                 end
             end
         end
-        ffunc = (mod,x)->(Base.isbindingresolved(mod, x) && isdefined(mod, x) && isa(getfield(mod, x), Module))
-        comp_keywords = false
+        moreargs = !endswith(rexm.captures[2], ')')
+        callstr = "_(" * rexm.captures[2]
+        if moreargs
+            callstr *= ')'
+        end
+        ex_org = Meta.parse(callstr, raise=false, depwarn=false)
+        if isa(ex_org, Expr)
+            pos_q = isnothing(rexm.captures[1]) ? 1 : sizeof(something(rexm.captures[1]))+1 # position after ?
+            return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs, shift), (0:pos_q) .+ rexm.offset, false
+        end
     end
-
-    startpos == 0 && (pos = -1)
-    dotpos < startpos && (dotpos = startpos - 1)
-    return complete_identifiers!(suggestions, ffunc, context_module, string,
-        name, pos, dotpos, startpos, comp_keywords)
 end
 
-function shell_completions(string, pos)
+function shell_completions(str, pos, hint::Bool=false; cmd_escape::Bool=false)
     # First parse everything up to the current position
-    scs = string[1:pos]
-    local args, last_parse
-    try
-        args, last_parse = Base.shell_parse(scs, true)::Tuple{Expr,UnitRange{Int}}
-    catch
-        return Completion[], 0:-1, false
+    scs = str[1:pos]
+    args, last_arg_start = try
+        Base.shell_parse(scs, true)::Tuple{Expr,Int}
+    catch ex
+        ex isa ArgumentError || ex isa ErrorException || rethrow()
+        return Completion[], 1:0, false
     end
     ex = args.args[end]::Expr
     # Now look at the last thing we parsed
-    isempty(ex.args) && return Completion[], 0:-1, false
-    arg = ex.args[end]
-    if all(s -> isa(s, AbstractString), ex.args)
-        arg = arg::AbstractString
-        # Treat this as a path
-
-        # As Base.shell_parse throws away trailing spaces (unless they are escaped),
-        # we need to special case here.
-        # If the last char was a space, but shell_parse ignored it search on "".
-        ignore_last_word = arg != " " && scs[end] == ' '
-        prefix = ignore_last_word ? "" : join(ex.args)
+    isempty(ex.args) && return Completion[], 1:0, false
+    # Concatenate every string fragment so dir\file completes correctly.
+    lastarg = all(x -> x isa String, ex.args) ? string(ex.args...) : ex.args[end]
+
+    # As Base.shell_parse throws away trailing spaces (unless they are escaped),
+    # we need to special case here.
+    # If the last char was a space, but shell_parse ignored it search on "".
+    if isexpr(lastarg, :incomplete) || isexpr(lastarg, :error)
+        partial = str[last_arg_start:pos]
+        ret, range = completions(partial, lastindex(partial), Main, true, hint)
+        range = range .+ (last_arg_start - 1)
+        return ret, range, true
+    elseif endswith(scs, ' ') && !endswith(scs, "\\ ")
+        r = pos+1:pos
+        paths, dir, success = complete_path(""; use_envpath=false, shell_escape=!cmd_escape, cmd_escape, dirsep='/')
+        return paths, r, success
+    elseif all(@nospecialize(arg) -> arg isa AbstractString, ex.args)
+        # Join these and treat this as a path
+        path::String = join(ex.args)
+        r = last_arg_start:pos
 
         # Also try looking into the env path if the user wants to complete the first argument
-        use_envpath = !ignore_last_word && length(args.args) < 2
+        use_envpath = length(args.args) < 2
 
-        return complete_path(prefix, pos, use_envpath=use_envpath, shell_escape=true)
-    elseif isexpr(arg, :incomplete) || isexpr(arg, :error)
-        partial = scs[last_parse]
-        ret, range = completions(partial, lastindex(partial))
-        range = range .+ (first(last_parse) - 1)
-        return ret, range, true
+        paths, success = complete_path_string(path, hint; use_envpath, shell_escape=!cmd_escape, cmd_escape, dirsep='/')
+        return paths, r, success
     end
-    return Completion[], 0:-1, false
+    return Completion[], 1:0, false
 end
 
-function UndefVarError_hint(io::IO, ex::UndefVarError)
-    var = ex.var
-    if var === :or
-        print(io, "\nsuggestion: Use `||` for short-circuiting boolean OR.")
-    elseif var === :and
-        print(io, "\nsuggestion: Use `&&` for short-circuiting boolean AND.")
-    elseif var === :help
-        println(io)
-        # Show friendly help message when user types help or help() and help is undefined
-        show(io, MIME("text/plain"), Base.Docs.parsedoc(Base.Docs.keywords[:help]))
-    elseif var === :quit
-        print(io, "\nsuggestion: To exit Julia, use Ctrl-D, or type exit() and press enter.")
+function complete_path_string(path, hint::Bool=false;
+                              shell_escape::Bool=false,
+                              cmd_escape::Bool=false,
+                              string_escape::Bool=false,
+                              dirsep='/',
+                              kws...)
+    # Expand "~" and remember if we expanded it.
+    local expanded
+    try
+        let p = expanduser(path)
+            expanded = path != p
+            path = p
+        end
+    catch e
+        e isa ArgumentError || rethrow()
+        expanded = false
+    end
+
+    function escape(p)
+        shell_escape && (p = do_shell_escape(p))
+        string_escape && (p = do_string_escape(p))
+        cmd_escape && (p = do_cmd_escape(p))
+        p
+    end
+
+    paths, dir, success = complete_path(path; dirsep, kws...)
+
+    # Expand '~' if the user hits TAB after exhausting completions (either
+    # because we have found an existing file, or there is no such file).
+    full_path = try
+        ispath(path) || isempty(paths)
+    catch err
+        # access(2) errors unhandled by ispath: EACCES, EIO, ELOOP, ENAMETOOLONG
+        if err isa Base.IOError
+            false
+        elseif err isa Base.ArgumentError && occursin("embedded NULs", err.msg)
+            false
+        else
+            rethrow()
+        end
     end
+    expanded && !hint && full_path && return Completion[PathCompletion(escape(path))], true
+
+    # Expand '~' if the user hits TAB on a path ending in '/'.
+    expanded && (hint || path != dir * "/") && (dir = contractuser(dir))
+
+    map!(paths) do c::PathCompletion
+        p = joinpath_withsep(dir, c.path; dirsep)
+        PathCompletion(escape(p))
+    end
+    return sort!(paths, by=p->p.path), success
 end
 
 function __init__()
-    Base.Experimental.register_error_hint(UndefVarError_hint, UndefVarError)
     COMPLETION_WORLD[] = Base.get_world_counter()
-    nothing
+    return nothing
 end
 
 end # module
diff --git a/stdlib/REPL/src/StylingPasses.jl b/stdlib/REPL/src/StylingPasses.jl
new file mode 100644
index 0000000000000..3606566d341ea
--- /dev/null
+++ b/stdlib/REPL/src/StylingPasses.jl
@@ -0,0 +1,165 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Each pass takes the input string and returns an AnnotatedString with styling annotations
+
+module StylingPasses
+
+using StyledStrings
+using StyledStrings: Face
+using JuliaSyntaxHighlighting
+import Base: AnnotatedString, annotate!, annotations, JuliaSyntax
+
+export StylingPass, StylingContext, SyntaxHighlightPass, RegionHighlightPass,
+       EnclosingParenHighlightPass, apply_styling_passes, merge_annotations
+
+# Context information passed to all styling passes
+struct StylingContext
+    cursor_pos::Int
+    region_start::Int
+    region_stop::Int
+end
+
+StylingContext(cursor_pos::Int) = StylingContext(cursor_pos, 0, 0)
+
+abstract type StylingPass end
+
+function merge_annotations(annotated_strings::Vector{<:AnnotatedString})
+    isempty(annotated_strings) && return AnnotatedString("")
+
+    result = AnnotatedString(annotated_strings[1])
+
+    for source in annotated_strings
+        for ann in annotations(source)
+            annotate!(result, ann.region, ann.label, ann.value)
+        end
+    end
+
+    return result
+end
+
+function apply_style(pass::StylingPass, input::String, ast, context::StylingContext)
+    return pass(input, ast, context)::AnnotatedString{String}
+end
+
+function apply_styling_passes(input::String, passes::Vector{StylingPass}, context::StylingContext)
+    if isempty(passes)
+        return AnnotatedString(input)
+    end
+
+    # Parse once and share AST across all passes
+    ast = JuliaSyntax.parseall(JuliaSyntax.GreenNode, input; ignore_errors=true)
+
+    results = [apply_style(pass, input, ast, context) for pass in passes]
+    return merge_annotations(results)
+end
+
+# Applies Julia syntax highlighting
+struct SyntaxHighlightPass <: StylingPass end
+
+function (::SyntaxHighlightPass)(input::String, ast, ::StylingContext)
+    try
+        return JuliaSyntaxHighlighting.highlight(input, ast)
+    catch e
+        @error "Error in SyntaxHighlightPass" exception=(e, catch_backtrace()) maxlog=1
+        return AnnotatedString(input)
+    end
+end
+
+# Applies inverse video styling to the selected region
+struct RegionHighlightPass <: StylingPass end
+
+function (::RegionHighlightPass)(input::String, ::Any, context::StylingContext)
+    result = AnnotatedString(input)
+
+    if context.region_start > 0 && context.region_stop >= context.region_start
+        # Add inverse face to the region
+        # Region positions are 1-based byte positions
+        region_range = context.region_start:context.region_stop
+        annotate!(result, region_range, :face, Face(inverse=true))
+    end
+
+    return result
+end
+
+# Applies bold styling to parentheses that enclose the cursor position
+struct EnclosingParenHighlightPass <: StylingPass
+    face::Face
+end
+
+EnclosingParenHighlightPass() = EnclosingParenHighlightPass(Face(weight=:bold, underline=true))
+
+function (pass::EnclosingParenHighlightPass)(input::String, ast, context::StylingContext)
+    result = AnnotatedString(input)
+
+    if isempty(input) || context.cursor_pos < 1
+        return result
+    end
+
+    try
+        paren_pairs = find_enclosing_parens(input, ast, context.cursor_pos)
+
+        for (open_pos, close_pos) in paren_pairs
+            annotate!(result, open_pos:open_pos, :face, pass.face)
+            annotate!(result, close_pos:close_pos, :face, pass.face)
+        end
+    catch e
+        @error "Error in EnclosingParenHighlightPass" exception=(e, catch_backtrace()) maxlog=1
+    end
+
+    return result
+end
+
+function paren_type(k)
+    if     k == JuliaSyntax.K"(";  1, :paren
+    elseif k == JuliaSyntax.K")"; -1, :paren
+    elseif k == JuliaSyntax.K"[";  1, :bracket
+    elseif k == JuliaSyntax.K"]"; -1, :bracket
+    elseif k == JuliaSyntax.K"{";  1, :curly
+    elseif k == JuliaSyntax.K"}"; -1, :curly
+    else                           0, :none
+    end
+end
+
+function find_enclosing_parens(content::String, ast, cursor_pos::Int)
+    innermost_pairs = Dict{Symbol,Tuple{Int,Int}}()
+    paren_stack = Tuple{Int,Int,Symbol}[]  # (open_pos, depth, type)
+
+    walk_tree(ast, content, UInt32(0)) do node, offset
+        nkind = JuliaSyntax.kind(node)
+        pos = firstindex(content) + offset
+
+        depthchange, ptype = paren_type(nkind)
+
+        if ptype != :none
+            if depthchange > 0
+                # Opening paren - push to stack
+                push!(paren_stack, (pos, length(paren_stack) + 1, ptype))
+            elseif depthchange < 0 && !isempty(paren_stack)
+                # Closing paren - pop from stack and check if cursor is inside
+                open_pos, depth, open_ptype = pop!(paren_stack)
+                if open_ptype == ptype && open_pos <= cursor_pos < pos
+                    # Cursor is inside this paren pair - keep only innermost per type
+                    # Only update if this is the first pair or if it's smaller (more inner) than existing
+                    if !haskey(innermost_pairs, ptype) || (pos - open_pos) < (innermost_pairs[ptype][2] - innermost_pairs[ptype][1])
+                        innermost_pairs[ptype] = (open_pos, pos)
+                    end
+                end
+            end
+        end
+    end
+
+    return collect(values(innermost_pairs))
+end
+
+function walk_tree(f::Function, node, content::String, offset::UInt32)
+    f(node, offset)
+
+    if JuliaSyntax.numchildren(node) > 0
+        for child in JuliaSyntax.children(node)
+            walk_tree(f, child, content, offset)
+            offset += JuliaSyntax.span(child)
+        end
+    end
+end
+
+end # module StylingPasses
diff --git a/stdlib/REPL/src/SyntaxUtil.jl b/stdlib/REPL/src/SyntaxUtil.jl
new file mode 100644
index 0000000000000..86178b45bf163
--- /dev/null
+++ b/stdlib/REPL/src/SyntaxUtil.jl
@@ -0,0 +1,109 @@
+module SyntaxUtil
+
+import Base.JuliaSyntax: build_tree
+using Base.JuliaSyntax:
+    AbstractSyntaxData, GreenNode, Kind, ParseStream, SourceFile, SyntaxHead, SyntaxNode, TreeNode,
+    byte_range, children, first_byte, head, is_leaf, is_trivia, kind, parse_julia_literal, span,
+    @K_str, _unsafe_wrap_substring
+
+export CursorNode, char_range, char_last, children_nt, find_delim, seek_pos
+
+# Like SyntaxNode, but keeps trivia, and tracks each child's index in its parent.
+# Extracted from JuliaSyntax/src/syntax_tree.jl
+# TODO: don't duplicate so much code?
+struct CursorData <: AbstractSyntaxData
+    source::SourceFile
+    raw::GreenNode{SyntaxHead}
+    byte_end::Int
+    index::Int
+    index_nt::Int # nth non-trivia in parent
+    val::Any
+end
+
+const CursorNode = TreeNode{CursorData}
+
+function CursorNode(source::SourceFile, raw::GreenNode{SyntaxHead};
+                    position::Integer=1)
+    GC.@preserve source begin
+        raw_offset, txtbuf = _unsafe_wrap_substring(source.code)
+        offset = raw_offset - source.byte_offset
+        _to_CursorNode(source, txtbuf, offset, raw, convert(Int, position))
+    end
+end
+
+function _to_CursorNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int,
+                        raw::GreenNode{SyntaxHead},
+                        position::Int, index::Int=-1, index_nt::Int=-1)
+    byte_end = position + span(raw) - 1
+    if is_leaf(raw)
+        val = parse_julia_literal(txtbuf, head(raw), position:byte_end .+ offset)
+        return CursorNode(nothing, nothing, CursorData(source, raw, byte_end, index, index_nt, val))
+    else
+        cs = CursorNode[]
+        pos = position
+        i_nt = 1
+        for (i,rawchild) in enumerate(children(raw))
+            push!(cs, _to_CursorNode(source, txtbuf, offset, rawchild, pos, i, i_nt))
+            pos += Int(rawchild.span)
+            i_nt += !is_trivia(rawchild)
+        end
+        node = CursorNode(nothing, cs, CursorData(source, raw, byte_end, index, index_nt, nothing))
+        for c in cs
+            c.parent = node
+        end
+        return node
+    end
+end
+
+function build_tree(::Type{CursorNode}, stream::ParseStream;
+                    filename=nothing, first_line=1, kws...)
+    green_tree = build_tree(GreenNode, stream; kws...)
+    source = SourceFile(stream, filename=filename, first_line=first_line)
+    CursorNode(source, green_tree, position=first_byte(stream))
+end
+
+Base.show(io::IO, node::CursorNode) = show(io, MIME("text/plain"), node.raw)
+Base.show(io::IO, mime::MIME{Symbol("text/plain")}, node::CursorNode) = show(io, mime, node.raw)
+
+Base.JuliaSyntax._expr_leaf_val(node::CursorNode, _...) = node.val
+Base.Expr(node::CursorNode) = Base.JuliaSyntax.to_expr(node)
+
+char_range(node::CursorNode) = char_first(node):char_last(node)
+char_first(node::CursorNode) = Int(node.byte_end) - Int(node.raw.span) + 1
+char_last(node::CursorNode) = thisind(node.source, node.byte_end)
+
+children_nt(node::CursorNode) = [n for n in children(node) if !is_trivia(n)]
+
+function seek_pos(node, pos)
+    pos in byte_range(node) || return nothing
+    (cs = children(node)) === nothing && return node
+    for n in cs
+        c = seek_pos(n, pos)
+        c === nothing || return c
+    end
+    node
+end
+
+find_parent(node, k::Kind) = find_parent(node, n -> kind(n) == k)
+function find_parent(node, f::Function)
+    while node !== nothing && !f(node)
+        node = node.parent
+    end
+    node
+end
+
+# Return the character range between left_kind and right_kind in node.  The left
+# delimiter must be present, while the range will extend to the rest of the node
+# if the right delimiter is missing.
+function find_delim(node::CursorNode, left_kind::Kind, right_kind::Kind)
+    cs = children(node)
+    left = findfirst(c -> kind(c) == left_kind, cs)
+    left !== nothing || return nothing, nothing
+    right = findlast(c -> kind(c) == right_kind, cs)
+    closed = right !== nothing && right != left
+    right = closed ? thisind(node.source, char_first(cs[right]) - 1) : char_last(node)
+    left = nextind(node.source, char_last(cs[left]))
+    return left:right, closed
+end
+
+end
diff --git a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
index a1f94852b38ec..fb546c8185232 100644
--- a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
@@ -129,7 +129,7 @@ end
 ##################################################################
 
 """
-    header(m::AbstractMenu) -> String
+    header(m::AbstractMenu)::String
 
 Return a header string to be printed above the menu.
 Defaults to "".
@@ -137,7 +137,7 @@ Defaults to "".
 header(m::AbstractMenu) = ""
 
 """
-    keypress(m::AbstractMenu, i::UInt32) -> Bool
+    keypress(m::AbstractMenu, i::UInt32)::Bool
 
 Handle any non-standard keypress event.
 If `true` is returned, [`TerminalMenus.request`](@ref) will exit.
@@ -146,7 +146,7 @@ Defaults to `false`.
 keypress(m::AbstractMenu, i::UInt32) = false
 
 """
-    numoptions(m::AbstractMenu) -> Int
+    numoptions(m::AbstractMenu)::Int
 
 Return the number of options in menu `m`. Defaults to `length(options(m))`.
 
@@ -176,7 +176,7 @@ Returns `selected(m)`.
 !!! compat "Julia 1.6"
     The `cursor` argument requires Julia 1.6 or later.
 """
-request(m::AbstractMenu; kwargs...) = request(terminal, m; kwargs...)
+request(m::AbstractMenu; kwargs...) = request(default_terminal(), m; kwargs...)
 
 function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Union{Int, Base.RefValue{Int}}=1, suppress_output=false)
     if cursor isa Int
@@ -252,7 +252,7 @@ end
 
 Shorthand for `println(msg); request(m)`.
 """
-request(msg::AbstractString, m::AbstractMenu; kwargs...) = request(terminal, msg, m; kwargs...)
+request(msg::AbstractString, m::AbstractMenu; kwargs...) = request(default_terminal(), msg, m; kwargs...)
 
 function request(term::REPL.Terminals.TTYTerminal, msg::AbstractString, m::AbstractMenu; kwargs...)
     println(term.out_stream, msg)
diff --git a/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl b/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl
index 5c3ecf3808c49..fd660fc0f7824 100644
--- a/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl
@@ -38,7 +38,7 @@ end
 
 """
 
-    MultiSelectMenu(options::Array{String,1}; pagesize::Int=10, selected=[], kwargs...)
+    MultiSelectMenu(options::Vector{String}; pagesize::Int=10, selected=[], kwargs...)
 
 Create a MultiSelectMenu object. Use `request(menu::MultiSelectMenu)` to get
 user input. It returns a `Set` containing the indices of options that
@@ -46,7 +46,7 @@ were selected by the user.
 
 # Arguments
 
-  - `options::Array{String, 1}`: Options to be displayed
+  - `options::Vector{String}`: Options to be displayed
   - `pagesize::Int=10`: The number of options to be displayed at one time, the menu will scroll if length(options) > pagesize
   - `selected=[]`: pre-selected items. `i ∈ selected` means that `options[i]` is preselected.
 
diff --git a/stdlib/REPL/src/TerminalMenus/Pager.jl b/stdlib/REPL/src/TerminalMenus/Pager.jl
index c823a5dedd1ba..091f87801e7a4 100644
--- a/stdlib/REPL/src/TerminalMenus/Pager.jl
+++ b/stdlib/REPL/src/TerminalMenus/Pager.jl
@@ -39,4 +39,4 @@ function pager(terminal, object)
     pager = Pager(String(take!(buffer)); pagesize = div(lines, 2))
     return request(terminal, pager)
 end
-pager(object) = pager(terminal, object)
+pager(object) = pager(default_terminal(), object)
diff --git a/stdlib/REPL/src/TerminalMenus/RadioMenu.jl b/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
index 32a6373b719d7..8e35e37f7f973 100644
--- a/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
@@ -31,9 +31,9 @@ end
 
 """
 
-    RadioMenu(options::Array{String,1}; pagesize::Int=10,
-                                        keybindings::Vector{Char}=Char[],
-                                        kwargs...)
+    RadioMenu(options::Vector{String}; pagesize::Int=10,
+                                       keybindings::Vector{Char}=Char[],
+                                       kwargs...)
 
 Create a RadioMenu object. Use `request(menu::RadioMenu)` to get user input.
 `request()` returns an `Int` which is the index of the option selected by the
@@ -41,7 +41,7 @@ user.
 
 # Arguments
 
-  - `options::Array{String, 1}`: Options to be displayed
+  - `options::Vector{String}`: Options to be displayed
   - `pagesize::Int=10`: The number of options to be displayed at one time, the menu will scroll if length(options) > pagesize
   - `keybindings::Vector{Char}=Char[]`: Shortcuts to pick corresponding entry from `options`
 
diff --git a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
index 87869e84d9838..f970cd9a289c2 100644
--- a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
+++ b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
@@ -1,14 +1,19 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-module TerminalMenus
+"""
+    REPL.TerminalMenus
 
-terminal = nothing  # The user terminal
+A module that contains code for displaying text mode interactive menus.
+Key exported symbols include [`REPL.TerminalMenus.RadioMenu`](@ref) and
+[`REPL.TerminalMenus.MultiSelectMenu`](@ref).
+"""
+module TerminalMenus
 
-import REPL
+using ..REPL: REPL
 
-function __init__()
-    global terminal
-    terminal = REPL.Terminals.TTYTerminal(get(ENV, "TERM", Sys.iswindows() ? "" : "dumb"), stdin, stdout, stderr)
+function default_terminal(; in::IO=stdin, out::IO=stdout, err::IO=stderr)
+    return REPL.Terminals.TTYTerminal(
+        get(ENV, "TERM", Sys.iswindows() ? "" : "dumb"), in, out, err)
 end
 
 include("util.jl")
@@ -25,6 +30,9 @@ export
     Pager,
     request
 
+public Config, config, MultiSelectConfig
+public pick, cancel, writeline, options, numoptions, selected, header, keypress
+
 # TODO: remove in Julia 2.0
 # While not exported, AbstractMenu documented these as an extension interface
 @deprecate printMenu printmenu
diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl
index b9797dee910c2..e05b3075d9f7c 100644
--- a/stdlib/REPL/src/docview.jl
+++ b/stdlib/REPL/src/docview.jl
@@ -9,9 +9,9 @@ using Base.Docs: catdoc, modules, DocStr, Binding, MultiDoc, keywords, isfield,
 
 import Base.Docs: doc, formatdoc, parsedoc, apropos
 
-using Base: with_output_color, mapany
+using Base: with_output_color, mapany, isdeprecated, isexported
 
-import REPL
+using Base.Filesystem: _readdirx
 
 using InteractiveUtils: subtypes
 
@@ -20,20 +20,28 @@ using Unicode: normalize
 ## Help mode ##
 
 # This is split into helpmode and _helpmode to easier unittest _helpmode
-helpmode(io::IO, line::AbstractString, mod::Module=Main) = :($REPL.insert_hlines($io, $(REPL._helpmode(io, line, mod))))
+function helpmode(io::IO, line::AbstractString, mod::Module=Main)
+    internal_accesses = Set{Pair{Module,Symbol}}()
+    quote
+        docs = $Markdown.insert_hlines($(REPL._helpmode(io, line, mod, internal_accesses)))
+        $REPL.insert_internal_warning(docs, $internal_accesses)
+    end
+end
 helpmode(line::AbstractString, mod::Module=Main) = helpmode(stdout, line, mod)
 
+# A hack to make the line entered at the REPL available at trimdocs without
+# passing the string through the entire mechanism.
 const extended_help_on = Ref{Any}(nothing)
 
-function _helpmode(io::IO, line::AbstractString, mod::Module=Main)
+function _helpmode(io::IO, line::AbstractString, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing)
     line = strip(line)
     ternary_operator_help = (line == "?" || line == "?:")
     if startswith(line, '?') && !ternary_operator_help
         line = line[2:end]
-        extended_help_on[] = line
+        extended_help_on[] = nothing
         brief = false
     else
-        extended_help_on[] = nothing
+        extended_help_on[] = line
         brief = true
     end
     # interpret anything starting with # or #= as asking for help on comments
@@ -47,7 +55,7 @@ function _helpmode(io::IO, line::AbstractString, mod::Module=Main)
     x = Meta.parse(line, raise = false, depwarn = false)
     assym = Symbol(line)
     expr =
-        if haskey(keywords, Symbol(line)) || Base.isoperator(assym) || isexpr(x, :error) ||
+        if haskey(keywords, assym) || Base.isoperator(assym) || isexpr(x, :error) ||
             isexpr(x, :invalid) || isexpr(x, :incomplete)
             # Docs for keywords must be treated separately since trying to parse a single
             # keyword such as `function` would throw a parse error due to the missing `end`.
@@ -64,30 +72,17 @@ function _helpmode(io::IO, line::AbstractString, mod::Module=Main)
         end
     # the following must call repl(io, expr) via the @repl macro
     # so that the resulting expressions are evaluated in the Base.Docs namespace
-    :($REPL.@repl $io $expr $brief $mod)
+    :($REPL.@repl $io $expr $brief $mod $internal_accesses)
 end
 _helpmode(line::AbstractString, mod::Module=Main) = _helpmode(stdout, line, mod)
 
-# Print vertical lines along each docstring if there are multiple docs
-function insert_hlines(io::IO, docs)
-    if !isa(docs, Markdown.MD) || !haskey(docs.meta, :results) || isempty(docs.meta[:results])
-        return docs
-    end
-    docs = docs::Markdown.MD
-    v = Any[]
-    for (n, doc) in enumerate(docs.content)
-        push!(v, doc)
-        n == length(docs.content) || push!(v, Markdown.HorizontalRule())
-    end
-    return Markdown.MD(v)
-end
-
 function formatdoc(d::DocStr)
     buffer = IOBuffer()
     for part in d.text
         formatdoc(buffer, d, part)
     end
-    Markdown.MD(Any[Markdown.parse(seekstart(buffer))])
+    md = Markdown.MD(Any[Markdown.parse(seekstart(buffer))])
+    assume_julia_code!(md)
 end
 @noinline formatdoc(buffer, d, part) = print(buffer, part)
 
@@ -101,6 +96,27 @@ function parsedoc(d::DocStr)
     d.object
 end
 
+"""
+    assume_julia_code!(doc::Markdown.MD) -> doc
+
+Assume that code blocks with no language specified are Julia code.
+"""
+function assume_julia_code!(doc::Markdown.MD)
+    assume_julia_code!(doc.content)
+    doc
+end
+
+function assume_julia_code!(blocks::Vector)
+    for (i, block) in enumerate(blocks)
+        if block isa Markdown.Code && block.language == ""
+            blocks[i] = Markdown.Code("julia", block.code)
+        elseif block isa Vector || block isa Markdown.MD
+            assume_julia_code!(block)
+        end
+    end
+    blocks
+end
+
 ## Trimming long help ("# Extended help")
 
 struct Message  # For direct messages to the terminal
@@ -148,14 +164,48 @@ end
 
 _trimdocs(md, brief::Bool) = md, false
 
-"""
-    Docs.doc(binding, sig)
 
-Return all documentation that matches both `binding` and `sig`.
+is_tuple(expr) = false
+is_tuple(expr::Expr) = expr.head == :tuple
+
+struct Logged{F}
+    f::F
+    mod::Module
+    collection::Set{Pair{Module,Symbol}}
+end
+function (la::Logged)(m::Module, s::Symbol)
+    m !== la.mod && Base.isdefined(m, s) && !Base.ispublic(m, s) && push!(la.collection, m => s)
+    la.f(m, s)
+end
+(la::Logged)(args...) = la.f(args...)
+
+function log_nonpublic_access(expr::Expr, mod::Module, internal_access::Set{Pair{Module,Symbol}})
+    if expr.head === :. && length(expr.args) == 2 && !is_tuple(expr.args[2])
+        Expr(:call, Logged(getproperty, mod, internal_access), log_nonpublic_access.(expr.args, (mod,), (internal_access,))...)
+    elseif expr.head === :call && expr.args[1] === Base.Docs.Binding
+        Expr(:call, Logged(Base.Docs.Binding, mod, internal_access), log_nonpublic_access.(expr.args[2:end], (mod,), (internal_access,))...)
+    else
+        Expr(expr.head, log_nonpublic_access.(expr.args, (mod,), (internal_access,))...)
+    end
+end
+log_nonpublic_access(expr, ::Module, _) = expr
+
+function insert_internal_warning(md::Markdown.MD, internal_access::Set{Pair{Module,Symbol}})
+    if !isempty(internal_access)
+        items = Any[Any[Markdown.Paragraph(Any[Markdown.Code("", s)])] for s in sort!(["$mod.$sym" for (mod, sym) in internal_access])]
+        admonition = Markdown.Admonition("warning", "Warning", Any[
+            Markdown.Paragraph(Any["The following bindings may be internal; they may change or be removed in future versions:"]),
+            Markdown.List(items, -1, false)])
+        pushfirst!(md.content, admonition)
+    end
+    md
+end
+function insert_internal_warning(other, internal_access::Set{Pair{Module,Symbol}})
+    # We don't know how to insert an internal symbol warning into non-markdown
+    # content, so we don't.
+    other
+end
 
-If `getdoc` returns a non-`nothing` result on the value of the binding, then a
-dynamic docstring is returned instead of one based on the binding itself.
-"""
 function doc(binding::Binding, sig::Type = Union{})
     if defined(binding)
         result = getdoc(resolve(binding), sig)
@@ -206,7 +256,7 @@ doc(obj::UnionAll) = doc(Base.unwrap_unionall(obj))
 doc(object, sig::Type = Union{}) = doc(aliasof(object, typeof(object)), sig)
 doc(object, sig...)              = doc(object, Tuple{sig...})
 
-function lookup_doc(ex)
+function lookup_doc(@nospecialize(ex))
     if isa(ex, Expr) && ex.head !== :(.) && Base.isoperator(ex.head)
         # handle syntactic operators, e.g. +=, ::, .=
         ex = ex.head
@@ -234,7 +284,13 @@ function lookup_doc(ex)
             end
         end
     end
-    binding = esc(bindingexpr(namify(ex)))
+    name = namify(ex)
+    # If namify couldn't extract a meaningful name and returned an Expr
+    # that can't be converted to a binding, treat it like a value
+    if isa(name, Expr) && !isexpr(name, :(.))
+        return :($(doc)($(typeof)($(esc(ex)))))
+    end
+    binding = esc(bindingexpr(name))
     if isexpr(ex, :call) || isexpr(ex, :macrocall) || isexpr(ex, :where)
         sig = esc(signature(ex))
         :($(doc)($binding, $sig))
@@ -250,15 +306,23 @@ function summarize(binding::Binding, sig)
     io = IOBuffer()
     if defined(binding)
         binding_res = resolve(binding)
-        !isa(binding_res, Module) && println(io, "No documentation found.\n")
+        if !isa(binding_res, Module)
+            varstr = "$(binding.mod).$(binding.var)"
+            if Base.ispublic(binding.mod, binding.var)
+                println(io, "No documentation found for public binding `$varstr`.\n")
+            else
+                println(io, "No documentation found for private binding `$varstr`.\n")
+            end
+        end
         summarize(io, binding_res, binding)
     else
         println(io, "No documentation found.\n")
         quot = any(isspace, sprint(print, binding)) ? "'" : ""
-        if Base.isbindingresolved(binding.mod, binding.var)
-            println(io, "Binding ", quot, "`", binding, "`", quot, " exists, but has not been assigned a value.")
-        else
+        bpart = Base.lookup_binding_partition(Base.tls_world_age(), convert(Core.Binding, GlobalRef(binding.mod, binding.var)))
+        if Base.binding_kind(bpart) === Base.PARTITION_KIND_GUARD
             println(io, "Binding ", quot, "`", binding, "`", quot, " does not exist.")
+        else
+            println(io, "Binding ", quot, "`", binding, "`", quot, " exists, but has not been assigned a value.")
         end
     end
     md = Markdown.parse(seekstart(io))
@@ -331,9 +395,9 @@ function find_readme(m::Module)::Union{String, Nothing}
     path = dirname(mpath)
     top_path = pkgdir(m)
     while true
-        for file in readdir(path; join=true, sort=true)
-            isfile(file) && (basename(lowercase(file)) in ["readme.md", "readme"]) || continue
-            return file
+        for entry in _readdirx(path; sort=true)
+            isfile(entry) && (lowercase(entry.name) in ["readme.md", "readme"]) || continue
+            return entry.path
         end
         path == top_path && break # go no further than pkgdir
         path = dirname(path) # work up through nested modules
@@ -342,16 +406,17 @@ function find_readme(m::Module)::Union{String, Nothing}
 end
 function summarize(io::IO, m::Module, binding::Binding; nlines::Int = 200)
     readme_path = find_readme(m)
+    public = Base.ispublic(binding.mod, binding.var) ? "public" : "internal"
     if isnothing(readme_path)
-        println(io, "No docstring or readme file found for module `$m`.\n")
+        println(io, "No docstring or readme file found for $public module `$m`.\n")
     else
-        println(io, "No docstring found for module `$m`.")
+        println(io, "No docstring found for $public module `$m`.")
     end
     exports = filter!(!=(nameof(m)), names(m))
     if isempty(exports)
-        println(io, "Module does not export any names.")
+        println(io, "Module does not have any public names.")
     else
-        println(io, "# Exported names")
+        println(io, "# Public names")
         print(io, "  `")
         join(io, exports, "`, `")
         println(io, "`\n")
@@ -359,7 +424,9 @@ function summarize(io::IO, m::Module, binding::Binding; nlines::Int = 200)
     if !isnothing(readme_path)
         readme_lines = readlines(readme_path)
         isempty(readme_lines) && return  # don't say we are going to print empty file
-        println(io, "# Displaying contents of readme found at `$(readme_path)`")
+        println(io)
+        println(io, "---")
+        println(io, "_Package description from `$(basename(readme_path))`:_")
         for line in first(readme_lines, nlines)
             println(io, line)
         end
@@ -375,8 +442,31 @@ end
 
 # repl search and completions for help
 
+# This type is returned from `accessible` and denotes a binding that is accessible within
+# some context. It differs from `Base.Docs.Binding`, which is also used by the REPL, in
+# that it doesn't track the defining module for a symbol unless the symbol is public but
+# not exported, i.e. it's accessible but requires qualification. Using this type rather
+# than `Base.Docs.Binding` simplifies things considerably, partially because REPL searching
+# is based on `String`s, which this type stores, but `Base.Docs.Binding` stores a module
+# and symbol and does not have any notion of the context from which the binding is accessed.
+struct AccessibleBinding
+    source::Union{String,Nothing}
+    name::String
+end
+
+function AccessibleBinding(mod::Module, name::Symbol)
+    m = isexported(mod, name) ? nothing : String(nameof(mod))
+    return AccessibleBinding(m, String(name))
+end
+AccessibleBinding(name::Symbol) = AccessibleBinding(nothing, String(name))
+
+function Base.show(io::IO, b::AccessibleBinding)
+    b.source === nothing || print(io, b.source, '.')
+    print(io, b.name)
+end
 
 quote_spaces(x) = any(isspace, x) ? "'" * x * "'" : x
+quote_spaces(x::AccessibleBinding) = AccessibleBinding(x.source, quote_spaces(x.name))
 
 function repl_search(io::IO, s::Union{Symbol,String}, mod::Module)
     pre = "search:"
@@ -393,7 +483,12 @@ function repl_corrections(io::IO, s, mod::Module)
     quot = any(isspace, s) ? "'" : ""
     print(io, quot)
     printstyled(io, s, color=:cyan)
-    print(io, quot, '\n')
+    print(io, quot)
+    if Base.identify_package(s) === nothing
+        print(io, '\n')
+    else
+        print(io, ", but a loadable package with that name exists. If you are looking for the package docs load the package first.\n")
+    end
     print_correction(io, s, mod)
 end
 repl_corrections(s) = repl_corrections(stdout, s)
@@ -401,7 +496,7 @@ repl_corrections(s) = repl_corrections(stdout, s)
 # inverse of latex_symbols Dict, lazily created as needed
 const symbols_latex = Dict{String,String}()
 function symbol_latex(s::String)
-    if isempty(symbols_latex) && isassigned(Base.REPL_MODULE_REF)
+    if isempty(symbols_latex)
         for (k,v) in Iterators.flatten((REPLCompletions.latex_symbols,
                                         REPLCompletions.emoji_symbols))
             symbols_latex[v] = k
@@ -472,30 +567,28 @@ end
 repl_latex(s::String) = repl_latex(stdout, s)
 
 macro repl(ex, brief::Bool=false, mod::Module=Main) repl(ex; brief, mod) end
-macro repl(io, ex, brief, mod) repl(io, ex; brief, mod) end
+macro repl(io, ex, brief, mod, internal_accesses) repl(io, ex; brief, mod, internal_accesses) end
 
-function repl(io::IO, s::Symbol; brief::Bool=true, mod::Module=Main)
+function repl(io::IO, s::Symbol; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing)
     str = string(s)
     quote
         repl_latex($io, $str)
         repl_search($io, $str, $mod)
-        $(if !isdefined(mod, s) && !Base.isbindingresolved(mod, s) && !haskey(keywords, s) && !Base.isoperator(s)
-               # n.b. we call isdefined for the side-effect of resolving the binding, if possible
+        $(if !isdefined(mod, s) && !haskey(keywords, s) && !Base.isoperator(s)
                :(repl_corrections($io, $str, $mod))
           end)
-        $(_repl(s, brief))
+        $(_repl(s, brief, mod, internal_accesses))
     end
 end
 isregex(x) = isexpr(x, :macrocall, 3) && x.args[1] === Symbol("@r_str") && !isempty(x.args[3])
 
-repl(io::IO, ex::Expr; brief::Bool=true, mod::Module=Main) = isregex(ex) ? :(apropos($io, $ex)) : _repl(ex, brief)
-repl(io::IO, str::AbstractString; brief::Bool=true, mod::Module=Main) = :(apropos($io, $str))
-repl(io::IO, other; brief::Bool=true, mod::Module=Main) = esc(:(@doc $other))
-#repl(io::IO, other) = lookup_doc(other) # TODO
+repl(io::IO, ex::Expr; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) = isregex(ex) ? :(apropos($io, $ex)) : _repl(ex, brief, mod, internal_accesses)
+repl(io::IO, str::AbstractString; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) = :(apropos($io, $str))
+repl(io::IO, other; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) = esc(:(@doc $other)) # TODO: track internal_accesses
 
 repl(x; brief::Bool=true, mod::Module=Main) = repl(stdout, x; brief, mod)
 
-function _repl(x, brief::Bool=true)
+function _repl(x, brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing)
     if isexpr(x, :call)
         x = x::Expr
         # determine the types of the values
@@ -552,7 +645,7 @@ function _repl(x, brief::Bool=true)
     docs = esc(:(@doc $x))
     docs = if isfield(x)
         quote
-            if isa($(esc(x.args[1])), DataType)
+            if $(esc(x.args[1])) isa Type
                 fielddoc($(esc(x.args[1])), $(esc(x.args[2])))
             else
                 $docs
@@ -561,6 +654,7 @@ function _repl(x, brief::Bool=true)
     else
         docs
     end
+    docs = log_nonpublic_access(macroexpand(mod, docs), mod, internal_accesses)
     :(REPL.trimdocs($docs, $brief))
 end
 
@@ -573,23 +667,29 @@ function fielddoc(binding::Binding, field::Symbol)
     for mod in modules
         dict = meta(mod; autoinit=false)
         isnothing(dict) && continue
-        if haskey(dict, binding)
-            multidoc = dict[binding]
-            if haskey(multidoc.docs, Union{})
-                fields = multidoc.docs[Union{}].data[:fields]
-                if haskey(fields, field)
-                    doc = fields[field]
-                    return isa(doc, Markdown.MD) ? doc : Markdown.parse(doc)
+        multidoc = get(dict, binding, nothing)
+        if multidoc !== nothing
+            structdoc = get(multidoc.docs, Union{}, nothing)
+            if structdoc !== nothing
+                fieldsdoc = get(structdoc.data, :fields, nothing)
+                if fieldsdoc !== nothing
+                    fielddoc = get(fieldsdoc, field, nothing)
+                    if fielddoc !== nothing
+                        return isa(fielddoc, Markdown.MD) ?
+                            fielddoc : Markdown.parse(fielddoc)
+                    end
                 end
             end
         end
     end
-    fields = join(["`$f`" for f in fieldnames(resolve(binding))], ", ", ", and ")
-    fields = isempty(fields) ? "no fields" : "fields $fields"
+    fs = fieldnames(resolve(binding))
+    fields = isempty(fs) ? "no fields" : (length(fs) == 1 ? "field " : "fields ") *
+                                          join(("`$f`" for f in fs), ", ", ", and ")
     Markdown.parse("`$(resolve(binding))` has $fields.")
 end
 
 # As with the additional `doc` methods, this converts an object to a `Binding` first.
+fielddoc(obj::UnionAll, field::Symbol) = fielddoc(Base.unwrap_unionall(obj), field)
 fielddoc(object, field::Symbol) = fielddoc(aliasof(object, typeof(object)), field)
 
 
@@ -618,28 +718,80 @@ function matchinds(needle, haystack; acronym::Bool = false)
     return is
 end
 
+matchinds(needle, (; name)::AccessibleBinding; acronym::Bool=false) =
+    matchinds(needle, name; acronym)
+
 longer(x, y) = length(x) ≥ length(y) ? (x, true) : (y, false)
 
 bestmatch(needle, haystack) =
     longer(matchinds(needle, haystack, acronym = true),
            matchinds(needle, haystack))
 
-avgdistance(xs) =
-    isempty(xs) ? 0 :
-    (xs[end] - xs[1] - length(xs)+1)/length(xs)
+# Optimal string distance: Counts the minimum number of insertions, deletions,
+# transpositions or substitutions to go from one string to the other.
+function string_distance(a::AbstractString, lena::Integer, b::AbstractString, lenb::Integer)
+    if lena > lenb
+        a, b = b, a
+        lena, lenb = lenb, lena
+    end
+    start = 0
+    for (i, j) in zip(a, b)
+        if a == b
+            start += 1
+        else
+            break
+        end
+    end
+    start == lena && return lenb - start
+    vzero = collect(1:(lenb - start))
+    vone = similar(vzero)
+    prev_a, prev_b = first(a), first(b)
+    current = 0
+    for (i, ai) in enumerate(a)
+        i > start || (prev_a = ai; continue)
+        left = i - start - 1
+        current = i - start
+        transition_next = 0
+        for (j, bj) in enumerate(b)
+            j > start || (prev_b = bj; continue)
+            # No need to look beyond window of lower right diagonal
+            above = current
+            this_transition = transition_next
+            transition_next = vone[j - start]
+            vone[j - start] = current = left
+            left = vzero[j - start]
+            if ai != bj
+                # Minimum between substitution, deletion and insertion
+                current = min(current + 1, above + 1, left + 1)
+                if i > start + 1 && j > start + 1 && ai == prev_b && prev_a == bj
+                    current = min(current, (this_transition += 1))
+                end
+            end
+            vzero[j - start] = current
+            prev_b = bj
+        end
+        prev_a = ai
+    end
+    current
+end
 
-function fuzzyscore(needle, haystack)
-    score = 0.
-    is, acro = bestmatch(needle, haystack)
-    score += (acro ? 2 : 1)*length(is) # Matched characters
-    score -= 2(length(needle)-length(is)) # Missing characters
-    !acro && (score -= avgdistance(is)/10) # Contiguous
-    !isempty(is) && (score -= sum(is)/length(is)/100) # Closer to beginning
-    return score
+function fuzzyscore(needle::AbstractString, haystack::AbstractString)
+    lena, lenb = length(needle), length(haystack)
+    1 - (string_distance(needle, lena, haystack, lenb) / max(lena, lenb))
 end
 
-function fuzzysort(search::String, candidates::Vector{String})
-    scores = map(cand -> (fuzzyscore(search, cand), -Float64(levenshtein(search, cand))), candidates)
+function fuzzyscore(needle::AbstractString, haystack::AccessibleBinding)
+    score = fuzzyscore(needle, haystack.name)
+    haystack.source === nothing && return score
+    # Apply a "penalty" of half an edit if the comparator binding is public but not
+    # exported so that exported/local names that exactly match the search query are
+    # listed first
+    penalty = 1 / (2 * max(length(needle), length(haystack.name)))
+    return max(score - penalty, 0)
+end
+
+function fuzzysort(search::String, candidates::Vector{AccessibleBinding})
+    scores = map(cand -> fuzzyscore(search, cand), candidates)
     candidates[sortperm(scores)] |> reverse
 end
 
@@ -663,12 +815,14 @@ function levenshtein(s1, s2)
     return d[m+1, n+1]
 end
 
-function levsort(search::String, candidates::Vector{String})
-    scores = map(cand -> (Float64(levenshtein(search, cand)), -fuzzyscore(search, cand)), candidates)
+function levsort(search::String, candidates::Vector{AccessibleBinding})
+    scores = map(candidates) do cand
+        (Float64(levenshtein(search, cand.name)), -fuzzyscore(search, cand))
+    end
     candidates = candidates[sortperm(scores)]
     i = 0
     for outer i = 1:length(candidates)
-        levenshtein(search, candidates[i]) > 3 && break
+        levenshtein(search, candidates[i].name) > 3 && break
     end
     return candidates[1:i]
 end
@@ -686,24 +840,39 @@ function printmatch(io::IO, word, match)
     end
 end
 
+function printmatch(io::IO, word, match::AccessibleBinding)
+    match.source === nothing || print(io, match.source, '.')
+    printmatch(io, word, match.name)
+end
+
+function matchlength(x::AccessibleBinding)
+    n = length(x.name)
+    if x.source !== nothing
+        n += length(x.source) + 1  # the +1 is for the `.` separator
+    end
+    return n
+end
+matchlength(x) = length(x)
+
 function printmatches(io::IO, word, matches; cols::Int = _displaysize(io)[2])
     total = 0
     for match in matches
-        total + length(match) + 1 > cols && break
-        fuzzyscore(word, match) < 0 && break
+        ml = matchlength(match)
+        total + ml + 1 > cols && break
+        fuzzyscore(word, match) < 0.5 && break
         print(io, " ")
         printmatch(io, word, match)
-        total += length(match) + 1
+        total += ml + 1
     end
 end
 
 printmatches(args...; cols::Int = _displaysize(stdout)[2]) = printmatches(stdout, args..., cols = cols)
 
-function print_joined_cols(io::IO, ss::Vector{String}, delim = "", last = delim; cols::Int = _displaysize(io)[2])
+function print_joined_cols(io::IO, ss::Vector{AccessibleBinding}, delim = "", last = delim; cols::Int = _displaysize(io)[2])
     i = 0
     total = 0
     for outer i = 1:length(ss)
-        total += length(ss[i])
+        total += matchlength(ss[i])
         total + max(i-2,0)*length(delim) + (i>1 ? 1 : 0)*length(last) > cols && (i-=1; break)
     end
     join(io, ss[1:i], delim, last)
@@ -725,27 +894,31 @@ print_correction(word, mod::Module) = print_correction(stdout, word, mod)
 
 # Completion data
 
-
 moduleusings(mod) = ccall(:jl_module_usings, Any, (Any,), mod)
 
-filtervalid(names) = filter(x->!occursin(r"#", x), map(string, names))
-
-accessible(mod::Module) =
-    Symbol[filter!(s -> !Base.isdeprecated(mod, s), names(mod, all=true, imported=true));
-           map(names, moduleusings(mod))...;
-           collect(keys(Base.Docs.keywords))] |> unique |> filtervalid
+function accessible(mod::Module)
+    bindings = Set(AccessibleBinding(s) for s in names(mod; all=true, imported=true)
+                   if !isdeprecated(mod, s))
+    for used in moduleusings(mod)
+        union!(bindings, (AccessibleBinding(used, s) for s in names(used)
+                          if !isdeprecated(used, s)))
+    end
+    union!(bindings, (AccessibleBinding(k) for k in keys(Base.Docs.keywords)))
+    filter!(b -> !occursin('#', b.name), bindings)
+    return collect(bindings)
+end
 
 function doc_completions(name, mod::Module=Main)
     res = fuzzysort(name, accessible(mod))
 
     # to insert an entry like `raw""` for `"@raw_str"` in `res`
-    ms = match.(r"^@(.*?)_str$", res)
+    ms = map(c -> match(r"^@(.*?)_str$", c.name), res)
     idxs = findall(!isnothing, ms)
 
     # avoid messing up the order while inserting
     for i in reverse!(idxs)
         c = only((ms[i]::AbstractMatch).captures)
-        insert!(res, i, "$(c)\"\"")
+        insert!(res, i, AccessibleBinding(res[i].source, "$(c)\"\""))
     end
     res
 end
@@ -822,18 +995,6 @@ stripmd(x::Markdown.Footnote) = "$(stripmd(x.id)) $(stripmd(x.text))"
 stripmd(x::Markdown.Table) =
     join([join(map(stripmd, r), " ") for r in x.rows], " ")
 
-"""
-    apropos([io::IO=stdout], pattern::Union{AbstractString,Regex})
-
-Search available docstrings for entries containing `pattern`.
-
-When `pattern` is a string, case is ignored. Results are printed to `io`.
-
-`apropos` can be called from the help mode in the REPL by wrapping the query in double quotes:
-```
-help?> "pattern"
-```
-"""
 apropos(string) = apropos(stdout, string)
 apropos(io::IO, string) = apropos(io, Regex("\\Q$string", "i"))
 
diff --git a/stdlib/REPL/src/emoji_symbols.jl b/stdlib/REPL/src/emoji_symbols.jl
index 49a55c97f6564..d6d4a03321d0a 100644
--- a/stdlib/REPL/src/emoji_symbols.jl
+++ b/stdlib/REPL/src/emoji_symbols.jl
@@ -27,6 +27,7 @@ result = mapfoldr(emoji_data, merge, [
     # overwrite the old with names that changed but still keep old ones that were removed
     "https://raw.githubusercontent.com/iamcal/emoji-data/0f0cf4ea8845eb52d26df2a48c3c31c3b8cad14e/emoji_pretty.json",
     "https://raw.githubusercontent.com/iamcal/emoji-data/e512953312c012f6bd00e3f2ef6bf152ca3710f8/emoji_pretty.json",
+    "https://raw.githubusercontent.com/iamcal/emoji-data/a8174c74675355c8c6a9564516b2e961fe7257ef/emoji_pretty.json",
     ];
     init=Dict()
 )
@@ -132,6 +133,7 @@ const emoji_symbols = Dict(
     "\\:bath:" => "🛀",
     "\\:bathtub:" => "🛁",
     "\\:battery:" => "🔋",
+    "\\:beans:" => "🫘",
     "\\:bear:" => "🐻",
     "\\:bearded_person:" => "🧔",
     "\\:beaver:" => "🦫",
@@ -151,6 +153,7 @@ const emoji_symbols = Dict(
     "\\:bird:" => "🐦",
     "\\:birthday:" => "🎂",
     "\\:bison:" => "🦬",
+    "\\:biting_lip:" => "🫦",
     "\\:black_circle:" => "⚫",
     "\\:black_heart:" => "🖤",
     "\\:black_joker:" => "🃏",
@@ -198,6 +201,7 @@ const emoji_symbols = Dict(
     "\\:broom:" => "🧹",
     "\\:brown_heart:" => "🤎",
     "\\:bubble_tea:" => "🧋",
+    "\\:bubbles:" => "🫧",
     "\\:bucket:" => "🪣",
     "\\:bug:" => "🐛",
     "\\:bulb:" => "💡",
@@ -309,6 +313,7 @@ const emoji_symbols = Dict(
     "\\:cool:" => "🆒",
     "\\:cop:" => "👮",
     "\\:copyright:" => "©",
+    "\\:coral:" => "🪸",
     "\\:corn:" => "🌽",
     "\\:couple:" => "👫",
     "\\:couple_with_heart:" => "💑",
@@ -325,6 +330,7 @@ const emoji_symbols = Dict(
     "\\:crossed_fingers:" => "🤞",
     "\\:crossed_flags:" => "🎌",
     "\\:crown:" => "👑",
+    "\\:crutch:" => "🩼",
     "\\:cry:" => "😢",
     "\\:crying_cat_face:" => "😿",
     "\\:crystal_ball:" => "🔮",
@@ -367,7 +373,9 @@ const emoji_symbols = Dict(
     "\\:dollar:" => "💵",
     "\\:dolls:" => "🎎",
     "\\:dolphin:" => "🐬",
+    "\\:donkey:" => "🫏",
     "\\:door:" => "🚪",
+    "\\:dotted_line_face:" => "🫥",
     "\\:doughnut:" => "🍩",
     "\\:dragon:" => "🐉",
     "\\:dragon_face:" => "🐲",
@@ -397,6 +405,7 @@ const emoji_symbols = Dict(
     "\\:elevator:" => "🛗",
     "\\:elf:" => "🧝",
     "\\:email:" => "✉",
+    "\\:empty_nest:" => "🪹",
     "\\:end:" => "🔚",
     "\\:envelope_with_arrow:" => "📩",
     "\\:euro:" => "💶",
@@ -408,12 +417,16 @@ const emoji_symbols = Dict(
     "\\:expressionless:" => "😑",
     "\\:eyeglasses:" => "👓",
     "\\:eyes:" => "👀",
+    "\\:face_holding_back_tears:" => "🥹",
     "\\:face_palm:" => "🤦",
     "\\:face_vomiting:" => "🤮",
     "\\:face_with_cowboy_hat:" => "🤠",
+    "\\:face_with_diagonal_mouth:" => "🫤",
     "\\:face_with_hand_over_mouth:" => "🤭",
     "\\:face_with_head_bandage:" => "🤕",
     "\\:face_with_monocle:" => "🧐",
+    "\\:face_with_open_eyes_and_hand_over_mouth:" => "🫢",
+    "\\:face_with_peeking_eye:" => "🫣",
     "\\:face_with_raised_eyebrow:" => "🤨",
     "\\:face_with_rolling_eyes:" => "🙄",
     "\\:face_with_symbols_on_mouth:" => "🤬",
@@ -452,10 +465,12 @@ const emoji_symbols = Dict(
     "\\:floppy_disk:" => "💾",
     "\\:flower_playing_cards:" => "🎴",
     "\\:flushed:" => "😳",
+    "\\:flute:" => "🪈",
     "\\:fly:" => "🪰",
     "\\:flying_disc:" => "🥏",
     "\\:flying_saucer:" => "🛸",
     "\\:foggy:" => "🌁",
+    "\\:folding_hand_fan:" => "🪭",
     "\\:fondue:" => "🫕",
     "\\:foot:" => "🦶",
     "\\:football:" => "🏈",
@@ -482,6 +497,7 @@ const emoji_symbols = Dict(
     "\\:ghost:" => "👻",
     "\\:gift:" => "🎁",
     "\\:gift_heart:" => "💝",
+    "\\:ginger_root:" => "🫚",
     "\\:giraffe_face:" => "🦒",
     "\\:girl:" => "👧",
     "\\:glass_of_milk:" => "🥛",
@@ -491,6 +507,7 @@ const emoji_symbols = Dict(
     "\\:goat:" => "🐐",
     "\\:goggles:" => "🥽",
     "\\:golf:" => "⛳",
+    "\\:goose:" => "🪿",
     "\\:gorilla:" => "🦍",
     "\\:grapes:" => "🍇",
     "\\:green_apple:" => "🍏",
@@ -498,6 +515,7 @@ const emoji_symbols = Dict(
     "\\:green_heart:" => "💚",
     "\\:green_salad:" => "🥗",
     "\\:grey_exclamation:" => "❕",
+    "\\:grey_heart:" => "🩶",
     "\\:grey_question:" => "❔",
     "\\:grimacing:" => "😬",
     "\\:grin:" => "😁",
@@ -506,11 +524,14 @@ const emoji_symbols = Dict(
     "\\:guide_dog:" => "🦮",
     "\\:guitar:" => "🎸",
     "\\:gun:" => "🔫",
+    "\\:hair_pick:" => "🪮",
     "\\:haircut:" => "💇",
     "\\:hamburger:" => "🍔",
     "\\:hammer:" => "🔨",
+    "\\:hamsa:" => "🪬",
     "\\:hamster:" => "🐹",
     "\\:hand:" => "✋",
+    "\\:hand_with_index_finger_and_thumb_crossed:" => "🫰",
     "\\:handbag:" => "👜",
     "\\:handball:" => "🤾",
     "\\:handshake:" => "🤝",
@@ -524,12 +545,14 @@ const emoji_symbols = Dict(
     "\\:heart_decoration:" => "💟",
     "\\:heart_eyes:" => "😍",
     "\\:heart_eyes_cat:" => "😻",
+    "\\:heart_hands:" => "🫶",
     "\\:heartbeat:" => "💓",
     "\\:heartpulse:" => "💗",
     "\\:hearts:" => "♥",
     "\\:heavy_check_mark:" => "✔",
     "\\:heavy_division_sign:" => "➗",
     "\\:heavy_dollar_sign:" => "💲",
+    "\\:heavy_equals_sign:" => "🟰",
     "\\:heavy_minus_sign:" => "➖",
     "\\:heavy_multiplication_x:" => "✖",
     "\\:heavy_plus_sign:" => "➕",
@@ -559,16 +582,19 @@ const emoji_symbols = Dict(
     "\\:hugging_face:" => "🤗",
     "\\:hushed:" => "😯",
     "\\:hut:" => "🛖",
+    "\\:hyacinth:" => "🪻",
     "\\:i_love_you_hand_sign:" => "🤟",
     "\\:ice_cream:" => "🍨",
     "\\:ice_cube:" => "🧊",
     "\\:ice_hockey_stick_and_puck:" => "🏒",
     "\\:icecream:" => "🍦",
     "\\:id:" => "🆔",
+    "\\:identification_card:" => "🪪",
     "\\:ideograph_advantage:" => "🉐",
     "\\:imp:" => "👿",
     "\\:inbox_tray:" => "📥",
     "\\:incoming_envelope:" => "📨",
+    "\\:index_pointing_at_the_viewer:" => "🫵",
     "\\:information_desk_person:" => "💁",
     "\\:information_source:" => "ℹ",
     "\\:innocent:" => "😇",
@@ -580,7 +606,9 @@ const emoji_symbols = Dict(
     "\\:japanese_castle:" => "🏯",
     "\\:japanese_goblin:" => "👺",
     "\\:japanese_ogre:" => "👹",
+    "\\:jar:" => "🫙",
     "\\:jeans:" => "👖",
+    "\\:jellyfish:" => "🪼",
     "\\:jigsaw:" => "🧩",
     "\\:joy:" => "😂",
     "\\:joy_cat:" => "😹",
@@ -589,6 +617,7 @@ const emoji_symbols = Dict(
     "\\:kangaroo:" => "🦘",
     "\\:key:" => "🔑",
     "\\:keycap_ten:" => "🔟",
+    "\\:khanda:" => "🪯",
     "\\:kimono:" => "👘",
     "\\:kiss:" => "💋",
     "\\:kissing:" => "😗",
@@ -631,11 +660,14 @@ const emoji_symbols = Dict(
     "\\:left_luggage:" => "🛅",
     "\\:left_right_arrow:" => "↔",
     "\\:leftwards_arrow_with_hook:" => "↩",
+    "\\:leftwards_hand:" => "🫲",
+    "\\:leftwards_pushing_hand:" => "🫷",
     "\\:leg:" => "🦵",
     "\\:lemon:" => "🍋",
     "\\:leo:" => "♌",
     "\\:leopard:" => "🐆",
     "\\:libra:" => "♎",
+    "\\:light_blue_heart:" => "🩵",
     "\\:light_rail:" => "🚈",
     "\\:link:" => "🔗",
     "\\:lion_face:" => "🦁",
@@ -650,10 +682,12 @@ const emoji_symbols = Dict(
     "\\:long_drum:" => "🪘",
     "\\:loop:" => "➿",
     "\\:lotion_bottle:" => "🧴",
+    "\\:lotus:" => "🪷",
     "\\:loud_sound:" => "🔊",
     "\\:loudspeaker:" => "📢",
     "\\:love_hotel:" => "🏩",
     "\\:love_letter:" => "💌",
+    "\\:low_battery:" => "🪫",
     "\\:low_brightness:" => "🔅",
     "\\:luggage:" => "🧳",
     "\\:lungs:" => "🫁",
@@ -679,6 +713,7 @@ const emoji_symbols = Dict(
     "\\:mans_shoe:" => "👞",
     "\\:manual_wheelchair:" => "🦽",
     "\\:maple_leaf:" => "🍁",
+    "\\:maracas:" => "🪇",
     "\\:martial_arts_uniform:" => "🥋",
     "\\:mask:" => "😷",
     "\\:massage:" => "💆",
@@ -688,6 +723,7 @@ const emoji_symbols = Dict(
     "\\:mechanical_leg:" => "🦿",
     "\\:mega:" => "📣",
     "\\:melon:" => "🍈",
+    "\\:melting_face:" => "🫠",
     "\\:memo:" => "📝",
     "\\:menorah_with_nine_branches:" => "🕎",
     "\\:mens:" => "🚹",
@@ -702,6 +738,7 @@ const emoji_symbols = Dict(
     "\\:minibus:" => "🚐",
     "\\:minidisc:" => "💽",
     "\\:mirror:" => "🪞",
+    "\\:mirror_ball:" => "🪩",
     "\\:mobile_phone_off:" => "📴",
     "\\:money_mouth_face:" => "🤑",
     "\\:money_with_wings:" => "💸",
@@ -711,6 +748,7 @@ const emoji_symbols = Dict(
     "\\:monorail:" => "🚝",
     "\\:moon:" => "🌔",
     "\\:moon_cake:" => "🥮",
+    "\\:moose:" => "🫎",
     "\\:mortar_board:" => "🎓",
     "\\:mosque:" => "🕌",
     "\\:mosquito:" => "🦟",
@@ -739,6 +777,7 @@ const emoji_symbols = Dict(
     "\\:necktie:" => "👔",
     "\\:negative_squared_cross_mark:" => "❎",
     "\\:nerd_face:" => "🤓",
+    "\\:nest_with_eggs:" => "🪺",
     "\\:nesting_dolls:" => "🪆",
     "\\:neutral_face:" => "😐",
     "\\:new:" => "🆕",
@@ -800,7 +839,9 @@ const emoji_symbols = Dict(
     "\\:page_facing_up:" => "📄",
     "\\:page_with_curl:" => "📃",
     "\\:pager:" => "📟",
+    "\\:palm_down_hand:" => "🫳",
     "\\:palm_tree:" => "🌴",
+    "\\:palm_up_hand:" => "🫴",
     "\\:palms_up_together:" => "🤲",
     "\\:pancakes:" => "🥞",
     "\\:panda_face:" => "🐼",
@@ -812,6 +853,7 @@ const emoji_symbols = Dict(
     "\\:partly_sunny:" => "⛅",
     "\\:partying_face:" => "🥳",
     "\\:passport_control:" => "🛂",
+    "\\:pea_pod:" => "🫛",
     "\\:peach:" => "🍑",
     "\\:peacock:" => "🦚",
     "\\:peanuts:" => "🥜",
@@ -829,6 +871,7 @@ const emoji_symbols = Dict(
     "\\:person_in_steamy_room:" => "🧖",
     "\\:person_in_tuxedo:" => "🤵",
     "\\:person_with_blond_hair:" => "👱",
+    "\\:person_with_crown:" => "🫅",
     "\\:person_with_headscarf:" => "🧕",
     "\\:person_with_pouting_face:" => "🙎",
     "\\:petri_dish:" => "🧫",
@@ -843,10 +886,12 @@ const emoji_symbols = Dict(
     "\\:pinched_fingers:" => "🤌",
     "\\:pinching_hand:" => "🤏",
     "\\:pineapple:" => "🍍",
+    "\\:pink_heart:" => "🩷",
     "\\:pisces:" => "♓",
     "\\:pizza:" => "🍕",
     "\\:placard:" => "🪧",
     "\\:place_of_worship:" => "🛐",
+    "\\:playground_slide:" => "🛝",
     "\\:pleading_face:" => "🥺",
     "\\:plunger:" => "🪠",
     "\\:point_down:" => "👇",
@@ -866,9 +911,12 @@ const emoji_symbols = Dict(
     "\\:pouch:" => "👝",
     "\\:poultry_leg:" => "🍗",
     "\\:pound:" => "💷",
+    "\\:pouring_liquid:" => "🫗",
     "\\:pouting_cat:" => "😾",
     "\\:pray:" => "🙏",
     "\\:prayer_beads:" => "📿",
+    "\\:pregnant_man:" => "🫃",
+    "\\:pregnant_person:" => "🫄",
     "\\:pregnant_woman:" => "🤰",
     "\\:pretzel:" => "🥨",
     "\\:prince:" => "🤴",
@@ -914,7 +962,10 @@ const emoji_symbols = Dict(
     "\\:rice_cracker:" => "🍘",
     "\\:rice_scene:" => "🎑",
     "\\:right-facing_fist:" => "🤜",
+    "\\:rightwards_hand:" => "🫱",
+    "\\:rightwards_pushing_hand:" => "🫸",
     "\\:ring:" => "💍",
+    "\\:ring_buoy:" => "🛟",
     "\\:ringed_planet:" => "🪐",
     "\\:robot_face:" => "🤖",
     "\\:rock:" => "🪨",
@@ -937,6 +988,7 @@ const emoji_symbols = Dict(
     "\\:sagittarius:" => "♐",
     "\\:sake:" => "🍶",
     "\\:salt:" => "🧂",
+    "\\:saluting_face:" => "🫡",
     "\\:sandal:" => "👡",
     "\\:sandwich:" => "🥪",
     "\\:santa:" => "🎅",
@@ -964,6 +1016,7 @@ const emoji_symbols = Dict(
     "\\:seedling:" => "🌱",
     "\\:selfie:" => "🤳",
     "\\:sewing_needle:" => "🪡",
+    "\\:shaking_face:" => "🫨",
     "\\:shallow_pan_of_food:" => "🥘",
     "\\:shark:" => "🦈",
     "\\:shaved_ice:" => "🍧",
@@ -1124,6 +1177,7 @@ const emoji_symbols = Dict(
     "\\:triangular_ruler:" => "📐",
     "\\:trident:" => "🔱",
     "\\:triumph:" => "😤",
+    "\\:troll:" => "🧌",
     "\\:trolleybus:" => "🚎",
     "\\:trophy:" => "🏆",
     "\\:tropical_drink:" => "🍹",
@@ -1188,6 +1242,7 @@ const emoji_symbols = Dict(
     "\\:wedding:" => "💒",
     "\\:whale2:" => "🐋",
     "\\:whale:" => "🐳",
+    "\\:wheel:" => "🛞",
     "\\:wheelchair:" => "♿",
     "\\:white_check_mark:" => "✅",
     "\\:white_circle:" => "⚪",
@@ -1202,7 +1257,9 @@ const emoji_symbols = Dict(
     "\\:wind_chime:" => "🎐",
     "\\:window:" => "🪟",
     "\\:wine_glass:" => "🍷",
+    "\\:wing:" => "🪽",
     "\\:wink:" => "😉",
+    "\\:wireless:" => "🛜",
     "\\:wolf:" => "🐺",
     "\\:woman:" => "👩",
     "\\:womans_clothes:" => "👚",
@@ -1215,6 +1272,7 @@ const emoji_symbols = Dict(
     "\\:worried:" => "😟",
     "\\:wrench:" => "🔧",
     "\\:wrestlers:" => "🤼",
+    "\\:x-ray:" => "🩻",
     "\\:x:" => "❌",
     "\\:yarn:" => "🧶",
     "\\:yawning_face:" => "🥱",
diff --git a/stdlib/REPL/src/latex_symbols.jl b/stdlib/REPL/src/latex_symbols.jl
index 9e71819f6562b..54e0e4c354de3 100644
--- a/stdlib/REPL/src/latex_symbols.jl
+++ b/stdlib/REPL/src/latex_symbols.jl
@@ -4,7 +4,7 @@
 # This is used for tab substitution in the REPL.
 
 # The initial symbol listing was generated from the W3C symbol mapping file:
-#         http://www.w3.org/Math/characters/unicode.xml
+#         https://www.w3.org/Math/characters/unicode.xml
 # by the following Julia script:
 #=
 import REPL
@@ -119,6 +119,46 @@ const latex_symbols = Dict(
     "\\euler" => "ℯ",
     "\\ohm" => "Ω",
 
+    # Music Symbols
+    # Music Symbols - Accidentals
+    "\\flatflat" => "𝄫",
+    "\\sharpsharp" => "𝄪",
+    # Music Symbols - Codas
+    "\\leftrepeatsign" => "𝄆",
+    "\\rightrepeatsign" => "𝄇",
+    "\\dalsegno" => "𝄉",
+    "\\dacapo" => "𝄊",
+    "\\segno" => "𝄋",
+    "\\coda" => "𝄌",
+    # Music Symbols - Clefs
+    "\\clefg" => "𝄞",
+    "\\clefg8va" => "𝄟",
+    "\\clefg8vb" => "𝄠",
+    "\\clefc" => "𝄡",
+    "\\cleff" => "𝄢",
+    "\\cleff8va" => "𝄣",
+    "\\cleff8vb" => "𝄤",
+     # Music Symbols - Rests
+    "\\restmulti" => "𝄺",
+    "\\restwhole" => "𝄻",
+    "\\resthalf" => "𝄼",
+    "\\restquarter" => "𝄽",
+    "\\rest8th" => "𝄾",
+    "\\rest16th" => "𝄿",
+    "\\rest32th" => "𝅀",
+    "\\rest64th" => "𝅁",
+    "\\rest128th" => "𝅂",
+    # Music Symbols - Notes
+    "\\notedoublewhole" => "𝅜",
+    "\\notewhole" => "𝅝",
+    "\\notehalf" => "𝅗𝅥",
+    "\\notequarter" => "𝅘𝅥",
+    "\\note8th" => "𝅘𝅥𝅮",
+    "\\note16th" => "𝅘𝅥𝅯",
+    "\\note32th" => "𝅘𝅥𝅰",
+    "\\note64th" => "𝅘𝅥𝅱",
+    "\\note128th" => "𝅘𝅥𝅲",
+
     # Superscripts
     "\\^0" => "⁰",
     "\\^1" => "¹",
@@ -151,6 +191,7 @@ const latex_symbols = Dict(
     "\\^n" => "ⁿ",
     "\\^o" => "ᵒ",
     "\\^p" => "ᵖ",
+    "\\^q" => "𐞥",
     "\\^r" => "ʳ",
     "\\^s" => "ˢ",
     "\\^t" => "ᵗ",
@@ -162,8 +203,10 @@ const latex_symbols = Dict(
     "\\^z" => "ᶻ",
     "\\^A" => "ᴬ",
     "\\^B" => "ᴮ",
+    "\\^C" => "ꟲ",
     "\\^D" => "ᴰ",
     "\\^E" => "ᴱ",
+    "\\^F" => "ꟳ",
     "\\^G" => "ᴳ",
     "\\^H" => "ᴴ",
     "\\^I" => "ᴵ",
@@ -174,6 +217,7 @@ const latex_symbols = Dict(
     "\\^N" => "ᴺ",
     "\\^O" => "ᴼ",
     "\\^P" => "ᴾ",
+    "\\^Q" => "ꟴ",
     "\\^R" => "ᴿ",
     "\\^T" => "ᵀ",
     "\\^U" => "ᵁ",
@@ -207,6 +251,8 @@ const latex_symbols = Dict(
     "\\_+" => "₊",
     "\\_-" => "₋",
     "\\_=" => "₌",
+    "\\_<" => "˱",
+    "\\_>" => "˲",
     "\\_(" => "₍",
     "\\_)" => "₎",
     "\\_a" => "ₐ",
@@ -475,6 +521,7 @@ const latex_symbols = Dict(
     "\\mapsto" => "↦",
     "\\hookleftarrow" => "↩",
     "\\hookrightarrow" => "↪",
+    "\\hookunderrightarrow" => "🢲",
     "\\looparrowleft" => "↫",
     "\\looparrowright" => "↬",
     "\\leftrightsquigarrow" => "↭",
@@ -984,17 +1031,16 @@ const latex_symbols = Dict(
     "\\droang" => "̚",  # left angle above (non-spacing)
     "\\wideutilde" => "̰",  # under tilde accent (multiple characters and non-spacing)
     "\\not" => "̸",  # combining long solidus overlay
-    "\\upMu" => "Μ",  # capital mu, greek
-    "\\upNu" => "Ν",  # capital nu, greek
-    "\\upOmicron" => "Ο",  # capital omicron, greek
-    "\\upepsilon" => "ε",  # rounded small epsilon, greek
-    "\\upomicron" => "ο",  # small omicron, greek
-    "\\upvarbeta" => "ϐ",  # rounded small beta, greek
-    "\\upoldKoppa" => "Ϙ",  # greek letter archaic koppa
-    "\\upoldkoppa" => "ϙ",  # greek small letter archaic koppa
-    "\\upstigma" => "ϛ",  # greek small letter stigma
-    "\\upkoppa" => "ϟ",  # greek small letter koppa
-    "\\upsampi" => "ϡ",  # greek small letter sampi
+    "\\Mu" => "Μ",  # capital mu, greek
+    "\\Nu" => "Ν",  # capital nu, greek
+    "\\Omicron" => "Ο",  # capital omicron, greek
+    "\\omicron" => "ο",  # small omicron, greek
+    "\\varbeta" => "ϐ",  # rounded small beta, greek
+    "\\oldKoppa" => "Ϙ",  # greek letter archaic koppa
+    "\\oldkoppa" => "ϙ",  # greek small letter archaic koppa
+    "\\stigma" => "ϛ",  # greek small letter stigma
+    "\\koppa" => "ϟ",  # greek small letter koppa
+    "\\sampi" => "ϡ",  # greek small letter sampi
     "\\tieconcat" => "⁀",  # character tie, z notation sequence concatenation
     "\\leftharpoonaccent" => "⃐",  # combining left harpoon above
     "\\rightharpoonaccent" => "⃑",  # combining right harpoon above
@@ -2623,10 +2669,10 @@ const latex_symbols = Dict(
     "\\4/5" => "⅘", # vulgar fraction four fifths
     "\\1/6" => "⅙", # vulgar fraction one sixth
     "\\5/6" => "⅚", # vulgar fraction five sixths
-    "\\1/8" => "⅛", # vulgar fraction one eigth
-    "\\3/8" => "⅜", # vulgar fraction three eigths
-    "\\5/8" => "⅝", # vulgar fraction five eigths
-    "\\7/8" => "⅞", # vulgar fraction seventh eigths
+    "\\1/8" => "⅛", # vulgar fraction one eighth
+    "\\3/8" => "⅜", # vulgar fraction three eighths
+    "\\5/8" => "⅝", # vulgar fraction five eighths
+    "\\7/8" => "⅞", # vulgar fraction seventh eighths
     "\\1/" => "⅟", # fraction numerator one
     "\\0/3" => "↉", # vulgar fraction zero thirds
     "\\1/4" => "¼", # vulgar fraction one quarter
diff --git a/stdlib/REPL/src/options.jl b/stdlib/REPL/src/options.jl
index 3ce0ab6ff00dc..bd881edd23382 100644
--- a/stdlib/REPL/src/options.jl
+++ b/stdlib/REPL/src/options.jl
@@ -27,6 +27,9 @@ mutable struct Options
     auto_indent_time_threshold::Float64
     # refresh after time delay
     auto_refresh_time_delay::Float64
+    hint_tab_completes::Bool
+    auto_insert_closing_bracket::Bool # automatically insert closing brackets, quotes, etc.
+    style_input::Bool # enable syntax highlighting for input
     # default IOContext settings at the REPL
     iocontext::Dict{Symbol,Any}
 end
@@ -46,7 +49,10 @@ Options(;
         auto_indent_tmp_off = false,
         auto_indent_bracketed_paste = false,
         auto_indent_time_threshold = 0.005,
-        auto_refresh_time_delay = Sys.iswindows() ? 0.05 : 0.0,
+        auto_refresh_time_delay = 0.0, # this no longer seems beneficial
+        hint_tab_completes = true,
+        auto_insert_closing_bracket = true,
+        style_input = true,
         iocontext = Dict{Symbol,Any}()) =
             Options(hascolor, extra_keymap, tabwidth,
                     kill_ring_max, region_animation_duration,
@@ -55,6 +61,7 @@ Options(;
                     backspace_align, backspace_adjust, confirm_exit,
                     auto_indent, auto_indent_tmp_off, auto_indent_bracketed_paste,
                     auto_indent_time_threshold, auto_refresh_time_delay,
+                    hint_tab_completes, auto_insert_closing_bracket, style_input,
                     iocontext)
 
 # for use by REPLs not having an options field
diff --git a/stdlib/REPL/src/precompile.jl b/stdlib/REPL/src/precompile.jl
new file mode 100644
index 0000000000000..9d925a7c3a0f9
--- /dev/null
+++ b/stdlib/REPL/src/precompile.jl
@@ -0,0 +1,232 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module Precompile
+
+import ..REPL
+
+# Ugly hack for our cache file to not have a dependency edge on the FakePTYs file.
+Base._track_dependencies[] = false
+try
+    Base.include(@__MODULE__, joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test", "testhelpers", "FakePTYs.jl"))
+    @Core.latestworld
+    import .FakePTYs: open_fake_pty
+finally
+    Base._track_dependencies[] = true
+end
+
+function repl_workload()
+    # these are intentionally triggered
+    allowed_errors = [
+        "BoundsError: attempt to access 0-element Vector{Any} at index [1]",
+        "MethodError: no method matching f(::$Int, ::$Int)",
+        "Padding of type", # reinterpret docstring has ERROR examples
+    ]
+    function check_errors(out)
+        str = String(out)
+        if occursin("ERROR:", str) && !any(occursin(e, str) for e in allowed_errors)
+            @error "Unexpected error (Review REPL precompilation with debug_output on):\n$str" exception=(
+                Base.PrecompilableError(), Base.backtrace())
+            exit(1)
+        end
+    end
+    ## Debugging options
+    # View the code sent to the repl by setting this to `stdout`
+    debug_output = devnull # or stdout
+
+    CTRL_C = '\x03'
+    CTRL_D = '\x04'
+    CTRL_R = '\x12'
+    UP_ARROW = "\e[A"
+    DOWN_ARROW = "\e[B"
+
+    # This is notified as soon as the first prompt appears
+    repl_init_event = Base.Event()
+    repl_init_done_event = Base.Event()
+
+    atreplinit() do repl
+        # Main is closed so we can't evaluate in it, but atreplinit runs at
+        # a time that repl.mistate === nothing so REPL.activate fails. So do
+        # it async and wait for the first prompt to know its ready.
+        t = @async begin
+            wait(repl_init_event)
+            REPL.activate(REPL.Precompile; interactive_utils=false)
+            notify(repl_init_done_event)
+        end
+        Base.errormonitor(t)
+    end
+
+    repl_script = """
+    2+2
+    print("")
+    printstyled("a", "b")
+    display([1])
+    display([1 2; 3 4])
+    display("a string")
+    foo(x) = 1
+    @time @eval foo(1)
+    ; pwd
+    $CTRL_C
+    $CTRL_R$CTRL_C#
+    ? reinterpret
+    using Ra\t$CTRL_C
+    \\alpha\t$CTRL_C
+    \e[200~paste here ;)\e[201~"$CTRL_C
+    $UP_ARROW$DOWN_ARROW$CTRL_C
+    123\b\b\b$CTRL_C
+    \b\b$CTRL_C
+    f(x) = x03
+    f(1,2)
+    [][1]
+    Base.Iterators.minimum
+    cd("complete_path\t\t$CTRL_C
+    \x12?\x7f\e[A\e[B\t history\r
+    println("done")
+    """
+
+    JULIA_PROMPT = "julia> "
+    # The help text for `reinterpret` has example `julia>` prompts in it,
+    # so use the longer prompt to avoid desychronization.
+    ACTIVATED_JULIA_PROMPT = "(REPL.Precompile) julia> "
+    PKG_PROMPT = "pkg> "
+    SHELL_PROMPT = "shell> "
+    HELP_PROMPT = "help?> "
+
+    tmphistfile = tempname()
+    write(tmphistfile, """
+    # time: 2020-10-31 13:16:39 AWST
+    # mode: julia
+    \tcos
+    # time: 2020-10-31 13:16:40 AWST
+    # mode: julia
+    \tsin
+    # time: 2020-11-01 02:19:36 AWST
+    # mode: help
+    \t?
+    """)
+
+    withenv("JULIA_HISTORY" => tmphistfile,
+            "JULIA_PROJECT" => nothing, # remove from environment
+            "JULIA_LOAD_PATH" => "@stdlib",
+            "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":",
+            "TERM" => "",
+            "JULIA_FALLBACK_REPL" => "0" # Make sure REPL.jl is turned on
+            ) do
+        rawpts, ptm = open_fake_pty()
+        pts = open(rawpts)::Base.TTY
+        if Sys.iswindows()
+            pts.ispty = false
+        else
+            # workaround libuv bug where it leaks pts
+            Base._fd(pts) == rawpts || Base.close_stdio(rawpts)
+        end
+        # Prepare a background process to copy output from `ptm` until `pts` is closed
+        output_copy = Base.BufferStream()
+        tee = @async try
+            while !eof(ptm)
+                l = readavailable(ptm)
+                write(debug_output, l)
+                write(output_copy, l)
+            end
+            write(debug_output, "\n#### EOF ####\n")
+        catch ex
+            if !(ex isa Base.IOError && ex.code == Base.UV_EIO)
+                rethrow() # ignore EIO on ptm after pts dies
+            end
+        finally
+            close(output_copy)
+            close(ptm)
+        end
+        Base.errormonitor(tee)
+        orig_stdin = stdin
+        orig_stdout = stdout
+        orig_stderr = stderr
+        repltask = @task try
+            Base.run_std_repl(REPL, false, :yes, true)
+        finally
+            redirect_stdin(isopen(orig_stdin) ? orig_stdin : devnull)
+            redirect_stdout(isopen(orig_stdout) ? orig_stdout : devnull)
+            close(pts)
+        end
+        Base.errormonitor(repltask)
+        try
+            Base.REPL_MODULE_REF[] = REPL
+            redirect_stdin(pts)
+            redirect_stdout(pts)
+            redirect_stderr(pts)
+            try
+                REPL.print_qualified_access_warning(Base.Iterators, Base, :minimum) # trigger the warning while stderr is suppressed
+            finally
+                redirect_stderr(isopen(orig_stderr) ? orig_stderr : devnull)
+            end
+            schedule(repltask)
+            # wait for the definitive prompt before start writing to the TTY
+            check_errors(readuntil(output_copy, JULIA_PROMPT, keep=true))
+
+            # Switch to the activated prompt
+            notify(repl_init_event)
+            wait(repl_init_done_event)
+            write(ptm, "\n")
+            # The prompt prints twice - once for the restatement of the input, once
+            # to indicate ready for the new prompt.
+            check_errors(readuntil(output_copy, ACTIVATED_JULIA_PROMPT, keep=true))
+            check_errors(readuntil(output_copy, ACTIVATED_JULIA_PROMPT, keep=true))
+
+            write(debug_output, "\n#### REPL STARTED ####\n")
+            # Input our script
+            precompile_lines = split(repl_script::String, '\n'; keepempty=false)
+            curr = 0
+            for l in precompile_lines
+                sleep(0.01) # try to let a bit of output accumulate before reading again
+                curr += 1
+                # push our input
+                write(debug_output, "\n#### inputting statement: ####\n$(repr(l))\n####\n")
+                # If the line ends with a CTRL_C, don't write an extra newline, which would
+                # cause a second empty prompt. Our code below expects one new prompt per
+                # input line and can race out of sync with the unexpected second line.
+                endswith(l, CTRL_C) ? write(ptm, l) : write(ptm, l, "\n")
+                check_errors(readuntil(output_copy, "\n"))
+                # wait for the next prompt-like to appear
+                check_errors(readuntil(output_copy, "\n"))
+                strbuf = ""
+                while !eof(output_copy)
+                    strbuf *= String(readavailable(output_copy))
+                    occursin(ACTIVATED_JULIA_PROMPT, strbuf) && break
+                    occursin(PKG_PROMPT, strbuf) && break
+                    occursin(SHELL_PROMPT, strbuf) && break
+                    occursin(HELP_PROMPT, strbuf) && break
+                    sleep(0.01) # try to let a bit of output accumulate before reading again
+                end
+                check_errors(strbuf)
+            end
+            write(debug_output, "\n#### COMPLETED - Closing REPL ####\n")
+            write(ptm, "$CTRL_D")
+            wait(repltask)
+        finally
+            redirect_stdin(isopen(orig_stdin) ? orig_stdin : devnull)
+            redirect_stdout(isopen(orig_stdout) ? orig_stdout : devnull)
+            close(pts)
+        end
+        wait(tee)
+    end
+    write(debug_output, "\n#### FINISHED ####\n")
+    nothing
+end
+
+let
+    if Base.generating_output() && Base.JLOptions().use_pkgimages != 0
+        # Bare-bones PrecompileTools.jl
+        # Do we need latestworld-if-toplevel here
+        ccall(:jl_tag_newly_inferred_enable, Cvoid, ())
+        try
+            repl_workload()
+            precompile(Tuple{typeof(Base.setindex!), Base.Dict{Any, Any}, Any, Char})
+            precompile(Tuple{typeof(Base.setindex!), Base.Dict{Any, Any}, Any, Int})
+            precompile(Tuple{typeof(Base.delete!), Base.Set{Any}, String})
+            precompile(Tuple{typeof(Base.:(==)), Char, String})
+        finally
+            ccall(:jl_tag_newly_inferred_disable, Cvoid, ())
+        end
+    end
+end
+
+end # Precompile
diff --git a/stdlib/REPL/test/TerminalMenus/runtests.jl b/stdlib/REPL/test/TerminalMenus/runtests.jl
index c594958a36670..9455632d9f418 100644
--- a/stdlib/REPL/test/TerminalMenus/runtests.jl
+++ b/stdlib/REPL/test/TerminalMenus/runtests.jl
@@ -17,9 +17,9 @@ function simulate_input(menu::TerminalMenus.AbstractMenu, keys...; kwargs...)
             write(new_stdin, "$key")
         end
     end
-    TerminalMenus.terminal.in_stream = new_stdin
+    terminal = TerminalMenus.default_terminal(; in=new_stdin, out=devnull)
 
-    return request(menu; suppress_output=true, kwargs...)
+    return request(terminal, menu; suppress_output=true, kwargs...)
 end
 
 include("radio_menu.jl")
diff --git a/stdlib/REPL/test/bad_history_startup.jl b/stdlib/REPL/test/bad_history_startup.jl
new file mode 100644
index 0000000000000..27e03cdc2da74
--- /dev/null
+++ b/stdlib/REPL/test/bad_history_startup.jl
@@ -0,0 +1,70 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Test that interactive mode starts up without error when history file is bad
+
+using Test
+
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")
+isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl"))
+import .Main.FakePTYs: with_fake_pty
+
+@testset "Bad history file startup" begin
+    mktempdir() do tmpdir
+        # Create a bad history file
+        hist_file = joinpath(tmpdir, "repl_history.jl")
+        write(hist_file, "{ invalid json content\nmore bad content\n")
+
+        julia_exe = Base.julia_cmd()[1]
+
+        # Test interactive Julia startup with bad history file
+        with_fake_pty() do pts, ptm
+            # Set up environment with our bad history file
+            nENV = copy(ENV)
+            nENV["JULIA_HISTORY"] = hist_file
+
+            # Start Julia in interactive mode
+            p = run(detach(setenv(`$julia_exe --startup-file=no --color=no -q`, nENV)), pts, pts, pts, wait=false)
+            Base.close_stdio(pts)
+
+            # Read output until we get the prompt, which indicates successful startup
+            output = readuntil(ptm, "julia> ", keep=true)
+            # println("====== subprocess output ======")
+            # println(output)
+            # println("====== end subprocess output ======")
+
+            # Test conditions:
+            # 1. We should see the invalid history file error
+            has_history_error = occursin("Invalid history file", output) ||
+                              occursin("Invalid character", output)
+            @test_broken has_history_error
+
+            # 3. We should see the "Disabling history file" message if the fix works
+            has_disable_message = occursin("Disabling history file for this session", output)
+            @test_broken has_disable_message
+
+            # Send exit command to clean shutdown
+            if isopen(ptm)
+                write(ptm, "exit()\n")
+            else
+                @warn "PTY master is already closed before sending exit command"
+            end
+
+            # Read any remaining output until the process exits
+            try
+                read(ptm, String)
+            catch ex
+                # Handle platform-specific EOF behavior
+                if ex isa Base.IOError && ex.code == Base.UV_EIO
+                    # This is expected on some platforms (e.g., Linux)
+                else
+                    rethrow()
+                end
+            end
+
+            # Wait for process to finish
+            wait(p)
+
+            @test p.exitcode == 0
+        end
+    end
+end
diff --git a/stdlib/REPL/test/docview.jl b/stdlib/REPL/test/docview.jl
index 22701ead7883d..0ac6f88d12ed6 100644
--- a/stdlib/REPL/test/docview.jl
+++ b/stdlib/REPL/test/docview.jl
@@ -4,21 +4,17 @@ using Test
 import REPL, REPL.REPLCompletions
 import Markdown
 
-@testset "symbol completion" begin
-    @test startswith(let buf = IOBuffer()
-            Core.eval(Main, REPL.helpmode(buf, "α"))
-            String(take!(buf))
-        end, "\"α\" can be typed by \\alpha<tab>\n")
-
-    @test startswith(let buf = IOBuffer()
-            Core.eval(Main, REPL.helpmode(buf, "🐨"))
-            String(take!(buf))
-        end, "\"🐨\" can be typed by \\:koala:<tab>\n")
+function get_help_io(input, mod=Main)
+    buf = IOBuffer()
+    eval(REPL.helpmode(buf, input, mod))
+    String(take!(buf))
+end
+get_help_standard(input) = string(eval(REPL.helpmode(IOBuffer(), input)))
 
-    @test startswith(let buf = IOBuffer()
-            Core.eval(Main, REPL.helpmode(buf, "ᵞ₁₂₃¹²³α"))
-            String(take!(buf))
-        end, "\"ᵞ₁₂₃¹²³α\" can be typed by \\^gamma<tab>\\_123<tab>\\^123<tab>\\alpha<tab>\n")
+@testset "symbol completion" begin
+    @test startswith(get_help_io("α"), "\"α\" can be typed by \\alpha<tab>\n")
+    @test startswith(get_help_io("🐨"), "\"🐨\" can be typed by \\:koala:<tab>\n")
+    @test startswith(get_help_io("ᵞ₁₂₃¹²³α"), "\"ᵞ₁₂₃¹²³α\" can be typed by \\^gamma<tab>\\_123<tab>\\^123<tab>\\alpha<tab>\n")
 
     # Check that all symbols with several completions have a canonical mapping (#39148)
     symbols = values(REPLCompletions.latex_symbols)
@@ -27,19 +23,28 @@ import Markdown
 end
 
 @testset "quoting in doc search" begin
-    str = let buf = IOBuffer()
-        Core.eval(Main, REPL.helpmode(buf, "mutable s"))
-        String(take!(buf))
-    end
+    str = get_help_io("mutable s")
     @test occursin("'mutable struct'", str)
     @test occursin("Couldn't find 'mutable s'", str)
 end
 
-@testset "Non-Markdown" begin
-    # https://github.com/JuliaLang/julia/issues/37765
-    @test isa(REPL.insert_hlines(IOBuffer(), Markdown.Text("foo")), Markdown.Text)
-    # https://github.com/JuliaLang/julia/issues/37757
-    @test REPL.insert_hlines(IOBuffer(), nothing) === nothing
+@testset "non-loaded packages in doc search" begin
+    temp_package = mktempdir()
+    write(joinpath(temp_package, "Project.toml"),
+        """
+        name = "FooPackage"
+        uuid = "2e6e0b2d-0e7f-4b7f-9f3b-6f3f3f3f3f3f"
+        """)
+    mkpath(joinpath(temp_package, "src"))
+    write(joinpath(temp_package, "src", "FooPackage.jl"),
+        """
+        module FooPackage
+        end
+        """)
+    push!(LOAD_PATH, temp_package)
+    str = get_help_io("FooPackage")
+    @test occursin("Couldn't find FooPackage, but a loadable package with that name exists.", str)
+    @test pop!(LOAD_PATH) == temp_package
 end
 
 @testset "Check @var_str also completes to var\"\" in REPL.doc_completions()" begin
@@ -47,13 +52,29 @@ end
     symbols = "@" .* checks .* "_str"
     results = checks .* "\"\""
     for (i,r) in zip(symbols,results)
-        @test r ∈ REPL.doc_completions(i)
+        @test r ∈ string.(REPL.doc_completions(i))
     end
 end
 @testset "fuzzy score" begin
     # https://github.com/JunoLab/FuzzyCompletions.jl/issues/7
     # shouldn't throw when there is a space in a middle of query
     @test (REPL.matchinds("a ", "a file.txt"); true)
+    @test isapprox(REPL.fuzzyscore("abcdef", ""), 0.0; atol=0.001)
+    @test 0.8 < REPL.fuzzyscore(
+    "supercalifragilisticexpialidocious",
+    "bupercalifragilisticexpialidocious"
+    ) < 1.0
+
+    # Unicode
+    @test 1.0 > REPL.fuzzyscore("αkδψm", "αkδm") > 0.0
+    @test 1.0 > REPL.fuzzyscore("αkδψm", "α") > 0.0
+
+    exact_match_export = REPL.fuzzyscore("thing", REPL.AccessibleBinding(:thing))
+    exact_match_public = REPL.fuzzyscore("thing", REPL.AccessibleBinding("A", "thing"))
+    inexact_match_export = REPL.fuzzyscore("thing", REPL.AccessibleBinding(:thang))
+    inexact_match_public = REPL.fuzzyscore("thing", REPL.AccessibleBinding("A", "thang"))
+    @test exact_match_export > exact_match_public > inexact_match_export > inexact_match_public
+    @test exact_match_export ≈ 1.0
 end
 
 @testset "Unicode doc lookup (#41589)" begin
@@ -65,3 +86,115 @@ end
     b = REPL.Binding(@__MODULE__, :R)
     @test REPL.summarize(b, Tuple{}) isa Markdown.MD
 end
+
+@testset "Struct field help (#51178)" begin
+    struct StructWithNoFields end
+    struct StructWithOneField
+        field1
+    end
+    struct StructWithTwoFields
+        field1
+        field2
+    end
+    struct StructWithThreeFields
+        field1
+        field2
+        field3
+    end
+
+    @test endswith(get_help_standard("StructWithNoFields.not_a_field"), "StructWithNoFields` has no fields.\n")
+    @test endswith(get_help_standard("StructWithOneField.not_a_field"), "StructWithOneField` has field `field1`.\n")
+    @test endswith(get_help_standard("StructWithTwoFields.not_a_field"), "StructWithTwoFields` has fields `field1`, and `field2`.\n")
+    @test endswith(get_help_standard("StructWithThreeFields.not_a_field"), "StructWithThreeFields` has fields `field1`, `field2`, and `field3`.\n")
+
+    # Shouldn't error if the struct doesn't have any field documentations at all.
+    @test endswith(get_help_standard("Int.not_a_field"), "`$Int` has no fields.\n")
+end
+
+@testset "Parametric struct field help (#59524)" begin
+    "NonParametricStruct docstring"
+    struct NonParametricStruct
+        "field_x docstring"
+        field_x::Float64
+    end
+
+    "ParametricStruct docstring"
+    struct ParametricStruct{T<:Real}
+        "field_y docstring"
+        field_y::T
+    end
+
+    @test occursin("field_x docstring", get_help_standard("NonParametricStruct.field_x"))
+    @test occursin("field_y docstring", get_help_standard("ParametricStruct.field_y"))
+    @test endswith(get_help_standard("ParametricStruct.not_a_field"), "ParametricStruct` has field `field_y`.\n")
+end
+
+module InternalWarningsTests
+
+    module A
+        public B, B3
+        module B
+            public e
+            c = 4
+            "d is 5"
+            d = 5
+            "e is 6"
+            e = 6
+        end
+
+        module B2
+            module C
+                public e
+                d = 1
+                "e is 2"
+                e = 2
+            end
+        end
+
+        module B3 end
+    end
+
+    using Test, REPL
+    @testset "internal warnings" begin
+        header = "!!! warning\n    The following bindings may be internal; they may change or be removed in future versions:\n\n"
+        prefix(warnings) = header * join("      * `$(@__MODULE__).$w`\n" for w in warnings) * "\n\n"
+        docstring(input) = string(eval(REPL.helpmode(IOBuffer(), input, @__MODULE__)))
+
+        @test docstring("A") == "No docstring or readme file found for internal module `$(@__MODULE__).A`.\n\n# Public names\n\n`B`, `B3`\n"
+        @test docstring("A.B") == "No docstring or readme file found for public module `$(@__MODULE__).A.B`.\n\n# Public names\n\n`e`\n"
+        @test startswith(docstring("A.B.c"), prefix(["A.B.c"]))
+        @test startswith(docstring("A.B.d"), prefix(["A.B.d"]))
+        @test docstring("A.B.e") == "e is 6\n"
+        @test startswith(docstring("A.B2"), prefix(["A.B2"]))
+        @test startswith(docstring("A.B2.C"), prefix(["A.B2", "A.B2.C"]))
+        @test startswith(docstring("A.B2.C.d"), prefix(["A.B2", "A.B2.C", "A.B2.C.d"]))
+        @test startswith(docstring("A.B2.C.e"), prefix(["A.B2", "A.B2.C"]))
+        @test docstring("A.B3") == "No docstring or readme file found for public module `$(@__MODULE__).A.B3`.\n\nModule does not have any public names.\n"
+    end
+end
+
+# Issue #51344, don't print "internal binding" warning for non-existent bindings.
+@test string(eval(REPL.helpmode("Base.no_such_symbol"))) == "No documentation found.\n\nBinding `Base.no_such_symbol` does not exist.\n"
+
+module TestSuggestPublic
+    export dingo
+    public dango
+    dingo(x) = x + 1
+    dango(x) = x = 2
+end
+using .TestSuggestPublic
+helplines(s) = map(strip, split(get_help_io(s, @__MODULE__), '\n'; keepempty=false))
+@testset "search lists public names" begin
+    lines = helplines("dango")
+    # Ensure that public names that exactly match the search query are listed first
+    # even if they aren't exported, as long as no exact exported/local match exists
+    @test startswith(lines[1], "search: TestSuggestPublic.dango dingo")
+    @test lines[2] == "Couldn't find dango"  # 🙈🍡
+    @test startswith(lines[3], "Perhaps you meant TestSuggestPublic.dango, dingo")
+end
+dango() = "🍡"
+@testset "search prioritizes exported names" begin
+    # Prioritize exported/local names if they exactly match
+    lines = helplines("dango")
+    @test startswith(lines[1], "search: dango TestSuggestPublic.dango dingo")
+end
diff --git a/stdlib/REPL/test/history.jl b/stdlib/REPL/test/history.jl
new file mode 100644
index 0000000000000..8c130cd0c0ee6
--- /dev/null
+++ b/stdlib/REPL/test/history.jl
@@ -0,0 +1,660 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+using REPL
+using Dates
+
+using REPL.History
+using REPL.History: HistoryFile, HistEntry, update!,
+    ConditionSet, FilterSpec, filterchunkrev!, ismorestrict,
+    SelectorState, componentrows, countlines_selected, hoveridx, ishover, gethover,
+    candidates, movehover, toggleselection, fullselection, addcache!
+
+const HISTORY_SAMPLE_FORMAT_1 = """
+# time: 2020-10-31 05:16:39 AWST
+# mode: julia
+\tcos
+# time: 2020-10-31 05:16:40 AWST
+# mode: help
+\tcos
+# time: 2021-03-12 09:03:06 AWST
+# mode: julia
+\tfunction is_leap_year(year)
+\t    if year % 4 == 0 && (! year % 100 == 0 || year % 400 == 0)
+\t        return true
+\t    else
+\t        return false
+\t    end
+\tend
+# time: 2021-03-23 16:48:55 AWST
+# mode: julia
+\tL²norm(x -> x^2, ℐ)
+# time: 2021-03-23 16:49:06 AWST
+# mode: julia
+\tL²norm(x -> 9x, ℐ)
+"""
+
+const HISTORY_SAMPLE_FORMAT_2 = """
+# time: 2025-10-18 18:21:03Z
+# mode: julia
+\tIterators.partition([1,2,3,4,5,6,7], 2) |> eltype
+# time: 2025-10-19 06:27:10Z
+# mode: julia
+\tusing Chairmarks
+# time: 2025-10-19 06:27:18Z
+# mode: julia
+\t@b REPL.History.HistoryFile("/home/tec/.julia/logs/repl_history.jl") REPL.History.update!
+"""
+
+const HISTORY_SAMPLE_MALFORMED = """
+time: 2025-10-18 18:20:59Z
+mode: julia
+"""
+
+const HISTORY_SAMPLE_BAD_SPACES = """
+# time: 2025-10-18 18:20:59Z
+# mode: julia
+    "Spaces instead of tabs :("
+"""
+
+const HISTORY_SAMPLE_INCOMPLETE = """
+# time: 2025-05-10 12:34:56Z
+# mode: julia
+\tfoo()
+# time: 2025-05-10 12:40:00Z
+# mode: julia
+"""
+
+@testset "Histfile" begin
+    hpath = tempname()
+    mkpath(dirname(hpath))
+    @testset "History reading" begin
+        @testset "Create empty HistoryFile" begin
+            hist = HistoryFile(hpath)
+            @test isempty(hist)
+            @test length(hist) == 0
+            close(hist)
+            @test read(hpath, String) == ""
+        end
+        @testset "Format 1" begin
+            write(hpath, HISTORY_SAMPLE_FORMAT_1)
+            hist = HistoryFile(hpath)
+            update!(hist)
+            @test length(hist) == 5
+            @test hist[1] == HistEntry(:julia, DateTime("2020-10-31T05:16:39"), "cos", 1)
+            @test hist[2] == HistEntry(:help, DateTime("2020-10-31T05:16:40"), "cos", 2)
+            funccontent = """
+        function is_leap_year(year)
+            if year % 4 == 0 && (! year % 100 == 0 || year % 400 == 0)
+                return true
+            else
+                return false
+            end
+        end"""
+            @test hist[3] == HistEntry(:julia, DateTime("2021-03-12T09:03:06"), funccontent, 3)
+            @test hist[4] == HistEntry(:julia, DateTime("2021-03-23T16:48:55"), "L²norm(x -> x^2, ℐ)", 4)
+            @test hist[5] == HistEntry(:julia, DateTime("2021-03-23T16:49:06"), "L²norm(x -> 9x, ℐ)", 5)
+            close(hist)
+        end
+        @testset "Format 2" begin
+            write(hpath, HISTORY_SAMPLE_FORMAT_2)
+            hist = HistoryFile(hpath)
+            update!(hist)
+            @test length(hist) == 3
+            @test hist[1] == HistEntry(:julia, DateTime("2025-10-18T18:21:03"), "Iterators.partition([1,2,3,4,5,6,7], 2) |> eltype", 1)
+            @test hist[2] == HistEntry(:julia, DateTime("2025-10-19T06:27:10"), "using Chairmarks", 2)
+            @test hist[3] == HistEntry(:julia, DateTime("2025-10-19T06:27:18"), "@b REPL.History.HistoryFile(\"/home/tec/.julia/logs/repl_history.jl\") REPL.History.update!", 3)
+            close(hist)
+        end
+        @testset "Malformed" begin
+            write(hpath, HISTORY_SAMPLE_MALFORMED)
+            hist = HistoryFile(hpath)
+            @test_warn "Malformed history entry" update!(hist)
+            @test length(hist) == 0
+            close(hist)
+        end
+        @testset "Spaces instead of tabs" begin
+            write(hpath, HISTORY_SAMPLE_BAD_SPACES)
+            hist = HistoryFile(hpath)
+            @test_warn "Malformed history content" update!(hist)
+            @test length(hist) == 0
+            close(hist)
+        end
+        @testset "Incomplete entry" begin
+            write(hpath, HISTORY_SAMPLE_INCOMPLETE)
+            hist = HistoryFile(hpath)
+            @test_nowarn update!(hist)
+            @test length(hist) == 1
+            @test hist[1] == HistEntry(:julia, DateTime("2025-05-10T12:34:56"), "foo()", 1)
+            close(hist)
+        end
+    end
+
+    @testset "History round trip" begin
+        write(hpath, "")
+        hist = HistoryFile(hpath)
+        entries = [
+            HistEntry(:julia, DateTime("2024-06-01T10:00:00"), "println(\"Hello, World!\")", 0),
+            HistEntry(:shell, DateTime("2024-06-01T10:05:00"), "ls -la", 0),
+            HistEntry(:help, DateTime("2024-06-01T10:10:00"), "? println", 0),
+        ]
+        for entry in entries
+            push!(hist, entry)
+        end
+        close(hist)
+        hist = HistoryFile(hpath)
+        update!(hist)
+        @test length(hist) == length(entries)
+        for (i, entry) in enumerate(entries)
+            @test hist[i].mode == entry.mode
+            @test hist[i].date == entry.date
+            @test hist[i].content == entry.content
+            @test hist[i].index == i
+        end
+        close(hist)
+    end
+
+    @testset "Incremental updating" begin
+        write(hpath, HISTORY_SAMPLE_FORMAT_1)
+        hist_a = HistoryFile(hpath)
+        hist_b = HistoryFile(hpath)
+        update!(hist_a)
+        update!(hist_b)
+        @test length(hist_b) == 5
+        push!(hist_a, HistEntry(:julia, now(UTC), "2 + 2", 0))
+        @test length(hist_a) == 6
+        update!(hist_b)
+        @test length(hist_b) == 6
+        @test hist_b[end] == hist_a[end]
+        push!(hist_b, HistEntry(:shell, now(UTC), "echo 'Hello'", 0))
+        @test length(hist_b) == 7
+        update!(hist_a)
+        @test length(hist_a) == 7
+        @test hist_a[end] == hist_b[end]
+        close(hist_a)
+        close(hist_b)
+    end
+end
+
+@testset "Filtering" begin
+    @testset "ConditionSet" begin
+        @testset "Parsing" begin
+            @testset "Basic" begin
+                cset = ConditionSet("hello world")
+                @test cset.words == [SubString("hello world")]
+                @test isempty(cset.exacts)
+                @test isempty(cset.negatives)
+                @test isempty(cset.initialisms)
+                @test isempty(cset.fuzzy)
+                @test isempty(cset.regexps)
+                @test isempty(cset.modes)
+            end
+            @testset "Exact match" begin
+                cset = ConditionSet("=exact")
+                @test cset.exacts == [SubString("exact")]
+            end
+            @testset "Negative match" begin
+                cset = ConditionSet("!exclude")
+                @test cset.negatives == [SubString("exclude")]
+            end
+            @testset "Initialism" begin
+                cset = ConditionSet("`im")
+                @test cset.initialisms == [SubString("im")]
+            end
+            @testset "Regexp" begin
+                cset = ConditionSet("/foo.*bar")
+                @test cset.regexps == [SubString("foo.*bar")]
+            end
+            @testset "Mode" begin
+                cset = ConditionSet("shell>")
+                @test cset.modes == [SubString("shell")]
+            end
+            @testset "Fuzzy" begin
+                cset = ConditionSet("~fuzzy")
+                @test cset.fuzzy == [SubString("fuzzy")]
+            end
+            @testset "Space trimming" begin
+                cset = ConditionSet("  word with spaces  ")
+                @test cset.words == [SubString("word with spaces")]
+            end
+            @testset "Escaped prefix" begin
+                cset = ConditionSet("\\=not exact")
+                @test cset.words == [SubString("=not exact")]
+            end
+            @testset "Multiple conditions" begin
+                cset = ConditionSet("word;=exact;!neg")
+                @test cset.words == [SubString("word")]
+                @test cset.exacts == [SubString("exact")]
+                @test cset.negatives == [SubString("neg")]
+            end
+            @testset "Escaped separator" begin
+                cset = ConditionSet("hello\\;world;=exact")
+                @test cset.words == [SubString("hello;world")]
+                @test cset.exacts == [SubString("exact")]
+                cset = ConditionSet("1 \\; 2")
+                @test cset.words == [SubString("1 ; 2")]
+            end
+            @testset "Complex query" begin
+                cset = ConditionSet("some = words ;; !error ; julia> ;/^def.*;")
+                @test cset.words == [SubString("some = words")]
+                @test cset.negatives == [SubString("error")]
+                @test cset.modes == [SubString("julia")]
+                @test cset.regexps == [SubString("^def.*")]
+            end
+        end
+    end
+    @testset "FilterSpec" begin
+        @testset "Construction" begin
+            @testset "Words" begin
+                cset = ConditionSet("bag of words")
+                spec = FilterSpec(cset)
+                @test isempty(spec.exacts)
+                @test spec.regexps == [r"\Qbag\E"i, r"\Qof\E"i, r"\Qwords\E"i]
+                cset2 = ConditionSet("Bag of Words")
+                spec2 = FilterSpec(cset2)
+                @test spec2.exacts == ["Bag", "of", "Words"]
+                @test isempty(spec2.regexps)
+            end
+            @testset "Complex query" begin
+                cset = ConditionSet("=exact;!neg;/foo.*bar;julia>")
+                spec = FilterSpec(cset)
+                @test spec.exacts == ["exact"]
+                @test spec.negatives == ["neg"]
+                @test spec.regexps == [r"foo.*bar"]
+                @test spec.modes == [:julia]
+            end
+        end
+        @testset "Matching" begin
+            entries = [
+                HistEntry(:julia, now(UTC), "println(\"hello world\")", 1),
+                HistEntry(:julia, now(UTC), "log2(1234.5)", 1),
+                HistEntry(:julia, now(UTC), "test case", 1),
+                HistEntry(:help, now(UTC), "cos", 1),
+                HistEntry(:julia, now(UTC), "cos(2π)", 1),
+                HistEntry(:julia, now(UTC), "case of tests", 1),
+                HistEntry(:shell, now(UTC), "echo 'Hello World'", 4),
+                HistEntry(:julia, now(UTC), "foo_bar(2, 7)", 5),
+                HistEntry(:julia, now(UTC), "test_fun()", 5),
+            ]
+            results = HistEntry[]
+            @testset "Words" begin
+                empty!(results)
+                cset = ConditionSet("hello")
+                spec = FilterSpec(cset)
+                seen = Set{Tuple{Symbol,String}}()
+                @test filterchunkrev!(results, entries, spec, seen) == 0
+                @test results == [entries[1], entries[7]]
+                empty!(results)
+                cset2 = ConditionSet("world")
+                spec2 = FilterSpec(cset2)
+                empty!(seen)
+                @test filterchunkrev!(results, entries, spec2, seen) == 0
+                @test results == [entries[1], entries[7]]
+                empty!(results)
+                cset3 = ConditionSet("World")
+                spec3 = FilterSpec(cset3)
+                empty!(seen)
+                @test filterchunkrev!(results, entries, spec3, seen) == 0
+                @test results == [entries[7]]
+            end
+            @testset "Exact" begin
+                empty!(results)
+                cset = ConditionSet("=test")
+                spec = FilterSpec(cset)
+                seen = Set{Tuple{Symbol,String}}()
+                @test filterchunkrev!(results, entries, spec, seen; maxresults = 2) == 5
+                @test results == [entries[6], entries[9]]
+                empty!(results)
+                cset2 = ConditionSet("=test case")
+                spec2 = FilterSpec(cset2)
+                empty!(seen)
+                @test filterchunkrev!(results, entries, spec2, seen) == 0
+                @test results == [entries[3]]
+            end
+            @testset "Negative" begin
+                empty!(results)
+                cset = ConditionSet("!hello ; !test;! cos")
+                spec = FilterSpec(cset)
+                seen = Set{Tuple{Symbol,String}}()
+                @test filterchunkrev!(results, entries, spec, seen) == 0
+                @test results == [entries[2], entries[7], entries[8]]
+            end
+            @testset "Initialism" begin
+                empty!(results)
+                cset = ConditionSet("`tc")
+                spec = FilterSpec(cset)
+                seen = Set{Tuple{Symbol,String}}()
+                @test filterchunkrev!(results, entries, spec, seen) == 0
+                @test results == [entries[3]]
+                empty!(results)
+                cset2 = ConditionSet("`fb")
+                spec2 = FilterSpec(cset2)
+                empty!(seen)
+                @test filterchunkrev!(results, entries, spec2, seen) == 0
+                @test results == [entries[8]]
+            end
+            @testset "Regexp" begin
+                empty!(results)
+                cset = ConditionSet("/^c.s\\b")
+                spec = FilterSpec(cset)
+                seen = Set{Tuple{Symbol,String}}()
+                @test filterchunkrev!(results, entries, spec, seen) == 0
+                @test results == [entries[4], entries[5]]
+            end
+            @testset "Mode" begin
+                empty!(results)
+                cset = ConditionSet("shell>")
+                spec = FilterSpec(cset)
+                seen = Set{Tuple{Symbol,String}}()
+                @test filterchunkrev!(results, entries, spec, seen) == 0
+                @test results == [entries[7]]
+            end
+            @testset "Fuzzy" begin
+                empty!(results)
+                cset = ConditionSet("~cs")
+                spec = FilterSpec(cset)
+                seen = Set{Tuple{Symbol,String}}()
+                @test filterchunkrev!(results, entries, spec, seen) == 0
+                @test results == entries[3:6]
+            end
+            @testset "Uniqueness" begin
+                empty!(results)
+                # Create entries with duplicate content in the same mode
+                dup_entries = [
+                    HistEntry(:julia, now(UTC), "println(\"hello\")", 1),
+                    HistEntry(:julia, now(UTC), "cos(2π)", 2),
+                    HistEntry(:julia, now(UTC), "println(\"hello\")", 3),  # duplicate
+                    HistEntry(:julia, now(UTC), "sin(π)", 4),
+                    HistEntry(:julia, now(UTC), "cos(2π)", 5),  # duplicate
+                    HistEntry(:julia, now(UTC), "println(\"hello\")", 6),  # duplicate
+                    HistEntry(:julia, now(UTC), "tan(π/4)", 7),
+                ]
+                # When filtering with seen Set, duplicates are removed
+                cset = ConditionSet("cos")
+                spec = FilterSpec(cset)
+                seen = Set{Tuple{Symbol,String}}()
+                @test filterchunkrev!(results, dup_entries, spec, seen) == 0
+                # Should only get unique entries matching the filter
+                # Since we iterate in reverse (7->1), we keep the most recent occurrence of each unique content
+                @test length(results) == 1
+                @test results[1] == dup_entries[5]  # cos(2π) - most recent
+                # When browsing without filtering, duplicates are kept
+                empty!(results)
+                append!(results, dup_entries)
+                @test length(results) == 7  # All entries, including duplicates
+                @test results == dup_entries
+                # Test that same content in different modes is NOT deduplicated
+                empty!(results)
+                mode_entries = [
+                    HistEntry(:julia, now(UTC), "ls", 1),
+                    HistEntry(:shell, now(UTC), "ls", 2),
+                    HistEntry(:julia, now(UTC), "ls", 3),  # duplicate in :julia mode
+                    HistEntry(:shell, now(UTC), "pwd", 4),
+                ]
+                empty!(seen)
+                cset3 = ConditionSet("ls")
+                spec3 = FilterSpec(cset3)
+                @test filterchunkrev!(results, mode_entries, spec3, seen) == 0
+                @test length(results) == 2  # "ls" from :julia and "ls" from :shell
+                @test results[1] == mode_entries[2]  # :shell ls
+                @test results[2] == mode_entries[3]  # :julia ls (most recent)
+            end
+        end
+        @testset "Strictness comparison" begin
+            c1 = ConditionSet("hello world")
+            c2 = ConditionSet("hello world more")
+            c3 = ConditionSet("hello world more;!exclude")
+            @test ismorestrict(c2, c1)
+            @test !ismorestrict(c1, c2)
+            @test ismorestrict(c3, c2)
+            @test !ismorestrict(c2, c3)
+            @test ismorestrict(c3, c1)
+            @test !ismorestrict(c1, c3)
+        end
+    end
+end
+
+@testset "Display calculations" begin
+    entries = [HistEntry(:julia, now(UTC), "test_$i", i) for i in 1:20]
+    @testset "componentrows" begin
+        @testset "Standard terminal" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries)
+            @test componentrows(state) == (candidates = 13, preview = 6)
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = [1, 3], gathered = HistEntry[]), 1)
+            @test componentrows(state) == (candidates = 13, preview = 6)
+            gathered = [HistEntry(:julia, now(UTC), "old", i) for i in 21:22]
+            state = SelectorState((30, 80), "", FilterSpec(), entries, gathered)
+            @test componentrows(state) == (candidates = 13, preview = 6)
+        end
+        @testset "Terminal size variations" begin
+            @test componentrows(SelectorState((10, 80), "", FilterSpec(), entries)) == (candidates = 6, preview = 0)
+            @test componentrows(SelectorState((5, 40), "", FilterSpec(), entries)) == (candidates = 2, preview = 0)
+            @test componentrows(SelectorState((1, 80), "", FilterSpec(), entries)) == (candidates = 0, preview = 0)
+            @test componentrows(SelectorState((100, 200), "", FilterSpec(), entries)) == (candidates = 44, preview = 22)
+        end
+        @testset "Preview clamping" begin
+            multiline = join(["line$i" for i in 1:20], '\n')
+            state = SelectorState((30, 80), "", FilterSpec(), [HistEntry(:julia, now(UTC), multiline, 1)], 0, (active = [1], gathered = HistEntry[]), 1)
+            @test componentrows(state) == (candidates = 7, preview = 12)
+        end
+    end
+    @testset "countlines_selected" begin
+        @testset "Basic counting" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries)
+            @test countlines_selected(state) == 0
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = [1], gathered = HistEntry[]), 1)
+            @test countlines_selected(state) == 1
+        end
+        @testset "Multi-line entries" begin
+            code = "begin\n    x = 10\n    y = 20\n    x + y\nend"
+            state = SelectorState((30, 80), "", FilterSpec(), [HistEntry(:julia, now(UTC), code, 1)], 0, (active = [1], gathered = HistEntry[]), 1)
+            @test countlines_selected(state) == 5
+            huge = join(["line" for _ in 1:1000], '\n')
+            state = SelectorState((30, 80), "", FilterSpec(), [HistEntry(:julia, now(UTC), huge, 1)], 0, (active = [1], gathered = HistEntry[]), 1)
+            @test countlines_selected(state) == 1000
+        end
+        @testset "With gathered entries" begin
+            gathered = [HistEntry(:julia, now(UTC), "old", i) for i in 21:22]
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = [1], gathered), 1)
+            @test countlines_selected(state) == 4
+        end
+    end
+    @testset "gethover" begin
+        @testset "Basic retrieval" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries)
+            @test gethover(state) == entries[20]
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 3)
+            @test gethover(state) == entries[18]
+        end
+        @testset "With gathered entries" begin
+            gathered = [HistEntry(:julia, now(UTC), "old_$i", i) for i in 21:22]
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered), -2)
+            @test gethover(state) == gathered[2]
+        end
+        @testset "Invalid hover positions" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 0)
+            @test gethover(state) === nothing
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 999)
+            @test gethover(state) === nothing
+        end
+    end
+    @testset "candidates" begin
+        @testset "Basic windowing" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries)
+            cands = candidates(state, 10)
+            @test cands.active.rows == 10
+            @test cands.active.width == 80
+            @test cands.active.entries == entries[11:20]
+            @test cands.active.selected == Int[]
+            @test cands.gathered.rows == 0
+        end
+        @testset "With selections" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = [5, 15, 18], gathered = HistEntry[]), 1)
+            cands = candidates(state, 10)
+            @test cands.active.selected == [-5, 5, 8]
+        end
+        @testset "With gathered entries" begin
+            gathered = [HistEntry(:julia, now(UTC), "gathered_$i", 20+i) for i in 1:2]
+            state = SelectorState((30, 80), "", FilterSpec(), entries, gathered)
+            state = SelectorState(state.area, state.query, state.filter, state.candidates, -2, state.selection, 1)
+            cands = candidates(state, 10)
+            @test cands.gathered.rows == 2
+            @test cands.gathered.entries == gathered
+            @test cands.gathered.selected == [1, 2]
+        end
+        @testset "Scrolling" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 6)
+            state = SelectorState(state.area, state.query, state.filter, state.candidates, 5, state.selection, 6)
+            cands = candidates(state, 10)
+            @test cands.active.entries[1] == entries[6]
+            @test cands.active.entries[end] == entries[15]
+        end
+        @testset "Edge cases" begin
+            state = SelectorState((30, 80), "", FilterSpec(), HistEntry[])
+            cands = candidates(state, 10)
+            @test isempty(cands.active.entries)
+            @test cands.active.rows == 10
+            gathered = [HistEntry(:julia, now(UTC), "old_$i", 20+i) for i in 1:15]
+            state = SelectorState((30, 80), "", FilterSpec(), entries, gathered)
+            state = SelectorState(state.area, state.query, state.filter, state.candidates, -10, state.selection, -1)
+            cands = candidates(state, 8)
+            @test cands.gathered.rows == 7
+            @test cands.active.rows == 0
+            few = [HistEntry(:julia, now(UTC), "entry_$i", i) for i in 1:3]
+            state = SelectorState((30, 80), "", FilterSpec(), few)
+            cands = candidates(state, 20)
+            @test cands.active.entries == few
+        end
+    end
+end
+
+@testset "Search state manipulation" begin
+    entries = [HistEntry(:julia, now(UTC), "test_$i", i) for i in 1:20]
+    @testset "movehover" begin
+        @testset "Single step moves" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 5)
+            @test movehover(state, false, false).hover == 4
+            @test movehover(state, true, false).hover == 6
+        end
+        @testset "Page moves" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 5)
+            @test movehover(state, false, true).hover == 1
+            @test movehover(state, true, true).hover == 17
+        end
+        @testset "Boundary clamping" begin
+            top = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 20)
+            @test movehover(top, true, false).hover == 20
+            bottom = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 1)
+            @test movehover(bottom, false, false).hover == 1
+        end
+        @testset "With gathered entries" begin
+            gathered = [HistEntry(:julia, now(UTC), "old_cmd", 21)]
+            state = SelectorState((30, 80), "", FilterSpec(), entries, gathered)
+            state = SelectorState(state.area, state.query, state.filter, state.candidates, -1, state.selection, 1)
+            @test movehover(state, false, false).hover == -1
+            state = SelectorState(state.area, state.query, state.filter, state.candidates, -1, state.selection, 1)
+            down = movehover(state, false, false)
+            @test down.hover == -1
+            up = movehover(down, true, false)
+            @test up.hover == 1
+        end
+        @testset "Empty candidates" begin
+            state = SelectorState((30, 80), "", FilterSpec(), HistEntry[])
+            @test movehover(state, true, false).hover == 1
+            @test movehover(state, false, false).hover == 1
+            gathered = [HistEntry(:julia, now(UTC), "old_cmd", 1)]
+            state = SelectorState((30, 80), "", FilterSpec(), HistEntry[], gathered)
+            state = SelectorState(state.area, state.query, state.filter, state.candidates, -1, state.selection, -1)
+            @test movehover(state, true, false).hover == 1
+            @test movehover(state, false, false).hover == -1
+        end
+        @testset "Single candidate" begin
+            one = [HistEntry(:julia, now(UTC), "only", 1)]
+            state = SelectorState((30, 80), "", FilterSpec(), one)
+            @test movehover(state, true, false).hover == 1
+            @test movehover(state, false, false).hover == 1
+        end
+    end
+    @testset "toggleselection" begin
+        @testset "Basic toggle" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries)
+            state = toggleselection(state)
+            @test state.selection.active == [20]
+            state = toggleselection(state)
+            @test state.selection.active == Int[]
+        end
+        @testset "Multiple selections" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries)
+            state = toggleselection(state)
+            state = movehover(state, true, false)
+            state = movehover(state, true, false)
+            state = toggleselection(state)
+            @test state.selection.active == [18, 20]
+        end
+        @testset "Gathered entries" begin
+            gathered = [HistEntry(:julia, now(UTC), "old_$i", 20+i) for i in 1:2]
+            state = SelectorState((30, 80), "", FilterSpec(), entries, -1, (active = Int[], gathered), -1)
+            @test toggleselection(state).selection.gathered == [gathered[2]]
+        end
+        @testset "Edge cases" begin
+            invalid = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 0)
+            @test toggleselection(invalid) === invalid
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], gathered = HistEntry[]), 1)
+            result = toggleselection(state)
+            @test 20 ∉ result.selection.active
+            state = SelectorState((30, 80), "", FilterSpec(), HistEntry[])
+            @test toggleselection(state) === state
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 100)
+            @test toggleselection(state) === state
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 20)
+            @test 1 in toggleselection(state).selection.active
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 1)
+            @test 20 in toggleselection(state).selection.active
+        end
+    end
+    @testset "fullselection" begin
+        entries = [
+            HistEntry(:julia, now(UTC), "using DataFrames", 1),
+            HistEntry(:julia, now(UTC), "df = load_data()", 2),
+            HistEntry(:shell, now(UTC), "cat data.csv", 3),
+            HistEntry(:julia, now(UTC), "describe(df)", 4),
+        ]
+        @testset "No selection" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries)
+            @test fullselection(state) == (mode = :julia, text = "describe(df)")
+        end
+        @testset "Single selection" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = [2], gathered = HistEntry[]), 1)
+            @test fullselection(state) == (mode = :julia, text = "df = load_data()")
+        end
+        @testset "Multiple selections" begin
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = [4, 1, 3], gathered = HistEntry[]), 1)
+            @test fullselection(state) == (mode = :julia, text = "using DataFrames\ncat data.csv\ndescribe(df)")
+        end
+        @testset "With gathered entries" begin
+            gathered = [HistEntry(:julia, now(UTC), "ENV[\"COLUMNS\"] = 120", 0)]
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = [2], gathered), 1)
+            @test fullselection(state) == (mode = :julia, text = "ENV[\"COLUMNS\"] = 120\ndf = load_data()")
+        end
+        @testset "Edge cases" begin
+            state = SelectorState((30, 80), "", FilterSpec(), HistEntry[], 0, (active = Int[], gathered = HistEntry[]), 1)
+            @test fullselection(state) == (mode = nothing, text = "")
+            state = SelectorState((30, 80), "", FilterSpec(), entries, 0, (active = Int[], gathered = HistEntry[]), 100)
+            @test fullselection(state) == (mode = nothing, text = "")
+            state = SelectorState((30, 80), "", FilterSpec(), HistEntry[], 0, (active = Int[], gathered = HistEntry[]), -1)
+            @test fullselection(state) == (mode = nothing, text = "")
+            gathered = [HistEntry(:julia, now(UTC), "old_1", 1)]
+            state = SelectorState((30, 80), "", FilterSpec(), HistEntry[], 0, (active = Int[], gathered), -1)
+            @test fullselection(state) == (mode = :julia, text = "old_1")
+        end
+    end
+    @testset "addcache!" begin
+        cache, state = Int[], zero(UInt8)
+        for i in 1:128
+            state = addcache!(cache, state, i)
+        end
+        @test cache == [1, 65, 97, 113, 121, 125, 127, 128]
+    end
+end
+
+# TODO: Prompt handling/events, terminal rendering, and end-to-end integration tests
diff --git a/stdlib/REPL/test/lineedit.jl b/stdlib/REPL/test/lineedit.jl
index cf87e811508a0..d86a6c6b5642a 100644
--- a/stdlib/REPL/test/lineedit.jl
+++ b/stdlib/REPL/test/lineedit.jl
@@ -940,3 +940,229 @@ end
     strings3 = ["abcdef", "123456\nijklmn"]
     @test getcompletion(strings3) == "\033[0B\nabcdef\n123456\nijklmn\n"
 end
+
+# Test bracket insertion functionality
+@testset "Bracket insertion" begin
+    # Test bracket insertion with a fake REPL that has bracket completion enabled
+    term = FakeTerminal(IOBuffer(), IOBuffer(), IOBuffer())
+    prompt = LineEdit.Prompt("test> ")
+
+    # Build keymap with bracket insertion enabled (as it would be in practice)
+    base_keymaps = Dict{Any,Any}[LineEdit.bracket_insert_keymap, LineEdit.default_keymap, LineEdit.escape_defaults]
+    prompt.keymap_dict = LineEdit.keymap(base_keymaps)
+
+    interface = LineEdit.ModalInterface([prompt])
+    s = LineEdit.init_state(term, interface)
+
+    # Helper to write characters as stdin input
+    write_input(s, str) = for c in str
+        buf = IOBuffer(string(c))
+        LineEdit.match_input(prompt.keymap_dict, s, buf)(s, buf)
+    end
+
+    # Test left bracket at EOF triggers auto-complete
+    write_input(s, "(")
+    @test content(s) == "()"
+    @test position(buffer(s)) == 1
+
+    # Test right bracket skips over matching bracket
+    write_input(s, ")")
+    @test content(s) == "()"
+    @test position(buffer(s)) == 2
+
+    # Test backspace removes both brackets
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "(")
+    write_input(s, "\b")
+    @test content(s) == ""
+    @test position(buffer(s)) == 0
+
+    # Test quote insertion at EOF
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "\"")
+    @test content(s) == "\"\""
+    @test position(buffer(s)) == 1
+
+    # Test quote skip over
+    write_input(s, "\"")
+    @test content(s) == "\"\""
+    @test position(buffer(s)) == 2
+
+    # Test transpose detection - single quote after letter shouldn't auto-complete
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "A")
+    write_input(s, "'")
+    @test content(s) == "A'"
+    @test position(buffer(s)) == 2
+
+    # Test single quote after space should auto-complete
+    s = LineEdit.init_state(term, interface)
+    write_input(s, " ")
+    write_input(s, "'")
+    @test content(s) == " ''"
+    @test position(buffer(s)) == 2
+
+    # Test bracket not inserted when next char is not whitespace
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "x")
+    charseek(buffer(s), 0)
+    write_input(s, "(")
+    @test content(s) == "(x"
+    @test position(buffer(s)) == 1
+
+    # Test all bracket types
+    for (left, right) in (('[', ']'), ('{', '}'))
+        s = LineEdit.init_state(term, interface)
+        write_input(s, string(left))
+        @test content(s) == string(left, right)
+        @test position(buffer(s)) == 1
+        write_input(s, string(right))
+        @test position(buffer(s)) == 2
+        write_input(s, "\b")
+        @test content(s) == string(left)
+        @test position(buffer(s)) == 1
+        write_input(s, "\b")
+        @test content(s) == ""
+        @test position(buffer(s)) == 0
+    end
+
+    # Test all quote types
+    for quote_char in ('`', '"', '\'')
+        s = LineEdit.init_state(term, interface)
+        write_input(s, string(quote_char))
+        @test content(s) == string(quote_char, quote_char)
+        @test position(buffer(s)) == 1
+    end
+
+    # Test nested brackets
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "(")
+    write_input(s, "[")
+    @test content(s) == "([])"
+    @test position(buffer(s)) == 2
+    write_input(s, "]")
+    @test position(buffer(s)) == 3
+    write_input(s, ")")
+    @test position(buffer(s)) == 4
+
+    # Test backspace in middle of nested brackets
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "(")
+    write_input(s, "{")
+    @test content(s) == "({})"
+    @test position(buffer(s)) == 2
+    write_input(s, "\b")
+    @test content(s) == "()"
+    @test position(buffer(s)) == 1
+
+    # Test triple quotes don't auto-complete
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "\"")
+    @test content(s) == "\"\""
+    @test position(buffer(s)) == 1
+    write_input(s, "\"")
+    @test content(s) == "\"\""
+    @test position(buffer(s)) == 2
+    write_input(s, "\"")
+    @test content(s) == "\"\"\""
+    @test position(buffer(s)) == 3
+
+    # Test transpose detection for various cases
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "x123")
+    write_input(s, "'")
+    @test content(s) == "x123'"
+    @test position(buffer(s)) == 5
+
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "arr]")
+    write_input(s, "'")
+    @test content(s) == "arr]'"
+    @test position(buffer(s)) == 5
+
+    # Test right bracket insert when not matching
+    s = LineEdit.init_state(term, interface)
+    write_input(s, ")")
+    @test content(s) == ")"
+    @test position(buffer(s)) == 1
+
+    # Test backspace doesn't remove mismatched brackets
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "(")
+    write_input(s, "]")
+    charseek(buffer(s), 1)
+    write_input(s, "\b")
+    @test content(s) == "])"
+    @test position(buffer(s)) == 0
+
+    # Test bracket insertion followed by whitespace
+    s = LineEdit.init_state(term, interface)
+    write_input(s, " ")
+    charseek(buffer(s), 0)
+    write_input(s, "(")
+    @test content(s) == "() "
+    @test position(buffer(s)) == 1
+
+    # Test quote behavior: |foo" + " -> "foo" (not ""foo")
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "foo\"")
+    charseek(buffer(s), 0)
+    write_input(s, "\"")
+    @test content(s) == "\"foo\""
+    @test position(buffer(s)) == 1
+
+    # Test quote behavior: foo| + " -> foo" (not foo"")
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "foo")
+    write_input(s, "\"")
+    @test content(s) == "foo\""
+    @test position(buffer(s)) == 4
+
+    # Test quote behavior: foo | + " -> foo ""
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "foo ")
+    write_input(s, "\"")
+    @test content(s) == "foo \"\""
+    @test position(buffer(s)) == 5
+
+    # Test quote behavior: | foo + " -> "" foo (space before foo means double quotes)
+    s = LineEdit.init_state(term, interface)
+    write_input(s, " foo")
+    charseek(buffer(s), 0)
+    write_input(s, "\"")
+    @test content(s) == "\"\" foo"
+    @test position(buffer(s)) == 1
+
+    # Test quote behavior:  | + " -> ""
+    s = LineEdit.init_state(term, interface)
+    write_input(s, " ")
+    write_input(s, "\"")
+    @test content(s) == " \"\""
+    @test position(buffer(s)) == 2
+
+    # Test quote behavior: (|) + " -> ("")
+    s = LineEdit.init_state(term, interface)
+    write_input(s, ")")
+    charseek(buffer(s), 0)
+    write_input(s, "(")
+    # Buffer is now () with cursor at 1
+    write_input(s, "\"")
+    @test content(s) == "(\"\"))"
+    @test position(buffer(s)) == 2
+
+    # Test quote behavior: (|bar) + " -> ("bar)
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "(bar)")
+    charseek(buffer(s), 1)
+    write_input(s, "\"")
+    @test content(s) == "(\"bar)"
+    @test position(buffer(s)) == 2
+
+    # Test bracket behavior: "|" + ( -> "()"
+    s = LineEdit.init_state(term, interface)
+    write_input(s, "\"\"")
+    charseek(buffer(s), 1)
+    write_input(s, "(")
+    @test content(s) == "\"()\""
+    @test position(buffer(s)) == 2
+end
diff --git a/stdlib/REPL/test/precompilation.jl b/stdlib/REPL/test/precompilation.jl
new file mode 100644
index 0000000000000..57b6b27be6e7c
--- /dev/null
+++ b/stdlib/REPL/test/precompilation.jl
@@ -0,0 +1,58 @@
+
+## Tests that compilation in the interactive session startup are as expected
+
+using Test
+Base.include(@__MODULE__, joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test", "testhelpers", "FakePTYs.jl"))
+import .FakePTYs: open_fake_pty
+
+if !Sys.iswindows()
+    # TODO: reenable this on Windows. Without it we're not checking that Windows startup has no compilation.
+    # On Windows CI runners using `open_fake_pty` is causing:
+    # ----
+    # `stty: 'standard input': Inappropriate ioctl for device
+    # Unhandled Task ERROR: failed process: Process(`stty raw -echo onlcr -ocrnl opost`, ProcessExited(1)) [1]
+    # ----
+    @testset "No interactive startup compilation" begin
+        f, _ = mktemp()
+
+        # start an interactive session, ensuring `TERM` is unset since it can trigger
+        # different amounts of precompilation stemming from `base/terminfo.jl` depending
+        # on the value, making the test here unreliable
+        cmd = addenv(`$(Base.julia_cmd()) --trace-compile=$f -q --startup-file=no -i`,
+                     Dict("TERM" => ""))
+        pts, ptm = open_fake_pty()
+        p = run(cmd, pts, pts, pts; wait=false)
+        Base.close_stdio(pts)
+        std = readuntil(ptm, "julia>")
+        # check for newlines instead of equality with "julia>" because color may be on
+        occursin("\n", std) && @info "There was output before the julia prompt:\n$std"
+        @async write(ptm, "\n")  # another prompt
+        readuntil(ptm, "julia>")
+        @async write(ptm, "\n")  # another prompt
+        readuntil(ptm, "julia>")
+        tracecompile_out = read(f, String)
+        close(ptm) # close after reading so we don't get precompiles from error shutdown
+
+        # given this test checks that startup is snappy, it's best to add workloads to
+        # contrib/generate_precompile.jl rather than increase this number. But if that's not
+        # possible, it'd be helpful to add a comment with the statement and a reason below
+        expected_precompiles = 0
+
+        n_precompiles = count(r"precompile\(", tracecompile_out)
+
+        @test n_precompiles <= expected_precompiles
+
+        if n_precompiles == 0
+            @debug "REPL: trace compile output: (none)"
+        elseif n_precompiles > expected_precompiles
+            @info "REPL: trace compile output:\n$tracecompile_out"
+        else
+            @debug "REPL: trace compile output:\n$tracecompile_out"
+        end
+        # inform if lowered
+        if expected_precompiles > 0 && (n_precompiles < expected_precompiles)
+            @info "REPL: Actual number of precompiles has dropped below expected." n_precompiles expected_precompiles
+        end
+
+    end
+end
diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl
index f0d5052ff9e32..b93c3371306b6 100644
--- a/stdlib/REPL/test/repl.jl
+++ b/stdlib/REPL/test/repl.jl
@@ -3,10 +3,15 @@
 using Test
 using REPL
 using Random
+using Logging
 import REPL.LineEdit
 using Markdown
 
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+empty!(Base.Experimental._hint_handlers) # unregister error hints so they can be tested separately
+
+@test Base.REPL_MODULE_REF[] === REPL
+
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")
 isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl"))
 import .Main.FakePTYs: with_fake_pty
 
@@ -16,7 +21,6 @@ include(joinpath(BASE_TEST_PATH, "testenv.jl"))
 include("FakeTerminals.jl")
 import .FakeTerminals.FakeTerminal
 
-
 function kill_timer(delay)
     # Give ourselves a generous timer here, just to prevent
     # this causing e.g. a CI hang when there's something unexpected in the output.
@@ -28,7 +32,7 @@ function kill_timer(delay)
         # **DON'T COPY ME.**
         # The correct way to handle timeouts is to close the handle:
         # e.g. `close(stdout_read); close(stdin_write)`
-        test_task.queue === nothing || Base.list_deletefirst!(test_task.queue, test_task)
+        test_task.queue === nothing || Base.list_deletefirst!(test_task.queue::Base.IntrusiveLinkedList{Task}, test_task)
         schedule(test_task, "hard kill repl test"; error=true)
         print(stderr, "WARNING: attempting hard kill of repl test after exceeding timeout\n")
     end
@@ -63,7 +67,7 @@ end
 #end
 
 # REPL tests
-function fake_repl(@nospecialize(f); options::REPL.Options=REPL.Options(confirm_exit=false))
+function fake_repl(@nospecialize(f); options::REPL.Options=REPL.Options(confirm_exit=false,style_input=false,auto_insert_closing_bracket=false))
     # Use pipes so we can easily do blocking reads
     # In the future if we want we can add a test that the right object
     # gets displayed by intercepting the display
@@ -111,13 +115,13 @@ fake_repl() do stdin_write, stdout_read, repl
     Base.wait(repltask)
 end
 
-# These are integration tests. If you want to unit test test e.g. completion, or
+# These are integration tests. If you want to unit test e.g. completion, or
 # exact LineEdit behavior, put them in the appropriate test files.
 # Furthermore since we are emulating an entire terminal, there may be control characters
 # in the mix. If verification needs to be done, keep it to the bare minimum. Basically
 # this should make sure nothing crashes without depending on how exactly the control
 # characters are being used.
-fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_write, stdout_read, repl
+fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true,style_input=false,auto_insert_closing_bracket=false)) do stdin_write, stdout_read, repl
     repl.specialdisplay = REPL.REPLDisplay(repl)
     repl.history_file = false
 
@@ -240,8 +244,9 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
         @test occursin("shell> ", s) # check for the echo of the prompt
         @test occursin("'", s) # check for the echo of the input
         s = readuntil(stdout_read, "\n\n")
-        @test startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n  [1] ") ||
-              startswith(s, "\e[0m\e[1m\e[91mERROR: \e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n  [1] ")
+        @test(startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n  [1] ") ||
+            startswith(s, "\e[0m\e[1m\e[91mERROR: \e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n  [1] "),
+            skip = Sys.iswindows() && Sys.WORD_SIZE == 32)
         write(stdin_write, "\b")
         wait(t)
     end
@@ -438,14 +443,13 @@ function AddCustomMode(repl, prompt)
         end
     )
 
-    search_prompt, skeymap = LineEdit.setup_search_keymap(hp)
     mk = REPL.mode_keymap(main_mode)
 
-    b = Dict{Any,Any}[skeymap, mk, LineEdit.history_keymap, LineEdit.default_keymap, LineEdit.escape_defaults]
+    b = Dict{Any,Any}[mk, LineEdit.history_keymap, LineEdit.default_keymap, LineEdit.escape_defaults]
     foobar_mode.keymap_dict = LineEdit.keymap(b)
 
     main_mode.keymap_dict = LineEdit.keymap_merge(main_mode.keymap_dict, foobar_keymap)
-    foobar_mode, search_prompt
+    foobar_mode
 end
 
 # Note: since the \t character matters for the REPL file history,
@@ -497,7 +501,8 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))]
         repl_mode = repl.interface.modes[1]
         shell_mode = repl.interface.modes[2]
         help_mode = repl.interface.modes[3]
-        histp = repl.interface.modes[4]
+        pkg_mode = repl.interface.modes[4]
+        # histp = repl.interface.modes[5]
         prefix_mode = repl.interface.modes[5]
 
         hp = REPL.REPLHistoryProvider(Dict{Symbol,Any}(:julia => repl_mode,
@@ -505,13 +510,12 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))]
                                                        :help  => help_mode))
         hist_path = tempname()
         write(hist_path, fakehistory)
-        REPL.hist_from_file(hp, hist_path)
-        f = open(hist_path, read=true, write=true, create=true)
-        hp.history_file = f
-        seekend(f)
+        hp.history = REPL.History.HistoryFile(hist_path)
+        REPL.history_do_initialize(hp)
         REPL.history_reset_state(hp)
 
-        histp.hp = repl_mode.hist = shell_mode.hist = help_mode.hist = hp
+        # histp.hp = repl_mode.hist = shell_mode.hist = help_mode.hist = hp
+        repl_mode.hist = shell_mode.hist = help_mode.hist = hp
 
         # Some manual setup
         s = LineEdit.init_state(repl.t, repl.interface)
@@ -565,6 +569,7 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))]
         @test buffercontents(LineEdit.buffer(s)) == "wip"
         @test position(LineEdit.buffer(s)) == 3
         # test that history_first jumps to beginning of current session's history
+        @test hp.start_idx == 11
         hp.start_idx -= 5 # temporarily alter history
         LineEdit.history_first(s, hp)
         @test hp.cur_idx == 6
@@ -613,115 +618,6 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))]
         @test LineEdit.input_string(ps) == "wip"
         @test position(LineEdit.buffer(s)) == 3
         LineEdit.accept_result(s, prefix_mode)
-
-        # Test that searching backwards puts you into the correct mode and
-        # skips invalid modes.
-        LineEdit.enter_search(s, histp, true)
-        ss = LineEdit.state(s, histp)
-        write(ss.query_buffer, "l")
-        LineEdit.update_display_buffer(ss, ss)
-        LineEdit.accept_result(s, histp)
-        @test LineEdit.mode(s) == shell_mode
-        @test buffercontents(LineEdit.buffer(s)) == "ls"
-        @test position(LineEdit.buffer(s)) == 0
-
-        # Test that searching for `ll` actually matches `ll` after
-        # both letters are types rather than jumping to `shell`
-        LineEdit.history_prev(s, hp)
-        LineEdit.enter_search(s, histp, true)
-        write(ss.query_buffer, "l")
-        LineEdit.update_display_buffer(ss, ss)
-        @test buffercontents(ss.response_buffer) == "ll"
-        @test position(ss.response_buffer) == 1
-        write(ss.query_buffer, "l")
-        LineEdit.update_display_buffer(ss, ss)
-        LineEdit.accept_result(s, histp)
-        @test LineEdit.mode(s) == shell_mode
-        @test buffercontents(LineEdit.buffer(s)) == "ll"
-        @test position(LineEdit.buffer(s)) == 0
-
-        # Test that searching backwards with a one-letter query doesn't
-        # return indefinitely the same match (#9352)
-        LineEdit.enter_search(s, histp, true)
-        write(ss.query_buffer, "l")
-        LineEdit.update_display_buffer(ss, ss)
-        LineEdit.history_next_result(s, ss)
-        LineEdit.update_display_buffer(ss, ss)
-        LineEdit.accept_result(s, histp)
-        @test LineEdit.mode(s) == repl_mode
-        @test buffercontents(LineEdit.buffer(s)) == "shell"
-        @test position(LineEdit.buffer(s)) == 4
-
-        # Test that searching backwards doesn't skip matches (#9352)
-        # (for a search with multiple one-byte characters, or UTF-8 characters)
-        LineEdit.enter_search(s, histp, true)
-        write(ss.query_buffer, "é") # matches right-most "é" in "éé"
-        LineEdit.update_display_buffer(ss, ss)
-        @test position(ss.query_buffer) == sizeof("é")
-        LineEdit.history_next_result(s, ss) # matches left-most "é" in "éé"
-        LineEdit.update_display_buffer(ss, ss)
-        LineEdit.accept_result(s, histp)
-        @test buffercontents(LineEdit.buffer(s)) == "éé"
-        @test position(LineEdit.buffer(s)) == 0
-
-        # Issue #7551
-        # Enter search mode and try accepting an empty result
-        REPL.history_reset_state(hp)
-        LineEdit.edit_clear(s)
-        cur_mode = LineEdit.mode(s)
-        LineEdit.enter_search(s, histp, true)
-        LineEdit.accept_result(s, histp)
-        @test LineEdit.mode(s) == cur_mode
-        @test buffercontents(LineEdit.buffer(s)) == ""
-        @test position(LineEdit.buffer(s)) == 0
-
-        # Test that new modes can be dynamically added to the REPL and will
-        # integrate nicely
-        foobar_mode, custom_histp = AddCustomMode(repl, prompt)
-
-        # ^R l, should now find `ls` in foobar mode
-        LineEdit.enter_search(s, histp, true)
-        ss = LineEdit.state(s, histp)
-        write(ss.query_buffer, "l")
-        LineEdit.update_display_buffer(ss, ss)
-        LineEdit.accept_result(s, histp)
-        @test LineEdit.mode(s) == foobar_mode
-        @test buffercontents(LineEdit.buffer(s)) == "ls"
-        @test position(LineEdit.buffer(s)) == 0
-
-        # Try the same for prefix search
-        LineEdit.history_next(s, hp)
-        LineEdit.history_prev_prefix(ps, hp, "l")
-        @test ps.parent == foobar_mode
-        @test LineEdit.input_string(ps) == "ls"
-        @test position(LineEdit.buffer(s)) == 1
-
-        # Some Unicode handling testing
-        LineEdit.history_prev(s, hp)
-        LineEdit.enter_search(s, histp, true)
-        write(ss.query_buffer, "x")
-        LineEdit.update_display_buffer(ss, ss)
-        @test buffercontents(ss.response_buffer) == "x ΔxΔ"
-        @test position(ss.response_buffer) == 4
-        write(ss.query_buffer, " ")
-        LineEdit.update_display_buffer(ss, ss)
-        LineEdit.accept_result(s, histp)
-        @test LineEdit.mode(s) == repl_mode
-        @test buffercontents(LineEdit.buffer(s)) == "x ΔxΔ"
-        @test position(LineEdit.buffer(s)) == 0
-
-        LineEdit.edit_clear(s)
-        LineEdit.enter_search(s, histp, true)
-        ss = LineEdit.state(s, histp)
-        write(ss.query_buffer, "Å") # should not be in history
-        LineEdit.update_display_buffer(ss, ss)
-        @test buffercontents(ss.response_buffer) == ""
-        @test position(ss.response_buffer) == 0
-        LineEdit.history_next_result(s, ss) # should not throw BoundsError
-        LineEdit.accept_result(s, histp)
-
-        # Try entering search mode while in custom repl mode
-        LineEdit.enter_search(s, custom_histp, true)
     end
 end
 
@@ -746,11 +642,11 @@ fake_repl() do stdin_write, stdout_read, repl
 
     # Test removal of prefix in single statement paste
     sendrepl2("\e[200~julia> A = 2\e[201~\n")
-    @test Main.A == 2
+    @test @world(Main.A, ∞) == 2
 
     # Test removal of prefix in single statement paste
     sendrepl2("\e[200~In [12]: A = 2.2\e[201~\n")
-    @test Main.A == 2.2
+    @test @world(Main.A, ∞) == 2.2
 
     # Test removal of prefix in multiple statement paste
     sendrepl2("""\e[200~
@@ -762,10 +658,10 @@ fake_repl() do stdin_write, stdout_read, repl
 
                     julia> A = 3\e[201~
              """)
-    @test Main.A == 3
-    @test Base.invokelatest(Main.foo, 4)
-    @test Base.invokelatest(Main.T17599, 3).a == 3
-    @test !Base.invokelatest(Main.foo, 2)
+    @test @world(Main.A, ∞) == 3
+    @test @invokelatest(Main.foo(4))
+    @test @invokelatest(Main.T17599(3)).a == 3
+    @test !@invokelatest(Main.foo(2))
 
     sendrepl2("""\e[200~
             julia> goo(x) = x + 1
@@ -774,12 +670,12 @@ fake_repl() do stdin_write, stdout_read, repl
             julia> A = 4
             4\e[201~
              """)
-    @test Main.A == 4
-    @test Base.invokelatest(Main.goo, 4) == 5
+    @test @world(Main.A, ∞) == 4
+    @test @invokelatest(Main.goo(4)) == 5
 
     # Test prefix removal only active in bracket paste mode
     sendrepl2("julia = 4\n julia> 3 && (A = 1)\n")
-    @test Main.A == 1
+    @test @world(Main.A, ∞) == 1
 
     # Test that indentation corresponding to the prompt is removed
     s = sendrepl2("""\e[200~julia> begin\n           α=1\n           β=2\n       end\n\e[201~""")
@@ -814,8 +710,8 @@ fake_repl() do stdin_write, stdout_read, repl
             julia> B = 2
             2\e[201~
              """)
-    @test Main.A == 1
-    @test Main.B == 2
+    @test @world(Main.A, ∞) == 1
+    @test @world(Main.B, ∞) == 2
     end # redirect_stdout
 
     # Close repl
@@ -920,7 +816,7 @@ function test19864()
     @eval Base.showerror(io::IO, e::Error19864) = print(io, "correct19864")
     buf = IOBuffer()
     fake_response = (Base.ExceptionStack([(exception=Error19864(),backtrace=Ptr{Cvoid}[])]),true)
-    REPL.print_response(buf, fake_response, false, false, nothing)
+    REPL.print_response(buf, fake_response, nothing, false, false, nothing)
     return String(take!(buf))
 end
 @test occursin("correct19864", test19864())
@@ -978,6 +874,13 @@ let ends_with_semicolon = REPL.ends_with_semicolon
     @test ends_with_semicolon("f()= 1;")
     # the next result does not matter because this is not legal syntax
     @test_nowarn ends_with_semicolon("1; #=# 2")
+
+    # #46189 - adjoint operator with comment
+    @test ends_with_semicolon("W';") == true
+    @test ends_with_semicolon("W'; # comment")
+    @test !ends_with_semicolon("W'")
+    @test !ends_with_semicolon("x'")
+    @test !ends_with_semicolon("'a'")
 end
 
 # PR #20794, TTYTerminal with other kinds of streams
@@ -1022,7 +925,7 @@ function history_move_prefix(s::LineEdit.MIState,
     hist.last_idx = -1
     idxs = backwards ? ((cur_idx-1):-1:1) : ((cur_idx+1):length(hist.history))
     for idx in idxs
-        if startswith(hist.history[idx], prefix) && hist.history[idx] != allbuf
+        if startswith(hist.history[idx].content, prefix) && hist.history[idx].content != allbuf
             REPL.history_move(s, hist, idx)
             seek(LineEdit.buffer(s), pos)
             LineEdit.refresh_line(s)
@@ -1078,7 +981,7 @@ for keys = [altkeys, merge(altkeys...)],
 
             # Close the history file
             # (otherwise trying to delete it fails on Windows)
-            close(repl.interface.modes[1].hist.history_file)
+            close(repl.interface.modes[1].hist.history)
 
             # Check that the correct prompt was displayed
             output = readuntil(stdout_read, "1 * 1;", keep=true)
@@ -1164,7 +1067,7 @@ fake_repl() do stdin_write, stdout_read, repl
     Base.wait(repltask)
 end
 
-help_result(line, mod::Module=Base) = Core.eval(mod, REPL._helpmode(IOBuffer(), line))
+help_result(line, mod::Module=Base) = Core.eval(mod, REPL._helpmode(IOBuffer(), line, mod))
 
 # Docs.helpmode tests: we test whether the correct expressions are being generated here,
 # rather than complete integration with Julia's REPL mode system.
@@ -1211,9 +1114,9 @@ global some_undef_global
 @test occursin("does not exist", sprint(show, help_result("..")))
 # test that helpmode is sensitive to contextual module
 @test occursin("No documentation found", sprint(show, help_result("Fix2", Main)))
-@test occursin("A type representing a partially-applied version", # exact string may change
+@test occursin("Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix).", # exact string may change
                sprint(show, help_result("Base.Fix2", Main)))
-@test occursin("A type representing a partially-applied version", # exact string may change
+@test occursin("Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix).", # exact string may change
                sprint(show, help_result("Fix2", Base)))
 
 
@@ -1249,6 +1152,7 @@ let emptyH1 = Markdown.parse("# "),
 end
 
 module BriefExtended
+public f, f_plain
 """
     f()
 
@@ -1395,6 +1299,126 @@ end
     Base.wait(backend.backend_task)
 end
 
+# Mimic of JSON.jl's structure
+module JSON54872
+
+module Parser
+export parse
+function parse end
+end # Parser
+
+using .Parser: parse
+end # JSON54872
+
+# Test the public mechanism
+module JSON54872_public
+public tryparse
+end # JSON54872_public
+
+@testset "warn_on_non_owning_accesses AST transform" begin
+    @test REPL.has_ancestor(JSON54872.Parser, JSON54872)
+    @test !REPL.has_ancestor(JSON54872, JSON54872.Parser)
+
+    # JSON54872.Parser owns `parse`
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        JSON54872.Parser.parse
+    end)
+    @test isempty(warnings)
+
+    # A submodule of `JSON54872` owns `parse`
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        JSON54872.parse
+    end)
+    @test isempty(warnings)
+
+    # `JSON54872` does not own `tryparse` (nor is it public)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        JSON54872.tryparse
+    end)
+    @test length(warnings) == 1
+    @test only(warnings).owner == Base
+    @test only(warnings).name_being_accessed == :tryparse
+
+    # Same for nested access
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        JSON54872.Parser.tryparse
+    end)
+    @test length(warnings) == 1
+    @test only(warnings).owner == Base
+    @test only(warnings).name_being_accessed == :tryparse
+
+    test_logger = TestLogger()
+    with_logger(test_logger) do
+        REPL.warn_on_non_owning_accesses(@__MODULE__, :(JSON54872.tryparse))
+        REPL.warn_on_non_owning_accesses(@__MODULE__, :(JSON54872.tryparse))
+    end
+    # only 1 logging statement emitted thanks to `maxlog` mechanism
+    @test length(test_logger.logs) == 1
+    record = only(test_logger.logs)
+    @test record.level == Warn
+    @test record.message == "tryparse is defined in Base and is not public in $JSON54872"
+
+    # However JSON54872_public has `tryparse` declared public
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        JSON54872_public.tryparse
+    end)
+    @test isempty(warnings)
+
+    # Now let us test some tricky cases
+    # No warning since `JSON54872` is local (LHS of `=`)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        let JSON54872 = (; tryparse=1)
+            JSON54872.tryparse
+        end
+    end)
+    @test isempty(warnings)
+
+    # No warning for nested local access either
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        let JSON54872 = (; Parser = (; tryparse=1))
+            JSON54872.Parser.tryparse
+        end
+    end)
+    @test isempty(warnings)
+
+    # No warning since `JSON54872` is local (long-form function arg)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        function f(JSON54872=(; tryparse))
+            JSON54872.tryparse
+        end
+    end)
+    @test isempty(warnings)
+
+    # No warning since `JSON54872` is local (short-form function arg)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        f(JSON54872=(; tryparse)) = JSON54872.tryparse
+    end)
+    @test isempty(warnings)
+
+    # No warning since `JSON54872` is local (long-form anonymous function)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        function (JSON54872=(; tryparse))
+            JSON54872.tryparse
+        end
+    end)
+    @test isempty(warnings)
+
+    # No warning since `JSON54872` is local (short-form anonymous function)
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        (JSON54872 = (; tryparse)) -> begin
+            JSON54872.tryparse
+        end
+    end)
+    @test isempty(warnings)
+
+    # false-negative: missing warning
+    warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote
+        let JSON54872 = JSON54872
+            JSON54872.tryparse
+        end
+    end)
+    @test_broken !isempty(warnings)
+end
 
 backend = REPL.REPLBackend()
 frontend_task = @async begin
@@ -1424,58 +1448,94 @@ end
 
 @testset "Install missing packages via hooks" begin
     @testset "Parse AST for packages" begin
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo"))
+        test_find_packages(e) =
+            REPL.modules_to_be_loaded(Meta.lower(@__MODULE__, e))
+        test_find_packages(s::String) =
+            REPL.modules_to_be_loaded(Meta.lower(@__MODULE__, Meta.parse(s)))
+
+        mods = test_find_packages("using Foo")
         @test mods == [:Foo]
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo"))
+        mods = test_find_packages("import Foo")
         @test mods == [:Foo]
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo, Bar"))
+        mods = test_find_packages("using Foo, Bar")
         @test mods == [:Foo, :Bar]
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo, Bar"))
+        mods = test_find_packages("import Foo, Bar")
         @test mods == [:Foo, :Bar]
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo.bar, Foo.baz"))
+        mods = test_find_packages("using Foo.bar, Foo.baz")
         @test mods == [:Foo]
 
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo end"))
+        mods = test_find_packages("if false using Foo end")
         @test mods == [:Foo]
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false if false using Foo end end"))
+        mods = test_find_packages("if false if false using Foo end end")
         @test mods == [:Foo]
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo, Bar end"))
+        mods = test_find_packages("if false using Foo, Bar end")
         @test mods == [:Foo, :Bar]
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo: bar end"))
+        mods = test_find_packages("if false using Foo: bar end")
         @test mods == [:Foo]
 
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo.bar as baz"))
+        mods = test_find_packages("import Foo.bar as baz")
         @test mods == [:Foo]
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using .Foo"))
+        mods = test_find_packages("using .Foo")
         @test isempty(mods)
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Base"))
+        mods = test_find_packages("using Base")
         @test isempty(mods)
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Base: nope"))
+        mods = test_find_packages("using Base: nope")
         @test isempty(mods)
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Main"))
+        mods = test_find_packages("using Main")
         @test isempty(mods)
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Core"))
+        mods = test_find_packages("using Core")
         @test isempty(mods)
 
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line(":(using Foo)"))
-        @test isempty(mods)
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("ex = :(using Foo)"))
+        mods = test_find_packages(":(using Foo)")
         @test isempty(mods)
-
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("Foo"))
+        mods = test_find_packages("ex = :(using Foo)")
         @test isempty(mods)
 
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("@eval using Foo"))
+        mods = test_find_packages("@eval using Foo")
         @test isempty(mods)
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("begin using Foo; @eval using Bar end"))
+        mods = test_find_packages("begin using Foo; @eval using Bar end")
         @test mods == [:Foo]
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("Core.eval(Main,\"using Foo\")"))
+        mods = test_find_packages("Core.eval(Main,\"using Foo\")")
         @test isempty(mods)
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("begin using Foo; Core.eval(Main,\"using Foo\") end"))
+        mods = test_find_packages("begin using Foo; Core.eval(Main,\"using Foo\") end")
         @test mods == [:Foo]
+
+        mods = test_find_packages(:(import .Foo: a))
+        @test isempty(mods)
+        mods = test_find_packages(:(using .Foo: a))
+        @test isempty(mods)
     end
 end
 
+# Test that the REPL can find `using` statements inside macro expansions
+global packages_requested = Any[]
+old_hooks = copy(REPL.install_packages_hooks)
+empty!(REPL.install_packages_hooks)
+push!(REPL.install_packages_hooks, function(pkgs)
+    append!(packages_requested, pkgs)
+end)
+
+fake_repl() do stdin_write, stdout_read, repl
+    repltask = @async begin
+        REPL.run_repl(repl)
+    end
+
+    # Just consume all the output - we only test that the callback ran
+    read_resp_task = @async while !eof(stdout_read)
+        readavailable(stdout_read)
+    end
+
+    write(stdin_write, "macro usingfoo(); :(using FooNotFound); end\n")
+    write(stdin_write, "@usingfoo\n")
+    write(stdin_write, "\x4")
+    Base.wait(repltask)
+    close(stdin_write)
+    close(stdout_read)
+    Base.wait(read_resp_task)
+end
+@test packages_requested == Any[:FooNotFound]
+empty!(REPL.install_packages_hooks); append!(REPL.install_packages_hooks, old_hooks)
+
 # err should reprint error if deeper than top-level
 fake_repl() do stdin_write, stdout_read, repl
     repltask = @async begin
@@ -1489,13 +1549,13 @@ fake_repl() do stdin_write, stdout_read, repl
     # generate top-level error
     write(stdin_write, "foobar\n")
     readline(stdout_read)
-    @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined"
-    @test readline(stdout_read) == ""
+    @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined in `Main`"
+    @test readline(stdout_read) == "" skip = Sys.iswindows() && Sys.WORD_SIZE == 32
     readuntil(stdout_read, "julia> ", keep=true)
     # check that top-level error did not change `err`
     write(stdin_write, "err\n")
     readline(stdout_read)
-    @test readline(stdout_read) == "\e[0m"
+    @test readline(stdout_read) == "\e[0m" skip = Sys.iswindows() && Sys.WORD_SIZE == 32
     readuntil(stdout_read, "julia> ", keep=true)
     # generate deeper error
     write(stdin_write, "foo() = foobar\n")
@@ -1504,13 +1564,13 @@ fake_repl() do stdin_write, stdout_read, repl
     readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, "foo()\n")
     readline(stdout_read)
-    @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined"
+    @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined in `Main`"
     readuntil(stdout_read, "julia> ", keep=true)
     # check that deeper error did set `err`
     write(stdin_write, "err\n")
     readline(stdout_read)
     @test readline(stdout_read) == "\e[0m1-element ExceptionStack:"
-    @test readline(stdout_read) == "UndefVarError: `foobar` not defined"
+    @test readline(stdout_read) == "UndefVarError: `foobar` not defined in `Main`"
     @test readline(stdout_read) == "Stacktrace:"
     readuntil(stdout_read, "\n\n", keep=true)
     readuntil(stdout_read, "julia> ", keep=true)
@@ -1555,7 +1615,8 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))]
         repl_mode = repl.interface.modes[1]
         shell_mode = repl.interface.modes[2]
         help_mode = repl.interface.modes[3]
-        histp = repl.interface.modes[4]
+        pkg_mode = repl.interface.modes[4]
+        # histp = repl.interface.modes[5]
         prefix_mode = repl.interface.modes[5]
 
         hp = REPL.REPLHistoryProvider(Dict{Symbol,Any}(:julia => repl_mode,
@@ -1563,13 +1624,12 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))]
                                                        :help  => help_mode))
         hist_path = tempname()
         write(hist_path, fakehistory_2)
-        REPL.hist_from_file(hp, hist_path)
-        f = open(hist_path, read=true, write=true, create=true)
-        hp.history_file = f
-        seekend(f)
+        histfile = REPL.HistoryFile(hist_path)
+        hp.history = histfile
+        REPL.history_do_initialize(hp)
         REPL.history_reset_state(hp)
 
-        histp.hp = repl_mode.hist = shell_mode.hist = help_mode.hist = hp
+        # histp.hp = repl_mode.hist = shell_mode.hist = help_mode.hist = hp
 
         s = LineEdit.init_state(repl.t, prefix_mode)
         prefix_prev() = REPL.history_prev_prefix(s, hp, "x")
@@ -1659,3 +1719,323 @@ fake_repl() do stdin_write, stdout_read, repl
     write(stdin_write, '\x04')
     Base.wait(repltask)
 end
+
+fake_repl() do stdin_write, stdout_read, repl
+    backend = REPL.REPLBackend()
+    repltask = @async REPL.run_repl(repl; backend)
+    write(stdin_write,
+          "a = UInt8(81):UInt8(160); b = view(a, 1:64); c = reshape(b, (8, 8)); d = reinterpret(reshape, Float64, c); sqrteach(a) = [sqrt(x) for x in a]; sqrteach(d)\n\"ZZZZZ\"\n")
+    txt = readuntil(stdout_read, "ZZZZZ")
+    write(stdin_write, '\x04')
+    wait(repltask)
+    @test contains(txt, "Some type information was truncated. Use `show(err)` to see complete types.")
+end
+
+# Hints for tab completes
+
+fake_repl() do stdin_write, stdout_read, repl
+    repltask = @async begin
+        REPL.run_repl(repl)
+    end
+    write(stdin_write, "reada")
+    s1 = readuntil(stdout_read, "reada") # typed
+    s2 = readuntil(stdout_read, "vailable") # partial hint
+
+    write(stdin_write, "x") # "readax" doesn't tab complete so no hint
+    # we can't use readuntil given this doesn't print, so just wait for the hint state to be reset
+    while LineEdit.state(repl.mistate).hint !== nothing
+        sleep(0.1)
+    end
+    @test LineEdit.state(repl.mistate).hint === nothing
+
+    write(stdin_write, "\b") # only tab complete while typing forward
+    while LineEdit.state(repl.mistate).hint !== nothing
+        sleep(0.1)
+    end
+    @test LineEdit.state(repl.mistate).hint === nothing
+
+    write(stdin_write, "v")
+    s3 = readuntil(stdout_read, "ailable") # partial hint
+
+    write(stdin_write, "\t")
+    s4 = readuntil(stdout_read, "readavailable") # full completion is reprinted
+
+    write(stdin_write, "\x15")
+    write(stdin_write, "x") # single chars shouldn't hint e.g. `x` shouldn't hint at `xor`
+    while LineEdit.state(repl.mistate).hint !== nothing
+        sleep(0.1)
+    end
+    @test LineEdit.state(repl.mistate).hint === nothing
+
+    # issue #52376
+    write(stdin_write, "\x15")
+    write(stdin_write, "\\_ailuj")
+    while LineEdit.state(repl.mistate).hint !== nothing
+        sleep(0.1)
+    end
+    @test LineEdit.state(repl.mistate).hint === nothing
+    s5 = readuntil(stdout_read, "\\_ailuj")
+    write(stdin_write, "\t")
+    s6 = readuntil(stdout_read, "ₐᵢₗᵤⱼ")
+
+    write(stdin_write, "\x15\x04")
+    Base.wait(repltask)
+end
+## hints disabled
+fake_repl(options=REPL.Options(confirm_exit=false,hascolor=true,hint_tab_completes=false,style_input=false,auto_insert_closing_bracket=false)) do stdin_write, stdout_read, repl
+    repltask = @async begin
+        REPL.run_repl(repl)
+    end
+    write(stdin_write, "reada")
+    s1 = readuntil(stdout_read, "reada") # typed
+    @test LineEdit.state(repl.mistate).hint === nothing
+
+    write(stdin_write, "\x15\x04")
+    Base.wait(repltask)
+    @test !occursin("vailable", String(readavailable(stdout_read)))
+end
+
+# banner
+let io = IOBuffer()
+    @test REPL.banner(io) === nothing
+    seek(io, 0)
+    @test countlines(io) == 9
+    take!(io)
+    @test REPL.banner(io; short=true) === nothing
+    seek(io, 0)
+    @test countlines(io) == 2
+end
+
+@testset "Docstrings" begin
+    undoc = Docs.undocumented_names(REPL)
+    @test_broken isempty(undoc)
+    @test undoc == [:AbstractREPL, :BasicREPL, :LineEditREPL, :StreamREPL]
+end
+
+struct A40735
+    str::String
+end
+
+# https://github.com/JuliaLang/julia/issues/40735
+@testset "Long printing" begin
+    previous = REPL.SHOW_MAXIMUM_BYTES
+    try
+        REPL.SHOW_MAXIMUM_BYTES = 1000
+        str = string(('a':'z')...)^50
+        @test length(str) > 1100
+        # For a raw string, we correctly get the standard abbreviated output
+        output = sprint(REPL.show_limited, MIME"text/plain"(), str; context=:limit => true)
+        hint = """call `show(stdout, MIME"text/plain"(), ans)` to print without truncation"""
+        suffix = "[printing stopped after displaying 1000 bytes; $hint]"
+        @test !endswith(output, suffix)
+        @test contains(output, "bytes ⋯")
+        # For a struct without a custom `show` method, we don't hit the abbreviated
+        # 3-arg show on the inner string, so here we check that the REPL print-limiting
+        # feature is correctly kicking in.
+        a = A40735(str)
+        output = sprint(REPL.show_limited, MIME"text/plain"(), a; context=:limit => true)
+        @test endswith(output, suffix)
+        @test length(output) <= 1200
+        # We also check some extreme cases
+        REPL.SHOW_MAXIMUM_BYTES = 1
+        output = sprint(REPL.show_limited, MIME"text/plain"(), 1)
+        @test output == "1"
+        output = sprint(REPL.show_limited, MIME"text/plain"(), 12)
+        @test output == "1…[printing stopped after displaying 1 byte; $hint]"
+        REPL.SHOW_MAXIMUM_BYTES = 0
+        output = sprint(REPL.show_limited, MIME"text/plain"(), 1)
+        @test output == "…[printing stopped after displaying 0 bytes; $hint]"
+        @test sprint(io -> show(REPL.LimitIO(io, 5), "abc")) == "\"abc\""
+        @test_throws REPL.LimitIOException(1) sprint(io -> show(REPL.LimitIO(io, 1), "abc"))
+
+        # displaying objects at the REPL sometimes needs access to displaysize, like Dict
+        @test displaysize(IOContext(REPL.LimitIO(stdout, 100), stdout)) == displaysize(stdout)
+    finally
+        REPL.SHOW_MAXIMUM_BYTES = previous
+    end
+end
+
+@testset "`displaysize` return type inference" begin
+    @test Tuple{Int, Int} === Base.infer_return_type(displaysize, Tuple{REPL.Terminals.UnixTerminal})
+end
+
+@testset "Dummy Pkg prompt" begin
+    # do this in an empty depot to test default for new users
+    withenv("JULIA_DEPOT_PATH" => mktempdir() * (Sys.iswindows() ? ";" : ":"), "JULIA_LOAD_PATH" => nothing) do
+        prompt = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no -e "using REPL; print(REPL.Pkg_promptf())"`)
+        @test prompt == "(@v$(VERSION.major).$(VERSION.minor)) pkg> "
+    end
+
+    # Issue 55850
+    tmp_55850 = mktempdir()
+    tmp_sym_link = joinpath(tmp_55850, "sym")
+    symlink(tmp_55850, tmp_sym_link; dir_target=true)
+    withenv("JULIA_DEPOT_PATH" => tmp_sym_link * (Sys.iswindows() ? ";" : ":"), "JULIA_LOAD_PATH" => nothing) do
+        prompt = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no -e "using REPL; print(REPL.projname(REPL.find_project_file()))"`)
+        @test prompt == "@v$(VERSION.major).$(VERSION.minor)"
+    end
+
+    get_prompt(proj::String) = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no $(proj) -e "using REPL; print(REPL.Pkg_promptf())"`)
+
+    @test get_prompt("--project=$(pkgdir(REPL))") == "(REPL) pkg> "
+
+    tdir = mkpath(joinpath(mktempdir(), "foo"))
+    @test get_prompt("--project=$tdir") == "(foo) pkg> "
+
+    proj_file = joinpath(tdir, "Project.toml")
+    touch(proj_file) # make a bad Project.toml
+    @test get_prompt("--project=$proj_file") == "(foo) pkg> "
+
+    write(proj_file, "name = \"Bar\"\n")
+    @test get_prompt("--project=$proj_file") == "(Bar) pkg> "
+end
+
+# Issue #58158 add alias for Char display in REPL
+@testset "REPL show_repl Char alias" begin
+    # Test character with a known emoji alias
+    output = sprint(REPL.show_repl, MIME("text/plain"), '😼'; context=(:color => true))
+    # Check for base info and the specific alias
+    @test occursin("'😼': Unicode U+1F63C (category So: Symbol, other)", output)
+    @test occursin(", input as ", output) # Check for the prefix text
+    @test occursin("\\:smirk_cat:<tab>", output) # Check for the alias text (may be colored)
+
+    # Test character with a known LaTeX alias
+    output = sprint(REPL.show_repl, MIME("text/plain"), 'α'; context=(:color => true))
+    # Check for base info and the specific alias
+    @test occursin("'α': Unicode U+03B1 (category Ll: Letter, lowercase)", output)
+    @test occursin(", input as ", output) # Check for the prefix text
+    @test occursin("\\alpha<tab>", output) # Check for the alias text (may be colored)
+
+    # Test character without an alias
+    output = sprint(REPL.show_repl, MIME("text/plain"), 'X'; context=(:color => true))
+    # Check for base info only
+    @test occursin("'X': ASCII/Unicode U+0058 (category Lu: Letter, uppercase)", output)
+    # Ensure alias part is *not* printed
+    @test !occursin(", input as ", output)
+
+    # Test another character without an alias (symbol)
+    output = sprint(REPL.show_repl, MIME("text/plain"), '+'; context=(:color => true))
+    @test occursin("'+': ASCII/Unicode U+002B (category Sm: Symbol, math)", output)
+    @test !occursin(", input as ", output)
+end
+
+# Test syntax highlighting in REPL input
+@testset "Syntax highlighting" begin
+    using StyledStrings
+    using REPL.StylingPasses
+
+    # Use withfaces to ensure consistent face definitions regardless of user config
+    StyledStrings.withfaces(:julia_keyword => StyledStrings.Face(foreground=:red),
+                            :julia_number => StyledStrings.Face(foreground=:blue)) do
+
+        # Test that julia_prompt has syntax highlighting passes
+        fake_repl(options = REPL.Options(confirm_exit=false, style_input=true, auto_insert_closing_bracket=false)) do stdin_write, stdout_read, repl
+            repl.interface = REPL.setup_interface(repl)
+            julia_prompt = repl.interface.modes[1]
+            shell_mode = repl.interface.modes[3]
+
+            # Julia prompt should have syntax highlighting passes
+            @test length(julia_prompt.styling_passes) == 2
+            @test any(p -> p isa StylingPasses.SyntaxHighlightPass, julia_prompt.styling_passes)
+            @test any(p -> p isa StylingPasses.EnclosingParenHighlightPass, julia_prompt.styling_passes)
+
+            # Shell mode should not have syntax highlighting passes
+            @test length(shell_mode.styling_passes) == 0
+
+            # Test that syntax highlighting is actually applied
+            repltask = @async begin
+                REPL.run_repl(repl)
+            end
+
+            # Test 1: Simple keyword highlighting
+            write(stdin_write, "function # SENTINEL1")
+            s = readuntil(stdout_read, "# SENTINEL1", keep=true)
+            # The keyword "function" should be styled (have escape code before it)
+            # Look for "function" that appears after the prompt, not just anywhere
+            # Extract just the input portion after "julia> "
+            input_part = split(s, "julia> ", keepempty=false)
+            if !isempty(input_part)
+                input_text = input_part[end]
+                # If syntax highlighting is working, "function" will have an escape code before it
+                # like \e[31mfunction or similar
+                @test occursin(r"\e\[[0-9;]*m.*function", input_text)
+            end
+            write(stdin_write, "\x03")  # Ctrl-C to cancel
+
+            # Test 2: Unicode identifiers with syntax highlighting
+            readuntil(stdout_read, "julia> ")
+            write(stdin_write, "function αβ(a, β) # SENTINEL2")
+            s = readuntil(stdout_read, "# SENTINEL2", keep=true)
+            # Should highlight "function" keyword even with unicode following
+            input_part = split(s, "julia> ", keepempty=false)
+            if !isempty(input_part)
+                input_text = input_part[end]
+                # Keyword should be styled
+                @test occursin(r"\e\[[0-9;]*m.*function", input_text)
+            end
+            # Unicode should be preserved (may have ANSI codes interleaved, so check separately)
+            @test occursin("α", s)
+            @test occursin("β", s)
+            @test occursin("(", s)
+            @test occursin(")", s)
+            write(stdin_write, "\x03")  # Ctrl-C to cancel
+
+            # Test 3: Multi-line input with syntax highlighting
+            readuntil(stdout_read, "julia> ")
+            write(stdin_write, "begin\n")
+            readuntil(stdout_read, "begin")
+            write(stdin_write, "    local test_var_for_highlighting = 42 # SENTINEL3\n")
+            s = readuntil(stdout_read, "# SENTINEL3", keep=true)
+            # Should contain highlighting - the "local" keyword should be styled
+            @test occursin(r"\e\[[0-9;]*m.*local", s)
+            write(stdin_write, "\x03")  # Ctrl-C to cancel before executing
+            # Don't execute to avoid polluting Main module
+
+            # Test 4: Bracket highlighting (paren matching)
+            readuntil(stdout_read, "julia> ")
+            write(stdin_write, "(1 + (2 * 3)) # SENTINEL4")
+            # Move cursor to be inside the inner parens: between 2 and *
+            # Current position is at end: (1 + (2 * 3)) # SENTINEL4|
+            # Move left to get to: (1 + (2| * 3)) # SENTINEL4
+            # We need to move past " # SENTINEL4" which is 13 characters
+            for _ in 1:18  # 13 for " # SENTINEL4" + 5 to get between 2 and *
+                write(stdin_write, "\e[D")  # Left arrow
+            end
+            # Give it a moment to process and re-render
+            sleep(0.1)
+            # Now write a space to trigger re-render and capture output
+            write(stdin_write, " ")
+            s = readuntil(stdout_read, "# SENTINEL4", keep=true)
+            # The enclosing parens around "2 * 3" should be highlighted with bold/underline
+            # We can't easily test the exact positioning, but we can verify that
+            # there are ANSI codes for bold (\e[1m) or underline (\e[4m) present
+            @test occursin(r"\e\[[0-9;]*[14]m", s)  # Contains bold or underline codes
+            write(stdin_write, "\x03")  # Ctrl-C to cancel
+
+            write(stdin_write, '\x04')  # Exit
+            Base.wait(repltask)
+        end
+
+        # Test that syntax highlighting can be disabled
+        fake_repl(options = REPL.Options(confirm_exit=false, style_input=false, auto_insert_closing_bracket=false)) do stdin_write, stdout_read, repl
+            repl.interface = REPL.setup_interface(repl)
+
+            repltask = @async begin
+                REPL.run_repl(repl)
+            end
+
+            # Even though the prompt has styling passes, they shouldn't be applied
+            write(stdin_write, "function # SENTINEL5")
+            s = readuntil(stdout_read, "# SENTINEL5", keep=true)
+            # With style_input=false, there should be no color codes from syntax highlighting
+            # (there may still be prompt color codes, but not within the input text)
+            lines = split(s, '\n')
+            # The last line should contain just "function" without color codes around it
+            @test occursin("function", s)
+
+            write(stdin_write, "\x03")  # Ctrl-C to cancel
+            write(stdin_write, '\x04')  # Exit
+            Base.wait(repltask)
+        end
+    end
+end
diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl
index b2199e10bef55..a0366a4fd0547 100644
--- a/stdlib/REPL/test/replcompletions.jl
+++ b/stdlib/REPL/test/replcompletions.jl
@@ -4,143 +4,180 @@ using REPL.REPLCompletions
 using Test
 using Random
 using REPL
-    @testset "Check symbols previously not shown by REPL.doc_completions()" begin
+
+@testset "Check symbols previously not shown by REPL.doc_completions()" begin
     symbols = ["?","=","[]","[","]","{}","{","}",";","","'","&&","||","julia","Julia","new","@var_str"]
-        for i in symbols
-            @test i ∈ REPL.doc_completions(i, Main)
-        end
+    for i in symbols
+        @test i ∈ string.(REPL.doc_completions(i, Main))
     end
-let ex = quote
-    module CompletionFoo
-        using Random
-        import Test
-
-        mutable struct Test_y
-            yy
-        end
-        mutable struct Test_x
-            xx :: Test_y
-        end
-        type_test = Test_x(Test_y(1))
-        (::Test_y)() = "", ""
-        module CompletionFoo2
+end
 
-        end
-        const bar = 1
-        foo() = bar
-        macro foobar()
-            :()
-        end
-        macro barfoo(ex)
-            ex
-        end
-        macro error_expanding()
-            error("cannot expand @error_expanding")
-            :()
-        end
-        macro error_lowering_conditional(a)
-            if isa(a, Number)
-                return a
+let ex =
+    quote
+        module CompletionFoo
+            using Random
+            import Test
+            # make everything public, so that nothing gets hidden unintentionally from completions
+            public Test_y, Text_x, type_test, unicode_αΒγ, CompletionFoo2, bar,
+            foo, @foobar, @barfoo, @error_expanding,
+            @error_lowering_conditional, @error_throwing, NonStruct, x,
+            CustomDict, NoLengthDict, test, test1, test2, test3, test4, test5,
+            test6, test7, test8, test9, test10, test11, a, test!12, kwtest,
+            kwtest2, kwtest3, kwtest4, kwtest5, named, fmsoebelkv, array,
+            varfloat, tuple, test_y_array, test_dict, test_customdict,
+            @teststr_str, @tϵsτstρ_str, @testcmd_cmd, @tϵsτcmδ_cmd,
+            var"complicated symbol with spaces", WeirdNames, @ignoremacro
+
+            mutable struct Test_y
+                yy
             end
-            throw(AssertionError("Not a Number"))
-            :()
-        end
-        macro error_throwing()
-            return quote
-                error("@error_throwing throws an error")
+            mutable struct Test_x
+                xx :: Test_y
             end
-        end
-
-        primitive type NonStruct 8 end
-        Base.propertynames(::NonStruct) = (:a, :b, :c)
-        x = reinterpret(NonStruct, 0x00)
-
-        # Support non-Dict AbstractDicts, #19441
-        mutable struct CustomDict{K, V} <: AbstractDict{K, V}
-            mydict::Dict{K, V}
-        end
-
-        Base.keys(d::CustomDict) = collect(keys(d.mydict))
-        Base.length(d::CustomDict) = length(d.mydict)
-
-        test(x::T, y::T) where {T<:Real} = pass
-        test(x::Real, y::Real) = pass
-        test(x::AbstractArray{T}, y) where {T<:Real} = pass
-        test(args...) = pass
-
-        test1(x::Type{Float64}) = pass
-
-        test2(x::AbstractString) = pass
-        test2(x::Char) = pass
-        test2(x::Cmd) = pass
-
-        test3(x::AbstractArray{Int}, y::Int) = pass
-        test3(x::AbstractArray{Float64}, y::Float64) = pass
-
-        test4(x::AbstractString, y::AbstractString) = pass
-        test4(x::AbstractString, y::Regex) = pass
-
-        test5(x::Array{Bool,1}) = pass
-        test5(x::BitArray{1}) = pass
-        test5(x::Float64) = pass
-        const a=x->x
-        test6()=[a, a]
-        test7() = rand(Bool) ? 1 : 1.0
-        test8() = Any[1][1]
-        test9(x::Char) = pass
-        test9(x::Char, i::Int) = pass
-
-        test10(a, x::Int...) = pass
-        test10(a::Integer, b::Integer, c) = pass
-        test10(a, y::Bool...) = pass
-        test10(a, d::Integer, z::Signed...) = pass
-        test10(s::String...) = pass
-
-        test11(a::Integer, b, c) = pass
-        test11(u, v::Integer, w) = pass
-        test11(x::Int, y::Int, z) = pass
-        test11(_, _, s::String) = pass
+            type_test = Test_x(Test_y(1))
+            (::Test_y)() = "", ""
+            unicode_αβγ = Test_y(1)
 
-        test!12() = pass
+            Base.:(+)(x::Test_x, y::Test_y) = Test_x(Test_y(x.xx.yy + y.yy))
+            module CompletionFoo2
 
-        kwtest(; x=1, y=2, w...) = pass
-        kwtest2(a; x=1, y=2, w...) = pass
-        kwtest3(a::Number; length, len2, foobar, kwargs...) = pass
-        kwtest3(a::Real; another!kwarg, len2) = pass
-        kwtest3(a::Integer; namedarg, foobar, slurp...) = pass
-        kwtest4(a::AbstractString; _a1b, x23) = pass
-        kwtest4(a::String; _a1b, xαβγ) = pass
-        kwtest4(a::SubString; x23, _something) = pass
-        kwtest5(a::Int, b, x...; somekwarg, somekotherkwarg) = pass
-        kwtest5(a::Char, b; xyz) = pass
-
-        const named = (; len2=3)
-
-        array = [1, 1]
-        varfloat = 0.1
-
-        const tuple = (1, 2)
+            end
+            const bar = 1
+            foo() = bar
+            macro foobar()
+                :()
+            end
+            macro barfoo(ex)
+                ex
+            end
+            macro error_expanding()
+                error("cannot expand @error_expanding")
+                :()
+            end
+            macro error_lowering_conditional(a)
+                if isa(a, Number)
+                    return a
+                end
+                throw(AssertionError("Not a Number"))
+                :()
+            end
+            macro error_throwing()
+                return quote
+                    error("@error_throwing throws an error")
+                end
+            end
 
-        test_y_array=[(@__MODULE__).Test_y(rand()) for i in 1:10]
-        test_dict = Dict("abc"=>1, "abcd"=>10, :bar=>2, :bar2=>9, Base=>3,
-                         occursin=>4, `ls`=>5, 66=>7, 67=>8, ("q",3)=>11,
-                         "α"=>12, :α=>13)
-        test_customdict = CustomDict(test_dict)
+            primitive type NonStruct 8 end
+            Base.propertynames(::NonStruct) = (:a, :b, :c)
+            x = reinterpret(NonStruct, 0x00)
 
-        macro teststr_str(s) end
-        macro tϵsτstρ_str(s) end
-        macro testcmd_cmd(s) end
-        macro tϵsτcmδ_cmd(s) end
+            # Support non-Dict AbstractDicts, #19441
+            mutable struct CustomDict{K, V} <: AbstractDict{K, V}
+                mydict::Dict{K, V}
+            end
 
-        var"complicated symbol with spaces" = 5
+            Base.keys(d::CustomDict) = collect(keys(d.mydict))
+            Base.length(d::CustomDict) = length(d.mydict)
 
-        struct WeirdNames end
-        Base.propertynames(::WeirdNames) = (Symbol("oh no!"), Symbol("oh yes!"))
+            # Support AbstractDict with unknown length, #55931
+            struct NoLengthDict{K,V} <: AbstractDict{K,V}
+                dict::Dict{K,V}
+                NoLengthDict{K,V}() where {K,V} = new(Dict{K,V}())
+            end
+            Base.iterate(d::NoLengthDict, s...) = iterate(d.dict, s...)
+            Base.IteratorSize(::Type{<:NoLengthDict}) = Base.SizeUnknown()
+            Base.eltype(::Type{NoLengthDict{K,V}}) where {K,V} = Pair{K,V}
+            Base.setindex!(d::NoLengthDict, v, k) = d.dict[k] = v
+
+            test(x::T, y::T) where {T<:Real} = pass
+            test(x::Real, y::Real) = pass
+            test(x::AbstractArray{T}, y) where {T<:Real} = pass
+            test(args...) = pass
+
+            test1(x::Type{Float64}) = pass
+
+            test2(x::AbstractString) = pass
+            test2(x::Char) = pass
+            test2(x::Cmd) = pass
+
+            test3(x::AbstractArray{Int}, y::Int) = pass
+            test3(x::AbstractArray{Float64}, y::Float64) = pass
+
+            test4(x::AbstractString, y::AbstractString) = pass
+            test4(x::AbstractString, y::Regex) = pass
+
+            test5(x::Array{Bool,1}) = pass
+            test5(x::BitArray{1}) = pass
+            test5(x::Float64) = pass
+            const a=x->x
+            test6()=[a, a]
+            test7() = rand(Bool) ? 1 : 1.0
+            test8() = Any[1][1]
+            test9(x::Char) = pass
+            test9(x::Char, i::Int) = pass
+
+            test10(a, x::Int...) = pass
+            test10(a::Integer, b::Integer, c) = pass
+            test10(a, y::Bool...) = pass
+            test10(a, d::Integer, z::Signed...) = pass
+            test10(s::String...) = pass
+
+            test11(a::Integer, b, c) = pass
+            test11(u, v::Integer, w) = pass
+            test11(x::Int, y::Int, z) = pass
+            test11(_, _, s::String) = pass
+
+            test!12() = pass
+
+            kwtest(; x=1, y=2, w...) = pass
+            kwtest2(a; x=1, y=2, w...) = pass
+            kwtest3(a::Number; length, len2, foobar, kwargs...) = pass
+            kwtest3(a::Real; another!kwarg, len2) = pass
+            kwtest3(a::Integer; namedarg, foobar, slurp...) = pass
+            kwtest4(a::AbstractString; _a1b, x23) = pass
+            kwtest4(a::String; _a1b, xαβγ) = pass
+            kwtest4(a::SubString; x23, _something) = pass
+            kwtest5(a::Int, b, x...; somekwarg, somekotherkwarg) = pass
+            kwtest5(a::Char, b; xyz) = pass
+            kwtest6(f::Function, arg1; somekwarg) = pass
+
+            const named = (; len2=3)
+            const fmsoebelkv = (; len2=3)
+
+            array = [1, 1]
+            varfloat = 0.1
+
+            const tuple = (1, 2)
+
+            test_y_array=[(@__MODULE__).Test_y(rand()) for i in 1:10]
+            test_dict = Dict("abc"=>1, "abcd"=>10, :bar=>2, :bar2=>9, Base=>3,
+                            occursin=>4, `ls`=>5, 66=>7, 67=>8, ("q",3)=>11,
+                            "α"=>12, :α=>13)
+            test_customdict = CustomDict(test_dict)
+
+            macro teststr_str(s) end
+            macro tϵsτstρ_str(s) end
+            macro testcmd_cmd(s) end
+            macro tϵsτcmδ_cmd(s) end
+
+            var"complicated symbol with spaces" = 5
+
+            struct WeirdNames end
+            Base.propertynames(::WeirdNames) = (Symbol("oh no!"), Symbol("oh yes!"))
+
+            # https://github.com/JuliaLang/julia/issues/52551#issuecomment-1858543413
+            export exported_symbol
+            exported_symbol(::WeirdNames) = nothing
+
+            macro ignoremacro(e...)
+                nothing
+            end
 
         end # module CompletionFoo
         test_repl_comp_dict = CompletionFoo.test_dict
         test_repl_comp_customdict = CompletionFoo.test_customdict
         test_dict_ℂ = Dict(1=>2)
+        test_dict_no_length = CompletionFoo.NoLengthDict{Int,Int}()
     end
     ex.head = :toplevel
     Core.eval(Main, ex)
@@ -148,16 +185,27 @@ end
 
 function map_completion_text(completions)
     c, r, res = completions
-    return map(completion_text, c), r, res
+    return map(x -> named_completion(x).completion, c), r, res
+end
+
+function map_named_completion(completions)
+    c, r, res = completions
+    return map(named_completion, c), r, res
 end
 
 test_complete(s) = map_completion_text(@inferred(completions(s, lastindex(s))))
 test_scomplete(s) =  map_completion_text(@inferred(shell_completions(s, lastindex(s))))
-test_bslashcomplete(s) =  map_completion_text(@inferred(bslash_completions(s, lastindex(s)))[2])
-test_complete_context(s, m) =  map_completion_text(@inferred(completions(s,lastindex(s), m)))
-test_complete_foo(s) = test_complete_context(s, Main.CompletionFoo)
+# | is reserved in test_complete_pos
+test_complete_pos(s) = map_completion_text(@inferred(completions(replace(s, '|' => ""), findfirst('|', s)-1)))
+test_complete_context(s, m=@__MODULE__; shift::Bool=true) =
+    map_completion_text(@inferred(completions(s,lastindex(s), m, shift)))
+test_complete_context_pos(s, m=@__MODULE__; shift::Bool=true) =
+    map_completion_text(@inferred(completions(replace(s, '|' => ""), findfirst('|', s)-1, m, shift)))
+test_complete_foo(s; shift::Bool=true) = test_complete_context(s, Main.CompletionFoo; shift)
 test_complete_noshift(s) = map_completion_text(@inferred(completions(s, lastindex(s), Main, false)))
 
+test_bslashcomplete(s) =  map_named_completion(@inferred(bslash_completions(s, lastindex(s)))[2])
+
 test_methods_list(@nospecialize(f), tt) = map(x -> string(x.method), Base._methods_by_ftype(Base.signature_type(f, tt), 10, Base.get_world_counter()))
 
 
@@ -185,8 +233,6 @@ end
 let s = "using REP"
     c, r = test_complete_32377(s)
     @test count(isequal("REPL"), c) == 1
-    # issue #30234
-    @test !Base.isbindingresolved(M32377, :tanh)
     # check what happens if REPL is already imported
     M32377.eval(:(using REPL))
     c, r = test_complete_32377(s)
@@ -253,17 +299,23 @@ let s = "Main.CompletionFoo.type_test.x"
     @test s[r] == "x"
 end
 
-let s = "Main.CompletionFoo.bar.no_val_available"
+let s = "Main.CompletionFoo.unicode_αβγ.y"
     c, r = test_complete(s)
-    @test length(c)==0
+    @test "yy" in c
 end
 
-#cannot do dot completion on infix operator
-let s = "+."
+let s = "Main.CompletionFoo.bar.no_val_available"
     c, r = test_complete(s)
     @test length(c)==0
 end
 
+#cannot do dot completion on infix operator (get default completions)
+let s1 = "", s2 = "+."
+    c1, r1 = test_complete(s1)
+    c2, r2 = test_complete(s2)
+    @test length(c1)==length(c2)
+end
+
 # To complete on a variable of a type, the type T of the variable
 # must be a concrete type, hence Base.isstructtype(T) returns true,
 # for the completion to succeed. That why `xx :: Test_y` of `Test_x`.
@@ -313,18 +365,26 @@ end
 # inexistent completion inside a cmd
 @test_nocompletion("run(`lol")
 
+# issue 55856: copy(A').<TAB> errors in the REPL
+let
+    c, r = test_complete("copy(A').")
+    @test isempty(c)
+end
+
 # test latex symbol completions
 let s = "\\alpha"
     c, r = test_bslashcomplete(s)
-    @test c[1] == "α"
-    @test r == 1:length(s)
+    @test c[1].completion == "α"
+    @test c[1].name == "α"
+    @test r == 1:lastindex(s)
     @test length(c) == 1
 end
 
 # test latex symbol completions after unicode #9209
 let s = "α\\alpha"
     c, r = test_bslashcomplete(s)
-    @test c[1] == "α"
+    @test c[1].completion == "α"
+    @test c[1].name == "α"
     @test r == 3:sizeof(s)
     @test length(c) == 1
 end
@@ -332,20 +392,25 @@ end
 # test emoji symbol completions
 let s = "\\:koala:"
     c, r = test_bslashcomplete(s)
-    @test c[1] == "🐨"
+    @test c[1].completion == "🐨"
+    @test c[1].name == "🐨"
     @test r == 1:sizeof(s)
     @test length(c) == 1
 end
 
 let s = "\\:ko"
     c, r = test_bslashcomplete(s)
-    @test "\\:koala:" in c
+    ko = only(filter(c) do namedcompletion
+        namedcompletion.completion == "\\:koala:"
+    end)
+    @test ko.name == "🐨 \\:koala:"
 end
 
 # test emoji symbol completions after unicode #9209
 let s = "α\\:koala:"
     c, r = test_bslashcomplete(s)
-    @test c[1] == "🐨"
+    @test c[1].name == "🐨"
+    @test c[1].completion == "🐨"
     @test r == 3:sizeof(s)
     @test length(c) == 1
 end
@@ -413,13 +478,13 @@ let
     c, r, res = test_complete(s)
     @test !res
     @test all(m -> string(m) in c, methods(isnothing))
-    @test s[r] == s[1:end-1]
+    @test s[r] == s[2:end-1]
 
     s = "!!isnothing("
     c, r, res = test_complete(s)
     @test !res
     @test all(m -> string(m) in c, methods(isnothing))
-    @test s[r] == s[1:end-1]
+    @test s[r] == s[3:end-1]
 end
 
 # Test completion of methods with input concrete args and args where typeinference determine their type
@@ -495,7 +560,7 @@ end
 let s = "CompletionFoo.test3([1, 2] .+ CompletionFoo.varfloat,"
     c, r, res = test_complete(s)
     @test !res
-    @test_broken only(c) == first(test_methods_list(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64, Vararg}))
+    @test only(c) == first(test_methods_list(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64, Vararg}))
 end
 
 let s = "CompletionFoo.test3([1.,2.], 1.,"
@@ -566,7 +631,7 @@ end
 let s = "CompletionFoo.test3(@time([1, 2] .+ CompletionFoo.varfloat),"
     c, r, res = test_complete(s)
     @test !res
-    @test length(c) == 2
+    @test length(c) == 1
 end
 
 # method completions with kwargs
@@ -634,7 +699,7 @@ let s = "CompletionFoo.?([1,2,3], 2.0)"
     c, r, res = test_complete(s)
     @test !res
     @test length(c) == 1
-    @test occursin("test(x::AbstractArray{T}, y) where T<:Real", c[1])
+    @test occursin("test(x::AbstractArray{T}, y) where T<:Real", only(c))
     # In particular, this checks that test(args...) is not a valid completion
     # since it is strictly less specific than test(x::AbstractArray{T}, y)
 end
@@ -668,15 +733,15 @@ let s = "CompletionFoo.?(false, \"a\", 3, "
     c, r, res = test_complete(s)
     @test !res
     @test length(c) == 2
-    @test occursin("test(args...)", c[1])
-    @test occursin("test11(a::Integer, b, c)", c[2])
+    @test any(s->occursin("test(args...)", s), c)
+    @test any(s->occursin("test11(a::Integer, b, c)", s), c)
 end
 
 let s = "CompletionFoo.?(false, \"a\", 3, "
     c, r, res = test_complete_noshift(s)
     @test !res
     @test length(c) == 1
-    @test occursin("test11(a::Integer, b, c)", c[1])
+    @test occursin("test11(a::Integer, b, c)", only(c))
 end
 
 let s = "CompletionFoo.?(\"a\", 3, "
@@ -699,7 +764,7 @@ let s = "CompletionFoo.?()"
     c, r, res = test_complete_noshift(s)
     @test !res
     @test length(c) == 1
-    @test occursin("test10(s::String...)", c[1])
+    @test occursin("test10(s::String...)", only(c))
 end
 
 #= TODO: restrict the number of completions when a semicolon is present in ".?(" syntax
@@ -717,7 +782,7 @@ let s = "CompletionFoo.?(3; len2=5, "
     c, r, res = test_complete_noshift(s)
     @test !res
     @test length(c) == 1
-    @test occursin("kwtest3(a::Integer; namedarg, foobar, slurp...)", c[1])
+    @test occursin("kwtest3(a::Integer; namedarg, foobar, slurp...)", only(c))
     # the other two kwtest3 methods should not appear because of specificity
 end
 =#
@@ -732,6 +797,9 @@ end
 
 #TODO: @test_nocompletion("CompletionFoo.?(3; len2=5; ")
 
+# https://github.com/JuliaLang/julia/issues/52551
+@test !isempty(test_complete("?("))
+
 #################################################################
 
 # Test method completion with varargs
@@ -1004,10 +1072,43 @@ end
 let c, r, res
     c, r, res = test_scomplete("\$a")
     @test c == String[]
-    @test r === 0:-1
+    @test r === 1:0
     @test res === false
 end
 
+# A pair of utility function for the REPL completions test to test PATH_cache
+# dependent completions, which ordinarily happen asynchronously.
+# Only to be used from the test suite
+function test_only_arm_cache_refresh()
+    @lock REPL.REPLCompletions.PATH_cache_lock begin
+        @assert REPL.REPLCompletions.PATH_cache_condition === nothing
+
+        # Arm a condition we can wait on
+        REPL.REPLCompletions.PATH_cache_condition = Threads.Condition(REPL.REPLCompletions.PATH_cache_lock)
+
+        # Check if the previous update is still running - if so, wait for it to finish
+        while REPL.REPLCompletions.PATH_cache_task !== nothing
+            @assert !istaskdone(REPL.REPLCompletions.PATH_cache_task)
+            wait(REPL.REPLCompletions.PATH_cache_condition)
+        end
+
+        # force the next cache update to happen immediately
+        REPL.REPLCompletions.next_cache_update = 0
+    end
+    return nothing
+end
+
+function test_only_wait_cache_path_done()
+    @lock REPL.REPLCompletions.PATH_cache_lock begin
+        @assert REPL.REPLCompletions.PATH_cache_condition !== nothing
+
+        while REPL.REPLCompletions.next_cache_update == 0.
+            wait(REPL.REPLCompletions.PATH_cache_condition)
+        end
+        REPL.REPLCompletions.PATH_cache_condition = nothing
+    end
+end
+
 if Sys.isunix()
 let s, c, r
     #Assume that we can rely on the existence and accessibility of /tmp
@@ -1016,38 +1117,38 @@ let s, c, r
     # Issue #8047
     s = "@show \"/dev/nul"
     c,r = test_complete(s)
-    @test "null\"" in c
-    @test r == 13:15
-    @test s[r] == "nul"
+    @test "/dev/null\"" in c
+    @test r == 8:15
+    @test s[r] == "/dev/nul"
 
     # Tests path in Julia code and not closing " if it's a directory
     # Issue #8047
     s = "@show \"/tm"
     c,r = test_complete(s)
-    @test "tmp/" in c
-    @test r == 9:10
-    @test s[r] == "tm"
+    @test "/tmp/" in c
+    @test r == 8:10
+    @test s[r] == "/tm"
 
     # Tests path in Julia code and not double-closing "
     # Issue #8047
     s = "@show \"/dev/nul\""
     c,r = completions(s, 15)
-    c = map(completion_text, c)
-    @test "null" in c
-    @test r == 13:15
-    @test s[r] == "nul"
+    c = map(named_completion, c)
+    @test "/dev/null" in [_c.completion for _c in c]
+    @test r == 8:15
+    @test s[r] == "/dev/nul"
 
     s = "/t"
     c,r = test_scomplete(s)
-    @test "tmp/" in c
-    @test r == 2:2
-    @test s[r] == "t"
+    @test "/tmp/" in c
+    @test r == 1:2
+    @test s[r] == "/t"
 
     s = "/tmp"
     c,r = test_scomplete(s)
-    @test "tmp/" in c
-    @test r == 2:4
-    @test s[r] == "tmp"
+    @test "/tmp/" in c
+    @test r == 1:4
+    @test s[r] == "/tmp"
 
     # This should match things that are inside the tmp directory
     s = tempdir()
@@ -1057,8 +1158,8 @@ let s, c, r
     if !isdir(joinpath(s, "tmp"))
         c,r = test_scomplete(s)
         @test !("tmp/" in c)
-        @test r === length(s) + 1:0
-        @test s[r] == ""
+        @test !("$s/tmp/" in c)
+        @test r === 1:sizeof(s)
     end
 
     s = "cd \$(Iter"
@@ -1083,8 +1184,8 @@ let s, c, r
         touch(file)
         s = string(tempdir(), "/repl\\ ")
         c,r = test_scomplete(s)
-        @test ["repl\\ completions"] == c
-        @test s[r] == "repl\\ "
+        @test [Base.shell_escape_posixly(joinpath(tempdir(),  "repl completions"))] == c
+        @test s[r] == string(tempdir(), "/repl\\ ")
         rm(file)
     end
 
@@ -1096,12 +1197,19 @@ let s, c, r
             mkdir(dir)
             s = "\"" * path * "/tmpfoob"
             c,r = test_complete(s)
-            @test "tmpfoobar/" in c
-            l = 3 + length(path)
-            @test r == l:l+6
-            @test s[r] == "tmpfoob"
+            @test string(dir, "/") in c
+            @test r == 2:sizeof(s)
+            @test s[r] == joinpath(path, "tmpfoob")
+
+            # Homedir expansion inside Cmd string (#57624)
+            s = "`ls " * path * "/tmpfoob"
+            c,r = test_complete(s)
+            @test string(dir, "/") in c
+            @test r == 5:sizeof(s)
+            @test s[r] == joinpath(path, "tmpfoob")
+
             s = "\"~"
-            @test "tmpfoobar/" in c
+            @test joinpath(path, "tmpfoobar/") in c
             c,r = test_complete(s)
             s = "\"~user"
             c, r = test_complete(s)
@@ -1111,28 +1219,44 @@ let s, c, r
     end
 
     # Tests detecting of files in the env path (in shell mode)
-    let path, file
-        path = tempdir()
-        unreadable = joinpath(tempdir(), "replcompletion-unreadable")
+    mktempdir() do path
+        unreadable = joinpath(path, "replcompletion-unreadable")
+        file = joinpath(path, "tmp-executable")
+        touch(file)
+        chmod(file, 0o755)
+        mkdir(unreadable)
+        hidden_file = joinpath(unreadable, "hidden")
+        touch(hidden_file)
 
-        try
-            file = joinpath(path, "tmp-executable")
-            touch(file)
-            chmod(file, 0o755)
-            mkdir(unreadable)
-            chmod(unreadable, 0o000)
+        # Create symlink to a file that is in an unreadable directory
+        chmod(hidden_file, 0o755)
+        chmod(unreadable, 0o000)
+        symlink(hidden_file, joinpath(path, "replcompletions-link"))
 
+        try
             # PATH can also contain folders which we aren't actually allowed to read.
             withenv("PATH" => string(path, ":", unreadable)) do
                 s = "tmp-execu"
+                # Files reachable by PATH are cached async when PATH is seen to have been changed by `complete_path`
+                # so changes are unlikely to appear in the first complete. For testing purposes we can wait for
+                # caching to finish
+                test_only_arm_cache_refresh()
+                c,r = test_scomplete(s)
+                test_only_wait_cache_path_done()
                 c,r = test_scomplete(s)
                 @test "tmp-executable" in c
                 @test r == 1:9
                 @test s[r] == "tmp-execu"
+
+                c,r = test_scomplete("replcompletions-link")
+                if !Sys.isunix() || Libc.getuid() != 0
+                    # Root bypasses permissions
+                    @test isempty(c)
+                end
             end
         finally
-            rm(file)
-            rm(unreadable)
+            # If we don't fix the permissions here, our cleanup fails.
+            chmod(unreadable, 0o700)
         end
     end
 
@@ -1150,6 +1274,9 @@ let s, c, r
 
             withenv("PATH" => string(tempdir(), ":", dir)) do
                 s = string("repl-completio")
+                test_only_arm_cache_refresh()
+                c,r = test_scomplete(s)
+                test_only_wait_cache_path_done()
                 c,r = test_scomplete(s)
                 @test ["repl-completion"] == c
                 @test s[r] == "repl-completio"
@@ -1177,8 +1304,8 @@ let current_dir, forbidden
             catch e
                 e isa Base.IOError && occursin("ELOOP", e.msg)
             end
-            c, r = test_complete("\""*escape_string(joinpath(path, "selfsym")))
-            @test c == ["selfsymlink"]
+            c, r = test_complete("\"$(escape_string(path))/selfsym")
+            @test c == [escape_string(joinpath(path, "selfsymlink")) * "\""]
         end
     end
 
@@ -1214,23 +1341,23 @@ mktempdir() do path
         dir_space = replace(space_folder, " " => "\\ ")
         s = Sys.iswindows() ? "cd $dir_space\\\\space" : "cd $dir_space/space"
         c, r = test_scomplete(s)
-        @test s[r] == "space"
-        @test "space\\ .file" in c
+        @test s[r] == (Sys.iswindows() ? "$dir_space\\\\space" : "$dir_space/space")
+        @test "'$space_folder/space .file'" in c
         # Also use shell escape rules within cmd backticks
         s = "`$s"
         c, r = test_scomplete(s)
-        @test s[r] == "space"
-        @test "space\\ .file" in c
+        @test s[r] == (Sys.iswindows() ? "$dir_space\\\\space" : "$dir_space/space")
+        @test "'$space_folder/space .file'" in c
 
         # escape string according to Julia escaping rules
-        julia_esc(str) = escape_string(str, ('\"','$'))
+        julia_esc(str) = REPL.REPLCompletions.do_string_escape(str)
 
         # For normal strings the string should be properly escaped according to
         # the usual rules for Julia strings.
         s = "cd(\"" * julia_esc(joinpath(path, space_folder, "space"))
         c, r = test_complete(s)
-        @test s[r] == "space"
-        @test "space .file\"" in c
+        @test s[r] == julia_esc(joinpath(path, space_folder, "space"))
+        @test julia_esc(joinpath(path, space_folder, "space .file")) * "\"" in c
 
         # '$' is the only character which can appear in a windows filename and
         # which needs to be escaped in Julia strings (on unix we could do this
@@ -1239,23 +1366,23 @@ mktempdir() do path
         escpath = julia_esc(joinpath(path, space_folder, "needs_escape\$"))
         s = "cd(\"$escpath"
         c, r = test_complete(s)
-        @test s[r] == "needs_escape\\\$"
-        @test "needs_escape\\\$.file\"" in c
+        @test s[r] == julia_esc(joinpath(path, space_folder, "needs_escape\$"))
+        @test julia_esc(joinpath(path, space_folder, "needs_escape\$.file")) * "\"" in c
 
         if !Sys.iswindows()
             touch(joinpath(space_folder, "needs_escape2\n\".file"))
             escpath = julia_esc(joinpath(path, space_folder, "needs_escape2\n\""))
             s = "cd(\"$escpath"
             c, r = test_complete(s)
-            @test s[r] == "needs_escape2\\n\\\""
-            @test "needs_escape2\\n\\\".file\"" in c
+            @test s[r] == joinpath(path, space_folder, "needs_escape2\\n\\\"")
+            @test joinpath(path, space_folder, "needs_escape2\\n\\\".file\"") in c
 
             touch(joinpath(space_folder, "needs_escape3\\.file"))
             escpath = julia_esc(joinpath(path, space_folder, "needs_escape3\\"))
             s = "cd(\"$escpath"
             c, r = test_complete(s)
-            @test s[r] == "needs_escape3\\\\"
-            @test "needs_escape3\\\\.file\"" in c
+            @test s[r] == joinpath(path, space_folder, "needs_escape3\\\\")
+            @test joinpath(path, space_folder, "needs_escape3\\\\.file\"") in c
         end
 
         # Test for issue #10324
@@ -1269,16 +1396,17 @@ mktempdir() do path
             test_dir = "test$(c)test"
             mkdir(joinpath(path, test_dir))
             try
-                if !(c in ['\'','$']) # As these characters hold special meaning
+                # TODO: test on Windows when backslash-paths fixed
+                if !Sys.iswindows() && !(c in ['\'','$']) # As these characters hold special meaning
                     # in shell commands the shell path completion cannot complete
                     # paths with these characters
                     c, r, res = test_scomplete(test_dir)
-                    @test c[1] == test_dir*(Sys.iswindows() ? "\\\\" : "/")
+                    @test c[1] == "'$(joinpath(test_dir, ""))'"
                     @test res
                 end
                 escdir = julia_esc(test_dir)
                 c, r, res = test_complete("\""*escdir)
-                @test c[1] == escdir*(Sys.iswindows() ? "\\\\" : "/")
+                @test c[1] == julia_esc(joinpath(test_dir, ""))
                 @test res
             finally
                 rm(joinpath(path, test_dir), recursive=true)
@@ -1289,9 +1417,9 @@ mktempdir() do path
 end
 
 # Test tilde path completion
-let (c, r, res) = test_complete("\"~/julia")
+let (c, r, res) = test_complete("\"~/ka8w5rsz")
     if !Sys.iswindows()
-        @test res && c == String[homedir() * "/julia"]
+        @test res && c == String[homedir() * "/ka8w5rsz\""]
     else
         @test !res
     end
@@ -1299,12 +1427,76 @@ let (c, r, res) = test_complete("\"~/julia")
     c, r, res = test_complete("\"foo~bar")
     @test !res
 end
+if !Sys.iswindows()
+    # create a dir and file temporarily in the home directory
+    path = mkpath(joinpath(homedir(), "Zx6Wa0GkC0"))
+    touch(joinpath(path, "my_file"))
+    try
+        let (c, r, res) = test_complete("\"~/Zx6Wa0GkC")
+            @test res
+            @test c == String["~/Zx6Wa0GkC0/"]
+        end
+        let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0")
+            @test res
+            @test c == String[homedir() * "/Zx6Wa0GkC0"]
+        end
+        let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0/my_")
+            @test res
+            @test c == String["~/Zx6Wa0GkC0/my_file\""]
+        end
+        let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0/my_file")
+            @test res
+            @test c == String[homedir() * "/Zx6Wa0GkC0/my_file\""]
+        end
+    finally
+        rm(path, recursive=true)
+    end
+end
 
 # Test the completion returns nothing when the folder do not exist
 let (c, r) = test_complete("cd(\"folder_do_not_exist_77/file")
     @test length(c) == 0
 end
 
+# Test path completion in the middle of a line (issue #60050)
+mktempdir() do path
+    # Create test directory structure
+    foo_dir = joinpath(path, "foo_dir")
+    mkpath(foo_dir)
+    touch(joinpath(path, "foo_file.txt"))
+
+    # On Windows, use backslashes; on Unix, use forward slashes
+    sep = Sys.iswindows() ? "\\\\" : "/"
+    # On Windows, completion results have escaped backslashes
+    path_expected = Sys.iswindows() ? replace(path, "\\" => "\\\\") : path
+
+    # Completion at end of line should work
+    let (c, r, res) = test_complete("\"$(path)$(sep)foo")
+        @test res
+        @test length(c) == 2
+        @test "$(path_expected)$(sep)foo_dir$(sep)" in c
+        @test "$(path_expected)$(sep)foo_file.txt" in c
+    end
+
+    # Completion in middle of line should also work (regression in 1.12)
+    let (c, r, res) = test_complete_pos("\"$(path)$(sep)foo|$(sep)bar.toml\"")
+        @test res
+        @test length(c) == 2
+        @test "$(path_expected)$(sep)foo_dir$(sep)" in c
+        @test "$(path_expected)$(sep)foo_file.txt" in c
+        # Check that the range covers only the part before the cursor
+        @test findfirst("$(sep)bar", "\"$(path)$(sep)foo$(sep)bar.toml\"")[1] - 1 in r
+    end
+
+    # Completion in middle of function call with trailing arguments
+    let (c, r, res) = test_complete_pos("run_something(\"$(path)$(sep)foo|$(sep)bar.toml\"; kwarg=true)")
+        @test res
+        @test length(c) == 2
+        @test "$(path_expected)$(sep)foo_dir$(sep)" in c
+        @test "$(path_expected)$(sep)foo_file.txt" in c
+    end
+end
+
 if Sys.iswindows()
     tmp = tempname()
     touch(tmp)
@@ -1314,27 +1506,43 @@ if Sys.iswindows()
     cd(path) do
         s = "cd ..\\\\"
         c,r = test_scomplete(s)
-        @test r == length(s)+1:length(s)
-        @test temp_name * "\\\\" in c
+        @test r == lastindex(s)-3:lastindex(s)
+        @test "../$temp_name/" in c
+
+        s = "cd ../"
+        c,r = test_scomplete(s)
+        @test r == 4:6
+        @test "../$temp_name/" in c
 
         s = "ls $(file[1:2])"
         c,r = test_scomplete(s)
-        @test r == length(s)-1:length(s)
+        @test r == lastindex(s)-1:lastindex(s)
         @test file in c
 
         s = "cd(\"..\\\\"
         c,r = test_complete(s)
-        @test r == length(s)+1:length(s)
-        @test temp_name * "\\\\" in c
+        @test r == lastindex(s)-3:lastindex(s)
+        @test "..\\\\$temp_name\\\\" in c
+
+        s = "cd(\"../"
+        c,r = test_complete(s)
+        @test r == 5:7
+        @test "..\\\\$temp_name\\\\" in c
 
         s = "cd(\"$(file[1:2])"
         c,r = test_complete(s)
-        @test r == length(s) - 1:length(s)
+        @test r == lastindex(s) - 1:lastindex(s)
         @test (length(c) > 1 && file in c) || (["$file\""] == c)
     end
     rm(tmp)
 end
 
+# issue 51985
+let s = "`\\"
+    c,r = test_scomplete(s)
+    @test r == lastindex(s)+1:lastindex(s)
+end
+
 # auto completions of true and false... issue #14101
 let s = "tru"
     c, r, res = test_complete(s)
@@ -1356,7 +1564,9 @@ end
     @test "⁽¹²³⁾ⁿ" in test_complete("\\^(123)n")[1]
     @test "ⁿ" in test_complete("\\^n")[1]
     @test "ᵞ" in test_complete("\\^gamma")[1]
-    @test isempty(test_complete("\\^(123)nq")[1])
+    @test "⁽¹²³⁾ⁿ𐞥" in test_complete("\\^(123)nq")[1]
+    @test "⁽¹²³⁾ⁿꟴ" in test_complete("\\^(123)nQ")[1]
+    @test isempty(test_complete("\\^(123)nX")[1])
     @test "₍₁₂₃₎ₙ" in test_complete("\\_(123)n")[1]
     @test "ₙ" in test_complete("\\_n")[1]
     @test "ᵧ" in test_complete("\\_gamma")[1]
@@ -1374,10 +1584,10 @@ function test_dict_completion(dict_name)
     s = "$dict_name[ \"abcd"  # leading whitespace
     c, r = test_complete(s)
     @test c == Any["\"abcd\"]"]
-    s = "$dict_name[\"abcd]"  # trailing close bracket
+    s = "$dict_name[Bas]"  # trailing close bracket
     c, r = completions(s, lastindex(s) - 1)
-    c = map(completion_text, c)
-    @test c == Any["\"abcd\""]
+    c = map(x -> named_completion(x).completion, c)
+    @test c == Any["Base"]
     s = "$dict_name[:b"
     c, r = test_complete(s)
     @test c == Any[":bar", ":bar2"]
@@ -1429,8 +1639,16 @@ test_dict_completion("CompletionFoo.test_customdict")
 test_dict_completion("test_repl_comp_dict")
 test_dict_completion("test_repl_comp_customdict")
 
-# Issue #23004: this should not throw:
-@test REPLCompletions.dict_identifier_key("test_dict_ℂ[\\", :other) isa Tuple
+@testset "dict_identifier_key" begin
+    # Issue #23004: this should not throw:
+    let s = "test_dict_ℂ[\\"
+        @test REPLCompletions.completions(s, sizeof(s), Main.CompletionFoo) isa Tuple
+    end
+    # Issue #55931: neither should this:
+    let s = "test_dict_no_length["
+        @test REPLCompletions.completions(s, sizeof(s), Main.CompletionFoo) isa Tuple
+    end
+end
 
 @testset "completion of string/cmd macros (#22577)" begin
     c, r, res = test_complete("ra")
@@ -1443,28 +1661,38 @@ test_dict_completion("test_repl_comp_customdict")
     @test "testcmd`" in c
     c, r, res = test_complete("CompletionFoo.tϵsτc")
     @test "tϵsτcmδ`" in c
+
+    # Issue #56071: don't complete string and command macros when the input matches the internal name like `r_` to `r"`
+    c, r, res = test_complete("CompletionFoo.teststr_")
+    @test isempty(c)
+    c, r, res = test_complete("CompletionFoo.teststr_s")
+    @test isempty(c)
+    c, r, res = test_complete("CompletionFoo.testcmd_")
+    @test isempty(c)
+    c, r, res = test_complete("CompletionFoo.testcmd_c")
+    @test isempty(c)
 end
 
 @testset "Keyword-argument completion" begin
     c, r = test_complete("CompletionFoo.kwtest3(a;foob")
     @test c == ["foobar="]
     c, r = test_complete("CompletionFoo.kwtest3(a; le")
-    @test "length" ∈ c # provide this kind of completion in case the user wants to splat a variable
+    @test "length" ∉ c
     @test "length=" ∈ c
     @test "len2=" ∈ c
     @test "len2" ∉ c
     c, r = test_complete("CompletionFoo.kwtest3.(a;\nlength")
-    @test "length" ∈ c
+    @test "length" ∉ c
     @test "length=" ∈ c
     c, r = test_complete("CompletionFoo.kwtest3(a, length=4, l")
     @test "length" ∈ c
     @test "length=" ∉ c # since it was already used, do not suggest it again
     @test "len2=" ∈ c
     c, r = test_complete("CompletionFoo.kwtest3(a; kwargs..., fo")
-    @test "foreach" ∈ c # provide this kind of completion in case the user wants to splat a variable
+    @test "foreach" ∉ c
     @test "foobar=" ∈ c
     c, r = test_complete("CompletionFoo.kwtest3(a; another!kwarg=0, le")
-    @test "length" ∈ c
+    @test "length" ∉ c
     @test "length=" ∈ c # the first method could be called and `anotherkwarg` slurped
     @test "len2=" ∈ c
     c, r = test_complete("CompletionFoo.kwtest3(a; another!")
@@ -1478,7 +1706,7 @@ end
     c, r = test_complete_foo("kwtest3(blabla; unknown=4, namedar")
     @test c == ["namedarg="]
     c, r = test_complete_foo("kwtest3(blabla; named")
-    @test "named" ∈ c
+    @test "named" ∉ c
     @test "namedarg=" ∈ c
     @test "len2" ∉ c
     c, r = test_complete_foo("kwtest3(blabla; named.")
@@ -1486,11 +1714,11 @@ end
     c, r = test_complete_foo("kwtest3(blabla; named..., another!")
     @test c == ["another!kwarg="]
     c, r = test_complete_foo("kwtest3(blabla; named..., len")
-    @test "length" ∈ c
+    @test "length" ∉ c
     @test "length=" ∈ c
     @test "len2=" ∈ c
     c, r = test_complete_foo("kwtest3(1+3im; named")
-    @test "named" ∈ c
+    @test "named" ∉ c
     # TODO: @test "namedarg=" ∉ c
     @test "len2" ∉ c
     c, r = test_complete_foo("kwtest3(1+3im; named.")
@@ -1817,7 +2045,7 @@ function Base.getproperty(v::Issue36437, s::Symbol)
 end
 
 let s = "Issue36437(42)."
-    c, r, res = test_complete_context(s, @__MODULE__)
+    c, r, res = test_complete_context(s)
     @test res
     for n in ("a", "b", "c")
         @test n in c
@@ -1825,16 +2053,47 @@ let s = "Issue36437(42)."
 end
 
 let s = "Some(Issue36437(42)).value."
-    c, r, res = test_complete_context(s, @__MODULE__)
+    c, r, res = test_complete_context(s)
+    @test res
+    for n in ("a", "b", "c")
+        @test n in c
+    end
+end
+
+some_issue36437 = Some(Issue36437(42))
+
+let s = "some_issue36437.value."
+    c, r, res = test_complete_context(s)
     @test res
     for n in ("a", "b", "c")
         @test n in c
     end
 end
 
+# get completions for :toplevel/:tuple expressions
+let s = "some_issue36437.value.a, some_issue36437.value."
+    c, r, res = test_complete_context(s)
+    @test res
+    for n in ("a", "b", "c")
+        @test n in c
+    end
+end
+let s = "@show some_issue36437.value.a; some_issue36437.value."
+    c, r, res = test_complete_context(s)
+    @test res
+    for n in ("a", "b", "c")
+        @test n in c
+    end
+end
+# https://github.com/JuliaLang/julia/issues/51505
+let s = "()."
+    c, r, res = test_complete_context(s)
+    @test res
+end
+
 # aggressive concrete evaluation on mutable allocation in `repl_frame`
 let s = "Ref(Issue36437(42))[]."
-    c, r, res = test_complete_context(s, @__MODULE__)
+    c, r, res = test_complete_context(s)
     @test res
     for n in ("a", "b", "c")
         @test n in c
@@ -1842,14 +2101,132 @@ let s = "Ref(Issue36437(42))[]."
     @test "v" ∉ c
 end
 
+# concrete evaluation through `getindex`ing dictionary
+global_dict = Dict{Symbol, Any}(:r => r"foo")
+let s = "global_dict[:r]."
+    c, r, res = test_complete_context(s)
+    @test res
+    for fname in fieldnames(Regex)
+        @test String(fname) in c
+    end
+end
+global_dict_nested = Dict{Symbol, Any}(:g => global_dict)
+let s = "global_dict_nested[:g][:r]."
+    c, r, res = test_complete_context(s)
+    @test res
+    for fname in fieldnames(Regex)
+        @test String(fname) in c
+    end
+end
+
+# dict completions through nested `getindex`ing
+let s = "global_dict_nested["
+    c, r, res = test_complete_context(s)
+    @test res
+    @test ":g]" in c
+end
+let s = "global_dict_nested[:g]["
+    c, r, res = test_complete_context(s)
+    @test res
+    @test ":r]" in c
+end
+
 const global_xs = [Some(42)]
 let s = "pop!(global_xs)."
-    c, r, res = test_complete_context(s, @__MODULE__)
+    c, r, res = test_complete_context(s)
     @test res
     @test "value" in c
 end
 @test length(global_xs) == 1 # the completion above shouldn't evaluate `pop!` call
 
+# https://github.com/JuliaLang/julia/issues/51499
+# allow aggressive concrete evaluation for child uncached frames
+struct Issue51499CompletionDict
+    inner::Dict{Symbol,Any}
+    leaf_func # Function that gets invoked on leaf objects before being returned.
+    function Issue51499CompletionDict(inner::Dict, leaf_func=identity)
+        inner = Dict{Symbol,Any}(Symbol(k) => v for (k, v) in inner)
+        return new(inner, leaf_func)
+    end
+end
+function Base.getproperty(tcd::Issue51499CompletionDict, name::Symbol)
+    prop = getfield(tcd, :inner)[name]
+    isa(prop, Issue51499CompletionDict) && return prop
+    return getfield(tcd, :leaf_func)(prop)
+end
+Base.propertynames(tcd::Issue51499CompletionDict) = keys(getfield(tcd, :inner))
+
+const issue51499 = Ref{Any}(nothing)
+tcd3 = Issue51499CompletionDict(
+    Dict(:a => 1.0, :b => 2.0),
+    function (x)
+        issue51499[] = x
+        return sin(x)
+    end)
+tcd2 = Issue51499CompletionDict(
+    Dict(:v => tcd3, :w => 1.0))
+tcd1 = Issue51499CompletionDict(
+    Dict(:x => tcd2, :y => 1.0))
+let (c, r, res) = test_complete_context("tcd1.")
+    @test res
+    @test "x" in c && "y" in c
+    @test isnothing(issue51499[])
+end
+let (c, r, res) = test_complete_context("tcd1.x.")
+    @test res
+    @test "v" in c && "w" in c
+    @test isnothing(issue51499[])
+end
+let (c, r, res) = test_complete_context("tcd1.x.v.")
+    @test res
+    @test "a" in c && "b" in c
+    @test isnothing(issue51499[])
+end
+@test tcd1.x.v.a == sin(1.0)
+@test issue51499[] == 1.0
+
+# aggressive constant propagation for mutable `Const`s
+mutable_const_prop = Dict{Symbol,Any}(:key => Any[Some(r"x")])
+getkeyelem(d) = d[:key][1]
+let (c, r, res) = test_complete_context("getkeyelem(mutable_const_prop).")
+    @test res
+    @test "value" in c
+end
+let (c, r, res) = test_complete_context("getkeyelem(mutable_const_prop).value.")
+    @test res
+    for name in fieldnames(Regex)
+        @test String(name) in c
+    end
+end
+
+# JuliaLang/julia/#51548
+# don't return wrong result due to mutable inconsistency
+function issue51548(T, a)
+    # if we fold `xs = getindex(T)` to `xs::Const(Vector{T}())`, then we may wrongly
+    # constant-fold `isempty(xs)::Const(true)` and return wrong result
+    xs = T[]
+    if a isa T
+        push!(xs, a)
+    end
+    return Val(isempty(xs))
+end;
+let inferred = REPL.REPLCompletions.repl_eval_ex(
+        :(issue51548(Any, r"issue51548")), @__MODULE__; limit_aggressive_inference=true)
+    @test !isnothing(inferred)
+    RT = Core.Compiler.widenconst(inferred)
+    @test Val{false} <: RT
+end
+module TestLimitAggressiveInferenceGetProp
+global global_var = 1
+end
+function test_limit_aggressive_inference_getprop()
+    return getproperty(TestLimitAggressiveInferenceGetProp, :global_var)
+end
+let inferred = REPL.REPLCompletions.repl_eval_ex(
+        :(test_limit_aggressive_inference_getprop()), @__MODULE__; limit_aggressive_inference=true)
+    @test inferred == Core.Const(1)
+end
+
 # Test completion of var"" identifiers (#49280)
 let s = "var\"complicated "
     c, r = test_complete_foo(s)
@@ -1873,3 +2250,601 @@ let s = "`abc`.e"
     # (completions for the fields of `Cmd`)
     @test c == Any["env", "exec"]
 end
+
+# suppress false positive field completions (when `getproperty`/`propertynames` is overloaded)
+struct Issue51499_2
+    inner::Dict{Symbol,Any}
+end
+Base.getproperty(issue51499::Issue51499_2, name::Symbol) = getfield(issue51499, :inner)[name]
+Base.propertynames(issue51499::Issue51499_2) = keys(getfield(issue51499, :inner))
+const issue51499_2_1 = Issue51499_2(Dict(:a => nothing))
+const issue51499_2_2 = Issue51499_2(Dict(:b => nothing))
+let s = "(rand(Bool) ? issue51499_2_1 : issue51499_2_2)."
+    c, r, res = test_complete_context(s)
+    @test "inner" ∉ c
+end
+
+# Test completion for a case when type inference returned `Union` of the same types
+union_somes(a, b) = rand() < 0.5 ? Some(a) : Some(b)
+let s = "union_somes(1, 1.0)."
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "value" in c
+end
+union_some_ref(a, b) = rand() < 0.5 ? Some(a) : Ref(b)
+let s = "union_some_ref(1, 1.0)."
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "value" in c && "x" in c
+end
+
+Issue49892(x) = x
+let s = "Issue49892(fal"
+    c, r, res = test_complete_context(s)
+    @test res
+    for n in ("false", "falses")
+        @test n in c
+    end
+end
+
+@testset "public but non-exported symbols only complete qualified (#51331)" begin
+    c, r, res = test_complete("ispub")
+    @test res
+    @test "ispublic" ∉ c
+
+    c, r, res = test_complete("Base.ispub")
+    @test res
+    @test "ispublic" ∈ c
+
+    @test Base.ispublic(Base, :ispublic)
+    # If this last test starts failing, that's okay, just pick a new example symbol:
+    @test !Base.isexported(Base, :ispublic)
+end
+
+# issue #51194
+for (s, compl) in (("2*CompletionFoo.fmsoe", "fmsoebelkv"),
+                   (":a isa CompletionFoo.test!1", "test!12"),
+                   ("-CompletionFoo.Test_y(3).", "yy"),
+                   ("99 ⨷⁻ᵨ⁷ CompletionFoo.type_test.", "xx"),
+                   ("CompletionFoo.type_test + CompletionFoo.Test_y(2).", "yy"),
+                   ("(CompletionFoo.type_test + CompletionFoo.Test_y(2)).", "xx"),
+                   ("CompletionFoo.type_test + CompletionFoo.unicode_αβγ.", "yy"),
+                   ("(CompletionFoo.type_test + CompletionFoo.unicode_αβγ).", "xx"),
+                   ("foo'CompletionFoo.test!1", "test!12"))
+    @testset let s=s, compl=compl
+        c, r = test_complete_noshift(s)
+        @test length(c) == 1
+        @test only(c) == compl
+    end
+end
+
+# allows symbol completion within incomplete :macrocall
+# https://github.com/JuliaLang/julia/issues/51827
+macro issue51827(args...)
+    length(args) ≥ 2 || error("@issue51827: incomplete arguments")
+    return args
+end
+let s = "@issue51827 Base.ac"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "acquire" in c
+end
+
+let t = REPLCompletions.repl_eval_ex(:(`a b`), @__MODULE__; limit_aggressive_inference=true)
+    @test t isa Core.Const
+    @test t.val == `a b`
+end
+
+# issue #51823
+@test "include" in test_complete_context("inc", Main)[1]
+
+# REPL completions should not try to concrete-evaluate !:noub methods
+function very_unsafe_method(i::Int)
+    xs = Any[]
+    @inbounds xs[i]
+end
+let t = REPLCompletions.repl_eval_ex(:(unsafe_method(42)), @__MODULE__)
+    @test isnothing(t)
+end
+
+# https://github.com/JuliaLang/julia/issues/52099
+const issue52099 = []
+let t = REPLCompletions.repl_eval_ex(:(Base.PersistentDict(issue52099 => 3)), @__MODULE__)
+    if t isa Core.Const
+        @test length(t.val) == 1
+    end
+end
+
+# test REPLInterpreter effects for `getindex(::Dict, key)`
+for (DictT, KeyT) = Any[(Dict{Symbol,Any}, Symbol),
+                        (Dict{Int,Any}, Int),
+                        (Dict{String,Any}, String)]
+    @testset let DictT=DictT, KeyT=KeyT
+        effects = Base.infer_effects(getindex, (DictT,KeyT); interp=REPL.REPLCompletions.REPLInterpreter())
+        @test Core.Compiler.is_effect_free(effects)
+        @test Core.Compiler.is_terminates(effects)
+        @test Core.Compiler.is_noub(effects)
+        effects = Base.infer_effects((DictT,KeyT); interp=REPL.REPLCompletions.REPLInterpreter()) do d, key
+            key in keys(d)
+        end
+        @test Core.Compiler.is_effect_free(effects)
+        @test Core.Compiler.is_terminates(effects)
+        @test Core.Compiler.is_noub(effects)
+    end
+end
+
+# test invalidation support
+replinterp_invalidation_callee(c::Bool=rand(Bool)) = Some(c ? r"foo" : r"bar")
+replinterp_invalidation_caller() = replinterp_invalidation_callee().value
+@test REPLCompletions.repl_eval_ex(:(replinterp_invalidation_caller()), @__MODULE__) == Regex
+replinterp_invalidation_callee(c::Bool=rand(Bool)) = Some(c ? "foo" : "bar")
+@test REPLCompletions.repl_eval_ex(:(replinterp_invalidation_caller()), @__MODULE__) == String
+
+# JuliaLang/julia#52922
+let s = "using Base.Th"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Threads" in c
+end
+let s = "using Base."
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "BinaryPlatforms" in c
+end
+# JuliaLang/julia#53999
+let s = "using Base.Sort, Base.Th"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Threads" in c
+end
+# test cases with the `.` accessor
+module Issue52922
+module Inner1
+module Inner12 end
+end
+module Inner2 end
+end
+let s = "using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = " using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = "@time using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = " @time using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = "@time(using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = "using .Issue52922.Inn"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Inner1" in c
+end
+let s = "using .Issue52922.Inner1."
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Inner12" in c
+end
+let s = "using .Inner1.Inn"
+    c, r, res = test_complete_context(s, Issue52922)
+    @test res
+    @test "Inner12" in c
+end
+let s = "using ..Issue52922.Inn"
+    c, r, res = test_complete_context(s, Issue52922.Inner1)
+    @test res
+    @test "Inner2" in c
+end
+let s = "using ...Issue52922.Inn"
+    c, r, res = test_complete_context(s, Issue52922.Inner1.Inner12)
+    @test res
+    @test "Inner2" in c
+end
+
+struct Issue53126 end
+Base.propertynames(::Issue53126) = error("this should not be called")
+let s = "Issue53126()."
+    c, r, res = test_complete_context(s)
+    @test res
+    @test isempty(c)
+end
+
+# complete explicitly `using`ed names
+baremodule TestExplicitUsing
+using Base: @assume_effects
+end # baremodule TestExplicitUsing
+let s = "@assu"
+    c, r, res = test_complete_context(s, TestExplicitUsing)
+    @test res
+    @test "@assume_effects" in c
+end
+let s = "TestExplicitUsing.@assu"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "@assume_effects" in c
+end
+baremodule TestExplicitUsingNegative end
+let s = "@assu"
+    c, r, res = test_complete_context(s, TestExplicitUsingNegative)
+    @test res
+    @test "@assume_effects" ∉ c
+end
+let s = "TestExplicitUsingNegative.@assu"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "@assume_effects" ∉ c
+end
+# should complete implicitly `using`ed names
+module TestImplicitUsing end
+let s = "@asse"
+    c, r, res = test_complete_context(s, TestImplicitUsing)
+    @test res
+    @test "@assert" in c
+end
+let s = "TestImplicitUsing.@asse"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "@assert" in c
+end
+
+# JuliaLang/julia#23374: completion for `import Mod.name`
+module Issue23374
+global v23374 = nothing
+global w23374 = missing
+end
+let s = "import .Issue23374.v"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "v23374" in c
+end
+let s = "import Base.sin, .Issue23374.v"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "v23374" in c
+end
+let s = "using .Issue23374.v"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test isempty(c)
+end
+# JuliaLang/julia#23374: completion for `using Mod: name`
+let s = "using Base: @ass"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "@assume_effects" in c
+end
+let s = "using .Issue23374: v"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "v23374" in c
+end
+let s = "using .Issue23374: v23374, w"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "w23374" in c
+end
+# completes `using ` to `using [list of available modules]`
+let s = "using "
+    c, r, res = test_complete_context(s)
+    @test res
+    @test !isempty(c)
+end
+
+baremodule _TestInternalBindingOnly
+export binding1, binding2
+global binding1 = global binding2 = nothing
+end
+baremodule TestInternalBindingOnly
+using .._TestInternalBindingOnly
+global binding = nothing
+export binding
+end
+for s = ("TestInternalBindingOnly.bind", "using .TestInternalBindingOnly: bind")
+    # when module is explicitly accessed, completion should show internal names only
+    let (c, r, res) = test_complete_context(s; shift=false)
+        @test res
+        @test "binding" ∈ c
+        @test "binding1" ∉ c && "binding2" ∉ c
+    end
+    # unless completion is forced via shift key
+    let (c, r, res) = test_complete_context(s, TestInternalBindingOnly)
+        @test res
+        @test "binding" ∈ c
+        @test "binding1" ∈ c && "binding2" ∈ c
+    end
+end
+# without explicit module access, completion should show all available names
+let (c, r, res) = test_complete_context("bind", TestInternalBindingOnly; shift=false)
+    @test res
+    @test "binding" ∈ c
+    @test "binding1" ∈ c && "binding2" ∈ c
+end
+let (c, r, res) = test_complete_context("si", Main; shift=false)
+    @test res
+    @test "sin" ∈ c
+end
+
+let (c, r, res) = test_complete_context("const xxx = Base.si", Main)
+    @test res
+    @test "sin" ∈ c
+end
+
+let (c, r, res) = test_complete_context("global xxx::Number = Base.", Main)
+    @test res
+    @test "pi" ∈ c
+end
+
+# #55842
+let (c, r) = test_complete_pos("@tim| using Date")
+    @test "@time" in c
+    @test r == 1:4
+end
+
+# #56389
+let s = "begin\n  using Ran"
+    c, r = test_complete(s)
+    @test "Random" in c
+    @test r == 15:17
+    @test s[r] == "Ran"
+end
+let s = "using .CompletionFoo: bar, type_"
+    c, r = test_complete(s)
+    @test "type_test" in c
+    @test r == 28:32
+    @test s[r] == "type_"
+end
+
+# #55518
+let s = "CompletionFoo.@barfoo nothi"
+    c, r = test_complete(s)
+    @test "nothing" in c
+    @test r == 23:27
+end
+let s = "CompletionFoo.@barfoo kwtest"
+    c, r = test_complete(s)
+    @test isempty(c)
+end
+let s = "CompletionFoo.kwtest(x=type"
+    c, r = test_complete(s)
+    @test "typeof" in c
+    @test !("type_test" in c)
+    @test r == 24:27
+end
+let s = "CompletionFoo.bar; nothi"
+    c, r = test_complete(s)
+    @test "nothing" in c
+    @test r == 20:24
+end
+let s = "CompletionFoo.bar; @ti"
+    c, r = test_complete(s)
+    @test "@time" in c
+    @test r == 20:22
+end
+let s = "x = sin.([1]); y = ex"
+    c, r = test_complete(s)
+    @test "exp" in c
+    @test r == 20:21
+end
+
+# #57611
+let s = "x = Base.BinaryPlatforms.ar"
+    c, r = test_complete(s)
+    @test "arch" in c
+    @test r == 26:27
+end
+
+# #55520
+let s = "@ignoremacro A .= A setup=(A=ident"
+    c, r = test_complete(s)
+    @test "identity"in c
+    @test r == 30:34
+end
+
+# #57307
+let s = "unicode_αβγ.yy = named.len"
+    c, r = test_complete_foo(s)
+    @test "len2" in c
+    @test r == 27:29
+end
+
+# #55429
+let s = "@time @eval CompletionFoo.Compl"
+    c, r = test_complete(s)
+    @test "CompletionFoo2" in c
+    @test r == 27:31
+end
+
+# #55420
+let s = "CompletionFoo.test(iden"
+    c, r = test_complete(s)
+    @test "identity" in c
+    @test r == 20:23
+end
+
+# #57772
+let s = "sum(!ismis"
+    c, r = test_complete(s)
+    @test "ismissing" in c
+    @test r == 6:10
+end
+let s = "sum(!!ismis"
+    c, r = test_complete(s)
+    @test "ismissing" in c
+    @test r == 7:11
+end
+
+# Don't trigger complete_methods! when the cursor is on the function name.
+let s = "prin|(\"hello\")"
+    c, r = test_complete_pos(s)
+    @test "print" in c
+    @test r == 1:4
+end
+
+# Don't crash when tab-completing paths that cause ispath() to throw
+let s = "include(\"" * repeat("a", 5000) # ENAMETOOLONG
+    c, r = test_complete(s)
+    @test isempty(c)
+end
+
+# JuliaLang/julia#57780
+const issue57780 = ["a", "b", "c"]
+const issue57780_orig = copy(issue57780)
+test_complete_context("empty!(issue57780).", Main)
+@test issue57780 == issue57780_orig
+
+function g54131 end
+for i in 1:498
+    @eval g54131(::Val{$i}) = i
+end
+g54131(::Val{499}; kwarg=true) = 499*kwarg
+struct F54131; end
+Base.getproperty(::F54131, ::Symbol) = Any[cos, sin, g54131][rand(1:3)]
+f54131 = F54131()
+@testset "performance of kwarg completion with large method tables" begin
+    # The goal here is to simply ensure we aren't hitting catestrophically bad
+    # behaviors when shift isn't pressed. The difference between good and bad
+    # is on the order of tens of milliseconds vs tens of seconds; using 1 sec as
+    # a very rough canary that is hopefully robust even in the noisy CI coalmines
+    s = "g54131(kwa"
+    a, b, c = completions(s, lastindex(s), @__MODULE__, #= shift =# false)
+    @test REPLCompletions.KeywordArgumentCompletion("kwarg") in a
+    @test (@elapsed completions(s, lastindex(s), @__MODULE__, false)) < 1
+
+    s = "f54131.x("
+    a, b, c = completions(s, lastindex(s), @__MODULE__, false)
+    @test only(a) isa REPLCompletions.TextCompletion
+    @test (@elapsed completions(s, lastindex(s), @__MODULE__, false)) < 1
+
+    s = "f54131.x(kwa"
+    a, b, c = completions(s, lastindex(s), @__MODULE__, false)
+    @test REPLCompletions.KeywordArgumentCompletion("kwarg") in a
+    @test (@elapsed completions(s, lastindex(s), @__MODULE__, false)) < 100
+end
+
+@kwdef struct T59244
+    asdf = 1
+    qwer = 2
+end
+@kwdef struct S59244{T}
+    asdf::T = 1
+    qwer::T = 2
+end
+@testset "kwarg completion of types" begin
+    s = "T59244(as"
+    a, b, c = completions(s, lastindex(s), @__MODULE__, #= shift =# false)
+    @test REPLCompletions.KeywordArgumentCompletion("asdf") in a
+
+    s = "T59244(; qw"
+    a, b, c = completions(s, lastindex(s), @__MODULE__, #= shift =# false)
+    @test REPLCompletions.KeywordArgumentCompletion("qwer") in a
+    @test REPLCompletions.KeywordArgumentCompletion("qwer") == only(a)
+
+    s = "S59244(as"
+    a, b, c = completions(s, lastindex(s), @__MODULE__, #= shift =# false)
+    @test REPLCompletions.KeywordArgumentCompletion("asdf") in a
+
+    s = "S59244(; qw"
+    a, b, c = completions(s, lastindex(s), @__MODULE__, #= shift =# false)
+    @test REPLCompletions.KeywordArgumentCompletion("qwer") in a
+    @test REPLCompletions.KeywordArgumentCompletion("qwer") == only(a)
+
+    s = "S59244{Int}(as"
+    a, b, c = completions(s, lastindex(s), @__MODULE__, #= shift =# false)
+    @test REPLCompletions.KeywordArgumentCompletion("asdf") in a
+
+    s = "S59244{Int}(; qw"
+    a, b, c = completions(s, lastindex(s), @__MODULE__, #= shift =# false)
+    @test REPLCompletions.KeywordArgumentCompletion("qwer") in a
+    @test REPLCompletions.KeywordArgumentCompletion("qwer") == only(a)
+
+    s = "S59244{Any}(as"
+    a, b, c = completions(s, lastindex(s), @__MODULE__, #= shift =# false)
+    @test REPLCompletions.KeywordArgumentCompletion("asdf") in a
+
+    s = "S59244{Any}(; qw"
+    a, b, c = completions(s, lastindex(s), @__MODULE__, #= shift =# false)
+    @test REPLCompletions.KeywordArgumentCompletion("qwer") in a
+    @test REPLCompletions.KeywordArgumentCompletion("qwer") == only(a)
+end
+
+# Completion inside string interpolation
+let s = "\"example: \$varflo"
+    c, r = test_complete_foo(s)
+    @test "varfloat" in c
+    @test r == 12:17
+end
+
+let s = "\"example: \$(3 + findfir"
+    c, r = test_complete(s)
+    @test "findfirst" in c
+    @test r == 17:23
+end
+
+let s = "\"example: \$(named.len"
+    c, r = test_complete_foo(s)
+    @test "len2" in c
+    @test r == 19:21
+end
+
+# #58296 - complete positional arguments before semicolon
+let s = "string(findfi|; base=16)"
+    c, r = test_complete_pos(s)
+    @test "findfirst" in c
+    @test r == 8:13
+end
+
+# Unknown functions should not cause completions to fail
+let s = "foo58296(findfi"
+    c, r = test_complete(s)
+    @test "findfirst" in c
+    @test r == 10:15
+end
+
+# #58931 - only show local names when completing the empty string
+let s = ""
+    c, r = test_complete_foo(s)
+    @test "test" in c
+    @test !("rand" in c)
+end
+
+# #58309, #58832 - don't show every name when completing after a full keyword
+let s = "true"     # bool is a little different (Base.isidentifier special case)
+    c, r = test_complete(s)
+    @test "trues" in c
+    @test "true" in c
+    @test !("rand" in c)
+end
+
+let s = "for"
+    c, r = test_complete(s)
+    @test "for" in c
+    @test "foreach" in c
+    @test !("rand" in c)
+end
+
+# #58833 - Autocompletion of keyword arguments with do-blocks is broken
+let s = "kwtest6(123; som|) do x; x + 3 end"
+    c, r = test_complete_context_pos(s, Main.CompletionFoo)
+    @test "somekwarg=" in c
+    @test r == 14:16
+end
+
+# Test that `jl_resolve_definition_effects_in_ir` is called correctly and inference doesn't pass unexpected toplevel code
+let s = "(@ccall strlen(\"foo\"::Cstring)::Csize_t).|"
+    _, _, res  = test_complete_pos(s)
+    @test res
+end
diff --git a/stdlib/REPL/test/runtests.jl b/stdlib/REPL/test/runtests.jl
index e152677ccf7bb..2b842dd218f11 100644
--- a/stdlib/REPL/test/runtests.jl
+++ b/stdlib/REPL/test/runtests.jl
@@ -3,6 +3,10 @@
 # Make a copy of the original environment
 original_env = copy(ENV)
 
+module PrecompilationTests
+    include("precompilation.jl")
+end
+
 module REPLTests
     include("repl.jl")
 end
@@ -18,6 +22,12 @@ end
 module TerminalMenusTest
     include("TerminalMenus/runtests.jl")
 end
+module HistoryTest
+    include("history.jl")
+end
+module BadHistoryStartupTest
+    include("bad_history_startup.jl")
+end
 
 # Restore the original environment
 for k in keys(ENV)
diff --git a/stdlib/Random/Project.toml b/stdlib/Random/Project.toml
index f32fc3e2a4f84..5a9cc2dfc4cb7 100644
--- a/stdlib/Random/Project.toml
+++ b/stdlib/Random/Project.toml
@@ -1,5 +1,6 @@
 name = "Random"
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+version = "1.11.0"
 
 [deps]
 SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
@@ -9,7 +10,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
-Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [targets]
-test = ["Test", "SparseArrays", "LinearAlgebra", "Future", "Statistics"]
+test = ["Test", "SparseArrays", "LinearAlgebra", "Future"]
diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md
index e344e47947440..9ef86bb0d94f8 100644
--- a/stdlib/Random/docs/src/index.md
+++ b/stdlib/Random/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Random/docs/src/index.md"
+```
+
 # Random Numbers
 
 ```@meta
@@ -81,7 +85,7 @@ Random.MersenneTwister
 Random.RandomDevice
 ```
 
-## Hooking into the `Random` API
+## [Hooking into the `Random` API](@id rand-api-hook)
 
 There are two mostly orthogonal ways to extend `Random` functionalities:
 1) generating random values of custom types
@@ -90,7 +94,7 @@ There are two mostly orthogonal ways to extend `Random` functionalities:
 The API for 1) is quite functional, but is relatively recent so it may still have to evolve in subsequent releases of the `Random` module.
 For example, it's typically sufficient to implement one `rand` method in order to have all other usual methods work automatically.
 
-The API for 2) is still rudimentary, and may require more work than strictly necessary from the implementor,
+The API for 2) is still rudimentary, and may require more work than strictly necessary from the implementer,
 in order to support usual types of generated values.
 
 ### Generating random values of custom types
@@ -99,7 +103,7 @@ Generating random values for some distributions may involve various trade-offs.
 
 The `Random` module defines a customizable framework for obtaining random values that can address these issues. Each invocation of `rand` generates a *sampler* which can be customized with the above trade-offs in mind, by adding methods to `Sampler`, which in turn can dispatch on the random number generator, the object that characterizes the distribution, and a suggestion for the number of repetitions. Currently, for the latter, `Val{1}` (for a single sample) and `Val{Inf}` (for an arbitrary number) are used, with `Random.Repetition` an alias for both.
 
-The object returned by `Sampler` is then used to generate the random values. When implementing the random generation interface for a value `X` that can be sampled from, the implementor should define the method
+The object returned by `Sampler` is then used to generate the random values. When implementing the random generation interface for a value `X` that can be sampled from, the implementer should define the method
 
 ```julia
 rand(rng, sampler)
@@ -126,8 +130,8 @@ Random.SamplerSimple
 Decoupling pre-computation from actually generating the values is part of the API, and is also available to the user. As an example, assume that `rand(rng, 1:20)` has to be called repeatedly in a loop: the way to take advantage of this decoupling is as follows:
 
 ```julia
-rng = MersenneTwister()
-sp = Random.Sampler(rng, 1:20) # or Random.Sampler(MersenneTwister, 1:20)
+rng = Xoshiro()
+sp = Random.Sampler(rng, 1:20) # or Random.Sampler(Xoshiro, 1:20)
 for x in X
     n = rand(rng, sp) # similar to n = rand(rng, 1:20)
     # use n
@@ -159,8 +163,8 @@ Scalar and array methods for `Die` now work as expected:
 julia> rand(Die)
 Die(5)
 
-julia> rand(MersenneTwister(0), Die)
-Die(11)
+julia> rand(Xoshiro(0), Die)
+Die(10)
 
 julia> rand(Die, 3)
 3-element Vector{Die}:
@@ -215,7 +219,7 @@ and that we *always* want to build an alias table, regardless of the number of v
 Random.eltype(::Type{<:DiscreteDistribution}) = Int
 
 function Random.Sampler(::Type{<:AbstractRNG}, distribution::DiscreteDistribution, ::Repetition)
-    SamplerSimple(disribution, make_alias_table(distribution.probabilities))
+    SamplerSimple(distribution, make_alias_table(distribution.probabilities))
 end
 ```
 should be defined to return a sampler with pre-computed data, then
@@ -346,8 +350,8 @@ DocTestSetup = nothing
 
 By using an RNG parameter initialized with a given seed, you can reproduce the same pseudorandom
 number sequence when running your program multiple times. However, a minor release of Julia (e.g.
-1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed, in
-particular if `MersenneTwister` is used. (Even if the sequence produced by a low-level function like
+1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed.
+(Even if the sequence produced by a low-level function like
 [`rand`](@ref) does not change, the output of higher-level functions like [`randsubseq`](@ref) may
 change due to algorithm updates.) Rationale: guaranteeing that pseudorandom streams never change
 prohibits many algorithmic improvements.
diff --git a/stdlib/Random/src/DSFMT.jl b/stdlib/Random/src/DSFMT.jl
index 4c5cb8c522667..bdb0059bc0f36 100644
--- a/stdlib/Random/src/DSFMT.jl
+++ b/stdlib/Random/src/DSFMT.jl
@@ -65,7 +65,8 @@ function dsfmt_init_gen_rand(s::DSFMT_state, seed::UInt32)
           s.val, seed)
 end
 
-function dsfmt_init_by_array(s::DSFMT_state, seed::Vector{UInt32})
+function dsfmt_init_by_array(s::DSFMT_state, seed::StridedVector{UInt32})
+    strides(seed) == (1,) || throw(ArgumentError("seed must have its stride equal to 1"))
     ccall((:dsfmt_init_by_array,:libdSFMT),
           Cvoid,
           (Ptr{Cvoid}, Ptr{UInt32}, Int32),
@@ -125,20 +126,25 @@ function mulxmod!(f::GF2X, m::GF2X, deg=degree(m))::GF2X
 end
 
 # cache for X^(2i) mod m
-const _squares = Dict{GF2X, Vector{GF2X}}()
+const _squares = Base.Lockable(Dict{GF2X, Vector{GF2X}}())
 
 # compute f^2 mod m
 function sqrmod!(f::GF2X, m::GF2X)::GF2X
     d = degree(m)-1
     0 <= degree(f) <= d || throw(DomainError("f must satisfy 0 <= degree(f) <= degree(m)-1"))
-    sqrs = get!(_squares, m) do
+    sqrs = @lock _squares get(_squares[], m, nothing)
+    if sqrs === nothing
         x2i = GF2X(1)
-        GF2X[copy(mulxmod!(mulxmod!(x2i, m, d+1), m, d+1)) for i=1:d]
+        sqrs = GF2X[copy(mulxmod!(mulxmod!(x2i, m, d+1), m, d+1)) for i=1:d]
+        @lock _squares get!(_squares[], m, sqrs)
     end
-    foldl(filter(i->coeff(f, i), 0:degree(f)); init=GF2X(0)) do g, i
-        i <= d÷2 ? # optimization for "simple" squares
-            setcoeff!(g, 2i) :
-            xor!(g, sqrs[i])
+    let sqrs = sqrs  # work around the closure capture boxing issue, #15276
+        function sqrmod_closure(g, i)
+            i <= d÷2 ? # optimization for "simple" squares
+                setcoeff!(g, 2i) :
+                xor!(g, sqrs[i])
+        end
+        foldl(sqrmod_closure, filter(Base.Fix1(coeff, f), 0:degree(f)); init=GF2X(0))
     end
 end
 
@@ -153,16 +159,10 @@ function powxmod(e::BigInt, m::GF2X)::GF2X
 end
 
 "Cached jump polynomials for `MersenneTwister`."
-const JumpPolys = Dict{BigInt,GF2X}()
+const JumpPolys = Base.Lockable(Dict{BigInt,GF2X}())
 
-const CharPoly_ref = Ref{GF2X}()
-# Ref because it can not be initialized at load time
-function CharPoly()
-    if !isassigned(CharPoly_ref)
-        CharPoly_ref[] = GF2X(Poly19937)
-    end
-    return CharPoly_ref[]
-end
+# OncePerProcess because it can not be initialized at load time
+const CharPoly = OncePerProcess{GF2X}(() -> GF2X(Poly19937))
 
 """
     calc_jump(steps::Integer)
@@ -174,12 +174,17 @@ less than the period (e.g. ``steps ≪ 2^19937-1``).
 function calc_jump(steps::Integer,
                    charpoly::GF2X=CharPoly())::GF2X
     steps < 0 && throw(DomainError("jump steps must be >= 0 (got $steps)"))
-    if isempty(JumpPolys)
-        JumpPolys[big(10)^20] = GF2X(JPOLY1e20)
+    poly = @lock JumpPolys begin
+        if isempty(JumpPolys[])
+            JumpPolys[][big(10)^20] = GF2X(JPOLY1e20)
+        end
+        get(JumpPolys[], steps, nothing)
     end
-    get!(JumpPolys, steps) do
-        powxmod(big(steps), charpoly)
+    if poly === nothing
+        poly = powxmod(big(steps), charpoly)
+        @lock JumpPolys get!(JumpPolys[], steps, poly)
     end
+    poly
 end
 
 
@@ -194,9 +199,11 @@ function dsfmt_jump(s::DSFMT_state, jp::GF2X)
     work = zeros(Int32, JN32)
     rwork = reinterpret(UInt64, work)
     dsfmt = Vector{UInt64}(undef, nval >> 1)
-    GC.@preserve dsfmt val begin
-        pdsfmt = Base.unsafe_convert(Ptr{Cvoid}, dsfmt)
-        pval = Base.unsafe_convert(Ptr{Cvoid}, val)
+    dsfmtref = Base.cconvert(Ptr{Cvoid}, dsfmt)
+    valref = Base.cconvert(Ptr{Cvoid}, val)
+    GC.@preserve dsfmtref valref begin
+        pdsfmt = Base.unsafe_convert(Ptr{Cvoid}, dsfmtref)
+        pval = Base.unsafe_convert(Ptr{Cvoid}, valref)
         Base.Libc.memcpy(pdsfmt, pval, (nval - 1) * sizeof(Int32))
     end
     dsfmt[end] = UInt64(N*2)
diff --git a/stdlib/Random/src/MersenneTwister.jl b/stdlib/Random/src/MersenneTwister.jl
new file mode 100644
index 0000000000000..8c2995c300adb
--- /dev/null
+++ b/stdlib/Random/src/MersenneTwister.jl
@@ -0,0 +1,642 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## MersenneTwister
+
+const MT_CACHE_F = 501 << 1 # number of Float64 in the cache
+const MT_CACHE_I = 501 << 4 # number of bytes in the UInt128 cache
+
+@assert dsfmt_get_min_array_size() <= MT_CACHE_F
+
+mutable struct MersenneTwister <: AbstractRNG
+    seed::NTuple{2, UInt128}
+    const state::DSFMT_state
+    const vals::Memory{Float64}
+    const ints::Vector{UInt128} # it's temporarily resized internally
+    idxF::Int
+    idxI::Int
+
+    # counters for show
+    adv::Int64          # state of advance at the DSFMT_state level
+    adv_jump::BigInt    # number of skipped Float64 values via randjump
+    adv_vals::Int64     # state of advance when vals is filled-up
+    adv_ints::Int64     # state of advance when ints is filled-up
+
+    global _MersenneTwister(::UndefInitializer) =
+        new((UInt128(0), UInt128(0)), DSFMT_state(),
+            Memory{Float64}(undef, MT_CACHE_F),
+            Vector{UInt128}(undef, MT_CACHE_I >> 4),
+            MT_CACHE_F, 0, 0, Base.GMP.ZERO, -1, -1)
+end
+
+"""
+    MersenneTwister(seed)
+    MersenneTwister()
+
+Create a `MersenneTwister` RNG object. Different RNG objects can have
+their own seeds, which may be useful for generating different streams
+of random numbers.
+The `seed` may be an integer, a string, or a vector of `UInt32` integers.
+If no seed is provided, a randomly generated one is created (using entropy from the system).
+See the [`seed!`](@ref) function for reseeding an already existing `MersenneTwister` object.
+
+!!! compat "Julia 1.11"
+    Passing a negative integer seed requires at least Julia 1.11.
+
+# Examples
+```jldoctest
+julia> rng = MersenneTwister(123);
+
+julia> x1 = rand(rng, 2)
+2-element Vector{Float64}:
+ 0.37453777969575874
+ 0.8735343642013971
+
+julia> x2 = rand(MersenneTwister(123), 2)
+2-element Vector{Float64}:
+ 0.37453777969575874
+ 0.8735343642013971
+
+julia> x1 == x2
+true
+```
+"""
+MersenneTwister(seed=nothing) = seed!(_MersenneTwister(undef), seed)
+
+
+function copy!(dst::MersenneTwister, src::MersenneTwister)
+    dst.seed = src.seed
+    copy!(dst.state, src.state)
+    copyto!(dst.vals, src.vals)
+    copyto!(dst.ints, src.ints)
+    dst.idxF = src.idxF
+    dst.idxI = src.idxI
+    dst.adv = src.adv
+    dst.adv_jump = src.adv_jump
+    dst.adv_vals = src.adv_vals
+    dst.adv_ints = src.adv_ints
+    dst
+end
+
+copy(src::MersenneTwister) = copy!(_MersenneTwister(undef), src)
+
+==(r1::MersenneTwister, r2::MersenneTwister) =
+    r1.seed == r2.seed && r1.state == r2.state &&
+    isequal(r1.vals, r2.vals) &&
+    isequal(r1.ints, r2.ints) &&
+    r1.idxF == r2.idxF && r1.idxI == r2.idxI
+
+hash(r::MersenneTwister, h::UInt) =
+    foldr(hash, (r.seed, r.state, r.vals, r.ints, r.idxF, r.idxI); init=h)
+
+function show(io::IO, rng::MersenneTwister)
+    sep = ", "
+    # seed
+    print(io, MersenneTwister, "(", repr(rng.seed[1]), sep, repr(rng.seed[2]))
+    if rng.adv_jump == 0 && rng.adv == 0
+        return print(io, ")")
+    end
+    # state
+    print(io, sep, rng.adv_jump, sep, rng.adv)
+    if rng.adv_vals != -1 || rng.adv_ints != -1
+        # "(0, 0)" is nicer on the eyes than (-1, 1002)
+        s = rng.adv_vals != -1
+        print(io, sep, s ? rng.adv_vals : zero(rng.adv_vals),
+              sep, s ? rng.idxF : zero(rng.idxF))
+    end
+    if rng.adv_ints != -1
+        idxI = (length(rng.ints)*16 - rng.idxI) / 8 # 8 represents one Int64
+        idxI = Int(idxI) # idxI should always be an integer when using public APIs
+        print(io, sep, rng.adv_ints, sep, idxI)
+    end
+    print(io, ")")
+end
+
+### low level API
+
+function reset_caches!(r::MersenneTwister)
+    # zeroing the caches makes comparing two MersenneTwister RNGs easier
+    fill!(r.vals, 0.0)
+    fill!(r.ints, zero(UInt128))
+    mt_setempty!(r)
+    mt_setempty!(r, UInt128)
+    r.adv_vals = -1
+    r.adv_ints = -1
+    r
+end
+
+#### floats
+
+mt_avail(r::MersenneTwister) = MT_CACHE_F - r.idxF
+mt_empty(r::MersenneTwister) = r.idxF == MT_CACHE_F
+mt_setfull!(r::MersenneTwister) = r.idxF = 0
+mt_setempty!(r::MersenneTwister) = r.idxF = MT_CACHE_F
+mt_pop!(r::MersenneTwister) = @inbounds return r.vals[r.idxF+=1]
+
+@noinline function gen_rand(r::MersenneTwister)
+    r.adv_vals = r.adv
+    GC.@preserve r fill_array!(r, pointer(r.vals), length(r.vals), CloseOpen12())
+    mt_setfull!(r)
+end
+
+reserve_1(r::MersenneTwister) = (mt_empty(r) && gen_rand(r); nothing)
+# `reserve` allows one to call `rand_inbounds` n times
+# precondition: n <= MT_CACHE_F
+reserve(r::MersenneTwister, n::Int) = (mt_avail(r) < n && gen_rand(r); nothing)
+
+#### ints
+
+logsizeof(::Type{<:Union{Bool,Int8,UInt8}}) = 0
+logsizeof(::Type{<:Union{Int16,UInt16}}) = 1
+logsizeof(::Type{<:Union{Int32,UInt32}}) = 2
+logsizeof(::Type{<:Union{Int64,UInt64}}) = 3
+logsizeof(::Type{<:Union{Int128,UInt128}}) = 4
+
+idxmask(::Type{<:Union{Bool,Int8,UInt8}}) = 15
+idxmask(::Type{<:Union{Int16,UInt16}}) = 7
+idxmask(::Type{<:Union{Int32,UInt32}}) = 3
+idxmask(::Type{<:Union{Int64,UInt64}}) = 1
+idxmask(::Type{<:Union{Int128,UInt128}}) = 0
+
+
+mt_avail(r::MersenneTwister, ::Type{T}) where {T<:BitInteger} =
+    r.idxI >> logsizeof(T)
+
+function mt_setfull!(r::MersenneTwister, ::Type{<:BitInteger})
+    r.adv_ints = r.adv
+    ints = r.ints
+
+    @assert length(ints) == 501
+    # dSFMT natively randomizes 52 out of 64 bits of each UInt64 words,
+    # i.e. 12 bits are missing;
+    # by generating 5 words == 5*52 == 260 bits, we can fully
+    # randomize 4 UInt64 = 256 bits; IOW, at the array level, we must
+    # randomize ceil(501*1.25) = 627 UInt128 words (with 2*52 bits each),
+    # which we then condense into fully randomized 501 UInt128 words
+
+    len = 501 + 126 # 126 == ceil(501 / 4)
+    resize!(ints, len)
+    p = pointer(ints) # must be *after* resize!
+    GC.@preserve r fill_array!(r, Ptr{Float64}(p), len*2, CloseOpen12_64())
+
+    k = 501
+    n = 0
+    @inbounds while n != 500
+        u = ints[k+=1]
+        ints[n+=1] ⊻= u << 48
+        ints[n+=1] ⊻= u << 36
+        ints[n+=1] ⊻= u << 24
+        ints[n+=1] ⊻= u << 12
+    end
+    @assert k == len - 1
+    @inbounds ints[501] ⊻= ints[len] << 48
+    resize!(ints, 501)
+    r.idxI = MT_CACHE_I
+end
+
+mt_setempty!(r::MersenneTwister, ::Type{<:BitInteger}) = r.idxI = 0
+
+function reserve1(r::MersenneTwister, ::Type{T}) where T<:BitInteger
+    r.idxI < sizeof(T) && mt_setfull!(r, T)
+    nothing
+end
+
+function mt_pop!(r::MersenneTwister, ::Type{T}) where T<:BitInteger
+    reserve1(r, T)
+    r.idxI -= sizeof(T)
+    i = r.idxI
+    @inbounds x128 = r.ints[1 + i >> 4]
+    i128 = (i >> logsizeof(T)) & idxmask(T) # 0-based "indice" in x128
+    (x128 >> (i128 * (sizeof(T) << 3))) % T
+end
+
+function mt_pop!(r::MersenneTwister, ::Type{T}) where {T<:Union{Int128,UInt128}}
+    reserve1(r, T)
+    idx = r.idxI >> 4
+    r.idxI = idx << 4 - 16
+    @inbounds r.ints[idx] % T
+end
+
+
+#### seed!()
+
+function initstate!(r::MersenneTwister, seed)
+    r.seed = seed # store the seed for `show`
+    seedvec = view(r.ints, 1:2) # re-use r.ints to temporarily store the seed
+    seedvec .= seed
+    dsfmt_init_by_array(r.state, reinterpret(UInt32, seedvec))
+    reset_caches!(r)
+    r.adv = 0
+    r.adv_jump = Base.GMP.ZERO
+    return r
+end
+
+seed!(r::MersenneTwister, seeder::AbstractRNG) =
+    initstate!(r, rand(seeder, NTuple{2, UInt128}))
+
+
+### generation
+
+# MersenneTwister produces natively Float64
+rng_native_52(::MersenneTwister) = Float64
+
+#### helper functions
+
+# precondition: !mt_empty(r)
+rand_inbounds(r::MersenneTwister, ::CloseOpen12_64) = mt_pop!(r)
+rand_inbounds(r::MersenneTwister, ::CloseOpen01_64=CloseOpen01()) =
+    rand_inbounds(r, CloseOpen12()) - 1.0
+
+rand_inbounds(r::MersenneTwister, ::UInt52Raw{T}) where {T<:BitInteger} =
+    reinterpret(UInt64, rand_inbounds(r, CloseOpen12())) % T
+
+function rand(r::MersenneTwister, x::SamplerTrivial{UInt52Raw{UInt64}})
+    reserve_1(r)
+    rand_inbounds(r, x[])
+end
+
+function rand(r::MersenneTwister, ::SamplerTrivial{UInt2x52Raw{UInt128}})
+    reserve(r, 2)
+    rand_inbounds(r, UInt52Raw(UInt128)) << 64 | rand_inbounds(r, UInt52Raw(UInt128))
+end
+
+function rand(r::MersenneTwister, ::SamplerTrivial{UInt104Raw{UInt128}})
+    reserve(r, 2)
+    rand_inbounds(r, UInt52Raw(UInt128)) << 52 ⊻ rand_inbounds(r, UInt52Raw(UInt128))
+end
+
+#### floats
+
+rand(r::MersenneTwister, sp::SamplerTrivial{CloseOpen12_64}) =
+    (reserve_1(r); rand_inbounds(r, sp[]))
+
+#### integers
+
+rand(r::MersenneTwister, T::SamplerUnion(Int64, UInt64, Int128, UInt128)) =
+    mt_pop!(r, T[])
+
+rand(r::MersenneTwister, T::SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32)) =
+    rand(r, UInt52Raw()) % T[]
+
+#### arrays of floats
+
+##### AbstractArray
+
+function rand!(r::MersenneTwister, A::AbstractArray{Float64},
+               I::SamplerTrivial{<:FloatInterval_64})
+    region = LinearIndices(A)
+    # what follows is equivalent to this simple loop but more efficient:
+    # for i=region
+    #     @inbounds A[i] = rand(r, I[])
+    # end
+    m = Base.checked_sub(first(region), 1)
+    n = last(region)
+    while m < n
+        s = mt_avail(r)
+        if s == 0
+            gen_rand(r)
+            s = mt_avail(r)
+        end
+        m2 = min(n, m+s)
+        for i=m+1:m2
+            @inbounds A[i] = rand_inbounds(r, I[])
+        end
+        m = m2
+    end
+    A
+end
+
+
+##### Array : internal functions
+
+# this is essentially equivalent to rand!(r, ::AbstractArray{Float64}, I) above, but due to
+# optimizations which can't be done currently when working with pointers, we have to re-order
+# manually the computation flow to get the performance
+# (see https://discourse.julialang.org/t/unsafe-store-sometimes-slower-than-arrays-setindex)
+function _rand_max383!(r::MersenneTwister, A::UnsafeView{Float64}, I::FloatInterval_64)
+    n = length(A)
+    @assert n <= dsfmt_get_min_array_size()+1 # == 383
+    mt_avail(r) == 0 && gen_rand(r)
+    # from now on, at most one call to gen_rand(r) will be necessary
+    m = min(n, mt_avail(r))
+    GC.@preserve r unsafe_copyto!(A.ptr, pointer(r.vals, r.idxF+1), m)
+    if m == n
+        r.idxF += m
+    else # m < n
+        gen_rand(r)
+        GC.@preserve r unsafe_copyto!(A.ptr+m*sizeof(Float64), pointer(r.vals), n-m)
+        r.idxF = n-m
+    end
+    if I isa CloseOpen01
+        for i=1:n
+            A[i] -= 1.0
+        end
+    end
+    A
+end
+
+function fill_array!(rng::MersenneTwister, A::Ptr{Float64}, n::Int, I)
+    rng.adv += n
+    fill_array!(rng.state, A, n, I)
+end
+
+fill_array!(s::DSFMT_state, A::Ptr{Float64}, n::Int, ::CloseOpen01_64) =
+    dsfmt_fill_array_close_open!(s, A, n)
+
+fill_array!(s::DSFMT_state, A::Ptr{Float64}, n::Int, ::CloseOpen12_64) =
+    dsfmt_fill_array_close1_open2!(s, A, n)
+
+
+function rand!(r::MersenneTwister, A::UnsafeView{Float64},
+               I::SamplerTrivial{<:FloatInterval_64})
+    # depending on the alignment of A, the data written by fill_array! may have
+    # to be left-shifted by up to 15 bytes (cf. unsafe_copyto! below) for
+    # reproducibility purposes;
+    # so, even for well aligned arrays, fill_array! is used to generate only
+    # the n-2 first values (or n-3 if n is odd), and the remaining values are
+    # generated by the scalar version of rand
+    n = length(A)
+    n2 = (n-2) ÷ 2 * 2
+    n2 < dsfmt_get_min_array_size() && return _rand_max383!(r, A, I[])
+
+    pA = A.ptr
+    align = Csize_t(pA) % 16
+    if align > 0
+        pA2 = pA + 16 - align
+        fill_array!(r, pA2, n2, I[]) # generate the data in-place, but shifted
+        unsafe_copyto!(pA, pA2, n2) # move the data to the beginning of the array
+    else
+        fill_array!(r, pA, n2, I[])
+    end
+    for i=n2+1:n
+        A[i] = rand(r, I[])
+    end
+    A
+end
+
+# fills up A reinterpreted as an array of Float64 with n64 values
+function _rand!(r::MersenneTwister, A::Array{T}, n64::Int, I::FloatInterval_64) where T
+    # n64 is the length in terms of `Float64` of the target
+    @assert sizeof(Float64)*n64 <= sizeof(T)*length(A) && isbitstype(T)
+    GC.@preserve A rand!(r, UnsafeView{Float64}(pointer(A), n64), SamplerTrivial(I))
+    A
+end
+
+##### Array: Float64, Float16, Float32
+
+rand!(r::MersenneTwister, A::Array{Float64}, I::SamplerTrivial{<:FloatInterval_64}) =
+    _rand!(r, A, length(A), I[])
+
+mask128(u::UInt128, ::Type{Float16}) =
+    (u & 0x03ff03ff03ff03ff03ff03ff03ff03ff) | 0x3c003c003c003c003c003c003c003c00
+
+mask128(u::UInt128, ::Type{Float32}) =
+    (u & 0x007fffff007fffff007fffff007fffff) | 0x3f8000003f8000003f8000003f800000
+
+for T in (Float16, Float32)
+    @eval function rand!(r::MersenneTwister, A::Array{$T}, ::SamplerTrivial{CloseOpen12{$T}})
+        n = length(A)
+        n128 = n * sizeof($T) ÷ 16
+        _rand!(r, A, 2*n128, CloseOpen12())
+        GC.@preserve A begin
+            A128 = UnsafeView{UInt128}(pointer(A), n128)
+            for i in 1:n128
+                u = A128[i]
+                u ⊻= u << 26
+                # at this point, the 64 low bits of u, "k" being the k-th bit of A128[i] and "+"
+                # the bit xor, are:
+                # [..., 58+32,..., 53+27, 52+26, ..., 33+7, 32+6, ..., 27+1, 26, ..., 1]
+                # the bits needing to be random are
+                # [1:10, 17:26, 33:42, 49:58] (for Float16)
+                # [1:23, 33:55] (for Float32)
+                # this is obviously satisfied on the 32 low bits side, and on the high side,
+                # the entropy comes from bits 33:52 of A128[i] and then from bits 27:32
+                # (which are discarded on the low side)
+                # this is similar for the 64 high bits of u
+                A128[i] = mask128(u, $T)
+            end
+        end
+        for i in 16*n128÷sizeof($T)+1:n
+            @inbounds A[i] = rand(r, $T) + one($T)
+        end
+        A
+    end
+
+    @eval function rand!(r::MersenneTwister, A::Array{$T}, ::SamplerTrivial{CloseOpen01{$T}})
+        rand!(r, A, CloseOpen12($T))
+        I32 = one(Float32)
+        for i in eachindex(A)
+            @inbounds A[i] = Float32(A[i])-I32 # faster than "A[i] -= one(T)" for T==Float16
+        end
+        A
+    end
+end
+
+#### arrays of integers
+
+function rand!(r::MersenneTwister, A::UnsafeView{UInt128}, ::SamplerType{UInt128})
+    n::Int=length(A)
+    i = n
+    while true
+        rand!(r, UnsafeView{Float64}(A.ptr, 2i), CloseOpen12())
+        n < 5 && break
+        i = 0
+        while n-i >= 5
+            u = A[i+=1]
+            A[n]    ⊻= u << 48
+            A[n-=1] ⊻= u << 36
+            A[n-=1] ⊻= u << 24
+            A[n-=1] ⊻= u << 12
+            n-=1
+        end
+    end
+    if n > 0
+        u = rand(r, UInt2x52Raw())
+        for i = 1:n
+            A[i] ⊻= u << (12*i)
+        end
+    end
+    A
+end
+
+for T in BitInteger_types
+    @eval function rand!(r::MersenneTwister, A::Array{$T}, sp::SamplerType{$T})
+        GC.@preserve A rand!(r, UnsafeView(pointer(A), length(A)), sp)
+        A
+    end
+
+    T == UInt128 && continue
+
+    @eval function rand!(r::MersenneTwister, A::UnsafeView{$T}, ::SamplerType{$T})
+        n = length(A)
+        n128 = n * sizeof($T) ÷ 16
+        rand!(r, UnsafeView{UInt128}(pointer(A), n128))
+        for i = 16*n128÷sizeof($T)+1:n
+            @inbounds A[i] = rand(r, $T)
+        end
+        A
+    end
+end
+
+
+#### arrays of Bool
+
+# similar to Array{UInt8}, but we need to mask the result so that only the LSB
+# in each byte can be non-zero
+
+function rand!(r::MersenneTwister, A1::Array{Bool}, sp::SamplerType{Bool})
+    n1 = length(A1)
+    n128 = n1 ÷ 16
+
+    if n128 == 0
+        bits = rand(r, UInt52Raw())
+    else
+        GC.@preserve A1 begin
+            A = UnsafeView{UInt128}(pointer(A1), n128)
+            rand!(r, UnsafeView{Float64}(A.ptr, 2*n128), CloseOpen12())
+            # without masking, non-zero bits could be observed in other
+            # positions than the LSB of each byte
+            mask = 0x01010101010101010101010101010101
+            # we need up to 15 bits of entropy in `bits` for the final loop,
+            # which we will extract from x = A[1] % UInt64;
+            # let y = x % UInt32; y contains 32 bits of entropy, but 4
+            # of them will be used for A[1] itself (the first of
+            # each byte). To compensate, we xor with (y >> 17),
+            # which gets the entropy from the second bit of each byte
+            # of the upper-half of y, and sets it in the first bit
+            # of each byte of the lower half; the first two bytes
+            # now contain 16 usable random bits
+            x = A[1] % UInt64
+            bits = x ⊻ x >> 17
+            for i = 1:n128
+                # << 5 to randomize the first bit of the 8th & 16th byte
+                # (i.e. we move bit 52 (resp. 52 + 64), which is unused,
+                # to position 57 (resp. 57 + 64))
+                A[i] = (A[i] ⊻ A[i] << 5) & mask
+            end
+        end
+    end
+    for i = 16*n128+1:n1
+        @inbounds A1[i] = bits % Bool
+        bits >>= 1
+    end
+    A1
+end
+
+
+### randjump
+
+# Old randjump methods are deprecated, the scalar version is in the Future module.
+
+function _randjump(r::MersenneTwister, jumppoly::DSFMT.GF2X)
+    adv = r.adv
+    adv_jump = r.adv_jump
+    s = _MersenneTwister(undef)
+    s.seed = r.seed
+    copy!(s.state, DSFMT.dsfmt_jump(r.state, jumppoly))
+    reset_caches!(s)
+    s.adv = adv
+    s.adv_jump = adv_jump
+    s
+end
+
+# NON-PUBLIC
+function jump(r::MersenneTwister, steps::Integer)
+    iseven(steps) || throw(DomainError(steps, "steps must be even"))
+    # steps >= 0 checked in calc_jump (`steps >> 1 < 0` if `steps < 0`)
+    j = _randjump(r, Random.DSFMT.calc_jump(steps >> 1))
+    j.adv_jump += steps
+    j
+end
+
+# NON-PUBLIC
+jump!(r::MersenneTwister, steps::Integer) = copy!(r, jump(r, steps))
+
+
+### constructors matching show (EXPERIMENTAL)
+
+# parameters in the tuples are:
+# 1: .adv_jump (jump steps)
+# 2: .adv (number of generated floats at the DSFMT_state level since seeding, besides jumps)
+# 3, 4: .adv_vals, .idxF (counters to reconstruct the float cache, optional if 5-6 not shown))
+# 5, 6: .adv_ints, .idxI (counters to reconstruct the integer cache, optional)
+
+MersenneTwister(s1::Integer, s2::Integer) = initstate!(_MersenneTwister(undef), (s1, s2))
+
+MersenneTwister(s1::Integer, s2::Integer, s3::Integer, s4::Integer,
+                s5::Integer, s6::Integer, s7::Integer, s8::Integer) =
+    advance!(MersenneTwister(s1, s2), s3, s4, s5, s6, s7, s8)
+
+MersenneTwister(s1::Integer, s2::Integer, s3::Integer, s4::Integer,
+                s5::Integer, s6::Integer) =
+    MersenneTwister(s1, s2, s3, s4, s5, s6, 0, 0)
+
+MersenneTwister(s1::Integer, s2::Integer, s3::Integer, s4::Integer) =
+    MersenneTwister(s1, s2, s3, s4, 0, 0, 0, 0)
+
+# advances raw state (per fill_array!) of r by n steps (Float64 values)
+function _advance_n!(r::MersenneTwister, n::Int64, work::Vector{Float64})
+    n == 0 && return
+    n < 0 && throw(DomainError(n, "can't advance $r to the specified state"))
+    ms = dsfmt_get_min_array_size() % Int64
+    @assert n >= ms
+    lw = ms + n % ms
+    resize!(work, lw)
+    GC.@preserve work fill_array!(r, pointer(work), lw, CloseOpen12())
+    c::Int64 = lw
+    GC.@preserve work while n > c
+        fill_array!(r, pointer(work), ms, CloseOpen12())
+        c += ms
+    end
+    @assert n == c
+end
+
+function _advance_to!(r::MersenneTwister, adv::Int64, work)
+    _advance_n!(r, adv - r.adv, work)
+    @assert r.adv == adv
+end
+
+function _advance_F!(r::MersenneTwister, adv_vals, idxF, work)
+    _advance_to!(r, adv_vals, work)
+    gen_rand(r)
+    @assert r.adv_vals == adv_vals
+    r.idxF = idxF
+end
+
+function _advance_I!(r::MersenneTwister, adv_ints, idxI, work)
+    _advance_to!(r, adv_ints, work)
+    mt_setfull!(r, Int) # sets r.adv_ints
+    @assert r.adv_ints == adv_ints
+    r.idxI = 16*length(r.ints) - 8*idxI
+end
+
+function advance!(r::MersenneTwister, adv_jump, adv, adv_vals, idxF, adv_ints, idxI)
+    adv_jump = BigInt(adv_jump)
+    adv, adv_vals, adv_ints = Int64.((adv, adv_vals, adv_ints))
+    idxF, idxI = Int.((idxF, idxI))
+
+    ms = dsfmt_get_min_array_size() % Int
+    work = sizehint!(Vector{Float64}(), 2ms)
+
+    adv_jump != 0 && jump!(r, adv_jump)
+    advF = (adv_vals, idxF) != (0, 0)
+    advI = (adv_ints, idxI) != (0, 0)
+
+    if advI && advF
+        @assert adv_vals != adv_ints
+        if adv_vals < adv_ints
+            _advance_F!(r, adv_vals, idxF, work)
+            _advance_I!(r, adv_ints, idxI, work)
+        else
+            _advance_I!(r, adv_ints, idxI, work)
+            _advance_F!(r, adv_vals, idxF, work)
+        end
+    elseif advF
+        _advance_F!(r, adv_vals, idxF, work)
+    elseif advI
+        _advance_I!(r, adv_ints, idxI, work)
+    else
+        @assert adv == 0
+    end
+    _advance_to!(r, adv, work)
+    r
+end
diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl
index 292ae00d33628..bd3df8e54f194 100644
--- a/stdlib/Random/src/RNGs.jl
+++ b/stdlib/Random/src/RNGs.jl
@@ -1,8 +1,67 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-## RandomDevice
+## default RNG
+
+"""
+    Random.default_rng() -> rng
+
+Return the default global random number generator (RNG), which is used by `rand`-related functions when
+no explicit RNG is provided.
+
+When the `Random` module is loaded, the default RNG is _randomly_ seeded, via [`Random.seed!()`](@ref):
+this means that each time a new julia session is started, the first call to `rand()` produces a different
+result, unless `seed!(seed)` is called first.
+
+It is thread-safe: distinct threads can safely call `rand`-related functions on `default_rng()` concurrently,
+e.g. `rand(default_rng())`.
+
+!!! note
+    The type of the default RNG is an implementation detail. Across different versions of
+    Julia, you should not expect the default RNG to always have the same type, nor that it will
+    produce the same stream of random numbers for a given seed.
+
+!!! compat "Julia 1.3"
+    This function was introduced in Julia 1.3.
+"""
+@inline default_rng() = TaskLocalRNG()
+@inline default_rng(tid::Int) = TaskLocalRNG()
+
+# defined only for backward compatibility with pre-v1.3 code when `default_rng()` didn't exist;
+# `GLOBAL_RNG` was never really documented, but was appearing in the docstring of `rand`
+const GLOBAL_RNG = default_rng()
+
+# In v1.0, the GLOBAL_RNG was storing the seed which was used to initialize it; this seed was used to implement
+# the following feature of `@testset`:
+# > Before the execution of the body of a `@testset`, there is an implicit
+# > call to `Random.seed!(seed)` where `seed` is the current seed of the global RNG.
+# But the global RNG is now `TaskLocalRNG()` and doesn't store its seed; in order to not break `@testset`,
+# in a call like `seed!(seed)` *without* an explicit RNG, we now store the state of `TaskLocalRNG()` in
+# `task_local_storage()`
+
+# GLOBAL_SEED is used as a fall-back when no tls seed is found
+# only `Random.__init__` is allowed to set it
+const GLOBAL_SEED = Xoshiro(0, 0, 0, 0, 0)
+
+get_tls_seed() = get!(() -> copy(GLOBAL_SEED), task_local_storage(),
+                      :__RANDOM_GLOBAL_RNG_SEED_uBlmfA8ZS__)::Xoshiro
+
+# seed the default RNG
+function seed!(seed=nothing)
+    seed!(default_rng(), seed)
+    copy!(get_tls_seed(), default_rng())
+    default_rng()
+end
+
+function __init__()
+    # do not call no-arg `seed!()` to not update `task_local_storage()` unnecessarily at startup
+    seed!(default_rng())
+    copy!(GLOBAL_SEED, TaskLocalRNG())
+    ccall(:jl_gc_init_finalizer_rng_state, Cvoid, ())
+end
 
 
+## RandomDevice
+
 """
     RandomDevice()
 
@@ -12,10 +71,17 @@ The entropy is obtained from the operating system.
 """
 struct RandomDevice <: AbstractRNG; end
 RandomDevice(seed::Nothing) = RandomDevice()
-seed!(rng::RandomDevice) = rng
+seed!(rng::RandomDevice, ::Nothing) = rng
 
 rand(rd::RandomDevice, sp::SamplerBoolBitInteger) = Libc.getrandom!(Ref{sp[]}())[]
 rand(rd::RandomDevice, ::SamplerType{Bool}) = rand(rd, UInt8) % Bool
+
+# specialization for homogeneous tuple types of builtin integers, to avoid
+# repeated system calls
+rand(rd::RandomDevice, sp::SamplerTag{Ref{Tuple{Vararg{T, N}}}, Tuple{S}}
+     ) where {T, N, S <: SamplerUnion(Base.BitInteger_types...)} =
+         Libc.getrandom!(Ref{gentype(sp)}())[]
+
 function rand!(rd::RandomDevice, A::Array{Bool}, ::SamplerType{Bool})
     Libc.getrandom!(A)
     # we need to mask the result so that only the LSB in each byte can be non-zero
@@ -36,772 +102,207 @@ end
 rng_native_52(::RandomDevice) = UInt64
 
 
-## MersenneTwister
-
-const MT_CACHE_F = 501 << 1 # number of Float64 in the cache
-const MT_CACHE_I = 501 << 4 # number of bytes in the UInt128 cache
-
-@assert dsfmt_get_min_array_size() <= MT_CACHE_F
-
-mutable struct MersenneTwister <: AbstractRNG
-    seed::Vector{UInt32}
-    state::DSFMT_state
-    vals::Vector{Float64}
-    ints::Vector{UInt128}
-    idxF::Int
-    idxI::Int
-
-    # counters for show
-    adv::Int64          # state of advance at the DSFMT_state level
-    adv_jump::BigInt    # number of skipped Float64 values via randjump
-    adv_vals::Int64     # state of advance when vals is filled-up
-    adv_ints::Int64     # state of advance when ints is filled-up
-
-    function MersenneTwister(seed, state, vals, ints, idxF, idxI,
-                             adv, adv_jump, adv_vals, adv_ints)
-        length(vals) == MT_CACHE_F && 0 <= idxF <= MT_CACHE_F ||
-            throw(DomainError((length(vals), idxF),
-                      "`length(vals)` and `idxF` must be consistent with $MT_CACHE_F"))
-        length(ints) == MT_CACHE_I >> 4 && 0 <= idxI <= MT_CACHE_I ||
-            throw(DomainError((length(ints), idxI),
-                      "`length(ints)` and `idxI` must be consistent with $MT_CACHE_I"))
-        new(seed, state, vals, ints, idxF, idxI,
-            adv, adv_jump, adv_vals, adv_ints)
-    end
-end
-
-MersenneTwister(seed::Vector{UInt32}, state::DSFMT_state) =
-    MersenneTwister(seed, state,
-                    Vector{Float64}(undef, MT_CACHE_F),
-                    Vector{UInt128}(undef, MT_CACHE_I >> 4),
-                    MT_CACHE_F, 0, 0, 0, -1, -1)
+## SeedHasher
 
 """
-    MersenneTwister(seed)
-    MersenneTwister()
+    Random.SeedHasher(seed=nothing)
 
-Create a `MersenneTwister` RNG object. Different RNG objects can have
-their own seeds, which may be useful for generating different streams
-of random numbers.
-The `seed` may be a non-negative integer or a vector of
-`UInt32` integers. If no seed is provided, a randomly generated one
-is created (using entropy from the system).
-See the [`seed!`](@ref) function for reseeding an already existing
-`MersenneTwister` object.
+Create a `Random.SeedHasher` RNG object, which generates random bytes with the help
+of a cryptographic hash function (SHA2), via calls to [`Random.hash_seed`](@ref).
 
+Given two seeds `s1` and `s2`, the random streams generated by
+`SeedHasher(s1)` and `SeedHasher(s2)` should be distinct if and only if
+`s1` and `s2` are distinct.
 
-# Examples
-```jldoctest
-julia> rng = MersenneTwister(1234);
-
-julia> x1 = rand(rng, 2)
-2-element Vector{Float64}:
- 0.5908446386657102
- 0.7667970365022592
-
-julia> rng = MersenneTwister(1234);
-
-julia> x2 = rand(rng, 2)
-2-element Vector{Float64}:
- 0.5908446386657102
- 0.7667970365022592
+This RNG is used by default in `Random.seed!(::AbstractRNG, seed::Any)`, such that
+RNGs usually need only to implement `seed!(rng, ::AbstractRNG)`.
 
-julia> x1 == x2
-true
-```
+This is an internal type, subject to change.
 """
-MersenneTwister(seed=nothing) =
-    seed!(MersenneTwister(Vector{UInt32}(), DSFMT_state()), seed)
-
-
-function copy!(dst::MersenneTwister, src::MersenneTwister)
-    copyto!(resize!(dst.seed, length(src.seed)), src.seed)
-    copy!(dst.state, src.state)
-    copyto!(dst.vals, src.vals)
-    copyto!(dst.ints, src.ints)
-    dst.idxF = src.idxF
-    dst.idxI = src.idxI
-    dst.adv = src.adv
-    dst.adv_jump = src.adv_jump
-    dst.adv_vals = src.adv_vals
-    dst.adv_ints = src.adv_ints
-    dst
-end
-
-copy(src::MersenneTwister) =
-    MersenneTwister(copy(src.seed), copy(src.state), copy(src.vals), copy(src.ints),
-                    src.idxF, src.idxI, src.adv, src.adv_jump, src.adv_vals, src.adv_ints)
-
-
-==(r1::MersenneTwister, r2::MersenneTwister) =
-    r1.seed == r2.seed && r1.state == r2.state &&
-    isequal(r1.vals, r2.vals) &&
-    isequal(r1.ints, r2.ints) &&
-    r1.idxF == r2.idxF && r1.idxI == r2.idxI
-
-hash(r::MersenneTwister, h::UInt) =
-    foldr(hash, (r.seed, r.state, r.vals, r.ints, r.idxF, r.idxI); init=h)
-
-function show(io::IO, rng::MersenneTwister)
-    # seed
-    seed = from_seed(rng.seed)
-    seed_str = seed <= typemax(Int) ? string(seed) : "0x" * string(seed, base=16) # DWIM
-    if rng.adv_jump == 0 && rng.adv == 0
-        return print(io, MersenneTwister, "(", seed_str, ")")
-    end
-    print(io, MersenneTwister, "(", seed_str, ", (")
-    # state
-    adv = Integer[rng.adv_jump, rng.adv]
-    if rng.adv_vals != -1 || rng.adv_ints != -1
-        if rng.adv_vals == -1
-            @assert rng.idxF == MT_CACHE_F
-            push!(adv, 0, 0) # "(0, 0)" is nicer on the eyes than (-1, 1002)
-        else
-            push!(adv, rng.adv_vals, rng.idxF)
+mutable struct SeedHasher <: AbstractRNG
+    bytes::Vector{UInt8}
+    idx::Int
+    cnt::Int64
+
+    SeedHasher(seed=nothing) = seed!(new(), seed)
+end
+
+seed!(rng::SeedHasher, seeder::AbstractRNG) = seed!(rng, rand(seeder, UInt64, 4))
+seed!(rng::SeedHasher, ::Nothing) = seed!(rng, RandomDevice())
+
+function seed!(rng::SeedHasher, seed)
+    # typically, no more than 256 bits will be needed, so use
+    # SHA2_256 because it's faster
+    ctx = SHA2_256_CTX()
+    hash_seed(seed, ctx)
+    rng.bytes = SHA.digest!(ctx)::Vector{UInt8}
+    rng.idx = 0
+    rng.cnt = 0
+    rng
+end
+
+@noinline function rehash!(rng::SeedHasher)
+    # more random bytes are necessary, from now on use SHA2_512 to generate
+    # more bytes at once
+    ctx = SHA2_512_CTX()
+    SHA.update!(ctx, rng.bytes)
+    # also hash the counter, just for the extremely unlikely case where the hash of
+    # rng.bytes is equal to rng.bytes (i.e. rng.bytes is a "fixed point"), or more generally
+    # if there is a small cycle
+    SHA.update!(ctx, reinterpret(NTuple{8, UInt8}, rng.cnt += 1))
+    rng.bytes = SHA.digest!(ctx)
+    rng.idx = 0
+    rng
+end
+
+function rand(rng::SeedHasher, ::SamplerType{UInt8})
+    rng.idx < length(rng.bytes) || rehash!(rng)
+    rng.bytes[rng.idx += 1]
+end
+
+for TT = Base.BitInteger_types
+    TT === UInt8 && continue
+    @eval function rand(rng::SeedHasher, ::SamplerType{$TT})
+        xx = zero($TT)
+        for ii = 0:sizeof($TT)-1
+            xx |= (rand(rng, UInt8) % $TT) << (8 * ii)
         end
+        xx
     end
-    if rng.adv_ints != -1
-        idxI = (length(rng.ints)*16 - rng.idxI) / 8 # 8 represents one Int64
-        idxI = Int(idxI) # idxI should always be an integer when using public APIs
-        push!(adv, rng.adv_ints, idxI)
-    end
-    join(io, adv, ", ")
-    print(io, "))")
 end
 
-### low level API
-
-function reset_caches!(r::MersenneTwister)
-    # zeroing the caches makes comparing two MersenneTwister RNGs easier
-    fill!(r.vals, 0.0)
-    fill!(r.ints, zero(UInt128))
-    mt_setempty!(r)
-    mt_setempty!(r, UInt128)
-    r.adv_vals = -1
-    r.adv_ints = -1
-    r
-end
+rand(rng::SeedHasher, ::SamplerType{Bool}) = rand(rng, UInt8) % Bool
 
-#### floats
+rng_native_52(::SeedHasher) = UInt64
 
-mt_avail(r::MersenneTwister) = MT_CACHE_F - r.idxF
-mt_empty(r::MersenneTwister) = r.idxF == MT_CACHE_F
-mt_setfull!(r::MersenneTwister) = r.idxF = 0
-mt_setempty!(r::MersenneTwister) = r.idxF = MT_CACHE_F
-mt_pop!(r::MersenneTwister) = @inbounds return r.vals[r.idxF+=1]
-
-@noinline function gen_rand(r::MersenneTwister)
-    r.adv_vals = r.adv
-    GC.@preserve r fill_array!(r, pointer(r.vals), length(r.vals), CloseOpen12())
-    mt_setfull!(r)
-end
-
-reserve_1(r::MersenneTwister) = (mt_empty(r) && gen_rand(r); nothing)
-# `reserve` allows one to call `rand_inbounds` n times
-# precondition: n <= MT_CACHE_F
-reserve(r::MersenneTwister, n::Int) = (mt_avail(r) < n && gen_rand(r); nothing)
-
-#### ints
-
-logsizeof(::Type{<:Union{Bool,Int8,UInt8}}) = 0
-logsizeof(::Type{<:Union{Int16,UInt16}}) = 1
-logsizeof(::Type{<:Union{Int32,UInt32}}) = 2
-logsizeof(::Type{<:Union{Int64,UInt64}}) = 3
-logsizeof(::Type{<:Union{Int128,UInt128}}) = 4
-
-idxmask(::Type{<:Union{Bool,Int8,UInt8}}) = 15
-idxmask(::Type{<:Union{Int16,UInt16}}) = 7
-idxmask(::Type{<:Union{Int32,UInt32}}) = 3
-idxmask(::Type{<:Union{Int64,UInt64}}) = 1
-idxmask(::Type{<:Union{Int128,UInt128}}) = 0
-
-
-mt_avail(r::MersenneTwister, ::Type{T}) where {T<:BitInteger} =
-    r.idxI >> logsizeof(T)
-
-function mt_setfull!(r::MersenneTwister, ::Type{<:BitInteger})
-    r.adv_ints = r.adv
-    ints = r.ints
-
-    @assert length(ints) == 501
-    # dSFMT natively randomizes 52 out of 64 bits of each UInt64 words,
-    # i.e. 12 bits are missing;
-    # by generating 5 words == 5*52 == 260 bits, we can fully
-    # randomize 4 UInt64 = 256 bits; IOW, at the array level, we must
-    # randomize ceil(501*1.25) = 627 UInt128 words (with 2*52 bits each),
-    # which we then condense into fully randomized 501 UInt128 words
-
-    len = 501 + 126 # 126 == ceil(501 / 4)
-    resize!(ints, len)
-    p = pointer(ints) # must be *after* resize!
-    GC.@preserve r fill_array!(r, Ptr{Float64}(p), len*2, CloseOpen12_64())
-
-    k = 501
-    n = 0
-    @inbounds while n != 500
-        u = ints[k+=1]
-        ints[n+=1] ⊻= u << 48
-        ints[n+=1] ⊻= u << 36
-        ints[n+=1] ⊻= u << 24
-        ints[n+=1] ⊻= u << 12
-    end
-    @assert k == len - 1
-    @inbounds ints[501] ⊻= ints[len] << 48
-    resize!(ints, 501)
-    r.idxI = MT_CACHE_I
-end
-
-mt_setempty!(r::MersenneTwister, ::Type{<:BitInteger}) = r.idxI = 0
-
-function reserve1(r::MersenneTwister, ::Type{T}) where T<:BitInteger
-    r.idxI < sizeof(T) && mt_setfull!(r, T)
-    nothing
-end
-
-function mt_pop!(r::MersenneTwister, ::Type{T}) where T<:BitInteger
-    reserve1(r, T)
-    r.idxI -= sizeof(T)
-    i = r.idxI
-    @inbounds x128 = r.ints[1 + i >> 4]
-    i128 = (i >> logsizeof(T)) & idxmask(T) # 0-based "indice" in x128
-    (x128 >> (i128 * (sizeof(T) << 3))) % T
-end
-
-function mt_pop!(r::MersenneTwister, ::Type{T}) where {T<:Union{Int128,UInt128}}
-    reserve1(r, T)
-    idx = r.idxI >> 4
-    r.idxI = idx << 4 - 16
-    @inbounds r.ints[idx] % T
-end
-
-
-### seeding
-
-#### make_seed()
-
-# make_seed produces values of type Vector{UInt32}, suitable for MersenneTwister seeding
-function make_seed()
-    try
-        return rand(RandomDevice(), UInt32, 4)
-    catch ex
-        ex isa IOError || rethrow()
-        @warn "Entropy pool not available to seed RNG; using ad-hoc entropy sources."
-        return make_seed(Libc.rand())
-    end
-end
-
-function make_seed(n::Integer)
-    n < 0 && throw(DomainError(n, "`n` must be non-negative."))
-    seed = UInt32[]
-    while true
-        push!(seed, n & 0xffffffff)
-        n >>= 32
-        if n == 0
-            return seed
-        end
-    end
-end
-
-# inverse of make_seed(::Integer)
-from_seed(a::Vector{UInt32})::BigInt = sum(a[i] * big(2)^(32*(i-1)) for i in 1:length(a))
 
+## seeding
 
-#### seed!()
-
-function seed!(r::MersenneTwister, seed::Vector{UInt32})
-    copyto!(resize!(r.seed, length(seed)), seed)
-    dsfmt_init_by_array(r.state, r.seed)
-    reset_caches!(r)
-    r.adv = 0
-    r.adv_jump = 0
-    return r
-end
-
-seed!(r::MersenneTwister) = seed!(r, make_seed())
-seed!(r::MersenneTwister, n::Integer) = seed!(r, make_seed(n))
-
-
-### Global RNG
-
-struct _GLOBAL_RNG <: AbstractRNG
-    global const GLOBAL_RNG = _GLOBAL_RNG.instance
-end
-
-# GLOBAL_RNG currently uses TaskLocalRNG
-typeof_rng(::_GLOBAL_RNG) = TaskLocalRNG
-
-"""
-    default_rng() -> rng
-
-Return the default global random number generator (RNG).
-
-!!! note
-    What the default RNG is is an implementation detail.  Across different versions of
-    Julia, you should not expect the default RNG to be always the same, nor that it will
-    return the same stream of random numbers for a given seed.
-
-!!! compat "Julia 1.3"
-    This function was introduced in Julia 1.3.
 """
-@inline default_rng() = TaskLocalRNG()
-@inline default_rng(tid::Int) = TaskLocalRNG()
+    seed!([rng=default_rng()], seed) -> rng
+    seed!([rng=default_rng()]) -> rng
 
-copy!(dst::Xoshiro, ::_GLOBAL_RNG) = copy!(dst, default_rng())
-copy!(::_GLOBAL_RNG, src::Xoshiro) = copy!(default_rng(), src)
-copy(::_GLOBAL_RNG) = copy(default_rng())
+Reseed the random number generator: `rng` will give a reproducible
+sequence of numbers if and only if a `seed` is provided. Some RNGs
+don't accept a seed, like `RandomDevice`.
+After the call to `seed!`, `rng` is equivalent to a newly created
+object initialized with the same seed.
 
-GLOBAL_SEED = 0
-set_global_seed!(seed) = global GLOBAL_SEED = seed
+The types of accepted seeds depend on the type of `rng`, but in general,
+integer seeds should work. Providing `nothing` as the seed should be
+equivalent to not providing one.
 
-function seed!(::_GLOBAL_RNG, seed=rand(RandomDevice(), UInt64, 4))
-    global GLOBAL_SEED = seed
-    seed!(default_rng(), seed)
-end
-
-seed!(rng::_GLOBAL_RNG, ::Nothing) = seed!(rng)  # to resolve ambiguity
-
-seed!(seed::Union{Nothing,Integer,Vector{UInt32},Vector{UInt64}}=nothing) =
-    seed!(GLOBAL_RNG, seed)
-
-rng_native_52(::_GLOBAL_RNG) = rng_native_52(default_rng())
-rand(::_GLOBAL_RNG, sp::SamplerBoolBitInteger) = rand(default_rng(), sp)
-for T in (:(SamplerTrivial{UInt52Raw{UInt64}}),
-          :(SamplerTrivial{UInt2x52Raw{UInt128}}),
-          :(SamplerTrivial{UInt104Raw{UInt128}}),
-          :(SamplerTrivial{CloseOpen01_64}),
-          :(SamplerTrivial{CloseOpen12_64}),
-          :(SamplerUnion(Int64, UInt64, Int128, UInt128)),
-          :(SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32)),
-         )
-    @eval rand(::_GLOBAL_RNG, x::$T) = rand(default_rng(), x)
-end
+If `rng` is not specified, it defaults to seeding the state of the
+shared task-local generator.
 
-rand!(::_GLOBAL_RNG, A::AbstractArray{Float64}, I::SamplerTrivial{<:FloatInterval_64}) = rand!(default_rng(), A, I)
-rand!(::_GLOBAL_RNG, A::Array{Float64}, I::SamplerTrivial{<:FloatInterval_64}) = rand!(default_rng(), A, I)
-for T in (Float16, Float32)
-    @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerTrivial{CloseOpen12{$T}}) = rand!(default_rng(), A, I)
-    @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerTrivial{CloseOpen01{$T}}) = rand!(default_rng(), A, I)
-end
-for T in BitInteger_types
-    @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerType{$T}) = rand!(default_rng(), A, I)
-end
-
-function __init__()
-    seed!(GLOBAL_RNG)
-    ccall(:jl_gc_init_finalizer_rng_state, Cvoid, ())
-end
-
-
-### generation
-
-# MersenneTwister produces natively Float64
-rng_native_52(::MersenneTwister) = Float64
-
-#### helper functions
-
-# precondition: !mt_empty(r)
-rand_inbounds(r::MersenneTwister, ::CloseOpen12_64) = mt_pop!(r)
-rand_inbounds(r::MersenneTwister, ::CloseOpen01_64=CloseOpen01()) =
-    rand_inbounds(r, CloseOpen12()) - 1.0
-
-rand_inbounds(r::MersenneTwister, ::UInt52Raw{T}) where {T<:BitInteger} =
-    reinterpret(UInt64, rand_inbounds(r, CloseOpen12())) % T
-
-function rand(r::MersenneTwister, x::SamplerTrivial{UInt52Raw{UInt64}})
-    reserve_1(r)
-    rand_inbounds(r, x[])
-end
-
-function rand(r::MersenneTwister, ::SamplerTrivial{UInt2x52Raw{UInt128}})
-    reserve(r, 2)
-    rand_inbounds(r, UInt52Raw(UInt128)) << 64 | rand_inbounds(r, UInt52Raw(UInt128))
-end
-
-function rand(r::MersenneTwister, ::SamplerTrivial{UInt104Raw{UInt128}})
-    reserve(r, 2)
-    rand_inbounds(r, UInt52Raw(UInt128)) << 52 ⊻ rand_inbounds(r, UInt52Raw(UInt128))
-end
-
-#### floats
-
-rand(r::MersenneTwister, sp::SamplerTrivial{CloseOpen12_64}) =
-    (reserve_1(r); rand_inbounds(r, sp[]))
-
-#### integers
-
-rand(r::MersenneTwister, T::SamplerUnion(Int64, UInt64, Int128, UInt128)) =
-    mt_pop!(r, T[])
-
-rand(r::MersenneTwister, T::SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32)) =
-    rand(r, UInt52Raw()) % T[]
+# Examples
+```jldoctest; filter = r"(true|false)"
+julia> Random.seed!(1234);
 
-#### arrays of floats
+julia> x1 = rand(2)
+2-element Vector{Float64}:
+ 0.32597672886359486
+ 0.5490511363155669
 
-##### AbstractArray
+julia> Random.seed!(1234);
 
-function rand!(r::MersenneTwister, A::AbstractArray{Float64},
-               I::SamplerTrivial{<:FloatInterval_64})
-    region = LinearIndices(A)
-    # what follows is equivalent to this simple loop but more efficient:
-    # for i=region
-    #     @inbounds A[i] = rand(r, I[])
-    # end
-    m = Base.checked_sub(first(region), 1)
-    n = last(region)
-    while m < n
-        s = mt_avail(r)
-        if s == 0
-            gen_rand(r)
-            s = mt_avail(r)
-        end
-        m2 = min(n, m+s)
-        for i=m+1:m2
-            @inbounds A[i] = rand_inbounds(r, I[])
-        end
-        m = m2
-    end
-    A
-end
+julia> x2 = rand(2)
+2-element Vector{Float64}:
+ 0.32597672886359486
+ 0.5490511363155669
 
+julia> x1 == x2
+true
 
-##### Array : internal functions
+julia> rng = Xoshiro(1234); rand(rng, 2) == x1
+true
 
-# internal array-like type to circumvent the lack of flexibility with reinterpret
-struct UnsafeView{T} <: DenseArray{T,1}
-    ptr::Ptr{T}
-    len::Int
-end
+julia> Xoshiro(1) == Random.seed!(rng, 1)
+true
 
-Base.length(a::UnsafeView) = a.len
-Base.getindex(a::UnsafeView, i::Int) = unsafe_load(a.ptr, i)
-Base.setindex!(a::UnsafeView, x, i::Int) = unsafe_store!(a.ptr, x, i)
-Base.pointer(a::UnsafeView) = a.ptr
-Base.size(a::UnsafeView) = (a.len,)
-Base.elsize(::Type{UnsafeView{T}}) where {T} = sizeof(T)
-
-# this is essentially equivalent to rand!(r, ::AbstractArray{Float64}, I) above, but due to
-# optimizations which can't be done currently when working with pointers, we have to re-order
-# manually the computation flow to get the performance
-# (see https://discourse.julialang.org/t/unsafe-store-sometimes-slower-than-arrays-setindex)
-function _rand_max383!(r::MersenneTwister, A::UnsafeView{Float64}, I::FloatInterval_64)
-    n = length(A)
-    @assert n <= dsfmt_get_min_array_size()+1 # == 383
-    mt_avail(r) == 0 && gen_rand(r)
-    # from now on, at most one call to gen_rand(r) will be necessary
-    m = min(n, mt_avail(r))
-    GC.@preserve r unsafe_copyto!(A.ptr, pointer(r.vals, r.idxF+1), m)
-    if m == n
-        r.idxF += m
-    else # m < n
-        gen_rand(r)
-        GC.@preserve r unsafe_copyto!(A.ptr+m*sizeof(Float64), pointer(r.vals), n-m)
-        r.idxF = n-m
-    end
-    if I isa CloseOpen01
-        for i=1:n
-            A[i] -= 1.0
-        end
-    end
-    A
-end
+julia> rand(Random.seed!(rng), Bool) # not reproducible
+true
 
-function fill_array!(rng::MersenneTwister, A::Ptr{Float64}, n::Int, I)
-    rng.adv += n
-    fill_array!(rng.state, A, n, I)
-end
+julia> rand(Random.seed!(rng), Bool) # not reproducible either
+false
 
-fill_array!(s::DSFMT_state, A::Ptr{Float64}, n::Int, ::CloseOpen01_64) =
-    dsfmt_fill_array_close_open!(s, A, n)
-
-fill_array!(s::DSFMT_state, A::Ptr{Float64}, n::Int, ::CloseOpen12_64) =
-    dsfmt_fill_array_close1_open2!(s, A, n)
-
-
-function rand!(r::MersenneTwister, A::UnsafeView{Float64},
-               I::SamplerTrivial{<:FloatInterval_64})
-    # depending on the alignment of A, the data written by fill_array! may have
-    # to be left-shifted by up to 15 bytes (cf. unsafe_copyto! below) for
-    # reproducibility purposes;
-    # so, even for well aligned arrays, fill_array! is used to generate only
-    # the n-2 first values (or n-3 if n is odd), and the remaining values are
-    # generated by the scalar version of rand
-    n = length(A)
-    n2 = (n-2) ÷ 2 * 2
-    n2 < dsfmt_get_min_array_size() && return _rand_max383!(r, A, I[])
-
-    pA = A.ptr
-    align = Csize_t(pA) % 16
-    if align > 0
-        pA2 = pA + 16 - align
-        fill_array!(r, pA2, n2, I[]) # generate the data in-place, but shifted
-        unsafe_copyto!(pA, pA2, n2) # move the data to the beginning of the array
+julia> rand(Xoshiro(), Bool) # not reproducible either
+true
+```
+"""
+seed!
+
+function seed!(rng::AbstractRNG, seed::Any=nothing)
+    if seed === nothing
+        seed!(rng, RandomDevice())
+    elseif seed isa AbstractRNG
+        # avoid getting into an infinite recursive call from the other branches
+        throw(MethodError(seed!, (rng, seed)))
     else
-        fill_array!(r, pA, n2, I[])
-    end
-    for i=n2+1:n
-        A[i] = rand(r, I[])
-    end
-    A
-end
-
-# fills up A reinterpreted as an array of Float64 with n64 values
-function _rand!(r::MersenneTwister, A::Array{T}, n64::Int, I::FloatInterval_64) where T
-    # n64 is the length in terms of `Float64` of the target
-    @assert sizeof(Float64)*n64 <= sizeof(T)*length(A) && isbitstype(T)
-    GC.@preserve A rand!(r, UnsafeView{Float64}(pointer(A), n64), SamplerTrivial(I))
-    A
-end
-
-##### Array: Float64, Float16, Float32
-
-rand!(r::MersenneTwister, A::Array{Float64}, I::SamplerTrivial{<:FloatInterval_64}) =
-    _rand!(r, A, length(A), I[])
-
-mask128(u::UInt128, ::Type{Float16}) =
-    (u & 0x03ff03ff03ff03ff03ff03ff03ff03ff) | 0x3c003c003c003c003c003c003c003c00
-
-mask128(u::UInt128, ::Type{Float32}) =
-    (u & 0x007fffff007fffff007fffff007fffff) | 0x3f8000003f8000003f8000003f800000
-
-for T in (Float16, Float32)
-    @eval function rand!(r::MersenneTwister, A::Array{$T}, ::SamplerTrivial{CloseOpen12{$T}})
-        n = length(A)
-        n128 = n * sizeof($T) ÷ 16
-        _rand!(r, A, 2*n128, CloseOpen12())
-        GC.@preserve A begin
-            A128 = UnsafeView{UInt128}(pointer(A), n128)
-            for i in 1:n128
-                u = A128[i]
-                u ⊻= u << 26
-                # at this point, the 64 low bits of u, "k" being the k-th bit of A128[i] and "+"
-                # the bit xor, are:
-                # [..., 58+32,..., 53+27, 52+26, ..., 33+7, 32+6, ..., 27+1, 26, ..., 1]
-                # the bits needing to be random are
-                # [1:10, 17:26, 33:42, 49:58] (for Float16)
-                # [1:23, 33:55] (for Float32)
-                # this is obviously satisfied on the 32 low bits side, and on the high side,
-                # the entropy comes from bits 33:52 of A128[i] and then from bits 27:32
-                # (which are discarded on the low side)
-                # this is similar for the 64 high bits of u
-                A128[i] = mask128(u, $T)
-            end
-        end
-        for i in 16*n128÷sizeof($T)+1:n
-            @inbounds A[i] = rand(r, $T) + one($T)
-        end
-        A
-    end
-
-    @eval function rand!(r::MersenneTwister, A::Array{$T}, ::SamplerTrivial{CloseOpen01{$T}})
-        rand!(r, A, CloseOpen12($T))
-        I32 = one(Float32)
-        for i in eachindex(A)
-            @inbounds A[i] = Float32(A[i])-I32 # faster than "A[i] -= one(T)" for T==Float16
-        end
-        A
+        seed!(rng, SeedHasher(seed))
     end
 end
 
-#### arrays of integers
 
-function rand!(r::MersenneTwister, A::UnsafeView{UInt128}, ::SamplerType{UInt128})
-    n::Int=length(A)
-    i = n
-    while true
-        rand!(r, UnsafeView{Float64}(A.ptr, 2i), CloseOpen12())
-        n < 5 && break
-        i = 0
-        while n-i >= 5
-            u = A[i+=1]
-            A[n]    ⊻= u << 48
-            A[n-=1] ⊻= u << 36
-            A[n-=1] ⊻= u << 24
-            A[n-=1] ⊻= u << 12
-            n-=1
-        end
-    end
-    if n > 0
-        u = rand(r, UInt2x52Raw())
-        for i = 1:n
-            A[i] ⊻= u << (12*i)
-        end
-    end
-    A
-end
-
-for T in BitInteger_types
-    @eval function rand!(r::MersenneTwister, A::Array{$T}, sp::SamplerType{$T})
-        GC.@preserve A rand!(r, UnsafeView(pointer(A), length(A)), sp)
-        A
-    end
-
-    T == UInt128 && continue
-
-    @eval function rand!(r::MersenneTwister, A::UnsafeView{$T}, ::SamplerType{$T})
-        n = length(A)
-        n128 = n * sizeof($T) ÷ 16
-        rand!(r, UnsafeView{UInt128}(pointer(A), n128))
-        for i = 16*n128÷sizeof($T)+1:n
-            @inbounds A[i] = rand(r, $T)
-        end
-        A
-    end
-end
+### hash_seed()
 
+"""
+    Random.hash_seed(seed, ctx::SHA_CTX)::AbstractVector{UInt8}
 
-#### arrays of Bool
+Update `ctx` via `SHA.update!` with the content of `seed`.
+This function is used by the [`SeedHasher`](@ref) RNG to produce
+random bytes.
 
-# similar to Array{UInt8}, but we need to mask the result so that only the LSB
-# in each byte can be non-zero
+`seed` can currently be of type
+`Union{Integer, AbstractString, AbstractArray{UInt32}, AbstractArray{UInt64}}`,
+but modules can extend this function for types they own.
 
-function rand!(r::MersenneTwister, A1::Array{Bool}, sp::SamplerType{Bool})
-    n1 = length(A1)
-    n128 = n1 ÷ 16
+`hash_seed` is "injective" : for two equivalent context objects `cn` and `cm`,
+if `n != m`, then `cn` and `cm` will be distinct after calling
+`hash_seed(n, cn); hash_seed(m, cm)`.
+Moreover, if `n == m`, then `cn` and `cm` remain equivalent after calling
+`hash_seed(n, cn); hash_seed(m, cm)`.
+"""
+function hash_seed end
 
-    if n128 == 0
-        bits = rand(r, UInt52Raw())
-    else
-        GC.@preserve A1 begin
-            A = UnsafeView{UInt128}(pointer(A1), n128)
-            rand!(r, UnsafeView{Float64}(A.ptr, 2*n128), CloseOpen12())
-            # without masking, non-zero bits could be observed in other
-            # positions than the LSB of each byte
-            mask = 0x01010101010101010101010101010101
-            # we need up to 15 bits of entropy in `bits` for the final loop,
-            # which we will extract from x = A[1] % UInt64;
-            # let y = x % UInt32; y contains 32 bits of entropy, but 4
-            # of them will be used for A[1] itself (the first of
-            # each byte). To compensate, we xor with (y >> 17),
-            # which gets the entropy from the second bit of each byte
-            # of the upper-half of y, and sets it in the first bit
-            # of each byte of the lower half; the first two bytes
-            # now contain 16 usable random bits
-            x = A[1] % UInt64
-            bits = x ⊻ x >> 17
-            for i = 1:n128
-                # << 5 to randomize the first bit of the 8th & 16th byte
-                # (i.e. we move bit 52 (resp. 52 + 64), which is unused,
-                # to position 57 (resp. 57 + 64))
-                A[i] = (A[i] ⊻ A[i] << 5) & mask
-            end
-        end
+function hash_seed(seed::Integer, ctx::SHA_CTX)
+    neg = signbit(seed)
+    if neg
+        seed = ~seed
     end
-    for i = 16*n128+1:n1
-        @inbounds A1[i] = bits % Bool
-        bits >>= 1
+    @assert seed >= 0
+    while true
+        word = (seed % UInt32) & 0xffffffff
+        seed >>>= 32
+        SHA.update!(ctx, reinterpret(NTuple{4, UInt8}, word))
+        iszero(seed) && break
     end
-    A1
-end
-
-
-### randjump
-
-# Old randjump methods are deprecated, the scalar version is in the Future module.
-
-function _randjump(r::MersenneTwister, jumppoly::DSFMT.GF2X)
-    adv = r.adv
-    adv_jump = r.adv_jump
-    s = MersenneTwister(copy(r.seed), DSFMT.dsfmt_jump(r.state, jumppoly))
-    reset_caches!(s)
-    s.adv = adv
-    s.adv_jump = adv_jump
-    s
-end
-
-# NON-PUBLIC
-function jump(r::MersenneTwister, steps::Integer)
-    iseven(steps) || throw(DomainError(steps, "steps must be even"))
-    # steps >= 0 checked in calc_jump (`steps >> 1 < 0` if `steps < 0`)
-    j = _randjump(r, Random.DSFMT.calc_jump(steps >> 1))
-    j.adv_jump += steps
-    j
+    # make sure the hash of negative numbers is different from the hash of positive numbers
+    neg && SHA.update!(ctx, (0x01,))
+    nothing
 end
 
-# NON-PUBLIC
-jump!(r::MersenneTwister, steps::Integer) = copy!(r, jump(r, steps))
-
-
-### constructors matching show (EXPERIMENTAL)
-
-# parameters in the tuples are:
-# 1: .adv_jump (jump steps)
-# 2: .adv (number of generated floats at the DSFMT_state level since seeding, besides jumps)
-# 3, 4: .adv_vals, .idxF (counters to reconstruct the float cache, optional if 5-6 not shown))
-# 5, 6: .adv_ints, .idxI (counters to reconstruct the integer cache, optional)
-
-Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{6,Integer}) =
-    advance!(MersenneTwister(seed), advance...)
-
-Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{4,Integer}) =
-    MersenneTwister(seed, (advance..., 0, 0))
-
-Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{2,Integer}) =
-    MersenneTwister(seed, (advance..., 0, 0, 0, 0))
-
-# advances raw state (per fill_array!) of r by n steps (Float64 values)
-function _advance_n!(r::MersenneTwister, n::Int64, work::Vector{Float64})
-    n == 0 && return
-    n < 0 && throw(DomainError(n, "can't advance $r to the specified state"))
-    ms = dsfmt_get_min_array_size() % Int64
-    @assert n >= ms
-    lw = ms + n % ms
-    resize!(work, lw)
-    GC.@preserve work fill_array!(r, pointer(work), lw, CloseOpen12())
-    c::Int64 = lw
-    GC.@preserve work while n > c
-        fill_array!(r, pointer(work), ms, CloseOpen12())
-        c += ms
+function hash_seed(seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}}, ctx::SHA_CTX)
+    for xx in seed
+        SHA.update!(ctx, reinterpret(NTuple{8, UInt8}, UInt64(xx)))
     end
-    @assert n == c
-end
-
-function _advance_to!(r::MersenneTwister, adv::Int64, work)
-    _advance_n!(r, adv - r.adv, work)
-    @assert r.adv == adv
-end
-
-function _advance_F!(r::MersenneTwister, adv_vals, idxF, work)
-    _advance_to!(r, adv_vals, work)
-    gen_rand(r)
-    @assert r.adv_vals == adv_vals
-    r.idxF = idxF
-end
-
-function _advance_I!(r::MersenneTwister, adv_ints, idxI, work)
-    _advance_to!(r, adv_ints, work)
-    mt_setfull!(r, Int) # sets r.adv_ints
-    @assert r.adv_ints == adv_ints
-    r.idxI = 16*length(r.ints) - 8*idxI
-end
-
-function advance!(r::MersenneTwister, adv_jump, adv, adv_vals, idxF, adv_ints, idxI)
-    adv_jump = BigInt(adv_jump)
-    adv, adv_vals, adv_ints = Int64.((adv, adv_vals, adv_ints))
-    idxF, idxI = Int.((idxF, idxI))
-
-    ms = dsfmt_get_min_array_size() % Int
-    work = sizehint!(Vector{Float64}(), 2ms)
-
-    adv_jump != 0 && jump!(r, adv_jump)
-    advF = (adv_vals, idxF) != (0, 0)
-    advI = (adv_ints, idxI) != (0, 0)
-
-    if advI && advF
-        @assert adv_vals != adv_ints
-        if adv_vals < adv_ints
-            _advance_F!(r, adv_vals, idxF, work)
-            _advance_I!(r, adv_ints, idxI, work)
-        else
-            _advance_I!(r, adv_ints, idxI, work)
-            _advance_F!(r, adv_vals, idxF, work)
-        end
-    elseif advF
-        _advance_F!(r, adv_vals, idxF, work)
-    elseif advI
-        _advance_I!(r, adv_ints, idxI, work)
-    else
-        @assert adv == 0
+    # discriminate from hash_seed(::Integer)
+    SHA.update!(ctx, (0x10,))
+end
+
+function hash_seed(str::AbstractString, ctx::SHA_CTX)
+    # convert to String such that `codeunits(str)` below is consistent between equal
+    # strings of different types
+    str = String(str)
+    SHA.update!(ctx, codeunits(str))
+    # signature for strings: so far, all hash_seed functions end-up hashing a multiple
+    # of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many
+    # bytes as necessary to have a total number of hashed bytes equal to 0 mod 4 (padding),
+    # and then hash the signature 0x05; in order for strings of different lengths to have
+    # different hashes, padding bytes are set equal to the number of padding bytes
+    pad = 4 - mod(ncodeunits(str), 4)
+    for _=1:pad
+        SHA.update!(ctx, (pad % UInt8,))
     end
-    _advance_to!(r, adv, work)
-    r
+    SHA.update!(ctx, (0x05,))
 end
diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl
index 78d4f15e2beac..cc598094b1956 100644
--- a/stdlib/Random/src/Random.jl
+++ b/stdlib/Random/src/Random.jl
@@ -4,7 +4,7 @@
     Random
 
 Support for generating random numbers. Provides [`rand`](@ref), [`randn`](@ref),
-[`AbstractRNG`](@ref), [`MersenneTwister`](@ref), and [`RandomDevice`](@ref).
+[`AbstractRNG`](@ref), [`Xoshiro`](@ref), [`MersenneTwister`](@ref), and [`RandomDevice`](@ref).
 """
 module Random
 
@@ -13,9 +13,11 @@ include("DSFMT.jl")
 using .DSFMT
 using Base.GMP.MPZ
 using Base.GMP: Limb
-import SHA
+using SHA: SHA, SHA2_256_CTX, SHA2_512_CTX, SHA_CTX
+
+using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing,
+    _throw_argerror
 
-using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing
 import Base: copymutable, copy, copy!, ==, hash, convert,
              rand, randn, show
 
@@ -29,6 +31,8 @@ export rand!, randn!,
        randcycle, randcycle!,
        AbstractRNG, MersenneTwister, RandomDevice, TaskLocalRNG, Xoshiro
 
+public seed!, default_rng, Sampler, SamplerType, SamplerTrivial, SamplerSimple
+
 ## general definitions
 
 """
@@ -136,11 +140,9 @@ the amount of precomputation, if applicable.
 *types* and *values*, respectively. [`Random.SamplerSimple`](@ref) can be used to store
 pre-computed values without defining extra types for only this purpose.
 """
-Sampler(rng::AbstractRNG, x, r::Repetition=Val(Inf)) = Sampler(typeof_rng(rng), x, r)
+Sampler(rng::AbstractRNG, x, r::Repetition=Val(Inf)) = Sampler(typeof(rng), x, r)
 Sampler(rng::AbstractRNG, ::Type{X}, r::Repetition=Val(Inf)) where {X} =
-    Sampler(typeof_rng(rng), X, r)
-
-typeof_rng(rng::AbstractRNG) = typeof(rng)
+    Sampler(typeof(rng), X, r)
 
 # this method is necessary to prevent rand(rng::AbstractRNG, X) from
 # recursively constructing nested Sampler types.
@@ -165,6 +167,11 @@ Sampler(::Type{<:AbstractRNG}, ::Type{T}, ::Repetition) where {T} = SamplerType{
 
 Base.getindex(::SamplerType{T}) where {T} = T
 
+# SamplerUnion(X, Y, ...}) == Union{SamplerType{X}, SamplerType{Y}, ...}
+SamplerUnion(U...) = Union{Any[SamplerType{T} for T in U]...}
+const SamplerBoolBitInteger = SamplerUnion(Bool, BitInteger_types...)
+
+
 struct SamplerTrivial{T,E} <: Sampler{E}
     self::T
 end
@@ -293,19 +300,23 @@ rand(                ::Type{X}, dims::Dims) where {X} = rand(default_rng(), X, d
 rand(r::AbstractRNG, ::Type{X}, d::Integer, dims::Integer...) where {X} = rand(r, X, Dims((d, dims...)))
 rand(                ::Type{X}, d::Integer, dims::Integer...) where {X} = rand(X, Dims((d, dims...)))
 
-# SamplerUnion(X, Y, ...}) == Union{SamplerType{X}, SamplerType{Y}, ...}
-SamplerUnion(U...) = Union{Any[SamplerType{T} for T in U]...}
-const SamplerBoolBitInteger = SamplerUnion(Bool, BitInteger_types...)
 
+### UnsafeView
+# internal array-like type to circumvent the lack of flexibility with reinterpret
 
-include("Xoshiro.jl")
-include("RNGs.jl")
-include("generation.jl")
-include("normal.jl")
-include("misc.jl")
-include("XoshiroSimd.jl")
+struct UnsafeView{T} <: DenseArray{T,1}
+    ptr::Ptr{T}
+    len::Int
+end
 
-## rand & rand! & seed! docstrings
+Base.getindex(a::UnsafeView, i::Int) = unsafe_load(a.ptr, i)
+Base.setindex!(a::UnsafeView, x, i::Int) = unsafe_store!(a.ptr, x, i)
+Base.pointer(a::UnsafeView) = a.ptr
+Base.size(a::UnsafeView) = (a.len,)
+Base.elsize(::Type{UnsafeView{T}}) where {T} = sizeof(T)
+
+
+## rand & rand! docstrings
 
 """
     rand([rng=default_rng()], [S], [dims...])
@@ -313,12 +324,31 @@ include("XoshiroSimd.jl")
 Pick a random element or array of random elements from the set of values specified by `S`;
 `S` can be
 
-* an indexable collection (for example `1:9` or `('x', "y", :z)`),
-* an `AbstractDict` or `AbstractSet` object,
+* an indexable collection (for example `1:9` or `('x', "y", :z)`)
+
+* an `AbstractDict` or `AbstractSet` object
+
 * a string (considered as a collection of characters), or
-* a type: the set of values to pick from is then equivalent to `typemin(S):typemax(S)` for
-  integers (this is not applicable to [`BigInt`](@ref)), to ``[0, 1)`` for floating
-  point numbers and to ``[0, 1)+i[0, 1)`` for complex floating point numbers;
+
+* a type from the list below, corresponding to the specified set of values
+
+  + concrete integer types sample from `typemin(S):typemax(S)` (excepting [`BigInt`](@ref) which is not supported)
+
+  + concrete floating point types sample from `[0, 1)`
+
+  + concrete complex types `Complex{T}` if `T` is a sampleable type take their real and imaginary components
+    independently from the set of values corresponding to `T`, but are not supported if `T` is not sampleable.
+
+  + all `<:AbstractChar` types sample from the set of valid Unicode scalars
+
+  + a user-defined type and set of values; for implementation guidance please see [Hooking into the `Random` API](@ref rand-api-hook)
+
+  + a tuple type of known size and where each parameter of `S` is itself a sampleable type; return a value of type `S`.
+    Note that tuple types such as `Tuple{Vararg{T}}` (unknown size) and `Tuple{1:2}` (parameterized with a value) are not supported
+
+  + a `Pair` type, e.g. `Pair{X, Y}` such that `rand` is defined for `X` and `Y`,
+    in which case random pairs are produced.
+
 
 `S` defaults to [`Float64`](@ref).
 When only one argument is passed besides the optional `rng` and is a `Tuple`, it is interpreted
@@ -330,23 +360,26 @@ See also [`randn`](@ref) for normally distributed numbers, and [`rand!`](@ref) a
 !!! compat "Julia 1.1"
     Support for `S` as a tuple requires at least Julia 1.1.
 
+!!! compat "Julia 1.11"
+    Support for `S` as a `Tuple` type requires at least Julia 1.11.
+
 # Examples
 ```julia-repl
 julia> rand(Int, 2)
-2-element Array{Int64,1}:
+2-element Vector{Int64}:
  1339893410598768192
  1575814717733606317
 
 julia> using Random
 
-julia> rand(MersenneTwister(0), Dict(1=>2, 3=>4))
-1=>2
+julia> rand(Xoshiro(0), Dict(1=>2, 3=>4))
+3 => 4
 
 julia> rand((2, 3))
 3
 
 julia> rand(Float64, (2, 3))
-2×3 Array{Float64,2}:
+2×3 Matrix{Float64}:
  0.999717  0.0143835  0.540787
  0.696556  0.783855   0.938235
 ```
@@ -372,67 +405,24 @@ but without allocating a new array.
 
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
-
-julia> rand!(rng, zeros(5))
+julia> rand!(Xoshiro(123), zeros(5))
 5-element Vector{Float64}:
- 0.5908446386657102
- 0.7667970365022592
- 0.5662374165061859
- 0.4600853424625171
- 0.7940257103317943
+ 0.521213795535383
+ 0.5868067574533484
+ 0.8908786980927811
+ 0.19090669902576285
+ 0.5256623915420473
 ```
 """
 rand!
 
-"""
-    seed!([rng=default_rng()], seed) -> rng
-    seed!([rng=default_rng()]) -> rng
-
-Reseed the random number generator: `rng` will give a reproducible
-sequence of numbers if and only if a `seed` is provided. Some RNGs
-don't accept a seed, like `RandomDevice`.
-After the call to `seed!`, `rng` is equivalent to a newly created
-object initialized with the same seed.
-
-If `rng` is not specified, it defaults to seeding the state of the
-shared task-local generator.
-
-# Examples
-```julia-repl
-julia> Random.seed!(1234);
-
-julia> x1 = rand(2)
-2-element Vector{Float64}:
- 0.32597672886359486
- 0.5490511363155669
-
-julia> Random.seed!(1234);
-
-julia> x2 = rand(2)
-2-element Vector{Float64}:
- 0.32597672886359486
- 0.5490511363155669
 
-julia> x1 == x2
-true
-
-julia> rng = Xoshiro(1234); rand(rng, 2) == x1
-true
-
-julia> Xoshiro(1) == Random.seed!(rng, 1)
-true
-
-julia> rand(Random.seed!(rng), Bool) # not reproducible
-true
-
-julia> rand(Random.seed!(rng), Bool) # not reproducible either
-false
-
-julia> rand(Xoshiro(), Bool) # not reproducible either
-true
-```
-"""
-seed!(rng::AbstractRNG, ::Nothing) = seed!(rng)
+include("Xoshiro.jl")
+include("RNGs.jl")
+include("MersenneTwister.jl")
+include("generation.jl")
+include("normal.jl")
+include("misc.jl")
+include("XoshiroSimd.jl")
 
 end # module
diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl
index 3be276ad23754..7d059261b69f5 100644
--- a/stdlib/Random/src/Xoshiro.jl
+++ b/stdlib/Random/src/Xoshiro.jl
@@ -4,13 +4,13 @@
 # Lots of implementation is shared with TaskLocalRNG
 
 """
-    Xoshiro(seed)
+    Xoshiro(seed::Union{Integer, AbstractString})
     Xoshiro()
 
 Xoshiro256++ is a fast pseudorandom number generator described by David Blackman and
 Sebastiano Vigna in "Scrambled Linear Pseudorandom Number Generators",
 ACM Trans. Math. Softw., 2021. Reference implementation is available
-at http://prng.di.unimi.it
+at https://prng.di.unimi.it
 
 Apart from the high speed, Xoshiro has a small memory footprint, making it suitable for
 applications where many different random states need to be held for long time.
@@ -21,6 +21,12 @@ multiple interleaved xoshiro instances).
 The virtual PRNGs are discarded once the bulk request has been serviced (and should cause
 no heap allocations).
 
+If no seed is provided, a randomly generated one is created (using entropy from the system).
+See the [`seed!`](@ref) function for reseeding an already existing `Xoshiro` object.
+
+!!! compat "Julia 1.11"
+    Passing a negative integer seed requires at least Julia 1.11.
+
 # Examples
 ```jldoctest
 julia> using Random
@@ -48,47 +54,130 @@ mutable struct Xoshiro <: AbstractRNG
     s1::UInt64
     s2::UInt64
     s3::UInt64
+    s4::UInt64 # internal splitmix state
 
-    Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer) = new(s0, s1, s2, s3)
+    Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer, s4::Integer) = new(s0, s1, s2, s3, s4)
+    Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer) = initstate!(new(), map(UInt64, (s0, s1, s2, s3)))
     Xoshiro(seed=nothing) = seed!(new(), seed)
 end
 
-function setstate!(x::Xoshiro, s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64)
+@inline function setstate!(x::Xoshiro, (s0, s1, s2, s3, s4))
     x.s0 = s0
     x.s1 = s1
     x.s2 = s2
     x.s3 = s3
+    if s4 !== nothing
+        x.s4 = s4
+    end
     x
 end
 
-copy(rng::Xoshiro) = Xoshiro(rng.s0, rng.s1, rng.s2, rng.s3)
+@inline getstate(x::Xoshiro) = (x.s0, x.s1, x.s2, x.s3, x.s4)
 
-function copy!(dst::Xoshiro, src::Xoshiro)
-    dst.s0, dst.s1, dst.s2, dst.s3 = src.s0, src.s1, src.s2, src.s3
-    dst
-end
+rng_native_52(::Xoshiro) = UInt64
 
-function ==(a::Xoshiro, b::Xoshiro)
-    a.s0 == b.s0 && a.s1 == b.s1 && a.s2 == b.s2 && a.s3 == b.s3
+# Jump functions from: https://xoshiro.di.unimi.it/xoshiro256plusplus.c
+
+for (fname, JUMP) in ((:jump_128, (0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c)),
+                      (:jump_192, (0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635)))
+    local fname! = Symbol(fname, :!)
+    @eval function $fname!(rng::Xoshiro)
+        _s0 = 0x0000000000000000
+        _s1 = 0x0000000000000000
+        _s2 = 0x0000000000000000
+        _s3 = 0x0000000000000000
+        s0, s1, s2, s3 = rng.s0, rng.s1, rng.s2, rng.s3
+        for j in $JUMP
+            for b in 0x0000000000000000:0x000000000000003f
+                if (j & 0x0000000000000001 << b) != 0
+                    _s0 ⊻= s0
+                    _s1 ⊻= s1
+                    _s2 ⊻= s2
+                    _s3 ⊻= s3
+                end
+                t = s1 << 17
+                s2 = xor(s2, s0)
+                s3 = xor(s3, s1)
+                s1 = xor(s1, s2)
+                s0 = xor(s0, s3)
+                s2 = xor(s2, t)
+                s3 = s3 << 45 | s3 >> 19
+            end
+        end
+        setstate!(rng, (_s0, _s1, _s2, _s3, nothing))
+    end
+    @eval $fname(rng::Xoshiro) = $fname!(copy(rng))
+
+    @eval function $fname!(rng::Xoshiro, n::Integer)
+        n < 0 && throw(DomainError(n, "the number of jumps must be ≥ 0"))
+        i = zero(n)
+        while i < n
+            $fname!(rng)
+            i += one(n)
+        end
+        rng
+    end
+
+    @eval $fname(rng::Xoshiro, n::Integer) = $fname!(copy(rng), n)
 end
 
-rng_native_52(::Xoshiro) = UInt64
+for (fname, sz) in ((:jump_128, 128), (:jump_192, 192))
+    local fname! = Symbol(fname, :!)
+    local see_other = Symbol(fname === :jump_128 ? :jump_192 : :jump_128)
+    local see_other! = Symbol(see_other, :!)
+    local seq_pow = 256 - sz
+    @eval begin
+        """
+            $($fname!)(rng::Xoshiro, [n::Integer=1])
 
-@inline function rand(rng::Xoshiro, ::SamplerType{UInt64})
-    s0, s1, s2, s3 = rng.s0, rng.s1, rng.s2, rng.s3
-    tmp = s0 + s3
-    res = ((tmp << 23) | (tmp >> 41)) + s0
-    t = s1 << 17
-    s2 = xor(s2, s0)
-    s3 = xor(s3, s1)
-    s1 = xor(s1, s2)
-    s0 = xor(s0, s3)
-    s2 = xor(s2, t)
-    s3 = s3 << 45 | s3 >> 19
-    rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3
-    res
-end
+        Jump forward, advancing the state equivalent to `2^$($sz)` calls which consume
+        8 bytes (i.e. a full `UInt64`) each.
+
+        If `n > 0` is provided, the state is advanced equivalent to `n * 2^$($sz)` calls; if `n = 0`,
+        the state remains unchanged.
+
+        This can be used to generate `2^$($seq_pow)` non-overlapping subsequences for parallel computations.
+
+        See also: [`$($fname)`](@ref), [`$($see_other!)`](@ref)
+
+        # Examples
+        ```julia-repl
+        julia> $($fname!)($($fname!)(Xoshiro(1))) == $($fname!)(Xoshiro(1), 2)
+        true
+        ```
+        """
+        function $fname! end
+    end
+
+    @eval begin
+        """
+            $($fname)(rng::Xoshiro, [n::Integer=1])
 
+        Return a copy of `rng` with the state advanced equivalent to `n * 2^$($sz)` calls which consume
+        8 bytes (i.e. a full `UInt64`) each; if `n = 0`, the state of the returned copy will be
+        identical to `rng`.
+
+        This can be used to generate `2^$($seq_pow)` non-overlapping subsequences for parallel computations.
+
+        See also: [`$($fname!)`](@ref), [`$($see_other)`](@ref)
+
+        # Examples
+        ```julia-repl
+        julia> x = Xoshiro(1);
+
+        julia> $($fname)($($fname)(x)) == $($fname)(x, 2)
+        true
+
+        julia> $($fname)(x, 0) == x
+        true
+
+        julia> $($fname)(x, 0) === x
+        false
+        ```
+        """
+        function $fname end
+    end
+end
 
 ## Task local RNG
 
@@ -96,8 +185,8 @@ end
     TaskLocalRNG
 
 The `TaskLocalRNG` has state that is local to its task, not its thread.
-It is seeded upon task creation, from the state of its parent task.
-Therefore, task creation is an event that changes the parent's RNG state.
+It is seeded upon task creation, from the state of its parent task, but without
+advancing the state of the parent's RNG.
 
 As an upside, the `TaskLocalRNG` is pretty fast, and permits reproducible
 multithreaded simulations (barring race conditions), independent of scheduler
@@ -106,30 +195,60 @@ task creation, simulation results are also independent of the number of availabl
 threads / CPUs. The random stream should not depend on hardware specifics, up to
 endianness and possibly word size.
 
-Using or seeding the RNG of any other task than the one returned by `current_task()`
-is undefined behavior: it will work most of the time, and may sometimes fail silently.
+When seeding `TaskLocalRNG()` with [`seed!`](@ref), the passed seed, if any,
+may be any integer.
+
+!!! compat "Julia 1.11"
+    Seeding `TaskLocalRNG()` with a negative integer seed requires at least Julia 1.11.
+
+!!! compat "Julia 1.10"
+    Task creation no longer advances the parent task's RNG state as of Julia 1.10.
 """
 struct TaskLocalRNG <: AbstractRNG end
 TaskLocalRNG(::Nothing) = TaskLocalRNG()
-rng_native_52(::TaskLocalRNG) = UInt64
 
-function setstate!(
-    x::TaskLocalRNG,
-    s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64, # xoshiro256 state
-    s4::UInt64 = 1s0 + 3s1 + 5s2 + 7s3, # internal splitmix state
-)
+@inline function setstate!(x::TaskLocalRNG, (s0, s1, s2, s3, s4))
     t = current_task()
     t.rngState0 = s0
     t.rngState1 = s1
     t.rngState2 = s2
     t.rngState3 = s3
-    t.rngState4 = s4
+    if s4 !== nothing
+        t.rngState4 = s4
+    end
     x
 end
 
-@inline function rand(::TaskLocalRNG, ::SamplerType{UInt64})
-    task = current_task()
-    s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3
+@inline function getstate(::TaskLocalRNG)
+    t = current_task()
+    (t.rngState0, t.rngState1, t.rngState2, t.rngState3, t.rngState4)
+end
+
+rng_native_52(::TaskLocalRNG) = UInt64
+
+
+## Shared implementation between Xoshiro and TaskLocalRNG
+
+# this variant of setstate! initializes the internal splitmix state, a.k.a. `s4`
+@inline function initstate!(x::Union{TaskLocalRNG, Xoshiro}, state)
+    length(state) == 4 && eltype(state) == UInt64 ||
+        _throw_argerror("initstate! expects a list of 4 `UInt64` values")
+    s0, s1, s2, s3 = state
+    setstate!(x, (s0, s1, s2, s3, 1s0 + 3s1 + 5s2 + 7s3))
+end
+
+copy(rng::Union{TaskLocalRNG, Xoshiro}) = Xoshiro(getstate(rng)...)
+copy!(dst::Union{TaskLocalRNG, Xoshiro}, src::Union{TaskLocalRNG, Xoshiro}) = setstate!(dst, getstate(src))
+==(x::Union{TaskLocalRNG, Xoshiro}, y::Union{TaskLocalRNG, Xoshiro}) = getstate(x) == getstate(y)
+# use a magic (random) number to scramble `h` so that `hash(x)` is distinct from `hash(getstate(x))`
+hash(x::Union{TaskLocalRNG, Xoshiro}, h::UInt) = hash(getstate(x), h + 0x49a62c2dda6fa9be % UInt)
+
+seed!(rng::Union{TaskLocalRNG, Xoshiro}, seeder::AbstractRNG) =
+    initstate!(rng, rand(seeder, NTuple{4, UInt64}))
+
+
+@inline function rand(x::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt64})
+    s0, s1, s2, s3 = getstate(x)
     tmp = s0 + s3
     res = ((tmp << 23) | (tmp >> 41)) + s0
     t = s1 << 17
@@ -139,28 +258,10 @@ end
     s0 ⊻= s3
     s2 ⊻= t
     s3 = s3 << 45 | s3 >> 19
-    task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3
+    setstate!(x, (s0, s1, s2, s3, nothing))
     res
 end
 
-# Shared implementation between Xoshiro and TaskLocalRNG -- seeding
-
-function seed!(rng::Union{TaskLocalRNG,Xoshiro})
-    # as we get good randomness from RandomDevice, we can skip hashing
-    rd = RandomDevice()
-    setstate!(rng, rand(rd, UInt64), rand(rd, UInt64), rand(rd, UInt64), rand(rd, UInt64))
-end
-
-function seed!(rng::Union{TaskLocalRNG,Xoshiro}, seed::Union{Vector{UInt32}, Vector{UInt64}})
-    c = SHA.SHA2_256_CTX()
-    SHA.update!(c, reinterpret(UInt8, seed))
-    s0, s1, s2, s3 = reinterpret(UInt64, SHA.digest!(c))
-    setstate!(rng, s0, s1, s2, s3)
-end
-
-seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::Integer) = seed!(rng, make_seed(seed))
-
-
 @inline function rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt128})
     first = rand(rng, UInt64)
     second = rand(rng,UInt64)
@@ -176,41 +277,22 @@ end
     (rand(rng, UInt64) >>> (64 - 8*sizeof(S))) % S
 end
 
-function copy(rng::TaskLocalRNG)
-    t = current_task()
-    Xoshiro(t.rngState0, t.rngState1, t.rngState2, t.rngState3)
-end
-
-function copy!(dst::TaskLocalRNG, src::Xoshiro)
-    t = current_task()
-    setstate!(dst, src.s0, src.s1, src.s2, src.s3)
-    return dst
-end
-
-function copy!(dst::Xoshiro, src::TaskLocalRNG)
-    t = current_task()
-    setstate!(dst, t.rngState0, t.rngState1, t.rngState2, t.rngState3)
-    return dst
-end
-
-function ==(a::Xoshiro, b::TaskLocalRNG)
-    t = current_task()
-    a.s0 == t.rngState0 && a.s1 == t.rngState1 && a.s2 == t.rngState2 && a.s3 == t.rngState3
-end
-
-==(a::TaskLocalRNG, b::Xoshiro) = b == a
-
 # for partial words, use upper bits from Xoshiro
 
 rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52Raw{UInt64}}) = rand(r, UInt64) >>> 12
 rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52{UInt64}})    = rand(r, UInt64) >>> 12
 rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt104{UInt128}})  = rand(r, UInt104Raw())
 
-rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float16}}) =
-    Float16(Float32(rand(r, UInt16) >>> 5) * Float32(0x1.0p-11))
-
-rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float32}}) =
-    Float32(rand(r, UInt32) >>> 8) * Float32(0x1.0p-24)
-
-rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01_64}) =
-    Float64(rand(r, UInt64) >>> 11) * 0x1.0p-53
+for FT in (Float16, Float32, Float64)
+    UT = Base.uinttype(FT)
+    # Helper function: scale an unsigned integer to a floating point number of the same size
+    # in the interval [0, 1).  This is equivalent to, but more easily extensible than
+    #     Float16(i >>>  5) * Float16(0x1.0p-11)
+    #     Float32(i >>>  8) * Float32(0x1.0p-24)
+    #     Float32(i >>> 11) * Float64(0x1.0p-53)
+    @eval @inline _uint2float(i::$(UT), ::Type{$(FT)}) =
+        $(FT)(i >>> $(8 * sizeof(FT) - precision(FT))) * $(FT(2) ^ -precision(FT))
+
+    @eval rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{$(FT)}}) =
+        _uint2float(rand(r, $(UT)), $(FT))
+end
diff --git a/stdlib/Random/src/XoshiroSimd.jl b/stdlib/Random/src/XoshiroSimd.jl
index 1a16baa4bce28..a23f5d7590b40 100644
--- a/stdlib/Random/src/XoshiroSimd.jl
+++ b/stdlib/Random/src/XoshiroSimd.jl
@@ -2,8 +2,8 @@
 
 module XoshiroSimd
 # Getting the xoroshiro RNG to reliably vectorize is somewhat of a hassle without Simd.jl.
-import ..Random: TaskLocalRNG, rand, rand!, Xoshiro, CloseOpen01, UnsafeView,
-                 SamplerType, SamplerTrivial
+import ..Random: rand!
+using ..Random: TaskLocalRNG, rand, Xoshiro, CloseOpen01, UnsafeView, SamplerType, SamplerTrivial, getstate, setstate!, _uint2float
 using Base: BitInteger_types
 using Base.Libc: memcpy
 using Core.Intrinsics: llvmcall
@@ -30,7 +30,12 @@ simdThreshold(::Type{Bool}) = 640
     Tuple{UInt64, Int64},
     x, y)
 
-@inline _bits2float(x::UInt64, ::Type{Float64}) = reinterpret(UInt64, Float64(x >>> 11) * 0x1.0p-53)
+# `_bits2float(x::UInt64, T)` takes `x::UInt64` as input, it splits it in `N` parts where
+# `N = sizeof(UInt64) / sizeof(T)` (`N = 1` for `Float64`, `N = 2` for `Float32, etc...), it
+# truncates each part to the unsigned type of the same size as `T`, scales all of these
+# numbers to a value of type `T` in the range [0,1) with `_uint2float`, and then
+# recomposes another `UInt64` using all these parts.
+@inline _bits2float(x::UInt64, ::Type{Float64}) = reinterpret(UInt64,  _uint2float(x, Float64))
 @inline function _bits2float(x::UInt64, ::Type{Float32})
     #=
     # this implementation uses more high bits, but is harder to vectorize
@@ -40,10 +45,21 @@ simdThreshold(::Type{Bool}) = 640
     =#
     ui = (x>>>32) % UInt32
     li = x % UInt32
-    u = Float32(ui >>> 8) * Float32(0x1.0p-24)
-    l = Float32(li >>> 8) * Float32(0x1.0p-24)
+    u = _uint2float(ui, Float32)
+    l = _uint2float(li, Float32)
     (UInt64(reinterpret(UInt32, u)) << 32) | UInt64(reinterpret(UInt32, l))
 end
+@inline function _bits2float(x::UInt64, ::Type{Float16})
+    i1 = (x>>>48) % UInt16
+    i2 = (x>>>32) % UInt16
+    i3 = (x>>>16) % UInt16
+    i4 = x % UInt16
+    f1 = _uint2float(i1, Float16)
+    f2 = _uint2float(i2, Float16)
+    f3 = _uint2float(i3, Float16)
+    f4 = _uint2float(i4, Float16)
+    return (UInt64(reinterpret(UInt16, f1)) << 48) | (UInt64(reinterpret(UInt16, f2)) << 32) | (UInt64(reinterpret(UInt16, f3)) << 16) | UInt64(reinterpret(UInt16, f4))
+end
 
 # required operations. These could be written more concisely with `ntuple`, but the compiler
 # sometimes refuses to properly vectorize.
@@ -118,6 +134,18 @@ for N in [4,8,16]
         ret <$N x i64> %i
         """
         @eval @inline _bits2float(x::$VT, ::Type{Float32}) = llvmcall($code, $VT, Tuple{$VT}, x)
+
+        code = """
+        %as16 = bitcast <$N x i64> %0 to <$(4N) x i16>
+        %shiftamt = shufflevector <1 x i16> <i16 5>, <1 x i16> undef, <$(4N) x i32> zeroinitializer
+        %sh = lshr <$(4N) x i16> %as16, %shiftamt
+        %f = uitofp <$(4N) x i16> %sh to <$(4N) x half>
+        %scale = shufflevector <1 x half> <half 0x3f40000000000000>, <1 x half> undef, <$(4N) x i32> zeroinitializer
+        %m = fmul <$(4N) x half> %f, %scale
+        %i = bitcast <$(4N) x half> %m to <$N x i64>
+        ret <$N x i64> %i
+        """
+        @eval @inline _bits2float(x::$VT, ::Type{Float16}) = llvmcall($code, $VT, Tuple{$VT}, x)
     end
 end
 
@@ -137,7 +165,7 @@ end
 
 _id(x, T) = x
 
-@inline function xoshiro_bulk(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, T::Union{Type{UInt8}, Type{Bool}, Type{Float32}, Type{Float64}}, ::Val{N}, f::F = _id) where {N, F}
+@inline function xoshiro_bulk(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, T::Union{Type{UInt8}, Type{Bool}, Type{Float16}, Type{Float32}, Type{Float64}}, ::Val{N}, f::F = _id) where {N, F}
     if len >= simdThreshold(T)
         written = xoshiro_bulk_simd(rng, dst, len, T, Val(N), f)
         len -= written
@@ -149,14 +177,9 @@ _id(x, T) = x
     nothing
 end
 
-@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, f::F) where {T, F}
-    if rng isa TaskLocalRNG
-        task = current_task()
-        s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3
-    else
-        (; s0, s1, s2, s3) = rng::Xoshiro
-    end
-
+@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, f::F
+                                       ) where {T, F}
+    s0, s1, s2, s3 = getstate(rng)
     i = 0
     while i+8 <= len
         res = _plus(_rotl23(_plus(s0,s3)),s0)
@@ -183,22 +206,12 @@ end
         # TODO: This may make the random-stream dependent on system endianness
         GC.@preserve ref memcpy(dst+i, Base.unsafe_convert(Ptr{Cvoid}, ref), len-i)
     end
-    if rng isa TaskLocalRNG
-        task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3
-    else
-       rng.s0, rng.s1, rng.s2, rng.s3 =  s0, s1, s2, s3
-    end
+    setstate!(rng, (s0, s1, s2, s3, nothing))
     nothing
 end
 
 @noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{Bool}, f)
-    if rng isa TaskLocalRNG
-        task = current_task()
-        s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3
-    else
-        (; s0, s1, s2, s3) = rng::Xoshiro
-    end
-
+    s0, s1, s2, s3 = getstate(rng)
     i = 0
     while i+8 <= len
         res = _plus(_rotl23(_plus(s0,s3)),s0)
@@ -232,11 +245,7 @@ end
         s2 = _xor(s2, t)
         s3 = _rotl45(s3)
     end
-    if rng isa TaskLocalRNG
-        task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3
-    else
-        rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3
-    end
+    setstate!(rng, (s0, s1, s2, s3, nothing))
     nothing
 end
 
@@ -283,25 +292,21 @@ end
     return i
 end
 
+const MutableDenseArray = Union{Base.MutableDenseArrayType{T}, UnsafeView{T}} where {T}
 
-function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float32}, ::SamplerTrivial{CloseOpen01{Float32}})
-    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*4, Float32, xoshiroWidth(), _bits2float)
-    dst
-end
-
-function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float64}, ::SamplerTrivial{CloseOpen01{Float64}})
-    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*8, Float64, xoshiroWidth(), _bits2float)
+function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::MutableDenseArray{T}, ::SamplerTrivial{CloseOpen01{T}}) where {T<:Union{Float16,Float32,Float64}}
+    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*sizeof(T), T, xoshiroWidth(), _bits2float)
     dst
 end
 
 for T in BitInteger_types
-    @eval function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Union{Array{$T}, UnsafeView{$T}}, ::SamplerType{$T})
+    @eval function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::MutableDenseArray{$T}, ::SamplerType{$T})
         GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*sizeof($T), UInt8, xoshiroWidth())
         dst
     end
 end
 
-function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Bool}, ::SamplerType{Bool})
+function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::MutableDenseArray{Bool}, ::SamplerType{Bool})
     GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst), Bool, xoshiroWidth())
     dst
 end
diff --git a/stdlib/Random/src/generation.jl b/stdlib/Random/src/generation.jl
index cc9840f678413..0a355ffdf96f3 100644
--- a/stdlib/Random/src/generation.jl
+++ b/stdlib/Random/src/generation.jl
@@ -19,7 +19,7 @@
 Sampler(::Type{RNG}, ::Type{T}, n::Repetition) where {RNG<:AbstractRNG,T<:AbstractFloat} =
     Sampler(RNG, CloseOpen01(T), n)
 
-# generic random generation function which can be used by RNG implementors
+# generic random generation function which can be used by RNG implementers
 # it is not defined as a fallback rand method as this could create ambiguities
 
 rand(r::AbstractRNG, ::SamplerTrivial{CloseOpen01{Float16}}) =
@@ -57,7 +57,7 @@ Sampler(::Type{<:AbstractRNG}, I::FloatInterval{BigFloat}, ::Repetition) =
     SamplerBigFloat{typeof(I)}(precision(BigFloat))
 
 function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat)
-    precision(z) == sp.prec || throw(ArgumentError("incompatible BigFloat precision"))
+    precision(z) == sp.prec || _throw_argerror("incompatible BigFloat precision")
     limbs = sp.limbs
     rand!(rng, limbs)
     @inbounds begin
@@ -66,7 +66,7 @@ function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat)
         limbs[end] |= Limb_high_bit
     end
     z.sign = 1
-    GC.@preserve limbs unsafe_copyto!(z.d, pointer(limbs), sp.nlimbs)
+    copyto!(z.d, limbs)
     randbool
 end
 
@@ -80,7 +80,7 @@ function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat, ::CloseOpen0
     randbool = _rand!(rng, z, sp)
     z.exp = 0
     randbool &&
-        ccall((:mpfr_sub_d, :libmpfr), Int32,
+        ccall((:mpfr_sub_d, Base.MPFR.libmpfr), Int32,
               (Ref{BigFloat}, Ref{BigFloat}, Cdouble, Base.MPFR.MPFRRoundingMode),
               z, z, 0.5, Base.MPFR.ROUNDING_MODE[])
     z
@@ -91,7 +91,7 @@ end
 function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat, ::CloseOpen01{BigFloat},
                 ::Nothing)
     _rand!(rng, z, sp, CloseOpen12(BigFloat))
-    ccall((:mpfr_sub_ui, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, Base.MPFR.MPFRRoundingMode),
+    ccall((:mpfr_sub_ui, Base.MPFR.libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, Base.MPFR.MPFRRoundingMode),
           z, z, 1, Base.MPFR.ROUNDING_MODE[])
     z
 end
@@ -130,7 +130,7 @@ rand(r::AbstractRNG, sp::SamplerTrivial{<:UniformBits{T}}) where {T} =
 
 #### BitInteger
 
-# rand_generic methods are intended to help RNG implementors with common operations
+# rand_generic methods are intended to help RNG implementers with common operations
 # we don't call them simply `rand` as this can easily contribute to create
 # ambiguities with user-side methods (forcing the user to resort to @eval)
 
@@ -167,6 +167,38 @@ function rand(r::AbstractRNG, ::SamplerType{T}) where {T<:AbstractChar}
     (c < 0xd800) ? T(c) : T(c+0x800)
 end
 
+### random tuples
+
+function Sampler(::Type{RNG}, ::Type{T}, n::Repetition) where {T<:Tuple, RNG<:AbstractRNG}
+    tail_sp_ = Sampler(RNG, Tuple{Base.tail(fieldtypes(T))...}, n)
+    SamplerTag{Ref{T}}((Sampler(RNG, fieldtype(T, 1), n), tail_sp_.data...))
+    # Ref so that the gentype is `T` in SamplerTag's constructor
+end
+
+function Sampler(::Type{RNG}, ::Type{Tuple{Vararg{T, N}}}, n::Repetition) where {T, N, RNG<:AbstractRNG}
+    if N > 0
+        SamplerTag{Ref{Tuple{Vararg{T, N}}}}((Sampler(RNG, T, n),))
+    else
+        SamplerTag{Ref{Tuple{}}}(())
+    end
+end
+
+function rand(rng::AbstractRNG, sp::SamplerTag{Ref{T}}) where T<:Tuple
+    ntuple(i -> rand(rng, sp.data[min(i, length(sp.data))]), Val{fieldcount(T)}())::T
+end
+
+### random pairs
+
+function Sampler(::Type{RNG}, ::Type{Pair{A, B}}, n::Repetition) where {RNG<:AbstractRNG, A, B}
+    sp1 = Sampler(RNG, A, n)
+    sp2 = A === B ? sp1 : Sampler(RNG, B, n)
+    SamplerTag{Ref{Pair{A,B}}}(sp1 => sp2) # Ref so that the gentype is Pair{A, B}
+                                           # in SamplerTag's constructor
+end
+
+rand(rng::AbstractRNG, sp::SamplerTag{<:Ref{<:Pair}}) =
+    rand(rng, sp.data.first) => rand(rng, sp.data.second)
+
 
 ## Generate random integer within a range
 
@@ -197,6 +229,9 @@ uint_sup(::Type{<:Base.BitInteger32}) = UInt32
 uint_sup(::Type{<:Union{Int64,UInt64}}) = UInt64
 uint_sup(::Type{<:Union{Int128,UInt128}}) = UInt128
 
+@noinline empty_collection_error() = throw(ArgumentError("collection must be non-empty"))
+
+
 #### Fast
 
 struct SamplerRangeFast{U<:BitUnsigned,T<:BitInteger} <: Sampler{T}
@@ -210,7 +245,7 @@ SamplerRangeFast(r::AbstractUnitRange{T}) where T<:BitInteger =
     SamplerRangeFast(r, uint_sup(T))
 
 function SamplerRangeFast(r::AbstractUnitRange{T}, ::Type{U}) where {T,U}
-    isempty(r) && throw(ArgumentError("collection must be non-empty"))
+    isempty(r) && empty_collection_error()
     m = (last(r) - first(r)) % unsigned(T) % U # % unsigned(T) to not propagate sign bit
     bw = (Base.top_set_bit(m)) % UInt # bit-width
     mask = ((1 % U) << bw) - (1 % U)
@@ -262,7 +297,7 @@ rem_knuth(a::T, b::T) where {T<:Unsigned} = b != 0 ? a % b : a
 # maximum multiple of k <= sup decremented by one,
 # that is 0xFFFF...FFFF if k = (typemax(T) - typemin(T)) + 1 and sup == typemax(T) - 1
 # with intentional underflow
-# see http://stackoverflow.com/questions/29182036/integer-arithmetic-add-1-to-uint-max-and-divide-by-n-without-overflow
+# see https://stackoverflow.com/questions/29182036/integer-arithmetic-add-1-to-uint-max-and-divide-by-n-without-overflow
 
 # sup == 0 means typemax(T) + 1
 maxmultiple(k::T, sup::T=zero(T)) where {T<:Unsigned} =
@@ -284,7 +319,7 @@ SamplerRangeInt(r::AbstractUnitRange{T}) where T<:BitInteger =
     SamplerRangeInt(r, uint_sup(T))
 
 function SamplerRangeInt(r::AbstractUnitRange{T}, ::Type{U}) where {T,U}
-    isempty(r) && throw(ArgumentError("collection must be non-empty"))
+    isempty(r) && empty_collection_error()
     a = first(r)
     m = (last(r) - first(r)) % unsigned(T) % U
     k = m + one(U)
@@ -330,7 +365,7 @@ struct SamplerRangeNDL{U<:Unsigned,T} <: Sampler{T}
 end
 
 function SamplerRangeNDL(r::AbstractUnitRange{T}) where {T}
-    isempty(r) && throw(ArgumentError("collection must be non-empty"))
+    isempty(r) && empty_collection_error()
     a = first(r)
     U = uint_sup(T)
     s = (last(r) - first(r)) % unsigned(T) % U + one(U) # overflow ok
@@ -343,16 +378,20 @@ function rand(rng::AbstractRNG, sp::SamplerRangeNDL{U,T}) where {U,T}
     s = sp.s
     x = widen(rand(rng, U))
     m = x * s
-    l = m % U
-    if l < s
-        t = mod(-s, s) # as s is unsigned, -s is equal to 2^L - s in the paper
-        while l < t
-            x = widen(rand(rng, U))
-            m = x * s
-            l = m % U
-        end
+    r::T = (m % U) < s ? rand_unlikely(rng, s, m) % T :
+           iszero(s)   ? x % T :
+                         (m >> (8*sizeof(U))) % T
+    r + sp.a
+end
+
+# similar to `randn_unlikely` : splitting this unlikely path out results in faster code
+@noinline function rand_unlikely(rng, s::U, m)::U where {U}
+    t = mod(-s, s) # as s is unsigned, -s is equal to 2^L - s in the paper
+    while (m % U) < t
+        x = widen(rand(rng, U))
+        m = x * s
     end
-    (s == 0 ? x : m >> (8*sizeof(U))) % T + sp.a
+    (m >> (8*sizeof(U))) % U
 end
 
 
@@ -369,7 +408,7 @@ end
 function SamplerBigInt(::Type{RNG}, r::AbstractUnitRange{BigInt}, N::Repetition=Val(Inf)
                        ) where {RNG<:AbstractRNG}
     m = last(r) - first(r)
-    m.size < 0 && throw(ArgumentError("collection must be non-empty"))
+    m.size < 0 && empty_collection_error()
     nlimbs = Int(m.size)
     hm = nlimbs == 0 ? Limb(0) : GC.@preserve m unsafe_load(m.d, nlimbs)
     highsp = Sampler(RNG, Limb(0):hm, N)
@@ -425,7 +464,7 @@ rand(rng::AbstractRNG, sp::SamplerSimple{<:AbstractArray,<:Sampler}) =
 ## random values from Dict
 
 function Sampler(::Type{RNG}, t::Dict, ::Repetition) where RNG<:AbstractRNG
-    isempty(t) && throw(ArgumentError("collection must be non-empty"))
+    isempty(t) && empty_collection_error()
     # we use Val(Inf) below as rand is called repeatedly internally
     # even for generating only one random value from t
     SamplerSimple(t, Sampler(RNG, LinearIndices(t.slots), Val(Inf)))
@@ -438,6 +477,12 @@ function rand(rng::AbstractRNG, sp::SamplerSimple{<:Dict,<:Sampler})
     end
 end
 
+rand(rng::AbstractRNG, sp::SamplerTrivial{<:Base.KeySet{<:Any,<:Dict}}) =
+    rand(rng, sp[].dict).first
+
+rand(rng::AbstractRNG, sp::SamplerTrivial{<:Base.ValueIterator{<:Dict}}) =
+    rand(rng, sp[].dict).second
+
 ## random values from Set
 
 Sampler(::Type{RNG}, t::Set{T}, n::Repetition) where {RNG<:AbstractRNG,T} =
@@ -448,7 +493,7 @@ rand(rng::AbstractRNG, sp::SamplerTag{<:Set,<:Sampler}) = rand(rng, sp.data).fir
 ## random values from BitSet
 
 function Sampler(RNG::Type{<:AbstractRNG}, t::BitSet, n::Repetition)
-    isempty(t) && throw(ArgumentError("collection must be non-empty"))
+    isempty(t) && empty_collection_error()
     SamplerSimple(t, Sampler(RNG, minimum(t):maximum(t), Val(Inf)))
 end
 
diff --git a/stdlib/Random/src/misc.jl b/stdlib/Random/src/misc.jl
index b128a7c47451b..0e7f8717387ee 100644
--- a/stdlib/Random/src/misc.jl
+++ b/stdlib/Random/src/misc.jl
@@ -17,16 +17,14 @@ Generate a `BitArray` of random boolean values.
 
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
-
-julia> bitrand(rng, 10)
+julia> bitrand(Xoshiro(123), 10)
 10-element BitVector:
  0
- 0
- 0
+ 1
  0
  1
  0
+ 1
  0
  0
  1
@@ -55,8 +53,8 @@ number generator, see [Random Numbers](@ref).
 julia> Random.seed!(3); randstring()
 "Lxz5hUwn"
 
-julia> randstring(MersenneTwister(3), 'a':'z', 6)
-"ocucay"
+julia> randstring(Xoshiro(3), 'a':'z', 6)
+"iyzcsm"
 
 julia> randstring("ACGT")
 "TGCTCCTC"
@@ -99,7 +97,7 @@ end
 #  size-m subset of A where m is fixed!)
 function randsubseq!(r::AbstractRNG, S::AbstractArray, A::AbstractArray, p::Real)
     require_one_based_indexing(S, A)
-    0 <= p <= 1 || throw(ArgumentError("probability $p not in [0,1]"))
+    0 <= p <= 1 || _throw_argerror(LazyString("probability ", p, " not in [0,1]"))
     n = length(A)
     p == 1 && return copyto!(resize!(S, n), A)
     empty!(S)
@@ -141,19 +139,17 @@ Like [`randsubseq`](@ref), but the results are stored in `S`
 
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
-
 julia> S = Int64[];
 
-julia> randsubseq!(rng, S, 1:8, 0.3)
+julia> randsubseq!(Xoshiro(123), S, 1:8, 0.3)
 2-element Vector{Int64}:
+ 4
  7
- 8
 
 julia> S
 2-element Vector{Int64}:
+ 4
  7
- 8
 ```
 """
 randsubseq!(S::AbstractArray, A::AbstractArray, p::Real) = randsubseq!(default_rng(), S, A, p)
@@ -171,24 +167,45 @@ large.) Technically, this process is known as "Bernoulli sampling" of `A`.
 
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
-
-julia> randsubseq(rng, 1:8, 0.3)
+julia> randsubseq(Xoshiro(123), 1:8, 0.3)
 2-element Vector{Int64}:
+ 4
  7
- 8
 ```
 """
 randsubseq(A::AbstractArray, p::Real) = randsubseq(default_rng(), A, p)
 
 
-## rand Less Than Masked 52 bits (helper function)
-
-"Return a sampler generating a random `Int` (masked with `mask`) in ``[0, n)``, when `n <= 2^52`."
-ltm52(n::Int, mask::Int=nextpow(2, n)-1) = LessThan(n-1, Masked(mask, UInt52Raw(Int)))
-
 ## shuffle & shuffle!
 
+function shuffle(rng::AbstractRNG, tup::NTuple{N}) where {N}
+    # `@inline` and `@inbounds` are here to help escape analysis eliminate the `Memory` allocation
+    #
+    # * `@inline` might be necessary because escape analysis relies on everything
+    #   touching the `Memory` being inlined because there's no interprocedural escape
+    #   analysis yet, relevant WIP PR: https://github.com/JuliaLang/julia/pull/56849
+    #
+    # * `@inbounds` might be necessary because escape analysis requires any throws of
+    #   `BoundsError` to be eliminated as dead code, because `BoundsError` stores the
+    #   array itself, making the throw escape the array from the function, relevant
+    #   WIP PR: https://github.com/JuliaLang/julia/pull/56167
+    @inline let
+        # use a narrow integer type to save stack space and prevent heap allocation
+        Ind = if N ≤ typemax(UInt8)
+            UInt8
+        elseif N ≤ typemax(UInt16)
+            UInt16
+        else
+            UInt
+        end
+        mem = @inbounds randperm!(rng, Memory{Ind}(undef, N))
+        function closure(i::Int)
+            @inbounds tup[mem[i]]
+        end
+        ntuple(closure, Val{N}())
+    end
+end
+
 """
     shuffle!([rng=default_rng(),] v::AbstractArray)
 
@@ -197,72 +214,70 @@ optionally supplying the random-number generator `rng`.
 
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
-
-julia> shuffle!(rng, Vector(1:16))
-16-element Vector{Int64}:
-  2
- 15
-  5
- 14
-  1
-  9
- 10
-  6
- 11
-  3
- 16
-  7
-  4
- 12
-  8
- 13
+julia> shuffle!(Xoshiro(0), Vector(1:6))
+6-element Vector{Int64}:
+ 5
+ 1
+ 2
+ 6
+ 3
+ 4
 ```
 """
-function shuffle!(r::AbstractRNG, a::AbstractArray)
+function shuffle!(rng::AbstractRNG, a::AbstractArray)
+    # keep it consistent with `randperm!` and `randcycle!` if possible
     require_one_based_indexing(a)
-    n = length(a)
-    n <= 1 && return a # nextpow below won't work with n == 0
-    @assert n <= Int64(2)^52
-    mask = nextpow(2, n) - 1
-    for i = n:-1:2
-        (mask >> 1) == i && (mask >>= 1)
-        j = 1 + rand(r, ltm52(i, mask))
+    @inbounds for i = 2:length(a)
+        j = rand(rng, 1:i)
         a[i], a[j] = a[j], a[i]
     end
     return a
 end
 
+function shuffle!(r::AbstractRNG, a::AbstractArray{Bool})
+    old_count = count(a)
+    len = length(a)
+    uncommon_value = 2old_count <= len
+    fuel = uncommon_value ? old_count : len - old_count
+    fuel == 0 && return a
+    a .= !uncommon_value
+    while fuel > 0
+        k = rand(r, eachindex(a))
+        fuel -= a[k] != uncommon_value
+        a[k] = uncommon_value
+    end
+    a
+end
+
 shuffle!(a::AbstractArray) = shuffle!(default_rng(), a)
 
 """
-    shuffle([rng=default_rng(),] v::AbstractArray)
+    shuffle([rng=default_rng(),] v::Union{NTuple,AbstractArray})
 
 Return a randomly permuted copy of `v`. The optional `rng` argument specifies a random
 number generator (see [Random Numbers](@ref)).
 To permute `v` in-place, see [`shuffle!`](@ref). To obtain randomly permuted
 indices, see [`randperm`](@ref).
 
+!!! compat "Julia 1.13"
+    Shuffling an `NTuple` value requires Julia v1.13 or above.
+
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
-
-julia> shuffle(rng, Vector(1:10))
-10-element Vector{Int64}:
-  6
-  1
- 10
-  2
-  3
-  9
-  5
-  7
-  4
-  8
+julia> shuffle(Xoshiro(0), 1:6)
+6-element Vector{Int64}:
+ 5
+ 1
+ 2
+ 6
+ 3
+ 4
 ```
 """
+function shuffle end
+
 shuffle(r::AbstractRNG, a::AbstractArray) = shuffle!(r, copymutable(a))
-shuffle(a::AbstractArray) = shuffle(default_rng(), a)
+shuffle(a::Union{NTuple, AbstractArray}) = shuffle(default_rng(), a)
 
 shuffle(r::AbstractRNG, a::Base.OneTo) = randperm(r, last(a))
 
@@ -285,53 +300,59 @@ To randomly permute an arbitrary vector, see [`shuffle`](@ref) or
 
 # Examples
 ```jldoctest
-julia> randperm(MersenneTwister(1234), 4)
-4-element Vector{Int64}:
- 2
+julia> randperm(Xoshiro(0), 6)
+6-element Vector{Int64}:
+ 5
  1
- 4
+ 2
+ 6
  3
+ 4
 ```
 """
 randperm(r::AbstractRNG, n::T) where {T <: Integer} = randperm!(r, Vector{T}(undef, n))
 randperm(n::Integer) = randperm(default_rng(), n)
 
 """
-    randperm!([rng=default_rng(),] A::Array{<:Integer})
+    randperm!([rng=default_rng(),] A::AbstractArray{<:Integer})
 
 Construct in `A` a random permutation of length `length(A)`. The
 optional `rng` argument specifies a random number generator (see
 [Random Numbers](@ref)). To randomly permute an arbitrary vector, see
 [`shuffle`](@ref) or [`shuffle!`](@ref).
 
+!!! compat "Julia 1.13"
+    `A isa Array` was required prior to Julia v1.13.
+
 # Examples
 ```jldoctest
-julia> randperm!(MersenneTwister(1234), Vector{Int}(undef, 4))
-4-element Vector{Int64}:
- 2
+julia> randperm!(Xoshiro(0), Vector{Int}(undef, 6))
+6-element Vector{Int64}:
+ 5
  1
- 4
+ 2
+ 6
  3
+ 4
 ```
 """
-function randperm!(r::AbstractRNG, a::Array{<:Integer})
+function randperm!(rng::AbstractRNG, a::AbstractArray{<:Integer})
+    # keep it consistent with `shuffle!` and `randcycle!` if possible
+    Base.require_one_based_indexing(a)
     n = length(a)
-    @assert n <= Int64(2)^52
     n == 0 && return a
     a[1] = 1
-    mask = 3
     @inbounds for i = 2:n
-        j = 1 + rand(r, ltm52(i, mask))
+        j = rand(rng, 1:i)
         if i != j # a[i] is undef (and could be #undef)
             a[i] = a[j]
         end
         a[j] = i
-        i == 1+mask && (mask = 2mask + 1)
     end
     return a
 end
 
-randperm!(a::Array{<:Integer}) = randperm!(default_rng(), a)
+randperm!(a::AbstractArray{<:Integer}) = randperm!(default_rng(), a)
 
 
 ## randcycle & randcycle!
@@ -343,19 +364,25 @@ Construct a random cyclic permutation of length `n`. The optional `rng`
 argument specifies a random number generator, see [Random Numbers](@ref).
 The element type of the result is the same as the type of `n`.
 
+Here, a "cyclic permutation" means that all of the elements lie within
+a single cycle.  If `n > 0`, there are ``(n-1)!`` possible cyclic permutations,
+which are sampled uniformly.  If `n == 0`, `randcycle` returns an empty vector.
+
+[`randcycle!`](@ref) is an in-place variant of this function.
+
 !!! compat "Julia 1.1"
-    In Julia 1.1 and above, `randcycle` returns a vector `v` with 
+    In Julia 1.1 and above, `randcycle` returns a vector `v` with
     `eltype(v) == typeof(n)` while in Julia 1.0 `eltype(v) == Int`.
 
 # Examples
 ```jldoctest
-julia> randcycle(MersenneTwister(1234), 6)
+julia> randcycle(Xoshiro(0), 6)
 6-element Vector{Int64}:
- 3
  5
+ 1
  4
  6
- 1
+ 3
  2
 ```
 """
@@ -363,37 +390,46 @@ randcycle(r::AbstractRNG, n::T) where {T <: Integer} = randcycle!(r, Vector{T}(u
 randcycle(n::Integer) = randcycle(default_rng(), n)
 
 """
-    randcycle!([rng=default_rng(),] A::Array{<:Integer})
+    randcycle!([rng=default_rng(),] A::AbstractArray{<:Integer})
 
-Construct in `A` a random cyclic permutation of length `length(A)`.
+Construct in `A` a random cyclic permutation of length `n = length(A)`.
 The optional `rng` argument specifies a random number generator, see
 [Random Numbers](@ref).
 
+Here, a "cyclic permutation" means that all of the elements lie within a single cycle.
+If `A` is nonempty (`n > 0`), there are ``(n-1)!`` possible cyclic permutations,
+which are sampled uniformly.  If `A` is empty, `randcycle!` leaves it unchanged.
+
+[`randcycle`](@ref) is a variant of this function that allocates a new vector.
+
+!!! compat "Julia 1.13"
+    `A isa Array` was required prior to Julia v1.13.
+
 # Examples
 ```jldoctest
-julia> randcycle!(MersenneTwister(1234), Vector{Int}(undef, 6))
+julia> randcycle!(Xoshiro(0), Vector{Int}(undef, 6))
 6-element Vector{Int64}:
- 3
  5
+ 1
  4
  6
- 1
+ 3
  2
 ```
 """
-function randcycle!(r::AbstractRNG, a::Array{<:Integer})
+function randcycle!(rng::AbstractRNG, a::AbstractArray{<:Integer})
+    # keep it consistent with `shuffle!` and `randperm!` if possible
+    Base.require_one_based_indexing(a)
     n = length(a)
     n == 0 && return a
-    @assert n <= Int64(2)^52
     a[1] = 1
-    mask = 3
+    # Sattolo's algorithm:
     @inbounds for i = 2:n
-        j = 1 + rand(r, ltm52(i-1, mask))
+        j = rand(rng, 1:i-1)
         a[i] = a[j]
         a[j] = i
-        i == 1+mask && (mask = 2mask + 1)
     end
     return a
 end
 
-randcycle!(a::Array{<:Integer}) = randcycle!(default_rng(), a)
+randcycle!(a::AbstractArray{<:Integer}) = randcycle!(default_rng(), a)
diff --git a/stdlib/Random/src/normal.jl b/stdlib/Random/src/normal.jl
index c2738653a0438..267d9db48fee8 100644
--- a/stdlib/Random/src/normal.jl
+++ b/stdlib/Random/src/normal.jl
@@ -3,7 +3,7 @@
 # Normally distributed random numbers using Ziggurat algorithm
 
 # The Ziggurat Method for generating random variables - Marsaglia and Tsang
-# Paper and reference code: http://www.jstatsoft.org/v05/i08/
+# Paper and reference code: https://www.jstatsoft.org/v05/i08/
 
 # randmtzig (covers also exponential variates)
 
@@ -14,27 +14,51 @@
 
 Generate a normally-distributed random number of type `T`
 with mean 0 and standard deviation 1.
-Optionally generate an array of normally-distributed random numbers.
-The `Base` module currently provides an implementation for the types
-[`Float16`](@ref), [`Float32`](@ref), and [`Float64`](@ref) (the default), and their
-[`Complex`](@ref) counterparts. When the type argument is complex, the values are drawn
-from the circularly symmetric complex normal distribution of variance 1 (corresponding to real and imaginary part having independent normal distribution with mean zero and variance `1/2`).
+Given the optional `dims` argument(s), generate an array of size `dims` of such numbers.
+Julia's standard library supports `randn` for any floating-point type
+that implements [`rand`](@ref), e.g. the `Base` types
+[`Float16`](@ref), [`Float32`](@ref), [`Float64`](@ref) (the default), and [`BigFloat`](@ref),
+along with their [`Complex`](@ref) counterparts.
+
+(When `T` is complex, the values are drawn
+from the circularly symmetric complex normal distribution of variance 1, corresponding to real and imaginary parts
+having independent normal distribution with mean zero and variance `1/2`).
 
 See also [`randn!`](@ref) to act in-place.
 
 # Examples
+
+Generating a single random number (with the default `Float64` type):
+
+```julia-repl
+julia> randn()
+-0.942481877315864
+```
+
+Generating a matrix of normal random numbers (with the default `Float64` type):
+
+```julia-repl
+julia> randn(2,3)
+2×3 Matrix{Float64}:
+  1.18786   -0.678616   1.49463
+ -0.342792  -0.134299  -1.45005
+```
+
+Setting up of the random number generator `rng` with a user-defined seed (for reproducible numbers)
+and using it to generate a random `Float32` number or a matrix of `ComplexF32` random numbers:
+
 ```jldoctest
 julia> using Random
 
-julia> rng = MersenneTwister(1234);
+julia> rng = Xoshiro(123);
 
-julia> randn(rng, ComplexF64)
-0.6133070881429037 - 0.6376291670853887im
+julia> randn(rng, Float32)
+-0.6457307f0
 
 julia> randn(rng, ComplexF32, (2, 3))
 2×3 Matrix{ComplexF32}:
- -0.349649-0.638457im  0.376756-0.192146im  -0.396334-0.0136413im
-  0.611224+1.56403im   0.355204-0.365563im  0.0905552+1.31012im
+  -1.03467-1.14806im  0.693657+0.056538im   0.291442+0.419454im
+ -0.153912+0.34807im    1.0954-0.948661im  -0.543347-0.0538589im
 ```
 """
 @inline function randn(rng::AbstractRNG=default_rng())
@@ -72,8 +96,8 @@ end
 @noinline function randn_unlikely(rng, idx, rabs, x)
     @inbounds if idx == 0
         while true
-            xx = -ziggurat_nor_inv_r*log(rand(rng))
-            yy = -log(rand(rng))
+            xx = -ziggurat_nor_inv_r*log1p(-rand(rng))
+            yy = -log1p(-rand(rng))
             yy+yy > xx*xx &&
                 return (rabs >> 8) % Bool ? -ziggurat_nor_r-xx : ziggurat_nor_r+xx
         end
@@ -114,16 +138,16 @@ The `Base` module currently provides an implementation for the types
 
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
+julia> rng = Xoshiro(123);
 
 julia> randexp(rng, Float32)
-2.4835055f0
+1.1757717f0
 
 julia> randexp(rng, 3, 3)
 3×3 Matrix{Float64}:
- 1.5167    1.30652   0.344435
- 0.604436  2.78029   0.418516
- 0.695867  0.693292  0.643644
+ 1.37766  0.456653  0.236418
+ 3.40007  0.229917  0.0684921
+ 0.48096  0.577481  0.71835
 ```
 """
 randexp(rng::AbstractRNG=default_rng()) = _randexp(rng, rand(rng, UInt52Raw()))
@@ -140,7 +164,7 @@ end
 
 @noinline function randexp_unlikely(rng, idx, x)
     @inbounds if idx == 0
-        return ziggurat_exp_r - log(rand(rng))
+        return ziggurat_exp_r - log1p(-rand(rng))
     elseif (fe[idx] - fe[idx+1])*rand(rng) + fe[idx+1] < exp(-x)
         return x # return from the triangular area
     else
@@ -162,15 +186,13 @@ Also see the [`rand`](@ref) function.
 
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
-
-julia> randn!(rng, zeros(5))
+julia> randn!(Xoshiro(123), zeros(5))
 5-element Vector{Float64}:
-  0.8673472019512456
- -0.9017438158568171
- -0.4944787535042339
- -0.9029142938652416
-  0.8644013132535154
+ -0.6457306721039767
+ -1.4632513788889214
+ -1.6236037455860806
+ -0.21766510678354617
+  0.4922456865251828
 ```
 """
 function randn! end
@@ -183,15 +205,13 @@ Fill the array `A` with random numbers following the exponential distribution
 
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
-
-julia> randexp!(rng, zeros(5))
+julia> randexp!(Xoshiro(123), zeros(5))
 5-element Vector{Float64}:
- 2.4835053723904896
- 1.516703605376473
- 0.6044364871025417
- 0.6958665886385867
- 1.3065196315496677
+ 1.1757716836348473
+ 1.758884569451514
+ 1.0083623637301151
+ 0.3510644315565272
+ 0.6348266443720407
 ```
 """
 function randexp! end
diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl
index 3f570d862b743..6b7f30622a411 100644
--- a/stdlib/Random/test/runtests.jl
+++ b/stdlib/Random/test/runtests.jl
@@ -3,101 +3,88 @@
 using Test, SparseArrays
 using Test: guardseed
 
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")
 isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
 using .Main.OffsetArrays
 
 using Random
 using Random.DSFMT
 
-using Random: Sampler, SamplerRangeFast, SamplerRangeInt, SamplerRangeNDL, MT_CACHE_F, MT_CACHE_I
+using Random: default_rng, Sampler, SamplerRangeFast, SamplerRangeInt, SamplerRangeNDL, MT_CACHE_F, MT_CACHE_I
+using Random: jump_128, jump_192, jump_128!, jump_192!, SeedHasher
 
+import SHA
 import Future # randjump
 
-@testset "Issue #6573" begin
-    Random.seed!(0)
-    rand()
-    x = rand(384)
-    @test findall(x .== rand()) == []
+function test_uniform(xs::AbstractArray{T}) where {T<:AbstractFloat}
+    # TODO: refine
+    prec = isempty(xs) ? precision(T) : precision(first(xs))
+    proba_nocollision = prod((1.0 - i/2.0^prec for i=1:length(xs)-1), init=1.0) # rough estimate
+    xsu = Set(xs)
+    if (1.0 - proba_nocollision) < 2.0^-64
+        @test length(xsu) == length(xs)
+    elseif prec > 52 && length(xs) < 3000
+        # if proba of collisions is high enough, allow at most one collision;
+        # with the constraints on precision and length, more than one collision would happen
+        # with proba less than 2.0^-62
+        @test length(xsu) >= length(xs)-1
+    end
+    @test all(x -> zero(x) <= x < one(x), xs)
 end
 
-@test rand() != rand()
-@test 0.0 <= rand() < 1.0
-@test rand(UInt32) >= 0
-@test -10 <= rand(-10:-5) <= -5
-@test -10 <= rand(-10:5) <= 5
-@test minimum([rand(Int32(1):Int32(7^7)) for i = 1:100000]) > 0
-@test typeof(rand(false:true)) === Bool
-@test typeof(rand(Char)) === Char
-@test length(randn(4, 5)) == 20
-@test length(randn(ComplexF64, 4, 5)) == 20
-@test length(bitrand(4, 5)) == 20
-
-@test rand(MersenneTwister(0)) == 0.8236475079774124
-@test rand(MersenneTwister(42)) == 0.5331830160438613
-# Try a seed larger than 2^32
-@test rand(MersenneTwister(5294967296)) == 0.3498809918210497
-
-# Test array filling, Issues #7643, #8360
-@test rand(MersenneTwister(0), 1) == [0.8236475079774124]
-let A = zeros(2, 2)
-    rand!(MersenneTwister(0), A)
-    @test A == [0.8236475079774124  0.16456579813368521;
-                0.9103565379264364  0.17732884646626457]
-end
-let A = zeros(2, 2)
-    @test_throws MethodError rand!(MersenneTwister(0), A, 5)
-    @test rand(MersenneTwister(0), Int64, 1) == [-3433174948434291912]
-end
-let A = zeros(Int64, 2, 2)
-    rand!(MersenneTwister(0), A)
-    @test A == [858542123778948672  5715075217119798169;
-                8690327730555225005 8435109092665372532]
+function test_uniform(xs::AbstractArray{T}) where {T<:Base.BitInteger}
+    # TODO: refine
+    prec = 8*sizeof(T)
+    proba_nocollision = prod((1.0 - i/2.0^prec for i=1:length(xs)-1), init=1.0)
+    xsu = Set(xs)
+    if (1.0 - proba_nocollision) < 2.0^-64
+        @test length(xsu) == length(xs)
+    elseif prec > 52 && length(xs) < 3000
+        @test length(xsu) >= length(xs)-1
+    end
 end
 
-# rand from AbstractArray
-let mt = MersenneTwister()
-    @test rand(mt, 0:3:1000) in 0:3:1000
-    @test issubset(rand!(mt, Vector{Int}(undef, 100), 0:3:1000), 0:3:1000)
-    coll = Any[2, UInt128(128), big(619), "string"]
-    @test rand(mt, coll) in coll
-    @test issubset(rand(mt, coll, 2, 3), coll)
-
-    # check API with default RNG:
-    rand(0:3:1000)
-    rand!(Vector{Int}(undef, 100), 0:3:1000)
-    rand(coll)
-    rand(coll, 2, 3)
-end
 
-# randn
-@test randn(MersenneTwister(42)) == -0.5560268761463861
-let A = zeros(2, 2)
-    randn!(MersenneTwister(42), A)
-    @test A == [-0.5560268761463861  0.027155338009193845;
-                -0.444383357109696  -0.29948409035891055]
+@testset "MersenneTwister: do not do update the same global state in incompatible ways" begin
+    # Issue #6573
+    mm = MersenneTwister(rand(UInt128))
+    rand(mm)
+    xs = rand(mm, 384)
+    @test rand(mm) ∉ xs
+    test_uniform(xs)
 end
 
-let B = zeros(ComplexF64, 2)
-    randn!(MersenneTwister(42), B)
-    @test B == [ComplexF64(-0.5560268761463861,-0.444383357109696),
-                ComplexF64(0.027155338009193845,-0.29948409035891055)] * 0.7071067811865475244008
-end
+@testset "rand from AbstractArray" begin
+    seed = rand(UInt128)
+    for rng ∈ ([MersenneTwister(seed)], [Xoshiro(seed)], [])
+        # issue 8257
+        i8257 = 1:1/3:100
+        for _ = 1:100
+            @test rand(rng... ,i8257) in i8257
+        end
 
-for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt,
-          Float16, Float32, Float64, Rational{Int})
-    r = rand(convert(T, 97):convert(T, 122))
-    @test typeof(r) == T
-    @test 97 <= r <= 122
-    r = rand(convert(T, 97):convert(T,2):convert(T, 122),2)[1]
-    @test typeof(r) == T
-    @test 97 <= r <= 122
-    @test mod(r,2)==1
-
-    if T<:Integer && !(T===BigInt)
-        x = rand(typemin(T):typemax(T))
-        @test isa(x,T)
-        @test typemin(T) <= x <= typemax(T)
+        @test rand(rng..., 0:3:1000) in 0:3:1000
+        @test issubset(rand!(rng..., Vector{Int}(undef, 100), 0:3:1000), 0:3:1000)
+        coll = Any[2, UInt128(128), big(619), "string"]
+        @test rand(rng..., coll) in coll
+        @test issubset(rand(rng..., coll, 2, 3), coll)
+
+        for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt,
+                  Float16, Float32, Float64, Rational{Int})
+            r = rand(rng..., convert(T, 97):convert(T, 122))
+            @test typeof(r) == T
+            @test 97 <= r <= 122
+            r = rand(rng..., convert(T, 97):convert(T,2):convert(T, 122),2)[1]
+            @test typeof(r) == T
+            @test 97 <= r <= 122
+            @test mod(r,2)==1
+
+            if T<:Integer && !(T===BigInt)
+                x = rand(rng..., typemin(T):typemax(T))
+                @test isa(x,T)
+                @test typemin(T) <= x <= typemax(T)
+            end
+        end
     end
 end
 
@@ -227,54 +214,35 @@ for U in (Int64, UInt64)
               for k in 13 .+ Int64(2).^(1:30))
 end
 
-#issue 8257
-let i8257 = 1:1/3:100
-    for i = 1:100
-        @test rand(i8257) in i8257
-    end
-end
-
-# test code paths of rand!
-
-let mt = MersenneTwister(0)
-    A128 = Vector{UInt128}()
+@testset "test code paths of rand!(::MersenneTwister)" begin
+    mt = MersenneTwister(rand(UInt128))
+    A128 = UInt128[]
     @test length(rand!(mt, A128)) == 0
-    for (i,n) in enumerate([1, 3, 5, 6, 10, 11, 30])
+    for (i, n) in enumerate([1, 3, 5, 6, 10, 11, 30])
         resize!(A128, n)
         rand!(mt, A128)
         @test length(A128) == n
-        @test A128[end] == UInt128[0x15de6b23025813ad129841f537a04e40,
-                                   0xcfa4db38a2c65bc4f18c07dc91125edf,
-                                   0x33bec08136f19b54290982449b3900d5,
-                                   0xde41af3463e74cb830dad4add353ca20,
-                                   0x066d8695ebf85f833427c93416193e1f,
-                                   0x48fab49cc9fcee1c920d6dae629af446,
-                                   0x4b54632b4619f4eca22675166784d229][i]
-    end
-
-    Random.seed!(mt, 0)
-    Aend = Any[]
-    Bend = Any[]
-    for (i,T) in enumerate([Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, Float16, Float32])
+        test_uniform(A128)
+    end
+
+    for (i, T) in enumerate([Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, Float16, Float32])
         A = Vector{T}(undef, 16)
         B = Vector{T}(undef, 31)
         rand!(mt, A)
         rand!(mt, B)
-        push!(Aend, A[end])
-        push!(Bend, B[end])
+        @test length(A) == 16
+        @test length(B) == 31
+        test_uniform(A)
+        test_uniform(B)
     end
-    @test Aend == Any[21, 0x7b, 17385, 0x3086, -1574090021, 0xadcb4460, 6797283068698303107, 0x68a9f9865393cfd6,
-                      33687499368208574024854346399216845930, Float16(0.7744), 0.97259974f0]
-    @test Bend == Any[49, 0x65, -3725, 0x719d, 814246081, 0xdf61843a, -3433174948434291912, 0xd461716f27c91500,
-                      -85900088726243933988214632401750448432, Float16(0.10645), 0.13879478f0]
 
-    Random.seed!(mt, 0)
     AF64 = Vector{Float64}(undef, Random.dsfmt_get_min_array_size()-1)
-    @test rand!(mt, AF64)[end] == 0.957735065345398
-    @test rand!(mt, AF64)[end] == 0.6492481059865669
+    rand!(mt, AF64)
+    test_uniform(AF64)
     resize!(AF64, 2*length(mt.vals))
-    @test invoke(rand!, Tuple{MersenneTwister,AbstractArray{Float64},Random.SamplerTrivial{Random.CloseOpen01_64}},
-                 mt, AF64, Random.SamplerTrivial(Random.CloseOpen01()))[end]  == 0.1142787906708973
+    invoke(rand!, Tuple{MersenneTwister,AbstractArray{Float64},Random.SamplerTrivial{Random.CloseOpen01_64}},
+           mt, AF64, Random.SamplerTrivial(Random.CloseOpen01()))
+    test_uniform(AF64)
 end
 
 # Issue #9037
@@ -297,7 +265,7 @@ let mt = MersenneTwister(0)
         Random.seed!(mt, 0)
         rand(mt) # this is to fill mt.vals, cf. #9040
         rand!(mt, A) # must not segfault even if Int(pointer(A)) % 16 != 0
-        @test A[end-4:end] == [0.3371041633752143, 0.41147647589610803, 0.6063082992397912, 0.9103565379264364, 0.16456579813368521]
+        test_uniform(A)
     end
 end
 
@@ -330,16 +298,24 @@ for f in (:<, :<=, :>, :>=, :(==), :(!=))
 end
 
 # test all rand APIs
-for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()])
+for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro(0)], [SeedHasher(0)])
+    realrng = rng == [] ? default_rng() : only(rng)
     ftypes = [Float16, Float32, Float64, FakeFloat64, BigFloat]
     cftypes = [ComplexF16, ComplexF32, ComplexF64, ftypes...]
-    types = [Bool, Char, BigFloat, Base.BitInteger_types..., ftypes...]
+    types = [Bool, Char, BigFloat, Tuple{Bool, Tuple{Int, Char}}, Pair{Int8, UInt32},
+             Base.BitInteger_types..., cftypes...]
     randset = Set(rand(Int, 20))
     randdict = Dict(zip(rand(Int,10), rand(Int, 10)))
+
+    randwidetup = Tuple{Bool, Char, Vararg{Tuple{Int, Float64}, 14}}
+    @inferred rand(rng..., randwidetup)
+
     collections = [BitSet(rand(1:100, 20))          => Int,
                    randset                          => Int,
                    GenericSet(randset)              => Int,
                    randdict                         => Pair{Int,Int},
+                   keys(randdict)                   => Int,
+                   values(randdict)                 => Int,
                    GenericDict(randdict)            => Pair{Int,Int},
                    1:100                            => Int,
                    rand(Int, 100)                   => Int,
@@ -354,42 +330,56 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()])
     b2 = big(2)
     u3 = UInt(3)
     for f in [rand, randn, randexp]
-        f(rng...)                     ::Float64
-        f(rng..., 5)                  ::Vector{Float64}
-        f(rng..., 2, 3)               ::Array{Float64, 2}
-        f(rng..., b2, u3)             ::Array{Float64, 2}
+        f1 = f(rng...)                     ::Float64
+        f2 = f(rng..., 5)                  ::Vector{Float64}
+        f3 = f(rng..., 2, 3)               ::Array{Float64, 2}
+        f4 = f(rng..., b2, u3)             ::Array{Float64, 2}
+        @test size(f1) == ()
+        @test size(f2) == (5,)
+        @test size(f3) == size(f4) == (2, 3)
         for T in functypes[f]
-            a0 = f(rng..., T)         ::T
-            a1 = f(rng..., T, 5)      ::Vector{T}
-            a2 = f(rng..., T, 2, 3)   ::Array{T, 2}
-            a3 = f(rng..., T, b2, u3) ::Array{T, 2}
-            a4 = f(rng..., T, (2, 3)) ::Array{T, 2}
-            if T <: AbstractFloat && f === rand
-                for a in [a0, a1..., a2..., a3..., a4...]
-                    @test 0.0 <= a < 1.0
+            tts = f == rand ? (T, Sampler(realrng, T, Val(1)), Sampler(realrng, T, Val(Inf))) : (T,)
+            for tt in tts
+                a0 = f(rng..., tt)         ::T
+                a1 = f(rng..., tt, 5)      ::Vector{T}
+                a2 = f(rng..., tt, 2, 3)   ::Array{T, 2}
+                a3 = f(rng..., tt, b2, u3) ::Array{T, 2}
+                a4 = f(rng..., tt, (2, 3)) ::Array{T, 2}
+                if T <: Number
+                    @test size(a0) == ()
+                end
+                @test size(a1) == (5,)
+                @test size(a2) == size(a3) == size(a4) == (2, 3)
+                if T <: AbstractFloat && f === rand
+                    for a in T[a0, a1..., a2..., a3..., a4...]
+                        @test 0.0 <= a < 1.0
+                    end
                 end
             end
         end
     end
     for (C, T) in collections
-        a0  = rand(rng..., C)                                                       ::T
-        a1  = rand(rng..., C, 5)                                                    ::Vector{T}
-        a2  = rand(rng..., C, 2, 3)                                                 ::Array{T, 2}
-        a3  = rand(rng..., C, (2, 3))                                               ::Array{T, 2}
-        a4  = rand(rng..., C, b2, u3)                                               ::Array{T, 2}
-        a5  = rand!(rng..., Array{T}(undef, 5), C)                          ::Vector{T}
-        a6  = rand!(rng..., Array{T}(undef, 2, 3), C)                       ::Array{T, 2}
-        a7  = rand!(rng..., GenericArray{T}(undef, 5), C)                   ::GenericArray{T, 1}
-        a8  = rand!(rng..., GenericArray{T}(undef, 2, 3), C)                ::GenericArray{T, 2}
-        a9  = rand!(rng..., OffsetArray(Array{T}(undef, 5), 9), C)          ::OffsetArray{T, 1}
-        a10 = rand!(rng..., OffsetArray(Array{T}(undef, 2, 3), (-2, 4)), C) ::OffsetArray{T, 2}
-        @test size(a1) == (5,)
-        @test size(a2) == size(a3) == (2, 3)
-        for a in [a0, a1..., a2..., a3..., a4..., a5..., a6..., a7..., a8..., a9..., a10...]
-            if C isa Type
-                @test a isa C
-            else
-                @test a in C
+        for cc = (C, Sampler(realrng, C, Val(1)), Sampler(realrng, C, Val(Inf)))
+            a0  = rand(rng..., cc)                                               ::T
+            a1  = rand(rng..., cc, 5)                                            ::Vector{T}
+            a2  = rand(rng..., cc, 2, 3)                                         ::Array{T, 2}
+            a3  = rand(rng..., cc, (2, 3))                                       ::Array{T, 2}
+            a4  = rand(rng..., cc, b2, u3)                                       ::Array{T, 2}
+            a5  = rand!(rng..., Array{T}(undef, 5), cc)                          ::Vector{T}
+            a6  = rand!(rng..., Array{T}(undef, 2, 3), cc)                       ::Array{T, 2}
+            a7  = rand!(rng..., GenericArray{T}(undef, 5), cc)                   ::GenericArray{T, 1}
+            a8  = rand!(rng..., GenericArray{T}(undef, 2, 3), cc)                ::GenericArray{T, 2}
+            a9  = rand!(rng..., OffsetArray(Array{T}(undef, 5), 9), cc)          ::OffsetArray{T, 1}
+            a10 = rand!(rng..., OffsetArray(Array{T}(undef, 2, 3), (-2, 4)), cc) ::OffsetArray{T, 2}
+            a11 = rand!(rng..., Memory{T}(undef, 5), cc)                         ::Memory{T}
+            @test size(a1) == (5,)
+            @test size(a2) == size(a3) == (2, 3)
+            for a in [a0, a1..., a2..., a3..., a4..., a5..., a6..., a7..., a8..., a9..., a10..., a11...]
+                if C isa Type
+                    @test a isa C
+                else
+                    @test a in C
+                end
             end
         end
     end
@@ -401,15 +391,18 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()])
     end
     for f! in [rand!, randn!, randexp!]
         for T in functypes[f!]
+            (T <: Tuple || T <: Pair) && continue
             X = T == Bool ? T[0,1] : T[0,1,2]
             for A in (Vector{T}(undef, 5),
+                      Memory{T}(undef, 5),
                       Matrix{T}(undef, 2, 3),
                       GenericArray{T}(undef, 5),
                       GenericArray{T}(undef, 2, 3),
                       OffsetArray(Array{T}(undef, 5), -3),
                       OffsetArray(Array{T}(undef, 2, 3), (4, 5)))
                 local A
-                f!(rng..., A)                    ::typeof(A)
+                A2 = f!(rng..., A)               ::typeof(A)
+                @test A2 === A
                 if f! === rand!
                     f!(rng..., A, X)             ::typeof(A)
                     if A isa Array && T !== Char # Char/Integer comparison
@@ -421,11 +414,16 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()])
         end
     end
 
-    bitrand(rng..., 5)             ::BitArray{1}
-    bitrand(rng..., 2, 3)          ::BitArray{2}
-    bitrand(rng..., b2, u3)        ::BitArray{2}
-    rand!(rng..., BitVector(undef, 5))     ::BitArray{1}
-    rand!(rng..., BitMatrix(undef, 2, 3))  ::BitArray{2}
+    z1 = bitrand(rng..., 5)             ::BitArray{1}
+    @test size(z1) == (5,)
+    z2 = bitrand(rng..., 2, 3)          ::BitArray{2}
+    @test size(z2) == (2, 3)
+    z3 = bitrand(rng..., b2, u3)        ::BitArray{2}
+    @test size(z3) == (b2, u3)
+    z4 = rand!(rng..., BitVector(undef, 5))     ::BitArray{1}
+    @test size(z4) == (5,)
+    z5 = rand!(rng..., BitMatrix(undef, 2, 3))  ::BitArray{2}
+    @test size(z5) == (2, 3)
 
     # Test that you cannot call randn or randexp with non-Float types.
     for r in [randn, randexp]
@@ -441,6 +439,10 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()])
         @test_throws MethodError r(rng..., Number, (2,3))
         @test_throws MethodError r(rng..., Any, 1)
     end
+
+    # Test that you cannot call rand with a tuple type of unknown size or with isbits parameters
+    @test_throws ArgumentError rand(rng..., Tuple{Vararg{Int}})
+    @test_throws TypeError rand(rng..., Tuple{1:2})
 end
 
 function hist(X, n)
@@ -451,17 +453,23 @@ function hist(X, n)
     v
 end
 
-# test uniform distribution of floats
-for rng in [MersenneTwister(), RandomDevice(), Xoshiro()],
-    T in [Float16, Float32, Float64, BigFloat],
+@testset "uniform distribution of floats" begin
+    seed = rand(UInt128)
+    for rng in [MersenneTwister(seed), RandomDevice(), Xoshiro(seed), SeedHasher(seed)],
+        T in [Float16, Float32, Float64, BigFloat],
         prec in (T == BigFloat ? [3, 53, 64, 100, 256, 1000] : [256])
-    setprecision(BigFloat, prec) do
-        # array version
-        counts = hist(rand(rng, T, 2000), 4)
-        @test minimum(counts) > 300 # should fail with proba < 1e-26
-        # scalar version
-        counts = hist([rand(rng, T) for i in 1:2000], 4)
-        @test minimum(counts) > 300
+
+        setprecision(BigFloat, prec) do
+            if precision(T) >= precision(Float32)
+                @test rand(rng, T) != rand(rng, T)
+            end
+            # array version
+            counts = hist(rand(rng, T, 2000), 4)
+            @test minimum(counts) > 300 # should fail with proba < 1e-26
+            # scalar version
+            counts = hist([rand(rng, T) for i in 1:2000], 4)
+            @test minimum(counts) > 300
+        end
     end
 end
 
@@ -474,7 +482,8 @@ end
         # but also for 3 linear combinations of positions (for the array version)
         lcs = unique!.([rand(1:n, 2), rand(1:n, 3), rand(1:n, 5)])
         aslcs = zeros(Int, 3)
-        for rng = (MersenneTwister(), RandomDevice(), Xoshiro())
+        seed = rand(UInt128)
+        for rng = (MersenneTwister(seed), RandomDevice(), Xoshiro(seed), SeedHasher(seed))
             for scalar = [false, true]
                 fill!(a, 0)
                 fill!(as, 0)
@@ -523,6 +532,7 @@ end
     @test shuffle!(mta,Vector(1:10)) == shuffle!(mtb,Vector(1:10))
     @test shuffle(mta,Vector(2:11)) == shuffle(mtb,2:11)
     @test shuffle!(mta, rand(mta, 2, 3)) == shuffle!(mtb, rand(mtb, 2, 3))
+    @test shuffle!(mta, rand(mta, Bool, 2, 3)) == shuffle!(mtb, rand(mtb, Bool, 2, 3))
     @test shuffle(mta, rand(mta, 2, 3)) == shuffle(mtb, rand(mtb, 2, 3))
 
     @test randperm(mta,10) == randperm(mtb,10)
@@ -544,6 +554,14 @@ end
     @test randcycle!(mta, A) == randcycle!(mtb, B)
     @test randcycle!(A) === A
 
+    @testset "non-`Array` `randperm!` and `randcycle!`" begin
+        x, y = Memory{Int}(undef, 10), Memory{Int}(undef, 10)
+        @test randperm!(mta, x) == randperm!(mtb, y)
+        @test randperm!(x) === x
+        @test randcycle!(mta, x) == randcycle!(mtb, y)
+        @test randcycle!(x) === x
+    end
+
     let p = randcycle(UInt16(10))
         @test typeof(p) ≡ Vector{UInt16}
         @test sort!(p) == 1:10
@@ -588,72 +606,64 @@ end
     end
 end
 
-# test that the following is not an error (#16925)
-guardseed() do
-    Random.seed!(typemax(UInt))
-    Random.seed!(typemax(UInt128))
-end
-
-# copy, == and hash
-let seed = rand(UInt32, 10)
-    r = MersenneTwister(seed)
-    @test r == MersenneTwister(seed) # r.vals should be all zeros
-    @test hash(r) == hash(MersenneTwister(seed))
-    s = copy(r)
-    @test s == r && s !== r
-    @test hash(s) == hash(r)
-    skip, len = rand(0:2000, 2)
-    for j=1:skip
-        rand(r)
-        rand(s)
-    end
-    @test rand(r, len) == rand(s, len)
-    @test s == r
-    @test hash(s) == hash(r)
-    h = rand(UInt)
-    @test hash(s, h) == hash(r, h)
+@testset "copy, == and hash" begin
+    for RNG = (MersenneTwister, Xoshiro)
+        seed = rand(UInt32, 10)
+        r = RNG(seed)
+        t = RNG(seed)
+        @test r == t
+        @test hash(r) == hash(t)
+        s = copy(r)
+        @test s == r == t && s !== r
+        @test hash(s) == hash(r)
+        skip, len = rand(0:2000, 2)
+        for j=1:skip
+            rand(r)
+            @test r != s
+            @test hash(r) != hash(s)
+            rand(s)
+        end
+        @test rand(r, len) == rand(s, len)
+        @test s == r
+        @test hash(s) == hash(r)
+        h = rand(UInt)
+        @test hash(s, h) == hash(r, h)
+        if RNG == Xoshiro
+            t = copy(TaskLocalRNG())
+            @test hash(t) == hash(TaskLocalRNG())
+            @test hash(t, h) == hash(TaskLocalRNG(), h)
+            x = rand()
+            @test hash(t) != hash(TaskLocalRNG())
+            @test rand(t) == x
+            @test hash(t) == hash(TaskLocalRNG())
+            copy!(TaskLocalRNG(), r)
+            @test hash(TaskLocalRNG()) == hash(r)
+            @test TaskLocalRNG() == r
+        end
+    end
 end
 
 # MersenneTwister initialization with invalid values
 @test_throws DomainError DSFMT.DSFMT_state(zeros(Int32, rand(0:DSFMT.JN32-1)))
 
-@test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
-                                         zeros(Float64, 10), zeros(UInt128, MT_CACHE_I>>4), 0, 0, 0, 0, -1, -1)
-
-@test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
-                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>4), -1, 0, 0, 0, -1, -1)
-
-@test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
-                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>3), 0, 0, 0, 0, -1, -1)
-
-@test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
-                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>4), 0, -1, 0, 0, -1, -1)
-
-# seed is private to MersenneTwister
-let seed = rand(UInt32, 10)
-    r = MersenneTwister(seed)
-    @test r.seed == seed && r.seed !== seed
-    # RNGs do not share their seed in randjump
-    let r2 = Future.randjump(r, big(10)^20)
-        @test  r.seed !== r2.seed
-        Random.seed!(r2)
-        @test seed == r.seed != r2.seed
+@testset "Random.seed!(rng, ...) returns rng" begin
+    # issue #21248
+    seed = rand(UInt)
+    for m = ([MersenneTwister(seed)], [Xoshiro(seed)], [SeedHasher(seed)], [])
+        m2 = m == [] ? default_rng() : m[1]
+        @test Random.seed!(m...) === m2
+        @test Random.seed!(m..., rand(UInt)) === m2
+        @test Random.seed!(m..., rand(UInt32, rand(1:10))) === m2
+        @test Random.seed!(m..., rand(1:10)) === m2
+        # Try a seed larger than 2^32
+        @test Random.seed!(m..., 5294967296) === m2
+
+        # test that the following is not an error (#16925)
+        @test Random.seed!(m..., typemax(UInt)) === m2
+        @test Random.seed!(m..., typemax(UInt128)) === m2
+        @test Random.seed!(m..., "a random seed") === m2
+        @test Random.seed!(m..., Random.default_rng()) === m2
     end
-    resize!(seed, 4)
-    @test r.seed != seed
-end
-
-# Random.seed!(rng, ...) returns rng (#21248)
-guardseed() do
-    g = Random.default_rng()
-    m = MersenneTwister(0)
-    @test Random.seed!() === g
-    @test Random.seed!(rand(UInt)) === g
-    @test Random.seed!(rand(UInt32, rand(1:8))) === g
-    @test Random.seed!(m) === m
-    @test Random.seed!(m, rand(UInt)) === m
-    @test Random.seed!(m, rand(UInt32, rand(1:10))) === m
-    @test Random.seed!(m, rand(1:10)) === m
 end
 
 # Issue 20062 - ensure internal functions reserve_1, reserve are type-stable
@@ -685,7 +695,7 @@ end
 # this shouldn't crash (#22403)
 @test_throws MethodError rand!(Union{UInt,Int}[1, 2, 3])
 
-@testset "$RNG() & Random.seed!(rng::$RNG) initializes randomly" for RNG in (MersenneTwister, RandomDevice, Xoshiro)
+@testset "$RNG() & Random.seed!(rng::$RNG) initializes randomly" for RNG in (MersenneTwister, RandomDevice, Xoshiro, SeedHasher)
     m = RNG()
     a = rand(m, Int)
     m = RNG()
@@ -706,8 +716,8 @@ end
     @test rand(m, Int) ∉ (a, b, c, d)
 end
 
-@testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro)
-    seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...]
+@testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister, Xoshiro, SeedHasher)
+    seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), randstring(), randstring(), rand(UInt128, 3)...]
     if RNG == Xoshiro
         push!(seeds, rand(UInt64, rand(1:4)))
     end
@@ -717,40 +727,27 @@ end
         Random.seed!(m, seed)
         @test a == [rand(m) for _=1:100]
     end
+    # rng as a seed
+    m = RNG(Xoshiro(0))
+    a = [rand(m) for _=1:100]
+    Random.seed!(m, Xoshiro(0))
+    @test a == [rand(m) for _=1:100]
 end
 
 @testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin
-    seeds = Any[0, rand(UInt128), rand(UInt64, 4)]
+    seeds = Any[0, rand(UInt128), rand(UInt64, 4), randstring(20)]
 
     for seed=seeds
         Random.seed!(seed)
-        @test Random.GLOBAL_SEED === seed
+        @test Random.get_tls_seed() == default_rng()
     end
-    # two separate loops as otherwise we are no sure that the second call (with GLOBAL_RNG)
-    # actually sets GLOBAL_SEED
-    for seed=seeds
-        Random.seed!(Random.GLOBAL_RNG, seed)
-        @test Random.GLOBAL_SEED === seed
-    end
-
-    Random.seed!(nothing)
-    seed1 = Random.GLOBAL_SEED
-    @test seed1 isa Vector{UInt64} # could change, but must not be nothing
-
-    Random.seed!(Random.GLOBAL_RNG, nothing)
-    seed2 = Random.GLOBAL_SEED
-    @test seed2 isa Vector{UInt64}
-    @test seed2 != seed1
 
-    Random.seed!()
-    seed3 = Random.GLOBAL_SEED
-    @test seed3 isa Vector{UInt64}
-    @test seed3 != seed2
-
-    Random.seed!(Random.GLOBAL_RNG)
-    seed4 = Random.GLOBAL_SEED
-    @test seed4 isa Vector{UInt64}
-    @test seed4 != seed3
+    for ii = 1:8
+        iseven(ii) ? Random.seed!(nothing) : Random.seed!()
+        push!(seeds, copy(Random.get_tls_seed()))
+        @test Random.get_tls_seed() isa Xoshiro # could change, but must not be nothing
+    end
+    @test allunique(seeds)
 end
 
 struct RandomStruct23964 end
@@ -759,10 +756,24 @@ struct RandomStruct23964 end
     @test_throws MethodError rand(RandomStruct23964())
 end
 
-@testset "rand(::$(typeof(RNG)), ::UnitRange{$T}" for RNG ∈ (MersenneTwister(rand(UInt128)), RandomDevice(), Xoshiro()),
-                                                        T ∈ (Int8, Int16, Int32, UInt32, Int64, Int128, UInt128)
-    for S in (SamplerRangeInt, SamplerRangeFast, SamplerRangeNDL)
-        S == SamplerRangeNDL && sizeof(T) > 8 && continue
+@testset "rand(::$(typeof(RNG)), ::UnitRange{$T}" for RNG ∈ (MersenneTwister(rand(UInt128)),
+                                                             RandomDevice(),
+                                                             Xoshiro(rand(UInt128)),
+                                                             SeedHasher(rand(UInt128))),
+                                                        T ∈ (Bool, Int8, Int16, Int32, UInt32, Int64, Int128, UInt128)
+    if T === Bool
+        @test rand(RNG, false:true) ∈ (false, true)
+        @test rand(RNG, false:false) === false
+        @test rand(RNG, true:true) === true
+        @test_throws ArgumentError rand(RNG, true:false)
+        continue
+    end
+    for S in (identity, SamplerRangeInt, SamplerRangeFast, SamplerRangeNDL)
+        if T === Int32 && RNG isa MersenneTwister
+            @test minimum([rand(RNG, T(1):T(7^7)) for i = 1:100000]) > 0
+        end
+
+        (S == SamplerRangeNDL || S == identity) && sizeof(T) > 8 && continue
         r = T(1):T(108)
         @test rand(RNG, S(r)) ∈ r
         @test rand(RNG, S(typemin(T):typemax(T))) isa T
@@ -803,10 +814,32 @@ end
     end
 end
 
+@testset "rand(::Type{<:Tuple})" begin
+    @test_throws ArgumentError rand(Tuple)
+    @test rand(Tuple{}) == ()
+    @inferred rand(Tuple{Int32,Int64,Float64})
+    @inferred rand(NTuple{20,Int})
+    @test_throws TypeError rand(Tuple{1:2,3:4})
+
+    @testset "rand(::RandomDevice, ::Type{NTuple{N, Int}})" begin
+        # RandomDevice has a specialization for homogeneous tuple types of builtin integers
+        rd = RandomDevice()
+        @test () == rand(rd, Tuple{})
+        xs = rand(rd, Tuple{Int, Int})
+        @test xs isa Tuple{Int, Int} && xs[1] != xs[2]
+        xs = rand(rd, NTuple{2, Int})
+        @test xs isa Tuple{Int, Int} && xs[1] != xs[2]
+        xs = rand(rd, Tuple{Int, UInt}) # not NTuple
+        @test xs isa Tuple{Int, UInt} && xs[1] != xs[2]
+        xs = rand(rd, Tuple{Bool}) # not included in the specialization
+        @test xs isa Tuple{Bool}
+    end
+end
+
 @testset "GLOBAL_RNG" begin
+    @test VERSION < v"2" # deprecate this in v2 (GLOBAL_RNG must go)
     local GLOBAL_RNG = Random.GLOBAL_RNG
     local LOCAL_RNG = Random.default_rng()
-    @test VERSION < v"2" # deprecate this in v2
 
     @test Random.seed!(GLOBAL_RNG, nothing) === LOCAL_RNG
     @test Random.seed!(GLOBAL_RNG, UInt32[0]) === LOCAL_RNG
@@ -869,8 +902,11 @@ end
     @test rand(rng) == rand(GLOBAL_RNG)
 end
 
-@testset "RNGs broadcast as scalars: T" for T in (MersenneTwister, RandomDevice)
-    @test length.(rand.(T(), 1:3)) == 1:3
+@testset "RNGs broadcast as scalars: $(typeof(RNG))" for RNG in (MersenneTwister(0),
+                                                                 RandomDevice(),
+                                                                 Xoshiro(0),
+                                                                 SeedHasher(0))
+    @test length.(rand.(RNG, 1:3)) == 1:3
 end
 
 @testset "generated scalar integers do not overlap" begin
@@ -909,27 +945,28 @@ end
 @testset "show" begin
     @testset "MersenneTwister" begin
         m = MersenneTwister(123)
-        @test string(m) == "MersenneTwister(123)"
+        @test string(m) == "MersenneTwister(0xf80cc98e147960c1fefa8d41b8f5dca5, 0xea7a7dcb2e787c0120e2ccc17662fc1d)"
+        @test m == MersenneTwister(0xf80cc98e147960c1fefa8d41b8f5dca5, 0xea7a7dcb2e787c0120e2ccc17662fc1d)
         Random.jump!(m, 2*big(10)^20)
-        @test string(m) == "MersenneTwister(123, (200000000000000000000, 0))"
-        @test m == MersenneTwister(123, (200000000000000000000, 0))
+        @test string(m) == "MersenneTwister(0xf80cc98e147960c1fefa8d41b8f5dca5, 0xea7a7dcb2e787c0120e2ccc17662fc1d, 200000000000000000000, 0)"
+        @test m == MersenneTwister(0xf80cc98e147960c1fefa8d41b8f5dca5, 0xea7a7dcb2e787c0120e2ccc17662fc1d, 200000000000000000000, 0)
         rand(m)
-        @test string(m) == "MersenneTwister(123, (200000000000000000000, 1002, 0, 1))"
+        @test string(m) == "MersenneTwister(0xf80cc98e147960c1fefa8d41b8f5dca5, 0xea7a7dcb2e787c0120e2ccc17662fc1d, 200000000000000000000, 1002, 0, 1)"
 
-        @test m == MersenneTwister(123, (200000000000000000000, 1002, 0, 1))
+        @test m == MersenneTwister(0xf80cc98e147960c1fefa8d41b8f5dca5, 0xea7a7dcb2e787c0120e2ccc17662fc1d, 200000000000000000000, 1002, 0, 1)
         rand(m, Int64)
-        @test string(m) == "MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1))"
-        @test m == MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1))
+        @test string(m) == "MersenneTwister(0xf80cc98e147960c1fefa8d41b8f5dca5, 0xea7a7dcb2e787c0120e2ccc17662fc1d, 200000000000000000000, 2256, 0, 1, 1002, 1)"
+        @test m == MersenneTwister(0xf80cc98e147960c1fefa8d41b8f5dca5, 0xea7a7dcb2e787c0120e2ccc17662fc1d, 200000000000000000000, 2256, 0, 1, 1002, 1)
 
         m = MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b)
-        @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b)"
+        @test string(m) == "MersenneTwister(0x07a0cc280198a55c39fa6f802d242f8b, 0x8472a002c9dd8879235ae29f67bc7496)"
         rand(m, Int64)
-        @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))"
-        @test m == MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))
+        @test string(m) == "MersenneTwister(0x07a0cc280198a55c39fa6f802d242f8b, 0x8472a002c9dd8879235ae29f67bc7496, 0, 1254, 0, 0, 0, 1)"
+        @test m == MersenneTwister(0x07a0cc280198a55c39fa6f802d242f8b, 0x8472a002c9dd8879235ae29f67bc7496, 0, 1254, 0, 0, 0, 1)
 
         m = MersenneTwister(0); rand(m, Int64); rand(m)
-        @test string(m) == "MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))"
-        @test m == MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))
+        @test string(m) == "MersenneTwister(0x48d73dc42d195740db2fa90498613fdf, 0x1911b814c02405e88c49bc52dc8a77ea, 0, 2256, 1254, 1, 0, 1)"
+        @test m == MersenneTwister(0x48d73dc42d195740db2fa90498613fdf, 0x1911b814c02405e88c49bc52dc8a77ea, 0, 2256, 1254, 1, 0, 1)
     end
 
     @testset "RandomDevice" begin
@@ -996,6 +1033,23 @@ end
     @test maximum(m) <= 0.106
 end
 
+@testset "`shuffle(::NTuple)`" begin
+    @testset "sorted" begin
+        for n ∈ 0:20
+            tup = ntuple(identity, n)
+            @test tup === sort(@inferred shuffle(tup))
+        end
+    end
+    @testset "not identity" begin
+        function shuffle_is_identity()
+            tup = ntuple(identity, 9)
+            tup === shuffle(tup)
+        end
+        # shuffling may behave as the identity sometimes, but if it doesn't manage to actually reorder some of the elements at least once, something is wrong
+        @test any((_ -> !shuffle_is_identity()), 1:1000000)
+    end
+end
+
 # issue #42752
 # test that running finalizers that launch tasks doesn't change RNG stream
 function f42752(do_gc::Bool, cell = (()->Any[[]])())
@@ -1065,3 +1119,170 @@ end
         end
     end
 end
+
+@testset "TaskLocalRNG: copy and copy! handle the splitmix state" begin
+    seeds = rand(RandomDevice(), UInt64, 5)
+    for seed in seeds
+        Random.seed!(seed)
+        rng1 = copy(TaskLocalRNG())
+        x = fetch(@async rand(UInt64))
+        rng2 = copy(TaskLocalRNG())
+        y = fetch(@async rand(UInt64))
+        rng3 = copy(TaskLocalRNG())
+        @test x != y
+        @test rng1 != rng2
+        Random.seed!(seed)
+        @test TaskLocalRNG() == rng1
+        @test x == fetch(@async rand(UInt64))
+        @test TaskLocalRNG() == rng2
+        # this should be a no-op:
+        copy!(TaskLocalRNG(), copy(TaskLocalRNG()))
+        @test TaskLocalRNG() == rng2
+        @test y == fetch(@async rand(UInt64))
+        @test TaskLocalRNG() == rng3
+    end
+end
+
+# Xoshiro jumps
+@testset "Xoshiro jump, basic" begin
+    x1 = Xoshiro(1)
+    x2 = Xoshiro(1)
+
+    @test x1 === jump_128!(jump_128!(x1))
+    @test x2 === jump_128!(x2, 2)
+    @test x1 == x2
+
+    xo1 = Xoshiro(0xfff0241072ddab67, 0xc53bc12f4c3f0b4e, 0x56d451780b2dd4ba, 0x50a4aa153d208dd8)
+    @test rand(jump_128(xo1), UInt64) == 0x87c158da8c35824d
+    @test rand(jump_192(xo1), UInt64) == 0xcaecd5afdd0847d5
+
+    @test rand(jump_128(xo1, 98765), UInt64) == 0xcbec1d5053142608
+    @test rand(jump_192(xo1, 98765), UInt64) == 0x3b97a94c44d66216
+
+    # Throws where appropriate
+    @test_throws DomainError jump_128(Xoshiro(1), -1)
+    @test_throws DomainError jump_128!(Xoshiro(1), -1)
+    @test_throws DomainError jump_192(Xoshiro(1), -1)
+    @test_throws DomainError jump_192!(Xoshiro(1), -1)
+
+    # clean copy when non-mut and no state advance
+    x = Xoshiro(1)
+    @test jump_128(x, 0) == x
+    @test jump_128(x, 0) !== x
+    @test jump_192(x, 0) == x
+    @test jump_192(x, 0) !== x
+
+    y = Xoshiro(1)
+    @test jump_128!(x, 0) == y
+    @test jump_192!(x, 0) == y
+end
+
+@testset "Xoshiro jump_128, various seeds" begin
+    for seed in (0, 1, 0xa0a3f09d0cecd878, 0x7ff8)
+        x = Xoshiro(seed)
+        @test jump_128(jump_128(jump_128(x))) == jump_128(x, 3)
+        x1 = Xoshiro(seed)
+        @test jump_128!(jump_128!(jump_128!(x1))) == jump_128(x, 3)
+        jump_128!(x1, 997)
+        x2 = jump_128!(Xoshiro(seed), 1000)
+        for T ∈ (Float64, UInt64, Int, Char, Bool)
+            @test rand(x1, T, 5) == rand(x2, T, 5)
+            @test rand(jump_128!(x1), T, 5) == rand(jump_128!(x2), T, 5)
+        end
+    end
+end
+
+@testset "Xoshiro jump_192, various seeds" begin
+    for seed in (0, 1, 0xa0a3f09d0cecd878, 0x7ff8)
+        x = Xoshiro(seed)
+        @test jump_192(jump_192(jump_192(x))) == jump_192(x, 3)
+        x1 = Xoshiro(seed)
+        @test jump_192!(jump_192!(jump_192!(x1))) == jump_192(x, 3)
+        jump_192!(x1, 997)
+        x2 = jump_192!(Xoshiro(seed), 1000)
+        for T ∈ (Float64, UInt64, Int, Char, Bool)
+            @test rand(x1, T, 5) == rand(x2, T, 5)
+            @test rand(jump_192!(x1), T, 5) == rand(jump_192!(x2), T, 5)
+        end
+    end
+end
+
+
+@testset "seed! and hash_seed" begin
+    function hash_seed(seed)
+        ctx = SHA.SHA2_256_CTX()
+        Random.hash_seed(seed, ctx)
+        bytes2hex(SHA.digest!(ctx))
+    end
+
+    # Test that:
+    # 1) if n == m, then hash_seed(n) == hash_seed(m)
+    # 2) if n != m, then hash_seed(n) != hash_seed(m)
+    rngs = (Xoshiro(0), TaskLocalRNG(), MersenneTwister(0))
+    seeds = Any[]
+    for T = Base.BitInteger_types
+        append!(seeds, rand(T, 8))
+        push!(seeds, typemin(T), typemin(T) + T(1), typemin(T) + T(2),
+              typemax(T), typemax(T) - T(1), typemax(T) - T(2))
+        T <: Signed && push!(seeds, T(0), T(1), T(2), T(-1), T(-2))
+    end
+
+    vseeds = Dict{String, BigInt}()
+    for seed = seeds
+        bigseed = big(seed)
+        vseed = hash_seed(bigseed)
+        # test property 1) above
+        @test hash_seed(seed) == vseed
+        # test property 2) above
+        @test bigseed == get!(vseeds, vseed, bigseed)
+        # test that the property 1) is actually inherited by `seed!`
+        for rng = rngs
+            rng2 = copy(Random.seed!(rng, seed))
+            Random.seed!(rng, bigseed)
+            @test rng == rng2
+        end
+    end
+
+    seed32 = rand(UInt32, rand(1:9))
+    hash32 = hash_seed(seed32)
+    @test hash_seed(map(UInt64, seed32)) == hash32
+    @test hash32 ∉ keys(vseeds)
+
+    seed_str = randstring()
+    seed_gstr = GenericString(seed_str)
+    @test hash_seed(seed_str) == hash_seed(seed_gstr)
+    string_seeds = Set{String}()
+    for ch = 'A':'z'
+        vseed = hash_seed(string(ch))
+        @test vseed ∉ keys(vseeds)
+        @test vseed ∉ string_seeds
+        push!(string_seeds, vseed)
+    end
+end
+
+@testset "rand(::Type{<:Pair})" begin
+    @test rand(Pair{Int, Int}) isa Pair{Int, Int}
+    @test rand(Pair{Int, Float64}) isa Pair{Int, Float64}
+    @test rand(Pair{Int, Float64}, 3) isa Array{Pair{Int, Float64}}
+
+    # test that making an array out of a sampler works
+    # (i.e. that gentype(sp) is correct)
+    sp = Random.Sampler(AbstractRNG, Pair{Bool, Char})
+    xs = rand(sp, 3)
+    @test xs isa Vector{Pair{Bool, Char}}
+    @test length(xs) == 3
+end
+
+@testset "Float32 RNG typo" begin
+    for T in (Float16, Float32, Float64)
+        # Make sure generated numbers are sufficiently diverse
+        # for both SIMD and non-SIMD RNG code paths for all types.
+        @test length(unique!(rand(T, 7))) > 3
+        @test length(unique!(rand(T, 14))) > 10
+        @test length(unique!(rand(T, 34))) > 20
+    end
+end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Random))
+end
diff --git a/stdlib/SHA.version b/stdlib/SHA.version
index f2242a336c6fe..63a7ce3a0a4a1 100644
--- a/stdlib/SHA.version
+++ b/stdlib/SHA.version
@@ -1,4 +1,4 @@
 SHA_BRANCH = master
-SHA_SHA1 = 2d1f84e6f8417a1a368de48318640d948b023e7a
+SHA_SHA1 = 876bc0400f9a457eb2736388fc3d0fbe9460fc7d
 SHA_GIT_URL := https://github.com/JuliaCrypto/SHA.jl.git
 SHA_TAR_URL = https://api.github.com/repos/JuliaCrypto/SHA.jl/tarball/$1
diff --git a/stdlib/Serialization/Project.toml b/stdlib/Serialization/Project.toml
index 4a2f7874e3124..97e898d731c7d 100644
--- a/stdlib/Serialization/Project.toml
+++ b/stdlib/Serialization/Project.toml
@@ -1,5 +1,6 @@
 name = "Serialization"
 uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Serialization/docs/src/index.md b/stdlib/Serialization/docs/src/index.md
index 9f593a2e807d9..77c7558e0306a 100644
--- a/stdlib/Serialization/docs/src/index.md
+++ b/stdlib/Serialization/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Serialization/docs/src/index.md"
+```
+
 # Serialization
 
 Provides serialization of Julia objects.
@@ -7,3 +11,15 @@ Serialization.serialize
 Serialization.deserialize
 Serialization.writeheader
 ```
+
+### Recommended File Extension
+
+While the Serialization module does not mandate a specific file extension, the Julia community commonly uses the `.jls` extension for serialized Julia files.
+
+Example:
+
+```julia
+open("model.jls", "w") do io
+    serialize(io, my_model)
+end
+```
diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl
index 7c1043f33bdfe..ee40ebdd4abad 100644
--- a/stdlib/Serialization/src/Serialization.jl
+++ b/stdlib/Serialization/src/Serialization.jl
@@ -7,7 +7,8 @@ Provide serialization of Julia objects via the functions
 """
 module Serialization
 
-import Base: GMP, Bottom, unsafe_convert, uncompressed_ast
+import Base: Bottom, unsafe_convert
+import Base.ScopedValues: ScopedValue, with
 import Core: svec, SimpleVector
 using Base: unaliascopy, unwrap_unionall, require_one_based_indexing, ntupleany
 using Core.IR
@@ -28,6 +29,8 @@ end
 
 Serializer(io::IO) = Serializer{typeof(io)}(io)
 
+const current_module = ScopedValue{Union{Nothing,Module}}(nothing)
+
 ## serializing values ##
 
 const n_int_literals = 33
@@ -80,7 +83,7 @@ const TAGS = Any[
 const NTAGS = length(TAGS)
 @assert NTAGS == 255
 
-const ser_version = 24 # do not make changes without bumping the version #!
+const ser_version = 30 # do not make changes without bumping the version #!
 
 format_version(::AbstractSerializer) = ser_version
 format_version(s::Serializer) = s.version
@@ -288,6 +291,31 @@ function serialize(s::AbstractSerializer, a::SubArray{T,N,A}) where {T,N,A<:Arra
     serialize_any(s, b)
 end
 
+serialize(s::AbstractSerializer, m::GenericMemory) = error("GenericMemory{:atomic} currently cannot be serialized")
+function serialize(s::AbstractSerializer, m::Memory)
+    serialize_cycle_header(s, m) && return
+    serialize(s, length(m))
+    elty = eltype(m)
+    if isbitstype(elty)
+        serialize_array_data(s.io, m)
+    else
+        sizehint!(s.table, div(length(m),4))  # prepare for lots of pointers
+        @inbounds for i in eachindex(m)
+            if isassigned(m, i)
+                serialize(s, m[i])
+            else
+                writetag(s.io, UNDEFREF_TAG)
+            end
+        end
+    end
+end
+
+function serialize(s::AbstractSerializer, x::GenericMemoryRef)
+    serialize_type(s, typeof(x))
+    serialize(s, getfield(x, :mem))
+    serialize(s, Base.memoryrefoffset(x))
+end
+
 function serialize(s::AbstractSerializer, ss::String)
     len = sizeof(ss)
     if len > 7
@@ -422,7 +450,7 @@ function serialize(s::AbstractSerializer, meth::Method)
     serialize(s, meth.constprop)
     serialize(s, meth.purity)
     if isdefined(meth, :source)
-        serialize(s, Base._uncompressed_ast(meth, meth.source))
+        serialize(s, Base._uncompressed_ast(meth))
     else
         serialize(s, nothing)
     end
@@ -442,14 +470,21 @@ function serialize(s::AbstractSerializer, meth::Method)
     nothing
 end
 
+function serialize(s::AbstractSerializer, mt::Core.MethodTable)
+    serialize_type(s, typeof(mt))
+    serialize(s, mt.name)
+    serialize(s, mt.module)
+    nothing
+end
+
+function serialize(s::AbstractSerializer, mc::Core.MethodCache)
+    error("cannot serialize MethodCache objects")
+end
+
+
 function serialize(s::AbstractSerializer, linfo::Core.MethodInstance)
     serialize_cycle(s, linfo) && return
     writetag(s.io, METHODINSTANCE_TAG)
-    if isdefined(linfo, :uninferred)
-        serialize(s, linfo.uninferred)
-    else
-        writetag(s.io, UNDEFREF_TAG)
-    end
     serialize(s, nothing)  # for backwards compat
     serialize(s, linfo.sparam_vals)
     serialize(s, Any)  # for backwards compat
@@ -511,16 +546,17 @@ function serialize_typename(s::AbstractSerializer, t::Core.TypeName)
     serialize(s, primary.super)
     serialize(s, primary.parameters)
     serialize(s, primary.types)
-    serialize(s, isdefined(primary, :instance))
+    serialize(s, Base.issingletontype(primary))
     serialize(s, t.flags & 0x1 == 0x1) # .abstract
     serialize(s, t.flags & 0x2 == 0x2) # .mutable
     serialize(s, Int32(length(primary.types) - t.n_uninitialized))
     serialize(s, t.max_methods)
-    if isdefined(t, :mt) && t.mt !== Symbol.name.mt
-        serialize(s, t.mt.name)
-        serialize(s, collect(Base.MethodList(t.mt)))
-        serialize(s, t.mt.max_args)
-        kws = collect(methods(Core.kwcall, (Any, t.wrapper, Vararg)))
+    ms = Base.matches_to_methods(Base._methods_by_ftype(Tuple{t.wrapper, Vararg}, -1, Base.get_world_counter()), t, nothing).ms
+    if t.singletonname !== t.name || !isempty(ms)
+        serialize(s, t.singletonname)
+        serialize(s, ms)
+        serialize(s, t.max_args)
+        kws = Base.matches_to_methods(Base._methods_by_ftype(Tuple{typeof(Core.kwcall), Any, t.wrapper, Vararg}, -1, Base.get_world_counter()), t, nothing).ms
         if isempty(kws)
             writetag(s.io, UNDEFREF_TAG)
         else
@@ -535,21 +571,17 @@ end
 # decide whether to send all data for a type (instead of just its name)
 function should_send_whole_type(s, t::DataType)
     tn = t.name
-    if isdefined(tn, :mt)
-        # TODO improve somehow
-        # send whole type for anonymous functions in Main
-        name = tn.mt.name
-        mod = tn.module
-        isanonfunction = mod === Main && # only Main
-            t.super === Function && # only Functions
-            unsafe_load(unsafe_convert(Ptr{UInt8}, tn.name)) == UInt8('#') && # hidden type
-            (!isdefined(mod, name) || t != typeof(getglobal(mod, name))) # XXX: 95% accurate test for this being an inner function
-            # TODO: more accurate test? (tn.name !== "#" name)
-        #TODO: iskw = startswith(tn.name, "#kw#") && ???
-        #TODO: iskw && return send-as-kwftype
-        return mod === __deserialized_types__ || isanonfunction
-    end
-    return false
+    # TODO improve somehow?
+    # send whole type for anonymous functions in Main
+    name = tn.singletonname
+    mod = tn.module
+    mod === __deserialized_types__ && return true
+    isanonfunction = mod === Main && # only Main
+        t.super === Function && # only Functions
+        unsafe_load(unsafe_convert(Ptr{UInt8}, tn.name)) == UInt8('#') && # hidden type
+        (!isdefined(mod, name) || t != typeof(getglobal(mod, name))) # XXX: 95% accurate test for this being an inner function
+        # TODO: more accurate test? (tn.name !== "#" name)
+    return isanonfunction
 end
 
 function serialize_type_data(s, @nospecialize(t::DataType))
@@ -654,6 +686,11 @@ end
 
 serialize(s::AbstractSerializer, @nospecialize(x)) = serialize_any(s, x)
 
+function serialize(s::AbstractSerializer, x::Core.AddrSpace)
+    serialize_type(s, typeof(x))
+    write(s.io, Core.bitcast(UInt8, x))
+end
+
 function serialize_any(s::AbstractSerializer, @nospecialize(x))
     tag = sertag(x)
     if tag > 0
@@ -1028,8 +1065,12 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
     isva = deserialize(s)::Bool
     is_for_opaque_closure = false
     nospecializeinfer = false
-    constprop = purity = 0x00
-    template_or_is_opaque = deserialize(s)
+    constprop = 0x00
+    purity = 0x0000
+    local template_or_is_opaque, template
+    with(current_module => mod) do
+        template_or_is_opaque = deserialize(s)
+    end
     if isa(template_or_is_opaque, Bool)
         is_for_opaque_closure = template_or_is_opaque
         if format_version(s) >= 24
@@ -1038,10 +1079,14 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
         if format_version(s) >= 14
             constprop = deserialize(s)::UInt8
         end
-        if format_version(s) >= 17
-            purity = deserialize(s)::UInt8
+        if format_version(s) >= 26
+            purity = deserialize(s)::UInt16
+        elseif format_version(s) >= 17
+            purity = UInt16(deserialize(s)::UInt8)
+        end
+        with(current_module => mod) do
+            template = deserialize(s)
         end
-        template = deserialize(s)
     else
         template = template_or_is_opaque
     end
@@ -1052,6 +1097,7 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
     end
     if makenew
         meth.module = mod
+        meth.debuginfo = NullDebugInfo
         meth.name = name
         meth.file = file
         meth.line = line
@@ -1064,7 +1110,13 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
         meth.purity = purity
         if template !== nothing
             # TODO: compress template
-            meth.source = template::CodeInfo
+            template = template::CodeInfo
+            if format_version(s) < 29
+                template.nargs = nargs
+                template.isva = isva
+            end
+            meth.source = template
+            meth.debuginfo = template.debuginfo
             if !@isdefined(slot_syms)
                 slot_syms = ccall(:jl_compress_argnames, Ref{String}, (Any,), meth.source.slotnames)
             end
@@ -1077,8 +1129,8 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
             meth.recursion_relation = recursion_relation
         end
         if !is_for_opaque_closure
-            mt = ccall(:jl_method_table_for, Any, (Any,), sig)
-            if mt !== nothing && nothing === ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), mt, sig, typemax(UInt))
+            mt = Core.methodtable
+            if nothing === ccall(:jl_methtable_lookup, Any, (Any, UInt), sig, Base.get_world_counter()) # XXX: quite sketchy?
                 ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), mt, meth, C_NULL)
             end
         end
@@ -1087,12 +1139,22 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
     return meth
 end
 
+function deserialize(s::AbstractSerializer, ::Type{Core.MethodTable})
+    name = deserialize(s)::Symbol
+    mod = deserialize(s)::Module
+    return getglobal(mod, name)::Core.MethodTable
+end
+
 function deserialize(s::AbstractSerializer, ::Type{Core.MethodInstance})
     linfo = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, (Ptr{Cvoid},), C_NULL)
     deserialize_cycle(s, linfo)
-    tag = Int32(read(s.io, UInt8)::UInt8)
-    if tag != UNDEFREF_TAG
-        setfield!(linfo, :uninferred, handle_deserialize(s, tag)::CodeInfo, :monotonic)
+    if format_version(s) < 28
+        tag = Int32(read(s.io, UInt8)::UInt8)
+        if tag != UNDEFREF_TAG
+            code = handle_deserialize(s, tag)::CodeInfo
+            ci = ccall(:jl_new_codeinst_for_uninferred, Ref{CodeInstance}, (Any, Any), linfo, code)
+            @atomic linfo.cache = ci
+        end
     end
     tag = Int32(read(s.io, UInt8)::UInt8)
     if tag != UNDEFREF_TAG
@@ -1118,6 +1180,7 @@ function deserialize(s::AbstractSerializer, ::Type{Core.LineInfoNode})
     return Core.LineInfoNode(mod, method, deserialize(s)::Symbol, Int32(deserialize(s)::Union{Int32, Int}), Int32(deserialize(s)::Union{Int32, Int}))
 end
 
+
 function deserialize(s::AbstractSerializer, ::Type{PhiNode})
     edges = deserialize(s)
     if edges isa Vector{Any}
@@ -1127,11 +1190,28 @@ function deserialize(s::AbstractSerializer, ::Type{PhiNode})
     return PhiNode(edges, values)
 end
 
+# v1.12 disallows bare symbols in IR, but older CodeInfos might still have them
+function symbol_to_globalref(@nospecialize(x), m::Module)
+    mapper(@nospecialize(x)) = symbol_to_globalref(x, m)
+    if x isa Symbol
+        return GlobalRef(m, x)
+    elseif x isa Expr
+        return Expr(x.head, map(mapper, x.args)...)
+    elseif x isa ReturnNode
+        return ReturnNode(mapper(x.val))
+    elseif x isa GotoIfNot
+        return GotoIfNot(mapper(x.cond), x.dest)
+    else
+        return x
+    end
+end
+
 function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
     ci = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
     deserialize_cycle(s, ci)
     code = deserialize(s)::Vector{Any}
     ci.code = code
+    ci.debuginfo = NullDebugInfo
     # allow older-style IR with return and gotoifnot Exprs
     for i in 1:length(code)
         stmt = code[i]
@@ -1144,30 +1224,50 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
             end
         end
     end
-    ci.codelocs = deserialize(s)::Vector{Int32}
+    if current_module[] !== nothing
+        map!(x->symbol_to_globalref(x, current_module[]), code)
+    end
+    _x = deserialize(s)
+    have_debuginfo = _x isa Core.DebugInfo
+    if have_debuginfo
+        ci.debuginfo = _x
+    else
+        codelocs = _x::Vector{Int32}
+        # TODO: convert codelocs to debuginfo format?
+    end
     _x = deserialize(s)
     if _x isa Array || _x isa Int
         pre_12 = false
-        ci.ssavaluetypes = _x
     else
         pre_12 = true
         # < v1.2
         ci.method_for_inference_limit_heuristics = _x
-        ci.ssavaluetypes = deserialize(s)
-        ci.linetable = deserialize(s)
+        _x = deserialize(s)
+    end
+    ci.ssavaluetypes = _x
+    if pre_12
+        linetable = deserialize(s)
+        # TODO: convert linetable to debuginfo format?
     end
     ssaflags = deserialize(s)
     if length(ssaflags) ≠ length(code)
         # make sure the length of `ssaflags` matches that of `code`
         # so that the latest inference doesn't throw on IRs serialized from old versions
-        ssaflags = UInt8[0x00 for _ in 1:length(code)]
+        ssaflags = UInt32[0x00 for _ in 1:length(code)]
+    elseif eltype(ssaflags) != UInt32
+        ssaflags = map(UInt32, ssaflags)
     end
     ci.ssaflags = ssaflags
     if pre_12
         ci.slotflags = deserialize(s)
     else
-        ci.method_for_inference_limit_heuristics = deserialize(s)
-        ci.linetable = deserialize(s)
+        if format_version(s) <= 26
+            ci.method_for_inference_limit_heuristics = deserialize(s)
+        end
+        if !have_debuginfo # pre v1.11 format
+            linetable = deserialize(s)
+            # TODO: convert linetable to debuginfo format?
+        end
     end
     ci.slotnames = deserialize(s)
     if !pre_12
@@ -1175,17 +1275,26 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
         ci.slottypes = deserialize(s)
         ci.rettype = deserialize(s)
         ci.parent = deserialize(s)
+        if format_version(s) < 29 && ci.parent isa MethodInstance && ci.parent.def isa Method
+            ci.nargs = ci.parent.def.nargs
+        end
         world_or_edges = deserialize(s)
-        pre_13 = isa(world_or_edges, Integer)
+        pre_13 = isa(world_or_edges, Union{UInt, Int})
         if pre_13
-            ci.min_world = world_or_edges
+            ci.min_world = reinterpret(UInt, world_or_edges)
+            ci.max_world = reinterpret(UInt, deserialize(s))
         else
             ci.edges = world_or_edges
-            ci.min_world = reinterpret(UInt, deserialize(s))
-            ci.max_world = reinterpret(UInt, deserialize(s))
+            ci.min_world = deserialize(s)::UInt
+            ci.max_world = deserialize(s)::UInt
         end
+        if format_version(s) >= 29
+            ci.method_for_inference_limit_heuristics = deserialize(s)
+        end
+    end
+    if format_version(s) <= 26
+        deserialize(s)::Bool # inferred
     end
-    ci.inferred = deserialize(s)
     if format_version(s) < 22
         inlining_cost = deserialize(s)
         if isa(inlining_cost, Bool)
@@ -1194,6 +1303,9 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
             ci.inlining_cost = inlining_cost
         end
     end
+    if format_version(s) >= 29
+        ci.nargs = deserialize(s)
+    end
     ci.propagate_inbounds = deserialize(s)
     if format_version(s) < 23
         deserialize(s) # `pure` field has been removed
@@ -1201,24 +1313,35 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
     if format_version(s) >= 20
         ci.has_fcall = deserialize(s)
     end
+    if format_version(s) >= 30
+        ci.has_image_globalref = deserialize(s)::Bool
+    end
     if format_version(s) >= 24
         ci.nospecializeinfer = deserialize(s)::Bool
     end
+    if format_version(s) >= 29
+        ci.isva = deserialize(s)::Bool
+    end
     if format_version(s) >= 21
         ci.inlining = deserialize(s)::UInt8
     end
     if format_version(s) >= 14
         ci.constprop = deserialize(s)::UInt8
     end
-    if format_version(s) >= 17
+    if format_version(s) >= 26
+        ci.purity = deserialize(s)::UInt16
+    elseif format_version(s) >= 17
         ci.purity = deserialize(s)::UInt8
     end
     if format_version(s) >= 22
         ci.inlining_cost = deserialize(s)::UInt16
     end
+    ci.debuginfo = NullDebugInfo
     return ci
 end
 
+import Core: NullDebugInfo
+
 if Int === Int64
 const OtherInt = Int32
 else
@@ -1274,7 +1397,7 @@ function deserialize_array(s::AbstractSerializer)
     return A
 end
 
-function deserialize_fillarray!(A::Array{T}, s::AbstractSerializer) where {T}
+function deserialize_fillarray!(A::Union{Array{T},Memory{T}}, s::AbstractSerializer) where {T}
     for i = eachindex(A)
         tag = Int32(read(s.io, UInt8)::UInt8)
         if tag != UNDEFREF_TAG
@@ -1284,6 +1407,48 @@ function deserialize_fillarray!(A::Array{T}, s::AbstractSerializer) where {T}
     return A
 end
 
+function deserialize(s::AbstractSerializer, X::Type{Memory{T}} where T)
+    slot = pop!(s.pending_refs) # e.g. deserialize_cycle
+    n = deserialize(s)::Int
+    elty = eltype(X)
+    if isbitstype(elty)
+        A = X(undef, n)
+        if X === Memory{Bool}
+            i = 1
+            while i <= n
+                b = read(s.io, UInt8)::UInt8
+                v = (b >> 7) != 0
+                count = b & 0x7f
+                nxt = i + count
+                while i < nxt
+                    A[i] = v
+                    i += 1
+                end
+            end
+        else
+            A = read!(s.io, A)::X
+        end
+        s.table[slot] = A
+        return A
+    end
+    A = X(undef, n)
+    s.table[slot] = A
+    sizehint!(s.table, s.counter + div(n, 4))
+    deserialize_fillarray!(A, s)
+    return A
+end
+
+function deserialize(s::AbstractSerializer, X::Type{MemoryRef{T}} where T)
+    x = Core.memoryref(deserialize(s))::X
+    i = deserialize(s)::Int
+    i == 2 || (x = Core.memoryref(x, i, true))
+    return x::X
+end
+
+function deserialize(s::AbstractSerializer, X::Type{Core.AddrSpace{M}} where M)
+    Core.bitcast(X, read(s.io, UInt8))
+end
+
 function deserialize_expr(s::AbstractSerializer, len)
     e = Expr(:temp)
     resolve_ref_immediately(s, e)
@@ -1339,7 +1504,7 @@ function deserialize_typename(s::AbstractSerializer, number)
         tn.max_methods = maxm
         if has_instance
             ty = ty::DataType
-            if !Base.issingletontype(ty)
+            if !isdefined(ty, :instance)
                 singleton = ccall(:jl_new_struct, Any, (Any, Any...), ty)
                 # use setfield! directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty
                 ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), ty, Base.fieldindex(DataType, :instance)-1, singleton)
@@ -1351,39 +1516,19 @@ function deserialize_typename(s::AbstractSerializer, number)
     if tag != UNDEFREF_TAG
         mtname = handle_deserialize(s, tag)
         defs = deserialize(s)
-        maxa = deserialize(s)::Int
+        maxa = deserialize(s)::Union{Int,Int32}
         if makenew
-            mt = ccall(:jl_new_method_table, Any, (Any, Any), name, tn.module)
-            if !isempty(parameters)
-                mt.offs = 0
-            end
-            mt.name = mtname
-            setfield!(mt, :max_args, maxa, :monotonic)
-            ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), tn, Base.fieldindex(Core.TypeName, :mt)-1, mt)
-            for def in defs
-                if isdefined(def, :sig)
-                    ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), mt, def, C_NULL)
-                end
-            end
+            tn.singletonname = mtname
+            setfield!(tn, :max_args, Int32(maxa), :monotonic)
         end
         tag = Int32(read(s.io, UInt8)::UInt8)
         if tag != UNDEFREF_TAG
             kws = handle_deserialize(s, tag)
-            if makenew
-                if kws isa Vector{Method}
-                    for def in kws
-                        kwmt = typeof(Core.kwcall).name.mt
-                        ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), mt, def, C_NULL)
-                    end
-                else
-                    # old object format -- try to forward from old to new
-                    @eval Core.kwcall(kwargs::NamedTuple, f::$ty, args...) = $kws(kwargs, f, args...)
-                end
+            if makenew && !(kws isa Vector{Method})
+                # old object format -- try to forward from old to new
+                @eval Core.kwcall(kwargs::NamedTuple, f::$ty, args...) = $kws(kwargs, f, args...)
             end
         end
-    elseif makenew
-        mt = Symbol.name.mt
-        ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), tn, Base.fieldindex(Core.TypeName, :mt)-1, mt)
     end
     return tn
 end
@@ -1460,11 +1605,11 @@ function deserialize(s::AbstractSerializer, ::Type{Task})
     t.storage = deserialize(s)
     state = deserialize(s)
     if state === :runnable
-        t._state = Base.task_state_runnable
+        @atomic :release t._state = Base.task_state_runnable
     elseif state === :done
-        t._state = Base.task_state_done
+        @atomic :release t._state = Base.task_state_done
     elseif state === :failed
-        t._state = Base.task_state_failed
+        @atomic :release t._state = Base.task_state_failed
     else
         @assert false
     end
diff --git a/stdlib/Serialization/test/runtests.jl b/stdlib/Serialization/test/runtests.jl
index 46749d4375538..e341c6e3eb9ec 100644
--- a/stdlib/Serialization/test/runtests.jl
+++ b/stdlib/Serialization/test/runtests.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test, Random, Serialization, Base64
+using Base.ScopedValues: with
 
 # Check that serializer hasn't gone out-of-frame
 @test Serialization.sertag(Symbol) == 1
@@ -130,7 +131,7 @@ create_serialization_stream() do s # user-defined module
     modtype = eval(Meta.parse("$(modstring)"))
     serialize(s, modtype)
     seek(s, 0)
-    @test deserialize(s) === modtype
+    @test invokelatest(deserialize, s) === modtype
 end
 
 # DataType
@@ -151,7 +152,7 @@ create_serialization_stream() do s # user-defined type
     utype = eval(Meta.parse("$(usertype)"))
     serialize(s, utype)
     seek(s, 0)
-    @test deserialize(s) === utype
+    @test invokelatest(deserialize, s) === utype
 end
 
 create_serialization_stream() do s # user-defined type
@@ -160,7 +161,7 @@ create_serialization_stream() do s # user-defined type
     utype = eval(Meta.parse("$(usertype)"))
     serialize(s, utype)
     seek(s, 0)
-    @test deserialize(s) === utype
+    @test invokelatest(deserialize, s) === utype
 end
 
 create_serialization_stream() do s # user-defined type
@@ -169,7 +170,7 @@ create_serialization_stream() do s # user-defined type
     utype = eval(Meta.parse("$(usertype)"))
     serialize(s, utype)
     seek(s, 0)
-    @test deserialize(s) == utype
+    @test invokelatest(deserialize, s) == utype
 end
 
 create_serialization_stream() do s # immutable struct with 1 field
@@ -178,7 +179,7 @@ create_serialization_stream() do s # immutable struct with 1 field
     utype = eval(Meta.parse("$(usertype)"))
     serialize(s, utype)
     seek(s, 0)
-    @test deserialize(s) == utype
+    @test invokelatest(deserialize, s) == utype
 end
 
 create_serialization_stream() do s # immutable struct with 2 field
@@ -187,7 +188,7 @@ create_serialization_stream() do s # immutable struct with 2 field
     utval = eval(Meta.parse("$(usertype)(1,2)"))
     serialize(s, utval)
     seek(s, 0)
-    @test deserialize(s) === utval
+    @test invokelatest(deserialize, s) === utval
 end
 
 create_serialization_stream() do s # immutable struct with 3 field
@@ -196,7 +197,7 @@ create_serialization_stream() do s # immutable struct with 3 field
     utval = eval(Meta.parse("$(usertype)(1,2,3)"))
     serialize(s, utval)
     seek(s, 0)
-    @test deserialize(s) === utval
+    @test invokelatest(deserialize, s) === utval
 end
 
 create_serialization_stream() do s # immutable struct with 4 field
@@ -205,7 +206,7 @@ create_serialization_stream() do s # immutable struct with 4 field
     utval = eval(Meta.parse("$(usertype)(1,2,3,4)"))
     serialize(s, utval)
     seek(s, 0)
-    @test deserialize(s) === utval
+    @test invokelatest(deserialize, s) === utval
 end
 
 # Expression
@@ -577,7 +578,7 @@ let io = IOBuffer()
     serialize(io, f)
     seekstart(io)
     f2 = deserialize(io)
-    @test f2(1) === 1f0
+    @test invokelatest(f2, 1) === 1f0
 end
 
 # using a filename; #30151
@@ -595,7 +596,7 @@ let f_data
         f_data = "N0pMBwAAAAA0MxMAAAAAAAAAAAEFIyM1IzYiAAAAABBYH04BBE1haW6bRCIAAAAAIgAAAABNTEy+AQIjNRUAI78jAQAAAAAAAAAfTgEETWFpbkQBAiM1AQdSRVBMWzJdvxBTH04BBE1haW6bRAMAAAAzLAAARkYiAAAAAE7BTBsVRsEWA1YkH04BBE1haW5EAQEqwCXAFgNWJB9OAQRNYWluRJ0ovyXBFgFVKMAVAAbBAQAAAAEAAAABAAAATsEVRr80EAEMTGluZUluZm9Ob2RlH04BBE1haW6bRB9OAQRNYWluRAECIzUBB1JFUExbMl2/vhW+FcEAAAAVRsGifX5MTExMTsEp"
     end
     f = deserialize(IOBuffer(base64decode(f_data)))
-    @test f(10,3) == 23
+    @test invokelatest(f, 10,3) == 23
 end
 
 # issue #33466, IdDict
@@ -655,3 +656,20 @@ end
     @test l2 == l1
     @test l2.parts === ()
 end
+
+@testset "Docstrings" begin
+    undoc = Docs.undocumented_names(Serialization)
+    @test_broken isempty(undoc)
+    @test undoc == [:AbstractSerializer, :Serializer]
+end
+
+# test method definitions from v1.11
+if Int === Int64
+    let f_data = "N0pMGgQAAAAWAQEFdGh1bmsbFUbnFgEBBXRodW5rGxVG4DoWAQEGbWV0aG9kAQtmMTExX3RvXzExMhUABuABAAAA4BUAB+AAAAAAThVG4DQQAQxMaW5lSW5mb05vZGUfTptEH04BBE1haW5EAQ90b3AtbGV2ZWwgc2NvcGUBBG5vbmW+vhUAAd8V305GTk4JAQAAAAAAAAAJ//////////9MTExMAwADAAUAAAX//xYBAQZtZXRob2QsBwAWAlYkH06bRAEGVHlwZW9mLAcAFgNWJB9Om0QBBHN2ZWMo4iQfTptETxYBViQfTptEAQRzdmVjFgRWJB9Om0QBBHN2ZWMo4yjkGhfgAQRub25lFgMBBm1ldGhvZCwHACjlGxVG5AEBXhYDViQfTptElyQfTp5EAQNWYWzhFgFWKOEWBFYkH06eRAELbGl0ZXJhbF9wb3co4CXhKOI6KOMVAAbkAQAAAAEAAAABAAAAAQAAAAAAAADkFQAH5AAAAAAAAAAAAAAAAAAAAAAAAAAAThVG4DQsCwAfTgEETWFpbkQBBG5vbmUBBG5vbmW/vhUAAeGifRXhAAhORk5OCQEAAAAAAAAACf//////////TExMTAMAAwAFAAAF//86ThUABucBAAAAAQAAAAEAAAABAAAAAQAAAAEAAAABAAAAAAAAAOcVAAfnAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABOFUbgNCwLAB9OAQRNYWluRCwNAAEEbm9uZb6+FQAB3xXfTkZOTgkBAAAAAAAAAAn//////////0xMTEwDAAMABQAABf//"
+        @eval Main function f111_to_112 end
+        Core.eval(Main, with(Serialization.current_module => Main) do
+                 deserialize(IOBuffer(base64decode(f_data)))
+             end)
+        @test @invokelatest(Main.f111_to_112(16)) == 256
+    end
+end
diff --git a/stdlib/SharedArrays/Project.toml b/stdlib/SharedArrays/Project.toml
index 588785347c73d..46e5332f8d89d 100644
--- a/stdlib/SharedArrays/Project.toml
+++ b/stdlib/SharedArrays/Project.toml
@@ -1,5 +1,6 @@
 name = "SharedArrays"
 uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+version = "1.11.0"
 
 [deps]
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
diff --git a/stdlib/SharedArrays/docs/src/index.md b/stdlib/SharedArrays/docs/src/index.md
index 67ceabf42115a..91ef63bf18aed 100644
--- a/stdlib/SharedArrays/docs/src/index.md
+++ b/stdlib/SharedArrays/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/SharedArrays/docs/src/index.md"
+```
+
 # Shared Arrays
 
 `SharedArray` represents an array, which is shared across multiple processes, on a single machine.
diff --git a/stdlib/SharedArrays/src/SharedArrays.jl b/stdlib/SharedArrays/src/SharedArrays.jl
index f9f701c61fcea..6106bc9c3c81a 100644
--- a/stdlib/SharedArrays/src/SharedArrays.jl
+++ b/stdlib/SharedArrays/src/SharedArrays.jl
@@ -8,7 +8,8 @@ module SharedArrays
 using Mmap, Distributed, Random
 
 import Base: length, size, elsize, ndims, IndexStyle, reshape, convert, deepcopy_internal,
-             show, getindex, setindex!, fill!, similar, reduce, map!, copyto!, unsafe_convert
+             show, getindex, setindex!, fill!, similar, reduce, map!, copyto!, cconvert
+import Base: Array
 import Random
 using Serialization
 using Serialization: serialize_cycle_header, serialize_type, writetag, UNDEFREF_TAG, serialize, deserialize
@@ -358,8 +359,8 @@ for each worker process.
 """
 localindices(S::SharedArray) = S.pidx > 0 ? range_1dim(S, S.pidx) : 1:0
 
-unsafe_convert(::Type{Ptr{T}}, S::SharedArray{T}) where {T} = unsafe_convert(Ptr{T}, sdata(S))
-unsafe_convert(::Type{Ptr{T}}, S::SharedArray   ) where {T} = unsafe_convert(Ptr{T}, sdata(S))
+cconvert(::Type{Ptr{T}}, S::SharedArray{T}) where {T} = cconvert(Ptr{T}, sdata(S))
+cconvert(::Type{Ptr{T}}, S::SharedArray   ) where {T} = cconvert(Ptr{T}, sdata(S))
 
 function SharedArray(A::Array)
     S = SharedArray{eltype(A),ndims(A)}(size(A))
diff --git a/stdlib/SharedArrays/test/runtests.jl b/stdlib/SharedArrays/test/runtests.jl
index 7f1bbb6891ce0..1d4bd81e4b75f 100644
--- a/stdlib/SharedArrays/test/runtests.jl
+++ b/stdlib/SharedArrays/test/runtests.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test, Distributed, SharedArrays, Random
-include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl"))
+include(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test", "testenv.jl"))
 
 # These processes explicitly want to share memory, we can't have
 # them in separate rr sessions
@@ -324,3 +324,7 @@ end
 @test SharedMatrix([0.1 0.2; 0.3 0.4]) == [0.1 0.2; 0.3 0.4]
 @test_throws MethodError SharedVector(rand(4,4))
 @test_throws MethodError SharedMatrix(rand(4))
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(SharedArrays))
+end
diff --git a/stdlib/Sockets/Project.toml b/stdlib/Sockets/Project.toml
index 5afb89b29f126..6a395465722f2 100644
--- a/stdlib/Sockets/Project.toml
+++ b/stdlib/Sockets/Project.toml
@@ -1,5 +1,6 @@
 name = "Sockets"
 uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Sockets/docs/src/index.md b/stdlib/Sockets/docs/src/index.md
index c294461151d7d..feb1744179261 100644
--- a/stdlib/Sockets/docs/src/index.md
+++ b/stdlib/Sockets/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Sockets/docs/src/index.md"
+```
+
 # Sockets
 
 ```@docs
diff --git a/stdlib/Sockets/src/IPAddr.jl b/stdlib/Sockets/src/IPAddr.jl
index 04710e400fe87..e324dee712b71 100644
--- a/stdlib/Sockets/src/IPAddr.jl
+++ b/stdlib/Sockets/src/IPAddr.jl
@@ -31,7 +31,7 @@ end
 """
     IPv4(host::Integer) -> IPv4
 
-Return an IPv4 object from ip address `host` formatted as an [`Integer`](@ref).
+Return an IPv4 object from IP address `host` formatted as an [`Integer`](@ref).
 
 # Examples
 ```jldoctest
@@ -49,7 +49,17 @@ function IPv4(host::Integer)
     end
 end
 
-# constructor: ("1.2.3.4")
+"""
+    IPv4(str::AbstractString) -> IPv4
+
+Parse an IPv4 address string into an `IPv4` object.
+
+# Examples
+```jldoctest
+julia> IPv4("127.0.0.1")
+ip"127.0.0.1"
+```
+"""
 IPv4(str::AbstractString) = parse(IPv4, str)
 
 show(io::IO,ip::IPv4) = print(io,"ip\"",ip,"\"")
@@ -84,7 +94,7 @@ end
 """
     IPv6(host::Integer) -> IPv6
 
-Return an IPv6 object from ip address `host` formatted as an [`Integer`](@ref).
+Return an IPv6 object from IP address `host` formatted as an [`Integer`](@ref).
 
 # Examples
 ```jldoctest
@@ -104,6 +114,17 @@ function IPv6(host::Integer)
     end
 end
 
+"""
+    IPv6(str::AbstractString) -> IPv6
+
+Parse an IPv6 address string into an `IPv6` object.
+
+# Examples
+```jldoctest
+julia> IPv6("::1")
+ip"::1"
+```
+"""
 IPv6(str::AbstractString) = parse(IPv6, str)
 
 # Suppress leading '0's and "0x"
@@ -119,7 +140,7 @@ end
 
 show(io::IO, ip::IPv6) = print(io,"ip\"",ip,"\"")
 # RFC 5952 compliant show function
-# http://tools.ietf.org/html/rfc5952
+# https://tools.ietf.org/html/rfc5952
 function print(io::IO,ip::IPv6)
     i = 8
     m = 0
@@ -265,7 +286,7 @@ julia> @ip_str "2001:db8:0:0:0:0:2:1"
 ip"2001:db8::2:1"
 ```
 """
-macro ip_str(str)
+macro ip_str(str::String)
     return parse(IPAddr, str)
 end
 
diff --git a/stdlib/Sockets/src/PipeServer.jl b/stdlib/Sockets/src/PipeServer.jl
index 4a8965c8f0462..8eb1c0848d02b 100644
--- a/stdlib/Sockets/src/PipeServer.jl
+++ b/stdlib/Sockets/src/PipeServer.jl
@@ -24,6 +24,23 @@ function PipeServer()
     return pipe
 end
 
+function PipeServer(handle::OS_HANDLE)
+    pipe = PipeServer()
+    return Base.open_pipe!(pipe, handle)
+end
+
+function Base.open_pipe!(p::PipeServer, handle::OS_HANDLE)
+    iolock_begin()
+    if p.status != StatusInit
+        error("pipe is already in use or has been closed")
+    end
+    err = ccall(:uv_pipe_open, Int32, (Ptr{Cvoid}, OS_HANDLE), p.handle, handle)
+    uv_error("pipe_open", err)
+    p.status = StatusOpen
+    iolock_end()
+    return p
+end
+
 ## server functions ##
 
 accept(server::PipeServer) = accept(server, PipeEndpoint())
@@ -86,7 +103,7 @@ function connect!(sock::PipeEndpoint, path::AbstractString)
     req = Libc.malloc(Base._sizeof_uv_connect)
     uv_req_set_data(req, C_NULL)
     ccall(:uv_pipe_connect, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), req, sock.handle, path,
-          @cfunction(uv_connectcb, Cvoid, (Ptr{Cvoid}, Cint)))
+          @cfunction(uv_connectcb_pipe, Cvoid, (Ptr{Cvoid}, Cint)))
     sock.status = StatusConnecting
     iolock_end()
     return sock
diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl
index 33767c2153211..1ea9fa1febb7e 100644
--- a/stdlib/Sockets/src/Sockets.jl
+++ b/stdlib/Sockets/src/Sockets.jl
@@ -31,7 +31,7 @@ export
     IPv4,
     IPv6
 
-import Base: isless, show, print, parse, bind, convert, isreadable, iswritable, alloc_buf_hook, _uv_hook_close
+import Base: isless, show, print, parse, bind, alloc_buf_hook, _uv_hook_close
 
 using Base: LibuvStream, LibuvServer, PipeEndpoint, @handle_as, uv_error, associate_julia_struct, uvfinalize,
     notify_error, uv_req_data, uv_req_set_data, preserve_handle, unpreserve_handle, _UVError, IOError,
@@ -107,6 +107,8 @@ if OS_HANDLE != RawFD
     TCPSocket(fd::RawFD) = TCPSocket(Libc._get_osfhandle(fd))
 end
 
+Base.fd(sock::TCPSocket) = Base._fd(sock)
+
 
 mutable struct TCPServer <: LibuvServer
     handle::Ptr{Cvoid}
@@ -139,6 +141,8 @@ function TCPServer(; delay=true)
     return tcp
 end
 
+Base.fd(server::TCPServer) = Base._fd(server)
+
 """
     accept(server[, client])
 
@@ -199,6 +203,8 @@ end
 
 show(io::IO, stream::UDPSocket) = print(io, typeof(stream), "(", uv_status_string(stream), ")")
 
+Base.fd(sock::UDPSocket) = Base._fd(sock)
+
 function _uv_hook_close(sock::UDPSocket)
     lock(sock.cond)
     try
@@ -214,7 +220,7 @@ end
 # Disables dual stack mode.
 const UV_TCP_IPV6ONLY = 1
 
-# Disables dual stack mode. Only available when using ipv6 binf
+# Disables dual stack mode. Only available when using ipv6 bind
 const UV_UDP_IPV6ONLY = 1
 
 # Indicates message was truncated because read buffer was too small. The
@@ -450,7 +456,7 @@ function send(sock::UDPSocket, ipaddr::IPAddr, port::Integer, msg)
     finally
         Base.sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::Base.IntrusiveLinkedList{Task}, ct)
         if uv_req_data(uvw) != C_NULL
             # uvw is still alive,
             # so make sure we won't get spurious notifications later
@@ -468,9 +474,19 @@ end
 
 
 #from `connect`
-function uv_connectcb(conn::Ptr{Cvoid}, status::Cint)
+function uv_connectcb_tcp(conn::Ptr{Cvoid}, status::Cint)
+    hand = ccall(:jl_uv_connect_handle, Ptr{Cvoid}, (Ptr{Cvoid},), conn)
+    sock = @handle_as hand TCPSocket
+    connectcb(conn, status, hand, sock)
+end
+
+function uv_connectcb_pipe(conn::Ptr{Cvoid}, status::Cint)
     hand = ccall(:jl_uv_connect_handle, Ptr{Cvoid}, (Ptr{Cvoid},), conn)
-    sock = @handle_as hand LibuvStream
+    sock = @handle_as hand PipeEndpoint
+    connectcb(conn, status, hand, sock)
+end
+
+function connectcb(conn::Ptr{Cvoid}, status::Cint, hand::Ptr{Cvoid}, sock::LibuvStream)
     lock(sock.cond)
     try
         if status >= 0 # success
@@ -502,7 +518,7 @@ function connect!(sock::TCPSocket, host::Union{IPv4, IPv6}, port::Integer)
     end
     host_in = Ref(hton(host.host))
     uv_error("connect", ccall(:jl_tcp_connect, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, UInt16, Ptr{Cvoid}, Cint),
-                              sock, host_in, hton(UInt16(port)), @cfunction(uv_connectcb, Cvoid, (Ptr{Cvoid}, Cint)),
+                              sock, host_in, hton(UInt16(port)), @cfunction(uv_connectcb_tcp, Cvoid, (Ptr{Cvoid}, Cint)),
                               host isa IPv6))
     sock.status = StatusConnecting
     iolock_end()
@@ -567,7 +583,11 @@ end
 """
     nagle(socket::Union{TCPServer, TCPSocket}, enable::Bool)
 
-Enables or disables Nagle's algorithm on a given TCP server or socket.
+Nagle's algorithm batches multiple small TCP packets into larger
+ones. This can improve throughput but worsen latency. Nagle's algorithm
+is enabled by default. This function sets whether Nagle's algorithm is
+active on a given TCP server or socket. The opposite option is called
+`TCP_NODELAY` in other languages.
 
 !!! compat "Julia 1.3"
     This function requires Julia 1.3 or later.
@@ -770,7 +790,7 @@ end
 """
     leave_multicast_group(sock::UDPSocket, group_addr, interface_addr = nothing)
 
-Remove a socket from  a particular multicast group defined by `group_addr`.
+Remove a socket from a particular multicast group defined by `group_addr`.
 If `interface_addr` is given, specifies a particular interface for multi-homed
 systems.  Use `join_multicast_group()` to enable reception of a group.
 """
diff --git a/stdlib/Sockets/src/addrinfo.jl b/stdlib/Sockets/src/addrinfo.jl
index dda9dac308f38..116755cf6b431 100644
--- a/stdlib/Sockets/src/addrinfo.jl
+++ b/stdlib/Sockets/src/addrinfo.jl
@@ -55,10 +55,10 @@ end
 Gets all of the IP addresses of the `host`.
 Uses the operating system's underlying `getaddrinfo` implementation, which may do a DNS lookup.
 
-# Example
+# Examples
 ```julia-repl
 julia> getalladdrinfo("google.com")
-2-element Array{IPAddr,1}:
+2-element Vector{IPAddr}:
  ip"172.217.6.174"
  ip"2607:f8b0:4000:804::200e"
 ```
@@ -90,7 +90,7 @@ function getalladdrinfo(host::String)
     finally
         Base.sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::Base.IntrusiveLinkedList{Task}, ct)
         if uv_req_data(req) != C_NULL
             # req is still alive,
             # so make sure we don't get spurious notifications later
@@ -122,10 +122,20 @@ end
 getalladdrinfo(host::AbstractString) = getalladdrinfo(String(host))
 
 """
-    getaddrinfo(host::AbstractString, IPAddr=IPv4) -> IPAddr
+    getaddrinfo(host::AbstractString, IPAddr) -> IPAddr
 
 Gets the first IP address of the `host` of the specified `IPAddr` type.
-Uses the operating system's underlying getaddrinfo implementation, which may do a DNS lookup.
+Uses the operating system's underlying getaddrinfo implementation, which may do
+a DNS lookup.
+
+# Examples
+```jldoctest; filter = r"(ip\\"::1\\"|ERROR: DNSError:.*|Stacktrace:(\\n \\[0-9]+\\].*)*)"
+julia> getaddrinfo("localhost", IPv6)
+ip"::1"
+
+julia> getaddrinfo("localhost", IPv4)
+ip"127.0.0.1"
+```
 """
 function getaddrinfo(host::String, T::Type{<:IPAddr})
     addrs = getalladdrinfo(host)
@@ -137,6 +147,14 @@ function getaddrinfo(host::String, T::Type{<:IPAddr})
     throw(DNSError(host, UV_EAI_NONAME))
 end
 getaddrinfo(host::AbstractString, T::Type{<:IPAddr}) = getaddrinfo(String(host), T)
+
+"""
+    getaddrinfo(host::AbstractString) -> IPAddr
+
+Gets the first available IP address of `host`, which may be either an `IPv4` or
+`IPv6` address. Uses the operating system's underlying getaddrinfo
+implementation, which may do a DNS lookup.
+"""
 function getaddrinfo(host::AbstractString)
     addrs = getalladdrinfo(String(host))
     if !isempty(addrs)
@@ -205,7 +223,7 @@ function getnameinfo(address::Union{IPv4, IPv6})
     finally
         Base.sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::Base.IntrusiveLinkedList{Task}, ct)
         if uv_req_data(req) != C_NULL
             # req is still alive,
             # so make sure we don't get spurious notifications later
@@ -264,16 +282,14 @@ See also [`getipaddrs`](@ref).
 """
 function getipaddr(addr_type::Type{T}) where T<:IPAddr
     addrs = getipaddrs(addr_type)
+    isempty(addrs) && error("No networking interface available")
 
-    if length(addrs) == 0
-        error("No networking interface available")
-    end
-
-    # Prefer the first IPv4 address
+    # When `addr_type` is `IPAddr`, `addrs` contain IP addresses of all types
+    # In that case, we prefer to return the first IPv4
     i = something(findfirst(ip -> ip isa IPv4, addrs), 1)
     return addrs[i]
 end
-getipaddr() = getipaddr(IPv4)
+getipaddr() = getipaddr(IPAddr)
 
 
 """
@@ -291,7 +307,7 @@ The `loopback` keyword argument dictates whether loopback addresses (e.g. `ip"12
 # Examples
 ```julia-repl
 julia> getipaddrs()
-5-element Array{IPAddr,1}:
+5-element Vector{IPAddr}:
  ip"198.51.100.17"
  ip"203.0.113.2"
  ip"2001:db8:8:4:445e:5fff:fe5d:5500"
@@ -299,7 +315,7 @@ julia> getipaddrs()
  ip"fe80::445e:5fff:fe5d:5500"
 
 julia> getipaddrs(IPv6)
-3-element Array{IPv6,1}:
+3-element Vector{IPv6}:
  ip"2001:db8:8:4:445e:5fff:fe5d:5500"
  ip"2001:db8:8:4:c164:402e:7e3c:3668"
  ip"fe80::445e:5fff:fe5d:5500"
@@ -344,7 +360,7 @@ are not guaranteed to be unique beyond their network segment,
 therefore routers do not forward them. Link-local addresses are from
 the address blocks `169.254.0.0/16` or `fe80::/10`.
 
-# Example
+# Examples
 ```julia
 filter(!islinklocaladdr, getipaddrs())
 ```
diff --git a/stdlib/Sockets/test/runtests.jl b/stdlib/Sockets/test/runtests.jl
index 02a994460afbf..37921371b3bc5 100644
--- a/stdlib/Sockets/test/runtests.jl
+++ b/stdlib/Sockets/test/runtests.jl
@@ -3,12 +3,21 @@
 using Sockets, Random, Test
 using Base: Experimental
 
+# This is for debugging only - if the system doesn't have `netstat`, we just ignore it
+netstat() = try; read(ignorestatus(`netstat -ndi`), String); catch; return ""; end
+const netstat_before = netstat()
+
 # set up a watchdog alarm for 10 minutes
 # so that we can attempt to get a "friendly" backtrace if something gets stuck
 # (although this'll also terminate any attempted debugging session)
 # expected test duration is about 5-10 seconds
 function killjob(d)
     Core.print(Core.stderr, d)
+    Core.print(Core.stderr, "Netstat before:\n")
+    Core.print(Core.stderr, netstat_before)
+    Core.print(Core.stderr, "\nNetstat after:\n")
+    # This might fail if we're in a bad libuv state
+    Core.print(Core.stderr, netstat())
     if Sys.islinux()
         SIGINFO = 10
     elseif Sys.isbsd()
@@ -223,7 +232,8 @@ end
     end
     @test getnameinfo(ip"192.0.2.1") == "192.0.2.1"
     @test getnameinfo(ip"198.51.100.1") == "198.51.100.1"
-    @test getnameinfo(ip"203.0.113.1") == "203.0.113.1"
+    # Temporarily broken due to a DNS issue. See https://github.com/JuliaLang/julia/issues/55008
+    @test_skip getnameinfo(ip"203.0.113.1") == "203.0.113.1"
     @test getnameinfo(ip"0.1.1.1") == "0.1.1.1"
     @test getnameinfo(ip"::ffff:0.1.1.1") == "::ffff:0.1.1.1"
     @test getnameinfo(ip"::ffff:192.0.2.1") == "::ffff:192.0.2.1"
@@ -452,6 +462,8 @@ end
         catch e
             if isa(e, Base.IOError) && Base.uverrorname(e.code) == "EPERM"
                 @warn "UDP IPv4 broadcast test skipped (permission denied upon send, restrictive firewall?)"
+            elseif Sys.isapple() && isa(e, Base.IOError) && Base.uverrorname(e.code) == "EHOSTUNREACH"
+                @warn "UDP IPv4 broadcast test skipped (local network access not granted?)"
             else
                 rethrow()
             end
@@ -544,14 +556,12 @@ end
         fetch(r)
     end
 
-    let addr = Sockets.InetAddr(ip"127.0.0.1", 4444)
-        srv = listen(addr)
+    let addr = Sockets.InetAddr(ip"192.0.2.5", 4444)
         s = Sockets.TCPSocket()
         Sockets.connect!(s, addr)
         r = @async close(s)
         @test_throws Base._UVError("connect", Base.UV_ECANCELED) Sockets.wait_connected(s)
         fetch(r)
-        close(srv)
     end
 end
 
@@ -602,6 +612,31 @@ end
     end
 end
 
+@testset "fd() methods" begin
+    function valid_fd(x)
+        if Sys.iswindows()
+            return x isa Base.OS_HANDLE
+        elseif !Sys.iswindows()
+            value = Base.cconvert(Cint, x)
+
+            # 2048 is a bit arbitrary, it depends on the process not having too many
+            # file descriptors open. But select() has a limit of 1024 and people
+            # don't seem to hit it too often so let's hope twice that is safe.
+            return value > 0 && value < 2048
+        end
+    end
+
+    sock = TCPSocket(; delay=false)
+    @test valid_fd(fd(sock))
+
+    sock = UDPSocket()
+    bind(sock, Sockets.localhost, 0)
+    @test valid_fd(fd(sock))
+
+    server = listen(Sockets.localhost, 0)
+    @test valid_fd(fd(server))
+end
+
 @testset "TCPServer constructor" begin
     s = Sockets.TCPServer(; delay=false)
     if ccall(:jl_has_so_reuseport, Int32, ()) == 1
@@ -611,11 +646,26 @@ end
 
 @testset "getipaddrs" begin
     @test getipaddr() in getipaddrs()
-    try
-        getipaddr(IPv6) in getipaddrs(IPv6)
-    catch
-        if !isempty(getipaddrs(IPv6))
-            @test "getipaddr(IPv6) errored when it shouldn't have!"
+
+    has_ipv4 = !isempty(getipaddrs(IPv4))
+    if has_ipv4
+        @test getipaddr(IPv4) in getipaddrs(IPv4)
+    else
+        @test_throws "No networking interface available" getipaddr(IPv4)
+    end
+
+    has_ipv6 = !isempty(getipaddrs(IPv6))
+    if has_ipv6
+        @test getipaddr(IPv6) in getipaddrs(IPv6)
+    else
+        @test_throws "No networking interface available" getipaddr(IPv6)
+    end
+
+    @testset "getipaddr() prefers IPv4 over IPv6" begin
+        if has_ipv4
+            @test getipaddr() isa IPv4
+        else
+            @test getipaddr() isa IPv6
         end
     end
 
@@ -682,3 +732,7 @@ end
 
 
 close(sockets_watchdog_timer)
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Sockets))
+end
diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version
index d4a548daef5d7..764e5c96aa8ff 100644
--- a/stdlib/SparseArrays.version
+++ b/stdlib/SparseArrays.version
@@ -1,4 +1,4 @@
 SPARSEARRAYS_BRANCH = main
-SPARSEARRAYS_SHA1 = 8affe9e499379616e33fc60a24bb31500e8423d7
+SPARSEARRAYS_SHA1 = 26c80c8b45dc2dca92788332a40a99b6c360d05a
 SPARSEARRAYS_GIT_URL := https://github.com/JuliaSparse/SparseArrays.jl.git
 SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaSparse/SparseArrays.jl/tarball/$1
diff --git a/stdlib/Statistics.version b/stdlib/Statistics.version
index 27197b12be54c..e22fa135f74cd 100644
--- a/stdlib/Statistics.version
+++ b/stdlib/Statistics.version
@@ -1,4 +1,4 @@
 STATISTICS_BRANCH = master
-STATISTICS_SHA1 = a3feba2bb63f06b7f40024185e9fa5f6385e2510
+STATISTICS_SHA1 = 22dee82f9824d6045e87aa4b97e1d64fe6f01d8d
 STATISTICS_GIT_URL := https://github.com/JuliaStats/Statistics.jl.git
 STATISTICS_TAR_URL = https://api.github.com/repos/JuliaStats/Statistics.jl/tarball/$1
diff --git a/stdlib/StyledStrings.version b/stdlib/StyledStrings.version
new file mode 100644
index 0000000000000..4e1a61a7a8017
--- /dev/null
+++ b/stdlib/StyledStrings.version
@@ -0,0 +1,4 @@
+STYLEDSTRINGS_BRANCH = main
+STYLEDSTRINGS_SHA1 = 9bb8ffdd8c2858cced7b6b6fcee85be41c9a1867
+STYLEDSTRINGS_GIT_URL := https://github.com/JuliaLang/StyledStrings.jl.git
+STYLEDSTRINGS_TAR_URL = https://api.github.com/repos/JuliaLang/StyledStrings.jl/tarball/$1
diff --git a/stdlib/SuiteSparse_jll/Project.toml b/stdlib/SuiteSparse_jll/Project.toml
index d1fb2c25fa68b..ff454476db148 100644
--- a/stdlib/SuiteSparse_jll/Project.toml
+++ b/stdlib/SuiteSparse_jll/Project.toml
@@ -1,15 +1,16 @@
 name = "SuiteSparse_jll"
 uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
-version = "5.10.1+6"
+version = "7.10.1+0"
 
 [deps]
-libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
 
 [compat]
-julia = "1.7"
+CompilerSupportLibraries_jll = "1.3.0"
+julia = "1.13"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
index a347a91721bad..1dcb2d24e4bc7 100644
--- a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
+++ b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
@@ -2,133 +2,276 @@
 
 ## dummy stub for https://github.com/JuliaBinaryWrappers/SuiteSparse_jll.jl
 baremodule SuiteSparse_jll
-using Base, Libdl, libblastrampoline_jll
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
+using Base, Libdl
+using libblastrampoline_jll
+if !(Sys.isfreebsd() || Sys.isapple())
+    using CompilerSupportLibraries_jll
+end
 
 export libamd, libbtf, libcamd, libccolamd, libcholmod, libcolamd, libklu, libldl, librbio, libspqr, libsuitesparseconfig, libumfpack
 
 # These get calculated in __init__()
 # Man I can't wait until these are automatically handled by an in-Base JLLWrappers clone.
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libamd_handle::Ptr{Cvoid} = C_NULL
-libamd_path::String = ""
-libbtf_handle::Ptr{Cvoid} = C_NULL
+
+libsuitesparseconfig_path::String = ""
+const libsuitesparseconfig = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libsuitesparseconfig.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libsuitesparseconfig.7.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libsuitesparseconfig.so.7")
+    else
+        error("SuiteSparse_jll: Library 'libsuitesparseconfig' is not available for $(Sys.KERNEL)")
+    end
+)
+
+libldl_path::String = ""
+const libldl = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libldl.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libldl.3.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libldl.so.3")
+    else
+        error("SuiteSparse_jll: Library 'libldl' is not available for $(Sys.KERNEL)")
+    end
+)
+
 libbtf_path::String = ""
-libcamd_handle::Ptr{Cvoid} = C_NULL
+const libbtf = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libbtf.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libbtf.2.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libbtf.so.2")
+    else
+        error("SuiteSparse_jll: Library 'libbtf' is not available for $(Sys.KERNEL)")
+    end
+)
+
+libcolamd_path::String = ""
+const libcolamd = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libcolamd.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libcolamd.3.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libcolamd.so.3")
+    else
+        error("SuiteSparse_jll: Library 'libcolamd' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows() && Sys.WORD_SIZE == 32
+        LazyLibrary[libsuitesparseconfig, libgcc_s]
+    else
+        LazyLibrary[libsuitesparseconfig]
+    end
+)
+
+libamd_path::String = ""
+const libamd = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libamd.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libamd.3.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libamd.so.3")
+    else
+        error("SuiteSparse_jll: Library 'libamd' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows() && Sys.WORD_SIZE == 32
+        LazyLibrary[libsuitesparseconfig, libgcc_s]
+    else
+        LazyLibrary[libsuitesparseconfig]
+    end
+)
+
 libcamd_path::String = ""
-libccolamd_handle::Ptr{Cvoid} = C_NULL
+const libcamd = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libcamd.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libcamd.3.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libcamd.so.3")
+    else
+        error("SuiteSparse_jll: Library 'libcamd' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows() && Sys.WORD_SIZE == 32
+        LazyLibrary[libsuitesparseconfig, libgcc_s]
+    else
+        LazyLibrary[libsuitesparseconfig]
+    end
+)
+
 libccolamd_path::String = ""
-libcholmod_handle::Ptr{Cvoid} = C_NULL
+const libccolamd = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libccolamd.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libccolamd.3.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libccolamd.so.3")
+    else
+        error("SuiteSparse_jll: Library 'libccolamd' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows() && Sys.WORD_SIZE == 32
+        LazyLibrary[libsuitesparseconfig, libgcc_s]
+    else
+        LazyLibrary[libsuitesparseconfig]
+    end
+)
+
+librbio_path::String = ""
+const librbio = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("librbio.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("librbio.4.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("librbio.so.4")
+    else
+        error("SuiteSparse_jll: Library 'librbio' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows() && Sys.WORD_SIZE == 32
+        LazyLibrary[libsuitesparseconfig, libgcc_s]
+    else
+        LazyLibrary[libsuitesparseconfig]
+    end
+)
+
 libcholmod_path::String = ""
-libcolamd_handle::Ptr{Cvoid} = C_NULL
-libcolamd_path::String = ""
-libklu_handle::Ptr{Cvoid} = C_NULL
+const libcholmod = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libcholmod.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libcholmod.5.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libcholmod.so.5")
+    else
+        error("SuiteSparse_jll: Library 'libcholmod' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows()
+        LazyLibrary[
+            libsuitesparseconfig, libamd, libcamd, libccolamd, libcolamd, libblastrampoline, libgcc_s
+        ]
+    else
+        LazyLibrary[
+            libsuitesparseconfig, libamd, libcamd, libccolamd, libcolamd, libblastrampoline
+        ]
+    end
+)
+
 libklu_path::String = ""
-libldl_handle::Ptr{Cvoid} = C_NULL
-libldl_path::String = ""
-librbio_handle::Ptr{Cvoid} = C_NULL
-librbio_path::String = ""
-libspqr_handle::Ptr{Cvoid} = C_NULL
+const libklu = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libklu.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libklu.2.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libklu.so.2")
+    else
+        error("SuiteSparse_jll: Library 'libklu' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows() && Sys.WORD_SIZE == 32
+        LazyLibrary[libsuitesparseconfig, libamd, libcolamd, libbtf, libgcc_s]
+    else
+        LazyLibrary[libsuitesparseconfig, libamd, libcolamd, libbtf]
+    end
+)
+
 libspqr_path::String = ""
-libsuitesparseconfig_handle::Ptr{Cvoid} = C_NULL
-libsuitesparseconfig_path::String = ""
-libumfpack_handle::Ptr{Cvoid} = C_NULL
+const libspqr = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libspqr.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libspqr.4.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libspqr.so.4")
+    else
+        error("SuiteSparse_jll: Library 'libspqr' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows()
+        LazyLibrary[libsuitesparseconfig, libcholmod, libblastrampoline, libgcc_s]
+    elseif Sys.isfreebsd() || Sys.isapple()
+        LazyLibrary[libsuitesparseconfig, libcholmod, libblastrampoline]
+    else
+        LazyLibrary[libsuitesparseconfig, libcholmod, libblastrampoline, libstdcxx, libgcc_s]
+    end
+)
+
 libumfpack_path::String = ""
+const libumfpack = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libumfpack.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libumfpack.6.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libumfpack.so.6")
+    else
+        error("SuiteSparse_jll: Library 'libumfpack' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows() && Sys.WORD_SIZE == 32
+        LazyLibrary[libsuitesparseconfig, libamd, libcholmod, libblastrampoline, libgcc_s]
+    else
+        LazyLibrary[libsuitesparseconfig, libamd, libcholmod, libblastrampoline]
+    end
+)
+
+function eager_mode()
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
+    libblastrampoline_jll.eager_mode()
 
-if Sys.iswindows()
-    const libamd = "libamd.dll"
-    const libbtf = "libbtf.dll"
-    const libcamd = "libcamd.dll"
-    const libccolamd = "libccolamd.dll"
-    const libcholmod = "libcholmod.dll"
-    const libcolamd = "libcolamd.dll"
-    const libklu = "libklu.dll"
-    const libldl = "libldl.dll"
-    const librbio = "librbio.dll"
-    const libspqr = "libspqr.dll"
-    const libsuitesparseconfig = "libsuitesparseconfig.dll"
-    const libumfpack = "libumfpack.dll"
-elseif Sys.isapple()
-    const libamd = "@rpath/libamd.2.dylib"
-    const libbtf = "@rpath/libbtf.1.dylib"
-    const libcamd = "@rpath/libcamd.2.dylib"
-    const libccolamd = "@rpath/libccolamd.2.dylib"
-    const libcholmod = "@rpath/libcholmod.3.dylib"
-    const libcolamd = "@rpath/libcolamd.2.dylib"
-    const libklu = "@rpath/libklu.1.dylib"
-    const libldl = "@rpath/libldl.2.dylib"
-    const librbio = "@rpath/librbio.2.dylib"
-    const libspqr = "@rpath/libspqr.2.dylib"
-    const libsuitesparseconfig = "@rpath/libsuitesparseconfig.5.dylib"
-    const libumfpack = "@rpath/libumfpack.5.dylib"
-else
-    const libamd = "libamd.so.2"
-    const libbtf = "libbtf.so.1"
-    const libcamd = "libcamd.so.2"
-    const libccolamd = "libccolamd.so.2"
-    const libcholmod = "libcholmod.so.3"
-    const libcolamd = "libcolamd.so.2"
-    const libklu = "libklu.so.1"
-    const libldl = "libldl.so.2"
-    const librbio = "librbio.so.2"
-    const libspqr = "libspqr.so.2"
-    const libsuitesparseconfig = "libsuitesparseconfig.so.5"
-    const libumfpack = "libumfpack.so.5"
+    dlopen(libamd)
+    dlopen(libbtf)
+    dlopen(libcamd)
+    dlopen(libccolamd)
+    dlopen(libcholmod)
+    dlopen(libcolamd)
+    dlopen(libklu)
+    dlopen(libldl)
+    dlopen(librbio)
+    dlopen(libspqr)
+    dlopen(libsuitesparseconfig)
+    dlopen(libumfpack)
 end
+is_available() = true
 
 function __init__()
+    # BSD-3-Clause
+    global libamd_path = string(libamd.path)
+    global libcamd_path = string(libcamd.path)
+    global libccolamd_path = string(libccolamd.path)
+    global libcolamd_path = string(libcolamd.path)
+    global libsuitesparseconfig_path = string(libsuitesparseconfig.path)
+
+    # LGPL-2.1+
+    global libbtf_path = string(libbtf.path)
+    global libklu_path = string(libklu.path)
+    global libldl_path = string(libldl.path)
+
+    # GPL-2.0+
     if Base.USE_GPL_LIBS
-        global libamd_handle = dlopen(libamd)
-        global libamd_path = dlpath(libamd_handle)
-        global libbtf_handle = dlopen(libbtf)
-        global libbtf_path = dlpath(libbtf_handle)
-        global libcamd_handle = dlopen(libcamd)
-        global libcamd_path = dlpath(libcamd_handle)
-        global libccolamd_handle = dlopen(libccolamd)
-        global libccolamd_path = dlpath(libccolamd_handle)
-        global libcholmod_handle = dlopen(libcholmod)
-        global libcholmod_path = dlpath(libcholmod_handle)
-        global libcolamd_handle = dlopen(libcolamd)
-        global libcolamd_path = dlpath(libcolamd_handle)
-        global libklu_handle = dlopen(libklu)
-        global libklu_path = dlpath(libklu_handle)
-        global libldl_handle = dlopen(libldl)
-        global libldl_path = dlpath(libldl_handle)
-        global librbio_handle = dlopen(librbio)
-        global librbio_path = dlpath(librbio_handle)
-        global libspqr_handle = dlopen(libspqr)
-        global libspqr_path = dlpath(libspqr_handle)
-        global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig)
-        global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle)
-        global libumfpack_handle = dlopen(libumfpack)
-        global libumfpack_path = dlpath(libumfpack_handle)
+        global libcholmod_path = string(libcholmod.path)
+        global librbio_path = string(librbio.path)
+        global libspqr_path = string(libspqr.path)
+        global libumfpack_path = string(libumfpack.path)
     end
     global artifact_dir = dirname(Sys.BINDIR)
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libamd_path() = libamd_path
-get_libbtf_path() = libbtf_path
-get_libcamd_path() = libcamd_path
-get_libccolamd_path() = libccolamd_path
-get_libcholmod_path() = libcholmod_path
-get_libcolamd_path() = libcolamd_path
-get_libklu_path() = libklu_path
-get_libldl_path() = libldl_path
-get_librbio_path() = librbio_path
-get_libspqr_path() = libspqr_path
-get_libsuitesparseconfig_path() = libsuitesparseconfig_path
-get_libumfpack_path() = libumfpack_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module SuiteSparse_jll
diff --git a/stdlib/SuiteSparse_jll/test/runtests.jl b/stdlib/SuiteSparse_jll/test/runtests.jl
index ca356951f99e2..922da55fa1881 100644
--- a/stdlib/SuiteSparse_jll/test/runtests.jl
+++ b/stdlib/SuiteSparse_jll/test/runtests.jl
@@ -2,6 +2,10 @@
 
 using Test, SuiteSparse_jll
 
+# SuiteSparse only uses SUITESPARSE_MAIN_VERSION and SUITESPARSE_SUB_VERSION to compute its version
+# The SUITESPARSE_SUBSUB_VERSION is not used
+# TODO before release: update to 7020 or above when upstreamed.
+# This should be safe and unnecessary since we specify exact version of the BB JLL.
 @testset "SuiteSparse_jll" begin
-    @test ccall((:SuiteSparse_version, libsuitesparseconfig), Cint, (Ptr{Cint},), C_NULL) == 5010
+    @test ccall((:SuiteSparse_version, libsuitesparseconfig), Cint, (Ptr{Cint},), C_NULL) > 7000
 end
diff --git a/stdlib/TOML/Project.toml b/stdlib/TOML/Project.toml
index 17fc8be19ec8e..ceb4acf8bbc65 100644
--- a/stdlib/TOML/Project.toml
+++ b/stdlib/TOML/Project.toml
@@ -6,12 +6,13 @@ version = "1.0.3"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 
 [compat]
+Dates = "1.11.0"
 julia = "1.6"
 
 [extras]
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
 
 [targets]
diff --git a/stdlib/TOML/docs/src/index.md b/stdlib/TOML/docs/src/index.md
index 36e8ec6248108..ea1600cc6d44b 100644
--- a/stdlib/TOML/docs/src/index.md
+++ b/stdlib/TOML/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/TOML/docs/src/index.md"
+```
+
 # TOML
 
 TOML.jl is a Julia standard library for parsing and writing [TOML
@@ -36,7 +40,7 @@ none:1:16 error: failed to parse value
 ```
 
 There are other versions of the parse functions ([`TOML.tryparse`](@ref)
-and [`TOML.tryparsefile`]) that instead of throwing exceptions on parser error
+and [`TOML.tryparsefile`](@ref)) that instead of throwing exceptions on parser error
 returns a [`TOML.ParserError`](@ref) with information:
 
 ```jldoctest
@@ -62,7 +66,7 @@ julia> err.column
 The [`TOML.print`](@ref) function is used to print (or serialize) data into TOML
 format.
 
-```jldoctest
+```jldoctest; filter = r"^\s*\S+\s*=.*"m
 julia> using TOML
 
 julia> data = Dict(
diff --git a/stdlib/TOML/src/TOML.jl b/stdlib/TOML/src/TOML.jl
index a2ea1869b4079..575f78be779d2 100644
--- a/stdlib/TOML/src/TOML.jl
+++ b/stdlib/TOML/src/TOML.jl
@@ -1,7 +1,14 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+"""
+TOML.jl is a Julia standard library for parsing and writing TOML v1.0 files.
+This module provides functions to parse TOML strings and files into Julia data structures
+and to serialize Julia data structures to TOML format.
+"""
 module TOML
 
+using Dates
+
 module Internals
     # The parser is defined in Base
     using Base.TOML: Parser, parse, tryparse, ParserError, isvalid_barekey_char, reinit!
@@ -18,18 +25,25 @@ module Internals
 end
 
 # https://github.com/JuliaLang/julia/issues/36605
-readstring(f::AbstractString) = isfile(f) ? read(f, String) : error(repr(f), ": No such file")
+_readstring(f::AbstractString) = isfile(f) ? read(f, String) : error(repr(f), ": No such file")
 
 """
     Parser()
 
 Constructor for a TOML `Parser`.  Note that in most cases one does not need to
-explicitly create a `Parser` but instead one directly use use
+explicitly create a `Parser` but instead one directly uses
 [`TOML.parsefile`](@ref) or [`TOML.parse`](@ref).  Using an explicit parser
 will however reuse some internal data structures which can be beneficial for
 performance if a larger number of small files are parsed.
 """
-const Parser = Internals.Parser
+struct Parser
+    _p::Internals.Parser{Dates}
+end
+
+# Dates-enabled constructors
+Parser() = Parser(Internals.Parser{Dates}())
+Parser(io::IO) = Parser(Internals.Parser{Dates}(io))
+Parser(str::String; filepath=nothing) = Parser(Internals.Parser{Dates}(str; filepath))
 
 """
     parsefile(f::AbstractString)
@@ -41,9 +55,9 @@ Parse file `f` and return the resulting table (dictionary). Throw a
 See also [`TOML.tryparsefile`](@ref).
 """
 parsefile(f::AbstractString) =
-    Internals.parse(Parser(readstring(f); filepath=abspath(f)))
+    Internals.parse(Internals.Parser{Dates}(_readstring(f); filepath=abspath(f)))
 parsefile(p::Parser, f::AbstractString) =
-    Internals.parse(Internals.reinit!(p, readstring(f); filepath=abspath(f)))
+    Internals.parse(Internals.reinit!(p._p, _readstring(f); filepath=abspath(f)))
 
 """
     tryparsefile(f::AbstractString)
@@ -55,9 +69,9 @@ Parse file `f` and return the resulting table (dictionary). Return a
 See also [`TOML.parsefile`](@ref).
 """
 tryparsefile(f::AbstractString) =
-    Internals.tryparse(Parser(readstring(f); filepath=abspath(f)))
+    Internals.tryparse(Internals.Parser{Dates}(_readstring(f); filepath=abspath(f)))
 tryparsefile(p::Parser, f::AbstractString) =
-    Internals.tryparse(Internals.reinit!(p, readstring(f); filepath=abspath(f)))
+    Internals.tryparse(Internals.reinit!(p._p, _readstring(f); filepath=abspath(f)))
 
 """
     parse(x::Union{AbstractString, IO})
@@ -68,10 +82,11 @@ Throw a [`ParserError`](@ref) upon failure.
 
 See also [`TOML.tryparse`](@ref).
 """
+parse(p::Parser) = Internals.parse(p._p)
 parse(str::AbstractString) =
-    Internals.parse(Parser(String(str)))
+    Internals.parse(Internals.Parser{Dates}(String(str)))
 parse(p::Parser, str::AbstractString) =
-    Internals.parse(Internals.reinit!(p, String(str)))
+    Internals.parse(Internals.reinit!(p._p, String(str)))
 parse(io::IO) = parse(read(io, String))
 parse(p::Parser, io::IO) = parse(p, read(io, String))
 
@@ -84,10 +99,11 @@ Return a [`ParserError`](@ref) upon failure.
 
 See also [`TOML.parse`](@ref).
 """
+tryparse(p::Parser) = Internals.tryparse(p._p)
 tryparse(str::AbstractString) =
-    Internals.tryparse(Parser(String(str)))
+    Internals.tryparse(Internals.Parser{Dates}(String(str)))
 tryparse(p::Parser, str::AbstractString) =
-    Internals.tryparse(Internals.reinit!(p, String(str)))
+    Internals.tryparse(Internals.reinit!(p._p, String(str)))
 tryparse(io::IO) = tryparse(read(io, String))
 tryparse(p::Parser, io::IO) = tryparse(p, read(io, String))
 
@@ -105,10 +121,14 @@ const ParserError = Internals.ParserError
 
 
 """
-    print([to_toml::Function], io::IO [=stdout], data::AbstractDict; sorted=false, by=identity)
+    print([to_toml::Function], io::IO [=stdout], data::AbstractDict; sorted=false, by=identity, inline_tables::IdSet{<:AbstractDict})
 
 Write `data` as TOML syntax to the stream `io`. If the keyword argument `sorted` is set to `true`,
-sort tables according to the function given by the keyword argument `by`.
+sort tables according to the function given by the keyword argument `by`. If the keyword argument
+`inline_tables` is given, it should be a set of tables that should be printed "inline".
+
+!!! compat "Julia 1.11"
+    The `inline_tables` keyword argument is supported by Julia 1.11 or later.
 
 The following data types are supported: `AbstractDict`, `AbstractVector`, `AbstractString`, `Integer`, `AbstractFloat`, `Bool`,
 `Dates.DateTime`, `Dates.Time`, `Dates.Date`. Note that the integers and floats
@@ -118,4 +138,19 @@ supported type.
 """
 const print = Internals.Printer.print
 
+public Parser, parsefile, tryparsefile, parse, tryparse, ParserError, print
+
+# These methods are private Base interfaces, but we do our best to support them over
+# the TOML stdlib types anyway to minimize downstream breakage.
+Base.TOMLCache(p::Parser) = Base.TOMLCache(p._p, Dict{String, Base.CachedTOMLDict}())
+Base.TOMLCache(p::Parser, d::Base.CachedTOMLDict) = Base.TOMLCache(p._p, d)
+Base.TOMLCache(p::Parser, d::Dict{String, Dict{String, Any}}) = Base.TOMLCache(p._p, d)
+
+Internals.reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing) =
+    Internals.reinit!(p._p, str; filepath)
+Internals.parse(p::Parser) = Internals.parse(p._p)
+Internals.tryparse(p::Parser) = Internals.tryparse(p._p)
+
+include("precompile.jl")
+
 end
diff --git a/stdlib/TOML/src/precompile.jl b/stdlib/TOML/src/precompile.jl
new file mode 100644
index 0000000000000..f3f45478ad28b
--- /dev/null
+++ b/stdlib/TOML/src/precompile.jl
@@ -0,0 +1,38 @@
+if Base.generating_output()
+let
+    # Test TOML content
+    test_toml = """
+    title = "Example"
+    with_quotes = \"quoted\"
+    number = 42
+    float = 3.14
+    boolean = true
+    date = 2023-01-01
+    datetime = 2023-01-01T12:00:00Z
+    time = 12:00:00
+    array = [1, 2, 3]
+    inline = {a=1, b=1.0, c=[1,2,3], d="foo"}
+
+    [nested]
+    key = "value"
+    """
+
+    test_dict = TOML.parse(test_toml)
+
+    TOML.parse(test_toml)
+
+    io_stream = IOBuffer(test_toml)
+    TOML.parse(io_stream)
+
+    mktemp() do path, io
+        write(io, test_toml)
+        close(io)
+        TOML.parsefile(path)
+    end
+
+    mktemp() do path, io
+        TOML.print(io, test_dict)
+        close(io)
+    end
+end
+end
diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl
index 1fa9f97405504..aca013955b28f 100644
--- a/stdlib/TOML/src/print.jl
+++ b/stdlib/TOML/src/print.jl
@@ -33,8 +33,8 @@ function print_toml_escaped(io::IO, s::AbstractString)
     end
 end
 
-const MbyFunc = Union{Function, Nothing}
-const TOMLValue = Union{AbstractVector, AbstractDict, Dates.DateTime, Dates.Time, Dates.Date, Bool, Integer, AbstractFloat, AbstractString}
+const TOMLValue = Union{AbstractVector, AbstractDict, Bool, Integer, AbstractFloat, AbstractString,
+                        Dates.DateTime, Dates.Time, Dates.Date, Base.TOML.DateTime, Base.TOML.Time, Base.TOML.Date}
 
 
 ########
@@ -58,8 +58,8 @@ function printkey(io::IO, keys::Vector{String})
     end
 end
 
-function to_toml_value(f::MbyFunc, value)
-    if f === nothing
+function to_toml_value(@nospecialize(f::Function), value)
+    if f === identity
         error("type `$(typeof(value))` is not a valid TOML type, pass a conversion function to `TOML.print`")
     end
     toml_value = f(value)
@@ -74,21 +74,24 @@ end
 ##########
 
 # Fallback
-function printvalue(f::MbyFunc, io::IO, value)
+function printvalue(f::Function, io::IO, value, sorted::Bool)
     toml_value = to_toml_value(f, value)
-    @invokelatest printvalue(f, io, toml_value)
+    @invokelatest printvalue(f, io, toml_value, sorted)
 end
 
-function printvalue(f::MbyFunc, io::IO, value::AbstractVector)
+function printvalue(f::Function, io::IO, value::AbstractVector, sorted::Bool)
     Base.print(io, "[")
     for (i, x) in enumerate(value)
         i != 1 && Base.print(io, ", ")
-        printvalue(f, io, x)
+        printvalue(f, io, x, sorted)
     end
     Base.print(io, "]")
 end
 
-function printvalue(f::MbyFunc, io::IO, value::TOMLValue)
+function printvalue(f::Function, io::IO, value::TOMLValue, sorted::Bool)
+    value isa Base.TOML.DateTime && (value = Dates.DateTime(value))
+    value isa Base.TOML.Time && (value = Dates.Time(value))
+    value isa Base.TOML.Date && (value = Dates.Date(value))
     value isa Dates.DateTime ? Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd\THH:MM:SS.sss\Z")) :
     value isa Dates.Time     ? Base.print(io, Dates.format(value, Dates.dateformat"HH:MM:SS.sss")) :
     value isa Dates.Date     ? Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd")) :
@@ -97,10 +100,11 @@ function printvalue(f::MbyFunc, io::IO, value::TOMLValue)
     value isa AbstractFloat  ? Base.print(io, isnan(value) ? "nan" :
                                               isinf(value) ? string(value > 0 ? "+" : "-", "inf") :
                                               Float64(value)) :  # TOML specifies IEEE 754 binary64 for float
-    value isa AbstractString ? (Base.print(io, "\"");
+    value isa AbstractString ? (qmark = Base.contains(value, "\n") ? "\"\"\"" : "\"";
+                                Base.print(io, qmark);
                                 print_toml_escaped(io, value);
-                                Base.print(io, "\"")) :
-    value isa AbstractDict ? print_inline_table(f, io, value) :
+                                Base.print(io, qmark)) :
+    value isa AbstractDict ? print_inline_table(f, io, value, sorted) :
     error("internal error in TOML printing, unhandled value")
 end
 
@@ -112,13 +116,18 @@ function print_integer(io::IO, value::Integer)
     return
 end
 
-function print_inline_table(f::MbyFunc, io::IO, value::AbstractDict)
+function print_inline_table(f::Function, io::IO, value::AbstractDict, sorted::Bool)
+    vkeys = collect(keys(value))::AbstractArray
+    if sorted
+        sort!(vkeys)
+    end
     Base.print(io, "{")
-    for (i, (k,v)) in enumerate(value)
+    for (i, k) in enumerate(vkeys)
+        v = value[k]
         i != 1 && Base.print(io, ", ")
         printkey(io, [String(k)])
         Base.print(io, " = ")
-        printvalue(f, io, v)
+        printvalue(f, io, v, sorted)
     end
     Base.print(io, "}")
 end
@@ -128,24 +137,30 @@ end
 # Tables #
 ##########
 
-is_table(value)           = isa(value, AbstractDict)
-is_array_of_tables(value) = isa(value, AbstractArray) &&
-                            length(value) > 0 && (
-                                isa(value, AbstractArray{<:AbstractDict}) ||
-                                all(v -> isa(v, AbstractDict), value)
-                            )
-is_tabular(value)         = is_table(value) || @invokelatest(is_array_of_tables(value))
+is_table(@nospecialize(value)) = isa(value, AbstractDict)
+is_array_of_tables(@nospecialize(value)) =
+    isa(value, AbstractArray) &&
+    length(value) > 0 && (isa(value, AbstractArray{<:AbstractDict}) ||
+                          all(v -> isa(v, AbstractDict), value))
+is_tabular(@nospecialize(value)) = is_table(value) || @invokelatest(is_array_of_tables(value))
 
-function print_table(f::MbyFunc, io::IO, a::AbstractDict,
+function print_table(f::Function, io::IO, a::AbstractDict,
     ks::Vector{String} = String[];
     indent::Int = 0,
     first_block::Bool = true,
     sorted::Bool = false,
+    inline_tables::IdSet,
     by::Function = identity,
 )
+
+    if a in inline_tables
+        @invokelatest print_inline_table(f, io, a, sorted)
+        return
+    end
+
     akeys = keys(a)
     if sorted
-        akeys = sort!(collect(akeys); by=by)
+        akeys = sort!(collect(akeys); by)
     end
 
     # First print non-tabular entries
@@ -154,12 +169,14 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict,
         if !isa(value, TOMLValue)
             value = to_toml_value(f, value)
         end
-        is_tabular(value) && continue
+        if is_tabular(value) && !(value in inline_tables)
+            continue
+        end
 
         Base.print(io, ' '^4max(0,indent-1))
         printkey(io, [String(key)])
         Base.print(io, " = ") # print separator
-        printvalue(f, io, value)
+        printvalue(f, io, value, sorted)
         Base.print(io, "\n")  # new line?
         first_block = false
     end
@@ -169,10 +186,10 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict,
         if !isa(value, TOMLValue)
             value = to_toml_value(f, value)
         end
-        if is_table(value)
+        if is_table(value) && !(value in inline_tables)
             push!(ks, String(key))
             _values = @invokelatest values(value)
-            header = isempty(value) || !all(is_tabular(v) for v in _values)::Bool
+            header = isempty(value) || !all(is_tabular(v) for v in _values)::Bool || any(v in inline_tables for v in _values)::Bool
             if header
                 # print table
                 first_block || println(io)
@@ -183,7 +200,7 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict,
                 Base.print(io,"]\n")
             end
             # Use runtime dispatch here since the type of value seems not to be enforced other than as AbstractDict
-            @invokelatest print_table(f, io, value, ks; indent = indent + header, first_block = header, sorted=sorted, by=by)
+            @invokelatest print_table(f, io, value, ks; indent = indent + header, first_block = header, sorted, by, inline_tables)
             pop!(ks)
         elseif @invokelatest(is_array_of_tables(value))
             # print array of tables
@@ -197,7 +214,7 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict,
                 Base.print(io,"]]\n")
                 # TODO, nicer error here
                 !isa(v, AbstractDict) && error("array should contain only tables")
-                @invokelatest print_table(f, io, v, ks; indent = indent + 1, sorted=sorted, by=by)
+                @invokelatest print_table(f, io, v, ks; indent = indent + 1, sorted, by, inline_tables)
             end
             pop!(ks)
         end
@@ -209,7 +226,11 @@ end
 # API #
 #######
 
-print(f::MbyFunc, io::IO, a::AbstractDict; sorted::Bool=false, by=identity) = print_table(f, io, a; sorted=sorted, by=by)
-print(f::MbyFunc, a::AbstractDict; sorted::Bool=false, by=identity) = print(f, stdout, a; sorted=sorted, by=by)
-print(io::IO, a::AbstractDict; sorted::Bool=false, by=identity) = print_table(nothing, io, a; sorted=sorted, by=by)
-print(a::AbstractDict; sorted::Bool=false, by=identity) = print(nothing, stdout, a; sorted=sorted, by=by)
+print(f::Function, io::IO, a::AbstractDict; sorted::Bool=false, by=identity, inline_tables::IdSet{<:AbstractDict}=IdSet{Dict{String}}()) =
+    print_table(f, io, a; sorted, by, inline_tables)
+print(f::Function, a::AbstractDict; sorted::Bool=false, by=identity, inline_tables::IdSet{<:AbstractDict}=IdSet{Dict{String}}()) =
+    print(f, stdout, a; sorted, by, inline_tables)
+print(io::IO, a::AbstractDict; sorted::Bool=false, by=identity, inline_tables::IdSet{<:AbstractDict}=IdSet{Dict{String}}()) =
+    print_table(identity, io, a; sorted, by, inline_tables)
+print(a::AbstractDict; sorted::Bool=false, by=identity, inline_tables::IdSet{<:AbstractDict}=IdSet{Dict{String}}()) =
+    print(identity, stdout, a; sorted, by, inline_tables)
diff --git a/stdlib/TOML/test/parse.jl b/stdlib/TOML/test/parse.jl
index 12f68acbdb5bf..30400344f67cf 100644
--- a/stdlib/TOML/test/parse.jl
+++ b/stdlib/TOML/test/parse.jl
@@ -14,6 +14,7 @@ using TOML: ParserError
           TOML.parse(IOBuffer(str)) ==
           TOML.parse(p, str) == TOML.parse(p, SubString(str)) ==
           TOML.parse(p, IOBuffer(str)) == dict
+    @test TOML.parse("a\t=1") == dict
     @test_throws ParserError TOML.parse(invalid_str)
     @test_throws ParserError TOML.parse(SubString(invalid_str))
     @test_throws ParserError TOML.parse(IOBuffer(invalid_str))
diff --git a/stdlib/TOML/test/print.jl b/stdlib/TOML/test/print.jl
index 765b6feb491a5..1f34b1baee916 100644
--- a/stdlib/TOML/test/print.jl
+++ b/stdlib/TOML/test/print.jl
@@ -58,7 +58,7 @@ end
     [option]
     """
     d = TOML.parse(s)
-    @test toml_str(d) == "user = \"me\"\n\n[julia]\n\n[option]\n"
+    @test toml_str(d; sorted=true) == "user = \"me\"\n\n[julia]\n\n[option]\n"
 end
 
 @testset "special characters" begin
@@ -83,16 +83,35 @@ loaders = ["gzip", { driver = "csv", args = {delim = "\t"}}]
 @testset "vec with dicts and non-dicts" begin
     # https://github.com/JuliaLang/julia/issues/45340
     d =  Dict("b" => Any[111, Dict("a" =>  222, "d" => 333)])
-    @test toml_str(d) == "b = [111, {a = 222, d = 333}]\n"
+    @test toml_str(d) == (sizeof(Int) == 4 ?
+        "b = [111, {a = 222, d = 333}]\n" :
+        "b = [111, {d = 333, a = 222}]\n")
+
 
     d =  Dict("b" => Any[Dict("a" =>  222, "d" => 333), 111])
-    @test toml_str(d) == "b = [{a = 222, d = 333}, 111]\n"
+    @test toml_str(d) == (sizeof(Int) == 4 ?
+        "b = [{a = 222, d = 333}, 111]\n" :
+        "b = [{d = 333, a = 222}, 111]\n")
 
     d =  Dict("b" => Any[Dict("a" =>  222, "d" => 333)])
-    @test toml_str(d) == """
-    [[b]]
-    a = 222
-    d = 333
+    @test toml_str(d) == (sizeof(Int) == 4 ?
+        """
+        [[b]]
+        a = 222
+        d = 333
+        """ :
+        """
+        [[b]]
+        d = 333
+        a = 222
+        """)
+
+    # https://github.com/JuliaLang/julia/pull/57584
+    d = Dict("b" => [MyStruct(1), MyStruct(2)])
+    @test toml_str(d) do x
+        x isa MyStruct && return Dict("a" => x.a)
+    end == """
+    b = [{a = 1}, {a = 2}]
     """
 end
 
@@ -140,3 +159,76 @@ d = "hello"
 a = 2
 b = 9.9
 """
+
+
+inline_dict = Dict("a" => [1,2], "b" => Dict("a" => "b"), "c" => "foo")
+d = Dict(
+    "x" => "y",
+    "y" => inline_dict,
+    "z" => [1,2,3],
+)
+inline_tables = IdSet{Dict}()
+push!(inline_tables, inline_dict)
+@test toml_str(d; sorted=true, inline_tables) ==
+"""
+x = "y"
+y = {a = [1, 2], b = {a = "b"}, c = "foo"}
+z = [1, 2, 3]
+"""
+
+
+d = Dict("deps" => Dict(
+        "LocalPkg" => "fcf55292-0d03-4e8a-9e0b-701580031fc3",
+        "Example" => "7876af07-990d-54b4-ab0e-23690620f79a"),
+   "sources" => Dict(
+        "LocalPkg" => Dict("path" => "LocalPkg"),
+        "Example" => Dict("url" => "https://github.com/JuliaLang/Example.jl")))
+
+inline_tables = IdSet{Dict}()
+push!(inline_tables, d["sources"]["LocalPkg"])
+push!(inline_tables, d["sources"]["Example"])
+
+@test toml_str(d; sorted=true, inline_tables) ==
+"""
+[deps]
+Example = "7876af07-990d-54b4-ab0e-23690620f79a"
+LocalPkg = "fcf55292-0d03-4e8a-9e0b-701580031fc3"
+
+[sources]
+Example = {url = "https://github.com/JuliaLang/Example.jl"}
+LocalPkg = {path = "LocalPkg"}
+"""
+
+inline_tables = IdSet{Dict}()
+push!(inline_tables, d["sources"]["LocalPkg"])
+s = """
+[deps]
+Example = "7876af07-990d-54b4-ab0e-23690620f79a"
+LocalPkg = "fcf55292-0d03-4e8a-9e0b-701580031fc3"
+
+[sources]
+LocalPkg = {path = "LocalPkg"}
+
+    [sources.Example]
+    url = "https://github.com/JuliaLang/Example.jl"
+"""
+@test toml_str(d; sorted=true, inline_tables) == s
+@test roundtrip(s)
+
+
+# https://github.com/JuliaLang/julia/pull/57584
+d = Dict("a" => 1, "b" => 2)
+inline_tables = IdSet{Dict}([d])
+s = "{a = 1, b = 2}"
+@test toml_str(d; sorted=true, inline_tables) == s
+
+
+# multiline strings (#55083)
+s = """
+a = \"\"\"lorem ipsum
+
+
+
+alpha\"\"\"
+"""
+@test roundtrip(s)
diff --git a/stdlib/TOML/test/runtests.jl b/stdlib/TOML/test/runtests.jl
index 7376fab914636..47c762d054711 100644
--- a/stdlib/TOML/test/runtests.jl
+++ b/stdlib/TOML/test/runtests.jl
@@ -25,3 +25,7 @@ include("print.jl")
 include("parse.jl")
 
 @inferred TOML.parse("foo = 3")
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(TOML))
+end
diff --git a/stdlib/TOML/test/toml_test.jl b/stdlib/TOML/test/toml_test.jl
index f4670058223a1..cdd8950677c75 100644
--- a/stdlib/TOML/test/toml_test.jl
+++ b/stdlib/TOML/test/toml_test.jl
@@ -36,7 +36,7 @@ end
 
 function check_valid(f)
     jsn = try jsn2data(@eval include($f * ".jl"))
-    # Some files cannot be reprsented with julias DateTime (timezones)
+    # Some files cannot be represented with julias DateTime (timezones)
     catch
         return false
     end
@@ -72,11 +72,7 @@ for (root, dirs, files) in walkdir(valid_test_folder)
                 rel = replace(rel, '\\' => '/')
             end
             v = check_valid(splitext(file)[1])
-            if rel in failures
-                @test_broken v
-            else
-                @test v
-            end
+            @test v broken=rel in failures
         end
     end
 end
@@ -145,11 +141,7 @@ for (root, dirs, files) in walkdir(invalid_test_folder)
                 rel = replace(rel, '\\' => '/')
             end
             v = check_invalid(file)
-            if rel in failures
-                @test_broken v
-            else
-                @test v
-            end
+            @test v broken=rel in failures
         end
     end
 end
diff --git a/stdlib/TOML/test/utils/utils.jl b/stdlib/TOML/test/utils/utils.jl
index c484a61cee25a..b01acf04a72fe 100644
--- a/stdlib/TOML/test/utils/utils.jl
+++ b/stdlib/TOML/test/utils/utils.jl
@@ -33,7 +33,7 @@ end
 function get_data()
     tmp = mktempdir()
     path = joinpath(tmp, basename(url))
-    Downloads.download(url, path)
+    retry(Downloads.download, delays=fill(10,5))(url, path)
     Tar.extract(`$(exe7z()) x $path -so`, joinpath(tmp, "testfiles"))
     return joinpath(tmp, "testfiles", "toml-test-julia-$version", "testfiles")
 end
diff --git a/stdlib/TOML/test/values.jl b/stdlib/TOML/test/values.jl
index be2ed3acce5b5..b7c3730006723 100644
--- a/stdlib/TOML/test/values.jl
+++ b/stdlib/TOML/test/values.jl
@@ -4,16 +4,31 @@ using Test
 using TOML
 using TOML: Internals
 
+# Construct an explicit Parser to test the "cached" version of parsing
+const test_parser = TOML.Parser()
+
 function testval(s, v)
     f = "foo = $s"
+    # First, test with the standard entrypoint
     parsed = TOML.parse(f)["foo"]
     return isequal(v, parsed) && typeof(v) == typeof(parsed)
+    (!isequal(v, parsed) || typeof(v) != typeof(parsed)) && return false
+    # Next, test with the "cached" (explicit Parser) entrypoint
+    parsed = TOML.parse(test_parser, f)["foo"]
+    (!isequal(v, parsed) || typeof(v) != typeof(parsed)) && return false
+    return true
 end
 
 function failval(s, v)
     f = "foo = $s"
+    # First, test with the standard entrypoint
     err = TOML.tryparse(f);
     return err isa TOML.Internals.ParserError && err.type == v
+    (!isa(err, TOML.Internals.ParserError) || err.type != v) && return false
+    # Next, test with the "cached" (explicit Parser) entrypoint
+    err = TOML.tryparse(test_parser, f);
+    (!isa(err, TOML.Internals.ParserError) || err.type != v) && return false
+    return true
 end
 
 @testset "Numbers" begin
@@ -101,6 +116,9 @@ end
     @test testval("2016-09-09T09:09:09Z"    , DateTime(2016 , 9 , 9 , 9 , 9 , 9))
     @test testval("2016-09-09T09:09:09.0Z"  , DateTime(2016 , 9 , 9 , 9 , 9 , 9))
     @test testval("2016-09-09T09:09:09.012" , DateTime(2016 , 9 , 9 , 9 , 9 , 9  , 12))
+    @test testval("2016-09-09T09:09:09.2"   , DateTime(2016 , 9 , 9 , 9 , 9 , 9  , 200))
+    @test testval("2016-09-09T09:09:09.20"  , DateTime(2016 , 9 , 9 , 9 , 9 , 9  , 200))
+    @test testval("2016-09-09T09:09:09.02"  , DateTime(2016 , 9 , 9 , 9 , 9 , 9  , 20))
 
     @test failval("2016-09-09T09:09:09.0+10:00"   , Internals.ErrOffsetDateNotSupported)
     @test failval("2016-09-09T09:09:09.012-02:00" , Internals.ErrOffsetDateNotSupported)
@@ -117,8 +135,12 @@ end
 end
 
 @testset "Time" begin
-    @test testval("09:09:09.99"    , Time(9 , 9 , 9 , 99))
+    @test testval("09:09:09.99"    , Time(9 , 9 , 9 , 990))
     @test testval("09:09:09.99999" , Time(9 , 9 , 9 , 999))
+    @test testval("00:00:00.2"     , Time(0 , 0 , 0 , 200))
+    @test testval("00:00:00.20"    , Time(0 , 0 , 0 , 200))
+    @test testval("00:00:00.23"    , Time(0 , 0 , 0 , 230))
+    @test testval("00:00:00.234"   , Time(0 , 0 , 0 , 234))
 
     @test failval("09:09x09", Internals.ErrParsingDateTime)
 end
@@ -157,6 +179,6 @@ end
 @testset "Array" begin
     @test testval("[1,2,3]", Int64[1,2,3])
     @test testval("[1.0, 2.0, 3.0]", Float64[1.0, 2.0, 3.0])
-    @test testval("[1.0, 2.0, 3]", Union{Int64, Float64}[1.0, 2.0, Int64(3)])
+    @test testval("[1.0, 2.0, 3]", Any[1.0, 2.0, Int64(3)])
     @test testval("[1.0, 2, \"foo\"]", Any[1.0, Int64(2), "foo"])
 end
diff --git a/stdlib/Tar.version b/stdlib/Tar.version
index 44e829b5fea54..339464a31a4ca 100644
--- a/stdlib/Tar.version
+++ b/stdlib/Tar.version
@@ -1,4 +1,4 @@
 TAR_BRANCH = master
-TAR_SHA1 = ff55460f4d329949661a33e6c8168ce6d890676c
+TAR_SHA1 = d236fa0affb2ab90c78798b01bb1d64615785354
 TAR_GIT_URL := https://github.com/JuliaIO/Tar.jl.git
 TAR_TAR_URL = https://api.github.com/repos/JuliaIO/Tar.jl/tarball/$1
diff --git a/stdlib/Test/Project.toml b/stdlib/Test/Project.toml
index ee1ae15fd7154..f04b4f976196f 100644
--- a/stdlib/Test/Project.toml
+++ b/stdlib/Test/Project.toml
@@ -1,5 +1,6 @@
 name = "Test"
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+version = "1.11.0"
 
 [deps]
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
diff --git a/stdlib/Test/docs/src/index.md b/stdlib/Test/docs/src/index.md
index f1142409747bd..893efeaccbade 100644
--- a/stdlib/Test/docs/src/index.md
+++ b/stdlib/Test/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Test/docs/src/index.md"
+```
+
 # Unit Testing
 
 ```@meta
@@ -20,7 +24,7 @@ The `Test` module provides simple *unit testing* functionality. Unit testing is
 see if your code is correct by checking that the results are what you expect. It can be helpful
 to ensure your code still works after you make changes, and can be used when developing as a way
 of specifying the behaviors your code should have when complete. You may also want to look at the
-documentation for [adding tests to your Julia Package](https://pkgdocs.julialang.org/dev/creating-packages/#Adding-tests-to-the-package).
+documentation for [adding tests to your Julia Package](https://pkgdocs.julialang.org/dev/creating-packages/#adding-tests-to-packages).
 
 Simple unit testing can be performed with the `@test` and `@test_throws` macros:
 
@@ -55,7 +59,6 @@ julia> @test foo("f") == 20
 Test Failed at none:1
   Expression: foo("f") == 20
    Evaluated: 1 == 20
-
 ERROR: There was an error during testing
 ```
 
@@ -69,6 +72,8 @@ Error During Test
   Test threw an exception of type MethodError
   Expression: foo(:cat) == 1
   MethodError: no method matching length(::Symbol)
+  The function `length` exists, but no method is defined for this combination of argument types.
+
   Closest candidates are:
     length(::SimpleVector) at essentials.jl:256
     length(::Base.MethodList) at reflection.jl:521
@@ -172,6 +177,39 @@ Foo Tests     |    8      8  0.0s
   Arrays 3    |    2      2  0.0s
 ```
 
+### Environment Variable Support
+
+The `Test` module supports the `JULIA_TEST_VERBOSE` environment variable for controlling
+verbose behavior globally:
+
+- When `JULIA_TEST_VERBOSE=true`, testsets will automatically use `verbose=true` by default,
+  and additionally print "Starting testset" and "Finished testset" messages with timing
+  information as testsets are entered and exited.
+- When `JULIA_TEST_VERBOSE=false` or unset, testsets use `verbose=false` by default and
+  no entry/exit messages are printed.
+
+This environment variable provides a convenient way to enable comprehensive verbose output
+for debugging test suites without modifying the test code itself.
+
+```julia
+$ JULIA_TEST_VERBOSE=true julia -e '
+using Test
+@testset "Example" begin
+    @test 1 + 1 == 2
+    @testset "Nested" begin
+        @test 2 * 2 == 4
+    end
+end'
+
+Starting testset: Example
+  Starting testset: Nested
+  Finished testset: Nested (0.0s)
+Finished testset: Example (0.0s)
+Test Summary: | Pass  Total  Time
+Example       |    2      2  0.0s
+  Nested      |    1      1  0.0s
+```
+
 If we do have a test failure, only the details for the failed test sets will be shown:
 
 ```julia-repl; filter = r"[0-9\.]+s"
@@ -224,8 +262,6 @@ Test Passed
 julia> @test 1 ≈ 0.999999
 Test Failed at none:1
   Expression: 1 ≈ 0.999999
-   Evaluated: 1 ≈ 0.999999
-
 ERROR: There was an error during testing
 ```
 You can specify relative and absolute tolerances by setting the `rtol` and `atol` keyword arguments of `isapprox`, respectively,
@@ -316,8 +352,12 @@ function finish(ts::CustomTestSet)
     # just record if we're not the top-level parent
     if get_testset_depth() > 0
         record(get_testset(), ts)
+        return ts
     end
-    ts
+
+    # so the results are printed if we are at the top level
+    Test.print_test_results(ts)
+    return ts
 end
 ```
 
@@ -332,6 +372,45 @@ And using that testset looks like:
 end
 ```
 
+In order to use a custom testset and have the recorded results printed as part of any outer default testset,
+also define `Test.get_test_counts`. This might look like so:
+
+```julia
+using Test: AbstractTestSet, Pass, Fail, Error, Broken, get_test_counts, TestCounts, format_duration
+
+function Test.get_test_counts(ts::CustomTestSet)
+    passes, fails, errors, broken = 0, 0, 0, 0
+    # cumulative results
+    c_passes, c_fails, c_errors, c_broken = 0, 0, 0, 0
+
+    for t in ts.results
+        # count up results
+        isa(t, Pass)   && (passes += 1)
+        isa(t, Fail)   && (fails  += 1)
+        isa(t, Error)  && (errors += 1)
+        isa(t, Broken) && (broken += 1)
+        # handle children
+        if isa(t, AbstractTestSet)
+            tc = get_test_counts(t)::TestCounts
+            c_passes += tc.passes + tc.cumulative_passes
+            c_fails  += tc.fails + tc.cumulative_fails
+            c_errors += tc.errors + tc.cumulative_errors
+            c_broken += tc.broken + tc.cumulative_broken
+        end
+    end
+    # get a duration, if we have one
+    duration = format_duration(ts)
+    return TestCounts(true, passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration)
+end
+```
+
+```@docs
+Test.TestCounts
+Test.get_test_counts
+Test.format_duration
+Test.print_test_results
+```
+
 ## Test utilities
 
 ```@docs
@@ -368,6 +447,8 @@ Add the following to `src/Example.jl`:
 ```julia
 module Example
 
+export greet, simple_add, type_multiply
+
 function greet()
     "Hello world!"
 end
@@ -380,8 +461,6 @@ function type_multiply(a::Float64, b::Float64)
     a * b
 end
 
-export greet, simple_add, type_multiply
-
 end
 ```
 
@@ -430,20 +509,20 @@ Using our knowledge of `Test.jl`, here are some example tests we could add to `m
 @testset "Testset 1" begin
     @test 2 == simple_add(1, 1)
     @test 3.5 == simple_add(1, 2.5)
-        @test_throws MethodError simple_add(1, "A")
-        @test_throws MethodError simple_add(1, 2, 3)
+    @test_throws MethodError simple_add(1, "A")
+    @test_throws MethodError simple_add(1, 2, 3)
 end
 
 @testset "Testset 2" begin
     @test 1.0 == type_multiply(1.0, 1.0)
-        @test isa(type_multiply(2.0, 2.0), Float64)
+    @test isa(type_multiply(2.0, 2.0), Float64)
     @test_throws MethodError type_multiply(1, 2.5)
 end
 ```
 
 #### Writing Tests for `greeting_tests.jl`
 
-Using our knowledge of `Test.jl`, here are some example tests we could add to `math_tests.jl`:
+Using our knowledge of `Test.jl`, here are some example tests we could add to `greeting_tests.jl`:
 
 ```julia
 @testset "Testset 3" begin
@@ -491,3 +570,15 @@ Using `Test.jl`, more complicated tests can be added for packages but this shoul
 ```@meta
 DocTestSetup = nothing
 ```
+
+### Code Coverage
+
+Code coverage tracking during tests can be enabled using the `pkg> test --coverage` flag (or at a lower level using the
+[`--code-coverage`](@ref command-line-interface) julia arg). This is on by default in the
+[julia-runtest](https://github.com/julia-actions/julia-runtest) GitHub action.
+
+To evaluate coverage either manually inspect the `.cov` files that are generated beside the source files locally,
+or in CI use the [julia-processcoverage](https://github.com/julia-actions/julia-processcoverage) GitHub action.
+
+!!! compat "Julia 1.11"
+    Since Julia 1.11, coverage is not collected during the package precompilation phase.
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
index 622c696b383a0..dcfc780084a19 100644
--- a/stdlib/Test/src/Test.jl
+++ b/stdlib/Test/src/Test.jl
@@ -10,6 +10,13 @@ All tests belong to a *test set*. There is a default, task-level
 test set that throws on the first failure. Users can choose to wrap
 their tests in (possibly nested) test sets that will store results
 and summarize them at the end of the test set with `@testset`.
+
+Environment variables:
+
+* `JULIA_TEST_VERBOSE`: Set to `true` to enable verbose test output, including
+  testset entry/exit messages and detailed hierarchical test summaries.
+* `JULIA_TEST_FAILFAST`: Set to `true` to stop testing on the first failure.
+* `JULIA_TEST_RECORD_PASSES`: Set to `true` to record passed tests (for debugging).
 """
 module Test
 
@@ -29,19 +36,11 @@ using Random: AbstractRNG, default_rng
 using InteractiveUtils: gen_call_with_extracted_types
 using Base: typesplit, remove_linenums!
 using Serialization: Serialization
+using Base.ScopedValues: LazyScopedValue, ScopedValue, @with
 
-const DISPLAY_FAILED = (
-    :isequal,
-    :isapprox,
-    :≈,
-    :occursin,
-    :startswith,
-    :endswith,
-    :isempty,
-    :contains
-)
-
-const FAIL_FAST = Ref{Bool}(false)
+const global_fail_fast = OncePerProcess{Bool}() do
+    return Base.get_bool_env("JULIA_TEST_FAILFAST", false)
+end
 
 #-----------------------------------------------------------------------
 
@@ -155,14 +154,16 @@ struct Fail <: Result
     context::Union{Nothing, String}
     source::LineNumberNode
     message_only::Bool
-    function Fail(test_type::Symbol, orig_expr, data, value, context, source::LineNumberNode, message_only::Bool)
+    backtrace::Union{Nothing, String}
+    function Fail(test_type::Symbol, orig_expr, data, value, context, source::LineNumberNode, message_only::Bool, backtrace=nothing)
         return new(test_type,
             string(orig_expr),
             data === nothing ? nothing : string(data),
             string(isa(data, Type) ? typeof(value) : value),
             context,
             source,
-            message_only)
+            message_only,
+            backtrace)
     end
 end
 
@@ -184,13 +185,18 @@ function Base.show(io::IO, t::Fail)
         else
             print(io, "\n    Expected: ", data)
             print(io, "\n      Thrown: ", value)
+            print(io, "\n")
+            if t.backtrace !== nothing
+                # Capture error message and indent to match
+                join(io, ("      " * line for line in filter!(!isempty, split(t.backtrace, "\n"))), "\n")
+            end
         end
     elseif t.test_type === :test_throws_nothing
         # An exception was expected, but no exception was thrown
         print(io, "\n    Expected: ", data)
         print(io, "\n  No exception thrown")
     elseif t.test_type === :test
-        if data !== nothing
+        if data !== nothing && t.orig_expr != data
             # The test was an expression, so display the term-by-term
             # evaluated version as well
             print(io, "\n   Evaluated: ", data)
@@ -199,7 +205,6 @@ function Base.show(io::IO, t::Fail)
             print(io, "\n     Context: ", t.context)
         end
     end
-    println(io) # add some visual space to separate sequential failures
 end
 
 """
@@ -215,15 +220,22 @@ struct Error <: Result
     orig_expr::String
     value::String
     backtrace::String
+    context::Union{Nothing, String}
     source::LineNumberNode
 
-    function Error(test_type::Symbol, orig_expr, value, bt, source::LineNumberNode)
-        if test_type === :test_error
-            bt = scrub_exc_stack(bt, nothing, extract_file(source))
-        end
-        if test_type === :test_error || test_type === :nontest_error
-            bt_str = try # try the latest world for this, since we might have eval'd new code for show
-                    Base.invokelatest(sprint, Base.show_exception_stack, bt; context=stdout)
+    function Error(test_type::Symbol, orig_expr, value, excs::Union{Base.ExceptionStack,Nothing},
+                   source::LineNumberNode, context::Union{Nothing, String}=nothing)
+        @nospecialize orig_expr value
+        bt_str = ""
+        if !isnothing(excs)
+            if test_type === :test_error
+                excs = scrub_exc_stack(excs, nothing, extract_file(source))
+            end
+            if test_type === :test_error || test_type === :nontest_error
+                bt_str = try
+                    # try the latest world for this, since we might have eval'd new code for show
+                    # Apply REPL backtrace scrubbing to hide REPL internals, similar to how REPL.jl handles it
+                    Base.invokelatest(sprint, Base.show_exception_stack, Base.scrub_repl_backtrace(excs); context=stdout)
                 catch ex
                     "#=ERROR showing exception stack=# " *
                         try
@@ -232,8 +244,7 @@ struct Error <: Result
                             "of type " * string(typeof(ex))
                         end
                 end
-        else
-            bt_str = ""
+            end
         end
         value = try # try the latest world for this, since we might have eval'd new code for show
                 Base.invokelatest(sprint, show, value, context = :limit => true)
@@ -249,8 +260,14 @@ struct Error <: Result
             string(orig_expr),
             value,
             bt_str,
+            context,
             source)
     end
+
+    # Internal constructor for creating Error with pre-processed values (used by ContextTestSet)
+    function Error(test_type::Symbol, orig_expr::String, value::String, backtrace::String, context::Union{Nothing, String}, source::LineNumberNode)
+        return new(test_type, orig_expr, value, backtrace, context, source)
+    end
 end
 
 function Base.show(io::IO, t::Error)
@@ -268,8 +285,11 @@ function Base.show(io::IO, t::Error)
     elseif t.test_type === :test_error
         println(io, "  Test threw exception")
         println(io, "  Expression: ", t.orig_expr)
+        if t.context !== nothing
+            println(io, "     Context: ", t.context)
+        end
         # Capture error message and indent to match
-        join(io, ("  " * line for line in split(t.backtrace, "\n")), "\n")
+        join(io, ("  " * line for line in filter!(!isempty, split(t.backtrace, "\n"))), "\n")
     elseif t.test_type === :test_unbroken
         # A test that was expected to fail did not
         println(io, " Unexpected Pass")
@@ -279,7 +299,7 @@ function Base.show(io::IO, t::Error)
         # we had an error outside of a @test
         println(io, "  Got exception outside of a @test")
         # Capture error message and indent to match
-        join(io, ("  " * line for line in split(t.backtrace, "\n")), "\n")
+        join(io, ("  " * line for line in filter!(!isempty, split(t.backtrace, "\n"))), "\n")
     end
 end
 
@@ -336,50 +356,61 @@ end
 
 struct Threw <: ExecutionResult
     exception
-    backtrace::Union{Nothing,Vector{Any}}
+    current_exceptions::Base.ExceptionStack
     source::LineNumberNode
 end
 
-function eval_test(evaluated::Expr, quoted::Expr, source::LineNumberNode, negate::Bool=false)
-    evaled_args = evaluated.args
+function eval_test_comparison(comparison::Expr, quoted::Expr, source::LineNumberNode, negate::Bool=false)
+    comparison.head === :comparison || throw(ArgumentError("$comparison is not a comparison expression"))
+    comparison_args = comparison.args
     quoted_args = quoted.args
-    n = length(evaled_args)
+    n = length(comparison_args)
     kw_suffix = ""
-    if evaluated.head === :comparison
-        args = evaled_args
-        res = true
-        i = 1
-        while i < n
-            a, op, b = args[i], args[i+1], args[i+2]
-            if res
-                res = op(a, b)
-            end
-            quoted_args[i] = a
-            quoted_args[i+2] = b
-            i += 2
-        end
 
-    elseif evaluated.head === :call
-        op = evaled_args[1]
-        kwargs = (evaled_args[2]::Expr).args  # Keyword arguments from `Expr(:parameters, ...)`
-        args = evaled_args[3:n]
-
-        res = op(args...; kwargs...)
-
-        # Create "Evaluated" expression which looks like the original call but has all of
-        # the arguments evaluated
-        func_sym = quoted_args[1]::Union{Symbol,Expr}
-        if isempty(kwargs)
-            quoted = Expr(:call, func_sym, args...)
-        elseif func_sym === :≈ && !res
-            quoted = Expr(:call, func_sym, args...)
-            kw_suffix = " ($(join(["$k=$v" for (k, v) in kwargs], ", ")))"
-        else
-            kwargs_expr = Expr(:parameters, [Expr(:kw, k, v) for (k, v) in kwargs]...)
-            quoted = Expr(:call, func_sym, kwargs_expr, args...)
+    res = true
+    i = 1
+    while i < n
+        a, op, b = comparison_args[i], comparison_args[i+1], comparison_args[i+2]
+        if res
+            res = op(a, b)
         end
+        quoted_args[i] = a
+        quoted_args[i+2] = b
+        i += 2
+    end
+
+    if negate
+        res = !res
+        quoted = Expr(:call, :!, quoted)
+    end
+
+    Returned(res,
+             # stringify arguments in case of failure, for easy remote printing
+             res === true ? quoted : sprint(print, quoted, context=(:limit => true)) * kw_suffix,
+             source)
+end
+
+function eval_test_function(func, args, kwargs, quoted_func::Union{Expr,Symbol}, source::LineNumberNode, negate::Bool=false)
+    res = func(args...; kwargs...)
+
+    # Create "Evaluated" expression which looks like the original call but has all of
+    # the arguments evaluated
+    kw_suffix = ""
+    if quoted_func === :≈ && !res
+        kw_suffix = " ($(join(["$k=$v" for (k, v) in kwargs], ", ")))"
+        quoted_args = args
+    elseif isempty(kwargs)
+        quoted_args = args
+    else
+        kwargs_expr = Expr(:parameters, [Expr(:kw, k, v) for (k, v) in kwargs]...)
+        quoted_args = [kwargs_expr, args...]
+    end
+
+    # Properly render broadcast function call syntax, e.g. `(==).(1, 2)` or `Base.:(==).(1, 2)`.
+    quoted = if isa(quoted_func, Expr) && quoted_func.head === :. && length(quoted_func.args) == 1
+        Expr(:., quoted_func.args[1], Expr(:tuple, quoted_args...))
     else
-        throw(ArgumentError("Unhandled expression type: $(evaluated.head)"))
+        Expr(:call, quoted_func, quoted_args...)
     end
 
     if negate
@@ -576,6 +607,94 @@ macro test_skip(ex, kws...)
     return :(record(get_testset(), $testres))
 end
 
+function _should_escape_call(@nospecialize ex)
+    isa(ex, Expr) || return false
+
+    args = if ex.head === :call
+        ex.args[2:end]
+    elseif ex.head === :. && length(ex.args) == 2 && isa(ex.args[2], Expr) && ex.args[2].head === :tuple
+        # Support for broadcasted function calls (e.g. `(==).(1, 2)`)
+        ex.args[2].args
+    else
+        # Expression is not a function call
+        return false
+    end
+
+    # Avoid further processing on calls without any arguments
+    return length(args) > 0
+end
+
+# Escapes all of the positional arguments and keywords of a function such that we can call
+# the function at runtime.
+function _escape_call(@nospecialize ex)
+    if isa(ex, Expr) && ex.head === :call
+        # Update broadcast comparison calls to the function call syntax
+        # (e.g. `1 .== 1` becomes `(==).(1, 1)`)
+        func_str = string(ex.args[1])
+        # Check if this is a broadcast operator (starts with '.' and has more characters that aren't '.')
+        is_broadcast = length(func_str) >= 2 && first(func_str) == '.' && any(c -> c != '.', func_str[2:end])
+        escaped_func = if is_broadcast
+            esc(Expr(:., Symbol(func_str[2:end])))
+        else
+            esc(ex.args[1])
+        end
+        quoted_func = QuoteNode(ex.args[1])
+        args = ex.args[2:end]
+    elseif isa(ex, Expr) && ex.head === :. && length(ex.args) == 2 && isa(ex.args[2], Expr) && ex.args[2].head === :tuple
+        # Support for broadcasted function calls (e.g. `(==).(1, 2)`)
+        escaped_func = if isa(ex.args[1], Expr) && ex.args[1].head == :.
+            Expr(:call, Expr(:., :Broadcast, QuoteNode(:BroadcastFunction)), esc(ex.args[1]))
+        else
+            Expr(:., esc(ex.args[1]))
+        end
+        quoted_func = QuoteNode(Expr(:., ex.args[1]))
+        args = ex.args[2].args
+    else
+        throw(ArgumentError("$ex is not a call expression"))
+    end
+
+    escaped_args = []
+    escaped_kwargs = []
+
+    # Positional arguments and keywords that occur before `;`. Note that the keywords are
+    # being revised into a form we can splat.
+    for a in args
+        if isa(a, Expr) && a.head === :parameters
+            continue
+        elseif isa(a, Expr) && a.head === :kw
+            # Keywords that occur before `;`. Note that the keywords are being revised into
+            # a form we can splat.
+            push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(a.args[1]), esc(a.args[2])))
+        elseif isa(a, Expr) && a.head === :...
+            push!(escaped_args, Expr(:..., esc(a.args[1])))
+        else
+            push!(escaped_args, esc(a))
+        end
+    end
+
+    # Keywords that occur after ';'
+    if length(args) > 0 && isa(args[1], Expr) && args[1].head === :parameters
+        for kw in args[1].args
+            if isa(kw, Expr) && kw.head === :kw
+                push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(kw.args[1]), esc(kw.args[2])))
+            elseif isa(kw, Expr) && kw.head === :...
+                push!(escaped_kwargs, Expr(:..., esc(kw.args[1])))
+            elseif isa(kw, Expr) && kw.head === :.
+                push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(kw.args[2].value), esc(Expr(:., kw.args[1], QuoteNode(kw.args[2].value)))))
+            elseif isa(kw, Symbol)
+                push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(kw), esc(kw)))
+            end
+        end
+    end
+
+    return (;
+        func=escaped_func,
+        args=escaped_args,
+        kwargs=escaped_kwargs,
+        quoted_func,
+    )
+end
+
 # An internal function, called by the code generated by the @test
 # macro to get results of the test expression.
 # In the special case of a comparison, e.g. x == 5, generate code to
@@ -603,60 +722,22 @@ function get_test_result(ex, source)
         ex = Expr(:comparison, ex.args[1], ex.head, ex.args[2])
     end
     if isa(ex, Expr) && ex.head === :comparison
-        # pass all terms of the comparison to `eval_comparison`, as an Expr
+        # pass all terms of the comparison to `eval_test_comparison`, as a tuple
         escaped_terms = [esc(arg) for arg in ex.args]
         quoted_terms = [QuoteNode(arg) for arg in ex.args]
-        testret = :(eval_test(
+        testret = :(eval_test_comparison(
             Expr(:comparison, $(escaped_terms...)),
             Expr(:comparison, $(quoted_terms...)),
             $(QuoteNode(source)),
             $negate,
         ))
-    elseif isa(ex, Expr) && ex.head === :call && ex.args[1] in DISPLAY_FAILED
-        escaped_func = esc(ex.args[1])
-        quoted_func = QuoteNode(ex.args[1])
-
-        escaped_args = []
-        escaped_kwargs = []
-
-        # Keywords that occur before `;`. Note that the keywords are being revised into
-        # a form we can splat.
-        for a in ex.args[2:end]
-            if isa(a, Expr) && a.head === :kw
-                push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(a.args[1]), esc(a.args[2])))
-            end
-        end
-
-        # Keywords that occur after ';'
-        parameters_expr = ex.args[2]
-        if isa(parameters_expr, Expr) && parameters_expr.head === :parameters
-            for a in parameters_expr.args
-                if isa(a, Expr) && a.head === :kw
-                    push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(a.args[1]), esc(a.args[2])))
-                elseif isa(a, Expr) && a.head === :...
-                    push!(escaped_kwargs, Expr(:..., esc(a.args[1])))
-                elseif isa(a, Expr) && a.head === :.
-                    push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(a.args[2].value), esc(Expr(:., a.args[1], QuoteNode(a.args[2].value)))))
-                elseif isa(a, Symbol)
-                    push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(a), esc(a)))
-                end
-            end
-        end
-
-        # Positional arguments
-        for a in ex.args[2:end]
-            isa(a, Expr) && a.head in (:kw, :parameters) && continue
-
-            if isa(a, Expr) && a.head === :...
-                push!(escaped_args, Expr(:..., esc(a.args[1])))
-            else
-                push!(escaped_args, esc(a))
-            end
-        end
-
-        testret = :(eval_test(
-            Expr(:call, $escaped_func, Expr(:parameters, $(escaped_kwargs...)), $(escaped_args...)),
-            Expr(:call, $quoted_func),
+    elseif _should_escape_call(ex)
+        call = _escape_call(ex)
+        testret = :(eval_test_function(
+            $(call.func),
+            ($(call.args...),),
+            ($(call.kwargs...),),
+            $(call.quoted_func),
             $(QuoteNode(source)),
             $negate,
         ))
@@ -677,7 +758,7 @@ end
 
 # An internal function, called by the code generated by the @test
 # macro to actually perform the evaluation and manage the result.
-function do_test(result::ExecutionResult, orig_expr)
+function do_test(result::ExecutionResult, @nospecialize orig_expr)
     # get_testset() returns the most recently added test set
     # We then call record() with this test set and the test result
     if isa(result, Returned)
@@ -693,30 +774,30 @@ function do_test(result::ExecutionResult, orig_expr)
                     Fail(:test, orig_expr, result.data, value, nothing, result.source, false)
         else
             # If the result is non-Boolean, this counts as an Error
-            Error(:test_nonbool, orig_expr, value, nothing, result.source)
+            Error(:test_nonbool, orig_expr, value, nothing, result.source, nothing)
         end
     else
         # The predicate couldn't be evaluated without throwing an
         # exception, so that is an Error and not a Fail
         @assert isa(result, Threw)
-        testres = Error(:test_error, orig_expr, result.exception, result.backtrace::Vector{Any}, result.source)
+        testres = Error(:test_error, orig_expr, result.exception, result.current_exceptions, result.source, nothing)
     end
     isa(testres, Pass) || trigger_test_failure_break(result)
     record(get_testset(), testres)
 end
 
-function do_broken_test(result::ExecutionResult, orig_expr)
+function do_broken_test(result::ExecutionResult, @nospecialize orig_expr)
     testres = Broken(:test, orig_expr)
     # Assume the test is broken and only change if the result is true
     if isa(result, Returned)
         value = result.value
         if isa(value, Bool)
             if value
-                testres = Error(:test_unbroken, orig_expr, value, nothing, result.source)
+                testres = Error(:test_unbroken, orig_expr, value, nothing, result.source, nothing)
             end
         else
             # If the result is non-Boolean, this counts as an Error
-            testres = Error(:test_nonbool, orig_expr, value, nothing, result.source)
+            testres = Error(:test_nonbool, orig_expr, value, nothing, result.source, nothing)
         end
     end
     record(get_testset(), testres)
@@ -726,17 +807,29 @@ end
 
 """
     @test_throws exception expr
+    @test_throws extype pattern expr
 
 Tests that the expression `expr` throws `exception`.
 The exception may specify either a type,
 a string, regular expression, or list of strings occurring in the displayed error message,
 a matching function,
 or a value (which will be tested for equality by comparing fields).
+
+In the two-argument form, `@test_throws exception expr`, the `exception` can be a type or a pattern.
+
+In the three-argument form, `@test_throws extype pattern expr`, both the exception type and
+a message pattern are tested. The `extype` must be a type, and `pattern` may be
+a string, regular expression, or list of strings occurring in the displayed error message,
+a matching function, or a value.
+
 Note that `@test_throws` does not support a trailing keyword form.
 
 !!! compat "Julia 1.8"
     The ability to specify anything other than a type or a value as `exception` requires Julia v1.8 or later.
 
+!!! compat "Julia 1.13"
+    The three-argument form `@test_throws extype pattern expr` requires Julia v1.12 or later.
+
 # Examples
 ```jldoctest
 julia> @test_throws BoundsError [1, 2, 3][4]
@@ -750,13 +843,19 @@ Test Passed
 julia> @test_throws "Try sqrt(Complex" sqrt(-1)
 Test Passed
      Message: "DomainError with -1.0:\\nsqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x))."
+
+julia> @test_throws ErrorException "error foo" error("error foo 1")
+Test Passed
+      Thrown: ErrorException
 ```
 
-In the final example, instead of matching a single string it could alternatively have been performed with:
+In the third example, instead of matching a single string it could alternatively have been performed with:
 
 - `["Try", "Complex"]` (a list of strings)
 - `r"Try sqrt\\([Cc]omplex"` (a regular expression)
 - `str -> occursin("complex", str)` (a matching function)
+
+In the final example, both the exception type (`ErrorException`) and message pattern (`"error foo"`) are tested.
 """
 macro test_throws(extype, ex)
     orig_ex = Expr(:inert, ex)
@@ -768,66 +867,144 @@ macro test_throws(extype, ex)
             if $(esc(extype)) != InterruptException && _e isa InterruptException
                 rethrow()
             end
-            Threw(_e, nothing, $(QuoteNode(__source__)))
+            Threw(_e, Base.current_exceptions(), $(QuoteNode(__source__)))
         end
     end
     return :(do_test_throws($result, $orig_ex, $(esc(extype))))
 end
 
+macro test_throws(extype, pattern, ex)
+    orig_ex = Expr(:inert, ex)
+    ex = Expr(:block, __source__, esc(ex))
+    result = quote
+        try
+            Returned($ex, nothing, $(QuoteNode(__source__)))
+        catch _e
+            if $(esc(extype)) != InterruptException && _e isa InterruptException
+                rethrow()
+            end
+            Threw(_e, Base.current_exceptions(), $(QuoteNode(__source__)))
+        end
+    end
+    return :(do_test_throws($result, $orig_ex, $(esc(extype)), $(esc(pattern))))
+end
+
 const MACROEXPAND_LIKE = Symbol.(("@macroexpand", "@macroexpand1", "macroexpand"))
 
+function isequalexception(@nospecialize(a), @nospecialize(b))
+    for fld in 1:nfields(b)
+        if !isequal(getfield(a, fld), getfield(b, fld))
+            return false
+        end
+    end
+    return true
+end
+function isequalexception(a::UndefVarError, b::UndefVarError)
+    # Ignore different world ages
+    return isequal(a.var, b.var) && isequal(a.scope, b.scope)
+end
+
 # An internal function, called by the code generated by @test_throws
 # to evaluate and catch the thrown exception - if it exists
-function do_test_throws(result::ExecutionResult, orig_expr, extype)
+function do_test_throws(result::ExecutionResult, @nospecialize(orig_expr), extype, pattern=nothing)
     if isa(result, Threw)
         # Check that the right type of exception was thrown
         success = false
         message_only = false
         exc = result.exception
-        # NB: Throwing LoadError from macroexpands is deprecated, but in order to limit
-        # the breakage in package tests we add extra logic here.
-        from_macroexpand =
-            orig_expr isa Expr &&
-            orig_expr.head in (:call, :macrocall) &&
-            orig_expr.args[1] in MACROEXPAND_LIKE
-        if isa(extype, Type)
-            success =
-                if from_macroexpand && extype == LoadError && exc isa Exception
-                    Base.depwarn("macroexpand no longer throws a LoadError so `@test_throws LoadError ...` is deprecated and passed without checking the error type!", :do_test_throws)
-                    true
-                else
-                    isa(exc, extype)
-                end
-        elseif isa(extype, Exception) || !isa(exc, Exception)
-            if extype isa LoadError && !(exc isa LoadError) && typeof(extype.error) == typeof(exc)
-                extype = extype.error # deprecated
+
+        # Handle three-argument form (type + pattern)
+        if pattern !== nothing
+            # In 3-arg form, first argument must be a type
+            if !isa(extype, Type)
+                testres = Fail(:test_throws_wrong, orig_expr, extype, exc, nothing, result.source, false, "First argument must be an exception type in three-argument form")
+                record(get_testset(), testres)
+                return
             end
-            if isa(exc, typeof(extype))
-                success = true
-                for fld in 1:nfields(extype)
-                    if !isequal(getfield(extype, fld), getfield(exc, fld))
-                        success = false
-                        break
-                    end
-                end
+
+            # Format combined expected value for display
+            pattern_str = isa(pattern, AbstractString) ? repr(pattern) :
+                         isa(pattern, Function) ? "< match function >" :
+                         string(pattern)
+            combined_expected = string(extype) * " with pattern " * pattern_str
+
+            # Check both type and pattern
+            type_success = isa(exc, extype)
+            if type_success
+                exc_msg = sprint(showerror, exc)
+                pattern_success = contains_warn(exc_msg, pattern)
+                success = pattern_success
+            else
+                success = false
             end
+            extype = combined_expected  # Use combined format for all results
         else
-            message_only = true
-            exc = sprint(showerror, exc)
-            success = contains_warn(exc, extype)
-            exc = repr(exc)
-            if isa(extype, AbstractString)
-                extype = repr(extype)
-            elseif isa(extype, Function)
-                extype = "< match function >"
+            # Original two-argument form logic
+            # NB: Throwing LoadError from macroexpands is deprecated, but in order to limit
+            # the breakage in package tests we add extra logic here.
+            from_macroexpand =
+                orig_expr isa Expr &&
+                orig_expr.head in (:call, :macrocall) &&
+                orig_expr.args[1] in MACROEXPAND_LIKE
+            if isa(extype, Type)
+                success =
+                    if from_macroexpand && extype == LoadError && exc isa Exception
+                        Base.depwarn("macroexpand no longer throws a LoadError so `@test_throws LoadError ...` is deprecated and passed without checking the error type!", :do_test_throws)
+                        true
+                    elseif extype == ErrorException && isa(exc, FieldError)
+                        Base.depwarn(lazy"Using ErrorException to test field access is deprecated; use FieldError instead.", :do_test_throws)
+                        true
+                    else
+                        isa(exc, extype)
+                    end
+            elseif isa(extype, Exception) || !isa(exc, Exception)
+                if extype isa LoadError && !(exc isa LoadError) && typeof(extype.error) == typeof(exc)
+                    extype = extype.error # deprecated
+                end
+                # Support `UndefVarError(:x)` meaning `UndefVarError(:x, scope)` for any `scope`.
+                # Retains the behaviour from pre-v1.11 when `UndefVarError` didn't have `scope`.
+                if isa(extype, UndefVarError) && !isdefined(extype, :scope)
+                    success = exc isa UndefVarError && exc.var == extype.var
+                else isa(exc, typeof(extype))
+                    success = isequalexception(exc, extype)
+                end
+            else
+                message_only = true
+                exc = sprint(showerror, exc)
+                success = contains_warn(exc, extype)
+                exc = repr(exc)
+                if isa(extype, AbstractString)
+                    extype = repr(extype)
+                elseif isa(extype, Function)
+                    extype = "< match function >"
+                end
             end
         end
         if success
             testres = Pass(:test_throws, orig_expr, extype, exc, result.source, message_only)
         else
-            testres = Fail(:test_throws_wrong, orig_expr, extype, exc, nothing, result.source, message_only)
+            excs = result.current_exceptions
+            bt = scrub_exc_stack(excs, nothing, extract_file(result.source))
+            bt_str = try # try the latest world for this, since we might have eval'd new code for show
+                Base.invokelatest(sprint, Base.show_exception_stack, bt; context=stdout)
+            catch ex
+                "#=ERROR showing exception stack=# " *
+                    try
+                        sprint(Base.showerror, ex, catch_backtrace(); context=stdout)
+                    catch
+                        "of type " * string(typeof(ex))
+                    end
+            end
+            testres = Fail(:test_throws_wrong, orig_expr, extype, exc, nothing, result.source, message_only, bt_str)
         end
     else
+        # Handle no exception case - need to format extype properly for 3-arg form
+        if pattern !== nothing
+            pattern_str = isa(pattern, AbstractString) ? repr(pattern) :
+                         isa(pattern, Function) ? "< match function >" :
+                         string(pattern)
+            extype = string(extype) * " with pattern " * pattern_str
+        end
         testres = Fail(:test_throws_nothing, orig_expr, extype, nothing, nothing, result.source, false)
     end
     record(get_testset(), testres)
@@ -858,21 +1035,7 @@ Note: Warnings generated by `@warn` cannot be tested with this macro. Use
 [`@test_logs`](@ref) instead.
 """
 macro test_warn(msg, expr)
-    quote
-        let fname = tempname()
-            try
-                ret = open(fname, "w") do f
-                    redirect_stderr(f) do
-                        $(esc(expr))
-                    end
-                end
-                @test contains_warn(read(fname, String), $(esc(msg)))
-                ret
-            finally
-                rm(fname, force=true)
-            end
-        end
-    end
+    test_warn_expr(expr, msg)
 end
 
 """
@@ -885,28 +1048,35 @@ Note: The absence of warnings generated by `@warn` cannot be tested
 with this macro. Use [`@test_logs`](@ref) instead.
 """
 macro test_nowarn(expr)
-    quote
-        # Duplicate some code from `@test_warn` to allow printing the content of
-        # `stderr` again to `stderr` here while suppressing it for `@test_warn`.
-        # If that shouldn't be used, it would be possible to just use
-        #     @test_warn isempty $(esc(expr))
-        # here.
-        let fname = tempname()
-            try
-                ret = open(fname, "w") do f
-                    redirect_stderr(f) do
-                        $(esc(expr))
-                    end
-                end
-                stderr_content = read(fname, String)
-                print(stderr, stderr_content) # this is helpful for debugging
-                @test isempty(stderr_content)
-                ret
+    # allow printing the content of `stderr` again to `stderr` here while suppressing it
+    # for `@test_warn`. If that shouldn't be used, this could just be `test_warn_expr(expr, #=msg=#isempty)`
+    test_warn_expr(expr, function (s)
+        print(stderr, s) # this is helpful for debugging
+        isempty(s)
+    end)
+end
+
+function test_warn_expr(@nospecialize(expr), @nospecialize(msg))
+    return :(let fname = tempname()
+        try
+            f = open(fname, "w")
+            stdold = stderr
+            redirect_stderr(f)
+            ret = try
+                # We deliberately don't use the thunk versions of open/redirect
+                # to ensure that adding the macro does not change the toplevel-ness
+                # of the resulting expression.
+                $(esc(expr))
             finally
-                rm(fname, force=true)
+                redirect_stderr(stdold)
+                close(f)
             end
+            @test contains_warn(read(fname, String), $(esc(msg)))
+            ret
+        finally
+            rm(fname, force=true)
         end
-    end
+    end)
 end
 
 #-----------------------------------------------------------------------
@@ -947,7 +1117,7 @@ if get_testset_depth() != 0
 end
 ```
 """
-function finish end
+finish(ts::AbstractTestSet) = ts
 
 """
     TestSetException
@@ -982,7 +1152,6 @@ end
 A simple fallback test set that throws immediately on a failure.
 """
 struct FallbackTestSet <: AbstractTestSet end
-fallback_testset = FallbackTestSet()
 
 struct FallbackTestSetException <: Exception
     msg::String
@@ -999,8 +1168,6 @@ function record(ts::FallbackTestSet, t::Union{Fail, Error})
     println(t)
     throw(FallbackTestSetException("There was an error during testing"))
 end
-# We don't need to do anything as we don't record anything
-finish(ts::FallbackTestSet) = ts
 
 #-----------------------------------------------------------------------
 
@@ -1028,6 +1195,13 @@ function record(c::ContextTestSet, t::Fail)
     context = t.context === nothing ? context : string(t.context, "\n              ", context)
     record(c.parent_ts, Fail(t.test_type, t.orig_expr, t.data, t.value, context, t.source, t.message_only))
 end
+function record(c::ContextTestSet, t::Error)
+    context = string(c.context_name, " = ", c.context)
+    context = t.context === nothing ? context : string(t.context, "\n              ", context)
+    # Create a new Error with the same data but updated context using internal constructor
+    new_error = Error(t.test_type, t.orig_expr, t.value, t.backtrace, context, t.source)
+    record(c.parent_ts, new_error)
+end
 
 #-----------------------------------------------------------------------
 
@@ -1039,28 +1213,48 @@ are any `Fail`s or `Error`s, an exception will be thrown only at the end,
 along with a summary of the test results.
 """
 mutable struct DefaultTestSet <: AbstractTestSet
-    description::String
+    const description::String
+    const verbose::Bool
+    const showtiming::Bool
+    const failfast::Bool
+    const file::Union{String,Nothing}
+    const time_start::Float64
+
+    # Warning: Not thread-safe
+    rng::Union{Nothing,AbstractRNG}
+
+    @atomic n_passed::Int
+    @atomic time_end::Float64
+
+    # Memoized test result state over `results` - Computed only once the test set is finished
+    # 0x0: Unknown
+    # 0x1: All passed
+    # 0x2: Some failed
+    @atomic anynonpass::UInt8
+
+    results_lock::ReentrantLock
     results::Vector{Any}
-    n_passed::Int
-    anynonpass::Bool
-    verbose::Bool
-    showtiming::Bool
-    time_start::Float64
-    time_end::Union{Float64,Nothing}
-    failfast::Bool
-    file::Union{String,Nothing}
-end
-function DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming::Bool = true, failfast::Union{Nothing,Bool} = nothing, source = nothing)
+end
+function DefaultTestSet(desc::AbstractString;
+                        verbose::Bool = something(Base.ScopedValues.get(VERBOSE_TESTSETS)),
+                        showtiming::Bool = true,
+                        failfast::Union{Nothing,Bool} = nothing,
+                        source = nothing,
+                        time_start::Float64 = time(),
+                        rng = nothing,
+                        )
     if isnothing(failfast)
         # pass failfast state into child testsets
         parent_ts = get_testset()
         if parent_ts isa DefaultTestSet
             failfast = parent_ts.failfast
         else
-            failfast = false
+            failfast = global_fail_fast()
         end
     end
-    return DefaultTestSet(String(desc)::String, [], 0, false, verbose, showtiming, time(), nothing, failfast, extract_file(source))
+    return DefaultTestSet(String(desc)::String,
+        verbose, showtiming, failfast, extract_file(source),
+        time_start, rng, 0, 0., 0x00, ReentrantLock(), Any[])
 end
 extract_file(source::LineNumberNode) = extract_file(source.file)
 extract_file(file::Symbol) = string(file)
@@ -1069,56 +1263,114 @@ extract_file(::Nothing) = nothing
 struct FailFastError <: Exception end
 
 # For a broken result, simply store the result
-record(ts::DefaultTestSet, t::Broken) = (push!(ts.results, t); t)
-# For a passed result, do not store the result since it uses a lot of memory
-record(ts::DefaultTestSet, t::Pass) = (ts.n_passed += 1; t)
+record(ts::DefaultTestSet, t::Broken) = ((@lock ts.results_lock push!(ts.results, t)); t)
+# For a passed result, do not store the result since it uses a lot of memory, unless
+# `TEST_RECORD_PASSES[]` is true. i.e. overridden by scoped value or with env var
+# `JULIA_TEST_RECORD_PASSES=true` set in the environment.
+function record(ts::DefaultTestSet, t::Pass)
+    @atomic :monotonic ts.n_passed += 1
+    if TEST_RECORD_PASSES[]
+        # throw away the captured data so it can be GC-ed
+        t_nodata = Pass(t.test_type, t.orig_expr, nothing, t.value, t.source, t.message_only)
+        @lock ts.results_lock push!(ts.results, t_nodata)
+        return t_nodata
+    end
+    return t
+end
 
 # For the other result types, immediately print the error message
 # but do not terminate. Print a backtrace.
 function record(ts::DefaultTestSet, t::Union{Fail, Error}; print_result::Bool=TESTSET_PRINT_ENABLE[])
     if print_result
+        println() # add some visual space to separate sequential failures
         print(ts.description, ": ")
         # don't print for interrupted tests
         if !(t isa Error) || t.test_type !== :test_interrupted
             print(t)
             if !isa(t, Error) # if not gets printed in the show method
-                Base.show_backtrace(stdout, scrub_backtrace(backtrace(), ts.file, extract_file(t.source)))
+                Base.show_backtrace(stdout, scrub_backtrace(backtrace(), ts.file, extract_file(t.source)); prefix="  ")
             end
             println()
         end
     end
-    push!(ts.results, t)
-    (FAIL_FAST[] || ts.failfast) && throw(FailFastError())
+    @lock ts.results_lock push!(ts.results, t)
+    ts.failfast && throw(FailFastError())
     return t
 end
 
+"""
+    print_verbose(::AbstractTestSet)::Bool
+
+Whether printing involving this `AbstractTestSet` should be verbose or not.
+
+Defaults to `false`.
+"""
+function print_verbose end
+
+"""
+    results(::AbstractTestSet)
+
+Return an iterator of results aggregated by this `AbstractTestSet`, if any were recorded.
+
+Defaults to the empty tuple.
+"""
+function results end
+
+print_verbose(ts::DefaultTestSet) = ts.verbose
+results(ts::DefaultTestSet) = ts.results
+
 # When a DefaultTestSet finishes, it records itself to its parent
 # testset, if there is one. This allows for recursive printing of
 # the results at the end of the tests
-record(ts::DefaultTestSet, t::AbstractTestSet) = push!(ts.results, t)
+record(ts::DefaultTestSet, t::AbstractTestSet) = @lock ts.results_lock push!(ts.results, t)
 
 @specialize
 
-function print_test_errors(ts::DefaultTestSet)
-    for t in ts.results
+"""
+    print_test_errors([io::IO], ts::AbstractTestSet)
+
+Prints the errors that were recorded by this `AbstractTestSet` after it
+was `finish`ed. If `io` is not provided, defaults to `stdout`.
+"""
+function print_test_errors(ts::AbstractTestSet)
+    print_test_errors(stdout, ts)
+end
+
+function print_test_errors(io::IO, ts::AbstractTestSet)
+    for t in results(ts)
         if isa(t, Error) || isa(t, Fail)
-            println("Error in testset $(ts.description):")
-            show(t)
-            println()
-        elseif isa(t, DefaultTestSet)
-            print_test_errors(t)
+            println(io, "Error in testset $(ts.description):")
+            show(io, t)
+            println(io)
+        elseif isa(t, AbstractTestSet)
+            print_test_errors(io, t)
         end
     end
 end
 
-function print_test_results(ts::DefaultTestSet, depth_pad=0)
+"""
+    print_test_results([io::IO], ts::AbstractTestSet, depth_pad=0)
+
+Print the results of an `AbstractTestSet` as a formatted table.
+
+`depth_pad` refers to how much padding should be added in front of all output.
+If `io` is not provided, defaults to `stdout`.
+
+Called inside of `Test.finish`, if the `finish`ed testset is the topmost
+testset.
+"""
+function print_test_results(ts::AbstractTestSet, depth_pad=0)
+    print_test_results(stdout, ts, depth_pad)
+end
+
+function print_test_results(io::IO, ts::AbstractTestSet, depth_pad=0)
     # Calculate the overall number for each type so each of
     # the test result types are aligned
-    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts)
-    total_pass   = passes + c_passes
-    total_fail   = fails  + c_fails
-    total_error  = errors + c_errors
-    total_broken = broken + c_broken
+    tc = get_test_counts(ts)
+    total_pass   = tc.passes + tc.cumulative_passes
+    total_fail   = tc.fails  + tc.cumulative_fails
+    total_error  = tc.errors + tc.cumulative_errors
+    total_broken = tc.broken + tc.cumulative_broken
     dig_pass   = total_pass   > 0 ? ndigits(total_pass)   : 0
     dig_fail   = total_fail   > 0 ? ndigits(total_fail)   : 0
     dig_error  = total_error  > 0 ? ndigits(total_error)  : 0
@@ -1131,44 +1383,50 @@ function print_test_results(ts::DefaultTestSet, depth_pad=0)
     fail_width   = dig_fail   > 0 ? max(length("Fail"),   dig_fail)   : 0
     error_width  = dig_error  > 0 ? max(length("Error"),  dig_error)  : 0
     broken_width = dig_broken > 0 ? max(length("Broken"), dig_broken) : 0
-    total_width  = dig_total  > 0 ? max(length("Total"),  dig_total)  : 0
-    duration_width = max(length("Time"), length(duration))
+    total_width  = max(textwidth("Total"),  dig_total)
+    duration_width = max(textwidth("Time"), textwidth(tc.duration))
     # Calculate the alignment of the test result counts by
     # recursively walking the tree of test sets
-    align = max(get_alignment(ts, 0), length("Test Summary:"))
+    align = max(get_alignment(ts, depth_pad), textwidth("Test Summary:"))
     # Print the outer test set header once
-    pad = total == 0 ? "" : " "
-    printstyled(rpad("Test Summary:", align, " "), " |", pad; bold=true)
+    printstyled(io, rpad("Test Summary:", align, " "), " |", " "; bold=true)
     if pass_width > 0
-        printstyled(lpad("Pass", pass_width, " "), "  "; bold=true, color=:green)
+        printstyled(io, lpad("Pass", pass_width, " "), "  "; bold=true, color=:green)
     end
     if fail_width > 0
-        printstyled(lpad("Fail", fail_width, " "), "  "; bold=true, color=Base.error_color())
+        printstyled(io, lpad("Fail", fail_width, " "), "  "; bold=true, color=Base.error_color())
     end
     if error_width > 0
-        printstyled(lpad("Error", error_width, " "), "  "; bold=true, color=Base.error_color())
+        printstyled(io, lpad("Error", error_width, " "), "  "; bold=true, color=Base.error_color())
     end
     if broken_width > 0
-        printstyled(lpad("Broken", broken_width, " "), "  "; bold=true, color=Base.warn_color())
+        printstyled(io, lpad("Broken", broken_width, " "), "  "; bold=true, color=Base.warn_color())
     end
-    if total_width > 0
-        printstyled(lpad("Total", total_width, " "), "  "; bold=true, color=Base.info_color())
+    if total_width > 0 || total == 0
+        printstyled(io, lpad("Total", total_width, " "), "  "; bold=true, color=Base.info_color())
     end
-    if ts.showtiming
-        printstyled(lpad("Time", duration_width, " "); bold=true)
+    timing = isdefined(ts, :showtiming) ? ts.showtiming : false
+    if timing
+        printstyled(io, lpad("Time", duration_width, " "); bold=true)
     end
-    println()
+    println(io)
     # Recursively print a summary at every level
-    print_counts(ts, depth_pad, align, pass_width, fail_width, error_width, broken_width, total_width, duration_width, ts.showtiming)
+    print_counts(io, ts, depth_pad, align, pass_width, fail_width, error_width, broken_width, total_width, duration_width, timing)
+    # Print the RNG of the outer testset if there are failures
+    if total != total_pass + total_broken
+        rng = get_rng(ts)
+        if !isnothing(rng)
+            println(io, "RNG of the outermost testset: ", rng)
+        end
+    end
 end
 
-
-const TESTSET_PRINT_ENABLE = Ref(true)
-
 # Called at the end of a @testset, behaviour depends on whether
 # this is a child of another testset, or the "root" testset
 function finish(ts::DefaultTestSet; print_results::Bool=TESTSET_PRINT_ENABLE[])
-    ts.time_end = time()
+    if (@atomicswap ts.time_end = time()) !== 0.
+        error("Test set was finished more than once")
+    end
     # If we are a nested test set, do not print a full summary
     # now - let the parent test set do the printing
     if get_testset_depth() != 0
@@ -1177,11 +1435,11 @@ function finish(ts::DefaultTestSet; print_results::Bool=TESTSET_PRINT_ENABLE[])
         record(parent_ts, ts)
         return ts
     end
-    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts)
-    total_pass   = passes + c_passes
-    total_fail   = fails  + c_fails
-    total_error  = errors + c_errors
-    total_broken = broken + c_broken
+    tc = get_test_counts(ts)
+    total_pass   = tc.passes + tc.cumulative_passes
+    total_fail   = tc.fails  + tc.cumulative_fails
+    total_error  = tc.errors + tc.cumulative_errors
+    total_broken = tc.broken + tc.cumulative_broken
     total = total_pass + total_fail + total_error + total_broken
 
     if print_results
@@ -1196,136 +1454,228 @@ function finish(ts::DefaultTestSet; print_results::Bool=TESTSET_PRINT_ENABLE[])
     end
 
     # return the testset so it is returned from the @testset macro
-    ts
-end
-
-# Recursive function that finds the column that the result counts
-# can begin at by taking into account the width of the descriptions
-# and the amount of indentation. If a test set had no failures, and
-# no failures in child test sets, there is no need to include those
-# in calculating the alignment
-function get_alignment(ts::DefaultTestSet, depth::Int)
-    # The minimum width at this depth is
-    ts_width = 2*depth + length(ts.description)
-    # If not verbose and all passing, no need to look at children
-    !ts.verbose && !ts.anynonpass && return ts_width
-    # Return the maximum of this width and the minimum width
-    # for all children (if they exist)
-    isempty(ts.results) && return ts_width
-    child_widths = map(t->get_alignment(t, depth+1), ts.results)
-    return max(ts_width, maximum(child_widths))
+    return ts
 end
-get_alignment(ts, depth::Int) = 0
 
 # Recursive function that fetches backtraces for any and all errors
 # or failures the testset and its children encountered
 function filter_errors(ts::DefaultTestSet)
-    efs = []
+    efs = Union{Fail, Error}[]
     for t in ts.results
         if isa(t, DefaultTestSet)
             append!(efs, filter_errors(t))
         elseif isa(t, Union{Fail, Error})
-            append!(efs, [t])
+            push!(efs, t)
         end
     end
-    efs
+    return efs
+end
+
+"""
+    Test.get_rng(ts::AbstractTestSet)::Union{Nothing,AbstractRNG}
+
+Return the global random number generator (RNG) associated to the input testset `ts`.
+If no RNG is associated to it, return `nothing`.
+"""
+get_rng(::AbstractTestSet) = nothing
+get_rng(ts::DefaultTestSet) = ts.rng
+"""
+    Test.set_rng!(ts::AbstractTestSet, rng::AbstractRNG)::AbstractRNG
+
+Set the global random number generator (RNG) associated to the input testset `ts` to `rng`.
+If no RNG is associated to it, do nothing.
+In any case, always return the input `rng`.
+"""
+set_rng!(::AbstractTestSet, rng::AbstractRNG) = rng
+set_rng!(ts::DefaultTestSet, rng::AbstractRNG) = ts.rng = rng
+
+"""
+    TestCounts
+
+Holds the state for recursively gathering the results of a test set for display purposes.
+
+Fields:
+
+ * `customized`: Whether the function `get_test_counts` was customized for the `AbstractTestSet`
+                 this counts object is for. If a custom method was defined, always pass `true`
+                 to the constructor.
+ * `passes`: The number of passing `@test` invocations.
+ * `fails`: The number of failing `@test` invocations.
+ * `errors`: The number of erroring `@test` invocations.
+ * `broken`: The number of broken `@test` invocations.
+ * `passes`: The cumulative number of passing `@test` invocations.
+ * `fails`: The cumulative number of failing `@test` invocations.
+ * `errors`: The cumulative number of erroring `@test` invocations.
+ * `broken`: The cumulative number of broken `@test` invocations.
+ * `duration`: The total duration the `AbstractTestSet` in question ran for, as a formatted `String`.
+"""
+struct TestCounts
+    customized::Bool
+    passes::Int
+    fails::Int
+    errors::Int
+    broken::Int
+    cumulative_passes::Int
+    cumulative_fails::Int
+    cumulative_errors::Int
+    cumulative_broken::Int
+    duration::String
 end
 
-# Recursive function that counts the number of test results of each
-# type directly in the testset, and totals across the child testsets
+""""
+    get_test_counts(::AbstractTestSet)::TestCounts
+
+Recursive function that counts the number of test results of each
+type directly in the testset, and totals across the child testsets.
+
+Custom `AbstractTestSet` should implement this function to get their totals
+counted & displayed with `DefaultTestSet` as well.
+
+If this is not implemented for a custom `TestSet`, the printing falls back to
+reporting `x` for failures and `?s` for the duration.
+"""
+function get_test_counts end
+
+get_test_counts(ts::AbstractTestSet) = TestCounts(false, 0,0,0,0,0,0,0,0, format_duration(ts))
+
 function get_test_counts(ts::DefaultTestSet)
     passes, fails, errors, broken = ts.n_passed, 0, 0, 0
+    # cumulative results
     c_passes, c_fails, c_errors, c_broken = 0, 0, 0, 0
-    for t in ts.results
+    @lock ts.results_lock for t in ts.results
         isa(t, Fail)   && (fails  += 1)
         isa(t, Error)  && (errors += 1)
         isa(t, Broken) && (broken += 1)
-        if isa(t, DefaultTestSet)
-            np, nf, ne, nb, ncp, ncf, nce , ncb, duration = get_test_counts(t)
-            c_passes += np + ncp
-            c_fails  += nf + ncf
-            c_errors += ne + nce
-            c_broken += nb + ncb
+        if isa(t, AbstractTestSet)
+            tc = get_test_counts(t)::TestCounts
+            c_passes += tc.passes + tc.cumulative_passes
+            c_fails  += tc.fails + tc.cumulative_fails
+            c_errors += tc.errors + tc.cumulative_errors
+            c_broken += tc.broken + tc.cumulative_broken
         end
     end
-    ts.anynonpass = (fails + errors + c_fails + c_errors > 0)
+    duration = format_duration(ts)
+    tc = TestCounts(true, passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration)
+    # Memoize for printing convenience
+    @atomic :monotonic ts.anynonpass = (anynonpass(tc) ? 0x02 : 0x01)
+    return tc
+end
+anynonpass(tc::TestCounts) = (tc.fails + tc.errors + tc.cumulative_fails + tc.cumulative_errors > 0)
+function anynonpass(ts::DefaultTestSet)
+    if (@atomic :monotonic ts.anynonpass) == 0x00
+        get_test_counts(ts) # fills in the anynonpass field
+    end
+    return (@atomic :monotonic ts.anynonpass) != 0x01
+end
+
+# Recursive function that finds the column that the result counts
+# can begin at by taking into account the width of the descriptions
+# and the amount of indentation. If a test set had no failures, and
+# no failures in child test sets, there is no need to include those
+# in calculating the alignment
+function get_alignment(ts::DefaultTestSet, depth::Int)
+    # The minimum width at this depth is
+    ts_width = 2*depth + length(ts.description)
+    # If not verbose and all passing, no need to look at children
+    !ts.verbose && !anynonpass(ts) && return ts_width
+    # Return the maximum of this width and the minimum width
+    # for all children (if they exist)
+    isempty(ts.results) && return ts_width
+    child_widths = map(t->get_alignment(t, depth+1), ts.results)
+    return max(ts_width, maximum(child_widths))
+end
+get_alignment(ts, depth::Int) = 0
+
+"""
+    format_duration(::AbstractTestSet)
+
+Return a formatted string for printing the duration the testset ran for.
+
+If not defined, falls back to `"?s"`.
+"""
+format_duration(::AbstractTestSet) = "?s"
+
+function format_duration(ts::DefaultTestSet)
     (; time_start, time_end) = ts
-    duration = if isnothing(time_end)
-        ""
+    time_end === 0. && return ""
+
+    dur_s = time_end - time_start
+    if dur_s < 60
+        string(round(dur_s, digits = 1), "s")
     else
-        dur_s = time_end - time_start
-        if dur_s < 60
-            string(round(dur_s, digits = 1), "s")
-        else
-            m, s = divrem(dur_s, 60)
-            s = lpad(string(round(s, digits = 1)), 4, "0")
-            string(round(Int, m), "m", s, "s")
-        end
+        m, s = divrem(dur_s, 60)
+        s = lpad(string(round(s, digits = 1)), 4, "0")
+        string(round(Int, m), "m", s, "s")
     end
-    return passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration
 end
 
+print_verbose(::AbstractTestSet) = false
+results(::AbstractTestSet) = ()
+
 # Recursive function that prints out the results at each level of
 # the tree of test sets
-function print_counts(ts::DefaultTestSet, depth, align,
+function print_counts(io::IO, ts::AbstractTestSet, depth, align,
                       pass_width, fail_width, error_width, broken_width, total_width, duration_width, showtiming)
     # Count results by each type at this level, and recursively
     # through any child test sets
-    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts)
-    subtotal = passes + fails + errors + broken + c_passes + c_fails + c_errors + c_broken
+    tc = get_test_counts(ts)
+    fallbackstr = tc.customized ? " " : "x"
+    subtotal = tc.passes + tc.fails + tc.errors + tc.broken +
+               tc.cumulative_passes + tc.cumulative_fails + tc.cumulative_errors + tc.cumulative_broken
     # Print test set header, with an alignment that ensures all
     # the test results appear above each other
-    print(rpad(string("  "^depth, ts.description), align, " "), " | ")
+    print(io, rpad(string("  "^depth, ts.description), align, " "), " | ")
 
-    np = passes + c_passes
-    if np > 0
-        printstyled(lpad(string(np), pass_width, " "), "  ", color=:green)
+    n_passes = tc.passes + tc.cumulative_passes
+    if n_passes > 0
+        printstyled(io, lpad(string(n_passes), pass_width, " "), "  ", color=:green)
     elseif pass_width > 0
         # No passes at this level, but some at another level
-        print(lpad(" ", pass_width), "  ")
+        printstyled(io, lpad(fallbackstr, pass_width, " "), "  ", color=:green)
     end
 
-    nf = fails + c_fails
-    if nf > 0
-        printstyled(lpad(string(nf), fail_width, " "), "  ", color=Base.error_color())
+    n_fails = tc.fails + tc.cumulative_fails
+    if n_fails > 0
+        printstyled(io, lpad(string(n_fails), fail_width, " "), "  ", color=Base.error_color())
     elseif fail_width > 0
         # No fails at this level, but some at another level
-        print(lpad(" ", fail_width), "  ")
+        printstyled(io, lpad(fallbackstr, fail_width, " "), "  ", color=Base.error_color())
     end
 
-    ne = errors + c_errors
-    if ne > 0
-        printstyled(lpad(string(ne), error_width, " "), "  ", color=Base.error_color())
+    n_errors = tc.errors + tc.cumulative_errors
+    if n_errors > 0
+        printstyled(io, lpad(string(n_errors), error_width, " "), "  ", color=Base.error_color())
     elseif error_width > 0
         # No errors at this level, but some at another level
-        print(lpad(" ", error_width), "  ")
+        printstyled(io, lpad(fallbackstr, error_width, " "), "  ", color=Base.error_color())
     end
 
-    nb = broken + c_broken
-    if nb > 0
-        printstyled(lpad(string(nb), broken_width, " "), "  ", color=Base.warn_color())
+    n_broken = tc.broken + tc.cumulative_broken
+    if n_broken > 0
+        printstyled(io, lpad(string(n_broken), broken_width, " "), "  ", color=Base.warn_color())
     elseif broken_width > 0
         # None broken at this level, but some at another level
-        print(lpad(" ", broken_width), "  ")
+        printstyled(io, lpad(fallbackstr, broken_width, " "), "  ", color=Base.warn_color())
     end
 
-    if np == 0 && nf == 0 && ne == 0 && nb == 0
-        printstyled(lpad("None", total_width, " "), "  ", color=Base.info_color())
+    if n_passes == 0 && n_fails == 0 && n_errors == 0 && n_broken == 0
+        total_str = tc.customized ? string(subtotal) : "?"
+        printstyled(io, lpad(total_str, total_width, " "), "  ", color=Base.info_color())
     else
-        printstyled(lpad(string(subtotal), total_width, " "), "  ", color=Base.info_color())
+        printstyled(io, lpad(string(subtotal), total_width, " "), "  ", color=Base.info_color())
     end
 
     if showtiming
-        printstyled(lpad(string(duration), duration_width, " "))
+        printstyled(io, lpad(tc.duration, duration_width, " "))
     end
-    println()
+    println(io)
 
     # Only print results at lower levels if we had failures or if the user
-    # wants.
-    if (np + nb != subtotal) || (ts.verbose)
-        for t in ts.results
-            if isa(t, DefaultTestSet)
-                print_counts(t, depth + 1, align,
+    # wants. Requires the given `AbstractTestSet` to have a vector of results
+    if ((n_passes + n_broken != subtotal) || print_verbose(ts))
+        for t in results(ts)
+            if isa(t, AbstractTestSet)
+                print_counts(io, t, depth + 1, align,
                     pass_width, fail_width, error_width, broken_width, total_width, duration_width, ts.showtiming)
             end
         end
@@ -1370,14 +1720,17 @@ along with a summary of the test results.
 Any custom testset type (subtype of `AbstractTestSet`) can be given and it will
 also be used for any nested `@testset` invocations. The given options are only
 applied to the test set where they are given. The default test set type
-accepts three boolean options:
-- `verbose`: if `true`, the result summary of the nested testsets is shown even
+accepts the following options:
+- `verbose::Bool`: if `true`, the result summary of the nested testsets is shown even
   when they all pass (the default is `false`).
-- `showtiming`: if `true`, the duration of each displayed testset is shown
+- `showtiming::Bool`: if `true`, the duration of each displayed testset is shown
   (the default is `true`).
-- `failfast`: if `true`, any test failure or error will cause the testset and any
+- `failfast::Bool`: if `true`, any test failure or error will cause the testset and any
   child testsets to return immediately (the default is `false`).
   This can also be set globally via the env var `JULIA_TEST_FAILFAST`.
+- `rng::Random.AbstractRNG`: use the given random number generator (RNG) as the global one
+  for the testset.  `rng` must be `copy!`-able.  This option can be useful to locally
+  reproduce stochastic test failures which only depend on the state of the global RNG.
 
 !!! compat "Julia 1.8"
     `@testset test_func()` requires at least Julia 1.8.
@@ -1385,6 +1738,9 @@ accepts three boolean options:
 !!! compat "Julia 1.9"
     `failfast` requires at least Julia 1.9.
 
+!!! compat "Julia 1.12"
+    The `rng` option requires at least Julia 1.12.
+
 The description string accepts interpolation from the loop indices.
 If no description is provided, one is constructed based on the variables.
 If a function call is provided, its name will be used.
@@ -1397,13 +1753,19 @@ method, which by default will return a list of the testset objects used in
 each iteration.
 
 Before the execution of the body of a `@testset`, there is an implicit
-call to `Random.seed!(seed)` where `seed` is the current seed of the global RNG.
+call to `copy!(Random.default_rng(), rng)` where `rng` is the RNG of the current task, or
+the value of the RNG passed via the `rng` option.
 Moreover, after the execution of the body, the state of the global RNG is
 restored to what it was before the `@testset`. This is meant to ease
 reproducibility in case of failure, and to allow seamless
 re-arrangements of `@testset`s regardless of their side-effect on the
 global RNG state.
 
+!!! note "RNG of nested testsets"
+    Unless changed with the `rng` option, the same RNG is set at the beginning of all
+    nested testsets.  The RNG printed to screen when a testset has failures is the global RNG of
+    the outermost testset even if inner testsets have different RNGs manually set by the user.
+
 ## Examples
 ```jldoctest; filter = r"trigonometric identities |    4      4  [0-9\\.]+s"
 julia> @testset "trigonometric identities" begin
@@ -1419,14 +1781,14 @@ trigonometric identities |    4      4  0.2s
 
 # `@testset for`
 
-When `@testset for` is used, the macro starts a new test for each iteration of
+When `@testset for` is used, the macro starts a new test set for each iteration of
 the provided loop. The semantics of each test set are otherwise identical to that
-of that `begin/end` case (as if used for each loop iteration).
+of the `begin/end` case (as if used for each loop iteration).
 
 # `@testset let`
 
 When `@testset let` is used, the macro starts a *transparent* test set with
-the given object added as a context object to any failing test contained
+the given object added as a context object to any failing or erroring test contained
 therein. This is useful when performing a set of related tests on one larger
 object and it is desirable to print this larger object when any of the
 individual tests fail. Transparent test sets do not introduce additional levels
@@ -1437,7 +1799,17 @@ parent test set (with the context object appended to any failing tests.)
     `@testset let` requires at least Julia 1.9.
 
 !!! compat "Julia 1.10"
-    Multiple `let` assignements are supported since Julia 1.10.
+    Multiple `let` assignments are supported since Julia 1.10.
+
+!!! compat "Julia 1.13"
+    Context is shown when a test errors since Julia 1.13.
+
+# Special implicit world age increment for `@testset begin`
+
+World age inside `@testset begin` increments implicitly after every statement.
+This matches the behavior of ordinary toplevel code, but not that of ordinary
+`begin/end` blocks, i.e. with respect to world age, `@testset begin` behaves
+as if the body of the `begin/end` block was written at toplevel.
 
 ## Examples
 ```jldoctest
@@ -1447,8 +1819,8 @@ julia> @testset let logi = log(im)
        end
 Test Failed at none:3
   Expression: !(iszero(real(logi)))
+   Evaluated: !(iszero(0.0))
      Context: logi = 0.0 + 1.5707963267948966im
-
 ERROR: There was an error during testing
 
 julia> @testset let logi = log(im), op = !iszero
@@ -1457,9 +1829,9 @@ julia> @testset let logi = log(im), op = !iszero
        end
 Test Failed at none:3
   Expression: op(real(logi))
+   Evaluated: op(0.0)
      Context: logi = 0.0 + 1.5707963267948966im
               op = !iszero
-
 ERROR: There was an error during testing
 ```
 """
@@ -1474,8 +1846,6 @@ macro testset(args...)
         error("Expected function call, begin/end block or for loop as argument to @testset")
     end
 
-    FAIL_FAST[] = Base.get_bool_env("JULIA_TEST_FAILFAST", false)
-
     if tests.head === :for
         return testset_forloop(args, tests, __source__)
     elseif tests.head === :let
@@ -1488,6 +1858,10 @@ end
 trigger_test_failure_break(@nospecialize(err)) =
     ccall(:jl_test_failure_breakpoint, Cvoid, (Any,), err)
 
+is_failfast_error(err::FailFastError) = true
+is_failfast_error(err::LoadError) = is_failfast_error(err.error) # handle `include` barrier
+is_failfast_error(err) = false
+
 """
 Generate the code for an `@testset` with a `let` argument.
 """
@@ -1515,22 +1889,27 @@ function testset_context(args, ex, source)
     else
         error("Malformed `let` expression is given")
     end
-    reverse!(contexts)
-
     test_ex = ex.args[2]
+    for context in contexts
+        test_ex = :($Test.@with_testset($ContextTestSet($(QuoteNode(context)), $context; $options...), $test_ex))
+    end
+    ex.args[2] = test_ex
+    return esc(ex)
+end
 
-    ex.args[2] = quote
-        $(map(contexts) do context
-            :($push_testset($(ContextTestSet)($(QuoteNode(context)), $context; $options...)))
-        end...)
-        try
-            $(test_ex)
-        finally
-            $(map(_->:($pop_testset()), contexts)...)
+function insert_toplevel_latestworld(@nospecialize(tests))
+    isa(tests, Expr) || return tests
+    (tests.head !== :block) && return tests
+    ret = Expr(:block)
+    for arg in tests.args
+        push!(ret.args, arg)
+        if isa(arg, LineNumberNode) ||
+          (isa(arg, Expr) && arg.head in (:latestworld, :var"latestworld-if-toplevel"))
+            continue
         end
+        push!(ret.args, Expr(:var"latestworld-if-toplevel"))
     end
-
-    return esc(ex)
+    return ret
 end
 
 """
@@ -1551,6 +1930,8 @@ function testset_beginend_call(args, tests, source)
         testsettype = :(get_testset_depth() == 0 ? DefaultTestSet : typeof(get_testset()))
     end
 
+    tests = insert_toplevel_latestworld(tests)
+
     # Generate a block of code that initializes a new testset, adds
     # it to the task local storage, evaluates the test(s), before
     # finally removing the testset and giving it a chance to take
@@ -1563,33 +1944,36 @@ function testset_beginend_call(args, tests, source)
         else
             $(testsettype)($desc; $options...)
         end
-        push_testset(ts)
+
         # we reproduce the logic of guardseed, but this function
         # cannot be used as it changes slightly the semantic of @testset,
         # by wrapping the body in a function
-        local RNG = default_rng()
-        local oldrng = copy(RNG)
-        local oldseed = Random.GLOBAL_SEED
+        local default_rng_orig = copy(default_rng())
+        local tls_seed_orig = copy(Random.get_tls_seed())
+        local ts_rng = get_rng(ts)
+        local tls_seed = isnothing(ts_rng) ? set_rng!(ts, tls_seed_orig) : ts_rng
         try
-            # RNG is re-seeded with its own seed to ease reproduce a failed test
-            Random.seed!(Random.GLOBAL_SEED)
-            let
-                $(esc(tests))
+            @with_testset ts begin
+                # default RNG is reset to its state from last `seed!()` to ease reproduce a failed test
+                copy!(Random.default_rng(), tls_seed)
+                copy!(Random.get_tls_seed(), Random.default_rng())
+                let
+                    $(esc(tests))
+                end
             end
         catch err
             err isa InterruptException && rethrow()
             # something in the test block threw an error. Count that as an
             # error in this test set
             trigger_test_failure_break(err)
-            if err isa FailFastError
-                get_testset_depth() > 1 ? rethrow() : failfast_print()
+            if is_failfast_error(err)
+                get_testset_depth() > 0 ? rethrow() : failfast_print()
             else
-                record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source))))
+                record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source)), nothing))
             end
         finally
-            copy!(RNG, oldrng)
-            Random.set_global_seed!(oldseed)
-            pop_testset()
+            copy!(default_rng(), default_rng_orig)
+            copy!(Random.get_tls_seed(), tls_seed_orig)
             ret = finish(ts)
         end
         ret
@@ -1646,59 +2030,41 @@ function testset_forloop(args, testloop, source)
     tests = testloop.args[2]
     blk = quote
         _check_testset($testsettype, $(QuoteNode(testsettype.args[1])))
-        # Trick to handle `break` and `continue` in the test code before
-        # they can be handled properly by `finally` lowering.
-        if !first_iteration
-            pop_testset()
-            finish_errored = true
-            push!(arr, finish(ts))
-            finish_errored = false
-
-            # it's 1000 times faster to copy from tmprng rather than calling Random.seed!
-            copy!(RNG, tmprng)
-
-        end
         ts = if ($testsettype === $DefaultTestSet) && $(isa(source, LineNumberNode))
-            $(testsettype)($desc; source=$(QuoteNode(source.file)), $options...)
+            $(testsettype)($desc; source=$(QuoteNode(source.file)), $options..., rng=tls_seed)
         else
             $(testsettype)($desc; $options...)
         end
-        push_testset(ts)
-        first_iteration = false
         try
-            $(esc(tests))
+            @with_testset ts begin
+                # default RNG is reset to its state from last `seed!()` to ease reproduce a failed test
+                copy!(Random.default_rng(), tls_seed)
+                $(esc(tests))
+            end
         catch err
             err isa InterruptException && rethrow()
             # Something in the test block threw an error. Count that as an
             # error in this test set
             trigger_test_failure_break(err)
-            if !isa(err, FailFastError)
-                record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source))))
+            if is_failfast_error(err)
+                get_testset_depth() > 0 ? rethrow() : failfast_print()
+            else
+                record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source)), nothing))
             end
+        finally
+            copy!(default_rng(), default_rng_orig)
+            copy!(Random.get_tls_seed(), tls_seed_orig)
+            push!(arr, finish(ts))
         end
     end
     quote
         local arr = Vector{Any}()
-        local first_iteration = true
-        local ts
-        local finish_errored = false
-        local RNG = default_rng()
-        local oldrng = copy(RNG)
-        local oldseed = Random.GLOBAL_SEED
-        Random.seed!(Random.GLOBAL_SEED)
-        local tmprng = copy(RNG)
-        try
-            let
-                $(Expr(:for, Expr(:block, [esc(v) for v in loopvars]...), blk))
-            end
-        finally
-            # Handle `return` in test body
-            if !first_iteration && !finish_errored
-                pop_testset()
-                push!(arr, finish(ts))
-            end
-            copy!(RNG, oldrng)
-            Random.set_global_seed!(oldseed)
+        local rng_option = get($(options), :rng, nothing)
+        local default_rng_orig = copy(default_rng())
+        local tls_seed_orig = copy(Random.get_tls_seed())
+        local tls_seed = isnothing(rng_option) ? copy(Random.get_tls_seed()) : rng_option
+        let
+            $(Expr(:for, Expr(:block, [esc(v) for v in loopvars]...), blk))
         end
         arr
     end
@@ -1715,10 +2081,21 @@ function parse_testset_args(args)
     options = :(Dict{Symbol, Any}())
     for arg in args
         # a standalone symbol is assumed to be the test set we should use
-        if isa(arg, Symbol)
+        # the same is true for a symbol that's not exported from a module
+        if isa(arg, Symbol) || Base.isexpr(arg, :.)
+            if testsettype !== nothing
+                msg = """Multiple testset types provided to @testset. \
+                    This is deprecated and may error in the future."""
+                Base.depwarn(msg, :testset_multiple_testset_types; force=true)
+            end
             testsettype = esc(arg)
         # a string is the description
         elseif isa(arg, AbstractString) || (isa(arg, Expr) && arg.head === :string)
+            if desc !== nothing
+                msg = """Multiple descriptions provided to @testset. \
+                    This is deprecated and may error in the future."""
+                Base.depwarn(msg, :testset_multiple_descriptions; force=true)
+            end
             desc = esc(arg)
         # an assignment is an option
         elseif isa(arg, Expr) && arg.head === :(=)
@@ -1736,6 +2113,27 @@ end
 #-----------------------------------------------------------------------
 # Various helper methods for test sets
 
+const CURRENT_TESTSET = ScopedValue{AbstractTestSet}(FallbackTestSet())
+const TESTSET_DEPTH = ScopedValue{Int}(0)
+const TESTSET_PRINT_ENABLE = ScopedValue{Bool}(true)
+const TEST_RECORD_PASSES = LazyScopedValue{Bool}(OncePerProcess{Bool}() do
+    return Base.get_bool_env("JULIA_TEST_RECORD_PASSES", false)
+end)
+const VERBOSE_TESTSETS = LazyScopedValue{Bool}(OncePerProcess{Bool}() do
+    return Base.get_bool_env("JULIA_TEST_VERBOSE", false)
+end)
+
+macro with_testset(ts, expr)
+    quote
+        print_testset_verbose(:enter, $(esc(ts)))
+        try
+            @with(CURRENT_TESTSET => $(esc(ts)), TESTSET_DEPTH => get_testset_depth() + 1, $(esc(expr)))
+        finally
+            print_testset_verbose(:exit, $(esc(ts)))
+        end
+    end
+end
+
 """
     get_testset()
 
@@ -1743,45 +2141,54 @@ Retrieve the active test set from the task's local storage. If no
 test set is active, use the fallback default test set.
 """
 function get_testset()
-    testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[])
-    return isempty(testsets) ? fallback_testset : testsets[end]
-end
-
-"""
-    push_testset(ts::AbstractTestSet)
-
-Adds the test set to the `task_local_storage`.
-"""
-function push_testset(ts::AbstractTestSet)
-    testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[])
-    push!(testsets, ts)
-    setindex!(task_local_storage(), testsets, :__BASETESTNEXT__)
+    something(Base.ScopedValues.get(CURRENT_TESTSET))
 end
 
 """
-    pop_testset()
+    get_testset_depth()
 
-Pops the last test set added to the `task_local_storage`. If there are no
-active test sets, returns the fallback default test set.
+Return the number of active test sets, not including the default test set
 """
-function pop_testset()
-    testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[])
-    ret = isempty(testsets) ? fallback_testset : pop!(testsets)
-    setindex!(task_local_storage(), testsets, :__BASETESTNEXT__)
-    return ret
+function get_testset_depth()
+    something(Base.ScopedValues.get(TESTSET_DEPTH))
 end
 
 """
-    get_testset_depth()
-
-Return the number of active test sets, not including the default test set
+Print testset entry/exit messages when JULIA_TEST_VERBOSE is set
 """
-function get_testset_depth()
-    testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[])
-    return length(testsets)
+function print_testset_verbose(action::Symbol, ts::AbstractTestSet)
+    something(Base.ScopedValues.get(VERBOSE_TESTSETS)) || return
+    indent = "  " ^ get_testset_depth()
+    desc = if hasfield(typeof(ts), :description)
+        ts.description
+    elseif isa(ts, ContextTestSet)
+        string(ts.context_name, " = ", ts.context)
+    else
+        string(typeof(ts))
+    end
+    if action === :enter
+        println("$(indent)Starting testset: $desc")
+    elseif action === :exit
+        duration_str = ""
+        # Calculate duration for testsets that have timing information
+        if hasfield(typeof(ts), :time_start) && hasfield(typeof(ts), :showtiming)
+            if ts.showtiming
+                current_time = time()
+                dur_s = current_time - ts.time_start
+                if dur_s < 60
+                    duration_str = " ($(round(dur_s, digits = 1))s)"
+                else
+                    m, s = divrem(dur_s, 60)
+                    s = lpad(string(round(s, digits = 1)), 4, "0")
+                    duration_str = " ($(round(Int, m))m$(s)s)"
+                end
+            end
+        end
+        println("$(indent)Finished testset: $desc$duration_str")
+    end
 end
 
-_args_and_call(args...; kwargs...) = (args[1:end-1], kwargs, args[end](args[1:end-1]...; kwargs...))
+_args_and_call((args..., f)...; kwargs...) = (args, kwargs, f(args...; kwargs...))
 _materialize_broadcasted(f, args...) = Broadcast.materialize(Broadcast.broadcasted(f, args...))
 
 """
@@ -1798,7 +2205,7 @@ matches the inferred type modulo `AllowedType`, or when the return type is a sub
 `AllowedType`. This is useful when testing type stability of functions returning a small
 union such as `Union{Nothing, T}` or `Union{Missing, T}`.
 
-```jldoctest; setup = :(using InteractiveUtils), filter = r"begin\\n(.|\\n)*end"
+```jldoctest; setup = :(using InteractiveUtils; using Base: >), filter = r"begin\\n(.|\\n)*end"
 julia> f(a) = a > 1 ? 1 : 1.0
 f (generic function with 1 method)
 
@@ -1812,8 +2219,9 @@ Arguments
   #self#::Core.Const(f)
   a::Int64
 Body::UNION{FLOAT64, INT64}
-1 ─ %1 = (a > 1)::Bool
-└──      goto #3 if not %1
+1 ─ %1 = :>::Core.Const(>)
+│   %2 = (%1)(a, 1)::Bool
+└──      goto #3 if not %2
 2 ─      return 1
 3 ─      return 1.0
 
@@ -1863,25 +2271,25 @@ function _inferred(ex, mod, allow = :(Union{}))
         quote
             let allow = $(esc(allow))
                 allow isa Type || throw(ArgumentError("@inferred requires a type as second argument"))
-                $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args)
+                $(if any(@nospecialize(a)->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args)
                     # Has keywords
-                    args = gensym()
-                    kwargs = gensym()
+                    # Create the call expression with escaped user expressions
+                    call_expr = :($(esc(ex.args[1]))(args...; kwargs...))
                     quote
-                        $(esc(args)), $(esc(kwargs)), result = $(esc(Expr(:call, _args_and_call, ex.args[2:end]..., ex.args[1])))
-                        inftypes = $(gen_call_with_extracted_types(mod, Base.return_types, :($(ex.args[1])($(args)...; $(kwargs)...))))
+                        args, kwargs, result = $(esc(Expr(:call, _args_and_call, ex.args[2:end]..., ex.args[1])))
+                        # wrap in dummy hygienic-scope to work around scoping issues with `call_expr` already having `esc` on the necessary parts
+                        inftype = $(Expr(:var"hygienic-scope", gen_call_with_extracted_types(mod, Base.infer_return_type, call_expr; is_source_reflection = false), Test))
                     end
                 else
                     # No keywords
                     quote
                         args = ($([esc(ex.args[i]) for i = 2:length(ex.args)]...),)
                         result = $(esc(ex.args[1]))(args...)
-                        inftypes = Base.return_types($(esc(ex.args[1])), Base.typesof(args...))
+                        inftype = Base.infer_return_type($(esc(ex.args[1])), Base.typesof(args...))
                     end
                 end)
-                @assert length(inftypes) == 1
                 rettype = result isa Type ? Type{result} : typeof(result)
-                rettype <: allow || rettype == typesplit(inftypes[1], allow) || error("return type $rettype does not match inferred return type $(inftypes[1])")
+                rettype <: allow || rettype == typesplit(inftype, allow) || error("return type $rettype does not match inferred return type $inftype")
                 result
             end
         end
@@ -1934,7 +2342,6 @@ function detect_ambiguities(mods::Module...;
     end
     function examine(mt::Core.MethodTable)
         for m in Base.MethodList(mt)
-            m.sig == Tuple && continue # ignore Builtins
             is_in_mods(parentmodule(m), recursive, mods) || continue
             world = Base.get_world_counter()
             ambig = Ref{Int32}(0)
@@ -1951,30 +2358,7 @@ function detect_ambiguities(mods::Module...;
             end
         end
     end
-    work = Base.loaded_modules_array()
-    filter!(mod -> mod === parentmodule(mod), work) # some items in loaded_modules_array are not top modules (really just Base)
-    while !isempty(work)
-        mod = pop!(work)
-        for n in names(mod, all = true)
-            Base.isdeprecated(mod, n) && continue
-            if !isdefined(mod, n)
-                if is_in_mods(mod, recursive, mods)
-                    if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds
-                        println("Skipping ", mod, '.', n)  # typically stale exports
-                    end
-                end
-                continue
-            end
-            f = Base.unwrap_unionall(getfield(mod, n))
-            if isa(f, Module) && f !== mod && parentmodule(f) === mod && nameof(f) === n
-                push!(work, f)
-            elseif isa(f, DataType) && isdefined(f.name, :mt) && parentmodule(f) === mod && nameof(f) === n && f.name.mt !== Symbol.name.mt && f.name.mt !== DataType.name.mt
-                examine(f.name.mt)
-            end
-        end
-    end
-    examine(Symbol.name.mt)
-    examine(DataType.name.mt)
+    examine(Core.methodtable)
     return collect(ambs)
 end
 
@@ -2022,30 +2406,7 @@ function detect_unbound_args(mods...;
             push!(ambs, m)
         end
     end
-    work = Base.loaded_modules_array()
-    filter!(mod -> mod === parentmodule(mod), work) # some items in loaded_modules_array are not top modules (really just Base)
-    while !isempty(work)
-        mod = pop!(work)
-        for n in names(mod, all = true)
-            Base.isdeprecated(mod, n) && continue
-            if !isdefined(mod, n)
-                if is_in_mods(mod, recursive, mods)
-                    if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds
-                        println("Skipping ", mod, '.', n)  # typically stale exports
-                    end
-                end
-                continue
-            end
-            f = Base.unwrap_unionall(getfield(mod, n))
-            if isa(f, Module) && f !== mod && parentmodule(f) === mod && nameof(f) === n
-                push!(work, f)
-            elseif isa(f, DataType) && isdefined(f.name, :mt) && parentmodule(f) === mod && nameof(f) === n && f.name.mt !== Symbol.name.mt && f.name.mt !== DataType.name.mt
-                examine(f.name.mt)
-            end
-        end
-    end
-    examine(Symbol.name.mt)
-    examine(DataType.name.mt)
+    examine(Core.methodtable)
     return collect(ambs)
 end
 
@@ -2108,6 +2469,8 @@ for G in (GenericSet, GenericDict)
 end
 
 Base.get(s::GenericDict, x, y) = get(s.s, x, y)
+Base.pop!(s::GenericDict, k) = pop!(s.s, k)
+Base.setindex!(s::GenericDict, v, k) = setindex!(s.s, v, k)
 
 """
 The `GenericArray` can be used to test generic array APIs that program to
diff --git a/stdlib/Test/src/logging.jl b/stdlib/Test/src/logging.jl
index 4e444874d0fb8..a3a94a642f250 100644
--- a/stdlib/Test/src/logging.jl
+++ b/stdlib/Test/src/logging.jl
@@ -2,6 +2,7 @@
 
 using Logging: Logging, AbstractLogger, LogLevel, Info, with_logger
 import Base: occursin
+using Base: @lock
 
 #-------------------------------------------------------------------------------
 """
@@ -35,11 +36,15 @@ struct Ignored ; end
 #-------------------------------------------------------------------------------
 # Logger with extra test-related state
 mutable struct TestLogger <: AbstractLogger
-    logs::Vector{LogRecord}
+    lock::ReentrantLock
+    logs::Vector{LogRecord}  # Guarded by lock.
     min_level::LogLevel
     catch_exceptions::Bool
-    shouldlog_args
-    message_limits::Dict{Any,Int}
+    # Note: shouldlog_args only maintains the info for the most recent log message, which
+    # may not be meaningful in a multithreaded program. See:
+    # https://github.com/JuliaLang/julia/pull/54497#discussion_r1603691606
+    shouldlog_args  # Guarded by lock.
+    message_limits::Dict{Any,Int}  # Guarded by lock.
     respect_maxlog::Bool
 end
 
@@ -55,7 +60,7 @@ most `n` times.
 
 See also: [`LogRecord`](@ref).
 
-## Example
+## Examples
 
 ```jldoctest
 julia> using Test, Logging
@@ -80,15 +85,17 @@ Test Passed
 ```
 """
 TestLogger(; min_level=Info, catch_exceptions=false, respect_maxlog=true) =
-    TestLogger(LogRecord[], min_level, catch_exceptions, nothing, Dict{Any, Int}(), respect_maxlog)
+    TestLogger(ReentrantLock(), LogRecord[], min_level, catch_exceptions, nothing, Dict{Any, Int}(), respect_maxlog)
 Logging.min_enabled_level(logger::TestLogger) = logger.min_level
 
 function Logging.shouldlog(logger::TestLogger, level, _module, group, id)
-    if get(logger.message_limits, id, 1) > 0
-        logger.shouldlog_args = (level, _module, group, id)
-        true
-    else
-        false
+    @lock logger.lock begin
+        if get(logger.message_limits, id, 1) > 0
+            logger.shouldlog_args = (level, _module, group, id)
+            return true
+        else
+            return false
+        end
     end
 end
 
@@ -98,12 +105,17 @@ function Logging.handle_message(logger::TestLogger, level, msg, _module,
     if logger.respect_maxlog
         maxlog = get(kwargs, :maxlog, nothing)
         if maxlog isa Core.BuiltinInts
-            remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
-            logger.message_limits[id] = remaining - 1
-            remaining > 0 || return
+            @lock logger.lock begin
+                remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
+                remaining == 0 && return
+                logger.message_limits[id] = remaining - 1
+            end
         end
     end
-    push!(logger.logs, LogRecord(level, msg, _module, group, id, file, line, kwargs))
+    r = LogRecord(level, msg, _module, group, id, file, line, kwargs)
+    @lock logger.lock begin
+        push!(logger.logs, r)
+    end
 end
 
 # Catch exceptions for the test logger only if specified
@@ -112,7 +124,9 @@ Logging.catch_exceptions(logger::TestLogger) = logger.catch_exceptions
 function collect_test_logs(f; kwargs...)
     logger = TestLogger(; kwargs...)
     value = with_logger(f, logger)
-    logger.logs, value
+    @lock logger.lock begin
+        return copy(logger.logs), value
+    end
 end
 
 
@@ -149,7 +163,7 @@ function record(ts::DefaultTestSet, t::LogTestFailure)
     if TESTSET_PRINT_ENABLE[]
         printstyled(ts.description, ": ", color=:white)
         print(t)
-        Base.show_backtrace(stdout, scrub_backtrace(backtrace()))
+        Base.show_backtrace(stdout, scrub_backtrace(backtrace(), ts.file, extract_file(t.source)))
         println()
     end
     # Hack: convert to `Fail` so that test summarization works correctly
diff --git a/stdlib/Test/src/precompile.jl b/stdlib/Test/src/precompile.jl
index 2cb2fb7f3f0c6..04907f8425440 100644
--- a/stdlib/Test/src/precompile.jl
+++ b/stdlib/Test/src/precompile.jl
@@ -1,9 +1,15 @@
-redirect_stdout(devnull) do
-    @testset "example" begin
-        @test 1 == 1
-        @test_throws ErrorException error()
-        @test_logs (:info, "Doing foo with n=2") @info "Doing foo with n=2"
-        @test_broken 1 == 2
-        @test 1 ≈ 1.0000000000000001
+if Base.generating_output()
+let
+    function example_payload()
+        @testset "example" begin
+            @test 1 == 1
+            @test_throws ErrorException error()
+            @test_logs (:info, "Doing foo with n=2") @info "Doing foo with n=2"
+            @test_broken 1 == 2
+            @test 1 ≈ 1.0000000000000001
+        end
     end
+
+    redirect_stdout(example_payload, devnull)
+end
 end
diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl
index 0388e2107e098..cc39dc72ce44f 100644
--- a/stdlib/Test/test/runtests.jl
+++ b/stdlib/Test/test/runtests.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test, Random
-using Test: guardseed
+using Test: guardseed, _should_escape_call, _escape_call
 using Serialization
 using Distributed: RemoteException
 
@@ -24,6 +24,10 @@ import Logging: Debug, Info, Warn, with_logger
     @test isapprox(1, 1; [(:atol, 0)]...)
     @test isapprox(1, 2; atol)
     @test isapprox(1, 3; a.atol)
+    # Test custom .. operator (not a broadcast operator)
+    ..(x, y) = x == y
+    @test 'a' .. 'a'
+    @test !('a' .. 'b')
 end
 @testset "@test with skip/broken kwargs" begin
     # Make sure the local variables can be used in conditions
@@ -77,7 +81,7 @@ end
     @test 1234 === @test_nowarn(1234)
     @test 5678 === @test_warn("WARNING: foo", begin println(stderr, "WARNING: foo"); 5678; end)
     let a
-        @test_throws UndefVarError(:a) a
+        @test_throws UndefVarError(:a, :local) a
         @test_nowarn a = 1
         @test a === 1
     end
@@ -107,6 +111,25 @@ end
     @test_throws "\"" throw("\"")
     @test_throws Returns(false) throw(Returns(false))
 end
+
+@testset "Pass - exception with pattern (3-arg form)" begin
+    # Test 3-argument form: @test_throws ExceptionType pattern expr
+    @test_throws ErrorException "error foo" error("error foo 1")
+    @test_throws DomainError r"sqrt.*negative" sqrt(-1)
+    @test_throws BoundsError "at index [2]" [1][2]
+    @test_throws ErrorException ["error", "foo"] error("error foo bar")
+
+    # Test with function pattern
+    @test_throws ErrorException (s -> occursin("foo", s)) error("error foo bar")
+
+    # Test output format
+    let result = @test_throws ErrorException "error foo" error("error foo 1")
+        output = sprint(show, result)
+        @test occursin("Test Passed", output)
+        @test occursin("Thrown: ErrorException", output)
+    end
+end
+
 # Test printing of Fail results
 include("nothrow_testset.jl")
 
@@ -115,54 +138,65 @@ let fails = @testset NoThrowTestSet begin
         @test_throws OverflowError error()
         # 2 - Fail - no exception
         @test_throws OverflowError 1 + 1
-        # 3 - Fail - comparison
+        # 3 & 4 - Fail - comparison
+        @test 1 == 2
         @test 1+1 == 2+2
-        # 4 - Fail - approximate comparison
+        # 5 - Fail - approximate comparison
         @test 1/1 ≈ 2/1
-        # 5 - Fail - chained comparison
+        # 6 - Fail - chained comparison
         @test 1+0 == 2+0 == 3+0
-        # 6 - Fail - comparison call
+        # 7 - Fail - comparison call
         @test ==(1 - 2, 2 - 1)
-        # 7 - Fail - splatting
+        # 8 - Fail - splatting
         @test ==(1:2...)
-        # 8 - Fail - isequal
+        # 9 & 10 - Fail - broadcast
+        @test 1*1 .== 2*2
+        @test (==).(1*1, 2*2)
+        # 11 & 12 - Fail qualified functions
+        @test Base.:(==)(1*1, 2*2)
+        @test Base.:(==).(1*1, 2*2)
+        # 13 - Fail - isequal
         @test isequal(0 / 0, 1 / 0)
-        # 9 - Fail - function splatting
+        # 14 - Fail - function splatting
         @test isequal(1:2...)
-        # 10 - Fail - isapprox
+        # 15 - Fail - isapprox
         @test isapprox(0 / 1, -1 / 0)
-        # 11 & 12 - Fail - function with keyword
+        # 16 & 17 - Fail - function with keyword
         @test isapprox(1 / 2, 2 / 1, atol=1 / 1)
         @test isapprox(1 - 2, 2 - 1; atol=1 - 1)
-        # 13 - Fail - function keyword splatting
+        # 18 - Fail - function keyword splatting
         k = [(:atol, 0), (:nans, true)]
         @test isapprox(1, 2; k...)
-        # 14 - Fail - call negation
+        # 19 - Fail - call negation
         @test !isequal(1, 2 - 1)
-        # 15 - Fail - comparison negation
+        # 20 - Fail - comparison negation
         @test !(2 + 3 == 1 + 4)
-        # 16 - Fail - chained negation
+        # 21 - Fail - chained negation
         @test !(2 + 3 == 1 + 4 == 5)
-        # 17 - Fail - isempty
+        # 22 - Fail - isempty
         nonempty = [1, 2, 3]
         @test isempty(nonempty)
         str1 = "Hello"
         str2 = "World"
-        # 18 - Fail - occursin
+        # 23 - Fail - occursin
         @test occursin(str1, str2)
-        # 19 - Fail - startswith
+        # 24 - Fail - startswith
         @test startswith(str1, str2)
-        # 20 - Fail - endswith
+        # 25 - Fail - endswith
         @test endswith(str1, str2)
-        # 21 - Fail - contains
-        @test contains(str1, str2)
-        # 22 - Fail - Type Comparison
+        # 26 - Fail - contains
+        @test Base.contains(str1, str2)
+        # 27 - Fail - issetequal
+        @test issetequal([2, 3] .- 1, [1, 3])
+        # 28 - Fail - Type Comparison
         @test typeof(1) <: typeof("julia")
-        # 23 - 26 - Fail - wrong message
+        # 29 - Fail - assignment
+        @test (i = length([1, 2])) == 3
+        # 30 - 33 - Fail - wrong message
         @test_throws "A test" error("a test")
         @test_throws r"sqrt\([Cc]omplx" sqrt(-1)
         @test_throws str->occursin("a T", str) error("a test")
-        @test_throws ["BoundsError", "aquire", "1-element", "at index [2]"] [1][2]
+        @test_throws ["BoundsError", "acquire", "1-element", "at index [2]"] [1][2]
     end
     for fail in fails
         @test fail isa Test.Fail
@@ -179,122 +213,157 @@ let fails = @testset NoThrowTestSet begin
     end
 
     let str = sprint(show, fails[3])
+        @test occursin("Expression: 1 == 2", str)
+        @test !occursin("Evaluated", str)
+    end
+
+    let str = sprint(show, fails[4])
         @test occursin("Expression: 1 + 1 == 2 + 2", str)
         @test occursin("Evaluated: 2 == 4", str)
     end
 
-    let str = sprint(show, fails[4])
+    let str = sprint(show, fails[5])
         @test occursin("Expression: 1 / 1 ≈ 2 / 1", str)
         @test occursin("Evaluated: 1.0 ≈ 2.0", str)
     end
 
-    let str = sprint(show, fails[5])
+    let str = sprint(show, fails[6])
         @test occursin("Expression: 1 + 0 == 2 + 0 == 3 + 0", str)
         @test occursin("Evaluated: 1 == 2 == 3", str)
     end
 
-    let str = sprint(show, fails[6])
+    let str = sprint(show, fails[7])
         @test occursin("Expression: 1 - 2 == 2 - 1", str)
         @test occursin("Evaluated: -1 == 1", str)
     end
 
-    let str = sprint(show, fails[7])
+    let str = sprint(show, fails[8])
         @test occursin("Expression: (==)(1:2...)", str)
-        @test !occursin("Evaluated", str)
+        @test occursin("Evaluated: 1 == 2", str)
     end
 
-    let str = sprint(show, fails[8])
+    let str = sprint(show, fails[9])
+        @test occursin("Expression: 1 * 1 .== 2 * 2", str)
+        @test occursin("Evaluated: 1 .== 4", str)
+    end
+
+    let str = sprint(show, fails[10])
+        @test occursin("Expression: (==).(1 * 1, 2 * 2)", str)
+        @test occursin("Evaluated: (==).(1, 4)", str)
+    end
+
+    let str = sprint(show, fails[11])
+        @test occursin("Expression: Base.:(==)(1 * 1, 2 * 2)", str)
+        @test occursin("Evaluated: Base.:(==)(1, 4)", str)
+    end
+
+    let str = sprint(show, fails[12])
+        @test occursin("Expression: Base.:(==).(1 * 1, 2 * 2)", str)
+        @test occursin("Evaluated: Base.:(==).(1, 4)", str)
+    end
+
+    let str = sprint(show, fails[13])
         @test occursin("Expression: isequal(0 / 0, 1 / 0)", str)
         @test occursin("Evaluated: isequal(NaN, Inf)", str)
     end
 
-    let str = sprint(show, fails[9])
+    let str = sprint(show, fails[14])
         @test occursin("Expression: isequal(1:2...)", str)
         @test occursin("Evaluated: isequal(1, 2)", str)
     end
 
-    let str = sprint(show, fails[10])
+    let str = sprint(show, fails[15])
         @test occursin("Expression: isapprox(0 / 1, -1 / 0)", str)
         @test occursin("Evaluated: isapprox(0.0, -Inf)", str)
     end
 
-    let str = sprint(show, fails[11])
+    let str = sprint(show, fails[16])
         @test occursin("Expression: isapprox(1 / 2, 2 / 1, atol = 1 / 1)", str)
         @test occursin("Evaluated: isapprox(0.5, 2.0; atol = 1.0)", str)
     end
 
-    let str = sprint(show, fails[12])
+    let str = sprint(show, fails[17])
         @test occursin("Expression: isapprox(1 - 2, 2 - 1; atol = 1 - 1)", str)
         @test occursin("Evaluated: isapprox(-1, 1; atol = 0)", str)
     end
 
-    let str = sprint(show, fails[13])
+    let str = sprint(show, fails[18])
         @test occursin("Expression: isapprox(1, 2; k...)", str)
         @test occursin("Evaluated: isapprox(1, 2; atol = 0, nans = true)", str)
     end
 
-    let str = sprint(show, fails[14])
+    let str = sprint(show, fails[19])
         @test occursin("Expression: !(isequal(1, 2 - 1))", str)
         @test occursin("Evaluated: !(isequal(1, 1))", str)
     end
 
-    let str = sprint(show, fails[15])
+    let str = sprint(show, fails[20])
         @test occursin("Expression: !(2 + 3 == 1 + 4)", str)
         @test occursin("Evaluated: !(5 == 5)", str)
     end
 
-    let str = sprint(show, fails[16])
+    let str = sprint(show, fails[21])
         @test occursin("Expression: !(2 + 3 == 1 + 4 == 5)", str)
         @test occursin("Evaluated: !(5 == 5 == 5)", str)
     end
 
-    let str = sprint(show, fails[17])
+    let str = sprint(show, fails[22])
         @test occursin("Expression: isempty(nonempty)", str)
         @test occursin("Evaluated: isempty([1, 2, 3])", str)
     end
 
-    let str = sprint(show, fails[18])
+    let str = sprint(show, fails[23])
         @test occursin("Expression: occursin(str1, str2)", str)
         @test occursin("Evaluated: occursin(\"Hello\", \"World\")", str)
     end
 
-    let str = sprint(show, fails[19])
+    let str = sprint(show, fails[24])
         @test occursin("Expression: startswith(str1, str2)", str)
         @test occursin("Evaluated: startswith(\"Hello\", \"World\")", str)
     end
 
-    let str = sprint(show, fails[20])
+    let str = sprint(show, fails[25])
         @test occursin("Expression: endswith(str1, str2)", str)
         @test occursin("Evaluated: endswith(\"Hello\", \"World\")", str)
     end
 
-    let str = sprint(show, fails[21])
-        @test occursin("Expression: contains(str1, str2)", str)
-        @test occursin("Evaluated: contains(\"Hello\", \"World\")", str)
+    let str = sprint(show, fails[26])
+        @test occursin("Expression: Base.contains(str1, str2)", str)
+        @test occursin("Evaluated: Base.contains(\"Hello\", \"World\")", str)
     end
 
-    let str = sprint(show, fails[22])
+    let str = sprint(show, fails[27])
+        @test occursin("Expression: issetequal([2, 3] .- 1, [1, 3])", str)
+        @test occursin("Evaluated: issetequal([1, 2], [1, 3])", str)
+    end
+
+    let str = sprint(show, fails[28])
         @test occursin("Expression: typeof(1) <: typeof(\"julia\")", str)
         @test occursin("Evaluated: $(typeof(1)) <: $(typeof("julia"))", str)
     end
 
-    let str = sprint(show, fails[23])
+    let str = sprint(show, fails[29])
+        @test occursin("Expression: (i = length([1, 2])) == 3", str)
+        @test occursin("Evaluated: 2 == 3", str)
+    end
+
+    let str = sprint(show, fails[30])
         @test occursin("Expected: \"A test\"", str)
         @test occursin("Message: \"a test\"", str)
     end
 
-    let str = sprint(show, fails[24])
+    let str = sprint(show, fails[31])
         @test occursin("Expected: r\"sqrt\\([Cc]omplx\"", str)
         @test occursin(r"Message: .*Try sqrt\(Complex", str)
     end
 
-    let str = sprint(show, fails[25])
+    let str = sprint(show, fails[32])
         @test occursin("Expected: < match function >", str)
         @test occursin("Message: \"a test\"", str)
     end
 
-    let str = sprint(show, fails[26])
-        @test occursin("Expected: [\"BoundsError\", \"aquire\", \"1-element\", \"at index [2]\"]", str)
+    let str = sprint(show, fails[33])
+        @test occursin("Expected: [\"BoundsError\", \"acquire\", \"1-element\", \"at index [2]\"]", str)
         @test occursin(r"Message: \"BoundsError.* 1-element.*at index \[2\]", str)
     end
 
@@ -344,10 +413,10 @@ let retval_tests = @testset NoThrowTestSet begin
         ts = Test.DefaultTestSet("Mock for testing retval of record(::DefaultTestSet, ::T <: Result) methods")
         pass_mock = Test.Pass(:test, 1, 2, 3, LineNumberNode(0, "A Pass Mock"))
         @test Test.record(ts, pass_mock) isa Test.Pass
-        error_mock = Test.Error(:test, 1, 2, 3, LineNumberNode(0, "An Error Mock"))
-        @test Test.record(ts, error_mock) isa Test.Error
+        error_mock = Test.Error(:test, 1, 2, nothing, LineNumberNode(0, "An Error Mock"), nothing)
+        @test Test.record(ts, error_mock; print_result=false) isa Test.Error
         fail_mock = Test.Fail(:test, 1, 2, 3, nothing, LineNumberNode(0, "A Fail Mock"), false)
-        @test Test.record(ts, fail_mock) isa Test.Fail
+        @test Test.record(ts, fail_mock; print_result=false) isa Test.Fail
         broken_mock = Test.Broken(:test, LineNumberNode(0, "A Broken Mock"))
         @test Test.record(ts, broken_mock) isa Test.Broken
     end
@@ -356,6 +425,47 @@ let retval_tests = @testset NoThrowTestSet begin
     end
 end
 
+@testset "Fail - exception with pattern (3-arg form)" begin
+    # Test type mismatch
+    let fails = @testset NoThrowTestSet begin
+        @test_throws ArgumentError "error foo" error("error foo 1")  # Wrong type
+    end
+        @test length(fails) == 1
+        @test fails[1] isa Test.Fail
+        @test fails[1].test_type === :test_throws_wrong
+        @test occursin("ArgumentError with pattern \"error foo\"", fails[1].data)
+    end
+
+    # Test pattern mismatch
+    let fails = @testset NoThrowTestSet begin
+        @test_throws ErrorException "wrong pattern" error("error foo 1")  # Wrong pattern
+    end
+        @test length(fails) == 1
+        @test fails[1] isa Test.Fail
+        @test fails[1].test_type === :test_throws_wrong
+        @test occursin("ErrorException with pattern \"wrong pattern\"", fails[1].data)
+    end
+
+    # Test no exception thrown
+    let fails = @testset NoThrowTestSet begin
+        @test_throws ErrorException "error foo" 1 + 1  # No exception
+    end
+        @test length(fails) == 1
+        @test fails[1] isa Test.Fail
+        @test fails[1].test_type === :test_throws_nothing
+        @test occursin("ErrorException with pattern \"error foo\"", fails[1].data)
+    end
+
+    # Test first argument must be a type
+    let fails = @testset NoThrowTestSet begin
+        @test_throws "not a type" "error foo" error("error foo 1")  # First arg not a type
+    end
+        @test length(fails) == 1
+        @test fails[1] isa Test.Fail
+        @test fails[1].test_type === :test_throws_wrong
+    end
+end
+
 @testset "printing of a TestSetException" begin
     tse_str = sprint(show, Test.TestSetException(1, 2, 3, 4, Vector{Union{Test.Error, Test.Fail}}()))
     @test occursin("1 passed", tse_str)
@@ -468,17 +578,17 @@ end
     end
     @testset "ts results" begin
         @test isa(ts, Test.DefaultTestSet)
-        passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = Test.get_test_counts(ts)
-        total_pass   = passes + c_passes
-        total_fail   = fails  + c_fails
-        total_error  = errors + c_errors
-        total_broken = broken + c_broken
+        tc = Test.get_test_counts(ts)
+        total_pass   = tc.passes + tc.cumulative_passes
+        total_fail   = tc.fails  + tc.cumulative_fails
+        total_error  = tc.errors + tc.cumulative_errors
+        total_broken = tc.broken + tc.cumulative_broken
         @test total_pass   == 24
         @test total_fail   == 6
         @test total_error  == 6
         @test total_broken == 0
     end
-    ts.anynonpass = false
+    @atomic ts.anynonpass = 0x00
     deleteat!(Test.get_testset().results, 1)
 end
 
@@ -659,15 +769,15 @@ end
 @test tss.foo == 3
 
 # test @inferred
-uninferrable_function(i) = (1, "1")[i]
-uninferrable_small_union(i) = (1, nothing)[i]
-@test_throws ErrorException @inferred(uninferrable_function(1))
+uninferable_function(i) = (1, "1")[i]
+uninferable_small_union(i) = (1, nothing)[i]
+@test_throws ErrorException @inferred(uninferable_function(1))
 @test @inferred(identity(1)) == 1
-@test @inferred(Nothing, uninferrable_small_union(1)) === 1
-@test @inferred(Nothing, uninferrable_small_union(2)) === nothing
-@test_throws ErrorException @inferred(Missing, uninferrable_small_union(1))
-@test_throws ErrorException @inferred(Missing, uninferrable_small_union(2))
-@test_throws ArgumentError @inferred(nothing, uninferrable_small_union(1))
+@test @inferred(Nothing, uninferable_small_union(1)) === 1
+@test @inferred(Nothing, uninferable_small_union(2)) === nothing
+@test_throws ErrorException @inferred(Missing, uninferable_small_union(1))
+@test_throws ErrorException @inferred(Missing, uninferable_small_union(2))
+@test_throws ArgumentError @inferred(nothing, uninferable_small_union(1))
 
 # Ensure @inferred only evaluates the arguments once
 inferred_test_global = 0
@@ -692,12 +802,12 @@ end
 
 # Issue #17105
 # @inferred with kwargs
-inferrable_kwtest(x; y=1) = 2x
-uninferrable_kwtest(x; y=1) = 2x+y
-@test (@inferred inferrable_kwtest(1)) == 2
-@test (@inferred inferrable_kwtest(1; y=1)) == 2
-@test (@inferred uninferrable_kwtest(1)) == 3
-@test (@inferred uninferrable_kwtest(1; y=2)) == 4
+inferable_kwtest(x; y=1) = 2x
+uninferable_kwtest(x; y=1) = 2x+y
+@test (@inferred inferable_kwtest(1)) == 2
+@test (@inferred inferable_kwtest(1; y=1)) == 2
+@test (@inferred uninferable_kwtest(1)) == 3
+@test (@inferred uninferable_kwtest(1; y=2)) == 4
 
 @test_throws ErrorException @testset "$(error())" for i in 1:10
 end
@@ -770,9 +880,9 @@ end
     """)
     msg = read(pipeline(ignorestatus(`$(Base.julia_cmd()) --startup-file=no --color=no $runtests`), stderr=devnull), String)
     msg = win2unix(msg)
-    regex = r"((?:Tests|Other tests|Testset without source): Test Failed (?:.|\n)*?)\n\nStacktrace:(?:.|\n)*?(?=\n(?:Tests|Other tests))"
+    regex = r"((?:Tests|Other tests|Testset without source): Test Failed (?:.|\n)*?)\n  Stacktrace:(?:.|\n)*?(?=\n(?:Tests|Other tests))"
     failures = map(eachmatch(regex, msg)) do m
-        m = match(r"(Tests|Other tests|Testset without source): .*? at (.*?)\n  Expression: (.*)(?:.|\n)*\n+Stacktrace:\n((?:.|\n)*)", m.match)
+        m = match(r"(Tests|Other tests|Testset without source): .*? at (.*?)\n  Expression: (.*)(?:.|\n)*\n  Stacktrace:\n((?:.|\n)*)", m.match)
         (; testset = m[1], source = m[2], ex = m[3], stacktrace = m[4])
     end
     @test length(failures) == 8 # 8 failed tests
@@ -870,12 +980,12 @@ let msg = read(pipeline(ignorestatus(`$(Base.julia_cmd()) --startup-file=no --co
             end
         end'`), stderr=devnull), String)
     @test occursin(r"""
-        Test Summary: | Pass  Fail  Total  Time
-        Foo Tests     |    2     2      4  \s*\d*.\ds
-          Animals     |    1     1      2  \s*\d*.\ds
-            Felines   |    1            1  \s*\d*.\ds
-            Canines   |          1      1  \s*\d*.\ds
-          Arrays      |    1     1      2  \s*\d*.\ds
+        Test Summary: \| Pass  Fail  Total +Time
+        Foo Tests     \|    2     2      4  \s*\d*\.\ds
+          Animals     \|    1     1      2  \s*\d*\.\ds
+            Felines   \|    1            1  \s*\d*\.\ds
+            Canines   \|          1      1  \s*\d*\.\ds
+          Arrays      \|    1     1      2  \s*\d*\.\ds
         """, msg)
 end
 
@@ -1032,6 +1142,7 @@ end
     # i.e. it behaves as if it was wrapped in a `guardseed(GLOBAL_SEED)` block
     seed = rand(UInt128)
     Random.seed!(seed)
+    seeded_state = copy(Random.default_rng())
     a = rand()
     @testset begin
         # global RNG must re-seeded at the beginning of @testset
@@ -1043,31 +1154,82 @@ end
     # the @testset's above must have no consequence for rand() below
     b = rand()
     Random.seed!(seed)
+    @test Random.default_rng() == seeded_state
     @test a == rand()
     @test b == rand()
 
     # Even when seed!() is called within a testset A, subsequent testsets
     # should start with the same "global RNG state" as what A started with,
     # such that the test `refvalue == rand(Int)` below succeeds.
-    # Currently, this means that Random.GLOBAL_SEED has to be restored,
+    # Currently, this means that `Random.get_tls_seed()` has to be restored,
     # in addition to the state of Random.default_rng().
-    GLOBAL_SEED_orig = Random.GLOBAL_SEED
+    tls_seed_orig = copy(Random.get_tls_seed())
     local refvalue
-    @testset "GLOBAL_SEED is also preserved (setup)" begin
-        @test GLOBAL_SEED_orig == Random.GLOBAL_SEED
+    @testset "TLS seed is also preserved (setup)" begin
+        @test tls_seed_orig == Random.get_tls_seed()
         refvalue = rand(Int)
         Random.seed!()
-        @test GLOBAL_SEED_orig != Random.GLOBAL_SEED
+        @test tls_seed_orig != Random.get_tls_seed()
     end
-    @test GLOBAL_SEED_orig == Random.GLOBAL_SEED
-    @testset "GLOBAL_SEED is also preserved (forloop)" for _=1:3
+    @test tls_seed_orig == Random.get_tls_seed()
+    @testset "TLS seed is also preserved (forloop)" for _=1:3
         @test refvalue == rand(Int)
         Random.seed!()
     end
-    @test GLOBAL_SEED_orig == Random.GLOBAL_SEED
-    @testset "GLOBAL_SEED is also preserved (beginend)" begin
+    @test tls_seed_orig == Random.get_tls_seed()
+    @testset "TLS seed is also preserved (beginend)" begin
         @test refvalue == rand(Int)
     end
+
+    # @testset below is not compatible with e.g. v1.9, but it still fails there (at "main task")
+    # when deleting lines using get_tls_seed() or GLOBAL_SEED
+    @testset "TLS seed and concurrency" begin
+        # Even with multi-tasking, the TLS seed must stay consistent: the default_rng() state
+        # is reset to the "global seed" at the beginning, and the "global seed" is reset to what
+        # it was at the end of the testset; make sure that distinct tasks don't see the mutation
+        # of this "global seed" (iow, it's task-local)
+        seed = rand(UInt128)
+        Random.seed!(seed)
+        seeded_state = copy(Random.default_rng())
+        a = rand()
+
+        ch = Channel{Nothing}()
+        @sync begin
+            @async begin
+                @testset "task 1" begin
+                    # tick 1
+                    # this task didn't call seed! explicitly (yet), so its TaskLocalRNG() should have been
+                    # reset to `Random.GLOBAL_SEED` at the beginning of `@testset`
+                    @test Random.GLOBAL_SEED == Random.default_rng()
+                    Random.seed!()
+                    put!(ch, nothing) # tick 1 -> tick 2
+                    take!(ch) # tick 3
+                end
+                put!(ch, nothing) # tick 3 -> tick 4
+            end
+            @async begin
+                take!(ch) # tick 2
+                # @testset below will record the current TLS "seed" and reset default_rng() to
+                # this value;
+                # it must not be affected by the fact that "task 1" called `seed!()` first
+                @test Random.get_tls_seed() == Random.GLOBAL_SEED
+
+                @testset "task 2" begin
+                    @test Random.GLOBAL_SEED == Random.default_rng()
+                    Random.seed!()
+                    put!(ch, nothing) # tick 2 -> tick 3
+                    take!(ch) # tick 4
+                end
+                # when `@testset` of task 2 finishes, which is after `@testset` from task 1,
+                # it resets `get_tls_seed()` to what it was before starting:
+                @test Random.get_tls_seed() == Random.GLOBAL_SEED
+            end
+        end
+        @testset "main task" begin
+            @test Random.default_rng() == seeded_state
+            @test a == rand()
+        end
+    end
 end
 
 @testset "InterruptExceptions #21043" begin
@@ -1141,7 +1303,7 @@ h25835(;x=1,y=1) = x isa Int ? x*y : (rand(Bool) ? 1.0 : 1)
     @test @inferred(f25835(x=nothing)) == ()
     @test @inferred(f25835(x=1)) == (1,)
 
-    # A global argument should make this uninferrable
+    # A global argument should make this uninferable
     global y25835 = 1
     @test f25835(x=y25835) == (1,)
     @test_throws ErrorException @inferred((()->f25835(x=y25835))()) == (1,)
@@ -1201,17 +1363,17 @@ end
 
 @testset "verbose option" begin
     expected = r"""
-    Test Summary:             | Pass  Total  Time
-    Parent                    |    9      9  \s*\d*.\ds
-      Child 1                 |    3      3  \s*\d*.\ds
-        Child 1.1 (long name) |    1      1  \s*\d*.\ds
-        Child 1.2             |    1      1  \s*\d*.\ds
-        Child 1.3             |    1      1  \s*\d*.\ds
-      Child 2                 |    3      3  \s*\d*.\ds
-      Child 3                 |    3      3  \s*\d*.\ds
-        Child 3.1             |    1      1  \s*\d*.\ds
-        Child 3.2             |    1      1  \s*\d*.\ds
-        Child 3.3             |    1      1  \s*\d*.\ds
+    Test Summary:             \| Pass  Total +Time
+    Parent                    \|    9      9  \s*\d*\.\ds
+      Child 1                 \|    3      3  \s*\d*\.\ds
+        Child 1\.1 \(long name\) \|    1      1  \s*\d*\.\ds
+        Child 1\.2             \|    1      1  \s*\d*\.\ds
+        Child 1\.3             \|    1      1  \s*\d*\.\ds
+      Child 2                 \|    3      3  \s*\d*\.\ds
+      Child 3                 \|    3      3  \s*\d*\.\ds
+        Child 3\.1             \|    1      1  \s*\d*\.\ds
+        Child 3\.2             \|    1      1  \s*\d*\.\ds
+        Child 3\.3             \|    1      1  \s*\d*\.\ds
     """
 
     mktemp() do f, _
@@ -1272,9 +1434,9 @@ end
 @testset "failfast option" begin
     @testset "non failfast (default)" begin
         expected = r"""
-        Test Summary: | Pass  Fail  Error  Total  Time
-        Foo           |    1     2      1      4  \s*\d*.\ds
-          Bar         |    1     1             2  \s*\d*.\ds
+        Test Summary: \| Pass  Fail  Error  Total +Time
+        Foo           \|    1     2      1      4  \s*\d*\.\ds
+          Bar         \|    1     1             2  \s*\d*\.\ds
         """
 
         mktemp() do f, _
@@ -1296,10 +1458,10 @@ end
             @test occursin(expected, result)
         end
     end
-    @testset "failfast" begin
+    @testset "failfast begin-end" begin
         expected = r"""
-        Test Summary: | Fail  Total  Time
-        Foo           |    1      1  \s*\d*.\ds
+        Test Summary: \| Fail  Total +Time
+        Foo           \|    1      1  \s*\d*\.\ds
         """
 
         mktemp() do f, _
@@ -1321,11 +1483,37 @@ end
             @test occursin(expected, result)
         end
     end
+    @testset "failfast for-loop" begin
+        expected = r"""
+        Test Summary: \| Fail  Total +Time
+        Foo           \|    1      1  \s*\d*\.\ds
+          1           \|    1      1  \s*\d*\.\ds
+        """
+        mktemp() do f, _
+            write(f,
+            """
+            using Test
+
+            @testset "Foo" failfast=true begin
+                @testset "\$x" for x in 1:2
+                    @test false
+                end
+                @testset "Bar" begin
+                    @test false
+                    @test true
+                end
+            end
+            """)
+            cmd    = `$(Base.julia_cmd()) --startup-file=no --color=no $f`
+            result = read(pipeline(ignorestatus(cmd), stderr=devnull), String)
+            @test occursin(expected, result)
+        end
+    end
     @testset "failfast passes to child testsets" begin
         expected = r"""
-        Test Summary: | Fail  Total  Time
-        PackageName   |    1      1  \s*\d*.\ds
-          1           |    1      1  \s*\d*.\ds
+        Test Summary: \| Fail  Total +Time
+        Foo           \|    1      1  \s*\d*\.\ds
+          1           \|    1      1  \s*\d*\.\ds
         """
 
         mktemp() do f, _
@@ -1349,15 +1537,15 @@ end
     end
     @testset "failfast via env var" begin
         expected = r"""
-        Test Summary: | Fail  Total  Time
-        Foo           |    1      1  \s*\d*.\ds
+        Test Summary: \| Fail  Total +Time
+        Foo           \|    1      1  \s*\d*\.\ds
         """
 
         mktemp() do f, _
             write(f,
             """
             using Test
-            ENV["JULIA_TEST_FAILFAST"] = true
+
             @testset "Foo" begin
                 @test false
                 @test error()
@@ -1367,7 +1555,7 @@ end
                 end
             end
             """)
-            cmd    = `$(Base.julia_cmd()) --startup-file=no --color=no $f`
+            cmd    = addenv(`$(Base.julia_cmd()) --startup-file=no --color=no $f`, "JULIA_TEST_FAILFAST"=>"true")
             result = read(pipeline(ignorestatus(cmd), stderr=devnull), String)
             @test occursin(expected, result)
         end
@@ -1482,6 +1670,22 @@ end
     @test_throws LoadError("file", 111, ErrorException("Real error")) @macroexpand @test_macro_throw_2
 end
 
+# Issue 54807
+struct FEexc
+    a::Nothing
+    b::Nothing
+end
+
+@testset "FieldError Shim tests and Softdeprecation of @test_throws ErrorException" begin
+    feexc = FEexc(nothing, nothing)
+    # This is redundant regular test for FieldError
+    @test_throws FieldError feexc.c
+    # This should raise ErrorException
+    @test_throws ErrorException feexc.a = 1
+    # This is test for FieldError shim and deprecation
+    @test_deprecated @test_throws ErrorException feexc.c
+end
+
 # Issue 25483
 mutable struct PassInformationTestSet <: Test.AbstractTestSet
     results::Vector
@@ -1530,3 +1734,365 @@ let
         end
     end
 end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Test))
+end
+
+module CustomTestSetModule
+    using Test
+    struct CustomTestSet <: Test.AbstractTestSet
+        description::String
+    end
+    Test.record(::CustomTestSet, result) = result
+    Test.finish(cts::CustomTestSet) = cts
+end
+
+@testset "Unexported custom TestSet" begin
+    using .CustomTestSetModule
+    let res = @testset CustomTestSetModule.CustomTestSet begin
+                @test true
+            end
+        @test res isa CustomTestSetModule.CustomTestSet
+    end
+end
+
+struct CustomPrintingTestSet <: AbstractTestSet
+    description::String
+    passes::Int
+    errors::Int
+    fails::Int
+    broken::Int
+end
+
+function Test.finish(cpts::CustomPrintingTestSet)
+    if Test.get_testset_depth() != 0
+        push!(Test.get_current_testset(), cpts)
+        # printing is handled by the parent
+        return cpts
+    end
+
+    Test.print_testset_results(cpts)
+    cpts
+end
+
+@testset "Custom testsets participate in printing" begin
+    mktemp() do f, _
+        write(f,
+        """
+        using Test
+
+        mutable struct CustomPrintingTestSet <: Test.AbstractTestSet
+            description::String
+            passes::Int
+            fails::Int
+            errors::Int
+            broken::Int
+        end
+        CustomPrintingTestSet(desc::String) = CustomPrintingTestSet(desc, 0,0,0,0)
+
+        Test.record(cpts::CustomPrintingTestSet, ::Test.Pass) = cpts.passes += 1
+        Test.record(cpts::CustomPrintingTestSet, ::Test.Error) = cpts.errors += 1
+        Test.record(cpts::CustomPrintingTestSet, ::Test.Fail) = cpts.fails += 1
+        Test.record(cpts::CustomPrintingTestSet, ::Test.Broken) = cpts.broken += 1
+        Test.get_test_counts(ts::CustomPrintingTestSet) = Test.TestCounts(
+                                                                true,
+                                                                ts.passes,
+                                                                ts.fails,
+                                                                ts.errors,
+                                                                ts.broken,
+                                                                0,
+                                                                0,
+                                                                0,
+                                                                0,
+                                                                Test.format_duration(ts))
+
+        function Test.finish(cpts::CustomPrintingTestSet)
+            if Test.get_testset_depth() != 0
+                Test.record(Test.get_testset(), cpts)
+                # printing is handled by the parent
+                return cpts
+            end
+
+            Test.print_test_results(cpts)
+            cpts
+        end
+
+        struct NonRecordingTestSet <: Test.AbstractTestSet
+            description::String
+        end
+        Test.record(nrts::NonRecordingTestSet, ::Test.Result) = nrts
+        Test.finish(nrts::NonRecordingTestSet) = Test.record(Test.get_testset(), nrts)
+
+         @testset "outer" begin
+            @testset "a" begin
+                @test true
+            end
+            @testset CustomPrintingTestSet "custom" begin
+                @test false
+                @test true
+                @test_broken false
+                @test error()
+            end
+            @testset NonRecordingTestSet "no-record" begin
+                @test false
+                @test true
+                @test_broken false
+                @test error()
+            end
+            @testset "b" begin
+                @test true
+            end
+        end
+        """)
+
+        # this tests both the `TestCounts` parts as well as the fallback `x`s
+        expected = r"""
+                    Test Summary: \| Pass  Fail  Error  Broken  Total +Time
+                    outer         \|    3     1      1       1      6  \s*\d*.\ds
+                      a           \|    1                           1  \s*\d*.\ds
+                      custom      \|    1     1      1       1      4  \s*\?s
+                      no-record   \|    x     x      x       x      \?  \s*\?s
+                      b           \|    1                           1  \s*\d*.\ds
+                    RNG of the outermost testset: .*
+                    """
+
+        cmd    = `$(Base.julia_cmd()) --startup-file=no --color=no $f`
+        result = read(pipeline(ignorestatus(cmd), stderr=devnull), String)
+        @test occursin(expected, result)
+    end
+
+end
+
+@testset "Deprecated multiple arguments" begin
+    msg1 = """Multiple descriptions provided to @testset. \
+        This is deprecated and may error in the future."""
+    @test_deprecated msg1 @macroexpand @testset "name1" "name2" begin end
+    msg2 = """Multiple testset types provided to @testset. \
+        This is deprecated and may error in the future."""
+    @test_deprecated msg2 @macroexpand @testset DefaultTestSet DefaultTestSet begin end
+end
+
+# Issue #54082
+module M54082 end
+@testset "@test_throws UndefVarError(:var)" begin
+    # Single-arg `UndefVarError` should match all `UndefVarError` for the
+    # same variable name, regardless of scope, to keep pre-v1.11 behaviour.
+    f54082() = var
+    @test_throws UndefVarError(:var) f54082()
+    # But if scope is set, then it has to match.
+    @test_throws UndefVarError(:var, M54082) M54082.var
+    let result = @testset NoThrowTestSet begin
+            # Wrong module scope
+            @test_throws UndefVarError(:var, Main) M54082.var
+        end
+        @test only(result) isa Test.Fail
+    end
+end
+
+@testset "Set RNG of testset" begin
+    rng1 = Xoshiro(0x2e026445595ed28e, 0x07bb81ac4c54926d, 0x83d7d70843e8bad6, 0xdbef927d150af80b, 0xdbf91ddf2534f850)
+    rng2 = Xoshiro(0xc380f460355639ee, 0xb39bc754b7d63bbf, 0x1551dbcfb5ed5668, 0x71ab5a18fec21a25, 0x649d0c1be1ca5436)
+    rng3 = Xoshiro(0xee97f5b53f7cdc49, 0x480ac387b0527d3d, 0x614b416502a9e0f5, 0x5250cb36e4a4ceb1, 0xed6615c59e475fa0)
+
+    @testset rng=rng1 begin
+        @test rand() == rand(rng1)
+    end
+
+    @testset rng=rng2 "Outer" begin
+        @test rand() == rand(rng2)
+        @testset rng=rng3 "Inner: $(i)" for i in 1:10
+            @test rand() == rand(rng3)
+        end
+    end
+end
+
+@testset "_should_escape_call" begin
+    @test !_should_escape_call(:(f()))
+    @test _should_escape_call(:(f(x)))
+    @test _should_escape_call(:(x == y))
+    @test _should_escape_call(:(f.(x)))
+    @test !_should_escape_call(:f)
+    @test !_should_escape_call(:(f = 1))
+    @test !_should_escape_call(:(f.x))
+end
+
+@testset "_escape_call" begin
+    @testset "invalid call" begin
+        @test_throws ArgumentError _escape_call(:f)
+        @test_throws ArgumentError _escape_call(:(f = 1))
+        @test_throws ArgumentError _escape_call(:(f.x))
+    end
+
+    @testset "positional arguments" begin
+        func = esc(:f)
+        quoted_func = :(:f)
+        @test _escape_call(:(f())) == (; func, args=[], kwargs=[], quoted_func)
+        @test _escape_call(:(f(x))) == (; func, args=[esc(:x)], kwargs=[], quoted_func)
+        @test _escape_call(:(f(x...))) ==  (; func, args=[:($(esc(:x))...)], kwargs=[], quoted_func)
+    end
+
+    @testset "keyword arguments" begin
+        func = esc(:f)
+        quoted_func = :(:f)
+        @test _escape_call(:(f(y=1))) == (; func, args=[], kwargs=[:(:y => $(esc(1)))], quoted_func)
+        @test _escape_call(:(f(; y))) == (; func, args=[], kwargs=[:(:y => $(esc(:y)))], quoted_func)
+        @test _escape_call(:(f(; y=1))) == (; func, args=[], kwargs=[:(:y => $(esc(1)))], quoted_func)
+        @test _escape_call(:(f(y=1; z))) == (; func, args=[], kwargs=[:(:y => $(esc(1))), :(:z => $(esc(:z)))], quoted_func)
+        @test _escape_call(:(f(; y.z))) == (; func, args=[], kwargs=[:(:z => $(esc(:(y.z))))], quoted_func)
+        @test _escape_call(:(f(; y...))) ==  (; func, args=[], kwargs=[:($(esc(:y))...)], quoted_func)
+    end
+
+    @testset "comparison" begin
+        @test _escape_call(:(x == y)) ==  (; func=esc(:(==)), args=[esc(:x), esc(:y)], kwargs=[], quoted_func=:(:(==)))
+    end
+
+    @testset "broadcast" begin
+        args = [esc(:x), esc(:y)]
+        kwargs = []
+        @test _escape_call(:(f.(x, y))) == (; func=Expr(:., esc(:f)), args, kwargs, quoted_func=QuoteNode(Expr(:., :f)))
+        @test _escape_call(:(Main.f.(x, y))) == (; func=:(Broadcast.BroadcastFunction($(esc(:(Main.f))))), args, kwargs, quoted_func=QuoteNode(Expr(:., :(Main.f))))
+        @test _escape_call(:(x .== y)) == (; func=esc(:(.==)), args, kwargs, quoted_func=:(:.==))
+        @test _escape_call(:((==).(x, y))) == (; func=Expr(:., esc(:(==))), args, kwargs, quoted_func=QuoteNode(Expr(:., :(==))))
+        # Test that .. operator is not treated as a broadcast operator
+        @test _escape_call(:(x .. y)) == (; func=esc(:(..)), args, kwargs, quoted_func=:(:..))
+    end
+end
+
+@testset "Context display in @testset let blocks" begin
+    # Mock parent testset that just captures results
+    struct MockParentTestSet <: Test.AbstractTestSet
+        results::Vector{Any}
+        MockParentTestSet() = new([])
+    end
+    Test.record(ts::MockParentTestSet, t) = (push!(ts.results, t); t)
+    Test.finish(ts::MockParentTestSet) = ts
+
+    @testset "context shown when a context testset fails" begin
+        mock_parent1 = MockParentTestSet()
+        ctx_ts1 = Test.ContextTestSet(mock_parent1, :x, 42)
+
+        fail_result = Test.Fail(:test, "x == 99", "42 == 99", "42", nothing, LineNumberNode(1, :test), false)
+        Test.record(ctx_ts1, fail_result)
+
+        @test length(mock_parent1.results) == 1
+        recorded_fail = mock_parent1.results[1]
+        @test recorded_fail isa Test.Fail
+        @test recorded_fail.context !== nothing
+        @test occursin("x = 42", recorded_fail.context)
+    end
+
+    @testset "context shown when a context testset errors" begin
+        mock_parent2 = MockParentTestSet()
+        ctx_ts2 = Test.ContextTestSet(mock_parent2, :x, 42)
+
+        # Use internal constructor to create Error with pre-processed values
+        error_result = Test.Error(:test_error, "error(\"test\")", "ErrorException(\"test\")", "test\nStacktrace:\n [1] error()", nothing, LineNumberNode(1, :test))
+        Test.record(ctx_ts2, error_result)
+
+        @test length(mock_parent2.results) == 1
+        recorded_error = mock_parent2.results[1]
+        @test recorded_error isa Test.Error
+        @test recorded_error.context !== nothing
+        @test occursin("x = 42", recorded_error.context)
+
+        # Context shows up in string representation
+        error_str = sprint(show, recorded_error)
+        @test occursin("Context:", error_str)
+        @test occursin("x = 42", error_str)
+
+        # Multiple variables context
+        mock_parent3 = MockParentTestSet()
+        ctx_ts3 = Test.ContextTestSet(mock_parent3, :(x, y), (42, "hello"))
+
+        error_result2 = Test.Error(:test_error, "error(\"test\")", "ErrorException(\"test\")", "test\nStacktrace:\n [1] error()", nothing, LineNumberNode(1, :test))
+        Test.record(ctx_ts3, error_result2)
+
+        recorded_error2 = mock_parent3.results[1]
+        @test recorded_error2 isa Test.Error
+        @test recorded_error2.context !== nothing
+        @test occursin("(x, y) = (42, \"hello\")", recorded_error2.context)
+    end
+end
+
+@testset "io argument for Test output functions" begin
+    # Test print_test_results and print_test_errors with io redirection
+    io = IOBuffer()
+
+    # Create a testset with passing and failing tests
+    ts = Test.DefaultTestSet("IO Test"; time_start=1.36071654e9)
+    Test.record(ts, Test.Pass(:test, nothing, nothing, nothing, LineNumberNode(1), false))
+    fail = Test.Fail(:test, "1 == 2", nothing, nothing, LineNumberNode(2, Symbol("test.jl")))
+    push!(ts.results, fail)
+
+    # Test print_test_results with io
+    Test.print_test_results(io, ts)
+    output = String(take!(io))
+    @test occursin("Test Summary:", output)
+    @test occursin("IO Test", output)
+    @test occursin("Pass", output)
+    @test occursin("Fail", output)
+
+    # Test print_test_errors with io
+    Test.print_test_errors(io, ts)
+    output = String(take!(io))
+    @test occursin("Error in testset", output)
+    @test occursin("1 == 2", output)
+end
+
+@testset "JULIA_TEST_VERBOSE" begin
+    # Test the verbose testset entry/exit functionality
+    Base.ScopedValues.@with Test.VERBOSE_TESTSETS => true begin
+        # Capture output
+        output = mktemp() do fname, f
+            redirect_stdout(f) do
+                @testset "Verbose Test" begin
+                    @test true
+                    @testset "Nested Verbose Test" begin
+                        sleep(0.01)  # Add some duration
+                        @test 1 + 1 == 2
+                    end
+                end
+            end
+            seekstart(f)
+            read(f, String)
+        end
+
+        # Check that verbose messages are present
+        @test occursin("Starting testset: Verbose Test", output)
+        @test occursin("Finished testset: Verbose Test", output)
+        @test occursin("Starting testset: Nested Verbose Test", output)
+        @test occursin("Finished testset: Nested Verbose Test", output)
+
+        # Check that timing information is included in exit messages
+        @test occursin(r"Finished testset: Nested Verbose Test \([0-9\.]+s\)", output)
+
+        # Check indentation for nested testsets
+        lines = split(output, '\n')
+        entering_nested = findfirst(line -> occursin("Starting testset: Nested Verbose Test", line), lines)
+        exiting_nested = findfirst(line -> occursin("Finished testset: Nested Verbose Test", line), lines)
+
+        if entering_nested !== nothing && exiting_nested !== nothing
+            # Both nested messages should have more indentation than outer messages
+            @test startswith(lines[entering_nested], "  ")
+            @test startswith(lines[exiting_nested], "  ")
+        end
+    end
+
+    # Test that verbose output is disabled by default
+    Base.ScopedValues.@with Test.VERBOSE_TESTSETS => false begin
+        output = mktemp() do fname, f
+            redirect_stdout(f) do
+                @testset "Non-Verbose Test" begin
+                    @test true
+                end
+            end
+            seekstart(f)
+            read(f, String)
+        end
+
+        # Should not contain verbose messages
+        @test !occursin("Starting testset:", output)
+        @test !occursin("Finished testset:", output)
+    end
+end
diff --git a/stdlib/UUIDs/Project.toml b/stdlib/UUIDs/Project.toml
index 11dbcda5c4944..4eb31dc9572c0 100644
--- a/stdlib/UUIDs/Project.toml
+++ b/stdlib/UUIDs/Project.toml
@@ -1,5 +1,6 @@
 name = "UUIDs"
 uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+version = "1.11.0"
 
 [deps]
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/stdlib/UUIDs/docs/src/index.md b/stdlib/UUIDs/docs/src/index.md
index 1e6c950dd8999..c9529a4a38170 100644
--- a/stdlib/UUIDs/docs/src/index.md
+++ b/stdlib/UUIDs/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/UUIDs/docs/src/index.md"
+```
+
 # UUIDs
 
 ```@docs
diff --git a/stdlib/UUIDs/src/UUIDs.jl b/stdlib/UUIDs/src/UUIDs.jl
index 41d5319fec24d..39381c5278fe6 100644
--- a/stdlib/UUIDs/src/UUIDs.jl
+++ b/stdlib/UUIDs/src/UUIDs.jl
@@ -2,7 +2,7 @@
 
 """
 This module provides universally unique identifiers (UUIDs),
-along with functions creating the different variants.
+along with functions for creating the different variants.
 """
 module UUIDs
 
@@ -10,15 +10,15 @@ using Random
 
 import SHA
 
-export UUID, uuid1, uuid4, uuid5, uuid_version
+export UUID, uuid1, uuid4, uuid5, uuid7, uuid_version
 
 import Base: UUID
 
 """
-    uuid_version(u::UUID) -> Int
+    uuid_version(u::UUID)::Int
 
 Inspects the given UUID and returns its version
-(see [RFC 4122](https://www.ietf.org/rfc/rfc4122)).
+(see [RFC 4122](https://tools.ietf.org/html/rfc4122)).
 
 # Examples
 ```jldoctest
@@ -36,23 +36,25 @@ const namespace_oid  = UUID(0x6ba7b8129dad11d180b400c04fd430c8) # 6ba7b812-9dad-
 const namespace_x500 = UUID(0x6ba7b8149dad11d180b400c04fd430c8) # 6ba7b814-9dad-11d1-80b4-00c04fd430c8
 
 """
-    uuid1([rng::AbstractRNG]) -> UUID
+    uuid1([rng::AbstractRNG])::UUID
 
 Generates a version 1 (time-based) universally unique identifier (UUID), as specified
-by RFC 4122. Note that the Node ID is randomly generated (does not identify the host)
+by [RFC 4122](https://tools.ietf.org/html/rfc4122). Note that the Node ID is randomly generated (does not identify the host)
 according to section 4.5 of the RFC.
 
-The default rng used by `uuid1` is not `GLOBAL_RNG` and every invocation of `uuid1()` without
+The default rng used by `uuid1` is not `Random.default_rng()` and every invocation of `uuid1()` without
 an argument should be expected to return a unique identifier. Importantly, the outputs of
 `uuid1` do not repeat even when `Random.seed!(seed)` is called. Currently (as of Julia 1.6),
 `uuid1` uses `Random.RandomDevice` as the default rng. However, this is an implementation
 detail that may change in the future.
 
 !!! compat "Julia 1.6"
-    The output of `uuid1` does not depend on `GLOBAL_RNG` as of Julia 1.6.
+    The output of `uuid1` does not depend on `Random.default_rng()` as of Julia 1.6.
 
 # Examples
 ```jldoctest; filter = r"[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}"
+julia> using Random
+
 julia> rng = MersenneTwister(1234);
 
 julia> uuid1(rng)
@@ -60,17 +62,21 @@ UUID("cfc395e8-590f-11e8-1f13-43a2532b2fa8")
 ```
 """
 function uuid1(rng::AbstractRNG=Random.RandomDevice())
+    # 0x01b21dd213814000 is the number of 100 nanosecond intervals
+    # between the UUID epoch and Unix epoch
+    timestamp = round(UInt64, time() * 1e7) + 0x01b21dd213814000
+    _build_uuid1(rng, timestamp)
+end
+
+function _build_uuid1(rng::AbstractRNG, timestamp::UInt64)
     u = rand(rng, UInt128)
 
     # mask off clock sequence and node
     u &= 0x00000000000000003fffffffffffffff
 
-    # set the unicast/multicast bit and version
-    u |= 0x00000000000010000000010000000000
+    # set the version, variant, and unicast/multicast bit
+    u |= 0x00000000000010008000010000000000
 
-    # 0x01b21dd213814000 is the number of 100 nanosecond intervals
-    # between the UUID epoch and Unix epoch
-    timestamp = round(UInt64, time() * 1e7) + 0x01b21dd213814000
     ts_low = timestamp & typemax(UInt32)
     ts_mid = (timestamp >> 32) & typemax(UInt16)
     ts_hi = (timestamp >> 48) & 0x0fff
@@ -79,30 +85,32 @@ function uuid1(rng::AbstractRNG=Random.RandomDevice())
     u |= UInt128(ts_mid) << 80
     u |= UInt128(ts_hi) << 64
 
-    UUID(u)
+    return UUID(u)
 end
 
 """
-    uuid4([rng::AbstractRNG]) -> UUID
+    uuid4([rng::AbstractRNG])::UUID
 
 Generates a version 4 (random or pseudo-random) universally unique identifier (UUID),
-as specified by RFC 4122.
+as specified by [RFC 4122](https://tools.ietf.org/html/rfc4122).
 
-The default rng used by `uuid4` is not `GLOBAL_RNG` and every invocation of `uuid4()` without
+The default rng used by `uuid4` is not `Random.default_rng()` and every invocation of `uuid4()` without
 an argument should be expected to return a unique identifier. Importantly, the outputs of
 `uuid4` do not repeat even when `Random.seed!(seed)` is called. Currently (as of Julia 1.6),
 `uuid4` uses `Random.RandomDevice` as the default rng. However, this is an implementation
 detail that may change in the future.
 
 !!! compat "Julia 1.6"
-    The output of `uuid4` does not depend on `GLOBAL_RNG` as of Julia 1.6.
+    The output of `uuid4` does not depend on `Random.default_rng()` as of Julia 1.6.
 
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
+julia> using Random
+
+julia> rng = Xoshiro(123);
 
 julia> uuid4(rng)
-UUID("7a052949-c101-4ca3-9a7e-43a2532b2fa8")
+UUID("856e446e-0c6a-472a-9638-f7b8557cd282")
 ```
 """
 function uuid4(rng::AbstractRNG=Random.RandomDevice())
@@ -113,23 +121,25 @@ function uuid4(rng::AbstractRNG=Random.RandomDevice())
 end
 
 """
-    uuid5(ns::UUID, name::String) -> UUID
+    uuid5(ns::UUID, name::String)::UUID
 
 Generates a version 5 (namespace and domain-based) universally unique identifier (UUID),
-as specified by RFC 4122.
+as specified by [RFC 4122](https://tools.ietf.org/html/rfc4122).
 
 !!! compat "Julia 1.1"
     This function requires at least Julia 1.1.
 
 # Examples
 ```jldoctest
-julia> rng = MersenneTwister(1234);
+julia> using Random
+
+julia> rng = Xoshiro(123);
 
 julia> u4 = uuid4(rng)
-UUID("7a052949-c101-4ca3-9a7e-43a2532b2fa8")
+UUID("856e446e-0c6a-472a-9638-f7b8557cd282")
 
 julia> u5 = uuid5(u4, "julia")
-UUID("086cc5bb-2461-57d8-8068-0aed7f5b5cd1")
+UUID("2df91e3f-da06-5362-a6fe-03772f2e14c9")
 ```
 """
 function uuid5(ns::UUID, name::String)
@@ -151,4 +161,47 @@ function uuid5(ns::UUID, name::String)
     return UUID(v)
 end
 
+"""
+    uuid7([rng::AbstractRNG])::UUID
+
+Generates a version 7 (random or pseudo-random) universally unique identifier (UUID),
+as specified by [RFC 9562](https://tools.ietf.org/html/rfc9562).
+
+The default rng used by `uuid7` is not `Random.default_rng()` and every invocation of `uuid7()` without
+an argument should be expected to return a unique identifier. Importantly, the outputs of
+`uuid7` do not repeat even when `Random.seed!(seed)` is called. Currently (as of Julia 1.12),
+`uuid7` uses `Random.RandomDevice` as the default rng. However, this is an implementation
+detail that may change in the future.
+
+!!! compat "Julia 1.12"
+    `uuid7()` is available as of Julia 1.12.
+
+# Examples
+```jldoctest; filter = r"[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}"
+julia> using Random
+
+julia> rng = Xoshiro(123);
+
+julia> uuid7(rng)
+UUID("019026ca-e086-772a-9638-f7b8557cd282")
+```
+"""
+function uuid7(rng::AbstractRNG=Random.RandomDevice())
+    # current time in ms, rounded to an Integer
+    timestamp = round(UInt128, time() * 1e3)
+    _build_uuid7(rng, timestamp)
+end
+
+function _build_uuid7(rng::AbstractRNG, timestamp::UInt128)
+    bytes = rand(rng, UInt128)
+    # make space for the timestamp
+    bytes &= 0x0000000000000fff3fffffffffffffff
+    # version & variant
+    bytes |= 0x00000000000070008000000000000000
+
+    bytes |= timestamp << UInt128(80)
+
+    return UUID(bytes)
+end
+
 end
diff --git a/stdlib/UUIDs/test/runtests.jl b/stdlib/UUIDs/test/runtests.jl
index 5085fa33e8573..27bcad1f20dc5 100644
--- a/stdlib/UUIDs/test/runtests.jl
+++ b/stdlib/UUIDs/test/runtests.jl
@@ -1,23 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test, UUIDs, Random
-
-u1 = uuid1()
-u4 = uuid4()
-u5 = uuid5(u1, "julia")
-@test uuid_version(u1) == 1
-@test uuid_version(u4) == 4
-@test uuid_version(u5) == 5
-@test u1 == UUID(string(u1)) == UUID(GenericString(string(u1)))
-@test u4 == UUID(string(u4)) == UUID(GenericString(string(u4)))
-@test u5 == UUID(string(u5)) == UUID(GenericString(string(u5)))
-@test u1 == UUID(UInt128(u1))
-@test u4 == UUID(UInt128(u4))
-@test u5 == UUID(UInt128(u5))
-@test uuid4(MersenneTwister(0)) == uuid4(MersenneTwister(0))
-@test_throws ArgumentError UUID("550e8400e29b-41d4-a716-446655440000")
-@test_throws ArgumentError UUID("550e8400e29b-41d4-a716-44665544000098")
-@test_throws ArgumentError UUID("z50e8400-e29b-41d4-a716-446655440000")
+using UUIDs: _build_uuid1, _build_uuid7
 
 # results similar to Python builtin uuid
 # To reproduce the sequence
@@ -37,11 +21,6 @@ const following_uuids = [
     UUID("d8cc6298-75d5-57e0-996c-279259ab365c"),
 ]
 
-for (idx, init_uuid) in enumerate(following_uuids[1:end-1])
-    next_id = uuid5(init_uuid, "julia")
-    @test next_id == following_uuids[idx+1]
-end
-
 # Python-generated UUID following each of the standard namespaces
 const standard_namespace_uuids = [
     (UUIDs.namespace_dns,  UUID("00ca23ad-40ef-500c-a910-157de3950d07")),
@@ -50,26 +29,107 @@ const standard_namespace_uuids = [
     (UUIDs.namespace_x500, UUID("993c6684-82e7-5cdb-bd46-9bff0362e6a9")),
 ]
 
-for (init_uuid, next_uuid) in standard_namespace_uuids
-    result = uuid5(init_uuid, "julia")
-    @test next_uuid == result
-end
-
-# Issue 35860
-Random.seed!(Random.GLOBAL_RNG, 10)
+@testset "UUIDs" begin
 u1 = uuid1()
 u4 = uuid4()
-Random.seed!(Random.GLOBAL_RNG, 10)
-@test u1 != uuid1()
-@test u4 != uuid4()
-
-@test_throws ArgumentError UUID("22b4a8a1ae548-4eeb-9270-60426d66a48e")
-@test_throws ArgumentError UUID("22b4a8a1-e548a4eeb-9270-60426d66a48e")
-@test_throws ArgumentError UUID("22b4a8a1-e548-4eeba9270-60426d66a48e")
-@test_throws ArgumentError UUID("22b4a8a1-e548-4eeb-9270a60426d66a48e")
-str = "22b4a8a1-e548-4eeb-9270-60426d66a48e"
-@test UUID(uppercase(str)) == UUID(str)
-
-for r in rand(UInt128, 10^3)
-    @test UUID(r) == UUID(string(UUID(r)))
+u5 = uuid5(u1, "julia")
+u7 = uuid7()
+
+@testset "Extraction of version numbers" begin
+    @test uuid_version(u1) == 1
+    @test uuid_version(u4) == 4
+    @test uuid_version(u5) == 5
+    @test uuid_version(u7) == 7
+end
+
+@testset "Extraction of variant bits" begin
+    # RFC 4122, section 4.1.1
+    uuid_variant(u::UUID) = Int((u.value >> 62) & 0x3)
+    @test uuid_variant(u1) == 2
+    @test uuid_variant(u4) == 2
+    @test uuid_variant(u5) == 2
+    @test uuid_variant(u7) == 2
+end
+
+@testset "Parsing from string" begin
+    @test u1 == UUID(string(u1)) == UUID(GenericString(string(u1)))
+    @test u4 == UUID(string(u4)) == UUID(GenericString(string(u4)))
+    @test u5 == UUID(string(u5)) == UUID(GenericString(string(u5)))
+    @test u7 == UUID(string(u7)) == UUID(GenericString(string(u7)))
+end
+
+@testset "UInt128 conversion" begin
+    @test u1 == UUID(UInt128(u1))
+    @test u4 == UUID(UInt128(u4))
+    @test u5 == UUID(UInt128(u5))
+    @test u7 == UUID(UInt128(u7))
+end
+
+@testset "Passing an RNG" begin
+    rng = Xoshiro(0)
+    @test uuid1(rng) isa UUID
+    @test uuid4(rng) isa UUID
+    @test uuid7(rng) isa UUID
+end
+
+@testset "uuid1, uuid4 & uuid7 RNG stability" begin
+    @test uuid4(Xoshiro(0)) == uuid4(Xoshiro(0))
+
+    time_uuid1 = rand(UInt64)
+    time_uuid7 = rand(UInt128)
+
+    # we need to go through the internal function to test RNG stability
+    @test _build_uuid1(Xoshiro(0), time_uuid1) == _build_uuid1(Xoshiro(0), time_uuid1)
+    @test _build_uuid7(Xoshiro(0), time_uuid7) == _build_uuid7(Xoshiro(0), time_uuid7)
+end
+
+@testset "Rejection of invalid UUID strings" begin
+    @test_throws ArgumentError UUID("550e8400e29b-41d4-a716-446655440000")
+    @test_throws ArgumentError UUID("550e8400e29b-41d4-a716-44665544000098")
+    @test_throws ArgumentError UUID("z50e8400-e29b-41d4-a716-446655440000")
+    @test_throws ArgumentError UUID("22b4a8a1ae548-4eeb-9270-60426d66a48e")
+    @test_throws ArgumentError UUID("22b4a8a1-e548a4eeb-9270-60426d66a48e")
+    @test_throws ArgumentError UUID("22b4a8a1-e548-4eeba9270-60426d66a48e")
+    @test_throws ArgumentError UUID("22b4a8a1-e548-4eeb-9270a60426d66a48e")
+end
+
+@testset "UUID sequence" begin
+    for (idx, init_uuid) in enumerate(following_uuids[1:end-1])
+        next_id = uuid5(init_uuid, "julia")
+        @test next_id == following_uuids[idx+1]
+    end
+end
+
+@testset "Standard namespace UUIDs" begin
+    for (init_uuid, next_uuid) in standard_namespace_uuids
+        result = uuid5(init_uuid, "julia")
+        @test next_uuid == result
+    end
+end
+
+@testset "Use of Random.RandomDevice (#35860)" begin
+    Random.seed!(Random.default_rng(), 10)
+    u1 = uuid1()
+    u4 = uuid4()
+    u7 = uuid7()
+    Random.seed!(Random.default_rng(), 10)
+    @test u1 != uuid1()
+    @test u4 != uuid4()
+    @test u7 != uuid7()
+end
+
+@testset "case invariance" begin
+    str = "22b4a8a1-e548-4eeb-9270-60426d66a48e"
+    @test UUID(uppercase(str)) == UUID(str)
+end
+
+@testset "Equality of string parsing & direct UInt128 passing" begin
+    for r in rand(UInt128, 10^3)
+        @test UUID(r) == UUID(string(UUID(r)))
+    end
+end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(UUIDs))
+end
 end
diff --git a/stdlib/Unicode/Project.toml b/stdlib/Unicode/Project.toml
index 5e3040ce9e3db..781da423c63e8 100644
--- a/stdlib/Unicode/Project.toml
+++ b/stdlib/Unicode/Project.toml
@@ -1,9 +1,10 @@
 name = "Unicode"
 uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
-
+version = "1.11.0"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [targets]
-test = ["Test"]
+test = ["Test", "Random"]
diff --git a/stdlib/Unicode/docs/src/index.md b/stdlib/Unicode/docs/src/index.md
index 2771c8a9f01cc..fdf07685a4492 100644
--- a/stdlib/Unicode/docs/src/index.md
+++ b/stdlib/Unicode/docs/src/index.md
@@ -1,6 +1,14 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Unicode/docs/src/index.md"
+```
+
 # Unicode
 
+The `Unicode` module provides essential functionality for managing Unicode characters and strings.
+It includes validation, category determination, normalization, case transformation, and grapheme segmentation, enabling effective Unicode data handling.
+
 ```@docs
+Unicode
 Unicode.julia_chartransform
 Unicode.isassigned
 Unicode.isequal_normalized
diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl
index e0ae78bd911a7..5126f59325410 100644
--- a/stdlib/Unicode/src/Unicode.jl
+++ b/stdlib/Unicode/src/Unicode.jl
@@ -1,9 +1,15 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-
+"""
+The `Unicode` module provides essential functionality for managing Unicode characters and strings.
+It includes validation, category determination, normalization, case transformation, and grapheme segmentation,
+enabling effective Unicode data handling.
+"""
 module Unicode
 
 export graphemes, isequal_normalized
 
+public normalize
+
 """
     Unicode.julia_chartransform(c::Union{Char,Integer})
 
@@ -87,7 +93,7 @@ options (which all default to `false` except for `compose`) are specified:
 * `stable=true`: enforce Unicode versioning stability (never introduce characters missing from earlier Unicode versions)
 
 You can also use the `chartransform` keyword (which defaults to `identity`) to pass an arbitrary
-*function* mapping `Integer` codepoints to codepoints, which is is called on each
+*function* mapping `Integer` codepoints to codepoints, which is called on each
 character in `s` as it is processed, in order to perform arbitrary additional normalizations.
 For example, by passing `chartransform=Unicode.julia_chartransform`, you can apply a few Julia-specific
 character normalizations that are performed by Julia when parsing identifiers (in addition to
@@ -118,7 +124,7 @@ normalize(s::AbstractString, nf::Symbol) = Base.Unicode.normalize(s, nf)
 normalize(s::AbstractString; kwargs...) = Base.Unicode.normalize(s; kwargs...)
 
 """
-    Unicode.isassigned(c) -> Bool
+    Unicode.isassigned(c)::Bool
 
 Return `true` if the given char or integer is an assigned Unicode code point.
 
@@ -134,7 +140,7 @@ true
 isassigned(c) = Base.Unicode.isassigned(c)
 
 """
-    graphemes(s::AbstractString) -> GraphemeIterator
+    graphemes(s::AbstractString)::GraphemeIterator
 
 Return an iterator over substrings of `s` that correspond to the extended graphemes in the
 string, as defined by Unicode UAX #29. (Roughly, these are what users would perceive as
@@ -144,7 +150,7 @@ letter combined with an accent mark is a single grapheme.)
 graphemes(s::AbstractString) = Base.Unicode.GraphemeIterator{typeof(s)}(s)
 
 """
-    graphemes(s::AbstractString, m:n) -> SubString
+    graphemes(s::AbstractString, m:n)::SubString
 
 Returns a [`SubString`](@ref) of `s` consisting of the `m`-th
 through `n`-th graphemes of the string `s`, where the second
@@ -208,12 +214,19 @@ end
 
 using Base.Unicode: utf8proc_error, UTF8PROC_DECOMPOSE, UTF8PROC_CASEFOLD, UTF8PROC_STRIPMARK
 
-function _decompose_char!(codepoint::Union{Integer,Char}, dest::Vector{UInt32}, options::Integer)
-    ret = @ccall utf8proc_decompose_char(codepoint::UInt32, dest::Ptr{UInt32}, length(dest)::Int, options::Cint, C_NULL::Ptr{Cint})::Int
+function _decompose_char!(codepoint::Union{Integer,Char}, dest::Vector{UInt32}, offset::Integer, options::Integer)
+    ret = GC.@preserve dest @ccall utf8proc_decompose_char(codepoint::UInt32, pointer(dest, 1+offset)::Ptr{UInt32}, (length(dest)-offset)::Int, options::Cint, C_NULL::Ptr{Cint})::Int
     ret < 0 && utf8proc_error(ret)
     return ret
 end
 
+# would be good to have higher-level accessor functions in utf8proc.  alternatively,
+# we could mirror the whole utf8proc_property_t struct in Julia, but that is annoying
+# because of the bitfields.
+combining_class(uc::Integer) =
+    0x000301 ≤ uc ≤ 0x10ffff ? unsafe_load(ccall(:utf8proc_get_property, Ptr{UInt16}, (UInt32,), uc), 2) : 0x0000
+combining_class(c::AbstractChar) = ismalformed(c) ? 0x0000 : combining_class(UInt32(c))
+
 """
     isequal_normalized(s1::AbstractString, s2::AbstractString; casefold=false, stripmark=false, chartransform=identity)
 
@@ -225,6 +238,9 @@ As with [`Unicode.normalize`](@ref), you can also pass an arbitrary
 function via the `chartransform` keyword (mapping `Integer` codepoints to codepoints)
 to perform custom normalizations, such as [`Unicode.julia_chartransform`](@ref).
 
+!!! compat "Julia 1.8"
+    The `isequal_normalized` function was added in Julia 1.8.
+
 # Examples
 
 For example, the string `"noël"` can be constructed in two canonically equivalent ways
@@ -251,29 +267,78 @@ julia> isequal_normalized(s1, "NOËL", casefold=true)
 true
 ```
 """
-function isequal_normalized(s1::AbstractString, s2::AbstractString; casefold::Bool=false, stripmark::Bool=false, chartransform=identity)
-    function decompose_next_char!(c, state, d, options, s)
-        n = _decompose_char!(c, d, options)
-        if n > length(d) # may be possible in future Unicode versions?
-            n = _decompose_char!(c, resize!(d, n), options)
+isequal_normalized(s1::AbstractString, s2::AbstractString; casefold::Bool=false, stripmark::Bool=false, chartransform=identity) =
+    _isequal_normalized!(s1, s2, Vector{UInt32}(undef, 4), Vector{UInt32}(undef, 4), chartransform; casefold, stripmark)
+
+# like isequal_normalized, but takes pre-allocated codepoint buffers as arguments, and chartransform is a positional argument
+function _isequal_normalized!(s1::AbstractString, s2::AbstractString,
+                              d1::Vector{UInt32}, d2::Vector{UInt32}, chartransform::F=identity;
+                              casefold::Bool=false, stripmark::Bool=false) where {F}
+    function decompose_next_chars!(state, d, options, s)
+        local n
+        offset = 0
+        @inbounds while true
+            # read a char and decompose it to d
+            c = chartransform(UInt32(state[1]))
+            state = iterate(s, state[2])
+            if c < 0x80 # fast path for common ASCII case
+                n = 1 + offset
+                n > length(d) && resize!(d, 2n)
+                d[n] = casefold ? (0x41 ≤ c ≤ 0x5A ? c+0x20 : c) : c
+                break # ASCII characters are all zero combining class
+            else
+                while true
+                    n = _decompose_char!(c, d, offset, options) + offset
+                    if n > length(d)
+                        resize!(d, 2n)
+                        continue
+                    end
+                    break
+                end
+            end
+
+            # decomposed chars must be sorted in ascending order of combining class,
+            # which means we need to keep fetching chars until we get to non-combining
+            (iszero(combining_class(d[n])) || isnothing(state)) && break # non-combining
+            offset = n
         end
-        return 1, n, iterate(s, state)
+
+        # sort by combining class
+        if n < 32 # almost always true
+            for j1 = 2:n # insertion sort
+                cc = combining_class(d[j1])
+                iszero(cc) && continue # don't re-order non-combiners
+                for j2 = j1:-1:2
+                    combining_class(d[j2-1]) ≤ cc && break
+                    d[j2-1], d[j2] = d[j2], d[j2-1]
+                end
+            end
+        else # avoid n^2 complexity in crazy large-n case
+            j = 1
+            @views while j < n
+                j₀ = j + something(findnext(iszero ∘ combining_class, d[j+1:n], 1), n+1-j)
+                sort!(d[j:j₀-1], by=combining_class)
+                j = j₀
+            end
+        end
+
+        # split return statement to help type inference:
+        return state === nothing ? (1, n, nothing) : (1, n, state)
     end
     options = UTF8PROC_DECOMPOSE
     casefold && (options |= UTF8PROC_CASEFOLD)
     stripmark && (options |= UTF8PROC_STRIPMARK)
     i1,i2 = iterate(s1),iterate(s2)
-    d1,d2 = Vector{UInt32}(undef, 4), Vector{UInt32}(undef, 4) # codepoint buffers
     n1 = n2 = 0 # lengths of codepoint buffers
     j1 = j2 = 1 # indices in d1, d2
     while true
         if j1 > n1
             i1 === nothing && return i2 === nothing && j2 > n2
-            j1, n1, i1 = decompose_next_char!(chartransform(UInt32(i1[1])), i1[2], d1, options, s1)
+            j1, n1, i1 = decompose_next_chars!(i1, d1, options, s1)
         end
         if j2 > n2
             i2 === nothing && return false
-            j2, n2, i2 = decompose_next_char!(chartransform(UInt32(i2[1])), i2[2], d2, options, s2)
+            j2, n2, i2 = decompose_next_chars!(i2, d2, options, s2)
         end
         d1[j1] == d2[j2] || return false
         j1 += 1; j2 += 1
diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl
index 5c5a75b33e363..2af7015afa249 100644
--- a/stdlib/Unicode/test/runtests.jl
+++ b/stdlib/Unicode/test/runtests.jl
@@ -3,6 +3,9 @@
 using Test
 using Unicode
 using Unicode: normalize, isassigned, julia_chartransform
+import Random
+
+Random.seed!(12345)
 
 @testset "string normalization" begin
     # normalize (Unicode normalization etc.):
@@ -27,14 +30,14 @@ using Unicode: normalize, isassigned, julia_chartransform
     @test normalize("\u0072\u0307\u0323", :NFC) == "\u1E5B\u0307" #26917
 
     # julia_chartransform identifier normalization
-    @test normalize("julia\u025B\u00B5\u00B7\u0387\u2212", chartransform=julia_chartransform) ==
-        "julia\u03B5\u03BC\u22C5\u22C5\u002D"
+    @test normalize("julia\u025B\u00B5\u00B7\u0387\u2212\u210F", chartransform=julia_chartransform) ==
+        "julia\u03B5\u03BC\u22C5\u22C5\u002D\u0127"
     @test julia_chartransform('\u00B5') === '\u03BC'
 end
 
 @testset "unicode sa#15" begin
     #Tests from Unicode SA#15, "Unicode normalization forms"
-    #http://www.unicode.org/reports/tr15/
+    #https://www.unicode.org/reports/tr15/
 
     @testset "canonical equivalence" begin
         let ==(a::Array{Char},b::Array{Char}) = normalize(string(a...), :NFC)==normalize(string(b...), :NFC)
@@ -281,6 +284,8 @@ end
     @test_throws BoundsError graphemes("äöüx", 2:5)
     @test_throws BoundsError graphemes("äöüx", 5:5)
     @test_throws ArgumentError graphemes("äöüx", 0:1)
+
+    @test @allocated(length(graphemes("äöüx"))) == 0
 end
 
 @testset "#3721, #6939 up-to-date character widths" begin
@@ -455,6 +460,9 @@ end
     @test !Base.Unicode.isvalid(Char, overlong_char)
 end
 
+# the obvious, but suboptimal, algorithm:
+isequal_normalized_naive(s1, s2; kws...) = normalize(s1; kws...) == normalize(s2; kws...)
+
 @testset "Unicode equivalence" begin
     @test isequal_normalized("no\u00EBl", "noe\u0308l")
     @test !isequal_normalized("no\u00EBl", "noe\u0308l ")
@@ -466,4 +474,69 @@ end
     @test isequal_normalized("no\u00EBl", "noel", stripmark=true)
     @test isequal_normalized("no\u00EBl", "NOEL", stripmark=true, casefold=true)
     @test isequal_normalized("\u00B5\u0302m", "\u03BC\u0302m", chartransform=julia_chartransform)
+
+    # issue #52408
+    @testset "Sorting combining characters" begin
+        for str in ("\u5bc\u5b0", "j\u5ae\u5bf\u5b2\u5b4") # julia#52408 examples
+            @test isequal_normalized(str, normalize(str))
+        end
+
+        # first codepoint in every possible Unicode combining class
+        let cc_chars = UInt32[0x00000334, 0x00016ff0, 0x0000093c, 0x00003099, 0x0000094d, 0x000005b0, 0x000005b1, 0x000005b2, 0x000005b3, 0x000005b4, 0x000005b5, 0x000005b6, 0x000005b7, 0x000005b8, 0x000005b9, 0x000005bb, 0x000005bc, 0x000005bd, 0x000005bf, 0x000005c1, 0x000005c2, 0x0000fb1e, 0x0000064b, 0x0000064c, 0x0000064d, 0x00000618, 0x00000619, 0x0000061a, 0x00000651, 0x00000652, 0x00000670, 0x00000711, 0x00000c55, 0x00000c56, 0x00000e38, 0x00000e48, 0x00000eb8, 0x00000ec8, 0x00000f71, 0x00000f72, 0x00000f74, 0x00000321, 0x00001dce, 0x0000031b, 0x00001dfa, 0x00000316, 0x0000059a, 0x0000302e, 0x0001d16d, 0x000005ae, 0x00000301, 0x00000315, 0x0000035c, 0x0000035d, 0x00000345],
+            vowels = ['a', 'e', 'i', 'o', 'u', 'å', 'é', 'î', 'ö', 'ü'], Vowels = [vowels; uppercase.(vowels)]
+            function randcc(n, n_cc) # random string with lots of combining chars
+                buf = IOBuffer()
+                for _ = 1:n
+                    print.(buf, rand(Vowels, rand(1:5)))
+                    print.(buf, Char.(rand(cc_chars, rand(0:n_cc))))
+                end
+                return String(take!(buf))
+            end
+            for _ = 1:100
+                s = randcc(10,10)
+                ns = normalize(s)
+                cs = normalize(s, casefold=true)
+                @test isequal_normalized(s, s)
+                if !isequal_normalized(s, ns)
+                    @show s
+                end
+                @test isequal_normalized(s, ns)
+                @test isequal_normalized(cs, ns) == isequal_normalized_naive(cs, ns)
+                @test isequal_normalized(cs, ns, casefold=true) ==
+                      isequal_normalized_naive(cs, ns, casefold=true)
+            end
+            for _ = 1:3
+                s = randcc(5,1000) # exercise sort!-based fallback
+                @test isequal_normalized(s, normalize(s))
+            end
+            function randcc2(n, n_cc) # 2 strings with equivalent reordered combiners
+                buf1 = IOBuffer()
+                buf2 = IOBuffer()
+                p = n_cc / length(cc_chars)
+                for _ = 1:n
+                    a = join(rand(Vowels, rand(1:5)))
+                    print(buf1, a)
+                    print(buf2, a)
+
+                    # chars from distinct combining classes
+                    # are canonically equivalent when re-ordered
+                    c = Random.randsubseq(cc_chars, p)
+                    print.(buf1, Char.(Random.shuffle!(c)))
+                    print.(buf2, Char.(Random.shuffle!(c)))
+                end
+                return String(take!(buf1)), String(take!(buf2))
+            end
+            for _ = 1:100
+                s1, s2 = randcc2(10,10)
+                @test isequal_normalized(s1, s2)
+            end
+        end
+
+        # combining characters in the same class are inequivalent if re-ordered:
+        @test !isequal_normalized("x\u0334\u0335", "x\u0335\u0334")
+    end
+end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Unicode))
 end
diff --git a/stdlib/Zlib_jll/Project.toml b/stdlib/Zlib_jll/Project.toml
index 575863062d8bb..40acd335c2327 100644
--- a/stdlib/Zlib_jll/Project.toml
+++ b/stdlib/Zlib_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "Zlib_jll"
 uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
-version = "1.2.13+0"
+version = "1.3.1+2"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/Zlib_jll/src/Zlib_jll.jl b/stdlib/Zlib_jll/src/Zlib_jll.jl
index ea381b8b0683c..a52168bf244b0 100644
--- a/stdlib/Zlib_jll/src/Zlib_jll.jl
+++ b/stdlib/Zlib_jll/src/Zlib_jll.jl
@@ -3,43 +3,44 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/Zlib_jll.jl
 baremodule Zlib_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
 
 export libz
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libz_handle::Ptr{Cvoid} = C_NULL
-libz_path::String = ""
 
-if Sys.iswindows()
-    const libz = "libz.dll"
-elseif Sys.isapple()
-    const libz = "@rpath/libz.1.dylib"
-else
-    const libz = "libz.so.1"
+libz_path::String = ""
+const libz = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libz.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libz.1.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libz.so.1")
+    else
+        error("Zlib_jll: Library 'libz' is not available for $(Sys.KERNEL)")
+    end
+)
+
+function eager_mode()
+    dlopen(libz)
 end
+is_available() = true
 
 function __init__()
-    global libz_handle = dlopen(libz)
-    global libz_path = dlpath(libz_handle)
+    global libz_path = string(libz.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libz_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libz_path() = libz_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module Zlib_jll
diff --git a/stdlib/Zlib_jll/test/runtests.jl b/stdlib/Zlib_jll/test/runtests.jl
index f04f9c70a7054..81eb742a172fe 100644
--- a/stdlib/Zlib_jll/test/runtests.jl
+++ b/stdlib/Zlib_jll/test/runtests.jl
@@ -3,5 +3,5 @@
 using Test, Zlib_jll
 
 @testset "Zlib_jll" begin
-    @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.2.13"
+    @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.3.1"
 end
diff --git a/stdlib/Zstd_jll/Project.toml b/stdlib/Zstd_jll/Project.toml
new file mode 100644
index 0000000000000..1f8172cdc75bc
--- /dev/null
+++ b/stdlib/Zstd_jll/Project.toml
@@ -0,0 +1,17 @@
+name = "Zstd_jll"
+uuid = "3161d3a3-bdf6-5164-811a-617609db77b4"
+version = "1.5.7+1"
+
+[deps]
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[compat]
+CompilerSupportLibraries_jll = "1.3.0"
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/Zstd_jll/src/Zstd_jll.jl b/stdlib/Zstd_jll/src/Zstd_jll.jl
new file mode 100644
index 0000000000000..30481a12e0979
--- /dev/null
+++ b/stdlib/Zstd_jll/src/Zstd_jll.jl
@@ -0,0 +1,109 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/Zstd_jll.j:
+#
+baremodule Zstd_jll
+using Base, Libdl
+if Sys.iswindows() && Sys.WORD_SIZE == 32
+    using CompilerSupportLibraries_jll
+end
+
+export libzstd, zstd, zstdmt
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const PATH_list = String[]
+const LIBPATH = Ref("")
+const LIBPATH_list = String[]
+artifact_dir::String = ""
+
+libzstd_path::String = ""
+zstd_path::String = ""
+zstdmt_path::String = ""
+const libzstd = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libzstd-1.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libzstd.1.dylib")
+    elseif Sys.islinux() || Sys.isfreebsd()
+        BundledLazyLibraryPath("libzstd.so.1")
+    else
+        error("Zstd_jll: Library 'libzstd' is not available for $(Sys.KERNEL)")
+    end;
+    dependencies = if Sys.iswindows() && Sys.WORD_SIZE == 32
+        LazyLibrary[libgcc_s]
+    else
+        LazyLibrary[]
+    end
+)
+
+if Sys.iswindows()
+    const zstd_exe = "zstd.exe"
+    const zstdmt_exe = "zstdmt.exe"
+else
+    const zstd_exe = "zstd"
+    const zstdmt_exe = "zstdmt"
+end
+
+if Sys.iswindows()
+    const pathsep = ';'
+elseif Sys.isapple()
+    const pathsep = ':'
+else
+    const pathsep = ':'
+end
+
+if Sys.iswindows()
+function adjust_ENV(cmd::Cmd)
+    dllPATH = Sys.BINDIR
+    oldPATH = get(ENV, "PATH", "")
+    newPATH = isempty(oldPATH) ? dllPATH : "$dllPATH$pathsep$oldPATH"
+    return addenv(cmd, "PATH"=>newPATH)
+end
+else
+adjust_ENV(cmd::Cmd) = cmd
+end
+
+function adjust_ENV()
+    addPATH = joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR)
+    oldPATH = get(ENV, "PATH", "")
+    newPATH = isempty(oldPATH) ? addPATH : "$addPATH$pathsep$oldPATH"
+    return ("PATH"=>newPATH,)
+end
+
+function zstd(f::Function; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) # deprecated, for compat only
+    withenv((adjust_PATH ? adjust_ENV() : ())...) do
+        f(zstd())
+    end
+end
+function zstdmt(f::Function; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) # deprecated, for compat only
+    withenv((adjust_PATH ? adjust_ENV() : ())...) do
+        f(zstdmt())
+    end
+end
+zstd() = adjust_ENV(`$zstd_path`)
+zstdmt() = adjust_ENV(`$zstdmt_path`)
+
+# Function to eagerly dlopen our library and thus resolve all dependencies
+function eager_mode()
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
+    dlopen(libzstd)
+end
+
+is_available() = true
+
+function __init__()
+    global libzstd_path = string(libzstd.path)
+    global zstd_path = joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, zstd_exe)
+    global zstdmt_path = joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, zstdmt_exe)
+    global artifact_dir = dirname(Sys.BINDIR)
+end
+
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
+
+end  # module Zstd_jll
diff --git a/stdlib/Zstd_jll/test/runtests.jl b/stdlib/Zstd_jll/test/runtests.jl
new file mode 100644
index 0000000000000..5cfa2a1375c73
--- /dev/null
+++ b/stdlib/Zstd_jll/test/runtests.jl
@@ -0,0 +1,7 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Zstd_jll
+
+@testset "Zstd_jll" begin
+    @test ccall((:ZSTD_versionNumber, libzstd), Cuint, ()) == 1_05_07
+end
diff --git a/stdlib/dSFMT_jll/Project.toml b/stdlib/dSFMT_jll/Project.toml
index 4e3e80f918f0b..30209421a9994 100644
--- a/stdlib/dSFMT_jll/Project.toml
+++ b/stdlib/dSFMT_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "dSFMT_jll"
 uuid = "05ff407c-b0c1-5878-9df8-858cc2e60c36"
-version = "2.2.4+1"
+version = "2.2.5+2"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/dSFMT_jll/src/dSFMT_jll.jl b/stdlib/dSFMT_jll/src/dSFMT_jll.jl
index 35ada23778a94..93fc26dc87a53 100644
--- a/stdlib/dSFMT_jll/src/dSFMT_jll.jl
+++ b/stdlib/dSFMT_jll/src/dSFMT_jll.jl
@@ -4,43 +4,42 @@
 
 baremodule dSFMT_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
 
 export libdSFMT
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libdSFMT_handle::Ptr{Cvoid} = C_NULL
-libdSFMT_path::String = ""
 
-if Sys.iswindows()
-    const libdSFMT = "libdSFMT.dll"
-elseif Sys.isapple()
-    const libdSFMT = "@rpath/libdSFMT.dylib"
-else
-    const libdSFMT = "libdSFMT.so"
+libdSFMT_path::String = ""
+const libdSFMT = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libdSFMT.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libdSFMT.dylib")
+    else
+        BundledLazyLibraryPath("libdSFMT.so")
+    end
+)
+
+function eager_mode()
+    dlopen(libdSFMT)
 end
+is_available() = true
 
 function __init__()
-    global libdSFMT_handle = dlopen(libdSFMT)
-    global libdSFMT_path = dlpath(libdSFMT_handle)
+    global libdSFMT_path = string(libdSFMT.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libdSFMT_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libdSFMT_path() = libdSFMT_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module dSFMT_jll
diff --git a/stdlib/libLLVM_jll/Project.toml b/stdlib/libLLVM_jll/Project.toml
index 87519e5a824b0..8d9be45e1fefd 100644
--- a/stdlib/libLLVM_jll/Project.toml
+++ b/stdlib/libLLVM_jll/Project.toml
@@ -1,13 +1,19 @@
 name = "libLLVM_jll"
 uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
-version = "15.0.7+5"
+version = "20.1.8+0"
 
 [deps]
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
+Zstd_jll = "3161d3a3-bdf6-5164-811a-617609db77b4"
 
 [compat]
-julia = "1.8"
+CompilerSupportLibraries_jll = "1.3.0"
+Zlib_jll = "1"
+Zstd_jll = "1.5.7"
+julia = "1.13"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/libLLVM_jll/src/libLLVM_jll.jl b/stdlib/libLLVM_jll/src/libLLVM_jll.jl
index 3140dc3989a72..2edff186b13a1 100644
--- a/stdlib/libLLVM_jll/src/libLLVM_jll.jl
+++ b/stdlib/libLLVM_jll/src/libLLVM_jll.jl
@@ -3,44 +3,59 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/libLLVM_jll.jl
 
 baremodule libLLVM_jll
-using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+using Base, Libdl, Zlib_jll, Zstd_jll
 
-const PATH_list = String[]
-const LIBPATH_list = String[]
+if !Sys.isapple()
+    using CompilerSupportLibraries_jll
+end
 
 export libLLVM
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libLLVM_handle::Ptr{Cvoid} = C_NULL
-libLLVM_path::String = ""
 
-if Sys.iswindows()
-    const libLLVM = "$(Base.libllvm_name).dll"
-elseif Sys.isapple()
-    const libLLVM = "@rpath/libLLVM.dylib"
-else
-    const libLLVM = "$(Base.libllvm_name).so"
+libLLVM_path::String = ""
+const libLLVM = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("$(Base.libllvm_name).dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libLLVM.dylib")
+    else
+        BundledLazyLibraryPath("$(Base.libllvm_name).so")
+    end,
+    dependencies = if Sys.isapple()
+        LazyLibrary[libz, libzstd]
+    elseif Sys.isfreebsd()
+        LazyLibrary[libz, libzstd, libgcc_s]
+    else
+        LazyLibrary[libz, libzstd, libstdcxx, libgcc_s]
+    end
+)
+
+function eager_mode()
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
+    Zlib_jll.eager_mode()
+    # Zstd_jll.eager_mode() # Not lazy yet
+    dlopen(libLLVM)
 end
+is_available() = true
 
 function __init__()
-    global libLLVM_handle = dlopen(libLLVM)
-    global libLLVM_path = dlpath(libLLVM_handle)
+    global libLLVM_path = string(libLLVM.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libLLVM_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libLLVM_path() = libLLVM_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module libLLVM_jll
diff --git a/stdlib/libLLVM_jll/test/runtests.jl b/stdlib/libLLVM_jll/test/runtests.jl
index ea678108ae012..8025c2cb2e693 100644
--- a/stdlib/libLLVM_jll/test/runtests.jl
+++ b/stdlib/libLLVM_jll/test/runtests.jl
@@ -3,5 +3,6 @@
 using Test, Libdl, libLLVM_jll
 
 @testset "libLLVM_jll" begin
-    @test dlsym(libLLVM_jll.libLLVM_handle, :LLVMInitializeTarget; throw_error=false) !== nothing
+    # Try to find a symbol from the C API of libLLVM as a simple sanity check.
+    @test dlsym(libLLVM_jll.libLLVM, :LLVMContextCreate; throw_error=false) !== nothing
 end
diff --git a/stdlib/libblastrampoline_jll/Project.toml b/stdlib/libblastrampoline_jll/Project.toml
index 4699baa7dad23..c8ed7b4313406 100644
--- a/stdlib/libblastrampoline_jll/Project.toml
+++ b/stdlib/libblastrampoline_jll/Project.toml
@@ -1,13 +1,13 @@
 name = "libblastrampoline_jll"
 uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
-version = "5.8.0+0"
+version = "5.15.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
-julia = "1.10"
+julia = "1.13"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
index 49e7932a6b701..a75e2e30db2fd 100644
--- a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
+++ b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
@@ -4,44 +4,65 @@
 
 baremodule libblastrampoline_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
 
 export libblastrampoline
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libblastrampoline_handle::Ptr{Cvoid} = C_NULL
+
+# Because LBT needs to have a weak-dependence on OpenBLAS (or any other BLAS)
+# we must manually construct a list of which modules and libraries we're going
+# to be using with it, as well as the on load callbacks they may or may not need.
+const on_load_callbacks::Vector{Function} = Function[]
+const eager_mode_modules::Vector{Module} = Module[]
+function libblastrampoline_on_load_callback()
+    for callback = on_load_callbacks
+        callback()
+    end
+end
+
+function add_dependency!(mod::Module, lib::LazyLibrary, on_load_callback::Function = () -> nothing)
+    Libdl.add_dependency!(libblastrampoline, lib)
+    push!(eager_mode_modules, mod)
+    push!(on_load_callbacks, on_load_callback)
+end
+
 libblastrampoline_path::String = ""
+const libblastrampoline = LazyLibrary(
+    # NOTE: keep in sync with `Base.libblas_name` and `Base.liblapack_name`.
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libblastrampoline-5.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libblastrampoline.5.dylib")
+    else
+        BundledLazyLibraryPath("libblastrampoline.so.5")
+    end,
+    dependencies = LazyLibrary[],
+    on_load_callback = libblastrampoline_on_load_callback
+)
 
-# NOTE: keep in sync with `Base.libblas_name` and `Base.liblapack_name`.
-const libblastrampoline = if Sys.iswindows()
-    "libblastrampoline-5.dll"
-elseif Sys.isapple()
-    "@rpath/libblastrampoline.5.dylib"
-else
-    "libblastrampoline.so.5"
+function eager_mode()
+    for mod in eager_mode_modules
+        mod.eager_mode()
+    end
+    dlopen(libblastrampoline)
 end
+is_available() = true
 
 function __init__()
-    global libblastrampoline_handle = dlopen(libblastrampoline)
-    global libblastrampoline_path = dlpath(libblastrampoline_handle)
+    global libblastrampoline_path = string(libblastrampoline.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libblastrampoline_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libblastrampoline_path() = libblastrampoline_path
+if Base.generating_output()
+    precompile(eager_mode, ())
+    precompile(is_available, ())
+end
 
 end  # module libblastrampoline_jll
diff --git a/stdlib/libblastrampoline_jll/test/runtests.jl b/stdlib/libblastrampoline_jll/test/runtests.jl
index e64fc328771be..9e14d2ccfe77b 100644
--- a/stdlib/libblastrampoline_jll/test/runtests.jl
+++ b/stdlib/libblastrampoline_jll/test/runtests.jl
@@ -3,5 +3,5 @@
 using Test, Libdl, libblastrampoline_jll
 
 @testset "libblastrampoline_jll" begin
-    @test isa(Libdl.dlsym(libblastrampoline_jll.libblastrampoline_handle, :dgemm_64_), Ptr{Nothing})
+    @test isa(Libdl.dlsym(libblastrampoline_jll.libblastrampoline, :dgemm_64_), Ptr{Nothing})
 end
diff --git a/stdlib/nghttp2_jll/Project.toml b/stdlib/nghttp2_jll/Project.toml
index b8a9394c50e37..c86e12740961f 100644
--- a/stdlib/nghttp2_jll/Project.toml
+++ b/stdlib/nghttp2_jll/Project.toml
@@ -1,13 +1,15 @@
 name = "nghttp2_jll"
 uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
-version = "1.52.0+0"
+version = "1.68.0+1"
 
 [deps]
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 
 [compat]
-julia = "1.6"
+CompilerSupportLibraries_jll = "1.3.0"
+julia = "1.11"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/nghttp2_jll/src/nghttp2_jll.jl b/stdlib/nghttp2_jll/src/nghttp2_jll.jl
index 76e8d3582c402..e0a1559a85628 100644
--- a/stdlib/nghttp2_jll/src/nghttp2_jll.jl
+++ b/stdlib/nghttp2_jll/src/nghttp2_jll.jl
@@ -3,43 +3,48 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/nghttp2_jll.jl
 baremodule nghttp2_jll
 using Base, Libdl
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
+if Sys.iswindows() && Sys.WORD_SIZE == 32
+    using CompilerSupportLibraries_jll
+end
 
 export libnghttp2
 
 # These get calculated in __init__()
 const PATH = Ref("")
+const PATH_list = String[]
 const LIBPATH = Ref("")
+const LIBPATH_list = String[]
 artifact_dir::String = ""
-libnghttp2_handle::Ptr{Cvoid} = C_NULL
-libnghttp2_path::String = ""
 
-if Sys.iswindows()
-    const libnghttp2 = "libnghttp2-14.dll"
-elseif Sys.isapple()
-    const libnghttp2 = "@rpath/libnghttp2.14.dylib"
-else
-    const libnghttp2 = "libnghttp2.so.14"
+libnghttp2_path::String = ""
+const libnghttp2 = LazyLibrary(
+    if Sys.iswindows()
+        BundledLazyLibraryPath("libnghttp2-14.dll")
+    elseif Sys.isapple()
+        BundledLazyLibraryPath("libnghttp2.14.dylib")
+    else
+        BundledLazyLibraryPath("libnghttp2.so.14")
+    end,
+    dependencies = if Sys.iswindows() && Sys.WORD_SIZE == 32
+        LazyLibrary[libgcc_s]
+    else
+        LazyLibrary[]
+    end
+)
+
+function eager_mode()
+    @static if @isdefined CompilerSupportLibraries_jll
+        CompilerSupportLibraries_jll.eager_mode()
+    end
+    dlopen(libnghttp2)
 end
+is_available() = true
 
 function __init__()
-    global libnghttp2_handle = dlopen(libnghttp2)
-    global libnghttp2_path = dlpath(libnghttp2_handle)
+    global libnghttp2_path = string(libnghttp2.path)
     global artifact_dir = dirname(Sys.BINDIR)
     LIBPATH[] = dirname(libnghttp2_path)
     push!(LIBPATH_list, LIBPATH[])
 end
 
-# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
-# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
-# there isn't one.  It instead returns the overall Julia prefix.
-is_available() = true
-find_artifact_dir() = artifact_dir
-dev_jll() = error("stdlib JLLs cannot be dev'ed")
-best_wrapper = nothing
-get_libnghttp2_path() = libnghttp2_path
-
 end  # module nghttp2_jll
diff --git a/stdlib/nghttp2_jll/test/runtests.jl b/stdlib/nghttp2_jll/test/runtests.jl
index 2f9af6d6a3338..6c0fdc7a20ec2 100644
--- a/stdlib/nghttp2_jll/test/runtests.jl
+++ b/stdlib/nghttp2_jll/test/runtests.jl
@@ -11,5 +11,5 @@ end
 
 @testset "nghttp2_jll" begin
     info = unsafe_load(ccall((:nghttp2_version,libnghttp2), Ptr{nghttp2_info}, (Cint,), 0))
-    @test VersionNumber(unsafe_string(info.version_str)) == v"1.52.0"
+    @test VersionNumber(unsafe_string(info.version_str)) == v"1.68.0"
 end
diff --git a/stdlib/p7zip_jll/Project.toml b/stdlib/p7zip_jll/Project.toml
index 4c9bf62ad7ec1..2c7159f9a4766 100644
--- a/stdlib/p7zip_jll/Project.toml
+++ b/stdlib/p7zip_jll/Project.toml
@@ -1,10 +1,11 @@
 name = "p7zip_jll"
 uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
-version = "17.4.0+0"
+version = "17.7.0+0"
 
 [deps]
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 
 [compat]
 julia = "1.6"
diff --git a/stdlib/p7zip_jll/src/p7zip_jll.jl b/stdlib/p7zip_jll/src/p7zip_jll.jl
index 01f26de936e78..af461c6719632 100644
--- a/stdlib/p7zip_jll/src/p7zip_jll.jl
+++ b/stdlib/p7zip_jll/src/p7zip_jll.jl
@@ -3,16 +3,11 @@
 ## dummy stub for https://github.com/JuliaBinaryWrappers/p7zip_jll.jl
 baremodule p7zip_jll
 using Base
-Base.Experimental.@compiler_options compile=min optimize=0 infer=false
-
-const PATH_list = String[]
-const LIBPATH_list = String[]
 
 export p7zip
 
 # These get calculated in __init__()
 const PATH = Ref("")
-const LIBPATH = Ref("")
 artifact_dir::String = ""
 p7zip_path::String = ""
 if Sys.iswindows()
@@ -22,71 +17,44 @@ else
 end
 
 if Sys.iswindows()
-    const LIBPATH_env = "PATH"
-    const LIBPATH_default = ""
     const pathsep = ';'
 elseif Sys.isapple()
-    const LIBPATH_env = "DYLD_FALLBACK_LIBRARY_PATH"
-    const LIBPATH_default = "~/lib:/usr/local/lib:/lib:/usr/lib"
     const pathsep = ':'
 else
-    const LIBPATH_env = "LD_LIBRARY_PATH"
-    const LIBPATH_default = ""
     const pathsep = ':'
 end
 
-function adjust_ENV!(env::Dict{keytype(Base.EnvDict),valtype(Base.EnvDict)}, PATH::String, LIBPATH::String, adjust_PATH::Bool, adjust_LIBPATH::Bool)
-    if adjust_LIBPATH
-        LIBPATH_base = get(env, LIBPATH_env, expanduser(LIBPATH_default))
-        if !isempty(LIBPATH_base)
-            env[LIBPATH_env] = string(LIBPATH, pathsep, LIBPATH_base)
-        else
-            env[LIBPATH_env] = LIBPATH
-        end
-    end
-    if adjust_PATH && (LIBPATH_env != "PATH" || !adjust_LIBPATH)
-        if adjust_PATH
-            if !isempty(get(env, "PATH", ""))
-                env["PATH"] = string(PATH, pathsep, env["PATH"])
-            else
-                env["PATH"] = PATH
-            end
-        end
-    end
-    return env
+function adjust_ENV()
+    addPATH = PATH[]
+    oldPATH = get(ENV, "PATH", "")
+    newPATH = isempty(oldPATH) ? addPATH : "$addPATH$pathsep$oldPATH"
+    return ("PATH"=>newPATH,)
 end
 
-function p7zip(f::Function; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true)
-    env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH)
-    withenv(env...) do
-        return f(p7zip_path)
+function p7zip(f::Function; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) # deprecated, for compat only
+    withenv((adjust_PATH ? adjust_ENV() : ())...) do
+        return f(p7zip())
     end
 end
-function p7zip(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true)
-    env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH)
-    return Cmd(Cmd([p7zip_path]); env)
-end
+# the 7z.exe we ship has no dependencies, so it needs no PATH adjustment
+p7zip(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) = `$p7zip_path`
 
 function init_p7zip_path()
     # Prefer our own bundled p7zip, but if we don't have one, pick it up off of the PATH
-    # If this is an in-tree build, `7z` will live in `bindir`.  Otherwise, it'll be in `private_libexecdir`
-    for bundled_p7zip_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, p7zip_exe),
-                               joinpath(Sys.BINDIR, p7zip_exe))
-        if isfile(bundled_p7zip_path)
-            global p7zip_path = abspath(bundled_p7zip_path)
-            return
-        end
+    # Our `7z` lives in `private_libexecdir`
+    bundled_p7zip_path = joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, p7zip_exe)
+    if isfile(bundled_p7zip_path)
+        global p7zip_path = abspath(bundled_p7zip_path)
+    else
+        global p7zip_path = something(Sys.which(p7zip_exe), p7zip_exe)
     end
-    global p7zip_path = something(Sys.which(p7zip_exe), p7zip_exe)
 end
 
 function __init__()
     global artifact_dir = dirname(Sys.BINDIR)
     init_p7zip_path()
-    PATH[] = dirname(p7zip_path)
-    push!(PATH_list, PATH[])
-    append!(LIBPATH_list, [joinpath(Sys.BINDIR, Base.LIBDIR, "julia"), joinpath(Sys.BINDIR, Base.LIBDIR)])
-    LIBPATH[] = join(LIBPATH_list, pathsep)
+    PATH[] = path = dirname(p7zip_path)
+    nothing
 end
 
 # JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
diff --git a/stdlib/stdlib.mk b/stdlib/stdlib.mk
new file mode 100644
index 0000000000000..3184ac9c3305f
--- /dev/null
+++ b/stdlib/stdlib.mk
@@ -0,0 +1,30 @@
+STDLIBS_WITHIN_SYSIMG := \
+	Artifacts FileWatching Libdl SHA libblastrampoline_jll OpenBLAS_jll Random \
+	LinearAlgebra Sockets
+
+INDEPENDENT_STDLIBS := \
+	ArgTools Base64 CRC32c Dates DelimitedFiles Distributed Downloads Future \
+	InteractiveUtils JuliaSyntaxHighlighting LazyArtifacts LibGit2 LibCURL Logging \
+	Markdown Mmap NetworkOptions Profile Printf Pkg REPL Serialization SharedArrays \
+	SparseArrays Statistics StyledStrings SuiteSparse_jll Tar Test TOML Unicode UUIDs \
+	dSFMT_jll GMP_jll libLLVM_jll LLD_jll LLVMLibUnwind_jll LibUnwind_jll LibUV_jll \
+	LibCURL_jll LibSSH2_jll LibGit2_jll nghttp2_jll  MozillaCACerts_jll \
+	MPFR_jll OpenLibm_jll OpenSSL_jll PCRE2_jll p7zip_jll Zlib_jll Zstd_jll
+
+STDLIBS := $(STDLIBS_WITHIN_SYSIMG) $(INDEPENDENT_STDLIBS)
+VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION)
+
+SYSIMG_STDLIBS_SRCS =
+INDEPENDENT_STDLIBS_SRCS =
+define STDLIB_srcs
+$1_SRCS := $$(shell find $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/src -name \*.jl) \
+$$(wildcard $$(build_prefix)/manifest/$$(VERSDIR)/$1) $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/Project.toml
+
+ifneq ($(filter $(1),$(STDLIBS_WITHIN_SYSIMG)),)
+	SYSIMG_STDLIBS_SRCS += $$($1_SRCS)
+else
+	INDEPENDENT_STDLIBS_SRCS += $$($1_SRCS)
+endif
+endef
+
+$(foreach stdlib,$(STDLIBS),$(eval $(call STDLIB_srcs,$(stdlib))))
diff --git a/sysimage.mk b/sysimage.mk
index 993ee9a990058..67464947eefdb 100644
--- a/sysimage.mk
+++ b/sysimage.mk
@@ -2,79 +2,134 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
 BUILDDIR := .
 JULIAHOME := $(SRCDIR)
 include $(JULIAHOME)/Make.inc
+include $(JULIAHOME)/stdlib/stdlib.mk
 
 default: sysimg-$(JULIA_BUILD_MODE) # contains either "debug" or "release"
 all: sysimg-release sysimg-debug
-sysimg-ji: $(build_private_libdir)/sys.ji
+basecompiler-ji: $(build_private_libdir)/basecompiler.ji
+sysimg-ji: $(build_private_libdir)/sysbase.ji
 sysimg-bc: $(build_private_libdir)/sys-bc.a
 sysimg-release: $(build_private_libdir)/sys.$(SHLIB_EXT)
 sysimg-debug: $(build_private_libdir)/sys-debug.$(SHLIB_EXT)
+sysbase-release: $(build_private_libdir)/sysbase.$(SHLIB_EXT)
+sysbase-debug: $(build_private_libdir)/sysbase-debug.$(SHLIB_EXT)
 
 VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION)
 
 $(build_private_libdir)/%.$(SHLIB_EXT): $(build_private_libdir)/%-o.a
 	@$(call PRINT_LINK, $(CXX) $(LDFLAGS) -shared $(fPIC) -L$(build_private_libdir) -L$(build_libdir) -L$(build_shlibdir) -o $@ \
-		$(WHOLE_ARCHIVE) $< $(NO_WHOLE_ARCHIVE) \
+		$(call whole_archive,$<) \
 		$(if $(findstring -debug,$(notdir $@)),-ljulia-internal-debug -ljulia-debug,-ljulia-internal -ljulia) \
-		$$([ $(OS) = WINNT ] && echo '' -lssp))
+		$$([ $(OS) = WINNT ] && echo '' $(LIBM) -lssp -Wl,--disable-auto-import -Wl,--disable-runtime-pseudo-reloc))
 	@$(INSTALL_NAME_CMD)$(notdir $@) $@
 	@$(DSYMUTIL) $@
 
 COMPILER_SRCS := $(addprefix $(JULIAHOME)/, \
+		base/Base_compiler.jl \
 		base/boot.jl \
 		base/docs/core.jl \
 		base/abstractarray.jl \
 		base/abstractdict.jl \
+		base/abstractset.jl \
+		base/iddict.jl \
+		base/idset.jl \
+		base/anyall.jl \
 		base/array.jl \
+		base/baseext.jl \
 		base/bitarray.jl \
 		base/bitset.jl \
 		base/bool.jl \
+		base/c.jl \
+		base/checked.jl \
+		base/cmem.jl \
+		base/coreio.jl \
+		base/coreir.jl \
 		base/ctypes.jl \
 		base/error.jl \
 		base/essentials.jl \
 		base/expr.jl \
+		base/exports.jl \
+		base/flfrontend.jl \
+		base/float.jl \
+		base/gcutils.jl \
 		base/generator.jl \
+		base/genericmemory.jl \
 		base/int.jl \
 		base/indices.jl \
 		base/iterators.jl \
+		base/module.jl \
 		base/namedtuple.jl \
+		base/ntuple.jl \
 		base/number.jl \
 		base/operators.jl \
 		base/options.jl \
+		base/ordering.jl \
 		base/pair.jl \
 		base/pointer.jl \
 		base/promotion.jl \
+		base/public.jl \
 		base/range.jl \
-		base/reflection.jl \
-		base/traits.jl \
 		base/refvalue.jl \
+		base/rounding.jl \
+		base/runtime_internals.jl \
+		base/strings/lazy.jl \
+		base/traits.jl \
 		base/tuple.jl)
-COMPILER_SRCS += $(shell find $(JULIAHOME)/base/compiler -name \*.jl)
+COMPILER_SRCS += $(shell find $(JULIAHOME)/Compiler/src -name \*.jl -and -not -name verifytrim.jl -and -not -name show.jl)
+# Julia-based compiler frontend is bootstrapped into Base for now
+COMPILER_FRONTEND_SRCS = $(shell find $(JULIAHOME)/JuliaSyntax/src -name \*.jl)
 # sort these to remove duplicates
 BASE_SRCS := $(sort $(shell find $(JULIAHOME)/base -name \*.jl -and -not -name sysimg.jl) \
-                    $(shell find $(BUILDROOT)/base -name \*.jl  -and -not -name sysimg.jl))
-STDLIB_SRCS := $(JULIAHOME)/base/sysimg.jl $(shell find $(build_datarootdir)/julia/stdlib/$(VERSDIR)/*/src -name \*.jl) \
-                    $(wildcard $(build_prefix)/manifest/$(VERSDIR)/*)
+                    $(shell find $(BUILDROOT)/base -name \*.jl  -and -not -name sysimg.jl)) \
+			 $(COMPILER_FRONTEND_SRCS) \
+             $(JULIAHOME)/Compiler/src/ssair/show.jl \
+             $(JULIAHOME)/Compiler/src/verifytrim.jl
+STDLIB_SRCS := $(JULIAHOME)/base/sysimg.jl $(SYSIMG_STDLIBS_SRCS)
 RELBUILDROOT := $(call rel_path,$(JULIAHOME)/base,$(BUILDROOT)/base)/ # <-- make sure this always has a trailing slash
+RELDATADIR := $(call rel_path,$(JULIAHOME)/base,$(build_datarootdir))/ # <-- make sure this always has a trailing slash
 
-$(build_private_libdir)/corecompiler.ji: $(COMPILER_SRCS)
+$(build_private_libdir)/basecompiler.ji: $(COMPILER_SRCS)
 	@$(call PRINT_JULIA, cd $(JULIAHOME)/base && \
-	$(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp \
-		--startup-file=no --warn-overwrite=yes -g$(BOOTSTRAP_DEBUG_LEVEL) -O0 compiler/compiler.jl)
+	JULIA_NUM_THREADS=1 $(call spawn,$(JULIA_EXECUTABLE)) $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp \
+		--startup-file=no --warn-overwrite=yes --depwarn=error -g$(BOOTSTRAP_DEBUG_LEVEL) -O1 Base_compiler.jl --buildroot $(RELBUILDROOT) --dataroot $(RELDATADIR))
 	@mv $@.tmp $@
 
-$(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAHOME)/VERSION $(BASE_SRCS) $(STDLIB_SRCS)
-	@$(call PRINT_JULIA, cd $(JULIAHOME)/base && \
-	if ! JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \
-			$(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \
-			--startup-file=no --warn-overwrite=yes --sysimage $(call cygpath_w,$<) sysimg.jl $(RELBUILDROOT); then \
-		echo '*** This error might be fixed by running `make clean`. If the error persists$(COMMA) try `make cleanall`. ***'; \
+define base_builder
+$$(build_private_libdir)/basecompiler$1-o.a $$(build_private_libdir)/basecompiler$1-bc.a : $$(build_private_libdir)/basecompiler$1-%.a : $(COMPILER_SRCS)
+	@$$(call PRINT_JULIA, cd $$(JULIAHOME)/base && \
+	WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \
+	JULIA_NUM_THREADS=1 \
+		$$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" $$(HEAPLIM) --output-$$* $$(call cygpath_w,$$@).tmp \
+		--startup-file=no --warn-overwrite=yes --depwarn=error -g$$(BOOTSTRAP_DEBUG_LEVEL) Base_compiler.jl --buildroot $$(RELBUILDROOT) --dataroot $$(RELDATADIR))
+	@mv $$@.tmp $$@
+$$(build_private_libdir)/sysbase$1.ji: $$(build_private_libdir)/basecompiler$1.$$(SHLIB_EXT) $$(JULIAHOME)/VERSION $$(BASE_SRCS) $$(STDLIB_SRCS)
+	@$$(call PRINT_JULIA, cd $$(JULIAHOME)/base && \
+	if ! JULIA_BINDIR=$$(call cygpath_w,$$(build_bindir)) \
+	     WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \
+		 JULIA_NUM_THREADS=1 \
+			$$(call spawn, $$(JULIA_EXECUTABLE)) -g1 $2 -C "$$(JULIA_CPU_TARGET)" $$(HEAPLIM) --output-ji $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \
+			--startup-file=no --warn-overwrite=yes --depwarn=error --sysimage $$(call cygpath_w,$$<) sysimg.jl --buildroot $$(RELBUILDROOT) --dataroot $$(RELDATADIR); then \
+		echo '*** This error might be fixed by running `make clean`. If the error persists$$(COMMA) try `make cleanall`. ***'; \
 		false; \
 	fi )
-	@mv $@.tmp $@
+	@mv $$@.tmp $$@
+.SECONDARY: $$(build_private_libdir)/basecompiler$1-o.a $$(build_private_libdir)/basecompiler$1-bc.a $$(build_private_libdir)/sysbase$1.ji # request Make to keep these files around
+endef
 
 define sysimg_builder
-$$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(build_private_libdir)/sys$1-%.a : $$(build_private_libdir)/sys.ji $$(JULIAHOME)/contrib/generate_precompile.jl
+$$(build_private_libdir)/sysbase$1-o.a $$(build_private_libdir)/sysbase$1-bc.a : $$(build_private_libdir)/sysbase$1-%.a : $$(build_private_libdir)/basecompiler$1.$$(SHLIB_EXT) $$(JULIAHOME)/VERSION $$(BASE_SRCS) $$(STDLIB_SRCS)
+	@$$(call PRINT_JULIA, cd $$(JULIAHOME)/base && \
+	if ! JULIA_BINDIR=$$(call cygpath_w,$$(build_bindir)) \
+	     WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \
+		 JULIA_NUM_THREADS=1 \
+			$$(call spawn, $$(JULIA_EXECUTABLE)) -g1 $2 -C "$$(JULIA_CPU_TARGET)" $$(HEAPLIM) --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \
+			--startup-file=no --warn-overwrite=yes --depwarn=error --sysimage $$(call cygpath_w,$$<) sysimg.jl --buildroot $$(RELBUILDROOT) --dataroot $$(RELDATADIR); then \
+		echo '*** This error might be fixed by running `make clean`. If the error persists$$(COMMA) try `make cleanall`. ***'; \
+		false; \
+	fi )
+	@mv $$@.tmp $$@
+build_sysbase_$1 := $$(or $$(CROSS_BOOTSTRAP_SYSBASE),$$(build_private_libdir)/sysbase$1.$$(SHLIB_EXT))
+$$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(build_private_libdir)/sys$1-%.a : $$(build_sysbase_$1) $$(JULIAHOME)/contrib/generate_precompile.jl
 	@$$(call PRINT_JULIA, cd $$(JULIAHOME)/base && \
 	if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) \
 		 WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \
@@ -83,12 +138,16 @@ $$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(buil
 		 JULIA_DEPOT_PATH=':' \
 		 JULIA_NUM_THREADS=1 \
 			$$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" $$(HEAPLIM) --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \
-			--startup-file=no --warn-overwrite=yes --sysimage $$(call cygpath_w,$$<) $$(call cygpath_w,$$(JULIAHOME)/contrib/generate_precompile.jl) $(JULIA_PRECOMPILE); then \
+			$(bootstrap_julia_flags) \
+			--startup-file=no --warn-overwrite=yes --depwarn=error --sysimage $$(call cygpath_w,$$<) $$(call cygpath_w,$$(JULIAHOME)/contrib/generate_precompile.jl) $(JULIA_PRECOMPILE); then \
 		echo '*** This error is usually fixed by running `make clean`. If the error persists$$(COMMA) try `make cleanall`. ***'; \
 		false; \
 	fi )
 	@mv $$@.tmp $$@
 .SECONDARY: $$(build_private_libdir)/sys$1-o.a $(build_private_libdir)/sys$1-bc.a # request Make to keep these files around
+.SECONDARY: $$(build_private_libdir)/sysbase$1-o.a $(build_private_libdir)/sysbase$1-bc.a # request Make to keep these files around
 endef
+$(eval $(call base_builder,,-O1,$(JULIA_EXECUTABLE_release)))
+$(eval $(call base_builder,-debug,-O0,$(JULIA_EXECUTABLE_debug)))
 $(eval $(call sysimg_builder,,-O3,$(JULIA_EXECUTABLE_release)))
 $(eval $(call sysimg_builder,-debug,-O0,$(JULIA_EXECUTABLE_debug)))
diff --git a/test/.gitignore b/test/.gitignore
index a1af9ae3d44bf..20bf199b87c74 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -2,3 +2,6 @@
 /ccalltest
 /ccalltest.s
 /libccalltest.*
+/relocatedepot
+/RelocationTestPkg2/src/foo.txt
+/RelocationTestPkg*/Manifest.toml
diff --git a/test/Makefile b/test/Makefile
index 88dbe5b2b4ed6..33b74eaf93d3b 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -7,14 +7,14 @@ STDLIBDIR := $(build_datarootdir)/julia/stdlib/$(VERSDIR)
 # TODO: this Makefile ignores BUILDDIR, except for computing JULIA_EXECUTABLE
 
 export JULIA_DEPOT_PATH := $(build_prefix)/share/julia
-export JULIA_LOAD_PATH := @stdlib
+export JULIA_LOAD_PATH := @$(PATHSEP)@stdlib
 unexport JULIA_PROJECT :=
 unexport JULIA_BINDIR :=
 
-TESTGROUPS = unicode strings compiler
+TESTGROUPS = unicode strings compiler Compiler JuliaSyntax JuliaLowering
 TESTS = all default stdlib $(TESTGROUPS) \
 		$(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \
-		$(filter-out runtests testdefs, \
+		$(filter-out runtests testdefs relocatedepot, \
 			$(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/*.jl))) \
 		$(foreach group,$(TESTGROUPS), \
 			$(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/$(group)/*.jl)))
@@ -24,27 +24,75 @@ EMBEDDING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/embedding" "CC=$(CC
 
 GCEXT_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/gcext" "CC=$(CC)"
 
+TRIMMING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/trimming" "CC=$(CC)"
+TEST_JULIA_OPTIONS := --check-bounds=yes --startup-file=no --depwarn=error
+TEST_SCRIPT_OPTIONS := --buildroot=$(call cygpath_w,$(BUILDROOT))
+
+.PHONY: default
 default:
 
+.PHONY: $(TESTS)
 $(TESTS):
 	@cd $(SRCDIR) && \
-	$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@)
+	$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) $(TEST_JULIA_OPTIONS) ./runtests.jl $(TEST_SCRIPT_OPTIONS) $@)
+
+.PHONY: install-revise-deps
+install-revise-deps:
+	$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) $(TEST_JULIA_OPTIONS) ./runtests.jl $(TEST_SCRIPT_OPTIONS) --revise --help-list install_revise_deps)
 
+.PHONY: $(addprefix revise-, $(TESTS))
 $(addprefix revise-, $(TESTS)): revise-% :
 	@cd $(SRCDIR) && \
-    $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*)
+    $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) $(TEST_JULIA_OPTIONS) ./runtests.jl $(TEST_SCRIPT_OPTIONS) --revise $*)
+
+.PHONY: relocatedepot
+relocatedepot:
+	@rm -rf $(SRCDIR)/relocatedepot
+	@cd $(SRCDIR) && \
+	$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) $(TEST_JULIA_OPTIONS) ./runtests.jl $(TEST_SCRIPT_OPTIONS) $@)
+	@mkdir $(SRCDIR)/relocatedepot
+	@cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot
+	@cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot
+	@cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot
+	@cp -R $(SRCDIR)/RelocationTestPkg3 $(SRCDIR)/relocatedepot
+	@cp -R $(SRCDIR)/RelocationTestPkg4 $(SRCDIR)/relocatedepot
+	@cd $(SRCDIR) && \
+	$(call PRINT_JULIA, $(call spawn,RELOCATEDEPOT="" $(JULIA_EXECUTABLE)) $(TEST_JULIA_OPTIONS) ./runtests.jl $(TEST_SCRIPT_OPTIONS) $@)
 
+.PHONY: revise-relocatedepot
+revise-relocatedepot: revise-% :
+	@rm -rf $(SRCDIR)/relocatedepot
+	@cd $(SRCDIR) && \
+	$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) $(TEST_JULIA_OPTIONS) ./runtests.jl $(TEST_SCRIPT_OPTIONS) --revise $*)
+	@mkdir $(SRCDIR)/relocatedepot
+	@cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot
+	@cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot
+	@cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot
+	@cp -R $(SRCDIR)/RelocationTestPkg3 $(SRCDIR)/relocatedepot
+	@cp -R $(SRCDIR)/RelocationTestPkg4 $(SRCDIR)/relocatedepot
+	@cd $(SRCDIR) && \
+	$(call PRINT_JULIA, $(call spawn,RELOCATEDEPOT="" $(JULIA_EXECUTABLE)) $(TEST_JULIA_OPTIONS) ./runtests.jl $(TEST_SCRIPT_OPTIONS) --revise $*)
+
+.PHONY: embedding
 embedding:
 	@$(MAKE) -C $(SRCDIR)/$@ check $(EMBEDDING_ARGS)
 
+.PHONY: gcext
 gcext:
 	@$(MAKE) -C $(SRCDIR)/$@ check $(GCEXT_ARGS)
 
+.PHONY: trimming
+trimming:
+	@$(MAKE) -C $(SRCDIR)/$@ check $(TRIMMING_ARGS)
+	@$(MAKE) -C $(SRCDIR)/$@ clean $(TRIMMING_ARGS)
+
+.PHONY: clangsa
 clangsa:
 	@$(MAKE) -C $(SRCDIR)/$@
 
+.PHONY: clean
 clean:
 	@$(MAKE) -C embedding $@ $(EMBEDDING_ARGS)
 	@$(MAKE) -C gcext $@ $(GCEXT_ARGS)
-
-.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) embedding gcext clangsa clean
+	@$(MAKE) -C llvmpasses $@
+	@$(MAKE) -C trimming $@ $(TRIMMING_ARGS)
diff --git a/test/RelocationTestPkg1/Project.toml b/test/RelocationTestPkg1/Project.toml
new file mode 100644
index 0000000000000..4b5b67c3aef2d
--- /dev/null
+++ b/test/RelocationTestPkg1/Project.toml
@@ -0,0 +1,3 @@
+name = "RelocationTestPkg1"
+uuid = "854e1adb-5a97-46bf-a391-1cfe05ac726d"
+version = "0.1.0"
diff --git a/test/RelocationTestPkg1/src/RelocationTestPkg1.jl b/test/RelocationTestPkg1/src/RelocationTestPkg1.jl
new file mode 100644
index 0000000000000..a86543a61b3f8
--- /dev/null
+++ b/test/RelocationTestPkg1/src/RelocationTestPkg1.jl
@@ -0,0 +1,5 @@
+module RelocationTestPkg1
+
+greet() = print("Hello World!")
+
+end # module RelocationTestPkg1
diff --git a/test/RelocationTestPkg1/src/foo.txt b/test/RelocationTestPkg1/src/foo.txt
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/RelocationTestPkg2/Project.toml b/test/RelocationTestPkg2/Project.toml
new file mode 100644
index 0000000000000..b909269a0894c
--- /dev/null
+++ b/test/RelocationTestPkg2/Project.toml
@@ -0,0 +1,3 @@
+name = "RelocationTestPkg2"
+uuid = "8d933983-b090-4b0b-a37e-c34793f459d1"
+version = "0.1.0"
diff --git a/test/RelocationTestPkg2/src/RelocationTestPkg2.jl b/test/RelocationTestPkg2/src/RelocationTestPkg2.jl
new file mode 100644
index 0000000000000..4b1fd2708a727
--- /dev/null
+++ b/test/RelocationTestPkg2/src/RelocationTestPkg2.jl
@@ -0,0 +1,7 @@
+module RelocationTestPkg2
+
+include_dependency("foo.txt", track_content=false)
+include_dependency("foodir", track_content=false)
+greet() = print("Hello World!")
+
+end # module RelocationTestPkg2
diff --git a/test/RelocationTestPkg2/src/foo.txt b/test/RelocationTestPkg2/src/foo.txt
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/RelocationTestPkg3/Project.toml b/test/RelocationTestPkg3/Project.toml
new file mode 100644
index 0000000000000..61882cb5cda65
--- /dev/null
+++ b/test/RelocationTestPkg3/Project.toml
@@ -0,0 +1,3 @@
+name = "RelocationTestPkg3"
+uuid = "1ba4f954-9da9-4cd2-9ca7-6250235df52c"
+version = "0.1.0"
diff --git a/test/RelocationTestPkg3/src/RelocationTestPkg3.jl b/test/RelocationTestPkg3/src/RelocationTestPkg3.jl
new file mode 100644
index 0000000000000..6ed8e1e560a99
--- /dev/null
+++ b/test/RelocationTestPkg3/src/RelocationTestPkg3.jl
@@ -0,0 +1,7 @@
+module RelocationTestPkg3
+
+include_dependency("bar.txt", track_content=true)
+include_dependency("bardir", track_content=true)
+greet() = print("Hello World!")
+
+end # module RelocationTestPkg3
diff --git a/test/RelocationTestPkg3/src/bar.txt b/test/RelocationTestPkg3/src/bar.txt
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/RelocationTestPkg4/Project.toml b/test/RelocationTestPkg4/Project.toml
new file mode 100644
index 0000000000000..8334a684f064e
--- /dev/null
+++ b/test/RelocationTestPkg4/Project.toml
@@ -0,0 +1,6 @@
+name = "RelocationTestPkg4"
+uuid = "d423d817-d7e9-49ac-b245-9d9d6db0b429"
+version = "0.1.0"
+
+[deps]
+RelocationTestPkg1 = "854e1adb-5a97-46bf-a391-1cfe05ac726d"
diff --git a/test/RelocationTestPkg4/src/RelocationTestPkg4.jl b/test/RelocationTestPkg4/src/RelocationTestPkg4.jl
new file mode 100644
index 0000000000000..d24a51d19a918
--- /dev/null
+++ b/test/RelocationTestPkg4/src/RelocationTestPkg4.jl
@@ -0,0 +1,5 @@
+module RelocationTestPkg4
+
+greet() = print("Hello World!")
+
+end # module RelocationTestPkg4
diff --git a/test/abstractarray.jl b/test/abstractarray.jl
index 912e0d5883d12..122109cbff006 100644
--- a/test/abstractarray.jl
+++ b/test/abstractarray.jl
@@ -2,6 +2,20 @@
 
 using Random, LinearAlgebra
 
+include(joinpath(@__DIR__,"../Compiler/test/irutils.jl"))
+
+isdefined(Main, :InfiniteArrays) || @eval Main include("testhelpers/InfiniteArrays.jl")
+using .Main.InfiniteArrays
+
+isdefined(Main, :StructArrays) || @eval Main include("testhelpers/StructArrays.jl")
+using .Main.StructArrays
+
+isdefined(Main, :FillArrays) || @eval Main include("testhelpers/FillArrays.jl")
+using .Main.FillArrays
+
+isdefined(Main, :SizedArrays) || @eval Main include("testhelpers/SizedArrays.jl")
+using .Main.SizedArrays
+
 A = rand(5,4,3)
 @testset "Bounds checking" begin
     @test checkbounds(Bool, A, 1, 1, 1) == true
@@ -56,6 +70,20 @@ end
     @test checkbounds(Bool, A, CartesianIndex((5,)), CartesianIndex((4,)), CartesianIndex((4,)))  == false
 end
 
+@testset "Infinite axes" begin
+    r = OneToInf()
+    @testset "CartesianIndices" begin
+        C = CartesianIndices(size(r))
+        ax = to_indices(r, (C,))[1]
+        @test ax === r
+    end
+    @testset "LinearIndices" begin
+        L = LinearIndices(size(r))
+        ax = to_indices(r, (L,))[1]
+        @test ax === L
+    end
+end
+
 @testset "vector indices" begin
     @test checkbounds(Bool, A, 1:5, 1:4, 1:3) == true
     @test checkbounds(Bool, A, 0:5, 1:4, 1:3) == false
@@ -67,6 +95,7 @@ end
     @test checkbounds(Bool, A, 1:60) == true
     @test checkbounds(Bool, A, 1:61) == false
     @test checkbounds(Bool, A, 2, 2, 2, 1:1) == true  # extra indices
+    @test checkbounds(Bool, A, 2, 2, 2, 10:9) == true
     @test checkbounds(Bool, A, 2, 2, 2, 1:2) == false
     @test checkbounds(Bool, A, 1:5, 1:4) == false
     @test checkbounds(Bool, A, 1:5, 1:12) == false
@@ -87,6 +116,7 @@ end
     @test checkbounds(Bool, A, trues(5), trues(13)) == false
     @test checkbounds(Bool, A, trues(6), trues(12)) == false
     @test checkbounds(Bool, A, trues(5, 4, 3)) == true
+    @test checkbounds(Bool, A, trues(5, 4, 3, 1)) == true # issue 45867
     @test checkbounds(Bool, A, trues(5, 4, 2)) == false
     @test checkbounds(Bool, A, trues(5, 12)) == false
     @test checkbounds(Bool, A, trues(1, 5), trues(1, 4, 1), trues(1, 1, 3)) == false
@@ -94,7 +124,9 @@ end
     @test checkbounds(Bool, A, trues(1, 5), trues(1, 5, 1), trues(1, 1, 3)) == false
     @test checkbounds(Bool, A, trues(1, 5), :, 2) == false
     @test checkbounds(Bool, A, trues(5, 4), trues(3)) == true
-    @test checkbounds(Bool, A, trues(4, 4), trues(3)) == true
+    @test checkbounds(Bool, A, trues(5), trues(4, 3, 1)) == true
+    @test checkbounds(Bool, A, trues(5, 4), trues(3, 2)) == false
+    @test checkbounds(Bool, A, trues(4, 4), trues(3)) == false
     @test checkbounds(Bool, A, trues(5, 4), trues(2)) == false
     @test checkbounds(Bool, A, trues(6, 4), trues(3)) == false
     @test checkbounds(Bool, A, trues(5, 4), trues(4)) == false
@@ -117,6 +149,10 @@ end
     @test checkbounds(Bool, A, [CartesianIndex((6, 4))], 3) == false
     @test checkbounds(Bool, A, [CartesianIndex((5, 5))], 3) == false
     @test checkbounds(Bool, A, [CartesianIndex((5, 4))], 4) == false
+    @test checkbounds(Bool, A, 5, [CartesianIndex((4, 3, 1))]) == true
+    @test checkbounds(Bool, A, 5, [CartesianIndex((4, 3, 2))]) == false
+    @test_throws ArgumentError checkbounds(Bool, A, [CartesianIndex((4, 3)), CartesianIndex((4,))])
+    @test_throws ArgumentError checkbounds(Bool, A, [CartesianIndex((1,)), 1])
 end
 
 @testset "index conversion" begin
@@ -299,6 +335,22 @@ end
         R = LinearIndices((Base.IdentityUnitRange(0:1), 0:1))
         @test axes(R) == (Base.IdentityUnitRange(0:1), Base.OneTo(2))
     end
+
+    @testset "show" begin
+        A = zeros(2,3)
+        for B in (A, view(A, Base.IdentityUnitRange(2:4)))
+            l = LinearIndices(B)
+            s = sprint(show, l)
+            @test s == "LinearIndices($(axes(B)))"
+        end
+    end
+end
+
+@testset "copy for LinearIndices/CartesianIndices" begin
+    C = CartesianIndices((1:2, 1:4))
+    @test copy(C) === C
+    L = LinearIndices((1:2, 1:4))
+    @test copy(L) === L
 end
 
 # token type on which to dispatch testing methods in order to avoid potential
@@ -470,6 +522,13 @@ function test_vector_indexing(::Type{T}, shape, ::Type{TestAbstractArray}) where
 
         mask = bitrand(shape)
         @testset "test logical indexing" begin
+            let
+                masks1 = (mask,)
+                @test only(@inferred(to_indices(A, masks1))) isa Base.LogicalIndex{Int}
+                if IndexStyle(B) isa IndexCartesian
+                    @test only(@inferred(to_indices(B, masks1))) === Base.LogicalIndex(mask)
+                end
+            end
             @test B[mask] == A[mask] == B[findall(mask)] == A[findall(mask)] == LinearIndices(mask)[findall(mask)]
             @test B[vec(mask)] == A[vec(mask)] == LinearIndices(mask)[findall(mask)]
             mask1 = bitrand(size(A, 1))
@@ -479,10 +538,15 @@ function test_vector_indexing(::Type{T}, shape, ::Type{TestAbstractArray}) where
             @test B[mask1, 1, trailing2] == A[mask1, 1, trailing2] == LinearIndices(mask)[findall(mask1)]
 
             if ndims(B) > 1
+                slice = ntuple(Returns(:), ndims(B)-1)
                 maskfront = bitrand(shape[1:end-1])
-                Bslice = B[ntuple(i->(:), ndims(B)-1)..., 1]
-                @test B[maskfront,1] == Bslice[maskfront]
+                Bslicefront = B[slice..., 1]
+                @test B[maskfront, 1] == Bslicefront[maskfront]
                 @test size(B[maskfront, 1:1]) == (sum(maskfront), 1)
+                maskend = bitrand(shape[2:end])
+                Bsliceend = B[1, slice...]
+                @test B[1 ,maskend] == Bsliceend[maskend]
+                @test size(B[1:1, maskend]) == (1, sum(maskend))
             end
         end
     end
@@ -505,12 +569,24 @@ function test_primitives(::Type{T}, shape, ::Type{TestAbstractArray}) where T
     @test firstindex(B, 1) == firstindex(A, 1) == first(axes(B, 1))
     @test firstindex(B, 2) == firstindex(A, 2) == first(axes(B, 2))
 
-    # isassigned(a::AbstractArray, i::Int...)
+    @test !isassigned(B)
+    # isassigned(a::AbstractArray, i::Integer...)
     j = rand(1:length(B))
     @test isassigned(B, j)
     if T == T24Linear
         @test !isassigned(B, length(B) + 1)
     end
+    # isassigned(a::AbstractArray, i::CartesianIndex)
+    @test isassigned(B, first(CartesianIndices(B)))
+    ind = last(CartesianIndices(B))
+    @test !isassigned(B, ind + oneunit(ind))
+    # isassigned(a::AbstractArray, i::Union{Integer,CartesianIndex}...)
+    @test isassigned(B, Int16.(first.(axes(B)))..., CartesianIndex(1,1))
+    # Bool isn't a valid index
+    @test_throws ArgumentError isassigned(B, Bool.(first.(axes(B)))..., CartesianIndex(1,1))
+    @test_throws ArgumentError isassigned(B, Bool.(first.(axes(B)))...)
+    @test_throws ArgumentError isassigned(B, true)
+    @test_throws ArgumentError isassigned(B, false)
 
     # reshape(a::AbstractArray, dims::Dims)
     @test_throws DimensionMismatch reshape(B, (0, 1))
@@ -683,8 +759,8 @@ function test_cat(::Type{TestAbstractArray})
     @test hcat() == Any[]
     @test vcat(1, 1.0, 3, 3.0) == [1.0, 1.0, 3.0, 3.0]
     @test hcat(1, 1.0, 3, 3.0) == [1.0 1.0 3.0 3.0]
-    @test_throws ArgumentError hcat(B1, B2)
-    @test_throws ArgumentError vcat(C1, C2)
+    @test_throws DimensionMismatch hcat(B1, B2)
+    @test_throws DimensionMismatch vcat(C1, C2)
 
     @test vcat(B) == B
     @test hcat(B) == B
@@ -713,9 +789,9 @@ function test_cat(::Type{TestAbstractArray})
     end
 
     @test_throws ArgumentError hvcat(7, 1:20...)
-    @test_throws ArgumentError hvcat((2), C1, C3)
-    @test_throws ArgumentError hvcat((1), C1, C2)
-    @test_throws ArgumentError hvcat((1), C2, C3)
+    @test_throws DimensionMismatch hvcat((2), C1, C3)
+    @test_throws DimensionMismatch hvcat((1), C1, C2)
+    @test_throws DimensionMismatch hvcat((1), C2, C3)
 
     tup = tuple(rand(1:10, i)...)
     @test hvcat(tup) == []
@@ -724,8 +800,8 @@ function test_cat(::Type{TestAbstractArray})
     @test_throws ArgumentError hvcat((2, 2), 1, 2, 3, 4, 5)
     @test_throws ArgumentError Base.typed_hvcat(Int, (2, 2), 1, 2, 3, 4, 5)
     # check for # of columns mismatch b/w rows
-    @test_throws ArgumentError hvcat((3, 2), 1, 2, 3, 4, 5, 6)
-    @test_throws ArgumentError Base.typed_hvcat(Int, (3, 2), 1, 2, 3, 4, 5, 6)
+    @test_throws DimensionMismatch hvcat((3, 2), 1, 2, 3, 4, 5, 6)
+    @test_throws DimensionMismatch Base.typed_hvcat(Int, (3, 2), 1, 2, 3, 4, 5, 6)
 
     # 18395
     @test isa(Any["a" 5; 2//3 1.0][2,1], Rational{Int})
@@ -755,6 +831,9 @@ function test_cat(::Type{TestAbstractArray})
     r = rand(Float32, 56, 56, 64, 1);
     f(r) = cat(r, r, dims=(3,))
     @inferred f(r);
+
+    #58866 - ensure proper dimension calculation for 0-dimension elements
+    @test [zeros(1, 0) zeros(1,0); zeros(0,0) zeros(0, 0)] == Matrix{Float64}(undef, 1, 0)
 end
 
 function test_ind2sub(::Type{TestAbstractArray})
@@ -782,9 +861,8 @@ Base.getindex(A::TSlowNIndexes{T,2}, i::Int, j::Int) where {T} = A.data[i,j]
     @test isa(map(Set, Array[[1,2],[3,4]]), Vector{Set{Int}})
 end
 
-@testset "mapping over scalars and empty arguments:" begin
+@testset "mapping over scalars" begin
     @test map(sin, 1) === sin(1)
-    @test map(()->1234) === 1234
 end
 
 function test_UInt_indexing(::Type{TestAbstractArray})
@@ -833,7 +911,27 @@ test_ind2sub(TestAbstractArray)
 
 include("generic_map_tests.jl")
 generic_map_tests(map, map!)
-@test_throws ArgumentError map!(-, [1])
+@test map!(-, [1]) == [-1]
+
+@testset "#30624" begin
+    ### unstructured
+    @test map!(+, ones(3), ones(3), ones(3), [1]) == [3, 1, 1]
+    @test map!(+, ones(3), [1], ones(3), ones(3)) == [3, 1, 1]
+    @test map!(+, [1], [1], [], []) == [1]
+    @test map!(+, [[1]], [1], [], []) == [[1]]
+
+    # TODO: decide if input axes & lengths should be validated
+    # @test_throws BoundsError map!(+, ones(1), ones(2))
+    # @test_throws BoundsError map!(+, ones(1), ones(2, 2))
+
+    @test map!(+, ones(3), view(ones(2, 3), 1:2, 2:3), ones(3)) == [2, 2, 2]
+    @test map!(+, ones(3), ones(2, 2), ones(3)) == [2, 2, 2]
+
+    ### structured (all mapped arguments are <:AbstractArray equal ndims > 1)
+    @test map!(+, ones(4), ones(2, 2), ones(2, 2)) == [2, 2, 2, 2]
+    @test map!(+, ones(4), ones(2, 2), ones(1, 2)) == [2, 2, 1, 1]
+    # @test_throws BoundsError map!(+, ones(3), ones(2, 2), ones(2, 2))
+end
 
 test_UInt_indexing(TestAbstractArray)
 test_13315(TestAbstractArray)
@@ -982,6 +1080,16 @@ end
     @test isempty(v)
     @test isempty(v2::Vector{Int})
     @test isempty(v3::Vector{Float64})
+
+    S = StructArrays.StructArray{Complex{Int}}((v, v))
+    for T in (Complex{Int}, ComplexF64)
+        S0 = empty(S, T)
+        @test S0 isa StructArrays.StructArray{T}
+        @test length(S0) == 0
+    end
+    S0 = empty(S, String)
+    @test S0 isa Vector{String}
+    @test length(S0) == 0
 end
 
 @testset "CartesianIndices" begin
@@ -1062,6 +1170,7 @@ end
 @testset "IndexStyle for various types" begin
     @test Base.IndexStyle(UpperTriangular) == IndexCartesian() # subtype of AbstractArray, not of Array
     @test Base.IndexStyle(Vector) == IndexLinear()
+    @test Base.IndexStyle(Memory) == IndexLinear()
     @test Base.IndexStyle(UnitRange) == IndexLinear()
     @test Base.IndexStyle(UpperTriangular(rand(3, 3)), [1; 2; 3]) == IndexCartesian()
     @test Base.IndexStyle(UpperTriangular(rand(3, 3)), rand(3, 3), [1; 2; 3]) == IndexCartesian()
@@ -1091,23 +1200,23 @@ end
 @testset "sizeof" begin
     let arrUInt8 = zeros(UInt8, 10)
         @test sizeof(arrUInt8) == 10
-        @test Core.sizeof(arrUInt8) == 10
+        @test Core.sizeof(arrUInt8) == 3 * sizeof(Int)
     end
 
     let arrUInt32 = zeros(UInt32, 10)
         @test sizeof(arrUInt32) == 40
-        @test Core.sizeof(arrUInt32) == 40
+        @test Core.sizeof(arrUInt32) == 3 * sizeof(Int)
     end
 
     let arrFloat64 = zeros(Float64, 10, 10)
         @test sizeof(arrFloat64) == 800
-        @test Core.sizeof(arrFloat64) == 800
+        @test Core.sizeof(arrFloat64) == 4 * sizeof(Int)
     end
 
     # Test union arrays (Issue #23321)
     let arrUnion = Union{Int64, Cvoid}[rand(Bool) ? k : nothing for k = 1:10]
         @test sizeof(arrUnion) == 80
-        @test Core.sizeof(arrUnion) == 80
+        @test Core.sizeof(arrUnion) == 3 * sizeof(Int)
     end
 
     # Test non-power of 2 types (Issue #35884)
@@ -1121,7 +1230,7 @@ end
     let arrayOfUInt48 = [a, b, c]
         f35884(x) = sizeof(x)
         @test f35884(arrayOfUInt48) == 24
-        @test Core.sizeof(arrayOfUInt48) == 24
+        @test Core.sizeof(arrayOfUInt48) == 3 * sizeof(Int)
     end
 end
 
@@ -1147,7 +1256,7 @@ function Base.getindex(S::Strider{<:Any,N}, I::Vararg{Int,N}) where {N}
 end
 Base.strides(S::Strider) = S.strides
 Base.elsize(::Type{<:Strider{T}}) where {T} = Base.elsize(Vector{T})
-Base.unsafe_convert(::Type{Ptr{T}}, S::Strider{T}) where {T} = pointer(S.data, S.offset)
+Base.cconvert(::Type{Ptr{T}}, S::Strider{T}) where {T} = memoryref(S.data.ref, S.offset)
 
 @testset "Simple 3d strided views and permutes" for sz in ((5, 3, 2), (7, 11, 13))
     A = collect(reshape(1:prod(sz), sz))
@@ -1206,6 +1315,9 @@ Base.unsafe_convert(::Type{Ptr{T}}, S::Strider{T}) where {T} = pointer(S.data, S
             end
         end
     end
+    # constant propagation in the PermutedDimsArray constructor
+    X = @inferred (A -> PermutedDimsArray(A, (2,3,1)))(A)
+    @test @inferred((X -> PermutedDimsArray(X, (3,1,2)))(X)) == A
 end
 
 @testset "simple 2d strided views, permutes, transposes" for sz in ((5, 3), (7, 11))
@@ -1325,6 +1437,8 @@ end
 Base.push!(tpa::TestPushArray{T}, a::T) where T = push!(tpa.data, a)
 Base.pushfirst!(tpa::TestPushArray{T}, a::T) where T = pushfirst!(tpa.data, a)
 
+push_slightly_abstract_namedtuple(v::Vector{@NamedTuple{x::Int,y::Any}}, x::Int, @nospecialize(y)) = push!(v, (; x, y))
+
 @testset "push! and pushfirst!" begin
     a_orig = [1]
     tpa = TestPushArray{Int, 2}(a_orig)
@@ -1334,6 +1448,58 @@ Base.pushfirst!(tpa::TestPushArray{T}, a::T) where T = pushfirst!(tpa.data, a)
     tpa = TestPushArray{Int, 2}(a_orig)
     pushfirst!(tpa, 6, 5, 4, 3, 2)
     @test tpa.data == reverse(collect(1:6))
+
+    let src = code_typed1(push_slightly_abstract_namedtuple, (Vector{@NamedTuple{x::Int,y::Any}},Int,Any))
+        # After optimization, all `push!` and `convert` calls should have been inlined
+        @test all((x)->!iscall((src, push!))(x) && !iscall((src, convert))(x), src.code)
+    end
+end
+
+mutable struct SimpleArray{T} <: AbstractVector{T}
+    els::Vector{T}
+end
+Base.size(sa::SimpleArray) = size(sa.els)
+Base.getindex(sa::SimpleArray, idx...) = getindex(sa.els, idx...)
+Base.setindex!(sa::SimpleArray, v, idx...) = setindex!(sa.els, v, idx...)
+Base.resize!(sa::SimpleArray, n) = resize!(sa.els, n)
+Base.copy(sa::SimpleArray) = SimpleArray(copy(sa.els))
+
+isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
+using .Main.OffsetArrays
+
+@testset "Failing `$f` should not grow the array $a" for
+        f in (push!, append!, pushfirst!, prepend!),
+        a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1))
+    for args in ((1,), (1,2), ([1], [2]), [1])
+        orig = copy(a)
+        @test_throws Exception f(a, args...)
+        @test a == orig
+    end
+end
+
+@testset "Check push!($a, $args...)" for
+    a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)),
+    args in (("eenie",), ("eenie", "minie"), ("eenie", "minie", "mo"))
+        orig = copy(a)
+        push!(a, args...)
+        @test length(a) == length(orig) + length(args)
+        @test a[axes(orig,1)] == orig
+        @test all(a[end-length(args)+1:end] .== args)
+end
+
+@testset "Check append!($a, $args)" for
+    a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)),
+    args in (("eenie",), ("eenie", "minie"), ("eenie", "minie", "mo"))
+        orig = copy(a)
+        append!(a, args)
+        @test length(a) == length(orig) + length(args)
+        @test a[axes(orig,1)] == orig
+        @test all(a[end-length(args)+1:end] .== args)
+end
+
+@testset "Check sizehint!($a)" for
+    a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1))
+        @test sizehint!(a, 10) === a
 end
 
 @testset "splatting into hvcat" begin
@@ -1344,7 +1510,7 @@ end
 
     @test Int[t...; 3 4] == [1 2; 3 4]
     @test Int[0 t...; t... 0] == [0 1 2; 1 2 0]
-    @test_throws ArgumentError Int[t...; 3 4 5]
+    @test_throws DimensionMismatch Int[t...; 3 4 5]
 end
 
 @testset "issue #39896, modified getindex " begin
@@ -1398,15 +1564,15 @@ using Base: typed_hvncat
     @test [1;;] == fill(1, (1,1))
 
     for v in (1, fill(1), fill(1,1,1), fill(1, 1, 1, 1))
-        @test_throws ArgumentError [v; v;; v]
-        @test_throws ArgumentError [v; v;; v; v; v]
-        @test_throws ArgumentError [v; v; v;; v; v]
-        @test_throws ArgumentError [v; v;; v; v;;; v; v;; v; v;; v; v]
-        @test_throws ArgumentError [v; v;; v; v;;; v; v]
-        @test_throws ArgumentError [v; v;; v; v;;; v; v; v;; v; v]
-        @test_throws ArgumentError [v; v;; v; v;;; v; v;; v; v; v]
+        @test_throws DimensionMismatch [v; v;; v]
+        @test_throws DimensionMismatch [v; v;; v; v; v]
+        @test_throws DimensionMismatch [v; v; v;; v; v]
+        @test_throws DimensionMismatch [v; v;; v; v;;; v; v;; v; v;; v; v]
+        @test_throws DimensionMismatch [v; v;; v; v;;; v; v]
+        @test_throws DimensionMismatch [v; v;; v; v;;; v; v; v;; v; v]
+        @test_throws DimensionMismatch [v; v;; v; v;;; v; v;; v; v; v]
         # ensure a wrong shape with the right number of elements doesn't pass through
-        @test_throws ArgumentError [v; v;; v; v;;; v; v; v; v]
+        @test_throws DimensionMismatch [v; v;; v; v;;; v; v; v; v]
 
         @test [v; v;; v; v] == fill(1, ndims(v) == 3 ? (2, 2, 1) : (2,2))
         @test [v; v;; v; v;;;] == fill(1, 2, 2, 1)
@@ -1474,7 +1640,7 @@ using Base: typed_hvncat
     end
 
     # reject shapes that don't nest evenly between levels (e.g. 1 + 2 does not fit into 2)
-    @test_throws ArgumentError hvncat(((1, 2, 1), (2, 2), (4,)), true, [1 2], [3], [4], [1 2; 3 4])
+    @test_throws DimensionMismatch hvncat(((1, 2, 1), (2, 2), (4,)), true, [1 2], [3], [4], [1 2; 3 4])
 
     # zero-length arrays are handled appropriately
     @test [zeros(Int, 1, 2, 0) ;;; 1 3] == [1 3;;;]
@@ -1489,18 +1655,18 @@ using Base: typed_hvncat
     for v1 ∈ (zeros(Int, 0, 0), zeros(Int, 0, 0, 0, 0), zeros(Int, 0, 0, 0, 0, 0, 0, 0))
         for v2 ∈ (1, [1])
             for v3 ∈ (2, [2])
-                @test_throws ArgumentError [v1 ;;; v2]
-                @test_throws ArgumentError [v1 ;;; v2 v3]
-                @test_throws ArgumentError [v1 v1 ;;; v2 v3]
+                @test_throws DimensionMismatch [v1 ;;; v2]
+                @test_throws DimensionMismatch [v1 ;;; v2 v3]
+                @test_throws DimensionMismatch [v1 v1 ;;; v2 v3]
             end
         end
     end
     v1 = zeros(Int, 0, 0, 0)
     for v2 ∈ (1, [1])
         for v3 ∈ (2, [2])
-            @test_throws ArgumentError [v1 ;;; v2 v3]
-            @test_throws ArgumentError [v1 ;;; v2]
-            @test_throws ArgumentError [v1 v1 ;;; v2 v3]
+            @test_throws DimensionMismatch [v1 ;;; v2 v3]
+            @test_throws DimensionMismatch [v1 ;;; v2]
+            @test_throws DimensionMismatch [v1 v1 ;;; v2 v3]
         end
     end
 
@@ -1568,8 +1734,8 @@ using Base: typed_hvncat
     @test Array{Int, 3}(undef, 0, 0, 0) == typed_hvncat(Int, 3) isa Array{Int, 3}
 
     # Issue 43933 - semicolon precedence mistake should produce an error
-    @test_throws ArgumentError [[1 1]; 2 ;; 3 ; [3 4]]
-    @test_throws ArgumentError [[1 ;;; 1]; 2 ;;; 3 ; [3 ;;; 4]]
+    @test_throws DimensionMismatch [[1 1]; 2 ;; 3 ; [3 4]]
+    @test_throws DimensionMismatch [[1 ;;; 1]; 2 ;;; 3 ; [3 ;;; 4]]
 
     @test [[1 2; 3 4] [5; 6]; [7 8] 9;;;] == [1 2 5; 3 4 6; 7 8 9;;;]
 
@@ -1580,6 +1746,9 @@ using Base: typed_hvncat
     @test ["A";;"B";;"C";;"D"] == ["A" "B" "C" "D"]
     @test ["A";"B";;"C";"D"] == ["A" "C"; "B" "D"]
     @test [["A";"B"];;"C";"D"] == ["A" "C"; "B" "D"]
+
+    #58866 - ensure proper dimension calculation for 0-dimension elements
+    @test [zeros(1, 0) zeros(1,0);;; zeros(0,0) zeros(0, 0)] == Array{Float64, 3}(undef, 1, 0, 0)
 end
 
 @testset "stack" begin
@@ -1684,6 +1853,9 @@ end
     @test_throws ArgumentError stack([1:3, 4:6]; dims=3)
     @test_throws ArgumentError stack(abs2, 1:3; dims=2)
 
+    @test stack(["hello", "world"]) isa Matrix{Char}
+    @test_throws DimensionMismatch stack(["hello", "world!"])  # had a bug in error printing
+
     # Empty
     @test_throws ArgumentError stack(())
     @test_throws ArgumentError stack([])
@@ -1708,6 +1880,33 @@ end
     end
 end
 
+@testset "issue 56771, stack(; dims) on containers with HasLength eltype & HasShape elements" begin
+    for T in (Matrix, Array, Any)
+        xs = T[rand(2,3) for _ in 1:4]
+        @test size(stack(xs; dims=1)) == (4,2,3)
+        @test size(stack(xs; dims=2)) == (2,4,3)  # this was the problem case, for T=Array
+        @test size(stack(xs; dims=3)) == (2,3,4)
+        @test size(stack(identity, xs; dims=2)) == (2,4,3)
+        @test size(stack(x for x in xs if true; dims=2)) == (2,4,3)
+
+        xmat = T[rand(2,3) for _ in 1:4, _ in 1:5]
+        @test size(stack(xmat; dims=1)) == (20,2,3)
+        @test size(stack(xmat; dims=2)) == (2,20,3)
+        @test size(stack(xmat; dims=3)) == (2,3,20)
+    end
+
+    it = Iterators.product(1:2, 3:5)
+    @test size(it) == (2,3)
+    @test Base.IteratorSize(typeof(it)) == Base.HasShape{2}()
+    @test Base.IteratorSize(Iterators.ProductIterator) == Base.HasLength()
+    for T in (typeof(it), Iterators.ProductIterator, Any)
+        ys = T[it for _ in 1:4]
+        @test size(stack(ys; dims=2)) == (2,4,3)
+        @test size(stack(identity, ys; dims=2)) == (2,4,3)
+        @test size(stack(y for y in ys if true; dims=2)) == (2,4,3)
+    end
+end
+
 @testset "keepat!" begin
     a = [1:6;]
     @test a === keepat!(a, 1:5)
@@ -1739,50 +1938,71 @@ end
 end
 
 module IRUtils
-    include("compiler/irutils.jl")
+    include(joinpath(@__DIR__,"../Compiler/test/irutils.jl"))
+end
+
+function check_pointer_strides(A::AbstractArray)
+    # Make sure stride(A, i) is equivalent with strides(A)[i] (if 1 <= i <= ndims(A))
+    dims = ntuple(identity, ndims(A))
+    map(i -> stride(A, i), dims) == @inferred(strides(A)) || return false
+    # Test pointer via value check.
+    first(A) === Base.unsafe_load(pointer(A)) || return false
+    # Test strides via value check.
+    for i in eachindex(IndexLinear(), A)
+        A[i] === Base.unsafe_load(pointer(A, i)) || return false
+    end
+    return true
 end
 
-@testset "strides for ReshapedArray" begin
-    function check_strides(A::AbstractArray)
-        # Make sure stride(A, i) is equivalent with strides(A)[i] (if 1 <= i <= ndims(A))
-        dims = ntuple(identity, ndims(A))
-        map(i -> stride(A, i), dims) == @inferred(strides(A)) || return false
-        # Test strides via value check.
-        for i in eachindex(IndexLinear(), A)
-            A[i] === Base.unsafe_load(pointer(A, i)) || return false
+@testset "colonful `reshape`, #54245" begin
+    @test reshape([], (0, :)) isa Matrix
+    @test_throws DimensionMismatch reshape([7], (0, :))
+    let b = prevpow(2, typemax(Int))
+        @test iszero(b*b)
+        @test_throws ArgumentError reshape([7], (b, :, b))
+        @test reshape([], (b, :, b)) isa Array{<:Any, 3}
+    end
+    for iterator ∈ (7:6, 7:7, 7:8)
+        for it ∈ (iterator, map(BigInt, iterator))
+            @test reshape(it, (:, Int(length(it)))) isa AbstractMatrix
+            @test reshape(it, (Int(length(it)), :)) isa AbstractMatrix
+            @test reshape(it, (1, :))               isa AbstractMatrix
+            @test reshape(it, (:, 1))               isa AbstractMatrix
         end
-        return true
     end
+end
+
+@testset "strides for ReshapedArray" begin
     # Type-based contiguous Check
     a = vec(reinterpret(reshape, Int16, reshape(view(reinterpret(Int32, randn(10)), 2:11), 5, :)))
     f(a) = only(strides(a));
     @test IRUtils.fully_eliminated(f, Base.typesof(a)) && f(a) == 1
     # General contiguous check
     a = view(rand(10,10), 1:10, 1:10)
-    @test check_strides(vec(a))
+    @test check_pointer_strides(vec(a))
     b = view(parent(a), 1:9, 1:10)
     @test_throws "Input is not strided." strides(vec(b))
     # StridedVector parent
     for n in 1:3
         a = view(collect(1:60n), 1:n:60n)
-        @test check_strides(reshape(a, 3, 4, 5))
-        @test check_strides(reshape(a, 5, 6, 2))
+        @test check_pointer_strides(reshape(a, 3, 4, 5))
+        @test check_pointer_strides(reshape(a, 5, 6, 2))
         b = view(parent(a), 60n:-n:1)
-        @test check_strides(reshape(b, 3, 4, 5))
-        @test check_strides(reshape(b, 5, 6, 2))
+        @test check_pointer_strides(reshape(b, 3, 4, 5))
+        @test check_pointer_strides(reshape(b, 5, 6, 2))
     end
     # StridedVector like parent
     a = randn(10, 10, 10)
     b = view(a, 1:10, 1:1, 5:5)
-    @test check_strides(reshape(b, 2, 5))
+    @test check_pointer_strides(reshape(b, 2, 5))
     # Other StridedArray parent
     a = view(randn(10,10), 1:9, 1:10)
-    @test check_strides(reshape(a,3,3,2,5))
-    @test check_strides(reshape(a,3,3,5,2))
-    @test check_strides(reshape(a,9,5,2))
-    @test check_strides(reshape(a,3,3,10))
-    @test check_strides(reshape(a,1,3,1,3,1,5,1,2))
-    @test check_strides(reshape(a,3,3,5,1,1,2,1,1))
+    @test check_pointer_strides(reshape(a,3,3,2,5))
+    @test check_pointer_strides(reshape(a,3,3,5,2))
+    @test check_pointer_strides(reshape(a,9,5,2))
+    @test check_pointer_strides(reshape(a,3,3,10))
+    @test check_pointer_strides(reshape(a,1,3,1,3,1,5,1,2))
+    @test check_pointer_strides(reshape(a,3,3,5,1,1,2,1,1))
     @test_throws "Input is not strided." strides(reshape(a,3,6,5))
     @test_throws "Input is not strided." strides(reshape(a,3,2,3,5))
     @test_throws "Input is not strided." strides(reshape(a,3,5,3,2))
@@ -1795,7 +2015,14 @@ end
     @test @inferred(strides(a)) == (1, 1, 1)
     # Dense parent (but not StridedArray)
     A = reinterpret(Int8, reinterpret(reshape, Int16, rand(Int8, 2, 3, 3)))
-    @test check_strides(reshape(A, 3, 2, 3))
+    @test check_pointer_strides(reshape(A, 3, 2, 3))
+end
+
+@testset "pointer for SubArray with none-dense parent." begin
+    a = view(Matrix(reshape(0x01:0xc8, 20, :)), 1:2:20, :)
+    b = reshape(a, 20, :)
+    @test check_pointer_strides(view(b, 2:11, 1:5))
+    @test check_pointer_strides(view(b, reshape(2:11, 2, :), 1:5))
 end
 
 @testset "stride for 0 dims array #44087" begin
@@ -1825,13 +2052,17 @@ end
 
 @testset "type-based offset axes check" begin
     a = randn(ComplexF64, 10)
+    b = randn(ComplexF64, 4, 4, 4, 4)
     ta = reinterpret(Float64, a)
     tb = reinterpret(Float64, view(a, 1:2:10))
     tc = reinterpret(Float64, reshape(view(a, 1:3:10), 2, 2, 1))
+    td = view(b, :, :, 1, 1)
     # Issue #44040
     @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(ta, tc))
     @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(tc, tc))
     @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(ta, tc, tb))
+    # Issue #49332
+    @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(td, td, td))
     # Ranges && CartesianIndices
     @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(1:10, Base.OneTo(10), 1.0:2.0, LinRange(1.0, 2.0, 2), 1:2:10, CartesianIndices((1:2:10, 1:2:10))))
     # Remind us to call `any` in `Base.has_offset_axes` once our compiler is ready.
@@ -1844,3 +2075,317 @@ end
 # type stable [x;;] (https://github.com/JuliaLang/julia/issues/45952)
 f45952(x) = [x;;]
 @inferred f45952(1.0)
+
+@testset "isassigned with a Bool index" begin
+    A = zeros(2,2)
+    @test_throws "invalid index: true of type Bool" isassigned(A, 1, true)
+    @test_throws "invalid index: true of type Bool" isassigned(A, true)
+end
+
+@testset "repeat for FillArrays" begin
+    f = FillArrays.Fill(3, (4,))
+    @test repeat(f, 2) === FillArrays.Fill(3, (8,))
+    @test repeat(f, 2, 3) === FillArrays.Fill(3, (8, 3))
+    @test repeat(f, inner=(1,2), outer=(3,1)) === repeat(f, 3, 2) === FillArrays.Fill(3, (12,2))
+    f = FillArrays.Fill(3, (4, 2))
+    @test repeat(f, 2, 3) === FillArrays.Fill(3, (8, 6))
+    @test repeat(f, 2, 3, 4) === FillArrays.Fill(3, (8, 6, 4))
+    @test repeat(f, inner=(1,2), outer=(3,1)) === FillArrays.Fill(3, (12, 4))
+end
+
+@testset "zero" begin
+    @test zero([1 2; 3 4]) isa Matrix{Int}
+    @test zero([1 2; 3 4]) == [0 0; 0 0]
+
+    @test zero([1.0]) isa Vector{Float64}
+    @test zero([1.0]) == [0.0]
+
+    @test zero([[2,2], [3,3,3]]) isa Vector{Vector{Int}}
+    @test zero([[2,2], [3,3,3]]) == [[0,0], [0, 0, 0]]
+
+
+    @test zero(Union{Float64, Missing}[missing]) == [0.0]
+    struct CustomNumber <: Number
+        val::Float64
+    end
+    Base.zero(::Type{CustomNumber}) = CustomNumber(0.0)
+    @test zero([CustomNumber(5.0)]) == [CustomNumber(0.0)]
+    @test zero(Union{CustomNumber, Missing}[missing]) == [CustomNumber(0.0)]
+    @test zero(Vector{Union{CustomNumber, Missing}}(undef, 1)) == [CustomNumber(0.0)]
+end
+
+@testset "`_prechecked_iterate` optimization" begin
+    function test_prechecked_iterate(iter)
+        Js = Base._prechecked_iterate(iter)
+        for I in iter
+            J, s = Js::NTuple{2,Any}
+            @test J === I
+            Js = Base._prechecked_iterate(iter, s)
+        end
+    end
+    test_prechecked_iterate(1:10)
+    test_prechecked_iterate(Base.OneTo(10))
+    test_prechecked_iterate(CartesianIndices((3, 3)))
+    test_prechecked_iterate(CartesianIndices(()))
+    test_prechecked_iterate(LinearIndices((3, 3)))
+    test_prechecked_iterate(LinearIndices(()))
+    test_prechecked_iterate(Base.SCartesianIndices2{3}(1:3))
+end
+
+@testset "IndexStyles in copyto!" begin
+    A = rand(3,2)
+    B = zeros(size(A))
+    colons = ntuple(_->:, ndims(B))
+    # Ensure that the AbstractArray methods are hit
+    # by using views instead of Arrays
+    @testset "IndexLinear - IndexLinear" begin
+        B .= 0
+        copyto!(view(B, colons...), A)
+        @test B == A
+    end
+    @testset "IndexLinear - IndexCartesian" begin
+        B .= 0
+        copyto!(view(B, colons...), view(A, axes(A)...))
+        @test B == A
+    end
+    @testset "IndexCartesian - IndexLinear" begin
+        B .= 0
+        copyto!(view(B, axes(B)...), A)
+        @test B == A
+    end
+    @testset "IndexCartesian - IndexCartesian" begin
+        B .= 0
+        copyto!(view(B, axes(B)...), view(A, axes(A)...))
+        @test B == A
+    end
+end
+
+@testset "reshape for offset arrays" begin
+    p = Base.IdentityUnitRange(3:4)
+    r = reshape(p, :, 1)
+    @test r[eachindex(r)] == UnitRange(p)
+    @test collect(r) == r
+
+    struct ZeroBasedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
+        a :: A
+        function ZeroBasedArray(a::AbstractArray)
+            Base.require_one_based_indexing(a)
+            new{eltype(a), ndims(a), typeof(a)}(a)
+        end
+    end
+    Base.parent(z::ZeroBasedArray) = z.a
+    Base.size(z::ZeroBasedArray) = size(parent(z))
+    Base.axes(z::ZeroBasedArray) = map(x -> Base.IdentityUnitRange(0:x - 1), size(parent(z)))
+    Base.getindex(z::ZeroBasedArray{<:Any, N}, i::Vararg{Int,N}) where {N} = parent(z)[map(x -> x + 1, i)...]
+    Base.setindex!(z::ZeroBasedArray{<:Any, N}, val, i::Vararg{Int,N}) where {N} = parent(z)[map(x -> x + 1, i)...] = val
+
+    z = ZeroBasedArray(collect(1:4))
+    r2 = reshape(z, :, 1)
+    @test r2[CartesianIndices(r2)] == r2[LinearIndices(r2)]
+    r2[firstindex(r2)] = 34
+    @test z[0] == 34
+    r2[eachindex(r2)] = r2 .* 2
+    for (i, j) in zip(eachindex(r2), eachindex(z))
+        @test r2[i] == z[j]
+    end
+end
+
+@testset "zero for arbitrary axes" begin
+    r = SizedArrays.SOneTo(2)
+    s = Base.OneTo(2)
+    _to_oneto(x::Integer) = Base.OneTo(2)
+    _to_oneto(x::Union{Base.OneTo, SizedArrays.SOneTo}) = x
+    for (f, v) in ((zeros, 0), (ones, 1), ((x...)->fill(3,x...),3))
+        for ax in ((r,r), (s, r), (2, r))
+            A = f(ax...)
+            @test axes(A) == map(_to_oneto, ax)
+            if all(x -> x isa SizedArrays.SOneTo, ax)
+                @test A isa SizedArrays.SizedArray && parent(A) isa Array
+            else
+                @test A isa Array
+            end
+            @test all(==(v), A)
+        end
+    end
+end
+
+@testset "one" begin
+    @test one([1 2; 3 4]) == [1 0; 0 1]
+    @test one([1 2; 3 4]) isa Matrix{Int}
+
+    struct Mat <: AbstractMatrix{Int}
+        p::Matrix{Int}
+    end
+    Base.size(m::Mat) = size(m.p)
+    Base.IndexStyle(::Type{<:Mat}) = IndexLinear()
+    Base.getindex(m::Mat, i::Int) = m.p[i]
+    Base.setindex!(m::Mat, v, i::Int) = m.p[i] = v
+    Base.similar(::Mat, ::Type{Int}, size::NTuple{2,Int}) = Mat(Matrix{Int}(undef, size))
+
+    @test one(Mat([1 2; 3 4])) == Mat([1 0; 0 1])
+    @test one(Mat([1 2; 3 4])) isa Mat
+
+    @testset "SizedArray" begin
+        S = [1 2; 3 4]
+        A = SizedArrays.SizedArray{(2,2)}(S)
+        @test one(A) == one(typeof(A))
+        @test oneunit(A) == oneunit(typeof(A))
+        M = fill(A, 2, 2)
+        O = one(M)
+        for I in CartesianIndices(M)
+            if I[1] == I[2]
+                @test O[I] == one(S)
+            else
+                @test O[I] == zero(S)
+            end
+        end
+    end
+end
+
+@testset "copyto! with non-AbstractArray src" begin
+    A = zeros(4)
+    x = (i for i in axes(A,1))
+    copyto!(A, 1, x, 1, length(A))
+    @test A == axes(A,1)
+    A .= 0
+    copyto!(A, 1, x, 1, 2)
+    @test A[1:2] == first(x,2)
+    @test iszero(A[3:end])
+    A .= 0
+    copyto!(A, 1, x, 1)
+    @test A == axes(A,1)
+end
+
+@testset "reshape with Integer sizes" begin
+    @test reshape(1:4, big(2), big(2)) == reshape(1:4, 2, 2)
+    a = [1 2 3; 4 5 6]
+    reshaped_arrays = (
+        reshape(a, 3, 2),
+        reshape(a, (3, 2)),
+        reshape(a, big(3), big(2)),
+        reshape(a, (big(3), big(2))),
+        reshape(a, :, big(2)),
+        reshape(a, (:, big(2))),
+        reshape(a, big(3), :),
+        reshape(a, (big(3), :)),
+    )
+    @test allequal(reshaped_arrays)
+    for b ∈ reshaped_arrays
+        @test b isa Matrix{Int}
+        @test b.ref === a.ref
+    end
+end
+@testset "AbstractArrayMath" begin
+    @testset "IsReal" begin
+        A = [1, 2, 3, 4]
+        @test isreal(A) == true
+        B = [1.1, 2.2, 3.3, 4.4]
+        @test isreal(B) == true
+        C = [1, 2.2, 3]
+        @test isreal(C) == true
+        D = Real[]
+        @test isreal(D) == true
+        E = [1 + 1im, 2 - 2im]
+        @test isreal(E) == false
+        struct MyReal <: Real
+            value::Float64
+        end
+        F = [MyReal(1.0), MyReal(2.0)]
+        @test isreal(F) == true
+        G = ["a", "b", "c"]
+        @test_throws MethodError isreal(G)
+    end
+end
+
+@testset "similar/reshape for AbstractOneTo" begin
+    A = [1,2]
+    @testset "reshape" begin
+        @test reshape(A, 2, SizedArrays.SOneTo(1)) == reshape(A, 2, 1)
+        @test reshape(A, Base.OneTo(2), SizedArrays.SOneTo(1)) == reshape(A, 2, 1)
+        @test reshape(A, SizedArrays.SOneTo(1), 2) == reshape(A, 1, 2)
+        @test reshape(A, SizedArrays.SOneTo(1), Base.OneTo(2)) == reshape(A, 1, 2)
+    end
+    @testset "similar" begin
+        b = similar(A, SizedArrays.SOneTo(1), big(2))
+        @test b isa Array{Int, 2}
+        @test size(b) == (1, 2)
+        b = similar(A, SizedArrays.SOneTo(1), Base.OneTo(2))
+        @test b isa Array{Int, 2}
+        @test size(b) == (1, 2)
+        b = similar(A, SizedArrays.SOneTo(1), 2, Base.OneTo(2))
+        @test b isa Array{Int, 3}
+        @test size(b) == (1, 2, 2)
+
+        @test_throws "no method matching $Int(::$Infinity)" similar(ones(2), OneToInf())
+    end
+end
+
+@testset "AbstractOneTo" begin
+    s = SizedArrays.SizedArray{(2,2)}(ones(2,2))
+    v = view(s, :, 1)
+    @test axes(v,1) isa SizedArrays.SOneTo{2}
+    @test eachindex(v) isa SizedArrays.SOneTo{2}
+
+    ax = axes(v,1)
+    @test ax[Base.IdentityUnitRange(ax)] == ax
+    @test ax[Base.IdentityUnitRange(2:2)] == Base.IdentityUnitRange(2:2)
+
+    # check that IdentityUnitRange behaves like Slice
+    @test axes(Base.IdentityUnitRange(ax), 1) === ax
+    @test eachindex(Base.IdentityUnitRange(ax)) === ax
+end
+
+@testset "effect inference for `iterate` for `Array` and for `Memory`" begin
+    for El ∈ (Float32, Real, Any)
+        for Arr ∈ (Memory{El}, Array{El, 0}, Vector{El}, Matrix{El}, Array{El, 3})
+            effects = Base.infer_effects(iterate, Tuple{Arr, Int})
+            @test Base.Compiler.is_effect_free(effects)
+            @test Base.Compiler.is_terminates(effects)
+            @test Base.Compiler.is_notaskstate(effects)
+            @test Base.Compiler.is_noub(effects)
+            @test Base.Compiler.is_nonoverlayed(effects)
+            @test Base.Compiler.is_nortcall(effects)
+        end
+    end
+end
+
+@testset "iterate for linear indexing" begin
+    A = [1 2; 3 4]
+    v = view(A, :)
+    @test sum(x for x in v) == sum(A)
+    v = view(A, 1:2:lastindex(A))
+    @test sum(x for x in v) == sum(A[1:2:end])
+    v2 = view(A, Base.IdentityUnitRange(1:length(A)))
+    @test sum(x for x in v2) == sum(A)
+end
+
+@testset "self referential" begin
+    v = Any[1,2,3]
+    v[1] = v
+    io = IOBuffer()
+    show(io, v)
+    @test String(take!(io)) == "Any[Any[#= circular reference @-1 =#], 2, 3]"
+
+    m1 = Any[1 2; 3 4]
+    m1[1] = m1
+    show(io, m1)
+    @test String(take!(io)) == "Any[#= circular reference @-1 =# 2; 3 4]"
+
+    m2 = Any[1; 2;; 3; 4;;; 5; 6;; 7; 8]
+    m2[1] = m2
+    show(io, m2)
+    @test String(take!(io)) == "Any[#= circular reference @-1 =# 3; 2 4;;; 5 7; 6 8]"
+end
+
+@testset "size promotion in addition/subtraction" begin
+    for A in Any[ones(), ones(1), ones(1,1,1)]
+        @test +(A) == A
+        for B in Any[ones(), ones(1), ones(1,1,1)]
+            sz = ndims(A) > ndims(B) ? size(A) : size(B)
+            @test A + B == fill(2.0,sz)
+            @test A - B == zeros(sz)
+            @test A + B + zeros() == A + B
+            @test A - B - zeros() == A - B
+        end
+    end
+end
diff --git a/test/ambiguous.jl b/test/ambiguous.jl
index 5056fc626e84a..a7d9adb9ce054 100644
--- a/test/ambiguous.jl
+++ b/test/ambiguous.jl
@@ -19,7 +19,7 @@ include("testenv.jl")
 
 @test length(methods(ambig, (Int, Int))) == 1
 @test length(methods(ambig, (UInt8, Int))) == 0
-@test length(Base.methods_including_ambiguous(ambig, (UInt8, Int))) == 3
+@test length(Base.methods_including_ambiguous(ambig, (UInt8, Int))) == 2
 
 @test ambig("hi", "there") == 1
 @test ambig(3.1, 3.2) == 5
@@ -42,7 +42,6 @@ let err = try
     errstr = String(take!(io))
     @test occursin("  ambig(x, y::Integer)\n    @ $curmod_str", errstr)
     @test occursin("  ambig(x::Integer, y)\n    @ $curmod_str", errstr)
-    @test occursin("  ambig(x::Number, y)\n    @ $curmod_str", errstr)
     @test occursin("Possible fix, define\n  ambig(::Integer, ::Integer)", errstr)
 end
 
@@ -75,7 +74,7 @@ let io = IOBuffer()
     cf = @eval @cfunction(ambig, Int, (UInt8, Int))  # test for a crash (doesn't throw an error)
     @test_throws(MethodError(ambig, (UInt8(1), Int(2)), get_world_counter()),
                  ccall(cf, Int, (UInt8, Int), 1, 2))
-    @test_throws(ErrorException("no unique matching method found for the specified argument types"),
+    @test_throws("Calling invoke(f, t, args...) would throw:\nMethodError: no method matching ambig",
                  which(ambig, (UInt8, Int)))
     @test length(code_typed(ambig, (UInt8, Int))) == 0
 end
@@ -97,10 +96,7 @@ ambig(x::Union{Char, Int16}) = 's'
 
 # Automatic detection of ambiguities
 
-const allowed_undefineds = Set([
-    GlobalRef(Base, :active_repl),
-    GlobalRef(Base, :active_repl_backend),
-])
+const allowed_undefineds = Set([GlobalRef(Base, :active_repl)])
 
 let Distributed = get(Base.loaded_modules,
                       Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed"),
@@ -163,7 +159,19 @@ ambig(::Signed, ::Int) = 3
 ambig(::Int, ::Signed) = 4
 end
 ambs = detect_ambiguities(Ambig48312)
-@test length(ambs) == 4
+@test length(ambs) == 1 # only ambiguous over (Int, Int), which is 3 or 4
+
+module UnboundAmbig55868
+    module B
+        struct C end
+        export C
+        Base.@deprecate_binding D C
+    end
+    using .B
+    export C, D
+end
+@test isempty(detect_unbound_args(UnboundAmbig55868))
+@test isempty(detect_ambiguities(UnboundAmbig55868))
 
 # Test that Core and Base are free of ambiguities
 # not using isempty so this prints more information when it fails
@@ -179,8 +187,7 @@ ambs = detect_ambiguities(Ambig48312)
 
     # some ambiguities involving Union{} type parameters may be expected, but not required
     let ambig = Set(detect_ambiguities(Core; recursive=true, ambiguous_bottom=true))
-        @test !isempty(ambig)
-        @test length(ambig) < 30
+        @test isempty(ambig)
     end
 
     STDLIB_DIR = Sys.STDLIB
@@ -279,7 +286,7 @@ end
 @test isempty(methods(Ambig8.f, (Int,)))
 @test isempty(methods(Ambig8.g, (Int,)))
 for f in (Ambig8.f, Ambig8.g)
-    @test length(methods(f, (Integer,))) == 2 # 1 is also acceptable
+    @test length(methods(f, (Integer,))) == 2 # 3 is also acceptable
     @test length(methods(f, (Signed,))) == 1 # 2 is also acceptable
     @test length(Base.methods_including_ambiguous(f, (Signed,))) == 2
     @test f(0x00) == 1
@@ -288,6 +295,30 @@ for f in (Ambig8.f, Ambig8.g)
     @test f(Int8(0)) == 4
     @test_throws MethodError f(0)
     @test_throws MethodError f(pi)
+    let ambig = Ref{Int32}(0)
+        ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, 10, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+        @test ms isa Vector
+        @test length(ms) == 2
+        @test ambig[] == 1
+    end
+    let ambig = Ref{Int32}(0)
+        ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, -1, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+        @test ms isa Vector
+        @test length(ms) == 2
+        @test ambig[] == 1
+    end
+    let ambig = Ref{Int32}(0)
+        ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, 10, Base.get_world_counter(), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+        @test ms isa Vector
+        @test length(ms) == 3
+        @test ambig[] == 1
+    end
+    let ambig = Ref{Int32}(0)
+        ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, -1, Base.get_world_counter(), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+        @test ms isa Vector
+        @test length(ms) == 3
+        @test ambig[] == 1
+    end
 end
 
 module Ambig9
@@ -308,32 +339,22 @@ end
 @test length(detect_unbound_args(M25341; recursive=true)) == 1
 
 # Test that Core and Base are free of UndefVarErrors
-# not using isempty so this prints more information when it fails
 @testset "detect_unbound_args in Base and Core" begin
     # TODO: review this list and remove everything between test_broken and test
     let need_to_handle_undef_sparam =
             Set{Method}(detect_unbound_args(Core; recursive=true))
-        pop!(need_to_handle_undef_sparam, which(Core.Compiler.eltype, Tuple{Type{Tuple{Any}}}))
-        @test_broken need_to_handle_undef_sparam == Set()
-        pop!(need_to_handle_undef_sparam, which(Core.Compiler._cat, Tuple{Any, AbstractArray}))
-        pop!(need_to_handle_undef_sparam, first(methods(Core.Compiler.same_names)))
-        @test need_to_handle_undef_sparam == Set()
+        @test isempty(need_to_handle_undef_sparam)
     end
     let need_to_handle_undef_sparam =
             Set{Method}(detect_unbound_args(Base; recursive=true, allowed_undefineds))
         pop!(need_to_handle_undef_sparam, which(Base._totuple, (Type{Tuple{Vararg{E}}} where E, Any, Any)))
-        pop!(need_to_handle_undef_sparam, which(Base.eltype, Tuple{Type{Tuple{Any}}}))
+        pop!(need_to_handle_undef_sparam, which(Base._eltype_ntuple, Tuple{Type{Tuple{Any}}}))
         pop!(need_to_handle_undef_sparam, first(methods(Base.same_names)))
-        @test_broken need_to_handle_undef_sparam == Set()
+        @test_broken isempty(need_to_handle_undef_sparam)
         pop!(need_to_handle_undef_sparam, which(Base._cat, Tuple{Any, AbstractArray}))
         pop!(need_to_handle_undef_sparam, which(Base.byteenv, (Union{AbstractArray{Pair{T,V}, 1}, Tuple{Vararg{Pair{T,V}}}} where {T<:AbstractString,V},)))
         pop!(need_to_handle_undef_sparam, which(Base.float, Tuple{AbstractArray{Union{Missing, T},N} where {T, N}}))
-        pop!(need_to_handle_undef_sparam, which(Base.float, Tuple{Type{Union{Missing, T}} where T}))
-        pop!(need_to_handle_undef_sparam, which(Base.complex, Tuple{Type{Union{Missing, T}} where T}))
-        pop!(need_to_handle_undef_sparam, which(Base.zero, Tuple{Type{Union{Missing, T}} where T}))
-        pop!(need_to_handle_undef_sparam, which(Base.one, Tuple{Type{Union{Missing, T}} where T}))
-        pop!(need_to_handle_undef_sparam, which(Base.oneunit, Tuple{Type{Union{Missing, T}} where T}))
-        @test need_to_handle_undef_sparam == Set()
+        @test isempty(need_to_handle_undef_sparam)
     end
 end
 
@@ -386,7 +407,7 @@ let has_ambig = Ref(Int32(0))
     ms = Base._methods_by_ftype(Tuple{typeof(fnoambig), Any, Any}, nothing, 4, Base.get_world_counter(), false, Ref(typemin(UInt)), Ref(typemax(UInt)), has_ambig)
     @test ms isa Vector
     @test length(ms) == 4
-    @test has_ambig[] == 0
+    @test has_ambig[] == 1 # 0 is better, but expensive and probably unnecessary to compute
 end
 
 # issue #11407
@@ -427,4 +448,43 @@ cc46601(::Type{T}, x::Int) where {T<:AbstractString} = 7
 @test length(methods(cc46601, Tuple{Type{<:Integer}, Integer})) == 2
 @test length(Base.methods_including_ambiguous(cc46601, Tuple{Type{<:Integer}, Integer})) == 7
 
+# Issue #55231
+struct U55231{P} end
+struct V55231{P} end
+U55231(::V55231) = nothing
+(::Type{T})(::V55231) where {T<:U55231} = nothing
+@test length(methods(U55231)) == 1
+U55231(a, b) = nothing
+@test length(methods(U55231)) == 2
+struct S55231{P} end
+struct T55231{P} end
+(::Type{T})(::T55231) where {T<:S55231} = nothing
+S55231(::T55231) = nothing
+@test length(methods(S55231)) == 1
+S55231(a, b) = nothing
+@test length(methods(S55231)) == 2
+
+ambig10() = 1
+ambig10(a::Vararg{Any}) = 2
+ambig10(a::Vararg{Union{Int32,Int64}}) = 6
+ambig10(a::Vararg{Matrix}) = 4
+ambig10(a::Vararg{Number}) = 7
+ambig10(a::Vararg{N}) where {N<:Number} = 5
+let ambig = Ref{Int32}(0)
+    ms = Base._methods_by_ftype(Tuple{typeof(ambig10), Vararg}, nothing, -1, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+    @test ms isa Vector
+    @test length(ms) == 6
+    @test_broken ambig[] == 0
+end
+let ambig = Ref{Int32}(0)
+    ms = Base._methods_by_ftype(Tuple{typeof(ambig10), Vararg{Number}}, nothing, -1, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+    @test ms isa Vector
+    @test length(ms) == 4
+    @test_broken ambig[] == 0
+    @test ms[1].method === which(ambig10, ())
+    @test ms[2].method === which(ambig10, (Vararg{Union{Int32, Int64}},))
+    @test ms[3].method === which(ambig10, Tuple{Vararg{N}} where N<:Number,)
+    @test ms[4].method === which(ambig10, (Vararg{Number},))
+end
+
 nothing
diff --git a/test/arrayops.jl b/test/arrayops.jl
index 770cec3705038..6e549b641f63b 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -100,6 +100,18 @@ using Dates
     @test Array{eltype(a)}(a) !== a
     @test Vector(a) !== a
 end
+@testset "effect inference for `reshape` for `Array`" begin
+    for Arr ∈ (Array{<:Any, 0}, Vector, Matrix, Array{<:Any, 3})
+        for Shape ∈ (Tuple{Int}, Tuple{Int, Int})
+            effects = Base.infer_effects(reshape, Tuple{Arr{Float32}, Shape})
+            @test Base.Compiler.is_effect_free(effects)
+            @test Base.Compiler.is_terminates(effects)
+            @test Base.Compiler.is_notaskstate(effects)
+            @test Base.Compiler.is_noub(effects)
+            @test Base.Compiler.is_nortcall(effects)
+        end
+    end
+end
 @testset "reshaping SubArrays" begin
     a = Array(reshape(1:5, 1, 5))
     @testset "linearfast" begin
@@ -115,7 +127,10 @@ end
         @test convert(Array{Int,1}, r) == [2,3,4]
         @test_throws MethodError convert(Array{Int,2}, r)
         @test convert(Array{Int}, r) == [2,3,4]
-        @test Base.unsafe_convert(Ptr{Int}, r) == Base.unsafe_convert(Ptr{Int}, s)
+        let rc = Base.cconvert(Ptr{Int}, r), rs = Base.cconvert(Ptr{Int}, s)
+            @test rc == rs
+            @test Base.unsafe_convert(Ptr{Int}, rc) == Base.unsafe_convert(Ptr{Int}, rs)
+        end
         @test isa(r, StridedArray)  # issue #22411
     end
     @testset "linearslow" begin
@@ -131,6 +146,7 @@ end
         @test convert(Array{Int,1}, r) == [2,3,5]
         @test_throws MethodError convert(Array{Int,2}, r)
         @test convert(Array{Int}, r) == [2,3,5]
+        # @test_throws ErrorException Base.cconvert(Ptr{Int}, r) broken=true
         @test_throws ErrorException Base.unsafe_convert(Ptr{Int}, r)
         r[2] = -1
         @test a[3] == -1
@@ -304,6 +320,47 @@ end
     @test_throws ArgumentError dropdims(a, dims=4)
     @test_throws ArgumentError dropdims(a, dims=6)
 
+    h1 = HeterogeneousAxisArray(rand(4, 1))
+    h2 = HeterogeneousAxisArray(rand(1, 4))
+    @test size(dropdims(h1, dims=2)) == (4,)
+    @test size(dropdims(h2, dims=1)) == (4,)
+
+    @testset "insertdims" begin
+        a = rand(8, 7)
+        @test @inferred(insertdims(a, dims=1)) == @inferred(insertdims(a, dims=(1,))) == reshape(a, (1, 8, 7))
+        @test @inferred(insertdims(a, dims=3))  == @inferred(insertdims(a, dims=(3,))) == reshape(a, (8, 7, 1))
+        @test @inferred(insertdims(a, dims=(1, 3)))  == reshape(a, (1, 8, 1, 7))
+        @test @inferred(insertdims(a, dims=(1, 2, 3)))  == reshape(a, (1, 1, 1, 8, 7))
+        @test @inferred(insertdims(a, dims=(1, 4)))  == reshape(a, (1, 8, 7, 1))
+        @test @inferred(insertdims(a, dims=(1, 3, 5)))  == reshape(a, (1, 8, 1, 7, 1))
+        @test @inferred(insertdims(a, dims=(1, 2, 4, 6)))  == reshape(a, (1, 1, 8, 1, 7, 1))
+        @test @inferred(insertdims(a, dims=(1, 3, 4, 6)))  == reshape(a, (1, 8, 1, 1, 7, 1))
+        @test @inferred(insertdims(a, dims=(1, 4, 6, 3)))  == reshape(a, (1, 8, 1, 1, 7, 1))
+        @test @inferred(insertdims(a, dims=(1, 3, 5, 6)))  == reshape(a, (1, 8, 1, 7, 1, 1))
+        @test_throws ArgumentError insertdims(a, dims=(1, 1, 2, 3))
+        @test_throws ArgumentError insertdims(a, dims=(1, 2, 2, 3))
+        @test_throws ArgumentError insertdims(a, dims=(1, 2, 3, 3))
+        @test_throws UndefKeywordError insertdims(a)
+        @test_throws ArgumentError insertdims(a, dims=0)
+        @test_throws ArgumentError insertdims(a, dims=(1, 2, 1))
+        @test_throws ArgumentError insertdims(a, dims=4)
+        @test_throws ArgumentError insertdims(a, dims=6)
+        A = reshape(1:6, 2, 3)
+        @test_throws ArgumentError insertdims(A, dims=(2, 2))
+        D = insertdims(A, dims=())
+        @test size(D) == size(A)
+        @test D == A
+        E = ones(2, 3, 4)
+        F = insertdims(E, dims=(2, 4, 6))
+        @test size(F) == (2, 1, 3, 1, 4, 1)
+        # insertdims and dropdims are inverses
+        b = rand(1,1,1,5,1,1,7)
+        for dims in [1, (1,), 2, (2,), 3, (3,), (1,3), (1,2,3), (1,2), (1,3,5), (1,2,5,6), (1,3,5,6), (1,3,5,6), (1,6,5,3)]
+            @test dropdims(insertdims(a; dims); dims) == a
+            @test insertdims(dropdims(b; dims); dims) == b
+        end
+    end
+
     sz = (5,8,7)
     A = reshape(1:prod(sz),sz...)
     @test A[2:6] == [2:6;]
@@ -463,6 +520,17 @@ end
         @test vc == [v[1:(i-1)]; 5; v[i:end]]
     end
     @test_throws BoundsError insert!(v, 5, 5)
+
+    # test that data is copied when there is plenty of room to do so
+    v = empty!(collect(1:100))
+    pushfirst!(v, 1)
+    @test length(v.ref.mem) == 100
+
+    # test that insert! at position 1 doesn't allocate for empty arrays with capacity (issue #58640)
+    v = empty!(Vector{Int}(undef, 5))
+    insert!(v, 1, 10)
+    @test v == [10]
+    @test length(v.ref.mem) == 5
 end
 
 @testset "popat!(::Vector, i, [default])" begin
@@ -558,32 +626,32 @@ end
     @test findall(!, m) == [k for (k,v) in pairs(m) if !v]
     @test findfirst(!iszero, a) == 2
     @test findfirst(a.==0) == 1
-    @test findfirst(a.==5) == nothing
+    @test findfirst(a.==5) === nothing
     @test findfirst(Dict(1=>false, 2=>true)) == 2
-    @test findfirst(Dict(1=>false)) == nothing
+    @test findfirst(Dict(1=>false)) === nothing
     @test findfirst(isequal(3), [1,2,4,1,2,3,4]) == 6
     @test findfirst(!isequal(1), [1,2,4,1,2,3,4]) == 2
     @test findfirst(isodd, [2,4,6,3,9,2,0]) == 4
-    @test findfirst(isodd, [2,4,6,2,0]) == nothing
+    @test findfirst(isodd, [2,4,6,2,0]) === nothing
     @test findnext(!iszero,a,4) == 4
     @test findnext(!iszero,a,5) == 6
     @test findnext(!iszero,a,1) == 2
     @test findnext(isequal(1),a,4) == 6
-    @test findnext(isequal(5),a,4) == nothing
+    @test findnext(isequal(5),a,4) === nothing
     @test findlast(!iszero, a) == 8
     @test findlast(a.==0) == 5
-    @test findlast(a.==5) == nothing
-    @test findlast(false) == nothing # test non-AbstractArray findlast
+    @test findlast(a.==5) === nothing
+    @test findlast(false) === nothing # test non-AbstractArray findlast
     @test findlast(isequal(3), [1,2,4,1,2,3,4]) == 6
     @test findlast(isodd, [2,4,6,3,9,2,0]) == 5
-    @test findlast(isodd, [2,4,6,2,0]) == nothing
+    @test findlast(isodd, [2,4,6,2,0]) === nothing
     @test findprev(!iszero,a,4) == 4
     @test findprev(!iszero,a,5) == 4
-    @test findprev(!iszero,a,1) == nothing
+    @test findprev(!iszero,a,1) === nothing
     @test findprev(isequal(1),a,4) == 2
     @test findprev(isequal(1),a,8) == 6
     @test findprev(isodd, [2,4,5,3,9,2,0], 7) == 5
-    @test findprev(isodd, [2,4,5,3,9,2,0], 2) == nothing
+    @test findprev(isodd, [2,4,5,3,9,2,0], 2) === nothing
     @test findfirst(isequal(0x00), [0x01, 0x00]) == 2
     @test findlast(isequal(0x00), [0x01, 0x00]) == 2
     @test findnext(isequal(0x00), [0x00, 0x01, 0x00], 2) == 3
@@ -604,6 +672,15 @@ end
     @testset "issue 43078" begin
         @test_throws TypeError findall([1])
     end
+
+    @testset "issue #46425" begin
+        counter = 0
+        function pred46425(x)
+            counter += 1
+            counter < 4 && x
+        end
+        @test findall(pred46425, [false, false, true, true]) == [3]
+    end
 end
 @testset "find with Matrix" begin
     A = [1 2 0; 3 4 0]
@@ -728,6 +805,9 @@ end
     v = [1,2,3]
     @test permutedims(v) == [1 2 3]
 
+    zd = fill(0)
+    @test permutedims(zd, ()) == zd
+
     x = PermutedDimsArray([1 2; 3 4], (2, 1))
     @test size(x) == (2, 2)
     @test copy(x) == [1 3; 2 4]
@@ -781,6 +861,14 @@ end
     oa = OffsetVector(copy(a), -1)
     @test circshift!(oa, 1) === oa
     @test oa == circshift(OffsetVector(a, -1), 1)
+
+    # 1d circshift! (#53554)
+    a = []
+    @test circshift!(a, 1) === a
+    @test circshift!(a, 1) == []
+    a = [1:5;]
+    @test circshift!(a, 10) === a
+    @test circshift!(a, 10) == 1:5
 end
 
 @testset "circcopy" begin
@@ -1170,6 +1258,18 @@ end
         @test setdiff((1, 2), (3, 2)) == [1]
         @test symdiff((1, 2), (3, 2)) == [1, 3]
     end
+
+    @testset "setdiff preserves element type of first argument" begin
+        @test setdiff([1, 2, 3], [1.0, 2.0]) isa Vector{Int}
+        @test setdiff([1.0, 2.0, 3.0], [1, 2]) isa Vector{Float64}
+        @test setdiff(['a', 'b', 'c'], [98]) isa Vector{Char}
+        @test setdiff([1, 2], [1.0, 2.0]) isa Vector{Int}
+    end
+    @testset "intersect promotes element types of arguments" begin
+        @test intersect([1, 2, 3], [1.0, 2.0]) isa Vector{Float64}
+        @test intersect([1.0, 2.0, 3.0], [1, 2]) isa Vector{Float64}
+        @test intersect(['a', 'b', 'c'], Int[]) isa Vector{Any}
+    end
 end
 
 @testset "mapslices" begin
@@ -1239,6 +1339,10 @@ end
     @test @inferred(mapslices(hcat, [1 2; 3 4], dims=1)) == [1 2; 3 4] # previously an error, now allowed
     @test mapslices(identity, [1 2; 3 4], dims=(2,2)) == [1 2; 3 4] # previously an error
     @test_broken @inferred(mapslices(identity, [1 2; 3 4], dims=(2,2))) == [1 2; 3 4]
+
+    # type inference in mapslices
+    a_ = @inferred (a -> mapslices(identity, reshape(a, size(a)..., 1, 1), dims=(3,4)))(a)
+    @test a_ == reshape(a, size(a)..., 1, 1)
 end
 
 @testset "single multidimensional index" begin
@@ -1347,6 +1451,14 @@ end
 end
 
 @testset "lexicographic comparison" begin
+    @testset "zero-dimensional" begin
+        vals = (0, 0.0, 1, 1.0)
+        for l ∈ vals
+            for r ∈ vals
+                @test cmp(fill(l), fill(r)) == cmp(l, r)
+            end
+        end
+    end
     @test cmp([1.0], [1]) == 0
     @test cmp([1], [1.0]) == 0
     @test cmp([1, 1], [1, 1]) == 0
@@ -1359,6 +1471,18 @@ end
     @test cmp([UInt8(1), UInt8(0)], [UInt8(0), UInt8(0)]) == 1
     @test cmp([UInt8(1), UInt8(0)], [UInt8(1), UInt8(0)]) == 0
     @test cmp([UInt8(0), UInt8(0)], [UInt8(1), UInt8(1)]) == -1
+
+    x = [1, 2, 3]
+    y = OffsetVector(x, -1)
+    @test cmp(x, y) == 1
+    @test cmp(y, x) == -1
+    @test !isless(x, y)
+    @test isless(y, x)
+
+    y2 = OffsetVector([1, 2, 3], 0)
+    @test cmp(x, y2) == 0
+    @test !isless(x, y2)
+    @test !isless(y2, x)
 end
 
 @testset "sort on arrays" begin
@@ -1433,6 +1557,15 @@ end
     @test sortslices(B, dims=(1,3)) == B
 end
 
+@testset "sortslices inference (#52019)" begin
+    x = rand(3, 2)
+    @inferred sortslices(x, dims=1)
+    @inferred sortslices(x, dims=(2,))
+    x = rand(1, 2, 3)
+    @inferred sortslices(x, dims=(1,2))
+    @inferred sortslices(x, dims=3, by=sum)
+end
+
 @testset "fill" begin
     @test fill!(Float64[1.0], -0.0)[1] === -0.0
     A = fill(1.,3,3)
@@ -1676,6 +1809,12 @@ end
     end
 end
 
+@testset "reverse zero dims" begin
+    a = fill(3)
+    @test a == reverse(a)
+    @test a === reverse!(a)
+end
+
 @testset "isdiag, istril, istriu" begin
     # Scalar
     @test isdiag(3)
@@ -1773,6 +1912,32 @@ end
     # offset array
     @test append!([1,2], OffsetArray([9,8], (-3,))) == [1,2,9,8]
     @test prepend!([1,2], OffsetArray([9,8], (-3,))) == [9,8,1,2]
+
+    # Error recovery
+    A = [1, 2]
+    @test_throws MethodError append!(A, [1, 2, "hi"])
+    @test A == [1, 2, 1, 2]
+
+    oA = OffsetVector(A, 0:3)
+    @test_throws InexactError append!(oA, [1, 2, 3.01])
+    @test oA == OffsetVector([1, 2, 1, 2, 1, 2], 0:5)
+
+    @test_throws InexactError append!(A, (x for x in [1, 2, 3.1]))
+    @test A == [1, 2, 1, 2, 1, 2, 1, 2]
+
+    @test_throws InexactError append!(A, (x for x in [1, 2, 3.1] if isfinite(x)))
+    @test A == [1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
+
+    @test_throws MethodError prepend!(A, [1, 2, "hi"])
+    @test A == [2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
+
+    A = [1, 2]
+    @test_throws InexactError prepend!(A, (x for x in [1, 2, 3.1]))
+    @test A == [2, 1, 1, 2]
+
+    A = [1, 2]
+    @test_throws InexactError prepend!(A, (x for x in [1, 2, 3.1] if isfinite(x)))
+    @test A == [2, 1, 1, 2]
 end
 
 let A = [1,2]
@@ -2013,6 +2178,7 @@ end
 
     I1 = CartesianIndex((2,3,0))
     I2 = CartesianIndex((-1,5,2))
+    @test +I1 == I1
     @test -I1 == CartesianIndex((-2,-3,0))
     @test I1 + I2 == CartesianIndex((1,8,2))
     @test I2 + I1 == CartesianIndex((1,8,2))
@@ -2085,7 +2251,7 @@ end
 
 # All we really care about is that we have an optimized
 # implementation, but the seed is a useful way to check that.
-@test hash(CartesianIndex()) == Base.IteratorsMD.cartindexhash_seed
+@test hash(CartesianIndex()) == Base.IteratorsMD.cartindexhash_seed ⊻ Base.HASH_SEED
 @test hash(CartesianIndex(1, 2)) != hash((1, 2))
 
 @testset "itr, iterate" begin
@@ -2115,6 +2281,8 @@ R = CartesianIndices((3,0))
     @test @inferred(eachindex(Base.IndexLinear(), a, b)) == 1:4
     @test @inferred(eachindex(a, b)) == CartesianIndices((2,2))
     @test @inferred(eachindex(a, a)) == 1:4
+    @test @inferred(eachindex(a, a, a)) == 1:4
+    @test @inferred(eachindex(a, a, b)) == CartesianIndices((2,2))
     @test_throws DimensionMismatch eachindex(a, rand(3,3))
     @test_throws DimensionMismatch eachindex(b, rand(3,3))
 end
@@ -2270,7 +2438,7 @@ end
     M = [1 2 3; 4 5 6; 7 8 9]
     @test eachrow(M) == eachslice(M, dims = 1) == [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
     @test eachcol(M) == eachslice(M, dims = 2) == [[1, 4, 7], [2, 5, 8], [3, 6, 9]]
-    @test_throws DimensionMismatch eachslice(M, dims = 4)
+    @test eachslice(M, dims = 4) == [[1 2 3; 4 5 6; 7 8 9;;;]]
 
     SR = @inferred eachrow(M)
     @test SR[2] isa eltype(SR)
@@ -2335,6 +2503,32 @@ end
         @test_throws BoundsError A[2,3] = [4,5]
         @test_throws BoundsError A[2,3] .= [4,5]
     end
+
+    @testset "trailing dimensions" begin
+        v = collect(1:3)
+
+        S2  = eachslice(v; dims = 2, drop=true)
+        @test S2 isa AbstractSlices{<:AbstractVector, 1}
+        @test size(S2) == (1,)
+        @test S2[1] == v
+
+        S2K = eachslice(v; dims = 2, drop=false)
+        @test S2K isa AbstractSlices{<:AbstractVector, 2}
+        @test size(S2K) == (1,1)
+        @test S2K[1,1] == v
+
+        M = reshape(1:6, 2, 3)
+
+        S13 = eachslice(M; dims = (1,3))
+        @test size(S13) == (2,1)
+        @test S13[2,1] == M[2,:,1]
+
+        S13K = eachslice(M; dims = (1,3), drop=false)
+        @test size(S13K) == (2,1,1)
+        @test S13K[1,1,1] == M[1,:]
+        @test S13K[2,1,1] == M[2,:]
+    end
+
 end
 
 ###
@@ -2500,9 +2694,9 @@ end
 @inferred map(Int8, Int[0])
 
 # make sure @inbounds isn't used too much
-mutable struct OOB_Functor{T}; a::T; end
-(f::OOB_Functor)(i::Int) = f.a[i]
-let f = OOB_Functor([1,2])
+mutable struct OOB_Callable{T}; a::T; end
+(f::OOB_Callable)(i::Int) = f.a[i]
+let f = OOB_Callable([1,2])
     @test_throws BoundsError map(f, [1,2,3,4,5])
 end
 
@@ -2556,29 +2750,36 @@ function f15894(d)
     s
 end
 @test f15894(fill(1, 100)) == 100
+@test (@nexprs 2 i -> "_i_: $i") == "_i_: 2"
 end
 
 @testset "sign, conj[!], ~" begin
-    local A, B, C
-    A = [-10,0,3]
-    B = [-10.0,0.0,3.0]
-    C = [1,im,0]
-
-    @test sign.(A) == [-1,0,1]
-    @test sign.(B) == [-1,0,1]
-    @test typeof(sign.(A)) == Vector{Int}
-    @test typeof(sign.(B)) == Vector{Float64}
-
-    @test conj(A) == A
-    @test conj!(copy(A)) == A
-    @test conj(B) == A
-    @test conj(C) == [1,-im,0]
-    @test typeof(conj(A)) == Vector{Int}
-    @test typeof(conj(B)) == Vector{Float64}
-    @test typeof(conj(C)) == Vector{Complex{Int}}
-
-    @test .~A == [9,-1,-4]
-    @test typeof(.~A) == Vector{Int}
+    let A, B, C, D, E # Suppress :latestworld to get good inference for the allocations test
+        A = [-10,0,3]
+        B = [-10.0,0.0,3.0]
+        C = [1,im,0]
+
+        @test sign.(A) == [-1,0,1]
+        @test sign.(B) == [-1,0,1]
+        @test typeof(sign.(A)) == Vector{Int}
+        @test typeof(sign.(B)) == Vector{Float64}
+
+        @test conj(A) == A
+        @test conj!(copy(A)) == A
+        @test conj(B) == A
+        @test conj(C) == [1,-im,0]
+        @test typeof(conj(A)) == Vector{Int}
+        @test typeof(conj(B)) == Vector{Float64}
+        @test typeof(conj(C)) == Vector{Complex{Int}}
+        D = [C copy(C); copy(C) copy(C)]
+        @test conj(D) == conj!(copy(D))
+        E = [D, copy(D)]
+        @test conj(E) == conj!(copy(E))
+        @test (@allocations conj!(E)) == 0
+
+        @test .~A == [9,-1,-4]
+        @test typeof(.~A) == Vector{Int}
+    end
 end
 
 # @inbounds is expression-like, returning its value; #15558
@@ -2715,7 +2916,7 @@ end
 end
 
 @testset "accumulate, accumulate!" begin
-    @test accumulate(+, [1,2,3]) == [1, 3, 6]
+    @test accumulate(+, [1, 2, 3]) == [1, 3, 6]
     @test accumulate(min, [1 2; 3 4], dims=1) == [1 2; 1 2]
     @test accumulate(max, [1 2; 3 0], dims=2) == [1 2; 3 3]
     @test accumulate(+, Bool[]) == Int[]
@@ -2732,12 +2933,15 @@ end
     @test accumulate(min, [1 0; 0 1], dims=1) == [1 0; 0 0]
     @test accumulate(min, [1 0; 0 1], dims=2) == [1 0; 0 0]
 
+    @test accumulate(+, [1, 2, 3], dims=1, init=1) == [2, 4, 7]
+    @test accumulate(*, [1, 4, 2], dims=1, init=2) == [2, 8, 16]
+
     @test accumulate(min, [3 2 1; 3 2 1], dims=2) == [3 2 1; 3 2 1]
     @test accumulate(min, [3 2 1; 3 2 1], dims=2, init=2) == [2 2 1; 2 2 1]
 
     @test isa(accumulate(+, Int[]), Vector{Int})
     @test isa(accumulate(+, Int[]; init=1.), Vector{Float64})
-    @test accumulate(+, [1,2]; init=1) == [2, 4]
+    @test accumulate(+, [1, 2]; init=1) == [2, 4]
     arr = randn(4)
     @test accumulate(*, arr; init=1) ≈ accumulate(*, arr)
 
@@ -2781,7 +2985,7 @@ end
 
     # asymmetric operation
     op(x,y) = 2x+y
-    @test accumulate(op, [10,20, 30]) == [10, op(10, 20), op(op(10, 20), 30)] == [10, 40, 110]
+    @test accumulate(op, [10, 20, 30]) == [10, op(10, 20), op(op(10, 20), 30)] == [10, 40, 110]
     @test accumulate(op, [10 20 30], dims=2) == [10 op(10, 20) op(op(10, 20), 30)] == [10 40 110]
 
     #25506
@@ -2790,6 +2994,33 @@ end
     @inferred accumulate(*, String[])
     @test accumulate(*, ['a' 'b'; 'c' 'd'], dims=1) == ["a" "b"; "ac" "bd"]
     @test accumulate(*, ['a' 'b'; 'c' 'd'], dims=2) == ["a" "ab"; "c" "cd"]
+
+    # #53438
+    v = [(1, 2), (3, 4)]
+    @test_throws MethodError accumulate(+, v)
+    @test_throws MethodError cumsum(v)
+    @test_throws MethodError cumprod(v)
+    @test_throws MethodError accumulate(+, v; init=(0, 0))
+    @test_throws MethodError accumulate(+, v; dims=1, init=(0, 0))
+
+    # Some checks to ensure we're identifying the widest needed eltype
+    # as identified in PR 53461
+    @testset "Base._accumulate_promote_op" begin
+        # A somewhat contrived example where each call to `foo`
+        # will return a different type
+        foo(x::Bool, y::Int)::Int = x + y
+        foo(x::Int, y::Int)::Float64 = x + y
+        foo(x::Float64, y::Int)::ComplexF64 = x + y * im
+        foo(x::ComplexF64, y::Int)::String = string(x, "+", y)
+
+        v = collect(1:5)
+        @test Base._accumulate_promote_op(foo, v; init=true) === Base._accumulate_promote_op(foo, v) == Union{Float64, String, ComplexF64}
+        @test Base._accumulate_promote_op(/, v) === Base._accumulate_promote_op(/, v; init=0) == Float64
+        @test Base._accumulate_promote_op(+, v) === Base._accumulate_promote_op(+, v; init=0) === Int
+        @test Base._accumulate_promote_op(+, v; init=0.0) === Float64
+        @test Base._accumulate_promote_op(+, Union{Int, Missing}[v...]) === Union{Int, Missing}
+        @test Base._accumulate_promote_op(+, Union{Int, Nothing}[v...]) === Union{Int, Nothing}
+    end
 end
 
 struct F21666{T <: Base.ArithmeticStyle}
@@ -2798,7 +3029,7 @@ end
 
 Base.ArithmeticStyle(::Type{F21666{T}}) where {T} = T()
 Base.:+(x::F, y::F) where {F <: F21666} = F(x.x + y.x)
-Float64(x::F21666) = Float64(x.x)
+Base.Float64(x::F21666) = Float64(x.x)
 @testset "Exactness of cumsum # 21666" begin
     # test that cumsum uses more stable algorithm
     # for types with unknown/rounding arithmetic
@@ -3078,6 +3309,28 @@ end
         "BoundsError: attempt to access 2×2 Matrix{Float64} at index [10, \"bad index\"]"
 end
 
+@testset "return type inference of function that calls `length(::Array)`" begin
+    f(x) = length(x)
+    @test Int === Base.infer_return_type(f, Tuple{Array})
+end
+
+@testset "return type inference of `sizeof(::Array)`" begin
+    @test isconcretetype(Base.infer_return_type(sizeof, Tuple{Array}))
+end
+
+@testset "return type inference of `getindex(::Array, ::Colon)`" begin
+    f = a -> a[:]
+    @test Vector == Base.infer_return_type(f, Tuple{Array})
+    @test Vector{Float32} === Base.infer_return_type(f, Tuple{Array{Float32}})
+end
+
+@testset "return type inference of linear `eachindex` for `Array` and `Memory`" begin
+    f = a -> eachindex(IndexLinear(), a)
+    for typ in (Array, Memory, Union{Array, Memory})
+        @test isconcretetype(Base.infer_return_type(f, Tuple{typ}))
+    end
+end
+
 @testset "inference of Union{T,Nothing} arrays 26771" begin
     f(a) = (v = [1, nothing]; [v[x] for x in a])
     @test only(Base.return_types(f, (Int,))) === Union{Array{Int,0}, Array{Nothing,0}}
@@ -3117,3 +3370,44 @@ end
         @test c + zero(c) == c
     end
 end
+
+@testset "Wrapping Memory into Arrays" begin
+    mem = Memory{Int}(undef, 10) .= 1
+    memref = memoryref(mem)
+    @test_throws DimensionMismatch Base.wrap(Array, mem, (10, 10))
+    @test Base.wrap(Array, mem, (5,)) == ones(Int, 5)
+    @test Base.wrap(Array, mem, 2) == ones(Int, 2)
+    @test Base.wrap(Array, memref, 10) == ones(Int, 10)
+    @test Base.wrap(Array, memref, (2,2,2)) == ones(Int,2,2,2)
+    @test Base.wrap(Array, mem, (5, 2)) == ones(Int, 5, 2)
+
+    memref2 = memoryref(mem, 3)
+    @test Base.wrap(Array, memref2, (5,)) == ones(Int, 5)
+    @test Base.wrap(Array, memref2, 2) == ones(Int, 2)
+    @test Base.wrap(Array, memref2, (2,2,2)) == ones(Int,2,2,2)
+    @test Base.wrap(Array, memref2, (3, 2)) == ones(Int, 3, 2)
+    @test_throws DimensionMismatch Base.wrap(Array, memref2, 9)
+    @test_throws DimensionMismatch Base.wrap(Array, memref2, 10)
+end
+
+@testset "Memory size" begin
+    len = 5
+    mem = Memory{Int}(undef, len)
+    @test size(mem, 1) == len
+    @test size(mem, 0x1) == len
+    @test size(mem, 2) == 1
+    @test size(mem, 0x2) == 1
+end
+
+@testset "MemoryRef" begin
+    mem = Memory{Float32}(undef, 3)
+    ref = memoryref(mem, 2)
+    @test parent(ref) === mem
+    @test Base.memoryindex(ref) === 2
+
+    # Test for zero-sized structs
+    mem = Memory{Nothing}(undef, 10)
+    ref = memoryref(mem, 8)
+    @test parent(ref) === mem
+    @test Base.memoryindex(ref) === 8
+end
diff --git a/test/atexit.jl b/test/atexit.jl
index 64b56e32466df..08a8e0c4b46a2 100644
--- a/test/atexit.jl
+++ b/test/atexit.jl
@@ -214,12 +214,13 @@ using Test
             # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
             # 3. attempting to register a hook after all hooks have finished (disallowed)
             """
-            const atexit_has_finished = Threads.Atomic{Bool}(false)
+            const atexit_has_finished = Threads.Atomic{Int}(0)
             atexit() do
                 Threads.@spawn begin
                     # Block until the atexit hooks have all finished. We use a manual "spin
                     # lock" because task switch is disallowed inside the finalizer, below.
-                    while !atexit_has_finished[] end
+                    atexit_has_finished[] = 1
+                    while atexit_has_finished[] == 1; GC.safepoint(); end
                     try
                         # By the time this runs, all the atexit hooks will be done.
                         # So this will throw.
@@ -231,15 +232,16 @@ using Test
                         exit(22)
                     end
                 end
+                while atexit_has_finished[] == 0; GC.safepoint(); end
             end
             # Finalizers run after the atexit hooks, so this blocks exit until the spawned
             # task above gets a chance to run.
             x = []
             finalizer(x) do x
                 # Allow the spawned task to finish
-                atexit_has_finished[] = true
+                atexit_has_finished[] = 2
                 # Then spin forever to prevent exit.
-                while atexit_has_finished[] end
+                while atexit_has_finished[] == 2; GC.safepoint(); end
             end
             exit(0)
             """ => 22,
diff --git a/test/atomics.jl b/test/atomics.jl
index dd50fb96be49f..369a63f7d5fbf 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -23,27 +23,43 @@ mutable struct Refxy{T}
 end
 
 modname = String(nameof(@__MODULE__))
-@test_throws ErrorException("invalid redefinition of constant $modname.ARefxy") @eval mutable struct ARefxy{T}
+const orig_Refxy = Refxy
+const orig_ARefxy = ARefxy
+mutable struct ARefxy{T}
     @atomic x::T
     @atomic y::T
 end
-@test_throws ErrorException("invalid redefinition of constant $modname.ARefxy") @eval mutable struct ARefxy{T}
+@test orig_ARefxy !== ARefxy
+const ARefxy = orig_ARefxy
+mutable struct ARefxy{T}
     x::T
     y::T
 end
-@test_throws ErrorException("invalid redefinition of constant $modname.ARefxy") @eval mutable struct ARefxy{T}
+@test orig_ARefxy !== ARefxy
+const ARefxy = orig_ARefxy
+mutable struct ARefxy{T}
     x::T
     @atomic y::T
 end
-@test_throws ErrorException("invalid redefinition of constant $modname.Refxy") @eval mutable struct Refxy{T}
+@test orig_ARefxy !== ARefxy
+const ARefxy = orig_ARefxy
+mutable struct Refxy{T}
     x::T
     @atomic y::T
 end
+@test orig_Refxy !== Refxy
+const Refxy = orig_Refxy
 
 copy(r::Union{Refxy,ARefxy}) = typeof(r)(r.x, r.y)
 function add(x::T, y)::T where {T}; x + y; end
 swap(x, y) = y
 
+struct UndefComplex{T}
+    re::T
+    im::T
+end
+Base.convert(T::Type{<:UndefComplex}, S) = T(S, 0)
+
 let T1 = Refxy{NTuple{3,UInt8}},
     T2 = ARefxy{NTuple{3,UInt8}}
     @test sizeof(T1) == 6
@@ -60,10 +76,13 @@ end
 
 # check that very large types are getting locks
 let (x, y) = (Complex{Int128}(10, 30), Complex{Int128}(20, 40))
-    ar = ARefxy(x, y)
     r = Refxy(x, y)
+    ar = ARefxy(x, y)
+    mr = AtomicMemory{Pair{typeof(x),typeof(y)}}(undef, 20)
     @test 64 == sizeof(r) < sizeof(ar)
-    @test sizeof(r) == sizeof(ar) - Int(fieldoffset(typeof(ar), 1))
+    @test sizeof(ar) == sizeof(r) + Int(fieldoffset(typeof(ar), 1))
+    @test_broken Base.elsize(mr) == sizeof(ar)
+    @test sizeof(mr) == length(mr) * (sizeof(r) + 16)
 end
 
 struct PadIntA <: Number # internal padding
@@ -81,13 +100,17 @@ primitive type Int24 <: Signed 24 end # integral padding
 Int24(x::Int) = Core.Intrinsics.trunc_int(Int24, x)
 Base.Int(x::PadIntB) = x.a + (Int(x.b) << 8) + (Int(x.c) << 16)
 Base.:(+)(x::PadIntA, b::Int) = PadIntA(x.b + b)
+Base.:(==)(x::PadIntA, b::Int) = x == PadIntA(b)
 Base.:(+)(x::PadIntB, b::Int) = PadIntB(Int(x) + b)
 Base.:(+)(x::Int24, b::Int) = Core.Intrinsics.add_int(x, Int24(b))
 Base.show(io::IO, x::PadIntA) = print(io, "PadIntA(", x.b, ")")
 Base.show(io::IO, x::PadIntB) = print(io, "PadIntB(", Int(x), ")")
 Base.show(io::IO, x::Int24) = print(io, "Int24(", Core.Intrinsics.zext_int(Int, x), ")")
 
+## Fields
+
 @noinline function _test_field_operators(r)
+    GC.gc(false)
     r = r[]
     TT = fieldtype(typeof(r), :x)
     T = typeof(getfield(r, :x))
@@ -116,6 +139,7 @@ test_field_operators(ARefxy{Any}(123_10, 123_20))
 test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing))
 test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20))
 test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20))
+test_field_operators(ARefxy{Complex{Real}}(123_10, 123_20))
 test_field_operators(ARefxy{PadIntA}(123_10, 123_20))
 test_field_operators(ARefxy{PadIntB}(123_10, 123_20))
 #FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20))
@@ -123,6 +147,7 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
 
 @noinline function _test_field_orderings(r, x, y)
     @nospecialize x y
+    GC.gc(false)
     r = r[]
     TT = fieldtype(typeof(r), :x)
 
@@ -264,6 +289,26 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
     @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((x, true))
     @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((y, x === y))
     @test replacefield!(r, :x, y, x, :sequentially_consistent) === ReplaceType{TT}((y, true))
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :u, :not_atomic)
+    @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be written non-atomically") setfieldonce!(r, :x, x)
+    @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be written non-atomically") setfieldonce!(r, :x, y, :not_atomic, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x,  x, :unordered, :not_atomic)
+    @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be accessed non-atomically") setfieldonce!(r, :x, x, :monotonic, :not_atomic)
+    @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be accessed non-atomically") setfieldonce!(r, :x, x, :acquire, :not_atomic)
+    @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be accessed non-atomically") setfieldonce!(r, :x, x, :release, :not_atomic)
+    @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be accessed non-atomically") setfieldonce!(r, :x, x, :acquire_release, :not_atomic)
+    @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be accessed non-atomically") setfieldonce!(r, :x, x, :sequentially_consistent, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :u)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :unordered)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :monotonic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :acquire_release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :sequentially_consistent)
+    @test setfieldonce!(r, :x, y, :sequentially_consistent, :sequentially_consistent) === false
+    @test setfieldonce!(r, :x, y, :sequentially_consistent, :sequentially_consistent) === false
+    @test setfieldonce!(r, :x, x, :sequentially_consistent) === false
     nothing
 end
 @noinline function test_field_orderings(r, x, y)
@@ -284,15 +329,12 @@ test_field_orderings(ARefxy{Any}(true, false), true, false)
 test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing)
 test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1)
 test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_field_orderings(Complex{Real}(10.5, 30.5), Complex{Real}(20.5, 40.5))
+test_field_orderings(Complex{Rational{Integer}}(10, 30), Complex{Rational{Integer}}(20, 40))
+test_field_orderings(Pair{NTuple{3,Float64},NTuple{3,Real}}((10.5,11.5,12.5), (30.5,40.5,50.5)), Pair{NTuple{3,Float64},NTuple{3,Real}}((110.5,111.5,112.5), (130.5,140.5,150.5)))
 test_field_orderings(10.0, 20.0)
 test_field_orderings(NaN, Inf)
 
-struct UndefComplex{T}
-    re::T
-    im::T
-    UndefComplex{T}() where {T} = new{T}()
-end
-Base.convert(T::Type{<:UndefComplex}, S) = T()
 @noinline function _test_field_undef(r)
     r = r[]
     TT = fieldtype(typeof(r), :x)
@@ -318,6 +360,29 @@ test_field_undef(ARefxy{Union{Nothing,Integer}})
 test_field_undef(ARefxy{UndefComplex{Any}})
 test_field_undef(ARefxy{UndefComplex{UndefComplex{Any}}})
 
+@noinline function _test_once_undef(r)
+    r = r[]
+    TT = fieldtype(typeof(r), :x)
+    x = convert(TT, 12345_10)
+    @test_throws UndefRefError getfield(r, :x)
+    @test setfieldonce!(r, :x, x, :sequentially_consistent) === true
+    @test getfield(r, :x, :sequentially_consistent) === x
+    @test setfieldonce!(r, :x, convert(TT, 12345_20), :sequentially_consistent) === false
+    nothing
+end
+
+@noinline function test_once_undef(TT)
+    _test_once_undef(Ref(TT()))
+    _test_once_undef(Ref{Any}(TT()))
+    nothing
+end
+
+test_once_undef(ARefxy{BigInt})
+test_once_undef(ARefxy{Any})
+test_once_undef(ARefxy{Union{Nothing,Integer}})
+test_once_undef(ARefxy{UndefComplex{Any}})
+test_once_undef(ARefxy{UndefComplex{UndefComplex{Any}}})
+
 @test_throws ErrorException @macroexpand @atomic foo()
 @test_throws ErrorException @macroexpand @atomic foo += bar
 @test_throws ErrorException @macroexpand @atomic foo += bar
@@ -374,6 +439,99 @@ let a = ARefxy(1, -1)
     @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x xchg
 end
 
+function _test_atomic_get_set_swap_modify(T, x, y, z)
+    @testset "atomic get,set,swap,modify" begin
+        mem = AtomicMemory{T}(undef, 2)
+        @test_throws CanonicalIndexError mem[1] = 3
+
+        @test Base.setindex_atomic!(mem, Base.default_access_order(mem), x, 1) == x
+        @test mem[1] == x
+        @test Base.setindex_atomic!(mem, Base.default_access_order(mem), y, 2) == y
+        @test mem[2] == y
+
+        idx = UInt32(2)
+
+        @test (@atomic mem[1]) == x
+        @test (@atomic mem[idx]) == y
+
+        (old, new) = (mem[idx], z)
+        # old and new are intentionally of different types to test inner conversion
+        @test (@atomic mem[idx] = new) == new
+        @test mem[idx] == new
+        @atomic mem[idx] = old
+
+        @test (@atomicswap mem[idx] = new) == old
+        @test mem[idx] == new
+        @atomic mem[idx] = old
+
+        try
+            old + new
+            @test (@atomic mem[idx] += new) == old + new
+            @test mem[idx] == old + new
+            @atomic mem[idx] = old
+        catch err
+            if !(err isa MethodError)
+                rethrow(err)
+            end
+        end
+    end
+end
+
+function _test_atomic_setonce_replace(T, initial, desired)
+    @testset "atomic setonce,replace" begin
+        mem = AtomicMemory{T}(undef, 2)
+        if isassigned(mem, 2)
+            @test (@atomiconce mem[2] = initial) == false
+            @atomic mem[2] = initial
+        else
+            @test (@atomiconce mem[2] = initial) == true
+            @test mem[2] == initial
+            @test (@atomiconce mem[2] = desired) == false
+            @test mem[2] == initial
+            @test !isassigned(mem, 1)
+        end
+
+        idx = UInt(2)
+
+        expected = @atomic mem[idx]
+        @test (@atomicreplace mem[idx] expected => desired) == (old=expected, success=true)
+        @test mem[idx] == desired
+
+        @atomic mem[idx] = expected
+        @test (@atomicreplace mem[idx] desired => desired) == (old=expected, success=false)
+        @test mem[idx] == expected
+
+        @atomic mem[idx] = expected
+        @test (@atomicreplace mem[idx] Pair(expected, desired)) == (old=expected, success=true)
+        @test mem[idx] == desired
+
+        @atomic mem[idx] = expected
+        @test (@atomicreplace mem[idx] Pair(desired, desired)) == (old=initial, success=false)
+        @test mem[idx] == expected
+    end
+end
+@testset "@atomic with AtomicMemory" begin
+
+    _test_atomic_get_set_swap_modify(Float64, rand(), rand(), 10)
+    _test_atomic_get_set_swap_modify(PadIntA, 123_1, 123_2, 10)
+    _test_atomic_get_set_swap_modify(Union{Nothing,Int}, 123_1, nothing, 10)
+    _test_atomic_get_set_swap_modify(Union{Nothing,Int}, 123_1, 234_5, 10)
+    _test_atomic_get_set_swap_modify(Vector{BigInt}, BigInt[1, 2, 3], BigInt[1, 2], [2, 4])
+
+    _test_atomic_setonce_replace(Float64, rand(), 42)
+    _test_atomic_setonce_replace(PadIntA, 123_1, 123_2)
+    _test_atomic_setonce_replace(Union{Nothing,Int}, 123_1, nothing)
+    _test_atomic_setonce_replace(Vector{BigInt}, BigInt[1, 2], [3, 4])
+    _test_atomic_setonce_replace(String, "abc", "cab")
+end
+
+let a = ARefxy{Union{Nothing,Integer}}()
+    @test_throws ConcurrencyViolationError @atomiconce :not_atomic a.x = 2
+    @test true === @atomiconce a.x = 1
+    @test 1 === @atomic a.x
+    @test false === @atomiconce a.x = 2
+end
+
 # atomic getfield with boundcheck
 # via codegen
 getx(a, boundcheck) = getfield(a, :x, :sequentially_consistent, boundcheck)
@@ -384,3 +542,576 @@ ans = getfield(ARefxy{Any}(42, 42), :x, :sequentially_consistent, true)
 @test ans == 42
 ans = getfield(ARefxy{Any}(42, 42), :x, :sequentially_consistent, false)
 @test ans == 42
+
+
+## Globals
+
+# the optimizer is terrible at handling PhiC nodes, so this must be a function
+# generator with a custom inlining here of r, instead of being able to assume
+# the inlining pass can inline a constant value correctly
+function gen_test_global_operators(@nospecialize r)
+    M = @__MODULE__
+    return quote
+        TT = Core.get_binding_type($M, $r)
+        T = typeof(getglobal($M, $r))
+        @test getglobal($M, $r, :sequentially_consistent) === T(123_10)
+        @test setglobal!($M, $r, T(123_1), :sequentially_consistent) === T(123_1)
+        @test getglobal($M, $r, :sequentially_consistent) === T(123_1)
+        @test replaceglobal!($M, $r, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), false))
+        @test replaceglobal!($M, $r, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), true))
+        @test getglobal($M, $r, :sequentially_consistent) === T(123_30)
+        @test replaceglobal!($M, $r, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_30), false))
+        @test getglobal($M, $r, :sequentially_consistent) === T(123_30)
+        @test modifyglobal!($M, $r, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_30), T(123_31))
+        @test modifyglobal!($M, $r, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_31), T(123_32))
+        @test getglobal($M, $r, :sequentially_consistent) === T(123_32)
+        @test swapglobal!($M, $r, T(123_1), :sequentially_consistent) === T(123_32)
+        @test getglobal($M, $r, :sequentially_consistent) === T(123_1)
+    end
+end
+@noinline function test_global_operators(T::Type)
+    r = Symbol("g1_$T")
+    @eval global $r::$T = 123_10
+    invokelatest(@eval(() -> $(gen_test_global_operators(QuoteNode(r)))))
+    r = Symbol("g2_$T")
+    @eval global $r::$T = 123_10
+    invokelatest(@eval(r -> $(gen_test_global_operators(:r))), r)
+    nothing
+end
+test_global_operators(Int)
+test_global_operators(Any)
+test_global_operators(Union{Nothing,Int})
+test_global_operators(Complex{Int32})
+test_global_operators(Complex{Int128})
+test_global_operators(Complex{Real})
+test_global_operators(PadIntA)
+test_global_operators(PadIntB)
+#FIXME: test_global_operators(Int24)
+test_global_operators(Float64)
+
+function gen_test_global_orderings(@nospecialize r)
+    M = @__MODULE__
+    return quote
+        @nospecialize x y
+        TT = Core.get_binding_type($M, $r)
+
+        @test getglobal($M, $r) === x
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") getglobal($M, $r, :u)
+        @test_throws ConcurrencyViolationError("getglobal: module binding cannot be read non-atomically") getglobal($M, $r, :not_atomic)
+        @test getglobal($M, $r, :unordered) === x
+        @test getglobal($M, $r, :monotonic) === x
+        @test getglobal($M, $r, :acquire) === x
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") getglobal($M, $r, :release) === x
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") getglobal($M, $r, :acquire_release) === x
+        @test getglobal($M, $r, :sequentially_consistent) === x
+        @test isdefined($M, $r)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined($M, $r, :u)
+        @test_throws ConcurrencyViolationError("isdefined: module binding cannot be accessed non-atomically") isdefined($M, $r, :not_atomic)
+        @test isdefined($M, $r, :unordered)
+        @test isdefined($M, $r, :monotonic)
+        @test isdefined($M, $r, :acquire)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined($M, $r, :release)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined($M, $r, :acquire_release)
+        @test isdefined($M, $r, :sequentially_consistent)
+
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobal!($M, $r, y, :u)
+        @test_throws ConcurrencyViolationError("setglobal!: module binding cannot be written non-atomically") setglobal!($M, $r, y, :not_atomic)
+        @test getglobal($M, $r) === x
+        @test setglobal!($M, $r, y) === y
+        @test setglobal!($M, $r, y, :unordered) === y
+        @test setglobal!($M, $r, y, :monotonic) === y
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobal!($M, $r, y, :acquire) === y
+        @test setglobal!($M, $r, y, :release) === y
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobal!($M, $r, y, :acquire_release) === y
+        @test setglobal!($M, $r, y, :sequentially_consistent) === y
+        @test getglobal($M, $r) === y
+
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") swapglobal!($M, $r, x, :u)
+        @test_throws ConcurrencyViolationError("swapglobal!: module binding cannot be written non-atomically") swapglobal!($M, $r, x, :not_atomic)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") swapglobal!($M, $r, x, :unordered) === y
+        @test swapglobal!($M, $r, x, :monotonic) === y
+        @test swapglobal!($M, $r, x, :acquire) === x
+        @test swapglobal!($M, $r, x, :release) === x
+        @test swapglobal!($M, $r, x, :acquire_release) === x
+        @test swapglobal!($M, $r, x, :sequentially_consistent) === x
+        @test swapglobal!($M, $r, x) === x
+
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyglobal!($M, $r, swap, x, :u)
+        @test_throws ConcurrencyViolationError("modifyglobal!: module binding cannot be written non-atomically") modifyglobal!($M, $r, swap, x, :not_atomic)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyglobal!($M, $r, swap, x, :unordered)
+        @test modifyglobal!($M, $r, swap, x, :monotonic) === Pair{TT,TT}(x, x)
+        @test modifyglobal!($M, $r, swap, x, :acquire) === Pair{TT,TT}(x, x)
+        @test modifyglobal!($M, $r, swap, x, :release) === Pair{TT,TT}(x, x)
+        @test modifyglobal!($M, $r, swap, x, :acquire_release) === Pair{TT,TT}(x, x)
+        @test modifyglobal!($M, $r, swap, x, :sequentially_consistent) === Pair{TT,TT}(x, x)
+        @test modifyglobal!($M, $r, swap, x) === Pair{TT,TT}(x, x)
+
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :u, :not_atomic)
+        @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be written non-atomically") replaceglobal!($M, $r, y, x, :not_atomic, :not_atomic)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :unordered, :not_atomic)
+        @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be accessed non-atomically") replaceglobal!($M, $r, x, x, :monotonic, :not_atomic)
+        @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be accessed non-atomically") replaceglobal!($M, $r, x, x, :acquire, :not_atomic)
+        @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be accessed non-atomically") replaceglobal!($M, $r, x, x, :release, :not_atomic)
+        @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be accessed non-atomically") replaceglobal!($M, $r, x, x, :acquire_release, :not_atomic)
+        @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be accessed non-atomically") replaceglobal!($M, $r, x, x, :sequentially_consistent, :not_atomic)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :u)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :unordered)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :monotonic)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :acquire)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :release)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :acquire_release)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :sequentially_consistent)
+        @test replaceglobal!($M, $r, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((x, true))
+        @test replaceglobal!($M, $r, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((y, x === y))
+        @test replaceglobal!($M, $r, y, x, :sequentially_consistent) === ReplaceType{TT}((y, true))
+        @test replaceglobal!($M, $r, x, x) === ReplaceType{TT}((x, true))
+
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :u, :not_atomic)
+        @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be written non-atomically") setglobalonce!($M, $r, y, :not_atomic, :not_atomic)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r,  x, :unordered, :not_atomic)
+        @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be accessed non-atomically") setglobalonce!($M, $r, x, :monotonic, :not_atomic)
+        @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be accessed non-atomically") setglobalonce!($M, $r, x, :acquire, :not_atomic)
+        @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be accessed non-atomically") setglobalonce!($M, $r, x, :release, :not_atomic)
+        @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be accessed non-atomically") setglobalonce!($M, $r, x, :acquire_release, :not_atomic)
+        @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be accessed non-atomically") setglobalonce!($M, $r, x, :sequentially_consistent, :not_atomic)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :u)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :unordered)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :monotonic)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :acquire)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :release)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :acquire_release)
+        @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :sequentially_consistent)
+        @test setglobalonce!($M, $r, x) === false
+        @test setglobalonce!($M, $r, y, :sequentially_consistent, :sequentially_consistent) === false
+        @test setglobalonce!($M, $r, y, :sequentially_consistent, :sequentially_consistent) === false
+        @test setglobalonce!($M, $r, x, :sequentially_consistent) === false
+    end
+end
+@noinline function test_global_orderings(T::Type, x, y)
+    @nospecialize
+    r = Symbol("h1_$T")
+    @eval global $r::$T = $(QuoteNode(x))
+    invokelatest(@eval((x, y) -> $(gen_test_global_orderings(QuoteNode(r)))), x, y)
+    r = Symbol("h2_$T")
+    @eval global $r::$T = $(QuoteNode(x))
+    invokelatest(@eval((r, x, y) -> $(gen_test_global_orderings(:r))), r, x, y)
+    nothing
+end
+test_global_orderings(Int, 10, 20)
+test_global_orderings(Bool, true, false)
+test_global_orderings(String, "hi", "bye")
+test_global_orderings(Symbol, :hi, :bye)
+test_global_orderings(Nothing, nothing, nothing)
+test_global_orderings(Any, 123_10, 123_20)
+test_global_orderings(Any, true, false)
+test_global_orderings(Union{Nothing,Missing}, nothing, missing)
+test_global_orderings(Union{Nothing,Int}, nothing, 123_1)
+test_global_orderings(Complex{Int128}, Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_global_orderings(Complex{Real}, Complex{Real}(10.5, 30.5), Complex{Real}(20.5, 40.5))
+test_global_orderings(Float64, 10.0, 20.0)
+test_global_orderings(Float64, NaN, Inf)
+
+function gen_test_global_undef(@nospecialize r)
+    M = @__MODULE__
+    return quote
+        TT = Core.get_binding_type($M, $r)
+        x = convert(TT, 12345_10)
+        @test_throws UndefVarError getglobal($M, $r)
+        @test_throws UndefVarError getglobal($M, $r, :sequentially_consistent)
+        @test_throws UndefVarError modifyglobal!($M, $r, add, 1, :sequentially_consistent)
+        @test_throws (TT === Any ? UndefVarError : Union{TypeError,ErrorException}) replaceglobal!($M, $r, 1, 1.0, :sequentially_consistent) # TODO: should this be TypeError or ErrorException
+        @test_throws UndefVarError replaceglobal!($M, $r, 1, x, :sequentially_consistent)
+        @test_throws UndefVarError getglobal($M, $r, :sequentially_consistent)
+        @test_throws UndefVarError swapglobal!($M, $r, x, :sequentially_consistent)
+        @test getglobal($M, $r, :sequentially_consistent) === x === getglobal($M, $r)
+    end
+end
+@noinline function test_global_undef(T)
+    r = Symbol("u1_$T")
+    @eval global $r::$T
+    invokelatest(@eval(() -> $(gen_test_global_undef(QuoteNode(r)))))
+    r = Symbol("u2_$T")
+    @eval global $r::$T
+    invokelatest(@eval(r -> $(gen_test_global_undef(:r))), r)
+    nothing
+end
+test_global_undef(BigInt)
+test_global_undef(Any)
+test_global_undef(Union{Nothing,Integer})
+test_global_undef(UndefComplex{Any})
+test_global_undef(UndefComplex{UndefComplex{Any}})
+test_global_undef(Int)
+
+function gen_test_globalonce(@nospecialize r)
+    M = @__MODULE__
+    return quote
+        TT = Core.get_binding_type($M, $r)
+        x = convert(TT, 12345_10)
+        @test_throws UndefVarError getglobal($M, $r)
+        @test setglobalonce!($M, $r, x, :sequentially_consistent) === true
+        @test getglobal($M, $r, :sequentially_consistent) === x
+        @test setglobalonce!($M, $r, convert(TT, 12345_20), :sequentially_consistent) === false
+    end
+end
+@noinline function test_globalonce(T)
+    r = Symbol("o1_$T")
+    @eval global $r::$T
+    invokelatest(@eval(() -> $(gen_test_globalonce(QuoteNode(r)))))
+    r = Symbol("o2_$T")
+    @eval global $r::$T
+    invokelatest(@eval(r -> $(gen_test_globalonce(:r))), r)
+    nothing
+end
+test_globalonce(BigInt)
+test_globalonce(Any)
+test_globalonce(Union{Nothing,Integer})
+test_globalonce(UndefComplex{Any})
+test_globalonce(UndefComplex{UndefComplex{Any}})
+test_globalonce(Int)
+
+# test macroexpansions
+global x::Int
+let a = @__MODULE__
+    @test_throws ConcurrencyViolationError @atomiconce :not_atomic a.x = 2
+    @test true === @atomiconce a.x = 1
+    @test 1 === @atomic a.x
+    @test false === @atomiconce a.x = 2
+end
+let a = @__MODULE__
+    @test 1 === @atomic a.x
+    @test 2 === @atomic :sequentially_consistent a.x = 2
+    @test 3 === @atomic :monotonic a.x = 3
+    local four = 4
+    @test 4 === @atomic :monotonic a.x = four
+    @test 3 === @atomic :monotonic a.x = four - 1
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x = 2
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x += 1
+
+    @test 3 === @atomic :monotonic a.x
+    @test 5 === @atomic a.x += 2
+    @test 4 === @atomic :monotonic a.x -= 1
+    @test 12 === @atomic :monotonic a.x *= 3
+
+    @test 12 === @atomic a.x
+    @test (12 => 13) === @atomic a.x + 1
+    @test (13 => 15) === @atomic :monotonic a.x + 2
+    @test (15 => 19) === @atomic a.x max 19
+    @test (19 => 20) === @atomic :monotonic a.x max 20
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x + 1
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x max 30
+
+    @test 20 === @atomic a.x
+    @test 20 === @atomicswap a.x = 1
+    @test 1 === @atomicswap :monotonic a.x = 2
+    @test_throws ConcurrencyViolationError @atomicswap :not_atomic a.x = 1
+
+    @test 2 === @atomic a.x
+    @test ReplaceType{Int}((2, true)) === @atomicreplace a.x 2 => 1
+    @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic a.x 2 => 1
+    @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic :monotonic a.x 2 => 1
+    @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x 1 => 2
+    @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x 1 => 2
+
+    @test 1 === @atomic a.x
+    xchg = 1 => 2
+    @test ReplaceType{Int}((1, true)) === @atomicreplace a.x xchg
+    @test ReplaceType{Int}((2, false)) === @atomicreplace :monotonic a.x xchg
+    @test ReplaceType{Int}((2, false)) === @atomicreplace :acquire_release :monotonic a.x xchg
+    @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x xchg
+    @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x xchg
+end
+
+## Memory
+
+using InteractiveUtils
+using Core: memoryrefget, memoryrefset!, memoryrefswap!, memoryrefreplace!, memoryrefmodify!, memoryrefsetonce!, memoryref_isassigned
+
+@noinline function _test_memory_operators(r)
+    r = r[]
+    TT = eltype(r)
+    T = typeof(r[])
+    @test memoryrefget(r, :sequentially_consistent, true) === T(123_10)
+    @test memoryrefset!(r, T(123_1), :sequentially_consistent, true) === T(123_1)
+    @test memoryrefget(r, :sequentially_consistent, true) === T(123_1)
+    @test memoryrefreplace!(r, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((T(123_1), false))
+    @test memoryrefreplace!(r, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((T(123_1), true))
+    @test memoryrefget(r, :sequentially_consistent, true) === T(123_30)
+    @test memoryrefreplace!(r, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((T(123_30), false))
+    @test memoryrefget(r, :sequentially_consistent, true) === T(123_30)
+    @test memoryrefmodify!(r, add, 1, :sequentially_consistent, true) === Pair{TT,TT}(T(123_30), T(123_31))
+    @test memoryrefmodify!(r, add, 1, :sequentially_consistent, true) === Pair{TT,TT}(T(123_31), T(123_32))
+    @test memoryrefget(r, :sequentially_consistent, true) === T(123_32)
+    @test memoryrefswap!(r, T(123_1), :sequentially_consistent, true) === T(123_32)
+    @test memoryrefget(r, :sequentially_consistent, true) === T(123_1)
+    nothing
+end
+@noinline function test_memory_operators(T::Type)
+    x = convert(T, 123_10)
+    r = GenericMemoryRef(AtomicMemory{T}(undef, 1))
+    memoryrefset!(r, x, :unordered, true) # @atomic r[] = x
+    _test_memory_operators(Ref(r))
+    r = GenericMemoryRef(AtomicMemory{T}(undef, 1))
+    memoryrefset!(r, x, :unordered, true) # @atomic r[] = x
+    _test_memory_operators(Ref{Any}(r))
+    nothing
+end
+test_memory_operators(Int)
+test_memory_operators(Any)
+test_memory_operators(Union{Nothing,Int})
+test_memory_operators(Complex{Int32})
+test_memory_operators(Complex{Int128})
+test_memory_operators(Complex{Real})
+test_memory_operators(PadIntA)
+test_memory_operators(PadIntB)
+#FIXME: test_memory_operators(Int24)
+test_memory_operators(Float64)
+
+@noinline function _test_memory_orderings(xr, yr, x, y)
+    @nospecialize x y
+    xr = xr[]
+    yr = yr[]
+    TT = eltype(yr)
+    @test TT == eltype(xr)
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(xr, :u, true)
+    @test_throws ConcurrencyViolationError("memoryrefget: atomic memory cannot be accessed non-atomically") memoryrefget(xr, :not_atomic, true)
+    @test memoryrefget(xr, :unordered, true) === x
+    @test memoryrefget(xr, :monotonic, true) === x
+    @test memoryrefget(xr, :acquire, true) === x
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(xr, :release, true) === x
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(xr, :acquire_release, true) === x
+    @test memoryrefget(xr, :sequentially_consistent, true) === x
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(xr, :u, true)
+    @test_throws ConcurrencyViolationError("memoryref_isassigned: atomic memory cannot be accessed non-atomically") memoryref_isassigned(xr, :not_atomic, true)
+    @test memoryref_isassigned(xr, :unordered, true)
+    @test memoryref_isassigned(xr, :monotonic, true)
+    @test memoryref_isassigned(xr, :acquire, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(xr, :release, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(xr, :acquire_release, true)
+    @test memoryref_isassigned(xr, :sequentially_consistent, true)
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(yr, :u, true)
+    @test memoryrefget(yr, :not_atomic, true) === y
+    @test_throws ConcurrencyViolationError("memoryrefget: non-atomic memory cannot be accessed atomically") memoryrefget(yr, :unordered, true)
+    @test_throws ConcurrencyViolationError("memoryrefget: non-atomic memory cannot be accessed atomically") memoryrefget(yr, :monotonic, true)
+    @test_throws ConcurrencyViolationError("memoryrefget: non-atomic memory cannot be accessed atomically") memoryrefget(yr, :acquire, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(yr, :release, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(yr, :acquire_release, true)
+    @test_throws ConcurrencyViolationError("memoryrefget: non-atomic memory cannot be accessed atomically") memoryrefget(yr, :sequentially_consistent, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(yr, :u, true)
+    @test memoryref_isassigned(yr, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryref_isassigned: non-atomic memory cannot be accessed atomically") memoryref_isassigned(yr, :unordered, true)
+    @test_throws ConcurrencyViolationError("memoryref_isassigned: non-atomic memory cannot be accessed atomically") memoryref_isassigned(yr, :monotonic, true)
+    @test_throws ConcurrencyViolationError("memoryref_isassigned: non-atomic memory cannot be accessed atomically") memoryref_isassigned(yr, :acquire, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(yr, :release, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(yr, :acquire_release, true)
+    @test_throws ConcurrencyViolationError("memoryref_isassigned: non-atomic memory cannot be accessed atomically") memoryref_isassigned(yr, :sequentially_consistent, true)
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(xr, y, :u, true)
+    @test_throws ConcurrencyViolationError("memoryrefset!: atomic memory cannot be written non-atomically") memoryrefset!(xr, y, :not_atomic, true)
+    @test memoryrefget(xr, :unordered, true) === x
+    @test memoryrefset!(xr, y, :unordered, true) === y
+    @test memoryrefset!(xr, y, :monotonic, true) === y
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(xr, y, :acquire, true) === y
+    @test memoryrefset!(xr, y, :release, true) === y
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(xr, y, :acquire_release, true) === y
+    @test memoryrefset!(xr, y, :sequentially_consistent, true) === y
+    @test memoryrefget(xr, :unordered, true) === y
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(yr, x, :u, true)
+    @test_throws ConcurrencyViolationError("memoryrefset!: non-atomic memory cannot be written atomically") memoryrefset!(yr, x, :unordered, true)
+    @test_throws ConcurrencyViolationError("memoryrefset!: non-atomic memory cannot be written atomically") memoryrefset!(yr, x, :monotonic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(yr, x, :acquire, true)
+    @test_throws ConcurrencyViolationError("memoryrefset!: non-atomic memory cannot be written atomically") memoryrefset!(yr, x, :release, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(yr, x, :acquire_release, true)
+    @test_throws ConcurrencyViolationError("memoryrefset!: non-atomic memory cannot be written atomically") memoryrefset!(yr, x, :sequentially_consistent, true)
+    @test memoryrefget(yr, :not_atomic, true) === y
+    @test memoryrefset!(yr, x, :not_atomic, true) === x
+    @test memoryrefset!(yr, x, :not_atomic, true) === x
+    @test memoryrefget(yr, :not_atomic, true) === x
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefswap!(yr, y, :u, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefswap!(yr, y, :unordered, true)
+    @test_throws ConcurrencyViolationError("memoryrefswap!: non-atomic memory cannot be written atomically") memoryrefswap!(yr, y, :monotonic, true)
+    @test_throws ConcurrencyViolationError("memoryrefswap!: non-atomic memory cannot be written atomically") memoryrefswap!(yr, y, :acquire, true)
+    @test_throws ConcurrencyViolationError("memoryrefswap!: non-atomic memory cannot be written atomically") memoryrefswap!(yr, y, :release, true)
+    @test_throws ConcurrencyViolationError("memoryrefswap!: non-atomic memory cannot be written atomically") memoryrefswap!(yr, y, :acquire_release, true)
+    @test_throws ConcurrencyViolationError("memoryrefswap!: non-atomic memory cannot be written atomically") memoryrefswap!(yr, y, :sequentially_consistent, true)
+    @test memoryrefswap!(yr, y, :not_atomic, true) === x
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefmodify!(yr, swap, y, :u, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefmodify!(yr, swap, y, :unordered, true)
+    @test_throws ConcurrencyViolationError("memoryrefmodify!: non-atomic memory cannot be written atomically") memoryrefmodify!(yr, swap, y, :monotonic, true)
+    @test_throws ConcurrencyViolationError("memoryrefmodify!: non-atomic memory cannot be written atomically") memoryrefmodify!(yr, swap, y, :acquire, true)
+    @test_throws ConcurrencyViolationError("memoryrefmodify!: non-atomic memory cannot be written atomically") memoryrefmodify!(yr, swap, y, :release, true)
+    @test_throws ConcurrencyViolationError("memoryrefmodify!: non-atomic memory cannot be written atomically") memoryrefmodify!(yr, swap, y, :acquire_release, true)
+    @test_throws ConcurrencyViolationError("memoryrefmodify!: non-atomic memory cannot be written atomically") memoryrefmodify!(yr, swap, y, :sequentially_consistent, true)
+    @test memoryrefmodify!(yr, swap, x, :not_atomic, true) === Pair{TT,TT}(y, x)
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :u, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :unordered, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: non-atomic memory cannot be written atomically") memoryrefreplace!(yr, y, y, :monotonic, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: non-atomic memory cannot be written atomically") memoryrefreplace!(yr, y, y, :acquire, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: non-atomic memory cannot be written atomically") memoryrefreplace!(yr, y, y, :release, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: non-atomic memory cannot be written atomically") memoryrefreplace!(yr, y, y, :acquire_release, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: non-atomic memory cannot be written atomically") memoryrefreplace!(yr, y, y, :sequentially_consistent, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :u, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :unordered, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :monotonic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :acquire, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :release, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :acquire_release, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :sequentially_consistent, true)
+    @test memoryrefreplace!(yr, x, y, :not_atomic, :not_atomic, true) === ReplaceType{TT}((x, true))
+    @test memoryrefreplace!(yr, x, y, :not_atomic, :not_atomic, true) === ReplaceType{TT}((y, x === y))
+    @test memoryrefreplace!(yr, y, y, :not_atomic, :not_atomic, true) === ReplaceType{TT}((y, true))
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefswap!(xr, x, :u, true)
+    @test_throws ConcurrencyViolationError("memoryrefswap!: atomic memory cannot be written non-atomically") memoryrefswap!(xr, x, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefswap!(xr, x, :unordered, true) === y
+    @test memoryrefswap!(xr, x, :monotonic, true) === y
+    @test memoryrefswap!(xr, x, :acquire, true) === x
+    @test memoryrefswap!(xr, x, :release, true) === x
+    @test memoryrefswap!(xr, x, :acquire_release, true) === x
+    @test memoryrefswap!(xr, x, :sequentially_consistent, true) === x
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefmodify!(xr, swap, x, :u, true)
+    @test_throws ConcurrencyViolationError("memoryrefmodify!: atomic memory cannot be written non-atomically") memoryrefmodify!(xr, swap, x, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefmodify!(xr, swap, x, :unordered, true)
+    @test memoryrefmodify!(xr, swap, x, :monotonic, true) === Pair{TT,TT}(x, x)
+    @test memoryrefmodify!(xr, swap, x, :acquire, true) === Pair{TT,TT}(x, x)
+    @test memoryrefmodify!(xr, swap, x, :release, true) === Pair{TT,TT}(x, x)
+    @test memoryrefmodify!(xr, swap, x, :acquire_release, true) === Pair{TT,TT}(x, x)
+    @test memoryrefmodify!(xr, swap, x, :sequentially_consistent, true) === Pair{TT,TT}(x, x)
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :u, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be written non-atomically") memoryrefreplace!(xr, y, x, :not_atomic, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :unordered, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be accessed non-atomically") memoryrefreplace!(xr, x, x, :monotonic, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be accessed non-atomically") memoryrefreplace!(xr, x, x, :acquire, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be accessed non-atomically") memoryrefreplace!(xr, x, x, :release, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be accessed non-atomically") memoryrefreplace!(xr, x, x, :acquire_release, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be accessed non-atomically") memoryrefreplace!(xr, x, x, :sequentially_consistent, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :u, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :unordered, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :monotonic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :acquire, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :release, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :acquire_release, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :sequentially_consistent, true)
+    @test memoryrefreplace!(xr, x, y, :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((x, true))
+    @test memoryrefreplace!(xr, x, y, :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((y, x === y))
+    @test memoryrefreplace!(xr, y, x, :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((y, true))
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :u, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be written non-atomically") memoryrefsetonce!(xr, y, :not_atomic, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr,  x, :unordered, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be accessed non-atomically") memoryrefsetonce!(xr, x, :monotonic, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be accessed non-atomically") memoryrefsetonce!(xr, x, :acquire, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be accessed non-atomically") memoryrefsetonce!(xr, x, :release, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be accessed non-atomically") memoryrefsetonce!(xr, x, :acquire_release, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be accessed non-atomically") memoryrefsetonce!(xr, x, :sequentially_consistent, :not_atomic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :u, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :unordered, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :monotonic, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :acquire, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :release, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :acquire_release, true)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :sequentially_consistent, true)
+    @test memoryrefsetonce!(xr, y, :sequentially_consistent, :sequentially_consistent, true) === false
+    @test memoryrefsetonce!(xr, y, :sequentially_consistent, :sequentially_consistent, true) === false
+    @test memoryrefsetonce!(xr, x, :sequentially_consistent, :sequentially_consistent, true) === false
+    nothing
+end
+@noinline function test_memory_orderings(T::Type, x, y)
+    @nospecialize
+    xr = GenericMemoryRef(AtomicMemory{T}(undef, 2), 2)
+    memoryrefset!(xr, x, :unordered, true) # @atomic xr[2] = x
+    yr = GenericMemoryRef(Memory{T}(undef, 2), 2)
+    yr[] = y
+    GC.gc(false)
+    _test_memory_orderings(Ref(xr), Ref(yr), x, y)
+    xr = GenericMemoryRef(AtomicMemory{T}(undef, 2), 2)
+    memoryrefset!(xr, x, :unordered, true) # @atomic xr[2] = x
+    yr = GenericMemoryRef(Memory{T}(undef, 2), 2)
+    yr[] = y
+    GC.gc(false)
+    _test_memory_orderings(Ref{Any}(xr), Ref{Any}(yr), x, y)
+    nothing
+end
+@noinline test_memory_orderings(x, y) = (@nospecialize; test_memory_orderings(typeof(x), x, y))
+test_memory_orderings(10, 20)
+test_memory_orderings(true, false)
+test_memory_orderings("hi", "bye")
+test_memory_orderings(:hi, :bye)
+test_memory_orderings(nothing, nothing)
+test_memory_orderings(Any, 123_10, 123_20)
+test_memory_orderings(Any, true, false)
+test_memory_orderings(Union{Nothing,Missing}, nothing, missing)
+test_memory_orderings(Union{Nothing,Int}, nothing, 123_1)
+test_memory_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_memory_orderings(Complex{Real}(10.5, 30.5), Complex{Real}(20.5, 40.5))
+test_memory_orderings(Pair{NTuple{3,Float64},NTuple{3,Real}}((10.5,11.5,12.5), (30.5,40.5,50.5)), Pair{NTuple{3,Float64},NTuple{3,Real}}((110.5,111.5,112.5), (130.5,140.5,150.5)))
+test_memory_orderings(10.0, 20.0)
+test_memory_orderings(NaN, Inf)
+
+@noinline function _test_memory_undef(r)
+    r = r[]
+    TT = eltype(r)
+    x = convert(TT, 12345_10)
+    @test_throws UndefRefError memoryrefget(r, :sequentially_consistent, true)
+    @test_throws UndefRefError memoryrefmodify!(r, add, 1, :sequentially_consistent, true)
+    @test_throws (TT === Any ? UndefRefError : TypeError) memoryrefreplace!(r, 1, 1.0, :sequentially_consistent, :sequentially_consistent, true)
+    @test_throws UndefRefError memoryrefreplace!(r, 1, x, :sequentially_consistent, :sequentially_consistent, true)
+    @test_throws UndefRefError memoryrefget(r, :sequentially_consistent, true)
+    @test_throws UndefRefError memoryrefswap!(r, x, :sequentially_consistent, true)
+    @test memoryrefget(r, :sequentially_consistent, true) === x
+    nothing
+end
+@noinline function test_memory_undef(T)
+    r = GenericMemoryRef(AtomicMemory{T}(undef, 1))
+    _test_memory_undef(Ref(r))
+    r = GenericMemoryRef(AtomicMemory{T}(undef, 1))
+    _test_memory_undef(Ref{Any}(r))
+    nothing
+end
+test_memory_undef(BigInt)
+test_memory_undef(Any)
+test_memory_undef(Union{Nothing,Integer})
+test_memory_undef(UndefComplex{Any})
+test_memory_undef(UndefComplex{UndefComplex{Any}})
+
+@noinline function _test_once_undef(r)
+    r = r[]
+    TT = eltype(r)
+    x = convert(TT, 12345_10)
+    @test_throws UndefRefError memoryrefget(r, :sequentially_consistent, true)
+    @test memoryrefsetonce!(r, x, :sequentially_consistent, :sequentially_consistent, true) === true
+    @test memoryrefget(r, :sequentially_consistent, true) === x
+    @test memoryrefsetonce!(r, convert(TT, 12345_20), :sequentially_consistent, :sequentially_consistent, true) === false
+    nothing
+end
+@noinline function test_once_undef(T)
+    r = GenericMemoryRef(AtomicMemory{T}(undef, 1))
+    _test_once_undef(Ref(r))
+    r = GenericMemoryRef(AtomicMemory{T}(undef, 1))
+    _test_once_undef(Ref{Any}(r))
+    nothing
+end
+test_once_undef(BigInt)
+test_once_undef(Any)
+test_once_undef(Union{Nothing,Integer})
+test_once_undef(UndefComplex{Any})
+test_once_undef(UndefComplex{UndefComplex{Any}})
+
+mutable struct Atomic57190
+    @atomic x::Int
+end
+
+
+function add_one57190!()
+    @atomic (Atomic57190(0).x) += 1
+end
+
+@test add_one57190!() == 1
diff --git a/test/backtrace.jl b/test/backtrace.jl
index 50a50100488c4..5acb754834a84 100644
--- a/test/backtrace.jl
+++ b/test/backtrace.jl
@@ -202,6 +202,15 @@ let trace = try
     end
     @test trace[1].func === Symbol("top-level scope")
 end
+let trace = try
+        eval(Expr(:toplevel, LineNumberNode(3, :a_filename), Expr(:error, 1)))
+    catch
+        stacktrace(catch_backtrace())
+    end
+    @test trace[1].func === Symbol("top-level scope")
+    @test trace[1].file === :a_filename
+    @test trace[1].line == 3
+end
 let trace = try
         include_string(@__MODULE__,
             """
@@ -228,7 +237,7 @@ let trace = try
     end
     @test trace[1].func === Symbol("top-level scope")
     @test trace[1].file === :a_filename
-    @test trace[1].line == 3
+    @test trace[1].line in (2, 3)
 end
 
 # issue #45171
@@ -253,10 +262,14 @@ let code = """
                   if ip isa Base.InterpreterIP && ip.code isa Core.MethodInstance]
     num_fs = sum(meth_names .== :f29695)
     num_gs = sum(meth_names .== :g29695)
-    print(num_fs, ' ', num_gs)
+    if num_fs != 1000 || num_gs != 1000
+        Base.show_backtrace(stderr, bt)
+        error("Expected 1000 frames each, got \$num_fs, \$num_fs")
+    end
+    exit()
     """
 
-    @test read(`$(Base.julia_cmd()) --startup-file=no --compile=min -e $code`, String) == "1000 1000"
+    @test success(pipeline(`$(Base.julia_cmd()) --startup-file=no --compile=min -e $code`; stderr))
 end
 
 # Test that modules make it into InterpreterIP for top-level code
@@ -363,3 +376,7 @@ end
     @test sp[1] < ptr1
     @test all(diff(Int128.(UInt.(sp))) .> 0)
 end
+
+@testset "`lookup` return type inference" begin
+    @test Vector{StackTraces.StackFrame} === Base.infer_return_type(lookup)
+end
diff --git a/test/bitarray.jl b/test/bitarray.jl
index 5d0bff62ab6e1..fd5c1421a256f 100644
--- a/test/bitarray.jl
+++ b/test/bitarray.jl
@@ -1,7 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Base: findprevnot, findnextnot
-using Random, LinearAlgebra, Test
+using Random, Test, LinearAlgebra # Ideally, these tests should not depend on LinearAlgebra
+
+isdefined(Main, :SizedArrays) || @eval Main include("testhelpers/SizedArrays.jl")
+using .Main.SizedArrays
 
 tc(r1::NTuple{N,Any}, r2::NTuple{N,Any}) where {N} = all(x->tc(x...), [zip(r1,r2)...])
 tc(r1::BitArray{N}, r2::Union{BitArray{N},Array{Bool,N}}) where {N} = true
@@ -12,7 +15,6 @@ tc(r1,r2) = false
 
 bitcheck(b::BitArray) = Test._check_bitarray_consistency(b)
 bitcheck(x) = true
-bcast_setindex!(b, x, I...) = (b[I...] .= x; b)
 
 function check_bitop_call(ret_type, func, args...; kwargs...)
     r2 = func(map(x->(isa(x, BitArray) ? Array(x) : x), args)...; kwargs...)
@@ -31,6 +33,9 @@ macro check_bit_operation(ex)
     Expr(:call, :check_bitop_call, nothing, map(esc, ex.args)...)
 end
 
+bcast_setindex!(b, x, I...) = (b[I...] .= x; b)
+
+
 let t0 = time_ns()
     global timesofar
     function timesofar(str)
@@ -82,6 +87,25 @@ allsizes = [((), BitArray{0}), ((v1,), BitVector),
     @test !isassigned(b, length(b) + 1)
 end
 
+@testset "trues and falses with custom axes" begin
+    for ax in ((SizedArrays.SOneTo(2),), (SizedArrays.SOneTo(2), Base.OneTo(2)))
+        t = trues(ax)
+        if all(x -> x isa SizedArrays.SOneTo, ax)
+            @test t isa SizedArrays.SizedArray && parent(t) isa BitArray
+        else
+            @test t isa BitArray
+        end
+        @test all(t)
+
+        f = falses(ax)
+        if all(x -> x isa SizedArrays.SOneTo, ax)
+            @test t isa SizedArrays.SizedArray && parent(t) isa BitArray
+        else
+            @test t isa BitArray
+        end
+        @test !any(f)
+    end
+end
 
 @testset "Conversions for size $sz" for (sz, T) in allsizes
     b1 = rand!(falses(sz...))
@@ -1335,11 +1359,11 @@ timesofar("find")
     @test findprev(b1, 777)  == findprevnot(b2, 777)  == findprev(!, b2, 777)  == 777
     @test findprev(b1, 776)  == findprevnot(b2, 776)  == findprev(!, b2, 776)  == 77
     @test findprev(b1, 77)   == findprevnot(b2, 77)   == findprev(!, b2, 77)   == 77
-    @test findprev(b1, 76)   == findprevnot(b2, 76)   == findprev(!, b2, 76)   == nothing
-    @test findprev(b1, -1)   == findprevnot(b2, -1)   == findprev(!, b2, -1)   == nothing
-    @test findprev(identity, b1, -1) == nothing
-    @test findprev(Returns(false), b1, -1) == nothing
-    @test findprev(Returns(true), b1, -1) == nothing
+    @test findprev(b1, 76)   == findprevnot(b2, 76)   == findprev(!, b2, 76)   === nothing
+    @test findprev(b1, -1)   == findprevnot(b2, -1)   == findprev(!, b2, -1)   === nothing
+    @test findprev(identity, b1, -1) === nothing
+    @test findprev(Returns(false), b1, -1) === nothing
+    @test findprev(Returns(true), b1, -1) === nothing
     @test_throws BoundsError findnext(b1, -1)
     @test_throws BoundsError findnextnot(b2, -1)
     @test_throws BoundsError findnext(!, b2, -1)
@@ -1350,28 +1374,28 @@ timesofar("find")
     @test findnext(b1, 77)   == findnextnot(b2, 77)   == findnext(!, b2, 77)   == 77
     @test findnext(b1, 78)   == findnextnot(b2, 78)   == findnext(!, b2, 78)   == 777
     @test findnext(b1, 777)  == findnextnot(b2, 777)  == findnext(!, b2, 777)  == 777
-    @test findnext(b1, 778)  == findnextnot(b2, 778)  == findnext(!, b2, 778)  == nothing
-    @test findnext(b1, 1001) == findnextnot(b2, 1001) == findnext(!, b2, 1001) == nothing
-    @test findnext(identity, b1, 1001) == findnext(Returns(false), b1, 1001) == findnext(Returns(true), b1, 1001) == nothing
+    @test findnext(b1, 778)  == findnextnot(b2, 778)  == findnext(!, b2, 778)  === nothing
+    @test findnext(b1, 1001) == findnextnot(b2, 1001) == findnext(!, b2, 1001) === nothing
+    @test findnext(identity, b1, 1001) == findnext(Returns(false), b1, 1001) == findnext(Returns(true), b1, 1001) === nothing
 
     @test findlast(b1) == Base.findlastnot(b2) == 777
     @test findfirst(b1) == Base.findfirstnot(b2) == 77
 
     b0 = BitVector()
-    @test findprev(Returns(true), b0, -1) == nothing
+    @test findprev(Returns(true), b0, -1) === nothing
     @test_throws BoundsError findprev(Returns(true), b0, 1)
     @test_throws BoundsError findnext(Returns(true), b0, -1)
-    @test findnext(Returns(true), b0, 1) == nothing
+    @test findnext(Returns(true), b0, 1) === nothing
 
     b1 = falses(10)
     @test findprev(Returns(true), b1, 5) == 5
     @test findnext(Returns(true), b1, 5) == 5
-    @test findprev(Returns(true), b1, -1) == nothing
-    @test findnext(Returns(true), b1, 11) == nothing
-    @test findprev(Returns(false), b1, 5) == nothing
-    @test findnext(Returns(false), b1, 5) == nothing
-    @test findprev(Returns(false), b1, -1) == nothing
-    @test findnext(Returns(false), b1, 11) == nothing
+    @test findprev(Returns(true), b1, -1) === nothing
+    @test findnext(Returns(true), b1, 11) === nothing
+    @test findprev(Returns(false), b1, 5) === nothing
+    @test findnext(Returns(false), b1, 5) === nothing
+    @test findprev(Returns(false), b1, -1) === nothing
+    @test findnext(Returns(false), b1, 11) === nothing
     @test_throws BoundsError findprev(Returns(true), b1, 11)
     @test_throws BoundsError findnext(Returns(true), b1, -1)
 
@@ -1393,7 +1417,7 @@ timesofar("find")
     for l = [1, 63, 64, 65, 127, 128, 129]
         f = falses(l)
         t = trues(l)
-        @test findprev(f, l) == findprevnot(t, l) == nothing
+        @test findprev(f, l) == findprevnot(t, l) === nothing
         @test findprev(t, l) == findprevnot(f, l) == l
         b1 = falses(l)
         b1[end] = true
@@ -1539,6 +1563,21 @@ timesofar("reductions")
             end
         end
     end
+    @testset "Issue #50780, map! bitarray map! where dest aliases source" begin
+        a = BitVector([1,0])
+        b = map(!, a)
+        map!(!, a, a) # a .= !.a
+        @test a == b == BitVector([0,1])
+
+        a = BitVector([1,0])
+        c = map(|, a, b)
+        map!(|, a, a, b)
+        @test c == a == BitVector([1, 1])
+
+        a = BitVector([1,0])
+        map!(|, b, a, b)
+        @test c == b == BitVector([1, 1])
+    end
 end
 
 ## Filter ##
@@ -1604,69 +1643,6 @@ end
 
 timesofar("cat")
 
-@testset "Linear algebra" begin
-    b1 = bitrand(v1)
-    b2 = bitrand(v1)
-    @check_bit_operation dot(b1, b2) Int
-
-    b1 = bitrand(n1, n2)
-    @test_throws ArgumentError tril(b1, -n1 - 2)
-    @test_throws ArgumentError tril(b1, n2)
-    @test_throws ArgumentError triu(b1, -n1)
-    @test_throws ArgumentError triu(b1, n2 + 2)
-    for k in (-n1 - 1):(n2 - 1)
-        @check_bit_operation tril(b1, k) BitMatrix
-    end
-    for k in (-n1 + 1):(n2 + 1)
-        @check_bit_operation triu(b1, k) BitMatrix
-    end
-
-    for sz = [(n1,n1), (n1,n2), (n2,n1)], (f,isf) = [(tril,istril), (triu,istriu)]
-        b1 = bitrand(sz...)
-        @check_bit_operation isf(b1) Bool
-        b1 = f(bitrand(sz...))
-        @check_bit_operation isf(b1) Bool
-    end
-
-    b1 = bitrand(n1,n1)
-    b1 .|= copy(b1')
-    @check_bit_operation issymmetric(b1) Bool
-    @check_bit_operation ishermitian(b1) Bool
-
-    b1 = bitrand(n1)
-    b2 = bitrand(n2)
-    @check_bit_operation kron(b1, b2) BitVector
-
-    b1 = bitrand(s1, s2)
-    b2 = bitrand(s3, s4)
-    @check_bit_operation kron(b1, b2) BitMatrix
-
-    b1 = bitrand(v1)
-    @check_bit_operation diff(b1) Vector{Int}
-
-    b1 = bitrand(n1, n2)
-    @check_bit_operation diff(b1, dims=1) Matrix{Int}
-    @check_bit_operation diff(b1, dims=2) Matrix{Int}
-
-    b1 = bitrand(n1, n1)
-    @test ((svdb1, svdb1A) = (svd(b1), svd(Array(b1)));
-            svdb1.U == svdb1A.U && svdb1.S == svdb1A.S && svdb1.V == svdb1A.V)
-    @test ((qrb1, qrb1A) = (qr(b1), qr(Array(b1)));
-            Matrix(qrb1.Q) == Matrix(qrb1A.Q) && qrb1.R == qrb1A.R)
-
-    b1 = bitrand(v1)
-    @check_bit_operation diagm(0 => b1) BitMatrix
-
-    b1 = bitrand(v1)
-    b2 = bitrand(v1)
-    @check_bit_operation diagm(-1 => b1, 1 => b2) BitMatrix
-
-    b1 = bitrand(n1, n1)
-    @check_bit_operation diag(b1)
-end
-
-timesofar("linalg")
-
 @testset "findmax, findmin" begin
     b1 = trues(0)
     @test_throws ArgumentError findmax(b1)
diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl
index f2eb2ea630893..f81cd2dc9e39a 100644
--- a/test/boundscheck_exec.jl
+++ b/test/boundscheck_exec.jl
@@ -239,6 +239,13 @@ if bc_opt != bc_off
     @test_throws BoundsError BadVector20469([1,2,3])[:]
 end
 
+# Accumulate: do not set inbounds context for user-supplied functions
+if bc_opt != bc_off
+    Base.@propagate_inbounds op58200(a, b) = (1, 2)[a] + (1, 2)[b]
+    @test_throws BoundsError accumulate(op58200, 1:10)
+    @test_throws BoundsError Base.accumulate_pairwise(op58200, 1:10)
+end
+
 # Ensure iteration over arrays is vectorizable
 function g27079(X)
     r = 0
@@ -252,10 +259,9 @@ end
 
 # Boundschecking removal of indices with different type, see #40281
 getindex_40281(v, a, b, c) = @inbounds getindex(v, a, b, c)
-typed_40281 = sprint((io, args...) -> code_warntype(io, args...; optimize=true), getindex_40281, Tuple{Array{Float64, 3}, Int, UInt8, Int})
+llvm_40281 = sprint((io, args...) -> code_llvm(io, args...; optimize=true), getindex_40281, Tuple{Array{Float64, 3}, Int, UInt8, Int})
 if bc_opt == bc_default || bc_opt == bc_off
-    @test occursin("arrayref(false", typed_40281)
-    @test !occursin("arrayref(true", typed_40281)
+    @test !occursin("call void @ijl_bounds_error_ints", llvm_40281)
 end
 
 # Given this is a sub-processed test file, not using @testsets avoids
@@ -298,4 +304,70 @@ end
     typeintersect(Int, Integer)
 end |> only === Type{Int}
 
+if bc_opt == bc_default
+    # Array/Memory escape analysis
+    function no_allocate(T::Type{<:Union{Memory}})
+        v = T(undef, 2)
+        v[1] = 2
+        v[2] = 3
+        return v[1] + v[2]
+    end
+    function test_alloc(::Type{T}; broken=false) where T
+        @test (@allocated no_allocate(T)) == 0 broken=broken
+    end
+    for T in [Memory] # This requires changing the pointer_from_objref to something llvm sees through
+        for ET in [Int, Float32, Union{Int, Float64}]
+            no_allocate(T{ET}) #compile
+            # allocations aren't removed for Union eltypes which they theoretically could be eventually
+            test_alloc(T{ET}, broken=(ET==Union{Int, Float64}))
+        end
+    end
+    function f() # this was causing a bug on an in progress version of #55913.
+        m = Memory{Float64}(undef, 4)
+        m .= 1.0
+        s = 0.0
+        for x ∈ m
+            s += x
+        end
+        s
+    end
+    @test f() === 4.0
+    function confuse_alias_analysis()
+       mem0 = Memory{Int}(undef, 1)
+       mem1 = Memory{Int}(undef, 1)
+       @inbounds mem0[1] = 3
+       for width in 1:2
+            @inbounds mem1[1] = mem0[1]
+            mem0 = mem1
+       end
+       mem0[1]
+    end
+    @test confuse_alias_analysis() == 3
+    @test (@allocated confuse_alias_analysis()) == 0
+    function no_alias_prove(n)
+        m1 = Memory{Int}(undef,n)
+        m2 = Memory{Int}(undef,n)
+        m1 === m2
+    end
+    no_alias_prove5() = no_alias_prove(5)
+    no_alias_prove5()
+    @test (@allocated no_alias_prove5()) == 0
+end
+
+@testset "automatic boundscheck elision for iteration on some important types" begin
+    if bc_opt != bc_on
+        @test !contains(sprint(code_llvm, iterate, (Memory{UInt8}, Int)), "unreachable")
+
+        @test !contains(sprint(code_llvm, iterate, (Vector{UInt8}, Int)), "unreachable")
+        @test !contains(sprint(code_llvm, iterate, (Matrix{UInt8}, Int)), "unreachable")
+        @test !contains(sprint(code_llvm, iterate, (Array{UInt8,3}, Int)), "unreachable")
+
+        @test !contains(sprint(code_llvm, iterate, (SubArray{Float64, 1, Vector{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}}, true}, Int)), "unreachable")
+        @test !contains(sprint(code_llvm, iterate, (SubArray{Float64, 2, Matrix{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, true}, Int)), "unreachable")
+        @test !contains(sprint(code_llvm, iterate, (SubArray{Float64, 2, Matrix{Float64}, Tuple{Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}}, true}, Int)), "unreachable")
+
+        @test !contains(sprint(code_llvm, iterate, (Base.CodeUnits{UInt8,String}, Int)), "unreachable")
+    end
+end
+
 end
diff --git a/test/broadcast.jl b/test/broadcast.jl
index 87858dd0f08fc..7896cb0abdb8c 100644
--- a/test/broadcast.jl
+++ b/test/broadcast.jl
@@ -49,10 +49,11 @@ ci(x) = CartesianIndex(x)
 @test @inferred(newindex(ci((2,2)), (true, false), (-1,-1)))  == ci((2,-1))
 @test @inferred(newindex(ci((2,2)), (false, true), (-1,-1)))  == ci((-1,2))
 @test @inferred(newindex(ci((2,2)), (false, false), (-1,-1))) == ci((-1,-1))
-@test @inferred(newindex(ci((2,2)), (true,), (-1,-1)))   == ci((2,))
-@test @inferred(newindex(ci((2,2)), (true,), (-1,)))   == ci((2,))
-@test @inferred(newindex(ci((2,2)), (false,), (-1,))) == ci((-1,))
+@test @inferred(newindex(ci((2,2)), (true,), (-1,-1))) == 2
+@test @inferred(newindex(ci((2,2)), (true,), (-1,)))   == 2
+@test @inferred(newindex(ci((2,2)), (false,), (-1,)))  == -1
 @test @inferred(newindex(ci((2,2)), (), ())) == ci(())
+@test @inferred(newindex(ci((2,)), (true, false, false), (-1, -1, -1))) == ci((2, -1))
 
 end
 
@@ -592,6 +593,16 @@ end
     end
 end
 
+@testset "convert behavior of logical broadcast" begin
+    a = mod.(1:4, 2)
+    @test !isa(a, BitArray)
+    for T in (Array{Bool}, BitArray)
+        la = T(a)
+        la .= mod.(0:3, 2)
+        @test la == [false; true; false; true]
+    end
+end
+
 # Test that broadcast treats type arguments as scalars, i.e. containertype yields Any,
 # even for subtypes of abstract array. (https://github.com/JuliaStats/DataArrays.jl/issues/229)
 @testset "treat type arguments as scalars, DataArrays issue 229" begin
@@ -774,19 +785,32 @@ let X = zeros(2, 3)
 end
 
 # issue #27988: inference of Broadcast.flatten
-using .Broadcast: Broadcasted
+using .Broadcast: Broadcasted, cat_nested
 let
     bc = Broadcasted(+, (Broadcasted(*, (1, 2)), Broadcasted(*, (Broadcasted(*, (3, 4)), 5))))
-    @test @inferred(Broadcast.cat_nested(bc)) == (1,2,3,4,5)
+    @test @inferred(cat_nested(bc)) == (1,2,3,4,5)
     @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == @inferred(Broadcast.materialize(bc)) == 62
     bc = Broadcasted(+, (Broadcasted(*, (1, Broadcasted(/, (2.0, 2.5)))), Broadcasted(*, (Broadcasted(*, (3, 4)), 5))))
-    @test @inferred(Broadcast.cat_nested(bc)) == (1,2.0,2.5,3,4,5)
+    @test @inferred(cat_nested(bc)) == (1,2.0,2.5,3,4,5)
     @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == @inferred(Broadcast.materialize(bc)) == 60.8
+    # 1 .* 1 .- 1 .* 1 .^2 .+ 1 .* 1 .+ 1 .^ 3
+    bc = Broadcasted(+, (Broadcasted(+, (Broadcasted(-, (Broadcasted(*, (1, 1)), Broadcasted(*, (1, Broadcasted(Base.literal_pow, (Ref(^), 1, Ref(Val(2)))))))), Broadcasted(*, (1, 1)))), Broadcasted(Base.literal_pow, (Base.RefValue{typeof(^)}(^), 1, Base.RefValue{Val{3}}(Val{3}())))))
+    @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == @inferred(Broadcast.materialize(bc)) == 2
+    # @. 1 + 1 * (1 + 1 + 1 + 1)
+    bc = Broadcasted(+, (1, Broadcasted(*, (1, Broadcasted(+, (1, 1, 1, 1))))))
+    @test @inferred(cat_nested(bc)) == (1, 1, 1, 1, 1, 1) # `cat_nested` failed to infer this
+    @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == Broadcast.materialize(bc)
+    # @. 1 + (1 + 1) + 1 + (1 + 1) + 1 + (1 + 1) + 1
+    bc = Broadcasted(+, (1, Broadcasted(+, (1, 1)), 1, Broadcasted(+, (1, 1)), 1, Broadcasted(+, (1, 1)), 1))
+    @test @inferred(cat_nested(bc)) == (1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
+    @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == Broadcast.materialize(bc)
+    bc = Broadcasted(Float32, (Broadcasted(+, (1, 1)),))
+    @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == Broadcast.materialize(bc)
 end
 
 let
-  bc = Broadcasted(+, (Broadcasted(*, ([1, 2, 3], 4)), 5))
-  @test isbits(Broadcast.flatten(bc).f)
+    bc = Broadcasted(+, (Broadcasted(*, ([1, 2, 3], 4)), 5))
+    @test isbits(Broadcast.flatten(bc).f)
 end
 
 # Issue #26127: multiple splats in a fused dot-expression
@@ -830,29 +854,59 @@ let
     @test Dict(c .=> d) == Dict("foo" => 1, "bar" => 2)
 end
 
-# Broadcasted iterable/indexable APIs
-let
-    bc = Broadcast.instantiate(Broadcast.broadcasted(+, zeros(5), 5))
+isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
+using .Main.OffsetArrays
+@testset "Broadcasted iterable/indexable APIs" begin
+    for f in (identity, x -> OffsetArray(x, ntuple(Returns(-1), ndims(x))))
+        a = f(zeros(5))
+        bc = Broadcast.instantiate(Broadcast.broadcasted(+, a, 5))
+        @test IndexStyle(bc) == IndexLinear()
+        @test eachindex(bc) === eachindex(a)
+        @test length(bc) === 5
+        @test ndims(bc) === 1
+        @test ndims(typeof(bc)) === 1
+        @test bc[1] === bc[CartesianIndex((1,))] === 5.0
+        @test copy(bc) == [v for v in bc] == collect(bc)
+        @test eltype(copy(bc)) == eltype([v for v in bc]) == eltype(collect(bc))
+        @test ndims(copy(bc)) == ndims([v for v in bc]) == ndims(collect(bc)) == ndims(bc)
+
+        b = f(5*ones(1, 4))
+        bc = Broadcast.instantiate(Broadcast.broadcasted(+, a, b))
+        @test IndexStyle(bc) == IndexCartesian()
+        @test eachindex(bc) === CartesianIndices((axes(a, 1), axes(b, 2)))
+        @test length(bc) === 20
+        @test ndims(bc) === 2
+        @test ndims(typeof(bc)) === 2
+        @test bc[1,1] == bc[CartesianIndex((1,1))] === 5.0
+        @test copy(bc) == [v for v in bc] == collect(bc)
+        @test eltype(copy(bc)) == eltype([v for v in bc]) == eltype(collect(bc))
+        @test ndims(copy(bc)) == ndims([v for v in bc]) == ndims(collect(bc)) == ndims(bc)
+    end
+
+    struct MyFill{T,N} <: AbstractArray{T,N}
+        val :: T
+        sz :: NTuple{N,Int}
+    end
+    Base.size(M::MyFill) = M.sz
+    function Base.getindex(M::MyFill{<:Any,N}, i::Vararg{Int, N}) where {N}
+        checkbounds(M, i...)
+        M.val
+    end
+    Base.IndexStyle(::Type{<:Base.Broadcast.Broadcasted{<:Any,<:Any,<:Any,<:Tuple{MyFill}}}) = IndexLinear()
+    bc = Broadcast.instantiate(Broadcast.broadcasted(+, MyFill(2, (3,3))))
     @test IndexStyle(bc) == IndexLinear()
-    @test eachindex(bc) === Base.OneTo(5)
-    @test length(bc) === 5
-    @test ndims(bc) === 1
-    @test ndims(typeof(bc)) === 1
-    @test bc[1] === bc[CartesianIndex((1,))] === 5.0
-    @test copy(bc) == [v for v in bc] == collect(bc)
-    @test eltype(copy(bc)) == eltype([v for v in bc]) == eltype(collect(bc))
-    @test ndims(copy(bc)) == ndims([v for v in bc]) == ndims(collect(bc)) == ndims(bc)
-
-    bc = Broadcast.instantiate(Broadcast.broadcasted(+, zeros(5), 5*ones(1, 4)))
-    @test IndexStyle(bc) == IndexCartesian()
-    @test eachindex(bc) === CartesianIndices((Base.OneTo(5), Base.OneTo(4)))
-    @test length(bc) === 20
-    @test ndims(bc) === 2
-    @test ndims(typeof(bc)) === 2
-    @test bc[1,1] == bc[CartesianIndex((1,1))] === 5.0
-    @test copy(bc) == [v for v in bc] == collect(bc)
-    @test eltype(copy(bc)) == eltype([v for v in bc]) == eltype(collect(bc))
-    @test ndims(copy(bc)) == ndims([v for v in bc]) == ndims(collect(bc)) == ndims(bc)
+    @test eachindex(bc) === Base.OneTo(9)
+    @test bc[2] == bc[CartesianIndex(2,1)]
+
+    for bc in Any[
+                Broadcast.broadcasted(+, collect(reshape(1:9, 3, 3)), 1:3), # IndexCartesian
+                Broadcast.broadcasted(+, [1,2], 2), # IndexLinear
+            ]
+        bci = Broadcast.instantiate(bc)
+        for (Ilin, Icart) in zip(eachindex(IndexLinear(), bc), eachindex(IndexCartesian(), bc))
+            @test bc[Ilin] == bc[Icart]
+        end
+    end
 end
 
 # issue 43847: collect preserves shape of broadcasted
@@ -882,11 +936,16 @@ let
 
     @test @inferred(Base.IteratorSize(Broadcast.broadcasted(+, (1,2,3), a1, zeros(3,3,3)))) === Base.HasShape{3}()
 
+    @test @inferred(Base.IteratorSize(Base.broadcasted(randn))) === Base.HasShape{0}()
+
+    @test @inferred(Base.IteratorSize(convert(Broadcast.Broadcasted{Nothing}, Base.broadcasted(randn)))) === Base.HasShape{0}()
+
     # inference on nested
-    bc = Base.broadcasted(+, AD1(randn(3)), AD1(randn(3)))
-    bc_nest = Base.broadcasted(+, bc , bc)
-    @test @inferred(Base.IteratorSize(bc_nest)) === Base.HasShape{1}()
- end
+    bc = Base.broadcasted(+, AD1(randn(3)), AD1(randn(3)), AD1(randn(3)))
+    bc_nest = Base.broadcasted(*, bc, bc, bc, bc, AD1(randn(3)))
+    bc_nest2 = Base.broadcasted(-, bc_nest, bc_nest)
+    @test @inferred(Base.IteratorSize(bc_nest2)) === Base.HasShape{1}()
+end
 
 # issue #31295
 let a = rand(5), b = rand(5), c = copy(a)
@@ -956,6 +1015,10 @@ end
     @test sum(bc, dims=1, init=0) == [5]
     bc = Broadcast.instantiate(Broadcast.broadcasted(*, ['a','b'], 'c'))
     @test prod(bc, dims=1, init="") == ["acbc"]
+
+    a = rand(-10:10,32,4); b = rand(-10:10,32,4)
+    bc = Broadcast.instantiate(Broadcast.broadcasted(+,a,b))
+    @test sum(bc; dims = 1, init = 0.0) == sum(collect(bc); dims = 1, init = 0.0)
 end
 
 # treat Pair as scalar:
@@ -1064,24 +1127,14 @@ end
 end
 
 @testset "inplace broadcast with trailing singleton dims" begin
-    for (a, b, c) in (([1, 2], reshape([3 4], :, 1), reshape([5, 6], :, 1, 1)),
+    for (a_, b_, c_) in (([1, 2], reshape([3 4], :, 1), reshape([5, 6], :, 1, 1)),
             ([1 2; 3 4], reshape([5 6; 7 8], 2, 2, 1), reshape([9 10; 11 12], 2, 2, 1, 1)))
-
-        a_ = copy(a)
-        a_ .= b
-        @test a_ == dropdims(b, dims=(findall(==(1), size(b))...,))
-
-        a_ = copy(a)
-        a_ .= b
-        @test a_ == dropdims(b, dims=(findall(==(1), size(b))...,))
-
-        a_ = copy(a)
-        a_ .= b .+ c
-        @test a_ == dropdims(b .+ c, dims=(findall(==(1), size(c))...,))
-
-        a_ = copy(a)
-        a_ .*= c
-        @test a_ == dropdims(a .* c, dims=(findall(==(1), size(c))...,))
+        for fun in (x -> OffsetArray(x, ntuple(Returns(1), ndims(x))), identity)
+            a, b, c = fun(a_), fun(b_), fun(c_)
+            @test (deepcopy(a) .= b) == dropdims(b, dims=(findall(==(1), size(b))...,))
+            @test (deepcopy(a) .= b .+ c) == dropdims(b .+ c, dims=(findall(==(1), size(c))...,))
+            @test (deepcopy(a) .*= c)  == dropdims(a .* c, dims=(findall(==(1), size(c))...,))
+        end
     end
 end
 
@@ -1129,7 +1182,45 @@ end
     @test CartesianIndex(1,2) .+ [CartesianIndex(3,4), CartesianIndex(5,6)] == [CartesianIndex(4, 6), CartesianIndex(6, 8)]
 end
 
+struct MyBroadcastStyleWithField <: Broadcast.BroadcastStyle
+    i::Int
+end
+# asymmetry intended
+Base.BroadcastStyle(a::MyBroadcastStyleWithField, b::MyBroadcastStyleWithField) = a
+
+@testset "issue #50937: styles that have fields" begin
+    @test Broadcast.result_style(MyBroadcastStyleWithField(1), MyBroadcastStyleWithField(1)) ==
+        MyBroadcastStyleWithField(1)
+    @test_throws ErrorException Broadcast.result_style(MyBroadcastStyleWithField(1),
+                                                       MyBroadcastStyleWithField(2))
+    dest = [0, 0]
+    dest .= Broadcast.Broadcasted(MyBroadcastStyleWithField(1), +, (1:2, 2:3))
+    @test dest == [3, 5]
+end
+
 # test that `Broadcast` definition is defined as total and eligible for concrete evaluation
 import Base.Broadcast: BroadcastStyle, DefaultArrayStyle
 @test Base.infer_effects(BroadcastStyle, (DefaultArrayStyle{1},DefaultArrayStyle{2},)) |>
     Core.Compiler.is_foldable
+
+f51129(v, x) = (1 .- (v ./ x) .^ 2)
+@test @inferred(f51129([13.0], 6.5)) == [-3.0]
+
+@testset "Docstrings" begin
+    undoc = Docs.undocumented_names(Broadcast)
+    @test_broken isempty(undoc)
+    @test undoc == [:dotview]
+end
+
+@testset "broadcast for `AbstractArray` without `CartesianIndex` support" begin
+    struct BVec52775 <: AbstractVector{Int}
+        a::Vector{Int}
+    end
+    Base.size(a::BVec52775) = size(a.a)
+    Base.getindex(a::BVec52775, i::Real) = a.a[i]
+    Base.getindex(a::BVec52775, i) = error("unsupported index!")
+    a = BVec52775([1,2,3])
+    bc = Base.broadcasted(identity, a)
+    @test bc[1] == bc[CartesianIndex(1)] == bc[1, CartesianIndex()]
+    @test a .+ [1 2] == a.a .+ [1 2]
+end
diff --git a/test/buildkitetestjson.jl b/test/buildkitetestjson.jl
new file mode 100644
index 0000000000000..48a4144a3dc53
--- /dev/null
+++ b/test/buildkitetestjson.jl
@@ -0,0 +1,296 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Convert test(set) results to a Buildkite-compatible JSON representation.
+# Based on <https://buildkite.com/docs/test-analytics/importing-json#json-test-results-data-reference>.
+
+module BuildkiteTestJSON
+
+using Test
+using Dates
+using Serialization
+
+export serialize_testset_result_file, write_testset_json_files
+
+# Bootleg JSON writer
+
+"""
+    json_repr(io::IO, value; kwargs...) -> Nothing
+
+Obtain a JSON representation of `value`, and print it to `io`.
+
+This may not be the best, most feature-complete, or fastest implementation.
+However, it works for its intended purpose.
+"""
+function json_repr end
+
+function json_repr(io::IO, val::String; indent::Int=0)
+    print(io, '"')
+    escape_string(io, val, ('"',))
+    print(io, '"')
+end
+json_repr(io::IO, val::Integer; indent::Int=0) = print(io, val)
+json_repr(io::IO, val::Float64; indent::Int=0) = print(io, val)
+function json_repr(io::IO, val::AbstractVector; indent::Int=0)
+    print(io, '[')
+    for i in eachindex(val)
+        print(io, '\n', ' '^(indent + 2))
+        json_repr(io, val[i]; indent=indent + 2)
+        i == lastindex(val) || print(io, ',')
+    end
+    print(io, '\n', ' '^indent, ']')
+end
+function json_repr(io::IO, val::Dict; indent::Int=0)
+    print(io, '{')
+    len = length(val)
+    for (i, (k, v)) in enumerate(pairs(val))
+        print(io, '\n', ' '^(indent + 2))
+        json_repr(io, string(k))
+        print(io, ": ")
+        json_repr(io, v; indent=indent + 2)
+        i == len || print(io, ',')
+    end
+    print(io, '\n', ' '^indent, '}')
+end
+json_repr(io::IO, val::Any; indent::Int=0) = json_repr(io, string(val))
+
+# Test result processing
+
+function result_dict(testset::Test.DefaultTestSet, prefix::String="")
+    scope = if isempty(prefix)
+        testset.description == "Overall" ? "" : testset.description
+    else
+        join((prefix, testset.description), '/')
+    end
+    data = Dict{String,Any}(
+        "id" => Base.UUID(rand(UInt128)),
+        "scope" => scope,
+        "tags" => Dict{String,String}(
+            "os" => string(Sys.KERNEL),
+            "arch" => string(Sys.ARCH),
+            "julia_version" => string(VERSION),
+            "testset" => testset.description,
+            "job_group" => get(ENV, "BUILDKITE_GROUP_LABEL", "unknown"),
+            "job_label" => get(ENV, "BUILDKITE_LABEL", "unknown"),
+        ),
+        # note we drop some of this from common_data before merging into individual results
+        "history" => if !isnothing(testset.time_end)
+            Dict{String,Any}(
+                "start_at" => testset.time_start,
+                "end_at" => testset.time_end,
+                "duration" => testset.time_end - testset.time_start)
+        else
+            Dict{String,Any}("start_at" => testset.time_start, "duration" => 0.0)
+        end)
+    return data
+end
+
+# Test paths on runners are often in deep directories, so just make them contain enough information
+# to be able to identify the file. Also convert Windows-style paths to Unix-style paths so tests can
+# be grouped by file.
+const generalize_file_paths_cache = Dict{AbstractString,AbstractString}()
+const norm_sourcedir = normpath(Base.SOURCEDIR)
+const bindir_dir = dirname(Sys.BINDIR)
+const pathsep = Sys.iswindows() ? '\\' : '/'
+function generalize_file_paths(path::AbstractString)
+    return get!(generalize_file_paths_cache, path) do
+        path = replace(path,
+            Sys.STDLIB => "stdlib",
+            string(norm_sourcedir, pathsep) => "",
+            string(bindir_dir, pathsep) => ""
+        )
+        @static if Sys.iswindows()
+            path = replace(path, "\\" => "/")
+        end
+        return replace(path, "share/julia/" => "")
+    end
+end
+
+# raw_file_path,line => file,location
+const location_cache = Dict{Tuple{Symbol,Int},Tuple{String,String}}()
+function get_location(file::Symbol, line::Int)
+    return get!(location_cache, (file, line)) do
+        _file = generalize_file_paths(string(file))
+        _location = string(_file, ":", line)
+        return _file, _location
+    end
+end
+
+# passed, failed, skipped, or unknown
+function get_status(result)
+    if result isa Test.Pass && result.test_type === :skipped
+        "skipped"
+    elseif result isa Test.Broken
+        "skipped" # buildkite don't have a "broken" status
+    elseif result isa Test.Pass
+        "passed"
+    elseif result isa Test.Fail || result isa Test.Error
+        "failed"
+    else
+        "unknown"
+    end
+end
+
+# An attempt to reconstruct the test call.
+# Note we can't know if broken or skip was via the broken/skip macros or kwargs.
+const TEST_TYPE_MAP = Dict(
+    :test => "@test",
+    :test_nonbool => "@test",
+    :test_error => "@test",
+    :test_interrupted => "@test",
+    :test_unbroken => "@test_broken",
+    :skipped => "@test_skip",
+    :test_throws => "@test_throws",
+    :test_throws_wrong => "@test_throws",
+    :test_throws_nothing => "@test_throws"
+)
+function get_test_call_str(result)
+    if result.test_type === :nontest_error
+        return "Got exception outside of a @test"
+    end
+    prefix = get(TEST_TYPE_MAP, result.test_type, nothing)
+    prefix === nothing && return error("Unknown test type $(repr(result.test_type))")
+    return prefix == "@test_throws" ? "@test_throws $(result.data) $(result.orig_expr)" : "$prefix $(result.orig_expr)"
+end
+
+get_rid(rdata) = (rdata["location"], rdata["result"], haskey(rdata, "failure_expanded") ? hash(rdata["failure_expanded"]) : UInt64(0))
+
+const ResultCountDict = Dict{Tuple{String,String,UInt64},Int}
+
+function is_duplicate_pass(result::Test.Pass, location, status, result_counts::ResultCountDict)
+    rid = (location, status, UInt64(0))
+    count = get(result_counts, rid, nothing)
+    if count !== nothing
+        result_counts[rid] = count + 1
+        return true
+    end
+    return false
+end
+is_duplicate_pass(result::Test.Result, location, status, result_counts::ResultCountDict) = false
+
+function result_dict(result::Test.Result, result_counts::ResultCountDict)
+    file, line = if !hasproperty(result, :source) || isnothing(result.source)
+        :unknown, 0
+    else
+        something(result.source.file, :unknown), result.source.line
+    end
+    file, location = get_location(file, line)
+    status = get_status(result)
+
+    # Early exit for passed tests before more expensive operations
+    if is_duplicate_pass(result, location, status, result_counts)
+        return nothing
+    end
+
+    data = Dict{String,Any}(
+        "location" => location,
+        "result" => status,
+        "name" => get_test_call_str(result),
+        "file_name" => file)
+
+    if result isa Test.Fail || result isa Test.Error
+        job_label = replace(get(ENV, "BUILDKITE_LABEL", "job label not found"), r":\w+:\s*" => "")
+        result_show = sprint(show, result; context=:color => false)
+        firstline = split(result_show, '\n')[1]
+        # put the job label at the end here because of the way buildkite UI is laid out
+        data["failure_reason"] = generalize_file_paths(firstline) * " | $job_label"
+        err_trace = split(result_show, "\nStacktrace:\n", limit=2)
+        if length(err_trace) == 2
+            err, trace = err_trace
+            data["failure_expanded"] = [Dict{String,Any}("expanded" => split(err, '\n'), "backtrace" => split(trace, '\n'))]
+        else
+            data["failure_expanded"] = [Dict{String,Any}("expanded" => split(result_show, '\n'), "backtrace" => [])]
+        end
+    end
+
+    rid = get_rid(data)
+    duplicate = haskey(result_counts, rid)
+
+    if duplicate
+        result_counts[rid] += 1
+        return nothing
+    else
+        result_counts[rid] = 1
+        return data
+    end
+end
+
+function collect_results!(results::Vector{Dict{String,Any}}, result::Test.Result, common_data::Dict{String,Any}, result_counts::ResultCountDict)
+    rdata = result_dict(result, result_counts)
+    if rdata !== nothing # nothing if it's a duplicate that's been counted
+        push!(results, merge(common_data, rdata))
+    end
+end
+function collect_results!(results::Vector{Dict{String,Any}}, result::Test.DefaultTestSet, common_data::Dict{String,Any}, result_counts::ResultCountDict)
+    collect_results!(results, result, common_data["scope"])
+end
+function collect_results!(results::Vector{Dict{String,Any}}, result, common_data::Dict{String,Any}, result_counts::ResultCountDict)
+    return nothing
+end
+
+function collect_results!(results::Vector{Dict{String,Any}}, testset::Test.DefaultTestSet, prefix::String="")
+    common_data = result_dict(testset, prefix)
+    # testset duration is not relevant for individual test results
+    common_data["history"]["duration"] = 0.0 # required field
+    delete!(common_data["history"], "end_at")
+    result_offset = length(results) + 1
+    result_counts = ResultCountDict()
+
+    for result in testset.results
+        collect_results!(results, result, common_data, result_counts)
+    end
+    # Add a tag for count of each result
+    for result in results[result_offset:end]
+        rid = get_rid(result)
+        result["tags"]["count"] = string(get(result_counts, rid, 1))
+    end
+    return results
+end
+
+function serialize_testset_result_file(dir::String, testset::Test.DefaultTestSet)
+    data = Dict{String,Any}[]
+    t = @elapsed collect_results!(data, testset)
+    if t > 20 # most are << 5s
+        @warn "Collating test result data was slow: $t seconds" collated_results=length(data)
+    end
+    name = replace(testset.description, r"[^a-zA-Z0-9]" => "_")
+    res_file = joinpath(dir, "results_$(name).dat")
+    t = @elapsed Serialization.serialize(res_file, data)
+    if t > 10
+        @warn "Serializing test result data was slow: $t seconds" file = res_file size = Base.format_bytes(filesize(res_file))
+    end
+    return res_file
+end
+
+# deserilalizes the results files and writes them to collated JSON files of 5000 max results
+function write_testset_json_files(dir::String, testset::Test.DefaultTestSet)
+    data = Dict{String,Any}[]
+    read_files = String[]
+    # Set one result to represent the overall duration, given results have no duration
+    overall_ts = result_dict(testset)
+    # don't set location or file name for this result. They aren't required by BK
+    overall_ts["result"] = "unknown"
+    overall_ts["name"] = replace(get(ENV, "BUILDKITE_LABEL", "job label not found"), r":\w+:\s*" => "")
+    push!(data, overall_ts)
+    # Load all the serialized results files
+    for res_dat in filter!(x -> occursin(r"^results.*\.dat$", x), readdir(dir))
+        res_file = joinpath(dir, res_dat)
+        append!(data, Serialization.deserialize(res_file))
+        @debug "Loaded $(basename(res_file)) ($(Base.format_bytes(filesize(res_file))))"
+        push!(read_files, res_file)
+    end
+    files = String[]
+    # Buildkite is limited to 5000 results per file https://buildkite.com/docs/test-analytics/importing-json
+    for (i, chunk) in enumerate(Iterators.partition(data, 5000))
+        res_file = joinpath(dir, "results_$(lpad(i, 3, '0')).json")
+        open(io -> json_repr(io, chunk), res_file, "w")
+        push!(files, res_file)
+        @debug "Saved $(basename(res_file)) ($(length(chunk)) results, $(Base.format_bytes(filesize(res_file))))"
+    end
+    for res_file in read_files
+        rm(res_file)
+        @debug "Deleted $(basename(res_file))"
+    end
+    return files
+end
+
+end
diff --git a/test/cartesian.jl b/test/cartesian.jl
index ed33f2c1035f7..e2c064ec0c55f 100644
--- a/test/cartesian.jl
+++ b/test/cartesian.jl
@@ -1,12 +1,20 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-@test Base.Cartesian.exprresolve(:(1 + 3)) == 4
+
 ex = Base.Cartesian.exprresolve(:(if 5 > 4; :x; else :y; end))
 @test ex.args[2] == QuoteNode(:x)
 
-@test Base.Cartesian.lreplace!("val_col", Base.Cartesian.LReplace{String}(:col, "col", 1)) == "val_1"
+@test Base.Cartesian.lreplace_string!("val_col", Base.Cartesian.LReplace{String}(:col, "col", 1)) == "val_1"
 @test Base.setindex(CartesianIndex(1,5,4),3,2) == CartesianIndex(1, 3, 4)
-
+@testset "Expression Resolve" begin
+    @test Base.Cartesian.exprresolve(:(1 + 3)) == 4
+    ex1 = Expr(:ref, [1, 2, 3], 2)
+    result1 = Base.Cartesian.exprresolve(ex1)
+    @test result1 == 2
+    ex2 = Expr(:ref, [1, 2, 3], "non-real-index")
+    result2 = Base.Cartesian.exprresolve(ex2)
+    @test result2 == ex2
+end
 @testset "CartesianIndices constructions" begin
     @testset "AbstractUnitRange" begin
         for oinds in [
@@ -296,8 +304,7 @@ end
     R = CartesianIndex(1, 1):CartesianIndex(2, 3):CartesianIndex(4, 5)
     @test R.indices == (1:2:3, 1:3:4)
     i = CartesianIndex(4, 1)
-    i_next = CartesianIndex(1, 4)
-    @test !(i in R) && iterate(R, i) == (i_next, i_next)
+    @test !(i in R)
 
     for R in [
         CartesianIndices((1:-1:-1, 1:2:5)),
@@ -393,19 +400,20 @@ end
 
 @testset "CartesianIndices overflow" begin
     @testset "incremental steps" begin
+        # n.b. typemax is an odd number
         I = CartesianIndices((1:typemax(Int),))
         i = last(I)
         @test iterate(I, i) === nothing
 
         I = CartesianIndices((1:2:typemax(Int), ))
-        i = CartesianIndex(typemax(Int)-1)
+        i = CartesianIndex(typemax(Int))
         @test iterate(I, i) === nothing
 
         I = CartesianIndices((1:(typemax(Int)-1),))
-        i = CartesianIndex(typemax(Int))
+        i = CartesianIndex(typemax(Int)-1)
         @test iterate(I, i) === nothing
 
-        I = CartesianIndices((1:2:typemax(Int)-1, ))
+        I = CartesianIndices((2:2:typemax(Int)-1, ))
         i = CartesianIndex(typemax(Int)-1)
         @test iterate(I, i) === nothing
 
@@ -413,7 +421,7 @@ end
         i = last(I)
         @test iterate(I, i) === nothing
 
-        I = CartesianIndices((1:2:typemax(Int), 1:2:typemax(Int)))
+        I = CartesianIndices((2:2:typemax(Int), 2:2:typemax(Int)))
         i = CartesianIndex(typemax(Int)-1, typemax(Int)-1)
         @test iterate(I, i) === nothing
 
@@ -421,9 +429,9 @@ end
         i = CartesianIndex(typemax(Int), 1)
         @test iterate(I, i) === (CartesianIndex(1, 2), CartesianIndex(1,2))
 
-        I = CartesianIndices((1:2:typemax(Int), 1:2:typemax(Int)))
+        I = CartesianIndices((2:2:typemax(Int), 2:2:typemax(Int)))
         i = CartesianIndex(typemax(Int)-1, 1)
-        @test iterate(I, i) === (CartesianIndex(1, 3), CartesianIndex(1, 3))
+        @test iterate(I, i) === (CartesianIndex(2, 3), CartesianIndex(2, 3))
 
         I = CartesianIndices((typemin(Int):(typemin(Int)+3),))
         i = last(I)
@@ -493,15 +501,6 @@ end
     end
     @test length(I) == length(indices)
     @test vec(collect(I)) == indices
-
-    # test invalid state
-    I = CartesianIndices((2:4, 3:5))
-    @test iterate(I, CartesianIndex(typemax(Int), 3))[1] == CartesianIndex(2,4)
-    @test iterate(I, CartesianIndex(typemax(Int), 4))[1] == CartesianIndex(2,5)
-    @test iterate(I, CartesianIndex(typemax(Int), 5))    === nothing
-
-    @test iterate(I, CartesianIndex(3, typemax(Int)))[1] == CartesianIndex(4,typemax(Int))
-    @test iterate(I, CartesianIndex(4, typemax(Int)))    === nothing
 end
 
 @testset "CartesianIndices operations" begin
@@ -542,3 +541,89 @@ end
     inds2 = (1, CI(1, 2), 1, CI(1, 2), 1, CI(1, 2), 1)
     @test (@inferred CI(inds2)) == CI(1, 1, 2, 1, 1, 2, 1, 1, 2, 1)
 end
+
+@testset "@ncallkw" begin
+    f(x...; a, b = 1, c = 2, d = 3) = +(x..., a, b, c, d)
+    x_1, x_2 = (-1, -2)
+    kw = (a = 0, c = 0, d = 0)
+    @test x_1 + x_2 + 1 + 4 == Base.Cartesian.@ncallkw 2 f kw 4 x
+    b = 0
+    kw = (c = 0, d = 0)
+    @test x_1 + x_2 + 4 == Base.Cartesian.@ncallkw 2 f (; a = 0, b, kw...) 4 x
+end
+
+@testset "if with and without else branch" begin
+    t1 = Base.Cartesian.@ntuple 3 i -> i == 1 ? 1 : 0
+    t2 = Base.Cartesian.@ntuple 3 i -> begin
+        m = 0
+        if i == 1
+            m = 1
+        end
+        m
+    end
+    @test t1 == t2
+    t3 = Base.Cartesian.@ntuple 3 i -> begin
+        m = 0
+        if i == 1
+            m = 1
+        elseif i == 2
+            m = 2
+        end
+        m
+    end
+    @test t3 == (1, 2, 0)
+end
+
+@testset "CartesianIndex show" begin
+    c = CartesianIndex()
+    @test sprint(show, c) == "CartesianIndex()"
+    c = CartesianIndex(3)
+    @test sprint(show, c) == "CartesianIndex(3)"
+    c = CartesianIndex(3, 3)
+    @test sprint(show, c) == "CartesianIndex(3, 3)"
+end
+
+@testset "CartesianIndex indexing with begin/end" begin
+    I = CartesianIndex(3,4)
+    @test I[begin] == I[1]
+    @test I[end] == I[2]
+end
+
+@testset "in for a CartesianIndex StepRangeLen" begin
+    @testset for l in [0, 1, 4], r in Any[
+            StepRangeLen(CartesianIndex(), CartesianIndex(), l),
+            StepRangeLen(CartesianIndex(1), CartesianIndex(0), l),
+            StepRangeLen(CartesianIndex(1), CartesianIndex(1), l),
+            StepRangeLen(CartesianIndex(1), CartesianIndex(4), l),
+            StepRangeLen(CartesianIndex(1), CartesianIndex(-4), l),
+            StepRangeLen(CartesianIndex(-1, 2), CartesianIndex(0, 0), l),
+            StepRangeLen(CartesianIndex(-1, 2), CartesianIndex(0, 4), l),
+            StepRangeLen(CartesianIndex(-1, 2), CartesianIndex(0, -4), l),
+            StepRangeLen(CartesianIndex(-1, 2), CartesianIndex(4, 0), l),
+            StepRangeLen(CartesianIndex(-1, 2), CartesianIndex(-4, 0), l),
+            StepRangeLen(CartesianIndex(-1, 2), CartesianIndex(4, 2), l),
+            StepRangeLen(CartesianIndex(-1, 2), CartesianIndex(-4, 2), l),
+            StepRangeLen(CartesianIndex(-1, 2), CartesianIndex(4, -2), l),
+            StepRangeLen(CartesianIndex(-1, 2), CartesianIndex(-4, -2), l),
+            StepRangeLen(CartesianIndex(-1, 2, 0), CartesianIndex(0, 0, 0), l),
+            StepRangeLen(CartesianIndex(-1, 2, 0), CartesianIndex(0, 0, -2), l),
+            ]
+
+        if length(r) == 0
+            @test !(first(r) in r)
+            @test !(last(r) in r)
+        end
+        for x in r
+            @test x in r
+            if step(r) != oneunit(x)
+                @test !((x + oneunit(x)) in r)
+            end
+        end
+        @test !(CartesianIndex(ntuple(x->0, ndims(r))) in r)
+        @test !(CartesianIndex(ntuple(x->typemax(Int), ndims(r))) in r)
+        @test !(CartesianIndex(ntuple(x->typemin(Int), ndims(r))) in r)
+        if ndims(r) > 1
+            @test !(CartesianIndex(ntuple(x->0, ndims(r)-1)...) in r)
+        end
+    end
+end
diff --git a/test/ccall.jl b/test/ccall.jl
index 7e166ddbd9041..633af8e6a8495 100644
--- a/test/ccall.jl
+++ b/test/ccall.jl
@@ -1477,7 +1477,7 @@ end
 # issue #20835
 @test_throws(ErrorException("could not evaluate ccall argument type (it might depend on a local variable)"),
              eval(:(f20835(x) = ccall(:fn, Cvoid, (Ptr{typeof(x)},), x))))
-@test_throws(UndefVarError(:Something_not_defined_20835),
+@test_throws(UndefVarError(:Something_not_defined_20835, @__MODULE__),
              eval(:(f20835(x) = ccall(:fn, Something_not_defined_20835, (Ptr{typeof(x)},), x))))
 @test isempty(methods(f20835))
 
@@ -1537,6 +1537,14 @@ fn45187() = nothing
 @test Expr(:error, "only the trailing ccall argument type should have \"...\"") == Meta.lower(@__MODULE__, :(ccall(:fn, A, (A, B..., C...), a, x, y, z)))
 @test Expr(:error, "more types than arguments for ccall") == Meta.lower(@__MODULE__, :(ccall(:fn, A, (B, C...), )))
 
+# test for ccall first argument tuple validation errors
+@test_throws "ccall function name cannot be empty tuple" eval(:(f() = ccall((), A, (), )))
+@test_throws "ccall function name tuple can have at most 2 elements" eval(:(f() = ccall((:a, :b, :c), A, (), )))
+@test_throws "ccall function name tuple can have at most 2 elements" eval(:(f() = ccall((:a, :b, :c, :d), A, (), )))
+@test_throws TypeError eval(:(f() = ccall((1 + 2,), A, (), )))
+@test_throws TypeError eval(:(f() = ccall((:a, 1 + 2), A, (), )))
+@test_throws TypeError eval(:(ccall_lazy_lib_name(x) = ccall((:testUcharX, compute_lib_name()), Int32, (UInt8,), x % UInt8)))
+
 # cfunction on non-function singleton
 struct CallableSingleton
 end
@@ -1745,23 +1753,22 @@ using Base: ccall_macro_parse, ccall_macro_lower
         :Cvoid,                           # returntype
         Any[:Cstring, :Cstring, :Cint],   # argument types
         Any["%s = %d\n", :name, :value],  # argument symbols
+        false,                            # is gc_safe
         1                                 # number of required arguments (for varargs)
     )
 end
 
 @testset "ensure the base-case of @ccall works, including library name and pointer interpolation" begin
-    call = ccall_macro_lower(:ccall, ccall_macro_parse( :( libstring.func(
+    ccallmacro = ccall_macro_lower(:ccall, ccall_macro_parse( :( libstring.func(
         str::Cstring,
         num1::Cint,
         num2::Cint
     )::Cstring))...)
-    @test call == Base.remove_linenums!(
-        quote
-        ccall($(Expr(:escape, :((:func, libstring)))), $(Expr(:cconv, :ccall, 0)), $(Expr(:escape, :Cstring)), ($(Expr(:escape, :Cstring)), $(Expr(:escape, :Cint)), $(Expr(:escape, :Cint))), $(Expr(:escape, :str)), $(Expr(:escape, :num1)), $(Expr(:escape, :num2)))
-        end)
+    ccallfunction = :(ccall($(Expr(:escape, :((:func, libstring)))), $(Expr(:cconv, (:ccall, UInt16(0), false), 0)), $(Expr(:escape, :Cstring)), ($(Expr(:escape, :Cstring)), $(Expr(:escape, :Cint)), $(Expr(:escape, :Cint))), $(Expr(:escape, :str)), $(Expr(:escape, :num1)), $(Expr(:escape, :num2))))
+    @test ccallmacro == ccallfunction
 
     local fptr = :x
-    @test_throws ArgumentError("interpolated function `fptr` was not a Ptr{Cvoid}, but Symbol") @ccall $fptr()::Cvoid
+    @test_throws TypeError @ccall $fptr()::Cvoid
 end
 
 @testset "check error paths" begin
@@ -1776,7 +1783,7 @@ end
     # no required args on varargs call
     @test_throws ArgumentError("C ABI prohibits vararg without one required argument") ccall_macro_parse(:( foo(; x::Cint)::Cint ))
     # not a function pointer
-    @test_throws ArgumentError("interpolated function `PROGRAM_FILE` was not a Ptr{Cvoid}, but String") @ccall $PROGRAM_FILE("foo"::Cstring)::Cvoid
+    @test_throws TypeError @ccall $PROGRAM_FILE("foo"::Cstring)::Cvoid
 end
 
 @testset "check error path for @cfunction" begin
@@ -1833,12 +1840,8 @@ end
 end
 
 # issue #36458
-compute_lib_name() = "libcc" * "alltest"
-ccall_lazy_lib_name(x) = ccall((:testUcharX, compute_lib_name()), Int32, (UInt8,), x % UInt8)
-@test ccall_lazy_lib_name(0) == 0
-@test ccall_lazy_lib_name(3) == 1
 ccall_with_undefined_lib() = ccall((:time, xx_nOt_DeFiNeD_xx), Cint, (Ptr{Cvoid},), C_NULL)
-@test_throws UndefVarError(:xx_nOt_DeFiNeD_xx) ccall_with_undefined_lib()
+@test_throws UndefVarError(:xx_nOt_DeFiNeD_xx, @__MODULE__) ccall_with_undefined_lib()
 
 @testset "transcode for UInt8 and UInt16" begin
     a   = [UInt8(1), UInt8(2), UInt8(3)]
@@ -1915,3 +1918,114 @@ end
     ctest_total_const() = Val{ctest_total(1 + 2im)}()
     Core.Compiler.return_type(ctest_total_const, Tuple{}) == Val{2 + 0im}
 end
+
+const libfrobozz = ""
+
+function somefunction_not_found()
+    ccall((:somefunction, libfrobozz), Cvoid, ())
+end
+
+function somefunction_not_found_libc()
+    ccall(:test,Int,())
+end
+
+@testset "library not found" begin
+    if Sys.islinux()
+        @test_throws "could not load symbol \"somefunction\"" somefunction_not_found()
+    else
+        @test_throws "could not load library \"\"" somefunction_not_found()
+    end
+    @test_throws "could not load symbol \"test\"" somefunction_not_found_libc()
+end
+
+# issue #52025
+@test Base.unsafe_convert(Ptr{Ptr{Cchar}}, Base.cconvert(Ptr{Ptr{Cchar}}, map(pointer, ["ab"]))) isa Ptr{Ptr{Cchar}}
+#issue #54725
+for A in (reinterpret(UInt, [0]), reshape([0, 0], 1, 2))
+    @test pointer(A) == Base.unsafe_convert(Ptr{Cvoid}, A) == Base.unsafe_convert(Ptr{Int}, A)
+end
+# Cglobal with non-static symbols doesn't error
+function cglobal_non_static1()
+    sym = (:global_var, libccalltest)
+    cglobal(sym)
+end
+global the_sym = (:global_var, libccalltest)
+cglobal_non_static2() = cglobal(the_sym)
+
+@test isa(cglobal_non_static1(), Ptr)
+@test isa(cglobal_non_static2(), Ptr)
+
+@generated function generated_world_counter()
+    return :($(Base.get_world_counter()))
+end
+function world_counter()
+    return Base.get_world_counter()
+end
+let llvm = sprint(code_llvm, world_counter, ())
+    # check that we got a reasonable value for the world age
+    @test (world_counter() != 0) && (world_counter() != -1)
+    # no call to the runtime should be left over
+    @test !occursin("call i64", llvm)
+    # the world age should be -1 in generated functions (or other pure contexts)
+    @test (generated_world_counter() == reinterpret(UInt, -1))
+end
+
+function gc_safe_ccall()
+    # jl_rand is marked as JL_NOTSAFEPOINT
+    Base.@assume_effects :nothrow @ccall gc_safe=true jl_rand()::UInt64
+end
+
+let llvm = sprint(code_llvm, gc_safe_ccall, ())
+    # check that the call works
+    @test gc_safe_ccall() isa UInt64
+    # check for the gc_safe store
+    @test occursin("store atomic i8 2", llvm)
+    @test Base.infer_effects(gc_safe_ccall, Tuple{}).nothrow == true
+end
+
+@testset "jl_dlfind and dlsym" begin
+    # Test that jl_dlfind finds things in the expected places.
+    @test ccall(:jl_dlfind, Int, (Cstring,), "doesnotexist") == 0       # not found (RTLD_DEFAULT)
+    @static if !Sys.iswindows()
+        @test ccall(:jl_dlfind, Int, (Cstring,), "main") == 1               # JL_EXE_LIBNAME
+    end
+    @test ccall(:jl_dlfind, Int, (Cstring,), "jl_gc_safepoint") == 2    # JL_LIBJULIA_DL_LIBNAME
+    @test ccall(:jl_dlfind, Int, (Cstring,), "ijl_gc_small_alloc") == 3 # JL_LIBJULIA_INTERNAL_DL_LIBNME
+    @test ccall(:jl_dlfind, Int, (Cstring,), "malloc") ∉ (1, 2, 3)      # Either 0 or msvcrt.dll on Windows
+    let hdl = Libdl.dlopen(libccalltest, Libdl.RTLD_GLOBAL)
+        try
+            @static if Sys.iswindows()
+                @test_throws ErrorException ccall(:get_c_int, Cint, ())
+            else
+                @test ccall(:get_c_int, Cint, ()) isa Cint
+            end
+        finally
+            Libdl.dlclose(hdl)
+        end
+    end
+end
+
+# issue #51293: Ensure we can load libraries even when a directory with the same name exists
+@testset "dlload with directory collision" begin
+    mktempdir() do dir
+        # Create a subdirectory with the same name as our test library
+        libdir = joinpath(dir, "libccalltest")
+        mkdir(libdir)
+
+        # Try to load libccalltest from within this directory
+        cd(dir) do
+            # This should successfully load the library from DL_LOAD_PATH, not fail due to the directory
+            hdl = Libdl.dlopen(libccalltest)
+            @test hdl != C_NULL
+            Libdl.dlclose(hdl)
+        end
+    end
+end
+
+module Test57749
+using Test, Zstd_jll
+const prefix = "Zstd version: "
+const sym = :ZSTD_versionString
+get_zstd_version() = prefix * unsafe_string(ccall((sym, libzstd), Cstring, ()))
+@test startswith(get_zstd_version(), "Zstd")
+end
diff --git a/test/channel_threadpool.jl b/test/channel_threadpool.jl
new file mode 100644
index 0000000000000..54c2fc0f83e09
--- /dev/null
+++ b/test/channel_threadpool.jl
@@ -0,0 +1,12 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+using Base.Threads
+
+c = Channel{Symbol}() do c; put!(c, threadpool(current_task())); end
+@test take!(c) === threadpool(current_task())
+c = Channel{Symbol}(spawn = true) do c; put!(c, threadpool(current_task())); end
+@test take!(c) === :default
+c = Channel{Symbol}(threadpool = :interactive) do c; put!(c, threadpool(current_task())); end
+@test take!(c) === :interactive
+@test_throws ArgumentError Channel{Symbol}(threadpool = :foo) do c; put!(c, :foo); end
diff --git a/test/channels.jl b/test/channels.jl
index dbda5cf069081..721eb478bd13a 100644
--- a/test/channels.jl
+++ b/test/channels.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Random
+using Base.Threads
 using Base: Experimental
 using Base: n_avail
 
@@ -12,6 +13,9 @@ using Base: n_avail
     end
     @test wait(a) == "success"
     @test fetch(t) == "finished"
+
+    # Test printing
+    @test repr(a) == "Condition()"
 end
 
 @testset "wait first behavior of wait on Condition" begin
@@ -36,10 +40,27 @@ end
     @test fetch(t) == "finished"
 end
 
+@testset "wait_with_timeout on Condition" begin
+    a = Threads.Condition()
+    @test @lock a Experimental.wait_with_timeout(a; timeout=0.1)==:timed_out
+    lock(a)
+    @spawn begin
+        @lock a notify(a)
+    end
+    @test try
+        Experimental.wait_with_timeout(a; timeout=2)
+        true
+    finally
+        unlock(a)
+    end
+end
+
 @testset "various constructors" begin
     c = Channel()
     @test eltype(c) == Any
     @test c.sz_max == 0
+    @test isempty(c) == true  # Nothing in it
+    @test isfull(c) == true   # But no more room
 
     c = Channel(1)
     @test eltype(c) == Any
@@ -49,6 +70,11 @@ end
     @test isready(c) == false
     @test eltype(Channel(1.0)) == Any
 
+    c = Channel(1)
+    @test isfull(c) == false
+    put!(c, 1)
+    @test isfull(c) == true
+
     c = Channel{Int}(1)
     @test eltype(c) == Int
     @test_throws MethodError put!(c, "Hello")
@@ -107,6 +133,11 @@ end
     @test taskref[].sticky == false
     @test collect(c) == [0]
 end
+let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no channel_threadpool.jl`
+    new_env = copy(ENV)
+    new_env["JULIA_NUM_THREADS"] = "1,1"
+    run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))
+end
 
 @testset "multiple concurrent put!/take! on a channel for different sizes" begin
     function testcpt(sz)
@@ -370,7 +401,7 @@ end
         """error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer")""", output))
     # test for invalid state in Workqueue during yield
     t = @async nothing
-    t._state = 66
+    @atomic t._state = 66
     newstderr = redirect_stderr()
     try
         errstream = @async read(newstderr[1], String)
@@ -432,15 +463,14 @@ end
         cb = first(async.cond.waitq)
         @test isopen(async)
         ccall(:uv_async_send, Cvoid, (Ptr{Cvoid},), async)
-        ccall(:uv_async_send, Cvoid, (Ptr{Cvoid},), async)
-        @test isempty(Base.Workqueue)
         Base.process_events() # schedule event
         Sys.iswindows() && Base.process_events() # schedule event (windows?)
-        @test length(Base.Workqueue) == 1
         ccall(:uv_async_send, Cvoid, (Ptr{Cvoid},), async)
         @test tc[] == 0
         yield() # consume event
         @test tc[] == 1
+        ccall(:uv_async_send, Cvoid, (Ptr{Cvoid},), async)
+        Base.process_events()
         Sys.iswindows() && Base.process_events() # schedule event (windows?)
         yield() # consume event
         @test tc[] == 2
@@ -452,8 +482,8 @@ end
         Sys.iswindows() && Base.process_events() # schedule event (windows?)
         close(async) # and close
         @test !isopen(async)
-        @test tc[] == 2
-        @test tc[] == 2
+        @test tc[] == 3
+        @test tc[] == 3
         yield() # consume event & then close
         @test tc[] == 3
         sleep(0.1) # no further events
@@ -474,7 +504,7 @@ end
         close(async)
         @test !isopen(async)
         Base.process_events() # and close
-        @test tc[] == 0
+        @test tc[] == 1
         yield() # consume event & then close
         @test tc[] == 1
         sleep(0.1) # no further events
@@ -484,12 +514,35 @@ end
     end
 end
 
+struct CustomError <: Exception end
+
 @testset "check_channel_state" begin
     c = Channel(1)
     close(c)
     @test !isopen(c)
-    c.excp == nothing # to trigger the branch
+    c.excp === nothing # to trigger the branch
     @test_throws InvalidStateException Base.check_channel_state(c)
+
+    # Issue 52974 - closed channels with exceptions
+    # must be thrown on iteration, if channel is empty
+    c = Channel(2)
+    put!(c, 5)
+    close(c, CustomError())
+    @test take!(c) == 5
+    @test_throws CustomError iterate(c)
+
+    c = Channel(Inf)
+    put!(c, 1)
+    close(c)
+    @test take!(c) == 1
+    @test_throws InvalidStateException take!(c)
+    @test_throws InvalidStateException put!(c, 5)
+
+    c = Channel(3)
+    put!(c, 1)
+    close(c)
+    @test first(iterate(c)) == 1
+    @test isnothing(iterate(c))
 end
 
 # PR #36641
@@ -535,8 +588,11 @@ end
 # make sure 1-shot timers work
 let a = []
     Timer(t -> push!(a, 1), 0.01, interval = 0)
-    sleep(0.2)
-    @test a == [1]
+    @test timedwait(() -> a == [1], 10) === :ok
+end
+let a = []
+    Timer(t -> push!(a, 1), 0.01, interval = 0, spawn = true)
+    @test timedwait(() -> a == [1], 10) === :ok
 end
 
 # make sure that we don't accidentally create a one-shot timer
@@ -554,7 +610,7 @@ end
     e = @elapsed for i = 1:5
         wait(t)
     end
-    @test 1.5 > e >= 0.4
+    @test e >= 0.4
     @test a[] == 0
     nothing
 end
@@ -564,6 +620,16 @@ let a = Ref(0)
     @test a[] == 1
 end
 
+@testset "Timer properties" begin
+    t = Timer(1.0, interval = 0.5)
+    @test t.timeout == 1.0
+    @test t.interval == 0.5
+    close(t)
+    @test !isopen(t)
+    @test t.timeout == 1.0
+    @test t.interval == 0.5
+end
+
 # trying to `schedule` a finished task
 let t = @async nothing
     wait(t)
@@ -626,3 +692,11 @@ end
         @test n_avail(c) == 0
     end
 end
+
+@testset "Task properties" begin
+    f() = rand(2,2)
+    t = Task(f)
+    message = "Querying a Task's `scope` field is disallowed.\nThe private `Core.current_scope()` function is better, though still an implementation detail."
+    @test_throws ErrorException(message) t.scope
+    @test t.state == :runnable
+end
diff --git a/test/char.jl b/test/char.jl
index 1639c62ec819d..a576ad072b1d8 100644
--- a/test/char.jl
+++ b/test/char.jl
@@ -1,7 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 @testset "basic properties" begin
-
     @test typemax(Char) == reinterpret(Char, typemax(UInt32))
     @test typemin(Char) == Char(0)
     @test typemax(Char) == reinterpret(Char, 0xffffffff)
@@ -122,7 +121,7 @@ end
     #iterate(c::Char)
     for x in testarrays
         @test iterate(x)[1] == x
-        @test iterate(x, iterate(x)[2]) == nothing
+        @test iterate(x, iterate(x)[2]) === nothing
     end
 
     #isless(x::Char, y::Integer) = isless(UInt32(x), y)
@@ -214,10 +213,39 @@ end
     end
 end
 
+# issue #50532
+@testset "invalid read(io, Char)" begin
+    # byte values with different numbers of leading bits
+    B = UInt8[
+        0x3f, 0x4d, 0x52, 0x63, 0x81, 0x83, 0x89, 0xb6,
+        0xc0, 0xc8, 0xd3, 0xe3, 0xea, 0xeb, 0xf0, 0xf2,
+        0xf4, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+    ]
+    f = tempname()
+    for b1 in B, b2 in B, t = 0:3
+        bytes = [b1, b2]
+        append!(bytes, rand(B, t))
+        s = String(bytes)
+        write(f, s)
+        @test s == read(f, String)
+        chars = collect(s)
+        ios = [IOBuffer(s), open(f), Base.Filesystem.open(f, 0)]
+        for io in ios
+            chars′ = Char[]
+            while !eof(io)
+                push!(chars′, read(io, Char))
+            end
+            @test chars == chars′
+            close(io)
+        end
+    end
+    rm(f)
+end
+
 @testset "overlong codes" begin
     function test_overlong(c::Char, n::Integer, rep::String)
-        if isvalid(c)
-            @test Int(c) == n
+        if !Base.ismalformed(c)
+            @test Int(c) == n == codepoint(c)
         else
             @test_throws Base.InvalidCharError UInt32(c)
         end
@@ -260,6 +288,11 @@ Base.codepoint(c::ASCIIChar) = reinterpret(UInt8, c)
     @test string(ASCIIChar('x')) == "x"
     @test length(ASCIIChar('x')) == 1
     @test !isempty(ASCIIChar('x'))
+    @test ndims(ASCIIChar('x')) == 0
+    @test ndims(ASCIIChar) == 0
+    @test Base.IteratorSize(ASCIIChar) === Base.HasShape{0}()
+    @test firstindex(ASCIIChar('x')) == 1
+    @test lastindex(ASCIIChar('x')) == 1
     @test eltype(ASCIIChar) == ASCIIChar
     @test_throws MethodError write(IOBuffer(), ASCIIChar('x'))
     @test_throws MethodError read(IOBuffer('x'), ASCIIChar)
@@ -324,6 +357,15 @@ end
         "'\\xc0': Malformed UTF-8 (category Ma: Malformed, bad data)"
 end
 
+@testset "overlong, non-malformed chars" begin
+    c = ['\xc0\xa0', '\xf0\x8e\x80\x80']
+    @test all(Base.isoverlong, c)
+    @test !any(Base.ismalformed, c)
+    @test repr("text/plain", c[1]) == "'\\xc0\\xa0': [overlong] ASCII/Unicode U+0020 (category Zs: Separator, space)"
+    @test codepoint.(c) == [0x20, 0xE000]
+    @test isuppercase(c[1]) == isuppercase(c[2]) == false # issue #54343
+end
+
 @testset "More fallback tests" begin
     @test length(ASCIIChar('x')) == 1
     @test firstindex(ASCIIChar('x')) == 1
@@ -332,3 +374,31 @@ end
     @test Base.IteratorSize(Char) == Base.HasShape{0}()
     @test convert(ASCIIChar, 1) == Char(1)
 end
+
+@testset "foldable functions" begin
+    v = @inferred (() -> Val(isuppercase('C')))()
+    @test v isa Val{true}
+    v = @inferred (() -> Val(islowercase('C')))()
+    @test v isa Val{false}
+
+    v = @inferred (() -> Val(isletter('C')))()
+    @test v isa Val{true}
+    v = @inferred (() -> Val(isnumeric('C')))()
+    @test v isa Val{false}
+
+    struct MyChar <: AbstractChar
+        x :: Char
+    end
+    Base.codepoint(m::MyChar) = codepoint(m.x)
+    MyChar(x::UInt32) = MyChar(Char(x))
+
+    v = @inferred (() -> Val(isuppercase(MyChar('C'))))()
+    @test v isa Val{true}
+    v = @inferred (() -> Val(islowercase(MyChar('C'))))()
+    @test v isa Val{false}
+
+    v = @inferred (() -> Val(isletter(MyChar('C'))))()
+    @test v isa Val{true}
+    v = @inferred (() -> Val(isnumeric(MyChar('C'))))()
+    @test v isa Val{false}
+end
diff --git a/test/checked.jl b/test/checked.jl
index bacda3db75dec..b93c8796162c5 100644
--- a/test/checked.jl
+++ b/test/checked.jl
@@ -3,7 +3,7 @@
 # Checked integer arithmetic
 
 import Base: checked_abs, checked_neg, checked_add, checked_sub, checked_mul,
-             checked_div, checked_rem, checked_fld, checked_mod, checked_cld,
+             checked_div, checked_rem, checked_fld, checked_mod, checked_cld, checked_pow,
              add_with_overflow, sub_with_overflow, mul_with_overflow
 
 # checked operations
@@ -166,6 +166,19 @@ import Base: checked_abs, checked_neg, checked_add, checked_sub, checked_mul,
     @test checked_cld(typemin(T), T(1)) === typemin(T)
     @test_throws DivideError checked_cld(typemin(T), T(0))
     @test_throws DivideError checked_cld(typemin(T), T(-1))
+
+    @test checked_pow(T(1), T(0)) === T(1)
+    @test checked_pow(typemax(T), T(0)) === T(1)
+    @test checked_pow(typemin(T), T(0)) === T(1)
+    @test checked_pow(T(1), T(1)) === T(1)
+    @test checked_pow(T(1), typemax(T)) === T(1)
+    @test checked_pow(T(2), T(2)) === T(4)
+    @test_throws OverflowError checked_pow(T(2), typemax(T))
+    @test_throws OverflowError checked_pow(T(-2), typemax(T))
+    @test_throws OverflowError checked_pow(typemax(T), T(2))
+    @test_throws OverflowError checked_pow(typemin(T), T(2))
+    @test_throws DomainError checked_pow(T(2), -T(1))
+    @test_throws DomainError checked_pow(-T(2), -T(1))
 end
 
 @testset for T in (UInt8, UInt16, UInt32, UInt64, UInt128)
@@ -296,6 +309,10 @@ end
     @test checked_cld(true, true) === true
     @test checked_cld(false, true) === false
     @test_throws DivideError checked_cld(true, false)
+
+    @test checked_pow(true, 1) === true
+    @test checked_pow(true, 1000000) === true
+    @test checked_pow(false, 1000000) === false
 end
 @testset "BigInt" begin
     @test checked_abs(BigInt(-1)) == BigInt(1)
@@ -310,6 +327,16 @@ end
     @test checked_fld(BigInt(10), BigInt(3)) == BigInt(3)
     @test checked_mod(BigInt(9), BigInt(4)) == BigInt(1)
     @test checked_cld(BigInt(10), BigInt(3)) == BigInt(4)
+
+    @test checked_pow(BigInt(2), 2) == BigInt(4)
+    @test checked_pow(BigInt(2), 100) == BigInt(1267650600228229401496703205376)
+
+    # FIXME: Issue #57103: the following test may fail because
+    # allocation may not be logged via MMTk's fastpath allocation
+    @static if Base.USING_STOCK_GC
+        # Perf test: Make sure BigInts allocs don't scale with the power:
+        @test @allocations(checked_pow(BigInt(2), 2)) ≈ @allocations(checked_pow(BigInt(2), 10000)) rtol=0.9
+    end
 end
 
 @testset "Additional tests" begin
@@ -358,3 +385,7 @@ end
     @test checked_mul(1, 2, 3, 4, 5, 6, 7) === 5040
     @test checked_mul(1, 2, 3, 4, 5, 6, 7, 8) === 40320
 end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(Base.Checked))
+end
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 18af88ea191e9..3e315523a40a1 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -6,7 +6,7 @@ const STDLIB_DIR = Sys.STDLIB
 const STDLIBS = filter!(x -> isfile(joinpath(STDLIB_DIR, x, "src", "$(x).jl")), readdir(STDLIB_DIR))
 
 const TESTNAMES = [
-        "subarray", "core", "compiler", "worlds", "atomics",
+        "subarray", "core", "compiler", "compiler_extras", "worlds", "atomics",
         "keywordargs", "numbers", "subtype",
         "char", "strings", "triplequote", "unicode", "intrinsics",
         "dict", "hashing", "iobuffer", "staged", "offsetarray",
@@ -19,16 +19,19 @@ const TESTNAMES = [
         "mpfr", "broadcast", "complex",
         "floatapprox", "stdlib", "reflection", "regex", "float16",
         "combinatorics", "sysinfo", "env", "rounding", "ranges", "mod2pi",
-        "euler", "show", "client",
+        "euler", "show", "client", "terminfo",
         "errorshow", "sets", "goto", "llvmcall", "llvmcall2", "ryu",
         "some", "meta", "stacktraces", "docs", "gc",
-        "misc", "threads", "stress", "binaryplatforms", "atexit",
+        "misc", "threads", "stress", "binaryplatforms","stdlib_dependencies", "atexit",
         "enums", "cmdlineargs", "int", "interpreter",
-        "checked", "bitset", "floatfuncs", "precompile",
+        "checked", "bitset", "floatfuncs", "precompile", "relocatedepot",
         "boundscheck", "error", "ambiguous", "cartesian", "osutils",
         "channels", "iostream", "secretbuffer", "specificity",
         "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap",
         "smallarrayshrink", "opaque_closure", "filesystem", "download",
+        "scopedvalues", "compileall", "rebinding",
+        "faulty_constructor_method_should_not_cause_stack_overflows",
+        "JuliaSyntax", "JuliaLowering",
 ]
 
 const INTERNET_REQUIRED_LIST = [
@@ -44,6 +47,38 @@ const INTERNET_REQUIRED_LIST = [
 
 const NETWORK_REQUIRED_LIST = vcat(INTERNET_REQUIRED_LIST, ["Sockets"])
 
+const TOP_LEVEL_PKGS = [
+    "Compiler",
+    "JuliaSyntax",
+    "JuliaLowering",
+]
+
+function test_path(test)
+    t = split(test, '/')
+    if t[1] in STDLIBS
+        pkgdir = abspath(Base.find_package(String(t[1])), "..", "..")
+        if length(t) == 2
+            return joinpath(pkgdir, "test", t[2])
+        else
+            return joinpath(pkgdir, "test", "runtests")
+        end
+    elseif t[1] == "Compiler" && length(t) ≥ 3 && t[2] == "extras"
+        testpath = length(t) >= 4 ? t[4:end] : ("runtests",)
+        return joinpath(@__DIR__, "..", t[1], t[2], t[3], "test", testpath...)
+    elseif t[1] == "Compiler"
+        testpath = length(t) >= 2 ? t[2:end] : ("runtests",)
+        return joinpath(@__DIR__, "..", t[1], "test", testpath...)
+    elseif t[1] == "JuliaSyntax"
+        testpath = length(t) >= 2 ? t[2:end] : ("runtests_vendored",)
+        return joinpath(@__DIR__, "..", t[1], "test", testpath...)
+    elseif t[1] == "JuliaLowering"
+        testpath = length(t) >= 2 ? t[2:end] : ("runtests_vendored",)
+        return joinpath(@__DIR__, "..", t[1], "test", testpath...)
+    else
+        return joinpath(@__DIR__, test)
+    end
+end
+
 """
 `(; tests, net_on, exit_on_error, seed) = choosetests(choices)` selects a set of tests to be
 run. `choices` should be a vector of test names; if empty or set to
@@ -79,6 +114,7 @@ function choosetests(choices = [])
     seed = rand(RandomDevice(), UInt128)
     ci_option_passed = false
     dryrun = false
+    buildroot = joinpath(@__DIR__, "..")
 
     for (i, t) in enumerate(choices)
         if t == "--skip"
@@ -88,6 +124,8 @@ function choosetests(choices = [])
             exit_on_error = true
         elseif t == "--revise"
             use_revise = true
+        elseif startswith(t, "--buildroot=")
+            buildroot = t[(length("--buildroot=") + 1):end]
         elseif startswith(t, "--seed=")
             seed = parse(UInt128, t[(length("--seed=") + 1):end])
         elseif t == "--ci"
@@ -103,6 +141,7 @@ function choosetests(choices = [])
                   --help-list          : prints the options computed without running them
                   --revise             : load Revise
                   --seed=<SEED>        : set the initial seed for all testgroups (parsed as a UInt128)
+                  --buildroot=<PATH>   : set the build root directory (default: in-tree)
                   --skip <NAMES>...    : skip test or collection tagged with <NAMES>
                 TESTS:
                   Can be special tokens, such as "all", "unicode", "stdlib", the names of stdlib \
@@ -150,17 +189,11 @@ function choosetests(choices = [])
 
     filtertests!(tests, "unicode", ["unicode/utf8"])
     filtertests!(tests, "strings", ["strings/basic", "strings/search", "strings/util",
-                   "strings/io", "strings/types"])
+                   "strings/io", "strings/types", "strings/annotated"])
     # do subarray before sparse but after linalg
     filtertests!(tests, "subarray")
-    filtertests!(tests, "compiler", [
-        "compiler/datastructures", "compiler/inference", "compiler/effects",
-        "compiler/validation", "compiler/ssair", "compiler/irpasses",
-        "compiler/codegen", "compiler/inline", "compiler/contextual",
-        "compiler/invalidation", "compiler/AbstractInterpreter",
-        "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"])
-    filtertests!(tests, "compiler/EscapeAnalysis", [
-        "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"])
+    filtertests!(tests, "compiler", ["Compiler"])
+    filtertests!(tests, "compiler_extras", ["Compiler/extras/CompilerDevTools/testpkg"])
     filtertests!(tests, "stdlib", STDLIBS)
     filtertests!(tests, "internet_required", INTERNET_REQUIRED_LIST)
     # do ambiguous first to avoid failing if ambiguities are introduced by other tests
@@ -205,10 +238,12 @@ function choosetests(choices = [])
     filter!(!in(tests), unhandled)
     filter!(!in(skip_tests), tests)
 
+    is_package_test(testname) = testname in STDLIBS || testname in TOP_LEVEL_PKGS
+
     new_tests = String[]
     for test in tests
-        if test in STDLIBS
-            testfile = joinpath(STDLIB_DIR, test, "test", "testgroups")
+        if is_package_test(test)
+            testfile = test_path("$test/testgroups")
             if isfile(testfile)
                 testgroups = readlines(testfile)
                 length(testgroups) == 0 && error("no testgroups defined for $test")
@@ -218,7 +253,7 @@ function choosetests(choices = [])
             end
         end
     end
-    filter!(x -> (x != "stdlib" && !(x in STDLIBS)) , tests)
+    filter!(x -> (x != "stdlib" && !is_package_test(x)) , tests)
     append!(tests, new_tests)
 
     requested_all || explicit_pkg            || filter!(x -> x != "Pkg",            tests)
@@ -246,5 +281,5 @@ function choosetests(choices = [])
         empty!(tests)
     end
 
-    return (; tests, net_on, exit_on_error, use_revise, seed)
+    return (; tests, net_on, exit_on_error, use_revise, buildroot, seed)
 end
diff --git a/test/clangsa/GCPushPop.cpp b/test/clangsa/GCPushPop.cpp
index a62c1501bf323..79cad28f4b9a5 100644
--- a/test/clangsa/GCPushPop.cpp
+++ b/test/clangsa/GCPushPop.cpp
@@ -1,6 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -x c++ %s
+// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -x c++ %s
 
 #include "julia.h"
 #include <string>
@@ -8,17 +8,17 @@
 void missingPop() {
   jl_value_t *x = NULL;
   JL_GC_PUSH1(&x); // expected-note{{GC frame changed here}}
-} // expected-warning{{Non-popped GC frame present at end of function}}
-  // expected-note@-1{{Non-popped GC frame present at end of function}}
+} // expected-warning@-1{{Non-popped GC frame present at end of function}}
+  // expected-note@-2{{Non-popped GC frame present at end of function}}
 
 
 void missingPop2() {
   jl_value_t **x;
   JL_GC_PUSHARGS(x, 2); // expected-note{{GC frame changed here}}
-} // expected-warning{{Non-popped GC frame present at end of function}}
-  // expected-note@-1{{Non-popped GC frame present at end of function}}
+} // expected-warning@-1{{Non-popped GC frame present at end of function}}
+  // expected-note@-2{{Non-popped GC frame present at end of function}}
 
-void superflousPop() {
+void superfluousPop() {
   JL_GC_POP(); // expected-warning{{JL_GC_POP without corresponding push}}
 }              // expected-note@-1{{JL_GC_POP without corresponding push}}
 
diff --git a/test/clangsa/ImplicitAtomicsTest.c b/test/clangsa/ImplicitAtomicsTest.c
index 87154347d9757..cfac3f38e679a 100644
--- a/test/clangsa/ImplicitAtomicsTest.c
+++ b/test/clangsa/ImplicitAtomicsTest.c
@@ -1,7 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -x c -std=c11 | FileCheck --check-prefixes=CHECK,CHECK-C %s
-// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} ${CXXFLAGS} -x c++ -std=c++11 | FileCheck --check-prefixes=CHECK,CHECK-CXX %s
+// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} -x c -std=c11 | FileCheck --check-prefixes=CHECK,CHECK-C %s
+// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} ${CXXFLAGS} -x c++ -std=c++11 | FileCheck --check-prefixes=CHECK,CHECK-CXX %s
 
 #include "julia_atomics.h"
 
diff --git a/test/clangsa/Makefile b/test/clangsa/Makefile
index 3bebd45c9a5a6..609809884fce1 100644
--- a/test/clangsa/Makefile
+++ b/test/clangsa/Makefile
@@ -13,7 +13,7 @@ TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.c) $(wildcard $(SRCDIR)/
 	PATH=$(build_bindir):$(build_depsbindir):$$PATH \
 	LD_LIBRARY_PATH="${build_libdir}:$$LD_LIBRARY_PATH" \
 	CLANGSA_FLAGS="${CLANGSA_FLAGS}" \
-	CLANGSACXX_FLAGS="${CLANGSACXX_FLAGS}" \
+	CLANGSA_CXXFLAGS="${CLANGSA_CXXFLAGS}" \
 	CPPFLAGS_FLAGS="${CPPFLAGS_FLAGS}" \
 	CFLAGS_FLAGS="${CFLAGS_FLAGS}" \
 	CXXFLAGS_FLAGS="${CXXFLAGS_FLAGS}" \
diff --git a/test/clangsa/MissingRoots.c b/test/clangsa/MissingRoots.c
index 0ff5e633622ce..84341f9410e1e 100644
--- a/test/clangsa/MissingRoots.c
+++ b/test/clangsa/MissingRoots.c
@@ -1,6 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -Xclang -verify -x c %s
+// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -Xclang -verify -x c %s
 
 #include "julia.h"
 #include "julia_internal.h"
@@ -277,20 +277,6 @@ void nonconst_loads2()
 static inline void look_at_value2(jl_value_t *v) {
   look_at_value(v);
 }
-void mtable(jl_value_t *f) {
-  look_at_value2((jl_value_t*)jl_gf_mtable(f));
-  jl_value_t *val = NULL;
-  JL_GC_PUSH1(&val);
-  val = (jl_value_t*)jl_gf_mtable(f);
-  JL_GC_POP();
-}
-
-void mtable2(jl_value_t **v) {
-  jl_value_t *val = NULL;
-  JL_GC_PUSH1(&val);
-  val = (jl_value_t*)jl_gf_mtable(v[2]);
-  JL_GC_POP();
-}
 
 void tparam0(jl_value_t *atype) {
    look_at_value(jl_tparam0(atype));
@@ -328,7 +314,7 @@ void scopes() {
 jl_module_t *propagation(jl_module_t *m JL_PROPAGATES_ROOT);
 void module_member(jl_module_t *m)
 {
-    for(int i=(int)m->usings.len-1; i >= 0; --i) {
+    for(int i=(int)m->usings.len-1; i >= 0; i -= 3) {
       jl_module_t *imp = propagation(m);
       jl_gc_safepoint();
       look_at_value((jl_value_t*)imp);
@@ -415,7 +401,7 @@ void stack_rooted(jl_value_t *lb JL_MAYBE_UNROOTED, jl_value_t *ub JL_MAYBE_UNRO
 JL_DLLEXPORT jl_value_t *jl_totally_used_function(int i)
 {
     jl_value_t *v = jl_box_int32(i); // expected-note{{Started tracking value here}}
-    jl_safepoint(); // expected-note{{Value may have been GCed here}}
+    jl_gc_safepoint(); // expected-note{{Value may have been GCed here}}
     return v; // expected-warning{{Return value may have been GCed}}
               // expected-note@-1{{Return value may have been GCed}}
 }
diff --git a/test/client.jl b/test/client.jl
index 0649ab3241d62..fd129712e2db0 100644
--- a/test/client.jl
+++ b/test/client.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+import Base.StackTraces: StackFrame
+
 nested_error_expr = quote
     try
         __not_a_binding__
@@ -12,14 +14,14 @@ nested_error_pattern = r"""
     ERROR: DivideError: integer division error
     Stacktrace:.*
 
-    caused by: UndefVarError: `__not_a_binding__` not defined
+    caused by: UndefVarError: `__not_a_binding__` not defined in `Main`
     Stacktrace:.*
     """s
 
 @testset "display_error" begin
     # Display of errors which cause more than one entry on the exception stack
     excs = try
-        eval(nested_error_expr)
+        Core.eval(Main, nested_error_expr)
     catch
         Base.current_exceptions()
     end
@@ -31,7 +33,7 @@ nested_error_pattern = r"""
         DivideError: integer division error
         Stacktrace:.*
 
-        caused by: UndefVarError: `__not_a_binding__` not defined
+        caused by: UndefVarError: `__not_a_binding__` not defined in `Main`
         Stacktrace:.*
         """s, sprint(show, excs))
 end
@@ -52,3 +54,39 @@ end
         ERROR: ErrorException
         """s, err_str)
 end
+
+@testset "defining `ans` and `err`" begin
+    @test eval(:(ans = 1)) == 1
+    @test eval(:(err = 1)) == 1
+end
+
+@testset "scrub REPL-related frames" begin
+    repl_bt = [StackFrame(:foo, "foo.jl", 1),
+          StackFrame(:__repl_entry_anysuffix, "client.jl", 2),
+          StackFrame(:bar, "bar.jl", 3)]
+    scrubbed_repl_bt = Base.scrub_repl_backtrace(repl_bt)
+
+    nonrepl_bt = [StackFrame(:foo, "foo.jl", 1),
+          StackFrame(:baz, "baz.jl", 2),
+          StackFrame(:bar, "bar.jl", 3)]
+    scrubbed_nonrepl_bt = Base.scrub_repl_backtrace(nonrepl_bt)
+
+    @test length(scrubbed_repl_bt) == 1
+    @test scrubbed_repl_bt[1].func == :foo
+    @test length(scrubbed_nonrepl_bt) == 3
+
+    errio = IOBuffer()
+    lower_errexpr = :(@bad)
+    Base.eval_user_input(errio, lower_errexpr, false)
+    outstr = String(take!(errio))
+    @test occursin("ERROR: LoadError: UndefVarError: `@bad`", outstr)
+    @test !occursin("_repl_entry", outstr)
+    @test !occursin(r"\.[/\\]client.jl", outstr)
+
+    errexpr = :(error("fail"))
+    Base.eval_user_input(errio, errexpr, false)
+    outstr = String(take!(errio))
+    @test occursin("ERROR: fail", outstr)
+    @test !occursin("_repl_entry", outstr)
+    @test !occursin(r"\.[/\\]client.jl", outstr)
+end
diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl
index 917031b57fe5f..5da669b920138 100644
--- a/test/cmdlineargs.jl
+++ b/test/cmdlineargs.jl
@@ -4,8 +4,8 @@ import Libdl
 
 # helper function for passing input to stdin
 # and returning the stdout result
-function writereadpipeline(input, exename)
-    p = open(exename, "w+")
+function writereadpipeline(input, exename; stderr=nothing)
+    p = open(pipeline(exename; stderr), "w+")
     @async begin
         write(p.in, input)
         close(p.in)
@@ -24,6 +24,21 @@ function readchomperrors(exename::Cmd)
     return (success(p), fetch(o), fetch(e))
 end
 
+# helper function for tests that expect successful command execution
+# logs detailed error information if the command fails
+function test_read_success(cmd::Cmd, expected_type::Type=String)
+    success, out, err = readchomperrors(cmd)
+    if !success
+        println("---- Command failed: ")
+        show(cmd)
+        println("stdout:\n", out)
+        println("stderr:\n", err)
+        println("----")
+    end
+    @test success
+    return expected_type == String ? out : parse(expected_type, out)
+end
+
 function format_filename(s)
     p = ccall(:jl_format_filename, Cstring, (Cstring,), s)
     r = unsafe_string(p)
@@ -60,13 +75,60 @@ let
     @test format_filename("%a%%b") == "a%b"
 end
 
+if Sys.isunix()
+    @testset "SIGQUIT prints task backtraces" begin
+        script = """
+            mutable struct RLimit
+                cur::Int64
+                max::Int64
+            end
+            const RLIMIT_CORE = 4 # from /usr/include/sys/resource.h
+            ccall(:setrlimit, Cint, (Cint, Ref{RLimit}), RLIMIT_CORE, Ref(RLimit(0, 0)))
+            write(stdout, "r")
+            wait()
+        """
+        exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
+        errp = PipeBuffer()
+        # disable coredumps for this process
+        p = open(pipeline(`$exename -e $script`, stderr=errp), "r")
+        @test read(p, UInt8) == UInt8('r')
+        # The process might ignore the first SIGQUIT, since it will try to then run cleanup,
+        # which may fail for many reasons.
+        # The process will not ignore the second SIGQUIT, but the kernel might ignore it.
+        # So keep sending SIGQUIT every few seconds until the kernel delivers the second one
+        # and `p` exits.
+        t = Timer(0, interval=10) do t; Base.kill(p, Base.SIGQUIT); end
+        wait(p)
+        close(t)
+        err_s = readchomp(errp)
+        @test Base.process_signaled(p) && p.termsignal == Base.SIGQUIT
+        @test occursin("==== Thread ", err_s)
+        @test occursin("==== Done", err_s)
+    end
+end
+
 @testset "julia_cmd" begin
     julia_basic = Base.julia_cmd()
+    function get_julia_cmd(arg)
+        io = Base.BufferStream()
+        cmd = `$julia_basic $arg -e 'print(repr(Base.julia_cmd()))'`
+        try
+            run(pipeline(cmd, stdout=io, stderr=io))
+        catch
+            closewrite(io)
+            @error "cmd failed" cmd read(io, String)
+            rethrow()
+        end
+        closewrite(io)
+        return read(io, String)
+    end
+
     opts = Base.JLOptions()
-    get_julia_cmd(arg) = strip(read(`$julia_basic $arg -e 'print(repr(Base.julia_cmd()))'`, String), ['`'])
 
     for (arg, default) in (
-                            ("-C$(unsafe_string(opts.cpu_target))",  false),
+                            # Use a Cmd to handle space nicely when
+                            # interpolating inside another Cmd.
+                            (`-C $(unsafe_string(opts.cpu_target))`,  false),
 
                             ("-J$(unsafe_string(opts.image_file))",  false),
 
@@ -116,38 +178,49 @@ end
                             ("--startup-file=no",   false),
                             ("--startup-file=yes",  true),
 
-                            # ("--sysimage-native-code=no",   false), # takes a lot longer (30s)
-                            ("--sysimage-native-code=yes",  true),
-
                             ("--pkgimages=yes", true),
                             ("--pkgimages=no",  false),
                         )
         @testset "$arg" begin
+            str = arg isa Cmd ? join(arg.exec, ' ') : arg
             if default
-                @test !occursin(arg, get_julia_cmd(arg))
+                @test !occursin(str, get_julia_cmd(arg))
             else
-                @test occursin(arg, get_julia_cmd(arg))
+                @test occursin(str, get_julia_cmd(arg))
             end
         end
     end
+
+    # Test empty `cpu_target` gives a helpful error message, issue #52209.
+    io = IOBuffer()
+    p = run(pipeline(`$(Base.julia_cmd(; cpu_target="")) --startup-file=no -e ''`; stderr=io); wait=false)
+    wait(p)
+    @test p.exitcode == 1
+    @test occursin("empty CPU name", String(take!(io)))
 end
 
 let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     # tests for handling of ENV errors
-    let v = writereadpipeline(
+    let
+        io = IOBuffer()
+        v = writereadpipeline(
             "println(\"REPL: \", @which(less), @isdefined(InteractiveUtils))",
             setenv(`$exename -i -E '@assert isempty(LOAD_PATH); push!(LOAD_PATH, "@stdlib"); @isdefined InteractiveUtils'`,
                     "JULIA_LOAD_PATH" => "",
                     "JULIA_DEPOT_PATH" => ";:",
-                    "HOME" => homedir()))
-        @test v == ("false\nREPL: InteractiveUtilstrue\n", true)
+                    "HOME" => homedir());
+            stderr=io)
+        # @which is undefined
+        @test_broken v == ("false\nREPL: InteractiveUtilstrue\n", true)
+        stderr = String(take!(io))
+        @test_broken isempty(stderr)
     end
     let v = writereadpipeline("println(\"REPL: \", InteractiveUtils)",
                 setenv(`$exename -i -e 'const InteractiveUtils = 3'`,
                     "JULIA_LOAD_PATH" => ";;;:::",
                     "JULIA_DEPOT_PATH" => ";;;:::",
                     "HOME" => homedir()))
-        # TODO: ideally, `@which`, etc. would still work, but Julia can't handle `using $InterativeUtils`
+        # TODO: ideally, `@which`, etc. would still work, but Julia can't handle `using $InteractiveUtils`
         @test v == ("REPL: 3\n", true)
     end
     @testset let v = readchomperrors(`$exename -i -e '
@@ -159,7 +232,11 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         # make sure this is a non-fatal error and the REPL still loads
         @test v[1]
         @test isempty(v[2])
-        @test startswith(v[3], "┌ Warning: Failed to import InteractiveUtils into module Main\n")
+        # Can't load REPL if it's outside the sysimg if we break the load path.
+        # Need to rewrite this test nicer
+        # ┌ Warning: REPL provider not available: using basic fallback
+        # └ @ Base client.jl:459
+        @test_broken startswith(v[3], "┌ Warning: Failed to import InteractiveUtils into module Main\n")
     end
     real_threads = string(ccall(:jl_cpu_threads, Int32, ()))
     for nc in ("0", "-2", "x", "2x", " ", "")
@@ -211,6 +288,10 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         @test startswith(read(`$exename --help`, String), header)
     end
 
+    # Test to make sure that command line --help and --help-hidden do not return a description which is more than 100 characters wide
+    @test isempty(filter(x->length(x) > 100, readlines(`$exename -h`)))
+    @test isempty(filter(x->length(x) > 100, readlines(`$exename --help-hidden`)))
+
     # ~ expansion in --project and JULIA_PROJECT
     if !Sys.iswindows()
         let expanded = abspath(expanduser("~/foo/Project.toml"))
@@ -225,35 +306,52 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         @test expanded == readchomp(addenv(`$exename -e 'println(Base.active_project())'`, "JULIA_PROJECT" => "@foo", "HOME" => homedir()))
     end
 
+    # --project=@script handling
+    let expanded = abspath(joinpath(@__DIR__, "project", "ScriptProject"))
+        script = joinpath(expanded, "bin", "script.jl")
+        # Check running julia with --project=@script both within and outside the script directory
+        @testset "--@script from $name" for (name, dir) in [("project", expanded), ("outside", pwd())]
+            @test joinpath(expanded, "Project.toml") == readchomp(Cmd(`$exename --project=@script $script`; dir))
+            @test joinpath(expanded, "SubProject", "Project.toml") == readchomp(Cmd(`$exename --project=@script/../SubProject $script`; dir))
+        end
+    end
+
+    # handling of `@temp` in --project and JULIA_PROJECT
+    @test tempdir() == readchomp(`$exename --project=@temp -e 'println(Base.active_project())'`)[1:lastindex(tempdir())]
+    @test tempdir() == readchomp(addenv(`$exename -e 'println(Base.active_project())'`, "JULIA_PROJECT" => "@temp", "HOME" => homedir()))[1:lastindex(tempdir())]
+
     # --quiet, --banner
-    let t(q,b) = "Base.JLOptions().quiet == $q && Base.JLOptions().banner == $b"
-        @test success(`$exename                 -e $(t(0, -1))`)
-        @test success(`$exename -q              -e $(t(1,  0))`)
-        @test success(`$exename --quiet         -e $(t(1,  0))`)
-        @test success(`$exename --banner=no     -e $(t(0,  0))`)
-        @test success(`$exename --banner=yes    -e $(t(0,  1))`)
-        @test success(`$exename -q --banner=no  -e $(t(1,  0))`)
-        @test success(`$exename -q --banner=yes -e $(t(1,  1))`)
-        @test success(`$exename --banner=no  -q -e $(t(1,  0))`)
-        @test success(`$exename --banner=yes -q -e $(t(1,  1))`)
+    let p = "print((Base.JLOptions().quiet, Base.JLOptions().banner))"
+        @test read(`$exename                   -e $p`, String) == "(0, -1)"
+        @test read(`$exename -q                -e $p`, String) == "(1, 0)"
+        @test read(`$exename --quiet           -e $p`, String) == "(1, 0)"
+        @test read(`$exename --banner=no       -e $p`, String) == "(0, 0)"
+        @test read(`$exename --banner=yes      -e $p`, String) == "(0, 1)"
+        @test read(`$exename --banner=short    -e $p`, String) == "(0, 2)"
+        @test read(`$exename -q --banner=no    -e $p`, String) == "(1, 0)"
+        @test read(`$exename -q --banner=yes   -e $p`, String) == "(1, 1)"
+        @test read(`$exename -q --banner=short -e $p`, String) == "(1, 2)"
+        @test read(`$exename --banner=no  -q   -e $p`, String) == "(1, 0)"
+        @test read(`$exename --banner=yes -q   -e $p`, String) == "(1, 1)"
+        @test read(`$exename --banner=short -q -e $p`, String) == "(1, 2)"
     end
 
     # --home
-    @test success(`$exename -H $(Sys.BINDIR)`)
-    @test success(`$exename --home=$(Sys.BINDIR)`)
+    @test "" == test_read_success(`$exename -H $(Sys.BINDIR)`)
+    @test "" == test_read_success(`$exename --home=$(Sys.BINDIR)`)
 
     # --eval
-    @test  success(`$exename -e "exit(0)"`)
+    @test "" == test_read_success(`$exename -e "exit(0)"`)
     @test errors_not_signals(`$exename -e "exit(1)"`)
-    @test  success(`$exename --eval="exit(0)"`)
+    @test "" == test_read_success(`$exename --eval="exit(0)"`)
     @test errors_not_signals(`$exename --eval="exit(1)"`)
     @test errors_not_signals(`$exename -e`)
     @test errors_not_signals(`$exename --eval`)
     # --eval --interactive (replaced --post-boot)
-    @test  success(`$exename -i -e "exit(0)"`)
+    @test "" == test_read_success(`$exename -i -e "exit(0)"`)
     @test errors_not_signals(`$exename -i -e "exit(1)"`)
     # issue #34924
-    @test  success(`$exename -e 'const LOAD_PATH=1'`)
+    @test "" == test_read_success(`$exename -e 'const LOAD_PATH=1'`)
 
     # --print
     @test read(`$exename -E "1+1"`, String) == "2\n"
@@ -300,43 +398,43 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     @test errors_not_signals(`$exename -C invalidtarget`)
     @test errors_not_signals(`$exename --cpu-target=invalidtarget`)
 
-    if Sys.iswindows()
-        # -t, --threads
-        code = "print(Threads.threadpoolsize())"
-        cpu_threads = ccall(:jl_effective_threads, Int32, ())
-        @test string(cpu_threads) ==
-            read(`$exename --threads auto -e $code`, String) ==
-            read(`$exename --threads=auto -e $code`, String) ==
-            read(`$exename -tauto -e $code`, String) ==
-            read(`$exename -t auto -e $code`, String)
-        for nt in (nothing, "1")
-            withenv("JULIA_NUM_THREADS" => nt) do
-                @test read(`$exename --threads=2 -e $code`, String) ==
-                    read(`$exename -t 2 -e $code`, String) == "2"
+    # -t, --threads
+    code = "print(Threads.threadpoolsize())"
+    code2 = "print(Threads.maxthreadid())"
+    cpu_threads = ccall(:jl_effective_threads, Int32, ())
+    @test string(cpu_threads) ==
+        read(`$exename --threads auto -e $code`, String) ==
+        read(`$exename --threads=auto -e $code`, String) ==
+        read(`$exename -tauto -e $code`, String) ==
+        read(`$exename -t auto -e $code`, String)
+    for nt in (nothing, "1")
+        withenv("JULIA_NUM_THREADS" => nt) do
+            @test read(`$exename --threads=2 -e $code`, String) ==
+                read(`$exename -t 2 -e $code`, String) == "2"
+            if nt === nothing
+                @test read(`$exename -e $code2`, String) == "2" #default + interactive
+            elseif nt == "1"
+                @test read(`$exename -e $code2`, String) == "1" #if user asks for 1 give 1
             end
         end
-        # We want to test oversubscription, but on manycore machines, this can
-        # actually exhaust limited PID spaces
-        cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads))
-        if Sys.WORD_SIZE == 32
-            cpu_threads = min(cpu_threads, 50)
-        end
-        @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads)
-        withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do
-            @test read(`$exename -e $code`, String) == string(cpu_threads)
-        end
-        @test errors_not_signals(`$exename -t 0`)
-        @test errors_not_signals(`$exename -t -1`)
+    end
+    # We want to test oversubscription, but on manycore machines, this can
+    # actually exhaust limited PID spaces
+    cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads))
+    if Sys.WORD_SIZE == 32
+        cpu_threads = min(cpu_threads, 50)
+    end
+    @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads)
+    withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do
+        @test read(`$exename -e $code`, String) == string(cpu_threads)
+    end
+    @test errors_not_signals(`$exename -t 0`)
+    @test errors_not_signals(`$exename -t -1`)
 
-        # Combining --threads and --procs: --threads does propagate
-        withenv("JULIA_NUM_THREADS" => nothing) do
-            code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))"
-            @test read(`$exename -p2 -t2 -e $code`, String) == "6"
-        end
-    else
-        @test_skip "Command line tests with -t are flakey on non-Windows OS"
-        # Known issue: https://github.com/JuliaLang/julia/issues/49154
-        # These tests should be fixed and reenabled on all operating systems.
+    # Combining --threads and --procs: --threads does propagate
+    withenv("JULIA_NUM_THREADS" => nothing) do
+        code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))"
+        @test read(`$exename -p2 -t2 -e $code`, String) == "6"
     end
 
     # Combining --threads and invalid -C should yield a decent error
@@ -350,29 +448,33 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         @test p.exitcode == 1 && p.termsignal == 0
     end
 
-    # --gcthreads
-    code = "print(Threads.ngcthreads())"
-    cpu_threads = ccall(:jl_effective_threads, Int32, ())
-    @test (cpu_threads == 1 ? "1" : string(div(cpu_threads, 2))) ==
-          read(`$exename --threads auto -e $code`, String) ==
-          read(`$exename --threads=auto -e $code`, String) ==
-          read(`$exename -tauto -e $code`, String) ==
-          read(`$exename -t auto -e $code`, String)
-    for nt in (nothing, "1")
-        withenv("JULIA_NUM_GC_THREADS" => nt) do
-            @test read(`$exename --gcthreads=2 -e $code`, String) == "2"
-        end
-        withenv("JULIA_NUM_GC_THREADS" => nt) do
-            @test read(`$exename --gcthreads=2,1 -e $code`, String) == "3"
+    # FIXME: Issue #57103 --gcthreads does not have the same semantics
+    # for Stock GC and MMTk, so the tests below are specific to the Stock GC
+    @static if Base.USING_STOCK_GC
+        # --gcthreads
+        code = "print(Threads.ngcthreads())"
+        cpu_threads = ccall(:jl_effective_threads, Int32, ())
+        @test string(cpu_threads) ==
+            read(`$exename --threads auto -e $code`, String) ==
+            read(`$exename --threads=auto -e $code`, String) ==
+            read(`$exename -tauto -e $code`, String) ==
+            read(`$exename -t auto -e $code`, String)
+        for nt in (nothing, "1")
+            withenv("JULIA_NUM_GC_THREADS" => nt) do
+                @test read(`$exename --gcthreads=2 -e $code`, String) == "2"
+            end
+            withenv("JULIA_NUM_GC_THREADS" => nt) do
+                @test read(`$exename --gcthreads=2,1 -e $code`, String) == "3"
+            end
         end
-    end
 
-    withenv("JULIA_NUM_GC_THREADS" => 2) do
-        @test read(`$exename -e $code`, String) == "2"
-    end
+        withenv("JULIA_NUM_GC_THREADS" => 2) do
+            @test read(`$exename -e $code`, String) == "2"
+        end
 
-    withenv("JULIA_NUM_GC_THREADS" => "2,1") do
-        @test read(`$exename -e $code`, String) == "3"
+        withenv("JULIA_NUM_GC_THREADS" => "2,1") do
+            @test read(`$exename -e $code`, String) == "3"
+        end
     end
 
     # --machine-file
@@ -394,9 +496,30 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     @test readchomp(`$exename -E "isinteractive()" -i`) == "true"
 
     # --color
-    @test readchomp(`$exename --color=yes -E "Base.have_color"`) == "true"
-    @test readchomp(`$exename --color=no -E "Base.have_color"`) == "false"
-    @test errors_not_signals(`$exename --color=false`)
+    function color_cmd(; flag, no_color=nothing, force_color=nothing)
+        cmd = `$exename --color=$flag -E "Base.have_color"`
+        return addenv(cmd, "NO_COLOR" => no_color, "FORCE_COLOR" => force_color)
+    end
+
+    @test readchomp(color_cmd(flag="auto")) == "nothing"
+    @test readchomp(color_cmd(flag="no")) == "false"
+    @test readchomp(color_cmd(flag="yes")) == "true"
+    @test errors_not_signals(color_cmd(flag="false"))
+    @test errors_not_signals(color_cmd(flag="true"))
+
+    @test readchomp(color_cmd(flag="auto", no_color="")) == "nothing"
+    @test readchomp(color_cmd(flag="auto", no_color="1")) == "false"
+    @test readchomp(color_cmd(flag="no", no_color="1")) == "false"
+    @test readchomp(color_cmd(flag="yes", no_color="1")) == "true"
+
+    @test readchomp(color_cmd(flag="auto", force_color="")) == "nothing"
+    @test readchomp(color_cmd(flag="auto", force_color="1")) == "true"
+    @test readchomp(color_cmd(flag="no", force_color="1")) == "false"
+    @test readchomp(color_cmd(flag="yes", force_color="1")) == "true"
+
+    @test readchomp(color_cmd(flag="auto", no_color="1", force_color="1")) == "true"
+    @test readchomp(color_cmd(flag="no", no_color="1", force_color="1")) == "false"
+    @test readchomp(color_cmd(flag="yes", no_color="1", force_color="1")) == "true"
 
     # --history-file
     @test readchomp(`$exename -E "Bool(Base.JLOptions().historyfile)"
@@ -407,85 +530,177 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
 
     # --code-coverage
     mktempdir() do dir
+        # don't inherit global coverage settings that could confuse these tests
+        cov_exename = `$(Base.julia_cmd()[1]) --startup-file=no --color=no`
         helperdir = joinpath(@__DIR__, "testhelpers")
         inputfile = joinpath(helperdir, "coverage_file.jl")
-        expected = replace(read(joinpath(helperdir, "coverage_file.info.bad"), String),
-            "<FILENAME>" => realpath(inputfile))
-        expected_good = replace(read(joinpath(helperdir, "coverage_file.info"), String),
+        expected = replace(read(joinpath(helperdir, "coverage_file.info"), String),
             "<FILENAME>" => realpath(inputfile))
         covfile = replace(joinpath(dir, "coverage.info"), "%" => "%%")
         @test !isfile(covfile)
-        defaultcov = readchomp(`$exename -E "Base.JLOptions().code_coverage != 0" -L $inputfile`)
+        defaultcov = readchomp(`$cov_exename -E "Base.JLOptions().code_coverage != 0" -L $inputfile`)
         opts = Base.JLOptions()
         coverage_file = (opts.output_code_coverage != C_NULL) ?  unsafe_string(opts.output_code_coverage) : ""
         @test !isfile(covfile)
         @test defaultcov == string(opts.code_coverage != 0 && (isempty(coverage_file) || occursin("%p", coverage_file)))
-        @test readchomp(`$exename -E "Base.JLOptions().code_coverage" -L $inputfile
+        @test readchomp(`$cov_exename -E "Base.JLOptions().code_coverage" -L $inputfile
             --code-coverage=$covfile --code-coverage=none`) == "0"
         @test !isfile(covfile)
-        @test readchomp(`$exename -E "Base.JLOptions().code_coverage" -L $inputfile
+        @test readchomp(`$cov_exename -E "Base.JLOptions().code_coverage" -L $inputfile
             --code-coverage=$covfile --code-coverage`) == "1"
         @test isfile(covfile)
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
-        @test readchomp(`$exename -E "Base.JLOptions().code_coverage" -L $inputfile
+        @test readchomp(`$cov_exename -E "Base.JLOptions().code_coverage" -L $inputfile
             --code-coverage=$covfile --code-coverage=user`) == "1"
         @test isfile(covfile)
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
-        @test readchomp(`$exename -E "Base.JLOptions().code_coverage" -L $inputfile
+        @test readchomp(`$cov_exename -E "Base.JLOptions().code_coverage" -L $inputfile
             --code-coverage=$covfile --code-coverage=all`) == "2"
         @test isfile(covfile)
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
 
         # Ask for coverage in specific file
         tfile = realpath(inputfile)
-        @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
+        @test readchomp(`$cov_exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
             --code-coverage=$covfile --code-coverage=@$tfile`) == "(3, $(repr(tfile)))"
         @test isfile(covfile)
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
 
         # Ask for coverage in directory
         tdir = dirname(realpath(inputfile))
-        @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
+        @test readchomp(`$cov_exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
             --code-coverage=$covfile --code-coverage=@$tdir`) == "(3, $(repr(tdir)))"
         @test isfile(covfile)
         got = read(covfile, String)
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
-        @test_broken occursin(expected_good, got)
+
+        # Ask for coverage in current directory
+        tdir = dirname(realpath(inputfile))
+        cd(tdir) do
+            # there may be atrailing separator here so use rstrip
+            @test readchomp(`$cov_exename -E "(Base.JLOptions().code_coverage, rstrip(unsafe_string(Base.JLOptions().tracked_path), Base.Filesystem.path_separator[1]))" -L $inputfile
+                --code-coverage=$covfile --code-coverage=@`) == "(3, $(repr(tdir)))"
+        end
+        @test isfile(covfile)
+        got = read(covfile, String)
+        rm(covfile)
+        @test occursin(expected, got) || (expected, got)
+
+        # Ask for coverage in relative directory
+        tdir = dirname(realpath(inputfile))
+        cd(dirname(tdir)) do
+            @test readchomp(`$cov_exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
+                --code-coverage=$covfile --code-coverage=@testhelpers`) == "(3, $(repr(tdir)))"
+        end
+        @test isfile(covfile)
+        got = read(covfile, String)
+        rm(covfile)
+        @test occursin(expected, got) || (expected, got)
+
+        # Ask for coverage in relative directory with dot-dot notation
+        tdir = dirname(realpath(inputfile))
+        cd(tdir) do
+            @test readchomp(`$cov_exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
+                --code-coverage=$covfile --code-coverage=@../testhelpers`) == "(3, $(repr(tdir)))"
+        end
+        @test isfile(covfile)
+        got = read(covfile, String)
+        rm(covfile)
+        @test occursin(expected, got) || (expected, got)
 
         # Ask for coverage in a different directory
         tdir = mktempdir() # a dir that contains no code
-        @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
-            --code-coverage=$covfile --code-coverage=@$tdir`) == "(3, $(repr(tdir)))"
+        @test readchomp(`$cov_exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
+            --code-coverage=$covfile --code-coverage=@$tdir`) == "(3, $(repr(realpath(tdir))))"
         @test isfile(covfile)
         got = read(covfile, String)
         @test isempty(got)
         rm(covfile)
+
+        function coverage_info_for(src::String)
+            mktemp(dir) do srcfile, io
+                write(io, src); close(io)
+                outfile = tempname(dir, cleanup=false)*".info"
+                run(`$cov_exename --code-coverage=$outfile $srcfile`)
+                result = read(outfile, String)
+                rm(outfile, force=true)
+                result
+            end
+        end
+        @test contains(coverage_info_for("""
+            function cov_bug(x, p)
+                if p > 2
+                    print("")  # runs
+                end
+                if Base.compilerbarrier(:const, false)
+                    println("Does not run")
+                end
+            end
+            function do_test()
+                cov_bug(5, 3)
+            end
+            do_test()
+            """), """
+            DA:2,1
+            DA:3,1
+            DA:5,1
+            DA:6,0
+            DA:9,1
+            DA:10,1
+            LH:5
+            LF:6
+            """)
+        @test contains(coverage_info_for("""
+            function cov_bug()
+                if Base.compilerbarrier(:const, true)
+                    if Base.compilerbarrier(:const, true)
+                        if Base.compilerbarrier(:const, false)
+                            println("Does not run")
+                        end
+                    else
+                        print("Does not run either")
+                    end
+                else
+                    print("")
+                end
+                return nothing
+            end
+            cov_bug()
+            """), """
+            DA:1,1
+            DA:2,1
+            DA:3,1
+            DA:4,1
+            DA:5,0
+            DA:8,0
+            DA:11,0
+            DA:13,1
+            LH:5
+            LF:8
+            """)
     end
 
     # --track-allocation
-    @test readchomp(`$exename -E "Base.JLOptions().malloc_log != 0"`) == "false"
-    @test readchomp(`$exename -E "Base.JLOptions().malloc_log != 0" --track-allocation=none`) == "false"
+    alloc_exename = `$(Base.julia_cmd()) --startup-file=no --color=no --track-allocation=none`
+    @test readchomp(`$alloc_exename -E "Base.JLOptions().malloc_log != 0"`) == "false"
+    @test readchomp(`$alloc_exename -E "Base.JLOptions().malloc_log != 0" --track-allocation=none`) == "false"
 
-    @test readchomp(`$exename -E "Base.JLOptions().malloc_log != 0" --track-allocation`) == "true"
-    @test readchomp(`$exename -E "Base.JLOptions().malloc_log != 0" --track-allocation=user`) == "true"
+    @test readchomp(`$alloc_exename -E "Base.JLOptions().malloc_log != 0" --track-allocation`) == "true"
+    @test readchomp(`$alloc_exename -E "Base.JLOptions().malloc_log != 0" --track-allocation=user`) == "true"
     mktempdir() do dir
         helperdir = joinpath(@__DIR__, "testhelpers")
         inputfile = joinpath(dir, "allocation_file.jl")
         cp(joinpath(helperdir,"allocation_file.jl"), inputfile)
-        pid = readchomp(`$exename -E "getpid()" -L $inputfile --track-allocation=user`)
+        pid = readchomp(`$alloc_exename -E "getpid()" -L $inputfile --track-allocation=user`)
         memfile = "$inputfile.$pid.mem"
         got = readlines(memfile)
         rm(memfile)
@@ -505,9 +720,9 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
             @test popfirst!(got) == "       32     Base.invokelatest(g, x)"
         end
         if Sys.WORD_SIZE == 64
-            @test popfirst!(got) == "       48     []"
-        else
             @test popfirst!(got) == "       32     []"
+        else
+            @test popfirst!(got) == "       16     []"
         end
         @test popfirst!(got) == "        - end"
         @test popfirst!(got) == "        - f(1.23)"
@@ -528,30 +743,34 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     @test readchomp(`$exename -E "Base.JLOptions().debug_level" -g`) == "2"
     # --print-before/--print-after with pass names is broken on Windows due to no-gnu-unique issues
     if !Sys.iswindows()
-        withenv("JULIA_LLVM_ARGS" => "--print-before=FinalLowerGC") do
+        withenv("JULIA_LLVM_ARGS" => "--print-before=BeforeOptimization") do
             let code = readchomperrors(`$exename -g0 -E "@eval Int64(1)+Int64(1)"`)
                 @test code[1]
                 code = code[3]
                 @test occursin("llvm.module.flags", code)
                 @test !occursin("llvm.dbg.cu", code)
                 @test !occursin("int.jl", code)
-                @test !occursin("\"Int64\"", code)
+                @test !occursin("name: \"Int64\"", code)
             end
             let code = readchomperrors(`$exename -g1 -E "@eval Int64(1)+Int64(1)"`)
                 @test code[1]
                 code = code[3]
                 @test occursin("llvm.module.flags", code)
                 @test occursin("llvm.dbg.cu", code)
-                @test occursin("int.jl", code)
-                @test !occursin("\"Int64\"", code)
+                # TODO: consider moving test to llvmpasses as this fails on some platforms
+                # without clear reason
+                @test_skip occursin("int.jl", code)
+                @test !occursin("name: \"Int64\"", code)
             end
             let code = readchomperrors(`$exename -g2 -E "@eval Int64(1)+Int64(1)"`)
                 @test code[1]
                 code = code[3]
                 @test occursin("llvm.module.flags", code)
                 @test occursin("llvm.dbg.cu", code)
-                @test occursin("int.jl", code)
-                @test occursin("\"Int64\"", code)
+                # TODO: consider moving test to llvmpasses as this fails on some platforms
+                # without clear reason
+                @test_skip occursin("int.jl", code)
+                @test occursin("name: \"Int64\"", code)
             end
         end
     end
@@ -602,7 +821,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         @test errors_not_signals(`$exename -E "$code" --depwarn=error`)
 
         @test readchomperrors(`$exename -E "$code" --depwarn=yes`) ==
-            (true, "true", "WARNING: Foo.Deprecated is deprecated, use NotDeprecated instead.\n  likely near none:8")
+            (true, "true", "WARNING: Use of Foo.Deprecated is deprecated, use NotDeprecated instead.\n  likely near none:8")
 
         @test readchomperrors(`$exename -E "$code" --depwarn=no`) ==
             (true, "true", "")
@@ -636,9 +855,132 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
             "Int(Base.JLOptions().fast_math)"`)) == JL_OPTIONS_FAST_MATH_DEFAULT
     end
 
+    let JL_OPTIONS_TASK_METRICS_OFF = 0, JL_OPTIONS_TASK_METRICS_ON = 1
+        @test parse(Int,readchomp(`$exename -E
+            "Int(Base.JLOptions().task_metrics)"`)) == JL_OPTIONS_TASK_METRICS_OFF
+        @test parse(Int, readchomp(`$exename --task-metrics=yes -E
+            "Int(Base.JLOptions().task_metrics)"`)) == JL_OPTIONS_TASK_METRICS_ON
+        @test !parse(Bool, readchomp(`$exename  -E "current_task().metrics_enabled"`))
+        @test parse(Bool, readchomp(`$exename --task-metrics=yes -E "current_task().metrics_enabled"`))
+    end
+
     # --worker takes default / custom as argument (default/custom arguments
     # tested in test/parallel.jl)
-    @test errors_not_signals(`$exename --worker=true`)
+    # shorten the worker timeout as this test relies on it timing out
+    withenv("JULIA_WORKER_TIMEOUT" => "10") do
+        @test errors_not_signals(`$exename --worker=true`)
+    end
+
+    # --trace-compile
+    let
+        io = IOBuffer()
+        v = writereadpipeline(
+            "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)",
+            `$exename --trace-compile=stderr -i`,
+            stderr=io)
+        _stderr = String(take!(io))
+        @test occursin("precompile(Tuple{typeof(Main.foo), Int", _stderr)
+    end
+
+    # --trace-compile-timing
+    let
+        io = IOBuffer()
+        v = writereadpipeline(
+            "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)",
+            `$exename --trace-compile=stderr --trace-compile-timing -i`,
+            stderr=io)
+        _stderr = String(take!(io))
+        @test occursin(" ms =# precompile(Tuple{typeof(Main.foo), Int", _stderr)
+    end
+
+    # Base.@trace_compile (local version of the 2 above args)
+    let
+        io = IOBuffer()
+        v = writereadpipeline(
+            """
+            f(x::Int) = 1
+            applyf(container) = f(container[1])
+            Base.@trace_compile @eval applyf([100])
+            Base.@trace_compile @eval applyf(Any[100])
+            f(::Bool) = 2
+            Base.@trace_compile @eval applyf([true])
+            Base.@trace_compile @eval applyf(Any[true])
+            """,
+            `$exename -i`,
+            stderr=io)
+        _stderr = String(take!(io))
+        @test length(findall(r"precompile\(", _stderr)) == 5
+        @test length(findall(r" # recompile", _stderr)) == 1
+    end
+
+    # --trace-dispatch
+    let
+        io = IOBuffer()
+        v = writereadpipeline(
+            "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)",
+            `$exename --trace-dispatch=stderr -i`,
+            stderr=io)
+        _stderr = String(take!(io))
+        @test occursin("precompile(Tuple{typeof(Main.foo), Int", _stderr)
+    end
+
+    # --trace-eval
+    let
+        # Test --trace-eval=loc (location only)
+        mktempdir() do dir
+            testfile = joinpath(dir, "test.jl")
+            write(testfile, "x = 1 + 1\ny = x * 2")
+            success, out, err = readchomperrors(`$exename --trace-eval=loc $testfile`)
+            @test success
+            @test occursin("eval: #=", err)
+            @test !occursin("eval: \$(Expr(:toplevel", err)  # Should not show full expressions
+        end
+    end
+
+    let
+        # Test --trace-eval=full (full expressions)
+        mktempdir() do dir
+            testfile = joinpath(dir, "test.jl")
+            write(testfile, "x = 1 + 1\ny = x * 2")
+            success, out, err = readchomperrors(`$exename --trace-eval=full $testfile`)
+            @test success
+            @test occursin("eval: \$(Expr(:toplevel", err)  # Should show full expressions
+            @test occursin("x = 1 + 1", err)
+        end
+    end
+
+    let
+        # Test --trace-eval=no (disabled)
+        mktempdir() do dir
+            testfile = joinpath(dir, "test.jl")
+            write(testfile, "x = 1 + 1\ny = x * 2")
+            success, out, err = readchomperrors(`$exename --trace-eval=no $testfile`)
+            @test success
+            @test !occursin("eval:", err)  # Should not show any eval traces
+        end
+    end
+
+    let
+        # Test Base.TRACE_EVAL global control takes priority
+        mktempdir() do dir
+            testfile = joinpath(dir, "test.jl")
+            write(testfile, """
+                Base.TRACE_EVAL = :full
+                x = 1 + 1
+                Base.TRACE_EVAL = :no
+                y = x * 2
+                """)
+            success, out, err = readchomperrors(`$exename --trace-eval=loc $testfile`)  # Command line says :loc, but code overrides
+            @test success
+            # Should show full expression for x = 1 + 1 (Base.TRACE_EVAL = :full)
+            @test occursin("eval: \$(Expr(:toplevel", err)
+            @test occursin("x = 1 + 1", err)
+            # Should not show trace for y = x * 2 (Base.TRACE_EVAL = :no)
+            lines = split(err, '\n')
+            y_lines = filter(line -> occursin("y = x * 2", line), lines)
+            @test length(y_lines) == 0  # No eval trace for y assignment
+        end
+    end
 
     # test passing arguments
     mktempdir() do dir
@@ -755,10 +1097,10 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         cd(testdir) do
             rm(testdir)
             @test Base.current_project() === nothing
-            @test success(`$exename -e "exit(0)"`)
+            @test "" == test_read_success(`$exename -e "exit(0)"`)
             for load_path in ["", "@", "@."]
                 withenv("JULIA_LOAD_PATH" => load_path) do
-                    @test success(`$exename -e "exit(!(Base.load_path() == []))"`)
+                    @test "" == test_read_success(`$exename -e "exit(!(Base.load_path() == []))"`)
                 end
             end
         end
@@ -795,6 +1137,14 @@ end
         @test v[2] == ""
         @test contains(v[3], "More than one command line CPU targets specified")
     end
+
+    # Testing this more precisely would be very platform and build system dependent and brittle.
+    withenv("JULIA_CPU_TARGET" => "sysimage") do
+        v = readchomp(`$julia_path -E "Sys.sysimage_target()"`)
+        # Local builds will likely be "native" but CI shouldn't be.
+        invalid_results = Base.get_bool_env("CI", false) ? ("", "native", "sysimage") : ("", "sysimage",)
+        @test !in(v, invalid_results)
+    end
 end
 
 # Find the path of libjulia (or libjulia-debug, as the case may be)
@@ -830,7 +1180,7 @@ let exename = `$(Base.julia_cmd().exec[1]) -t 1`
         p = run(pipeline(`$exename --sysimage=$libjulia`, stderr=err), wait=false)
         close(err.in)
         let s = read(err, String)
-            @test s == "ERROR: System image file failed consistency check: maybe opened the wrong version?\n"
+            @test s == "ERROR: Image file failed consistency check: maybe opened the wrong version?\n"
         end
         @test errors_not_signals(p)
         @test p.exitcode == 1
@@ -860,17 +1210,12 @@ run(pipeline(devnull, `$(joinpath(Sys.BINDIR, Base.julia_exename())) --lisp`, de
 @test readchomperrors(`$(joinpath(Sys.BINDIR, Base.julia_exename())) -Cnative --lisp`) ==
     (false, "", "ERROR: --lisp must be specified as the first argument")
 
-# --sysimage-native-code={yes|no}
-let exename = `$(Base.julia_cmd()) --startup-file=no`
-    @test readchomp(`$exename --sysimage-native-code=yes -E
-        "Bool(Base.JLOptions().use_sysimage_native_code)"`) == "true"
-    @test readchomp(`$exename --sysimage-native-code=no -E
-        "Bool(Base.JLOptions().use_sysimage_native_code)"`) == "false"
-end
-
 # backtrace contains line number info (esp. on windows #17179)
-for precomp in ("yes", "no")
-    succ, out, bt = readchomperrors(`$(Base.julia_cmd()) --startup-file=no --sysimage-native-code=$precomp -E 'sqrt(-2)'`)
+let
+    # TODO: Make this safe in the presence of two single-thread threadpools with
+    # --sysimage-native-code=no, though that option is deprecated.
+    # see https://github.com/JuliaLang/julia/issues/57198
+    succ, out, bt = readchomperrors(`$(Base.julia_cmd()) --startup-file=no -E 'sqrt(-2)'`)
     @test !succ
     @test out == ""
     @test occursin(r"\.jl:(\d+)", bt)
@@ -952,7 +1297,7 @@ for yn in ("no", "yes")
 end
 
 # issue #39259, shadowing `ARGS`
-@test success(`$(Base.julia_cmd()) --startup-file=no -e 'ARGS=1'`)
+@test "" == test_read_success(`$(Base.julia_cmd()) --startup-file=no -e 'ARGS=1'`)
 
 @testset "- as program file reads from stdin" begin
     for args in (`- foo bar`, `-- - foo bar`)
@@ -971,6 +1316,151 @@ end
         @test lines[3] == "foo"
         @test lines[4] == "bar"
     end
-#heap-size-hint
-@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "524288000"
 end
+
+# FIXME: Issue #57103: MMTK currently does not use --heap-size-hint since it only
+# supports setting up a hard limit unlike the Stock GC
+# which takes it as a soft limit. For now, we skip the tests below for MMTk
+@static if Base.USING_STOCK_GC
+@testset "heap size hint" begin
+    #heap-size-hint, we reserve 250 MB for non GC memory (llvm, etc.)
+    @test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "$((500-250)*1024*1024)"
+
+    mem = ccall(:uv_get_total_memory, UInt64, ())
+    cmem = ccall(:uv_get_constrained_memory, UInt64, ())
+    if cmem > 0 && cmem < mem
+        mem = cmem
+    end
+    maxmem = parse(UInt64, readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=25% -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`))
+    hint = max(mem÷4, 251*1024*1024) - 250*1024*1024
+    MAX32HEAP = 1536 * 1024 * 1024
+    if Int === Int32 && hint > MAX32HEAP
+        hint = MAX32HEAP
+    end
+    @test abs(Float64(maxmem) - hint)/maxmem < 0.05
+
+    @test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=10M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "$(1*1024*1024)"
+end
+
+@testset "hard heap limit" begin
+    cmd = `$(Base.julia_cmd()) --hard-heap-limit=30M -E "mutable struct ListNode; v::Int64; next::Union{ListNode, Nothing}; end\n
+        l = ListNode(0, nothing); while true; l = ListNode(0, l); end"`
+    @test !success(cmd)
+end
+end
+
+## `Main.main` entrypoint
+
+# Basic usage
+@test readchomp(`$(Base.julia_cmd()) -e '(@main)(args) = println("hello")'`) == "hello"
+
+# Test ARGS with -e
+@test readchomp(`$(Base.julia_cmd()) -e '(@main)(args) = println(args)' a b`) == repr(["a", "b"])
+
+# Test import from module
+@test readchomp(`$(Base.julia_cmd()) -e 'module Hello; export main; (@main)(args) = println("hello"); end; using .Hello'`) == "hello"
+@test readchomp(`$(Base.julia_cmd()) -e 'module Hello; export main; (@main)(args) = println("hello"); end; import .Hello'`) == ""
+
+# test --bug-report=rr
+if Sys.islinux() && Sys.ARCH in (:i686, :x86_64) # rr is only available on these platforms
+    mktempdir() do temp_trace_dir
+        test_read_success(setenv(`$(Base.julia_cmd()) --bug-report=rr-local -e 'exit()'`,
+                                 "JULIA_RR_RECORD_ARGS" => "-n --nested=ignore",
+                                 "_RR_TRACE_DIR" => temp_trace_dir))
+    end
+end
+
+@testset "--heap-size-hint" begin
+    exename = `$(Base.julia_cmd())`
+    @test errors_not_signals(`$exename --heap-size-hint -e "exit(0)"`)
+    @testset "--heap-size-hint=$str" for str in ["asdf","","0","1.2vb","b","GB","2.5GB̂","1.2gb2","42gigabytes","5gig","2GiB","NaNt"]
+        @test errors_not_signals(`$exename --heap-size-hint=$str -e "exit(0)"`)
+    end
+    k = 1024
+    m = 1024k
+    g = 1024m
+    t = 1024g
+    @testset "--heap-size-hint=$str" for (str, val) in [("1", 1), ("1e7", 1e7), ("2.5e7", 2.5e7), ("1MB", 1m), ("2.5g", 2.5g), ("1e4kB", 1e4k),
+        ("1e100", typemax(UInt64)), ("1e500g", typemax(UInt64)), ("1e-12t", 1), ("500000000b", 500000000)]
+        @test parse(UInt64,read(`$exename --heap-size-hint=$str -E "Base.JLOptions().heap_size_hint"`, String)) == val
+    end
+end
+
+@testset "--hard-heap-limit" begin
+    exename = `$(Base.julia_cmd())`
+    @test errors_not_signals(`$exename --hard-heap-limit -e "exit(0)"`)
+    @testset "--hard-heap-limit=$str" for str in ["asdf","","0","1.2vb","b","GB","2.5GB̂","1.2gb2","42gigabytes","5gig","2GiB","NaNt"]
+        @test errors_not_signals(`$exename --hard-heap-limit=$str -e "exit(0)"`)
+    end
+    k = UInt64(1) << 10
+    m = UInt64(1) << 20
+    g = UInt64(1) << 30
+    t = UInt64(1) << 40
+    one_hundred_mb_strs_and_vals = [
+        ("100000000", 100000000), ("1e8", 1e8), ("100MB", 100m), ("100m", 100m), ("1e5kB", 1e5k),
+    ]
+    @testset "--hard-heap-limit=$str" for (str, val) in one_hundred_mb_strs_and_vals
+        @test test_read_success(`$exename --hard-heap-limit=$str -E "Base.JLOptions().hard_heap_limit"`, UInt64) == val
+    end
+    two_and_a_half_gigabytes_strs_and_vals = [
+        ("2500000000", 2500000000), ("2.5e9", 2.5e9), ("2.5g", 2.5g), ("2.5GB", 2.5g), ("2.5e6mB", 2.5e6m),
+    ]
+    @testset "--hard-heap-limit=$str" for (str, val) in two_and_a_half_gigabytes_strs_and_vals
+        @test test_read_success(`$exename --hard-heap-limit=$str -E "Base.JLOptions().hard_heap_limit"`, UInt64) == val
+    end
+    one_terabyte_strs_and_vals = [
+        ("1TB", 1t), ("1024GB", 1t),
+    ]
+    @testset "--hard-heap-limit=$str" for (str, val) in one_terabyte_strs_and_vals
+        @test test_read_success(`$exename --hard-heap-limit=$str -E "Base.JLOptions().hard_heap_limit"`, UInt64) == val
+    end
+end
+
+@testset "--heap-target-increment" begin
+    exename = `$(Base.julia_cmd())`
+    @test errors_not_signals(`$exename --heap-target-increment -e "exit(0)"`)
+    @testset "--heap-target-increment=$str" for str in ["asdf","","0","1.2vb","b","GB","2.5GB̂","1.2gb2","42gigabytes","5gig","2GiB","NaNt"]
+        @test errors_not_signals(`$exename --heap-target-increment=$str -e "exit(0)"`)
+    end
+    k = UInt64(1) << 10
+    m = UInt64(1) << 20
+    g = UInt64(1) << 30
+    t = UInt64(1) << 40
+    one_hundred_mb_strs_and_vals = [
+        ("100000000", 100000000), ("1e8", 1e8), ("100MB", 100m), ("100m", 100m), ("1e5kB", 1e5k),
+    ]
+    @testset "--heap-target-increment=$str" for (str, val) in one_hundred_mb_strs_and_vals
+        @test test_read_success(`$exename --heap-target-increment=$str -E "Base.JLOptions().heap_target_increment"`, UInt64) == val
+    end
+    two_and_a_half_gigabytes_strs_and_vals = [
+        ("2500000000", 2500000000), ("2.5e9", 2.5e9), ("2.5g", 2.5g), ("2.5GB", 2.5g), ("2.5e6mB", 2.5e6m),
+    ]
+    @testset "--heap-target-increment=$str" for (str, val) in two_and_a_half_gigabytes_strs_and_vals
+        @test test_read_success(`$exename --heap-target-increment=$str -E "Base.JLOptions().heap_target_increment"`, UInt64) == val
+    end
+    one_terabyte_strs_and_vals = [
+        ("1TB", 1t), ("1024GB", 1t),
+    ]
+    @testset "--heap-target-increment=$str" for (str, val) in one_terabyte_strs_and_vals
+        @test test_read_success(`$exename --heap-target-increment=$str -E "Base.JLOptions().heap_target_increment"`, UInt64) == val
+    end
+end
+
+@testset "--timeout-for-safepoint-straggler" begin
+    exename = `$(Base.julia_cmd())`
+    timeout = 120
+    @test test_read_success(`$exename --timeout-for-safepoint-straggler=$timeout -E "Base.JLOptions().timeout_for_safepoint_straggler_s"`, Int) == timeout
+end
+
+@testset "--strip-metadata" begin
+    mktempdir() do dir
+        @test "" == test_read_success(`$(Base.julia_cmd()) --strip-metadata -t1,0 --output-o $(dir)/sys.o.a -e 0`)
+        if isfile(joinpath(dir, "sys.o.a"))
+            Base.Linking.link_image(joinpath(dir, "sys.o.a"), joinpath(dir, "sys.so"))
+            @test readchomp(`$(Base.julia_cmd()) -t1,0 -J $(dir)/sys.so -E 'hasmethod(sort, (Vector{Int},), (:dims,))'`) == "true"
+        end
+    end
+end
+
+# https://github.com/JuliaLang/julia/issues/58229 Recursion in jitlinking with inline=no
+@test "" == test_read_success(`$(Base.julia_cmd()) --inline=no -e 'Base.compilecache(Base.identify_package("Pkg"))'`)
diff --git a/test/combinatorics.jl b/test/combinatorics.jl
index f8fe4e0bd0829..527bd86963a6f 100644
--- a/test/combinatorics.jl
+++ b/test/combinatorics.jl
@@ -2,6 +2,9 @@
 
 using Random: randcycle
 
+isdefined(Main, :ImmutableArrays) || @eval Main include("testhelpers/ImmutableArrays.jl")
+using .Main.ImmutableArrays
+
 @testset "binomial" begin
     @test binomial(5,-1) == 0
     @test binomial(5,10) == 0
@@ -67,20 +70,19 @@ end
         @test isperm(T) == true
         @test isperm(K) == false
     end
+
+    # issue #47847
+    p = ImmutableArrays.ImmutableArray([2,3,1])
+    @test invperm(p) == invperm([2,3,1])
 end
 
 @testset "factorial" begin
-    @test factorial(7) == 5040
-    @test factorial(Int8(7)) == 5040
-    @test factorial(UInt8(7)) == 5040
-    @test factorial(Int16(7)) == 5040
-    @test factorial(UInt16(7)) == 5040
-    @test factorial(Int32(7)) == 5040
-    @test factorial(UInt32(7)) == 5040
-    @test factorial(Int64(7)) == 5040
-    @test factorial(UInt64(7)) == 5040
-    @test factorial(Int128(7)) == 5040
-    @test factorial(UInt128(7)) == 5040
+    for T = Base.uniontypes(Union{Base.Checked.SignedInt,Base.Checked.UnsignedInt})
+        @testset let T = T
+            @test factorial(T(7)) == 5040
+            @test Core.Compiler.is_foldable(Base.infer_effects(factorial, (T,)))
+        end
+    end
     @test factorial(0) == 1
     @test_throws DomainError factorial(-1)
     @test factorial(Int64(20)) == 2432902008176640000
@@ -122,3 +124,24 @@ end
         end
     end
 end
+
+@testset "permute!" begin
+    #simple array
+    @test permute!([1,2,3,4,5],[3,2,1,5,4]) == [3,2,1,5,4]
+    #empty array
+    @test permute!([],[]) == []
+    #single-element array
+    @test permute!([5],[1]) == [5]
+    #repeated elements in array
+    @test permute!([1,2,2,3,3,3],[2,1,3,5,4,6]) == [2,1,2,3,3,3]
+    #permutation vector contains zero
+    @test_throws BoundsError permute!([1,2,3],[0,1,2])
+    #permutation vector contains negative indices
+    @test_throws BoundsError permute!([1,2,3],[2,-1,1])
+    #permutation vector contains indices larger than array size
+    @test_throws BoundsError permute!([1,2,3],[2,4,1])
+    #permutation vector is empty
+    @test_throws DimensionMismatch permute!([1,2,3],[])
+    #array is empty
+    @test_throws BoundsError permute!([],[2,1])
+end
diff --git a/test/compileall.jl b/test/compileall.jl
new file mode 100644
index 0000000000000..726cadbba68c8
--- /dev/null
+++ b/test/compileall.jl
@@ -0,0 +1,63 @@
+# This test builds a full system image, so it can take a little while.
+# We make it a separate test target here, so that it can run in parallel
+# with the rest of the tests.
+
+include("tempdepot.jl")
+
+function precompile_test_harness(@nospecialize(f))
+    load_path = mkdepottempdir()
+    try
+        pushfirst!(LOAD_PATH, load_path)
+        pushfirst!(DEPOT_PATH, load_path)
+        f(load_path)
+    finally
+        try
+            rm(load_path, force=true, recursive=true)
+        catch err
+            @show err
+        end
+        filter!((≠)(load_path), LOAD_PATH)
+        filter!((≠)(load_path), DEPOT_PATH)
+    end
+    return nothing
+end
+
+precompile_test_harness() do dir
+    Foo_file = joinpath(dir, "OncePerFoo.jl")
+    image_file = joinpath(dir, "img.jl")
+    write(Foo_file,
+    """module OncePerFoo
+
+    const f = OncePerThread{Nothing}() do
+        println(Core.stdout, "Running thread init...")
+    end
+
+    f() # Executed during pre-compilation
+
+    end # module OncePerFoo
+    """)
+
+    write(image_file,
+    """
+    Base.init_depot_path()
+    Base.init_load_path()
+    using OncePerFoo
+
+    function main()
+        OncePerFoo.f()
+        return 0
+    end
+
+    OncePerFoo.f() # fire init during compilation time
+
+    """)
+    Base.compilecache(Base.PkgId("OncePerFoo"))
+    new_env = Dict(["JULIA_DEPOT_PATH" => join(DEPOT_PATH, Sys.iswindows() ? ';' : ':'),
+               "JULIA_LOAD_PATH" => join(LOAD_PATH, Sys.iswindows() ? ';' : ':')])
+    @test success(pipeline(addenv(`$(Base.julia_cmd()) --compile=all -t1,0 --strip-ir --output-o $(dir)/sys.o.a $(image_file) `, new_env), stderr=stderr, stdout=stdout)) skip=(Sys.WORD_SIZE == 32)
+    if isfile(joinpath(dir, "sys.o.a"))
+        Base.Linking.link_image(joinpath(dir, "sys.o.a"), joinpath(dir, "sys.so"))
+        str = readchomp(`$(Base.julia_cmd()) -t1,0 -J  $(dir)/sys.so -e 'Base.scrub_repl_backtrace(nothing); println("loaded"); main()'`)
+        @test split(str, '\n') == ["loaded", "Running thread init..."]
+    end
+end
diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl
deleted file mode 100644
index 9db0a8903593d..0000000000000
--- a/test/compiler/AbstractInterpreter.jl
+++ /dev/null
@@ -1,355 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test
-const CC = Core.Compiler
-
-include("irutils.jl")
-include("newinterp.jl")
-
-# OverlayMethodTable
-# ==================
-
-import Base.Experimental: @MethodTable, @overlay
-
-@newinterp MTOverlayInterp
-@MethodTable(OverlayedMT)
-CC.method_table(interp::MTOverlayInterp) = CC.OverlayMethodTable(CC.get_world_counter(interp), OverlayedMT)
-
-function CC.add_remark!(interp::MTOverlayInterp, ::CC.InferenceState, remark)
-    if interp.meta !== nothing
-        # Core.println(remark)
-        push!(interp.meta, remark)
-    end
-    return nothing
-end
-
-strangesin(x) = sin(x)
-@overlay OverlayedMT strangesin(x::Float64) = iszero(x) ? nothing : cos(x)
-
-# inference should use the overlayed method table
-@test Base.return_types((Float64,); interp=MTOverlayInterp()) do x
-    strangesin(x)
-end |> only === Union{Float64,Nothing}
-@test Base.return_types((Any,); interp=MTOverlayInterp()) do x
-    @invoke strangesin(x::Float64)
-end |> only === Union{Float64,Nothing}
-
-# effect analysis should figure out that the overlayed method is used
-@test Base.infer_effects((Float64,); interp=MTOverlayInterp()) do x
-    strangesin(x)
-end |> !Core.Compiler.is_nonoverlayed
-@test Base.infer_effects((Any,); interp=MTOverlayInterp()) do x
-    @invoke strangesin(x::Float64)
-end |> !Core.Compiler.is_nonoverlayed
-
-# account for overlay possibility in unanalyzed matching method
-callstrange(::Float64) = strangesin(x)
-callstrange(::Nothing) = Core.compilerbarrier(:type, nothing) # trigger inference bail out
-callstrange_entry(x) = callstrange(x) # needs to be defined here because of world age
-let interp = MTOverlayInterp(Set{Any}())
-    matches = Core.Compiler.findall(Tuple{typeof(callstrange),Any}, Core.Compiler.method_table(interp)).matches
-    @test Core.Compiler.length(matches) == 2
-    if Core.Compiler.getindex(matches, 1).method == which(callstrange, (Nothing,))
-        @test Base.infer_effects(callstrange_entry, (Any,); interp) |> !Core.Compiler.is_nonoverlayed
-        @test "Call inference reached maximally imprecise information. Bailing on." in interp.meta
-    else
-        @warn "`nonoverlayed` test for inference bailing out is skipped since the method match sort order is changed."
-    end
-end
-
-# but it should never apply for the native compilation
-@test Base.infer_effects((Float64,)) do x
-    strangesin(x)
-end |> Core.Compiler.is_nonoverlayed
-@test Base.infer_effects((Any,)) do x
-    @invoke strangesin(x::Float64)
-end |> Core.Compiler.is_nonoverlayed
-
-# fallback to the internal method table
-@test Base.return_types((Int,); interp=MTOverlayInterp()) do x
-    cos(x)
-end |> only === Float64
-@test Base.return_types((Any,); interp=MTOverlayInterp()) do x
-    @invoke cos(x::Float64)
-end |> only === Float64
-
-# not fully covered overlay method match
-overlay_match(::Any) = nothing
-@overlay OverlayedMT overlay_match(::Int) = missing
-@test Base.return_types((Any,); interp=MTOverlayInterp()) do x
-    overlay_match(x)
-end |> only === Union{Nothing,Missing}
-
-# partial concrete evaluation
-@test Base.return_types(; interp=MTOverlayInterp()) do
-    isbitstype(Int) ? nothing : missing
-end |> only === Nothing
-Base.@assume_effects :terminates_globally function issue41694(x)
-    res = 1
-    1 < x < 20 || throw("bad")
-    while x > 1
-        res *= x
-        x -= 1
-    end
-    return res
-end
-@test Base.return_types(; interp=MTOverlayInterp()) do
-    issue41694(3) == 6 ? nothing : missing
-end |> only === Nothing
-
-# disable partial concrete evaluation when tainted by any overlayed call
-Base.@assume_effects :total totalcall(f, args...) = f(args...)
-@test Base.return_types(; interp=MTOverlayInterp()) do
-    if totalcall(strangesin, 1.0) == cos(1.0)
-        return nothing
-    else
-        return missing
-    end
-end |> only === Nothing
-
-# GPUCompiler needs accurate inference through kwfunc with the overlay of `Core.throw_inexacterror`
-# https://github.com/JuliaLang/julia/issues/48097
-@newinterp Issue48097Interp
-@MethodTable Issue48097MT
-CC.method_table(interp::Issue48097Interp) = CC.OverlayMethodTable(CC.get_world_counter(interp), Issue48097MT)
-CC.InferenceParams(::Issue48097Interp) = CC.InferenceParams(; unoptimize_throw_blocks=false)
-@overlay Issue48097MT @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = return
-issue48097(; kwargs...) = return 42
-@test fully_eliminated(; interp=Issue48097Interp(), retval=42) do
-    issue48097(; a=1f0, b=1.0)
-end
-
-# AbstractLattice
-# ===============
-
-using Core: SlotNumber, Argument
-using Core.Compiler: slot_id, tmerge_fast_path
-import .CC:
-    AbstractLattice, BaseInferenceLattice, IPOResultLattice, InferenceLattice,
-    widenlattice, is_valid_lattice_norec, typeinf_lattice, ipo_lattice, optimizer_lattice,
-    widenconst, tmeet, tmerge, ⊑, abstract_eval_special_value, widenreturn
-
-@newinterp TaintInterpreter
-struct TaintLattice{PL<:AbstractLattice} <: CC.AbstractLattice
-    parent::PL
-end
-CC.widenlattice(𝕃::TaintLattice) = 𝕃.parent
-CC.is_valid_lattice_norec(::TaintLattice, @nospecialize(elm)) = isa(elm, Taint)
-
-struct InterTaintLattice{PL<:AbstractLattice} <: CC.AbstractLattice
-    parent::PL
-end
-CC.widenlattice(𝕃::InterTaintLattice) = 𝕃.parent
-CC.is_valid_lattice_norec(::InterTaintLattice, @nospecialize(elm)) = isa(elm, InterTaint)
-
-const AnyTaintLattice{L} = Union{TaintLattice{L},InterTaintLattice{L}}
-
-CC.typeinf_lattice(::TaintInterpreter) = InferenceLattice(TaintLattice(BaseInferenceLattice.instance))
-CC.ipo_lattice(::TaintInterpreter) = InferenceLattice(InterTaintLattice(IPOResultLattice.instance))
-CC.optimizer_lattice(::TaintInterpreter) = InterTaintLattice(SimpleInferenceLattice.instance)
-
-struct Taint
-    typ
-    slots::BitSet
-    function Taint(@nospecialize(typ), slots::BitSet)
-        if typ isa Taint
-            slots = typ.slots ∪ slots
-            typ = typ.typ
-        end
-        return new(typ, slots)
-    end
-end
-Taint(@nospecialize(typ), id::Int) = Taint(typ, push!(BitSet(), id))
-function Base.:(==)(a::Taint, b::Taint)
-    return a.typ == b.typ && a.slots == b.slots
-end
-
-struct InterTaint
-    typ
-    slots::BitSet
-    function InterTaint(@nospecialize(typ), slots::BitSet)
-        if typ isa InterTaint
-            slots = typ.slots ∪ slots
-            typ = typ.typ
-        end
-        return new(typ, slots)
-    end
-end
-InterTaint(@nospecialize(typ), id::Int) = InterTaint(typ, push!(BitSet(), id))
-function Base.:(==)(a::InterTaint, b::InterTaint)
-    return a.typ == b.typ && a.slots == b.slots
-end
-
-const AnyTaint = Union{Taint, InterTaint}
-
-function CC.tmeet(𝕃::AnyTaintLattice, @nospecialize(v), @nospecialize(t::Type))
-    T = isa(𝕃, TaintLattice) ? Taint : InterTaint
-    if isa(v, T)
-        v = v.typ
-    end
-    return tmeet(widenlattice(𝕃), v, t)
-end
-function CC.tmerge(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(typeb))
-    r = tmerge_fast_path(𝕃, typea, typeb)
-    r !== nothing && return r
-    # type-lattice for Taint
-    T = isa(𝕃, TaintLattice) ? Taint : InterTaint
-    if isa(typea, T)
-        if isa(typeb, T)
-            return T(
-                tmerge(widenlattice(𝕃), typea.typ, typeb.typ),
-                typea.slots ∪ typeb.slots)
-        else
-            typea = typea.typ
-        end
-    elseif isa(typeb, T)
-        typeb = typeb.typ
-    end
-    return tmerge(widenlattice(𝕃), typea, typeb)
-end
-function CC.:⊑(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(typeb))
-    T = isa(𝕃, TaintLattice) ? Taint : InterTaint
-    if isa(typea, T)
-        if isa(typeb, T)
-            typea.slots ⊆ typeb.slots || return false
-            return ⊑(widenlattice(𝕃), typea.typ, typeb.typ)
-        end
-        typea = typea.typ
-    elseif isa(typeb, T)
-        return false
-    end
-    return ⊑(widenlattice(𝕃), typea, typeb)
-end
-CC.widenconst(taint::AnyTaint) = widenconst(taint.typ)
-
-function CC.abstract_eval_special_value(interp::TaintInterpreter,
-    @nospecialize(e), vtypes::CC.VarTable, sv::CC.InferenceState)
-    ret = @invoke CC.abstract_eval_special_value(interp::CC.AbstractInterpreter,
-        e::Any, vtypes::CC.VarTable, sv::CC.InferenceState)
-    if isa(e, SlotNumber) || isa(e, Argument)
-        return Taint(ret, slot_id(e))
-    end
-    return ret
-end
-
-function CC.widenreturn(𝕃::InferenceLattice{<:InterTaintLattice}, @nospecialize(rt), @nospecialize(bestguess), nargs::Int, slottypes::Vector{Any}, changes::CC.VarTable)
-    if isa(rt, Taint)
-        return InterTaint(rt.typ, BitSet((id for id in rt.slots if id ≤ nargs)))
-    end
-    return CC.widenreturn(widenlattice(𝕃), rt, bestguess, nargs, slottypes, changes)
-end
-
-@test CC.tmerge(typeinf_lattice(TaintInterpreter()), Taint(Int, 1), Taint(Int, 2)) == Taint(Int, BitSet(1:2))
-
-# code_typed(ifelse, (Bool, Int, Int); interp=TaintInterpreter())
-
-# External lattice without `Conditional`
-
-import .CC:
-    AbstractLattice, ConstsLattice, PartialsLattice, InferenceLattice,
-    typeinf_lattice, ipo_lattice, optimizer_lattice
-
-@newinterp NonconditionalInterpreter
-CC.typeinf_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice()))
-CC.ipo_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice()))
-CC.optimizer_lattice(::NonconditionalInterpreter) = PartialsLattice(ConstsLattice())
-
-@test Base.return_types((Any,); interp=NonconditionalInterpreter()) do x
-    c = isa(x, Int) || isa(x, Float64)
-    if c
-        return x
-    else
-        return nothing
-    end
-end |> only === Any
-
-# CallInfo × inlining
-# ===================
-
-@newinterp NoinlineInterpreter
-noinline_modules(interp::NoinlineInterpreter) = interp.meta::Set{Module}
-
-import .CC: CallInfo
-
-struct NoinlineCallInfo <: CallInfo
-    info::CallInfo # wrapped call
-end
-CC.nsplit_impl(info::NoinlineCallInfo) = CC.nsplit(info.info)
-CC.getsplit_impl(info::NoinlineCallInfo, idx::Int) = CC.getsplit(info.info, idx)
-CC.getresult_impl(info::NoinlineCallInfo, idx::Int) = CC.getresult(info.info, idx)
-
-function CC.abstract_call(interp::NoinlineInterpreter,
-    arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int)
-    ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter,
-        arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int)
-    if sv.mod in noinline_modules(interp)
-        return CC.CallMeta(ret.rt, ret.effects, NoinlineCallInfo(ret.info))
-    end
-    return ret
-end
-function CC.inlining_policy(interp::NoinlineInterpreter,
-    @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt8, mi::MethodInstance,
-    argtypes::Vector{Any})
-    if isa(info, NoinlineCallInfo)
-        return nothing
-    end
-    return @invoke CC.inlining_policy(interp::CC.AbstractInterpreter,
-        src::Any, info::CallInfo, stmt_flag::UInt8, mi::MethodInstance,
-        argtypes::Vector{Any})
-end
-
-@inline function inlined_usually(x, y, z)
-    return x * y + z
-end
-
-# check if the inlining algorithm works as expected
-let src = code_typed1((Float64,Float64,Float64)) do x, y, z
-        inlined_usually(x, y, z)
-    end
-    @test count(isinvoke(:inlined_usually), src.code) == 0
-    @test count(iscall((src, inlined_usually)), src.code) == 0
-end
-let NoinlineModule = Module()
-    interp = NoinlineInterpreter(Set((NoinlineModule,)))
-
-    # this anonymous function's context is Main -- it should be inlined as usual
-    let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z
-            inlined_usually(x, y, z)
-        end
-        @test count(isinvoke(:inlined_usually), src.code) == 0
-        @test count(iscall((src, inlined_usually)), src.code) == 0
-    end
-
-    # it should work for cached results
-    method = only(methods(inlined_usually, (Float64,Float64,Float64,)))
-    mi = CC.specialize_method(method, Tuple{typeof(inlined_usually),Float64,Float64,Float64}, Core.svec())
-    @test haskey(interp.code_cache.dict, mi)
-    let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z
-            inlined_usually(x, y, z)
-        end
-        @test count(isinvoke(:inlined_usually), src.code) == 0
-        @test count(iscall((src, inlined_usually)), src.code) == 0
-    end
-
-    # now the context module is `NoinlineModule` -- it should not be inlined
-    let src = @eval NoinlineModule $code_typed1((Float64,Float64,Float64); interp=$interp) do x, y, z
-            $inlined_usually(x, y, z)
-        end
-        @test count(isinvoke(:inlined_usually), src.code) == 1
-        @test count(iscall((src, inlined_usually)), src.code) == 0
-    end
-
-    # the context module is totally irrelevant -- it should be inlined as usual
-    OtherModule = Module()
-    let src = @eval OtherModule $code_typed1((Float64,Float64,Float64); interp=$interp) do x, y, z
-            $inlined_usually(x, y, z)
-        end
-        @test count(isinvoke(:inlined_usually), src.code) == 0
-        @test count(iscall((src, inlined_usually)), src.code) == 0
-    end
-end
-
-# Make sure that Core.Compiler has enough NamedTuple infrastructure
-# to properly give error messages for basic kwargs...
-Core.eval(Core.Compiler, quote f(;a=1) = a end)
-@test_throws MethodError Core.Compiler.f(;b=2)
diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl
deleted file mode 100644
index bb3273b3e707a..0000000000000
--- a/test/compiler/EscapeAnalysis/EAUtils.jl
+++ /dev/null
@@ -1,366 +0,0 @@
-module EAUtils
-
-export code_escapes, @code_escapes, __clear_cache!
-
-const CC = Core.Compiler
-const EA = CC.EscapeAnalysis
-
-# entries
-# -------
-
-import Base: unwrap_unionall, rewrap_unionall
-import InteractiveUtils: gen_call_with_extracted_types_and_kwargs
-
-"""
-    @code_escapes [options...] f(args...)
-
-Evaluates the arguments to the function call, determines its types, and then calls
-[`code_escapes`](@ref) on the resulting expression.
-As with `@code_typed` and its family, any of `code_escapes` keyword arguments can be given
-as the optional arguments like `@code_escapes optimize=false myfunc(myargs...)`.
-"""
-macro code_escapes(ex0...)
-    return gen_call_with_extracted_types_and_kwargs(__module__, :code_escapes, ex0)
-end
-
-"""
-    code_escapes(f, argtypes=Tuple{}; [debuginfo::Symbol = :none], [optimize::Bool = true]) -> result::EscapeResult
-
-Runs the escape analysis on optimized IR of a generic function call with the given type signature.
-
-# Keyword Arguments
-
-- `optimize::Bool = true`:
-  if `true` returns escape information of post-inlining IR (used for local optimization),
-  otherwise returns escape information of pre-inlining IR (used for interprocedural escape information generation)
-- `debuginfo::Symbol = :none`:
-  controls the amount of code metadata present in the output, possible options are `:none` or `:source`.
-"""
-function code_escapes(@nospecialize(f), @nospecialize(types=Base.default_tt(f));
-                      world::UInt = get_world_counter(),
-                      interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world),
-                      debuginfo::Symbol = :none,
-                      optimize::Bool = true)
-    tt = Base.signature_type(f, types)
-    interp = EscapeAnalyzer(interp, tt, optimize)
-    results = Base.code_typed_by_type(tt; optimize=true, world, interp)
-    isone(length(results)) || throw(ArgumentError("`code_escapes` only supports single analysis result"))
-    return EscapeResult(interp.ir, interp.state, interp.linfo, debuginfo === :source)
-end
-
-# in order to run a whole analysis from ground zero (e.g. for benchmarking, etc.)
-__clear_cache!() = empty!(GLOBAL_CODE_CACHE)
-
-# AbstractInterpreter
-# -------------------
-
-# imports
-import .CC:
-    AbstractInterpreter, NativeInterpreter, WorldView, WorldRange,
-    InferenceParams, OptimizationParams, get_world_counter, get_inference_cache, code_cache
-# usings
-import Core:
-    CodeInstance, MethodInstance, CodeInfo
-import .CC:
-    InferenceResult, OptimizationState, IRCode, copy as cccopy,
-    @timeit, convert_to_ircode, slot2reg, compact!, ssa_inlining_pass!, sroa_pass!,
-    adce_pass!, JLOptions, verify_ir, verify_linetable
-import .EA: analyze_escapes, ArgEscapeCache, EscapeInfo, EscapeState, is_ipo_profitable
-
-# when working outside of Core.Compiler,
-# cache entire escape state for later inspection and debugging
-struct EscapeCache
-    cache::ArgEscapeCache
-    state::EscapeState # preserved just for debugging purpose
-    ir::IRCode         # preserved just for debugging purpose
-end
-
-mutable struct EscapeAnalyzer{State} <: AbstractInterpreter
-    native::NativeInterpreter
-    cache::IdDict{InferenceResult,EscapeCache}
-    entry_tt
-    optimize::Bool
-    ir::IRCode
-    state::State
-    linfo::MethodInstance
-    EscapeAnalyzer(native::NativeInterpreter, @nospecialize(tt), optimize::Bool) =
-        new{EscapeState}(native, IdDict{InferenceResult,EscapeCache}(), tt, optimize)
-end
-
-CC.InferenceParams(interp::EscapeAnalyzer)    = InferenceParams(interp.native)
-CC.OptimizationParams(interp::EscapeAnalyzer) = OptimizationParams(interp.native)
-CC.get_world_counter(interp::EscapeAnalyzer)  = get_world_counter(interp.native)
-
-CC.get_inference_cache(interp::EscapeAnalyzer) = get_inference_cache(interp.native)
-
-const GLOBAL_CODE_CACHE = IdDict{MethodInstance,CodeInstance}()
-
-function CC.code_cache(interp::EscapeAnalyzer)
-    worlds = WorldRange(get_world_counter(interp))
-    return WorldView(GlobalCache(), worlds)
-end
-
-struct GlobalCache end
-
-CC.haskey(wvc::WorldView{GlobalCache}, mi::MethodInstance) = haskey(GLOBAL_CODE_CACHE, mi)
-
-CC.get(wvc::WorldView{GlobalCache}, mi::MethodInstance, default) = get(GLOBAL_CODE_CACHE, mi, default)
-
-CC.getindex(wvc::WorldView{GlobalCache}, mi::MethodInstance) = getindex(GLOBAL_CODE_CACHE, mi)
-
-function CC.setindex!(wvc::WorldView{GlobalCache}, ci::CodeInstance, mi::MethodInstance)
-    GLOBAL_CODE_CACHE[mi] = ci
-    add_callback!(mi) # register the callback on invalidation
-    return nothing
-end
-
-function add_callback!(linfo)
-    if !isdefined(linfo, :callbacks)
-        linfo.callbacks = Any[invalidate_cache!]
-    else
-        if !any(@nospecialize(cb)->cb===invalidate_cache!, linfo.callbacks)
-            push!(linfo.callbacks, invalidate_cache!)
-        end
-    end
-    return nothing
-end
-
-function invalidate_cache!(replaced, max_world, depth = 0)
-    delete!(GLOBAL_CODE_CACHE, replaced)
-
-    if isdefined(replaced, :backedges)
-        for mi in replaced.backedges
-            mi = mi::MethodInstance
-            if !haskey(GLOBAL_CODE_CACHE, mi)
-                continue # otherwise fall into infinite loop
-            end
-            invalidate_cache!(mi, max_world, depth+1)
-        end
-    end
-    return nothing
-end
-
-function CC.optimize(interp::EscapeAnalyzer,
-    opt::OptimizationState, caller::InferenceResult)
-    ir = run_passes_with_ea(interp, opt.src, opt, caller)
-    return CC.finish(interp, opt, ir, caller)
-end
-
-function CC.cache_result!(interp::EscapeAnalyzer, caller::InferenceResult)
-    if haskey(interp.cache, caller)
-        GLOBAL_ESCAPE_CACHE[caller.linfo] = interp.cache[caller]
-    end
-    return @invoke CC.cache_result!(interp::AbstractInterpreter, caller::InferenceResult)
-end
-
-const GLOBAL_ESCAPE_CACHE = IdDict{MethodInstance,EscapeCache}()
-
-"""
-    cache_escapes!(caller::InferenceResult, estate::EscapeState, cacheir::IRCode)
-
-Transforms escape information of call arguments of `caller`,
-and then caches it into a global cache for later interprocedural propagation.
-"""
-function cache_escapes!(interp::EscapeAnalyzer,
-    caller::InferenceResult, estate::EscapeState, cacheir::IRCode)
-    cache = ArgEscapeCache(estate)
-    ecache = EscapeCache(cache, estate, cacheir)
-    interp.cache[caller] = ecache
-    return cache
-end
-
-function get_escape_cache(interp::EscapeAnalyzer)
-    return function (linfo::Union{InferenceResult,MethodInstance})
-        if isa(linfo, InferenceResult)
-            ecache = get(interp.cache, linfo, nothing)
-        else
-            ecache = get(GLOBAL_ESCAPE_CACHE, linfo, nothing)
-        end
-        return ecache !== nothing ? ecache.cache : nothing
-    end
-end
-
-function run_passes_with_ea(interp::EscapeAnalyzer, ci::CodeInfo, sv::OptimizationState,
-    caller::InferenceResult)
-    @timeit "convert"   ir = convert_to_ircode(ci, sv)
-    @timeit "slot2reg"  ir = slot2reg(ir, ci, sv)
-    # TODO: Domsorting can produce an updated domtree - no need to recompute here
-    @timeit "compact 1" ir = compact!(ir)
-    nargs = let def = sv.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end
-    local state
-    if is_ipo_profitable(ir, nargs) || caller.linfo.specTypes === interp.entry_tt
-        try
-            @timeit "[IPO EA]" begin
-                state = analyze_escapes(ir, nargs, false, get_escape_cache(interp))
-                cache_escapes!(interp, caller, state, cccopy(ir))
-            end
-        catch err
-            @error "error happened within [IPO EA], inspect `Main.ir` and `Main.nargs`"
-            @eval Main (ir = $ir; nargs = $nargs)
-            rethrow(err)
-        end
-    end
-    if caller.linfo.specTypes === interp.entry_tt && !interp.optimize
-        # return back the result
-        interp.ir = cccopy(ir)
-        interp.state = state
-        interp.linfo = sv.linfo
-    end
-    @timeit "Inlining"  ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds)
-    # @timeit "verify 2" verify_ir(ir)
-    @timeit "compact 2" ir = compact!(ir)
-    if caller.linfo.specTypes === interp.entry_tt && interp.optimize
-        try
-            @timeit "[Local EA]" state = analyze_escapes(ir, nargs, true, get_escape_cache(interp))
-        catch err
-            @error "error happened within [Local EA], inspect `Main.ir` and `Main.nargs`"
-            @eval Main (ir = $ir; nargs = $nargs)
-            rethrow(err)
-        end
-        # return back the result
-        interp.ir = cccopy(ir)
-        interp.state = state
-        interp.linfo = sv.linfo
-    end
-    @timeit "SROA"      ir = sroa_pass!(ir)
-    @timeit "ADCE"      ir = adce_pass!(ir)
-    @timeit "compact 3" ir = compact!(ir)
-    if JLOptions().debug_level == 2
-        @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable))
-    end
-    return ir
-end
-
-# printing
-# --------
-
-import Core: Argument, SSAValue
-import .CC: widenconst, singleton_type
-
-Base.getindex(estate::EscapeState, @nospecialize(x)) = CC.getindex(estate, x)
-
-function get_name_color(x::EscapeInfo, symbol::Bool = false)
-    getname(x) = string(nameof(x))
-    if x === EA.⊥
-        name, color = (getname(EA.NotAnalyzed), "◌"), :plain
-    elseif EA.has_no_escape(EA.ignore_argescape(x))
-        if EA.has_arg_escape(x)
-            name, color = (getname(EA.ArgEscape), "✓"), :cyan
-        else
-            name, color = (getname(EA.NoEscape), "✓"), :green
-        end
-    elseif EA.has_all_escape(x)
-        name, color = (getname(EA.AllEscape), "X"), :red
-    elseif EA.has_return_escape(x)
-        name = (getname(EA.ReturnEscape), "↑")
-        color = EA.has_thrown_escape(x) ? :yellow : :blue
-    else
-        name = (nothing, "*")
-        color = EA.has_thrown_escape(x) ? :yellow : :bold
-    end
-    name = symbol ? last(name) : first(name)
-    if name !== nothing && !isa(x.AliasInfo, Bool)
-        name = string(name, "′")
-    end
-    return name, color
-end
-
-# pcs = sprint(show, collect(x.EscapeSites); context=:limit=>true)
-function Base.show(io::IO, x::EscapeInfo)
-    name, color = get_name_color(x)
-    if isnothing(name)
-        @invoke show(io::IO, x::Any)
-    else
-        printstyled(io, name; color)
-    end
-end
-function Base.show(io::IO, ::MIME"application/prs.juno.inline", x::EscapeInfo)
-    name, color = get_name_color(x)
-    if isnothing(name)
-        return x # use fancy tree-view
-    else
-        printstyled(io, name; color)
-    end
-end
-
-struct EscapeResult
-    ir::IRCode
-    state::EscapeState
-    linfo::Union{Nothing,MethodInstance}
-    source::Bool
-    function EscapeResult(ir::IRCode, state::EscapeState,
-        linfo::Union{Nothing,MethodInstance} = nothing,
-        source::Bool=false)
-        return new(ir, state, linfo, source)
-    end
-end
-Base.show(io::IO, result::EscapeResult) = print_with_info(io, result)
-@eval Base.iterate(res::EscapeResult, state=1) =
-    return state > $(fieldcount(EscapeResult)) ? nothing : (getfield(res, state), state+1)
-
-Base.show(io::IO, cached::EscapeCache) = show(io, EscapeResult(cached.ir, cached.state, nothing))
-
-# adapted from https://github.com/JuliaDebug/LoweredCodeUtils.jl/blob/4612349432447e868cf9285f647108f43bd0a11c/src/codeedges.jl#L881-L897
-function print_with_info(io::IO, (; ir, state, linfo, source)::EscapeResult)
-    # print escape information on SSA values
-    function preprint(io::IO)
-        ft = ir.argtypes[1]
-        f = singleton_type(ft)
-        if f === nothing
-            f = widenconst(ft)
-        end
-        print(io, f, '(')
-        for i in 1:state.nargs
-            arg = state[Argument(i)]
-            i == 1 && continue
-            c, color = get_name_color(arg, true)
-            printstyled(io, c, ' ', '_', i, "::", ir.argtypes[i]; color)
-            i ≠ state.nargs && print(io, ", ")
-        end
-        print(io, ')')
-        if !isnothing(linfo)
-            def = linfo.def
-            printstyled(io, " in ", (isa(def, Module) ? (def,) : (def.module, " at ", def.file, ':', def.line))...; color=:bold)
-        end
-        println(io)
-    end
-
-    # print escape information on SSA values
-    # nd = ndigits(length(ssavalues))
-    function preprint(io::IO, idx::Int)
-        c, color = get_name_color(state[SSAValue(idx)], true)
-        # printstyled(io, lpad(idx, nd), ' ', c, ' '; color)
-        printstyled(io, rpad(c, 2), ' '; color)
-    end
-
-    print_with_info(preprint, (args...)->nothing, io, ir, source)
-end
-
-function print_with_info(preprint, postprint, io::IO, ir::IRCode, source::Bool)
-    io = IOContext(io, :displaysize=>displaysize(io))
-    used = Base.IRShow.stmts_used(io, ir)
-    if source
-        line_info_preprinter = function (io::IO, indent::String, idx::Int)
-            r = Base.IRShow.inline_linfo_printer(ir)(io, indent, idx)
-            idx ≠ 0 && preprint(io, idx)
-            return r
-        end
-    else
-        line_info_preprinter = Base.IRShow.lineinfo_disabled
-    end
-    line_info_postprinter = Base.IRShow.default_expr_type_printer
-    preprint(io)
-    bb_idx_prev = bb_idx = 1
-    for idx = 1:length(ir.stmts)
-        preprint(io, idx)
-        bb_idx = Base.IRShow.show_ir_stmt(io, ir, idx, line_info_preprinter, line_info_postprinter, used, ir.cfg, bb_idx)
-        postprint(io, idx, bb_idx != bb_idx_prev)
-        bb_idx_prev = bb_idx
-    end
-    max_bb_idx_size = ndigits(length(ir.cfg.blocks))
-    line_info_preprinter(io, " "^(max_bb_idx_size + 2), 0)
-    postprint(io)
-    return nothing
-end
-
-end # module EAUtils
diff --git a/test/compiler/EscapeAnalysis/interprocedural.jl b/test/compiler/EscapeAnalysis/interprocedural.jl
deleted file mode 100644
index 756e5489ed637..0000000000000
--- a/test/compiler/EscapeAnalysis/interprocedural.jl
+++ /dev/null
@@ -1,262 +0,0 @@
-# IPO EA Test
-# ===========
-# EA works on pre-inlining IR
-
-include(normpath(@__DIR__, "setup.jl"))
-
-# callsites
-# ---------
-
-noescape(a) = nothing
-noescape(a, b) = nothing
-function global_escape!(x)
-    GR[] = x
-    return nothing
-end
-union_escape!(x) = global_escape!(x)
-union_escape!(x::SafeRef) = nothing
-union_escape!(x::SafeRefs) = nothing
-Base.@constprop :aggressive function conditional_escape!(cnd, x)
-    cnd && global_escape!(x)
-    return nothing
-end
-
-# MethodMatchInfo -- global cache
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        return noescape(x)
-    end
-    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
-end
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        identity(x)
-        return nothing
-    end
-    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
-end
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        return identity(x)
-    end
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test has_return_escape(result.state[Argument(2)], r)
-end
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        return Ref(x)
-    end
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test has_return_escape(result.state[Argument(2)], r)
-end
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        r = Ref{SafeRef{String}}()
-        r[] = x
-        return r
-    end
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test has_return_escape(result.state[Argument(2)], r)
-end
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        global_escape!(x)
-    end
-    @test has_all_escape(result.state[Argument(2)])
-end
-# UnionSplitInfo
-let result = code_escapes((Bool,Vector{Any}); optimize=false) do c, s
-        x = c ? s : SafeRef(s)
-        union_escape!(x)
-    end
-    @test has_all_escape(result.state[Argument(3)]) # s
-end
-let result = code_escapes((Bool,Vector{Any}); optimize=false) do c, s
-        x = c ? SafeRef(s) : SafeRefs(s, s)
-        union_escape!(x)
-    end
-    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
-end
-# ConstCallInfo -- local cache
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        return conditional_escape!(false, x)
-    end
-    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
-end
-# InvokeCallInfo
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        return @invoke noescape(x::Any)
-    end
-    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
-end
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        return @invoke conditional_escape!(false::Any, x::Any)
-    end
-    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
-end
-
-# MethodError
-# -----------
-# accounts for ThrownEscape via potential MethodError
-
-# no method error
-identity_if_string(x::SafeRef) = nothing
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        identity_if_string(x)
-    end
-    i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.inst))
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test !has_thrown_escape(result.state[Argument(2)], i)
-    @test !has_return_escape(result.state[Argument(2)], r)
-end
-let result = code_escapes((Union{SafeRef{String},Vector{String}},); optimize=false) do x
-        identity_if_string(x)
-    end
-    i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.inst))
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test has_thrown_escape(result.state[Argument(2)], i)
-    @test !has_return_escape(result.state[Argument(2)], r)
-end
-let result = code_escapes((SafeRef{String},); optimize=false) do x
-        try
-            identity_if_string(x)
-        catch err
-            global GV = err
-        end
-        return nothing
-    end
-    @test !has_all_escape(result.state[Argument(2)])
-end
-let result = code_escapes((Union{SafeRef{String},Vector{String}},); optimize=false) do x
-        try
-            identity_if_string(x)
-        catch err
-            global GV = err
-        end
-        return nothing
-    end
-    @test has_all_escape(result.state[Argument(2)])
-end
-# method ambiguity error
-ambig_error_test(a::SafeRef, b) = nothing
-ambig_error_test(a, b::SafeRef) = nothing
-ambig_error_test(a, b) = nothing
-let result = code_escapes((SafeRef{String},Any); optimize=false) do x, y
-        ambig_error_test(x, y)
-    end
-    i = only(findall(iscall((result.ir, ambig_error_test)), result.ir.stmts.inst))
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test has_thrown_escape(result.state[Argument(2)], i)  # x
-    @test has_thrown_escape(result.state[Argument(3)], i)  # y
-    @test !has_return_escape(result.state[Argument(2)], r)  # x
-    @test !has_return_escape(result.state[Argument(3)], r)  # y
-end
-let result = code_escapes((SafeRef{String},Any); optimize=false) do x, y
-        try
-            ambig_error_test(x, y)
-        catch err
-            global GV = err
-        end
-    end
-    @test has_all_escape(result.state[Argument(2)])  # x
-    @test has_all_escape(result.state[Argument(3)])  # y
-end
-
-# Local EA integration
-# --------------------
-
-# propagate escapes imposed on call arguments
-
-# FIXME handle _apply_iterate
-# FIXME currently we can't prove the effect-freeness of `getfield(RefValue{String}, :x)`
-# because of this check https://github.com/JuliaLang/julia/blob/94b9d66b10e8e3ebdb268e4be5f7e1f43079ad4e/base/compiler/tfuncs.jl#L745
-# and thus it leads to the following two broken tests
-
-@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing)
-let result = code_escapes() do
-        broadcast_noescape1(Ref("Hi"))
-    end
-    i = only(findall(isnew, result.ir.stmts.inst))
-    @test_broken !has_return_escape(result.state[SSAValue(i)])
-    @test_broken !has_thrown_escape(result.state[SSAValue(i)])
-end
-@noinline broadcast_noescape2(b) = broadcast(identity, b)
-let result = code_escapes() do
-        broadcast_noescape2(Ref("Hi"))
-    end
-    i = only(findall(isnew, result.ir.stmts.inst))
-    @test_broken !has_return_escape(result.state[SSAValue(i)])
-    @test_broken !has_thrown_escape(result.state[SSAValue(i)])
-end
-@noinline allescape_argument(a) = (global GV = a) # obvious escape
-let result = code_escapes() do
-        allescape_argument(Ref("Hi"))
-    end
-    i = only(findall(isnew, result.ir.stmts.inst))
-    @test has_all_escape(result.state[SSAValue(i)])
-end
-# if we can't determine the matching method statically, we should be conservative
-let result = code_escapes((Ref{Any},)) do a
-        may_exist(a)
-    end
-    @test has_all_escape(result.state[Argument(2)])
-end
-let result = code_escapes((Ref{Any},)) do a
-        Base.@invokelatest broadcast_noescape1(a)
-    end
-    @test has_all_escape(result.state[Argument(2)])
-end
-
-# handling of simple union-split (just exploit the inliner's effort)
-@noinline unionsplit_noescape(a)      = string(nothing)
-@noinline unionsplit_noescape(a::Int) = a + 10
-let result = code_escapes((Union{Int,Nothing},)) do x
-        s = SafeRef{Union{Int,Nothing}}(x)
-        unionsplit_noescape(s[])
-        return nothing
-    end
-    inds = findall(isnew, result.ir.stmts.inst) # find allocation statement
-    @assert !isempty(inds)
-    for i in inds
-        @test has_no_escape(result.state[SSAValue(i)])
-    end
-end
-
-@noinline function unused_argument(a)
-    println("prevent inlining")
-    return Base.inferencebarrier(nothing)
-end
-let result = code_escapes() do
-        a = Ref("foo") # shouldn't be "return escape"
-        b = unused_argument(a)
-        nothing
-    end
-    i = only(findall(isnew, result.ir.stmts.inst))
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test !has_return_escape(result.state[SSAValue(i)], r)
-
-    result = code_escapes() do
-        a = Ref("foo") # still should be "return escape"
-        b = unused_argument(a)
-        return a
-    end
-    i = only(findall(isnew, result.ir.stmts.inst))
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test has_return_escape(result.state[SSAValue(i)], r)
-end
-
-# should propagate escape information imposed on return value to the aliased call argument
-@noinline returnescape_argument(a) = (println("prevent inlining"); a)
-let result = code_escapes() do
-        obj = Ref("foo")           # should be "return escape"
-        ret = returnescape_argument(obj)
-        return ret                 # alias of `obj`
-    end
-    i = only(findall(isnew, result.ir.stmts.inst))
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test has_return_escape(result.state[SSAValue(i)], r)
-end
-@noinline noreturnescape_argument(a) = (println("prevent inlining"); identity("hi"))
-let result = code_escapes() do
-        obj = Ref("foo")              # better to not be "return escape"
-        ret = noreturnescape_argument(obj)
-        return ret                    # must not alias to `obj`
-    end
-    i = only(findall(isnew, result.ir.stmts.inst))
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test !has_return_escape(result.state[SSAValue(i)], r)
-end
diff --git a/test/compiler/EscapeAnalysis/local.jl b/test/compiler/EscapeAnalysis/local.jl
deleted file mode 100644
index 27e4fdeee28c6..0000000000000
--- a/test/compiler/EscapeAnalysis/local.jl
+++ /dev/null
@@ -1,2205 +0,0 @@
-# Local EA Test
-# =============
-# EA works on post-inlining IR
-
-include(normpath(@__DIR__, "setup.jl"))
-
-@testset "basics" begin
-    let # arg return
-        result = code_escapes((Any,)) do a # return to caller
-            return nothing
-        end
-        @test has_arg_escape(result.state[Argument(2)])
-        # return
-        result = code_escapes((Any,)) do a
-            return a
-        end
-        i = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_arg_escape(result.state[Argument(1)]) # self
-        @test !has_return_escape(result.state[Argument(1)], i) # self
-        @test has_arg_escape(result.state[Argument(2)]) # a
-        @test has_return_escape(result.state[Argument(2)], i) # a
-    end
-    let # global store
-        result = code_escapes((Any,)) do a
-            global GV = a
-            nothing
-        end
-        @test has_all_escape(result.state[Argument(2)])
-    end
-    let # global load
-        result = code_escapes() do
-            global GV
-            return GV
-        end
-        i = only(findall(has_return_escape, map(i->result.state[SSAValue(i)], 1:length(result.ir.stmts))))
-        @test has_all_escape(result.state[SSAValue(i)])
-    end
-    let # global store / load (https://github.com/aviatesk/EscapeAnalysis.jl/issues/56)
-        result = code_escapes((Any,)) do s
-            global GV
-            GV = s
-            return GV
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-    end
-    let # :gc_preserve_begin / :gc_preserve_end
-        result = code_escapes((String,)) do s
-            m = SafeRef(s)
-            GC.@preserve m begin
-                return nothing
-            end
-        end
-        i = findfirst(isT(SafeRef{String}), result.ir.stmts.type) # find allocation statement
-        @test !isnothing(i)
-        @test has_no_escape(result.state[SSAValue(i)])
-    end
-    let # :isdefined
-        result = code_escapes((String, Bool, )) do a, b
-            if b
-                s = Ref(a)
-            end
-            return @isdefined(s)
-        end
-        i = findfirst(isT(Base.RefValue{String}), result.ir.stmts.type) # find allocation statement
-        @test isnothing(i) || has_no_escape(result.state[SSAValue(i)])
-    end
-    let # ϕ-node
-        result = code_escapes((Bool,Any,Any)) do cond, a, b
-            c = cond ? a : b # ϕ(a, b)
-            return c
-        end
-        @assert any(@nospecialize(x)->isa(x, Core.PhiNode), result.ir.stmts.inst)
-        i = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(3)], i) # a
-        @test has_return_escape(result.state[Argument(4)], i) # b
-    end
-    let # π-node
-        result = code_escapes((Any,)) do a
-            if isa(a, Regex) # a::π(Regex)
-                return a
-            end
-            return nothing
-        end
-        @assert any(@nospecialize(x)->isa(x, Core.PiNode), result.ir.stmts.inst)
-        @test any(findall(isreturn, result.ir.stmts.inst)) do i
-            has_return_escape(result.state[Argument(2)], i)
-        end
-    end
-    let # φᶜ-node / ϒ-node
-        result = code_escapes((Any,String)) do a, b
-            local x::String
-            try
-                x = a
-            catch err
-                x = b
-            end
-            return x
-        end
-        @assert any(@nospecialize(x)->isa(x, Core.PhiCNode), result.ir.stmts.inst)
-        @assert any(@nospecialize(x)->isa(x, Core.UpsilonNode), result.ir.stmts.inst)
-        i = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], i)
-        @test has_return_escape(result.state[Argument(3)], i)
-    end
-    let # branching
-        result = code_escapes((Any,Bool,)) do a, c
-            if c
-                return nothing # a doesn't escape in this branch
-            else
-                return a # a escapes to a caller
-            end
-        end
-        @test has_return_escape(result.state[Argument(2)])
-    end
-    let # loop
-        result = code_escapes((Int,)) do n
-            c = SafeRef{Bool}(false)
-            while n > 0
-                rand(Bool) && return c
-            end
-            nothing
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_return_escape(result.state[SSAValue(i)])
-    end
-    let # try/catch
-        result = code_escapes((Any,)) do a
-            try
-                nothing
-            catch err
-                return a # return escape
-            end
-        end
-        @test has_return_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Any,)) do a
-            try
-                nothing
-            finally
-                return a # return escape
-            end
-        end
-        @test has_return_escape(result.state[Argument(2)])
-    end
-    let # :foreigncall
-        result = code_escapes((Any,)) do x
-            ccall(:some_ccall, Any, (Any,), x)
-        end
-        @test has_all_escape(result.state[Argument(2)])
-    end
-end
-
-let # simple allocation
-    result = code_escapes((Bool,)) do c
-        mm = SafeRef{Bool}(c) # just allocated, never escapes
-        return mm[] ? nothing : 1
-    end
-    i = only(findall(isnew, result.ir.stmts.inst))
-    @test has_no_escape(result.state[SSAValue(i)])
-end
-
-@testset "builtins" begin
-    let # throw
-        r = code_escapes((Any,)) do a
-            throw(a)
-        end
-        @test has_thrown_escape(r.state[Argument(2)])
-    end
-
-    let # implicit throws
-        r = code_escapes((Any,)) do a
-            getfield(a, :may_not_field)
-        end
-        @test has_thrown_escape(r.state[Argument(2)])
-
-        r = code_escapes((Any,)) do a
-            sizeof(a)
-        end
-        @test has_thrown_escape(r.state[Argument(2)])
-    end
-
-    let # :===
-        result = code_escapes((Bool, SafeRef{String})) do cond, s
-            m = cond ? s : nothing
-            c = m === nothing
-            return c
-        end
-        @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
-    end
-
-    let # sizeof
-        result = code_escapes((Vector{Any},)) do xs
-            sizeof(xs)
-        end
-        @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
-    end
-
-    let # ifelse
-        result = code_escapes((Bool,)) do c
-            r = ifelse(c, Ref("yes"), Ref("no"))
-            return r
-        end
-        inds = findall(isnew, result.ir.stmts.inst)
-        @assert !isempty(inds)
-        for i in inds
-            @test has_return_escape(result.state[SSAValue(i)])
-        end
-    end
-    let # ifelse (with constant condition)
-        result = code_escapes() do
-            r = ifelse(true, Ref("yes"), Ref(nothing))
-            return r
-        end
-        for i in 1:length(result.ir.stmts)
-            if isnew(result.ir.stmts.inst[i]) && isT(Base.RefValue{String})(result.ir.stmts.type[i])
-                @test has_return_escape(result.state[SSAValue(i)])
-            elseif isnew(result.ir.stmts.inst[i]) && isT(Base.RefValue{Nothing})(result.ir.stmts.type[i])
-                @test has_no_escape(result.state[SSAValue(i)])
-            end
-        end
-    end
-
-    let # typeassert
-        result = code_escapes((Any,)) do x
-            y = x::String
-            return y
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        @test !has_all_escape(result.state[Argument(2)])
-    end
-
-    let # isdefined
-        result = code_escapes((Any,)) do x
-            isdefined(x, :foo) ? x : throw("undefined")
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        @test !has_all_escape(result.state[Argument(2)])
-
-        result = code_escapes((Module,)) do m
-            isdefined(m, 10) # throws
-        end
-        @test has_thrown_escape(result.state[Argument(2)])
-    end
-end
-
-@testset "flow-sensitivity" begin
-    # ReturnEscape
-    let result = code_escapes((Bool,)) do cond
-            r = Ref("foo")
-            if cond
-                return cond
-            end
-            return r
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        rts = findall(isreturn, result.ir.stmts.inst)
-        @assert length(rts) == 2
-        @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 1
-    end
-    let result = code_escapes((Bool,)) do cond
-            r = Ref("foo")
-            cnt = 0
-            while rand(Bool)
-                cnt += 1
-                rand(Bool) && return r
-            end
-            rand(Bool) && return r
-            return cnt
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        rts = findall(isreturn, result.ir.stmts.inst) # return statement
-        @assert length(rts) == 3
-        @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 2
-    end
-end
-
-@testset "escape through exceptions" begin
-    M = @eval Module() begin
-        unsafeget(x) = isassigned(x) ? x[] : throw(x)
-        @noinline function escape_rethrow!()
-            try
-                rethrow()
-            catch err
-                GR[] = err
-            end
-        end
-        @noinline function escape_current_exceptions!()
-            excs = Base.current_exceptions()
-            GR[] = excs
-        end
-        const GR = Ref{Any}()
-        @__MODULE__
-    end
-
-    let # simple: return escape
-        result = @eval M $code_escapes() do
-            r = Ref{String}()
-            local ret
-            try
-                s = unsafeget(r)
-                ret = sizeof(s)
-            catch err
-                ret = err
-            end
-            return ret
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_return_escape(result.state[SSAValue(i)])
-    end
-
-    let # simple: global escape
-        result = @eval M $code_escapes() do
-            r = Ref{String}()
-            local ret # prevent DCE
-            try
-                s = unsafeget(r)
-                ret = sizeof(s)
-            catch err
-                global GV = err
-            end
-            nothing
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-    end
-
-    let # account for possible escapes via nested throws
-        result = @eval M $code_escapes() do
-            r = Ref{String}()
-            try
-                try
-                    unsafeget(r)
-                catch err1
-                    throw(err1)
-                end
-            catch err2
-                GR[] = err2
-            end
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-    end
-    let # account for possible escapes via `rethrow`
-        result = @eval M $code_escapes() do
-            r = Ref{String}()
-            try
-                try
-                    unsafeget(r)
-                catch err1
-                    rethrow(err1)
-                end
-            catch err2
-                GR[] = err2
-            end
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-    end
-    let # account for possible escapes via `rethrow`
-        result = @eval M $code_escapes() do
-            try
-                r = Ref{String}()
-                unsafeget(r)
-            catch
-                escape_rethrow!()
-            end
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-    end
-    let # account for possible escapes via `rethrow`
-        result = @eval M $code_escapes() do
-            local t
-            try
-                r = Ref{String}()
-                t = unsafeget(r)
-            catch err
-                t = typeof(err)
-                escape_rethrow!()
-            end
-            return t
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-    end
-    let # account for possible escapes via `Base.current_exceptions`
-        result = @eval M $code_escapes() do
-            try
-                r = Ref{String}()
-                unsafeget(r)
-            catch
-                GR[] = Base.current_exceptions()
-            end
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-    end
-    let # account for possible escapes via `Base.current_exceptions`
-        result = @eval M $code_escapes() do
-            try
-                r = Ref{String}()
-                unsafeget(r)
-            catch
-                escape_current_exceptions!()
-            end
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-    end
-
-    let # contextual: escape information imposed on `err` shouldn't propagate to `r2`, but only to `r1`
-        result = @eval M $code_escapes() do
-            r1 = Ref{String}()
-            r2 = Ref{String}()
-            local ret
-            try
-                s1 = unsafeget(r1)
-                ret = sizeof(s1)
-            catch err
-                global GV = err
-            end
-            s2 = unsafeget(r2)
-            return s2, r2
-        end
-        is = findall(isnew, result.ir.stmts.inst)
-        @test length(is) == 2
-        i1, i2 = is
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i1)])
-        @test !has_all_escape(result.state[SSAValue(i2)])
-        @test has_return_escape(result.state[SSAValue(i2)], r)
-    end
-
-    # XXX test cases below are currently broken because of the technical reason described in `escape_exception!`
-
-    let # limited propagation: exception is caught within a frame => doesn't escape to a caller
-        result = @eval M $code_escapes() do
-            r = Ref{String}()
-            local ret
-            try
-                s = unsafeget(r)
-                ret = sizeof(s)
-            catch
-                ret = nothing
-            end
-            return ret
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test_broken !has_return_escape(result.state[SSAValue(i)], r)
-    end
-    let # sequential: escape information imposed on `err1` and `err2 should propagate separately
-        result = @eval M $code_escapes() do
-            r1 = Ref{String}()
-            r2 = Ref{String}()
-            local ret
-            try
-                s1 = unsafeget(r1)
-                ret = sizeof(s1)
-            catch err1
-                global GV = err1
-            end
-            try
-                s2 = unsafeget(r2)
-                ret = sizeof(s2)
-            catch err2
-                ret = err2
-            end
-            return ret
-        end
-        is = findall(isnew, result.ir.stmts.inst)
-        @test length(is) == 2
-        i1, i2 = is
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i1)])
-        @test has_return_escape(result.state[SSAValue(i2)], r)
-        @test_broken !has_all_escape(result.state[SSAValue(i2)])
-    end
-    let # nested: escape information imposed on `inner` shouldn't propagate to `s`
-        result = @eval M $code_escapes() do
-            r = Ref{String}()
-            local ret
-            try
-                s = unsafeget(r)
-                try
-                    ret = sizeof(s)
-                catch inner
-                    return inner
-                end
-            catch outer
-                ret = nothing
-            end
-            return ret
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test_broken !has_return_escape(result.state[SSAValue(i)])
-    end
-    let # merge: escape information imposed on `err1` and `err2 should be merged
-        result = @eval M $code_escapes() do
-            r = Ref{String}()
-            local ret
-            try
-                s = unsafeget(r)
-                ret = sizeof(s)
-            catch err1
-                return err1
-            end
-            try
-                s = unsafeget(r)
-                ret = sizeof(s)
-            catch err2
-                return err2
-            end
-            nothing
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        rs = findall(isreturn, result.ir.stmts.inst)
-        @test_broken !has_all_escape(result.state[SSAValue(i)])
-        for r in rs
-            @test has_return_escape(result.state[SSAValue(i)], r)
-        end
-    end
-    let # no exception handling: should keep propagating the escape
-        result = @eval M $code_escapes() do
-            r = Ref{String}()
-            local ret
-            try
-                s = unsafeget(r)
-                ret = sizeof(s)
-            finally
-                if !@isdefined(ret)
-                    ret = 42
-                end
-            end
-            return ret
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test_broken !has_return_escape(result.state[SSAValue(i)], r)
-    end
-end
-
-@testset "field analysis / alias analysis" begin
-    # escaped allocations
-    # -------------------
-
-    # escaped object should escape its fields as well
-    let result = code_escapes((Any,)) do a
-            global GV = SafeRef{Any}(a)
-            nothing
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-        @test has_all_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Any,)) do a
-            global GV = (a,)
-            nothing
-        end
-        i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-        @test has_all_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Any,)) do a
-            o0 = SafeRef{Any}(a)
-            global GV = SafeRef(o0)
-            nothing
-        end
-        is = findall(isnew, result.ir.stmts.inst)
-        @test length(is) == 2
-        i0, i1 = is
-        @test has_all_escape(result.state[SSAValue(i0)])
-        @test has_all_escape(result.state[SSAValue(i1)])
-        @test has_all_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Any,)) do a
-            t0 = (a,)
-            global GV = (t0,)
-            nothing
-        end
-        inds = findall(iscall((result.ir, tuple)), result.ir.stmts.inst)
-        @assert length(inds) == 2
-        for i in inds; @test has_all_escape(result.state[SSAValue(i)]); end
-        @test has_all_escape(result.state[Argument(2)])
-    end
-    # global escape through `setfield!`
-    let result = code_escapes((Any,)) do a
-            r = SafeRef{Any}(:init)
-            global GV = r
-            r[] = a
-            nothing
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-        @test has_all_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Any,Any)) do a, b
-            r = SafeRef{Any}(a)
-            global GV = r
-            r[] = b
-            nothing
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test has_all_escape(result.state[SSAValue(i)])
-        @test has_all_escape(result.state[Argument(2)]) # a
-        @test has_all_escape(result.state[Argument(3)]) # b
-    end
-    let result = @eval EATModule() begin
-            const Rx = SafeRef{String}("Rx")
-            $code_escapes((String,)) do s
-                Rx[] = s
-                Core.sizeof(Rx[])
-            end
-        end
-        @test has_all_escape(result.state[Argument(2)])
-    end
-    let result = @eval EATModule() begin
-            const Rx = SafeRef{String}("Rx")
-            $code_escapes((String,)) do s
-                setfield!(Rx, :x, s)
-                Core.sizeof(Rx[])
-            end
-        end
-        @test has_all_escape(result.state[Argument(2)])
-    end
-    let M = EATModule()
-        @eval M module ___xxx___
-            import ..SafeRef
-            const Rx = SafeRef("Rx")
-        end
-        result = @eval M begin
-            $code_escapes((String,)) do s
-                rx = getfield(___xxx___, :Rx)
-                rx[] = s
-                nothing
-            end
-        end
-        @test has_all_escape(result.state[Argument(2)])
-    end
-
-    # field escape
-    # ------------
-
-    # field escape should propagate to :new arguments
-    let result = code_escapes((String,)) do a
-            o = SafeRef(a)
-            f = o[]
-            return f
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-    let result = code_escapes((String,)) do a
-            t = SafeRef((a,))
-            f = t[][1]
-            return f
-        end
-        i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        @test is_load_forwardable(result.state[SSAValue(i)])
-        result.state[SSAValue(i)].AliasInfo
-    end
-    let result = code_escapes((String, String)) do a, b
-            obj = SafeRefs(a, b)
-            fld1 = obj[1]
-            fld2 = obj[2]
-            return (fld1, fld2)
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r) # a
-        @test has_return_escape(result.state[Argument(3)], r) # b
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-
-    # field escape should propagate to `setfield!` argument
-    let result = code_escapes((String,)) do a
-            o = SafeRef("foo")
-            o[] = a
-            f = o[]
-            return f
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-    # propagate escape information imposed on return value of `setfield!` call
-    let result = code_escapes((String,)) do a
-            obj = SafeRef("foo")
-            return (obj[] = a)
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-
-    # nested allocations
-    let result = code_escapes((String,)) do a
-            o1 = SafeRef(a)
-            o2 = SafeRef(o1)
-            return o2[]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        for i in 1:length(result.ir.stmts)
-            if isnew(result.ir.stmts.inst[i]) && isT(SafeRef{String})(result.ir.stmts.type[i])
-                @test has_return_escape(result.state[SSAValue(i)], r)
-            elseif isnew(result.ir.stmts.inst[i]) && isT(SafeRef{SafeRef{String}})(result.ir.stmts.type[i])
-                @test is_load_forwardable(result.state[SSAValue(i)])
-            end
-        end
-    end
-    let result = code_escapes((String,)) do a
-            o1 = (a,)
-            o2 = (o1,)
-            return o2[1]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        for i in 1:length(result.ir.stmts)
-            if isnew(result.ir.stmts.inst[i]) && isT(Tuple{String})(result.ir.stmts.type[i])
-                @test has_return_escape(result.state[SSAValue(i)], r)
-            elseif isnew(result.ir.stmts.inst[i]) && isT(Tuple{Tuple{String}})(result.ir.stmts.type[i])
-                @test is_load_forwardable(result.state[SSAValue(i)])
-            end
-        end
-    end
-    let result = code_escapes((String,)) do a
-            o1  = SafeRef(a)
-            o2  = SafeRef(o1)
-            o1′ = o2[]
-            a′  = o1′[]
-            return a′
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        for i in findall(isnew, result.ir.stmts.inst)
-            @test is_load_forwardable(result.state[SSAValue(i)])
-        end
-    end
-    let result = code_escapes() do
-            o1 = SafeRef("foo")
-            o2 = SafeRef(o1)
-            return o2
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        for i in findall(isnew, result.ir.stmts.inst)
-            @test has_return_escape(result.state[SSAValue(i)], r)
-        end
-    end
-    let result = code_escapes() do
-            o1   = SafeRef("foo")
-            o2′  = SafeRef(nothing)
-            o2   = SafeRef{SafeRef}(o2′)
-            o2[] = o1
-            return o2
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        findall(1:length(result.ir.stmts)) do i
-            if isnew(result.ir.stmts[i][:inst])
-                t = result.ir.stmts[i][:type]
-                return t === SafeRef{String}  || # o1
-                       t === SafeRef{SafeRef}    # o2
-            end
-            return false
-        end |> x->foreach(x) do i
-            @test has_return_escape(result.state[SSAValue(i)], r)
-        end
-    end
-    let result = code_escapes((String,)) do x
-            broadcast(identity, Ref(x))
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-
-    # ϕ-node allocations
-    let result = code_escapes((Bool,Any,Any)) do cond, x, y
-            if cond
-                ϕ = SafeRef{Any}(x)
-            else
-                ϕ = SafeRef{Any}(y)
-            end
-            return ϕ[]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(3)], r) # x
-        @test has_return_escape(result.state[Argument(4)], r) # y
-        i = only(findall(isϕ, result.ir.stmts.inst))
-        @test is_load_forwardable(result.state[SSAValue(i)])
-        for i in findall(isnew, result.ir.stmts.inst)
-            @test is_load_forwardable(result.state[SSAValue(i)])
-        end
-    end
-    let result = code_escapes((Bool,Any,Any)) do cond, x, y
-            if cond
-                ϕ2 = ϕ1 = SafeRef{Any}(x)
-            else
-                ϕ2 = ϕ1 = SafeRef{Any}(y)
-            end
-            return ϕ1[], ϕ2[]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(3)], r) # x
-        @test has_return_escape(result.state[Argument(4)], r) # y
-        for i in findall(isϕ, result.ir.stmts.inst)
-            @test is_load_forwardable(result.state[SSAValue(i)])
-        end
-        for i in findall(isnew, result.ir.stmts.inst)
-            @test is_load_forwardable(result.state[SSAValue(i)])
-        end
-    end
-    # when ϕ-node merges values with different types
-    let result = code_escapes((Bool,String,String,String)) do cond, x, y, z
-            local out
-            if cond
-                ϕ = SafeRef(x)
-                out = ϕ[]
-            else
-                ϕ = SafeRefs(z, y)
-            end
-            return @isdefined(out) ? out : throw(ϕ)
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        t = only(findall(iscall((result.ir, throw)), result.ir.stmts.inst))
-        ϕ = only(findall(isT(Union{SafeRef{String},SafeRefs{String,String}}), result.ir.stmts.type))
-        @test has_return_escape(result.state[Argument(3)], r) # x
-        @test !has_return_escape(result.state[Argument(4)], r) # y
-        @test has_return_escape(result.state[Argument(5)], r) # z
-        @test has_thrown_escape(result.state[SSAValue(ϕ)], t)
-    end
-
-    # alias analysis
-    # --------------
-
-    # alias via getfield & Expr(:new)
-    let result = code_escapes((String,)) do s
-            r = SafeRef(s)
-            return r[]
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test isaliased(Argument(2), val, result.state)
-        @test !isaliased(Argument(2), SSAValue(i), result.state)
-    end
-    let result = code_escapes((String,)) do s
-            r1 = SafeRef(s)
-            r2 = SafeRef(r1)
-            return r2[]
-        end
-        i1, i2 = findall(isnew, result.ir.stmts.inst)
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test !isaliased(SSAValue(i1), SSAValue(i2), result.state)
-        @test isaliased(SSAValue(i1), val, result.state)
-        @test !isaliased(SSAValue(i2), val, result.state)
-    end
-    let result = code_escapes((String,)) do s
-            r1 = SafeRef(s)
-            r2 = SafeRef(r1)
-            return r2[][]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test isaliased(Argument(2), val, result.state)
-        for i in findall(isnew, result.ir.stmts.inst)
-            @test !isaliased(SSAValue(i), val, result.state)
-        end
-    end
-    let result = @eval EATModule() begin
-            const Rx = SafeRef("Rx")
-            $code_escapes((String,)) do s
-                r = SafeRef(Rx)
-                rx = r[] # rx aliased to Rx
-                rx[] = s
-                nothing
-            end
-        end
-        i = findfirst(isnew, result.ir.stmts.inst)
-        @test has_all_escape(result.state[Argument(2)])
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-    # alias via getfield & setfield!
-    let result = code_escapes((String,)) do s
-            r = Ref{String}()
-            r[] = s
-            return r[]
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test isaliased(Argument(2), val, result.state)
-        @test !isaliased(Argument(2), SSAValue(i), result.state)
-    end
-    let result = code_escapes((String,)) do s
-            r1 = Ref(s)
-            r2 = Ref{Base.RefValue{String}}()
-            r2[] = r1
-            return r2[]
-        end
-        i1, i2 = findall(isnew, result.ir.stmts.inst)
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test !isaliased(SSAValue(i1), SSAValue(i2), result.state)
-        @test isaliased(SSAValue(i1), val, result.state)
-        @test !isaliased(SSAValue(i2), val, result.state)
-    end
-    let result = code_escapes((String,)) do s
-            r1 = Ref{String}()
-            r2 = Ref{Base.RefValue{String}}()
-            r2[] = r1
-            r1[] = s
-            return r2[][]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test isaliased(Argument(2), val, result.state)
-        for i in findall(isnew, result.ir.stmts.inst)
-            @test !isaliased(SSAValue(i), val, result.state)
-        end
-        result = code_escapes((String,)) do s
-            r1 = Ref{String}()
-            r2 = Ref{Base.RefValue{String}}()
-            r1[] = s
-            r2[] = r1
-            return r2[][]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test isaliased(Argument(2), val, result.state)
-        for i in findall(isnew, result.ir.stmts.inst)
-            @test !isaliased(SSAValue(i), val, result.state)
-        end
-    end
-    let result = @eval EATModule() begin
-            const Rx = SafeRef("Rx")
-            $code_escapes((SafeRef{String}, String,)) do _rx, s
-                r = SafeRef(_rx)
-                r[] = Rx
-                rx = r[] # rx aliased to Rx
-                rx[] = s
-                nothing
-            end
-        end
-        i = findfirst(isnew, result.ir.stmts.inst)
-        @test has_all_escape(result.state[Argument(3)])
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-    # alias via typeassert
-    let result = code_escapes((Any,)) do a
-            r = a::String
-            return r
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test has_return_escape(result.state[Argument(2)], r) # a
-        @test isaliased(Argument(2), val, result.state)       # a <-> r
-    end
-    let result = code_escapes((Any,)) do a
-            global GV
-            (g::SafeRef{Any})[] = a
-            nothing
-        end
-        @test has_all_escape(result.state[Argument(2)])
-    end
-    # alias via ifelse
-    let result = code_escapes((Bool,Any,Any)) do c, a, b
-            r = ifelse(c, a, b)
-            return r
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test has_return_escape(result.state[Argument(3)], r) # a
-        @test has_return_escape(result.state[Argument(4)], r) # b
-        @test !isaliased(Argument(2), val, result.state)      # c <!-> r
-        @test isaliased(Argument(3), val, result.state)       # a <-> r
-        @test isaliased(Argument(4), val, result.state)       # b <-> r
-    end
-    let result = @eval EATModule() begin
-            const Lx, Rx = SafeRef("Lx"), SafeRef("Rx")
-            $code_escapes((Bool,String,)) do c, a
-                r = ifelse(c, Lx, Rx)
-                r[] = a
-                nothing
-            end
-        end
-        @test has_all_escape(result.state[Argument(3)]) # a
-    end
-    # alias via ϕ-node
-    let result = code_escapes((Bool,String)) do cond, x
-            if cond
-                ϕ2 = ϕ1 = SafeRef("foo")
-            else
-                ϕ2 = ϕ1 = SafeRef("bar")
-            end
-            ϕ2[] = x
-            return ϕ1[]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test has_return_escape(result.state[Argument(3)], r) # x
-        @test isaliased(Argument(3), val, result.state) # x
-        for i in findall(isϕ, result.ir.stmts.inst)
-            @test is_load_forwardable(result.state[SSAValue(i)])
-        end
-        for i in findall(isnew, result.ir.stmts.inst)
-            @test is_load_forwardable(result.state[SSAValue(i)])
-        end
-    end
-    let result = code_escapes((Bool,Bool,String)) do cond1, cond2, x
-            if cond1
-                ϕ2 = ϕ1 = SafeRef("foo")
-            else
-                ϕ2 = ϕ1 = SafeRef("bar")
-            end
-            cond2 && (ϕ2[] = x)
-            return ϕ1[]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test has_return_escape(result.state[Argument(4)], r) # x
-        @test isaliased(Argument(4), val, result.state) # x
-        for i in findall(isϕ, result.ir.stmts.inst)
-            @test is_load_forwardable(result.state[SSAValue(i)])
-        end
-        for i in findall(isnew, result.ir.stmts.inst)
-            @test is_load_forwardable(result.state[SSAValue(i)])
-        end
-    end
-    # alias via π-node
-    let result = code_escapes((Any,)) do x
-            if isa(x, String)
-                return x
-            end
-            throw("error!")
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        rval = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
-        @test has_return_escape(result.state[Argument(2)], r) # x
-        @test isaliased(Argument(2), rval, result.state)
-    end
-    let result = code_escapes((String,)) do x
-            global GV
-            l = g
-            if isa(l, SafeRef{String})
-                l[] = x
-            end
-            nothing
-        end
-        @test has_all_escape(result.state[Argument(2)]) # x
-    end
-    # circular reference
-    let result = code_escapes() do
-            x = Ref{Any}()
-            x[] = x
-            return x[]
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[SSAValue(i)], r)
-    end
-    let result = @eval Module() begin
-            const Rx = Ref{Any}()
-            Rx[] = Rx
-            $code_escapes() do
-                r = Rx[]::Base.RefValue{Any}
-                return r[]
-            end
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        for i in findall(iscall((result.ir, getfield)), result.ir.stmts.inst)
-            @test has_return_escape(result.state[SSAValue(i)], r)
-        end
-    end
-    let result = @eval Module() begin
-            @noinline function genr()
-                r = Ref{Any}()
-                r[] = r
-                return r
-            end
-            $code_escapes() do
-                x = genr()
-                return x[]
-            end
-        end
-        i = only(findall(isinvoke(:genr), result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[SSAValue(i)], r)
-    end
-
-    # dynamic semantics
-    # -----------------
-
-    # conservatively handle untyped objects
-    let result = @eval code_escapes((Any,Any,)) do T, x
-            obj = $(Expr(:new, :T, :x))
-        end
-        t = only(findall(isnew, result.ir.stmts.inst))
-        @test #=T=# has_thrown_escape(result.state[Argument(2)], t) # T
-        @test #=x=# has_thrown_escape(result.state[Argument(3)], t) # x
-    end
-    let result = @eval code_escapes((Any,Any,Any,Any)) do T, x, y, z
-            obj = $(Expr(:new, :T, :x, :y))
-            return getfield(obj, :x)
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test #=x=# has_return_escape(result.state[Argument(3)], r)
-        @test #=y=# has_return_escape(result.state[Argument(4)], r)
-        @test #=z=# !has_return_escape(result.state[Argument(5)], r)
-    end
-    let result = @eval code_escapes((Any,Any,Any,Any)) do T, x, y, z
-            obj = $(Expr(:new, :T, :x))
-            setfield!(obj, :x, y)
-            return getfield(obj, :x)
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test #=x=# has_return_escape(result.state[Argument(3)], r)
-        @test #=y=# has_return_escape(result.state[Argument(4)], r)
-        @test #=z=# !has_return_escape(result.state[Argument(5)], r)
-    end
-
-    # conservatively handle unknown field:
-    # all fields should be escaped, but the allocation itself doesn't need to be escaped
-    let result = code_escapes((String, Symbol)) do a, fld
-            obj = SafeRef(a)
-            return getfield(obj, fld)
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r) # a
-        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
-    end
-    let result = code_escapes((String, String, Symbol)) do a, b, fld
-            obj = SafeRefs(a, b)
-            return getfield(obj, fld) # should escape both `a` and `b`
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r) # a
-        @test has_return_escape(result.state[Argument(3)], r) # b
-        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
-    end
-    let result = code_escapes((String, String, Int)) do a, b, idx
-            obj = SafeRefs(a, b)
-            return obj[idx] # should escape both `a` and `b`
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r) # a
-        @test has_return_escape(result.state[Argument(3)], r) # b
-        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
-    end
-    let result = code_escapes((String, String, Symbol)) do a, b, fld
-            obj = SafeRefs("a", "b")
-            setfield!(obj, fld, a)
-            return obj[2] # should escape `a`
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r) # a
-        @test !has_return_escape(result.state[Argument(3)], r) # b
-        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
-    end
-    let result = code_escapes((String, Symbol)) do a, fld
-            obj = SafeRefs("a", "b")
-            setfield!(obj, fld, a)
-            return obj[1] # this should escape `a`
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r) # a
-        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
-    end
-    let result = code_escapes((String, String, Int)) do a, b, idx
-            obj = SafeRefs("a", "b")
-            obj[idx] = a
-            return obj[2] # should escape `a`
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r) # a
-        @test !has_return_escape(result.state[Argument(3)], r) # b
-        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
-    end
-
-    # interprocedural
-    # ---------------
-
-    let result = @eval EATModule() begin
-            @noinline getx(obj) = obj[]
-            $code_escapes((String,)) do a
-                obj = SafeRef(a)
-                fld = getx(obj)
-                return fld
-            end
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)
-        # NOTE we can't scalar replace `obj`, but still we may want to stack allocate it
-        @test_broken is_load_forwardable(result.state[SSAValue(i)])
-    end
-
-    # TODO interprocedural alias analysis
-    let result = code_escapes((SafeRef{String},)) do s
-            s[] = "bar"
-            global GV = s[]
-            nothing
-        end
-        @test_broken !has_all_escape(result.state[Argument(2)])
-    end
-
-    # aliasing between arguments
-    let result = @eval EATModule() begin
-            @noinline setxy!(x, y) = x[] = y
-            $code_escapes((String,)) do y
-                x = SafeRef("init")
-                setxy!(x, y)
-                return x
-            end
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[SSAValue(i)], r)
-        @test has_return_escape(result.state[Argument(2)], r) # y
-    end
-    let result = @eval EATModule() begin
-            @noinline setxy!(x, y) = x[] = y
-            $code_escapes((String,)) do y
-                x1 = SafeRef("init")
-                x2 = SafeRef(y)
-                setxy!(x1, x2[])
-                return x1
-            end
-        end
-        i1, i2 = findall(isnew, result.ir.stmts.inst)
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[SSAValue(i1)], r)
-        @test !has_return_escape(result.state[SSAValue(i2)], r)
-        @test has_return_escape(result.state[Argument(2)], r) # y
-    end
-    let result = @eval EATModule() begin
-            @noinline mysetindex!(x, a) = x[1] = a
-            const Ax = Vector{Any}(undef, 1)
-            $code_escapes((String,)) do s
-                mysetindex!(Ax, s)
-            end
-        end
-        @test has_all_escape(result.state[Argument(2)]) # s
-    end
-
-    # TODO flow-sensitivity?
-    # ----------------------
-
-    let result = code_escapes((Any,Any)) do a, b
-            r = SafeRef{Any}(a)
-            r[] = b
-            return r[]
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test_broken !has_return_escape(result.state[Argument(2)], r) # a
-        @test has_return_escape(result.state[Argument(3)], r) # b
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-    let result = code_escapes((Any,Any)) do a, b
-            r = SafeRef{Any}(:init)
-            r[] = a
-            r[] = b
-            return r[]
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test_broken !has_return_escape(result.state[Argument(2)], r) # a
-        @test has_return_escape(result.state[Argument(3)], r) # b
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-    let result = code_escapes((Any,Any,Bool)) do a, b, cond
-            r = SafeRef{Any}(:init)
-            if cond
-                r[] = a
-                return r[]
-            else
-                r[] = b
-                return nothing
-            end
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        @test is_load_forwardable(result.state[SSAValue(i)])
-        r = only(findall(result.ir.stmts.inst) do @nospecialize x
-            isreturn(x) && isa(x.val, Core.SSAValue)
-        end)
-        @test has_return_escape(result.state[Argument(2)], r) # a
-        @test_broken !has_return_escape(result.state[Argument(3)], r) # b
-    end
-
-    # handle conflicting field information correctly
-    let result = code_escapes((Bool,String,String,)) do cnd, baz, qux
-            if cnd
-                o = SafeRef("foo")
-            else
-                o = SafeRefs("bar", baz)
-                r = getfield(o, 2)
-            end
-            if cnd
-                o = o::SafeRef
-                setfield!(o, 1, qux)
-                r = getfield(o, 1)
-            end
-            r
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(3)], r) # baz
-        @test has_return_escape(result.state[Argument(4)], r) # qux
-        for new in findall(isnew, result.ir.stmts.inst)
-            @test is_load_forwardable(result.state[SSAValue(new)])
-        end
-    end
-    let result = code_escapes((Bool,String,String,)) do cnd, baz, qux
-            if cnd
-                o = SafeRefs("foo", "bar")
-                r = setfield!(o, 2, baz)
-            else
-                o = SafeRef(qux)
-            end
-            if !cnd
-                o = o::SafeRef
-                r = getfield(o, 1)
-            end
-            r
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(3)], r) # baz
-        @test has_return_escape(result.state[Argument(4)], r) # qux
-    end
-
-    # foreigncall should disable field analysis
-    let result = code_escapes((Any,Nothing,Int,UInt)) do t, mt, lim, world
-            ambig = false
-            min = Ref{UInt}(typemin(UInt))
-            max = Ref{UInt}(typemax(UInt))
-            has_ambig = Ref{Int32}(0)
-            mt = ccall(:jl_matching_methods, Any,
-                (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ref{Int32}),
-                t, mt, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool}
-            return mt, has_ambig[]
-        end
-        for i in findall(isnew, result.ir.stmts.inst)
-            @test !is_load_forwardable(result.state[SSAValue(i)])
-        end
-    end
-end
-
-# demonstrate the power of our field / alias analysis with a realistic end to end example
-abstract type AbstractPoint{T} end
-mutable struct MPoint{T} <: AbstractPoint{T}
-    x::T
-    y::T
-end
-add(a::P, b::P) where P<:AbstractPoint = P(a.x + b.x, a.y + b.y)
-function compute(T, ax, ay, bx, by)
-    a = T(ax, ay)
-    b = T(bx, by)
-    for i in 0:(100000000-1)
-        c = add(a, b) # replaceable
-        a = add(c, b) # replaceable
-    end
-    a.x, a.y
-end
-let result = @code_escapes compute(MPoint, 1+.5im, 2+.5im, 2+.25im, 4+.75im)
-    for i in findall(1:length(result.ir.stmts)) do idx
-                 inst = EscapeAnalysis.getinst(result.ir, idx)
-                 stmt = inst[:inst]
-                 return (isnew(stmt) || isϕ(stmt)) && inst[:type] <: MPoint
-             end
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-end
-function compute(a, b)
-    for i in 0:(100000000-1)
-        c = add(a, b) # replaceable
-        a = add(c, b) # unreplaceable (aliased to the call argument `a`)
-    end
-    a.x, a.y
-end
-let result = @code_escapes compute(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im))
-    idxs = findall(1:length(result.ir.stmts)) do idx
-        inst = EscapeAnalysis.getinst(result.ir, idx)
-        stmt = inst[:inst]
-        return isnew(stmt) && inst[:type] <: MPoint
-    end
-    @assert length(idxs) == 2
-    @test count(i->is_load_forwardable(result.state[SSAValue(i)]), idxs) == 1
-end
-function compute!(a, b)
-    for i in 0:(100000000-1)
-        c = add(a, b)  # replaceable
-        a′ = add(c, b) # replaceable
-        a.x = a′.x
-        a.y = a′.y
-    end
-end
-let result = @code_escapes compute!(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im))
-    for i in findall(1:length(result.ir.stmts)) do idx
-                 inst = EscapeAnalysis.getinst(result.ir, idx)
-                 stmt = inst[:inst]
-                 return isnew(stmt) && inst[:type] <: MPoint
-             end
-        @test is_load_forwardable(result.state[SSAValue(i)])
-    end
-end
-
-@testset "array primitives" begin
-    inbounds = Base.JLOptions().check_bounds == 0
-
-    # arrayref
-    let result = code_escapes((Vector{String},Int)) do xs, i
-            s = Base.arrayref(true, xs, i)
-            return s
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)   # xs
-        @test has_thrown_escape(result.state[Argument(2)])      # xs
-        @test !has_return_escape(result.state[Argument(3)], r)  # i
-    end
-    let result = code_escapes((Vector{String},Int)) do xs, i
-            s = Base.arrayref(false, xs, i)
-            return s
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)   # xs
-        @test !has_thrown_escape(result.state[Argument(2)])     # xs
-        @test !has_return_escape(result.state[Argument(3)], r)  # i
-    end
-    inbounds && let result = code_escapes((Vector{String},Int)) do xs, i
-            s = @inbounds xs[i]
-            return s
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r)   # xs
-        @test !has_thrown_escape(result.state[Argument(2)])     # xs
-        @test !has_return_escape(result.state[Argument(3)], r)  # i
-    end
-    let result = code_escapes((Vector{String},Bool)) do xs, i
-            c = Base.arrayref(true, xs, i) # TypeError will happen here
-            return c
-        end
-        t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t) # xs
-    end
-    let result = code_escapes((String,Int)) do xs, i
-            c = Base.arrayref(true, xs, i) # TypeError will happen here
-            return c
-        end
-        t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t) # xs
-    end
-    let result = code_escapes((AbstractVector{String},Int)) do xs, i
-            c = Base.arrayref(true, xs, i) # TypeError may happen here
-            return c
-        end
-        t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t) # xs
-    end
-    let result = code_escapes((Vector{String},Any)) do xs, i
-            c = Base.arrayref(true, xs, i) # TypeError may happen here
-            return c
-        end
-        t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t) # xs
-    end
-
-    # arrayset
-    let result = code_escapes((Vector{String},String,Int,)) do xs, x, i
-            Base.arrayset(true, xs, x, i)
-            return xs
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r) # xs
-        @test has_thrown_escape(result.state[Argument(2)])    # xs
-        @test has_return_escape(result.state[Argument(3)], r) # x
-    end
-    let result = code_escapes((Vector{String},String,Int,)) do xs, x, i
-            Base.arrayset(false, xs, x, i)
-            return xs
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r) # xs
-        @test !has_thrown_escape(result.state[Argument(2)])    # xs
-        @test has_return_escape(result.state[Argument(3)], r) # x
-    end
-    inbounds && let result = code_escapes((Vector{String},String,Int,)) do xs, x, i
-            @inbounds xs[i] = x
-            return xs
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[Argument(2)], r) # xs
-        @test !has_thrown_escape(result.state[Argument(2)])    # xs
-        @test has_return_escape(result.state[Argument(3)], r) # x
-    end
-    let result = code_escapes((String,String,String,)) do s, t, u
-            xs = Vector{String}(undef, 3)
-            Base.arrayset(true, xs, s, 1)
-            Base.arrayset(true, xs, t, 2)
-            Base.arrayset(true, xs, u, 3)
-            return xs
-        end
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[SSAValue(i)], r)
-        for i in 2:result.state.nargs
-            @test has_return_escape(result.state[Argument(i)], r)
-        end
-    end
-    let result = code_escapes((Vector{String},String,Bool,)) do xs, x, i
-            Base.arrayset(true, xs, x, i) # TypeError will happen here
-            return xs
-        end
-        t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t) # xs
-        @test has_thrown_escape(result.state[Argument(3)], t) # x
-    end
-    let result = code_escapes((String,String,Int,)) do xs, x, i
-            Base.arrayset(true, xs, x, i) # TypeError will happen here
-            return xs
-        end
-        t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t) # xs::String
-        @test has_thrown_escape(result.state[Argument(3)], t) # x::String
-    end
-    let result = code_escapes((AbstractVector{String},String,Int,)) do xs, x, i
-            Base.arrayset(true, xs, x, i) # TypeError may happen here
-            return xs
-        end
-        t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t) # xs
-        @test has_thrown_escape(result.state[Argument(3)], t) # x
-    end
-    let result = code_escapes((Vector{String},AbstractString,Int,)) do xs, x, i
-            Base.arrayset(true, xs, x, i) # TypeError may happen here
-            return xs
-        end
-        t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t) # xs
-        @test has_thrown_escape(result.state[Argument(3)], t) # x
-    end
-
-    # arrayref and arrayset
-    let result = code_escapes() do
-            a = Vector{Vector{Any}}(undef, 1)
-            b = Any[]
-            a[1] = b
-            return a[1]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        ai = only(findall(result.ir.stmts.inst) do @nospecialize x
-            isarrayalloc(x) && x.args[2] === Vector{Vector{Any}}
-        end)
-        bi = only(findall(result.ir.stmts.inst) do @nospecialize x
-            isarrayalloc(x) && x.args[2] === Vector{Any}
-        end)
-        @test !has_return_escape(result.state[SSAValue(ai)], r)
-        @test has_return_escape(result.state[SSAValue(bi)], r)
-    end
-    let result = code_escapes() do
-            a = Vector{Vector{Any}}(undef, 1)
-            b = Any[]
-            a[1] = b
-            return a
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        ai = only(findall(result.ir.stmts.inst) do @nospecialize x
-            isarrayalloc(x) && x.args[2] === Vector{Vector{Any}}
-        end)
-        bi = only(findall(result.ir.stmts.inst) do @nospecialize x
-            isarrayalloc(x) && x.args[2] === Vector{Any}
-        end)
-        @test has_return_escape(result.state[SSAValue(ai)], r)
-        @test has_return_escape(result.state[SSAValue(bi)], r)
-    end
-    let result = code_escapes((Vector{Any},String,Int,Int)) do xs, s, i, j
-            x = SafeRef(s)
-            xs[i] = x
-            xs[j] # potential error
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(3)], t) # s
-        @test has_thrown_escape(result.state[SSAValue(i)], t) # x
-    end
-
-    # arraysize
-    let result = code_escapes((Vector{Any},)) do xs
-            Core.arraysize(xs, 1)
-        end
-        t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst))
-        @test !has_thrown_escape(result.state[Argument(2)], t)
-    end
-    let result = code_escapes((Vector{Any},Int,)) do xs, dim
-            Core.arraysize(xs, dim)
-        end
-        t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst))
-        @test !has_thrown_escape(result.state[Argument(2)], t)
-    end
-    let result = code_escapes((Any,)) do xs
-            Core.arraysize(xs, 1)
-        end
-        t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t)
-    end
-
-    # arraylen
-    let result = code_escapes((Vector{Any},)) do xs
-            Base.arraylen(xs)
-        end
-        t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst))
-        @test !has_thrown_escape(result.state[Argument(2)], t) # xs
-    end
-    let result = code_escapes((String,)) do xs
-            Base.arraylen(xs)
-        end
-        t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t) # xs
-    end
-    let result = code_escapes((Vector{Any},)) do xs
-            Base.arraylen(xs, 1)
-        end
-        t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[Argument(2)], t) # xs
-    end
-
-    # array resizing
-    # without BoundsErrors
-    let result = code_escapes((Vector{Any},String)) do xs, x
-            @ccall jl_array_grow_beg(xs::Any, 2::UInt)::Cvoid
-            xs[1] = x
-            xs
-        end
-        t = only(findall(isarrayresize, result.ir.stmts.inst))
-        @test !has_thrown_escape(result.state[Argument(2)], t) # xs
-        @test !has_thrown_escape(result.state[Argument(3)], t) # x
-    end
-    let result = code_escapes((Vector{Any},String)) do xs, x
-            @ccall jl_array_grow_end(xs::Any, 2::UInt)::Cvoid
-            xs[1] = x
-            xs
-        end
-        t = only(findall(isarrayresize, result.ir.stmts.inst))
-        @test !has_thrown_escape(result.state[Argument(2)], t) # xs
-        @test !has_thrown_escape(result.state[Argument(3)], t) # x
-    end
-    # with possible BoundsErrors
-    let result = code_escapes((String,)) do x
-            xs = Any[1,2,3]
-            xs[3] = x
-            @ccall jl_array_del_beg(xs::Any, 2::UInt)::Cvoid # can potentially throw
-            xs
-        end
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        t = only(findall(isarrayresize, result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[SSAValue(i)], t) # xs
-        @test has_thrown_escape(result.state[Argument(2)], t) # x
-    end
-    let result = code_escapes((String,)) do x
-            xs = Any[1,2,3]
-            xs[1] = x
-            @ccall jl_array_del_end(xs::Any, 2::UInt)::Cvoid # can potentially throw
-            xs
-        end
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        t = only(findall(isarrayresize, result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[SSAValue(i)], t) # xs
-        @test has_thrown_escape(result.state[Argument(2)], t) # x
-    end
-    let result = code_escapes((String,)) do x
-            xs = Any[x]
-            @ccall jl_array_grow_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw
-        end
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        t = only(findall(isarrayresize, result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[SSAValue(i)], t) # xs
-        @test has_thrown_escape(result.state[Argument(2)], t) # x
-    end
-    let result = code_escapes((String,)) do x
-            xs = Any[x]
-            @ccall jl_array_del_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw
-        end
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        t = only(findall(isarrayresize, result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[SSAValue(i)], t) # xs
-        @test has_thrown_escape(result.state[Argument(2)], t) # x
-    end
-    inbounds && let result = code_escapes((String,)) do x
-            xs = @inbounds Any[x]
-            @ccall jl_array_del_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw
-        end
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        t = only(findall(isarrayresize, result.ir.stmts.inst))
-        @test has_thrown_escape(result.state[SSAValue(i)], t) # xs
-        @test has_thrown_escape(result.state[Argument(2)], t) # x
-    end
-
-    # array copy
-    let result = code_escapes((Vector{Any},)) do xs
-            return copy(xs)
-        end
-        i = only(findall(isarraycopy, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[SSAValue(i)], r)
-        @test_broken !has_return_escape(result.state[Argument(2)], r)
-    end
-    let result = code_escapes((String,)) do s
-            xs = String[s]
-            xs′ = copy(xs)
-            return xs′[1]
-        end
-        i1 = only(findall(isarrayalloc, result.ir.stmts.inst))
-        i2 = only(findall(isarraycopy, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[SSAValue(i1)])
-        @test !has_return_escape(result.state[SSAValue(i2)])
-        @test has_return_escape(result.state[Argument(2)], r) # s
-    end
-    let result = code_escapes((Vector{Any},)) do xs
-            xs′ = copy(xs)
-            return xs′[1] # may potentially throw BoundsError, should escape `xs` conservatively (i.e. escape its elements)
-        end
-        i = only(findall(isarraycopy, result.ir.stmts.inst))
-        ref = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
-        ret = only(findall(isreturn, result.ir.stmts.inst))
-        @test_broken !has_thrown_escape(result.state[SSAValue(i)], ref)
-        @test_broken !has_return_escape(result.state[SSAValue(i)], ret)
-        @test has_thrown_escape(result.state[Argument(2)], ref)
-        @test has_return_escape(result.state[Argument(2)], ret)
-    end
-    let result = code_escapes((String,)) do s
-            xs = Vector{String}(undef, 1)
-            xs[1] = s
-            xs′ = copy(xs)
-            length(xs′) > 2 && throw(xs′)
-            return xs′
-        end
-        i1 = only(findall(isarrayalloc, result.ir.stmts.inst))
-        i2 = only(findall(isarraycopy, result.ir.stmts.inst))
-        t = only(findall(iscall((result.ir, throw)), result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test_broken !has_thrown_escape(result.state[SSAValue(i1)], t)
-        @test_broken !has_return_escape(result.state[SSAValue(i1)], r)
-        @test has_thrown_escape(result.state[SSAValue(i2)], t)
-        @test has_return_escape(result.state[SSAValue(i2)], r)
-        @test has_thrown_escape(result.state[Argument(2)], t)
-        @test has_return_escape(result.state[Argument(2)], r)
-    end
-
-    # isassigned
-    let result = code_escapes((Vector{Any},Int)) do xs, i
-            return isassigned(xs, i)
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[Argument(2)], r)
-        @test !has_thrown_escape(result.state[Argument(2)])
-    end
-
-    # indexing analysis
-    # -----------------
-
-    # safe case
-    let result = code_escapes((String,String)) do s, t
-            a = Vector{Any}(undef, 2)
-            a[1] = s
-            a[2] = t
-            return a[1]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[SSAValue(i)], r)
-        @test is_load_forwardable(result.state[SSAValue(i)])
-        @test has_return_escape(result.state[Argument(2)], r) # s
-        @test !has_return_escape(result.state[Argument(3)], r) # t
-    end
-    let result = code_escapes((String,String)) do s, t
-            a = Matrix{Any}(undef, 1, 2)
-            a[1, 1] = s
-            a[1, 2] = t
-            return a[1, 1]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[SSAValue(i)], r)
-        @test is_load_forwardable(result.state[SSAValue(i)])
-        @test has_return_escape(result.state[Argument(2)], r) # s
-        @test !has_return_escape(result.state[Argument(3)], r) # t
-    end
-    let result = code_escapes((Bool,String,String,String)) do c, s, t, u
-            a = Vector{Any}(undef, 2)
-            if c
-                a[1] = s
-                a[2] = u
-            else
-                a[1] = t
-                a[2] = u
-            end
-            return a[1]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test is_load_forwardable(result.state[SSAValue(i)])
-        @test !has_return_escape(result.state[SSAValue(i)], r)
-        @test has_return_escape(result.state[Argument(3)], r) # s
-        @test has_return_escape(result.state[Argument(4)], r) # t
-        @test !has_return_escape(result.state[Argument(5)], r) # u
-    end
-    let result = code_escapes((Bool,String,String,String)) do c, s, t, u
-            a = Any[nothing, nothing] # TODO how to deal with loop indexing?
-            if c
-                a[1] = s
-                a[2] = u
-            else
-                a[1] = t
-                a[2] = u
-            end
-            return a[1]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[SSAValue(i)], r)
-        @test_broken is_load_forwardable(result.state[SSAValue(i)])
-        @test has_return_escape(result.state[Argument(3)], r) # s
-        @test has_return_escape(result.state[Argument(4)], r) # t
-        @test_broken !has_return_escape(result.state[Argument(5)], r) # u
-    end
-    let result = code_escapes((String,)) do s
-            a = Vector{Vector{Any}}(undef, 1)
-            b = Any[s]
-            a[1] = b
-            return a[1][1]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        is = findall(isarrayalloc, result.ir.stmts.inst)
-        @assert length(is) == 2
-        ia, ib = is
-        @test !has_return_escape(result.state[SSAValue(ia)], r)
-        @test is_load_forwardable(result.state[SSAValue(ia)])
-        @test !has_return_escape(result.state[SSAValue(ib)], r)
-        @test_broken is_load_forwardable(result.state[SSAValue(ib)])
-        @test has_return_escape(result.state[Argument(2)], r) # s
-    end
-    let result = code_escapes((Bool,String,String,Regex,Regex,)) do c, s1, s2, t1, t2
-            if c
-                a = Vector{String}(undef, 2)
-                a[1] = s1
-                a[2] = s2
-            else
-                a = Vector{Regex}(undef, 2)
-                a[1] = t1
-                a[2] = t2
-            end
-            return a[1]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        for i in findall(isarrayalloc, result.ir.stmts.inst)
-            @test !has_return_escape(result.state[SSAValue(i)], r)
-            @test is_load_forwardable(result.state[SSAValue(i)])
-        end
-        @test has_return_escape(result.state[Argument(3)], r) # s1
-        @test !has_return_escape(result.state[Argument(4)], r) # s2
-        @test has_return_escape(result.state[Argument(5)], r) # t1
-        @test !has_return_escape(result.state[Argument(6)], r) # t2
-    end
-    let result = code_escapes((String,String,Int)) do s, t, i
-            a = Any[s]
-            push!(a, t)
-            return a[2]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[SSAValue(i)], r)
-        @test_broken is_load_forwardable(result.state[SSAValue(i)])
-        @test_broken !has_return_escape(result.state[Argument(2)], r) # s
-        @test has_return_escape(result.state[Argument(3)], r) # t
-    end
-    # unsafe cases
-    let result = code_escapes((String,String,Int)) do s, t, i
-            a = Vector{Any}(undef, 2)
-            a[1] = s
-            a[2] = t
-            return a[i]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[SSAValue(i)], r)
-        @test !is_load_forwardable(result.state[SSAValue(i)])
-        @test has_return_escape(result.state[Argument(2)], r) # s
-        @test has_return_escape(result.state[Argument(3)], r) # t
-    end
-    let result = code_escapes((String,String,Int)) do s, t, i
-            a = Vector{Any}(undef, 2)
-            a[1] = s
-            a[i] = t
-            return a[1]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[SSAValue(i)], r)
-        @test !is_load_forwardable(result.state[SSAValue(i)])
-        @test has_return_escape(result.state[Argument(2)], r) # s
-        @test has_return_escape(result.state[Argument(3)], r) # t
-    end
-    let result = code_escapes((String,String,Int,Int,Int)) do s, t, i, j, k
-            a = Vector{Any}(undef, 2)
-            a[3] = s # BoundsError
-            a[1] = t
-            return a[1]
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[SSAValue(i)], r)
-        @test !is_load_forwardable(result.state[SSAValue(i)])
-    end
-    let result = @eval Module() begin
-            @noinline some_resize!(a) = pushfirst!(a, nothing)
-            $code_escapes((String,String,Int)) do s, t, i
-                a = Vector{Any}(undef, 2)
-                a[1] = s
-                some_resize!(a)
-                return a[2]
-            end
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test_broken !has_return_escape(result.state[SSAValue(i)], r)
-        @test !is_load_forwardable(result.state[SSAValue(i)])
-    end
-
-    # circular reference
-    let result = code_escapes() do
-            xs = Vector{Any}(undef, 1)
-            xs[1] = xs
-            return xs[1]
-        end
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[SSAValue(i)], r)
-    end
-    let result = @eval Module() begin
-            const Ax = Vector{Any}(undef, 1)
-            Ax[1] = Ax
-            $code_escapes() do
-                xs = Ax[1]::Vector{Any}
-                return xs[1]
-            end
-        end
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        for i in findall(iscall((result.ir, Core.arrayref)), result.ir.stmts.inst)
-            @test has_return_escape(result.state[SSAValue(i)], r)
-        end
-    end
-    let result = @eval Module() begin
-            @noinline function genxs()
-                xs = Vector{Any}(undef, 1)
-                xs[1] = xs
-                return xs
-            end
-            $code_escapes() do
-                xs = genxs()
-                return xs[1]
-            end
-        end
-        i = only(findall(isinvoke(:genxs), result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test has_return_escape(result.state[SSAValue(i)], r)
-    end
-end
-
-# demonstrate array primitive support with a realistic end to end example
-let result = code_escapes((Int,String,)) do n,s
-        xs = String[]
-        for i in 1:n
-            push!(xs, s)
-        end
-        xs
-    end
-    i = only(findall(isarrayalloc, result.ir.stmts.inst))
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test has_return_escape(result.state[SSAValue(i)], r)
-    @test !has_thrown_escape(result.state[SSAValue(i)])
-    @test has_return_escape(result.state[Argument(3)], r) # s
-    @test !has_thrown_escape(result.state[Argument(3)])    # s
-end
-let result = code_escapes((Int,String,)) do n,s
-        xs = String[]
-        for i in 1:n
-            pushfirst!(xs, s)
-        end
-        xs
-    end
-    i = only(findall(isarrayalloc, result.ir.stmts.inst))
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test has_return_escape(result.state[SSAValue(i)], r) # xs
-    @test !has_thrown_escape(result.state[SSAValue(i)])    # xs
-    @test has_return_escape(result.state[Argument(3)], r) # s
-    @test !has_thrown_escape(result.state[Argument(3)])    # s
-end
-let result = code_escapes((String,String,String)) do s, t, u
-        xs = String[]
-        resize!(xs, 3)
-        xs[1] = s
-        xs[1] = t
-        xs[1] = u
-        xs
-    end
-    i = only(findall(isarrayalloc, result.ir.stmts.inst))
-    r = only(findall(isreturn, result.ir.stmts.inst))
-    @test has_return_escape(result.state[SSAValue(i)], r)
-    @test has_thrown_escape(result.state[SSAValue(i)])    # xs
-    @test has_return_escape(result.state[Argument(2)], r) # s
-    @test has_return_escape(result.state[Argument(3)], r) # t
-    @test has_return_escape(result.state[Argument(4)], r) # u
-end
-
-@static if isdefined(Core, :ImmutableArray)
-
-import Core: ImmutableArray, arrayfreeze, mutating_arrayfreeze, arraythaw
-
-@testset "ImmutableArray" begin
-    # arrayfreeze
-    let result = code_escapes((Vector{Any},)) do xs
-            arrayfreeze(xs)
-        end
-        @test !has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Vector,)) do xs
-            arrayfreeze(xs)
-        end
-        @test !has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Any,)) do xs
-            arrayfreeze(xs)
-        end
-        @test has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((ImmutableArray{Any,1},)) do xs
-            arrayfreeze(xs)
-        end
-        @test has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes() do
-            xs = Any[]
-            arrayfreeze(xs)
-        end
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test has_no_escape(result.state[SSAValue(1)])
-    end
-
-    # mutating_arrayfreeze
-    let result = code_escapes((Vector{Any},)) do xs
-            mutating_arrayfreeze(xs)
-        end
-        @test !has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Vector,)) do xs
-            mutating_arrayfreeze(xs)
-        end
-        @test !has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Any,)) do xs
-            mutating_arrayfreeze(xs)
-        end
-        @test has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((ImmutableArray{Any,1},)) do xs
-            mutating_arrayfreeze(xs)
-        end
-        @test has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes() do
-            xs = Any[]
-            mutating_arrayfreeze(xs)
-        end
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test has_no_escape(result.state[SSAValue(1)])
-    end
-
-    # arraythaw
-    let result = code_escapes((ImmutableArray{Any,1},)) do xs
-            arraythaw(xs)
-        end
-        @test !has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((ImmutableArray,)) do xs
-            arraythaw(xs)
-        end
-        @test !has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Any,)) do xs
-            arraythaw(xs)
-        end
-        @test has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes((Vector{Any},)) do xs
-            arraythaw(xs)
-        end
-        @test has_thrown_escape(result.state[Argument(2)])
-    end
-    let result = code_escapes() do
-            xs = ImmutableArray(Any[])
-            arraythaw(xs)
-        end
-        i = only(findall(isarrayalloc, result.ir.stmts.inst))
-        @test has_no_escape(result.state[SSAValue(1)])
-    end
-end
-
-# demonstrate some arrayfreeze optimizations
-# !has_return_escape(ary) means ary is eligible for arrayfreeze to mutating_arrayfreeze optimization
-let result = code_escapes((Int,)) do n
-        xs = collect(1:n)
-        ImmutableArray(xs)
-    end
-    i = only(findall(isarrayalloc, result.ir.stmts.inst))
-    @test !has_return_escape(result.state[SSAValue(i)])
-end
-let result = code_escapes((Vector{Float64},)) do xs
-        ys = sin.(xs)
-        ImmutableArray(ys)
-    end
-    i = only(findall(isarrayalloc, result.ir.stmts.inst))
-    @test !has_return_escape(result.state[SSAValue(i)])
-end
-let result = code_escapes((Vector{Pair{Int,String}},)) do xs
-        n = maximum(first, xs)
-        ys = Vector{String}(undef, n)
-        for (i, s) in xs
-            ys[i] = s
-        end
-        ImmutableArray(xs)
-    end
-    i = only(findall(isarrayalloc, result.ir.stmts.inst))
-    @test !has_return_escape(result.state[SSAValue(i)])
-end
-
-end # @static if isdefined(Core, :ImmutableArray)
-
-# demonstrate a simple type level analysis can sometimes improve the analysis accuracy
-# by compensating the lack of yet unimplemented analyses
-@testset "special-casing bitstype" begin
-    let result = code_escapes((Nothing,)) do a
-            global GV = a
-        end
-        @test !(has_all_escape(result.state[Argument(2)]))
-    end
-
-    let result = code_escapes((Int,)) do a
-            o = SafeRef(a)
-            f = o[]
-            return f
-        end
-        i = only(findall(isnew, result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[SSAValue(i)], r)
-    end
-
-    # an escaped tuple stmt will not propagate to its Int argument (since `Int` is of bitstype)
-    let result = code_escapes((Int,Any,)) do a, b
-            t = tuple(a, b)
-            return t
-        end
-        i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst))
-        r = only(findall(isreturn, result.ir.stmts.inst))
-        @test !has_return_escape(result.state[Argument(2)], r)
-        @test has_return_escape(result.state[Argument(3)], r)
-    end
-end
-
-# # TODO implement a finalizer elision pass
-# mutable struct WithFinalizer
-#     v
-#     function WithFinalizer(v)
-#         x = new(v)
-#         f(t) = @async println("Finalizing $t.")
-#         return finalizer(x, x)
-#     end
-# end
-# make_m(v = 10) = MyMutable(v)
-# function simple(cond)
-#     m = make_m()
-#     if cond
-#         # println(m.v)
-#         return nothing # <= insert `finalize` call here
-#     end
-#     return m
-# end
diff --git a/test/compiler/EscapeAnalysis/setup.jl b/test/compiler/EscapeAnalysis/setup.jl
deleted file mode 100644
index 18221e5afc524..0000000000000
--- a/test/compiler/EscapeAnalysis/setup.jl
+++ /dev/null
@@ -1,59 +0,0 @@
-include(normpath(@__DIR__, "..", "irutils.jl"))
-include(normpath(@__DIR__, "EAUtils.jl"))
-using Test, Core.Compiler.EscapeAnalysis, .EAUtils
-import Core: Argument, SSAValue, ReturnNode
-const EA = Core.Compiler.EscapeAnalysis
-import .EA: ignore_argescape
-
-isT(T) = (@nospecialize x) -> x === T
-isreturn(@nospecialize x) = isa(x, Core.ReturnNode) && isdefined(x, :val)
-isthrow(@nospecialize x) = Meta.isexpr(x, :call) && Core.Compiler.is_throw_call(x)
-isϕ(@nospecialize x) = isa(x, Core.PhiNode)
-function with_normalized_name(@nospecialize(f), @nospecialize(x))
-    if Meta.isexpr(x, :foreigncall)
-        name = x.args[1]
-        nn = EA.normalize(name)
-        return isa(nn, Symbol) && f(nn)
-    end
-    return false
-end
-isarrayalloc(@nospecialize x) = with_normalized_name(nn->!isnothing(Core.Compiler.alloc_array_ndims(nn)), x)
-isarrayresize(@nospecialize x) = with_normalized_name(nn->!isnothing(EA.array_resize_info(nn)), x)
-isarraycopy(@nospecialize x) = with_normalized_name(nn->EA.is_array_copy(nn), x)
-
-"""
-    is_load_forwardable(x::EscapeInfo) -> Bool
-
-Queries if `x` is elibigle for store-to-load forwarding optimization.
-"""
-function is_load_forwardable(x::EA.EscapeInfo)
-    AliasInfo = x.AliasInfo
-    # NOTE technically we also need to check `!has_thrown_escape(x)` here as well,
-    # but we can also do equivalent check during forwarding
-    return isa(AliasInfo, EA.IndexableFields) || isa(AliasInfo, EA.IndexableElements)
-end
-
-let setup_ex = quote
-        mutable struct SafeRef{T}
-            x::T
-        end
-        Base.getindex(s::SafeRef) = getfield(s, 1)
-        Base.setindex!(s::SafeRef, x) = setfield!(s, 1, x)
-
-        mutable struct SafeRefs{S,T}
-            x1::S
-            x2::T
-        end
-        Base.getindex(s::SafeRefs, idx::Int) = getfield(s, idx)
-        Base.setindex!(s::SafeRefs, x, idx::Int) = setfield!(s, idx, x)
-
-        global GV::Any
-        const global GR = Ref{Any}()
-    end
-    global function EATModule(setup_ex = setup_ex)
-        M = Module()
-        Core.eval(M, setup_ex)
-        return M
-    end
-    Core.eval(@__MODULE__, setup_ex)
-end
diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl
deleted file mode 100644
index bbcf7b0dfb959..0000000000000
--- a/test/compiler/contextual.jl
+++ /dev/null
@@ -1,222 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Cassette
-# ========
-
-module MiniCassette
-    # A minimal demonstration of the cassette mechanism. Doesn't support all the
-    # fancy features, but sufficient to exercise this code path in the compiler.
-
-    using Core.Compiler: retrieve_code_info, CodeInfo,
-        MethodInstance, SSAValue, GotoNode, GotoIfNot, ReturnNode, SlotNumber, quoted,
-        signature_type, anymap
-    using Base: _methods_by_ftype
-    using Base.Meta: isexpr
-    using Test
-
-    export Ctx, overdub
-
-    struct Ctx; end
-
-    # A no-op cassette-like transform
-    function transform_expr(expr, map_slot_number, map_ssa_value, sparams::Core.SimpleVector)
-        @nospecialize expr
-        transform(@nospecialize expr) = transform_expr(expr, map_slot_number, map_ssa_value, sparams)
-        if isexpr(expr, :call)
-            return Expr(:call, overdub, SlotNumber(2), anymap(transform, expr.args)...)
-        elseif isa(expr, GotoIfNot)
-            return GotoIfNot(transform(expr.cond), map_ssa_value(SSAValue(expr.dest)).id)
-        elseif isexpr(expr, :static_parameter)
-            return quoted(sparams[expr.args[1]])
-        elseif isa(expr, ReturnNode)
-            return ReturnNode(transform(expr.val))
-        elseif isa(expr, Expr)
-            return Expr(expr.head, anymap(transform, expr.args)...)
-        elseif isa(expr, GotoNode)
-            return GotoNode(map_ssa_value(SSAValue(expr.label)).id)
-        elseif isa(expr, SlotNumber)
-            return map_slot_number(expr.id)
-        elseif isa(expr, SSAValue)
-            return map_ssa_value(expr)
-        else
-            return expr
-        end
-    end
-
-    function transform!(ci::CodeInfo, nargs::Int, sparams::Core.SimpleVector)
-        code = ci.code
-        ci.slotnames = Symbol[Symbol("#self#"), :ctx, :f, :args, ci.slotnames[nargs+1:end]...]
-        ci.slotflags = UInt8[(0x00 for i = 1:4)..., ci.slotflags[nargs+1:end]...]
-        # Insert one SSAValue for every argument statement
-        prepend!(code, Any[Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs])
-        prepend!(ci.codelocs, fill(0, nargs))
-        prepend!(ci.ssaflags, fill(0x00, nargs))
-        ci.ssavaluetypes += nargs
-        function map_slot_number(slot::Int)
-            if slot == 1
-                # self in the original function is now `f`
-                return SlotNumber(3)
-            elseif 2 <= slot <= nargs + 1
-                # Arguments get inserted as ssa values at the top of the function
-                return SSAValue(slot - 1)
-            else
-                # The first non-argument slot will be 5
-                return SlotNumber(slot - (nargs + 1) + 4)
-            end
-        end
-        map_ssa_value(ssa::SSAValue) = SSAValue(ssa.id + nargs)
-        for i = (nargs+1:length(code))
-            code[i] = transform_expr(code[i], map_slot_number, map_ssa_value, sparams)
-        end
-    end
-
-    function overdub_generator(world::UInt, source, self, c, f, args)
-        @nospecialize
-        if !Base.issingletontype(f)
-            # (c, f, args..) -> f(args...)
-            code_info = :(return f(args...))
-            return Core.GeneratedFunctionStub(identity, Core.svec(:overdub, :c, :f, :args), Core.svec())(world, source, code_info)
-        end
-
-        tt = Tuple{f, args...}
-        match = Base._which(tt; world)
-        mi = Core.Compiler.specialize_method(match)
-        # Unsupported in this mini-cassette
-        @assert !mi.def.isva
-        code_info = retrieve_code_info(mi, world)
-        @assert isa(code_info, CodeInfo)
-        code_info = copy(code_info)
-        @assert code_info.edges === nothing
-        code_info.edges = MethodInstance[mi]
-        transform!(code_info, length(args), match.sparams)
-        # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[])
-        # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[])
-        return code_info
-    end
-
-    @inline function overdub(c::Ctx, f::Union{Core.Builtin, Core.IntrinsicFunction}, args...)
-        f(args...)
-    end
-
-    @eval function overdub(c::Ctx, f, args...)
-        $(Expr(:meta, :generated_only))
-        $(Expr(:meta, :generated, overdub_generator))
-    end
-end
-
-using .MiniCassette
-
-# Test #265 for Cassette
-f() = 1
-@test overdub(Ctx(), f) === 1
-f() = 2
-@test overdub(Ctx(), f) === 2
-
-# Test that MiniCassette is at least somewhat capable by overdubbing gcd
-@test overdub(Ctx(), gcd, 10, 20) === gcd(10, 20)
-
-@generated bar(::Val{align}) where {align} = :(42)
-foo(i) = i+bar(Val(1))
-
-@test @inferred(overdub(Ctx(), foo, 1)) == 43
-
-# overlay method tables
-# =====================
-
-module OverlayModule
-
-using Base.Experimental: @MethodTable, @overlay
-
-@MethodTable(mt)
-
-@overlay mt function sin(x::Float64)
-    1
-end
-
-# short function def
-@overlay mt cos(x::Float64) = 2
-
-# parametric function def
-@overlay mt tan(x::T) where {T} = 3
-
-end # module OverlayModule
-
-methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, nothing, 1, Base.get_world_counter())
-@test only(methods).method.module === Base.Math
-
-methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, OverlayModule.mt, 1, Base.get_world_counter())
-@test only(methods).method.module === OverlayModule
-
-methods = Base._methods_by_ftype(Tuple{typeof(sin), Int}, OverlayModule.mt, 1, Base.get_world_counter())
-@test isempty(methods)
-
-# precompilation
-
-load_path = mktempdir()
-depot_path = mktempdir()
-try
-    pushfirst!(LOAD_PATH, load_path)
-    pushfirst!(DEPOT_PATH, depot_path)
-
-    write(joinpath(load_path, "Foo.jl"),
-          """
-          module Foo
-          Base.Experimental.@MethodTable(mt)
-          Base.Experimental.@overlay mt sin(x::Int) = 1
-          end
-          """)
-
-     # precompiling Foo serializes the overlay method through the `mt` binding in the module
-     Foo = Base.require(Main, :Foo)
-     @test length(Foo.mt) == 1
-
-    write(joinpath(load_path, "Bar.jl"),
-          """
-          module Bar
-          Base.Experimental.@MethodTable(mt)
-          end
-          """)
-
-    write(joinpath(load_path, "Baz.jl"),
-          """
-          module Baz
-          using Bar
-          Base.Experimental.@overlay Bar.mt sin(x::Int) = 1
-          end
-          """)
-
-     # when referring an method table in another module,
-     # the overlay method needs to be discovered explicitly
-     Bar = Base.require(Main, :Bar)
-     @test length(Bar.mt) == 0
-     Baz = Base.require(Main, :Baz)
-     @test length(Bar.mt) == 1
-finally
-    filter!((≠)(load_path), LOAD_PATH)
-    filter!((≠)(depot_path), DEPOT_PATH)
-    rm(load_path, recursive=true, force=true)
-    try
-        rm(depot_path, force=true, recursive=true)
-    catch err
-        @show err
-    end
-end
-
-# Test that writing a bad cassette-style pass gives the expected error (#49715)
-function generator49715(world, source, self, f, tt)
-    tt = tt.parameters[1]
-    sig = Tuple{f, tt.parameters...}
-    mi = Base._which(sig; world)
-
-    error("oh no")
-
-    stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, :ctx, :x, :f), Core.svec())
-    stub(world, source, :(nothing))
-end
-
-@eval function doit49715(f, tt)
-  $(Expr(:meta, :generated, generator49715))
-  $(Expr(:meta, :generated_only))
-end
-
-@test_throws "oh no" doit49715(sin, Tuple{Int})
diff --git a/test/compiler/datastructures.jl b/test/compiler/datastructures.jl
deleted file mode 100644
index 8dbaee61503d0..0000000000000
--- a/test/compiler/datastructures.jl
+++ /dev/null
@@ -1,100 +0,0 @@
-using Test
-
-@testset "CachedMethodTable" begin
-    # cache result should be separated per `limit` and `sig`
-    # https://github.com/JuliaLang/julia/pull/46799
-    interp = Core.Compiler.NativeInterpreter()
-    table = Core.Compiler.method_table(interp)
-    sig = Tuple{typeof(*), Any, Any}
-    result1 = Core.Compiler.findall(sig, table; limit=-1)
-    result2 = Core.Compiler.findall(sig, table; limit=Core.Compiler.InferenceParams().max_methods)
-    @test result1 !== nothing && !Core.Compiler.isempty(result1.matches)
-    @test result2 === nothing
-end
-
-@testset "BitSetBoundedMinPrioritySet" begin
-    bsbmp = Core.Compiler.BitSetBoundedMinPrioritySet(5)
-    Core.Compiler.push!(bsbmp, 2)
-    Core.Compiler.push!(bsbmp, 2)
-    @test Core.Compiler.popfirst!(bsbmp) == 2
-    Core.Compiler.push!(bsbmp, 1)
-    @test Core.Compiler.popfirst!(bsbmp) == 1
-    @test Core.Compiler.isempty(bsbmp)
-end
-
-@testset "basic heap functionality" begin
-    v = [2,3,1]
-    @test Core.Compiler.heapify!(v, Core.Compiler.Forward) === v
-    @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 1
-    @test Core.Compiler.heappush!(v, 4, Core.Compiler.Forward) === v
-    @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 2
-    @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 3
-    @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 4
-end
-
-@testset "randomized heap correctness tests" begin
-    order = Core.Compiler.By(x -> -x[2])
-    for i in 1:6
-        heap = Tuple{Int, Int}[(rand(1:i), rand(1:i)) for _ in 1:2i]
-        mock = copy(heap)
-        @test Core.Compiler.heapify!(heap, order) === heap
-        sort!(mock, by=last)
-
-        for _ in 1:6i
-            if rand() < .5 && !isempty(heap)
-                # The first entries may differ because heaps are not stable
-                @test last(Core.Compiler.heappop!(heap, order)) === last(pop!(mock))
-            else
-                new = (rand(1:i), rand(1:i))
-                Core.Compiler.heappush!(heap, new, order)
-                push!(mock, new)
-                sort!(mock, by=last)
-            end
-        end
-    end
-end
-
-@testset "searchsorted" begin
-    @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 0) === Core.Compiler.UnitRange(1, 0)
-    @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 1) === Core.Compiler.UnitRange(1, 2)
-    @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2) === Core.Compiler.UnitRange(3, 4)
-    @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 4) === Core.Compiler.UnitRange(7, 6)
-    @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2.5; lt=<) === Core.Compiler.UnitRange(5, 4)
-
-    @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 0) === Core.Compiler.UnitRange(1, 0)
-    @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 1) === Core.Compiler.UnitRange(1, 1)
-    @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 2) === Core.Compiler.UnitRange(2, 2)
-    @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 4) === Core.Compiler.UnitRange(4, 3)
-
-    @test Core.Compiler.searchsorted([1:10;], 1, by=(x -> x >= 5)) === Core.Compiler.UnitRange(1, 4)
-    @test Core.Compiler.searchsorted([1:10;], 10, by=(x -> x >= 5)) === Core.Compiler.UnitRange(5, 10)
-    @test Core.Compiler.searchsorted([1:5; 1:5; 1:5], 1, 6, 10, Core.Compiler.Forward) === Core.Compiler.UnitRange(6, 6)
-    @test Core.Compiler.searchsorted(fill(1, 15), 1, 6, 10, Core.Compiler.Forward) === Core.Compiler.UnitRange(6, 10)
-
-    for (rg,I) in Any[(Core.Compiler.UnitRange(49, 57),   47:59),
-                      (Core.Compiler.StepRange(1, 2, 17), -1:19)]
-        rg_r = Core.Compiler.reverse(rg)
-        rgv, rgv_r = Core.Compiler.collect(rg), Core.Compiler.collect(rg_r)
-        for i = I
-            @test Core.Compiler.searchsorted(rg,i) === Core.Compiler.searchsorted(rgv,i)
-            @test Core.Compiler.searchsorted(rg_r,i,rev=true) === Core.Compiler.searchsorted(rgv_r,i,rev=true)
-        end
-    end
-end
-
-@testset "basic sort" begin
-    v = [3,1,2]
-    @test v == [3,1,2]
-    @test Core.Compiler.sort!(v) === v == [1,2,3]
-    @test Core.Compiler.sort!(v, by = x -> -x) === v == [3,2,1]
-    @test Core.Compiler.sort!(v, by = x -> -x, < = >) === v == [1,2,3]
-end
-
-@testset "randomized sorting tests" begin
-    for n in [0, 1, 3, 10, 30, 100, 300], k in [0, 30, 2n]
-        v = rand(-1:k, n)
-        for by in [identity, x -> -x, x -> x^2 + .1x], lt in [<, >]
-            @test sort(v; by, lt) == Core.Compiler.sort!(copy(v); by, < = lt)
-        end
-    end
-end
diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl
deleted file mode 100644
index a4b21da523a8e..0000000000000
--- a/test/compiler/effects.jl
+++ /dev/null
@@ -1,1000 +0,0 @@
-using Test
-include("irutils.jl")
-
-# Test that the Core._apply_iterate bail path taints effects
-function f_apply_bail(f)
-    f(()...)
-    return nothing
-end
-@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(f_apply_bail))
-@test !fully_eliminated((Function,)) do f
-    f_apply_bail(f)
-    nothing
-end
-
-# Test that effect modeling for return_type doesn't incorrectly pick
-# up the effects of the function being analyzed
-f_throws() = error()
-@noinline function return_type_unused(x)
-    Core.Compiler.return_type(f_throws, Tuple{})
-    return x+1
-end
-@test Core.Compiler.is_removable_if_unused(Base.infer_effects(return_type_unused, (Int,)))
-@test fully_eliminated((Int,)) do x
-    return_type_unused(x)
-    return nothing
-end
-
-# Test that ambiguous calls don't accidentally get nothrow effect
-ambig_effects_test(a::Int, b) = 1
-ambig_effects_test(a, b::Int) = 1
-ambig_effects_test(a, b) = 1
-@test !Core.Compiler.is_nothrow(Base.infer_effects(ambig_effects_test, (Int, Any)))
-global ambig_unknown_type_global::Any = 1
-@noinline function conditionally_call_ambig(b::Bool, a)
-    if b
-        ambig_effects_test(a, ambig_unknown_type_global)
-    end
-    return 0
-end
-@test !fully_eliminated((Bool,)) do b
-    conditionally_call_ambig(b, 1)
-    return nothing
-end
-
-# Test that a missing methtable identification gets tainted
-# appropriately
-struct FCallback; f::Union{Nothing, Function}; end
-f_invoke_callback(fc) = let f=fc.f; (f !== nothing && f(); nothing); end
-@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(f_invoke_callback, (FCallback,)))
-@test !fully_eliminated((FCallback,)) do fc
-    f_invoke_callback(fc)
-    return nothing
-end
-
-# @assume_effects override
-const ___CONST_DICT___ = Dict{Any,Any}(Symbol(c) => i for (i, c) in enumerate('a':'z'))
-Base.@assume_effects :foldable concrete_eval(
-    f, args...; kwargs...) = f(args...; kwargs...)
-@test fully_eliminated() do
-    concrete_eval(getindex, ___CONST_DICT___, :a)
-end
-
-# :removable override
-Base.@assume_effects :removable removable_call(
-    f, args...; kwargs...) = f(args...; kwargs...)
-@test fully_eliminated() do
-    @noinline removable_call(getindex, ___CONST_DICT___, :a)
-    nothing
-end
-
-# terminates_globally override
-# https://github.com/JuliaLang/julia/issues/41694
-Base.@assume_effects :terminates_globally function issue41694(x)
-    res = 1
-    1 < x < 20 || throw("bad")
-    while x > 1
-        res *= x
-        x -= 1
-    end
-    return res
-end
-@test Core.Compiler.is_foldable(Base.infer_effects(issue41694, (Int,)))
-@test fully_eliminated() do
-    issue41694(2)
-end
-
-Base.@assume_effects :terminates_globally function recur_termination1(x)
-    x == 1 && return 1
-    1 < x < 20 || throw("bad")
-    return x * recur_termination1(x-1)
-end
-@test Core.Compiler.is_foldable(Base.infer_effects(recur_termination1, (Int,)))
-@test fully_eliminated() do
-    recur_termination1(12)
-end
-
-Base.@assume_effects :terminates_globally function recur_termination21(x)
-    x == 1 && return 1
-    1 < x < 20 || throw("bad")
-    return recur_termination22(x)
-end
-recur_termination22(x) = x * recur_termination21(x-1)
-@test Core.Compiler.is_foldable(Base.infer_effects(recur_termination21, (Int,)))
-@test Core.Compiler.is_foldable(Base.infer_effects(recur_termination22, (Int,)))
-@test fully_eliminated() do
-    recur_termination21(12) + recur_termination22(12)
-end
-
-# anonymous function support for `@assume_effects`
-@test fully_eliminated() do
-    map((2,3,4)) do x
-        # this :terminates_locally allows this anonymous function to be constant-folded
-        Base.@assume_effects :terminates_locally
-        res = 1
-        1 < x < 20 || error("bad pow")
-        while x > 1
-            res *= x
-            x -= 1
-        end
-        return res
-    end
-end
-
-# control flow backedge should taint `terminates`
-@test Base.infer_effects((Int,)) do n
-    for i = 1:n; end
-end |> !Core.Compiler.is_terminates
-
-# interprocedural-recursion should taint `terminates` **appropriately**
-function sumrecur(a, x)
-    isempty(a) && return x
-    return sumrecur(Base.tail(a), x + first(a))
-end
-@test Base.infer_effects(sumrecur, (Tuple{Int,Int,Int},Int)) |> Core.Compiler.is_terminates
-@test Base.infer_effects(sumrecur, (Tuple{Int,Int,Int,Vararg{Int}},Int)) |> !Core.Compiler.is_terminates
-
-# https://github.com/JuliaLang/julia/issues/45781
-@test Base.infer_effects((Float32,)) do a
-    out1 = promote_type(Irrational{:π}, Bool)
-    out2 = sin(a)
-    out1, out2
-end |> Core.Compiler.is_terminates
-
-# refine :consistent-cy effect inference using the return type information
-@test Base.infer_effects((Any,)) do x
-    taint = Ref{Any}(x) # taints :consistent-cy, but will be adjusted
-    throw(taint)
-end |> Core.Compiler.is_consistent
-@test Base.infer_effects((Int,)) do x
-    if x < 0
-        taint = Ref(x) # taints :consistent-cy, but will be adjusted
-        throw(DomainError(x, taint))
-    end
-    return nothing
-end |> Core.Compiler.is_consistent
-@test Base.infer_effects((Int,)) do x
-    if x < 0
-        taint = Ref(x) # taints :consistent-cy, but will be adjusted
-        throw(DomainError(x, taint))
-    end
-    return x == 0 ? nothing : x # should `Union` of isbitstype objects nicely
-end |> Core.Compiler.is_consistent
-@test Base.infer_effects((Symbol,Any)) do s, x
-    if s === :throw
-        taint = Ref{Any}(":throw option given") # taints :consistent-cy, but will be adjusted
-        throw(taint)
-    end
-    return s # should handle `Symbol` nicely
-end |> Core.Compiler.is_consistent
-@test Base.infer_effects((Int,)) do x
-    return Ref(x)
-end |> !Core.Compiler.is_consistent
-@test Base.infer_effects((Int,)) do x
-    return x < 0 ? Ref(x) : nothing
-end |> !Core.Compiler.is_consistent
-@test Base.infer_effects((Int,)) do x
-    if x < 0
-        throw(DomainError(x, lazy"$x is negative"))
-    end
-    return nothing
-end |> Core.Compiler.is_foldable
-
-# :the_exception expression should taint :consistent-cy
-global inconsistent_var::Int = 42
-function throw_inconsistent() # this is still :consistent
-    throw(inconsistent_var)
-end
-function catch_inconsistent()
-    try
-        throw_inconsistent()
-    catch err
-        err
-    end
-end
-@test !Core.Compiler.is_consistent(Base.infer_effects(catch_inconsistent))
-cache_inconsistent() = catch_inconsistent()
-function compare_inconsistent()
-    a = cache_inconsistent()
-    global inconsistent_var = 0
-    b = cache_inconsistent()
-    global inconsistent_var = 42
-    return a === b
-end
-@test !compare_inconsistent()
-# return type information shouldn't be able to refine it also
-function catch_inconsistent(x::T) where T
-    v = x
-    try
-        throw_inconsistent()
-    catch err
-        v = err::T
-    end
-    return v
-end
-@test !Core.Compiler.is_consistent(Base.infer_effects(catch_inconsistent, (Int,)))
-cache_inconsistent(x) = catch_inconsistent(x)
-function compare_inconsistent(x::T) where T
-    x = one(T)
-    a = cache_inconsistent(x)
-    global inconsistent_var = 0
-    b = cache_inconsistent(x)
-    global inconsistent_var = 42
-    return a === b
-end
-@test !compare_inconsistent(3)
-
-# Effect modeling for Core.compilerbarrier
-@test Base.infer_effects(Base.inferencebarrier, Tuple{Any}) |> Core.Compiler.is_removable_if_unused
-
-# allocation/access of uninitialized fields should taint the :consistent-cy
-struct Maybe{T}
-    x::T
-    Maybe{T}() where T = new{T}()
-    Maybe{T}(x) where T = new{T}(x)
-    Maybe(x::T) where T = new{T}(x)
-end
-Base.getindex(x::Maybe) = x.x
-
-struct SyntacticallyDefined{T}
-    x::T
-end
-
-import Core.Compiler: Const, getfield_notundefined
-for T = (Base.RefValue, Maybe) # both mutable and immutable
-    for name = (Const(1), Const(:x))
-        @test getfield_notundefined(T{String}, name)
-        @test getfield_notundefined(T{Integer}, name)
-        @test getfield_notundefined(T{Union{String,Integer}}, name)
-        @test getfield_notundefined(Union{T{String},T{Integer}}, name)
-        @test !getfield_notundefined(T{Int}, name)
-        @test !getfield_notundefined(T{<:Integer}, name)
-        @test !getfield_notundefined(T{Union{Int32,Int64}}, name)
-        @test !getfield_notundefined(T, name)
-    end
-    # throw doesn't account for undefined behavior
-    for name = (Const(0), Const(2), Const(1.0), Const(:y), Const("x"),
-                Float64, String, Nothing)
-        @test getfield_notundefined(T{String}, name)
-        @test getfield_notundefined(T{Int}, name)
-        @test getfield_notundefined(T{Integer}, name)
-        @test getfield_notundefined(T{<:Integer}, name)
-        @test getfield_notundefined(T{Union{Int32,Int64}}, name)
-        @test getfield_notundefined(T, name)
-    end
-    # should not be too conservative when field isn't known very well but object information is accurate
-    @test getfield_notundefined(T{String}, Int)
-    @test getfield_notundefined(T{String}, Symbol)
-    @test getfield_notundefined(T{Integer}, Int)
-    @test getfield_notundefined(T{Integer}, Symbol)
-    @test !getfield_notundefined(T{Int}, Int)
-    @test !getfield_notundefined(T{Int}, Symbol)
-    @test !getfield_notundefined(T{<:Integer}, Int)
-    @test !getfield_notundefined(T{<:Integer}, Symbol)
-end
-# should be conservative when object information isn't accurate
-@test !getfield_notundefined(Any, Const(1))
-@test !getfield_notundefined(Any, Const(:x))
-# tuples and namedtuples should be okay if not given accurate information
-for TupleType = Any[Tuple{Int,Int,Int}, Tuple{Int,Vararg{Int}}, Tuple{Any}, Tuple,
-                    NamedTuple{(:a, :b), Tuple{Int,Int}}, NamedTuple{(:x,),Tuple{Any}}, NamedTuple],
-    FieldType = Any[Int, Symbol, Any]
-    @test getfield_notundefined(TupleType, FieldType)
-end
-# skip analysis on fields that are known to be defined syntactically
-@test Core.Compiler.getfield_notundefined(SyntacticallyDefined{Float64}, Symbol)
-@test Core.Compiler.getfield_notundefined(Const(Main), Const(:var))
-@test Core.Compiler.getfield_notundefined(Const(Main), Const(42))
-# high-level tests for `getfield_notundefined`
-@test Base.infer_effects() do
-    Maybe{Int}()
-end |> !Core.Compiler.is_consistent
-@test Base.infer_effects() do
-    Maybe{Int}()[]
-end |> !Core.Compiler.is_consistent
-@test !fully_eliminated() do
-    Maybe{Int}()[]
-end
-@test Base.infer_effects() do
-    Maybe{String}()
-end |> Core.Compiler.is_consistent
-@test Base.infer_effects() do
-    Maybe{String}()[]
-end |> Core.Compiler.is_consistent
-let f() = Maybe{String}()[]
-    @test Base.return_types() do
-        f() # this call should be concrete evaluated
-    end |> only === Union{}
-end
-@test Base.infer_effects() do
-    Ref{Int}()
-end |> !Core.Compiler.is_consistent
-@test Base.infer_effects() do
-    Ref{Int}()[]
-end |> !Core.Compiler.is_consistent
-@test !fully_eliminated() do
-    Ref{Int}()[]
-end
-@test Base.infer_effects() do
-    Ref{String}()[]
-end |> Core.Compiler.is_consistent
-let f() = Ref{String}()[]
-    @test Base.return_types() do
-        f() # this call should be concrete evaluated
-    end |> only === Union{}
-end
-@test Base.infer_effects((SyntacticallyDefined{Float64}, Symbol)) do w, s
-    getfield(w, s)
-end |> Core.Compiler.is_foldable
-
-# effects propagation for `Core.invoke` calls
-# https://github.com/JuliaLang/julia/issues/44763
-global x44763::Int = 0
-increase_x44763!(n) = (global x44763; x44763 += n)
-invoke44763(x) = @invoke increase_x44763!(x)
-@test Base.return_types() do
-    invoke44763(42)
-end |> only === Int
-@test x44763 == 0
-
-# `@inbounds`/`@boundscheck` expression should taint :consistent-cy correctly
-# https://github.com/JuliaLang/julia/issues/48099
-function A1_inbounds()
-    r = 0
-    @inbounds begin
-        @boundscheck r += 1
-    end
-    return r
-end
-@test !Core.Compiler.is_consistent(Base.infer_effects(A1_inbounds))
-
-# Test that purity doesn't try to accidentally run unreachable code due to
-# boundscheck elimination
-function f_boundscheck_elim(n)
-    # Inbounds here assumes that this is only ever called with `n==0`, but of
-    # course the compiler has no way of knowing that, so it must not attempt
-    # to run the `@inbounds getfield(sin, 1)` that `ntuple` generates.
-    ntuple(x->(@inbounds ()[x]), n)
-end
-@test_broken !Core.Compiler.is_consistent(Base.infer_effects(f_boundscheck_elim, (Int,)))
-@test Tuple{} <: only(Base.return_types(f_boundscheck_elim, (Int,)))
-
-# Test that purity modeling doesn't accidentally introduce new world age issues
-f_redefine_me(x) = x+1
-f_call_redefine() = f_redefine_me(0)
-f_mk_opaque() = Base.Experimental.@opaque ()->Base.inferencebarrier(f_call_redefine)()
-const op_capture_world = f_mk_opaque()
-f_redefine_me(x) = x+2
-@test op_capture_world() == 1
-@test f_mk_opaque()() == 2
-
-# backedge insertion for Any-typed, effect-free frame
-const CONST_DICT = let d = Dict()
-    for c in 'A':'z'
-        push!(d, c => Int(c))
-    end
-    d
-end
-Base.@assume_effects :foldable getcharid(c) = CONST_DICT[c]
-@noinline callf(f, args...) = f(args...)
-function entry_to_be_invalidated(c)
-    return callf(getcharid, c)
-end
-@test Base.infer_effects((Char,)) do x
-    entry_to_be_invalidated(x)
-end |> Core.Compiler.is_foldable
-@test fully_eliminated(; retval=97) do
-    entry_to_be_invalidated('a')
-end
-getcharid(c) = CONST_DICT[c] # now this is not eligible for concrete evaluation
-@test Base.infer_effects((Char,)) do x
-    entry_to_be_invalidated(x)
-end |> !Core.Compiler.is_foldable
-@test !fully_eliminated() do
-    entry_to_be_invalidated('a')
-end
-
-@test !Core.Compiler.builtin_nothrow(Core.Compiler.fallback_lattice, Core.get_binding_type, Any[Rational{Int}, Core.Const(:foo)], Any)
-
-# Nothrow for assignment to globals
-global glob_assign_int::Int = 0
-f_glob_assign_int() = global glob_assign_int += 1
-let effects = Base.infer_effects(f_glob_assign_int, ())
-    @test !Core.Compiler.is_effect_free(effects)
-    @test Core.Compiler.is_nothrow(effects)
-end
-# Nothrow for setglobal!
-global SETGLOBAL!_NOTHROW::Int = 0
-let effects = Base.infer_effects() do
-        setglobal!(@__MODULE__, :SETGLOBAL!_NOTHROW, 42)
-    end
-    @test Core.Compiler.is_nothrow(effects)
-end
-
-# we should taint `nothrow` if the binding doesn't exist and isn't fixed yet,
-# as the cached effects can be easily wrong otherwise
-# since the inference currently doesn't track "world-age" of global variables
-@eval global_assignment_undefinedyet() = $(GlobalRef(@__MODULE__, :UNDEFINEDYET)) = 42
-setglobal!_nothrow_undefinedyet() = setglobal!(@__MODULE__, :UNDEFINEDYET, 42)
-let effects = Base.infer_effects() do
-        global_assignment_undefinedyet()
-    end
-    @test !Core.Compiler.is_nothrow(effects)
-end
-let effects = Base.infer_effects() do
-        setglobal!_nothrow_undefinedyet()
-    end
-    @test !Core.Compiler.is_nothrow(effects)
-end
-global UNDEFINEDYET::String = "0"
-let effects = Base.infer_effects() do
-        global_assignment_undefinedyet()
-    end
-    @test !Core.Compiler.is_nothrow(effects)
-end
-let effects = Base.infer_effects() do
-        setglobal!_nothrow_undefinedyet()
-    end
-    @test !Core.Compiler.is_nothrow(effects)
-end
-@test_throws ErrorException setglobal!_nothrow_undefinedyet()
-
-# Nothrow for setfield!
-mutable struct SetfieldNothrow
-    x::Int
-end
-f_setfield_nothrow() = SetfieldNothrow(0).x = 1
-let effects = Base.infer_effects(f_setfield_nothrow, ())
-    # Technically effect free even though we use the heap, since the
-    # object doesn't escape, but the compiler doesn't know that.
-    #@test Core.Compiler.is_effect_free(effects)
-    @test Core.Compiler.is_nothrow(effects)
-end
-
-# even if 2-arg `getfield` may throw, it should be still `:consistent`
-@test Core.Compiler.is_consistent(Base.infer_effects(getfield, (NTuple{5, Float64}, Int)))
-
-# SimpleVector allocation is consistent
-@test Core.Compiler.is_consistent(Base.infer_effects(Core.svec))
-@test Base.infer_effects() do
-    Core.svec(nothing, 1, "foo")
-end |> Core.Compiler.is_consistent
-
-# fastmath operations are inconsistent
-@test !Core.Compiler.is_consistent(Base.infer_effects((a,b)->@fastmath(a+b), (Float64,Float64)))
-
-# issue 46122: @assume_effects for @ccall
-@test Base.infer_effects((Vector{Int},)) do a
-    Base.@assume_effects :effect_free @ccall jl_array_ptr(a::Any)::Ptr{Int}
-end |> Core.Compiler.is_effect_free
-
-# `getfield_effects` handles access to union object nicely
-let 𝕃 = Core.Compiler.fallback_lattice
-    @test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(𝕃, Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Some{String}, Core.Const(:value)]), String))
-    @test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(𝕃, Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Some{Symbol}, Core.Const(:value)]), Symbol))
-    @test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(𝕃, Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Union{Some{Symbol},Some{String}}, Core.Const(:value)]), Union{Symbol,String}))
-end
-@test Base.infer_effects((Bool,)) do c
-    obj = c ? Some{String}("foo") : Some{Symbol}(:bar)
-    return getfield(obj, :value)
-end |> Core.Compiler.is_consistent
-
-# getfield is nothrow when bounds checking is turned off
-@test Base.infer_effects((Tuple{Int,Int},Int)) do t, i
-    getfield(t, i, false)
-end |> Core.Compiler.is_nothrow
-@test Base.infer_effects((Tuple{Int,Int},Symbol)) do t, i
-    getfield(t, i, false)
-end |> Core.Compiler.is_nothrow
-@test Base.infer_effects((Tuple{Int,Int},String)) do t, i
-    getfield(t, i, false) # invalid name type
-end |> !Core.Compiler.is_nothrow
-
-@test Core.Compiler.is_consistent(Base.infer_effects(setindex!, (Base.RefValue{Int}, Int)))
-
-# :inaccessiblememonly effect
-const global constant_global::Int = 42
-const global ConstantType = Ref
-global nonconstant_global::Int = 42
-const global constant_mutable_global = Ref(0)
-const global constant_global_nonisbits = Some(:foo)
-@test Base.infer_effects() do
-    constant_global
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    ConstantType
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    ConstantType{Any}()
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    constant_global_nonisbits
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    getglobal(@__MODULE__, :constant_global)
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    nonconstant_global
-end |> !Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    getglobal(@__MODULE__, :nonconstant_global)
-end |> !Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects((Symbol,)) do name
-    getglobal(@__MODULE__, name)
-end |> !Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects((Int,)) do v
-    global nonconstant_global = v
-end |> !Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects((Int,)) do v
-    setglobal!(@__MODULE__, :nonconstant_global, v)
-end |> !Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects((Int,)) do v
-    constant_mutable_global[] = v
-end |> !Core.Compiler.is_inaccessiblememonly
-module ConsistentModule
-const global constant_global::Int = 42
-const global ConstantType = Ref
-end # module
-@test Base.infer_effects() do
-    ConsistentModule.constant_global
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    ConsistentModule.ConstantType
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    ConsistentModule.ConstantType{Any}()
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    getglobal(@__MODULE__, :ConsistentModule).constant_global
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    getglobal(@__MODULE__, :ConsistentModule).ConstantType
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do
-    getglobal(@__MODULE__, :ConsistentModule).ConstantType{Any}()
-end |> Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects((Module,)) do M
-    M.constant_global
-end |> !Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects((Module,)) do M
-    M.ConstantType
-end |> !Core.Compiler.is_inaccessiblememonly
-@test Base.infer_effects() do M
-    M.ConstantType{Any}()
-end |> !Core.Compiler.is_inaccessiblememonly
-
-# the `:inaccessiblememonly` helper effect allows us to prove `:consistent`-cy of frames
-# including `getfield` / `isdefined` accessing to local mutable object
-
-mutable struct SafeRef{T}
-    x::T
-end
-Base.getindex(x::SafeRef) = x.x;
-Base.setindex!(x::SafeRef, v) = x.x = v;
-Base.isassigned(x::SafeRef) = true;
-
-function mutable_consistent(s)
-    SafeRef(s)[]
-end
-@test Core.Compiler.is_inaccessiblememonly(Base.infer_effects(mutable_consistent, (Symbol,)))
-@test fully_eliminated(; retval=:foo) do
-    mutable_consistent(:foo)
-end
-
-function nested_mutable_consistent(s)
-    SafeRef(SafeRef(SafeRef(SafeRef(SafeRef(s)))))[][][][][]
-end
-@test Core.Compiler.is_inaccessiblememonly(Base.infer_effects(nested_mutable_consistent, (Symbol,)))
-@test fully_eliminated(; retval=:foo) do
-    nested_mutable_consistent(:foo)
-end
-
-const consistent_global = Some(:foo)
-@test Base.infer_effects() do
-    consistent_global.value
-end |> Core.Compiler.is_consistent
-const inconsistent_global = SafeRef(:foo)
-@test Base.infer_effects() do
-    inconsistent_global[]
-end |> !Core.Compiler.is_consistent
-const inconsistent_condition_ref = Ref{Bool}(false)
-@test Base.infer_effects() do
-    if inconsistent_condition_ref[]
-        return 0
-    else
-        return 1
-    end
-end |> !Core.Compiler.is_consistent
-
-# should handle va-method properly
-callgetfield1(xs...) = getfield(getfield(xs, 1), 1)
-@test !Core.Compiler.is_inaccessiblememonly(Base.infer_effects(callgetfield1, (Base.RefValue{Symbol},)))
-const GLOBAL_XS = Ref(:julia)
-global_getfield() = callgetfield1(GLOBAL_XS)
-@test let
-    Base.Experimental.@force_compile
-    global_getfield()
-end === :julia
-GLOBAL_XS[] = :julia2
-@test let
-    Base.Experimental.@force_compile
-    global_getfield()
-end === :julia2
-
-# the `:inaccessiblememonly` helper effect allows us to prove `:effect_free`-ness of frames
-# including `setfield!` modifying local mutable object
-
-const global_ref = Ref{Any}()
-global const global_bit::Int = 42
-makeref() = Ref{Any}()
-setref!(ref, @nospecialize v) = ref[] = v
-
-@noinline function removable_if_unused1()
-    x = makeref()
-    setref!(x, 42)
-    x
-end
-@noinline function removable_if_unused2()
-    x = makeref()
-    setref!(x, global_bit)
-    x
-end
-for f = Any[removable_if_unused1, removable_if_unused2]
-    effects = Base.infer_effects(f)
-    @test Core.Compiler.is_inaccessiblememonly(effects)
-    @test Core.Compiler.is_effect_free(effects)
-    @test Core.Compiler.is_removable_if_unused(effects)
-    @test @eval fully_eliminated() do
-        $f()
-        nothing
-    end
-end
-@noinline function removable_if_unused3(v)
-    x = makeref()
-    setref!(x, v)
-    x
-end
-let effects = Base.infer_effects(removable_if_unused3, (Int,))
-    @test Core.Compiler.is_inaccessiblememonly(effects)
-    @test Core.Compiler.is_effect_free(effects)
-    @test Core.Compiler.is_removable_if_unused(effects)
-end
-@test fully_eliminated((Int,)) do v
-    removable_if_unused3(v)
-    nothing
-end
-
-@noinline function unremovable_if_unused1!(x)
-    setref!(x, 42)
-end
-@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused1!, (typeof(global_ref),)))
-@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused1!, (Any,)))
-
-@noinline function unremovable_if_unused2!()
-    setref!(global_ref, 42)
-end
-@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused2!))
-
-@noinline function unremovable_if_unused3!()
-    getfield(@__MODULE__, :global_ref)[] = nothing
-end
-@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused3!))
-
-# array ops
-# =========
-
-# allocation
-# ----------
-
-# low-level constructor
-@noinline construct_array(@nospecialize(T), args...) = Array{T}(undef, args...)
-# should eliminate safe but dead allocations
-let good_dims = @static Int === Int64 ? (1:10) : (1:8)
-    Ns = @static Int === Int64 ? (1:10) : (1:8)
-    for dim = good_dims, N = Ns
-        dims = ntuple(i->dim, N)
-        @test @eval Base.infer_effects() do
-            $construct_array(Int, $(dims...))
-        end |> Core.Compiler.is_removable_if_unused
-        @test @eval fully_eliminated() do
-            $construct_array(Int, $(dims...))
-            nothing
-        end
-    end
-end
-# should analyze throwness correctly
-let bad_dims = [-1, typemax(Int)]
-    for dim in bad_dims, N in 1:10
-        dims = ntuple(i->dim, N)
-        @test @eval Base.infer_effects() do
-            $construct_array(Int, $(dims...))
-        end |> !Core.Compiler.is_removable_if_unused
-        @test @eval !fully_eliminated() do
-            $construct_array(Int, $(dims...))
-            nothing
-        end
-        @test_throws "invalid Array" @eval $construct_array(Int, $(dims...))
-    end
-end
-
-# high-level interfaces
-# getindex
-for safesig = Any[
-        (Type{Int},)
-        (Type{Int}, Int)
-        (Type{Int}, Int, Int)
-        (Type{Number},)
-        (Type{Number}, Number)
-        (Type{Number}, Int)
-        (Type{Any},)
-        (Type{Any}, Any,)
-        (Type{Any}, Any, Any)
-    ]
-    let effects = Base.infer_effects(getindex, safesig)
-        @test Core.Compiler.is_consistent_if_notreturned(effects)
-        @test Core.Compiler.is_removable_if_unused(effects)
-    end
-end
-for unsafesig = Any[
-        (Type{Int}, String)
-        (Type{Int}, Any)
-        (Type{Number}, AbstractString)
-        (Type{Number}, Any)
-    ]
-    let effects = Base.infer_effects(getindex, unsafesig)
-        @test !Core.Compiler.is_nothrow(effects)
-    end
-end
-# vect
-for safesig = Any[
-        ()
-        (Int,)
-        (Int, Int)
-    ]
-    let effects = Base.infer_effects(Base.vect, safesig)
-        @test Core.Compiler.is_consistent_if_notreturned(effects)
-        @test Core.Compiler.is_removable_if_unused(effects)
-    end
-end
-
-# arrayref
-# --------
-
-for tt = Any[(Bool,Vector{Any},Int),
-             (Bool,Matrix{Any},Int,Int)]
-    @testset let effects = Base.infer_effects(Base.arrayref, tt)
-        @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects)
-        @test Core.Compiler.is_effect_free(effects)
-        @test !Core.Compiler.is_nothrow(effects)
-        @test Core.Compiler.is_terminates(effects)
-    end
-end
-
-# arrayset
-# --------
-
-for tt = Any[(Bool,Vector{Any},Any,Int),
-             (Bool,Matrix{Any},Any,Int,Int)]
-    @testset let effects = Base.infer_effects(Base.arrayset, tt)
-        @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects)
-        @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects)
-        @test !Core.Compiler.is_nothrow(effects)
-        @test Core.Compiler.is_terminates(effects)
-    end
-end
-# nothrow for arrayset
-@test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i
-    Base.arrayset(true, a, v, i)
-end |> !Core.Compiler.is_nothrow
-@test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i
-    a[i] = v # may throw
-end |> !Core.Compiler.is_nothrow
-# when bounds checking is turned off, it should be safe
-@test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i
-    Base.arrayset(false, a, v, i)
-end |> Core.Compiler.is_nothrow
-@test Base.infer_effects((Vector{Number},Number,Int)) do a, v, i
-    Base.arrayset(false, a, v, i)
-end |> Core.Compiler.is_nothrow
-
-# arraysize
-# ---------
-
-let effects = Base.infer_effects(Base.arraysize, (Array,Int))
-    @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects)
-    @test Core.Compiler.is_effect_free(effects)
-    @test !Core.Compiler.is_nothrow(effects)
-    @test Core.Compiler.is_terminates(effects)
-end
-# Test that arraysize has proper effect modeling
-@test fully_eliminated(M->(size(M, 2); nothing), (Matrix{Float64},))
-
-# arraylen
-# --------
-
-let effects = Base.infer_effects(Base.arraylen, (Vector{Any},))
-    @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects)
-    @test Core.Compiler.is_effect_free(effects)
-    @test Core.Compiler.is_nothrow(effects)
-    @test Core.Compiler.is_terminates(effects)
-end
-
-# resize
-# ------
-
-for op = Any[
-        Base._growbeg!,
-        Base._growend!,
-        Base._deletebeg!,
-        Base._deleteend!,
-    ]
-    let effects = Base.infer_effects(op, (Vector, Int))
-        @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects)
-        @test Core.Compiler.is_terminates(effects)
-        @test !Core.Compiler.is_nothrow(effects)
-    end
-end
-
-# end to end
-# ----------
-
-function simple_vec_ops(T, op!, op, xs...)
-    a = T[]
-    op!(a, xs...)
-    return op(a)
-end
-for T = Any[Int,Any], op! = Any[push!,pushfirst!], op = Any[length,size],
-    xs = Any[(Int,), (Int,Int,)]
-    let effects = Base.infer_effects(simple_vec_ops, (Type{T},typeof(op!),typeof(op),xs...))
-        @test Core.Compiler.is_foldable(effects)
-    end
-end
-
-# Test that builtin_effects handles vararg correctly
-@test !Core.Compiler.is_nothrow(Core.Compiler.builtin_effects(Core.Compiler.fallback_lattice, Core.isdefined,
-    Core.Compiler.ArgInfo(nothing, Any[Core.Compiler.Const(Core.isdefined), String, Vararg{Any}]), Bool))
-
-# Test that :new can be eliminated even if an sparam is unknown
-struct SparamUnused{T}
-    x
-    SparamUnused(x::T) where {T} = new{T}(x)
-end
-mksparamunused(x) = (SparamUnused(x); nothing)
-let src = code_typed1(mksparamunused, (Any,))
-    @test count(isnew, src.code) == 0
-end
-
-struct WrapperOneField{T}
-    x::T
-end
-
-# Effects for getfield of type instance
-@test Base.infer_effects(Tuple{Nothing}) do x
-    WrapperOneField{typeof(x)}.instance
-end |> Core.Compiler.is_foldable_nothrow
-@test Base.infer_effects(Tuple{WrapperOneField{Float64}, Symbol}) do w, s
-    getfield(w, s)
-end |> Core.Compiler.is_foldable
-@test Core.Compiler.getfield_notundefined(WrapperOneField{Float64}, Symbol)
-@test Base.infer_effects(Tuple{WrapperOneField{Symbol}, Symbol}) do w, s
-    getfield(w, s)
-end |> Core.Compiler.is_foldable
-
-# Flow-sensitive consistenct for _typevar
-@test Base.infer_effects() do
-    return WrapperOneField == (WrapperOneField{T} where T)
-end |> Core.Compiler.is_foldable_nothrow
-
-# Test that dead `@inbounds` does not taint consistency
-# https://github.com/JuliaLang/julia/issues/48243
-@test Base.infer_effects(Tuple{Int64}) do i
-    false && @inbounds (1,2,3)[i]
-    return 1
-end |> Core.Compiler.is_foldable_nothrow
-
-@test Base.infer_effects(Tuple{Int64}) do i
-    @inbounds (1,2,3)[i]
-end |> !Core.Compiler.is_consistent
-
-@test Base.infer_effects(Tuple{Tuple{Int64}}) do x
-    @inbounds x[1]
-end |> Core.Compiler.is_foldable_nothrow
-
-# Test that :new of non-concrete, but otherwise known type
-# does not taint consistency.
-@eval struct ImmutRef{T}
-    x::T
-    ImmutRef(x) = $(Expr(:new, :(ImmutRef{typeof(x)}), :x))
-end
-@test Core.Compiler.is_foldable(Base.infer_effects(ImmutRef, Tuple{Any}))
-
-@test Core.Compiler.is_foldable_nothrow(Base.infer_effects(typejoin, ()))
-
-# nothrow-ness of subtyping operations
-# https://github.com/JuliaLang/julia/pull/48566
-@test !Core.Compiler.is_nothrow(Base.infer_effects((A,B)->A<:B, (Any,Any)))
-@test !Core.Compiler.is_nothrow(Base.infer_effects((A,B)->A>:B, (Any,Any)))
-
-# GotoIfNot should properly mark itself as throwing when given a non-Bool
-# https://github.com/JuliaLang/julia/pull/48583
-gotoifnot_throw_check_48583(x) = x ? x : 0
-@test !Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Missing,)))
-@test !Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Any,)))
-@test Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Bool,)))
-
-# unknown :static_parameter should taint :nothrow
-# https://github.com/JuliaLang/julia/issues/46771
-unknown_sparam_throw(::Union{Nothing, Type{T}}) where T = (T; nothing)
-unknown_sparam_nothrow1(x::Ref{T}) where T = (T; nothing)
-unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = (T; nothing)
-@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{Int},)))
-@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{<:Integer},)))
-@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type,)))
-@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Nothing,)))
-@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Union{Type{Int},Nothing},)))
-@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Any,)))
-@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow1, (Ref,)))
-@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow2, (Ref{Ref{T}} where T,)))
-
-# purely abstract recursion should not taint :terminates
-# https://github.com/JuliaLang/julia/issues/48983
-abstractly_recursive1() = abstractly_recursive2()
-abstractly_recursive2() = (Core.Compiler._return_type(abstractly_recursive1, Tuple{}); 1)
-abstractly_recursive3() = abstractly_recursive2()
-@test Core.Compiler.is_terminates(Base.infer_effects(abstractly_recursive3, ()))
-actually_recursive1(x) = actually_recursive2(x)
-actually_recursive2(x) = (x <= 0) ? 1 : actually_recursive1(x - 1)
-actually_recursive3(x) = actually_recursive2(x)
-@test !Core.Compiler.is_terminates(Base.infer_effects(actually_recursive3, (Int,)))
-
-# `isdefined` effects
-struct MaybeSome{T}
-    value::T
-    MaybeSome(x::T) where T = new{T}(x)
-    MaybeSome{T}(x::T) where T = new{T}(x)
-    MaybeSome{T}() where T = new{T}()
-end
-const undefined_ref = Ref{String}()
-const defined_ref = Ref{String}("julia")
-const undefined_some = MaybeSome{String}()
-const defined_some = MaybeSome{String}("julia")
-let effects = Base.infer_effects() do
-        isdefined(undefined_ref, :x)
-    end
-    @test !Core.Compiler.is_consistent(effects)
-    @test Core.Compiler.is_nothrow(effects)
-end
-let effects = Base.infer_effects() do
-        isdefined(defined_ref, :x)
-    end
-    @test Core.Compiler.is_consistent(effects)
-    @test Core.Compiler.is_nothrow(effects)
-end
-let effects = Base.infer_effects() do
-        isdefined(undefined_some, :value)
-    end
-    @test Core.Compiler.is_consistent(effects)
-    @test Core.Compiler.is_nothrow(effects)
-end
-let effects = Base.infer_effects() do
-        isdefined(defined_some, :value)
-    end
-    @test Core.Compiler.is_consistent(effects)
-    @test Core.Compiler.is_nothrow(effects)
-end
-# high-level interface test
-isassigned_effects(s) = isassigned(Ref(s))
-@test Core.Compiler.is_consistent(Base.infer_effects(isassigned_effects, (Symbol,)))
-@test fully_eliminated(; retval=true) do
-    isassigned_effects(:foo)
-end
-
-# Effects of Base.hasfield (#50198)
-hf50198(s) = hasfield(typeof((;x=1, y=2)), s)
-f50198() = (hf50198(Ref(:x)[]); nothing)
-@test fully_eliminated(f50198)
-
-# Effects properly applied to flags by irinterp (#50311)
-f50311(x, s) = Symbol(s)
-g50311(x) = Val{f50311((1.0, x), "foo")}()
-@test fully_eliminated(g50311, Tuple{Float64})
diff --git a/test/compiler/interpreter_exec.jl b/test/compiler/interpreter_exec.jl
deleted file mode 100644
index a310a2740131d..0000000000000
--- a/test/compiler/interpreter_exec.jl
+++ /dev/null
@@ -1,122 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# tests that interpreter matches codegen
-using Test
-using Core: GotoIfNot, ReturnNode
-
-# test that interpreter correctly handles PhiNodes (#29262)
-let m = Meta.@lower 1 + 1
-    @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::Core.CodeInfo
-    src.code = Any[
-        # block 1
-        QuoteNode(:a),
-        QuoteNode(:b),
-        GlobalRef(@__MODULE__, :test29262),
-        GotoIfNot(Core.SSAValue(3), 6),
-        # block 2
-        Core.PhiNode(Int32[4], Any[Core.SSAValue(1)]),
-        Core.PhiNode(Int32[4, 5], Any[Core.SSAValue(2), Core.SSAValue(5)]),
-        ReturnNode(Core.SSAValue(6)),
-    ]
-    nstmts = length(src.code)
-    src.ssavaluetypes = Any[ Any for _ = 1:nstmts ]
-    src.codelocs = fill(Int32(1), nstmts)
-    src.inferred = true
-    Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src))
-    global test29262 = true
-    @test :a === @eval $m
-    global test29262 = false
-    @test :b === @eval $m
-end
-
-let m = Meta.@lower 1 + 1
-    @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::Core.CodeInfo
-    src.code = Any[
-        # block 1
-        QuoteNode(:a),
-        QuoteNode(:b),
-        QuoteNode(:c),
-        GlobalRef(@__MODULE__, :test29262),
-        # block 2
-        Core.PhiNode(Int32[4, 16], Any[false, true]), # false, true
-        Core.PhiNode(Int32[4, 16], Any[Core.SSAValue(1), Core.SSAValue(2)]), # :a, :b
-        Core.PhiNode(Int32[4, 16], Any[Core.SSAValue(3), Core.SSAValue(6)]), # :c, :a
-        Core.PhiNode(Int32[16], Any[Core.SSAValue(7)]), # NULL, :c
-        # block 3
-        Core.PhiNode(Int32[], Any[]), # NULL, NULL
-        Core.PhiNode(Int32[17, 8], Any[true, Core.SSAValue(4)]), # test29262, test29262, [true]
-        Core.PhiNode(Int32[17], Vector{Any}(undef, 1)), # NULL, NULL
-        Core.PhiNode(Int32[8], Vector{Any}(undef, 1)), # NULL, NULL
-        Core.PhiNode(Int32[], Any[]), # NULL, NULL
-        Core.PhiNode(Int32[17, 8], Any[Core.SSAValue(2), Core.SSAValue(8)]), # NULL, :c, [:b]
-        Core.PhiNode(Int32[], Any[]), # NULL, NULL
-        GotoIfNot(Core.SSAValue(5), 5),
-        # block 4
-        GotoIfNot(Core.SSAValue(10), 9),
-        # block 5
-        Expr(:call, GlobalRef(Core, :tuple), Core.SSAValue(6), Core.SSAValue(7), Core.SSAValue(8), Core.SSAValue(14)),
-        ReturnNode(Core.SSAValue(18)),
-    ]
-    nstmts = length(src.code)
-    src.ssavaluetypes = Any[ Any for _ = 1:nstmts ]
-    src.codelocs = fill(Int32(1), nstmts)
-    src.inferred = true
-    Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src))
-    global test29262 = true
-    @test (:b, :a, :c, :c) === @eval $m
-    global test29262 = false
-    @test (:b, :a, :c, :b) === @eval $m
-end
-
-let m = Meta.@lower 1 + 1
-    @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::Core.CodeInfo
-    src.code = Any[
-        # block 1
-        QuoteNode(:a),
-        QuoteNode(:b),
-        GlobalRef(@__MODULE__, :test29262),
-        # block 2
-        Expr(:enter, 11),
-        # block 3
-        Core.UpsilonNode(),
-        Core.UpsilonNode(),
-        Core.UpsilonNode(Core.SSAValue(2)),
-        GotoIfNot(Core.SSAValue(3), 10),
-        # block 4
-        Core.UpsilonNode(Core.SSAValue(1)),
-        # block 5
-        Expr(:throw_undef_if_not, :expected, false),
-        # block 6
-        Core.PhiCNode(Any[Core.SSAValue(5), Core.SSAValue(7), Core.SSAValue(9)]), # NULL, :a, :b
-        Core.PhiCNode(Any[Core.SSAValue(6)]), # NULL
-        Expr(:leave, 1),
-        # block 7
-        ReturnNode(Core.SSAValue(11)),
-    ]
-    nstmts = length(src.code)
-    src.ssavaluetypes = Any[ Any for _ = 1:nstmts ]
-    src.codelocs = fill(Int32(1), nstmts)
-    src.inferred = true
-    Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src))
-    global test29262 = true
-    @test :a === @eval $m
-    global test29262 = false
-    @test :b === @eval $m
-end
-
-# https://github.com/JuliaLang/julia/issues/47065
-# `Core.Compiler.sort!` should be able to handle a big list
-let n = 1000
-    ex = :(return 1)
-    for _ in 1:n
-        ex = :(rand() < .1 && $(ex))
-    end
-    @eval global function f_1000_blocks()
-        $ex
-        return 0
-    end
-end
-@test f_1000_blocks() == 0
diff --git a/test/compiler/invalidation.jl b/test/compiler/invalidation.jl
deleted file mode 100644
index 20ab2483aa378..0000000000000
--- a/test/compiler/invalidation.jl
+++ /dev/null
@@ -1,258 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# setup
-# -----
-
-include("irutils.jl")
-
-using Test
-const CC = Core.Compiler
-import Core: MethodInstance, CodeInstance
-import .CC: WorldRange, WorldView
-
-struct InvalidationTesterCache
-    dict::IdDict{MethodInstance,CodeInstance}
-end
-InvalidationTesterCache() = InvalidationTesterCache(IdDict{MethodInstance,CodeInstance}())
-
-const INVALIDATION_TESTER_CACHE = InvalidationTesterCache()
-
-struct InvalidationTester <: CC.AbstractInterpreter
-    callback!
-    world::UInt
-    inf_params::CC.InferenceParams
-    opt_params::CC.OptimizationParams
-    inf_cache::Vector{CC.InferenceResult}
-    code_cache::InvalidationTesterCache
-    function InvalidationTester(callback! = nothing;
-                                world::UInt = Base.get_world_counter(),
-                                inf_params::CC.InferenceParams = CC.InferenceParams(),
-                                opt_params::CC.OptimizationParams = CC.OptimizationParams(),
-                                inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[],
-                                code_cache::InvalidationTesterCache = INVALIDATION_TESTER_CACHE)
-        if callback! === nothing
-            callback! = function (replaced::MethodInstance)
-                # Core.println(replaced) # debug
-                delete!(code_cache.dict, replaced)
-            end
-        end
-        return new(callback!, world, inf_params, opt_params, inf_cache, code_cache)
-    end
-end
-
-struct InvalidationTesterCacheView
-    interp::InvalidationTester
-    dict::IdDict{MethodInstance,CodeInstance}
-end
-
-CC.InferenceParams(interp::InvalidationTester) = interp.inf_params
-CC.OptimizationParams(interp::InvalidationTester) = interp.opt_params
-CC.get_world_counter(interp::InvalidationTester) = interp.world
-CC.get_inference_cache(interp::InvalidationTester) = interp.inf_cache
-CC.code_cache(interp::InvalidationTester) = WorldView(InvalidationTesterCacheView(interp, interp.code_cache.dict), WorldRange(interp.world))
-CC.get(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default)
-CC.getindex(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance) = getindex(wvc.cache.dict, mi)
-CC.haskey(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance) = haskey(wvc.cache.dict, mi)
-function CC.setindex!(wvc::WorldView{InvalidationTesterCacheView}, ci::CodeInstance, mi::MethodInstance)
-    add_callback!(wvc.cache.interp.callback!, mi)
-    setindex!(wvc.cache.dict, ci, mi)
-end
-
-function add_callback!(@nospecialize(callback!), mi::MethodInstance)
-    callback = function (replaced::MethodInstance, max_world,
-                         seen::Base.IdSet{MethodInstance} = Base.IdSet{MethodInstance}())
-        push!(seen, replaced)
-        callback!(replaced)
-        if isdefined(replaced, :backedges)
-            for item in replaced.backedges
-                isa(item, MethodInstance) || continue # might be `Type` object representing an `invoke` signature
-                mi = item
-                mi in seen && continue # otherwise fail into an infinite loop
-                var"#self#"(mi, max_world, seen)
-            end
-        end
-        return nothing
-    end
-
-    if !isdefined(mi, :callbacks)
-        mi.callbacks = Any[callback]
-    else
-        callbacks = mi.callbacks::Vector{Any}
-        if !any(@nospecialize(cb)->cb===callback, callbacks)
-            push!(callbacks, callback)
-        end
-    end
-    return nothing
-end
-
-
-# basic functionality test
-# ------------------------
-
-basic_callee(x) = x
-basic_caller(x) = basic_callee(x)
-
-# run inference and check that cache exist
-@test Base.return_types((Float64,); interp=InvalidationTester()) do x
-    basic_caller(x)
-end |> only === Float64
-@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-    mi.def.name === :basic_callee
-end
-@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-    mi.def.name === :basic_caller
-end
-
-# this redefinition below should invalidate the cache
-basic_callee(x) = x, x
-@test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-    mi.def.name === :basic_callee
-end
-@test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-    mi.def.name === :basic_caller
-end
-
-# re-run inference and check the result is updated (and new cache exists)
-@test Base.return_types((Float64,); interp=InvalidationTester()) do x
-    basic_caller(x)
-end |> only === Tuple{Float64,Float64}
-@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-    mi.def.name === :basic_callee
-end
-@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-    mi.def.name === :basic_caller
-end
-
-# backedge optimization
-# ---------------------
-
-const GLOBAL_BUFFER = IOBuffer()
-
-# test backedge optimization when the callee's type and effects information are maximized
-begin take!(GLOBAL_BUFFER)
-
-    pr48932_callee(x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x))
-    pr48932_caller(x) = pr48932_callee(Base.inferencebarrier(x))
-
-    # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top
-    let rt = only(Base.return_types(pr48932_callee, (Any,)))
-        @test rt === Any
-        effects = Base.infer_effects(pr48932_callee, (Any,))
-        @test Core.Compiler.Effects(effects; noinbounds=false) == Core.Compiler.Effects()
-    end
-
-    # run inference on both `pr48932_caller` and `pr48932_callee`
-    let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x
-            @inline pr48932_caller(x)
-        end |> only
-        @test rt === Any
-        @test any(iscall((src, pr48932_callee)), src.code)
-    end
-    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_callee
-    end
-    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_caller
-    end
-    @test 42 == pr48932_caller(42)
-    @test "42" == String(take!(GLOBAL_BUFFER))
-
-    # test that we didn't add the backedge from `pr48932_callee` to `pr48932_caller`:
-    # this redefinition below should invalidate the cache of `pr48932_callee` but not that of `pr48932_caller`
-    pr48932_callee(x) = (print(GLOBAL_BUFFER, x); nothing)
-    @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_callee
-    end
-    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_caller
-    end
-    @test isnothing(pr48932_caller(42))
-    @test "42" == String(take!(GLOBAL_BUFFER))
-end
-
-# we can avoid adding backedge even if the callee's return type is not the top
-# when the return value is not used within the caller
-begin take!(GLOBAL_BUFFER)
-
-    pr48932_callee_inferrable(x) = (print(GLOBAL_BUFFER, x); nothing)
-    pr48932_caller_unuse(x) = (pr48932_callee_inferrable(Base.inferencebarrier(x)); nothing)
-
-    # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top
-    let rt = only(Base.return_types(pr48932_callee_inferrable, (Any,)))
-        @test rt === Nothing
-        effects = Base.infer_effects(pr48932_callee_inferrable, (Any,))
-        @test Core.Compiler.Effects(effects; noinbounds=false) == Core.Compiler.Effects()
-    end
-
-    # run inference on both `pr48932_caller` and `pr48932_callee`:
-    # we don't need to add backedge to `pr48932_callee` from `pr48932_caller`
-    # since the inference result of `pr48932_callee` is maximized and it's not inlined
-    let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x
-            @inline pr48932_caller_unuse(x)
-        end |> only
-        @test rt === Nothing
-        @test any(iscall((src, pr48932_callee_inferrable)), src.code)
-    end
-    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_callee_inferrable
-    end
-    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_caller_unuse
-    end
-    @test isnothing(pr48932_caller_unuse(42))
-    @test "42" == String(take!(GLOBAL_BUFFER))
-
-    # test that we didn't add the backedge from `pr48932_callee_inferrable` to `pr48932_caller_unuse`:
-    # this redefinition below should invalidate the cache of `pr48932_callee_inferrable` but not that of `pr48932_caller_unuse`
-    pr48932_callee_inferrable(x) = (print(GLOBAL_BUFFER, "foo"); x)
-    @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_callee_inferrable
-    end
-    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_caller_unuse
-    end
-    @test isnothing(pr48932_caller_unuse(42))
-    @test "foo" == String(take!(GLOBAL_BUFFER))
-end
-
-# we need to add backedge when the callee is inlined
-begin take!(GLOBAL_BUFFER)
-
-    @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x))
-    pr48932_caller_inlined(x) = pr48932_callee_inlined(Base.inferencebarrier(x))
-
-    # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top
-    let rt = only(Base.return_types(pr48932_callee_inlined, (Any,)))
-        @test rt === Any
-        effects = Base.infer_effects(pr48932_callee_inlined, (Any,))
-        @test Core.Compiler.Effects(effects; noinbounds=false) == Core.Compiler.Effects()
-    end
-
-    # run inference on `pr48932_caller_inlined` and `pr48932_callee_inlined`
-    let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x
-            @inline pr48932_caller_inlined(x)
-        end |> only
-        @test rt === Any
-        @test any(isinvoke(:pr48932_callee_inlined), src.code)
-    end
-    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_callee_inlined
-    end
-    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_caller_inlined
-    end
-    @test 42 == pr48932_caller_inlined(42)
-    @test "42" == String(take!(GLOBAL_BUFFER))
-
-    # test that we added the backedge from `pr48932_callee_inlined` to `pr48932_caller_inlined`:
-    # this redefinition below should invalidate the cache of `pr48932_callee_inlined` but not that of `pr48932_caller_inlined`
-    @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); nothing)
-    @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_callee_inlined
-    end
-    @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
-        mi.def.name === :pr48932_caller_inlined
-    end
-    @test isnothing(pr48932_caller_inlined(42))
-    @test "42" == String(take!(GLOBAL_BUFFER))
-end
diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl
deleted file mode 100644
index 00de9b2472de4..0000000000000
--- a/test/compiler/irutils.jl
+++ /dev/null
@@ -1,57 +0,0 @@
-using Core: CodeInfo, ReturnNode, MethodInstance
-using Core.Compiler: IRCode, IncrementalCompact, singleton_type, VarState
-using Base.Meta: isexpr
-using InteractiveUtils: gen_call_with_extracted_types_and_kwargs
-
-argextype(@nospecialize args...) = Core.Compiler.argextype(args..., VarState[])
-code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::CodeInfo
-macro code_typed1(ex0...)
-    return gen_call_with_extracted_types_and_kwargs(__module__, :code_typed1, ex0)
-end
-get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code
-macro get_code(ex0...)
-    return gen_call_with_extracted_types_and_kwargs(__module__, :get_code, ex0)
-end
-
-# check if `x` is a statement with a given `head`
-isnew(@nospecialize x) = isexpr(x, :new)
-issplatnew(@nospecialize x) = isexpr(x, :splatnew)
-isreturn(@nospecialize x) = isa(x, ReturnNode)
-
-# check if `x` is a dynamic call of a given function
-iscall(y) = @nospecialize(x) -> iscall(y, x)
-function iscall((src, f)::Tuple{IR,Base.Callable}, @nospecialize(x)) where IR<:Union{CodeInfo,IRCode,IncrementalCompact}
-    return iscall(x) do @nospecialize x
-        singleton_type(argextype(x, src)) === f
-    end
-end
-function iscall(pred::Base.Callable, @nospecialize(x))
-    if isexpr(x, :(=))
-        x = x.args[2]
-    end
-    return isexpr(x, :call) && pred(x.args[1])
-end
-
-# check if `x` is a statically-resolved call of a function whose name is `sym`
-isinvoke(y) = @nospecialize(x) -> isinvoke(y, x)
-isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x)
-isinvoke(pred::Function, @nospecialize(x)) = isexpr(x, :invoke) && pred(x.args[1]::MethodInstance)
-
-function fully_eliminated(@nospecialize args...; retval=(@__FILE__), kwargs...)
-    code = code_typed1(args...; kwargs...).code
-    if retval !== (@__FILE__)
-        length(code) == 1 || return false
-        code1 = code[1]
-        isreturn(code1) || return false
-        val = code1.val
-        if val isa QuoteNode
-            val = val.value
-        end
-        return val == retval
-    else
-        return length(code) == 1 && isreturn(code[1])
-    end
-end
-macro fully_eliminated(ex0...)
-    return gen_call_with_extracted_types_and_kwargs(__module__, :fully_eliminated, ex0)
-end
diff --git a/test/compiler/newinterp.jl b/test/compiler/newinterp.jl
deleted file mode 100644
index 56a68f2a09545..0000000000000
--- a/test/compiler/newinterp.jl
+++ /dev/null
@@ -1,45 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-    @newinterp NewInterpreter
-
-Defines new `NewInterpreter <: AbstractInterpreter` whose cache is separated
-from the native code cache, satisfying the minimum interface requirements.
-"""
-macro newinterp(InterpName)
-    InterpCacheName = esc(Symbol(string(InterpName, "Cache")))
-    InterpName = esc(InterpName)
-    C = Core
-    CC = Core.Compiler
-    quote
-        struct $InterpCacheName
-            dict::IdDict{$C.MethodInstance,$C.CodeInstance}
-        end
-        $InterpCacheName() = $InterpCacheName(IdDict{$C.MethodInstance,$C.CodeInstance}())
-        struct $InterpName <: $CC.AbstractInterpreter
-            meta # additional information
-            world::UInt
-            inf_params::$CC.InferenceParams
-            opt_params::$CC.OptimizationParams
-            inf_cache::Vector{$CC.InferenceResult}
-            code_cache::$InterpCacheName
-            function $InterpName(meta = nothing;
-                                 world::UInt = Base.get_world_counter(),
-                                 inf_params::$CC.InferenceParams = $CC.InferenceParams(),
-                                 opt_params::$CC.OptimizationParams = $CC.OptimizationParams(),
-                                 inf_cache::Vector{$CC.InferenceResult} = $CC.InferenceResult[],
-                                 code_cache::$InterpCacheName = $InterpCacheName())
-                return new(meta, world, inf_params, opt_params, inf_cache, code_cache)
-            end
-        end
-        $CC.InferenceParams(interp::$InterpName) = interp.inf_params
-        $CC.OptimizationParams(interp::$InterpName) = interp.opt_params
-        $CC.get_world_counter(interp::$InterpName) = interp.world
-        $CC.get_inference_cache(interp::$InterpName) = interp.inf_cache
-        $CC.code_cache(interp::$InterpName) = $CC.WorldView(interp.code_cache, $CC.WorldRange(interp.world))
-        $CC.get(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance, default) = get(wvc.cache.dict, mi, default)
-        $CC.getindex(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = getindex(wvc.cache.dict, mi)
-        $CC.haskey(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = haskey(wvc.cache.dict, mi)
-        $CC.setindex!(wvc::$CC.WorldView{$InterpCacheName}, ci::$C.CodeInstance, mi::$C.MethodInstance) = setindex!(wvc.cache.dict, ci, mi)
-    end
-end
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
deleted file mode 100644
index 43f17d4ad69f2..0000000000000
--- a/test/compiler/ssair.jl
+++ /dev/null
@@ -1,617 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Base.Meta
-using Core.IR
-const Compiler = Core.Compiler
-using .Compiler: CFG, BasicBlock, NewSSAValue
-
-include(normpath(@__DIR__, "irutils.jl"))
-
-make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs)
-
-function make_ci(code)
-    ci = (Meta.@lower 1 + 1).args[1]
-    ci.code = code
-    nstmts = length(ci.code)
-    ci.ssavaluetypes = nstmts
-    ci.codelocs = fill(Int32(1), nstmts)
-    ci.ssaflags = fill(Int32(0), nstmts)
-    return ci
-end
-
-# TODO: this test is broken
-#let code = Any[
-#        GotoIfNot(SlotNumber(2), 4),
-#        Expr(:(=), SlotNumber(3), 2),
-#        # Test a SlotNumber as a value of a PhiNode
-#        PhiNode(Int32[2,3], Any[1, SlotNumber(3)]),
-#        ReturnNode(SSAValue(3))
-#    ]
-#
-#    ci = eval(Expr(:new, CodeInfo,
-#        code,
-#        nothing,
-#        Any[Any, Any, Any],
-#        Any[Any],
-#        UInt8[0, 0, 0],
-#        Any[Symbol("Self"), :arg, :slot],
-#        false, false, false, false
-#    ))
-#
-#    NullLineInfo = Core.LineInfoNode(Main, Symbol(""), Symbol(""), Int32(0), Int32(0))
-#    Compiler.run_passes(ci, 1, [NullLineInfo])
-#    # XXX: missing @test
-#end
-
-# Issue #31121
-
-# We have the following CFG and corresponding DFS numbering:
-#
-#     CFG     DFS
-#
-#      A       1
-#      | \     | \
-#      B C     2 5
-#     /|/     /|/
-#    | D     | 3
-#     \|      \|
-#      E       4
-#
-# In the bug `E` got the wrong dominator (`B` instead of `A`), because the DFS
-# tree had the wrong parent (i.e. we recorded the parent of `4` as `2` rather
-# than `3`, so the idom search missed that `1` is `3`'s semi-dominator). Here
-# we manually construct that CFG and verify that the DFS records the correct
-# parent.
-let cfg = CFG(BasicBlock[
-    make_bb([]     , [2, 3]),
-    make_bb([1]    , [4, 5]),
-    make_bb([1]    , [4]   ),
-    make_bb([2, 3] , [5]   ),
-    make_bb([2, 4] , []    ),
-], Int[])
-    dfs = Compiler.DFS(cfg.blocks)
-    @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4
-    let correct_idoms = Compiler.naive_idoms(cfg.blocks),
-        correct_pidoms = Compiler.naive_idoms(cfg.blocks, true)
-        @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms
-        @test Compiler.construct_postdomtree(cfg.blocks).idoms_bb == correct_pidoms
-        # For completeness, reverse the order of pred/succ in the CFG and verify
-        # the answer doesn't change (it does change the which node is chosen
-        # as the semi-dominator, since it changes the DFS numbering).
-        for (a, b, c, d) in Iterators.product(((true, false) for _ = 1:4)...)
-            let blocks = copy(cfg.blocks)
-                a && (blocks[1] = make_bb(blocks[1].preds, reverse(blocks[1].succs)))
-                b && (blocks[2] = make_bb(blocks[2].preds, reverse(blocks[2].succs)))
-                c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs))
-                d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs))
-                cfg′ = CFG(blocks, cfg.index)
-                @test Compiler.construct_domtree(cfg′.blocks).idoms_bb == correct_idoms
-                @test Compiler.construct_postdomtree(cfg′.blocks).idoms_bb == correct_pidoms
-            end
-        end
-    end
-end
-
-# test >:
-let
-    f(a, b) = a >: b
-    code_typed(f, Tuple{Any, Any})
-    # XXX: missing @test
-end
-
-for compile in ("min", "yes")
-    cmd = `$(Base.julia_cmd()) --compile=$compile interpreter_exec.jl`
-    if !success(pipeline(Cmd(cmd, dir=@__DIR__); stdout=stdout, stderr=stderr))
-        error("Interpreter test failed, cmd : $cmd")
-    end
-end
-
-# PR #32145
-# Make sure IncrementalCompact can handle blocks with predecessors of index 0
-# while removing blocks with no predecessors.
-let cfg = CFG(BasicBlock[
-    make_bb([]        , [2, 4]),
-    make_bb([1]       , [4, 5]),
-    make_bb([]        , [4]   ), # should be removed
-    make_bb([0, 1, 2] , [5]   ), # 0 predecessor should be preserved
-    make_bb([2, 3]    , []    ),
-], Int[])
-    insts = Compiler.InstructionStream([], [], Any[], Int32[], UInt8[])
-    ir = Compiler.IRCode(insts, cfg, Core.LineInfoNode[], Any[], Expr[], Compiler.VarState[])
-    compact = Compiler.IncrementalCompact(ir, true)
-    @test length(compact.cfg_transform.result_bbs) == 4 && 0 in compact.cfg_transform.result_bbs[3].preds
-end
-
-# Issue #32579 - Optimizer bug involving type constraints
-function f32579(x::Int, b::Bool)
-    if b
-        x = nothing
-    end
-    if isa(x, Int)
-        y = x
-    else
-        y = x
-    end
-    if isa(y, Nothing)
-        z = y
-    else
-        z = y
-    end
-    return z === nothing
-end
-@test f32579(0, true) === true
-@test f32579(0, false) === false
-
-# Test for bug caused by renaming blocks improperly, related to PR #32145
-let ci = make_ci([
-        # block 1
-        Core.Compiler.GotoIfNot(Expr(:boundscheck), 6),
-        # block 2
-        Expr(:call, GlobalRef(Base, :size), Core.Compiler.Argument(3)),
-        Core.Compiler.ReturnNode(),
-        # block 3
-        Core.PhiNode(),
-        Core.Compiler.ReturnNode(),
-        # block 4
-        GlobalRef(Main, :something),
-        GlobalRef(Main, :somethingelse),
-        Expr(:call, Core.SSAValue(6), Core.SSAValue(7)),
-        Core.Compiler.GotoIfNot(Core.SSAValue(8), 11),
-        # block 5
-        Core.Compiler.ReturnNode(Core.SSAValue(8)),
-        # block 6
-        Core.Compiler.ReturnNode(Core.SSAValue(8))
-    ])
-    ir = Core.Compiler.inflate_ir(ci)
-    ir = Core.Compiler.compact!(ir, true)
-    @test Core.Compiler.verify_ir(ir) === nothing
-end
-
-# Test that the verifier doesn't choke on cglobals (which aren't linearized)
-let ci = make_ci([
-        Expr(:call, GlobalRef(Main, :cglobal),
-                    Expr(:call, Core.tuple, :(:c)), Nothing),
-                    Core.Compiler.ReturnNode()
-    ])
-    ir = Core.Compiler.inflate_ir(ci)
-    @test Core.Compiler.verify_ir(ir) === nothing
-end
-
-# Test that GlobalRef in value position is non-canonical
-let ci = make_ci([
-        Expr(:call, GlobalRef(Main, :something_not_defined_please))
-        ReturnNode(SSAValue(1))
-    ])
-    ir = Core.Compiler.inflate_ir(ci)
-    ir = Core.Compiler.compact!(ir, true)
-    @test_throws ErrorException Core.Compiler.verify_ir(ir, false)
-end
-
-# Issue #29107
-let ci = make_ci([
-        # Block 1
-        Core.Compiler.GotoNode(6),
-        # Block 2
-        # The following phi node gets deleted because it only has one edge, so
-        # the call to `something` is made to use the value of `something2()`,
-        # even though this value is defined after it. We don't want this to
-        # happen even though this block is dead because subsequent optimization
-        # passes may look at all code, dead or not.
-        Core.PhiNode(Int32[2], Any[Core.SSAValue(4)]),
-        Expr(:call, :something, Core.SSAValue(2)),
-        Expr(:call, :something2),
-        Core.Compiler.GotoNode(2),
-        # Block 3
-        Core.Compiler.ReturnNode(1000)
-    ])
-    ir = Core.Compiler.inflate_ir(ci)
-    ir = Core.Compiler.compact!(ir, true)
-    # Make sure that if there is a call to `something` (block 2 should be
-    # removed entirely with working DCE), it doesn't use any SSA values that
-    # come after it.
-    for i in 1:length(ir.stmts)
-        s = ir.stmts[i]
-        if Meta.isexpr(s, :call) && s.args[1] === :something
-            if isa(s.args[2], SSAValue)
-                @test s.args[2].id <= i
-            end
-        end
-    end
-end
-
-# Make sure dead blocks that are removed are not still referenced in live phi
-# nodes
-let ci = make_ci([
-        # Block 1
-        Core.Compiler.GotoNode(3),
-        # Block 2 (no predecessors)
-        Core.Compiler.ReturnNode(3),
-        # Block 3
-        Core.PhiNode(Int32[1, 2], Any[100, 200]),
-        Core.Compiler.ReturnNode(Core.SSAValue(3))
-    ])
-    ir = Core.Compiler.inflate_ir(ci)
-    ir = Core.Compiler.compact!(ir, true)
-    @test Core.Compiler.verify_ir(ir) == nothing
-end
-
-# issue #37919
-let ci = code_lowered(()->@isdefined(_not_def_37919_), ())[1]
-    ir = Core.Compiler.inflate_ir(ci)
-    @test Core.Compiler.verify_ir(ir) === nothing
-end
-
-# Test dynamic update of domtree with edge insertions and deletions in the
-# following CFG:
-#
-#     1,1
-#     |  \
-#     |   \
-#     |    3,4 <
-#     |    |    \
-#     2,2  4,5   |
-#     |    |    /
-#     |    6,6 /
-#     |   /
-#     |  /
-#     5,3
-#
-# Nodes indicate BB number, preorder number
-# Edges point down, except the arrow that points up
-let cfg = CFG(BasicBlock[
-        make_bb([],     [3, 2]), # the order of the successors is deliberate
-        make_bb([1],    [5]),    # and is to determine the preorder numbers
-        make_bb([1, 6], [4]),
-        make_bb([3],    [6]),
-        make_bb([2, 6], []),
-        make_bb([4],    [5, 3]),
-    ], Int[])
-    domtree = Compiler.construct_domtree(cfg.blocks)
-    @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6]
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
-
-    # Test removal of edge between a parent and child in the DFS tree, which
-    # should trigger complete recomputation of domtree (first case in algorithm
-    # for removing edge from domtree dynamically)
-    Compiler.cfg_delete_edge!(cfg, 2, 5)
-    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 2, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4]
-    # Add edge back (testing first case for insertion)
-    Compiler.cfg_insert_edge!(cfg, 2, 5)
-    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 2, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
-
-    # Test second case in algorithm for removing edges from domtree, in which
-    # `from` is on a semidominator path from the semidominator of `to` to `to`
-    Compiler.cfg_delete_edge!(cfg, 6, 5)
-    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4]
-    # Add edge back (testing second case for insertion)
-    Compiler.cfg_insert_edge!(cfg, 6, 5)
-    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
-
-    # Test last case for removing edges, in which edge does not satisfy either
-    # of the above conditions
-    Compiler.cfg_delete_edge!(cfg, 6, 3)
-    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
-    # Add edge back (testing second case for insertion)
-    Compiler.cfg_insert_edge!(cfg, 6, 3)
-    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
-
-    # Try removing all edges from root
-    Compiler.cfg_delete_edge!(cfg, 1, 2)
-    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 2)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4]
-    Compiler.cfg_delete_edge!(cfg, 1, 3)
-    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0]
-    # Add edges back
-    Compiler.cfg_insert_edge!(cfg, 1, 2)
-    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 2)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0]
-    Compiler.cfg_insert_edge!(cfg, 1, 3)
-    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
-end
-
-# Issue #41975 - SSA conversion drops type check
-f_if_typecheck() = (if nothing; end; unsafe_load(Ptr{Int}(0)))
-@test_throws TypeError f_if_typecheck()
-
-let # https://github.com/JuliaLang/julia/issues/42258
-    code = """
-        function foo()
-            a = @noinline rand(rand(0:10))
-            if isempty(a)
-                err = BoundsError(a)
-                throw(err)
-                return nothing
-            end
-            return a
-        end
-        code_typed(foo; optimize=true)
-
-        code_typed(Core.Compiler.setindex!, (Core.Compiler.UseRef,Core.Compiler.NewSSAValue); optimize=true)
-        """
-    cmd = `$(Base.julia_cmd()) -g 2 -e $code`
-    stderr = Base.BufferStream()
-    @test success(pipeline(Cmd(cmd); stdout, stderr))
-    @test readchomp(stderr) == ""
-end
-
-@testset "code_ircode" begin
-    @test first(only(Base.code_ircode(+, (Float64, Float64)))) isa Compiler.IRCode
-    @test first(only(Base.code_ircode(+, (Float64, Float64); optimize_until = 3))) isa
-          Compiler.IRCode
-    @test first(only(Base.code_ircode(+, (Float64, Float64); optimize_until = "SROA"))) isa
-          Compiler.IRCode
-
-    function demo(f)
-        f()
-        f()
-        f()
-    end
-    @test first(only(Base.code_ircode(demo))) isa Compiler.IRCode
-    @test first(only(Base.code_ircode(demo; optimize_until = 3))) isa Compiler.IRCode
-    @test first(only(Base.code_ircode(demo; optimize_until = "SROA"))) isa Compiler.IRCode
-end
-
-# slots after SSA conversion
-function f_with_slots(a, b)
-    # `c` and `d` are local variables
-    c = a + b
-    d = c > 0
-    return (c, d)
-end
-let # #self#, a, b, c, d
-    unopt = code_typed1(f_with_slots, (Int,Int); optimize=false)
-    @test length(unopt.slotnames) == length(unopt.slotflags) == length(unopt.slottypes) == 5
-    ir_withslots = first(only(Base.code_ircode(f_with_slots, (Int,Int); optimize_until="convert")))
-    @test length(ir_withslots.argtypes) == 5
-    # #self#, a, b
-    opt = code_typed1(f_with_slots, (Int,Int); optimize=true)
-    @test length(opt.slotnames) == length(opt.slotflags) == length(opt.slottypes) == 3
-    ir_ssa = first(only(Base.code_ircode(f_with_slots, (Int,Int); optimize_until="slot2reg")))
-    @test length(ir_ssa.argtypes) == 3
-end
-
-let
-    function test_useref(stmt, v, op)
-        if isa(stmt, Expr)
-            @test stmt.args[op] === v
-        elseif isa(stmt, GotoIfNot)
-            @test stmt.cond === v
-        elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode)
-            @test stmt.val === v
-        elseif isa(stmt, SSAValue) || isa(stmt, NewSSAValue)
-            @test stmt === v
-        elseif isa(stmt, PiNode)
-            @test stmt.val === v && stmt.typ === typeof(stmt)
-        elseif isa(stmt, PhiNode) || isa(stmt, PhiCNode)
-            @test stmt.values[op] === v
-        end
-    end
-
-    function _test_userefs(@nospecialize stmt)
-        ex = Expr(:call, :+, Core.SSAValue(3), 1)
-        urs = Core.Compiler.userefs(stmt)::Core.Compiler.UseRefIterator
-        it = Core.Compiler.iterate(urs)
-        while it !== nothing
-            ur = getfield(it, 1)::Core.Compiler.UseRef
-            op = getfield(it, 2)::Int
-            v1 = Core.Compiler.getindex(ur)
-            # set to dummy expression and then back to itself to test `_useref_setindex!`
-            v2 = Core.Compiler.setindex!(ur, ex)
-            test_useref(v2, ex, op)
-            Core.Compiler.setindex!(ur, v1)
-            @test Core.Compiler.getindex(ur) === v1
-            it = Core.Compiler.iterate(urs, op)
-        end
-    end
-
-    function test_userefs(body)
-        for stmt in body
-            _test_userefs(stmt)
-        end
-    end
-
-    # this isn't valid code, we just care about looking at a variety of IR nodes
-    body = Any[
-        Expr(:enter, 11),
-        Expr(:call, :+, SSAValue(3), 1),
-        Expr(:throw_undef_if_not, :expected, false),
-        Expr(:leave, 1),
-        Expr(:(=), SSAValue(1), Expr(:call, :+, SSAValue(3), 1)),
-        UpsilonNode(),
-        UpsilonNode(SSAValue(2)),
-        PhiCNode(Any[SSAValue(5), SSAValue(7), SSAValue(9)]),
-        PhiCNode(Any[SSAValue(6)]),
-        PhiNode(Int32[8], Any[SSAValue(7)]),
-        PiNode(SSAValue(6), GotoNode),
-        GotoIfNot(SSAValue(3), 10),
-        GotoNode(5),
-        SSAValue(7),
-        NewSSAValue(9),
-        ReturnNode(SSAValue(11)),
-    ]
-
-    test_userefs(body)
-end
-
-let ir = Base.code_ircode((Bool,Any)) do c, x
-        println(x, 1) #1
-        if c
-            println(x, 2) #2
-        else
-            println(x, 3) #3
-        end
-        println(x, 4) #4
-    end |> only |> first
-    # IR legality check
-    @test length(ir.cfg.blocks) == 4
-    for i = 1:4
-        @test any(ir.cfg.blocks[i].stmts) do j
-            inst = ir.stmts[j][:inst]
-            iscall((ir, println), inst) &&
-            inst.args[3] == i
-        end
-    end
-    # domination analysis
-    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
-    @test Core.Compiler.dominates(domtree, 1, 2)
-    @test Core.Compiler.dominates(domtree, 1, 3)
-    @test Core.Compiler.dominates(domtree, 1, 4)
-    for i = 2:4
-        for j = 1:4
-            i == j && continue
-            @test !Core.Compiler.dominates(domtree, i, j)
-        end
-    end
-    # post domination analysis
-    post_domtree = Core.Compiler.construct_postdomtree(ir.cfg.blocks)
-    @test Core.Compiler.postdominates(post_domtree, 4, 1)
-    @test Core.Compiler.postdominates(post_domtree, 4, 2)
-    @test Core.Compiler.postdominates(post_domtree, 4, 3)
-    for i = 1:3
-        for j = 1:4
-            i == j && continue
-            @test !Core.Compiler.postdominates(post_domtree, i, j)
-        end
-    end
-end
-
-@testset "issue #46967: undef stmts introduced by compaction" begin
-    # generate some IR
-    function foo(i)
-        j = i+42
-        j == 1 ? 1 : 2
-    end
-    ir = only(Base.code_ircode(foo, (Int,)))[1]
-    instructions = length(ir.stmts)
-
-    # get the addition instruction
-    add_stmt = ir.stmts[1]
-    @test Meta.isexpr(add_stmt[:inst], :call) && add_stmt[:inst].args[3] == 42
-
-    # replace the addition with a slightly different one
-    inst = Core.Compiler.NewInstruction(Expr(:call, add_stmt[:inst].args[1], add_stmt[:inst].args[2], 999), Int)
-    node = Core.Compiler.insert_node!(ir, 1, inst)
-    Core.Compiler.setindex!(add_stmt, node, :inst)
-
-    # perform compaction (not by calling compact! because with DCE the bug doesn't trigger)
-    compact = Core.Compiler.IncrementalCompact(ir)
-    state = Core.Compiler.iterate(compact)
-    while state !== nothing
-        state = Core.Compiler.iterate(compact, state[2])
-    end
-    ir = Core.Compiler.complete(compact)
-
-    # test that the inserted node was compacted
-    @test Core.Compiler.length(ir.new_nodes) == 0
-
-    # test that we performed copy propagation, but that the undef node was trimmed
-    @test length(ir.stmts) == instructions
-
-    @test show(devnull, ir) === nothing
-end
-
-@testset "IncrementalCompact statefulness" begin
-    foo(i) = i == 1 ? 1 : 2
-    ir = only(Base.code_ircode(foo, (Int,)))[1]
-    compact = Core.Compiler.IncrementalCompact(ir)
-
-    # set up first iterator
-    x = Core.Compiler.iterate(compact)
-    x = Core.Compiler.iterate(compact, x[2])
-
-    # set up second iterator
-    x = Core.Compiler.iterate(compact)
-
-    # consume remainder
-    while x !== nothing
-        x = Core.Compiler.iterate(compact, x[2])
-    end
-
-    ir = Core.Compiler.complete(compact)
-    @test Core.Compiler.verify_ir(ir) === nothing
-end
-
-# insert_node! operations
-# =======================
-
-import Core: SSAValue
-import Core.Compiler: NewInstruction, insert_node!
-
-# insert_node! for pending node
-let ir = Base.code_ircode((Int,Int); optimize_until="inlining") do a, b
-        a^b
-    end |> only |> first
-    @test length(ir.stmts) == 2
-    @test Meta.isexpr(ir.stmts[1][:inst], :invoke)
-
-    newssa = insert_node!(ir, SSAValue(1), NewInstruction(Expr(:call, println, SSAValue(1)), Nothing), #=attach_after=#true)
-    newssa = insert_node!(ir, newssa, NewInstruction(Expr(:call, println, newssa), Nothing), #=attach_after=#true)
-
-    ir = Core.Compiler.compact!(ir)
-    @test length(ir.stmts) == 4
-    @test Meta.isexpr(ir.stmts[1][:inst], :invoke)
-    call1 = ir.stmts[2][:inst]
-    @test iscall((ir,println), call1)
-    @test call1.args[2] === SSAValue(1)
-    call2 = ir.stmts[3][:inst]
-    @test iscall((ir,println), call2)
-    @test call2.args[2] === SSAValue(2)
-end
-
-# insert_node! with new instruction with flag computed
-let ir = Base.code_ircode((Int,Int); optimize_until="inlining") do a, b
-        a^b
-    end |> only |> first
-    invoke_idx = findfirst(ir.stmts.inst) do @nospecialize(x)
-        Meta.isexpr(x, :invoke)
-    end
-    @test invoke_idx !== nothing
-    invoke_expr = ir.stmts.inst[invoke_idx]
-
-    # effect-ful node
-    let compact = Core.Compiler.IncrementalCompact(Core.Compiler.copy(ir))
-        insert_node!(compact, SSAValue(1), NewInstruction(Expr(:call, println, SSAValue(1)), Nothing), #=attach_after=#true)
-        state = Core.Compiler.iterate(compact)
-        while state !== nothing
-            state = Core.Compiler.iterate(compact, state[2])
-        end
-        ir = Core.Compiler.finish(compact)
-        new_invoke_idx = findfirst(ir.stmts.inst) do @nospecialize(x)
-            x == invoke_expr
-        end
-        @test new_invoke_idx !== nothing
-        new_call_idx = findfirst(ir.stmts.inst) do @nospecialize(x)
-            iscall((ir,println), x) && x.args[2] === SSAValue(invoke_idx)
-        end
-        @test new_call_idx !== nothing
-        @test new_call_idx == new_invoke_idx+1
-    end
-
-    # effect-free node
-    let compact = Core.Compiler.IncrementalCompact(Core.Compiler.copy(ir))
-        insert_node!(compact, SSAValue(1), NewInstruction(Expr(:call, GlobalRef(Base, :add_int), SSAValue(1), SSAValue(1)), Int), #=attach_after=#true)
-        state = Core.Compiler.iterate(compact)
-        while state !== nothing
-            state = Core.Compiler.iterate(compact, state[2])
-        end
-        ir = Core.Compiler.finish(compact)
-
-        ir = Core.Compiler.finish(compact)
-        new_invoke_idx = findfirst(ir.stmts.inst) do @nospecialize(x)
-            x == invoke_expr
-        end
-        @test new_invoke_idx !== nothing
-        new_call_idx = findfirst(ir.stmts.inst) do @nospecialize(x)
-            iscall((ir,Base.add_int), x) && x.args[2] === SSAValue(invoke_idx)
-        end
-        @test new_call_idx === nothing # should be deleted during the compaction
-    end
-end
diff --git a/test/complex.jl b/test/complex.jl
index 2b87655f1ebe0..fd9e5186343d2 100644
--- a/test/complex.jl
+++ b/test/complex.jl
@@ -24,10 +24,32 @@ for T in (Int64, Float64)
     @test complex(Complex{T}) == Complex{T}
 end
 
-#show
-@test sprint(show, complex(1, 0), context=:compact => true) == "1+0im"
-@test sprint(show, complex(true, true)) == "Complex(true,true)"
-@test sprint(show, Complex{Int8}(0, typemin(Int8))) == "0 - 128im"
+@testset "show for complex" begin
+    @test sprint(show, complex(1, 0), context=:compact => true) == "1+0im"
+    @test sprint(show, complex(true, true)) == "Complex(true,true)"
+    @test sprint(show, Complex{Int8}(0, typemin(Int8))) == "0 - 128im"
+
+    @test sprint(show, prevfloat(BigFloat(-1, precision=32))im) == "-0.0 - 1.0000000005im"
+    @test sprint(show, prevfloat(BigFloat(-1, precision=512))im) == "-0.0 - 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000015im"
+
+    @test sprint(show, prevfloat(BigFloat(-1, precision=32))im, context=:compact => true) == "-0.0-1.0im"
+    @test sprint(show, prevfloat(BigFloat(-1, precision=512))im, context=:compact => true) == "-0.0-1.0im"
+end
+
+@testset "show" begin
+    @test sprint(show, complex(1, 0), context=:compact => true) == "1+0im"
+    @test sprint(show, complex(true, true)) == "Complex(true,true)"
+    @test sprint(show, Complex{Int8}(0, typemin(Int8))) == "0 - 128im"
+    @test sprint(show, complex(typemin(Int16), typemax(Int16))) == "-32768 + 32767im"
+    @test sprint(show, complex(0x26, 0x26), context=:compact => true) == "0x26+0x26*im"
+    @test sprint(show, complex(0o77, 0o77), context=:compact => true) == "0x3f+0x3f*im"
+    @test sprint(show, complex(0b10, 0b11)) == "0x02 + 0x03*im"
+    @test sprint(show, complex(-0x1A, 0x2F), context=:compact => true) == "0xe6+0x2f*im"
+    @test sprint(show, complex(typemax(UInt16), typemin(UInt16))) =="0xffff + 0x0000*im"
+    @test sprint(show, complex(-Inf, Inf)) == "-Inf + Inf*im"
+    @test sprint(show, complex(-Inf, NaN)) == "-Inf + NaN*im"
+    @test sprint(show, complex(0, -Inf)) == "0.0 - Inf*im"
+end
 
 @testset "unary operator on complex boolean" begin
     @test +Complex(true, true) === Complex(1, 1)
@@ -383,6 +405,7 @@ import Base.Math.@horner
     @test isequal(log1p(complex(-2, 1e-10)), log(1 + complex(-2, 1e-10)))
     @test isequal(log1p(complex(1, Inf)), complex(Inf, pi/2))
     @test isequal(log1p(complex(1, -Inf)), complex(Inf, -pi/2))
+    @test isequal(log1p(complex(1e-200, 5e-175)), complex(1e-200, 5e-175))
 
     for z in (1e-10+1e-9im, 1e-10-1e-9im, -1e-10+1e-9im, -1e-10-1e-9im)
         @test log1p(z) ≈ @horner(z, 0, 1, -0.5, 1/3, -0.25, 0.2)
@@ -920,6 +943,13 @@ end
     end
 end
 
+@testset "eps" begin
+    @test eps(1.0+1.0im) === 3.1401849173675503e-16
+    @test eps(Complex{Float64}) === eps(1.0+1.0im)
+    @test eps(Complex{Float32}) === 1.6858739f-7
+    @test eps(Float32(1.0)+Float32(1.0)im) === eps(Complex{Float32})
+end
+
 @testset "cis" begin
     @test cis(0.0+1.0im) ≈ 0.367879441171442321595523770161460867445811131031767834507836+0.0im
     @test cis(1.0+0.0im) ≈ 0.54030230586813971740093660744297660373231042061+0.84147098480789650665250232163029899962256306079im
@@ -983,9 +1013,9 @@ end
 # issue #10926
 @test typeof(π - 1im) == ComplexF64
 
-@testset "issue #15969" begin
+@testset "issues #15969 #59684" begin
     # specialized muladd for complex types
-    for x in (3, 3+13im), y in (2, 2+7im), z in (5, 5+11im)
+    for x in (3, 3+13im, 1im), y in (2, 2+7im, 1im), z in (5, 5+11im, 0x01, 0x01 + 0x00*im)
         @test muladd(x,y,z) === x*y + z
     end
 end
@@ -1214,3 +1244,9 @@ end
     @test !iseven(7+0im) && isodd(7+0im)
     @test !iseven(6+1im) && !isodd(7+1im)
 end
+
+@testset "issue #55266" begin
+    for T in (Float16, Float32, Float64)
+        @test isapprox(atanh(1+im*floatmin(T)), Complex{T}(atanh(1+im*big(floatmin(T)))))
+    end
+end
diff --git a/test/copy.jl b/test/copy.jl
index 633beee5f2af3..f5cc57c86feaa 100644
--- a/test/copy.jl
+++ b/test/copy.jl
@@ -49,6 +49,15 @@ chnlprod(x) = Channel(c->for i in x; put!(c,i); end)
 
         @test_throws Union{BoundsError, ArgumentError} copyto!(dest, 1, src(), 2, 2)
     end
+
+    v = rand(Float32, 4)
+    a = Memory{Float32}(v)
+    b = similar(a)
+    copyto!(b, a)
+    @test a == b
+
+    c = Memory{Float32}(undef, 3)
+    @test_throws BoundsError copyto!(c, a)
 end
 
 @testset "with CartesianIndices" begin
@@ -189,7 +198,7 @@ end
         bar = Bar19921(foo, Dict(foo => 3))
         bar2 = deepcopy(bar)
         @test bar2.foo ∈ keys(bar2.fooDict)
-        @test bar2.fooDict[bar2.foo] != nothing
+        @test bar2.fooDict[bar2.foo] !== nothing
     end
 
     let d = IdDict(rand(2) => rand(2) for i = 1:100)
@@ -213,11 +222,13 @@ end
 @testset "copying CodeInfo" begin
     _testfunc() = nothing
     ci,_ = code_typed(_testfunc, ())[1]
-    ci.edges = [_testfunc]
+    if isdefined(ci, :edges)
+        ci.edges = [_testfunc]
 
-    ci2 = copy(ci)
-    # Test that edges are not shared
-    @test ci2.edges !== ci.edges
+        ci2 = copy(ci)
+        # Test that edges are not shared
+        @test ci2.edges !== ci.edges
+    end
 end
 
 @testset "issue #34025" begin
@@ -242,10 +253,35 @@ end
     @test copyto!(s, String[]) == [1, 2] # No error
 end
 
+@testset "circular reference arrays" begin
+    # issue 56775
+    p = Any[nothing]
+    p[1] = p
+    p2 = deepcopy(p)
+    @test p2 === p2[1]
+    @test p2 !== p
+end
+
 @testset "deepcopy_internal arrays" begin
     @test (@inferred Base.deepcopy_internal(zeros(), IdDict())) == zeros()
 end
 
+@testset "deepcopy_internal inference" begin
+    @inferred Base.deepcopy_internal(1, IdDict())
+    @inferred Base.deepcopy_internal(1.0, IdDict())
+    @inferred Base.deepcopy_internal(big(1), IdDict())
+    @inferred Base.deepcopy_internal(big(1.0), IdDict())
+    @inferred Base.deepcopy_internal('a', IdDict())
+    @inferred Base.deepcopy_internal("abc", IdDict())
+    @inferred Base.deepcopy_internal([1,2,3], IdDict())
+
+    # structs without custom deepcopy_internal method
+    struct Immutable2; x::Int; end
+    mutable struct Mutable2; x::Int; end
+    @inferred Base.deepcopy_internal(Immutable2(1), IdDict())
+    @inferred Base.deepcopy_internal(Mutable2(1), IdDict())
+end
+
 @testset "`copyto!`'s unaliasing" begin
     a = view([1:3;], :)
     @test copyto!(a, 2, a, 1, 2) == [1;1:2;]
@@ -255,6 +291,8 @@ end
 
 @testset "`deepcopy` a `GenericCondition`" begin
     a = Base.GenericCondition(ReentrantLock())
+    # Test printing
+    @test repr(a) == "Base.GenericCondition(ReentrantLock())"
     @test !islocked(a.lock)
     lock(a.lock)
     @test islocked(a.lock)
@@ -267,4 +305,6 @@ end
     @test a.lock !== b.lock
     @test islocked(a.lock)
     @test !islocked(b.lock)
+    @inferred deepcopy(a)
+    @inferred deepcopy(a.lock)
 end
diff --git a/test/core.jl b/test/core.jl
index a87c45b698e49..490b39625a962 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -9,20 +9,27 @@ const Bottom = Union{}
 # For curmod_*
 include("testenv.jl")
 
+include("tempdepot.jl")
+
 ## tests that `const` field declarations
 
 # sanity tests that our built-in types are marked correctly for const fields
 for (T, c) in (
         (Core.CodeInfo, []),
-        (Core.CodeInstance, [:def, :rettype, :rettype_const, :ipo_purity_bits, :argescapes]),
+        (Core.CodeInstance, [:def, :owner, :rettype, :exctype, :rettype_const, :time_infer_total, :time_infer_cache_saved, :time_infer_self]),
         (Core.Method, [#=:name, :module, :file, :line, :primary_world, :sig, :slot_syms, :external_mt, :nargs, :called, :nospecialize, :nkw, :isva, :is_for_opaque_closure, :constprop=#]),
         (Core.MethodInstance, [#=:def, :specTypes, :sparam_vals=#]),
-        (Core.MethodTable, [:module]),
-        (Core.TypeMapEntry, [:sig, :simplesig, :guardsigs, :min_world, :max_world, :func, :isleafsig, :issimplesig, :va]),
+        (Core.MethodTable, [:cache, :module, :name]),
+        (Core.MethodCache, []),
+        (Core.TypeMapEntry, [:sig, :simplesig, :guardsigs, :func, :isleafsig, :issimplesig, :va]),
         (Core.TypeMapLevel, []),
-        (Core.TypeName, [:name, :module, :names, :atomicfields, :constfields, :wrapper, :mt, :hash, :n_uninitialized, :flags]),
+        (Core.TypeName, [:name, :module, :names, :wrapper, :hash, :n_uninitialized, :flags]),
         (DataType, [:name, :super, :parameters, :instance, :hash]),
         (TypeVar, [:name, :ub, :lb]),
+        (Core.Memory, [:length, :ptr]),
+        (Core.GenericMemoryRef, [:mem, :ptr_or_offset]),
+        (Task, [:metrics_enabled]),
+        (Core.BindingPartition, [:restriction, :kind]),
     )
     @test Set((fieldname(T, i) for i in 1:fieldcount(T) if isconst(T, i))) == Set(c)
 end
@@ -30,14 +37,19 @@ end
 # sanity tests that our built-in types are marked correctly for atomic fields
 for (T, c) in (
         (Core.CodeInfo, []),
-        (Core.CodeInstance, [:next, :inferred, :purity_bits, :invoke, :specptr, :precompile]),
-        (Core.Method, []),
-        (Core.MethodInstance, [:uninferred, :cache, :precompiled]),
-        (Core.MethodTable, [:defs, :leafcache, :cache, :max_args]),
-        (Core.TypeMapEntry, [:next]),
+        (Core.CodeInstance, [:next, :min_world, :max_world, :inferred, :edges, :debuginfo, :ipo_purity_bits, :invoke, :specptr, :flags, :precompile, :time_compile]),
+        (Core.Method, [:primary_world, :did_scan_source, :dispatch_status, :interferences]),
+        (Core.MethodInstance, [:cache, :flags, :dispatch_status]),
+        (Core.MethodTable, [:defs]),
+        (Core.MethodCache, [:leafcache, :cache, :var""]),
+        (Core.TypeMapEntry, [:next, :min_world, :max_world]),
         (Core.TypeMapLevel, [:arg1, :targ, :name1, :tname, :list, :any]),
-        (Core.TypeName, [:cache, :linearcache]),
+        (Core.TypeName, [:cache, :linearcache, :Typeofwrapper, :max_args, :cache_entry_count]),
         (DataType, [:types, :layout]),
+        (Core.Memory, []),
+        (Core.GenericMemoryRef, []),
+        (Task, [:_state, :running_time_ns, :finished_at, :first_enqueued_at, :last_started_running_at]),
+        (Core.BindingPartition, [:min_world, :max_world, :next]),
     )
     @test Set((fieldname(T, i) for i in 1:fieldcount(T) if Base.isfieldatomic(T, i))) == Set(c)
 end
@@ -57,20 +69,7 @@ mutable struct ABCDconst
     c
     const d::Union{Int,Nothing}
 end
-@test_throws(ErrorException("invalid redefinition of constant $(nameof(curmod)).ABCDconst"),
-    mutable struct ABCDconst
-        const a
-        const b::Int
-        c
-        d::Union{Int,Nothing}
-    end)
-@test_throws(ErrorException("invalid redefinition of constant $(nameof(curmod)).ABCDconst"),
-    mutable struct ABCDconst
-        a
-        b::Int
-        c
-        d::Union{Int,Nothing}
-    end)
+
 let abcd = ABCDconst(1, 2, 3, 4)
     @test (1, 2, 3, 4) === (abcd.a, abcd.b, abcd.c, abcd.d)
     @test_throws(ErrorException("setfield!: const field .a of type ABCDconst cannot be changed"),
@@ -107,6 +106,36 @@ let abcd = ABCDconst(1, 2, 3, 4)
         abcd.d = nothing)
     @test (1, 2, "not constant", 4) === (abcd.a, abcd.b, abcd.c, abcd.d)
 end
+const orig_ABCDconst = ABCDconst
+mutable struct ABCDconst
+    const a
+    const b::Int
+    c
+    d::Union{Int,Nothing}
+end
+@test ABCDconst !== orig_ABCDconst
+mutable struct ABCDconst
+    a
+    b::Int
+    c
+    d::Union{Int,Nothing}
+end
+@test ABCDconst !== orig_ABCDconst
+# Issue #52686
+struct A52686{T} end
+struct B52686{T, S}
+    a::A52686{<:T}
+end
+function func52686()
+    @eval begin
+        struct A52686{T} end
+        struct B52686{T, S}
+            a::A52686{<:T}
+        end
+    end
+    return true
+end
+@test func52686()
 
 # test `===` handling null pointer in struct #44712
 struct N44712
@@ -216,12 +245,68 @@ k11840(::Type{Union{Tuple{Int32}, Tuple{Int64}}}) = '2'
 @test k11840(Tuple{Union{Int32, Int64}}) == '2'
 @test k11840(Union{Tuple{Int32}, Tuple{Int64}}) == '2'
 
+# issue #59327
+@noinline f59327(f, x) = Any[f, x]
+g59327(x) = f59327(+, Any[x][1])
+g59327(1)
+@test any(
+    mi->mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(f59327), Function, Int},
+    methods(f59327)[1].specializations)
+
+@noinline h59327(f::Union{Function, Nothing}, x) = Any[f, x]
+i59327(x) = h59327(+, Any[x][1])
+i59327(1)
+@test any(
+    mi->mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(h59327), Function, Int},
+    methods(h59327)[1].specializations)
+
+@noinline j59327(f::Function, x) = Any[f, x]
+k59327(x) = j59327(+, Any[x][1])
+k59327(1)
+@test any(
+    mi->mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(j59327), Function, Int},
+    methods(j59327)[1].specializations
+)
+
+@noinline l59327(f::Base.Callable, x) = Any[f, x]
+m59327(x) = l59327(+, Any[x][1])
+m59327(1)
+@test any(
+    mi->mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(l59327), Function, Int},
+    methods(l59327)[1].specializations
+)
+
+# _do_ specialize if the signature has a `where`
+@noinline n59327(f::F, x) where F = Any[f, x]
+o59327(x) = n59327(+, Any[x][1])
+o59327(1)
+@test !any(
+    mi->mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(n59327), Function, Int},
+    methods(n59327)[1].specializations
+)
+@test any(
+    mi->mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(n59327), typeof(+), Int},
+    methods(n59327)[1].specializations
+)
+
+# _do_ specialize if the signature is specific
+@noinline n59327(f::typeof(+), x) = Any[f, x]
+o59327(x) = n59327(+, Any[x][1])
+o59327(1)
+@test !any(
+    mi->mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(n59327), Function, Int},
+    methods(n59327)[1].specializations
+)
+@test any(
+    mi->mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(n59327), typeof(+), Int},
+    methods(n59327)[1].specializations
+)
 
 # issue #20511
 f20511(x::DataType) = 0
 f20511(x) = 1
-Type{Integer}  # cache this
-@test f20511(Union{Integer,T} where T <: Unsigned) == 1
+Type{AbstractSet}  # cache this
+@test f20511(Union{AbstractSet,Set{T}} where T) == 1
 
 # join
 @test typejoin(Int8,Int16) === Signed
@@ -282,22 +367,9 @@ end  |> only == Type{typejoin(Int, UInt)}
     typejoin(Int, UInt, Float64)
 end  |> only == Type{typejoin(Int, UInt, Float64)}
 
-let res = @test_throws TypeError let
-        Base.Experimental.@force_compile
-        typejoin(1, 2)
-        nothing
-    end
-    err = res.value
-    @test err.func === :<:
-end
-let res = @test_throws TypeError let
-        Base.Experimental.@force_compile
-        typejoin(1, 2, 3)
-        nothing
-    end
-    err = res.value
-    @test err.func === :<:
-end
+@test typejoin(1, 2) === Any
+@test typejoin(1, 2, 3) === Any
+@test typejoin(Int, Int, 3) === Any
 
 # promote_typejoin returns a Union only with Nothing/Missing combined with concrete types
 for T in (Nothing, Missing)
@@ -374,8 +446,8 @@ let ft = Base.datatype_fieldtypes
     @test ft(elT2.body)[1].parameters[1] === elT2
     @test Base.isconcretetype(ft(elT2.body)[1])
 end
-#struct S22624{A,B,C} <: Ref{S22624{Int64,A}}; end
-@test_broken @isdefined S22624
+struct S22624{A,B,C} <: Ref{S22624{Int,A}}; end
+@test sizeof(S22624) == sizeof(S22624{Int,Int,Int}) == 0
 
 # issue #42297
 mutable struct Node42297{T, V}
@@ -390,12 +462,12 @@ end
 mutable struct A3890{T1}
     x::Matrix{Complex{T1}}
 end
-@test A3890{Float64}.types[1] === Array{ComplexF64,2}
+@test A3890{Float64}.types[1] === Matrix{ComplexF64}
 # make sure the field type Matrix{Complex{T1}} isn't cached
 mutable struct B3890{T2}
     x::Matrix{Complex{T2}}
 end
-@test B3890{Float64}.types[1] === Array{ComplexF64,2}
+@test B3890{Float64}.types[1] === Matrix{ComplexF64}
 
 # issue #786
 mutable struct Node{T}
@@ -414,6 +486,18 @@ mutable struct FooFoo{A,B} y::FooFoo{A} end
 
 @test FooFoo{Int} <: FooFoo{Int,AbstractString}.types[1]
 
+# make sure this self-referential struct doesn't crash type layout
+struct SelfTyA{V}
+    a::Base.RefValue{V}
+end
+struct SelfTyB{T}
+    a::T
+    b::SelfTyA{SelfTyB{T}}
+end
+let T = Base.RefValue{SelfTyB{Int}}
+    @test sizeof(T) === sizeof(Int)
+    @test sizeof(T.types[1]) === 2 * sizeof(Int)
+end
 
 let x = (2,3)
     @test +(x...) == 5
@@ -536,7 +620,7 @@ function i18408()
     return (x -> i)
 end
 let f = i18408()
-    @test_throws UndefVarError(:i) f(0)
+    @test_throws UndefVarError(:i, :local) f(0)
 end
 
 # issue #23558
@@ -596,7 +680,7 @@ begin
         global f7234_cnt += -10000
     end
 end
-@test_throws UndefVarError(:glob_x2) f7234_a()
+@test_throws UndefVarError(:glob_x2, :local) f7234_a()
 @test f7234_cnt == 1
 begin
     global glob_x2 = 24
@@ -606,7 +690,7 @@ begin
         global f7234_cnt += -10000
     end
 end
-@test_throws UndefVarError(:glob_x2) f7234_b()
+@test_throws UndefVarError(:glob_x2, :local) f7234_b()
 @test f7234_cnt == 2
 # globals can accessed if declared
 for i = 1:2
@@ -721,11 +805,11 @@ function f21900()
     global f21900_cnt += -1000
     nothing
 end
-@test_throws UndefVarError(:x_global_undefined_error) f21900()
+@test_throws UndefVarError(:x_global_undefined_error, @__MODULE__) f21900()
 @test f21900_cnt == 1
 
 # use @eval so this runs as a toplevel scope block
-@test_throws UndefVarError(:foo21900) @eval begin
+@test_throws UndefVarError(:foo21900, @__MODULE__) @eval begin
     for i21900 = 1:10
         local bar21900
         for j21900 = 1:10
@@ -738,7 +822,7 @@ end
 @test !@isdefined(foo21900)
 @test !@isdefined(bar21900)
 bar21900 = 0
-@test_throws UndefVarError(:foo21900) @eval begin
+@test_throws UndefVarError(:foo21900, @__MODULE__) @eval begin
     for i21900 = 1:10
         global bar21900
         for j21900 = 1:10
@@ -764,6 +848,34 @@ end
 @test foo21900 == 10
 @test bar21900 == 11
 
+let f = g -> x -> g(x)
+    @test f(Int)(1.0) === 1
+    @test @inferred(f(Int)) isa Function
+    @test fieldtype(typeof(f(Int)), 1) === Type{Int}
+    @test @inferred(f(Rational{Int})) isa Function
+    @test fieldtype(typeof(f(Rational{Int})), 1) === Type{Rational{Int}}
+    @test_broken @inferred(f(Rational)) isa Function
+    @test fieldtype(typeof(f(Rational)), 1) === Type{Rational}
+    @test_broken @inferred(f(Rational{Core.TypeVar(:T)})) isa Function
+    @test fieldtype(typeof(f(Rational{Core.TypeVar(:T)})), 1) === DataType
+end
+let f() = (T = Rational{Core.TypeVar(:T)}; () -> T)
+    @test f() isa Function
+    @test Base.infer_return_type(f()) == DataType
+    @test fieldtype(typeof(f()), 1) === DataType
+    t = f()()
+    @test t isa DataType
+    @test t.name.wrapper == Rational
+    @test length(t.parameters) == 1
+    @test t.parameters[1] isa Core.TypeVar
+end
+function issue23618(a::AbstractVector)
+    T = eltype(a)
+    b = Vector{T}()
+    return [Set{T}() for x in a]
+end
+@test Base.infer_return_type(issue23618, (Vector{Int},)) == Vector{Set{Int}}
+
 # ? syntax
 @test (true ? 1 : false ? 2 : 3) == 1
 
@@ -1152,8 +1264,8 @@ end
 # Module() constructor
 @test names(Module(:anonymous), all = true, imported = true) == [:anonymous]
 @test names(Module(:anonymous, false), all = true, imported = true) == [:anonymous]
-@test Module(:anonymous, false, true).Core == Core
-@test_throws UndefVarError Module(:anonymous, false, false).Core
+@test invokelatest(getglobal, Module(:anonymous, false, true), :Core) == Core
+@test_throws UndefVarError invokelatest(getglobal, Module(:anonymous, false, false), :Core)
 
 # exception from __init__()
 let didthrow =
@@ -1429,6 +1541,9 @@ let
     @test unsafe_load(p2) == 101
     unsafe_store!(p2, 909, 3)
     @test a2 == [101,102,909]
+    # test for issue 51954
+    @test pointer(a.ref.mem)===pointer(a)
+    @test pointer(a.ref.mem,2)===pointer(a,2)
 end
 
 @test unsafe_pointer_to_objref(ccall(:jl_call1, Ptr{Cvoid}, (Any,Any),
@@ -1886,9 +2001,9 @@ end
 
 # issue #4526
 f4526(x) = isa(x.a, Nothing)
-@test_throws ErrorException f4526(1)
-@test_throws ErrorException f4526(im)
-@test_throws ErrorException f4526(1+2im)
+@test_throws FieldError f4526(1)
+@test_throws FieldError f4526(im)
+@test_throws FieldError f4526(1+2im)
 
 # issue #4528
 function f4528(A, B)
@@ -2223,6 +2338,31 @@ end
 x6074 = 6074
 @test @X6074() == 6074
 
+# issues #48910, 54417
+macro X43151_nested()
+    quote my_value = "from_nested_macro" end
+end
+macro X43151_parent()
+    quote
+        my_value = "from_parent_macro"
+        @X43151_nested()
+        my_value
+    end
+end
+@test @X43151_parent() == "from_parent_macro"
+
+macro X43151_nested_escaping()
+    quote $(esc(:my_value)) = "from_nested_macro" end
+end
+macro X43151_parent_escaping()
+    quote
+        my_value = "from_parent_macro"
+        @X43151_nested_escaping()
+        my_value
+    end
+end
+@test @X43151_parent_escaping() == "from_nested_macro"
+
 # issue #5536
 test5536(a::Union{Real, AbstractArray}...) = "Splatting"
 test5536(a::Union{Real, AbstractArray}) = "Non-splatting"
@@ -2554,7 +2694,7 @@ end
 # issue #8338
 let ex = Expr(:(=), :(f8338(x;y=4)), :(x*y))
     eval(ex)
-    @test f8338(2) == 8
+    @test (@invokelatest f8338(2)) == 8
 end
 
 # call overloading (#2403)
@@ -2579,11 +2719,17 @@ struct D14919 <: Function; end
 @test B14919()() == "It's a brand new world"
 @test C14919()() == D14919()() == "Boo."
 
-for f in (:Any, :Function, :(Core.Builtin), :(Union{Nothing, Type}), :(Union{typeof(+), Type}), :(Union{typeof(+), typeof(-)}), :(Base.Callable))
-    @test_throws ErrorException("Method dispatch is unimplemented currently for this method signature") @eval (::$f)() = 1
-end
-for f in (:(Core.arrayref), :((::typeof(Core.arrayref))), :((::Core.IntrinsicFunction)))
-    @test_throws ErrorException("cannot add methods to a builtin function") @eval $f() = 1
+let ex_t = ErrorException, ex_r = r"cannot add methods to builtin function"
+    for f in (:(Core.Any), :(Core.Function), :(Core.Builtin), :(Base.Callable), :(Union{Nothing,F} where F), :(typeof(Core.getfield)), :(Core.IntrinsicFunction))
+        @test_throws ex_t @eval (::$f)() = 1
+        @test_throws ex_r @eval (::$f)() = 1
+    end
+    @test_throws ex_t @eval (::Union{Nothing,F})() where {F<:Function} = 1
+    @test_throws ex_r @eval (::Union{Nothing,F})() where {F<:Function} = 1
+    for f in (:(Core.getfield),)
+        @test_throws ex_t @eval $f() = 1
+        @test_throws ex_r @eval $f() = 1
+    end
 end
 
 # issue #33370
@@ -2672,6 +2818,14 @@ const T24460 = Tuple{T,T} where T
 g24460() = invoke(f24460, T24460, 1, 2)
 @test @inferred(g24460()) === 2.0
 
+@testset "invoke with builtins" begin
+    @test invoke(getfield, Tuple{Any, Symbol}, (a = 42,), :a) == 42
+    @test invoke(setfield!, Tuple{Any, Symbol, Any},  Base.RefValue(1), :x, 2) == 2
+    @test invoke(isdefined, Tuple{Any, Symbol}, (a = 1,), :a) == true
+    @test invoke(isdefined, Tuple{Any, Symbol}, (a = 1,), :b) == false
+    @test invoke(invoke, Tuple{Any, Type, Vararg}, sin, Tuple{Real}, 0) == 0.0
+end
+
 # issue #30679
 @noinline function f30679(::DataType)
     b = IOBuffer()
@@ -2786,7 +2940,7 @@ mutable struct Obj; x; end
         push!(wr, WeakRef(x))
         nothing
     end
-    @noinline test_wr(r, wr) = @test r[1] == wr[1].value
+    @noinline test_wr(r, wr) = r[1] == wr[1].value
     function test_wr()
         # we need to be very careful here that we never
         # use the value directly in this function, so we aren't dependent
@@ -2794,7 +2948,7 @@ mutable struct Obj; x; end
         ref = []
         wref = []
         mk_wr(ref, wref)
-        test_wr(ref, wref)
+        @test test_wr(ref, wref)
         GC.gc()
         test_wr(ref, wref)
         empty!(ref)
@@ -3740,14 +3894,14 @@ f12092(x::Int, y::Int...) = 2
 
 # issue #12063
 # NOTE: should have > MAX_TUPLETYPE_LEN arguments
-f12063(tt, g, p, c, b, v, cu::T, d::AbstractArray{T, 2}, ve) where {T} = 1
+f12063(tt, g, p, c, b, v, cu::T, d::AbstractMatrix{T}, ve) where {T} = 1
 f12063(args...) = 2
 g12063() = f12063(0, 0, 0, 0, 0, 0, 0.0, zeros(0,0), Int[])
 @test g12063() == 1
 
 # issue #11587
 mutable struct Sampler11587{N}
-    clampedpos::Array{Int,2}
+    clampedpos::Matrix{Int}
     buf::Array{Float64,N}
 end
 function Sampler11587()
@@ -3817,11 +3971,13 @@ end
 struct NInitializedTestType
     a
 end
+const orig_NInitializedTestType = NInitializedTestType
 
-@test_throws ErrorException @eval struct NInitializedTestType
+struct NInitializedTestType
     a
     NInitializedTestType() = new()
 end
+@test orig_NInitializedTestType !== NInitializedTestType
 
 # issue #12394
 mutable struct Empty12394 end
@@ -3961,6 +4117,14 @@ end
 end
 @test f13432b(true) == true
 @test f13432b(false) == false
+@noinline function f13432c(x)
+    offset = x ? Base.Bottom : 1
+    # Barrier for inference, so the optimizer cannot optimize this,
+    # but codegen can still see this is a constant
+    return ===(offset, Base.inferencebarrier(Base.Bottom))
+end
+@test f13432c(true) == true
+@test f13432c(false) == false
 
 #13433, read!(::IO, a::Vector{UInt8}) should return a
 mutable struct IO13433 <: IO end
@@ -4110,7 +4274,29 @@ end
 let z1 = Z14477()
     @test isa(z1, Z14477)
     @test isa(z1.fld, Z14477)
+    @test isdefined(z1, :fld)
+    @test !isdefined(z1.fld, :fld)
+end
+struct Z14477B
+    fld::Union{Nothing,Z14477B}
+    Z14477B() = new(new(nothing))
+end
+let z1 = Z14477B()
+    @test isa(z1, Z14477B)
+    @test isa(z1.fld, Z14477B)
+    @test isa(z1.fld.fld, Nothing)
+end
+struct Z14477C{T}
+    fld::Z14477C{Int8}
+    Z14477C() = new{Int16}(new{Int8}())
 end
+let z1 = Z14477C()
+    @test isa(z1, Z14477C)
+    @test isa(z1.fld, Z14477C)
+    @test isdefined(z1, :fld)
+    @test !isdefined(z1.fld, :fld)
+end
+
 
 # issue #8846, generic macros
 macro m8846(a, b=0)
@@ -4135,7 +4321,7 @@ let foo(x::Union{T, Nothing}, y::Union{T, Nothing}) where {T} = 1
 end
 let foo(x::Union{T, Nothing}, y::Union{T, Nothing}) where {T} = T
     @test foo(1, nothing) === Int
-    @test_throws UndefVarError(:T) foo(nothing, nothing)
+    @test_throws UndefVarError(:T, :static_parameter) foo(nothing, nothing)
 end
 
 module TestMacroGlobalFunction
@@ -4189,14 +4375,14 @@ foo9677(x::Array) = invoke(foo9677, Tuple{AbstractArray}, x)
 
 # issue #6846
 f6846() = (please6846; 2)
-@test_throws UndefVarError(:please6846) f6846()
+@test_throws UndefVarError(:please6846, @__MODULE__) f6846()
 
 module M6846
     macro f()
         return esc(:(please6846; 2))
     end
 end
-@test_throws UndefVarError(:please6846) @M6846.f()
+@test_throws UndefVarError(:please6846, @__MODULE__) @M6846.f()
 
 # issue #14758
 @test isa(@eval(f14758(; $([]...)) = ()), Function)
@@ -4238,13 +4424,13 @@ end
 abstract type abstest_14825 end
 
 mutable struct t1_14825{A <: abstest_14825, B}
-  x::A
-  y::B
+    x::A
+    y::B
 end
 
 mutable struct t2_14825{C, B} <: abstest_14825
-  x::C
-  y::t1_14825{t2_14825{C, B}, B}
+    x::C
+    y::t1_14825{t2_14825{C, B}, B}
 end
 
 @test t2_14825{Int,Int}.types[2] <: t1_14825
@@ -4300,6 +4486,7 @@ function f15180(x::T) where T
 end
 @test map(f15180(1), [1,2]) == [(Int,1),(Int,1)]
 
+using Base: _growbeg!, _deletebeg!, _growend!, _deleteend!
 struct ValueWrapper
     vpadding::NTuple{2,VecElement{UInt}}
     value
@@ -4308,43 +4495,44 @@ end
 Base.convert(::Type{ValueWrapper}, x) = ValueWrapper(x)
 for T in (Any, ValueWrapper)
     let ary = Vector{T}(undef, 10)
-        check_undef_and_fill(ary, rng) = for i in rng
-            @test !isassigned(ary, i)
+        check_undef_and_fill(ary, rng) = all(i -> begin
+            isassigned(ary, i) && return false
             ary[i] = (Float64(i), i) # some non-cached content
-            @test isassigned(ary, i)
-        end
+            isassigned(ary, i) || return false
+            return true
+        end, rng)
         # Check if the memory is initially zerod and fill it with value
         # to check if these values are not reused later.
-        check_undef_and_fill(ary, 1:10)
+        @test check_undef_and_fill(ary, 1:10)
         # Check if the memory grown at the end are zerod
-        ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10)
-        check_undef_and_fill(ary, 11:20)
+        _growend!(ary, 10)
+        @test check_undef_and_fill(ary, 11:20)
         # Make sure the content of the memory deleted at the end are not reused
-        ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), ary, 5)
-        ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 5)
-        check_undef_and_fill(ary, 16:20)
+        _deleteend!(ary, 5)
+        _growend!(ary, 5)
+        @test check_undef_and_fill(ary, 16:20)
 
         # Now check grow/del_end
         ary = Vector{T}(undef, 1010)
-        check_undef_and_fill(ary, 1:1010)
+        @test check_undef_and_fill(ary, 1:1010)
         # This del_beg should move the buffer
-        ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 1000)
-        ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 1000)
-        check_undef_and_fill(ary, 1:1000)
+        _deletebeg!(ary, 1000)
+        _growbeg!(ary, 1000)
+        @test check_undef_and_fill(ary, 1:1000)
         ary = Vector{T}(undef, 1010)
-        check_undef_and_fill(ary, 1:1010)
+        @test check_undef_and_fill(ary, 1:1010)
         # This del_beg should not move the buffer
-        ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 10)
-        ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 10)
-        check_undef_and_fill(ary, 1:10)
+        _deletebeg!(ary, 10)
+        _growbeg!(ary, 10)
+        @test check_undef_and_fill(ary, 1:10)
 
         ary = Vector{T}(undef, 1010)
-        check_undef_and_fill(ary, 1:1010)
-        ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10)
-        check_undef_and_fill(ary, 1011:1020)
-        ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), ary, 10)
-        ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 10)
-        check_undef_and_fill(ary, 1:10)
+        @test check_undef_and_fill(ary, 1:1010)
+        _growend!(ary, 10)
+        @test check_undef_and_fill(ary, 1011:1020)
+        _deleteend!(ary, 10)
+        _growbeg!(ary, 10)
+        @test check_undef_and_fill(ary, 1:10)
 
         # Make sure newly malloc'd buffers are filled with 0
         # test this for a few different sizes since we need to make sure
@@ -4357,33 +4545,60 @@ for T in (Any, ValueWrapper)
             GC.gc()
             GC.gc()
             GC.gc()
-            ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 4)
-            ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 4)
-            ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, n)
-            ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 4)
-            check_undef_and_fill(ary, 1:(2n + 4))
+            _growbeg!(ary, 4)
+            _deletebeg!(ary, 4)
+            _growend!(ary, n)
+            _growbeg!(ary, 4)
+            @test check_undef_and_fill(ary, 1:(2n + 4))
         end
 
         ary = Vector{T}(undef, 100)
-        ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10000)
+        _growend!(ary, 10000)
         ary[:] = 1:length(ary)
-        ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 10000)
+        _deletebeg!(ary, 10000)
         # grow on the back until a buffer reallocation happens
         cur_ptr = pointer(ary)
         while cur_ptr == pointer(ary)
             len = length(ary)
-            ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10)
-            for i in (len + 1):(len + 10)
-                @test !isassigned(ary, i)
-            end
+            _growend!(ary, 10)
+            result = @test all(i -> !isassigned(ary, i), (len + 1):(len + 10))
+            result isa Test.Pass || break
         end
 
-        ary = Vector{T}(undef, 100)
-        ary[:] = 1:length(ary)
-        ccall(:jl_array_grow_at, Cvoid, (Any, Csize_t, Csize_t), ary, 50, 10)
-        for i in 51:60
-            @test !isassigned(ary, i)
-        end
+        # growat when copy into start of same buffer
+        ary = Vector{T}(undef, 10)
+        ary[:] = 1:10
+        pushfirst!(ary, 0)
+        Base._growat!(ary, 3, 5)
+        @test all(i -> !isassigned(ary, i), 3:7)
+        @test all(i -> isassigned(ary, i), 8:length(ary))
+        @test all(i -> isassigned(ary, i), 1:2)
+
+        # growat when copy into end of same buffer
+        ary = Vector{T}(undef, 10)
+        ary[:] = 1:10
+        push!(ary, 11)
+        Base._growat!(ary, 6, 10)
+        @test all(i -> !isassigned(ary, i), 6:15)
+        @test all(i -> isassigned(ary, i), 16:length(ary))
+        @test all(i -> isassigned(ary, i), 1:5)
+
+        # growat when copy to new buffer
+        ary = Vector{T}(undef, 10)
+        ary[:] = 1:10
+        Base._growat!(ary, 6, 10)
+        @test all(i -> !isassigned(ary, i), 6:15)
+        @test all(i -> isassigned(ary, i), 16:length(ary))
+        @test all(i -> isassigned(ary, i), 1:5)
+    end
+end
+
+#test grow_end ccall directly since it's used in the C source
+for ET in [Nothing, Int, Union{Int, Nothing}, Any]
+    for n in [0, 1, 10]
+        arr = Vector{ET}(undef, n)
+        ccall(:jl_array_grow_end, Cvoid, (Any, UInt), arr, 1)
+        @test length(arr) == n+1
     end
 end
 
@@ -4469,8 +4684,13 @@ end
 # Make sure arrayset can handle `Array{T}` (where `T` is a type and not a
 # `TypeVar`) without crashing
 let
-    function arrayset_unknown_dim(::Type{T}, n) where T
-        Base.arrayset(true, reshape(Vector{T}(undef, 1), fill(1, n)...), 2, 1)
+    @noinline function arrayset_unknown_dim(::Type{T}, n) where T
+        a = Vector{T}(undef, 1)
+        fill!(a, 0)
+        a = reshape(a, fill(1, n)...)::Array{T}
+        @test a[1] === 0
+        Core.memoryrefset!(a.ref, 2, :not_atomic, true)
+        @test a[1] === 2
     end
     arrayset_unknown_dim(Any, 1)
     arrayset_unknown_dim(Any, 2)
@@ -4480,88 +4700,6 @@ let
     arrayset_unknown_dim(Int, 3)
 end
 
-module TestSharedArrayResize
-using Test
-# Attempting to change the shape of a shared array should unshare it and
-# not modify the original data
-function test_shared_array_resize(::Type{T}) where T
-    len = 100
-    a = Vector{T}(undef, len)
-    function test_unshare(f)
-        a′ = reshape(reshape(a, (len ÷ 2, 2)), len)
-        a[:] = 1:length(a)
-        # The operation should fail on the owner shared array
-        # and has no side effect.
-        @test_throws ErrorException f(a)
-        @test a == [1:len;]
-        @test a′ == [1:len;]
-        @test pointer(a) == pointer(a′)
-        # The operation should pass on the non-owner shared array
-        # and should unshare the arrays with no effect on the original one.
-        f(a′)
-        @test a == [1:len;]
-        @test pointer(a) != pointer(a′)
-    end
-
-    test_unshare(a->ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), a, 0))
-    test_unshare(a->ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), a, 1))
-    test_unshare(a->ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), a, 0))
-    test_unshare(a->ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), a, 1))
-    test_unshare(a->deleteat!(a, 10))
-    test_unshare(a->deleteat!(a, 90))
-    test_unshare(a->ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), a, 0))
-    test_unshare(a->ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), a, 1))
-    test_unshare(a->ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), a, 0))
-    test_unshare(a->ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), a, 1))
-    test_unshare(a->insert!(a, 10, 10))
-    test_unshare(a->insert!(a, 90, 90))
-end
-test_shared_array_resize(Int)
-test_shared_array_resize(Any)
-end
-
-module TestArrayNUL
-using Test
-function check_nul(a::Vector{UInt8})
-    b = ccall(:jl_array_cconvert_cstring,
-              Ref{Vector{UInt8}}, (Vector{UInt8},), a)
-    @test unsafe_load(pointer(b), length(b) + 1) == 0x0
-    return b === a
-end
-
-a = UInt8[]
-b = "aaa"
-c = [0x2, 0x1, 0x3]
-
-@test check_nul(a)
-@test check_nul(unsafe_wrap(Vector{UInt8},b))
-@test check_nul(c)
-d = [0x2, 0x1, 0x3]
-@test check_nul(d)
-push!(d, 0x3)
-@test check_nul(d)
-push!(d, 0x3)
-@test check_nul(d)
-ccall(:jl_array_del_end, Cvoid, (Any, UInt), d, 2)
-@test check_nul(d)
-ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 1)
-@test check_nul(d)
-ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 1)
-@test check_nul(d)
-ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 10)
-@test check_nul(d)
-ccall(:jl_array_del_beg, Cvoid, (Any, UInt), d, 8)
-@test check_nul(d)
-ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), d, 8)
-@test check_nul(d)
-ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), d, 8)
-@test check_nul(d)
-f = unsafe_wrap(Array, pointer(d), length(d))
-@test !check_nul(f)
-f = unsafe_wrap(Array, ccall(:malloc, Ptr{UInt8}, (Csize_t,), 10), 10, own = true)
-@test !check_nul(f)
-end
-
 # Copy of `#undef`
 copyto!(Vector{Any}(undef, 10), Vector{Any}(undef, 10))
 function test_copy_alias(::Type{T}) where T
@@ -4635,8 +4773,28 @@ end
 @test Macro_Yielding_Global_Assignment.x == 2
 
 # issue #15718
-@test :(f($NaN)) == :(f($NaN))
-@test isequal(:(f($NaN)), :(f($NaN)))
+function compare_test(x, y)
+    lx = Meta.lower(@__MODULE__, x)
+    ly = Meta.lower(@__MODULE__, y)
+    if isequal(x, y)
+        @test x == y
+        @test hash(x) == hash(y)
+        @test isequal(lx, ly)
+        @test lx == ly
+        @test hash(lx) == hash(ly)
+        true
+    else
+        @test x != y
+        @test !isequal(lx, ly)
+        @test lx != ly
+        false
+    end
+end
+@test compare_test(:(f($NaN)), :(f($NaN)))
+@test !compare_test(:(1 + (1 * 1)), :(1 + (1 * 1.0)))
+@test compare_test(:(1 + (1 * $NaN)), :(1 + (1 * $NaN)))
+@test compare_test(QuoteNode(NaN), QuoteNode(NaN))
+@test !compare_test(QuoteNode(1), QuoteNode(1.0))
 
 # PR #16011 Make sure dead code elimination doesn't delete push and pop
 # of metadata
@@ -4901,6 +5059,9 @@ let ft = Base.datatype_fieldtypes
     @test !isdefined(ft(B12238.body.body)[1], :instance)  # has free type vars
 end
 
+# issue #54969
+@test !isdefined(Memory.body, :instance)
+
 # `where` syntax in constructor definitions
 (A12238{T} where T<:Real)(x) = 0
 @test A12238{<:Real}(0) == 0
@@ -4956,7 +5117,7 @@ function trigger14878()
     w.ext[:14878] = B14878(junk)  # global junk not defined!
     return w
 end
-@test_throws UndefVarError(:junk) trigger14878()
+@test_throws UndefVarError(:junk, @__MODULE__) trigger14878()
 
 # issue #1090
 function f1090(x)::Int
@@ -5037,7 +5198,7 @@ function f16340(x::T) where T
     return g
 end
 let g = f16340(1)
-    @test isa(typeof(g).name.mt.defs.sig, UnionAll)
+    @test isa(only(methods(g)).sig, UnionAll)
 end
 
 # issue #16793
@@ -5196,9 +5357,9 @@ let x = 1
     @noinline g18444(a) = (x += 1; a[])
     f18444_1(a) = invoke(sin, Tuple{Int}, g18444(a))
     f18444_2(a) = invoke(sin, Tuple{Integer}, g18444(a))
-    @test_throws ErrorException("invoke: argument type error") f18444_1(Ref{Any}(1.0))
+    @test_throws "TypeError: in invoke: argument type error, expected" f18444_1(Ref{Any}(1.0))
     @test x == 2
-    @test_throws ErrorException("invoke: argument type error") f18444_2(Ref{Any}(1.0))
+    @test_throws "TypeError: in invoke: argument type error, expected" f18444_2(Ref{Any}(1.0))
     @test x == 3
     @test f18444_1(Ref{Any}(1)) === sin(1)
     @test x == 4
@@ -5274,9 +5435,9 @@ GC.enable(true)
 @test isa(which(bad_tvars, ()), Method)
 @test bad_tvars() === 1
 @test_warn "declares type variable T but does not use it" @eval bad_tvars2() where {T} = T
-@test_throws UndefVarError(:T) bad_tvars2()
+@test_throws UndefVarError(:T, :static_parameter) bad_tvars2()
 missing_tvar(::T...) where {T} = T
-@test_throws UndefVarError(:T) missing_tvar()
+@test_throws UndefVarError(:T, :static_parameter) missing_tvar()
 @test missing_tvar(1) === Int
 @test missing_tvar(1, 2, 3) === Int
 @test_throws MethodError missing_tvar(1, 2, "3")
@@ -5403,6 +5564,21 @@ function g37690()
 end
 @test g37690().x === 0
 
+# issue #48889
+function f48889()
+    let j=0, f, i
+        while j < 3
+            i = j + 1
+            if j == 0
+                f = ()->i
+            end
+            j += 1
+        end
+        f
+    end
+end
+@test f48889()() == 3
+
 function _assigns_and_captures_arg(a)
     a = a
     return ()->a
@@ -5513,77 +5689,135 @@ struct A16424
     x
     y
 end
+const orig_A16424 = A16424
 
 struct A16424  # allowed
     x
     y
 end
+@test A16424 === orig_A16424
 
-@test_throws ErrorException @eval struct A16424
+struct A16424
     x
     z
 end
+@test A16424 !== orig_A16424
+const A16424 = orig_A16424
 
-@test_throws ErrorException @eval struct A16424
+struct A16424
     x
     y::Real
 end
+@test A16424 !== orig_A16424
+const A16424 = orig_A16424
 
 struct B16424{T}
     a
 end
+const orig_B16424 = B16424
 
 struct B16424{T}
     a
 end
+@test B16424 === orig_B16424
 
-@test_throws ErrorException @eval struct B16424{S}
+struct B16424{S}
     a
 end
+@test B16424 !== orig_B16424
 
 struct C16424{T,S}
     x::T
     y::S
 end
+const orig_C16424 = C16424
 
 struct C16424{T,S}
     x::T
     y::S
 end
+@test C16424 === orig_C16424
 
-@test_throws ErrorException @eval struct C16424{T,S}
+struct C16424{T,S}
     x::S
     y::T
 end
+@test C16424 !== orig_C16424
 
 struct D16424{T<:Real,S<:T}
     x::Vector{S}
     y::Vector{T}
 end
+const orig_D16424 = D16424
 
 struct D16424{T<:Real,S<:T}
     x::Vector{S}
     y::Vector{T}
 end
+@test D16424 === orig_D16424
 
-@test_throws ErrorException struct D16424{T<:Real,S<:Real}
+struct D16424{T<:Real,S<:Real}
     x::Vector{S}
     y::Vector{T}
 end
+@test D16424 !== orig_D16424
 
 # issue #20999, allow more type redefinitions
 struct T20999
     x::Array{T} where T<:Real
 end
+const orig_T20999 = T20999
 
 struct T20999
     x::Array{T} where T<:Real
 end
+@test T20999 === orig_T20999
 
-@test_throws ErrorException struct T20999
+struct T20999
     x::Array{T} where T<:Integer
 end
+@test T20999 !== orig_T20999
+
+# issue #54757, type redefinitions with recursive reference in supertype
+struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A,N}},Vararg{Y,N}} where {X,Y<:T54757}, N}
+    x::A
+    y::Union{A,T54757{A,N}}
+    z::T54757{A}
+end
+const orig_T54757 = T54757
+
+struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A,N}},Vararg{Y,N}} where {X,Y<:T54757}, N}
+    x::A
+    y::Union{A,T54757{A,N}}
+    z::T54757{A}
+end
+# The type is identical - either answer is semantically allowed here
+# However, knowing that the type is identical would require reasoning about the purity of the
+# field definitions exprs, which we do not do. Thus, simply check that this doesn't error and
+# then reset to the original for the next test.
+const T54757 = orig_T54757
+
+struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A}},Vararg{Y,N}} where {X,Y<:T54757}, N}
+    x::A
+    y::Union{A,T54757{A,N}}
+    z::T54757{A}
+end
+@test orig_T54757 !== T54757
+
+# Type redefinition with multiple tvars and reference in the field types
+struct DictLike{K, V} <: AbstractDict{K, V}
+    self::DictLike{K, V}
+end
+const orig_DictLike = DictLike
+
+struct DictLike{K, V} <: AbstractDict{K, V}
+    self::DictLike{K, V}
+end
+# It is semantically allowable to re-use the old type, but we need to
+# make sure in either case that the field type matches the definition
+@test fieldtype(DictLike, 1) === DictLike
 
+# initialization of Vector{Core.TypeofBottom}
 let a = Vector{Core.TypeofBottom}(undef, 2)
     @test a[1] == Union{}
     @test a == [Union{}, Union{}]
@@ -5650,6 +5884,13 @@ let ni128 = sizeof(FP128test) ÷ sizeof(Int),
     @test reinterpret(UInt128, arr[2].fp) == expected
 end
 
+# make sure VecElement Tuple has the C alignment and ABI for supported types
+primitive type Int24 24 end
+@test Base.datatype_alignment(NTuple{10,VecElement{Int16}}) == 32
+@test Base.datatype_alignment(NTuple{10,VecElement{Int24}}) == 4
+@test Base.datatype_alignment(NTuple{10,VecElement{Int64}}) == 128
+@test Base.datatype_alignment(NTuple{10,VecElement{Int128}}) == 256
+
 # issue #21516
 struct T21516
     x::Vector{Float64}
@@ -5877,7 +6118,7 @@ function f_unused_undefined_sp(::T...) where T
     T
     return 0
 end
-@test_throws UndefVarError(:T) f_unused_undefined_sp()
+@test_throws UndefVarError(:T, :static_parameter) f_unused_undefined_sp()
 
 # note: the constant `5` here should be > DataType.ninitialized.
 # This tests that there's no crash due to accessing Type.body.layout.
@@ -5990,7 +6231,6 @@ module GlobalDef18933
         global sincos
         nothing
     end
-    @test which(@__MODULE__, :sincos) === Base.Math
     @test @isdefined sincos
     @test sincos === Base.sincos
 end
@@ -6019,10 +6259,10 @@ const unboxedunions = [Union{Int8, Nothing},
 @test Base.isbitsunion(unboxedunions[2])
 @test Base.isbitsunion(unboxedunions[3])
 
-@test Base.bitsunionsize(unboxedunions[1]) == 1
-@test Base.bitsunionsize(unboxedunions[2]) == 2
-@test Base.bitsunionsize(unboxedunions[3]) == 16
-@test Base.bitsunionsize(unboxedunions[4]) == 8
+@test Base.aligned_sizeof(unboxedunions[1]) == 1
+@test Base.aligned_sizeof(unboxedunions[2]) == 2
+@test Base.aligned_sizeof(unboxedunions[3]) == 16
+@test Base.aligned_sizeof(unboxedunions[4]) == 8
 
 @test sizeof(unboxedunions[1]) == 1
 @test sizeof(unboxedunions[2]) == 2
@@ -6230,6 +6470,16 @@ let
     @test_throws ArgumentError unsafe_wrap(Array, convert(Ptr{Union{Int, Nothing}}, pointer(A5)), 6)
 end
 
+# More unsafe_wrap
+let
+    a = [1, 2, 3]
+    GC.@preserve a begin
+        m = unsafe_wrap(Memory{Int}, pointer(a), (3,))
+        @test m == a
+        @test m isa Memory{Int}
+    end
+end
+
 # copyto!
 A23567 = Vector{Union{Float64, Nothing}}(undef, 5)
 B23567 = collect(Union{Float64, Nothing}, 1.0:3.0)
@@ -6330,7 +6580,7 @@ for U in unboxedunions
             resize!(A, len)
             @test length(A) === len
             @test A[1] === initvalue2(F2)
-            @test typeof(A[end]) === F
+            @test typeof(A[end]) === F2
 
             # deleteat!
             F = Base.uniontypes(U)[2]
@@ -6418,304 +6668,288 @@ for U in unboxedunions
     end
 end
 
-@testset "jl_array_grow_at_end" begin
+@testset "array _growatend!" begin
 
 # start w/ array, set & check elements, grow it, check that elements stayed correct, set & check elements
 A = Vector{Union{Missing, UInt8}}(undef, 2)
-Base.arrayset(true, A, 0x01, 1)
-Base.arrayset(true, A, missing, 2)
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === missing
+setindex!(A, 0x01, 1)
+setindex!(A, missing, 2)
+@test getindex(A, 1) === 0x01
+@test getindex(A, 2) === missing
 
-# grow_at_end 2
 resize!(A, 5)
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === missing
-Base.arrayset(true, A, 0x03, 3)
-Base.arrayset(true, A, missing, 4)
-Base.arrayset(true, A, 0x05, 5)
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === 0x03
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === 0x05
+@test getindex(A, 1) === 0x01
+@test getindex(A, 2) === missing
+# The rest of the values are unspecified
+setindex!(A, 0x03, 3)
+setindex!(A, missing, 4)
+setindex!(A, 0x05, 5)
+@test isequal(A, [0x01, missing, 0x03, missing, 0x05])
 
 # grow_at_end 1
 Base._growat!(A, 4, 1)
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === 0x03
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === missing
-@test Base.arrayref(true, A, 6) === 0x05
-
-Base.arrayset(true, A, missing, 1)
-Base.arrayset(true, A, 0x02, 2)
-Base.arrayset(true, A, missing, 3)
-Base.arrayset(true, A, 0x04, 4)
-Base.arrayset(true, A, missing, 5)
-Base.arrayset(true, A, 0x06, 6)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === 0x02
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === 0x04
-@test Base.arrayref(true, A, 5) === missing
-@test Base.arrayref(true, A, 6) === 0x06
+@test getindex(A, 1) === 0x01
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === 0x03
+#A[4] is unspecified
+@test getindex(A, 5) === missing
+@test getindex(A, 6) === 0x05
+
+setindex!(A, missing, 1)
+setindex!(A, 0x02, 2)
+setindex!(A, missing, 3)
+setindex!(A, 0x04, 4)
+setindex!(A, missing, 5)
+setindex!(A, 0x06, 6)
+@test isequal(A, [missing, 0x2, missing, 0x4, missing, 0x6])
 
 # grow_at_end 5
 Base._growat!(A, 4, 1)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === 0x02
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === 0x04
-@test Base.arrayref(true, A, 6) === missing
-@test Base.arrayref(true, A, 7) === 0x06
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === 0x02
+@test getindex(A, 3) === missing
+#A[4] is unspecified
+@test getindex(A, 5) === 0x04
+@test getindex(A, 6) === missing
+@test getindex(A, 7) === 0x06
 
 # grow_at_end 6
 resize!(A, 8)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === 0x02
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === 0x04
-@test Base.arrayref(true, A, 6) === missing
-@test Base.arrayref(true, A, 7) === 0x06
-@test Base.arrayref(true, A, 8) === missing
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === 0x02
+@test getindex(A, 3) === missing
+# A[4] still unspecified
+@test getindex(A, 5) === 0x04
+@test getindex(A, 6) === missing
+@test getindex(A, 7) === 0x06
+# A[8] is unspecified but test that it exists
+@test getindex(A, 8) isa Any
 
 # grow_at_end 4
 resize!(A, 1048576)
 resize!(A, 1048577)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === 0x02
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === 0x04
-@test Base.arrayref(true, A, 6) === missing
-@test Base.arrayref(true, A, 7) === 0x06
-@test Base.arrayref(true, A, 8) === missing
-foreach(9:1048577) do i
-    @test Base.arrayref(true, A, i) === missing
-end
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === 0x02
+@test getindex(A, 3) === missing
+# A[4] is stil still unspecified
+@test getindex(A, 5) === 0x04
+@test getindex(A, 6) === missing
+@test getindex(A, 7) === 0x06
+@test getindex(A, 8) === missing
+# 9:1048577 are unspecified
 foreach(9:1048577) do i
-    Base.arrayset(true, A, i % UInt8, i)
-    @test Base.arrayref(true, A, i) === i % UInt8
+    setindex!(A, i % UInt8, i)
+    @test getindex(A, i) === i % UInt8
 end
 
 # grow_at_end 3
 A = Vector{Union{Missing, UInt8}}(undef, 1048577)
 foreach(1:1048577) do i
-    @test Base.arrayref(true, A, i) === missing
-    Base.arrayset(true, A, i % UInt8, i)
-    @test Base.arrayref(true, A, i) === i % UInt8
+    @test getindex(A, i) === missing
+    setindex!(A, i % UInt8, i)
+    @test getindex(A, i) === i % UInt8
 end
 Base._growat!(A, 1048576, 1)
 @test length(A) == 1048578
 foreach(1:1048575) do i
-    @test Base.arrayref(true, A, i) === i % UInt8
+    @test getindex(A, i) === i % UInt8
     @test A[i] === i % UInt8
 end
-@test Base.arrayref(true, A, 1048576) === missing
-@test Base.arrayref(true, A, 1048577) === 1048576 % UInt8
-@test Base.arrayref(true, A, 1048578) === 1048577 % UInt8
+@test getindex(A, 1048576) === missing
+@test getindex(A, 1048577) === 1048576 % UInt8
+@test getindex(A, 1048578) === 1048577 % UInt8
 
 end # @testset
 
-@testset "jl_array_grow_at_beg" begin
+@testset "array _growatbeg!" begin
 
 # grow_at_beg 4
 A = Vector{Union{Missing, UInt8}}(undef, 5)
-Base.arrayset(true, A, 0x01, 1)
-Base.arrayset(true, A, missing, 2)
-Base.arrayset(true, A, 0x03, 3)
-Base.arrayset(true, A, missing, 4)
-Base.arrayset(true, A, 0x05, 5)
+setindex!(A, 0x01, 1)
+setindex!(A, missing, 2)
+setindex!(A, 0x03, 3)
+setindex!(A, missing, 4)
+setindex!(A, 0x05, 5)
 Base._growat!(A, 1, 1)
 
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === 0x01
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === 0x03
-@test Base.arrayref(true, A, 5) === missing
-@test Base.arrayref(true, A, 6) === 0x05
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === 0x01
+@test getindex(A, 3) === missing
+@test getindex(A, 4) === 0x03
+@test getindex(A, 5) === missing
+@test getindex(A, 6) === 0x05
 
 # grow_at_beg 2
 Base._growat!(A, 1, 1)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === 0x01
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === 0x03
-@test Base.arrayref(true, A, 6) === missing
-@test Base.arrayref(true, A, 7) === 0x05
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === 0x01
+@test getindex(A, 4) === missing
+@test getindex(A, 5) === 0x03
+@test getindex(A, 6) === missing
+@test getindex(A, 7) === 0x05
 
 # grow_at_beg 1
 Base._growat!(A, 2, 1)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === 0x01
-@test Base.arrayref(true, A, 5) === missing
-@test Base.arrayref(true, A, 6) === 0x03
-@test Base.arrayref(true, A, 7) === missing
-@test Base.arrayref(true, A, 8) === 0x05
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === missing
+@test getindex(A, 4) === 0x01
+@test getindex(A, 5) === missing
+@test getindex(A, 6) === 0x03
+@test getindex(A, 7) === missing
+@test getindex(A, 8) === 0x05
 
 # grow_at_beg 9
 Base._growat!(A, 1, 1)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === 0x01
-@test Base.arrayref(true, A, 6) === missing
-@test Base.arrayref(true, A, 7) === 0x03
-@test Base.arrayref(true, A, 8) === missing
-@test Base.arrayref(true, A, 9) === 0x05
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === missing
+@test getindex(A, 4) === missing
+@test getindex(A, 5) === 0x01
+@test getindex(A, 6) === missing
+@test getindex(A, 7) === 0x03
+@test getindex(A, 8) === missing
+@test getindex(A, 9) === 0x05
 
 # grow_at_beg 8
 A = Vector{Union{Missing, UInt8}}(undef, 5)
-Base.arrayset(true, A, 0x01, 1)
-Base.arrayset(true, A, missing, 2)
-Base.arrayset(true, A, 0x03, 3)
-Base.arrayset(true, A, missing, 4)
-Base.arrayset(true, A, 0x05, 5)
+setindex!(A, 0x01, 1)
+setindex!(A, missing, 2)
+setindex!(A, 0x03, 3)
+setindex!(A, missing, 4)
+setindex!(A, 0x05, 5)
 Base._growat!(A, 2, 1)
 Base._growat!(A, 2, 1)
 
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === 0x03
-@test Base.arrayref(true, A, 6) === missing
-@test Base.arrayref(true, A, 7) === 0x05
+@test getindex(A, 1) === 0x01
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === missing
+@test getindex(A, 4) === missing
+@test getindex(A, 5) === 0x03
+@test getindex(A, 6) === missing
+@test getindex(A, 7) === 0x05
 
 # grow_at_beg 5
 A = Vector{Union{Missing, UInt8}}(undef, 5)
-Base.arrayset(true, A, 0x01, 1)
-Base.arrayset(true, A, missing, 2)
-Base.arrayset(true, A, 0x03, 3)
-Base.arrayset(true, A, missing, 4)
-Base.arrayset(true, A, 0x05, 5)
+setindex!(A, 0x01, 1)
+setindex!(A, missing, 2)
+setindex!(A, 0x03, 3)
+setindex!(A, missing, 4)
+setindex!(A, 0x05, 5)
 Base._growat!(A, 4, 1)
 Base._growat!(A, 4, 1)
 
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === 0x03
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === missing
-@test Base.arrayref(true, A, 6) === missing
-@test Base.arrayref(true, A, 7) === 0x05
+@test getindex(A, 1) === 0x01
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === 0x03
+@test getindex(A, 4) === missing
+@test getindex(A, 5) === missing
+@test getindex(A, 6) === missing
+@test getindex(A, 7) === 0x05
 
 # grow_at_beg 6
 Base._growat!(A, 2, 3)
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === missing
-@test Base.arrayref(true, A, 6) === 0x03
-@test Base.arrayref(true, A, 7) === missing
-@test Base.arrayref(true, A, 8) === missing
-@test Base.arrayref(true, A, 9) === missing
-@test Base.arrayref(true, A, 10) === 0x05
+@test getindex(A, 1) === 0x01
+@test getindex(A, 2) === missing
+@test getindex(A, 6) === 0x03
+@test getindex(A, 7) === missing
+@test getindex(A, 8) === missing
+@test getindex(A, 9) === missing
+@test getindex(A, 10) === 0x05
 
 # grow_at_beg 3
 A = Vector{Union{Missing, UInt8}}(undef, 1048577)
-Base.arrayset(true, A, 0x01, 1)
-Base.arrayset(true, A, missing, 2)
-Base.arrayset(true, A, 0x03, 3)
-Base.arrayset(true, A, missing, 4)
-Base.arrayset(true, A, 0x05, 5)
+setindex!(A, 0x01, 1)
+setindex!(A, missing, 2)
+setindex!(A, 0x03, 3)
+setindex!(A, missing, 4)
+setindex!(A, 0x05, 5)
 Base._growat!(A, 2, 1)
 
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === 0x03
-@test Base.arrayref(true, A, 5) === missing
-@test Base.arrayref(true, A, 6) === 0x05
+@test getindex(A, 1) === 0x01
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === missing
+@test getindex(A, 4) === 0x03
+@test getindex(A, 5) === missing
+@test getindex(A, 6) === 0x05
 
 foreach(7:length(A)) do i
-    @test Base.arrayref(true, A, i) === missing
-    Base.arrayset(true, A, i % UInt8, i)
-    @test Base.arrayref(true, A, i) === i % UInt8
+    @test getindex(A, i) === missing
+    setindex!(A, i % UInt8, i)
+    @test getindex(A, i) === i % UInt8
 end
 
 end # @testset
 
-@testset "jl_array_del_at_beg" begin
+@testset "array _deleteatbeg!" begin
 
 A = Vector{Union{Missing, UInt8}}(undef, 5)
-Base.arrayset(true, A, 0x01, 1)
-Base.arrayset(true, A, missing, 2)
-Base.arrayset(true, A, 0x03, 3)
-Base.arrayset(true, A, missing, 4)
-Base.arrayset(true, A, 0x05, 5)
+setindex!(A, 0x01, 1)
+setindex!(A, missing, 2)
+setindex!(A, 0x03, 3)
+setindex!(A, missing, 4)
+setindex!(A, 0x05, 5)
 Base._deleteat!(A, 2, 1)
 
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === 0x03
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === 0x05
+@test getindex(A, 1) === 0x01
+@test getindex(A, 2) === 0x03
+@test getindex(A, 3) === missing
+@test getindex(A, 4) === 0x05
 
 Base._deleteat!(A, 1, 1)
-@test Base.arrayref(true, A, 1) === 0x03
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === 0x05
+@test getindex(A, 1) === 0x03
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === 0x05
 
 A = Vector{Union{Missing, UInt8}}(undef, 5)
-Base.arrayset(true, A, 0x01, 1)
-Base.arrayset(true, A, missing, 2)
-Base.arrayset(true, A, 0x03, 3)
-Base.arrayset(true, A, missing, 4)
-Base.arrayset(true, A, 0x05, 5)
+setindex!(A, 0x01, 1)
+setindex!(A, missing, 2)
+setindex!(A, 0x03, 3)
+setindex!(A, missing, 4)
+setindex!(A, 0x05, 5)
 Base._growat!(A, 1, 1)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === 0x01
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === 0x03
-@test Base.arrayref(true, A, 5) === missing
-@test Base.arrayref(true, A, 6) === 0x05
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === 0x01
+@test getindex(A, 3) === missing
+@test getindex(A, 4) === 0x03
+@test getindex(A, 5) === missing
+@test getindex(A, 6) === 0x05
 Base._deleteat!(A, 2, 1)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === 0x03
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === 0x05
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === 0x03
+@test getindex(A, 4) === missing
+@test getindex(A, 5) === 0x05
 Base._deleteat!(A, 1, 2)
-@test Base.arrayref(true, A, 1) === 0x03
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === 0x05
+@test getindex(A, 1) === 0x03
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === 0x05
 Base._deleteat!(A, 1, 1)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === 0x05
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === 0x05
 
 end # @testset
 
-@testset "jl_array_del_at_end" begin
+@testset "array _deleteatend!" begin
 
 A = Vector{Union{Missing, UInt8}}(undef, 5)
-Base.arrayset(true, A, 0x01, 1)
-Base.arrayset(true, A, missing, 2)
-Base.arrayset(true, A, 0x03, 3)
-Base.arrayset(true, A, missing, 4)
-Base.arrayset(true, A, 0x05, 5)
+setindex!(A, 0x01, 1)
+setindex!(A, missing, 2)
+setindex!(A, 0x03, 3)
+setindex!(A, missing, 4)
+setindex!(A, 0x05, 5)
 Base._deleteat!(A, 5, 1)
 
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === 0x03
-@test Base.arrayref(true, A, 4) === missing
+@test getindex(A, 1) === 0x01
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === 0x03
+@test getindex(A, 4) === missing
 
 Base._deleteat!(A, 3, 1)
-@test Base.arrayref(true, A, 1) === 0x01
-@test Base.arrayref(true, A, 2) === missing
-@test Base.arrayref(true, A, 3) === missing
+@test getindex(A, 1) === 0x01
+@test getindex(A, 2) === missing
+@test getindex(A, 3) === missing
 
 end # @testset
 
@@ -6737,23 +6971,23 @@ end
 
 # jl_array_shrink
 let A=Vector{Union{UInt8, Missing}}(undef, 1048577)
-    Base.arrayset(true, A, 0x01, 1)
-    Base.arrayset(true, A, missing, 2)
-    Base.arrayset(true, A, 0x03, 3)
-    Base.arrayset(true, A, missing, 4)
-    Base.arrayset(true, A, 0x05, 5)
+    setindex!(A, 0x01, 1)
+    setindex!(A, missing, 2)
+    setindex!(A, 0x03, 3)
+    setindex!(A, missing, 4)
+    setindex!(A, 0x05, 5)
     deleteat!(A, 6:1048577)
-    @test Base.arrayref(true, A, 1) === 0x01
-    @test Base.arrayref(true, A, 2) === missing
-    @test Base.arrayref(true, A, 3) === 0x03
-    @test Base.arrayref(true, A, 4) === missing
-    @test Base.arrayref(true, A, 5) === 0x05
+    @test getindex(A, 1) === 0x01
+    @test getindex(A, 2) === missing
+    @test getindex(A, 3) === 0x03
+    @test getindex(A, 4) === missing
+    @test getindex(A, 5) === 0x05
     sizehint!(A, 5)
-    @test Base.arrayref(true, A, 1) === 0x01
-    @test Base.arrayref(true, A, 2) === missing
-    @test Base.arrayref(true, A, 3) === 0x03
-    @test Base.arrayref(true, A, 4) === missing
-    @test Base.arrayref(true, A, 5) === 0x05
+    @test getindex(A, 1) === 0x01
+    @test getindex(A, 2) === missing
+    @test getindex(A, 3) === 0x03
+    @test getindex(A, 4) === missing
+    @test getindex(A, 5) === 0x05
 end
 
 # copyto!/vcat w/ internal padding
@@ -6771,14 +7005,14 @@ primitive type TypeWith24Bits 24 end
 TypeWith24Bits(x::UInt32) = Core.Intrinsics.trunc_int(TypeWith24Bits, x)
 let x = TypeWith24Bits(0x112233), y = TypeWith24Bits(0x445566), z = TypeWith24Bits(0x778899)
     a = [x, x]
-    Core.arrayset(true, a, y, 2)
+    Core.memoryrefset!(Core.memoryrefnew(a.ref, 2, true), y, :not_atomic, true)
     @test a == [x, y]
     a[2] = z
     @test a == [x, z]
     @test pointer(a, 2) - pointer(a, 1) == 4
 
     b = [(x, x), (x, x)]
-    Core.arrayset(true, b, (x, y), 2)
+    Core.memoryrefset!(Core.memoryrefnew(b.ref, 2, true), (x, y), :not_atomic, true)
     @test b == [(x, x), (x, y)]
     b[2] = (y, z)
     @test b == [(x, x), (y, z)]
@@ -6880,7 +7114,7 @@ end
 # issue #21004
 const PTuple_21004{N,T} = NTuple{N,VecElement{T}}
 @test_throws ArgumentError("too few elements for tuple type $PTuple_21004") PTuple_21004(1)
-@test_throws UndefVarError(:T) PTuple_21004_2{N,T} = NTuple{N, VecElement{T}}(1)
+@test_throws UndefVarError(:T, :static_parameter) PTuple_21004_2{N,T} = NTuple{N, VecElement{T}}(1)
 
 #issue #22792
 foo_22792(::Type{<:Union{Int8,Int,UInt}}) = 1;
@@ -7007,7 +7241,7 @@ translate27368(::Type{Val{name}}) where {name} =
 # issue #27456
 @inline foo27456() = try baz_nonexistent27456(); catch; nothing; end
 bar27456() = foo27456()
-@test bar27456() == nothing
+@test bar27456() === nothing
 
 # issue #27365
 mutable struct foo27365
@@ -7178,7 +7412,7 @@ end
 c28399 = 42
 @test g28399(0)() == 42
 @test g28399(1)() == 42
-@test_throws UndefVarError(:__undef_28399__) f28399()
+@test_throws UndefVarError(:__undef_28399__, @__MODULE__) f28399()
 
 # issue #28445
 mutable struct foo28445
@@ -7201,9 +7435,23 @@ end
 @test repackage28445()
 
 # issue #28597
-@test_throws ArgumentError Array{Int, 2}(undef, 0, -10)
-@test_throws ArgumentError Array{Int, 2}(undef, -10, 0)
-@test_throws ArgumentError Array{Int, 2}(undef, -1, -1)
+@test_throws ArgumentError Matrix{Int}(undef, 0, -10)
+@test_throws ArgumentError Matrix{Int}(undef, -10, 0)
+@test_throws ArgumentError Matrix{Int}(undef, -1, -1)
+
+# issue #54244
+# test that zero sized array doesn't throw even with large axes
+bignum = Int==Int64 ? 2^32 : 2^16
+Array{Int}(undef, 0, bignum, bignum)
+Array{Int}(undef, bignum, bignum, 0)
+Array{Int}(undef, bignum, bignum, 0, bignum, bignum)
+# but also test that it does throw if the axes multiply to a multiple of typemax(UInt)
+@test_throws ArgumentError Array{Int}(undef, bignum, bignum)
+@test_throws ArgumentError Array{Int}(undef, 1, bignum, bignum)
+# also test that we always throw errors for negative dims even if other dims are 0 or the product is positive
+@test_throws ArgumentError Array{Int}(undef, 0, -4, -4)
+@test_throws ArgumentError Array{Int}(undef, -4, 1, 0)
+@test_throws ArgumentError Array{Int}(undef, -4, -4, 1)
 
 # issue #28812
 @test Tuple{Vararg{Array{T} where T,3}} === Tuple{Array,Array,Array}
@@ -7348,6 +7596,7 @@ end
 @test isa(Core.eval(@__MODULE__, :(Bar31062(()))), Bar31062)
 @test precompile(identity, (Foo31062,))
 
+using Core: SSAValue
 ftype_eval = Ref(0)
 FieldTypeA = String
 FieldTypeE = UInt32
@@ -7371,21 +7620,41 @@ let fc = FieldConvert(1.0, [2.0], 0x3, 0x4, 0x5)
 end
 @test ftype_eval[] == 1
 let code = code_lowered(FieldConvert)[1].code
-    @test code[1] == Expr(:call, GlobalRef(Core, :apply_type), GlobalRef(@__MODULE__, :FieldConvert), GlobalRef(@__MODULE__, :FieldTypeA), Expr(:static_parameter, 1))
-    @test code[2] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 1)
-    @test code[7] == Expr(:(=), Core.SlotNumber(10), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(2), Core.SlotNumber(10)))
-    @test code[8] == Core.SlotNumber(10)
-    @test code[9] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 2)
-    @test code[14] == Expr(:(=), Core.SlotNumber(9), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(9), Core.SlotNumber(9)))
-    @test code[15] == Core.SlotNumber(9)
-    @test code[16] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 4)
-    @test code[21] == Expr(:(=), Core.SlotNumber(8), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(16), Core.SlotNumber(8)))
-    @test code[22] == Core.SlotNumber(8)
-    @test code[23] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 5)
-    @test code[28] == Expr(:(=), Core.SlotNumber(7), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(23), Core.SlotNumber(7)))
-    @test code[29] == Core.SlotNumber(7)
-    @test code[30] == Expr(:new, Core.SSAValue(1), Core.SSAValue(8), Core.SSAValue(15), Core.SlotNumber(4), Core.SSAValue(22), Core.SSAValue(29))
-    @test code[31] == Core.ReturnNode(Core.SSAValue(30))
+    calls = Vector{Pair{SSAValue, Expr}}(undef, 0)
+    for i = 1:length(code)
+        expr = code[i]
+        if Meta.isexpr(expr, :call) || (Meta.isexpr(expr, :(=)) && Meta.isexpr(expr.args[2], :call))
+            push!(calls, SSAValue(i)=>expr)
+        end
+    end
+
+    function is_globalref(arg, gr)
+        while isa(arg, SSAValue)
+            arg = code[arg.id]
+        end
+        arg == gr
+    end
+
+    # calls[1]
+    @test all(is_globalref.(calls[1][2].args[1:3], (GlobalRef(Core, :apply_type), GlobalRef(@__MODULE__, :FieldConvert), GlobalRef(@__MODULE__, :FieldTypeA))))
+
+    # calls[2]
+    @test all(is_globalref.(calls[2][2].args[1:1], (GlobalRef(Core, :fieldtype),)))
+    @test all(calls[2][2].args[2:3] .== (calls[1][1], 1))
+
+    # calls[3] - isa
+
+    # calls[4]
+    let calle = calls[4][2]
+        @test Meta.isexpr(calle, :(=))
+        call = calle.args[2]
+        @test is_globalref(call.args[1], GlobalRef(Base, :convert))
+        @test call.args[2] == calls[2][1]
+    end
+
+    # calls[5]
+    @test all(is_globalref.(calls[5][2].args[1:1], (GlobalRef(Core, :fieldtype),)))
+    @test all(calls[5][2].args[2:3] .== (calls[1][1], 2))
 end
 
 # Issue #32820
@@ -7452,6 +7721,13 @@ struct A43411{S, T}
 end
 @test isbitstype(A43411{(:a,), Tuple{Int}})
 
+# issue #55189
+struct A55189{N}
+    children::NTuple{N,A55189{N}}
+end
+@test fieldtype(A55189{2}, 1) === Tuple{A55189{2}, A55189{2}}
+@assert !isbitstype(A55189{2})
+
 # issue #44614
 struct T44614_1{T}
     m::T
@@ -7497,16 +7773,11 @@ function f34482()
     Base.not_int("ABC")
     1
 end
-function g34482()
-    Core.Intrinsics.arraylen(1)
-    1
-end
 function h34482()
     Core.Intrinsics.bitcast(1, 1)
     1
 end
 @test_throws ErrorException f34482()
-@test_throws TypeError g34482()
 @test_throws TypeError h34482()
 
 struct NFANode34126
@@ -7544,29 +7815,48 @@ end
 struct T36104   # check that redefining it works, issue #21816
     v::Vector{T36104}
 end
-# with a gensymmed unionall
-struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
+struct S36104{K,V}
+    v::S36104{K,V}
+    S36104{K,V}() where {K,V} = new()
+    S36104{K,V}(x::S36104) where {K,V} = new(x)
+end
+@test !isdefined(Base.unwrap_unionall(Base.ImmutableDict).name, :partial)
+@test !isdefined(S36104.body.body.name, :partial)
+@test hasfield(typeof(S36104.body.body.name), :partial)
+struct S36104{K,V}   # check that redefining it works
+    v::S36104{K,V}
+    S36104{K,V}() where {K,V} = new()
+    S36104{K,V}(x::S36104) where {K,V} = new(x)
+end
+
+# with a gensymmed unionall (#39778)
+struct Symmetric39778{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
     data::S
     uplo::Char
 end
-struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
+const orig_Symmetric39778 = Symmetric39778
+struct Symmetric39778{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
     data::S
     uplo::Char
 end
-@test_throws ErrorException begin
-    struct Symmetric{T,S<:AbstractMatrix{T}} <: AbstractMatrix{T}
-        data::S
-        uplo::Char
-    end
-end
+@test Symmetric39778 === orig_Symmetric39778
+struct Symmetric39778{T,S<:AbstractMatrix{T}} <: AbstractMatrix{T}
+    data::S
+    uplo::Char
 end
+@test Symmetric39778 !== orig_Symmetric39778
+
+end # module M36104
+
 @test fieldtypes(M36104.T36104) == (Vector{M36104.T36104},)
 @test_throws ErrorException("expected") @eval(struct X36104; x::error("expected"); end)
-@test @isdefined(X36104)
+@test !@isdefined(X36104)
 struct X36104; x::Int; end
 @test fieldtypes(X36104) == (Int,)
 primitive type P36104 8 end
-@test_throws ErrorException("invalid redefinition of constant $(nameof(curmod)).P36104") @eval(primitive type P36104 16 end)
+const orig_P36104 = P36104
+primitive type P36104 16 end
+@test P36104 !== orig_P36104
 
 # Malformed invoke
 f_bad_invoke(x::Int) = invoke(x, (Any,), x)
@@ -7707,13 +7997,17 @@ struct ContainsPointerNopadding{T}
 end
 
 @test !Base.datatype_haspadding(PointerNopadding{Symbol})
+@test Base.datatype_isbitsegal(PointerNopadding{Int})
 @test !Base.datatype_haspadding(PointerNopadding{Int})
+@test Base.datatype_isbitsegal(PointerNopadding{Int})
 # Sanity check to make sure the meaning of haspadding didn't change.
-@test Base.datatype_haspadding(PointerNopadding{Any})
+@test !Base.datatype_haspadding(PointerNopadding{Any})
+@test !Base.datatype_isbitsegal(PointerNopadding{Any})
 @test !Base.datatype_haspadding(Tuple{PointerNopadding{Symbol}})
 @test !Base.datatype_haspadding(Tuple{PointerNopadding{Int}})
 @test !Base.datatype_haspadding(ContainsPointerNopadding{Symbol})
-@test Base.datatype_haspadding(ContainsPointerNopadding{Int})
+@test !Base.datatype_haspadding(ContainsPointerNopadding{Int})
+@test !Base.datatype_isbitsegal(ContainsPointerNopadding{Int})
 
 # Test the codegen optimized version as well as the unoptimized version of `jl_egal`
 @noinline unopt_jl_egal(@nospecialize(a), @nospecialize(b)) =
@@ -7859,7 +8153,10 @@ end
     setglobal!(m, :x, 2, :release)
     @test m.x === 2
     @test_throws ConcurrencyViolationError setglobal!(m, :x, 3, :not_atomic)
-    @test_throws ErrorException setglobal!(m, :x, 4., :release)
+    @test_throws TypeError setglobal!(m, :x, 4., :release)
+
+    f_set_bad_type(m) = setglobal!(m, :x, 4., :release)
+    @test_throws TypeError f_set_bad_type(m)
 
     m.x = 1
     @test m.x === 1
@@ -7966,14 +8263,14 @@ code_typed(f47476, (Int, Int, Vararg{Union{Int, NTuple{2,Int}}},))
 code_typed(f47476, (Int, Int, Int, Vararg{Union{Int, NTuple{2,Int}}},))
 code_typed(f47476, (Int, Int, Int, Int, Vararg{Union{Int, NTuple{2,Int}}},))
 @test f47476(1, 2, 3, 4, 5, 6, (7, 8)) === 2
-@test_throws UndefVarError(:N) f47476(1, 2, 3, 4, 5, 6, 7)
+@test_throws UndefVarError(:N, :static_parameter) f47476(1, 2, 3, 4, 5, 6, 7)
 
 vect47476(::Type{T}) where {T} = T
 @test vect47476(Type{Type{Type{Int32}}}) === Type{Type{Type{Int32}}}
 @test vect47476(Type{Type{Type{Int64}}}) === Type{Type{Type{Int64}}}
 
 g47476(::Union{Nothing,Int,Val{T}}...) where {T} = T
-@test_throws UndefVarError(:T) g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5)
+@test_throws UndefVarError(:T, :static_parameter) g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5)
 @test g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5, Val(6)) === 6
 let spec = only(methods(g47476)).specializations::Core.SimpleVector
     @test !isempty(spec)
@@ -8000,12 +8297,11 @@ for T in (Int, String, Symbol, Module)
     @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Tuple{T},)))
     @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{T,T},)))
     @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Tuple{T,T},)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Ref{T},)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{Ref{T}},)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{Vector{T}},)))
 end
-@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Ref{Int},)))
-@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Tuple{Ref{Int}},)))
-# objectid for datatypes is inconsistant for types that have unbound type parameters.
-@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (DataType,)))
-@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Tuple{Vector{Int}},)))
+@test Core.Compiler.is_foldable(Base.infer_effects(objectid, (DataType,)))
 
 # donotdelete should not taint consistency of the containing function
 f_donotdete(x) = (Core.Compiler.donotdelete(x); 1)
@@ -8040,3 +8336,313 @@ bar50293(@nospecialize(u)) = (Base.issingletontype(u.a), baz50293(u.a))
 let u = Union{Type{Union{}}, Type{Any}}, ab = bar50293(u)
     @test ab[1] == ab[2] == false
 end
+
+# `SimpleVector`-operations should be concrete-eval eligible
+@test Core.Compiler.is_foldable(Base.infer_effects(length, (Core.SimpleVector,)))
+@test Core.Compiler.is_foldable(Base.infer_effects(getindex, (Core.SimpleVector,Int)))
+
+# Test that a the lowering of nothrow globalref
+module WellKnownGlobal
+    global well_known = 1
+end
+macro insert_global()
+    Expr(:call, GlobalRef(Base, :println), GlobalRef(WellKnownGlobal, :well_known))
+end
+check_globalref_lowering() = @insert_global
+let src = code_lowered(check_globalref_lowering)[1]
+    @test length(src.code) == 4
+end
+
+# Test correctness of widen_diagonal
+let widen_diagonal(x::UnionAll) = Base.rewrap_unionall(Base.widen_diagonal(Base.unwrap_unionall(x), x), x)
+    @test Tuple{Int,Float64} <: widen_diagonal(NTuple)
+    @test Tuple{Int,Float64} <: widen_diagonal(Tuple{T,T} where {T})
+    @test Tuple{Real,Int,Float64} <: widen_diagonal(Tuple{S,Vararg{T}} where {S, T<:S})
+    @test Tuple{Int,Int,Float64,Float64} <: widen_diagonal(Tuple{S,S,Vararg{T}} where {S, T<:S})
+    @test Union{Tuple{T}, Tuple{T,Int}} where {T} === widen_diagonal(Union{Tuple{T}, Tuple{T,Int}} where {T})
+    @test Tuple === widen_diagonal(Union{Tuple{Vararg{S}}, Tuple{Vararg{T}}} where {S, T})
+    @test Tuple{Vararg{Val{<:Set}}} == widen_diagonal(Tuple{Vararg{T}} where T<:Val{<:Set})
+end
+
+# Test try/catch/else ordering
+function test_try_catch_else()
+    local x
+    try
+        x = 1
+    catch
+        rethrow()
+    else
+        return x
+    end
+end
+@test test_try_catch_else() == 1
+
+# #52433
+@test_throws ErrorException Core.Intrinsics.pointerref(Ptr{Vector{Int64}}(C_NULL), 1, 0)
+
+# #53034 (Union normalization for typevar elimination)
+@test Tuple{Int,Any} <: Tuple{Union{Int,T},T} where {T>:Int}
+@test Tuple{Int,Any} <: Tuple{Union{Int,T},T} where {T>:Integer}
+# #53034 (Union normalization for Type elimination)
+@test Int isa Type{Union{Int,T2} where {T2<:T1}} where {T1}
+@test Int isa Type{Union{Int,T1}} where {T1}
+@test Int isa Union{UnionAll, Type{Union{Int,T2} where {T2<:T1}}} where {T1}
+@test Int isa Union{Union, Type{Union{Int,T1}}} where {T1}
+@test_broken Int isa Union{UnionAll, Type{Union{Int,T2} where {T2<:T1}} where {T1}}
+@test_broken Int isa Union{Union, Type{Union{Int,T1}} where {T1}}
+
+let M = @__MODULE__
+    Core.eval(M, :(global a_typed_global))
+    @test Core.eval(M, :(global a_typed_global::$(Tuple{Union{Integer,Nothing}}))) === nothing
+    @Core.latestworld
+    @test Core.get_binding_type(M, :a_typed_global) === Tuple{Union{Integer,Nothing}}
+    @test Core.eval(M, :(global a_typed_global::$(Tuple{Union{Integer,Nothing}}))) === nothing
+    @test Core.eval(M, :(global a_typed_global::$(Union{Tuple{Integer},Tuple{Nothing}}))) === nothing
+    @test_throws(ErrorException("cannot set type for global $(nameof(M)).a_typed_global. It already has a value or is already set to a different type."),
+                 Core.eval(M, :(global a_typed_global::$(Union{Nothing,Tuple{Union{Integer,Nothing}}}))))
+    @test Core.eval(M, :(global a_typed_global)) === nothing
+    @test Core.get_binding_type(M, :a_typed_global) == Tuple{Union{Integer,Nothing}}
+end
+
+@test Base.unsafe_convert(Ptr{Int}, [1]) !== C_NULL
+
+# Test that new macros are allowed to be defined inside Expr(:toplevel) returned by macros
+macro macroception()
+    Expr(:toplevel, :(macro foo() 1 end), :(@foo))
+end
+
+@test (@macroception()) === 1
+
+# overlay method tables
+# =====================
+
+module OverlayModule
+
+using Base.Experimental: @MethodTable, @overlay
+using Test
+
+@MethodTable mt
+# long function def
+let m = @overlay mt function sin(x::Float64); 1; end
+    @test isa(m, Method)
+end
+# short function def
+@overlay mt cos(x::Float64) = 2
+# parametric function def
+@overlay mt tan(x::T) where {T} = 3
+
+end # module OverlayModule
+
+let ms = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, nothing, 1, Base.get_world_counter())
+    @test only(ms).method.module === Base.Math
+end
+let ms = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, OverlayModule.mt, 1, Base.get_world_counter())
+    @test only(ms).method.module === OverlayModule
+end
+let ms = Base._methods_by_ftype(Tuple{typeof(sin), Int}, OverlayModule.mt, 1, Base.get_world_counter())
+    @test isempty(ms)
+end
+
+# precompilation
+let load_path = mktempdir()
+    depot_path = mkdepottempdir()
+    try
+        pushfirst!(LOAD_PATH, load_path)
+        pushfirst!(DEPOT_PATH, depot_path)
+
+        write(joinpath(load_path, "Foo.jl"),
+            """
+            module Foo
+            Base.Experimental.@MethodTable(mt)
+            Base.Experimental.@overlay mt sin(x::Int) = 1
+            end
+            """)
+
+        # precompiling Foo serializes the overlay method through the `mt` binding in the module
+        Foo = Base.require(Main, :Foo)
+        @test length(Foo.mt) == 1
+
+        write(joinpath(load_path, "Bar.jl"),
+            """
+            module Bar
+            Base.Experimental.@MethodTable(mt)
+            end
+            """)
+
+        write(joinpath(load_path, "Baz.jl"),
+            """
+            module Baz
+            using Bar
+            Base.Experimental.@overlay Bar.mt sin(x::Int) = 1
+            end
+            """)
+
+        # when referring an method table in another module,
+        # the overlay method needs to be discovered explicitly
+        Bar = Base.require(Main, :Bar)
+        @test length(Bar.mt) == 0
+        Baz = Base.require(Main, :Baz)
+        @test length(Bar.mt) == 1
+    finally
+        filter!((≠)(load_path), LOAD_PATH)
+        filter!((≠)(depot_path), DEPOT_PATH)
+        rm(load_path, recursive=true, force=true)
+    end
+end
+
+# merging va tuple unions
+@test Tuple === Union{Tuple{},Tuple{Any,Vararg}}
+@test Tuple{Any,Vararg} === Union{Tuple{Any},Tuple{Any,Any,Vararg}}
+@test Core.Compiler.return_type(Base.front, Tuple{Tuple{Int,Vararg{Int}}}) === Tuple{Vararg{Int}}
+@test Tuple{Vararg{Int}} === Union{Tuple{Int}, Tuple{}, Tuple{Int, Int, Vararg{Int}}}
+@test (Tuple{Vararg{T}} where T) === (Union{Tuple{T, T, Vararg{T}}, Tuple{}, Tuple{T}} where T)
+@test_broken (Tuple{Vararg{T}} where T) === Union{Tuple{T, T, Vararg{T}} where T, Tuple{}, Tuple{T} where T}
+
+@test sizeof(Pair{Union{typeof(Union{}),Nothing}, Union{Type{Union{}},Nothing}}(Union{}, Union{})) == 2
+
+# Make sure that Core.Compiler has enough NamedTuple infrastructure
+# to properly give error messages for basic kwargs...
+Core.eval(Core.Compiler, quote issue50174(;a=1) = a end)
+@test_throws MethodError Core.Compiler.issue50174(;b=2)
+
+let s = mktemp() do path, io
+        xxx = 42
+        redirect_stdout(io) do
+            Base.@assume_effects :nothrow @show xxx
+        end
+        flush(io)
+        read(path, String)
+    end
+    @test strip(s) == "xxx = 42"
+end
+
+# `module` has an implicit world-age increment
+let foo = eval(Expr(:toplevel, :(module BarModuleInc; struct FooModuleInc; end; end), :(BarModuleInc.FooModuleInc())))
+    @Core.latestworld
+    @test foo == BarModuleInc.FooModuleInc()
+end
+
+let
+    eval(:(module BarModuleInc2; module BazModuleInc; struct FooModuleInc; end; end; const foo = BazModuleInc.FooModuleInc(); end))
+    @Core.latestworld
+    @test BarModuleInc2.foo == BarModuleInc2.BazModuleInc.FooModuleInc()
+end
+
+# `toplevel` has implicit world age increment between expansion and evaluation
+macro define_call(sym)
+    Core.eval(__module__, :($sym() = 1))
+    :($sym())
+end
+@test eval(Expr(:toplevel, :(@define_call(f_macro_defined1)))) == 1
+@test @define_call(f_macro_defined2) == 1
+
+# `invoke` of `Method`
+let m = which(+, (Int, Int))
+    @eval f56692(i) = invoke(+, $m, i, 4)
+    global g56692() = f56692(5) == 9 ? "true" : false
+end
+@test @inferred(f56692(3)) == 7
+@test @inferred(g56692()) == "true"
+
+# `invoke` of `CodeInstance`
+f_invalidate_me() = return 1
+f_invoke_me() = return f_invalidate_me()
+@test f_invoke_me() == 1
+const f_invoke_me_ci = Base.specialize_method(Base._which(Tuple{typeof(f_invoke_me)})).cache
+f_call_me() = invoke(f_invoke_me, f_invoke_me_ci)
+@test invoke(f_invoke_me, f_invoke_me_ci) == 1
+@test f_call_me() == 1
+@test_throws TypeError invoke(f_invoke_me, f_invoke_me_ci, 1)
+f_invalidate_me() = 2
+@test_throws ErrorException invoke(f_invoke_me, f_invoke_me_ci)
+@test_throws ErrorException f_call_me()
+
+myfun57023a(::Type{T}) where {T} = (x = @ccall mycfun()::Ptr{T}; x)
+@test only(code_lowered(myfun57023a)).has_fcall
+myfun57023b(::Type{T}) where {T} = (x = @cfunction myfun57023a Ptr{T} (Ref{T},); x)
+@test only(code_lowered(myfun57023b)).has_fcall
+
+# issue #57315
+global flag57315=false
+function f57315()
+    global flag57315
+    if flag57315
+        flag_2=true
+    else
+        if flag_2
+            return 2
+        end
+    end
+    return 1
+end
+@test_throws UndefVarError(:flag_2, :local) f57315()
+
+# issue #57446
+module GlobalAssign57446
+    using Test
+    global theglobal
+    (@__MODULE__).theglobal = 1
+    @test theglobal == 1
+end
+
+# issue #57638 - circular imports
+module M57638
+module I
+    using ..M57638
+end
+using .I
+end
+convert(Core.Binding, GlobalRef(M57638.I, :Base))
+@test M57638.Base === Base
+
+module M57638_2
+module I
+    using ..M57638_2
+    export Base
+end
+using .I
+export Base
+end
+@test M57638_2.Base === Base
+
+module M57638_3
+    module M2
+        using ..M57638_3
+        module M3
+            const x = 1
+            export x
+        end
+        using .M3
+        export x
+    end
+    using .M2
+    export x
+end
+@test M57638_3.x === 1
+
+@testset "no unnecessary methods for comparison functions with generically correct and performant fallback methods" begin
+    @test (isone ∘ length ∘ methods)(>, Tuple{Any, Any})
+    @test (isone ∘ length ∘ methods)(>=, Tuple{Any, Any})
+end
+
+module GlobalBindingMulti
+    module M
+        export S
+        module C
+            export S
+            struct A end
+            S = A() # making S const makes the error go away
+        end
+        using .C
+    end
+
+    using .M
+    using .M.C
+end
+@test GlobalBindingMulti.S === GlobalBindingMulti.M.C.S
+
+#58434 bitsegal comparison of oddly sized fields
+primitive type ByteString58434 (18 * 8) end
+
+@test Base.datatype_isbitsegal(Tuple{ByteString58434}) == false
+@test Base.datatype_haspadding(Tuple{ByteString58434}) == (length(Base.padding(Tuple{ByteString58434})) > 0)
diff --git a/test/corelogging.jl b/test/corelogging.jl
index 9626f48e4b407..154202da759c2 100644
--- a/test/corelogging.jl
+++ b/test/corelogging.jl
@@ -103,17 +103,30 @@ end
         logmsg = (function() @info msg x=y end,
                   function() @info msg x=y z=1+1 end)[i]
         @test_logs (Error, Test.Ignored(), Test.Ignored(), :logevent_error) catch_exceptions=true logmsg()
-        @test_throws UndefVarError(:msg) collect_test_logs(logmsg)
-        @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:msg)
+        @test_throws UndefVarError(:msg, :local) collect_test_logs(logmsg)
+        @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:msg, :local)
         msg = "the msg"
         @test_logs (Error, Test.Ignored(), Test.Ignored(), :logevent_error) catch_exceptions=true logmsg()
-        @test_throws UndefVarError(:y) collect_test_logs(logmsg)
-        @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:y)
+        @test_throws UndefVarError(:y, :local) collect_test_logs(logmsg)
+        @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:y, :local)
         y = "the y"
         @test_logs (Info,"the msg") logmsg()
         @test only(collect_test_logs(logmsg)[1]).kwargs[:x] === "the y"
     end
 end
+@testset "Log message handle_message exception handling" begin
+    # Exceptions in log handling (printing) of msg are caught by default.
+    struct Foo end
+    Base.show(::IO, ::Foo) = 1 ÷ 0
+
+    # We cannot use `@test_logs` here, since test_logs does not actually _print_ the message
+    # (i.e. it does not invoke handle_message). To test exception handling during printing,
+    # we have to use `@test_warn` to see what was printed.
+    @test_warn r"Error: Exception while generating log record in module .*DivideError: integer division error"s @info Foo()
+
+    # Exceptions in log handling (printing) of attributes are caught by default
+    @test_warn r"Error: Exception while generating log record in module .*DivideError: integer division error"s @info "foo" x=Foo()
+end
 
 @testset "Special keywords" begin
     logger = TestLogger()
@@ -140,9 +153,9 @@ end
     end
     @test length(logger.logs) == 1
     record = logger.logs[1]
-    @test record._module == nothing
-    @test record.file == nothing
-    @test record.line == nothing
+    @test record._module === nothing
+    @test record.file === nothing
+    @test record.line === nothing
 end
 
 # PR #28209
diff --git a/test/deprecation_exec.jl b/test/deprecation_exec.jl
index 61ffcc2a59ac6..8209b0e920a18 100644
--- a/test/deprecation_exec.jl
+++ b/test/deprecation_exec.jl
@@ -68,6 +68,7 @@ begin # @deprecate
     ex = :(module M22845; import ..DeprecationTests: bar;
                           bar(x::Number) = x + 3; end)
     @test_warn "importing deprecated binding" eval(ex)
+    @Core.latestworld
     @test @test_nowarn(DeprecationTests.bar(4)) == 7
 
     @test @test_warn "`f1` is deprecated, use `f` instead." f1()
diff --git a/test/dict.jl b/test/dict.jl
index 6a47c3c6eea8b..b2941088772f9 100644
--- a/test/dict.jl
+++ b/test/dict.jl
@@ -8,7 +8,7 @@ using Random
     @test isequal(p,10=>20)
     @test iterate(p)[1] == 10
     @test iterate(p, iterate(p)[2])[1] == 20
-    @test iterate(p, iterate(p, iterate(p)[2])[2]) == nothing
+    @test iterate(p, iterate(p, iterate(p)[2])[2]) === nothing
     @test firstindex(p) == 1
     @test lastindex(p) == length(p) == 2
     @test Base.indexed_iterate(p, 1, nothing) == (10,2)
@@ -162,6 +162,30 @@ end
 
     # issue #39117
     @test Dict(t[1]=>t[2] for t in zip((1,"2"), (2,"2"))) == Dict{Any,Any}(1=>2, "2"=>"2")
+
+    @testset "issue #33147" begin
+        expected = try; Base._throw_dict_kv_error(); catch e; e; end
+        @test_throws expected Dict(i for i in 1:2)
+        @test_throws expected Dict(nothing for i in 1:2)
+        @test_throws expected Dict(() for i in 1:2)
+        @test_throws expected Dict((i, i, i) for i in 1:2)
+        @test_throws expected Dict(nothing)
+        @test_throws expected Dict((1,))
+        @test_throws expected Dict(1:2)
+        @test_throws expected Dict(((),))
+        @test_throws expected IdDict(((),))
+        @test_throws expected WeakKeyDict(((),))
+        @test_throws expected IdDict(nothing)
+        @test_throws expected WeakKeyDict(nothing)
+        @test Dict(1:0) isa Dict
+        @test Dict(()) isa Dict
+        try
+            Dict(i => error("$i") for i in 1:3)
+        catch ex
+            @test ex isa ErrorException
+            @test length(Base.current_exceptions()) == 1
+        end
+    end
 end
 
 @testset "empty tuple ctor" begin
@@ -270,6 +294,14 @@ end
     @test eq(Dict{Int,Int}(), Dict{AbstractString,AbstractString}())
 end
 
+@testset "sizehint!" begin
+    d = Dict()
+    sizehint!(d, UInt(3))
+    @test d == Dict()
+    sizehint!(d, 5)
+    @test isempty(d)
+end
+
 @testset "equality special cases" begin
     @test Dict(1=>0.0) == Dict(1=>-0.0)
     @test !isequal(Dict(1=>0.0), Dict(1=>-0.0))
@@ -639,13 +671,13 @@ end
     @test d == IdDict(1=>1, 2=>2, 3=>3)
     @test eltype(d) == Pair{Int,Int}
     @test_throws KeyError d[:a]
-    @test_throws ArgumentError d[:a] = 1
+    @test_throws TypeError d[:a] = 1
     @test_throws MethodError d[1] = :a
 
     # copy constructor
     d = IdDict(Pair(1,1), Pair(2,2), Pair(3,3))
     @test collect(values(IdDict{Int,Float64}(d))) == collect(values(d))
-    @test_throws ArgumentError IdDict{Float64,Int}(d)
+    @test_throws TypeError IdDict{Float64,Int}(d)
 
     # misc constructors
     @test typeof(IdDict(1=>1, :a=>2)) == IdDict{Any,Int}
@@ -659,9 +691,9 @@ end
     @inferred setindex!(d, -1, 10)
     @test d[10] == -1
     @test 1 == @inferred d[1]
-    @test get(d, -111, nothing) == nothing
+    @test get(d, -111, nothing) === nothing
     @test 1 == @inferred get(d, 1, 1)
-    @test pop!(d, -111, nothing) == nothing
+    @test pop!(d, -111, nothing) === nothing
     @test 1 == @inferred pop!(d, 1)
 
     # get! and delete!
@@ -672,7 +704,7 @@ end
     @test_throws MethodError get!(d, "b", "b")
     @test delete!(d, "a") === d
     @test !haskey(d, "a")
-    @test_throws ArgumentError get!(IdDict{Symbol,Any}(), 2, "b")
+    @test_throws TypeError get!(IdDict{Symbol,Any}(), 2, "b")
     @test get!(IdDict{Int,Int}(), 1, 2.0) === 2
     @test get!(()->2.0, IdDict{Int,Int}(), 1) === 2
 
@@ -763,6 +795,13 @@ end
           [v for (k, v) in d] == [d[x[1]] for (i, x) in enumerate(d)]
 end
 
+@testset "consistency of dict iteration order (issue #56841)" begin
+    dict = Dict(randn() => randn() for _ = 1:100)
+    @test all(zip(dict, keys(dict), values(dict), pairs(dict))) do (d, k, v, p)
+        d == p && first(d) == first(p) == k && last(d) == last(p) == v
+    end
+end
+
 @testset "generators, similar" begin
     d = Dict(:a=>"a")
     # TODO: restore when 0.7 deprecation is removed
@@ -1025,7 +1064,7 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep
 
     # issue #26939
     d26939 = WeakKeyDict()
-    (@noinline d -> d[big"1.0" + 1.1] = 1)(d26939)
+    (@noinline d -> d[big"1" + 1] = 1)(d26939)
     GC.gc() # primarily to make sure this doesn't segfault
     @test count(d26939) == 0
     @test length(d26939.ht) == 1
@@ -1084,6 +1123,119 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep
     GC.@preserve A B C D nothing
 end
 
+import Base.PersistentDict
+@testset "PersistentDict" begin
+    @testset "HAMT HashState" begin
+        key = :key
+        h = Base.HAMT.HashState(key)
+        h1 = Base.HAMT.HashState(key, objectid(key), 0, 0)
+        h2 = Base.HAMT.HashState(h, key) # reconstruct
+        @test h.hash == h1.hash
+        @test h.hash == h2.hash
+
+        hs = Base.HAMT.next(h1)
+        @test hs.depth == 1
+        recompute_depth = (Base.HAMT.MAX_SHIFT ÷ Base.HAMT.BITS_PER_LEVEL) + 1
+        for i in 2:recompute_depth
+            hs = Base.HAMT.next(hs)
+            @test hs.depth == i
+        end
+        @test hs.depth == recompute_depth
+        @test hs.shift == 0
+        hsr = Base.HAMT.HashState(hs, key)
+        @test hs.hash == hsr.hash
+        @test hs.depth == hsr.depth
+        @test hs.shift == hsr.shift
+
+        @test Core.Compiler.is_removable_if_unused(Base.infer_effects(Base.HAMT.init_hamt, (Type{Vector{Any}},Type{Int},Vector{Any},Int)))
+        @test Core.Compiler.is_removable_if_unused(Base.infer_effects(Base.HAMT.HAMT{Vector{Any},Int}, (Pair{Vector{Any},Int},)))
+    end
+    @testset "basics" begin
+        dict = PersistentDict{Int, Int}()
+        @test_throws KeyError dict[1]
+        @test length(dict) == 0
+        @test isempty(dict)
+
+        dict = PersistentDict{Int, Int}(1=>2.0)
+        @test dict[1] == 2
+
+        dict = PersistentDict(1=>2)
+        @test dict[1] == 2
+
+        dict = PersistentDict(dict, 1=>3.0)
+        @test dict[1] == 3
+
+        dict = PersistentDict(dict, 1, 1)
+        @test dict[1] == 1
+        @test get(dict, 2, 1) == 1
+        @test get(()->1, dict, 2) == 1
+
+        @test (1 => 1) ∈ dict
+        @test (1 => 2) ∉ dict
+        @test (2 => 1) ∉ dict
+
+        @test haskey(dict, 1)
+        @test !haskey(dict, 2)
+
+        dict2 = PersistentDict{Int, Int}(dict, 1=>2)
+        @test dict[1] == 1
+        @test dict2[1] == 2
+
+        dict3 = Base.delete(dict2, 1)
+        @test_throws KeyError dict3[1]
+        @test dict3 == Base.delete(dict3, 1)
+        @test dict3.trie != Base.delete(dict3, 1).trie
+
+        dict = PersistentDict(dict, 1, 3)
+        @test dict[1] == 3
+        @test dict2[1] == 2
+
+        @test length(dict) == 1
+        @test length(dict2) == 1
+
+        dict = PersistentDict(1=>2, 2=>3, 4=>1)
+        @test eltype(dict) == Pair{Int, Int}
+        @test dict[1] == 2
+        @test dict[2] == 3
+        @test dict[4] == 1
+    end
+
+    @testset "objectid" begin
+        c = [0]
+        dict = PersistentDict{Any, Int}(c => 1, [1] => 2)
+        @test dict[c] == 1
+        c[1] = 1
+        @test dict[c] == 1
+
+        c[1] = 0
+        dict = PersistentDict{Any, Int}((c,) => 1, ([1],) => 2)
+        @test dict[(c,)] == 1
+
+        c[1] = 1
+        @test dict[(c,)] == 1
+    end
+
+    @testset "stress" begin
+        N = 2^14
+        dict = PersistentDict{Int, Int}()
+        for i in 1:N
+            dict = PersistentDict(dict, i, i)
+        end
+        @test length(dict) == N
+        length(collect(dict)) == N
+        values = sort!(collect(dict))
+        @test values[1] == (1=>1)
+        @test values[end] == (N=>N)
+
+        dict = Base.delete(dict, 16384)
+        @test !haskey(dict, 16384)
+        for i in 1:N
+            dict = Base.delete(dict, i)
+        end
+        @test isempty(dict)
+    end
+end
+
 @testset "issue #19995, hash of dicts" begin
     @test hash(Dict(Dict(1=>2) => 3, Dict(4=>5) => 6)) != hash(Dict(Dict(4=>5) => 3, Dict(1=>2) => 6))
     a = Dict(Dict(3 => 4, 2 => 3) => 2, Dict(1 => 2, 5 => 6) => 1)
@@ -1137,8 +1289,6 @@ struct NonFunctionCallable end
     @test @inferred mergewith(NonFunctionCallable(), d1, d2) == Dict("A" => 1, "B" => 5, "C" => 4)
     @test foldl(mergewith(+), [d1, d2]; init=Dict{Union{},Union{}}()) ==
         Dict("A" => 1, "B" => 5, "C" => 4)
-    # backward compatibility
-    @test @inferred merge(+, d1, d2) == Dict("A" => 1, "B" => 5, "C" => 4)
 end
 
 @testset "Dict merge!" begin
@@ -1362,12 +1512,68 @@ end
     filter!(x -> x.first < 10, d)
     sizehint!(d, 10)
     @test length(d.slots) < 100
+    sizehint!(d, 1000)
+    sizehint!(d, 1; shrink = false)
+    @test length(d.slots) >= 1000
+    sizehint!(d, 1; shrink = true)
+    @test length(d.slots) < 1000
 end
 
 # getindex is :effect_free and :terminates but not :consistent
 for T in (Int, Float64, String, Symbol)
-    @test !Core.Compiler.is_consistent(Base.infer_effects(getindex, (Dict{T,Any}, T)))
-    @test Core.Compiler.is_effect_free(Base.infer_effects(getindex, (Dict{T,Any}, T)))
-    @test !Core.Compiler.is_nothrow(Base.infer_effects(getindex, (Dict{T,Any}, T)))
-    @test Core.Compiler.is_terminates(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+    @testset let T=T
+        @test !Core.Compiler.is_consistent(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+        @test Core.Compiler.is_effect_free(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+        @test !Core.Compiler.is_nothrow(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+        @test Core.Compiler.is_terminates(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+    end
+end
+
+struct BadHash
+    i::Int
+end
+Base.hash(::BadHash, ::UInt)=UInt(1)
+@testset "maxprobe reset #51595" begin
+    d = Dict(BadHash(i)=>nothing for i in 1:20)
+    empty!(d)
+    sizehint!(d, 0)
+    @test d.maxprobe < length(d.keys)
+    d[BadHash(1)]=nothing
+    @test !(BadHash(2) in keys(d))
+    d = Dict(BadHash(i)=>nothing for i in 1:20)
+    for _ in 1:20
+        pop!(d)
+    end
+    sizehint!(d, 0)
+    @test d.maxprobe < length(d.keys)
+    d[BadHash(1)]=nothing
+    @test !(BadHash(2) in keys(d))
+end
+
+# Issue #52066
+let d = Dict()
+    d[1] = 'a'
+    d[1.0] = 'b'
+    @test only(d) === Pair{Any,Any}(1.0, 'b')
+end
+
+@testset "UnionAll `keytype` and `valtype` (issue #53115)" begin
+    K = Int8
+    V = Int16
+    dicts = (
+        AbstractDict, IdDict, Dict, WeakKeyDict, Base.ImmutableDict,
+        Base.PersistentDict, Iterators.Pairs
+    )
+
+    @testset "D: $D" for D ∈ dicts
+        @test_throws MethodError keytype(D)
+        @test_throws MethodError keytype(D{<:Any,V})
+        @test                    keytype(D{K      }) == K
+        @test                    keytype(D{K,    V}) == K
+
+        @test_throws MethodError valtype(D)
+        @test                    valtype(D{<:Any,V}) == V
+        @test_throws MethodError valtype(D{K      })
+        @test                    valtype(D{K,    V}) == V
+    end
 end
diff --git a/test/docs.jl b/test/docs.jl
index 7f6ece4e76ab4..148c0cf8ca649 100644
--- a/test/docs.jl
+++ b/test/docs.jl
@@ -1,6 +1,14 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-import Base.Docs: meta, @var, DocStr, parsedoc
+import Base.Docs: meta, DocStr, parsedoc, bindingexpr, namify
+
+macro var(x) # just for testing bindingexpr/nameify more conveniently
+    esc(bindingexpr(namify(x)))
+end
+
+# check that @doc can work before REPL is loaded
+@test !startswith(read(`$(Base.julia_cmd()) -E '@doc sin'`, String), "nothing")
+@test !startswith(read(`$(Base.julia_cmd()) -E '@doc @time'`, String), "nothing")
 
 using Markdown
 using REPL
@@ -54,6 +62,8 @@ macro macro_doctest() end
 @test (@eval @doc $(Meta.parse("``"))) == (@doc @cmd)
 @test (@eval @doc $(Meta.parse("123456789012345678901234567890"))) == (@doc @int128_str)
 @test (@eval @doc $(Meta.parse("1234567890123456789012345678901234567890"))) == (@doc @big_str)
+# Test that @doc doesn't crash on empty tuple expression (issue #XXXXX)
+@test (@doc :()) == (@doc Expr)
 
 # test that random stuff interpolated into docstrings doesn't break search or other methods here
 @doc doc"""
@@ -69,6 +79,37 @@ $$latex literal$$
 """
 function break_me_docs end
 
+
+# `hasdoc` returns `true` on a name with a docstring.
+@test Docs.hasdoc(Base, :map)
+# `hasdoc` returns `false` on a name without a docstring.
+@test !isdefined(Base, :_this_name_doesnt_exist_) && !Docs.hasdoc(Base, :_this_name_doesnt_exist_)
+@test isdefined(Base, :_typed_vcat) && !Docs.hasdoc(Base, :_typed_vcat)
+
+"This module has names without documentation."
+module _ModuleWithUndocumentedNames
+export f
+public ⨳, @foo
+f() = 1
+g() = 2
+⨳(a,b) = a * b
+macro foo(); nothing; end
+⊕(a,b) = a + b
+end
+
+"This module has some documentation."
+module _ModuleWithSomeDocumentedNames
+export f
+"f() is 1."
+f() = 1
+g() = 2
+end
+
+@test Docs.undocumented_names(_ModuleWithUndocumentedNames) == [Symbol("@foo"), :f, :⨳]
+@test isempty(Docs.undocumented_names(_ModuleWithSomeDocumentedNames))
+@test Docs.undocumented_names(_ModuleWithSomeDocumentedNames; private=true) == [:g]
+
+
 # issue #11548
 
 module ModuleMacroDoc
@@ -84,12 +125,12 @@ end
 # issue #38819
 
 module NoDocStrings end
-@test meta(NoDocStrings) === getfield(NoDocStrings, Base.Docs.META)
+@test meta(NoDocStrings) === invokelatest(getglobal, NoDocStrings, Base.Docs.META)
 
 # General tests for docstrings.
 
 const LINE_NUMBER = @__LINE__() + 1
-"DocsTest"
+"DocsTest, evaluating $(K)"     # test that module docstring is evaluated within module
 module DocsTest
 
 using Markdown
@@ -234,7 +275,7 @@ fnospecialize(@nospecialize(x::AbstractArray)) = 2
 end
 
 let md = meta(DocsTest)[@var(DocsTest)]
-    @test docstrings_equal(md.docs[Union{}], doc"DocsTest")
+    @test docstrings_equal(md.docs[Union{}], doc"DocsTest, evaluating K")
     # Check that plain docstrings store a module reference.
     # https://github.com/JuliaLang/julia/pull/13017#issuecomment-138618663
     @test md.docs[Union{}].data[:module] == DocsTest
@@ -540,8 +581,8 @@ end
 
 let T = meta(DocVars)[@var(DocVars.T)],
     S = meta(DocVars)[@var(DocVars.S)],
-    Tname = Markdown.parse("```\n$(curmod_prefix)DocVars.T\n```"),
-    Sname = Markdown.parse("```\n$(curmod_prefix)DocVars.S\n```")
+    Tname = Markdown.parse("```julia\n$(curmod_str).DocVars.T\n```"),
+    Sname = Markdown.parse("```julia\n$(curmod_str).DocVars.S\n```")
     # Splicing the expression directly doesn't work
     @test docstrings_equal(T.docs[Union{}],
         doc"""
@@ -623,6 +664,7 @@ end
 
 let d = @doc(I15424.LazyHelp)
     @test repr("text/plain", d) == "LazyHelp\nLazyHelp(text)\n"
+    # (no internal warning is inserted for non-markdown content)
 end
 
 # Issue #13385.
@@ -648,9 +690,11 @@ end
 @doc "This should document @m1... since its the result of expansion" @m2_11993
 @test (@doc @m1_11993) !== nothing
 let d = (@doc :@m2_11993),
-    macro_doc = Markdown.parse("`$(curmod_prefix == "Main." ? "" : curmod_prefix)@m2_11993` is a macro.")
+    varstr = "$(curmod_prefix)@m2_11993"
+    docstr = Markdown.Code("", "$(curmod_str).@m2_11993")
+    macro_doc = Markdown.parse("`$varstr` is a macro.")
     @test docstring_startswith(d, doc"""
-    No documentation found.
+    No documentation found for private binding $docstr.
 
     $macro_doc""")
 end
@@ -849,9 +893,9 @@ undocumented(x,y) = 3
 end # module
 
 doc_str = Markdown.parse("""
-No docstring or readme file found for module `$(curmod_prefix)Undocumented`.
+No docstring or readme file found for internal module `$(curmod_str).Undocumented`.
 
-# Exported names
+# Public names
 
 `A`, `B`, `C`, `at0`, `pt2`
 """)
@@ -860,67 +904,67 @@ No docstring or readme file found for module `$(curmod_prefix)Undocumented`.
 doc_str = Markdown.parse("""
 No documentation found.
 
-Binding `$(curmod_prefix)Undocumented.bindingdoesnotexist` does not exist.
+Binding `$(curmod_str).Undocumented.bindingdoesnotexist` does not exist.
 """)
 @test docstrings_equal(@doc(Undocumented.bindingdoesnotexist), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for public binding `$(curmod_str).Undocumented.A`.
 
 # Summary
 ```
-abstract type $(curmod_prefix)Undocumented.A
+abstract type $(curmod_str).Undocumented.A
 ```
 
 # Subtypes
 ```
-$(curmod_prefix)Undocumented.B
-$(curmod_prefix)Undocumented.C
+$(curmod_str).Undocumented.B
+$(curmod_str).Undocumented.C
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.A), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for public binding `$(curmod_str).Undocumented.B`.
 
 # Summary
 ```
-abstract type $(curmod_prefix)Undocumented.B
+abstract type $(curmod_str).Undocumented.B
 ```
 
 # Subtypes
 ```
-$(curmod_prefix)Undocumented.D
+$(curmod_str).Undocumented.D
 ```
 
 # Supertype Hierarchy
 ```
-$(curmod_prefix)Undocumented.B <: $(curmod_prefix)Undocumented.A <: Any
+$(curmod_str).Undocumented.B <: $(curmod_str).Undocumented.A <: Any
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.B), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for public binding `$(curmod_str).Undocumented.C`.
 
 # Summary
 ```
-mutable struct $(curmod_prefix)Undocumented.C
+mutable struct $(curmod_str).Undocumented.C
 ```
 
 # Supertype Hierarchy
 ```
-$(curmod_prefix)Undocumented.C <: $(curmod_prefix)Undocumented.A <: Any
+$(curmod_str).Undocumented.C <: $(curmod_str).Undocumented.A <: Any
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.C), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for private binding `$(curmod_str).Undocumented.D`.
 
 # Summary
 ```
-struct $(curmod_prefix)Undocumented.D
+struct $(curmod_str).Undocumented.D
 ```
 
 # Fields
@@ -932,220 +976,220 @@ three :: Float64
 
 # Supertype Hierarchy
 ```
-$(curmod_prefix)Undocumented.D <: $(curmod_prefix)Undocumented.B <: $(curmod_prefix)Undocumented.A <: Any
+$(curmod_str).Undocumented.D <: $(curmod_str).Undocumented.B <: $(curmod_str).Undocumented.A <: Any
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.D), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for public binding `$(curmod_str).Undocumented.at0`.
 
 # Summary
 
 ```
-abstract type $(curmod_prefix)Undocumented.at0{T<:Number, N}
+abstract type $(curmod_str).Undocumented.at0{T<:Number, N}
 ```
 
 # Subtypes
 
 ```
-$(curmod_prefix)Undocumented.at1{Integer<:T<:Number, N}
-$(curmod_prefix)Undocumented.pt2{T<:Number, N, A>:Integer}
-$(curmod_prefix)Undocumented.st3{T<:Integer, N}
-$(curmod_prefix)Undocumented.st4{T<:Number, N}
+$(curmod_str).Undocumented.at1{Integer<:T<:Number, N}
+$(curmod_str).Undocumented.pt2{T<:Number, N, A>:Integer}
+$(curmod_str).Undocumented.st3{T<:Integer, N}
+$(curmod_str).Undocumented.st4{T<:Number, N}
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.at0), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for private binding `$(curmod_str).Undocumented.at1`.
 
 # Summary
 
 ```
-abstract type $(curmod_prefix)Undocumented.at1{T>:Integer, N}
+abstract type $(curmod_str).Undocumented.at1{T>:Integer, N}
 ```
 
 # Subtypes
 
 ```
-$(curmod_prefix)Undocumented.mt6{Integer, N}
-$(curmod_prefix)Undocumented.st5{T>:Integer, N}
+$(curmod_str).Undocumented.mt6{Integer, N}
+$(curmod_str).Undocumented.st5{T>:Integer, N}
 ```
 
 # Supertype Hierarchy
 ```
-$(curmod_prefix)Undocumented.at1{T>:Integer, N} <: $(curmod_prefix)Undocumented.at0{T>:Integer, N} <: Any
+$(curmod_str).Undocumented.at1{T>:Integer, N} <: $(curmod_str).Undocumented.at0{T>:Integer, N} <: Any
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.at1), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for private binding `$(curmod_str).Undocumented.at_`.
 
 # Summary
 
 ```
-abstract type $(curmod_prefix)Undocumented.at0{Int64, N}
+abstract type $(curmod_str).Undocumented.at0{Int64, N}
 ```
 
 # Subtypes
 
 ```
-$(curmod_prefix)Undocumented.pt2{Int64, N, A>:Integer}
-$(curmod_prefix)Undocumented.st3{Int64, N}
-$(curmod_prefix)Undocumented.st4{Int64, N}
+$(curmod_str).Undocumented.pt2{Int64, N, A>:Integer}
+$(curmod_str).Undocumented.st3{Int64, N}
+$(curmod_str).Undocumented.st4{Int64, N}
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.at_), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for public binding `$(curmod_str).Undocumented.pt2`.
 
 # Summary
 
 ```
-primitive type $(curmod_prefix)Undocumented.pt2{T<:Number, N, A>:Integer}
+primitive type $(curmod_str).Undocumented.pt2{T<:Number, N, A>:Integer}
 ```
 
 # Supertype Hierarchy
 
 ```
-$(curmod_prefix)Undocumented.pt2{T<:Number, N, A>:Integer} <: $(curmod_prefix)Undocumented.at0{T<:Number, N} <: Any
+$(curmod_str).Undocumented.pt2{T<:Number, N, A>:Integer} <: $(curmod_str).Undocumented.at0{T<:Number, N} <: Any
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.pt2), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for private binding `$(curmod_str).Undocumented.st3`.
 
 # Summary
 
 ```
-struct $(curmod_prefix)Undocumented.st3{T<:Integer, N}
+struct $(curmod_str).Undocumented.st3{T<:Integer, N}
 ```
 
 # Fields
 ```
-a :: Tuple{Vararg{T<:Integer, N}}
+a :: NTuple{N, T<:Integer}
 b :: Array{Int64, N}
 c :: Int64
 ```
 
 # Supertype Hierarchy
 ```
-$(curmod_prefix)Undocumented.st3{T<:Integer, N} <: $(curmod_prefix)Undocumented.at0{T<:Integer, N} <: Any
+$(curmod_str).Undocumented.st3{T<:Integer, N} <: $(curmod_str).Undocumented.at0{T<:Integer, N} <: Any
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.st3), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for private binding `$(curmod_str).Undocumented.st4`.
 
 # Summary
 
 ```
-struct $(curmod_prefix)Undocumented.st4{T, N}
+struct $(curmod_str).Undocumented.st4{T, N}
 ```
 
 # Fields
 ```
 a :: T
-b :: Tuple{Vararg{T, N}}
+b :: NTuple{N, T}
 ```
 
 # Supertype Hierarchy
 ```
-$(curmod_prefix)Undocumented.st4{T, N} <: $(curmod_prefix)Undocumented.at0{T, N} <: Any
+$(curmod_str).Undocumented.st4{T, N} <: $(curmod_str).Undocumented.at0{T, N} <: Any
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.st4), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for private binding `$(curmod_str).Undocumented.st5`.
 
 # Summary
 
 ```
-struct $(curmod_prefix)Undocumented.st5{T>:Int64, N}
+struct $(curmod_str).Undocumented.st5{T>:Int64, N}
 ```
 
 # Fields
 ```
-c :: $(curmod_prefix)Undocumented.st3{T>:Int64, N}
+c :: $(curmod_str).Undocumented.st3{T>:Int64, N}
 ```
 
 # Supertype Hierarchy
 ```
-$(curmod_prefix)Undocumented.st5{T>:Int64, N} <: $(curmod_prefix)Undocumented.at1{T>:Int64, N} <: $(curmod_prefix)Undocumented.at0{T>:Int64, N} <: Any
+$(curmod_str).Undocumented.st5{T>:Int64, N} <: $(curmod_str).Undocumented.at1{T>:Int64, N} <: $(curmod_str).Undocumented.at0{T>:Int64, N} <: Any
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.st5), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for private binding `$(curmod_str).Undocumented.mt6`.
 
 # Summary
 
 ```
-mutable struct $(curmod_prefix)Undocumented.mt6{T<:Integer, N}
+mutable struct $(curmod_str).Undocumented.mt6{T<:Integer, N}
 ```
 
 # Fields
 ```
-d :: $(curmod_prefix)Undocumented.st5{T<:Integer, N}
+d :: $(curmod_str).Undocumented.st5{T<:Integer, N}
 ```
 
 # Supertype Hierarchy
 ```
-$(curmod_prefix)Undocumented.mt6{T<:Integer, N} <: $(curmod_prefix)Undocumented.at1{T<:Integer, N} <: $(curmod_prefix)Undocumented.at0{T<:Integer, N} <: Any
+$(curmod_str).Undocumented.mt6{T<:Integer, N} <: $(curmod_str).Undocumented.at1{T<:Integer, N} <: $(curmod_str).Undocumented.at0{T<:Integer, N} <: Any
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.mt6), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for private binding `$(curmod_str).Undocumented.ut7`.
 
 # Summary
 
-`$(curmod_prefix)Undocumented.ut7` is of type `Union`.
+`$(curmod_str).Undocumented.ut7` is of type `Union`.
 
 # Union Composed of Types
 
- - `$(curmod_prefix)Undocumented.mt6`
- - `$(curmod_prefix)Undocumented.st5`
+ - `$(curmod_str).Undocumented.mt6`
+ - `$(curmod_str).Undocumented.st5`
 """)
 @test docstrings_equal(@doc(Undocumented.ut7), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for private binding `$(curmod_str).Undocumented.ut8`.
 
 # Summary
 
-`$(curmod_prefix)Undocumented.ut8` is of type `Union`.
+`$(curmod_str).Undocumented.ut8` is of type `Union`.
 
 # Union Composed of Types
 
- - `$(curmod_prefix)Undocumented.at1`
- - `$(curmod_prefix)Undocumented.pt2`
- - `$(curmod_prefix)Undocumented.st3`
- - `$(curmod_prefix)Undocumented.st4`
+ - `$(curmod_str).Undocumented.at1`
+ - `$(curmod_str).Undocumented.pt2`
+ - `$(curmod_str).Undocumented.st3`
+ - `$(curmod_str).Undocumented.st4`
 """)
 @test docstrings_equal(@doc(Undocumented.ut8), doc"$doc_str")
 
 doc_str = Markdown.parse("""
-No documentation found.
+No documentation found for private binding `$(curmod_str).Undocumented.ut9`.
 
 # Summary
 
-`$(curmod_prefix)Undocumented.ut9` is of type `UnionAll`.
+`$(curmod_str).Undocumented.ut9` is of type `UnionAll`.
 
 # Union Composed of Types
 
- - `$(curmod_prefix)Undocumented.at1{T} where T`
- - `$(curmod_prefix)Undocumented.pt2{T} where T`
- - `$(curmod_prefix)Undocumented.st3{T} where T`
- - `$(curmod_prefix)Undocumented.st4`
+ - `$(curmod_str).Undocumented.at1{T} where T`
+ - `$(curmod_str).Undocumented.pt2{T} where T`
+ - `$(curmod_str).Undocumented.st3{T} where T`
+ - `$(curmod_str).Undocumented.st4`
 """)
 @test docstrings_equal(@doc(Undocumented.ut9), doc"$doc_str")
 
@@ -1153,9 +1197,9 @@ let d = @doc(Undocumented.f)
     io = IOBuffer()
     show(io, MIME"text/markdown"(), d)
     @test startswith(String(take!(io)),"""
-    No documentation found.
+    No documentation found for private binding `$(curmod_str).Undocumented.f`.
 
-    `$(curmod_prefix)Undocumented.f` is a `Function`.
+    `$(curmod_str).Undocumented.f` is a `Function`.
     """)
 end
 
@@ -1163,9 +1207,9 @@ let d = @doc(Undocumented.undocumented)
     io = IOBuffer()
     show(io, MIME"text/markdown"(), d)
     @test startswith(String(take!(io)), """
-    No documentation found.
+    No documentation found for private binding `$(curmod_str).Undocumented.undocumented`.
 
-    `$(curmod_prefix)Undocumented.undocumented` is a `Function`.
+    `$(curmod_str).Undocumented.undocumented` is a `Function`.
     """)
 end
 
@@ -1201,7 +1245,7 @@ end
 
 # Bindings.
 
-import Base.Docs: @var, Binding, defined
+import Base.Docs: Binding, defined
 
 let x = Binding(Base, Symbol("@inline"))
     @test defined(x) == true
@@ -1309,30 +1353,30 @@ end
 let dt1 = striptrimdocs(_repl(:(dynamic_test(1.0))))
     @test dt1 isa Expr
     @test dt1.args[1] isa Expr
-    @test dt1.args[1].head === :macrocall
-    @test dt1.args[1].args[1] === Symbol("@doc")
-    @test dt1.args[1].args[3] == :(dynamic_test(::typeof(1.0)))
+    @test dt1.args[1].head === :call
+    @test dt1.args[1].args[1] === Base.Docs.doc
+    @test dt1.args[1].args[3] == :(Union{Tuple{typeof(1.0)}})
 end
 let dt2 = striptrimdocs(_repl(:(dynamic_test(::String))))
     @test dt2 isa Expr
     @test dt2.args[1] isa Expr
-    @test dt2.args[1].head === :macrocall
-    @test dt2.args[1].args[1] === Symbol("@doc")
-    @test dt2.args[1].args[3] == :(dynamic_test(::String))
+    @test dt2.args[1].head === :call
+    @test dt2.args[1].args[1] === Base.Docs.doc
+    @test dt2.args[1].args[3] == :(Union{Tuple{String}})
 end
 let dt3 = striptrimdocs(_repl(:(dynamic_test(a))))
     @test dt3 isa Expr
     @test dt3.args[1] isa Expr
-    @test dt3.args[1].head === :macrocall
-    @test dt3.args[1].args[1] === Symbol("@doc")
-    @test dt3.args[1].args[3].args[2].head === :(::) # can't test equality due to line numbers
+    @test dt3.args[1].head === :call
+    @test dt3.args[1].args[1] === Base.Docs.doc
+    @test dt3.args[1].args[3].args[2].head === :curly # can't test equality due to line numbers
 end
 let dt4 = striptrimdocs(_repl(:(dynamic_test(1.0,u=2.0))))
     @test dt4 isa Expr
     @test dt4.args[1] isa Expr
-    @test dt4.args[1].head === :macrocall
-    @test dt4.args[1].args[1] === Symbol("@doc")
-    @test dt4.args[1].args[3] == :(dynamic_test(::typeof(1.0); u::typeof(2.0)=2.0))
+    @test dt4.args[1].head === :call
+    @test dt4.args[1].args[1] === Base.Docs.doc
+    @test dt4.args[1].args[3] == :(Union{Tuple{typeof(1.0)}})
 end
 
 # Equality testing
@@ -1433,7 +1477,7 @@ end
 end
 
 struct t_docs_abc end
-@test "t_docs_abc" in accessible(@__MODULE__)
+@test "t_docs_abc" in string.(accessible(@__MODULE__))
 
 # Call overloading issues #20087 and #44889
 """
@@ -1482,7 +1526,7 @@ struct B_20087 end
 # issue #27832
 
 _last_atdoc = Core.atdoc
-Core.atdoc!(Core.Compiler.CoreDocs.docm)  # test bootstrap doc system
+Core.atdoc!(Base.CoreDocs.docm)  # test bootstrap doc system
 
 """
 """
@@ -1516,3 +1560,111 @@ struct S41727
 end
 @test S41727(1) isa S41727
 @test string(@repl S41727.x) == "x is 4\n"
+
+"ensure we can document ccallable functions"
+Base.@ccallable c51586_short()::Int = 2
+"ensure we can document ccallable functions"
+Base.@ccallable c51586_long()::Int = 3
+
+@test docstrings_equal(@doc(c51586_short()), doc"ensure we can document ccallable functions")
+@test docstrings_equal(@doc(c51586_long()), doc"ensure we can document ccallable functions")
+
+@testset "Docs docstrings" begin
+    undoc = Docs.undocumented_names(Docs)
+    @test isempty(undoc)
+end
+
+# Docing the macroception macro
+macro docmacroception()
+    Expr(:toplevel, macroexpand(__module__, :(@Base.__doc__ macro docmacrofoo() 1 end); recursive=false), :(@docmacrofoo))
+end
+
+"""
+This docmacroception has a docstring
+"""
+@docmacroception()
+
+@test Docs.hasdoc(@__MODULE__, :var"@docmacrofoo")
+
+# Test that @doc returns the value of the documented expression
+module DocReturnValue
+    using Test
+    # Test function definition returns the function
+    result = begin
+        "docstring for f"
+        function f end
+    end
+    @test result === f
+    # Test with regular function syntax
+    result2 = begin
+        "docstring for g"
+        g(x) = x + 1
+    end
+    @test result2 === g
+    # Test with struct definition
+    result3 = begin
+        "docstring for S"
+        struct S; x; end
+    end
+    @test result3 === nothing
+    # Test with const binding
+    result4 = begin
+        "docstring for K"
+        const K = 42
+    end
+    @test result4 === 42
+    # Test that documenting a global declaration returns nothing to avoid syntax errors
+    result5 = begin
+        "docstring for global x"
+        global x
+    end
+    @test result5 === nothing
+    @test Base.binding_module(DocReturnValue, :x) === DocReturnValue
+    # Test that assignment returns the RHS
+    result6 = begin
+        "docstring for global y"
+        global y = 4
+    end
+    @test result6 === 4
+    @test y === 4
+    # Test that assignment returns the RHS
+    result7 = begin
+        "docstring for const z"
+        const z = 5
+    end
+    @test result7 === z === 5
+    # Test module returns module
+    result8 = begin
+        "docstring for module A"
+        module A end
+    end
+    @test result8 === A
+    # Tests without definition effect
+    function t end
+    result9 = begin
+        "docstring for existing value t"
+        :t
+    end
+    @test result9 isa Base.Docs.Binding
+    macro s end
+    result10 = begin
+        "docstring for existing macro s"
+        :@s
+    end
+    @test result10 isa Base.Docs.Binding
+    function h end
+    result11 = begin
+        "docstring for existing function"
+        h()
+    end
+    @test result11 isa Base.Docs.Binding
+end
+
+# https://github.com/JuliaLang/julia/issues/59949
+struct Foo59949{T} end
+
+"""
+Bar59949{T}
+"""
+Bar59949{T} = Foo59949{T}
+@test docstrings_equal(@doc(Bar59949), doc"Bar59949{T}")
diff --git a/test/embedding/Makefile b/test/embedding/Makefile
index df31c3735c9de..4be4974e864cd 100644
--- a/test/embedding/Makefile
+++ b/test/embedding/Makefile
@@ -21,6 +21,7 @@ EXE := $(suffix $(abspath $(JULIA)))
 
 # get compiler and linker flags. (see: `contrib/julia-config.jl`)
 JULIA_CONFIG := $(JULIA) -e 'include(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "julia-config.jl"))' --
+JULIA_LIBDIR := $(shell $(JULIA) -e 'println(joinpath(Sys.BINDIR, "..", "lib"))' --)
 CPPFLAGS_ADD :=
 CFLAGS_ADD = $(shell $(JULIA_CONFIG) --cflags)
 LDFLAGS_ADD = -lm $(shell $(JULIA_CONFIG) --ldflags --ldlibs)
@@ -29,8 +30,8 @@ DEBUGFLAGS += -g
 
 #=============================================================================
 
-release: $(BIN)/embedding$(EXE)
-debug:   $(BIN)/embedding-debug$(EXE)
+release: $(BIN)/embedding$(EXE) $(BIN)/libdl-embedding$(EXE)
+debug:   $(BIN)/embedding-debug$(EXE) $(BIN)/libdl-embedding$(EXE)
 
 $(BIN)/embedding$(EXE): $(SRCDIR)/embedding.c
 	$(CC) $^ -o $@ $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS)
@@ -38,6 +39,12 @@ $(BIN)/embedding$(EXE): $(SRCDIR)/embedding.c
 $(BIN)/embedding-debug$(EXE): $(SRCDIR)/embedding.c
 	$(CC) $^ -o $@ $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) $(DEBUGFLAGS)
 
+$(BIN)/libdl-embedding$(EXE): $(SRCDIR)/libdl_embedding.c
+	$(CC) $^ -o $@ $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -ldl -DLIBJULIA_PATH=\"$(JULIA_LIBDIR)/libjulia.so\"
+
+$(BIN)/libdl-embedding-debug$(EXE): $(SRCDIR)/libdl_embedding.c
+	$(CC) $^ -o $@ $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(DEBUGFLAGS) -ldl -DLIBJULIA_PATH=\"$(JULIA_LIBDIR)/libjulia.so\"
+
 ifneq ($(abspath $(BIN)),$(abspath $(SRCDIR)))
 # for demonstration purposes, our demo code is also installed
 # in $BIN, although this would likely not be typical
@@ -45,7 +52,8 @@ $(BIN)/LocalModule.jl: $(SRCDIR)/LocalModule.jl
 	cp $< $@
 endif
 
-check: $(BIN)/embedding$(EXE) $(BIN)/LocalModule.jl
+check: $(BIN)/embedding$(EXE) $(BIN)/libdl-embedding$(EXE) $(BIN)/LocalModule.jl
+	$(BIN)/libdl-embedding$(EXE) # run w/o error
 	$(JULIA) --depwarn=error $(SRCDIR)/embedding-test.jl $<
 	@echo SUCCESS
 
diff --git a/test/embedding/embedding-test.jl b/test/embedding/embedding-test.jl
index c10cc6a16fee8..0744cac679698 100644
--- a/test/embedding/embedding-test.jl
+++ b/test/embedding/embedding-test.jl
@@ -21,6 +21,7 @@ end
     close(err.in)
     out_task = @async readlines(out)
     @test readline(err) == "MethodError: no method matching this_function_has_no_methods()"
+    @test readline(err) == "The function `this_function_has_no_methods` exists, but no method is defined for this combination of argument types."
     @test success(p)
     lines = fetch(out_task)
     @test length(lines) == 11
@@ -31,5 +32,5 @@ end
     @test lines[9] == "called bar"
     @test lines[10] == "calling new bar"
     @test lines[11] == "      From worker 2:\tTaking over the world..."
-    @test readline(err) == "exception caught from C"
+    @test "exception caught from C" in readlines(err)
 end
diff --git a/test/embedding/embedding.c b/test/embedding/embedding.c
index 1294d4cdafb45..0e1b991205094 100644
--- a/test/embedding/embedding.c
+++ b/test/embedding/embedding.c
@@ -61,7 +61,7 @@ int main()
     {
         // Same as above but with function handle (more flexible)
 
-        jl_function_t *func = jl_get_function(jl_base_module, "sqrt");
+        jl_value_t *func = jl_get_function(jl_base_module, "sqrt");
         jl_value_t* argument = jl_box_float64(2.0);
         jl_value_t* ret = jl_call1(func, argument);
         double retDouble = jl_unbox_float64(ret);
@@ -86,17 +86,17 @@ int main()
         // (aka, is gc-rooted until) the program reaches the corresponding JL_GC_POP()
         JL_GC_PUSH1(&x);
 
-        double* xData = jl_array_data(x);
+        double* xData = jl_array_data(x, double);
 
         size_t i;
-        for (i = 0; i < jl_array_len(x); i++)
+        for (i = 0; i < jl_array_nrows(x); i++)
             xData[i] = i;
 
-        jl_function_t *func  = jl_get_function(jl_base_module, "reverse!");
+        jl_value_t *func  = jl_get_function(jl_base_module, "reverse!");
         jl_call1(func, (jl_value_t*) x);
 
         printf("x = [");
-        for (i = 0; i < jl_array_len(x); i++)
+        for (i = 0; i < jl_array_nrows(x); i++)
             printf("%e ", xData[i]);
         printf("]\n");
         fflush(stdout);
@@ -109,7 +109,7 @@ int main()
 
         checked_eval_string("my_func(x) = 2 * x");
 
-        jl_function_t *func = jl_get_function(jl_main_module, "my_func");
+        jl_value_t *func = jl_get_function(jl_main_module, "my_func");
         jl_value_t* arg = jl_box_float64(5.0);
         double ret = jl_unbox_float64(jl_call1(func, arg));
 
@@ -192,6 +192,12 @@ int main()
         checked_eval_string("f28825()");
     }
 
+    {
+        // jl_typeof works (#50714)
+        jl_value_t *v = checked_eval_string("sqrt(2.0)");
+        jl_value_t *t = jl_typeof(v);
+    }
+
     JL_TRY {
         jl_error("exception thrown");
     }
diff --git a/test/embedding/libdl_embedding.c b/test/embedding/libdl_embedding.c
new file mode 100644
index 0000000000000..6cd040d5f9abf
--- /dev/null
+++ b/test/embedding/libdl_embedding.c
@@ -0,0 +1,12 @@
+#include <dlfcn.h>
+#include <stdio.h>
+#include <signal.h>
+
+int main(int argc, char *argv[])
+{
+    // This test doesn't do much yet, except check
+    // https://github.com/JuliaLang/julia/issues/57240
+    signal(SIGCHLD, SIG_IGN);
+    void *handle = dlopen(LIBJULIA_PATH, RTLD_LAZY);
+    return 0;
+}
diff --git a/test/env.jl b/test/env.jl
index de5cf92d9edb9..7f6962cf675aa 100644
--- a/test/env.jl
+++ b/test/env.jl
@@ -52,6 +52,11 @@ end
     @test get!(ENV, key, "default") == "default"
     @test haskey(ENV, key)
     @test ENV[key] == "default"
+
+    key = randstring(25)
+    @test !haskey(ENV, key)
+    @test get!(ENV, key, 0) == 0
+    @test ENV[key] == "0"
 end
 @testset "#17956" begin
     @test length(ENV) > 1
@@ -128,7 +133,9 @@ end
             for _v in (v, uppercasefirst(v), uppercase(v))
                 ENV["testing_gbe"] = _v
                 @test Base.get_bool_env("testing_gbe", false) == true
+                @test Base.get_bool_env(() -> false, "testing_gbe") == true
                 @test Base.get_bool_env("testing_gbe", true) == true
+                @test Base.get_bool_env(() -> true, "testing_gbe") == true
             end
         end
     end
@@ -137,26 +144,34 @@ end
             for _v in (v, uppercasefirst(v), uppercase(v))
                 ENV["testing_gbe"] = _v
                 @test Base.get_bool_env("testing_gbe", true) == false
+                @test Base.get_bool_env(() -> true, "testing_gbe") == false
                 @test Base.get_bool_env("testing_gbe", false) == false
+                @test Base.get_bool_env(() -> false, "testing_gbe") == false
             end
         end
     end
     @testset "empty" begin
         ENV["testing_gbe"] = ""
         @test Base.get_bool_env("testing_gbe", true) == true
+        @test Base.get_bool_env(() -> true, "testing_gbe") == true
         @test Base.get_bool_env("testing_gbe", false) == false
+        @test Base.get_bool_env(() -> false, "testing_gbe") == false
     end
     @testset "undefined" begin
         delete!(ENV, "testing_gbe")
         @test !haskey(ENV, "testing_gbe")
         @test Base.get_bool_env("testing_gbe", true) == true
+        @test Base.get_bool_env(() -> true, "testing_gbe") == true
         @test Base.get_bool_env("testing_gbe", false) == false
+        @test Base.get_bool_env(() -> false, "testing_gbe") == false
     end
     @testset "unrecognized" begin
         for v in ("truw", "falls")
             ENV["testing_gbe"] = v
             @test Base.get_bool_env("testing_gbe", true) === nothing
+            @test_throws ArgumentError Base.get_bool_env("testing_gbe", true, throw=true)
             @test Base.get_bool_env("testing_gbe", false) === nothing
+            @test_throws ArgumentError Base.get_bool_env("testing_gbe", false, throw=true)
         end
     end
 
@@ -168,7 +183,7 @@ end
 end
 
 # Restore the original environment
-for k in keys(ENV)
+for k in collect(keys(ENV))
     if !haskey(original_env, k)
         delete!(ENV, k)
     end
diff --git a/test/error.jl b/test/error.jl
index e9cdfa100bc81..0d8047aa92a44 100644
--- a/test/error.jl
+++ b/test/error.jl
@@ -93,10 +93,14 @@ end
 @testset "MethodError for methods without line numbers" begin
     try
         eval(Expr(:function, :(f44319()), 0))
-        f44319(1)
+        @invokelatest f44319()
     catch e
         s = sprint(showerror, e)
-        @test s == "MethodError: no method matching f44319(::Int$(Sys.WORD_SIZE))\n\nClosest candidates are:\n  f44319()\n   @ $curmod_str none:0\n"
+        @test s == """MethodError: no method matching f44319(::Int$(Sys.WORD_SIZE))
+                      The function `f44319` exists, but no method is defined for this combination of argument types.
+
+                      Closest candidates are:\n  f44319()\n   @ $curmod_str none:0
+                      """
     end
 end
 
@@ -105,10 +109,10 @@ end
         if mod ∉ visited
             push!(visited, mod)
             for name in names(mod, all=true)
-                isdefined(mod, name) || continue
-                value = getfield(mod, name)
-
+                isdefinedglobal(mod, name) || continue
+                value = getglobal(mod, name)
                 if value isa Module
+                    value === Main && continue
                     test_exceptions(value, visited)
                 elseif value isa Type
                     str = string(value)
@@ -123,3 +127,36 @@ end
     visited = test_exceptions(Base)
     test_exceptions(Core, visited)
 end
+
+# inference quality test for `error`
+@test Base.infer_return_type(error, (Any,)) === Union{}
+@test Base.infer_return_type(xs->error(xs...), (Vector{Any},)) === Union{}
+module Issue54029
+export raise54029
+Base.Experimental.@max_methods 1
+raise54029(x) = error(x)
+end
+using .Issue54029
+@test Base.infer_return_type(raise54029, (Any,)) === Union{}
+@test Base.infer_return_type(xs->raise54029(xs...), (Vector{Any},)) === Union{}
+
+@testset "CompositeException" begin
+    ce = CompositeException()
+    @test isempty(ce)
+    @test length(ce) == 0
+    @test eltype(ce) == Any
+    str = sprint(showerror, ce)
+    @test str == "CompositeException()\n"
+    push!(ce, ErrorException("something sad has happened"))
+    @test !isempty(ce)
+    @test length(ce) == 1
+    pushfirst!(ce, ErrorException("something sad has happened even earlier"))
+    @test length(ce) == 2
+    # test iterate
+    for ex in ce
+        @test ex isa ErrorException
+    end
+    push!(ce, ErrorException("something sad has happened yet again"))
+    str = sprint(showerror, ce)
+    @test str == "something sad has happened even earlier\n\n...and 2 more exceptions.\n"
+end
diff --git a/test/errorshow.jl b/test/errorshow.jl
index 28ae3fd32365a..941c5e2074f51 100644
--- a/test/errorshow.jl
+++ b/test/errorshow.jl
@@ -5,7 +5,13 @@ using Random, LinearAlgebra
 # For curmod_*
 include("testenv.jl")
 
-
+# re-register only the error hints that are being tested here (
+Base.Experimental.register_error_hint(Base.noncallable_number_hint_handler, MethodError)
+Base.Experimental.register_error_hint(Base.string_concatenation_hint_handler, MethodError)
+Base.Experimental.register_error_hint(Base.methods_on_iterable, MethodError)
+Base.Experimental.register_error_hint(Base.nonsetable_type_hint_handler, MethodError)
+Base.Experimental.register_error_hint(Base.fielderror_listfields_hint_handler, FieldError)
+Base.Experimental.register_error_hint(Base.fielderror_dict_hint_handler, FieldError)
 @testset "SystemError" begin
     err = try; systemerror("reason", Cint(0)); false; catch ex; ex; end::SystemError
     errs = sprint(Base.showerror, err)
@@ -74,8 +80,12 @@ Base.show_method_candidates(buf, Base.MethodError(method_c1,(1, "", "")))
 Base.show_method_candidates(buf, Base.MethodError(method_c1,(1., "", "")))
 @test occursin("\n\nClosest candidates are:\n  method_c1(::Float64, ::AbstractString...)$cmod$cfile$c1line\n", String(take!(buf)))
 
-# Have no matches so should return empty
+# Have no matches, but still print up to 3
 Base.show_method_candidates(buf, Base.MethodError(method_c1,(1, 1, 1)))
+@test occursin("\n\nClosest candidates are:\n  method_c1(!Matched::Float64, !Matched::AbstractString...)$cmod$cfile$c1line\n", String(take!(buf)))
+
+function nomethodsfunc end
+Base.show_method_candidates(buf, Base.MethodError(nomethodsfunc,(1, 1, 1)))
 @test isempty(String(take!(buf)))
 
 # matches the implicit constructor -> convert method
@@ -204,14 +214,18 @@ Base.show_method_candidates(buf, try bad_vararg_decl("hello", 3) catch e e end)
 @test occursin("bad_vararg_decl(!Matched::$Int, ::Any...)", String(take!(buf)))
 
 macro except_str(expr, err_type)
+    source_info = __source__
+    errmsg = "expected failure, but no exception thrown for $expr"
     return quote
         let err = nothing
             try
                 $(esc(expr))
             catch err
             end
-            err === nothing && error("expected failure, but no exception thrown")
-            @test typeof(err) === $(esc(err_type))
+            err === nothing && error($errmsg)
+            @testset let expr=$(repr(expr))
+                $(Expr(:macrocall, Symbol("@test"), source_info, :(typeof(err) === $(esc(err_type)))))
+            end
             buf = IOBuffer()
             showerror(buf, err)
             String(take!(buf))
@@ -220,6 +234,7 @@ macro except_str(expr, err_type)
 end
 
 macro except_strbt(expr, err_type)
+    source_info = __source__
     errmsg = "expected failure, but no exception thrown for $expr"
     return quote
         let err = nothing
@@ -228,7 +243,9 @@ macro except_strbt(expr, err_type)
             catch err
             end
             err === nothing && error($errmsg)
-            @test typeof(err) === $(esc(err_type))
+            @testset let expr=$(repr(expr))
+                $(Expr(:macrocall, Symbol("@test"), source_info, :(typeof(err) === $(esc(err_type)))))
+            end
             buf = IOBuffer()
             showerror(buf, err, catch_backtrace())
             String(take!(buf))
@@ -237,6 +254,8 @@ macro except_strbt(expr, err_type)
 end
 
 macro except_stackframe(expr, err_type)
+    source_info = __source__
+    errmsg = "expected failure, but no exception thrown for $expr"
     return quote
        let err = nothing
            local st
@@ -245,8 +264,10 @@ macro except_stackframe(expr, err_type)
            catch err
                st = stacktrace(catch_backtrace())
            end
-           err === nothing && error("expected failure, but no exception thrown")
-           @test typeof(err) === $(esc(err_type))
+           err === nothing && error($errmsg)
+           @testset let expr=$(repr(expr))
+               $(Expr(:macrocall, Symbol("@test"), source_info, :(typeof(err) === $(esc(err_type)))))
+           end
            sprint(show, st[1])
        end
     end
@@ -277,6 +298,7 @@ err_str = @except_str 1 + 2 MethodError
 err_str = @except_str Float64[](1) MethodError
 @test !occursin("import Base.Array", err_str)
 
+global Array
 Array() = 1
 err_str = @except_str Array([1]) MethodError
 @test occursin("import Base.Array", err_str)
@@ -350,7 +372,7 @@ let undefvar
     err_str = @except_str Vector{Any}(undef, 1)[1] UndefRefError
     @test err_str == "UndefRefError: access to undefined reference"
     err_str = @except_str undefvar UndefVarError
-    @test err_str == "UndefVarError: `undefvar` not defined"
+    @test err_str == "UndefVarError: `undefvar` not defined in local scope"
     err_str = @except_str read(IOBuffer(), UInt8) EOFError
     @test err_str == "EOFError: read end of file"
     err_str = @except_str Dict()[:doesnotexist] KeyError
@@ -384,6 +406,8 @@ let err_str,
     @test occursin("MethodError: no method matching Bool()", err_str)
     err_str = @except_str :a() MethodError
     @test occursin("MethodError: objects of type Symbol are not callable", err_str)
+    err_str = @except_str missing(1) MethodError
+    @test occursin("MethodError: objects of type Missing are not callable", err_str)
     err_str = @except_str EightBitType() MethodError
     @test occursin("MethodError: no method matching $(curmod_prefix)EightBitType()", err_str)
     err_str = @except_str i() MethodError
@@ -397,7 +421,10 @@ let err_str,
     err_str = @except_str FunctionLike()() MethodError
     @test occursin("MethodError: no method matching (::$(curmod_prefix)FunctionLike)()", err_str)
     err_str = @except_str [1,2](1) MethodError
-    @test occursin("MethodError: objects of type Vector{$Int} are not callable\nUse square brackets [] for indexing an Array.", err_str)
+    @test occursin("MethodError: objects of type Vector{$Int} are not callable.\n"*
+        "In case you did not try calling it explicitly, check if a Vector{$Int}"*
+        " has been passed as an argument to a method that expects a callable instead.\n"*
+        "In case you're trying to index into the array, use square brackets [] instead of parentheses ().", err_str)
     # Issue 14940
     err_str = @except_str randn(1)() MethodError
     @test occursin("MethodError: objects of type Vector{Float64} are not callable", err_str)
@@ -415,7 +442,7 @@ let err_str
     @test occursin("For element-wise subtraction, use broadcasting with dot syntax: array .- scalar", err_str)
 end
 
-
+import Core: String
 method_defs_lineno = @__LINE__() + 1
 String() = throw(ErrorException("1"))
 (::String)() = throw(ErrorException("2"))
@@ -460,7 +487,7 @@ let err_str,
     @test startswith(sprint(show, which(StructWithUnionAllMethodDefs{<:Integer}, (Any,))),
                      "($(curmod_prefix)StructWithUnionAllMethodDefs{T} where T<:Integer)(x)")
     @test repr("text/plain", FunctionLike()) == "(::$(curmod_prefix)FunctionLike) (generic function with 1 method)"
-    @test repr("text/plain", Core.arraysize) == "arraysize (built-in function)"
+    @test repr("text/plain", Core.getfield) == "getfield (built-in function)"
 
     err_str = @except_stackframe String() ErrorException
     @test err_str == "String() at $sn:$(method_defs_lineno + 0)"
@@ -501,7 +528,7 @@ let
     @test (@macroexpand @fastmath +      ) == :(Base.FastMath.add_fast)
     @test (@macroexpand @fastmath min(1) ) == :(Base.FastMath.min_fast(1))
     let err = try; @macroexpand @doc "" f() = @x; catch ex; ex; end
-        @test err == UndefVarError(Symbol("@x"))
+        @test err == UndefVarError(Symbol("@x"), @__MODULE__)
     end
     @test (@macroexpand @seven_dollar $bar) == 7
     x = 2
@@ -534,6 +561,14 @@ end
     @test (@macroexpand1 @nest2b 42) == _macroexpand1(:(@nest2b 42))
 end
 
+module TwoargMacroExpand
+macro modulecontext(); return __module__; end
+end
+@test (@__MODULE__) == @macroexpand TwoargMacroExpand.@modulecontext
+@test TwoargMacroExpand == @macroexpand TwoargMacroExpand @modulecontext
+@test (@__MODULE__) == @macroexpand1 TwoargMacroExpand.@modulecontext
+@test TwoargMacroExpand == @macroexpand1 TwoargMacroExpand @modulecontext
+
 foo_9965(x::Float64; w=false) = x
 foo_9965(x::Int) = 2x
 
@@ -549,17 +584,37 @@ foo_9965(x::Int) = 2x
     @test occursin("got unsupported keyword argument \"w\"", String(take!(io)))
 end
 
+@testset "MethodError with long types (#50803)" begin
+    a = view(reinterpret(reshape, UInt8, PermutedDimsArray(rand(5, 7), (2, 1))), 2:3, 2:4, 1:4) # a mildly-complex type
+    function f50803 end
+    ex50803 = try
+        f50803(a, a, a, a, a, a)
+    catch e
+        e
+    end::MethodError
+    tlf = Ref(false)
+    str = sprint(Base.showerror, ex50803; context=(:displaysize=>(1000, 120), :stacktrace_types_limited=>tlf))
+    @test tlf[]
+    @test occursin("::SubArray{…}", str)
+    tlf[] = false
+    str = sprint(Base.showerror, ex50803; context=(:displaysize=>(1000, 10000), :stacktrace_types_limited=>tlf))
+    @test !tlf[]
+    str = sprint(Base.showerror, ex50803; context=(:displaysize=>(1000, 120)))
+    @test !occursin("::SubArray{…}", str)
+end
+
 # Issue #20556
 import REPL
 module EnclosingModule
     abstract type AbstractTypeNoConstructors end
 end
 let
-    method_error = MethodError(EnclosingModule.AbstractTypeNoConstructors, ())
+    method_error = MethodError(EnclosingModule.AbstractTypeNoConstructors, (), Base.get_world_counter())
 
     # Test that it shows a special message when no constructors have been defined by the user.
-    @test sprint(showerror, method_error) ==
-        "MethodError: no constructors have been defined for $(EnclosingModule.AbstractTypeNoConstructors)"
+    @test startswith(sprint(showerror, method_error),
+        """MethodError: no constructors have been defined for $(EnclosingModule.AbstractTypeNoConstructors)
+           The type `$(EnclosingModule.AbstractTypeNoConstructors)` exists, but no method is defined for this combination of argument types when trying to construct it.""")
 
     # Does it go back to previous behaviour when there *is* at least
     # one constructor defined?
@@ -618,6 +673,24 @@ end
         @test startswith(str, "MethodError: no method matching f21006(::Tuple{})")
         @test !occursin("The applicable method may be too new", str)
     end
+
+    str = sprint(Base.showerror, MethodError(+, (1.0, 2.0)))
+    @test startswith(str, "MethodError: no method matching +(::Float64, ::Float64)")
+    @test occursin("This error has been manually thrown, explicitly", str)
+
+    str = sprint(Base.showerror, MethodError(+, (1.0, 2.0), Base.get_world_counter()))
+    @test startswith(str, "MethodError: no method matching +(::Float64, ::Float64)")
+    @test occursin("This error has been manually thrown, explicitly", str)
+
+    str = sprint(Base.showerror, MethodError(Core.kwcall, ((; a=3.0), +, 1.0, 2.0)))
+    @test startswith(str, "MethodError: no method matching +(::Float64, ::Float64; a::Float64)")
+    @test occursin("This error has been manually thrown, explicitly", str)
+
+    str = sprint(Base.showerror, MethodError(Core.kwcall, ((; a=3.0), +, 1.0, 2.0), Base.get_world_counter()))
+    @test startswith(str, "MethodError: no method matching +(::Float64, ::Float64; a::Float64)")
+    @test occursin("This method does not support all of the given keyword arguments", str)
+
+    @test_throws "MethodError: no method matching kwcall()" Core.kwcall()
 end
 
 # Issue #50200
@@ -626,8 +699,9 @@ using Base.Experimental: @opaque
     test_no_error(f) = @test f() === nothing
     function test_worldage_error(f)
         ex = try; f(); error("Should not have been reached") catch ex; ex; end
-        @test occursin("The applicable method may be too new", sprint(Base.showerror, ex))
-        @test !occursin("!Matched::", sprint(Base.showerror, ex))
+        strex = sprint(Base.showerror, ex)
+        @test occursin("The applicable method may be too new", strex)
+        @test !occursin("!Matched::", sprint(Base.showerror, strex))
     end
 
     global callback50200
@@ -666,7 +740,7 @@ let err_str
     @test occursin(Regex("MethodError: no method matching one\\(::.*HasNoOne; value::$(Int)\\)"), err_str)
     @test occursin("`one` doesn't take keyword arguments, that would be silly", err_str)
 end
-pop!(Base.Experimental._hint_handlers[MethodError])  # order is undefined, don't copy this
+pop!(Base.Experimental._hint_handlers[Core.typename(MethodError)])  # order is undefined, don't copy this
 
 function busted_hint(io, exc, notarg)  # wrong number of args
     print(io, "\nI don't have a hint for you, sorry")
@@ -678,11 +752,10 @@ catch ex
     io = IOBuffer()
     @test_logs (:error, "Hint-handler busted_hint for DomainError in $(@__MODULE__) caused an error") showerror(io, ex)
 end
-pop!(Base.Experimental._hint_handlers[DomainError])  # order is undefined, don't copy this
+pop!(Base.Experimental._hint_handlers[Core.typename(DomainError)])  # order is undefined, don't copy this
 
 struct ANumber <: Number end
-let err_str
-    err_str = @except_str ANumber()(3 + 4) MethodError
+let err_str = @except_str ANumber()(3 + 4) MethodError
     @test occursin("objects of type $(curmod_prefix)ANumber are not callable", err_str)
     @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1
     # issue 40478
@@ -690,6 +763,25 @@ let err_str
     @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1
 end
 
+let a = [1 2; 3 4];
+    err_str = @except_str (a[1][2] = 5) MethodError
+    @test occursin("\nAre you trying to index into an array? For multi-dimensional arrays, separate the indices with commas: ", err_str)
+    @test occursin("a[1, 2]", err_str)
+    @test occursin("rather than a[1][2]", err_str)
+end
+
+let d = Dict
+    err_str = @except_str (d[1] = 5) MethodError
+    @test occursin("\nYou attempted to index the type Dict, rather than an instance of the type. Make sure you create the type using its constructor: ", err_str)
+    @test occursin("d = Dict([...])", err_str)
+    @test occursin(" rather than d = Dict", err_str)
+end
+
+let s = Some("foo")
+    err_str = @except_str (s[] = "bar") MethodError
+    @test !occursin("You attempted to index the type String", err_str)
+end
+
 # Execute backtrace once before checking formatting, see #38858
 backtrace()
 
@@ -701,12 +793,118 @@ backtrace()
     io = IOBuffer()
     Base.show_backtrace(io, bt)
     output = split(String(take!(io)), '\n')
-    @test lstrip(output[3])[1:3] == "[1]"
+    length(output) >= 8 || println(output) # for better errors when this fails
+    @test lstrip(output[3])[1] == '┌'
+    @test lstrip(lstrip(output[3])[4:end])[1:3] == "[1]"
     @test occursin("g28442", output[3])
-    @test lstrip(output[5])[1:3] == "[2]"
+    @test lstrip(output[5])[1] == '├'
+    @test lstrip(lstrip(output[5])[4:end])[1:3] == "[2]"
     @test occursin("f28442", output[5])
-    @test occursin("the last 2 lines are repeated 5000 more times", output[7])
-    @test lstrip(output[8])[1:7] == "[10003]"
+    is_windows_32_bit = Sys.iswindows() && (Sys.WORD_SIZE == 32)
+    if is_windows_32_bit
+        # These tests are currently broken (intermittently/non-determistically) on 32-bit Windows.
+        # https://github.com/JuliaLang/julia/issues/55900
+        # Instead of skipping them entirely, we skip one, and we loosen the other.
+
+        # Broken test: @test occursin("repeated 5001 times", output[7])
+        @test occursin("repeated ", output[7])
+        @test occursin(" times", output[7])
+
+        # Broken test: @test lstrip(output[8])[1:7] == "[10003]"
+        @test_broken false
+    else
+        @test occursin("repeated 5001 times", output[7])
+        @test lstrip(output[8])[1:7] == "[10003]"
+    end
+end
+
+@testset "Long stacktrace printing - nested repeated single frame" begin
+    f28442a(n) = n ≤ 0 ? (return backtrace()) : g28442a(n - 1)
+    g28442a(n) = 80 > n > 20 ? h28442a(n - 1) : f28442a(n - 1)
+    h28442a(n) = n % 10 == 0 ? g28442a(n - 1) : h28442a(n - 1)
+    bt = f28442a(100)
+    io = IOBuffer()
+    Base.show_backtrace(io, bt)
+    output = split(String(take!(io)), '\n')
+    length(output) >= 21 || println(output) # for better errors when this fails
+    @test startswith(lstrip(output[3]), "┌ ")
+    @test lstrip(lstrip(output[3])[4:end])[1:3] == "[1]"
+    @test occursin("f28442a", output[3])
+    @test startswith(lstrip(output[5]), "├ ")
+    @test lstrip(lstrip(output[5])[4:end])[1:3] == "[2]"
+    @test occursin("g28442a", output[5])
+
+    @test startswith(lstrip(output[8]), "┌┌ ")
+    @test occursin("h28442a", output[8])
+    @test startswith(lstrip(output[11]), "├ ")
+    @test occursin("g28442a", output[11])
+
+    @test startswith(lstrip(output[14]), "┌ ")
+    @test occursin("f28442a", output[14])
+    @test startswith(lstrip(output[16]), "├ ")
+    @test occursin("g28442a", output[16])
+
+    @test occursin("f28442a", output[19])
+
+    is_windows_32_bit = Sys.iswindows() && (Sys.WORD_SIZE == 32)
+    if is_windows_32_bit
+        # Assuming tests are broken on 32-bit Windows as above, no need to repeat loose tests here.
+    else
+        @test occursin("repeated 10 times", output[7])
+        @test lstrip(lstrip(output[8])[7:end])[1:4] == "[21]"
+        @test occursin("repeated 9 times", output[10])
+        @test lstrip(lstrip(output[11])[4:end])[1:4] == "[30]"
+        @test occursin("repeated 6 times", output[13])
+        @test lstrip(lstrip(output[14])[4:end])[1:4] == "[81]"
+        @test lstrip(lstrip(output[16])[4:end])[1:4] == "[82]"
+        @test lstrip(output[19])[1:5] == "[101]"
+        @test lstrip(output[21])[1:5] == "[102]"
+    end
+end
+
+@testset "Long stacktrace printing - nested cycles" begin
+    f28442b(n) = n ≤ 0 ? (return backtrace()) : g28442b(n - 1)
+    g28442b(n) = 80 > n > 60 || 40 > n > 20 ? h28442b(n - 1) : f28442b(n - 1)
+    h28442b(n) = g28442b(n - 1)
+    bt = f28442b(100)
+    io = IOBuffer()
+    Base.show_backtrace(io, bt)
+    output = split(String(take!(io)), '\n')
+    length(output) >= 21 || println(output) # for better errors when this fails
+    @test startswith(lstrip(output[3]), "┌ ")
+    @test lstrip(lstrip(output[3])[4:end])[1:3] == "[1]"
+    @test occursin("f28442b", output[3])
+    @test startswith(lstrip(output[5]), "├ ")
+    @test lstrip(lstrip(output[5])[4:end])[1:3] == "[2]"
+    @test occursin("g28442b", output[5])
+
+    is_windows_32_bit = Sys.iswindows() && (Sys.WORD_SIZE == 32)
+    if is_windows_32_bit
+        # Assuming tests are broken on 32-bit Windows as above, no need to repeat loose tests here.
+    else
+        @test startswith(lstrip(output[8]), "┌┌ ")
+        @test occursin("h28442b", output[8])
+        @test startswith(lstrip(output[10]), "├├ ")
+        @test occursin("g28442b", output[10])
+
+        @test startswith(lstrip(output[13]), "├┌ ")
+        @test occursin("f28442b", output[13])
+        @test startswith(lstrip(output[15]), "├├ ")
+        @test occursin("g28442b", output[15])
+
+        @test occursin("f28442b", output[19])
+
+        @test occursin("repeated 10 times", output[7])
+        @test lstrip(lstrip(output[8])[7:end])[1:4] == "[21]"
+        @test lstrip(lstrip(output[10])[7:end])[1:4] == "[22]"
+        @test occursin("repeated 10 times", output[12])
+        @test lstrip(lstrip(output[13])[7:end])[1:4] == "[41]"
+        @test lstrip(lstrip(output[15])[7:end])[1:4] == "[42]"
+        @test occursin("repeated 10 times", output[17])
+        @test occursin("repeated 2 times", output[18])
+        @test lstrip(output[19])[1:5] == "[101]"
+        @test lstrip(output[21])[1:5] == "[102]"
+    end
 end
 
 @testset "Line number correction" begin
@@ -734,6 +932,145 @@ end
 @test_throws ArgumentError("invalid index: \"foo\" of type String") [1]["foo"]
 @test_throws ArgumentError("invalid index: nothing of type Nothing") [1][nothing]
 
+# issue #53618, pr #55165
+@testset "FieldErrorHints" begin
+    struct FieldFoo
+        a::Float32
+        b::Int
+    end
+    Base.propertynames(foo::FieldFoo) = (:a, :x, :y)
+
+    s = FieldFoo(1, 2)
+
+    test = @test_throws FieldError s.c
+
+    ex = test.value::FieldError
+
+    # Check error message first
+    errorMsg = sprint(Base.showerror, ex)
+    @test occursin("FieldError: type", errorMsg)
+    @test occursin("FieldFoo has no field `c`", errorMsg)
+    @test occursin("available fields: `a`, `b`", errorMsg)
+    @test occursin("Available properties: `x`, `y`", errorMsg)
+
+    d = Dict(s => 1)
+
+    for fld in fieldnames(Dict)
+        ex = try
+            getfield(d, fld)
+        catch e
+            print(e)
+        end
+        @test !(ex isa Type) || ex <: FieldError
+    end
+    test = @test_throws FieldError d.c
+
+    ex = test.value::FieldError
+
+    errorMsg = sprint(Base.showerror, ex)
+    @test occursin("FieldError: type Dict has no field `c`", errorMsg)
+    # Check hint message
+    hintExpected = "Did you mean to access dict values using key: `:c` ? Consider using indexing syntax dict[:c]\n"
+    @test occursin(hintExpected, errorMsg)
+end
+
+module FieldErrorTest
+struct Point end
+p = Point()
+end
+
+@testset "FieldError with changing fields" begin
+    # https://discourse.julialang.org/t/better-error-message-for-modified-structs-in-julia-1-12/129265
+    err_str1 = @except_str FieldErrorTest.p.x FieldError
+    @test occursin("FieldErrorTest.Point", err_str1)
+    @eval FieldErrorTest struct Point{T}
+        x::T
+        y::T
+    end
+    err_str2 = @except_str FieldErrorTest.p.x FieldError
+    @test occursin("@world", err_str2)
+    @test occursin("FieldErrorTest.Point", err_str2)
+end
+
+# UndefVar error hints
+module A53000
+    export f
+    f() = 0.0
+end
+
+module C_outer_53000
+    import ..A53000: f
+    public f
+
+    module C_inner_53000
+    import ..C_outer_53000: f
+    export f
+    end
+end
+
+module D_53000
+    public f
+    f() = 1.0
+end
+
+C_inner_53000 = "I'm a decoy with the same name as C_inner_53000!"
+
+Base.Experimental.register_error_hint(Base.UndefVarError_hint, UndefVarError)
+
+@testset "undefvar error hints" begin
+    old_modules_order = Base.loaded_modules_order
+    append!(Base.loaded_modules_order, [A53000, C_outer_53000, C_outer_53000.C_inner_53000, D_53000])
+    test = @test_throws UndefVarError f
+    ex = test.value::UndefVarError
+    errormsg = sprint(Base.showerror, ex)
+    mod = @__MODULE__
+    @test occursin("Hint: a global variable of this name also exists in $mod.A53000.", errormsg)
+    @test occursin("Hint: a global variable of this name also exists in $mod.D_53000.", errormsg)
+    @test occursin("- Also declared public in $mod.C_outer_53000", errormsg)
+    @test occursin("- Also exported by $mod.C_outer_53000.C_inner_53000 (loaded but not imported in Main).", errormsg)
+    copy!(Base.loaded_modules_order, old_modules_order)
+end
+@testset " test the functionality of `UndefVarError_hint` against import clashes" begin
+    @eval module X
+        module A
+        export x
+        x = 1
+        end # A
+
+        module B
+        export x
+        x = 2
+        end # B
+
+        using .A, .B
+
+    end # X
+
+    expected_message = string("\nHint: It looks like two or more modules export different ",
+                              "bindings with this name, resulting in ambiguity. Try explicitly ",
+                              "importing it from a particular module, or qualifying the name ",
+                              "with the module it should come from.")
+    @test_throws expected_message X.x
+end
+
+# Module for UndefVarError world age testing
+module TestWorldAgeUndef end
+
+@testset "UndefVarError world age hint" begin
+    ex = try
+        TestWorldAgeUndef.newvar
+    catch e
+        e
+    end
+    @test ex isa UndefVarError
+
+    Core.eval(TestWorldAgeUndef, :(newvar = 42))
+
+    err_str = sprint(Base.showerror, ex)
+    @test occursin("The binding may be too new: running in world age", err_str)
+    @test occursin("while current world is", err_str)
+end
+
 # test showing MethodError with type argument
 struct NoMethodsDefinedHere; end
 let buf = IOBuffer()
@@ -773,6 +1110,20 @@ for (func,str) in ((TestMethodShadow.:+,":+"), (TestMethodShadow.:(==),":(==)"),
     @test occursin("You may have intended to import Base.$str", sprint(Base.showerror, ex))
 end
 
+# Test hint for functions in modules of argument types (issue #58682)
+module TestModuleHint
+    struct Bar end
+    length(x::Bar) = 42
+end
+let ex = try
+        # Call Base.length on TestModuleHint.Bar - should suggest importing TestModuleHint.length
+        length(TestModuleHint.Bar())
+    catch e
+        e
+    end::MethodError
+    @test occursin("may have intended to extend", sprint(Base.showerror, ex))
+end
+
 # Test that implementation detail of include() is hidden from the user by default
 let bt = try
         @noinline include("testhelpers/include_error.jl")
@@ -831,7 +1182,7 @@ if (Sys.isapple() || Sys.islinux()) && Sys.ARCH === :x86_64
                 catch_backtrace()
             end
             bt_str = sprint(Base.show_backtrace, bt)
-            @test occursin(r"repeats \d+ times", bt_str)
+            @test occursin(r"repeated \d+ times", bt_str)
         end
 
         let bt = try
@@ -840,7 +1191,7 @@ if (Sys.isapple() || Sys.islinux()) && Sys.ARCH === :x86_64
                 catch_backtrace()
             end
             bt_str = sprint(Base.show_backtrace, bt)
-            @test occursin(r"the last 2 lines are repeated \d+ more times", bt_str)
+            @test occursin(r"repeated \d+ times", bt_str)
         end
     end
 end
@@ -967,6 +1318,40 @@ let err_str
     @test occursin("String concatenation is performed with *", err_str)
 end
 
+# https://github.com/JuliaLang/julia/issues/55745
+let err_str
+    err_str = @except_str +() MethodError
+    @test !occursin("String concatenation is performed with *", err_str)
+end
+
+struct MissingLength; end
+struct MissingSize; end
+Base.IteratorSize(::Type{MissingSize}) = Base.HasShape{2}()
+Base.iterate(::MissingLength) = nothing
+Base.iterate(::MissingSize) = nothing
+
+let err_str
+    expected = "Finding the minimum of an iterable is performed with `minimum`."
+    err_str = @except_str min([1,2,3]) MethodError
+    @test occursin(expected, err_str)
+    err_str = @except_str min((i for i in 1:3)) MethodError
+    @test occursin(expected, err_str)
+    expected = "Finding the maximum of an iterable is performed with `maximum`."
+    err_str = @except_str max([1,2,3]) MethodError
+    @test occursin(expected, err_str)
+
+    expected = "You may need to implement the `length` method or define `IteratorSize` for this type to be `SizeUnknown`."
+    err_str = @except_str length(MissingLength()) MethodError
+    @test occursin(expected, err_str)
+    err_str = @except_str collect(MissingLength()) MethodError
+    @test occursin(expected, err_str)
+    expected = "You may need to implement the `length` and `size` methods for `IteratorSize` `HasShape`."
+    err_str = @except_str size(MissingSize()) MethodError
+    @test occursin(expected, err_str)
+    err_str = @except_str collect(MissingSize()) MethodError
+    @test occursin(expected, err_str)
+end
+
 @testset "unused argument names" begin
     g(::Int) = backtrace()
     bt = g(1)
@@ -1059,3 +1444,70 @@ let e = @test_throws MethodError convert(TypeCompareError{Float64,1}, TypeCompar
     @test  occursin("TypeCompareError{Float64,1}", str)
     @test !occursin("TypeCompareError{Float64{},2}", str) # No {...} for types without params
 end
+
+@testset "InexactError for Inf16 should print '16' (#51087)" begin
+    @test sprint(showerror, InexactError(:UInt128, UInt128, Inf16)) == "InexactError: UInt128(Inf16)"
+
+    for IntType in [Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128]
+        IntStr = string(IntType)
+        for InfVal in Any[Inf, Inf16, Inf32, Inf64]
+            InfStr = repr(InfVal)
+            e = @test_throws InexactError IntType(InfVal)
+            str = sprint(Base.showerror, e.value)
+            @test occursin("InexactError: $IntStr($InfStr)", str)
+        end
+    end
+end
+
+# error message hint from PR #22647
+@test_throws "Many shells" cd("~")
+@test occursin("Many shells", sprint(showerror, Base.IOError("~", Base.UV_ENOENT)))
+
+# issue #47559"
+@test_throws("MethodError: no method matching invoke Returns(::Any, ::Val{N}) where N",
+             invoke(Returns, Tuple{Any,Val{N}} where N, 1, Val(1)))
+
+f33793(x::Float32, y::Float32) = 1
+@test_throws "\nClosest candidates are:\n  f33793(!Matched::Float32, !Matched::Float32)\n" f33793(Float64(0.0), Float64(0.0))
+
+# https://github.com/JuliaLang/julia/issues/56325
+let err_str
+    f56325 = x->x+1
+    err_str = @except_str f56325(1,2) MethodError
+    @test occursin("The anonymous function", err_str)
+end
+
+# Test that error hints catch abstract exception supertypes (issue #58367)
+
+module Hinterland
+
+abstract type AbstractHintableException <: Exception end
+struct ConcreteHintableException <: AbstractHintableException end
+gonnathrow() = throw(ConcreteHintableException())
+
+function Base.showerror(io::IO, exc::ConcreteHintableException)
+    print(io, "This is my exception")
+    Base.Experimental.show_error_hints(io, exc)
+end
+
+function __init__()
+    Base.Experimental.register_error_hint(ConcreteHintableException) do io, exc
+        print(io, "\nThis hint caught my concrete exception type")
+    end
+    Base.Experimental.register_error_hint(AbstractHintableException) do io, exc
+        print(io, "\nThis other hint caught my abstract exception supertype")
+    end
+end
+
+end
+
+@testset "Hints for abstract exception supertypes" begin
+    exc = try
+        Hinterland.gonnathrow()
+    catch e
+        e
+    end
+    exc_print = sprint(Base.showerror, exc)
+    @test occursin("This hint caught my concrete exception type", exc_print)
+    @test occursin("This other hint caught my abstract exception supertype", exc_print)
+end
diff --git a/test/euler.jl b/test/euler.jl
index 9af79a44cc0d3..c8d0e9a734fd2 100644
--- a/test/euler.jl
+++ b/test/euler.jl
@@ -2,7 +2,7 @@
 
 ## Project Euler
 #
-#  problems: http://projecteuler.net/problems
+#  problems: https://projecteuler.net/problems
 #  solutions: https://code.google.com/p/projecteuler-solutions/wiki/ProjectEulerSolutions
 
 #1: 233168
diff --git a/test/exceptions.jl b/test/exceptions.jl
index eb0bbaec35090..1e52c7a2fe2c3 100644
--- a/test/exceptions.jl
+++ b/test/exceptions.jl
@@ -241,6 +241,18 @@ end
         end
     end)()
     @test length(Base.current_exceptions()) == 0
+
+    (()-> begin
+        while true
+            try
+                error("foo")
+            finally
+                break
+            end
+        end
+        @test length(Base.current_exceptions()) == 0
+    end)()
+    @test length(Base.current_exceptions()) == 0
 end
 
 @testset "Deep exception stacks" begin
diff --git a/test/fastmath.jl b/test/fastmath.jl
index 21f3ebc1e603f..efca5b85c6642 100644
--- a/test/fastmath.jl
+++ b/test/fastmath.jl
@@ -1,7 +1,30 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+using InteractiveUtils: code_llvm
 # fast math
 
+@testset "check fast present in LLVM" begin
+    for T in (Float16, Float32, Float64, ComplexF32, ComplexF64)
+        f(x) = @fastmath x + x + x
+        llvm = sprint(code_llvm, f, (T,))
+        @test occursin("fast", llvm)
+
+        g(x) = @fastmath x * x * x
+        llvm = sprint(code_llvm, g, (T,))
+        @test occursin("fast", llvm)
+    end
+
+    for T in (Float16, Float32, Float64)
+        f(x, y, z) = @fastmath min(x, y, z)
+        llvm = sprint(code_llvm, f, (T,T,T))
+        @test occursin("fast", llvm)
+
+        g(x, y, z) = @fastmath max(x, y, z)
+        llvm = sprint(code_llvm, g, (T,T,T))
+        @test occursin("fast", llvm)
+    end
+end
+
 @testset "check expansions" begin
     @test macroexpand(Main, :(@fastmath 1+2)) == :(Base.FastMath.add_fast(1,2))
     @test macroexpand(Main, :(@fastmath +)) == :(Base.FastMath.add_fast)
@@ -256,6 +279,28 @@ end
 
 @testset "literal powers" begin
     @test @fastmath(2^-2) == @fastmath(2.0^-2) == 0.25
+    # Issue #53817
+    # Note that exponent -2^63 fails testing because of issue #53881
+    # Therefore we test with -(2^63-1). For Int == Int32 there is an analogue restriction.
+    # See also PR #53860.
+    if Int == Int64
+        @test @fastmath(2^-9223372036854775807) === 0.0
+        @test_throws DomainError @fastmath(2^-9223372036854775809)
+        @test @fastmath(1^-9223372036854775807) isa Float64
+        @test @fastmath(1^-9223372036854775809) isa Int
+    elseif Int == Int32
+        @test @fastmath(2^-2147483647) === 0.0
+        @test_throws DomainError @fastmath(2^-2147483649)
+        @test @fastmath(1^-2147483647) isa Float64
+        @test @fastmath(1^-2147483649) isa Int
+    end
+    @test_throws MethodError @fastmath(^(2))
+end
+# issue #53857
+@testset "fast_pow" begin
+    n = Int64(2)^52
+    @test @fastmath (1 + 1 / n) ^ n ≈ ℯ
+    @test @fastmath (1 + 1 / n) ^ 4503599627370496 ≈ ℯ
 end
 
 @testset "sincos fall-backs" begin
@@ -293,3 +338,14 @@ end
     @test x == [1, 1]
     @test i == 1
 end
+
+@testset "@fastmath-related crash (#49907)" begin
+    x = @fastmath maximum(Float16[1,2,3]; init = Float16(0))
+    @test x == Float16(3)
+end
+
+@testset "Test promotion of >=3 arg fastmath" begin
+    # Bug caught in https://github.com/JuliaLang/julia/pull/54513#discussion_r1620553369
+    x = @fastmath 1. + 1. + 1f0
+    @test x == 3.0
+end
diff --git a/test/faulty_constructor_method_should_not_cause_stack_overflows.jl b/test/faulty_constructor_method_should_not_cause_stack_overflows.jl
new file mode 100644
index 0000000000000..8dede73e2ae7a
--- /dev/null
+++ b/test/faulty_constructor_method_should_not_cause_stack_overflows.jl
@@ -0,0 +1,54 @@
+for (typ, sup) in (
+    (:Char, :AbstractChar),
+    (:String, :AbstractString),
+    (:Int, :Integer),
+    (:UInt32, :Integer),
+)
+    fau = Symbol("Faulty", typ)
+    @eval struct $fau <: $sup end
+    @eval function Base.$typ(x::$fau) x end
+end
+
+using Test
+using Unicode: Unicode
+
+@testset "faulty constructor method for new type should not cause stack overflows" begin
+    exc = Union{TypeError,MethodError}
+    @testset let x = FaultyChar()
+        @test_throws exc isless(x, x)
+        @test_throws exc x == x
+        @test_throws exc hash(x, UInt(3))
+        @test_throws exc print(devnull, x)
+        @test_throws exc hex2bytes!(Vector{UInt8}(undef, 1), (x, x))
+    end
+    @testset let x = FaultyString()
+        @test_throws exc hash(x, UInt(3))
+        @test_throws exc repeat(x, 3)
+        @test_throws exc startswith(devnull, x)
+        @test_throws exc relpath(x, x)
+        @test_throws exc tryparse(Float32, x)
+        @test_throws exc tryparse(Float64, x)
+        for f in (
+            Symbol, ascii, splitpath, isdirpath, splitdir, splitdrive, splitext, normpath, abspath, ispath,
+            Base.Filesystem.uripath,
+            Sys.which, Sys.isexecutable, Sys.isreadable, Sys.iswritable,
+            Unicode.normalize,
+        )
+            @test_throws exc f(x)
+        end
+    end
+    @testset let x = FaultyInt()
+        @test_throws exc readbytes!(IOBuffer(), Vector{UInt8}(undef, 0), x)
+        @test_throws exc length("", x, x)
+        @test_throws exc thisind("", x)
+        @test_throws exc prevind("", x)
+        @test_throws exc prevind("", x, x)
+        @test_throws exc nextind("", x)
+        @test_throws exc nextind("", x, x)
+        @test_throws exc codeunit("", x)
+        @test_throws exc SubString("", x, x)
+    end
+    @testset let x = FaultyUInt32()
+        @test_throws exc Char(x)
+    end
+end
diff --git a/test/file.jl b/test/file.jl
index 1d2ac4c6f9132..92e85125c8451 100644
--- a/test/file.jl
+++ b/test/file.jl
@@ -31,6 +31,8 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
     symlink(subdir, dirlink)
     @test stat(dirlink) == stat(subdir)
     @test readdir(dirlink) == readdir(subdir)
+    @test map(o->o.names, Base.Filesystem._readdirx(dirlink)) == map(o->o.names, Base.Filesystem._readdirx(subdir))
+    @test realpath.(Base.Filesystem._readdirx(dirlink)) == realpath.(Base.Filesystem._readdirx(subdir))
 
     # relative link
     relsubdirlink = joinpath(subdir, "rel_subdirlink")
@@ -38,6 +40,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
     symlink(reldir, relsubdirlink)
     @test stat(relsubdirlink) == stat(subdir2)
     @test readdir(relsubdirlink) == readdir(subdir2)
+    @test Base.Filesystem._readdirx(relsubdirlink) == Base.Filesystem._readdirx(subdir2)
 
     # creation of symlink to directory that does not yet exist
     new_dir = joinpath(subdir, "new_dir")
@@ -56,6 +59,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
     mkdir(new_dir)
     touch(foo_file)
     @test readdir(new_dir) == readdir(nedlink)
+    @test realpath.(Base.Filesystem._readdirx(new_dir)) == realpath.(Base.Filesystem._readdirx(nedlink))
 
     rm(foo_file)
     rm(new_dir)
@@ -124,6 +128,9 @@ end
     end
     @test_throws ArgumentError tempname(randstring())
 end
+@testset "tempname with suffix" begin
+    @test !isfile(tempname(suffix = "_foo.txt"))
+end
 
 child_eval(code::String) = eval(Meta.parse(readchomp(`$(Base.julia_cmd()) -E $code`)))
 
@@ -253,13 +260,14 @@ no_error_logging(f::Function) =
         @test TEMP_CLEANUP_MAX[] == 3
         local t, f
         temps = String[]
+        npending = 0
         # mktemp is normally cleaned up on completion
         mktemp(d) do path, _
             @test isfile(path)
             t = path
         end
         @test !ispath(t)
-        @test length(TEMP_CLEANUP) == 0
+        @test length(TEMP_CLEANUP) == npending
         @test TEMP_CLEANUP_MAX[] == 3
         # mktemp when cleanup is prevented
         no_error_logging() do
@@ -270,19 +278,26 @@ no_error_logging(f::Function) =
                 t = path
             end
         end
+        # Make deleteable again
         chmod(d, 0o700)
         close(f)
-        @test isfile(t)
-        @test length(TEMP_CLEANUP) == 1
-        @test TEMP_CLEANUP_MAX[] == 3
-        push!(temps, t)
+        if Libc.geteuid() == 0
+            # Root can delete anything
+            @test !isfile(t)
+        else
+            npending += 1
+            @test isfile(t)
+            @test length(TEMP_CLEANUP) == npending
+            @test TEMP_CLEANUP_MAX[] == 3
+            push!(temps, t)
+        end
         # mktempdir is normally cleaned up on completion
         mktempdir(d) do path
             @test isdir(path)
             t = path
         end
         @test !ispath(t)
-        @test length(TEMP_CLEANUP) == 1
+        @test length(TEMP_CLEANUP) == npending
         @test TEMP_CLEANUP_MAX[] == 3
         # mktempdir when cleanup is prevented
         no_error_logging() do
@@ -294,16 +309,24 @@ no_error_logging(f::Function) =
                 t = path
             end
         end
+        # Make deleteable again
         chmod(d, 0o700)
         close(f)
-        @test isdir(t)
-        @test length(TEMP_CLEANUP) == 2
-        @test TEMP_CLEANUP_MAX[] == 3
-        push!(temps, t)
+        if Libc.geteuid() == 0
+            # Root can delete anything
+            @test !isdir(t)
+        else
+            @test isdir(t)
+            npending += 1
+            @test length(TEMP_CLEANUP) == npending
+            @test TEMP_CLEANUP_MAX[] == 3
+            push!(temps, t)
+        end
         # make one more temp file
         t = mktemp()[1]
+        npending += 1
         @test isfile(t)
-        @test length(TEMP_CLEANUP) == 3
+        @test length(TEMP_CLEANUP) == npending
         @test TEMP_CLEANUP_MAX[] == 3
         # nothing has been deleted yet
         for t in temps
@@ -312,8 +335,9 @@ no_error_logging(f::Function) =
         # another temp file triggers purge
         t = mktempdir()
         @test isdir(t)
-        @test length(TEMP_CLEANUP) == 2
-        @test TEMP_CLEANUP_MAX[] == 4
+        npending = 2
+        @test length(TEMP_CLEANUP) == npending
+        @test TEMP_CLEANUP_MAX[] == (Libc.geteuid() == 0 ? 3 : 4)
         # now all the temps are gone
         for t in temps
             @test !ispath(t)
@@ -355,7 +379,7 @@ chmod(file, filemode(file) | 0o222)
 @test filesize(file) == 0
 
 # issue #26685
-@test !isfile("http://google.com")
+@test !isfile("https://google.com")
 
 if Sys.iswindows()
     permissions = 0o444
@@ -413,6 +437,9 @@ function test_stat_error(stat::Function, pth)
     if stat === lstat && !(pth isa AbstractString)
         return # no lstat for fd handles
     end
+    if Libc.geteuid() == 0
+        return # root bypasses permission checks
+    end
     ex = try; stat(pth); false; catch ex; ex; end::Base.IOError
     @test ex.code == (pth isa AbstractString ? Base.UV_EACCES : Base.UV_EBADF)
     pth isa AbstractString || (pth = Base.INVALID_OS_HANDLE)
@@ -435,8 +462,7 @@ end
                 for pth in ("afile",
                             joinpath("afile", "not_file"),
                             SubString(joinpath(dir, "afile")),
-                            Base.RawFD(-1),
-                            -1)
+                            Base.RawFD(-1))
                     test_stat_error(stat, pth)
                     test_stat_error(lstat, pth)
                 end
@@ -453,6 +479,11 @@ end
     end
 end
 
+# Issue #51710 and PR #54855
+@test_throws MethodError stat(7)
+@test_throws MethodError ispath(false)
+@test_throws MethodError ispath(1)
+
 # On windows the filesize of a folder is the accumulation of all the contained
 # files and is thus zero in this case.
 if Sys.iswindows()
@@ -460,7 +491,7 @@ if Sys.iswindows()
 else
     @test filesize(dir) > 0
 end
-# We need both: one to check passed time, one to comapare file's mtime()
+# We need both: one to check passed time, one to compare file's mtime()
 nowtime = time_ns() / 1e9
 nowwall = time()
 # Allow 10s skew in addition to the time it took us to actually execute this code
@@ -539,16 +570,23 @@ function multiple_uv_errors(pfx::AbstractString, codes::AbstractVector{<:Integer
     return [Base._UVError(pfx, code) for code in codes]
 end
 
+read_linux_id_map_max(file) = parse(Int, split(strip(read(file, String)), " ", keepempty = false)[end]) % Cint
 if !Sys.iswindows()
     # chown will give an error if the user does not have permissions to change files
     uid = Libc.geteuid()
     @test stat(file).uid == uid
     @test uid == Libc.getuid()
+    maxuid = maxgid = -1
+    # Containers may have restricted uid/gid ranges
+    if Sys.islinux() && isfile("/proc/self/uid_map")
+        maxuid = read_linux_id_map_max("/proc/self/uid_map")
+        maxgid = read_linux_id_map_max("/proc/self/gid_map")
+    end
     if uid == 0 # root user
-        chown(file, -2, -1)  # Change the file owner to nobody
-        @test stat(file).uid != 0
-        chown(file, 0, -2)  # Change the file group to nogroup (and owner back to root)
-        @test stat(file).gid != 0
+        chown(file, maxuid-1, -1)  # Change the file owner to nobody
+        @test maxuid == 1 || stat(file).uid != 0
+        chown(file, 0, maxgid-1)  # Change the file group to nogroup (and owner back to root)
+        @test maxgid == 1 || stat(file).gid != 0
         @test stat(file).uid == 0
         @test chown(file, -1, 0) == file
         @test stat(file).gid == 0
@@ -637,9 +675,11 @@ end
     MAX_PATH = (Sys.iswindows() ? 260 - length(PATH_PREFIX) : 255)  - 9
     for i = 0:9
         local tmp = joinpath(PATH_PREFIX, "x"^MAX_PATH * "123456789"[1:i])
-        @test withenv(var => tmp) do
-            tempdir()
-        end == tmp
+        no_error_logging() do
+            @test withenv(var => tmp) do
+                tempdir()
+            end == tmp
+        end
     end
 end
 
@@ -815,6 +855,303 @@ mktempdir() do tmpdir
     rm(b_tmpdir)
 end
 
+@testset "rename" begin
+    # some of the windows specific behavior may be fixed in new versions of julia
+    mktempdir() do dir
+        # see if can make symlinks
+        local can_symlink = try
+            symlink("foo", joinpath(dir, "link"))
+            rm(joinpath(dir, "link"))
+            true
+        catch
+            false
+        end
+        local f1 = joinpath(dir, "file1")
+        local f2 = joinpath(dir, "file2")
+        local d1 = joinpath(dir, "dir1")
+        local d2 = joinpath(dir, "dir2")
+        local subd1f1 = joinpath(d1, "file1")
+        local subd1f2 = joinpath(d1, "file2")
+        local subd2f1 = joinpath(d2, "file1")
+        local subd2f2 = joinpath(d2, "file2")
+        local h1 = joinpath(dir, "hlink1")
+        local h2 = joinpath(dir, "hlink2")
+        local s1 = joinpath(dir, "slink1")
+        local s2 = joinpath(dir, "slink2")
+        @testset "renaming to non existing newpath in same directory" begin
+            # file, make sure isexecutable is copied
+            for mode in (0o644, 0o755)
+                write(f1, b"data")
+                chmod(f1, mode)
+                Base.rename(f1, f2)
+                @test !isfile(f1)
+                @test isfile(f2)
+                @test read(f2) == b"data"
+                if mode == 0o644
+                    @test !isexecutable(f2)
+                else
+                    @test isexecutable(f2)
+                end
+                rm(f2)
+            end
+            # empty directory
+            mkdir(d1)
+            Base.rename(d1, d2)
+            @test !isdir(d1)
+            @test isdir(d2)
+            @test isempty(readdir(d2))
+            rm(d2)
+            # non empty directory
+            mkdir(d1)
+            write(subd1f1, b"data")
+            chmod(subd1f1, 0o644)
+            write(subd1f2, b"exe")
+            chmod(subd1f2, 0o755)
+            Base.rename(d1, d2)
+            @test !isdir(d1)
+            @test isdir(d2)
+            @test read(subd2f1) == b"data"
+            @test read(subd2f2) == b"exe"
+            @test !isexecutable(subd2f1)
+            @test isexecutable(subd2f2)
+            rm(d2; recursive=true)
+            # hardlink
+            write(f1, b"data")
+            hardlink(f1, h1)
+            Base.rename(h1, h2)
+            @test isfile(f1)
+            @test !isfile(h1)
+            @test isfile(h2)
+            @test read(h2) == b"data"
+            write(h2, b"data2")
+            @test read(f1) == b"data2"
+            rm(h2)
+            rm(f1)
+            # symlink
+            if can_symlink
+                symlink("foo", s1)
+                Base.rename(s1, s2)
+                @test !islink(s1)
+                @test islink(s2)
+                @test readlink(s2) == "foo"
+                rm(s2)
+            end
+        end
+        @test isempty(readdir(dir)) # make sure everything got cleaned up
+
+        # Get the error code from failed rename, or nothing if it worked
+        function rename_errorcodes(oldpath, newpath)
+            try
+                Base.rename(oldpath, newpath)
+                nothing
+            catch e
+                e.code
+            end
+        end
+        @testset "errors" begin
+            # invalid paths
+            @test_throws ArgumentError Base.rename(f1*"\0", "")
+            @test Base.UV_ENOENT == rename_errorcodes("", "")
+            write(f1, b"data")
+            @test Base.UV_ENOENT == rename_errorcodes(f1, "")
+            @test read(f1) == b"data"
+            @test Base.UV_ENOENT == rename_errorcodes("", f1)
+            @test read(f1) == b"data"
+            @test Base.UV_ENOENT == rename_errorcodes(f2, f1)
+            @test read(f1) == b"data"
+            @test Base.UV_ENOENT == rename_errorcodes(f1, subd1f1)
+            @test read(f1) == b"data"
+            rm(f1)
+            # attempt to make a directory a subdirectory of itself
+            mkdir(d1)
+            if Sys.iswindows()
+                @test rename_errorcodes(d1, joinpath(d1, "subdir")) ∈ (Base.UV_EINVAL, Base.UV_EBUSY)
+            else
+                @test Base.UV_EINVAL == rename_errorcodes(d1, joinpath(d1, "subdir"))
+            end
+            rm(d1)
+            # rename to child of a file
+            mkdir(d1)
+            write(f2, "foo")
+            if Sys.iswindows()
+                @test Base.UV_EINVAL == rename_errorcodes(d1, joinpath(f2, "subdir"))
+            else
+                @test Base.UV_ENOTDIR == rename_errorcodes(d1, joinpath(f2, "subdir"))
+            end
+            # replace a file with a directory
+            if !Sys.iswindows()
+                @test Base.UV_ENOTDIR == rename_errorcodes(d1, f2)
+            else
+                # this should work on windows
+                Base.rename(d1, f2)
+                @test isdir(f2)
+                @test !ispath(d1)
+            end
+            rm(f2; force=true)
+            rm(d1; force=true)
+            # symlink loop
+            if can_symlink
+                symlink(s1, s2)
+                symlink(s2, s1)
+                @test Base.UV_ELOOP == rename_errorcodes(joinpath(s1, "foo"), f2)
+                write(f2, b"data")
+                @test Base.UV_ELOOP == rename_errorcodes(f2, joinpath(s1, "foo"))
+                rm(s1)
+                rm(s2)
+                rm(f2)
+            end
+            # newpath is a nonempty directory
+            mkdir(d1)
+            mkdir(d2)
+            write(subd2f1, b"data")
+            write(f1, b"otherdata")
+            if Sys.iswindows()
+                @test Base.UV_EACCES == rename_errorcodes(f1, d1)
+                @test Base.UV_EACCES == rename_errorcodes(f1, d2)
+                @test Base.UV_EACCES == rename_errorcodes(d1, d2)
+                @test Base.UV_EACCES == rename_errorcodes(subd2f1, d2)
+            else
+                @test Base.UV_EISDIR == rename_errorcodes(f1, d1)
+                @test Base.UV_EISDIR == rename_errorcodes(f1, d2)
+                @test rename_errorcodes(d1, d2) ∈ (Base.UV_ENOTEMPTY, Base.UV_EEXIST)
+                @test rename_errorcodes(subd2f1, d2) ∈ (Base.UV_ENOTEMPTY, Base.UV_EEXIST, Base.UV_EISDIR)
+            end
+            rm(f1)
+            rm(d1)
+            rm(d2; recursive=true)
+        end
+        @test isempty(readdir(dir)) # make sure everything got cleaned up
+
+        @testset "replacing existing file" begin
+            write(f2, b"olddata")
+            chmod(f2, 0o755)
+            write(f1, b"newdata")
+            chmod(f1, 0o644)
+            @test isexecutable(f2)
+            @test !isexecutable(f1)
+            Base.rename(f1, f2)
+            @test !ispath(f1)
+            @test read(f2) == b"newdata"
+            @test !isexecutable(f2)
+            rm(f2)
+        end
+
+        @testset "replacing file with itself" begin
+            write(f1, b"data")
+            Base.rename(f1, f1)
+            @test read(f1) == b"data"
+            hardlink(f1, h1)
+            Base.rename(f1, h1)
+            if Sys.iswindows()
+                # On Windows f1 gets deleted
+                @test !ispath(f1)
+            else
+                @test read(f1) == b"data"
+            end
+            @test read(h1) == b"data"
+            rm(h1)
+            rm(f1; force=true)
+        end
+
+        @testset "replacing existing file in different directories" begin
+            mkdir(d1)
+            mkdir(d2)
+            write(subd2f2, b"olddata")
+            chmod(subd2f2, 0o755)
+            write(subd1f1, b"newdata")
+            chmod(subd1f1, 0o644)
+            @test isexecutable(subd2f2)
+            @test !isexecutable(subd1f1)
+            Base.rename(subd1f1, subd2f2)
+            @test !ispath(subd1f1)
+            @test read(subd2f2) == b"newdata"
+            @test !isexecutable(subd2f2)
+            @test isdir(d1)
+            @test isdir(d2)
+            rm(d1; recursive=true)
+            rm(d2; recursive=true)
+        end
+
+        @testset "rename with open files" begin
+            # both open
+            write(f2, b"olddata")
+            write(f1, b"newdata")
+            open(f1) do handle1
+                open(f2) do handle2
+                    if Sys.iswindows()
+                        # currently this doesn't work on windows
+                        @test Base.UV_EBUSY == rename_errorcodes(f1, f2)
+                    else
+                        Base.rename(f1, f2)
+                        @test !ispath(f1)
+                        @test read(f2) == b"newdata"
+                    end
+                    # rename doesn't break already opened files
+                    @test read(handle1) == b"newdata"
+                    @test read(handle2) == b"olddata"
+                end
+            end
+            rm(f1; force=true)
+            rm(f2; force=true)
+
+            # oldpath open
+            write(f2, b"olddata")
+            write(f1, b"newdata")
+            open(f1) do handle1
+                if Sys.iswindows()
+                    # currently this doesn't work on windows
+                    @test Base.UV_EBUSY == rename_errorcodes(f1, f2)
+                else
+                    Base.rename(f1, f2)
+                    @test !ispath(f1)
+                    @test read(f2) == b"newdata"
+                end
+                # rename doesn't break already opened files
+                @test read(handle1) == b"newdata"
+            end
+            rm(f1; force=true)
+            rm(f2; force=true)
+
+            # newpath open
+            write(f2, b"olddata")
+            write(f1, b"newdata")
+            open(f2) do handle2
+                if Sys.iswindows()
+                    # currently this doesn't work on windows
+                    @test Base.UV_EACCES == rename_errorcodes(f1, f2)
+                else
+                    Base.rename(f1, f2)
+                    @test !ispath(f1)
+                    @test read(f2) == b"newdata"
+                end
+                # rename doesn't break already opened files
+                @test read(handle2) == b"olddata"
+            end
+            rm(f1; force=true)
+            rm(f2; force=true)
+        end
+
+        @testset "replacing empty directory with directory" begin
+            mkdir(d1)
+            mkdir(d2)
+            write(subd1f1, b"data")
+            if Sys.iswindows()
+                # currently this doesn't work on windows
+                @test Base.UV_EACCES == rename_errorcodes(d1, d2)
+                rm(d1; recursive=true)
+                rm(d2)
+            else
+                Base.rename(d1, d2)
+                @test isdir(d2)
+                @test read(subd2f1) == b"data"
+                @test !ispath(d1)
+                rm(d2; recursive=true)
+            end
+        end
+        @test isempty(readdir(dir)) # make sure everything got cleaned up
+    end
+end
+
 # issue #10506 #10434
 ## Tests for directories and links to directories
 if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
@@ -1023,7 +1360,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
         @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=false)
         @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=true)
         # mv
-        @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) mv(nonexisting_src, dst; force=true)
+        @test_throws Base._UVError("rename($(repr(nonexisting_src)), $(repr(dst)))", Base.UV_ENOENT) mv(nonexisting_src, dst; force=true)
     end
 end
 
@@ -1073,7 +1410,7 @@ if !Sys.iswindows()
         stat_d_mv = stat(d_mv)
         # make sure d does not exist anymore
         @test !ispath(d)
-        # comare s, with d_mv
+        # compare s, with d_mv
         @test isfile(s) == isfile(d_mv)
         @test islink(s) == islink(d_mv)
         islink(s) && @test readlink(s) == readlink(d_mv)
@@ -1423,6 +1760,13 @@ cd(dirwalk) do
         @test dirs == []
         @test files == ["foo"]
     end
+
+    # pwd() as default directory
+    for ((r1, d1, f1), (r2, d2, f2)) in zip(walkdir(), walkdir(pwd()))
+        @test r1 == r2
+        @test d1 == d2
+        @test f1 == f2
+    end
 end
 rm(dirwalk, recursive=true)
 
@@ -1436,6 +1780,10 @@ rm(dirwalk, recursive=true)
                 touch(randstring())
             end
             @test issorted(readdir())
+            @test issorted(Base.Filesystem._readdirx())
+            @test map(o->o.name, Base.Filesystem._readdirx()) == readdir()
+            @test map(o->o.path, Base.Filesystem._readdirx()) == readdir(join=true)
+            @test count(isfile, readdir(join=true)) == count(isfile, Base.Filesystem._readdirx())
         end
     end
 end
@@ -1460,7 +1808,7 @@ rm(dir)
 
 
 ##################
-# Return values of mkpath, mkdir, cp, mv and touch
+# Return values of mkpath, mkdir, cp, mv, rename and touch
 ####################
 mktempdir() do dir
     name1 = joinpath(dir, "apples")
@@ -1477,8 +1825,11 @@ mktempdir() do dir
     @test cp(name2, name1) == name1
     @test isfile(name1)
     @test isfile(name2)
+    @test Base.rename(name1, name2) == name2
+    @test !ispath(name1)
+    @test isfile(name2)
     namedir = joinpath(dir, "chalk")
-    namepath = joinpath(dir, "chalk","cheese","fresh")
+    namepath = joinpath(dir, "chalk", "cheese", "fresh")
     @test !ispath(namedir)
     @test mkdir(namedir) == namedir
     @test isdir(namedir)
@@ -1487,7 +1838,12 @@ mktempdir() do dir
     @test isdir(namepath)
     @test mkpath(namepath) == namepath
     @test isdir(namepath)
+    # issue 54826
+    namepath_dirpath = joinpath(dir, "x", "y", "z", "")
+    @test mkpath(namepath_dirpath) == namepath_dirpath
 end
+@test mkpath("") == ""
+@test mkpath("/") == "/"
 
 # issue #30588
 @test realpath(".") == realpath(pwd())
@@ -1535,8 +1891,10 @@ if !Sys.iswindows()
             @test !isdir(joinpath(d, "empty_outer"))
 
             # But a non-empty directory is not
-            @test_throws Base.IOError rm(joinpath(d, "nonempty"); recursive=true)
-            chmod(joinpath(d, "nonempty"), 0o777)
+            if Libc.geteuid() != 0 # root can override permissions
+                @test_throws Base.IOError rm(joinpath(d, "nonempty"); recursive=true)
+                chmod(joinpath(d, "nonempty"), 0o777)
+            end
             rm(joinpath(d, "nonempty"); recursive=true, force=true)
             @test !isdir(joinpath(d, "nonempty"))
         end
@@ -1591,6 +1949,26 @@ end
     end
 end
 
+@testset "pwd tests" begin
+    mktempdir() do dir
+        cd(dir) do
+            withenv("OLDPWD" => nothing) do
+                io = IOBuffer()
+                Base.repl_cmd(@cmd("cd"), io)
+                Base.repl_cmd(@cmd("cd -"), io)
+                @test realpath(pwd()) == realpath(dir)
+                if !Sys.iswindows()
+                    # Delete the working directory and check we can cd out of it
+                    # Cannot delete the working directory on Windows
+                    rm(dir)
+                    @test_throws Base._UVError("pwd()", Base.UV_ENOENT) pwd()
+                    Base.repl_cmd(@cmd("cd \\~"), io)
+                end
+            end
+        end
+    end
+end
+
 @testset "readdir tests" begin
     ≛(a, b) = sort(a) == sort(b)
     mktempdir() do dir
@@ -1636,23 +2014,70 @@ end
     end
 end
 
-@testset "chmod/isexecutable" begin
+if Sys.isunix()
+    @testset "mkfifo" begin
+        mktempdir() do dir
+            path = Libc.mkfifo(joinpath(dir, "fifo"))
+            @sync begin
+                @async write(path, "hello")
+                cat_exec = `$(Base.julia_cmd()) --startup-file=no -e "write(stdout, read(ARGS[1]))"`
+                @test read(`$cat_exec $path`, String) == "hello"
+            end
+
+            existing_file = joinpath(dir, "existing")
+            write(existing_file, "")
+            @test_throws SystemError Libc.mkfifo(existing_file)
+        end
+    end
+else
+    @test_throws(
+        "mkfifo: Operation not supported",
+        Libc.mkfifo(joinpath(pwd(), "dummy_path")),
+    )
+end
+
+@testset "chmod/isexecutable/isreadable/iswritable" begin
     mktempdir() do dir
-        mkdir(joinpath(dir, "subdir"))
+        subdir = joinpath(dir, "subdir")
         fpath = joinpath(dir, "subdir", "foo")
 
-        # Test that we can actually set the executable bit on all platforms.
+        @test !ispath(subdir)
+        mkdir(subdir)
+        @test ispath(subdir)
+
+        @test !ispath(fpath)
         touch(fpath)
+        @test ispath(fpath)
+
+        # Test that we can actually set the executable/readable/writeable bit on all platforms.
         chmod(fpath, 0o644)
         @test !Sys.isexecutable(fpath)
+        @test Sys.isreadable(fpath)
+        @test Sys.iswritable(fpath) skip=Sys.iswindows()
         chmod(fpath, 0o755)
         @test Sys.isexecutable(fpath)
+        @test Sys.isreadable(fpath)
+        @test Sys.iswritable(fpath) skip=Sys.iswindows()
+        chmod(fpath, 0o444)
+        @test !Sys.isexecutable(fpath)
+        @test Sys.isreadable(fpath)
+        @test !Sys.iswritable(fpath) skip=Libc.getuid() == 0
+        chmod(fpath, 0o244)
+        @test !Sys.isexecutable(fpath)
+        @test !Sys.isreadable(fpath) skip=(Sys.iswindows() || Libc.getuid() == 0)
+        @test Sys.iswritable(fpath) skip=Sys.iswindows()
 
         # Ensure that, on Windows, where inheritance is default,
         # chmod still behaves as we expect.
         if Sys.iswindows()
-            chmod(joinpath(dir, "subdir"), 0o666)
-            @test Sys.isexecutable(fpath)
+            chmod(subdir, 0o666)
+            @test !Sys.isexecutable(fpath)
+            # Possibly broken (or changed) by libuv commit 84896d52 which applies "other" permissions
+            # to all groups we are not a part of, affecting inherited permissions
+            # https://github.com/JuliaLang/libuv/commit/84896d522a51de50a8090fac56ec19740f5b603e
+            @test_broken Sys.isreadable(fpath)
+            @test_skip Sys.iswritable(fpath)
+            chmod(fpath, 0o777)
         end
 
         # Reset permissions to all at the end, so it can be deleted properly.
@@ -1673,6 +2098,38 @@ if Sys.iswindows()
 end
 end
 
+# Unusually for structs, we test this explicitly because the fields of StatStruct
+# is part of its documentation, and therefore cannot change.
+@testset "StatStruct has promised fields" begin
+    f, io = mktemp()
+    s = stat(f)
+    @test s isa Base.StatStruct
+
+    @test s.desc isa Union{String, Base.OS_HANDLE}
+    @test s.size isa Int64
+    @test s.device isa UInt
+    @test s.inode isa UInt
+    @test s.mode isa UInt
+    @test s.nlink isa Int
+    @test s.uid isa UInt
+    @test s.gid isa UInt
+    @test s.rdev isa UInt
+    @test s.blksize isa Int64
+    @test s.blocks isa Int64
+    @test s.mtime isa Float64
+    @test s.ctime isa Float64
+
+    @test s === stat((f,))
+    @test s === lstat((f,))
+    @test s === stat(".", f)
+    @test s === lstat(".", f)
+end
+
+mutable struct URI50890; f::String; end
+Base.joinpath(x::URI50890) = URI50890(x.f)
+@test_throws "stat not implemented" stat(URI50890("."))
+@test_throws "lstat not implemented" lstat(URI50890("."))
+
 @testset "StatStruct show's extended details" begin
     f, io = mktemp()
     s = stat(f)
@@ -1716,18 +2173,30 @@ end
         @test !isnothing(Base.Filesystem.getusername(s.uid))
         @test !isnothing(Base.Filesystem.getgroupname(s.gid))
     end
+    s = Base.Filesystem.StatStruct()
+    stat_show_str = sprint(show, s)
+    stat_show_str_multi = sprint(show, MIME("text/plain"), s)
+    @test startswith(stat_show_str, "StatStruct(\"\" ENOENT: ") && endswith(stat_show_str, ")")
+    @test startswith(stat_show_str_multi, "StatStruct for \"\"\n ENOENT: ") && !endswith(stat_show_str_multi, r"\s")
+    s = Base.Filesystem.StatStruct("my/test", Ptr{UInt8}(0), Int32(Base.UV_ENOTDIR))
+    stat_show_str = sprint(show, s)
+    stat_show_str_multi = sprint(show, MIME("text/plain"), s)
+    @test startswith(stat_show_str, "StatStruct(\"my/test\" ENOTDIR: ") && endswith(stat_show_str, ")")
+    @test startswith(stat_show_str_multi, "StatStruct for \"my/test\"\n ENOTDIR: ") && !endswith(stat_show_str_multi, r"\s")
 end
 
 @testset "diskstat() works" begin
-    # Sanity check assuming disk is smaller than 32PB
-    PB = Int64(2)^44
+    # Sanity check assuming disk is smaller than 32PiB
+    PiB = Int64(2)^50
 
     dstat = diskstat()
-    @test dstat.total < 32PB
+    @test dstat.total < 32PiB
     @test dstat.used + dstat.available == dstat.total
     @test occursin(r"^DiskStat\(total=\d+, used=\d+, available=\d+\)$", sprint(show, dstat))
     # Test diskstat(::AbstractString)
     dstat = diskstat(pwd())
-    @test dstat.total < 32PB
+    @test dstat.total < 32PiB
     @test dstat.used + dstat.available == dstat.total
 end
+
+@test Base.infer_return_type(stat, (String,)) == Base.Filesystem.StatStruct
diff --git a/test/filesystem.jl b/test/filesystem.jl
index 79beea9f66ac1..036a3dda30cca 100644
--- a/test/filesystem.jl
+++ b/test/filesystem.jl
@@ -2,41 +2,51 @@
 
 mktempdir() do dir
 
-  # Create test file
-  filename = joinpath(dir, "file.txt")
-  text = "123456"
-  write(filename, text)
-
-  # test filesystem truncate (shorten)
-  file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
-  Base.Filesystem.truncate(file, 2)
-  text = text[1:2]
-  @test length(read(file)) == 2
-  close(file)
-
-  # test filesystem truncate (lengthen)
-  file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
-  Base.Filesystem.truncate(file, 20)
-  @test length(read(file)) == 20
-  close(file)
-
-  # test filesystem futime
-  file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
-  Base.Filesystem.futime(file, 1.0, 2.0)
-  @test Base.Filesystem.stat(file).mtime == 2.0
-  close(file)
-
-  # test filesystem readbytes!
-  file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
-  res = ones(UInt8, 80)
-  Base.Filesystem.readbytes!(file, res)
-  @test res == UInt8[text..., (i > 20 for i in (length(text) + 1):length(res))...]
-  close(file)
+    # Create test file
+    filename = joinpath(dir, "file.txt")
+    text = "123456"
+    write(filename, text)
+
+    # test filesystem truncate (shorten)
+    file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
+    Base.Filesystem.truncate(file, 2)
+    text = text[1:2]
+    @test length(read(file)) == 2
+    close(file)
+
+    # test filesystem truncate (lengthen)
+    file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
+    Base.Filesystem.truncate(file, 20)
+    @test length(read(file)) == 20
+    close(file)
+
+    # test filesystem futime
+    file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
+    Base.Filesystem.futime(file, 1.0, 2.0)
+    @test Base.Filesystem.stat(file).mtime == 2.0
+    close(file)
+
+    # test filesystem readbytes!
+    file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
+    res = ones(UInt8, 80)
+    Base.Filesystem.readbytes!(file, res)
+    @test res == UInt8[text..., (i > 20 for i in (length(text) + 1):length(res))...]
+    close(file)
 
 end
 
 import Base.Filesystem: S_IRUSR, S_IRGRP, S_IROTH
 @testset "types of permission mask constants" begin
-  @test S_IRUSR & ~S_IRGRP == S_IRUSR
-  @test typeof(S_IRUSR) == typeof(S_IRGRP) == typeof(S_IROTH)
+    @test S_IRUSR & ~S_IRGRP == S_IRUSR
+    @test typeof(S_IRUSR) == typeof(S_IRGRP) == typeof(S_IROTH)
+end
+
+@testset "Base.Filesystem docstrings" begin
+    undoc = Docs.undocumented_names(Base.Filesystem)
+    @test_broken isempty(undoc)
+    @test undoc == [:File, :Filesystem, :cptree, :futime, :sendfile, :unlink]
+end
+
+@testset "write return type" begin
+    @test Base.return_types(write, (Base.Filesystem.File, UInt8)) == [Int]
 end
diff --git a/test/float16.jl b/test/float16.jl
index 75f9b55b6d51c..4ff7cc663d07b 100644
--- a/test/float16.jl
+++ b/test/float16.jl
@@ -79,7 +79,8 @@ end
     @test unsafe_trunc(Int16, Float16(3)) === Int16(3)
     @test unsafe_trunc(UInt128, Float16(3)) === UInt128(3)
     @test unsafe_trunc(Int128, Float16(3)) === Int128(3)
-    @test unsafe_trunc(Int16, NaN16) === Int16(0)  #18771
+    # `unsafe_trunc` of `NaN` can be any value, see #56582
+    @test unsafe_trunc(Int16, NaN16) isa Int16 # #18771
 end
 @testset "fma and muladd" begin
     @test fma(Float16(0.1),Float16(0.9),Float16(0.5)) ≈ fma(0.1,0.9,0.5)
@@ -203,6 +204,11 @@ const minsubf16_32 = Float32(minsubf16)
 # issues #33076
 @test Float16(1f5) == Inf16
 
+# issue #52394
+@test Float16(10^8 // (10^9 + 1)) == convert(Float16, 10^8 // (10^9 + 1)) == Float16(0.1)
+@test Float16((typemax(UInt128)-0x01) // typemax(UInt128)) == Float16(1.0)
+@test Float32((typemax(UInt128)-0x01) // typemax(UInt128)) == Float32(1.0)
+
 @testset "conversion to Float16 from" begin
     for T in (Float32, Float64, BigFloat)
         @testset "conversion from $T" begin
diff --git a/test/floatfuncs.jl b/test/floatfuncs.jl
index 7e9d8021ac5df..d5d697634bcfa 100644
--- a/test/floatfuncs.jl
+++ b/test/floatfuncs.jl
@@ -139,9 +139,10 @@ end
 end
 
 @testset "literal pow matches runtime pow matches optimized pow" begin
-    two = 2
-    @test 1.0000000105367122^2 == 1.0000000105367122^two
-    @test 1.0041504f0^2 == 1.0041504f0^two
+    let two = 2
+        @test 1.0000000105367122^2 == 1.0000000105367122^two
+        @test 1.0041504f0^2 == 1.0041504f0^two
+    end
 
     function g2(start, two, N)
         x = start
@@ -192,11 +193,13 @@ end
     finv(x) = f(x, -1)
     f2(x) = f(x, 2)
     f3(x) = f(x, 3)
-    x = 1.0000000105367122
-    @test x^2 == f(x, 2) == f2(x) == x*x == Float64(big(x)*big(x))
-    @test x^3 == f(x, 3) == f3(x) == x*x*x == Float64(big(x)*big(x)*big(x))
-    x = 1.000000007393669
-    @test x^-1 == f(x, -1) == finv(x) == 1/x == inv(x) == Float64(1/big(x)) == Float64(inv(big(x)))
+    let x = 1.0000000105367122
+        @test x^2 == f(x, 2) == f2(x) == x*x == Float64(big(x)*big(x))
+        @test x^3 == f(x, 3) == f3(x) == x*x*x == Float64(big(x)*big(x)*big(x))
+    end
+    let x = 1.000000007393669
+        @test x^-1 == f(x, -1) == finv(x) == 1/x == inv(x) == Float64(1/big(x)) == Float64(inv(big(x)))
+    end
 end
 
 @testset "curried approximation" begin
@@ -209,3 +212,107 @@ end
     struct CustomNumber <: Number end
     @test !isnan(CustomNumber())
 end
+
+@testset "isapprox and integer overflow" begin
+    for T in (Int8, Int16, Int32)
+        T === Int && continue
+        @test !isapprox(typemin(T), T(0))
+        @test !isapprox(typemin(T), unsigned(T)(0))
+        @test !isapprox(typemin(T), 0)
+        @test !isapprox(typemin(T), T(0), atol=0.99)
+        @test !isapprox(typemin(T), unsigned(T)(0), atol=0.99)
+        @test !isapprox(typemin(T), 0, atol=0.99)
+        @test_broken !isapprox(typemin(T), T(0), atol=1)
+        @test_broken !isapprox(typemin(T), unsigned(T)(0), atol=1)
+        @test !isapprox(typemin(T), 0, atol=1)
+
+        @test !isapprox(typemin(T)+T(10), T(10))
+        @test !isapprox(typemin(T)+T(10), unsigned(T)(10))
+        @test !isapprox(typemin(T)+T(10), 10)
+        @test !isapprox(typemin(T)+T(10), T(10), atol=0.99)
+        @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=0.99)
+        @test !isapprox(typemin(T)+T(10), 10, atol=0.99)
+        @test_broken !isapprox(typemin(T)+T(10), T(10), atol=1)
+        @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=1)
+        @test !isapprox(typemin(T)+T(10), 10, atol=1)
+
+        @test isapprox(typemin(T), 0.0, rtol=1)
+    end
+    for T in (Int, Int64, Int128)
+        @test !isapprox(typemin(T), T(0))
+        @test !isapprox(typemin(T), unsigned(T)(0))
+        @test !isapprox(typemin(T), T(0), atol=0.99)
+        @test !isapprox(typemin(T), unsigned(T)(0), atol=0.99)
+        @test_broken !isapprox(typemin(T), T(0), atol=1)
+        @test_broken !isapprox(typemin(T), unsigned(T)(0), atol=1)
+
+        @test !isapprox(typemin(T)+T(10), T(10))
+        @test !isapprox(typemin(T)+T(10), unsigned(T)(10))
+        @test !isapprox(typemin(T)+T(10), T(10), atol=0.99)
+        @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=0.99)
+        @test_broken !isapprox(typemin(T)+T(10), T(10), atol=1)
+        @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=1)
+
+        @test isapprox(typemin(T), 0.0, rtol=1)
+    end
+end
+
+@testset "isapprox and unsigned integers" begin
+    for T in Base.BitUnsigned_types
+        # Test also combinations of different integer types
+        W = widen(T)
+        # The order of the operands for difference between unsigned integers is
+        # very important, test both combinations.
+        @test isapprox(T(42), T(42); rtol=T(0), atol=0.5)
+        @test isapprox(T(42), W(42); rtol=T(0), atol=0.5)
+        @test !isapprox(T(0), T(1); rtol=T(0), atol=0.5)
+        @test !isapprox(T(1), T(0); rtol=T(0), atol=0.5)
+        @test isapprox(T(1), T(3); atol=T(2))
+        @test isapprox(T(4), T(2); atol=T(2))
+        @test isapprox(T(1), W(3); atol=T(2))
+        @test isapprox(T(4), W(2); atol=T(2))
+        @test isapprox(T(5), T(7); atol=typemax(T))
+        @test isapprox(T(8), T(6); atol=typemax(T))
+        @test isapprox(T(1), T(2); rtol=1)
+        @test isapprox(T(6), T(3); rtol=1)
+        @test isapprox(T(1), W(2); rtol=1)
+        @test isapprox(T(6), W(3); rtol=1)
+        @test !isapprox(typemin(T), typemax(T))
+        @test !isapprox(typemax(T), typemin(T))
+        @test !isapprox(typemin(T), typemax(T); atol=typemax(T)-T(1))
+        @test !isapprox(typemax(T), typemin(T); atol=typemax(T)-T(1))
+        @test isapprox(typemin(T), typemax(T); atol=typemax(T))
+        @test isapprox(typemax(T), typemin(T); atol=typemax(T))
+    end
+end
+
+@testset "Conversion from floating point to unsigned integer near extremes (#51063)" begin
+    @test_throws InexactError UInt32(4.2949673f9)
+    @test_throws InexactError UInt64(1.8446744f19)
+    @test_throws InexactError UInt64(1.8446744073709552e19)
+    @test_throws InexactError UInt128(3.402823669209385e38)
+end
+
+@testset "Conversion from floating point to integer near extremes (exhaustive)" begin
+    for Ti in Base.BitInteger_types, Tf in (Float16, Float32, Float64), x in (typemin(Ti), typemax(Ti))
+        y = Tf(x)
+        for i in -3:3
+            z = nextfloat(y, i)
+
+            result = isfinite(z) ? round(BigInt, z) : error
+            result = result !== error && typemin(Ti) <= result <= typemax(Ti) ? result : error
+
+            if result === error
+                @test_throws InexactError round(Ti, z)
+                @test_throws InexactError Ti(z)
+            else
+                @test result == round(Ti, z)
+                if isinteger(z)
+                    @test result == Ti(z)
+                else
+                    @test_throws InexactError Ti(z)
+                end
+            end
+        end
+    end
+end
diff --git a/test/functional.jl b/test/functional.jl
index fce64c0e5720a..84c4098308ebd 100644
--- a/test/functional.jl
+++ b/test/functional.jl
@@ -52,9 +52,6 @@ end
 
 # foreach
 let a = []
-    foreach(()->push!(a,0))
-    @test a == [0]
-    a = []
     foreach(x->push!(a,x), [1,5,10])
     @test a == [1,5,10]
     a = []
@@ -238,3 +235,129 @@ end
 let (:)(a,b) = (i for i in Base.:(:)(1,10) if i%2==0)
     @test Int8[ i for i = 1:2 ] == [2,4,6,8,10]
 end
+
+@testset "Basic tests of Fix1, Fix2, and Fix" begin
+    function test_fix1(Fix1=Base.Fix1)
+        increment = Fix1(+, 1)
+        @test increment(5) == 6
+        @test increment(-1) == 0
+        @test increment(0) == 1
+        @test map(increment, [1, 2, 3]) == [2, 3, 4]
+
+        concat_with_hello = Fix1(*, "Hello ")
+        @test concat_with_hello("World!") == "Hello World!"
+        # Make sure inference is good:
+        @inferred concat_with_hello("World!")
+
+        one_divided_by = Fix1(/, 1)
+        @test one_divided_by(10) == 1/10.0
+        @test one_divided_by(-5) == 1/-5.0
+
+        return nothing
+    end
+
+    function test_fix2(Fix2=Base.Fix2)
+        return_second = Fix2((x, y) -> y, 999)
+        @test return_second(10) == 999
+        @inferred return_second(10)
+        @test return_second(-5) == 999
+
+        divide_by_two = Fix2(/, 2)
+        @test map(divide_by_two, (2, 4, 6)) == (1.0, 2.0, 3.0)
+        @inferred map(divide_by_two, (2, 4, 6))
+
+        concat_with_world = Fix2(*, " World!")
+        @test concat_with_world("Hello") == "Hello World!"
+        @inferred concat_with_world("Hello World!")
+
+        return nothing
+    end
+
+    # Test with normal Base.Fix1 and Base.Fix2
+    test_fix1()
+    test_fix2()
+
+    # Now, repeat the Fix1 and Fix2 tests, but
+    # with a Fix lambda function used in their place
+    test_fix1((op, arg) -> Base.Fix{1}(op, arg))
+    test_fix2((op, arg) -> Base.Fix{2}(op, arg))
+
+    # Now, we do more complex tests of Fix:
+    let Fix=Base.Fix
+        @testset "Argument Fixation" begin
+            let f = (x, y, z) -> x + y * z
+                fixed_f1 = Fix{1}(f, 10)
+                @test fixed_f1(2, 3) == 10 + 2 * 3
+
+                fixed_f2 = Fix{2}(f, 5)
+                @test fixed_f2(1, 4) == 1 + 5 * 4
+
+                fixed_f3 = Fix{3}(f, 3)
+                @test fixed_f3(1, 2) == 1 + 2 * 3
+            end
+        end
+        @testset "Helpful errors" begin
+            let g = (x, y) -> x - y
+                # Test minimum N
+                fixed_g1 = Fix{1}(g, 100)
+                @test fixed_g1(40) == 100 - 40
+
+                # Test maximum N
+                fixed_g2 = Fix{2}(g, 100)
+                @test fixed_g2(150) == 150 - 100
+
+                # One over
+                fixed_g3 = Fix{3}(g, 100)
+                @test_throws ArgumentError("expected at least 2 arguments to `Fix{3}`, but got 1") fixed_g3(1)
+            end
+        end
+        @testset "Type Stability and Inference" begin
+            let h = (x, y) -> x / y
+                fixed_h = Fix{2}(h, 2.0)
+                @test @inferred(fixed_h(4.0)) == 2.0
+            end
+        end
+        @testset "Interaction with varargs" begin
+            vararg_f = (x, y, z...) -> x + 10 * y + sum(z; init=zero(x))
+            fixed_vararg_f = Fix{2}(vararg_f, 6)
+
+            # Can call with variable number of arguments:
+            @test fixed_vararg_f(1, 2, 3, 4) == 1 + 10 * 6 + sum((2, 3, 4))
+            @inferred fixed_vararg_f(1, 2, 3, 4)
+            @test fixed_vararg_f(5) == 5 + 10 * 6
+            @inferred fixed_vararg_f(5)
+        end
+        @testset "Errors should propagate normally" begin
+            error_f = (x, y) -> sin(x * y)
+            fixed_error_f = Fix{2}(error_f, Inf)
+            @test_throws DomainError fixed_error_f(10)
+        end
+        @testset "Chaining Fix together" begin
+            f1 = Fix{1}(*, "1")
+            f2 = Fix{1}(f1, "2")
+            f3 = Fix{1}(f2, "3")
+            @test f3() == "123"
+
+            g1 = Fix{2}(*, "1")
+            g2 = Fix{2}(g1, "2")
+            g3 = Fix{2}(g2, "3")
+            @test g3("") == "123"
+        end
+        @testset "Zero arguments" begin
+            f = Fix{1}(x -> x, 'a')
+            @test f() == 'a'
+        end
+        @testset "Dummy-proofing" begin
+            @test_throws ArgumentError("expected `N` in `Fix{N}` to be integer greater than 0, but got 0") Fix{0}(>, 1)
+            @test_throws ArgumentError("expected type parameter in `Fix` to be `Int`, but got `0.5::Float64`") Fix{0.5}(>, 1)
+            @test_throws ArgumentError("expected type parameter in `Fix` to be `Int`, but got `1::UInt64`") Fix{UInt64(1)}(>, 1)
+        end
+        @testset "Specialize to structs not in `Base`" begin
+            struct MyStruct
+                x::Int
+            end
+            f = Fix{1}(MyStruct, 1)
+            @test f isa Fix{1,Type{MyStruct},Int}
+        end
+    end
+end
diff --git a/test/gc.jl b/test/gc.jl
index e085c1d8658e5..7d4a3655a2438 100644
--- a/test/gc.jl
+++ b/test/gc.jl
@@ -5,16 +5,57 @@ using Test
 function run_gctest(file)
     let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no $file`
         @testset for test_nthreads in (1, 2, 4)
-            @testset for concurrent_sweep in (0, 1)
-                new_env = copy(ENV)
-                new_env["JULIA_NUM_THREADS"] = string(test_nthreads)
-                new_env["JULIA_NUM_GC_THREADS"] = "$(test_nthreads),$(concurrent_sweep)"
-                @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr)))
+            @testset for test_nithreads in (0, 1)
+                @testset for concurrent_sweep in (0, 1)
+                    new_env = copy(ENV)
+                    new_env["JULIA_NUM_THREADS"] = "$test_nthreads,$test_nithreads"
+                    new_env["JULIA_NUM_GC_THREADS"] = "$(test_nthreads),$(concurrent_sweep)"
+                    @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr)))
+                end
             end
         end
     end
 end
 
+function run_nonzero_page_utilization_test()
+    GC.gc()
+    page_utilization = Base.gc_page_utilization_data()
+    # at least one of the pools should have nonzero page_utilization
+    @test any(page_utilization .> 0)
+end
+
+function run_pg_size_test()
+    page_size = @ccall jl_get_pg_size()::UInt64
+    # supported page sizes: 4KB and 16KB
+    @test page_size == (1 << 12) || page_size == (1 << 14)
+end
+
+function issue_54275_alloc_string()
+    String(UInt8['a' for i in 1:10000000])
+end
+
+function issue_54275_test()
+    GC.gc(true)
+    baseline = Base.gc_live_bytes()
+    live_bytes_has_grown_too_much = false
+    for _ in 1:10
+        issue_54275_alloc_string()
+        GC.gc(true)
+        if Base.gc_live_bytes() - baseline > 1_000_000
+            live_bytes_has_grown_too_much = true
+            break
+        end
+    end
+    @test !live_bytes_has_grown_too_much
+end
+
+function full_sweep_reasons_test()
+    GC.gc()
+    reasons = Base.full_sweep_reasons()
+    @test reasons[:FULL_SWEEP_REASON_FORCED_FULL_SWEEP] >= 1
+    @test keys(reasons) == Set(Base.FULL_SWEEP_REASONS)
+end
+
 # !!! note:
 #     Since we run our tests on 32bit OS as well we confine ourselves
 #     to parameters that allocate about 512MB of objects. Max RSS is lower
@@ -25,3 +66,47 @@ end
     run_gctest("gc/objarray.jl")
     run_gctest("gc/chunks.jl")
 end
+
+#FIXME: Issue #57103 disabling tests for MMTk, since
+# they rely on information that is specific to the stock GC.
+@static if Base.USING_STOCK_GC
+@testset "GC page metrics" begin
+    run_nonzero_page_utilization_test()
+    run_pg_size_test()
+end
+
+@testset "issue-54275" begin
+    issue_54275_test()
+end
+
+@testset "Full GC reasons" begin
+    full_sweep_reasons_test()
+end
+
+@testset "GC Always Full" begin
+    prog = "using Test;\n
+        for _ in 1:10; GC.gc(); end;\n
+        reasons = Base.full_sweep_reasons();\n
+        @test reasons[:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL] >= 10;"
+    cmd = `$(Base.julia_cmd()) --depwarn=error --startup-file=no --gc-sweep-always-full -e $prog`
+    @test success(cmd)
+end
+end
+
+@testset "Base.GC docstrings" begin
+    @test isempty(Docs.undocumented_names(GC))
+end
+
+#testset doesn't work here because this needs to run in top level
+#Check that we ensure objects in toplevel exprs are rooted
+global dims54422 = [] # allocate the Binding
+GC.gc(); GC.gc(); # force the binding to be old
+GC.enable(false); # prevent new objects from being old
+@eval begin
+    Base.Experimental.@force_compile # use the compiler
+    dims54422 = $([])
+    nothing
+end
+GC.enable(true); GC.gc(false) # incremental collection
+@test typeof(dims54422) == Vector{Any}
+@test isempty(dims54422)
diff --git a/test/gc/linkedlist.jl b/test/gc/linkedlist.jl
index 669e5f8ec21d9..3eb1480417e50 100644
--- a/test/gc/linkedlist.jl
+++ b/test/gc/linkedlist.jl
@@ -1,11 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 mutable struct ListNode
-  key::Int64
-  next::ListNode
-  ListNode() = new()
-  ListNode(x)= new(x)
-  ListNode(x,y) = new(x,y);
+    key::Int64
+    next::ListNode
+    ListNode() = new()
+    ListNode(x)= new(x)
+    ListNode(x,y) = new(x,y);
 end
 
 function list(N=16*1024^2)
diff --git a/test/gcext/gcext-test.jl b/test/gcext/gcext-test.jl
index 81637392e3c5d..11b0504b71c4d 100644
--- a/test/gcext/gcext-test.jl
+++ b/test/gcext/gcext-test.jl
@@ -2,7 +2,6 @@
 
 # tests the output of the embedding example is correct
 using Test
-using Pkg
 
 if Sys.iswindows()
     # libjulia needs to be in the same directory as the embedding executable or in path
@@ -31,7 +30,7 @@ end
     # @test success(p)
     errlines = fetch(err_task)
     lines = fetch(out_task)
-    @test length(errlines) == 0
+    @test isempty(errlines)
     # @test length(lines) == 6
     @test length(lines) == 5
     @test checknum(lines[2], r"([0-9]+) full collections", n -> n >= 10)
diff --git a/test/gcext/gcext.c b/test/gcext/gcext.c
index 90b5ee82d80b5..3da44a388a5b8 100644
--- a/test/gcext/gcext.c
+++ b/test/gcext/gcext.c
@@ -32,7 +32,7 @@ static inline int lt_ptr(void *a, void *b)
     return (uintptr_t)a < (uintptr_t)b;
 }
 
-/* align pointer to full word if mis-aligned */
+/* align pointer to full word if misaligned */
 static inline void *align_ptr(void *p)
 {
     uintptr_t u = (uintptr_t)p;
@@ -600,6 +600,13 @@ int main()
     jl_gc_set_cb_notify_external_alloc(alloc_bigval, 1);
     jl_gc_set_cb_notify_external_free(free_bigval, 1);
 
+    // single threaded mode
+    // Note: with -t1,1 a signal 10 occurs in task_scanner
+    jl_options.nthreadpools = 1;
+    jl_options.nthreads = 1;
+    int16_t ntpp[] = {jl_options.nthreads};
+    jl_options.nthreads_per_pool = ntpp;
+
     jl_init();
     if (jl_gc_enable_conservative_gc_support() < 0)
         abort();
diff --git a/test/generic_map_tests.jl b/test/generic_map_tests.jl
index b155370dd6465..7f19d60fe31fb 100644
--- a/test/generic_map_tests.jl
+++ b/test/generic_map_tests.jl
@@ -43,7 +43,7 @@ function generic_map_tests(mapf, inplace_mapf=nothing)
     @test mapf(f, Int[], Int[], Complex{Int}[]) == Union{}[]
 
     # In-place map
-    if inplace_mapf != nothing
+    if inplace_mapf !== nothing
         A = Float64[1:10...]
         inplace_mapf(x -> x*x, A, A)
         @test A == map(x -> x*x, Float64[1:10...])
diff --git a/test/gmp.jl b/test/gmp.jl
index 8f6be13c38054..0a8b253a92a81 100644
--- a/test/gmp.jl
+++ b/test/gmp.jl
@@ -11,6 +11,11 @@ ee = typemax(Int64)
     @test BigInt <: Signed
     @test big(1) isa Signed
 
+    if sizeof(Culong) >= 8
+        @test_throws OutOfMemoryError big(96608869069402268615522366320733234710)^16374500563449903721
+        @test_throws OutOfMemoryError 555555555555555555555555555555555555555555555555555^55555555555555555
+    end
+
     let x = big(1)
         @test signed(x) === x
         @test convert(Signed, x) === x
@@ -215,6 +220,8 @@ end
 end
 @testset "combinatorics" begin
     @test factorial(BigInt(40)) == parse(BigInt,"815915283247897734345611269596115894272000000000")
+    @test_throws DomainError factorial(BigInt(-1))
+    @test_throws DomainError factorial(BigInt(rand(-999:-2)))
     @test binomial(BigInt(1), -1) == BigInt(0)
     @test binomial(BigInt(1), 2)  == BigInt(0)
     @test binomial(BigInt(-53), 42) == parse(BigInt,"959509335087854414441273718")
@@ -228,6 +235,11 @@ let a, b
     @test prod(b) == foldl(*, b)
     @test 1 == prod(BigInt[]) isa BigInt
     @test prod(BigInt[0, 0, 0]) == 0 # issue #46665
+    # Test prod with negative numbers
+    @test prod(BigInt[-2, 3, -4]) == 24
+    @test prod(BigInt[-1, -2, -3]) == -6
+    @test prod(BigInt[-5]) == -5
+    @test prod(BigInt[-2, -2, -2, -2]) == 16
 end
 
 @testset "Iterated arithmetic" begin
@@ -438,11 +450,106 @@ end
     @test string(big(0), base = rand(2:62), pad = 0) == ""
 end
 
+@testset "Base.GMP.MPZ.export!" begin
+
+    function Base_GMP_MPZ_import!(x::BigInt, n::AbstractVector{T}; order::Integer=-1, nails::Integer=0, endian::Integer=0) where {T<:Base.BitInteger}
+        ccall((:__gmpz_import, Base.GMP.MPZ.libgmp),
+               Cvoid,
+               (Base.GMP.MPZ.mpz_t, Csize_t, Cint, Csize_t, Cint, Csize_t, Ptr{Cvoid}),
+               x, length(n), order, sizeof(T), endian, nails, n)
+        return x
+    end
+    # test import
+    bytes_to_import_from = Vector{UInt8}([1, 0])
+    int_to_import_to = BigInt()
+    Base_GMP_MPZ_import!(int_to_import_to, bytes_to_import_from, order=0)
+    @test int_to_import_to == BigInt(256)
+
+    # test export
+    int_to_export_from = BigInt(256)
+    bytes_to_export_to = Vector{UInt8}(undef, 2)
+    Base.GMP.MPZ.export!(bytes_to_export_to, int_to_export_from, order=0)
+    @test all(bytes_to_export_to .== bytes_to_import_from)
+
+    # test both composed import(export) is identity
+    int_to_export_from = BigInt(256)
+    bytes_to_export_to = Vector{UInt8}(undef, 2)
+    Base.GMP.MPZ.export!(bytes_to_export_to, int_to_export_from, order=0)
+    int_to_import_to = BigInt()
+    Base_GMP_MPZ_import!(int_to_import_to, bytes_to_export_to, order=0)
+    @test int_to_export_from == int_to_import_to
+
+    # test both composed export(import) is identity
+    bytes_to_import_from = Vector{UInt8}([1, 0])
+    int_to_import_to = BigInt()
+    Base_GMP_MPZ_import!(int_to_import_to, bytes_to_import_from, order=0)
+    bytes_to_export_to = Vector{UInt8}(undef, 2)
+    Base.GMP.MPZ.export!(bytes_to_export_to, int_to_export_from, order=0)
+    @test all(bytes_to_export_to .== bytes_to_import_from)
+
+    # test export of 0 is T[0]
+    zero_to_export = BigInt(0)
+    bytes_to_export_to = Vector{UInt8}(undef, 0)
+    Base.GMP.MPZ.export!(bytes_to_export_to, zero_to_export, order=0)
+    @test bytes_to_export_to == UInt8[0]
+
+    # test export on nonzero vector
+    x_to_export = BigInt(6)
+    bytes_to_export_to = UInt8[1, 2, 3, 4, 5]
+    Base.GMP.MPZ.export!(bytes_to_export_to, x_to_export, order=0)
+    @test bytes_to_export_to == UInt8[6, 0, 0, 0, 0]
+end
+
 @test isqrt(big(4)) == 2
 @test isqrt(big(5)) == 2
 
-@test big(5)^true == big(5)
-@test big(5)^false == one(BigInt)
+
+@testset "Exponentiation operator" begin
+    @test big(5)^true == big(5)
+    @test big(5)^false == one(BigInt)
+    testvals = Int8[-128:-126; -3:3; 125:127]
+    @testset "BigInt and Int8 are consistent: $i^$j" for i in testvals, j in testvals
+        int8_res = try
+            i^j
+        catch e
+            e
+        end
+        if int8_res isa Int8
+            @test (big(i)^big(j)) % Int8 === int8_res
+        else
+            # Test both have exception of the same type
+            @test_throws typeof(int8_res) big(i)^big(j)
+        end
+    end
+end
+
+@testset "modular invert" begin
+    # test invert is correct and does not mutate
+    a = BigInt(3)
+    b = BigInt(7)
+    i = BigInt(5)
+    @test Base.GMP.MPZ.invert(a, b) == i
+    @test a == BigInt(3)
+    @test b == BigInt(7)
+
+    # test in place invert does mutate first argument
+    a = BigInt(3)
+    b = BigInt(7)
+    i = BigInt(5)
+    i_inplace = BigInt(3)
+    Base.GMP.MPZ.invert!(i_inplace, b)
+    @test i_inplace == i
+
+    # test in place invert does mutate only first argument
+    a = BigInt(3)
+    b = BigInt(7)
+    i = BigInt(5)
+    i_inplace = BigInt(0)
+    Base.GMP.MPZ.invert!(i_inplace, a, b)
+    @test i_inplace == i
+    @test a == BigInt(3)
+    @test b == BigInt(7)
+end
 
 @testset "math ops returning BigFloat" begin
     # operations that when applied to Int64 give Float64, should give BigFloat
@@ -706,3 +813,27 @@ t = Rational{BigInt}(0, 1)
         @test Base.GMP.MPQ.div!(-oo, zo) == -oz
     end
 end
+
+@testset "hashing" begin
+    for i in 1:10:100
+        for shift in vcat(0:8, 9:8:81)
+            for sgn in (1, -1)
+                bint = sgn * (big(11)^i << shift)
+                bfloat = float(bint)
+                @test (hash(bint) == hash(bfloat)) == (bint == bfloat)
+                @test hash(bint, Base.HASH_SEED) ==
+                    @invoke(hash(bint::Real, Base.HASH_SEED))
+                @test Base.hash_integer(bint, Base.HASH_SEED) ==
+                    @invoke(Base.hash_integer(bint::Integer, Base.HASH_SEED))
+            end
+        end
+    end
+
+    bint = big(0)
+    bfloat = float(bint)
+    @test (hash(bint) == hash(bfloat)) == (bint == bfloat)
+    @test hash(bint, Base.HASH_SEED) ==
+        @invoke(hash(bint::Real, Base.HASH_SEED))
+    @test Base.hash_integer(bint, Base.HASH_SEED) ==
+        @invoke(Base.hash_integer(bint::Integer, Base.HASH_SEED))
+end
diff --git a/test/hashing.jl b/test/hashing.jl
index 1c7c37d00f93b..f9ce78fdf8f00 100644
--- a/test/hashing.jl
+++ b/test/hashing.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Random, LinearAlgebra, SparseArrays
+using Random, LinearAlgebra
 isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
 using .Main.OffsetArrays
 
@@ -37,6 +37,7 @@ for T = types[2:end], x = vals
     a = coerce(T, x)
     @test hash(a, zero(UInt)) == invoke(hash, Tuple{Real, UInt}, a, zero(UInt))
     @test hash(a, one(UInt)) == invoke(hash, Tuple{Real, UInt}, a, one(UInt))
+    @test hash(a) == hash(complex(a))
 end
 
 let collides = 0
@@ -88,13 +89,13 @@ vals = Any[
     Dict(42 => 101, 77 => 93), Dict{Any,Any}(42 => 101, 77 => 93),
     (1,2,3,4), (1.0,2.0,3.0,4.0), (1,3,2,4),
     ("a","b"), (SubString("a",1,1), SubString("b",1,1)),
+    join('c':'s'), SubString(join('a':'z'), 3, 19),
     # issue #6900
     Dict(x => x for x in 1:10),
     Dict(7=>7,9=>9,4=>4,10=>10,2=>2,3=>3,8=>8,5=>5,6=>6,1=>1),
     [], [1], [2], [1, 1], [1, 2], [1, 3], [2, 2], [1, 2, 2], [1, 3, 3],
-    zeros(2, 2), spzeros(2, 2), Matrix(1.0I, 2, 2), sparse(1.0I, 2, 2),
-    sparse(fill(1., 2, 2)), fill(1., 2, 2), sparse([0 0; 1 0]), [0 0; 1 0],
-    [-0. 0; -0. 0.], SparseMatrixCSC(2, 2, [1, 3, 3], [1, 2], [-0., -0.]),
+    zeros(2, 2), Matrix(1.0I, 2, 2), fill(1., 2, 2),
+    [-0. 0; -0. 0.],
     # issue #16364
     1:4, 1:1:4, 1:-1:0, 1.0:4.0, 1.0:1.0:4.0, range(1, stop=4, length=4),
     # issue #35597, when `LinearIndices` does not begin at 1
@@ -109,7 +110,7 @@ vals = Any[
     ["a", "b", 1, 2], ["a", 1, 2], ["a", "b", 2, 2], ["a", "a", 1, 2], ["a", "b", 2, 3]
 ]
 
-for a in vals, b in vals
+for (i, a) in enumerate(vals), b in vals[i:end]
     @test isequal(a,b) == (hash(a)==hash(b))
 end
 
@@ -141,13 +142,6 @@ vals = Any[
     [5 1; 0 0], [1 1; 0 1], [0 2; 3 0], [0 2; 4 6], [4 0; 0 1],
     [0 0 0; 0 0 0], [1 0 0; 0 0 1], [0 0 2; 3 0 0], [0 0 7; 6 1 2],
     [4 0 0; 3 0 1], [0 2 4; 6 0 0],
-    # various stored zeros patterns
-    sparse([1], [1], [0]), sparse([1], [1], [-0.0]),
-    sparse([1, 2], [1, 1], [-0.0, 0.0]), sparse([1, 2], [1, 1], [0.0, -0.0]),
-    sparse([1, 2], [1, 1], [-0.0, 0.0], 3, 1), sparse([1, 2], [1, 1], [0.0, -0.0], 3, 1),
-    sparse([1, 3], [1, 1], [-0.0, 0.0], 3, 1), sparse([1, 3], [1, 1], [0.0, -0.0], 3, 1),
-    sparse([1, 2, 3], [1, 1, 1], [-1, 0, 1], 3, 1), sparse([1, 2, 3], [1, 1, 1], [-1.0, -0.0, 1.0], 3, 1),
-    sparse([1, 3], [1, 1], [-1, 0], 3, 1), sparse([1, 2], [1, 1], [-1, 0], 3, 1)
 ]
 
 for a in vals
@@ -155,7 +149,6 @@ for a in vals
     @test hash(convert(Array{Any}, a)) == hash(b)
     @test hash(convert(Array{supertype(eltype(a))}, a)) == hash(b)
     @test hash(convert(Array{Float64}, a)) == hash(b)
-    @test hash(sparse(a)) == hash(b)
     if !any(x -> isequal(x, -0.0), a)
         @test hash(convert(Array{Int}, a)) == hash(b)
         if all(x -> typemin(Int8) <= x <= typemax(Int8), a)
@@ -169,20 +162,6 @@ end
 @test hash(Any[Int8(127), Int8(-128), 129, 130]) ==
     hash([127, -128, 129, 130]) != hash([127,  128, 129, 130])
 
-# Test hashing sparse matrix with type which does not support -
-struct CustomHashReal
-    x::Float64
-end
-Base.hash(x::CustomHashReal, h::UInt) = hash(x.x, h)
-Base.:(==)(x::CustomHashReal, y::Number) = x.x == y
-Base.:(==)(x::Number, y::CustomHashReal) = x == y.x
-Base.zero(::Type{CustomHashReal}) = CustomHashReal(0.0)
-Base.zero(x::CustomHashReal) = zero(CustomHashReal)
-
-let a = sparse([CustomHashReal(0), CustomHashReal(3), CustomHashReal(3)])
-    @test hash(a) == hash(Array(a))
-end
-
 vals = Any[
     0.0:0.1:0.3, 0.3:-0.1:0.0,
     0:-1:1, 0.0:-1.0:1.0, 0.0:1.1:10.0, -4:10,
@@ -201,8 +180,14 @@ end
 @test hash([1,2]) == hash(view([1,2,3,4],1:2))
 
 let a = QuoteNode(1), b = QuoteNode(1.0)
-    @test (hash(a)==hash(b)) == (a==b)
+    @test hash(a) == hash(b)
+    @test a != b
 end
+let a = QuoteNode(:(1 + 2)), b = QuoteNode(:(1 + 2))
+    @test hash(a) == hash(b)
+    @test a == b
+end
+
 
 let a = Expr(:block, Core.SlotNumber(1)),
     b = Expr(:block, Core.SlotNumber(1)),
@@ -272,7 +257,9 @@ end
         )
 
         for a in vals, b in vals
-            @test isequal(a, b) == (Base.hash_64_32(a) == Base.hash_64_32(b))
+            ha = Base.hash_64_32(a)
+            hb = Base.hash_64_32(b)
+            @test isequal(a, b) == (ha == hb)
         end
     end
 
@@ -283,7 +270,9 @@ end
         )
 
         for a in vals, b in vals
-            @test isequal(a, b) == (Base.hash_32_32(a) == Base.hash_32_32(b))
+            ha = Base.hash_32_32(a)
+            hb = Base.hash_32_32(b)
+            @test isequal(a, b) == (ha == hb)
         end
     end
 end
@@ -310,3 +299,45 @@ struct AUnionParam{T<:Union{Nothing,Float32,Float64}} end
 @test Type{AUnionParam{<:Union{Nothing,Float32,Float64}}} === Type{AUnionParam}
 @test Type{AUnionParam.body}.hash == 0
 @test Type{Base.Broadcast.Broadcasted}.hash != 0
+
+
+@testset "issue 50628" begin
+    # test hashing of rationals that equal floats are equal to the float hash
+    @test hash(5//2) == hash(big(5)//2) == hash(2.5)
+    # test hashing of rational that are integers hash to the integer
+    @test hash(Int64(5)^25) == hash(big(5)^25) == hash(Int64(5)^25//1) == hash(big(5)^25//1)
+    # test integer/rational that don't fit in Float64 don't hash as Float64
+    @test hash(Int64(5)^25) != hash(5.0^25)
+    @test hash((Int64(5)//2)^25) == hash(big(5//2)^25)
+    # test integer/rational that don't fit in Float64 don't hash as Float64
+    @test hash((Int64(5)//2)^25) != hash(2.5^25)
+    # test hashing of rational with odd denominator
+    @test hash(5//3) == hash(big(5)//3)
+end
+
+@testset "concrete eval type hash" begin
+    @test Core.Compiler.is_foldable_nothrow(Base.infer_effects(hash, Tuple{Type{Int}, UInt}))
+
+    f(h...) = hash(Char, h...);
+    src = only(code_typed(f, Tuple{UInt}))[1]
+    @test count(stmt -> Meta.isexpr(stmt, :foreigncall), src.code) == 0
+end
+
+@testset "hash_bytes consistency" begin
+    # Test that hash_bytes(::Array), hash_bytes(Generator(identity, Array)), and hash_bytes(pointer(Array)) return the same values
+
+    for n in 0:1000
+        b = rand(UInt8, n)
+        a = Base.Generator(identity, b)
+
+        # Test hash_bytes(::Array) vs hash_bytes(pointer(Array))
+        hash_array = Base.hash_bytes(b, UInt64(Base.HASH_SEED), Base.HASH_SECRET)
+        hash_pointer = Base.hash_bytes(pointer(b), length(b), UInt64(Base.HASH_SEED), Base.HASH_SECRET)
+        @test hash_array isa UInt64
+        @test hash_array === hash_pointer
+
+        # Test hash_bytes(Generator(identity, Array)) vs hash_bytes(pointer(Array))
+        hash_generator = Base.hash_bytes(a, UInt64(Base.HASH_SEED), Base.HASH_SECRET)
+        @test hash_generator === hash_pointer
+    end
+end
diff --git a/test/int.jl b/test/int.jl
index f79bc5a9781d0..715625cf8b6d9 100644
--- a/test/int.jl
+++ b/test/int.jl
@@ -118,6 +118,10 @@ end
     @test big"1.0" == BigFloat(1.0)
     @test_throws ArgumentError big"1.0.3"
     @test_throws ArgumentError big"pi"
+
+    @test_throws ArgumentError big"_æ1"
+    @test_throws ArgumentError big"æ_1"
+    @test_throws ArgumentError big"_ææ"
 end
 
 @test round(UInt8, 123) == 123
@@ -358,6 +362,23 @@ end
             end
         end
     end
+    # exhaustive UInt8/Int8 tests for mixed signedness
+    for f in (mod, rem)
+        for i in -128:127
+            for j in 0:255
+                if iszero(i)
+                    @test_throws DivideError f(UInt8(j), Int8(i))
+                else
+                    @test f(UInt8(j), Int8(i)) == f(j, i)
+                end
+                if iszero(j)
+                    @test_throws DivideError f(Int8(i), UInt8(j))
+                else
+                    @test f(Int8(i), UInt8(j)) == f(i,j)
+                end
+            end
+        end
+    end
 end
 
 @testset "Underscores in big_str" begin
diff --git a/test/interpreter.jl b/test/interpreter.jl
index 0fea42e0aecdb..012a0f7fe7859 100644
--- a/test/interpreter.jl
+++ b/test/interpreter.jl
@@ -30,3 +30,11 @@ let p = Pipe(),
     wait(proc)
     close(p)
 end
+
+# Test generated function behavior in interpreter
+@test success(pipeline(`$(Base.julia_cmd()) --compile=min -E 'include("staged.jl")'`; stderr))
+
+# Test contextual execution mechanism in interpreter (#54360)
+let compiler_contextual_test = escape_string(joinpath(@__DIR__,"../Compiler/test/contextual.jl"))
+    @test success(pipeline(`$(Base.julia_cmd()) --compile=min -E "include(\"$compiler_contextual_test\")"`; stderr))
+end
diff --git a/test/intfuncs.jl b/test/intfuncs.jl
index ceaac235a3da9..9774903c23f0a 100644
--- a/test/intfuncs.jl
+++ b/test/intfuncs.jl
@@ -4,40 +4,44 @@ using Random
 
 is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args...))
 
+⟷(a::T, b::T) where T <: Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128} = a === b
+⟷(a::T, b::T) where T <: BigInt = a == b
+
 @testset "gcd/lcm" begin
     # All Integer data types take different code paths -- test all
-    # TODO: Test gcd and lcm for BigInt.
-    for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128)
-        @test gcd(T(3)) === T(3)
-        @test gcd(T(3), T(5)) === T(1)
-        @test gcd(T(3), T(15)) === T(3)
-        @test gcd(T(0), T(15)) === T(15)
-        @test gcd(T(15), T(0)) === T(15)
+    for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt)
+        @test gcd(T(3)) ⟷ T(3)
+        @test gcd(T(3), T(5)) ⟷ T(1)
+        @test gcd(T(3), T(15)) ⟷ T(3)
+        @test gcd(T(0), T(15)) ⟷ T(15)
+        @test gcd(T(15), T(0)) ⟷ T(15)
         if T <: Signed
-            @test gcd(T(-12)) === T(12)
-            @test gcd(T(0), T(-15)) === T(15)
-            @test gcd(T(-15), T(0)) === T(15)
-            @test gcd(T(3), T(-15)) === T(3)
-            @test gcd(T(-3), T(-15)) === T(3)
+            @test gcd(T(-12)) ⟷ T(12)
+            @test gcd(T(0), T(-15)) ⟷ T(15)
+            @test gcd(T(-15), T(0)) ⟷ T(15)
+            @test gcd(T(3), T(-15)) ⟷ T(3)
+            @test gcd(T(-3), T(-15)) ⟷ T(3)
         end
-        @test gcd(T(0), T(0)) === T(0)
+        @test gcd(T(0), T(0)) ⟷ T(0)
 
-        @test gcd(T(2), T(4), T(6)) === T(2)
+        @test gcd(T(2), T(4), T(6)) ⟷ T(2)
         if T <: Signed
-            @test gcd(T(2), T(4), T(-6)) === T(2)
-            @test gcd(T(2), T(-4), T(-6)) === T(2)
-            @test gcd(T(-2), T(4), T(-6)) === T(2)
-            @test gcd(T(-2), T(-4), T(-6)) === T(2)
+            @test gcd(T(2), T(4), T(-6)) ⟷ T(2)
+            @test gcd(T(2), T(-4), T(-6)) ⟷ T(2)
+            @test gcd(T(-2), T(4), T(-6)) ⟷ T(2)
+            @test gcd(T(-2), T(-4), T(-6)) ⟷ T(2)
         end
 
-        @test gcd(typemax(T), T(1)) === T(1)
-        @test gcd(T(1), typemax(T)) === T(1)
-        @test gcd(typemax(T), T(0)) === typemax(T)
-        @test gcd(T(0), typemax(T)) === typemax(T)
-        @test gcd(typemax(T), typemax(T)) === typemax(T)
-        @test gcd(typemax(T), typemax(T)-T(1)) === T(1)     # gcd(n, n-1) = 1. n and n-1 are always coprime.
+        if T != BigInt
+            @test gcd(typemax(T), T(1)) === T(1)
+            @test gcd(T(1), typemax(T)) === T(1)
+            @test gcd(typemax(T), T(0)) === typemax(T)
+            @test gcd(T(0), typemax(T)) === typemax(T)
+            @test gcd(typemax(T), typemax(T)) === typemax(T)
+            @test gcd(typemax(T), typemax(T)-T(1)) === T(1)     # gcd(n, n-1) = 1. n and n-1 are always coprime.
+        end
 
-        if T <: Signed
+        if T <: Signed && T != BigInt
             @test gcd(-typemax(T), T(1)) === T(1)
             @test gcd(T(1), -typemax(T)) === T(1)
             @test gcd(-typemax(T), T(0)) === typemax(T)
@@ -52,7 +56,7 @@ is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args..
             @test_throws OverflowError gcd(typemin(T), typemin(T))
             @test_throws OverflowError gcd(typemin(T), T(0))
             @test_throws OverflowError gcd(T(0), typemin(T))
-        else
+        elseif T != BigInt
             # For Unsigned Integer types, -typemax(T) == 1.
             @test gcd(-typemax(T), T(1)) === T(1)
             @test gcd(T(1), -typemax(T)) === T(1)
@@ -71,83 +75,86 @@ is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args..
             @test gcd(T(0), typemin(T)) === T(0)
         end
 
-        @test lcm(T(0)) === T(0)
-        @test lcm(T(2)) === T(2)
-        @test lcm(T(2), T(3)) === T(6)
-        @test lcm(T(3), T(2)) === T(6)
-        @test lcm(T(4), T(6)) === T(12)
-        @test lcm(T(6), T(4)) === T(12)
-        @test lcm(T(3), T(0)) === T(0)
-        @test lcm(T(0), T(3)) === T(0)
-        @test lcm(T(0), T(0)) === T(0)
+        @test lcm(T(0)) ⟷ T(0)
+        @test lcm(T(2)) ⟷ T(2)
+        @test lcm(T(2), T(3)) ⟷ T(6)
+        @test lcm(T(3), T(2)) ⟷ T(6)
+        @test lcm(T(4), T(6)) ⟷ T(12)
+        @test lcm(T(6), T(4)) ⟷ T(12)
+        @test lcm(T(3), T(0)) ⟷ T(0)
+        @test lcm(T(0), T(3)) ⟷ T(0)
+        @test lcm(T(0), T(0)) ⟷ T(0)
         if T <: Signed
-            @test lcm(T(-12)) === T(12)
-            @test lcm(T(0), T(-4)) === T(0)
-            @test lcm(T(-4), T(0)) === T(0)
-            @test lcm(T(4), T(-6)) === T(12)
-            @test lcm(T(-4), T(-6)) === T(12)
+            @test lcm(T(-12)) ⟷ T(12)
+            @test lcm(T(0), T(-4)) ⟷ T(0)
+            @test lcm(T(-4), T(0)) ⟷ T(0)
+            @test lcm(T(4), T(-6)) ⟷ T(12)
+            @test lcm(T(-4), T(-6)) ⟷ T(12)
         end
 
-        @test lcm(T(2), T(4), T(6)) === T(12)
-        @test lcm(T(2), T(4), T(0)) === T(0)
+        @test lcm(T(2), T(4), T(6)) ⟷ T(12)
+        @test lcm(T(2), T(4), T(0)) ⟷ T(0)
         if T <: Signed
-            @test lcm(T(2), T(4), T(-6)) === T(12)
-            @test lcm(T(2), T(-4), T(-6)) === T(12)
-            @test lcm(T(-2), T(-4), T(-6)) === T(12)
-            @test lcm(T(-2), T(0), T(-6)) === T(0)
+            @test lcm(T(2), T(4), T(-6)) ⟷ T(12)
+            @test lcm(T(2), T(-4), T(-6)) ⟷ T(12)
+            @test lcm(T(-2), T(-4), T(-6)) ⟷ T(12)
+            @test lcm(T(-2), T(0), T(-6)) ⟷ T(0)
         end
 
-        @test lcm(typemax(T), T(1)) === typemax(T)
-        @test lcm(T(1), typemax(T)) === typemax(T)
-        @test lcm(typemax(T), T(0)) === T(0)
-        @test lcm(T(0), typemax(T)) === T(0)
-        @test lcm(typemax(T), typemax(T)) === typemax(T)
-        @test_throws OverflowError lcm(typemax(T), typemax(T)-T(1)) # lcm(n, n-1) = n*(n-1). Since n and n-1 are always coprime.
-        @test_throws OverflowError lcm(typemax(T), T(2))
-
-        let x = isqrt(typemax(T))+T(1) # smallest number x such that x^2 > typemax(T)
-            @test lcm(x, x) === x
-            @test_throws OverflowError lcm(x, x+T(1))   # lcm(n, n+1) = n*(n+1). Since n and n+1 are always coprime.
-        end
-
-        if T <: Signed
-            @test lcm(-typemax(T), T(1)) === typemax(T)
-            @test lcm(T(1), -typemax(T)) === typemax(T)
-            @test lcm(-typemax(T), T(0)) === T(0)
-            @test lcm(T(0), -typemax(T)) === T(0)
-            @test lcm(-typemax(T), -typemax(T)) === typemax(T)
-            @test lcm(typemax(T), -typemax(T)) === typemax(T)
-            @test lcm(-typemax(T), typemax(T)) === typemax(T)
-
-            @test_throws OverflowError lcm(typemin(T), T(1))
-            @test_throws OverflowError lcm(T(1), typemin(T))
-            @test lcm(typemin(T), T(0)) === T(0)
-            @test lcm(T(0), typemin(T)) === T(0)
-            @test_throws OverflowError lcm(typemin(T), typemin(T)+T(1)) # lcm(n, n+1) = n*(n+1).
-            @test_throws OverflowError lcm(typemin(T), typemin(T))
-        else
-            # For Unsigned Integer types, -typemax(T) == 1.
-            @test lcm(-typemax(T), T(1)) === T(1)
-            @test lcm(T(1), -typemax(T)) === T(1)
-            @test lcm(-typemax(T), T(0)) === T(0)
-            @test lcm(T(0), -typemax(T)) === T(0)
-            @test lcm(-typemax(T), -typemax(T)) === T(1)
-            @test lcm(-typemax(T), typemax(T)) === typemax(T)
-            @test lcm(typemax(T), -typemax(T)) === typemax(T)
+        if T != BigInt
+            @test lcm(typemax(T), T(1)) === typemax(T)
+            @test lcm(T(1), typemax(T)) === typemax(T)
+            @test lcm(typemax(T), T(0)) === T(0)
+            @test lcm(T(0), typemax(T)) === T(0)
+            @test lcm(typemax(T), typemax(T)) === typemax(T)
+            @test_throws OverflowError lcm(typemax(T), typemax(T)-T(1)) # lcm(n, n-1) = n*(n-1). Since n and n-1 are always coprime.
+            @test_throws OverflowError lcm(typemax(T), T(2))
+
+            let x = isqrt(typemax(T))+T(1) # smallest number x such that x^2 > typemax(T)
+                @test lcm(x, x) === x
+                @test_throws OverflowError lcm(x, x+T(1))   # lcm(n, n+1) = n*(n+1). Since n and n+1 are always coprime.
+            end
 
-            # For Unsigned Integer types, typemin(T) == 0.
-            @test lcm(typemin(T), T(1)) === lcm(T(0), T(1)) === T(0)
-            @test lcm(T(1), typemin(T)) === T(0)
-            @test lcm(typemin(T), T(0)) === T(0)
-            @test lcm(T(0), typemin(T)) === T(0)
-            @test lcm(typemin(T), typemin(T)) === T(0)
-            @test lcm(typemin(T), typemin(T)+T(1)) === T(0)
+            if T <: Signed
+                @test lcm(-typemax(T), T(1)) === typemax(T)
+                @test lcm(T(1), -typemax(T)) === typemax(T)
+                @test lcm(-typemax(T), T(0)) === T(0)
+                @test lcm(T(0), -typemax(T)) === T(0)
+                @test lcm(-typemax(T), -typemax(T)) === typemax(T)
+                @test lcm(typemax(T), -typemax(T)) === typemax(T)
+                @test lcm(-typemax(T), typemax(T)) === typemax(T)
+
+                @test_throws OverflowError lcm(typemin(T), T(1))
+                @test_throws OverflowError lcm(T(1), typemin(T))
+                @test lcm(typemin(T), T(0)) === T(0)
+                @test lcm(T(0), typemin(T)) === T(0)
+                @test_throws OverflowError lcm(typemin(T), typemin(T)+T(1)) # lcm(n, n+1) = n*(n+1).
+                @test_throws OverflowError lcm(typemin(T), typemin(T))
+            else
+                # For Unsigned Integer types, -typemax(T) == 1.
+                @test lcm(-typemax(T), T(1)) === T(1)
+                @test lcm(T(1), -typemax(T)) === T(1)
+                @test lcm(-typemax(T), T(0)) === T(0)
+                @test lcm(T(0), -typemax(T)) === T(0)
+                @test lcm(-typemax(T), -typemax(T)) === T(1)
+                @test lcm(-typemax(T), typemax(T)) === typemax(T)
+                @test lcm(typemax(T), -typemax(T)) === typemax(T)
+
+                # For Unsigned Integer types, typemin(T) == 0.
+                @test lcm(typemin(T), T(1)) === lcm(T(0), T(1)) === T(0)
+                @test lcm(T(1), typemin(T)) === T(0)
+                @test lcm(typemin(T), T(0)) === T(0)
+                @test lcm(T(0), typemin(T)) === T(0)
+                @test lcm(typemin(T), typemin(T)) === T(0)
+                @test lcm(typemin(T), typemin(T)+T(1)) === T(0)
+            end
         end
     end
     @test lcm(0x5, 3) == 15
     @test gcd(0xf, 20) == 5
     @test gcd(UInt32(6), Int8(-50)) == 2
     @test gcd(typemax(UInt), -16) == 1
+    @test gcd(typemax(UInt), BigInt(1236189723689716298376189726398761298361892)) == 1
 
     @testset "effects" begin
         @test is_effect_free(gcd, Tuple{Int,Int})
@@ -156,49 +163,101 @@ is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args..
 end
 
 @testset "gcd/lcm for arrays" begin
-    # TODO: Test gcd and lcm for BigInt arrays.
-    for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128)
-        @test gcd(T[]) === T(0)
-        @test gcd(T[3, 5]) === T(1)
-        @test gcd(T[3, 15]) === T(3)
-        @test gcd(T[0, 15]) === T(15)
+    for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt)
+        @test gcd(T[]) ⟷ T(0)
+        @test gcd(T[3, 5]) ⟷ T(1)
+        @test gcd(T[3, 15]) ⟷ T(3)
+        @test gcd(T[0, 15]) ⟷ T(15)
         if T <: Signed
-            @test gcd(T[-12]) === T(12)
-            @test gcd(T[3,-15]) === T(3)
-            @test gcd(T[-3,-15]) === T(3)
+            @test gcd(T[-12]) ⟷ T(12)
+            @test gcd(T[3,-15]) ⟷ T(3)
+            @test gcd(T[-3,-15]) ⟷ T(3)
         end
-        @test gcd(T[0, 0]) === T(0)
+        @test gcd(T[0, 0]) ⟷ T(0)
 
-        @test gcd(T[2, 4, 6]) === T(2)
-        @test gcd(T[2, 4, 3, 5]) === T(1)
+        @test gcd(T[2, 4, 6]) ⟷ T(2)
+        @test gcd(T[2, 4, 3, 5]) ⟷ T(1)
 
-        @test lcm(T[]) === T(1)
-        @test lcm(T[2, 3]) === T(6)
-        @test lcm(T[4, 6]) === T(12)
-        @test lcm(T[3, 0]) === T(0)
-        @test lcm(T[0, 0]) === T(0)
+        @test lcm(T[]) ⟷ T(1)
+        @test lcm(T[2, 3]) ⟷ T(6)
+        @test lcm(T[4, 6]) ⟷ T(12)
+        @test lcm(T[3, 0]) ⟷ T(0)
+        @test lcm(T[0, 0]) ⟷ T(0)
         if T <: Signed
-            @test lcm(T[-2]) === T(2)
-            @test lcm(T[4, -6]) === T(12)
-            @test lcm(T[-4, -6]) === T(12)
+            @test lcm(T[-2]) ⟷ T(2)
+            @test lcm(T[4, -6]) ⟷ T(12)
+            @test lcm(T[-4, -6]) ⟷ T(12)
         end
 
-        @test lcm(T[2, 4, 6]) === T(12)
+        @test lcm(T[2, 4, 6]) ⟷ T(12)
     end
+
+    # Issue #55379
+    @test lcm([1//2; 1//2]) === lcm([1//2, 1//2]) === lcm(1//2, 1//2) === 1//2
+    @test gcd(Int[]) === 0
+    @test lcm(Int[]) === 1
+    @test gcd(Rational{Int}[]) === 0//1
+    @test_throws ArgumentError("lcm has no identity for Rational{$Int}") lcm(Rational{Int}[])
 end
 
+⟷(a::Tuple{T, T, T}, b::Tuple{T, T, T}) where T <: Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128} = a === b
+⟷(a::Tuple{T, T, T}, b::Tuple{T, T, T}) where T <: BigInt = a == b
 @testset "gcdx" begin
-    # TODO: Test gcdx for BigInt.
-    for T in (Int8, Int16, Int32, Int64, Int128)
-        @test gcdx(T(5), T(12)) === (T(1), T(5), T(-2))
-        @test gcdx(T(5), T(-12)) === (T(1), T(5), T(2))
-        @test gcdx(T(-5), T(12)) === (T(1), T(-5), T(-2))
-        @test gcdx(T(-5), T(-12)) === (T(1), T(-5), T(2))
-        @test gcdx(T(-25), T(-4)) === (T(1), T(-1), T(6))
+    for T in (Int8, Int16, Int32, Int64, Int128, BigInt)
+        @test gcdx(T(5), T(12)) ⟷ (T(1), T(5), T(-2))
+        @test gcdx(T(5), T(-12)) ⟷ (T(1), T(5), T(2))
+        @test gcdx(T(-5), T(12)) ⟷ (T(1), T(-5), T(-2))
+        @test gcdx(T(-5), T(-12)) ⟷ (T(1), T(-5), T(2))
+        @test gcdx(T(-25), T(-4)) ⟷ (T(1), T(-1), T(6))
+        @test gcdx(T(0), T(0)) ⟷ (T(0), T(0), T(0))
+        @test gcdx(T(8), T(0)) ⟷ (T(8), T(1), T(0))
+        @test gcdx(T(0), T(-8)) ⟷ (T(8), T(0), T(-1))
     end
     x, y = Int8(-12), UInt(100)
     d, u, v = gcdx(x, y)
     @test x*u + y*v == d
+
+    for T in (Int8, Int16, Int32, Int64, Int128)
+        @test_throws DomainError gcdx(typemin(T), typemin(T))
+        @test_throws DomainError gcdx(typemin(T), T(0))
+        @test_throws DomainError gcdx(T(0), typemin(T))
+        d, u, v = gcdx(typemin(T), T(-1))
+        @test d == T(1)
+        @test typemin(T) * u + T(-1) * v == T(1)
+        @test gcdx(T(-1), typemin(T)) == (d, v, u)
+        d, u, v = gcdx(typemin(T), T(1))
+        @test d == T(1)
+        @test typemin(T) * u + T(1) * v == T(1)
+        @test gcdx(T(1), typemin(T)) == (d, v, u)
+    end
+end
+
+# issue #58025
+@testset "Mixed signed/unsigned types" begin
+    cases = [ # adapted from https://github.com/JuliaLang/julia/pull/59487#issuecomment-3258209203
+        (UInt16(100), Int8(-101)),
+        (Int8(-50), UInt16(75)),
+        (UInt32(12), Int16(-18)),
+        (Int64(-24), UInt8(36)),
+        (UInt8(15), Int16(-25)),
+        (Int32(-42), UInt64(56)),
+        (UInt128(1000), Int32(-1500)),
+        (UInt64(0), Int32(-5)),
+        (Int16(-7), UInt8(0)),
+        (Int8(-14), UInt8(13)),
+    ]
+    for (a, b) in cases
+        g1 = gcd(a, b)
+        g2, s, t = gcdx(a, b)
+        @test g1 === g2
+        @test s*a + t*b == g2
+        @test g2 >= 0
+        @test lcm(a, b) === convert(typeof(g1), lcm(widen(a), widen(b)))
+    end
+
+    @test gcdx(Int16(-32768), Int8(-128)) === (Int16(128), Int16(0), Int16(-1))
+    @test gcdx(Int8(-128), UInt16(256)) === (0x0080, 0xffff, 0x0000)
+    @test gcd(Int8(-128), UInt16(256)) === 0x0080
 end
 
 @testset "gcd/lcm/gcdx for custom types" begin
@@ -221,7 +280,7 @@ end
     @test_throws MethodError gcdx(MyOtherRational(2//3), MyOtherRational(3//4))
 end
 
-@testset "invmod" begin
+@testset "invmod(n, m)" begin
     @test invmod(6, 31) === 26
     @test invmod(-1, 3) === 2
     @test invmod(1, -3) === -2
@@ -241,10 +300,27 @@ end
         @test invmod(T(3), T(124))::T == 83
     end
 
+    for T in (Int8, Int16, Int32, Int64, Int128)
+        @test invmod(T(3), unsigned(T)(124)) == 83
+    end
+
+    # Verify issue described in PR 58010 is fixed
+    @test invmod(UInt8(3), UInt16(50000)) === 0x411b
+
+    @test invmod(0x00000001, Int8(-128)) === Int32(-127)
+    @test invmod(0xffffffff, Int8(-38)) === Int32(-15)
+    @test invmod(Int8(-1), 0xffffffff) === 0xfffffffe
+    @test invmod(Int32(-1), typemin(Int64)) === Int64(-1)
+    @test invmod(0x3e81, Int16(-5716)) === Int16(-2407)
+
     for T in (Int8, UInt8)
         for x in typemin(T):typemax(T)
             for m in typemin(T):typemax(T)
-                if m != 0 && try gcdx(x, m)[1] == 1 catch _ true end
+                if !(
+                    iszero(m) ||
+                    iszero(mod(x, m)) && !isone(abs(m)) ||
+                    !isone(gcd(x, m))
+                )
                     y = invmod(x, m)
                     @test mod(widemul(y, x), m) == mod(1, m)
                     @test div(y, m) == 0
@@ -256,6 +332,37 @@ end
     end
 end
 
+@testset "invmod(n)" begin
+    for T in (Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128)
+        if sizeof(T) ≤ 2
+            # test full domain for small types
+            for a = typemin(T)+true:T(2):typemax(T)
+                b = invmod(a)
+                @test a * b == 1
+            end
+        else
+            # test random sample for large types
+            for _ = 1:2^12
+                a = rand(T) | true
+                b = invmod(a)
+                @test a * b == 1
+            end
+        end
+    end
+end
+
+@testset "invmod(n, T)" begin
+    for S in (Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128),
+        T in (Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128)
+        for _ = 1:2^8
+            a = rand(S) | true
+            b = invmod(a, T)
+            @test (a * b) % T == 1
+            @test (a % T) * b == 1
+        end
+    end
+end
+
 @testset "powermod" begin
     @test powermod(2, 3, 5) == 3
     @test powermod(2, 3, -5) == -2
@@ -275,9 +382,24 @@ end
     @test powermod(2, big(3), -5) == -2
     @inferred  powermod(2, -2, -5)
     @inferred  powermod(big(2), -2, UInt(5))
+
+    @test powermod(-3, 0x80, 7) === 2
+    @test powermod(0x03, 0x80, 0x07) === 0x02
+
+    @test powermod(511, 1, 0x00000021) === 0x00000010
+    @test powermod(Int8(-1), 0xff, Int8(33)) === Int8(32)
+    @test powermod(0, 10, -5) === 0
 end
 
 @testset "nextpow/prevpow" begin
+    fs = (prevpow, nextpow)
+    types = (Int8, BigInt, BigFloat)
+    for f ∈ fs, P ∈ types, R ∈ types, p ∈ 1:20, r ∈ 2:5
+        q = P(p)
+        n = R(r)
+        @test f(r, p) == f(n, q)
+    end
+
     @test nextpow(2, 3) == 4
     @test nextpow(2, 4) == 4
     @test nextpow(2, 7) == 8
@@ -291,7 +413,14 @@ end
     @test prevpow(10, 101.0) === 100
     @test prevpow(10.0, 101) === 100.0
     @test_throws DomainError prevpow(0, 3)
-    @test_throws DomainError prevpow(0, 3)
+    @test_throws DomainError prevpow(3, 0)
+
+    # "argument is beyond the range of type of the base"
+    @test_throws DomainError prevpow(Int8(3), 243)
+    @test_throws DomainError nextpow(Int8(3), 243)
+
+    # "result is beyond the range of type of the base"
+    @test_throws OverflowError nextpow(Int8(3), 82)
 end
 
 @testset "ndigits/ndigits0z" begin
@@ -464,7 +593,7 @@ end
         x::Int
     end
     MyInt(x::MyInt) = x
-    Base.:+(a::MyInt, b::MyInt) = a.x + b.x
+    Base.uabs(x::MyInt) = Base.uabs(x.x)
 
     for n in 0:100
         x = ceil(Int, log2(n + 1))
@@ -480,9 +609,6 @@ end
         @test 32  == Base.top_set_bit(Int32(n)) == Base.top_set_bit(unsigned(Int32(n)))
         @test 8   == Base.top_set_bit(Int8(n)) == Base.top_set_bit(unsigned(Int8(n)))
         @test_throws DomainError Base.top_set_bit(big(n))
-        # This error message should never be exposed to the end user anyway.
-        err = n == -1 ? InexactError : DomainError
-        @test_throws err Base.top_set_bit(MyInt(n))
     end
 
     @test count_zeros(Int64(1)) == 63
@@ -502,6 +628,70 @@ end
     @test isqrt(Int8(5)) === Int8(2)
 end
 
+@testset "exponent and top_set_bit consistency" begin
+    for _T in (Int8, Int16, Int32, Int64, Int128)
+        for issigned in (false, true)
+            T = issigned ? _T : unsigned(_T)
+            nbits = 8sizeof(T)
+            @test_throws DomainError exponent(T(0))
+            @test Base.top_set_bit(T(0)) == 0
+            @test Base.top_set_bit(T(0)) == invoke(Base.top_set_bit, Tuple{Integer}, T(0))
+
+            for i in 0:(nbits - (issigned ? 2 : 1))
+                p2 = T(1) << i
+                @test exponent(p2) == i
+                @test exponent(p2) == invoke(exponent, Tuple{Integer}, p2)
+                @test Base.top_set_bit(p2) == i + 1
+                @test Base.top_set_bit(p2) == invoke(Base.top_set_bit, Tuple{Integer}, p2)
+
+                p2m1 = p2 - T(1)
+                if p2m1 != 0
+                    @test exponent(p2m1) == i - 1
+                    @test exponent(p2m1) == invoke(exponent, Tuple{Integer}, p2m1)
+                    @test Base.top_set_bit(p2m1) == i
+                    @test Base.top_set_bit(p2m1) == invoke(Base.top_set_bit, Tuple{Integer}, p2m1)
+                end
+
+                p2p1 = p2 + T(1)
+                if p2p1 != 0
+                    @test exponent(p2p1) == max(i, 1)
+                    @test exponent(p2p1) == invoke(exponent, Tuple{Integer}, p2p1)
+                    @test Base.top_set_bit(p2p1) == max(i, 1) + 1
+                    @test Base.top_set_bit(p2p1) == invoke(Base.top_set_bit, Tuple{Integer}, p2p1)
+                end
+            end
+
+            @test exponent(typemax(T)) == nbits - (issigned ? 2 : 1)
+            @test exponent(typemax(T)) == invoke(exponent, Tuple{Integer}, typemax(T))
+            expected_max = !issigned ? nbits : nbits - 1
+            @test Base.top_set_bit(typemax(T)) == expected_max
+            @test Base.top_set_bit(typemax(T)) == invoke(Base.top_set_bit, Tuple{Integer}, typemax(T))
+
+            if issigned
+                for val in [T(-1), T(-2), T(-17), typemin(T)]
+                    expected = exponent(abs(BigInt(val)))
+                    @test exponent(val) == expected
+                    @test exponent(val) == invoke(exponent, Tuple{Integer}, val)
+                    @test Base.top_set_bit(val) == nbits
+                    @test invoke(Base.top_set_bit, Tuple{Integer}, val) == expected + 1
+                end
+            end
+        end
+
+        @test exponent(big(2)^100) == 100
+        @test exponent(big(2)^100 - 1) == 99
+        @test exponent(big(2)^100 + 1) == 100
+        @test exponent(big(-1)) == 0
+        @test_throws DomainError exponent(big(0))
+
+        @test Base.top_set_bit(big(0)) == 0
+        @test Base.top_set_bit(big(2)^100) == 101
+        @test Base.top_set_bit(big(2)^100 - 1) == 100
+        @test Base.top_set_bit(big(2)^100 + 1) == 101
+        @test_throws DomainError Base.top_set_bit(big(-1))
+    end
+end
+
 @testset "issue #4884" begin
     @test isqrt(9223372030926249000) == 3037000498
     @test isqrt(typemax(Int128)) == parse(Int128,"13043817825332782212")
@@ -552,6 +742,10 @@ end
             x>=0 && @test binomial(x,x-T(2)) == div(x*(x-1), 2)
         end
         @test @inferred(binomial(one(T),one(T))) isa T
+
+        # Arguments of different Integer types do not lead to computation of
+        # generalized binomial coefficient (issue #54296)
+        @test @inferred(binomial(Int64(5), T(2))) === Int64(10)
     end
     for x in ((false,false), (false,true), (true,false), (true,true))
         @test binomial(x...) == (x != (false,true))
@@ -571,3 +765,33 @@ end
 @test Base.infer_effects(gcdx, (Int,Int)) |> Core.Compiler.is_foldable
 @test Base.infer_effects(invmod, (Int,Int)) |> Core.Compiler.is_foldable
 @test Base.infer_effects(binomial, (Int,Int)) |> Core.Compiler.is_foldable
+@testset "concrete-foldability: `hastypemax`" begin
+    @test Base.infer_effects(Base.hastypemax, (Type,)) |> Core.Compiler.is_foldable
+    @test Base.infer_effects(Base.hastypemax, (DataType,)) |> Core.Compiler.is_foldable
+    for t in (Bool, Int, BigInt)
+        @test Base.infer_effects(Base.hastypemax, (Type{t},)) |> Core.Compiler.is_foldable
+    end
+end
+
+@testset "`hastypemax`" begin
+    @test Base.hastypemax(Bool)
+    @test Base.hastypemax(Int)
+    @test !Base.hastypemax(BigInt)
+end
+
+@testset "literal power" begin
+    @testset for T in Base.uniontypes(Base.HWReal)
+        ns = (T(0), T(1), T(5))
+        if T <: AbstractFloat
+            ns = (ns..., T(3.14), T(-2.71))
+        end
+        for n in ns
+            @test n ^ 0 === T(1)
+            @test n ^ 1 === n
+            @test n ^ 2 === n * n
+            @test n ^ 3 === n * n * n
+            @test n ^ -1 ≈ inv(n)
+            @test n ^ -2 ≈ inv(n) * inv(n)
+        end
+    end
+end
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index 3c49afe2c4d7e..5e18c1fb3672a 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -18,15 +18,32 @@ truncbool(u) = reinterpret(UInt8, reinterpret(Bool, u))
 @testset "runtime intrinsics" begin
     @test Core.Intrinsics.add_int(1, 1) == 2
     @test Core.Intrinsics.sub_int(1, 1) == 0
-    @test_throws ErrorException("fpext: output bitsize must be >= input bitsize")    Core.Intrinsics.fpext(Int32, 0x0000_0000_0000_0000)
+
+    @test_throws ErrorException("fpext: output bitsize must be > input bitsize")    Core.Intrinsics.fpext(Float32, 1.0)
+    @test_throws ErrorException("fpext: output bitsize must be > input bitsize")    Core.Intrinsics.fpext(Float32, 1.0)
+
     @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Int32, 0x0000_0000)
     @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Int64, 0x0000_0000)
+    @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Float16, Float16(1.0))
+    @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Core.BFloat16, Float16(1.0))
+    @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Float32, Float16(1.0))
+    @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Float32, 1.0f0)
+    @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Float64, 1.0)
+
+    let bf16_1 = Core.Intrinsics.bitcast(Core.BFloat16, 0x3f80)
+        @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Core.BFloat16, bf16_1)
+        @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Float16, bf16_1)
+        @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Float32, bf16_1)
+    end
+
     @test_throws ErrorException("ZExt: output bitsize must be > input bitsize")     Core.Intrinsics.zext_int(Int8, 0x00)
     @test_throws ErrorException("SExt: output bitsize must be > input bitsize")     Core.Intrinsics.sext_int(Int8, 0x00)
     @test_throws ErrorException("ZExt: output bitsize must be > input bitsize")     Core.Intrinsics.zext_int(Int8, 0x0000)
     @test_throws ErrorException("SExt: output bitsize must be > input bitsize")     Core.Intrinsics.sext_int(Int8, 0x0000)
     @test_throws ErrorException("Trunc: output bitsize must be < input bitsize")    Core.Intrinsics.trunc_int(Int8, 0x00)
     @test_throws ErrorException("Trunc: output bitsize must be < input bitsize")    Core.Intrinsics.trunc_int(Int16, 0x00)
+
+    @test_throws ErrorException("add_float: runtime floating point intrinsics require both arguments to be Float16, BFloat16, Float32, or Float64") Core.Intrinsics.add_float(1, 2)
 end
 
 # issue #4581
@@ -51,7 +68,7 @@ end
 # test functionality of non-power-of-2 primitive type constants
 primitive type Int24 24 end
 Int24(x::Int) = Core.Intrinsics.trunc_int(Int24, x)
-Int(x::Int24) = Core.Intrinsics.zext_int(Int, x)
+Base.Int(x::Int24) = Core.Intrinsics.zext_int(Int, x)
 let x, y, f
     x = Int24(Int(0x12345678)) # create something (via truncation)
     @test Int(0x345678) === Int(x)
@@ -77,7 +94,7 @@ compiled_addi(x, y) = Core.Intrinsics.add_int(x, y)
 @test compiled_addi(true, true) === false
 
 compiled_addf(x, y) = Core.Intrinsics.add_float(x, y)
-@test compiled_addf(C_NULL, C_NULL) === C_NULL
+@test_throws ErrorException compiled_addf(C_NULL, C_NULL)
 @test_throws ErrorException compiled_addf(C_NULL, 1)
 @test compiled_addf(0.5, 5.0e-323) === 0.5
 @test_throws ErrorException compiled_addf(im, im)
@@ -87,16 +104,57 @@ function compiled_conv(::Type{T}, x) where T
     t = Core.Intrinsics.trunc_int(T, x)
     z = Core.Intrinsics.zext_int(typeof(x), t)
     s = Core.Intrinsics.sext_int(typeof(x), t)
-    fpt = Core.Intrinsics.fptrunc(T, x)
-    fpe = Core.Intrinsics.fpext(typeof(x), fpt)
-    return (t, z, s, fpt, fpe)
+    return (t, z, s)
 end
 @test compiled_conv(UInt32, Int64(0x8000_0000)) ==
-    (0x80000000, Int64(0x80000000), -Int64(0x80000000), 0x00000000, 0)
+    (0x80000000, Int64(0x80000000), -Int64(0x80000000))
 @test compiled_conv(UInt32, UInt64(0xC000_BA98_8765_4321)) ==
-    (0x87654321, 0x0000000087654321, 0xffffffff87654321, 0xc005d4c4, 0xc000ba9880000000)
+    (0x87654321, 0x0000000087654321, 0xffffffff87654321)
 @test_throws ErrorException compiled_conv(Bool, im)
 
+function compiled_fptrunc(::Type{T}, x) where T
+    return Core.Intrinsics.fptrunc(T, x)
+
+end
+#           1.234
+#           0 01111111 00111011111001110110110
+#   float32 0 01111111 00111011111001110110110
+#   float16 0    01111 0011101111              (truncated/rtz)
+#   float16 0    01111 0011110000              (round-to-nearest)
+#  bfloat16 0 01111111 0011110                 (round-to-nearest)
+@test compiled_fptrunc(Float16, 1.234) === reinterpret(Float16, 0b0_01111_0011110000)
+# On arm64, LLVM gives an assertion failure when compiling this:
+# LLVM ERROR: Cannot select: 0x106c8e570: bf16 = fp_round 0x106c8df50, TargetConstant:i64<0>, intrinsics.jl:114
+#   0x106c8df50: f64,ch = CopyFromReg 0x104545960, Register:f64 %1
+#     0x106c8dee0: f64 = Register %1
+#   0x106c8e3b0: i64 = TargetConstant<0>
+# In function: julia_compiled_fptrunc_3480
+# @test compiled_fptrunc(Core.BFloat16, 1.234) === reinterpret(Core.BFloat16, 0b0_01111111_0011110)
+@test compiled_fptrunc(Float32, 1.234) === 1.234f0
+@test_throws ErrorException compiled_fptrunc(Float64, 1.234f0)
+@test_throws ErrorException compiled_fptrunc(Int32, 1.234)
+@test_throws ErrorException compiled_fptrunc(Float32, 1234)
+
+function compiled_fpext(::Type{T}, x) where T
+    return Core.Intrinsics.fpext(T, x)
+end
+#           1.234
+#   float16 0    01111 0011110000
+#           0 01111111 00111100000000000000000 = 1.234375
+
+#           1.234
+#   float32 0 01111111    00111011111001110110110
+#   float64 0 01111111111 0011101111100111011011000000000000000000000000000000
+#                         3be76c
+@test compiled_fpext(Float32, reinterpret(Float16, 0b0_01111_0011110000)) === 1.234375f0
+@test compiled_fpext(Float64, reinterpret(Float16, 0b0_01111_0011110000)) === 1.234375
+@test compiled_fpext(Float64, 1.234f0) === 0x1.3be76cp0
+@test_throws ErrorException compiled_fpext(Float16, Float16(1.0))
+@test_throws ErrorException compiled_fpext(Float16, 1.0f0)
+@test_throws ErrorException compiled_fpext(Float32, 1.0f0)
+@test_throws ErrorException compiled_fpext(Float32, 1.0)
+@test_throws ErrorException compiled_fpext(Float64, 1.0)
+
 let f = Core.Intrinsics.ashr_int
     @test f(Int8(-17), 1) == -9
     @test f(Int32(-1), 33) == -1
@@ -143,27 +201,175 @@ macro test_intrinsic(intr, args...)
             $intr($(inputs...))
         end
         @test f() === Base.invokelatest($intr, $(inputs...))
-        @test f() == $output
+        @test f() === $output
     end
 end
 
+macro test_intrinsic_pred(intr, args...)
+    p = args[end]
+    inputs = args[1:end-1]
+    quote
+        function f()
+            $intr($(inputs...))
+        end
+        @test $(p)(Base.invokelatest($intr, $(inputs...)))
+        @test $(p)(f())
+    end
+end
+
+@testset "Float64 intrinsics" begin
+    # unary
+    @test_intrinsic Core.Intrinsics.abs_float Float64(-3.3) Float64(3.3)
+    @test_intrinsic Core.Intrinsics.neg_float Float64(3.3) Float64(-3.3)
+
+    # binary
+    @test_intrinsic Core.Intrinsics.add_float Float64(3.3) Float64(2) Float64(5.3)
+    @test_intrinsic Core.Intrinsics.sub_float Float64(3.3) Float64(2) Float64(1.2999999999999998)
+    @test_intrinsic Core.Intrinsics.mul_float Float64(3.3) Float64(2) Float64(6.6)
+    @test_intrinsic Core.Intrinsics.div_float Float64(3.3) Float64(2) Float64(1.65)
+    @test_intrinsic Core.Intrinsics.max_float Float64(1.0) Float64(2.0) Float64(2.0)
+    @test_intrinsic Core.Intrinsics.min_float Float64(1.0) Float64(2.0) Float64(1.0)
+
+    # ternary
+    @test_intrinsic Core.Intrinsics.fma_float Float64(3.3) Float64(4.4) Float64(5.5) Float64(20.02)
+    @test_intrinsic Core.Intrinsics.muladd_float Float64(3.3) Float64(4.4) Float64(5.5) Float64(20.02)
+    @test_intrinsic Core.Intrinsics.fma_float 0x1.0000000000001p0 1.25 0x1p-54 0x1.4000000000002p0
+    @test 0x1.0000000000001p0*1.25+0x1p-54 === 0x1.4000000000001p0 # for comparison
+
+    # boolean
+    @test_intrinsic Core.Intrinsics.eq_float Float64(3.3) Float64(3.3) true
+    @test_intrinsic Core.Intrinsics.eq_float Float64(3.3) Float64(2) false
+    @test_intrinsic Core.Intrinsics.ne_float Float64(3.3) Float64(3.3) false
+    @test_intrinsic Core.Intrinsics.ne_float Float64(3.3) Float64(2) true
+    @test_intrinsic Core.Intrinsics.le_float Float64(3.3) Float64(3.3) true
+    @test_intrinsic Core.Intrinsics.le_float Float64(3.3) Float64(2) false
+
+    # conversions
+    @test_intrinsic Core.Intrinsics.sitofp Float64 3 Float64(3.0)
+    @test_intrinsic Core.Intrinsics.uitofp Float64 UInt(3) Float64(3.0)
+    @test_intrinsic Core.Intrinsics.fptosi Int Float64(3.3) 3
+    @test_intrinsic Core.Intrinsics.fptoui UInt Float64(3.3) UInt(3)
+
+    # #57384
+    @test_intrinsic Core.Intrinsics.fptosi Int 1.5 1
+    @test_intrinsic Core.Intrinsics.fptosi Int128 1.5 Int128(1)
+end
+
+@testset "Float32 intrinsics" begin
+    # unary
+    @test_intrinsic Core.Intrinsics.abs_float Float32(-3.3) Float32(3.3)
+    @test_intrinsic Core.Intrinsics.neg_float Float32(3.3) Float32(-3.3)
+    @test_intrinsic Core.Intrinsics.fpext Float64 Float32(3.3) 3.299999952316284
+    @test_intrinsic Core.Intrinsics.fptrunc Float32 Float64(3.3) Float32(3.3)
+
+    # binary
+    @test_intrinsic Core.Intrinsics.add_float Float32(3.3) Float32(2) Float32(5.3)
+    @test_intrinsic Core.Intrinsics.sub_float Float32(3.3) Float32(2) Float32(1.3)
+    @test_intrinsic Core.Intrinsics.mul_float Float32(3.3) Float32(2) Float32(6.6)
+    @test_intrinsic Core.Intrinsics.div_float Float32(3.3) Float32(2) Float32(1.65)
+    @test_intrinsic Core.Intrinsics.max_float Float32(1.0) Float32(2.0) Float32(2.0)
+    @test_intrinsic Core.Intrinsics.min_float Float32(1.0) Float32(2.0) Float32(1.0)
+
+    # ternary
+    @test_intrinsic Core.Intrinsics.fma_float Float32(3.3) Float32(4.4) Float32(5.5) Float32(20.02)
+    @test_intrinsic Core.Intrinsics.muladd_float Float32(3.3) Float32(4.4) Float32(5.5) Float32(20.02)
+    @test_intrinsic Core.Intrinsics.fma_float Float32(0x1.000002p0) 1.25f0 Float32(0x1p-25) Float32(0x1.400004p0)
+    @test Float32(0x1.000002p0)*1.25f0+Float32(0x1p-25) === Float32(0x1.400002p0) # for comparison
+
+
+    # boolean
+    @test_intrinsic Core.Intrinsics.eq_float Float32(3.3) Float32(3.3) true
+    @test_intrinsic Core.Intrinsics.eq_float Float32(3.3) Float32(2) false
+    @test_intrinsic Core.Intrinsics.ne_float Float32(3.3) Float32(3.3) false
+    @test_intrinsic Core.Intrinsics.ne_float Float32(3.3) Float32(2) true
+    @test_intrinsic Core.Intrinsics.le_float Float32(3.3) Float32(3.3) true
+    @test_intrinsic Core.Intrinsics.le_float Float32(3.3) Float32(2) false
+
+    # conversions
+    @test_intrinsic Core.Intrinsics.sitofp Float32 3 Float32(3.0)
+    @test_intrinsic Core.Intrinsics.uitofp Float32 UInt(3) Float32(3.0)
+    @test_intrinsic Core.Intrinsics.fptosi Int Float32(3.3) 3
+    @test_intrinsic Core.Intrinsics.fptoui UInt Float32(3.3) UInt(3)
+end
+
+function f16(sign, exp, sig)
+    x = (sign&1)<<15 | (exp&((1<<5)-1))<<10 | sig&((1<<10)-1)
+    return reinterpret(Float16, UInt16(x))
+end
+function f32(sign, exp, sig)
+    x = (sign&1)<<31 | (exp&((1<<8)-1))<<23 | sig&((1<<23)-1)
+    return reinterpret(Float32, UInt32(x))
+end
+function f64(sign, exp, sig)
+    x = Int64(sign&1)<<31 | Int64(exp&((1<<11)-1))<<52 | sig&((Int64(1)<<52)-1)
+    return reinterpret(Float64, UInt64(x))
+end
+
 @testset "Float16 intrinsics" begin
     # unary
+    @test_intrinsic Core.Intrinsics.abs_float Float16(-3.3) Float16(3.3)
     @test_intrinsic Core.Intrinsics.neg_float Float16(3.3) Float16(-3.3)
+    # See <https://github.com/JuliaLang/julia/issues/57130>
     @test_intrinsic Core.Intrinsics.fpext Float32 Float16(3.3) 3.3007812f0
     @test_intrinsic Core.Intrinsics.fpext Float64 Float16(3.3) 3.30078125
     @test_intrinsic Core.Intrinsics.fptrunc Float16 Float32(3.3) Float16(3.3)
     @test_intrinsic Core.Intrinsics.fptrunc Float16 Float64(3.3) Float16(3.3)
 
+    # #57805 - cases where rounding Float64 -> Float32 -> Float16 would fail
+    #     2^-25 * 0b1.0000000000000000000000000000000000000001 binary
+    #   0 01111100110 0000000000000000000000000000000000000001000000000000
+    #     2^-25 * 0b1.0                                        binary
+    #   0    01100110 00000000000000000000000
+    #     2^-14 * 0b0.0000000001 (subnormal)
+    #   0       00000 0000000001 (correct)
+    #   0       00000 0000000000 (incorrect)
+    @test_intrinsic Core.Intrinsics.fptrunc Float16 0x1.0000000001p-25 Float16(6.0e-8)
+    @test_intrinsic Core.Intrinsics.fptrunc Float16 -0x1.0000000001p-25 Float16(-6.0e-8)
+
+    # float_to_half/bfloat_to_float special cases
+    @test_intrinsic Core.Intrinsics.fptrunc Float16 Inf32 Inf16
+    @test_intrinsic Core.Intrinsics.fptrunc Float16 -Inf32 -Inf16
+    @test_intrinsic Core.Intrinsics.fptrunc Float16 Inf64 Inf16
+    @test_intrinsic Core.Intrinsics.fptrunc Float16 -Inf64 -Inf16
+
+    # LLVM gives us three things that may happen to NaNs in an fptrunc on
+    # "normal" platforms (x86, ARM):
+    # - Return a canonical NaN (quiet, all-zero payload)
+    # - Copy high bits of payload to output, and:
+    #   - Set the quiet bit
+    #   - Leave the quiet bit as-is.  This option isn't possible if doing so
+    #     would result in an infinity (all-zero payload and quiet bit clear)
+    #
+    # We'll just test a NaN is returned at all.
+    #
+    # Refer to #49353 and https://llvm.org/docs/LangRef.html#floatnan
+
+    # Canonical NaN
+    @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 NaN32 isnan
+    @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 NaN isnan
+    # Quiet NaN
+    @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f32(0, 0xff, 1<<22 | 1<<13) isnan
+    @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, Int64(1)<<51 | Int64(1)<<42) isnan
+    # Signalling NaN that can be propagated to Float16
+    @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f32(0, 0xff, 1<<13) isnan
+    @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, Int64(1)<<42) isnan
+    # Signalling NaN that cannot be propagated to Float16
+    @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f32(0, 0xff, 1) isnan
+    @test_intrinsic_pred Core.Intrinsics.fptrunc Float16 f64(0, 0x7ff, 1) isnan
+
     # binary
     @test_intrinsic Core.Intrinsics.add_float Float16(3.3) Float16(2) Float16(5.3)
     @test_intrinsic Core.Intrinsics.sub_float Float16(3.3) Float16(2) Float16(1.301)
     @test_intrinsic Core.Intrinsics.mul_float Float16(3.3) Float16(2) Float16(6.6)
     @test_intrinsic Core.Intrinsics.div_float Float16(3.3) Float16(2) Float16(1.65)
+    @test_intrinsic Core.Intrinsics.max_float Float16(1.0) Float16(2.0) Float16(2.0)
+    @test_intrinsic Core.Intrinsics.min_float Float16(1.0) Float16(2.0) Float16(1.0)
 
     # ternary
     @test_intrinsic Core.Intrinsics.fma_float Float16(3.3) Float16(4.4) Float16(5.5) Float16(20.02)
     @test_intrinsic Core.Intrinsics.muladd_float Float16(3.3) Float16(4.4) Float16(5.5) Float16(20.02)
+    @test_intrinsic Core.Intrinsics.fma_float Float16(0x1.004p0) Float16(1.25) Float16(0x1p-12) Float16(0x1.408p0)
+    @test Float16(0x1.004p0)*Float16(1.25)+Float16(0x1p-12) === Float16(0x1.404p0) # for comparison
 
     # boolean
     @test_intrinsic Core.Intrinsics.eq_float Float16(3.3) Float16(3.3) true
@@ -174,34 +380,18 @@ end
     @test_intrinsic Core.Intrinsics.le_float Float16(3.3) Float16(2) false
 
     # conversions
-    @test_intrinsic Core.Intrinsics.sitofp Float16 3 Float16(3f0)
-    @test_intrinsic Core.Intrinsics.uitofp Float16 UInt(3) Float16(3f0)
+    @test_intrinsic Core.Intrinsics.sitofp Float16 3 Float16(3.0)
+    @test_intrinsic Core.Intrinsics.uitofp Float16 UInt(3) Float16(3.0)
     @test_intrinsic Core.Intrinsics.fptosi Int Float16(3.3) 3
     @test_intrinsic Core.Intrinsics.fptoui UInt Float16(3.3) UInt(3)
 end
 
-if Sys.ARCH == :aarch64 ||  Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le
-    # On AArch64 we are following the `_Float16` ABI. Buthe these functions expect `Int16`.
-    # TODO: SHould we have `Chalf == Int16` and `Cfloat16 == Float16`?
-    extendhfsf2(x::Float16) = ccall("extern __extendhfsf2", llvmcall, Float32, (UInt16,), reinterpret(UInt16, x))
-    gnu_h2f_ieee(x::Float16) = ccall("extern __gnu_h2f_ieee", llvmcall, Float32, (UInt16,), reinterpret(UInt16, x))
-    truncsfhf2(x::Float32) = reinterpret(Float16, ccall("extern __truncsfhf2", llvmcall, UInt16, (Float32,), x))
-    gnu_f2h_ieee(x::Float32) = reinterpret(Float16, ccall("extern __gnu_f2h_ieee", llvmcall, UInt16, (Float32,), x))
-    truncdfhf2(x::Float64) = reinterpret(Float16, ccall("extern __truncdfhf2", llvmcall, UInt16, (Float64,), x))
-else
-    extendhfsf2(x::Float16) = ccall("extern __extendhfsf2", llvmcall, Float32, (Float16,), x)
-    gnu_h2f_ieee(x::Float16) = ccall("extern __gnu_h2f_ieee", llvmcall, Float32, (Float16,), x)
-    truncsfhf2(x::Float32) = ccall("extern __truncsfhf2", llvmcall, Float16, (Float32,), x)
-    gnu_f2h_ieee(x::Float32) = ccall("extern __gnu_f2h_ieee", llvmcall, Float16, (Float32,), x)
-    truncdfhf2(x::Float64) = ccall("extern __truncdfhf2", llvmcall, Float16, (Float64,), x)
-end
-
 @testset "Float16 intrinsics (crt)" begin
-    @test extendhfsf2(Float16(3.3)) == 3.3007812f0
+    gnu_h2f_ieee(x::Float16) = ccall("julia__gnu_h2f_ieee", Float32, (Float16,), x)
+    gnu_f2h_ieee(x::Float32) = ccall("julia__gnu_f2h_ieee", Float16, (Float32,), x)
+
     @test gnu_h2f_ieee(Float16(3.3)) == 3.3007812f0
-    @test truncsfhf2(3.3f0) == Float16(3.3)
     @test gnu_f2h_ieee(3.3f0) == Float16(3.3)
-    @test truncdfhf2(3.3) == Float16(3.3)
 end
 
 using Base.Experimental: @force_compile
@@ -213,8 +403,8 @@ for order in (:not_atomic, :monotonic, :acquire, :release, :acquire_release, :se
     @test (order -> Core.Intrinsics.atomic_fence(order))(order) === nothing
     @test Base.invokelatest(@eval () -> Core.Intrinsics.atomic_fence($(QuoteNode(order)))) === nothing
 end
-@test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) == nothing
-@test (@force_compile; Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent)) == nothing
+@test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) === nothing
+@test (@force_compile; Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent)) === nothing
 
 primitive type Int256 <: Signed 256 end
 Int256(i::Int) = Core.Intrinsics.sext_int(Int256, i)
@@ -236,7 +426,7 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co
                 @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(10), S(3), :sequentially_consistent, :sequentially_consistent)
             end
             @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[]
-            if sizeof(r) > 8
+            if sizeof(r) > 2*sizeof(Int)
                 @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") unsafe_load(p, :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") unsafe_store!(p, T(1), :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") unsafe_swap!(p, T(100), :sequentially_consistent)
@@ -361,3 +551,16 @@ Base.show(io::IO, a::IntWrap) = print(io, "IntWrap(", a.x, ")")
         @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[]
     end
 end)()
+
+@testset "issue #54548" begin
+    @inline passthrough(ptr::Core.LLVMPtr{T,A}) where {T,A} = Base.llvmcall(("""
+            define ptr addrspace(1) @entry(ptr addrspace(1) %0) #0 {
+            entry:
+                ret ptr addrspace(1) %0
+            }
+
+            attributes #0 = { alwaysinline }""", "entry"),
+        Core.LLVMPtr{T,A}, Tuple{Core.LLVMPtr{T,A}}, ptr)
+    f(gws) = passthrough(Core.bitcast(Core.LLVMPtr{UInt32,1}, gws))
+    f(C_NULL)
+end
diff --git a/test/iobuffer.jl b/test/iobuffer.jl
index ec77903b4a5b8..6163e59beb567 100644
--- a/test/iobuffer.jl
+++ b/test/iobuffer.jl
@@ -6,6 +6,273 @@ ioslength(io::IOBuffer) = (io.seekable ? io.size : bytesavailable(io))
 
 bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
 
+# Julia Base's internals uses the PipeBuffer, which is an unseekable IOBuffer.
+# There are no public constructors to build such a buffer, but we need to test
+# it anyway.
+# I make a new method here such that if the implementation of Base.PipeBuffer
+# changes, these tests will still work.
+new_unseekable_buffer() = Base.GenericIOBuffer(Memory{UInt8}(), true, true, false, true, typemax(Int), false)
+
+@testset "Basic tests" begin
+    @test_throws ArgumentError IOBuffer(;maxsize=-1)
+    @test_throws ArgumentError IOBuffer([0x01]; maxsize=-1)
+
+    # Test that sizehint actually will sizehint the vector,
+    v = UInt8[]
+    buf = IOBuffer(v; sizehint=64, write=true)
+    @test length(v.ref.mem) >= 64
+
+    # Test that you can't make an IOBuffer with a maxsize
+    # smaller than the size you actually give it
+    @test_throws ArgumentError IOBuffer([0x01, 0x02]; maxsize=1)
+    @test_throws ArgumentError IOBuffer(b"abcdefghij"; maxsize=8)
+end
+
+@testset "Basic reading" begin
+    # Readavailable is equal to read
+    buf = IOBuffer("abcdef")
+    @test read(buf, UInt8) == UInt8('a')
+    @test bytesavailable(buf) == 5
+    @test readavailable(buf) == b"bcdef"
+
+    # Reading less than all the bytes
+    buf = IOBuffer(b"ABCDEFGHIJ")
+    @test read(buf, 1) == b"A"
+    @test read(buf, 3) == b"BCD"
+
+    # Reading more bytes than available will not error
+    @test read(buf, 100) == b"EFGHIJ"
+
+    # Passing truncate=false will still truncate an IOBuffer with no
+    # initialized data
+    @test isempty(read(IOBuffer(;sizehint=34, truncate=false)))
+end
+
+@testset "Byte occursin GenericIOBuffer" begin
+    buf = IOBuffer(@view(collect(0x1f:0x3d)[1:end]))
+    @test occursin(0x1f, buf)
+    @test occursin(0x3d, buf)
+    @test occursin(0x2a, buf)
+
+    @test !occursin(0xff, buf)
+    @test !occursin(0x00, buf)
+
+    v = Vector{UInt8}("bcdefg")
+    pushfirst!(v, UInt8('a'))
+    buf = IOBuffer(v)
+    @test occursin(UInt8('a'), buf)
+    read(buf, UInt8)
+    @test !occursin(UInt8('a'), buf)
+    @test !occursin(0x00, buf)
+
+    buf = IOBuffer("abcdefg")
+    @test occursin(UInt8('a'), buf)
+end
+
+@testset "Non-Memory backed IOBuffer" begin
+    buf = IOBuffer(Test.GenericArray(collect(0x02:0x0d)), read=true)
+    @test read(buf) == 0x02:0x0d
+
+    buf = IOBuffer(Test.GenericArray(collect(0x02:0x0d)), read=true)
+    @test read(buf, UInt8) == 0x02
+    @test read(buf) == 0x03:0x0d
+
+    v = view(collect(UInt8('a'):UInt8('z')), 4:10)
+    buf = IOBuffer(v, read=true, write=true)
+    @test read(buf, UInt8) == UInt8('d')
+    @test read(buf) == UInt8('e'):UInt8('j')
+    seekstart(buf)
+    @test read(buf, UInt8) == UInt8('d')
+    write(buf, UInt8('x'))
+    write(buf, "ABC")
+    seekstart(buf)
+    @test read(buf) == b"dxABCij"
+end
+
+@testset "Copying" begin
+    # Test offset is preserved when copying
+    v = UInt8[]
+    pushfirst!(v, UInt8('a'), UInt8('b'), UInt8('c'))
+    buf = IOBuffer(v; write=true, read=true, append=true)
+    write(buf, "def")
+    read(buf, UInt16)
+    buf2 = copy(buf)
+    @test String(read(buf)) == "cdef"
+    @test String(read(buf2)) == "cdef"
+
+    # Test copying with non-Memory backed GenericIOBuffer
+    buf = IOBuffer(Test.GenericArray(collect(0x02:0x0d)), read=true)
+    @test read(buf, UInt16) == 0x0302
+    buf2 = copy(buf)
+    @test isreadable(buf2)
+    @test !iswritable(buf2)
+    @test read(buf2) == 0x04:0x0d
+
+    # Test copying a non-seekable stream
+    buf = new_unseekable_buffer()
+    write(buf, "abcdef")
+    read(buf, UInt16)
+    mark(buf)
+    read(buf, UInt16)
+    buf2 = copy(buf)
+    @test read(buf2) == b"ef"
+    reset(buf2)
+    @test read(buf2) == b"cdef"
+
+    # Test copying seekable stream
+    buf = IOBuffer()
+    write(buf, "abcdef")
+    seekstart(buf)
+    read(buf)
+    mark(buf)
+    buf2 = copy(buf)
+    @test reset(buf2) == 6
+    seekstart(buf2)
+    @test read(buf2) == b"abcdef"
+
+    # Test copying a taken buffer
+    buf = IOBuffer()
+    write(buf, "abcdef")
+    take!(buf)
+    buf2 = copy(buf)
+    @test eof(buf2)
+    seekstart(buf2)
+    @test eof(buf2)
+end
+
+@testset "copyuntil" begin
+    a = IOBuffer(b"abcdeajdgabdfg")
+    b = IOBuffer(collect(b"xx"); write=true, read=true, append=true)
+    copyuntil(b, a, UInt8('a'))
+    @test read(b) == b"xx"
+    seekstart(b)
+    copyuntil(b, a, UInt8('a'); keep=true)
+    @test read(b) == b"xxbcdea"
+    seekstart(b)
+    copyuntil(b, a, UInt('w'))
+    @test read(b) == b"xxbcdeajdgabdfg"
+end
+
+@testset "copyline" begin
+    a = IOBuffer(b"abcde\nabc\r\nabc\n\r\nac")
+    b = IOBuffer()
+    copyline(b, a)
+    @test take!(copy(b)) == b"abcde"
+    copyline(b, a)
+    @test take!(copy(b)) == b"abcdeabc"
+    copyline(b, a; keep=true)
+    @test take!(copy(b)) == b"abcdeabcabc\n"
+    copyline(b, a; keep=false)
+    @test take!(copy(b)) == b"abcdeabcabc\n"
+    copyline(b, a; keep=false)
+    @test take!(copy(b)) == b"abcdeabcabc\nac"
+
+    # Test a current bug in copyline
+    a = Base.SecretBuffer("abcde\r\n")
+    b = IOBuffer()
+    write(b, "xxxxxxxxxx")
+    seek(b, 2)
+    copyline(b, a; keep=false)
+    Base.shred!(a)
+    @test take!(b) == b"xxabcdexxx"
+end
+
+@testset "take!" begin
+    a = IOBuffer("abc")
+    @test take!(a) == b"abc"
+
+    v = UInt8[]
+    pushfirst!(v, 0x0a)
+    buf = IOBuffer(v; write=true, append=true)
+    write(buf, "def")
+    @test take!(buf) == b"\ndef"
+
+    v = view(collect(b"abcdefghij"), 3:9)
+    buf = IOBuffer(v; write=true, read=true)
+    read(buf, UInt8)
+    write(buf, "xxy")
+    @test take!(buf) == b"cxxyghi"
+
+    v = view(collect(b"abcdefghij"), 3:9)
+    buf = IOBuffer(v; write=true, read=true)
+
+    # Take on unseekable buffer does not return used bytes.
+    buf = new_unseekable_buffer()
+    write(buf, 0x61)
+    write(buf, "bcd")
+    @test read(buf, UInt8) == 0x61
+    @test take!(buf) == b"bcd"
+
+    # Compaction is reset after take!
+    buf = Base.GenericIOBuffer(Memory{UInt8}(), true, true, false, true, 100, false)
+    write(buf, rand(UInt8, 50))
+    read(buf, 40)
+    write(buf, rand(UInt8, 100))
+    mark(buf)
+    read(buf, 70)
+    @test position(buf) == 110
+    @test length(buf.data) <= 100
+    v = take!(buf)
+    write(buf, 0xf1)
+    @test position(buf) == 0
+    @test !ismarked(buf)
+end
+
+@testset "maxsize is preserved" begin
+    # After take!
+    buf = IOBuffer(; maxsize=3)
+    print(buf, "abcdef")
+    @test take!(buf) == b"abc"
+    print(buf, "abcdef")
+    @test take!(buf) == b"abc"
+
+    # After resizing
+    buf = IOBuffer(;maxsize=128)
+    write(buf, collect(0x00:0x10))
+    write(buf, collect(0x11:0x30))
+    write(buf, collect(0x31:0x98))
+    write(buf, collect(0x99:0xff))
+    seekstart(buf)
+    @test read(buf) == 0x00:UInt8(127)
+
+    # Edge case: When passing a Vector, does not error if the
+    # underlying mem is larger than maxsize
+    v = pushfirst!([0x01], 0x02)
+    io = IOBuffer(v; maxsize=2)
+    @test read(io) == b"\x02\x01"
+
+    # Buffer will not write past maxsize, even if given a larger buffer
+    # And also even if the data is taken and replaced
+    v = sizehint!(UInt8[], 128)
+    io = IOBuffer(v; write=true, read=true, maxsize=12)
+    write(io, 0x01:0x0f)
+    seekstart(io)
+    @test read(io) == 0x01:0x0c
+    @test write(io, 0x01) == 0
+    @test write(io, "abc") == 0
+    @test take!(io).ref.mem === v.ref.mem
+    write(io, 0x01:0x0f)
+    @test take!(io) == 0x01:0x0c
+end
+
+@testset "Write to self" begin
+    buffer = IOBuffer()
+    @test_throws ArgumentError write(buffer, buffer)
+
+    # Write to another IOBuffer with limited size
+    to = IOBuffer(;maxsize=4)
+    from = IOBuffer(collect(b"abcdefghi"))
+    write(to, from)
+    @test String(take!(to)) == "abcd"
+    @test eof(from)
+
+    # Write to another IOBuffer when closed
+    to = IOBuffer()
+    from = IOBuffer(collect(b"abcdefghi"))
+    close(from)
+    @test_throws ArgumentError write(to, from)
+end
+
 @testset "Read/write empty IOBuffer" begin
     io = IOBuffer()
     @test eof(io)
@@ -33,7 +300,7 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
     @test position(io) == 0
     truncate(io, 10)
     @test position(io) == 0
-    @test all(io.data .== 0)
+    @test all(view(io.data, 1:10) .== 0)
     @test write(io, Int16[1, 2, 3, 4, 5, 6]) === 12
     seek(io, 2)
     truncate(io, 10)
@@ -54,6 +321,36 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
     @test_throws ArgumentError seek(io, 0)
 end
 
+@testset "takestring!" begin
+    buf = IOBuffer()
+    write(buf, "abcø")
+    s = takestring!(buf)
+    @test isempty(takestring!(buf))
+    @test s == "abcø"
+    write(buf, "xyz")
+    @test takestring!(buf) == "xyz"
+    buf = IOBuffer()
+
+    # Test with a nonzero offset in the buffer
+    v = rand(UInt8, 8)
+    for i in 1:8
+        pushfirst!(v, rand(UInt8))
+    end
+    buf = IOBuffer(v)
+    s = String(copy(v))
+    @test takestring!(buf) == s
+
+    # Test with a non-writable IOBuffer
+    buf = IOBuffer(b"abcdef")
+    read(buf, UInt8)
+    @test takestring!(buf) == "abcdef"
+
+    buf = new_unseekable_buffer()
+    write(buf, "abcde")
+    read(buf, UInt16)
+    @test takestring!(buf) == "cde"
+end
+
 @testset "Read/write readonly IOBuffer" begin
     io = IOBuffer("hamster\nguinea pig\nturtle")
     @test position(io) == 0
@@ -67,22 +364,89 @@ end
     @test_throws ArgumentError write(io,UInt8[0])
     @test String(take!(io)) == "hamster\nguinea pig\nturtle"
     @test String(take!(io)) == "hamster\nguinea pig\nturtle" #should be unchanged
-    @test_throws ArgumentError Base.compact(io) # not writeable
     close(io)
 end
 
+@testset "Truncate" begin
+    # Fails for non-writable and non-seekable
+    @test_throws ArgumentError truncate(PipeBuffer(), 0)
+    @test_throws ArgumentError truncate(IOBuffer(b"abcde"), 3)
+
+    # Standard use
+    buf = IOBuffer(collect(b"abcdef"); write=true, read=true)
+    truncate(buf, 4)
+    @test read(buf) == b"abcd"
+    @test take!(buf) == b"abcd"
+
+    # Mark is removed if beyond the size
+    buf = IOBuffer()
+    write(buf, "abcde")
+    seek(buf, 4)
+    mark(buf)
+    truncate(buf, 4)
+    @test !ismarked(buf)
+
+    # Making it larger
+    buf = IOBuffer(collect(b"abcdef"); write=true, read=true)
+    seek(buf, 3)
+    truncate(buf, 3)
+    write(buf, 'X')
+    mark(buf)
+    truncate(buf, 5)
+    @test ismarked(buf)
+    @test reset(buf) == 4
+    @test take!(buf) == b"abcX\0"
+
+    # With offset
+    v = pushfirst!(UInt8[0x62, 0x63, 0x64], 0x61)
+    buf = IOBuffer(v; write=true, read=true)
+    seekstart(buf)
+    read(buf, UInt8)
+    mark(buf)
+    truncate(buf, 7)
+    @test reset(buf) == 1
+    @test take!(buf) == b"abcd\0\0\0"
+end
+
+@testset "Position of compactable buffer" begin
+    # Set maxsize, because otherwise compaction it too hard to reason about,
+    # and this test will be brittle
+    io = Base.GenericIOBuffer(Memory{UInt8}(), true, true, false, true, 100, false)
+    write(io, "abcd")
+    read(io, UInt16)
+    @test position(io) == 2
+    write(io, "abcde"^80)
+    @test position(io) == 2
+    read(io, 60)
+    @test position(io) == 62
+    mark(io)
+    # Trigger compaction
+    write(io, rand(UInt8, 50))
+    @test position(io) == 62
+    v1 = read(io, 20)
+    @test position(io) == 82
+    @test reset(io) == 62
+    @test position(io) == 62
+    v2 = read(io, 20)
+    @test v1 == v2
+end
+
 @testset "PipeBuffer" begin
-    io = PipeBuffer()
+    io = new_unseekable_buffer()
     @test_throws EOFError read(io,UInt8)
     @test write(io,"pancakes\nwaffles\nblueberries\n") > 0
+
+    # PipeBuffer is append, so writing to it does not advance the position
     @test position(io) == 0
     @test readline(io) == "pancakes"
-    Base.compact(io)
     @test readline(io) == "waffles"
     @test write(io,"whipped cream\n") > 0
     @test readline(io) == "blueberries"
+
+    # Pipebuffers do not support seeking, and therefore do not support truncation.
     @test_throws ArgumentError seek(io,0)
     @test_throws ArgumentError truncate(io,0)
+
     @test readline(io) == "whipped cream"
     @test write(io,"pancakes\nwaffles\nblueberries\n") > 0
     @test readlines(io) == String["pancakes", "waffles", "blueberries"]
@@ -116,57 +480,6 @@ end
         end
         rm(fname)
     end
-
-    Base.compact(io)
-    @test position(io) == 0
-    @test ioslength(io) == 0
-    Base.ensureroom(io,50)
-    @test position(io) == 0
-    @test ioslength(io) == 0
-    @test length(io.data) == 50
-    Base.ensureroom(io,10)
-    @test ioslength(io) == 0
-    @test length(io.data) == 50
-    io.maxsize = 75
-    Base.ensureroom(io,100)
-    @test ioslength(io) == 0
-    @test length(io.data) == 75
-    seekend(io)
-    @test ioslength(io) == 0
-    @test position(io) == 0
-    write(io,zeros(UInt8,200))
-    @test ioslength(io) == 75
-    @test length(io.data) == 75
-    write(io,1)
-    @test ioslength(io) == 75
-    @test length(io.data) == 75
-    write(io,[1,2,3])
-    @test ioslength(io) == 75
-    @test length(io.data) == 75
-    skip(io,1)
-    @test write(io,UInt8(104)) === 1
-    skip(io,3)
-    @test write(io,b"apples") === 3
-    skip(io,71)
-    @test write(io,'y') === 1
-    @test read(io, String) == "happy"
-    @test eof(io)
-    write(io,zeros(UInt8,73))
-    write(io,'a')
-    write(io,'b')
-    write(io,'c')
-    write(io,'d')
-    write(io,'e')
-    @test ioslength(io) == 75
-    @test length(io.data) == 75
-    @test position(io) == 0
-    skip(io,72)
-    @test String(take!(io)) == "\0ab"
-    @test String(take!(io)) == ""
-
-    # issues 4021
-    print(io, true)
-    close(io)
 end
 
 @testset "issue 5453" begin
@@ -195,6 +508,38 @@ end
     @test position(skip(io, -3)) == 0
 end
 
+@testset "issue #53908" begin
+    @testset "offset $first" for first in (false, true)
+        b = collect(0x01:0x05)
+        sizehint!(b, 100; first) # make offset non zero
+        io = IOBuffer(b)
+        @test position(skip(io, 4)) == 4
+        @test position(skip(io, typemax(Int))) == 5
+        @test position(skip(io, typemax(Int128))) == 5
+        @test position(skip(io, typemax(Int32))) == 5
+        @test position(skip(io, typemin(Int))) == 0
+        @test position(skip(io, typemin(Int128))) == 0
+        @test position(skip(io, typemin(Int32))) == 0
+        @test position(skip(io, 4)) == 4
+        @test position(skip(io, -2)) == 2
+        @test position(skip(io, -2)) == 0
+        @test position(seek(io, -2)) == 0
+        @test position(seek(io, typemax(Int))) == 5
+        @test position(seek(io, typemax(Int128))) == 5
+        @test position(seek(io, typemax(Int32))) == 5
+        @test position(seek(io, typemin(Int))) == 0
+        @test position(seek(io, typemin(Int128))) == 0
+        @test position(seek(io, typemin(Int32))) == 0
+    end
+end
+
+@testset "issue #57962" begin
+    io = IOBuffer(repeat("x", 400))
+    skip(io, 10)
+    skip(io, 400)
+    @test isempty(read(io))
+end
+
 @testset "pr #11554" begin
     io  = IOBuffer(SubString("***αhelloworldω***", 4, 16))
     io2 = IOBuffer(Vector{UInt8}(b"goodnightmoon"), read=true, write=true)
@@ -222,9 +567,6 @@ end
     truncate(io2, io2.size - 2)
     @test read(io2, String) == "goodnightmoonhelloworld"
     seek(io2, 0)
-    write(io2, io2)
-    @test read(io2, String) == ""
-    @test bufcontents(io2) == "goodnightmoonhelloworld"
 end
 
 # issue #11917
@@ -251,6 +593,7 @@ end
     c = zeros(UInt8,8)
     @test bytesavailable(bstream) == 8
     @test !eof(bstream)
+    @test Base.reseteof(bstream) === nothing # TODO: Actually test intended effect
     read!(bstream,c)
     @test c == a[3:10]
     @test closewrite(bstream) === nothing
@@ -320,24 +663,42 @@ end
     @test n == 5
 end
 
-@testset "Base.compact" begin
-    a = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int))
-    mark(a) # mark at position 0
-    write(a, "Hello!")
-    @test Base.compact(a) == nothing # because pointer > mark
-    close(a)
-    b = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int))
-    write(b, "Hello!")
-    read(b)
-    mark(b) # mark at position 6
-    write(b, "Goodbye!") # now pointer is > mark but mark is > 0
-    Base.compact(b)
-    @test readline(b) == "Goodbye!"
-    close(b)
+@testset "Compacting" begin
+    # Compacting works
+    buf = Base.GenericIOBuffer(UInt8[], true, true, false, true, 20, false)
+    mark(buf)
+    write(buf, "Hello"^5)
+    reset(buf)
+    unmark(buf)
+    read(buf, UInt8)
+    read(buf, UInt8)
+    write(buf, "a!")
+    @test length(buf.data) == 20
+    @test String(take!(buf)) == "llo" * "Hello"^3 * "a!"
+
+    # Compacting does not do anything when mark == 0
+    buf = Base.GenericIOBuffer(UInt8[], true, true, false, true, 5, false)
+    mark(buf)
+    write(buf, "Hello")
+    reset(buf)
+    mark(buf)
+    read(buf, UInt8)
+    read(buf, UInt8)
+    @test write(buf, "a!") == 0
+    @test take!(buf) == b"llo"
+
+    # Compacting without maxsize still works
+    buf = new_unseekable_buffer()
+    data = repeat(b"abcdefg", 100)
+    write(buf, data)
+    read(buf, 600)
+    data_len = length(buf.data)
+    write(buf, view(data, 1:500))
+    @test length(buf.data) == data_len
 end
 
 @testset "peek(::GenericIOBuffer)" begin
-    io = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int))
+    io = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int), false)
     write(io, "こんにちは")
     @test peek(io) == 0xe3
     @test peek(io, Char) == 'こ'
@@ -354,6 +715,41 @@ end
     v = @view a[1:2]
     io = IOBuffer()
     write(io,1)
+    write(io,0)
     seek(io,0)
-    @test Base.read_sub(io,v,1,1) == [1,0]
+    @test read!(io, v) == [1, 0]
+end
+
+@testset "with offset" begin
+    b = pushfirst!([0x02], 0x01)
+    @test take!(IOBuffer(b)) == [0x01, 0x02]
+
+    # Read-only buffer does not take control of underlying buffer
+    v = pushfirst!([0x62, 0x63], 0x61)
+    buf = IOBuffer(v; write=false)
+    @test read(buf) == b"abc"
+    @test v == b"abc" # v is unchanged
+
+    # Truncate
+end
+
+@testset "#54636 reading from non-dense vectors" begin
+    data = 0x00:0xFF
+    io = IOBuffer(data)
+    @test read(io) == data
+    seekstart(io)
+    @test read(io, UInt16) === ltoh(0x0100)
+    out = IOBuffer()
+    write(out, io)
+    @test take!(out) == data[3:end]
+
+    data = @view(collect(0x00:0x0f)[begin:2:end])
+    io = IOBuffer(data)
+    @test read(io) == data
+end
+
+@testset "Writing Char to full buffer" begin
+    io = IOBuffer(;maxsize=1)
+    write(io, 'a')
+    @test write(io, 'a') == 0
 end
diff --git a/test/iostream.jl b/test/iostream.jl
index bc4751fb1fca7..13d01e61bbf8c 100644
--- a/test/iostream.jl
+++ b/test/iostream.jl
@@ -119,6 +119,24 @@ end
     end
 end
 
+@testset "read!/write(::IO, A::StridedArray)" begin
+    s1 = reshape(view(rand(UInt8, 16), 1:16), 2, 2, 2, 2)
+    s2 = view(s1, 1:2, 1:2, 1:2, 1:2)
+    s3 = view(s1, 1:2, 1:2, 1, 1:2)
+    mktemp() do path, io
+        b = Vector{UInt8}(undef, 17)
+        for s::StridedArray in (s3, s1, s2)
+            @test write(io, s) == length(s)
+            seek(io, 0)
+            @test readbytes!(io, b) == length(s)
+            seek(io, 0)
+            @test view(b, 1:length(s)) == vec(s)
+            @test read!(io, fill!(deepcopy(s), 0)) == s
+            seek(io, 0)
+        end
+    end
+end
+
 @test Base.open_flags(read=false, write=true, append=false) == (read=false, write=true, create=true, truncate=true, append=false)
 
 @testset "issue #30978" begin
@@ -172,3 +190,7 @@ end
     @test all(T -> T <: Union{UInt, Int}, Base.return_types(unsafe_write, (IO, Ptr{UInt8}, UInt)))
     @test all(T -> T === Bool, Base.return_types(eof, (IO,)))
 end
+
+@testset "fd" begin
+    @test open(fd, tempname(), "w") isa RawFD
+end
diff --git a/test/iterators.jl b/test/iterators.jl
index b361c7e8391e1..ffa4a4d3b2045 100644
--- a/test/iterators.jl
+++ b/test/iterators.jl
@@ -5,34 +5,41 @@ using Random
 using Base: IdentityUnitRange
 using Dates: Date, Day
 
-@test Base.IteratorSize(Any) isa Base.SizeUnknown
+isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
+using .Main.OffsetArrays
+
+@test (@inferred Base.IteratorSize(Any)) isa Base.SizeUnknown
 
 # zip and filter iterators
 # issue #4718
 @test collect(Iterators.filter(x->x[1], zip([true, false, true, false],"abcd"))) == [(true,'a'),(true,'c')]
 
+# issue #45085
+@test_throws ArgumentError Iterators.reverse(zip("abc", "abcd"))
+@test_throws ArgumentError Iterators.reverse(zip("abc", Iterators.cycle("ab")))
+
 let z = zip(1:2)
-    @test size(z) == (2,)
+    @test (@inferred size(z)) == (2,)
     @test collect(z) == [(1,), (2,)]
     # Issue #13979
-    @test eltype(z) == Tuple{Int}
+    @test (@inferred eltype(z)) == Tuple{Int}
 end
 
 for z in (zip(1:2, 3:4), zip(1:2, 3:5))
     @test collect(z) == [(1,3), (2,4)]
-    @test eltype(z) == Tuple{Int,Int}
-    @test size(z) == (2,)
-    @test axes(z) == (Base.OneTo(2),)
-    @test length(z) == 2
+    @test (@inferred eltype(z)) == Tuple{Int,Int}
+    @test (@inferred size(z)) == (2,)
+    @test (@inferred axes(z)) == (Base.OneTo(2),)
+    @test (@inferred length(z)) == 2
 end
 
 let z = zip(1:2, Iterators.countfrom(3))
     @test collect(z) == [(1,3), (2,4)]
-    @test eltype(z) == Tuple{Int,Int}
+    @test (@inferred eltype(z)) == Tuple{Int,Int}
     @test_throws MethodError size(z) # by convention, the zip of a finite and
                          # an infinite iterator has only `length`
     @test_throws MethodError axes(z)
-    @test length(z) == 2
+    @test (@inferred length(z)) == 2
 end
 
 let z = zip([i*j for i in 1:3, j in -1:2:1], 1:6)
@@ -42,29 +49,29 @@ let z = zip([i*j for i in 1:3, j in -1:2:1], 1:6)
                          (1, 4)
                          (2, 5)
                          (3, 6) ]
-    @test eltype(z) == Tuple{Int,Int}
+    @test (@inferred eltype(z)) == Tuple{Int,Int}
     @test_throws DimensionMismatch size(z)
     @test_throws DimensionMismatch axes(z)
-    @test length(z) == 6
+    @test (@inferred length(z)) == 6
 end
 
 let z = zip([i*j for i in 1:3, j in -1:2:1], [i*j for i in 1:3, j in -1:2:1])
     @test collect(z) == [(-1, -1) (1, 1)
                         (-2, -2) (2, 2)
                         (-3, -3) (3, 3)]
-    @test eltype(z) == Tuple{Int,Int}
-    @test size(z) == (3, 2)
-    @test axes(z) == (Base.OneTo(3), Base.OneTo(2))
-    @test length(z) == 6
+    @test (@inferred eltype(z)) == Tuple{Int,Int}
+    @test (@inferred size(z)) == (3, 2)
+    @test (@inferred axes(z)) == (Base.OneTo(3), Base.OneTo(2))
+    @test (@inferred length(z)) == 6
 end
 
 let z = zip(1:2, 3:4, 5:6)
-    @test size(z) == (2,)
+    @test (@inferred size(z)) == (2,)
     @test collect(z) == [(1,3,5), (2,4,6)]
-    @test eltype(z) == Tuple{Int,Int,Int}
+    @test (@inferred eltype(z)) == Tuple{Int,Int,Int}
 end
 
-@test eltype(Iterators.filter(isodd, 1:5)) == Int
+@test (@inferred eltype(Iterators.filter(isodd, 1:5))) == Int
 
 # typed `collect`
 @test collect(Float64, Iterators.filter(isodd, [1,2,3,4]))[1] === 1.0
@@ -98,10 +105,10 @@ let zeb = IOBuffer("1\n2\n3\n4\n5\n"),
     @test res == [(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e')]
 end
 
-@test length(zip(cycle(1:3), 1:7)) == 7
-@test length(zip(cycle(1:3), 1:7, cycle(1:3))) == 7
-@test length(zip(1:3,product(1:7,cycle(1:3)))) == 3
-@test length(zip(1:3,product(1:7,cycle(1:3)),8)) == 1
+@test (@inferred length(zip(cycle(1:3), 1:7))) == 7
+@test (@inferred length(zip(cycle(1:3), 1:7, cycle(1:3)))) == 7
+@test (@inferred length(zip(1:3,product(1:7,cycle(1:3))))) == 3
+@test (@inferred length(zip(1:3,product(1:7,cycle(1:3)),8))) == 1
 @test_throws ArgumentError length(zip()) # length of zip of empty tuple
 
 # map
@@ -150,7 +157,7 @@ end
 # take
 # ----
 let t = take(0:2:8, 10), i = 0
-    @test length(collect(t)) == 5 == length(t)
+    @test length(collect(t)) == 5 == @inferred length(t)
 
     for j = t
         @test j == i*2
@@ -167,11 +174,11 @@ let i = 0
     @test i == 10
 end
 
-@test isempty(take(0:2:8, 0))
+@test @inferred isempty(take(0:2:8, 0))
 @test_throws ArgumentError take(0:2:8, -1)
-@test length(take(1:3,typemax(Int))) == 3
-@test length(take(countfrom(1),3)) == 3
-@test length(take(1:6,3)) == 3
+@test (@inferred length(take(1:3,typemax(Int)))) == 3
+@test (@inferred length(take(countfrom(1),3))) == 3
+@test (@inferred length(take(1:6,3))) == 3
 
 # drop
 # ----
@@ -183,15 +190,15 @@ let i = 0
     @test i == 4
 end
 
-@test isempty(drop(0:2:10, 100))
-@test isempty(collect(drop(0:2:10, 100)))
+@test @inferred isempty(drop(0:2:10, 100))
+@test @inferred isempty(collect(drop(0:2:10, 100)))
 @test_throws ArgumentError drop(0:2:8, -1)
-@test length(drop(1:3,typemax(Int))) == 0
-@test length(drop(UInt(1):2, 3)) == 0
-@test length(drop(StepRangeLen(1, 1, UInt(2)), 3)) == 0
-@test Base.IteratorSize(drop(countfrom(1),3)) == Base.IsInfinite()
+@test (@inferred length(drop(1:3,typemax(Int)))) == 0
+@test (@inferred length(drop(UInt(1):2, 3))) == 0
+@test (@inferred length(drop(StepRangeLen(1, 1, UInt(2)), 3))) == 0
+@test (@inferred Base.IteratorSize(drop(countfrom(1),3))) == Base.IsInfinite()
 @test_throws MethodError length(drop(countfrom(1), 3))
-@test Base.IteratorSize(Iterators.drop(Iterators.filter(i -> i>0, 1:10), 2)) == Base.SizeUnknown()
+@test (@inferred Base.IteratorSize(Iterators.drop(Iterators.filter(i -> i>0, 1:10), 2))) == Base.SizeUnknown()
 
 let x = Iterators.drop(Iterators.Stateful("abc"), 2)
     @test !Base.isdone(x, nothing)
@@ -208,7 +215,7 @@ for xs in Any["abc", [1, 2, 3]]
     @test drop(drop(xs, 1), 1) === drop(xs, 2)
     @test take(drop(xs, 1), 1) === drop(take(xs, 2), 1)
     @test take(drop(xs, 3), 0) === drop(take(xs, 2), 3)
-    @test isempty(drop(drop(xs, 2), 2))
+    @test @inferred isempty(drop(drop(xs, 2), 2))
     @test drop(take(drop(xs, 1), 2), 1) === take(drop(xs, 2), 1)
     @test take(drop(take(xs, 3), 1), 1) === take(drop(xs, 1), 1)
 end
@@ -222,7 +229,7 @@ end
     @test collect(takewhile(Returns(true),5:10)) == 5:10
     @test collect(takewhile(isodd,[1,1,2,3])) == [1,1]
     @test collect(takewhile(<(2), takewhile(<(3), [1,1,2,3]))) == [1,1]
-    @test Base.IteratorEltype(typeof(takewhile(<(4),Iterators.map(identity, 1:10)))) isa Base.EltypeUnknown
+    @test (@inferred Base.IteratorEltype(typeof(takewhile(<(4),Iterators.map(identity, 1:10))))) isa Base.EltypeUnknown
 end
 
 # dropwhile
@@ -230,12 +237,29 @@ end
 @testset begin
     @test collect(dropwhile(<(4), 1:10)) == 4:10
     @test collect(dropwhile(<(4), 1:10)) isa Vector{Int}
-    @test isempty(dropwhile(<(4), []))
+    @test @inferred isempty(dropwhile(<(4), []))
     @test collect(dropwhile(Returns(false),1:3)) == 1:3
-    @test isempty(dropwhile(Returns(true), 1:3))
+    @test @inferred isempty(dropwhile(Returns(true), 1:3))
     @test collect(dropwhile(isodd,[1,1,2,3])) == [2,3]
     @test collect(dropwhile(iseven,dropwhile(isodd,[1,1,2,3]))) == [3]
-    @test Base.IteratorEltype(typeof(dropwhile(<(4),Iterators.map(identity, 1:10)))) isa Base.EltypeUnknown
+    @test (@inferred Base.IteratorEltype(typeof(dropwhile(<(4),Iterators.map(identity, 1:10))))) isa Base.EltypeUnknown
+end
+
+# findeach
+# ----------------
+@testset "Iterators.findeach" begin
+    let findeach = Iterators.findeach
+        f = findeach(isnumeric, "abc257wf")
+        @test !(f isa AbstractArray) # it's lazy
+        @test collect(f) == [4,5,6]
+
+        f = findeach(isodd, Dict(1 => 2, 2 => 4, 3 => 6))
+        @test isempty(f)
+        @test isnothing(iterate(f)) # test isempty works correctly
+
+        f = findeach(isodd, Dict(1 => 2, 2 => 3, 3 => 4))
+        @test only(f) == 2
+    end
 end
 
 # cycle
@@ -250,6 +274,22 @@ let i = 0
     @test !Base.isdone(cycle(0:3), 1)
 end
 
+@testset "cycle(iter, n)"  begin
+    @test collect(cycle(0:3, 2)) == [0, 1, 2, 3, 0, 1, 2, 3]
+    @test collect(cycle(Iterators.filter(iseven, 1:4), 2)) == [2, 4, 2, 4]
+    @test collect(take(cycle(countfrom(11), 3), 4)) == 11:14
+
+    @test (@inferred isempty(cycle(1:0))) == (@inferred isempty(cycle(1:0, 3))) == true
+    @test @inferred isempty(cycle(1:5, 0))
+    @test @inferred isempty(cycle(Iterators.filter(iseven, 1:4), 0))
+
+    @test (@inferred eltype(cycle(0:3, 2))) === Int
+    @test (@inferred Base.IteratorEltype(cycle(0:3, 2))) == Base.HasEltype()
+
+    Base.haslength(cycle(0:3, 2)) == false  # but not sure we should test these
+    (@inferred Base.IteratorSize(cycle(0:3, 2))) == Base.SizeUnknown()
+end
+
 # repeated
 # --------
 let i = 0
@@ -266,13 +306,13 @@ let i = 0
         i <= 10 || break
     end
 end
-@test eltype(repeated(0))    == Int
-@test eltype(repeated(0, 5)) == Int
-@test Base.IteratorSize(repeated(0))      == Base.IsInfinite()
-@test Base.IteratorSize(repeated(0, 5))   == Base.HasLength()
-@test Base.IteratorEltype(repeated(0))    == Base.HasEltype()
-@test Base.IteratorEltype(repeated(0, 5)) == Base.HasEltype()
-@test Base.IteratorSize(zip(repeated(0), repeated(0))) == Base.IsInfinite()
+@test (@inferred eltype(repeated(0)))    == Int
+@test (@inferred eltype(repeated(0, 5))) == Int
+@test (@inferred Base.IteratorSize(repeated(0)))      == Base.IsInfinite()
+@test (@inferred Base.IteratorSize(repeated(0, 5)))   == Base.HasLength()
+@test (@inferred Base.IteratorEltype(repeated(0)))    == Base.HasEltype()
+@test (@inferred Base.IteratorEltype(repeated(0, 5))) == Base.HasEltype()
+@test (@inferred Base.IteratorSize(zip(repeated(0), repeated(0)))) == Base.IsInfinite()
 
 # product
 # -------
@@ -284,8 +324,8 @@ for itr in [product(1:0),
             product(1:0, 1:1, 1:2),
             product(1:1, 1:0, 1:2),
             product(1:1, 1:2 ,1:0)]
-    @test isempty(itr)
-    @test isempty(collect(itr))
+    @test @inferred isempty(itr)
+    @test @inferred isempty(collect(itr))
 end
 
 # collect a product - first iterators runs faster
@@ -305,10 +345,10 @@ end
 let (a, b) = (1:3, [4 6;
                     5 7])
     p = product(a, b)
-    @test size(p)    == (3, 2, 2)
-    @test length(p)  == 12
-    @test ndims(p)   == 3
-    @test eltype(p)  == NTuple{2, Int}
+    @test (@inferred size(p))    == (3, 2, 2)
+    @test (@inferred length(p))  == 12
+    @test (@inferred ndims(p))   == 3
+    @test (@inferred eltype(p))  == NTuple{2, Int}
     cp = collect(p)
     for i = 1:3
         @test cp[i, :, :] == [(i, 4) (i, 6);
@@ -336,28 +376,28 @@ let a = 1:2,
     c = Int32(1):Int32(0)
 
     # length
-    @test length(product())        == 1
-    @test length(product(a))       == 2
-    @test length(product(a, b))    == 20
-    @test length(product(a, b, c)) == 0
+    @test (@inferred length(product()))        == 1
+    @test (@inferred length(product(a)))       == 2
+    @test (@inferred length(product(a, b)))    == 20
+    @test (@inferred length(product(a, b, c))) == 0
 
     # size
-    @test size(product())          == tuple()
-    @test size(product(a))         == (2,)
-    @test size(product(a, b))      == (2, 10)
-    @test size(product(a, b, c))   == (2, 10, 0)
+    @test (@inferred size(product()))          == tuple()
+    @test (@inferred size(product(a)))         == (2,)
+    @test (@inferred size(product(a, b)))      == (2, 10)
+    @test (@inferred size(product(a, b, c)))   == (2, 10, 0)
 
     # eltype
-    @test eltype(product())        == Tuple{}
-    @test eltype(product(a))       == Tuple{Int}
-    @test eltype(product(a, b))    == Tuple{Int, Float64}
-    @test eltype(product(a, b, c)) == Tuple{Int, Float64, Int32}
+    @test (@inferred eltype(product()))        == Tuple{}
+    @test (@inferred eltype(product(a)))       == Tuple{Int}
+    @test (@inferred eltype(product(a, b)))    == Tuple{Int, Float64}
+    @test (@inferred eltype(product(a, b, c))) == Tuple{Int, Float64, Int32}
 
     # ndims
-    @test ndims(product())         == 0
-    @test ndims(product(a))        == 1
-    @test ndims(product(a, b))     == 2
-    @test ndims(product(a, b, c))  == 3
+    @test (@inferred ndims(product()))         == 0
+    @test (@inferred ndims(product(a)))        == 1
+    @test (@inferred ndims(product(a, b)))     == 2
+    @test (@inferred ndims(product(a, b, c)))  == 3
 end
 
 # with multidimensional inputs
@@ -377,7 +417,7 @@ let a = randn(4, 4),
                 (4, 4, 3, 3, 3, 2, 2, 2, 2)]
     for (method, fun) in zip([size, ndims, length], [x->x, length, prod])
         for i in 1:length(args)
-            @test method(product(args[i]...)) == method(collect(product(args[i]...))) == fun(sizes[i])
+            @test (@inferred method(product(args[i]...))) == method(collect(product(args[i]...))) == fun(sizes[i])
         end
     end
 end
@@ -393,7 +433,7 @@ let iters = (1:2,
     for method in [size, length, ndims, eltype]
         for i = 1:length(iters)
             args = (iters[i],)
-            @test method(product(args...)) == method(collect(product(args...)))
+            @test (@inferred method(product(args...))) == method(collect(product(args...)))
             for j = 1:length(iters)
                 args = iters[i], iters[j]
                 @test method(product(args...)) == method(collect(product(args...)))
@@ -435,51 +475,51 @@ end
 
 # IteratorSize trait business
 let f1 = Iterators.filter(i->i>0, 1:10)
-    @test Base.IteratorSize(product(f1))               == Base.SizeUnknown()
-    @test Base.IteratorSize(product(1:2, f1))          == Base.SizeUnknown()
-    @test Base.IteratorSize(product(f1, 1:2))          == Base.SizeUnknown()
-    @test Base.IteratorSize(product(f1, f1))           == Base.SizeUnknown()
-    @test Base.IteratorSize(product(f1, countfrom(1))) == Base.IsInfinite()
-    @test Base.IteratorSize(product(countfrom(1), f1)) == Base.IsInfinite()
-end
-@test Base.IteratorSize(product(1:2, countfrom(1)))          == Base.IsInfinite()
-@test Base.IteratorSize(product(countfrom(2), countfrom(1))) == Base.IsInfinite()
-@test Base.IteratorSize(product(countfrom(1), 1:2))          == Base.IsInfinite()
-@test Base.IteratorSize(product(1:2))                        == Base.HasShape{1}()
-@test Base.IteratorSize(product(1:2, 1:2))                   == Base.HasShape{2}()
-@test Base.IteratorSize(product(take(1:2, 1), take(1:2, 1))) == Base.HasShape{2}()
-@test Base.IteratorSize(product(take(1:2, 2)))               == Base.HasShape{1}()
-@test Base.IteratorSize(product([1 2; 3 4]))                 == Base.HasShape{2}()
-@test Base.IteratorSize(product((1,2,3,4), (5, 6, 7, 8)))    == Base.HasShape{2}()  # product of ::HasLength and ::HasLength
-@test Base.IteratorSize(product(1:2, 3:5, 5:6))              == Base.HasShape{3}()  # product of 3 iterators
-@test Base.IteratorSize(product([1 2; 3 4], 1:4))            == Base.HasShape{3}()  # product of ::HasShape{2} with ::HasShape{1}
-@test Base.IteratorSize(product([1 2; 3 4], (1,2)))          == Base.HasShape{3}()  # product of ::HasShape{2} with ::HasLength
+    @test (@inferred Base.IteratorSize(product(f1)))               == Base.SizeUnknown()
+    @test (@inferred Base.IteratorSize(product(1:2, f1)))          == Base.SizeUnknown()
+    @test (@inferred Base.IteratorSize(product(f1, 1:2)))          == Base.SizeUnknown()
+    @test (@inferred Base.IteratorSize(product(f1, f1)))           == Base.SizeUnknown()
+    @test (@inferred Base.IteratorSize(product(f1, countfrom(1)))) == Base.IsInfinite()
+    @test (@inferred Base.IteratorSize(product(countfrom(1), f1))) == Base.IsInfinite()
+end
+@test (@inferred Base.IteratorSize(product(1:2, countfrom(1))))          == Base.IsInfinite()
+@test (@inferred Base.IteratorSize(product(countfrom(2), countfrom(1)))) == Base.IsInfinite()
+@test (@inferred Base.IteratorSize(product(countfrom(1), 1:2)))          == Base.IsInfinite()
+@test (@inferred Base.IteratorSize(product(1:2)))                        == Base.HasShape{1}()
+@test (@inferred Base.IteratorSize(product(1:2, 1:2)))                   == Base.HasShape{2}()
+@test (@inferred Base.IteratorSize(product(take(1:2, 1), take(1:2, 1)))) == Base.HasShape{2}()
+@test (@inferred Base.IteratorSize(product(take(1:2, 2))))               == Base.HasShape{1}()
+@test (@inferred Base.IteratorSize(product([1 2; 3 4])))                 == Base.HasShape{2}()
+@test (@inferred Base.IteratorSize(product((1,2,3,4), (5, 6, 7, 8))))    == Base.HasShape{2}()  # product of ::HasLength and ::HasLength
+@test (@inferred Base.IteratorSize(product(1:2, 3:5, 5:6)))              == Base.HasShape{3}()  # product of 3 iterators
+@test (@inferred Base.IteratorSize(product([1 2; 3 4], 1:4)))            == Base.HasShape{3}()  # product of ::HasShape{2} with ::HasShape{1}
+@test (@inferred Base.IteratorSize(product([1 2; 3 4], (1,2))))          == Base.HasShape{3}()  # product of ::HasShape{2} with ::HasLength
 
 # IteratorEltype trait business
 let f1 = Iterators.filter(i->i>0, 1:10)
-    @test Base.IteratorEltype(product(f1))               == Base.HasEltype() # FIXME? eltype(f1) is Any
-    @test Base.IteratorEltype(product(1:2, f1))          == Base.HasEltype() # FIXME? eltype(f1) is Any
-    @test Base.IteratorEltype(product(f1, 1:2))          == Base.HasEltype() # FIXME? eltype(f1) is Any
-    @test Base.IteratorEltype(product(f1, f1))           == Base.HasEltype() # FIXME? eltype(f1) is Any
-    @test Base.IteratorEltype(product(f1, countfrom(1))) == Base.HasEltype() # FIXME? eltype(f1) is Any
-    @test Base.IteratorEltype(product(countfrom(1), f1)) == Base.HasEltype() # FIXME? eltype(f1) is Any
-end
-@test Base.IteratorEltype(product(1:2, countfrom(1)))          == Base.HasEltype()
-@test Base.IteratorEltype(product(countfrom(1), 1:2))          == Base.HasEltype()
-@test Base.IteratorEltype(product(1:2))                        == Base.HasEltype()
-@test Base.IteratorEltype(product(1:2, 1:2))                   == Base.HasEltype()
-@test Base.IteratorEltype(product(take(1:2, 1), take(1:2, 1))) == Base.HasEltype()
-@test Base.IteratorEltype(product(take(1:2, 2)))               == Base.HasEltype()
-@test Base.IteratorEltype(product([1 2; 3 4]))                 == Base.HasEltype()
-@test Base.IteratorEltype(product())                           == Base.HasEltype()
+    @test (@inferred Base.IteratorEltype(product(f1)))               == Base.HasEltype() # FIXME? eltype(f1) is Any
+    @test (@inferred Base.IteratorEltype(product(1:2, f1)))          == Base.HasEltype() # FIXME? eltype(f1) is Any
+    @test (@inferred Base.IteratorEltype(product(f1, 1:2)))          == Base.HasEltype() # FIXME? eltype(f1) is Any
+    @test (@inferred Base.IteratorEltype(product(f1, f1)))           == Base.HasEltype() # FIXME? eltype(f1) is Any
+    @test (@inferred Base.IteratorEltype(product(f1, countfrom(1)))) == Base.HasEltype() # FIXME? eltype(f1) is Any
+    @test (@inferred Base.IteratorEltype(product(countfrom(1), f1))) == Base.HasEltype() # FIXME? eltype(f1) is Any
+end
+@test (@inferred Base.IteratorEltype(product(1:2, countfrom(1))))          == Base.HasEltype()
+@test (@inferred Base.IteratorEltype(product(countfrom(1), 1:2)))          == Base.HasEltype()
+@test (@inferred Base.IteratorEltype(product(1:2)))                        == Base.HasEltype()
+@test (@inferred Base.IteratorEltype(product(1:2, 1:2)))                   == Base.HasEltype()
+@test (@inferred Base.IteratorEltype(product(take(1:2, 1), take(1:2, 1)))) == Base.HasEltype()
+@test (@inferred Base.IteratorEltype(product(take(1:2, 2))))               == Base.HasEltype()
+@test (@inferred Base.IteratorEltype(product([1 2; 3 4])))                 == Base.HasEltype()
+@test (@inferred Base.IteratorEltype(product()))                           == Base.HasEltype()
 
 @test collect(product(1:2,3:4)) == [(1,3) (1,4); (2,3) (2,4)]
-@test isempty(collect(product(1:0,1:2)))
-@test length(product(1:2,1:10,4:6)) == 60
-@test Base.IteratorSize(product(1:2, countfrom(1))) == Base.IsInfinite()
+@test @inferred isempty(collect(product(1:0,1:2)))
+@test (@inferred length(product(1:2,1:10,4:6))) == 60
+@test (@inferred Base.IteratorSize(product(1:2, countfrom(1)))) == Base.IsInfinite()
 
 @test Base.iterate(product()) == ((), true)
-@test Base.iterate(product(), 1) == nothing
+@test Base.iterate(product(), 1) === nothing
 
 # intersection
 @test intersect(product(1:3, 4:6), product(2:4, 3:5)) == Iterators.ProductIterator((2:3, 4:5))
@@ -492,21 +532,31 @@ end
 @test collect(flatten(Any[flatten(Any[1:2, 4:5]), flatten(Any[6:7, 8:9])])) == Any[1,2,4,5,6,7,8,9]
 @test collect(flatten(Any[flatten(Any[1:2, 6:5]), flatten(Any[6:7, 8:9])])) == Any[1,2,6,7,8,9]
 @test collect(flatten(Any[2:1])) == Any[]
-@test eltype(flatten(UnitRange{Int8}[1:2, 3:4])) == Int8
-@test length(flatten(zip(1:3, 4:6))) == 6
-@test length(flatten(1:6)) == 6
+@test (@inferred eltype(flatten(UnitRange{Int8}[1:2, 3:4]))) == Int8
+@test (@inferred eltype(flatten(([1, 2], [3.0, 4.0])))) == Real
+@test (@inferred eltype(flatten((a = [1, 2], b = Int8[3, 4])))) == Signed
+@test (@inferred eltype(flatten((Int[], Nothing[], Int[])))) == Union{Int, Nothing}
+@test (@inferred eltype(flatten((String[],)))) == String
+@test (@inferred eltype(flatten((Int[], UInt[], Int8[],)))) == Integer
+@test (@inferred eltype(flatten((; a = Int[], b = Nothing[], c = Int[])))) == Union{Int, Nothing}
+@test (@inferred eltype(flatten((; a = String[],)))) == String
+@test (@inferred eltype(flatten((; a = Int[], b = UInt[], c = Int8[],)))) == Integer
+@test (@inferred eltype(flatten(()))) == Union{}
+@test (@inferred eltype(flatten((;)))) == Union{}
+@test (@inferred length(flatten(zip(1:3, 4:6)))) == 6
+@test (@inferred length(flatten(1:6))) == 6
 @test collect(flatten(Any[])) == Any[]
 @test collect(flatten(())) == Union{}[]
 @test_throws ArgumentError length(flatten(NTuple[(1,), ()])) # #16680
 @test_throws ArgumentError length(flatten([[1], [1]]))
 
 @testset "IteratorSize trait for flatten" begin
-    @test Base.IteratorSize(Base.Flatten((i for i=1:2) for j=1:1)) == Base.SizeUnknown()
-    @test Base.IteratorSize(Base.Flatten((1,2))) == Base.HasLength()
-    @test Base.IteratorSize(Base.Flatten(1:2:4)) == Base.HasLength()
+    @test (@inferred Base.IteratorSize(Base.Flatten((i for i=1:2) for j=1:1))) == Base.SizeUnknown()
+    @test (@inferred Base.IteratorSize(Base.Flatten((1,2)))) == Base.HasLength()
+    @test (@inferred Base.IteratorSize(Base.Flatten(1:2:4))) == Base.HasLength()
 end
 
-@test Base.IteratorEltype(Base.Flatten((i for i=1:2) for j=1:1)) == Base.EltypeUnknown()
+@test (@inferred Base.IteratorEltype(Base.Flatten((i for i=1:2) for j=1:1))) == Base.EltypeUnknown()
 # see #29112, #29464, #29548
 @test Base.return_types(Base.IteratorEltype, Tuple{Array}) == [Base.HasEltype]
 
@@ -626,21 +676,21 @@ end
     @test_throws ArgumentError partition(1:10, -1)
     @test_throws ArgumentError partition(1:0, 0)
     @test_throws ArgumentError partition(1:0, -1)
-    @test isempty(partition(1:0, 1))
-    @test isempty(partition(CartesianIndices((0,1)), 1))
+    @test @inferred isempty(partition(1:0, 1))
+    @test @inferred isempty(partition(CartesianIndices((0,1)), 1))
 end
 @testset "exact partition eltypes" for a in (Base.OneTo(24), 1:24, 1:1:24, LinRange(1,10,24), .1:.1:2.4, Vector(1:24),
                                              CartesianIndices((4, 6)), Dict((1:24) .=> (1:24)))
     P = partition(a, 2)
-    @test eltype(P) === typeof(first(P))
-    @test Iterators.IteratorEltype(P) == Iterators.HasEltype()
+    @test (@inferred eltype(P)) === typeof(first(P))
+    @test (@inferred Iterators.IteratorEltype(P)) == Iterators.HasEltype()
     if a isa AbstractArray
         P = partition(vec(a), 2)
-        @test eltype(P) === typeof(first(P))
+        @test (@inferred eltype(P)) === typeof(first(P))
         P = partition(reshape(a, 6, 4), 2)
-        @test eltype(P) === typeof(first(P))
+        @test (@inferred eltype(P)) === typeof(first(P))
         P = partition(reshape(a, 2, 3, 4), 2)
-        @test eltype(P) === typeof(first(P))
+        @test (@inferred eltype(P)) === typeof(first(P))
     end
 end
 
@@ -661,19 +711,19 @@ let s = "Monkey 🙈🙊🙊"
     @test tf(1) == "M|o|n|k|e|y| |🙈|🙊|🙊"
 end
 
-@test Base.IteratorEltype(partition([1,2,3,4], 2)) == Base.HasEltype()
-@test Base.IteratorEltype(partition((2x for x in 1:3), 2)) == Base.EltypeUnknown()
+@test (@inferred Base.IteratorEltype(partition([1,2,3,4], 2))) == Base.HasEltype()
+@test (@inferred Base.IteratorEltype(partition((2x for x in 1:3), 2))) == Base.EltypeUnknown()
 
 # take and friends with arbitrary integers (#19214)
 for T in (UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int128, BigInt)
-    @test length(take(1:6, T(3))) == 3
-    @test length(drop(1:6, T(3))) == 3
-    @test length(repeated(1, T(5))) == 5
+    @test (@inferred length(take(1:6, T(3)))) == 3
+    @test (@inferred length(drop(1:6, T(3)))) == 3
+    @test (@inferred length(repeated(1, T(5)))) == 5
     @test collect(partition(1:5, T(5)))[1] == 1:5
 end
 
 @testset "collect finite iterators issue #12009" begin
-    @test eltype(collect(enumerate(Iterators.Filter(x -> x>0, randn(10))))) == Tuple{Int, Float64}
+    @test (@inferred eltype(collect(enumerate(Iterators.Filter(x -> x>0, randn(10)))))) == Tuple{Int, Float64}
 end
 
 @testset "product iterator infinite loop" begin
@@ -682,8 +732,8 @@ end
 
 @testset "filter empty iterable #16704" begin
     arr = filter(Returns(true), 1:0)
-    @test length(arr) == 0
-    @test eltype(arr) == Int
+    @test (@inferred length(arr)) == 0
+    @test (@inferred eltype(arr)) == Int
 end
 
 @testset "Pairs type" begin
@@ -697,19 +747,19 @@ end
              )
         d = pairs(A)
         @test d === pairs(d)
-        @test isempty(d) == isempty(A)
-        @test length(d) == length(A)
+        @test (@inferred isempty(d)) == isempty(A)
+        @test (@inferred length(d)) == length(A)
         @test keys(d) == keys(A)
         @test values(d) == A
-        @test Base.IteratorSize(d) == Base.IteratorSize(A)
-        @test Base.IteratorEltype(d) == Base.HasEltype()
-        @test Base.IteratorSize(pairs([1 2;3 4])) isa Base.HasShape{2}
-        @test isempty(d) || haskey(d, first(keys(d)))
+        @test (@inferred Base.IteratorSize(d)) == Base.IteratorSize(A)
+        @test (@inferred Base.IteratorEltype(d)) == Base.HasEltype()
+        @test (@inferred Base.IteratorSize(pairs([1 2;3 4]))) isa Base.HasShape{2}
+        @test (@inferred isempty(d)) || haskey(d, first(keys(d)))
         @test collect(v for (k, v) in d) == collect(A)
         if A isa NamedTuple
             K = Symbol
             V = isempty(d) ? Union{} : Float64
-            @test isempty(d) || haskey(d, :a)
+            @test (@inferred isempty(d)) || haskey(d, :a)
             @test !haskey(d, :abc)
             @test !haskey(d, 1)
             @test get(A, :key) do; 99; end == 99
@@ -729,7 +779,7 @@ end
         end
         @test keytype(d) == K
         @test valtype(d) == V
-        @test eltype(d) == Pair{K, V}
+        @test (@inferred eltype(d)) == Pair{K, V}
     end
 
     let io = IOBuffer()
@@ -776,7 +826,7 @@ end
 
 @testset "Iterators.Stateful" begin
     let a = @inferred(Iterators.Stateful("abcdef"))
-        @test !isempty(a)
+        @test !(@inferred isempty(a))
         @test popfirst!(a) == 'a'
         @test collect(Iterators.take(a, 3)) == ['b','c','d']
         @test collect(a) == ['e', 'f']
@@ -787,70 +837,72 @@ end
         @test peek(a) == 3
         @test sum(a) == 7
     end
-    @test eltype(Iterators.Stateful("a")) == Char
+    @test (@inferred eltype(Iterators.Stateful("a"))) == Char
     # Interaction of zip/Stateful
     let a = Iterators.Stateful("a"), b = ""
-    @test isempty(collect(zip(a,b)))
-    @test !isempty(a)
-    @test isempty(collect(zip(b,a)))
-    @test !isempty(a)
+    @test @inferred isempty(collect(zip(a,b)))
+    @test !(@inferred isempty(a))
+    @test @inferred isempty(collect(zip(b,a)))
+    @test !(@inferred isempty(a))
     end
     let a = Iterators.Stateful("a"), b = "", c = Iterators.Stateful("c")
-        @test isempty(collect(zip(a,b,c)))
-        @test !isempty(a)
-        @test !isempty(c)
-        @test isempty(collect(zip(a,c,b)))
-        @test !isempty(a)
-        @test !isempty(c)
-        @test isempty(collect(zip(b,a,c)))
-        @test !isempty(a)
-        @test !isempty(c)
-        @test isempty(collect(zip(b,c,a)))
-        @test !isempty(a)
-        @test !isempty(c)
-        @test isempty(collect(zip(c,a,b)))
-        @test !isempty(a)
-        @test !isempty(c)
-        @test isempty(collect(zip(c,b,a)))
-        @test !isempty(a)
-        @test !isempty(c)
+        @test @inferred isempty(collect(zip(a,b,c)))
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
+        @test @inferred isempty(collect(zip(a,c,b)))
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
+        @test @inferred isempty(collect(zip(b,a,c)))
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
+        @test @inferred isempty(collect(zip(b,c,a)))
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
+        @test @inferred isempty(collect(zip(c,a,b)))
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
+        @test @inferred isempty(collect(zip(c,b,a)))
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
     end
     let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc")
-        @test length(collect(zip(a,b,c))) == 1
-        @test !isempty(a)
-        @test !isempty(c)
+        @test (@inferred length(collect(zip(a,b,c)))) == 1
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
     end
     let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc")
-        @test length(collect(zip(a,c,b))) == 1
-        @test !isempty(a)
-        @test !isempty(c)
+        @test (@inferred length(collect(zip(a,c,b)))) == 1
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
     end
     let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc")
-        @test length(collect(zip(b,a,c))) == 1
-        @test !isempty(a)
-        @test !isempty(c)
+        @test (@inferred length(collect(zip(b,a,c)))) == 1
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
     end
     let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc")
-        @test length(collect(zip(b,c,a))) == 1
-        @test !isempty(a)
-        @test !isempty(c)
+        @test (@inferred length(collect(zip(b,c,a)))) == 1
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
     end
     let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc")
-        @test length(collect(zip(c,a,b))) == 1
-        @test !isempty(a)
-        @test !isempty(c)
+        @test (@inferred length(collect(zip(c,a,b)))) == 1
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
     end
     let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc")
-        @test length(collect(zip(c,b,a))) == 1
-        @test !isempty(a)
-        @test !isempty(c)
+        @test (@inferred length(collect(zip(c,b,a)))) == 1
+        @test !(@inferred isempty(a))
+        @test !(@inferred isempty(c))
     end
     let z = zip(Iterators.Stateful("ab"), Iterators.Stateful("b"), Iterators.Stateful("c"))
         v, s = iterate(z)
         @test Base.isdone(z, s)
     end
-    # Stateful wrapping mutable iterators of known length (#43245)
-    @test length(Iterators.Stateful(Iterators.Stateful(1:5))) == 5
+    # Stateful does not define length
+    let s = Iterators.Stateful(Iterators.Stateful(1:5))
+        @test_throws MethodError length(s)
+    end
 end
 
 @testset "pair for Svec" begin
@@ -862,6 +914,10 @@ end
 @testset "inference for large zip #26765" begin
     x = zip(1:2, ["a", "b"], (1.0, 2.0), Base.OneTo(2), Iterators.repeated("a"), 1.0:0.2:2.0,
             (1 for i in 1:2), Iterators.Stateful(["a", "b", "c"]), (1.0 for i in 1:2, j in 1:3))
+    @test (@inferred Base.IteratorSize(x)) isa Base.SizeUnknown
+    x = zip(1:2, ["a", "b"], (1.0, 2.0), Base.OneTo(2), Iterators.repeated("a"), 1.0:0.2:2.0,
+            (1 for i in 1:2), Iterators.cycle(Iterators.Stateful(["a", "b", "c"])), (1.0 for i in 1:2, j in 1:3))
+    @test (@inferred Base.IteratorSize(x)) isa Base.HasLength
     @test @inferred(length(x)) == 2
     z = Iterators.filter(x -> x[1] >= 1, x)
     @test @inferred(eltype(z)) <: Tuple{Int,String,Float64,Int,String,Float64,Any,String,Any}
@@ -870,23 +926,23 @@ end
 end
 
 @testset "Stateful fix #30643" begin
-    @test Base.IteratorSize(1:10) isa Base.HasShape
+    @test (@inferred Base.IteratorSize(1:10)) isa Base.HasShape{1}
     a = Iterators.Stateful(1:10)
-    @test Base.IteratorSize(a) isa Base.HasLength
-    @test length(a) == 10
+    @test (@inferred Base.IteratorSize(a)) isa Base.SizeUnknown
+    @test !Base.isdone(a)
     @test length(collect(a)) == 10
-    @test length(a) == 0
+    @test Base.isdone(a)
     b = Iterators.Stateful(Iterators.take(1:10,3))
-    @test Base.IteratorSize(b) isa Base.HasLength
-    @test length(b) == 3
+    @test (@inferred Base.IteratorSize(b)) isa Base.SizeUnknown
+    @test !Base.isdone(b)
     @test length(collect(b)) == 3
-    @test length(b) == 0
+    @test Base.isdone(b)
     c = Iterators.Stateful(Iterators.countfrom(1))
-    @test Base.IteratorSize(c) isa Base.IsInfinite
-    @test length(Iterators.take(c,3)) == 3
+    @test (@inferred Base.IteratorSize(c)) isa Base.IsInfinite
+    @test !Base.isdone(Iterators.take(c,3))
     @test length(collect(Iterators.take(c,3))) == 3
     d = Iterators.Stateful(Iterators.filter(isodd,1:10))
-    @test Base.IteratorSize(d) isa Base.SizeUnknown
+    @test (@inferred Base.IteratorSize(d)) isa Base.SizeUnknown
     @test length(collect(Iterators.take(d,3))) == 3
     @test length(collect(d)) == 2
     @test length(collect(d)) == 0
@@ -928,7 +984,7 @@ end
 end
 
 @testset "flatten empty tuple" begin
-    @test isempty(collect(Iterators.flatten(())))
+    @test @inferred isempty(collect(Iterators.flatten(())))
 end
 
 @testset "Iterators.accumulate" begin
@@ -940,10 +996,10 @@ end
     @test collect(Iterators.accumulate(+, (x for x in [true])))::Vector{Int} == [1]
     @test collect(Iterators.accumulate(+, (x for x in [true, true, false])))::Vector{Int} == [1, 2, 2]
     @test collect(Iterators.accumulate(+, (x for x in [true]), init=10.0))::Vector{Float64} == [11.0]
-    @test length(Iterators.accumulate(+, [10,20,30])) == 3
-    @test size(Iterators.accumulate(max, rand(2,3))) == (2,3)
-    @test Base.IteratorSize(Iterators.accumulate(max, rand(2,3))) === Base.IteratorSize(rand(2,3))
-    @test Base.IteratorEltype(Iterators.accumulate(*, ())) isa Base.EltypeUnknown
+    @test (@inferred length(Iterators.accumulate(+, [10,20,30]))) == 3
+    @test (@inferred size(Iterators.accumulate(max, rand(2,3)))) == (2,3)
+    @test (@inferred Base.IteratorSize(Iterators.accumulate(max, rand(2,3)))) === Base.IteratorSize(rand(2,3))
+    @test (@inferred Base.IteratorEltype(Iterators.accumulate(*, ()))) isa Base.EltypeUnknown
 end
 
 @testset "Base.accumulate" begin
@@ -952,21 +1008,71 @@ end
     @test accumulate(+, (x^2 for x in 1:3); init=100) == [101, 105, 114]
 end
 
-
-@testset "Iterators.tail_if_any" begin
-    @test Iterators.tail_if_any(()) == ()
-    @test Iterators.tail_if_any((1, 2)) == (2,)
-    @test Iterators.tail_if_any((1,)) == ()
+@testset "issue #58109" begin
+    i = Iterators.map(identity, 3)
+    j = Iterators.map(sqrt, 7)
+    @test (@inferred Base.IteratorSize(i)) === @inferred Base.IteratorSize(eltype([i, j]))
 end
 
 @testset "IteratorSize trait for zip" begin
-    @test Base.IteratorSize(zip()) == Base.IsInfinite()                     # for zip of empty tuple
-    @test Base.IteratorSize(zip((1,2,3), repeated(0))) == Base.HasLength()  # for zip of ::HasLength and ::IsInfinite
-    @test Base.IteratorSize(zip( 1:5, repeated(0) )) == Base.HasLength()    # for zip of ::HasShape and ::IsInfinite
-    @test Base.IteratorSize(zip(repeated(0), (1,2,3))) == Base.HasLength()  # for zip of ::IsInfinite and ::HasLength
-    @test Base.IteratorSize(zip(repeated(0), 1:5 )) == Base.HasLength()     # for zip of ::IsInfinite and ::HasShape
-    @test Base.IteratorSize(zip((1,2,3), 1:5) ) == Base.HasLength()         # for zip of ::HasLength and ::HasShape
-    @test Base.IteratorSize(zip(1:5, (1,2,3)) ) == Base.HasLength()         # for zip of ::HasShape and ::HasLength
+    @test (@inferred Base.IteratorSize(zip())) == Base.IsInfinite()                     # for zip of empty tuple
+    @test (@inferred Base.IteratorSize(zip((1,2,3), repeated(0)))) == Base.HasLength()  # for zip of ::HasLength and ::IsInfinite
+    @test (@inferred Base.IteratorSize(zip( 1:5, repeated(0) ))) == Base.HasLength()    # for zip of ::HasShape and ::IsInfinite
+    @test (@inferred Base.IteratorSize(zip(repeated(0), (1,2,3)))) == Base.HasLength()  # for zip of ::IsInfinite and ::HasLength
+    @test (@inferred Base.IteratorSize(zip(repeated(0), 1:5 ))) == Base.HasLength()     # for zip of ::IsInfinite and ::HasShape
+    @test (@inferred Base.IteratorSize(zip((1,2,3), 1:5) )) == Base.HasLength()         # for zip of ::HasLength and ::HasShape
+    @test (@inferred Base.IteratorSize(zip(1:5, (1,2,3)) )) == Base.HasLength()         # for zip of ::HasShape and ::HasLength
+end
+
+@testset "foldability inference" begin
+    functions = (eltype, Base.IteratorSize, Base.IteratorEltype)
+    helper(type::UnionAll) = (type{n} for n ∈ 1:10) # helper for trying with multiple iterator counts
+    iterator_types = (  # each element here takes an iterator type as first parameter
+        Base.Generator,
+        Iterators.Reverse,
+        Iterators.Enumerate,
+        Iterators.Filter{F, I} where {I, F},
+        Iterators.Accumulate{F, I} where {I, F},
+        Iterators.Rest,
+        Iterators.Count,
+        Iterators.Take,
+        Iterators.Drop,
+        Iterators.TakeWhile,
+        Iterators.DropWhile,
+        Iterators.Cycle,
+        Iterators.Repeated,
+        Iterators.PartitionIterator,
+        Iterators.Stateful,
+        helper(Iterators.ProductIterator{Tuple{Vararg{I, N}}} where {N, I})...,
+    )
+    iterator_types_extra = (
+        iterator_types...,
+        helper(Iterators.Zip{Tuple{Vararg{I, N}}} where {N, I})...,
+        helper(Iterators.Flatten{Tuple{Vararg{I, N}}} where {N, I})...,
+    )
+    simple_types = (Vector, NTuple, NamedTuple{X, Y} where {X, Y <: NTuple})
+    example_type = Tuple{Bool, Int8, Vararg{Int16, 20}}
+    function test_foldability_inference(f, S::Type)
+        @test Core.Compiler.is_foldable(Base.infer_effects(f, Tuple{S}))
+        @test Core.Compiler.is_foldable(Base.infer_effects(f, Tuple{Type{<:S}}))
+    end
+    @testset "concrete" begin  # weaker test, only checks foldability for certain concrete types
+        @testset "f: $f" for f ∈ functions
+            for U ∈ iterator_types_extra
+                test_foldability_inference(f, U{example_type})
+            end
+        end
+    end
+    @testset "nonconcrete" begin  # stronger test, checks foldability for large families of types
+        @testset "f: $f" for f ∈ functions
+            for V ∈ simple_types
+                test_foldability_inference(f, V)  # sanity check
+                for U ∈ iterator_types
+                    test_foldability_inference(f, U{<:V})
+                end
+            end
+        end
+    end
 end
 
 @testset "proper partition for non-1-indexed vector" begin
@@ -974,7 +1080,7 @@ end
 end
 
 @testset "Iterators.peel" begin
-    @test Iterators.peel([]) == nothing
+    @test Iterators.peel([]) === nothing
     @test Iterators.peel(1:10)[1] == 1
     @test Iterators.peel(1:10)[2] |> collect == 2:10
     @test Iterators.peel(x^2 for x in 2:4)[1] == 4
@@ -984,12 +1090,13 @@ end
 @testset "last for iterators" begin
     @test last(Iterators.map(identity, 1:3)) == 3
     @test last(Iterators.filter(iseven, (Iterators.map(identity, 1:3)))) == 2
+    @test last(enumerate(Iterators.flatten((1,2,3)))) == (3,3)
 end
 
 @testset "isempty and isdone for Generators" begin
     itr = eachline(IOBuffer("foo\n"))
     gen = (x for x in itr)
-    @test !isempty(gen)
+    @test !(@inferred isempty(gen))
     @test !Base.isdone(gen)
     @test collect(gen) == ["foo"]
 end
@@ -1002,6 +1109,146 @@ end
     @test v == ()
 end
 
+@testset "collect partition substring" begin
+    @test collect(Iterators.partition(lstrip("01111", '0'), 2)) == ["11", "11"]
+end
+
+@testset "IterableStringPairs" begin
+    for s in ["", "a", "abcde", "γ", "∋γa"]
+        for T in (String, SubString, GenericString)
+            sT = T(s)
+            p = pairs(sT)
+            @test collect(p) == [k=>v for (k,v) in zip(keys(sT), sT)]
+            rv = Iterators.reverse(p)
+            @test collect(rv) == reverse([k=>v for (k,v) in zip(keys(sT), sT)])
+            rrv = Iterators.reverse(rv)
+            @test collect(rrv) == collect(p)
+        end
+    end
+end
+
+let itr = (i for i in 1:9) # Base.eltype == Any
+    @test first(Iterators.partition(itr, 3)) isa Vector{Any}
+    @test collect(zip(repeat([Iterators.Stateful(itr)], 3)...)) == [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
+end
+
+@testset "map/reduce/mapreduce without an iterator argument" begin
+    maps = map(Returns, (nothing, 3, 3:2, 3:3, (), (3,)))
+    mappers1 = (Iterators.map, map, foreach, reduce, foldl, foldr)
+    mappers2 = (mapreduce, mapfoldl, mapfoldr)
+
+    @testset "map/reduce" begin
+        @testset "r: $r" for r ∈ mappers1
+            @testset "f: $f" for f ∈ maps
+                @test_throws MethodError r(f)
+                @test !applicable(r, f)
+                @test !hasmethod(r, Tuple{typeof(f)})
+            end
+        end
+    end
+
+    @testset "mapreduce" begin
+        @testset "mr: $mr" for mr ∈ mappers2
+            @testset "f: $f" for f ∈ maps
+                @testset "g: $g" for g ∈ maps
+                    @test_throws MethodError mr(f, g)
+                    @test !applicable(mr, f, g)
+                    @test !hasmethod(mr, Tuple{typeof(f),typeof(g)})
+                end
+            end
+        end
+    end
+end
+
+@testset "nth" begin
+    Z = Array{Int,0}(undef)
+    Z[] = 17
+    it_result_pairs = Dict(
+        (Z, 1) => 17,
+        (collect(1:100), 23) => 23,
+        (10:6:1000, 123) => 10 + 6 * 122,
+        ("∀ϵ>0", 3) => '>',
+        ((1, 3, 5, 10, 78), 2) => 3,
+        (reshape(1:30, (5, 6)), 21) => 21,
+        (3, 1) => 3,
+        (true, 1) => true,
+        ('x', 1) => 'x',
+        (4 => 5, 2) => 5,
+        (view(Z), 1) => 17,
+        (view(reshape(1:30, (5, 6)), 2:4, 2:6), 10) => 22,
+        ((x^2 for x in 1:10), 9) => 81,
+        (Iterators.Filter(isodd, 1:10), 3) => 5,
+        (Iterators.flatten((1:10, 50:60)), 15) => 54,
+        (pairs(50:60), 7) => 7 => 56,
+        (zip(1:10, 21:30, 51:60), 6) => (6, 26, 56),
+        (Iterators.product(1:3, 10:12), 3) => (3, 10),
+        (Iterators.repeated(3.14159, 5), 4) => 3.14159,
+        ((a=2, b=3, c=5, d=7, e=11), 4) => 7,
+        (Iterators.cycle(collect(1:100)), 9999) => 99,
+        (Iterators.cycle([1, 2, 3, 4, 5], 5), 25) => 5,
+        (Iterators.cycle("String", 10), 16) => 'i',
+        (Iterators.cycle(((),)), 1000) => ()
+    )
+
+    @testset "iter: $IT" for (IT, n) in keys(it_result_pairs)
+        @test it_result_pairs[(IT, n)] == nth(IT, n)
+        @test_throws BoundsError nth(IT, -42)
+
+        IT isa Iterators.Cycle && continue # cycles are infinite so never OOB
+        @test_throws BoundsError nth(IT, 999999999)
+    end
+
+    empty_cycle = Iterators.cycle([])
+    @test_throws BoundsError nth(empty_cycle, 42)
+
+    # test the size unknown branch for cycles
+    # only generate odd numbers so we know the actual length
+    # but the iterator is still SizeUnknown()
+    it_size_unknown = Iterators.filter(isodd, 1:2:10)
+    @test Base.IteratorSize(it_size_unknown) isa Base.SizeUnknown
+    @test length(collect(it_size_unknown)) == 5
+
+    cycle_size_unknown = Iterators.cycle(it_size_unknown)
+    finite_cycle_size_unknown = Iterators.cycle(it_size_unknown, 5)
+    @test nth(cycle_size_unknown, 2) == 3
+    @test nth(cycle_size_unknown, 20) == 9 # mod1(20, 5) = 5, wraps 4 times
+    @test nth(finite_cycle_size_unknown, 2) == 3
+    @test nth(finite_cycle_size_unknown, 20) == 9
+    @test_throws BoundsError nth(finite_cycle_size_unknown, 30) # only wraps 5 times, max n is 5 * 5 = 25
+end
+
+@testset "Iterators docstrings" begin
+    @test isempty(Docs.undocumented_names(Iterators))
+end
+
+# Filtered list comprehension (`Filter` construct) type inference
+@test Base.infer_return_type((Vector{Any},)) do xs
+    [x for x in xs if x isa Int]
+end == Vector{Int}
+
+@testset "issue #58922" begin
+    # `last` short circuits correctly
+    @test last(zip(1:10, 2:11)) == (10, 11)  # same length
+    @test last(zip(1:3, 2:11)) == (3, 4)     # different length
+
+    # Finite-guarded zip iterator: one iterator bounded and the other is not
+    @test last(zip(1:3, Iterators.countfrom(2))) == (3, 4)
+    @test last(zip(1:3, Iterators.cycle(('x', 'y')))) == (3, 'x')
+    @test last(zip(1:3, Iterators.repeated('x'))) == (3, 'x')
+    @test last(zip(OffsetArray(1:10, 2), OffsetArray(1:10, 3))) == (10, 10)
+
+    # Cannot statically know length of zipped iterator if any of its components are of
+    # unknown length
+    @test_throws MethodError last(zip(1:3, Iterators.filter(x -> x > 0, -5:5))) # (3, 3)
+    @test_throws MethodError last(zip(Iterators.filter(x -> x > 0, -5:5), 1:3)) # (3, 3)
+    @test_throws MethodError last(zip(1:10, Iterators.filter(x -> x > 0, -5:5))) # (5, 5)
+
+    # We also can't know the length of zipped iterators when all constituents are of an
+    # unknown length.  In this test, the answer is (5, 4), but we can't know that without
+    # a greedy algorithm
+    @test_throws MethodError last(zip(Iterators.filter(x -> x > 0, -5:5), Iterators.filter(x -> x % 2 == 0, -5:5)))  # (5, 4)
+end
+
 @testset "unzip" begin
     for itrs in ((1:3,), (1:3, 4:6), (1:3,4:6,7:9),
                  ((), ()), (Bool[], Int8[]),
diff --git a/test/keywordargs.jl b/test/keywordargs.jl
index 0aed0544b7e2e..43013ab1d721e 100644
--- a/test/keywordargs.jl
+++ b/test/keywordargs.jl
@@ -394,3 +394,9 @@ let m = first(methods(Core.kwcall, (NamedTuple,typeof(kwf1),Vararg)))
     @test Core.kwcall(1) == "hi 1"
     @test which(Core.kwcall, (Int,)).name === :kwcall
 end
+
+# issue #50518
+function f50518(xs...=["a", "b", "c"]...; debug=false)
+    return xs[1]
+end
+@test f50518() == f50518(;debug=false) == "a"
diff --git a/test/llvmcall.jl b/test/llvmcall.jl
index f7f6b44b29e62..9fd0505e24319 100644
--- a/test/llvmcall.jl
+++ b/test/llvmcall.jl
@@ -70,13 +70,13 @@ end
        ret i32 %3""", Int32, Tuple{Int32, Int32},
         Int32(1), Int32(2))) # llvmcall must be compiled to be called
 
-# Test whether declarations work properly
+#Since LLVM 18, LLVM does a best effort to automatically include the intrinsics
 function undeclared_ceil(x::Float64)
     llvmcall("""%2 = call double @llvm.ceil.f64(double %0)
         ret double %2""", Float64, Tuple{Float64}, x)
 end
-@test_throws ErrorException undeclared_ceil(4.2)
-@test_throws ErrorException undeclared_ceil(4.2)
+@test undeclared_ceil(4.2) == 5.0
+@test undeclared_ceil(4.2) == 5.0
 
 function declared_floor(x::Float64)
     llvmcall(
@@ -143,36 +143,6 @@ function call_jl_errno()
 end
 call_jl_errno()
 
-module ObjLoadTest
-    using Base: llvmcall, @ccallable
-    using Test
-    didcall = false
-    @ccallable Cvoid function jl_the_callback()
-        global didcall
-        didcall = true
-        nothing
-    end
-    @test_throws(ErrorException("@ccallable was already defined for this method name"),
-                 @eval @ccallable Cvoid jl_the_callback(not_the_method::Int) = "other")
-    # Make sure everything up until here gets compiled
-    @test jl_the_callback() === nothing
-    @test jl_the_callback(1) == "other"
-    didcall = false
-    function do_the_call()
-        llvmcall(
-            ("""declare void @jl_the_callback()
-                define void @entry() #0 {
-                0:
-                    call void @jl_the_callback()
-                    ret void
-                }
-                attributes #0 = { alwaysinline }
-            """, "entry"),Cvoid,Tuple{})
-    end
-    do_the_call()
-    @test didcall
-end
-
 # Test for proper parenting
 local foo
 function foo()
@@ -185,26 +155,6 @@ function foo()
 end
 code_llvm(devnull, foo, ())
 
-module CcallableRetTypeTest
-    using Base: llvmcall, @ccallable
-    using Test
-    @ccallable function jl_test_returns_float()::Float64
-        return 42
-    end
-    function do_the_call()
-        llvmcall(
-            ("""declare double @jl_test_returns_float()
-                define double @entry() #0 {
-                0:
-                    %1 = call double @jl_test_returns_float()
-                    ret double %1
-                }
-                attributes #0 = { alwaysinline }
-            """, "entry"),Float64,Tuple{})
-    end
-    @test do_the_call() === 42.0
-end
-
 # Issue #48093 - test that non-external globals are not deduplicated
 function kernel()
     Base.llvmcall(("""
diff --git a/test/llvmcall2.jl b/test/llvmcall2.jl
index 07b27fc407433..e3e89bb916f2d 100644
--- a/test/llvmcall2.jl
+++ b/test/llvmcall2.jl
@@ -73,3 +73,12 @@ end
     jl_str = unsafe_string(str)
     @test length(jl_str) > 4
 end
+
+
+# boolean structs
+const NT4I = NTuple{4, VecElement{Int}}
+const NT4B = NTuple{4, VecElement{Bool}}
+f_nt4b(x, y) = ccall("llvm.sadd.with.overflow", llvmcall, Pair{NT4B, NT4B}, (NT4B, NT4B), x, y)
+f_nt4i(x, y) = ccall("llvm.sadd.with.overflow", llvmcall, Pair{NT4I, NT4B}, (NT4I, NT4I), x, y)
+@test f_nt4b((false, true, false, true), (false, false, true, true)) === (NT4B((false, true, true, false)) => NT4B((false, false, false, true)))
+@test f_nt4i((typemin(Int), 0, typemax(Int), typemax(Int)), (-1, typemax(Int),-1, 1)) === (NT4I((typemax(Int), typemax(Int), typemax(Int)-1, typemin(Int))) => NT4B((true, false, false, true)))
diff --git a/test/llvmpasses/Makefile b/test/llvmpasses/Makefile
index 7318d1b67da02..d9fdfa190f3cf 100644
--- a/test/llvmpasses/Makefile
+++ b/test/llvmpasses/Makefile
@@ -30,4 +30,7 @@ update-help:
 	$(JULIAHOME)/deps/srccache/llvm/llvm/utils/update_test_checks.py \
 	--help
 
-.PHONY: $(TESTS) $(addprefix update-,$(TESTS_ll)) check all .
+clean:
+	rm -rf .lit_test_times.txt Output
+
+.PHONY: $(TESTS) $(addprefix update-,$(TESTS_ll)) check all clean update-help .
diff --git a/test/llvmpasses/alloc-opt-bits.ll b/test/llvmpasses/alloc-opt-bits.ll
new file mode 100644
index 0000000000000..e19093f46f815
--- /dev/null
+++ b/test/llvmpasses/alloc-opt-bits.ll
@@ -0,0 +1,37 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s 
+
+
+@tag = external addrspace(10) global {}
+
+@glob = external addrspace(10) global {}
+
+; Test that the gc_preserve intrinsics are deleted directly.
+
+; CHECK-LABEL: @ptr_and_bits
+; CHECK-NOT: alloca 
+; CHECK: call noalias ptr addrspace(10) @julia.gc_alloc_obj
+
+define void @ptr_and_bits(ptr %fptr, i1 %b, i1 %b2, i32 %idx) {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 16, ptr addrspace(10) @tag)
+  
+  %g0 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 1
+  store ptr addrspace(10) @glob, ptr addrspace(10) %g0
+  
+  %g1 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 0
+  store i64 7, ptr addrspace(10) %g1
+
+  %res = load ptr addrspace(10), ptr addrspace(10) %g0
+  %res2 = load i64, ptr addrspace(10) %g1
+  ret void
+}
+
+declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10))
+
+declare ptr @julia.ptls_states()
+
+declare ptr @julia.get_pgcstack()
diff --git a/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll
index b33f2cdac7dd4..b96c9385e38eb 100644
--- a/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll
+++ b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 target triple = "amdgcn-amd-amdhsa"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
@@ -19,15 +15,10 @@ declare {}* @julia.pointer_from_objref({} addrspace(11)*)
 ; Test that non-0 addrspace allocas are properly emitted and handled
 
 ; CHECK-LABEL: @non_zero_addrspace
-; CHECK: %1 = alloca i32, align 8, addrspace(5)
-
-; TYPED: %2 = bitcast i32 addrspace(5)* %1 to i8 addrspace(5)*
-; TYPED: %3 = bitcast i8 addrspace(5)* %2 to {} addrspace(5)*
-; TYPED: %var1 = addrspacecast {} addrspace(5)* %3 to {} addrspace(10)*
-; TYPED: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* %2)
 
-; OPAQUE: %var1 = addrspacecast ptr addrspace(5) %1 to ptr addrspace(10)
-; OPAQUE: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %1)
+; OPAQUE: %var1 = alloca i32, align 8, addrspace(5)
+; OPAQUE: %1 = addrspacecast ptr addrspace(5) %var1 to ptr
+; OPAQUE: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %var1)
 
 ; CHECK: ret void
 define void @non_zero_addrspace() {
diff --git a/test/llvmpasses/alloc-opt-gcframe.ll b/test/llvmpasses/alloc-opt-gcframe.ll
index a04d6566cec0a..f53a4d5c01df7 100644
--- a/test/llvmpasses/alloc-opt-gcframe.ll
+++ b/test/llvmpasses/alloc-opt-gcframe.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -13,18 +9,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK-LABEL: @return_obj
 ; CHECK-NOT: @julia.gc_alloc_obj
 
-; TYPED: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
-; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
-; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
-; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
-; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
-; TYPED-NEXT: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16)
-; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
-
 ; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %gcstack, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
+; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
 ; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16)
+; OPAQUE-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_small_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}})
 ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4
 
 define {} addrspace(10)* @return_obj() {
@@ -39,8 +27,7 @@ define {} addrspace(10)* @return_obj() {
 ; CHECK-LABEL: @return_load
 ; CHECK: alloca i64
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
-; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8*
+; CHECK-NOT: @jl_gc_small_alloc
 ; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr
 ; CHECK-NOT: @tag
 ; CHECK-NOT: @llvm.lifetime.end
@@ -59,11 +46,9 @@ define i64 @return_load(i64 %i) {
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @ccall_obj
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK: @ijl_gc_pool_alloc
-; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
+; CHECK: @ijl_gc_small_alloc
 ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4
 define void @ccall_obj(i8* %fptr) {
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -78,17 +63,12 @@ define void @ccall_obj(i8* %fptr) {
 
 ; CHECK-LABEL: @ccall_ptr
 ; CHECK: alloca i64
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
-; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8*
-; TYPED: %f = bitcast i8* %fptr to void (i8*)*
-
+; CHECK-NOT: @jl_gc_small_alloc
 ; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr
 ; OPAQUE: %f = bitcast ptr %fptr to ptr
 ; Currently the GC frame lowering pass strips away all operand bundles
-; TYPED-NEXT: call void %f(i8*
 ; OPAQUE-NEXT: call void %f(ptr
 ; CHECK-NEXT: ret void
 define void @ccall_ptr(i8* %fptr) {
@@ -106,11 +86,9 @@ define void @ccall_ptr(i8* %fptr) {
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @ccall_unknown_bundle
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK: @ijl_gc_pool_alloc
-; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
+; CHECK: @ijl_gc_small_alloc
 ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4
 define void @ccall_unknown_bundle(i8* %fptr) {
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -128,13 +106,10 @@ define void @ccall_unknown_bundle(i8* %fptr) {
 
 ; CHECK-LABEL: @lifetime_branches
 ; CHECK: alloca i64
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK: L1:
 ; CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8,
 
-; TYPED: %f = bitcast i8* %fptr to void (i8*)*
-; TYPED-NEXT: call void %f(i8*
 
 ; OPAQUE: %f = bitcast ptr %fptr to ptr
 ; OPAQUE-NEXT: call void %f(ptr
@@ -142,10 +117,8 @@ define void @ccall_unknown_bundle(i8* %fptr) {
 ; CHECK-NEXT: br i1 %b2, label %L2, label %L3
 
 ; CHECK: L2:
-; TYPED-NEXT: %f2 = bitcast i8* %fptr to void ({}*)*
 ; OPAQUE-NEXT: %f2 = bitcast ptr %fptr to ptr
 ; CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8,
-; TYPED-NEXT: call void %f2({}* null)
 ; OPAQUE-NEXT: call void %f2(ptr null)
 
 ; CHECK: L3:
@@ -176,10 +149,9 @@ L3:
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @object_field
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
+; CHECK-NOT: @jl_gc_small_alloc
 ; CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4
 define void @object_field({} addrspace(10)* %field) {
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -195,11 +167,9 @@ define void @object_field({} addrspace(10)* %field) {
 
 ; CHECK-LABEL: @memcpy_opt
 ; CHECK: alloca [16 x i8], align 16
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
-; TYPED: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NOT: @jl_gc_small_alloc
 ; OPAQUE: call void @llvm.memcpy.p0.p0.i64
 define void @memcpy_opt(i8* %v22) {
 top:
@@ -215,10 +185,9 @@ top:
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @preserve_opt
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
+; CHECK-NOT: @jl_gc_small_alloc
 ; CHECK-NOT: @llvm.lifetime.end
 ; CHECK: @external_function
 define void @preserve_opt(i8* %v22) {
@@ -238,7 +207,6 @@ top:
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @preserve_branches
-; TYPED: call {}*** @julia.get_pgcstack()
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK: L1:
 ; CHECK-NEXT: @external_function()
@@ -270,10 +238,7 @@ L3:
 }
 ; CHECK-LABEL: }{{$}}
 
-; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*,
-; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*,
-
-; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_pool_alloc(ptr,
+; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_small_alloc(ptr,
 ; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_big_alloc(ptr,
 declare void @external_function()
 declare {}*** @julia.get_pgcstack()
diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll
index b7e0647263caa..c6c279ae36fc6 100644
--- a/test/llvmpasses/alloc-opt-pass.ll
+++ b/test/llvmpasses/alloc-opt-pass.ll
@@ -1,17 +1,12 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 @tag = external addrspace(10) global {}
 
 ; Test that the gc_preserve intrinsics are deleted directly.
 
 ; CHECK-LABEL: @preserve_branches
-; TYPED: call {}*** @julia.ptls_states()
 ; OPAQUE: call ptr @julia.ptls_states()
 ; CHECK: L1:
 ; CHECK-NOT: @llvm.julia.gc_preserve_begin
@@ -23,32 +18,30 @@
 ; CHECK-NEXT: br label %L3
 
 ; CHECK: L3:
-define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
+define void @preserve_branches(ptr %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
   br i1 %b, label %L1, label %L3
 
-L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
-  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* nonnull %v)
+L1:                                               ; preds = %0
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag)
+  %tok = call token (...) @llvm.julia.gc_preserve_begin(ptr addrspace(10) nonnull %v)
   call void @external_function()
   br i1 %b2, label %L2, label %L3
 
-L2:
+L2:                                               ; preds = %L1
   call void @external_function()
   br label %L3
 
-L3:
+L3:                                               ; preds = %L2, %L1, %0
   ret void
 }
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @preserve_branches2
-; TYPED: call {}*** @julia.ptls_states()
 ; OPAQUE: call ptr @julia.ptls_states()
 ; CHECK: L1:
-; TYPED-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2
 ; OPAQUE-NEXT: @llvm.julia.gc_preserve_begin{{.*}}ptr addrspace(10) %v2
 ; CHECK-NEXT: @external_function()
 ; CHECK-NEXT: br i1 %b2, label %L2, label %L3
@@ -58,107 +51,296 @@ L3:
 ; CHECK-NEXT: br label %L3
 
 ; CHECK: L3:
-define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v2 = call {} addrspace(10)* @external_function2()
+define void @preserve_branches2(ptr %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %v2 = call ptr addrspace(10) @external_function2()
   br i1 %b, label %L1, label %L3
 
-L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
-  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v, {} addrspace(10)* nonnull %v2)
+L1:                                               ; preds = %0
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag)
+  %tok = call token (...) @llvm.julia.gc_preserve_begin(ptr addrspace(10) %v, ptr addrspace(10) nonnull %v2)
   call void @external_function()
   br i1 %b2, label %L2, label %L3
 
-L2:
+L2:                                               ; preds = %L1
   call void @external_function()
   br label %L3
 
-L3:
+L3:                                               ; preds = %L2, %L1, %0
   ret void
 }
 ; CHECK-LABEL: }{{$}}
 
+declare void @external_function()
+
+declare ptr addrspace(10) @external_function2()
+
+
 ; CHECK-LABEL: @legal_int_types
 ; CHECK: alloca [12 x i8]
 ; CHECK-NOT: alloca i96
+; CHECK: call void @llvm.memset.p0.i64(ptr align 16 %var1,
 ; CHECK: ret void
 define void @legal_int_types() {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 12, {} addrspace(10)* @tag)
-  %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)*
-  %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2)
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 12, ptr addrspace(10) @tag)
+  %var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11)
+  %var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2)
   ret void
 }
 ; CHECK-LABEL: }{{$}}
 
 
-declare void @external_function()
-declare {} addrspace(10)* @external_function2()
-declare {}*** @julia.ptls_states()
-declare {}*** @julia.get_pgcstack()
-declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*)
-declare {}* @julia.pointer_from_objref({} addrspace(11)*)
-declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
-declare token @llvm.julia.gc_preserve_begin(...)
-declare void @llvm.julia.gc_preserve_end(token)
-
 ; CHECK-LABEL: @memref_collision
-; TYPED: call {}*** @julia.ptls_states()
 ; OPAQUE: call ptr @julia.ptls_states()
-; TYPED-NOT: store {}
 ; OPAQUE-NOT: store ptr
 ; CHECK: store i
-; TYPED-NOT: store {}
 ; OPAQUE-NOT: store ptr
 ; CHECK: L1:
-; TYPED: load {}
 ; OPAQUE: load ptr
 ; CHECK: L2:
 ; CHECK: load i
 define void @memref_collision(i64 %x) {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
-  %v_p = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-  store i64 %x, i64 addrspace(10)* %v_p
-  br i1 0, label %L1, label %L2
-
-L1:
-  %v1 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
-  %v1_x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %v1
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag)
+  %v_p = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+  store i64 %x, ptr addrspace(10) %v_p, align 4
+  br i1 false, label %L1, label %L2
+
+L1:                                               ; preds = %0
+  %v1 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+  %v1_x = load ptr addrspace(10), ptr addrspace(10) %v1, align 8
   ret void
 
-L2:
-  %v2 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-  %v2_x = load i64, i64 addrspace(10)* %v2
+L2:                                               ; preds = %0
+  %v2 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+  %v2_x = load i64, ptr addrspace(10) %v2, align 4
   ret void
 }
+
 ; CHECK-LABEL: }{{$}}
 
 ; CHECK-LABEL: @lifetime_no_preserve_end
 ; CHECK: alloca
 ; CHECK-NOT: call token(...) @llvm.julia.gc_preserve_begin
 ; CHECK: call void @llvm.lifetime.start
+; CHECK: call void @llvm.memset.p0.i64(ptr align 16 %v,
 ; CHECK-NOT: call void @llvm.lifetime.end
-define void @lifetime_no_preserve_end({}* noalias nocapture noundef nonnull sret({}) %0) {
-  %pgcstack = call {}*** @julia.get_pgcstack()
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
-  %token = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v)
-  %v_derived = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
-  %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %v_derived)
-  %ptr_raw = bitcast {}* %ptr to i8*
-  call void @external_function() ; safepoint
-  %ret_raw = bitcast {}* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %ret_raw, i8 * align 8 %ptr_raw, i64 0, i1 false)
-  %ret_raw2 = bitcast {}* %0 to i8*
+define void @lifetime_no_preserve_end(ptr noalias nocapture noundef nonnull sret({}) %0) {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag)
+  %token = call token (...) @llvm.julia.gc_preserve_begin(ptr addrspace(10) %v)
+  %v_derived = addrspacecast ptr addrspace(10) %v to ptr addrspace(11)
+  %ptr = call nonnull ptr @julia.pointer_from_objref(ptr addrspace(11) %v_derived)
+  %ptr_raw = bitcast ptr %ptr to ptr
+  call void @external_function()
+  %ret_raw = bitcast ptr %0 to ptr
+  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %ret_raw, ptr align 8 %ptr_raw, i64 0, i1 false)
+  %ret_raw2 = bitcast ptr %0 to ptr
+  ret void
+}
+; CHECK-LABEL: }{{$}}
+
+
+; CHECK-LABEL: @initializers
+; CHECK: alloca [1 x i8]
+; CHECK-DAG: alloca [2 x i8]
+; CHECK-DAG: alloca [3 x i8]
+; CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 1 %var1,
+; CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 4 %var7,
+; CHECK: ret void
+define void @initializers() {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #4
+  %var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11)
+  %var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2)
+  %var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #7
+  %var5 = addrspacecast ptr addrspace(10) %var4 to ptr addrspace(11)
+  %var6 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var5)
+  %var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #1
+  %var8 = addrspacecast ptr addrspace(10) %var7 to ptr addrspace(11)
+  %var9 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var8)
   ret void
 }
 ; CHECK-LABEL: }{{$}}
+
+; Test that the pass handles dead basic blocks with references to the allocation
+; CHECK-LABEL: @nopreds
+; CHECK: alloca i8, i64 0, align 1
+; CHECK: call void @llvm.lifetime.start
+define swiftcc { ptr addrspace(10), i8 } @nopreds() {
+top:
+  %0 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr null, i64 0, ptr addrspace(10) null)
+  %1 = addrspacecast ptr addrspace(10) %0 to ptr addrspace(11)
+  br label %common.ret
+
+common.ret:                                       ; preds = %union_move9, %top
+  ret { ptr addrspace(10), i8 } zeroinitializer
+
+union_move9:                                      ; No predecessors!
+  call void @llvm.memcpy.p0.p11.i64(ptr null, ptr addrspace(11) %1, i64 0, i1 false)
+  br label %common.ret
+}
+; CHECK-LABEL: }{{$}}
+
+@0 = private unnamed_addr constant ptr inttoptr (i64 4373799056 to ptr), !julia.constgv !0
+@1 = private unnamed_addr constant i64 0, align 8
+
+; CHECK-LABEL: @cmpxchg
+; CHECK: alloca
+; CHECK: alloca
+; CHECK:  %20 = cmpxchg ptr %2,
+define swiftcc i64 @"cmpxchg"(ptr nonnull swiftself "gcstack" %0) #0 {
+  %2 = alloca i64, align 16
+  %3 = call ptr @julia.get_pgcstack()
+  %4 = getelementptr inbounds i8, ptr %3, i32 -152
+  %5 = getelementptr inbounds i8, ptr %4, i32 168
+  %6 = load ptr, ptr %5, align 8, !tbaa !4
+  %7 = getelementptr inbounds i8, ptr %6, i32 16
+  %8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0
+  fence syncscope("singlethread") seq_cst
+  call void @julia.safepoint(ptr %8)
+  fence syncscope("singlethread") seq_cst
+  %9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19
+  %10 = ptrtoint ptr %9 to i64
+  %11 = inttoptr i64 %10 to ptr
+  %12 = getelementptr inbounds i8, ptr %3, i32 -152
+  %13 = addrspacecast ptr %11 to ptr addrspace(10)
+  call void @llvm.lifetime.start.p0(i64 8, ptr %2)
+  %14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7
+  %15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
+  call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24
+  %16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
+  %17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
+  br label %19
+
+18:                                               ; preds = %19
+  ret i64 %21
+
+19:                                               ; preds = %19, %1
+  %20 = phi i64 [ %17, %1 ], [ %23, %19 ]
+  %21 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself "gcstack" %3, i64 signext %20, i64 signext 1)
+  %22 = cmpxchg ptr addrspace(11) %16, i64 %20, i64 %21 seq_cst monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
+  %23 = extractvalue { i64, i1 } %22, 0
+  %24 = extractvalue { i64, i1 } %22, 1
+  br i1 %24, label %18, label %19
+}
+
+; CHECK-LABEL: }{{$}}
+; CHECK-LABEL: @atomicrmw
+; CHECK: alloca
+; CHECK: alloca
+; CHECK: atomicrmw xchg ptr %2,
+define swiftcc i64 @"atomicrmw"(ptr nonnull swiftself "gcstack" %0) #0 {
+  %2 = alloca i64, align 16
+  %3 = call ptr @julia.get_pgcstack()
+  %4 = getelementptr inbounds i8, ptr %3, i32 -152
+  %5 = getelementptr inbounds i8, ptr %4, i32 168
+  %6 = load ptr, ptr %5, align 8, !tbaa !4
+  %7 = getelementptr inbounds i8, ptr %6, i32 16
+  %8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0
+  fence syncscope("singlethread") seq_cst
+  call void @julia.safepoint(ptr %8)
+  fence syncscope("singlethread") seq_cst
+  %9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19
+  %10 = ptrtoint ptr %9 to i64
+  %11 = inttoptr i64 %10 to ptr
+  %12 = getelementptr inbounds i8, ptr %3, i32 -152
+  %13 = addrspacecast ptr %11 to ptr addrspace(10)
+  call void @llvm.lifetime.start.p0(i64 8, ptr %2)
+  %14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7
+  %15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
+  call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24
+  %16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
+  %17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
+  %18 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself "gcstack" %3, i64 signext %17, i64 signext 1)
+  %19 = atomicrmw xchg ptr addrspace(11) %16, i64 %18 seq_cst, align 8, !tbaa !25, !alias.scope !23, !noalias !24                                    ; preds = %19
+  ret i64 %19
+}
+
+declare ptr @julia.ptls_states()
+
+declare ptr @julia.pointer_from_objref(ptr addrspace(11))
+
+declare token @llvm.julia.gc_preserve_begin(...)
+
+declare void @llvm.julia.gc_preserve_end(token)
+
+declare ptr @julia.get_pgcstack()
+
+; Function Attrs: mustprogress nounwind willreturn memory(inaccessiblemem: readwrite)
+declare nonnull align 8 dereferenceable(8) ptr addrspace(10) @ijl_box_int64(i64 signext) #2
+
+; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
+declare void @julia.safepoint(ptr) #3
+
+; Function Attrs: mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
+declare noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) #4
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #5
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5
+
+declare swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself, i64 signext, i64 signext) #0
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6
+
+attributes #0 = { "probe-stack"="inline-asm" }
+attributes #1 = { nounwind willreturn allockind("alloc,zeroed") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
+attributes #2 = { mustprogress nounwind willreturn memory(inaccessiblemem: readwrite) }
+attributes #3 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
+attributes #4 = { mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
+attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #7 = { nounwind willreturn allockind("alloc,uninitialized") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
+attributes #8 = { nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.module.flags = !{!1, !2, !3}
+
+!0 = !{}
+!1 = !{i32 2, !"Dwarf Version", i32 4}
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !{i32 2, !"julia.optlevel", i32 2}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"jtbaa_gcframe", !6, i64 0}
+!6 = !{!"jtbaa", !7, i64 0}
+!7 = !{!"jtbaa"}
+!8 = !{!9, !9, i64 0, i64 1}
+!9 = !{!"jtbaa_const", !6, i64 0}
+!10 = !{!11}
+!11 = !{!"jnoalias_const", !12}
+!12 = !{!"jnoalias"}
+!13 = !{!14, !15, !16, !17}
+!14 = !{!"jnoalias_gcframe", !12}
+!15 = !{!"jnoalias_stack", !12}
+!16 = !{!"jnoalias_data", !12}
+!17 = !{!"jnoalias_typemd", !12}
+!18 = !{i64 56}
+!19 = !{i64 16}
+!20 = !{!21, !21, i64 0}
+!21 = !{!"jtbaa_value", !22, i64 0}
+!22 = !{!"jtbaa_data", !6, i64 0}
+!23 = !{!16}
+!24 = !{!14, !15, !17, !11}
+!25 = !{!26, !26, i64 0}
+!26 = !{!"jtbaa_mutab", !21, i64 0}
+
diff --git a/test/llvmpasses/alloc-opt-pipeline.jl b/test/llvmpasses/alloc-opt-pipeline.jl
index 9437913e4054b..e84348ec4a8c6 100644
--- a/test/llvmpasses/alloc-opt-pipeline.jl
+++ b/test/llvmpasses/alloc-opt-pipeline.jl
@@ -17,7 +17,7 @@ end
 
 # CHECK-LABEL: @julia_haszerolayout
 # CHECK: top:
-# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK-NOT: @jl_gc_small_alloc
 # CHECK: extractelement
 # CHECK: ret i8
 emit(haszerolayout, NTuple{32,VecElement{UInt8}})
diff --git a/test/llvmpasses/alloc-opt-unsized.ll b/test/llvmpasses/alloc-opt-unsized.ll
index d3868548a00d7..d435ab1490cfc 100644
--- a/test/llvmpasses/alloc-opt-unsized.ll
+++ b/test/llvmpasses/alloc-opt-unsized.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=OPAQUE
 
 source_filename = "text"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
@@ -31,15 +27,6 @@ entry:
   ret void
 }
 
-; TYPED:   %[[i0:.+]] = alloca {} addrspace(10)*, i64 1000, align 16
-; TYPED:   %[[i1:.+]] = bitcast {} addrspace(10)** %[[i0]] to i8*
-; TYPED:   %i18 = bitcast i8* %[[i1]] to {}*
-; TYPED:   %_malloccache.i = bitcast {}* %i18 to {} addrspace(10)**
-; TYPED:   %i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %_malloccache.i, i64 %iv.i
-; TYPED:   store {} addrspace(10)* %arg, {} addrspace(10)** %i23, align 8
-; TYPED:   %i24 = bitcast {} addrspace(10)** %_malloccache.i to {}*
-; TYPED:   %l = load {} addrspace(10)*, {} addrspace(10)** %i23, align 8
-
 ; OPAQUE:   %[[i0:.+]] = alloca ptr addrspace(10), i64 1000, align 16
 ; OPAQUE:   %i23 = getelementptr inbounds ptr addrspace(10), ptr %i18, i64 %iv.i
 ; OPAQUE:   store ptr addrspace(10) %arg, ptr %i23, align 8
diff --git a/test/llvmpasses/atomic-modify.ll b/test/llvmpasses/atomic-modify.ll
new file mode 100644
index 0000000000000..23e1949f3ad0a
--- /dev/null
+++ b/test/llvmpasses/atomic-modify.ll
@@ -0,0 +1,288 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='ExpandAtomicModify' -S %s | FileCheck %s
+
+declare {i8, i8} @julia.atomicmodify.i8(ptr, ptr, i8, i8, ...)
+declare {double, double} @julia.atomicmodify.f64(ptr, ptr, i8, i8, ...)
+declare double @llvm.maxnum.f64(double %Val0, double %Val1)
+
+define i8 @add.i8(i8 %x, i8 %y) {
+    %z = add i8 %x, %y
+    ret i8 %z
+}
+
+define i8 @sub.i8(i8 %x, i8 %y) {
+    %z = sub i8 %x, %y
+    ret i8 %z
+}
+
+define i8 @subx.i8(i8 %x, i8 %y) {
+    %z = sub i8 %y, %x
+    ret i8 %z
+}
+
+define i8 @add.i8.zext(i8 %x, i1 %y) {
+    %y8 = zext i1 %y to i8
+    %z = add i8 %x, %y8
+    ret i8 %z
+}
+
+define i8 @and.i8(i8 %x, i8 %y) {
+    %z = and i8 %x, %y
+    ret i8 %z
+}
+
+define i8 @nand.i8(i8 %x, i8 %y) {
+    %z = and i8 %x, %y
+    %w = xor i8 %z, -1
+    ret i8 %w
+}
+
+define i8 @nand.i8.zext(i8 %x, i1 %y) {
+    %y8 = zext i1 %y to i8
+    %z = and i8 %y8, %x
+    %w = xor i8 %z, -1
+    ret i8 %w
+}
+
+define i8 @xchg.i8(i8 %x, i8 %y) {
+    ret i8 %y
+}
+
+define double @fadd.f64(double %x, double %y) {
+    %z = fadd double %y, %x
+    ret double %z
+}
+
+define double @fmax.f64(double %x, double %y) {
+    %z = call double @llvm.maxnum.f64(double %y, double %x)
+    ret double %z
+}
+
+define internal i8 @0(i8 %x, i8 %y) unnamed_addr {
+    %z = call i8 @add.i8(i8 %x, i8 %y)
+    ret i8 %z
+}
+
+define internal i8 @1(i8 %x, i8 %y) unnamed_addr {
+    %z = call i8 @0(i8 %x, i8 %y)
+    ret i8 %z
+}
+
+define internal i8 @2(i8 %x, i8 %y, ptr %f) unnamed_addr {
+    %z = call i8 %f(i8 %x, i8 %y)
+    ret i8 %z
+}
+
+define i8 @mod_i8_add(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_add
+; CHECK: %0 = atomicrmw add ptr %a, i8 %b release, align 1
+; CHECK: ret i8 %0
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @add.i8, i8 5, i8 1, i8 %b)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_add_new(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_add
+; CHECK: %0 = atomicrmw add ptr %a, i8 %b release, align 1
+; CHECK-NEXT: [[newval:%.*]] = add i8 %0, %b
+; CHECK-NEXT: ret i8 [[newval]]
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @add.i8, i8 5, i8 1, i8 %b)
+  %newval = extractvalue {i8, i8} %oldnew, 1
+  ret i8 %newval
+}
+
+define i8 @mod_i8_addfence(ptr %a) {
+; CHECK-LABEL: @mod_i8_addfence
+; CHECK: %0 = atomicrmw or ptr %a, i8 0 release, align 1
+; CHECK-NEXT: ret i8 %0
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @add.i8, i8 5, i8 1, i8 0)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_add_zext(ptr %a, i1 %b) {
+; CHECK-LABEL: @mod_i8_add_zext
+; CHECK: [[b8:%.*]] = zext i1 %b to i8
+; CHECK: %0 = atomicrmw add ptr %a, i8 [[b8]] release, align 1
+; CHECK: ret i8 %0
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @add.i8.zext, i8 5, i8 1, i1 %b)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_add_zext_new(ptr %a, i1 %b) {
+; CHECK-LABEL: @mod_i8_add_zext
+; CHECK: [[b8:%.*]] = zext i1 %b to i8
+; CHECK-NEXT: %0 = atomicrmw add ptr %a, i8 [[b8]] release, align 1
+; CHECK-NEXT: [[newval:%.*]] = add i8 %0, [[b8]]
+; CHECK-NEXT: ret i8 [[newval]]
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @add.i8.zext, i8 5, i8 1, i1 %b)
+  %newval = extractvalue {i8, i8} %oldnew, 1
+  ret i8 %newval
+}
+
+define i8 @mod_i8_sub(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_sub
+; CHECK: %0 = atomicrmw sub ptr %a, i8 %b release, align 1
+; CHECK: ret i8 %0
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @sub.i8, i8 5, i8 1, i8 %b)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_subx(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_subx
+; CHECK: [[newval:%.*]] = call i8 @subx.i8(i8 %loaded, i8 %b)
+; CHECK: [[success:%.*]] = cmpxchg ptr %a, i8 %loaded, i8 [[newval]]
+; CHECK: [[oldval:%.*]] = extractvalue { i8, i1 } [[success:%.*]], 0
+; CHECK: ret i8 [[oldval]]
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @subx.i8, i8 5, i8 1, i8 %b)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_subx_new(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_subx_new
+; CHECK: [[newval:%.*]] = call i8 @subx.i8(i8 %loaded, i8 %b)
+; CHECK: [[oldval:%.*]] = cmpxchg ptr %a, i8 %loaded, i8 [[newval]]
+; CHECK: ret i8 [[newval]]
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @subx.i8, i8 5, i8 1, i8 %b)
+  %newval = extractvalue {i8, i8} %oldnew, 1
+  ret i8 %newval
+}
+
+define i8 @mod_i8_nand(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_nand
+; CHECK: %0 = atomicrmw nand ptr %a, i8 %b release, align 1
+; CHECK: ret i8 %0
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @nand.i8, i8 5, i8 1, i8 %b)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_nand_new(ptr %a, i1 %b) {
+; CHECK-LABEL: @mod_i8_nand_new
+; CHECK: [[b8:%.*]] = zext i1 %b to i8
+; CHECK: %0 = atomicrmw nand ptr %a, i8 [[b8]] release, align 1
+; CHECK: [[newand:%.*]] = and i8 [[b8]], %0
+; CHECK: [[newval:%.*]] = xor i8 [[newand:%.*]], -1
+; CHECK: ret i8 [[newval]]
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @nand.i8.zext, i8 5, i8 1, i1 %b)
+  %newval = extractvalue {i8, i8} %oldnew, 1
+  ret i8 %newval
+}
+
+define i8 @mod_i8_andxchg(ptr %a) {
+; CHECK-LABEL: @mod_i8_andxchg
+; CHECK: %0 = atomicrmw xchg ptr %a, i8 0 release, align 1
+; CHECK-NEXT: ret i8 %0
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @and.i8, i8 5, i8 1, i8 0)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_xchg(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_xchg
+; CHECK: %0 = atomicrmw xchg ptr %a, i8 %b release, align 1
+; CHECK-NEXT: ret i8 %0
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @xchg.i8, i8 5, i8 1, i8 %b)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_xchg_new(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_xchg_new
+; CHECK: %0 = atomicrmw xchg ptr %a, i8 %b release, align 1
+; CHECK-NEXT: ret i8 %b
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @xchg.i8, i8 5, i8 1, i8 %b)
+  %newval = extractvalue {i8, i8} %oldnew, 1
+  ret i8 %newval
+}
+
+define double @mod_i8_fadd(ptr %a, double %b) {
+; CHECK-LABEL: @mod_i8_fadd
+; CHECK: %0 = atomicrmw fadd ptr %a, double %b release, align 8
+; CHECK: ret double %0
+top:
+  %oldnew = call {double, double} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.f64(ptr align(8) %a, ptr @fadd.f64, i8 5, i8 1, double %b)
+  %oldval = extractvalue {double, double} %oldnew, 0
+  ret double %oldval
+}
+
+define double @mod_i8_fmax(ptr %a, double %b) {
+; CHECK-LABEL: @mod_i8_fmax
+; CHECK: %0 = atomicrmw fmax ptr %a, double %b release, align 8
+; CHECK: ret double %0
+top:
+  %oldnew = call {double, double} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.f64(ptr align(8) %a, ptr @fmax.f64, i8 5, i8 1, double %b)
+  %oldval = extractvalue {double, double} %oldnew, 0
+  ret double %oldval
+}
+
+define i8 @mod_i8_indirect0(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_indirect0
+; CHECK: %0 = atomicrmw add ptr %a, i8 %b release, align 1
+; CHECK: ret i8 %0
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @0, i8 5, i8 1, i8 %b)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_indirect1(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_indirect1
+; CHECK: %0 = atomicrmw add ptr %a, i8 %b release, align 1
+; CHECK: ret i8 %0
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @1, i8 5, i8 1, i8 %b)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_indirect2(ptr %a, i8 %b, ptr %f) {
+; CHECK-LABEL: @mod_i8_indirect2
+; CHECK: [[newval:%.*]] = call i8 %f(i8 %loaded, i8 %b)
+; CHECK: [[success:%.*]] = cmpxchg ptr %a, i8 %loaded, i8 [[newval]]
+; CHECK: [[oldval:%.*]] = extractvalue { i8, i1 } [[success:%.*]], 0
+; CHECK: ret i8 [[oldval]]
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @2, i8 5, i8 1, i8 %b, ptr %f)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
+
+define i8 @mod_i8_indirect2_new(ptr %a, i8 %b, ptr %f) {
+; CHECK-LABEL: @mod_i8_indirect2_new
+; CHECK: [[newval:%.*]] = call i8 %f(i8 %loaded, i8 %b)
+; CHECK: [[oldval:%.*]] = cmpxchg ptr %a, i8 %loaded, i8 [[newval]]
+; CHECK: ret i8 [[newval]]
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @2, i8 5, i8 1, i8 %b, ptr %f)
+  %newval = extractvalue {i8, i8} %oldnew, 1
+  ret i8 %newval
+}
+
+define i8 @mod_i8_indirect3(ptr %a, i8 %b) {
+; CHECK-LABEL: @mod_i8_indirect3
+; CHECK: %0 = atomicrmw add ptr %a, i8 %b release, align 1
+; CHECK: ret i8 %0
+top:
+  %oldnew = call {i8, i8} (ptr, ptr, i8, i8, ...) @julia.atomicmodify.i8(ptr align(1) %a, ptr @2, i8 5, i8 1, i8 %b, ptr @0)
+  %oldval = extractvalue {i8, i8} %oldnew, 0
+  ret i8 %oldval
+}
diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll
index eea3d1b288204..073597fbcdc66 100644
--- a/test/llvmpasses/cpu-features.ll
+++ b/test/llvmpasses/cpu-features.ll
@@ -1,10 +1,8 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+target triple = "x86_64-linux-gnu"
 
 declare i1 @julia.cpu.have_fma.f64()
 declare double @with_fma(double %0, double %1, double %2)
diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl
index 7338d1c3ccc5a..3c4c1d491ec28 100644
--- a/test/llvmpasses/fastmath.jl
+++ b/test/llvmpasses/fastmath.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll
+# RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll
 # RUN: cat %t/module.ll | FileCheck %s
 
 ## Notes:
@@ -14,21 +14,5 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl"))
 
 import Base.FastMath
 
-# CHECK: call fast float @llvm.sqrt.f32(float %{{[0-9]+}})
+# CHECK: call fast float @llvm.sqrt.f32(float %"x::Float32")
 emit(FastMath.sqrt_fast, Float32)
-
-
-# Float16 operations should be performed as Float32, unless @fastmath is specified
-# TODO: this is not true for platforms that natively support Float16
-
-foo(x::T,y::T) where T = x-y == zero(T)
-# LOWER: fsub half %0, %1
-# FINAL: %2 = fpext half %0 to float
-# FINAL: %3 = fpext half %1 to float
-# FINAL: fsub half %2, %3
-emit(foo, Float16, Float16)
-
-@fastmath foo(x::T,y::T) where T = x-y == zero(T)
-# LOWER: fsub fast half %0, %1
-# FINAL: fsub fast half %0, %1
-emit(foo, Float16, Float16)
diff --git a/test/llvmpasses/final-lower-gc-addrspaces.ll b/test/llvmpasses/final-lower-gc-addrspaces.ll
index d3cdea7454972..db80188fc206d 100644
--- a/test/llvmpasses/final-lower-gc-addrspaces.ll
+++ b/test/llvmpasses/final-lower-gc-addrspaces.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 target triple = "amdgcn-amd-amdhsa"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
@@ -27,11 +23,8 @@ attributes #0 = { allocsize(1) }
 define void @gc_frame_addrspace(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_addrspace
-; TYPED: %0 = alloca {} addrspace(10)*, i32 4, align 16, addrspace(5)
 ; OPAQUE: %0 = alloca ptr addrspace(10), i32 4, align 16, addrspace(5)
-; TYPED: %gcframe = addrspacecast {} addrspace(10)* addrspace(5)* %0 to {} addrspace(10)**
 ; OPAQUE: %gcframe = addrspacecast ptr addrspace(5) %0 to ptr
-; TYPED: %1 = bitcast {} addrspace(10)** %gcframe to i8*
   %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
   %pgcstack = call {}*** @julia.get_pgcstack()
   call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll
index 5bbaa2f4d81ea..f8e123fdc6aea 100644
--- a/test/llvmpasses/final-lower-gc.ll
+++ b/test/llvmpasses/final-lower-gc.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 
 @tag = external addrspace(10) global {}
@@ -18,28 +14,17 @@ declare noalias nonnull {} addrspace(10)** @julia.new_gc_frame(i32)
 declare void @julia.push_gc_frame({} addrspace(10)**, i32)
 declare {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)**, i32)
 declare void @julia.pop_gc_frame({} addrspace(10)**)
-declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8*, i64) #0
+declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8*, i64, i64) #0
 
 attributes #0 = { allocsize(1) }
 
 define void @gc_frame_lowering(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_lowering
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
   %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-; TYPED:  [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack()
 ; OPAQUE: [[GCFRAME_SLOT:%.*]] = call ptr @julia.get_pgcstack()
   %pgcstack = call {}*** @julia.get_pgcstack()
-; TYPED-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0
-; TYPED-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64*
-; TYPED-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0
-; TYPED-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1
-; TYPED-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}***
-; TYPED-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8
-; TYPED-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], align 8, !tbaa !0
-; TYPED-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)***
-; TYPED-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8
 
 ; OPAQUE-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 0
 ; OPAQUE-DAG: store i64 8, ptr [[GCFRAME_SIZE_PTR]], align 8, !tbaa !0
@@ -49,22 +34,15 @@ top:
 ; OPAQUE-NEXT: store ptr %gcframe, ptr [[GCFRAME_SLOT]], align 8
   call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
   %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
-; TYPED: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3
 ; OPAQUE: %frame_slot_1 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 3
   %frame_slot_1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1)
   store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1, align 8
   %bboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %b)
-; TYPED: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
 ; OPAQUE: %frame_slot_2 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
   %frame_slot_2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0)
   store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2, align 8
-; TYPED: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed)
 ; OPAQUE: call void @boxed_simple(ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed)
   call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed)
-; TYPED-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1
-; TYPED-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0
-; TYPED-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)**
-; TYPED-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0
 
 ; OPAQUE-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 1
 ; OPAQUE-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load ptr addrspace(10), ptr [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0
@@ -80,9 +58,8 @@ top:
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
-; TYPED: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc
-; OPAQUE: %v = call noalias nonnull dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc
-  %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8)
+; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_small_alloc
+  %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8, i64 12341234)
   %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
   %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1
   store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* %1, align 8, !tbaa !0
@@ -95,10 +72,8 @@ top:
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
-; CHECK: %0 = add i64 %size, 8
-; TYPED: %v = call noalias nonnull dereferenceable(8) {} addrspace(10)* @ijl_gc_alloc_typed(i8* %ptls_i8, i64 %0, i8* null)
-; OPAQUE: %v = call noalias nonnull dereferenceable(8) ptr addrspace(10) @ijl_gc_alloc_typed(ptr %ptls_i8, i64 %0, ptr null)
-  %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 %size)
+; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable(8) ptr addrspace(10) @ijl_gc_alloc_typed(ptr %ptls_i8, i64 %size, i64 12341234)
+  %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 %size, i64 12341234)
   %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
   %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1
   store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* %1, align 8, !tbaa !0
diff --git a/test/llvmpasses/float16.ll b/test/llvmpasses/float16.ll
index ab1425ec12fa5..d1dfb6aca11dd 100644
--- a/test/llvmpasses/float16.ll
+++ b/test/llvmpasses/float16.ll
@@ -1,13 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0  -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0  --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s
+; RUN: opt  --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1  -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1  --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s
-
-define half @demotehalf_test(half %a, half %b) #0 {
+define half @demote_half_test(half %a, half %b) #0 {
 top:
-; CHECK-LABEL: @demotehalf_test(
+; CHECK-LABEL: @demote_half_test(
 ; CHECK-NEXT:  top:
 ; CHECK-NEXT:    %0 = fpext half %a to float
 ; CHECK-NEXT:    %1 = fpext half %b to float
@@ -103,5 +99,131 @@ top:
   ret half %13
 }
 
-attributes #0 = { "target-features"="-avx512fp16" }
-attributes #1 = { "target-features"="+avx512fp16" }
+define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) #2 {
+top:
+; CHECK-LABEL: @demote_bfloat_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %0 = fpext bfloat %a to float
+; CHECK-NEXT:    %1 = fpext bfloat %b to float
+; CHECK-NEXT:    %2 = fadd float %0, %1
+; CHECK-NEXT:    %3 = fptrunc float %2 to bfloat
+; CHECK-NEXT:    %4 = fpext bfloat %3 to float
+; CHECK-NEXT:    %5 = fpext bfloat %b to float
+; CHECK-NEXT:    %6 = fadd float %4, %5
+; CHECK-NEXT:    %7 = fptrunc float %6 to bfloat
+; CHECK-NEXT:    %8 = fpext bfloat %7 to float
+; CHECK-NEXT:    %9 = fpext bfloat %b to float
+; CHECK-NEXT:    %10 = fadd float %8, %9
+; CHECK-NEXT:    %11 = fptrunc float %10 to bfloat
+; CHECK-NEXT:    %12 = fpext bfloat %11 to float
+; CHECK-NEXT:    %13 = fpext bfloat %b to float
+; CHECK-NEXT:    %14 = fmul float %12, %13
+; CHECK-NEXT:    %15 = fptrunc float %14 to bfloat
+; CHECK-NEXT:    %16 = fpext bfloat %15 to float
+; CHECK-NEXT:    %17 = fpext bfloat %b to float
+; CHECK-NEXT:    %18 = fdiv float %16, %17
+; CHECK-NEXT:    %19 = fptrunc float %18 to bfloat
+; CHECK-NEXT:    %20 = insertelement <2 x bfloat> undef, bfloat %a, i32 0
+; CHECK-NEXT:    %21 = insertelement <2 x bfloat> %20, bfloat %b, i32 1
+; CHECK-NEXT:    %22 = insertelement <2 x bfloat> undef, bfloat %b, i32 0
+; CHECK-NEXT:    %23 = insertelement <2 x bfloat> %22, bfloat %b, i32 1
+; CHECK-NEXT:    %24 = fpext <2 x bfloat> %21 to <2 x float>
+; CHECK-NEXT:    %25 = fpext <2 x bfloat> %23 to <2 x float>
+; CHECK-NEXT:    %26 = fadd <2 x float> %24, %25
+; CHECK-NEXT:    %27 = fptrunc <2 x float> %26 to <2 x bfloat>
+; CHECK-NEXT:    %28 = extractelement <2 x bfloat> %27, i32 0
+; CHECK-NEXT:    %29 = extractelement <2 x bfloat> %27, i32 1
+; CHECK-NEXT:    %30 = fpext bfloat %28 to float
+; CHECK-NEXT:    %31 = fpext bfloat %29 to float
+; CHECK-NEXT:    %32 = fadd float %30, %31
+; CHECK-NEXT:    %33 = fptrunc float %32 to bfloat
+; CHECK-NEXT:    %34 = fpext bfloat %33 to float
+; CHECK-NEXT:    %35 = fpext bfloat %19 to float
+; CHECK-NEXT:    %36 = fadd float %34, %35
+; CHECK-NEXT:    %37 = fptrunc float %36 to bfloat
+; CHECK-NEXT:    ret bfloat %37
+;
+  %0 = fadd bfloat %a, %b
+  %1 = fadd bfloat %0, %b
+  %2 = fadd bfloat %1, %b
+  %3 = fmul bfloat %2, %b
+  %4 = fdiv bfloat %3, %b
+  %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0
+  %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1
+  %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0
+  %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1
+  %9 = fadd <2 x bfloat> %6, %8
+  %10 = extractelement <2 x bfloat> %9, i32 0
+  %11 = extractelement <2 x bfloat> %9, i32 1
+  %12 = fadd bfloat %10, %11
+  %13 = fadd bfloat %12, %4
+  ret bfloat %13
+}
+
+define bfloat @native_bfloat_test(bfloat %a, bfloat %b) #3 {
+top:
+; CHECK-LABEL: @native_bfloat_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %0 = fadd bfloat %a, %b
+; CHECK-NEXT:    %1 = fadd bfloat %0, %b
+; CHECK-NEXT:    %2 = fadd bfloat %1, %b
+; CHECK-NEXT:    %3 = fmul bfloat %2, %b
+; CHECK-NEXT:    %4 = fdiv bfloat %3, %b
+; CHECK-NEXT:    %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0
+; CHECK-NEXT:    %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1
+; CHECK-NEXT:    %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0
+; CHECK-NEXT:    %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1
+; CHECK-NEXT:    %9 = fadd <2 x bfloat> %6, %8
+; CHECK-NEXT:    %10 = extractelement <2 x bfloat> %9, i32 0
+; CHECK-NEXT:    %11 = extractelement <2 x bfloat> %9, i32 1
+; CHECK-NEXT:    %12 = fadd bfloat %10, %11
+; CHECK-NEXT:    %13 = fadd bfloat %12, %4
+; CHECK-NEXT:    ret bfloat %13
+;
+  %0 = fadd bfloat %a, %b
+  %1 = fadd bfloat %0, %b
+  %2 = fadd bfloat %1, %b
+  %3 = fmul bfloat %2, %b
+  %4 = fdiv bfloat %3, %b
+  %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0
+  %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1
+  %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0
+  %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1
+  %9 = fadd <2 x bfloat> %6, %8
+  %10 = extractelement <2 x bfloat> %9, i32 0
+  %11 = extractelement <2 x bfloat> %9, i32 1
+  %12 = fadd bfloat %10, %11
+  %13 = fadd bfloat %12, %4
+  ret bfloat %13
+}
+
+define i1 @fast_half_test(half %0, half %1) #0 {
+top:
+; CHECK-LABEL: @fast_half_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %2 = fsub fast half %0, %1
+; CHECK-NEXT:    %3 = fcmp fast oeq half %2, 0xH0000
+; CHECK-NEXT:    ret i1 %3
+;
+  %2 = fsub fast half %0, %1
+  %3 = fcmp fast oeq half %2, 0xH0000
+  ret i1 %3
+}
+
+define i1 @fast_bfloat_test(bfloat %0, bfloat %1) #2 {
+top:
+; CHECK-LABEL: @fast_bfloat_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %2 = fsub fast bfloat %0, %1
+; CHECK-NEXT:    %3 = fcmp fast oeq bfloat %2, 0xR0000
+; CHECK-NEXT:    ret i1 %3
+;
+  %2 = fsub fast bfloat %0, %1
+  %3 = fcmp fast oeq bfloat %2, 0xR0000
+  ret i1 %3
+}
+
+attributes #0 = { "julia.hasfp16"="false" }
+attributes #1 = { "julia.hasfp16"="true" }
+attributes #2 = { "julia.hasbf16"="false" }
+attributes #3 = { "julia.hasbf16"="true" }
diff --git a/test/llvmpasses/gc-invariant-verifier.ll b/test/llvmpasses/gc-invariant-verifier.ll
new file mode 100644
index 0000000000000..652fabc742aad
--- /dev/null
+++ b/test/llvmpasses/gc-invariant-verifier.ll
@@ -0,0 +1,13 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(GCInvariantVerifier)' -S %s | FileCheck %s
+
+; CHECK-LABEL: @vectorized_addrspacecast
+define ptr addrspace(10) @vectorized_addrspacecast() {
+top:
+  ret ptr addrspace(10) null
+
+vector.ph:
+  %0 = addrspacecast <4 x ptr addrspace(10)> zeroinitializer to <4 x ptr addrspace(11)>
+  unreachable
+}
diff --git a/test/llvmpasses/gc-writebarrier-volatile.ll b/test/llvmpasses/gc-writebarrier-volatile.ll
new file mode 100644
index 0000000000000..c4bfafdb670f8
--- /dev/null
+++ b/test/llvmpasses/gc-writebarrier-volatile.ll
@@ -0,0 +1,41 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC,gvn)' -S %s | FileCheck %s
+
+; Test for issue #59547: Ensure write barrier GC tag loads are volatile
+; This test verifies that the LateLowerGCFrame pass marks GC tag loads as volatile
+; to prevent GVN from incorrectly constant-folding them, which would eliminate
+; necessary write barrier checks.
+
+@tag = external addrspace(10) global {}, align 16
+
+declare void @julia.write_barrier({} addrspace(10)*, {} addrspace(10)*)
+declare {}*** @julia.get_pgcstack()
+declare {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*)
+
+; Test that write barrier expansion produces volatile GC tag loads
+; CHECK-LABEL: @test_writebarrier_volatile_tags
+define {} addrspace(10)* @test_writebarrier_volatile_tags() {
+top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %current_task = bitcast {}*** %pgcstack to {}**
+  %parent = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
+  %child = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
+  call void @julia.write_barrier({} addrspace(10)* %parent, {} addrspace(10)* %child)
+  ret {} addrspace(10)* %parent
+
+; The critical test: GC tag loads must be volatile to prevent constant folding
+; CHECK: load atomic volatile i64, ptr {{.*}} unordered, align 8, {{.*}}!tbaa
+; CHECK: and i64 {{.*}}, 3
+; CHECK: icmp eq i64 {{.*}}, 3
+; CHECK: br i1 {{.*}}, label %may_trigger_wb, label
+
+; CHECK: may_trigger_wb:
+; CHECK: load atomic volatile i64, ptr {{.*}} unordered, align 8, {{.*}}!tbaa
+; CHECK: and i64 {{.*}}, 1
+; CHECK: icmp eq i64 {{.*}}, 0
+; CHECK: br i1 {{.*}}, label %trigger_wb, label
+
+; CHECK: trigger_wb:
+; CHECK: call void @ijl_gc_queue_root(ptr {{.*}})
+}
diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll
index 7d29a9e3b1f9e..d8c1438e4ff63 100644
--- a/test/llvmpasses/gcroots.ll
+++ b/test/llvmpasses/gcroots.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
@@ -19,13 +15,9 @@ top:
 ; CHECK-LABEL: @simple
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
-; TYPED: call {} addrspace(10)* @jl_box_int64
 ; OPAQUE: call ptr addrspace(10) @jl_box_int64
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]
-; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]
 ; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
@@ -33,9 +25,6 @@ top:
 ; CHECK-NEXT: %bboxed =
 ; Make sure the same gc slot isn't re-used
 
-; TYPED-NOT: getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]
-; TYPED: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]
-; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]]
 
 ; OPAQUE-NOT: getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]]
 ; OPAQUE: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]
@@ -51,7 +40,6 @@ define void @leftover_alloca({} addrspace(10)* %a) {
 ; If this pass encounters an alloca, it'll just sink it into the gcframe,
 ; relying on mem2reg to catch simple cases such as this earlier
 ; CHECK-LABEL: @leftover_alloca
-; TYPED: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe
 ; OPAQUE: %var = getelementptr inbounds ptr addrspace(10), ptr %gcframe
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -70,12 +58,8 @@ define void @simple_union() {
 ; CHECK-LABEL: @simple_union
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
-; TYPED: %a = call { {} addrspace(10)*, i8 } @union_ret()
 ; OPAQUE: %a = call { ptr addrspace(10), i8 } @union_ret()
     %a = call { {} addrspace(10)*, i8 } @union_ret()
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]
-; TYPED-NEXT: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %a, 0
-; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]
 ; OPAQUE-NEXT: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %a, 0
@@ -101,7 +85,6 @@ define void @select_simple(i64 %a, i64 %b) {
 define void @phi_simple(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @phi_simple
-; TYPED:   %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -115,8 +98,6 @@ blabel:
     br label %common
 common:
     %phi = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ]
-; TYPED:  [[GEP:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED:  store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP]]
 
 ; OPAQUE:  [[GEP:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
 ; OPAQUE:  store ptr addrspace(10) %phi, ptr [[GEP]]
@@ -128,7 +109,6 @@ declare void @one_arg_decayed(i64 addrspace(12)*)
 
 define void @select_lift(i64 %a, i64 %b) {
 ; CHECK-LABEL: @select_lift
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -137,7 +117,6 @@ define void @select_lift(i64 %a, i64 %b) {
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
     %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)*
     %cmp = icmp eq i64 %a, %b
-; TYPED: %gclift = select i1 %cmp, {} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed
 ; OPAQUE: %gclift = select i1 %cmp, ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed
     %selectb = select i1 %cmp, i64 addrspace(12)* %adecayed, i64 addrspace(12)* %bdecayed
     call void @one_arg_decayed(i64 addrspace(12)* %selectb)
@@ -147,7 +126,6 @@ define void @select_lift(i64 %a, i64 %b) {
 define void @phi_lift(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @phi_lift
-; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ]
 ; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ]
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -177,7 +155,6 @@ top:
     br i1 %cmp, label %alabel, label %blabel
 alabel:
     %u = call { {} addrspace(10)*, i8 } @union_ret()
-; TYPED: %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0
 ; OPAQUE: %aboxed = extractvalue { ptr addrspace(10), i8 } %u, 0
     %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
@@ -188,7 +165,6 @@ blabel:
     %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)*
     br label %common
 common:
-; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ]
 ; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ]
     %phi = phi i64 addrspace(12)* [ %adecayed, %alabel ], [ %bdecayed, %blabel ]
     call void @one_arg_decayed(i64 addrspace(12)* %phi)
@@ -198,7 +174,6 @@ common:
 define void @live_if_live_out(i64 %a, i64 %b) {
 ; CHECK-LABEL: @live_if_live_out
 top:
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -217,12 +192,10 @@ succ:
 ; safepoint
 define {} addrspace(10)* @ret_use(i64 %a, i64 %b) {
 ; CHECK-LABEL: @ret_use
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed
 ; OPAQUE: store ptr addrspace(10) %aboxed
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
     ret {} addrspace(10)* %aboxed
@@ -230,16 +203,11 @@ define {} addrspace(10)* @ret_use(i64 %a, i64 %b) {
 
 define {{} addrspace(10)*, i8} @ret_use_struct() {
 ; CHECK-LABEL: @ret_use_struct
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
-; TYPED: %aunion = call { {} addrspace(10)*, i8 } @union_ret()
 ; OPAQUE: %aunion = call { ptr addrspace(10), i8 } @union_ret()
     %aunion = call { {} addrspace(10)*, i8 } @union_ret()
-; TYPED-DAG: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]
-; TYPED-DAG: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %aunion, 0
-; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE-DAG: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]
 ; OPAQUE-DAG: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %aunion, 0
@@ -273,12 +241,10 @@ top:
 
 define void @global_ref() {
 ; CHECK-LABEL: @global_ref
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load {} addrspace(10)*, {} addrspace(10)** getelementptr ({} addrspace(10)*, {} addrspace(10)** inttoptr (i64 140540744325952 to {} addrspace(10)**), i64 1)
-; TYPED: store {} addrspace(10)* %loaded, {} addrspace(10)**
 ; OPAQUE: store ptr addrspace(10) %loaded, ptr
     call void @one_arg_boxed({} addrspace(10)* %loaded)
     ret void
@@ -286,13 +252,11 @@ define void @global_ref() {
 
 define {} addrspace(10)* @no_redundant_rerooting(i64 %a, i1 %cond) {
 ; CHECK-LABEL: @no_redundant_rerooting
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed
 ; OPAQUE: store ptr addrspace(10) %aboxed
 ; CHECK-NEXT: call void @jl_safepoint()
     call void @jl_safepoint()
@@ -313,13 +277,11 @@ declare void @llvm.memcpy.p064.p10i8.i64(i64*, i8 addrspace(10)*, i64, i32, i1)
 
 define void @memcpy_use(i64 %a, i64 *%aptr) {
 ; CHECK-LABEL: @memcpy_use
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed
 ; OPAQUE: store ptr addrspace(10) %aboxed
     call void @jl_safepoint()
     %acast = bitcast {} addrspace(10)* %aboxed to i8 addrspace(10)*
@@ -332,23 +294,19 @@ declare void @llvm.julia.gc_preserve_end(token)
 
 define void @gc_preserve(i64 %a) {
 ; CHECK-LABEL: @gc_preserve
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed
 ; OPAQUE: store ptr addrspace(10) %aboxed
     call void @jl_safepoint()
     %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %aboxed)
     %aboxed2 = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed2
 ; OPAQUE: store ptr addrspace(10) %aboxed2
     call void @jl_safepoint()
     call void @llvm.julia.gc_preserve_end(token %tok)
     %aboxed3 = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: store {} addrspace(10)* %aboxed3
 ; OPAQUE: store ptr addrspace(10) %aboxed3
     call void @jl_safepoint()
     call void @one_arg_boxed({} addrspace(10)* %aboxed2)
@@ -358,24 +316,11 @@ top:
 
 define void @gc_preserve_vec([2 x <2 x {} addrspace(10)*>] addrspace(11)* nocapture nonnull readonly dereferenceable(16)) {
 ; CHECK-LABEL: @gc_preserve_vec
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 6
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 6
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %v = load [2 x <2 x {} addrspace(10)*>], [2 x <2 x {} addrspace(10)*>] addrspace(11)* %0, align 8
-; TYPED-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0
-; TYPED-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0
-; TYPED-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1
-; TYPED-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1
-; TYPED-DAG: [[V11:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT11]], i32 0
-; TYPED-DAG: [[V12:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT12]], i32 1
-; TYPED-DAG: [[V21:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT21]], i32 0
-; TYPED-DAG: [[V22:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT22]], i32 1
-; TYPED-DAG: store {} addrspace(10)* [[V11]]
-; TYPED-DAG: store {} addrspace(10)* [[V12]]
-; TYPED-DAG: store {} addrspace(10)* [[V21]]
-; TYPED-DAG: store {} addrspace(10)* [[V22]]
 
 ; OPAQUE-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0
 ; OPAQUE-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0
@@ -428,7 +373,6 @@ declare {} addrspace(10) *@alloc()
 
 define {} addrspace(10)* @vec_loadobj() {
 ; CHECK-LABEL: @vec_loadobj
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
   %pgcstack = call {}*** @julia.get_pgcstack()
   %v4 = call {}*** @julia.ptls_states()
@@ -443,7 +387,6 @@ define {} addrspace(10)* @vec_loadobj() {
 
 define {} addrspace(10)* @vec_gep() {
 ; CHECK-LABEL: @vec_gep
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
   %pgcstack = call {}*** @julia.get_pgcstack()
   %v4 = call {}*** @julia.ptls_states()
@@ -459,7 +402,6 @@ define {} addrspace(10)* @vec_gep() {
 declare i1 @check_property({} addrspace(10)* %val)
 define void @loopyness(i1 %cond1, {} addrspace(10) *%arg) {
 ; CHECK-LABEL: @loopyness
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -473,8 +415,6 @@ header:
 a:
 ; This needs a store
 ; CHECK-LABEL: a:
-; TYPED:  [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]
-; TYPED:  store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP1]]
 
 ; OPAQUE:  [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]
 ; OPAQUE:  store ptr addrspace(10) %phi, ptr [[GEP1]]
@@ -483,8 +423,6 @@ a:
 
 latch:
 ; This as well in case we went the other path
-; TYPED:  [[GEP2:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]
-; TYPED:  store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP2]]
 
 ; OPAQUE:  [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]]
 ; OPAQUE:  store ptr addrspace(10) %phi, ptr [[GEP2]]
@@ -498,7 +436,6 @@ exit:
 
 define {} addrspace(10)* @phi_union(i1 %cond) {
 ; CHECK-LABEL: @phi_union
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -524,7 +461,6 @@ join:
 
 define {} addrspace(10)* @select_union(i1 %cond) {
 ; CHECK-LABEL: @select_union
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -541,7 +477,6 @@ top:
 
 define i8 @simple_arrayptr() {
 ; CHECK-LABEL: @simple_arrayptr
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
    %pgcstack = call {}*** @julia.get_pgcstack()
@@ -559,7 +494,6 @@ top:
 
 define {} addrspace(10)* @vecstoreload(<2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecstoreload
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -574,7 +508,6 @@ top:
 
 define void @vecphi(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecphi
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -601,7 +534,6 @@ common:
 
 define i8 @phi_arrayptr(i1 %cond) {
 ; CHECK-LABEL: @phi_arrayptr
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -641,7 +573,6 @@ common:
 
 define void @vecselect(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecselect
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -659,14 +590,12 @@ top:
 
 define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecselect_lift
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*>
     call void @jl_safepoint()
-; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}}
 ; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}}
     %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed
     call void @jl_safepoint()
@@ -679,14 +608,12 @@ define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 
 define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecvecselect_lift
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*>
     call void @jl_safepoint()
-; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}}
 ; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}}
     %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed
     call void @jl_safepoint()
@@ -699,7 +626,6 @@ define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) {
 
 define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) {
 ; CHECK-LABEL: @vecscalarselect_lift
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -707,7 +633,6 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) {
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
     %avec = getelementptr i64, i64 addrspace(12)*  %adecayed, <2 x i32> zeroinitializer
     call void @jl_safepoint()
-; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %aboxed
 ; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %aboxed
     %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec
     call void @jl_safepoint()
@@ -720,7 +645,6 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) {
 
 define void @scalarvecselect_lift(i1 %cond, i64 %a) {
 ; CHECK-LABEL: @scalarvecselect_lift
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
@@ -728,7 +652,6 @@ define void @scalarvecselect_lift(i1 %cond, i64 %a) {
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
     %avec = getelementptr i64, i64 addrspace(12)*  %adecayed, <2 x i32> zeroinitializer
     call void @jl_safepoint()
-; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %aboxed
 ; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %aboxed
     %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec
     call void @jl_safepoint()
@@ -741,7 +664,6 @@ define void @scalarvecselect_lift(i1 %cond, i64 %a) {
 
 define i8 @select_arrayptr(i1 %cond) {
 ; CHECK-LABEL: @select_arrayptr
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
     %pgcstack = call {}*** @julia.get_pgcstack()
@@ -769,11 +691,8 @@ top:
 
 define i8 @vector_arrayptrs() {
 ; CHECK-LABEL: @vector_arrayptrs
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
 ; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]]
@@ -795,12 +714,8 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8 (<2 x i8 ad
 
 define i8 @masked_arrayptrs() {
 ; CHECK-LABEL: @masked_arrayptrs
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 
-; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8(<2 x i8 addrspace(13)*> addrspace(11)* %arrayptrptr, i32 16, <2 x i1> <i1 true, i1 false>, <2 x i8 addrspace(13)*> zeroinitializer)
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.load.v2p13.p11(ptr addrspace(11) %arrayptrptr, i32 16, <2 x i1> <i1 true, i1 false>, <2 x ptr addrspace(13)> zeroinitializer)
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
@@ -823,12 +738,8 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8 (<2 x i8
 
 define i8 @gather_arrayptrs() {
 ; CHECK-LABEL: @gather_arrayptrs
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 
-; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 false>, <2 x i8 addrspace(13)*> zeroinitializer)
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 false>, <2 x ptr addrspace(13)> zeroinitializer)
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
@@ -850,14 +761,10 @@ top:
 
 define i8 @gather_arrayptrs_alltrue() {
 ; CHECK-LABEL: @gather_arrayptrs
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 
-; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 true>, <2 x i8 addrspace(13)*> zeroinitializer)
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 
-; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 true>, <2 x ptr addrspace(13)> zeroinitializer)
+; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> {{(<i1 true, i1 true>|splat \(i1 true\))}}, <2 x ptr addrspace(13)> zeroinitializer)
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
 ; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]]
 ;
@@ -877,11 +784,8 @@ top:
 
 define i8 @lost_select_decayed(i1 %arg1) {
 ; CHECK-LABEL: @lost_select_decayed
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 
-; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
-; TYPED: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]]
 
 ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2
 ; OPAQUE: store ptr addrspace(10) [[SOMETHING:%.*]], ptr [[GEP0]]
diff --git a/test/llvmpasses/image-codegen.jl b/test/llvmpasses/image-codegen.jl
index 8132dc4faa22a..d594c02a4392e 100644
--- a/test/llvmpasses/image-codegen.jl
+++ b/test/llvmpasses/image-codegen.jl
@@ -2,7 +2,7 @@
 # RUN: export JULIA_LLVM_ARGS="--print-before=loop-vectorize --print-module-scope"
 # RUN: rm -rf %t
 # RUN: mkdir %t
-# RUN: julia --image-codegen --startup-file=no %s 2> %t/output.txt
+# RUN: julia --image-codegen -t1,0 --startup-file=no %s 2> %t/output.txt
 # RUN: FileCheck %s < %t/output.txt
 
 # COM: checks that global variables compiled in imaging codegen
@@ -13,8 +13,8 @@
 # CHECK-NOT: internal global
 # CHECK-NOT: private global
 # CHECK: jl_global
-# CHECK-SAME: = global
-# CHECK: julia_f_
+# COM: we emit both declarations and definitions, so we may see either style in the IR
+# CHECK-SAME: = {{(external )?}}
 # CHECK-NOT: internal global
 # CHECK-NOT: private global
 
diff --git a/test/llvmpasses/julia-licm-fail.ll b/test/llvmpasses/julia-licm-fail.ll
index 464a96f1413d9..76ce19af96e94 100644
--- a/test/llvmpasses/julia-licm-fail.ll
+++ b/test/llvmpasses/julia-licm-fail.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 ; COM: This file contains functions that should not trigger allocations to be hoisted out of loops
 
@@ -25,10 +21,8 @@ preheader:
   br label %loop
 ; CHECK: loop:
 loop:
-; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
 ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag)
   %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc)
 ; OPAQUE-NEXT: %ignore = call ptr addrspace(10) @escape(ptr addrspace(10) %alloc)
   %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc)
   br i1 %ret, label %return, label %loop
@@ -51,13 +45,10 @@ preheader:
   br label %loop
 ; CHECK: loop:
 loop:
-; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
 ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag)
   %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
 ; OPAQUE-NEXT: %cast = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11)
   %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
-; TYPED-NEXT: %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast)
 ; OPAQUE-NEXT: %ptr = call nonnull ptr @julia.pointer_from_objref(ptr addrspace(11) %cast)
   %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast)
   br i1 %ret, label %return, label %loop
@@ -82,7 +73,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
 declare void @ijl_gc_queue_root({} addrspace(10)*) #3
 
 ; Function Attrs: allocsize(1)
-declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1
+declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1
 
 ; Function Attrs: allocsize(1)
 declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
diff --git a/test/llvmpasses/julia-licm-memoryssa.ll b/test/llvmpasses/julia-licm-memoryssa.ll
index e1684c7577578..4f25a99f7e615 100644
--- a/test/llvmpasses/julia-licm-memoryssa.ll
+++ b/test/llvmpasses/julia-licm-memoryssa.ll
@@ -1,8 +1,6 @@
 ; COM: NewPM-only test, tests that memoryssa is preserved correctly
 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 @tag = external addrspace(10) global {}, align 16
 
@@ -116,8 +114,6 @@ top:
 preheader:
 ; CHECK-NEXT: [[ALLOC:[0-9]+]] = MemoryDef([[PGCSTACK]])
 
-; TYPED-NEXT: %alloc = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 0, {} addrspace(10)* @tag)
-; TYPED-NEXT: %[[BCAST:.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)*
 
 ; OPAQUE-NEXT: %alloc = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %current_task, i64 0, ptr addrspace(10) @tag)
 
diff --git a/test/llvmpasses/julia-licm-missed.ll b/test/llvmpasses/julia-licm-missed.ll
index 941b2d072a1cc..37a547c9861b7 100644
--- a/test/llvmpasses/julia-licm-missed.ll
+++ b/test/llvmpasses/julia-licm-missed.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 ; COM: This file contains functions that currently do not trigger allocations to be hoisted out of loops
 ; COM: i.e. they are missed optimizations
@@ -29,16 +25,12 @@ preheader:
   br label %loop
 ; CHECK: loop:
 loop:
-; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
 ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag)
   %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
 ; OPAQUE-NEXT: %derived = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11)
   %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
-; TYPED-NEXT: %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)*
 ; OPAQUE-NEXT: %ptr = bitcast ptr addrspace(11) %derived to ptr addrspace(11)
   %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)*
-; TYPED-NEXT: store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8
 ; OPAQUE-NEXT: store ptr addrspace(10) %obj, ptr addrspace(11) %ptr, align 8
   store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8
   br i1 %ret, label %return, label %loop
@@ -63,13 +55,11 @@ preheader:
   br label %loop
 ; CHECK: loop:
 loop:
-; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
 ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag)
   %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
   br label %other
 ; CHECK: other:
 other:
-; TYPED-NEXT: %phi = phi {} addrspace(10)* [ %alloc, %loop ]
 ; OPAQUE-NEXT: %phi = phi ptr addrspace(10) [ %alloc, %loop ]
   %phi = phi {} addrspace(10)* [ %alloc, %loop ]
   br i1 %ret, label %return, label %loop
@@ -96,7 +86,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
 declare void @ijl_gc_queue_root({} addrspace(10)*) #3
 
 ; Function Attrs: allocsize(1)
-declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1
+declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1
 
 ; Function Attrs: allocsize(1)
 declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
diff --git a/test/llvmpasses/julia-licm.ll b/test/llvmpasses/julia-licm.ll
index 8bedc5db75d96..732b62788f13c 100644
--- a/test/llvmpasses/julia-licm.ll
+++ b/test/llvmpasses/julia-licm.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 @tag = external addrspace(10) global {}, align 16
 
@@ -100,9 +96,6 @@ L3:                                               ; preds = %L3.loopexit, %top
 L4:                                               ; preds = %top
   %current_task112 = getelementptr inbounds {}**, {}*** %1, i64 -12
   %current_task1 = bitcast {}*** %current_task112 to {}**
-  ; TYPED: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag)
-  ; TYPED-NEXT: %4 = bitcast {} addrspace(10)* %3 to i8 addrspace(10)*
-  ; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} %4, i8 0, i64 8, i1 false)
 
   ; OPAQUE: %3 = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task1, i64 8, ptr addrspace(10) @tag)
   ; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %3, i8 0, i64 8, i1 false)
@@ -112,8 +105,6 @@ L4:                                               ; preds = %top
 
 L22:                                              ; preds = %L4, %L22
   %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ]
-  ; TYPED: %value_phi5 = phi i64 [ 1, %L4 ], [ %6, %L22 ]
-  ; TYPED-NEXT %5 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)*
 
   ; OPAQUE: %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ]
   ; OPAQUE-NEXT %4 = bitcast ptr addrspace(10) %3 to ptr addrspace(10)
@@ -135,9 +126,6 @@ top:
   br label %preheader
 ; CHECK: preheader:
 preheader:
-; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: [[casted:%.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)*
-; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} [[casted]], i8 0, i64 8, i1 false)
 
 ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag)
 ; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %alloc, i8 0, i64 8, i1 false)
@@ -164,7 +152,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
 declare void @ijl_gc_queue_root({} addrspace(10)*) #3
 
 ; Function Attrs: allocsize(1)
-declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1
+declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1
 
 ; Function Attrs: allocsize(1)
 declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
diff --git a/test/llvmpasses/julia-simdloop-memoryssa.ll b/test/llvmpasses/julia-simdloop-memoryssa.ll
new file mode 100644
index 0000000000000..b99fb4f57db20
--- /dev/null
+++ b/test/llvmpasses/julia-simdloop-memoryssa.ll
@@ -0,0 +1,53 @@
+; COM: NewPM-only test, tests that memoryssa is preserved correctly
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK
+
+; CHECK-LABEL: MemorySSA for function: simd_test
+; CHECK-LABEL: @simd_test(
+define void @simd_test(double *%a, double *%b) {
+; CHECK: top:
+top:
+  br label %loop
+; CHECK: loop:
+loop:
+; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({top,liveOnEntry},{loop,[[MSSA_USE:[0-9]+]]})
+  %i = phi i64 [0, %top], [%nexti, %loop]
+  %aptr = getelementptr double, double *%a, i64 %i
+  %bptr = getelementptr double, double *%b, i64 %i
+; CHECK: MemoryUse([[MPHI]])
+; CHECK: llvm.mem.parallel_loop_access
+  %aval = load double, double *%aptr
+; CHECK: MemoryUse([[MPHI]])
+  %bval = load double, double *%aptr
+  %cval = fadd double %aval, %bval
+; CHECK: [[MSSA_USE]] = MemoryDef([[MPHI]])
+  store double %cval, double *%bptr
+  %nexti = add i64 %i, 1
+  %done = icmp sgt i64 %nexti, 500
+  br i1 %done, label %loopdone, label %loop, !llvm.loop !1
+loopdone:
+  ret void
+}
+
+; CHECK-LABEL: MemorySSA for function: simd_test_sub2
+; CHECK-LABEL: @simd_test_sub2(
+define double @simd_test_sub2(double *%a) {
+top:
+  br label %loop
+loop:
+  %i = phi i64 [0, %top], [%nexti, %loop]
+  %v = phi double [0.000000e+00, %top], [%nextv, %loop]
+  %aptr = getelementptr double, double *%a, i64 %i
+; CHECK: MemoryUse(liveOnEntry) 
+  %aval = load double, double *%aptr
+  %nextv = fsub double %v, %aval
+; CHECK: fsub reassoc contract double %v, %aval
+  %nexti = add i64 %i, 1
+  %done = icmp sgt i64 %nexti, 500
+  br i1 %done, label %loopdone, label %loop, !llvm.loop !0
+loopdone:
+  ret double %nextv
+}
+
+!0 = distinct !{!0, !"julia.simdloop"}
+!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"}
\ No newline at end of file
diff --git a/test/llvmpasses/julia-simdloop.ll b/test/llvmpasses/julia-simdloop.ll
new file mode 100644
index 0000000000000..9a23a2826da70
--- /dev/null
+++ b/test/llvmpasses/julia-simdloop.ll
@@ -0,0 +1,133 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s
+
+; CHECK-LABEL: @simd_test(
+define void @simd_test(ptr %a, ptr %b) {
+top:
+  br label %loop
+loop:
+  %i = phi i64 [0, %top], [%nexti, %loop]
+  %aptr = getelementptr double, ptr %a, i64 %i
+  %bptr = getelementptr double, ptr %b, i64 %i
+; CHECK: llvm.mem.parallel_loop_access
+  %aval = load double, ptr %aptr
+  %bval = load double, ptr %aptr
+  %cval = fadd double %aval, %bval
+  store double %cval, ptr %bptr
+  %nexti = add i64 %i, 1
+  %done = icmp sgt i64 %nexti, 500
+  br i1 %done, label %loopdone, label %loop, !llvm.loop !1
+loopdone:
+  ret void
+}
+
+; CHECK-LABEL: @simd_test_sub(
+define double @simd_test_sub(ptr %a) {
+top:
+  br label %loop
+loop:
+  %i = phi i64 [0, %top], [%nexti, %loop]
+  %v = phi double [0.000000e+00, %top], [%nextv, %loop]
+  %aptr = getelementptr double, ptr %a, i64 %i
+; CHECK: llvm.mem.parallel_loop_access
+  %aval = load double, ptr %aptr
+  %nextv = fsub double %v, %aval
+; CHECK: fsub reassoc contract double %v, %aval
+  %nexti = add i64 %i, 1
+  %done = icmp sgt i64 %nexti, 500
+  br i1 %done, label %loopdone, label %loop, !llvm.loop !1
+loopdone:
+  ret double %nextv
+}
+
+; CHECK-LABEL: @simd_test_sub2(
+define double @simd_test_sub2(ptr %a) {
+top:
+  br label %loop
+loop:
+  %i = phi i64 [0, %top], [%nexti, %loop]
+  %v = phi double [0.000000e+00, %top], [%nextv, %loop]
+  %aptr = getelementptr double, ptr %a, i64 %i
+  %aval = load double, ptr %aptr
+  %nextv = fsub double %v, %aval
+; CHECK: fsub reassoc contract double %v, %aval
+  %nexti = add i64 %i, 1
+  %done = icmp sgt i64 %nexti, 500
+  br i1 %done, label %loopdone, label %loop, !llvm.loop !0
+loopdone:
+  ret double %nextv
+}
+
+; CHECK-LABEL: @simd_test_sub4(
+define double @simd_test_sub4(ptr %a) {
+top:
+  br label %loop
+loop:
+  %i = phi i64 [0, %top], [%nexti, %loop]
+  %v = phi double [0.000000e+00, %top], [%nextv, %loop]
+  %aptr = getelementptr double, double *%a, i64 %i
+  %aval = load double, double *%aptr
+  %nextv2 = fmul double %aval, %aval
+  ; CHECK: fmul contract double %aval, %aval
+  %nextv = fsub double %v, %nextv2
+; CHECK: fsub reassoc contract double %v, %nextv2
+  %nexti = add i64 %i, 1
+  %done = icmp sgt i64 %nexti, 500
+  br i1 %done, label %loopdone, label %loop, !llvm.loop !0
+loopdone:
+  ret double %nextv
+}
+
+; Tests if we correctly pass through other metadata
+; CHECK-LABEL: @disabled(
+define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %N
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 48
+; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]]
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
+
+for.end:                                          ; preds = %for.body
+  %1 = load i32, i32* %a, align 4
+  ret i32 %1
+}
+
+; Check that we don't add contract to non loop things
+; CHECK-LABEL: @dont_add_no_loop(
+define double @dont_add_no_loop(ptr nocapture noundef nonnull readonly align 8 dereferenceable(72) %"a::Tuple", ptr nocapture noundef nonnull readonly align 8 dereferenceable(24) %"b::Tuple") #0 {
+top:
+   %"a::Tuple[9]_ptr" = getelementptr inbounds i8, ptr %"a::Tuple", i64 64
+   %"b::Tuple[3]_ptr" = getelementptr inbounds i8, ptr %"b::Tuple", i64 16
+   %"a::Tuple[6]_ptr" = getelementptr inbounds i8, ptr %"a::Tuple", i64 40
+   %"b::Tuple[2]_ptr" = getelementptr inbounds i8, ptr %"b::Tuple", i64 8
+   %"a::Tuple[3]_ptr" = getelementptr inbounds i8, ptr %"a::Tuple", i64 16
+   %"a::Tuple[3]_ptr.unbox" = load double, ptr %"a::Tuple[3]_ptr", align 8
+   %"b::Tuple.unbox" = load double, ptr %"b::Tuple", align 8
+   %0 = fmul double %"a::Tuple[3]_ptr.unbox", %"b::Tuple.unbox"
+; CHECK: fmul double %
+   %"a::Tuple[6]_ptr.unbox" = load double, ptr %"a::Tuple[6]_ptr", align 8
+   %"b::Tuple[2]_ptr.unbox" = load double, ptr %"b::Tuple[2]_ptr", align 8
+   %1 = fmul contract double %"a::Tuple[6]_ptr.unbox", %"b::Tuple[2]_ptr.unbox"
+   %2 = fadd contract double %0, %1
+   %"a::Tuple[9]_ptr.unbox" = load double, ptr %"a::Tuple[9]_ptr", align 8
+   %"b::Tuple[3]_ptr.unbox" = load double, ptr %"b::Tuple[3]_ptr", align 8
+   %3 = fmul contract double %"a::Tuple[9]_ptr.unbox", %"b::Tuple[3]_ptr.unbox"
+   %4 = fadd contract double %2, %3
+   ret double %4
+}
+
+
+!0 = distinct !{!0, !"julia.simdloop"}
+!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"}
+!2 = distinct !{!2, !"julia.simdloop", !"julia.ivdep", !3}
+!3 = !{!"llvm.loop.vectorize.disable", i1 0}
diff --git a/test/llvmpasses/late-lower-gc-addrspaces.ll b/test/llvmpasses/late-lower-gc-addrspaces.ll
index 9849f432fb9a7..9c041664a9682 100644
--- a/test/llvmpasses/late-lower-gc-addrspaces.ll
+++ b/test/llvmpasses/late-lower-gc-addrspaces.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
 
 target triple = "amdgcn-amd-amdhsa"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
@@ -22,39 +18,29 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*)
 define void @gc_frame_lowering(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_lowering
-; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-; TYPED:  %pgcstack = call {}*** @julia.get_pgcstack()
 
-; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2)
-; OPAQUE:  %pgcstack = call ptr @julia.get_pgcstack()
+; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2)
+; CHECK:  %pgcstack = call ptr @julia.get_pgcstack()
     %pgcstack = call {}*** @julia.get_pgcstack()
-; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
-; TYPED-NEXT: call {} addrspace(10)* @jl_box_int64
 
-; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
-; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64
+; CHECK-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
+; CHECK-NEXT: call ptr addrspace(10) @jl_box_int64
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]])
-; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]]
 
-; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
+; CHECK: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
 ; CHECK-NEXT: %bboxed =
 ; Make sure the same gc slot isn't re-used
-; TYPED-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]])
-; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]])
-; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]]
 
-; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
-; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
+; CHECK-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
+; CHECK: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
 
 ; CHECK-NEXT: call void @boxed_simple
     call void @boxed_simple({} addrspace(10)* %aboxed,
                             {} addrspace(10)* %bboxed)
-; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe)
-; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
+; CHECK-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
     ret void
 }
 
@@ -64,25 +50,15 @@ top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
-; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
-; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
-; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
-; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8)
-; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
-; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
-
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8)
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: ret {} addrspace(10)* %v
-; OPAQUE-NEXT: ret ptr addrspace(10) %v
+; CHECK-NEXT: ret ptr addrspace(10) %v
     ret {} addrspace(10)* %v
 }
 
@@ -97,34 +73,21 @@ top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
-; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
-; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
-; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
-; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8)
-; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
-; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
-
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8)
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+; CHECK-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
     %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7
-; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
+; CHECK-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
     %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1
-; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8
-; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
+; CHECK-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
     store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2
-; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7
-; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
+; CHECK-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
     %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4
 ; CHECK-NEXT: ret void
     ret void
diff --git a/test/llvmpasses/late-lower-gc-sret.ll b/test/llvmpasses/late-lower-gc-sret.ll
new file mode 100644
index 0000000000000..b8593f691bb6f
--- /dev/null
+++ b/test/llvmpasses/late-lower-gc-sret.ll
@@ -0,0 +1,151 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
+
+declare ptr @julia.get_pgcstack()
+
+declare swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]), ptr nonnull swiftself, ptr addrspace(10) nonnull)
+
+define hidden swiftcc nonnull ptr addrspace(10) @sret_select(ptr nonnull swiftself "gcstack" %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) {
+  ; CHECK-LABEL: @sret_select
+  ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
+  ; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
+  ; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6)
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %3 = alloca [3 x i64], align 8
+  %4 = alloca [3 x i64], align 8
+  %5 = select i1 %unpredictable, ptr %3, ptr %4
+  call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %5, ptr nonnull swiftself "gcstack" %0, ptr addrspace(10) nonnull %1)
+  ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
+  ret ptr addrspace(10) %1
+}
+
+define hidden swiftcc nonnull ptr addrspace(10) @sret_phi(ptr nonnull swiftself "gcstack" %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) {
+top:
+  ; CHECK-LABEL: @sret_phi
+  ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
+  ; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
+  ; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6)
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %2 = alloca [3 x i64], align 8
+  %3 = alloca [3 x i64], align 8
+  br i1 %unpredictable, label %true, label %false
+
+true:                                             ; preds = %top
+  br label %ret
+
+false:                                            ; preds = %top
+  br label %ret
+
+ret:                                              ; preds = %false, %true
+  %4 = phi ptr [ %2, %true ], [ %3, %false ]
+  call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %4, ptr nonnull swiftself "gcstack" %0, ptr addrspace(10) nonnull %1)
+  ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
+  ret ptr addrspace(10) %1
+}
+
+declare swiftcc void @sret_call_gc(ptr noalias nocapture noundef sret({ ptr addrspace(10), i64, i64 }), ptr noalias nocapture noundef, ptr nonnull swiftself)
+
+define hidden swiftcc void @sret_gc_root_phi(ptr nonnull swiftself "gcstack" %0, i1 %unpredictable) {
+top:
+  ; CHECK-LABEL: @sret_gc_root_phi
+  ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
+  ; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
+  ; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
+  ; CHECK: alloca [3 x i64], align 8
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %1 = alloca [3 x i64], align 8
+  %2 = alloca ptr addrspace(10), align 8
+  %3 = alloca ptr addrspace(10), align 8
+  store i64 0, ptr %2, align 8
+  store i64 0, ptr %3, align 8
+  br i1 %unpredictable, label %true, label %false
+
+true:                                             ; preds = %top
+  br label %ret
+
+false:                                            ; preds = %top
+  br label %ret
+
+ret:                                              ; preds = %false, %true
+  %4 = phi ptr [ %2, %true ], [ %3, %false ]
+  call swiftcc void @sret_call_gc(ptr noalias nocapture noundef sret({ ptr addrspace(10), i64, i64 }) %1, ptr noalias nocapture noundef %4, ptr nonnull swiftself "gcstack" %0)
+   ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
+  ret void
+}
+
+
+define hidden swiftcc void @sret_gc_root_phi_select(ptr nonnull swiftself "gcstack" %0, i1 %unpredictable, i1 %unpredictable2) {
+top:
+  ; CHECK-LABEL: @sret_gc_root_phi_select
+  ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 3)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 2)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
+  ; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
+  ; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 3)
+  ; CHECK: alloca [3 x i64], align 8
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %1 = alloca [3 x i64], align 8
+  %2 = alloca ptr addrspace(10), align 8
+  %3 = alloca ptr addrspace(10), align 8
+  %4 = alloca ptr addrspace(10), align 8
+  store i64 0, ptr %2, align 8
+  store i64 0, ptr %3, align 8
+  store i64 0, ptr %4, align 8
+  br i1 %unpredictable, label %true, label %false
+
+true:                                             ; preds = %top
+  br label %ret
+
+false:                                            ; preds = %top
+  br label %ret
+
+ret:                                              ; preds = %false, %true
+  %5 = phi ptr [ %2, %true ], [ %3, %false ]
+  %6 = select i1 %unpredictable2, ptr %4, ptr %5
+  call swiftcc void @sret_call_gc(ptr noalias nocapture noundef sret({ ptr addrspace(10), i64, i64 }) %1, ptr noalias nocapture noundef %6, ptr nonnull swiftself "gcstack" %0)
+   ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
+  ret void
+}
+
+define hidden swiftcc void @sret_gc_root_select_phi(ptr nonnull swiftself "gcstack" %0, i1 %unpredictable, i1 %unpredictable2) {
+top:
+  ; CHECK-LABEL: @sret_gc_root_select_phi
+  ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 3)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 2)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1)
+  ; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
+  ; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
+  ; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 3)
+  ; CHECK: alloca [3 x i64], align 8
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %1 = alloca [3 x i64], align 8
+  %2 = alloca ptr addrspace(10), align 8
+  %3 = alloca ptr addrspace(10), align 8
+  %4 = alloca ptr addrspace(10), align 8
+  store i64 0, ptr %2, align 8
+  store i64 0, ptr %3, align 8
+  store i64 0, ptr %4, align 8
+  %5 = select i1 %unpredictable2, ptr %3, ptr %4
+  br i1 %unpredictable, label %true, label %false
+
+true:                                             ; preds = %top
+  br label %ret
+
+false:                                            ; preds = %top
+  br label %ret
+
+ret:                                              ; preds = %false, %true
+  %6 = phi ptr [ %2, %true ], [ %5, %false ]
+
+  call swiftcc void @sret_call_gc(ptr noalias nocapture noundef sret({ ptr addrspace(10), i64, i64 }) %1, ptr noalias nocapture noundef %6, ptr nonnull swiftself "gcstack" %0)
+   ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
+  ret void
+}
diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll
index 36e581993c176..346e19e537819 100644
--- a/test/llvmpasses/late-lower-gc.ll
+++ b/test/llvmpasses/late-lower-gc.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s -check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s -check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
 
 @tag = external addrspace(10) global {}, align 16
 
@@ -19,39 +15,29 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*)
 define void @gc_frame_lowering(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_lowering
-; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-; TYPED:  %pgcstack = call {}*** @julia.get_pgcstack()
 
-; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2)
-; OPAQUE:  %pgcstack = call ptr @julia.get_pgcstack()
+; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2)
+; CHECK:  %pgcstack = call ptr @julia.get_pgcstack()
     %pgcstack = call {}*** @julia.get_pgcstack()
-; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
-; TYPED-NEXT: call {} addrspace(10)* @jl_box_int64
 
-; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
-; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64
+; CHECK-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
+; CHECK-NEXT: call ptr addrspace(10) @jl_box_int64
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
-; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]])
-; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]]
 
-; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
+; CHECK: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
 ; CHECK-NEXT: %bboxed =
 ; Make sure the same gc slot isn't re-used
-; TYPED-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]])
-; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]])
-; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]]
 
-; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
-; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
+; CHECK-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
+; CHECK: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
 
 ; CHECK-NEXT: call void @boxed_simple
     call void @boxed_simple({} addrspace(10)* %aboxed,
                             {} addrspace(10)* %bboxed)
-; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe)
-; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
+; CHECK-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
     ret void
 }
 
@@ -61,25 +47,15 @@ top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
-; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
-; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
-; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
-; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8)
-; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
-; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
-
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8)
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: ret {} addrspace(10)* %v
-; OPAQUE-NEXT: ret ptr addrspace(10) %v
+; CHECK-NEXT: ret ptr addrspace(10) %v
     ret {} addrspace(10)* %v
 }
 
@@ -94,39 +70,40 @@ top:
     %pgcstack = call {}*** @julia.get_pgcstack()
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
-; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
-; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
-; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
-; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8)
-; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
-; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
-
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8)
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+; CHECK-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
     %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7
-; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
+; CHECK-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
     %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1
-; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8
-; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
+; CHECK-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
     store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2
-; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7
-; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
+; CHECK-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
     %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4
 ; CHECK-NEXT: ret void
     ret void
 }
 
+; Confirm that `invariant.load` on other loads survive
+define void @gc_keep_invariant(float addrspace(1)* %0) {
+top:
+; CHECK-LABEL: @gc_keep_invariant
+    %pgcstack = call {}*** @julia.get_pgcstack()
+    %1 = bitcast {}*** %pgcstack to {}**
+    %current_task = getelementptr inbounds {}*, {}** %1, i64 -12
+
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %1, i64 -12
+    %2 = load float, ptr addrspace(1) %0, align 4, !invariant.load !1
+; CHECK-NEXT: %2 = load float, ptr addrspace(1) %0, align 4, !invariant.load
+    ret void
+}
+
 define i32 @callee_root({} addrspace(10)* %v0, {} addrspace(10)* %v1) {
 top:
 ; CHECK-LABEL: @callee_root
@@ -187,32 +164,54 @@ define {} addrspace(10)* @gclift_switch({} addrspace(13)* addrspace(10)* %input,
   ret {} addrspace(10)* %ret
 }
 
+; Shouldn't hang
+define void @vector_insert(<4 x {} addrspace(10)* > %0, <2 x {} addrspace(10)* > %1) {
+top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %2 = call <4 x {} addrspace(10)*> @llvm.vector.insert.v4p10.v2p10(<4 x {} addrspace(10)*> %0, <2 x {} addrspace(10)*> %1, i64 2)
+  ret void
+}
+
+define void @vector_extract(<4 x {} addrspace(10)* > %0, <2 x {} addrspace(10)* > %1) {
+top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %2 = call <2 x {} addrspace(10)*> @llvm.vector.extract.v2p10.v4p10(<4 x {} addrspace(10)* > %0, i64 2)
+  ret void
+}
+
 define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) {
   %v2 = call {}*** @julia.get_pgcstack()
   %e0 = extractvalue [2 x {} addrspace(10)* addrspace(11)*] %ar, 0
   %l0 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %e0
   %e1 = extractvalue [2 x {} addrspace(10)* addrspace(11)*] %ar, 1
   %l1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %e1
-  %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) 
+  %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1)
   ret void
 }
 
 ; CHECK-LABEL: @decayar
-; TYPED:  %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-; TYPED:  %1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1)
-; TYPED:  store {} addrspace(10)* %l0, {} addrspace(10)** %1, align 8
-; TYPED:  %2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0)
-; TYPED: store {} addrspace(10)* %l1, {} addrspace(10)** %2, align 8
-; TYPED: %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1)
-; TYPED: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe)
-
-; OPAQUE:  %gcframe = call ptr @julia.new_gc_frame(i32 2)
-; OPAQUE:  %1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1)
-; OPAQUE:  store ptr addrspace(10) %l0, ptr %1, align 8
-; OPAQUE:  %2 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
-; OPAQUE: store ptr addrspace(10) %l1, ptr %2, align 8
-; OPAQUE: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1)
-; OPAQUE: call void @julia.pop_gc_frame(ptr %gcframe)
+
+; CHECK:  %gcframe = call ptr @julia.new_gc_frame(i32 2)
+; CHECK: [[gc_slot_addr_:%.*]]1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1)
+; CHECK:  store ptr addrspace(10) %l0, ptr [[gc_slot_addr_:%.*]], align 8
+; CHECK:  [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
+; CHECK: store ptr addrspace(10) %l1, ptr [[gc_slot_addr_:%.*]], align 8
+; CHECK: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1)
+; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
+
+define swiftcc ptr addrspace(10) @insert_element(ptr swiftself "gcstack" %0) {
+; CHECK-LABEL: @insert_element
+  %2 = alloca [10 x i64], i32 1, align 8
+; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 10)
+; CHECK: [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
+; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 10)
+  call void null(ptr sret([2 x [5 x ptr addrspace(10)]]) %2, ptr null, ptr addrspace(11) null, ptr null)
+  %4 = insertelement <4 x ptr> zeroinitializer, ptr %2, i32 0
+; CHECK: [[gc_slot_addr_:%.*]] = insertelement <4 x ptr> zeroinitializer, ptr [[gc_slot_addr_:%.*]], i32 0
+; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
+  ret ptr addrspace(10) null
+}
+
 
 !0 = !{i64 0, i64 23}
 !1 = !{!1}
@@ -237,3 +236,4 @@ define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) {
 ; CHECK-NEXT: !10 = distinct !{!10}
 ; CHECK-NEXT: !11 = !{!12, !12, i64 0}
 ; CHECK-NEXT: !12 = !{!"jtbaa_const", !3}
+
diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl
index 3e0df7a8885a7..294c657196142 100644
--- a/test/llvmpasses/llvmcall.jl
+++ b/test/llvmpasses/llvmcall.jl
@@ -1,11 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0"
-
-# RUN: julia --startup-file=no %s %t
-# RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,TYPED
-
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1"
+# RUN: export JULIA_LLVM_ARGS=""
 
 # RUN: julia --startup-file=no %s %t
 # RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,OPAQUE
@@ -17,7 +12,7 @@ struct Foo
     y::Int32
 end
 
-@generated foo(x)=:(ccall("extern foo", llvmcall, $x, ($x,), x))
+@generated foo(x) = :(ccall("extern foo", llvmcall, $x, ($x,), x))
 bar(x) = ntuple(i -> VecElement{Float16}(x[i]), 2)
 
 # CHECK: define
@@ -48,7 +43,7 @@ emit(foo, Float16)
 # CHECK: ret [2 x half]
 # CHECK-NOT: define
 # CHECK: }
-emit(foo, NTuple{2, Float16})
+emit(foo, NTuple{2,Float16})
 
 # COM: Make sure that we don't miss a function by accident (helps localize errors)
 # CHECK-NOT: {
@@ -67,7 +62,7 @@ emit(foo, NTuple{2, Float16})
 # CHECK: ret <2 x half>
 # CHECK-NOT: define
 # CHECK: }
-emit(foo, NTuple{2, VecElement{Float16}})
+emit(foo, NTuple{2,VecElement{Float16}})
 
 # COM: Make sure that we don't miss a function by accident (helps localize errors)
 # CHECK-NOT: {
@@ -89,7 +84,7 @@ emit(foo, NTuple{2, VecElement{Float16}})
 # OPAQUE: ret ptr addrspace(3)
 # CHECK-NOT: define
 # CHECK: }
-emit(foo, Core.LLVMPtr{Float32, 3})
+emit(foo, Core.LLVMPtr{Float32,3})
 
 # COM: Make sure that we don't miss a function by accident (helps localize errors)
 # CHECK-NOT: {
@@ -127,7 +122,7 @@ emit(foo, Foo)
 # CHECK: ret <2 x half>
 # CHECK-NOT: define
 # CHECK: }
-emit(bar, NTuple{2, Float16})
+emit(bar, NTuple{2,Float16})
 
 # COM: Make sure that we don't miss a function by accident (helps localize errors)
 # CHECK-NOT: {
diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl
index b9b388c73d0c5..759ff09499deb 100644
--- a/test/llvmpasses/loopinfo.jl
+++ b/test/llvmpasses/loopinfo.jl
@@ -2,8 +2,7 @@
 
 # RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll
 # RUN: cat %t/module.ll | FileCheck %s
-# RUN: cat %t/module.ll | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S - | FileCheck %s -check-prefix=LOWER
-# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S - | FileCheck %s -check-prefix=LOWER
+# RUN: cat %t/module.ll | opt --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S - | FileCheck %s -check-prefix=LOWER
 # RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll
 # RUN: cat %t/module.ll | FileCheck %s -check-prefix=FINAL
 
@@ -27,12 +26,11 @@ function simdf(X)
     acc = zero(eltype(X))
     @simd for x in X
         acc += x
-# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO:![0-9]+]]
-# LOWER-NOT: llvm.mem.parallel_loop_access
-# LOWER: fadd reassoc contract double
-# LOWER-NOT: call void @julia.loopinfo_marker()
-# LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]]
-# FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double>
+        # CHECK: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]]
+        # LOWER-NOT: llvm.mem.parallel_loop_access
+        # LOWER: fadd reassoc contract double
+        # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]]
+        # FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double>
     end
     acc
 end
@@ -43,11 +41,10 @@ function simdf2(X)
     acc = zero(eltype(X))
     @simd ivdep for x in X
         acc += x
-# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO2:![0-9]+]]
-# LOWER: llvm.mem.parallel_loop_access
-# LOWER-NOT: call void @julia.loopinfo_marker()
-# LOWER: fadd reassoc contract double
-# LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]]
+        # CHECK: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]]
+        # LOWER: llvm.mem.parallel_loop_access
+        # LOWER: fadd reassoc contract double
+        # LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]]
     end
     acc
 end
@@ -61,14 +58,13 @@ end
     for i in 1:N
         iteration(i)
         $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.count"), 3)))
-# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]]
-# LOWER-NOT: call void @julia.loopinfo_marker()
-# LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]]
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
-# FINAL: br
+        # CHECK: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]]
+        # LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]]
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: br
     end
 end
 
@@ -83,24 +79,23 @@ end
     for i in 1:10
         for j in J
             1 <= j <= I && continue
-            @show (i,j)
+            @show (i, j)
             iteration(i)
         end
         $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"),)))
-# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]]
-# LOWER-NOT: call void @julia.loopinfo_marker()
-# LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]]
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
+        # CHECK: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]]
+        # LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]]
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL: call {{(swiftcc )?}}void @j_iteration
+        # FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
     end
 end
 
@@ -109,21 +104,21 @@ end
     for i in 1:10
         for j in J
             1 <= j <= I && continue
-            @show (i,j)
+            @show (i, j)
             iteration(i)
-# FINAL: call {{(swiftcc )?}}void @j_iteration
-# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
+            # FINAL: call {{(swiftcc )?}}void @j_iteration
+            # FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
         end
         $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.disable"),)))
     end
 end
 
 ## Check all the MD nodes
-# CHECK: [[LOOPINFO]] = !{!"julia.simdloop"}
-# CHECK: [[LOOPINFO2]] = !{!"julia.simdloop", !"julia.ivdep"}
-# CHECK: [[LOOPINFO3]] = !{[[LOOPUNROLL:![0-9]+]]}
+# CHECK: [[LOOPID]] = distinct !{[[LOOPID]], !"julia.simdloop"}
+# CHECK: [[LOOPID2]] = distinct !{[[LOOPID2]], !"julia.simdloop", !"julia.ivdep"}
+# CHECK: [[LOOPID3]] = distinct !{[[LOOPID3]], [[LOOPUNROLL:![0-9]+]]}
 # CHECK: [[LOOPUNROLL]] = !{!"llvm.loop.unroll.count", i64 3}
-# CHECK: [[LOOPINFO4]] = !{[[LOOPUNROLL2:![0-9]+]]}
+# CHECK: [[LOOPID4]] = distinct !{[[LOOPID4]], [[LOOPUNROLL2:![0-9]+]]}
 # CHECK: [[LOOPUNROLL2]] = !{!"llvm.loop.unroll.full"}
 # LOWER: [[LOOPID]] = distinct !{[[LOOPID]]}
 # LOWER: [[LOOPID2]] = distinct !{[[LOOPID2]]}
diff --git a/test/llvmpasses/lower-handlers-addrspaces.ll b/test/llvmpasses/lower-handlers-addrspaces.ll
deleted file mode 100644
index 744bf09082646..0000000000000
--- a/test/llvmpasses/lower-handlers-addrspaces.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; This file is a part of Julia. License is MIT: https://julialang.org/license
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
-
-target triple = "amdgcn-amd-amdhsa"
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
-
-attributes #1 = { returns_twice }
-declare i32 @julia.except_enter() #1
-declare void @ijl_pop_handler(i32)
-declare i8**** @julia.ptls_states()
-declare i8**** @julia.get_pgcstack()
-
-define void @simple() {
-top:
-    %pgcstack = call i8**** @julia.get_pgcstack()
-; CHECK: call void @llvm.lifetime.start
-; CHECK: call void @ijl_enter_handler
-; CHECK: setjmp
-    %r = call i32 @julia.except_enter()
-    %cmp = icmp eq i32 %r, 0
-    br i1 %cmp, label %try, label %catch
-try:
-    br label %after
-catch:
-    br label %after
-after:
-    call void @ijl_pop_handler(i32 1)
-; CHECK: llvm.lifetime.end
-    ret void
-}
diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll
deleted file mode 100644
index 2f5dea6cf0892..0000000000000
--- a/test/llvmpasses/lower-handlers.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; This file is a part of Julia. License is MIT: https://julialang.org/license
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
-
-attributes #1 = { returns_twice }
-declare i32 @julia.except_enter() #1
-declare void @ijl_pop_handler(i32)
-declare i8**** @julia.ptls_states()
-declare i8**** @julia.get_pgcstack()
-
-define void @simple() {
-top:
-    %pgcstack = call i8**** @julia.get_pgcstack()
-; CHECK: call void @llvm.lifetime.start
-; CHECK: call void @ijl_enter_handler
-; CHECK: setjmp
-    %r = call i32 @julia.except_enter()
-    %cmp = icmp eq i32 %r, 0
-    br i1 %cmp, label %try, label %catch
-try:
-    br label %after
-catch:
-    br label %after
-after:
-    call void @ijl_pop_handler(i32 1)
-; CHECK: llvm.lifetime.end
-    ret void
-}
diff --git a/test/llvmpasses/memoryref-addrspace.jl b/test/llvmpasses/memoryref-addrspace.jl
new file mode 100644
index 0000000000000..4b12c47400b6f
--- /dev/null
+++ b/test/llvmpasses/memoryref-addrspace.jl
@@ -0,0 +1,21 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+# RUN: julia --startup-file=no --check-bounds=yes %s %t -O
+# RUN: cat %t/* | FileCheck %s
+
+include(joinpath("..", "testhelpers", "llvmpasses.jl"))
+
+# Test for GenericMemoryRef address space bug
+# Issue: stores incorrectly use addrspace(10) instead of addrspace(11)
+# in bounds checking code, causing LLVM assertion failures
+function bf(i, x)
+    x[i] *= x[i]
+    nothing
+end
+
+# CHECK-LABEL: @julia_bf
+# CHECK: oob:
+# CHECK: store ptr {{.*}}, ptr %"box::GenericMemoryRef"
+# CHECK-NOT: store {{.*}} addrspace(10) {{.*}}GenericMemoryRef
+# CHECK: call void @ijl_bounds_error_int
+
+emit(bf, Int, Vector{Float64})
diff --git a/test/llvmpasses/muladd.ll b/test/llvmpasses/muladd.ll
deleted file mode 100644
index afeb068317844..0000000000000
--- a/test/llvmpasses/muladd.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; This file is a part of Julia. License is MIT: https://julialang.org/license
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s
-
-
-; CHECK-LABEL: @fast_muladd1
-define double @fast_muladd1(double %a, double %b, double %c) {
-top:
-; CHECK: {{contract|fmuladd}}
-  %v1 = fmul double %a, %b
-  %v2 = fadd fast double %v1, %c
-; CHECK: ret double
-  ret double %v2
-}
-
-; CHECK-LABEL: @fast_mulsub1
-define double @fast_mulsub1(double %a, double %b, double %c) {
-top:
-; CHECK: {{contract|fmuladd}}
-  %v1 = fmul double %a, %b
-  %v2 = fsub fast double %v1, %c
-; CHECK: ret double
-  ret double %v2
-}
-
-; CHECK-LABEL: @fast_mulsub_vec1
-define <2 x double> @fast_mulsub_vec1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-top:
-; CHECK: {{contract|fmuladd}}
-  %v1 = fmul <2 x double> %a, %b
-  %v2 = fsub fast <2 x double> %c, %v1
-; CHECK: ret <2 x double>
-  ret <2 x double> %v2
-}
-
-; COM: Should not mark fmul as contract when multiple uses of fmul exist
-; CHECK-LABEL: @slow_muladd1
-define double @slow_muladd1(double %a, double %b, double %c) {
-top:
-; CHECK: %v1 = fmul double %a, %b
-  %v1 = fmul double %a, %b
-; CHECK: %v2 = fadd fast double %v1, %c
-  %v2 = fadd fast double %v1, %c
-; CHECK: %v3 = fadd fast double %v1, %b
-  %v3 = fadd fast double %v1, %b
-; CHECK: %v4 = fadd fast double %v3, %v2
-  %v4 = fadd fast double %v3, %v2
-; CHECK: ret double %v4
-  ret double %v4
-}
-
-; COM: Should not mark fadd->fadd fast as contract
-; CHECK-LABEL: @slow_addadd1
-define double @slow_addadd1(double %a, double %b, double %c) {
-top:
-; CHECK: %v1 = fadd double %a, %b
-  %v1 = fadd double %a, %b
-; CHECK: %v2 = fadd fast double %v1, %c
-  %v2 = fadd fast double %v1, %c
-; CHECK: ret double %v2
-  ret double %v2
-}
diff --git a/test/llvmpasses/multiversioning-annotate-only.ll b/test/llvmpasses/multiversioning-annotate-only.ll
index 0109010f4c1a1..849cf57c78aa3 100644
--- a/test/llvmpasses/multiversioning-annotate-only.ll
+++ b/test/llvmpasses/multiversioning-annotate-only.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s
 
 ; COM: This test checks that multiversioning correctly picks up on features that should trigger cloning
 ; COM: Note that for annotations alone, we don't need jl_fvars or jl_gvars
diff --git a/test/llvmpasses/multiversioning-clone-only.ll b/test/llvmpasses/multiversioning-clone-only.ll
index e37eefdc362f7..c4f5257a59988 100644
--- a/test/llvmpasses/multiversioning-clone-only.ll
+++ b/test/llvmpasses/multiversioning-clone-only.ll
@@ -1,38 +1,27 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE
 
+; CHECK: @jl_gvar_base = hidden constant i64 0
+; CHECK: @jl_gvar_offsets = hidden constant [0 x i32] zeroinitializer
 ; CHECK: @jl_fvar_idxs = hidden constant [1 x i32] zeroinitializer
 ; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer
-; TYPED: @subtarget_cloned_gv = hidden global i64* null
 ; OPAQUE: @subtarget_cloned_gv = hidden global ptr null
-; TYPED: @subtarget_cloned.reloc_slot = hidden global i32 (i32)* null
-; OPAQUE: @subtarget_cloned.reloc_slot = hidden global ptr null
-; CHECK: @jl_fvar_offsets = hidden constant [2 x i32] [i32 1, i32 0]
-; CHECK: @jl_gvar_base = hidden constant i64 0
-; CHECK: @jl_gvar_offsets = hidden constant [1 x i32] zeroinitializer
+; OPAQUE: @subtarget_cloned.reloc_slot = hidden global ptr @subtarget_cloned.autoinit_trampoline
+; CHECK: @jl_fvar_count = hidden constant i64 1
+; OPAQUE: @jl_fvar_ptrs = hidden global [1 x ptr] [ptr @subtarget_cloned]
 ; CHECK: @jl_clone_slots = hidden constant [5 x i32]
-; CHECK-SAME: i32 2, i32 0, {{.*}} sub {{.*}}@subtarget_cloned.reloc_slot{{.*}}@jl_gvar_base
+; CHECK-SAME: i32 2, i32 0, {{.*}} sub {{.*}}@subtarget_cloned.reloc_slot{{.*}}@jl_clone_slots
 ; CHECK: @jl_clone_idxs = hidden constant [13 x i32]
 ; COM: TODO actually check the clone idxs maybe?
-; CHECK: @jl_clone_offsets = hidden constant [4 x i32]
-; CHECK-SAME: sub
-; CHECK-SAME: @subtarget_cloned.1
-; CHECK-SAME: @subtarget_cloned
-; CHECK-SAME: sub
-; CHECK-SAME: @subtarget_cloned.2
-; CHECK-SAME: @subtarget_cloned
-; CHECK-SAME: sub
-
-@jl_fvars = global [1 x i64*] [i64* bitcast (i32 (i32)* @subtarget_cloned to i64*)], align 16
-@jl_gvars = global [0 x i64*] zeroinitializer, align 16
-@jl_fvar_idxs = hidden constant [1 x i32] [i32 0], align 16
-@jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 16
-@subtarget_cloned_gv = hidden global i64* bitcast (i32 (i32)* @subtarget_cloned to i64*), align 16
+; OPAQUE: @jl_clone_ptrs = hidden constant [4 x ptr] [ptr @subtarget_cloned.1, ptr @subtarget_cloned.2, ptr @subtarget_cloned, ptr @subtarget_cloned]
+
+@jl_fvars = global [1 x i64*] [i64* bitcast (i32 (i32)* @subtarget_cloned to i64*)], align 8
+@jl_gvar_base = hidden constant i64 zeroinitializer, align 8
+@jl_gvar_offsets = hidden constant [0 x i32] zeroinitializer, align 8
+@jl_fvar_idxs = hidden constant [1 x i32] [i32 0], align 8
+@jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 8
+@subtarget_cloned_gv = hidden global i64* bitcast (i32 (i32)* @subtarget_cloned to i64*), align 8
 
 @subtarget_cloned_aliased = alias i32 (i32), i32 (i32)* @subtarget_cloned
 
@@ -68,7 +57,7 @@ define noundef i32 @subtarget_cloned(i32 noundef %0) #2 {
 ; COM: should fixup this callsite since 2 is cloned for a subtarget
 ; CHECK: define{{.*}}@call_subtarget_cloned({{.*}}#[[CALL_SUBTARGET_CLONED_DEFAULT_ATTRS:[0-9]+]]
 ; CHECK-NEXT: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA:[0-9]+]], !invariant.load
-; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]
+; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]({{.*}})
 ; CHECK: ret i32
 define noundef i32 @call_subtarget_cloned(i32 noundef %0) #3 {
     %2 = call noundef i32 @subtarget_cloned(i32 noundef %0)
@@ -77,13 +66,23 @@ define noundef i32 @call_subtarget_cloned(i32 noundef %0) #3 {
 
 ; CHECK: define{{.*}}@call_subtarget_cloned_but_not_cloned({{.*}}#[[BORING_DEFAULT_ATTRS]]
 ; CHECK-NEXT: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA]], !invariant.load
-; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]
+; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]({{.*}})
 ; CHECK: ret i32
 define noundef i32 @call_subtarget_cloned_but_not_cloned(i32 noundef %0) #0 {
     %2 = call noundef i32 @subtarget_cloned(i32 noundef %0)
     ret i32 %2
 }
 
+; COM: check that the autoinit trampoline is generated correctly
+; CHECK: define{{.*}}@subtarget_cloned.autoinit_trampoline({{.*}}
+; CHECK-NEXT: top:
+; CHECK-NEXT: call ptr @ijl_autoinit_and_adopt_thread()
+; CHECK-NEXT: [[FUNC_PTR:%[0-9]+]] = load ptr, ptr @subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA]], !invariant.load
+; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]({{.*}})
+; CHECK: ret i32
+
+declare ptr @ijl_autoinit_and_adopt_thread()
+
 ; CHECK: define{{.*}}@boring.1({{.*}}#[[BORING_CLONEALL_ATTRS:[0-9]+]]
 ; CHECK-NEXT: ret i32 %0
 
@@ -117,10 +116,10 @@ define noundef i32 @call_subtarget_cloned_but_not_cloned(i32 noundef %0) #0 {
 ; CHECK-NOT: @subtarget_cloned_but_not_cloned.2
 
 ; COM: check for alias being rewritten to a function trampoline
-; CHECK: define{{.*}}@subtarget_cloned_aliased{{.*}}#[[SUBTARGET_ALIASED_ATTRS:[0-9]+]]
+; CHECK: define{{.*}}@subtarget_cloned_aliased{{[^.]*}}#[[SUBTARGET_ALIASED_ATTRS:[0-9]+]]
 ; CHECK-NOT: }
 ; CHECK: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA]], !invariant.load
-; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]
+; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]({{.*}})
 ; CHECK: ret i32
 
 ; CHECK: attributes #[[BORING_DEFAULT_ATTRS]]
diff --git a/test/llvmpasses/multiversioning-x86.ll b/test/llvmpasses/multiversioning-x86.ll
new file mode 100644
index 0000000000000..e2918d0c20eec
--- /dev/null
+++ b/test/llvmpasses/multiversioning-x86.ll
@@ -0,0 +1,123 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning,CPUFeatures' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE
+
+
+; COM: This test checks that multiversioning actually happens from start to finish
+; COM: We need the fvars for a proper test
+
+
+
+; OPAQUE: @jl_gvar_ptrs = global [0 x ptr] zeroinitializer, align 8
+; CHECK: @jl_fvar_idxs = hidden constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 8
+; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 8
+; OPAQUE: @simd_test.reloc_slot = hidden global ptr @simd_test.autoinit_trampoline
+; OPAQUE: @jl_fvar_ptrs = hidden global [5 x ptr] [ptr @boring, ptr @fastmath_test, ptr @loop_test, ptr @simd_test, ptr @simd_test_call]
+; OPAQUE: @jl_clone_slots = hidden constant [3 x i32] [i32 1, i32 3, i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test.reloc_slot to i64), i64 ptrtoint (ptr @jl_clone_slots to i64)) to i32)]
+; CHECK: @jl_clone_idxs = hidden constant [10 x i32] [i32 -2147483647, i32 3, i32 -2147483647, i32 3, i32 4, i32 1, i32 1, i32 2, i32 -2147483645, i32 4]
+; OPAQUE: @jl_clone_ptrs = hidden constant [9 x ptr] [ptr @boring.1, ptr @fastmath_test.1, ptr @loop_test.1, ptr @simd_test.1, ptr @simd_test_call.1, ptr @fastmath_test.2, ptr @loop_test.2, ptr @simd_test.2, ptr @simd_test_call.2]
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+target triple = "x86_64-linux-gnu"
+
+@jl_fvars = global [5 x i64*] [i64* bitcast (i32 (i32)* @boring to i64*),
+                               i64* bitcast (float (float, float)* @fastmath_test to i64*),
+                               i64* bitcast (i32 (i32)* @loop_test to i64*),
+                               i64* bitcast (i32 (<4 x i32>)* @simd_test to i64*),
+                               i64* bitcast (i32 (<4 x i32>)* @simd_test_call to i64*)
+                              ], align 8
+@jl_gvar_ptrs = global [0 x i64*] zeroinitializer, align 8
+@jl_fvar_idxs = hidden constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 8
+@jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 8
+
+declare i1 @julia.cpu.have_fma.f32()
+
+; CHECK: @boring{{.*}}#[[BORING_BASE:[0-9]+]]
+define noundef i32 @boring(i32 noundef %0) {
+  ret i32 %0
+}
+
+; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_BASE:[0-9]+]]
+; CHECK: %3 = sitofp i1 false to float
+define noundef float @fastmath_test(float noundef %0, float noundef %1) {
+  %3 = call i1 @julia.cpu.have_fma.f32()
+  %4 = sitofp i1 %3 to float
+  %5 = fadd fast float %0, %4
+  ret float %5
+}
+
+; CHECK: @loop_test{{.*}}#[[NOT_BORING_BASE:[0-9]+]]
+define noundef i32 @loop_test(i32 noundef %0) {
+  %2 = icmp sgt i32 %0, 0
+  br i1 %2, label %5, label %3
+
+3:                                                ; preds = %5, %1
+  %4 = phi i32 [ 0, %1 ], [ %9, %5 ]
+  ret i32 %4
+
+5:                                                ; preds = %1, %5
+  %6 = phi i32 [ %10, %5 ], [ 0, %1 ]
+  %7 = phi i32 [ %9, %5 ], [ 0, %1 ]
+  %8 = lshr i32 %6, 1
+  %9 = add nuw nsw i32 %8, %7
+  %10 = add nuw nsw i32 %6, 1
+  %11 = icmp eq i32 %10, %0
+  br i1 %11, label %3, label %5;, !llvm.loop -
+}
+
+; CHECK: @simd_test{{.*}}#[[SIMD_BASE_RELOC:[0-9]+]]
+define noundef i32 @simd_test(<4 x i32> noundef %0) {
+  %2 = extractelement <4 x i32> %0, i64 0
+  ret i32 %2
+}
+
+; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_BASE:[0-9]+]]
+define noundef i32 @simd_test_call(<4 x i32> noundef %0) {
+  %2 = call noundef i32 @simd_test(<4 x i32> noundef %0)
+  ret i32 %2
+}
+
+; CHECK: @boring{{.*}}#[[BORING_CLONE:[0-9]+]]
+
+; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]]
+; CHECK: %3 = sitofp i1 false to float
+
+; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]]
+; CHECK: %3 = sitofp i1 true to float
+
+; CHECK: @loop_test{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]]
+
+; CHECK: @loop_test{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]]
+
+; CHECK: @simd_test{{.*}}#[[SIMD_CLONE1:[0-9]+]]
+
+; CHECK: @simd_test{{.*}}#[[SIMD_CLONE2:[0-9]+]]
+
+; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]]
+; OPAQUE: %2 = load ptr, ptr @simd_test.reloc_slot, align 8, !tbaa !8, !invariant.load !12
+; CHECK: %3 = call noundef i32 %2(<4 x i32> noundef %0)
+
+; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]]
+; CHECK: %2 = call noundef i32 @simd_test.2(<4 x i32> noundef %0)
+
+; CHECK-DAG: attributes #[[BORING_BASE]] = { "julia.mv.clone"="0" "julia.mv.clones"="2" "julia.mv.fvar" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+; CHECK-DAG: attributes #[[NOT_BORING_BASE]] = { "julia.mv.clone"="0" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+; CHECK-DAG: attributes #[[SIMD_BASE_RELOC]] = { "julia.mv.clone"="0" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+; CHECK-DAG: attributes #[[BORING_CLONE]] = { "julia.mv.clone"="1" "julia.mv.clones"="2" "julia.mv.fvar" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+; CHECK-DAG: attributes #[[NOT_BORING_CLONE1]] = { "julia.mv.clone"="1" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+; CHECK-DAG: attributes #[[NOT_BORING_CLONE2]] =  { "julia.mv.clone"="2" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="haswell" "target-features"="+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+; CHECK-DAG: attributes #[[SIMD_CLONE1]] = { "julia.mv.clone"="1" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+; CHECK-DAG: attributes #[[SIMD_CLONE2]] = { "julia.mv.clone"="2" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="haswell" "target-features"="+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+
+
+!llvm.module.flags = !{!0, !2}
+
+
+!0 = !{i32 1, !"julia.mv.enable", i32 1}
+!1 = !{!1}
+!2 = !{i32 1, !"julia.mv.specs", !3}
+!3 = !{!4, !5, !6}
+!4 = !{!"x86-64", !"+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 0, i32 0}
+!5 = !{!"sandybridge", !"+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 0, i32 2}
+!6 = !{!"haswell", !"+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 1, i32 284}
diff --git a/test/llvmpasses/names.jl b/test/llvmpasses/names.jl
new file mode 100644
index 0000000000000..1ab2204044804
--- /dev/null
+++ b/test/llvmpasses/names.jl
@@ -0,0 +1,183 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll
+# RUN: cat %t/module.ll | FileCheck %s
+
+## Notes:
+# This script uses the `emit` function (defined llvmpasses.jl) to emit either
+# optimized or unoptimized LLVM IR. Each function is emitted individually and
+# `llvm-link` is used to create a single module that can be passed to opt.
+# The order in which files are emitted and linked is important since `lit` will
+# process the test cases in order.
+
+include(joinpath("..", "testhelpers", "llvmpasses.jl"))
+
+# COM: check basic parameter names
+function f1(a, b, c, d)
+    return a + b + c + d
+end
+
+# COM: check basic parameter names + varargs
+function f2(a, b, c, d, e...)
+    return a + b + c + d + sum(e)
+end
+
+mutable struct D
+    i::Int64
+end
+struct C
+    d::D
+end
+struct B
+    c::C
+end
+struct A
+    b::B
+end
+
+# COM: check getfield/setfield names
+function f5(a)
+    a.b.c.d.i = 0
+    return a.b.c.d
+end
+
+struct H end
+struct G
+    h::Ref{H}
+end
+struct F
+    g::Ref{G}
+end
+struct E
+    f::Ref{F}
+end
+
+# COM: check gc lowering names
+function f6(e)
+    return e.f[].g[].h[]
+end
+
+# COM: check getfield for Tuples
+function f7(a)
+    return a[2]
+end
+
+# COM: check write barrier names and struct names
+mutable struct Barrier
+    b
+end
+
+# COM: check write barrier names
+function f8(b,y)
+    b.b = y
+    return b
+end
+
+struct Named
+    x::Int
+end
+
+function fmemory(nel)
+    return Memory{Int64}(undef,nel)
+end
+# CHECK-LABEL: define {{(swiftcc )?}}double @julia_f1
+# CHECK-SAME: double %"a::Float64"
+# CHECK-SAME: double %"b::Float64"
+# CHECK-SAME: double %"c::Float64"
+# CHECK-SAME: double %"d::Float64"
+
+# CHECK: fadd double
+# CHECK-DAG: %"a::Float64"
+# CHECK-DAG: %"b::Float64"
+# CHECK-DAG: fadd double
+# CHECK-DAG: %"c::Float64"
+# CHECK-DAG: fadd double
+# CHECK-DAG: %"d::Float64"
+# CHECK: ret double
+# CHECK: }
+
+# CHECK-LABEL: define nonnull ptr @jfptr_f1
+# CHECK-SAME: %"function::Core.Function"
+# CHECK-SAME: %"args::Any[]"
+# CHECK-SAME: %"nargs::UInt32"
+# CHECK: %"+Core.Float64
+# CHECK: ret ptr
+# CHECK: }
+emit(f1, Float64, Float64, Float64, Float64)
+
+# CHECK: define {{(swiftcc )?}}double @julia_f2
+# CHECK-SAME: double %"a::Float64"
+# CHECK-SAME: double %"b::Float64"
+# CHECK-SAME: double %"c::Float64"
+# CHECK-SAME: double %"d::Float64"
+# CHECK-SAME: double %"e[1]::Float64"
+emit(f2, Float64, Float64, Float64, Float64, Float64)
+
+# CHECK: define {{(swiftcc )?}}double @julia_f2
+# CHECK-SAME: double %"a::Float64"
+# CHECK-SAME: double %"b::Float64"
+# CHECK-SAME: double %"c::Float64"
+# CHECK-SAME: double %"d::Float64"
+# CHECK-SAME: double %"e[1]::Float64"
+# CHECK-SAME: double %"e[2]::Float64"
+emit(f2, Float64, Float64, Float64, Float64, Float64, Float64)
+
+
+# CHECK: define {{(swiftcc )?}}double @julia_f2
+# CHECK-SAME: double %"a::Float64"
+# CHECK-SAME: double %"b::Float64"
+# CHECK-SAME: double %"c::Float64"
+# CHECK-SAME: double %"d::Float64"
+# CHECK-SAME: double %"e[1]::Float64"
+# CHECK-SAME: double %"e[2]::Float64"
+# CHECK-SAME: double %"e[3]::Float64"
+emit(f2, Float64, Float64, Float64, Float64, Float64, Float64, Float64)
+
+# CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f5
+# CHECK-SAME: %"a::A"
+# CHECK: %"a::A.d
+# COM: this text check relies on our LLVM code emission being relatively poor, which is not always the case
+emit(f5, A)
+
+# CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f6
+# CHECK-SAME: %"e::E"
+# CHECK: %jlcallframe
+# CHECK: %gcframe
+# CHECK: %frame.prev
+# CHECK: %task.gcstack
+# CHECK: %ptls_field
+# CHECK: %ptls_load
+# CHECK: %safepoint
+# CHECK: %"e::E.f"
+# CHECK: %"e::E.f.tag_addr"
+# CHECK: %"e::E.f.tag"
+# CHECK: @"+Main.Base.RefValue
+# CHECK: %gc_slot_addr_0
+# CHECK: @"jl_sym#g
+# CHECK: @"jl_sym#h
+emit(f6, E)
+
+
+# CHECK: define {{(swiftcc )?}}i64 @julia_f7
+# CHECK-SAME: %"a::Tuple"
+# CHECK: %"a::Tuple[2]_ptr.unbox
+emit(f7, Tuple{Int,Int})
+
+# CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f8
+# CHECK-SAME: %"y::Int64"
+# CHECK: %parent_bits
+# CHECK: %parent_old_marked
+# CHECK: %child_bit
+# CHECK: %child_not_marked
+emit(f8, Barrier, Int)
+
+# CHECK: define {{(swiftcc )?}}nonnull ptr @julia_Barrier
+# CHECK-SAME: %"b::Named"
+# CHECK: %"new::Barrier"
+# CHECK: %"box::Named"
+emit(Barrier, Named)
+
+# CHECK: define {{(swiftcc )?}}nonnull ptr @julia_fmemory
+# CHECK-SAME: %"nel::Int64"
+# CHECK: %"Memory{Int64}[]"
+emit(fmemory, Int64)
diff --git a/test/llvmpasses/parsing.ll b/test/llvmpasses/parsing.ll
index 434ffbb26c95f..6f9ed186a2dd0 100644
--- a/test/llvmpasses/parsing.ll
+++ b/test/llvmpasses/parsing.ll
@@ -1,6 +1,9 @@
 ; COM: NewPM-only test, tests for ability to parse Julia passes
 
-; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,LowerSIMDLoop,FinalLowerGC,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(JuliaLICM),GCInvariantVerifier<strong>,GCInvariantVerifier<no-strong>),LowerPTLSPass<imaging>,LowerPTLSPass<no-imaging>,JuliaMultiVersioning<external>,JuliaMultiVersioning<no-external>)' -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier<strong>,GCInvariantVerifier<no-strong>),LowerPTLSPass<imaging>,LowerPTLSPass<no-imaging>,JuliaMultiVersioning<external>,JuliaMultiVersioning<no-external>)' -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;llvm_only>" -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;no_llvm_only>" -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;no_enable_vector_pipeline>" -S %s -o /dev/null
 
 define void @test() {
     ret void
diff --git a/test/llvmpasses/pipeline-o0.jl b/test/llvmpasses/pipeline-o0.jl
index e48a5f7df111f..5dab675f2b547 100644
--- a/test/llvmpasses/pipeline-o0.jl
+++ b/test/llvmpasses/pipeline-o0.jl
@@ -1,14 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0"
-
-# RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-# RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1"
-
 # RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
@@ -19,7 +10,7 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl"))
 # CHECK-LABEL: @julia_simple
 # CHECK-NOT: julia.get_pgcstack
 # CHECK-NOT: julia.gc_alloc_obj
-# CHECK: ijl_gc_pool_alloc
+# CHECK: ijl_gc_small_alloc
 # COM: we want something vaguely along the lines of asm load from the fs register -> allocate bytes
 function simple()
     Ref(0)
diff --git a/test/llvmpasses/pipeline-o2-allocs.jl b/test/llvmpasses/pipeline-o2-allocs.jl
index 86ab9125f2f27..999e63e2725c4 100644
--- a/test/llvmpasses/pipeline-o2-allocs.jl
+++ b/test/llvmpasses/pipeline-o2-allocs.jl
@@ -1,12 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0"
-
-# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
-
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1"
-
 # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 
@@ -60,8 +53,7 @@ end
 # CHECK-NOT: julia.gc_preserve_end
 function nopreserve()
     ref = Ref(0)
-    GC.@preserve ref begin
-    end
+    GC.@preserve ref begin end
 end
 
 # COM: this cordons off the attributes/function declarations from the actual
diff --git a/test/llvmpasses/pipeline-o2-broadcast.jl b/test/llvmpasses/pipeline-o2-broadcast.jl
index 83a4450522c79..584e8855f0f8c 100644
--- a/test/llvmpasses/pipeline-o2-broadcast.jl
+++ b/test/llvmpasses/pipeline-o2-broadcast.jl
@@ -1,12 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0"
-
-# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
-# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
-
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1"
-
 # RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
 
diff --git a/test/llvmpasses/pipeline-o2.jl b/test/llvmpasses/pipeline-o2.jl
index 9fd42562f96aa..ea2fa293c7ebc 100644
--- a/test/llvmpasses/pipeline-o2.jl
+++ b/test/llvmpasses/pipeline-o2.jl
@@ -1,20 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0"
-
-# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
-# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
-
-# RUN: julia --startup-file=no -O2 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF
-# RUN: julia --startup-file=no -O3 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF
-
-# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO
-# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO
-
-# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1"
-
-# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
-# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
+# RUNx: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
+# RUNx: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
 
 # RUN: julia --startup-file=no -O2 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF
 # RUN: julia --startup-file=no -O3 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF
@@ -144,6 +131,18 @@ function loopedlength(arr)
     end
     len
 end
+# COM: Vector
+# ALL-LABEL: @julia_memset_like
+# ALL: vector.body
+
+# COM: Memory
+# ALL-LABEL: @julia_memset_like
+# ALL: vector.body
+function memset_like(mem)
+    for idx in eachindex(mem)
+        mem[idx] = 1.0
+    end
+end
 
 emit(iterate_read, Vector{Int64})
 emit(iterate_write, Vector{Int64}, Vector{Int64})
@@ -163,3 +162,6 @@ emit(sumloop, Int64)
 emit(simd_sumloop, Float32)
 
 emit(loopedlength, Vector{Int64})
+
+emit(memset_like, Vector{Float64})
+emit(memset_like, Memory{Float64})
diff --git a/test/llvmpasses/pipeline-prints.ll b/test/llvmpasses/pipeline-prints.ll
index 0c0d81420d9fe..9b5732981faf6 100644
--- a/test/llvmpasses/pipeline-prints.ll
+++ b/test/llvmpasses/pipeline-prints.ll
@@ -1,46 +1,25 @@
 ; COM: This is a newpm-only test, no legacypm command
 ; COM: we run all the prefixes even though some don't have tests because we want to make sure they don't crash
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION
-
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -force-vector-width=2 -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION
 
 ; ModuleID = 'f'
 source_filename = "f"
@@ -306,25 +285,18 @@ attributes #2 = { inaccessiblemem_or_argmemonly }
 
 ; COM: InstSimplify/InstCombine should kill this zext-trunc pair
 ; AFTEREARLYSIMPLIFICATION: [[ZEXT:%.*]] = zext i1 {{%.*}} to i8
-; AFTEREARLYSIMPLIFICATION-NEXT: trunc i8 [[ZEXT]] to i1
-
-; BEFOREEARLYOPTIMIZATION: [[ZEXT:%.*]] = zext i1 {{%.*}} to i8
-; BEFOREEARLYOPTIMIZATION-NEXT: trunc i8 [[ZEXT]] to i1
 
 ; AFTEREARLYOPTIMIZATION-NOT: zext i1 {{%.*}} to i8
-; AFTEREARLYOPTIMIZATION-NOT: trunc i8 {{%.*}} to i1
 
 ; BEFORELOOPOPTIMIZATION-NOT: zext i1 {{%.*}} to i8
-; BEFORELOOPOPTIMIZATION-NOT: trunc i8 {{%.*}} to i1
 
 ; COM: Loop simplification makes the exit condition obvious
 ; AFTERLOOPSIMPLIFICATION: L35.lr.ph:
-; AFTERLOOPSIMPLIFICATION-NEXT: add nuw nsw
+; AFTERLOOPSIMPLIFICATION: add nuw nsw
 
-; COM: Scalar optimization removes the previous add from the preheader
-; AFTERSCALAROPTIMIZATION: L35.preheader:
-; AFTERSCALAROPTIMIZATION-NOT: add nuw nsw
-; AFTERSCALAROPTIMIZATION-NEXT: br label %L35
+; COM: Scalar optimization removes the preheader
+; AFTERSCALAROPTIMIZATION: L17:
+; AFTERSCALAROPTIMIZATION: icmp eq i64 {{%.*}}, 1,
 
 ; COM: Vectorization does stuff
 ; AFTERVECTORIZATION: vector.body
@@ -332,4 +304,4 @@ attributes #2 = { inaccessiblemem_or_argmemonly }
 
 ; COM: Intrinsics are lowered and cleaned up by the time optimization is finished
 ; AFTEROPTIMIZATION-NOT: call void @julia.safepoint
-; AFTEROPTIMIZATION: load volatile i64{{.*}}%safepoint
\ No newline at end of file
+; AFTEROPTIMIZATION: load volatile i64{{.*}}%safepoint
diff --git a/test/llvmpasses/propagate-addrspace-non-zero.ll b/test/llvmpasses/propagate-addrspace-non-zero.ll
index ac491000ba1e5..996b995a58556 100644
--- a/test/llvmpasses/propagate-addrspace-non-zero.ll
+++ b/test/llvmpasses/propagate-addrspace-non-zero.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
 
 target triple = "amdgcn-amd-amdhsa"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
diff --git a/test/llvmpasses/propagate-addrspace.ll b/test/llvmpasses/propagate-addrspace.ll
index ffed83ddb615a..033fbd6f0386e 100644
--- a/test/llvmpasses/propagate-addrspace.ll
+++ b/test/llvmpasses/propagate-addrspace.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
 
 define i64 @simple() {
 ; CHECK-LABEL: @simple
diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll
index 4637fc4b45071..da32758c1dc5b 100644
--- a/test/llvmpasses/refinements.ll
+++ b/test/llvmpasses/refinements.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
 
 declare {}*** @julia.ptls_states()
@@ -30,14 +26,12 @@ define void @argument_refinement({} addrspace(10)* %a) {
 ; Check that we reuse the gc slot from the box
 define void @heap_refinement1(i64 %a) {
 ; CHECK-LABEL: @heap_refinement1
-; TYPED:   %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE:   %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
     %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)*
     %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
-; TYPED: store {} addrspace(10)* %aboxed
 ; OPAQUE: store ptr addrspace(10) %aboxed
     call void @jl_safepoint()
     %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)*
@@ -49,14 +43,12 @@ define void @heap_refinement1(i64 %a) {
 ; Check that we don't root the allocated value here, just the derived value
 define void @heap_refinement2(i64 %a) {
 ; CHECK-LABEL: @heap_refinement2
-; TYPED:   %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE:   %gcframe = alloca ptr addrspace(10), i32 3
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
     %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)*
     %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
-; TYPED: store {} addrspace(10)* %loaded1
 ; OPAQUE: store ptr addrspace(10) %loaded1
     call void @jl_safepoint()
     %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)*
@@ -67,20 +59,14 @@ define void @heap_refinement2(i64 %a) {
 ; Check that the way we compute rooting is compatible with refinements
 define void @issue22770() {
 ; CHECK-LABEL: @issue22770
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
     %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %y = call {} addrspace(10)* @allocate_some_value()
     %casted1 = bitcast {} addrspace(10)* %y to {} addrspace(10)* addrspace(10)*
     %x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
-; TYPED: store {} addrspace(10)* %y,
 ; OPAQUE: store ptr addrspace(10) %y,
     %a = call {} addrspace(10)* @allocate_some_value()
-; TYPED: store {} addrspace(10)* %a
-; TYPED: call void @one_arg_boxed({} addrspace(10)* %x)
-; TYPED: call void @one_arg_boxed({} addrspace(10)* %a)
-; TYPED: call void @one_arg_boxed({} addrspace(10)* %y)
 
 ; OPAQUE: store ptr addrspace(10) %a
 ; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %x)
@@ -89,10 +75,8 @@ define void @issue22770() {
     call void @one_arg_boxed({} addrspace(10)* %x)
     call void @one_arg_boxed({} addrspace(10)* %a)
     call void @one_arg_boxed({} addrspace(10)* %y)
-; TYPED: store {} addrspace(10)* %x
 ; OPAQUE: store ptr addrspace(10) %x
     %c = call {} addrspace(10)* @allocate_some_value()
-; TYPED: store {} addrspace(10)* %c
 ; OPAQUE: store ptr addrspace(10) %c
     call void @one_arg_boxed({} addrspace(10)* %x)
     call void @one_arg_boxed({} addrspace(10)* %c)
@@ -123,7 +107,6 @@ L3:
 
 define void @dont_refine_loop({} addrspace(10)* %x) {
 ; CHECK-LABEL: @dont_refine_loop
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -167,7 +150,6 @@ L2:
 
 define void @refine_loop_indirect({} addrspace(10)* %x) {
 ; CHECK-LABEL: @refine_loop_indirect
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -193,7 +175,6 @@ L2:
 
 define void @refine_loop_indirect2({} addrspace(10)* %x) {
 ; CHECK-LABEL: @refine_loop_indirect2
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3
 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3
 top:
   %pgcstack = call {}*** @julia.get_pgcstack()
diff --git a/test/llvmpasses/remove-addrspaces.ll b/test/llvmpasses/remove-addrspaces.ll
index b2d14ae49c8e7..99acd92b0e03b 100644
--- a/test/llvmpasses/remove-addrspaces.ll
+++ b/test/llvmpasses/remove-addrspaces.ll
@@ -1,11 +1,15 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; COM: check that the addrspace of the global itself is removed
+; OPAQUE: @ejl_enz_runtime_exc = external global {}
+@ejl_enz_runtime_exc = external addrspace(10) global {}
 
+; COM: check that package image fptrs work
+@pjlsys_BoundsError_32 = internal global {} addrspace(10)* ({}***, {} addrspace(10)*, [1 x i64] addrspace(11)*)* null
+; CHECK: @pjlsys_BoundsError_32 = internal global
+; OPAQUE-SAME: ptr null
 
 define i64 @getindex({} addrspace(10)* nonnull align 16 dereferenceable(40)) {
 ; CHECK-LABEL: @getindex
@@ -37,7 +41,6 @@ top:
 define nonnull {} addrspace(10)* @constexpr(i64) {
 ; CHECK-LABEL: @constexpr
 top:
-; TYPED: call {}* inttoptr (i64 139806640486784 to {}* ({}*, i64)*)({}* inttoptr (i64 139806425039920 to {}*), i64 1)
 ; OPAQUE: call ptr inttoptr (i64 139806640486784 to ptr)(ptr inttoptr (i64 139806425039920 to ptr), i64 1)
   %1 = call {} addrspace(10)* inttoptr (i64 139806640486784 to {} addrspace(10)* ({} addrspace(10)*, i64)*)({} addrspace(10)* addrspacecast ({}* inttoptr (i64 139806425039920 to {}*) to {} addrspace(10)*), i64 1)
 ; CHECK-NOT: addrspacecast
@@ -67,22 +70,16 @@ top:
   %c.cdr = getelementptr %list, %list* %c, i32 0, i32 1
 ; COM: Allow remove-addrspaces to rename the type but expect it to use the same prefix.
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %a
 ; OPAQUE-SAME: ptr %a
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %a
 ; OPAQUE-SAME: ptr %a
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %b
 ; OPAQUE-SAME: ptr %b
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %b
 ; OPAQUE-SAME: ptr %b
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %c
 ; OPAQUE-SAME: ptr %c
 ; CHECK: getelementptr %list
-; TYPED-SAME: %list* %c
 ; OPAQUE-SAME: ptr %c
   store i64 111, i64* %a.car
   store i64 222, i64* %b.car
@@ -112,12 +109,18 @@ exit:
 
 ; COM: check that address spaces in byval types are processed correctly
 define void @byval_type([1 x {} addrspace(10)*] addrspace(11)* byval([1 x {} addrspace(10)*]) %0) {
-; TYPED: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0)
 ; OPAQUE: define void @byval_type(ptr byval([1 x ptr]) %0)
   ret void
 }
 
 
+define private fastcc void @diffejulia__mapreduce_97() {
+L6:
+; OPAQUE: store atomic ptr @ejl_enz_runtime_exc, ptr null unordered
+  store atomic {} addrspace(10)* @ejl_enz_runtime_exc, {} addrspace(10)* addrspace(10)* null unordered, align 8
+  unreachable
+}
+
 ; COM: check that function attributes are preserved on declarations too
 declare void @convergent_function() #0
 attributes #0 = { convergent }
diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll
index 699d89f7257d0..eb1c6444129c3 100644
--- a/test/llvmpasses/returnstwicegc.ll
+++ b/test/llvmpasses/returnstwicegc.ll
@@ -1,10 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=TYPED
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=TYPED
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=OPAQUE
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s
 
 
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
@@ -17,8 +13,12 @@ declare void @one_arg_boxed({} addrspace(10)*)
 define void @try_catch(i64 %a, i64 %b)
 {
 ; Because of the returns_twice function, we need to keep aboxed live everywhere
-; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4
-; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
+; CHECK: %gcframe = alloca ptr addrspace(10), i32 4
+; CHECK:  store ptr addrspace(10) %aboxed, ptr [[slot_0:%.*]],
+; CHECK-NOT:  store {{.*}} ptr [[slot_0]]
+; CHECK:  store ptr addrspace(10) %bboxed, ptr {{%.*}}
+; CHECK-NOT:  store {{.*}} ptr [[slot_0]]
+
 top:
     %sigframe = alloca [208 x i8], align 16
     %sigframe.sub = getelementptr inbounds [208 x i8], [208 x i8]* %sigframe, i64 0, i64 0
diff --git a/test/llvmpasses/safepoint_stress.jl b/test/llvmpasses/safepoint_stress.jl
index dc6752e76d595..173058df12fb1 100644
--- a/test/llvmpasses/safepoint_stress.jl
+++ b/test/llvmpasses/safepoint_stress.jl
@@ -1,7 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s
-# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S - | FileCheck %s
+# RUN: julia --startup-file=no %s | opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S - | FileCheck %s
 
 
 println("""
@@ -15,7 +14,7 @@ define void @stress(i64 %a, i64 %b) {
     %ptls = call {}*** @julia.ptls_states()
 """)
 
-# CHECK: %gcframe = alloca {} addrspace(10)*, i32 10002
+# CHECK: %gcframe = alloca ptr addrspace(10), i32 10002
 for i = 1:10000
     println("\t%arg$i = call {} addrspace(10)* @alloc()")
 end
diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll
deleted file mode 100644
index 929fbeea2c3f5..0000000000000
--- a/test/llvmpasses/simdloop.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; This file is a part of Julia. License is MIT: https://julialang.org/license
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s
-
-; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s
-
-declare void @julia.loopinfo_marker()
-
-; CHECK-LABEL: @simd_test(
-define void @simd_test(double *%a, double *%b) {
-top:
-  br label %loop
-loop:
-  %i = phi i64 [0, %top], [%nexti, %loop]
-  %aptr = getelementptr double, double *%a, i64 %i
-  %bptr = getelementptr double, double *%b, i64 %i
-; CHECK: llvm.mem.parallel_loop_access
-  %aval = load double, double *%aptr
-  %bval = load double, double *%aptr
-  %cval = fadd double %aval, %bval
-  store double %cval, double *%bptr
-  %nexti = add i64 %i, 1
-  call void @julia.loopinfo_marker(), !julia.loopinfo !3
-  %done = icmp sgt i64 %nexti, 500
-  br i1 %done, label %loopdone, label %loop
-loopdone:
-  ret void
-}
-
-; CHECK-LABEL: @simd_test_sub(
-define double @simd_test_sub(double *%a) {
-top:
-  br label %loop
-loop:
-  %i = phi i64 [0, %top], [%nexti, %loop]
-  %v = phi double [0.000000e+00, %top], [%nextv, %loop]
-  %aptr = getelementptr double, double *%a, i64 %i
-; CHECK: llvm.mem.parallel_loop_access
-  %aval = load double, double *%aptr
-  %nextv = fsub double %v, %aval
-; CHECK: fsub reassoc contract double %v, %aval
-  %nexti = add i64 %i, 1
-  call void @julia.loopinfo_marker(), !julia.loopinfo !3
-  %done = icmp sgt i64 %nexti, 500
-  br i1 %done, label %loopdone, label %loop
-loopdone:
-  ret double %nextv
-}
-
-; CHECK-LABEL: @simd_test_sub2(
-define double @simd_test_sub2(double *%a) {
-top:
-  br label %loop
-loop:
-  %i = phi i64 [0, %top], [%nexti, %loop]
-  %v = phi double [0.000000e+00, %top], [%nextv, %loop]
-  %aptr = getelementptr double, double *%a, i64 %i
-  %aval = load double, double *%aptr
-  %nextv = fsub double %v, %aval
-; CHECK: fsub reassoc contract double %v, %aval
-  %nexti = add i64 %i, 1
-  call void @julia.loopinfo_marker(), !julia.loopinfo !2
-  %done = icmp sgt i64 %nexti, 500
-  br i1 %done, label %loopdone, label %loop
-loopdone:
-  ret double %nextv
-}
-
-; Tests if we correctly pass through other metadata
-; CHECK-LABEL: @disabled(
-define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
-entry:
-  br label %for.body
-
-for.body:                                         ; preds = %for.body, %entry
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %add = add nsw i32 %0, %N
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  store i32 %add, i32* %arrayidx2, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  call void @julia.loopinfo_marker(), !julia.loopinfo !4
-  %exitcond = icmp eq i64 %indvars.iv.next, 48
-; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]]
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:                                          ; preds = %for.body
-  %1 = load i32, i32* %a, align 4
-  ret i32 %1
-}
-
-!1 = !{}
-!2 = !{!"julia.simdloop"}
-!3 = !{!"julia.simdloop", !"julia.ivdep"}
-!4 = !{!"julia.simdloop", !"julia.ivdep", !5}
-!5 = !{!"llvm.loop.vectorize.disable", i1 0}
-; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[LOOP_DISABLE:![0-9]+]]}
-; CHECK-NEXT: [[LOOP_DISABLE]] = !{!"llvm.loop.vectorize.disable", i1 false}
diff --git a/test/loading.jl b/test/loading.jl
index 394c13c5f2962..4c0fe81876783 100644
--- a/test/loading.jl
+++ b/test/loading.jl
@@ -1,10 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-original_depot_path = copy(Base.DEPOT_PATH)
-
 using Test
 
 # Tests for @__LINE__ inside and outside of macros
+# NOTE: the __LINE__ numbers for these first couple tests are significant, so
+# adding any lines here will make those tests fail
 @test (@__LINE__) == 8
 
 macro macro_caller_lineno()
@@ -33,6 +33,10 @@ end
 @test @nested_LINE_expansion() == ((@__LINE__() - 4, @__LINE__() - 12), @__LINE__())
 @test @nested_LINE_expansion2() == ((@__LINE__() - 5, @__LINE__() - 9), @__LINE__())
 
+original_depot_path = copy(Base.DEPOT_PATH)
+include("tempdepot.jl")
+include("precompile_utils.jl")
+
 loaded_files = String[]
 push!(Base.include_callbacks, (mod::Module, fn::String) -> push!(loaded_files, fn))
 include("test_sourcepath.jl")
@@ -60,7 +64,7 @@ let exename = `$(Base.julia_cmd()) --compiled-modules=yes --startup-file=no --co
     @test !endswith(s_dir, Base.Filesystem.path_separator)
 end
 
-@test Base.in_sysimage(Base.PkgId(Base.UUID("cf7118a7-6976-5b1a-9a39-7adc72f591a4"), "UUIDs"))
+@test Base.in_sysimage(Base.PkgId(Base.UUID("8f399da3-3557-5675-b5ff-fb832c97cbdb"), "Libdl"))
 @test Base.in_sysimage(Base.PkgId(Base.UUID("3a7fdc7e-7467-41b4-9f64-ea033d046d5b"), "NotAPackage")) == false
 
 ## Unit tests for safe file operations ##
@@ -167,7 +171,7 @@ end
 
             @test root.uuid == root_uuid
             @test this.uuid == this_uuid
-            @test that == nothing
+            @test that === nothing
 
             write(project_file, """
             name = "Root"
@@ -180,8 +184,8 @@ end
             that = Base.identify_package("That")
 
             @test root.uuid == proj_uuid
-            @test this == nothing
-            @test that == nothing
+            @test this === nothing
+            @test that === nothing
         finally
             copy!(LOAD_PATH, old_load_path)
         end
@@ -213,8 +217,8 @@ end
             that = Base.identify_package("That")
 
             @test root.uuid == root_uuid
-            @test this == nothing
-            @test that == nothing
+            @test this === nothing
+            @test that === nothing
 
             @test Base.get_uuid_name(project_file, this_uuid) == "This"
         finally
@@ -223,7 +227,6 @@ end
     end
 end
 
-
 ## functional testing of package identification, location & loading ##
 
 saved_load_path = copy(LOAD_PATH)
@@ -233,8 +236,9 @@ watcher_counter = Ref(0)
 push!(Base.active_project_callbacks, () -> watcher_counter[] += 1)
 push!(Base.active_project_callbacks, () -> error("broken"))
 
+const testdefaultdepot = mkdepottempdir()
 push!(empty!(LOAD_PATH), joinpath(@__DIR__, "project"))
-append!(empty!(DEPOT_PATH), [mktempdir(), joinpath(@__DIR__, "depot")])
+append!(empty!(DEPOT_PATH), [testdefaultdepot, joinpath(@__DIR__, "depot")])
 @test watcher_counter[] == 0
 @test_logs (:error, r"active project callback .* failed") Base.set_active_project(nothing)
 @test watcher_counter[] == 1
@@ -273,8 +277,8 @@ end
         @test joinpath(@__DIR__, normpath(path)) == locate_package(pkg)
         @test Base.compilecache_path(pkg, UInt64(0)) == Base.compilecache_path(pkg, UInt64(0))
     end
-    @test identify_package("Baz") == nothing
-    @test identify_package("Qux") == nothing
+    @test identify_package("Baz") === nothing
+    @test identify_package("Qux") === nothing
     @testset "equivalent package names" begin
          classes = [
             ["Foo"],
@@ -458,8 +462,8 @@ function make_env(flat, root, roots, graph, paths, dummies)
     )
 end
 
-const depots = [mktempdir() for _ = 1:3]
-const envs = Dict{String,Any}()
+const depots = [mkdepottempdir() for _ = 1:3]
+const envs = Pair{String, Any}[]
 
 append!(empty!(DEPOT_PATH), depots)
 
@@ -553,7 +557,7 @@ for (flat, root, roots, graph) in graphs
         end
     end
 
-    envs[dir] = make_env(flat, root, roots, graph, paths, dummies)
+    push!(envs, dir => make_env(flat, root, roots, graph, paths, dummies))
 end
 
 # materialize dependency graphs as implicit environments (if possible)
@@ -586,7 +590,7 @@ for (flat, root, roots, graph) in graphs
         end
     end
 
-    envs[dir] = make_env(flat, root, roots, graph, paths, dummies)
+    push!(envs, dir => make_env(flat, root, roots, graph, paths, dummies))
 end
 
 ## use generated environments to test package loading ##
@@ -608,30 +612,38 @@ function test_find(
         where.uuid === nothing && continue
         deps = get(graph, where, Dict(where.name => where))
         for name in NAMES
-            id = identify_package(where, name)
-            @test id == get(deps, name, nothing)
-            path = id === nothing ? nothing : locate_package(id)
-            @test path == get(paths, id, nothing)
+            @testset let where=where, name=name
+                id = identify_package(where, name)
+                @test id == get(deps, name, nothing)
+                path = id === nothing ? nothing : locate_package(id)
+                @test path == get(paths, id, nothing)
+            end
         end
     end
 end
 
 @testset "find_package with one env in load path" begin
-    for (env, (_, _, roots, graph, paths)) in envs
-        push!(empty!(LOAD_PATH), env)
-        test_find(roots, graph, paths)
+    for idx in eachindex(envs)
+        @testset let idx=idx
+            (env, (_, _, roots, graph, paths)) = envs[idx]
+            push!(empty!(LOAD_PATH), env)
+            test_find(roots, graph, paths)
+        end
     end
 end
 
 @testset "find_package with two envs in load path" begin
-    for x = false:true,
-        (env1, (_, _, roots1, graph1, paths1)) in (x ? envs : rand(envs, 10)),
-        (env2, (_, _, roots2, graph2, paths2)) in (x ? rand(envs, 10) : envs)
-        push!(empty!(LOAD_PATH), env1, env2)
-        roots = merge(roots2, roots1)
-        graph = merge(graph2, graph1)
-        paths = merge(paths2, paths1)
-        test_find(roots, graph, paths)
+    for x = false:true, env1idx in (x ? (1:length(envs)) : rand(1:length(envs), 10)),
+                        env2idx in (x ? rand(1:length(envs), 10) : (1:length(envs)))
+        @testset let env1idx=env1idx, env2idx=env2idx
+            (env1, (_, _, roots1, graph1, paths1)) = envs[env1idx]
+            (env2, (_, _, roots2, graph2, paths2)) = envs[env2idx]
+            push!(empty!(LOAD_PATH), env1, env2)
+            roots = merge(roots2, roots1)
+            graph = merge(graph2, graph1)
+            paths = merge(paths2, paths1)
+            test_find(roots, graph, paths)
+        end
     end
 end
 
@@ -699,6 +711,112 @@ mktempdir() do dir
     @test success(cmd)
 end
 
+function _with_empty_load_path(f::Function)
+    old_load_path = copy(Base.LOAD_PATH)
+    try
+        empty!(Base.LOAD_PATH)
+        f()
+    finally
+        append!(Base.LOAD_PATH, old_load_path)
+    end
+end
+old_act_proj = Base.ACTIVE_PROJECT[]
+function _with_activate(f::Function, project_file::Union{AbstractString, Nothing})
+    try
+        Base.ACTIVE_PROJECT[] = project_file
+        f()
+    finally
+        Base.ACTIVE_PROJECT[] = old_act_proj
+    end
+end
+function _activate_and_get_active_manifest_noarg(project_file::Union{AbstractString, Nothing})
+    _with_activate(project_file) do
+        Base.active_manifest()
+    end
+end
+
+@testset "Base.active_manifest()" begin
+    test_dir = @__DIR__
+    test_cases = [
+        (joinpath(test_dir, "TestPkg", "Project.toml"), joinpath(test_dir, "TestPkg", "Manifest.toml")),
+        (joinpath(test_dir, "project", "Project.toml"), joinpath(test_dir, "project", "Manifest.toml")),
+    ]
+
+    @testset "active_manifest() - no argument passed" begin
+        for (proj, expected_man) in test_cases
+            @test _activate_and_get_active_manifest_noarg(proj) == expected_man
+            # Base.active_manifest() should never return a file that doesn't exist:
+            @test isfile(_activate_and_get_active_manifest_noarg(proj))
+        end
+        mktempdir() do dir
+            proj = joinpath(dir, "Project.toml")
+
+            # If the project file doesn't exist, active_manifest() should return `nothing`:
+            @test _activate_and_get_active_manifest_noarg(proj) === nothing
+
+            # If the project file exists but the manifest file does not, active_manifest() should still return `nothing`:
+            touch(proj)
+            @test _activate_and_get_active_manifest_noarg(proj) === nothing
+
+            # If the project and manifest files both exist, active_manifest() should return the path to the manifest:
+            manif = joinpath(dir, "Manifest.toml")
+            touch(manif)
+            @test _activate_and_get_active_manifest_noarg(proj) == manif
+            # Base.active_manifest() should never return a file that doesn't exist:
+            @test isfile(_activate_and_get_active_manifest_noarg(proj))
+
+            # If the manifest file exists but the project file does not, active_manifest() should return `nothing`:
+            rm(proj)
+            @test _activate_and_get_active_manifest_noarg(proj) == nothing
+        end
+    end
+
+    @testset "active_manifest(proj::AbstractString)" begin
+        Base.ACTIVE_PROJECT[] = old_act_proj
+        for (proj, expected_man) in test_cases
+            @test Base.active_manifest(proj) == expected_man
+            # Base.active_manifest() should never return a file that doesn't exist:
+            @test isfile(Base.active_manifest(proj))
+        end
+        mktempdir() do dir
+            proj = joinpath(dir, "Project.toml")
+
+            # If the project file doesn't exist, active_manifest(proj) should return `nothing`:
+            @test Base.active_manifest(proj) === nothing
+
+            # If the project file exists but the manifest file does not, active_manifest(proj) should still return `nothing`:
+            touch(proj)
+            @test Base.active_manifest(proj) === nothing
+
+            # If the project and manifest files both exist, active_manifest(proj) should return the path to the manifest:
+            manif = joinpath(dir, "Manifest.toml")
+            touch(manif)
+            @test Base.active_manifest(proj) == manif
+            # Base.active_manifest() should never return a file that doesn't exist:
+            @test isfile(Base.active_manifest(proj))
+
+            # If the manifest file exists but the project file does not, active_manifest(proj) should return `nothing`:
+            rm(proj)
+            @test Base.active_manifest(proj) === nothing
+        end
+    end
+
+    @testset "ACTIVE_PROJECT[] is `nothing` => active_manifest() is nothing" begin
+        _with_activate(nothing) do; _with_empty_load_path() do
+            @test Base.active_manifest() === nothing
+            @test Base.active_manifest(nothing) === nothing
+        end; end
+    end
+
+    @testset "Project file does not exist => active_manifest() is nothing" begin
+        mktempdir() do dir
+            proj = joinpath(dir, "Project.toml")
+            @test Base.active_manifest(proj) === nothing
+            @test _activate_and_get_active_manifest_noarg(proj) === nothing
+        end
+    end
+end
+
 @testset "expansion of JULIA_LOAD_PATH" begin
     s = Sys.iswindows() ? ';' : ':'
     tmp = "/this/does/not/exist"
@@ -721,16 +839,17 @@ end
 @testset "expansion of JULIA_DEPOT_PATH" begin
     s = Sys.iswindows() ? ';' : ':'
     tmp = "/this/does/not/exist"
-    DEFAULT = Base.append_default_depot_path!(String[])
+    default = joinpath(homedir(), ".julia")
+    bundled = Base.append_bundled_depot_path!(String[])
     cases = Dict{Any,Vector{String}}(
-        nothing => DEFAULT,
+        nothing => [default; bundled],
         "" => [],
-        "$s" => DEFAULT,
-        "$tmp$s" => [tmp; DEFAULT],
-        "$s$tmp" => [DEFAULT; tmp],
+        "$s" => [default; bundled],
+        "$tmp$s" => [tmp; bundled],
+        "$s$tmp" => [default; bundled; tmp],
         )
     for (env, result) in pairs(cases)
-        script = "DEPOT_PATH == $(repr(result)) || error()"
+        script = "DEPOT_PATH == $(repr(result)) || error(\"actual depot \" * join(DEPOT_PATH,':') * \" does not match expected depot \" * join($(repr(result)), ':'))"
         cmd = `$(Base.julia_cmd()) --startup-file=no -e $script`
         cmd = addenv(cmd, "JULIA_DEPOT_PATH" => env)
         cmd = pipeline(cmd; stdout, stderr)
@@ -748,16 +867,9 @@ end
 
 ## cleanup after tests ##
 
-for env in keys(envs)
+for (env, _) in envs
     rm(env, force=true, recursive=true)
 end
-for depot in depots
-    try
-        rm(depot, force=true, recursive=true)
-    catch err
-        @show err
-    end
-end
 
 append!(empty!(LOAD_PATH), saved_load_path)
 append!(empty!(DEPOT_PATH), saved_depot_path)
@@ -792,11 +904,25 @@ import .Foo28190.Libdl; import Libdl
     end
 end
 
+@testset "`::AbstractString` constraint on the path argument to `include`" begin
+    for m ∈ (NotPkgModule, evalfile("testhelpers/just_module.jl"))
+        @Core.latestworld
+        let i = m.include
+            @test !applicable(i, (nothing,))
+            @test !applicable(i, (identity, nothing,))
+            @test !hasmethod(i, Tuple{Nothing})
+            @test !hasmethod(i, Tuple{Function,Nothing})
+        end
+    end
+end
+
 @testset "`Base.project_names` and friends" begin
     # Some functions in Pkg assumes that these tuples have the same length
     n = length(Base.project_names)
-    @test length(Base.manifest_names) == n
     @test length(Base.preferences_names) == n
+
+    # there are two manifest names per project name
+    @test length(Base.manifest_names) == 2n
 end
 
 @testset "Manifest formats" begin
@@ -825,20 +951,31 @@ end
     end
 end
 
-@testset "error message loading pkg bad module name" begin
+@testset "Manifest name preferential loading" begin
     mktempdir() do tmp
-        old_loadpath = copy(LOAD_PATH)
-        try
-            push!(LOAD_PATH, tmp)
-            write(joinpath(tmp, "BadCase.jl"), "module badcase end")
-            @test_logs (:warn, r"The call to compilecache failed.*") match_mode=:any begin
-                @test_throws ErrorException("package `BadCase` did not define the expected module `BadCase`, \
-                    check for typos in package module name") (@eval using BadCase)
-            end
-        finally
-            copy!(LOAD_PATH, old_loadpath)
+        proj = joinpath(tmp, "Project.toml")
+        touch(proj)
+        for man_name in (
+            "Manifest.toml",
+            "JuliaManifest.toml",
+            "Manifest-v$(VERSION.major).$(VERSION.minor).toml",
+            "JuliaManifest-v$(VERSION.major).$(VERSION.minor).toml"
+            )
+            touch(joinpath(tmp, man_name))
+            man = basename(Base.project_file_manifest_path(proj))
+            @test man == man_name
         end
     end
+    mktempdir() do tmp
+        # check that another version isn't preferred
+        proj = joinpath(tmp, "Project.toml")
+        touch(proj)
+        touch(joinpath(tmp, "Manifest-v1.5.toml"))
+        @test Base.project_file_manifest_path(proj) === nothing
+        touch(joinpath(tmp, "Manifest.toml"))
+        man = basename(Base.project_file_manifest_path(proj))
+        @test man == "Manifest.toml"
+    end
 end
 
 @testset "Preferences loading" begin
@@ -994,7 +1131,7 @@ end
             write(joinpath(tmp, "Env1", "Manifest.toml"), """
             """)
             # Package in current env not present in manifest
-            pkg, env = Base.identify_package_env("Baz")
+            pkg, env = @lock Base.require_lock Base.identify_package_env("Baz")
             @test Base.locate_package(pkg, env) === nothing
         finally
             copy!(LOAD_PATH, old_load_path)
@@ -1004,9 +1141,20 @@ end
 end
 
 @testset "Extensions" begin
-    depot_path = mktempdir()
-    try
-        proj = joinpath(@__DIR__, "project", "Extensions", "HasDepWithExtensions.jl")
+    test_ext = """
+    function test_ext(parent::Module, ext::Symbol)
+        _ext = Base.get_extension(parent, ext)
+        _ext isa Module || error("expected extension \$ext to be loaded")
+        _pkgdir = pkgdir(_ext)
+        _pkgdir == pkgdir(parent) != nothing || error("unexpected extension \$ext pkgdir path: \$_pkgdir")
+        _pkgversion = pkgversion(_ext)
+        _pkgversion == pkgversion(parent) || error("unexpected extension \$ext version: \$_pkgversion")
+    end
+    """
+    depot_path = mkdepottempdir()
+    proj = joinpath(@__DIR__, "project", "Extensions", "HasDepWithExtensions.jl")
+
+    begin
 
         function gen_extension_cmd(compile, distr=false)
             load_distr = distr ? "using Distributed; addprocs(1)" : ""
@@ -1014,6 +1162,7 @@ end
             cmd = """
             $load_distr
             begin
+                $ew $test_ext
                 $ew push!(empty!(DEPOT_PATH), $(repr(depot_path)))
                 using HasExtensions
                 $ew using HasExtensions
@@ -1021,13 +1170,16 @@ end
                 $ew HasExtensions.ext_loaded && error("ext_loaded set")
                 using HasDepWithExtensions
                 $ew using HasDepWithExtensions
+                $ew test_ext(HasExtensions, :Extension)
                 $ew Base.get_extension(HasExtensions, :Extension).extvar == 1 || error("extvar in Extension not set")
                 $ew HasExtensions.ext_loaded || error("ext_loaded not set")
                 $ew HasExtensions.ext_folder_loaded && error("ext_folder_loaded set")
                 $ew HasDepWithExtensions.do_something() || error("do_something errored")
                 using ExtDep2
                 $ew using ExtDep2
-                $ew HasExtensions.ext_folder_loaded || error("ext_folder_loaded not set")
+                using ExtDep3
+                $ew using ExtDep3
+                $ew HasExtensions.ext_dep_loaded || error("ext_dep_loaded not set")
             end
             """
             return `$(Base.julia_cmd()) $compile --startup-file=no -e $cmd`
@@ -1070,11 +1222,14 @@ end
 
         test_ext_proj = """
         begin
+            $test_ext
             using HasExtensions
             using ExtDep
-            Base.get_extension(HasExtensions, :Extension) isa Module || error("expected extension to load")
+            test_ext(HasExtensions, :Extension)
             using ExtDep2
-            Base.get_extension(HasExtensions, :ExtensionFolder) isa Module || error("expected extension to load")
+            test_ext(HasExtensions, :ExtensionFolder)
+            using ExtDep3
+            test_ext(HasExtensions, :ExtensionDep)
         end
         """
         for compile in (`--compiled-modules=no`, ``)
@@ -1083,16 +1238,135 @@ end
             cmd_proj_ext = addenv(cmd_proj_ext, "JULIA_LOAD_PATH" => join([joinpath(proj, "HasExtensions.jl"), joinpath(proj, "EnvWithDeps")], sep))
             run(cmd_proj_ext)
         end
-    finally
+
+        # Extensions in implicit environments
+        old_load_path = copy(LOAD_PATH)
         try
-            rm(depot_path, force=true, recursive=true)
-        catch err
-            @show err
+            empty!(LOAD_PATH)
+            push!(LOAD_PATH, joinpath(@__DIR__, "project", "Extensions", "ImplicitEnv"))
+            pkgid_B = Base.PkgId(Base.uuid5(Base.identify_package("A").uuid, "BExt"), "BExt")
+            @test Base.identify_package(pkgid_B, "B") isa Base.PkgId
+        finally
+            copy!(LOAD_PATH, old_load_path)
+        end
+
+        # Extension with cycles in dependencies
+        code = """
+        using CyclicExtensions
+        Base.get_extension(CyclicExtensions, :ExtA) isa Module || error("expected extension to load")
+        Base.get_extension(CyclicExtensions, :ExtB) isa Module || error("expected extension to load")
+        CyclicExtensions.greet()
+        """
+        proj = joinpath(@__DIR__, "project", "Extensions", "CyclicExtensions")
+        cmd =  `$(Base.julia_cmd()) --startup-file=no -e $code`
+        cmd = addenv(cmd, "JULIA_LOAD_PATH" => proj)
+        @test occursin("Hello Cycles!", String(read(cmd)))
+
+        # Extension-to-extension dependencies
+
+        mkdepottempdir() do depot # Parallel pre-compilation
+            code = """
+            Base.disable_parallel_precompile = false
+            using ExtToExtDependency
+            Base.get_extension(ExtToExtDependency, :ExtA) isa Module || error("expected extension to load")
+            Base.get_extension(ExtToExtDependency, :ExtAB) isa Module || error("expected extension to load")
+            ExtToExtDependency.greet()
+            """
+            proj = joinpath(@__DIR__, "project", "Extensions", "ExtToExtDependency")
+            cmd =  `$(Base.julia_cmd()) --startup-file=no -e $code`
+            cmd = addenv(cmd,
+                "JULIA_LOAD_PATH" => proj,
+                "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(),
+            )
+            @test occursin("Hello ext-to-ext!", String(read(cmd)))
+        end
+        mkdepottempdir() do depot # Serial pre-compilation
+            code = """
+            Base.disable_parallel_precompile = true
+            using ExtToExtDependency
+            Base.get_extension(ExtToExtDependency, :ExtA) isa Module || error("expected extension to load")
+            Base.get_extension(ExtToExtDependency, :ExtAB) isa Module || error("expected extension to load")
+            ExtToExtDependency.greet()
+            """
+            proj = joinpath(@__DIR__, "project", "Extensions", "ExtToExtDependency")
+            cmd =  `$(Base.julia_cmd()) --startup-file=no -e $code`
+            cmd = addenv(cmd,
+                "JULIA_LOAD_PATH" => proj,
+                "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(),
+            )
+            @test occursin("Hello ext-to-ext!", String(read(cmd)))
+        end
+
+        mkdepottempdir() do depot # Parallel pre-compilation
+            code = """
+            Base.disable_parallel_precompile = false
+            using CrossPackageExtToExtDependency
+            Base.get_extension(CrossPackageExtToExtDependency.CyclicExtensions, :ExtA) isa Module || error("expected extension to load")
+            Base.get_extension(CrossPackageExtToExtDependency, :ExtAB) isa Module || error("expected extension to load")
+            CrossPackageExtToExtDependency.greet()
+            """
+            proj = joinpath(@__DIR__, "project", "Extensions", "CrossPackageExtToExtDependency")
+            cmd =  `$(Base.julia_cmd()) --startup-file=no -e $code`
+            cmd = addenv(cmd,
+                "JULIA_LOAD_PATH" => proj,
+                "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(),
+            )
+            @test occursin("Hello x-package ext-to-ext!", String(read(cmd)))
+        end
+        mkdepottempdir() do depot # Serial pre-compilation
+            code = """
+            Base.disable_parallel_precompile = true
+            using CrossPackageExtToExtDependency
+            Base.get_extension(CrossPackageExtToExtDependency.CyclicExtensions, :ExtA) isa Module || error("expected extension to load")
+            Base.get_extension(CrossPackageExtToExtDependency, :ExtAB) isa Module || error("expected extension to load")
+            CrossPackageExtToExtDependency.greet()
+            """
+            proj = joinpath(@__DIR__, "project", "Extensions", "CrossPackageExtToExtDependency")
+            cmd =  `$(Base.julia_cmd()) --startup-file=no -e $code`
+            cmd = addenv(cmd,
+                "JULIA_LOAD_PATH" => proj,
+                "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(),
+            )
+            @test occursin("Hello x-package ext-to-ext!", String(read(cmd)))
+        end
+
+        # Extensions for "parent" dependencies
+        # (i.e. an `ExtAB`  where A depends on / loads B, but B provides the extension)
+
+        mkdepottempdir() do depot # Parallel pre-compilation
+            code = """
+            Base.disable_parallel_precompile = false
+            using Parent
+            Base.get_extension(getfield(Parent, :DepWithParentExt), :ParentExt) isa Module || error("expected extension to load")
+            Parent.greet()
+            """
+            proj = joinpath(@__DIR__, "project", "Extensions", "Parent.jl")
+            cmd =  `$(Base.julia_cmd()) --startup-file=no -e $code`
+            cmd = addenv(cmd,
+                "JULIA_LOAD_PATH" => proj,
+                "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(),
+            )
+            @test occursin("Hello parent!", String(read(cmd)))
+        end
+        mkdepottempdir() do depot # Serial pre-compilation
+            code = """
+            Base.disable_parallel_precompile = true
+            using Parent
+            Base.get_extension(getfield(Parent, :DepWithParentExt), :ParentExt) isa Module || error("expected extension to load")
+            Parent.greet()
+            """
+            proj = joinpath(@__DIR__, "project", "Extensions", "Parent.jl")
+            cmd =  `$(Base.julia_cmd()) --startup-file=no -e $code`
+            cmd = addenv(cmd,
+                "JULIA_LOAD_PATH" => proj,
+                "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(),
+            )
+            @test occursin("Hello parent!", String(read(cmd)))
         end
     end
 end
 
-pkgimage(val) = val == 1 ? `--pkgimage=yes` : `--pkgimage=no`
+pkgimage(val) = val == 1 ? `--pkgimages=yes` : `--pkgimages=no`
 opt_level(val) = `-O$val`
 debug_level(val) = `-g$val`
 inline(val) = val == 1 ? `--inline=yes` : `--inline=no`
@@ -1137,49 +1411,509 @@ end
     @test cf.check_bounds == 3
     @test cf.inline
     @test cf.opt_level == 3
+    @test repr(cf) == "CacheFlags(; use_pkgimages=true, debug_level=3, check_bounds=3, inline=true, opt_level=3)"
 
-    io = PipeBuffer()
-    show(io, cf)
-    @test read(io, String) == "use_pkgimages = true, debug_level = 3, check_bounds = 3, inline = true, opt_level = 3"
+    # Round trip CacheFlags
+    @test parse(Base.CacheFlags, repr(cf)) == cf
 end
 
 empty!(Base.DEPOT_PATH)
 append!(Base.DEPOT_PATH, original_depot_path)
 
+module loaded_pkgid1 end
+module loaded_pkgid2 end
+module loaded_pkgid3 end
+module loaded_pkgid4 end
+
 @testset "loading deadlock detector" begin
     pkid1 = Base.PkgId("pkgid1")
     pkid2 = Base.PkgId("pkgid2")
     pkid3 = Base.PkgId("pkgid3")
     pkid4 = Base.PkgId("pkgid4")
+    build_id = UInt128(0)
     e = Base.Event()
-    @test nothing === @lock Base.require_lock Base.start_loading(pkid4)     # module pkgid4
-    @test nothing === @lock Base.require_lock Base.start_loading(pkid1)     # module pkgid1
+    @test nothing === @lock Base.require_lock Base.start_loading(pkid4, build_id, false)     # module pkgid4
+    @test nothing === @lock Base.require_lock Base.start_loading(pkid1, build_id, false)     # module pkgid1
     t1 = @async begin
-        @test nothing === @lock Base.require_lock Base.start_loading(pkid2) # @async module pkgid2; using pkgid1; end
+        @test nothing === @lock Base.require_lock Base.start_loading(pkid2, build_id, false) # @async module pkgid2; using pkgid1; end
         notify(e)
-        @test "loaded_pkgid1" == @lock Base.require_lock Base.start_loading(pkid1)
-        @lock Base.require_lock Base.end_loading(pkid2, "loaded_pkgid2")
+        @test loaded_pkgid1 == @lock Base.require_lock Base.start_loading(pkid1, build_id, false)
+        @lock Base.require_lock Base.end_loading(pkid2, loaded_pkgid2)
     end
     wait(e)
     reset(e)
     t2 = @async begin
-        @test nothing === @lock Base.require_lock Base.start_loading(pkid3) # @async module pkgid3; using pkgid2; end
+        @test nothing === @lock Base.require_lock Base.start_loading(pkid3, build_id, false) # @async module pkgid3; using pkgid2; end
         notify(e)
-        @test "loaded_pkgid2" == @lock Base.require_lock Base.start_loading(pkid2)
-        @lock Base.require_lock Base.end_loading(pkid3, "loaded_pkgid3")
+        @test loaded_pkgid2 == @lock Base.require_lock Base.start_loading(pkid2, build_id, false)
+        @lock Base.require_lock Base.end_loading(pkid3, loaded_pkgid3)
     end
     wait(e)
     reset(e)
-    @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid3 -> pkgid2 -> pkgid1 -> pkgid3 && pkgid4"),
-        @lock Base.require_lock Base.start_loading(pkid3)).value            # try using pkgid3
-    @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid4 -> pkgid4 && pkgid1"),
-        @lock Base.require_lock Base.start_loading(pkid4)).value            # try using pkgid4
-    @lock Base.require_lock Base.end_loading(pkid1, "loaded_pkgid1")        # end
-    @lock Base.require_lock Base.end_loading(pkid4, "loaded_pkgid4")        # end
+    @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid3 using pkgid2 using pkgid1 using pkgid3 (while loading pkgid4)"),
+        @lock Base.require_lock Base.start_loading(pkid3, build_id, false)).value            # try using pkgid3
+    @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid4 using pkgid4 (while loading pkgid1)"),
+        @lock Base.require_lock Base.start_loading(pkid4, build_id, false)).value            # try using pkgid4
+    @lock Base.require_lock Base.end_loading(pkid1, loaded_pkgid1)        # end
+    @lock Base.require_lock Base.end_loading(pkid4, loaded_pkgid4)        # end
     wait(t2)
     wait(t1)
 end
 
 @testset "Upgradable stdlibs" begin
     @test success(`$(Base.julia_cmd()) --startup-file=no -e 'using DelimitedFiles'`)
+    @test success(`$(Base.julia_cmd()) --startup-file=no -e 'using Statistics'`)
+end
+
+@testset "relocatable upgrades #51989" begin
+    mkdepottempdir() do depot
+        # realpath is needed because Pkg is used for one of the precompile paths below, and Pkg calls realpath on the
+        # project path so the cache file slug will be different if the tempdir is given as a symlink
+        # (which it often is on MacOS) which would break the test.
+
+        # Create fake `Foo.jl` package with two files:
+        foo_path = joinpath(depot, "dev", "Foo51989")
+        mkpath(joinpath(foo_path, "src"))
+        write(joinpath(foo_path, "src", "Foo51989.jl"),
+            """
+            module Foo51989
+            include("internal.jl")
+            end
+            """)
+        write(joinpath(foo_path, "src", "internal.jl"),
+            "const a = \"asd\"")
+        write(joinpath(foo_path, "Project.toml"),
+            """
+            name = "Foo51989"
+            uuid = "00000000-0000-0000-0000-000000000001"
+            version = "1.0.0"
+            """)
+        write(joinpath(foo_path, "Manifest.toml"),
+            """
+            julia_version = "1.13.0"
+            manifest_format = "2.0"
+
+            [[deps.Foo51989]]
+            path = "."
+            uuid = "00000000-0000-0000-0000-000000000001"
+            version = "1.0.0"
+            """)
+
+        # In our depot, `precompile` this `Foo` package.
+        @test success(pipeline(addenv(
+            `$(Base.julia_cmd()) --project=$foo_path --startup-file=no -e 'Base.Precompilation.precompilepkgs(["Foo51989"]); exit(0)'`,
+            "JULIA_DEPOT_PATH" => depot,
+        ); stdout, stderr))
+
+        # Get the size of the generated `.ji` file so that we can ensure that it gets altered
+        foo_compiled_path = joinpath(depot, "compiled", "v$(VERSION.major).$(VERSION.minor)", "Foo51989")
+        cache_path = joinpath(foo_compiled_path, only(filter(endswith(".ji"), readdir(foo_compiled_path))))
+        cache_size = filesize(cache_path)
+
+        # Next, remove the dependence on `internal.jl` and delete it:
+        rm(joinpath(foo_path, "src", "internal.jl"))
+        open(joinpath(foo_path, "src", "Foo51989.jl"); write=true) do io
+            truncate(io, 0)
+            println(io, """
+            module Foo51989
+            end
+            """)
+        end
+
+        # Try to load `Foo`; this should trigger recompilation, not an error!
+        @test success(pipeline(addenv(
+            `$(Base.julia_cmd()) --project=$foo_path --startup-file=no -e 'using Foo51989; exit(0)'`,
+            "JULIA_DEPOT_PATH" => depot,
+        ); stdout, stderr))
+
+        # Ensure that there is still only one `.ji` file (it got replaced
+        # and the file size changed).
+        @test length(filter(endswith(".ji"), readdir(foo_compiled_path))) == 1
+        @test filesize(cache_path) != cache_size
+    end
+end
+
+@testset "Fallback for stdlib deps if manifest deps aren't found" begin
+    s = Sys.iswindows() ? ';' : ':'
+    mktempdir() do depot
+        # This manifest has a LibGit2 entry that is missing LibGit2_jll, which should be
+        # handled by falling back to the stdlib Project.toml for dependency truth.
+        badmanifest_test_dir = joinpath(@__DIR__, "project", "deps", "BadStdlibDeps")
+        @test success(addenv(
+            `$(Base.julia_cmd()) --project=$badmanifest_test_dir --startup-file=no -e 'using LibGit2'`,
+            "JULIA_DEPOT_PATH" => string(depot * Base.Filesystem.pathsep(), s),
+        ))
+    end
+    mktempdir() do depot
+        # This manifest has a LibGit2 entry that has a LibGit2_jll with a git-tree-hash1
+        # which simulates an old manifest where LibGit2_jll was not a stdlib
+        badmanifest_test_dir2 = joinpath(@__DIR__, "project", "deps", "BadStdlibDeps2")
+        @test success(addenv(
+            `$(Base.julia_cmd()) --project=$badmanifest_test_dir2 --startup-file=no -e 'using LibGit2'`,
+            "JULIA_DEPOT_PATH" => string(depot * Base.Filesystem.pathsep(), s),
+        ))
+    end
+end
+
+@testset "code coverage disabled during precompilation" begin
+    mkdepottempdir() do depot
+        cov_test_dir = joinpath(@__DIR__, "project", "deps", "CovTest.jl")
+        cov_cache_dir = joinpath(depot, "compiled", "v$(VERSION.major).$(VERSION.minor)", "CovTest")
+        function rm_cov_files()
+            for cov_file in filter(endswith(".cov"), readdir(joinpath(cov_test_dir, "src"), join=true))
+                rm(cov_file)
+            end
+            @test !cov_exists()
+        end
+        cov_exists() = !isempty(filter(endswith(".cov"), readdir(joinpath(cov_test_dir, "src"))))
+
+        rm_cov_files() # clear out any coverage files first
+        @test !cov_exists()
+
+        cd(cov_test_dir) do
+            # In our depot, precompile CovTest.jl with coverage on
+            @test success(addenv(
+                `$(Base.julia_cmd()) --startup-file=no --pkgimage=yes --code-coverage=@ --project -e 'using CovTest; exit(0)'`,
+                "JULIA_DEPOT_PATH" => depot,
+            ))
+            @test !isempty(filter(!endswith(".ji"), readdir(cov_cache_dir))) # check that object cache file(s) exists
+            @test !cov_exists()
+            rm_cov_files()
+
+            # same again but call foo(), which is in the pkgimage, and should generate coverage
+            @test success(addenv(
+                `$(Base.julia_cmd()) --startup-file=no --pkgimage=yes --code-coverage=@ --project -e 'using CovTest; foo(); exit(0)'`,
+                "JULIA_DEPOT_PATH" => depot,
+            ))
+            @test cov_exists()
+            rm_cov_files()
+
+            # same again but call bar(), which is NOT in the pkgimage, and should generate coverage
+            @test success(addenv(
+                `$(Base.julia_cmd()) --startup-file=no --pkgimage=yes --code-coverage=@ --project -e 'using CovTest; bar(); exit(0)'`,
+                "JULIA_DEPOT_PATH" => depot,
+            ))
+            @test cov_exists()
+            rm_cov_files()
+        end
+    end
+end
+
+@testset "command-line flags" begin
+    mkdepottempdir() do depot_path mktempdir() do dir
+        # generate a Parent.jl and Child.jl package, with Parent depending on Child
+        open(joinpath(dir, "Child.jl"), "w") do io
+            println(io, """
+                module Child
+                end""")
+        end
+        open(joinpath(dir, "Parent.jl"), "w") do io
+            println(io, """
+                module Parent
+                using Child
+                end""")
+        end
+
+        # helper function to load a package and return the output
+        function load_package(name, args=``)
+            code = "Base.disable_parallel_precompile = true; using $name"
+            cmd = addenv(`$(Base.julia_cmd()) -e $code $args`,
+                        "JULIA_LOAD_PATH" => dir,
+                        "JULIA_DEPOT_PATH" => depot_path,
+                        "JULIA_DEBUG" => "loading")
+
+            out = Base.PipeEndpoint()
+            log = @async read(out, String)
+            try
+                proc = run(pipeline(cmd, stdout=out, stderr=out))
+                @test success(proc)
+            catch
+                @show fetch(log)
+                rethrow()
+            end
+            return fetch(log)
+        end
+
+        log = load_package("Parent", `--compiled-modules=no --pkgimages=no`)
+        @test !occursin(r"Generating (cache|object cache) file", log)
+        @test !occursin(r"Loading (cache|object cache) file", log)
+
+
+        ## tests for `--compiled-modules`, which generates cache files
+
+        log = load_package("Child", `--compiled-modules=yes --pkgimages=no`)
+        @test occursin(r"Generating cache file for Child", log)
+        @test occursin(r"Loading cache file .+ for Child", log)
+
+        # with `--compiled-modules=existing` we should only precompile Child
+        log = load_package("Parent", `--compiled-modules=existing --pkgimages=no`)
+        @test !occursin(r"Generating cache file for Child", log)
+        @test occursin(r"Loading cache file .+ for Child", log)
+        @test !occursin(r"Generating cache file for Parent", log)
+        @test !occursin(r"Loading cache file .+ for Parent", log)
+
+        # the default is `--compiled-modules=yes`, which should now precompile Parent
+        log = load_package("Parent", `--pkgimages=no`)
+        @test !occursin(r"Generating cache file for Child", log)
+        @test occursin(r"Loading cache file .+ for Child", log)
+        @test occursin(r"Generating cache file for Parent", log)
+        @test occursin(r"Loading cache file .+ for Parent", log)
+
+
+        ## tests for `--pkgimages`, which generates object cache files
+
+        log = load_package("Child", `--compiled-modules=yes --pkgimages=yes`)
+        @test occursin(r"Generating object cache file for Child", log)
+        @test occursin(r"Loading object cache file .+ for Child", log)
+
+        # with `--pkgimages=existing` we should only generate code for Child
+        log = load_package("Parent", `--compiled-modules=yes --pkgimages=existing`)
+        @test !occursin(r"Generating object cache file for Child", log)
+        @test occursin(r"Loading object cache file .+ for Child", log)
+        @test !occursin(r"Generating object cache file for Parent", log)
+        @test !occursin(r"Loading object cache file .+ for Parent", log)
+
+        # the default is `--pkgimages=yes`, which should now generate code for Parent
+        log = load_package("Parent")
+        @test !occursin(r"Generating object cache file for Child", log)
+        @test occursin(r"Loading object cache file .+ for Child", log)
+        @test occursin(r"Generating object cache file for Parent", log)
+        @test occursin(r"Loading object cache file .+ for Parent", log)
+    end end
+end
+
+@testset "including non-existent file throws proper error #52462" begin
+    mkdepottempdir() do depot
+        project_path = joinpath(depot, "project")
+        mkpath(project_path)
+
+        # Create a `Foo.jl` package
+        foo_path = joinpath(depot, "dev", "Foo52462")
+        mkpath(joinpath(foo_path, "src"))
+        open(joinpath(foo_path, "src", "Foo52462.jl"); write=true) do io
+            println(io, """
+            module Foo52462
+            include("non-existent.jl")
+            end
+            """)
+        end
+        open(joinpath(foo_path, "Project.toml"); write=true) do io
+            println(io, """
+            name = "Foo52462"
+            uuid = "00000000-0000-0000-0000-000000000001"
+            version = "1.0.0"
+            """)
+        end
+
+        file = joinpath(depot, "dev", "non-existent.jl")
+        @test try
+            include(file); false
+        catch e
+            @test e isa SystemError
+            @test e.prefix == "opening file $(repr(file))"
+            true
+        end
+        touch(file)
+        @test include_dependency(file) === nothing
+        chmod(file, 0x000)
+
+        # same for include_dependency: #52063
+        dir = mktempdir() do dir
+            @test include_dependency(dir) === nothing
+            dir
+        end
+        @test try
+            include_dependency(dir); false
+        catch e
+            @test e isa SystemError
+            @test e.prefix == "opening file or folder $(repr(dir))"
+            true
+        end
+    end
+end
+
+@testset "-m" begin
+    rot13proj = joinpath(@__DIR__, "project", "Rot13")
+    @test readchomp(`$(Base.julia_cmd()) --startup-file=no --project=$rot13proj -m Rot13 --project nowhere ABJURER`) == "--cebwrpg abjurer NOWHERE "
+    @test readchomp(`$(Base.julia_cmd()) --startup-file=no --project=$rot13proj -m Rot13.Rot26 --project nowhere ABJURER`) == "--project nowhere ABJURER "
+end
+
+@testset "workspace loading" begin
+   old_load_path = copy(LOAD_PATH)
+   try
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject"))
+       @test Base.get_preferences()["value"] == 1
+       @test Base.get_preferences()["x"] == 1
+
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "sub"))
+       id = Base.identify_package("Devved")
+       @test isfile(Base.locate_package(id))
+       @test Base.identify_package("Devved2") === nothing
+       id3 = Base.identify_package("MyPkg")
+       @test isfile(Base.locate_package(id3))
+
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "PackageThatIsSub"))
+       id_pkg = Base.identify_package("PackageThatIsSub")
+       @test Base.identify_package(id_pkg, "Devved") === nothing
+       id_dev2 = Base.identify_package(id_pkg, "Devved2")
+       @test isfile(Base.locate_package(id_dev2))
+       id_mypkg = Base.identify_package("MyPkg")
+       @test isfile(Base.locate_package(id_mypkg))
+       id_dev = Base.identify_package(id_mypkg, "Devved")
+       @test isfile(Base.locate_package(id_dev))
+       @test Base.get_preferences()["value"] == 2
+       @test Base.get_preferences()["x"] == 1
+       @test Base.get_preferences()["y"] == 2
+
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "PackageThatIsSub", "test"))
+       id_pkg = Base.identify_package("PackageThatIsSub")
+       @test isfile(Base.locate_package(id_pkg))
+       @test Base.identify_package(id_pkg, "Devved") === nothing
+       id_dev2 = Base.identify_package(id_pkg, "Devved2")
+       @test isfile(Base.locate_package(id_dev2))
+       id_mypkg = Base.identify_package("MyPkg")
+       @test isfile(Base.locate_package(id_mypkg))
+       id_dev = Base.identify_package(id_mypkg, "Devved")
+       @test isfile(Base.locate_package(id_dev))
+       @test Base.get_preferences()["value"] == 3
+       @test Base.get_preferences()["x"] == 1
+       @test Base.get_preferences()["y"] == 2
+       @test Base.get_preferences()["z"] == 3
+
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "test"))
+       id_mypkg = Base.identify_package("MyPkg")
+       id_dev = Base.identify_package(id_mypkg, "Devved")
+       @test isfile(Base.locate_package(id_dev))
+       @test Base.identify_package("Devved2") === nothing
+
+       # Test that workspace projects can be specified with subfolder paths
+       # and that base_project searches upward through multiple directory levels
+       empty!(LOAD_PATH)
+       push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "nested", "deep"))
+       proj_file = joinpath(@__DIR__, "project", "SubProject", "nested", "deep", "Project.toml")
+       base_proj = Base.base_project(proj_file)
+       @test base_proj == joinpath(@__DIR__, "project", "SubProject", "Project.toml")
+
+    finally
+       copy!(LOAD_PATH, old_load_path)
+    end
+end
+
+@testset "project path handling" begin
+    old_load_path = copy(LOAD_PATH)
+    try
+        push!(LOAD_PATH, joinpath(@__DIR__, "project", "ProjectPath"))
+        id_project = Base.identify_package("ProjectPath")
+        Base.locate_package(id_project)
+        @test Base.locate_package(id_project) == joinpath(@__DIR__, "project", "ProjectPath", "CustomPath.jl")
+
+        id_dep = Base.identify_package("ProjectPathDep")
+        @test Base.locate_package(id_dep) == joinpath(@__DIR__, "project", "ProjectPath", "ProjectPathDep", "CustomPath.jl")
+    finally
+        copy!(LOAD_PATH, old_load_path)
+    end
+end
+
+@testset "extension path computation name collision" begin
+    old_load_path = copy(LOAD_PATH)
+    try
+        empty!(LOAD_PATH)
+        push!(LOAD_PATH, joinpath(@__DIR__, "project", "Extensions", "ExtNameCollision_A"))
+        push!(LOAD_PATH, joinpath(@__DIR__, "project", "Extensions", "ExtNameCollision_B"))
+        ext_B = Base.PkgId(Base.uuid5(Base.identify_package("ExtNameCollision_B").uuid, "REPLExt"), "REPLExt")
+        @test Base.locate_package(ext_B) == joinpath(@__DIR__, "project",  "Extensions", "ExtNameCollision_B", "ext", "REPLExt.jl")
+    finally
+        copy!(LOAD_PATH, old_load_path)
+    end
+end
+
+@testset "require_stdlib loading duplication" begin
+    depot_path = mkdepottempdir()
+    oldBase64 = nothing
+    try
+        push!(empty!(DEPOT_PATH), depot_path)
+        Base64_key = Base.PkgId(Base.UUID("2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"), "Base64")
+        oldBase64 = Base.unreference_module(Base64_key)
+        cc = Base.compilecache(Base64_key)
+        sourcespec = Base.locate_package_load_spec(Base64_key)
+        @test Base.stale_cachefile(Base64_key, UInt128(0), sourcespec, cc[1]) !== true
+        empty!(DEPOT_PATH)
+        Base.require_stdlib(Base64_key)
+        push!(DEPOT_PATH, depot_path)
+        append!(DEPOT_PATH, original_depot_path)
+        oldloaded = @lock(Base.require_lock, length(get(Base.loaded_precompiles, Base64_key, Module[])))
+        Base.require(Base64_key)
+        @test @lock(Base.require_lock, length(get(Base.loaded_precompiles, Base64_key, Module[]))) == oldloaded
+        Base.unreference_module(Base64_key)
+        empty!(DEPOT_PATH)
+        push!(DEPOT_PATH, depot_path)
+        Base.require(Base64_key)
+        @test @lock(Base.require_lock, length(get(Base.loaded_precompiles, Base64_key, Module[]))) == oldloaded + 1
+        Base.unreference_module(Base64_key)
+    finally
+        oldBase64 === nothing || Base.register_root_module(oldBase64)
+        copy!(DEPOT_PATH, original_depot_path)
+    end
+end
+
+# Test `import Package as M`
+module M57965
+    import Random as R
+end
+@test M57965.R === Base.require(M57965, :Random)
+
+# #58272 - _eval_import accidentally reuses evaluated "from" path
+module M58272_1
+    const x = 1
+    module M58272_2
+        const y = 3
+        const x = 2
+    end
+end
+module M58272_to end
+@eval M58272_to import ..M58272_1: M58272_2.y, x
+@test @eval M58272_to x === 1
+
+@testset "Syntax Versioning" begin
+    old_load_path = copy(LOAD_PATH)
+    try
+        # Test implicit environments (packages loaded from directories)
+        push!(LOAD_PATH, joinpath(@__DIR__, "project", "SyntaxVersioning", "implicit"))
+        # Explicit syntax.julia_version = "1.13"
+        @test invokelatest(getglobal, (@eval (using Versioned1; Versioned1)), :ver) == v"1.13"
+        # Explicit syntax.julia_version = "1.14"
+        @test invokelatest(getglobal, (@eval (using Versioned2; Versioned2)), :ver) == v"1.14"
+        # Inherited from compat.julia = "1.13-2"
+        @test invokelatest(getglobal, (@eval (using Versioned3; Versioned3)), :ver) == v"1.13"
+        # No syntax.julia_version, falls back to current VERSION
+        @test invokelatest(getglobal, (@eval (using Versioned4; Versioned4)), :ver) == VersionNumber(VERSION.major, VERSION.minor)
+        # Inherited from compat.julia = "1.14-2"
+        @test invokelatest(getglobal, (@eval (using Versioned5; Versioned5)), :ver) == v"1.14"
+    finally
+        copy!(LOAD_PATH, old_load_path)
+    end
+
+    # Test explicit environments (packages loaded from Manifest.toml)
+    old_load_path = copy(LOAD_PATH)
+    old_active_project = Base.ACTIVE_PROJECT[]
+    try
+        explicit_env = joinpath(@__DIR__, "project", "SyntaxVersioning", "explicit")
+        Base.ACTIVE_PROJECT[] = joinpath(explicit_env, "Project.toml")
+        empty!(LOAD_PATH)
+        push!(LOAD_PATH, "@")
+        # syntax.julia_version from Manifest = "1.13"
+        @test invokelatest(getglobal, (@eval (using VersionedDep1; VersionedDep1)), :ver) == v"1.13"
+        # syntax.julia_version from Manifest = "1.14"
+        @test invokelatest(getglobal, (@eval (using VersionedDep2; VersionedDep2)), :ver) == v"1.14"
+        # syntax.julia_version from Manifest = "1.0" should be clamped to "1.13"
+        @test invokelatest(getglobal, (@eval (using VersionedDep3; VersionedDep3)), :ver) == v"1.13"
+    finally
+        Base.ACTIVE_PROJECT[] = old_active_project
+        copy!(LOAD_PATH, old_load_path)
+    end
 end
diff --git a/test/math.jl b/test/math.jl
index 19d9f7893a496..d04b027e60575 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -1,5 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+include("testhelpers/EvenIntegers.jl")
+using .EvenIntegers
+
+include("testhelpers/ULPError.jl")
+using .ULPError
+
 using Random
 using LinearAlgebra
 using Base.Experimental: @force_compile
@@ -22,30 +28,96 @@ has_fma = Dict(
     BigFloat => true,
 )
 
-@testset "clamp" begin
-    @test clamp(0, 1, 3) == 1
-    @test clamp(1, 1, 3) == 1
-    @test clamp(2, 1, 3) == 2
-    @test clamp(3, 1, 3) == 3
-    @test clamp(4, 1, 3) == 3
+@testset "meta test: ULPError" begin
+    examples_f64 = (-3e0, -2e0, -1e0, -1e-1, -1e-10, -0e0, 0e0, 1e-10, 1e-1, 1e0, 2e0, 3e0)::Tuple{Vararg{Float64}}
+    examples_f16 = Float16.(examples_f64)
+    examples_f32 = Float32.(examples_f64)
+    examples = (examples_f16..., examples_f32..., examples_f64...)
+    @testset "edge cases" begin
+        @testset "zero error - two equivalent values" begin
+            @test 0 == @inferred ulp_error(NaN, NaN)
+            @test 0 == @inferred ulp_error(-NaN, -NaN)
+            @test 0 == @inferred ulp_error(-NaN, NaN)
+            @test 0 == @inferred ulp_error(NaN, -NaN)
+            @test 0 == @inferred ulp_error(Inf, Inf)
+            @test 0 == @inferred ulp_error(-Inf, -Inf)
+            @test 0 == @inferred ulp_error(0.0, 0.0)
+            @test 0 == @inferred ulp_error(-0.0, -0.0)
+            @test 0 == @inferred ulp_error(-0.0, 0.0)
+            @test 0 == @inferred ulp_error(0.0, -0.0)
+            @test 0 == @inferred ulp_error(3.0, 3.0)
+            @test 0 == @inferred ulp_error(-3.0, -3.0)
+        end
+        @testset "infinite error" begin
+            @test Inf == @inferred ulp_error(NaN, 0.0)
+            @test Inf == @inferred ulp_error(0.0, NaN)
+            @test Inf == @inferred ulp_error(NaN, 3.0)
+            @test Inf == @inferred ulp_error(3.0, NaN)
+            @test Inf == @inferred ulp_error(NaN, Inf)
+            @test Inf == @inferred ulp_error(Inf, NaN)
+            @test Inf == @inferred ulp_error(Inf, -Inf)
+            @test Inf == @inferred ulp_error(-Inf, Inf)
+            @test Inf == @inferred ulp_error(0.0, Inf)
+            @test Inf == @inferred ulp_error(Inf, 0.0)
+            @test Inf == @inferred ulp_error(3.0, Inf)
+            @test Inf == @inferred ulp_error(Inf, 3.0)
+        end
+    end
+    @testset "faithful" begin
+        for x in examples
+            @test 1 == @inferred ulp_error(x, nextfloat(x, 1))
+            @test 1 == @inferred ulp_error(x, nextfloat(x, -1))
+        end
+    end
+    @testset "midpoint" begin
+        for x in examples
+            a = abs(x)
+            @test 1 == 2 * @inferred ulp_error(copysign((widen(a) + nextfloat(a, 1))/2, x), x)
+        end
+    end
+end
 
-    @test clamp(0.0, 1, 3) == 1.0
-    @test clamp(1.0, 1, 3) == 1.0
-    @test clamp(2.0, 1, 3) == 2.0
-    @test clamp(3.0, 1, 3) == 3.0
-    @test clamp(4.0, 1, 3) == 3.0
+@testset "clamp" begin
+    let
+        @test clamp(0, 1, 3) == 1
+        @test clamp(1, 1, 3) == 1
+        @test clamp(2, 1, 3) == 2
+        @test clamp(3, 1, 3) == 3
+        @test clamp(4, 1, 3) == 3
+
+        @test clamp(0.0, 1, 3) == 1.0
+        @test clamp(1.0, 1, 3) == 1.0
+        @test clamp(2.0, 1, 3) == 2.0
+        @test clamp(3.0, 1, 3) == 3.0
+        @test clamp(4.0, 1, 3) == 3.0
+
+        @test clamp.([0, 1, 2, 3, 4], 1.0, 3.0) == [1.0, 1.0, 2.0, 3.0, 3.0]
+        @test clamp.([0 1; 2 3], 1.0, 3.0) == [1.0 1.0; 2.0 3.0]
+
+        @test clamp(-200, Int8) === typemin(Int8)
+        @test clamp(100, Int8) === Int8(100)
+        @test clamp(200, Int8) === typemax(Int8)
+
+        let x = [0.0, 1.0, 2.0, 3.0, 4.0]
+            clamp!(x, 1, 3)
+            @test x == [1.0, 1.0, 2.0, 3.0, 3.0]
+        end
 
-    @test clamp.([0, 1, 2, 3, 4], 1.0, 3.0) == [1.0, 1.0, 2.0, 3.0, 3.0]
-    @test clamp.([0 1; 2 3], 1.0, 3.0) == [1.0 1.0; 2.0 3.0]
+        @test clamp(typemax(UInt64), Int64) === typemax(Int64)
+        @test clamp(typemin(Int), UInt64) === typemin(UInt64)
+        @test clamp(Int16(-1), UInt16) === UInt16(0)
+        @test clamp(-1, 2, UInt(0)) === UInt(2)
+        @test clamp(typemax(UInt16), Int16) === Int16(32767)
 
-    @test clamp(-200, Int8) === typemin(Int8)
-    @test clamp(100, Int8) === Int8(100)
-    @test clamp(200, Int8) === typemax(Int8)
+        # clamp should not allocate a BigInt for typemax(Int16)
+        let x = big(2) ^ 100
+            @test (@allocated clamp(x, Int16)) == 0
+        end
 
-    begin
-        x = [0.0, 1.0, 2.0, 3.0, 4.0]
-        clamp!(x, 1, 3)
-        @test x == [1.0, 1.0, 2.0, 3.0, 3.0]
+        let x = clamp(2.0, BigInt)
+            @test x isa BigInt
+            @test x == big(2)
+        end
     end
 end
 
@@ -188,6 +260,7 @@ end
             @test exp10(x) ≈ exp10(big(x))
             @test exp2(x) ≈ exp2(big(x))
             @test expm1(x) ≈ expm1(big(x))
+            @test expm1(T(-1.1)) ≈ expm1(big(T(-1.1)))
             @test hypot(x,y) ≈ hypot(big(x),big(y))
             @test hypot(x,x,y) ≈ hypot(hypot(big(x),big(x)),big(y))
             @test hypot(x,x,y,y) ≈ hypot(hypot(big(x),big(x)),hypot(big(y),big(y)))
@@ -365,6 +438,19 @@ end
     end
 end
 
+@testset "https://github.com/JuliaLang/julia/issues/56782" begin
+    @test isnan(exp(reinterpret(Float64, 0x7ffbb14880000000)))
+end
+
+@testset "issue #57463" begin
+    for T in (Int16, Int32, Int64, Int128)
+        @test iszero(1.1^typemin(T))
+        @test iszero(0.9^typemax(T))
+        @test isinf(1.1^typemax(T))
+        @test isinf(0.9^typemin(T))
+    end
+end
+
 @testset "test abstractarray trig functions" begin
     TAA = rand(2,2)
     TAA = (TAA + TAA')/2.
@@ -423,7 +509,7 @@ end
 end
 
 @testset "deg2rad/rad2deg" begin
-    @testset "$T" for T in (Int, Float64, BigFloat)
+    @testset "$T" for T in (Int, Float16, Float32, Float64, BigFloat)
         @test deg2rad(T(180)) ≈ 1pi
         @test deg2rad.(T[45, 60]) ≈ [pi/T(4), pi/T(3)]
         @test rad2deg.([pi/T(4), pi/T(3)]) ≈ [45, 60]
@@ -431,6 +517,16 @@ end
         @test rad2deg(T(1)) ≈ rad2deg(true)
         @test deg2rad(T(1)) ≈ deg2rad(true)
     end
+    @testset "accuracy" begin
+        @testset "$T" for T in (Float16, Float32, Float64)
+            @test rad2deg(T(1)) === setprecision(BigFloat, 500) do
+                T(180 / BigFloat(pi))
+            end
+            @test deg2rad(T(1)) === setprecision(BigFloat, 500) do
+                T(BigFloat(pi) / 180)
+            end
+        end
+    end
     @test deg2rad(180 + 60im) ≈ pi + (pi/3)*im
     @test rad2deg(pi + (pi/3)*im) ≈ 180 + 60im
 end
@@ -555,6 +651,39 @@ end
     @test ismissing(scdm[2])
 end
 
+@testset "behavior at signed zero of monotonic floating-point functions mapping zero to zero" begin
+    function rounder(rm::RoundingMode)
+        function closure(::Type{T}, x::AbstractFloat) where {T <: AbstractFloat}
+            round(T, x, rm)
+        end
+    end
+    rounding_modes = (
+        RoundNearest, RoundNearestTiesAway, RoundNearestTiesUp, RoundToZero, RoundFromZero, RoundUp, RoundDown,
+    )
+    rounders = map(rounder, rounding_modes)
+    @testset "typ: $typ" for typ in (Float16, Float32, Float64, BigFloat)
+        (n0, n1) = typ.(0:1)
+        rounders_typ = Base.Fix1.(rounders, typ)
+        @testset "f: $f" for f in (
+            # all strictly increasing
+            identity, deg2rad, rad2deg, cbrt, log1p, expm1, sinh, tanh, asinh, atanh,
+            sin, sind, sinpi, tan, tand, tanpi, asin, asind, atan, atand, Base.Fix2(atan, n1), Base.Fix2(atand, n1),
+            Base.Fix1(round, typ), Base.Fix1(trunc, typ), +, ∘(-, -), ∘(-, cosc),
+            rounders_typ...,
+            Base.Fix1(*, n1), Base.Fix2(*, n1), Base.Fix2(/, n1),
+        )
+            @testset "s: $s" for s in (-1, 1)
+                z = s * n0
+                z::typ
+                @test z == f(z)::typ
+                @test signbit(z) === signbit(f(z))
+                isbitstype(typ) &&
+                @test z === @inferred f(z)
+            end
+        end
+    end
+end
+
 @testset "Integer and Inf args for sinpi/cospi/tanpi/sinc/cosc" begin
     for (sinpi, cospi) in ((sinpi, cospi), (x->sincospi(x)[1], x->sincospi(x)[2]))
         @test sinpi(1) === 0.0
@@ -617,6 +746,12 @@ end
         @test cosc(big"0.5") ≈ big"-1.273239544735162686151070106980114896275677165923651589981338752471174381073817" rtol=1e-76
         @test cosc(big"0.499") ≈ big"-1.272045747741181369948389133250213864178198918667041860771078493955590574971317" rtol=1e-76
     end
+
+    @testset "accuracy of `cosc` around the origin" begin
+        for t in (Float32, Float64)
+            @test ulp_error_maximum(cosc, range(start = t(-1), stop = t(1), length = 5000)) < 4
+        end
+    end
 end
 
 @testset "Irrational args to sinpi/cospi/tanpi/sinc/cosc" begin
@@ -870,14 +1005,14 @@ end
 end
 
 @testset "isapprox" begin
-  # #22742: updated isapprox semantics
-  @test !isapprox(1.0, 1.0+1e-12, atol=1e-14)
-  @test isapprox(1.0, 1.0+0.5*sqrt(eps(1.0)))
-  @test !isapprox(1.0, 1.0+1.5*sqrt(eps(1.0)), atol=sqrt(eps(1.0)))
+    # #22742: updated isapprox semantics
+    @test !isapprox(1.0, 1.0+1e-12, atol=1e-14)
+    @test isapprox(1.0, 1.0+0.5*sqrt(eps(1.0)))
+    @test !isapprox(1.0, 1.0+1.5*sqrt(eps(1.0)), atol=sqrt(eps(1.0)))
 
-  # #13132: Use of `norm` kwarg for scalar arguments
-  @test isapprox(1, 1+1.0e-12, norm=abs)
-  @test !isapprox(1, 1+1.0e-12, norm=x->1)
+    # #13132: Use of `norm` kwarg for scalar arguments
+    @test isapprox(1, 1+1.0e-12, norm=abs)
+    @test !isapprox(1, 1+1.0e-12, norm=x->1)
 end
 
 # test AbstractFloat fallback pr22716
@@ -1097,7 +1232,7 @@ pcnfloat(x) = prevfloat(x), x, nextfloat(x)
 import Base.Math: COSH_SMALL_X, H_SMALL_X, H_MEDIUM_X, H_LARGE_X
 
 @testset "sinh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test sinh(zero(T)) === zero(T)
         @test sinh(-zero(T)) === -zero(T)
         @test sinh(nextfloat(zero(T))) === nextfloat(zero(T))
@@ -1105,15 +1240,17 @@ import Base.Math: COSH_SMALL_X, H_SMALL_X, H_MEDIUM_X, H_LARGE_X
         @test sinh(T(1000)) === T(Inf)
         @test sinh(-T(1000)) === -T(Inf)
         @test isnan_type(T, sinh(T(NaN)))
-        for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)]))
-            @test sinh(x) ≈ sinh(big(x)) rtol=eps(T)
-            @test sinh(-x) ≈ sinh(big(-x)) rtol=eps(T)
+        if T ∈ (Float32, Float64)
+            for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)]))
+                @test sinh(x) ≈ sinh(big(x)) rtol=eps(T)
+                @test sinh(-x) ≈ sinh(big(-x)) rtol=eps(T)
+            end
         end
     end
 end
 
 @testset "cosh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test cosh(zero(T)) === one(T)
         @test cosh(-zero(T)) === one(T)
         @test cosh(nextfloat(zero(T))) === one(T)
@@ -1121,15 +1258,17 @@ end
         @test cosh(T(1000)) === T(Inf)
         @test cosh(-T(1000)) === T(Inf)
         @test isnan_type(T, cosh(T(NaN)))
-        for x in Iterators.flatten(pcnfloat.([COSH_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)]))
-            @test cosh(x) ≈ cosh(big(x)) rtol=eps(T)
-            @test cosh(-x) ≈ cosh(big(-x)) rtol=eps(T)
+        if T ∈ (Float32, Float64)
+            for x in Iterators.flatten(pcnfloat.([COSH_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)]))
+                @test cosh(x) ≈ cosh(big(x)) rtol=eps(T)
+                @test cosh(-x) ≈ cosh(big(-x)) rtol=eps(T)
+            end
         end
     end
 end
 
 @testset "tanh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test tanh(zero(T)) === zero(T)
         @test tanh(-zero(T)) === -zero(T)
         @test tanh(nextfloat(zero(T))) === nextfloat(zero(T))
@@ -1137,9 +1276,11 @@ end
         @test tanh(T(1000)) === one(T)
         @test tanh(-T(1000)) === -one(T)
         @test isnan_type(T, tanh(T(NaN)))
-        for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), T(1.0), H_MEDIUM_X(T)]))
-            @test tanh(x) ≈ tanh(big(x)) rtol=eps(T)
-            @test tanh(-x) ≈ -tanh(big(x)) rtol=eps(T)
+        if T ∈ (Float32, Float64)
+            for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), T(1.0), H_MEDIUM_X(T)]))
+                @test tanh(x) ≈ tanh(big(x)) rtol=eps(T)
+                @test tanh(-x) ≈ -tanh(big(x)) rtol=eps(T)
+            end
         end
     end
     @test tanh(18.0) ≈ tanh(big(18.0)) rtol=eps(Float64)
@@ -1147,7 +1288,7 @@ end
 end
 
 @testset "asinh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test asinh(zero(T)) === zero(T)
         @test asinh(-zero(T)) === -zero(T)
         @test asinh(nextfloat(zero(T))) === nextfloat(zero(T))
@@ -1161,7 +1302,7 @@ end
 end
 
 @testset "acosh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test_throws DomainError acosh(T(0.1))
         @test acosh(one(T)) === zero(T)
         @test isnan_type(T, acosh(T(NaN)))
@@ -1172,7 +1313,7 @@ end
 end
 
 @testset "atanh" begin
-    for T in (Float32, Float64)
+    for T in (Float16, Float32, Float64)
         @test_throws DomainError atanh(T(1.1))
         @test atanh(zero(T)) === zero(T)
         @test atanh(-zero(T)) === -zero(T)
@@ -1354,6 +1495,16 @@ end
     # hypot on Complex returns Real
     @test (@inferred hypot(3, 4im)) === 5.0
     @test (@inferred hypot(3, 4im, 12)) === 13.0
+    @testset "promotion, issue #53505" begin
+        @testset "Int,$T" for T in (Float16, Float32, Float64, BigFloat)
+            for args in ((3, 4), (3, 4, 12))
+                for i in eachindex(args)
+                    targs = ntuple(j -> (j == i) ? T(args[j]) : args[j], length(args))
+                    @test (@inferred hypot(targs...)) isa float(eltype(promote(targs...)))
+                end
+            end
+        end
+    end
 end
 
 struct BadFloatWrapper <: AbstractFloat
@@ -1403,6 +1554,7 @@ end
                     Float32=>[.51, .51, .51, 2.0, 1.5],
                     Float64=>[.55, 0.8, 1.5, 2.0, 1.5])
     for T in (Float16, Float32, Float64)
+        @inferred T T(1.1)^T(1.1) #test that we always return the right type
         for x in (0.0, -0.0, 1.0, 10.0, 2.0, Inf, NaN, -Inf, -NaN)
             for y in (0.0, -0.0, 1.0, -3.0,-10.0 , Inf, NaN, -Inf, -NaN)
                 got, expected = T(x)^T(y), T(big(x)^T(y))
@@ -1448,6 +1600,100 @@ end
     # two cases where we have observed > 1 ULP in the past
     @test 0.0013653274095082324^-97.60372292227069 == 4.088393948750035e279
     @test 8.758520413376658e-5^70.55863059215994 == 5.052076767078296e-287
+
+    # issue #53881
+    c53881 = 2.2844135865398217e222 # check correctness within 2 ULPs
+    @test prevfloat(1.0) ^ -Int64(2)^62 ≈ c53881 atol=2eps(c53881)
+    @test 2.0 ^ typemin(Int) == 0.0
+    @test (-1.0) ^ typemin(Int) == 1.0
+    Z = Int64(2)
+    E = prevfloat(1.0)
+    @test E ^ (-Z^54) ≈ 7.38905609893065
+    @test E ^ (-Z^62) ≈ 2.2844135865231613e222
+    @test E ^ (-Z^63) == Inf
+    @test abs(E ^ (Z^62-1) * E ^ (-Z^62+1) - 1) <= eps(1.0)
+    n, x = -1065564664, 0.9999997040311492
+    @test abs(x^n - Float64(big(x)^n)) / eps(x^n) == 0 # ULPs
+    @test E ^ (big(2)^100 + 1) == 0
+    @test E ^ 6705320061009595392 == nextfloat(0.0)
+    n = Int64(1024 / log2(E))
+    @test E^n == Inf
+    @test E^float(n) == Inf
+
+    # issue #55831
+    @testset "literal pow zero sign" begin
+        @testset "T: $T" for T ∈ (Float16, Float32, Float64, BigFloat)
+            @testset "literal `-1`" begin
+                @test -0.0 === Float64(T(-Inf)^-1)
+            end
+            @testset "`Int(-1)`" begin
+                @test -0.0 === Float64(T(-Inf)^Int(-1))
+            end
+        end
+    end
+
+    # issue #55633
+    struct Issue55633_1 <: Number end
+    struct Issue55633_3 <: Number end
+    struct Issue55633_9 <: Number end
+    Base.one(::Issue55633_3) = Issue55633_1()
+    Base.:(*)(::Issue55633_3, ::Issue55633_3) = Issue55633_9()
+    Base.promote_rule(::Type{Issue55633_1}, ::Type{Issue55633_3}) = Int
+    Base.promote_rule(::Type{Issue55633_3}, ::Type{Issue55633_9}) = Int
+    Base.promote_rule(::Type{Issue55633_1}, ::Type{Issue55633_9}) = Int
+    Base.promote_rule(::Type{Issue55633_1}, ::Type{Int}) = Int
+    Base.promote_rule(::Type{Issue55633_3}, ::Type{Int}) = Int
+    Base.promote_rule(::Type{Issue55633_9}, ::Type{Int}) = Int
+    Base.convert(::Type{Int}, ::Issue55633_1) = 1
+    Base.convert(::Type{Int}, ::Issue55633_3) = 3
+    Base.convert(::Type{Int}, ::Issue55633_9) = 9
+    for x ∈ (im, pi, Issue55633_3())
+        p = promote(one(x), x, x*x)
+        for y ∈ 0:2
+            @test all((t -> ===(t...)), zip(x^y, p[y + 1]))
+        end
+    end
+
+    @testset "rng exponentiation, issue #57590" begin
+        @test EvenInteger(16) === @inferred EvenInteger(2)^4
+        @test EvenInteger(16) === @inferred EvenInteger(2)^Int(4)  # avoid `literal_pow`
+        @test EvenInteger(16) === @inferred EvenInteger(2)^EvenInteger(4)
+    end
+
+    # issue #57464
+    @test Float32(1.1)^typemin(Int) == Float32(0.0)
+    @test Float16(1.1)^typemin(Int) == Float16(0.0)
+    @test Float32(1.1)^unsigned(0) === Float32(1.0)
+    @test Float32(1.1)^big(0) === Float32(1.0)
+
+    # By using a limited-precision integer (3 bits) we can trigger issue 57464
+    # for a case where the answer isn't zero.
+    struct Int3 <: Integer
+        x::Int8
+        function Int3(x::Integer)
+            if x < -4 || x > 3
+                Core.throw_inexacterror(:Int3, Int3, x)
+            end
+            return new(x)
+        end
+    end
+    Base.typemin(::Type{Int3}) = Int3(-4)
+    Base.promote_rule(::Type{Int3}, ::Type{T}) where {T<:Integer} = T
+    Base.convert(::Type{T}, x::Int3) where {T<:Integer} = convert(T, x.x)
+    Base.:-(x::Int3) = x.x == -4 ? x : Int3(-x.x)
+    Base.trailing_zeros(x::Int3) = trailing_zeros(x.x)
+    Base.:>>(x::Int3, n::UInt64) = Int3(x.x>>n)
+
+    @test 1.001f0^-3 == 1.001f0^Int3(-3)
+    @test 1.001f0^-4 == 1.001f0^typemin(Int3)
+end
+
+@testset "special function `::Real` fallback shouldn't recur without bound, issue #57789" begin
+    mutable struct Issue57789 <: Real end
+    Base.float(::Issue57789) = Issue57789()
+    for f ∈ (sin, sinpi, log, exp)
+        @test_throws MethodError f(Issue57789())
+    end
 end
 
 # Test that sqrt behaves correctly and doesn't exhibit fp80 double rounding.
@@ -1536,25 +1782,77 @@ function f44336()
     @inline hypot(as...)
 end
 @testset "Issue #44336" begin
-    f44336()
-    @test (@allocated f44336()) == 0
+    let
+        f44336()
+        @test (@allocated f44336()) == 0
+    end
 end
 
-# test constant-foldability
-for fn in (:sin, :cos, :tan, :log, :log2, :log10, :log1p, :exponent, :sqrt, :cbrt, :fourthroot,
-           :asin, :atan, :acos, :sinh, :cosh, :tanh, :asinh, :acosh, :atanh,
-           :exp, :exp2, :exp10, :expm1
-           )
-    for T in (Float16, Float32, Float64)
-        f = getfield(@__MODULE__, fn)
-        eff = Base.infer_effects(f, (T,))
-        @test Core.Compiler.is_foldable(eff)
+@testset "constant-foldability of core math functions" begin
+    for T = Any[Float16, Float32, Float64]
+        @testset let T = T
+            for f = Any[sin, cos, tan, log, log2, log10, log1p, exponent, sqrt, cbrt, fourthroot,
+                        asin, atan, acos, sinh, cosh, tanh, asinh, acosh, atanh, exp, exp2, exp10, expm1]
+                @testset let f = f,
+                             rt = Base.infer_return_type(f, (T,)),
+                             effects = Base.infer_effects(f, (T,))
+                    @test rt != Union{}
+                    @test Core.Compiler.is_foldable(effects)
+                end
+            end
+            @testset let effects = Base.infer_effects(^, (T,Int))
+                @test Core.Compiler.is_foldable(effects)
+            end
+            @testset let effects = Base.infer_effects(^, (T,T))
+                @test Core.Compiler.is_foldable(effects)
+            end
+        end
     end
-end
-for T in (Float16, Float32, Float64)
-    for f in (exp, exp2, exp10)
-        @test Core.Compiler.is_removable_if_unused(Base.infer_effects(f, (T,)))
+end;
+@testset "removability of core math functions" begin
+    for T = Any[Float16, Float32, Float64]
+        @testset let T = T
+            for f = Any[exp, exp2, exp10, expm1]
+                @testset let f = f
+                    @test Core.Compiler.is_removable_if_unused(Base.infer_effects(f, (T,)))
+                end
+            end
+        end
+    end
+end;
+@testset "exception type inference of core math functions" begin
+    MathErrorT = Union{DomainError, InexactError}
+    for T = (Float16, Float32, Float64)
+        @testset let T = T
+            for f = Any[sin, cos, tan, log, log2, log10, log1p, exponent, sqrt, cbrt, fourthroot,
+                        asin, atan, acos, sinh, cosh, tanh, asinh, acosh, atanh, exp, exp2, exp10, expm1]
+                @testset let f = f
+                    @test Base.infer_exception_type(f, (T,)) <: MathErrorT
+                end
+            end
+            @test Base.infer_exception_type(^, (T,Int)) <: MathErrorT
+            @test Base.infer_exception_type(^, (T,T)) <: MathErrorT
+        end
     end
-    @test Core.Compiler.is_foldable(Base.infer_effects(^, (T,Int)))
-    @test Core.Compiler.is_foldable(Base.infer_effects(^, (T,T)))
+end;
+@test Base.infer_return_type((Int,)) do x
+    local r = nothing
+    try
+        r = sin(x)
+    catch err
+        if err isa DomainError
+            r = 0.0
+        end
+    end
+    return r
+end === Float64
+
+@testset "BigInt Rationals with special funcs" begin
+    @test sinpi(big(1//1)) == big(0.0)
+    @test tanpi(big(1//1)) == big(0.0)
+    @test cospi(big(1//1)) == big(-1.0)
+end
+
+@testset "Docstrings" begin
+    @test isempty(Docs.undocumented_names(MathConstants))
 end
diff --git a/test/meta.jl b/test/meta.jl
index 36a8acbfe08dd..3d5fc08ee24e9 100644
--- a/test/meta.jl
+++ b/test/meta.jl
@@ -43,77 +43,70 @@ end
 @test foundfunc(h_inlined(), :g_inlined)
 @test foundfunc(h_noinlined(), :g_noinlined)
 
-using Base: pushmeta!, popmeta!
+using Base: popmeta!
 
-macro attach(val, ex)
-    esc(_attach(val, ex))
+macro attach_meta(val, ex)
+    esc(_attach_meta(val, ex))
 end
+_attach_meta(val, ex) = Base.pushmeta!(ex, Expr(:test, val))
 
-_attach(val, ex) = pushmeta!(ex, :test, val)
-
-@attach 42 function dummy()
+@attach_meta 42 function dummy()
     false
 end
-
-asts = code_lowered(dummy, Tuple{})
-@test length(asts) == 1
-ast = asts[1]
-
-body = Expr(:block)
-body.args = ast.code
-
-@test popmeta!(body, :test) == (true, [42])
-@test popmeta!(body, :nonexistent) == (false, [])
+let ast = only(code_lowered(dummy, Tuple{}))
+    body = Expr(:block)
+    body.args = ast.code
+    @test popmeta!(body, :test) == (true, [42])
+    @test popmeta!(body, :nonexistent) == (false, [])
+end
 
 # Simple popmeta!() tests
-ex1 = quote
-    $(Expr(:meta, :foo))
-    x*x+1
+let ex1 = quote
+        $(Expr(:meta, :foo))
+        x*x+1
+    end
+    @test popmeta!(ex1, :foo)[1]
+    @test !popmeta!(ex1, :foo)[1]
+    @test !popmeta!(ex1, :bar)[1]
+    @test !(popmeta!(:(x*x+1), :foo)[1])
 end
-@test popmeta!(ex1, :foo)[1]
-@test !popmeta!(ex1, :foo)[1]
-@test !popmeta!(ex1, :bar)[1]
-@test !(popmeta!(:(x*x+1), :foo)[1])
 
 # Find and pop meta information from general ast locations
-multi_meta = quote
-    $(Expr(:meta, :foo1))
-    y = x
-    $(Expr(:meta, :foo2, :foo3))
-    begin
-        $(Expr(:meta, :foo4, Expr(:foo5, 1, 2)))
+let multi_meta = quote
+        $(Expr(:meta, :foo1))
+        y = x
+        $(Expr(:meta, :foo2, :foo3))
+        begin
+            $(Expr(:meta, :foo4, Expr(:foo5, 1, 2)))
+        end
+        x*x+1
     end
-    x*x+1
-end
-@test popmeta!(deepcopy(multi_meta), :foo1) == (true, [])
-@test popmeta!(deepcopy(multi_meta), :foo2) == (true, [])
-@test popmeta!(deepcopy(multi_meta), :foo3) == (true, [])
-@test popmeta!(deepcopy(multi_meta), :foo4) == (true, [])
-@test popmeta!(deepcopy(multi_meta), :foo5) == (true, [1,2])
-@test popmeta!(deepcopy(multi_meta), :bar)  == (false, [])
-
-# Test that popmeta!() removes meta blocks entirely when they become empty.
-for m in [:foo1, :foo2, :foo3, :foo4, :foo5]
-    @test popmeta!(multi_meta, m)[1]
+    @test popmeta!(deepcopy(multi_meta), :foo1) == (true, [])
+    @test popmeta!(deepcopy(multi_meta), :foo2) == (true, [])
+    @test popmeta!(deepcopy(multi_meta), :foo3) == (true, [])
+    @test popmeta!(deepcopy(multi_meta), :foo4) == (true, [])
+    @test popmeta!(deepcopy(multi_meta), :foo5) == (true, [1,2])
+    @test popmeta!(deepcopy(multi_meta), :bar)  == (false, [])
+
+    # Test that popmeta!() removes meta blocks entirely when they become empty.
+    ast = :(dummy() = $multi_meta)
+    for m in [:foo1, :foo2, :foo3, :foo4, :foo5]
+        @test popmeta!(multi_meta, m)[1]
+    end
+    @test Base.findmeta(ast)[1] == 0
 end
-@test Base.findmeta(multi_meta.args)[1] == 0
 
 # Test that pushmeta! can push across other macros,
 # in the case multiple pushmeta!-based macros are combined
-
-@attach 40 @attach 41 @attach 42 dummy_multi() = return nothing
-
-asts = code_lowered(dummy_multi, Tuple{})
-@test length(asts) == 1
-ast = asts[1]
-
-body = Expr(:block)
-body.args = ast.code
-
-@test popmeta!(body, :test) == (true, [40])
-@test popmeta!(body, :test) == (true, [41])
-@test popmeta!(body, :test) == (true, [42])
-@test popmeta!(body, :nonexistent) == (false, [])
+@attach_meta 40 @attach_meta 41 @attach_meta 42 dummy_multi() = return nothing
+let ast = only(code_lowered(dummy_multi, Tuple{}))
+    body = Expr(:block)
+    body.args = ast.code
+    @test popmeta!(body, :test) == (true, [40])
+    @test popmeta!(body, :test) == (true, [41])
+    @test popmeta!(body, :test) == (true, [42])
+    @test popmeta!(body, :nonexistent) == (false, [])
+end
 
 # tests to fully cover functions in base/meta.jl
 using Base.Meta
@@ -241,7 +234,7 @@ let ex = Meta.parseall("@foo", filename=:bar)
     @test isa(arg2arg2, LineNumberNode) && arg2arg2.file === :bar
 end
 
-_lower(m::Module, ex, world::UInt) = ccall(:jl_expand_in_world, Any, (Any, Ref{Module}, Cstring, Cint, Csize_t), ex, m, "none", 0, world)
+_lower(m::Module, ex, world::UInt) = Base.fl_lower(ex, m, "none", 0, world, false)[1]
 
 module TestExpandInWorldModule
 macro m() 1 end
@@ -254,14 +247,14 @@ end
 f(::T) where {T} = T
 ci = code_lowered(f, Tuple{Int})[1]
 @test Meta.partially_inline!(ci.code, [], Tuple{typeof(f),Int}, Any[Int], 0, 0, :propagate) ==
-    Any[Core.ReturnNode(QuoteNode(Int))]
+    Any[QuoteNode(Int), Core.ReturnNode(Core.SSAValue(1))]
 
 g(::Val{x}) where {x} = x ? 1 : 0
 ci = code_lowered(g, Tuple{Val{true}})[1]
-@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 0, :propagate)[1] ==
-   Core.GotoIfNot(QuoteNode(true), 3)
-@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 2, :propagate)[1] ==
-   Core.GotoIfNot(QuoteNode(true), 5)
+@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 0, :propagate)[2] ==
+   Core.GotoIfNot(Core.SSAValue(1), 4)
+@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 2, :propagate)[2] ==
+   Core.GotoIfNot(Core.SSAValue(3), 6)
 
 @testset "inlining with isdefined" begin
     isdefined_slot(x) = @isdefined(x)
@@ -281,6 +274,15 @@ ci = code_lowered(g, Tuple{Val{true}})[1]
     @eval isdefined_globalref(x) = $(Expr(:isdefined, GlobalRef(Base, :foo)))
     ci = code_lowered(isdefined_globalref, Tuple{Int})[1]
     @test Meta.partially_inline!(copy(ci.code), Any[isdefined_globalref, 1], Tuple{typeof(isdefined_globalref), Int},
-                                 [], 0, 0, :propagate)[1] == Expr(:isdefined, GlobalRef(Base, :foo))
+                                 [], 0, 0, :propagate)[1] == Expr(:call, GlobalRef(Core, :isdefinedglobal), Base, QuoteNode(:foo))
+
+    withunreachable(s::String) = sin(s)
+    ci = code_lowered(withunreachable, Tuple{String})[1]
+    ci.code[end] = Core.ReturnNode()
+    @test Meta.partially_inline!(copy(ci.code), Any[withunreachable, "foo"], Tuple{typeof(withunreachable), String},
+                                 [], 0, 0, :propagate)[end] == Core.ReturnNode()
+end
 
+@testset "Base.Meta docstrings" begin
+    @test isempty(Docs.undocumented_names(Meta))
 end
diff --git a/test/misc.jl b/test/misc.jl
index 79b684badf1e0..7df2d5b725b67 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -129,6 +129,46 @@ let l = ReentrantLock()
     @test_throws ErrorException unlock(l)
 end
 
+# Lockable{T, L<:AbstractLock}
+using Base: Lockable
+let
+    @test Base.isexported(Base, :Lockable)
+    lockable = Lockable(Dict("foo" => "hello"), ReentrantLock())
+    # note field access is non-public
+    @test lockable.value["foo"] == "hello"
+    @test @lock(lockable, lockable[]["foo"]) == "hello"
+    lock(lockable) do d
+        @test d["foo"] == "hello"
+    end
+    lock(lockable) do d
+        d["foo"] = "goodbye"
+    end
+    @test lockable.value["foo"] == "goodbye"
+    @lock lockable begin
+        @test lockable[]["foo"] == "goodbye"
+    end
+    l = trylock(lockable)
+    try
+        @test l
+    finally
+        unlock(lockable)
+    end
+    # Test 1-arg constructor
+    lockable2 = Lockable(Dict("foo" => "hello"))
+    @test lockable2.lock isa ReentrantLock
+    @test @lock(lockable2, lockable2[]["foo"]) == "hello"
+end
+
+@testset "`show` for ReentrantLock" begin
+    l = ReentrantLock()
+    @test repr(l) == "ReentrantLock()"
+    @test repr("text/plain", l) == "ReentrantLock() (unlocked)"
+    @lock l begin
+        @test startswith(repr("text/plain", l), "ReentrantLock() (locked by current Task (")
+    end
+    @test repr("text/plain", l) == "ReentrantLock() (unlocked)"
+end
+
 for l in (Threads.SpinLock(), ReentrantLock())
     @test get_finalizers_inhibited() == 0
     @test lock(get_finalizers_inhibited, l) == 1
@@ -197,6 +237,24 @@ end
     @test all(<=(sem_size), history)
     @test all(>=(0), history)
     @test history[end] == 0
+
+    # macro form
+    clock = Threads.Atomic{Int}(1)
+    occupied = Threads.Atomic{Int}(0)
+    history = fill!(Vector{Int}(undef, 2n), -1)
+    @sync for _ in 1:n
+        @async begin
+            @test Base.@acquire s begin
+                history[Threads.atomic_add!(clock, 1)] = Threads.atomic_add!(occupied, 1) + 1
+                sleep(rand(0:0.01:0.1))
+                history[Threads.atomic_add!(clock, 1)] = Threads.atomic_sub!(occupied, 1) - 1
+                return :resultvalue
+            end === :resultvalue
+        end
+    end
+    @test all(<=(sem_size), history)
+    @test all(>=(0), history)
+    @test history[end] == 0
 end
 
 # task switching
@@ -221,12 +279,14 @@ let c = Ref(0),
     @test c[] == 100
 end
 
-@test_throws ErrorException("deadlock detected: cannot wait on current task") wait(current_task())
+@test_throws ConcurrencyViolationError("deadlock detected: cannot wait on current task") wait(current_task())
+
+@test_throws ConcurrencyViolationError("Cannot yield to currently running task!") yield(current_task())
 
 # issue #41347
 let t = @async 1
     wait(t)
-    @test_throws ErrorException yield(t)
+    @test_throws ConcurrencyViolationError yield(t)
 end
 
 let t = @async error(42)
@@ -265,6 +325,9 @@ let
     stats = @timed sin(1)
     @test stats.value == sin(1)
     @test isa(stats.time, Real) && stats.time >= 0
+    @test isa(stats.compile_time, Real) && stats.compile_time >= 0
+    @test isa(stats.recompile_time, Real) && stats.recompile_time >= 0
+    @test stats.compile_time <= stats.time
 
     # The return type of gcstats was changed in Julia 1.4 (# 34147)
     # Test that the 1.0 API still works
@@ -286,25 +349,43 @@ v11801, t11801 = @timed sin(1)
 @test names(@__MODULE__, all = true) == names_before_timing
 
 redirect_stdout(devnull) do # suppress time prints
+
 # Accepted @time argument formats
 @test @time true
 @test @time "message" true
+@test @time 1 true
 let msg = "message"
     @test @time msg true
 end
 let foo() = "message"
     @test @time foo() true
 end
+let foo() = 1
+    @test @time foo() true
+end
 
 # Accepted @timev argument formats
 @test @timev true
 @test @timev "message" true
+@test @timev 1 true
 let msg = "message"
     @test @timev msg true
 end
 let foo() = "message"
     @test @timev foo() true
 end
+let foo() = 1
+    @test @timev foo() true
+end
+
+# this is internal, but used for easy testing
+@test sprint(Base.time_print, 1e9) == "  1.000000 seconds"
+@test sprint(Base.time_print, 1e9, 111, 0, 222) == "  1.000000 seconds (222 allocations: 111 bytes)"
+@test sprint(Base.time_print, 1e9, 111, 0.5e9, 222) == "  1.000000 seconds (222 allocations: 111 bytes, 50.00% gc time)"
+@test sprint(Base.time_print, 1e9, 111, 0, 222, 333) == "  1.000000 seconds (222 allocations: 111 bytes, 333 lock conflicts)"
+@test sprint(Base.time_print, 1e9, 0, 0, 0, 333) == "  1.000000 seconds (333 lock conflicts)"
+@test sprint(Base.time_print, 1e9, 111, 0, 222, 333, 0.25e9) == "  1.000000 seconds (222 allocations: 111 bytes, 333 lock conflicts, 25.00% compilation time)"
+@test sprint(Base.time_print, 1e9, 111, 0.5e9, 222, 333, 0.25e9, 0.175e9) == "  1.000000 seconds (222 allocations: 111 bytes, 50.00% gc time, 333 lock conflicts, 25.00% compilation time: 70% of which was recompilation)"
 
 # @showtime
 @test @showtime true
@@ -418,12 +499,12 @@ begin
     local second = @capture_stdout @time @eval calldouble2(1.0)
 
     # these functions were not recompiled
-    local matches = collect(eachmatch(r"(\d+(?:\.\d+)?)%", first))
+    local matches = collect(eachmatch(r"(\d+(?:\.\d+)?)% compilation", first))
     @test length(matches) == 1
     @test parse(Float64, matches[1][1]) > 0.0
     @test parse(Float64, matches[1][1]) <= 100.0
 
-    matches = collect(eachmatch(r"(\d+(?:\.\d+)?)%", second))
+    matches = collect(eachmatch(r"(\d+(?:\.\d+)?)% compilation", second))
     @test length(matches) == 1
     @test parse(Float64, matches[1][1]) > 0.0
     @test parse(Float64, matches[1][1]) <= 100.0
@@ -497,7 +578,7 @@ struct ambigconvert; end # inject a problematic `convert` method to ensure it st
 Base.convert(::Any, v::ambigconvert) = v
 
 import Base.summarysize
-@test summarysize(Core) > (summarysize(Core.Compiler) + Base.summarysize(Core.Intrinsics)) > Core.sizeof(Core)
+@test summarysize(Core) > Base.summarysize(Core.Intrinsics) > Core.sizeof(Core)
 @test summarysize(Base) > 100_000 * sizeof(Ptr)
 
 let R = Ref{Any}(nothing), depth = 10^6
@@ -538,6 +619,29 @@ end
 # issue #44780
 @test summarysize(BigInt(2)^1000) > summarysize(BigInt(2))
 
+# issue #53061
+mutable struct S53061
+    x::Union{Float64, Tuple{Float64, Float64}}
+    y::Union{Float64, Tuple{Float64, Float64}}
+end
+let s = S53061[S53061(rand(), (rand(),rand())) for _ in 1:10^4]
+    @test allequal(summarysize(s) for i in 1:10)
+end
+struct Z53061
+    x::S53061
+    y::Int64
+end
+let z = Z53061[Z53061(S53061(rand(), (rand(),rand())), 0) for _ in 1:10^4]
+    @test allequal(summarysize(z) for i in 1:10)
+    # broken on i868 linux. issue #54895
+    @test abs(summarysize(z) - 640000)/640000 <= 0.01 broken = Sys.WORD_SIZE == 32 && Sys.islinux()
+end
+
+# issue #57506
+let len = 100, m1 = Memory{UInt8}(1:len), m2 = Memory{Union{Nothing,UInt8}}(1:len)
+    @test summarysize(m2) == summarysize(m1) + len
+end
+
 ## test conversion from UTF-8 to UTF-16 (for Windows APIs)
 
 # empty arrays
@@ -1058,7 +1162,7 @@ Base.setindex!(xs::InvokeXs2, @nospecialize(v::Any), idx::Int) = xs.xs[idx] = v
         @test @invoke(f2(1::Real)) === Integer
     end
 
-    # when argment's type annotation is omitted, it should be specified as `Core.Typeof(x)`
+    # when argument's type annotation is omitted, it should be specified as `Core.Typeof(x)`
     let f(_) = Any
         f(x::Integer) = Integer
         @test f(1) === Integer
@@ -1130,18 +1234,15 @@ include("testenv.jl")
 
 let flags = Cmd(filter(a->!occursin("depwarn", a), collect(test_exeflags)))
     local cmd = `$test_exename $flags --depwarn=yes deprecation_exec.jl`
-
-    if !success(pipeline(cmd; stdout=stdout, stderr=stderr))
-        error("Deprecation test failed, cmd : $cmd")
-    end
+    run(cmd, devnull)
 end
 
 # PR #23664, make sure names don't get added to the default `Main` workspace
 @test readlines(`$(Base.julia_cmd()) --startup-file=no -e 'foreach(println, names(Main))'`) == ["Base","Core","Main"]
 
 # issue #26310
-@test_warn "could not import" Core.eval(@__MODULE__, :(import .notdefined_26310__))
-@test_warn "could not import" Core.eval(Main,        :(import ........notdefined_26310__))
+@test_warn "undeclared at import time" Core.eval(@__MODULE__, :(import .notdefined_26310__))
+@test_warn "undeclared at import time" Core.eval(Main,        :(import ........notdefined_26310__))
 @test_nowarn Core.eval(Main, :(import .Main))
 @test_nowarn Core.eval(Main, :(import ....Main))
 
@@ -1284,10 +1385,56 @@ end
     end
 end
 
+module KwdefWithEsc
+    const Int1 = Int
+    const val1 = 42
+    macro define_struct()
+        quote
+            @kwdef struct $(esc(:Struct))
+                a
+                b = val1
+                c::Int1
+                d::Int1 = val1
+
+                $(esc(quote
+                    e
+                    f = val2
+                    g::Int2
+                    h::Int2 = val2
+                end))
+
+                $(esc(:(i = val2)))
+                $(esc(:(j::Int2)))
+                $(esc(:(k::Int2 = val2)))
+
+                l::$(esc(:Int2))
+                m::$(esc(:Int2)) = val1
+
+                n = $(esc(:val2))
+                o::Int1 = $(esc(:val2))
+
+                $(esc(:p))
+                $(esc(:q)) = val1
+                $(esc(:s))::Int1
+                $(esc(:t))::Int1 = val1
+            end
+        end
+    end
+end
+
+module KwdefWithEsc_TestModule
+    using ..KwdefWithEsc
+    const Int2 = Int
+    const val2 = 42
+    KwdefWithEsc.@define_struct()
+end
+@test isdefined(KwdefWithEsc_TestModule, :Struct)
+
 @testset "exports of modules" begin
-    for (_, mod) in Base.loaded_modules
+    @testset "$mod" for (_, mod) in Base.loaded_modules
         mod === Main && continue # Main exports everything
-        for v in names(mod)
+        @testset "$v" for v in names(mod)
+            isdefined(mod, v) || @error "missing $v in $mod"
             @test isdefined(mod, v)
         end
     end
@@ -1300,6 +1447,11 @@ end
     @test sort([a, b]) == [b, a]
 end
 
+@testset "UUID display" begin
+    a = Base.UUID("dbd321ed-e87e-4f33-9511-65b7d01cdd55")
+    @test repr(a) == "$(Base.UUID)(\"dbd321ed-e87e-4f33-9511-65b7d01cdd55\")"
+end
+
 @testset "Libc.rand" begin
     low, high = extrema(Libc.rand(Float64) for i=1:10^4)
     # these fail with probability 2^(-10^4) ≈ 5e-3011
@@ -1324,7 +1476,8 @@ end
 @test_throws ErrorException finalizer(x->nothing, 1)
 @test_throws ErrorException finalizer(C_NULL, 1)
 
-
+# FIXME: Issue #57103 Test is specific to Stock GC
+@static if Base.USING_STOCK_GC
 @testset "GC utilities" begin
     GC.gc()
     GC.gc(true); GC.gc(false)
@@ -1335,13 +1488,16 @@ end
         open(tmppath, "w") do tmpio
             redirect_stderr(tmpio) do
                 GC.enable_logging(true)
+                @test GC.logging_enabled()
                 GC.gc()
                 GC.enable_logging(false)
+                @test !GC.logging_enabled()
             end
         end
         @test occursin("GC: pause", read(tmppath, String))
     end
 end
+end
 
 @testset "fieldtypes Module" begin
     @test fieldtypes(Module) === ()
@@ -1353,9 +1509,9 @@ end
 end
 
 # Test that read fault on a prot-none region does not incorrectly give
-# ReadOnlyMemoryEror, but rather crashes the program
+# ReadOnlyMemoryError, but rather crashes the program
 const MAP_ANONYMOUS_PRIVATE = Sys.isbsd() ? 0x1002 : 0x22
-let script = :(
+let script = """
         let ptr = Ptr{Cint}(ccall(:jl_mmap, Ptr{Cvoid},
                                   (Ptr{Cvoid}, Csize_t, Cint, Cint, Cint, Int),
                                   C_NULL, 16*1024, 0, $MAP_ANONYMOUS_PRIVATE, -1, 0))
@@ -1365,27 +1521,83 @@ let script = :(
                 println(e)
             end
         end
-    )
+    """
     cmd = if Sys.isunix()
         # Set the maximum core dump size to 0 to keep this expected crash from
         # producing a (and potentially overwriting an existing) core dump file
-        `sh -c "ulimit -c 0; $(Base.shell_escape(Base.julia_cmd())) -e '$script'"`
+        `sh -c "ulimit -c 0; $(Base.shell_escape(Base.julia_cmd())) -e $(Base.shell_escape(script))"`
+    else
+        `$(Base.julia_cmd()) -e $script`
+    end
+    p = run(ignorestatus(cmd), devnull, stdout, devnull)
+    if p.termsignal == 0
+        Sys.isunix() ? @test(p.exitcode ∈ (128+7, 128+10, 128+11)) : @test(p.exitcode != 0) # expect SIGBUS (7 on BSDs or 10 on Linux) or SIGSEGV (11)
     else
-        `$(Base.julia_cmd()) -e '$script'`
+        @test(p.termsignal ∈ (7, 10, 11))
     end
-    @test !success(cmd)
 end
 
 # issue #41656
-@test success(`$(Base.julia_cmd()) -e 'isempty(x) = true'`)
+run(`$(Base.julia_cmd()) -e 'isempty(x) = true'`)
+
+function treshape59278(X::AbstractArray, n, m)
+    Y = reshape(X, n, m)
+    Y .= 1.0
+    return X
+end
+
+# a function that allocates iff no constprop
+@inline maybealloc59278(n, _) = ntuple(i->rand(), n)
 
 @testset "Base/timing.jl" begin
     @test Base.jit_total_bytes() >= 0
 
-    # sanity check `@allocations` returns what we expect in some very simple cases
-    @test (@allocations "a") == 0
-    @test (@allocations "a" * "b") == 0 # constant propagation
-    @test (@allocations "a" * Base.inferencebarrier("b")) == 1
+    # sanity check `@allocations` returns what we expect in some very simple cases.
+    @test (() -> @allocations "a")() == 0
+    "a" * Base.inferencebarrier("b")
+    @test (() -> @allocations "a" * Base.inferencebarrier("b"))() == 1
+    # test that you can grab the value from @allocated
+    @allocated _x = 1+2
+    @test _x === 3
+
+    n, m = 10, 20
+    X = rand(n, m)
+    treshape59278(X, n, m)
+    # test that @allocated and @allocations are consistent about whether anything was
+    # allocated in a case where the compiler can sometimes remove an allocation
+    # https://github.com/JuliaLang/julia/issues/58634#issuecomment-2940840651
+    @test ((@allocated treshape59278(X, n, m))==0) == ((@allocations treshape59278(X, n, m))==0)
+    # TODO: would be nice to have but not yet reliable
+    #@test ((@allocated begin treshape59278(X, n, m) end)==0) == ((@allocations begin treshape59278(X, n, m) end)==0)
+
+    # test that all wrapped allocations are counted and constprop is not done
+    @test (@allocated @noinline maybealloc59278(10, [])) > (@allocated maybealloc59278(10, 0)) > 0
+    # but if you wrap it in another function it can be constprop'd
+    @test (@allocated (()->maybealloc59278(10, []))()) == 0
+
+    _lock_conflicts, _nthreads = eval(Meta.parse(read(`$(Base.julia_cmd()) -tauto -E '
+        _lock_conflicts = @lock_conflicts begin
+            l = ReentrantLock()
+            Threads.@threads for i in 1:Threads.nthreads()
+                 lock(l) do
+                    sleep(1)
+                end
+            end
+        end
+        _lock_conflicts,Threads.nthreads()
+    '`, String)))
+    @test _lock_conflicts > 0 skip=(_nthreads < 2) # can only test if the worker can multithread
+
+    # Test the output of `format_bytes()`
+    inputs = [(factor * (Int64(1000)^e),binary) for binary in (false,true), factor in (1,2), e in 0:6][:]
+    expected_output = ["1 byte", "1 byte", "2 bytes", "2 bytes", "1000 bytes", "1000 bytes", "2.000 kB", "1.953 KiB",
+                        "1000.000 kB", "976.562 KiB", "2.000 MB", "1.907 MiB", "1000.000 MB", "953.674 MiB",
+                        "2.000 GB", "1.863 GiB", "1000.000 GB", "931.323 GiB", "2.000 TB", "1.819 TiB",
+                        "1000.000 TB", "909.495 TiB", "2.000 PB", "1.776 PiB", "1000.000 PB", "888.178 PiB",
+                        "2000.000 PB", "1776.357 PiB"]
+    for ((n, binary), expected) in zip(inputs, expected_output)
+        @test Base.format_bytes(n; binary) == expected
+    end
 end
 
 @testset "in_finalizer" begin
@@ -1402,3 +1614,64 @@ end
     GC.gc(true); yield()
     @test in_fin[]
 end
+
+@testset "Base docstrings" begin
+    undoc = Docs.undocumented_names(Base)
+    @test_broken isempty(undoc)
+    @test isempty(setdiff(undoc, [:BufferStream, :CanonicalIndexError, :CapturedException, :Filesystem, :IOServer, :InvalidStateException, :Order, :PipeEndpoint, :ScopedValues, :Sort, :TTY, :AtomicMemoryRef, :Exception, :GenericMemoryRef, :GlobalRef, :IO, :LineNumberNode, :MemoryRef, :Method, :SegmentationFault, :TypeVar, :arrayref, :arrayset, :arraysize, :const_arrayref]))
+end
+
+exported_names(m) = filter(s -> Base.isexported(m, s), names(m))
+@testset "Base re-exports Core" begin
+    @test issubset(exported_names(Core), exported_names(Base))
+end
+
+@testset "Base.Libc docstrings" begin
+    @test isempty(Docs.undocumented_names(Libc))
+end
+
+@testset "Silenced missed transformations" begin
+    # Ensure the WarnMissedTransformationsPass is not on by default
+    src = """
+        @noinline iteration(i) = (@show(i); return nothing)
+        @eval function loop_unroll_full_fail(N)
+            for i in 1:N
+              iteration(i)
+              \$(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"), 1)))
+          end
+       end
+       loop_unroll_full_fail(3)
+    """
+    out_err = mktemp() do _, f
+        run(`$(Base.julia_cmd()) -e "$src"`, devnull, devnull, f)
+        seekstart(f)
+        read(f, String)
+    end
+    @test !occursin("loop not unrolled", out_err)
+end
+
+let errs = IOBuffer()
+    run(`$(Base.julia_cmd()) -e '
+        using Test
+        @test !isempty(Core.methodtable.backedges)
+        Base.Experimental.disable_new_worlds()
+        @test_throws "disable_new_worlds" @eval f() = 1
+        @test isempty(Core.methodtable.backedges)
+        @test_throws "disable_new_worlds" Base.delete_method(which(+, (Int, Int)))
+        @test 1+1 == 2
+        using Dates
+        '`, devnull, stdout, errs)
+    @test occursin("disable_new_worlds", String(take!(errs)))
+end
+
+@testset "`@constprop`, `@assume_effects` handling of an unknown setting" begin
+    for x ∈ ("constprop", "assume_effects")
+        try
+            eval(Meta.parse("Base.@$x :unknown f() = 3"))
+            error("unexpectedly reached")
+        catch e
+            e::LoadError
+            @test e.error isa ArgumentError
+        end
+    end
+end
diff --git a/test/missing.jl b/test/missing.jl
index 36155eb32fe49..f588b2dabe904 100644
--- a/test/missing.jl
+++ b/test/missing.jl
@@ -596,7 +596,7 @@ end
     @test @coalesce(missing) === missing
 
     @test @coalesce(1, error("failed")) === 1
-    @test_throws ErrorException @coalesce(missing, error("failed"))
+    @test_throws ErrorException("failed") @coalesce(missing, error("failed"))
 end
 
 mutable struct Obj; x; end
@@ -615,8 +615,7 @@ mutable struct Obj; x; end
 end
 
 @testset "showerror missing function" begin
-    me = try missing(1) catch e e end
-    @test sprint(showerror, me) == "MethodError: objects of type Missing are not callable"
+    @test_throws "MethodError: objects of type Missing are not callable" missing(1)
 end
 
 @testset "sort and sortperm with $(eltype(X))" for (X, P, RP) in
@@ -651,3 +650,29 @@ for func in (round, ceil, floor, trunc)
         @test Core.Compiler.is_foldable(Base.infer_effects(func, (Type{Int},Union{Int,Missing})))
     end
 end
+
+@testset "Custom Missing type" begin
+    struct NewMissing end
+    Base.ismissing(::NewMissing) = true
+    Base.coalesce(x::NewMissing, y...) = coalesce(y...)
+    Base.isless(::NewMissing, ::NewMissing) = false
+    Base.isless(::NewMissing, ::Any) = false
+    Base.isless(::Any, ::NewMissing) = true
+    Base.isequal(::NewMissing, ::Missing) = true
+    Base.isequal(::Missing, ::NewMissing) = true
+    arr = [missing 1 2 3 missing 10 11 12 missing]
+    newarr = Union{Int, NewMissing}[ismissing(v) ? NewMissing() : v for v in arr]
+
+    @test all(skipmissing(arr) .== skipmissing(newarr))
+    @test all(eachindex(skipmissing(arr)) .== eachindex(skipmissing(newarr)))
+    @test all(keys(skipmissing(arr)) .== keys(skipmissing(newarr)))
+    @test_broken sum(skipmissing(arr)) == sum(skipmissing(newarr))
+    @test filter(>(10), skipmissing(arr)) == filter(>(10), skipmissing(newarr))
+    @test isequal(sort(vec(arr)), sort(vec(newarr)))
+
+    @test_throws MissingException skipmissing(newarr)[findfirst(ismissing, newarr)]
+    @test coalesce(NewMissing(), 1) == coalesce(NewMissing(), NewMissing(), 1) == 1
+    @test coalesce(NewMissing()) === coalesce(NewMissing(), NewMissing()) === missing
+    @test @coalesce(NewMissing(), 1) == @coalesce(NewMissing(), NewMissing(), 1) == 1
+    @test @coalesce(NewMissing()) === @coalesce(NewMissing(), NewMissing()) === missing
+end
diff --git a/test/mod2pi.jl b/test/mod2pi.jl
index 5b0cb906bcef2..0eeac6f1e3ce4 100644
--- a/test/mod2pi.jl
+++ b/test/mod2pi.jl
@@ -26,7 +26,7 @@
 # 3.14159265359, -3.14159265359
 # pi/16*k +/- 0.00001 for k in [-20:20] # to cover all quadrants
 # numerators of continuous fraction approximations to pi
-#   see http://oeis.org/A002485
+#   see https://oeis.org/A002485
 #   (reason: for max cancellation, we want x = k*pi + eps for small eps, so x/k ≈ pi)
 
 testCases = [
diff --git a/test/mpfr.jl b/test/mpfr.jl
index 1a0a0041bf94e..48477fc4dbcb7 100644
--- a/test/mpfr.jl
+++ b/test/mpfr.jl
@@ -35,6 +35,9 @@ import Base.MPFR
 
     @test typeof(BigFloat(1//1)) == BigFloat
     @test typeof(BigFloat(one(Rational{BigInt}))) == BigFloat
+    rat = 1 // (big(2)^300 + 1)
+    @test BigFloat(rat, RoundDown) < rat < BigFloat(rat, RoundUp)
+    @test BigFloat(-rat, RoundUp) < -rat < BigFloat(-rat, RoundDown)
 
     # BigFloat constructor respects global precision when not specified
     let prec = precision(BigFloat) < 16 ? 256 : precision(BigFloat) ÷ 2
@@ -667,16 +670,19 @@ end
         @test string(parse(BigFloat, "0.1")) == "0.10000002"
         @test string(parse(BigFloat, "0.5")) == "0.5"
         @test string(parse(BigFloat, "-9.9")) == "-9.9000015"
+        @test string(parse(BigFloat, "1e6")) == "1.0e6"
     end
     setprecision(40) do
         @test string(parse(BigFloat, "0.1")) == "0.10000000000002"
         @test string(parse(BigFloat, "0.5")) == "0.5"
         @test string(parse(BigFloat, "-9.9")) == "-9.8999999999942"
+        @test string(parse(BigFloat, "1e6")) == "1.0e6"
     end
     setprecision(123) do
         @test string(parse(BigFloat, "0.1")) == "0.0999999999999999999999999999999999999953"
         @test string(parse(BigFloat, "0.5")) == "0.5"
         @test string(parse(BigFloat, "-9.9")) == "-9.8999999999999999999999999999999999997"
+        @test string(parse(BigFloat, "1e6")) == "1.0e6"
     end
 end
 @testset "eps" begin
@@ -998,7 +1004,7 @@ end
 
     test_show_bigfloat(big"1.23456789", contains_e=false, starts="1.23")
     test_show_bigfloat(big"-1.23456789", contains_e=false, starts="-1.23")
-    test_show_bigfloat(big"2.3457645687563543266576889678956787e10000", starts="2.345", ends="e+10000")
+    test_show_bigfloat(big"2.3457645687563543266576889678956787e10000", starts="2.345", ends="e10000")
     test_show_bigfloat(big"-2.3457645687563543266576889678956787e-10000", starts="-2.345", ends="e-10000")
     test_show_bigfloat(big"42.0", contains_e=false, starts="42.0")
     test_show_bigfloat(big"420.0", contains_e=false, starts="420.0") # '0's have to be added on the right before point
@@ -1006,10 +1012,10 @@ end
     test_show_bigfloat(big"420000.0", contains_e=false, starts="420000.0")
     test_show_bigfloat(big"654321.0", contains_e=false, starts="654321.0")
     test_show_bigfloat(big"-654321.0", contains_e=false, starts="-654321.0")
-    test_show_bigfloat(big"6543210.0", contains_e=true, starts="6.5", ends="e+06")
+    test_show_bigfloat(big"6543210.0", contains_e=true, starts="6.5", ends="e6")
     test_show_bigfloat(big"0.000123", contains_e=false, starts="0.000123")
     test_show_bigfloat(big"-0.000123", contains_e=false, starts="-0.000123")
-    test_show_bigfloat(big"0.00001234", contains_e=true, starts="1.23", ends="e-05")
+    test_show_bigfloat(big"0.00001234", contains_e=true, starts="1.23", ends="e-5")
 
     for to_string in [string,
                       x->sprint(show, x),
@@ -1039,3 +1045,68 @@ end
         end
     end
 end
+
+@testset "issue #50642" begin
+    setprecision(BigFloat, 500) do
+        bf = big"1.4901162082026128889687591176485489397376143775948511e-07"
+        @test Float16(bf) == Float16(2.0e-7)
+    end
+end
+
+# PR #54284
+import Base.MPFR: clear_flags, had_underflow, had_overflow, had_divbyzero,
+    had_nan, had_inexact_exception, had_range_exception
+
+function all_flags_54284()
+    (
+        had_underflow(),
+        had_overflow(),
+        had_divbyzero(),
+        had_nan(),
+        had_inexact_exception(),
+        had_range_exception(),
+    )
+end
+@testset "MPFR flags" begin
+    let x, a = floatmin(BigFloat), b = floatmax(BigFloat), c = zero(BigFloat)
+        clear_flags()
+        @test !any(all_flags_54284())
+
+        x = a - a # normal
+        @test all_flags_54284() == (false, false, false, false, false, false)
+        x = 1 / c # had_divbyzero
+        @test all_flags_54284() == (false, false, true, false, false, false)
+        clear_flags()
+        x = nextfloat(a) - a # underflow
+        @test all_flags_54284() == (true, false, false, false, true, false)
+        clear_flags()
+        x = 1 / a # overflow
+        @test all_flags_54284() == (false, true, false, false, true, false)
+        clear_flags()
+        x = c / c # nan
+        @test all_flags_54284() == (false, false, false, true, false, false)
+        clear_flags()
+        x = prevfloat(BigFloat(1.0)) * 100 # inexact
+        @test all_flags_54284() == (false, false, false, false, true, false)
+        clear_flags()
+        try convert(Int, b); catch; end # range exception
+        @test all_flags_54284() == (false, false, false, false, false, true)
+        clear_flags()
+    end
+end
+
+@testset "BigFloatData truncation OOB read" begin
+    @testset "T: $T" for T ∈ (UInt8, UInt16, UInt32, UInt64, UInt128)
+        v = Base.MPFR.BigFloatData{T}(fill(typemax(T), 1 + Base.MPFR.offset_p_limbs))
+        @testset "bit_count: $bit_count" for bit_count ∈ (0:10:80)
+            @test Base.MPFR.truncated(UInt128, v, bit_count) isa Any
+        end
+    end
+end
+
+# BigFloatData is the Ref type for BigFloat in ccall:
+@testset "cconvert(Ref{BigFloat}, x)" begin
+    for x in (1.0, big"1.0", Ref(big"1.0"))
+        @test Base.cconvert(Ref{BigFloat}, x) isa Base.MPFR.BigFloatData
+    end
+end
diff --git a/test/namedtuple.jl b/test/namedtuple.jl
index eb3846c8cbffd..0f54196879a43 100644
--- a/test/namedtuple.jl
+++ b/test/namedtuple.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+using Base: delete
+
 @test_throws TypeError NamedTuple{1,Tuple{}}
 @test_throws TypeError NamedTuple{(),1}
 @test_throws TypeError NamedTuple{(:a,1),Tuple{Int}}
@@ -28,13 +30,13 @@
 @test (x=4, y=5, z=6)[()] == NamedTuple()
 @test (x=4, y=5, z=6)[:] == (x=4, y=5, z=6)
 @test NamedTuple()[()] == NamedTuple()
-@test_throws ErrorException (x=4, y=5, z=6).a
+@test_throws FieldError (x=4, y=5, z=6).a
 @test_throws BoundsError (a=2,)[0]
 @test_throws BoundsError (a=2,)[2]
-@test_throws ErrorException (x=4, y=5, z=6)[(:a,)]
-@test_throws ErrorException (x=4, y=5, z=6)[(:x, :a)]
-@test_throws ErrorException (x=4, y=5, z=6)[[:a]]
-@test_throws ErrorException (x=4, y=5, z=6)[[:x, :a]]
+@test_throws FieldError (x=4, y=5, z=6)[(:a,)]
+@test_throws FieldError (x=4, y=5, z=6)[(:x, :a)]
+@test_throws FieldError (x=4, y=5, z=6)[[:a]]
+@test_throws FieldError (x=4, y=5, z=6)[[:x, :a]]
 @test_throws ErrorException (x=4, y=5, z=6)[(:x, :x)]
 
 @test length(NamedTuple()) == 0
@@ -94,6 +96,9 @@ end
 
     conv_res = @test_throws MethodError convert(NamedTuple{(:a,),Tuple{I}} where I<:AbstractString, (;a=1))
     @test conv_res.value.f === convert && conv_res.value.args === (AbstractString, 1)
+
+    conv6 = convert(NamedTuple{(:a,),Tuple{NamedTuple{(:b,), Tuple{Int}}}}, ((1,),))
+    @test conv6 === (a = (b = 1,),)
 end
 
 @test NamedTuple{(:a,:c)}((b=1,z=2,c=3,aa=4,a=5)) === (a=5, c=3)
@@ -134,6 +139,14 @@ end
 @test map(string, (x=1, y=2)) == (x="1", y="2")
 @test map(round, (x=UInt, y=Int), (x=3.1, y=2//3)) == (x=UInt(3), y=1)
 
+@testset "filter" begin
+    @test filter(isodd, (a=1,b=2,c=3)) === (a=1, c=3)
+    @test filter(i -> true, (;)) === (;)
+    longnt = NamedTuple{ntuple(i -> Symbol(:a, i), 20)}(ntuple(identity, 20))
+    @test filter(iseven, longnt) === NamedTuple{ntuple(i -> Symbol(:a, 2i), 10)}(ntuple(i -> 2i, 10))
+    @test filter(x -> x<2, (longnt..., z=1.5)) === (a1=1, z=1.5)
+end
+
 @test merge((a=1, b=2), (a=10,)) == (a=10, b=2)
 @test merge((a=1, b=2), (a=10, z=20)) == (a=10, b=2, z=20)
 @test merge((a=1, b=2), (z=20,)) == (a=1, b=2, z=20)
@@ -150,6 +163,10 @@ end
 @test merge(NamedTuple(), [:a=>1, :b=>2, :c=>3, :a=>4, :c=>5]) == (a=4, b=2, c=5)
 @test merge((c=0, z=1), [:a=>1, :b=>2, :c=>3, :a=>4, :c=>5]) == (c=5, z=1, a=4, b=2)
 
+# https://github.com/JuliaLang/julia/issues/59292
+@test merge((; a = 1), Base.Pairs((; b = 2, c = 3), (:b,))) == (a = 1, b = 2)
+@test merge((; a = 1), Base.pairs((; b = 2, c = 3))) == (a = 1, b = 2, c = 3)
+
 @test keys((a=1, b=2, c=3)) == (:a, :b, :c)
 @test keys(NamedTuple()) == ()
 @test keys((a=1,)) == (:a,)
@@ -244,7 +261,7 @@ function abstr_nt_22194_2()
     a = NamedTuple[(a=1,), (b=2,)]
     return a[1].b
 end
-@test_throws ErrorException abstr_nt_22194_2()
+@test_throws FieldError abstr_nt_22194_2()
 @test Base.return_types(abstr_nt_22194_2, ()) == Any[Any]
 
 mutable struct HasAbstractNamedTuples
@@ -271,6 +288,11 @@ end
 abstr_nt_22194_3()
 @test Base.return_types(abstr_nt_22194_3, ()) == Any[Any]
 
+@test delete((a=1,), :a) == NamedTuple()
+@test delete((a=1, b=2), :a) == (b=2,)
+@test delete((a=1, b=2, c=3), :b) == (a=1, c=3)
+@test delete((a=1, b=2, c=3), :z) == (a=1, b=2, c=3)
+
 @test Base.structdiff((a=1, b=2), (b=3,)) == (a=1,)
 @test Base.structdiff((a=1, b=2, z=20), (b=3,)) == (a=1, z=20)
 @test Base.structdiff((a=1, b=2, z=20), (b=3, q=20, z=1)) == (a=1,)
@@ -382,10 +404,22 @@ end
 
 # Test effect/inference for merge/diff of unknown NamedTuples
 for f in (Base.merge, Base.structdiff)
-    let eff = Base.infer_effects(f, Tuple{NamedTuple, NamedTuple})
-        @test Core.Compiler.is_foldable(eff) && eff.nonoverlayed
+    @testset let f = f
+        # test the effects of the fallback path
+        fallback_func(a::NamedTuple, b::NamedTuple) = @invoke f(a::NamedTuple, b::NamedTuple)
+        @testset let eff = Base.infer_effects(fallback_func)
+            @test Core.Compiler.is_foldable(eff)
+            @test Core.Compiler.is_nonoverlayed(eff)
+        end
+        @test only(Base.return_types(fallback_func)) == NamedTuple
+        # test if `max_methods = 4` setting works as expected
+        general_func(a::NamedTuple, b::NamedTuple) = f(a, b)
+        @testset let eff = Base.infer_effects(general_func)
+            @test Core.Compiler.is_foldable(eff)
+            @test Core.Compiler.is_nonoverlayed(eff)
+        end
+        @test only(Base.return_types(general_func)) == NamedTuple
     end
-    @test Core.Compiler.return_type(f, Tuple{NamedTuple, NamedTuple}) == NamedTuple
 end
 @test Core.Compiler.is_foldable(Base.infer_effects(pairs, Tuple{NamedTuple}))
 
@@ -394,3 +428,38 @@ let a = Base.NamedTuple{(:a, :b), Tuple{Any, Any}}((1, 2)), b = Base.NamedTuple{
     @test typeof(Base.merge(a, b)) == Base.NamedTuple{(:a, :b), Tuple{Any, Float64}}
     @test typeof(Base.structdiff(a, b)) == Base.NamedTuple{(:a,), Tuple{Any}}
 end
+
+function mergewith51009(combine, a::NamedTuple{an}, b::NamedTuple{bn}) where {an, bn}
+    names = Base.merge_names(an, bn)
+    NamedTuple{names}(ntuple(Val{nfields(names)}()) do i
+                          n = getfield(names, i)
+                          if Base.sym_in(n, an)
+                              if Base.sym_in(n, bn)
+                                  combine(getfield(a, n), getfield(b, n))
+                              else
+                                  getfield(a, n)
+                              end
+                          else
+                              getfield(b, n)
+                          end
+                      end)
+end
+let c = (a=1, b=2),
+    d = (b=3, c=(d=1,))
+    @test @inferred(mergewith51009((x,y)->y, c, d)) === (a = 1, b = 3, c = (d = 1,))
+end
+
+@test_throws ErrorException NamedTuple{(), Union{}}
+for NT in (NamedTuple{(:a, :b), Union{}}, NamedTuple{(:a, :b), T} where T<:Union{})
+    @test fieldtype(NT, 1) == Union{}
+    @test fieldtype(NT, :b) == Union{}
+    @test_throws FieldError fieldtype(NT, :c)
+    @test_throws BoundsError fieldtype(NT, 0)
+    @test_throws BoundsError fieldtype(NT, 3)
+    @test Base.return_types((Type{NT},)) do NT; fieldtype(NT, :a); end == Any[Type{Union{}}]
+    @test fieldtype(NamedTuple{<:Any, Union{}}, 1) == Union{}
+end
+let NT = NamedTuple{<:Any, Union{}}
+    @test fieldtype(NT, 100) == Union{}
+    @test only(Base.return_types((Type{NT},)) do NT; fieldtype(NT, 100); end) >: Type{Union{}}
+end
diff --git a/test/numbers.jl b/test/numbers.jl
index e89dffd8e33cf..426fa75167393 100644
--- a/test/numbers.jl
+++ b/test/numbers.jl
@@ -154,7 +154,8 @@ end
             x = unorded[i], unorded[i]
             y = unorded[j], unorded[j]
             z = Base._extrema_rf(x, y)
-            @test z === x || z === y
+            @test (z[1] === x[1] || z[1] === y[1]) &&
+                  (z[2] === x[1] || z[2] === y[1])
         end
     end
 end
@@ -262,7 +263,7 @@ end
 
 # GMP allocation overflow should not cause crash
 if Base.GMP.ALLOC_OVERFLOW_FUNCTION[] && sizeof(Int) > 4
-  @test_throws OutOfMemoryError BigInt(2)^(typemax(Culong))
+    @test_throws OutOfMemoryError BigInt(2)^(typemax(Culong))
 end
 
 # exponentiating with a negative base
@@ -679,6 +680,9 @@ end
     @test copysign(big(-1), 0x02) == 1
     @test copysign(big(-1.0), 0x02) == 1.0
     @test copysign(-1//2, 0x01) == 1//2
+
+    # Verify overflow is checked with rational
+    @test_throws OverflowError copysign(typemin(Int)//1, 1)
 end
 
 @testset "isnan/isinf/isfinite" begin
@@ -827,6 +831,28 @@ end
     @test cmp(isless, 1, NaN) == -1
     @test cmp(isless, NaN, NaN) == 0
 end
+@testset "ispositive/isnegative" begin
+    for T in [Base.uniontypes(Base.BitInteger)..., Bool, Rational{Int}, BigInt, Base.uniontypes(Base.IEEEFloat)..., BigFloat, Missing]
+        values = T[zero(T), one(T)]
+        if T <: AbstractFloat
+            push!(values, Inf, NaN) # also check Infs and NaNs
+        elseif T <: Rational
+            push!(values, 1//0) # also check Infs
+        end
+        @testset "$T" begin
+            for value in values
+                # https://github.com/JuliaLang/julia/pull/53677#discussion_r1534044582
+                # Use eval to explicitly show expressions when they fail
+                @eval begin
+                    @test ispositive($value) === ($value > 0)
+                    @test ispositive(-$value) === (-$value > 0)
+                    @test isnegative($value) === ($value < 0)
+                    @test isnegative(-$value) === (-$value < 0)
+                end
+            end
+        end
+    end
+end
 @testset "Float vs Integer comparison" begin
     for x=-5:5, y=-5:5
         @test (x==y)==(Float64(x)==Int64(y))
@@ -1111,10 +1137,30 @@ end
 end
 
 @testset "Irrational zero and one" begin
-    @test one(pi) === true
-    @test zero(pi) === false
-    @test one(typeof(pi)) === true
-    @test zero(typeof(pi)) === false
+    for i in (π, ℯ, γ, catalan)
+        @test one(i) === true
+        @test zero(i) === false
+        @test one(typeof(i)) === true
+        @test zero(typeof(i)) === false
+    end
+end
+
+@testset "Irrational iszero, isfinite, isinteger, and isone" begin
+    for i in (π, ℯ, γ, catalan)
+        @test !iszero(i)
+        @test !isone(i)
+        @test !isinteger(i)
+        @test isfinite(i)
+    end
+end
+
+@testset "Irrational promote_type" begin
+    for T in (Float16, Float32, Float64)
+        for i in (π, ℯ, γ, catalan)
+            @test T(2.0) * i ≈ T(2.0) * T(i)
+            @test T(2.0) * i isa T
+        end
+    end
 end
 
 @testset "Irrationals compared with Irrationals" begin
@@ -1135,6 +1181,8 @@ end
 end
 
 @testset "Irrationals compared with Rationals and Floats" begin
+    @test pi != Float64(pi)
+    @test Float64(pi) != pi
     @test Float64(pi,RoundDown) < pi
     @test Float64(pi,RoundUp) > pi
     @test !(Float64(pi,RoundDown) > pi)
@@ -1153,6 +1201,7 @@ end
     @test nextfloat(big(pi)) > pi
     @test !(prevfloat(big(pi)) > pi)
     @test !(nextfloat(big(pi)) < pi)
+    @test big(typeof(pi)) == BigFloat
 
     @test 2646693125139304345//842468587426513207 < pi
     @test !(2646693125139304345//842468587426513207 > pi)
@@ -1168,6 +1217,17 @@ Base.@irrational i46051 4863.185427757 1548big(pi)
     # issue #46051
     @test sprint(show, "text/plain", i46051) == "i46051 = 4863.185427757..."
 end
+
+@testset "Irrational round, float, ceil" begin
+    using .MathConstants
+    @test round(π) === 3.0
+    @test round(Int, ℯ) === 3
+    @test floor(ℯ) === 2.0
+    @test floor(Int, φ) === 1
+    @test ceil(γ) === 1.0
+    @test ceil(Int, catalan) === 1
+end
+
 @testset "issue #6365" begin
     for T in (Float32, Float64)
         for i = 9007199254740992:9007199254740996
@@ -1558,36 +1618,44 @@ end
         end
     end
 
-    for x=0:5, y=1:5
-        @test div(UInt(x),UInt(y)) == div(x,y)
-        @test div(UInt(x),y) == div(x,y)
-        @test div(x,UInt(y)) == div(x,y)
-        @test div(UInt(x),-y) == reinterpret(UInt,div(x,-y))
-        @test div(-x,UInt(y)) == div(-x,y)
-
-        @test fld(UInt(x),UInt(y)) == fld(x,y)
-        @test fld(UInt(x),y) == fld(x,y)
-        @test fld(x,UInt(y)) == fld(x,y)
-        @test fld(UInt(x),-y) == reinterpret(UInt,fld(x,-y))
-        @test fld(-x,UInt(y)) == fld(-x,y)
-
-        @test cld(UInt(x),UInt(y)) == cld(x,y)
-        @test cld(UInt(x),y) == cld(x,y)
-        @test cld(x,UInt(y)) == cld(x,y)
-        @test cld(UInt(x),-y) == reinterpret(UInt,cld(x,-y))
-        @test cld(-x,UInt(y)) == cld(-x,y)
-
-        @test rem(UInt(x),UInt(y)) == rem(x,y)
-        @test rem(UInt(x),y) == rem(x,y)
-        @test rem(x,UInt(y)) == rem(x,y)
-        @test rem(UInt(x),-y) == rem(x,-y)
-        @test rem(-x,UInt(y)) == rem(-x,y)
-
-        @test mod(UInt(x),UInt(y)) == mod(x,y)
-        @test mod(UInt(x),y) == mod(x,y)
-        @test mod(x,UInt(y)) == mod(x,y)
-        @test mod(UInt(x),-y) == mod(x,-y)
-        @test mod(-x,UInt(y)) == mod(-x,y)
+    @test isnan(mod(NaN, Inf))
+    @test isnan(mod(NaN, -Inf))
+    for x=0:5
+        @test mod(x, Inf) == x
+        @test mod(x, -Inf) == x
+        @test mod(-x, Inf) == -x
+        @test mod(-x, -Inf) == -x
+        for y=1:5
+            @test div(UInt(x),UInt(y)) == div(x,y)
+            @test div(UInt(x),y) == div(x,y)
+            @test div(x,UInt(y)) == div(x,y)
+            @test div(UInt(x),-y) == reinterpret(UInt,div(x,-y))
+            @test div(-x,UInt(y)) == div(-x,y)
+
+            @test fld(UInt(x),UInt(y)) == fld(x,y)
+            @test fld(UInt(x),y) == fld(x,y)
+            @test fld(x,UInt(y)) == fld(x,y)
+            @test fld(UInt(x),-y) == reinterpret(UInt,fld(x,-y))
+            @test fld(-x,UInt(y)) == fld(-x,y)
+
+            @test cld(UInt(x),UInt(y)) == cld(x,y)
+            @test cld(UInt(x),y) == cld(x,y)
+            @test cld(x,UInt(y)) == cld(x,y)
+            @test cld(UInt(x),-y) == reinterpret(UInt,cld(x,-y))
+            @test cld(-x,UInt(y)) == cld(-x,y)
+
+            @test rem(UInt(x),UInt(y)) == rem(x,y)
+            @test rem(UInt(x),y) == rem(x,y)
+            @test rem(x,UInt(y)) == rem(x,y)
+            @test rem(UInt(x),-y) == rem(x,-y)
+            @test rem(-x,UInt(y)) == rem(-x,y)
+
+            @test mod(UInt(x),UInt(y)) == mod(x,y)
+            @test mod(UInt(x),y) == mod(x,y)
+            @test mod(x,UInt(y)) == mod(x,y)
+            @test mod(UInt(x),-y) == mod(x,-y)
+            @test mod(-x,UInt(y)) == mod(-x,y)
+        end
     end
 
     @test div(typemax(UInt64)  , 1) ==  typemax(UInt64)
@@ -1702,6 +1770,27 @@ end
         @test cld(-1.1, 0.1) == div(-1.1, 0.1, RoundUp)   ==  ceil(big(-1.1)/big(0.1)) == -11.0
         @test fld(-1.1, 0.1) == div(-1.1, 0.1, RoundDown) == floor(big(-1.1)/big(0.1)) == -12.0
     end
+    @testset "issue  #49450" begin
+        @test div(514, Float16(0.75)) === Float16(685)
+        @test fld(514, Float16(0.75)) === Float16(685)
+        @test cld(515, Float16(0.75)) === Float16(687)
+
+        @test cld(1, Float16(0.000999)) === Float16(1001)
+        @test cld(2, Float16(0.001999)) === Float16(1001)
+        @test cld(3, Float16(0.002934)) === Float16(1023)
+        @test cld(4, Float16(0.003998)) === Float16(1001)
+        @test fld(5, Float16(0.004925)) === Float16(1015)
+
+        @test div(4_194_307, Float32(0.75)) === Float32(5_592_409)
+        @test fld(4_194_307, Float32(0.75)) === Float32(5_592_409)
+        @test cld(4_194_308, Float32(0.75)) === Float32(5_592_411)
+
+        @test fld(5, Float32(6.556511e-7)) === Float32(7_626_007)
+        @test fld(10, Float32(1.3113022e-6)) === Float32(7_626_007)
+        @test fld(11, Float32(1.4305115e-6)) === Float32(7_689_557)
+        @test cld(16, Float32(2.8014183e-6)) === Float32(5_711_393)
+        @test cld(17, Float32(2.2053719e-6)) === Float32(7_708_451)
+    end
 end
 @testset "return types" begin
     for T in (Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128)
@@ -2123,6 +2212,10 @@ end
     @test nextfloat(Inf32) === Inf32
     @test prevfloat(-Inf32) === -Inf32
     @test isequal(nextfloat(NaN32), NaN32)
+    @test nextfloat(1.0, UInt(5)) == nextfloat(1.0, 5)
+    @test prevfloat(1.0, UInt(5)) == prevfloat(1.0, 5)
+    @test nextfloat(0.0, typemax(UInt64)) == Inf
+    @test prevfloat(0.0, typemax(UInt64)) == -Inf
 end
 @testset "issue #16206" begin
     @test prevfloat(Inf) == 1.7976931348623157e308
@@ -2155,11 +2248,36 @@ for T = (UInt8,Int8,UInt16,Int16,UInt32,Int32,UInt64,Int64,UInt128,Int128)
     end
 end
 
-@testset "Irrational/Bool multiplication" begin
+@testset "Bool multiplication" begin
     @test false*pi === 0.0
     @test pi*false === 0.0
     @test true*pi === Float64(pi)
     @test pi*true === Float64(pi)
+
+    @test false*Inf === 0.0
+    @test Inf*false === 0.0
+    @test true*Inf === Inf
+    @test Inf*true === Inf
+
+    @test false*NaN === 0.0
+    @test NaN*false === 0.0
+    @test true*NaN === NaN
+    @test NaN*true === NaN
+
+    @test false*-Inf === -0.0
+    @test -Inf*false === -0.0
+    @test true*-Inf === -Inf
+    @test -Inf*true === -Inf
+
+    @test false*1//0 === 0//1
+    @test 1//0*false === 0//1
+    @test true*1//0 === 1//0
+    @test 1//0*true === 1//0
+
+    @test false*-1//0 === 0//1
+    @test -1//0*false === 0//1
+    @test true*-1//0 === -1//0
+    @test -1//0*true === -1//0
 end
 # issue #5492
 @test -0.0 + false === -0.0
@@ -2327,8 +2445,8 @@ end
 
 function allsubtypes!(m::Module, x::DataType, sts::Set)
     for s in names(m, all = true)
-        if isdefined(m, s) && !Base.isdeprecated(m, s)
-            t = getfield(m, s)
+        if isdefinedglobal(m, s) && !Base.isdeprecated(m, s)
+            t = getglobal(m, s)
             if isa(t, Type) && t <: x && t != Union{}
                 push!(sts, t)
             elseif isa(t, Module) && t !== m && nameof(t) === s && parentmodule(t) === m
@@ -2486,6 +2604,22 @@ Base.:(==)(x::TestNumber, y::TestNumber) = x.inner == y.inner
 Base.abs(x::TestNumber) = TestNumber(abs(x.inner))
 @test abs2(TestNumber(3+4im)) == TestNumber(25)
 
+@testset "mul_hi" begin
+    n = 1000
+    ground_truth(x, y) = ((widen(x)*y) >> (8*sizeof(typeof(x)))) % typeof(x)
+    for T in [UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128]
+        for trait1 in [typemin, typemax]
+            for trait2 in [typemin, typemax]
+                x, y = trait1(T), trait2(T)
+                @test Base.mul_hi(x, y) === ground_truth(x, y)
+            end
+        end
+        for (x, y) in zip(rand(T, n), rand(T, n))
+            @test Base.mul_hi(x, y) === ground_truth(x, y)
+        end
+    end
+end
+
 @testset "multiplicative inverses" begin
     function testmi(numrange, denrange)
         for d in denrange
@@ -2675,7 +2809,7 @@ end
     @test divrem(a,-(a-20), RoundDown) == (div(a,-(a-20), RoundDown), rem(a,-(a-20), RoundDown))
 end
 
-@testset "rem2pi $T" for T in (Float16, Float32, Float64, BigFloat)
+@testset "rem2pi $T" for T in (Float16, Float32, Float64, BigFloat, Int8, Int16, Int32, Int64, Int128)
     @test rem2pi(T(1), RoundToZero)  == 1
     @test rem2pi(T(1), RoundNearest) == 1
     @test rem2pi(T(1), RoundDown)    == 1
@@ -2762,6 +2896,20 @@ Base.literal_pow(::typeof(^), ::PR20530, ::Val{p}) where {p} = 2
     @test [2,4,8].^-2 == [0.25, 0.0625, 0.015625]
     @test [2, 4, 8].^-2 .* 4 == [1.0, 0.25, 0.0625] # nested literal_pow
     @test ℯ^-2 == exp(-2) ≈ inv(ℯ^2) ≈ (ℯ^-1)^2 ≈ sqrt(ℯ^-4)
+
+    if Int === Int32
+        p = 2147483647
+        @test x^p == 1
+        @test x^2147483647 == 2
+        @test (@fastmath x^p) == 1
+        @test (@fastmath x^2147483647) == 2
+    elseif Int === Int64
+        p = 9223372036854775807
+        @test x^p == 1
+        @test x^9223372036854775807 == 2
+        @test (@fastmath x^p) == 1
+        @test (@fastmath x^9223372036854775807) == 2
+    end
 end
 module M20889 # do we get the expected behavior without importing Base.^?
     using Test
@@ -2886,10 +3034,19 @@ end
     @test log(π,ComplexF32(2)) isa ComplexF32
 end
 
+@testset "irrational promotion shouldn't recurse without bound, issue #51001" begin
+    for s ∈ (:π, :ℯ)
+        T = Irrational{s}
+        @test promote_type(Complex{T}, T) <: Complex
+        @test promote_type(T, Complex{T}) <: Complex
+    end
+end
+
 @testset "printing non finite floats" begin
     let float_types = Set()
         allsubtypes!(Base, AbstractFloat, float_types)
         allsubtypes!(Core, AbstractFloat, float_types)
+        filter!(!isequal(Core.BFloat16), float_types)   # defined externally
         @test !isempty(float_types)
 
         for T in float_types
@@ -3112,3 +3269,49 @@ end
     end
 
 end
+
+@testset "FP(inf) == inf" begin
+    # Iterate through all pairs of FP types
+    fp_types = (Float16, Float32, Float64, BigFloat)
+    for F ∈ fp_types, G ∈ fp_types, f ∈ (typemin, typemax)
+        i = f(F)
+        @test i == G(i)
+    end
+end
+
+@testset "small int FP conversion" begin
+    fp_types = (Float16, Float32, Float64, BigFloat)
+    m = Int(maxintfloat(Float16))
+    for F ∈ fp_types, G ∈ fp_types, n ∈ (-m):m
+        @test n == G(F(n)) == F(G(n))
+    end
+end
+
+@testset "`precision`" begin
+    Fs = (Float16, Float32, Float64, BigFloat)
+
+    @testset "type vs instance" begin
+        @testset "F: $F" for F ∈ Fs
+            @test precision(F) == precision(one(F))
+            @test precision(F, base = 2) == precision(one(F), base = 2)
+            @test precision(F, base = 3) == precision(one(F), base = 3)
+        end
+    end
+
+    @testset "`precision` of `Union` shouldn't recur infinitely, #52909" begin
+        @testset "i: $i" for i ∈ eachindex(Fs)
+            @testset "j: $j" for j ∈ (i + 1):lastindex(Fs)
+                S = Fs[i]
+                T = Fs[j]
+                @test_throws MethodError precision(Union{S,T})
+                @test_throws MethodError precision(Union{S,T}, base = 3)
+            end
+        end
+    end
+end
+
+@testset "irrational special values" begin
+    for v ∈ (π, ℯ, γ, catalan, φ)
+        @test v === typemin(v) === typemax(v)
+    end
+end
diff --git a/test/offsetarray.jl b/test/offsetarray.jl
index 257e91db5f49e..4b6c2e9019980 100644
--- a/test/offsetarray.jl
+++ b/test/offsetarray.jl
@@ -247,17 +247,17 @@ PV = view(P, 2:3, :)
 
 # Similar
 B = similar(A, Float32)
-@test isa(B, OffsetArray{Float32,2})
+@test isa(B, OffsetMatrix{Float32})
 @test axes(B) === axes(A)
 B = similar(A, (3,4))
-@test isa(B, Array{Int,2})
+@test isa(B, Matrix{Int})
 @test size(B) == (3,4)
 @test axes(B) === (Base.OneTo(3), Base.OneTo(4))
 B = similar(A, (-3:3,1:4))
-@test isa(B, OffsetArray{Int,2})
+@test isa(B, OffsetMatrix{Int})
 @test axes(B) === (OffsetArrays.IdOffsetRange(Base.OneTo(7), -4), OffsetArrays.IdOffsetRange(Base.OneTo(4)))
 B = similar(parent(A), (-3:3,1:4))
-@test isa(B, OffsetArray{Int,2})
+@test isa(B, OffsetMatrix{Int})
 @test axes(B) === (OffsetArrays.IdOffsetRange(Base.OneTo(7), -4), OffsetArrays.IdOffsetRange(Base.OneTo(4)))
 
 # Indexing with OffsetArray indices
@@ -383,6 +383,18 @@ v2 = copy(v)
 @test v2[end-1] == 2
 @test v2[end] == 1
 
+# push!(v::AbstractVector, x...)
+v2 = copy(v)
+@test @invoke(push!(v2::AbstractVector, 3)) === v2
+@test v2[axes(v,1)] == v
+@test v2[end] == 3
+@test v2[begin] == v[begin] == v[-2]
+v2 = copy(v)
+@test @invoke(push!(v2::AbstractVector, 5, 6)) == v2
+@test v2[axes(v,1)] == v
+@test v2[end-1] == 5
+@test v2[end] == 6
+
 # append! from array
 v2 = copy(v)
 @test append!(v2, [2, 1]) === v2
@@ -399,11 +411,29 @@ v2 = copy(v)
 @test v2[axes(v, 1)] == v
 @test v2[lastindex(v)+1:end] == [2, 1]
 
+# append!(::AbstractVector, ...)
+# append! from array
+v2 = copy(v)
+@test @invoke(append!(v2::AbstractVector, [2, 1]::Any)) === v2
+@test v2[axes(v, 1)] == v
+@test v2[lastindex(v)+1:end] == [2, 1]
+# append! from HasLength iterator
+v2 = copy(v)
+@test @invoke(append!(v2::AbstractVector, (v for v in [2, 1])::Any)) === v2
+@test v2[axes(v, 1)] == v
+@test v2[lastindex(v)+1:end] == [2, 1]
+# append! from SizeUnknown iterator
+v2 = copy(v)
+@test @invoke(append!(v2::AbstractVector, (v for v in [2, 1] if true)::Any)) === v2
+@test v2[axes(v, 1)] == v
+@test v2[lastindex(v)+1:end] == [2, 1]
+
 # other functions
 v = OffsetArray(v0, (-3,))
 @test lastindex(v) == 1
 @test v ≈ v
-@test axes(v') === (Base.OneTo(1), OffsetArrays.IdOffsetRange(Base.OneTo(4), -3))
+@test (@inferred axes(v')[1]) === OffsetArrays.IdOffsetRange(Base.OneTo(1))
+@test (@inferred axes(v')[2]) === OffsetArrays.IdOffsetRange(Base.OneTo(4), -3)
 @test parent(v) == collect(v)
 rv = reverse(v)
 @test axes(rv) == axes(v)
@@ -567,6 +597,8 @@ A = OffsetArray(view(rand(4,4), 1:4, 4:-1:1), (-3,5))
 # issue #33614
 A = OffsetArray(-1:0, (-2,))
 @test reshape(A, :) === A
+@test axes(similar(typeof(A),axes(A))) == axes(A)
+@test eltype(similar(typeof(A),axes(A))) == eltype(A)
 Arsc = reshape(A, :, 1)
 Arss = reshape(A, 2, 1)
 @test Arsc[1,1] == Arss[1,1] == -1
@@ -627,15 +659,15 @@ end
     B = OffsetArray(reshape(1:24, 4, 3, 2), -5, 6, -7)
     for R in (fill(0, -4:-1), fill(0, -4:-1, 7:7), fill(0, -4:-1, 7:7, -6:-6))
         @test @inferred(maximum!(R, B)) == reshape(maximum(B, dims=(2,3)), axes(R)) == reshape(21:24, axes(R))
-        @test @allocated(maximum!(R, B)) <= 1300
+        @test @allocated(maximum!(R, B)) <= 400
         @test @inferred(minimum!(R, B)) == reshape(minimum(B, dims=(2,3)), axes(R)) == reshape(1:4, axes(R))
-        @test @allocated(minimum!(R, B)) <= 1300
+        @test @allocated(minimum!(R, B)) <= 400
     end
     for R in (fill(0, -4:-4, 7:9), fill(0, -4:-4, 7:9, -6:-6))
         @test @inferred(maximum!(R, B)) == reshape(maximum(B, dims=(1,3)), axes(R)) == reshape(16:4:24, axes(R))
-        @test @allocated(maximum!(R, B)) <= 1300
+        @test @allocated(maximum!(R, B)) <= 400
         @test @inferred(minimum!(R, B)) == reshape(minimum(B, dims=(1,3)), axes(R)) == reshape(1:4:9, axes(R))
-        @test @allocated(minimum!(R, B)) <= 1300
+        @test @allocated(minimum!(R, B)) <= 400
     end
     @test_throws DimensionMismatch maximum!(fill(0, -4:-1, 7:7, -6:-6, 1:1), B)
     @test_throws DimensionMismatch minimum!(fill(0, -4:-1, 7:7, -6:-6, 1:1), B)
@@ -863,3 +895,35 @@ end
     # this is fixed in #40038, so the evaluation of its CartesianIndices should work
     @test CartesianIndices(A) == CartesianIndices(B)
 end
+
+@testset "overflowing show" begin
+    A = OffsetArray(repeat([1], 1), typemax(Int)-1)
+    b = IOBuffer(maxsize=10)
+    show(b, A)
+    @test String(take!(b)) == "[1]"
+    show(b, (A, A))
+    @test String(take!(b)) == "([1], [1])"
+end
+
+@testset "indexing views (#53249)" begin
+    v = view([1,2,3,4], :)
+    @test v[Base.IdentityUnitRange(2:3)] == OffsetArray(2:3, 2:3)
+end
+
+@testset "mapreduce with OffsetRanges" begin
+    r = 5:100
+    a = OffsetArray(r, 2)
+    b = sum(a, dims=1)
+    @test b[begin] == sum(r)
+end
+
+@testset "reshape" begin
+    A0 = [1 3; 2 4]
+    A = reshape(A0, 2:3, 4:5)
+    @test axes(A) == Base.IdentityUnitRange.((2:3, 4:5))
+
+    B = reshape(A0, -10:-9, 9:10)
+    @test isa(B, OffsetMatrix{Int})
+    @test parent(B) == A0
+    @test axes(B) == Base.IdentityUnitRange.((-10:-9, 9:10))
+end
diff --git a/test/opaque_closure.jl b/test/opaque_closure.jl
index e6490f5e9d345..2de5193ec4f35 100644
--- a/test/opaque_closure.jl
+++ b/test/opaque_closure.jl
@@ -10,7 +10,7 @@ const lno = LineNumberNode(1, :none)
 
 let ci = @code_lowered const_int()
     @eval function oc_trivial()
-        $(Expr(:new_opaque_closure, Tuple{}, Any, Any,
+        $(Expr(:new_opaque_closure, Tuple{}, Any, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci)))
     end
 end
@@ -19,7 +19,7 @@ end
 
 let ci = @code_lowered const_int()
     @eval function oc_simple_inf()
-        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any,
+        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci)))
     end
 end
@@ -33,7 +33,7 @@ end
 (a::OcClos2Int)() = getfield(a, 1) + getfield(a, 2)
 let ci = @code_lowered OcClos2Int(1, 2)();
     @eval function oc_trivial_clos()
-        $(Expr(:new_opaque_closure, Tuple{}, Int, Int,
+        $(Expr(:new_opaque_closure, Tuple{}, Int, Int, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
             1, 2))
     end
@@ -42,7 +42,7 @@ end
 
 let ci = @code_lowered OcClos2Int(1, 2)();
     @eval function oc_self_call_clos()
-        $(Expr(:new_opaque_closure, Tuple{}, Int, Int,
+        $(Expr(:new_opaque_closure, Tuple{}, Int, Int, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
             1, 2))()
     end
@@ -59,7 +59,7 @@ end
 (a::OcClos1Any)() = getfield(a, 1)
 let ci = @code_lowered OcClos1Any(1)()
     @eval function oc_pass_clos(x)
-        $(Expr(:new_opaque_closure, Tuple{}, Any, Any,
+        $(Expr(:new_opaque_closure, Tuple{}, Any, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
             :x))
     end
@@ -69,7 +69,7 @@ end
 
 let ci = @code_lowered OcClos1Any(1)()
     @eval function oc_infer_pass_clos(x)
-        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any,
+        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
             :x))
     end
@@ -81,7 +81,7 @@ end
 
 let ci = @code_lowered identity(1)
     @eval function oc_infer_pass_id()
-        $(Expr(:new_opaque_closure, Tuple{Any}, Any, Any,
+        $(Expr(:new_opaque_closure, Tuple{Any}, Any, Any, true,
             Expr(:opaque_closure_method, nothing, 1, false, lno, ci)))
     end
 end
@@ -103,7 +103,7 @@ end
 
 let ci = @code_lowered OcOpt([1 2])()
     @eval function oc_opt_ndims(A)
-        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any,
+        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
             :A))
     end
@@ -151,26 +151,33 @@ end # module test_world_age
 
 function maybe_vararg(isva::Bool)
     T = isva ? Vararg{Int} : Int
-    @opaque Tuple{T} (x...)->x
+    @opaque Tuple{T}->_ (x...)->x
 end
 @test maybe_vararg(false)(1) == (1,)
 @test_throws MethodError maybe_vararg(false)(1,2,3)
 @test maybe_vararg(true)(1) == (1,)
 @test maybe_vararg(true)(1,2,3) == (1,2,3)
-@test (@opaque Tuple{Int, Int} (a, b, x...)->x)(1,2) === ()
-@test (@opaque Tuple{Int, Int} (a, x...)->x)(1,2) === (2,)
-@test (@opaque Tuple{Int, Vararg{Int}} (a, x...)->x)(1,2,3,4) === (2,3,4)
+@test (@opaque Tuple{Int, Int}->_ (a, b, x...)->x)(1,2) === ()
+@test (@opaque Tuple{Int, Int}->Tuple{} (a, b, x...)->x)(1,2) === ()
+@test (@opaque _->Tuple{Vararg{Int}} (a, b, x...)->x)(1,2) === ()
+@test (@opaque Tuple{Int, Int}->_ (a, x...)->x)(1,2) === (2,)
+@test (@opaque Tuple{Int, Int}->Tuple{Int} (a, x...)->x)(1,2) === (2,)
+@test (@opaque _->Tuple{Vararg{Int}} (a, x...)->x)(1,2) === (2,)
+@test (@opaque Tuple{Int, Vararg{Int}}->_ (a, x...)->x)(1,2,3,4) === (2,3,4)
+@test (@opaque Tuple{Int, Vararg{Int}}->Tuple{Vararg{Int}} (a, x...)->x)(1,2,3,4) === (2,3,4)
 @test (@opaque (a::Int, x::Int...)->x)(1,2,3) === (2,3)
+@test (@opaque _->Tuple{Vararg{Int}} (a::Int, x::Int...)->x)(1,2,3) === (2,3)
+@test (@opaque _->_ (a::Int, x::Int...)->x)(1,2,3) === (2,3)
 
-@test_throws ErrorException (@opaque Tuple{Vararg{Int}} x->x)
-@test_throws ErrorException (@opaque Tuple{Int, Vararg{Int}} x->x)
-@test_throws ErrorException (@opaque Tuple{Int, Int} x->x)
-@test_throws ErrorException (@opaque Tuple{Any} (x,y)->x)
-@test_throws ErrorException (@opaque Tuple{Vararg{Int}} (x,y...)->x)
-@test_throws ErrorException (@opaque Tuple{Int} (x,y,z...)->x)
+@test_throws ErrorException (@opaque Tuple{Vararg{Int}}->_ x->x)
+@test_throws ErrorException (@opaque Tuple{Int, Vararg{Int}}->_ x->x)
+@test_throws ErrorException (@opaque Tuple{Int, Int}->_ x->x)
+@test_throws ErrorException (@opaque Tuple{Any}->_ (x,y)->x)
+@test_throws ErrorException (@opaque Tuple{Vararg{Int}}->_ (x,y...)->x)
+@test_throws ErrorException (@opaque Tuple{Int}->_ (x,y,z...)->x)
 
 # cannot specify types both on arguments and separately
-@test_throws ErrorException @eval @opaque Tuple{Any} (x::Int)->x
+@test_throws ErrorException @eval @opaque Tuple{Any}->_ (x::Int)->x
 
 # Vargarg in complied mode
 mk_va_opaque() = @opaque (x...)->x
@@ -178,19 +185,19 @@ mk_va_opaque() = @opaque (x...)->x
 @test mk_va_opaque()(1,2) == (1,2)
 
 # OpaqueClosure show method
-@test repr(@opaque x->Base.inferencebarrier(1)) == "(::Any)::Any->◌"
+@test repr(@opaque x->Base.inferencebarrier(1)) == "(::Any)->◌::Any"
 
 # Opaque closure in CodeInfo returned from generated functions
 let ci = @code_lowered const_int()
     global function mk_ocg(world::UInt, source, args...)
         @nospecialize
-        cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any,
+        cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci))).args[1]
         cig.slotnames = Symbol[Symbol("#self#")]
         cig.slottypes = Any[Any]
         cig.slotflags = UInt8[0x00]
-        @assert cig.min_world == UInt(1)
-        @assert cig.max_world == typemax(UInt)
+        cig.nargs = 1
+        cig.isva = false
         return cig
     end
 end
@@ -241,13 +248,22 @@ let foo::Int = 42
 end
 
 let oc = @opaque a->sin(a)
-    @test length(code_typed(oc, (Int,))) == 1
+    let opt = code_typed(oc, (Int,))
+        @test length(opt) == 1
+        @test opt[1][2] === Float64
+    end
+    let unopt = code_typed(oc, (Int,); optimize=false)
+        @test length(unopt) == 1
+    end
 end
 
 # constructing an opaque closure from IRCode
 let src = first(only(code_typed(+, (Int, Int))))
-    ir = Core.Compiler.inflate_ir(src)
-    @test OpaqueClosure(src)(40, 2) == 42
+    ir = Core.Compiler.inflate_ir(src, Core.Compiler.VarState[], src.slottypes)
+    ir.argtypes[1] = Tuple{}
+    @test ir.debuginfo.def === nothing
+    ir.debuginfo.def = Symbol(@__FILE__)
+    @test OpaqueClosure(src; sig=Tuple{Int, Int}, rettype=Int, nargs=2)(40, 2) == 42
     oc = OpaqueClosure(ir)
     @test oc(40, 2) == 42
     @test isa(oc, OpaqueClosure{Tuple{Int,Int}, Int})
@@ -255,9 +271,12 @@ let src = first(only(code_typed(+, (Int, Int))))
     @test OpaqueClosure(ir)(40, 2) == 42 # the `OpaqueClosure(::IRCode)` constructor should be non-destructive
 end
 let ir = first(only(Base.code_ircode(sin, (Int,))))
+    ir.argtypes[1] = Tuple{}
     @test OpaqueClosure(ir)(42) == sin(42)
     @test OpaqueClosure(ir)(42) == sin(42) # the `OpaqueClosure(::IRCode)` constructor should be non-destructive
+    @test length(code_typed(OpaqueClosure(ir))) == 1
     ir = first(only(Base.code_ircode(sin, (Float64,))))
+    ir.argtypes[1] = Tuple{}
     @test OpaqueClosure(ir)(42.) == sin(42.)
     @test OpaqueClosure(ir)(42.) == sin(42.) # the `OpaqueClosure(::IRCode)` constructor should be non-destructive
 end
@@ -266,15 +285,32 @@ end
 let src = code_typed((Int,Int)) do x, y...
         return (x, y)
     end |> only |> first
-    let oc = OpaqueClosure(src)
+    src.slottypes[1] = Tuple{}
+    let oc = OpaqueClosure(src; rettype=Tuple{Int, Tuple{Int}}, sig=Tuple{Int, Int}, nargs=2, isva=true)
         @test oc(1,2) === (1,(2,))
         @test_throws MethodError oc(1,2,3)
     end
-    ir = Core.Compiler.inflate_ir(src)
+    ir = Core.Compiler.inflate_ir(src, Core.Compiler.VarState[], src.slottypes)
+    @test ir.debuginfo.def === nothing
+    ir.debuginfo.def = Symbol(@__FILE__)
     let oc = OpaqueClosure(ir; isva=true)
         @test oc(1,2) === (1,(2,))
         @test_throws MethodError oc(1,2,3)
     end
+
+    # with manually constructed IRCode, without round-trip to CodeInfo
+    f59222(xs...) = length(xs)
+    ir = Base.code_ircode_by_type(Tuple{typeof(f59222), Symbol, Symbol})[1][1]
+    ir.argtypes[1] = Tuple{}
+    let oc = OpaqueClosure(ir; isva=true)
+        @test oc(:a, :b) == 2
+    end
+    ir = Base.code_ircode_by_type(Tuple{typeof(f59222), Symbol, Vararg{Symbol}})[1][1]
+    ir.argtypes[1] = Tuple{}
+    let oc = OpaqueClosure(ir; isva=true)
+        @test oc(:a) == 1
+        @test oc(:a, :b, :c) == 3
+    end
 end
 
 # Check for correct handling in case of broken return type.
@@ -283,7 +319,7 @@ eval_oc_spec(oc) = oc()
 for f in (const_int, const_int_barrier)
     ci = code_lowered(f, Tuple{})[1]
     for compiled in (true, false)
-        oc_expr = Expr(:new_opaque_closure, Tuple{}, Union{}, Float64,
+        oc_expr = Expr(:new_opaque_closure, Tuple{}, Union{}, Float64, true,
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci))
         oc_mismatch = let ci = code_lowered(f, Tuple{})[1]
             if compiled
@@ -297,3 +333,94 @@ for f in (const_int, const_int_barrier)
         @test_throws TypeError eval_oc_spec(oc_mismatch)
     end
 end
+
+
+# Attempting to construct an opaque closure backtrace after the oc is GC'ed
+f_oc_throws() = error("oops")
+@noinline function make_oc_and_collect_bt()
+    did_gc = Ref{Bool}(false)
+    bt = let ir = first(only(Base.code_ircode(f_oc_throws, ())))
+        ir.argtypes[1] = Tuple
+        sentinel = Ref{Any}(nothing)
+        oc = OpaqueClosure(ir, sentinel)
+        finalizer(sentinel) do x
+            did_gc[] = true
+        end
+        try
+            oc()
+            @test false
+        catch e
+            bt = catch_backtrace()
+            @test isa(e, ErrorException)
+            bt
+        end
+    end
+    return bt, did_gc
+end
+let (bt, did_gc) = make_oc_and_collect_bt()
+    GC.gc(true); GC.gc(true); GC.gc(true);
+    @test did_gc[]
+    @test any(stacktrace(bt)) do frame
+        li = frame.linfo
+        isa(li, Core.CodeInstance) && (li = li.def)
+        isa(li, Core.ABIOverride) && (li = li.def)
+        isa(li, Core.MethodInstance) || return false
+        isa(li.def, Method) || return false
+        return li.def.is_for_opaque_closure
+    end
+end
+
+# Opaque closure with mismatch struct argtype
+const op_arg_restrict2 = @opaque (x::Tuple{Int64}, y::Base.RefValue{Int64})->x+y
+ccall_op_arg_restrict2_bad_args() = op_arg_restrict2((1.,), 2)
+
+@test_throws TypeError ccall_op_arg_restrict2_bad_args()
+
+# code_llvm for opaque closures
+let ir = Base.code_ircode((Int,Int)) do x, y
+        @noinline x * y
+    end |> only |> first
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir)
+    io = IOBuffer()
+    code_llvm(io, oc, Tuple{Int,Int})
+    @test occursin("j_*_", String(take!(io)))
+    code_llvm(io, oc, (Int,Int))
+    @test occursin("j_*_", String(take!(io)))
+end
+
+foopaque() = Base.Experimental.@opaque(@noinline x::Int->println(x))(1)
+
+code_llvm(devnull,foopaque,()) #shouldn't crash
+
+let ir = first(only(Base.code_ircode(sin, (Int,))))
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir)
+    @test (Base.show_method(IOBuffer(), oc.source::Method); true)
+end
+
+let ir = first(only(Base.code_ircode(sin, (Int,))))
+    ir.argtypes[1] = Tuple{}
+    oc = Core.OpaqueClosure(ir; do_compile=false)
+    @test oc(1) == sin(1)
+end
+
+function typed_add54236(::Type{T}) where T
+    return @opaque (x::Int)->T(x) + T(1)
+end
+let f = typed_add54236(Float64)
+    @test f isa Core.OpaqueClosure
+    @test f(32) === 33.0
+end
+
+f54357(g, ::Type{AT}) where {AT} = Base.Experimental.@opaque AT->_ (args...) -> g((args::AT)...)
+let f = f54357(+, Tuple{Int,Int})
+    @test f isa Core.OpaqueClosure
+    @test f(32, 34) === 66
+    g = f54357(+, Tuple{Float64,Float64})
+    @test g isa Core.OpaqueClosure
+    @test g(32.0, 34.0) === 66.0
+end
+
+# 49659: signature-scoped typevar shouldn't fail in lowering
+@test_throws "must be a tuple type" @opaque ((x::T,y::T) where {T}) -> 123
diff --git a/test/operators.jl b/test/operators.jl
index 715212a80a54f..78aa29cf5251e 100644
--- a/test/operators.jl
+++ b/test/operators.jl
@@ -2,7 +2,7 @@
 
 using Random: randstring
 
-include("compiler/irutils.jl")
+include(joinpath(@__DIR__,"../Compiler/test/irutils.jl"))
 
 @testset "ifelse" begin
     @test ifelse(true, 1, 2) == 1
@@ -179,7 +179,7 @@ end
     @test ∘(x -> (x, 3), x -> (x, 2), x->(x,1))(0) === (((0, 1), 2), 3)
     @test ∘(x -> (x, 4), x -> (x, 3), x->(x,2), x-> (x, 1))(0) === ((((0, 1), 2), 3), 4)
 
-    # test that user defined functors only need to overload the two arg version
+    # test that user defined callable structs only need to overload the two arg version
     struct FreeMagma
         word
     end
@@ -195,7 +195,7 @@ end
     @test repr(uppercase ∘ first) == "uppercase ∘ first"
     @test sprint(show, "text/plain", uppercase ∘ first) == "uppercase ∘ first"
 
-    # test keyword ags in composition
+    # test keyword args in composition
     function kwf(a;b,c); a + b + c; end
     @test (abs2 ∘ kwf)(1,b=2,c=3) == 36
 
@@ -328,16 +328,36 @@ end
     @test lt5(4) && !lt5(5)
 end
 
+@testset "in tuples" begin
+    @test ∈(5, (1,5,10,11))
+    @test ∉(0, (1,5,10,11))
+    @test ∈(5, (1,"hi","hey",5.0))
+    @test ∉(0, (1,"hi","hey",5.0))
+    @test ∈(5, (5,))
+    @test ∉(0, (5,))
+    @test ∉(5, ())
+end
+
 @testset "ni" begin
     @test ∋([1,5,10,11], 5)
     @test !∋([1,10,11], 5)
+    @test ∋((1,5,10,11), 5)
+    @test ∌((1,10,11), 5)
     @test ∋(5)([5,1])
     @test !∋(42)([0,1,100])
     @test ∌(0)(1:10)
     @test ∋(0)(-2:2)
 end
 
+@testset "in" begin
+    @test in(0x00, [0x04, 0x03, 0x02, 0x00])
+    @test !in(UInt8('a'), b"lkefjldk")
+    @test in(Int8(-1), Int8[2, 5, -1, 2])
+    @test !in(Int8(-1), UInt8[1, 3, 2, 0xff])
+end
+
 @test [Base.afoldl(+, 1:i...) for i = 1:40] == [i * (i + 1) ÷ 2 for i = 1:40]
+@test Core.Compiler.is_terminates(Base.infer_effects(Base.afoldl, Tuple{typeof(+), Vararg{Int, 100}}))
 
 @testset "Returns" begin
     @test @inferred(Returns(1)()   ) === 1
@@ -366,3 +386,35 @@ end
     Base.:(<)(::B46327, ::B46327) = false
     @test B46327() <= B46327()
 end
+
+@testset "inference for `x in itr::Tuple`" begin
+    # concrete evaluation
+    @test Core.Compiler.is_foldable(Base.infer_effects(in, (Int,Tuple{Int,Int,Int})))
+    @test Core.Compiler.is_foldable(Base.infer_effects(in, (Char,Tuple{Char,Char,Char})))
+    for i = (1,2,3)
+        @testset let i = i
+            @test @eval Base.return_types() do
+                Val($i in (1,2,3))
+            end |> only == Val{true}
+        end
+    end
+    @test Base.infer_return_type() do
+        Val(4 in (1,2,3))
+    end == Val{false}
+    @test Base.infer_return_type() do
+        Val('1' in ('1','2','3'))
+    end == Val{true}
+
+    # constant propagation
+    @test Base.infer_return_type((Int,Int)) do x, y
+        Val(1 in (x,2,y))
+    end >: Val{true}
+    @test Base.infer_return_type((Int,Int)) do x, y
+        Val(2 in (x,2,y))
+    end == Val{true}
+
+    # should use the loop implementation given large tuples to avoid inference blowup
+    let t = ntuple(x->'A', 10000);
+        @test Base.infer_return_type(in, (Char,typeof(t))) == Bool
+    end
+end
diff --git a/test/ordering.jl b/test/ordering.jl
index 547d8d8dd0e8b..3b5385b99be68 100644
--- a/test/ordering.jl
+++ b/test/ordering.jl
@@ -2,21 +2,24 @@
 
 using Test
 
-import Base.Order: Forward, Reverse
+import Base.Order: Forward, Reverse, ord, Lt, By, ReverseOrdering
 
 # every argument can flip the integer order by passing the right value. Here,
 # we enumerate a few of these combinations and check that all these flips
 # compound so that in total we either have an increasing or decreasing sort.
 for (s1, rev) in enumerate([true, false])
-    for (s2, lt) in enumerate([>, <, (a, b) -> a - b > 0, (a, b) -> a - b < 0])
+    for (s2, lt) in enumerate([(a, b)->isless(b, a), isless, >, <, (a, b) -> a - b > 0, (a, b) -> a - b < 0])
         for (s3, by) in enumerate([-, +])
             for (s4, order) in enumerate([Reverse, Forward])
-                if iseven(s1 + s2 + s3 + s4)
-                    target = [1, 2, 3]
-                else
-                    target = [3, 2, 1]
-                end
+                is_fwd = iseven(s1 + s2 + s3 + s4)
+                target = is_fwd ? (1:3) : (3:-1:1)
+                # arrays, integer and float ranges sometimes have different code paths
                 @test target == sort([2, 3, 1], rev=rev, lt=lt, by=by, order=order)
+
+                @test target == sort(1:3, rev=rev, lt=lt, by=by, order=order)
+                @test target == sort(3:-1:1, rev=rev, lt=lt, by=by, order=order)
+                @test float(target) == sort(1.0:3, rev=rev, lt=lt, by=by, order=order)
+                @test float(target) == sort(3.0:-1:1, rev=rev, lt=lt, by=by, order=order)
             end
         end
     end
@@ -40,3 +43,17 @@ struct SomeOtherOrder <: Base.Order.Ordering end
 
 @test reverse(Forward) === Reverse
 @test reverse(Reverse) === Forward
+
+@test ord(isless, identity, false, Forward) === Forward
+@test ord(isless, identity, true, Forward) === Reverse
+@test ord(<, identity, false, Forward) === Lt(<)
+@test ord(isless, abs, false, Forward) === By(abs)
+@test ord(<, abs, false, Forward) === By(abs, Lt(<))
+@test ord(<, abs, true, Forward) === ReverseOrdering(By(abs, Lt(<)))
+@test ord(<, abs, true, Reverse) === By(abs, Lt(<))
+
+@testset "Base.Order docstrings" begin
+    undoc = Docs.undocumented_names(Base.Order)
+    @test_broken isempty(undoc)
+    @test undoc == [:DirectOrdering, :ForwardOrdering, :Order, :ordtype]
+end
diff --git a/test/osutils.jl b/test/osutils.jl
index 5e72675279cbc..9eb708b670298 100644
--- a/test/osutils.jl
+++ b/test/osutils.jl
@@ -29,6 +29,11 @@ using Libdl
     else
         @test Sys.windows_version() >= v"1.0.0-"
     end
+
+    # TODO: When we have a WSL CI, add a new test here `@test detectwsl()`
+    if !Sys.islinux()
+        @test !Sys.detectwsl()
+    end
 end
 
 @testset "@static" begin
diff --git a/test/parse.jl b/test/parse.jl
index 69092b2c4188d..e2b94a45cc446 100644
--- a/test/parse.jl
+++ b/test/parse.jl
@@ -296,6 +296,8 @@ end
         @test_throws ArgumentError parse(Complex{T}, bad)
     end
     @test_throws ArgumentError parse(Complex{Int}, "3 + 4.2im")
+    @test_throws ArgumentError parse(ComplexF64, "3 β+ 4im")
+    @test_throws ArgumentError parse(ComplexF64, "3 + 4αm")
 end
 
 @testset "parse and tryparse type inference" begin
diff --git a/test/path.jl b/test/path.jl
index 2f4f2d0983a58..54e4ef6fa3d20 100644
--- a/test/path.jl
+++ b/test/path.jl
@@ -65,7 +65,7 @@
         end
 
         if Sys.iswindows()
-            @test joinpath(S("foo"),S("bar:baz")) == "bar:baz"
+            @test joinpath(S("foo"),S("D:bar")) == "D:bar"
             @test joinpath(S("C:"),S("foo"),S("D:"),S("bar")) == "D:bar"
             @test joinpath(S("C:"),S("foo"),S("D:bar"),S("baz")) == "D:bar$(sep)baz"
 
@@ -181,6 +181,9 @@
                 ("\\\\servername.com\\hello.world","\\filename.ext")
             @test splitdrive(S("C:\\foo\\bar")) ==
                 ("C:","\\foo\\bar")
+            # only single characters followed by a colon are drives
+            @test splitdrive(S("foo:bar")) ==
+                ("", "foo:bar")
         end
 
         @test splitext(S("")) == ("", "")
@@ -311,6 +314,26 @@
         test_relpath()
     end
 
+    @testset "uripath" begin
+        host = if Sys.iswindows()
+            ""
+        elseif Sys.detectwsl()
+            distro = get(ENV, "WSL_DISTRO_NAME", "") # See <https://patrickwu.space/wslconf/>
+            "wsl%24/$distro" # See <https://github.com/microsoft/terminal/pull/14993> and <https://learn.microsoft.com/en-us/windows/wsl/filesystems>
+        else
+            gethostname()
+        end
+        sysdrive, uridrive = if Sys.iswindows() "C:\\", "C:/" else "/", "" end
+        @test Base.Filesystem.uripath("$(sysdrive)some$(sep)file.txt") == "file://$host/$(uridrive)some/file.txt"
+        @test Base.Filesystem.uripath("$(sysdrive)another$(sep)$(sep)folder$(sep)file.md") == "file://$host/$(uridrive)another/folder/file.md"
+        @test Base.Filesystem.uripath("$(sysdrive)some file with ^odd% chars") == "file://$host/$(uridrive)some%20file%20with%20%5Eodd%25%20chars"
+        @test Base.Filesystem.uripath("$(sysdrive)weird chars like @#&()[]{}") == "file://$host/$(uridrive)weird%20chars%20like%20%40%23%26%28%29%5B%5D%7B%7D"
+        @test Base.Filesystem.uripath("$sysdrive") == "file://$host/$uridrive"
+        @test Base.Filesystem.uripath(".") == Base.Filesystem.uripath(pwd())
+        @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)Δεδομένα") == "file://$host/$(uridrive)unicode/%CE%94%CE%B5%CE%B4%CE%BF%CE%BC%CE%AD%CE%BD%CE%B1"
+        @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)🧮🐛🔨") == "file://$host/$(uridrive)unicode/%F0%9F%A7%AE%F0%9F%90%9B%F0%9F%94%A8"
+    end
+
     if Sys.iswindows()
         @testset "issue #23646" begin
             @test lowercase(relpath("E:\\a\\b", "C:\\c")) == "e:\\a\\b"
diff --git a/test/precompile.jl b/test/precompile.jl
index 62d862c384040..f63fc5e631125 100644
--- a/test/precompile.jl
+++ b/test/precompile.jl
@@ -1,12 +1,14 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-original_depot_path = copy(Base.DEPOT_PATH)
-original_load_path = copy(Base.LOAD_PATH)
+using Test, Distributed, Random, Logging, Libdl
+using REPL # testing the doc lookup function should be outside of the scope of this file, but is currently tested here
 
-using Test, Distributed, Random
-using REPL # doc lookup function
+include("precompile_utils.jl")
+include("tempdepot.jl")
 
 Foo_module = :Foo4b3a94a1a081a8cb
+foo_incl_dep = :foo4b3a94a1a081a8cb
+bar_incl_dep = :bar4b3a94a1a081a8cb
 Foo2_module = :F2oo4b3a94a1a081a8cb
 FooBase_module = :FooBase4b3a94a1a081a8cb
 @eval module ConflictingBindings
@@ -16,35 +18,9 @@ FooBase_module = :FooBase4b3a94a1a081a8cb
 end
 using .ConflictingBindings
 
-function precompile_test_harness(@nospecialize(f), testset::String)
-    @testset "$testset" begin
-        precompile_test_harness(f, true)
-    end
-end
-function precompile_test_harness(@nospecialize(f), separate::Bool)
-    load_path = mktempdir()
-    load_cache_path = separate ? mktempdir() : load_path
-    try
-        pushfirst!(LOAD_PATH, load_path)
-        pushfirst!(DEPOT_PATH, load_cache_path)
-        f(load_path)
-    finally
-        try
-            rm(load_path, force=true, recursive=true)
-        catch err
-            @show err
-        end
-        if separate
-            try
-                rm(load_cache_path, force=true, recursive=true)
-            catch err
-                @show err
-            end
-        end
-        filter!((≠)(load_path), LOAD_PATH)
-        separate && filter!((≠)(load_cache_path), DEPOT_PATH)
-    end
-    nothing
+@testset "object_build_id" begin
+    @test Base.object_build_id([1]) === nothing
+    @test Base.object_build_id(Base) == Base.module_build_id(Base)
 end
 
 # method root provenance
@@ -102,6 +78,8 @@ precompile_test_harness(false) do dir
     Foo_file = joinpath(dir, "$Foo_module.jl")
     Foo2_file = joinpath(dir, "$Foo2_module.jl")
     FooBase_file = joinpath(dir, "$FooBase_module.jl")
+    foo_file = joinpath(dir, "$foo_incl_dep.jl")
+    bar_file = joinpath(dir, "$bar_incl_dep.jl")
 
     write(FooBase_file,
           """
@@ -115,6 +93,24 @@ precompile_test_harness(false) do dir
                   d = den(a)
                   return h
               end
+              abstract type AbstractAlgebraMap{A} end
+              struct GAPGroupHomomorphism{A, B} <: AbstractAlgebraMap{GAPGroupHomomorphism{B, A}} end
+
+              global process_state_calls::Int = 0
+              const process_state = Base.OncePerProcess{typeof(getpid())}() do
+                  @assert (global process_state_calls += 1) == 1
+                  return getpid()
+              end
+              const mypid = process_state()
+              @assert process_state_calls === 1
+              process_state_calls = 0
+              @assert process_state() === process_state()
+              @assert process_state_calls === 0
+
+              const empty_state = Base.OncePerProcess{Nothing}() do
+                  return nothing
+              end
+              @assert empty_state() === nothing
           end
           """)
     write(Foo2_file,
@@ -130,10 +126,11 @@ precompile_test_harness(false) do dir
     write(Foo_file,
           """
           module $Foo_module
-              import $FooBase_module, $FooBase_module.typeA
+              import $FooBase_module, $FooBase_module.typeA, $FooBase_module.GAPGroupHomomorphism
               import $Foo2_module: $Foo2_module, override, overridenc
               import $FooBase_module.hash
               import Test
+              public foo, Bar
               module Inner
                   import $FooBase_module.hash
                   using ..$Foo_module
@@ -147,10 +144,11 @@ precompile_test_harness(false) do dir
 
               # test that docs get reconnected
               @doc "foo function" foo(x) = x + 1
-              include_dependency("foo.jl")
-              include_dependency("foo.jl")
+              include_dependency("$foo_incl_dep.jl")
+              include_dependency("$foo_incl_dep.jl")
               module Bar
-                  include_dependency("bar.jl")
+                  public bar
+                  include_dependency("$bar_incl_dep.jl")
               end
               @doc "Bar module" Bar # this needs to define the META dictionary via eval
               @eval Bar @doc "bar function" bar(x) = x + 2
@@ -211,6 +209,8 @@ precompile_test_harness(false) do dir
               Base.convert(::Type{Some{Value18343}}, ::Value18343{Some}) = 2
               Base.convert(::Type{Ref}, ::Value18343{T}) where {T} = 3
 
+              const GAPType1 = GAPGroupHomomorphism{Nothing, Nothing}
+              const GAPType2 = GAPGroupHomomorphism{1, 2}
 
               # issue #28297
               mutable struct Result
@@ -239,11 +239,14 @@ precompile_test_harness(false) do dir
               gnc() = overridenc(1.0)
               Test.@test 1 < gnc() < 5 # compile this
 
-              const abigfloat_f() = big"12.34"
+              abigfloat_f() = big"12.34"
               const abigfloat_x = big"43.21"
-              const abigint_f() = big"123"
+              abigint_f() = big"123"
               const abigint_x = big"124"
 
+              # issue #51111
+              abigfloat_to_f32() = Float32(big"1.5")
+
               # issue #31488
               _v31488 = Base.StringVector(2)
               resize!(_v31488, 0)
@@ -262,12 +265,38 @@ precompile_test_harness(false) do dir
 
               # check that @ccallable works from precompiled modules
               Base.@ccallable Cint f35014(x::Cint) = x+Cint(1)
+              Base.@ccallable "f35014_other" f35014_2(x::Cint)::Cint = x+Cint(1)
 
               # check that Tasks work from serialized state
               ch1 = Channel(x -> nothing)
               ch2 = Channel(x -> (push!(x, 2); nothing), Inf)
+
+              # check that Memory aliasing is respected
+              a_vec_int = Int[]
+              push!(a_vec_int, 1, 2)
+              a_mat_int = reshape(a_vec_int, (1, 2))
+
+              a_vec_any = Any[]
+              push!(a_vec_any, 1, 2)
+              a_mat_any = reshape(a_vec_any, (1, 2))
+
+              a_vec_union = Union{Int,Nothing}[]
+              push!(a_vec_union, 1, 2)
+              a_mat_union = reshape(a_vec_union, (1, 2))
+
+              a_vec_inline = Pair{Int,Any}[]
+              push!(a_vec_inline, 1=>2, 3=>4)
+              a_mat_inline = reshape(a_vec_inline, (1, 2))
+
+              oid_vec_int = objectid(a_vec_int)
+              oid_mat_int = objectid(a_mat_int)
+
+              using $FooBase_module: process_state, mypid as FooBase_pid, process_state_calls
+              const mypid = process_state()
           end
           """)
+    # Issue #52063
+    touch(foo_file); touch(bar_file)
     # Issue #12623
     @test __precompile__(false) === nothing
 
@@ -299,6 +328,9 @@ precompile_test_harness(false) do dir
         @test Foo.abigint_f()::BigInt == big"123"
         @test Foo.abigint_x::BigInt + 1 == big"125"
 
+        # Issue #51111
+        @test Foo.abigfloat_to_f32() == 1.5f0
+
         @test Foo.x28297.result === missing
 
         @test Foo.d29936a === Dict
@@ -322,38 +354,69 @@ precompile_test_harness(false) do dir
         @test isready(Foo.ch2)
         @test take!(Foo.ch2) === 2
         @test !isready(Foo.ch2)
-    end
 
-    @eval begin function ccallable_test()
-        Base.llvmcall(
-        ("""declare i32 @f35014(i32)
-            define i32 @entry() {
-            0:
-                %1 = call i32 @f35014(i32 3)
-                ret i32 %1
-            }""", "entry"
-        ), Cint, Tuple{})
+        @test Foo.process_state_calls === 0
+        @test Foo.process_state() === getpid()
+        @test Foo.mypid !== getpid()
+        @test Foo.FooBase_pid !== getpid()
+        @test Foo.mypid !== Foo.FooBase_pid
+        @test Foo.process_state_calls === 1
     end
-    @test ccallable_test() == 4
+
+    let
+        @test Foo.a_vec_int == Int[1, 2]
+        @test Foo.a_mat_int == Int[1 2]
+        Foo.a_mat_int[1, 2] = 3
+        @test Foo.a_vec_int[2] === 3
+
+        @test Foo.a_vec_any == Int[1, 2]
+        @test Foo.a_mat_any == Int[1 2]
+        Foo.a_mat_any[1, 2] = 3
+        @test Foo.a_vec_any[2] === 3
+
+        @test Foo.a_vec_union == Union{Int,Nothing}[1, 2]
+        @test Foo.a_mat_union == Union{Int,Nothing}[1 2]
+        Foo.a_mat_union[1, 2] = 3
+        @test Foo.a_vec_union[2] === 3
+        Foo.a_mat_union[1, 2] = nothing
+        @test Foo.a_vec_union[2] === nothing
+
+        @test Foo.a_vec_inline == Pair{Int,Any}[1=>2, 3=>4]
+        @test Foo.a_mat_inline == Pair{Int,Any}[1=>2 3=>4]
+        Foo.a_mat_inline[1, 2] = 5=>6
+        @test Foo.a_vec_inline[2] === Pair{Int,Any}(5, 6)
+
+        @test objectid(Foo.a_vec_int) === Foo.oid_vec_int
+        @test objectid(Foo.a_mat_int) === Foo.oid_mat_int
+        @test Foo.oid_vec_int !== Foo.oid_mat_int
+        @test Base.object_build_id(Foo.a_vec_int) == Base.object_build_id(Foo.a_mat_int)
+        @test Base.object_build_id(Foo) == Base.module_build_id(Foo)
+        @test Base.object_build_id(Foo.a_vec_int) == Base.module_build_id(Foo)
     end
 
     cachedir = joinpath(dir, "compiled", "v$(VERSION.major).$(VERSION.minor)")
     cachedir2 = joinpath(dir2, "compiled", "v$(VERSION.major).$(VERSION.minor)")
     cachefile = joinpath(cachedir, "$Foo_module.ji")
+    @test isfile(cachefile)
     do_pkgimg = Base.JLOptions().use_pkgimages == 1 && Base.JLOptions().permalloc_pkgimg == 1
     if do_pkgimg ||  Base.JLOptions().use_pkgimages == 0
         if do_pkgimg
-            ocachefile = Base.ocachefile_from_cachefile(cachefile)
+            ocachefile = Base.ocachefile_from_cachefile(cachefile)::String
+            @test isfile(ocachefile)
+            let foo_ptr = Libdl.dlopen(ocachefile::String, RTLD_NOLOAD)
+                f35014_ptr = Libdl.dlsym(foo_ptr, :f35014)
+                @test ccall(f35014_ptr, Int32, (Int32,), 3) == 4
+                f35014_other_ptr = Libdl.dlsym(foo_ptr, :f35014_other)
+                @test ccall(f35014_other_ptr, Int32, (Int32,), 3) == 4
+            end
         else
             ocachefile = nothing
         end
-            # use _require_from_serialized to ensure that the test fails if
-            # the module doesn't reload from the image:
-        @test_warn "@ccallable was already defined for this method name" begin
-            @test_logs (:warn, "Replacing module `$Foo_module`") begin
-                m = Base._require_from_serialized(Base.PkgId(Foo), cachefile, ocachefile)
-                @test isa(m, Module)
-            end
+        # use _require_from_serialized to ensure that the test fails if
+        # the module doesn't reload from the image:
+        @test_logs (:warn, "Replacing module `$Foo_module`") begin
+            m = Base._require_from_serialized(Base.PkgId(Foo), cachefile, ocachefile, Foo_file)
+            @test isa(m, Module)
         end
     end
 
@@ -373,10 +436,10 @@ precompile_test_harness(false) do dir
         @test string(Base.Docs.doc(Foo.Bar.bar)) == "bar function\n"
         @test string(Base.Docs.doc(Foo.Bar)) == "Bar module\n"
 
-        modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile)
-        discard_module = mod_fl_mt -> (mod_fl_mt.filename, mod_fl_mt.mtime)
+        modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile)
+        discard_module = mod_fl_mt -> mod_fl_mt.filename
         @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) % UInt64 ]
-        @test map(x -> x.filename, deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ]
+        @test map(x -> x.filename, deps) == [ Foo_file, joinpath("@depot", foo_file), joinpath("@depot", bar_file) ]
         @test requires == [ Base.PkgId(Foo) => Base.PkgId(string(FooBase_module)),
                             Base.PkgId(Foo) => Base.PkgId(Foo2),
                             Base.PkgId(Foo) => Base.PkgId(Test),
@@ -385,30 +448,39 @@ precompile_test_harness(false) do dir
         @test !isempty(srctxt) && srctxt == read(Foo_file, String)
         @test_throws ErrorException Base.read_dependency_src(cachefile, "/tmp/nonexistent.txt")
         # dependencies declared with `include_dependency` should not be stored
-        @test_throws ErrorException Base.read_dependency_src(cachefile, joinpath(dir, "foo.jl"))
+        @test_throws ErrorException Base.read_dependency_src(cachefile, joinpath(dir, foo_file))
 
         modules, deps1 = Base.cache_dependencies(cachefile)
-        @test Dict(modules) == merge(
+        modules_ok = merge(
             Dict(let m = Base.PkgId(s)
                     m => Base.module_build_id(Base.root_module(m))
                  end for s in
                  [ "Base", "Core", "Main",
-                   string(Foo2_module), string(FooBase_module) ]),
+                   string(Foo2_module), string(FooBase_module),]),
             # plus modules included in the system image
             Dict(let m = Base.root_module(Base, s)
                      Base.PkgId(m) => Base.module_build_id(m)
-                 end for s in
-                [:ArgTools, :Artifacts, :Base64, :CRC32c, :Dates,
-                 :Downloads, :FileWatching, :Future, :InteractiveUtils, :libblastrampoline_jll,
-                 :LibCURL, :LibCURL_jll, :LibGit2, :Libdl, :LinearAlgebra,
-                 :Logging, :Markdown, :Mmap, :MozillaCACerts_jll, :NetworkOptions, :OpenBLAS_jll, :Pkg, :Printf,
-                 :p7zip_jll, :REPL, :Random, :SHA, :Serialization, :Sockets,
-                 :TOML, :Tar, :Test, :UUIDs, :Unicode,
-                 :nghttp2_jll]
-            ),
+                 end for s in [Symbol(x.name) for x in Base._sysimage_modules if !(x.name in ["Base", "Core", "Main"])]),
+            # plus test module,
+            Dict(Base.PkgId(Base.root_module(Base, :Test)) => Base.module_build_id(Base.root_module(Base, :Test))),
+            # plus dependencies of test module
+            Dict(Base.PkgId(Base.root_module(Base, :InteractiveUtils)) => Base.module_build_id(Base.root_module(Base, :InteractiveUtils))),
+            Dict(Base.PkgId(Base.root_module(Base, :Logging)) => Base.module_build_id(Base.root_module(Base, :Logging))),
+            Dict(Base.PkgId(Base.root_module(Base, :Random)) => Base.module_build_id(Base.root_module(Base, :Random))),
+            Dict(Base.PkgId(Base.root_module(Base, :Serialization)) => Base.module_build_id(Base.root_module(Base, :Serialization))),
+            # and their dependencies
+            Dict(Base.PkgId(Base.root_module(Base, :SHA)) => Base.module_build_id(Base.root_module(Base, :SHA))),
+            Dict(Base.PkgId(Base.root_module(Base, :Markdown)) => Base.module_build_id(Base.root_module(Base, :Markdown))),
+            Dict(Base.PkgId(Base.root_module(Base, :JuliaSyntaxHighlighting)) => Base.module_build_id(Base.root_module(Base, :JuliaSyntaxHighlighting))),
+            Dict(Base.PkgId(Base.root_module(Base, :StyledStrings)) => Base.module_build_id(Base.root_module(Base, :StyledStrings))),
+
+            # and their dependencies
+            Dict(Base.PkgId(Base.root_module(Base, :Base64)) => Base.module_build_id(Base.root_module(Base, :Base64))),
         )
+        @test Dict(modules) == modules_ok
+
         @test discard_module.(deps) == deps1
-        modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile; srcfiles_only=true)
+        modules, (_, deps, requires), required_modules, _... = Base.parse_cache_header(cachefile)
         @test map(x -> x.filename, deps) == [Foo_file]
 
         @test current_task()(0x01, 0x4000, 0x30031234) == 2
@@ -471,7 +543,7 @@ precompile_test_harness(false) do dir
         """)
     Nest = Base.require(Main, Nest_module)
     cachefile = joinpath(cachedir, "$Nest_module.ji")
-    modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile)
+    modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile)
     @test last(deps).modpath == ["NestInner"]
 
     UsesB_module = :UsesB4b3a94a1a081a8cb
@@ -493,7 +565,7 @@ precompile_test_harness(false) do dir
         """)
     UsesB = Base.require(Main, UsesB_module)
     cachefile = joinpath(cachedir, "$UsesB_module.ji")
-    modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile)
+    modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile)
     id1, id2 = only(requires)
     @test Base.pkgorigins[id1].cachepath == cachefile
     @test Base.pkgorigins[id2].cachepath == joinpath(cachedir, "$B_module.ji")
@@ -501,15 +573,58 @@ precompile_test_harness(false) do dir
     Baz_file = joinpath(dir, "Baz.jl")
     write(Baz_file,
           """
-          true && __precompile__(false)
+          haskey(Base.loaded_modules, Base.PkgId("UseBaz")) || __precompile__(false)
           module Baz
           baz() = 1
           end
           """)
 
     @test Base.compilecache(Base.PkgId("Baz")) == Base.PrecompilableError() # due to __precompile__(false)
+
+    OverwriteMethodError_file = joinpath(dir, "OverwriteMethodError.jl")
+    write(OverwriteMethodError_file,
+          """
+          module OverwriteMethodError
+              Base.:(+)(x::Bool, y::Bool) = false
+          end
+          """)
+
+    @test (@test_warn "overwritten in module OverwriteMethodError" Base.compilecache(Base.PkgId("OverwriteMethodError"))) == Base.PrecompilableError() # due to piracy
+
+    UseBaz_file = joinpath(dir, "UseBaz.jl")
+    write(UseBaz_file,
+          """
+          module UseBaz
+          biz() = 1
+          @assert haskey(Base.loaded_modules, Base.PkgId("UseBaz"))
+          @assert !haskey(Base.loaded_modules, Base.PkgId("Baz"))
+          using Baz
+          @assert haskey(Base.loaded_modules, Base.PkgId("Baz"))
+          buz() = 2
+          const generating = ccall(:jl_generating_output, Cint, ())
+          const incremental = Base.JLOptions().incremental
+          end
+          """)
+
+    @test Base.compilecache(Base.PkgId("UseBaz")) == Base.PrecompilableError() # due to __precompile__(false)
+    @eval using UseBaz
+    @test haskey(Base.loaded_modules, Base.PkgId("UseBaz"))
+    @test haskey(Base.loaded_modules, Base.PkgId("Baz"))
+    invokelatest() do
+        @test UseBaz.biz() === 1
+        @test UseBaz.buz() === 2
+        @test UseBaz.generating == 0
+        @test UseBaz.incremental == 0
+    end
     @eval using Baz
-    @test Base.invokelatest(Baz.baz) == 1
+    invokelatest() do
+        @test Baz.baz() === 1
+        @test Baz === UseBaz.Baz
+    end
+
+    # should not throw if the cachefile does not exist
+    @test !isfile("DoesNotExist.ji")
+    @test Base.stale_cachefile("", "DoesNotExist.ji") === true
 
     # Issue #12720
     FooBar1_file = joinpath(dir, "FooBar1.jl")
@@ -527,34 +642,36 @@ precompile_test_harness(false) do dir
           end
           """)
 
-    cachefile, _ = Base.compilecache(Base.PkgId("FooBar"))
+    cachefile, _ = @test_logs (:debug, r"Generating object cache file for FooBar") min_level=Logging.Debug match_mode=:any Base.compilecache(Base.PkgId("FooBar"))
     empty_prefs_hash = Base.get_preferences_hash(nothing, String[])
     @test cachefile == Base.compilecache_path(Base.PkgId("FooBar"), empty_prefs_hash)
     @test isfile(joinpath(cachedir, "FooBar.ji"))
-    Tsc = Bool(Base.JLOptions().use_pkgimages) ? Tuple{<:Vector, String} : Tuple{<:Vector, Nothing}
+    Tsc = Bool(Base.JLOptions().use_pkgimages) ? Tuple{<:Vector, String, UInt128} : Tuple{<:Vector, Nothing, UInt128}
     @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc
     @test !isdefined(Main, :FooBar)
     @test !isdefined(Main, :FooBar1)
 
     relFooBar_file = joinpath(dir, "subfolder", "..", "FooBar.jl")
-    @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? Tuple{<:Vector, String} : Bool) # `..` is not a symlink on Windows
+    @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? Tuple{<:Vector, String, UInt128} : Bool) # `..` is not a symlink on Windows
     mkdir(joinpath(dir, "subfolder"))
     @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc
 
     @eval using FooBar
-    fb_uuid = Base.module_build_id(FooBar)
+    fb_uuid = invokelatest(()->Base.module_build_id(FooBar))
     sleep(2); touch(FooBar_file)
     insert!(DEPOT_PATH, 1, dir2)
-    @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true
+    @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc
     @eval using FooBar1
     @test !isfile(joinpath(cachedir2, "FooBar.ji"))
     @test !isfile(joinpath(cachedir, "FooBar1.ji"))
     @test isfile(joinpath(cachedir2, "FooBar1.ji"))
-    @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true
+    @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc
     @test Base.stale_cachefile(FooBar1_file, joinpath(cachedir2, "FooBar1.ji")) isa Tsc
-    @test fb_uuid == Base.module_build_id(FooBar)
-    fb_uuid1 = Base.module_build_id(FooBar1)
-    @test fb_uuid != fb_uuid1
+    invokelatest() do
+        @test fb_uuid == Base.module_build_id(FooBar)
+        fb_uuid1 = Base.module_build_id(FooBar1)
+        @test fb_uuid != fb_uuid1
+    end
 
     # test checksum
     open(joinpath(cachedir2, "FooBar1.ji"), "a") do f
@@ -570,25 +687,19 @@ precompile_test_harness(false) do dir
           error("break me")
           end
           """)
-    @test_warn r"LoadError: break me\nStacktrace:\n \[1\] [\e01m\[]*error" try
-            Base.require(Main, :FooBar2)
-            error("the \"break me\" test failed")
-        catch exc
-            isa(exc, ErrorException) || rethrow()
-            occursin("ERROR: LoadError: break me", exc.msg) && rethrow()
-        end
+    @test_throws Base.Precompilation.PkgPrecompileError Base.require(Main, :FooBar2)
 
     # Test that trying to eval into closed modules during precompilation is an error
     FooBar3_file = joinpath(dir, "FooBar3.jl")
     FooBar3_inc = joinpath(dir, "FooBar3_inc.jl")
     write(FooBar3_inc, "x=1\n")
     for code in ["Core.eval(Base, :(x=1))", "Base.include(Base, \"FooBar3_inc.jl\")"]
-        write(FooBar3_file, code)
-        @test_warn "Evaluation into the closed module `Base` breaks incremental compilation" try
-                Base.require(Main, :FooBar3)
-            catch exc
-                isa(exc, ErrorException) || rethrow()
-            end
+        write(FooBar3_file, """
+        module FooBar3
+        $code
+        end
+        """)
+        @test_throws Base.Precompilation.PkgPrecompileError Base.require(Main, :FooBar3)
     end
 
     # Test transitive dependency for #21266
@@ -626,7 +737,6 @@ end
 
 # method root provenance & external code caching
 precompile_test_harness("code caching") do dir
-    Bid = rootid(Base)
     Cache_module = :Cacheb8321416e8a3e2f1
     # Note: calling setindex!(::Dict{K,V}, ::Any, ::K) adds both compression and codegen roots
     write(joinpath(dir, "$Cache_module.jl"),
@@ -635,10 +745,9 @@ precompile_test_harness("code caching") do dir
               struct X end
               struct X2 end
               @noinline function f(d)
-                  @noinline
-                  d[X()] = nothing
+                  @noinline d[X()] = nothing
               end
-              @noinline fpush(dest) = push!(dest, X())
+              @noinline fpush(dest) = @noinline push!(dest, X())
               function callboth()
                   f(Dict{X,Any}())
                   fpush(X[])
@@ -660,92 +769,86 @@ precompile_test_harness("code caching") do dir
     Base.compilecache(pkgid)
     @test Base.isprecompiled(pkgid)
     @eval using $Cache_module
-    M = getfield(@__MODULE__, Cache_module)
-    # Test that this cache file "owns" all the roots
+    M = invokelatest(getglobal, @__MODULE__, Cache_module)
     Mid = rootid(M)
-    for name in (:f, :fpush, :callboth)
-        func = getfield(M, name)
-        m = only(collect(methods(func)))
-        @test all(i -> root_provenance(m, i) == Mid, 1:length(m.roots))
-    end
-    # Check that we can cache external CodeInstances:
-    # length(::Vector) has an inferred specialization for `Vector{X}`
-    msize = which(length, (Vector{<:Any},))
-    hasspec = false
-    for mi in Base.specializations(msize)
-        if mi.specTypes == Tuple{typeof(length),Vector{Cacheb8321416e8a3e2f1.X}}
-            if (isdefined(mi, :cache) && isa(mi.cache, Core.CodeInstance) &&
-                mi.cache.max_world == typemax(UInt) && mi.cache.inferred !== nothing)
-                hasspec = true
-                break
+    invokelatest() do
+        # Test that this cache file "owns" all the roots
+        for name in (:f, :fpush, :callboth)
+            func = getglobal(M, name)
+            m = only(collect(methods(func)))
+            @test all(i -> root_provenance(m, i) == Mid, 1:length(m.roots))
+        end
+        # Check that we can cache external CodeInstances:
+        # length(::Vector) has an inferred specialization for `Vector{X}`
+        msize = which(length, (Vector{<:Any},))
+        hasspec = false
+        for mi in Base.specializations(msize)
+            if mi.specTypes == Tuple{typeof(length),Vector{Cacheb8321416e8a3e2f1.X}}
+                if (isdefined(mi, :cache) && isa(mi.cache, Core.CodeInstance) &&
+                    mi.cache.max_world == typemax(UInt) && mi.cache.inferred !== nothing)
+                    hasspec = true
+                    break
+                end
             end
         end
+        @test hasspec
+
+        # Check that internal methods and their roots are accounted appropriately
+        minternal = which(M.getelsize, (Vector,))
+        mi = minternal.specializations::Core.MethodInstance
+        @test mi.specTypes == Tuple{typeof(M.getelsize),Vector{Int32}}
+        ci = mi.cache
+        @test (codeunits(ci.inferred::String)[end]) === 0x01
+        @test ci.inferred !== nothing
+        # ...and that we can add "untracked" roots & non-relocatable CodeInstances to them too
+        Base.invokelatest() do
+            M.getelsize(M.X2[])
+        end
+        mispecs = minternal.specializations::Core.SimpleVector
+        @test mispecs[1] === mi
+        mi = mispecs[2]::Core.MethodInstance
+        mi.specTypes == Tuple{typeof(M.getelsize),Vector{M.X2}}
+        ci = mi.cache
+        @test (codeunits(ci.inferred::String)[end]) == 0x00
     end
-    @test hasspec
-    # Test that compilation adds to method roots with appropriate provenance
-    m = which(setindex!, (Dict{M.X,Any}, Any, M.X))
-    @test M.X ∈ m.roots
-    # Check that roots added outside of incremental builds get attributed to a moduleid of 0
-    Base.invokelatest() do
-        Dict{M.X2,Any}()[M.X2()] = nothing
-    end
-    @test M.X2 ∈ m.roots
-    groups = group_roots(m)
-    @test M.X ∈ groups[Mid]           # attributed to M
-    @test M.X2 ∈ groups[0]            # activate module is not known
-    @test !isempty(groups[Bid])
-    # Check that internal methods and their roots are accounted appropriately
-    minternal = which(M.getelsize, (Vector,))
-    mi = minternal.specializations::Core.MethodInstance
-    @test mi.specTypes == Tuple{typeof(M.getelsize),Vector{Int32}}
-    ci = mi.cache
-    @test ci.relocatability == 1
-    @test ci.inferred !== nothing
-    # ...and that we can add "untracked" roots & non-relocatable CodeInstances to them too
-    Base.invokelatest() do
-        M.getelsize(M.X2[])
-    end
-    mispecs = minternal.specializations::Core.SimpleVector
-    @test mispecs[1] === mi
-    mi = mispecs[2]::Core.MethodInstance
-    ci = mi.cache
-    @test ci.relocatability == 0
     # PkgA loads PkgB, and both add roots to the same `push!` method (both before and after loading B)
     Cache_module2 = :Cachea1544c83560f0c99
     write(joinpath(dir, "$Cache_module2.jl"),
           """
           module $Cache_module2
               struct Y end
-              @noinline f(dest) = push!(dest, Y())
+              @noinline f(dest) = @noinline push!(dest, Y())
               callf() = f(Y[])
               callf()
               using $(Cache_module)
               struct Z end
-              @noinline g(dest) = push!(dest, Z())
+              @noinline g(dest) = @noinline push!(dest, Z())
               callg() = g(Z[])
               callg()
           end
           """)
     Base.compilecache(Base.PkgId(string(Cache_module2)))
     @eval using $Cache_module2
-    M2 = getfield(@__MODULE__, Cache_module2)
-    M2id = rootid(M2)
-    dest = []
-    Base.invokelatest() do  # use invokelatest to see the results of loading the compile
-        M2.f(dest)
-        M.fpush(dest)
-        M2.g(dest)
-        @test dest == [M2.Y(), M.X(), M2.Z()]
-        @test M2.callf() == [M2.Y()]
-        @test M2.callg() == [M2.Z()]
-        @test M.fpush(M.X[]) == [M.X()]
+    invokelatest() do
+        M2 = getfield(@__MODULE__, Cache_module2)
+        M2id = rootid(M2)
+        dest = []
+        Base.invokelatest() do  # use invokelatest to see the results of loading the compile
+            M2.f(dest)
+            M.fpush(dest)
+            M2.g(dest)
+            @test dest == [M2.Y(), M.X(), M2.Z()]
+            @test M2.callf() == [M2.Y()]
+            @test M2.callg() == [M2.Z()]
+            @test M.fpush(M.X[]) == [M.X()]
+        end
+        mT = which(push!, (Vector{T} where T, Any))
+        groups = group_roots(mT)
+        @test Memory{M2.Y} ∈ groups[M2id]
+        @test Memory{M2.Z} ∈ groups[M2id]
+        @test Memory{M.X} ∈ groups[Mid]
+        @test Memory{M.X} ∉ groups[M2id]
     end
-    mT = which(push!, (Vector{T} where T, Any))
-    groups = group_roots(mT)
-    @test M2.Y ∈ groups[M2id]
-    @test M2.Z ∈ groups[M2id]
-    @test M.X ∈ groups[Mid]
-    @test M.X ∉ groups[M2id]
     # backedges of external MethodInstances
     # Root gets used by RootA and RootB, and both consumers end up inferring the same MethodInstance from Root
     # Do both callers get listed as backedges?
@@ -788,31 +891,33 @@ precompile_test_harness("code caching") do dir
     Base.compilecache(Base.PkgId(string(RootB)))
     @eval using $RootA
     @eval using $RootB
-    MA = getfield(@__MODULE__, RootA)
-    MB = getfield(@__MODULE__, RootB)
-    M = getfield(MA, RootModule)
-    m = which(M.f, (Any,))
-    for mi in Base.specializations(m)
-        mi === nothing && continue
-        mi = mi::Core.MethodInstance
-        if mi.specTypes.parameters[2] === Int8
-            # external callers
-            mods = Module[]
-            for be in mi.backedges
-                push!(mods, be.def.module)
-            end
-            @test MA ∈ mods
-            @test MB ∈ mods
-            @test length(mods) == 2
-        elseif mi.specTypes.parameters[2] === Int16
-            # internal callers
-            meths = Method[]
-            for be in mi.backedges
-                push!(meths, be.def)
+    invokelatest() do
+        MA = getfield(@__MODULE__, RootA)
+        MB = getfield(@__MODULE__, RootB)
+        M = getfield(MA, RootModule)
+        m = which(M.f, (Any,))
+        for mi in Base.specializations(m)
+            mi === nothing && continue
+            mi = mi::Core.MethodInstance
+            if mi.specTypes.parameters[2] === Int8
+                # external callers
+                mods = Module[]
+                for be in mi.backedges
+                    push!(mods, ((be.def::Core.MethodInstance).def::Method).module) # XXX
+                end
+                @test MA ∈ mods
+                @test MB ∈ mods
+                @test length(mods) == 2
+            elseif mi.specTypes.parameters[2] === Int16
+                # internal callers
+                meths = Method[]
+                for be in mi.backedges
+                    push!(meths, (be.def::Method).def) # XXX
+                end
+                @test which(M.g1, ()) ∈ meths
+                @test which(M.g2, ()) ∈ meths
+                @test length(meths) == 2
             end
-            @test which(M.g1, ()) ∈ meths
-            @test which(M.g2, ()) ∈ meths
-            @test length(meths) == 2
         end
     end
 
@@ -842,6 +947,23 @@ precompile_test_harness("code caching") do dir
         use_stale(c) = stale(c[1]) + not_stale("hello")
         build_stale(x) = use_stale(Any[x])
 
+        # bindings
+        struct InvalidatedBinding
+            x::Int
+        end
+        struct Wrapper
+            ib::InvalidatedBinding
+        end
+        makewib(x) = Wrapper(InvalidatedBinding(x))
+        const gib = makewib(1)
+        fib() = gib.ib.x
+
+        struct LogBindingInvalidation
+            x::Int
+        end
+        const glbi = LogBindingInvalidation(1)
+        flbi() = @__MODULE__().glbi.x
+
         # force precompilation
         build_stale(37)
         stale('c')
@@ -866,11 +988,15 @@ precompile_test_harness("code caching") do dir
         useA() = $StaleA.stale("hello")
         useA2() = useA()
 
-        # force precompilation
+        useflbi() = $StaleA.flbi()
+
+        # force precompilation, force call so that inlining heuristics don't affect the result
         begin
             Base.Experimental.@force_compile
-            useA2()
+            @noinline useA2()
+            @noinline useflbi()
         end
+        precompile($StaleA.fib, ())
 
         ## Reporting tests
         call_nbits(x::Integer) = $StaleA.nbits(x)
@@ -898,66 +1024,136 @@ precompile_test_harness("code caching") do dir
         Base.compilecache(Base.PkgId(string(pkg)))
     end
     @eval using $StaleA
-    MA = getfield(@__MODULE__, StaleA)
+    MA = invokelatest(getglobal, @__MODULE__, StaleA)
     Base.eval(MA, :(nbits(::UInt8) = 8))
+    Base.eval(MA, quote
+        struct InvalidatedBinding
+            x::Float64
+        end
+        struct Wrapper
+            ib::InvalidatedBinding
+        end
+        const gib = makewib(2.0)
+    end)
+    # TODO: test a "method_globalref" invalidation also
+    Base.eval(MA, quote
+        struct LogBindingInvalidation # binding invalidations can't be done during precompilation
+            x::Float64
+        end
+        const glbi = LogBindingInvalidation(2.0)
+    end)
     @eval using $StaleC
-    invalidations = ccall(:jl_debug_method_invalidation, Any, (Cint,), 1)
+    invalidations = Base.ReinferUtils.debug_method_invalidation(true)
     @eval using $StaleB
-    ccall(:jl_debug_method_invalidation, Any, (Cint,), 0)
-    MB = getfield(@__MODULE__, StaleB)
-    MC = getfield(@__MODULE__, StaleC)
-    world = Base.get_world_counter()
-    m = only(methods(MA.use_stale))
-    mi = m.specializations::Core.MethodInstance
-    @test hasvalid(mi, world)   # it was re-inferred by StaleC
-    m = only(methods(MA.build_stale))
-    mis = filter(!isnothing, collect(m.specializations::Core.SimpleVector))
-    @test length(mis) == 2
-    for mi in mis
-        mi = mi::Core.MethodInstance
-        if mi.specTypes.parameters[2] == Int
-            @test mi.cache.max_world < world
-        else
-            # The variant for String got "healed" by recompilation in StaleC
-            @test mi.specTypes.parameters[2] == String
-            @test mi.cache.max_world == typemax(UInt)
+    Base.ReinferUtils.debug_method_invalidation(false)
+    invokelatest() do
+        MB = getfield(@__MODULE__, StaleB)
+        MC = getfield(@__MODULE__, StaleC)
+        world = Base.get_world_counter()
+        m = only(methods(MA.use_stale))
+        mi = m.specializations::Core.MethodInstance
+        @test hasvalid(mi, world)   # it was re-inferred by StaleC
+        m = only(methods(MA.build_stale))
+        mis = filter(!isnothing, collect(m.specializations::Core.SimpleVector))
+        @test length(mis) == 2
+        for mi in mis
+            mi = mi::Core.MethodInstance
+            if mi.specTypes.parameters[2] == Int
+                @test mi.cache.max_world < world
+            else
+                # The variant for String got "healed" by recompilation in StaleC
+                @test mi.specTypes.parameters[2] == String
+                @test mi.cache.max_world == typemax(UInt)
+            end
         end
+        m = only(methods(MB.useA))
+        mi = m.specializations::Core.MethodInstance
+        @test !hasvalid(mi, world)      # invalidated by the stale(x::String) method in StaleC
+        m = only(methods(MC.call_buildstale))
+        mi = m.specializations::Core.MethodInstance
+        @test hasvalid(mi, world)       # was compiled with the new method
+        m = only(methods(MA.fib))
+        mi = m.specializations::Core.MethodInstance
+        @test !hasvalid(mi, world)      # invalidated by redefining `gib` before loading StaleB
+        @test MA.fib() === 2.0
+
+        # Reporting test (ensure SnoopCompile works)
+        @test all(i -> isassigned(invalidations, i), eachindex(invalidations))
+        m = only(methods(MB.call_nbits))
+        for mi in Base.specializations(m)
+            hv = hasvalid(mi, world)
+            @test mi.specTypes.parameters[end] === Integer ? !hv : hv
+        end
+
+        idxs = findall(==("verify_methods"), invalidations)
+        idxsbits = filter(idxs) do i
+            mi = invalidations[i-1]
+            mi.def.def === m
+        end
+        idx = only(idxsbits)
+        tagbad = invalidations[idx+1]
+        @test isa(tagbad, Core.CodeInstance)
+        j = findfirst(==(tagbad), invalidations)
+        @test invalidations[j-1] == "insert_backedges_callee"
+        @test isa(invalidations[j-2], Type)
+        @test isa(invalidations[j+1], Vector{Any}) # [nbits(::UInt8)]
+        m = only(methods(MB.useA2))
+        mi = only(Base.specializations(m))
+        @test !hasvalid(mi, world)
+        @test any(x -> x isa Core.CodeInstance && x.def === mi, invalidations)
+
+        idxb = findfirst(x -> x isa Core.Binding, invalidations)
+        @test invalidations[idxb+1] == "insert_backedges_callee"
+        idxv = findnext(==("verify_methods"), invalidations, idxb)
+        if invalidations[idxv-1].def.def.name === :getproperty
+            idxv = findnext(==("verify_methods"), invalidations, idxv+1)
+        end
+        idxv = findnext(==(invalidations[idxv-1]), invalidations, idxv+1)
+        @test invalidations[idxv-1] == "verify_methods"
+        @test invalidations[idxv-2].def.def.name === :useflbi
+
+        m = only(methods(MB.map_nbits))
+        @test !hasvalid(m.specializations::Core.MethodInstance, world+1) # insert_backedges invalidations also trigger their backedges
     end
-    m = only(methods(MB.useA))
-    mi = m.specializations::Core.MethodInstance
-    @test !hasvalid(mi, world)      # invalidated by the stale(x::String) method in StaleC
-    m = only(methods(MC.call_buildstale))
-    mi = m.specializations::Core.MethodInstance
-    @test hasvalid(mi, world)       # was compiled with the new method
-
-    # Reporting test (ensure SnoopCompile works)
-    @test all(i -> isassigned(invalidations, i), eachindex(invalidations))
-    m = only(methods(MB.call_nbits))
-    for mi in Base.specializations(m)
-        hv = hasvalid(mi, world)
-        @test mi.specTypes.parameters[end] === Integer ? !hv : hv
-    end
+end
 
-    setglobal!(Main, :inval, invalidations)
-    idxs = findall(==("verify_methods"), invalidations)
-    idxsbits = filter(idxs) do i
-        mi = invalidations[i-1]
-        mi.def == m
+precompile_test_harness("precompiletools") do dir
+    PrecompileToolsModule = :PCTb8321416e8a3e2f1
+    write(joinpath(dir, "$PrecompileToolsModule.jl"),
+        """
+        module $PrecompileToolsModule
+            struct MyType
+                x::Int
+            end
+
+            function call_findfirst(x, list)
+                # call a method defined in Base by runtime dispatch
+                return findfirst(==(Base.inferencebarrier(x)), Base.inferencebarrier(list))
+            end
+
+            let
+                ccall(:jl_tag_newly_inferred_enable, Cvoid, ())
+                call_findfirst(MyType(2), [MyType(1), MyType(2), MyType(3)])
+                ccall(:jl_tag_newly_inferred_disable, Cvoid, ())
+            end
+        end
+        """
+    )
+    pkgid = Base.PkgId(string(PrecompileToolsModule))
+    @test !Base.isprecompiled(pkgid)
+    Base.compilecache(pkgid)
+    @test Base.isprecompiled(pkgid)
+    @eval using $PrecompileToolsModule
+    M = invokelatest(getglobal, @__MODULE__, PrecompileToolsModule)
+    invokelatest() do
+        m = which(Tuple{typeof(findfirst), Base.Fix2{typeof(==), T}, Vector{T}} where T)
+        success = 0
+        for mi in Base.specializations(m)
+            sig = Base.unwrap_unionall(mi.specTypes)
+            success += sig.parameters[3] === Vector{M.MyType}
+        end
+        @test success == 1
     end
-    idx = only(idxsbits)
-    tagbad = invalidations[idx+1]
-    @test isa(tagbad, Int32)
-    j = findfirst(==(tagbad), invalidations)
-    @test invalidations[j-1] == "insert_backedges_callee"
-    @test isa(invalidations[j-2], Type)
-    @test isa(invalidations[j+1], Vector{Any}) # [nbits(::UInt8)]
-    m = only(methods(MB.useA2))
-    mi = only(Base.specializations(m))
-    @test !hasvalid(mi, world)
-    @test mi ∈ invalidations
-
-    m = only(methods(MB.map_nbits))
-    @test !hasvalid(m.specializations::Core.MethodInstance, world+1) # insert_backedges invalidations also trigger their backedges
 end
 
 precompile_test_harness("invoke") do dir
@@ -992,6 +1188,17 @@ precompile_test_harness("invoke") do dir
               f44320(::Any) = 2
               g44320() = invoke(f44320, Tuple{Any}, 0)
               g44320()
+              # Issue #57115
+              f57115(@nospecialize(::Any)) = error("unimplemented")
+              function g57115(@nospecialize(x))
+                  if @noinline rand(Bool)
+                      # Add an 'invoke' edge from 'foo' to 'bar'
+                      Core.invoke(f57115, Tuple{Any}, x)
+                  else
+                      # ... and also an identical 'call' edge
+                      @noinline f57115(x)
+                  end
+              end
 
               # Adding new specializations should not invalidate `invoke`s
               function getlast(itr)
@@ -1008,6 +1215,8 @@ precompile_test_harness("invoke") do dir
           """
           module $CallerModule
               using $InvokeModule
+              import $InvokeModule: f57115, g57115
+
               # involving external modules
               callf(x) = f(x)
               callg(x) = x < 5 ? g(x) : invoke(g, Tuple{Real}, x)
@@ -1028,26 +1237,29 @@ precompile_test_harness("invoke") do dir
 
               # Issue #44320
               f44320(::Real) = 3
+              # Issue #57115
+              f57115(::Int) = 1
 
               call_getlast(x) = getlast(x)
 
-              # force precompilation
+              # force precompilation, force call so that inlining heuristics don't affect the result
               begin
                   Base.Experimental.@force_compile
-                  callf(3)
-                  callg(3)
-                  callh(3)
-                  callq(3)
-                  callqi(3)
-                  callfnc(3)
-                  callgnc(3)
-                  callhnc(3)
-                  callqnc(3)
-                  callqnci(3)
-                  internal(3)
-                  internalnc(3)
-                  call_getlast([1,2,3])
+                  @noinline callf(3)
+                  @noinline callg(3)
+                  @noinline callh(3)
+                  @noinline callq(3)
+                  @noinline callqi(3)
+                  @noinline callfnc(3)
+                  @noinline callgnc(3)
+                  @noinline callhnc(3)
+                  @noinline callqnc(3)
+                  @noinline callqnci(3)
+                  @noinline internal(3)
+                  @noinline internalnc(3)
+                  @noinline call_getlast([1,2,3])
               end
+              precompile(g57115, (Any,))
 
               # Now that we've precompiled, invalidate with a new method that overrides the `invoke` dispatch
               $InvokeModule.h(x::Integer) = -1
@@ -1059,63 +1271,90 @@ precompile_test_harness("invoke") do dir
           """)
     Base.compilecache(Base.PkgId(string(CallerModule)))
     @eval using $InvokeModule: $InvokeModule
-    MI = getfield(@__MODULE__, InvokeModule)
+    MI = invokelatest(getglobal, @__MODULE__, InvokeModule)
     @eval $MI.getlast(a::UnitRange) = a.stop
     @eval using $CallerModule
-    M = getfield(@__MODULE__, CallerModule)
+    invokelatest() do
+        M = getfield(@__MODULE__, CallerModule)
+
+        get_method_for_type(func, @nospecialize(T)) = which(func, (T,)) # return the method func(::T)
+        function nvalid(mi::Core.MethodInstance)
+            isdefined(mi, :cache) || return 0
+            ci = mi.cache
+            n = Int(ci.max_world == typemax(UInt))
+            while isdefined(ci, :next)
+                ci = ci.next
+                n += ci.max_world == typemax(UInt)
+            end
+            return n
+        end
 
-    function get_method_for_type(func, @nospecialize(T))   # return the method func(::T)
-        for m in methods(func)
-            m.sig.parameters[end] === T && return m
+        for func in (M.f, M.g, M.internal, M.fnc, M.gnc, M.internalnc)
+            m = get_method_for_type(func, Real)
+            mi = m.specializations::Core.MethodInstance
+            @test length(mi.backedges) == 2 || length(mi.backedges) == 4 # internalnc might have a constprop edge
+            @test mi.backedges[1] === Tuple{typeof(func), Real}
+            @test isa(mi.backedges[2], Core.CodeInstance)
+            if length(mi.backedges) == 4
+                @test mi.backedges[3] === Tuple{typeof(func), Real}
+                @test isa(mi.backedges[4], Core.CodeInstance)
+                @test mi.backedges[2] !== mi.backedges[4]
+                @test mi.backedges[2].def === mi.backedges[4].def
+            end
+            @test mi.cache.max_world == typemax(mi.cache.max_world)
         end
-        error("no ::Real method found for $func")
-    end
-    function nvalid(mi::Core.MethodInstance)
-        isdefined(mi, :cache) || return 0
-        ci = mi.cache
-        n = Int(ci.max_world == typemax(UInt))
-        while isdefined(ci, :next)
-            ci = ci.next
-            n += ci.max_world == typemax(UInt)
+        for func in (M.q, M.qnc)
+            m = get_method_for_type(func, Integer)
+            mi = m.specializations::Core.MethodInstance
+            @test length(mi.backedges) == 2
+            @test mi.backedges[1] === Tuple{typeof(func), Integer}
+            @test isa(mi.backedges[2], Core.CodeInstance)
+            @test mi.cache.max_world == typemax(mi.cache.max_world)
         end
-        return n
-    end
 
-    for func in (M.f, M.g, M.internal, M.fnc, M.gnc, M.internalnc)
-        m = get_method_for_type(func, Real)
+        m = get_method_for_type(M.h, Real)
+        @test nvalid(m.specializations::Core.MethodInstance) == 1
+        m = get_method_for_type(M.hnc, Real)
+        @test nvalid(m.specializations::Core.MethodInstance) == 1
+        m = only(methods(M.callq))
+        @test nvalid(m.specializations::Core.MethodInstance) == 1
+        m = only(methods(M.callqnc))
+        @test nvalid(m.specializations::Core.MethodInstance) == 1
+        m = only(methods(M.callqi))
+        @test (m.specializations::Core.MethodInstance).specTypes == Tuple{typeof(M.callqi), Int}
+        m = only(methods(M.callqnci))
+        @test (m.specializations::Core.MethodInstance).specTypes == Tuple{typeof(M.callqnci), Int}
+
+        m = only(methods(M.g44320))
+        @test (m.specializations::Core.MethodInstance).cache.max_world == typemax(UInt)
+
+        m = only(methods(M.g57115))
         mi = m.specializations::Core.MethodInstance
-        @test length(mi.backedges) == 2
-        @test mi.backedges[1] === Tuple{typeof(func), Real}
-        @test isa(mi.backedges[2], Core.MethodInstance)
-        @test mi.cache.max_world == typemax(mi.cache.max_world)
-    end
-    for func in (M.q, M.qnc)
-        m = get_method_for_type(func, Integer)
-        mi = m.specializations::Core.MethodInstance
-        @test length(mi.backedges) == 2
-        @test mi.backedges[1] === Tuple{typeof(func), Integer}
-        @test isa(mi.backedges[2], Core.MethodInstance)
-        @test mi.cache.max_world == typemax(mi.cache.max_world)
-    end
 
-    m = get_method_for_type(M.h, Real)
-    @test isempty(Base.specializations(m))
-    m = get_method_for_type(M.hnc, Real)
-    @test isempty(Base.specializations(m))
-    m = only(methods(M.callq))
-    @test isempty(Base.specializations(m)) || nvalid(m.specializations::Core.MethodInstance) == 0
-    m = only(methods(M.callqnc))
-    @test isempty(Base.specializations(m)) || nvalid(m.specializations::Core.MethodInstance) == 0
-    m = only(methods(M.callqi))
-    @test (m.specializations::Core.MethodInstance).specTypes == Tuple{typeof(M.callqi), Int}
-    m = only(methods(M.callqnci))
-    @test (m.specializations::Core.MethodInstance).specTypes == Tuple{typeof(M.callqnci), Int}
-
-    m = only(methods(M.g44320))
-    @test (m.specializations::Core.MethodInstance).cache.max_world == typemax(UInt)
-
-    m = which(MI.getlast, (Any,))
-    @test (m.specializations::Core.MethodInstance).cache.max_world == typemax(UInt)
+        f_m = get_method_for_type(M.f57115, Any)
+        f_mi = f_m.specializations::Core.MethodInstance
+
+        # Make sure that f57115(::Any) has a 'call' backedge to 'g57115'
+        has_f_call_backedge = false
+        i = 1
+        while i ≤ length(f_mi.backedges)
+            if f_mi.backedges[i] isa DataType
+                # invoke edge - skip
+                i += 2
+            else
+                caller = f_mi.backedges[i]::Core.CodeInstance
+                if caller.def === mi
+                    has_f_call_backedge = true
+                    break
+                end
+                i += 1
+            end
+        end
+        @test has_f_call_backedge
+
+        m = which(MI.getlast, (Any,))
+        @test (m.specializations::Core.MethodInstance).cache.max_world == typemax(UInt)
+    end
 
     # Precompile specific methods for arbitrary arg types
     invokeme(x) = 1
@@ -1212,7 +1451,6 @@ precompile_test_harness("package_callbacks") do dir
               """)
         Base.compilecache(Base.PkgId("$(Test2_module)"))
 
-        @test !Base.isbindingresolved(Main, Test2_module)
         Base.require(Main, Test2_module)
         @test take!(loaded_modules) == Test1_module
         @test take!(loaded_modules) == Test2_module
@@ -1248,6 +1486,25 @@ precompile_test_harness("package_callbacks") do dir
     finally
         pop!(Base.package_callbacks)
     end
+    Test5_module = :Teste4095a85
+    write(joinpath(dir, "$(Test5_module).jl"),
+    """
+    module $(Test5_module)
+    end
+    """)
+    Base.compilecache(Base.PkgId("$(Test5_module)"))
+    cnt = 0
+    push!(Base.package_callbacks, _->(cnt += 1))
+    try
+        @eval using $(Symbol(Test5_module))
+        @eval using $(Symbol(Test5_module))
+        @eval using $(Symbol(Test5_module))
+        @eval using $(Symbol(Test5_module))
+        @eval using $(Symbol(Test5_module))
+        @test cnt == 1
+    finally
+        pop!(Base.package_callbacks)
+    end
 end
 
 # Issue #19960
@@ -1255,11 +1512,11 @@ end
     test_workers = addprocs(1)
     push!(test_workers, myid())
     save_cwd = pwd()
-    temp_path = mktempdir()
+    temp_path = mkdepottempdir()
     try
         cd(temp_path)
         load_path = mktempdir(temp_path)
-        load_cache_path = mktempdir(temp_path)
+        load_cache_path = mkdepottempdir(temp_path)
 
         ModuleA = :Issue19960A
         ModuleB = :Issue19960B
@@ -1288,13 +1545,15 @@ end
         end
         try
             @eval using $ModuleB
-            uuid = Base.module_build_id(Base.root_module(Main, ModuleB))
-            for wid in test_workers
-                @test Distributed.remotecall_eval(Main, wid, quote
-                        Base.module_build_id(Base.root_module(Main, $(QuoteNode(ModuleB))))
-                    end) == uuid
-                if wid != myid() # avoid world-age errors on the local proc
-                    @test remotecall_fetch(g, wid) == wid
+            invokelatest() do
+                uuid = Base.module_build_id(Base.root_module(Main, ModuleB))
+                for wid in test_workers
+                    @test Distributed.remotecall_eval(Main, wid, quote
+                            Base.module_build_id(Base.root_module(Main, $(QuoteNode(ModuleB))))
+                        end) == uuid
+                    if wid != myid() # avoid world-age errors on the local proc
+                        @test remotecall_fetch(g, wid) == wid
+                    end
                 end
             end
         finally
@@ -1305,11 +1564,6 @@ end
         end
     finally
         cd(save_cwd)
-        try
-            rm(temp_path, recursive=true)
-        catch err
-            @show err
-        end
         pop!(test_workers) # remove myid
         rmprocs(test_workers)
     end
@@ -1415,21 +1669,28 @@ precompile_test_harness("Issue #26028") do load_path
         """
         module Foo26028
         module Bar26028
+            using Foo26028: Foo26028 as InnerFoo1
+            using ..Foo26028: Foo26028 as InnerFoo2
             x = 0
+            y = 0
         end
         function __init__()
-            include(joinpath(@__DIR__, "Baz26028.jl"))
+            Baz = @eval module Baz26028
+                  using Test
+                  public @test_throws
+                  import Foo26028.Bar26028.y as y1
+                  import ..Foo26028.Bar26028.y as y2
+                  end
+            @eval Base \$Baz.@test_throws(ConcurrencyViolationError("deadlock detected in loading Foo26028 using Foo26028"),
+                                         import Foo26028.Bar26028.x)
         end
         end
         """)
-    write(joinpath(load_path, "Baz26028.jl"),
-        """
-        module Baz26028
-        import Foo26028.Bar26028.x
-        end
-        """)
     Base.compilecache(Base.PkgId("Foo26028"))
     @test_nowarn @eval using Foo26028
+    invokelatest() do
+        @test Foo26028 === Foo26028.Bar26028.InnerFoo1 === Foo26028.Bar26028.InnerFoo2
+    end
 end
 
 precompile_test_harness("Issue #29936") do load_path
@@ -1442,7 +1703,9 @@ precompile_test_harness("Issue #29936") do load_path
           end
           """)
     @eval using Foo29936
-    @test [("Plan", Foo29936.m), ("Plan", Foo29936.h),] isa Vector{Tuple{String,Val}}
+    invokelatest() do
+        @test [("Plan", Foo29936.m), ("Plan", Foo29936.h),] isa Vector{Tuple{String,Val}}
+    end
 end
 
 precompile_test_harness("Issue #25971") do load_path
@@ -1587,14 +1850,15 @@ precompile_test_harness("Issue #46558") do load_path
     @test (@eval $Foo.foo(1)) == 2
 end
 
+# TODO: Decide if we need to keep supporting this.
 precompile_test_harness("issue #46296") do load_path
     write(joinpath(load_path, "CodeInstancePrecompile.jl"),
         """
         module CodeInstancePrecompile
 
         mi = first(Base.specializations(first(methods(identity))))
-        ci = Core.CodeInstance(mi, Any, nothing, nothing, zero(Int32), typemin(UInt),
-                               typemax(UInt), zero(UInt32), zero(UInt32), nothing, 0x00)
+        ci = Core.CodeInstance(mi, nothing, Any, Any, nothing, nothing, zero(Int32), typemin(UInt),
+                               typemax(UInt), zero(UInt32), nothing, Core.DebugInfo(mi), Core.svec())
 
         __init__() = @assert ci isa Core.CodeInstance
 
@@ -1604,6 +1868,12 @@ precompile_test_harness("issue #46296") do load_path
     (@eval (using CodeInstancePrecompile))
 end
 
+@testset "Precompile external abstract interpreter" begin
+    dir = @__DIR__
+    @test success(pipeline(Cmd(`$(Base.julia_cmd()) --startup-file=no precompile_absint1.jl`; dir); stdout, stderr))
+    @test success(pipeline(Cmd(`$(Base.julia_cmd()) --startup-file=no precompile_absint2.jl`; dir); stdout, stderr))
+end
+
 precompile_test_harness("Recursive types") do load_path
     write(joinpath(load_path, "RecursiveTypeDef.jl"),
         """
@@ -1618,8 +1888,10 @@ precompile_test_harness("Recursive types") do load_path
         """)
     Base.compilecache(Base.PkgId("RecursiveTypeDef"))
     (@eval (using RecursiveTypeDef))
-    a = Base.invokelatest(RecursiveTypeDef.A{Float64,2,String}, (3, 3))
-    @test isa(a, AbstractArray)
+    invokelatest() do
+        a = Base.invokelatest(RecursiveTypeDef.A{Float64,2,String}, (3, 3))
+        @test isa(a, AbstractArray)
+    end
 end
 
 @testset "issue 46778" begin
@@ -1643,7 +1915,9 @@ precompile_test_harness("Module tparams") do load_path
         """)
     Base.compilecache(Base.PkgId("ModuleTparams"))
     (@eval (using ModuleTparams))
-    @test ModuleTparams.the_struct === Base.invokelatest(ModuleTparams.ParamStruct{ModuleTparams.TheTParam})
+    invokelatest() do
+        @test ModuleTparams.the_struct === Base.invokelatest(ModuleTparams.ParamStruct{ModuleTparams.TheTParam})
+    end
 end
 
 precompile_test_harness("PkgCacheInspector") do load_path
@@ -1669,7 +1943,7 @@ precompile_test_harness("PkgCacheInspector") do load_path
         try
             # isvalid_cache_header returns checksum id or zero
             Base.isvalid_cache_header(io) == 0 && throw(ArgumentError("Invalid header in cache file $cachefile."))
-            depmodnames = Base.parse_cache_header(io)[3]
+            depmodnames = Base.parse_cache_header(io, cachefile)[3]
             Base.isvalid_file_crc(io) || throw(ArgumentError("Invalid checksum in cache file $cachefile."))
         finally
             close(io)
@@ -1687,17 +1961,25 @@ precompile_test_harness("PkgCacheInspector") do load_path
     end
 
     if ocachefile !== nothing
-        sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring), ocachefile, depmods, true, "PCI")
+        sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring, Cint),
+            ocachefile, depmods, #=completeinfo=#true, "PCI", false)
     else
-        sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring), cachefile, depmods, true, "PCI")
+        sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring),
+            cachefile, depmods, #=completeinfo=#true, "PCI")
     end
 
-    modules, init_order, external_methods, new_specializations, new_method_roots, external_targets, edges = sv
-    m = only(external_methods)
-    @test m.name == :repl_cmd && m.nargs < 2
-    @test any(new_specializations) do ci
-        mi = ci.def
-        mi.specTypes == Tuple{typeof(Base.repl_cmd), Int, String}
+    modules, init_order, internal_methods, new_method_roots, cache_sizes = sv
+    for m in internal_methods::Vector{Any}
+        m isa Core.MethodInstance || continue
+        m = m.func::Method
+        if m.name !== :f
+            @test m.name == :repl_cmd && m.nargs == 1
+        end
+    end
+    @test any(internal_methods) do ci
+        ci isa Core.CodeInstance || return false
+        mi = ci.def::Core.MethodInstance
+        return mi.specTypes == Tuple{typeof(Base.repl_cmd), Int, String}
     end
 end
 
@@ -1715,7 +1997,9 @@ precompile_test_harness("DynamicExpressions") do load_path
         """)
     Base.compilecache(Base.PkgId("Float16MWE"))
     @eval using Float16MWE
-    @test @invokelatest(Float16MWE.doconvert(Float16MWE.Node{Float16}, -1.2)) === Float16(-1.2)
+    invokelatest() do
+        @test Float16MWE.doconvert(Float16MWE.Node{Float16}, -1.2) === Float16(-1.2)
+    end
 end
 
 precompile_test_harness("BadInvalidations") do load_path
@@ -1730,7 +2014,9 @@ precompile_test_harness("BadInvalidations") do load_path
     Base.compilecache(Base.PkgId("BadInvalidations"))
     @eval Base a_method_to_overwrite_in_test() = inferencebarrier(2)
     @eval using BadInvalidations
-    @test Base.invokelatest(BadInvalidations.getval) === 2
+    invokelatest() do
+        @test BadInvalidations.getval() === 2
+    end
 end
 
 # https://github.com/JuliaLang/julia/issues/48074
@@ -1766,12 +2052,638 @@ precompile_test_harness("Issue #48391") do load_path
         """)
     ji, ofile = Base.compilecache(Base.PkgId("I48391"))
     @eval using I48391
-    x = Base.invokelatest(I48391.SurrealFinite)
+    x = invokelatest(()->I48391.SurrealFinite())
     @test Base.invokelatest(isless, x, x) === "good"
     @test_throws ErrorException isless(x, x)
 end
 
-empty!(Base.DEPOT_PATH)
-append!(Base.DEPOT_PATH, original_depot_path)
-empty!(Base.LOAD_PATH)
-append!(Base.LOAD_PATH, original_load_path)
+precompile_test_harness("Generator nospecialize") do load_path
+    write(joinpath(load_path, "GenNoSpec.jl"),
+        """
+        module GenNoSpec
+        @generated function f(x...)
+            :((\$(Base.Meta.quot(x)),))
+        end
+        @assert precompile(Tuple{typeof(which(f, (Any,Any)).generator.gen), Any, Any})
+        end
+        """)
+    ji, ofile = Base.compilecache(Base.PkgId("GenNoSpec"))
+    @eval using GenNoSpec
+end
+
+precompile_test_harness("Issue #50538") do load_path
+    write(joinpath(load_path, "I50538.jl"),
+        """
+        module I50538
+        const newglobal = try
+            eval(Expr(:global, GlobalRef(Base, :newglobal)))
+        catch ex
+            ex isa ErrorException || rethrow()
+            ex
+        end
+        const newtype = try
+            Core.eval(Base, :(global newglobal::Any))
+        catch ex
+            ex isa ErrorException || rethrow()
+            ex
+        end
+        global undefglobal::Any
+        end
+        """)
+    ji, ofile = Base.compilecache(Base.PkgId("I50538"))
+    @eval using I50538
+    invokelatest() do
+        @test I50538.newglobal.msg == "Creating a new global in closed module `Base` (`newglobal`) breaks incremental compilation because the side effects will not be permanent."
+        @test I50538.newtype.msg == "Evaluation into the closed module `Base` breaks incremental compilation because the side effects will not be permanent. This is likely due to some other module mutating `Base` with `eval` during precompilation - don't do this."
+        @test_throws(ErrorException("cannot set type for global I50538.undefglobal. It already has a value or is already set to a different type."),
+                    Core.eval(I50538, :(global undefglobal::Int)))
+        Core.eval(I50538, :(global undefglobal::Any))
+        invokelatest() do
+            @test Core.get_binding_type(I50538, :undefglobal) === Any
+            @test !isdefined(I50538, :undefglobal)
+        end
+    end
+end
+
+precompile_test_harness("Test flags") do load_path
+    write(joinpath(load_path, "TestFlags.jl"),
+          """
+          module TestFlags
+          end
+          """)
+
+    current_flags = Base.CacheFlags()
+    modified_flags = Base.CacheFlags(
+        current_flags.use_pkgimages,
+        current_flags.debug_level,
+        2,
+        current_flags.inline,
+        3
+    )
+    ji, ofile = Base.compilecache(Base.PkgId("TestFlags"); flags=`--check-bounds=no -O3`)
+    open(ji, "r") do io
+        Base.isvalid_cache_header(io)
+        _, _, _, _, _, _, _, flags = Base.parse_cache_header(io, ji)
+        cacheflags = Base.CacheFlags(flags)
+        @test cacheflags.check_bounds == 2
+        @test cacheflags.opt_level == 3
+    end
+end
+
+if Base.get_bool_env("CI", false) && (Sys.ARCH === :x86_64 || Sys.ARCH === :aarch64)
+    @testset "Multiversioning" begin # This test isn't the most robust because it relies on being in CI,
+        pkg = Base.identify_package("Test")  # but we need better target reflection to make a better one.
+        cachefiles = Base.find_all_in_cache_path(pkg)
+        pkgpath = Base.locate_package(pkg)
+        idx = findfirst(cachefiles) do cf
+            Base.stale_cachefile(pkgpath, cf) !== true
+        end
+        targets = Base.parse_image_targets(Base.parse_cache_header(cachefiles[idx])[7])
+        @test length(targets) > 1
+    end
+end
+
+precompile_test_harness("No backedge precompile") do load_path
+    # Test that the system doesn't accidentally forget to revalidate a method without backedges
+    write(joinpath(load_path, "NoBackEdges.jl"),
+          """
+          module NoBackEdges
+          @eval f(a::Int, b::Int) = \$(Core.Intrinsics.add_int)(a, b)
+          precompile(f, (Int, Int))
+          end
+          """)
+    ji, ofile = Base.compilecache(Base.PkgId("NoBackEdges"))
+    @eval using NoBackEdges
+    invokelatest() do
+        @test first(methods(NoBackEdges.f)).specializations.cache.max_world === typemax(UInt)
+    end
+end
+
+precompile_test_harness("Pre-compile Core methods") do load_path
+    # Core methods should support pre-compilation as external CI's like anything else
+    # https://github.com/JuliaLang/julia/issues/58497
+    write(joinpath(load_path, "CorePrecompilation.jl"),
+          """
+          module CorePrecompilation
+          struct Foo end
+          precompile(Tuple{Type{Vector{Foo}}, UndefInitializer, Tuple{Int}})
+          end
+          """)
+    ji, ofile = Base.compilecache(Base.PkgId("CorePrecompilation"))
+    @eval using CorePrecompilation
+    invokelatest() do
+        let tt = Tuple{Type{Vector{CorePrecompilation.Foo}}, UndefInitializer, Tuple{Int}},
+            match = first(Base._methods_by_ftype(tt, -1, Base.get_world_counter())),
+            mi = Base.specialize_method(match)
+            @test isdefined(mi, :cache)
+            @test mi.cache.max_world === typemax(UInt)
+            @test mi.cache.invoke != C_NULL
+        end
+    end
+end
+
+# Test precompilation of generated functions that return opaque closures
+# (with constprop marker set to false).
+precompile_test_harness("Generated Opaque") do load_path
+    write(joinpath(load_path, "GeneratedOpaque.jl"),
+        """
+        module GeneratedOpaque
+        using Base.Experimental: @opaque
+        using InteractiveUtils
+        const_int_barrier() = Base.inferencebarrier(1)::typeof(1)
+        const lno = LineNumberNode(1, :none)
+
+        const ci = @code_lowered const_int_barrier()
+        @generated function oc_re_generated_no_partial()
+            Expr(:new_opaque_closure, Tuple{}, Any, Any, false,
+                Expr(:opaque_closure_method, nothing, 0, false, lno, ci))
+        end
+        @assert oc_re_generated_no_partial()() === 1
+        @generated function oc_re_generated_no_partial_macro()
+            AT = nothing
+            RT = nothing
+            allow_partial = false # makes this legal to generate during pre-compile
+            return Expr(:opaque_closure, AT, RT, RT, allow_partial, :(()->const_int_barrier()))
+        end
+        @assert oc_re_generated_no_partial_macro()() === 1
+        end
+        """)
+    Base.compilecache(Base.PkgId("GeneratedOpaque"))
+    @eval using GeneratedOpaque
+    let oc = invokelatest(()->GeneratedOpaque.oc_re_generated_no_partial())
+        @test oc.source.specializations.cache.max_world === typemax(UInt)
+        @test oc() === 1
+    end
+end
+
+precompile_test_harness("Issue #52063") do load_path
+    fname = joinpath(load_path, "i_do_not_exist.jl")
+    @test try
+        include_dependency(fname); false
+    catch e
+        @test e isa SystemError
+        @test e.prefix == "opening file or folder $(repr(fname))"
+        true
+    end
+    touch(fname)
+    @test include_dependency(fname) === nothing
+    chmod(fname, 0x000)
+    @test try
+        include_dependency(fname); false
+    catch e
+        @test e isa SystemError
+        @test e.prefix == "opening file or folder $(repr(fname))"
+        true
+    end skip = (Sys.isunix() && Libc.geteuid() == 0)
+    dir = mktempdir() do dir
+        @test include_dependency(dir) === nothing
+        chmod(dir, 0x000)
+        @test try
+             include_dependency(dir); false
+        catch e
+            @test e isa SystemError
+            @test e.prefix == "opening file or folder $(repr(dir))"
+            true
+        end skip = (Sys.isunix() && Libc.geteuid() == 0)
+        dir
+    end
+    @test try
+        include_dependency(dir); false
+    catch e
+        @test e isa SystemError
+        @test e.prefix == "opening file or folder $(repr(dir))"
+        true
+    end
+end
+
+precompile_test_harness("Binding Unique") do load_path
+    write(joinpath(load_path, "UniqueBinding1.jl"),
+        """
+        module UniqueBinding1
+            export x
+            global x = 1
+        end
+        """)
+    write(joinpath(load_path, "UniqueBinding2.jl"),
+        """
+        module UniqueBinding2
+            using UniqueBinding1
+            const thebinding = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), UniqueBinding1, :x, true)
+            const thebinding2 = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), @__MODULE__, :thebinding, true)
+        end
+        """)
+
+    @eval using UniqueBinding1
+    @eval using UniqueBinding2
+    invokelatest() do
+        @test UniqueBinding2.thebinding === ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), UniqueBinding1, :x, true)
+        @test UniqueBinding2.thebinding2 === ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), UniqueBinding2, :thebinding, true)
+    end
+end
+
+precompile_test_harness("Detecting importing outside of a package module") do load_path
+    io = IOBuffer()
+    write(joinpath(load_path, "ImportBeforeMod.jl"),
+    """
+    import Printf
+    module ImportBeforeMod
+    end #module
+    """)
+    @test_throws r"Failed to precompile ImportBeforeMod" Base.compilecache(Base.identify_package("ImportBeforeMod"), io, io)
+    @test occursin(
+        "`using/import Printf` outside of a Module detected. Importing a package outside of a module is not allowed during package precompilation.",
+        String(take!(io)))
+
+
+    write(joinpath(load_path, "HarmlessComments.jl"),
+    """
+    # import Printf
+    #=
+    import Printf
+    =#
+    module HarmlessComments
+    end #module
+    # import Printf
+    #=
+    import Printf
+    =#
+    """)
+    Base.compilecache(Base.identify_package("HarmlessComments"))
+
+
+    write(joinpath(load_path, "ImportAfterMod.jl"), """
+    module ImportAfterMod
+    end #module
+    import Printf
+    """)
+    @test_throws r"Failed to precompile ImportAfterMod" Base.compilecache(Base.identify_package("ImportAfterMod"), io, io)
+    @test occursin(
+        "`using/import Printf` outside of a Module detected. Importing a package outside of a module is not allowed during package precompilation.",
+        String(take!(io)))
+end
+
+precompile_test_harness("No package module") do load_path
+    io = IOBuffer()
+    write(joinpath(load_path, "NoModule.jl"),
+    """
+    1
+    """)
+    @test_throws r"Failed to precompile NoModule" Base.compilecache(Base.identify_package("NoModule"), io, io)
+    @test occursin(
+        "package `NoModule` did not define the expected module `NoModule`, check for typos in package module name",
+        String(take!(io)))
+
+
+    write(joinpath(load_path, "WrongModuleName.jl"),
+    """
+    module DifferentName
+    x = 1
+    end #module
+    """)
+    @test_throws r"Failed to precompile WrongModuleName" Base.compilecache(Base.identify_package("WrongModuleName"), io, io)
+    @test occursin(
+        "package `WrongModuleName` did not define the expected module `WrongModuleName`, check for typos in package module name",
+        String(take!(io)))
+
+
+    write(joinpath(load_path, "NoModuleWithImport.jl"), """
+    import Printf
+    """)
+    @test_throws r"Failed to precompile NoModuleWithImport" Base.compilecache(Base.identify_package("NoModuleWithImport"), io, io)
+    @test occursin(
+        "`using/import Printf` outside of a Module detected. Importing a package outside of a module is not allowed during package precompilation.",
+        String(take!(io)))
+end
+
+precompile_test_harness("Constprop CodeInstance invalidation") do load_path
+    write(joinpath(load_path, "DefineTheMethod.jl"),
+        """
+        module DefineTheMethod
+            export the_method
+            the_method_val(::Val{x}) where {x} = x
+            the_method_val(::Val{1}) = 0xdeadbeef
+            the_method_val(::Val{2}) = 2
+            the_method_val(::Val{3}) = 3
+            the_method_val(::Val{4}) = 4
+            the_method_val(::Val{5}) = 5
+            Base.@constprop :aggressive the_method(x) = the_method_val(Val{x}())
+            the_method(2)
+        end
+        """)
+    Base.compilecache(Base.PkgId("DefineTheMethod"))
+    write(joinpath(load_path, "CallTheMethod.jl"),
+        """
+        module CallTheMethod
+            using DefineTheMethod
+            call_the_method() = the_method(1)
+            call_the_method()
+        end
+        """)
+    Base.compilecache(Base.PkgId("CallTheMethod"))
+    @eval using DefineTheMethod
+    @eval using CallTheMethod
+    @eval DefineTheMethod.the_method_val(::Val{1}) = Int(0)
+    invokelatest() do
+        @test Int(0) == CallTheMethod.call_the_method()
+    end
+end
+
+precompile_test_harness("llvmcall validation") do load_path
+    write(joinpath(load_path, "LLVMCall.jl"),
+        """
+        module LLVMCall
+        using Base: llvmcall
+        @noinline do_llvmcall() = llvmcall("ret i32 0", UInt32, Tuple{})
+        do_llvmcall2() = do_llvmcall()
+        do_llvmcall2()
+        end
+        """)
+    # Also test with --pkgimages=no
+    testcode = """
+        insert!(LOAD_PATH, 1, $(repr(load_path)))
+        insert!(DEPOT_PATH, 1, $(repr(load_path)))
+        using LLVMCall
+        LLVMCall.do_llvmcall2()
+    """
+    @test readchomp(`$(Base.julia_cmd()) --startup-file=no --pkgimages=no -E $(testcode)`) == repr(UInt32(0))
+    # Now the regular way
+    @eval using LLVMCall
+    invokelatest() do
+        @test LLVMCall.do_llvmcall2() == UInt32(0)
+        @test first(methods(LLVMCall.do_llvmcall)).specializations.cache.max_world === typemax(UInt)
+    end
+end
+
+precompile_test_harness("BindingReplaceDisallow") do load_path
+    write(joinpath(load_path, "BindingReplaceDisallow.jl"),
+        """
+        module BindingReplaceDisallow
+        const sinreplace = try
+            eval(Expr(:block,
+                Expr(:const, GlobalRef(Base, :sin), 1),
+                nothing))
+        catch ex
+            ex isa ErrorException || rethrow()
+            ex
+        end
+        end
+        """)
+    ji, ofile = Base.compilecache(Base.PkgId("BindingReplaceDisallow"))
+    @eval using BindingReplaceDisallow
+    invokelatest() do
+        @test BindingReplaceDisallow.sinreplace.msg == "Creating a new global in closed module `Base` (`sin`) breaks incremental compilation because the side effects will not be permanent."
+    end
+end
+
+precompile_test_harness("MainImportDisallow") do load_path
+    write(joinpath(load_path, "MainImportDisallow.jl"),
+        """
+        module MainImportDisallow
+            const importvar = try
+                import Base.Main: cant_get_at_me
+            catch ex
+                ex isa ErrorException || rethrow()
+                ex
+            end
+            const usingmain = try
+                using Base.Main
+            catch ex
+                ex isa ErrorException || rethrow()
+                ex
+            end
+            # Import `Main` is permitted, because it does not look at bindings inside `Main`
+            import Base.Main
+        end
+        """)
+    ji, ofile = Base.compilecache(Base.PkgId("MainImportDisallow"))
+    @eval using MainImportDisallow
+    invokelatest() do
+        @test MainImportDisallow.importvar.msg == "Any `import` or `using` from `Main` is prohibited during incremental compilation."
+        @test MainImportDisallow.usingmain.msg == "Any `import` or `using` from `Main` is prohibited during incremental compilation."
+    end
+end
+
+precompile_test_harness("Package top-level load itself") do load_path
+    write(joinpath(load_path, "UsingSelf.jl"),
+        """
+        __precompile__(false)
+        module UsingSelf
+        using UsingSelf
+        x = 3
+        end
+          """)
+    @eval using UsingSelf
+    invokelatest() do
+        @test UsingSelf.x == 3
+    end
+end
+
+precompile_test_harness("Package precompilation works without manifest") do load_path
+    pkg_dir = joinpath(load_path, "TestPkgNoManifest")
+    mkpath(pkg_dir)
+
+    # Create Project.toml with stdlib dependencies
+    write(joinpath(pkg_dir, "Project.toml"), """
+        name = "TestPkgNoManifest"
+        uuid = "f47a8e44-5f82-4c5c-9076-4b4e8b7e8e8e"
+        version = "0.1.0"
+
+        [deps]
+        Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+        Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+        """)
+
+    # Create src directory and main module file
+    src_dir = joinpath(pkg_dir, "src")
+    mkpath(src_dir)
+    write(joinpath(src_dir, "TestPkgNoManifest.jl"), """
+        module TestPkgNoManifest
+        end
+        """)
+
+    old_active_project = Base.active_project()
+    try
+        # Activate the new package environment
+        Base.set_active_project(joinpath(pkg_dir, "Project.toml"))
+
+        # Ensure there's no manifest file (this is the key to the test)
+        manifest_path = joinpath(pkg_dir, "Manifest.toml")
+        isfile(manifest_path) && rm(manifest_path)
+
+        # This should work without errors - precompiling a package with no manifest
+        @eval using TestPkgNoManifest
+    finally
+        # Restore original load path and active project
+        Base.set_active_project(old_active_project)
+    end
+end
+
+# Verify that inference / caching was not performed for any macros in the sysimage
+let m = only(methods(Base.var"@big_str"))
+    @test m.specializations === Core.svec() || !isdefined(m.specializations, :cache)
+end
+
+# Issue #58841 - make sure we don't accidentally throw away code for inference
+let io = IOBuffer()
+    run(pipeline(`$(Base.julia_cmd()) --startup-file=no --trace-compile=stderr -e 'f() = sin(1.) == 0. ? 1 : 0; exit(f())'`, stderr=io))
+    @test isempty(String(take!(io)))
+end
+
+# Test --compiled-modules=strict in precompilepkgs
+@testset "compiled-modules=strict with dependencies" begin
+    mkdepottempdir() do depot
+        # Create three packages: one that fails to precompile, one that loads it, one that doesn't
+        project_path = joinpath(depot, "testenv")
+        mkpath(project_path)
+
+        # Create FailPkg - a package that can't be precompiled
+        fail_pkg_path = joinpath(depot, "dev", "FailPkg")
+        mkpath(joinpath(fail_pkg_path, "src"))
+        write(joinpath(fail_pkg_path, "Project.toml"),
+              """
+              name = "FailPkg"
+              uuid = "10000000-0000-0000-0000-000000000001"
+              version = "0.1.0"
+              """)
+        write(joinpath(fail_pkg_path, "src", "FailPkg.jl"),
+              """
+              module FailPkg
+              print("Now FailPkg is running.\n")
+              error("expected fail")
+              end
+              """)
+
+        # Create LoadsFailPkg - depends on and loads FailPkg (should fail with strict)
+        loads_pkg_path = joinpath(depot, "dev", "LoadsFailPkg")
+        mkpath(joinpath(loads_pkg_path, "src"))
+        write(joinpath(loads_pkg_path, "Project.toml"),
+              """
+              name = "LoadsFailPkg"
+              uuid = "20000000-0000-0000-0000-000000000002"
+              version = "0.1.0"
+
+              [deps]
+              FailPkg = "10000000-0000-0000-0000-000000000001"
+              """)
+        write(joinpath(loads_pkg_path, "src", "LoadsFailPkg.jl"),
+              """
+              module LoadsFailPkg
+              print("Now LoadsFailPkg is running.\n")
+              import FailPkg
+              print("unreachable\n")
+              end
+              """)
+
+        # Create DependsOnly - depends on FailPkg but doesn't load it (should succeed)
+        depends_pkg_path = joinpath(depot, "dev", "DependsOnly")
+        mkpath(joinpath(depends_pkg_path, "src"))
+        write(joinpath(depends_pkg_path, "Project.toml"),
+              """
+              name = "DependsOnly"
+              uuid = "30000000-0000-0000-0000-000000000003"
+              version = "0.1.0"
+
+              [deps]
+              FailPkg = "10000000-0000-0000-0000-000000000001"
+              """)
+        write(joinpath(depends_pkg_path, "src", "DependsOnly.jl"),
+              """
+              module DependsOnly
+              # Has FailPkg as a dependency but doesn't load it
+              print("Now DependsOnly is running.\n")
+              end
+              """)
+
+        # Create main project with all packages
+        write(joinpath(project_path, "Project.toml"),
+              """
+              [deps]
+              LoadsFailPkg = "20000000-0000-0000-0000-000000000002"
+              DependsOnly = "30000000-0000-0000-0000-000000000003"
+              """)
+        write(joinpath(project_path, "Manifest.toml"),
+              """
+              julia_version = "1.13.0"
+              manifest_format = "2.0"
+
+              [[DependsOnly]]
+              deps = ["FailPkg"]
+              uuid = "30000000-0000-0000-0000-000000000003"
+              version = "0.1.0"
+
+              [[FailPkg]]
+              uuid = "10000000-0000-0000-0000-000000000001"
+              version = "0.1.0"
+
+              [[LoadsFailPkg]]
+              deps = ["FailPkg"]
+              uuid = "20000000-0000-0000-0000-000000000002"
+              version = "0.1.0"
+
+              [[deps.DependsOnly]]
+              deps = ["FailPkg"]
+              path = "../dev/DependsOnly/"
+              uuid = "30000000-0000-0000-0000-000000000003"
+              version = "0.1.0"
+
+              [[deps.FailPkg]]
+              path = "../dev/FailPkg/"
+              uuid = "10000000-0000-0000-0000-000000000001"
+              version = "0.1.0"
+
+              [[deps.LoadsFailPkg]]
+              deps = ["FailPkg"]
+              path = "../dev/LoadsFailPkg/"
+              uuid = "20000000-0000-0000-0000-000000000002"
+              version = "0.1.0"
+              """)
+
+        # Call precompilepkgs with output redirected to a file
+        LoadsFailPkg_output = joinpath(depot, "LoadsFailPkg_output.txt")
+        DependsOnly_output = joinpath(depot, "DependsOnly_output.txt")
+        original_depot_path = copy(Base.DEPOT_PATH)
+        old_proj = Base.active_project()
+        try
+            push!(empty!(DEPOT_PATH), depot)
+            Base.set_active_project(project_path)
+            precompile_capture(file, pkg) = open(file, "w") do io
+                try
+                    r = Base.Precompilation.precompilepkgs([pkg]; io, fancyprint=true)
+                    @test r isa Vector{String}
+                    r
+                catch ex
+                    ex isa Base.Precompilation.PkgPrecompileError || rethrow()
+                    ex
+                end
+            end
+            loadsfailpkg = precompile_capture(LoadsFailPkg_output, "LoadsFailPkg")
+            @test loadsfailpkg isa Base.Precompilation.PkgPrecompileError
+            dependsonly = precompile_capture(DependsOnly_output, "DependsOnly")
+            @test length(dependsonly) == 1
+        finally
+            Base.set_active_project(old_proj)
+            append!(empty!(DEPOT_PATH), original_depot_path)
+        end
+
+        output = read(LoadsFailPkg_output, String)
+        # LoadsFailPkg should fail because it tries to load FailPkg with --compiled-modules=strict
+        @test count("LoadError: expected fail", output) == 1
+        @test count("expected fail", output) == 1
+        @test count("✗ FailPkg", output) > 0
+        @test count("✗ LoadsFailPkg", output) > 0
+        @test count("Now FailPkg is running.", output) == 1
+        @test count("Now LoadsFailPkg is running.", output) == 1
+        @test count("DependsOnly precompiling.", output) == 0
+
+        # DependsOnly should succeed because it doesn't actually load FailPkg
+        output = read(DependsOnly_output, String)
+        @test count("LoadError: expected fail", output) == 0
+        @test count("expected fail", output) == 0
+        @test count("✗ FailPkg", output) > 0
+        @test count("Precompiling DependsOnly finished.", output) == 1
+        @test count("Now FailPkg is running.", output) == 0
+        @test count("Now DependsOnly is running.", output) == 1
+    end
+end
+
+finish_precompile_test!()
diff --git a/test/precompile_absint1.jl b/test/precompile_absint1.jl
new file mode 100644
index 0000000000000..08ec6788a356f
--- /dev/null
+++ b/test/precompile_absint1.jl
@@ -0,0 +1,83 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+import Base.Compiler: Compiler
+
+include("precompile_utils.jl")
+
+precompile_test_harness() do load_path
+    write(joinpath(load_path, "SimpleModule.jl"), :(module SimpleModule
+        basic_callee(x) = x
+        basic_caller(x) = basic_callee(x)
+    end) |> string)
+
+    newinterp_path = abspath(joinpath(@__DIR__,"../Compiler/test/newinterp.jl"))
+    write(joinpath(load_path, "TestAbsIntPrecompile1.jl"), :(module TestAbsIntPrecompile1
+        import SimpleModule: basic_caller, basic_callee
+
+        module Custom
+            import Base.Compiler: Compiler
+            include($newinterp_path)
+            @newinterp PrecompileInterpreter
+        end
+
+        Base.return_types((Float64,)) do x
+            basic_caller(x)
+        end
+        Base.return_types((Float64,); interp=Custom.PrecompileInterpreter()) do x
+            basic_caller(x)
+        end
+        Base.return_types((Vector{Float64},)) do x
+            sum(x)
+        end
+        Base.return_types((Vector{Float64},); interp=Custom.PrecompileInterpreter()) do x
+            sum(x)
+        end
+    end) |> string)
+    Base.compilecache(Base.PkgId("TestAbsIntPrecompile1"))
+
+    @eval let
+        using TestAbsIntPrecompile1
+        cache_owner = Compiler.cache_owner(
+            TestAbsIntPrecompile1.Custom.PrecompileInterpreter())
+        let m = only(methods(TestAbsIntPrecompile1.basic_callee))
+            mi = only(Base.specializations(m))
+            ci = mi.cache
+            @test isdefined(ci, :next)
+            @test ci.owner === cache_owner
+            @test ci.max_world == typemax(UInt)
+            @test Base.module_build_id(TestAbsIntPrecompile1) ==
+                Base.object_build_id(ci)
+            ci = ci.next
+            @test !isdefined(ci, :next)
+            @test ci.owner === nothing
+            @test ci.max_world == typemax(UInt)
+            @test Base.module_build_id(TestAbsIntPrecompile1) ==
+                Base.object_build_id(ci)
+        end
+        let m = only(methods(sum, (Vector{Float64},)))
+            found = false
+            for mi in Base.specializations(m)
+                if mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(sum),Vector{Float64}}
+                    ci = mi.cache
+                    @test isdefined(ci, :next)
+                    @test ci.owner === cache_owner
+                    @test ci.max_world == typemax(UInt)
+                    @test Base.module_build_id(TestAbsIntPrecompile1) ==
+                        Base.object_build_id(ci)
+                    ci = ci.next
+                    @test !isdefined(ci, :next)
+                    @test ci.owner === nothing
+                    @test ci.max_world == typemax(UInt)
+                    @test Base.module_build_id(TestAbsIntPrecompile1) ==
+                        Base.object_build_id(ci)
+                    found = true
+                    break
+                end
+            end
+            @test found
+        end
+    end
+end
+
+finish_precompile_test!()
diff --git a/test/precompile_absint2.jl b/test/precompile_absint2.jl
new file mode 100644
index 0000000000000..0ec03d802fa82
--- /dev/null
+++ b/test/precompile_absint2.jl
@@ -0,0 +1,103 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+
+include("precompile_utils.jl")
+
+precompile_test_harness() do load_path
+    write(joinpath(load_path, "SimpleModule.jl"), :(module SimpleModule
+        basic_callee(x) = x
+        basic_caller(x) = basic_callee(x)
+    end) |> string)
+
+    newinterp_path = abspath(joinpath(@__DIR__,"../Compiler/test/newinterp.jl"))
+    write(joinpath(load_path, "TestAbsIntPrecompile2.jl"), :(module TestAbsIntPrecompile2
+        import SimpleModule: basic_caller, basic_callee
+
+        module Custom
+            import Base.Compiler: Compiler
+            include($newinterp_path)
+            @newinterp PrecompileInterpreter
+            struct CustomData
+                inferred
+                CustomData(@nospecialize inferred) = new(inferred)
+            end
+            function Compiler.transform_result_for_cache(interp::PrecompileInterpreter, result::Compiler.InferenceResult, edges::Core.SimpleVector)
+                inferred_result = @invoke Compiler.transform_result_for_cache(
+                    interp::Compiler.AbstractInterpreter, result::Compiler.InferenceResult, edges::Core.SimpleVector)
+                return CustomData(inferred_result)
+            end
+            function Compiler.src_inlining_policy(interp::PrecompileInterpreter, @nospecialize(src),
+                                            @nospecialize(info::Compiler.CallInfo), stmt_flag::UInt32)
+                if src isa CustomData
+                    src = src.inferred
+                end
+                return @invoke Compiler.src_inlining_policy(interp::Compiler.AbstractInterpreter, src::Any,
+                                                      info::Compiler.CallInfo, stmt_flag::UInt32)
+            end
+            Compiler.retrieve_ir_for_inlining(cached_result::Core.CodeInstance, src::CustomData) =
+                Compiler.retrieve_ir_for_inlining(cached_result, src.inferred)
+            Compiler.retrieve_ir_for_inlining(mi::Core.MethodInstance, src::CustomData, preserve_local_sources::Bool) =
+                Compiler.retrieve_ir_for_inlining(mi, src.inferred, preserve_local_sources)
+        end
+
+        Base.return_types((Float64,)) do x
+            basic_caller(x)
+        end
+        Base.return_types((Float64,); interp=Custom.PrecompileInterpreter()) do x
+            basic_caller(x)
+        end
+        Base.return_types((Vector{Float64},)) do x
+            sum(x)
+        end
+        Base.return_types((Vector{Float64},); interp=Custom.PrecompileInterpreter()) do x
+            sum(x)
+        end
+    end) |> string)
+    Base.compilecache(Base.PkgId("TestAbsIntPrecompile2"))
+
+    @eval let
+        using TestAbsIntPrecompile2
+        cache_owner = Core.Compiler.cache_owner(
+            TestAbsIntPrecompile2.Custom.PrecompileInterpreter())
+        let m = only(methods(TestAbsIntPrecompile2.basic_callee))
+            mi = only(Base.specializations(m))
+            ci = mi.cache
+            @test isdefined(ci, :next)
+            @test ci.owner === cache_owner
+            @test ci.max_world == typemax(UInt)
+            @test Base.module_build_id(TestAbsIntPrecompile2) ==
+                Base.object_build_id(ci)
+            ci = ci.next
+            @test !isdefined(ci, :next)
+            @test ci.owner === nothing
+            @test ci.max_world == typemax(UInt)
+            @test Base.module_build_id(TestAbsIntPrecompile2) ==
+                Base.object_build_id(ci)
+        end
+        let m = only(methods(sum, (Vector{Float64},)))
+            found = false
+            for mi = Base.specializations(m)
+                if mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(sum),Vector{Float64}}
+                    ci = mi.cache
+                    @test isdefined(ci, :next)
+                    @test ci.owner === cache_owner
+                    @test ci.max_world == typemax(UInt)
+                    @test Base.module_build_id(TestAbsIntPrecompile2) ==
+                        Base.object_build_id(ci)
+                    ci = ci.next
+                    @test !isdefined(ci, :next)
+                    @test ci.owner === nothing
+                    @test ci.max_world == typemax(UInt)
+                    @test Base.module_build_id(TestAbsIntPrecompile2) ==
+                        Base.object_build_id(ci)
+                    found = true
+                    break
+                end
+            end
+            @test found
+        end
+    end
+end
+
+finish_precompile_test!()
diff --git a/test/precompile_utils.jl b/test/precompile_utils.jl
new file mode 100644
index 0000000000000..c9a7c98d262e0
--- /dev/null
+++ b/test/precompile_utils.jl
@@ -0,0 +1,31 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+include("tempdepot.jl")
+
+function precompile_test_harness(@nospecialize(f), testset::String)
+    @testset "$testset" precompile_test_harness(f, true)
+end
+function precompile_test_harness(@nospecialize(f), separate::Bool=true)
+    load_path = mkdepottempdir()
+    load_cache_path = separate ? mkdepottempdir() : load_path
+    try
+        pushfirst!(LOAD_PATH, load_path)
+        pushfirst!(DEPOT_PATH, load_cache_path)
+        f(load_path)
+    finally
+        filter!((≠)(load_path), LOAD_PATH)
+        separate && filter!((≠)(load_cache_path), DEPOT_PATH)
+    end
+    return nothing
+end
+
+let original_depot_path = copy(Base.DEPOT_PATH)
+    original_load_path = copy(Base.LOAD_PATH)
+
+    global function finish_precompile_test!()
+        empty!(Base.DEPOT_PATH)
+        append!(Base.DEPOT_PATH, original_depot_path)
+        empty!(Base.LOAD_PATH)
+        append!(Base.LOAD_PATH, original_load_path)
+    end
+end
diff --git a/test/project/Extensions/CrossPackageExtToExtDependency/Manifest.toml b/test/project/Extensions/CrossPackageExtToExtDependency/Manifest.toml
new file mode 100644
index 0000000000000..5497fdb7091bb
--- /dev/null
+++ b/test/project/Extensions/CrossPackageExtToExtDependency/Manifest.toml
@@ -0,0 +1,32 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.11.1"
+manifest_format = "2.0"
+project_hash = "dc35c2cf8c6b82fb5b9624c9713c2df34ca30499"
+
+[[deps.CyclicExtensions]]
+deps = ["ExtDep"]
+path = "../CyclicExtensions"
+uuid = "17d4f0df-b55c-4714-ac4b-55fa23f7355c"
+version = "0.1.0"
+weakdeps = ["SomePackage"]
+
+    [deps.CyclicExtensions.extensions]
+    ExtA = ["SomePackage"]
+    ExtB = ["SomePackage"]
+
+[[deps.ExtDep]]
+deps = ["SomeOtherPackage", "SomePackage"]
+path = "../ExtDep.jl"
+uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+version = "0.1.0"
+
+[[deps.SomeOtherPackage]]
+path = "../SomeOtherPackage"
+uuid = "178f68a2-4498-45ee-a775-452b36359b63"
+version = "0.1.0"
+
+[[deps.SomePackage]]
+path = "../SomePackage"
+uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+version = "0.1.0"
diff --git a/test/project/Extensions/CrossPackageExtToExtDependency/Project.toml b/test/project/Extensions/CrossPackageExtToExtDependency/Project.toml
new file mode 100644
index 0000000000000..76ffb7bd1c882
--- /dev/null
+++ b/test/project/Extensions/CrossPackageExtToExtDependency/Project.toml
@@ -0,0 +1,12 @@
+name = "CrossPackageExtToExtDependency"
+uuid = "30f07f2e-c47e-40db-93a2-cbc4d1b301cc"
+version = "0.1.0"
+
+[deps]
+CyclicExtensions = "17d4f0df-b55c-4714-ac4b-55fa23f7355c"
+
+[weakdeps]
+SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+
+[extensions]
+ExtAB = ["CyclicExtensions", "SomePackage"]
diff --git a/test/project/Extensions/CrossPackageExtToExtDependency/ext/ExtAB.jl b/test/project/Extensions/CrossPackageExtToExtDependency/ext/ExtAB.jl
new file mode 100644
index 0000000000000..1ded9f2df5097
--- /dev/null
+++ b/test/project/Extensions/CrossPackageExtToExtDependency/ext/ExtAB.jl
@@ -0,0 +1,12 @@
+module ExtAB
+
+using CrossPackageExtToExtDependency
+using SomePackage
+using CyclicExtensions
+
+const ExtA = Base.get_extension(CyclicExtensions, :ExtA)
+if !(ExtA isa Module)
+    error("expected extension to load")
+end
+
+end
diff --git a/test/project/Extensions/CrossPackageExtToExtDependency/src/CrossPackageExtToExtDependency.jl b/test/project/Extensions/CrossPackageExtToExtDependency/src/CrossPackageExtToExtDependency.jl
new file mode 100644
index 0000000000000..28b229e2d61bf
--- /dev/null
+++ b/test/project/Extensions/CrossPackageExtToExtDependency/src/CrossPackageExtToExtDependency.jl
@@ -0,0 +1,7 @@
+module CrossPackageExtToExtDependency
+
+using CyclicExtensions
+
+greet() = print("Hello x-package ext-to-ext!")
+
+end # module CrossPackageExtToTextDependency
diff --git a/test/project/Extensions/CyclicExtensions/Manifest.toml b/test/project/Extensions/CyclicExtensions/Manifest.toml
new file mode 100644
index 0000000000000..0f280293c07b6
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/Manifest.toml
@@ -0,0 +1,26 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.10.4"
+manifest_format = "2.0"
+project_hash = "ec25ff8df3a5e2212a173c3de2c7d716cc47cd36"
+
+[[deps.ExtDep]]
+deps = ["SomePackage", "SomeOtherPackage"]
+path = "../ExtDep.jl"
+uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+version = "0.1.0"
+
+[[deps.ExtDep2]]
+path = "../ExtDep2"
+uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
+version = "0.1.0"
+
+[[deps.SomeOtherPackage]]
+path = "../SomeOtherPackage"
+uuid = "178f68a2-4498-45ee-a775-452b36359b63"
+version = "0.1.0"
+
+[[deps.SomePackage]]
+path = "../SomePackage"
+uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+version = "0.1.0"
diff --git a/test/project/Extensions/CyclicExtensions/Project.toml b/test/project/Extensions/CyclicExtensions/Project.toml
new file mode 100644
index 0000000000000..08d539dcc40ae
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/Project.toml
@@ -0,0 +1,13 @@
+name = "CyclicExtensions"
+uuid = "17d4f0df-b55c-4714-ac4b-55fa23f7355c"
+version = "0.1.0"
+
+[deps]
+ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+
+[weakdeps]
+SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+
+[extensions]
+ExtA = ["SomePackage"]
+ExtB = ["SomePackage"]
diff --git a/test/project/Extensions/CyclicExtensions/ext/ExtA.jl b/test/project/Extensions/CyclicExtensions/ext/ExtA.jl
new file mode 100644
index 0000000000000..fa0c0961633cb
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/ext/ExtA.jl
@@ -0,0 +1,6 @@
+module ExtA
+
+using CyclicExtensions
+using SomePackage
+
+end
diff --git a/test/project/Extensions/CyclicExtensions/ext/ExtB.jl b/test/project/Extensions/CyclicExtensions/ext/ExtB.jl
new file mode 100644
index 0000000000000..8f6da556d39b8
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/ext/ExtB.jl
@@ -0,0 +1,6 @@
+module ExtB
+
+using CyclicExtensions
+using SomePackage
+
+end
diff --git a/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl b/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl
new file mode 100644
index 0000000000000..f1c2ec2077562
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl
@@ -0,0 +1,7 @@
+module CyclicExtensions
+
+using ExtDep
+
+greet() = print("Hello Cycles!")
+
+end # module CyclicExtensions
diff --git a/test/project/Extensions/DepWithParentExt.jl/Project.toml b/test/project/Extensions/DepWithParentExt.jl/Project.toml
new file mode 100644
index 0000000000000..bc487252ced4e
--- /dev/null
+++ b/test/project/Extensions/DepWithParentExt.jl/Project.toml
@@ -0,0 +1,9 @@
+name = "DepWithParentExt"
+uuid = "8a35c396-5ffc-40d2-b7ec-e8ed2248da32"
+version = "0.1.0"
+
+[weakdeps]
+Parent = "58cecb9c-f68a-426e-b92a-89d456ae7acc"
+
+[extensions]
+ParentExt = "Parent"
diff --git a/test/project/Extensions/DepWithParentExt.jl/ext/ParentExt.jl b/test/project/Extensions/DepWithParentExt.jl/ext/ParentExt.jl
new file mode 100644
index 0000000000000..56176d2f5921d
--- /dev/null
+++ b/test/project/Extensions/DepWithParentExt.jl/ext/ParentExt.jl
@@ -0,0 +1,6 @@
+module ParentExt
+
+using Parent
+using DepWithParentExt
+
+end
diff --git a/test/project/Extensions/DepWithParentExt.jl/src/DepWithParentExt.jl b/test/project/Extensions/DepWithParentExt.jl/src/DepWithParentExt.jl
new file mode 100644
index 0000000000000..3d4ebc4ebf8a0
--- /dev/null
+++ b/test/project/Extensions/DepWithParentExt.jl/src/DepWithParentExt.jl
@@ -0,0 +1,5 @@
+module DepWithParentExt
+
+greet() = print("Hello dep w/ ext for parent dep!")
+
+end # module DepWithParentExt
diff --git a/test/project/Extensions/EnvWithDeps/Manifest.toml b/test/project/Extensions/EnvWithDeps/Manifest.toml
index 85ff259f0a4d5..554a317b370eb 100644
--- a/test/project/Extensions/EnvWithDeps/Manifest.toml
+++ b/test/project/Extensions/EnvWithDeps/Manifest.toml
@@ -5,7 +5,7 @@ manifest_format = "2.0"
 project_hash = "ec25ff8df3a5e2212a173c3de2c7d716cc47cd36"
 
 [[deps.ExtDep]]
-deps = ["SomePackage"]
+deps = ["SomePackage", "SomeOtherPackage"]
 path = "../ExtDep.jl"
 uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
 version = "0.1.0"
@@ -15,6 +15,11 @@ path = "../ExtDep2"
 uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
 version = "0.1.0"
 
+[[deps.SomeOtherPackage]]
+path = "../SomeOtherPackage"
+uuid = "178f68a2-4498-45ee-a775-452b36359b63"
+version = "0.1.0"
+
 [[deps.SomePackage]]
 path = "../SomePackage"
 uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
diff --git a/test/project/Extensions/EnvWithHasExtensions/Manifest.toml b/test/project/Extensions/EnvWithHasExtensions/Manifest.toml
index 8ac961fa1a9a9..ca2be57c61596 100644
--- a/test/project/Extensions/EnvWithHasExtensions/Manifest.toml
+++ b/test/project/Extensions/EnvWithHasExtensions/Manifest.toml
@@ -1,11 +1,11 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.9.0-beta4"
+julia_version = "1.12.0-DEV"
 manifest_format = "2.0"
-project_hash = "caa716752e6dff3d77c3de929ebbb5d2024d04ef"
+project_hash = "a4c480cfa7da9610333d5c42623bf746bd286c5f"
 
 [[deps.ExtDep]]
-deps = ["SomePackage"]
+deps = ["SomePackage", "SomeOtherPackage"]
 path = "../ExtDep.jl"
 uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
 version = "0.1.0"
@@ -18,10 +18,17 @@ version = "0.1.0"
     [deps.HasExtensions.extensions]
     Extension = "ExtDep"
     ExtensionFolder = ["ExtDep", "ExtDep2"]
+    LinearAlgebraExt = "LinearAlgebra"
 
     [deps.HasExtensions.weakdeps]
     ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
     ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
+    LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+
+[[deps.SomeOtherPackage]]
+path = "../SomeOtherPackage"
+uuid = "178f68a2-4498-45ee-a775-452b36359b63"
+version = "0.1.0"
 
 [[deps.SomePackage]]
 path = "../SomePackage"
diff --git a/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml b/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml
index 66781a5701363..9f8c717041b6e 100644
--- a/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml
+++ b/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml
@@ -5,7 +5,7 @@ manifest_format = "2.0"
 project_hash = "caa716752e6dff3d77c3de929ebbb5d2024d04ef"
 
 [[deps.ExtDep]]
-deps = ["SomePackage"]
+deps = ["SomePackage", "SomeOtherPackage"]
 path = "../ExtDep.jl"
 uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
 version = "0.1.0"
@@ -19,6 +19,11 @@ weakdeps = ["ExtDep"]
     [deps.HasExtensions.extensions]
     Extension2 = "ExtDep"
 
+[[deps.SomeOtherPackage]]
+path = "../SomeOtherPackage"
+uuid = "178f68a2-4498-45ee-a775-452b36359b63"
+version = "0.1.0"
+
 [[deps.SomePackage]]
 path = "../SomePackage"
 uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
diff --git a/test/project/Extensions/ExtDep.jl/Project.toml b/test/project/Extensions/ExtDep.jl/Project.toml
index d246934b7f958..1ece7bf11f95a 100644
--- a/test/project/Extensions/ExtDep.jl/Project.toml
+++ b/test/project/Extensions/ExtDep.jl/Project.toml
@@ -4,3 +4,4 @@ version = "0.1.0"
 
 [deps]
 SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+SomeOtherPackage = "178f68a2-4498-45ee-a775-452b36359b63"
diff --git a/test/project/Extensions/ExtDep.jl/src/ExtDep.jl b/test/project/Extensions/ExtDep.jl/src/ExtDep.jl
index 1c0022d879f51..2d3c6b7f28827 100644
--- a/test/project/Extensions/ExtDep.jl/src/ExtDep.jl
+++ b/test/project/Extensions/ExtDep.jl/src/ExtDep.jl
@@ -2,6 +2,7 @@ module ExtDep
 
 # loading this package makes the check for loading extensions trigger
 # which tests #47921
+using SomeOtherPackage
 using SomePackage
 
 struct ExtDepStruct end
diff --git a/test/project/Extensions/ExtDep3.jl/Project.toml b/test/project/Extensions/ExtDep3.jl/Project.toml
new file mode 100644
index 0000000000000..690b2f1cffff4
--- /dev/null
+++ b/test/project/Extensions/ExtDep3.jl/Project.toml
@@ -0,0 +1,4 @@
+name = "ExtDep3"
+uuid = "a5541f1e-a556-4fdc-af15-097880d743a1"
+version = "0.1.0"
+authors = ["Kristoffer <kcarlsson89@gmail.com>"]
diff --git a/test/project/Extensions/ExtDep3.jl/src/ExtDep3.jl b/test/project/Extensions/ExtDep3.jl/src/ExtDep3.jl
new file mode 100644
index 0000000000000..96a0b472d06c5
--- /dev/null
+++ b/test/project/Extensions/ExtDep3.jl/src/ExtDep3.jl
@@ -0,0 +1,5 @@
+module ExtDep3
+
+greet() = print("Hello World!")
+
+end # module ExtDep3
diff --git a/test/project/Extensions/ExtNameCollision_A/Project.toml b/test/project/Extensions/ExtNameCollision_A/Project.toml
new file mode 100644
index 0000000000000..f4cc37786f508
--- /dev/null
+++ b/test/project/Extensions/ExtNameCollision_A/Project.toml
@@ -0,0 +1,9 @@
+name = "ExtNameCollision_A"
+uuid = "9f48de98-8f56-4937-aa32-2a5530882eaa"
+version = "0.1.0"
+
+[weakdeps]
+REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[extensions]
+REPLExt = "REPL"
diff --git a/test/project/Extensions/ExtNameCollision_A/ext/REPLExt.jl b/test/project/Extensions/ExtNameCollision_A/ext/REPLExt.jl
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/project/Extensions/ExtNameCollision_A/src/ExtNameCollision_A.jl b/test/project/Extensions/ExtNameCollision_A/src/ExtNameCollision_A.jl
new file mode 100644
index 0000000000000..2f47a862dd9c5
--- /dev/null
+++ b/test/project/Extensions/ExtNameCollision_A/src/ExtNameCollision_A.jl
@@ -0,0 +1,5 @@
+module ExtNameCollision_A
+
+greet() = print("Hello World!")
+
+end # module ExtNameCollision_A
diff --git a/test/project/Extensions/ExtNameCollision_B/Project.toml b/test/project/Extensions/ExtNameCollision_B/Project.toml
new file mode 100644
index 0000000000000..ac52d64a82a7c
--- /dev/null
+++ b/test/project/Extensions/ExtNameCollision_B/Project.toml
@@ -0,0 +1,9 @@
+name = "ExtNameCollision_B"
+uuid = "597d654f-44d8-4443-9b1e-1f2f4b45906f"
+version = "0.1.0"
+
+[weakdeps]
+REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[extensions]
+REPLExt = "REPL"
diff --git a/test/project/Extensions/ExtNameCollision_B/ext/REPLExt.jl b/test/project/Extensions/ExtNameCollision_B/ext/REPLExt.jl
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/project/Extensions/ExtNameCollision_B/src/ExtNameCollision_B.jl b/test/project/Extensions/ExtNameCollision_B/src/ExtNameCollision_B.jl
new file mode 100644
index 0000000000000..e7665982a79b3
--- /dev/null
+++ b/test/project/Extensions/ExtNameCollision_B/src/ExtNameCollision_B.jl
@@ -0,0 +1,5 @@
+module ExtNameCollision_B
+
+greet() = print("Hello World!")
+
+end # module ExtNameCollision_B
diff --git a/test/project/Extensions/ExtToExtDependency/Manifest.toml b/test/project/Extensions/ExtToExtDependency/Manifest.toml
new file mode 100644
index 0000000000000..41546213cdd41
--- /dev/null
+++ b/test/project/Extensions/ExtToExtDependency/Manifest.toml
@@ -0,0 +1,21 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.11.1"
+manifest_format = "2.0"
+project_hash = "90b427e837c654fabb1434527ea698dabad46d29"
+
+[[deps.ExtDep]]
+deps = ["SomeOtherPackage", "SomePackage"]
+path = "../ExtDep.jl"
+uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+version = "0.1.0"
+
+[[deps.SomeOtherPackage]]
+path = "../SomeOtherPackage"
+uuid = "178f68a2-4498-45ee-a775-452b36359b63"
+version = "0.1.0"
+
+[[deps.SomePackage]]
+path = "../SomePackage"
+uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+version = "0.1.0"
diff --git a/test/project/Extensions/ExtToExtDependency/Project.toml b/test/project/Extensions/ExtToExtDependency/Project.toml
new file mode 100644
index 0000000000000..980db74c04dc4
--- /dev/null
+++ b/test/project/Extensions/ExtToExtDependency/Project.toml
@@ -0,0 +1,14 @@
+name = "ExtToExtDependency"
+uuid = "594ddb71-72fb-4cfe-9471-775d48a5b70b"
+version = "0.1.0"
+
+[deps]
+ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+
+[weakdeps]
+SomeOtherPackage = "178f68a2-4498-45ee-a775-452b36359b63"
+SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+
+[extensions]
+ExtA = ["SomePackage"]
+ExtAB = ["SomePackage", "SomeOtherPackage"]
diff --git a/test/project/Extensions/ExtToExtDependency/ext/ExtA.jl b/test/project/Extensions/ExtToExtDependency/ext/ExtA.jl
new file mode 100644
index 0000000000000..71ed09795157c
--- /dev/null
+++ b/test/project/Extensions/ExtToExtDependency/ext/ExtA.jl
@@ -0,0 +1,6 @@
+module ExtA
+
+using ExtToExtDependency
+using SomePackage
+
+end
diff --git a/test/project/Extensions/ExtToExtDependency/ext/ExtAB.jl b/test/project/Extensions/ExtToExtDependency/ext/ExtAB.jl
new file mode 100644
index 0000000000000..a5b2c43cafd58
--- /dev/null
+++ b/test/project/Extensions/ExtToExtDependency/ext/ExtAB.jl
@@ -0,0 +1,12 @@
+module ExtAB
+
+using ExtToExtDependency
+using SomePackage
+using SomeOtherPackage
+
+const ExtA = Base.get_extension(ExtToExtDependency, :ExtA)
+if !(ExtA isa Module)
+    error("expected extension to load")
+end
+
+end
diff --git a/test/project/Extensions/ExtToExtDependency/src/ExtToExtDependency.jl b/test/project/Extensions/ExtToExtDependency/src/ExtToExtDependency.jl
new file mode 100644
index 0000000000000..ec2bf58f18641
--- /dev/null
+++ b/test/project/Extensions/ExtToExtDependency/src/ExtToExtDependency.jl
@@ -0,0 +1,7 @@
+module ExtToExtDependency
+
+using ExtDep
+
+greet() = print("Hello ext-to-ext!")
+
+end # module ExtToExtDependency
diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml
index 52542fc822094..98510dcb27733 100644
--- a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml
+++ b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml
@@ -1,11 +1,11 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.10.0-DEV"
+julia_version = "1.12.0-DEV"
 manifest_format = "2.0"
-project_hash = "d523b3401f72a1ed34b7b43749fd2655c6b78542"
+project_hash = "4e196b07f2ee7adc48ac9d528d42b3cf3737c7a0"
 
 [[deps.ExtDep]]
-deps = ["SomePackage"]
+deps = ["SomePackage", "SomeOtherPackage"]
 path = "../ExtDep.jl"
 uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
 version = "0.1.0"
@@ -15,7 +15,13 @@ path = "../ExtDep2"
 uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
 version = "0.1.0"
 
+[[deps.ExtDep3]]
+path = "../ExtDep3.jl"
+uuid = "a5541f1e-a556-4fdc-af15-097880d743a1"
+version = "0.1.0"
+
 [[deps.HasExtensions]]
+deps = ["ExtDep3"]
 path = "../HasExtensions.jl"
 uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
 version = "0.1.0"
@@ -23,8 +29,14 @@ weakdeps = ["ExtDep", "ExtDep2"]
 
     [deps.HasExtensions.extensions]
     Extension = "ExtDep"
+    ExtensionDep = "ExtDep3"
     ExtensionFolder = ["ExtDep", "ExtDep2"]
 
+[[deps.SomeOtherPackage]]
+path = "../SomeOtherPackage"
+uuid = "178f68a2-4498-45ee-a775-452b36359b63"
+version = "0.1.0"
+
 [[deps.SomePackage]]
 path = "../SomePackage"
 uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Project.toml b/test/project/Extensions/HasDepWithExtensions.jl/Project.toml
index 8f308a9fbee72..aa4956caada74 100644
--- a/test/project/Extensions/HasDepWithExtensions.jl/Project.toml
+++ b/test/project/Extensions/HasDepWithExtensions.jl/Project.toml
@@ -5,4 +5,5 @@ version = "0.1.0"
 [deps]
 ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
 ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
+ExtDep3 = "a5541f1e-a556-4fdc-af15-097880d743a1"
 HasExtensions = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
diff --git a/test/project/Extensions/HasExtensions.jl/Manifest.toml b/test/project/Extensions/HasExtensions.jl/Manifest.toml
index 55f7958701a75..429c6598fc4f4 100644
--- a/test/project/Extensions/HasExtensions.jl/Manifest.toml
+++ b/test/project/Extensions/HasExtensions.jl/Manifest.toml
@@ -1,7 +1,10 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.10.0-DEV"
+julia_version = "1.12.0-DEV"
 manifest_format = "2.0"
-project_hash = "c87947f1f1f070eea848950c304d668a112dec3d"
+project_hash = "c0bb526b75939a74a6195ee4819e598918a22ad7"
 
-[deps]
+[[deps.ExtDep3]]
+path = "../ExtDep3.jl"
+uuid = "a5541f1e-a556-4fdc-af15-097880d743a1"
+version = "0.1.0"
diff --git a/test/project/Extensions/HasExtensions.jl/Project.toml b/test/project/Extensions/HasExtensions.jl/Project.toml
index 72577de36d65d..a02f5662d602d 100644
--- a/test/project/Extensions/HasExtensions.jl/Project.toml
+++ b/test/project/Extensions/HasExtensions.jl/Project.toml
@@ -2,10 +2,14 @@ name = "HasExtensions"
 uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
 version = "0.1.0"
 
+[deps]
+ExtDep3 = "a5541f1e-a556-4fdc-af15-097880d743a1"
+
 [weakdeps]
 ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
 ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
 
 [extensions]
 Extension = "ExtDep"
+ExtensionDep = "ExtDep3"
 ExtensionFolder = ["ExtDep", "ExtDep2"]
diff --git a/test/project/Extensions/HasExtensions.jl/ext/ExtensionDep.jl b/test/project/Extensions/HasExtensions.jl/ext/ExtensionDep.jl
new file mode 100644
index 0000000000000..e2710d4d89bbb
--- /dev/null
+++ b/test/project/Extensions/HasExtensions.jl/ext/ExtensionDep.jl
@@ -0,0 +1,9 @@
+module ExtensionDep
+
+using HasExtensions, ExtDep3
+
+function __init__()
+    HasExtensions.ext_dep_loaded = true
+end
+
+end
diff --git a/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl b/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl
index dbfaeec4f8812..9d9785f87f790 100644
--- a/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl
+++ b/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl
@@ -6,5 +6,6 @@ foo(::HasExtensionsStruct) = 1
 
 ext_loaded = false
 ext_folder_loaded = false
+ext_dep_loaded = false
 
 end # module
diff --git a/test/project/Extensions/ImplicitEnv/A/Project.toml b/test/project/Extensions/ImplicitEnv/A/Project.toml
new file mode 100644
index 0000000000000..043272d4bd015
--- /dev/null
+++ b/test/project/Extensions/ImplicitEnv/A/Project.toml
@@ -0,0 +1,9 @@
+name = "A"
+uuid = "299a509a-2181-4868-8714-15151945d902"
+version = "0.1.0"
+
+[weakdeps]
+B = "c2c18cb0-3543-497c-ac2a-523c527589e5"
+
+[extensions]
+BExt = "B"
diff --git a/test/project/Extensions/ImplicitEnv/A/ext/BExt.jl b/test/project/Extensions/ImplicitEnv/A/ext/BExt.jl
new file mode 100644
index 0000000000000..70be6435bcbe8
--- /dev/null
+++ b/test/project/Extensions/ImplicitEnv/A/ext/BExt.jl
@@ -0,0 +1,3 @@
+module BExt
+
+end
diff --git a/test/project/Extensions/ImplicitEnv/A/src/A.jl b/test/project/Extensions/ImplicitEnv/A/src/A.jl
new file mode 100644
index 0000000000000..ab16fa1de96af
--- /dev/null
+++ b/test/project/Extensions/ImplicitEnv/A/src/A.jl
@@ -0,0 +1,5 @@
+module A
+
+greet() = print("Hello World!")
+
+end # module A
diff --git a/test/project/Extensions/ImplicitEnv/B/Project.toml b/test/project/Extensions/ImplicitEnv/B/Project.toml
new file mode 100644
index 0000000000000..d919c27be0467
--- /dev/null
+++ b/test/project/Extensions/ImplicitEnv/B/Project.toml
@@ -0,0 +1,3 @@
+name = "B"
+uuid = "c2c18cb0-3543-497c-ac2a-523c527589e5"
+version = "0.1.0"
diff --git a/test/project/Extensions/ImplicitEnv/B/src/B.jl b/test/project/Extensions/ImplicitEnv/B/src/B.jl
new file mode 100644
index 0000000000000..79b5a1204765f
--- /dev/null
+++ b/test/project/Extensions/ImplicitEnv/B/src/B.jl
@@ -0,0 +1,5 @@
+module B
+
+greet() = print("Hello World!")
+
+end # module B
diff --git a/test/project/Extensions/Parent.jl/Manifest.toml b/test/project/Extensions/Parent.jl/Manifest.toml
new file mode 100644
index 0000000000000..eb0c323ac36f5
--- /dev/null
+++ b/test/project/Extensions/Parent.jl/Manifest.toml
@@ -0,0 +1,20 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.12.0-DEV"
+manifest_format = "2.0"
+project_hash = "b6ac643184d62cc94427c9aa665ff1fb63d66038"
+
+[[deps.DepWithParentExt]]
+path = "../DepWithParentExt.jl"
+uuid = "8a35c396-5ffc-40d2-b7ec-e8ed2248da32"
+version = "0.1.0"
+weakdeps = ["Parent"]
+
+    [deps.DepWithParentExt.extensions]
+    ParentExt = "Parent"
+
+[[deps.Parent]]
+deps = ["DepWithParentExt"]
+path = "."
+uuid = "58cecb9c-f68a-426e-b92a-89d456ae7acc"
+version = "0.1.0"
diff --git a/test/project/Extensions/Parent.jl/Project.toml b/test/project/Extensions/Parent.jl/Project.toml
new file mode 100644
index 0000000000000..d62594cf15d3f
--- /dev/null
+++ b/test/project/Extensions/Parent.jl/Project.toml
@@ -0,0 +1,7 @@
+name = "Parent"
+uuid = "58cecb9c-f68a-426e-b92a-89d456ae7acc"
+version = "0.1.0"
+authors = ["Cody Tapscott <topolarity@tapscott.me>"]
+
+[deps]
+DepWithParentExt = "8a35c396-5ffc-40d2-b7ec-e8ed2248da32"
diff --git a/test/project/Extensions/Parent.jl/src/Parent.jl b/test/project/Extensions/Parent.jl/src/Parent.jl
new file mode 100644
index 0000000000000..471f4b13ecca3
--- /dev/null
+++ b/test/project/Extensions/Parent.jl/src/Parent.jl
@@ -0,0 +1,7 @@
+module Parent
+
+using DepWithParentExt
+
+greet() = print("Hello parent!")
+
+end # module Parent
diff --git a/test/project/Extensions/SomeOtherPackage/Project.toml b/test/project/Extensions/SomeOtherPackage/Project.toml
new file mode 100644
index 0000000000000..6e7eee40c7be2
--- /dev/null
+++ b/test/project/Extensions/SomeOtherPackage/Project.toml
@@ -0,0 +1,4 @@
+name = "SomeOtherPackage"
+uuid = "178f68a2-4498-45ee-a775-452b36359b63"
+authors = ["Cody Tapscott <topolarity@tapscott.me>"]
+version = "0.1.0"
diff --git a/test/project/Extensions/SomeOtherPackage/src/SomeOtherPackage.jl b/test/project/Extensions/SomeOtherPackage/src/SomeOtherPackage.jl
new file mode 100644
index 0000000000000..ba23eb3914561
--- /dev/null
+++ b/test/project/Extensions/SomeOtherPackage/src/SomeOtherPackage.jl
@@ -0,0 +1,5 @@
+module SomeOtherPackage
+
+greet() = print("Hello World!")
+
+end # module SomeOtherPackage
diff --git a/test/project/ProjectPath/CustomPath.jl b/test/project/ProjectPath/CustomPath.jl
new file mode 100644
index 0000000000000..8fe764fa066dc
--- /dev/null
+++ b/test/project/ProjectPath/CustomPath.jl
@@ -0,0 +1,5 @@
+module ProjectPath
+
+greet() = print("Hello World!")
+
+end # module ProjectPath
diff --git a/test/project/ProjectPath/Manifest.toml b/test/project/ProjectPath/Manifest.toml
new file mode 100644
index 0000000000000..123e7f575062a
--- /dev/null
+++ b/test/project/ProjectPath/Manifest.toml
@@ -0,0 +1,18 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.12.0-DEV"
+manifest_format = "2.0"
+project_hash = "51ade905d618e4aa369bc869841376219cc36cb1"
+
+[[deps.ProjectPath]]
+deps = ["ProjectPathDep"]
+path = "."
+entryfile = "CustomPath.jl"
+uuid = "32833bde-7fc1-4d28-8365-9d01e1bcbc1b"
+version = "0.1.0"
+
+[[deps.ProjectPathDep]]
+path = "ProjectPathDep"
+entryfile = "CustomPath.jl"
+uuid = "f18633fc-8799-43ff-aa06-99ed830dc572"
+version = "0.1.0"
diff --git a/test/project/ProjectPath/Project.toml b/test/project/ProjectPath/Project.toml
new file mode 100644
index 0000000000000..a434f78e9c211
--- /dev/null
+++ b/test/project/ProjectPath/Project.toml
@@ -0,0 +1,7 @@
+name = "ProjectPath"
+uuid = "32833bde-7fc1-4d28-8365-9d01e1bcbc1b"
+entryfile = "CustomPath.jl"
+version = "0.1.0"
+
+[deps]
+ProjectPathDep = "f18633fc-8799-43ff-aa06-99ed830dc572"
diff --git a/test/project/ProjectPath/ProjectPathDep/CustomPath.jl b/test/project/ProjectPath/ProjectPathDep/CustomPath.jl
new file mode 100644
index 0000000000000..adbe508f0c7f9
--- /dev/null
+++ b/test/project/ProjectPath/ProjectPathDep/CustomPath.jl
@@ -0,0 +1,5 @@
+module ProjectPathDep
+
+greet() = print("Hello World!")
+
+end # module ProjectPathDep
diff --git a/test/project/ProjectPath/ProjectPathDep/Project.toml b/test/project/ProjectPath/ProjectPathDep/Project.toml
new file mode 100644
index 0000000000000..c69e54e8c9390
--- /dev/null
+++ b/test/project/ProjectPath/ProjectPathDep/Project.toml
@@ -0,0 +1,4 @@
+name = "ProjectPathDep"
+uuid = "f18633fc-8799-43ff-aa06-99ed830dc572"
+version = "0.1.0"
+entryfile = "CustomPath.jl"
diff --git a/test/project/Rot13/Project.toml b/test/project/Rot13/Project.toml
new file mode 100644
index 0000000000000..eb03cb84d588e
--- /dev/null
+++ b/test/project/Rot13/Project.toml
@@ -0,0 +1,3 @@
+name = "Rot13"
+uuid = "43ef800a-eac4-47f4-949b-25107b932e8f"
+version = "0.1.0"
diff --git a/test/project/Rot13/src/Rot13.jl b/test/project/Rot13/src/Rot13.jl
new file mode 100644
index 0000000000000..66f077812d878
--- /dev/null
+++ b/test/project/Rot13/src/Rot13.jl
@@ -0,0 +1,28 @@
+module Rot13
+
+function rot13(c::Char)
+    shft = islowercase(c) ? 'a' : 'A'
+    isletter(c) ? c = shft + (c - shft + 13) % 26 : c
+end
+
+rot13(str::AbstractString) = map(rot13, str)
+
+function (@main)(args)
+    foreach(arg -> print(rot13(arg), " "), args)
+    return 0
+end
+
+module Rot26 # LOL
+
+import ..rot13
+
+rot26(str::AbstractString) = map(rot13 ∘ rot13, str)
+
+function (@main)(args)
+    foreach(arg -> print(rot26(arg), " "), args)
+    return 0
+end
+
+end
+
+end # module Rot13
diff --git a/test/project/ScriptProject/Project.toml b/test/project/ScriptProject/Project.toml
new file mode 100644
index 0000000000000..3301f2b79da83
--- /dev/null
+++ b/test/project/ScriptProject/Project.toml
@@ -0,0 +1,2 @@
+name = "ScriptProject"
+uuid = "6646321a-c4de-46ad-9761-435e5bb1f223"
diff --git a/test/project/ScriptProject/SubProject/Project.toml b/test/project/ScriptProject/SubProject/Project.toml
new file mode 100644
index 0000000000000..e6c472c7a33f6
--- /dev/null
+++ b/test/project/ScriptProject/SubProject/Project.toml
@@ -0,0 +1,2 @@
+name = "SubProject"
+uuid = "50d58d6a-5ae2-46f7-9677-83c51ca667d5"
diff --git a/test/project/ScriptProject/bin/script.jl b/test/project/ScriptProject/bin/script.jl
new file mode 100644
index 0000000000000..e38351c9ab9a8
--- /dev/null
+++ b/test/project/ScriptProject/bin/script.jl
@@ -0,0 +1 @@
+println(Base.active_project())
diff --git a/test/project/SubProject/Devved/Project.toml b/test/project/SubProject/Devved/Project.toml
new file mode 100644
index 0000000000000..63088a132cb77
--- /dev/null
+++ b/test/project/SubProject/Devved/Project.toml
@@ -0,0 +1,3 @@
+name = "Devved"
+uuid = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599"
+version = "0.1.0"
diff --git a/test/project/SubProject/Devved/src/Devved.jl b/test/project/SubProject/Devved/src/Devved.jl
new file mode 100644
index 0000000000000..f3eb267409ece
--- /dev/null
+++ b/test/project/SubProject/Devved/src/Devved.jl
@@ -0,0 +1,5 @@
+module Devved
+
+greet() = print("Hello World!")
+
+end # module Devved
diff --git a/test/project/SubProject/Devved2/Project.toml b/test/project/SubProject/Devved2/Project.toml
new file mode 100644
index 0000000000000..c761630566116
--- /dev/null
+++ b/test/project/SubProject/Devved2/Project.toml
@@ -0,0 +1,3 @@
+name = "Devved2"
+uuid = "08f74b90-50f5-462f-80b9-a72b1258a17b"
+version = "0.1.0"
diff --git a/test/project/SubProject/Devved2/src/Devved2.jl b/test/project/SubProject/Devved2/src/Devved2.jl
new file mode 100644
index 0000000000000..9bd5df2793671
--- /dev/null
+++ b/test/project/SubProject/Devved2/src/Devved2.jl
@@ -0,0 +1,5 @@
+module Devved2
+
+greet() = print("Hello World!")
+
+end # module Devved2
diff --git a/test/project/SubProject/Manifest.toml b/test/project/SubProject/Manifest.toml
new file mode 100644
index 0000000000000..5d791a74652d4
--- /dev/null
+++ b/test/project/SubProject/Manifest.toml
@@ -0,0 +1,68 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.12.0-DEV"
+manifest_format = "2.0"
+project_hash = "620b9377bc807ff657e6618c8ccc24887eb40285"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+version = "1.11.0"
+
+[[deps.Devved]]
+path = "Devved"
+uuid = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599"
+version = "0.1.0"
+
+[[deps.Devved2]]
+path = "Devved2"
+uuid = "08f74b90-50f5-462f-80b9-a72b1258a17b"
+version = "0.1.0"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+version = "1.11.0"
+
+[[deps.Logging]]
+deps = ["StyledStrings"]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+version = "1.11.0"
+
+[[deps.Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+version = "1.11.0"
+
+[[deps.MyPkg]]
+deps = ["Devved", "Devved2"]
+path = "."
+uuid = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
+version = "0.0.0"
+
+[[deps.PackageThatIsSub]]
+deps = ["Devved2", "MyPkg"]
+path = "PackageThatIsSub"
+uuid = "1efb588c-9412-4e40-90a4-710420bd84aa"
+version = "0.1.0"
+
+[[deps.Random]]
+deps = ["SHA"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+version = "1.11.0"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+version = "1.11.0"
+
+[[deps.StyledStrings]]
+uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+version = "1.11.0"
+
+[[deps.Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+version = "1.11.0"
diff --git a/test/project/SubProject/PackageThatIsSub/Project.toml b/test/project/SubProject/PackageThatIsSub/Project.toml
new file mode 100644
index 0000000000000..e41dd998c5a1c
--- /dev/null
+++ b/test/project/SubProject/PackageThatIsSub/Project.toml
@@ -0,0 +1,14 @@
+name = "PackageThatIsSub"
+uuid = "1efb588c-9412-4e40-90a4-710420bd84aa"
+version = "0.1.0"
+
+[workspace]
+projects = ["test"]
+
+[deps]
+Devved2 = "08f74b90-50f5-462f-80b9-a72b1258a17b"
+MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
+
+[preferences]
+value = 2
+y = 2
diff --git a/test/project/SubProject/PackageThatIsSub/src/PackageThatIsSub.jl b/test/project/SubProject/PackageThatIsSub/src/PackageThatIsSub.jl
new file mode 100644
index 0000000000000..7f9ea94ccb156
--- /dev/null
+++ b/test/project/SubProject/PackageThatIsSub/src/PackageThatIsSub.jl
@@ -0,0 +1,5 @@
+module PackageThatIsSub
+
+greet() = print("Hello World!")
+
+end # module PackageThatIsSub
diff --git a/test/project/SubProject/PackageThatIsSub/test/Project.toml b/test/project/SubProject/PackageThatIsSub/test/Project.toml
new file mode 100644
index 0000000000000..dc8186e2b735e
--- /dev/null
+++ b/test/project/SubProject/PackageThatIsSub/test/Project.toml
@@ -0,0 +1,8 @@
+[deps]
+MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
+PackageThatIsSub = "1efb588c-9412-4e40-90a4-710420bd84aa"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[preferences]
+value = 3
+z = 3
diff --git a/test/project/SubProject/Project.toml b/test/project/SubProject/Project.toml
new file mode 100644
index 0000000000000..40fb20eee6826
--- /dev/null
+++ b/test/project/SubProject/Project.toml
@@ -0,0 +1,13 @@
+name = "MyPkg"
+uuid = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
+
+[workspace]
+projects = ["sub", "PackageThatIsSub", "test", "nested/deep"]
+
+[deps]
+Devved = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599"
+Devved2 = "08f74b90-50f5-462f-80b9-a72b1258a17b"
+
+[preferences]
+value = 1
+x = 1
diff --git a/test/project/SubProject/nested/deep/Project.toml b/test/project/SubProject/nested/deep/Project.toml
new file mode 100644
index 0000000000000..5567e6f4974db
--- /dev/null
+++ b/test/project/SubProject/nested/deep/Project.toml
@@ -0,0 +1,2 @@
+name = "DeepNested"
+uuid = "d5e3a334-7f12-4e5f-9ab8-123456789abc"
diff --git a/test/project/SubProject/nested/deep/src/DeepNested.jl b/test/project/SubProject/nested/deep/src/DeepNested.jl
new file mode 100644
index 0000000000000..3e5553c1fd9ba
--- /dev/null
+++ b/test/project/SubProject/nested/deep/src/DeepNested.jl
@@ -0,0 +1,2 @@
+module DeepNested
+end
diff --git a/test/project/SubProject/src/MyPkg.jl b/test/project/SubProject/src/MyPkg.jl
new file mode 100644
index 0000000000000..6d84954645d55
--- /dev/null
+++ b/test/project/SubProject/src/MyPkg.jl
@@ -0,0 +1,3 @@
+module MyPkg
+
+end
diff --git a/test/project/SubProject/sub/Project.toml b/test/project/SubProject/sub/Project.toml
new file mode 100644
index 0000000000000..50aa238e91d57
--- /dev/null
+++ b/test/project/SubProject/sub/Project.toml
@@ -0,0 +1,3 @@
+[deps]
+Devved = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599"
+MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
diff --git a/test/project/SubProject/test/Project.toml b/test/project/SubProject/test/Project.toml
new file mode 100644
index 0000000000000..b64312e4b1ee2
--- /dev/null
+++ b/test/project/SubProject/test/Project.toml
@@ -0,0 +1,4 @@
+[deps]
+MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Devved = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599"
diff --git a/test/project/SyntaxVersioning/explicit/Manifest.toml b/test/project/SyntaxVersioning/explicit/Manifest.toml
new file mode 100644
index 0000000000000..eeeedf53296c8
--- /dev/null
+++ b/test/project/SyntaxVersioning/explicit/Manifest.toml
@@ -0,0 +1,19 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.14.0-DEV"
+manifest_format = "2.1"
+
+[[deps.VersionedDep1]]
+path = "VersionedDep1"
+uuid = "f08855a0-36cb-4a32-8ae5-a227b709c612"
+syntax.julia_version = "1.13.0"
+
+[[deps.VersionedDep2]]
+path = "VersionedDep2"
+uuid = "e127e659-a899-4a00-b565-5b74face18ba"
+syntax.julia_version = "1.14.0"
+
+[[deps.VersionedDep3]]
+path = "VersionedDep3"
+uuid = "6ae26a88-8a9e-4c29-a00a-ea2b9a48e6e1"
+syntax.julia_version = "1.0.0"
diff --git a/test/project/SyntaxVersioning/explicit/Project.toml b/test/project/SyntaxVersioning/explicit/Project.toml
new file mode 100644
index 0000000000000..52501f4979bf1
--- /dev/null
+++ b/test/project/SyntaxVersioning/explicit/Project.toml
@@ -0,0 +1,4 @@
+[deps]
+VersionedDep1 = "f08855a0-36cb-4a32-8ae5-a227b709c612"
+VersionedDep2 = "e127e659-a899-4a00-b565-5b74face18ba"
+VersionedDep3 = "6ae26a88-8a9e-4c29-a00a-ea2b9a48e6e1"
diff --git a/test/project/SyntaxVersioning/explicit/VersionedDep1/Project.toml b/test/project/SyntaxVersioning/explicit/VersionedDep1/Project.toml
new file mode 100644
index 0000000000000..0b35c64ade6c9
--- /dev/null
+++ b/test/project/SyntaxVersioning/explicit/VersionedDep1/Project.toml
@@ -0,0 +1,3 @@
+name = "VersionedDep1"
+uuid = "f08855a0-36cb-4a32-8ae5-a227b709c612"
+syntax.julia_version = "1.13"
diff --git a/test/project/SyntaxVersioning/explicit/VersionedDep1/src/VersionedDep1.jl b/test/project/SyntaxVersioning/explicit/VersionedDep1/src/VersionedDep1.jl
new file mode 100644
index 0000000000000..6154c45f97ee2
--- /dev/null
+++ b/test/project/SyntaxVersioning/explicit/VersionedDep1/src/VersionedDep1.jl
@@ -0,0 +1,3 @@
+module VersionedDep1
+    const ver = (@Base.Experimental.VERSION).syntax
+end
diff --git a/test/project/SyntaxVersioning/explicit/VersionedDep2/Project.toml b/test/project/SyntaxVersioning/explicit/VersionedDep2/Project.toml
new file mode 100644
index 0000000000000..2876fa5f51e55
--- /dev/null
+++ b/test/project/SyntaxVersioning/explicit/VersionedDep2/Project.toml
@@ -0,0 +1,3 @@
+name = "VersionedDep2"
+uuid = "e127e659-a899-4a00-b565-5b74face18ba"
+syntax.julia_version = "1.14"
diff --git a/test/project/SyntaxVersioning/explicit/VersionedDep2/src/VersionedDep2.jl b/test/project/SyntaxVersioning/explicit/VersionedDep2/src/VersionedDep2.jl
new file mode 100644
index 0000000000000..f3bf4197e66c1
--- /dev/null
+++ b/test/project/SyntaxVersioning/explicit/VersionedDep2/src/VersionedDep2.jl
@@ -0,0 +1,3 @@
+module VersionedDep2
+    const ver = (@Base.Experimental.VERSION).syntax
+end
diff --git a/test/project/SyntaxVersioning/explicit/VersionedDep3/Project.toml b/test/project/SyntaxVersioning/explicit/VersionedDep3/Project.toml
new file mode 100644
index 0000000000000..0fc0cf7d21895
--- /dev/null
+++ b/test/project/SyntaxVersioning/explicit/VersionedDep3/Project.toml
@@ -0,0 +1,3 @@
+name = "VersionedDep3"
+uuid = "6ae26a88-8a9e-4c29-a00a-ea2b9a48e6e1"
+syntax.julia_version = "1.0"
diff --git a/test/project/SyntaxVersioning/explicit/VersionedDep3/src/VersionedDep3.jl b/test/project/SyntaxVersioning/explicit/VersionedDep3/src/VersionedDep3.jl
new file mode 100644
index 0000000000000..7ad86a56308c6
--- /dev/null
+++ b/test/project/SyntaxVersioning/explicit/VersionedDep3/src/VersionedDep3.jl
@@ -0,0 +1,3 @@
+module VersionedDep3
+    const ver = (@Base.Experimental.VERSION).syntax
+end
diff --git a/test/project/SyntaxVersioning/implicit/Versioned1/Project.toml b/test/project/SyntaxVersioning/implicit/Versioned1/Project.toml
new file mode 100644
index 0000000000000..30d8e2686b73f
--- /dev/null
+++ b/test/project/SyntaxVersioning/implicit/Versioned1/Project.toml
@@ -0,0 +1,3 @@
+name = "Versioned1"
+uuid = "5039f352-f8db-42c3-a2c5-1d61ed1e55b8"
+syntax.julia_version = "1.13"
diff --git a/test/project/SyntaxVersioning/implicit/Versioned1/src/Versioned1.jl b/test/project/SyntaxVersioning/implicit/Versioned1/src/Versioned1.jl
new file mode 100644
index 0000000000000..0622ad832c31e
--- /dev/null
+++ b/test/project/SyntaxVersioning/implicit/Versioned1/src/Versioned1.jl
@@ -0,0 +1,3 @@
+module Versioned1
+    const ver = (@Base.Experimental.VERSION).syntax
+end
diff --git a/test/project/SyntaxVersioning/implicit/Versioned2/Project.toml b/test/project/SyntaxVersioning/implicit/Versioned2/Project.toml
new file mode 100644
index 0000000000000..576b6a53d083f
--- /dev/null
+++ b/test/project/SyntaxVersioning/implicit/Versioned2/Project.toml
@@ -0,0 +1,3 @@
+name = "Versioned2"
+uuid = "3a4c0187-8e98-47c1-abc0-783d1a175621"
+syntax.julia_version = "1.14"
diff --git a/test/project/SyntaxVersioning/implicit/Versioned2/src/Versioned2.jl b/test/project/SyntaxVersioning/implicit/Versioned2/src/Versioned2.jl
new file mode 100644
index 0000000000000..0cf90ce3dabde
--- /dev/null
+++ b/test/project/SyntaxVersioning/implicit/Versioned2/src/Versioned2.jl
@@ -0,0 +1,3 @@
+module Versioned2
+    const ver = (@Base.Experimental.VERSION).syntax
+end
diff --git a/test/project/SyntaxVersioning/implicit/Versioned3/Project.toml b/test/project/SyntaxVersioning/implicit/Versioned3/Project.toml
new file mode 100644
index 0000000000000..3522ebf04a699
--- /dev/null
+++ b/test/project/SyntaxVersioning/implicit/Versioned3/Project.toml
@@ -0,0 +1,5 @@
+name = "Versioned3"
+uuid = "06d511b3-69b4-4d20-8d3b-d39263331254"
+
+[compat]
+julia = "1.13 - 2"
diff --git a/test/project/SyntaxVersioning/implicit/Versioned3/src/Versioned3.jl b/test/project/SyntaxVersioning/implicit/Versioned3/src/Versioned3.jl
new file mode 100644
index 0000000000000..72ba851940b88
--- /dev/null
+++ b/test/project/SyntaxVersioning/implicit/Versioned3/src/Versioned3.jl
@@ -0,0 +1,3 @@
+module Versioned3
+    const ver = (@Base.Experimental.VERSION).syntax
+end
diff --git a/test/project/SyntaxVersioning/implicit/Versioned4/Project.toml b/test/project/SyntaxVersioning/implicit/Versioned4/Project.toml
new file mode 100644
index 0000000000000..0bfee425a3cb7
--- /dev/null
+++ b/test/project/SyntaxVersioning/implicit/Versioned4/Project.toml
@@ -0,0 +1,2 @@
+name = "Versioned4"
+uuid = "3a4c0187-8e98-47c1-abc0-783d1a175621"
diff --git a/test/project/SyntaxVersioning/implicit/Versioned4/src/Versioned4.jl b/test/project/SyntaxVersioning/implicit/Versioned4/src/Versioned4.jl
new file mode 100644
index 0000000000000..837e942386fc3
--- /dev/null
+++ b/test/project/SyntaxVersioning/implicit/Versioned4/src/Versioned4.jl
@@ -0,0 +1,3 @@
+module Versioned4
+    const ver = (@Base.Experimental.VERSION).syntax
+end
diff --git a/test/project/SyntaxVersioning/implicit/Versioned5/Project.toml b/test/project/SyntaxVersioning/implicit/Versioned5/Project.toml
new file mode 100644
index 0000000000000..f17617f42ce72
--- /dev/null
+++ b/test/project/SyntaxVersioning/implicit/Versioned5/Project.toml
@@ -0,0 +1,5 @@
+name = "Versioned5"
+uuid = "1805a4d1-8cc9-402d-b9fb-3f94ad9a89b5"
+
+[compat]
+julia = "1.14 - 2"
diff --git a/test/project/SyntaxVersioning/implicit/Versioned5/src/Versioned5.jl b/test/project/SyntaxVersioning/implicit/Versioned5/src/Versioned5.jl
new file mode 100644
index 0000000000000..43a6dbd102291
--- /dev/null
+++ b/test/project/SyntaxVersioning/implicit/Versioned5/src/Versioned5.jl
@@ -0,0 +1,3 @@
+module Versioned5
+    const ver = (@Base.Experimental.VERSION).syntax
+end
diff --git a/test/project/deps/BadStdlibDeps/Manifest.toml b/test/project/deps/BadStdlibDeps/Manifest.toml
new file mode 100644
index 0000000000000..32aaa0b83dc0a
--- /dev/null
+++ b/test/project/deps/BadStdlibDeps/Manifest.toml
@@ -0,0 +1,51 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.12.0-DEV"
+manifest_format = "2.0"
+project_hash = "dc9d33b0ee13d9466bdb75b8d375808a534a79ec"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+version = "1.11.0"
+
+# This is intentionally missing LibGit2_jll for testing purposes
+[[deps.LibGit2]]
+deps = ["NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+version = "1.11.0"
+
+[[deps.LibGit2_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"]
+uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+version = "1.8.0+0"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.11.0+1"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+version = "1.11.0"
+
+[[deps.MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+version = "2.28.6+1"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.2.0"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+version = "1.11.0"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+version = "1.11.0"
diff --git a/test/project/deps/BadStdlibDeps/Project.toml b/test/project/deps/BadStdlibDeps/Project.toml
new file mode 100644
index 0000000000000..223889185ea15
--- /dev/null
+++ b/test/project/deps/BadStdlibDeps/Project.toml
@@ -0,0 +1,2 @@
+[deps]
+LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
diff --git a/test/project/deps/BadStdlibDeps2/Manifest.toml b/test/project/deps/BadStdlibDeps2/Manifest.toml
new file mode 100644
index 0000000000000..988efc8da56f3
--- /dev/null
+++ b/test/project/deps/BadStdlibDeps2/Manifest.toml
@@ -0,0 +1,54 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.12.0-DEV"
+manifest_format = "2.0"
+project_hash = "dc9d33b0ee13d9466bdb75b8d375808a534a79ec"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+version = "1.11.0"
+
+[[deps.LibGit2]]
+deps = ["NetworkOptions", "Printf", "SHA", "LibGit2_jll"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+version = "1.11.0"
+
+# This is an stdlib but intentionally has a git-tree-sha1 because
+# we are emulating that the manifest comes from a version where
+# LibGit2_jll was not an stdlib
+[[deps.LibGit2_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"]
+git-tree-sha1 = "1111111111111111111111111111111111111111"
+uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+version = "1.8.0+0"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.11.0+1"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+version = "1.11.0"
+
+[[deps.MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+version = "2.28.6+1"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.2.0"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+version = "1.11.0"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+version = "1.11.0"
diff --git a/test/project/deps/BadStdlibDeps2/Project.toml b/test/project/deps/BadStdlibDeps2/Project.toml
new file mode 100644
index 0000000000000..223889185ea15
--- /dev/null
+++ b/test/project/deps/BadStdlibDeps2/Project.toml
@@ -0,0 +1,2 @@
+[deps]
+LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
diff --git a/test/project/deps/CovTest.jl/Project.toml b/test/project/deps/CovTest.jl/Project.toml
new file mode 100644
index 0000000000000..97fb2c7d9cfce
--- /dev/null
+++ b/test/project/deps/CovTest.jl/Project.toml
@@ -0,0 +1,3 @@
+name = "CovTest"
+uuid = "f1f4390d-b815-473a-b5dd-5af6e1d717cb"
+version = "0.1.0"
diff --git a/test/project/deps/CovTest.jl/src/CovTest.jl b/test/project/deps/CovTest.jl/src/CovTest.jl
new file mode 100644
index 0000000000000..bd172fc3a00f4
--- /dev/null
+++ b/test/project/deps/CovTest.jl/src/CovTest.jl
@@ -0,0 +1,26 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module CovTest
+
+function foo()
+    x = 1
+    y = 2
+    z = x * y
+    return z
+end
+
+function bar()
+    x = 1
+    y = 2
+    z = x * y
+    return z
+end
+
+if Base.generating_output()
+    # precompile foo but not bar
+    foo()
+end
+
+export foo, bar
+
+end #module
diff --git a/test/ranges.jl b/test/ranges.jl
index 0e195f5dde24e..9bad12e6692d2 100644
--- a/test/ranges.jl
+++ b/test/ranges.jl
@@ -3,6 +3,9 @@
 using Base.Checked: checked_length
 using InteractiveUtils: code_llvm
 
+isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
+using .Main.OffsetArrays
+
 @testset "range construction" begin
     @test_throws ArgumentError range(start=1, step=1, stop=2, length=10)
     @test_throws ArgumentError range(start=1, step=1, stop=10, length=11)
@@ -62,6 +65,10 @@ using InteractiveUtils: code_llvm
 
     unitrangeerrstr = "promotion of types Char and Char failed to change any arguments"
     @test_throws unitrangeerrstr UnitRange('a', 'b')
+
+    @test step(false:true) === true # PR 56405
+    @test eltype((false:true) + (Int8(0):Int8(1))) === Int8
+    @test eltype((false:true:true) + (Int8(0):Int8(1))) === Int8
 end
 
 using Dates, Random
@@ -223,7 +230,7 @@ end
             @test cmp_sn2(Tw(xw+yw), astuple(x+y)..., slopbits)
             @test cmp_sn2(Tw(xw-yw), astuple(x-y)..., slopbits)
             @test cmp_sn2(Tw(xw*yw), astuple(x*y)..., slopbits)
-            @test cmp_sn2(Tw(xw/yw), astuple(x/y)..., slopbits)
+            @test cmp_sn2(Tw(xw/yw), astuple(x/y)..., slopbits+1) # extra bit because division is hard
             y = rand(T)
             yw = widen(widen(y))
             @test cmp_sn2(Tw(xw+yw), astuple(x+y)..., slopbits)
@@ -232,9 +239,23 @@ end
             @test cmp_sn2(Tw(xw/yw), astuple(x/y)..., slopbits)
         end
     end
+    @testset "high precision of varying types" begin
+        x = Float32(π)
+        y = Float64(Base.MathConstants.γ)
+        @test Base.mul12(x, y)[1] ≈ Base.mul12(Float64(π), y)[1] rtol=1e-6
+        @test Base.mul12(x, y)[2] ≈ Base.mul12(Float64(π), y)[2] atol=1e-15
+        @test Base.div12(x, y)[1] ≈ Base.div12(Float64(π), y)[1] rtol=1e-6
+        @test Base.div12(x, y)[2] ≈ Base.div12(Float64(π), y)[2] atol=1e-15
+        xtp = Base.TwicePrecision{Float32}(π)
+        ytp = Base.TwicePrecision{Float64}(Base.MathConstants.γ)
+        @test Float32(xtp + ytp) ≈ Float32(Base.TwicePrecision{Float64}(π) + ytp)
+    end
 
     x1 = Base.TwicePrecision{Float64}(1)
     x0 = Base.TwicePrecision{Float64}(0)
+    @test eltype(x1) == Float64
+    @test eltype(typeof(x1)) == Float64
+    @test zero(typeof(x1)) === x0
     xinf = Base.TwicePrecision{Float64}(Inf)
     @test Float64(x1+x0)  == 1
     @test Float64(x1+0)   == 1
@@ -275,15 +296,10 @@ end
 
     rand_twiceprecision(::Type{T}) where {T<:Number} = Base.TwicePrecision{T}(rand(widen(T)))
 
-    rand_twiceprecision_is_ok(::Type{T}) where {T<:Number} = @test !iszero(rand_twiceprecision(T).lo)
-
     # For this test the `BigFloat` mantissa needs to be just a bit
     # larger than the `Float64` mantissa
     setprecision(BigFloat, 70) do
         n = 10
-        @testset "rand twiceprecision is ok" for T ∈ (Float32, Float64), i ∈ 1:n
-            rand_twiceprecision_is_ok(T)
-        end
         @testset "twiceprecision roundtrip is not lossy 1" for i ∈ 1:n
             twiceprecision_roundtrip_is_not_lossy(Float64, rand(BigFloat))
         end
@@ -297,6 +313,13 @@ end
             twiceprecision_is_normalized(Base.TwicePrecision{Float64}(rand_twiceprecision(Float32)))
         end
     end
+
+    @testset "displaying a complex range (#52713)" begin
+        r = 1.0*(1:5) .+ im
+        @test startswith(repr(r), repr(first(r)))
+        @test endswith(repr(r), repr(last(r)))
+        @test occursin(repr(step(r)), repr(r))
+    end
 end
 @testset "ranges" begin
     @test size(10:1:0) == (0,)
@@ -413,17 +436,57 @@ end
     @testset "findfirst" begin
         @test findfirst(==(1), Base.IdentityUnitRange(-1:1)) == 1
         @test findfirst(isequal(3), Base.OneTo(10)) == 3
-        @test findfirst(==(0), Base.OneTo(10)) == nothing
-        @test findfirst(==(11), Base.OneTo(10)) == nothing
+        @test findfirst(==(0), Base.OneTo(10)) === nothing
+        @test findfirst(==(11), Base.OneTo(10)) === nothing
+        @test @inferred((r -> Val(findfirst(iszero, r)))(Base.OneTo(10))) == Val(nothing)
+        @test findfirst(isone, Base.OneTo(10)) === 1
+        @test findfirst(isone, Base.OneTo(0)) === nothing
         @test findfirst(==(4), Int16(3):Int16(7)) === Int(2)
-        @test findfirst(==(2), Int16(3):Int16(7)) == nothing
-        @test findfirst(isequal(8), 3:7) == nothing
+        @test findfirst(==(2), Int16(3):Int16(7)) === nothing
+        @test findfirst(isequal(8), 3:7) === nothing
+        @test findfirst(==(0), UnitRange(-0.5, 0.5)) === nothing
+        @test findfirst(==(2), big(1):big(2)) === 2
         @test findfirst(isequal(7), 1:2:10) == 4
+        @test findfirst(iszero, -5:5) == 6
+        @test findfirst(iszero, 2:5) === nothing
+        @test findfirst(iszero, 6:5) === nothing
+        @test findfirst(isone, -5:5) == 7
+        @test findfirst(isone, 2:5) === nothing
+        @test findfirst(isone, 6:5) === nothing
         @test findfirst(==(7), 1:2:10) == 4
-        @test findfirst(==(10), 1:2:10) == nothing
-        @test findfirst(==(11), 1:2:10) == nothing
+        @test findfirst(==(10), 1:2:10) === nothing
+        @test findfirst(==(11), 1:2:10) === nothing
         @test findfirst(==(-7), 1:-1:-10) == 9
-        @test findfirst(==(2),1:-1:2) == nothing
+        @test findfirst(==(2),1:-1:2) === nothing
+        @test findfirst(iszero, 5:-2:-5) === nothing
+        @test findfirst(iszero, 6:-2:-6) == 4
+        @test findfirst(==(Int128(2)), Int128(1):Int128(1):Int128(4)) === 2
+    end
+    @testset "findlast" begin
+        @test findlast(==(1), Base.IdentityUnitRange(-1:1)) == 1
+        @test findlast(isequal(3), Base.OneTo(10)) == 3
+        @test findlast(==(0), Base.OneTo(10)) === nothing
+        @test findlast(==(11), Base.OneTo(10)) === nothing
+        @test @inferred((() -> Val(findlast(iszero, Base.OneTo(10))))()) == Val(nothing)
+        @test findlast(isone, Base.OneTo(10)) == 1
+        @test findlast(isone, Base.OneTo(0)) === nothing
+        @test findlast(==(4), Int16(3):Int16(7)) === Int(2)
+        @test findlast(==(2), Int16(3):Int16(7)) === nothing
+        @test findlast(isequal(8), 3:7) === nothing
+        @test findlast(==(0), UnitRange(-0.5, 0.5)) === nothing
+        @test findlast(==(2), big(1):big(2)) === 2
+        @test findlast(isequal(7), 1:2:10) == 4
+        @test findlast(iszero, -5:5) == 6
+        @test findlast(iszero, 2:5) === nothing
+        @test findlast(iszero, 6:5) === nothing
+        @test findlast(==(7), 1:2:10) == 4
+        @test findlast(==(10), 1:2:10) === nothing
+        @test findlast(==(11), 1:2:10) === nothing
+        @test findlast(==(-7), 1:-1:-10) == 9
+        @test findlast(==(2),1:-1:2) === nothing
+        @test findlast(iszero, 5:-2:-5) === nothing
+        @test findlast(iszero, 6:-2:-6) == 4
+        @test findlast(==(Int128(2)), Int128(1):Int128(1):Int128(4)) === 2
     end
     @testset "reverse" begin
         @test reverse(reverse(1:10)) == 1:10
@@ -545,6 +608,13 @@ end
         @test sort(1:10, rev=true) == 10:-1:1
         @test sort(-3:3, by=abs) == [0,-1,1,-2,2,-3,3]
         @test partialsort(1:10, 4) == 4
+
+        @testset "offset ranges" begin
+            x = OffsetArrays.IdOffsetRange(values=4:13, indices=4:13)
+            @test sort(x) === x === sort!(x)
+            @test sortperm(x) == eachindex(x)
+            @test issorted(x[sortperm(x)])
+        end
     end
     @testset "in" begin
         @test 0 in UInt(0):100:typemax(UInt)
@@ -629,13 +699,44 @@ end
         @test Duck(4) ∈ Duck(1):Duck(5)
         @test Duck(0) ∉ Duck(1):Duck(5)
     end
+    @testset "unique" begin
+        struct MyStepRangeLen{T,R} <: AbstractRange{T}
+           x :: R
+        end
+        MyStepRangeLen(s::StepRangeLen{T}) where {T} = MyStepRangeLen{T,typeof(s)}(s)
+        Base.first(s::MyStepRangeLen) = first(s.x)
+        Base.last(s::MyStepRangeLen) = last(s.x)
+        Base.length(s::MyStepRangeLen) = length(s.x)
+        Base.step(s::MyStepRangeLen) = step(s.x)
+        sr = StepRangeLen(1,0,4)
+        @test unique(MyStepRangeLen(sr)) == unique(sr)
+    end
 end
 @testset "indexing range with empty range (#4309)" begin
-    @test (3:6)[5:4] === 7:6
+    @test (@inferred (3:6)[5:4]) === 7:6
     @test_throws BoundsError (3:6)[5:5]
     @test_throws BoundsError (3:6)[5]
-    @test (0:2:10)[7:6] === 12:2:10
+    @test (@inferred (0:2:10)[7:6]) === 12:2:11
     @test_throws BoundsError (0:2:10)[7:7]
+
+    for start in [true], stop in [true, false]
+        @test (@inferred (start:stop)[1:0]) === true:false
+    end
+    @test (@inferred (true:false)[true:false]) == true:false
+
+    @testset "issue #40760" begin
+        empty_range = 1:0
+        r = range(false, length = 0)
+        @test r isa UnitRange && first(r) == 0 && last(r) == -1
+        r = (true:true)[empty_range]
+        @test r isa UnitRange && first(r) == true && last(r) == false
+        @testset for r in Any[true:true, true:true:true, 1:2, 1:1:2]
+            @test (@inferred r[1:0]) isa AbstractRange
+            @test r[1:0] == empty_range
+            @test (@inferred r[1:1:0]) isa AbstractRange
+            @test r[1:1:0] == empty_range
+        end
+    end
 end
 # indexing with negative ranges (#8351)
 for a=AbstractRange[3:6, 0:2:10], b=AbstractRange[0:1, 2:-1:0]
@@ -654,6 +755,8 @@ end
         @test length(typemin(T):typemax(T)) == T(0)
         @test length(zero(T):one(T):typemax(T)) == typemin(T)
         @test length(typemin(T):one(T):typemax(T)) == T(0)
+        @test length(StepRange{T,BigInt}(zero(T), 1, typemax(T))) == typemin(T)
+        @test length(StepRange{T,BigInt}(typemin(T), 1, typemax(T))) == T(0)
         @test_throws OverflowError checked_length(zero(T):typemax(T))
         @test_throws OverflowError checked_length(typemin(T):typemax(T))
         @test_throws OverflowError checked_length(zero(T):one(T):typemax(T))
@@ -1007,6 +1110,7 @@ end
         end
         a = prevfloat(a)
     end
+    @test (1:2:3)[StepRangeLen{Bool}(true,-1,2)] == [1]
 end
 
 # issue #20380
@@ -1209,6 +1313,20 @@ end
     @test convert(StepRange, 0:5) === 0:1:5
     @test convert(StepRange{Int128,Int128}, 0.:5) === Int128(0):Int128(1):Int128(5)
 
+    @test StepRange(1:1:4) === 1:1:4
+    @test StepRange{Int32}(1:1:4) === StepRange{Int32,Int}(1,1,4)
+
+    struct MyStepRange57718{T,S} <: OrdinalRange{T,S}
+        r :: StepRange{T,S}
+    end
+    Base.first(mr::MyStepRange57718) = first(mr.r)
+    Base.last(mr::MyStepRange57718) = last(mr.r)
+    Base.step(mr::MyStepRange57718) = step(mr.r)
+    Base.length(mr::MyStepRange57718) = length(mr.r)
+
+    @test StepRange(MyStepRange57718(1:1:4)) === 1:1:4
+    @test StepRange{Int32}(MyStepRange57718(1:1:4)) === StepRange{Int32,Int}(1,1,4)
+
     @test_throws ArgumentError StepRange(1.1,1,5.1)
 
     @test promote(0f0:inv(3f0):1f0, 0.:2.:5.) === (0:1/3:1, 0.:2.:5.)
@@ -1286,6 +1404,8 @@ end
 
     @test sprint(show, UnitRange(1, 2)) == "1:2"
     @test sprint(show, StepRange(1, 2, 5)) == "1:2:5"
+
+    @test sprint(show, LinRange{Float32}(1.5, 2.5, 10)) == "LinRange{Float32}(1.5, 2.5, 10)"
 end
 
 @testset "Issue 11049, and related" begin
@@ -1462,8 +1582,8 @@ end
             (range(10, stop=20, length=5), 1, 5),
             (range(10.3, step=-2, length=7), 7, 1),
            ]
-        @test minimum(r) === r[imin]
-        @test maximum(r) === r[imax]
+        @test minimum(r) === minimum(r, init=typemax(eltype(r))) === r[imin]
+        @test maximum(r) === maximum(r, init=typemin(eltype(r))) === r[imax]
         @test imin === argmin(r)
         @test imax === argmax(r)
         @test extrema(r) === (r[imin], r[imax])
@@ -1491,6 +1611,9 @@ end
         @test size(r) == (3,)
         @test step(r) == 1
         @test first(r) == 1
+        @test first(r,2) === Base.OneTo(2)
+        @test first(r,20) === r
+        @test_throws ArgumentError first(r,-20)
         @test last(r) == 3
         @test minimum(r) == 1
         @test maximum(r) == 3
@@ -1522,6 +1645,9 @@ end
         @test findall(in(2:(length(r) - 1)), r) === 2:(length(r) - 1)
         @test findall(in(r), 2:(length(r) - 1)) === 1:(length(r) - 2)
     end
+    let r = Base.OneTo(Int8(4))
+        @test first(r,4) === r
+    end
     @test convert(Base.OneTo, 1:2) === Base.OneTo{Int}(2)
     @test_throws ArgumentError("first element must be 1, got 2") convert(Base.OneTo, 2:3)
     @test_throws ArgumentError("step must be 1, got 2") convert(Base.OneTo, 1:2:5)
@@ -1802,6 +1928,7 @@ Base.div(x::Displacement, y::Displacement) = Displacement(div(x.val, y.val))
 # required for collect (summing lengths); alternatively, should length return Int by default?
 Base.promote_rule(::Type{Displacement}, ::Type{Int}) = Int
 Base.convert(::Type{Int}, x::Displacement) = x.val
+Base.Int(x::Displacement) = x.val
 
 # Unsigned complement, for testing checked_length
 struct UPosition <: Unsigned
@@ -1959,8 +2086,10 @@ end
 end
 
 @testset "allocation of TwicePrecision call" begin
-    @test @allocated(0:286.493442:360) == 0
-    @test @allocated(0:286:360) == 0
+    let
+        @test @allocated(0:286.493442:360) == 0
+        @test @allocated(0:286:360) == 0
+    end
 end
 
 @testset "range with start and stop" begin
@@ -2302,6 +2431,7 @@ end
     @test_throws BoundsError r[true:true:false]
     @test_throws BoundsError r[true:true:true]
 end
+
 @testset "Non-Int64 endpoints that are identical (#39798)" begin
     for T in DataType[Float16,Float32,Float64,Bool,Int8,Int16,Int32,Int64,Int128,UInt8,UInt16,UInt32,UInt64,UInt128],
         r in [ LinRange(1, 1, 10), StepRangeLen(7, 0, 5) ]
@@ -2328,13 +2458,46 @@ end
     @test 0.2 * (-2:2:2) == [-0.4, 0, 0.4]
 end
 
-@testset "Indexing OneTo with IdentityUnitRange" begin
-    for endpt in Any[10, big(10), UInt(10)]
-        r = Base.OneTo(endpt)
-        inds = Base.IdentityUnitRange(3:5)
-        rs = r[inds]
-        @test rs === inds
-        @test_throws BoundsError r[Base.IdentityUnitRange(-1:100)]
+@testset "IdentityUnitRange indexing" begin
+    @testset "Indexing into an IdentityUnitRange" begin
+        @testset for r in Any[-1:20, Base.OneTo(20)]
+            ri = Base.IdentityUnitRange(r)
+            @test_throws "invalid index" ri[true]
+            @testset for s in Any[Base.OneTo(6), Base.OneTo{BigInt}(6), 3:6, big(3):big(6), 3:2:7]
+                @test mapreduce(==, &, ri[s], ri[s[begin]]:step(s):ri[s[end]])
+                @test axes(ri[s]) == axes(s)
+                @test eltype(ri[s]) == eltype(ri)
+            end
+        end
+        @testset "Bool indices" begin
+            r = 1:1
+            @test Base.IdentityUnitRange(r)[true:true] == r[true:true]
+            @test Base.IdentityUnitRange(r)[true:true:true] == r[true:true:true]
+            @test_throws BoundsError Base.IdentityUnitRange(1:2)[true:true]
+            @test_throws BoundsError Base.IdentityUnitRange(1:2)[true:true:true]
+        end
+    end
+    @testset "Indexing with IdentityUnitRange" begin
+        @testset "OneTo" begin
+            @testset for endpt in Any[10, big(12), UInt(11)]
+                r = Base.OneTo(endpt)
+                inds = Base.IdentityUnitRange(3:5)
+                rs = r[inds]
+                @test rs == inds
+                @test axes(rs) == axes(inds)
+                @test_throws BoundsError r[Base.IdentityUnitRange(-1:100)]
+            end
+        end
+        @testset "IdentityUnitRange" begin
+            @testset for r in Any[Base.IdentityUnitRange(1:4), Base.IdentityUnitRange(Base.OneTo(4)), Base.Slice(1:4), Base.Slice(Base.OneTo(4))]
+                @testset for s in Any[Base.IdentityUnitRange(3:3), Base.IdentityUnitRange(Base.OneTo(2)), Base.Slice(3:3), Base.Slice(Base.OneTo(2))]
+                    rs = r[s]
+                    @test rs == s
+                    @test axes(rs) == axes(s)
+                end
+                @test_throws BoundsError r[Base.IdentityUnitRange(first(r):last(r) + 1)]
+            end
+        end
     end
 end
 
@@ -2498,3 +2661,173 @@ end
     # a case that using mul_with_overflow & add_with_overflow might get wrong:
     @test (-10:2:typemax(Int))[typemax(Int)÷2+2] == typemax(Int)-9
 end
+
+@testset "collect with specialized vcat" begin
+    struct OneToThree <: AbstractUnitRange{Int} end
+    Base.size(r::OneToThree) = (3,)
+    Base.first(r::OneToThree) = 1
+    Base.length(r::OneToThree) = 3
+    Base.last(r::OneToThree) = 3
+    function Base.getindex(r::OneToThree, i::Int)
+        checkbounds(r, i)
+        i
+    end
+    Base.vcat(r::OneToThree) = r
+    r = OneToThree()
+    a = Array(r)
+    @test a isa Vector{Int}
+    @test a == r
+    @test collect(r) isa Vector{Int}
+    @test collect(r) == r
+end
+
+@testset "isassigned" begin
+    for (r, val) in ((1:3, 3), (1:big(2)^65, big(2)^65))
+        @test isassigned(r, lastindex(r))
+        # test that the indexing actually succeeds
+        @test r[end] == val
+        @test_throws ArgumentError isassigned(r, true)
+    end
+
+end
+
+@testset "unsigned index #44895" begin
+    x = range(-1,1,length=11)
+    @test x[UInt(1)] == -1.0
+    a = StepRangeLen(1,2,3,2)
+    @test a[UInt(1)] == -1
+end
+
+@testset "StepRangeLen of CartesianIndex-es" begin
+    CIstart = CartesianIndex(2,3)
+    CIstep = CartesianIndex(1,1)
+    r = StepRangeLen(CIstart, CIstep, 4)
+    @test length(r) == 4
+    @test first(r) == CIstart
+    @test step(r) == CIstep
+    @test last(r) == CartesianIndex(5,6)
+    @test r[2] == CartesianIndex(3,4)
+
+    @test repr(r) == "StepRangeLen($CIstart, $CIstep, 4)"
+
+    r = StepRangeLen(CartesianIndex(), CartesianIndex(), 3)
+    @test all(==(CartesianIndex()), r)
+    @test length(r) == 3
+    @test repr(r) == "StepRangeLen(CartesianIndex(), CartesianIndex(), 3)"
+
+    errmsg = ("deliberately unsupported for CartesianIndex", "StepRangeLen")
+    @test_throws errmsg range(CartesianIndex(1), step=CartesianIndex(1), length=3)
+end
+
+@testset "logrange" begin
+    # basic idea
+    @test logrange(2, 16, 4) ≈ [2, 4, 8, 16]
+    @test logrange(1/8, 8.0, 7) ≈ [0.125, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0]
+    @test logrange(1000, 1, 4) ≈ [1000, 100, 10, 1]
+    @test logrange(1, 10^9, 19)[1:2:end] ≈ 10 .^ (0:9)
+
+    # endpoints
+    @test logrange(0.1f0, 100, 33)[1] === 0.1f0
+    @test logrange(0.789, 123_456, 135_790)[[begin, end]] == [0.789, 123_456]
+    @test logrange(nextfloat(0f0), floatmax(Float32), typemax(Int))[end] === floatmax(Float32)
+    @test logrange(nextfloat(Float16(0)), floatmax(Float16), 66_000)[end] === floatmax(Float16)
+    @test first(logrange(pi, 2pi, 3000)) === logrange(pi, 2pi, 3000)[1] === Float64(pi)
+    if Int == Int64
+        @test logrange(0.1, 1000, 2^54)[end] === 1000.0
+    end
+
+    # empty, only, constant
+    @test first(logrange(1, 2, 0)) === 1.0
+    @test last(logrange(1, 2, 0)) === 2.0
+    @test collect(logrange(1, 2, 0)) == Float64[]
+    @test only(logrange(2pi, 2pi, 1)) === logrange(2pi, 2pi, 1)[1] === 2pi
+    @test logrange(1, 1, 3) == fill(1.0, 3)
+
+    # subnormal Float64
+    x = logrange(1e-320, 1e-300, 21) .* 1e300
+    @test x ≈ logrange(1e-20, 1, 21) rtol=1e-6
+
+    # types
+    @test eltype(logrange(1, 10, 3)) == Float64
+    @test eltype(logrange(1, 10, Int32(3))) == Float64
+    @test eltype(logrange(1, 10f0, 3)) == Float32
+    @test eltype(logrange(1f0, 10, 3)) == Float32
+    @test eltype(logrange(1, big(10), 3)) == BigFloat
+    @test logrange(big"0.3", big(pi), 50)[1] == big"0.3"
+    @test logrange(big"0.3", big(pi), 50)[end] == big(pi)
+
+    # more constructors
+    @test logrange(1,2,length=3) === Base.LogRange(1,2,3) == Base.LogRange{Float64}(1,2,3)
+    @test logrange(1f0, 2f0, length=3) == Base.LogRange{Float32}(1,2,3)
+
+    # errors
+    @test_throws UndefKeywordError logrange(1, 10)  # no default length
+    @test_throws ArgumentError logrange(1, 10, -1)  # negative length
+    @test_throws ArgumentError logrange(1, 10, 1) # endpoints must not differ
+    @test_throws DomainError logrange(1, -1, 3)   # needs complex numbers
+    @test_throws DomainError logrange(-1, -2, 3)  # not supported, for now
+    @test_throws MethodError logrange(1, 2+3im, length=4)  # not supported, for now
+    @test_throws ArgumentError logrange(1, 10, 2)[true]  # bad index
+    @test_throws BoundsError logrange(1, 10, 2)[3]
+    @test_throws ArgumentError Base.LogRange{Int}(1,4,5)  # no integer ranges
+    @test_throws MethodError Base.LogRange(1,4, length=5)  # type does not take keyword
+    # (not sure if these should ideally be DomainError or ArgumentError)
+    @test_throws DomainError logrange(1, Inf, 3)
+    @test_throws DomainError logrange(0, 2, 3)
+    @test_throws DomainError logrange(1, NaN, 3)
+    @test_throws DomainError logrange(NaN, 2, 3)
+
+    # printing
+    @test repr(Base.LogRange(1,2,3)) == "LogRange{Float64}(1.0, 2.0, 3)"  # like 2-arg show
+    @test repr("text/plain", Base.LogRange(1,2,3)) == "3-element Base.LogRange{Float64, Base.TwicePrecision{Float64}}:\n 1.0, 1.41421, 2.0"
+    @test repr("text/plain", Base.LogRange(1,2,0)) == "LogRange{Float64}(1.0, 2.0, 0)"  # empty case
+end
+
+@testset "_log_twice64_unchecked" begin
+    # it roughly works
+    @test big(Base._log_twice64_unchecked(exp(1))) ≈ 1.0
+    @test big(Base._log_twice64_unchecked(exp(123))) ≈ 123.0
+
+    # it gets high accuracy
+    @test abs(big(log(4.0)) - log(big(4.0))) < 1e-16
+    @test abs(big(Base._log_twice64_unchecked(4.0)) - log(big(4.0))) < 1e-30
+
+    # it handles subnormals
+    @test abs(big(Base._log_twice64_unchecked(1e-310)) - log(big(1e-310))) < 1e-20
+
+    # it accepts negative, NaN, etc without complaint:
+    @test Base._log_twice64_unchecked(-0.0).lo isa Float64
+    @test Base._log_twice64_unchecked(-1.23).lo isa Float64
+    @test Base._log_twice64_unchecked(NaN).lo isa Float64
+    @test Base._log_twice64_unchecked(Inf).lo isa Float64
+end
+
+@testset "OneTo promotion" begin
+    struct MyUnitRange{T} <: AbstractUnitRange{T}
+        range::UnitRange{T}
+    end
+    Base.first(r::MyUnitRange) = first(r.range)
+    Base.last(r::MyUnitRange) = last(r.range)
+    Base.size(r::MyUnitRange) = size(r.range)
+    Base.length(r::MyUnitRange) = length(r.range)
+    Base.getindex(r::MyUnitRange, i::Int) = getindex(r.range, i)
+    @test promote(MyUnitRange(2:3), Base.OneTo(3)) == (2:3, 1:3)
+    @test promote(MyUnitRange(UnitRange(3.0, 4.0)), Base.OneTo(3)) == (3.0:4.0, 1.0:3.0)
+end
+
+@testset "StepRange(::StepRangeLen)" begin
+    ind = StepRangeLen(2, -1, 2)
+    @test StepRange(ind) == ind
+    @test StepRange(ind) isa StepRange{eltype(ind), typeof(step(ind))}
+    @test StepRange{Int8}(ind) == ind
+    @test StepRange{Int8}(ind) isa StepRange{Int8}
+    @test StepRange{Int8,Int8}(ind) == ind
+    @test StepRange{Int8,Int8}(ind) isa StepRange{Int8,Int8}
+
+    r = StepRangeLen(3, 0, 4)
+    @test_throws "step cannot be zero" StepRange(r)
+
+    r = StepRangeLen(Date(2020,1,1), Day(1), 4)
+    @test StepRange(r) == r
+    @test StepRange(r) isa StepRange{Date,Day}
+end
diff --git a/test/rational.jl b/test/rational.jl
index 4b29618bd15e0..93b049f22f465 100644
--- a/test/rational.jl
+++ b/test/rational.jl
@@ -28,8 +28,8 @@ using Test
     @test (1//typemax(Int)) / (1//typemax(Int)) == 1
     @test_throws OverflowError (1//2)^63
     @test inv((1+typemin(Int))//typemax(Int)) == -1
-    @test_throws ArgumentError inv(typemin(Int)//typemax(Int))
-    @test_throws ArgumentError Rational(0x1, typemin(Int32))
+    @test_throws OverflowError inv(typemin(Int)//typemax(Int))
+    @test_throws OverflowError Rational(0x1, typemin(Int32))
 
     @test @inferred(rationalize(Int, 3.0, 0.0)) === 3//1
     @test @inferred(rationalize(Int, 3.0, 0)) === 3//1
@@ -43,15 +43,63 @@ using Test
     # issue 26823
     @test_throws InexactError rationalize(Int, NaN)
     # issue 32569
-    @test_throws ArgumentError 1 // typemin(Int)
+    @test_throws OverflowError 1 // typemin(Int)
     @test_throws ArgumentError 0 // 0
     @test -2 // typemin(Int) == -1 // (typemin(Int) >> 1)
     @test 2 // typemin(Int) == 1 // (typemin(Int) >> 1)
+    # issue 32443
+    @test Int8(-128)//Int8(1) == -128
+    @test_throws OverflowError Int8(-128)//Int8(-1)
+    @test_throws OverflowError Int8(-1)//Int8(-128)
+    @test Int8(-128)//Int8(-2) == 64
+    # issue 51731
+    @test Rational{Int8}(-128) / Rational{Int8}(-128) === Rational{Int8}(1)
+    # issue 51731
+    @test Rational{Int8}(-128) / Rational{Int8}(0) === Rational{Int8}(-1, 0)
+    @test Rational{Int8}(0) / Rational{Int8}(-128) === Rational{Int8}(0, 1)
 
     @test_throws InexactError Rational(UInt(1), typemin(Int32))
     @test iszero(Rational{Int}(UInt(0), 1))
     @test Rational{BigInt}(UInt(1), Int(-1)) == -1
-    @test_broken Rational{Int64}(UInt(1), typemin(Int32)) == Int64(1) // Int64(typemin(Int32))
+    @test Rational{Int64}(UInt(1), typemin(Int32)) == Int64(1) // Int64(typemin(Int32))
+
+    @testset "Rational{T} constructor with concrete T" begin
+        test_types = [Bool, Int8, Int64, Int128, UInt8, UInt64, UInt128, BigInt]
+        test_values = Any[
+            Any[zero(T) for T in test_types];
+            Any[one(T) for T in test_types];
+            big(-1);
+            collect(Iterators.flatten(
+                (T(j) for T in (Int8, Int64, Int128)) for j in [-3:-1; -128:-126;]
+            ));
+            collect(Iterators.flatten(
+                (T(j) for T in (Int8, Int64, Int128, UInt8, UInt64, UInt128)) for j in [2:3; 126:127;]
+            ));
+            Any[typemax(T) for T in (Int64, Int128, UInt8, UInt64, UInt128)];
+            Any[typemax(T)-one(T) for T in (Int64, Int128, UInt8, UInt64, UInt128)];
+            Any[typemin(T) for T in (Int64, Int128)];
+            Any[typemin(T)+one(T) for T in (Int64, Int128)];
+        ]
+        for x in test_values, y in test_values
+            local big_r = iszero(x) && iszero(y) ? nothing : big(x) // big(y)
+            for T in test_types
+                if iszero(x) && iszero(y)
+                    @test_throws Exception Rational{T}(x, y)
+                elseif Base.hastypemax(T)
+                    local T_range = typemin(T):typemax(T)
+                    if numerator(big_r) ∈ T_range && denominator(big_r) ∈ T_range
+                        @test big_r == Rational{T}(x, y)
+                        @test Rational{T} == typeof(Rational{T}(x, y))
+                    else
+                        @test_throws Exception Rational{T}(x, y)
+                    end
+                else
+                    @test big_r == Rational{T}(x, y)
+                    @test Rational{T} == typeof(Rational{T}(x, y))
+                end
+            end
+        end
+    end
 
     for a = -5:5, b = -5:5
         if a == b == 0; continue; end
@@ -155,6 +203,29 @@ end
     end
 
     @test Rational(rand_int, 3)/Complex(3, 2) == Complex(Rational(rand_int, 13), -Rational(rand_int*2, 39))
+    @test (1//1) / complex(0, 1) === 0//1 - 1//1*im
+    @test (0//1) / complex(0, 1) === 0//1 + 0//1*im
+    @test (0//1) / complex(1, 0) === 0//1 + 0//1*im
+    @test (0//1) / complex(1, 1) === 0//1 + 0//1*im
+    @test (1//1) / complex(1, 1) === 1//2 - 1//2*im
+    @test (0//1) / complex(1//1, 1//1) === 0//1 + 0//1*im
+    @test (1//1) / complex(1//1, 1//1) === 1//2 - 1//2*im
+    @test (0//1) / complex(1//0, 0//1) === 0//1 + 0//1*im
+    @test (1//1) / complex(1//1, 1//0) === 0//1 + 0//1*im
+    @test_throws DivideError (0//1) / complex(0, 0)
+    @test_throws DivideError (1//1) / complex(0, 0)
+    @test_throws DivideError (1//0) / complex(0, 0)
+    @test_throws DivideError complex(1//0) // complex(1//0, 1//0)
+    @test_throws DivideError 1 // complex(0, 0)
+    @test_throws DivideError 0 // complex(0, 0)
+    @test_throws DivideError complex(1) // complex(0, 0)
+    @test_throws DivideError complex(0) // complex(0, 0)
+
+    # 1//200 - 1//200*im cannot be represented as Complex{Rational{Int8}}
+    @test_throws OverflowError (Int8(1)//Int8(1)) / (Int8(100) + Int8(100)im)
+    @test_throws OverflowError (Int8(1)//Int8(1)) // (Int8(100) + Int8(100)im)
+    @test_throws OverflowError Int8(1) // (Int8(100) + Int8(100)im)
+    @test_throws OverflowError complex(Int8(1)) // (Int8(100) + Int8(100)im)
 
     @test Complex(rand_int, 0) == Rational(rand_int)
     @test Rational(rand_int) == Complex(rand_int, 0)
@@ -180,6 +251,14 @@ end
             end
         end
     end
+    @testset "exact division by an infinite complex number" begin
+        for y ∈ (1 // 0, -1 // 0)
+            @test (7 // complex(y)) == 0
+            @test (Rational(7) // complex(y)) == 0
+            @test (complex(7) // complex(y)) == 0
+            @test (complex(Rational(7)) // complex(y)) == 0
+        end
+    end
 end
 
 # check type of constructed rationals
@@ -537,6 +616,7 @@ end
              100798//32085
              103993//33102
              312689//99532 ]
+    @test rationalize(pi) === rationalize(BigFloat(pi))
 end
 
 @testset "issue #12536" begin
@@ -562,6 +642,10 @@ end
 # issue #16282
 @test_throws MethodError 3 // 4.5im
 
+# issue #60137
+@test_throws MethodError 3.0 // (1 + 0im)
+@test_throws MethodError 3.0 // (1//0 + 0im)
+
 # issue #31396
 @test round(1//2, RoundNearestTiesUp) === 1//1
 
@@ -600,49 +684,49 @@ end
         @test lcm(a, T(0)//T(1)) === T(0)//T(1)
         @test gcdx(a, T(0)//T(1)) === (a, T(1), T(0))
 
-        @test gcdx(T(1)//T(0), T(1)//T(2)) === (T(1)//T(0), T(1), T(0))
-        @test gcdx(T(1)//T(2), T(1)//T(0)) === (T(1)//T(0), T(0), T(1))
-        @test gcdx(T(1)//T(0), T(1)//T(1)) === (T(1)//T(0), T(1), T(0))
-        @test gcdx(T(1)//T(1), T(1)//T(0)) === (T(1)//T(0), T(0), T(1))
+        @test_throws ArgumentError gcdx(T(1)//T(0), T(1)//T(2))
+        @test_throws ArgumentError gcdx(T(1)//T(2), T(1)//T(0))
+        @test_throws ArgumentError gcdx(T(1)//T(0), T(1)//T(1))
+        @test_throws ArgumentError gcdx(T(1)//T(1), T(1)//T(0))
         @test gcdx(T(1)//T(0), T(1)//T(0)) === (T(1)//T(0), T(1), T(1))
-        @test gcdx(T(1)//T(0), T(0)//T(1)) === (T(1)//T(0), T(1), T(0))
-        @test gcdx(T(0)//T(1), T(0)//T(1)) === (T(0)//T(1), T(1), T(0))
+        @test_throws ArgumentError gcdx(T(1)//T(0), T(0)//T(1))
+        @test gcdx(T(0)//T(1), T(0)//T(1)) === (T(0)//T(1), T(0), T(0))
 
         if T <: Signed
-            @test gcdx(T(-1)//T(0), T(1)//T(2)) === (T(1)//T(0), T(1), T(0))
-            @test gcdx(T(1)//T(2), T(-1)//T(0)) === (T(1)//T(0), T(0), T(1))
-            @test gcdx(T(-1)//T(0), T(1)//T(1)) === (T(1)//T(0), T(1), T(0))
-            @test gcdx(T(1)//T(1), T(-1)//T(0)) === (T(1)//T(0), T(0), T(1))
+            @test_throws ArgumentError gcdx(T(-1)//T(0), T(1)//T(2))
+            @test_throws ArgumentError gcdx(T(1)//T(2), T(-1)//T(0))
+            @test_throws ArgumentError gcdx(T(-1)//T(0), T(1)//T(1))
+            @test_throws ArgumentError gcdx(T(1)//T(1), T(-1)//T(0))
             @test gcdx(T(-1)//T(0), T(1)//T(0)) === (T(1)//T(0), T(1), T(1))
             @test gcdx(T(1)//T(0), T(-1)//T(0)) === (T(1)//T(0), T(1), T(1))
             @test gcdx(T(-1)//T(0), T(-1)//T(0)) === (T(1)//T(0), T(1), T(1))
-            @test gcdx(T(-1)//T(0), T(0)//T(1)) === (T(1)//T(0), T(1), T(0))
-            @test gcdx(T(0)//T(1), T(-1)//T(0)) === (T(1)//T(0), T(0), T(1))
+            @test_throws ArgumentError gcdx(T(-1)//T(0), T(0)//T(1))
+            @test_throws ArgumentError gcdx(T(0)//T(1), T(-1)//T(0))
         end
 
         @test gcdx(T(1)//T(3), T(2)) === (T(1)//T(3), T(1), T(0))
         @test lcm(T(1)//T(3), T(1)) === T(1)//T(1)
-        @test lcm(T(3)//T(1), T(1)//T(0)) === T(3)//T(1)
-        @test lcm(T(0)//T(1), T(1)//T(0)) === T(0)//T(1)
+        @test_throws ArgumentError lcm(T(3)//T(1), T(1)//T(0))
+        @test_throws ArgumentError lcm(T(0)//T(1), T(1)//T(0))
 
-        @test lcm(T(1)//T(0), T(1)//T(2)) === T(1)//T(2)
-        @test lcm(T(1)//T(2), T(1)//T(0)) === T(1)//T(2)
-        @test lcm(T(1)//T(0), T(1)//T(1)) === T(1)//T(1)
-        @test lcm(T(1)//T(1), T(1)//T(0)) === T(1)//T(1)
+        @test_throws ArgumentError lcm(T(1)//T(0), T(1)//T(2))
+        @test_throws ArgumentError lcm(T(1)//T(2), T(1)//T(0))
+        @test_throws ArgumentError lcm(T(1)//T(0), T(1)//T(1))
+        @test_throws ArgumentError lcm(T(1)//T(1), T(1)//T(0))
         @test lcm(T(1)//T(0), T(1)//T(0)) === T(1)//T(0)
-        @test lcm(T(1)//T(0), T(0)//T(1)) === T(0)//T(1)
+        @test_throws ArgumentError lcm(T(1)//T(0), T(0)//T(1))
         @test lcm(T(0)//T(1), T(0)//T(1)) === T(0)//T(1)
 
         if T <: Signed
-            @test lcm(T(-1)//T(0), T(1)//T(2)) === T(1)//T(2)
-            @test lcm(T(1)//T(2), T(-1)//T(0)) === T(1)//T(2)
-            @test lcm(T(-1)//T(0), T(1)//T(1)) === T(1)//T(1)
-            @test lcm(T(1)//T(1), T(-1)//T(0)) === T(1)//T(1)
+            @test_throws ArgumentError lcm(T(-1)//T(0), T(1)//T(2))
+            @test_throws ArgumentError lcm(T(1)//T(2), T(-1)//T(0))
+            @test_throws ArgumentError lcm(T(-1)//T(0), T(1)//T(1))
+            @test_throws ArgumentError lcm(T(1)//T(1), T(-1)//T(0))
             @test lcm(T(-1)//T(0), T(1)//T(0)) === T(1)//T(0)
             @test lcm(T(1)//T(0), T(-1)//T(0)) === T(1)//T(0)
             @test lcm(T(-1)//T(0), T(-1)//T(0)) === T(1)//T(0)
-            @test lcm(T(-1)//T(0), T(0)//T(1)) === T(0)//T(1)
-            @test lcm(T(0)//T(1), T(-1)//T(0)) === T(0)//T(1)
+            @test_throws ArgumentError lcm(T(-1)//T(0), T(0)//T(1))
+            @test_throws ArgumentError lcm(T(0)//T(1), T(-1)//T(0))
         end
 
         @test gcd([T(5), T(2), T(1)//T(2)]) === T(1)//T(2)
@@ -650,9 +734,28 @@ end
 
         @test lcm([T(5), T(2), T(1)//T(2)]) === T(10)//T(1)
         @test lcm(T(5), T(2), T(1)//T(2)) === T(10)//T(1)
+
+        @test_throws ArgumentError gcd(T(1)//T(1), T(1)//T(0))
+        @test_throws ArgumentError gcd(T(1)//T(0), T(0)//T(1))
     end
 end
 
+@testset "gcdx for 1 and 3+ arguments" begin
+    # one-argument
+    @test gcdx(7) == (7, 1)
+    @test gcdx(-7) == (7, -1)
+    @test gcdx(1//4) == (1//4, 1)
+
+    # 3+ arguments
+    @test gcdx(2//3) == gcdx(2//3) == (2//3, 1)
+    @test gcdx(15, 12, 20) == (1, 7, -7, -1)
+    @test gcdx(60//4, 60//5, 60//3) == (1//1, 7, -7, -1)
+    abcd = (105, 1638, 2145, 3185)
+    d, uvwp... = gcdx(abcd...)
+    @test d == sum(abcd .* uvwp) # u*a + v*b + w*c + p*d == gcd(a, b, c, d)
+    @test (@inferred gcdx(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) isa NTuple{11, Int}
+end
+
 @testset "Binary operations with Integer" begin
     @test 1//2 - 1 == -1//2
     @test -1//2 + 1 == 1//2
@@ -715,6 +818,19 @@ end
 
 @testset "Rational{T} with non-concrete T (issue #41222)" begin
     @test @inferred(Rational{Integer}(2,3)) isa Rational{Integer}
+    @test @inferred(Rational{Unsigned}(2,3)) isa Rational{Unsigned}
+    @test @inferred(Rational{Signed}(2,3)) isa Rational{Signed}
+    @test_throws InexactError Rational{Unsigned}(-1,1)
+    @test_throws InexactError Rational{Unsigned}(-1)
+    @test Rational{Unsigned}(Int8(-128), Int8(-128)) === Rational{Unsigned}(0x01, 0x01)
+    @test Rational{Unsigned}(Int8(-128), Int8(-1)) === Rational{Unsigned}(0x80, 0x01)
+    @test Rational{Unsigned}(Int8(0), Int8(-128)) === Rational{Unsigned}(0x00, 0x01)
+    # Numerator and denominator should have the same type.
+    @test Rational{Integer}(0x02) === Rational{Integer}(0x02, 0x01)
+    @test Rational{Integer}(Int16(3)) === Rational{Integer}(Int16(3), Int16(1))
+    @test Rational{Integer}(0x01,-1) === Rational{Integer}(-1, 1)
+    @test Rational{Integer}(-1, 0x01) === Rational{Integer}(-1, 1)
+    @test_throws InexactError Rational{Integer}(Int8(-1), UInt8(1))
 end
 
 @testset "issue #41489" begin
@@ -739,3 +855,27 @@ end
     @test rationalize(Int64, nextfloat(0.1) * im; tol=0) == precise_next * im
     @test rationalize(0.1im; tol=eps(0.1)) == rationalize(0.1im)
 end
+
+@testset "complex numerator, denominator" begin
+    z = complex(3*3, 2*3*5)
+    @test z === numerator(z) === numerator(z // 2) === numerator(z // 5)
+    @test complex(3, 2*5) === numerator(z // 3)
+    @test isone(denominator(z))
+    @test 2 === denominator(z // 2)
+    @test 1 === denominator(z // 3)
+    @test 5 === denominator(z // 5)
+    for den ∈ 1:10
+        q = z // den
+        @test q === (numerator(q)//denominator(q))
+    end
+    @testset "do not overflow silently" begin
+        @test_throws OverflowError numerator(Int8(1)//Int8(31) + Int8(8)im//Int8(3))
+    end
+end
+
+@testset "Float-Rational comparison" begin
+    @test Float16(6.0e-8) == big(1//16777216) == 1//16777216
+    @test Float16(6.0e-8) == 1//16777216
+    @test 1.0 != big(1//0)
+    @test Inf == big(1//0)
+end
diff --git a/test/read.jl b/test/read.jl
index ff0952b1495da..2a02513ac23b4 100644
--- a/test/read.jl
+++ b/test/read.jl
@@ -170,6 +170,10 @@ for (name, f) in l
         local t, s, m, kept
         @test readuntil(io(t), s) == m
         @test readuntil(io(t), s, keep=true) == kept
+        if isone(length(s))
+            @test readuntil(io(t), first(s)) == m
+            @test readuntil(io(t), first(s), keep=true) == kept
+        end
         @test readuntil(io(t), SubString(s, firstindex(s))) == m
         @test readuntil(io(t), SubString(s, firstindex(s)), keep=true) == kept
         @test readuntil(io(t), GenericString(s)) == m
@@ -264,13 +268,27 @@ for (name, f) in l
             n2 = readbytes!(s2, a2)
             @test n1 == n2
             @test length(a1) == length(a2)
-            @test a1[1:n1] == a2[1:n2]
+            let l = min(l, n)
+                @test a1[1:l] == a2[1:l]
+            end
             @test n <= length(text) || eof(s1)
             @test n <= length(text) || eof(s2)
 
             cleanup()
         end
 
+        # Test growing output array
+        let x = UInt8[],
+            io = io()
+            n = readbytes!(io, x)
+            @test n == 0
+            @test isempty(x)
+            n = readbytes!(io, x, typemax(Int))
+            @test n == length(x)
+            @test x == codeunits(text)
+            cleanup()
+        end
+
         verbose && println("$name read!...")
         l = length(text)
         for n = [1, 2, l-2, l-1, l]
@@ -473,12 +491,6 @@ let s = "qwerty"
     @test read(IOBuffer(s)) == codeunits(s)
     @test read(IOBuffer(s), 10) == codeunits(s)
     @test read(IOBuffer(s), 1) == codeunits(s)[1:1]
-
-    # Test growing output array
-    x = UInt8[]
-    n = readbytes!(IOBuffer(s), x, 10)
-    @test x == codeunits(s)
-    @test n == length(x)
 end
 
 
@@ -666,7 +678,12 @@ let p = Pipe()
     @test data_read[1:nread] == data[2:nread+1]
     @test read(p.out, 49) == data[end-48:end]
     wait(t)
+
+    closewrite(p)
+    @test !isopen(p.in)
+    @test isopen(p.out)
     close(p)
+    @test !isopen(p.out)
 end
 
 @testset "issue #27412" for itr in [eachline(IOBuffer("a")), readeach(IOBuffer("a"), Char)]
@@ -677,6 +694,21 @@ end
     @test  isempty(itr) # now it is empty
 end
 
+@testset "readuntil/copyuntil fallbacks" begin
+    # test fallback for generic delim::T
+    buf = IOBuffer()
+    fib = [1,1,2,3,5,8,13,21]
+    write(buf, fib)
+    @test readuntil(seekstart(buf), 21) == fib[1:end-1]
+    @test readuntil(buf, 21) == Int[]
+    @test readuntil(seekstart(buf), 21; keep=true) == fib
+    out = IOBuffer()
+    @test copyuntil(out, seekstart(buf), 21) === out
+    @test reinterpret(Int, take!(out)) == fib[1:end-1]
+    @test copyuntil(out, seekstart(buf), 21; keep=true) === out
+    @test reinterpret(Int, take!(out)) == fib
+end
+
 # more tests for reverse(eachline)
 @testset "reverse(eachline)" begin
     lines = vcat(repr.(1:4), ' '^50000 .* repr.(5:10), repr.(11:10^5))
@@ -705,3 +737,21 @@ end
         @test isempty(r) && isempty(collect(r))
     end
 end
+
+@testset "Ref API" begin
+    io = PipeBuffer()
+    @test write(io, Ref{Any}(0xabcd_1234)) === 4
+    @test read(io, UInt32) === 0xabcd_1234
+    @test_throws ErrorException("write cannot copy from a Ptr") invoke(write, Tuple{typeof(io), Ref{Cvoid}}, io, C_NULL)
+    @test_throws ErrorException("write cannot copy from a Ptr") invoke(write, Tuple{typeof(io), Ref{Int}}, io, Ptr{Int}(0))
+    @test_throws ErrorException("write cannot copy from a Ptr") invoke(write, Tuple{typeof(io), Ref{Any}}, io, Ptr{Any}(0))
+    @test_throws ErrorException("read! cannot copy into a Ptr") read!(io, C_NULL)
+    @test_throws ErrorException("read! cannot copy into a Ptr") read!(io, Ptr{Int}(0))
+    @test_throws ErrorException("read! cannot copy into a Ptr") read!(io, Ptr{Any}(0))
+    @test eof(io)
+    @test write(io, C_NULL) === sizeof(Int)
+    @test write(io, Ptr{Int}(4)) === sizeof(Int)
+    @test write(io, Ptr{Any}(5)) === sizeof(Int)
+    @test read!(io, Int[1, 2, 3]) == [0, 4, 5]
+    @test eof(io)
+end
diff --git a/test/rebinding.jl b/test/rebinding.jl
new file mode 100644
index 0000000000000..709cb0859f6f5
--- /dev/null
+++ b/test/rebinding.jl
@@ -0,0 +1,471 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+
+module Rebinding
+    using Test
+    make_foo() = Foo(1)
+
+    @test Base.binding_kind(@__MODULE__, :Foo) == Base.PARTITION_KIND_GUARD
+    struct Foo
+        x::Int
+    end
+    const defined_world_age = Base.tls_world_age()
+    x = Foo(1)
+
+    @test Base.binding_kind(@__MODULE__, :Foo) == Base.PARTITION_KIND_CONST
+    @test !contains(repr(x), "@world")
+    Base.delete_binding(@__MODULE__, :Foo)
+
+    @test Base.binding_kind(@__MODULE__, :Foo) == Base.PARTITION_KIND_GUARD
+    @test contains(repr(x), "@world")
+
+    # Test that it still works if Foo is redefined to a non-type
+    const Foo = 1
+
+    @test Base.binding_kind(@__MODULE__, :Foo) == Base.PARTITION_KIND_CONST
+    @test contains(repr(x), "@world")
+    Base.delete_binding(@__MODULE__, :Foo)
+
+    struct Foo
+        x::Int
+    end
+    @test Foo != typeof(x)
+
+    # This tests that the compiler uses the correct world, but does not test
+    # invalidation.
+    @test typeof(Base.invoke_in_world(defined_world_age, make_foo)) == typeof(x)
+    @test typeof(make_foo()) == Foo
+
+    # Tests for @world syntax
+    @test Base.@world(Foo, defined_world_age) == typeof(x)
+    nameof(@__MODULE__) === :Rebinding && @test Base.@world(Rebinding.Foo, defined_world_age) == typeof(x)
+    @test Base.@world((@__MODULE__).Foo, defined_world_age) == typeof(x)
+
+    # Test invalidation (const -> undefined)
+    const delete_me = 1
+    f_return_delete_me() = delete_me
+    @test f_return_delete_me() == 1
+    Base.delete_binding(@__MODULE__, :delete_me)
+    @test_throws UndefVarError f_return_delete_me()
+
+    # + foreign module
+    module NotTheDefinitionModule
+        const delete_me_other = 2
+    end
+    @eval f_return_delete_me_foreign_module() = $(GlobalRef(NotTheDefinitionModule, :delete_me_other))
+    @test f_return_delete_me_foreign_module() == 2
+    Base.delete_binding(NotTheDefinitionModule, :delete_me_other)
+    @test_throws UndefVarError f_return_delete_me_foreign_module()
+
+    ## + via indirect access
+    const delete_me = 3
+    f_return_delete_me_indirect() = getglobal(@__MODULE__, :delete_me)
+    @test f_return_delete_me_indirect() == 3
+    Base.delete_binding(@__MODULE__, :delete_me)
+    @test_throws UndefVarError f_return_delete_me_indirect()
+
+    # + via generated function
+    const delete_me = 4
+    @generated f_generated_return_delete_me() = return :(delete_me)
+    @test f_generated_return_delete_me() == 4
+    Base.delete_binding(@__MODULE__, :delete_me)
+    @test_throws UndefVarError f_generated_return_delete_me()
+
+    module DeleteMeModule
+        export delete_me_implicit
+        const delete_me_explicit = 5
+        const delete_me_implicit = 6
+    end
+
+    # + via import
+    using .DeleteMeModule: delete_me_explicit
+    f_return_delete_me_explicit() = delete_me_explicit
+    @test f_return_delete_me_explicit() == 5
+    Base.delete_binding(DeleteMeModule, :delete_me_explicit)
+    @test_throws UndefVarError f_return_delete_me_explicit()
+
+    # + via using
+    using .DeleteMeModule
+    f_return_delete_me_implicit() = delete_me_implicit
+    @test f_return_delete_me_implicit() == 6
+    Base.delete_binding(DeleteMeModule, :delete_me_implicit)
+    @test_throws UndefVarError f_return_delete_me_implicit()
+end
+
+module RebindingPrecompile
+    using Test
+    include("precompile_utils.jl")
+
+    precompile_test_harness("rebinding precompile") do load_path
+        # Test that the system doesn't accidentally forget to revalidate a method without backedges
+        write(joinpath(load_path, "LotsOfBindingsToDelete.jl"),
+              """
+              module LotsOfBindingsToDelete
+                const delete_me_1 = 1
+                const delete_me_2 = 2
+                const delete_me_3 = 3
+                const delete_me_4 = 4
+                export delete_me_5
+                const delete_me_5 = 5
+                const delete_me_6 = 6
+              end
+              """)
+        Base.compilecache(Base.PkgId("LotsOfBindingsToDelete"))
+        write(joinpath(load_path, "UseTheBindings.jl"),
+              """
+              module UseTheBindings
+                using LotsOfBindingsToDelete
+                @eval f_use_bindings1() = \$(GlobalRef(LotsOfBindingsToDelete, :delete_me_1))
+                @eval f_use_bindings2() = \$(GlobalRef(LotsOfBindingsToDelete, :delete_me_2))
+                f_use_bindings3() = LotsOfBindingsToDelete.delete_me_3
+                f_use_bindings4() = LotsOfBindingsToDelete.delete_me_4
+                f_use_bindings5() = delete_me_5
+                import LotsOfBindingsToDelete: delete_me_6
+                f_use_bindings6() = delete_me_6
+                # Code Instances for each of these
+                @assert (f_use_bindings1(), f_use_bindings2(), f_use_bindings3(),
+                         f_use_bindings4(), f_use_bindings5(), f_use_bindings6()) ==
+                    (1, 2, 3, 4, 5, 6)
+              end
+              """)
+        Base.compilecache(Base.PkgId("UseTheBindings"))
+        @eval using LotsOfBindingsToDelete
+        invokelatest() do
+            # Delete some bindings before loading the dependent package
+            Base.delete_binding(LotsOfBindingsToDelete, :delete_me_1)
+            Base.delete_binding(LotsOfBindingsToDelete, :delete_me_3)
+        end
+        # Load the dependent package
+        @eval using UseTheBindings
+        invokelatest() do
+            @test_throws UndefVarError UseTheBindings.f_use_bindings1()
+            @test UseTheBindings.f_use_bindings2() == 2
+            @test_throws UndefVarError UseTheBindings.f_use_bindings3()
+            @test UseTheBindings.f_use_bindings4() == 4
+            @test UseTheBindings.f_use_bindings5() == 5
+            @test UseTheBindings.f_use_bindings6() == 6
+            # Delete remaining bindings
+            Base.delete_binding(LotsOfBindingsToDelete, :delete_me_2)
+            Base.delete_binding(LotsOfBindingsToDelete, :delete_me_4)
+            Base.delete_binding(LotsOfBindingsToDelete, :delete_me_5)
+            Base.delete_binding(LotsOfBindingsToDelete, :delete_me_6)
+            invokelatest() do
+                @test_throws UndefVarError UseTheBindings.f_use_bindings2()
+                @test_throws UndefVarError UseTheBindings.f_use_bindings4()
+                @test_throws UndefVarError UseTheBindings.f_use_bindings5()
+                @test_throws UndefVarError UseTheBindings.f_use_bindings6()
+            end
+        end
+    end
+
+    precompile_test_harness("export change") do load_path
+        write(joinpath(load_path, "Export1.jl"),
+              """
+              module Export1
+                export import_me1
+                const import_me1 = 11
+                export import_me2
+                const import_me2 = 12
+              end
+              """)
+        write(joinpath(load_path, "Export2.jl"),
+              """
+              module Export2
+              end
+              """)
+        write(joinpath(load_path, "ImportTest.jl"),
+              """
+              module ImportTest
+                using Export1, Export2
+                f_use_binding1() = import_me1
+                f_use_binding2() = import_me2
+                @assert f_use_binding1() == 11
+                @assert f_use_binding2() == 12
+
+            end
+              """)
+        @eval using Export1
+        @eval using Export2
+        # Change the import resolution for ImportTest
+        invokelatest() do
+            Core.eval(Export2, :(export import_me1))
+            Core.eval(Export2, :(const import_me1 = 21))
+        end
+        @eval using ImportTest
+        invokelatest() do
+            @test_throws UndefVarError ImportTest.f_use_binding1()
+            @test ImportTest.f_use_binding2() == 12
+        end
+        invokelatest() do
+            Core.eval(Export2, :(export import_me2))
+            Core.eval(Export2, :(const import_me2 = 22))
+        end
+        invokelatest() do
+            @test_throws UndefVarError ImportTest.f_use_binding2()
+        end
+    end
+
+    finish_precompile_test!()
+end
+
+module Regression
+    using Test
+
+    # Issue #57377
+    module GeoParams57377
+        module B
+            using ...GeoParams57377
+            export S
+            struct S end
+            module C
+                using ..GeoParams57377
+                h() = S()
+                x -> nothing
+            end
+        end
+
+        using .B
+        export S
+    end
+    @test GeoParams57377.B.C.h() == GeoParams57377.B.C.S()
+end
+
+# Test that the validation bypass fast path is not defeated by loading InteractiveUtils
+@test parse(UInt, readchomp(`$(Base.julia_cmd()) -e 'using InteractiveUtils; show(unsafe_load(cglobal(:jl_first_image_replacement_world, UInt)))'`)) == typemax(UInt)
+
+# Test that imported module binding backedges are still added in a new module that has the fast path active
+let test_code =
+    """
+    using Test
+    @assert unsafe_load(cglobal(:jl_first_image_replacement_world, UInt)) == typemax(UInt)
+    include("precompile_utils.jl")
+
+    precompile_test_harness("rebinding precompile") do load_path
+        write(joinpath(load_path, "LotsOfBindingsToDelete2.jl"),
+              "module LotsOfBindingsToDelete2
+                 const delete_me_6 = 6
+               end")
+        Base.compilecache(Base.PkgId("LotsOfBindingsToDelete2"))
+        write(joinpath(load_path, "UseTheBindings2.jl"),
+              "module UseTheBindings2
+                 import LotsOfBindingsToDelete2: delete_me_6
+                 f_use_bindings6() = delete_me_6
+                 # Code Instances for each of these
+                 @assert (f_use_bindings6(),) == (6,)
+               end")
+        Base.compilecache(Base.PkgId("UseTheBindings2"))
+        @eval using LotsOfBindingsToDelete2
+        @eval using UseTheBindings2
+        invokelatest() do
+            @test UseTheBindings2.f_use_bindings6() == 6
+            Base.delete_binding(LotsOfBindingsToDelete2, :delete_me_6)
+            invokelatest() do
+                @test_throws UndefVarError UseTheBindings2.f_use_bindings6()
+            end
+        end
+    end
+
+    finish_precompile_test!()
+    """
+    @test success(pipeline(`$(Base.julia_cmd()) -e $test_code`; stderr))
+end
+
+# Image Globalref smoke test
+module ImageGlobalRefFlag
+    using Test
+    @eval fimage() = $(GlobalRef(Base, :sin))
+    fnoimage() = x
+    @test Base.has_image_globalref(first(methods(fimage)))
+    @test !Base.has_image_globalref(first(methods(fnoimage)))
+end
+
+# Test that inference can merge ranges for partitions as long as what's being imported doesn't change
+module RangeMerge
+    using Test
+    using InteractiveUtils
+
+    function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true)
+        params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false, debug_info_level=Cint(2))
+        d = InteractiveUtils._dump_function(InteractiveUtils.ArgInfo(f, t), false, false, raw, dump_module, :att, optimize, :none, false, params)
+        sprint(print, d)
+    end
+
+    global x = 1
+    const after_def_world = Base.get_world_counter()
+    export x
+    f() = x
+    @test f() == 1
+    @test only(methods(f)).specializations.cache.min_world <= after_def_world
+
+    @test !contains(get_llvm(f, Tuple{}), "jl_get_binding_value")
+end
+
+# Test that we invalidate for undefined -> defined transitions (#54733)
+module UndefinedTransitions
+    using Test
+    function foo54733()
+        for i = 1:1_000_000_000
+            bar54733(i)
+        end
+        return 1
+    end
+    @test_throws UndefVarError foo54733()
+    let ci = first(methods(foo54733)).specializations.cache
+        @test !Base.Compiler.is_nothrow(Base.Compiler.decode_effects(ci.ipo_purity_bits))
+    end
+    bar54733(x) = 3x
+    @test foo54733() === 1
+    let ci = first(methods(foo54733)).specializations.cache
+        @test Base.Compiler.is_nothrow(Base.Compiler.decode_effects(ci.ipo_purity_bits))
+    end
+end
+
+# Identical implicit partitions should be merge (#57923)
+for binding in (convert(Core.Binding, GlobalRef(Base, :Math)),
+                convert(Core.Binding, GlobalRef(Base, :Intrinsics)))
+    # Test that these both only have two partitions
+    @test isdefined(binding, :partitions)
+    @test isdefined(binding.partitions, :next)
+    @test !isdefined(binding.partitions.next, :next)
+end
+
+# Test various scenarios for implicit partition merging
+module MergeStress
+    for i = 1:5
+        @eval module $(Symbol("M$i"))
+            export x, y
+            const x = 1
+            const y = 2
+        end
+    end
+    const before = Base.get_world_counter()
+    using .M1
+    const afterM1 = Base.get_world_counter()
+    using .M2
+    const afterM2 = Base.get_world_counter()
+    using .M3
+    const afterM3 = Base.get_world_counter()
+    using .M4
+    const afterM4 = Base.get_world_counter()
+    using .M5
+    const afterM5 = Base.get_world_counter()
+end
+
+function count_partitions(b::Core.Binding)
+    n = 0
+    isdefined(b, :partitions) || return n
+    bpart = b.partitions
+    while true
+        n += 1
+        isdefined(bpart, :next) || break
+        bpart = bpart.next
+    end
+    return n
+end
+using Base: invoke_in_world
+
+const xbinding = convert(Core.Binding, GlobalRef(MergeStress, :x))
+function access_and_count(point)
+    invoke_in_world(getglobal(MergeStress, point), getglobal, MergeStress, :x)
+    count_partitions(xbinding)
+end
+
+@test count_partitions(xbinding) == 0
+@test access_and_count(:afterM1) == 1
+# M2 is the first change to the `usings` table after M1. The partitions
+# can and should be merged
+@test access_and_count(:afterM2) == 1
+
+# There is a gap between M2 and M5 - the partitions should not be merged
+@test access_and_count(:afterM5) == 2
+
+# M4 and M5 are adjacent, these partitions should also be merged (in the opposite direction)
+@test access_and_count(:afterM4) == 2
+
+# M3 connects all, so we should have a single partition
+@test access_and_count(:afterM3) == 1
+
+# Test that delete_binding in an outdated world age works
+module BindingTestModule; end
+function create_and_delete_binding()
+    Core.eval(BindingTestModule, :(const x = 1))
+    Base.delete_binding(BindingTestModule, :x)
+end
+create_and_delete_binding()
+@test Base.binding_kind(BindingTestModule, :x) == Base.PARTITION_KIND_GUARD
+
+# Test that we properly invalidate bindings if the value changes, not just the
+# export status (#59272)
+module Invalidate59272
+    using Test
+    module Foo
+        export Bar
+        struct Bar
+        # x
+        end
+    end
+    using .Foo
+    @test isa(Bar(), Foo.Bar)
+    Core.eval(Foo, :(struct Bar; x; end))
+    @test Bar(1) == Foo.Bar(1)
+end
+
+# Test @reexport
+module ReexportTests
+    using Test
+    using Base.Experimental: @reexport
+
+    # Test dynamic export additions through reexport
+    module Source1
+        export s1
+        s1() = "s1"
+    end
+    module Reexporter1
+        import ..@reexport
+        @reexport using ..Source1
+    end
+    module User1
+        using ..Reexporter1
+    end
+    @test (:s1,) ⊆ names(Reexporter1)
+    @test User1.s1() == "s1"
+    Core.eval(Source1, :(s2() = "s2"; export s2))
+    @test (:s1, :s2) ⊆ names(Reexporter1)
+    @test User1.s2() == "s2"
+
+    # Test reexport syntax, multiple modules
+    module Source2
+        export s3
+        s3() = "s3"
+    end
+    module Reexporter2
+        import ..@reexport
+        @reexport using ..Source2, ..Source1
+    end
+    module User2
+        using ..Reexporter2
+    end
+    @test (:s1, :s3) ⊆ names(Reexporter2)
+    @test User2.s1() == "s1"
+    @test User2.s3() == "s3"
+
+    # Test same name from different modules - one with reexport, one without
+    module Source3
+        export same_name
+        const same_name = 42
+    end
+    module Source4
+        export same_name
+        const same_name = 42
+    end
+    module Reexporter3
+        import ..@reexport
+        using ..Source4  # without reexport
+        @reexport using ..Source3
+    end
+    module User3
+        using ..Reexporter3
+    end
+    @test User3.same_name == 42
+end
diff --git a/test/reduce.jl b/test/reduce.jl
index aea1f1e60f6cd..66b1c957c5d61 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -53,8 +53,8 @@ end
 @test reduce(max, [8 6 7 5 3 0 9]) == 9
 @test reduce(+, 1:5; init=1000) == (1000 + 1 + 2 + 3 + 4 + 5)
 @test reduce(+, 1) == 1
-@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, ())
-@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, Union{}[])
+@test_throws "reducing over an empty collection is not allowed" reduce(*, ())
+@test_throws "reducing over an empty collection is not allowed" reduce(*, Union{}[])
 
 # mapreduce
 @test mapreduce(-, +, [-10 -9 -3]) == ((10 + 9) + 3)
@@ -91,8 +91,7 @@ end
 @test mapreduce(abs2, *, Float64[]) === 1.0
 @test mapreduce(abs2, max, Float64[]) === 0.0
 @test mapreduce(abs, max, Float64[]) === 0.0
-@test_throws ["reducing over an empty collection is not allowed",
-              "consider supplying `init`"] mapreduce(abs2, &, Float64[])
+@test_throws "reducing over an empty collection is not allowed" mapreduce(abs2, &, Float64[])
 @test_throws str -> !occursin("Closest candidates are", str) mapreduce(abs2, &, Float64[])
 @test_throws "reducing over an empty collection is not allowed" mapreduce(abs2, |, Float64[])
 
@@ -144,9 +143,8 @@ fz = float(z)
 @test sum(z) === 136
 @test sum(fz) === 136.0
 
-@test_throws "reducing with add_sum over an empty collection of element type Union{} is not allowed" sum(Union{}[])
-@test_throws ["reducing over an empty collection is not allowed",
-              "consider supplying `init`"] sum(sin, Int[])
+@test_throws "reducing over an empty collection is not allowed" sum(Union{}[])
+@test_throws "reducing over an empty collection is not allowed" sum(sin, Int[])
 @test sum(sin, 3) == sin(3.0)
 @test sum(sin, [3]) == sin(3.0)
 a = sum(sin, z)
@@ -301,19 +299,37 @@ end
             arr = zeros(N)
             @test minimum(arr) === 0.0
             @test maximum(arr) === 0.0
+            @test minimum(abs, arr) === 0.0
+            @test maximum(abs, arr) === 0.0
+            @test minimum(-, arr) === -0.0
+            @test maximum(-, arr) === -0.0
 
             arr[i] = -0.0
             @test minimum(arr) === -0.0
             @test maximum(arr) ===  0.0
+            @test minimum(abs, arr) === 0.0
+            @test maximum(abs, arr) === 0.0
+            @test minimum(-, arr) === -0.0
+            @test maximum(-, arr) ===  0.0
 
             arr = -zeros(N)
             @test minimum(arr) === -0.0
             @test maximum(arr) === -0.0
+            @test minimum(abs, arr) === 0.0
+            @test maximum(abs, arr) === 0.0
+            @test minimum(-, arr) === 0.0
+            @test maximum(-, arr) === 0.0
             arr[i] = 0.0
             @test minimum(arr) === -0.0
-            @test maximum(arr) === 0.0
+            @test maximum(arr) ===  0.0
+            @test minimum(abs, arr) === 0.0
+            @test maximum(abs, arr) === 0.0
+            @test minimum(-, arr) === -0.0
+            @test maximum(-, arr) ===  0.0
         end
     end
+
+    @test minimum(abs, fill(-0.0, 16)) === mapreduce(abs, (x,y)->min(x,y), fill(-0.0, 16)) === 0.0
 end
 
 @testset "maximum works on generic order #30320" begin
@@ -516,13 +532,13 @@ let f(x) = x == 1 ? true : x == 2 ? false : 1
     @test_throws TypeError all(map(f,[1,3]))
 end
 
-# any and all with functors
+# any and all with callable structs
 
-struct SomeFunctor end
-(::SomeFunctor)(x) = true
+struct SomeCallable end
+(::SomeCallable)(x) = true
 
-@test @inferred any(SomeFunctor(), 1:10)
-@test @inferred all(SomeFunctor(), 1:10)
+@test @inferred any(SomeCallable(), 1:10)
+@test @inferred all(SomeCallable(), 1:10)
 
 
 # in
@@ -570,11 +586,11 @@ struct NonFunctionIsZero end
 @test count(NonFunctionIsZero(), [0]) == 1
 @test count(NonFunctionIsZero(), [1]) == 0
 
-@test count(Iterators.repeated(true, 3), init=0x04) === 0x07
-@test count(!=(2), Iterators.take(1:7, 3), init=Int32(0)) === Int32(2)
-@test count(identity, [true, false], init=Int8(5)) === Int8(6)
-@test count(!, [true false; false true], dims=:, init=Int16(0)) === Int16(2)
-@test isequal(count(identity, [true false; false true], dims=2, init=UInt(4)), reshape(UInt[5, 5], 2, 1))
+@test count(Iterators.repeated(true, 3), init=UInt(0)) === UInt(3)
+@test count(!=(2), Iterators.take(1:7, 3), init=Int32(0)) === 2
+@test count(identity, [true, false], init=Int8(0)) === 1
+@test count(!, [true false; false true], dims=:, init=Int16(0)) === 2
+@test isequal(count(identity, [true false; false true], dims=2, init=UInt(0)), reshape(UInt[1, 1], 2, 1))
 
 ## cumsum, cummin, cummax
 
@@ -683,6 +699,27 @@ end
     end
 end
 
+@testset "issue #45562" begin
+    @test all([true, true, true], dims = 1) == [true]
+    @test any([true, true, true], dims = 1) == [true]
+    @test_throws TypeError all([3, 3, 3], dims = 1)
+    @test_throws TypeError any([3, 3, 3], dims = 1)
+    @test_throws TypeError all(Any[true, 3, 3], dims = 1)
+    @test_throws TypeError any(Any[false, 3, 3], dims = 1)
+    @test_throws TypeError all([1, 1, 1], dims = 1)
+    @test_throws TypeError any([0, 0, 0], dims = 1)
+    @test_throws TypeError all!([false], [3, 3, 3])
+    @test_throws TypeError any!([false], [3, 3, 3])
+    @test_throws TypeError all!([false], Any[true, 3, 3])
+    @test_throws TypeError any!([false], Any[false, 3, 3])
+    @test_throws TypeError all!([false], [1, 1, 1])
+    @test_throws TypeError any!([false], [0, 0, 0])
+    @test reduce(|, Bool[]) == false
+    @test reduce(&, Bool[]) == true
+    @test reduce(|, Bool[], dims=1) == [false]
+    @test reduce(&, Bool[], dims=1) == [true]
+end
+
 # issue #45748
 @testset "foldl's stability for nested Iterators" begin
     a = Iterators.flatten((1:3, 1:3))
@@ -705,3 +742,38 @@ let a = NamedTuple(Symbol(:x,i) => i for i in 1:33),
     b = (a...,)
     @test fold_alloc(a) == fold_alloc(b) == 0
 end
+
+@testset "concrete eval `[any|all](f, itr::Tuple)`" begin
+    intf = in((1,2,3)); Intf = typeof(intf)
+    symf = in((:one,:two,:three)); Symf = typeof(symf)
+    @test Core.Compiler.is_foldable(Base.infer_effects(intf, (Int,)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(symf, (Symbol,)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(all, (Intf,Tuple{Int,Int,Int})))
+    @test Core.Compiler.is_foldable(Base.infer_effects(all, (Symf,Tuple{Symbol,Symbol,Symbol})))
+    @test Core.Compiler.is_foldable(Base.infer_effects(any, (Intf,Tuple{Int,Int,Int})))
+    @test Core.Compiler.is_foldable(Base.infer_effects(any, (Symf,Tuple{Symbol,Symbol,Symbol})))
+    @test Base.return_types() do
+        Val(all(in((1,2,3)), (1,2,3)))
+    end |> only == Val{true}
+    @test Base.return_types() do
+        Val(all(in((1,2,3)), (1,2,3,4)))
+    end |> only == Val{false}
+    @test Base.return_types() do
+        Val(any(in((1,2,3)), (4,5,3)))
+    end |> only == Val{true}
+    @test Base.return_types() do
+        Val(any(in((1,2,3)), (4,5,6)))
+    end |> only == Val{false}
+    @test Base.return_types() do
+        Val(all(in((:one,:two,:three)),(:three,:four)))
+    end |> only == Val{false}
+    @test Base.return_types() do
+        Val(any(in((:one,:two,:three)),(:four,:three)))
+    end |> only == Val{true}
+end
+
+# `reduce(vcat, A)` should not alias the input for length-1 collections
+let A=[1;;]
+    @test reduce(vcat, Any[A]) !== A
+    @test reduce(hcat, Any[A]) !== A
+end
diff --git a/test/reducedim.jl b/test/reducedim.jl
index daa0a3fbe1f92..1664b2708d7e3 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -88,12 +88,12 @@ safe_minabs(A::Array{T}, region) where {T} = safe_mapslices(minimum, abs.(A), re
     @test @inferred(count(!, Breduc, dims=region)) ≈ safe_count(.!Breduc, region)
 
     @test isequal(
-        @inferred(count(Breduc, dims=region, init=0x02)),
-        safe_count(Breduc, region) .% UInt8 .+ 0x02,
+        @inferred(Array{UInt8,ndims(Breduc)}, count(Breduc, dims=region, init=0x00)),
+        safe_count(Breduc, region),
     )
     @test isequal(
-        @inferred(count(!, Breduc, dims=region, init=Int16(0))),
-        safe_count(.!Breduc, region) .% Int16,
+        @inferred(Array{Int16,ndims(Breduc)}, count(!, Breduc, dims=region, init=Int16(0))),
+        safe_count(.!Breduc, region),
     )
 end
 
@@ -124,6 +124,18 @@ fill!(r, -6.3)
 fill!(r, -1.1)
 @test sum!(abs2, r, Breduc, init=false) ≈ safe_sumabs2(Breduc, 1) .- 1.1
 
+# issue #35199
+function issue35199_test(sizes, dims)
+    M = rand(Float64, sizes)
+    ax = axes(M)
+    n1 = @allocations Base.reduced_indices(ax, dims)
+    return @test n1 == 0
+end
+for dims in (1, 2, (1,), (2,), (1,2))
+    sizes = (64, 3)
+    issue35199_test(sizes, dims)
+end
+
 # Small arrays with init=false
 let A = reshape(1:15, 3, 5)
     R = fill(1, 3)
@@ -564,8 +576,8 @@ end
 @testset "type of sum(::Array{$T}" for T in [UInt8, Int8, Int32, Int64, BigInt]
     result = sum(T[1 2 3; 4 5 6; 7 8 9], dims=2)
     @test result == hcat([6, 15, 24])
-    @test eltype(result) === (T <: Base.SmallSigned ? Int :
-                              T <: Base.SmallUnsigned ? UInt :
+    @test eltype(result) === (T <: Base.BitSignedSmall ? Int :
+                              T <: Base.BitUnsignedSmall ? UInt :
                               T)
 end
 
@@ -575,6 +587,30 @@ end
     @test B[argmin(B, dims=[2, 3])] == @inferred(minimum(B, dims=[2, 3]))
 end
 
+@testset "careful with @inbounds" begin
+    Base.@propagate_inbounds f(x) = x == 2 ? x[-10000] : x
+    Base.@propagate_inbounds op(x,y) = x[-10000] + y[-10000]
+    for (arr, dims) in (([1,1,2], 1), ([1 1 2], 2), ([ones(Int,256);2], 1))
+        @test_throws BoundsError mapreduce(f, +, arr)
+        @test_throws BoundsError mapreduce(f, +, arr; dims)
+        @test_throws BoundsError mapreduce(f, +, arr; dims, init=0)
+        @test_throws BoundsError mapreduce(identity, op, arr)
+        try
+            #=@test_throws BoundsError=# mapreduce(identity, op, arr; dims)
+        catch ex
+            @test_broken ex isa BoundsError
+        end
+        @test_throws BoundsError mapreduce(identity, op, arr; dims, init=0)
+
+        @test_throws BoundsError findmin(f, arr)
+        @test_throws BoundsError findmin(f, arr; dims)
+
+        @test_throws BoundsError mapreduce(f, max, arr)
+        @test_throws BoundsError mapreduce(f, max, arr; dims)
+        @test_throws BoundsError mapreduce(f, max, arr; dims, init=0)
+    end
+end
+
 @testset "in-place reductions with mismatched dimensionalities" begin
     B = reshape(1:24, 4, 3, 2)
     for R in (fill(0, 4), fill(0, 4, 1), fill(0, 4, 1, 1))
@@ -608,7 +644,7 @@ end
 end
 @testset "NaN/missing test for extrema with dims #43599" begin
     for sz = (3, 10, 100)
-        for T in (Int, Float64, BigFloat)
+        for T in (Int, Float64, BigFloat, BigInt)
             Aₘ = Matrix{Union{T, Missing}}(rand(-sz:sz, sz, sz))
             Aₘ[rand(1:sz*sz, sz)] .= missing
             unordered_test_for_extrema(Aₘ)
@@ -622,9 +658,16 @@ end
         end
     end
 end
-@test_broken minimum([missing;BigInt(1)], dims = 1)
-@test_broken maximum([missing;BigInt(1)], dims = 1)
-@test_broken extrema([missing;BigInt(1)], dims = 1)
+
+@testset "minimum/maximum over dims with missing (#35308)" begin
+    for T in (Int, Float64, BigInt, BigFloat)
+        x = Union{T, Missing}[1 missing; 2 missing]
+        @test isequal(minimum(x, dims=1), reshape([1, missing], 1, :))
+        @test isequal(maximum(x, dims=1), reshape([2, missing], 1, :))
+        @test isequal(minimum(x, dims=2), reshape([missing, missing], :, 1))
+        @test isequal(maximum(x, dims=2), reshape([missing, missing], :, 1))
+    end
+end
 
 # issue #26709
 @testset "dimensional reduce with custom non-bitstype types" begin
@@ -650,7 +693,7 @@ end
     @test_throws TypeError count!([1], [1])
 end
 
-@test @inferred(count(false:true, dims=:, init=0x0004)) === 0x0005
+@test @inferred(UInt16, count(false:true, dims=:, init=0x0000)) === 1
 @test @inferred(count(isodd, reshape(1:9, 3, 3), dims=:, init=Int128(0))) === Int128(5)
 
 @testset "reduced_index for BigInt (issue #39995)" begin
diff --git a/test/reflection.jl b/test/reflection.jl
index c13e7d88d8cfd..905e55afb5c3a 100644
--- a/test/reflection.jl
+++ b/test/reflection.jl
@@ -2,7 +2,7 @@
 
 using Test
 
-include("compiler/irutils.jl")
+include(joinpath(@__DIR__,"../Compiler/test/irutils.jl"))
 
 # code_native / code_llvm (issue #8239)
 # It's hard to really test these, but just running them should be
@@ -16,6 +16,11 @@ function test_ir_reflection(freflect, f, types)
     nothing
 end
 
+function test_ir_reflection(freflect, argtypes)
+    @test !isempty(freflect(argtypes))
+    nothing
+end
+
 function test_bin_reflection(freflect, f, types)
     iob = IOBuffer()
     freflect(iob, f, types)
@@ -27,6 +32,9 @@ end
 function test_code_reflection(freflect, f, types, tester)
     tester(freflect, f, types)
     tester(freflect, f, (types.parameters...,))
+    tt = Base.signature_type(f, types)
+    tester(freflect, tt)
+    tester(freflect, (tt.parameters...,))
     nothing
 end
 
@@ -43,6 +51,7 @@ end
 
 test_code_reflections(test_ir_reflection, code_lowered)
 test_code_reflections(test_ir_reflection, code_typed)
+test_code_reflections(test_ir_reflection, Base.code_ircode)
 
 io = IOBuffer()
 Base.print_statement_costs(io, map, (typeof(sqrt), Tuple{Int}))
@@ -110,6 +119,7 @@ not_const = 1
 @test isconst(@__MODULE__, :a_const) == true
 @test isconst(Base, :pi) == true
 @test isconst(@__MODULE__, :pi) == true
+@test isconst(GlobalRef(@__MODULE__, :pi)) == true
 @test isconst(@__MODULE__, :not_const) == false
 @test isconst(@__MODULE__, :is_not_defined) == false
 
@@ -125,11 +135,18 @@ not_const = 1
 # For curmod_*
 include("testenv.jl")
 
+module TestMod36529
+    x36529 = 0
+    y36529 = 1
+    export y36529
+end
+
 module TestMod7648
 using Test
 import Base.convert
 import ..curmod_name, ..curmod
-export a9475, foo9475, c7648, foo7648, foo7648_nomethods, Foo7648
+using ..TestMod36529: x36529   # doesn't import TestMod36529 or y36529, even though it's exported
+export a9475, c7648, f9475, foo7648, foo7648_nomethods, Foo7648
 
 const c7648 = 8
 d7648 = 9
@@ -142,10 +159,11 @@ module TestModSub9475
     using Test
     using ..TestMod7648
     import ..curmod_name
-    export a9475, foo9475
+    export a9475, f9475, f54609
     a9475 = 5
     b9475 = 7
-    foo9475(x) = x
+    f9475(x) = x
+    f54609(x) = x
     let
         @test Base.binding_module(@__MODULE__, :a9475) == @__MODULE__
         @test Base.binding_module(@__MODULE__, :c7648) == TestMod7648
@@ -169,18 +187,104 @@ let
     @test Base.binding_module(TestMod7648, :d7648) == TestMod7648
     @test Base.binding_module(TestMod7648, :a9475) == TestMod7648.TestModSub9475
     @test Base.binding_module(TestMod7648.TestModSub9475, :b9475) == TestMod7648.TestModSub9475
-    @test Set(names(TestMod7648))==Set([:TestMod7648, :a9475, :foo9475, :c7648, :foo7648, :foo7648_nomethods, :Foo7648])
-    @test Set(names(TestMod7648, all = true)) == Set([:TestMod7648, :TestModSub9475, :a9475, :foo9475, :c7648, :d7648, :f7648,
-                                                :foo7648, Symbol("#foo7648"), :foo7648_nomethods, Symbol("#foo7648_nomethods"),
-                                                :Foo7648, :eval, Symbol("#eval"), :include, Symbol("#include")])
-    @test Set(names(TestMod7648, all = true, imported = true)) == Set([:TestMod7648, :TestModSub9475, :a9475, :foo9475, :c7648, :d7648, :f7648,
-                                                      :foo7648, Symbol("#foo7648"), :foo7648_nomethods, Symbol("#foo7648_nomethods"),
-                                                      :Foo7648, :eval, Symbol("#eval"), :include, Symbol("#include"),
-                                                      :convert, :curmod_name, :curmod])
+    defaultset = Set(Symbol[:Foo7648, :TestMod7648, :a9475, :c7648, :f9475, :foo7648, :foo7648_nomethods])
+    allset = defaultset ∪ Set(Symbol[
+        Symbol("#foo7648"), Symbol("#foo7648_nomethods"),
+        :TestModSub9475, :d7648, :eval, :f7648, :include])
+    imported = Set(Symbol[:convert, :curmod_name, :curmod])
+    usings_from_Test = Set(Symbol[
+        Symbol("@inferred"), Symbol("@test"), Symbol("@test_broken"), Symbol("@test_deprecated"),
+        Symbol("@test_logs"), Symbol("@test_nowarn"), Symbol("@test_skip"), Symbol("@test_throws"),
+        Symbol("@test_warn"), Symbol("@testset"), :GenericArray, :GenericDict, :GenericOrder,
+        :GenericSet, :GenericString, :LogRecord, :Test, :TestLogger, :TestSetException,
+        :detect_ambiguities, :detect_unbound_args])
+    usings_from_Base = delete!(Set(names(Module(); usings=true)), :anonymous) # the name of the anonymous module itself
+    usings = Set(Symbol[:x36529, :TestModSub9475, :f54609]) ∪ usings_from_Test ∪ usings_from_Base
+    @test Set(names(TestMod7648)) == defaultset
+    @test Set(names(TestMod7648, all=true)) == allset
+    @test Set(names(TestMod7648, all=true, imported=true)) == allset ∪ imported
+    @test Set(names(TestMod7648, usings=true)) == defaultset ∪ usings
+    @test Set(names(TestMod7648, all=true, usings=true)) == allset ∪ usings
     @test isconst(TestMod7648, :c7648)
     @test !isconst(TestMod7648, :d7648)
 end
 
+# tests for `names(...; usings=true)`
+
+baremodule Test54609Simple
+module Inner
+export exported
+global exported::Int = 1
+global unexported::Int = 0
+end
+using Base: @assume_effects
+using .Inner
+end
+let usings = names(Test54609Simple; usings=true)
+    @test Symbol("@assume_effects") ∈ usings
+    @test :Base ∉ usings
+    @test :exported ∈ usings
+    @test :unexported ∉ usings
+end # baremodule Test54609Simple
+
+baremodule _Test54609Complex
+export exported_new
+using Base: @deprecate_binding
+global exported_new = nothing
+@deprecate_binding exported_old exported_new
+end # baremodule _Test54609Complex
+baremodule Test54609Complex
+using .._Test54609Complex
+end # baremodule Test54609Complex
+let usings = names(Test54609Complex; usings=true)
+    @test :exported_new ∈ usings
+    @test :exported_old ∉ usings
+    @test :_Test54609Complex ∈ usings # should include the `using`ed module itself
+    usings_all = names(Test54609Complex; usings=true, all=true)
+    @test :exported_new ∈ usings_all
+    @test :exported_old ∈ usings_all # deprecated names should be included with `all=true`
+end
+
+module TestMod54609
+module M1
+    const m1_x = 1
+    export m1_x
+end
+module M2
+    const m2_x = 1
+    export m2_x
+end
+module A
+    module B
+        f(x) = 1
+        secret = 1
+        module Inner2 end
+    end
+    module C
+        x = 1
+        y = 2
+        export y
+    end
+    using .B: f
+    using .C
+    using ..M1
+    import ..M2
+end
+end # module TestMod54609
+let defaultset = Set((:A,))
+    imported = Set((:M2,))
+    usings_from_Base = delete!(Set(names(Module(); usings=true)), :anonymous) # the name of the anonymous module itself
+    usings = Set((:A, :f, :C, :y, :M1, :m1_x)) ∪ usings_from_Base
+    allset = Set((:A, :B, :C, :eval, :include))
+    @test Set(names(TestMod54609.A)) == defaultset
+    @test Set(names(TestMod54609.A, imported=true)) == defaultset ∪ imported
+    @test Set(names(TestMod54609.A, usings=true)) == defaultset ∪ usings
+    @test Set(names(TestMod54609.A, all=true)) == allset
+    @test Set(names(TestMod54609.A, all=true, usings=true)) == allset ∪ usings
+    @test Set(names(TestMod54609.A, imported=true, usings=true)) == defaultset ∪ imported ∪ usings
+    @test Set(names(TestMod54609.A, all=true, imported=true, usings=true)) == allset ∪ imported ∪ usings
+end
+
 let
     using .TestMod7648
     @test Base.binding_module(@__MODULE__, :a9475) == TestMod7648.TestModSub9475
@@ -189,10 +293,10 @@ let
     @test parentmodule(foo7648, (Any,)) == TestMod7648
     @test parentmodule(foo7648) == TestMod7648
     @test parentmodule(foo7648_nomethods) == TestMod7648
-    @test parentmodule(foo9475, (Any,)) == TestMod7648.TestModSub9475
-    @test parentmodule(foo9475) == TestMod7648.TestModSub9475
+    @test parentmodule(f9475, (Any,)) == TestMod7648.TestModSub9475
+    @test parentmodule(f9475) == TestMod7648.TestModSub9475
     @test parentmodule(Foo7648) == TestMod7648
-    @test parentmodule(first(methods(foo9475))) == TestMod7648.TestModSub9475
+    @test parentmodule(first(methods(f9475))) == TestMod7648.TestModSub9475
     @test parentmodule(first(methods(foo7648))) == TestMod7648
     @test nameof(Foo7648) === :Foo7648
     @test basename(functionloc(foo7648, (Any,))[1]) == "reflection.jl"
@@ -211,15 +315,21 @@ include("testenv.jl") # for curmod_str
 import Base.isexported
 global this_is_not_defined
 export this_is_not_defined
+public this_is_public
 @test_throws ErrorException("\"this_is_not_defined\" is not defined in module Main") which(Main, :this_is_not_defined)
 @test_throws ErrorException("\"this_is_not_exported\" is not defined in module Main") which(Main, :this_is_not_exported)
 @test isexported(@__MODULE__, :this_is_not_defined)
 @test !isexported(@__MODULE__, :this_is_not_exported)
+@test !isexported(@__MODULE__, :this_is_public)
 const a_value = 1
 @test which(@__MODULE__, :a_value) === @__MODULE__
 @test_throws ErrorException("\"a_value\" is not defined in module Main") which(Main, :a_value)
 @test which(Main, :Core) === Main
 @test !isexported(@__MODULE__, :a_value)
+@test !Base.ispublic(@__MODULE__, :a_value)
+@test Base.ispublic(@__MODULE__, :this_is_not_defined)
+@test Base.ispublic(@__MODULE__, :this_is_public)
+@test !Base.ispublic(@__MODULE__, :this_is_not_exported)
 end
 
 # PR 13825
@@ -245,6 +355,7 @@ tlayout = TLayout(5,7,11)
 @test !hasproperty(tlayout, :p)
 @test [(fieldoffset(TLayout,i), fieldname(TLayout,i), fieldtype(TLayout,i)) for i = 1:fieldcount(TLayout)] ==
     [(0, :x, Int8), (2, :y, Int16), (4, :z, Int32)]
+@test [fieldoffset(TLayout, s) for s = (:x, :y, :z)] == [0, 2, 4]
 @test fieldnames(Complex) === (:re, :im)
 @test_throws BoundsError fieldtype(TLayout, 0)
 @test_throws ArgumentError fieldname(TLayout, 0)
@@ -260,6 +371,10 @@ tlayout = TLayout(5,7,11)
 @test fieldtype(Union{Tuple{Char},Tuple{Char,Char}},2) === Char
 @test_throws BoundsError fieldtype(Union{Tuple{Char},Tuple{Char,Char}},3)
 
+@test [fieldindex(TLayout, i) for i = (:x, :y, :z)] == [1, 2, 3]
+@test fieldname(TLayout, fieldindex(TLayout, :z)) === :z
+@test fieldindex(TLayout, fieldname(TLayout, 3)) === 3
+
 @test fieldnames(NTuple{3, Int}) == ntuple(i -> fieldname(NTuple{3, Int}, i), 3) == (1, 2, 3)
 @test_throws ArgumentError fieldnames(Union{})
 @test_throws BoundsError fieldname(NTuple{3, Int}, 0)
@@ -422,13 +537,13 @@ test_typed_ir_printing(g15714, Tuple{Vector{Float32}},
 #@test used_dup_var_tested15715
 @test used_unique_var_tested15714
 
-let li = typeof(fieldtype).name.mt.cache.func::Core.MethodInstance,
+let li = only(methods(fieldtype)).unspecialized,
     lrepr = string(li),
     mrepr = string(li.def),
     lmime = repr("text/plain", li),
     mmime = repr("text/plain", li.def)
 
-    @test lrepr == lmime == "MethodInstance for fieldtype(...)"
+    @test lrepr == lmime == "MethodInstance for fieldtype(::Vararg{Any})"
     @test mrepr == "fieldtype(...) @ Core none:0"       # simple print
     @test mmime == "fieldtype(...)\n     @ Core none:0" # verbose print
 end
@@ -471,6 +586,32 @@ fLargeTable(::Union, ::Union) = "b"
 @test length(methods(fLargeTable)) == 205
 @test fLargeTable(Union{Int, Missing}, Union{Int, Missing}) == "b"
 
+# issue #58479
+fLargeTable(::Type) = "Type"
+fLargeTable(::Type{<:DataType}) = "DataType"
+@test fLargeTable(Type) == "Type"
+@test fLargeTable(DataType) == "DataType"
+@test fLargeTable(Type{DataType}) == "DataType"
+@test fLargeTable(Type{UnionAll}) == "DataType"
+@test fLargeTable(Type{Int}) == "DataType"
+@test fLargeTable(Type{Vector}) == "Type"
+@test fLargeTable(Type{Type{Union{}}}) == "DataType"
+@test fLargeTable(Type{Union{}}) == "Type"
+@test fLargeTable(Union{}) == "DataType"
+@test fLargeTable(Type{<:DataType}) == "Type"
+fLargeTable(::Type{<:UnionAll}) = "UnionAll"
+@test fLargeTable(UnionAll) == "UnionAll"
+@test fLargeTable(Type{Vector}) == "UnionAll"
+@test fLargeTable(Type{Int}) == "DataType"
+@test fLargeTable(Type{Type{Union{}}}) == "DataType"
+@test fLargeTable(Type{Union{}}) == "Type"
+@test_throws MethodError fLargeTable(Union{})
+@test fLargeTable(Type{<:DataType}) == "Type"
+@test fLargeTable(Type{Vector{T}} where T) == "DataType"
+@test fLargeTable(Union{DataType,Type{Vector{T}} where T}) == "DataType"
+@test fLargeTable(Union{DataType,UnionAll,Type{Vector{T}} where T}) == "Type"
+@test fLargeTable(Union{Type{Vector},Type{Vector{T}} where T}) == "Type"
+
 # issue #15280
 function f15280(x) end
 @test functionloc(f15280)[2] > 0
@@ -485,9 +626,9 @@ function module_depth(from::Module, to::Module)
 end
 function has_backslashes(mod::Module)
     for n in names(mod, all = true, imported = true)
-        isdefined(mod, n) || continue
+        isdefinedglobal(mod, n) || continue
         Base.isdeprecated(mod, n) && continue
-        f = getfield(mod, n)
+        f = getglobal(mod, n)
         if isa(f, Module) && module_depth(Main, f) <= module_depth(Main, mod)
             continue
         end
@@ -550,6 +691,10 @@ end
 @test Base.code_typed_by_type(Tuple{Type{<:Val}})[2][2] == Val
 @test Base.code_typed_by_type(Tuple{typeof(sin), Float64})[1][2] === Float64
 
+# signature-based code_typed(...)
+@test Base.code_typed((Type{<:Val},))[2][2] == Val
+@test Base.code_typed((typeof(sin), Float64))[1][2] === Float64
+
 # New reflection methods in 0.6
 struct ReflectionExample{T<:AbstractFloat, N}
     x::Tuple{T, N}
@@ -586,7 +731,7 @@ let
     @test @inferred wrapperT(ReflectionExample{T, Int64} where T) == ReflectionExample
     @test @inferred wrapperT(ReflectionExample) == ReflectionExample
     @test @inferred wrapperT(Union{ReflectionExample{Union{},1},ReflectionExample{Float64,1}}) == ReflectionExample
-    @test_throws(ErrorException("typename does not apply to unions whose components have different typenames"),
+    @test_throws(Core.TypeNameError(Union{Int, Float64}),
                  Base.typename(Union{Int, Float64}))
 end
 
@@ -608,11 +753,16 @@ end
              sizeof(Real))
 @test sizeof(Union{ComplexF32,ComplexF64}) == 16
 @test sizeof(Union{Int8,UInt8}) == 1
-@test_throws ErrorException sizeof(AbstractArray)
+@test sizeof(MemoryRef{Int}) == 2 * sizeof(Int)
+@test sizeof(GenericMemoryRef{:atomic,Int,Core.CPU}) == 2 * sizeof(Int)
+@test sizeof(Array{Int,0}) == 2 * sizeof(Int)
+@test sizeof(Array{Int,1}) == 3 * sizeof(Int)
+@test sizeof(Array{Int,2}) == 4 * sizeof(Int)
+@test sizeof(Array{Int,20}) == 22 * sizeof(Int)
 @test_throws ErrorException sizeof(Tuple)
 @test_throws ErrorException sizeof(Tuple{Any,Any})
 @test_throws ErrorException sizeof(String)
-@test_throws ErrorException sizeof(Vector{Int})
+@test_throws ErrorException sizeof(Memory{false,Int})
 @test_throws ErrorException sizeof(Symbol)
 @test_throws ErrorException sizeof(Core.SimpleVector)
 @test_throws ErrorException sizeof(Union{})
@@ -825,6 +975,7 @@ f(x::Int; y=3) = x + y
 @test hasmethod(f, Tuple{Int})
 @test hasmethod(f, Tuple{Int}, ())
 @test hasmethod(f, Tuple{Int}, (:y,))
+@test !hasmethod(f, Tuple{Int}, (:x,))
 @test !hasmethod(f, Tuple{Int}, (:jeff,))
 @test !hasmethod(f, Tuple{Int}, (:y,), world=typemin(UInt))
 g(; b, c, a) = a + b + c
@@ -864,10 +1015,6 @@ f20872(::Val, ::Val) = false
 @test_throws ErrorException which(f20872, Tuple{Any,Val{N}} where N)
 @test which(Tuple{typeof(f20872), Val{1}, Val{2}}).sig == Tuple{typeof(f20872), Val, Val}
 
-module M29962 end
-# make sure checking if a binding is deprecated does not resolve it
-@test !Base.isdeprecated(M29962, :sin) && !Base.isbindingresolved(M29962, :sin)
-
 # @locals
 using Base: @locals
 let
@@ -904,11 +1051,12 @@ _test_at_locals2(1,1,0.5f0)
 
 @testset "issue #31687" begin
     import InteractiveUtils._dump_function
+    import InteractiveUtils.ArgInfo
 
     @noinline f31687_child(i) = f31687_nonexistent(i)
     f31687_parent() = f31687_child(0)
     params = Base.CodegenParams()
-    _dump_function(f31687_parent, Tuple{},
+    _dump_function(ArgInfo(f31687_parent, Tuple{}),
                    #=native=#false, #=wrapper=#false, #=raw=#true,
                    #=dump_module=#true, #=syntax=#:att, #=optimize=#false, :none,
                    #=binary=#false)
@@ -917,7 +1065,7 @@ end
 @test nameof(Any) === :Any
 @test nameof(:) === :Colon
 @test nameof(Core.Intrinsics.mul_int) === :mul_int
-@test nameof(Core.Intrinsics.arraylen) === :arraylen
+@test nameof(Core.Intrinsics.cglobal) === :cglobal
 
 module TestMod33403
 f(x) = 1
@@ -993,9 +1141,21 @@ end
     @test Base.default_tt(m.f4) == Tuple
 end
 
+@testset "lookup mi" begin
+    @test 1+1 == 2
+    mi1 = Base.method_instance(+, (Int, Int))
+    @test mi1.def.name == :+
+    mi2 = Base.method_instance((typeof(+), Int, Int))
+    @test mi2.def.name == :+
+    # Note `jl_method_lookup` doesn't return CNull if not found
+    mi3 = @ccall jl_method_lookup(Any[+, 1, 1]::Ptr{Any}, 3::Csize_t, Base.get_world_counter()::Csize_t)::Ref{Core.MethodInstance}
+    @test mi1 == mi3
+    @test mi2 == mi3
+end
+
 Base.@assume_effects :terminates_locally function issue41694(x::Int)
     res = 1
-    1 < x < 20 || throw("bad")
+    0 ≤ x < 20 || error("bad fact")
     while x > 1
         res *= x
         x -= 1
@@ -1009,7 +1169,22 @@ ambig_effects_test(a::Int, b) = 1
 ambig_effects_test(a, b::Int) = 1
 ambig_effects_test(a, b) = 1
 
-@testset "infer_effects" begin
+@testset "Base.infer_return_type[s]" begin
+    # generic function case
+    @test only(Base.return_types(issue41694, (Int,))) == Base.infer_return_type(issue41694, (Int,)) == Int
+    # case when it's not fully covered
+    @test only(Base.return_types(issue41694, (Integer,))) == Base.infer_return_type(issue41694, (Integer,)) == Int
+    # MethodError case
+    @test isempty(Base.return_types(issue41694, (Float64,)))
+    @test Base.infer_return_type(issue41694, (Float64,)) == Union{}
+    # builtin case
+    @test only(Base.return_types(typeof, (Any,))) == Base.infer_return_type(typeof, (Any,)) == DataType
+    @test only(Base.return_types(===, (Any,Any))) == Base.infer_return_type(===, (Any,Any)) == Bool
+    @test only(Base.return_types(setfield!, ())) == Base.infer_return_type(setfield!, ()) == Union{}
+    @test only(Base.return_types(Core.Intrinsics.mul_int, ())) == Base.infer_return_type(Core.Intrinsics.mul_int, ()) == Union{}
+end
+
+@testset "Base.infer_effects" begin
     # generic functions
     @test Base.infer_effects(issue41694, (Int,)) |> Core.Compiler.is_terminates
     @test Base.infer_effects((Int,)) do x
@@ -1033,7 +1208,34 @@ ambig_effects_test(a, b) = 1
     @test Base.infer_effects(typeof, (Any,)) |> Core.Compiler.is_foldable_nothrow
     @test Base.infer_effects(===, (Any,Any)) |> Core.Compiler.is_foldable_nothrow
     @test (Base.infer_effects(setfield!, ()); true) # `builtin_effects` shouldn't throw on empty `argtypes`
-    @test (Base.infer_effects(Core.Intrinsics.arraylen, ()); true) # `intrinsic_effects` shouldn't throw on empty `argtypes`
+    @test (Base.infer_effects(Core.Intrinsics.mul_int, ()); true) # `intrinsic_effects` shouldn't throw on empty `argtypes`
+end
+
+@testset "Base.infer_exception_type[s]" begin
+    # generic functions
+    @test Base.infer_exception_type(issue41694, (Int,)) == only(Base.infer_exception_types(issue41694, (Int,))) == ErrorException
+    @test Base.infer_exception_type((Int,)) do x
+        issue41694(x)
+    end == Base.infer_exception_types((Int,)) do x
+        issue41694(x)
+    end |> only == ErrorException
+    @test Base.infer_exception_type(issue41694) == only(Base.infer_exception_types(issue41694)) == ErrorException # use `default_tt`
+    let excts = Base.infer_exception_types(maybe_effectful, (Any,))
+        @test any(==(Any), excts)
+        @test any(==(Union{}), excts)
+    end
+    @test Base.infer_exception_type(maybe_effectful, (Any,)) == Any
+    # `infer_exception_type` should account for MethodError
+    @test Base.infer_exception_type(issue41694, (Float64,)) == MethodError # definitive dispatch error
+    @test Base.infer_exception_type(issue41694, (Integer,)) == Union{MethodError,ErrorException} # possible dispatch error
+    @test Base.infer_exception_type(f_no_methods) == MethodError # no possible matching methods
+    @test Base.infer_exception_type(ambig_effects_test, (Int,Int)) == MethodError # ambiguity error
+    @test Base.infer_exception_type(ambig_effects_test, (Int,Any)) == MethodError # ambiguity error
+    # builtins
+    @test Base.infer_exception_type(typeof, (Any,)) === only(Base.infer_exception_types(typeof, (Any,))) === Union{}
+    @test Base.infer_exception_type(===, (Any,Any)) === only(Base.infer_exception_types(===, (Any,Any))) === Union{}
+    @test (Base.infer_exception_type(setfield!, ()); Base.infer_exception_types(setfield!, ()); true) # `infer_exception_type[s]` shouldn't throw on empty `argtypes`
+    @test (Base.infer_exception_type(Core.Intrinsics.mul_int, ()); Base.infer_exception_types(Core.Intrinsics.mul_int, ()); true) # `infer_exception_type[s]` shouldn't throw on empty `argtypes`
 end
 
 @test Base._methods_by_ftype(Tuple{}, -1, Base.get_world_counter()) == Any[]
@@ -1057,3 +1259,111 @@ end
 @test !Base.ismutationfree(Vector{UInt64})
 
 @test Base.ismutationfree(Type{Union{}})
+
+@test !Base.ismutationfree(Core.SimpleVector)
+
+module TestNames
+
+public publicized
+export exported
+
+publicized() = 1
+exported() = 1
+private() = 1
+
+end
+
+@test names(TestNames) == [:TestNames, :exported, :publicized]
+
+# reflections for generated function with abstract input types
+
+# :generated_only function should return failed results if given abstract input types
+@generated function generated_only_simple(x)
+    if x <: Integer
+        return :(x ^ 2)
+    else
+        return :(x)
+    end
+end
+@test only(Base.return_types(generated_only_simple, (Real,))) ==
+      Base.infer_return_type(generated_only_simple, (Real,)) ==
+      Core.Compiler.return_type(generated_only_simple, Tuple{Real}) == Any
+let (src, rt) = only(code_typed(generated_only_simple, (Real,)))
+    @test src isa Method
+    @test rt == Any
+end
+
+# optionally generated function should return fallback results if given abstract input types
+function sub2ind_gen_impl(dims::Type{NTuple{N,Int}}, I...) where N
+    ex = :(I[$N] - 1)
+    for i = (N - 1):-1:1
+        ex = :(I[$i] - 1 + dims[$i] * $ex)
+    end
+    return :($ex + 1)
+end;
+function sub2ind_gen_fallback(dims::NTuple{N,Int}, I) where N
+    ind = I[N] - 1
+    for i = (N - 1):-1:1
+        ind = I[i] - 1 + dims[i]*ind
+    end
+    return ind + 1
+end;
+function sub2ind_gen(dims::NTuple{N,Int}, I::Integer...) where N
+    length(I) == N || error("partial indexing is unsupported")
+    if @generated
+        return sub2ind_gen_impl(dims, I...)
+    else
+        return sub2ind_gen_fallback(dims, I)
+    end
+end;
+@test only(Base.return_types(sub2ind_gen, (NTuple,Int,Int,))) == Int
+let (src, rt) = only(code_typed(sub2ind_gen, (NTuple,Int,Int,); optimize=false))
+    @test src isa CodeInfo
+    @test rt == Int
+    @test any(iscall((src,sub2ind_gen_fallback)), src.code)
+    @test any(iscall((src,error)), src.code)
+end
+
+# marking a symbol as public should not "unexport" it
+# https://github.com/JuliaLang/julia/issues/52812
+module Mod52812
+using Test
+export a, b
+@test_throws ErrorException eval(Expr(:public, :a))
+public c
+@test_throws ErrorException eval(Expr(:export, :c))
+export b
+public c
+end
+
+@test Base.isexported(Mod52812, :a)
+@test Base.isexported(Mod52812, :b)
+@test Base.ispublic(Mod52812, :a)
+@test Base.ispublic(Mod52812, :b)
+@test Base.ispublic(Mod52812, :c) && !Base.isexported(Mod52812, :c)
+
+@test Base.infer_return_type(code_lowered, (Any,)) == Vector{Core.CodeInfo}
+@test Base.infer_return_type(code_lowered, (Any,Any)) == Vector{Core.CodeInfo}
+
+@test methods(Union{}) == Any[m.method for m in Base._methods_by_ftype(Tuple{Core.TypeofBottom, Vararg}, 1, Base.get_world_counter())] # issue #55187
+
+# which should not look through const bindings, even if they have the same value
+# as a previous implicit import
+module SinConst
+const sin = Base.sin
+end
+
+@test which(SinConst, :sin) === SinConst
+
+# `which` should error if there is not a unique binding that a constant was imported from
+module X1ConstConflict
+const xconstconflict = 1
+export xconstconflict
+end
+module X2ConstConflict
+const xconstconflict = 1
+export xconstconflict
+end
+using .X1ConstConflict, .X2ConstConflict
+
+@test_throws ErrorException which(@__MODULE__, :xconstconflict)
diff --git a/test/regex.jl b/test/regex.jl
index e5f1428527512..51802125a3467 100644
--- a/test/regex.jl
+++ b/test/regex.jl
@@ -101,15 +101,34 @@
         @test haskey(m, 3)
         @test !haskey(m, 44)
         @test (m[1], m[2], m[3]) == ("x", "y", "z")
+        @test Tuple(m) == ("x", "y", "z")
+        @test NamedTuple(m) == (var"1"="x", var"2"="y", var"3"="z")
+        @test Dict(m) == Dict([1=>"x", 2=>"y", 3=>"z"])
         @test sprint(show, m) == "RegexMatch(\"xyz\", 1=\"x\", 2=\"y\", 3=\"z\")"
     end
 
     # Named subpatterns
+    let m = match(r"(?<a>.)(?<c>.)(?<b>.)", "xyz")
+        @test haskey(m, :a)
+        @test haskey(m, "b")
+        @test !haskey(m, "foo")
+        @test (m[:a], m[:c], m["b"]) == ("x", "y", "z")
+        @test Tuple(m) == ("x", "y", "z")
+        @test NamedTuple(m) == (a="x", c="y", b="z")
+        @test Dict(m) == Dict(["a"=>"x", "c"=>"y", "b"=>"z"])
+        @test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", c=\"y\", b=\"z\")"
+        @test keys(m) == ["a", "c", "b"]
+    end
+
+    # Named and unnamed subpatterns
     let m = match(r"(?<a>.)(.)(?<b>.)", "xyz")
         @test haskey(m, :a)
         @test haskey(m, "b")
         @test !haskey(m, "foo")
         @test (m[:a], m[2], m["b"]) == ("x", "y", "z")
+        @test Tuple(m) == ("x", "y", "z")
+        @test NamedTuple(m) == (a="x", var"2"="y", b="z")
+        @test Dict(m) == Dict(["a"=>"x", 2=>"y", "b"=>"z"])
         @test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")"
         @test keys(m) == ["a", 2, "b"]
     end
@@ -194,7 +213,7 @@
 
         r = r"" * raw"a\Eb|c"
         @test match(r, raw"a\Eb|c").match == raw"a\Eb|c"
-        @test match(r, raw"c") == nothing
+        @test match(r, raw"c") === nothing
 
         # error for really incompatible options
         @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS & ~Base.PCRE.UCP, Base.DEFAULT_MATCH_OPTS)
diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl
index 501e9f4a9b57f..71afe1e0a3696 100644
--- a/test/reinterpretarray.jl
+++ b/test/reinterpretarray.jl
@@ -6,20 +6,35 @@ using .Main.OffsetArrays
 isdefined(Main, :TSlow) || @eval Main include("testhelpers/arrayindexingtypes.jl")
 using .Main: TSlow, WrapperArray
 
+tslow(a::AbstractArray) = TSlow(a)
+wrapper(a::AbstractArray) = WrapperArray(a)
+fcviews(a::AbstractArray) = view(a, ntuple(Returns(:),ndims(a)-1)..., axes(a)[end])
+fcviews(a::AbstractArray{<:Any, 0}) = view(a)
+offset_nominal(a::AbstractArray) = OffsetArray(a)
+offset_maybe(a::AbstractArray) = (eltype(a) <: Real) ? a : OffsetArray(a, (1-ndims(A)):2:(ndims(A)-1)...)
+tslow(t::Tuple) = map(tslow, t)
+wrapper(t::Tuple) = map(wrapper, t)
+fcviews(t::Tuple) = map(fcviews, t)
+offset_nominal(t::Tuple) = map(offset_nominal, t)
+offset_maybe(t::Tuple) = map(offset_maybe, t)
+
+test_many_wrappers(testf, A, wrappers) = foreach(w -> testf(w(A)), wrappers)
+test_many_wrappers(testf, A) = test_many_wrappers(
+    testf, A, (identity, tslow, wrapper, fcviews, offset_nominal, offset_maybe)
+)
+
 A = Int64[1, 2, 3, 4]
-As = TSlow(A)
 Ars = Int64[1 3; 2 4]
-Arss = TSlow(Ars)
 B = Complex{Int64}[5+6im, 7+8im, 9+10im]
-Bs = TSlow(B)
 Av = [Int32[1,2], Int32[3,4]]
+C = view([1,1], [1,2])
 
-for Ar in (Ars, Arss)
+test_many_wrappers(Ars, (identity, tslow)) do Ar
     @test @inferred(ndims(reinterpret(reshape, Complex{Int64}, Ar))) == 1
     @test @inferred(axes(reinterpret(reshape, Complex{Int64}, Ar))) === (Base.OneTo(2),)
     @test @inferred(size(reinterpret(reshape, Complex{Int64}, Ar))) == (2,)
 end
-for _B in (B, Bs)
+test_many_wrappers(B, (identity, tslow)) do _B
     @test @inferred(ndims(reinterpret(reshape, Int64, _B))) == 2
     @test @inferred(axes(reinterpret(reshape, Int64, _B))) === (Base.OneTo(2), Base.OneTo(3))
     @test @inferred(size(reinterpret(reshape, Int64, _B))) == (2, 3)
@@ -32,9 +47,8 @@ end
 @test_throws ArgumentError("cannot reinterpret `Vector{Int32}` as `Int32`, type `Vector{Int32}` is not a bits type") reinterpret(Int32, Av)
 @test_throws ArgumentError("cannot reinterpret a zero-dimensional `Int64` array to `Int32` which is of a different size") reinterpret(Int32, reshape([Int64(0)]))
 @test_throws ArgumentError("cannot reinterpret a zero-dimensional `Int32` array to `Int64` which is of a different size") reinterpret(Int64, reshape([Int32(0)]))
-@test_throws ArgumentError("""cannot reinterpret an `$Int` array to `Tuple{$Int, $Int}` whose first dimension has size `5`.
-                              The resulting array would have non-integral first dimension.
-                              """) reinterpret(Tuple{Int,Int}, [1,2,3,4,5])
+@test_throws ArgumentError("cannot reinterpret an `$Int` array to `Tuple{$Int, $Int}` whose first dimension has size `5`."*
+                              " The resulting array would have a non-integral first dimension.") reinterpret(Tuple{Int,Int}, [1,2,3,4,5])
 
 @test_throws ArgumentError("`reinterpret(reshape, Complex{Int64}, a)` where `eltype(a)` is Int64 requires that `axes(a, 1)` (got Base.OneTo(4)) be equal to 1:2 (from the ratio of element sizes)") reinterpret(reshape, Complex{Int64}, A)
 @test_throws ArgumentError("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got 24) and `sizeof(eltype(a))` (got 16) be an integer multiple of the other") reinterpret(reshape, NTuple{3, Int64}, B)
@@ -42,24 +56,63 @@ end
 @test_throws ArgumentError("cannot reinterpret a zero-dimensional `UInt8` array to `UInt16` which is of a larger size") reinterpret(reshape, UInt16, reshape([0x01]))
 
 # getindex
-for _A in (A, As)
+test_many_wrappers(A) do _A
     @test reinterpret(Complex{Int64}, _A) == [1 + 2im, 3 + 4im]
     @test reinterpret(Float64, _A) == reinterpret.(Float64, A)
     @test reinterpret(reshape, Float64, _A) == reinterpret.(Float64, A)
 end
-for Ar in (Ars, Arss)
+test_many_wrappers(Ars) do Ar
     @test reinterpret(reshape, Complex{Int64}, Ar) == [1 + 2im, 3 + 4im]
     @test reinterpret(reshape, Float64, Ar) == reinterpret.(Float64, Ars)
 end
 
-for _B in (B, Bs)
+test_many_wrappers(B) do _B
     @test reinterpret(NTuple{3, Int64}, _B) == [(5,6,7),(8,9,10)]
     @test reinterpret(reshape, Int64, _B) == [5 7 9; 6 8 10]
 end
 
+@testset "setindex! converts before reinterpreting" begin
+    for dims in ((), 1)
+        z = reinterpret(UInt64, fill(1.0, dims))
+        @test z[] == z[1] == 0x3ff0000000000000
+        z[] = Int32(1)//Int32(1)
+        @test z[] == z[1] == 0x0000000000000001
+        z[1] = Int32(2)//Int32(1)
+        @test z[] == z[1] == 0x0000000000000002
+        z[1] = 3//1
+        @test z[] == z[1] == 0x0000000000000003
+        @test_throws InexactError z[] = 3//2
+        @test_throws InexactError z[] = 1.5
+        @test_throws InexactError z[1] = 3//2
+        @test_throws InexactError z[1] = 1.5
+
+        z = reinterpret(UInt64, fill(Int32(16)//Int32(1), dims))
+        @test z[] == z[1] == 0x0000000100000010
+        z[] = Int32(1)//Int32(1)
+        @test z[] == z[1] == 0x0000000000000001
+        z[1] = Int32(2)//Int32(1)
+        @test z[] == z[1] == 0x0000000000000002
+        z[1] = 3//1
+        @test z[] == z[1] == 0x0000000000000003
+        @test_throws InexactError z[] = 3//2
+        @test_throws InexactError z[] = 1.5
+        @test_throws InexactError z[1] = 3//2
+        @test_throws InexactError z[1] = 1.5
+
+        z = reinterpret(Missing, fill(nothing, dims))
+        @test z[] === missing
+        @test z[1] === missing
+        @test_throws "cannot convert" z[] = nothing
+        @test_throws "cannot convert" z[1] = nothing
+        @test z[] === missing
+        @test z[1] === missing
+    end
+end
+
 # setindex
-for (_A, Ar, _B) in ((A, Ars, B), (As, Arss, Bs))
-    let Ac = copy(_A), Arsc = copy(Ar), Bc = copy(_B)
+test_many_wrappers((A, Ars, B)) do (A, Ars, B)
+    _A, Ar, _B = deepcopy(A), deepcopy(Ars), deepcopy(B)
+    let Ac = deepcopy(_A), Arsc = deepcopy(Ar), Bc = deepcopy(_B)
         reinterpret(Complex{Int64}, Ac)[2] = -1 - 2im
         @test Ac == [1, 2, -1, -2]
         reinterpret(Complex{Int64}, Arsc)[2] = -1 - 2im
@@ -94,50 +147,79 @@ for (_A, Ar, _B) in ((A, Ars, B), (As, Arss, Bs))
     end
 end
 A3 = collect(reshape(1:18, 2, 3, 3))
-A3r = reinterpret(reshape, Complex{Int}, A3)
-@test A3r[4] === A3r[1,2] === A3r[CartesianIndex(1, 2)] === 7+8im
-A3r[2,3] = -8-15im
-@test A3[1,2,3] == -8
-@test A3[2,2,3] == -15
-A3r[4] = 100+200im
-@test A3[1,1,2] == 100
-@test A3[2,1,2] == 200
-A3r[CartesianIndex(1,2)] = 300+400im
-@test A3[1,1,2] == 300
-@test A3[2,1,2] == 400
+test_many_wrappers(A3) do A3_
+    A3 = deepcopy(A3_)
+    A3r = reinterpret(reshape, Complex{Int}, A3)
+    @test A3r[4] === A3r[1,2] === A3r[CartesianIndex(1, 2)] === 7+8im
+    A3r[2,3] = -8-15im
+    @test A3[1,2,3] == -8
+    @test A3[2,2,3] == -15
+    A3r[4] = 100+200im
+    @test A3[1,1,2] == 100
+    @test A3[2,1,2] == 200
+    A3r[CartesianIndex(1,2)] = 300+400im
+    @test A3[1,1,2] == 300
+    @test A3[2,1,2] == 400
+end
+
+test_many_wrappers(C) do Cr_
+    Cr = deepcopy(Cr_)
+    r = reinterpret(reshape, Tuple{Int, Int}, Cr)
+    @test r == fill((1,1))
+    r[] = (2,2)
+    @test r[] === (2,2)
+    r[1] = (3,3)
+    @test r[1] === (3,3)
+    r[1,1] = (4,4)
+    @test r[1,1] === (4,4)
+end
 
 # same-size reinterpret where one of the types is non-primitive
-let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)], ra = reinterpret(Float32, a)
-    @test ra[1] == reinterpret(Float32, 0x04030201)
-    @test setindex!(ra, 2.0) === ra
-    @test reinterpret(Float32, a)[1] == 2.0
+let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)]
+    test_many_wrappers(a, (identity, wrapper, fcviews)) do a_
+        a = deepcopy(a_)
+        ra = reinterpret(Float32, a)
+        @test ra[1] == reinterpret(Float32, 0x04030201)
+        @test setindex!(ra, 2.0) === ra
+        @test reinterpret(Float32, a)[1] == 2.0
+    end
 end
-let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)], ra = reinterpret(reshape, Float32, a)
-    @test ra[1] == reinterpret(Float32, 0x04030201)
-    @test setindex!(ra, 2.0) === ra
-    @test reinterpret(reshape, Float32, a)[1] == 2.0
+let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)]
+    test_many_wrappers(a, (identity, wrapper, fcviews)) do a_
+        a = deepcopy(a_)
+        ra = reinterpret(reshape, Float32, a)
+        @test ra[1] == reinterpret(Float32, 0x04030201)
+        @test setindex!(ra, 2.0) === ra
+        @test reinterpret(reshape, Float32, a)[1] == 2.0
+    end
 end
 
 # Pass-through indexing
 B = Complex{Int64}[5+6im, 7+8im, 9+10im]
-Br = reinterpret(reshape, Int64, B)
-W = WrapperArray(Br)
-for (b, w) in zip(5:10, W)
-    @test b == w
-end
-for (i, j) in zip(eachindex(W), 11:16)
-    W[i] = j
+test_many_wrappers(B) do B_
+    B = deepcopy(B_)
+    Br = reinterpret(reshape, Int64, B)
+    W = WrapperArray(Br)
+    for (b, w) in zip(5:10, W)
+        @test b == w
+    end
+    for (i, j) in zip(eachindex(W), 11:16)
+        W[i] = j
+    end
+    @test B[1] === Complex{Int64}(11+12im)
+    @test B[2] === Complex{Int64}(13+14im)
+    @test B[3] === Complex{Int64}(15+16im)
 end
-@test B[1] === Complex{Int64}(11+12im)
-@test B[2] === Complex{Int64}(13+14im)
-@test B[3] === Complex{Int64}(15+16im)
 z3 = (0x00, 0x00, 0x00)
 Az = [z3 z3; z3 z3]
-Azr = reinterpret(reshape, UInt8, Az)
-W = WrapperArray(Azr)
-copyto!(W, fill(0x01, 3, 2, 2))
-@test all(isequal((0x01, 0x01, 0x01)), Az)
-@test eachindex(W, W) == eachindex(W)
+test_many_wrappers(Az, (identity, wrapper)) do Az_
+    Az = deepcopy(Az_)
+    Azr = reinterpret(reshape, UInt8, Az)
+    W = WrapperArray(Azr)
+    copyto!(W, fill(0x01, 3, 2, 2))
+    @test all(isequal((0x01, 0x01, 0x01)), Az)
+    @test eachindex(W, W) == eachindex(W)
+end
 
 # ensure that reinterpret arrays aren't erroneously classified as strided
 let A = reshape(1:20, 5, 4)
@@ -169,7 +251,7 @@ function check_strides(A::AbstractArray)
 end
 
 @testset "strides for NonReshapedReinterpretArray" begin
-    A = Array{Int32}(reshape(1:88, 11, 8))
+    A = WrapperArray(Array{Int32}(reshape(1:88, 11, 8)))
     for viewax2 in (1:8, 1:2:6, 7:-1:1, 5:-2:1, 2:3:8, 7:-6:1, 3:5:11)
         # dim1 is contiguous
         for T in (Int16, Float32)
@@ -203,7 +285,7 @@ end
 end
 
 @testset "strides for ReshapedReinterpretArray" begin
-    A = Array{Int32}(reshape(1:192, 3, 8, 8))
+    A = WrapperArray(Array{Int32}(reshape(1:192, 3, 8, 8)))
     for viewax1 in (1:8, 1:2:8, 8:-1:1, 8:-2:1), viewax2 in (1:2, 4:-1:1)
         for T in (Int16, Float32)
             @test check_strides(reinterpret(reshape, T, view(A, 1:2, viewax1, viewax2)))
@@ -240,13 +322,14 @@ end
 end
 
 # IndexStyle
-let a = fill(1.0, 5, 3)
+test_many_wrappers(fill(1.0, 5, 3), (identity, wrapper)) do a_
+    a = deepcopy(a_)
     r = reinterpret(Int64, a)
     @test @inferred(IndexStyle(r)) == IndexLinear()
     fill!(r, 2)
     @test all(a .=== reinterpret(Float64, [Int64(2)])[1])
     @test all(r .=== Int64(2))
-    for badinds in (0, 16, (0,1), (1,0), (6,3), (5,4))
+    for badinds in ((), 0, 16, (0,1), (1,0), (6,3), (5,4))
         @test_throws BoundsError r[badinds...]
         @test_throws BoundsError r[badinds...] = -2
     end
@@ -259,7 +342,7 @@ let a = fill(1.0, 5, 3)
     fill!(r, 3)
     @test all(a .=== reinterpret(Float64, [(Int32(3), Int32(3))])[1])
     @test all(r .=== Int32(3))
-    for badinds in (0, 31, (0,1), (1,0), (11,3), (10,4))
+    for badinds in ((), 0, 31, (0,1), (1,0), (11,3), (10,4))
         @test_throws BoundsError r[badinds...]
         @test_throws BoundsError r[badinds...] = -3
     end
@@ -272,7 +355,7 @@ let a = fill(1.0, 5, 3)
     fill!(r, 4)
     @test all(a[1:2:5,:] .=== reinterpret(Float64, [Int64(4)])[1])
     @test all(r .=== Int64(4))
-    for badinds in (0, 10, (0,1), (1,0), (4,3), (3,4))
+    for badinds in ((), 0, 10, (0,1), (1,0), (4,3), (3,4))
         @test_throws BoundsError r[badinds...]
         @test_throws BoundsError r[badinds...] = -4
     end
@@ -285,7 +368,7 @@ let a = fill(1.0, 5, 3)
     fill!(r, 5)
     @test all(a[1:2:5,:] .=== reinterpret(Float64, [(Int32(5), Int32(5))])[1])
     @test all(r .=== Int32(5))
-    for badinds in (0, 19, (0,1), (1,0), (7,3), (6,4))
+    for badinds in ((), 0, 19, (0,1), (1,0), (7,3), (6,4))
         @test_throws BoundsError r[badinds...]
         @test_throws BoundsError r[badinds...] = -5
     end
@@ -293,14 +376,32 @@ let a = fill(1.0, 5, 3)
         @test setindex!(r, -5, goodinds...) === r
         @test r[goodinds...] == -5
     end
+end
+
+let a = rand(ComplexF32, 5)
+    r = reinterpret(reshape, Float32, a)
+    ref = Array(r)
+
+    @test all(r .== OffsetArray(r)[:, :, :])
+
+    @test r[1, :, 1]        == ref[1, :]
+    @test r[1, :, 1, 1, 1]  == ref[1, :]
+    @test r[1, :, UInt8(1)] == ref[1, :]
+
+    r[2, :, 1] .= 0f0
+    ref[2,  :] .= 0f0
+    @test r[2, :, 1] == ref[2, :]
+
+    @test r[4] == ref[4]
+    @test_throws BoundsError r[1, :, 2]
+end
 
-    ar = [(1,2), (3,4)]
+let ar = [(1,2), (3,4)]
     arr = reinterpret(reshape, Int, ar)
     @test @inferred(IndexStyle(arr)) == Base.IndexSCartesian2{2}()
     @test @inferred(eachindex(arr)) == Base.SCartesianIndices2{2}(Base.OneTo(2))
     @test @inferred(eachindex(arr, arr)) == Base.SCartesianIndices2{2}(Base.OneTo(2))
 end
-
 # Error on reinterprets that would expose padding
 struct S1
     a::Int8
@@ -314,11 +415,14 @@ end
 
 A1 = S1[S1(0, 0)]
 A2 = S2[S2(0, 0)]
-@test reinterpret(S1, A2)[1] == S1(0, 0)
-@test_throws Base.PaddingError (reinterpret(S1, A2)[1] = S2(1, 2))
-@test_throws Base.PaddingError reinterpret(S2, A1)[1]
-reinterpret(S2, A1)[1] = S2(1, 2)
-@test A1[1] == S1(1, 2)
+test_many_wrappers((A1, A2), (identity, wrapper)) do (A1_, A2_)
+    A1, A2 = deepcopy(A1_), deepcopy(A2_)
+    @test reinterpret(S1, A2)[1] == S1(0, 0)
+    @test_throws Base.PaddingError (reinterpret(S1, A2)[1] = S2(1, 2))
+    @test_throws Base.PaddingError reinterpret(S2, A1)[1]
+    reinterpret(S2, A1)[1] = S2(1, 2)
+    @test A1[1] == S1(1, 2)
+end
 
 # Unconventional axes
 let a = [0.1 0.2; 0.3 0.4], at = reshape([(i,i+1) for i = 1:2:8], 2, 2)
@@ -371,50 +475,59 @@ end
 
 # Test 0-dimensional Arrays
 A = zeros(UInt32)
-B = reinterpret(Int32, A)
-Brs = reinterpret(reshape,Int32, A)
-C = reinterpret(Tuple{UInt32}, A) # non-primitive type
-Crs = reinterpret(reshape, Tuple{UInt32}, A)  # non-primitive type
-@test size(B) == size(Brs) == size(C) == size(Crs) == ()
-@test axes(B) == axes(Brs) == axes(C) == axes(Crs) == ()
-@test setindex!(B, Int32(5)) === B
-@test B[] === Int32(5)
-@test Brs[] === Int32(5)
-@test C[] === (UInt32(5),)
-@test Crs[] === (UInt32(5),)
-@test A[] === UInt32(5)
-@test setindex!(Brs, Int32(12)) === Brs
-@test A[] === UInt32(12)
-@test setindex!(C, (UInt32(7),)) === C
-@test A[] === UInt32(7)
-@test setindex!(Crs, (UInt32(3),)) === Crs
-@test A[] === UInt32(3)
-
-
-a = [(1.0,2.0)]
-af = @inferred(reinterpret(reshape, Float64, a))
-anew = @inferred(reinterpret(reshape, Tuple{Float64,Float64}, vec(af)))
-@test anew[1] == a[1]
-@test ndims(anew) == 0
+test_many_wrappers(A, (identity, wrapper)) do A_
+    A = deepcopy(A_)
+    B = reinterpret(Int32, A)
+    Brs = reinterpret(reshape,Int32, A)
+    C = reinterpret(Tuple{UInt32}, A) # non-primitive type
+    Crs = reinterpret(reshape, Tuple{UInt32}, A)  # non-primitive type
+    @test size(B) == size(Brs) == size(C) == size(Crs) == ()
+    @test axes(B) == axes(Brs) == axes(C) == axes(Crs) == ()
+    @test setindex!(B, Int32(5)) === B
+    @test B[] === Int32(5)
+    @test Brs[] === Int32(5)
+    @test C[] === (UInt32(5),)
+    @test Crs[] === (UInt32(5),)
+    @test A[] === UInt32(5)
+    @test setindex!(Brs, Int32(12)) === Brs
+    @test A[] === UInt32(12)
+    @test setindex!(C, (UInt32(7),)) === C
+    @test A[] === UInt32(7)
+    @test setindex!(Crs, (UInt32(3),)) === Crs
+    @test A[] === UInt32(3)
+end
+
+test_many_wrappers([(1.0,2.0)], (identity, wrapper)) do a
+    af = @inferred(reinterpret(reshape, Float64, a))
+    anew = @inferred(reinterpret(reshape, Tuple{Float64,Float64}, vec(af)))
+    @test anew[1] == a[1]
+    @test ndims(anew) == 0
+end
 
 # re-reinterpret
 a0 = reshape([0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04], 4, 2)
-a = reinterpret(reshape, NTuple{4,UInt8}, a0)
-@test a == [(0x22, 0x44, 0x88, 0xf0), (0x01, 0x02, 0x03, 0x04)]
-@test reinterpret(UInt8, a) == [0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04]
-@test reinterpret(reshape, UInt8, a) === a0
+test_many_wrappers(a0, (identity, wrapper)) do a0
+    a = reinterpret(reshape, NTuple{4,UInt8}, a0)
+    @test a == [(0x22, 0x44, 0x88, 0xf0), (0x01, 0x02, 0x03, 0x04)]
+    @test reinterpret(UInt8, a) == [0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04]
+    @test reinterpret(reshape, UInt8, a) === a0
+end
 
 # reductions
 a = [(1,2,3), (4,5,6)]
-ars = reinterpret(reshape, Int, a)
-@test sum(ars) == 21
-@test sum(ars; dims=1) == [6 15]
-@test sum(ars; dims=2) == reshape([5,7,9], (3, 1))
-@test sum(ars; dims=(1,2)) == reshape([21], (1, 1))
+test_many_wrappers(a, (identity, wrapper)) do a
+    ars = reinterpret(reshape, Int, a)
+    @test sum(ars) == 21
+    @test sum(ars; dims=1) == [6 15]
+    @test sum(ars; dims=2) == reshape([5,7,9], (3, 1))
+    @test sum(ars; dims=(1,2)) == reshape([21], (1, 1))
+end
 # also test large sizes for the pairwise algorithm
 a = [(k,k+1,k+2) for k = 1:3:4000]
-ars = reinterpret(reshape, Int, a)
-@test sum(ars) == 8010003
+test_many_wrappers(a, (identity, wrapper)) do a
+    ars = reinterpret(reshape, Int, a)
+    @test sum(ars) == 8010003
+end
 
 @testset "similar(::ReinterpretArray)" begin
     a = reinterpret(NTuple{2,Float64}, TSlow(rand(Float64, 4, 4)))
@@ -427,6 +540,9 @@ ars = reinterpret(reshape, Int, a)
     @test as isa TSlow{Int,3}
     @test size(as) == (3, 5, 1)
 
+    as = similar(typeof(a),(3, 5, 1))
+    @test as isa TSlow{Float64,3}
+    @test size(as) == (3, 5, 1)
     a = reinterpret(reshape, NTuple{4,Float64}, TSlow(rand(Float64, 4, 4)))
 
     as = similar(a)
@@ -514,6 +630,21 @@ end
     @test_throws MethodError x[2,4] = nothing
 end
 
+@testset "pointer for StridedArray" begin
+    a = rand(Float64, 251)
+    v = view(a, UInt(2):UInt(251));
+    A = reshape(v, 25, 10);
+    @test A isa StridedArray && pointer(A) === pointer(a, 2)
+    Av = view(A, 1:20, 1:2)
+    @test Av isa StridedArray && pointer(Av) === pointer(a, 2)
+    @test Av * Av' isa Array
+end
+
+@testset "effect of StridedReinterpretArray's getindex" begin
+    eff = Base.infer_effects(getindex, Base.typesof(reinterpret(Int8, Int[1]), 1))
+    @test Core.Compiler.is_effect_free(eff)
+end
+
 # reinterpret of arbitrary bitstypes
 @testset "Reinterpret arbitrary bitstypes" begin
     struct Bytes15
@@ -535,3 +666,29 @@ end
 
     @test_throws ArgumentError reinterpret(Tuple{Int32, Int64}, (Int16(1), Int64(4)))
 end
+
+let R = reinterpret(Float32, ComplexF32[1.0f0+2.0f0*im, 4.0f0+3.0f0*im])
+    @test !isassigned(R, 0)
+    @test isassigned(R, 1)
+    @test isassigned(R, 4)
+    @test isassigned(R, Int8(2), Int16(1), Int32(1), Int64(1))
+    @test !isassigned(R, 1, 2)
+    @test !isassigned(R, 5)
+    @test Array(R)::Vector{Float32} == [1.0f0, 2.0f0, 4.0f0, 3.0f0]
+end
+
+let R = reinterpret(reshape, Float32, ComplexF32[1.0f0+2.0f0*im, 4.0f0+3.0f0*im])
+    @test !isassigned(R, 0)
+    @test isassigned(R, 1)
+    @test isassigned(R, 4)
+    @test isassigned(R, Int8(2), Int16(2), Int32(1), Int64(1))
+    @test !isassigned(R, 1, 1, 2)
+    @test !isassigned(R, 5)
+    @test Array(R)::Matrix{Float32} == [1.0f0 4.0f0; 2.0f0 3.0f0]
+end
+
+@testset "issue #54623" begin
+    x = 0xabcdef01234567
+    @test reinterpret(reshape, UInt8, fill(x)) == [0x67, 0x45, 0x23, 0x01, 0xef, 0xcd, 0xab, 0x00]
+    @test reinterpret(reshape, UInt8, [x]) == [0x67; 0x45; 0x23; 0x01; 0xef; 0xcd; 0xab; 0x00;;]
+end
diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl
new file mode 100644
index 0000000000000..e8758365e3ff4
--- /dev/null
+++ b/test/relocatedepot.jl
@@ -0,0 +1,321 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+
+
+include("testenv.jl")
+include("tempdepot.jl")
+
+
+function test_harness(@nospecialize(fn); empty_load_path=true, empty_depot_path=true)
+    load_path = copy(LOAD_PATH)
+    depot_path = copy(DEPOT_PATH)
+    try
+        empty_load_path && empty!(LOAD_PATH)
+        empty_depot_path && empty!(DEPOT_PATH)
+        fn()
+    finally
+        copy!(LOAD_PATH, load_path)
+        copy!(DEPOT_PATH, depot_path)
+    end
+end
+
+# We test relocation with these dummy pkgs:
+# - RelocationTestPkg1 - pkg with no include_dependency
+# - RelocationTestPkg2 - pkg with include_dependency tracked by `mtime`
+# - RelocationTestPkg3 - pkg with include_dependency tracked by content
+# - RelocationTestPkg4 - pkg with no dependencies; will be compiled such that the pkgimage is
+#                        not relocatable, but no repeated recompilation happens upon loading
+
+if !test_relocated_depot
+
+    @testset "edge cases when inserting @depot tag in path" begin
+
+        # insert @depot only once for first match
+        test_harness() do
+            mkdepottempdir() do dir
+                pushfirst!(DEPOT_PATH, dir)
+                if Sys.iswindows()
+                    # dirs start with a drive letter instead of a path separator
+                    path = dir*Base.Filesystem.pathsep()*dir
+                    @test Base.replace_depot_path(path) == "@depot"*Base.Filesystem.pathsep()*dir
+                else
+                    path = dir*dir
+                    @test Base.replace_depot_path(path) == "@depot"*dir
+                end
+            end
+
+            # 55340
+            empty!(DEPOT_PATH)
+            mkdepottempdir() do dir
+                jlrc = joinpath(dir, "julia-rc2")
+                jl   = joinpath(dir, "julia")
+                mkdir(jl)
+                push!(DEPOT_PATH, jl)
+                @test Base.replace_depot_path(jl) == "@depot"
+                @test Base.replace_depot_path(string(jl,Base.Filesystem.pathsep())) ==
+                            string("@depot",Base.Filesystem.pathsep())
+                @test Base.replace_depot_path(jlrc) != "@depot-rc2"
+                @test Base.replace_depot_path(jlrc) == jlrc
+            end
+        end
+
+        # deal with and without trailing path separators
+        test_harness() do
+            mkdepottempdir() do dir
+                pushfirst!(DEPOT_PATH, dir)
+                path = joinpath(dir, "foo")
+                if isdirpath(DEPOT_PATH[1])
+                    DEPOT_PATH[1] = dirname(DEPOT_PATH[1]) # strip trailing pathsep
+                end
+                tag = string("@depot", Base.Filesystem.pathsep())
+                @test startswith(Base.replace_depot_path(path), tag)
+                DEPOT_PATH[1] = string(DEPOT_PATH[1], Base.Filesystem.pathsep())
+                @test startswith(Base.replace_depot_path(path), tag)
+                popfirst!(DEPOT_PATH)
+                @test !startswith(Base.replace_depot_path(path), tag)
+            end
+        end
+
+    end
+
+    @testset "restore path from @depot tag" begin
+
+        tmp = tempdir()
+
+        path = joinpath("@depot", "foo", "bar")
+        tmppath = joinpath(tmp, "foo", "bar")
+        @test Base.restore_depot_path(path, tmp) == tmppath
+
+        path = joinpath("no@depot", "foo", "bar")
+        @test Base.restore_depot_path(path, tmp) == path
+
+        path = joinpath("@depot", "foo", "bar\n", "@depot", "foo")
+        tmppath = joinpath(tmp, "foo", "bar\n", "@depot", "foo")
+        @test Base.restore_depot_path(path, tmp) == tmppath
+
+    end
+
+    @testset "precompile RelocationTestPkg1" begin
+        pkgname = "RelocationTestPkg1"
+        test_harness(empty_depot_path=false) do
+            push!(LOAD_PATH, @__DIR__)
+            push!(DEPOT_PATH, @__DIR__) # make src files available for relocation
+            pkg = Base.identify_package(pkgname)
+            cachefiles = Base.find_all_in_cache_path(pkg)
+            rm.(cachefiles, force=true)
+            @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because not precompiled
+            Base.require(pkg)
+            @test Base.isprecompiled(pkg, ignore_loaded=true) == true
+            @test Base.isrelocatable(pkg) == true
+        end
+    end
+
+    @testset "precompile RelocationTestPkg2" begin
+        pkgname = "RelocationTestPkg2"
+        test_harness(empty_depot_path=false) do
+            push!(LOAD_PATH, @__DIR__)
+            push!(DEPOT_PATH, @__DIR__) # make src files available for relocation
+            pkg = Base.identify_package(pkgname)
+            cachefiles = Base.find_all_in_cache_path(pkg)
+            rm.(cachefiles, force=true)
+            rm(joinpath(@__DIR__, pkgname, "src", "foodir"), force=true, recursive=true)
+            @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because not precompiled
+            touch(joinpath(@__DIR__, pkgname, "src", "foo.txt"))
+            mkdir(joinpath(@__DIR__, pkgname, "src", "foodir"))
+            Base.require(pkg)
+            @test Base.isprecompiled(pkg, ignore_loaded=true) == true
+            @test Base.isrelocatable(pkg) == false # because tracked by mtime
+        end
+    end
+
+    @testset "precompile RelocationTestPkg3" begin
+        pkgname = "RelocationTestPkg3"
+        test_harness(empty_depot_path=false) do
+            push!(LOAD_PATH, @__DIR__)
+            push!(DEPOT_PATH, @__DIR__) # make src files available for relocation
+            pkg = Base.identify_package(pkgname)
+            cachefiles = Base.find_all_in_cache_path(pkg)
+            rm.(cachefiles, force=true)
+            rm(joinpath(@__DIR__, pkgname, "src", "bardir"), force=true, recursive=true)
+            @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because not precompiled
+            touch(joinpath(@__DIR__, pkgname, "src", "bar.txt"))
+            mkdir(joinpath(@__DIR__, pkgname, "src", "bardir"))
+            Base.require(pkg)
+            @test Base.isprecompiled(pkg, ignore_loaded=true) == true
+            @test Base.isrelocatable(pkg) == true
+        end
+    end
+
+    @testset "precompile RelocationTestPkg4" begin
+        # test for #52346 and https://github.com/JuliaLang/julia/issues/53859#issuecomment-2027352004
+        # If a pkgimage is not relocatable, no repeated precompilation should occur.
+        pkgname = "RelocationTestPkg4"
+        test_harness(empty_depot_path=false) do
+            push!(LOAD_PATH, @__DIR__)
+            # skip this dir to make the pkgimage not relocatable
+            filter!(DEPOT_PATH) do depot
+                !startswith(@__DIR__, depot)
+            end
+            pkg = Base.identify_package(pkgname)
+            cachefiles = Base.find_all_in_cache_path(pkg)
+            rm.(cachefiles, force=true)
+            @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because not precompiled
+            Base.require(pkg)
+            @test Base.isprecompiled(pkg, ignore_loaded=true) == true
+            @test Base.isrelocatable(pkg) == false
+        end
+    end
+
+    @testset "#52161" begin
+        # Take the src files from two pkgs Example1 and Example2,
+        # which are each located in depot1 and depot2, respectively, and
+        # add them as include_dependency()s to a new pkg Foo, which will be precompiled into depot3.
+        # After loading the include_dependency()s of Foo should refer to depot1 depot2 each.
+        test_harness() do
+            mkdepottempdir() do depot1
+                # precompile Example in depot1
+                example1_root = joinpath(depot1, "Example1")
+                mkpath(joinpath(example1_root, "src"))
+                open(joinpath(example1_root, "src", "Example1.jl"); write=true) do io
+                    println(io, """
+                    module Example1
+                    greet() = println("Hello from Example1!")
+                    end
+                    """)
+                end
+                open(joinpath(example1_root, "Project.toml"); write=true) do io
+                    println(io, """
+                    name = "Example1"
+                    uuid = "00000000-0000-0000-0000-000000000001"
+                    version = "1.0.0"
+                    """)
+                end
+                pushfirst!(LOAD_PATH, depot1); pushfirst!(DEPOT_PATH, depot1)
+                pkg = Base.identify_package("Example1"); Base.require(pkg)
+                mkdepottempdir() do depot2
+                    # precompile Example in depot2
+                    example2_root = joinpath(depot2, "Example2")
+                    mkpath(joinpath(example2_root, "src"))
+                    open(joinpath(example2_root, "src", "Example2.jl"); write=true) do io
+                        println(io, """
+                        module Example2
+                        greet() = println("Hello from Example2!")
+                        end
+                        """)
+                    end
+                    open(joinpath(example2_root, "Project.toml"); write=true) do io
+                        println(io, """
+                        name = "Example2"
+                        uuid = "00000000-0000-0000-0000-000000000002"
+                        version = "1.0.0"
+                        """)
+                    end
+                    pushfirst!(LOAD_PATH, depot2); pushfirst!(DEPOT_PATH, depot2)
+                    pkg = Base.identify_package("Example2"); Base.require(pkg)
+                    mkdepottempdir() do depot3
+                        # precompile Foo in depot3
+                        open(joinpath(depot3, "Module52161.jl"), write=true) do io
+                            println(io, """
+                            module Module52161
+                            using Example1
+                            using Example2
+                            srcfile1 = joinpath(pkgdir(Example1), "src", "Example1.jl")
+                            srcfile2 = joinpath(pkgdir(Example2), "src", "Example2.jl")
+                            include_dependency(srcfile1)
+                            include_dependency(srcfile2)
+                            end
+                            """)
+                        end
+                        pushfirst!(LOAD_PATH, depot3); pushfirst!(DEPOT_PATH, depot3)
+                        pkg = Base.identify_package("Module52161"); Base.compilecache(pkg)
+                        cachefile = joinpath(depot3, "compiled",
+                                             "v$(VERSION.major).$(VERSION.minor)", "Module52161.ji")
+                        _, (deps, _, _), _... = Base.parse_cache_header(cachefile)
+                        @test map(x -> x.filename, deps) ==
+                            [ joinpath(depot3, "Module52161.jl"),
+                              joinpath(depot1, "Example1", "src", "Example1.jl"),
+                              joinpath(depot2, "Example2", "src", "Example2.jl") ]
+                    end
+                end
+            end
+        end
+    end
+
+
+else
+
+    @testset "load stdlib from test/relocatedepot" begin
+        test_harness() do
+            push!(LOAD_PATH, "@stdlib")
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia"))
+            # stdlib should be already precompiled
+            pkg = Base.identify_package("DelimitedFiles")
+            @test Base.isprecompiled(pkg) == true
+            @test Base.isrelocatable(pkg) == true
+        end
+    end
+
+    @testset "load RelocationTestPkg1 from test/relocatedepot" begin
+        pkgname = "RelocationTestPkg1"
+        test_harness() do
+            push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot"))
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) # required to find src files
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file
+            pkg = Base.identify_package(pkgname)
+            @test Base.isprecompiled(pkg) == true
+            @test Base.isrelocatable(pkg) == true
+        end
+    end
+
+    @testset "load RelocationTestPkg2 from test/relocatedepot" begin
+        pkgname = "RelocationTestPkg2"
+        test_harness() do
+            push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot"))
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) # required to find src files
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file
+            pkg = Base.identify_package(pkgname)
+            @test Base.isprecompiled(pkg) == false # moving depot changes mtime of include_dependency
+            @test Base.isrelocatable(pkg) == false # because not precompiled
+            Base.require(pkg)
+            @test Base.isprecompiled(pkg) == true
+            @test Base.isrelocatable(pkg) == false # because tracked by mtime
+            touch(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg2", "src", "foodir", "foofoo"))
+            @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because tracked by mtime
+        end
+    end
+
+    @testset "load RelocationTestPkg3 from test/relocatedepot" begin
+        pkgname = "RelocationTestPkg3"
+        test_harness() do
+            push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot"))
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) # required to find src files
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file
+            pkg = Base.identify_package(pkgname)
+            @test Base.isprecompiled(pkg) == true
+            @test Base.isrelocatable(pkg) == true
+            touch(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg3", "src", "bardir", "barbar"))
+            @test Base.isprecompiled(pkg) == false
+            @test Base.isrelocatable(pkg) == false # because not precompiled
+        end
+    end
+
+    @testset "load RelocationTestPkg4 from test/relocatedepot" begin
+        pkgname = "RelocationTestPkg4"
+        test_harness() do
+            push!(LOAD_PATH, @__DIR__, "relocatedepot")
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) # required to find src files
+            push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file
+            pkg = Base.identify_package(pkgname)
+            # precompiled but not relocatable
+            @test Base.isprecompiled(pkg) == true
+            @test Base.isrelocatable(pkg) == false
+        end
+    end
+
+end
diff --git a/test/rounding.jl b/test/rounding.jl
index 508a68032e083..0de57af5b41f8 100644
--- a/test/rounding.jl
+++ b/test/rounding.jl
@@ -57,7 +57,7 @@ end
         @test pu - pd == eps(pz)
     end
 
-    for T in [Float32,Float64]
+    for T in [Float16,Float32,Float64]
         for v in [sqrt(big(2.0)),-big(1.0)/big(3.0),nextfloat(big(1.0)),
                   prevfloat(big(1.0)),nextfloat(big(0.0)),prevfloat(big(0.0)),
                   pi,ℯ,eulergamma,catalan,golden,
@@ -351,3 +351,160 @@ end
         Base.Rounding.setrounding_raw(T, Base.Rounding.to_fenv(old))
     end
 end
+
+@testset "rounding floats with specified return type #50778" begin
+    @test round(Float64, 1.2) === 1.0
+    @test round(Float32, 1e60) === Inf32
+    x = floatmax(Float32)-1.0
+    @test round(Float32, x) == x
+end
+
+@testset "rounding complex numbers (#42060, #47128)" begin
+    # 42060
+    @test ceil(Complex(4.6, 2.2)) === Complex(5.0, 3.0)
+    @test floor(Complex(4.6, 2.2)) === Complex(4.0, 2.0)
+    @test trunc(Complex(4.6, 2.2)) === Complex(4.0, 2.0)
+    @test round(Complex(4.6, 2.2)) === Complex(5.0, 2.0)
+    @test ceil(Complex(-4.6, -2.2)) === Complex(-4.0, -2.0)
+    @test floor(Complex(-4.6, -2.2)) === Complex(-5.0, -3.0)
+    @test trunc(Complex(-4.6, -2.2)) === Complex(-4.0, -2.0)
+    @test round(Complex(-4.6, -2.2)) === Complex(-5.0, -2.0)
+
+    # 47128
+    @test round(Complex{Int}, Complex(4.6, 2.2)) === Complex(5, 2)
+    @test ceil(Complex{Int}, Complex(4.6, 2.2)) === Complex(5, 3)
+end
+
+@testset "rounding to custom integers" begin
+    struct Int50812 <: Integer
+        x::Int
+    end
+    @test round(Int50812, 1.2) === Int50812(1)
+    @test round(Int50812, π) === Int50812(3)
+    @test ceil(Int50812, π) === Int50812(4)
+end
+
+const MPFRRM = Base.MPFR.MPFRRoundingMode
+
+function mpfr_to_ieee(::Type{Float32}, x::BigFloat, r::MPFRRM)
+    ccall((:mpfr_get_flt, Base.MPFR.libmpfr), Float32, (Ref{BigFloat}, MPFRRM), x, r)
+end
+function mpfr_to_ieee(::Type{Float64}, x::BigFloat, r::MPFRRM)
+    ccall((:mpfr_get_d, Base.MPFR.libmpfr), Float64, (Ref{BigFloat}, MPFRRM), x, r)
+end
+
+function mpfr_to_ieee(::Type{G}, x::BigFloat, r::RoundingMode) where {G}
+    mpfr_to_ieee(G, x, convert(MPFRRM, r))
+end
+
+const mpfr_rounding_modes = map(
+    Base.Fix1(convert, MPFRRM),
+    (RoundNearest, RoundToZero, RoundFromZero, RoundDown, RoundUp)
+)
+
+sample_float(::Type{T}, e::Integer) where {T<:AbstractFloat} = ldexp(rand(T) + true, e)::T
+
+function float_samples(::Type{T}, exponents, n::Int) where {T<:AbstractFloat}
+    ret = T[]
+    for e ∈ exponents, i ∈ 1:n
+        push!(ret, sample_float(T, e), -sample_float(T, e))
+    end
+    ret
+end
+
+# a reasonable range of values for testing behavior between 1:200
+const fib200 = [1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 200]
+
+@testset "IEEEFloat(::BigFloat) against MPFR" begin
+    for pr ∈ fib200
+        setprecision(BigFloat, pr) do
+            exp = exponent(floatmax(Float64)) + 10
+            bf_samples = float_samples(BigFloat, (-exp):exp, 20) # about 82680 random values
+            for mpfr_rm ∈ mpfr_rounding_modes, bf ∈ bf_samples, F ∈ (Float32, Float64)
+                @test (
+                    mpfr_to_ieee(F, bf, mpfr_rm) ===
+                    F(bf, mpfr_rm) === F(bf, convert(RoundingMode, mpfr_rm))
+                )
+            end
+        end
+    end
+end
+
+const native_rounding_modes = (
+    RoundNearest, RoundNearestTiesAway, RoundNearestTiesUp,
+    RoundToZero, RoundFromZero, RoundUp, RoundDown
+)
+
+# Checks that each rounding mode is faithful.
+@testset "IEEEFloat(::BigFloat) faithful rounding" begin
+    for pr ∈ fib200
+        setprecision(BigFloat, pr) do
+            exp = 500
+            bf_samples = float_samples(BigFloat, (-exp):exp, 20) # about 40040 random values
+            for rm ∈ (mpfr_rounding_modes..., Base.MPFR.MPFRRoundFaithful,
+                      native_rounding_modes...),
+                bf ∈ bf_samples,
+                F ∈ (Float16, Float32, Float64)
+                f = F(bf, rm)
+                @test (f === F(bf, RoundDown)) | (f === F(bf, RoundUp))
+            end
+        end
+    end
+end
+
+@testset "round(Int, -Inf16) should throw (#51113)" begin
+    @test_throws InexactError round(Int32, -Inf16)
+    @test_throws InexactError round(Int64, -Inf16)
+    @test_throws InexactError round(Int128, -Inf16)
+    # More comprehensive testing is present in test/floatfuncs.jl
+end
+
+@testset "floor(<:AbstractFloat, large_number) (#52355)" begin
+    @test floor(Float32, 0xffff_ffff) == prevfloat(2f0^32) <= 0xffff_ffff
+    @test trunc(Float16, typemax(UInt128)) == floatmax(Float16)
+    @test round(Float16, typemax(UInt128)) == Inf16
+    for i in [-BigInt(floatmax(Float64)), -BigInt(floatmax(Float64))*100, BigInt(floatmax(Float64)), BigInt(floatmax(Float64))*100]
+        f = ceil(Float64, i)
+        @test f >= i
+        @test isinteger(f) || isinf(f)
+        @test prevfloat(f) < i
+    end
+end
+
+@testset "π to `BigFloat` with `setrounding`" begin
+    function irrational_to_big_float(c::AbstractIrrational)
+        BigFloat(c)
+    end
+
+    function irrational_to_big_float_with_rounding_mode(c::AbstractIrrational, rm::RoundingMode)
+        f = () -> irrational_to_big_float(c)
+        setrounding(f, BigFloat, rm)
+    end
+
+    function irrational_to_big_float_with_rounding_mode_and_precision(c::AbstractIrrational, rm::RoundingMode, prec::Int)
+        f = () -> irrational_to_big_float_with_rounding_mode(c, rm)
+        setprecision(f, BigFloat, prec)
+    end
+
+    for c ∈ (π, MathConstants.γ, MathConstants.catalan)
+        for p ∈ 1:40
+            @test (
+                irrational_to_big_float_with_rounding_mode_and_precision(c, RoundDown, p) < c <
+                irrational_to_big_float_with_rounding_mode_and_precision(c, RoundUp, p)
+            )
+        end
+    end
+end
+
+@testset "Rounding to floating point types with RoundFromZero #55820" begin
+    @testset "Testing float types: $f" for f ∈ (Float16, Float32, Float64, BigFloat)
+        @testset "Testing value types: $t" for t ∈ (Bool, Rational{Int8})
+            @test iszero(f(zero(t), RoundFromZero))
+        end
+    end
+    @test Float16(100000, RoundToZero) === floatmax(Float16)
+    @test Float16(100000, RoundFromZero) === Inf16
+    @test Float16(-100000, RoundToZero) === -floatmax(Float16)
+    @test Float16(-100000, RoundFromZero) === -Inf16
+    @test Float32(nextfloat(0.0), RoundFromZero) === nextfloat(0.0f0)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 1264acae985b0..4c847884939ac 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,14 +3,18 @@
 using Test
 using Distributed
 using Dates
-import REPL
 using Printf: @sprintf
 using Base: Experimental
+using Base.ScopedValues
 
 include("choosetests.jl")
 include("testenv.jl")
+include("buildkitetestjson.jl")
 
-(; tests, net_on, exit_on_error, use_revise, seed) = choosetests(ARGS)
+const longrunning_delay = parse(Int, get(ENV, "JULIA_TEST_LONGRUNNING_DELAY", "45")) * 60 # minutes
+const longrunning_interval = parse(Int, get(ENV, "JULIA_TEST_LONGRUNNING_INTERVAL", "15")) * 60 # minutes
+
+(; tests, net_on, exit_on_error, use_revise, buildroot, seed) = choosetests(ARGS)
 tests = unique(tests)
 
 if Sys.islinux()
@@ -22,9 +26,19 @@ else
     global running_under_rr() = false
 end
 
+const rmwait_timeout = running_under_rr() ? 300 : 30
+
+ENV["JULIA_TEST_BUILDROOT"] = buildroot
 if use_revise
+    # First put this at the top of the DEPOT PATH to install revise if necessary.
+    # Once it's loaded, we swizzle it to the end, to avoid confusing any tests.
+    pushfirst!(DEPOT_PATH, joinpath(buildroot, "deps", "jlutilities", "depot"))
+    using Pkg
+    Pkg.activate(joinpath(@__DIR__, "..", "deps", "jlutilities", "revise"))
+    Pkg.instantiate()
     using Revise
     union!(Revise.stdlib_names, Symbol.(STDLIBS))
+    push!(DEPOT_PATH, popfirst!(DEPOT_PATH))
     # Remote-eval the following to initialize Revise in workers
     const revise_init_expr = quote
         using Revise
@@ -34,6 +48,11 @@ if use_revise
     end
 end
 
+if isempty(tests)
+    println("No tests selected. Exiting.")
+    exit()
+end
+
 const max_worker_rss = if haskey(ENV, "JULIA_TEST_MAXRSS_MB")
     parse(Int, ENV["JULIA_TEST_MAXRSS_MB"]) * 2^20
 else
@@ -41,19 +60,6 @@ else
 end
 limited_worker_rss = max_worker_rss != typemax(Csize_t)
 
-function test_path(test)
-    t = split(test, '/')
-    if t[1] in STDLIBS
-        if length(t) == 2
-            return joinpath(STDLIB_DIR, t[1], "test", t[2])
-        else
-            return joinpath(STDLIB_DIR, t[1], "test", "runtests")
-        end
-    else
-        return joinpath(@__DIR__, test)
-    end
-end
-
 # Check all test files exist
 isfiles = isfile.(test_path.(tests) .* ".jl")
 if !all(isfiles)
@@ -84,12 +90,14 @@ move_to_node1("stress")
 # since it starts a lot of workers and can easily exceed the maximum memory
 limited_worker_rss && move_to_node1("Distributed")
 
-# Shuffle LinearAlgebra tests to the front, because they take a while, so we might
+# Move LinearAlgebra and Pkg tests to the front, because they take a while, so we might
 # as well get them all started early.
-linalg_test_ids = findall(x->occursin("LinearAlgebra", x), tests)
-linalg_tests = tests[linalg_test_ids]
-deleteat!(tests, linalg_test_ids)
-prepend!(tests, linalg_tests)
+for prependme in ["LinearAlgebra", "Pkg"]
+    prependme_test_ids = findall(x->occursin(prependme, x), tests)
+    prependme_tests = tests[prependme_test_ids]
+    deleteat!(tests, prependme_test_ids)
+    prepend!(tests, prependme_tests)
+end
 
 import LinearAlgebra
 cd(@__DIR__) do
@@ -109,7 +117,7 @@ cd(@__DIR__) do
     # multiple worker processes regardless of the value of `net_on`.
     # Otherwise, we use multiple worker processes if and only if `net_on` is true.
     if net_on || JULIA_TEST_USE_MULTIPLE_WORKERS
-        n = min(Sys.CPU_THREADS, length(tests))
+        n = min(Sys.EFFECTIVE_CPU_THREADS, length(tests))
         n > 1 && addprocs_with_testenv(n)
         LinearAlgebra.BLAS.set_num_threads(1)
     end
@@ -118,17 +126,20 @@ cd(@__DIR__) do
     @everywhere include("testdefs.jl")
 
     if use_revise
-        Base.invokelatest(revise_trackall)
+        @invokelatest revise_trackall()
         Distributed.remotecall_eval(Main, workers(), revise_init_expr)
     end
 
     println("""
         Running parallel tests with:
+          getpid() = $(getpid())
           nworkers() = $(nworkers())
-          nthreads() = $(Threads.threadpoolsize())
+          nthreads(:interactive) = $(Threads.threadpoolsize(:interactive))
+          nthreads(:default) = $(Threads.threadpoolsize(:default))
           Sys.CPU_THREADS = $(Sys.CPU_THREADS)
           Sys.total_memory() = $(Base.format_bytes(Sys.total_memory()))
           Sys.free_memory() = $(Base.format_bytes(Sys.free_memory()))
+          Sys.uptime() = $(Sys.uptime()) ($(round(Sys.uptime() / (60 * 60), digits=1)) hours)
         """)
 
     #pretty print the information about gc and mem usage
@@ -179,10 +190,10 @@ cd(@__DIR__) do
         at = lpad("($wrkr)", name_align - textwidth(name) + 1, " ")
         lock(print_lock)
         try
-            printstyled(name, at, " |", " "^elapsed_align,
-                    "started at $(now())",
+            printstyled(name, at, " |", " "^elapsed_align, color=:white)
+            printstyled("started at $(now())",
                     (pid > 0 ? " on pid $pid" : ""),
-                    "\n", color=:white)
+                    "\n", color=:light_black)
         finally
             unlock(print_lock)
         end
@@ -219,12 +230,16 @@ cd(@__DIR__) do
         # Monitor stdin and kill this task on ^C
         # but don't do this on Windows, because it may deadlock in the kernel
         running_tests = Dict{String, DateTime}()
+
+        # Track timeout timers for each test
+        test_timers = Dict{String, Timer}()
+
         if !Sys.iswindows() && isa(stdin, Base.TTY)
             t = current_task()
             stdin_monitor = @async begin
-                term = REPL.Terminals.TTYTerminal("xterm", stdin, stdout, stderr)
+                term = Base.Terminals.TTYTerminal("xterm", stdin, stdout, stderr)
                 try
-                    REPL.Terminals.raw!(term, true)
+                    Base.Terminals.raw!(term, true)
                     while true
                         c = read(term, Char)
                         if c == '\x3'
@@ -241,7 +256,7 @@ cd(@__DIR__) do
                 catch e
                     isa(e, InterruptException) || rethrow()
                 finally
-                    REPL.Terminals.raw!(term, false)
+                    Base.Terminals.raw!(term, false)
                 end
             end
         end
@@ -253,15 +268,46 @@ cd(@__DIR__) do
                         test = popfirst!(tests)
                         running_tests[test] = now()
                         wrkr = p
+
+                        # Create a timer for this test to report long-running status
+                        test_timers[test] = Timer(longrunning_delay, interval=longrunning_interval) do timer
+                            if haskey(running_tests, test)  # Check test is still running
+                                start_time = running_tests[test]
+                                elapsed = now() - start_time
+                                elapsed_minutes = elapsed.value ÷ (1000 * 60)
+
+                                elapsed_str = if elapsed_minutes >= 60
+                                    hours, mins = divrem(elapsed_minutes, 60)
+                                    "$(hours)h $(mins)m"
+                                else
+                                    "$(elapsed_minutes)m"
+                                end
+
+                                @lock print_lock begin
+                                    print(test)
+                                    print(lpad("($(wrkr))", name_align - textwidth(test) + 1, " "), " | ")
+                                    # Calculate total width of data columns: "Time (s) | GC (s) | GC % | Alloc (MB) | RSS (MB)"
+                                    # This is: elapsed_align + 3 + gc_align + 3 + percent_align + 3 + alloc_align + 3 + rss_align
+                                    data_width = elapsed_align + gc_align + percent_align + alloc_align + rss_align + 12  # 12 = 4 * " | "
+                                    message = "has been running for $(elapsed_str)"
+                                    centered_message = lpad(rpad(message, (data_width + textwidth(message)) ÷ 2), data_width)
+                                    printstyled(centered_message, "\n", color=:light_black)
+                                end
+                            end
+                        end
                         before = time()
                         resp, duration = try
-                                r = remotecall_fetch(runtests, wrkr, test, test_path(test); seed=seed)
+                                r = remotecall_fetch(@Base.world(runtests, ∞), wrkr, test, test_path(test); seed=seed)
                                 r, time() - before
                             catch e
                                 isa(e, InterruptException) && return
                                 Any[CapturedException(e, catch_backtrace())], time() - before
                             end
                         delete!(running_tests, test)
+                        if haskey(test_timers, test)
+                            close(test_timers[test])
+                            delete!(test_timers, test)
+                        end
                         push!(results, (test, resp, duration))
                         if length(resp) == 1
                             print_testworker_errored(test, wrkr, exit_on_error ? nothing : resp[1])
@@ -271,7 +317,7 @@ cd(@__DIR__) do
                             elseif n > 1
                                 # the worker encountered some failure, recycle it
                                 # so future tests get a fresh environment
-                                rmprocs(wrkr, waitfor=30)
+                                rmprocs(wrkr, waitfor=rmwait_timeout)
                                 p = addprocs_with_testenv(1)[1]
                                 remotecall_fetch(include, p, "testdefs.jl")
                                 if use_revise
@@ -284,7 +330,7 @@ cd(@__DIR__) do
                                 # the worker has reached the max-rss limit, recycle it
                                 # so future tests start with a smaller working set
                                 if n > 1
-                                    rmprocs(wrkr, waitfor=30)
+                                    rmprocs(wrkr, waitfor=rmwait_timeout)
                                     p = addprocs_with_testenv(1)[1]
                                     remotecall_fetch(include, p, "testdefs.jl")
                                     if use_revise
@@ -298,7 +344,7 @@ cd(@__DIR__) do
                     end
                     if p != 1
                         # Free up memory =)
-                        rmprocs(p, waitfor=30)
+                        rmprocs(p, waitfor=rmwait_timeout)
                     end
                 end
             end
@@ -315,7 +361,7 @@ cd(@__DIR__) do
             t == "SharedArrays" && (isolate = false)
             before = time()
             resp, duration = try
-                    r = Base.invokelatest(runtests, t, test_path(t), isolate, seed=seed) # runtests is defined by the include above
+                    r = @invokelatest runtests(t, test_path(t), isolate, seed=seed) # runtests is defined by the include above
                     r, time() - before
                 catch e
                     isa(e, InterruptException) && rethrow()
@@ -346,6 +392,9 @@ cd(@__DIR__) do
         if @isdefined stdin_monitor
             schedule(stdin_monitor, InterruptException(); error=true)
         end
+        if @isdefined test_timers
+            foreach(close, values(test_timers))
+        end
     end
 
     #=
@@ -370,65 +419,68 @@ cd(@__DIR__) do
     Errored, and execution continues until the summary at the end of the test
     run, where the test file is printed out as the "failed expression".
     =#
-    Test.TESTSET_PRINT_ENABLE[] = false
-    o_ts = Test.DefaultTestSet("Overall")
-    o_ts.time_end = o_ts.time_start + o_ts_duration # manually populate the timing
-    Test.push_testset(o_ts)
-    completed_tests = Set{String}()
-    for (testname, (resp,), duration) in results
-        push!(completed_tests, testname)
-        if isa(resp, Test.DefaultTestSet)
-            resp.time_end = resp.time_start + duration
-            Test.push_testset(resp)
-            Test.record(o_ts, resp)
-            Test.pop_testset()
-        elseif isa(resp, Test.TestSetException)
-            fake = Test.DefaultTestSet(testname)
-            fake.time_end = fake.time_start + duration
-            for i in 1:resp.pass
-                Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, LineNumberNode(@__LINE__, @__FILE__)))
-            end
-            for i in 1:resp.broken
-                Test.record(fake, Test.Broken(:test, nothing))
-            end
-            for t in resp.errors_and_fails
-                Test.record(fake, t)
+    @with Test.TESTSET_PRINT_ENABLE=>false begin
+        o_ts = Test.DefaultTestSet("Overall")
+        @atomic o_ts.time_end = o_ts.time_start + o_ts_duration # manually populate the timing
+        BuildkiteTestJSON.write_testset_json_files(@__DIR__, o_ts)
+        Test.@with_testset o_ts begin
+            completed_tests = Set{String}()
+            for (testname, (resp,), duration) in results
+                push!(completed_tests, testname)
+                if isa(resp, Test.DefaultTestSet)
+                    @atomic resp.time_end = resp.time_start + duration
+                    Test.@with_testset resp begin
+                        Test.record(o_ts, resp)
+                    end
+                elseif isa(resp, Test.TestSetException)
+                    fake = Test.DefaultTestSet(testname)
+                    @atomic fake.time_end = fake.time_start + duration
+                    for i in 1:resp.pass
+                        Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, LineNumberNode(@__LINE__, @__FILE__)))
+                    end
+                    for i in 1:resp.broken
+                        Test.record(fake, Test.Broken(:test, nothing))
+                    end
+                    for t in resp.errors_and_fails
+                        Test.record(fake, t)
+                    end
+                    Test.@with_testset fake begin
+                        Test.record(o_ts, fake)
+                    end
+                else
+                    if !isa(resp, Exception)
+                        resp = ErrorException(string("Unknown result type : ", typeof(resp)))
+                    end
+                    # If this test raised an exception that is not a remote testset exception,
+                    # i.e. not a RemoteException capturing a TestSetException that means
+                    # the test runner itself had some problem, so we may have hit a segfault,
+                    # deserialization errors or something similar.  Record this testset as Errored.
+                    fake = Test.DefaultTestSet(testname)
+                    @atomic fake.time_end = fake.time_start + duration
+                    Test.record(fake, Test.Error(:nontest_error, testname, nothing, Base.ExceptionStack(NamedTuple[(;exception = resp, backtrace = [])]), LineNumberNode(1), nothing))
+                    Test.@with_testset fake begin
+                        Test.record(o_ts, fake)
+                    end
+                end
             end
-            Test.push_testset(fake)
-            Test.record(o_ts, fake)
-            Test.pop_testset()
-        else
-            if !isa(resp, Exception)
-                resp = ErrorException(string("Unknown result type : ", typeof(resp)))
+            for test in all_tests
+                (test in completed_tests) && continue
+                fake = Test.DefaultTestSet(test)
+                Test.record(fake, Test.Error(:test_interrupted, test, nothing, Base.ExceptionStack(NamedTuple[(;exception = "skipped", backtrace = [])]), LineNumberNode(1), nothing))
+                Test.@with_testset fake begin
+                    Test.record(o_ts, fake)
+                end
             end
-            # If this test raised an exception that is not a remote testset exception,
-            # i.e. not a RemoteException capturing a TestSetException that means
-            # the test runner itself had some problem, so we may have hit a segfault,
-            # deserialization errors or something similar.  Record this testset as Errored.
-            fake = Test.DefaultTestSet(testname)
-            fake.time_end = fake.time_start + duration
-            Test.record(fake, Test.Error(:nontest_error, testname, nothing, Any[(resp, [])], LineNumberNode(1)))
-            Test.push_testset(fake)
-            Test.record(o_ts, fake)
-            Test.pop_testset()
         end
     end
-    for test in all_tests
-        (test in completed_tests) && continue
-        fake = Test.DefaultTestSet(test)
-        Test.record(fake, Test.Error(:test_interrupted, test, nothing, [("skipped", [])], LineNumberNode(1)))
-        Test.push_testset(fake)
-        Test.record(o_ts, fake)
-        Test.pop_testset()
-    end
-    Test.TESTSET_PRINT_ENABLE[] = true
+
     println()
     # o_ts.verbose = true # set to true to show all timings when successful
     Test.print_test_results(o_ts, 1)
-    if !o_ts.anynonpass
-        println("    \033[32;1mSUCCESS\033[0m")
+    if !Test.anynonpass(o_ts)
+        printstyled("    SUCCESS\n"; bold=true, color=:green)
     else
-        println("    \033[31;1mFAILURE\033[0m\n")
+        printstyled("    FAILURE\n\n"; bold=true, color=:red)
         skipped > 0 &&
             println("$skipped test", skipped > 1 ? "s were" : " was", " skipped due to failure.")
         println("The global RNG seed was 0x$(string(seed, base = 16)).\n")
diff --git a/test/ryu.jl b/test/ryu.jl
index 0b10bd7e49ba5..e885d6c10838f 100644
--- a/test/ryu.jl
+++ b/test/ryu.jl
@@ -19,6 +19,34 @@ todouble(sign, exp, mant) = Core.bitcast(Float64, (UInt64(sign) << 63) | (UInt64
     @test Ryu.writeshortest(-Inf) == "-Inf"
 end
 
+@testset "OutputOptions" begin
+    # plus
+    @test "+1" == Base.Ryu.writeshortest(1.0, true, false, false)
+    @test "-1" == Base.Ryu.writeshortest(-1.0, true, false, false)
+
+    # space
+    @test " 1" == Ryu.writeshortest(1.0, false,  true,  false)
+
+    # hash
+    @test "0" == Ryu.writeshortest(0.0, false, false, false)
+
+    # precision
+    @test "9.9900" == Ryu.writeshortest(9.99, false, false, true, 5)
+    @test "1." == Ryu.writeshortest(1.0, false, false, true, 1)
+
+    # expchar
+    @test "1.0d6" == Ryu.writeshortest(1e6, false, false, true, -1, UInt8('d'))
+
+    # padexp
+    @test "3.0e+08" == Ryu.writeshortest(3e8, false, false, true, -1, UInt8('e'), true)
+
+    # decchar
+    @test "3,14" == Ryu.writeshortest(3.14, false, false, true, -1, UInt8('e'), false, UInt8(','))
+
+    # compact
+    @test "0.333333" == Ryu.writeshortest(1/3, false, false, true, -1, UInt8('e'), false, UInt8('.'), false, true)
+end
+
 @testset "SwitchToSubnormal" begin
     @test "2.2250738585072014e-308" == Ryu.writeshortest(2.2250738585072014e-308)
 end
@@ -241,6 +269,17 @@ end # Float64
     @test "-Inf" == Ryu.writeshortest(Float32(-Inf))
 end
 
+@testset "OutputOptions" begin
+    # typed
+    @test "1.0f0" == Ryu.writeshortest(Float32(1.0), false, false, true, -1, UInt8('e'), false, UInt8('.'), true)
+    @test "Inf32" == Ryu.writeshortest(Float32(Inf), false, false, true, -1, UInt8('e'), false, UInt8('.'), true)
+    @test "NaN32" == Ryu.writeshortest(Float32(NaN), false, false, true, -1, UInt8('e'), false, UInt8('.'), true)
+    @test "3.14f0" == Ryu.writeshortest(Float32(3.14), false, false, true, -1, UInt8('e'), false, UInt8('.'), true)
+
+    # typed and no-hash
+    @test "1f0" == Ryu.writeshortest(1.0f0, false, false, false, -1, UInt8('e'), false, UInt8('.'), true)
+end
+
 @testset "SwitchToSubnormal" begin
     @test "1.1754944e-38" == Ryu.writeshortest(1.1754944f-38)
 end
@@ -341,6 +380,17 @@ end # Float32
     @test "-Inf" == Ryu.writeshortest(Float16(-Inf))
 end
 
+@testset "OutputOptions" begin
+    # typed
+    @test "Float16(1.0)" == Ryu.writeshortest(Float16(1.0), false, false, true, -1, UInt8('e'), false, UInt8('.'), true)
+    @test "Inf16" == Ryu.writeshortest(Float16(Inf), false, false, true, -1, UInt8('e'), false, UInt8('.'), true)
+    @test "NaN16" == Ryu.writeshortest(Float16(NaN), false, false, true, -1, UInt8('e'), false, UInt8('.'), true)
+    @test "Float16(3.14)" == Ryu.writeshortest(Float16(3.14), false, false, true, -1, UInt8('e'), false, UInt8('.'), true)
+
+    # typed and no-hash
+    @test "Float16(1)" == Ryu.writeshortest(Float16(1.0), false, false, false, -1, UInt8('e'), false, UInt8('.'), true)
+end
+
 let x=floatmin(Float16)
     while x <= floatmax(Float16)
         @test parse(Float16, Ryu.writeshortest(x)) == x
@@ -370,6 +420,23 @@ end
 
 end # Float16
 
+@testset "writeshortest(::AbstractVector, pos, ...)" begin
+    @testset for Vec in (Vector{UInt8}, Memory{UInt8})
+        buf = Vec(undef, 4)
+        @test Ryu.writeshortest(buf, 1, -0.0) == 5
+        @test String(buf) == "-0.0"
+
+        buf = Vec(undef, 100)
+        xx = 4.7223665f21
+        expected = "4.7223665e21"
+        start_pos = 42
+        nwritten = length(expected)
+        end_pos = start_pos + nwritten
+        @test Ryu.writeshortest(buf, start_pos, xx) == end_pos
+        @test String(buf[start_pos:end_pos-1]) == expected
+    end
+end
+
 @testset "Ryu.writefixed" begin
     @testset "Basic" begin
         @test Ryu.writefixed(todouble(false, 1234, 99999), 0) ==
@@ -558,6 +625,28 @@ end # Float16
         @test Ryu.writefixed(1.25e+5, 1, false, false, false, UInt8('.'), true) == "125000"
         @test Ryu.writefixed(1.25e+5, 2, false, false, false, UInt8('.'), true) == "125000"
     end
+
+    @test Ryu.writefixed(100.0-eps(100.0), 0, false, false, true, UInt8('.'), false) == "100."
+    @test Ryu.writefixed(-100.0+eps(-100.0), 0, false, false, true, UInt8('.'), false) == "-100."
+    @test Ryu.writefixed(100.0-eps(100.0), 1, false, false, true, UInt8('.'), false) == "100.0"
+    @test Ryu.writefixed(-100.0+eps(-100.0), 1, false, false, true, UInt8('.'), false) == "-100.0"
+
+    @testset "writefixed(::AbstractVector, pos, ...)" begin
+        @testset for Vec in (Vector{UInt8}, Memory{UInt8})
+            buf = Vec(undef, 6)
+            @test Ryu.writefixed(buf, 1, 0.0, 4) == 7
+            @test String(buf) == "0.0000"
+
+            buf = Vec(undef, 100)
+            xx = 1729.142857142857
+            prec = 8
+            start_pos = 42
+            nwritten = 4 + 1 + prec
+            end_pos = start_pos + nwritten
+            @test Ryu.writefixed(buf, start_pos, xx, prec) == end_pos
+            @test String(buf[start_pos:end_pos-1]) == "1729.14285714"
+        end
+    end
 end # fixed
 
 @testset "Ryu.writeexp" begin
@@ -756,6 +845,23 @@ end
     @test Ryu.writeexp(2.0, 1, false, false, false, UInt8('e'), UInt8('.'), true) == "2e+00"
 end
 
+@testset "writeexp(::AbstractVector, pos, ...)" begin
+    @testset for Vec in (Vector{UInt8}, Memory{UInt8})
+        buf = Vec(undef, 10)
+        @test Ryu.writeexp(buf, 1, 0.0, 4) == 11
+        @test String(buf) == "0.0000e+00"
+
+        buf = Vec(undef, 100)
+        xx = 1729.142857142857
+        prec = 8
+        start_pos = 42
+        nwritten = 1 + 1 + prec + 4
+        end_pos = start_pos + nwritten
+        @test Ryu.writeexp(buf, start_pos, xx, prec) == end_pos
+        @test String(buf[start_pos:end_pos-1]) == "1.72914286e+03"
+    end
+end
+
 end # exp
 
 @testset "compact" begin
diff --git a/test/scopedvalues.jl b/test/scopedvalues.jl
new file mode 100644
index 0000000000000..69d83e0a091c4
--- /dev/null
+++ b/test/scopedvalues.jl
@@ -0,0 +1,227 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Base.ScopedValues
+
+include(joinpath(@__DIR__,"../Compiler/test/irutils.jl"))
+
+@testset "errors" begin
+    @test ScopedValue{Float64}(1)[] == 1.0
+    @test_throws InexactError ScopedValue{Int}(1.5)
+    let val = ScopedValue(1)
+        @test_throws MethodError val[] = 2
+        with() do
+            @test_throws MethodError val[] = 2
+        end
+    end
+    let val = ScopedValue{String}()
+        @test_throws KeyError val[]
+    end
+    let val = ScopedValue{Int}()
+        @test_throws KeyError val[]
+    end
+    @test_throws MethodError ScopedValue()
+end
+
+const sval = ScopedValue(1)
+@testset "inheritance" begin
+    @test sval[] == 1
+    with() do
+        @test sval[] == 1
+        with() do
+            @test sval[] == 1
+        end
+        with(sval => 2) do
+            @test sval[] == 2
+        end
+        @test sval[] == 1
+    end
+    @test sval[] == 1
+end
+
+const sval_float = ScopedValue(1.0)
+
+@testset "multiple scoped values" begin
+    with(sval => 2, sval_float => 2.0) do
+        @test sval[] == 2
+        @test sval_float[] == 2.0
+    end
+    with(sval => 2, sval => 3) do
+        @test sval[] == 3
+    end
+end
+
+emptyf() = nothing
+
+@testset "conversion" begin
+    with(emptyf, sval_float=>2)
+    @test_throws MethodError with(emptyf, sval_float=>"hello")
+    a = ScopedValue(1)
+    with(a => 2.0) do
+        @test a[] == 2
+        @test a[] isa Int
+    end
+    a = ScopedValue(1.0)
+    with(a => 2) do
+        @test a[] == 2.0
+        @test a[] isa Float64
+    end
+end
+
+import Base.Threads: @spawn
+@testset "tasks" begin
+    @test fetch(@spawn begin
+        sval[]
+    end) == 1
+    with(sval => 2) do
+        @test fetch(@spawn begin
+            sval[]
+        end) == 2
+    end
+end
+
+@testset "show" begin
+    @test sprint(show, ScopedValue{Int}(), context=(:module=>Core,)) == "Base.ScopedValues.ScopedValue{$Int}()"
+    @test sprint(show, sval, context=(:module=>Core,)) == "Base.ScopedValues.ScopedValue{$Int}(1)"
+    with(sval => 2.0) do
+        @test sprint(show, sval, context=(:module=>Core,)) == "Base.ScopedValues.ScopedValue{$Int}(2)"
+        objid = sprint(show, Base.objectid(sval))
+        let str = sprint(show, Core.current_scope(), context=(:module=>Core,))
+            @test startswith(str, "Base.ScopedValues.Scope")
+            @test contains(str, "Base.ScopedValues.ScopedValue{$Int}@$objid => 2")
+        end
+    end
+end
+
+const depth = ScopedValue(0)
+function nth_with(f, n)
+    if n <= 0
+        f()
+    else
+        with(depth => n) do
+            nth_with(f, n-1)
+        end
+    end
+end
+
+
+@testset "nested with" begin
+    @testset for depth in 1:16
+        nth_with(depth) do
+            @test sval_float[] == 1.0
+        end
+        with(sval_float=>2.0) do
+            nth_with(depth) do
+                @test sval_float[] == 2.0
+            end
+        end
+        nth_with(depth) do
+            with(sval_float=>2.0) do
+                @test sval_float[] == 2.0
+            end
+        end
+    end
+    with(sval_float=>2.0) do
+        nth_with(15) do
+            @test sval_float[] == 2.0
+            with(sval_float => 3.0) do
+                @test sval_float[] == 3.0
+            end
+        end
+    end
+end
+
+@testset "macro" begin
+    @with sval=>2 sval_float=>2.0 begin
+        @test sval[] == 2
+        @test sval_float[] == 2.0
+    end
+    # Doesn't do much...
+    @with begin
+        @test sval[] == 1
+        @test sval_float[] == 1.0
+    end
+    @with sval=>2 sval_float=>2.0 begin
+        @with begin
+            @test sval[] == 2
+            @test sval_float[] == 2.0
+        end
+    end
+end
+
+@testset "isassigned" begin
+    sv = ScopedValue(1)
+    @test isassigned(sv)
+    sv = ScopedValue{Int}()
+    @test !isassigned(sv)
+    with(sv => 2) do
+        @test isassigned(sv)
+    end
+end
+
+# Test that the `@with` macro doesn't introduce unnecessary PhiC nodes
+# (which can be hard for the optimizer to remove).
+function with_macro_slot_cross()
+    a = 1
+    @with sval=>1 begin
+        a = sval_float[]
+    end
+    return a
+end
+
+let code = code_typed(with_macro_slot_cross)[1][1].code
+    @test !any(x->isa(x, Core.PhiCNode), code)
+end
+
+# inline constant scoped values
+const inlineable_const_sv = ScopedValue(1)
+@test fully_eliminated(; retval=(inlineable_const_sv => 1)) do
+    inlineable_const_sv => 1
+end
+
+# Handle nothrow scope bodies correctly (#56609)
+@eval @noinline function nothrow_scope(@nospecialize(scope_at_entry))
+    $(Expr(:tryfinally, :(), nothing, 1))
+    @test Core.current_scope() === scope_at_entry
+end
+nothrow_scope(Core.current_scope())
+
+# https://github.com/JuliaLang/julia/issues/56062
+@testset "issue #56062" begin
+    ts = Int[]
+    try
+        @with begin
+            return
+        end
+    catch err
+    finally
+        push!(ts, 2)
+    end
+end
+
+# LazyScopedValue
+global lsv_ncalled = 0
+const lsv = LazyScopedValue{Int}(OncePerProcess(() -> (global lsv_ncalled; lsv_ncalled += 1; 1)))
+@testset "LazyScopedValue" begin
+    @test (@with lsv=>2 lsv[]) == 2
+    @test lsv_ncalled == 0
+    @test lsv[] == 1
+    @test lsv_ncalled == 1
+    @test lsv[] == 1
+    @test lsv_ncalled == 1
+end
+
+@testset "ScopedThunk" begin
+    function check_svals()
+        @test sval[] == 8
+        @test sval_float[] == 8.0
+    end
+    sf = nothing
+    @with sval=>8 sval_float=>8.0 begin
+        sf = ScopedThunk(check_svals)
+    end
+    sf()
+    @with sval=>8 sval_float=>8.0 begin
+        sf2 = ScopedThunk{Function}(check_svals)
+    end
+    sf2()
+end
diff --git a/test/secretbuffer.jl b/test/secretbuffer.jl
index 976c757deea57..996471c5b3830 100644
--- a/test/secretbuffer.jl
+++ b/test/secretbuffer.jl
@@ -1,7 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")
+isdefined(Main, :ChallengePrompts) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ChallengePrompts.jl"))
+using .Main.ChallengePrompts: challenge_prompt
+
 using Base: SecretBuffer, SecretBuffer!, shred!, isshredded
-using Test
+using Test, Random
 
 @testset "SecretBuffer" begin
     @testset "original unmodified" begin
@@ -129,4 +133,70 @@ using Test
         @test_throws ArgumentError Base.unsafe_SecretBuffer!(null_ptr)
         @test_throws ArgumentError Base.unsafe_SecretBuffer!(null_ptr, 0)
     end
+
+    @testset "copiers" begin
+        s1 = SecretBuffer()
+        write(s1, "hello world")
+        seekstart(s1)
+
+        s2 = copy(s1)
+        write(s2, 'c')
+        seekstart(s2)
+
+        @test read(s1) == codeunits("hello world")
+        @test read(s2) == codeunits("cello world")
+
+        shred!(s1)
+        @test isshredded(s1)
+        @test !isshredded(s2)
+        shred!(s2)
+
+        # Copying into a bigger destination
+        s3 = SecretBuffer()
+        s4 = SecretBuffer()
+        write(s3, "original")
+        seekstart(s3)
+        write(s4, randstring(1234))
+        s4data = s4.data
+        copy!(s4, s3)
+        @test s3.data == s4.data
+        @test read(s3) == read(s4) == codeunits("original")
+        @test all(iszero, s4data)
+        shred!(s3); shred!(s4)
+
+        # Copying into a smaller destination
+        s5 = SecretBuffer()
+        s6 = SecretBuffer("sekrit")
+        str = randstring(321)
+        write(s5, str)
+        seekstart(s5)
+        copy!(s6, s5)
+        @test read(s5) == read(s6) == codeunits(str)
+        shred!(s5); shred!(s6)
+    end
+
+    if !Sys.iswindows()
+        @testset "getpass" begin
+            v1, s1 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I"); (read(s), Base.shred!(s))), ["LPAwVZM8D4I: " => "too many secrets\n"])
+            s2 = SecretBuffer("too many secrets")
+            @test s1 isa SecretBuffer
+            @test isshredded(s1)
+            @test v1 == read(s2) == codeunits("too many secrets")
+            shred!(s1); shred!(s2)
+
+            v3, s3 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I> ", with_suffix=false); (read(s), Base.shred!(s))), ["LPAwVZM8D4I> " => "frperg\n"])
+            s4 = SecretBuffer("frperg")
+            @test s3 isa SecretBuffer
+            @test isshredded(s3)
+            @test v3 == read(s4) == codeunits("frperg")
+            shred!(s3); shred!(s4)
+
+            v5, s5 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I> ", with_suffix=true); (read(s), Base.shred!(s))), ["LPAwVZM8D4I> : " => "frperg\n"])
+            s6 = SecretBuffer("frperg")
+            @test s5 isa SecretBuffer
+            @test isshredded(s5)
+            @test v5 == read(s6) == codeunits("frperg")
+            shred!(s5); shred!(s6)
+        end
+    end
 end
diff --git a/test/sets.jl b/test/sets.jl
index 65444153c90d9..4d52cb243620c 100644
--- a/test/sets.jl
+++ b/test/sets.jl
@@ -124,7 +124,40 @@ end
     @test isempty(s)
     @test_throws ArgumentError pop!(s)
     @test length(Set(['x',120])) == 2
+
+    # Test that pop! returns the element in the set, not the query
+    s = Set{Any}(Any[0x01, UInt(2), 3, 4.0])
+    @test pop!(s, 1) === 0x01
+    @test pop!(s, 2) === UInt(2)
+    @test pop!(s, 3) === 3
+    @test pop!(s, 4) === 4.0
+    @test_throws KeyError pop!(s, 5)
+end
+
+@testset "in!" begin
+    s = Set()
+    @test !(in!(0x01, s))
+    @test !(in!(Int32(2), s))
+    @test in!(1, s)
+    @test in!(2.0, s)
+    (a, b, c...) = sort!(collect(s))
+    @test a === 0x01
+    @test b === Int32(2)
+    @test isempty(c)
+
+    # in! will convert to the right type automatically
+    s = Set{Int32}()
+    @test !(in!(1, s))
+    @test only(s) === Int32(1)
+    @test_throws Exception in!("hello", s)
+
+    # Other set types
+    s = BitSet()
+    @test !(in!(13, s))
+    @test in!(UInt16(13), s)
+    @test only(s) === 13
 end
+
 @testset "copy" begin
     data_in = (1,2,9,8,4)
     s = Set(data_in)
@@ -164,6 +197,19 @@ end
     sizehint!(s2, 10)
     @test s2 == GenericSet(s)
 end
+
+@testset "shrinking" begin # Similar test as for the underlying Dict
+    d = Set(i for i = 1:1000)
+    filter!(x -> x < 10, d)
+    sizehint!(d, 10)
+    @test length(d.dict.slots) < 100
+    sizehint!(d, 1000)
+    sizehint!(d, 1; shrink = false)
+    @test length(d.dict.slots) >= 1000
+    sizehint!(d, 1; shrink = true)
+    @test length(d.dict.slots) < 1000
+end
+
 @testset "rehash!" begin
     # Use a pointer type to have defined behavior for uninitialized
     # array element
@@ -364,7 +410,9 @@ end
             @test issubset(intersect(l,r), r)
             @test issubset(l, union(l,r))
             @test issubset(r, union(l,r))
+            @test issubset(union(l,r))(r)
             @test isdisjoint(l,l) == isempty(l)
+            @test isdisjoint(l)(l) == isempty(l)
             @test isdisjoint(l,r) == isempty(intersect(l,r))
             if S === Vector
                 @test sort(union(intersect(l,r),symdiff(l,r))) == sort(union(l,r))
@@ -381,6 +429,15 @@ end
             @test ⊋(S([1,2]), S([1]))
             @test !⊋(S([1]), S([1]))
             @test ⊉(S([1]), S([2]))
+
+            @test ⊆(S([1,2]))(S([1]))
+            @test ⊊(S([1,2]))(S([1]))
+            @test !⊊(S([1]))(S([1]))
+            @test ⊈(S([2]))(S([1]))
+            @test ⊇(S([1]))(S([1,2]))
+            @test ⊋(S([1]))(S([1,2]))
+            @test !⊋(S([1]))(S([1]))
+            @test ⊉(S([2]))(S([1]))
         end
         let s1 = S([1,2,3,4])
             @test s1 !== symdiff(s1) == s1
@@ -548,6 +605,9 @@ end
     @test !allunique([1,1,2])
     @test !allunique([:a,:b,:c,:a])
     @test allunique(unique(randn(100)))  # longer than 32
+    @test allunique(collect(1:100)) # sorted/unique && longer than 32
+    @test allunique(collect(100:-1:1)) # sorted/unique && longer than 32
+    @test !allunique(fill(1,100)) # sorted/repeating && longer than 32
     @test allunique(collect('A':'z')) # 58-element Vector{Char}
     @test !allunique(repeat(1:99, 1, 2))
     @test !allunique(vcat(pi, randn(1998), pi))  # longer than 1000
@@ -582,21 +642,39 @@ end
     @test !allunique((1,2,3,4,3))
     @test allunique((0.0, -0.0))
     @test !allunique((NaN, NaN))
+    # Known length 1, need not evaluate:
+    @test allunique(error(x) for x in [1])
+    # @test_opt allunique(Int[])
+end
+
+@testset "allunique(f, xs)" begin
+    @test allunique(sin, 1:3)
+    @test !allunique(sin, [1,2,3,1])
+    @test allunique(sin, (1, 2, pi, im))  # eltype Any
+    @test allunique(abs2, 1:100)
+    @test !allunique(abs, -10:10)
+    @test allunique(abs2, Vector{Any}(1:100))
+    # These cases don't call the function at all:
+    @test allunique(error, [])
+    @test allunique(error, [1])
 end
 
 @testset "allequal" begin
+    # sets & dictionaries
     @test allequal(Set())
     @test allequal(Set(1))
     @test !allequal(Set([1, 2]))
     @test allequal(Dict())
     @test allequal(Dict(:a => 1))
     @test !allequal(Dict(:a => 1, :b => 2))
+    # vectors
     @test allequal([])
     @test allequal([1])
     @test allequal([1, 1])
     @test !allequal([1, 1, 2])
     @test allequal([:a, :a])
     @test !allequal([:a, :b])
+    # ranges
     @test !allequal(1:2)
     @test allequal(1:1)
     @test !allequal(4.0:0.3:7.0)
@@ -610,6 +688,26 @@ end
     @test allequal(LinRange(1, 1, 1))
     @test allequal(LinRange(1, 1, 2))
     @test !allequal(LinRange(1, 2, 2))
+    # Known length 1, need not evaluate:
+    @test allequal(error(x) for x in [1])
+    # Empty, but !haslength:
+    @test allequal(error(x) for x in 1:3 if false)
+end
+
+@testset "allequal(f, xs)" begin
+    @test allequal(abs2, [3, -3])
+    @test allequal(x -> 1, rand(3))
+    @test !allequal(x -> rand(), [1,1,1])
+    # tuples
+    @test allequal(abs2, (3, -3))
+    @test allequal(x -> 1, Tuple(rand(3)))
+    @test !allequal(x -> rand(), (1,1,1))
+    # These cases don't call the function at all:
+    @test allequal(error, [])
+    @test allequal(error, ())
+    @test allequal(error, (x for x in 1:3 if false))
+    @test allequal(error, [1])
+    @test allequal(error, (1,))
 end
 
 @testset "filter(f, ::$S)" for S = (Set, BitSet)
@@ -810,6 +908,28 @@ end
     @test replace((NaN, 1.0), NaN=>0.0) === (0.0, 1.0)
     @test replace([1, missing], missing=>0) == [1, 0]
     @test replace((1, missing), missing=>0) === (1, 0)
+
+    # test that MethodError is thrown for pairs
+    @test_throws MethodError replace(identity, 1=>2)
+    @test_throws MethodError replace(identity, 1=>2, 3=>4)
+    @test_throws MethodError replace!(identity, 1=>2)
+    @test_throws MethodError replace!(identity, 1=>2, 3=>4)
+
+    # test replace and friends for AbstractDicts
+    d1 = GenericDict(Dict(1=>2, 3=>4))
+    d2 = replace(d1, (1=>2) => (1=>"a"))
+    @test d2 == Dict(1=>"a", 3=>4)
+    @test d2 isa Dict{Int, Any}
+    @test d1 === replace!(d1, (1=>2) => (1=>-2))
+    @test d1 == Dict(1=>-2, 3=>4)
+
+    dd = Dict(1=>2, 3=>1, 5=>1, 7=>1)
+    for d1 in (dd, GenericDict(dd))
+        @test replace(d1, (1=>2) => (1=>"a"), count=0) == d1
+        d2 = replace(kv->(kv[2] == 1 ? kv[1]=>2 : kv), d1, count=2)
+        @test count(==(2), values(d2)) == 3
+        @test count(==(1), values(d2)) == 1
+    end
 end
 
 @testset "⊆, ⊊, ⊈, ⊇, ⊋, ⊉, <, <=, issetequal" begin
@@ -817,8 +937,8 @@ end
     b = [2, 3, 1, 3]
     ua = unique(a)
     ub = unique(b)
-    for TA in (Tuple, identity, Set, BitSet, Base.IdSet{Int}),
-        TB in (Tuple, identity, Set, BitSet, Base.IdSet{Int}),
+    for TA in (Tuple, identity, Set, BitSet, IdSet{Int}),
+        TB in (Tuple, identity, Set, BitSet, IdSet{Int}),
         uA = false:true,
         uB = false:true
         A = TA(uA ? ua : a)
@@ -837,7 +957,9 @@ end
         @test !(B ⊉ A)
         @test !issetequal(A, B)
         @test !issetequal(B, A)
-        for T = (Tuple, identity, Set, BitSet, Base.IdSet{Int})
+        @test !issetequal(B)(A)
+        @test !issetequal(A)(B)
+        for T = (Tuple, identity, Set, BitSet, IdSet{Int})
             @test issetequal(A, T(A))
             @test issetequal(B, T(B))
         end
@@ -898,7 +1020,7 @@ end
     c = [3]
     d = [4]
     e = [5]
-    A = Base.IdSet{Vector{Int}}([a, b, c, d])
+    A = IdSet{Vector{Int}}([a, b, c, d])
     @test !isempty(A)
     B = copy(A)
     @test A ⊆ B
@@ -915,6 +1037,8 @@ end
     @test !isempty(A)
     A = empty!(A)
     @test isempty(A)
+    @test isnothing(sizehint!(A, 10))
+    @test Base.copymutable(A) == copy(A)
 end
 
 @testset "⊊, ⊋" begin
@@ -931,4 +1055,6 @@ end
     end
     set = TestSet{Any}()
     @test sizehint!(set, 1) === set
+    @test sizehint!(set, 1; shrink = true) === set
+    @test sizehint!(set, 1; shrink = false) === set
 end
diff --git a/test/show.jl b/test/show.jl
index 8b781ff62af24..3b3de18bde9c0 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -8,6 +8,8 @@ include("testenv.jl")
 replstr(x, kv::Pair...) = sprint((io,x) -> show(IOContext(io, :limit => true, :displaysize => (24, 80), kv...), MIME("text/plain"), x), x)
 showstr(x, kv::Pair...) = sprint((io,x) -> show(IOContext(io, :limit => true, :displaysize => (24, 80), kv...), x), x)
 
+const IRShow = Base.Compiler.IRShow
+
 @testset "IOContext" begin
     io = IOBuffer()
     ioc = IOContext(io)
@@ -326,7 +328,7 @@ end
             # line meta
             if d < 0
                 # line meta
-                error(\"dimension size must be nonnegative (got \$d)\")
+                error(\"dimension size must be non-negative (got \$d)\")
             end
             # line meta
             n *= d
@@ -523,6 +525,13 @@ end
 # Hidden macro names
 @test sprint(show, Expr(:macrocall, Symbol("@#"), nothing, :a)) == ":(@var\"#\" a)"
 
+# Test that public expressions are rendered nicely
+# though they are hard to create with quotes because public is not a context dependant keyword
+@test sprint(show, Expr(:public, Symbol("@foo"))) == ":(public @foo)"
+@test sprint(show, Expr(:public, :f,:o,:o)) == ":(public f, o, o)"
+s = sprint(show, :(module A; public x; end))
+@test match(r"^:\(module A\n  #= .* =#\n  #= .* =#\n  public x\n  end\)$", s) !== nothing
+
 # PR #38418
 module M1 var"#foo#"() = 2 end
 @test occursin("M1.var\"#foo#\"", sprint(show, M1.var"#foo#", context = :module=>@__MODULE__))
@@ -696,7 +705,7 @@ let oldout = stdout, olderr = stderr
         redirect_stderr(olderr)
         close(wrout)
         close(wrerr)
-        @test fetch(out) == "Int64 <: Signed\nTESTA\nTESTB\nΑ1Β2\"A\"\nA\n123\"C\"\n"
+        @test fetch(out) == "primitive type Int64 <: Signed\nTESTA\nTESTB\nΑ1Β2\"A\"\nA\n123.0000000000000000\"C\"\n"
         @test fetch(err) == "TESTA\nTESTB\nΑ1Β2\"A\"\n"
     finally
         redirect_stdout(oldout)
@@ -748,6 +757,69 @@ end
 
 @test startswith(sprint(show, typeof(x->x), context = :module=>@__MODULE__), "var\"")
 
+# PR 53719
+module M53719
+    f = x -> x + 1
+    function foo(x)
+        function bar(y)
+            function baz(z)
+                return x + y + z
+            end
+            return baz
+        end
+        return bar
+    end
+    function foo2(x)
+        function bar2(y)
+            return z -> x + y + z
+        end
+        return bar2
+    end
+    lambda1 = (x)->begin
+        function foo(y)
+            return x + y
+        end
+        return foo
+    end
+    lambda2 = (x)->begin
+        y -> x + y
+    end
+end
+
+@testset "PR 53719 function names" begin
+    # M53719.f should be printed as var"#[0-9]+"
+    @test occursin(r"var\"#[0-9]+", sprint(show, M53719.f, context = :module=>M53719))
+    # M53719.foo(1) should be printed as var"#bar"
+    @test occursin(r"var\"#bar", sprint(show, M53719.foo(1), context = :module=>M53719))
+    # M53719.foo(1)(2) should be printed as var"#baz"
+    @test occursin(r"var\"#baz", sprint(show, M53719.foo(1)(2), context = :module=>M53719))
+    # M53719.foo2(1) should be printed as var"#bar2"
+    @test occursin(r"var\"#bar2", sprint(show, M53719.foo2(1), context = :module=>M53719))
+    # M53719.foo2(1)(2) should be printed as var"#foo2##[0-9]+"
+    @test occursin(r"var\"#foo2##[0-9]+", sprint(show, M53719.foo2(1)(2), context = :module=>M53719))
+    # M53719.lambda1(1) should be printed as var"#foo"
+    @test occursin(r"var\"#foo", sprint(show, M53719.lambda1(1), context = :module=>M53719))
+    # M53719.lambda2(1) should be printed as var"#[0-9]+"
+    @test occursin(r"var\"#[0-9]+", sprint(show, M53719.lambda2(1), context = :module=>M53719))
+end
+
+@testset "PR 53719 function types" begin
+    # typeof(M53719.f) should be printed as var"#[0-9]+#[0-9]+"
+    @test occursin(r"var\"#[0-9]+#[0-9]+", sprint(show, typeof(M53719.f), context = :module=>M53719))
+    #typeof(M53719.foo(1)) should be printed as var"#bar#foo##[0-9]+"
+    @test occursin(r"var\"#bar#foo##[0-9]+", sprint(show, typeof(M53719.foo(1)), context = :module=>M53719))
+    #typeof(M53719.foo(1)(2)) should be printed as var"#baz#foo##[0-9]+"
+    @test occursin(r"var\"#baz#foo##[0-9]+", sprint(show, typeof(M53719.foo(1)(2)), context = :module=>M53719))
+    #typeof(M53719.foo2(1)) should be printed as var"#bar2#foo2##[0-9]+"
+    @test occursin(r"var\"#bar2#foo2##[0-9]+", sprint(show, typeof(M53719.foo2(1)), context = :module=>M53719))
+    #typeof(M53719.foo2(1)(2)) should be printed as var"#foo2##[0-9]+#foo2##[0-9]+"
+    @test occursin(r"var\"#foo2##[0-9]+#foo2##[0-9]+", sprint(show, typeof(M53719.foo2(1)(2)), context = :module=>M53719))
+    #typeof(M53719.lambda1(1)) should be printed as var"#foo#[0-9]+"
+    @test occursin(r"var\"#foo#[0-9]+", sprint(show, typeof(M53719.lambda1(1)), context = :module=>M53719))
+    #typeof(M53719.lambda2(1)) should be printed as var"#[0-9]+#[0-9]+"
+    @test occursin(r"var\"#[0-9]+#[0-9]+", sprint(show, typeof(M53719.lambda2(1)), context = :module=>M53719))
+end
+
 #test methodshow.jl functions
 @test Base.inbase(Base)
 @test !Base.inbase(LinearAlgebra)
@@ -788,7 +860,7 @@ struct S45879{P} end
 let ms = methods(S45879)
     @test ms isa Base.MethodList
     @test length(ms) == 0
-    @test sprint(show, Base.MethodList(Method[], typeof(S45879).name.mt)) isa String
+    @test sprint(show, Base.MethodList(Method[], typeof(S45879).name)) isa String
 end
 
 function f49475(a=12.0; b) end
@@ -805,6 +877,10 @@ else
     @test occursin("https://github.com/JuliaLang/julia/tree/$(Base.GIT_VERSION_INFO.commit)/base/special/trig.jl#L", Base.url(which(sin, (Float64,))))
 end
 
+@testset "method show: method url return type inference" begin
+    @test isconcretetype(Base.infer_return_type(Base.url))
+end
+
 # Method location correction (Revise integration)
 dummyloc(m::Method) = :nofile, Int32(123456789)
 Base.methodloc_callback[] = dummyloc
@@ -858,19 +934,19 @@ end
 # string show with elision
 @testset "string show with elision" begin
     @testset "elision logic" begin
-        strs = ["A", "∀", "∀A", "A∀", "😃"]
+        strs = ["A", "∀", "∀A", "A∀", "😃", "x̂"]
         for limit = 0:100, len = 0:100, str in strs
             str = str^len
             str = str[1:nextind(str, 0, len)]
             out = sprint() do io
                 show(io, MIME"text/plain"(), str; limit)
             end
-            lower = length("\"\" ⋯ $(ncodeunits(str)) bytes ⋯ \"\"")
+            lower = textwidth("\"\" ⋯ $(ncodeunits(str)) bytes ⋯ \"\"")
             limit = max(limit, lower)
-            if length(str) + 2 ≤ limit
+            if textwidth(str) + 2 ≤ limit+1 && !contains(out, '⋯')
                 @test eval(Meta.parse(out)) == str
             else
-                @test limit-!isascii(str) <= length(out) <= limit
+                @test limit-2 <= textwidth(out) <= limit
                 re = r"(\"[^\"]*\") ⋯ (\d+) bytes ⋯ (\"[^\"]*\")"
                 m = match(re, out)
                 head = eval(Meta.parse(m.captures[1]))
@@ -886,11 +962,11 @@ end
 
     @testset "default elision limit" begin
         r = replstr("x"^1000)
-        @test length(r) == 7*80
-        @test r == repr("x"^271) * " ⋯ 459 bytes ⋯ " * repr("x"^270)
+        @test length(r) == 7*80-1
+        @test r == repr("x"^270) * " ⋯ 460 bytes ⋯ " * repr("x"^270)
         r = replstr(["x"^1000])
         @test length(r) < 120
-        @test r == "1-element Vector{String}:\n " * repr("x"^31) * " ⋯ 939 bytes ⋯ " * repr("x"^30)
+        @test r == "1-element Vector{String}:\n " * repr("x"^30) * " ⋯ 940 bytes ⋯ " * repr("x"^30)
     end
 end
 
@@ -1205,6 +1281,7 @@ let x = [], y = [], z = Base.ImmutableDict(x => y)
     push!(y, x)
     push!(y, z)
     @test replstr(x) == "1-element Vector{Any}:\n Any[Any[#= circular reference @-2 =#], Base.ImmutableDict{Vector{Any}, Vector{Any}}([#= circular reference @-3 =#] => [#= circular reference @-2 =#])]"
+    @test replstr(x, :color => true) == "1-element Vector{Any}:\n Any[Any[\e[33m#= circular reference @-2 =#\e[39m], Base.ImmutableDict{Vector{Any}, Vector{Any}}([\e[33m#= circular reference @-3 =#\e[39m] => [\e[33m#= circular reference @-2 =#\e[39m])]"
     @test repr(z) == "Base.ImmutableDict{Vector{Any}, Vector{Any}}([Any[Any[#= circular reference @-2 =#], Base.ImmutableDict{Vector{Any}, Vector{Any}}(#= circular reference @-3 =#)]] => [Any[Any[#= circular reference @-2 =#]], Base.ImmutableDict{Vector{Any}, Vector{Any}}(#= circular reference @-2 =#)])"
     @test sprint(dump, x) == """
         Array{Any}((1,))
@@ -1243,70 +1320,73 @@ end
 @testset "PR 17117: print_array" begin
     s = IOBuffer(Vector{UInt8}(), read=true, write=true)
     Base.print_array(s, [1, 2, 3])
-    @test String(resize!(s.data, s.size)) == " 1\n 2\n 3"
+    @test String(take!(s)) == " 1\n 2\n 3"
     close(s)
     s2 = IOBuffer(Vector{UInt8}(), read=true, write=true)
     z = zeros(0,0,0,0,0,0,0,0)
     Base.print_array(s2, z)
-    @test String(resize!(s2.data, s2.size)) == ""
+    @test String(take!(s2)) == ""
     close(s2)
 end
 
-let repr = sprint(dump, :(x = 1))
-    @test repr == "Expr\n  head: Symbol =\n  args: Array{Any}((2,))\n    1: Symbol x\n    2: $Int 1\n"
-end
-let repr = sprint(dump, Pair{String,Int64})
-    @test repr == "Pair{String, Int64} <: Any\n  first::String\n  second::Int64\n"
-end
-let repr = sprint(dump, Tuple)
-    @test repr == "Tuple <: Any\n"
-end
-let repr = sprint(dump, Int64)
-    @test repr == "Int64 <: Signed\n"
-end
-let repr = sprint(dump, Any)
-    @test length(repr) == 4
-    @test occursin(r"^Any\n", repr)
-    @test endswith(repr, '\n')
-end
-let repr = sprint(dump, Integer)
-    @test occursin("Integer <: Real", repr)
-    @test !occursin("Any", repr)
-end
-let repr = sprint(dump, Union{Integer, Float32})
-    @test repr == "Union{Integer, Float32}\n" || repr == "Union{Float32, Integer}\n"
-end
 module M30442
     struct T end
 end
-let repr = sprint(show, Union{String, M30442.T})
-    @test repr == "Union{$(curmod_prefix)M30442.T, String}" ||
-          repr == "Union{String, $(curmod_prefix)M30442.T}"
-end
-let repr = sprint(dump, Ptr{UInt8}(UInt(1)))
-    @test repr == "Ptr{UInt8} @$(Base.repr(UInt(1)))\n"
-end
-let repr = sprint(dump, Core.svec())
-    @test repr == "empty SimpleVector\n"
-end
-let repr = sprint(dump, sin)
-    @test repr == "sin (function of type typeof(sin))\n"
-end
-let repr = sprint(dump, Test)
-    @test repr == "Module Test\n"
-end
-let repr = sprint(dump, nothing)
-    @test repr == "Nothing nothing\n"
-end
-let a = Vector{Any}(undef, 10000)
-    a[2] = "elemA"
-    a[4] = "elemB"
-    a[11] = "elemC"
-    repr = sprint(dump, a; context=(:limit => true), sizehint=0)
-    @test repr == "Array{Any}((10000,))\n  1: #undef\n  2: String \"elemA\"\n  3: #undef\n  4: String \"elemB\"\n  5: #undef\n  ...\n  9996: #undef\n  9997: #undef\n  9998: #undef\n  9999: #undef\n  10000: #undef\n"
-end
-@test occursin("NamedTuple", sprint(dump, NamedTuple))
+@testset "Dump types" begin
+    let repr = sprint(dump, :(x = 1))
+        @test repr == "Expr\n  head: Symbol =\n  args: Array{Any}((2,))\n    1: Symbol x\n    2: $Int 1\n"
+    end
+    let repr = sprint(dump, Pair{String,Int64})
+        @test repr == "struct Pair{String, Int64} <: Any\n  first::String\n  second::Int64\n"
+    end
+    let repr = sprint(dump, Tuple)
+        @test repr == "Tuple <: Any\n"
+    end
+    let repr = sprint(dump, Int64)
+        @test repr == "primitive type Int64 <: Signed\n"
+    end
+    let repr = sprint(dump, Any)
+        @test repr == "abstract type Any\n"
+    end
+    let repr = sprint(dump, Integer)
+        @test occursin("abstract type Integer <: Real", repr)
+        @test !occursin("Any", repr)
+    end
+    let repr = sprint(dump, Union{Integer, Float32})
+        @test repr == "Union{Integer, Float32}\n" || repr == "Union{Float32, Integer}\n"
+    end
 
+    let repr = sprint(show, Union{String, M30442.T})
+        @test repr == "Union{$(curmod_prefix)M30442.T, String}" ||
+              repr == "Union{String, $(curmod_prefix)M30442.T}"
+    end
+    let repr = sprint(dump, Ptr{UInt8}(UInt(1)))
+        @test repr == "Ptr{UInt8}($(Base.repr(UInt(1))))\n"
+    end
+    let repr = sprint(dump, Core.svec())
+        @test repr == "empty SimpleVector\n"
+    end
+    let repr = sprint(dump, sin)
+        @test repr == "sin (function of type typeof(sin))\n"
+    end
+    let repr = sprint(dump, Test)
+        @test repr == "Module Test\n"
+    end
+    let repr = sprint(dump, nothing)
+        @test repr == "Nothing nothing\n"
+    end
+    let a = Vector{Any}(undef, 10000)
+        a[2] = "elemA"
+        a[4] = "elemB"
+        a[11] = "elemC"
+        repr = sprint(dump, a; context=(:limit => true), sizehint=0)
+        @test repr == "Array{Any}((10000,))\n  1: #undef\n  2: String \"elemA\"\n  3: #undef\n  4: String \"elemB\"\n  5: #undef\n  ...\n  9996: #undef\n  9997: #undef\n  9998: #undef\n  9999: #undef\n  10000: #undef\n"
+    end
+    @test occursin("NamedTuple", sprint(dump, NamedTuple))
+
+    # issue 36495, dumping a partial NamedTupled shouldn't error
+    @test occursin("NamedTuple", sprint(dump, NamedTuple{(:foo,:bar)}))
+end
 # issue #17338
 @test repr(Core.svec(1, 2)) == "svec(1, 2)"
 
@@ -1361,6 +1441,9 @@ test_repr("(:).a")
 @test repr(Tuple{Float32, Float32, Float32}) == "Tuple{Float32, Float32, Float32}"
 @test repr(Tuple{String, Int64, Int64, Int64}) == "Tuple{String, Int64, Int64, Int64}"
 @test repr(Tuple{String, Int64, Int64, Int64, Int64}) == "Tuple{String, Vararg{Int64, 4}}"
+@test repr(NTuple) == "NTuple{N, T} where {N, T}"
+@test repr(Tuple{NTuple{N}, Vararg{NTuple{N}, 4}} where N) == "NTuple{5, NTuple{N, T} where T} where N"
+@test repr(Tuple{Float64, NTuple{N}, Vararg{NTuple{N}, 4}} where N) == "Tuple{Float64, Vararg{NTuple{N, T} where T, 5}} where N"
 
 # Test printing of NamedTuples using the macro syntax
 @test repr(@NamedTuple{kw::Int64}) == "@NamedTuple{kw::Int64}"
@@ -1368,22 +1451,26 @@ test_repr("(:).a")
 @test repr(@NamedTuple{kw::@NamedTuple{kw2::Int64}}) == "@NamedTuple{kw::@NamedTuple{kw2::Int64}}"
 @test repr(@NamedTuple{kw::NTuple{7, Int64}}) == "@NamedTuple{kw::NTuple{7, Int64}}"
 @test repr(@NamedTuple{a::Float64, b}) == "@NamedTuple{a::Float64, b}"
+@test repr(@NamedTuple{var"#"::Int64}) == "@NamedTuple{var\"#\"::Int64}"
 
 # Test general printing of `Base.Pairs` (it should not use the `@Kwargs` macro syntax)
-@test repr(@Kwargs{init::Int}) == "Base.Pairs{Symbol, $Int, Tuple{Symbol}, @NamedTuple{init::$Int}}"
+@test repr(@Kwargs{init::Int}) == "Base.Pairs{Symbol, $Int, Nothing, @NamedTuple{init::$Int}}"
 
 @testset "issue #42931" begin
-    @test repr(NTuple{4, :A}) == "NTuple{4, :A}"
+    @test repr(NTuple{4, :A}) == "Tuple{:A, :A, :A, :A}"
     @test repr(NTuple{3, :A}) == "Tuple{:A, :A, :A}"
     @test repr(NTuple{2, :A}) == "Tuple{:A, :A}"
     @test repr(NTuple{1, :A}) == "Tuple{:A}"
     @test repr(NTuple{0, :A}) == "Tuple{}"
 
     @test repr(Tuple{:A, :A, :A, :B}) == "Tuple{:A, :A, :A, :B}"
-    @test repr(Tuple{:A, :A, :A, :A}) == "NTuple{4, :A}"
+    @test repr(Tuple{:A, :A, :A, :A}) == "Tuple{:A, :A, :A, :A}"
     @test repr(Tuple{:A, :A, :A}) == "Tuple{:A, :A, :A}"
     @test repr(Tuple{:A}) == "Tuple{:A}"
     @test repr(Tuple{}) == "Tuple{}"
+
+    @test repr(Tuple{Vararg{N, 10}} where N) == "NTuple{10, N} where N"
+    @test repr(Tuple{Vararg{10, N}} where N) == "Tuple{Vararg{10, N}} where N"
 end
 
 # Test that REPL/mime display of invalid UTF-8 data doesn't throw an exception:
@@ -1446,12 +1533,20 @@ end
 @test static_shown(:+) == ":+"
 @test static_shown(://) == "://"
 @test static_shown(://=) == "://="
-@test static_shown(Symbol("")) == "Symbol(\"\")"
-@test static_shown(Symbol("a/b")) == "Symbol(\"a/b\")"
-@test static_shown(Symbol("a-b")) == "Symbol(\"a-b\")"
+@test static_shown(Symbol("")) == ":var\"\""
+@test static_shown(Symbol("a/b")) == ":var\"a/b\""
+@test static_shown(Symbol("a-b")) == ":var\"a-b\""
 @test static_shown(UnionAll) == "UnionAll"
-
 @test static_shown(QuoteNode(:x)) == ":(:x)"
+@test static_shown(:!) == ":!"
+@test static_shown("\"") == "\"\\\"\""
+@test static_shown("\$") == "\"\\\$\""
+@test static_shown("\\") == "\"\\\\\""
+@test static_shown("a\x80b") == "\"a\\x80b\""
+@test static_shown("a\x80\$\\b") == "\"a\\x80\\\$\\\\b\""
+@test static_shown(GlobalRef(Main, :var"a#b")) == "Main.var\"a#b\""
+@test static_shown(GlobalRef(Main, :+)) == "Main.:(+)"
+@test static_shown((a = 3, ! = 4, var"a b" = 5)) == "(a=3, (!)=4, var\"a b\"=5)"
 
 # PR #38049
 @test static_shown(sum) == "Base.sum"
@@ -1481,8 +1576,61 @@ struct var"%X%" end  # Invalid name without '#'
             typeof(+),
             var"#f#",
             typeof(var"#f#"),
+
+            # Integers should round-trip (#52677)
+            1, UInt(1),
+            Int8(1),  Int16(1),  Int32(1),  Int64(1),
+            UInt8(1), UInt16(1), UInt32(1), UInt64(1),
+
+            # Float round-trip
+            Float16(1),                  Float32(1),                  Float64(1),
+            Float16(1.5),                Float32(1.5),                Float64(1.5),
+            Float16(0.4893243538921085), Float32(0.4893243538921085), Float64(0.4893243538921085),
+            # Examples that require the full 5, 9, and 17 digits of precision
+            Float16(0.00010014),         Float32(1.00000075f-36),     Float64(-1.561051336605761e-182),
+            floatmax(Float16),           floatmax(Float32),           floatmax(Float64),
+            floatmin(Float16),           floatmin(Float32),           floatmin(Float64),
+            Float16(0.0),                0.0f0,                       0.0,
+            Float16(-0.0),               -0.0f0,                      -0.0,
+            Inf16,                       Inf32,                       Inf,
+            -Inf16,                      -Inf32,                      -Inf,
+            nextfloat(Float16(0)),       nextfloat(Float32(0)),       nextfloat(Float64(0)),
+            NaN16,                       NaN32,                       NaN,
+            Float16(1e3),                1f7,                         1e16,
+            Float16(-1e3),               -1f7,                        -1e16,
+            Float16(1e4),                1f8,                         1e17,
+            Float16(-1e4),               -1f8,                        -1e17,
+
+            # Pointers should round-trip
+            Ptr{Cvoid}(0), Ptr{Cvoid}(typemax(UInt)), Ptr{Any}(0), Ptr{Any}(typemax(UInt)),
+
+            # :var"" escaping rules differ from strings (#58484)
+            :foo,
+            :var"bar baz",
+            :var"a $b",         # No escaping for $ in raw string
+            :var"a\b",          # No escaping for backslashes in middle
+            :var"a\\",          # Backslashes must be escaped at the end
+            :var"a\\\\",
+            :var"a\"b",
+            :var"a\"",
+            :var"\\\"",
+            :+, :var"+-",
+            :(=), :(:), :(::),  # Requires quoting
+            Symbol("a\nb"),
+
+            Val(Float16(1.0)), Val(1f0),      Val(1.0),
+            Val(:abc),         Val(:(=)),     Val(:var"a\b"),
+
+            Val(1),       Val(Int8(1)),  Val(Int16(1)),  Val(Int32(1)),  Val(Int64(1)),  Val(Int128(1)),
+            Val(UInt(1)), Val(UInt8(1)), Val(UInt16(1)), Val(UInt32(1)), Val(UInt64(1)), Val(UInt128(1)),
+
+            # BROKEN
+            # Symbol("a\xffb"),
+            # User-defined primitive types
+            # Non-canonical NaNs
+            # BFloat16
         )
-        @test v == eval(Meta.parse(static_shown(v)))
+        @test v === eval(Meta.parse(static_shown(v)))
     end
 end
 
@@ -1509,7 +1657,7 @@ struct f_with_params{t} <: Function end
 end
 
 let io = IOBuffer()
-    show(io, MIME"text/html"(), ModFWithParams.f_with_params.body.name.mt)
+    show(io, MIME"text/html"(), methods(ModFWithParams.f_with_params{Int}()))
     @test occursin("ModFWithParams.f_with_params", String(take!(io)))
 end
 
@@ -1619,6 +1767,13 @@ end
         "[3.141592653589793 3.141592653589793; 3.141592653589793 3.141592653589793]"
 end
 
+@testset "`displaysize` return type inference" begin
+    @test Tuple{Int, Int} === Base.infer_return_type(displaysize, Tuple{})
+    @test Tuple{Int, Int} === Base.infer_return_type(displaysize, Tuple{IO})
+    @test Tuple{Int, Int} === Base.infer_return_type(displaysize, Tuple{IOContext})
+    @test Tuple{Int, Int} === Base.infer_return_type(displaysize, Tuple{Base.TTY})
+end
+
 @testset "Array printing with limited rows" begin
     arrstr = let buf = IOBuffer()
         function (A, rows)
@@ -1644,6 +1799,29 @@ end
               string("4×30 Matrix{Float64}:\n",
                      " 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0\n",
                      " ⋮                        ⋮              ⋱            ⋮                   ")
+
+    @testset "extremely large arrays" begin
+        struct MyBigFill{T,N} <: AbstractArray{T,N}
+            val :: T
+            axes :: NTuple{N,Base.OneTo{BigInt}}
+        end
+        MyBigFill(val, sz::Tuple{}) = MyBigFill{typeof(val),0}(val, sz)
+        MyBigFill(val, sz::NTuple{N,BigInt}) where {N} = MyBigFill(val, map(Base.OneTo, sz))
+        MyBigFill(val, sz::Tuple{Vararg{Integer}}) = MyBigFill(val, map(BigInt, sz))
+        Base.size(M::MyBigFill) = map(length, M.axes)
+        Base.axes(M::MyBigFill) = M.axes
+        function Base.getindex(M::MyBigFill{<:Any,N}, ind::Vararg{Integer,N}) where {N}
+            checkbounds(M, ind...)
+            M.val
+        end
+        function Base.isassigned(M::MyBigFill{<:Any,N}, ind::Vararg{BigInt,N}) where {N}
+            checkbounds(M, ind...)
+            true
+        end
+        M = MyBigFill(4, (big(2)^65, 3))
+        @test arrstr(M, 3) == "36893488147419103232×3 $MyBigFill{$Int, 2}: …"
+        @test arrstr(M, 8) == "36893488147419103232×3 $MyBigFill{$Int, 2}:\n 4  4  4\n 4  4  4\n ⋮     \n 4  4  4"
+    end
 end
 
 module UnexportedOperators
@@ -1661,10 +1839,10 @@ end
     anonfn_type_repr = "$modname.var\"$(typeof(anonfn).name.name)\""
     @test repr(typeof(anonfn)) == anonfn_type_repr
     @test repr(anonfn) == anonfn_type_repr * "()"
-    @test repr("text/plain", anonfn) == "$(typeof(anonfn).name.mt.name) (generic function with 1 method)"
+    @test repr("text/plain", anonfn) == "$(typeof(anonfn).name.singletonname) (generic function with 1 method)"
     mkclosure = x->y->x+y
     clo = mkclosure(10)
-    @test repr("text/plain", clo) == "$(typeof(clo).name.mt.name) (generic function with 1 method)"
+    @test repr("text/plain", clo) == "$(typeof(clo).name.singletonname) (generic function with 1 method)"
     @test repr(UnionAll) == "UnionAll"
 end
 
@@ -1772,6 +1950,9 @@ end
     B = @view ones(2)[r]
     Base.showarg(io, B, false)
     @test String(take!(io)) == "view(::Vector{Float64}, $(repr(r)))"
+
+    Base.showarg(io, reshape(UnitRange{Int64}(1,1)), false)
+    @test String(take!(io)) == "reshape(::UnitRange{Int64})"
 end
 
 @testset "Methods" begin
@@ -1811,15 +1992,6 @@ end
     b = IOBuffer()
     show(IOContext(b, :module => @__MODULE__), TypeA)
     @test String(take!(b)) == "TypeA"
-
-    # issue #26354; make sure testing for symbol visibility doesn't cause
-    # spurious binding resolutions
-    show(IOContext(b, :module => TestShowType), Base.Pair)
-    @test !Base.isbindingresolved(TestShowType, :Pair)
-    @test String(take!(b)) == "Core.Pair"
-    show(IOContext(b, :module => TestShowType), Base.Complex)
-    @test Base.isbindingresolved(TestShowType, :Complex)
-    @test String(take!(b)) == "Complex"
 end
 
 @testset "typeinfo" begin
@@ -1844,9 +2016,9 @@ end
     @test replstr(view(A, [1], :)) == "1×1 view(::Matrix{Float64}, [1], :) with eltype Float64:\n 0.0"
 
     # issue #27680
-    @test showstr(Set([(1.0,1.0), (2.0,2.0), (3.0, 3.0)])) == (sizeof(Int) == 8 ?
-              "Set([(1.0, 1.0), (3.0, 3.0), (2.0, 2.0)])" :
-              "Set([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)])")
+    @test showstr(Set([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)])) == (sizeof(Int) == 8 ?
+              "Set([(2.0, 2.0), (1.0, 1.0), (3.0, 3.0)])" :
+              "Set([(2.0, 2.0), (1.0, 1.0), (3.0, 3.0)])")
 
     # issue #27747
     let t = (x = Integer[1, 2],)
@@ -1868,6 +2040,7 @@ end
 
     @test showstr(Pair{Integer,Integer}(1, 2), :typeinfo => Pair{Integer,Integer}) == "1 => 2"
     @test showstr([Pair{Integer,Integer}(1, 2)]) == "Pair{Integer, Integer}[1 => 2]"
+    @test showstr([(a=1,)]) == "[(a = 1,)]"
     @test showstr(Dict{Integer,Integer}(1 => 2)) == "Dict{Integer, Integer}(1 => 2)"
     @test showstr(Dict(true=>false)) == "Dict{Bool, Bool}(1 => 0)"
     @test showstr(Dict((1 => 2) => (3 => 4))) == "Dict((1 => 2) => (3 => 4))"
@@ -1956,12 +2129,12 @@ end
 end
 
 @testset "Intrinsic printing" begin
-    @test sprint(show, Core.Intrinsics.arraylen) == "Core.Intrinsics.arraylen"
-    @test repr(Core.Intrinsics.arraylen) == "Core.Intrinsics.arraylen"
+    @test sprint(show, Core.Intrinsics.cglobal) == "Core.Intrinsics.cglobal"
+    @test repr(Core.Intrinsics.cglobal) == "Core.Intrinsics.cglobal"
     let io = IOBuffer()
-        show(io, MIME"text/plain"(), Core.Intrinsics.arraylen)
+        show(io, MIME"text/plain"(), Core.Intrinsics.cglobal)
         str = String(take!(io))
-        @test occursin("arraylen", str)
+        @test occursin("cglobal", str)
         @test occursin("(intrinsic function", str)
     end
     @test string(Core.Intrinsics.add_int) == "add_int"
@@ -1994,7 +2167,7 @@ end
 function compute_annotations(f, types)
     src = code_typed(f, types, debuginfo=:source)[1][1]
     ir = Core.Compiler.inflate_ir(src)
-    la, lb, ll = Base.IRShow.compute_ir_line_annotations(ir)
+    la, lb, ll = IRShow.compute_ir_line_annotations(ir)
     max_loc_method = maximum(length(s) for s in la)
     return join((strip(string(a, " "^(max_loc_method-length(a)), b)) for (a, b) in zip(la, lb)), '\n')
 end
@@ -2036,6 +2209,7 @@ eval(Meta._parse_string("""function my_fun28173(x)
             r = 1
             s = try
                 r = 2
+                Base.inferencebarrier(false) && error()
                 "BYE"
             catch
                 r = 3
@@ -2048,8 +2222,10 @@ eval(Meta._parse_string("""function my_fun28173(x)
     return y
 end""", "a"^80, 1, 1, :statement)[1]) # use parse to control the line numbers
 let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
+    @test_throws "must be one of the following" sprint(IRShow.show_ir, src; context = :debuginfo => :_)
+    @test !contains(sprint(IRShow.show_ir, src; context = :debuginfo => :source_inline), "a"^80)
     ir = Core.Compiler.inflate_ir(src)
-    fill!(src.codelocs, 0) # IRCode printing is only capable of printing partial line info
+    src.debuginfo = Core.DebugInfo(src.debuginfo.def) # IRCode printing defaults to incomplete line info printing, so turn it off completely for CodeInfo too
     let source_slotnames = String["my_fun28173", "x"],
         repr_ir = split(repr(ir, context = :SOURCE_SLOTNAMES=>source_slotnames), '\n'),
         repr_ir = "CodeInfo(\n" * join((l[4:end] for l in repr_ir), "\n") * ")" # remove line numbers
@@ -2059,8 +2235,8 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
     @test all(isspace, pop!(lines1))
     Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(QuoteNode(1), Val{1}), false)
     Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(QuoteNode(2), Val{2}), true)
-    Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Core.Compiler.NewInstruction(QuoteNode(3), Val{3}), false)
-    Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Core.Compiler.NewInstruction(QuoteNode(4), Val{4}), true)
+    Core.Compiler.insert_node!(ir, length(ir.stmts.stmt), Core.Compiler.NewInstruction(QuoteNode(3), Val{3}), false)
+    Core.Compiler.insert_node!(ir, length(ir.stmts.stmt), Core.Compiler.NewInstruction(QuoteNode(4), Val{4}), true)
     lines2 = split(repr(ir), '\n')
     @test all(isspace, pop!(lines2))
     @test popfirst!(lines2) == "2  1 ──       $(QuoteNode(1))"
@@ -2072,23 +2248,21 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
     end
     @test popfirst!(lines2) == "   │          $(QuoteNode(2))"
     @test pop!(lines2) == "   └───       \$(QuoteNode(4))"
-    @test pop!(lines1) == "17 └───       return %18"
-    @test pop!(lines2) == "   │          return %18"
-    @test pop!(lines2) == "17 │          \$(QuoteNode(3))"
+    @test pop!(lines1) == "18 └───       return %21"
+    @test pop!(lines2) == "   │          return %21"
+    @test pop!(lines2) == "18 │          \$(QuoteNode(3))"
     @test lines1 == lines2
 
-    # verbose linetable
-    io = IOBuffer()
-    Base.IRShow.show_ir(io, ir, Base.IRShow.default_config(ir; verbose_linetable=true))
-    seekstart(io)
-    @test count(contains(r"@ a{80}:\d+ within `my_fun28173"), eachline(io)) == 10
+    # debuginfo = :source
+    output = sprint(Base.IRShow.show_ir, ir, Base.IRShow.default_config(ir; debuginfo=:source))
+    @test count(contains(r"@ a{80}:\d+ within `my_fun28173"), split(output, '\n')) == 10
+    @test output == sprint(show, ir; context = :debuginfo => :source)
+    @test output != sprint(show, ir)
+    @test_throws "must be one of the following" sprint(show, ir; context = :debuginfo => :_)
 
     # Test that a bad :invoke doesn't cause an error during printing
     Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(Expr(:invoke, nothing, sin), Any), false)
-    io = IOBuffer()
-    Base.IRShow.show_ir(io, ir)
-    seekstart(io)
-    @test contains(String(take!(io)), "Expr(:invoke, nothing")
+    @test contains(string(ir), "Expr(:invoke, nothing")
 end
 
 # Verify that extra instructions at the end of the IR
@@ -2096,7 +2270,7 @@ end
 # with as unnamed "!" BB.
 let src = code_typed(gcd, (Int, Int), debuginfo=:source)[1][1]
     ir = Core.Compiler.inflate_ir(src)
-    push!(ir.stmts.inst, Core.Compiler.ReturnNode())
+    push!(ir.stmts.stmt, Core.Compiler.ReturnNode())
     lines = split(sprint(show, ir), '\n')
     @test all(isspace, pop!(lines))
     @test pop!(lines) == "   !!! ──       unreachable::#UNDEF"
@@ -2153,6 +2327,20 @@ replstrcolor(x) = sprint((io, x) -> show(IOContext(io, :limit => true, :color =>
     @test_repr "Bool[1, 0]"
 end
 
+@testset "Unions with Bool (#39590)" begin
+    @test repr([missing, false]) == "Union{Missing, Bool}[missing, 0]"
+    @test_repr "Union{Bool, Nothing}[1, 0, nothing]"
+end
+
+# issue #26847
+@test_repr "Union{Missing, Float32}[1.0]"
+
+# intersection of #45396 and #48822
+@test_repr "Union{Missing, Rational{Int64}}[missing, 1//2, 2]"
+
+# Don't go too far with #48822
+@test_repr "Union{String, Bool}[true]"
+
 # issue #30505
 @test repr(Union{Tuple{Char}, Tuple{Char, Char}}[('a','b')]) == "Union{Tuple{Char}, Tuple{Char, Char}}[('a', 'b')]"
 
@@ -2248,9 +2436,9 @@ end
 
 # begin/end indices
 @weak_test_repr "a[begin, end, (begin; end)]"
-@test repr(Base.remove_linenums!(:(a[begin, end, (begin; end)]))) == ":(a[begin, end, (begin;\n          end)])"
+@test_broken repr(Base.remove_linenums!(:(a[begin, end, (begin; end)]))) == ":(a[begin, end, (begin;\n          end)])"
 @weak_test_repr "a[begin, end, let x=1; (x+1;); end]"
-@test repr(Base.remove_linenums!(:(a[begin, end, let x=1; (x+1;); end]))) ==
+@test_broken repr(Base.remove_linenums!(:(a[begin, end, let x=1; (x+1;); end]))) ==
         ":(a[begin, end, let x = 1\n          begin\n              x + 1\n          end\n      end])"
 @test_repr "a[(bla;)]"
 @test_repr "a[(;;)]"
@@ -2306,6 +2494,7 @@ end
 @test string(Union{M37012.SimpleU, Nothing, T} where T) == "Union{Nothing, $(curmod_prefix)M37012.SimpleU, T} where T"
 @test string(Union{AbstractVector{T}, T} where T) == "Union{AbstractVector{T}, T} where T"
 @test string(Union{AbstractVector, T} where T) == "Union{AbstractVector, T} where T"
+@test string(Union{Array, Memory}) == "Union{Array, Memory}"
 
 @test sprint(show, :(./)) == ":((./))"
 @test sprint(show, :((.|).(.&, b))) == ":((.|).((.&), b))"
@@ -2434,7 +2623,7 @@ end
     mktemp() do f, io
         redirect_stdout(io) do
             let io = IOBuffer()
-                for i = 1:10
+                for i = 1:length(Base.Compiler.ALL_PASS_NAMES)
                     # make sure we don't error on printing IRs at any optimization level
                     ir = only(Base.code_ircode(sin, (Float64,); optimize_until=i))[1]
                     @test try; show(io, ir); true; catch; false; end
@@ -2458,9 +2647,9 @@ end
 
     # replace an instruction
     add_stmt = ir.stmts[1]
-    inst = Core.Compiler.NewInstruction(Expr(:call, add_stmt[:inst].args[1], add_stmt[:inst].args[2], 999), Int)
+    inst = Core.Compiler.NewInstruction(Expr(:call, add_stmt[:stmt].args[1], add_stmt[:stmt].args[2], 999), Int)
     node = Core.Compiler.insert_node!(ir, 1, inst)
-    Core.Compiler.setindex!(add_stmt, node, :inst)
+    Core.Compiler.setindex!(add_stmt, node, :stmt)
 
     # the new node should be colored green (as it's uncompacted IR),
     # and its uses shouldn't be colored at all (since they're just plain valid references)
@@ -2471,7 +2660,7 @@ end
     @test contains(str, "%1 = %6")
 
     # if we insert an invalid node, it should be colored appropriately
-    Core.Compiler.setindex!(add_stmt, Core.Compiler.SSAValue(node.id+1), :inst)
+    Core.Compiler.setindex!(add_stmt, Core.Compiler.SSAValue(node.id+1), :stmt)
     str = sprint(; context=:color=>true) do io
         show(io, ir)
     end
@@ -2635,3 +2824,83 @@ let buf = IOBuffer()
     Base.show_tuple_as_call(buf, Symbol(""), Tuple{Function,Any})
     @test String(take!(buf)) == "(::Function)(::Any)"
 end
+
+module Issue49382
+    abstract type Type49382 end
+end
+using .Issue49382
+(::Type{Issue49382.Type49382})() = 1
+@test sprint(show, methods(Issue49382.Type49382)) isa String
+
+# Showing of bad SlotNumber in Expr(:toplevel)
+let lowered = Meta.lower(Main, Expr(:let, Expr(:block), Expr(:block, Expr(:toplevel, :(x = 1)), :(y = 1))))
+    ci = lowered.args[1]
+    @assert isa(ci, Core.CodeInfo)
+    @test !isempty(ci.slotnames)
+    @assert ci.code[1].head === :toplevel
+    ci.code[1].args[1] = :($(Core.SlotNumber(1)) = 1)
+    # Check that this gets printed as `_1 = 1` not `y = 1`
+    @test contains(sprint(show, ci), "_1 = 1")
+end
+
+# Pointers should be reprable
+@test is_juliarepr(pointer([1]))
+@test is_juliarepr(Ptr{Vector{Complex{Float16}}}(UInt(0xdeadbeef)))
+
+# Toplevel MethodInstance with undef :uninferred
+let topmi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ());
+    topmi.specTypes = Tuple{}
+    topmi.def = Main
+    @test contains(repr(topmi), "Toplevel MethodInstance")
+end
+
+@testset "show(<do-block expr>) no trailing whitespace" begin
+    do_expr1 = :(foo() do; bar(); end)
+    @test !contains(sprint(show, do_expr1), " \n")
+end
+
+struct NoLengthDict{K,V} <: AbstractDict{K,V}
+    dict::Dict{K,V}
+    NoLengthDict{K,V}() where {K,V} = new(Dict{K,V}())
+end
+Base.iterate(d::NoLengthDict, s...) = iterate(d.dict, s...)
+Base.IteratorSize(::Type{<:NoLengthDict}) = Base.SizeUnknown()
+Base.eltype(::Type{NoLengthDict{K,V}}) where {K,V} = Pair{K,V}
+Base.setindex!(d::NoLengthDict, v, k) = d.dict[k] = v
+
+# Issue 55931
+@testset "show AbstractDict with unknown length" begin
+    x = NoLengthDict{Int,Int}()
+    x[1] = 2
+    str = sprint(io->show(io, MIME("text/plain"), x))
+    @test contains(str, "NoLengthDict")
+    @test contains(str, "1 => 2")
+end
+
+# Issue 56936
+@testset "code printing of var\"keyword\" identifiers" begin
+    @test_repr """:(var"do" = 1)"""
+    @weak_test_repr """:(let var"let" = 1; var"let"; end)"""
+end
+
+# Issue 57076
+@testset "show raw string given var\"str\"" begin
+    # In show_sym, only backslashes and quotes should be escaped when printing var"this".
+    @test_repr """:(var"\$" = 1)"""
+    @test_repr """:(var"\\"" = 1)""" # var name is one quote character
+    @test_repr """:(var"~!@#\$%^&*[]_+?" = 1)"""
+    @test_repr """:(var"\a\b\t\n\v\f\r\e" = 1)"""
+    @test_repr """:(var"\x01\u03c0\U03c0" = 1)"""
+end
+
+# test `print_signature_only::Bool` argument of `Base.show_method`
+f_show_method(x::T) where T<:Integer = :integer
+let m = only(methods(f_show_method))
+    let io = IOBuffer()
+        Base.show_method(io, m; print_signature_only=true)
+        @test "f_show_method(x::T) where T<:Integer" == String(take!(io))
+    end
+    let s = sprint(show, m; context=:print_method_signature_only=>true)
+        @test "f_show_method(x::T) where T<:Integer" == s
+    end
+end
diff --git a/test/smallarrayshrink.jl b/test/smallarrayshrink.jl
index a1a7df5aee5a5..680a882e432d4 100644
--- a/test/smallarrayshrink.jl
+++ b/test/smallarrayshrink.jl
@@ -1,45 +1,20 @@
 @testset "shrink small array" begin
-    x = [1, 2, 3, 4]
-    @test x[1] == 1
-    @test x[2] == 2
-    @test x[3] == 3
-    @test x[4] == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
-    sizehint!(x, 10000)
-    @test x[1] == 1
-    @test x[2] == 2
-    @test x[3] == 3
-    @test x[4] == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 10000
-    sizehint!(x, 4)
-    @test x[1] == 1
-    @test x[2] == 2
-    @test x[3] == 3
-    @test x[4] == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
-
-    x = [1, 2, 3, 4]
-    @test x[1] == 1
-    @test x[2] == 2
-    @test x[3] == 3
-    @test x[4] == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
-    sizehint!(x, 1000000)
-    @test x[1] == 1
-    @test x[2] == 2
-    @test x[3] == 3
-    @test x[4] == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 1000000
-    sizehint!(x, 4)
-    @test x[1] == 1
-    @test x[2] == 2
-    @test x[3] == 3
-    @test x[4] == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
-    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
+    function check_array(x, size, capacity)
+        @test x[1] == 1
+        @test x[2] == 2
+        @test x[3] == 3
+        @test x[4] == 4
+        @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == size
+        @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == capacity
+    end
+    for hint_size = [10000, 1000000]
+        x = [1, 2, 3, 4]
+        check_array(x, 4, 4)
+        sizehint!(x, hint_size)
+        check_array(x, 4, hint_size)
+        sizehint!(x, 4; shrink = false)
+        check_array(x, 4, hint_size)
+        sizehint!(x, 4)
+        check_array(x, 4, 4)
+    end
 end
diff --git a/test/some.jl b/test/some.jl
index e49fc586a3a6e..c677458fab002 100644
--- a/test/some.jl
+++ b/test/some.jl
@@ -3,7 +3,7 @@
 ## promote()
 
 @test promote_type(Some{Int}, Some{Float64}) == Some
-@test promote_type(Some{Int}, Some{Real}) == Some{Real}
+@test promote_type(Some{Int}, Some{Real}) == Some
 @test promote_type(Some{Int}, Nothing) == Union{Some{Int},Nothing}
 
 ## convert()
@@ -44,11 +44,37 @@
 
 ##  == and isequal nothing
 
-@test Some(1) != nothing
-@test Some(nothing) != nothing
+@test Some(1) !== nothing
+@test Some(nothing) !== nothing
 @test !isequal(Some(1), nothing)
 @test !isequal(Some(nothing), nothing)
 
+# Some with something else is false
+@test !=(Some(nothing), nothing)
+@test !=(nothing, Some(nothing))
+
+# Two `Some`s forward to their wrapped things
+@test ==(Some([0x1]), Some([1]))
+
+# propagate wrapped missings
+@test !=(Some(1), Some(missing)) isa Missing
+@test !=(Some(missing), Some(1)) isa Missing
+@test ==(Some(missing), Some(missing)) isa Missing
+
+# Make sure to still propagate non-wrapped Missing
+@test ==(Some(1), missing) isa Missing
+@test ==(missing, Some(1)) isa Missing
+
+@test isequal(Some([0x1]), Some([1]))
+@test !isequal(missing, Some(missing))
+@test !isequal(Some(missing), missing)
+@test isequal(Some(missing), Some(missing))
+
+# hashing implications
+@test hash(Some(0x1)) != hash(0x1)
+@test hash(Some(0x1)) == hash(Some(1))
+@test hash((Some(1),)) != hash((1, Some))
+
 @testset "something" begin
     @test_throws ArgumentError something()
     @test something(1) === 1
diff --git a/test/sorting.jl b/test/sorting.jl
index 147a70a5db7d9..15794abcbec25 100644
--- a/test/sorting.jl
+++ b/test/sorting.jl
@@ -9,6 +9,12 @@ using Test
 isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
 using .Main.OffsetArrays
 
+@testset "Base.Sort docstrings" begin
+    undoc = Docs.undocumented_names(Base.Sort)
+    @test_broken isempty(undoc)
+    @test undoc == [:Algorithm, :SMALL_THRESHOLD, :Sort]
+end
+
 @testset "Order" begin
     @test Forward == ForwardOrdering()
     @test ReverseOrdering(Forward) == ReverseOrdering() == Reverse
@@ -94,12 +100,22 @@ function tuple_sort_test(x)
     @test 0 == @allocated sort(x)
 end
 @testset "sort(::NTuple)" begin
+    @test sort(()) == ()
     @test sort((9,8,3,3,6,2,0,8)) == (0,2,3,3,6,8,8,9)
     @test sort((9,8,3,3,6,2,0,8), by=x->x÷3) == (2,0,3,3,8,6,8,9)
     for i in 1:40
-        tuple_sort_test(tuple(rand(i)...))
+        tuple_sort_test(rand(NTuple{i, Float64}))
     end
-    @test_throws ArgumentError sort((1,2,3.0))
+    @test_throws MethodError sort((1,2,3.0))
+    @test Base.infer_return_type(sort, Tuple{Tuple{Vararg{Int}}}) == Tuple{Vararg{Int}}
+end
+
+@testset "KeySet and ValueIterator" begin
+    x = Dict(rand() => randstring() for _ in 1:10)
+    x0 = deepcopy(x)
+    @test issorted(sort(keys(x))::Vector{Float64})
+    @test issorted(sort(values(x))::Vector{String})
+    @test x == x0
 end
 
 @testset "partialsort" begin
@@ -544,23 +560,6 @@ end
     @test isequal(a, [8,6,7,NaN,5,3,0,9])
 end
 
-@testset "sort!(iterable)" begin
-    gen = (x % 7 + 0.1x for x in 1:50)
-    @test sort(gen) == sort!(collect(gen))
-    gen = (x % 7 + 0.1y for x in 1:10, y in 1:5)
-    @test sort(gen; dims=1) == sort!(collect(gen); dims=1)
-    @test sort(gen; dims=2) == sort!(collect(gen); dims=2)
-
-    @test_throws ArgumentError("dimension out of range") sort(gen; dims=3)
-
-    @test_throws UndefKeywordError(:dims) sort(gen)
-    @test_throws UndefKeywordError(:dims) sort(collect(gen))
-    @test_throws UndefKeywordError(:dims) sort!(collect(gen))
-
-    @test_throws ArgumentError sort("string")
-    @test_throws ArgumentError("1 cannot be sorted") sort(1)
-end
-
 @testset "sort!(::AbstractVector{<:Integer}) with short int range" begin
     a = view([9:-1:0;], :)::SubArray
     sort!(a)
@@ -606,6 +605,26 @@ end
     @test searchsortedfirst(o, 1.5) == 0
     @test searchsortedlast(o, 0) == firstindex(o) - 1
     @test searchsortedlast(o, 1.5) == -1
+
+    # Issue #56457
+    o2 = OffsetArray([2,2,3], typemax(Int)-3);
+    @test searchsorted(o2, 2) == firstindex(o2):firstindex(o2)+1
+
+    struct IdentityVector <: AbstractVector{Int}
+        lo::Int
+        hi::Int
+    end
+    function Base.getindex(s::IdentityVector, i::Int)
+        s.lo <= i <= s.hi || throw(BoundsError(s, i))
+        i
+    end
+    Base.axes(s::IdentityVector) = (s.lo:s.hi,)
+    Base.size(s::IdentityVector) = length.(axes(s))
+
+    o3 = IdentityVector(typemin(Int), typemin(Int)+5)
+    @test searchsortedfirst(o3, typemin(Int)+2) === typemin(Int)+2
+    @test searchsortedlast(o3, typemin(Int)+2) === typemin(Int)+2
+    @test searchsorted(o3, typemin(Int)+2) === typemin(Int)+2:typemin(Int)+2
 end
 
 function adaptive_sort_test(v; trusted=InsertionSort, kw...)
@@ -735,6 +754,7 @@ end
     safe_algs = [InsertionSort, MergeSort, Base.Sort.ScratchQuickSort(), Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE]
 
     n = 1000
+    Random.seed!(0x3588d23f15e74060);
     v = rand(1:5, n);
     s = sort(v);
 
@@ -752,8 +772,9 @@ end
     for alg in safe_algs
         @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm
     end
-    @test partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172]
-    @test partialsort(1:n, 315:415, lt = (i,j) -> v[i]<=v[j]) == perm[315:415]
+    # Broken by the introduction of BracketedSort in #52006 which is unstable
+    # @test_broken partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172] (sometimes passes due to RNG)
+    @test_broken partialsort(1:n, 315:415, lt = (i,j) -> v[i]<=v[j]) == perm[315:415]
 
     # lt can be very poorly behaved and sort will still permute its input in some way.
     for alg in safe_algs
@@ -787,6 +808,16 @@ end
     end
 end
 
+@testset "partialsort(x; scratch)" begin
+    for n in [1,10,100,1000]
+        v = rand(n)
+        scratch = [0.0]
+        k = n ÷ 2 + 1
+        @test partialsort(v, k) == partialsort(v, k; scratch)
+        @test partialsort!(copy(v), k) == partialsort!(copy(v), k; scratch)
+    end
+end
+
 @testset "sorting preserves identity" begin
     a = BigInt.([2, 2, 2, 1, 1, 1]) # issue #39620
     sort!(a)
@@ -822,9 +853,9 @@ end
     let
         requires_uint_mappable = Union{Base.Sort.RadixSort, Base.Sort.ConsiderRadixSort,
             Base.Sort.CountingSort, Base.Sort.ConsiderCountingSort,
-            typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes),
-            typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big),
-            typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big.next)}
+            typeof(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS.next.next.next.big.next.yes),
+            typeof(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS.next.next.next.big.next.yes.big),
+            typeof(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS.next.next.next.big.next.yes.big.next)}
 
         function test_alg(kw, alg, float=true)
             for order in [Base.Forward, Base.Reverse, Base.By(x -> x^2)]
@@ -864,15 +895,18 @@ end
             end
         end
 
-        test_alg_rec(Base.DEFAULT_STABLE)
+        test_alg_rec(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS)
     end
 end
 
 @testset "show(::Algorithm)" begin
-    @test eval(Meta.parse(string(Base.DEFAULT_STABLE))) === Base.DEFAULT_STABLE
-    lines = split(string(Base.DEFAULT_STABLE), '\n')
+    @test eval(Meta.parse(string(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS))) === Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS
+    lines = split(string(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS), '\n')
     @test 10 < maximum(length, lines) < 100
     @test 1 < length(lines) < 30
+
+    @test eval(Meta.parse(string(Base.DEFAULT_STABLE))) === Base.DEFAULT_STABLE
+    @test string(Base.DEFAULT_STABLE) == "Base.Sort.DefaultStable()"
 end
 
 @testset "Extensibility" begin
@@ -913,6 +947,20 @@ end
     end
     @test sort([1,2,3], alg=MySecondAlg()) == [9,9,9]
     @test all(sort(v, alg=Base.Sort.InitialOptimizations(MySecondAlg())) .=== vcat(fill(9, 100), fill(missing, 10)))
+
+    # Tuple extensions (custom alg)
+    @test_throws MethodError sort((1,2,3), alg=MyFirstAlg())
+    Base.Sort._sort(v::NTuple, ::MyFirstAlg, o::Base.Order.Ordering, kw) = (17,2,9)
+    @test sort((1,2,3), alg=MyFirstAlg()) == (17,2,9)
+
+    struct TupleFoo
+        x::Int
+    end
+
+    # Tuple extensions (custom type)
+    @test_throws MethodError sort(TupleFoo.((3,1,2)))
+    Base.Sort._sort(v::NTuple{N, TupleFoo}, ::Base.Sort.DefaultStable, o::Base.Order.Ordering, kw) where N = v
+    @test sort(TupleFoo.((3,1,2))) === TupleFoo.((3,1,2))
 end
 
 @testset "sort!(v, lo, hi, alg, order)" begin
@@ -971,9 +1019,10 @@ end
 end
 
 @testset "ScratchQuickSort allocations on non-concrete eltype" begin
-    v = Vector{Union{Nothing, Bool}}(rand(Bool, 10000))
-    @test 4 == @allocations sort(v)
-    @test 4 == @allocations sort(v; alg=Base.Sort.ScratchQuickSort())
+    let v = Vector{Union{Nothing, Bool}}(rand(Bool, 10000))
+        @test 10 > @allocations sort(v)
+        @test 10 > @allocations sort(v; alg=Base.Sort.ScratchQuickSort())
+    end
     # it would be nice if these numbers were lower (1 or 2), but these
     # test that we don't have O(n) allocations due to type instability
 end
@@ -981,15 +1030,15 @@ end
 function test_allocs()
     v = rand(10)
     i = randperm(length(v))
-    @test 1 == @allocations sort(v)
+    @test 2 >= @allocations sort(v)
     @test 0 == @allocations sortperm!(i, v)
     @test 0 == @allocations sort!(i)
     @test 0 == @allocations sortperm!(i, v, rev=true)
-    @test 1 == @allocations sortperm(v, rev=true)
-    @test 1 == @allocations sortperm(v, rev=false)
+    @test 2 >= @allocations sortperm(v, rev=true)
+    @test 2 >= @allocations sortperm(v, rev=false)
     @test 0 == @allocations sortperm!(i, v, order=Base.Reverse)
-    @test 1 == @allocations sortperm(v)
-    @test 1 == @allocations sortperm(i, by=sqrt)
+    @test 2 >= @allocations sortperm(v)
+    @test 2 >= @allocations sortperm(i, by=sqrt)
     @test 0 == @allocations sort!(v, lt=(a, b) -> hash(a) < hash(b))
     sort!(Int[], rev=false) # compile
     @test 0 == @allocations sort!(i, rev=false)
@@ -1065,6 +1114,37 @@ end
     @test issorted(sort!(rand(100), Base.Sort.InitialOptimizations(DispatchLoopTestAlg()), Base.Order.Forward))
 end
 
+# Pathologize 0 is a noop, pathologize 3 is fully pathological
+function pathologize!(x, level)
+    Base.require_one_based_indexing(x)
+    k2 = Int(cbrt(length(x))^2)
+    seed = hash(length(x), Int === Int64 ? 0x85eb830e0216012d : 0xae6c4e15)
+    for a in 1:level
+        seed = hash(a, seed)
+        x[mod.(hash.(1:k2, seed), range.(1:k2,lastindex(x)))] .= a
+    end
+    x
+end
+
+@testset "partialsort tests added for BracketedSort #52006" begin
+    for x in [pathologize!.(Ref(rand(Int, 1000)), 0:3); pathologize!.(Ref(rand(1000)), 0:3); [pathologize!(rand(Int, 1_000_000), 3)]]
+        @test partialsort(x, 1) == minimum(x)
+        @test partialsort(x, lastindex(x)) == maximum(x)
+        sx = sort(x)
+        for i in [1, 2, 4, 10, 11, 425, 500, 845, 991, 997, 999, 1000]
+            @test partialsort(x, i) == sx[i]
+        end
+        for i in [1:1, 1:2, 1:5, 1:8, 1:9, 1:11, 1:108, 135:812, 220:586, 363:368, 450:574, 458:597, 469:638, 487:488, 500:501, 584:594, 1000:1000]
+            @test partialsort(x, i) == sx[i]
+        end
+    end
+end
+
+@testset "partialsort! for UnwrappableSubArray with non-zero offset on 1.11 (#59569)" begin
+    a = reshape(6000:-1:1, 1000, :) |> collect;
+    @test partialsort!(view(copy(a), :, 6), 500:501) == [500, 501]
+end
+
 # This testset is at the end of the file because it is slow.
 @testset "searchsorted" begin
     numTypes = [ Int8,  Int16,  Int32,  Int64,  Int128,
@@ -1225,6 +1305,16 @@ end
             @test searchsorted(v, 0.1, rev=true) === 4:3
         end
     end
+
+    @testset "ranges issue #44102, PR #50365" begin
+        # range sorting test for different Ordering parameter combinations
+        @test searchsorted(-1000.0:1:1000, -0.0) === 1001:1000
+        @test searchsorted(-1000.0:1:1000, -0.0; lt=<) === 1001:1001
+        @test searchsorted(-1000.0:1:1000, -0.0; lt=<, by=x->x) === 1001:1001
+        @test searchsorted(reverse(-1000.0:1:1000), -0.0; lt=<, by=-) === 1001:1001
+        @test searchsorted(reverse(-1000.0:1:1000), -0.0, rev=true) === 1002:1001
+        @test searchsorted(reverse(-1000.0:1:1000), -0.0; lt=<, rev=true) === 1001:1001
+    end
 end
 # The "searchsorted" testset is at the end of the file because it is slow.
 
diff --git a/test/spawn.jl b/test/spawn.jl
index 0241c65573886..0a3a84ad9690b 100644
--- a/test/spawn.jl
+++ b/test/spawn.jl
@@ -4,7 +4,7 @@
 # Cross Platform tests for spawn. #
 ###################################
 
-using Random, Sockets
+using Random, Sockets, SHA
 using Downloads: Downloads, download
 
 valgrind_off = ccall(:jl_running_on_valgrind, Cint, ()) == 0
@@ -21,9 +21,11 @@ sleepcmd = `sleep`
 lscmd = `ls`
 havebb = false
 
+busybox_hash_correct(file) = bytes2hex(open(SHA.sha256, file)) == "ed2f95da9555268e93c7af52feb48e148534ee518b9128f65dda9a2767b61b9e"
+
 function _tryonce_download_from_cache(desired_url::AbstractString)
-    cache_url = "https://cache.julialang.org/foo/$(desired_url)"
-    cache_output_filename = joinpath(mktempdir(), "myfile")
+    cache_url = "https://cache.julialang.org/$(desired_url)"
+    cache_output_filename = joinpath(mktempdir(), "busybox" * (Sys.iswindows() ? ".exe" : ""))
     cache_response = Downloads.request(
         cache_url;
         output = cache_output_filename,
@@ -32,9 +34,14 @@ function _tryonce_download_from_cache(desired_url::AbstractString)
     )
     if cache_response isa Downloads.Response
         if Downloads.status_ok(cache_response.proto, cache_response.status)
-            return cache_output_filename
+            if busybox_hash_correct(cache_output_filename)
+                return cache_output_filename
+            else
+                @warn "The busybox executable downloaded from the cache has an incorrect hash" cache_output_filename bytes2hex(open(SHA.sha256, cache_output_filename))
+            end
         end
     end
+    @warn "Could not download from cache at $cache_url, falling back to primary source at $desired_url"
     return Downloads.download(desired_url; timeout = 60)
 end
 
@@ -46,7 +53,11 @@ function download_from_cache(desired_url::AbstractString)
 end
 
 if Sys.iswindows()
-    busybox = download_from_cache("https://frippery.org/files/busybox/busybox.exe")
+    # See https://frippery.org/files/busybox/
+    # latest as of 2024-09-20 18:08
+    busybox = download_from_cache("https://frippery.org/files/busybox/busybox-w32-FRP-5467-g9376eebd8.exe")
+    busybox_hash_correct(busybox) || error("The busybox executable downloaded has an incorrect hash")
+
     havebb = try # use busybox-w32 on windows, if available
         success(`$busybox`)
         true
@@ -78,7 +89,7 @@ out = read(`$echocmd hello` & `$echocmd world`, String)
 @test occursin("hello", out)
 @test read(pipeline(`$echocmd hello` & `$echocmd world`, sortcmd), String) == "hello\nworld\n"
 
-@test (run(`$printfcmd "       \033[34m[stdio passthrough ok]\033[0m\n"`); true)
+@test_warn r"[stdio passthrough ok]" run(pipeline(`$printfcmd "       \033[34m[stdio passthrough ok]\033[0m\n"`, stdout=stderr, stderr=stderr))
 
 # Test for SIGPIPE being a failure condition
 @test_throws ProcessFailedException run(pipeline(yescmd, `head`, devnull))
@@ -113,6 +124,7 @@ end
 @test !success(ignorestatus(falsecmd) & falsecmd)
 @test_broken  success(ignorestatus(pipeline(falsecmd, falsecmd)))
 @test_broken  success(ignorestatus(falsecmd & falsecmd))
+@test run(ignorestatus(pipeline(falsecmd; stderr=devnull, stdout=devnull))).exitcode != 0
 
 # stdin Redirection
 let file = tempname()
@@ -573,7 +585,19 @@ end
 @test Cmd(`foo`, env=["A=true"]).env      == ["A=true"]
 @test Cmd(`foo`, env=("A"=>true,)).env    == ["A=true"]
 @test Cmd(`foo`, env=["A"=>true]).env     == ["A=true"]
-@test Cmd(`foo`, env=nothing).env         == nothing
+@test Cmd(`foo`, env=nothing).env         === nothing
+
+# uid/gid - exercise code path with current effective ids (doesn't test privilege change)
+if !Sys.iswindows()
+    @test success(setuid(setgid(`$(Base.julia_cmd()) -e "exit(0)"`, Libc.getegid()), Libc.geteuid()))
+    # test show method for uid/gid
+    cmd_gid = setgid(`echo test`, 1000)
+    @test string(cmd_gid) == "setgid(`echo test`, 1000)"
+    cmd_uid = setuid(`echo test`, 1001)
+    @test string(cmd_uid) == "setuid(`echo test`, 1001)"
+    cmd_both = setuid(setgid(`echo test`, 1000), 1001)
+    @test string(cmd_both) == "setgid(setuid(`echo test`, 1001), 1000)"
+end
 
 # test for interpolation of Cmd
 let c = setenv(`x`, "A"=>true)
@@ -609,7 +633,9 @@ end
 @test reduce(&, [`$echocmd abc`, `$echocmd def`, `$echocmd hij`]) == `$echocmd abc` & `$echocmd def` & `$echocmd hij`
 
 # readlines(::Cmd), accidentally broken in #20203
-@test sort(readlines(`$lscmd -A`)) == sort(readdir())
+let str = "foo\nbar"
+    @test readlines(`$echocmd $str`) == split(str)
+end
 
 # issue #19864 (PR #20497)
 let c19864 = readchomp(pipeline(ignorestatus(
@@ -660,9 +686,21 @@ let p = run(`$sleepcmd 100`, wait=false)
     kill(p)
 end
 
-# Second argument of shell_parse
+# Second return of shell_parse
 let s = "   \$abc   "
-    @test s[Base.shell_parse(s)[2]] == "abc"
+    @test Base.shell_parse(s)[2] === findfirst('a', s)
+    s = "abc def"
+    @test Base.shell_parse(s)[2] === findfirst('d', s)
+    s = "abc 'de'f\"\"g"
+    @test Base.shell_parse(s)[2] === findfirst('\'', s)
+    s = "abc \$x'de'f\"\"g"
+    @test Base.shell_parse(s)[2] === findfirst('\'', s)
+    s = "abc def\$x'g'"
+    @test Base.shell_parse(s)[2] === findfirst('\'', s)
+    s = "abc def\$x "
+    @test Base.shell_parse(s)[2] === findfirst('x', s)
+    s = "abc \$(d)ef\$(x "
+    @test Base.shell_parse(s)[2] === findfirst('x', s) - 1
 end
 
 # Logging macros should not output to finalized streams (#26687)
@@ -795,8 +833,9 @@ let text = "input-test-text"
     out = Base.BufferStream()
     proc = run(catcmd, IOBuffer(text), out, wait=false)
     @test proc.out === out
-    @test read(out, String) == text
     @test success(proc)
+    closewrite(out)
+    @test read(out, String) == text
 
     out = PipeBuffer()
     proc = run(catcmd, IOBuffer(SubString(text)), out)
@@ -1003,5 +1042,62 @@ end
     args = ["ab ^` c", " \" ", "\"", ascii95, ascii95,
             "\"\\\"\\", "", "|", "&&", ";"];
     @test Base.shell_escape_wincmd(Base.escape_microsoft_c_args(args...)) == "\"ab ^` c\" \" \\\" \" \"\\\"\" \" !\\\"#\$%^&'^(^)*+,-./0123456789:;^<=^>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^^_`abcdefghijklmnopqrstuvwxyz{^|}~\" \" ^!\\\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\" \"\\\"\\\\\\\"\\\\\" \"\" ^| ^&^& ;"
+end
+
+# effects for Cmd construction
+for f in (() -> `a b c`, () -> `a a$("bb")a $("c")`)
+    effects = Base.infer_effects(f)
+    @test Core.Compiler.is_effect_free(effects)
+    @test Core.Compiler.is_terminates(effects)
+    @test Core.Compiler.is_noub(effects)
+    @test !Core.Compiler.is_consistent(effects)
+end
+let effects = Base.infer_effects(x -> `a $x`, (Any,))
+    @test !Core.Compiler.is_effect_free(effects)
+    @test !Core.Compiler.is_terminates(effects)
+    @test !Core.Compiler.is_noub(effects)
+    @test !Core.Compiler.is_consistent(effects)
+end
 
+# Test that Cmd accepts various AbstractStrings
+@testset "AbstractStrings" begin
+    args = split("-l /tmp")
+    @assert eltype(args) != String
+    @test Cmd(["ls", args...]) == `ls -l /tmp`
+end
+
+let buf = IOBuffer()
+    run(pipeline(`$(Base.julia_cmd()) -e 'println(Base.PipeEndpoint(RawFD(3)), "Hello")'`, 3=>buf))
+    @test String(take!(buf)) == "Hello\n"
+end
+
+# Test passing a pipe server as an addition fd
+@testset "Pipe server as additional fd" begin
+    if !Sys.iswindows()
+        # Windows CRT does not support passing server sockets as stdio fds
+        mktempdir() do dir
+            path = joinpath(dir, "test.sock")
+            server = Sockets.PipeServer()
+            bind(server, path)
+            Base.errormonitor(@async begin
+                local client
+                while true
+                    try
+                        client = Sockets.connect(path)
+                        break
+                    catch e
+                        isa(e, Base.IOError) || rethrow(e)
+                    end
+                    sleep(1)
+                end
+                println(client, "Hello Socket!")
+                closewrite(client)
+            end)
+            buf = IOBuffer()
+            proc = run(`$(Base.julia_cmd()) -e 'using Sockets; s = listen(Sockets.PipeServer(RawFD(3))); c = accept(s); print(read(c, String))'`, devnull, buf, stderr, server)
+            close(server)
+            @test success(proc)
+            @test String(take!(buf)) == "Hello Socket!\n"
+        end
+    end
 end
diff --git a/test/specificity.jl b/test/specificity.jl
index 9b605444bad42..46253d02b0c7f 100644
--- a/test/specificity.jl
+++ b/test/specificity.jl
@@ -1,9 +1,9 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 function args_morespecific(a, b)
-    sp = (ccall(:jl_type_morespecific, Cint, (Any,Any), a, b) != 0)
+    sp = Base.morespecific(a, b)
     if sp  # make sure morespecific(a,b) implies !morespecific(b,a)
-        @test ccall(:jl_type_morespecific, Cint, (Any,Any), b, a) == 0
+        @test !Base.morespecific(b, a)
     end
     return sp
 end
@@ -170,7 +170,7 @@ let A = Tuple{Ref, Tuple{T}} where T,
 end
 
 # issue #22339
-let A = Tuple{T, Array{T, 1}} where T,
+let A = Tuple{T, Vector{T}} where T,
     B = Tuple{T} where T,
     C = Tuple{T} where T<:AbstractFloat
     @test args_morespecific(B, A)
@@ -316,3 +316,14 @@ end
 @test args_morespecific(Tuple{typeof(Union{}), Any}, Tuple{Any, Type{Union{}}})
 @test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any}, Tuple{Type{Union{}}, Any, Type{Union{}}})
 @test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any, Type{Union{}}}, Tuple{Type{Union{}}, Any, Type{Union{}}, Type{Union{}}})
+
+# requires assertions enabled
+let root = NTuple
+    N = root.var
+    T = root.body.var
+    x1 = root.body.body
+    x2 = Dict{T,Tuple{N}}
+    A = UnionAll(N, UnionAll(T, Tuple{Union{x1, x2}}))
+    B = Tuple{Union{UnionAll(N, UnionAll(T, x1)), UnionAll(N, UnionAll(T, x2))}}
+    @ccall jl_type_morespecific_no_subtype(A::Any, B::Any)::Cint
+end
diff --git a/test/stacktraces.jl b/test/stacktraces.jl
index 590abb90c590f..028588bb73a53 100644
--- a/test/stacktraces.jl
+++ b/test/stacktraces.jl
@@ -90,10 +90,12 @@ f(x) = (y = h(x); y)
 trace = (try; f(3); catch; stacktrace(catch_backtrace()); end)[1:3]
 can_inline = Bool(Base.JLOptions().can_inline)
 for (frame, func, inlined) in zip(trace, [g,h,f], (can_inline, can_inline, false))
-    @test frame.func === typeof(func).name.mt.name
-    @test frame.linfo.def.module === which(func, (Any,)).module
-    @test frame.linfo.def === which(func, (Any,))
-    @test frame.linfo.specTypes === Tuple{typeof(func), Int}
+    @test frame.func === typeof(func).name.singletonname
+    # broken until #50082 can be addressed
+    mi = isa(frame.linfo, Core.CodeInstance) ? frame.linfo.def : frame.linfo
+    @test mi.def.module === which(func, (Any,)).module broken=inlined
+    @test mi.def === which(func, (Any,)) broken=inlined
+    @test mi.specTypes === Tuple{typeof(func), Int} broken=inlined
     # line
     @test frame.file === Symbol(@__FILE__)
     @test !frame.from_c
@@ -101,21 +103,16 @@ for (frame, func, inlined) in zip(trace, [g,h,f], (can_inline, can_inline, false
 end
 end
 
-let src = Meta.lower(Main, quote let x = 1 end end).args[1]::Core.CodeInfo,
-    li = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ()),
-    sf
-
-    setfield!(li, :uninferred, src, :monotonic)
-    li.specTypes = Tuple{}
-    li.def = @__MODULE__
+let src = Meta.lower(Main, quote let x = 1 end end).args[1]::Core.CodeInfo
+    li = ccall(:jl_method_instance_for_thunk, Ref{Core.MethodInstance}, (Any, Any), src, @__MODULE__)
     sf = StackFrame(:a, :b, 3, li, false, false, 0)
     repr = string(sf)
     @test repr == "Toplevel MethodInstance thunk at b:3"
 end
-let li = typeof(fieldtype).name.mt.cache.func::Core.MethodInstance,
+let li = only(methods(fieldtype)).unspecialized,
     sf = StackFrame(:a, :b, 3, li, false, false, 0),
     repr = string(sf)
-    @test repr == "fieldtype(...) at b:3"
+    @test repr == "fieldtype(::Vararg{Any}) at b:3"
 end
 
 let ctestptr = cglobal((:ctest, "libccalltest")),
@@ -159,6 +156,22 @@ end
 @test bt[1].line == topline+4
 end
 
+# Accidental incorrect phi block computation in interpreter
+global global_false_bool = false
+let bt, topline = @__LINE__
+    try
+        let
+            global read_write_global_bt_test, global_false_bool
+            if global_false_bool
+            end
+            (read_write_global_bt_test, (read_write_global_bt_test=2;))
+        end
+    catch
+        bt = stacktrace(catch_backtrace())
+    end
+    @test bt[1].line == topline+6
+end
+
 # issue #28990
 let bt
 try
@@ -234,15 +247,54 @@ struct F49231{a,b,c,d,e,f,g} end
     catch e
         stacktrace(catch_backtrace())
     end
-    str = sprint(Base.show_backtrace, st, context = (:limit=>true, :color=>true, :displaysize=>(50,105)))
-    @test endswith(str, "to see complete types.")
-    @test contains(str, "[5] \e[0m\e[1mcollect_to!\e[22m\e[0m\e[1m(\e[22m\e[90mdest\e[39m::\e[0mVector\e[90m{…}\e[39m, \e[90mitr\e[39m::\e[0mBase.Generator\e[90m{…}\e[39m, \e[90moffs\e[39m::\e[0m$Int, \e[90mst\e[39m::\e[0mTuple\e[90m{…}\e[39m\e[0m\e[1m)\e[22m\n\e[90m")
+    str = sprint(Base.show_backtrace, st, context = (:limit=>true, :stacktrace_types_limited => Ref(false), :color=>true, :displaysize=>(50,105)))
+    @test contains(str, "[5] \e[0m\e[1mcollect_to!\e[22m\e[0m\e[1m(\e[22m\e[90mdest\e[39m::\e[0mVector\e[90m{…}\e[39m, \e[90mitr\e[39m::\e[0mBase.Generator\e[90m{…}\e[39m, \e[90moffs\e[39m::\e[0m$Int, \e[90mst\e[39m::\e[0m$Int\e[0m\e[1m)\e[22m\n")
 
     st = try
         F49231{Vector,Val{'}'},Vector{Vector{Vector{Vector}}},Tuple{Int,Int,Int,Int,Int,Int,Int},Int,Int,Int}()(1,2,3)
     catch e
         stacktrace(catch_backtrace())
     end
-    str = sprint(Base.show_backtrace, st, context = (:limit=>true, :color=>true, :displaysize=>(50,132)))
-    @test contains(str, "[2] \e[0m\e[1m(::$F49231{Vector, Val{…}, Vector{…}, NTuple{…}, $Int, $Int, $Int})\e[22m\e[0m\e[1m(\e[22m\e[90ma\e[39m::\e[0m$Int, \e[90mb\e[39m::\e[0m$Int, \e[90mc\e[39m::\e[0m$Int\e[0m\e[1m)\e[22m\n\e[90m")
+    str = sprint(Base.show_backtrace, st, context = (:limit=>true, :stacktrace_types_limited => Ref(false), :color=>true, :displaysize=>(50,132)))
+    @test contains(str, "[2] \e[0m\e[1m(::$F49231{Vector, Val{…}, Vector{…}, NTuple{…}, $Int, $Int, $Int})\e[22m\e[0m\e[1m(\e[22m\e[90ma\e[39m::\e[0m$Int, \e[90mb\e[39m::\e[0m$Int, \e[90mc\e[39m::\e[0m$Int\e[0m\e[1m)\e[22m\n")
+end
+
+@testset "Base.StackTraces docstrings" begin
+    @test isempty(Docs.undocumented_names(StackTraces))
+end
+
+
+@testset "Dispatch backtraces" begin
+    # Check that it's possible to capture a backtrace upon entrance to inference
+    # This test ensures that SnoopCompile will continue working
+    # See in particular SnoopCompile/SnoopCompileCore/src/snoop_inference.jl
+    # and the "diagnostics" devdoc.
+    @noinline callee(x::Int) = sin(x)
+    caller(x) = invokelatest(callee, x)
+
+    @test sin(0) == 0  # force compilation of sin(::Int)
+    dispatch_backtraces = []
+    ccall(:jl_set_inference_entrance_backtraces, Cvoid, (Any,), dispatch_backtraces)
+    caller(3)
+    ccall(:jl_set_inference_entrance_backtraces, Cvoid, (Any,), nothing)
+    ln = @__LINE__() - 2
+    fl = Symbol(@__FILE__())
+    @test length(dispatch_backtraces) == 4  # 2 ci-backtrace pairs, stored as 4 separate elements
+    mcallee, mcaller = only(methods(callee)), only(methods(caller))
+    # Extract pairs from the flattened array format: ci at odd indices, backtrace at even indices
+    pairs = [(dispatch_backtraces[i], dispatch_backtraces[i+1]) for i in 1:2:length(dispatch_backtraces)]
+    @test any(pairs) do (ci, trace)
+        # trace is a SimpleVector from jl_backtrace_from_here, need to reformat before stacktrace
+        bt = Base._reformat_bt(trace[1], trace[2])
+        ci.def.def === mcallee && any(stacktrace(bt)) do sf
+            sf.file == fl && sf.line == ln
+        end
+    end
+    @test any(pairs) do (ci, trace)
+        # trace is a SimpleVector from jl_backtrace_from_here, need to reformat before stacktrace
+        bt = Base._reformat_bt(trace[1], trace[2])
+        ci.def.def === mcaller && any(stacktrace(bt)) do sf
+            sf.file == fl && sf.line == ln
+        end
+    end
 end
diff --git a/test/staged.jl b/test/staged.jl
index df351d8d47b96..4fb5d03331711 100644
--- a/test/staged.jl
+++ b/test/staged.jl
@@ -1,5 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# N.B.: This file is also run from interpreter.jl, so needs to be standalone-executable
+using Test
+
 using Random
 using InteractiveUtils: code_llvm, code_native
 
@@ -200,7 +203,7 @@ let gf_err2
     @test_throws Expected gf_err2(code_typed)
     @test_throws Expected gf_err2(code_llvm)
     @test_throws Expected gf_err2(code_native)
-    @test gf_err_ref[] == 88
+    @test gf_err_ref[] < 1000
 end
 
 # issue #15043
@@ -267,12 +270,12 @@ end
 
 # PR #23168
 
-function f23168(a, x)
+@eval function f23168(a, x)
     push!(a, 1)
     if @generated
-        :(y = x + x)
+        :(y = $(+)(x, x))
     else
-        y = 2x
+        y = $(*)(2, x)
     end
     push!(a, y)
     if @generated
@@ -287,9 +290,9 @@ end
 let a = Any[]
     @test f23168(a, 3) == (6, Int)
     @test a == [1, 6, 3]
-    @test occursin(" + ", string(code_lowered(f23168, (Vector{Any},Int))))
-    @test occursin("2 * ", string(Base.uncompressed_ir(first(methods(f23168)))))
-    @test occursin("2 * ", string(code_lowered(f23168, (Vector{Any},Int), generated=false)))
+    @test occursin("(+)(", string(code_lowered(f23168, (Vector{Any},Int))))
+    @test occursin("(*)(2", string(Base.uncompressed_ir(first(methods(f23168)))))
+    @test occursin("(*)(2", string(code_lowered(f23168, (Vector{Any},Int), generated=false)))
     @test occursin("Base.add_int", string(code_typed(f23168, (Vector{Any},Int))))
 end
 
@@ -308,6 +311,8 @@ end
 @generated function f33243()
     :(global x33243 = 2)
 end
+@test_throws ErrorException f33243()
+global x33243::Any
 @test f33243() === 2
 @test x33243 === 2
 
@@ -335,12 +340,142 @@ let world = Base.get_world_counter()
     match = Base._which(Tuple{typeof(sin), Int}; world)
     mi = Core.Compiler.specialize_method(match)
     lwr = Core.Compiler.retrieve_code_info(mi, world)
-    @test all(lin->lin.method === :sin, lwr.linetable)
+    nstmts = length(lwr.code)
+    di = Core.DebugInfo(Core.Compiler.DebugInfoStream(mi, lwr.debuginfo, nstmts), nstmts)
+    lwr.debuginfo = di
     @eval function sin_generated(a)
         $(Expr(:meta, :generated, Returns(lwr)))
         $(Expr(:meta, :generated_only))
     end
     src = only(code_lowered(sin_generated, (Int,)))
-    @test all(lin->lin.method === :sin, src.linetable)
+    @test src.debuginfo === di
     @test sin_generated(42) == sin(42)
 end
+
+# Allow passing unreachable insts in generated codeinfo
+let
+    dummy() = return
+    dummy_m = which(dummy, Tuple{})
+
+    src = Base.uncompressed_ir(dummy_m)
+    src.code = Any[
+        # block 1
+        Core.ReturnNode(nothing),
+        # block 2
+        Core.ReturnNode(),
+    ]
+    nstmts = length(src.code)
+    nslots = 1
+    src.ssavaluetypes = nstmts
+    src.debuginfo = Core.DebugInfo(:f_unreachable_generated)
+    src.ssaflags = fill(Int32(0), nstmts)
+    src.slotflags = fill(0, nslots)
+    src.slottypes = Any[Any]
+
+    @eval function f_unreachable()
+        $(Expr(:meta, :generated, Returns(src)))
+        $(Expr(:meta, :generated_only))
+    end
+
+    ir, _ = Base.code_ircode(f_unreachable, ()) |> only
+    @test length(ir.cfg.blocks) == 1
+end
+
+function generate_lambda_ex(world::UInt, source::Method,
+                            argnames, spnames, @nospecialize body)
+    stub = Core.GeneratedFunctionStub(identity, Core.svec(argnames...), Core.svec(spnames...))
+    return stub(world, source, body)
+end
+
+# Test that `Core.CachedGenerator` works as expected
+struct Generator54916 <: Core.CachedGenerator end
+function (::Generator54916)(world::UInt, source::Method, args...)
+    return generate_lambda_ex(world, source,
+        (:doit54916, :func, :arg), (), :(func(arg)))
+end
+@eval function doit54916(func, arg)
+    $(Expr(:meta, :generated, Generator54916()))
+    $(Expr(:meta, :generated_only))
+end
+@test doit54916(sin, 1) == sin(1)
+let mi = only(methods(doit54916)).specializations
+    ci = mi.cache::Core.CodeInstance
+    found = false
+    while true
+        if ci.owner === :uninferred && ci.inferred isa Core.CodeInfo
+            found = true
+            break
+        end
+        isdefined(ci, :next) || break
+        ci = ci.next
+    end
+    @test found
+end
+
+# Test that writing a bad cassette-style pass gives the expected error (#49715)
+function generator49715(world, source, self, f, tt)
+    tt = tt.parameters[1]
+    sig = Tuple{f, tt.parameters...}
+    mi = Base._which(sig; world)
+    error("oh no")
+    return generate_lambda_ex(world, source,
+        (:doit49715, :f, :tt), (), nothing)
+end
+@eval function doit49715(f, tt)
+    $(Expr(:meta, :generated, generator49715))
+    $(Expr(:meta, :generated_only))
+end
+@test_throws "oh no" doit49715(sin, Tuple{Int})
+
+# Test that the CodeInfo returned from generated function need not match the generator.
+function overdubbee54341(a, b)
+    a + b
+end
+const overdubee_codeinfo54341 = code_lowered(overdubbee54341, Tuple{Any, Any})[1]
+function overdub_generator54341(world::UInt, source::Method, selftype, fargtypes)
+    if length(fargtypes) != 2
+        return generate_lambda_ex(world, source,
+            (:overdub54341, :args), (), :(error("Wrong number of arguments")))
+    else
+        return copy(overdubee_codeinfo54341)
+    end
+end
+@eval function overdub54341(args...)
+    $(Expr(:meta, :generated, overdub_generator54341))
+    $(Expr(:meta, :generated_only))
+end
+@test overdub54341(1, 2) == 3
+# check if the inlining pass handles `nargs`/`isva` correctly
+@test first(only(code_typed((Int,Int)) do x, y; @inline overdub54341(x, y); end)) isa Core.CodeInfo
+@test first(only(code_typed((Int,)) do x; @inline overdub54341(x, 1); end)) isa Core.CodeInfo
+@test_throws "Wrong number of arguments" overdub54341(1, 2, 3)
+
+# Test the module resolution scope of generated methods that are type constructors
+module GeneratedScope57417
+    using Test
+    import ..generate_lambda_ex
+    const x = 1
+    struct Generator; end
+    @generated (::Generator)() = :x
+    f(x::Int) = 1
+    module OtherModule
+        import ..f
+        const x = 2
+        @generated f(::Float64) = :x
+    end
+    import .OtherModule: f
+    @test Generator()() == 1
+    @test f(1.0) == 2
+
+    function g_generator(world::UInt, source::Method, _)
+        return generate_lambda_ex(world, source, (:g,), (), :(return x))
+    end
+
+    @eval function g()
+        $(Expr(:meta, :generated, g_generator))
+        $(Expr(:meta, :generated_only))
+    end
+    @test g() == 1
+end
+
+@test_throws "syntax: expression too large" code_lowered(ntuple, (Returns{Nothing}, Val{1000000}))
diff --git a/test/stdlib_dependencies.jl b/test/stdlib_dependencies.jl
new file mode 100644
index 0000000000000..f034c650c75a0
--- /dev/null
+++ b/test/stdlib_dependencies.jl
@@ -0,0 +1,341 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Libdl
+using Test
+
+# Load ObjectFile.jl from the vendored jlutilities depot
+buildroot = get(ENV, "JULIA_TEST_BUILDROOT", joinpath(@__DIR__, ".."))
+depspath = joinpath(buildroot, "deps", "jlutilities")
+if ispath(depspath)
+    depspath = realpath(depspath)
+    # With a source-tree use the vendored depot
+    pushfirst!(DEPOT_PATH, joinpath(depspath, "depot"))
+    using Pkg
+    old_active_project = Base.active_project()
+    Base.redirect_stdout(devnull) do
+        Base.redirect_stderr(devnull) do
+            Pkg.activate(realpath(joinpath(@__DIR__, "..", "deps", "jlutilities", "objectfile")))
+            Pkg.instantiate()
+        end
+    end
+    using ObjectFile
+    popfirst!(DEPOT_PATH)
+    Base.set_active_project(old_active_project)
+else
+    # Without a source-tree - expect that the user has installed it for us - warn otherwise
+    ObjectFile_pkgid = Base.PkgId(Base.UUID("d8793406-e978-5875-9003-1fc021f44a92"), "ObjectFile")
+    if Base.locate_package(ObjectFile_pkgid) !== nothing
+        @eval using ObjectFile
+    end
+end
+
+if !@isdefined(ObjectFile)
+    @warn("ObjectFile.jl not available; skipping stdlib JLL dependency tests")
+else
+    strip_soversion(lib::AbstractString) = Base.BinaryPlatforms.parse_dl_name_version(lib)[1]
+
+    function get_deps_objectfile_macos(lib_path::String)
+        open(lib_path, "r") do io
+            obj_handles = readmeta(io)
+            obj = only(obj_handles)  # If more than one its unclear what to do
+            raw_libs = String[]
+
+            # For Mach-O files, get load commands
+            if isa(obj, ObjectFile.MachOHandle)
+                for lc in ObjectFile.MachOLoadCmds(obj)
+                    if lc isa ObjectFile.MachO.MachOLoadDylibCmd
+                        # Extract the library name from the load command
+                        lib_name = ObjectFile.dylib_name(lc)
+                        if lib_name !== nothing
+                            # Remove @rpath/ prefix if present
+                            lib_name = last(split(lib_name, "@rpath/"))
+                            # Get basename
+                            lib_name = basename(lib_name)
+                            isempty(splitext(lib_name)[2]) && continue # skip frameworks
+                            push!(raw_libs, lib_name)
+                        end
+                    end
+                end
+            end
+            libs = strip_soversion.(raw_libs)
+            # Get rid of any self-referential links
+            self_lib = strip_soversion(basename(lib_path))
+            libs = filter(!=(self_lib), libs)
+            return libs
+        end
+    end
+
+    function get_deps_objectfile_linux_freebsd(lib_path::String)
+        open(lib_path, "r") do io
+            obj_handles = readmeta(io)
+            obj = first(obj_handles)  # Take the first handle from the vector
+            raw_libs = String[]
+
+            # For ELF files, get dynamic dependencies
+            if isa(obj, ObjectFile.ELFHandle)
+                # Get all dynamic entries
+                dyn_entries = ObjectFile.ELFDynEntries(obj)
+                for entry in dyn_entries
+                    # Check if the entry is of type DT_NEEDED
+                    if ObjectFile.dyn_entry_type(entry) == ObjectFile.ELF.DT_NEEDED
+                        lib_name = ObjectFile.strtab_lookup(entry)
+                        if lib_name !== nothing && !isempty(lib_name)
+                            push!(raw_libs, basename(lib_name))
+                        end
+                    end
+                end
+            end
+
+            libs = strip_soversion.(raw_libs)
+            # Self-reference is typically not listed in NEEDED for ELF, so no explicit filter here.
+            return libs
+        end
+    end
+
+    function get_deps_objectfile_windows(lib_path::String)
+        open(lib_path, "r") do io
+            obj_handles = readmeta(io)
+            obj = first(obj_handles)  # Take the first handle from the vector
+            raw_libs_set = Set{String}() # Use Set for uniqueness of DLL names
+
+            # For COFF/PE files, get import table
+            if isa(obj, ObjectFile.COFFHandle)
+                # Get dynamic links
+                dls = ObjectFile.DynamicLinks(obj)
+                for link in dls
+                    lib_name = ObjectFile.path(link)
+                    if lib_name !== nothing && !isempty(lib_name)
+                        # COFF library names are case-insensitive
+                        push!(raw_libs_set, lowercase(lib_name))
+                    end
+                end
+            end
+
+            libs = strip_soversion.(collect(raw_libs_set))
+            # Get rid of any self-referential links
+            self_lib = strip_soversion(lowercase(basename(lib_path)))
+            libs = filter(!=(self_lib), libs)
+            return libs
+        end
+    end
+
+    function get_deps_objectfile(lib_path::String)
+        if Sys.isapple()
+            return get_deps_objectfile_macos(lib_path)
+        elseif Sys.islinux() || Sys.isfreebsd()
+            return get_deps_objectfile_linux_freebsd(lib_path)
+        elseif Sys.iswindows()
+            return get_deps_objectfile_windows(lib_path)
+        else
+            error("Unsupported platform for ObjectFile.jl dependency extraction")
+        end
+    end
+
+    function is_system_lib_macos(lib)
+        system_libs = [
+            "libSystem.B",
+            "libc++", # While we package libstdc++, we do NOT package libc++.
+            "libiconv", # some things (like git) link against system libiconv
+
+            # macOS frameworks used by things like LibCurl
+            "CoreFoundation",
+            "CoreServices",
+            "Security",
+            "SystemConfiguration"
+        ]
+        return lib ∈ system_libs
+    end
+
+    function is_system_lib_linux(lib)
+        system_libs = [
+            "libdl",
+            "libc",
+            "libm",
+            "librt",
+            "libpthread",
+            "ld-linux",
+            "ld-linux-x86-64",
+            "ld-linux-x86",
+            "ld-linux-aarch64",
+            "ld-linux-armhf",
+            "ld-linux-i386",
+        ]
+        return lib ∈ system_libs
+    end
+
+    function is_system_lib_freebsd(lib)
+        system_libs = [
+            "libdl",
+            "libc",
+            "libm",
+            "libthr",      # primary threading library
+            "libpthread",  # alias kept for compatibility
+            "librt",
+            "libutil",
+            "libexecinfo",
+            "libc++",
+            "libcxxrt",
+        ]
+        return lib ∈ system_libs
+    end
+
+    function is_system_lib_windows(lib)
+        system_libs = [
+            "kernel32",
+            "user32",
+            "gdi32",
+            "advapi32",
+            "ole32",
+            "oleaut32",
+            "shell32",
+            "ws2_32",
+            "comdlg32",
+            "shlwapi",
+            "rpcrt4",
+            "msvcrt",
+            "comctl32",
+            "ucrtbase",
+            "vcruntime140",
+            "msvcp140",
+            "libwinpthread",
+            "ntdll",
+            "crypt32",
+            "bcrypt",
+            "winhttp",
+            "secur32",
+            "iphlpapi",
+        ]
+        return any(syslib -> lowercase(lib) == syslib, system_libs)
+    end
+
+    # Set up platform-specific functions
+    if Sys.islinux() || Sys.isfreebsd()
+        is_system_lib = Sys.islinux() ? is_system_lib_linux : is_system_lib_freebsd
+    elseif Sys.isapple()
+        is_system_lib = is_system_lib_macos
+    elseif Sys.iswindows()
+        is_system_lib = is_system_lib_windows
+    else
+        error("Unsupported platform for `stdlib_dependencies.jl`. Only Linux, FreeBSD, macOS, and Windows are supported.")
+    end
+
+    # Iterate over all JLL stdlibs, check their lazy libraries to ensure
+    # that they list all valid library dependencies, avoiding a situation
+    # where the JLL wrapper code has fallen out of sync with the binaries
+    # themselves.
+    @testset "Stdlib JLL dependency check" begin
+        for (_, (stdlib_name, _)) in Pkg.Types.stdlibs()
+            if !endswith(stdlib_name, "_jll")
+                continue
+            end
+
+            # Import the stdlib, skip it if it's not available on this platform
+            m = eval(Meta.parse("import $(stdlib_name); $(stdlib_name)"))
+            if !Base.invokelatest(getproperty(m, :is_available))
+                continue
+            end
+
+            for prop_name in names(m)
+                prop = getproperty(m, prop_name)
+                if isa(prop, Libdl.LazyLibrary)
+                    lib_path = dlpath(prop)
+                    lazy_lib_deps = strip_soversion.(basename.(dlpath.(prop.dependencies())))
+                    real_lib_deps = filter(!is_system_lib, get_deps_objectfile(lib_path))
+
+                    # See if there are missing dependencies in the lazy library deps
+                    missing_deps = setdiff(real_lib_deps, lazy_lib_deps)
+                    extraneous_deps = setdiff(lazy_lib_deps, real_lib_deps)
+
+                    # The library name is `libpcre2-8`, with a dash in
+                    # its name. That works fine on Unix. On Windows, a
+                    # suffix starting with a dash denotes the
+                    # library's soversion. So we think (on Windows)
+                    # that this is the library `libpcre2`, soversion
+                    # 8, and things don't match.
+                    if Sys.iswindows()
+                        if "libpcre2-8" in missing_deps && "libpcre2" in extraneous_deps
+                            missing_deps = setdiff(missing_deps, ["libpcre2-8"])
+                            extraneous_deps = setdiff(extraneous_deps, ["libpcre2"])
+                        end
+                    end
+
+                    if prop_name == :libspqr
+                        # Allow libstdc++ to not be linked - spqr only uses std::complex,
+                        # which may be header-only, so doesn't get linked on as-needed distributions.
+                        # However, in general, we can't assume that, so we need to take the dependency
+                        # and just allow this here.
+                        extraneous_deps = setdiff(extraneous_deps, ["libstdc++"])
+                    end
+
+                    # We expect there to be no missing or extraneous deps
+                    deps_mismatch = !isempty(missing_deps) || !isempty(extraneous_deps)
+
+                    # This is a manually-managed special case
+                    if stdlib_name == "libblastrampoline_jll" &&
+                        prop_name == :libblastrampoline &&
+                        extraneous_deps in (["libopenblas64_"], ["libopenblas"])
+                        deps_mismatch = false
+                    end
+
+                    @test !deps_mismatch
+
+                    # Print out the deps mismatch if we find one
+                    if deps_mismatch
+                        @warn("Dependency mismatch",
+                            jll = stdlib_name,
+                            library = string(prop_name),
+                            missing_deps = join(missing_deps, ", "),
+                            extraneous_deps = join(extraneous_deps, ", "),
+                            actual_deps = join(real_lib_deps, ", "),
+                        )
+                    end
+                end
+            end
+            if isdefined(m, :eager_mode)
+                # If the JLL has an eager_mode function, call it
+                Base.invokelatest(getproperty(m, :eager_mode))
+            end
+        end
+    end
+
+    # Check that any JLL stdlib that defines executables also provides corresponding *_path variables
+    @testset "Stdlib JLL executable path variables" begin
+        for (_, (stdlib_name, _)) in Pkg.Types.stdlibs()
+            if !endswith(stdlib_name, "_jll")
+                continue
+            end
+
+            # Import the stdlib, skip it if it's not available on this platform
+            m = eval(Meta.parse("import $(stdlib_name); $(stdlib_name)"))
+            if !Base.invokelatest(getproperty(m, :is_available))
+                continue
+            end
+
+            # Look for *_exe constants that indicate executable definitions
+            exe_constants = Symbol[]
+            for name in names(m, all=true)
+                name_str = string(name)
+                if endswith(name_str, "_exe") && isdefined(m, name)
+                    push!(exe_constants, name)
+                end
+            end
+
+            # For each *_exe constant, check if there's a corresponding *_path variable
+            for exe_const in exe_constants
+                exe_name_str = string(exe_const)
+                # Convert from *_exe to *_path (e.g., zstd_exe -> zstd_path)
+                expected_path_var = Symbol(replace(exe_name_str, "_exe" => "_path"))
+
+                @test isdefined(m, expected_path_var)
+
+                if !isdefined(m, expected_path_var)
+                    @warn("Missing path variable",
+                        jll = stdlib_name,
+                        exe_constant = exe_const,
+                        expected_path_var = expected_path_var
+                    )
+                end
+            end
+        end
+    end
+end
diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl
new file mode 100644
index 0000000000000..aca044d171747
--- /dev/null
+++ b/test/strings/annotated.jl
@@ -0,0 +1,814 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+@testset "AnnotatedString" begin
+    str = Base.AnnotatedString("some string")
+    @test str == Base.AnnotatedString(str.string, Base.RegionAnnotation[])
+    @test length(str) == 11
+    @test ncodeunits(str) == 11
+    @test codeunits(str) == codeunits("some string")
+    @test codeunit(str) == UInt8
+    @test codeunit(str, 1) == codeunit("some string", 1)
+    @test firstindex(str) == firstindex("some string")
+    @test convert(Base.AnnotatedString, str) === str
+    @test eltype(str) == Base.AnnotatedChar{eltype(str.string)}
+    @test first(str) == Base.AnnotatedChar(first(str.string), Pair{Symbol, Any}[])
+    @test str[1:4] isa SubString{typeof(str)}
+    @test str[1:4] == Base.AnnotatedString("some")
+    big_byte_str = Base.AnnotatedString("आख")
+    @test_throws StringIndexError big_byte_str[5]
+    @test "a" * str == Base.AnnotatedString("asome string")
+    @test str * "a" == Base.AnnotatedString("some stringa")
+    @test str * str == Base.AnnotatedString("some stringsome string")
+    @test cmp(str, "some stringy thingy") == -1
+    @test cmp("some stringy thingy", str) == 1
+    @test str[3:4] == SubString("me")
+    @test SubString("me") == str[3:4]
+    Base.annotate!(str, 1:4, :thing, 0x01)
+    Base.annotate!(str, 6:11, :other, 0x02)
+    Base.annotate!(str, 1:11, :all, 0x03)
+    #  :thing :other
+    #  ┌┸─┐ ┌──┸─┐
+    # "some string"
+    #  └───┰─────┘
+    #     :all
+    @test str[3:4] == SubString(str, 3, 4)
+    @test str[3:4] != SubString("me")
+    @test SubString("me") != str[3:4]
+    @test Base.AnnotatedString(str[3:4]) == SubString(str, 3, 4)
+    @test repeat(SubString(str, 3, 4), 2) == repeat(Base.AnnotatedString(str[3:4]), 2)
+    @test reverse(SubString(str, 3, 4)) == reverse(Base.AnnotatedString(str[3:4]))
+    @test Base.AnnotatedString(str[3:4]) ==
+        Base.AnnotatedString("me", [(1:2, :thing, 0x01), (1:2, :all, 0x03)])
+    @test Base.AnnotatedString(str[3:6]) ==
+        Base.AnnotatedString("me s", [(1:2, :thing, 0x01), (4:4, :other, 0x02), (1:4, :all, 0x03)])
+    @test str == Base.AnnotatedString("some string", [(1:4, :thing, 0x01), (6:11, :other, 0x02), (1:11, :all, 0x03)])
+    @test str != Base.AnnotatedString("some string")
+    @test str != Base.AnnotatedString("some string", [(1:1, :thing, 0x01), (1:11, :all, 0x03), (6:6, :other, 0x02)])
+    @test str != Base.AnnotatedString("some string", [(1:4, :thing, 0x11), (1:11, :all, 0x13), (6:11, :other, 0x12)])
+    @test str != Base.AnnotatedString("some thingg", [(1:4, :thing, 0x01), (1:11, :all, 0x03), (6:11, :other, 0x02)])
+    @test Base.AnnotatedString([Base.AnnotatedChar('a', [(:a, 1)]), Base.AnnotatedChar('b', [(:b, 2)])]) ==
+        Base.AnnotatedString("ab", [(1:1, :a, 1), (2:2, :b, 2)])
+    let allstrings =
+        ['a', Base.AnnotatedChar('a'), Base.AnnotatedChar('a', [(:aaa, 0x04)]),
+         "a string", Base.AnnotatedString("a string"),
+         Base.AnnotatedString("a string", [(1:2, :hmm, '%')]),
+         SubString(Base.AnnotatedString("a string", [(1:2, :hmm, '%')]), 1:1)]
+        for str1 in repeat(allstrings, 2)
+            for str2 in repeat(allstrings, 2)
+                @test String(str1 * str2) ==
+                    String(string(str1, str2)) ==
+                    String(string(str1)) * String(string(str2))
+                @test Base.annotatedstring(str1 * str2) ==
+                    Base.annotatedstring(str1, str2) ==
+                    Base.annotatedstring(str1) * Base.annotatedstring(str2)
+            end
+        end
+    end
+    # @test collect(Base.eachstyle(str)) ==
+    #     [("some", [:thing, 0x01, :all, 0x03]),
+    #     (" string", [:all, 0x03, :other, 0x02])]
+    @test chopprefix(sprint(show, str), "Base.") ==
+        "AnnotatedString{String}(\"some string\", [(1:4, :thing, 0x01), (6:11, :other, 0x02), (1:11, :all, 0x03)])"
+    @test eval(Meta.parse(repr(str))) == str
+    @test sprint(show, MIME("text/plain"), str) == "\"some string\""
+
+    a = Base.AnnotatedString("hello", [(1:5, :label, 1)])
+    @test first(a) == Base.AnnotatedChar('h', [(:label, 1)])
+
+    @test Bool === Base.infer_return_type(isvalid, Tuple{Base.AnnotatedString, Vararg})
+    @test Int === Base.infer_return_type(ncodeunits, Tuple{Base.AnnotatedString})
+end
+
+@testset "AnnotatedChar" begin
+    chr = Base.AnnotatedChar('c')
+    @test Base.AnnotatedChar(UInt32('c')) == chr
+    @test convert(Base.AnnotatedChar, chr) === chr
+    @test chr == Base.AnnotatedChar(chr.char, Pair{Symbol, Any}[])
+    @test uppercase(chr) == Base.AnnotatedChar('C')
+    @test titlecase(chr) == Base.AnnotatedChar('C')
+    @test lowercase(Base.AnnotatedChar('C')) == chr
+    str = Base.AnnotatedString("hmm", [(1:1, :attr, "h0h0"),
+                               (1:2, :attr, "h0m1"),
+                               (2:3, :attr, "m1m2")])
+    @test str[1] == Base.AnnotatedChar('h', [(:attr, "h0h0")])
+    @test str[2] == Base.AnnotatedChar('m', [(:attr, "h0m1"), (:attr, "m1m2")])
+    @test str[3] == Base.AnnotatedChar('m', [(:attr, "m1m2")])
+end
+
+@testset "Styling preservation" begin
+    str = Base.AnnotatedString("some string", [(1:4, :thing, 0x01), (1:11, :all, 0x03), (6:11, :other, 0x02)])
+    @test match(r".e", str).match == str[3:4]
+    @test  match(r"(.e)", str).captures == [str[3:4]]
+    let m0 = match(r"(.)e", str)
+        m1 = first(eachmatch(r"(.)e", str))
+        for f in fieldnames(RegexMatch)
+            @test getfield(m0, f) == getfield(m1, f)
+        end
+    end
+    @test lpad(str, 12) ==
+        Base.AnnotatedString(" some string", [(2:5, :thing, 0x01),
+                                      (2:12, :all, 0x03),
+                                      (7:12, :other, 0x02)])
+    @test rpad(str, 12) ==
+        Base.AnnotatedString("some string ", [(1:4, :thing, 0x01),
+                                      (1:11, :all, 0x03),
+                                      (6:11, :other, 0x02)])
+    str1 = Base.AnnotatedString("test", [(1:4, :label, 5)])
+    str2 = Base.AnnotatedString("case", [(2:3, :label, "oomph")])
+    @test join([str1, str1], ' ') ==
+        Base.AnnotatedString("test test",
+                     [(1:4, :label, 5),
+                      (6:9, :label, 5)])
+    @test join([str1, str1], Base.AnnotatedString(" ", [(1:1, :label, 2)])) ==
+        Base.AnnotatedString("test test",
+                     [(1:4, :label, 5),
+                      (5:5, :label, 2),
+                      (6:9, :label, 5)])
+    @test join((String(str1), str1), ' ') ==
+        Base.AnnotatedString("test test", [(6:9, :label, 5)])
+    @test repeat(str1, 2) == Base.AnnotatedString("testtest", [(1:8, :label, 5)])
+    @test repeat(str2, 2) == Base.AnnotatedString("casecase", [(2:3, :label, "oomph"),
+                                                       (6:7, :label, "oomph")])
+    @test repeat(str1[1], 3) == Base.AnnotatedString("ttt", [(1:3, :label, 5)])
+    @test reverse(str1) == Base.AnnotatedString("tset", [(1:4, :label, 5)])
+    @test reverse(str2) == Base.AnnotatedString("esac", [(2:3, :label, "oomph")])
+end
+
+@testset "Unicode" begin
+    for words in (["ᲃase", "cɦɒnɡeȿ", "can", "CHⱯNGE", "Сodeunıts"],
+                  ["Сodeunıts", "ᲃase", "cɦɒnɡeȿ", "can", "CHⱯNGE"])
+        ann_words = [Base.AnnotatedString(w, [(1:ncodeunits(w), :i, i)])
+                     for (i, w) in enumerate(words)]
+        ann_str = join(ann_words, '-')
+        for transform in (lowercase, uppercase, titlecase)
+            t_words = map(transform, words)
+            ann_t_words = [Base.AnnotatedString(w, [(1:ncodeunits(w), :i, i)])
+                        for (i, w) in enumerate(t_words)]
+            ann_t_str = join(ann_t_words, '-')
+            t_ann_str = transform(ann_str)
+            @test String(ann_t_str) == String(t_ann_str)
+            @test Base.annotations(ann_t_str) == Base.annotations(t_ann_str)
+        end
+        for transform in (uppercasefirst, lowercasefirst)
+            t_words = vcat(transform(first(words)), words[2:end])
+            ann_t_words = [Base.AnnotatedString(w, [(1:ncodeunits(w), :i, i)])
+                        for (i, w) in enumerate(t_words)]
+            ann_t_str = join(ann_t_words, '-')
+            t_ann_str = transform(ann_str)
+            @test String(ann_t_str) == String(t_ann_str)
+            @test Base.annotations(ann_t_str) == Base.annotations(t_ann_str)
+        end
+    end
+end
+
+@testset "AnnotatedIOBuffer" begin
+    aio = Base.AnnotatedIOBuffer()
+    vec2ann(v::Vector{<:Tuple}) = collect(Base.RegionAnnotation, v)
+    # Append-only writing
+    @test write(aio, Base.AnnotatedString("hello", [(1:5, :tag, 1)])) == 5
+    @test write(aio, ' ') == 1
+    @test write(aio, Base.AnnotatedString("world", [(1:5, :tag, 2)])) == 5
+    @test Base.annotations(aio) == vec2ann([(1:5, :tag, 1), (7:11, :tag, 2)])
+    # Check `annotate!`, including region sorting
+    @test truncate(aio, 0).io.size == 0
+    @test write(aio, "hello world") == ncodeunits("hello world")
+    @test Base.annotate!(aio, 1:5, :tag, 1) === aio
+    @test Base.annotate!(aio, 7:11, :tag, 2) === aio
+    @test Base.annotations(aio) == vec2ann([(1:5, :tag, 1), (7:11, :tag, 2)])
+    # Reading
+    @test read(seekstart(deepcopy(aio.io)), String) == "hello world"
+    @test read(seekstart(deepcopy(aio)), String) == "hello world"
+    @test read(seek(aio, 0), Base.AnnotatedString) == Base.AnnotatedString("hello world", [(1:5, :tag, 1), (7:11, :tag, 2)])
+    @test read(seek(aio, 1), Base.AnnotatedString) == Base.AnnotatedString("ello world", [(1:4, :tag, 1), (6:10, :tag, 2)])
+    @test read(seek(aio, 4), Base.AnnotatedString) == Base.AnnotatedString("o world", [(1:1, :tag, 1), (3:7, :tag, 2)])
+    @test read(seek(aio, 5), Base.AnnotatedString) == Base.AnnotatedString(" world", [(2:6, :tag, 2)])
+    @test read(seekend(aio), Base.AnnotatedString) == Base.AnnotatedString("")
+    @test read(seekstart(truncate(deepcopy(aio), 5)), Base.AnnotatedString) == Base.AnnotatedString("hello", [(1:5, :tag, 1)])
+    @test read(seekstart(truncate(deepcopy(aio), 6)), Base.AnnotatedString) == Base.AnnotatedString("hello ", [(1:5, :tag, 1)])
+    @test read(seekstart(truncate(deepcopy(aio), 7)), Base.AnnotatedString) == Base.AnnotatedString("hello w", [(1:5, :tag, 1), (7:7, :tag, 2)])
+    @test read(seek(aio, 0), Base.AnnotatedChar) == Base.AnnotatedChar('h', [(:tag, 1)])
+    @test read(seek(aio, 5), Base.AnnotatedChar) == Base.AnnotatedChar(' ', [])
+    @test read(seek(aio, 6), Base.AnnotatedChar) == Base.AnnotatedChar('w', [(:tag, 2)])
+    # Check method compatibility with IOBuffer
+    @test position(aio) == 7
+    @test seek(aio, 4) === aio
+    @test skip(aio, 2) === aio
+    @test Base.annotations(copy(aio)) == Base.annotations(aio)
+    @test take!(copy(aio).io) == take!(copy(aio.io))
+    # Writing into the middle of the buffer
+    @test write(seek(aio, 6), "alice") == 5 # Replace 'world' with 'alice'
+    @test read(seekstart(aio), String) == "hello alice"
+    @test Base.annotations(aio) == vec2ann([(1:5, :tag, 1), (7:11, :tag, 2)]) # Should be unchanged
+    @test write(seek(aio, 0), Base.AnnotatedString("hey-o", [(1:5, :hey, 'o')])) == 5
+    @test read(seekstart(aio), String) == "hey-o alice"
+    @test Base.annotations(aio) == vec2ann([(7:11, :tag, 2), (1:5, :hey, 'o')]) # First annotation should have been entirely replaced
+    @test write(seek(aio, 7), Base.AnnotatedString("bbi", [(1:3, :hey, 'a')])) == 3 # a[lic, bbi]e ('alice', 'abbie')
+    @test read(seekstart(aio), String) == "hey-o abbie"
+    @test Base.annotations(aio) == vec2ann([(7:7, :tag, 2), (11:11, :tag, 2), (1:5, :hey, 'o'), (8:10, :hey, 'a')])
+    @test write(seek(aio, 0), Base.AnnotatedString("ab")) == 2 # Check first annotation's region is adjusted correctly
+    @test read(seekstart(aio), String) == "aby-o abbie"
+    @test Base.annotations(aio) == vec2ann([(7:7, :tag, 2), (11:11, :tag, 2), (3:5, :hey, 'o'), (8:10, :hey, 'a')])
+    @test write(seek(aio, 3), Base.AnnotatedString("ss")) == 2
+    @test read(seekstart(aio), String) == "abyss abbie"
+    @test Base.annotations(aio) == vec2ann([(7:7, :tag, 2), (11:11, :tag, 2), (3:3, :hey, 'o'), (8:10, :hey, 'a')])
+    # Writing one buffer to another
+    newaio = Base.AnnotatedIOBuffer()
+    @test write(newaio, seekstart(aio)) == 11
+    @test read(seekstart(newaio), String) == "abyss abbie"
+    @test Base.annotations(newaio) == Base.annotations(aio)
+    @test write(seek(newaio, 5), seek(aio, 5)) == 6
+    @test sort(Base.annotations(newaio)) == sort(Base.annotations(aio))
+    @test write(newaio, seek(aio, 5)) == 6
+    @test read(seekstart(newaio), String) == "abyss abbie abbie"
+    @test sort(Base.annotations(newaio)) ==
+        sort(vcat(Base.annotations(aio), vec2ann([(13:13, :tag, 2), (14:16, :hey, 'a'), (17:17, :tag, 2)])))
+    # The `_insert_annotations!` cautious-merging optimisation
+    aio = Base.AnnotatedIOBuffer()
+    @test write(aio, Base.AnnotatedChar('a', [(:a, 1), (:b, 2)])) == 1
+    @test Base.annotations(aio) == vec2ann([(1:1, :a, 1), (1:1, :b, 2)])
+    @test write(aio, Base.AnnotatedChar('b', [(:a, 1), (:b, 2)])) == 1
+    @test Base.annotations(aio) == vec2ann([(1:2, :a, 1), (1:2, :b, 2)])
+    let aio2 = copy(aio) # A different start makes merging too risky to do.
+        @test write(aio2, Base.AnnotatedChar('c', [(:a, 0), (:b, 2)])) == 1
+        @test Base.annotations(aio2) == vec2ann([(1:2, :a, 1), (1:2, :b, 2), (3:3, :a, 0), (3:3, :b, 2)])
+    end
+    let aio2 = copy(aio) # Merging some run of the most recent annotations is fine though.
+        @test write(aio2, Base.AnnotatedChar('c', [(:b, 2)])) == 1
+        @test Base.annotations(aio2) == vec2ann([(1:2, :a, 1), (1:3, :b, 2)])
+    end
+    let aio2 = copy(aio) # ...and any subsequent annotations after a matching run can just be copied over.
+        @test write(aio2, Base.AnnotatedChar('c', [(:b, 2), (:c, 3), (:d, 4)])) == 1
+        @test Base.annotations(aio2) == vec2ann([(1:2, :a, 1), (1:3, :b, 2), (3:3, :c, 3), (3:3, :d, 4)])
+    end
+    let aio2 = Base.AnnotatedIOBuffer()
+        @test write(aio2, Base.AnnotatedChar('a', [(:b, 1)])) == 1
+        @test write(aio2, Base.AnnotatedChar('b', [(:a, 1), (:b, 1)])) == 1
+        @test read(seekstart(aio2), Base.AnnotatedString) ==
+            Base.AnnotatedString("ab", [(1:1, :b, 1), (2:2, :a, 1), (2:2, :b, 1)])
+    end
+    # Working through an IOContext
+    aio = Base.AnnotatedIOBuffer()
+    wrapio = IOContext(aio)
+    @test write(wrapio, Base.AnnotatedString("hey", [(1:3, :x, 1)])) == 3
+    @test write(wrapio, Base.AnnotatedChar('a', [(:y, 2)])) == 1
+    @test read(seekstart(aio), Base.AnnotatedString) ==
+        Base.AnnotatedString("heya", [(1:3, :x, 1), (4:4, :y, 2)])
+    # show-ing an AnnotatedIOBuffer
+    aio = Base.AnnotatedIOBuffer()
+    write(aio, Base.AnnotatedString("hello", [(1:5, :tag, 1)]))
+    @test sprint(show, aio) == "Base.AnnotatedIOBuffer(5 bytes, 1 annotation)"
+end
+
+@testset "Eachregion" begin
+    annregions(str::String, annots::Vector{<:Tuple{UnitRange{Int}, Symbol, <:Any}}) =
+        [(s, Tuple.(a)) for (s, a) in Base.eachregion(Base.AnnotatedString(str, annots))]
+    # Regions that do/don't extend to the left/right edges
+    @test annregions(" abc ", [(2:4, :face, :bold)]) ==
+        [(" ", []),
+         ("abc", [(:face, :bold)]),
+         (" ", [])]
+    @test annregions(" x ", [(2:2, :face, :bold)]) ==
+        [(" ", []),
+         ("x", [(:face, :bold)]),
+         (" ", [])]
+    @test annregions(" x", [(2:2, :face, :bold)]) ==
+        [(" ", []),
+         ("x", [(:face, :bold)])]
+    @test annregions("x ", [(1:1, :face, :bold)]) ==
+        [("x", [(:face, :bold)]),
+         (" ", [])]
+    @test annregions("x", [(1:1, :face, :bold)]) ==
+        [("x", [(:face, :bold)])]
+    # Overlapping/nested regions
+    @test annregions(" abc ", [(2:4, :face, :bold), (3:3, :face, :italic)]) ==
+        [(" ", []),
+         ("a", [(:face, :bold)]),
+         ("b", [(:face, :bold), (:face, :italic)]),
+         ("c", [(:face, :bold)]),
+         (" ", [])]
+    @test annregions("abc-xyz", [(1:7, :face, :bold), (1:3, :face, :green), (4:4, :face, :yellow), (4:7, :face, :italic)]) ==
+        [("abc", [(:face, :bold), (:face, :green)]),
+         ("-", [(:face, :bold), (:face, :yellow), (:face, :italic)]),
+         ("xyz", [(:face, :bold), (:face, :italic)])]
+    # Preserving annotation order
+    @test annregions("abcd", [(1:3, :face, :red), (2:2, :face, :yellow), (2:3, :face, :green), (2:4, :face, :blue)]) ==
+        [("a", [(:face, :red)]),
+         ("b", [(:face, :red), (:face, :yellow), (:face, :green), (:face, :blue)]),
+         ("c", [(:face, :red), (:face, :green), (:face, :blue)]),
+         ("d", [(:face, :blue)])]
+    @test annregions("abcd", [(2:4, :face, :blue), (1:3, :face, :red), (2:3, :face, :green), (2:2, :face, :yellow)]) ==
+        [("a", [(:face, :red)]),
+         ("b", [(:face, :blue), (:face, :red), (:face, :green), (:face, :yellow)]),
+         ("c", [(:face, :blue), (:face, :red), (:face, :green)]),
+         ("d", [(:face, :blue)])]
+    # Region starting after a character spanning multiple codepoints.
+    @test annregions("𝟏x", [(1:4, :face, :red)]) ==
+        [("𝟏", [(:face, :red)]),
+         ("x", [])]
+end
+
+@testset "Replacement" begin
+    astr(s::String, faceregions::Tuple{UnitRange{Int}, Symbol}...) =
+        Base.AnnotatedString(s, [(r, :face, f) for (r, f) in faceregions])
+
+    @testset "Basic Transformations" begin
+        @testset "Deletion" begin
+            @test replace(astr("hello world", (1:5, :red)), "hello" => "hi") ==
+                astr("hi world")
+            @test replace(astr("foofoo", (1:3, :red), (4:6, :green)), "foo" => "x") ==
+                astr("xx")
+            @test replace(astr("foofoo", (1:3, :red), (4:6, :green)), "foo" => "x", count=1) ==
+                astr("xfoo", (2:4, :green))
+            @test replace(astr("abcdef", (1:6, :red), (3:4, :green)), "cd" => "X") ==
+                astr("abXef", (1:2, :red), (4:5, :red))
+            @test replace(astr("a b c", (1:1, :red), (3:3, :green), (5:5, :blue)),
+                         "a" => "x", "b" => "y", "c" => "z") ==
+                astr("x y z")
+        end
+
+        @testset "Shifting" begin
+            @test replace(astr("hello world", (7:11, :red)), "hello" => "hi") ==
+                astr("hi world", (4:8, :red))
+            @test replace(astr("hello world", (7:11, :red)), "hello" => "greetings") ==
+                astr("greetings world", (11:15, :red))
+            @test replace(astr("a b c", (3:3, :red)), "a" => "xxx", "c" => "y") ==
+                astr("xxx b y", (5:5, :red))
+            @test replace(astr("abc def", (5:7, :green)), "abc" => "x") ==
+                astr("x def", (3:5, :green))
+            @test replace(astr("a b c d", (3:3, :red), (5:5, :green), (7:7, :blue)), "a" => "AA") ==
+                astr("AA b c d", (4:4, :red), (6:6, :green), (8:8, :blue))
+            @test replace(astr("hello world", (7:11, :green)), " world" => " Julia") ==
+                astr("hello Julia")
+        end
+
+        @testset "Splitting" begin
+            @test replace(astr("hello world", (1:11, :red)), " " => "_") ==
+                astr("hello_world", (1:5, :red), (7:11, :red))
+            @test replace(astr("a b c", (1:5, :red)), " " => "_") ==
+                astr("a_b_c", (1:1, :red), (3:3, :red), (5:5, :red))
+            @test replace(astr("foobarbaz", (1:9, :green)), "o" => "0", "a" => "A") ==
+                astr("f00bArbAz", (1:1, :green), (4:4, :green), (6:7, :green), (9:9, :green))
+            @test replace(astr("a b c", (1:5, :red)), " " => "_", count=1) ==
+                astr("a_b c", (1:1, :red), (3:5, :red))
+            @test replace(astr("abcde", (2:4, :red)), "c" => "X") ==
+                astr("abXde", (2:2, :red), (4:4, :red))
+            @test replace(astr("a a a a", (1:7, :blue)), "a" => "b") ==
+                astr("b b b b", (2:2, :blue), (4:4, :blue), (6:6, :blue))
+        end
+
+        @testset "Addition" begin
+            @test replace(astr("hello world"), "world" => astr("Julia", (1:5, :red))) ==
+                astr("hello Julia", (7:11, :red))
+            @test replace(astr("hello"), "hello" => astr("hi there", (1:2, :red), (4:8, :green))) ==
+                astr("hi there", (1:2, :red), (4:8, :green))
+            @test replace(astr("hello world", (7:11, :green)), "hello" => astr("hi", (1:2, :red))) ==
+                astr("hi world", (4:8, :green), (1:2, :red))
+            @test replace(astr("a b", (1:3, :yellow)), " " => astr("_", (1:1, :red))) ==
+                astr("a_b", (1:1, :yellow), (3:3, :yellow), (2:2, :red))
+            @test replace(astr("a b"), "a" => astr("X", (1:1, :red)), "b" => astr("Y", (1:1, :blue))) ==
+                astr("X Y", (1:1, :red), (3:3, :blue))
+        end
+
+        @testset "Combinations" begin
+            @test replace(astr("a b c", (1:1, :red), (5:5, :blue)), "b" => "B") ==
+                astr("a B c", (1:1, :red), (5:5, :blue))
+            @test replace(astr("a b", (1:3, :red)), " " => astr("_", (1:1, :blue))) ==
+                astr("a_b", (1:1, :red), (3:3, :red), (2:2, :blue))
+            @test replace(astr("foo bar baz", (1:3, :red), (5:7, :green), (9:11, :blue)),
+                         "foo" => "F", "a" => astr("A", (1:1, :yellow))) ==
+                astr("F bAr bAz", (3:3, :green), (5:5, :green), (7:7, :blue), (9:9, :blue),
+                     (4:4, :yellow), (8:8, :yellow))
+        end
+    end
+
+    @testset "Pattern Types" begin
+        @testset "Char" begin
+            @test replace(astr("hello", (1:5, :red)), 'l' => 'L') ==
+                astr("heLLo", (1:2, :red), (5:5, :red))
+            @test replace(astr("hello"), 'o' => astr("O", (1:1, :red))) ==
+                astr("hellO", (5:5, :red))
+            @test replace(astr("aaa", (1:3, :red)), 'a' => 'b') ==
+                astr("bbb")
+            @test replace(astr("aaa", (1:3, :red)), 'a' => 'b', count=2) ==
+                astr("bba", (3:3, :red))
+            @test replace(astr("café", (1:5, :green)), 'é' => 'e') ==
+                astr("cafe", (1:3, :green))
+            @test replace(astr("test"), 't' => astr("TTT", (1:3, :blue))) ==
+                astr("TTTesTTT", (1:3, :blue), (6:8, :blue))
+            @test replace(astr("hello", (1:5, :red)), 'l' => Base.AnnotatedChar('L', [(label=:face, value=:blue)])) ==
+                astr("heLLo", (1:2, :red), (5:5, :red), (3:4, :blue))
+            @test replace(astr("abc", (1:3, :green)), 'b' => Base.AnnotatedChar('B', [(label=:face, value=:bold)])) ==
+                astr("aBc", (1:1, :green), (3:3, :green), (2:2, :bold))
+        end
+
+        @testset "Regex" begin
+            @test replace(astr("foo bar", (1:7, :green)), r"o+" => "0") ==
+                astr("f0 bar", (1:1, :green), (3:6, :green))
+            @test replace(astr("hello"), r"l+" => astr("L", (1:1, :red))) ==
+                astr("heLo", (3:3, :red))
+            @test replace(astr("ab", (1:2, :red)), r"" => "^") ==
+                astr("^a^b^", (2:2, :red), (4:4, :red))
+            @test replace(astr("abc", (1:3, :red)), r"b?" => "X") ==
+                astr("XaXcX", (2:2, :red), (4:4, :red))
+            @test replace(astr("aaa", (1:3, :red)), r"a+" => "b") ==
+                astr("b")
+        end
+
+        @testset "Predicate" begin
+            @test replace(astr("abc", (1:3, :red)), islowercase => 'X') ==
+                astr("XXX")
+        end
+
+        @testset "Count" begin
+            @test replace(astr("hello", (1:5, :red)), "l" => "L", count=0) ==
+                astr("hello", (1:5, :red))
+            @test replace(astr("a b c", (5:5, :red)), "a" => "A", count=1) ==
+                astr("A b c", (5:5, :red))
+            @test replace(astr("a b c", (1:5, :red)), " " => "_", count=1) ==
+                astr("a_b c", (1:1, :red), (3:5, :red))
+            @test replace(astr("a b"), "a" => astr("X", (1:1, :red)),
+                         "b" => astr("Y", (1:1, :blue)), count=1) ==
+                astr("X b", (1:1, :red))
+            @test replace(astr("abc", (1:3, :red)), "x" => "y", count=10) ==
+                astr("abc", (1:3, :red))
+        end
+
+        @testset "AnnotatedChar" begin
+            @test replace(astr("test", (1:4, :red)), 't' => Base.AnnotatedChar('T', [(label=:face, value=:blue)])) ==
+                astr("TesT", (2:3, :red), (1:1, :blue), (4:4, :blue))
+            @test replace(astr("hello"), 'l' => Base.AnnotatedChar('L', [(label=:face, value=:bold), (label=:face, value=:red)])) ==
+                astr("heLLo", (3:4, :bold), (3:4, :red))
+            @test replace(astr("a b c", (1:5, :green)), ' ' => Base.AnnotatedChar('_', [(label=:face, value=:underline)])) ==
+                astr("a_b_c", (1:1, :green), (3:3, :green), (5:5, :green), (2:2, :underline), (4:4, :underline))
+        end
+
+        @testset "SubString" begin
+            source = astr("WORLD", (1:5, :blue))
+            @test replace(astr("hello world", (1:11, :red)), "world" => SubString(source, 1:5)) ==
+                astr("hello WORLD", (1:6, :red), (7:11, :blue))
+            source2 = astr("TEST", (1:2, :green), (3:4, :cyan))
+            @test replace(astr("foo bar"), "bar" => SubString(source2, 1:4)) ==
+                astr("foo TEST", (5:6, :green), (7:8, :cyan))
+            source3 = astr("annotation", (1:10, :emphasis))
+            @test replace(astr("replace me", (1:10, :red)), "me" => SubString(source3, 1:2)) ==
+                astr("replace an", (1:8, :red), (9:10, :emphasis))
+        end
+    end
+
+    @testset "Multiple Replacements" begin
+        @test replace(astr("foo bar baz", (1:3, :red), (5:7, :green), (9:11, :blue)),
+                     "foo" => "F", "bar" => "B", "baz" => "Z") ==
+            astr("F B Z")
+        @test replace(astr("foo bar"), "foo" => astr("F", (1:1, :red)), "bar" => "B") ==
+            astr("F B", (1:1, :red))
+        @test replace(astr("abc", (1:3, :red)), "a" => "A", "b" => "B", "c" => "C") ==
+            astr("ABC")
+        @test replace(astr("foo bar foo", (1:3, :red), (9:11, :green)),
+                     "foo" => "F", "bar" => "B", count=2) ==
+            astr("F B foo", (5:7, :green))
+        @test replace(astr("a b c", (5:5, :cyan)),
+                     "a" => astr("X", (1:1, :red)), "b" => astr("Y", (1:1, :blue))) ==
+            astr("X Y c", (5:5, :cyan), (1:1, :red), (3:3, :blue))
+        @test replace(astr("xaybzc", (2:2, :red), (4:4, :green), (6:6, :blue)),
+                     "x" => "X", "y" => "Y", "z" => "Z") ==
+            astr("XaYbZc", (2:2, :red), (4:4, :green), (6:6, :blue))
+        @test replace(astr("foo123bar", (1:3, :red), (7:9, :green)),
+                     r"(\d+)" => astr("NUM", (1:3, :blue))) ==
+            astr("fooNUMbar", (1:3, :red), (7:9, :green), (4:6, :blue))
+
+        @testset "Pattern type combinations" begin
+            @test replace(astr("a1b2c", (1:5, :red)), 'a' => "A", r"\d" => "X") ==
+                astr("AXbXc", (3:3, :red), (5:5, :red))
+            @test replace(astr("HeLLo", (1:5, :green)), isuppercase => 'x', "LL" => "ll") ==
+                astr("xexxo", (2:2, :green), (5:5, :green))
+            @test replace(astr("test123", (1:7, :blue)), isdigit => 'X', "test" => "TEST") ==
+                astr("TESTXXX")
+        end
+
+        @testset "Overlapping patterns" begin
+            @test replace(astr("aaaa", (1:4, :red)), "aa" => "b") ==
+                astr("bb")
+            @test replace(astr("abcabc", (1:3, :red), (4:6, :green)), "abc" => "X", "bc" => "Y") ==
+                astr("XX")
+        end
+
+        @testset "Count with multiple patterns" begin
+            @test replace(astr("a b a b", (1:7, :red)), "a" => "A", "b" => "B", count=3) ==
+                astr("A B A b", (2:2, :red), (4:4, :red), (6:7, :red))
+            @test replace(astr("x o x o x o", (1:11, :blue)), 'x' => "X", 'o' => "O", count=4) ==
+                astr("X O X O x o", (2:2, :blue), (4:4, :blue), (6:6, :blue), (8:11, :blue))
+        end
+    end
+
+    @testset "Edge Cases" begin
+        @testset "Boundaries" begin
+            @test replace(astr("abcdef", (4:6, :red)), "abc" => "x") ==
+                astr("xdef", (2:4, :red))
+            @test replace(astr("abcdef", (1:3, :red)), "def" => "x") ==
+                astr("abcx", (1:3, :red))
+            @test replace(astr("abcdef", (3:6, :red)), "abcd" => "X") ==
+                astr("Xef", (2:3, :red))
+            @test replace(astr("abcdef", (1:4, :red)), "cdef" => "X") ==
+                astr("abX", (1:2, :red))
+            @test replace(astr("abc", (2:2, :red)), "b" => "B") ==
+                astr("aBc")
+            @test replace(astr("abc", (3:3, :red)), "a" => "A") ==
+                astr("Abc", (3:3, :red))
+            @test replace(astr("foobar", (1:3, :red)), "foo" => "x") ==
+                astr("xbar")
+            @test replace(astr("foobar", (4:6, :red)), "bar" => "x") ==
+                astr("foox")
+        end
+
+        @testset "Empty" begin
+            @test replace(astr("hello", (1:5, :red)), "x" => "y") ==
+                astr("hello", (1:5, :red))
+            @test replace(astr(""), "x" => "y") == astr("")
+            @test replace(astr("", (1:0, :red)), "" => "x") == astr("x")
+            @test replace(astr("ab", (1:2, :red)), "" => "^") ==
+                astr("^a^b^", (2:2, :red), (4:4, :red))
+            @test replace(astr("hello", (1:5, :red)), "l" => "") ==
+                astr("heo", (1:2, :red), (3:3, :red))
+            @test replace(astr("hello world", (7:11, :green)), "hello " => astr("")) ==
+                astr("world", (1:5, :green))
+        end
+
+        @testset "Unicode" begin
+            @test replace(astr("føø bar", (1:4, :red)), "føø" => "foo") ==
+                astr("foo bar")
+            @test replace(astr("hello", (1:5, :red)), "llo" => "ḻḻø") ==
+                astr("heḻḻø", (1:2, :red))
+            @test replace(astr("foo"), "foo" => astr("ƀäṙ", (1:6, :red))) ==
+                astr("ƀäṙ", (1:6, :red))
+            @test replace(astr("a𝟏b", (1:6, :red)), "𝟏" => "1") ==
+                astr("a1b", (1:1, :red), (3:3, :red))
+            @test replace(astr("ḟøø bär", (1:12, :green)), " " => "_") ==
+                astr("ḟøø_bär", (1:7, :green), (9:12, :green))
+            @test replace(astr("a𝟏b𝟏c", (1:9, :red)), "𝟏" => "1") ==
+                astr("a1b1c", (1:1, :red), (3:3, :red))
+        end
+
+        @testset "Special characters" begin
+            @test replace(astr("a\nb", (1:3, :red)), "\n" => " ") ==
+                astr("a b", (1:1, :red), (3:3, :red))
+            @test replace(astr("a\tb", (1:3, :red)), "\t" => " ") ==
+                astr("a b", (1:1, :red), (3:3, :red))
+            @test replace(astr("a\0b", (1:3, :red)), "\0" => "x") ==
+                astr("axb", (1:1, :red), (3:3, :red))
+        end
+
+        @testset "Annotation edge cases" begin
+            @test replace(astr("hello", (1:5, :blue)), "l" => "L") ==
+                astr("heLLo", (1:2, :blue), (5:5, :blue))
+            @test replace(astr("aabb", (1:4, :red)), "a" => "x", "b" => "y") ==
+                astr("xxyy")
+            @test replace(astr("hello", (1:3, :red), (2:4, :green)), "el" => "X") ==
+                astr("hXlo", (1:1, :red), (3:3, :green))
+            str_multi = astr("test", (1:4, :red), (1:4, :en))
+            @test replace(str_multi, "test" => "ok") == astr("ok")
+            str2 = astr("a b", (1:3, :red), (1:3, :bold))
+            result = replace(str2, " " => "_")
+            @test String(result) == "a_b"
+            @test length(Base.annotations(result)) == 4
+            str3 = astr("hi world", (4:8, :red), (4:8, :bold))
+            result2 = replace(str3, "hi" => "hello")
+            @test String(result2) == "hello world"
+            @test length(Base.annotations(result2)) == 2
+
+            str_triple = astr("abc", (1:3, :red), (1:3, :bold), (1:3, :italic))
+            @test replace(str_triple, "b" => "B") ==
+                astr("aBc", (1:1, :red), (3:3, :red),
+                     (1:1, :bold), (3:3, :bold),
+                     (1:1, :italic), (3:3, :italic))
+
+            str_nested = astr("abcde", (1:5, :outer), (2:4, :middle), (3:3, :inner))
+            @test replace(str_nested, "c" => "X") ==
+                astr("abXde", (1:2, :outer), (4:5, :outer), (2:2, :middle), (4:4, :middle))
+
+            str_same_label = astr("test", (1:2, :val1), (3:4, :val2))
+            @test replace(str_same_label, "es" => "X") ==
+                astr("tXt", (1:1, :val1), (3:3, :val2))
+        end
+
+        @testset "Size variations" begin
+            @test replace(astr("hello", (1:5, :red)), "hello" => "world") ==
+                astr("world")
+            @test replace(astr("hi", (1:2, :red)), "hi" => "hello") ==
+                astr("hello")
+            @test replace(astr("hello", (1:5, :red)), "hello" => "hi") ==
+                astr("hi")
+            @test replace(astr("a b c", (1:1, :red), (3:3, :green), (5:5, :blue)), "b" => "B") ==
+                astr("a B c", (1:1, :red), (5:5, :blue))
+            @test replace(astr("hello world", (1:5, :red)), "world" => "there") ==
+                astr("hello there", (1:5, :red))
+            @test replace(astr("hello world", (7:11, :green)), "hello" => "hi") ==
+                astr("hi world", (4:8, :green))
+            @test replace(astr("hi"), "hi" => astr("hello world", (1:5, :red), (7:11, :green))) ==
+                astr("hello world", (1:5, :red), (7:11, :green))
+            @test replace(astr("hello world"), "hello world" => astr("hi", (1:2, :red))) ==
+                astr("hi", (1:2, :red))
+            @test replace(astr("aabbcc", (1:6, :red)), "a" => "x", "b" => "y", "c" => "z") ==
+                astr("xxyyzz")
+            @test replace(astr("hello", (1:5, :red)), "hello" => astr("hello", (1:5, :blue))) ==
+                astr("hello", (1:5, :blue))
+        end
+
+        @testset "Complex" begin
+            @test replace(astr("a b c d", (1:7, :red)), " " => "_") ==
+                astr("a_b_c_d", (1:1, :red), (3:3, :red), (5:5, :red), (7:7, :red))
+            annots = [(i:i, :red) for i in 1:2:9]
+            @test replace(astr("a b c d e", annots...), " " => "_") ==
+                astr("a_b_c_d_e", (1:1, :red), (3:3, :red), (5:5, :red), (7:7, :red), (9:9, :red))
+            @test replace(astr("abcdefgh", (1:8, :red)), "cd" => "X") ==
+                astr("abXefgh", (1:2, :red), (4:7, :red))
+            @test replace(astr("hello world"), "world" => "Julia") ==
+                astr("hello Julia")
+            @test replace(astr("hello", (1:5, :red)), "ello" => uppercase) ==
+                astr("hELLO", (1:1, :red))
+
+            str_code = astr("function test()", (1:8, :keyword), (10:13, :identifier))
+            @test replace(str_code, "test" => "demo", "(" => "[", ")" => "]") ==
+                astr("function demo[]", (1:8, :keyword))
+
+            str_markdown = astr("This is *bold* text", (9:13, :emphasis))
+            @test replace(str_markdown, "*" => "", "bold" => astr("BOLD", (1:4, :strong))) ==
+                astr("This is BOLD text", (9:12, :strong))
+
+            str_chain = astr("aaa", (1:3, :red))
+            str_chain = replace(str_chain, "a" => astr("b", (1:1, :green)))
+            str_chain = replace(str_chain, "b" => astr("c", (1:1, :blue)))
+            @test String(str_chain) == "ccc"
+            @test length(Base.annotations(str_chain)) == 1
+
+            str_interleaved = astr("a1b2c3", (1:1, :red), (3:3, :green), (5:5, :blue))
+            @test replace(str_interleaved, r"\d" => astr("X", (1:1, :yellow))) ==
+                astr("aXbXcX", (1:1, :red), (3:3, :green), (5:5, :blue),
+                     (2:2, :yellow), (4:4, :yellow), (6:6, :yellow))
+        end
+
+
+
+        @testset "Overlapping annotations" begin
+            # Annotations that overlap on the same text
+            str_overlap = astr("hello world", (1:5, :red), (3:9, :bold))
+            @test replace(str_overlap, "ll" => "LL") ==
+                astr("heLLo world", (1:2, :red), (5:5, :red), (5:9, :bold))
+            # Multiple overlapping annotations
+            str_multi = astr("testing", (1:7, :outer), (2:6, :middle), (3:5, :inner))
+            @test replace(str_multi, "es" => "ES") ==
+                astr("tESting", (1:1, :outer), (4:7, :outer),
+                     (4:6, :middle),
+                     (4:5, :inner))
+        end
+
+        @testset "Multiple annotations same region" begin
+            # Same region with different labels
+            str = astr("test", (1:4, :red), (1:4, :bold), (2:3, :italic))
+            @test replace(str, "es" => "ES") ==
+                astr("tESt", (1:1, :red), (4:4, :red),
+                     (1:1, :bold), (4:4, :bold))
+            # Same label, different values on different regions
+            str2 = astr("test", (1:2, :red), (3:4, :blue))
+            @test replace(str2, "es" => "ES") ==
+                astr("tESt", (1:1, :red), (4:4, :blue))
+        end
+
+        @testset "Annotation merging" begin
+            # Adjacent replacements with same annotations should merge
+            str = astr("abc", (1:3, :red))
+            result = replace(str, 'a' => astr("A", (1:1, :red)), 'b' => astr("B", (1:1, :red)), 'c' => astr("C", (1:1, :red)))
+            @test String(result) == "ABC"
+            @test Base.annotations(result) == [(region=1:3, label=:face, value=:red)]
+
+            # Non-adjacent should not merge
+            str2 = astr("axbxc", (1:5, :green))
+            result2 = replace(str2, 'a' => astr("A", (1:1, :red)), 'c' => astr("C", (1:1, :red)))
+            @test String(result2) == "AxbxC"
+            # The middle section should be green, ends should be red (not merged)
+            @test length(Base.annotations(result2)) >= 2
+        end
+
+        @testset "Pattern with itself" begin
+            # Replace pattern with itself but different annotations
+            @test replace(astr("test", (1:4, :red)), "t" => astr("t", (1:1, :blue))) ==
+                astr("test", (2:3, :red), (1:1, :blue), (4:4, :blue))
+            # Same length, same content, different annotation
+            @test replace(astr("hello", (1:5, :red)), "hello" => astr("hello", (1:5, :blue))) ==
+                astr("hello", (1:5, :blue))
+        end
+
+        @testset "Many replacements" begin
+            # Many occurrences of same pattern
+            str = astr("a" * "b"^20, (1:21, :red))
+            result = replace(str, "b" => "B")
+            @test String(result) == "a" * "B"^20
+            @test Base.annotations(result) == [(region=1:1, label=:face, value=:red)]
+
+            # Multiple different patterns
+            str2 = astr("ababababab", (1:10, :green))
+            result2 = replace(str2, "a" => "A", "b" => "B")
+            @test String(result2) == "ABABABABAB"
+            @test Base.annotations(result2) == []
+        end
+
+        @testset "Annotation spanning replacements" begin
+            # Annotation covers entire string with multiple replacements
+            @test replace(astr("a-b-c-d", (1:7, :red)), "-" => "_") ==
+                astr("a_b_c_d", (1:1, :red), (3:3, :red), (5:5, :red), (7:7, :red))
+            # Multiple replacements at different positions
+            str = astr("abcdefgh", (1:8, :blue))
+            @test replace(str, "b" => "B", "d" => "D", "f" => "F") ==
+                astr("aBcDeFgh", (1:1, :blue), (3:3, :blue), (5:5, :blue), (7:8, :blue))
+        end
+
+        @testset "edge annotation regions" begin
+            # Empty region (0:0) outside string bounds gets filtered out
+            str_empty = astr("test", (0:0, :red))
+            @test replace(str_empty, "t" => "T") == astr("TesT")
+
+            # Backward range within bounds is preserved (even though it's empty)
+            str_backward = astr("test", (3:2, :red))
+            @test replace(str_backward, "t" => "T") == astr("TesT", (3:2, :red))
+
+            # Backward range outside bounds gets filtered out
+            str_backward_out = astr("test", (5:3, :red))
+            @test replace(str_backward_out, "t" => "T") == astr("TesT")
+        end
+    end
+
+    @testset "IO" begin
+        buf = Base.AnnotatedIOBuffer()
+        replace(buf, astr("hello", (1:5, :red)), "l" => "L")
+        result = read(seekstart(buf), Base.AnnotatedString)
+        @test result == astr("heLLo", (1:2, :red), (5:5, :red))
+
+        buf = Base.AnnotatedIOBuffer()
+        replace(buf, astr("a", (1:1, :red)), "a" => "x")
+        replace(buf, astr("b", (1:1, :blue)), "b" => "y")
+        result = read(seekstart(buf), Base.AnnotatedString)
+        @test result == astr("xy")
+
+        buf = IOBuffer()
+        replace(buf, astr("hello", (1:5, :red)), "l" => "L")
+        @test String(take!(buf)) == "heLLo"
+
+        buf = Base.AnnotatedIOBuffer()
+        write(buf, "prefix ")
+        replace(buf, astr("test", (1:4, :green)), "t" => "T")
+        result = read(seekstart(buf), Base.AnnotatedString)
+        @test String(result) == "prefix TesT"
+        @test Base.annotations(result) == [(region=9:10, label=:face, value=:green)]
+
+        buf = Base.AnnotatedIOBuffer()
+        replace(buf, astr("line1", (1:5, :red)), "1" => "A")
+        write(buf, "\n")
+        replace(buf, astr("line2", (1:5, :blue)), "2" => "B")
+        result = read(seekstart(buf), Base.AnnotatedString)
+        @test String(result) == "lineA\nlineB"
+
+        buf = Base.AnnotatedIOBuffer()
+        replace(buf, astr("test", (1:4, :green)), "t" => "T")
+        truncate(buf, 4)
+        result = read(seekstart(buf), Base.AnnotatedString)
+        @test String(result) == "TesT"
+
+        @testset "Non-appending operations" begin
+            # Write, seek back, then replace (matches standard IOBuffer behavior - no truncation)
+            buf = Base.AnnotatedIOBuffer()
+            write(buf, astr("original", (1:8, :red)))
+            seekstart(buf)
+            replace(buf, astr("test", (1:4, :blue)), "t" => "T")
+            result = read(seekstart(buf), Base.AnnotatedString)
+            # Standard IOBuffer doesn't truncate, so we get "TesTinal" not "TesT"
+            @test String(result) == "TesTinal"
+            # Annotations in the replaced region should be cleared, old ones shifted
+            @test Base.annotations(result) == [(region=5:8, label=:face, value=:red),
+                                                (region=2:3, label=:face, value=:blue)]
+
+            # Multiple sequential replacements to same buffer (appending)
+            buf2 = Base.AnnotatedIOBuffer()
+            replace(buf2, astr("first", (1:5, :red)), "i" => "I")
+            write(buf2, " ")
+            replace(buf2, astr("second", (1:6, :blue)), "e" => "E")
+            result2 = read(seekstart(buf2), Base.AnnotatedString)
+            @test String(result2) == "fIrst sEcond"
+            # Check annotations are present and positioned correctly
+            red_annots = filter(a -> a.value == :red, Base.annotations(result2))
+            blue_annots = filter(a -> a.value == :blue, Base.annotations(result2))
+            @test !isempty(red_annots)
+            @test !isempty(blue_annots)
+
+            # Writing at different positions within buffer
+            buf3 = Base.AnnotatedIOBuffer()
+            write(buf3, "start ")
+            pos = position(buf3)
+            replace(buf3, astr("middle", (1:6, :green)), "d" => "D")
+            write(buf3, " end")
+            result3 = read(seekstart(buf3), Base.AnnotatedString)
+            @test String(result3) == "start miDDle end"
+            # Check that green annotation is offset correctly
+            green_annots = filter(a -> a.value == :green, Base.annotations(result3))
+            @test all(a -> first(a.region) >= pos + 1, green_annots)
+        end
+    end
+end
diff --git a/test/strings/basic.jl b/test/strings/basic.jl
index 13f2f5197187a..c4f8c7537457e 100644
--- a/test/strings/basic.jl
+++ b/test/strings/basic.jl
@@ -49,6 +49,24 @@ using Random
     end
 end
 
+@testset "takestring!" begin
+    v = [0x61, 0x62, 0x63]
+    old_mem = v.ref.mem
+    @test takestring!(v) == "abc"
+    @test isempty(v)
+    @test v.ref.mem !== old_mem # memory is changed
+    for v in [
+        UInt8[],
+        [0x01, 0x02, 0x03],
+        collect(codeunits("æøå"))
+    ]
+        cp = copy(v)
+        s = takestring!(v)
+        @test isempty(v)
+        @test codeunits(s) == cp
+    end
+end
+
 @testset "{starts,ends}with" begin
     @test startswith("abcd", 'a')
     @test startswith('a')("abcd")
@@ -203,6 +221,12 @@ end
         @test (@views (x[3], x[1:2], x[[1,4]])) isa Tuple{Char, SubString, String}
         @test (@views (x[3], x[1:2], x[[1,4]])) == ('c', "ab", "ad")
     end
+
+    @testset ":noshift constructor" begin
+        @test SubString("", 0, 0, Val(:noshift)) == ""
+        @test SubString("abcd", 0, 1, Val(:noshift)) == "a"
+        @test SubString("abcd", 0, 4, Val(:noshift)) == "abcd"
+    end
 end
 
 
@@ -337,9 +361,7 @@ end
     @test_throws StringIndexError get(utf8_str, 2, 'X')
 end
 
-#=
-# issue #7764
-let
+@testset "issue #7764" begin
     srep = repeat("Σβ",2)
     s="Σβ"
     ss=SubString(s,1,lastindex(s))
@@ -352,16 +374,15 @@ let
     @test iterate(srep, 7) == ('β',9)
 
     @test srep[7] == 'β'
-    @test_throws BoundsError srep[8]
+    @test_throws StringIndexError srep[8]
 end
-=#
 
 # This caused JuliaLang/JSON.jl#82
 @test first('\x00':'\x7f') === '\x00'
 @test last('\x00':'\x7f') === '\x7f'
 
-# make sure substrings do not accept code unit if it is not start of codepoint
-let s = "x\u0302"
+@testset "make sure substrings do not accept code unit if it is not start of codepoint" begin
+    s = "x\u0302"
     @test s[1:2] == s
     @test_throws BoundsError s[0:3]
     @test_throws BoundsError s[1:4]
@@ -381,7 +402,7 @@ end
 end
 # test AbstractString functions at beginning of string.jl
 struct tstStringType <: AbstractString
-    data::Array{UInt8,1}
+    data::Vector{UInt8}
 end
 @testset "AbstractString functions" begin
     tstr = tstStringType(unsafe_wrap(Vector{UInt8},"12"))
@@ -427,6 +448,8 @@ end
 
     @test Symbol(gstr) === Symbol("12")
 
+    @test eltype(gstr) == Char
+    @test firstindex(gstr) == 1
     @test sizeof(gstr) == 2
     @test ncodeunits(gstr) == 2
     @test length(gstr) == 2
@@ -872,6 +895,11 @@ end
             end
         end
     end
+
+    @testset "return type infers to `Int`" begin
+        @test Int === Base.infer_return_type(prevind, Tuple{AbstractString, Vararg})
+        @test Int === Base.infer_return_type(nextind, Tuple{AbstractString, Vararg})
+    end
 end
 
 @testset "first and last" begin
@@ -1068,10 +1096,11 @@ let s = "∀x∃y", u = codeunits(s)
     @test_throws Base.CanonicalIndexError (u[1] = 0x00)
     @test collect(u) == b"∀x∃y"
     @test Base.elsize(u) == Base.elsize(typeof(u)) == 1
+    @test similar(typeof(u), 3) isa Vector{UInt8}
 end
 
-# issue #24388
-let v = unsafe_wrap(Vector{UInt8}, "abc")
+@testset "issue #24388" begin
+    v = unsafe_wrap(Vector{UInt8}, "abc")
     s = String(v)
     @test_throws BoundsError v[1]
     push!(v, UInt8('x'))
@@ -1087,6 +1116,17 @@ let v = [0x40,0x41,0x42]
     @test String(view(v, 2:3)) == "AB"
 end
 
+@testset "issue #54369" begin
+    v = Base.StringMemory(3)
+    v .= [0x41,0x42,0x43]
+    s = String(v)
+    @test s == "ABC"
+    @test v == [0x41,0x42,0x43]
+    v[1] = 0x43
+    @test s == "ABC"
+    @test v == [0x43,0x42,0x43]
+end
+
 # make sure length for identical String and AbstractString return the same value, PR #25533
 let rng = MersenneTwister(1), strs = ["∀εa∀aε"*String(rand(rng, UInt8, 100))*"∀εa∀aε",
                                    String(rand(rng, UInt8, 200))]
@@ -1099,8 +1139,8 @@ let rng = MersenneTwister(1), strs = ["∀εa∀aε"*String(rand(rng, UInt8, 100
     end
 end
 
-# conversion of SubString to the same type, issue #25525
-let x = SubString("ab", 1, 1)
+@testset "conversion of SubString to the same type, issue #25525" begin
+    x = SubString("ab", 1, 1)
     y = convert(SubString{String}, x)
     @test y === x
     chop("ab") === chop.(["ab"])[1]
@@ -1153,9 +1193,10 @@ end
     apple_uint8 = Vector{UInt8}("Apple")
     @test apple_uint8 == [0x41, 0x70, 0x70, 0x6c, 0x65]
 
-    Base.String(::tstStringType) = "Test"
+    Base.codeunit(::tstStringType) = UInt8
+    Base.codeunits(t::tstStringType) = t.data
     abstract_apple = tstStringType(apple_uint8)
-    @test hash(abstract_apple, UInt(1)) == hash("Test", UInt(1))
+    @test hash(abstract_apple, UInt(1)) == hash("Apple", UInt(1))
 
     @test length("abc", 1, 3) == length("abc", UInt(1), UInt(3))
 
@@ -1164,7 +1205,7 @@ end
     code_units = Base.CodeUnits("abc")
     @test Base.IndexStyle(Base.CodeUnits) == IndexLinear()
     @test Base.elsize(code_units) == sizeof(UInt8)
-    @test Base.unsafe_convert(Ptr{Int8}, code_units) == Base.unsafe_convert(Ptr{Int8}, code_units.s)
+    @test Base.unsafe_convert(Ptr{Int8}, Base.cconvert(Ptr{UInt8}, code_units)) == Base.unsafe_convert(Ptr{Int8}, Base.cconvert(Ptr{Int8}, code_units.s))
 end
 
 @testset "LazyString" begin
@@ -1178,6 +1219,7 @@ end
     @test codeunit(l) == UInt8
     @test codeunit(l,2) == 0x2b
     @test isvalid(l, 1)
+    @test lastindex(l) == lastindex("1+2")
     @test Base.infer_effects((Any,)) do a
         throw(lazy"a is $a")
     end |> Core.Compiler.is_foldable
@@ -1229,6 +1271,8 @@ end
         @test !Core.Compiler.is_removable_if_unused(e) || (f, Ts)
     end
     @test_throws ArgumentError Symbol("a\0a")
+
+    @test Base._string_n_override == Base.encode_effects_override(Base.compute_assumed_settings((:total, :(!:consistent))))
 end
 
 @testset "Ensure UTF-8 DFA can never leave invalid state" begin
@@ -1382,3 +1426,48 @@ end
         end
     end
 end
+
+@testset "transcode" begin
+    # string starting with an ASCII character
+    str_1 = "zβγ"
+    # string starting with a 2 byte UTF-8 character
+    str_2 = "αβγ"
+    # string starting with a 3 byte UTF-8 character
+    str_3 = "आख"
+    # string starting with a 4 byte UTF-8 character
+    str_4 = "𒃵𒃰"
+    @testset for str in (str_1, str_2, str_3, str_4)
+        @test transcode(String, str) === str
+        @test transcode(String, transcode(UInt16, str)) == str
+        @test transcode(String, transcode(UInt16, transcode(UInt8, str))) == str
+        @test transcode(String, transcode(Int32, transcode(UInt8, str))) == str
+        @test transcode(String, transcode(UInt32, transcode(UInt8, str))) == str
+        @test transcode(String, transcode(UInt8, transcode(UInt16, str))) == str
+    end
+end
+
+if Sys.iswindows()
+    @testset "cwstring" begin
+        # empty string
+        str_0 = ""
+        # string with embedded NUL character
+        str_1 = "Au\000B"
+        # string with terminating NUL character
+        str_2 = "Wordu\000"
+        # "Regular" string with UTF-8 characters of differing byte counts
+        str_3 = "aܣ𒀀"
+        @test Base.cwstring(str_0) == UInt16[0x0000]
+        @test_throws ArgumentError Base.cwstring(str_1)
+        @test_throws ArgumentError Base.cwstring(str_2)
+        @test Base.cwstring(str_3) == UInt16[0x0061, 0x0723, 0xd808, 0xdc00, 0x0000]
+    end
+end
+
+
+@testset "eltype for AbstractString subtypes" begin
+    @test eltype(String) == Char
+    @test eltype(SubString{String}) == Char
+
+    u = b"hello"
+    @test eltype(u) === UInt8
+end
diff --git a/test/strings/io.jl b/test/strings/io.jl
index aed1f800d4d49..209844580b3cd 100644
--- a/test/strings/io.jl
+++ b/test/strings/io.jl
@@ -156,6 +156,20 @@
         @test "aaa \\g \\n" == unescape_string(str, ['g', 'n'])
     end
     @test Base.escape_raw_string(raw"\"\\\"\\-\\") == "\\\"\\\\\\\"\\\\-\\\\"
+    @test Base.escape_raw_string(raw"`\`\\-\\") == "\`\\\`\\\\-\\\\"
+    @test Base.escape_raw_string(raw"\"\\\"\\-\\", '`') == "\"\\\"\\\\-\\\\"
+    @test Base.escape_raw_string(raw"`\`\\-\\", '`') == "\\\`\\\\\\\`\\\\-\\\\"
+    @test Base.escape_raw_string(raw"some`string") == "some`string"
+    @test Base.escape_raw_string(raw"some\"string", '`') == "some\"string"
+    @test Base.escape_raw_string(raw"some`string\\") == "some`string\\\\"
+    @test Base.escape_raw_string(raw"some\"string\\", '`') == "some\"string\\\\"
+    @test Base.escape_raw_string(raw"some\"string") == "some\\\"string"
+    @test Base.escape_raw_string(raw"some`string", '`') == "some\\`string"
+
+    # ascii and fullhex flags:
+    @test escape_string("\u00e4\u00f6\u00fc") == "\u00e4\u00f6\u00fc"
+    @test escape_string("\u00e4\u00f6\u00fc", ascii=true) == "\\ue4\\uf6\\ufc"
+    @test escape_string("\u00e4\u00f6\u00fc", ascii=true, fullhex=true) == "\\u00e4\\u00f6\\u00fc"
 end
 @testset "join()" begin
     @test join([]) == join([],",") == ""
@@ -321,3 +335,17 @@ end
     # test empty args
     @test string() == ""
 end
+
+module StringsIOStringReturnTypesTestModule
+    struct S end
+    Base.joinpath(::S) = S()
+end
+
+@testset "`string` return types" begin
+    @test all(T -> T <: AbstractString, Base.return_types(string))
+end
+
+@testset "type stable `join` (#55389)" begin
+    itr = ("foo" for _ in 1:100)
+    @test Base.return_types(join, (typeof(itr),))[] == String
+end
diff --git a/test/strings/search.jl b/test/strings/search.jl
index d328168bfa466..9d0ea7844ec7f 100644
--- a/test/strings/search.jl
+++ b/test/strings/search.jl
@@ -4,389 +4,419 @@
 astr = "Hello, world.\n"
 u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
 
-# I think these should give error on 4 also, and "" is not treated
-# consistently with SubString("",1,1), nor with Char[]
-for ind in (0, 5)
-    @test_throws BoundsError findnext(SubString("",1,1), "foo", ind)
-    @test_throws BoundsError findprev(SubString("",1,1), "foo", ind)
-end
+@testset "BoundsError for findnext/findprev" begin
+    # I think these should give error on 4 also, and "" is not treated
+    # consistently with SubString("",1,1), nor with Char[]
+    for ind in (0, 5)
+        @test_throws BoundsError findnext(SubString("",1,1), "foo", ind)
+        @test_throws BoundsError findprev(SubString("",1,1), "foo", ind)
+    end
 
-# Note: the commented out test will be enabled after fixes to make
-# sure that findnext/findprev are consistent
-# no matter what type of AbstractString the second argument is
-@test_throws BoundsError findnext(isequal('a'), "foo", 0)
-@test_throws BoundsError findnext(in(Char[]), "foo", 5)
-# @test_throws BoundsError findprev(in(Char[]), "foo", 0)
-@test_throws BoundsError findprev(in(Char[]), "foo", 5)
+    # Note: the commented out test will be enabled after fixes to make
+    # sure that findnext/findprev are consistent
+    # no matter what type of AbstractString the second argument is
+    @test_throws BoundsError findnext(isequal('a'), "foo", 0)
+    @test_throws BoundsError findnext(in(Char[]), "foo", 5)
+    # @test_throws BoundsError findprev(in(Char[]), "foo", 0)
+    @test_throws BoundsError findprev(in(Char[]), "foo", 5)
 
-# @test_throws ErrorException in("foobar","bar")
-@test_throws BoundsError findnext(isequal(0x1),b"\x1\x2",0)
+    # @test_throws ErrorException in("foobar","bar")
+    @test_throws BoundsError findnext(isequal(0x1),b"\x1\x2",0)
+end
 
-# ascii forward search
-for str in [astr, GenericString(astr)]
+@testset "ascii forward search $(typeof(str))" for str in [astr, GenericString(astr)]
     @test_throws BoundsError findnext(isequal('z'), str, 0)
     @test_throws BoundsError findnext(isequal('∀'), str, 0)
-    @test findfirst(isequal('x'), str) == nothing
-    @test findfirst(isequal('\0'), str) == nothing
-    @test findfirst(isequal('\u80'), str) == nothing
-    @test findfirst(isequal('∀'), str) == nothing
+    @test findfirst(isequal('x'), str) === nothing
+    @test findfirst(isequal('\0'), str) === nothing
+    @test findfirst(isequal('\u80'), str) === nothing
+    @test findfirst(isequal('∀'), str) === nothing
     @test findfirst(isequal('H'), str) == 1
     @test findfirst(isequal('l'), str) == 3
     @test findnext(isequal('l'), str, 4) == 4
     @test findnext(isequal('l'), str, 5) == 11
-    @test findnext(isequal('l'), str, 12) == nothing
+    @test findnext(isequal('l'), str, 12) === nothing
     @test findfirst(isequal(','), str) == 6
-    @test findnext(isequal(','), str, 7) == nothing
+    @test findnext(isequal(','), str, 7) === nothing
     @test findfirst(isequal('\n'), str) == 14
-    @test findnext(isequal('\n'), str, 15) == nothing
+    @test findnext(isequal('\n'), str, 15) === nothing
     @test_throws BoundsError findnext(isequal('ε'), str, nextind(str,lastindex(str))+1)
     @test_throws BoundsError findnext(isequal('a'), str, nextind(str,lastindex(str))+1)
-end
 
-for str in [astr, GenericString(astr)]
     @test_throws BoundsError findnext('z', str, 0)
     @test_throws BoundsError findnext('∀', str, 0)
-    @test findfirst('x', str) == nothing
-    @test findfirst('\0', str) == nothing
-    @test findfirst('\u80', str) == nothing
-    @test findfirst('∀', str) == nothing
+    @test findfirst('x', str) === nothing
+    @test findfirst('\0', str) === nothing
+    @test findfirst('\u80', str) === nothing
+    @test findfirst('∀', str) === nothing
     @test findfirst('H', str) == 1
     @test findfirst('l', str) == 3
     @test findfirst('e', str) == 2
-    @test findfirst('u', str) == nothing
+    @test findfirst('u', str) === nothing
     @test findnext('l', str, 4) == 4
     @test findnext('l', str, 5) == 11
-    @test findnext('l', str, 12) == nothing
+    @test findnext('l', str, 12) === nothing
     @test findfirst(',', str) == 6
-    @test findnext(',', str, 7) == nothing
+    @test findnext(',', str, 7) === nothing
     @test findfirst('\n', str) == 14
-    @test findnext('\n', str, 15) == nothing
+    @test findnext('\n', str, 15) === nothing
     @test_throws BoundsError findnext('ε', str, nextind(str,lastindex(str))+1)
     @test_throws BoundsError findnext('a', str, nextind(str,lastindex(str))+1)
 end
 
-# ascii backward search
-for str in [astr]
-    @test findlast(isequal('x'), str) == nothing
-    @test findlast(isequal('\0'), str) == nothing
-    @test findlast(isequal('\u80'), str) == nothing
-    @test findlast(isequal('∀'), str) == nothing
+@testset "ascii backward search" begin
+    str = astr
+    @test findlast(isequal('x'), str) === nothing
+    @test findlast(isequal('\0'), str) === nothing
+    @test findlast(isequal('\u80'), str) === nothing
+    @test findlast(isequal('∀'), str) === nothing
     @test findlast(isequal('H'), str) == 1
-    @test findprev(isequal('H'), str, 0) == nothing
+    @test findprev(isequal('H'), str, 0) === nothing
     @test findlast(isequal('l'), str) == 11
     @test findprev(isequal('l'), str, 5) == 4
     @test findprev(isequal('l'), str, 4) == 4
     @test findprev(isequal('l'), str, 3) == 3
-    @test findprev(isequal('l'), str, 2) == nothing
+    @test findprev(isequal('l'), str, 2) === nothing
     @test findlast(isequal(','), str) == 6
-    @test findprev(isequal(','), str, 5) == nothing
+    @test findprev(isequal(','), str, 5) === nothing
     @test findlast(isequal('\n'), str) == 14
-end
 
-for str in [astr]
-    @test findlast('x', str) == nothing
-    @test findlast('\0', str) == nothing
-    @test findlast('\u80', str) == nothing
-    @test findlast('∀', str) == nothing
+    @test findlast('x', str) === nothing
+    @test findlast('\0', str) === nothing
+    @test findlast('\u80', str) === nothing
+    @test findlast('∀', str) === nothing
     @test findlast('H', str) == 1
-    @test findprev('H', str, 0) == nothing
+    @test findprev('H', str, 0) === nothing
     @test findlast('l', str) == 11
     @test findprev('l', str, 5) == 4
     @test findprev('l', str, 4) == 4
     @test findprev('l', str, 3) == 3
-    @test findprev('l', str, 2) == nothing
+    @test findprev('l', str, 2) === nothing
     @test findlast(',', str) == 6
-    @test findprev(',', str, 5) == nothing
-    @test findlast(str, "") == nothing
-    @test findlast(str^2, str) == nothing
+    @test findprev(',', str, 5) === nothing
+    @test findlast(str, "") === nothing
+    @test findlast(str^2, str) === nothing
     @test findlast('\n', str) == 14
 end
 
-# utf-8 forward search
-for str in (u8str, GenericString(u8str))
+@testset "utf-8 forward search $(typeof(str))" for str in (u8str, GenericString(u8str))
     @test_throws BoundsError findnext(isequal('z'), str, 0)
     @test_throws BoundsError findnext(isequal('∀'), str, 0)
-    @test findfirst(isequal('z'), str) == nothing
-    @test findfirst(isequal('\0'), str) == nothing
-    @test findfirst(isequal('\u80'), str) == nothing
-    @test findfirst(isequal('∄'), str) == nothing
+    @test findfirst(isequal('z'), str) === nothing
+    @test findfirst(isequal('\0'), str) === nothing
+    @test findfirst(isequal('\u80'), str) === nothing
+    @test findfirst(isequal('∄'), str) === nothing
     @test findfirst(isequal('∀'), str) == 1
     @test_throws StringIndexError findnext(isequal('∀'), str, 2)
-    @test findnext(isequal('∀'), str, 4) == nothing
+    @test findnext(isequal('∀'), str, 4) === nothing
     @test findfirst(isequal('∃'), str) == 13
     @test_throws StringIndexError findnext(isequal('∃'), str, 15)
-    @test findnext(isequal('∃'), str, 16) == nothing
+    @test findnext(isequal('∃'), str, 16) === nothing
     @test findfirst(isequal('x'), str) == 26
     @test findnext(isequal('x'), str, 27) == 43
-    @test findnext(isequal('x'), str, 44) == nothing
+    @test findnext(isequal('x'), str, 44) === nothing
     @test findfirst(isequal('δ'), str) == 17
     @test_throws StringIndexError findnext(isequal('δ'), str, 18)
     @test findnext(isequal('δ'), str, nextind(str,17)) == 33
-    @test findnext(isequal('δ'), str, nextind(str,33)) == nothing
+    @test findnext(isequal('δ'), str, nextind(str,33)) === nothing
     @test findfirst(isequal('ε'), str) == 5
     @test findnext(isequal('ε'), str, nextind(str,5)) == 54
-    @test findnext(isequal('ε'), str, nextind(str,54)) == nothing
-    @test findnext(isequal('ε'), str, nextind(str,lastindex(str))) == nothing
-    @test findnext(isequal('a'), str, nextind(str,lastindex(str))) == nothing
+    @test findnext(isequal('ε'), str, nextind(str,54)) === nothing
+    @test findnext(isequal('ε'), str, nextind(str,lastindex(str))) === nothing
+    @test findnext(isequal('a'), str, nextind(str,lastindex(str))) === nothing
     @test_throws BoundsError findnext(isequal('ε'), str, nextind(str,lastindex(str))+1)
     @test_throws BoundsError findnext(isequal('a'), str, nextind(str,lastindex(str))+1)
 end
 
-# utf-8 backward search
-for str in [u8str]
-    @test findlast(isequal('z'), str) == nothing
-    @test findlast(isequal('\0'), str) == nothing
-    @test findlast(isequal('\u80'), str) == nothing
-    @test findlast(isequal('∄'), str) == nothing
+@testset "utf-8 backward search" begin
+    str = u8str
+    @test findlast(isequal('z'), str) === nothing
+    @test findlast(isequal('\0'), str) === nothing
+    @test findlast(isequal('\u80'), str) === nothing
+    @test findlast(isequal('∄'), str) === nothing
     @test findlast(isequal('∀'), str) == 1
-    @test findprev(isequal('∀'), str, 0) == nothing
+    @test findprev(isequal('∀'), str, 0) === nothing
     @test findlast(isequal('∃'), str) == 13
     @test findprev(isequal('∃'), str, 14) == 13
     @test findprev(isequal('∃'), str, 13) == 13
-    @test findprev(isequal('∃'), str, 12) == nothing
+    @test findprev(isequal('∃'), str, 12) === nothing
     @test findlast(isequal('x'), str) == 43
     @test findprev(isequal('x'), str, 42) == 26
-    @test findprev(isequal('x'), str, 25) == nothing
+    @test findprev(isequal('x'), str, 25) === nothing
     @test findlast(isequal('δ'), str) == 33
     @test findprev(isequal('δ'), str, 32) == 17
-    @test findprev(isequal('δ'), str, 16) == nothing
+    @test findprev(isequal('δ'), str, 16) === nothing
     @test findlast(isequal('ε'), str) == 54
     @test findprev(isequal('ε'), str, 53) == 5
-    @test findprev(isequal('ε'), str, 4) == nothing
+    @test findprev(isequal('ε'), str, 4) === nothing
+end
+
+@testset "string forward search with a single-char string" begin
+    @test findfirst("x", astr) === nothing
+    @test findfirst("H", astr) == 1:1
+    @test findnext("H", astr, 2) === nothing
+    @test findfirst("l", astr) == 3:3
+    @test findnext("l", astr, 4) == 4:4
+    @test findnext("l", astr, 5) == 11:11
+    @test findnext("l", astr, 12) === nothing
+    @test findfirst("\n", astr) == 14:14
+    @test findnext("\n", astr, 15) === nothing
+
+    @test findfirst("z", u8str) === nothing
+    @test findfirst("∄", u8str) === nothing
+    @test findfirst("∀", u8str) == 1:1
+    @test findnext("∀", u8str, 4) === nothing
+    @test findfirst("∃", u8str) == 13:13
+    @test findnext("∃", u8str, 16) === nothing
+    @test findfirst("x", u8str) == 26:26
+    @test findnext("x", u8str, 27) == 43:43
+    @test findnext("x", u8str, 44) === nothing
+    @test findfirst("ε", u8str) == 5:5
+    @test findnext("ε", u8str, 7) == 54:54
+    @test findnext("ε", u8str, 56) === nothing
 end
 
-# string forward search with a single-char string
-@test findfirst("x", astr) == nothing
-@test findfirst("H", astr) == 1:1
-@test findnext("H", astr, 2) == nothing
-@test findfirst("l", astr) == 3:3
-@test findnext("l", astr, 4) == 4:4
-@test findnext("l", astr, 5) == 11:11
-@test findnext("l", astr, 12) == nothing
-@test findfirst("\n", astr) == 14:14
-@test findnext("\n", astr, 15) == nothing
-
-@test findfirst("z", u8str) == nothing
-@test findfirst("∄", u8str) == nothing
-@test findfirst("∀", u8str) == 1:1
-@test findnext("∀", u8str, 4) == nothing
-@test findfirst("∃", u8str) == 13:13
-@test findnext("∃", u8str, 16) == nothing
-@test findfirst("x", u8str) == 26:26
-@test findnext("x", u8str, 27) == 43:43
-@test findnext("x", u8str, 44) == nothing
-@test findfirst("ε", u8str) == 5:5
-@test findnext("ε", u8str, 7) == 54:54
-@test findnext("ε", u8str, 56) == nothing
-
-# strifindprev  backward search with a single-char string
-@test findlast("x", astr) == nothing
-@test findlast("H", astr) == 1:1
-@test findprev("H", astr, 2) == 1:1
-@test findprev("H", astr, 0) == nothing
-@test findlast("l", astr) == 11:11
-@test findprev("l", astr, 10) == 4:4
-@test findprev("l", astr, 4) == 4:4
-@test findprev("l", astr, 3) == 3:3
-@test findprev("l", astr, 2) == nothing
-@test findlast("\n", astr) == 14:14
-@test findprev("\n", astr, 13) == nothing
-
-@test findlast("z", u8str) == nothing
-@test findlast("∄", u8str) == nothing
-@test findlast("∀", u8str) == 1:1
-@test findprev("∀", u8str, 0) == nothing
-#TODO: setting the limit in the middle of a wide char
-#      makes findnext fail but findprev succeed.
-#      Should findprev fail as well?
-#@test findprev("∀", u8str, 2) == nothing # gives 1:3
-@test findlast("∃", u8str) == 13:13
-@test findprev("∃", u8str, 12) == nothing
-@test findlast("x", u8str) == 43:43
-@test findprev("x", u8str, 42) == 26:26
-@test findprev("x", u8str, 25) == nothing
-@test findlast("ε", u8str) == 54:54
-@test findprev("ε", u8str, 53) == 5:5
-@test findprev("ε", u8str, 4) == nothing
-
-# string forward search with a single-char regex
-@test findfirst(r"x", astr) == nothing
-@test findfirst(r"H", astr) == 1:1
-@test findnext(r"H", astr, 2) == nothing
-@test findfirst(r"l", astr) == 3:3
-@test findnext(r"l", astr, 4) == 4:4
-@test findnext(r"l", astr, 5) == 11:11
-@test findnext(r"l", astr, 12) == nothing
-@test findfirst(r"\n", astr) == 14:14
-@test findnext(r"\n", astr, 15) == nothing
-@test findfirst(r"z", u8str) == nothing
-@test findfirst(r"∄", u8str) == nothing
-@test findfirst(r"∀", u8str) == 1:1
-@test findnext(r"∀", u8str, 4) == nothing
-@test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str)
-@test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4)
-@test findfirst(r"∃", u8str) == 13:13
-@test findnext(r"∃", u8str, 16) == nothing
-@test findfirst(r"x", u8str) == 26:26
-@test findnext(r"x", u8str, 27) == 43:43
-@test findnext(r"x", u8str, 44) == nothing
-@test findfirst(r"ε", u8str) == 5:5
-@test findnext(r"ε", u8str, 7) == 54:54
-@test findnext(r"ε", u8str, 56) == nothing
-for i = 1:lastindex(astr)
-    @test findnext(r"."s, astr, i) == i:i
+@testset "findprev backward search with a single-char string" begin
+    @test findlast("x", astr) === nothing
+    @test findlast("H", astr) == 1:1
+    @test findprev("H", astr, 2) == 1:1
+    @test findprev("H", astr, 0) === nothing
+    @test findlast("l", astr) == 11:11
+    @test findprev("l", astr, 10) == 4:4
+    @test findprev("l", astr, 4) == 4:4
+    @test findprev("l", astr, 3) == 3:3
+    @test findprev("l", astr, 2) === nothing
+    @test findlast("\n", astr) == 14:14
+    @test findprev("\n", astr, 13) === nothing
+
+    @test findlast("z", u8str) === nothing
+    @test findlast("∄", u8str) === nothing
+    @test findlast("∀", u8str) == 1:1
+    @test findprev("∀", u8str, 0) === nothing
+    #TODO: setting the limit in the middle of a wide char
+    #      makes findnext fail but findprev succeed.
+    #      Should findprev fail as well?
+    #@test findprev("∀", u8str, 2) === nothing # gives 1:3
+    @test findlast("∃", u8str) == 13:13
+    @test findprev("∃", u8str, 12) === nothing
+    @test findlast("x", u8str) == 43:43
+    @test findprev("x", u8str, 42) == 26:26
+    @test findprev("x", u8str, 25) === nothing
+    @test findlast("ε", u8str) == 54:54
+    @test findprev("ε", u8str, 53) == 5:5
+    @test findprev("ε", u8str, 4) === nothing
 end
-for i = 1:lastindex(u8str)
-    if isvalid(u8str,i)
-        @test findnext(r"."s, u8str, i) == i:i
+
+@testset "string forward search with a single-char regex" begin
+    @test findfirst(r"x", astr) === nothing
+    @test findfirst(r"H", astr) == 1:1
+    @test findnext(r"H", astr, 2) === nothing
+    @test findfirst(r"l", astr) == 3:3
+    @test findnext(r"l", astr, 4) == 4:4
+    @test findnext(r"l", astr, 5) == 11:11
+    @test findnext(r"l", astr, 12) === nothing
+    @test findfirst(r"\n", astr) == 14:14
+    @test findnext(r"\n", astr, 15) === nothing
+    @test findfirst(r"z", u8str) === nothing
+    @test findfirst(r"∄", u8str) === nothing
+    @test findfirst(r"∀", u8str) == 1:1
+    @test findnext(r"∀", u8str, 4) === nothing
+    @test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str)
+    @test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4)
+    @test findfirst(r"∃", u8str) == 13:13
+    @test findnext(r"∃", u8str, 16) === nothing
+    @test findfirst(r"x", u8str) == 26:26
+    @test findnext(r"x", u8str, 27) == 43:43
+    @test findnext(r"x", u8str, 44) === nothing
+    @test findfirst(r"ε", u8str) == 5:5
+    @test findnext(r"ε", u8str, 7) == 54:54
+    @test findnext(r"ε", u8str, 56) === nothing
+    for i = 1:lastindex(astr)
+        @test findnext(r"."s, astr, i) == i:i
+    end
+    for i = 1:lastindex(u8str)
+        if isvalid(u8str,i)
+            @test findnext(r"."s, u8str, i) == i:i
+        end
+    end
+end
+
+@testset "string forward search with a zero-char string" begin
+    for i = 1:lastindex(astr)
+        @test findnext("", astr, i) == i:i-1
     end
+    for i = 1:lastindex(u8str)
+        @test findnext("", u8str, i) == i:i-1
+    end
+    @test findfirst("", "") === 1:0
 end
 
-# string forward search with a zero-char string
-for i = 1:lastindex(astr)
-    @test findnext("", astr, i) == i:i-1
+@testset "string backward search with a zero-char string" begin
+    for i = 1:lastindex(astr)
+        @test findprev("", astr, i) == i:i-1
+    end
+    for i = 1:lastindex(u8str)
+        @test findprev("", u8str, i) == i:i-1
+    end
+    @test findlast("", "") === 1:0
 end
-for i = 1:lastindex(u8str)
-    @test findnext("", u8str, i) == i:i-1
+
+@testset "string forward search with a zero-char regex" begin
+    for i = 1:lastindex(astr)
+        @test findnext(r"", astr, i) == i:i-1
+    end
+    for i = 1:lastindex(u8str)
+        # TODO: should regex search fast-forward invalid indices?
+        if isvalid(u8str,i)
+            @test findnext(r"", u8str, i) == i:i-1
+        end
+    end
 end
-@test findfirst("", "") === 1:0
 
-# string backward search with a zero-char string
-for i = 1:lastindex(astr)
-    @test findprev("", astr, i) == i:i-1
+# See the comments in #54579
+@testset "Search for invalid chars" begin
+    @test findfirst(==('\xff'), "abc\xffde") == 4
+    @test findprev(isequal('\xa6'), "abc\xa69", 5) == 4
+    @test isnothing(findfirst(==('\xff'), "abcdeæd"))
+
+    @test isnothing(findnext(==('\xa6'), "æ", 1))
+    @test isnothing(findprev(==('\xa6'), "æa", 2))
 end
-for i = 1:lastindex(u8str)
-    @test findprev("", u8str, i) == i:i-1
+
+@testset "string forward search with a two-char string literal" begin
+    @test findfirst("xx", "foo,bar,baz") === nothing
+    @test findfirst("fo", "foo,bar,baz") == 1:2
+    @test findnext("fo", "foo,bar,baz", 3) === nothing
+    @test findfirst("oo", "foo,bar,baz") == 2:3
+    @test findnext("oo", "foo,bar,baz", 4) === nothing
+    @test findfirst("o,", "foo,bar,baz") == 3:4
+    @test findnext("o,", "foo,bar,baz", 5) === nothing
+    @test findfirst(",b", "foo,bar,baz") == 4:5
+    @test findnext(",b", "foo,bar,baz", 6) == 8:9
+    @test findnext(",b", "foo,bar,baz", 10) === nothing
+    @test findfirst("az", "foo,bar,baz") == 10:11
+    @test findnext("az", "foo,bar,baz", 12) === nothing
 end
-@test findlast("", "") === 1:0
 
-# string forward search with a zero-char regex
-for i = 1:lastindex(astr)
-    @test findnext(r"", astr, i) == i:i-1
+# See the comments in #54579
+@testset "Search for invalid chars" begin
+    @test findfirst(==('\xff'), "abc\xffde") == 4
+    @test findprev(isequal('\xa6'), "abc\xa69", 5) == 4
+    @test isnothing(findfirst(==('\xff'), "abcdeæd"))
+
+    @test isnothing(findnext(==('\xa6'), "æ", 1))
+    @test isnothing(findprev(==('\xa6'), "æa", 2))
 end
-for i = 1:lastindex(u8str)
-    # TODO: should regex search fast-forward invalid indices?
-    if isvalid(u8str,i)
-        @test findnext(r"", u8str, i) == i:i-1
-    end
+
+@testset "issue #9365" begin
+    # string forward search with a two-char UTF-8 (2 byte) string literal
+    @test findfirst("éé", "ééé") == 1:3
+    @test findnext("éé", "ééé", 1) == 1:3
+    # string forward search with a two-char UTF-8 (3 byte) string literal
+    @test findfirst("€€", "€€€") == 1:4
+    @test findnext("€€", "€€€", 1) == 1:4
+    # string forward search with a two-char UTF-8 (4 byte) string literal
+    @test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5
+    @test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5
+
+    # string forward search with a two-char UTF-8 (2 byte) string literal
+    @test findfirst("éé", "éé") == 1:3
+    @test findnext("éé", "éé", 1) == 1:3
+    # string forward search with a two-char UTF-8 (3 byte) string literal
+    @test findfirst("€€", "€€") == 1:4
+    @test findnext("€€", "€€", 1) == 1:4
+    # string forward search with a two-char UTF-8 (4 byte) string literal
+    @test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5
+    @test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5
+
+    # string backward search with a two-char UTF-8 (2 byte) string literal
+    @test findlast("éé", "ééé") == 3:5
+    @test findprev("éé", "ééé", lastindex("ééé")) == 3:5
+    # string backward search with a two-char UTF-8 (3 byte) string literal
+    @test findlast("€€", "€€€") == 4:7
+    @test findprev("€€", "€€€", lastindex("€€€")) == 4:7
+    # string backward search with a two-char UTF-8 (4 byte) string literal
+    @test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9
+    @test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 5:9
+
+    # string backward search with a two-char UTF-8 (2 byte) string literal
+    @test findlast("éé", "éé") == 1:3        # should really be 1:4!
+    @test findprev("éé", "éé", lastindex("ééé")) == 1:3
+    # string backward search with a two-char UTF-8 (3 byte) string literal
+    @test findlast("€€", "€€") == 1:4        # should really be 1:6!
+    @test findprev("€€", "€€", lastindex("€€€")) == 1:4
+    # string backward search with a two-char UTF-8 (4 byte) string literal
+    @test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5        # should really be 1:8!
+    @test findprev("\U1f596\U1f596", "\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 1:5
 end
 
-# string forward search with a two-char string literal
-@test findfirst("xx", "foo,bar,baz") == nothing
-@test findfirst("fo", "foo,bar,baz") == 1:2
-@test findnext("fo", "foo,bar,baz", 3) == nothing
-@test findfirst("oo", "foo,bar,baz") == 2:3
-@test findnext("oo", "foo,bar,baz", 4) == nothing
-@test findfirst("o,", "foo,bar,baz") == 3:4
-@test findnext("o,", "foo,bar,baz", 5) == nothing
-@test findfirst(",b", "foo,bar,baz") == 4:5
-@test findnext(",b", "foo,bar,baz", 6) == 8:9
-@test findnext(",b", "foo,bar,baz", 10) == nothing
-@test findfirst("az", "foo,bar,baz") == 10:11
-@test findnext("az", "foo,bar,baz", 12) == nothing
-
-# issue #9365
-# string forward search with a two-char UTF-8 (2 byte) string literal
-@test findfirst("éé", "ééé") == 1:3
-@test findnext("éé", "ééé", 1) == 1:3
-# string forward search with a two-char UTF-8 (3 byte) string literal
-@test findfirst("€€", "€€€") == 1:4
-@test findnext("€€", "€€€", 1) == 1:4
-# string forward search with a two-char UTF-8 (4 byte) string literal
-@test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5
-@test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5
-
-# string forward search with a two-char UTF-8 (2 byte) string literal
-@test findfirst("éé", "éé") == 1:3
-@test findnext("éé", "éé", 1) == 1:3
-# string forward search with a two-char UTF-8 (3 byte) string literal
-@test findfirst("€€", "€€") == 1:4
-@test findnext("€€", "€€", 1) == 1:4
-# string forward search with a two-char UTF-8 (4 byte) string literal
-@test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5
-@test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5
-
-# string backward search with a two-char UTF-8 (2 byte) string literal
-@test findlast("éé", "ééé") == 3:5
-@test findprev("éé", "ééé", lastindex("ééé")) == 3:5
-# string backward search with a two-char UTF-8 (3 byte) string literal
-@test findlast("€€", "€€€") == 4:7
-@test findprev("€€", "€€€", lastindex("€€€")) == 4:7
-# string backward search with a two-char UTF-8 (4 byte) string literal
-@test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9
-@test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 5:9
-
-# string backward search with a two-char UTF-8 (2 byte) string literal
-@test findlast("éé", "éé") == 1:3        # should really be 1:4!
-@test findprev("éé", "éé", lastindex("ééé")) == 1:3
-# string backward search with a two-char UTF-8 (3 byte) string literal
-@test findlast("€€", "€€") == 1:4        # should really be 1:6!
-@test findprev("€€", "€€", lastindex("€€€")) == 1:4
-# string backward search with a two-char UTF-8 (4 byte) string literal
-@test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5        # should really be 1:8!
-@test findprev("\U1f596\U1f596", "\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 1:5
-
-# string backward search with a two-char string literal
-@test findlast("xx", "foo,bar,baz") == nothing
-@test findlast("fo", "foo,bar,baz") == 1:2
-@test findprev("fo", "foo,bar,baz", 1) == nothing
-@test findlast("oo", "foo,bar,baz") == 2:3
-@test findprev("oo", "foo,bar,baz", 2) == nothing
-@test findlast("o,", "foo,bar,baz") == 3:4
-@test findprev("o,", "foo,bar,baz", 1) == nothing
-@test findlast(",b", "foo,bar,baz") == 8:9
-@test findprev(",b", "foo,bar,baz", 6) == 4:5
-@test findprev(",b", "foo,bar,baz", 3) == nothing
-@test findlast("az", "foo,bar,baz") == 10:11
-@test findprev("az", "foo,bar,baz", 10) == nothing
-
-# string search with a two-char regex
-@test findfirst(r"xx", "foo,bar,baz") == nothing
-@test findfirst(r"fo", "foo,bar,baz") == 1:2
-@test findnext(r"fo", "foo,bar,baz", 3) == nothing
-@test findfirst(r"oo", "foo,bar,baz") == 2:3
-@test findnext(r"oo", "foo,bar,baz", 4) == nothing
-@test findfirst(r"o,", "foo,bar,baz") == 3:4
-@test findnext(r"o,", "foo,bar,baz", 5) == nothing
-@test findfirst(r",b", "foo,bar,baz") == 4:5
-@test findnext(r",b", "foo,bar,baz", 6) == 8:9
-@test findnext(r",b", "foo,bar,baz", 10) == nothing
-@test findfirst(r"az", "foo,bar,baz") == 10:11
-@test findnext(r"az", "foo,bar,baz", 12) == nothing
-
-# occursin with a String and Char needle
-@test occursin("o", "foo")
-@test occursin('o', "foo")
-# occursin in curried form
-@test occursin("foo")("o")
-@test occursin("foo")('o')
-
-# contains
-@test contains("foo", "o")
-@test contains("foo", 'o')
-# contains in curried form
-@test contains("o")("foo")
-@test contains('o')("foo")
-
-@test_throws ErrorException "ab" ∈ "abc"
-
-# issue #15723
-@test findfirst(isequal('('), "⨳(") == 4
-@test findnext(isequal('('), "(⨳(", 2) == 5
-@test findlast(isequal('('), "(⨳(") == 5
-@test findprev(isequal('('), "(⨳(", 2) == 1
-
-@test @inferred findall(isequal('a'), "éa") == [3]
-@test @inferred findall(isequal('€'), "€€") == [1, 4]
-@test @inferred isempty(findall(isequal('é'), ""))
-
-# issue #18109
-s_18109 = "fooα🐨βcd3"
-@test findlast(isequal('o'), s_18109) == 3
-@test findfirst(isequal('d'), s_18109) == 13
-
-# findall (issue #31788)
-@testset "findall" begin
+@testset "string backward search with a two-char string literal" begin
+    @test findlast("xx", "foo,bar,baz") === nothing
+    @test findlast("fo", "foo,bar,baz") == 1:2
+    @test findprev("fo", "foo,bar,baz", 1) === nothing
+    @test findlast("oo", "foo,bar,baz") == 2:3
+    @test findprev("oo", "foo,bar,baz", 2) === nothing
+    @test findlast("o,", "foo,bar,baz") == 3:4
+    @test findprev("o,", "foo,bar,baz", 1) === nothing
+    @test findlast(",b", "foo,bar,baz") == 8:9
+    @test findprev(",b", "foo,bar,baz", 6) == 4:5
+    @test findprev(",b", "foo,bar,baz", 3) === nothing
+    @test findlast("az", "foo,bar,baz") == 10:11
+    @test findprev("az", "foo,bar,baz", 10) === nothing
+end
+
+@testset "string search with a two-char regex" begin
+    @test findfirst(r"xx", "foo,bar,baz") === nothing
+    @test findfirst(r"fo", "foo,bar,baz") == 1:2
+    @test findnext(r"fo", "foo,bar,baz", 3) === nothing
+    @test findfirst(r"oo", "foo,bar,baz") == 2:3
+    @test findnext(r"oo", "foo,bar,baz", 4) === nothing
+    @test findfirst(r"o,", "foo,bar,baz") == 3:4
+    @test findnext(r"o,", "foo,bar,baz", 5) === nothing
+    @test findfirst(r",b", "foo,bar,baz") == 4:5
+    @test findnext(r",b", "foo,bar,baz", 6) == 8:9
+    @test findnext(r",b", "foo,bar,baz", 10) === nothing
+    @test findfirst(r"az", "foo,bar,baz") == 10:11
+    @test findnext(r"az", "foo,bar,baz", 12) === nothing
+end
+
+@testset "occursin/contains" begin
+    # occursin with a String and Char needle
+    @test occursin("o", "foo")
+    @test occursin('o', "foo")
+    # occursin in curried form
+    @test occursin("foo")("o")
+    @test occursin("foo")('o')
+
+    # contains
+    @test contains("foo", "o")
+    @test contains("foo", 'o')
+    # contains in curried form
+    @test contains("o")("foo")
+    @test contains('o')("foo")
+
+    @test_throws ErrorException "ab" ∈ "abc"
+end
+
+@testset "issue #15723" begin
+    @test findfirst(isequal('('), "⨳(") == 4
+    @test findnext(isequal('('), "(⨳(", 2) == 5
+    @test findlast(isequal('('), "(⨳(") == 5
+    @test findprev(isequal('('), "(⨳(", 2) == 1
+
+    @test @inferred findall(isequal('a'), "éa") == [3]
+    @test @inferred findall(isequal('€'), "€€") == [1, 4]
+    @test @inferred isempty(findall(isequal('é'), ""))
+end
+
+
+@testset "issue #18109" begin
+    s_18109 = "fooα🐨βcd3"
+    @test findlast(isequal('o'), s_18109) == 3
+    @test findfirst(isequal('d'), s_18109) == 13
+end
+
+@testset "findall (issue #31788)" begin
     @test findall("fooo", "foo") == UnitRange{Int}[]
     @test findall("ing", "Spinning laughing dancing") == [6:8, 15:17, 23:25]
     @test all(findall("", "foo") .=== [1:0, 2:1, 3:2, 4:3]) # use === to compare empty ranges
@@ -395,13 +425,29 @@ s_18109 = "fooα🐨βcd3"
     @test findall("aa", "aaaaaa", overlap=true) == [1:2, 2:3, 3:4, 4:5, 5:6]
 end
 
+@testset "Findall char in string" begin
+    @test findall(==('w'), "wabcwewwawk") == [1, 5, 7, 8, 10]
+    @test isempty(findall(isequal("w"), "abcde!,"))
+    @test findall(==('读'), "联国读大会一九四二月十读日第号决通过并颁布读") == [7, 34, 64]
+
+    # Empty string
+    @test isempty(findall(isequal('K'), ""))
+    @test isempty(findall(isequal('α'), ""))
+
+    # Finds an invalid char ONLY if it's at a char boundary in the string,
+    # i.e. iterating the string would emit the given char.
+    @test findall(==('\xfe'), "abκæøc\xfeα\xfeβå!") == [10, 13]
+    @test isempty(findall(==('\xaf'), "abκæ读α\xe8\xaf\xfeβå!"))
+    @test isempty(findall(==('\xc3'), ";æ"))
+end
+
 # issue 37280
 @testset "UInt8, Int8 vector" begin
     for T in [Int8, UInt8], VT in [Int8, UInt8]
         A = T[0x40, 0x52, 0x00, 0x52, 0x00]
 
         for A in (A, @view(A[1:end]), codeunits(String(copyto!(Vector{UInt8}(undef,5), A))))
-            @test findfirst(VT[0x30], A) === findfirst(==(VT(0x30)), A) == nothing
+            @test findfirst(VT[0x30], A) === findfirst(==(VT(0x30)), A) === nothing
             @test findfirst(VT[0x52], A) === 2:2
             @test findfirst(==(VT(0x52)), A) === 2
             @test findlast(VT[0x30], A) === findlast(==(VT(0x30)), A) === nothing
@@ -429,6 +475,74 @@ end
             @test_throws BoundsError findprev(pattern, A, -3)
         end
     end
+
+    @test findall([0x01, 0x02], [0x03, 0x01, 0x02, 0x01, 0x02, 0x06]) == [2:3, 4:5]
+    @test isempty(findall([0x04, 0x05], [0x03, 0x04, 0x06]))
+end
+
+# Issue 54578
+@testset "No conflation of Int8 and UInt8" begin
+    # Work for mixed types if the values are the same
+    @test findfirst(==(Int8(1)), [0x01]) == 1
+    @test findnext(iszero, Int8[0, -2, 0, -3], 2) == 3
+    @test findfirst(Int8[1,4], UInt8[0, 2, 4, 1, 8, 1, 4, 2]) == 6:7
+    @test findprev(UInt8[5, 6], Int8[1, 9, 2, 5, 6, 3], 6) == 4:5
+
+    # Returns nothing for the same methods if the values are different,
+    # even if the bitpatterns are the same
+    @test isnothing(findfirst(==(Int8(-1)), [0xff]))
+    @test isnothing(findnext(isequal(0xff), Int8[-1, -2, -1], 2))
+    @test isnothing(findfirst(UInt8[0xff, 0xfe], Int8[0, -1, -2, 1, 8, 1, 4, 2]))
+    @test isnothing(findprev(UInt8[0xff, 0xfe], Int8[1, 9, 2, -1, -2, 3], 6))
+end
+
+@testset "DenseArray with offsets" begin
+    isdefined(Main, :OffsetDenseArrays) || @eval Main include("../testhelpers/OffsetDenseArrays.jl")
+    OffsetDenseArrays = Main.OffsetDenseArrays
+
+    A = OffsetDenseArrays.OffsetDenseArray(collect(0x61:0x69), 100)
+    @test findfirst(==(0x61), A) == 101
+    @test findlast(==(0x61), A) == 101
+    @test findfirst(==(0x00), A) === nothing
+
+    @test findfirst([0x62, 0x63, 0x64], A) == 102:104
+    @test findlast([0x63, 0x64], A) == 103:104
+    @test findall([0x62, 0x63], A) == [102:103]
+
+    @test findfirst(iszero, A) === nothing
+    A = OffsetDenseArrays.OffsetDenseArray([0x01, 0x02, 0x00, 0x03], -100)
+    @test findfirst(iszero, A) == -97
+    @test findnext(==(0x02), A, -99) == -98
+    @test findnext(==(0x02), A, -97) === nothing
+end
+
+# NOTE: The strange edge cases are tested for here, but that does not mean
+# they are intentional. Ideally, the behaviour should be changed. See issue 54584
+@testset "Edge behaviours of findnext/last" begin
+    # Empty haystack causes no errors
+    @test isempty(findall(==('\x00'), ""))
+
+    # Findnext errors when i is not a valid index, findprev does not
+    @test_throws StringIndexError findnext(==('a'), "æøå", 2)
+    @test findprev(==('æ'), "æøå", 4) == 1
+
+    # Findnext errors when i < 1 or i > ncodeunits(s) + 1
+    @test_throws BoundsError findnext(==('a'), "abc", 0)
+    @test_throws BoundsError findnext(==('a'), "abc", -1)
+    @test_throws BoundsError findnext(==('a'), "abc", 5)
+    @test_throws BoundsError findnext(==('a'), "æøå", 8)
+    @test findnext(==('a'), "æøå", 7) === nothing
+
+    # Findprev errors when i > ncodeunits(s) + 1 or i < 0
+    @test findprev(==('a'), "abc", 0) === nothing
+    @test findprev(==('æ'), "æøå", 5) == 1
+    @test_throws BoundsError findprev(==('a'), "abc", -1)
+    @test_throws BoundsError findprev(==('æ'), "æøå", 8)
+
+    # Findprev returns nothing when i == ncodeunits(s) + 1
+    @test findprev(==('æ'), "æøå", 7) === nothing
+    @test findprev(==('a'), "abc", 4) === nothing
+    @test findprev(==('a'), "abc", 3) == 1
 end
 
 # issue 32568
diff --git a/test/strings/types.jl b/test/strings/types.jl
index 771be253b1ec9..c09652c3a608d 100644
--- a/test/strings/types.jl
+++ b/test/strings/types.jl
@@ -2,196 +2,211 @@
 
 ## SubString and Cstring tests ##
 
-## SubString tests ##
-u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
-u8str2 = u8str^2
-len_u8str = length(u8str)
-slen_u8str = length(u8str)
-len_u8str2 = length(u8str2)
-slen_u8str2 = length(u8str2)
-
-@test len_u8str2 == 2 * len_u8str
-@test slen_u8str2 == 2 * slen_u8str
-
-u8str2plain = String(u8str2)
-
-for i1 = 1:length(u8str2)
-    if !isvalid(u8str2, i1); continue; end
-    for i2 = i1:length(u8str2)
-        if !isvalid(u8str2, i2); continue; end
-        @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
-        @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
-        @test u8str2[i1:i2] == u8str2plain[i1:i2]
+@testset "SubString" begin
+    u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
+    u8str2 = u8str^2
+    len_u8str = length(u8str)
+    slen_u8str = length(u8str)
+    len_u8str2 = length(u8str2)
+    slen_u8str2 = length(u8str2)
+
+    @test len_u8str2 == 2 * len_u8str
+    @test slen_u8str2 == 2 * slen_u8str
+
+    u8str2plain = String(u8str2)
+    @test !isascii(u8str2)
+    @test cmp(u8str2, u8str^3) == -1
+    @test cmp(u8str2, u8str2)  == 0
+    @test cmp(u8str^3, u8str2) == 1
+    @test codeunit(u8str2) == codeunit(u8str2plain)
+
+    @test convert(Union{String, SubString{String}}, u8str2)      === u8str2
+    @test convert(Union{String, SubString{String}}, u8str2plain) === u8str2plain
+
+    for i1 = 1:ncodeunits(u8str2)
+        if !isvalid(u8str2, i1); continue; end
+        for i2 = i1:ncodeunits(u8str2)
+            if !isvalid(u8str2, i2); continue; end
+            @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
+            @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
+            @test u8str2[i1:i2] == u8str2plain[i1:i2]
+        end
     end
-end
 
-# tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes
-# gives the same result as `getindex` (except that it is a view not a copy)
-for idx in 0:1
-    @test SubString("∀", 1, idx) == "∀"[1:idx]
-end
+    # tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes
+    # gives the same result as `getindex` (except that it is a view not a copy)
+    for idx in 0:1
+        @test SubString("∀", 1, idx) == "∀"[1:idx]
+    end
 
-# Substring provided with invalid end index throws BoundsError
-@test_throws StringIndexError SubString("∀", 1, 2)
-@test_throws StringIndexError SubString("∀", 1, 3)
-@test_throws BoundsError SubString("∀", 1, 4)
-
-# Substring provided with invalid start index throws BoundsError
-@test SubString("∀∀", 1:1) == "∀"
-@test SubString("∀∀", 1:4) == "∀∀"
-@test SubString("∀∀", 4:4) == "∀"
-@test_throws StringIndexError SubString("∀∀", 1:2)
-@test_throws StringIndexError SubString("∀∀", 1:5)
-@test_throws StringIndexError SubString("∀∀", 2:4)
-@test_throws BoundsError SubString("∀∀", 0:1)
-@test_throws BoundsError SubString("∀∀", 0:4)
-@test_throws BoundsError SubString("∀∀", 1:7)
-@test_throws BoundsError SubString("∀∀", 4:7)
-
-# tests for SubString of more than one multibyte `Char` string
-# we are consistent with `getindex` for `String`
-for idx in [0, 1, 4]
-    @test SubString("∀∀", 1, idx) == "∀∀"[1:idx]
-    @test SubString("∀∀", 4, idx) == "∀∀"[4:idx]
-end
+    @testset "invalid end index" begin
+        # Substring provided with invalid end index throws BoundsError
+        @test_throws StringIndexError SubString("∀", 1, 2)
+        @test_throws StringIndexError SubString("∀", 1, 3)
+        @test_throws BoundsError SubString("∀", 1, 4)
+    end
 
-# index beyond lastindex("∀∀")
-for idx in [2:3; 5:6]
-    @test_throws StringIndexError SubString("∀∀", 1, idx)
-end
-for idx in 7:8
-    @test_throws BoundsError SubString("∀∀", 1, idx)
-end
+    @testset "invalid start index" begin
+        # Substring provided with invalid start index throws BoundsError
+        @test SubString("∀∀", 1:1) == "∀"
+        @test SubString("∀∀", 1:4) == "∀∀"
+        @test SubString("∀∀", 4:4) == "∀"
+        @test_throws StringIndexError SubString("∀∀", 1:2)
+        @test_throws StringIndexError SubString("∀∀", 1:5)
+        @test_throws StringIndexError SubString("∀∀", 2:4)
+        @test_throws BoundsError SubString("∀∀", 0:1)
+        @test_throws BoundsError SubString("∀∀", 0:4)
+        @test_throws BoundsError SubString("∀∀", 1:7)
+        @test_throws BoundsError SubString("∀∀", 4:7)
+    end
 
-let str="tempus fugit"              #length(str)==12
-    ss=SubString(str,1,lastindex(str)) #match source string
-    @test length(ss)==length(str)
+    # tests for SubString of more than one multibyte `Char` string
+    # we are consistent with `getindex` for `String`
+    for idx in [0, 1, 4]
+        @test SubString("∀∀", 1, idx) == "∀∀"[1:idx]
+        @test SubString("∀∀", 4, idx) == "∀∀"[4:idx]
+    end
 
-    ss=SubString(str,1:lastindex(str))
-    @test length(ss)==length(str)
+    @testset "index beyond lastindex(\"∀∀\")" begin
+        for idx in [2:3; 5:6]
+            @test_throws StringIndexError SubString("∀∀", 1, idx)
+        end
+        for idx in 7:8
+            @test_throws BoundsError SubString("∀∀", 1, idx)
+        end
+    end
 
-    ss=SubString(str,1,0)    #empty SubString
-    @test length(ss)==0
+    let str="tempus fugit"              #length(str)==12
+        ss=SubString(str,1,lastindex(str)) #match source string
+        @test length(ss)==length(str)
 
-    ss=SubString(str,1:0)
-    @test length(ss)==0
+        ss=SubString(str,1:lastindex(str))
+        @test length(ss)==length(str)
 
-    @test_throws BoundsError SubString(str, 14, 20)  #start indexing beyond source string length
-    @test_throws BoundsError SubString(str, 10, 16)  #end indexing beyond source string length
+        ss=SubString(str,1,0)    #empty SubString
+        @test length(ss)==0
 
-    @test_throws BoundsError SubString("", 1, 4)  #empty source string
-    @test_throws BoundsError SubString("", 1, 1)  #empty source string, identical start and end index
-    @test_throws BoundsError SubString("", 10, 12)
-    @test SubString("", 12, 10) == ""
-end
+        ss=SubString(str,1:0)
+        @test length(ss)==0
 
-@test SubString("foobar", big(1), big(3)) == "foo"
-
-let str = "aa\u2200\u2222bb"
-    u = SubString(str, 3, 6)
-    @test length(u) == 2
-    b = IOBuffer()
-    write(b, u)
-    @test String(take!(b)) == "\u2200\u2222"
-
-    @test_throws StringIndexError SubString(str, 4, 5)
-    @test_throws BoundsError iterate(u, 0)
-    @test_throws BoundsError iterate(u, 8)
-    @test_throws BoundsError getindex(u, 0)
-    @test_throws BoundsError getindex(u, 7)
-    @test_throws BoundsError getindex(u, 0:1)
-    @test_throws BoundsError getindex(u, 7:7)
-    @test reverseind(u, 1) == 4
-    @test typeof(Base.cconvert(Ptr{Int8}, u)) == SubString{String}
-    @test Base.cconvert(Ptr{Int8}, u) == u
-end
+        @test_throws BoundsError SubString(str, 14, 20)  #start indexing beyond source string length
+        @test_throws BoundsError SubString(str, 10, 16)  #end indexing beyond source string length
 
-let str = "føøbar"
-    @test_throws BoundsError SubString(str, 10, 10)
-    u = SubString(str, 4, 3)
-    @test length(u) == 0
-    b = IOBuffer()
-    write(b, u)
-    @test String(take!(b)) == ""
-end
+        @test_throws BoundsError SubString("", 1, 4)  #empty source string
+        @test_throws BoundsError SubString("", 1, 1)  #empty source string, identical start and end index
+        @test_throws BoundsError SubString("", 10, 12)
+        @test SubString("", 12, 10) == ""
+    end
 
-# search and SubString (issue #5679)
-let str = "Hello, world!"
-    u = SubString(str, 1, 5)
-    @test findlast("World", u) == nothing
-    @test findlast(isequal('z'), u) == nothing
-    @test findlast("ll", u) == 3:4
-end
+    @test SubString("foobar", big(1), big(3)) == "foo"
+
+    let str = "aa\u2200\u2222bb"
+        u = SubString(str, 3, 6)
+        @test length(u) == 2
+        b = IOBuffer()
+        write(b, u)
+        @test String(take!(b)) == "\u2200\u2222"
+
+        @test_throws StringIndexError SubString(str, 4, 5)
+        @test_throws BoundsError iterate(u, 0)
+        @test_throws BoundsError iterate(u, 8)
+        @test_throws BoundsError getindex(u, 0)
+        @test_throws BoundsError getindex(u, 7)
+        @test_throws BoundsError getindex(u, 0:1)
+        @test_throws BoundsError getindex(u, 7:7)
+        @test reverseind(u, 1) == 4
+        @test typeof(Base.cconvert(Ptr{Int8}, u)) == SubString{String}
+        @test Base.cconvert(Ptr{Int8}, u) == u
+    end
 
-# SubString created from SubString
-let str = "Hello, world!"
-    u = SubString(str, 2, 5)
-    for idx in 1:4
-        @test SubString(u, 2, idx) == u[2:idx]
-        @test SubString(u, 2:idx) == u[2:idx]
+    let str = "føøbar"
+        @test_throws BoundsError SubString(str, 10, 10)
+        u = SubString(str, 4, 3)
+        @test length(u) == 0
+        b = IOBuffer()
+        write(b, u)
+        @test String(take!(b)) == ""
     end
-    @test_throws BoundsError SubString(u, 1, 10)
-    @test_throws BoundsError SubString(u, 1:10)
-    @test_throws BoundsError SubString(u, 20:30)
-    @test SubString(u, 20:15) == ""
-    @test_throws BoundsError SubString(u, -1:10)
-    @test SubString(u, -1, -10) == ""
-    @test SubString(SubString("123", 1, 2), -10, -20) == ""
-end
 
-# sizeof
-@test sizeof(SubString("abc\u2222def",4,4)) == 3
-
-# issue #3710
-@test prevind(SubString("{var}",2,4),4) == 3
-
-# issue #4183
-@test split(SubString("x", 2, 0), "y") == [""]
-
-# issue #6772
-@test parse(Float64, SubString("10",1,1)) === 1.0
-@test parse(Float64, SubString("1 0",1,1)) === 1.0
-@test parse(Float32, SubString("10",1,1)) === 1.0f0
-
-# issue #5870
-@test !occursin(Regex("aa"), SubString("",1,0))
-@test occursin(Regex(""), SubString("",1,0))
-
-# isvalid, length, prevind, nextind for SubString{String}
-let s = "lorem ipsum", sdict = Dict(
-    SubString(s, 1, 11)  => "lorem ipsum",
-    SubString(s, 1, 6)   => "lorem ",
-    SubString(s, 1, 0)   => "",
-    SubString(s, 2, 4)   => "ore",
-    SubString(s, 2, 11)  => "orem ipsum",
-    SubString(s, 15, 14) => "",
-)
-    for (ss, s) in sdict
-        @test ncodeunits(ss) == ncodeunits(s)
-        for i in -2:13
-            @test isvalid(ss, i) == isvalid(s, i)
-        end
-        for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss)
-            @test length(ss, i, j) == length(s, i, j)
+    @testset "search and SubString (issue #5679)" begin
+        str = "Hello, world!"
+        u = SubString(str, 1, 5)
+        @test findlast("World", u) === nothing
+        @test findlast(isequal('z'), u) === nothing
+        @test findlast("ll", u) == 3:4
+    end
+
+    @testset "SubString created from SubString" begin
+        str = "Hello, world!"
+        u = SubString(str, 2, 5)
+        for idx in 1:4
+            @test SubString(u, 2, idx) == u[2:idx]
+            @test SubString(u, 2:idx) == u[2:idx]
         end
+        @test_throws BoundsError SubString(u, 1, 10)
+        @test_throws BoundsError SubString(u, 1:10)
+        @test_throws BoundsError SubString(u, 20:30)
+        @test SubString(u, 20:15) == ""
+        @test_throws BoundsError SubString(u, -1:10)
+        @test SubString(u, -1, -10) == ""
+        @test SubString(SubString("123", 1, 2), -10, -20) == ""
+    end
+
+    # sizeof
+    @test sizeof(SubString("abc\u2222def",4,4)) == 3
+
+    # issue #3710
+    @test prevind(SubString("{var}",2,4),4) == 3
+
+    # issue #4183
+    @test split(SubString("x", 2, 0), "y") == [""]
+
+    @testset "issue #6772" begin
+        @test parse(Float64, SubString("10",1,1)) === 1.0
+        @test parse(Float64, SubString("1 0",1,1)) === 1.0
+        @test parse(Float32, SubString("10",1,1)) === 1.0f0
     end
-    for (ss, s) in sdict
-        @test length(ss) == length(s)
-        for i in 0:ncodeunits(ss), j = 0:length(ss)+1
-            @test prevind(ss, i+1, j) == prevind(s, i+1, j)
-            @test nextind(ss, i, j) == nextind(s, i, j)
+
+    @testset "issue #5870" begin
+        @test !occursin(Regex("aa"), SubString("",1,0))
+        @test occursin(Regex(""), SubString("",1,0))
+    end
+    @testset" isvalid, length, prevind, nextind for SubString{String}" begin
+        s = "lorem ipsum"
+        sdict = Dict(
+            SubString(s, 1, 11)  => "lorem ipsum",
+            SubString(s, 1, 6)   => "lorem ",
+            SubString(s, 1, 0)   => "",
+            SubString(s, 2, 4)   => "ore",
+            SubString(s, 2, 11)  => "orem ipsum",
+            SubString(s, 15, 14) => "",
+        )
+        for (ss, s) in sdict
+            @test ncodeunits(ss) == ncodeunits(s)
+            for i in -2:13
+                @test isvalid(ss, i) == isvalid(s, i)
+            end
+            for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss)
+                @test length(ss, i, j) == length(s, i, j)
+            end
+        end
+        for (ss, s) in sdict
+            @test length(ss) == length(s)
+            for i in 0:ncodeunits(ss), j = 0:length(ss)+1
+                @test prevind(ss, i+1, j) == prevind(s, i+1, j)
+                @test nextind(ss, i, j) == nextind(s, i, j)
+            end
+            @test_throws BoundsError prevind(s, 0)
+            @test_throws BoundsError prevind(ss, 0)
+            @test_throws BoundsError nextind(s, ncodeunits(ss)+1)
+            @test_throws BoundsError nextind(ss, ncodeunits(ss)+1)
         end
-        @test_throws BoundsError prevind(s, 0)
-        @test_throws BoundsError prevind(ss, 0)
-        @test_throws BoundsError nextind(s, ncodeunits(ss)+1)
-        @test_throws BoundsError nextind(ss, ncodeunits(ss)+1)
     end
-end
 
-# proper nextind/prevind/thisind for SubString{String}
-let rng = MersenneTwister(1), strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"∀∃∀",
+    rng = MersenneTwister(1)
+    strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"∀∃∀",
                                       String(rand(rng, UInt8, 50))]
-    for s in strs
+    @testset "proper nextind/prevind/thisind for SubString{String}: $(repr(s))" for s in strs
         a = 0
         while a <= ncodeunits(s)
             a = nextind(s, a)
@@ -223,111 +238,115 @@ let rng = MersenneTwister(1), strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"
             end
         end
     end
-end
 
-# for isvalid(SubString{String})
-let s = "Σx + βz - 2"
-    for i in -1:ncodeunits(s)+2
-        if checkbounds(Bool, s, i)
-            if isvalid(s, i)
-                ss = SubString(s, 1, i)
-                for j = 1:ncodeunits(ss)
-                    @test isvalid(ss, j) == isvalid(s, j)
+    # for isvalid(SubString{String})
+    let s = "Σx + βz - 2"
+        for i in -1:ncodeunits(s)+2
+            if checkbounds(Bool, s, i)
+                if isvalid(s, i)
+                    ss = SubString(s, 1, i)
+                    for j = 1:ncodeunits(ss)
+                        @test isvalid(ss, j) == isvalid(s, j)
+                    end
+                else
+                    @test_throws StringIndexError SubString(s, 1, i)
                 end
+            elseif i > 0
+                @test_throws BoundsError SubString(s, 1, i)
             else
-                @test_throws StringIndexError SubString(s, 1, i)
+                @test SubString(s, 1, i) == ""
             end
-        elseif i > 0
-            @test_throws BoundsError SubString(s, 1, i)
-        else
-            @test SubString(s, 1, i) == ""
         end
     end
-end
 
-let ss = SubString("hello", 1, 5)
-    @test length(ss, 1, 0) == 0
-    @test_throws BoundsError length(ss, 1, -1)
-    @test_throws BoundsError length(ss, 1, 6)
-    @test_throws BoundsError length(ss, 1, 10)
-    @test_throws BoundsError prevind(ss, 0, 1)
-    @test prevind(ss, 1, 1) == 0
-    @test prevind(ss, 6, 1) == 5
-    @test_throws BoundsError prevind(ss, 7, 1)
-    @test_throws BoundsError nextind(ss, -1, 1)
-    @test nextind(ss, 0, 1) == 1
-    @test nextind(ss, 5, 1) == 6
-    @test_throws BoundsError nextind(ss, 6, 1)
-end
+    let ss = SubString("hello", 1, 5)
+        @test length(ss, 1, 0) == 0
+        @test_throws BoundsError length(ss, 1, -1)
+        @test_throws BoundsError length(ss, 1, 6)
+        @test_throws BoundsError length(ss, 1, 10)
+        @test_throws BoundsError prevind(ss, 0, 1)
+        @test prevind(ss, 1, 1) == 0
+        @test prevind(ss, 6, 1) == 5
+        @test_throws BoundsError prevind(ss, 7, 1)
+        @test_throws BoundsError nextind(ss, -1, 1)
+        @test nextind(ss, 0, 1) == 1
+        @test nextind(ss, 5, 1) == 6
+        @test_throws BoundsError nextind(ss, 6, 1)
+    end
 
-# length(SubString{String}) performance specialization
-let s = "|η(α)-ϕ(κ)| < ε"
-    @test length(SubString(s, 1, 0)) == length(s[1:0])
-    @test length(SubString(s, 4, 4)) == length(s[4:4])
-    @test length(SubString(s, 1, 7)) == length(s[1:7])
-    @test length(SubString(s, 4, 11)) == length(s[4:11])
-end
+    # length(SubString{String}) performance specialization
+    let s = "|η(α)-ϕ(κ)| < ε"
+        @test length(SubString(s, 1, 0)) == length(s[1:0])
+        @test length(SubString(s, 4, 4)) == length(s[4:4])
+        @test length(SubString(s, 1, 7)) == length(s[1:7])
+        @test length(SubString(s, 4, 11)) == length(s[4:11])
+    end
 
-@testset "reverseind" for T in (String, SubString, GenericString)
-    for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1")
-        for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4")
-            for c in ('X', 'δ', '\U0001d6a5')
-                s = convert(T, string(prefix, c, suffix))
-                r = reverse(s)
-                ri = findfirst(isequal(c), r)
-                @test c == s[reverseind(s, ri)] == r[ri]
-                s = convert(T, string(prefix, prefix, c, suffix, suffix))
-                pre = convert(T, prefix)
-                sb = SubString(s, nextind(pre, lastindex(pre)),
-                               lastindex(convert(T, string(prefix, prefix, c, suffix))))
-                r = reverse(sb)
-                ri = findfirst(isequal(c), r)
-                @test c == sb[reverseind(sb, ri)] == r[ri]
+    @testset "reverseind" for T in (String, SubString, GenericString)
+        for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1")
+            for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4")
+                for c in ('X', 'δ', '\U0001d6a5')
+                    s = convert(T, string(prefix, c, suffix))
+                    r = reverse(s)
+                    ri = findfirst(isequal(c), r)
+                    @test c == s[reverseind(s, ri)] == r[ri]
+                    s = convert(T, string(prefix, prefix, c, suffix, suffix))
+                    pre = convert(T, prefix)
+                    sb = SubString(s, nextind(pre, lastindex(pre)),
+                                   lastindex(convert(T, string(prefix, prefix, c, suffix))))
+                    r = reverse(sb)
+                    ri = findfirst(isequal(c), r)
+                    @test c == sb[reverseind(sb, ri)] == r[ri]
+                end
             end
         end
     end
-end
 
-@testset "reverseind of empty strings" begin
-    for s in ("",
-              SubString("", 1, 0),
-              SubString("ab", 1, 0),
-              SubString("ab", 2, 1),
-              SubString("ab", 3, 2),
-              GenericString(""))
-        @test reverseind(s, 0) == 1
-        @test reverseind(s, 1) == 0
+    @testset "reverseind of empty strings" begin
+        for s in ("",
+                  SubString("", 1, 0),
+                  SubString("ab", 1, 0),
+                  SubString("ab", 2, 1),
+                  SubString("ab", 3, 2),
+                  GenericString(""))
+            @test reverseind(s, 0) == 1
+            @test reverseind(s, 1) == 0
+        end
     end
 end
 
-## Cstring tests ##
-
-# issue #13974: comparison against pointers
-let
-    str = String("foobar")
-    ptr = pointer(str)
-    cstring = Cstring(ptr)
-    @test ptr == cstring
-    @test cstring == ptr
-
-    # convenient NULL string creation from Ptr{Cvoid}
-    nullstr = Cstring(C_NULL)
-
-    # Comparisons against NULL strings
-    @test ptr != nullstr
-    @test nullstr != ptr
+@testset "Cstring" begin
+    @testset "issue #13974: comparison against pointers" begin
+        str = String("foobar")
+        ptr = pointer(str)
+        cstring = Cstring(ptr)
+        @test ptr == cstring
+        @test cstring == ptr
+
+        # convenient NULL string creation from Ptr{Cvoid}
+        nullstr = Cstring(C_NULL)
+
+        # Comparisons against NULL strings
+        @test ptr != nullstr
+        @test nullstr != ptr
+
+        # Short-hand comparison against C_NULL
+        @test nullstr == C_NULL
+        @test C_NULL == nullstr
+        @test cstring != C_NULL
+        @test C_NULL != cstring
+    end
 
-    # Short-hand comparison against C_NULL
-    @test nullstr == C_NULL
-    @test C_NULL == nullstr
-    @test cstring != C_NULL
-    @test C_NULL != cstring
+    @testset "issue #31381: eltype(Cstring) != Cchar" begin
+        s = Cstring(C_NULL)
+        @test eltype(Cstring) == Cchar
+        @test eltype(s) == Cchar
+        @test pointer(s) isa Ptr{Cchar}
+    end
 end
 
-# issue #31381: eltype(Cstring) != Cchar
-let
-    s = Cstring(C_NULL)
-    @test eltype(Cstring) == Cchar
-    @test eltype(s) == Cchar
-    @test pointer(s) isa Ptr{Cchar}
+@testset "Codeunits" begin
+    s = "I'm a string!"
+    @test codeunit(s) == UInt8
+    @test codeunit(s, Int8(1)) == codeunit(s, 1)
 end
diff --git a/test/strings/util.jl b/test/strings/util.jl
index 8b58c2f36d8c4..95b1b917aefd1 100644
--- a/test/strings/util.jl
+++ b/test/strings/util.jl
@@ -2,6 +2,22 @@
 
 SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3)
 
+@testset "textwidth" begin
+    for (c, w) in [('x', 1), ('α', 1), ('🍕', 2), ('\0', 0), ('\u0302', 0), ('\xc0', 1)]
+        @test textwidth(c) == w
+        @test textwidth(c^3) == w*3
+        @test w == @invoke textwidth(c::AbstractChar)
+    end
+    @test textwidth('\xc0\xa0') == 1 # overlong
+    @test textwidth('\xf0\x80\x80') == 1 # malformed
+    for i in 0x00:0x7f # test all ASCII chars (which have fast path)
+        w = Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), i))
+        c = Char(i)
+        @test textwidth(c) == w
+        @test w == @invoke textwidth(c::AbstractChar)
+    end
+end
+
 @testset "padding (lpad and rpad)" begin
     @test lpad("foo", 2) == "foo"
     @test rpad("foo", 2) == "foo"
@@ -51,6 +67,59 @@ SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3)
     @test rpad("⟨k|H₁|k̃⟩", 12) |> textwidth == 12
     @test lpad("⟨k|H₁|k⟩", 12) |> textwidth == 12
     @test rpad("⟨k|H₁|k⟩", 12) |> textwidth == 12
+    for pad in (rpad, lpad), p in ('\0', "\0", "\0\0", "\u302")
+        if ncodeunits(p) == 1
+            @test_throws r".*has zero textwidth.*maybe you want.*bytes.*" pad("foo", 10, p)
+        else
+            @test_throws r".*has zero textwidth$" pad("foo", 10, p)
+        end
+    end
+end
+
+@testset "string truncation (ltruncate, rtruncate, ctruncate)" begin
+    @test ltruncate("foo", 4) == "foo"
+    @test ltruncate("foo", 3) == "foo"
+    @test ltruncate("foo", 2) == "…o"
+    @test ltruncate("🍕🍕 I love 🍕", 10) == "…I love 🍕" # handle wide emojis
+    @test ltruncate("🍕🍕 I love 🍕", 10, "[…]") == "[…]love 🍕"
+    # when the replacement string is longer than the trunc
+    # trust that the user wants the replacement string rather than erroring
+    @test ltruncate("abc", 2, "xxxxxx") == "xxxxxx"
+
+    @inferred ltruncate("xxx", 4)
+    @inferred ltruncate("xxx", 2)
+    @inferred ltruncate(@view("xxxxxxx"[1:4]), 4)
+    @inferred ltruncate(@view("xxxxxxx"[1:4]), 2)
+
+    @test rtruncate("foo", 4) == "foo"
+    @test rtruncate("foo", 3) == "foo"
+    @test rtruncate("foo", 2) == "f…"
+    @test rtruncate("🍕🍕 I love 🍕", 10) == "🍕🍕 I lo…"
+    @test rtruncate("🍕🍕 I love 🍕", 10, "[…]") == "🍕🍕 I […]"
+    @test rtruncate("abc", 2, "xxxxxx") == "xxxxxx"
+
+    @inferred rtruncate("xxx", 4)
+    @inferred rtruncate("xxx", 2)
+    @inferred rtruncate(@view("xxxxxxx"[1:4]), 4)
+    @inferred rtruncate(@view("xxxxxxx"[1:4]), 2)
+
+    @test ctruncate("foo", 4) == "foo"
+    @test ctruncate("foo", 3) == "foo"
+    @test ctruncate("foo", 2) == "f…"
+    @test ctruncate("foo", 2; prefer_left=true) == "f…"
+    @test ctruncate("foo", 2; prefer_left=false) == "…o"
+    @test ctruncate("foobar", 6) == "foobar"
+    @test ctruncate("foobar", 5) == "fo…ar"
+    @test ctruncate("foobar", 4) == "fo…r"
+    @test ctruncate("🍕🍕 I love 🍕", 10) == "🍕🍕 …e 🍕"
+    @test ctruncate("🍕🍕 I love 🍕", 10, "[…]") == "🍕🍕[…] 🍕"
+    @test ctruncate("abc", 2, "xxxxxx") == "xxxxxx"
+    @test ctruncate("🍕🍕🍕🍕🍕🍕xxxxxxxxxxx", 9) == "🍕🍕…xxxx"
+
+    @inferred ctruncate("xxxxx", 5)
+    @inferred ctruncate("xxxxx", 3)
+    @inferred ctruncate(@view("xxxxxxx"[1:5]), 5)
+    @inferred ctruncate(@view("xxxxxxx"[1:5]), 3)
 end
 
 # string manipulation
@@ -89,6 +158,10 @@ end
     @test rstrip(isnumeric, "abc0123") == "abc"
     @test lstrip("ello", ['e','o']) == "llo"
     @test rstrip("ello", ['e','o']) == "ell"
+
+    @test_throws ArgumentError strip("", "")
+    @test_throws ArgumentError lstrip("", "")
+    @test_throws ArgumentError rstrip("", "")
 end
 
 @testset "partition" begin
@@ -208,6 +281,28 @@ end
     @test split("α β γ", "β") == rsplit("α β γ", "β") == ["α "," γ"]
 end
 
+@testset "eachrsplit" begin
+    @test collect(eachrsplit("", 'a')) == [""]
+    @test collect(eachrsplit("", isspace; limit=3)) == [""]
+    @test collect(eachrsplit("b c  d"; limit=2)) == ["d", "b c "]
+    @test collect(eachrsplit("a.b.c", '.'; limit=1)) == ["a.b.c"]
+    @test collect(eachrsplit("a..b..c", '.')) == ["c", "", "b", "", "a"]
+    @test collect(eachrsplit("ax  b  c")) == ["c", "b", "ax"]
+    @test collect(eachrsplit(" a 12 4 v ", isnumeric)) == [" v ", " ", "", " a "]
+    @test collect(eachrsplit("ba", 'a')) == ["", "b"]
+    @test collect(eachrsplit("   ")) == []
+    @test collect(eachrsplit("aaaa", 'a'; keepempty=false)) == []
+    @test collect(eachrsplit("aaaa", 'a'; limit=2)) == ["", "aaa"]
+    @test collect(eachrsplit("abcdef", ['b', 'e'])) == ["f", "cd", "a"]
+    @test collect(eachrsplit("abc", isletter)) == ["", "", "", ""]
+
+    # This behaviour is quite surprising, but is consistent with split
+    # See issue 45916
+    @test collect(eachrsplit("a  b"; limit=2)) == ["b", "a "] # only one trailing space
+    @test collect(eachrsplit("a "; limit=1)) == ["a "]
+    @test collect(eachrsplit("  a  b  c  d"; limit=3)) == ["d", "c", "  a  b "]
+end
+
 @testset "replace" begin
     @test replace("\u2202", '*' => '\0') == "\u2202"
 
@@ -614,6 +709,11 @@ end
 
         @test isa(chopprefix(S("foo"), "fo"), SubString)
         @test isa(chopsuffix(S("foo"), "oo"), SubString)
+
+        @test chopprefix(S(""), 'z') == chopsuffix(S(""), 'z') == ""
+        @test chopprefix(S("吃齋"), '🍖') == chopsuffix(S("吃齋"), '🍖') == "吃齋"
+        @test chopprefix(S("äwesome"), 'ä') == "wesome"
+        @test chopsuffix(S("äwesome"), 'e') == "äwesom"
     end
 end
 
diff --git a/test/subarray.jl b/test/subarray.jl
index e22c1394cbfc2..321f8a6cc27e2 100644
--- a/test/subarray.jl
+++ b/test/subarray.jl
@@ -275,9 +275,6 @@ end
 # with the exception of Int-slicing
 oindex = (:, 6, 3:7, reshape([12]), [8,4,6,12,5,7], [3:7 1:5 2:6 4:8 5:9], reshape(2:11, 2, 5))
 
-_ndims(::AbstractArray{T,N}) where {T,N} = N
-_ndims(x) = 1
-
 if testfull
     let B = copy(reshape(1:13^3, 13, 13, 13))
         @testset "full tests: ($o1,$o2,$o3)" for o3 in oindex, o2 in oindex, o1 in oindex
@@ -342,6 +339,7 @@ end
     A = copy(reshape(1:120, 3, 5, 8))
     sA = view(A, 2:2, 1:5, :)
     @test @inferred(strides(sA)) == (1, 3, 15)
+    @test IndexStyle(sA) == IndexStyle(typeof(sA)) == IndexCartesian()
     @test parent(sA) == A
     @test parentindices(sA) == (2:2, 1:5, Base.Slice(1:8))
     @test size(sA) == (1, 5, 8)
@@ -383,6 +381,8 @@ end
     sA = view(A, 1:2, 3, [1 3; 4 2])
     @test ndims(sA) == 3
     @test axes(sA) === (Base.OneTo(2), Base.OneTo(2), Base.OneTo(2))
+    @test axes(similar(typeof(A),axes(A))) == axes(A)
+    @test eltype(similar(typeof(A),axes(A))) == eltype(A)
 end
 
 @testset "logical indexing #4763" begin
@@ -468,6 +468,113 @@ end
     @test sA[[1 2 4 4; 6 1 1 4]] == [34 35 38 38; 50 34 34 38]
 end
 
+@testset "fast linear indexing with AbstractUnitRange or Colon indices" begin
+    @testset "getindex" begin
+        @testset "1D" begin
+            for a1 in Any[1:5, [1:5;]]
+                b1 = @view a1[:]; # FastContiguousSubArray
+                c1 = @view a1[eachindex(a1)]; # FastContiguousSubArray
+                d1 = @view a1[begin:1:end]; # FastSubArray
+
+                ax1 = eachindex(a1);
+                @test b1[ax1] == c1[ax1] == d1[ax1] == a1[ax1]
+                @test b1[:] == c1[:] == d1[:] == a1[:]
+
+                # some arbitrary indices
+                inds1 = 2:4
+                c1 = @view a1[inds1]
+                @test c1[axes(c1,1)] == c1[:] == a1[inds1]
+
+                inds12 = Base.IdentityUnitRange(Base.OneTo(4))
+                c1 = @view a1[inds12]
+                @test c1[axes(c1,1)] == c1[:] == a1[inds12]
+
+                inds2 = 3:2:5
+                d1 = @view a1[inds2]
+                @test d1[axes(d1,1)] == d1[:] == a1[inds2]
+            end
+        end
+
+        @testset "2D" begin
+            a2_ = reshape(1:25, 5, 5)
+            for a2 in Any[a2_, collect(a2_)]
+                b2 = @view a2[:, :]; # 2D FastContiguousSubArray
+                b22 = @view a2[:]; # 1D FastContiguousSubArray
+                c2 = @view a2[eachindex(a2)]; # 1D FastContiguousSubArray
+                d2 = @view a2[begin:1:end]; # 1D FastSubArray
+
+                ax2 = eachindex(a2);
+                @test b2[ax2] == b22[ax2] == c2[ax2] == d2[ax2] == a2[ax2]
+                @test b2[:] == b22[:] == c2[:] == d2[:] == a2[:]
+
+                # some arbitrary indices
+                inds1 = 2:4
+                c2 = @view a2[inds1]
+                @test c2[axes(c2,1)] == c2[:] == a2[inds1]
+
+                inds12 = Base.IdentityUnitRange(Base.OneTo(4))
+                c2 = @view a2[inds12]
+                @test c2[axes(c2,1)] == c2[:] == a2[inds12]
+
+                inds2 = 2:2:4
+                d2 = @view a2[inds2];
+                @test d2[axes(d2,1)] == d2[:] == a2[inds2]
+            end
+        end
+    end
+    @testset "setindex!" begin
+        @testset "1D" begin
+            a1 = rand(10);
+            a12 = copy(a1);
+            b1 = @view a1[:]; # 1D FastContiguousSubArray
+            c1 = @view a1[eachindex(a1)]; # 1D FastContiguousSubArray
+            d1 = @view a1[begin:1:end]; # 1D FastSubArray
+
+            ax1 = eachindex(a1);
+            @test (b1[ax1] = a12; b1) == (c1[ax1] = a12; c1) == (d1[ax1] = a12; d1) == (a1[ax1] = a12; a1)
+            @test (b1[:] = a12; b1) == (c1[:] = a12; c1) == (d1[:] = a12; d1) == (a1[:] = a12; a1)
+
+            # some arbitrary indices
+            ind1 = 2:4
+            c1 = a12[ind1]
+            @test (c1[axes(c1,1)] = a12[ind1]; c1) == (c1[:] = a12[ind1]; c1) == a12[ind1]
+
+            inds1 = Base.IdentityUnitRange(Base.OneTo(4))
+            c1 = @view a1[inds1]
+            @test (c1[eachindex(c1)] = @view(a12[inds1]); c1) == @view(a12[inds1])
+
+            ind2 = 2:2:8
+            d1 = a12[ind2]
+            @test (d1[axes(d1,1)] = a12[ind2]; d1) == (d1[:] = a12[ind2]; d1) == a12[ind2]
+        end
+
+        @testset "2D" begin
+            a2 = rand(10, 10);
+            a22 = copy(a2);
+            a2v = vec(a22);
+            b2 = @view a2[:, :]; # 2D FastContiguousSubArray
+            c2 = @view a2[eachindex(a2)]; # 1D FastContiguousSubArray
+            d2 = @view a2[begin:1:end]; # 1D FastSubArray
+
+            @test (b2[eachindex(b2)] = a2v; vec(b2)) == (c2[eachindex(c2)] = a2v; c2) == a2v
+            @test (d2[eachindex(d2)] = a2v; d2) == a2v
+
+            # some arbitrary indices
+            inds1 = 3:9
+            c2 = @view a2[inds1]
+            @test (c2[eachindex(c2)] = @view(a22[inds1]); c2) == @view(a22[inds1])
+
+            inds1 = Base.IdentityUnitRange(Base.OneTo(4))
+            c2 = @view a2[inds1]
+            @test (c2[eachindex(c2)] = @view(a22[inds1]); c2) == @view(a22[inds1])
+
+            inds2 = 3:3:9
+            d2 = @view a2[inds2]
+            @test (d2[eachindex(d2)] = @view(a22[inds2]); d2) == @view(a22[inds2])
+        end
+    end
+end
+
 @testset "issue #11871" begin
     a = fill(1., (2,2))
     b = view(a, 1:2, 1:2)
@@ -533,6 +640,44 @@ end
         @test foo == [X, X]
     end
 
+    # Test as an assignment's left hand side
+    let x = [1,2,3,4]
+        @test Meta.@lower(@view(x[1]) = 1).head == :error
+        @test Meta.@lower(@view(x[1]) += 1).head == :error
+        @test Meta.@lower(@view(x[end]) = 1).head == :error
+        @test Meta.@lower(@view(x[end]) += 1).head == :error
+        @test Meta.@lower(@view(f(x)[end]) = 1).head == :error
+        @test Meta.@lower(@view(f(x)[end]) += 1).head == :error
+        @test (@view(x[1]) .+= 1) == fill(2)
+        @test x == [2,2,3,4]
+        @test (@view(reshape(x,2,2)[1,1]) .+= 10) == fill(12)
+        @test x == [12,2,3,4]
+        @test (@view(x[end]) .+= 1) == fill(5)
+        @test x == [12,2,3,5]
+        @test (@view(reshape(x,2,2)[end]) .+= 10) == fill(15)
+        @test x == [12,2,3,15]
+        @test (@view(reshape(x,2,2)[[begin],[begin,end]])::AbstractMatrix{Int} .+= [2]) == [14 5]
+        @test x == [14,2,5,15]
+
+        x = [1,2,3,4]
+        @test Meta.@lower(@views(x[[1]]) = 1).head == :error
+        @test Meta.@lower(@views(x[[1]]) += 1).head == :error
+        @test Meta.@lower(@views(x[[end]]) = 1).head == :error
+        @test Meta.@lower(@views(x[[end]]) += 1).head == :error
+        @test Meta.@lower(@views(f(x)[end]) = 1).head == :error
+        @test Meta.@lower(@views(f(x)[end]) += 1).head == :error
+        @test (@views(x[[1]]) .+= 1) == [2]
+        @test x == [2,2,3,4]
+        @test (@views(reshape(x,2,2)[[1],1]) .+= 10) == [12]
+        @test x == [12,2,3,4]
+        @test (@views(x[[end]]) .+= 1) == [5]
+        @test x == [12,2,3,5]
+        @test (@views(reshape(x,2,2)[[end]]) .+= 10) == [15]
+        @test x == [12,2,3,15]
+        @test (@views(reshape(x,2,2)[[begin],[begin,end]])::AbstractMatrix{Int} .+= [2]) == [14 5]
+        @test x == [14,2,5,15]
+    end
+
     # test @views macro
     @views let f!(x) = x[begin:end-1] .+= x[begin+1:end].^2
         x = [1,2,3,4]
@@ -559,6 +704,12 @@ end
         @test x == [5,8,12,9] && i == [4,3]
         @. x[3:end] = 0       # make sure @. works with end expressions in @views
         @test x == [5,8,0,0]
+        x[begin:end] .+= 1
+        @test x == [6,9,1,1]
+        x[[begin,2,end]] .-= [1,2,3]
+        @test x == [5,7,1,-2]
+        @. x[[begin,2,end]] .+= [1,2,3]
+        @test x == [6,9,1,1]
     end
     @views @test isa(X[1:3], SubArray)
     @test X[begin:end] == @views X[begin:end]
@@ -663,8 +814,40 @@ end
 @testset "unaliascopy trimming; Issue #26263" begin
     A = rand(5,5,5,5)
     V = view(A, 2:5, :, 2:5, 1:2:5)
-    @test @inferred(Base.unaliascopy(V)) == V == A[2:5, :, 2:5, 1:2:5]
-    @test @inferred(sum(Base.unaliascopy(V))) ≈ sum(V) ≈ sum(A[2:5, :, 2:5, 1:2:5])
+    V′ = @inferred(Base.unaliascopy(V))
+    @test size(V′.parent) == size(V)
+    @test V′::typeof(V) == V == A[2:5, :, 2:5, 1:2:5]
+    @test @inferred(sum(V′)) ≈ sum(V) ≈ sum(A[2:5, :, 2:5, 1:2:5])
+    V = view(A, Base.IdentityUnitRange(2:4), :, Base.StepRangeLen(1,1,3), 1:2:5)
+    V′ = @inferred(Base.unaliascopy(V))
+    @test size(V.parent) != size(V′.parent)
+    @test V′ == V && V′ isa typeof(V)
+    i1 = collect(CartesianIndices((2:5)))
+    i2 = [CartesianIndex(), CartesianIndex()]
+    i3 = collect(CartesianIndices((2:5, 1:2:5)))
+    V = view(A, i1, 1:5, i2, i3)
+    @test @inferred(Base.unaliascopy(V))::typeof(V) == V == A[i1, 1:5, i2, i3]
+    V = view(A, i1, 1:5, i3, i2)
+    @test @inferred(Base.unaliascopy(V))::typeof(V) == V == A[i1, 1:5, i3, i2]
+
+    @testset "custom ranges" begin
+        struct MyStepRange{T} <: OrdinalRange{T,T}
+            r::StepRange{T,T}
+        end
+
+        for f in (:first, :last, :step, :length, :size)
+            @eval Base.$f(r::MyStepRange) = $f(r.r)
+        end
+        Base.getindex(r::MyStepRange, i::Int) = r.r[i]
+
+        a = rand(6)
+        V = view(a, MyStepRange(2:2:4))
+        @test @inferred(Base.unaliascopy(V))::typeof(V) == V
+
+        # empty range
+        V = view(a, MyStepRange(2:2:1))
+        @test @inferred(Base.unaliascopy(V))::typeof(V) == V
+    end
 end
 
 @testset "issue #27632" begin
@@ -762,9 +945,9 @@ end
 
 @testset "issue #41221: view(::Vector, :, 1)" begin
     v = randn(3)
-    @test view(v,:,1) == v
-    @test parent(view(v,:,1)) === v
-    @test parent(view(v,2:3,1,1)) === v
+    @test @inferred(view(v,:,1)) == v
+    @test parent(@inferred(view(v,:,1))) === v
+    @test parent(@inferred(view(v,2:3,1,1))) === v
     @test_throws BoundsError view(v,:,2)
     @test_throws BoundsError view(v,:,1,2)
 
@@ -772,3 +955,203 @@ end
     @test view(m, 1:2, 3, 1, 1) == m[1:2, 3]
     @test parent(view(m, 1:2, 3, 1, 1)) === m
 end
+
+@testset "issue #53209: avoid invalid elimination of singleton indices" begin
+    A = randn(4,5)
+    @test A[CartesianIndices(()), :, 3] == @inferred(view(A, CartesianIndices(()), :, 3))
+    @test parent(@inferred(view(A, :, 3, 1, CartesianIndices(()), 1))) === A
+    @test_throws BoundsError view(A, :, 3, 2, CartesianIndices(()), 1)
+end
+
+@testset "replace_in_print_matrix" begin
+    struct MyIdentity <: AbstractMatrix{Bool}
+        n :: Int
+    end
+    Base.size(M::MyIdentity) = (M.n, M.n)
+    function Base.getindex(M::MyIdentity, i::Int, j::Int)
+        checkbounds(M, i, j)
+        i == j
+    end
+    function Base.replace_in_print_matrix(M::MyIdentity, i::Integer, j::Integer, s::AbstractString)
+        i == j ? s : Base.replace_with_centered_mark(s)
+    end
+    V = view(MyIdentity(3), 1:2, 1:3)
+    @test sprint(show, "text/plain", V) == "$(summary(V)):\n 1  ⋅  ⋅\n ⋅  1  ⋅"
+
+    struct OneElVec <: AbstractVector{Bool}
+        n :: Int
+        ind :: Int
+    end
+    Base.size(M::OneElVec) = (M.n,)
+    function Base.getindex(M::OneElVec, i::Int)
+        checkbounds(M, i)
+        i == M.ind
+    end
+    function Base.replace_in_print_matrix(M::OneElVec, i::Integer, j::Integer, s::AbstractString)
+        i == M.ind ? s : Base.replace_with_centered_mark(s)
+    end
+    V = view(OneElVec(6, 2), 1:5)
+    @test sprint(show, "text/plain", V) == "$(summary(V)):\n ⋅\n 1\n ⋅\n ⋅\n ⋅"
+
+    V = view(1:2, [CartesianIndex(2)])
+    @test sprint(show, "text/plain", V) == "$(summary(V)):\n 2"
+end
+
+@testset "Base.first_index for offset indices" begin
+    a = Vector(1:10)
+    b = view(a, Base.IdentityUnitRange(4:7))
+    @test first(b) == a[Base.first_index(b)]
+end
+
+@testset "StepRangeLen of CartesianIndex-es" begin
+    v = view(1:2, StepRangeLen(CartesianIndex(1,1), CartesianIndex(1,1), 0))
+    @test isempty(v)
+    r = StepRangeLen(CartesianIndex(1), CartesianIndex(1), 1)
+    v = view(1:2, r)
+    @test v == view(1:2, collect(r))
+end
+
+# https://github.com/JuliaLang/julia/pull/53064
+# `@view(A[idx]) = xxx` should raise syntax error always
+@test try
+    Core.eval(@__MODULE__, :(@view(A[idx]) = 2))
+    false
+catch err
+    err isa ErrorException && startswith(err.msg, "syntax:")
+end
+module Issue53064
+import Base: view
+end
+@test try
+    Core.eval(Issue53064, :(@view(A[idx]) = 2))
+    false
+catch err
+    err isa ErrorException && startswith(err.msg, "syntax:")
+end
+
+@testset "isassigned" begin
+    a = Vector{BigFloat}(undef, 5)
+    a[2] = 0
+    for v in (view(a, 2:3), # FastContiguousSubArray
+               view(a, 2:2:4), # FastSubArray
+               view(a, [2:2:4;]), # SlowSubArray
+            )
+        @test !isassigned(v, 0) # out-of-bounds
+        @test isassigned(v, 1) # inbounds and assigned
+        @test !isassigned(v, 2) # inbounds but not assigned
+        @test !isassigned(v, 4) # out-of-bounds
+    end
+
+    a = Array{BigFloat}(undef,3,3,3)
+    a[1,1,1] = 0
+    for v in (view(a, :, 1:3, 1), # FastContiguousSubArray
+               view(a, 1, :, 1:2), # FastSubArray
+            )
+        @test !isassigned(v, 0, 0) # out-of-bounds
+        @test isassigned(v, 1, 1) # inbounds and assigned
+        @test !isassigned(v, 1, 2) # inbounds but not assigned
+        @test !isassigned(v, 3, 3) # out-of-bounds
+    end
+end
+
+@testset "aliasing checks with shared indices" begin
+    indices = [1,3]
+    a = rand(3)
+    av = @view a[indices]
+    b = rand(3)
+    bv = @view b[indices]
+    @test !Base.mightalias(av, bv)
+    @test Base.mightalias(a, av)
+    @test Base.mightalias(b, bv)
+    @test Base.mightalias(indices, av)
+    @test Base.mightalias(indices, bv)
+    @test Base.mightalias(view(indices, :), av)
+    @test Base.mightalias(view(indices, :), bv)
+end
+
+@testset "aliasing checks with disjoint arrays" begin
+    A = rand(3,4,5)
+    @test Base.mightalias(view(A, :, :, 1), view(A, :, :, 1))
+    @test !Base.mightalias(view(A, :, :, 1), view(A, :, :, 2))
+
+    B = reinterpret(UInt64, A)
+    @test Base.mightalias(view(B, :, :, 1), view(A, :, :, 1))
+    @test !Base.mightalias(view(B, :, :, 1), view(A, :, :, 2))
+
+    C = reinterpret(UInt32, A)
+    @test Base.mightalias(view(C, :, :, 1), view(A, :, :, 1))
+    @test Base.mightalias(view(C, :, :, 1), view(A, :, :, 2)) # This is overly conservative
+    @test Base.mightalias(@view(C[begin:2:end, :, 1]), view(A, :, :, 1))
+    @test Base.mightalias(@view(C[begin:2:end, :, 1]), view(A, :, :, 2)) # This is overly conservative
+end
+
+@testset "aliasing check with reshaped subarrays" begin
+    C = rand(2,1)
+    V1 = @view C[1, :]
+    V2 = @view C[2, :]
+
+    @test !Base.mightalias(V1, V2)
+    @test !Base.mightalias(V1, permutedims(V2))
+    @test !Base.mightalias(permutedims(V1), V2)
+    @test !Base.mightalias(permutedims(V1), permutedims(V2))
+
+    @test Base.mightalias(V1, V1)
+    @test Base.mightalias(V1, permutedims(V1))
+    @test Base.mightalias(permutedims(V1), V1)
+    @test Base.mightalias(permutedims(V1), permutedims(V1))
+end
+
+
+@test @views quote var"begin" + var"end" end isa Expr
+
+@testset "@views handling of assignment" begin
+    @test @macroexpand(@views x[a:b] = c) == :(x[a:b] = c)
+    # Assignments should still work
+    let array = [1, 2, 3, 4, 5, 6, 7, 8]
+        @views array[begin:2] = [-1, -2]
+        @test array == [-1, -2, 3, 4, 5, 6, 7, 8]
+        @views array[7:end] = [-7, -8]
+        @test array == [-1, -2, 3, 4, 5, 6, -7, -8]
+        @views array[begin + 2:end - 4] = [-3, -4]
+        @test array == [-1, -2, -3, -4, 5, 6, -7, -8]
+        @views identity(array)[begin + 4:end - 2] = [-5, -6]
+        @test array == [-1, -2, -3, -4, -5, -6, -7, -8]
+
+        @views array[begin:2] .= 100
+        @test array == [100, 100, -3, -4, -5, -6, -7, -8]
+        @views array[7:end] .= 200
+        @test array == [100, 100, -3, -4, -5, -6, 200, 200]
+        @views array[begin + 2:end - 4] .= 300
+        @test array == [100, 100, 300, 300, -5, -6, 200, 200]
+        @views identity(array)[begin + 4:end - 2] .= 400
+        @test array == [100, 100, 300, 300, 400, 400, 200, 200]
+
+        @views identity(array)[begin:end] .-= 1
+        @test array == [99, 99, 299, 299, 399, 399, 199, 199]
+
+        @views identity(array)[begin:end] += [1, 2, 3, 4, 5, 6, 7, 8]
+        @test array == [100, 101, 302, 303, 404, 405, 206, 207]
+    end
+    # Nested getindex in assignment should be transformed
+    let array = [1, 2, 3, 4, 5, 6, 7, 8], array2 = [1, 2, 3, 4, 5, 6, 7, 8]
+        array[begin + 1:end - 2][2] = -1
+        array[begin + 1:end - 2][end] = -2
+        @test array == [1, 2, 3, 4, 5, 6, 7, 8]
+
+        @views array[begin + 1:end - 2][2] = -1
+        @views array[begin + 1:end - 2][end] = -2
+        @test array == [1, 2, -1, 4, 5, -2, 7, 8]
+
+        function swap_ele(ary, i, v)
+            res = ary[i]
+            ary[i] = v
+            return res
+        end
+        array2[swap_ele(array[begin:end], 1, -3):swap_ele(array[begin:end], 7, -4)] = [-1, -2, -3, -4, -5, -6, -7]
+        @test array == [1, 2, -1, 4, 5, -2, 7, 8]
+        @test array2 == [-1, -2, -3, -4, -5, -6, -7, 8]
+        @views array2[swap_ele(array[begin:end], 1, -3):swap_ele(array[begin:end], 7, -4)] = [-10, 2, -30, 4, -50, 6, -70]
+        @test array == [-3, 2, -1, 4, 5, -2, -4, 8]
+        @test array2 == [-10, 2, -30, 4, -50, 6, -70, 8]
+    end
+end
diff --git a/test/subtype.jl b/test/subtype.jl
index de11689e9e7c4..c9ceada7e90f5 100644
--- a/test/subtype.jl
+++ b/test/subtype.jl
@@ -20,22 +20,22 @@ intersection_env(@nospecialize(x), @nospecialize(y)) = ccall(:jl_type_intersecti
 # level 1: no varags, union, UnionAll
 function test_1()
     @test issub_strict(Int, Integer)
-    @test issub_strict(Array{Int,1}, AbstractArray{Int,1})
+    @test issub_strict(Vector{Int}, AbstractVector{Int})
 
     @test isequal_type(Int, Int)
     @test isequal_type(Integer, Integer)
-    @test isequal_type(Array{Int,1}, Array{Int,1})
-    @test isequal_type(AbstractArray{Int,1}, AbstractArray{Int,1})
+    @test isequal_type(Vector{Int}, Vector{Int})
+    @test isequal_type(AbstractVector{Int}, AbstractVector{Int})
 
     @test issub_strict(Tuple{Int,Int}, Tuple{Integer,Integer})
-    @test issub_strict(Tuple{Array{Int,1}}, Tuple{AbstractArray{Int,1}})
+    @test issub_strict(Tuple{Vector{Int}}, Tuple{AbstractVector{Int}})
 
     @test isequal_type(Tuple{Integer,Integer}, Tuple{Integer,Integer})
 
     @test !issub(Tuple{Int,Int}, Tuple{Int})
     @test !issub(Tuple{Int}, Tuple{Integer,Integer})
 
-    @test !issub(Array{Int,1}, Array{Integer,1})
+    @test !issub(Vector{Int}, Vector{Integer})
 end
 
 # level 2: varargs
@@ -68,6 +68,13 @@ function test_2()
     @test !(Tuple{Int,Vararg{Int,2}} <: Tuple{Int,Int,Int,Vararg{Int,1}})
     @test Tuple{Int,Vararg{Int}} == Tuple{Int,Vararg{Int}}
     @test (@UnionAll N Tuple{Int,Vararg{Int,N}}) == (@UnionAll N Tuple{Int,Vararg{Int,N}})
+    @test Union{Tuple{}, Tuple{Int}, Tuple{UInt}} <: Union{
+        Tuple{},
+        Tuple{Int},
+        Tuple{UInt},
+        Tuple{UInt, Vararg{UInt}},
+        Tuple{Int, Int, Vararg{Int}}
+    }
 
     @test issub_strict(Tuple{Tuple{Int,Int},Tuple{Int,Int}}, Tuple{NTuple{N,Int},NTuple{N,Int}} where N)
     @test !issub(Tuple{Tuple{Int,Int},Tuple{Int,}}, Tuple{NTuple{N,Int},NTuple{N,Int}} where N)
@@ -102,7 +109,7 @@ function test_diagonal()
     @test !issub(Tuple{Real,Real}, @UnionAll T<:Real Tuple{T,T})
 
     @test issub((@UnionAll S<:Int (@UnionAll R<:AbstractString Tuple{S,R,Vector{Any}})),
-                (@UnionAll T Tuple{T, T, Array{T,1}}))
+                (@UnionAll T Tuple{T, T, Vector{T}}))
 
     @test issub_strict(Tuple{String, Real, Ref{Number}},
                        (@UnionAll T Tuple{Union{T,String}, T, Ref{T}}))
@@ -146,6 +153,14 @@ function test_diagonal()
     @test  isequal_type(Ref{Tuple{T, T} where Int<:T<:Int},
                         Ref{Tuple{S, S}} where Int<:S<:Int)
 
+    # issue #53021
+    @test Tuple{X, X} where {X<:Union{}} <: Tuple{X, X, Vararg{Any}} where {Int<:X<:Int}
+    @test Tuple{Integer, X, Vararg{X}} where {X<:Int} <: Tuple{Any, Vararg{X}} where {X>:Int}
+    @test Tuple{Any, X, Vararg{X}} where {X<:Int} <: Tuple{Vararg{X}} where X>:Integer
+    @test Tuple{Integer, Integer, Any, Vararg{Any}} <: Tuple{Vararg{X}} where X>:Integer
+    # issue #53019
+    @test Tuple{T,T} where {T<:Int} <: Tuple{T,T} where {T>:Int}
+
     let A = Tuple{Int,Int8,Vector{Integer}},
         B = Tuple{T,T,Vector{T}} where T>:Integer,
         C = Tuple{T,T,Vector{Union{Integer,T}}} where T
@@ -156,7 +171,7 @@ function test_diagonal()
     end
 
     # #26108
-    @test !issub((Tuple{T, T, Array{T, 1}} where T), Tuple{T, T, Any} where T)
+    @test !issub((Tuple{T, T, Vector{T}} where T), Tuple{T, T, Any} where T)
 
     # #26716
     @test !issub((Union{Tuple{Int,Bool}, Tuple{P,Bool}} where P), Tuple{Union{T,Int}, T} where T)
@@ -173,7 +188,7 @@ end
 
 # level 3: UnionAll
 function test_3()
-    @test issub_strict(Array{Int,1}, @UnionAll T Vector{T})
+    @test issub_strict(Vector{Int}, @UnionAll T Vector{T})
     @test issub_strict((@UnionAll T Pair{T,T}), Pair)
     @test issub(Pair{Int,Int8}, Pair)
     @test issub(Pair{Int,Int8}, (@UnionAll S Pair{Int,S}))
@@ -188,8 +203,8 @@ function test_3()
     @test issub_strict((@UnionAll T Tuple{Array{T},Array{T}}),
                        Tuple{Array, Array})
 
-    AUA = Array{(@UnionAll T Array{T,1}), 1}
-    UAA = (@UnionAll T Array{Array{T,1}, 1})
+    AUA = Vector{(@UnionAll T Vector{T})}
+    UAA = (@UnionAll T Vector{Vector{T}})
 
     @test !issub(AUA, UAA)
     @test !issub(UAA, AUA)
@@ -206,68 +221,68 @@ function test_3()
     @test isequal_type((@UnionAll T Tuple{T}), Tuple{Any})
     @test isequal_type((@UnionAll T<:Real Tuple{T}), Tuple{Real})
 
-    @test  issub(Tuple{Array{Integer,1}, Int},
-                 @UnionAll T<:Integer @UnionAll S<:T Tuple{Array{T,1},S})
+    @test  issub(Tuple{Vector{Integer}, Int},
+                 @UnionAll T<:Integer @UnionAll S<:T Tuple{Vector{T},S})
 
-    @test !issub(Tuple{Array{Integer,1}, Real},
-                 @UnionAll T<:Integer Tuple{Array{T,1},T})
+    @test !issub(Tuple{Vector{Integer}, Real},
+                 @UnionAll T<:Integer Tuple{Vector{T},T})
 
     @test !issub(Tuple{Int,String,Vector{Integer}},
-                 @UnionAll T Tuple{T, T, Array{T,1}})
+                 @UnionAll T Tuple{T, T, Vector{T}})
     @test !issub(Tuple{String,Int,Vector{Integer}},
-                 @UnionAll T Tuple{T, T, Array{T,1}})
+                 @UnionAll T Tuple{T, T, Vector{T}})
     @test !issub(Tuple{Int,String,Vector{Tuple{Integer}}},
-                 @UnionAll T Tuple{T,T,Array{Tuple{T},1}})
+                 @UnionAll T Tuple{T,T,Vector{Tuple{T}}})
 
     @test issub(Tuple{Int,String,Vector{Any}},
-                @UnionAll T Tuple{T, T, Array{T,1}})
+                @UnionAll T Tuple{T, T, Vector{T}})
 
-    @test isequal_type(Array{Int,1}, Array{(@UnionAll T<:Int T), 1})
-    @test isequal_type(Array{Tuple{Any},1}, Array{(@UnionAll T Tuple{T}), 1})
+    @test isequal_type(Vector{Int}, Vector{(@UnionAll T<:Int T)})
+    @test isequal_type(Vector{Tuple{Any}}, Vector{(@UnionAll T Tuple{T})})
 
-    @test isequal_type(Array{Tuple{Int,Int},1},
-                       Array{(@UnionAll T<:Int Tuple{T,T}), 1})
-    @test !issub(Array{Tuple{Int,Integer},1},
-                 Array{(@UnionAll T<:Integer Tuple{T,T}), 1})
+    @test isequal_type(Vector{Tuple{Int,Int}},
+                       Vector{(@UnionAll T<:Int Tuple{T,T})})
+    @test !issub(Vector{Tuple{Int,Integer}},
+                 Vector{(@UnionAll T<:Integer Tuple{T,T})})
 
     @test !issub(Pair{Int,Int8}, (@UnionAll T Pair{T,T}))
 
-    @test !issub(Tuple{Array{Int,1}, Integer},
-                 @UnionAll T<:Integer Tuple{Array{T,1},T})
+    @test !issub(Tuple{Vector{Int}, Integer},
+                 @UnionAll T<:Integer Tuple{Vector{T},T})
 
-    @test !issub(Tuple{Integer, Array{Int,1}},
-                 @UnionAll T<:Integer Tuple{T, Array{T,1}})
+    @test !issub(Tuple{Integer, Vector{Int}},
+                 @UnionAll T<:Integer Tuple{T, Vector{T}})
 
-    @test !issub(Pair{Array{Int,1},Integer}, @UnionAll T Pair{Array{T,1},T})
-    @test  issub(Pair{Array{Int,1},Int}, @UnionAll T Pair{Array{T,1},T})
+    @test !issub(Pair{Vector{Int},Integer}, @UnionAll T Pair{Vector{T},T})
+    @test  issub(Pair{Vector{Int},Int}, @UnionAll T Pair{Vector{T},T})
 
     @test  issub(Tuple{Integer,Int}, @UnionAll T<:Integer @UnionAll S<:T Tuple{T,S})
     @test !issub(Tuple{Integer,Int}, @UnionAll T<:Int     @UnionAll S<:T Tuple{T,S})
     @test !issub(Tuple{Integer,Int}, @UnionAll T<:String  @UnionAll S<:T Tuple{T,S})
 
-    @test issub(Tuple{Float32,Array{Float32,1}},
-                @UnionAll T<:Real @UnionAll S<:AbstractArray{T,1} Tuple{T,S})
+    @test issub(Tuple{Float32,Vector{Float32}},
+                @UnionAll T<:Real @UnionAll S<:AbstractVector{T} Tuple{T,S})
 
-    @test !issub(Tuple{Float32,Array{Float64,1}},
-                 @UnionAll T<:Real @UnionAll S<:AbstractArray{T,1} Tuple{T,S})
+    @test !issub(Tuple{Float32,Vector{Float64}},
+                 @UnionAll T<:Real @UnionAll S<:AbstractVector{T} Tuple{T,S})
 
-    @test issub(Tuple{Float32,Array{Real,1}},
-                @UnionAll T<:Real @UnionAll S<:AbstractArray{T,1} Tuple{T,S})
+    @test issub(Tuple{Float32,Vector{Real}},
+                @UnionAll T<:Real @UnionAll S<:AbstractVector{T} Tuple{T,S})
 
-    @test !issub(Tuple{Number,Array{Real,1}},
-                 @UnionAll T<:Real @UnionAll S<:AbstractArray{T,1} Tuple{T,S})
+    @test !issub(Tuple{Number,Vector{Real}},
+                 @UnionAll T<:Real @UnionAll S<:AbstractVector{T} Tuple{T,S})
 
     @test issub((@UnionAll Int<:T<:Integer T), @UnionAll T<:Real T)
-    @test issub((@UnionAll Int<:T<:Integer Array{T,1}),
-                (@UnionAll T<:Real Array{T,1}))
+    @test issub((@UnionAll Int<:T<:Integer Vector{T}),
+                (@UnionAll T<:Real Vector{T}))
 
     @test  issub((@UnionAll Int<:T<:Integer T), (@UnionAll Integer<:T<:Real T))
-    @test !issub((@UnionAll Int<:T<:Integer Array{T,1}), (@UnionAll Integer<:T<:Real Array{T,1}))
+    @test !issub((@UnionAll Int<:T<:Integer Vector{T}), (@UnionAll Integer<:T<:Real Vector{T}))
 
-    X = (@UnionAll T<:Real @UnionAll S<:AbstractArray{T,1} Tuple{T,S})
-    Y = (@UnionAll A<:Real @UnionAll B<:AbstractArray{A,1} Tuple{A,B})
+    X = (@UnionAll T<:Real @UnionAll S<:AbstractVector{T} Tuple{T,S})
+    Y = (@UnionAll A<:Real @UnionAll B<:AbstractVector{A} Tuple{A,B})
     @test isequal_type(X,Y)
-    Z = (@UnionAll A<:Real @UnionAll B<:AbstractArray{A,1} Tuple{Real,B})
+    Z = (@UnionAll A<:Real @UnionAll B<:AbstractVector{A} Tuple{Real,B})
     @test issub_strict(X,Z)
 
     @test issub_strict((@UnionAll T @UnionAll S<:T Pair{T,S}),
@@ -314,7 +329,7 @@ function test_3()
     @test !issub((Type{AbstractArray{Array{T}} where T}), Type{AbstractArray{S}} where S)
     @test !issub((Val{AbstractArray{Array{T}} where T}), Val{AbstractArray{T}} where T)
     @test !issub((Array{Array{Array{T}} where T}), Array{Array{T}} where T)
-    @test !issub((Array{Array{T, 1}, 1} where T), AbstractArray{Vector})
+    @test !issub((Vector{Vector{T}} where T), AbstractArray{Vector})
 
     @test !issub((Ref{Pair{Pair{T, R}, R} where R} where T),
                  (Ref{Pair{A,          B} where B} where A))
@@ -375,36 +390,36 @@ end
 function test_5()
     u = Union{Int8,Int}
 
-    @test issub(Tuple{String,Array{Int,1}},
-                (@UnionAll T Union{Tuple{T,Array{T,1}}, Tuple{T,Array{Int,1}}}))
+    @test issub(Tuple{String,Vector{Int}},
+                (@UnionAll T Union{Tuple{T,Vector{T}}, Tuple{T,Vector{Int}}}))
 
     @test issub(Tuple{Union{Vector{Int},Vector{Int8}}},
-                @UnionAll T Tuple{Array{T,1}})
+                @UnionAll T Tuple{Vector{T}})
 
     @test !issub(Tuple{Union{Vector{Int},Vector{Int8}},Vector{Int}},
-                 @UnionAll T Tuple{Array{T,1}, Array{T,1}})
+                 @UnionAll T Tuple{Vector{T}, Vector{T}})
 
     @test !issub(Tuple{Union{Vector{Int},Vector{Int8}},Vector{Int8}},
-                 @UnionAll T Tuple{Array{T,1}, Array{T,1}})
+                 @UnionAll T Tuple{Vector{T}, Vector{T}})
 
-    @test !issub(Vector{Int}, @UnionAll T>:u Array{T,1})
-    @test  issub(Vector{Integer}, @UnionAll T>:u Array{T,1})
-    @test  issub(Vector{Union{Int,Int8}}, @UnionAll T>:u Array{T,1})
+    @test !issub(Vector{Int}, @UnionAll T>:u Vector{T})
+    @test  issub(Vector{Integer}, @UnionAll T>:u Vector{T})
+    @test  issub(Vector{Union{Int,Int8}}, @UnionAll T>:u Vector{T})
 
-    @test issub((@UnionAll Int<:T<:u Array{T,1}), (@UnionAll Int<:T<:u Array{T,1}))
+    @test issub((@UnionAll Int<:T<:u Vector{T}), (@UnionAll Int<:T<:u Vector{T}))
 
     # with varargs
     @test !issub(Array{Tuple{Array{Int},Array{Vector{Int16}},Array{Vector{Int}},Array{Int}}},
-                 @UnionAll T<:(@UnionAll S Tuple{Vararg{Union{Array{S}, Array{Array{S,1}}}}}) Array{T})
+                 @UnionAll T<:(@UnionAll S Tuple{Vararg{Union{Array{S}, Array{Vector{S}}}}}) Array{T})
 
     @test  issub(Array{Tuple{Array{Int},Array{Vector{Int}},Array{Vector{Int}},Array{Int}}},
-                 @UnionAll T<:(@UnionAll S Tuple{Vararg{Union{Array{S}, Array{Array{S,1}}}}}) Array{T})
+                 @UnionAll T<:(@UnionAll S Tuple{Vararg{Union{Array{S}, Array{Vector{S}}}}}) Array{T})
 
     @test !issub(Tuple{Array{Int},Array{Vector{Int16}},Array{Vector{Int}},Array{Int}},
-                 @UnionAll S Tuple{Vararg{Union{Array{S},Array{Array{S,1}}}}})
+                 @UnionAll S Tuple{Vararg{Union{Array{S},Array{Vector{S}}}}})
 
     @test  issub(Tuple{Array{Int},Array{Vector{Int}},Array{Vector{Int}},Array{Int}},
-                 @UnionAll S Tuple{Vararg{Union{Array{S},Array{Array{S,1}}}}})
+                 @UnionAll S Tuple{Vararg{Union{Array{S},Array{Vector{S}}}}})
 
     B = @UnionAll S<:u Tuple{S, Tuple{Any,Any,Any}, Ref{S}}
     # these tests require renaming in issub_unionall
@@ -413,8 +428,8 @@ function test_5()
 
     # the `convert(Type{T},T)` pattern, where T is a Union
     # required changing priority of unions and vars
-    @test issub(Tuple{Array{u,1},Int}, @UnionAll T Tuple{Array{T,1}, T})
-    @test issub(Tuple{Array{u,1},Int}, @UnionAll T @UnionAll S<:T Tuple{Array{T,1}, S})
+    @test issub(Tuple{Vector{u},Int}, @UnionAll T Tuple{Vector{T}, T})
+    @test issub(Tuple{Vector{u},Int}, @UnionAll T @UnionAll S<:T Tuple{Vector{T}, S})
 
     @test !issub(Ref{Union{Ref{Int},Ref{Int8}}}, @UnionAll T Ref{Ref{T}})
     @test  issub(Tuple{Union{Ref{Int},Ref{Int8}}}, @UnionAll T Tuple{Ref{T}})
@@ -434,10 +449,10 @@ end
 # tricky type variable lower bounds
 function test_6()
     @test  issub((@UnionAll S<:Int (@UnionAll R<:String Tuple{S,R,Vector{Any}})),
-                 (@UnionAll T Tuple{T, T, Array{T,1}}))
+                 (@UnionAll T Tuple{T, T, Vector{T}}))
 
     @test !issub((@UnionAll S<:Int (@UnionAll R<:String Tuple{S,R,Vector{Integer}})),
-                 (@UnionAll T Tuple{T, T, Array{T,1}}))
+                 (@UnionAll T Tuple{T, T, Vector{T}}))
 
     t = @UnionAll T Tuple{T,T,Ref{T}}
     @test isequal_type(t, @UnionAll S Tuple{S,S,Ref{S}})
@@ -546,8 +561,8 @@ function test_old()
     @test Int8 <: Integer
     @test Int32 <: Integer
     @test Tuple{Int8,Int8} <: Tuple{Integer,Integer}
-    @test !(AbstractArray{Float64,2} <: AbstractArray{Number,2})
-    @test !(AbstractArray{Float64,1} <: AbstractArray{Float64,2})
+    @test !(AbstractMatrix{Float64} <: AbstractMatrix{Number})
+    @test !(AbstractVector{Float64} <: AbstractMatrix{Float64})
     @test Tuple{Integer,Vararg{Integer}} <: Tuple{Integer,Vararg{Real}}
     @test Tuple{Integer,Float64,Vararg{Integer}} <: Tuple{Integer,Vararg{Number}}
     @test Tuple{Integer,Float64} <: Tuple{Integer,Vararg{Number}}
@@ -556,9 +571,9 @@ function test_old()
     @test !(Tuple{Vararg{Int32}} <: Tuple{Int32,})
     @test !(Tuple{Vararg{Int32}} <: Tuple{Number,Integer})
     @test !(Tuple{Vararg{Integer}} <: Tuple{Integer,Integer,Vararg{Integer}})
-    @test !(Array{Int8,1} <: Array{Any,1})
-    @test !(Array{Any,1} <: Array{Int8,1})
-    @test Array{Int8,1} <: Array{Int8,1}
+    @test !(Vector{Int8} <: Vector{Any})
+    @test !(Vector{Any} <: Vector{Int8})
+    @test Vector{Int8} <: Vector{Int8}
     @test !(Type{Bottom} <: Type{Int32})
     @test !(Vector{Float64} <: Vector{Union{Float64,Float32}})
 
@@ -589,10 +604,10 @@ end
 
 const easy_menagerie =
     Any[Any, Int, Int8, Integer, Real,
-        Array{Int,1}, AbstractArray{Int,1},
+        Vector{Int}, AbstractVector{Int},
         Tuple{Int,Vararg{Integer}}, Tuple{Integer,Vararg{Int}}, Tuple{},
         Union{Int,Int8},
-        (@UnionAll T Array{T,1}),
+        (@UnionAll T Vector{T}),
         (@UnionAll T Pair{T,T}),
         (@UnionAll T @UnionAll S Pair{T,S}),
         Pair{Int,Int8},
@@ -602,15 +617,15 @@ const easy_menagerie =
         (@UnionAll T @UnionAll S Tuple{T,S}),
         (@UnionAll T<:Integer @UnionAll S<:Number Tuple{T,S}),
         (@UnionAll T<:Integer @UnionAll S<:Number Tuple{S,T}),
-        Array{(@UnionAll T Array{T,1}),1},
-        (@UnionAll T Array{Array{T,1},1}),
-        Array{(@UnionAll T<:Int T), 1},
-        (@UnionAll T<:Real @UnionAll S<:AbstractArray{T,1} Tuple{T,S}),
+        Vector{(@UnionAll T Vector{T})},
+        (@UnionAll T Vector{Vector{T}}),
+        Vector{(@UnionAll T<:Int T)},
+        (@UnionAll T<:Real @UnionAll S<:AbstractVector{T} Tuple{T,S}),
         Union{Int,Ref{Union{Int,Int8}}},
         ]
 
 const hard_menagerie =
-    Any[(@UnionAll T Union{Tuple{T,Array{T,1}}, Tuple{T,Array{Int,1}}})]
+    Any[(@UnionAll T Union{Tuple{T,Vector{T}}, Tuple{T,Vector{Int}}})]
 
 function add_variants!(types)
     new = Any[]
@@ -699,16 +714,17 @@ macro testintersect(a, b, result)
     a = esc(a)
     b = esc(b)
     result = esc(result)
-    Base.remove_linenums!(quote
+    # use a manual macrocall expression since Test will examine this __source__ value
+    return quote
         # test real intersect
-        @test $cmp(_type_intersect($a, $b), $result)
-        @test $cmp(_type_intersect($b, $a), $result)
+        $(Expr(:macrocall, :var"@test", __source__, :($cmp(_type_intersect($a, $b), $result))))
+        $(Expr(:macrocall, :var"@test", __source__, :($cmp(_type_intersect($b, $a), $result))))
         # test simplified intersect
         if !($result === Union{})
-            @test typeintersect($a, $b) != Union{}
-            @test typeintersect($b, $a) != Union{}
+            $(Expr(:macrocall, :var"@test", __source__, :(typeintersect($a, $b) != Union{})))
+            $(Expr(:macrocall, :var"@test", __source__, :(typeintersect($b, $a) != Union{})))
         end
-    end)
+    end
 end
 
 abstract type IT4805_2{N, T} end
@@ -753,11 +769,11 @@ function test_intersection()
     @testintersect((@UnionAll T<:Number Array{T}), (@UnionAll T<:String Array{T}),
                    Array{Bottom})
 
-    @testintersect((@UnionAll T Tuple{T, AbstractArray{T}}), Tuple{Number, Array{Int,1}},
-                   Tuple{Int, Array{Int,1}})
+    @testintersect((@UnionAll T Tuple{T, AbstractArray{T}}), Tuple{Number, Vector{Int}},
+                   Tuple{Int, Vector{Int}})
 
-    @testintersect((@UnionAll T Tuple{T, AbstractArray{T}}), Tuple{Int, Array{Number,1}},
-                   Tuple{Int, Array{Number,1}})
+    @testintersect((@UnionAll T Tuple{T, AbstractArray{T}}), Tuple{Int, Vector{Number}},
+                   Tuple{Int, Vector{Number}})
 
     # TODO: improve this result
     #@testintersect((@UnionAll S Tuple{S,Vector{S}}), (@UnionAll T<:Real Tuple{T,AbstractVector{T}}),
@@ -768,10 +784,10 @@ function test_intersection()
     # typevar corresponding to a type it will end up being neither greater than nor
     # less than
     @testintersect((@UnionAll T Tuple{T, Ref{T}}), Tuple{Array{Int}, Ref{AbstractVector}},
-                   Tuple{Array{Int,1}, Ref{AbstractVector}})
+                   Tuple{Vector{Int}, Ref{AbstractVector}})
 
-    @testintersect((@UnionAll T Tuple{T, AbstractArray{T}}), Tuple{Any, Array{Number,1}},
-                   Tuple{Number, Array{Number,1}})
+    @testintersect((@UnionAll T Tuple{T, AbstractArray{T}}), Tuple{Any, Vector{Number}},
+                   Tuple{Number, Vector{Number}})
     @testintersect((@UnionAll T Tuple{Array{T}, Array{T}}), Tuple{Array, Array{Any}}, !Bottom)
 
     @testintersect((@UnionAll T Tuple{T,T}), Tuple{Real, Real}, (@UnionAll T<:Real Tuple{T,T}))
@@ -784,9 +800,9 @@ function test_intersection()
                    @UnionAll Z<:Integer Pair{Z,Z})
 
     @testintersect((@UnionAll T<:Vector Type{T}), (@UnionAll N Type{@UnionAll S<:Number Array{S,N}}),
-                   Type{@UnionAll S<:Number Array{S,1}})
+                   Type{@UnionAll S<:Number Vector{S}})
 
-    @testintersect((@UnionAll T Tuple{Type{Array{T,1}},Array{T,1}}),
+    @testintersect((@UnionAll T Tuple{Type{Vector{T}},Vector{T}}),
                    Tuple{Type{AbstractVector},Vector{Int}}, Bottom)
 
     @testintersect(Tuple{Type{Vector{ComplexF64}}, AbstractVector},
@@ -827,8 +843,8 @@ function test_intersection()
                        @UnionAll N Tuple{Tuple{Int,Vararg{Int}},Array{Int,N}})
 
     @testintersect((@UnionAll N Tuple{NTuple{N,Any},Array{Int,N}}),
-                   Tuple{Tuple{Int,Vararg{Int}},Array{Int,2}},
-                   Tuple{Tuple{Int,Int}, Array{Int,2}})
+                   Tuple{Tuple{Int,Vararg{Int}},Matrix{Int}},
+                   Tuple{Tuple{Int,Int}, Matrix{Int}})
     @testintersect(Type{Any},Type{Complex}, Bottom)
     @testintersect(Type{Any},(@UnionAll T<:Real Type{T}), Bottom)
 
@@ -851,7 +867,7 @@ function test_intersection()
     @testintersect((@UnionAll N Tuple{Array{Int,N},Vararg{Int,N}}), Tuple{Vector{Int},Real,Real,Real}, Bottom)
 
     @testintersect((@UnionAll N Tuple{Array{Int,N},Vararg{Int,N}}), Tuple{Array{Int,0}}, Tuple{Array{Int,0}})
-    @testintersect((@UnionAll N Tuple{Array{Int,N},Vararg{Int,N}}), Tuple{Array{Int,2}}, Bottom)
+    @testintersect((@UnionAll N Tuple{Array{Int,N},Vararg{Int,N}}), Tuple{Matrix{Int}}, Bottom)
 
     @testintersect(Tuple{Int,Vararg{Int}}, Tuple{Int,Int,Int,Vararg{Float64}}, Tuple{Int,Int,Int})
     @testintersect(Tuple{Int,Vararg{Int}}, Tuple{Int,Vararg{Float64}}, Tuple{Int})
@@ -861,11 +877,11 @@ function test_intersection()
     @testintersect((@UnionAll N Tuple{Array{Int,N},Vararg{Int,N}}),
                    Tuple{Matrix{Int},Int,Vararg{Float64}}, Bottom)
 
-    @testintersect(Tuple{Array{Any,1}, Tuple{Int64, Int64, Vararg{Int64}}},
+    @testintersect(Tuple{Vector{Any}, Tuple{Int64, Int64, Vararg{Int64}}},
                    Tuple{Array{T,N}, Tuple{Vararg{Int64,N}}} where N where T,
                    Bottom)
 
-    @testintersect((@UnionAll T<:Union{Float64,Array{Float64,1}} T), Real, Float64)
+    @testintersect((@UnionAll T<:Union{Float64,Vector{Float64}} T), Real, Float64)
 
     # issue #4805
     @testintersect((@UnionAll T<:Int Type{IT4805_2{1,T}}),
@@ -939,20 +955,20 @@ function test_intersection()
                    Tuple{Type{S}, Tuple{Any, Vararg{Any}}} where S<:Tuple{Any, Vararg{Any}})
 
     # part of issue #20450
-    @testintersect(Tuple{Array{Ref{T}, 1}, Array{Pair{M, V}, 1}} where V where T where M,
-                   Tuple{Array{Ref{T}, 1}, Array{Pair{M, T}, 1}, SS} where T where M where SS,
+    @testintersect(Tuple{Vector{Ref{T}}, Vector{Pair{M, V}}} where V where T where M,
+                   Tuple{Vector{Ref{T}}, Vector{Pair{M, T}}, SS} where T where M where SS,
                    Union{})
 
-    @testintersect(Tuple{Array{Ref{T}, 1}, Array{Pair{M, V}, 1}, Int} where V where T where M,
-                   Tuple{Array{Ref{T}, 1}, Array{Pair{M, T}, 1}, Any} where T where M,
-                   Tuple{Array{Ref{T}, 1}, Array{Pair{M, T}, 1}, Int} where T where M)
+    @testintersect(Tuple{Vector{Ref{T}}, Vector{Pair{M, V}}, Int} where V where T where M,
+                   Tuple{Vector{Ref{T}}, Vector{Pair{M, T}}, Any} where T where M,
+                   Tuple{Vector{Ref{T}}, Vector{Pair{M, T}}, Int} where T where M)
 
     @testintersect(Tuple{Int, Ref{Pair{K,V}}} where V where K,
                    Tuple{Any, Ref{Pair{T,T}} where T },
                    Tuple{Int, Ref{Pair{T,T}} where T })
 
     @test_broken isequal_type(_type_intersect(Tuple{T,T} where T,
-                                              Union{Tuple{S,Array{Int64,1}},Tuple{S,Array{S,1}}} where S),
+                                              Union{Tuple{S,Vector{Int64}},Tuple{S,Vector{S}}} where S),
                               Union{Tuple{Vector{Int64},Vector{Int64}},
                                     Tuple{Vector{T},Vector{T}} where T>:Vector})
 
@@ -1030,8 +1046,8 @@ function test_intersection()
                    Tuple{Ref{T}, Ref{T}} where T,
                    Tuple{Ref{Ref{Int}}, Ref{Ref{Int}}})
     @testintersect(Tuple{Vector{Pair{K,V}}, Vector{Pair{K,V}}} where K where V,
-                   Tuple{(Array{Pair{Ref{_2},_1},1} where _2 where _1),
-                         Array{Pair{Ref{Int64},Rational{Int64}},1}},
+                   Tuple{(Vector{Pair{Ref{_2},_1}} where _2 where _1),
+                         Vector{Pair{Ref{Int64},Rational{Int64}}}},
                    Tuple{Vector{Pair{Ref{Int64},Rational{Int64}}},
                          Vector{Pair{Ref{Int64},Rational{Int64}}}})
     @testintersect(Vector{>:Missing}, Vector{Int}, Union{})
@@ -1119,7 +1135,7 @@ let S = ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), UnionAll, [TypeVa
 end
 
 # issue #20121
-@test NTuple{170,Matrix{Int}} <: (Tuple{Vararg{Union{Array{T,1},Array{T,2},Array{T,3}}}} where T)
+@test NTuple{170,Matrix{Int}} <: (Tuple{Vararg{Union{Vector{T},Matrix{T},Array{T,3}}}} where T)
 
 # Issue #12580
 abstract type AbstractMyType12580{T} end
@@ -1260,14 +1276,7 @@ let a = Tuple{Tuple{T2,4},T6} where T2 where T6,
 end
 let a = Tuple{T3,Int64,Tuple{T3}} where T3,
     b = Tuple{S3,S3,S4} where S4 where S3
-    I1 = typeintersect(a, b)
-    I2 = typeintersect(b, a)
-    @test I1 <: I2
-    @test I2 <: I1
-    @test_broken I1 <: a
-    @test I2 <: a
-    @test I1 <: b
-    @test I2 <: b
+    @testintersect(a, b, Tuple{Int64, Int64, Tuple{Int64}})
 end
 let a = Tuple{T1,Val{T2},T2} where T2 where T1,
     b = Tuple{Float64,S1,S2} where S2 where S1
@@ -1403,24 +1412,24 @@ end
 let
     triangular(::Type{<:AbstractArray{T}}) where {T} = T
     triangular(::Type{<:AbstractArray}) = Any
-    @test triangular(Array{Array{T, 1}, 1} where T) === Any
+    @test triangular(Vector{Vector{T}} where T) === Any
 end
 
 # issue #23908
-@test Array{Union{Int128, Int16, Int32, Int8}, 1} <: Array{Union{Int128, Int32, Int8, _1}, 1} where _1
-let A = Pair{Nothing, Pair{Array{Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8}, 1}, Nothing}},
-    B = Pair{Nothing, Pair{Array{Union{Int8, UInt128, UInt16, UInt32, UInt64, UInt8, _1}, 1}, Nothing}} where _1
+@test Vector{Union{Int128, Int16, Int32, Int8}} <: Vector{Union{Int128, Int32, Int8, _1}} where _1
+let A = Pair{Nothing, Pair{Vector{Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8}}, Nothing}},
+    B = Pair{Nothing, Pair{Vector{Union{Int8, UInt128, UInt16, UInt32, UInt64, UInt8, _1}}, Nothing}} where _1
     @test A <: B
     @test !(B <: A)
 end
 
 # issue #22688
-let X = Ref{Tuple{Array{Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8}, 1}}}
+let X = Ref{Tuple{Vector{Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8}}}}
     @test !(X <: Ref{Tuple{Array{Union{Int8, UInt128, UInt16, UInt32, UInt64, UInt8, S}}}} where S)
-    @test X <: Ref{Tuple{Array{Union{Int8, UInt128, UInt16, UInt32, UInt64, UInt8, S}, 1}}} where S
+    @test X <: Ref{Tuple{Vector{Union{Int8, UInt128, UInt16, UInt32, UInt64, UInt8, S}}}} where S
 end
-let X = Ref{Tuple{Array{Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8}, 1}, Array{Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8}, 1}}},
-    Y = Ref{Tuple{Array{Union{Int8, UInt128, UInt16, UInt32, UInt64, UInt8, S}, 1}, Array{Union{Int8, UInt128, UInt16, UInt32, UInt64, UInt8, T}, 1}}} where S where T
+let X = Ref{Tuple{Vector{Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8}}, Vector{Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8}}}},
+    Y = Ref{Tuple{Vector{Union{Int8, UInt128, UInt16, UInt32, UInt64, UInt8, S}}, Vector{Union{Int8, UInt128, UInt16, UInt32, UInt64, UInt8, T}}}} where S where T
     @test X <: Y
 end
 
@@ -1436,6 +1445,9 @@ struct A23764_2{T, N, S} <: AbstractArray{Union{Ref{T}, S}, N}; end
 @test Tuple{A23764_2{T, 1, Nothing} where T} <: Tuple{AbstractArray{T,N}} where {T,N}
 @test Tuple{A23764_2{T, 1, Nothing} where T} <: Tuple{AbstractArray{T,N} where {T,N}}
 
+# issue #50716
+@test !<:(Ref{Vector{Tuple{K}} where K}, Ref{<:Vector{K}} where K)
+
 # issue #26131
 @test !(Vector{Vector{Number}} <: Vector{Union{Vector{Number}, Vector{S}}} where S<:Integer)
 
@@ -1517,7 +1529,7 @@ f26453(x::T,y::T) where {S,T>:S} = 0
 @test f26453(1,2) == 0
 @test f26453(1,"") == 0
 g26453(x::T,y::T) where {S,T>:S} = T
-@test_throws UndefVarError(:T) g26453(1,1)
+@test_throws UndefVarError(:T, :static_parameter) g26453(1,1)
 @test issub_strict((Tuple{T,T} where T), (Tuple{T,T} where {S,T>:S}))
 
 # issue #27632
@@ -1579,9 +1591,9 @@ let A = Tuple{Any, Type{Union{Nothing, Int64}}},
     @test I >: Tuple{Int64,Type{Union{Nothing, Int64}}}
     @test J >: Tuple{Int64,Type{Union{Nothing, Int64}}}
 end
-@testintersect(Union{Array{T,1},Array{T,2}} where T<:Union{Float32,Float64},
+@testintersect(Union{Vector{T},Matrix{T}} where T<:Union{Float32,Float64},
                Union{AbstractMatrix{Float32},AbstractVector{Float32}},
-               Union{Array{Float32,2}, Array{Float32,1}})
+               Union{Matrix{Float32}, Vector{Float32}})
 let A = Tuple{Type{Union{Missing,T}},Any} where T,
     B = Tuple{Type{Union{Nothing,T}},Any} where T
     I = typeintersect(A, B)
@@ -1595,16 +1607,16 @@ struct M29955{T, TV<:AbstractVector{T}}
 end
 @testintersect(M29955,
                M29955{<:Any,TV} where TV>:Vector{Float64},
-               M29955{Float64,TV} where Array{Float64,1}<:TV<:AbstractArray{Float64,1})
+               M29955{Float64,TV} where Vector{Float64}<:TV<:AbstractVector{Float64})
 
 struct A29955{T, TV<:AbstractVector{T}, TModel<:M29955{T,TV}}
 end
-@testintersect(Tuple{Type{A29955{Float64,Array{Float64,1},_1}} where _1,
+@testintersect(Tuple{Type{A29955{Float64,Vector{Float64},_1}} where _1,
                      Any},
                Tuple{Type{A29955{T,TV,TM}},
                      TM} where {T,TV<:AbstractVector{T},TM<:M29955{T,TV}},
-               Tuple{Type{A29955{Float64,Array{Float64,1},TM}},
-                   M29955{Float64,Vector{Float64}}} where TM<:M29955{Float64,Array{Float64,1}})
+               Tuple{Type{A29955{Float64,Vector{Float64},TM}},
+                   M29955{Float64,Vector{Float64}}} where TM<:M29955{Float64,Vector{Float64}})
 let M = M29955{T,Vector{Float64}} where T
     @test M == (M29955{T,Vector{Float64}} where T)
     @test M{Float64} == M29955{Float64,Vector{Float64}}
@@ -1677,18 +1689,16 @@ f31082(::Pair{B, C}, ::C, ::C) where {B, C} = 1
                Tuple{Type{Val{T}},Int,Int} where T>:Integer)
 
 # issue #31496
-CovType{T} = Union{AbstractArray{T,2},
+CovType{T} = Union{AbstractMatrix{T},
                    Vector{UpperTriangular{T,Matrix{T}}}}
 @testintersect(Pair{<:Any, <:AbstractMatrix},
                Pair{T,     <:CovType{T}} where T<:AbstractFloat,
-               Pair{T,S} where S<:AbstractArray{T,2} where T<:AbstractFloat)
+               Pair{T,S} where S<:AbstractMatrix{T} where T<:AbstractFloat)
 
 # issue #31703
 @testintersect(Pair{<:Any, Ref{Tuple{Ref{Ref{Tuple{Int}}},Ref{Float64}}}},
                Pair{T, S} where S<:(Ref{A} where A<:(Tuple{C,Ref{T}} where C<:(Ref{D} where D<:(Ref{E} where E<:Tuple{FF}) where FF<:B)) where B) where T,
-               Pair{T, Ref{Tuple{Ref{Ref{Tuple{Int}}},Ref{Float64}}}} where T)
-# TODO: should be able to get this result
-#              Pair{Float64, Ref{Tuple{Ref{Ref{Tuple{Int}}},Ref{Float64}}}}
+               Pair{Float64, Ref{Tuple{Ref{Ref{Tuple{Int}}},Ref{Float64}}}})
 
 module I31703
 using Test, LinearAlgebra
@@ -1712,11 +1722,11 @@ const RadialOperator{T,B,M<:AbstractMatrix{T}} = Mul{<:Any,<:Tuple{<:BasisOrRest
 const HFPotentialOperator{T,B} = RadialOperator{T,B,Diagonal{T,Vector{T}}}
 struct HFPotential{kind,T,B,RO<:HFPotentialOperator{T,B},P<:Integer} end
 
-T = HFPotential{_A,Float64,Any,Applied{Int,Tuple{ApplyQuasiArray{Float64,2,Applied{Int,Tuple{Any,BandedMatrix{Int,Ones{Int,2,Tuple{OneTo{Int},OneTo{Int}}},OneTo{Int}}}}},Diagonal{Float64,Array{Float64,1}},ApplyQuasiArray{Float64,2,Applied{Int,Tuple{Adjoint{Int,BandedMatrix{Int,Ones{Int,2,Tuple{OneTo{Int},OneTo{Int}}},OneTo{Int}}},QuasiAdjoint{Float64,Any}}}}}},_B} where _B where _A
+T = HFPotential{_A,Float64,Any,Applied{Int,Tuple{ApplyQuasiArray{Float64,2,Applied{Int,Tuple{Any,BandedMatrix{Int,Ones{Int,2,Tuple{OneTo{Int},OneTo{Int}}},OneTo{Int}}}}},Diagonal{Float64,Vector{Float64}},ApplyQuasiArray{Float64,2,Applied{Int,Tuple{Adjoint{Int,BandedMatrix{Int,Ones{Int,2,Tuple{OneTo{Int},OneTo{Int}}},OneTo{Int}}},QuasiAdjoint{Float64,Any}}}}}},_B} where _B where _A
 
 let A = typeintersect(HFPotential, T),
     B = typeintersect(T, HFPotential)
-    @test A == B == HFPotential{kind,Float64,Any,Applied{Int,Tuple{ApplyQuasiArray{Float64,2,Applied{Int,Tuple{Any,BandedMatrix{Int,Ones{Int,2,Tuple{OneTo{Int},OneTo{Int}}},OneTo{Int}}}}},Diagonal{Float64,Array{Float64,1}},ApplyQuasiArray{Float64,2,Applied{Int,Tuple{Adjoint{Int,BandedMatrix{Int,Ones{Int,2,Tuple{OneTo{Int},OneTo{Int}}},OneTo{Int}}},QuasiAdjoint{Float64,Any}}}}}},P} where P<:Integer where kind
+    @test A == B == HFPotential{kind,Float64,Any,Applied{Int,Tuple{ApplyQuasiArray{Float64,2,Applied{Int,Tuple{Any,BandedMatrix{Int,Ones{Int,2,Tuple{OneTo{Int},OneTo{Int}}},OneTo{Int}}}}},Diagonal{Float64,Vector{Float64}},ApplyQuasiArray{Float64,2,Applied{Int,Tuple{Adjoint{Int,BandedMatrix{Int,Ones{Int,2,Tuple{OneTo{Int},OneTo{Int}}},OneTo{Int}}},QuasiAdjoint{Float64,Any}}}}}},P} where P<:Integer where kind
 end
 end
 
@@ -1740,8 +1750,7 @@ end
                Tuple{Type{SA{2, L}}, Type{SA{2, L}}} where L)
 @testintersect(Tuple{Type{SA{2, L}}, Type{SA{2, 16}}} where L,
                Tuple{Type{<:SA{N, L}}, Type{<:SA{N, L}}} where {N,L},
-               # TODO: this could be narrower
-               Tuple{Type{SA{2, L}}, Type{SA{2, 16}}} where L)
+               Tuple{Type{SA{2, 16}}, Type{SA{2, 16}}})
 
 # issue #31993
 @testintersect(Tuple{Type{<:AbstractVector{T}}, Int} where T,
@@ -1750,9 +1759,9 @@ end
 @testintersect(Tuple{Type{<:AbstractVector{T}}, Int} where T,
                Tuple{Type{Vector{T} where Int<:T<:Int}, Any},
                Tuple{Type{Vector{Int}}, Int})
-let X = LinearAlgebra.Symmetric{T, S} where S<:(AbstractArray{U, 2} where U<:T) where T,
-    Y = Union{LinearAlgebra.Hermitian{T, S} where S<:(AbstractArray{U, 2} where U<:T) where T,
-              LinearAlgebra.Symmetric{T, S} where S<:(AbstractArray{U, 2} where U<:T) where T}
+let X = LinearAlgebra.Symmetric{T, S} where S<:(AbstractMatrix{U} where U<:T) where T,
+    Y = Union{LinearAlgebra.Hermitian{T, S} where S<:(AbstractMatrix{U} where U<:T) where T,
+              LinearAlgebra.Symmetric{T, S} where S<:(AbstractMatrix{U} where U<:T) where T}
     @test X <: Y
 end
 
@@ -1790,9 +1799,9 @@ let T31805 = Tuple{Type{Tuple{}}, Tuple{Vararg{Int8, A}}} where A,
 end
 
 @testintersect(
-    Tuple{Array{Tuple{Vararg{Int64,N}},N},Tuple{Vararg{Array{Int64,1},N}}} where N,
-    Tuple{Array{Tuple{Int64},1}, Tuple},
-    Tuple{Array{Tuple{Int64},1},Tuple{Array{Int64,1}}})
+    Tuple{Array{Tuple{Vararg{Int64,N}},N},Tuple{Vararg{Vector{Int64},N}}} where N,
+    Tuple{Vector{Tuple{Int64}}, Tuple},
+    Tuple{Vector{Tuple{Int64}},Tuple{Vector{Int64}}})
 
 @test !isequal_type(Tuple{Int, Vararg{T, 3}} where T<:Real, Tuple{Int, Real, Vararg{T, 2}} where T<:Integer)
 
@@ -1803,7 +1812,7 @@ let (_, E) = intersection_env(Tuple{Tuple{Vararg{Int}}}, Tuple{Tuple{Vararg{Int,
     @test !isa(E[1], Type)
 end
 
-# this is is a timing test, so it would fail on debug builds
+# this is a timing test, so it would fail on debug builds
 #let T = Type{Tuple{(Union{Int, Nothing} for i = 1:23)..., Union{String, Nothing}}},
 #    S = Type{T} where T<:Tuple{E, Vararg{E}} where E
 #    @test @elapsed (@test T != S) < 5
@@ -1846,9 +1855,9 @@ c32703(::Type{<:Str{C}}, str::Str{C}) where {C<:CSE} = str
                Tuple{Type{<:Str{C}}, Str{C}} where {C<:CSE},
                Union{})
 @test c32703(UTF16Str, ASCIIStr()) == 42
-@test_broken typeintersect(Tuple{Vector{Vector{Float32}},Matrix,Matrix},
-                           Tuple{Vector{V},Matrix{Int},Matrix{S}} where {S, V<:AbstractVector{S}}) ==
-             Tuple{Array{Array{Float32,1},1},Array{Int,2},Array{Float32,2}}
+@testintersect(Tuple{Vector{Vector{Float32}},Matrix,Matrix},
+               Tuple{Vector{V},Matrix{Int},Matrix{S}} where {S, V<:AbstractVector{S}},
+               Tuple{Vector{Vector{Float32}},Matrix{Int},Matrix{Float32}})
 
 @testintersect(Tuple{Pair{Int, DataType}, Any},
                Tuple{Pair{A, B} where B<:Type, Int} where A,
@@ -1887,7 +1896,7 @@ s26065 = Ref{Tuple{T,Ref{Union{Ref{Tuple{Ref{Union{Ref{Ref{Tuple{Ref{Tuple{Union
       Val{Tuple{Missing, Vararg{Union{}}}} === Val{Tuple{Missing}}
 
 # issue #36869
-struct F36869{T, V} <: AbstractArray{Union{T, V}, 1}
+struct F36869{T, V} <: AbstractVector{Union{T, V}}
 end
 @testintersect(Tuple{Type{T}, AbstractVector{T}} where T,
                Tuple{Union, F36869{Int64, Missing}},
@@ -2010,9 +2019,9 @@ end
     Type{S} where {N, S<:(Base.IteratorsMD.CartesianIndices{N, B} where B<:Tuple{Vararg{Any, N}})})
 
 # issue #39948
-@testintersect(Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1} where T, Vector},
+@testintersect(Tuple{Vector{Pair{T, JT} where JT<:Ref{T}} where T, Vector},
     Tuple{Vararg{Vector{T}}} where T,
-    Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1}, Array{Pair{T, JT} where JT<:Ref{T}, 1}} where T)
+    Tuple{Vector{Pair{T, JT} where JT<:Ref{T}}, Vector{Pair{T, JT} where JT<:Ref{T}}} where T)
 
 # issue #8915
 struct D8915{T<:Union{Float32,Float64}}
@@ -2085,8 +2094,7 @@ let A = Tuple{Any, Type{Ref{_A}} where _A},
     I = typeintersect(A, B)
     @test I != Union{}
     @test Tuple{Type{Ref{Integer}}, Type{Ref{Integer}}} <: I
-    # TODO: this intersection result seems too wide (I == B) ?
-    @test_broken !<:(Tuple{Type{Int}, Type{Int}}, I)
+    @test !<:(Tuple{Type{Int}, Type{Int}}, I)
 end
 
 @testintersect(Tuple{Type{T}, T} where T<:(Tuple{Vararg{_A, _B}} where _B where _A),
@@ -2094,8 +2102,8 @@ end
                Bottom)
 
 # issue #42409
-@testintersect(Tuple{Type{Pair{_A, S} where S<:AbstractArray{<:_A, 2}}, Dict} where _A,
-               Tuple{Type{Pair{_A, S} where S<:AbstractArray{<:_A, 2}} where _A, Union{Array, Pair}},
+@testintersect(Tuple{Type{Pair{_A, S} where S<:AbstractMatrix{<:_A}}, Dict} where _A,
+               Tuple{Type{Pair{_A, S} where S<:AbstractMatrix{<:_A}} where _A, Union{Array, Pair}},
                Bottom)
 
 # https://github.com/JuliaLang/julia/issues/44735
@@ -2207,13 +2215,19 @@ let A = Tuple{NTuple{N, Int}, NTuple{N, Int}} where N,
     Bs = (Tuple{Tuple{Int, Vararg{Any}}, Tuple{Int, Int, Vararg{Any}}},
           Tuple{Tuple{Int, Vararg{Any,N1}}, Tuple{Int, Int, Vararg{Any,N2}}} where {N1,N2},
           Tuple{Tuple{Int, Vararg{Any,N}} where {N}, Tuple{Int, Int, Vararg{Any,N}} where {N}})
-    Cerr = Tuple{Tuple{Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N}
+    C = Tuple{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N}
+    for B in Bs
+        @testintersect(A, B, C)
+    end
+    A = Tuple{NTuple{N, Int}, Tuple{Int, Vararg{Int, N}}} where N
+    C = Tuple{Tuple{Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N}
+    for B in Bs
+        @testintersect(A, B, C)
+    end
+    A = Tuple{Tuple{Int, Vararg{Int, N}}, NTuple{N, Int}} where N
+    C = Tuple{Tuple{Int, Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N}
     for B in Bs
-        C = typeintersect(A, B)
-        @test C == typeintersect(B, A) != Union{}
-        @test C != Cerr
-        # TODO: The ideal result is Tuple{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N}
-        @test_broken C != Tuple{Tuple{Int, Vararg{Int}}, Tuple{Int, Int, Vararg{Int}}}
+        @testintersect(A, B, C)
     end
 end
 
@@ -2226,9 +2240,8 @@ let A = Pair{NTuple{N, Int}, NTuple{N, Int}} where N,
     Bs = (Pair{<:Tuple{Int, Vararg{Int}}, <:Tuple{Int, Int, Vararg{Int}}},
           Pair{Tuple{Int, Vararg{Int,N1}}, Tuple{Int, Int, Vararg{Int,N2}}} where {N1,N2},
           Pair{<:Tuple{Int, Vararg{Int,N}} where {N}, <:Tuple{Int, Int, Vararg{Int,N}} where {N}})
-    Cs = (Bs[2], Bs[2], Bs[3])
-    for (B, C) in zip(Bs, Cs)
-        # TODO: The ideal result is Pair{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N}
+    C = Pair{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N}
+    for B in Bs
         @testintersect(A, B, C)
     end
 end
@@ -2258,31 +2271,46 @@ let S = Tuple{Integer, U} where {II<:Array, U<:Tuple{Vararg{II, 1}}}
     @testintersect(S, Tuple{Int, U} where {N, U<:Tuple{Any,Any,Vararg{Any,N}}}, Union{})
 end
 
+function equal_envs(env1, env2)
+    length(env1) == length(env2) || return false
+    for i = 1:length(env1)
+        a = env1[i]
+        b = env2[i]
+        if a isa TypeVar
+            if !(b isa TypeVar && a.name == b.name && a.lb == b.lb && a.ub == b.ub)
+                return false
+            end
+        elseif !(a == b)
+            return false
+        end
+    end
+    return true
+end
+
 # issue #43064
 let
-    env_tuple(@nospecialize(x), @nospecialize(y)) = (intersection_env(x, y)[2]...,)
-    all_var(x::UnionAll) = (x.var, all_var(x.body)...)
-    all_var(x::DataType) = ()
+    env_tuple(@nospecialize(x), @nospecialize(y)) = intersection_env(x, y)[2]
     TT0 = Tuple{Type{T},Union{Real,Missing,Nothing}} where {T}
     TT1 = Union{Type{Int8},Type{Int16}}
     @test env_tuple(Tuple{TT1,Missing}, TT0) ===
           env_tuple(Tuple{TT1,Nothing}, TT0) ===
-          env_tuple(Tuple{TT1,Int}, TT0) === all_var(TT0)
+          env_tuple(Tuple{TT1,Int}, TT0) ===
+          Core.svec(TT0.var)
 
     TT0 = Tuple{T1,T2,Union{Real,Missing,Nothing}} where {T1,T2}
     TT1 = Tuple{T1,T2,Union{Real,Missing,Nothing}} where {T2,T1}
     TT2 = Tuple{Union{Int,Int8},Union{Int,Int8},Int}
     TT3 = Tuple{Int,Union{Int,Int8},Int}
-    @test env_tuple(TT2, TT0) === all_var(TT0)
-    @test env_tuple(TT2, TT1) === all_var(TT1)
-    @test env_tuple(TT3, TT0) === Base.setindex(all_var(TT0), Int, 1)
-    @test env_tuple(TT3, TT1) === Base.setindex(all_var(TT1), Int, 2)
+    @test equal_envs(env_tuple(TT2, TT0), Core.svec(TypeVar(:T1, Union{Int, Int8}), TypeVar(:T2, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT2, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), TypeVar(:T1, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT3, TT0), Core.svec(Int, TypeVar(:T2, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT3, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), Int))
 
     TT0 = Tuple{T1,T2,T1,Union{Real,Missing,Nothing}} where {T1,T2}
     TT1 = Tuple{T1,T2,T1,Union{Real,Missing,Nothing}} where {T2,T1}
     TT2 = Tuple{Int,Union{Int,Int8},Int,Int}
-    @test env_tuple(TT2, TT0) === Base.setindex(all_var(TT0), Int, 1)
-    @test env_tuple(TT2, TT1) === Base.setindex(all_var(TT1), Int, 2)
+    @test equal_envs(env_tuple(TT2, TT0), Core.svec(Int, TypeVar(:T2, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT2, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), Int))
 end
 
 #issue #46735
@@ -2327,9 +2355,10 @@ T46784{B<:Val, M<:AbstractMatrix} = Tuple{<:Union{B, <:Val{<:B}}, M, Union{Abstr
 #issue 36185
 let S = Tuple{Type{T},Array{Union{T,Missing},N}} where {T,N},
     T = Tuple{Type{T},Array{Union{T,Nothing},N}} where {T,N}
-    @testintersect(S, T, !Union{})
-    @test_broken typeintersect(S, T) != S
-    @test_broken typeintersect(T, S) != T
+    I = typeintersect(S, T)
+    @test I == typeintersect(T, S) != Union{}
+    @test_broken I <: S
+    @test_broken I <: T
 end
 
 #issue 46736
@@ -2370,12 +2399,41 @@ let S = Tuple{T2, V2} where {T2, N2, V2<:(Array{S2, N2} where {S2 <: T2})},
     @testintersect(S, T, !Union{})
 end
 
-# A simple case which has a small local union.
-# make sure the env is not widened too much when we intersect(Int8, Int8).
-struct T48006{A1,A2,A3} end
-@testintersect(Tuple{T48006{Float64, Int, S1}, Int} where {F1<:Real, S1<:Union{Int8, Val{F1}}},
-               Tuple{T48006{F2, I, S2}, I} where {F2<:Real, I<:Int, S2<:Union{Int8, Val{F2}}},
-               Tuple{T48006{Float64, Int, S1}, Int} where S1<:Union{Val{Float64}, Int8})
+let S = Dict{Int, S1} where {F1, S1<:Union{Int8, Val{F1}}},
+    T = Dict{F2, S2} where {F2, S2<:Union{Int8, Val{F2}}}
+    @test_broken typeintersect(S, T) == Dict{Int, S} where S<:Union{Val{Int}, Int8}
+    @test typeintersect(T, S) == Dict{Int, S} where S<:Union{Val{Int}, Int8}
+end
+
+# Ensure inner `intersect_all` never under-esitimate.
+let S = Tuple{F1, Dict{Int, S1}} where {F1, S1<:Union{Int8, Val{F1}}},
+    T = Tuple{Any, Dict{F2, S2}} where {F2, S2<:Union{Int8, Val{F2}}}
+    @test Tuple{Nothing, Dict{Int, Int8}} <: S
+    @test Tuple{Nothing, Dict{Int, Int8}} <: T
+    @test Tuple{Nothing, Dict{Int, Int8}} <: typeintersect(S, T)
+    @test Tuple{Nothing, Dict{Int, Int8}} <: typeintersect(T, S)
+end
+
+let S = Tuple{F1, Val{S1}} where {F1, S1<:Dict{F1}}
+    T = Tuple{Any, Val{S2}} where {F2, S2<:Union{map(T->Dict{T}, Base.BitInteger_types)...}}
+    ST = typeintersect(S, T)
+    TS = typeintersect(S, T)
+    for U in Base.BitInteger_types
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: S
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: T
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: ST
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: TS
+    end
+end
+
+#issue 55206
+struct T55206{A,B<:Complex{A},C<:Union{Dict{Nothing},Dict{A}}} end
+@testintersect(T55206, T55206{<:Any,<:Any,<:Dict{Nothing}}, T55206{A,<:Complex{A},<:Dict{Nothing}} where {A})
+@testintersect(
+    Tuple{Dict{Int8, Int16}, Val{S1}} where {F1, S1<:AbstractSet{F1}},
+    Tuple{Dict{T1, T2}, Val{S2}} where {T1, T2, S2<:Union{Set{T1},Set{T2}}},
+    Tuple{Dict{Int8, Int16}, Val{S1}} where {S1<:Union{Set{Int8},Set{Int16}}}
+)
 
 f48167(::Type{Val{L2}}, ::Type{Union{Val{L1}, Set{R}}}) where {L1, R, L2<:L1} = 1
 f48167(::Type{Val{L1}}, ::Type{Union{Val{L2}, Set{R}}}) where {L1, R, L2<:L1} = 2
@@ -2414,6 +2472,11 @@ end
 abstract type P47654{A} end
 @test Wrapper47654{P47654, Vector{Union{P47654,Nothing}}} <: Wrapper47654
 
+#issue 41561
+@testintersect(Tuple{Vector{VT}, Vector{VT}} where {N1, VT<:AbstractVector{N1}},
+               Tuple{Vector{VN} where {N, VN<:AbstractVector{N}}, Vector{Vector{Float64}}},
+               Tuple{Vector{Vector{Float64}}, Vector{Vector{Float64}}})
+
 @testset "known subtype/intersect issue" begin
     #issue 45874
     let S = Pair{Val{P}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where P,
@@ -2421,9 +2484,6 @@ abstract type P47654{A} end
         @test S <: T
     end
 
-    #issue 41561
-    @test_broken typeintersect(Tuple{Vector{VT}, Vector{VT}} where {N1, VT<:AbstractVector{N1}},
-                Tuple{Vector{VN} where {N, VN<:AbstractVector{N}}, Vector{Vector{Float64}}}) !== Union{}
     #issue 40865
     @test Tuple{Set{Ref{Int}}, Set{Ref{Int}}} <: Tuple{Set{KV}, Set{K}} where {K,KV<:Union{K,Ref{K}}}
     @test Tuple{Set{Val{Int}}, Set{Val{Int}}} <: Tuple{Set{KV}, Set{K}} where {K,KV<:Union{K,Val{K}}}
@@ -2434,10 +2494,10 @@ abstract type P47654{A} end
     @test_broken !(A <: B)
 
     #issue 35698
-    @test_broken typeintersect(Type{Tuple{Array{T,1} where T}}, UnionAll) != Union{}
+    @test_broken typeintersect(Type{Tuple{Vector{T} where T}}, UnionAll) != Union{}
 
     #issue 33137
-    @test_broken (Tuple{Q,Int} where Q<:Int) <: Tuple{T,T} where T
+    @test (Tuple{Q,Int} where Q<:Int) <: Tuple{T,T} where T
 
     # issue 24333
     @test (Type{Union{Ref,Cvoid}} <: Type{Union{T,Cvoid}} where T)
@@ -2544,7 +2604,7 @@ end
 let T = Tuple{Union{Type{T}, Type{S}}, Union{Val{T}, Val{S}}, Union{Val{T}, S}} where T<:Val{A} where A where S<:Val,
     S = Tuple{Type{T}, T, Val{T}} where T<:(Val{S} where S<:Val)
     # optimal = Union{}?
-    @test typeintersect(T, S) == Tuple{Type{A}, Union{Val{A}, Val{S} where S<:Union{Val, A}, Val{x} where x<:Val, Val{x} where x<:Union{Val, A}}, Val{A}} where A<:(Val{S} where S<:Val)
+    @test typeintersect(T, S) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {S<:Val, T<:Val}
     @test typeintersect(S, T) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {T<:Val, S<:(Union{Val{A}, Val} where A)}
 end
 
@@ -2552,8 +2612,184 @@ end
 @test !<:(Type{Vector{Union{Base.BitInteger, Base.IEEEFloat, StridedArray, Missing, Nothing, Val{T}}}} where {T}, Type{Array{T}} where {T})
 
 #issue 50195
-T50195{S} = Pair{S,Set{S}}
 let a = Tuple{Type{X} where X<:Union{Nothing, Val{X1} where {X4, X1<:(Pair{X2, Val{X2}} where X2<:Val{X4})}}},
     b = Tuple{Type{Y} where Y<:(Val{Y1} where {Y4<:Src, Y1<:(Pair{Y2, Val{Y2}} where Y2<:Union{Val{Y4}, Y4})})} where Src
     @test typeintersect(a, b) <: Any
 end
+
+#issue 50195
+let a = Tuple{Union{Nothing, Type{Pair{T1}} where T1}}
+    b = Tuple{Type{X2} where X2<:(Pair{T2, Y2} where {Src, Z2<:Src, Y2<:Union{Val{Z2}, Z2}})} where T2
+    @test !Base.has_free_typevars(typeintersect(a, b))
+end
+
+#issue 53366
+let Y = Tuple{Val{T}, Val{Val{T}}} where T
+    A = Val{Val{T}} where T
+    T = TypeVar(:T, UnionAll(A.var, Val{A.var}))
+    B = UnionAll(T, Val{T})
+    X = Tuple{A, B}
+    @testintersect(X, Y, !Union{})
+end
+
+#issue 53621 (requires assertions enabled)
+abstract type A53621{T, R, C, U} <: AbstractSet{Union{C, U}} end
+struct T53621{T, R<:Real, C, U} <: A53621{T, R, C, U} end
+let
+    U = TypeVar(:U)
+    C = TypeVar(:C)
+    T = TypeVar(:T)
+    R = TypeVar(:R)
+    CC = TypeVar(:CC, Union{C, U})
+    UU = TypeVar(:UU, Union{C, U})
+    S1 = UnionAll(T, UnionAll(R, Type{UnionAll(C, UnionAll(U, T53621{T, R, C, U}))}))
+    S2 = UnionAll(C, UnionAll(U, UnionAll(CC, UnionAll(UU, UnionAll(T, UnionAll(R, T53621{T, R, CC, UU}))))))
+    S = Tuple{S1, S2}
+    T = Tuple{Type{T53621{T, R}}, AbstractSet{T}} where {T, R}
+    @testintersect(S, T, !Union{})
+end
+
+#issue 53371
+struct T53371{A,B,C,D,E} end
+S53371{A} = Union{Int, <:A}
+R53371{A} = Val{V} where V<:(T53371{B,C,D,E,F} where {B<:Val{A}, C<:S53371{B}, D<:S53371{B}, E<:S53371{B}, F<:S53371{B}})
+let S = Type{T53371{A, B, C, D, E}} where {A, B<:R53371{A}, C<:R53371{A}, D<:R53371{A}, E<:R53371{A}},
+    T = Type{T53371{A, B, C, D, E} where {A, B<:R53371{A}, C<:R53371{A}, D<:R53371{A}, E<:R53371{A}}}
+    @test !(S <: T)
+end
+
+#issue 54356
+let S = Tuple{Val{Val{Union{Val{A2}, A2}}}, Val{Val{Union{Val{A2}, Val{A4}, A4}}}} where {A2, A4<:Union{Val{A2}, A2}},
+    T = Tuple{Vararg{Val{V}}} where {V}
+    @testintersect(S, T, !Union{})
+end
+
+#issue 54356
+abstract type A54356{T<:Real} end
+struct B54356{T} <: A54356{T} end
+struct C54356{S,T<:Union{S,Complex{S}}} end
+struct D54356{S<:Real,T} end
+let S = Tuple{Val, Val{T}} where {T}, R = Tuple{Val{Val{T}}, Val{T}} where {T},
+    SS = Tuple{Val, Val{T}, Val{T}} where {T}, RR = Tuple{Val{Val{T}}, Val{T}, Val{T}} where {T}
+    # parameters check for self
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Complex{B}}}, S{1}, R{1})
+    # parameters check for supertype (B54356 -> A54356)
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, B54356{B}}}, S{1}, R{1})
+    # enure unused TypeVar skips the `UnionAll` wrapping
+    @testintersect(Tuple{Val{A}, A} where {B, A<:(Union{Val{B}, D54356{B,C}} where {C})}, S{1}, R{1})
+    # invariant parameter should not get narrowed
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Val{Union{Int,Complex{B}}}}}, S{1}, R{1})
+    # bit value could not be `Union` element
+    @testintersect(Tuple{Val{A}, A, Val{B}} where {B, A<:Union{B, Val{B}}}, SS{1}, RR{1})
+    @testintersect(Tuple{Val{A}, A, Val{B}} where {B, A<:Union{B, Complex{B}}}, SS{1}, Union{})
+    # `check_datatype_parameters` should ignore bad `Union` elements in constraint's ub
+    T = Tuple{Val{Union{Val{Nothing}, Val{C54356{V,V}}}}, Val{Nothing}} where {Nothing<:V<:Nothing}
+    @test T <: S{Nothing}
+    @test T <: Tuple{Val{A}, A} where {B, C, A<:Union{Val{B}, Val{C54356{B,C}}}}
+    @test T <: typeintersect(Tuple{Val{A}, A} where {B, C, A<:Union{Val{B}, Val{C54356{B,C}}}}, S{Nothing})
+    # extra check for Vararg
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NTuple{B,Any}}}, S{-1}, R{-1})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Tuple{Any,Vararg{Any,B}}}}, S{-1}, R{-1})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Tuple{Vararg{Int,Union{Int,Complex{B}}}}}}, S{1}, R{1})
+    # extra check for NamedTuple
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{B,Tuple{Int}}}}, S{1}, R{1})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{B,Tuple{Int}}}}, S{(1,)}, R{(1,)})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{(:a),B}}}, S{NTuple{2,Int}}, R{NTuple{2,Int}})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{B,Tuple{Int,Int}}}}, S{(:a,:a)}, R{(:a,:a)})
+    # extra check for GenericMemory/GenericMemoryRef
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, GenericMemory{B}}}, S{1}, R{1})
+    @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, GenericMemory{:not_atomic,Int,B}}}, S{1}, R{1})
+end
+
+#issue 54516
+let S = Tuple{Val{<:T}, Union{Int,T}} where {T},
+    T = Tuple{Union{Int,T}, Val{<:T}} where {T}
+    @testintersect(S, T, !Union{})
+    @test !Base.has_free_typevars(typeintersect(S, T))
+end
+
+#issue 55230
+let T1 = NTuple{12, Union{Val{1}, Val{2}, Val{3}, Val{4}, Val{5}, Val{6}}}
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any}
+    @test T1 <: T2
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Val}
+    @test T1 <: T2
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Real}
+    @test !(T1 <: T2)
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Union{Val,Real}}
+    @test T1 <: T2
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Union{String,Real}}
+    @test !(T1 <: T2)
+    T2 = Tuple{<:Union{Val,Real},<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any}
+    @test T1 <: T2
+    T2 = Tuple{<:Union{String,Real},<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any}
+    @test !(T1 <: T2)
+    @test Tuple{Union{Val{1},Val{2}}} <: Tuple{S} where {T, S<:Val{T}}
+end
+
+#issue 56040
+let S = Dict{V,V} where {V},
+    T = Dict{Ref{Union{Set{A2}, Set{A3}, A3}}, Ref{Union{Set{A3}, Set{A2}, Set{A1}, Set{A4}, A4}}} where {A1, A2<:Set{A1}, A3<:Union{Set{A1}, Set{A2}}, A4<:Union{Set{A2}, Set{A1}, Set{A3}}},
+    A = Dict{Ref{Set{Union{}}}, Ref{Set{Union{}}}}
+    @testintersect(S, T, !Union{})
+    @test A <: typeintersect(S, T)
+    @test A <: typeintersect(T, S)
+end
+
+#issue 56606
+let
+    A = Tuple{Val{1}}
+    B = Tuple{Val}
+    for _ in 1:30
+        A = Tuple{Val{A}}
+        B = Tuple{Val{<:B}}
+    end
+    @test A <: B
+end
+@testintersect(
+    Val{Tuple{Int,S,T}} where {S<:Any,T<:Vector{Vector{Int}}},
+    Val{Tuple{T,R,S}} where {T,R<:Vector{T},S<:Vector{R}},
+    Val{Tuple{Int, Vector{Int}, T}} where T<:Vector{Vector{Int}},
+)
+
+#issue 57429
+@testintersect(
+    Pair{<:Any, <:Tuple{Int}},
+    Pair{N, S} where {N, NTuple{N,Int}<:S<:NTuple{M,Int} where {M}},
+    !Union{}
+)
+@testintersect(
+    Pair{N, T} where {N,NTuple{N,Int}<:T<:NTuple{N,Int}},
+    Pair{N, T} where {N,NTuple{N,Int}<:T<:Tuple{Int,Vararg{Int}}},
+    !Union{}
+)
+
+#issue 57852
+@testintersect(
+    Tuple{Type{T}, Type{<:F}, Type{<:F}} where {T, F<:Union{String, T}},
+    Tuple{Type{Complex{T}} where T, Type{Complex{T}} where T, Type{String}},
+    Tuple{Type{Complex{T}}, Type{Complex{T}}, Type{String}} where T
+)
+@testintersect(
+    Tuple{Type{T}, Type{<:Union{F, Nothing}}, Type{<:Union{F, Nothing}}} where {T, F<:Union{String, T}},
+    Tuple{Type{Complex{T}} where T, Type{Complex{T}} where T, Type{String}},
+    Tuple{Type{Complex{T}}, Type{Complex{T}}, Type{String}} where T
+)
+
+#issue 58129
+for k in 1:500
+    @eval struct $(Symbol(:T58129, k)){T} end
+end
+let Tvar = TypeVar(:Tvar)
+    V = UnionAll(Tvar, Union{(@eval($(Symbol(:T58129, k)){$Tvar}) for k in 1:500)...})
+    @test Set{<:V} <: AbstractSet{<:V}
+end
+let Tvar1 = TypeVar(:Tvar1), Tvar2 = TypeVar(:Tvar2)
+    V1 = UnionAll(Tvar1, Union{(@eval($(Symbol(:T58129, k)){$Tvar1}) for k in 1:100)...})
+    V2 = UnionAll(Tvar2, Union{(@eval($(Symbol(:T58129, k)){$Tvar2}) for k in 1:100)...})
+    @test Set{<:V2} <: AbstractSet{<:V1}
+end
+
+#issue 58115
+@test Tuple{Tuple{Vararg{Tuple{Vararg{Tuple{Vararg{Tuple{Vararg{Tuple{Vararg{             Union{Tuple{}, Tuple{Tuple{}}}}}}}}}}}}}  , Tuple{}} <:
+      Tuple{Tuple{Vararg{Tuple{Vararg{Tuple{Vararg{Tuple{Vararg{Tuple{Vararg{Tuple{Vararg{Union{Tuple{}, Tuple{Tuple{}}}}}}}}}}}}}}}, Tuple{}}
diff --git a/test/syntax.jl b/test/syntax.jl
index 4d1b167693adb..5691ca7135993 100644
--- a/test/syntax.jl
+++ b/test/syntax.jl
@@ -345,9 +345,19 @@ end
 # issue #15828
 @test Meta.lower(Main, Meta.parse("x...")) == Expr(:error, "\"...\" expression outside call")
 
+# issue #57153 - malformed "..." expr
+@test Meta.lower(@__MODULE__, :(identity($(Expr(:(...), 1, 2, 3))))) ==
+    (Expr(:error, "wrong number of expressions following \"...\""))
+
 # issue #15830
 @test Meta.lower(Main, Meta.parse("foo(y = (global x)) = y")) == Expr(:error, "misplaced \"global\" declaration")
 
+# Using the value of a `global` declaration is allowed, provided that value came
+# from something that isn't another `global` declaration:
+@test_nowarn Meta.lower(Main, Meta.parse("foo = global bar = baz()"))
+
+@test_nowarn Meta.lower(Main, Meta.parse("begin global foo; global bar end"))
+
 # issue #15844
 function f15844(x)
     x
@@ -377,8 +387,8 @@ add_method_to_glob_fn!()
 @test_parseerror "function finally() end"
 
 # PR #16170
-@test Meta.lower(Main, Meta.parse("true(x) = x")) == Expr(:error, "invalid function name \"true\"")
-@test Meta.lower(Main, Meta.parse("false(x) = x")) == Expr(:error, "invalid function name \"false\"")
+@test Meta.lower(Main, Meta.parse("true(x) = x")) == Expr(:error, "\"true\" is not a valid function argument name")
+@test Meta.lower(Main, Meta.parse("false(x) = x")) == Expr(:error, "\"false\" is not a valid function argument name")
 
 # issue #16355
 @test Meta.lower(Main, :(f(d:Int...) = nothing)) == Expr(:error, "\"d:Int\" is not a valid function argument name")
@@ -476,7 +486,7 @@ let err = try
     catch e
         e
     end
-    @test err.line == 7
+    @test err.line in (5, 7)
 end
 
 # PR #17393
@@ -501,6 +511,10 @@ let m_error, error_out, filename = Base.source_path()
     m_error = try @eval foo(types::NTuple{N}, values::Vararg{Any,N}, c) where {N} = nothing; catch e; e; end
     error_out = sprint(showerror, m_error)
     @test startswith(error_out, "ArgumentError: Vararg on non-final argument")
+
+    m_error = try @eval method_c6(a::Vararg{:A}) = 1; catch e; e; end
+    error_out = sprint(showerror, m_error)
+    @test startswith(error_out, "ArgumentError: invalid type for argument a in method definition for method_c6 at $filename:")
 end
 
 # issue #7272
@@ -549,7 +563,14 @@ for (str, tag) in Dict("" => :none, "\"" => :string, "#=" => :comment, "'" => :c
 end
 
 # meta nodes for optional positional arguments
-let src = Meta.lower(Main, :(@inline f(p::Int=2) = 3)).args[1].code[end-1].args[3]
+let code = Meta.lower(Main, :(@inline f(p::Int=2) = 3)).args[1].code
+    local src
+    for i = length(code):-1:1
+        if Meta.isexpr(code[i], :method)
+            src = code[i].args[3]
+            break
+        end
+    end
     @test Core.Compiler.is_declared_inline(src)
 end
 
@@ -574,16 +595,16 @@ let thismodule = @__MODULE__,
     @test isa(ex, Expr)
     @test !isdefined(M16096, :foo16096)
     local_foo16096 = Core.eval(@__MODULE__, ex)
+    Core.@latestworld
     @test local_foo16096(2.0) == 1
     @test !@isdefined foo16096
     @test !@isdefined it
     @test !isdefined(M16096, :foo16096)
     @test !isdefined(M16096, :it)
     @test typeof(local_foo16096).name.module === thismodule
-    @test typeof(local_foo16096).name.mt.module === thismodule
-    @test getfield(thismodule, typeof(local_foo16096).name.mt.name) === local_foo16096
+    @test getfield(thismodule, typeof(local_foo16096).name.singletonname) === local_foo16096
     @test getfield(thismodule, typeof(local_foo16096).name.name) === typeof(local_foo16096)
-    @test !isdefined(M16096, typeof(local_foo16096).name.mt.name)
+    @test !isdefined(M16096, typeof(local_foo16096).name.singletonname)
     @test !isdefined(M16096, typeof(local_foo16096).name.name)
 end
 
@@ -709,36 +730,10 @@ m1_exprs = get_expr_list(Meta.lower(@__MODULE__, quote @m1 end))
 let low3 = Meta.lower(@__MODULE__, quote @m3 end)
     m3_exprs = get_expr_list(low3)
     ci = low3.args[1]::Core.CodeInfo
-    @test ci.codelocs == [4, 2]
+    #@test ci.codelocs in ([4, 4, 0], [4, 0])
     @test is_return_ssavalue(m3_exprs[end])
 end
 
-function f1(a)
-    b = a + 100
-    b
-end
-
-@generated function f2(a)
-    quote
-        b = a + 100
-        b
-    end
-end
-
-f1_ci = code_typed(f1, (Int,), debuginfo=:source)[1][1]
-f2_ci = code_typed(f2, (Int,), debuginfo=:source)[1][1]
-
-f1_exprs = get_expr_list(f1_ci)
-f2_exprs = get_expr_list(f2_ci)
-
-if Base.JLOptions().can_inline != 0
-    @test length(f1_ci.linetable) == 3
-    @test length(f2_ci.linetable) >= 3
-else
-    @test length(f1_ci.linetable) == 2
-    @test length(f2_ci.linetable) >= 3
-end
-
 # Check that string and command literals are parsed to the appropriate macros
 @test :(x"s") == :(@x_str "s")
 @test :(x"s"flag) == :(@x_str "s" "flag")
@@ -939,8 +934,8 @@ g21054(>:) = >:2
 @test g21054(-) == -2
 
 # issue #21168
-@test Meta.lower(Main, :(a.[1])) == Expr(:error, "invalid syntax \"a.[1]\"")
-@test Meta.lower(Main, :(a.{1})) == Expr(:error, "invalid syntax \"a.{1}\"")
+@test_broken Meta.lower(Main, :(a.[1])) == Expr(:error, "invalid syntax \"a.[1]\"")
+@test_broken Meta.lower(Main, :(a.{1})) == Expr(:error, "invalid syntax \"a.{1}\"")
 
 # Issue #21225
 let abstr = Meta.parse("abstract type X end")
@@ -1306,6 +1301,10 @@ let args = (Int, Any)
     @test >:(reverse(args)...)
 end
 
+# Chaining of <: and >: in `where`
+@test isa(Vector{T} where Int<:T<:Number, UnionAll)
+@test isa(Vector{T} where Number>:T>:Int, UnionAll)
+
 # issue #25947
 let getindex = 0, setindex! = 1, colon = 2, vcat = 3, hcat = 4, hvcat = 5
     a = [10,9,8]
@@ -1510,8 +1509,8 @@ end
 
 # issue #26739
 let exc = try Core.eval(@__MODULE__, :(sin.[1])) catch exc ; exc end
-    @test exc isa ErrorException
-    @test startswith(exc.msg, "syntax: invalid syntax \"sin.[1]\"")
+    @test_broken exc isa ErrorException
+    @test_broken startswith(exc.msg, "syntax: invalid syntax \"sin.[1]\"")
 end
 
 # issue #26873
@@ -1533,8 +1532,11 @@ end
 @test Meta.lower(@__MODULE__, :(return 0 for i=1:2)) == Expr(:error, "\"return\" not allowed inside comprehension or generator")
 @test Meta.lower(@__MODULE__, :([ return 0 for i=1:2 ])) == Expr(:error, "\"return\" not allowed inside comprehension or generator")
 @test Meta.lower(@__MODULE__, :(Int[ return 0 for i=1:2 ])) == Expr(:error, "\"return\" not allowed inside comprehension or generator")
+@test Meta.lower(@__MODULE__, :([ $(Expr(:thisfunction)) for i=1:2 ])) == Expr(:error, "\"@__FUNCTION__\" not allowed inside comprehension or generator")
+@test Meta.lower(@__MODULE__, :($(Expr(:thisfunction)) for i=1:2)) == Expr(:error, "\"@__FUNCTION__\" not allowed inside comprehension or generator")
 @test [ ()->return 42 for i = 1:1 ][1]() == 42
 @test Function[ identity() do x; return 2x; end for i = 1:1 ][1](21) == 42
+@test @eval let f=[ ()->$(Expr(:thisfunction)) for i = 1:1 ][1]; f() === f; end
 
 # issue #27155
 macro test27155()
@@ -1682,6 +1684,9 @@ end
 # #16356
 @test_parseerror "0xapi"
 
+# #60189
+@test_parseerror "0x1p3.2"
+
 # #22523 #22712
 @test_parseerror "a?b:c"
 @test_parseerror "a ?b:c"
@@ -1702,7 +1707,7 @@ end
 @test Meta.parse("(a...)") == Expr(Symbol("..."), :a)
 
 # #19324
-@test_throws UndefVarError(:x) eval(:(module M19324
+@test_throws UndefVarError(:x, :local) eval(:(module M19324
                  x=1
                  for i=1:10
                      x += i
@@ -1779,6 +1784,43 @@ end
 @test B28593.var.name === :S
 @test C28593.var.name === :S
 
+# issue #51899
+macro struct_macro_51899()
+    quote
+        mutable struct Struct51899
+            const const_field
+            const const_field_with_type::Int
+            $(esc(Expr(:const, :(escaped_const_field::MyType))))
+            @atomic atomic_field
+            @atomic atomic_field_with_type::Int
+        end
+    end
+end
+
+let ex = @macroexpand @struct_macro_51899()
+    const_field, const_field_with_type, escaped_const_field,
+    atomic_field, atomic_field_with_type = filter(x -> isa(x, Expr), ex.args[end].args[end].args)
+    @test Meta.isexpr(const_field, :const)
+    @test const_field.args[1] === :const_field
+
+    @test Meta.isexpr(const_field_with_type, :const)
+    @test Meta.isexpr(const_field_with_type.args[1], :(::))
+    @test const_field_with_type.args[1].args[1] === :const_field_with_type
+    @test const_field_with_type.args[1].args[2] == GlobalRef(@__MODULE__, :Int)
+
+    @test Meta.isexpr(escaped_const_field, :const)
+    @test Meta.isexpr(const_field_with_type.args[1], :(::))
+    @test escaped_const_field.args[1].args[1] === :escaped_const_field
+    @test escaped_const_field.args[1].args[2] === :MyType
+
+    @test Meta.isexpr(atomic_field, :atomic)
+    @test atomic_field.args[1] === :atomic_field
+
+    @test Meta.isexpr(atomic_field_with_type, :atomic)
+    @test atomic_field_with_type.args[1].args[1] === :atomic_field_with_type
+    @test atomic_field_with_type.args[1].args[2] == GlobalRef(@__MODULE__, :Int)
+end
+
 # issue #25955
 macro noeffect25955(e)
     return e
@@ -1882,7 +1924,7 @@ function capture_with_conditional_label()
     return y->x
 end
 let f = capture_with_conditional_label()  # should not throw
-    @test_throws UndefVarError(:x) f(0)
+    @test_throws UndefVarError(:x, :local) f(0)
 end
 
 # `_` should not create a global (or local)
@@ -1908,7 +1950,7 @@ end
 # eval'ing :const exprs
 eval(Expr(:const, :_var_30877))
 @test !isdefined(@__MODULE__, :_var_30877)
-@test isconst(@__MODULE__, :_var_30877)
+@test !isconst(@__MODULE__, :_var_30877)
 
 # anonymous kw function in value position at top level
 f30926 = function (;k=0)
@@ -2174,6 +2216,16 @@ end
 end
 @test z28789 == 42
 
+const warn28789 = "Assignment to `s28789` in soft scope is ambiguous because a global variable by the same name exists: "*
+    "`s28789` will be treated as a new local. Disambiguate by using `local s28789` to suppress this warning or "*
+    "`global s28789` to assign to the existing global variable."
+@test_logs (:warn, warn28789) @test_throws UndefVarError @eval begin
+    s28789 = 0
+    for i = 1:10
+        s28789 += i
+    end
+end
+
 # issue #38650, `struct` should always be a hard scope
 f38650() = 0
 @eval begin
@@ -2246,6 +2298,11 @@ end
     @test Meta.parse("a ⥷ b") == Expr(:call, :⥷, :a, :b)
 end
 
+# issue 57143
+@testset "binary 🢲" begin
+    @test Meta.parse("a 🢲 b") == Expr(:call, :🢲, :a, :b)
+end
+
 # only allow certain characters after interpolated vars (#25231)
 @test_parseerror("\"\$x෴  \"",
                  "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.")
@@ -2376,15 +2433,6 @@ macro a35391(b)
 end
 @test @a35391(0) === (0,)
 
-# global declarations from the top level are not inherited by functions.
-# don't allow such a declaration to override an outer local, since it's not
-# clear what it should do.
-@test Meta.lower(Main, :(let
-                           x = 1
-                           let
-                             global x
-                           end
-                         end)) == Expr(:error, "`global x`: x is a local variable in its enclosing scope")
 # note: this `begin` block must be at the top level
 _temp_33553 = begin
     global _x_this_remains_undefined
@@ -2396,6 +2444,70 @@ end
 @test _temp_33553 == 2
 @test !@isdefined(_x_this_remains_undefined)
 
+module GlobalContainment
+using Test
+@testset "scope of global declarations" begin
+
+    # global declarations from the top level are not inherited by functions.
+    # don't allow such a declaration to override an outer local, since it's not
+    # clear what it should do.
+    @test Meta.lower(
+        Main,
+        :(let
+              x = 1
+              let
+                  global x
+              end
+          end)) == Expr(:error, "`global x`: x is a local variable in its enclosing scope")
+
+    # a declared global can shadow a local in an outer scope
+    @test let
+        function f()
+            g0 = 2
+            let; global g0 = 1; end
+            a = () -> (global g0 = 1); a();
+            return g0
+        end
+        (f(), g0);
+    end === (2, 1)
+    @test let
+        function f()
+            let; global g2 = 1; end;
+            let; try; g2 = 2; catch _; end; end;
+        end
+        (f(), g2)
+    end === (2, 1)
+
+    # an inner global declaration should not interfere with the closure (#57547)
+    @test let
+        g3 = 1
+        function f()
+            let; global g3 = 2; end;
+            return g3
+        end
+        f()
+    end === 1
+    @test_throws UndefVarError let
+        function returns_global()
+            for i in 1
+                global ge = 2
+            end
+            return ge # local declared below
+        end
+        ge = returns_global()
+    end
+    @test let
+        function f(x::T) where T
+            function g(x)
+                let; global T = 1; end
+                x::T
+            end; g(x)
+        end; f(1)
+    end === 1
+
+end
+end
+
 # lowering of adjoint
 @test (1 + im)' == 1 - im
 x = let var"'"(x) = 2x
@@ -2470,7 +2582,14 @@ end
 function ncalls_in_lowered(ex, fname)
     lowered_exprs = Meta.lower(Main, ex).args[1].code
     return count(lowered_exprs) do ex
-        Meta.isexpr(ex, :call) && ex.args[1] == fname
+        if Meta.isexpr(ex, :call)
+            arg = ex.args[1]
+            if isa(arg, Core.SSAValue)
+                arg = lowered_exprs[arg.id]
+            end
+            return arg == fname
+        end
+        return false
     end
 end
 
@@ -2486,6 +2605,15 @@ end
     @test ncalls_in_lowered(:((.+)(a, b .- (.^)(c, 2))), GlobalRef(Base, :BroadcastFunction)) == 0
 end
 
+module M59008 # dotop with global LHS in macro
+using Test
+global a = 1
+macro counter()
+    :(a += 1)
+end
+@test @counter() === 2 === a
+end
+
 # issue #37656
 @test :(if true 'a' else 1 end) == Expr(:if, true, quote 'a' end, quote 1 end)
 
@@ -2510,19 +2638,19 @@ end
                                                               3 4 5]
 
 @test Meta.parse("for x in 1:10 g(x) end") ==
-  Meta.parse("for#==#x#==#in#==#1:10#==#g(x)#==#end")
+    Meta.parse("for#==#x#==#in#==#1:10#==#g(x)#==#end")
 @test Meta.parse("(f->f(1))() do x x+1 end") ==
-  Meta.parse("(f->f(1))()#==#do#==#x#==#x+1#==#end")
+    Meta.parse("(f->f(1))()#==#do#==#x#==#x+1#==#end")
 @test Meta.parse("while i < 10 i += 1 end") ==
-  Meta.parse("while#==#i#==#<#==#10#==#i#==#+=#==#1#==#end")
+    Meta.parse("while#==#i#==#<#==#10#==#i#==#+=#==#1#==#end")
 @test Meta.parse("begin x=1 end") == Meta.parse("begin#==#x=1#==#end")
 @test Meta.parse("if x<y x+1 elseif y>0 y+1 else z end") ==
-  Meta.parse("if#==#x<y#==#x+1#==#elseif#==#y>0#==#y+1#==#else#==#z#==#end")
+    Meta.parse("if#==#x<y#==#x+1#==#elseif#==#y>0#==#y+1#==#else#==#z#==#end")
 @test Meta.parse("function(x) x end") == Meta.parse("function(x)#==#x#==#end")
 @test Meta.parse("a ? b : c") == Meta.parse("a#==#?#==#b#==#:#==#c")
 @test_parseerror("f#==#(x)=x", "space before \"(\" not allowed in \"f (\" at none:1")
 @test Meta.parse("try f() catch e g() finally h() end") ==
-  Meta.parse("try#==#f()#==#catch#==#e#==#g()#==#finally#==#h()#==#end")
+    Meta.parse("try#==#f()#==#catch#==#e#==#g()#==#finally#==#h()#==#end")
 @test Meta.parse("@m a b") == Meta.parse("@m#==#a#==#b")
 
 # issue #37540
@@ -2610,12 +2738,12 @@ using ..Mod
 end
 @test Mod3.f(10) == 21
 @test !isdefined(Mod3, :func)
-@test_throws ErrorException("invalid method definition in Mod3: function Mod3.f must be explicitly imported to be extended") Core.eval(Mod3, :(f(x::Int) = x))
+@test_throws ErrorException("invalid method definition in Mod3: function Mod.f must be explicitly imported to be extended") Core.eval(Mod3, :(f(x::Int) = x))
 @test !isdefined(Mod3, :always_undef) # resolve this binding now in Mod3
-@test_throws ErrorException("invalid method definition in Mod3: exported function Mod.always_undef does not exist") Core.eval(Mod3, :(always_undef(x::Int) = x))
-@test_throws ErrorException("cannot assign a value to imported variable Mod.always_undef from module Mod3") Core.eval(Mod3, :(const always_undef = 3))
-@test_throws ErrorException("cannot assign a value to imported variable Mod3.f") Core.eval(Mod3, :(const f = 3))
-@test_throws ErrorException("cannot declare Mod.maybe_undef constant; it already has a value") Core.eval(Mod, :(const maybe_undef = 3))
+@test Core.eval(Mod3, :(always_undef(x::Int) = x)) == invokelatest(getglobal, Mod3, :always_undef)
+@test Core.eval(Mod3, :(const always_undef = 3)) == invokelatest(getglobal, Mod3, :always_undef)
+@test_throws ErrorException("cannot declare Mod3.f constant; it was already declared as an import") Core.eval(Mod3, :(const f = 3))
+@test_throws ErrorException("cannot declare Mod.maybe_undef constant; it was already declared global") Core.eval(Mod, :(const maybe_undef = 3))
 
 z = 42
 import .z as also_z
@@ -2645,6 +2773,18 @@ import .TestImportAs.Mod2 as M2
 @test !@isdefined(Mod2)
 @test M2 === TestImportAs.Mod2
 
+# 57702: nearby bindings shouldn't cause us to closure-convert in import/using
+module OddImports
+using Test
+module ABC end
+x = let; let; import .ABC; end; let; ABC() = (ABC,); end; end
+y = let; let; using  .ABC; end; let; ABC() = (ABC,); end; end
+z = let; let; import SHA: R; end; let; R(x...) = R(x); end; end
+@test x isa Function
+@test y isa Function
+@test z isa Function
+end
+
 @testset "unicode modifiers after '" begin
     @test Meta.parse("a'ᵀ") == Expr(:call, Symbol("'ᵀ"), :a)
     @test Meta.parse("a'⁻¹") == Expr(:call, Symbol("'⁻¹"), :a)
@@ -2771,6 +2911,23 @@ end
 @m38386
 @test isempty(methods(f38386))
 
+@testset "non-lhs all-underscore vars should fail in lowering" begin
+    # OK
+    @test (_ = 1) === 1
+    @test ((_, _) = (1, 2)) == (1, 2)
+    @test Meta.isexpr(Meta.lower(Main, :(for _ in 1:2; 1; end)), :thunk)
+    @test (try; throw(1); catch _; 2; end) === 2
+    @test (let _ = 1; 2; end) === 2
+    @test (function f(_, _); 2; end)(0,0) === 2
+    @test (function f(_, _=1); 2; end)(0,0) === 2
+    @test (function f(_, _; kw1=2); kw1; end)(0,0) === 2
+    # ERROR: syntax: all-underscore identifiers are write-only and their values cannot be used in expressions
+    @test Meta.isexpr(Meta.lower(Main, :(_ = 1; a = _)), :error)
+    @test Meta.isexpr(Meta.lower(Main, :(let; function f(); _; end; end)), :error)
+    @test Meta.isexpr(Meta.lower(Main, :(let; function f(); _; 1; end; end)), :error)
+    @test Meta.isexpr(Meta.lower(Main, :(begin; _; 1; end)), :error)
+end
+
 @testset "all-underscore varargs on the rhs" begin
     @test ncalls_in_lowered(quote _..., = a end, GlobalRef(Base, :rest)) == 0
     @test ncalls_in_lowered(quote ___..., = a end, GlobalRef(Base, :rest)) == 0
@@ -2799,7 +2956,7 @@ end
     @test a == 5
     @test b == 6
 
-    @test_throws ErrorException (; a, b) = (x=1,)
+    @test_throws FieldError (; a, b) = (x=1,)
 
     @test Meta.isexpr(Meta.@lower(begin (a, b; c) = x end), :error)
     @test Meta.isexpr(Meta.@lower(begin (a, b; c) = x, y end), :error)
@@ -2808,7 +2965,7 @@ end
     f((; a, b)) = a, b
     @test f((b=3, a=4)) == (4, 3)
     @test f((b=3, c=2, a=4)) == (4, 3)
-    @test_throws ErrorException f((;))
+    @test_throws FieldError f((;))
 
     # with type annotation
     let num, den, a, b
@@ -3070,6 +3227,7 @@ end
     ex = Expr(:block)
     ex.args = fill!(Vector{Any}(undef, 700000), 1)
     f = eval(Expr(:function, :(), ex))
+    @Core.latestworld
     @test f() == 1
     ex = Expr(:vcat)
     ex.args = fill!(Vector{Any}(undef, 600000), 1)
@@ -3183,6 +3341,22 @@ end
     end
     @test err == 5 + 6
     @test x == 1
+
+    x = 0
+    try
+    catch
+    else
+        x = 1
+    end
+    @test x == 1
+
+    try
+    catch
+    else
+        tryelse_in_local_scope = true
+    end
+
+    @test !@isdefined(tryelse_in_local_scope)
 end
 
 @test_parseerror """
@@ -3246,6 +3420,7 @@ const typeof = error
 end
 let ex = :(const $(esc(:x)) = 1; (::typeof($(esc(:foo43993))))() = $(esc(:x)))
     Core.eval(M43993, Expr(:var"hygienic-scope", ex, Core))
+    @Core.latestworld
     @test M43993.x === 1
     @test invokelatest(M43993.foo43993) === 1
 end
@@ -3426,6 +3601,8 @@ end
 # issue #45162
 f45162(f) = f(x=1)
 @test first(methods(f45162)).called != 0
+f45162_2(f) = f([]...)
+@test first(methods(f45162_2)).called != 0
 
 # issue #45024
 @test_parseerror "const x" "expected assignment after \"const\""
@@ -3507,3 +3684,992 @@ let x = 1 => 2
     @test_throws ErrorException @eval a => b = 2
     @test_throws "function Base.=> must be explicitly imported to be extended" @eval a => b = 2
 end
+
+# Splatting in non-final default value (Ref #50518)
+for expr in (quote
+    function g1(a=(1,2)..., b...=3)
+        b
+    end
+end,quote
+    function g2(a=(1,2)..., b=3, c=4)
+        (b, c)
+    end
+end,quote
+    function g3(a=(1,2)..., b=3, c...=4)
+        (b, c)
+    end
+end)
+    let exc = try eval(expr); catch exc; exc end
+        @test isa(exc, ErrorException)
+        @test startswith(exc.msg, "syntax: invalid \"...\" in non-final positional argument default value")
+    end
+end
+
+# Test that bad lowering does not segfault (ref #50518)
+@test_throws ErrorException("syntax: Attempted to use slot marked unused") @eval function funused50518(::Float64)
+    $(Symbol("#unused#"))
+end
+
+@testset "public keyword" begin
+    p(str) = Base.remove_linenums!(Meta.parse(str))
+    # tests ported from JuliaSyntax.jl
+    @test p("function f(public)\n    public + 3\nend") == Expr(:function, Expr(:call, :f, :public), Expr(:block, Expr(:call, :+, :public, 3)))
+    @test p("public A, B") == Expr(:public, :A, :B)
+    @test p("if true \n public *= 4 \n end") == Expr(:if, true, Expr(:block, Expr(:*=, :public, 4)))
+    @test p("module Mod\n public A, B \n end") == Expr(:module, true, :Mod, Expr(:block, Expr(:public, :A, :B)))
+    @test p("module Mod2\n a = 3; b = 6; public a, b\n end") == Expr(:module, true, :Mod2, Expr(:block, Expr(:(=), :a, 3), Expr(:(=), :b, 6), Expr(:public, :a, :b)))
+    @test p("a = 3; b = 6; public a, b") == Expr(:toplevel, Expr(:(=), :a, 3), Expr(:(=), :b, 6), Expr(:public, :a, :b))
+    @test_throws Meta.ParseError p("begin \n public A, B \n end")
+    @test_throws Meta.ParseError p("if true \n public A, B \n end")
+    @test_throws Meta.ParseError p("public export=true foo, bar")
+    @test_throws Meta.ParseError p("public experimental=true foo, bar")
+    @test p("public(x::String) = false") == Expr(:(=), Expr(:call, :public, Expr(:(::), :x, :String)), Expr(:block, false))
+    @test p("module M; export @a; end") == Expr(:module, true, :M, Expr(:block, Expr(:export, :var"@a")))
+    @test p("module M; public @a; end") == Expr(:module, true, :M, Expr(:block, Expr(:public, :var"@a")))
+    @test p("module M; export ⤈; end") == Expr(:module, true, :M, Expr(:block, Expr(:export, :⤈)))
+    @test p("module M; public ⤈; end") == Expr(:module, true, :M, Expr(:block, Expr(:public, :⤈)))
+    @test p("public = 4") == Expr(:(=), :public, 4)
+    @test p("public[7] = 5") == Expr(:(=), Expr(:ref, :public, 7), 5)
+    @test p("public() = 6") == Expr(:(=), Expr(:call, :public), Expr(:block, 6))
+end
+
+@testset "removing argument side effects" begin
+    # Allow let blocks in broadcasted LHSes, but only evaluate them once:
+    execs = 0
+    array = [1]
+    let x = array; execs += 1; x; end .+= 2
+    @test array == [3]
+    @test execs == 1
+    let; execs += 1; array; end .= 4
+    @test array == [4]
+    @test execs == 2
+    let x = array; execs += 1; x; end::Vector{Int} .+= 2
+    @test array == [6]
+    @test execs == 3
+    let; execs += 1; array; end::Vector{Int} .= 7
+    @test array == [7]
+    @test execs == 4
+
+    # remove argument side effects on lhs kwcall
+    pa_execs = 0
+    kw_execs = 0
+    f60152(v, pa; kw) = copy(v)
+    @test (f60152([1, 2, 3], 0; kw=0) .*= 2) == [2,4,6]
+    @test (f60152([1, 2, 3], (pa_execs+=1); kw=(kw_execs+=1)) .*= 2) == [2,4,6]
+    @test pa_execs === 1
+    @test kw_execs === 1
+end
+
+# Allow GlobalRefs in macro definition
+module MyMacroModule
+    macro mymacro end
+end
+macro MyMacroModule.mymacro()
+    1
+end
+@eval macro $(GlobalRef(MyMacroModule, :mymacro))(x)
+    2
+end
+@test (@MyMacroModule.mymacro) == 1
+@test (@MyMacroModule.mymacro(a)) == 2
+
+# Issue #53673 - missing macro hygiene for for/generator
+baremodule MacroHygieneFor
+    import ..Base
+    using Base: esc, Expr, +
+    macro for1()
+        :(let a=(for i=10; end; 1); a; end)
+    end
+    macro for2()
+        :(let b=(for j=11, k=12; end; 2); b; end)
+    end
+    macro for3()
+        :(let c=($(Expr(:for, esc(Expr(:block, :(j=11), :(k=12))), :())); 3); c; end)
+    end
+    macro for4()
+        :(begin; local j; let a=(for outer j=10; end; 4); j+a; end; end)
+    end
+end
+let nnames = length(names(MacroHygieneFor; all=true))
+    @test (@MacroHygieneFor.for1) == 1
+    @test (@MacroHygieneFor.for2) == 2
+    @test (@MacroHygieneFor.for3) == 3
+    @test (@MacroHygieneFor.for4) == 14
+    @test length(names(MacroHygieneFor; all=true)) == nnames
+end
+
+baremodule MacroHygieneGenerator
+    using ..Base: Any, !
+    my!(x) = !x
+    macro gen1()
+        :(let a=Any[x for x in 1]; a; end)
+    end
+    macro gen2()
+        :(let a=Bool[x for x in (true, false) if my!(x)]; a; end)
+    end
+    macro gen3()
+        :(let a=Bool[x for x in (true, false), y in (true, false) if my!(x) && my!(y)]; a; end)
+    end
+end
+let nnames = length(names(MacroHygieneGenerator; all=true))
+    @test (MacroHygieneGenerator.@gen1) == Any[x for x in 1]
+    @test (MacroHygieneGenerator.@gen2) == Bool[false]
+    @test (MacroHygieneGenerator.@gen3) == Bool[false]
+    @test length(names(MacroHygieneGenerator; all=true)) == nnames
+end
+
+# Issue #53729 - Lowering recursion into Expr(:toplevel)
+@test eval(Expr(:let, Expr(:block), Expr(:block, Expr(:toplevel, :(f53729(x) = x)), :(x=1)))) == 1
+@test f53729(2) == 2
+
+# Issue #54701 - Macro hygiene of argument destructuring
+macro makef54701()
+    quote
+        call(f) = f((1, 2))
+        function $(esc(:f54701))()
+            call() do (a54701, b54701)
+                return a54701+b54701
+            end
+        end
+    end
+end
+@makef54701
+@test f54701() == 3
+@test !@isdefined(a54701)
+@test !@isdefined(b54701)
+
+# Issue #54607 - binding creation in foreign modules should not be permitted
+module Foreign54607
+    # Syntactic, not dynamic
+    try_to_create_binding1() = (Foreign54607.foo = 2)
+    # GlobalRef is allowed for same-module assignment and declares the binding
+    @eval try_to_create_binding2() = ($(GlobalRef(Foreign54607, :foo2)) = 2)
+    function global_create_binding()
+        global bar
+        bar = 3
+    end
+    baz = 4
+    begin;
+        @Base.Experimental.force_compile
+        compiled_assign = 5
+    end
+    @eval $(GlobalRef(Foreign54607, :gr_assign)) = 6
+end
+@test_throws ErrorException (Foreign54607.foo = 1)
+@test_throws ErrorException Foreign54607.try_to_create_binding1()
+Foreign54607.try_to_create_binding2()
+function assign_in_foreign_module()
+    (Foreign54607.foo = 1)
+    nothing
+end
+@test !Core.Compiler.is_nothrow(Base.infer_effects(assign_in_foreign_module))
+@test_throws ErrorException begin
+    @Base.Experimental.force_compile
+    (Foreign54607.foo = 1)
+end
+@test_throws ErrorException @eval (GlobalRef(Foreign54607, :gr_assign2)) = 7
+Foreign54607.global_create_binding()
+@test isdefined(Foreign54607, :bar)
+@test isdefined(Foreign54607, :baz)
+@test isdefined(Foreign54607, :compiled_assign)
+@test isdefined(Foreign54607, :gr_assign)
+@test isdefined(Foreign54607, :foo2)
+Foreign54607.bar = 8
+@test Foreign54607.bar == 8
+begin
+    @Base.Experimental.force_compile
+    Foreign54607.bar = 9
+end
+@test Foreign54607.bar == 9
+
+# Issue #54805 - export mislowering
+module Export54805
+let
+    local b54805=1
+    export b54805
+end
+b54805 = 2
+end
+using .Export54805
+@test b54805 == 2
+
+# F{T} = ... has special syntax semantics, not found anywhere else in the language
+# that make `F` `const` iff an assignment to `F` is global in the relevant scope.
+# We implicitly test this elsewhere, but there's some tricky interactions with
+# explicit declarations that we test here.
+module ImplicitCurlies
+    using ..Test
+    let
+        ImplicitCurly1{T} = Ref{T}
+    end
+    @test !@isdefined(ImplicitCurly1)
+    let
+        global ImplicitCurly2
+        ImplicitCurly2{T} = Ref{T}
+    end
+    @test @isdefined(ImplicitCurly2) && isconst(@__MODULE__, :ImplicitCurly2)
+    begin
+        ImplicitCurly3{T} = Ref{T}
+    end
+    @test @isdefined(ImplicitCurly3) && isconst(@__MODULE__, :ImplicitCurly3)
+    begin
+        local ImplicitCurly4
+        ImplicitCurly4{T} = Ref{T}
+    end
+    @test !@isdefined(ImplicitCurly4)
+    @test_throws "syntax: `global const` declaration not allowed inside function" Core.eval(@__MODULE__, :(function implicit5()
+        global ImplicitCurly5
+        ImplicitCurly5{T} = Ref{T}
+    end))
+    @test !@isdefined(ImplicitCurly5)
+    function implicit6()
+        ImplicitCurly6{T} = Ref{T}
+        return ImplicitCurly6
+    end
+    @test !@isdefined(ImplicitCurly6)
+    # Check return value of assignment expr
+    @test isa(Core.eval(@__MODULE__, :(const ImplicitCurly7{T} = Ref{T})), UnionAll)
+    @test isa(begin; ImplicitCurly8{T} = Ref{T}; end, UnionAll)
+end
+
+# `const` does not distribute over assignments
+const aconstassign = bconstassign = 2
+@test isconst(@__MODULE__, :aconstassign)
+@test !isconst(@__MODULE__, :bconstassign)
+@test aconstassign == bconstassign
+
+const afunc_constassign() = bfunc_constassign() = 2
+@test afunc_constassign()() == 2
+@test !@isdefined(bfunc_constassign)
+
+# `const` RHS is regular toplevel scope (not `let`)
+const arhs_toplevel = begin
+    athis_should_be_a_global = 1
+    2
+end
+@test isconst(@__MODULE__, :arhs_toplevel)
+@test !isconst(@__MODULE__, :athis_should_be_a_global)
+@test arhs_toplevel == 2
+@test athis_should_be_a_global == 1
+
+# `const` is permitted before function assignment for legacy reasons
+const fconst_assign() = 1
+const (gconst_assign(), hconst_assign()) = (2, 3)
+@test (fconst_assign(), gconst_assign(), hconst_assign()) == (1, 2, 3)
+@test isconst(@__MODULE__, :fconst_assign)
+@test isconst(@__MODULE__, :gconst_assign)
+@test isconst(@__MODULE__, :hconst_assign)
+
+# `const` assignment to `_` drops the assignment effect,
+# and the conversion, but not the rhs.
+struct CantConvert; end
+Base.convert(::Type{CantConvert}, x) = error()
+# @test splices into a function, where const cannot appear
+@test Core.eval(@__MODULE__, :(const _::CantConvert = 1)) == 1
+@test !isconst(@__MODULE__, :_)
+@test_throws ErrorException("expected") (const _ = error("expected"))
+
+# Issue #54787
+const (destruct_const54787...,) = (1,2,3)
+@test destruct_const54787 == (1,2,3)
+@test isconst(@__MODULE__, :destruct_const54787)
+const a54787, b54787, c54787 = destruct_const54787
+@test (a54787, b54787, c54787) == (1,2,3)
+@test isconst(@__MODULE__, :a54787)
+@test isconst(@__MODULE__, :b54787)
+@test isconst(@__MODULE__, :c54787)
+
+# Same number of statements on lhs and rhs, but non-atom
+const c54787_1,c54787_2 = 1,(2*1)
+@test isconst(@__MODULE__, :c54787_1)
+@test isconst(@__MODULE__, :c54787_2)
+@test c54787_1 == 1
+@test c54787_2 == 2
+
+# Methods can be added to any singleton not just generic functions
+struct SingletonMaker; end
+const no_really_this_is_a_function_i_promise = Val{SingletonMaker()}()
+no_really_this_is_a_function_i_promise(a) = 2 + a
+@test Val{SingletonMaker()}()(2) == 4
+
+# Test that lowering doesn't accidentally put a `Module` in the Method name slot
+let src = @Meta.lower let capture=1
+    global foo_lower_block
+    foo_lower_block() = capture
+end
+    code = src.args[1].code
+    for i = length(code):-1:1
+        expr = code[i]
+        Meta.isexpr(expr, :method) || continue
+        @test isa(expr.args[1], Union{GlobalRef, Symbol})
+    end
+end
+
+let src = Meta.@lower let
+    try
+        try
+            return 1
+        catch
+        end
+    finally
+        nothing
+    end
+end
+    code = src.args[1].code
+    for stmt in code
+        if Meta.isexpr(stmt, :leave) && length(stmt.args) > 1
+            # Expr(:leave, ...) should list the arguments to pop from
+            # inner-most scope to outer-most
+            @test issorted(Int[
+                (arg::Core.SSAValue).id
+                for arg in stmt.args
+            ]; rev=true)
+        end
+    end
+end
+
+# Test that globals can be `using`'d even if they are not yet defined
+module UndefGlobal54954
+    global theglobal54954::Int
+end
+using .UndefGlobal54954: theglobal54954
+@test Core.get_binding_type(@__MODULE__, :theglobal54954) === Int
+
+# Extended isdefined
+module ExtendedIsDefined
+    using Test
+    module Import
+        export x2, x3
+        x2 = 2
+        x3 = 3
+        x4 = 4
+    end
+    const x1 = 1
+    using .Import
+    import .Import.x4
+    @test x2 == 2 # Resolve the binding
+    @eval begin
+        @test Core.isdefinedglobal(@__MODULE__, :x1)
+        @test Core.isdefinedglobal(@__MODULE__, :x2)
+        @test Core.isdefinedglobal(@__MODULE__, :x3)
+        @test Core.isdefinedglobal(@__MODULE__, :x4)
+
+        @test Core.isdefinedglobal(@__MODULE__, :x1, false)
+        @test !Core.isdefinedglobal(@__MODULE__, :x2, false)
+        @test !Core.isdefinedglobal(@__MODULE__, :x3, false)
+        @test !Core.isdefinedglobal(@__MODULE__, :x4, false)
+    end
+
+    @eval begin
+        @Base.Experimental.force_compile
+        @test Core.isdefinedglobal(@__MODULE__, :x1)
+        @test Core.isdefinedglobal(@__MODULE__, :x2)
+        @test Core.isdefinedglobal(@__MODULE__, :x3)
+        @test Core.isdefinedglobal(@__MODULE__, :x4)
+
+        @test Core.isdefinedglobal(@__MODULE__, :x1, false)
+        @test !Core.isdefinedglobal(@__MODULE__, :x2, false)
+        @test !Core.isdefinedglobal(@__MODULE__, :x3, false)
+        @test !Core.isdefinedglobal(@__MODULE__, :x4, false)
+    end
+end
+
+# Test importing the same module twice using two different paths
+module FooDualImport
+end
+module BarDualImport
+import ..FooDualImport
+import ..FooDualImport.FooDualImport
+end
+
+# Test trying to define a constant and then importing the same constant
+const ImportConstant = 1
+module ImportConstantTestModule
+    using Test
+    const ImportConstant = 1
+    import ..ImportConstant
+    @test ImportConstant == 1
+    @test isconst(@__MODULE__, :ImportConstant)
+end
+
+# Test trying to define a constant and then trying to assign to the same value
+module AssignConstValueTest
+    using Test
+    const x = 1
+    @test_throws ErrorException @eval x = 1
+    @test_throws ErrorException @eval begin
+        @Base.Experimental.force_compile
+        global x = 1
+    end
+end
+@test isconst(AssignConstValueTest, :x)
+
+# Module Replacement
+module ReplacementContainer
+    using Test
+    module ReplaceMe
+        const x = 1
+    end
+    const Old = ReplaceMe
+    @eval module ReplaceMe
+        const x = 2
+    end
+end
+@test ReplacementContainer.Old !== ReplacementContainer.ReplaceMe
+@test ReplacementContainer.ReplaceMe.x === 2
+
+# Setglobal of previously declared global
+module DeclareSetglobal
+    using Test
+    @test_throws ErrorException setglobal!(@__MODULE__, :DeclareMe, 1)
+    global DeclareMe
+    setglobal!(@__MODULE__, :DeclareMe, 1)
+    @test DeclareMe === 1
+end
+
+# Binding type of const (N.B.: This may change in the future)
+module ConstBindingType
+    using Test
+    const x = 1
+    @test Core.get_binding_type(@__MODULE__, :x) === Any
+end
+
+# Explicit import may resolve using failed
+module UsingFailedExplicit
+    using Test
+    module A; export x; x = 1; end
+    module B; export x; x = 2; end
+    using .A, .B
+    @test_throws UndefVarError x
+    using .A: x as x
+    @test x === 1
+end
+
+# issue #45494
+begin
+  local b::Tuple{<:Any} = (0,)
+  function f45494()
+    b = b
+    b
+  end
+end
+@test f45494() === (0,)
+
+@test_throws "\"esc(...)\" used outside of macro expansion" eval(esc(:(const x=1)))
+
+# Inner function declaration world age
+function create_inner_f_no_methods()
+    function inner_f end
+end
+@test isa(create_inner_f_no_methods(), Function)
+@test length(methods(create_inner_f_no_methods())) == 0
+
+function create_inner_f_one_method()
+    inner_f() = 1
+end
+@test isa(create_inner_f_no_methods(), Function)
+@test length(methods(create_inner_f_no_methods())) == 0
+@test Base.invoke_in_world(first(methods(create_inner_f_one_method)).primary_world, create_inner_f_one_method()) == 1
+
+# Issue 56711 - Scope of signature hoisting
+function fs56711()
+    f(lhs::Integer) = 1
+    f(lhs::Integer, rhs::(local x_should_not_be_defined=Integer; x_should_not_be_defined)) = 2
+    return f
+end
+@test !@isdefined(x_should_not_be_defined)
+
+# Test that importing twice is allowed without warning
+@test_nowarn @eval baremodule ImportTwice
+    import ..Base
+    using .Base: zero, zero
+end
+
+# PR# 55040 - Macrocall as function sig
+@test :(function @f()() end) == :(function (@f)() end)
+
+function callme end
+macro callmemacro(args...)
+    Expr(:call, esc(:callme), map(esc, args)...)
+end
+function @callmemacro(a::Int)
+    return 1
+end
+@callmemacro(b::Float64) = 2
+function @callmemacro(a::T, b::T) where T <: Int
+    return 3
+end
+function @callmemacro(a::Int, b::Int, c::Int)::Float64
+    return 4
+end
+function @callmemacro(d::String)
+    (a, b, c)
+    # ^ Should not be accidentally parsed as an argument list
+    return 4
+end
+
+@test callme(1) === 1
+@test callme(2.0) === 2
+@test callme(3, 3) === 3
+@test callme(4, 4, 4) === 4.0
+
+# Ambiguous 1-arg anymous vs macrosig
+@test_parseerror "function (@foo(a)) end"
+
+# #57267 - Missing `latestworld` after typealias
+abstract type A57267{S, T} end
+@test_nowarn @eval begin
+    B57267{S} = A57267{S, 1}
+    const C57267 = B57267
+end
+
+# #57404 - Binding ambiguity resolution ignores guard bindings
+module Ambig57404
+    module A
+        export S
+    end
+    using .A
+    module B
+        const S = 1
+        export S
+    end
+    using .B
+end
+@test Ambig57404.S == 1
+
+# Issue #56904 - lambda linearized twice
+@test (let; try 3; finally try 1; f(() -> x); catch x; end; end; x = 7; end) === 7
+@test (let; try 3; finally try 4; finally try 1; f(() -> x); catch x; end; end; end; x = 7; end) === 7
+
+# Issue #57546 - explicit function declaration should create new global
+module FuncDecl57546
+    using Test
+    @test_nowarn @eval function Any end
+    @test isa(Any, Function)
+    @test isempty(methods(Any))
+end
+
+# #57334
+let
+    x57334 = Ref(1)
+    @test_throws "syntax: cannot declare \"x57334[]\" `const`" Core.eval(@__MODULE__, :(const x57334[] = 1))
+end
+
+# #57470
+module M57470
+using ..Test
+
+@test_throws(
+    "syntax: `global const` declaration not allowed inside function",
+    Core.eval(@__MODULE__, :(function f57470()
+                                 const global x57470 = 1
+                             end)))
+@test_throws(
+    "unsupported `const` declaration on local variable",
+    Core.eval(@__MODULE__, :(let
+                                 const y57470 = 1
+                             end))
+)
+
+let
+    global a57470
+    const a57470 = 1
+end
+@test a57470 === 1
+
+let
+    global const z57470 = 1
+    const global w57470 = 1
+end
+
+@test z57470 === 1
+@test w57470 === 1
+
+const (; field57470_1, field57470_2) = (field57470_1 = 1, field57470_2 = 2)
+@test field57470_1 === 1
+@test field57470_2 === 2
+
+# TODO: 1.11 allows these, but should we?
+const X57470{T}, Y57470{T} = Int, Bool
+@test X57470 === Int
+@test Y57470 === Bool
+const A57470{T}, B57470{T} = [Int, Bool]
+@test A57470 === Int
+@test B57470 === Bool
+const a57470, f57470(x), T57470{U} = [1, 2, Int]
+@test a57470 === 1
+@test f57470(0) === 2
+@test T57470 === Int
+
+module M57470_sub end
+@test_throws("syntax: cannot declare \"M57470_sub.x\" `const`",
+             Core.eval(@__MODULE__, :(const M57470_sub.x = 1)))
+
+# # `const global` should not trample previously declared `local`
+@test_throws(
+    "syntax: variable \"v57470\" declared both local and global",
+    Core.eval(@__MODULE__, :(let
+                                 local v57470
+                                 const global v57470 = 1
+                             end))
+)
+
+# Chain of assignments must happen right-to-left:
+let
+    x = [0, 0]; i = 1
+    i = x[i] = 2
+    @test x == [2, 0]
+    x = [0, 0]; i = 1
+    x[i] = i = 2
+    @test x == [0, 2]
+end
+
+# Global const decl inside local scope
+let
+    const global letf_57470(x)::Int = 2+x
+    const global letT_57470{T} = Int64
+end
+@test letf_57470(3) == 5
+@test letT_57470 === Int64
+
+# Closure conversion should happen on const assignment rhs
+module M59128
+using Test
+const        x0::Int = (()->1)()
+global       x1::Int = (()->1)()
+global const x2::Int = (()->1)()
+const global x3::Int = (()->1)()
+@test x0 === x1 === x2 === x3 === 1
+let g = 1
+    global       x4::Vector{T} where {T<:Number} = let; (()->[g])(); end
+    const global x5::Vector{T} where {T<:Number} = let; (()->[g])(); end
+    global const x6::Vector{T} where {T<:Number} = let; (()->[g])(); end
+end
+@test x4 == x5 == x6 == [1]
+const letT_57470{T} = (()->Int64)()
+@test letT_57470 == Int64
+end
+
+end # M57470_sub
+
+# lowering globaldecl with complex type
+module M58609
+using Test
+global x::T where T
+global y::Type{<:Number}
+
+@test Core.get_binding_type(M58609, :x) === Any
+@test Core.get_binding_type(M58609, :y) == Type{<:Number}
+end
+
+# #57574
+module M57574
+struct A{T} end
+out = let
+    for B in ()
+    end
+    let
+        B{T} = A{T}
+        B
+    end
+end
+end
+@test M57574.out === M57574.A
+
+# Double import of CONST_IMPORT symbol
+module DoubleImport
+    import Test: Random
+    import Random
+end
+@test DoubleImport.Random === Test.Random
+
+# Expr(:method) returns the method
+let ex = @Meta.lower function return_my_method(); 1; end
+    code = ex.args[1].code
+    idx = findfirst(ex->Meta.isexpr(ex, :method) && length(ex.args) > 1, code)
+    code[end] = Core.ReturnNode(Core.SSAValue(idx))
+    @test isa(Core.eval(@__MODULE__, ex), Method)
+end
+
+# Capturing a @nospecialize argument should result in an Any field in the closure
+module NoSpecClosure
+    K(@nospecialize(x)) = y -> x
+end
+let f = NoSpecClosure.K(1)
+    @test f(2) == 1
+    @test typeof(f).parameters == Core.svec()
+end
+
+@testset "@__FUNCTION__ and Expr(:thisfunction)" begin
+    @testset "Basic usage" begin
+        # @__FUNCTION__ in regular functions
+        test_function_basic() = @__FUNCTION__
+        @test test_function_basic() === test_function_basic
+
+        # Expr(:thisfunction) in regular functions
+        @eval regular_func() = $(Expr(:thisfunction))
+        @test regular_func() === regular_func
+    end
+
+    @testset "Recursion" begin
+        # Factorial with @__FUNCTION__
+        factorial_function(n) = n <= 1 ? 1 : n * (@__FUNCTION__)(n - 1)
+        @test factorial_function(5) == 120
+
+        # Fibonacci with Expr(:thisfunction)
+        struct RecursiveCallableStruct; end
+        @eval (::RecursiveCallableStruct)(n) = n <= 1 ? n : $(Expr(:thisfunction))(n-1) + $(Expr(:thisfunction))(n-2)
+        @test RecursiveCallableStruct()(10) === 55
+
+        # Anonymous function recursion
+        @test (n -> n <= 1 ? 1 : n * (@__FUNCTION__)(n - 1))(5) == 120
+    end
+
+    @testset "Closures and nested functions" begin
+        # Prevents boxed closures
+        function make_closure()
+            fib(n) = n <= 1 ? 1 : (@__FUNCTION__)(n - 1) + (@__FUNCTION__)(n - 2)
+            return fib
+        end
+        Test.@inferred make_closure()
+        closure = make_closure()
+        @test closure(5) == 8
+        Test.@inferred closure(5)
+
+        # Complex closure of closures
+        function f1()
+            function f2()
+                function f3()
+                    return @__FUNCTION__
+                end
+                return (@__FUNCTION__), f3()
+            end
+            return (@__FUNCTION__), f2()...
+        end
+        Test.@inferred f1()
+        @test f1()[1] === f1
+        @test f1()[2] !== f1
+        @test f1()[3] !== f1
+        @test f1()[3]() === f1()[3]
+        @test f1()[2]()[2]() === f1()[3]
+    end
+
+    @testset "Do blocks" begin
+        function test_do_block()
+            result = map([1, 2, 3]) do x
+                return (@__FUNCTION__, x)
+            end
+            # All should refer to the same do-block function
+            @test all(r -> r[1] === result[1][1], result)
+            # Values should be different
+            @test [r[2] for r in result] == [1, 2, 3]
+            # It should be different than `test_do_block`
+            @test result[1][1] !== test_do_block
+        end
+        test_do_block()
+    end
+
+    @testset "Keyword arguments" begin
+        # @__FUNCTION__ with kwargs
+        foo(; n) = n <= 1 ? 1 : n * (@__FUNCTION__)(; n = n - 1)
+        @test foo(n = 5) == 120
+
+        # Expr(:thisfunction) with kwargs
+        let
+            @eval f2(; n=1) = n <= 1 ? n : n * $(Expr(:thisfunction))(; n=n-1)
+            result = f2(n=5)
+            @test result == 120
+        end
+    end
+
+    @testset "Callable structs" begin
+        # @__FUNCTION__ in callable structs
+        @gensym A
+        @eval module $A
+            struct CallableStruct{T}; val::T; end
+            (c::CallableStruct)() = @__FUNCTION__
+        end
+        @eval using .$A: CallableStruct
+        c = CallableStruct(5)
+        @test c() === c
+
+        # In closures, var"#self#" should refer to the enclosing function,
+        # NOT the enclosing struct instance
+        struct CallableStruct2; end
+        @eval function (obj::CallableStruct2)()
+            function inner_func()
+                $(Expr(:thisfunction))
+            end
+            inner_func
+        end
+
+        let cs = CallableStruct2()
+            @test cs()() === cs()
+            @test cs()() !== cs
+        end
+
+        # Accessing values via self-reference
+        struct CallableStruct3
+            value::Int
+        end
+        @eval (obj::CallableStruct3)() = $(Expr(:thisfunction))
+        @eval (obj::CallableStruct3)(x) = $(Expr(:thisfunction)).value + x
+
+        let cs = CallableStruct3(42)
+            @test cs() === cs
+            @test cs(10) === 52
+        end
+
+        # Callable struct with args and kwargs
+        struct CallableStruct4
+        end
+        @eval function (obj::CallableStruct4)(x, args...; y=2, kws...)
+            return (; func=(@__FUNCTION__), x, args, y, kws)
+        end
+        c = CallableStruct4()
+        @test c(1).func === c
+        @test c(2, 3).args == (3,)
+        @test c(2; y=4).y == 4
+        @test c(2; y=4, a=5, b=6, c=7).kws[:c] == 7
+    end
+
+    @testset "Special cases" begin
+        # Generated functions
+        let @generated foo2() = Expr(:thisfunction)
+            @test foo2() === foo2
+        end
+
+        # Struct constructors
+        let
+            @eval struct Cols{T<:Tuple}
+                cols::T
+                operator
+                Cols(args...; operator=union) = (new{typeof(args)}(args, operator); string($(Expr(:thisfunction))))
+            end
+            result = Cols(1, 2, 3)
+            @test occursin("Cols", result)
+        end
+
+        # Should not access arg-map for local variables
+        @gensym f
+        @eval begin
+            function $f end
+            function ($f::typeof($f))()
+                $f = 1
+                $(Expr(:thisfunction))
+            end
+        end
+        @test @eval($f() === $f)
+    end
+
+    @testset "Error upon misuse" begin
+        @gensym B
+        @test_throws(
+            "\"@__FUNCTION__\" can only be used inside a function",
+            @eval(module $B; @__FUNCTION__; end)
+        )
+
+        @test_throws(
+            "\"@__FUNCTION__\" not allowed inside comprehension or generator",
+            @eval([(@__FUNCTION__) for _ in 1:10])
+        )
+    end
+end
+
+let d = Dict(:a=>1)
+    # quoted symbols should not be recognized as argument uses
+    foo(a=d[:a], b=d[:a]) = 1
+    foo(a::Int) = 2
+    @test foo() == 1
+end
+
+# Test new macroexpand functionality - define test module at top level
+module MacroExpandTestModule
+    macro test_basic(x)
+        return :($x + 1)
+    end
+end
+
+@testset "hygienic-scope" begin
+    # Test macroexpand! (in-place expansion)
+    expr = :(MacroExpandTestModule.@test_basic(5))
+    result = macroexpand!(@__MODULE__, expr)
+    # macroexpand! returns a hygienic-scope wrapper with legacyscope=false (default)
+    @test Meta.isexpr(result, Symbol("hygienic-scope"))
+    @test result.args[1] == :(5 + 1)
+    @test result.args[2] === MacroExpandTestModule
+    @test result.args[3] isa Core.LineNumberNode
+
+    # Test legacyscope parameter
+    hygiene_expr = :(MacroExpandTestModule.@test_basic(100))
+
+    # With legacyscope=true (default for macroexpand)
+    expanded_with_scope = macroexpand(@__MODULE__, hygiene_expr; legacyscope=true)
+    @test expanded_with_scope == :($(GlobalRef(MacroExpandTestModule, :(+)))(100, 1))
+
+    # With legacyscope=false
+    expanded_no_scope = macroexpand(@__MODULE__, hygiene_expr; legacyscope=false)
+    @test Meta.isexpr(expanded_no_scope, Symbol("hygienic-scope"))
+    @test expanded_no_scope.args[1] == :(100 + 1)
+    @test expanded_no_scope.args[2] === MacroExpandTestModule
+    @test expanded_no_scope.args[3] isa Core.LineNumberNode
+
+    # Test macroexpand! with legacyscope=false (default for macroexpand!)
+    hygiene_copy = copy(hygiene_expr)
+    result_no_scope = macroexpand!(@__MODULE__, hygiene_copy; legacyscope=false)
+    @test Meta.isexpr(result_no_scope, Symbol("hygienic-scope"))
+    @test result_no_scope.args[1] == :(100 + 1)
+    @test result_no_scope.args[2] === MacroExpandTestModule
+    @test result_no_scope.args[3] isa Core.LineNumberNode
+end
+
+# Test error handling for malformed macro calls
+@testset "macroexpand error handling" begin
+    # Test with undefined macro
+    @test_throws UndefVarError macroexpand(@__MODULE__, :(@undefined_macro(x)))
+    @test_throws UndefVarError macroexpand!(@__MODULE__, :(@undefined_macro(x)))
+end
+
+# #59755 - Don't hoist global declarations out of toplevel-preserving syntax
+module M59755 end
+@testset "toplevel-preserving syntax" begin
+    Core.eval(M59755, :(if true
+                            global v1::Bool
+                        else
+                            const v1 = 1
+                        end))
+    @test !isdefined(M59755, :v1)
+    @test Base.binding_kind(M59755, :v1) == Base.PARTITION_KIND_GLOBAL
+    @test Core.get_binding_type(M59755, :v1) == Bool
+
+    Core.eval(M59755, :(if false
+                            global v2::Bool
+                        else
+                            const v2 = 2
+                        end))
+    @test M59755.v2 === 2
+    @test Base.binding_kind(M59755, :v2) == Base.PARTITION_KIND_CONST
+
+    Core.eval(M59755, :(v3 = if true
+                            global v4::Bool
+                            4
+                        else
+                            const v4 = 5
+                            6
+                        end))
+    @test M59755.v3 == 4
+    @test !isdefined(M59755, :v4)
+    @test Base.binding_kind(M59755, :v4) == Base.PARTITION_KIND_GLOBAL
+    @test Core.get_binding_type(M59755, :v4) == Bool
+
+    Core.eval(M59755, :(v5 = if false
+                            global v6::Bool
+                            4
+                        else
+                            const v6 = 5
+                            6
+                        end))
+    @test M59755.v5 === 6
+    @test M59755.v6 === 5
+    @test Base.binding_kind(M59755, :v6) == Base.PARTITION_KIND_CONST
+end
diff --git a/test/sysinfo.jl b/test/sysinfo.jl
index cb943cfd38843..f213e48dc73b7 100644
--- a/test/sysinfo.jl
+++ b/test/sysinfo.jl
@@ -12,6 +12,8 @@ Base.Sys.loadavg()
 
 @test length(ccall(:jl_get_cpu_name, String, ())) != 0
 @test length(ccall(:jl_get_cpu_features, String, ())) >= 0
+foo_fma() = Core.Intrinsics.have_fma(Int64)
+@test ccall(:jl_cpu_has_fma, Bool, (Cint,), 64) == foo_fma()
 
 if Sys.isunix()
     mktempdir() do tempdir
@@ -33,7 +35,12 @@ if Sys.isunix()
         original_path = ENV["PATH"]
         ENV["PATH"] = string(firstdir, ":", seconddir, ":", original_path)
         try
-            @test abspath(Base.Sys.which("foo")) == abspath(joinpath(seconddir, "foo"))
+            if Libc.geteuid() == 0
+                # Root bypasses permission checks
+                @test abspath(Base.Sys.which("foo")) == abspath(joinpath(firstdir, "foo"))
+            else
+                @test abspath(Base.Sys.which("foo")) == abspath(joinpath(seconddir, "foo"))
+            end
         finally
             # clean up
             chmod(firstdir, 0o777)
@@ -41,3 +48,31 @@ if Sys.isunix()
         end
     end
 end
+
+@testset "username()" begin
+    if Sys.isunix()
+        passwd = Libc.getpwuid(Libc.getuid())
+        @test Sys.username() == passwd.username
+    elseif Sys.iswindows()
+        @test Sys.username() == ENV["USERNAME"]
+    else
+        @test !isempty(Sys.username())
+    end
+end
+
+@testset "Base.Sys docstrings" begin
+    @test isempty(Docs.undocumented_names(Sys))
+end
+
+@testset "show" begin
+    example_cpus = [Base.Sys.CPUinfo("Apple M1 Pro", 2400, 0x000000000d913b08, 0x0000000000000000, 0x0000000005f4243c, 0x00000000352a550a, 0x0000000000000000)
+    Base.Sys.CPUinfo("Apple M1 Pro", 2400, 0x000000000d9040c2, 0x0000000000000000, 0x0000000005d4768c, 0x00000000356b3d22, 0x0000000000000000)
+    Base.Sys.CPUinfo("Apple M1 Pro", 2400, 0x00000000026784da, 0x0000000000000000, 0x0000000000fda30e, 0x0000000046a731ea, 0x0000000000000000)
+    Base.Sys.CPUinfo("Apple M1 Pro", 2400, 0x00000000017726c0, 0x0000000000000000, 0x00000000009491de, 0x0000000048134f1e, 0x0000000000000000)]
+
+    Sys.SC_CLK_TCK, save_SC_CLK_TCK = 100, Sys.SC_CLK_TCK # use platform-independent tick units
+    @test repr(example_cpus[1]) == "Base.Sys.CPUinfo(\"Apple M1 Pro\", 2400, 0x000000000d913b08, 0x0000000000000000, 0x0000000005f4243c, 0x00000000352a550a, 0x0000000000000000)"
+    @test repr("text/plain", example_cpus[1]) == "Apple M1 Pro: \n        speed         user         nice          sys         idle          irq\n     2400 MHz    2276216 s          0 s     998861 s    8919667 s          0 s"
+    @test sprint(Sys.cpu_summary, example_cpus) == "Apple M1 Pro: \n       speed         user         nice          sys         idle          irq\n#1  2400 MHz    2276216 s          0 s     998861 s    8919667 s          0 s\n#2  2400 MHz    2275576 s          0 s     978101 s    8962204 s          0 s\n#3  2400 MHz     403386 s          0 s     166224 s   11853624 s          0 s\n#4  2400 MHz     245859 s          0 s      97367 s   12092250 s          0 s\n"
+    Sys.SC_CLK_TCK = save_SC_CLK_TCK
+end
diff --git a/test/tempdepot.jl b/test/tempdepot.jl
new file mode 100755
index 0000000000000..18fad0cf15346
--- /dev/null
+++ b/test/tempdepot.jl
@@ -0,0 +1,43 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# This includes the `mkdepottempdir` and `rmdepot` functions, used to
+# respectively create and remove temporary depots to use in tests.
+# `mktempdir` and `rm` cannot be used because, on Windows, the DLLs generated by
+# precompilation in the depots cannot be removed by the program that uses them.
+# This file can be included multiple times in the same module if necessary,
+# which can happen with unisolated test runs.
+
+if Sys.iswindows() && !@isdefined(DEPOTS_TOREMOVE)
+    const DEPOTS_TOREMOVE = String[]
+    atexit(() -> Base.Filesystem.temp_cleanup_postprocess(DEPOTS_TOREMOVE))
+end
+
+function rmdepot(depot)
+    try
+        @static if Sys.iswindows() # on Windows, delay the rm
+            push!(DEPOTS_TOREMOVE, depot)
+        else # on the other systems, do it immediately
+            rm(depot, force=true, recursive=true)
+        end
+    catch err
+        @show err
+    end
+end
+
+function mkdepottempdir(f::Function, parent=tempdir(); prefix="jltestdepot_")
+    tmpdir = mktempdir(parent; prefix, cleanup=false)
+    try
+        f(tmpdir)
+    finally
+        rmdepot(tmpdir)
+    end
+end
+function mkdepottempdir(parent=tempdir(); prefix="jltestdepot_", cleanup=true)
+    @static if Sys.iswindows()
+        tmpdir = mktempdir(parent; prefix, cleanup=false)
+        cleanup && push!(DEPOTS_TOREMOVE, tmpdir)
+        tmpdir
+    else
+        mktempdir(parent; prefix, cleanup)
+    end
+end
diff --git a/test/terminfo.jl b/test/terminfo.jl
new file mode 100644
index 0000000000000..07aa21704fef5
--- /dev/null
+++ b/test/terminfo.jl
@@ -0,0 +1,931 @@
+let
+    dumb_terminfo = UInt8[
+        0x1a, 0x01, 0x18, 0x00, 0x02, 0x00, 0x01, 0x00, 0x82, 0x00, 0x08, 0x00,
+        0x64, 0x75, 0x6d, 0x62, 0x7c, 0x38, 0x30, 0x2d, 0x63, 0x6f, 0x6c, 0x75,
+        0x6d, 0x6e, 0x20, 0x64, 0x75, 0x6d, 0x62, 0x20, 0x74, 0x74, 0x79, 0x00,
+        0x00, 0x01, 0x50, 0x00, 0xff, 0xff, 0x00, 0x00, 0x02, 0x00, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0x04, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x06, 0x00,
+        0x07, 0x00, 0x0d, 0x00, 0x0a, 0x00, 0x0a, 0x00]
+
+    dumb_capabilities = Dict{Symbol, Union{Bool, Int, String}}(
+        :am => true,
+        :auto_right_margin => true,
+        :bw => false,
+        :auto_left_margin => false,
+        :bel => "\a",
+        :bell => "\a",
+        :cr => "\r",
+        :carriage_return => "\r",
+        :cols => 80,
+        :columns => 80,
+        :cud1 => "\n",
+        :cursor_down => "\n",
+        :ind => "\n",
+        :scroll_forward => "\n")
+
+    xterm_terminfo = UInt8[
+        0x1a, 0x01, 0x30, 0x00, 0x26, 0x00, 0x0f, 0x00, 0x9d, 0x01, 0xe6, 0x05,
+        0x78, 0x74, 0x65, 0x72, 0x6d, 0x7c, 0x78, 0x74, 0x65, 0x72, 0x6d, 0x20,
+        0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x61, 0x6c, 0x20, 0x65, 0x6d, 0x75,
+        0x6c, 0x61, 0x74, 0x6f, 0x72, 0x20, 0x28, 0x58, 0x20, 0x57, 0x69, 0x6e,
+        0x64, 0x6f, 0x77, 0x20, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x29, 0x00,
+        0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+        0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+        0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x01, 0x50, 0x00, 0x08, 0x00, 0x18, 0x00, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0x08, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00,
+        0x06, 0x00, 0x08, 0x00, 0x19, 0x00, 0x1e, 0x00, 0x26, 0x00, 0x2a, 0x00,
+        0x2e, 0x00, 0xff, 0xff, 0x39, 0x00, 0x4a, 0x00, 0x4c, 0x00, 0x50, 0x00,
+        0x57, 0x00, 0xff, 0xff, 0x59, 0x00, 0x66, 0x00, 0xff, 0xff, 0x6a, 0x00,
+        0x6e, 0x00, 0x78, 0x00, 0x7c, 0x00, 0xff, 0xff, 0xff, 0xff, 0x80, 0x00,
+        0x84, 0x00, 0x89, 0x00, 0x8e, 0x00, 0xff, 0xff, 0xa0, 0x00, 0xa5, 0x00,
+        0xaa, 0x00, 0xff, 0xff, 0xaf, 0x00, 0xb4, 0x00, 0xb9, 0x00, 0xbe, 0x00,
+        0xc7, 0x00, 0xcb, 0x00, 0xd2, 0x00, 0xff, 0xff, 0xe4, 0x00, 0xe9, 0x00,
+        0xef, 0x00, 0xf5, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x01,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x19, 0x01, 0xff, 0xff, 0x1d, 0x01,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x01, 0xff, 0xff, 0x24, 0x01,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x28, 0x01, 0x2c, 0x01,
+        0x32, 0x01, 0x36, 0x01, 0x3a, 0x01, 0x3e, 0x01, 0x44, 0x01, 0x4a, 0x01,
+        0x50, 0x01, 0x56, 0x01, 0x5c, 0x01, 0x60, 0x01, 0xff, 0xff, 0x65, 0x01,
+        0xff, 0xff, 0x69, 0x01, 0x6e, 0x01, 0x73, 0x01, 0x77, 0x01, 0x7e, 0x01,
+        0xff, 0xff, 0x85, 0x01, 0x89, 0x01, 0x91, 0x01, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x99, 0x01, 0xa2, 0x01, 0xff, 0xff,
+        0xff, 0xff, 0xab, 0x01, 0xb4, 0x01, 0xbd, 0x01, 0xc6, 0x01, 0xcf, 0x01,
+        0xd8, 0x01, 0xe1, 0x01, 0xea, 0x01, 0xf3, 0x01, 0xfc, 0x01, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0x05, 0x02, 0x09, 0x02, 0x0e, 0x02, 0x13, 0x02,
+        0x27, 0x02, 0x2a, 0x02, 0xff, 0xff, 0xff, 0xff, 0x3c, 0x02, 0x3f, 0x02,
+        0x4a, 0x02, 0x4d, 0x02, 0x4f, 0x02, 0x52, 0x02, 0xaf, 0x02, 0xff, 0xff,
+        0xb2, 0x02, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb4, 0x02,
+        0xb8, 0x02, 0xbc, 0x02, 0xc0, 0x02, 0xc4, 0x02, 0xff, 0xff, 0xff, 0xff,
+        0xc8, 0x02, 0xff, 0xff, 0xfd, 0x02, 0xff, 0xff, 0xff, 0xff, 0x01, 0x03,
+        0x07, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0x0d, 0x03, 0x11, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x15, 0x03, 0xff, 0xff, 0xff, 0xff,
+        0x1c, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x23, 0x03,
+        0x2a, 0x03, 0x31, 0x03, 0xff, 0xff, 0xff, 0xff, 0x38, 0x03, 0xff, 0xff,
+        0x3f, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x46, 0x03, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x4d, 0x03, 0x53, 0x03,
+        0x59, 0x03, 0x60, 0x03, 0x67, 0x03, 0x6e, 0x03, 0x75, 0x03, 0x7d, 0x03,
+        0x85, 0x03, 0x8d, 0x03, 0x95, 0x03, 0x9d, 0x03, 0xa5, 0x03, 0xad, 0x03,
+        0xb5, 0x03, 0xbc, 0x03, 0xc3, 0x03, 0xca, 0x03, 0xd1, 0x03, 0xd9, 0x03,
+        0xe1, 0x03, 0xe9, 0x03, 0xf1, 0x03, 0xf9, 0x03, 0x01, 0x04, 0x09, 0x04,
+        0x11, 0x04, 0x18, 0x04, 0x1f, 0x04, 0x26, 0x04, 0x2d, 0x04, 0x35, 0x04,
+        0x3d, 0x04, 0x45, 0x04, 0x4d, 0x04, 0x55, 0x04, 0x5d, 0x04, 0x65, 0x04,
+        0x6d, 0x04, 0x74, 0x04, 0x7b, 0x04, 0x82, 0x04, 0x89, 0x04, 0x91, 0x04,
+        0x99, 0x04, 0xa1, 0x04, 0xa9, 0x04, 0xb1, 0x04, 0xb9, 0x04, 0xc1, 0x04,
+        0xc9, 0x04, 0xd0, 0x04, 0xd7, 0x04, 0xde, 0x04, 0xe3, 0x04, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xea, 0x04, 0xf5, 0x04, 0xfa, 0x04,
+        0x0d, 0x05, 0x11, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0x1a, 0x05, 0x60, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa6, 0x05, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xab, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb1, 0x05,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb5, 0x05, 0xbf, 0x05, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xc9, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xe0, 0x05, 0xe3, 0x05, 0x1b, 0x5b, 0x5a, 0x00, 0x07, 0x00,
+        0x0d, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, 0x25, 0x64, 0x3b,
+        0x25, 0x70, 0x32, 0x25, 0x64, 0x72, 0x00, 0x1b, 0x5b, 0x33, 0x67, 0x00,
+        0x1b, 0x5b, 0x48, 0x1b, 0x5b, 0x32, 0x4a, 0x00, 0x1b, 0x5b, 0x4b, 0x00,
+        0x1b, 0x5b, 0x4a, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, 0x25,
+        0x64, 0x47, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, 0x25, 0x64,
+        0x3b, 0x25, 0x70, 0x32, 0x25, 0x64, 0x48, 0x00, 0x0a, 0x00, 0x1b, 0x5b,
+        0x48, 0x00, 0x1b, 0x5b, 0x3f, 0x32, 0x35, 0x6c, 0x00, 0x08, 0x00, 0x1b,
+        0x5b, 0x3f, 0x31, 0x32, 0x6c, 0x1b, 0x5b, 0x3f, 0x32, 0x35, 0x68, 0x00,
+        0x1b, 0x5b, 0x43, 0x00, 0x1b, 0x5b, 0x41, 0x00, 0x1b, 0x5b, 0x3f, 0x31,
+        0x32, 0x3b, 0x32, 0x35, 0x68, 0x00, 0x1b, 0x5b, 0x50, 0x00, 0x1b, 0x5b,
+        0x4d, 0x00, 0x1b, 0x28, 0x30, 0x00, 0x1b, 0x5b, 0x35, 0x6d, 0x00, 0x1b,
+        0x5b, 0x31, 0x6d, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x34, 0x39, 0x68,
+        0x1b, 0x5b, 0x32, 0x32, 0x3b, 0x30, 0x3b, 0x30, 0x74, 0x00, 0x1b, 0x5b,
+        0x32, 0x6d, 0x00, 0x1b, 0x5b, 0x34, 0x68, 0x00, 0x1b, 0x5b, 0x38, 0x6d,
+        0x00, 0x1b, 0x5b, 0x37, 0x6d, 0x00, 0x1b, 0x5b, 0x37, 0x6d, 0x00, 0x1b,
+        0x5b, 0x34, 0x6d, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x58,
+        0x00, 0x1b, 0x28, 0x42, 0x00, 0x1b, 0x28, 0x42, 0x1b, 0x5b, 0x6d, 0x00,
+        0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x34, 0x39, 0x6c, 0x1b, 0x5b, 0x32, 0x33,
+        0x3b, 0x30, 0x3b, 0x30, 0x74, 0x00, 0x1b, 0x5b, 0x34, 0x6c, 0x00, 0x1b,
+        0x5b, 0x32, 0x37, 0x6d, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x6d, 0x00, 0x1b,
+        0x5b, 0x3f, 0x35, 0x68, 0x24, 0x3c, 0x31, 0x30, 0x30, 0x2f, 0x3e, 0x1b,
+        0x5b, 0x3f, 0x35, 0x6c, 0x00, 0x1b, 0x5b, 0x21, 0x70, 0x1b, 0x5b, 0x3f,
+        0x33, 0x3b, 0x34, 0x6c, 0x1b, 0x5b, 0x34, 0x6c, 0x1b, 0x3e, 0x00, 0x1b,
+        0x5b, 0x4c, 0x00, 0x08, 0x00, 0x1b, 0x5b, 0x33, 0x7e, 0x00, 0x1b, 0x4f,
+        0x42, 0x00, 0x1b, 0x4f, 0x50, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x7e, 0x00,
+        0x1b, 0x4f, 0x51, 0x00, 0x1b, 0x4f, 0x52, 0x00, 0x1b, 0x4f, 0x53, 0x00,
+        0x1b, 0x5b, 0x31, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x37, 0x7e, 0x00,
+        0x1b, 0x5b, 0x31, 0x38, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x7e, 0x00,
+        0x1b, 0x5b, 0x32, 0x30, 0x7e, 0x00, 0x1b, 0x4f, 0x48, 0x00, 0x1b, 0x5b,
+        0x32, 0x7e, 0x00, 0x1b, 0x4f, 0x44, 0x00, 0x1b, 0x5b, 0x36, 0x7e, 0x00,
+        0x1b, 0x5b, 0x35, 0x7e, 0x00, 0x1b, 0x4f, 0x43, 0x00, 0x1b, 0x5b, 0x31,
+        0x3b, 0x32, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x41, 0x00, 0x1b,
+        0x4f, 0x41, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x6c, 0x1b, 0x3e, 0x00, 0x1b,
+        0x5b, 0x3f, 0x31, 0x68, 0x1b, 0x3d, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30,
+        0x33, 0x34, 0x6c, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x33, 0x34, 0x68,
+        0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x50, 0x00, 0x1b, 0x5b,
+        0x25, 0x70, 0x31, 0x25, 0x64, 0x4d, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31,
+        0x25, 0x64, 0x42, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x40,
+        0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x53, 0x00, 0x1b, 0x5b,
+        0x25, 0x70, 0x31, 0x25, 0x64, 0x4c, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31,
+        0x25, 0x64, 0x44, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x43,
+        0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x54, 0x00, 0x1b, 0x5b,
+        0x25, 0x70, 0x31, 0x25, 0x64, 0x41, 0x00, 0x1b, 0x5b, 0x69, 0x00, 0x1b,
+        0x5b, 0x34, 0x69, 0x00, 0x1b, 0x5b, 0x35, 0x69, 0x00, 0x25, 0x70, 0x31,
+        0x25, 0x63, 0x1b, 0x5b, 0x25, 0x70, 0x32, 0x25, 0x7b, 0x31, 0x7d, 0x25,
+        0x2d, 0x25, 0x64, 0x62, 0x00, 0x1b, 0x63, 0x00, 0x1b, 0x5b, 0x21, 0x70,
+        0x1b, 0x5b, 0x3f, 0x33, 0x3b, 0x34, 0x6c, 0x1b, 0x5b, 0x34, 0x6c, 0x1b,
+        0x3e, 0x00, 0x1b, 0x38, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31,
+        0x25, 0x64, 0x64, 0x00, 0x1b, 0x37, 0x00, 0x0a, 0x00, 0x1b, 0x4d, 0x00,
+        0x25, 0x3f, 0x25, 0x70, 0x39, 0x25, 0x74, 0x1b, 0x28, 0x30, 0x25, 0x65,
+        0x1b, 0x28, 0x42, 0x25, 0x3b, 0x1b, 0x5b, 0x30, 0x25, 0x3f, 0x25, 0x70,
+        0x36, 0x25, 0x74, 0x3b, 0x31, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, 0x35,
+        0x25, 0x74, 0x3b, 0x32, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, 0x32, 0x25,
+        0x74, 0x3b, 0x34, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, 0x31, 0x25, 0x70,
+        0x33, 0x25, 0x7c, 0x25, 0x74, 0x3b, 0x37, 0x25, 0x3b, 0x25, 0x3f, 0x25,
+        0x70, 0x34, 0x25, 0x74, 0x3b, 0x35, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70,
+        0x37, 0x25, 0x74, 0x3b, 0x38, 0x25, 0x3b, 0x6d, 0x00, 0x1b, 0x48, 0x00,
+        0x09, 0x00, 0x1b, 0x4f, 0x77, 0x00, 0x1b, 0x4f, 0x79, 0x00, 0x1b, 0x4f,
+        0x75, 0x00, 0x1b, 0x4f, 0x71, 0x00, 0x1b, 0x4f, 0x73, 0x00, 0x60, 0x60,
+        0x61, 0x61, 0x66, 0x66, 0x67, 0x67, 0x69, 0x69, 0x6a, 0x6a, 0x6b, 0x6b,
+        0x6c, 0x6c, 0x6d, 0x6d, 0x6e, 0x6e, 0x6f, 0x6f, 0x70, 0x70, 0x71, 0x71,
+        0x72, 0x72, 0x73, 0x73, 0x74, 0x74, 0x75, 0x75, 0x76, 0x76, 0x77, 0x77,
+        0x78, 0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7b, 0x7b, 0x7c, 0x7c, 0x7d, 0x7d,
+        0x7e, 0x7e, 0x00, 0x1b, 0x5b, 0x5a, 0x00, 0x1b, 0x5b, 0x3f, 0x37, 0x68,
+        0x00, 0x1b, 0x5b, 0x3f, 0x37, 0x6c, 0x00, 0x1b, 0x4f, 0x46, 0x00, 0x1b,
+        0x4f, 0x4d, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b,
+        0x31, 0x3b, 0x32, 0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x48, 0x00,
+        0x1b, 0x5b, 0x32, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32,
+        0x44, 0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x35,
+        0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x43, 0x00, 0x1b,
+        0x5b, 0x32, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x7e, 0x00, 0x1b,
+        0x5b, 0x31, 0x3b, 0x32, 0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x51,
+        0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x3b,
+        0x32, 0x53, 0x00, 0x1b, 0x5b, 0x31, 0x35, 0x3b, 0x32, 0x7e, 0x00, 0x1b,
+        0x5b, 0x31, 0x37, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x38, 0x3b,
+        0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x3b, 0x32, 0x7e, 0x00, 0x1b,
+        0x5b, 0x32, 0x30, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x3b,
+        0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x3b, 0x32, 0x7e, 0x00, 0x1b,
+        0x5b, 0x32, 0x34, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35,
+        0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x51, 0x00, 0x1b, 0x5b, 0x31,
+        0x3b, 0x35, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x53, 0x00, 0x1b,
+        0x5b, 0x31, 0x35, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x37, 0x3b,
+        0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x38, 0x3b, 0x35, 0x7e, 0x00, 0x1b,
+        0x5b, 0x31, 0x39, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x30, 0x3b,
+        0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x3b, 0x35, 0x7e, 0x00, 0x1b,
+        0x5b, 0x32, 0x33, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x3b,
+        0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x50, 0x00, 0x1b, 0x5b,
+        0x31, 0x3b, 0x36, 0x51, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x52, 0x00,
+        0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x53, 0x00, 0x1b, 0x5b, 0x31, 0x35, 0x3b,
+        0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x37, 0x3b, 0x36, 0x7e, 0x00, 0x1b,
+        0x5b, 0x31, 0x38, 0x3b, 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x3b,
+        0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x30, 0x3b, 0x36, 0x7e, 0x00, 0x1b,
+        0x5b, 0x32, 0x31, 0x3b, 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x3b,
+        0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x3b, 0x36, 0x7e, 0x00, 0x1b,
+        0x5b, 0x31, 0x3b, 0x33, 0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x51,
+        0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x3b,
+        0x33, 0x53, 0x00, 0x1b, 0x5b, 0x31, 0x35, 0x3b, 0x33, 0x7e, 0x00, 0x1b,
+        0x5b, 0x31, 0x37, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x38, 0x3b,
+        0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x3b, 0x33, 0x7e, 0x00, 0x1b,
+        0x5b, 0x32, 0x30, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x3b,
+        0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x3b, 0x33, 0x7e, 0x00, 0x1b,
+        0x5b, 0x32, 0x34, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34,
+        0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34, 0x51, 0x00, 0x1b, 0x5b, 0x31,
+        0x3b, 0x34, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x4b, 0x00, 0x1b, 0x5b, 0x3f,
+        0x36, 0x39, 0x6c, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x64, 0x3b, 0x25,
+        0x64, 0x52, 0x00, 0x1b, 0x5b, 0x36, 0x6e, 0x00, 0x1b, 0x5b, 0x3f, 0x25,
+        0x5b, 0x3b, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+        0x5d, 0x63, 0x00, 0x1b, 0x5b, 0x63, 0x00, 0x1b, 0x5b, 0x33, 0x39, 0x3b,
+        0x34, 0x39, 0x6d, 0x00, 0x1b, 0x5b, 0x33, 0x25, 0x3f, 0x25, 0x70, 0x31,
+        0x25, 0x7b, 0x31, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x34, 0x25, 0x65, 0x25,
+        0x70, 0x31, 0x25, 0x7b, 0x33, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x36, 0x25,
+        0x65, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x34, 0x7d, 0x25, 0x3d, 0x25, 0x74,
+        0x31, 0x25, 0x65, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x36, 0x7d, 0x25, 0x3d,
+        0x25, 0x74, 0x33, 0x25, 0x65, 0x25, 0x70, 0x31, 0x25, 0x64, 0x25, 0x3b,
+        0x6d, 0x00, 0x1b, 0x5b, 0x34, 0x25, 0x3f, 0x25, 0x70, 0x31, 0x25, 0x7b,
+        0x31, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x34, 0x25, 0x65, 0x25, 0x70, 0x31,
+        0x25, 0x7b, 0x33, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x36, 0x25, 0x65, 0x25,
+        0x70, 0x31, 0x25, 0x7b, 0x34, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x31, 0x25,
+        0x65, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x36, 0x7d, 0x25, 0x3d, 0x25, 0x74,
+        0x33, 0x25, 0x65, 0x25, 0x70, 0x31, 0x25, 0x64, 0x25, 0x3b, 0x6d, 0x00,
+        0x1b, 0x5b, 0x33, 0x6d, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x6d, 0x00, 0x1b,
+        0x5b, 0x3c, 0x00, 0x1b, 0x5b, 0x33, 0x25, 0x70, 0x31, 0x25, 0x64, 0x6d,
+        0x00, 0x1b, 0x5b, 0x34, 0x25, 0x70, 0x31, 0x25, 0x64, 0x6d, 0x00, 0x1b,
+        0x5b, 0x3f, 0x36, 0x39, 0x68, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31,
+        0x25, 0x64, 0x3b, 0x25, 0x70, 0x32, 0x25, 0x64, 0x73, 0x00, 0x1b, 0x6c,
+        0x00, 0x1b, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x96, 0x00,
+        0xac, 0x03, 0x01, 0x01, 0x00, 0x00, 0x07, 0x00, 0x13, 0x00, 0x18, 0x00,
+        0x2a, 0x00, 0x30, 0x00, 0x3a, 0x00, 0x5a, 0x00, 0x61, 0x00, 0x68, 0x00,
+        0x6f, 0x00, 0x76, 0x00, 0x7d, 0x00, 0x84, 0x00, 0x8b, 0x00, 0x92, 0x00,
+        0x99, 0x00, 0xa0, 0x00, 0xa7, 0x00, 0xae, 0x00, 0xb5, 0x00, 0xbc, 0x00,
+        0xc3, 0x00, 0xca, 0x00, 0xd1, 0x00, 0xd8, 0x00, 0xdf, 0x00, 0xe6, 0x00,
+        0xed, 0x00, 0xf4, 0x00, 0xfb, 0x00, 0x02, 0x01, 0x09, 0x01, 0x10, 0x01,
+        0x17, 0x01, 0x1e, 0x01, 0x25, 0x01, 0x2c, 0x01, 0x33, 0x01, 0x3a, 0x01,
+        0x41, 0x01, 0x48, 0x01, 0x4f, 0x01, 0x56, 0x01, 0x5d, 0x01, 0x64, 0x01,
+        0x6b, 0x01, 0x72, 0x01, 0x79, 0x01, 0x80, 0x01, 0x87, 0x01, 0x8e, 0x01,
+        0x95, 0x01, 0x9c, 0x01, 0xa3, 0x01, 0xaa, 0x01, 0xb1, 0x01, 0xb8, 0x01,
+        0xbf, 0x01, 0xc6, 0x01, 0xca, 0x01, 0xce, 0x01, 0xd2, 0x01, 0xd6, 0x01,
+        0xda, 0x01, 0xde, 0x01, 0xe2, 0x01, 0xe6, 0x01, 0xea, 0x01, 0xee, 0x01,
+        0xf2, 0x01, 0xf6, 0x01, 0xfc, 0x01, 0x01, 0x02, 0x00, 0x00, 0x03, 0x00,
+        0x06, 0x00, 0x09, 0x00, 0x0c, 0x00, 0x0f, 0x00, 0x12, 0x00, 0x15, 0x00,
+        0x18, 0x00, 0x1b, 0x00, 0x20, 0x00, 0x25, 0x00, 0x2a, 0x00, 0x2f, 0x00,
+        0x34, 0x00, 0x38, 0x00, 0x3d, 0x00, 0x42, 0x00, 0x47, 0x00, 0x4c, 0x00,
+        0x51, 0x00, 0x57, 0x00, 0x5d, 0x00, 0x63, 0x00, 0x69, 0x00, 0x6f, 0x00,
+        0x75, 0x00, 0x7b, 0x00, 0x81, 0x00, 0x87, 0x00, 0x8d, 0x00, 0x92, 0x00,
+        0x97, 0x00, 0x9c, 0x00, 0xa1, 0x00, 0xa6, 0x00, 0xac, 0x00, 0xb2, 0x00,
+        0xb8, 0x00, 0xbe, 0x00, 0xc4, 0x00, 0xca, 0x00, 0xd0, 0x00, 0xd6, 0x00,
+        0xdc, 0x00, 0xe2, 0x00, 0xe8, 0x00, 0xee, 0x00, 0xf4, 0x00, 0xfa, 0x00,
+        0x00, 0x01, 0x06, 0x01, 0x0c, 0x01, 0x12, 0x01, 0x18, 0x01, 0x1e, 0x01,
+        0x22, 0x01, 0x27, 0x01, 0x2c, 0x01, 0x31, 0x01, 0x36, 0x01, 0x3b, 0x01,
+        0x3f, 0x01, 0x43, 0x01, 0x47, 0x01, 0x4b, 0x01, 0x4f, 0x01, 0x55, 0x01,
+        0x5b, 0x01, 0x61, 0x01, 0x67, 0x01, 0x6d, 0x01, 0x73, 0x01, 0x79, 0x01,
+        0x7e, 0x01, 0x83, 0x01, 0x1b, 0x5d, 0x31, 0x31, 0x32, 0x07, 0x00, 0x1b,
+        0x5d, 0x31, 0x32, 0x3b, 0x25, 0x70, 0x31, 0x25, 0x73, 0x07, 0x00, 0x1b,
+        0x5b, 0x33, 0x4a, 0x00, 0x1b, 0x5d, 0x35, 0x32, 0x3b, 0x25, 0x70, 0x31,
+        0x25, 0x73, 0x3b, 0x25, 0x70, 0x32, 0x25, 0x73, 0x07, 0x00, 0x1b, 0x5b,
+        0x32, 0x20, 0x71, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x20,
+        0x71, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x30, 0x36, 0x3b, 0x31, 0x30,
+        0x30, 0x30, 0x25, 0x3f, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x31, 0x7d, 0x25,
+        0x3d, 0x25, 0x74, 0x68, 0x25, 0x65, 0x6c, 0x25, 0x3b, 0x00, 0x1b, 0x5b,
+        0x33, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x34, 0x7e, 0x00,
+        0x1b, 0x5b, 0x33, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x36,
+        0x7e, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x37, 0x7e, 0x00, 0x1b, 0x5b, 0x31,
+        0x3b, 0x32, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x42, 0x00, 0x1b,
+        0x5b, 0x31, 0x3b, 0x34, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x42,
+        0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b,
+        0x37, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x46, 0x00, 0x1b, 0x5b,
+        0x31, 0x3b, 0x34, 0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x46, 0x00,
+        0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37,
+        0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x48, 0x00, 0x1b, 0x5b, 0x31,
+        0x3b, 0x34, 0x48, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x48, 0x00, 0x1b,
+        0x5b, 0x31, 0x3b, 0x36, 0x48, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, 0x48,
+        0x00, 0x1b, 0x5b, 0x32, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x3b,
+        0x34, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b,
+        0x32, 0x3b, 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x3b, 0x37, 0x7e, 0x00,
+        0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x44, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34,
+        0x44, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x44, 0x00, 0x1b, 0x5b, 0x31,
+        0x3b, 0x36, 0x44, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, 0x44, 0x00, 0x1b,
+        0x5b, 0x36, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x34, 0x7e,
+        0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x36, 0x3b,
+        0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x37, 0x7e, 0x00, 0x1b, 0x5b,
+        0x35, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x35, 0x3b, 0x34, 0x7e, 0x00,
+        0x1b, 0x5b, 0x35, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x35, 0x3b, 0x36,
+        0x7e, 0x00, 0x1b, 0x5b, 0x35, 0x3b, 0x37, 0x7e, 0x00, 0x1b, 0x5b, 0x31,
+        0x3b, 0x33, 0x43, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34, 0x43, 0x00, 0x1b,
+        0x5b, 0x31, 0x3b, 0x35, 0x43, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x43,
+        0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, 0x43, 0x00, 0x1b, 0x5b, 0x31, 0x3b,
+        0x32, 0x41, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x41, 0x00, 0x1b, 0x5b,
+        0x31, 0x3b, 0x34, 0x41, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x41, 0x00,
+        0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x41, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37,
+        0x41, 0x00, 0x1b, 0x4f, 0x78, 0x00, 0x1b, 0x4f, 0x74, 0x00, 0x1b, 0x4f,
+        0x76, 0x00, 0x1b, 0x4f, 0x72, 0x00, 0x1b, 0x4f, 0x45, 0x00, 0x1b, 0x4f,
+        0x6b, 0x00, 0x1b, 0x4f, 0x6c, 0x00, 0x1b, 0x4f, 0x6f, 0x00, 0x1b, 0x4f,
+        0x6e, 0x00, 0x1b, 0x4f, 0x6a, 0x00, 0x1b, 0x4f, 0x6d, 0x00, 0x1b, 0x4f,
+        0x70, 0x00, 0x1b, 0x5b, 0x32, 0x39, 0x6d, 0x00, 0x1b, 0x5b, 0x39, 0x6d,
+        0x00, 0x1b, 0x5b, 0x3c, 0x25, 0x69, 0x25, 0x70, 0x33, 0x25, 0x64, 0x3b,
+        0x25, 0x70, 0x31, 0x25, 0x64, 0x3b, 0x25, 0x70, 0x32, 0x25, 0x64, 0x3b,
+        0x25, 0x3f, 0x25, 0x70, 0x34, 0x25, 0x74, 0x4d, 0x25, 0x65, 0x6d, 0x25,
+        0x3b, 0x00, 0x41, 0x58, 0x00, 0x58, 0x54, 0x00, 0x43, 0x72, 0x00, 0x43,
+        0x73, 0x00, 0x45, 0x33, 0x00, 0x4d, 0x73, 0x00, 0x53, 0x65, 0x00, 0x53,
+        0x73, 0x00, 0x58, 0x4d, 0x00, 0x6b, 0x44, 0x43, 0x33, 0x00, 0x6b, 0x44,
+        0x43, 0x34, 0x00, 0x6b, 0x44, 0x43, 0x35, 0x00, 0x6b, 0x44, 0x43, 0x36,
+        0x00, 0x6b, 0x44, 0x43, 0x37, 0x00, 0x6b, 0x44, 0x4e, 0x00, 0x6b, 0x44,
+        0x4e, 0x33, 0x00, 0x6b, 0x44, 0x4e, 0x34, 0x00, 0x6b, 0x44, 0x4e, 0x35,
+        0x00, 0x6b, 0x44, 0x4e, 0x36, 0x00, 0x6b, 0x44, 0x4e, 0x37, 0x00, 0x6b,
+        0x45, 0x4e, 0x44, 0x33, 0x00, 0x6b, 0x45, 0x4e, 0x44, 0x34, 0x00, 0x6b,
+        0x45, 0x4e, 0x44, 0x35, 0x00, 0x6b, 0x45, 0x4e, 0x44, 0x36, 0x00, 0x6b,
+        0x45, 0x4e, 0x44, 0x37, 0x00, 0x6b, 0x48, 0x4f, 0x4d, 0x33, 0x00, 0x6b,
+        0x48, 0x4f, 0x4d, 0x34, 0x00, 0x6b, 0x48, 0x4f, 0x4d, 0x35, 0x00, 0x6b,
+        0x48, 0x4f, 0x4d, 0x36, 0x00, 0x6b, 0x48, 0x4f, 0x4d, 0x37, 0x00, 0x6b,
+        0x49, 0x43, 0x33, 0x00, 0x6b, 0x49, 0x43, 0x34, 0x00, 0x6b, 0x49, 0x43,
+        0x35, 0x00, 0x6b, 0x49, 0x43, 0x36, 0x00, 0x6b, 0x49, 0x43, 0x37, 0x00,
+        0x6b, 0x4c, 0x46, 0x54, 0x33, 0x00, 0x6b, 0x4c, 0x46, 0x54, 0x34, 0x00,
+        0x6b, 0x4c, 0x46, 0x54, 0x35, 0x00, 0x6b, 0x4c, 0x46, 0x54, 0x36, 0x00,
+        0x6b, 0x4c, 0x46, 0x54, 0x37, 0x00, 0x6b, 0x4e, 0x58, 0x54, 0x33, 0x00,
+        0x6b, 0x4e, 0x58, 0x54, 0x34, 0x00, 0x6b, 0x4e, 0x58, 0x54, 0x35, 0x00,
+        0x6b, 0x4e, 0x58, 0x54, 0x36, 0x00, 0x6b, 0x4e, 0x58, 0x54, 0x37, 0x00,
+        0x6b, 0x50, 0x52, 0x56, 0x33, 0x00, 0x6b, 0x50, 0x52, 0x56, 0x34, 0x00,
+        0x6b, 0x50, 0x52, 0x56, 0x35, 0x00, 0x6b, 0x50, 0x52, 0x56, 0x36, 0x00,
+        0x6b, 0x50, 0x52, 0x56, 0x37, 0x00, 0x6b, 0x52, 0x49, 0x54, 0x33, 0x00,
+        0x6b, 0x52, 0x49, 0x54, 0x34, 0x00, 0x6b, 0x52, 0x49, 0x54, 0x35, 0x00,
+        0x6b, 0x52, 0x49, 0x54, 0x36, 0x00, 0x6b, 0x52, 0x49, 0x54, 0x37, 0x00,
+        0x6b, 0x55, 0x50, 0x00, 0x6b, 0x55, 0x50, 0x33, 0x00, 0x6b, 0x55, 0x50,
+        0x34, 0x00, 0x6b, 0x55, 0x50, 0x35, 0x00, 0x6b, 0x55, 0x50, 0x36, 0x00,
+        0x6b, 0x55, 0x50, 0x37, 0x00, 0x6b, 0x61, 0x32, 0x00, 0x6b, 0x62, 0x31,
+        0x00, 0x6b, 0x62, 0x33, 0x00, 0x6b, 0x63, 0x32, 0x00, 0x6b, 0x70, 0x35,
+        0x00, 0x6b, 0x70, 0x41, 0x44, 0x44, 0x00, 0x6b, 0x70, 0x43, 0x4d, 0x41,
+        0x00, 0x6b, 0x70, 0x44, 0x49, 0x56, 0x00, 0x6b, 0x70, 0x44, 0x4f, 0x54,
+        0x00, 0x6b, 0x70, 0x4d, 0x55, 0x4c, 0x00, 0x6b, 0x70, 0x53, 0x55, 0x42,
+        0x00, 0x6b, 0x70, 0x5a, 0x52, 0x4f, 0x00, 0x72, 0x6d, 0x78, 0x78, 0x00,
+        0x73, 0x6d, 0x78, 0x78, 0x00, 0x78, 0x6d, 0x00]
+
+    xterm_extensions =
+        [:AX, :E3, :XM, :XT, :enter_strikeout_mode, :exit_strikeout_mode, :ka2,
+         :kb1, :kb3, :kc2, :key_alt_control_delete_character,
+         :key_alt_control_down_cursor, :key_alt_control_end,
+         :key_alt_control_home, :key_alt_control_insert_character,
+         :key_alt_control_left_cursor, :key_alt_control_next,
+         :key_alt_control_previous, :key_alt_control_right_cursor,
+         :key_alt_control_up_cursor, :key_alt_delete_character,
+         :key_alt_down_cursor, :key_alt_end, :key_alt_home,
+         :key_alt_insert_character, :key_alt_left_cursor, :key_alt_next,
+         :key_alt_previous, :key_alt_right_cursor, :key_alt_up_cursor,
+         :key_control_delete_character, :key_control_down_cursor,
+         :key_control_end, :key_control_home, :key_control_insert_character,
+         :key_control_left_cursor, :key_control_next, :key_control_previous,
+         :key_control_right_cursor, :key_control_up_cursor,
+         :key_shift_alt_delete_character, :key_shift_alt_down_cursor,
+         :key_shift_alt_end, :key_shift_alt_home,
+         :key_shift_alt_insert_character, :key_shift_alt_left_cursor,
+         :key_shift_alt_next, :key_shift_alt_previous,
+         :key_shift_alt_right_cursor, :key_shift_alt_up_cursor,
+         :key_shift_control_delete_character, :key_shift_control_down_cursor,
+         :key_shift_control_end, :key_shift_control_home,
+         :key_shift_control_insert_character, :key_shift_control_left_cursor,
+         :key_shift_control_next, :key_shift_control_previous,
+         :key_shift_control_right_cursor, :key_shift_control_up_cursor,
+         :key_shift_down_cursor, :key_shift_up_cursor, :kp5, :kpADD, :kpCMA,
+         :kpDIV, :kpDOT, :kpMUL, :kpSUB, :kpZRO, :reset_cursor_color,
+         :reset_cursor_style, :set_cursor_color, :set_cursor_style,
+         :set_host_clipboard, :xm]
+
+    xterm_capabilities = Dict{Symbol, Union{Bool, Int, String}}(
+        :AX => true,
+        :Cr => "\e]112\a",
+        :Cs => "\e]12;%p1%s\a",
+        :E3 => "\e[3J",
+        :Ms => "\e]52;%p1%s;%p2%s\a",
+        :OTbs => true,
+        :Se => "\e[2 q",
+        :Ss => "\e[%p1%d q",
+        :XM => "\e[?1006;1000%?%p1%{1}%=%th%el%;",
+        :XT => true,
+        :acs_chars => "``aaffggiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz{{||}}~~",
+        :acsc => "``aaffggiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz{{||}}~~",
+        :am => true,
+        :auto_left_margin => false,
+        :auto_right_margin => true,
+        :back_color_erase => true,
+        :back_tab => "\e[Z",
+        :backspaces_with_bs => true,
+        :bce => true,
+        :bel => "\a",
+        :bell => "\a",
+        :blink => "\e[5m",
+        :bold => "\e[1m",
+        :bw => false,
+        :can_change => false,
+        :carriage_return => "\r",
+        :cbt => "\e[Z",
+        :ccc => false,
+        :ceol_standout_glitch => false,
+        :change_scroll_region => "\e[%i%p1%d;%p2%dr",
+        :chts => false,
+        :civis => "\e[?25l",
+        :clear => "\e[H\e[2J",
+        :clear_all_tabs => "\e[3g",
+        :clear_margins => "\e[?69l",
+        :clear_screen => "\e[H\e[2J",
+        :clr_bol => "\e[1K",
+        :clr_eol => "\e[K",
+        :clr_eos => "\e[J",
+        :cnorm => "\e[?12l\e[?25h",
+        :col_addr_glitch => false,
+        :colors => 8,
+        :cols => 80,
+        :column_address => "\e[%i%p1%dG",
+        :columns => 80,
+        :cpi_changes_res => false,
+        :cpix => false,
+        :cr => "\r",
+        :cr_cancels_micro_mode => false,
+        :crxm => false,
+        :csr => "\e[%i%p1%d;%p2%dr",
+        :cub => "\e[%p1%dD",
+        :cub1 => "\b",
+        :cud => "\e[%p1%dB",
+        :cud1 => "\n",
+        :cuf => "\e[%p1%dC",
+        :cuf1 => "\e[C",
+        :cup => "\e[%i%p1%d;%p2%dH",
+        :cursor_address => "\e[%i%p1%d;%p2%dH",
+        :cursor_down => "\n",
+        :cursor_home => "\e[H",
+        :cursor_invisible => "\e[?25l",
+        :cursor_left => "\b",
+        :cursor_normal => "\e[?12l\e[?25h",
+        :cursor_right => "\e[C",
+        :cursor_up => "\e[A",
+        :cursor_visible => "\e[?12;25h",
+        :cuu => "\e[%p1%dA",
+        :cuu1 => "\e[A",
+        :cvvis => "\e[?12;25h",
+        :da => false,
+        :daisy => false,
+        :db => false,
+        :dch => "\e[%p1%dP",
+        :dch1 => "\e[P",
+        :delete_character => "\e[P",
+        :delete_line => "\e[M",
+        :dest_tabs_magic_smso => false,
+        :dim => "\e[2m",
+        :dl => "\e[%p1%dM",
+        :dl1 => "\e[M",
+        :eat_newline_glitch => true,
+        :ech => "\e[%p1%dX",
+        :ed => "\e[J",
+        :el => "\e[K",
+        :el1 => "\e[1K",
+        :enter_alt_charset_mode => "\e(0",
+        :enter_am_mode => "\e[?7h",
+        :enter_blink_mode => "\e[5m",
+        :enter_bold_mode => "\e[1m",
+        :enter_ca_mode => "\e[?1049h\e[22;0;0t",
+        :enter_dim_mode => "\e[2m",
+        :enter_insert_mode => "\e[4h",
+        :enter_italics_mode => "\e[3m",
+        :enter_reverse_mode => "\e[7m",
+        :enter_secure_mode => "\e[8m",
+        :enter_standout_mode => "\e[7m",
+        :enter_underline_mode => "\e[4m",
+        :eo => false,
+        :erase_chars => "\e[%p1%dX",
+        :erase_overstrike => false,
+        :eslok => false,
+        :exit_alt_charset_mode => "\e(B",
+        :exit_am_mode => "\e[?7l",
+        :exit_attribute_mode => "\e(B\e[m",
+        :exit_ca_mode => "\e[?1049l\e[23;0;0t",
+        :exit_insert_mode => "\e[4l",
+        :exit_italics_mode => "\e[23m",
+        :exit_standout_mode => "\e[27m",
+        :exit_underline_mode => "\e[24m",
+        :flash => "\e[?5h\$<100/>\e[?5l",
+        :flash_screen => "\e[?5h\$<100/>\e[?5l",
+        :generic_type => false,
+        :gn => false,
+        :hard_copy => false,
+        :hard_cursor => false,
+        :has_meta_key => true,
+        :has_print_wheel => false,
+        :has_status_line => false,
+        :hc => false,
+        :hls => false,
+        :home => "\e[H",
+        :hpa => "\e[%i%p1%dG",
+        :hs => false,
+        :ht => "\t",
+        :hts => "\eH",
+        :hue_lightness_saturation => false,
+        :hz => false,
+        :ich => "\e[%p1%d@",
+        :il => "\e[%p1%dL",
+        :il1 => "\e[L",
+        :in => false,
+        :ind => "\n",
+        :indn => "\e[%p1%dS",
+        :init_2string => "\e[!p\e[?3;4l\e[4l\e>",
+        :init_tabs => 8,
+        :insert_line => "\e[L",
+        :insert_null_glitch => false,
+        :invis => "\e[8m",
+        :is2 => "\e[!p\e[?3;4l\e[4l\e>",
+        :it => 8,
+        :kDC => "\e[3;2~",
+        :kDC3 => "\e[3;3~",
+        :kDC4 => "\e[3;4~",
+        :kDC5 => "\e[3;5~",
+        :kDC6 => "\e[3;6~",
+        :kDC7 => "\e[3;7~",
+        :kDN => "\e[1;2B",
+        :kDN3 => "\e[1;3B",
+        :kDN4 => "\e[1;4B",
+        :kDN5 => "\e[1;5B",
+        :kDN6 => "\e[1;6B",
+        :kDN7 => "\e[1;7B",
+        :kEND => "\e[1;2F",
+        :kEND3 => "\e[1;3F",
+        :kEND4 => "\e[1;4F",
+        :kEND5 => "\e[1;5F",
+        :kEND6 => "\e[1;6F",
+        :kEND7 => "\e[1;7F",
+        :kHOM => "\e[1;2H",
+        :kHOM3 => "\e[1;3H",
+        :kHOM4 => "\e[1;4H",
+        :kHOM5 => "\e[1;5H",
+        :kHOM6 => "\e[1;6H",
+        :kHOM7 => "\e[1;7H",
+        :kIC => "\e[2;2~",
+        :kIC3 => "\e[2;3~",
+        :kIC4 => "\e[2;4~",
+        :kIC5 => "\e[2;5~",
+        :kIC6 => "\e[2;6~",
+        :kIC7 => "\e[2;7~",
+        :kLFT => "\e[1;2D",
+        :kLFT3 => "\e[1;3D",
+        :kLFT4 => "\e[1;4D",
+        :kLFT5 => "\e[1;5D",
+        :kLFT6 => "\e[1;6D",
+        :kLFT7 => "\e[1;7D",
+        :kNXT => "\e[6;2~",
+        :kNXT3 => "\e[6;3~",
+        :kNXT4 => "\e[6;4~",
+        :kNXT5 => "\e[6;5~",
+        :kNXT6 => "\e[6;6~",
+        :kNXT7 => "\e[6;7~",
+        :kPRV => "\e[5;2~",
+        :kPRV3 => "\e[5;3~",
+        :kPRV4 => "\e[5;4~",
+        :kPRV5 => "\e[5;5~",
+        :kPRV6 => "\e[5;6~",
+        :kPRV7 => "\e[5;7~",
+        :kRIT => "\e[1;2C",
+        :kRIT3 => "\e[1;3C",
+        :kRIT4 => "\e[1;4C",
+        :kRIT5 => "\e[1;5C",
+        :kRIT6 => "\e[1;6C",
+        :kRIT7 => "\e[1;7C",
+        :kUP => "\e[1;2A",
+        :kUP3 => "\e[1;3A",
+        :kUP4 => "\e[1;4A",
+        :kUP5 => "\e[1;5A",
+        :kUP6 => "\e[1;6A",
+        :kUP7 => "\e[1;7A",
+        :ka1 => "\eOw",
+        :ka2 => "\eOx",
+        :ka3 => "\eOy",
+        :kb1 => "\eOt",
+        :kb2 => "\eOu",
+        :kb3 => "\eOv",
+        :kbs => "\b",
+        :kc1 => "\eOq",
+        :kc2 => "\eOr",
+        :kc3 => "\eOs",
+        :kcbt => "\e[Z",
+        :kcub1 => "\eOD",
+        :kcud1 => "\eOB",
+        :kcuf1 => "\eOC",
+        :kcuu1 => "\eOA",
+        :kdch1 => "\e[3~",
+        :kend => "\eOF",
+        :kent => "\eOM",
+        :key_a1 => "\eOw",
+        :key_a3 => "\eOy",
+        :key_b2 => "\eOu",
+        :key_backspace => "\b",
+        :key_btab => "\e[Z",
+        :key_c1 => "\eOq",
+        :key_c3 => "\eOs",
+        :key_dc => "\e[3~",
+        :key_down => "\eOB",
+        :key_end => "\eOF",
+        :key_enter => "\eOM",
+        :key_f1 => "\eOP",
+        :key_f10 => "\e[21~",
+        :key_f11 => "\e[23~",
+        :key_f12 => "\e[24~",
+        :key_f13 => "\e[1;2P",
+        :key_f14 => "\e[1;2Q",
+        :key_f15 => "\e[1;2R",
+        :key_f16 => "\e[1;2S",
+        :key_f17 => "\e[15;2~",
+        :key_f18 => "\e[17;2~",
+        :key_f19 => "\e[18;2~",
+        :key_f2 => "\eOQ",
+        :key_f20 => "\e[19;2~",
+        :key_f21 => "\e[20;2~",
+        :key_f22 => "\e[21;2~",
+        :key_f23 => "\e[23;2~",
+        :key_f24 => "\e[24;2~",
+        :key_f25 => "\e[1;5P",
+        :key_f26 => "\e[1;5Q",
+        :key_f27 => "\e[1;5R",
+        :key_f28 => "\e[1;5S",
+        :key_f29 => "\e[15;5~",
+        :key_f3 => "\eOR",
+        :key_f30 => "\e[17;5~",
+        :key_f31 => "\e[18;5~",
+        :key_f32 => "\e[19;5~",
+        :key_f33 => "\e[20;5~",
+        :key_f34 => "\e[21;5~",
+        :key_f35 => "\e[23;5~",
+        :key_f36 => "\e[24;5~",
+        :key_f37 => "\e[1;6P",
+        :key_f38 => "\e[1;6Q",
+        :key_f39 => "\e[1;6R",
+        :key_f4 => "\eOS",
+        :key_f40 => "\e[1;6S",
+        :key_f41 => "\e[15;6~",
+        :key_f42 => "\e[17;6~",
+        :key_f43 => "\e[18;6~",
+        :key_f44 => "\e[19;6~",
+        :key_f45 => "\e[20;6~",
+        :key_f46 => "\e[21;6~",
+        :key_f47 => "\e[23;6~",
+        :key_f48 => "\e[24;6~",
+        :key_f49 => "\e[1;3P",
+        :key_f5 => "\e[15~",
+        :key_f50 => "\e[1;3Q",
+        :key_f51 => "\e[1;3R",
+        :key_f52 => "\e[1;3S",
+        :key_f53 => "\e[15;3~",
+        :key_f54 => "\e[17;3~",
+        :key_f55 => "\e[18;3~",
+        :key_f56 => "\e[19;3~",
+        :key_f57 => "\e[20;3~",
+        :key_f58 => "\e[21;3~",
+        :key_f59 => "\e[23;3~",
+        :key_f6 => "\e[17~",
+        :key_f60 => "\e[24;3~",
+        :key_f61 => "\e[1;4P",
+        :key_f62 => "\e[1;4Q",
+        :key_f63 => "\e[1;4R",
+        :key_f7 => "\e[18~",
+        :key_f8 => "\e[19~",
+        :key_f9 => "\e[20~",
+        :key_home => "\eOH",
+        :key_ic => "\e[2~",
+        :key_left => "\eOD",
+        :key_mouse => "\e[<",
+        :key_npage => "\e[6~",
+        :key_ppage => "\e[5~",
+        :key_right => "\eOC",
+        :key_sdc => "\e[3;2~",
+        :key_send => "\e[1;2F",
+        :key_sf => "\e[1;2B",
+        :key_shome => "\e[1;2H",
+        :key_sic => "\e[2;2~",
+        :key_sleft => "\e[1;2D",
+        :key_snext => "\e[6;2~",
+        :key_sprevious => "\e[5;2~",
+        :key_sr => "\e[1;2A",
+        :key_sright => "\e[1;2C",
+        :key_up => "\eOA",
+        :keypad_local => "\e[?1l\e>",
+        :keypad_xmit => "\e[?1h\e=",
+        :kf1 => "\eOP",
+        :kf10 => "\e[21~",
+        :kf11 => "\e[23~",
+        :kf12 => "\e[24~",
+        :kf13 => "\e[1;2P",
+        :kf14 => "\e[1;2Q",
+        :kf15 => "\e[1;2R",
+        :kf16 => "\e[1;2S",
+        :kf17 => "\e[15;2~",
+        :kf18 => "\e[17;2~",
+        :kf19 => "\e[18;2~",
+        :kf2 => "\eOQ",
+        :kf20 => "\e[19;2~",
+        :kf21 => "\e[20;2~",
+        :kf22 => "\e[21;2~",
+        :kf23 => "\e[23;2~",
+        :kf24 => "\e[24;2~",
+        :kf25 => "\e[1;5P",
+        :kf26 => "\e[1;5Q",
+        :kf27 => "\e[1;5R",
+        :kf28 => "\e[1;5S",
+        :kf29 => "\e[15;5~",
+        :kf3 => "\eOR",
+        :kf30 => "\e[17;5~",
+        :kf31 => "\e[18;5~",
+        :kf32 => "\e[19;5~",
+        :kf33 => "\e[20;5~",
+        :kf34 => "\e[21;5~",
+        :kf35 => "\e[23;5~",
+        :kf36 => "\e[24;5~",
+        :kf37 => "\e[1;6P",
+        :kf38 => "\e[1;6Q",
+        :kf39 => "\e[1;6R",
+        :kf4 => "\eOS",
+        :kf40 => "\e[1;6S",
+        :kf41 => "\e[15;6~",
+        :kf42 => "\e[17;6~",
+        :kf43 => "\e[18;6~",
+        :kf44 => "\e[19;6~",
+        :kf45 => "\e[20;6~",
+        :kf46 => "\e[21;6~",
+        :kf47 => "\e[23;6~",
+        :kf48 => "\e[24;6~",
+        :kf49 => "\e[1;3P",
+        :kf5 => "\e[15~",
+        :kf50 => "\e[1;3Q",
+        :kf51 => "\e[1;3R",
+        :kf52 => "\e[1;3S",
+        :kf53 => "\e[15;3~",
+        :kf54 => "\e[17;3~",
+        :kf55 => "\e[18;3~",
+        :kf56 => "\e[19;3~",
+        :kf57 => "\e[20;3~",
+        :kf58 => "\e[21;3~",
+        :kf59 => "\e[23;3~",
+        :kf6 => "\e[17~",
+        :kf60 => "\e[24;3~",
+        :kf61 => "\e[1;4P",
+        :kf62 => "\e[1;4Q",
+        :kf63 => "\e[1;4R",
+        :kf7 => "\e[18~",
+        :kf8 => "\e[19~",
+        :kf9 => "\e[20~",
+        :khome => "\eOH",
+        :kich1 => "\e[2~",
+        :kind => "\e[1;2B",
+        :km => true,
+        :kmous => "\e[<",
+        :knp => "\e[6~",
+        :kp5 => "\eOE",
+        :kpADD => "\eOk",
+        :kpCMA => "\eOl",
+        :kpDIV => "\eOo",
+        :kpDOT => "\eOn",
+        :kpMUL => "\eOj",
+        :kpSUB => "\eOm",
+        :kpZRO => "\eOp",
+        :kpp => "\e[5~",
+        :kri => "\e[1;2A",
+        :lines => 24,
+        :lpi_changes_res => false,
+        :lpix => false,
+        :max_colors => 8,
+        :max_pairs => 64,
+        :mc0 => "\e[i",
+        :mc4 => "\e[4i",
+        :mc5 => "\e[5i",
+        :mc5i => true,
+        :meml => "\el",
+        :memory_above => false,
+        :memory_below => false,
+        :memory_lock => "\el",
+        :memory_unlock => "\em",
+        :memu => "\em",
+        :meta_off => "\e[?1034l",
+        :meta_on => "\e[?1034h",
+        :mgc => "\e[?69l",
+        :mir => true,
+        :move_insert_mode => true,
+        :move_standout_mode => true,
+        :msgr => true,
+        :ndscr => false,
+        :needs_xon_xoff => false,
+        :no_esc_ctlc => false,
+        :no_pad_char => true,
+        :non_dest_scroll_region => false,
+        :non_rev_rmcup => false,
+        :npc => true,
+        :nrrmc => false,
+        :nxon => false,
+        :op => "\e[39;49m",
+        :orig_pair => "\e[39;49m",
+        :os => false,
+        :over_strike => false,
+        :pairs => 64,
+        :parm_dch => "\e[%p1%dP",
+        :parm_delete_line => "\e[%p1%dM",
+        :parm_down_cursor => "\e[%p1%dB",
+        :parm_ich => "\e[%p1%d@",
+        :parm_index => "\e[%p1%dS",
+        :parm_insert_line => "\e[%p1%dL",
+        :parm_left_cursor => "\e[%p1%dD",
+        :parm_right_cursor => "\e[%p1%dC",
+        :parm_rindex => "\e[%p1%dT",
+        :parm_up_cursor => "\e[%p1%dA",
+        :print_screen => "\e[i",
+        :prtr_off => "\e[4i",
+        :prtr_on => "\e[5i",
+        :prtr_silent => true,
+        :rc => "\e8",
+        :rep => "%p1%c\e[%p2%{1}%-%db",
+        :repeat_char => "%p1%c\e[%p2%{1}%-%db",
+        :reset_1string => "\ec",
+        :reset_2string => "\e[!p\e[?3;4l\e[4l\e>",
+        :restore_cursor => "\e8",
+        :rev => "\e[7m",
+        :ri => "\eM",
+        :rin => "\e[%p1%dT",
+        :ritm => "\e[23m",
+        :rmacs => "\e(B",
+        :rmam => "\e[?7l",
+        :rmcup => "\e[?1049l\e[23;0;0t",
+        :rmir => "\e[4l",
+        :rmkx => "\e[?1l\e>",
+        :rmm => "\e[?1034l",
+        :rmso => "\e[27m",
+        :rmul => "\e[24m",
+        :rmxx => "\e[29m",
+        :row_addr_glitch => false,
+        :row_address => "\e[%i%p1%dd",
+        :rs1 => "\ec",
+        :rs2 => "\e[!p\e[?3;4l\e[4l\e>",
+        :sam => false,
+        :save_cursor => "\e7",
+        :sc => "\e7",
+        :scroll_forward => "\n",
+        :scroll_reverse => "\eM",
+        :semi_auto_right_margin => false,
+        :set_a_background => "\e[4%p1%dm",
+        :set_a_foreground => "\e[3%p1%dm",
+        :set_attributes => "%?%p9%t\e(0%e\e(B%;\e[0%?%p6%t;1%;%?%p5%t;2%;%?%p2%t;4%;%?%p1%p3%|%t;7%;%?%p4%t;5%;%?%p7%t;8%;m",
+        :set_background => "\e[4%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m",
+        :set_foreground => "\e[3%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m",
+        :set_lr_margin => "\e[?69h\e[%i%p1%d;%p2%ds",
+        :set_tab => "\eH",
+        :setab => "\e[4%p1%dm",
+        :setaf => "\e[3%p1%dm",
+        :setb => "\e[4%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m",
+        :setf => "\e[3%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m",
+        :sgr => "%?%p9%t\e(0%e\e(B%;\e[0%?%p6%t;1%;%?%p5%t;2%;%?%p2%t;4%;%?%p1%p3%|%t;7%;%?%p4%t;5%;%?%p7%t;8%;m",
+        :sgr0 => "\e(B\e[m",
+        :sitm => "\e[3m",
+        :smacs => "\e(0",
+        :smam => "\e[?7h",
+        :smcup => "\e[?1049h\e[22;0;0t",
+        :smglr => "\e[?69h\e[%i%p1%d;%p2%ds",
+        :smir => "\e[4h",
+        :smkx => "\e[?1h\e=",
+        :smm => "\e[?1034h",
+        :smso => "\e[7m",
+        :smul => "\e[4m",
+        :smxx => "\e[9m",
+        :status_line_esc_ok => false,
+        :tab => "\t",
+        :tbc => "\e[3g",
+        :tilde_glitch => false,
+        :transparent_underline => false,
+        :u6 => "\e[%i%d;%dR",
+        :u7 => "\e[6n",
+        :u8 => "\e[?%[;0123456789]c",
+        :u9 => "\e[c",
+        :ul => false,
+        :user6 => "\e[%i%d;%dR",
+        :user7 => "\e[6n",
+        :user8 => "\e[?%[;0123456789]c",
+        :user9 => "\e[c",
+        :vpa => "\e[%i%p1%dd",
+        :xenl => true,
+        :xhp => false,
+        :xhpa => false,
+        :xm => "\e[<%i%p3%d;%p1%d;%p2%d;%?%p4%tM%em%;",
+        :xon => false,
+        :xon_xoff => false,
+        :xsb => false,
+        :xt => false,
+        :xvpa => false)
+
+@testset "terminfo" begin
+    dumb = Base.TermInfo(read(IOBuffer(dumb_terminfo), Base.TermInfoRaw))
+    @test dumb.names == ["dumb", "80-column dumb tty"]
+    @test length(dumb.flags) == 2
+    @test length(dumb.numbers) == 1
+    @test length(dumb.strings) == 4
+    @test isnothing(dumb.extensions)
+    for (key, value) in dumb_capabilities
+        @test dumb[key] == value
+    end
+
+    xterm = Base.TermInfo(read(IOBuffer(xterm_terminfo), Base.TermInfoRaw))
+    @test xterm.names == ["xterm", "xterm terminal emulator (X Window System)"]
+    @test length(xterm.flags) == 40
+    @test length(xterm.numbers) == 15
+    @test length(xterm.strings) == 253
+    @test sort(xterm.extensions |> collect) == sort(xterm_extensions)
+    for (key, value) in xterm_capabilities
+        @test xterm[key] == value
+    end
+end
+
+end
diff --git a/test/testdefs.jl b/test/testdefs.jl
index 4aac988cda7fb..b4df86b62792d 100644
--- a/test/testdefs.jl
+++ b/test/testdefs.jl
@@ -2,9 +2,13 @@
 
 using Test, Random
 
+include("buildkitetestjson.jl")
+
 function runtests(name, path, isolate=true; seed=nothing)
-    old_print_setting = Test.TESTSET_PRINT_ENABLE[]
-    Test.TESTSET_PRINT_ENABLE[] = false
+    @Base.ScopedValues.with Test.TESTSET_PRINT_ENABLE=>false Test.TEST_RECORD_PASSES=>Base.get_bool_env("CI", false) begin
+    # remove all hint_handlers, so that errorshow tests are not changed by which packages have been loaded on this worker already
+    # packages that call register_error_hint should also call this again, and then re-add any hooks they want to test
+    empty!(Base.Experimental._hint_handlers)
     try
         if isolate
             # Simple enough to type and random enough so that no one will hard
@@ -20,13 +24,20 @@ function runtests(name, path, isolate=true; seed=nothing)
         end
         res_and_time_data = @timed @testset "$name" begin
             # Random.seed!(nothing) will fail
-            seed != nothing && Random.seed!(seed)
+            seed !== nothing && Random.seed!(seed)
 
             original_depot_path = copy(Base.DEPOT_PATH)
             original_load_path = copy(Base.LOAD_PATH)
             original_env = copy(ENV)
+            original_project = Base.active_project()
 
-            Base.include(m, "$path.jl")
+            try
+                Base.include(m, "$path.jl")
+            finally
+                if Base.get_bool_env("CI", false)
+                    BuildkiteTestJSON.serialize_testset_result_file(@__DIR__, Test.get_testset())
+                end
+            end
 
             if Base.DEPOT_PATH != original_depot_path
                 msg = "The `$(name)` test set mutated Base.DEPOT_PATH and did not restore the original values"
@@ -51,25 +62,38 @@ function runtests(name, path, isolate=true; seed=nothing)
                 error(msg)
             end
             if copy(ENV) != original_env
-                msg = "The `$(name)` test set mutated ENV and did not restore the original values"
-                @error(
-                    msg,
-                    testset_name = name,
-                    testset_path = path,
-                )
                 throw_error_str = get(ENV, "JULIA_TEST_CHECK_MUTATED_ENV", "true")
                 throw_error_b = parse(Bool, throw_error_str)
                 if throw_error_b
+                    msg = "The `$(name)` test set mutated ENV and did not restore the original values"
+                    @error(
+                        msg,
+                        testset_name = name,
+                        testset_path = path,
+                    )
                     error(msg)
                 end
             end
+            if Base.active_project() != original_project
+                msg = "The `$(name)` test set changed the active project and did not restore the original value"
+                @error(
+                    msg,
+                    original_project,
+                    Base.active_project(),
+                    testset_name = name,
+                    testset_path = path,
+                )
+                error(msg)
+            end
         end
         rss = Sys.maxrss()
         #res_and_time_data[1] is the testset
         ts = res_and_time_data[1]
-        passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = Test.get_test_counts(ts)
+        tc = Test.get_test_counts(ts)
         # simplify our stored data to just contain the counts
-        res_and_time_data = (TestSetException(passes+c_passes, fails+c_fails, errors+c_errors, broken+c_broken, Test.filter_errors(ts)),
+        res_and_time_data = (TestSetException(tc.passes+tc.cumulative_passes, tc.fails+tc.cumulative_fails,
+                             tc.errors+tc.cumulative_errors, tc.broken+tc.cumulative_broken,
+                             Test.filter_errors(ts)),
                              res_and_time_data[2],
                              res_and_time_data[3],
                              res_and_time_data[4],
@@ -77,10 +101,10 @@ function runtests(name, path, isolate=true; seed=nothing)
                              rss)
         return res_and_time_data
     catch ex
-        Test.TESTSET_PRINT_ENABLE[] = old_print_setting
         ex isa TestSetException || rethrow()
         return Any[ex]
     end
+    end # TESET_PRINT_ENABLE
 end
 
 # looking in . messes things up badly
diff --git a/test/testenv.jl b/test/testenv.jl
index 41706dd24e75e..3ef1126e0e927 100644
--- a/test/testenv.jl
+++ b/test/testenv.jl
@@ -35,8 +35,14 @@ if !@isdefined(testenv_defined)
         const rr_exename = ``
     end
 
+    const test_relocated_depot = haskey(ENV, "RELOCATEDEPOT")
+
     function addprocs_with_testenv(X; rr_allowed=true, kwargs...)
         exename = rr_allowed ? `$rr_exename $test_exename` : test_exename
+        if X isa Integer
+            heap_size=round(Int,(Sys.total_memory()/(1024^2)/(X+1)))
+            push!(test_exeflags.exec, "--heap-size-hint=$(heap_size)M")
+        end
         addprocs(X; exename=exename, exeflags=test_exeflags, kwargs...)
     end
 
diff --git a/test/testhelpers/ChallengePrompts.jl b/test/testhelpers/ChallengePrompts.jl
new file mode 100644
index 0000000000000..10dd1553afbbd
--- /dev/null
+++ b/test/testhelpers/ChallengePrompts.jl
@@ -0,0 +1,123 @@
+module ChallengePrompts
+
+include("FakePTYs.jl")
+using .FakePTYs: with_fake_pty
+using Serialization: serialize, deserialize
+
+const timeout = 60
+
+"""
+    challenge_prompt(code::Expr, challenges; pkgs=[])
+
+Execute the passed code in a separate process, looking for
+the passed prompts and responding as defined in the pairs of
+(prompt, response) in the collection of challenges.
+
+Optionally `import` the given `pkgs`.
+
+Returns the value of the last expression.
+"""
+function challenge_prompt(code::Expr, challenges; pkgs=[])
+    input_code = tempname()
+    open(input_code, "w") do fp
+        serialize(fp, code)
+    end
+    output_file = tempname()
+    torun = """
+        $(isempty(pkgs) ? "" : string("import ", join(pkgs, ", ")))
+        using Serialization
+        result = open($(repr(input_code))) do fp
+            eval(deserialize(fp))
+        end
+        open($(repr(output_file)), "w") do fp
+            serialize(fp, result)
+        end"""
+    cmd = `$(Base.julia_cmd()) --startup-file=no -e $torun`
+    try
+        challenge_prompt(cmd, challenges)
+        return open(output_file, "r") do fp
+            deserialize(fp)
+        end
+    finally
+        isfile(output_file) && rm(output_file)
+        isfile(input_code) && rm(input_code)
+    end
+    return nothing
+end
+
+function challenge_prompt(cmd::Cmd, challenges)
+    function format_output(output)
+        str = read(seekstart(output), String)
+        isempty(str) && return ""
+        return "Process output found:\n\"\"\"\n$str\n\"\"\""
+    end
+    out = IOBuffer()
+    with_fake_pty() do pts, ptm
+        p = run(detach(cmd), pts, pts, pts, wait=false) # getpass uses stderr by default
+        Base.close_stdio(pts)
+
+        # Kill the process if it takes too long. Typically occurs when process is waiting
+        # for input.
+        timer = Channel{Symbol}(1)
+        watcher = @async begin
+            waited = 0
+            while waited < timeout && process_running(p)
+                sleep(1)
+                waited += 1
+            end
+
+            if process_running(p)
+                kill(p)
+                put!(timer, :timeout)
+            elseif success(p)
+                put!(timer, :success)
+            else
+                put!(timer, :failure)
+            end
+
+            # SIGKILL stubborn processes
+            if process_running(p)
+                sleep(3)
+                process_running(p) && kill(p, Base.SIGKILL)
+            end
+            wait(p)
+        end
+
+        wroteall = false
+        try
+            for (challenge, response) in challenges
+                write(out, readuntil(ptm, challenge, keep=true))
+                if !isopen(ptm)
+                    error("Could not locate challenge: \"$challenge\". ",
+                          format_output(out))
+                end
+                write(ptm, response)
+            end
+            wroteall = true
+
+            # Capture output from process until `pts` is closed
+            write(out, ptm)
+        catch ex
+            if !(wroteall && ex isa Base.IOError && ex.code == Base.UV_EIO)
+                # ignore EIO from `ptm` after `pts` dies
+                error("Process failed possibly waiting for a response. ",
+                      format_output(out))
+            end
+        end
+
+        status = fetch(timer)
+        close(ptm)
+        if status !== :success
+            if status === :timeout
+                error("Process timed out possibly waiting for a response. ",
+                      format_output(out))
+            else
+                error("Failed process. ", format_output(out), "\n", p)
+            end
+        end
+        wait(watcher)
+    end
+    nothing
+end
+
+end
diff --git a/test/testhelpers/DualNumbers.jl b/test/testhelpers/DualNumbers.jl
index 9f62e3bf0d429..5c481aef47f76 100644
--- a/test/testhelpers/DualNumbers.jl
+++ b/test/testhelpers/DualNumbers.jl
@@ -41,6 +41,6 @@ Base.sqrt(x::Dual) = Dual(sqrt(x.val), x.eps/(2sqrt(x.val)))
 Base.isless(x::Dual, y::Dual) = x.val < y.val
 Base.isless(x::Real, y::Dual) = x < y.val
 Base.isinf(x::Dual) = isinf(x.val) & isfinite(x.eps)
-Base.real(x::Dual) = x # since we curently only consider Dual{<:Real}
+Base.real(x::Dual) = x # since we currently only consider Dual{<:Real}
 
 end # module
diff --git a/test/testhelpers/EvenIntegers.jl b/test/testhelpers/EvenIntegers.jl
new file mode 100644
index 0000000000000..2926d1ce65109
--- /dev/null
+++ b/test/testhelpers/EvenIntegers.jl
@@ -0,0 +1,87 @@
+"""
+The even integers, an example of set with an additive identity and closed under
+addition and multiplication, but lacking a multiplicative identity, a
+[*rng*](https://en.wikipedia.org/wiki/Rng_(algebra)).
+"""
+module EvenIntegers
+    export EvenInteger
+
+    struct EvenInteger{T <: Integer} <: Integer
+        x::T
+        function EvenInteger(x::Integer)
+            if isodd(x)
+                throw(ArgumentError("can't convert odd integer to even integer"))
+            end
+            new{typeof(x)}(x)
+        end
+    end
+    function EvenInteger(x::EvenInteger)
+        x
+    end
+    function EvenInteger{T}(x::EvenInteger{T}) where {T <: Integer}
+        x
+    end
+    function EvenInteger{T}(x::T) where {T <: Integer}
+        EvenInteger(x)
+    end
+    function EvenInteger{T}(x::Integer) where {T <: Integer}
+        throw(ArgumentError("not implemented"))
+    end
+    function Base.Int(n::EvenInteger)
+        Int(n.x)
+    end
+    function Base.iseven(::EvenInteger)
+        true
+    end
+    function Base.isodd(::EvenInteger)
+        false
+    end
+    function Base.iszero(n::EvenInteger)
+        iszero(n.x)
+    end
+    function Base.isone(::EvenInteger)
+        false
+    end
+    function Base.zero(n::EvenInteger)
+        EvenInteger(zero(n.x))
+    end
+    function Base.zero(::Type{EvenInteger{T}}) where {T <: Integer}
+        EvenInteger(zero(T))
+    end
+    function Base.:(==)(l::EvenInteger, r::EvenInteger)
+        l.x == r.x
+    end
+    function Base.:(<)(l::EvenInteger, r::EvenInteger)
+        l.x < r.x
+    end
+    function Base.promote_rule(::Type{EvenInteger{L}}, ::Type{EvenInteger{R}}) where {L <: Integer, R <: Integer}
+        EvenInteger{promote_type(L, R)}
+    end
+    function Base.promote_rule(::Type{EvenInteger{L}}, ::Type{R}) where {L <: Integer, R <: Integer}
+        promote_type(L, R)
+    end
+    function Base.:(+)(l::EvenInteger, r::EvenInteger)
+        EvenInteger(l.x + r.x)
+    end
+    function Base.:(*)(l::EvenInteger, r::EvenInteger)
+        EvenInteger(l.x * r.x)
+    end
+    function Base.:(-)(n::EvenInteger)
+        EvenInteger(-n.x)
+    end
+    function Base.:(-)(l::EvenInteger, r::EvenInteger)
+        l + (-r)
+    end
+    function right_shift(l::EvenInteger, r::Integer)
+        l.x >> r
+    end
+    function Base.:(>>)(l::EvenInteger, r::Integer)
+        right_shift(l, r)
+    end
+    function Base.:(>>)(l::EvenInteger, r::Int)  # resolve dispatch ambiguity
+        right_shift(l, r)
+    end
+    function Base.trailing_zeros(n::EvenInteger)
+        trailing_zeros(n.x)
+    end
+end
diff --git a/test/testhelpers/FakePTYs.jl b/test/testhelpers/FakePTYs.jl
index c592699440ee0..56ce6dc7d3a49 100644
--- a/test/testhelpers/FakePTYs.jl
+++ b/test/testhelpers/FakePTYs.jl
@@ -1,5 +1,4 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-
 module FakePTYs
 
 if Sys.iswindows()
@@ -24,10 +23,7 @@ function open_fake_pty()
         close(pts)
         pts = fds
         # convert pts handle to a TTY
-        #fds = pts.handle
-        #pts.status = Base.StatusClosed
-        #pts.handle = C_NULL
-        #pts = Base.TTY(fds, Base.StatusOpen)
+        #pts = open(fds)::Base.TTY
     else
         O_RDWR = Base.Filesystem.JL_O_RDWR
         O_NOCTTY = Base.Filesystem.JL_O_NOCTTY
@@ -44,8 +40,9 @@ function open_fake_pty()
         pts = RawFD(fds)
 
         # pts = fdio(fds, true)
-        # pts = Base.Filesystem.File(RawFD(fds))
-        # pts = Base.TTY(RawFD(fds); readable = false)
+        # pts = Base.Filesystem.File(pts)
+        # pts = Base.TTY(pts)
+        # pts = Base.open(pts)
         ptm = Base.TTY(RawFD(fdm))
     end
     return pts, ptm
diff --git a/test/testhelpers/FillArrays.jl b/test/testhelpers/FillArrays.jl
index 1f36a77bf8c12..d3b8d74da7148 100644
--- a/test/testhelpers/FillArrays.jl
+++ b/test/testhelpers/FillArrays.jl
@@ -9,6 +9,10 @@ Fill(v, size::Vararg{Integer}) = Fill(v, size)
 
 Base.size(F::Fill) = F.size
 
+Base.copy(F::Fill) = F
+
+Base.AbstractArray{T,N}(F::Fill{<:Any,N}) where {T,N} = Fill(T(F.value), F.size)
+
 @inline getindex_value(F::Fill) = F.value
 
 @inline function Base.getindex(F::Fill{<:Any,N}, i::Vararg{Int,N}) where {N}
@@ -27,7 +31,36 @@ end
     F
 end
 
+Base.zero(F::Fill) = Fill(zero(F.value), size(F))
+
 Base.show(io::IO, F::Fill) = print(io, "Fill($(F.value), $(F.size))")
 Base.show(io::IO, ::MIME"text/plain", F::Fill) = show(io, F)
 
+_first_or_one(t::Tuple) = t[1]
+_first_or_one(t::Tuple{}) = 1
+
+_match_size(sz::Tuple{}, inner::Tuple{}, outer::Tuple{}) = ()
+function _match_size(sz::Tuple, inner::Tuple, outer::Tuple)
+    t1 = (_first_or_one(sz), _first_or_one(inner), _first_or_one(outer))
+    t2 = _match_size(sz[2:end], inner[2:end], outer[2:end])
+    (t1, t2...)
+end
+
+function _repeat_size(sz::Tuple, inner::Tuple, outer::Tuple)
+    t = _match_size(sz, inner, outer)
+    map(*, getindex.(t, 1), getindex.(t, 2), getindex.(t, 3))
+end
+
+function Base.repeat(A::Fill; inner=ntuple(x->1, ndims(A)), outer=ntuple(x->1, ndims(A)))
+    Base.require_one_based_indexing(A)
+    length(inner) >= ndims(A) ||
+        throw(ArgumentError("number of inner repetitions $(length(inner)) cannot be "*
+            "less than number of dimensions of input array $(ndims(A))"))
+    length(outer) >= ndims(A) ||
+        throw(ArgumentError("number of outer repetitions $(length(outer)) cannot be "*
+            "less than number of dimensions of input array $(ndims(A))"))
+    sz = _repeat_size(size(A), Tuple(inner), Tuple(outer))
+    Fill(getindex_value(A), sz)
+end
+
 end
diff --git a/test/testhelpers/Furlongs.jl b/test/testhelpers/Furlongs.jl
index f63b5460c7c16..3ddf42bf1a82c 100644
--- a/test/testhelpers/Furlongs.jl
+++ b/test/testhelpers/Furlongs.jl
@@ -99,5 +99,12 @@ for op in (:rem, :mod)
     end
 end
 Base.sqrt(x::Furlong) = _div(sqrt(x.val), x, Val(2))
+Base.muladd(x::Furlong, y::Furlong, z::Furlong) = x*y + z
+Base.muladd(x::Furlong, y::Number, z::Number) = x*y + z
+Base.muladd(x::Furlong, y::Furlong, z::Number) = x*y + z
+Base.muladd(x::Number, y::Furlong, z::Number) = x*y + z
+Base.muladd(x::Number, y::Number, z::Furlong) = x*y + z
+Base.muladd(x::Number, y::Furlong, z::Furlong) = x*y + z
+Base.muladd(x::Furlong, y::Number, z::Furlong) = x*y + z
 
 end
diff --git a/test/testhelpers/ImmutableArrays.jl b/test/testhelpers/ImmutableArrays.jl
index df2a78387e07b..8f2d23be3a7a7 100644
--- a/test/testhelpers/ImmutableArrays.jl
+++ b/test/testhelpers/ImmutableArrays.jl
@@ -25,4 +25,7 @@ Base.getindex(A::ImmutableArray, i...) = getindex(A.data, i...)
 AbstractArray{T}(A::ImmutableArray) where {T} = ImmutableArray(AbstractArray{T}(A.data))
 AbstractArray{T,N}(A::ImmutableArray{S,N}) where {S,T,N} = ImmutableArray(AbstractArray{T,N}(A.data))
 
+Base.copy(A::ImmutableArray) = ImmutableArray(copy(A.data))
+Base.zero(A::ImmutableArray) = ImmutableArray(zero(A.data))
+
 end
diff --git a/test/testhelpers/InfiniteArrays.jl b/test/testhelpers/InfiniteArrays.jl
index 14b2e56daf1c6..e1cf38c104167 100644
--- a/test/testhelpers/InfiniteArrays.jl
+++ b/test/testhelpers/InfiniteArrays.jl
@@ -31,13 +31,13 @@ Base.max(::Infinity, ::Int) = Infinity()
 Base.max(::Int, ::Infinity) = Infinity()
 
 """
-    OneToInf(n)
+    OneToInf()
 
 Define an `AbstractInfUnitRange` that behaves like `1:∞`, with the added
 distinction that the limits are guaranteed (by the type system) to
 be 1 and ∞.
 """
-struct OneToInf{T<:Integer} <: AbstractUnitRange{T} end
+struct OneToInf{T<:Integer} <: Base.AbstractOneTo{T} end
 
 OneToInf() = OneToInf{Int}()
 
@@ -48,5 +48,6 @@ Base.length(r::OneToInf) = Infinity()
 Base.last(r::OneToInf) = Infinity()
 Base.unitrange(r::OneToInf) = r
 Base.oneto(::Infinity) = OneToInf()
+Base.unchecked_oneto(::Infinity) = OneToInf()
 
 end
diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl
index 705bd07b2878c..5acaa88064245 100644
--- a/test/testhelpers/OffsetArrays.jl
+++ b/test/testhelpers/OffsetArrays.jl
@@ -5,7 +5,7 @@
 # This test file is designed to exercise support for generic indexing,
 # even though offset arrays aren't implemented in Base.
 
-# OffsetArrays v1.11.2
+# OffsetArrays v1.15.0
 # No compat patch and docstrings
 module OffsetArrays
 
@@ -73,10 +73,15 @@ end
 IdOffsetRange(r::IdOffsetRange) = r
 
 # Constructor to make `show` round-trippable
+# try to preserve typeof(values) if the indices are known to be 1-based
+_subtractindexoffset(values, indices::Union{Base.OneTo, IdentityUnitRange{<:Base.OneTo}}, offset) = values
+_subtractindexoffset(values, indices, offset) = _subtractoffset(values, offset)
 function IdOffsetRange(; values::AbstractUnitRange{<:Integer}, indices::AbstractUnitRange{<:Integer})
     length(values) == length(indices) || throw(ArgumentError("values and indices must have the same length"))
+    values_nooffset = no_offset_view(values)
     offset = first(indices) - 1
-    return IdOffsetRange(values .- offset, offset)
+    values_minus_offset = _subtractindexoffset(values_nooffset, indices, offset)
+    return IdOffsetRange(values_minus_offset, offset)
 end
 
 # Conversions to an AbstractUnitRange{Int} (and to an OrdinalRange{Int,Int} on Julia v"1.6") are necessary
@@ -110,12 +115,19 @@ offset_coerce(::Type{I}, r::AbstractUnitRange) where I<:AbstractUnitRange =
 @inline Base.unsafe_indices(r::IdOffsetRange) = (Base.axes1(r),)
 @inline Base.length(r::IdOffsetRange) = length(r.parent)
 @inline Base.isempty(r::IdOffsetRange) = isempty(r.parent)
+#= We specialize on reduced_indices to work around cases where the parent axis type doesn't
+support reduced_index, but the axes do support reduced_indices
+The difference is that reduced_index expects the axis type to remain unchanged,
+which may not always be possible, eg. for statically sized axes
+See https://github.com/JuliaArrays/OffsetArrays.jl/issues/204
+=#
+function Base.reduced_indices(inds::Tuple{IdOffsetRange, Vararg{IdOffsetRange}}, d::Int)
+    parents_reduced = Base.reduced_indices(map(parent, inds), d)
+    ntuple(i -> IdOffsetRange(parents_reduced[i], inds[i].offset), Val(length(inds)))
+end
 Base.reduced_index(i::IdOffsetRange) = typeof(i)(first(i):first(i))
 # Workaround for #92 on Julia < 1.4
 Base.reduced_index(i::IdentityUnitRange{<:IdOffsetRange}) = typeof(i)(first(i):first(i))
-for f in [:firstindex, :lastindex]
-    @eval @inline Base.$f(r::IdOffsetRange) = $f(r.parent) + r.offset
-end
 for f in [:first, :last]
     # coerce the type to deal with values that get promoted on addition (eg. Bool)
     @eval @inline Base.$f(r::IdOffsetRange) = eltype(r)($f(r.parent) + r.offset)
@@ -186,17 +198,20 @@ for R in [:IIUR, :IdOffsetRange]
 end
 
 # offset-preserve broadcasting
-Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(-), r::IdOffsetRange{T}, x::Integer) where T =
-    IdOffsetRange{T}(r.parent .- x, r.offset)
-Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(+), r::IdOffsetRange{T}, x::Integer) where T =
-    IdOffsetRange{T}(r.parent .+ x, r.offset)
-Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::IdOffsetRange{T}) where T =
-    IdOffsetRange{T}(x .+ r.parent, r.offset)
+Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(-), r::IdOffsetRange, x::Integer) =
+    IdOffsetRange(r.parent .- x, r.offset)
+Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(+), r::IdOffsetRange, x::Integer) =
+    IdOffsetRange(r.parent .+ x, r.offset)
+Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::IdOffsetRange) =
+    IdOffsetRange(x .+ r.parent, r.offset)
+Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(big), r::IdOffsetRange) =
+    IdOffsetRange(big.(r.parent), r.offset)
 
 Base.show(io::IO, r::IdOffsetRange) = print(io, IdOffsetRange, "(values=",first(r), ':', last(r),", indices=",first(eachindex(r)),':',last(eachindex(r)), ")")
 
 # Optimizations
 @inline Base.checkindex(::Type{Bool}, inds::IdOffsetRange, i::Real) = Base.checkindex(Bool, inds.parent, i - inds.offset)
+Base._firstslice(i::IdOffsetRange) = IdOffsetRange(Base._firstslice(i.parent), i.offset)
 
 ########################################################################################################
 # origin.jl
@@ -308,12 +323,12 @@ _popreshape(A::AbstractArray, ax, inds) = A
 
 # Technically we know the length of CartesianIndices but we need to convert it first, so here we
 # don't put it in OffsetAxisKnownLength.
-const OffsetAxisKnownLength = Union{Integer,AbstractUnitRange}
-const OffsetAxis = Union{OffsetAxisKnownLength,Colon}
-const ArrayInitializer = Union{UndefInitializer,Missing,Nothing}
+const OffsetAxisKnownLength = Union{Integer, AbstractUnitRange}
+const OffsetAxis = Union{OffsetAxisKnownLength, Colon}
+const ArrayInitializer = Union{UndefInitializer, Missing, Nothing}
 
 ## OffsetArray
-struct OffsetArray{T,N,AA<:AbstractArray} <: AbstractArray{T,N}
+struct OffsetArray{T,N,AA<:AbstractArray{T,N}} <: AbstractArray{T,N}
     parent::AA
     offsets::NTuple{N,Int}
     @inline function OffsetArray{T, N, AA}(parent::AA, offsets::NTuple{N, Int}; checkoverflow = true) where {T, N, AA<:AbstractArray{T,N}}
@@ -481,6 +496,10 @@ Base.parent(A::OffsetArray) = A.parent
 # Base.Broadcast.BroadcastStyle(::Type{<:OffsetArray{<:Any, <:Any, AA}}) where AA = Base.Broadcast.BroadcastStyle(AA)
 
 @inline Base.size(A::OffsetArray) = size(parent(A))
+# specializing length isn't necessary, as length(A) = prod(size(A)),
+# but specializing length enables constant-propagation for statically sized arrays
+# see https://github.com/JuliaArrays/OffsetArrays.jl/pull/304
+@inline Base.length(A::OffsetArray) = length(parent(A))
 
 @inline Base.axes(A::OffsetArray) = map(IdOffsetRange, axes(parent(A)), A.offsets)
 @inline Base.axes(A::OffsetArray, d) = d <= ndims(A) ? IdOffsetRange(axes(parent(A), d), A.offsets[d]) : IdOffsetRange(axes(parent(A), d))
@@ -527,8 +546,10 @@ _similar_axes_or_length(A, T, ax::I, ::I) where {I} = similar(A, T, map(_indexle
 _similar_axes_or_length(AT, ax::I, ::I) where {I} = similar(AT, map(_indexlength, ax))
 
 # reshape accepts a single colon
-Base.reshape(A::AbstractArray, inds::OffsetAxis...) = reshape(A, inds)
-function Base.reshape(A::AbstractArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}})
+# this method is limited to AbstractUnitRange{<:Integer} to avoid method overwritten errors if Base defines the same,
+# see https://github.com/JuliaLang/julia/pull/56850
+Base.reshape(A::AbstractArray, inds::Union{Integer, Colon, AbstractUnitRange{<:Integer}}...) = reshape(A, inds)
+function Base.reshape(A::AbstractArray, inds::Tuple{Vararg{OffsetAxis}})
     AR = reshape(no_offset_view(A), map(_indexlength, inds))
     O = OffsetArray(AR, map(_offset, axes(AR), inds))
     return _popreshape(O, axes(AR), _filterreshapeinds(inds))
@@ -552,36 +573,14 @@ _reshape2(A, inds) = reshape(A, inds)
 _reshape2(A::OffsetArray, inds) = reshape(parent(A), inds)
 _reshape_nov(A, inds) = _reshape(no_offset_view(A), inds)
 
-Base.reshape(A::OffsetArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}}) =
-    OffsetArray(_reshape(parent(A), inds), map(_toaxis, inds))
 # And for non-offset axes, we can just return a reshape of the parent directly
-Base.reshape(A::OffsetArray, inds::Tuple{Union{Integer,Base.OneTo},Vararg{Union{Integer,Base.OneTo}}}) = _reshape_nov(A, inds)
+Base.reshape(A::OffsetArray, inds::Tuple{Integer,Vararg{Integer}}) = _reshape_nov(A, inds)
 Base.reshape(A::OffsetArray, inds::Dims) = _reshape_nov(A, inds)
-Base.reshape(A::OffsetVector, ::Colon) = A
-Base.reshape(A::OffsetVector, ::Tuple{Colon}) = A
-Base.reshape(A::OffsetArray, ::Colon) = reshape(A, (Colon(),))
-Base.reshape(A::OffsetArray, inds::Union{Int,Colon}...) = reshape(A, inds)
-Base.reshape(A::OffsetArray, inds::Tuple{Vararg{Union{Int,Colon}}}) = _reshape_nov(A, inds)
-# The following two additional methods for Colon are added to resolve method ambiguities to
-# Base: https://github.com/JuliaLang/julia/pull/45387#issuecomment-1132859663
-Base.reshape(A::OffsetArray, inds::Colon) = _reshape_nov(A, inds)
-Base.reshape(A::OffsetArray, inds::Tuple{Colon}) = _reshape_nov(A, inds)
 
 # permutedims in Base does not preserve axes, and can not be fixed in a non-breaking way
 # This is a stopgap solution
 Base.permutedims(v::OffsetVector) = reshape(v, (1, axes(v, 1)))
 
-Base.fill(v, inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {N} =
-    fill!(similar(Array{typeof(v)}, inds), v)
-Base.zeros(::Type{T}, inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {T, N} =
-    fill!(similar(Array{T}, inds), zero(T))
-Base.ones(::Type{T}, inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {T, N} =
-    fill!(similar(Array{T}, inds), one(T))
-Base.trues(inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {N} =
-    fill!(similar(BitArray, inds), true)
-Base.falses(inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {N} =
-    fill!(similar(BitArray, inds), false)
-
 Base.zero(A::OffsetArray) = parent_call(zero, A)
 Base.fill!(A::OffsetArray, x) = parent_call(Ap -> fill!(Ap, x), A)
 
@@ -641,7 +640,7 @@ Base.copy(A::OffsetArray) = parent_call(copy, A)
 
 Base.strides(A::OffsetArray) = strides(parent(A))
 Base.elsize(::Type{OffsetArray{T,N,A}}) where {T,N,A} = Base.elsize(A)
-@inline Base.unsafe_convert(::Type{Ptr{T}}, A::OffsetArray{T}) where {T} = Base.unsafe_convert(Ptr{T}, parent(A))
+Base.cconvert(P::Type{Ptr{T}}, A::OffsetArray{T}) where {T} = Base.cconvert(P, parent(A))
 
 # For fast broadcasting: ref https://discourse.julialang.org/t/why-is-there-a-performance-hit-on-broadcasting-with-offsetarrays/32194
 Base.dataids(A::OffsetArray) = Base.dataids(parent(A))
@@ -741,15 +740,6 @@ if eltype(IIUR) === Int
     Base.map(::Type{T}, r::IdentityUnitRange) where {T<:Real} = _indexedby(map(T, UnitRange(r)), axes(r))
 end
 
-# mapreduce is faster with an IdOffsetRange than with an OffsetUnitRange
-# We therefore convert OffsetUnitRanges to IdOffsetRanges with the same values and axes
-function Base.mapreduce(f, op, A1::OffsetUnitRange{<:Integer}, As::OffsetUnitRange{<:Integer}...; kw...)
-    As = (A1, As...)
-    ofs = map(A -> first(axes(A,1)) - 1, As)
-    AIds = map((A, of) -> IdOffsetRange(_subtractoffset(parent(A), of), of), As, ofs)
-    mapreduce(f, op, AIds...; kw...)
-end
-
 # Optimize certain reductions that treat an OffsetVector as a list
 for f in [:minimum, :maximum, :extrema, :sum]
     @eval Base.$f(r::OffsetRange) = $f(parent(r))
@@ -771,7 +761,8 @@ Base.append!(A::OffsetVector, items) = (append!(A.parent, items); A)
 Base.empty!(A::OffsetVector) = (empty!(A.parent); A)
 
 # These functions keep the summary compact
-function Base.inds2string(inds::Tuple{Vararg{Union{IdOffsetRange, IdentityUnitRange{<:IdOffsetRange}}}})
+const OffsetIndices = Union{IdOffsetRange, IdentityUnitRange{<:IdOffsetRange}}
+function Base.inds2string(inds::Tuple{OffsetIndices, Vararg{OffsetIndices}})
     Base.inds2string(map(UnitRange, inds))
 end
 Base.showindices(io::IO, ind1::IdOffsetRange, inds::IdOffsetRange...) = Base.showindices(io, map(UnitRange, (ind1, inds...))...)
@@ -795,7 +786,33 @@ function Base.replace_in_print_matrix(A::OffsetArray{<:Any,1}, i::Integer, j::In
     Base.replace_in_print_matrix(parent(A), ip, j, s)
 end
 
+# Actual unsafe_wrap implementation
+@inline function _unsafe_wrap(pointer::Ptr{T}, inds::NTuple{N, OffsetAxisKnownLength}; own = false, kw...) where {T,N}
+    _checkindices(N, inds, "indices")
+    AA = Base.unsafe_wrap(Array, pointer, map(_indexlength, inds); own=own)
+    OffsetArray{T, N, typeof(AA)}(AA, map(_indexoffset, inds); kw...)
+end
+const OffsetArrayUnion{T,N} = Union{Type{OffsetArray}, Type{OffsetArray{T}}, Type{OffsetArray{T,N}}, Type{OffsetArray{T1, N} where T1}} where {T,N}
+
+@inline function Base.unsafe_wrap(::OffsetArrayUnion{T,N}, pointer::Ptr{T}, inds::NTuple{N, OffsetAxisKnownLength}; kw...) where {T,N}
+    _unsafe_wrap(pointer, inds; kw...)
+end
+# Avoid ambiguity
+@inline function Base.unsafe_wrap(::OffsetArrayUnion{T,N}, pointer::Ptr{T}, inds::NTuple{N, <:Integer}; kw...) where {T,N}
+    _unsafe_wrap(pointer, inds; kw...)
+end
+@inline function Base.unsafe_wrap(::OffsetArrayUnion{T,N}, pointer::Ptr{T}, inds::Vararg{OffsetAxisKnownLength,N}; kw...) where {T,N}
+    _unsafe_wrap(pointer, inds; kw...)
+end
+# Avoid ambiguity
+@inline function Base.unsafe_wrap(::OffsetArrayUnion{T,N}, pointer::Ptr{T}, inds::Vararg{Integer,N}; kw...) where {T,N}
+    _unsafe_wrap(pointer, inds; kw...)
+end
+
 no_offset_view(A::OffsetArray) = no_offset_view(parent(A))
+no_offset_view(a::Base.Slice{<:Base.OneTo}) = a
+no_offset_view(a::Base.Slice) = Base.Slice(UnitRange(a))
+no_offset_view(S::SubArray) = view(parent(S), map(no_offset_view, parentindices(S))...)
 no_offset_view(a::Array) = a
 no_offset_view(i::Number) = i
 no_offset_view(A::AbstractArray) = _no_offset_view(axes(A), A)
@@ -811,9 +828,12 @@ _no_offset_view(::Any, A::AbstractUnitRange) = UnitRange(A)
 # These two helpers are deliberately not exported; their meaning can be very different in
 # other scenarios and will be very likely to cause name conflicts if exported.
 #####
+
+_halfroundInt(v, r::RoundingMode) = div(v, 2, r)
+
 function center(A::AbstractArray, r::RoundingMode=RoundDown)
     map(axes(A)) do inds
-        round(Int, (length(inds)-1)/2, r) + first(inds)
+        _halfroundInt(length(inds)-1, r) + first(inds)
     end
 end
 
@@ -821,17 +841,6 @@ centered(A::AbstractArray, cp::Dims=center(A)) = OffsetArray(A, .-cp)
 
 centered(A::AbstractArray, i::CartesianIndex) = centered(A, Tuple(i))
 
-# we may pass the searchsorted* functions to the parent, and wrap the offset
-for f in [:searchsortedfirst, :searchsortedlast, :searchsorted]
-    _safe_f = Symbol("_safe_" * String(f))
-    @eval function $_safe_f(v::OffsetArray, x, ilo, ihi, o::Base.Ordering)
-        offset = firstindex(v) - firstindex(parent(v))
-        $f(parent(v), x, ilo - offset, ihi - offset, o) .+ offset
-    end
-    @eval Base.$f(v::OffsetVector, x, ilo::T, ihi::T, o::Base.Ordering) where T<:Integer =
-        $_safe_f(v, x, ilo, ihi, o)
-end
-
 ##
 # Deprecations
 ##
diff --git a/test/testhelpers/OffsetDenseArrays.jl b/test/testhelpers/OffsetDenseArrays.jl
new file mode 100644
index 0000000000000..fb256234e2099
--- /dev/null
+++ b/test/testhelpers/OffsetDenseArrays.jl
@@ -0,0 +1,31 @@
+"""
+    module OffsetDenseArrays
+
+A minimal implementation of an offset array which is also <: DenseArray.
+"""
+module OffsetDenseArrays
+
+struct OffsetDenseArray{A <: DenseVector, T} <: DenseVector{T}
+    x::A
+    offset::Int
+end
+OffsetDenseArray(x::AbstractVector{T}, i::Integer) where {T} = OffsetDenseArray{typeof(x), T}(x, Int(i))
+
+Base.size(x::OffsetDenseArray) = size(x.x)
+Base.pointer(x::OffsetDenseArray) = pointer(x.x)
+
+function Base.getindex(x::OffsetDenseArray, i::Integer)
+    @boundscheck checkbounds(x.x, i - x.offset)
+    x.x[i - x.offset]
+end
+
+function Base.setindex(x::OffsetDenseArray, v, i::Integer)
+    @boundscheck checkbounds(x.x, i - x.offset)
+    x.x[i - x.offset] = v
+end
+
+Base.IndexStyle(::Type{<:OffsetDenseArray}) = Base.IndexLinear()
+Base.axes(x::OffsetDenseArray) = (x.offset + 1 : x.offset + length(x.x),)
+Base.keys(x::OffsetDenseArray) = only(axes(x))
+
+end # module
diff --git a/test/testhelpers/Quaternions.jl b/test/testhelpers/Quaternions.jl
index 1eddad322ec40..b1a414266bb34 100644
--- a/test/testhelpers/Quaternions.jl
+++ b/test/testhelpers/Quaternions.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 module Quaternions
+using LinearAlgebra
 using Random
 
 export Quaternion
@@ -13,16 +14,23 @@ struct Quaternion{T<:Real} <: Number
     v2::T
     v3::T
 end
+Quaternion{T}(s::Real) where {T<:Real} = Quaternion{T}(T(s), zero(T), zero(T), zero(T))
 Quaternion(s::Real, v1::Real, v2::Real, v3::Real) = Quaternion(promote(s, v1, v2, v3)...)
 Base.convert(::Type{Quaternion{T}}, s::Real) where {T <: Real} =
     Quaternion{T}(convert(T, s), zero(T), zero(T), zero(T))
+Base.promote_rule(::Type{Quaternion{T}}, ::Type{S}) where {T <: Real, S <: Real} =
+    Quaternion{promote_type(T, S)}
 Base.abs2(q::Quaternion) = q.s*q.s + q.v1*q.v1 + q.v2*q.v2 + q.v3*q.v3
 Base.float(z::Quaternion{T}) where T = Quaternion(float(z.s), float(z.v1), float(z.v2), float(z.v3))
 Base.abs(q::Quaternion) = sqrt(abs2(q))
 Base.real(::Type{Quaternion{T}}) where {T} = T
+Base.real(q::Quaternion) = q.s
 Base.conj(q::Quaternion) = Quaternion(q.s, -q.v1, -q.v2, -q.v3)
 Base.isfinite(q::Quaternion) = isfinite(q.s) & isfinite(q.v1) & isfinite(q.v2) & isfinite(q.v3)
+Base.isreal(q::Quaternion) = iszero(q.v1) & iszero(q.v2) & iszero(q.v3)
 Base.zero(::Type{Quaternion{T}}) where T = Quaternion{T}(zero(T), zero(T), zero(T), zero(T))
+# avoid defining sqrt(::Quaternion)
+LinearAlgebra.choltype(::AbstractArray{Quaternion{T}}) where T = Quaternion{promote_type(T, Float32)}
 
 Base.:(+)(ql::Quaternion, qr::Quaternion) =
  Quaternion(ql.s + qr.s, ql.v1 + qr.v1, ql.v2 + qr.v2, ql.v3 + qr.v3)
@@ -33,9 +41,14 @@ Base.:(*)(q::Quaternion, w::Quaternion) = Quaternion(q.s*w.s - q.v1*w.v1 - q.v2*
                                             q.s*w.v2 - q.v1*w.v3 + q.v2*w.s + q.v3*w.v1,
                                             q.s*w.v3 + q.v1*w.v2 - q.v2*w.v1 + q.v3*w.s)
 Base.:(*)(q::Quaternion, r::Real) = Quaternion(q.s*r, q.v1*r, q.v2*r, q.v3*r)
-Base.:(*)(q::Quaternion, b::Bool) = b * q # remove method ambiguity
+Base.:(*)(q::Quaternion, r::Bool) = Quaternion(q.s*r, q.v1*r, q.v2*r, q.v3*r) # remove method ambiguity
+Base.:(*)(r::Real, q::Quaternion) = q * r
+Base.:(*)(r::Bool, q::Quaternion) = q * r # remove method ambiguity
 Base.:(/)(q::Quaternion, w::Quaternion) = q * conj(w) * (1.0 / abs2(w))
 Base.:(\)(q::Quaternion, w::Quaternion) = conj(q) * w * (1.0 / abs2(q))
+Base.:(/)(q::Quaternion, r::Real) = Quaternion(q.s / r, q.v1 / r, q.v2 / r, q.v3 / r)
+Base.:(==)(q::Quaternion, w::Quaternion) =
+    (q.s == w.s) & (q.v1 == w.v1) & (q.v2 == w.v2) & (q.v3 == w.v3)
 
 # adapted from https://github.com/JuliaGeometry/Quaternions.jl/pull/42
 function Base.rand(rng::AbstractRNG, ::Random.SamplerType{Quaternion{T}}) where {T<:Real}
diff --git a/test/testhelpers/SizedArrays.jl b/test/testhelpers/SizedArrays.jl
index dfcc5b79f1387..bd0272d78987d 100644
--- a/test/testhelpers/SizedArrays.jl
+++ b/test/testhelpers/SizedArrays.jl
@@ -9,8 +9,23 @@ module SizedArrays
 
 import Base: +, *, ==
 
+using LinearAlgebra
+import LinearAlgebra: mul!
+
 export SizedArray
 
+struct SOneTo{N} <: Base.AbstractOneTo{Int} end
+SOneTo(N) = SOneTo{N}()
+Base.length(::SOneTo{N}) where {N} = N
+Base.size(r::SOneTo) = (length(r),)
+Base.axes(r::SOneTo) = (r,)
+Base.first(::SOneTo) = 1
+Base.last(r::SOneTo) = length(r)
+Base.show(io::IO, r::SOneTo) = print(io, "SOneTo(", length(r), ")")
+
+Broadcast.axistype(a::Base.OneTo, s::SOneTo) = s
+Broadcast.axistype(s::SOneTo, a::Base.OneTo) = s
+
 struct SizedArray{SZ,T,N,A<:AbstractArray} <: AbstractArray{T,N}
     data::A
     function SizedArray{SZ}(data::AbstractArray{T,N}) where {SZ,T,N}
@@ -21,20 +36,71 @@ struct SizedArray{SZ,T,N,A<:AbstractArray} <: AbstractArray{T,N}
         SZ == size(data) || throw(ArgumentError("size mismatch!"))
         new{SZ,T,N,A}(A(data))
     end
+    function SizedArray{SZ,T,N}(data::A) where {SZ,T,N,A<:AbstractArray{T,N}}
+        SizedArray{SZ,T,N,A}(data)
+    end
+    function SizedArray{SZ,T}(data::A) where {SZ,T,N,A<:AbstractArray{T,N}}
+        SizedArray{SZ,T,N,A}(data)
+    end
 end
-Base.convert(::Type{SizedArray{SZ,T,N,A}}, data::AbstractArray) where {SZ,T,N,A} = SizedArray{SZ,T,N,A}(data)
+SizedMatrix{SZ,T,A<:AbstractArray} = SizedArray{SZ,T,2,A}
+SizedVector{SZ,T,A<:AbstractArray} = SizedArray{SZ,T,1,A}
+Base.convert(::Type{S}, data::AbstractArray) where {S<:SizedArray} = data isa S ? data : S(data)
 
 # Minimal AbstractArray interface
 Base.size(a::SizedArray) = size(typeof(a))
 Base.size(::Type{<:SizedArray{SZ}}) where {SZ} = SZ
+Base.axes(a::SizedArray) = map(SOneTo, size(a))
 Base.getindex(A::SizedArray, i...) = getindex(A.data, i...)
+Base.setindex!(A::SizedArray, v, i...) = setindex!(A.data, v, i...)
 Base.zero(::Type{T}) where T <: SizedArray = SizedArray{size(T)}(zeros(eltype(T), size(T)))
+function Base.one(::Type{SizedMatrix{SZ,T,A}}) where {SZ,T,A}
+    allequal(SZ) || throw(DimensionMismatch("multiplicative identity defined only for square matrices"))
+    D = diagm(fill(one(T), SZ[1]))
+    SizedArray{SZ}(convert(A, D))
+end
+Base.parent(S::SizedArray) = S.data
 +(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = SizedArray{SZ}(S1.data + S2.data)
 ==(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = S1.data == S2.data
-function *(S1::SizedArray, S2::SizedArray)
+
+function Base.similar(::Type{A}, shape::Tuple{SOneTo, Vararg{SOneTo}}) where {A<:AbstractArray}
+    R = similar(A, length.(shape))
+    SizedArray{length.(shape)}(R)
+end
+function Base.similar(x::SizedArray, ::Type{T}, shape::Tuple{SOneTo, Vararg{SOneTo}}) where {T}
+    sz = map(length, shape)
+    SizedArray{sz}(similar(parent(x), T, sz))
+end
+function Base.reshape(x::AbstractArray, shape::Tuple{SOneTo, Vararg{SOneTo}})
+    sz = map(length, shape)
+    SizedArray{length.(sz)}(reshape(x, length.(sz)))
+end
+
+const SizedMatrixLike = Union{SizedMatrix, Transpose{<:Any, <:SizedMatrix}, Adjoint{<:Any, <:SizedMatrix}}
+
+_data(S::SizedArray) = S.data
+_data(T::Transpose{<:Any, <:SizedArray}) = transpose(_data(parent(T)))
+_data(T::Adjoint{<:Any, <:SizedArray}) = adjoint(_data(parent(T)))
+
+function *(S1::SizedMatrixLike, S2::SizedMatrixLike)
     0 < ndims(S1) < 3 && 0 < ndims(S2) < 3 && size(S1, 2) == size(S2, 1) || throw(ArgumentError("size mismatch!"))
-    data = S1.data * S2.data
+    data = _data(S1) * _data(S2)
     SZ = ndims(data) == 1 ? (size(S1, 1), ) : (size(S1, 1), size(S2, 2))
     SizedArray{SZ}(data)
 end
+
+# deliberately wide method definitions to test for method ambiguties in LinearAlgebra
+*(S1::SizedMatrixLike, M::AbstractMatrix) = _data(S1) * M
+mul!(dest::AbstractMatrix, S1::SizedMatrix, M::AbstractMatrix, α::Number, β::Number) =
+    mul!(dest, _data(S1), M, α, β)
+mul!(dest::AbstractMatrix, M::AbstractMatrix, S2::SizedMatrix, α::Number, β::Number) =
+    mul!(dest, M, _data(S2), α, β)
+mul!(dest::AbstractMatrix, S1::SizedMatrix, S2::SizedMatrix, α::Number, β::Number) =
+    mul!(dest, _data(S1), _data(S2), α, β)
+mul!(dest::AbstractVector, M::AbstractMatrix, v::SizedVector, α::Number, β::Number) =
+    mul!(dest, M, _data(v), α, β)
+
+LinearAlgebra.zeroslike(::Type{S}, ax::Tuple{SizedArrays.SOneTo, Vararg{SizedArrays.SOneTo}}) where {S<:SizedArray} =
+            zeros(eltype(S), ax)
+
 end
diff --git a/test/testhelpers/StructArrays.jl b/test/testhelpers/StructArrays.jl
new file mode 100644
index 0000000000000..f03b07f4e60ad
--- /dev/null
+++ b/test/testhelpers/StructArrays.jl
@@ -0,0 +1,39 @@
+module StructArrays
+
+struct StructArray{T,N,C <: Tuple{Vararg{AbstractArray{<:Any,N}}}} <: AbstractArray{T,N}
+    components :: C
+
+    function StructArray{T,N,C}(components::C) where {T,N,C}
+        fieldcount(T) == length(components) || throw(ArgumentError("number of components incompatible with eltype"))
+        allequal(axes.(components)) || throw(ArgumentError("component arrays must have the same axes"))
+        new{T,N,C}(components)
+    end
+end
+
+function StructArray{T}(components::Tuple{Vararg{AbstractArray{<:Any,N}}}) where {T,N}
+    StructArray{T,N,typeof(components)}(components)
+end
+
+Base.size(S::StructArray) = size(S.components[1])
+Base.axes(S::StructArray) = axes(S.components[1])
+function Base.getindex(S::StructArray{T,N}, inds::Vararg{Int,N}) where {T,N}
+    vals = map(x -> x[inds...], S.components)
+    T(vals...)
+end
+function Base.setindex!(S::StructArray{T,N}, val, inds::Vararg{Int,N}) where {T,N}
+    vals = getfield.(Ref(convert(T, val)), fieldnames(T))
+    for (A,v) in zip(S.components, vals)
+        A[inds...] = v
+    end
+    S
+end
+
+isnonemptystructtype(::Type{T}) where {T} = isstructtype(T) && fieldcount(T) != 0
+
+function Base.similar(S::StructArray, ::Type{T}, dims::Tuple{Int, Vararg{Int}}) where {T}
+    isnonemptystructtype(T) || return similar(S.components[1], T, dims)
+    arrs = similar.(S.components, fieldtypes(T), Ref(dims))
+    StructArray{T}(arrs)
+end
+
+end
diff --git a/test/testhelpers/ULPError.jl b/test/testhelpers/ULPError.jl
new file mode 100644
index 0000000000000..6ca7d365ebaff
--- /dev/null
+++ b/test/testhelpers/ULPError.jl
@@ -0,0 +1,47 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module ULPError
+    export ulp_error, ulp_error_maximum
+    function ulp_error(accurate::AbstractFloat, approximate::AbstractFloat)
+        # the ULP error is usually not required to great accuracy, so `Float32` should be precise enough
+        zero_return = 0f0
+        inf_return = Inf32
+        # handle floating-point edge cases
+        if !(isfinite(accurate) && isfinite(approximate))
+            accur_is_nan = isnan(accurate)
+            approx_is_nan = isnan(approximate)
+            if accur_is_nan || approx_is_nan
+                return if accur_is_nan === approx_is_nan
+                    zero_return
+                else
+                    inf_return
+                end
+            end
+            if isinf(approximate)
+                return if isinf(accurate) && (signbit(accurate) == signbit(approximate))
+                    zero_return
+                else
+                    inf_return
+                end
+            end
+        end
+        acc = if accurate isa Union{Float16, Float32}
+            # widen for better accuracy when doing so does not impact performance too much
+            widen(accurate)
+        else
+            accurate
+        end
+        abs(Float32((approximate - acc) / eps(approximate))::Float32)
+    end
+    function ulp_error(accurate, approximate, x::AbstractFloat)
+        acc = accurate(x)
+        app = approximate(x)
+        ulp_error(acc, app)
+    end
+    function ulp_error(func::Func, x::AbstractFloat) where {Func}
+        ulp_error(func ∘ BigFloat, func, x)
+    end
+    function ulp_error_maximum(func::Func, iterator) where {Func}
+        maximum(Base.Fix1(ulp_error, func), iterator)
+    end
+end
diff --git a/test/testhelpers/arrayindexingtypes.jl b/test/testhelpers/arrayindexingtypes.jl
index 0e956b5216c94..21ab882dd04c3 100644
--- a/test/testhelpers/arrayindexingtypes.jl
+++ b/test/testhelpers/arrayindexingtypes.jl
@@ -51,6 +51,8 @@ end
 
 Base.size(A::TSlow) = A.dims
 Base.similar(A::TSlow, ::Type{T}, dims::Dims) where {T} = TSlow(T, dims)
+Base.similar(::Type{TSlow{T,N}}, dims::Dims) where {T,N} = TSlow(T, dims)
+
 Base.IndexStyle(::Type{A}) where {A<:TSlow} = IndexCartesian()
 Base.getindex(A::TSlow{T,N}, i::Vararg{Int,N}) where {T,N} = get(A.data, i, zero(T))
 Base.setindex!(A::TSlow{T,N}, v, i::Vararg{Int,N}) where {T,N} = (A.data[i] = v)
@@ -66,3 +68,26 @@ Base.axes(A::WrapperArray) = axes(A.parent)
 Base.getindex(A::WrapperArray, i::Int...) = A.parent[i...]
 Base.setindex!(A::WrapperArray, v, i::Int...) = A.parent[i...] = v
 Base.similar(A::WrapperArray, ::Type{T}, dims::Dims) where T = similar(A.parent, T, dims)
+Base.cconvert(::Type{Ptr{T}}, A::WrapperArray{T}) where {T} = Base.cconvert(Ptr{T}, A.parent)
+Base.strides(A::WrapperArray) = strides(A.parent)
+Base.elsize(::Type{WrapperArray{T,N,A}}) where {T,N,A<:AbstractArray{T,N}} = Base.elsize(A)
+
+# An array type with heterogenous axis types
+struct TestAxis{N}
+    len::Int
+end
+Base.length(a::TestAxis) = a.len
+Base.iterate(::TestAxis{N}) where N = (1, 1)
+Base.iterate(a::TestAxis{N}, state) where N = state < a.len ? (state+1, state+1) : nothing
+Base.firstindex(a::TestAxis) = 1
+Base.lastindex(a::TestAxis) = a.len
+Base.getindex(::TestAxis, i::Int) = i
+
+struct HeterogeneousAxisArray{T,N} <: AbstractArray{T,N}
+    data::Array{T,N}
+end
+Base.size(A::HeterogeneousAxisArray) = size(A.data)
+Base.getindex(A::HeterogeneousAxisArray, i::Int...) = A.data[i...]
+Base.setindex!(A::HeterogeneousAxisArray, v, i::Int...) = (A.data[i...] = v)
+Base.axes(A::HeterogeneousAxisArray{T,2}) where T = (TestAxis{1}(size(A.data, 1)), TestAxis{2}(size(A.data, 2)))
+Base.reshape(A::HeterogeneousAxisArray, ax) = reshape(A.data, map(length, ax))
diff --git a/test/testhelpers/coverage_file.info b/test/testhelpers/coverage_file.info
index c83e75dee8060..61410a72bc849 100644
--- a/test/testhelpers/coverage_file.info
+++ b/test/testhelpers/coverage_file.info
@@ -1,6 +1,6 @@
 SF:<FILENAME>
 DA:3,1
-DA:4,1
+DA:4,2
 DA:5,0
 DA:7,1
 DA:8,1
@@ -10,9 +10,10 @@ DA:11,1
 DA:12,1
 DA:14,0
 DA:17,1
-DA:19,2
+DA:18,1
+DA:19,1
 DA:20,1
 DA:22,1
-LH:12
-LF:14
+LH:13
+LF:15
 end_of_record
diff --git a/test/testhelpers/coverage_file.info.bad b/test/testhelpers/coverage_file.info.bad
deleted file mode 100644
index 311f6379381ee..0000000000000
--- a/test/testhelpers/coverage_file.info.bad
+++ /dev/null
@@ -1,20 +0,0 @@
-SF:<FILENAME>
-DA:3,1
-DA:4,1
-DA:5,0
-DA:7,1
-DA:8,1
-DA:9,3
-DA:10,5
-DA:11,1
-DA:12,1
-DA:14,0
-DA:17,1
-DA:18,0
-DA:19,2
-DA:20,1
-DA:22,1
-DA:1234,0
-LH:12
-LF:16
-end_of_record
diff --git a/test/testhelpers/coverage_file.info.bad2 b/test/testhelpers/coverage_file.info.bad2
deleted file mode 100644
index a766597be4c17..0000000000000
--- a/test/testhelpers/coverage_file.info.bad2
+++ /dev/null
@@ -1,20 +0,0 @@
-SF:<FILENAME>
-DA:3,1
-DA:4,1
-DA:5,0
-DA:7,1
-DA:8,1
-DA:9,3
-DA:10,5
-DA:11,0
-DA:12,1
-DA:14,0
-DA:17,1
-DA:18,0
-DA:19,0
-DA:20,0
-DA:22,1
-DA:1234,0
-LH:9
-LF:16
-end_of_record
diff --git a/test/testhelpers/coverage_file.jl b/test/testhelpers/coverage_file.jl
index e8e0355952d80..577cc6bb5d2ca 100644
--- a/test/testhelpers/coverage_file.jl
+++ b/test/testhelpers/coverage_file.jl
@@ -24,6 +24,6 @@ end
 
 success = code_coverage_test() == [1, 2, 3] &&
           short_form_func_coverage_test(2) == 4
-exit(success ?  0 : 1)
+exit(success ? 0 : 1)
 
 # end of file
diff --git a/test/testhelpers/just_module.jl b/test/testhelpers/just_module.jl
new file mode 100644
index 0000000000000..71bd87e660eae
--- /dev/null
+++ b/test/testhelpers/just_module.jl
@@ -0,0 +1 @@
+@__MODULE__
diff --git a/test/threadpool_use.jl b/test/threadpool_use.jl
index 7523991fdf6a7..e76d50c7a3fd1 100644
--- a/test/threadpool_use.jl
+++ b/test/threadpool_use.jl
@@ -9,6 +9,15 @@ using Base.Threads
 @test fetch(Threads.@spawn Threads.threadpool()) === :default
 @test fetch(Threads.@spawn :default Threads.threadpool()) === :default
 @test fetch(Threads.@spawn :interactive Threads.threadpool()) === :interactive
+@test fetch(Threads.@spawn :samepool Threads.threadpool()) === Threads.threadpool()
+@sync for tp in [:interactive, :default]
+    Threads.@spawn tp begin
+        @test fetch(Threads.@spawn :samepool Threads.threadpool()) === Threads.threadpool()
+    end
+end
+wait(Threads.@spawn :interactive begin
+    @test fetch(Threads.@spawn :samepool Threads.threadpool()) === Threads.threadpool()
+end)
 tp = :default
 @test fetch(Threads.@spawn tp Threads.threadpool()) === :default
 tp = :interactive
diff --git a/test/threads.jl b/test/threads.jl
index 8189311739e31..fa0b33a6352f3 100644
--- a/test/threads.jl
+++ b/test/threads.jl
@@ -16,7 +16,8 @@ let lk = ReentrantLock()
     t2 = @async (notify(c2); trylock(lk))
     wait(c1)
     wait(c2)
-    @test t1.queue === lk.cond_wait.waitq
+    # wait for the task to park in the queue (it may be spinning)
+    @test timedwait(() -> t1.queue === lk.cond_wait.waitq, 1.0) == :ok
     @test t2.queue !== lk.cond_wait.waitq
     @test istaskdone(t2)
     @test !fetch(t2)
@@ -71,11 +72,24 @@ let e = Event(true), started1 = Event(true), started2 = Event(true), done = Even
     end
 end
 
-let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no threads_exec.jl`
-    for test_nthreads in (1, 2, 4, 4) # run once to try single-threaded mode, then try a couple times to trigger bad races
+
+let cmd1 = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no threads_exec.jl`,
+    cmd2 = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no -e 'print(Threads.threadpoolsize(:default), ",", Threads.threadpoolsize(:interactive))'`
+    for (test_nthreads, test_nthreadsi) in (
+            (1, 0),
+            (1, 1),
+            (2, 0),
+            (2, 1),
+            (4, 0),
+            (4, 0)) # try a couple times to trigger bad races
         new_env = copy(ENV)
-        new_env["JULIA_NUM_THREADS"] = string(test_nthreads)
-        run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))
+        new_env["JULIA_NUM_THREADS"] = string(test_nthreads, ",", test_nthreadsi)
+        run(pipeline(setenv(cmd1, new_env), stdout = stdout, stderr = stderr))
+        threads_config = "$test_nthreads,$test_nthreadsi"
+        # threads set via env var
+        @test chomp(read(setenv(cmd2, new_env), String)) == threads_config
+        # threads set via -t
+        @test chomp(read(`$cmd2 -t$test_nthreads,$test_nthreadsi`, String)) == threads_config
     end
 end
 
@@ -122,10 +136,11 @@ if AFFINITY_SUPPORTED
     end
 end
 
-function get_nthreads(options = ``; cpus = nothing)
+function get_nthreads(options = ``; cpus = nothing, exclusive = false)
     cmd = `$(Base.julia_cmd()) --startup-file=no $(options)`
     cmd = `$cmd -e "print(Threads.threadpoolsize())"`
-    cmd = addenv(cmd, "JULIA_EXCLUSIVE" => "0", "JULIA_NUM_THREADS" => "auto")
+    cmd = addenv(cmd, "JULIA_EXCLUSIVE" => exclusive ? "1" : "0",
+        "JULIA_NUM_THREADS" => "auto")
     if cpus !== nothing
         cmd = setcpuaffinity(cmd, cpus)
     end
@@ -137,6 +152,7 @@ end
         allowed_cpus = findall(uv_thread_getaffinity())
         if length(allowed_cpus) ≥ 2
             @test get_nthreads() ≥ 2
+            @test get_nthreads(exclusive = true) ≥ 2
             @test get_nthreads(cpus = allowed_cpus[1:1]) == 1
             @test get_nthreads(cpus = allowed_cpus[2:2]) == 1
             @test get_nthreads(cpus = allowed_cpus[1:2]) == 2
@@ -288,18 +304,16 @@ close(proc.in)
         proc = run(cmd; wait = false)
         done = Threads.Atomic{Bool}(false)
         timeout = false
-        timer = Timer(100) do _
+        timer = Timer(200) do _
             timeout = true
-            for sig in [Base.SIGTERM, Base.SIGHUP, Base.SIGKILL]
-                for _ in 1:1000
+            for sig in (Base.SIGQUIT, Base.SIGKILL)
+                for _ in 1:3
                     kill(proc, sig)
+                    sleep(1)
                     if done[]
-                        if sig != Base.SIGTERM
-                            @warn "Terminating `$script` required signal $sig"
-                        end
+                        @warn "Terminating `$script` required signal $sig"
                         return
                     end
-                    sleep(0.001)
                 end
             end
         end
@@ -309,16 +323,11 @@ close(proc.in)
             done[] = true
             close(timer)
         end
-        if ( !success(proc) ) || ( timeout )
+        if !success(proc) || timeout
             @error "A \"spawn and wait lots of tasks\" test failed" n proc.exitcode proc.termsignal success(proc) timeout
         end
-        if Sys.iswindows() || Sys.isapple()
-            # Known failure: https://github.com/JuliaLang/julia/issues/43124
-            @test_skip success(proc)
-        else
-            @test success(proc)
-            @test !timeout
-        end
+        @test success(proc)
+        @test !timeout
     end
 end
 
@@ -327,3 +336,252 @@ end
     @test_throws ArgumentError @macroexpand(@threads 1) # arg isn't an Expr
     @test_throws ArgumentError @macroexpand(@threads if true 1 end) # arg doesn't start with for
 end
+
+@testset "rand_ptls underflow" begin
+    @test Base.Partr.cong(UInt32(0)) == 0
+end
+
+@testset "num_stack_mappings metric" begin
+    @test @ccall(jl_get_num_stack_mappings()::Cint) >= 1
+    # There must be at least two: one for the root test task and one for the async task:
+    @test fetch(@async(@ccall(jl_get_num_stack_mappings()::Cint))) >= 2
+end
+
+@testset "Base.Threads docstrings" begin
+    @test isempty(Docs.undocumented_names(Threads))
+end
+
+@testset "wait failed task" begin
+    @testset "wait without throw keyword" begin
+        t = Threads.@spawn error("Error")
+        @test_throws TaskFailedException wait(t)
+    end
+
+    @testset "wait with throw=false" begin
+        t = Threads.@spawn error("Error")
+        wait(t; throw=false)
+        @test istaskfailed(t)
+    end
+end
+
+@testset "jl_*affinity" begin
+    cpumasksize = @ccall uv_cpumask_size()::Cint
+    if cpumasksize > 0 # otherwise affinities are not supported on the platform (UV_ENOTSUP)
+        jl_getaffinity = (tid, mask, cpumasksize) -> ccall(:jl_getaffinity, Int32, (Int16, Ptr{Cchar}, Int32), tid, mask, cpumasksize)
+        jl_setaffinity = (tid, mask, cpumasksize) -> ccall(:jl_setaffinity, Int32, (Int16, Ptr{Cchar}, Int32), tid, mask, cpumasksize)
+        mask = zeros(Cchar, cpumasksize)
+        @test jl_getaffinity(0, mask, cpumasksize) == 0
+        @test !all(iszero, mask)
+        @test jl_setaffinity(0, mask, cpumasksize) == 0
+    end
+end
+
+@testset "io_thread" begin
+    function io_thread_test()
+        # This test creates a thread that does IO and then blocks the main julia thread
+        # This test hangs if you don't spawn an IO thread.
+        # It hanging or not is technically a race but I haven't seen julia win that race yet.
+        cmd = """
+        Base.Experimental.make_io_thread()
+        function callback()::Cvoid
+            println("Running a command")
+            run(`echo 42`)
+            return
+        end
+        function call_on_thread(callback::Ptr{Nothing})
+            tid = UInt[0]
+            threadwork = @cfunction function(arg::Ptr{Cvoid})
+                current_task().donenotify = Base.ThreadSynchronizer()
+                Base.errormonitor(current_task())
+                println("Calling Julia from thread")
+                ccall(arg, Cvoid, ())
+                nothing
+            end Cvoid (Ptr{Cvoid},)
+            err = @ccall uv_thread_create(tid::Ptr{UInt}, threadwork::Ptr{Cvoid}, callback::Ptr{Cvoid})::Cint
+            err == 0 || Base.uv_error("uv_thread_create", err)
+            gc_state = @ccall jl_gc_safe_enter()::Int8
+            err = @ccall uv_thread_join(tid::Ptr{UInt})::Cint
+            @ccall jl_gc_safe_leave(gc_state::Int8)::Cvoid
+            err == 0 || Base.uv_error("uv_thread_join", err)
+            return
+        end
+        function main()
+            callback_ptr = @cfunction(callback, Cvoid, ())
+            call_on_thread(callback_ptr)
+            println("Done")
+        end
+        main()
+
+        """
+        proc = run(pipeline(`$(Base.julia_cmd()) -e $cmd`), wait=false)
+        t = Timer(60) do t; kill(proc); end;
+        @test success(proc)
+        close(t)
+        return true
+    end
+    @test io_thread_test()
+end
+
+# Make sure default number of BLAS threads respects CPU affinity: issue #55572.
+@testset "LinearAlgebra number of default threads" begin
+    if AFFINITY_SUPPORTED
+        allowed_cpus = findall(uv_thread_getaffinity())
+        cmd = addenv(`$(Base.julia_cmd()) --startup-file=no -E 'using LinearAlgebra; BLAS.get_num_threads()'`,
+                     # Remove all variables which could affect the default number of threads
+                     "OPENBLAS_NUM_THREADS"=>nothing,
+                     "GOTO_NUM_THREADS"=>nothing,
+                     "OMP_NUM_THREADS"=>nothing)
+        for n in 1:min(length(allowed_cpus), 8) # Cap to 8 to avoid too many tests on large systems
+            @test readchomp(setcpuaffinity(cmd, allowed_cpus[1:n])) == string(max(1, n ÷ 2))
+        end
+    end
+end
+
+let once = OncePerProcess(() -> return [nothing])
+    @test typeof(once) <: OncePerProcess{Vector{Nothing}}
+    x = @inferred once()
+    @test x === once()
+    @atomic once.state = 0xff
+    @test_throws ErrorException("invalid state for OncePerProcess") once()
+    @test_throws ErrorException("OncePerProcess initializer failed previously") once()
+    @atomic once.state = 0x01
+    @test x === once()
+end
+let once1 = OncePerProcess(BigFloat), once2 = OncePerProcess{BigFloat}(BigFloat)
+    # Using a type as a constructor should create a OncePerProcess with
+    # Type{...} as its initializer (rather than DataType)
+    @test typeof(once1) <: OncePerProcess{BigFloat,Type{BigFloat}}
+    @test typeof(once2) <: OncePerProcess{BigFloat,Type{BigFloat}}
+end
+let once = OncePerProcess{Int}(() -> error("expected"))
+    @test_throws ErrorException("expected") once()
+    @test_throws ErrorException("OncePerProcess initializer failed previously") once()
+end
+
+let e = Base.Event(true),
+    started = Channel{Int16}(Inf),
+    finish = Channel{Nothing}(Inf),
+    exiting = Channel{Nothing}(Inf),
+    starttest2 = Base.Event(),
+    once = OncePerThread() do
+        push!(started, threadid())
+        take!(finish)
+        return [nothing]
+    end
+    alls = OncePerThread() do
+        return [nothing]
+    end
+    @test typeof(once) <: OncePerThread{Vector{Nothing}}
+    push!(finish, nothing)
+    @test_throws ArgumentError once[0]
+    x = @inferred once()
+    @test_throws ArgumentError once[0]
+    @test x === once() === fetch(@async once()) === once[threadid()]
+    @test take!(started) == threadid()
+    @test isempty(started)
+    tids = zeros(UInt, 50)
+    newthreads = zeros(Int16, length(tids))
+    onces = Vector{Vector{Nothing}}(undef, length(tids))
+    allonces = Vector{Vector{Vector{Nothing}}}(undef, length(tids))
+    # allocate closure memory to last until all threads are started
+    cls = [function cl()
+            GC.gc(false) # stress test the GC-safepoint mechanics of jl_adopt_thread
+            try
+                newthreads[i] = threadid()
+                local y = once()
+                onces[i] = y
+                @test x !== y === once() === once[threadid()]
+                wait(starttest2)
+                allonces[i] = Vector{Nothing}[alls[tid] for tid in newthreads]
+            catch ex
+                close(started, ErrorException("failed"))
+                close(finish, ErrorException("failed"))
+                @lock stderr Base.display_error(current_exceptions())
+            end
+            push!(exiting, nothing)
+            GC.gc(false) # stress test the GC-safepoint mechanics of jl_delete_thread
+            nothing
+        end
+    for i = 1:length(tids)]
+    GC.@preserve cls begin # this memory must survive until each corresponding thread exits (waitallthreads / uv_thread_join)
+        Base.preserve_handle(cls)
+        for i = 1:length(tids)
+            function threadcallclosure(tid::Ref{UInt}, cl::Ref{F}) where {F} # create sparam so we can reference the type of cl in the ccall type
+                threadwork = @cfunction cl -> cl() Cvoid (Ref{F},) # create a cfunction that specializes on cl as an argument and calls it
+                err = @ccall uv_thread_create(tid::Ptr{UInt}, threadwork::Ptr{Cvoid}, cl::Ref{F})::Cint # call that on a thread
+                err == 0 || Base.uv_error("uv_thread_create", err)
+                nothing
+            end
+            threadcallclosure(Ref(tids, i), Ref(cls, i))
+        end
+        @noinline function waitallthreads(tids, cls)
+            for i = 1:length(tids)
+                tid = Ref(tids, i)
+                tidp = Base.unsafe_convert(Ptr{UInt}, tid)::Ptr{UInt}
+                gc_state = @ccall jl_gc_safe_enter()::Int8
+                GC.@preserve tid err = @ccall uv_thread_join(tidp::Ptr{UInt})::Cint
+                @ccall jl_gc_safe_leave(gc_state::Int8)::Cvoid
+                err == 0 || Base.uv_error("uv_thread_join", err)
+            end
+            Base.unpreserve_handle(cls)
+        end
+        try
+            # let them finish in batches of 10
+            for i = 1:length(tids) ÷ 10
+                for i = 1:10
+                    newid = take!(started)
+                    @test newid != threadid()
+                end
+                for i = 1:10
+                    push!(finish, nothing)
+                end
+            end
+            @test isempty(started)
+            # now run the second part of the test where they all try to access the other threads elements
+            notify(starttest2)
+        finally
+            for _ = 1:length(tids)
+                # run IO loop until all threads are close to exiting
+                take!(exiting)
+            end
+            waitallthreads(tids, cls)
+        end
+    end
+    @test isempty(started)
+    @test isempty(finish)
+    @test length(IdSet{eltype(onces)}(onces)) == length(onces) # make sure every object is unique
+    allexpected = Vector{Nothing}[alls[tid] for tid in newthreads]
+    @test length(IdSet{eltype(allexpected)}(allexpected)) == length(allexpected) # make sure every object is unique
+    @test all(i -> allonces[i] !== allexpected && all(j -> allonces[i][j] === allexpected[j], eachindex(allexpected)), eachindex(allonces)) # make sure every thread saw the same elements
+    @test_throws ArgumentError once[Threads.maxthreadid() + 1]
+    @test_throws ArgumentError once[-1]
+
+end
+let once1 = OncePerThread(BigFloat), once2 = OncePerThread{BigFloat}(BigFloat)
+    # Using a type as a constructor should create a OncePerThread with
+    # Type{...} as its initializer (rather than DataType)
+    @test typeof(once1) <: OncePerThread{BigFloat,Type{BigFloat}}
+    @test typeof(once2) <: OncePerThread{BigFloat,Type{BigFloat}}
+end
+let once = OncePerThread{Int}(() -> error("expected"))
+    @test_throws ErrorException("expected") once()
+    @test_throws ErrorException("OncePerThread initializer failed previously") once()
+end
+
+let once = OncePerTask(() -> return [nothing])
+    @test typeof(once) <: OncePerTask{Vector{Nothing}}
+    x = @inferred once()
+    @test x === once() !== fetch(@async once())
+    delete!(task_local_storage(), once)
+    @test x !== once() === once()
+end
+let once1 = OncePerTask(BigFloat), once2 = OncePerTask{BigFloat}(BigFloat)
+    # Using a type as a constructor should create a OncePerTask with
+    # Type{...} as its initializer (rather than DataType)
+    @test typeof(once1) <: OncePerTask{BigFloat,Type{BigFloat}}
+    @test typeof(once2) <: OncePerTask{BigFloat,Type{BigFloat}}
+end
+let once = OncePerTask{Int}(() -> error("expected"))
+    @test_throws ErrorException("expected") once()
+    @test_throws ErrorException("expected") once()
+end
diff --git a/test/threads_exec.jl b/test/threads_exec.jl
index 9c7c524febeff..2780888546964 100644
--- a/test/threads_exec.jl
+++ b/test/threads_exec.jl
@@ -3,6 +3,7 @@
 using Test
 using Base.Threads
 using Base.Threads: SpinLock, threadpoolsize
+using LinearAlgebra: peakflops
 
 # for cfunction_closure
 include("testenv.jl")
@@ -27,13 +28,69 @@ end
 # (expected test duration is about 18-180 seconds)
 Timer(t -> killjob("KILLING BY THREAD TEST WATCHDOG\n"), 1200)
 
+module ConcurrencyUtilities
+    function new_task_nonsticky(f)
+        t = Task(f)
+        t.sticky = false
+        t
+    end
+
+    """
+        run_concurrently(worker, n)::Nothing
+
+    Run `n` tasks of `worker` concurrently. Return when all workers are done.
+    """
+    function run_concurrently(worker, n)
+        tasks = map(new_task_nonsticky ∘ Returns(worker), Base.OneTo(n))
+        foreach(schedule, tasks)
+        foreach(fetch, tasks)
+    end
+
+    """
+        run_concurrently_in_new_task(worker, n)::Task
+
+    Return a task that:
+    * is not started yet
+    * when started, runs `n` tasks of `worker` concurrently
+    * returns when all workers are done
+    """
+    function run_concurrently_in_new_task(worker, n)
+        function f(t)
+            run_concurrently(t...)
+        end
+        new_task_nonsticky(f ∘ Returns((worker, n)))
+    end
+end
+
+module AbstractIrrationalExamples
+    for n ∈ 0:9
+        name_aa = Symbol(:aa, n)
+        name_ab = Symbol(:ab, n)
+        name_ba = Symbol(:ba, n)
+        name_bb = Symbol(:bb, n)
+        @eval begin
+            Base.@irrational $name_aa exp(BigFloat(2)^$n)
+            Base.@irrational $name_ab exp(BigFloat(2)^-$n)
+            Base.@irrational $name_ba exp(-(BigFloat(2)^$n))
+            Base.@irrational $name_bb exp(-(BigFloat(2)^-$n))
+        end
+    end
+    const examples = (
+        aa0, aa1, aa2, aa3, aa4, aa5, aa6, aa7, aa8, aa9,
+        ab0, ab1, ab2, ab3, ab4, ab5, ab6, ab7, ab8, ab9,
+        ba0, ba1, ba2, ba3, ba4, ba5, ba6, ba7, ba8, ba9,
+        bb0, bb1, bb2, bb3, bb4, bb5, bb6, bb7, bb8, bb9,
+    )
+end
+
 @testset """threads_exec.jl with JULIA_NUM_THREADS == $(ENV["JULIA_NUM_THREADS"])""" begin
 
 @test Threads.threadid() == 1
-@test 1 <= threadpoolsize() <= Threads.maxthreadid()
+@test threadpool() in (:interactive, :default) # thread 1 could be in the interactive pool
+@test 1 <= threadpoolsize(:default) <= Threads.maxthreadid()
 
 # basic lock check
-if threadpoolsize() > 1
+if threadpoolsize(:default) > 1
     let lk = SpinLock()
         c1 = Base.Event()
         c2 = Base.Event()
@@ -53,9 +110,40 @@ if threadpoolsize() > 1
     end
 end
 
+if threadpoolsize() > 1
+    let lk = Base.Threads.PaddedSpinLock()
+        c1 = Base.Event()
+        c2 = Base.Event()
+        @test trylock(lk)
+        @test !trylock(lk)
+        t1 = Threads.@spawn (notify(c1); lock(lk); unlock(lk); trylock(lk))
+        t2 = Threads.@spawn (notify(c2); trylock(lk))
+        Libc.systemsleep(0.1) # block our thread from scheduling for a bit
+        wait(c1)
+        wait(c2)
+        @test !fetch(t2)
+        @test istaskdone(t2)
+        @test !istaskdone(t1)
+        unlock(lk)
+        @test fetch(t1)
+        @test istaskdone(t1)
+    end
+end
+
 # threading constructs
 
-let a = zeros(Int, 2 * threadpoolsize())
+@testset "@threads and @spawn threadpools" begin
+    @threads for i in 1:1
+        @test threadpool() == :default
+    end
+    @test fetch(Threads.@spawn threadpool()) == :default
+    @test fetch(Threads.@spawn :default threadpool()) == :default
+    if threadpoolsize(:interactive) > 0
+        @test fetch(Threads.@spawn :interactive threadpool()) == :interactive
+    end
+end
+
+let a = zeros(Int, 2 * threadpoolsize(:default))
     @threads for i = 1:length(a)
         @sync begin
             @async begin
@@ -75,7 +163,7 @@ end
 
 # parallel loop with parallel atomic addition
 function threaded_loop(a, r, x)
-    counter = Threads.Atomic{Int}(min(threadpoolsize(), length(r)))
+    counter = Threads.Atomic{Int}(min(threadpoolsize(:default), length(r)))
     @threads for i in r
         # synchronize the start given that each partition is started sequentially,
         # meaning that without the wait, if the loop is too fast the iteration can happen in order
@@ -266,29 +354,12 @@ using Base.Threads
 end
 end
 
-# Ensure only LLVM-supported types can be atomic
-@test_throws TypeError Atomic{BigInt}
-@test_throws TypeError Atomic{ComplexF64}
-
-if Sys.ARCH === :i686 || startswith(string(Sys.ARCH), "arm") ||
-   Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le
-
-    @test_throws TypeError Atomic{Int128}()
-    @test_throws TypeError Atomic{UInt128}()
-end
-
-if Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le
-    @test_throws TypeError Atomic{Float16}()
-    @test_throws TypeError Atomic{Float32}()
-    @test_throws TypeError Atomic{Float64}()
-end
-
 function test_atomic_bools()
     x = Atomic{Bool}(false)
-    # Arithmetic functions are not defined.
-    @test_throws MethodError atomic_add!(x, true)
-    @test_throws MethodError atomic_sub!(x, true)
-    # All the rest are:
+    # Arithmetic functions such as true+true returns Int
+    @test_throws TypeError atomic_add!(x, true)
+    @test_throws TypeError atomic_sub!(x, true)
+    # All the rest are supported:
     for v in [true, false]
         @test x[] == atomic_xchg!(x, v)
         @test v == atomic_cas!(x, v, !v)
@@ -394,10 +465,9 @@ end
 test_fence()
 
 # Test load / store with various types
-let atomictypes = intersect((Int8, Int16, Int32, Int64, Int128,
-                             UInt8, UInt16, UInt32, UInt64, UInt128,
-                             Float16, Float32, Float64),
-                            Base.Threads.atomictypes)
+let atomictypes = (Int8, Int16, Int32, Int64, Int128,
+                   UInt8, UInt16, UInt32, UInt64, UInt128,
+                   Float16, Float32, Float64)
     for T in atomictypes
         var = Atomic{T}()
         var[] = 42
@@ -425,10 +495,10 @@ function test_atomic_cas!(var::Atomic{T}, range::StepRange{Int,Int}) where T
         end
     end
 end
-for T in intersect((Int32, Int64, Float32, Float64), Base.Threads.atomictypes)
+for T in (Int32, Int64, Float32, Float64)
     var = Atomic{T}()
     nloops = 1000
-    di = threadpoolsize()
+    di = threadpoolsize(:default)
     @threads for i in 1:di
         test_atomic_cas!(var, i:di:nloops)
     end
@@ -439,7 +509,7 @@ function test_atomic_xchg!(var::Atomic{T}, i::Int, accum::Atomic{Int}) where T
     old = atomic_xchg!(var, T(i))
     atomic_add!(accum, Int(old))
 end
-for T in intersect((Int32, Int64, Float32, Float64), Base.Threads.atomictypes)
+for T in (Int32, Int64, Float32, Float64)
     accum = Atomic{Int}()
     var = Atomic{T}()
     nloops = 1000
@@ -454,7 +524,7 @@ function test_atomic_float(varadd::Atomic{T}, varmax::Atomic{T}, varmin::Atomic{
     atomic_max!(varmax, T(i))
     atomic_min!(varmin, T(i))
 end
-for T in intersect((Int32, Int64, Float16, Float32, Float64), Base.Threads.atomictypes)
+for T in (Int32, Int64, Float16, Float32, Float64)
     varadd = Atomic{T}()
     varmax = Atomic{T}()
     varmin = Atomic{T}()
@@ -518,13 +588,13 @@ function test_thread_cfunction()
     @test cfs[1] == cf1
     @test cfs[2] == cf(fs[2])
     @test length(unique(cfs)) == 1000
-    ok = zeros(Int, threadpoolsize())
+    ok = zeros(Int, threadpoolsize(:default))
     @threads :static for i in 1:10000
         i = mod1(i, 1000)
         fi = fs[i]
         cfi = cf(fi)
         GC.@preserve cfi begin
-            ok[threadid()] += (cfi === cfs[i])
+            ok[threadid() - threadpoolsize(:interactive)] += (cfi === cfs[i])
         end
     end
     @test sum(ok) == 10000
@@ -533,20 +603,6 @@ if cfunction_closure
     test_thread_cfunction()
 end
 
-function test_thread_range()
-    a = zeros(Int, threadpoolsize())
-    @threads for i in 1:threadid()
-        a[i] = 1
-    end
-    for i in 1:threadid()
-        @test a[i] == 1
-    end
-    for i in (threadid() + 1):threadpoolsize()
-        @test a[i] == 0
-    end
-end
-test_thread_range()
-
 # Thread safety of `jl_load_and_lookup`.
 function test_load_and_lookup_18020(n)
     @threads for i in 1:n
@@ -581,17 +637,17 @@ test_nested_loops()
 
 function test_thread_too_few_iters()
     x = Atomic()
-    a = zeros(Int, threadpoolsize()+2)
-    threaded_loop(a, 1:threadpoolsize()-1, x)
-    found = zeros(Bool, threadpoolsize()+2)
-    for i=1:threadpoolsize()-1
+    a = zeros(Int, threadpoolsize(:default)+2)
+    threaded_loop(a, 1:threadpoolsize(:default)-1, x)
+    found = zeros(Bool, threadpoolsize(:default)+2)
+    for i=1:threadpoolsize(:default)-1
         found[a[i]] = true
     end
-    @test x[] == threadpoolsize()-1
+    @test x[] == threadpoolsize(:default)-1
     # Next test checks that all loop iterations ran,
     # and were unique (via pigeon-hole principle).
-    @test !(false in found[1:threadpoolsize()-1])
-    @test !(true in found[threadpoolsize():end])
+    @test !(false in found[1:threadpoolsize(:default)-1])
+    @test !(true in found[threadpoolsize(:default):end])
 end
 test_thread_too_few_iters()
 
@@ -733,10 +789,10 @@ function _atthreads_with_error(a, err)
     end
     a
 end
-@test_throws CompositeException _atthreads_with_error(zeros(threadpoolsize()), true)
-let a = zeros(threadpoolsize())
+@test_throws CompositeException _atthreads_with_error(zeros(threadpoolsize(:default)), true)
+let a = zeros(threadpoolsize(:default))
     _atthreads_with_error(a, false)
-    @test a == [1:threadpoolsize();]
+    @test a == [threadpoolsize(:interactive) .+ (1:threadpoolsize(:default));]
 end
 
 # static schedule
@@ -747,11 +803,11 @@ function _atthreads_static_schedule(n)
     end
     return ids
 end
-@test _atthreads_static_schedule(threadpoolsize()) == 1:threadpoolsize()
-@test _atthreads_static_schedule(1) == [1;]
+@test _atthreads_static_schedule(threadpoolsize(:default)) == threadpoolsize(:interactive) .+ (1:threadpoolsize(:default))
+@test _atthreads_static_schedule(1) == [threadpoolsize(:interactive) + 1;]
 @test_throws(
     "`@threads :static` cannot be used concurrently or nested",
-    @threads(for i = 1:1; _atthreads_static_schedule(threadpoolsize()); end),
+    @threads(for i = 1:1; _atthreads_static_schedule(threadpoolsize(:default)); end),
 )
 
 # dynamic schedule
@@ -764,35 +820,35 @@ function _atthreads_dynamic_schedule(n)
     end
     return inc[], flags
 end
-@test _atthreads_dynamic_schedule(threadpoolsize()) == (threadpoolsize(), ones(threadpoolsize()))
+@test _atthreads_dynamic_schedule(threadpoolsize(:default)) == (threadpoolsize(:default), ones(threadpoolsize(:default)))
 @test _atthreads_dynamic_schedule(1) == (1, ones(1))
 @test _atthreads_dynamic_schedule(10) == (10, ones(10))
-@test _atthreads_dynamic_schedule(threadpoolsize() * 2) == (threadpoolsize() * 2, ones(threadpoolsize() * 2))
+@test _atthreads_dynamic_schedule(threadpoolsize(:default) * 2) == (threadpoolsize(:default) * 2, ones(threadpoolsize(:default) * 2))
 
 # nested dynamic schedule
 function _atthreads_dynamic_dynamic_schedule()
     inc = Threads.Atomic{Int}(0)
-    Threads.@threads :dynamic for _ = 1:threadpoolsize()
-        Threads.@threads :dynamic for _ = 1:threadpoolsize()
+    Threads.@threads :dynamic for _ = 1:threadpoolsize(:default)
+        Threads.@threads :dynamic for _ = 1:threadpoolsize(:default)
             Threads.atomic_add!(inc, 1)
         end
     end
     return inc[]
 end
-@test _atthreads_dynamic_dynamic_schedule() == threadpoolsize() * threadpoolsize()
+@test _atthreads_dynamic_dynamic_schedule() == threadpoolsize(:default) * threadpoolsize(:default)
 
 function _atthreads_static_dynamic_schedule()
-    ids = zeros(Int, threadpoolsize())
+    ids = zeros(Int, threadpoolsize(:default))
     inc = Threads.Atomic{Int}(0)
-    Threads.@threads :static for i = 1:threadpoolsize()
+    Threads.@threads :static for i = 1:threadpoolsize(:default)
         ids[i] = Threads.threadid()
-        Threads.@threads :dynamic for _ = 1:threadpoolsize()
+        Threads.@threads :dynamic for _ = 1:threadpoolsize(:default)
             Threads.atomic_add!(inc, 1)
         end
     end
     return ids, inc[]
 end
-@test _atthreads_static_dynamic_schedule() == (1:threadpoolsize(), threadpoolsize() * threadpoolsize())
+@test _atthreads_static_dynamic_schedule() == (threadpoolsize(:interactive) .+ (1:threadpoolsize(:default)), threadpoolsize(:default) * threadpoolsize(:default))
 
 # errors inside @threads :dynamic
 function _atthreads_dynamic_with_error(a)
@@ -801,7 +857,85 @@ function _atthreads_dynamic_with_error(a)
     end
     a
 end
-@test_throws "user error in the loop body" _atthreads_dynamic_with_error(zeros(threadpoolsize()))
+@test_throws "user error in the loop body" _atthreads_dynamic_with_error(zeros(threadpoolsize(:default)))
+
+####
+# :greedy
+###
+
+function _atthreads_greedy_schedule(n)
+    inc = Threads.Atomic{Int}(0)
+    flags = zeros(Int, n)
+    Threads.@threads :greedy for i = 1:n
+        Threads.atomic_add!(inc, 1)
+        flags[i] = 1
+    end
+    return inc[], flags
+end
+@test _atthreads_greedy_schedule(threadpoolsize(:default)) == (threadpoolsize(:default), ones(threadpoolsize(:default)))
+@test _atthreads_greedy_schedule(1) == (1, ones(1))
+@test _atthreads_greedy_schedule(10) == (10, ones(10))
+@test _atthreads_greedy_schedule(threadpoolsize(:default) * 2) == (threadpoolsize(:default) * 2, ones(threadpoolsize(:default) * 2))
+
+# nested greedy schedule
+function _atthreads_greedy_greedy_schedule()
+    inc = Threads.Atomic{Int}(0)
+    Threads.@threads :greedy for _ = 1:threadpoolsize(:default)
+        Threads.@threads :greedy for _ = 1:threadpoolsize(:default)
+            Threads.atomic_add!(inc, 1)
+        end
+    end
+    return inc[]
+end
+@test _atthreads_greedy_greedy_schedule() == threadpoolsize(:default) * threadpoolsize(:default)
+
+function _atthreads_greedy_dynamic_schedule()
+    inc = Threads.Atomic{Int}(0)
+    Threads.@threads :greedy for _ = 1:threadpoolsize(:default)
+        Threads.@threads :dynamic for _ = 1:threadpoolsize(:default)
+            Threads.atomic_add!(inc, 1)
+        end
+    end
+    return inc[]
+end
+@test _atthreads_greedy_dynamic_schedule() == threadpoolsize(:default) * threadpoolsize(:default)
+
+function _atthreads_dynamic_greedy_schedule()
+    inc = Threads.Atomic{Int}(0)
+    Threads.@threads :dynamic for _ = 1:threadpoolsize(:default)
+        Threads.@threads :greedy for _ = 1:threadpoolsize(:default)
+            Threads.atomic_add!(inc, 1)
+        end
+    end
+    return inc[]
+end
+@test _atthreads_dynamic_greedy_schedule() == threadpoolsize(:default) * threadpoolsize(:default)
+
+function _atthreads_static_greedy_schedule()
+    ids = zeros(Int, threadpoolsize(:default))
+    inc = Threads.Atomic{Int}(0)
+    Threads.@threads :static for i = 1:threadpoolsize(:default)
+        ids[i] = Threads.threadid()
+        Threads.@threads :greedy for _ = 1:threadpoolsize(:default)
+            Threads.atomic_add!(inc, 1)
+        end
+    end
+    return ids, inc[]
+end
+@test _atthreads_static_greedy_schedule() == (threadpoolsize(:interactive) .+ (1:threadpoolsize(:default)), threadpoolsize(:default) * threadpoolsize(:default))
+
+# errors inside @threads :greedy
+function _atthreads_greedy_with_error(a)
+    Threads.@threads :greedy for i in eachindex(a)
+        error("user error in the loop body")
+    end
+    a
+end
+@test_throws "user error in the loop body" _atthreads_greedy_with_error(zeros(threadpoolsize(:default)))
+
+####
+# multi-argument loop
+####
 
 try
     @macroexpand @threads(for i = 1:10, j = 1:10; end)
@@ -1030,7 +1164,7 @@ function check_sync_end_race()
                 nnotscheduled += y === :notscheduled
             end
             # Useful for tuning the test:
-            @debug "`check_sync_end_race` done" threadpoolsize() ncompleted nnotscheduled nerror
+            @debug "`check_sync_end_race` done" threadpoolsize(:default) ncompleted nnotscheduled nerror
         finally
             done[] = true
         end
@@ -1044,23 +1178,25 @@ end
 
 # issue #41546, thread-safe package loading
 @testset "package loading" begin
-    ch = Channel{Bool}(threadpoolsize())
+    ntasks = max(threadpoolsize(:default), 4)
+    ch = Channel{Bool}(ntasks)
     barrier = Base.Event()
     old_act_proj = Base.ACTIVE_PROJECT[]
     try
         pushfirst!(LOAD_PATH, "@")
         Base.ACTIVE_PROJECT[] = joinpath(@__DIR__, "TestPkg")
         @sync begin
-            for _ in 1:threadpoolsize()
+            for _ in 1:ntasks
                 Threads.@spawn begin
                     put!(ch, true)
                     wait(barrier)
                     @eval using TestPkg
                 end
             end
-            for _ in 1:threadpoolsize()
+            for _ in 1:ntasks
                 take!(ch)
             end
+            close(ch)
             notify(barrier)
         end
         @test Base.root_module(@__MODULE__, :TestPkg) isa Module
@@ -1090,4 +1226,442 @@ end
     end
 end
 
+#Thread safety of threacall
+function threadcall_threads()
+    Threads.@threads for i = 1:8
+        ptr = @threadcall(:jl_malloc, Ptr{Cint}, (Csize_t,), sizeof(Cint))
+        @test ptr != C_NULL
+        unsafe_store!(ptr, 3)
+        @test unsafe_load(ptr) == 3
+        ptr = @threadcall(:jl_realloc, Ptr{Cint}, (Ptr{Cint}, Csize_t,), ptr, 2 * sizeof(Cint))
+        @test ptr != C_NULL
+        unsafe_store!(ptr, 4, 2)
+        @test unsafe_load(ptr, 1) == 3
+        @test unsafe_load(ptr, 2) == 4
+        @threadcall(:jl_free, Cvoid, (Ptr{Cint},), ptr)
+    end
+end
+@testset "threadcall + threads" begin
+    threadcall_threads() #Shouldn't crash!
+end
+
+@testset "Wait multiple tasks" begin
+    convert_tasks(t, x) = x
+    convert_tasks(::Set{Task}, x::Vector{Task}) = Set{Task}(x)
+    convert_tasks(::Tuple{Task}, x::Vector{Task}) = tuple(x...)
+
+    function create_tasks()
+        tasks = Task[]
+        event = Threads.Event()
+        push!(tasks,
+              Threads.@spawn begin
+                  sleep(0.01)
+              end)
+        push!(tasks,
+              Threads.@spawn begin
+                  sleep(0.02)
+              end)
+        push!(tasks,
+              Threads.@spawn begin
+                  wait(event)
+              end)
+        return tasks, event
+    end
+
+    function teardown(tasks, event)
+        notify(event)
+        waitall(resize!(tasks, 3), throw=true)
+    end
+
+    for tasks_type in (Vector{Task}, Set{Task}, Tuple{Task})
+        @testset "waitany" begin
+            @testset "throw=false" begin
+                tasks, event = create_tasks()
+                wait(tasks[1])
+                wait(tasks[2])
+                done,  pending = waitany(convert_tasks(tasks_type, tasks); throw=false)
+                @test length(done) == 2
+                @test tasks[1] ∈ done
+                @test tasks[2] ∈ done
+                @test length(pending) == 1
+                @test tasks[3] ∈ pending
+                teardown(tasks, event)
+            end
+
+            @testset "throw=true" begin
+                tasks, event = create_tasks()
+                push!(tasks, Threads.@spawn error("Error"))
+                wait(tasks[end]; throw=false)
+
+                @test_throws CompositeException begin
+                    waitany(convert_tasks(tasks_type, tasks); throw=true)
+                end
+
+                teardown(tasks, event)
+            end
+        end
+
+        @testset "waitall" begin
+            @testset "All tasks succeed" begin
+                tasks, event = create_tasks()
+
+                wait(tasks[1])
+                wait(tasks[2])
+                waiter = Threads.@spawn waitall(convert_tasks(tasks_type, tasks))
+                @test !istaskdone(waiter)
+
+                notify(event)
+                done, pending = fetch(waiter)
+                @test length(done) == 3
+                @test tasks[1] ∈ done
+                @test tasks[2] ∈ done
+                @test tasks[3] ∈ done
+                @test length(pending) == 0
+            end
+
+            @testset "failfast=true, throw=false" begin
+                tasks, event = create_tasks()
+                push!(tasks, Threads.@spawn error("Error"))
+
+                wait(tasks[1])
+                wait(tasks[2])
+                waiter = Threads.@spawn waitall(convert_tasks(tasks_type, tasks); failfast=true, throw=false)
+
+                done, pending = fetch(waiter)
+                @test length(done) == 3
+                @test tasks[1] ∈ done
+                @test tasks[2] ∈ done
+                @test tasks[4] ∈ done
+                @test length(pending) == 1
+                @test tasks[3] ∈ pending
+
+                teardown(tasks, event)
+            end
+
+            @testset "failfast=false, throw=true" begin
+                tasks, event = create_tasks()
+                push!(tasks, Threads.@spawn error("Error"))
+
+                notify(event)
+
+                @test_throws CompositeException begin
+                    waitall(convert_tasks(tasks_type, tasks); failfast=false, throw=true)
+                end
+
+                @test all(istaskdone.(tasks))
+
+                teardown(tasks, event)
+            end
+
+            @testset "failfast=true, throw=true" begin
+                tasks, event = create_tasks()
+                push!(tasks, Threads.@spawn error("Error"))
+
+                @test_throws CompositeException begin
+                    waitall(convert_tasks(tasks_type, tasks); failfast=true, throw=true)
+                end
+
+                @test !istaskdone(tasks[3])
+
+                teardown(tasks, event)
+
+                @test_throws CompositeException begin
+                    waitall(Threads.@spawn(div(1, i)) for i = 0:1)
+                end
+
+                tasks = [Threads.@spawn(div(1, i)) for i = 0:1]
+                wait(tasks[1]; throw=false)
+                wait(tasks[2]; throw=false)
+                @test_throws CompositeException waitall(tasks)
+            end
+        end
+    end
+end
+
+@testset "Base.Experimental.task_metrics" begin
+    t = Task(() -> nothing)
+    @test_throws "const field" t.metrics_enabled = true
+    is_task_metrics_enabled() = fetch(Threads.@spawn current_task().metrics_enabled)
+    @test !is_task_metrics_enabled()
+    try
+        @testset "once" begin
+            Base.Experimental.task_metrics(true)
+            @test is_task_metrics_enabled()
+            Base.Experimental.task_metrics(false)
+            @test !is_task_metrics_enabled()
+        end
+        @testset "multiple" begin
+            Base.Experimental.task_metrics(true)  # 1
+            Base.Experimental.task_metrics(true)  # 2
+            Base.Experimental.task_metrics(true)  # 3
+            @test is_task_metrics_enabled()
+            Base.Experimental.task_metrics(false) # 2
+            @test is_task_metrics_enabled()
+            Base.Experimental.task_metrics(false) # 1
+            @test is_task_metrics_enabled()
+            @sync for i in 1:5                    # 0 (not negative)
+                Threads.@spawn Base.Experimental.task_metrics(false)
+            end
+            @test !is_task_metrics_enabled()
+            Base.Experimental.task_metrics(true)  # 1
+            @test is_task_metrics_enabled()
+        end
+    finally
+        while is_task_metrics_enabled()
+            Base.Experimental.task_metrics(false)
+        end
+    end
+end
+
+@testset "race on `BigFloat` precision when constructing `Rational` from `AbstractIrrational`" begin
+    function test_racy_rational_from_irrational(::Type{Rational{I}}, c::AbstractIrrational) where {I}
+        function construct()
+            Rational{I}(c)
+        end
+        function is_racy_rational_from_irrational()
+            worker_count = 10 * Threads.nthreads()
+            task = ConcurrencyUtilities.run_concurrently_in_new_task(construct, worker_count)
+            schedule(task)
+            ok = true
+            while !istaskdone(task)
+                for _ ∈ 1:1000000
+                    ok &= precision(BigFloat) === prec
+                end
+                GC.safepoint()
+                yield()
+            end
+            fetch(task)
+            ok
+        end
+        prec = precision(BigFloat)
+        task = ConcurrencyUtilities.new_task_nonsticky(is_racy_rational_from_irrational)
+        schedule(task)
+        ok = fetch(task)::Bool
+        setprecision(BigFloat, prec)
+        ok
+    end
+    @testset "c: $c" for c ∈ AbstractIrrationalExamples.examples
+        Q = Rational{Int128}
+        # metatest: `test_racy_rational_from_irrational` needs the constructor
+        # to not be constant folded away, otherwise it's not testing anything.
+        @test !Core.Compiler.is_foldable(Base.infer_effects(Q, Tuple{typeof(c)}))
+        # test for race
+        @test test_racy_rational_from_irrational(Q, c)
+    end
+end
+
+@testset "task time counters" begin
+    @testset "enabled" begin
+        try
+            Base.Experimental.task_metrics(true)
+            start_time = time_ns()
+            t = Threads.@spawn peakflops()
+            wait(t)
+            end_time = time_ns()
+            wall_time_delta = end_time - start_time
+            @test t.metrics_enabled
+            @test Base.Experimental.task_running_time_ns(t) > 0
+            @test Base.Experimental.task_wall_time_ns(t) > 0
+            @test Base.Experimental.task_wall_time_ns(t) >= Base.Experimental.task_running_time_ns(t)
+            @test wall_time_delta > Base.Experimental.task_wall_time_ns(t)
+        finally
+            Base.Experimental.task_metrics(false)
+        end
+    end
+    @testset "disabled" begin
+        t = Threads.@spawn peakflops()
+        wait(t)
+        @test !t.metrics_enabled
+        @test isnothing(Base.Experimental.task_running_time_ns(t))
+        @test isnothing(Base.Experimental.task_wall_time_ns(t))
+    end
+    @testset "task not run" begin
+        t1 = Task(() -> nothing)
+        @test !t1.metrics_enabled
+        @test isnothing(Base.Experimental.task_running_time_ns(t1))
+        @test isnothing(Base.Experimental.task_wall_time_ns(t1))
+        try
+            Base.Experimental.task_metrics(true)
+            t2 = Task(() -> nothing)
+            @test t2.metrics_enabled
+            @test Base.Experimental.task_running_time_ns(t2) == 0
+            @test Base.Experimental.task_wall_time_ns(t2) == 0
+        finally
+            Base.Experimental.task_metrics(false)
+        end
+    end
+    @testset "task failure" begin
+        try
+            Base.Experimental.task_metrics(true)
+            t = Threads.@spawn error("this task failed")
+            @test_throws "this task failed" wait(t)
+            @test Base.Experimental.task_running_time_ns(t) > 0
+            @test Base.Experimental.task_wall_time_ns(t) > 0
+            @test Base.Experimental.task_wall_time_ns(t) >= Base.Experimental.task_running_time_ns(t)
+        finally
+            Base.Experimental.task_metrics(false)
+        end
+    end
+    @testset "direct yield(t)" begin
+        try
+            Base.Experimental.task_metrics(true)
+            start = time_ns()
+            t_outer = Threads.@spawn begin
+                t_inner = Task(() -> peakflops())
+                t_inner.sticky = false
+                # directly yield to `t_inner` rather calling `schedule(t_inner)`
+                yield(t_inner)
+                wait(t_inner)
+                @test Base.Experimental.task_running_time_ns(t_inner) > 0
+                @test Base.Experimental.task_wall_time_ns(t_inner) > 0
+                @test Base.Experimental.task_wall_time_ns(t_inner) >= Base.Experimental.task_running_time_ns(t_inner)
+            end
+            wait(t_outer)
+            delta = time_ns() - start
+            @test Base.Experimental.task_running_time_ns(t_outer) > 0
+            @test Base.Experimental.task_wall_time_ns(t_outer) > 0
+            @test Base.Experimental.task_wall_time_ns(t_outer) >= Base.Experimental.task_running_time_ns(t_outer)
+            @test Base.Experimental.task_wall_time_ns(t_outer) < delta
+        finally
+            Base.Experimental.task_metrics(false)
+        end
+    end
+    @testset "bad schedule" begin
+        try
+            Base.Experimental.task_metrics(true)
+            t1 = Task((x) -> 1)
+            schedule(t1) # MethodError
+            yield()
+            @assert istaskfailed(t1)
+            @test Base.Experimental.task_running_time_ns(t1) > 0
+            @test Base.Experimental.task_wall_time_ns(t1) > 0
+            foo(a, b) = a + b
+            t2 = Task(() -> (peakflops(); foo(wait())))
+            schedule(t2)
+            yield()
+            @assert istaskstarted(t1) && !istaskdone(t2)
+            schedule(t2, 1)
+            yield()
+            @assert istaskfailed(t2)
+            @test Base.Experimental.task_running_time_ns(t2) > 0
+            @test Base.Experimental.task_wall_time_ns(t2) > 0
+        finally
+            Base.Experimental.task_metrics(false)
+        end
+    end
+    @testset "continuously update until task done" begin
+        try
+            Base.Experimental.task_metrics(true)
+            last_running_time = Ref(typemax(Int))
+            last_wall_time = Ref(typemax(Int))
+            t = Threads.@spawn begin
+                running_time = Base.Experimental.task_running_time_ns()
+                wall_time = Base.Experimental.task_wall_time_ns()
+                for _ in 1:5
+                    x = time_ns()
+                    while time_ns() < x + 100
+                    end
+                    new_running_time = Base.Experimental.task_running_time_ns()
+                    new_wall_time = Base.Experimental.task_wall_time_ns()
+                    @test new_running_time > running_time
+                    @test new_wall_time > wall_time
+                    running_time = new_running_time
+                    wall_time = new_wall_time
+                end
+                last_running_time[] = running_time
+                last_wall_time[] = wall_time
+            end
+            wait(t)
+            final_running_time = Base.Experimental.task_running_time_ns(t)
+            final_wall_time = Base.Experimental.task_wall_time_ns(t)
+            @test last_running_time[] < final_running_time
+            @test last_wall_time[] < final_wall_time
+            # ensure many more tasks are run to make sure the counters are
+            # not being updated after a task is done e.g. only when a new task is found
+            @sync for _ in 1:Threads.nthreads()
+                Threads.@spawn rand()
+            end
+            @test final_running_time == Base.Experimental.task_running_time_ns(t)
+            @test final_wall_time == Base.Experimental.task_wall_time_ns(t)
+        finally
+            Base.Experimental.task_metrics(false)
+        end
+    end
+end
+
+@testset "task time counters: lots of spawns" begin
+    using Dates
+    try
+        Base.Experimental.task_metrics(true)
+        # create more tasks than we have threads.
+        # - all tasks must have: cpu time <= wall time
+        # - some tasks must have: cpu time < wall time
+        # - summing across all tasks we must have: total cpu time <= available cpu time
+        n_tasks = 2 * Threads.nthreads(:default)
+        cpu_times = Vector{UInt64}(undef, n_tasks)
+        wall_times = Vector{UInt64}(undef, n_tasks)
+        start_time = time_ns()
+        @sync begin
+            for i in 1:n_tasks
+                start_time_i = time_ns()
+                task_i = Threads.@spawn peakflops(1024)
+                Threads.@spawn begin
+                    wait(task_i)
+                    end_time_i = time_ns()
+                    wall_time_delta_i = end_time_i - start_time_i
+                    cpu_times[$i] = cpu_time_i = Base.Experimental.task_running_time_ns(task_i)
+                    wall_times[$i] = wall_time_i = Base.Experimental.task_wall_time_ns(task_i)
+                    # task should have recorded some cpu-time and some wall-time
+                    @test cpu_time_i > 0
+                    @test wall_time_i > 0
+                    # task cpu-time cannot be greater than its wall-time
+                    @test wall_time_i >= cpu_time_i
+                    # task wall-time must be less than our manually measured wall-time
+                    # between calling `@spawn` and returning from `wait`.
+                    @test wall_time_delta_i > wall_time_i
+                end
+            end
+        end
+        end_time = time_ns()
+        wall_time_delta = (end_time - start_time)
+        available_cpu_time = wall_time_delta * Threads.nthreads(:default)
+        summed_cpu_time = sum(cpu_times)
+        # total CPU time from all tasks can't exceed what was actually available.
+        @test available_cpu_time > summed_cpu_time
+        # some tasks must have cpu-time less than their wall-time, because we had more tasks
+        # than threads.
+        summed_wall_time = sum(wall_times)
+        @test summed_wall_time > summed_cpu_time
+    finally
+        Base.Experimental.task_metrics(false)
+    end
+end
+
+@testset "--timeout-for-safepoint-straggler command-line flag" begin
+    program = "
+        function main()
+            t = Threads.@spawn begin
+                ccall(:uv_sleep, Cvoid, (Cuint,), 20_000)
+            end
+            # Force a GC
+            ccall(:uv_sleep, Cvoid, (Cuint,), 1_000)
+            GC.gc()
+            wait(t)
+        end
+        main()
+    "
+    for timeout in ("1", "4", "16")
+        tmp_output_filename = tempname()
+        tmp_output_file = open(tmp_output_filename, "w")
+        if isnothing(tmp_output_file)
+            error("Failed to open file $tmp_output_filename")
+        end
+        run(pipeline(`$(Base.julia_cmd()) --threads=4 --timeout-for-safepoint-straggler=$(timeout) -e $program`, stderr=tmp_output_file))
+        # Check whether we printed the straggler's backtrace
+        @test !isempty(read(tmp_output_filename, String))
+        close(tmp_output_file)
+        rm(tmp_output_filename)
+    end
+end
+
 end # main testset
diff --git a/test/trimming/Makefile b/test/trimming/Makefile
new file mode 100644
index 0000000000000..f0ee34dcc3703
--- /dev/null
+++ b/test/trimming/Makefile
@@ -0,0 +1,77 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# This Makefile template requires the following variables to be set
+# in the environment or on the command-line:
+#   JULIA: path to julia[.exe] executable
+#   BIN:   binary build directory
+
+ifndef JULIA
+  $(error "Please pass JULIA=[path of target julia binary], or set as environment variable!")
+endif
+ifndef BIN
+  $(error "Please pass BIN=[path of build directory], or set as environment variable!")
+endif
+
+#=============================================================================
+# location of test source
+SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
+JULIAHOME := $(abspath $(SRCDIR)/../..)
+include $(JULIAHOME)/Make.inc
+
+# get the executable suffix, if any
+EXE := $(suffix $(abspath $(JULIA)))
+
+# get compiler and linker flags. (see: `contrib/julia-config.jl`)
+JULIA_CONFIG := $(JULIA) -e 'include(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "julia-config.jl"))' --
+JULIA_LIBDIR := $(shell $(JULIA)  --startup-file=no -e 'println(joinpath(Sys.BINDIR, "..", "lib"))' --)
+CPPFLAGS_ADD :=
+CFLAGS_ADD = $(shell $(JULIA_CONFIG) --cflags)
+LDFLAGS_ADD = -lm $(shell $(JULIA_CONFIG) --ldflags --ldlibs) -ljulia-internal
+
+# get the JuliaC build script
+JULIAC_BUILDSCRIPT := $(shell $(JULIA) --startup-file=no -e 'print(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "juliac", "juliac-buildscript.jl"))')
+
+#=============================================================================
+
+release: $(BIN)/hello$(EXE) $(BIN)/trimmability$(EXE) $(BIN)/basic_jll$(EXE) $(BIN)/capplication$(EXE)
+
+# Complete any needed Pkg operations before trimming (some Pkg code may not be trimmable)
+Manifest.toml: $(SRCDIR)/Project.toml
+	$(JULIA) --startup-file=no --history-file=no -e 'using Pkg; Pkg.activate("$(SRCDIR)"); Pkg.instantiate()'
+
+$(BIN)/hello-o.a: $(SRCDIR)/hello.jl $(JULIAC_BUILDSCRIPT) Manifest.toml
+	$(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(JULIAC_BUILDSCRIPT) $< --output-exe true
+
+$(BIN)/trimmability-o.a: $(SRCDIR)/trimmability.jl $(JULIAC_BUILDSCRIPT) Manifest.toml
+	$(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(JULIAC_BUILDSCRIPT) $< --output-exe true
+
+$(BIN)/basic_jll-o.a: $(SRCDIR)/basic_jll.jl $(JULIAC_BUILDSCRIPT) Manifest.toml
+	$(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --project=$(SRCDIR) --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(JULIAC_BUILDSCRIPT) $< --output-exe true
+
+$(BIN)/libsimple-o.a: $(SRCDIR)/libsimple.jl $(JULIAC_BUILDSCRIPT)
+	$(JULIA) -t 1 -J $(JULIA_LIBDIR)/julia/sys.$(SHLIB_EXT) --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(JULIAC_BUILDSCRIPT) $< --output-lib true $(BIN)/bindinginfo_libsimple.json
+
+$(BIN)/hello$(EXE): $(BIN)/hello-o.a
+	$(CC) -o $@ $(call whole_archive,$<) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS)
+
+$(BIN)/trimmability$(EXE): $(BIN)/trimmability-o.a
+	$(CC) -o $@ $(call whole_archive,$<) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS)
+
+$(BIN)/basic_jll$(EXE): $(BIN)/basic_jll-o.a
+	$(CC) -o $@ $(call whole_archive,$<) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS)
+
+$(BIN)/capplication$(EXE): $(SRCDIR)/capplication.c $(SRCDIR)/libsimple.h $(BIN)/libsimple-o.a
+	$(CC) -I$(BIN) -I$(SRCDIR) -I$(JULIA_LIBDIR) -o $@ $< -Wl,--whole-archive $(BIN)/libsimple-o.a -Wl,--no-whole-archive $(LDFLAGS_ADD) $(LDFLAGS) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS)
+
+check: $(BIN)/hello$(EXE) $(BIN)/trimmability$(EXE) $(BIN)/basic_jll$(EXE) $(BIN)/capplication$(EXE)
+	$(JULIA) --startup-file=no --history-file=no --depwarn=error $(SRCDIR)/trimming.jl $<
+
+clean:
+	-rm -f $(BIN)/hello$(EXE) $(BIN)/trimmability$(EXE) $(BIN)/basic_jll$(EXE) $(BIN)/hello-o.a $(BIN)/trimmability-o.a $(BIN)/basic_jll-o.a $(BIN)/libsimple-o.a $(BIN)/libsimple.$(SHLIB_EXT) $(BIN)/capplication$(EXE) $(BIN)/bindinginfo_libsimple.json $(BIN)/Manifest.toml
+
+.PHONY: release clean check
+
+# Makefile debugging trick:
+# call print-VARIABLE to see the runtime value of any variable
+print-%:
+	@echo '$*=$($*)'
diff --git a/test/trimming/Project.toml b/test/trimming/Project.toml
new file mode 100644
index 0000000000000..ceb1d00de3bf9
--- /dev/null
+++ b/test/trimming/Project.toml
@@ -0,0 +1,12 @@
+[deps]
+JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Zstd_jll = "3161d3a3-bdf6-5164-811a-617609db77b4"
+
+[sources]
+Zstd_jll = {path = "Zstd_jll"}
+
+[compat]
+JSON = "1"
diff --git a/test/trimming/Zstd_jll/Project.toml b/test/trimming/Zstd_jll/Project.toml
new file mode 100644
index 0000000000000..467516843390a
--- /dev/null
+++ b/test/trimming/Zstd_jll/Project.toml
@@ -0,0 +1,15 @@
+name = "Zstd_jll"
+uuid = "3161d3a3-bdf6-5164-811a-617609db77b4"
+version = "1.5.7+1"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/test/trimming/Zstd_jll/src/Zstd_jll.jl b/test/trimming/Zstd_jll/src/Zstd_jll.jl
new file mode 100644
index 0000000000000..c16413f963d0b
--- /dev/null
+++ b/test/trimming/Zstd_jll/src/Zstd_jll.jl
@@ -0,0 +1,73 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/Zstd_jll.j:
+#
+baremodule Zstd_jll
+using Base, Libdl
+
+export libzstd, zstd, zstdmt
+
+# These get calculated in __init__()
+libzstd_handle::Ptr{Cvoid} = C_NULL
+
+if Sys.iswindows()
+    const libzstd = "libzstd-1.dll"
+elseif Sys.isapple()
+    const libzstd = "@rpath/libzstd.1.dylib"
+else
+    const libzstd = "libzstd.so.1"
+end
+
+if Sys.iswindows()
+    const zstd_exe = "zstd.exe"
+    const zstdmt_exe = "zstdmt.exe"
+else
+    const zstd_exe = "zstd"
+    const zstdmt_exe = "zstdmt"
+end
+
+if Sys.iswindows()
+    const pathsep = ';'
+elseif Sys.isapple()
+    const pathsep = ':'
+else
+    const pathsep = ':'
+end
+
+if Sys.iswindows()
+function adjust_ENV(cmd::Cmd)
+    dllPATH = Sys.BINDIR
+    oldPATH = get(ENV, "PATH", "")
+    newPATH = isempty(oldPATH) ? dllPATH : "$dllPATH$pathsep$oldPATH"
+    return addenv(cmd, "PATH"=>newPATH)
+end
+else
+adjust_ENV(cmd::Cmd) = cmd
+end
+
+function adjust_ENV()
+    addPATH = joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR)
+    oldPATH = get(ENV, "PATH", "")
+    newPATH = isempty(oldPATH) ? addPATH : "$addPATH$pathsep$oldPATH"
+    return ("PATH"=>newPATH,)
+end
+
+function zstd(f::Function; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) # deprecated, for compat only
+    withenv((adjust_PATH ? adjust_ENV() : ())...) do
+        f(zstd())
+    end
+end
+function zstdmt(f::Function; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) # deprecated, for compat only
+    withenv((adjust_PATH ? adjust_ENV() : ())...) do
+        f(zstdmt())
+    end
+end
+zstd() = adjust_ENV(`$(joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, zstd_exe))`)
+zstdmt() = adjust_ENV(`$(joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, zstdmt_exe))`)
+
+function __init__()
+    global libzstd_handle = dlopen(libzstd)
+    nothing
+end
+
+end  # module Zstd_jll
diff --git a/test/trimming/Zstd_jll/test/runtests.jl b/test/trimming/Zstd_jll/test/runtests.jl
new file mode 100644
index 0000000000000..5cfa2a1375c73
--- /dev/null
+++ b/test/trimming/Zstd_jll/test/runtests.jl
@@ -0,0 +1,7 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Zstd_jll
+
+@testset "Zstd_jll" begin
+    @test ccall((:ZSTD_versionNumber, libzstd), Cuint, ()) == 1_05_07
+end
diff --git a/test/trimming/basic_jll.jl b/test/trimming/basic_jll.jl
new file mode 100644
index 0000000000000..748bc2585c050
--- /dev/null
+++ b/test/trimming/basic_jll.jl
@@ -0,0 +1,28 @@
+using Libdl
+using Zstd_jll # Note this uses the vendored older non-LazyLibrary version of Zstd_jll
+
+# JLL usage at build-time should function as expected
+Zstd_jll.__init__()
+const build_ver = unsafe_string(ccall((:ZSTD_versionString, libzstd), Cstring, ()))
+
+function print_string(fptr::Ptr{Cvoid})
+    println(Core.stdout, unsafe_string(ccall(fptr, Cstring, ())))
+end
+
+function @main(args::Vector{String})::Cint
+    # Test the basic "Hello, world!"
+    println(Core.stdout, "Julia! Hello, world!")
+
+    # JLL usage at run-time should function as expected
+    ver = unsafe_string(ccall((:ZSTD_versionString, libzstd), Cstring, ()))
+    println(Core.stdout, ver)
+    @assert ver == build_ver
+
+    sleep(0.01)
+
+    # Add an indirection via `@cfunction` / 1-arg ccall
+    cfunc = @cfunction(print_string, Cvoid, (Ptr{Cvoid},))
+    fptr = dlsym(Zstd_jll.libzstd_handle, :ZSTD_versionString)
+    ccall(cfunc, Cvoid, (Ptr{Cvoid},), fptr)
+    return 0
+end
diff --git a/test/trimming/capplication.c b/test/trimming/capplication.c
new file mode 100644
index 0000000000000..390faa282fdab
--- /dev/null
+++ b/test/trimming/capplication.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include "libsimple.h"
+
+int main() {
+    // Example usage of the functions defined in libsimple.h
+    CVectorPair_Float32 vecPair;
+    vecPair.from.length = 3;
+    vecPair.from.data = (float[]){1.0f, 2.0f, 3.0f};
+    vecPair.to.length = 3;
+    vecPair.to.data = (float[]){4.0f, 5.0f, 6.0f};
+
+    float sum = copyto_and_sum(vecPair);
+    printf("Sum of copied values: %f\n", sum);
+
+    MyTwoVec list[] = {{1, 2}, {5, 5}, {3, 4}};
+    int32_t count = countsame(list, 3);
+    printf("Count of same vectors: %d\n", count);
+
+    return 0;
+}
diff --git a/test/trimming/hello.jl b/test/trimming/hello.jl
new file mode 100644
index 0000000000000..620a55b171544
--- /dev/null
+++ b/test/trimming/hello.jl
@@ -0,0 +1,6 @@
+# Test that minimal executable size stays low
+
+function @main(args::Vector{String})::Cint
+    println(Core.stdout, "Hello, world!")
+    return 0
+end
diff --git a/test/trimming/libsimple.h b/test/trimming/libsimple.h
new file mode 100644
index 0000000000000..ddd745f8f8ecc
--- /dev/null
+++ b/test/trimming/libsimple.h
@@ -0,0 +1,31 @@
+#ifndef JULIALIB_LIBSIMPLE_H
+#define JULIALIB_LIBSIMPLE_H
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+struct CTree_Float64;
+typedef struct CVector_CTree_Float64 {
+    int32_t length;
+    struct CTree_Float64* data;
+} CVector_CTree_Float64;
+typedef struct CTree_Float64 {
+    CVector_CTree_Float64 children;
+} CTree_Float64;
+typedef struct MyTwoVec {
+    int32_t x;
+    int32_t y;
+} MyTwoVec;
+typedef struct CVector_Float32 {
+    int32_t length;
+    float* data;
+} CVector_Float32;
+typedef struct CVectorPair_Float32 {
+    CVector_Float32 from;
+    CVector_Float32 to;
+} CVectorPair_Float32;
+
+float copyto_and_sum(CVectorPair_Float32 fromto);
+int64_t tree_size(CTree_Float64 tree);
+int32_t countsame(MyTwoVec* list, int32_t n);
+#endif // JULIALIB_LIBSIMPLE_H
diff --git a/test/trimming/libsimple.jl b/test/trimming/libsimple.jl
new file mode 100644
index 0000000000000..64126ff8c384e
--- /dev/null
+++ b/test/trimming/libsimple.jl
@@ -0,0 +1,58 @@
+module SimpleLib
+# Test the logging of entrypoints and types in a C-callable Julia library.
+
+struct CVector{T}
+    length::Cint
+    data::Ptr{T}
+end
+
+struct CVectorPair{T}
+    from::CVector{T}
+    to::CVector{T}
+end
+
+struct MyTwoVec
+    x::Int32
+    y::Int32
+end
+
+struct CTree{T}
+    # test that recursive datatypes work as expected
+    children::CVector{CTree{T}}
+end
+
+Base.@ccallable "tree_size" function size(tree::CTree{Float64})::Int64
+    children = unsafe_wrap(Array, tree.children.data, tree.children.length)
+    # Return the size of this sub-tree
+    return sum(Int64[
+        size(child)
+        for child in children
+    ]; init=1)
+end
+
+Base.@ccallable "copyto_and_sum" function badname(fromto::CVectorPair{Float32})::Float32
+    from, to = unsafe_wrap(Array, fromto.from.data, fromto.from.length), unsafe_wrap(Array, fromto.to.data, fromto.to.length)
+    copyto!(to, from)
+    return sum(to)
+end
+
+Base.@ccallable function countsame(list::Ptr{MyTwoVec}, n::Int32)::Int32
+    list = unsafe_wrap(Array, list, n)
+    count = 0
+    for v in list
+        count += v.x == v.y
+    end
+    return count
+end
+
+export countsame, copyto_and_sum
+
+# FIXME? varargs
+# Base.@ccallable function printints(x::Cint...)::Nothing
+#     for i in 1:length(x)
+#         print(x[i], " ")
+#     end
+#     println()
+# end
+
+end
diff --git a/test/trimming/trimmability.jl b/test/trimming/trimmability.jl
new file mode 100644
index 0000000000000..209a27343d18d
--- /dev/null
+++ b/test/trimming/trimmability.jl
@@ -0,0 +1,54 @@
+# Test that various constructs support trimming
+
+using Sockets
+
+world::String = "world!"
+const str = OncePerProcess{String}() do
+    return "Hello, " * world
+end
+
+abstract type Shape end
+struct Square <: Shape
+    side::Float64
+end
+struct Circle <: Shape
+    radius::Float64
+end
+area(s::Square) = s.side^2
+area(c::Circle) = pi*c.radius^2
+
+sum_areas(v::Vector{Shape}) = sum(area, v)
+
+function @main(args::Vector{String})::Cint
+    println(Core.stdout, str())
+    println(Core.stdout, PROGRAM_FILE)
+    foreach(x->println(Core.stdout, x), args)
+
+    # test map/mapreduce; should work but relies on inlining and other optimizations
+    # test that you can dispatch to some number of concrete cases
+    println(Core.stdout, sum_areas(Shape[Circle(1), Square(2)]))
+
+    arr = rand(10)
+    sorted_arr = sort(arr)
+    tot = sum(sorted_arr)
+    tot = prod(sorted_arr)
+    a = any(x -> x > 0, sorted_arr)
+    b = all(x -> x >= 0, sorted_arr)
+    c = map(x -> x^2, sorted_arr)
+    d = mapreduce(x -> x^2, +, sorted_arr)
+    # e = reduce(xor, rand(Int, 10))
+
+    try
+        sock = connect("localhost", 4900)
+        if isopen(sock)
+            write(sock, "Hello")
+            flush(sock)
+            close(sock)
+        end
+    catch
+    end
+
+    Base.donotdelete(reshape([1,2,3],:,1,1))
+
+    return 0
+end
diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl
new file mode 100644
index 0000000000000..7e15a4d196a0c
--- /dev/null
+++ b/test/trimming/trimming.jl
@@ -0,0 +1,84 @@
+import Pkg
+
+Pkg.activate(".")
+
+using Test
+using JSON
+
+@test length(ARGS) == 1
+bindir = dirname(ARGS[1])
+
+let exe_suffix = splitext(Base.julia_exename())[2]
+
+    hello_exe = joinpath(bindir, "hello" * exe_suffix)
+    @test readchomp(`$hello_exe arg1 arg2`) == "Hello, world!"
+    @test filesize(hello_exe) < 1_900_000
+
+    trimmability_exe = joinpath(bindir, "trimmability" * exe_suffix)
+    @test readchomp(`$trimmability_exe arg1 arg2`) == "Hello, world!\n$trimmability_exe\narg1\narg2\n$(4.0+pi)"
+
+    basic_jll_exe = joinpath(bindir, "basic_jll" * exe_suffix)
+    lines = split(readchomp(`$basic_jll_exe`), "\n")
+    @test lines[1] == "Julia! Hello, world!"
+    @test lines[2] == lines[3]
+    @test Base.VersionNumber(lines[2]) ≥ v"1.5.7"
+    @test filesize(basic_jll_exe) < filesize(unsafe_string(Base.JLOptions().image_file))/10
+
+    # Test that the shared library can be used in a C application
+    capplication_exe = joinpath(bindir, "capplication" * exe_suffix)
+    lines = split(readchomp(`$capplication_exe`), "\n")
+    @test length(lines) == 2
+    @test lines[1] == "Sum of copied values: 6.000000"
+    @test lines[2] == "Count of same vectors: 1"
+
+    # Test that the logging of entrypoints and types works correctly
+    str = read(joinpath(bindir, "bindinginfo_libsimple.json"), String)
+
+    # The log should parse as valid JSON
+    abi = JSON.parse(str)
+
+    # `copyto_and_sum` should have been exported
+    @test any(Bool[func["symbol"] == "copyto_and_sum" for func in abi["functions"]])
+
+    # `CVector{Float32}` should have been exported with the correct info
+    @test any(Bool[type["name"] == "CVector{Float32}" for type in abi["types"]])
+    CVector_Float32 = abi["types"][findfirst(type["name"] == "CVector{Float32}" for type in abi["types"])]
+    @test length(CVector_Float32["fields"]) == 2
+    @test CVector_Float32["fields"][1]["offset"] == 0
+    @test CVector_Float32["fields"][2]["offset"] == 8
+    @test abi["types"][CVector_Float32["fields"][1]["type_id"]]["name"] == "Int32"
+    @test abi["types"][CVector_Float32["fields"][2]["type_id"]]["name"] == "Ptr{Float32}"
+    @test CVector_Float32["size"] == 16
+
+    # `CVectorPair{Float32}` should have been exported with the correct info
+    @test any(Bool[type["name"] == "CVectorPair{Float32}" for type in abi["types"]])
+    CVectorPair_Float32 = abi["types"][findfirst(type["name"] == "CVectorPair{Float32}" for type in abi["types"])]
+    @test length(CVectorPair_Float32["fields"]) == 2
+    @test CVectorPair_Float32["fields"][1]["offset"] == 0
+    @test CVectorPair_Float32["fields"][2]["offset"] == 16
+    @test abi["types"][CVectorPair_Float32["fields"][1]["type_id"]]["name"] == "CVector{Float32}"
+    @test abi["types"][CVectorPair_Float32["fields"][2]["type_id"]]["name"] == "CVector{Float32}"
+    @test CVectorPair_Float32["size"] == 32
+
+    # `CTree{Float64}` should have been exported with the correct info
+    @test any(Bool[type["name"] == "CTree{Float64}" for type in abi["types"]])
+    CTree_Float64_id = findfirst(type["name"] == "CTree{Float64}" for type in abi["types"])
+    CTree_Float64 = abi["types"][CTree_Float64_id]
+    @test length(CTree_Float64["fields"]) == 1
+    @test CTree_Float64["fields"][1]["offset"] == 0
+    CVector_CTree_Float64 = abi["types"][CTree_Float64["fields"][1]["type_id"]]
+    @test CVector_CTree_Float64["name"] == "CVector{CTree{Float64}}"
+    @test CTree_Float64["size"] == sizeof(UInt) * 2
+
+    # `CVector{CTree{Float64}}` should have been exported with the correct info
+    @test length(CVector_CTree_Float64["fields"]) == 2
+    @test CVector_CTree_Float64["fields"][1]["offset"] == 0
+    @test CVector_CTree_Float64["fields"][2]["offset"] == sizeof(UInt)
+    @test abi["types"][CVector_CTree_Float64["fields"][1]["type_id"]]["name"] == "Int32"
+    @test abi["types"][CVector_CTree_Float64["fields"][2]["type_id"]]["name"] == "Ptr{CTree{Float64}}"
+    @test CVector_CTree_Float64["size"] == sizeof(UInt) * 2
+
+    # `Ptr{CTree{Float64}}` should refer (recursively) back to the original type id
+    Ptr_CTree_Float64 = abi["types"][CVector_CTree_Float64["fields"][2]["type_id"]]
+    @test Ptr_CTree_Float64["pointee_type_id"] == CTree_Float64_id
+end
diff --git a/test/tuple.jl b/test/tuple.jl
index 71770b6a553c2..1f5714415e21c 100644
--- a/test/tuple.jl
+++ b/test/tuple.jl
@@ -208,11 +208,13 @@ end
     @test iterate(t, y3[2]) === nothing
 
     @test eachindex((2,5,"foo")) === Base.OneTo(3)
-    @test eachindex((2,5,"foo"), (1,2,5,7)) === Base.OneTo(4)
+    @test_throws DimensionMismatch eachindex((2,5,"foo"), (1,2,5,7))
+
+    @test Core.Compiler.is_nothrow(Base.infer_effects(iterate, (Tuple{Int,Int,Int}, Int)))
 end
 
 
-@testset "element type" begin
+@testset "elelement/value/key types" begin
     @test eltype((1,2,3)) === Int
     @test eltype((1.0,2.0,3.0)) <: AbstractFloat
     @test eltype((true, false)) === Bool
@@ -227,6 +229,11 @@ end
         typejoin(Int, Float64, Bool)
     @test eltype(Tuple{Int, Missing}) === Union{Missing, Int}
     @test eltype(Tuple{Int, Nothing}) === Union{Nothing, Int}
+
+    @test valtype((1,2,3)) === eltype((1,2,3))
+    @test valtype(Tuple{Int, Missing}) === eltype(Tuple{Int, Missing})
+    @test keytype((1,2,3)) === Int
+    @test keytype(Tuple{Int, Missing}) === Int
 end
 
 @testset "map with Nothing and Missing" begin
@@ -362,9 +369,9 @@ end
     @test !isless((1,2), (1,2))
     @test !isless((2,1), (1,2))
 
-    @test hash(()) === Base.tuplehash_seed
-    @test hash((1,)) === hash(1, Base.tuplehash_seed)
-    @test hash((1,2)) === hash(1, hash(2, Base.tuplehash_seed))
+    @test hash(()) === Base.tuplehash_seed ⊻ Base.HASH_SEED
+    @test hash((1,)) === hash(1, Base.tuplehash_seed ⊻ Base.HASH_SEED)
+    @test hash((1,2)) === hash(1, hash(2, Base.tuplehash_seed ⊻ Base.HASH_SEED))
 
     # Test Any32 methods
     t = ntuple(identity, 32)
@@ -386,7 +393,7 @@ end
     @test !isless((t...,1,2), (t...,1,2))
     @test !isless((t...,2,1), (t...,1,2))
 
-    @test hash(t) === foldr(hash, [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,(),UInt(0)])
+    @test hash(t) === foldr(hash, vcat(1:32, (), Base.HASH_SEED))
 end
 
 @testset "functions" begin
@@ -525,6 +532,21 @@ end
     for n = 0:15
         @test ntuple(identity, Val(n)) == ntuple(identity, n)
     end
+
+    @test Base.infer_return_type(ntuple, Tuple{typeof(identity), Val}) == Tuple{Vararg{Int}}
+
+    # issue #55790
+    for n in 1:32
+        @test typeof(ntuple(identity, UInt64(n))) == NTuple{n, Int}
+    end
+
+    @test Tuple == Base.infer_return_type(((f, n) -> ntuple(f, n)), Tuple{Any, Any})
+    @test Tuple{Vararg{Int}} == Base.infer_return_type(((f, n) -> ntuple(f, n)), Tuple{typeof(identity), Any})
+
+    for n in 0:15
+        @test NTuple{n, Any} == Base.infer_return_type(((f, n) -> ntuple(f, n)), Tuple{Any, Val{n}})
+        @test NTuple{n, Int} == Base.infer_return_type(((f, n) -> ntuple(f, n)), Tuple{typeof(identity), Val{n}})
+    end
 end
 
 struct A_15703{N}
@@ -647,6 +669,8 @@ end
 
     f() = Base.setindex((1:1, 2:2, 3:3), 9, 1)
     @test @inferred(f()) == (9, 2:2, 3:3)
+
+    @test Base.return_types(Base.setindex, Tuple{Tuple,Nothing,Int}) == [Tuple]
 end
 
 @testset "inferable range indexing with constant values" begin
@@ -763,6 +787,12 @@ g42457(a, b) = Base.isequal(a, b) ? 1 : 2.0
 # issue #46049: setindex(::Tuple) regression
 @inferred Base.setindex((1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16), 42, 1)
 
+# issue #50562
+f50562(r) = in(:i_backward, r[])
+r50562 = Ref((:b_back, :foofakgka, :i_backw))
+f50562(r50562)
+@test @allocated(f50562(r50562)) == 0
+
 # issue #47326
 function fun1_47326(args...)
     head..., tail = args
@@ -785,6 +815,29 @@ namedtup = (;a=1, b=2, c=3)
         NamedTuple{(:c,), Tuple{Int}},
     }
 
+@testset "`Base.split_rest(::Tuple, ::Vararg)` return type inference" begin
+    let f(t) = Base.split_rest(t, 3)
+        tuple_types_of_length(n::Int) = (NTuple{n, Any}, NTuple{n}, NTuple{n, Float32})
+        @testset "inferred return type must subtype `NTuple{2, Tuple}`" begin
+            for T in (
+                Tuple, Tuple{Vararg{Float32}},  # any length
+                Tuple{Any, Vararg{Any}}, (Tuple{T, Vararg{T}} where {T}), Tuple{Float32, Vararg{Float32}},  # length greater than one
+                tuple_types_of_length(5)...,  # length five
+            )
+                @test Base.infer_return_type(f, Tuple{T}) <: NTuple{2, Tuple}
+                for S in (Tuple{T, Any, Any}, Tuple{T, Any}, Tuple{T, Int, Any}, Tuple{T, Int}, Tuple{T, Int, Int})
+                    @test Base.infer_return_type(Base.split_rest, S) <: NTuple{2, Tuple}
+                end
+            end
+        end
+        @testset "with exactly-known length: `5 == 2 + 3`" begin
+            for T in tuple_types_of_length(5)
+                @test Base.infer_return_type(f, Tuple{T}) <: Tuple{Tuple{Any, Any}, Tuple{Any, Any, Any}}
+            end
+        end
+    end
+end
+
 # Make sure that tuple iteration is foldable
 @test Core.Compiler.is_foldable(Base.infer_effects(iterate, Tuple{NTuple{4, Float64}, Int}))
 @test Core.Compiler.is_foldable(Base.infer_effects(eltype, Tuple{Tuple}))
@@ -796,3 +849,38 @@ namedtup = (;a=1, b=2, c=3)
 @test_throws ErrorException("Tuple field type cannot be Union{}") Tuple{Vararg{Union{},1}}
 @test Tuple{} <: Tuple{Vararg{Union{},N}} where N
 @test !(Tuple{} >: Tuple{Vararg{Union{},N}} where N)
+
+@test Val{Tuple{T,T,T} where T} === Val{Tuple{Vararg{T,3}} where T}
+@test Val{Tuple{Vararg{T,4}} where T} === Val{Tuple{T,T,T,T} where T}
+@test Val{Tuple{Int64, Vararg{Int32,N}} where N} === Val{Tuple{Int64, Vararg{Int32}}}
+@test Val{Tuple{Int32, Vararg{Int64}}} === Val{Tuple{Int32, Vararg{Int64,N}} where N}
+
+@testset "from Pair, issue #52636" begin
+    pair = (1 => "2")
+    @test (1, "2") == @inferred Tuple(pair)
+    @test (1, "2") == @inferred Tuple{Int,String}(pair)
+end
+
+@testset "circshift" begin
+    t1 = (1, 2, 3, 4, 5)
+    t2 = (1, 'a', -7.0, 3)
+    t3 = ('a', 'b', 'c', 'd')
+    @test @inferred(Base.circshift(t1, 2)) == (4, 5, 1, 2, 3)
+    # The return type of mixed tuples with runtime shift cannot be inferred.
+    @test Base.circshift(t2, 3) == ('a', -7.0, 3, 1)
+    @test @inferred(Base.circshift(t3, 7)) == ('b', 'c', 'd', 'a')
+    @test @inferred(Base.circshift(t3, -1)) == ('b', 'c', 'd', 'a')
+    @test_throws MethodError circshift(t1, 'a')
+    @test Base.infer_return_type(circshift, Tuple{Tuple,Integer}) <: Tuple
+    @test Base.infer_return_type(circshift, Tuple{Tuple{Vararg{Any,10}},Integer}) <: Tuple{Vararg{Any,10}}
+    for len ∈ 0:5
+        v = 1:len
+        t = Tuple(v)
+        for shift ∈ -6:6
+            @test circshift(v, shift) == collect(circshift(t, shift))
+        end
+    end
+end
+
+@test NTuple == Base.infer_return_type(reverse, Tuple{NTuple})
+@test Tuple{Vararg{Int}} == Base.infer_return_type(reverse, Tuple{Tuple{Vararg{Int}}})
diff --git a/test/vecelement.jl b/test/vecelement.jl
index 6638f06f4f358..b89eb097ee560 100644
--- a/test/vecelement.jl
+++ b/test/vecelement.jl
@@ -1,5 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-
+using InteractiveUtils
 make_value(::Type{T}, i::Integer) where {T<:Integer} = 3*i%T
 make_value(::Type{T},i::Integer) where {T<:AbstractFloat} = T(3*i)
 
@@ -120,3 +120,9 @@ for T in (Float64, Float32, Int64, Int32)
         @test b == result
     end
 end
+@testset "vecelement overalignment" begin
+    io = IOBuffer()
+    code_llvm(io,getindex, (Array{NTuple{5, VecElement{Float64}}, 1}, Int64), optimize=false)
+    ir = String(take!(io))
+    @test match(r"align 64", ir) === nothing
+end
diff --git a/test/version.jl b/test/version.jl
index 3723bb0f788e2..242b32c47cbdc 100644
--- a/test/version.jl
+++ b/test/version.jl
@@ -219,11 +219,14 @@ for major=0:3, minor=0:3, patch=0:3
     end
 end
 
-# banner
-import Base.banner
-io = IOBuffer()
-@test banner(io) === nothing
-@test length(String(take!(io))) > 50
+# VersionNumber has the promised fields
+let v = v"4.2.1-1.x+a.9"
+    @test v.major isa Integer
+    @test v.minor isa Integer
+    @test v.patch isa Integer
+    @test v.prerelease isa Tuple{Vararg{Union{Integer, AbstractString}}}
+    @test v.build isa Tuple{Vararg{Union{Integer, AbstractString}}}
+end
 
 # julia_version.h version test
 @test VERSION.major == ccall(:jl_ver_major, Cint, ())
diff --git a/test/worlds.jl b/test/worlds.jl
index b5a8f1c5449ac..96685129c9e99 100644
--- a/test/worlds.jl
+++ b/test/worlds.jl
@@ -2,10 +2,12 @@
 
 # tests for accurate updating of method tables
 
-using Base: get_world_counter
-tls_world_age() = ccall(:jl_get_tls_world_age, UInt, ())
+using Base: get_world_counter, tls_world_age
 @test typemax(UInt) > get_world_counter() == tls_world_age() > 0
 
+# issue #58013
+@test_throws ArgumentError invokelatest()
+
 # test simple method replacement
 begin
     g265a() = f265a(0)
@@ -108,7 +110,7 @@ end
 g265() = [f265(x) for x in 1:3.]
 wc265 = get_world_counter()
 wc265_41332a = Task(tls_world_age)
-@test tls_world_age() == wc265
+@test tls_world_age() == wc265 + 1
 (function ()
     global wc265_41332b = Task(tls_world_age)
     @eval f265(::Any) = 1.0
@@ -116,24 +118,24 @@ wc265_41332a = Task(tls_world_age)
     global wc265_41332d = Task(tls_world_age)
     nothing
 end)()
-@test wc265 + 2 == get_world_counter() == tls_world_age()
+@test wc265 + 11 == get_world_counter() == tls_world_age()
 schedule(wc265_41332a)
 schedule(wc265_41332b)
 schedule(wc265_41332c)
 schedule(wc265_41332d)
-@test wc265 == fetch(wc265_41332a)
-@test wc265 + 1 == fetch(wc265_41332b)
-@test wc265 + 2 == fetch(wc265_41332c)
-@test wc265 + 1 == fetch(wc265_41332d)
+@test wc265 + 1 == fetch(wc265_41332a)
+@test wc265 + 9 == fetch(wc265_41332b)
+@test wc265 + 11 == fetch(wc265_41332c)
+@test wc265 + 9 == fetch(wc265_41332d)
 chnls, tasks = Base.channeled_tasks(2, wfunc)
 t265 = tasks[1]
 
 wc265 = get_world_counter()
 @test put_n_take!(get_world_counter, ()) == wc265
-@test put_n_take!(tls_world_age, ()) == wc265
+@test put_n_take!(tls_world_age, ()) + 3 == wc265
 f265(::Int) = 1
 @test put_n_take!(get_world_counter, ()) == wc265 + 1 == get_world_counter() == tls_world_age()
-@test put_n_take!(tls_world_age, ()) == wc265
+@test put_n_take!(tls_world_age, ()) + 3 == wc265
 
 @test g265() == Int[1, 1, 1]
 @test Core.Compiler.return_type(f265, Tuple{Any,}) == Union{Float64, Int}
@@ -163,12 +165,12 @@ let ex = t265.exception
     @test ex isa MethodError
     @test ex.f == h265
     @test ex.args == ()
-    @test ex.world == wc265
+    @test ex.world == wc265-3
     str = sprint(showerror, ex)
     wc = get_world_counter()
     cmps = """
         MethodError: no method matching h265()
-        The applicable method may be too new: running in world age $wc265, while current world is $wc."""
+        The applicable method may be too new: running in world age $(wc265-3), while current world is $wc."""
     @test startswith(str, cmps)
     cmps = "\n  h265() (method too new to be called from this world context.)\n   $loc_h265"
     @test occursin(cmps, str)
@@ -192,31 +194,26 @@ f_gen265(x::Type{Int}) = 3
 # would have capped those specializations if they were still valid
 f26506(@nospecialize(x)) = 1
 g26506(x) = Base.inferencebarrier(f26506)(x[1])
-z = Any["ABC"]
+z26506 = Any["ABC"]
 f26506(x::Int) = 2
-g26506(z) # Places an entry for f26506(::String) in mt.name.cache
+g26506(z26506) # Places an entry for f26506(::String) in MethodTable cache
+w26506 = Base.get_world_counter()
+cache26506 = ccall(:jl_mt_find_cache_entry, Any, (Any, Any, UInt), Core.methodtable.cache, Tuple{typeof(f26506),String}, w26506)::Core.TypeMapEntry
+@test cache26506.max_world === typemax(UInt)
+w26506 = Base.get_world_counter()
 f26506(x::String) = 3
-let cache = typeof(f26506).name.mt.cache
-    # The entry we created above should have been truncated
-    @test cache.min_world == cache.max_world
-end
-c26506_1, c26506_2 = Condition(), Condition()
-# Captures the world age
-result26506 = Any[]
-t = Task(()->begin
-    wait(c26506_1)
-    push!(result26506, g26506(z))
-    notify(c26506_2)
-end)
-yield(t)
+@test w26506+1 === Base.get_world_counter()
+# The entry we created above should have been truncated
+@test cache26506.max_world == w26506
+# Captures the world age on creation
+t26506 = @task g26506(z26506)
 f26506(x::Float64) = 4
-let cache = typeof(f26506).name.mt.cache
-    # The entry we created above should have been truncated
-    @test cache.min_world == cache.max_world
-end
-notify(c26506_1)
-wait(c26506_2)
-@test result26506[1] == 3
+@test cache26506.max_world == w26506
+f26506(x::String) = 5
+# The entry we created above should not have been changed
+@test cache26506.max_world == w26506
+@test fetch(schedule(t26506)) === 3
+@test g26506(z26506) === 5
 
 # issue #38435
 f38435(::Int, ::Any) = 1
@@ -258,15 +255,13 @@ end
 # avoid adding this to Base
 function equal(ci1::Core.CodeInfo, ci2::Core.CodeInfo)
     return ci1.code == ci2.code &&
-           ci1.codelocs == ci2.codelocs &&
+           ci1.debuginfo == ci2.debuginfo &&
            ci1.ssavaluetypes == ci2.ssavaluetypes &&
            ci1.ssaflags == ci2.ssaflags &&
            ci1.method_for_inference_limit_heuristics == ci2.method_for_inference_limit_heuristics &&
-           ci1.linetable == ci2.linetable &&
            ci1.slotnames == ci2.slotnames &&
            ci1.slotflags == ci2.slotflags &&
-           ci1.slottypes == ci2.slottypes &&
-           ci1.rettype == ci2.rettype
+           ci1.slottypes == ci2.slottypes
 end
 equal(p1::Pair, p2::Pair) = p1.second == p2.second && equal(p1.first, p2.first)
 
@@ -419,3 +414,205 @@ ccall(:jl_debug_method_invalidation, Any, (Cint,), 0)
     which(mc48954, (AbstractFloat, Int)),
     "jl_method_table_insert"
 ]
+
+# logging issue #58080
+f58080(::Integer) = 1
+callsf58080rts(x) = f58080(Base.inferencebarrier(x)::Signed)
+invokesf58080s(x) = invoke(f58080, Tuple{Signed}, x)
+# compilation
+invokesf58080s(1)                        # invoked callee
+callsf58080rts(1)                        # runtime-dispatched callee
+# invalidation
+logmeths = ccall(:jl_debug_method_invalidation, Any, (Cint,), 1);
+f58080(::Int) = 2
+f58080(::Signed) = 4
+ccall(:jl_debug_method_invalidation, Any, (Cint,), 0);
+@test logmeths[1].def.name === :callsf58080rts
+m58080i = which(f58080, (Int,))
+m58080s = which(f58080, (Signed,))
+idxi = findfirst(==(m58080i), logmeths)
+@test logmeths[idxi+1] == "jl_method_table_insert"
+@test logmeths[idxi+2].def.name === :invokesf58080s
+@test logmeths[end-1] == m58080s
+@test logmeths[end] == "jl_method_table_insert"
+
+# logging binding invalidations
+struct LogBindingInvalidation
+    x::Int
+end
+makelbi(x) = LogBindingInvalidation(x)
+const glbi = makelbi(1)
+oLBI, oglbi = LogBindingInvalidation, glbi
+flbi() = @__MODULE__().glbi.x
+flbi()
+milbi1 = only(Base.specializations(only(methods(makelbi))))
+milbi2 = only(Base.specializations(only(methods(flbi))))
+logmeths = ccall(:jl_debug_method_invalidation, Any, (Cint,), 1)
+struct LogBindingInvalidation
+    x::Float64
+end
+const glbi = makelbi(2.0)
+@test flbi() === 2.0
+ccall(:jl_debug_method_invalidation, Any, (Cint,), 0)
+@test milbi1.cache.def ∈ logmeths
+@test milbi2.cache.next.def ∈ logmeths
+i = findfirst(x -> isa(x, Core.BindingPartition), logmeths)
+T = logmeths[i].restriction
+@test T === oLBI
+@test logmeths[i+1] == "jl_maybe_log_binding_invalidation"
+T = logmeths[end-1].restriction
+@test T === oglbi
+@test logmeths[end] == "jl_maybe_log_binding_invalidation"
+
+# issue #50091 -- missing invoke edge affecting nospecialized dispatch
+module ExceptionUnwrapping
+@nospecialize
+unwrap_exception(@nospecialize(e)) = e
+unwrap_exception(e::Base.TaskFailedException) = e.task.exception
+@noinline function _summarize_task_exceptions(io::IO, exc, prefix = nothing)
+    _summarize_exception((;prefix,), io, exc)
+    nothing
+end
+@noinline function _summarize_exception(kws, io::IO, e::TaskFailedException)
+    _summarize_task_exceptions(io, e.task, kws.prefix)
+end
+# This is the overload that prints the actual exception that occurred.
+result = Bool[]
+@noinline function _summarize_exception(kws, io::IO, @nospecialize(exc))
+    global result
+    push!(result, unwrap_exception(exc) === exc)
+    if unwrap_exception(exc) !== exc # something uninferrable
+        return _summarize_exception(kws, io, unwrap_exception(exc))
+    end
+end
+struct X; x; end
+end
+let e = ExceptionUnwrapping.X(nothing)
+    @test ExceptionUnwrapping.unwrap_exception(e) === e
+    ExceptionUnwrapping._summarize_task_exceptions(devnull, e)
+    @test ExceptionUnwrapping.result == [true]
+    empty!(ExceptionUnwrapping.result)
+end
+ExceptionUnwrapping.unwrap_exception(e::ExceptionUnwrapping.X) = e.x
+let e = ExceptionUnwrapping.X(nothing)
+    @test !(ExceptionUnwrapping.unwrap_exception(e) === e)
+    ExceptionUnwrapping._summarize_task_exceptions(devnull, e)
+    @test ExceptionUnwrapping.result == [false, true]
+    empty!(ExceptionUnwrapping.result)
+end
+
+fshadow() = 1
+gshadow() = fshadow()
+@test fshadow() === 1
+@test gshadow() === 1
+fshadow_m1 = which(fshadow, ())
+fshadow() = 2
+fshadow() = 3
+@test fshadow() === 3
+@test gshadow() === 3
+fshadow_m3 = which(fshadow, ())
+Base.delete_method(fshadow_m1)
+@test fshadow() === 3
+@test gshadow() === 3
+Base.delete_method(fshadow_m3)
+fshadow_m2 = which(fshadow, ())
+@test fshadow() === 2
+@test gshadow() === 2
+Base.delete_method(fshadow_m2)
+@test_throws MethodError(fshadow, (), Base.tls_world_age()) gshadow()
+@test Base.morespecific(fshadow_m3, fshadow_m2)
+@test Base.morespecific(fshadow_m2, fshadow_m1)
+@test Base.morespecific(fshadow_m3, fshadow_m1)
+@test !Base.morespecific(fshadow_m2, fshadow_m3)
+
+@test_throws "Method of fshadow already disabled" Base.delete_method(fshadow_m2)
+
+# Generated functions without edges must have min_world = 1.
+# N.B.: If changing this, move this test to precompile and make sure
+# that the specialization survives revalidation.
+function generated_no_edges_gen(world, args...)
+    src = ccall(:jl_new_code_info_uninit, Ref{Core.CodeInfo}, ())
+    src.code = Any[Core.ReturnNode(nothing)]
+    src.slotnames = Symbol[:self]
+    src.slotflags = UInt8[0x00]
+    src.ssaflags = UInt32[0x00]
+    src.ssavaluetypes = 1
+    src.nargs = 1
+    src.min_world = first(Base._methods(generated_no_edges, Tuple{}, -1, world)).method.primary_world
+
+    return src
+end
+
+@eval function generated_no_edges()
+    $(Expr(:meta, :generated, generated_no_edges_gen))
+    $(Expr(:meta, :generated_only))
+end
+
+@test_throws ErrorException("Generated function result with `edges == nothing` and `max_world == typemax(UInt)` must have `min_world == 1`") generated_no_edges()
+
+# Test that backdating of constants is working for structs
+before_backdate_age = Base.tls_world_age()
+struct FooBackdated
+    x::Vector{FooBackdated}
+
+    FooBackdated() = new(FooBackdated[])
+end
+# For depwarn == 1, this throws a warning on access, for depwarn == 2, it throws an error.
+# `isdefinedglobal` changes with that, but doesn't error.
+if Base.JLOptions().depwarn <= 1
+    @test Base.invoke_in_world(before_backdate_age, isdefinedglobal, @__MODULE__, :FooBackdated)
+else
+    @test !Base.invoke_in_world(before_backdate_age, isdefinedglobal, @__MODULE__, :FooBackdated)
+end
+
+# Test that ambiguous binding intersect the using'd binding's world ranges
+module AmbigWorldTest
+    using Test
+    module M1; export x; end
+    module M2; export x; end
+    using .M1, .M2
+    Core.eval(M1, :(x=1))
+    Core.eval(M2, :(x=2))
+    @test_throws UndefVarError x
+    @test convert(Core.Binding, GlobalRef(@__MODULE__, :x)).partitions.min_world == max(
+        convert(Core.Binding, GlobalRef(M1, :x)).partitions.min_world,
+        convert(Core.Binding, GlobalRef(M2, :x)).partitions.min_world
+    )
+end
+
+module X57316; module Y57316; end; end
+module A57316; using ..X57316.Y57316, .Y57316.Y57316; end
+module B57316; import ..X57316.Y57316, .Y57316.Y57316; end
+module C57316; import ..X57316.Y57316 as Z, .Z.Y57316 as W; end
+@test X57316.Y57316 === A57316.Y57316 === B57316.Y57316 === C57316.Z === C57316.W
+@test !isdefined(A57316, :X57316)
+@test !isdefined(B57316, :X57316)
+@test !isdefined(C57316, :X57316)
+@test !isdefined(C57316, :Y57316)
+
+# jl_module_import should always manipulate the latest world
+module M57965
+function f()
+    @eval Random = 1
+    Core._eval_import(true, @__MODULE__, nothing, Expr(:., :Random))
+end
+end
+@test_throws ErrorException("importing Random into M57965 conflicts with an existing global") M57965.f()
+
+# issue #59429 - world age semantics with toplevel in macros
+module M59429
+using Test
+macro new_enum(T::Symbol, args...)
+   esc(quote
+      @enum $T $(args...)
+      function Base.hash(x::$T, h::UInt)
+        rand(UInt)
+      end
+    end)
+end
+
+@new_enum Foo59429 bar59429 baz59429
+
+# Test that the hash function works without world age issues
+@test hash(bar59429, UInt(0)) isa UInt
+end
diff --git a/typos.toml b/typos.toml
new file mode 100644
index 0000000000000..f4cabf1dd540d
--- /dev/null
+++ b/typos.toml
@@ -0,0 +1,5 @@
+[default]
+extend-ignore-words-re = ["^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?$"]
+
+[default.extend-words]
+indexin = "indexin"